summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.mailmap2
-rw-r--r--Documentation/ABI/testing/sysfs-block12
-rw-r--r--Documentation/ABI/testing/sysfs-block-device43
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-uncore13
-rw-r--r--Documentation/ABI/testing/sysfs-bus-platform14
-rw-r--r--Documentation/ABI/testing/sysfs-driver-ge-achc15
-rw-r--r--Documentation/ABI/testing/sysfs-platform-dptf40
-rw-r--r--Documentation/ABI/testing/sysfs-platform_profile7
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst29
-rw-r--r--Documentation/RCU/Design/Requirements/Requirements.rst8
-rw-r--r--Documentation/RCU/checklist.rst24
-rw-r--r--Documentation/RCU/rcu_dereference.rst6
-rw-r--r--Documentation/RCU/stallwarn.rst31
-rw-r--r--Documentation/admin-guide/binderfs.rst13
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst11
-rw-r--r--Documentation/admin-guide/device-mapper/dm-ima.rst715
-rw-r--r--Documentation/admin-guide/device-mapper/index.rst1
-rw-r--r--Documentation/admin-guide/device-mapper/writecache.rst16
-rw-r--r--Documentation/admin-guide/hw-vuln/index.rst1
-rw-r--r--Documentation/admin-guide/hw-vuln/l1d_flush.rst69
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt21
-rw-r--r--Documentation/atomic_t.txt94
-rw-r--r--Documentation/bpf/index.rst10
-rw-r--r--Documentation/bpf/libbpf/index.rst (renamed from Documentation/bpf/libbpf/libbpf.rst)8
-rw-r--r--Documentation/bpf/libbpf/libbpf_api.rst27
-rw-r--r--Documentation/bpf/libbpf/libbpf_naming_convention.rst6
-rw-r--r--Documentation/core-api/cpu_hotplug.rst2
-rw-r--r--Documentation/core-api/irq/irq-domain.rst28
-rw-r--r--Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt44
-rw-r--r--Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.yaml89
-rw-r--r--Documentation/devicetree/bindings/fpga/xlnx,versal-fpga.yaml33
-rw-r--r--Documentation/devicetree/bindings/fsi/ibm,fsi2spi.yaml1
-rw-r--r--Documentation/devicetree/bindings/gpio/rockchip,gpio-bank.yaml5
-rw-r--r--Documentation/devicetree/bindings/hwmon/amd,sbrmi.yaml53
-rw-r--r--Documentation/devicetree/bindings/hwmon/winbond,w83781d.yaml41
-rw-r--r--Documentation/devicetree/bindings/iio/st,st-sensors.yaml41
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml1
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml11
-rw-r--r--Documentation/devicetree/bindings/leds/common.yaml6
-rw-r--r--Documentation/devicetree/bindings/misc/ge-achc.txt26
-rw-r--r--Documentation/devicetree/bindings/misc/ge-achc.yaml65
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml18
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.yaml4
-rw-r--r--Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml133
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-msm.txt1
-rw-r--r--Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt43
-rw-r--r--Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml84
-rw-r--r--Documentation/devicetree/bindings/net/can/bosch,c_can.yaml119
-rw-r--r--Documentation/devicetree/bindings/net/can/bosch,m_can.yaml9
-rw-r--r--Documentation/devicetree/bindings/net/can/c_can.txt65
-rw-r--r--Documentation/devicetree/bindings/net/can/can-controller.yaml9
-rw-r--r--Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml17
-rw-r--r--Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml69
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fec.yaml244
-rw-r--r--Documentation/devicetree/bindings/net/fsl-fec.txt95
-rw-r--r--Documentation/devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml54
-rw-r--r--Documentation/devicetree/bindings/net/litex,liteeth.yaml98
-rw-r--r--Documentation/devicetree/bindings/net/macb.txt1
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ipa.yaml24
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml15
-rw-r--r--Documentation/devicetree/bindings/nvmem/nintendo-otp.yaml44
-rw-r--r--Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml3
-rw-r--r--Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.txt20
-rw-r--r--Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml53
-rw-r--r--Documentation/devicetree/bindings/phy/intel,keembay-phy-usb.yaml (renamed from Documentation/devicetree/bindings/phy/intel,phy-keembay-usb.yaml)2
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,tphy.yaml30
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml4
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml1
-rw-r--r--Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml15
-rw-r--r--Documentation/devicetree/bindings/phy/samsung,ufs-phy.yaml1
-rw-r--r--Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.txt82
-rw-r--r--Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.yaml103
-rw-r--r--Documentation/devicetree/bindings/power/supply/battery.yaml14
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml3
-rw-r--r--Documentation/devicetree/bindings/power/supply/mt6360_charger.yaml48
-rw-r--r--Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml30
-rw-r--r--Documentation/devicetree/bindings/power/supply/x-powers,axp20x-ac-power-supply.yaml11
-rw-r--r--Documentation/devicetree/bindings/power/supply/x-powers,axp20x-battery-power-supply.yaml12
-rw-r--r--Documentation/devicetree/bindings/power/supply/x-powers,axp20x-usb-power-supply.yaml14
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rtq2134-regulator.yaml106
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rtq6752-regulator.yaml76
-rw-r--r--Documentation/devicetree/bindings/regulator/socionext,uniphier-regulator.yaml85
-rw-r--r--Documentation/devicetree/bindings/regulator/uniphier-regulator.txt58
-rw-r--r--Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml8
-rw-r--r--Documentation/devicetree/bindings/spi/omap-spi.txt48
-rw-r--r--Documentation/devicetree/bindings/spi/omap-spi.yaml117
-rw-r--r--Documentation/devicetree/bindings/spi/rockchip-sfc.yaml91
-rw-r--r--Documentation/devicetree/bindings/spi/spi-mt65xx.txt1
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sprd-adi.txt63
-rw-r--r--Documentation/devicetree/bindings/spi/sprd,spi-adi.yaml104
-rw-r--r--Documentation/devicetree/bindings/timer/rockchip,rk-timer.txt27
-rw-r--r--Documentation/devicetree/bindings/timer/rockchip,rk-timer.yaml64
-rw-r--r--Documentation/driver-api/fpga/fpga-bridge.rst10
-rw-r--r--Documentation/driver-api/fpga/fpga-mgr.rst12
-rw-r--r--Documentation/driver-api/fpga/fpga-programming.rst8
-rw-r--r--Documentation/driver-api/fpga/fpga-region.rst20
-rw-r--r--Documentation/driver-api/index.rst1
-rw-r--r--Documentation/driver-api/lightnvm-pblk.rst21
-rw-r--r--Documentation/driver-api/nfc/nfc-hci.rst2
-rw-r--r--Documentation/fault-injection/fault-injection.rst18
-rw-r--r--Documentation/fault-injection/provoke-crashes.rst3
-rw-r--r--Documentation/filesystems/cifs/index.rst10
-rw-r--r--Documentation/filesystems/cifs/ksmbd.rst165
-rw-r--r--Documentation/filesystems/fscrypt.rst15
-rw-r--r--Documentation/filesystems/idmappings.rst1026
-rw-r--r--Documentation/filesystems/index.rst3
-rw-r--r--Documentation/filesystems/locking.rst79
-rw-r--r--Documentation/filesystems/mandatory-locking.rst188
-rw-r--r--Documentation/fpga/dfl.rst4
-rw-r--r--Documentation/gpu/rfc/i915_gem_lmem.rst109
-rw-r--r--Documentation/hwmon/aquacomputer_d5next.rst61
-rw-r--r--Documentation/hwmon/index.rst2
-rw-r--r--Documentation/hwmon/sbrmi.rst79
-rw-r--r--Documentation/hwmon/scpi-hwmon.rst2
-rw-r--r--Documentation/hwmon/sht4x.rst2
-rw-r--r--Documentation/i2c/index.rst1
-rw-r--r--Documentation/leds/well-known-leds.txt58
-rw-r--r--Documentation/networking/batman-adv.rst2
-rw-r--r--Documentation/networking/bonding.rst12
-rw-r--r--Documentation/networking/device_drivers/ethernet/freescale/dpaa2/index.rst1
-rw-r--r--Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst217
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst44
-rw-r--r--Documentation/networking/devlink/devlink-params.rst12
-rw-r--r--Documentation/networking/devlink/hns3.rst25
-rw-r--r--Documentation/networking/devlink/index.rst2
-rw-r--r--Documentation/networking/devlink/sja1105.rst49
-rw-r--r--Documentation/networking/dsa/dsa.rst29
-rw-r--r--Documentation/networking/dsa/sja1105.rst218
-rw-r--r--Documentation/networking/ethtool-netlink.rst23
-rw-r--r--Documentation/networking/filter.rst27
-rw-r--r--Documentation/networking/index.rst2
-rw-r--r--Documentation/networking/ioam6-sysctl.rst26
-rw-r--r--Documentation/networking/ip-sysctl.rst17
-rw-r--r--Documentation/networking/mctp.rst213
-rw-r--r--Documentation/networking/mptcp-sysctl.rst12
-rw-r--r--Documentation/networking/netdevices.rst29
-rw-r--r--Documentation/networking/nf_conntrack-sysctl.rst17
-rw-r--r--Documentation/networking/pktgen.rst18
-rw-r--r--Documentation/networking/timestamping.rst6
-rw-r--r--Documentation/networking/vrf.rst13
-rw-r--r--Documentation/trace/coresight/coresight-config.rst244
-rw-r--r--Documentation/trace/coresight/coresight.rst15
-rw-r--r--Documentation/trace/ftrace.rst2
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst1
-rw-r--r--Documentation/userspace-api/seccomp_filter.rst2
-rw-r--r--Documentation/userspace-api/spec_ctrl.rst8
-rw-r--r--Documentation/virt/kvm/locking.rst8
-rw-r--r--Documentation/x86/x86_64/boot-options.rst11
-rw-r--r--MAINTAINERS118
-rw-r--r--Makefile2
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/alpha/include/uapi/asm/socket.h2
-rw-r--r--arch/arc/Kconfig2
-rw-r--r--arch/arc/include/asm/checksum.h2
-rw-r--r--arch/arc/include/asm/perf_event.h2
-rw-r--r--arch/arc/kernel/fpu.c9
-rw-r--r--arch/arc/kernel/mcip.c2
-rw-r--r--arch/arc/kernel/unwind.c10
-rw-r--r--arch/arc/kernel/vmlinux.lds.S2
-rw-r--r--arch/arm/boot/dts/imx35.dtsi2
-rw-r--r--arch/arm/boot/dts/imx53-ppd.dts23
-rw-r--r--arch/arm/boot/dts/imx6q-novena.dts34
-rw-r--r--arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi18
-rw-r--r--arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi34
-rw-r--r--arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi34
-rw-r--r--arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi34
-rw-r--r--arch/arm/boot/dts/imx6qdl-sabrelite.dtsi34
-rw-r--r--arch/arm/boot/dts/imx7-mba7.dtsi1
-rw-r--r--arch/arm/boot/dts/imx7d-mba7.dts1
-rw-r--r--arch/arm/common/sa1111.c12
-rw-r--r--arch/arm/configs/nhk8815_defconfig8
-rw-r--r--arch/arm/crypto/curve25519-glue.c8
-rw-r--r--arch/arm/include/asm/memory.h7
-rw-r--r--arch/arm/kernel/head.S17
-rw-r--r--arch/arm/mach-ixp4xx/common.c14
-rw-r--r--arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h48
-rw-r--r--arch/arm/mach-pxa/pxa_cplds_irqs.c6
-rw-r--r--arch/arm/mach-s3c/irq-s3c24xx.c5
-rw-r--r--arch/arm/mm/mmu.c9
-rw-r--r--arch/arm/mm/pv-fixup-asm.S2
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/Makefile2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qxp-ss-conn.dtsi4
-rw-r--r--arch/arm64/boot/dts/microchip/sparx5.dtsi5
-rw-r--r--arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts12
-rw-r--r--arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts4
-rw-r--r--arch/arm64/boot/dts/qcom/sc7180.dtsi24
-rw-r--r--arch/arm64/boot/dts/qcom/sc7280.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi4
-rw-r--r--arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts4
-rw-r--r--arch/arm64/crypto/Kconfig2
-rw-r--r--arch/arm64/crypto/sm4-ce-glue.c20
-rw-r--r--arch/arm64/include/asm/acpi.h3
-rw-r--r--arch/arm64/include/asm/compat.h14
-rw-r--r--arch/arm64/include/asm/el2_setup.h3
-rw-r--r--arch/arm64/include/asm/page.h1
-rw-r--r--arch/arm64/kernel/acpi.c19
-rw-r--r--arch/arm64/kvm/arm.c12
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c2
-rw-r--r--arch/arm64/mm/init.c37
-rw-r--r--arch/m68k/Kconfig.cpu8
-rw-r--r--arch/m68k/configs/amiga_defconfig6
-rw-r--r--arch/m68k/configs/apollo_defconfig4
-rw-r--r--arch/m68k/configs/atari_defconfig6
-rw-r--r--arch/m68k/configs/bvme6000_defconfig4
-rw-r--r--arch/m68k/configs/hp300_defconfig4
-rw-r--r--arch/m68k/configs/mac_defconfig6
-rw-r--r--arch/m68k/configs/multi_defconfig6
-rw-r--r--arch/m68k/configs/mvme147_defconfig4
-rw-r--r--arch/m68k/configs/mvme16x_defconfig4
-rw-r--r--arch/m68k/configs/q40_defconfig6
-rw-r--r--arch/m68k/configs/stmark2_defconfig1
-rw-r--r--arch/m68k/configs/sun3_defconfig4
-rw-r--r--arch/m68k/configs/sun3x_defconfig4
-rw-r--r--arch/m68k/emu/nfeth.c4
-rw-r--r--arch/m68k/include/asm/atomic.h4
-rw-r--r--arch/mips/ath25/ar2315.c14
-rw-r--r--arch/mips/ath25/ar5312.c13
-rw-r--r--arch/mips/include/asm/compat.h24
-rw-r--r--arch/mips/include/asm/mach-rc32434/rb.h2
-rw-r--r--arch/mips/include/uapi/asm/socket.h2
-rw-r--r--arch/mips/lantiq/irq.c2
-rw-r--r--arch/mips/pci/pci-ar2315.c8
-rw-r--r--arch/mips/pci/pci-rt3883.c5
-rw-r--r--arch/mips/ralink/irq.c2
-rw-r--r--arch/mips/sgi-ip27/ip27-irq.c16
-rw-r--r--arch/mips/sgi-ip30/ip30-irq.c8
-rw-r--r--arch/nios2/kernel/irq.c4
-rw-r--r--arch/parisc/include/asm/compat.h14
-rw-r--r--arch/parisc/include/asm/string.h15
-rw-r--r--arch/parisc/include/uapi/asm/socket.h2
-rw-r--r--arch/parisc/kernel/parisc_ksyms.c4
-rw-r--r--arch/parisc/lib/Makefile4
-rw-r--r--arch/parisc/lib/memset.c72
-rw-r--r--arch/parisc/lib/string.S136
-rw-r--r--arch/powerpc/include/asm/book3s/32/kup.h20
-rw-r--r--arch/powerpc/include/asm/compat.h11
-rw-r--r--arch/powerpc/include/asm/interrupt.h3
-rw-r--r--arch/powerpc/include/asm/irq.h2
-rw-r--r--arch/powerpc/include/asm/ptrace.h16
-rw-r--r--arch/powerpc/kernel/asm-offsets.c31
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S7
-rw-r--r--arch/powerpc/kernel/head_book3s_32.S2
-rw-r--r--arch/powerpc/kernel/head_booke.h27
-rw-r--r--arch/powerpc/kernel/irq.c7
-rw-r--r--arch/powerpc/kernel/kprobes.c3
-rw-r--r--arch/powerpc/kernel/sysfs.c2
-rw-r--r--arch/powerpc/kernel/time.c2
-rw-r--r--arch/powerpc/kernel/traps.c9
-rw-r--r--arch/powerpc/mm/pageattr.c23
-rw-r--r--arch/powerpc/platforms/4xx/uic.c4
-rw-r--r--arch/powerpc/platforms/512x/mpc5121_ads_cpld.c23
-rw-r--r--arch/powerpc/platforms/52xx/media5200.c9
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_gpt.c7
-rw-r--r--arch/powerpc/platforms/82xx/pq2ads-pci-pic.c6
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype2
-rw-r--r--arch/powerpc/platforms/cell/interrupt.c8
-rw-r--r--arch/powerpc/platforms/cell/spider-pic.c11
-rw-r--r--arch/powerpc/platforms/embedded6xx/hlwd-pic.c15
-rw-r--r--arch/powerpc/platforms/powernv/opal-irqchip.c11
-rw-r--r--arch/powerpc/platforms/pseries/setup.c5
-rw-r--r--arch/powerpc/sysdev/fsl_mpic_err.c11
-rw-r--r--arch/powerpc/sysdev/fsl_msi.c12
-rw-r--r--arch/powerpc/sysdev/xive/common.c35
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts4
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi2
-rw-r--r--arch/riscv/configs/defconfig1
-rw-r--r--arch/riscv/configs/rv32_defconfig1
-rw-r--r--arch/riscv/kernel/Makefile2
-rw-r--r--arch/riscv/kernel/ptrace.c4
-rw-r--r--arch/riscv/kernel/setup.c4
-rw-r--r--arch/riscv/mm/init.c2
-rw-r--r--arch/s390/Kconfig2
-rw-r--r--arch/s390/Makefile3
-rw-r--r--arch/s390/boot/Makefile7
-rw-r--r--arch/s390/boot/boot.h14
-rw-r--r--arch/s390/boot/compressed/Makefile1
-rw-r--r--arch/s390/boot/compressed/decompressor.c5
-rw-r--r--arch/s390/boot/compressed/decompressor.h5
-rw-r--r--arch/s390/boot/compressed/vmlinux.lds.S35
-rw-r--r--arch/s390/boot/head.S56
-rw-r--r--arch/s390/boot/ipl_report.c6
-rw-r--r--arch/s390/boot/kaslr.c6
-rw-r--r--arch/s390/boot/mem_detect.c8
-rw-r--r--arch/s390/boot/pgm_check_info.c5
-rw-r--r--arch/s390/boot/sclp_early_core.c9
-rw-r--r--arch/s390/boot/startup.c78
-rw-r--r--arch/s390/boot/uv.c40
-rw-r--r--arch/s390/boot/uv.h19
-rw-r--r--arch/s390/configs/debug_defconfig3
-rw-r--r--arch/s390/configs/defconfig1
-rw-r--r--arch/s390/hypfs/hypfs_diag0c.c12
-rw-r--r--arch/s390/include/asm/ccwgroup.h2
-rw-r--r--arch/s390/include/asm/cio.h1
-rw-r--r--arch/s390/include/asm/compat.h14
-rw-r--r--arch/s390/include/asm/cpacf.h208
-rw-r--r--arch/s390/include/asm/cpufeature.h2
-rw-r--r--arch/s390/include/asm/ctl_reg.h17
-rw-r--r--arch/s390/include/asm/debug.h122
-rw-r--r--arch/s390/include/asm/diag.h15
-rw-r--r--arch/s390/include/asm/elf.h76
-rw-r--r--arch/s390/include/asm/extable.h4
-rw-r--r--arch/s390/include/asm/ftrace.h46
-rw-r--r--arch/s390/include/asm/ftrace.lds.h21
-rw-r--r--arch/s390/include/asm/ipl.h1
-rw-r--r--arch/s390/include/asm/kfence.h42
-rw-r--r--arch/s390/include/asm/kvm_para.h229
-rw-r--r--arch/s390/include/asm/linkage.h4
-rw-r--r--arch/s390/include/asm/lowcore.h3
-rw-r--r--arch/s390/include/asm/module.h14
-rw-r--r--arch/s390/include/asm/page.h3
-rw-r--r--arch/s390/include/asm/pci.h7
-rw-r--r--arch/s390/include/asm/pci_dma.h2
-rw-r--r--arch/s390/include/asm/pgtable.h10
-rw-r--r--arch/s390/include/asm/processor.h2
-rw-r--r--arch/s390/include/asm/qdio.h19
-rw-r--r--arch/s390/include/asm/sclp.h10
-rw-r--r--arch/s390/include/asm/sections.h4
-rw-r--r--arch/s390/include/asm/set_memory.h6
-rw-r--r--arch/s390/include/asm/setup.h46
-rw-r--r--arch/s390/include/asm/syscall.h59
-rw-r--r--arch/s390/include/asm/uv.h8
-rw-r--r--arch/s390/include/asm/vdso/gettimeofday.h22
-rw-r--r--arch/s390/kernel/Makefile2
-rw-r--r--arch/s390/kernel/asm-offsets.c8
-rw-r--r--arch/s390/kernel/crash_dump.c46
-rw-r--r--arch/s390/kernel/debug.c247
-rw-r--r--arch/s390/kernel/diag.c27
-rw-r--r--arch/s390/kernel/dis.c2
-rw-r--r--arch/s390/kernel/early.c4
-rw-r--r--arch/s390/kernel/entry.S11
-rw-r--r--arch/s390/kernel/entry.h11
-rw-r--r--arch/s390/kernel/ftrace.c222
-rw-r--r--arch/s390/kernel/ftrace.h26
-rw-r--r--arch/s390/kernel/head64.S17
-rw-r--r--arch/s390/kernel/ipl.c5
-rw-r--r--arch/s390/kernel/ipl_vmparm.c2
-rw-r--r--arch/s390/kernel/irq.c4
-rw-r--r--arch/s390/kernel/jump_label.c2
-rw-r--r--arch/s390/kernel/machine_kexec.c5
-rw-r--r--arch/s390/kernel/module.c45
-rw-r--r--arch/s390/kernel/os_info.c2
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c4
-rw-r--r--arch/s390/kernel/processor.c177
-rw-r--r--arch/s390/kernel/setup.c286
-rw-r--r--arch/s390/kernel/signal.c6
-rw-r--r--arch/s390/kernel/smp.c64
-rw-r--r--arch/s390/kernel/text_amode31.S (renamed from arch/s390/boot/text_dma.S)60
-rw-r--r--arch/s390/kernel/topology.c4
-rw-r--r--arch/s390/kernel/traps.c2
-rw-r--r--arch/s390/kernel/uv.c15
-rw-r--r--arch/s390/kernel/vdso32/Makefile1
-rw-r--r--arch/s390/kernel/vdso64/Makefile1
-rw-r--r--arch/s390/kernel/vmlinux.lds.S35
-rw-r--r--arch/s390/lib/delay.c11
-rw-r--r--arch/s390/mm/dump_pagetables.c16
-rw-r--r--arch/s390/mm/fault.c13
-rw-r--r--arch/s390/mm/init.c3
-rw-r--r--arch/s390/mm/kasan_init.c43
-rw-r--r--arch/s390/mm/maccess.c4
-rw-r--r--arch/s390/mm/page-states.c43
-rw-r--r--arch/s390/mm/pageattr.c19
-rw-r--r--arch/s390/mm/vmem.c2
-rw-r--r--arch/s390/pci/pci.c79
-rw-r--r--arch/s390/pci/pci_bus.c8
-rw-r--r--arch/s390/pci/pci_bus.h5
-rw-r--r--arch/s390/pci/pci_clp.c186
-rw-r--r--arch/s390/pci/pci_dma.c25
-rw-r--r--arch/s390/pci/pci_event.c5
-rw-r--r--arch/s390/pci/pci_irq.c4
-rw-r--r--arch/s390/pci/pci_sysfs.c19
-rw-r--r--arch/s390/purgatory/Makefile1
-rw-r--r--arch/s390/tools/opcodes.txt18
-rw-r--r--arch/sh/boards/mach-se/7343/irq.c2
-rw-r--r--arch/sh/boards/mach-se/7722/irq.c2
-rw-r--r--arch/sh/boards/mach-x3proto/gpio.c2
-rw-r--r--arch/sparc/include/asm/compat.h14
-rw-r--r--arch/sparc/include/uapi/asm/socket.h2
-rw-r--r--arch/um/drivers/ubd_kern.c3
-rw-r--r--arch/um/drivers/vector_kern.c8
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/Makefile53
-rw-r--r--arch/x86/boot/Makefile7
-rw-r--r--arch/x86/boot/compressed/efi_thunk_64.S30
-rw-r--r--arch/x86/boot/compressed/head_64.S3
-rw-r--r--arch/x86/boot/compressed/kaslr.c2
-rw-r--r--arch/x86/crypto/Makefile6
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c5
-rw-r--r--arch/x86/crypto/sm4-aesni-avx-asm_64.S589
-rw-r--r--arch/x86/crypto/sm4-aesni-avx2-asm_64.S497
-rw-r--r--arch/x86/crypto/sm4-avx.h24
-rw-r--r--arch/x86/crypto/sm4_aesni_avx2_glue.c169
-rw-r--r--arch/x86/crypto/sm4_aesni_avx_glue.c487
-rw-r--r--arch/x86/events/Kconfig10
-rw-r--r--arch/x86/events/amd/Makefile5
-rw-r--r--arch/x86/events/amd/ibs.c32
-rw-r--r--arch/x86/events/amd/power.c1
-rw-r--r--arch/x86/events/amd/uncore.c40
-rw-r--r--arch/x86/events/core.c4
-rw-r--r--arch/x86/events/intel/core.c8
-rw-r--r--arch/x86/events/intel/pt.c6
-rw-r--r--arch/x86/events/intel/uncore.c45
-rw-r--r--arch/x86/events/intel/uncore.h4
-rw-r--r--arch/x86/events/intel/uncore_discovery.c42
-rw-r--r--arch/x86/events/intel/uncore_discovery.h21
-rw-r--r--arch/x86/events/intel/uncore_snbep.c585
-rw-r--r--arch/x86/include/asm/amd-ibs.h132
-rw-r--r--arch/x86/include/asm/compat.h14
-rw-r--r--arch/x86/include/asm/i8259.h2
-rw-r--r--arch/x86/include/asm/kfence.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h7
-rw-r--r--arch/x86/include/asm/mce.h1
-rw-r--r--arch/x86/include/asm/nospec-branch.h2
-rw-r--r--arch/x86/include/asm/pc-conf-reg.h33
-rw-r--r--arch/x86/include/asm/processor-cyrix.h8
-rw-r--r--arch/x86/include/asm/processor.h4
-rw-r--r--arch/x86/include/asm/signal.h1
-rw-r--r--arch/x86/include/asm/svm.h2
-rw-r--r--arch/x86/include/asm/thread_info.h6
-rw-r--r--arch/x86/include/asm/tlbflush.h2
-rw-r--r--arch/x86/kernel/acpi/boot.c12
-rw-r--r--arch/x86/kernel/amd_nb.c5
-rw-r--r--arch/x86/kernel/apic/apic.c9
-rw-r--r--arch/x86/kernel/apic/io_apic.c8
-rw-r--r--arch/x86/kernel/apic/msi.c11
-rw-r--r--arch/x86/kernel/apic/vector.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/bugs.c70
-rw-r--r--arch/x86/kernel/cpu/common.c6
-rw-r--r--arch/x86/kernel/cpu/mce/core.c11
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c8
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c18
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.c8
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c276
-rw-r--r--arch/x86/kernel/cpu/resctrl/ctrlmondata.c163
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h231
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c71
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock.c12
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c461
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/i8259.c8
-rw-r--r--arch/x86/kernel/mpparse.c3
-rw-r--r--arch/x86/kernel/reboot.c3
-rw-r--r--arch/x86/kernel/setup.c10
-rw-r--r--arch/x86/kernel/smpboot.c10
-rw-r--r--arch/x86/kvm/cpuid.c28
-rw-r--r--arch/x86/kvm/hyperv.c2
-rw-r--r--arch/x86/kvm/i8259.c20
-rw-r--r--arch/x86/kvm/irq.h2
-rw-r--r--arch/x86/kvm/mmu/mmu.c28
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c35
-rw-r--r--arch/x86/kvm/svm/nested.c13
-rw-r--r--arch/x86/kvm/svm/svm.c9
-rw-r--r--arch/x86/kvm/vmx/nested.c56
-rw-r--r--arch/x86/kvm/vmx/vmx.h2
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/pc-conf-reg.c13
-rw-r--r--arch/x86/mm/mmio-mod.c4
-rw-r--r--arch/x86/mm/tlb.c107
-rw-r--r--arch/x86/net/bpf_jit_comp.c19
-rw-r--r--arch/x86/pci/irq.c279
-rw-r--r--arch/x86/power/cpu.c31
-rw-r--r--arch/x86/tools/chkobjdump.awk1
-rw-r--r--arch/x86/tools/relocs.c37
-rw-r--r--arch/x86/tools/relocs.h1
-rw-r--r--arch/xtensa/kernel/irq.c4
-rw-r--r--block/Kconfig14
-rw-r--r--block/Kconfig.iosched6
-rw-r--r--block/Makefile4
-rw-r--r--block/bfq-iosched.c17
-rw-r--r--block/bfq-iosched.h6
-rw-r--r--block/bfq-wf2q.c6
-rw-r--r--block/bio-integrity.c21
-rw-r--r--block/bio.c231
-rw-r--r--block/blk-cgroup.c153
-rw-r--r--block/blk-core.c21
-rw-r--r--block/blk-crypto.c2
-rw-r--r--block/blk-flush.c13
-rw-r--r--block/blk-integrity.c12
-rw-r--r--block/blk-iocost.c31
-rw-r--r--block/blk-iolatency.c38
-rw-r--r--block/blk-map.c2
-rw-r--r--block/blk-merge.c20
-rw-r--r--block/blk-mq-sysfs.c55
-rw-r--r--block/blk-mq.c48
-rw-r--r--block/blk-settings.c34
-rw-r--r--block/blk-sysfs.c35
-rw-r--r--block/blk-throttle.c32
-rw-r--r--block/blk-wbt.c8
-rw-r--r--block/blk-zoned.c6
-rw-r--r--block/blk.h33
-rw-r--r--block/bounce.c39
-rw-r--r--block/cmdline-parser.c255
-rw-r--r--block/disk-events.c69
-rw-r--r--block/elevator.c7
-rw-r--r--block/genhd.c385
-rw-r--r--block/holder.c174
-rw-r--r--block/ioctl.c16
-rw-r--r--block/ioprio.c9
-rw-r--r--block/mq-deadline-cgroup.c126
-rw-r--r--block/mq-deadline-cgroup.h114
-rw-r--r--block/mq-deadline.c (renamed from block/mq-deadline-main.c)121
-rw-r--r--block/partitions/Kconfig1
-rw-r--r--block/partitions/acorn.c4
-rw-r--r--block/partitions/aix.c20
-rw-r--r--block/partitions/amiga.c7
-rw-r--r--block/partitions/atari.c4
-rw-r--r--block/partitions/check.h2
-rw-r--r--block/partitions/cmdline.c273
-rw-r--r--block/partitions/core.c73
-rw-r--r--block/partitions/efi.c48
-rw-r--r--block/partitions/ibm.c4
-rw-r--r--block/partitions/ldm.c18
-rw-r--r--block/partitions/mac.c2
-rw-r--r--block/partitions/msdos.c6
-rw-r--r--block/partitions/sgi.c5
-rw-r--r--block/partitions/sun.c5
-rw-r--r--block/t10-pi.c16
-rw-r--r--certs/Kconfig26
-rw-r--r--certs/Makefile21
-rw-r--r--crypto/Kconfig46
-rw-r--r--crypto/Makefile1
-rw-r--r--crypto/asymmetric_keys/pkcs7_parser.c8
-rw-r--r--crypto/ecc.h5
-rw-r--r--crypto/sha512_generic.c3
-rw-r--r--crypto/skcipher.c2
-rw-r--r--crypto/sm4_generic.c180
-rw-r--r--crypto/tcrypt.c100
-rw-r--r--crypto/testmgr.c29
-rw-r--r--crypto/testmgr.h148
-rw-r--r--crypto/wp512.c2
-rw-r--r--drivers/Kconfig2
-rw-r--r--drivers/Makefile1
-rw-r--r--drivers/accessibility/speakup/i18n.c14
-rw-r--r--drivers/accessibility/speakup/speakup_soft.c15
-rw-r--r--drivers/acpi/Kconfig4
-rw-r--r--drivers/acpi/acpi_configfs.c54
-rw-r--r--drivers/acpi/acpi_fpdt.c7
-rw-r--r--drivers/acpi/acpi_pad.c4
-rw-r--r--drivers/acpi/acpi_processor.c8
-rw-r--r--drivers/acpi/acpica/dswexec.c5
-rw-r--r--drivers/acpi/bus.c12
-rw-r--r--drivers/acpi/button.c11
-rw-r--r--drivers/acpi/dptf/dptf_pch_fivr.c9
-rw-r--r--drivers/acpi/glue.c51
-rw-r--r--drivers/acpi/nfit/core.c3
-rw-r--r--drivers/acpi/numa/Kconfig2
-rw-r--r--drivers/acpi/numa/srat.c2
-rw-r--r--drivers/acpi/osl.c23
-rw-r--r--drivers/acpi/platform_profile.c3
-rw-r--r--drivers/acpi/pmic/intel_pmic_xpower.c47
-rw-r--r--drivers/acpi/power.c38
-rw-r--r--drivers/acpi/prmt.c6
-rw-r--r--drivers/acpi/processor_idle.c4
-rw-r--r--drivers/acpi/spcr.c1
-rw-r--r--drivers/acpi/x86/s2idle.c4
-rw-r--r--drivers/android/binder.c4
-rw-r--r--drivers/android/binderfs.c39
-rw-r--r--drivers/ata/libahci.c1
-rw-r--r--drivers/ata/libata-core.c272
-rw-r--r--drivers/ata/libata-sata.c62
-rw-r--r--drivers/ata/libata-scsi.c60
-rw-r--r--drivers/ata/sata_dwc_460ex.c12
-rw-r--r--drivers/atm/horizon.c6
-rw-r--r--drivers/atm/idt77252.c2
-rw-r--r--drivers/base/base.h3
-rw-r--r--drivers/base/core.c36
-rw-r--r--drivers/base/platform-msi.c20
-rw-r--r--drivers/base/power/domain.c40
-rw-r--r--drivers/base/property.c30
-rw-r--r--drivers/base/regmap/internal.h4
-rw-r--r--drivers/base/regmap/regmap-debugfs.c2
-rw-r--r--drivers/base/regmap/regmap-mmio.c2
-rw-r--r--drivers/base/regmap/regmap.c49
-rw-r--r--drivers/base/swnode.c61
-rw-r--r--drivers/bcma/main.c6
-rw-r--r--drivers/bcma/scan.c7
-rw-r--r--drivers/block/Kconfig4
-rw-r--r--drivers/block/brd.c3
-rw-r--r--drivers/block/cryptoloop.c2
-rw-r--r--drivers/block/drbd/drbd_nl.c2
-rw-r--r--drivers/block/drbd/drbd_req.c5
-rw-r--r--drivers/block/floppy.c30
-rw-r--r--drivers/block/loop.c13
-rw-r--r--drivers/block/nbd.c192
-rw-r--r--drivers/block/null_blk/main.c7
-rw-r--r--drivers/block/paride/pd.c2
-rw-r--r--drivers/block/pktcdvd.c8
-rw-r--r--drivers/block/ps3disk.c18
-rw-r--r--drivers/block/ps3vram.c2
-rw-r--r--drivers/block/rbd.c18
-rw-r--r--drivers/block/rnbd/rnbd-clt-sysfs.c33
-rw-r--r--drivers/block/rnbd/rnbd-clt.c2
-rw-r--r--drivers/block/rnbd/rnbd-srv-sysfs.c14
-rw-r--r--drivers/block/sx8.c2
-rw-r--r--drivers/block/virtio_blk.c51
-rw-r--r--drivers/block/xen-blkfront.c1
-rw-r--r--drivers/bluetooth/btbcm.c1
-rw-r--r--drivers/bluetooth/btintel.c1314
-rw-r--r--drivers/bluetooth/btintel.h119
-rw-r--r--drivers/bluetooth/btmrvl_sdio.c29
-rw-r--r--drivers/bluetooth/btrsi.c2
-rw-r--r--drivers/bluetooth/btrtl.c10
-rw-r--r--drivers/bluetooth/btusb.c1398
-rw-r--r--drivers/bluetooth/hci_bcm.c6
-rw-r--r--drivers/bluetooth/hci_h5.c116
-rw-r--r--drivers/bluetooth/hci_serdev.c3
-rw-r--r--drivers/bluetooth/hci_uart.h7
-rw-r--r--drivers/bus/fsl-mc/fsl-mc-bus.c138
-rw-r--r--drivers/bus/mhi/core/boot.c17
-rw-r--r--drivers/bus/mhi/core/init.c93
-rw-r--r--drivers/bus/mhi/core/internal.h22
-rw-r--r--drivers/bus/mhi/core/main.c15
-rw-r--r--drivers/bus/mhi/core/pm.c34
-rw-r--r--drivers/bus/mhi/pci_generic.c42
-rw-r--r--drivers/bus/ti-sysc.c4
-rw-r--r--drivers/char/Kconfig4
-rw-r--r--drivers/char/hw_random/Kconfig14
-rw-r--r--drivers/char/hw_random/Makefile1
-rw-r--r--drivers/char/hw_random/amd-rng.c8
-rw-r--r--drivers/char/hw_random/arm_smccc_trng.c123
-rw-r--r--drivers/char/hw_random/geode-rng.c8
-rw-r--r--drivers/char/hw_random/intel-rng.c8
-rw-r--r--drivers/char/hw_random/via-rng.c8
-rw-r--r--drivers/char/mwave/tp3780i.c4
-rw-r--r--drivers/char/pcmcia/synclink_cs.c23
-rw-r--r--drivers/char/tpm/Kconfig1
-rw-r--r--drivers/char/tpm/tpm_ibmvtpm.c26
-rw-r--r--drivers/char/tpm/tpm_ibmvtpm.h2
-rw-r--r--drivers/char/tpm/tpm_tis_i2c_cr50.c12
-rw-r--r--drivers/clk/imx/clk-imx6q.c2
-rw-r--r--drivers/clk/qcom/gdsc.c54
-rw-r--r--drivers/clk/renesas/rcar-usb2-clock-sel.c2
-rw-r--r--drivers/clocksource/exynos_mct.c16
-rw-r--r--drivers/clocksource/ingenic-sysost.c13
-rw-r--r--drivers/clocksource/sh_cmt.c30
-rw-r--r--drivers/clocksource/timer-fttmr010.c32
-rw-r--r--drivers/clocksource/timer-mediatek.c8
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c4
-rw-r--r--drivers/cpufreq/armada-37xx-cpufreq.c6
-rw-r--r--drivers/cpufreq/cpufreq-dt-platdev.c2
-rw-r--r--drivers/cpufreq/cpufreq.c6
-rw-r--r--drivers/cpufreq/cpufreq_ondemand.c4
-rw-r--r--drivers/cpufreq/intel_pstate.c43
-rw-r--r--drivers/cpufreq/powernow-k8.c6
-rw-r--r--drivers/cpufreq/powernv-cpufreq.c4
-rw-r--r--drivers/cpufreq/scmi-cpufreq.c2
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c9
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c3
-rw-r--r--drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c9
-rw-r--r--drivers/crypto/atmel-aes.c154
-rw-r--r--drivers/crypto/atmel-tdes.c66
-rw-r--r--drivers/crypto/ccp/sev-dev.c49
-rw-r--r--drivers/crypto/ccp/sp-pci.c19
-rw-r--r--drivers/crypto/hisilicon/hpre/hpre_main.c123
-rw-r--r--drivers/crypto/hisilicon/qm.c430
-rw-r--r--drivers/crypto/hisilicon/qm.h8
-rw-r--r--drivers/crypto/hisilicon/sec2/sec.h5
-rw-r--r--drivers/crypto/hisilicon/sec2/sec_main.c138
-rw-r--r--drivers/crypto/hisilicon/zip/zip_main.c83
-rw-r--r--drivers/crypto/mxs-dcp.c81
-rw-r--r--drivers/crypto/omap-aes.c8
-rw-r--r--drivers/crypto/omap-crypto.c2
-rw-r--r--drivers/crypto/omap-des.c8
-rw-r--r--drivers/crypto/omap-sham.c68
-rw-r--r--drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c8
-rw-r--r--drivers/crypto/qat/qat_4xxx/adf_drv.c14
-rw-r--r--drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c19
-rw-r--r--drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h1
-rw-r--r--drivers/crypto/qat/qat_c3xxx/adf_drv.c21
-rw-r--r--drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c14
-rw-r--r--drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h1
-rw-r--r--drivers/crypto/qat/qat_c3xxxvf/adf_drv.c16
-rw-r--r--drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c19
-rw-r--r--drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h1
-rw-r--r--drivers/crypto/qat/qat_c62x/adf_drv.c21
-rw-r--r--drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c14
-rw-r--r--drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h1
-rw-r--r--drivers/crypto/qat/qat_c62xvf/adf_drv.c16
-rw-r--r--drivers/crypto/qat/qat_common/adf_accel_devices.h8
-rw-r--r--drivers/crypto/qat/qat_common/adf_aer.c2
-rw-r--r--drivers/crypto/qat/qat_common/adf_common_drv.h21
-rw-r--r--drivers/crypto/qat/qat_common/adf_init.c13
-rw-r--r--drivers/crypto/qat/qat_common/adf_isr.c42
-rw-r--r--drivers/crypto/qat/qat_common/adf_pf2vf_msg.c78
-rw-r--r--drivers/crypto/qat/qat_common/adf_pf2vf_msg.h2
-rw-r--r--drivers/crypto/qat/qat_common/adf_sriov.c8
-rw-r--r--drivers/crypto/qat/qat_common/adf_vf2pf_msg.c12
-rw-r--r--drivers/crypto/qat/qat_common/adf_vf_isr.c64
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c19
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h1
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_drv.c21
-rw-r--r--drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c14
-rw-r--r--drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h1
-rw-r--r--drivers/crypto/qat/qat_dh895xccvf/adf_drv.c16
-rw-r--r--drivers/crypto/virtio/virtio_crypto_core.c4
-rw-r--r--drivers/dax/super.c2
-rw-r--r--drivers/dio/dio.c2
-rw-r--r--drivers/edac/altera_edac.c51
-rw-r--r--drivers/edac/amd64_edac.c21
-rw-r--r--drivers/edac/edac_mc.c1
-rw-r--r--drivers/edac/i10nm_base.c152
-rw-r--r--drivers/edac/mce_amd.c3
-rw-r--r--drivers/edac/skx_base.c3
-rw-r--r--drivers/edac/skx_common.c9
-rw-r--r--drivers/edac/skx_common.h7
-rw-r--r--drivers/firmware/efi/cper.c13
-rw-r--r--drivers/firmware/efi/libstub/arm64-stub.c71
-rw-r--r--drivers/firmware/efi/libstub/randomalloc.c2
-rw-r--r--drivers/firmware/iscsi_ibft.c10
-rw-r--r--drivers/firmware/iscsi_ibft_find.c48
-rw-r--r--drivers/firmware/raspberrypi.c10
-rw-r--r--drivers/firmware/smccc/smccc.c17
-rw-r--r--drivers/firmware/xilinx/zynqmp.c23
-rw-r--r--drivers/fpga/Kconfig11
-rw-r--r--drivers/fpga/Makefile1
-rw-r--r--drivers/fpga/altera-cvp.c2
-rw-r--r--drivers/fpga/altera-freeze-bridge.c2
-rw-r--r--drivers/fpga/dfl-fme-mgr.c6
-rw-r--r--drivers/fpga/dfl-fme-pr.c2
-rw-r--r--drivers/fpga/dfl-n3000-nios.c2
-rw-r--r--drivers/fpga/dfl-pci.c5
-rw-r--r--drivers/fpga/dfl.c27
-rw-r--r--drivers/fpga/dfl.h3
-rw-r--r--drivers/fpga/fpga-bridge.c8
-rw-r--r--drivers/fpga/fpga-mgr.c111
-rw-r--r--drivers/fpga/stratix10-soc.c6
-rw-r--r--drivers/fpga/ts73xx-fpga.c6
-rw-r--r--drivers/fpga/versal-fpga.c83
-rw-r--r--drivers/fpga/xilinx-pr-decoupler.c2
-rw-r--r--drivers/fpga/xilinx-spi.c2
-rw-r--r--drivers/fpga/zynq-fpga.c6
-rw-r--r--drivers/fpga/zynqmp-fpga.c10
-rw-r--r--drivers/gpio/Kconfig8
-rw-r--r--drivers/gpio/Makefile1
-rw-r--r--drivers/gpio/gpio-104-dio-48e.c4
-rw-r--r--drivers/gpio/gpio-104-idi-48.c4
-rw-r--r--drivers/gpio/gpio-104-idio-16.c2
-rw-r--r--drivers/gpio/gpio-altera.c11
-rw-r--r--drivers/gpio/gpio-aspeed-sgpio.c9
-rw-r--r--drivers/gpio/gpio-aspeed.c9
-rw-r--r--drivers/gpio/gpio-ath79.c7
-rw-r--r--drivers/gpio/gpio-bcm-kona.c6
-rw-r--r--drivers/gpio/gpio-brcmstb.c5
-rw-r--r--drivers/gpio/gpio-cadence.c2
-rw-r--r--drivers/gpio/gpio-davinci.c3
-rw-r--r--drivers/gpio/gpio-dln2.c22
-rw-r--r--drivers/gpio/gpio-em.c2
-rw-r--r--drivers/gpio/gpio-ep93xx.c8
-rw-r--r--drivers/gpio/gpio-ftgpio010.c3
-rw-r--r--drivers/gpio/gpio-hisi.c4
-rw-r--r--drivers/gpio/gpio-hlwd.c7
-rw-r--r--drivers/gpio/gpio-merrifield.c8
-rw-r--r--drivers/gpio/gpio-mpc8xxx.c2
-rw-r--r--drivers/gpio/gpio-mt7621.c4
-rw-r--r--drivers/gpio/gpio-mxc.c2
-rw-r--r--drivers/gpio/gpio-mxs.c2
-rw-r--r--drivers/gpio/gpio-omap.c3
-rw-r--r--drivers/gpio/gpio-pci-idio-16.c2
-rw-r--r--drivers/gpio/gpio-pcie-idio-24.c3
-rw-r--r--drivers/gpio/gpio-pl061.c4
-rw-r--r--drivers/gpio/gpio-pxa.c9
-rw-r--r--drivers/gpio/gpio-rcar.c4
-rw-r--r--drivers/gpio/gpio-rda.c8
-rw-r--r--drivers/gpio/gpio-realtek-otto.c7
-rw-r--r--drivers/gpio/gpio-rockchip.c771
-rw-r--r--drivers/gpio/gpio-sch.c2
-rw-r--r--drivers/gpio/gpio-sodaville.c2
-rw-r--r--drivers/gpio/gpio-sprd.c12
-rw-r--r--drivers/gpio/gpio-tb10x.c2
-rw-r--r--drivers/gpio/gpio-tegra.c9
-rw-r--r--drivers/gpio/gpio-tegra186.c9
-rw-r--r--drivers/gpio/gpio-tqmx86.c10
-rw-r--r--drivers/gpio/gpio-vf610.c2
-rw-r--r--drivers/gpio/gpio-ws16c48.c4
-rw-r--r--drivers/gpio/gpio-xgs-iproc.c2
-rw-r--r--drivers/gpio/gpio-xilinx.c2
-rw-r--r--drivers/gpio/gpio-xlp.c3
-rw-r--r--drivers/gpio/gpio-zynq.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c8
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c7
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c96
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h1
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h5
-rw-r--r--drivers/gpu/drm/amd/include/atomfirmware.h2
-rw-r--r--drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_pmfw.h4
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c15
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c3
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c2
-rw-r--r--drivers/gpu/drm/drm_ioc32.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c24
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c34
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power.c16
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c9
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_link_training.c1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c9
-rw-r--r--drivers/gpu/drm/i915/gvt/handlers.c1
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c19
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c21
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h16
-rw-r--r--drivers/gpu/drm/imx/ipuv3-plane.c2
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_color.c2
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_ovl.c2
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dpi.c6
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_crtc.c3
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c2
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_plane.c60
-rw-r--r--drivers/gpu/drm/meson/meson_registers.h5
-rw-r--r--drivers/gpu/drm/meson/meson_viu.c7
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c15
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_mdss.c3
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/disp.c27
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head.c13
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl0080.h3
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/class.h2
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/client.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/driver.h2
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/client.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/ioctl.h2
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h1
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_abi16.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_chan.c19
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drm.c3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mem.c15
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_nvif.c4
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_svm.c9
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_usif.c57
-rw-r--r--drivers/gpu/drm/nouveau/nvif/client.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvif/object.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/ioctl.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/base.c21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/user.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c94
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c92
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c27
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c16
-rw-r--r--drivers/gpu/drm/ttm/ttm_device.c2
-rw-r--r--drivers/gpu/ipu-v3/ipu-common.c11
-rw-r--r--drivers/gpu/ipu-v3/ipu-cpmem.c30
-rw-r--r--drivers/hwmon/Kconfig20
-rw-r--r--drivers/hwmon/Makefile3
-rw-r--r--drivers/hwmon/adt7470.c1051
-rw-r--r--drivers/hwmon/aquacomputer_d5next.c363
-rw-r--r--drivers/hwmon/axi-fan-control.c117
-rw-r--r--drivers/hwmon/dell-smm-hwmon.c910
-rw-r--r--drivers/hwmon/fam15h_power.c4
-rw-r--r--drivers/hwmon/intel-m10-bmc-hwmon.c116
-rw-r--r--drivers/hwmon/k10temp.c30
-rw-r--r--drivers/hwmon/ntc_thermistor.c20
-rw-r--r--drivers/hwmon/pmbus/bpa-rs600.c68
-rw-r--r--drivers/hwmon/pmbus/ibm-cffps.c6
-rw-r--r--drivers/hwmon/sbrmi.c359
-rw-r--r--drivers/hwmon/w83627ehf.c122
-rw-r--r--drivers/hwmon/w83781d.c11
-rw-r--r--drivers/hwtracing/coresight/Kconfig1
-rw-r--r--drivers/hwtracing/coresight/Makefile7
-rw-r--r--drivers/hwtracing/coresight/coresight-cfg-afdo.c153
-rw-r--r--drivers/hwtracing/coresight/coresight-cfg-preload.c31
-rw-r--r--drivers/hwtracing/coresight/coresight-cfg-preload.h13
-rw-r--r--drivers/hwtracing/coresight/coresight-config.c272
-rw-r--r--drivers/hwtracing/coresight/coresight-config.h253
-rw-r--r--drivers/hwtracing/coresight/coresight-core.c12
-rw-r--r--drivers/hwtracing/coresight/coresight-cpu-debug.c4
-rw-r--r--drivers/hwtracing/coresight/coresight-etm-perf.c150
-rw-r--r--drivers/hwtracing/coresight/coresight-etm-perf.h12
-rw-r--r--drivers/hwtracing/coresight/coresight-etm4x-cfg.c182
-rw-r--r--drivers/hwtracing/coresight/coresight-etm4x-cfg.h30
-rw-r--r--drivers/hwtracing/coresight/coresight-etm4x-core.c38
-rw-r--r--drivers/hwtracing/coresight/coresight-etm4x-sysfs.c3
-rw-r--r--drivers/hwtracing/coresight/coresight-syscfg-configfs.c396
-rw-r--r--drivers/hwtracing/coresight/coresight-syscfg-configfs.h45
-rw-r--r--drivers/hwtracing/coresight/coresight-syscfg.c847
-rw-r--r--drivers/hwtracing/coresight/coresight-syscfg.h81
-rw-r--r--drivers/i2c/busses/Kconfig20
-rw-r--r--drivers/i2c/busses/Makefile2
-rw-r--r--drivers/i2c/busses/i2c-at91-core.c19
-rw-r--r--drivers/i2c/busses/i2c-at91-master.c4
-rw-r--r--drivers/i2c/busses/i2c-bcm-iproc.c4
-rw-r--r--drivers/i2c/busses/i2c-cadence.c38
-rw-r--r--drivers/i2c/busses/i2c-designware-common.c8
-rw-r--r--drivers/i2c/busses/i2c-designware-core.h4
-rw-r--r--drivers/i2c/busses/i2c-designware-platdrv.c5
-rw-r--r--drivers/i2c/busses/i2c-highlander.c2
-rw-r--r--drivers/i2c/busses/i2c-hix5hd2.c2
-rw-r--r--drivers/i2c/busses/i2c-i801.c46
-rw-r--r--drivers/i2c/busses/i2c-imx.c6
-rw-r--r--drivers/i2c/busses/i2c-iop3xx.c6
-rw-r--r--drivers/i2c/busses/i2c-mt65xx.c2
-rw-r--r--drivers/i2c/busses/i2c-mxs.c4
-rw-r--r--drivers/i2c/busses/i2c-parport.c36
-rw-r--r--drivers/i2c/busses/i2c-pmcmsp.c600
-rw-r--r--drivers/i2c/busses/i2c-qup.c2
-rw-r--r--drivers/i2c/busses/i2c-s3c2410.c2
-rw-r--r--drivers/i2c/busses/i2c-sh_mobile.c4
-rw-r--r--drivers/i2c/busses/i2c-sun6i-p2wi.c2
-rw-r--r--drivers/i2c/busses/i2c-synquacer.c2
-rw-r--r--drivers/i2c/busses/i2c-virtio.c290
-rw-r--r--drivers/i2c/busses/i2c-xlp9xx.c2
-rw-r--r--drivers/i2c/i2c-dev.c27
-rw-r--r--drivers/iio/accel/Kconfig2
-rw-r--r--drivers/iio/accel/fxls8962af-core.c2
-rw-r--r--drivers/iio/adc/palmas_gpadc.c4
-rw-r--r--drivers/iio/adc/rn5t618-adc.c23
-rw-r--r--drivers/iio/adc/ti-ads7950.c1
-rw-r--r--drivers/iio/humidity/hdc100x.c6
-rw-r--r--drivers/iio/imu/adis.c3
-rw-r--r--drivers/infiniband/core/uverbs_std_types_mr.c3
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c1
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c1
-rw-r--r--drivers/infiniband/hw/efa/efa_main.c1
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c9
-rw-r--r--drivers/infiniband/hw/irdma/Kconfig2
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c6
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c10
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c77
-rw-r--r--drivers/infiniband/hw/mlx5/main.c47
-rw-r--r--drivers/infiniband/hw/mlx5/std_types.c10
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mcast.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c8
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c8
-rw-r--r--drivers/interconnect/core.c3
-rw-r--r--drivers/interconnect/qcom/Kconfig9
-rw-r--r--drivers/interconnect/qcom/Makefile2
-rw-r--r--drivers/interconnect/qcom/icc-rpmh.c103
-rw-r--r--drivers/interconnect/qcom/icc-rpmh.h2
-rw-r--r--drivers/interconnect/qcom/osm-l3.c60
-rw-r--r--drivers/interconnect/qcom/sc7180.c96
-rw-r--r--drivers/interconnect/qcom/sc7280.c96
-rw-r--r--drivers/interconnect/qcom/sc8180x.c626
-rw-r--r--drivers/interconnect/qcom/sc8180x.h174
-rw-r--r--drivers/interconnect/qcom/sdm845.c99
-rw-r--r--drivers/interconnect/qcom/sdx55.c96
-rw-r--r--drivers/interconnect/qcom/sm8150.c96
-rw-r--r--drivers/interconnect/qcom/sm8250.c96
-rw-r--r--drivers/interconnect/qcom/sm8350.c97
-rw-r--r--drivers/iommu/dma-iommu.c1
-rw-r--r--drivers/iommu/intel/pasid.c10
-rw-r--r--drivers/iommu/intel/pasid.h6
-rw-r--r--drivers/iommu/intel/svm.c3
-rw-r--r--drivers/iommu/iommu.c3
-rw-r--r--drivers/iommu/s390-iommu.c18
-rw-r--r--drivers/ipack/carriers/tpci200.c60
-rw-r--r--drivers/irqchip/irq-alpine-msi.c6
-rw-r--r--drivers/irqchip/irq-apple-aic.c2
-rw-r--r--drivers/irqchip/irq-gic-v2m.c5
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c6
-rw-r--r--drivers/irqchip/irq-gic-v3-mbi.c5
-rw-r--r--drivers/irqchip/irq-gic-v3.c84
-rw-r--r--drivers/irqchip/irq-loongson-pch-pic.c19
-rw-r--r--drivers/irqchip/irq-ls-scfg-msi.c5
-rw-r--r--drivers/irqchip/irq-mtk-sysirq.c1
-rw-r--r--drivers/irqchip/irq-mvebu-gicp.c4
-rw-r--r--drivers/irqchip/irq-mvebu-odmi.c5
-rw-r--r--drivers/irqchip/irq-partition-percpu.c3
-rw-r--r--drivers/irqchip/qcom-pdc.c68
-rw-r--r--drivers/leds/Kconfig59
-rw-r--r--drivers/leds/Makefile6
-rw-r--r--drivers/leds/blink/leds-lgm-sso.c39
-rw-r--r--drivers/leds/flash/Kconfig53
-rw-r--r--drivers/leds/flash/Makefile6
-rw-r--r--drivers/leds/flash/leds-aat1290.c (renamed from drivers/leds/leds-aat1290.c)0
-rw-r--r--drivers/leds/flash/leds-as3645a.c (renamed from drivers/leds/leds-as3645a.c)0
-rw-r--r--drivers/leds/flash/leds-ktd2692.c (renamed from drivers/leds/leds-ktd2692.c)0
-rw-r--r--drivers/leds/flash/leds-lm3601x.c (renamed from drivers/leds/leds-lm3601x.c)0
-rw-r--r--drivers/leds/flash/leds-max77693.c (renamed from drivers/leds/leds-max77693.c)0
-rw-r--r--drivers/leds/flash/leds-rt8515.c4
-rw-r--r--drivers/leds/flash/leds-sgm3140.c (renamed from drivers/leds/leds-sgm3140.c)0
-rw-r--r--drivers/leds/led-class-flash.c6
-rw-r--r--drivers/leds/led-class.c10
-rw-r--r--drivers/leds/led-core.c15
-rw-r--r--drivers/leds/leds-el15203000.c3
-rw-r--r--drivers/leds/leds-gpio.c12
-rw-r--r--drivers/leds/leds-is31fl32xx.c1
-rw-r--r--drivers/leds/leds-lm3692x.c3
-rw-r--r--drivers/leds/leds-lm3697.c16
-rw-r--r--drivers/leds/leds-lt3593.c5
-rw-r--r--drivers/leds/leds-pca955x.c232
-rw-r--r--drivers/leds/leds-pwm.c49
-rw-r--r--drivers/leds/leds.h1
-rw-r--r--drivers/leds/trigger/Kconfig2
-rw-r--r--drivers/leds/trigger/ledtrig-audio.c37
-rw-r--r--drivers/lightnvm/Kconfig44
-rw-r--r--drivers/lightnvm/Makefile11
-rw-r--r--drivers/lightnvm/core.c1440
-rw-r--r--drivers/lightnvm/pblk-cache.c137
-rw-r--r--drivers/lightnvm/pblk-core.c2151
-rw-r--r--drivers/lightnvm/pblk-gc.c726
-rw-r--r--drivers/lightnvm/pblk-init.c1324
-rw-r--r--drivers/lightnvm/pblk-map.c210
-rw-r--r--drivers/lightnvm/pblk-rb.c858
-rw-r--r--drivers/lightnvm/pblk-read.c474
-rw-r--r--drivers/lightnvm/pblk-recovery.c874
-rw-r--r--drivers/lightnvm/pblk-rl.c254
-rw-r--r--drivers/lightnvm/pblk-sysfs.c728
-rw-r--r--drivers/lightnvm/pblk-trace.h145
-rw-r--r--drivers/lightnvm/pblk-write.c665
-rw-r--r--drivers/lightnvm/pblk.h1358
-rw-r--r--drivers/md/Kconfig4
-rw-r--r--drivers/md/Makefile4
-rw-r--r--drivers/md/bcache/Kconfig1
-rw-r--r--drivers/md/bcache/btree.c2
-rw-r--r--drivers/md/bcache/super.c26
-rw-r--r--drivers/md/bcache/util.h2
-rw-r--r--drivers/md/dm-cache-target.c24
-rw-r--r--drivers/md/dm-clone-target.c5
-rw-r--r--drivers/md/dm-core.h5
-rw-r--r--drivers/md/dm-crypt.c38
-rw-r--r--drivers/md/dm-delay.c4
-rw-r--r--drivers/md/dm-dust.c4
-rw-r--r--drivers/md/dm-ebs-target.c5
-rw-r--r--drivers/md/dm-era-target.c4
-rw-r--r--drivers/md/dm-flakey.c4
-rw-r--r--drivers/md/dm-ima.c750
-rw-r--r--drivers/md/dm-ima.h78
-rw-r--r--drivers/md/dm-integrity.c28
-rw-r--r--drivers/md/dm-ioctl.c28
-rw-r--r--drivers/md/dm-linear.c10
-rw-r--r--drivers/md/dm-log-userspace-base.c3
-rw-r--r--drivers/md/dm-log-writes.c4
-rw-r--r--drivers/md/dm-log.c10
-rw-r--r--drivers/md/dm-mpath.c40
-rw-r--r--drivers/md/dm-ps-historical-service-time.c3
-rw-r--r--drivers/md/dm-ps-io-affinity.c3
-rw-r--r--drivers/md/dm-ps-queue-length.c3
-rw-r--r--drivers/md/dm-ps-round-robin.c4
-rw-r--r--drivers/md/dm-ps-service-time.c3
-rw-r--r--drivers/md/dm-raid.c39
-rw-r--r--drivers/md/dm-raid1.c17
-rw-r--r--drivers/md/dm-rq.c1
-rw-r--r--drivers/md/dm-snap-persistent.c4
-rw-r--r--drivers/md/dm-snap-transient.c4
-rw-r--r--drivers/md/dm-snap.c13
-rw-r--r--drivers/md/dm-stripe.c15
-rw-r--r--drivers/md/dm-switch.c4
-rw-r--r--drivers/md/dm-table.c2
-rw-r--r--drivers/md/dm-thin.c8
-rw-r--r--drivers/md/dm-unstripe.c4
-rw-r--r--drivers/md/dm-verity-target.c43
-rw-r--r--drivers/md/dm-writecache.c472
-rw-r--r--drivers/md/dm-zoned-target.c3
-rw-r--r--drivers/md/dm.c42
-rw-r--r--drivers/md/md.h4
-rw-r--r--drivers/md/raid1.c19
-rw-r--r--drivers/md/raid10.c14
-rw-r--r--drivers/md/raid5.c4
-rw-r--r--drivers/media/pci/intel/ipu3/cio2-bridge.c2
-rw-r--r--drivers/media/rc/bpf-lirc.c6
-rw-r--r--drivers/memstick/core/ms_block.c2
-rw-r--r--drivers/memstick/host/r592.c9
-rw-r--r--drivers/memstick/host/tifm_ms.c12
-rw-r--r--drivers/mfd/db8500-prcmu.c2
-rw-r--r--drivers/mfd/fsl-imx25-tsadc.c4
-rw-r--r--drivers/mfd/ioc3.c10
-rw-r--r--drivers/mfd/qcom-pm8xxx.c10
-rw-r--r--drivers/misc/Kconfig12
-rw-r--r--drivers/misc/Makefile1
-rw-r--r--drivers/misc/gehc-achc.c565
-rw-r--r--drivers/misc/lkdtm/bugs.c51
-rw-r--r--drivers/misc/lkdtm/core.c8
-rw-r--r--drivers/misc/lkdtm/fortify.c53
-rw-r--r--drivers/misc/lkdtm/heap.c9
-rw-r--r--drivers/misc/lkdtm/lkdtm.h24
-rw-r--r--drivers/misc/mei/bus.c18
-rw-r--r--drivers/misc/mei/client.h2
-rw-r--r--drivers/misc/mei/mei_dev.h2
-rw-r--r--drivers/misc/pci_endpoint_test.c1
-rw-r--r--drivers/misc/pvpanic/pvpanic-pci.c2
-rw-r--r--drivers/misc/pvpanic/pvpanic.c2
-rw-r--r--drivers/misc/sgi-gru/grumain.c6
-rw-r--r--drivers/misc/sgi-gru/grutables.h3
-rw-r--r--drivers/misc/sgi-xp/xpc_uv.c8
-rw-r--r--drivers/misc/sram.c103
-rw-r--r--drivers/misc/sram.h9
-rw-r--r--drivers/misc/vmw_vmci/vmci_queue_pair.c6
-rw-r--r--drivers/mmc/core/Kconfig2
-rw-r--r--drivers/mmc/core/block.c241
-rw-r--r--drivers/mmc/core/core.c48
-rw-r--r--drivers/mmc/core/core.h2
-rw-r--r--drivers/mmc/core/crypto.c15
-rw-r--r--drivers/mmc/core/host.c13
-rw-r--r--drivers/mmc/core/host.h6
-rw-r--r--drivers/mmc/core/mmc.c2
-rw-r--r--drivers/mmc/core/mmc_ops.c16
-rw-r--r--drivers/mmc/core/mmc_ops.h1
-rw-r--r--drivers/mmc/core/pwrseq_sd8787.c14
-rw-r--r--drivers/mmc/core/queue.c34
-rw-r--r--drivers/mmc/core/sdio_cis.c22
-rw-r--r--drivers/mmc/host/cqhci-crypto.h7
-rw-r--r--drivers/mmc/host/dw_mmc.c80
-rw-r--r--drivers/mmc/host/dw_mmc.h7
-rw-r--r--drivers/mmc/host/mmc_spi.c15
-rw-r--r--drivers/mmc/host/mmci.c3
-rw-r--r--drivers/mmc/host/mmci_stm32_sdmmc.c7
-rw-r--r--drivers/mmc/host/moxart-mmc.c1
-rw-r--r--drivers/mmc/host/renesas_sdhi.h9
-rw-r--r--drivers/mmc/host/renesas_sdhi_core.c90
-rw-r--r--drivers/mmc/host/renesas_sdhi_internal_dmac.c135
-rw-r--r--drivers/mmc/host/renesas_sdhi_sys_dmac.c7
-rw-r--r--drivers/mmc/host/rtsx_pci_sdmmc.c36
-rw-r--r--drivers/mmc/host/sdhci-esdhc-imx.c78
-rw-r--r--drivers/mmc/host/sdhci-iproc.c18
-rw-r--r--drivers/mmc/host/sdhci-msm.c21
-rw-r--r--drivers/mmc/host/sdhci-of-arasan.c51
-rw-r--r--drivers/mmc/host/sdhci-tegra.c9
-rw-r--r--drivers/mmc/host/sdhci.c27
-rw-r--r--drivers/mmc/host/sdhci.h1
-rw-r--r--drivers/mmc/host/sh_mmcif.c4
-rw-r--r--drivers/mmc/host/tifm_sd.c16
-rw-r--r--drivers/mmc/host/usdhi6rol0.c14
-rw-r--r--drivers/mmc/host/via-sdmmc.c4
-rw-r--r--drivers/most/most_cdev.c8
-rw-r--r--drivers/mtd/chips/cfi_cmdset_0002.c2
-rw-r--r--drivers/mtd/devices/mchp48l640.c5
-rw-r--r--drivers/mtd/mtd_blkdevs.c11
-rw-r--r--drivers/mtd/mtdcore.c4
-rw-r--r--drivers/mtd/nand/raw/nand_base.c10
-rw-r--r--drivers/net/Kconfig17
-rw-r--r--drivers/net/Makefile6
-rw-r--r--drivers/net/Space.c178
-rw-r--r--drivers/net/appletalk/Kconfig4
-rw-r--r--drivers/net/appletalk/ipddp.c16
-rw-r--r--drivers/net/appletalk/ltpc.c7
-rw-r--r--drivers/net/bareudp.c16
-rw-r--r--drivers/net/bonding/bond_3ad.c11
-rw-r--r--drivers/net/bonding/bond_alb.c32
-rw-r--r--drivers/net/bonding/bond_main.c591
-rw-r--r--drivers/net/bonding/bond_netlink.c16
-rw-r--r--drivers/net/bonding/bond_options.c27
-rw-r--r--drivers/net/bonding/bond_procfs.c2
-rw-r--r--drivers/net/bonding/bond_sysfs.c25
-rw-r--r--drivers/net/can/Kconfig3
-rw-r--r--drivers/net/can/at91_can.c137
-rw-r--r--drivers/net/can/c_can/c_can.h25
-rw-r--r--drivers/net/can/c_can/c_can_main.c123
-rw-r--r--drivers/net/can/c_can/c_can_platform.c1
-rw-r--r--drivers/net/can/dev/dev.c66
-rw-r--r--drivers/net/can/dev/netlink.c11
-rw-r--r--drivers/net/can/dev/rx-offload.c90
-rw-r--r--drivers/net/can/flexcan.c129
-rw-r--r--drivers/net/can/janz-ican3.c23
-rw-r--r--drivers/net/can/m_can/m_can.c274
-rw-r--r--drivers/net/can/m_can/m_can.h11
-rw-r--r--drivers/net/can/m_can/m_can_pci.c11
-rw-r--r--drivers/net/can/m_can/m_can_platform.c31
-rw-r--r--drivers/net/can/m_can/tcan4x5x-core.c17
-rw-r--r--drivers/net/can/mscan/mpc5xxx_can.c7
-rw-r--r--drivers/net/can/rcar/Kconfig4
-rw-r--r--drivers/net/can/rcar/rcar_canfd.c338
-rw-r--r--drivers/net/can/sja1000/peak_pci.c119
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c30
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c4
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd.h2
-rw-r--r--drivers/net/can/ti_hecc.c2
-rw-r--r--drivers/net/can/usb/esd_usb2.c16
-rw-r--r--drivers/net/can/usb/etas_es58x/es581_4.c5
-rw-r--r--drivers/net/can/usb/etas_es58x/es58x_core.c82
-rw-r--r--drivers/net/can/usb/etas_es58x/es58x_core.h2
-rw-r--r--drivers/net/can/usb/etas_es58x/es58x_fd.c19
-rw-r--r--drivers/net/can/usb/etas_es58x/es58x_fd.h23
-rw-r--r--drivers/net/can/usb/peak_usb/pcan_usb.c228
-rw-r--r--drivers/net/dsa/b53/b53_common.c10
-rw-r--r--drivers/net/dsa/b53/b53_priv.h2
-rw-r--r--drivers/net/dsa/bcm_sf2.c1
-rw-r--r--drivers/net/dsa/hirschmann/hellcreek.c16
-rw-r--r--drivers/net/dsa/lan9303-core.c34
-rw-r--r--drivers/net/dsa/lantiq_gswip.c14
-rw-r--r--drivers/net/dsa/microchip/ksz8795.c82
-rw-r--r--drivers/net/dsa/microchip/ksz8795_reg.h4
-rw-r--r--drivers/net/dsa/microchip/ksz_common.h9
-rw-r--r--drivers/net/dsa/mt7530.c174
-rw-r--r--drivers/net/dsa/mt7530.h23
-rw-r--r--drivers/net/dsa/mv88e6xxx/Kconfig1
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c103
-rw-r--r--drivers/net/dsa/mv88e6xxx/serdes.c11
-rw-r--r--drivers/net/dsa/ocelot/Kconfig2
-rw-r--r--drivers/net/dsa/ocelot/felix.c153
-rw-r--r--drivers/net/dsa/ocelot/felix.h2
-rw-r--r--drivers/net/dsa/qca/ar9331.c73
-rw-r--r--drivers/net/dsa/sja1105/Kconfig1
-rw-r--r--drivers/net/dsa/sja1105/sja1105.h33
-rw-r--r--drivers/net/dsa/sja1105/sja1105_devlink.c114
-rw-r--r--drivers/net/dsa/sja1105/sja1105_dynamic_config.c6
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c1887
-rw-r--r--drivers/net/dsa/sja1105/sja1105_mdio.c6
-rw-r--r--drivers/net/dsa/sja1105/sja1105_spi.c10
-rw-r--r--drivers/net/dsa/sja1105/sja1105_vl.c14
-rw-r--r--drivers/net/eql.c24
-rw-r--r--drivers/net/ethernet/3com/3c509.c7
-rw-r--r--drivers/net/ethernet/3com/3c515.c3
-rw-r--r--drivers/net/ethernet/3com/3c574_cs.c2
-rw-r--r--drivers/net/ethernet/3com/3c59x.c4
-rw-r--r--drivers/net/ethernet/3com/Kconfig1
-rw-r--r--drivers/net/ethernet/8390/Kconfig3
-rw-r--r--drivers/net/ethernet/8390/apne.c11
-rw-r--r--drivers/net/ethernet/8390/ax88796.c9
-rw-r--r--drivers/net/ethernet/8390/axnet_cs.c2
-rw-r--r--drivers/net/ethernet/8390/ne.c5
-rw-r--r--drivers/net/ethernet/8390/pcnet_cs.c2
-rw-r--r--drivers/net/ethernet/8390/smc-ultra.c9
-rw-r--r--drivers/net/ethernet/8390/wd.c7
-rw-r--r--drivers/net/ethernet/8390/xsurf100.c9
-rw-r--r--drivers/net/ethernet/Kconfig1
-rw-r--r--drivers/net/ethernet/Makefile1
-rw-r--r--drivers/net/ethernet/actions/Kconfig4
-rw-r--r--drivers/net/ethernet/actions/owl-emac.c6
-rw-r--r--drivers/net/ethernet/adaptec/starfire.c2
-rw-r--r--drivers/net/ethernet/agere/et131x.c2
-rw-r--r--drivers/net/ethernet/allwinner/sun4i-emac.c2
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_ethtool.c8
-rw-r--r--drivers/net/ethernet/amd/Kconfig4
-rw-r--r--drivers/net/ethernet/amd/amd8111e.c2
-rw-r--r--drivers/net/ethernet/amd/atarilance.c11
-rw-r--r--drivers/net/ethernet/amd/au1000_eth.c2
-rw-r--r--drivers/net/ethernet/amd/lance.c6
-rw-r--r--drivers/net/ethernet/amd/mvme147.c16
-rw-r--r--drivers/net/ethernet/amd/ni65.c6
-rw-r--r--drivers/net/ethernet/amd/pcnet32.c2
-rw-r--r--drivers/net/ethernet/amd/sun3lance.c19
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-drv.c2
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c8
-rw-r--r--drivers/net/ethernet/apm/xgene-v2/main.c4
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c8
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_main.c2
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c15
-rw-r--r--drivers/net/ethernet/arc/emac_main.c2
-rw-r--r--drivers/net/ethernet/atheros/ag71xx.c2
-rw-r--r--drivers/net/ethernet/atheros/alx/main.c2
-rw-r--r--drivers/net/ethernet/atheros/atl1c/atl1c_main.c2
-rw-r--r--drivers/net/ethernet/atheros/atl1e/atl1e_main.c2
-rw-r--r--drivers/net/ethernet/atheros/atlx/atl1.c2
-rw-r--r--drivers/net/ethernet/atheros/atlx/atl2.c2
-rw-r--r--drivers/net/ethernet/broadcom/Kconfig6
-rw-r--r--drivers/net/ethernet/broadcom/b44.c2
-rw-r--r--drivers/net/ethernet/broadcom/bcm63xx_enet.c5
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.c8
-rw-r--r--drivers/net/ethernet/broadcom/bgmac.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnx2.c70
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c8
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c6
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/Makefile2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c2430
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h136
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c185
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c90
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c573
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h76
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c763
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h145
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c393
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h63
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c455
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c264
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c31
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c62
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c2
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c12
-rw-r--r--drivers/net/ethernet/broadcom/sb1250-mac.c2
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c81
-rw-r--r--drivers/net/ethernet/brocade/bna/bnad_ethtool.c12
-rw-r--r--drivers/net/ethernet/cadence/Kconfig1
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c13
-rw-r--r--drivers/net/ethernet/cadence/macb_ptp.c11
-rw-r--r--drivers/net/ethernet/cavium/Kconfig4
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_ethtool.c8
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_main.c11
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_vf_main.c6
-rw-r--r--drivers/net/ethernet/cavium/octeon/octeon_mgmt.c2
-rw-r--r--drivers/net/ethernet/cavium/thunder/nic_main.c8
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c4
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c10
-rw-r--r--drivers/net/ethernet/chelsio/Kconfig1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb/cxgb2.c10
-rw-r--r--drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c32
-rw-r--r--drivers/net/ethernet/chelsio/cxgb3/sge.c101
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c8
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c24
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sge.c8
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c20
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/sge.c8
-rw-r--r--drivers/net/ethernet/cirrus/Kconfig27
-rw-r--r--drivers/net/ethernet/cirrus/cs89x0.c31
-rw-r--r--drivers/net/ethernet/cirrus/ep93xx_eth.c2
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_ethtool.c8
-rw-r--r--drivers/net/ethernet/cortina/gemini.c8
-rw-r--r--drivers/net/ethernet/davicom/dm9000.c2
-rw-r--r--drivers/net/ethernet/dec/tulip/de4x5.c11
-rw-r--r--drivers/net/ethernet/dec/tulip/media.c2
-rw-r--r--drivers/net/ethernet/dec/tulip/tulip_core.c2
-rw-r--r--drivers/net/ethernet/dec/tulip/winbond-840.c2
-rw-r--r--drivers/net/ethernet/dlink/dl2k.c2
-rw-r--r--drivers/net/ethernet/dlink/sundance.c2
-rw-r--r--drivers/net/ethernet/dnet.c2
-rw-r--r--drivers/net/ethernet/ec_bhf.c10
-rw-r--r--drivers/net/ethernet/emulex/benet/be_ethtool.c8
-rw-r--r--drivers/net/ethernet/ethoc.c2
-rw-r--r--drivers/net/ethernet/faraday/ftgmac100.c2
-rw-r--r--drivers/net/ethernet/faraday/ftmac100.c2
-rw-r--r--drivers/net/ethernet/fealnx.c2
-rw-r--r--drivers/net/ethernet/freescale/Kconfig2
-rw-r--r--drivers/net/ethernet/freescale/dpaa/Kconfig1
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_eth.c2
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c8
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/Makefile2
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c7
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c4
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c8
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c56
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c530
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c420
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h62
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h19
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpsw.c80
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpsw.h36
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_ethtool.c8
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_pf.c2
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_vf.c2
-rw-r--r--drivers/net/ethernet/freescale/fec.h31
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c212
-rw-r--r--drivers/net/ethernet/freescale/fec_mpc52xx.c2
-rw-r--r--drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c2
-rw-r--r--drivers/net/ethernet/freescale/gianfar.c2
-rw-r--r--drivers/net/ethernet/freescale/gianfar_ethtool.c8
-rw-r--r--drivers/net/ethernet/freescale/ucc_geth.c2
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.c6
-rw-r--r--drivers/net/ethernet/hisilicon/Kconfig4
-rw-r--r--drivers/net/ethernet/hisilicon/hip04_eth.c8
-rw-r--r--drivers/net/ethernet/hisilicon/hisi_femac.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_enet.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_ethtool.c12
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h5
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c31
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c228
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.h37
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c265
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.h31
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c81
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h38
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c64
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c70
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c148
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h15
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c1665
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c238
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h31
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c11
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c36
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h17
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c150
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.h15
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c52
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h27
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c124
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_devlink.c8
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_devlink.h4
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_ethtool.c8
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c2
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_main.c19
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_sriov.c6
-rw-r--r--drivers/net/ethernet/i825xx/82596.c24
-rw-r--r--drivers/net/ethernet/i825xx/sun3_82586.c17
-rw-r--r--drivers/net/ethernet/ibm/emac/core.c4
-rw-r--r--drivers/net/ethernet/ibm/ibmveth.c2
-rw-r--r--drivers/net/ethernet/intel/Kconfig12
-rw-r--r--drivers/net/ethernet/intel/e100.c6
-rw-r--r--drivers/net/ethernet/intel/e1000/e1000_ethtool.c8
-rw-r--r--drivers/net/ethernet/intel/e1000/e1000_main.c2
-rw-r--r--drivers/net/ethernet/intel/e1000e/ethtool.c10
-rw-r--r--drivers/net/ethernet/intel/e1000e/hw.h9
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c45
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.h6
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c372
-rw-r--r--drivers/net/ethernet/intel/e1000e/ptp.c1
-rw-r--r--drivers/net/ethernet/intel/e1000e/regs.h1
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c8
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h78
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c12
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c32
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ptp.c756
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_register.h29
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c23
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf.h10
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_ethtool.c22
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c136
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_virtchnl.c47
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_devlink.c8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c12
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c45
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.c68
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c7
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_mac.c6
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ethtool.c11
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c2
-rw-r--r--drivers/net/ethernet/intel/igbvf/ethtool.c8
-rw-r--r--drivers/net/ethernet/intel/igbvf/netdev.c2
-rw-r--r--drivers/net/ethernet/intel/igc/igc.h50
-rw-r--r--drivers/net/ethernet/intel/igc/igc_base.c10
-rw-r--r--drivers/net/ethernet/intel/igc/igc_defines.h91
-rw-r--r--drivers/net/ethernet/intel/igc/igc_ethtool.c49
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c501
-rw-r--r--drivers/net/ethernet/intel/igc/igc_phy.c6
-rw-r--r--drivers/net/ethernet/intel/igc/igc_ptp.c182
-rw-r--r--drivers/net/ethernet/intel/igc/igc_regs.h43
-rw-r--r--drivers/net/ethernet/intel/igc/igc_tsn.c176
-rw-r--r--drivers/net/ethernet/intel/igc/igc_tsn.h1
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c8
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c5
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ethtool.c8
-rw-r--r--drivers/net/ethernet/jme.c84
-rw-r--r--drivers/net/ethernet/korina.c2
-rw-r--r--drivers/net/ethernet/lantiq_etop.c2
-rw-r--r--drivers/net/ethernet/litex/Kconfig28
-rw-r--r--drivers/net/ethernet/litex/Makefile5
-rw-r--r--drivers/net/ethernet/litex/litex_liteeth.c314
-rw-r--r--drivers/net/ethernet/marvell/mv643xx_eth.c14
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c46
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2.h2
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c27
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/Kconfig4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/Makefile5
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.c5
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.h7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/common.h31
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.c9
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.h114
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/npc.h9
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/ptp.c46
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/ptp.h3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rpm.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rpm.h2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.c226
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.h65
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c19
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c127
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c6
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c117
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c706
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c18
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c135
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c76
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c11
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h16
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c108
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c5
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h15
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/Makefile7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c8
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h21
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c84
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h46
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c156
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.h20
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_dmac_flt.c3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c72
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c154
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c69
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c5
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h6
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c58
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c42
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_devlink.c7
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_devlink.h2
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_main.c5
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_switchdev.c16
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_switchdev.h3
-rw-r--r--drivers/net/ethernet/marvell/pxa168_eth.c2
-rw-r--r--drivers/net/ethernet/marvell/skge.c10
-rw-r--r--drivers/net/ethernet/marvell/sky2.c14
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c2
-rw-r--r--drivers/net/ethernet/mediatek/mtk_star_emac.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/Kconfig2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_ethtool.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/qp.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c76
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c176
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h84
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/channels.c46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/channels.h16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs.h99
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c329
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c48
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c170
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rss.c588
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rss.h49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c690
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h71
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c164
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c (renamed from drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c)475
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h41
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c163
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tir.c200
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tir.h58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/trap.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c72
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_common.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c140
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs.c671
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c156
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c904
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c172
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c323
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c363
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c26
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h123
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c869
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h41
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c358
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h68
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c413
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/events.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c76
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c268
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c602
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h70
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c117
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c85
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c271
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c152
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c59
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c101
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h68
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Kconfig2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c84
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c94
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c48
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c34
-rw-r--r--drivers/net/ethernet/micrel/ks8851_common.c2
-rw-r--r--drivers/net/ethernet/micrel/ksz884x.c2
-rw-r--r--drivers/net/ethernet/microchip/Kconfig1
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c2
-rw-r--r--drivers/net/ethernet/microchip/sparx5/Makefile2
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c593
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c2
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main.c23
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main.h69
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_packet.c13
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_port.c2
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_port.h1
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c24
-rw-r--r--drivers/net/ethernet/microsoft/mana/gdma.h32
-rw-r--r--drivers/net/ethernet/microsoft/mana/gdma_main.c88
-rw-r--r--drivers/net/ethernet/microsoft/mana/hw_channel.c2
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana.h29
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c162
-rw-r--r--drivers/net/ethernet/mscc/Kconfig3
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c247
-rw-r--r--drivers/net/ethernet/mscc/ocelot.h11
-rw-r--r--drivers/net/ethernet/mscc/ocelot_io.c16
-rw-r--r--drivers/net/ethernet/mscc/ocelot_net.c397
-rw-r--r--drivers/net/ethernet/mscc/ocelot_vsc7514.c71
-rw-r--r--drivers/net/ethernet/myricom/myri10ge/myri10ge.c71
-rw-r--r--drivers/net/ethernet/natsemi/jazzsonic.c2
-rw-r--r--drivers/net/ethernet/natsemi/natsemi.c2
-rw-r--r--drivers/net/ethernet/natsemi/xtsonic.c1
-rw-r--r--drivers/net/ethernet/neterion/s2io.c2
-rw-r--r--drivers/net/ethernet/neterion/vxge/vxge-main.c2
-rw-r--r--drivers/net/ethernet/netronome/Kconfig1
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/action.c35
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/conntrack.c620
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/conntrack.h26
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/main.h79
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/match.c333
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/metadata.c7
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c51
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_main.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net.h20
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c144
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c29
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_main.c2
-rw-r--r--drivers/net/ethernet/ni/nixge.c24
-rw-r--r--drivers/net/ethernet/nvidia/forcedeth.c6
-rw-r--r--drivers/net/ethernet/nxp/lpc_eth.c2
-rw-r--r--drivers/net/ethernet/oki-semi/pch_gbe/Kconfig1
-rw-r--r--drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c10
-rw-r--r--drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.c4
-rw-r--r--drivers/net/ethernet/packetengines/hamachi.c63
-rw-r--r--drivers/net/ethernet/packetengines/yellowfin.c2
-rw-r--r--drivers/net/ethernet/pasemi/pasemi_mac.c32
-rw-r--r--drivers/net/ethernet/pensando/Kconfig2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c5
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.c41
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.h3
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_devlink.c18
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_ethtool.c29
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_if.h5
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c303
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.h10
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_main.c6
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_phc.c32
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c143
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h14
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_txrx.c27
-rw-r--r--drivers/net/ethernet/qlogic/Kconfig2
-rw-r--r--drivers/net/ethernet/qlogic/netxen/netxen_nic.h1
-rw-r--r--drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c8
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed.h15
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dcbx.c6
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_devlink.c7
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_int.c22
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iwarp.c2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_ll2.c20
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_main.c15
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.c1
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c1
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_rdma.c3
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_reg_addr.h8
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede.h13
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_ethtool.c14
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c35
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c4
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c10
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c16
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c32
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c6
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac.c2
-rw-r--r--drivers/net/ethernet/qualcomm/qca_spi.c2
-rw-r--r--drivers/net/ethernet/qualcomm/qca_uart.c2
-rw-r--r--drivers/net/ethernet/rdc/r6040.c2
-rw-r--r--drivers/net/ethernet/realtek/8139cp.c33
-rw-r--r--drivers/net/ethernet/realtek/8139too.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c69
-rw-r--r--drivers/net/ethernet/renesas/Kconfig2
-rw-r--r--drivers/net/ethernet/renesas/ravb.h36
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c379
-rw-r--r--drivers/net/ethernet/renesas/ravb_ptp.c8
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c4
-rw-r--r--drivers/net/ethernet/rocker/rocker.h3
-rw-r--r--drivers/net/ethernet/rocker/rocker_main.c11
-rw-r--r--drivers/net/ethernet/rocker/rocker_ofdpa.c21
-rw-r--r--drivers/net/ethernet/samsung/Kconfig2
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c8
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c2
-rw-r--r--drivers/net/ethernet/sfc/Kconfig2
-rw-r--r--drivers/net/ethernet/sfc/efx.c2
-rw-r--r--drivers/net/ethernet/sfc/ethtool.c8
-rw-r--r--drivers/net/ethernet/sfc/falcon/efx.c2
-rw-r--r--drivers/net/ethernet/sfc/falcon/ethtool.c8
-rw-r--r--drivers/net/ethernet/sgi/ioc3-eth.c2
-rw-r--r--drivers/net/ethernet/sgi/meth.c2
-rw-r--r--drivers/net/ethernet/sis/sis190.c2
-rw-r--r--drivers/net/ethernet/sis/sis900.c2
-rw-r--r--drivers/net/ethernet/smsc/Kconfig1
-rw-r--r--drivers/net/ethernet/smsc/epic100.c2
-rw-r--r--drivers/net/ethernet/smsc/smc9194.c6
-rw-r--r--drivers/net/ethernet/smsc/smc91c92_cs.c2
-rw-r--r--drivers/net/ethernet/smsc/smsc911x.c2
-rw-r--r--drivers/net/ethernet/smsc/smsc9420.c2
-rw-r--r--drivers/net/ethernet/socionext/netsec.c12
-rw-r--r--drivers/net/ethernet/socionext/sni_ave.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Kconfig2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h13
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c18
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c9
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c7
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c75
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c12
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c12
-rw-r--r--drivers/net/ethernet/sun/cassini.c2
-rw-r--r--drivers/net/ethernet/sun/niu.c22
-rw-r--r--drivers/net/ethernet/sun/sungem.c2
-rw-r--r--drivers/net/ethernet/sun/sunhme.c24
-rw-r--r--drivers/net/ethernet/synopsys/dwc-xlgmac-ethtool.c14
-rw-r--r--drivers/net/ethernet/synopsys/dwc-xlgmac-net.c2
-rw-r--r--drivers/net/ethernet/tehuti/tehuti.c30
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c81
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.h2
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-switchdev.c2
-rw-r--r--drivers/net/ethernet/ti/cpmac.c2
-rw-r--r--drivers/net/ethernet/ti/cpsw.c8
-rw-r--r--drivers/net/ethernet/ti/cpsw_ethtool.c8
-rw-r--r--drivers/net/ethernet/ti/cpsw_new.c35
-rw-r--r--drivers/net/ethernet/ti/cpsw_priv.h12
-rw-r--r--drivers/net/ethernet/ti/cpsw_switchdev.c2
-rw-r--r--drivers/net/ethernet/ti/davinci_emac.c18
-rw-r--r--drivers/net/ethernet/ti/netcp_core.c2
-rw-r--r--drivers/net/ethernet/ti/tlan.c2
-rw-r--r--drivers/net/ethernet/toshiba/spider_net.c29
-rw-r--r--drivers/net/ethernet/toshiba/tc35815.c2
-rw-r--r--drivers/net/ethernet/tundra/tsi108_eth.c2
-rw-r--r--drivers/net/ethernet/via/via-rhine.c11
-rw-r--r--drivers/net/ethernet/via/via-velocity.c16
-rw-r--r--drivers/net/ethernet/wiznet/w5100.c2
-rw-r--r--drivers/net/ethernet/xilinx/ll_temac_main.c16
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c20
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_emaclite.c2
-rw-r--r--drivers/net/ethernet/xircom/xirc2ps_cs.c2
-rw-r--r--drivers/net/ethernet/xscale/Kconfig4
-rw-r--r--drivers/net/ethernet/xscale/Makefile6
-rw-r--r--drivers/net/ethernet/xscale/ixp46x_ts.h13
-rw-r--r--drivers/net/ethernet/xscale/ixp4xx_eth.c37
-rw-r--r--drivers/net/ethernet/xscale/ptp_ixp46x.c122
-rw-r--r--drivers/net/fddi/skfp/skfddi.c60
-rw-r--r--drivers/net/hamradio/6pack.c6
-rw-r--r--drivers/net/hamradio/baycom_epp.c9
-rw-r--r--drivers/net/hamradio/baycom_par.c12
-rw-r--r--drivers/net/hamradio/baycom_ser_fdx.c12
-rw-r--r--drivers/net/hamradio/baycom_ser_hdx.c12
-rw-r--r--drivers/net/hamradio/bpqether.c9
-rw-r--r--drivers/net/hamradio/dmascc.c18
-rw-r--r--drivers/net/hamradio/hdlcdrv.c20
-rw-r--r--drivers/net/hamradio/scc.c13
-rw-r--r--drivers/net/hamradio/yam.c19
-rw-r--r--drivers/net/hippi/rrunner.c11
-rw-r--r--drivers/net/hippi/rrunner.h3
-rw-r--r--drivers/net/ieee802154/mac802154_hwsim.c6
-rw-r--r--drivers/net/ipa/Makefile5
-rw-r--r--drivers/net/ipa/gsi.c241
-rw-r--r--drivers/net/ipa/gsi.h31
-rw-r--r--drivers/net/ipa/gsi_trans.c34
-rw-r--r--drivers/net/ipa/ipa.h30
-rw-r--r--drivers/net/ipa/ipa_clock.c331
-rw-r--r--drivers/net/ipa/ipa_clock.h64
-rw-r--r--drivers/net/ipa/ipa_cmd.c51
-rw-r--r--drivers/net/ipa/ipa_cmd.h22
-rw-r--r--drivers/net/ipa/ipa_data-v3.1.c4
-rw-r--r--drivers/net/ipa/ipa_data-v3.5.1.c4
-rw-r--r--drivers/net/ipa/ipa_data-v4.11.c19
-rw-r--r--drivers/net/ipa/ipa_data-v4.2.c4
-rw-r--r--drivers/net/ipa/ipa_data-v4.5.c6
-rw-r--r--drivers/net/ipa/ipa_data-v4.9.c15
-rw-r--r--drivers/net/ipa/ipa_data.h10
-rw-r--r--drivers/net/ipa/ipa_endpoint.c44
-rw-r--r--drivers/net/ipa/ipa_interrupt.c83
-rw-r--r--drivers/net/ipa/ipa_interrupt.h8
-rw-r--r--drivers/net/ipa/ipa_main.c222
-rw-r--r--drivers/net/ipa/ipa_modem.c140
-rw-r--r--drivers/net/ipa/ipa_modem.h4
-rw-r--r--drivers/net/ipa/ipa_power.c473
-rw-r--r--drivers/net/ipa/ipa_power.h73
-rw-r--r--drivers/net/ipa/ipa_qmi.c6
-rw-r--r--drivers/net/ipa/ipa_qmi.h19
-rw-r--r--drivers/net/ipa/ipa_reg.h12
-rw-r--r--drivers/net/ipa/ipa_resource.c3
-rw-r--r--drivers/net/ipa/ipa_smp2p.c93
-rw-r--r--drivers/net/ipa/ipa_smp2p.h2
-rw-r--r--drivers/net/ipa/ipa_table.c40
-rw-r--r--drivers/net/ipa/ipa_table.h16
-rw-r--r--drivers/net/ipa/ipa_uc.c70
-rw-r--r--drivers/net/ipa/ipa_uc.h22
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c1
-rw-r--r--drivers/net/macvlan.c8
-rw-r--r--drivers/net/mctp/Kconfig8
-rw-r--r--drivers/net/mctp/Makefile0
-rw-r--r--drivers/net/mdio/Kconfig3
-rw-r--r--drivers/net/mdio/mdio-ipq4019.c41
-rw-r--r--drivers/net/mdio/mdio-mscc-miim.c12
-rw-r--r--drivers/net/mdio/mdio-mux.c37
-rw-r--r--drivers/net/mhi/Makefile3
-rw-r--r--drivers/net/mhi/mhi.h41
-rw-r--r--drivers/net/mhi/proto_mbim.c304
-rw-r--r--drivers/net/mhi_net.c (renamed from drivers/net/mhi/net.c)168
-rw-r--r--drivers/net/mii.c6
-rw-r--r--drivers/net/netdevsim/bus.c43
-rw-r--r--drivers/net/netdevsim/dev.c25
-rw-r--r--drivers/net/netdevsim/ethtool.c8
-rw-r--r--drivers/net/netdevsim/fib.c2
-rw-r--r--drivers/net/netdevsim/netdev.c6
-rw-r--r--drivers/net/netdevsim/netdevsim.h2
-rw-r--r--drivers/net/pcs/pcs-xpcs.c6
-rw-r--r--drivers/net/phy/Kconfig8
-rw-r--r--drivers/net/phy/Makefile1
-rw-r--r--drivers/net/phy/at803x.c18
-rw-r--r--drivers/net/phy/dp83822.c8
-rw-r--r--drivers/net/phy/intel-xway.c76
-rw-r--r--drivers/net/phy/marvell.c144
-rw-r--r--drivers/net/phy/marvell10g.c97
-rw-r--r--drivers/net/phy/mediatek-ge.c4
-rw-r--r--drivers/net/phy/micrel.c2
-rw-r--r--drivers/net/phy/mscc/mscc_ptp.c8
-rw-r--r--drivers/net/phy/mxl-gpy.c727
-rw-r--r--drivers/net/phy/nxp-tja11xx.c13
-rw-r--r--drivers/net/phy/phy.c4
-rw-r--r--drivers/net/phy/phy_device.c27
-rw-r--r--drivers/net/phy/phylink.c21
-rw-r--r--drivers/net/phy/xilinx_gmii2rgmii.c46
-rw-r--r--drivers/net/plip/plip.c12
-rw-r--r--drivers/net/ppp/ppp_generic.c35
-rw-r--r--drivers/net/sb1000.c20
-rw-r--r--drivers/net/slip/slip.c13
-rw-r--r--drivers/net/team/team_mode_loadbalance.c2
-rw-r--r--drivers/net/tun.c8
-rw-r--r--drivers/net/usb/asix.h1
-rw-r--r--drivers/net/usb/asix_common.c70
-rw-r--r--drivers/net/usb/asix_devices.c61
-rw-r--r--drivers/net/usb/ax88172a.c2
-rw-r--r--drivers/net/usb/ax88179_178a.c2
-rw-r--r--drivers/net/usb/cdc-phonet.c5
-rw-r--r--drivers/net/usb/dm9601.c2
-rw-r--r--drivers/net/usb/hso.c13
-rw-r--r--drivers/net/usb/ipheth.c2
-rw-r--r--drivers/net/usb/lan78xx.c1062
-rw-r--r--drivers/net/usb/mcs7830.c2
-rw-r--r--drivers/net/usb/pegasus.c9
-rw-r--r--drivers/net/usb/r8152.c33
-rw-r--r--drivers/net/usb/rtl8150.c5
-rw-r--r--drivers/net/usb/smsc75xx.c2
-rw-r--r--drivers/net/usb/smsc95xx.c2
-rw-r--r--drivers/net/usb/sr9700.c2
-rw-r--r--drivers/net/usb/sr9800.c2
-rw-r--r--drivers/net/usb/usbnet.c8
-rw-r--r--drivers/net/veth.c307
-rw-r--r--drivers/net/virtio_net.c66
-rw-r--r--drivers/net/vmxnet3/Makefile2
-rw-r--r--drivers/net/vmxnet3/upt1_defs.h2
-rw-r--r--drivers/net/vmxnet3/vmxnet3_defs.h50
-rw-r--r--drivers/net/vmxnet3/vmxnet3_drv.c268
-rw-r--r--drivers/net/vmxnet3/vmxnet3_ethtool.c32
-rw-r--r--drivers/net/vmxnet3/vmxnet3_int.h22
-rw-r--r--drivers/net/vrf.c25
-rw-r--r--drivers/net/wan/Kconfig51
-rw-r--r--drivers/net/wan/Makefile1
-rw-r--r--drivers/net/wan/c101.c33
-rw-r--r--drivers/net/wan/cosa.c15
-rw-r--r--drivers/net/wan/farsync.c123
-rw-r--r--drivers/net/wan/fsl_ucc_hdlc.c19
-rw-r--r--drivers/net/wan/hdlc.c9
-rw-r--r--drivers/net/wan/hdlc_cisco.c14
-rw-r--r--drivers/net/wan/hdlc_fr.c40
-rw-r--r--drivers/net/wan/hdlc_ppp.c8
-rw-r--r--drivers/net/wan/hdlc_raw.c14
-rw-r--r--drivers/net/wan/hdlc_raw_eth.c14
-rw-r--r--drivers/net/wan/hdlc_x25.c16
-rw-r--r--drivers/net/wan/hostess_sv11.c13
-rw-r--r--drivers/net/wan/ixp4xx_hss.c22
-rw-r--r--drivers/net/wan/lmc/lmc.h2
-rw-r--r--drivers/net/wan/lmc/lmc_main.c33
-rw-r--r--drivers/net/wan/lmc/lmc_proto.c7
-rw-r--r--drivers/net/wan/lmc/lmc_proto.h1
-rw-r--r--drivers/net/wan/n2.c32
-rw-r--r--drivers/net/wan/pc300too.c44
-rw-r--r--drivers/net/wan/pci200syn.c32
-rw-r--r--drivers/net/wan/sbni.c1638
-rw-r--r--drivers/net/wan/sbni.h147
-rw-r--r--drivers/net/wan/sealevel.c10
-rw-r--r--drivers/net/wan/wanxl.c21
-rw-r--r--drivers/net/wireless/ath/ath10k/pci.c9
-rw-r--r--drivers/net/wireless/ath/ath11k/dp_rx.c5
-rw-r--r--drivers/net/wireless/ath/ath11k/mhi.c1
-rw-r--r--drivers/net/wireless/ath/ath11k/pci.c10
-rw-r--r--drivers/net/wireless/ath/ath5k/pci.c2
-rw-r--r--drivers/net/wireless/ath/ath6kl/wmi.c4
-rw-r--r--drivers/net/wireless/ath/ath9k/ar9003_eeprom.c3
-rw-r--r--drivers/net/wireless/ath/ath9k/hw.c12
-rw-r--r--drivers/net/wireless/ath/ath9k/pci.c8
-rw-r--r--drivers/net/wireless/ath/wcn36xx/main.c12
-rw-r--r--drivers/net/wireless/ath/wcn36xx/smd.c4
-rw-r--r--drivers/net/wireless/ath/wcn36xx/txrx.c4
-rw-r--r--drivers/net/wireless/ath/wcn36xx/wcn36xx.h2
-rw-r--r--drivers/net/wireless/ath/wil6210/ethtool.c14
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile3
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c4
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c8
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c29
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.h5
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c69
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c126
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h8
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c5
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c30
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c82
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.h31
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c2
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h1
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/include/soc.h2
-rw-r--r--drivers/net/wireless/cisco/airo.c15
-rw-r--r--drivers/net/wireless/intel/ipw2x00/libipw_rx.c56
-rw-r--r--drivers/net/wireless/intel/ipw2x00/libipw_tx.c4
-rw-r--r--drivers/net/wireless/intel/iwlegacy/3945-mac.c52
-rw-r--r--drivers/net/wireless/intel/iwlegacy/3945.c10
-rw-r--r--drivers/net/wireless/intel/iwlegacy/4965-mac.c78
-rw-r--r--drivers/net/wireless/intel/iwlegacy/common.c19
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/22000.c76
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/9000.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/dvm/main.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/dvm/rx.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/acpi.c304
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/acpi.h66
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/coex.h2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/commands.h3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/d3.h22
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/location.h189
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/mac.h4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/offload.h31
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/scan.h6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/sta.h8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dbg.c144
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dbg.h7
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/error-dump.h22
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/file.h1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/pnvm.c40
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/pnvm.h20
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-config.h8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-csr.h22
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c34
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.h24
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-drv.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-io.c26
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c40
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-prph.h7
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-trans.h6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/constants.h5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/d3.c580
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c11
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c85
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c27
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/fw.c108
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c44
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c35
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mvm.h3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/nvm.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ops.c74
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rfi.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c45
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/scan.c93
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/sta.c120
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/time-event.c41
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/drv.c78
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/internal.h24
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/rx.c17
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c53
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/trans.c59
-rw-r--r--drivers/net/wireless/intersil/Kconfig20
-rw-r--r--drivers/net/wireless/intersil/Makefile1
-rw-r--r--drivers/net/wireless/intersil/hostap/hostap.h3
-rw-r--r--drivers/net/wireless/intersil/hostap/hostap_ioctl.c30
-rw-r--r--drivers/net/wireless/intersil/hostap/hostap_main.c3
-rw-r--r--drivers/net/wireless/intersil/prism54/Makefile9
-rw-r--r--drivers/net/wireless/intersil/prism54/isl_38xx.c245
-rw-r--r--drivers/net/wireless/intersil/prism54/isl_38xx.h158
-rw-r--r--drivers/net/wireless/intersil/prism54/isl_ioctl.c2909
-rw-r--r--drivers/net/wireless/intersil/prism54/isl_ioctl.h35
-rw-r--r--drivers/net/wireless/intersil/prism54/isl_oid.h492
-rw-r--r--drivers/net/wireless/intersil/prism54/islpci_dev.c951
-rw-r--r--drivers/net/wireless/intersil/prism54/islpci_dev.h204
-rw-r--r--drivers/net/wireless/intersil/prism54/islpci_eth.c489
-rw-r--r--drivers/net/wireless/intersil/prism54/islpci_eth.h59
-rw-r--r--drivers/net/wireless/intersil/prism54/islpci_hotplug.c316
-rw-r--r--drivers/net/wireless/intersil/prism54/islpci_mgt.c491
-rw-r--r--drivers/net/wireless/intersil/prism54/islpci_mgt.h126
-rw-r--r--drivers/net/wireless/intersil/prism54/oid_mgt.c889
-rw-r--r--drivers/net/wireless/intersil/prism54/oid_mgt.h46
-rw-r--r--drivers/net/wireless/intersil/prism54/prismcompat.h30
-rw-r--r--drivers/net/wireless/marvell/libertas/ethtool.c9
-rw-r--r--drivers/net/wireless/marvell/mwifiex/Makefile1
-rw-r--r--drivers/net/wireless/marvell/mwifiex/cmdevt.c2
-rw-r--r--drivers/net/wireless/marvell/mwifiex/pcie.c11
-rw-r--r--drivers/net/wireless/marvell/mwifiex/pcie.h1
-rw-r--r--drivers/net/wireless/marvell/mwifiex/pcie_quirks.c161
-rw-r--r--drivers/net/wireless/marvell/mwifiex/pcie_quirks.h23
-rw-r--r--drivers/net/wireless/marvell/mwifiex/sta_ioctl.c4
-rw-r--r--drivers/net/wireless/marvell/mwifiex/usb.h2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7915/mcu.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7915/mcu.h3
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7921/mcu.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7921/mcu.h3
-rw-r--r--drivers/net/wireless/microchip/wilc1000/sdio.c29
-rw-r--r--drivers/net/wireless/microchip/wilc1000/spi.c44
-rw-r--r--drivers/net/wireless/microchip/wilc1000/wlan.c38
-rw-r--r--drivers/net/wireless/quantenna/qtnfmac/pcie/pearl_pcie.c28
-rw-r--r--drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c28
-rw-r--r--drivers/net/wireless/ray_cs.c8
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h2
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c37
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c52
-rw-r--r--drivers/net/wireless/realtek/rtw88/Makefile2
-rw-r--r--drivers/net/wireless/realtek/rtw88/fw.c8
-rw-r--r--drivers/net/wireless/realtek/rtw88/fw.h3
-rw-r--r--drivers/net/wireless/realtek/rtw88/main.c2
-rw-r--r--drivers/net/wireless/realtek/rtw88/main.h6
-rw-r--r--drivers/net/wireless/realtek/rtw88/pci.c47
-rw-r--r--drivers/net/wireless/realtek/rtw88/pci.h1
-rw-r--r--drivers/net/wireless/realtek/rtw88/rtw8822c.c1
-rw-r--r--drivers/net/wireless/realtek/rtw88/tx.c2
-rw-r--r--drivers/net/wireless/realtek/rtw88/wow.c107
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_debugfs.c2
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_hal.c4
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_usb.c1
-rw-r--r--drivers/net/wwan/Kconfig12
-rw-r--r--drivers/net/wwan/Makefile1
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c7
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_pcie.c19
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_protocol.c10
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c13
-rw-r--r--drivers/net/wwan/mhi_wwan_ctrl.c14
-rw-r--r--drivers/net/wwan/mhi_wwan_mbim.c658
-rw-r--r--drivers/net/wwan/wwan_core.c19
-rw-r--r--drivers/net/xen-netfront.c272
-rw-r--r--drivers/nfc/fdp/fdp.c38
-rw-r--r--drivers/nfc/fdp/fdp.h4
-rw-r--r--drivers/nfc/fdp/i2c.c8
-rw-r--r--drivers/nfc/mei_phy.c4
-rw-r--r--drivers/nfc/mei_phy.h2
-rw-r--r--drivers/nfc/microread/i2c.c4
-rw-r--r--drivers/nfc/microread/mei.c1
-rw-r--r--drivers/nfc/microread/microread.c15
-rw-r--r--drivers/nfc/microread/microread.h6
-rw-r--r--drivers/nfc/nfcmrvl/fw_dnld.c16
-rw-r--r--drivers/nfc/nfcmrvl/i2c.c7
-rw-r--r--drivers/nfc/nfcmrvl/main.c6
-rw-r--r--drivers/nfc/nfcmrvl/nfcmrvl.h6
-rw-r--r--drivers/nfc/nfcmrvl/spi.c7
-rw-r--r--drivers/nfc/nfcmrvl/uart.c4
-rw-r--r--drivers/nfc/nfcmrvl/usb.c2
-rw-r--r--drivers/nfc/nfcsim.c4
-rw-r--r--drivers/nfc/nxp-nci/core.c2
-rw-r--r--drivers/nfc/pn533/pn533.c2
-rw-r--r--drivers/nfc/pn544/i2c.c2
-rw-r--r--drivers/nfc/pn544/pn544.c18
-rw-r--r--drivers/nfc/pn544/pn544.h7
-rw-r--r--drivers/nfc/port100.c47
-rw-r--r--drivers/nfc/s3fwrn5/core.c7
-rw-r--r--drivers/nfc/s3fwrn5/firmware.c12
-rw-r--r--drivers/nfc/s3fwrn5/nci.c8
-rw-r--r--drivers/nfc/s3fwrn5/nci.h2
-rw-r--r--drivers/nfc/st-nci/core.c7
-rw-r--r--drivers/nfc/st-nci/i2c.c2
-rw-r--r--drivers/nfc/st-nci/ndlc.c6
-rw-r--r--drivers/nfc/st-nci/ndlc.h8
-rw-r--r--drivers/nfc/st-nci/spi.c2
-rw-r--r--drivers/nfc/st-nci/vendor_cmds.c2
-rw-r--r--drivers/nfc/st21nfca/core.c7
-rw-r--r--drivers/nfc/st21nfca/i2c.c8
-rw-r--r--drivers/nfc/st21nfca/st21nfca.h4
-rw-r--r--drivers/nfc/st21nfca/vendor_cmds.c2
-rw-r--r--drivers/nfc/st95hf/core.c3
-rw-r--r--drivers/nfc/trf7970a.c19
-rw-r--r--drivers/nfc/virtual_ncidev.c13
-rw-r--r--drivers/nvdimm/namespace_devs.c17
-rw-r--r--drivers/nvme/host/Kconfig4
-rw-r--r--drivers/nvme/host/Makefile1
-rw-r--r--drivers/nvme/host/core.c70
-rw-r--r--drivers/nvme/host/fabrics.c1
-rw-r--r--drivers/nvme/host/ioctl.c4
-rw-r--r--drivers/nvme/host/lightnvm.c1274
-rw-r--r--drivers/nvme/host/multipath.c2
-rw-r--r--drivers/nvme/host/nvme.h79
-rw-r--r--drivers/nvme/host/pci.c187
-rw-r--r--drivers/nvme/host/rdma.c8
-rw-r--r--drivers/nvme/host/tcp.c44
-rw-r--r--drivers/nvme/host/trace.c18
-rw-r--r--drivers/nvme/target/Kconfig2
-rw-r--r--drivers/nvme/target/core.c1
-rw-r--r--drivers/nvme/target/fabrics-cmd.c38
-rw-r--r--drivers/nvme/target/loop.c4
-rw-r--r--drivers/nvme/target/trace.c18
-rw-r--r--drivers/nvme/target/zns.c5
-rw-r--r--drivers/nvmem/Kconfig11
-rw-r--r--drivers/nvmem/Makefile2
-rw-r--r--drivers/nvmem/core.c7
-rw-r--r--drivers/nvmem/nintendo-otp.c124
-rw-r--r--drivers/nvmem/qfprom.c31
-rw-r--r--drivers/opp/core.c23
-rw-r--r--drivers/opp/of.c17
-rw-r--r--drivers/parport/ieee1284_ops.c2
-rw-r--r--drivers/parport/parport_serial.c9
-rw-r--r--drivers/pci/controller/pci-ixp4xx.c8
-rw-r--r--drivers/pci/msi.c398
-rw-r--r--drivers/pci/pci-sysfs.c2
-rw-r--r--drivers/pci/pci.c31
-rw-r--r--drivers/pci/pci.h3
-rw-r--r--drivers/pci/pcie/ptm.c9
-rw-r--r--drivers/pci/quirks.c1
-rw-r--r--drivers/phy/amlogic/phy-meson8b-usb2.c8
-rw-r--r--drivers/phy/cadence/phy-cadence-torrent.c2577
-rw-r--r--drivers/phy/marvell/phy-mvebu-a3700-comphy.c16
-rw-r--r--drivers/phy/marvell/phy-mvebu-cp110-comphy.c16
-rw-r--r--drivers/phy/mediatek/phy-mtk-hdmi.c7
-rw-r--r--drivers/phy/mediatek/phy-mtk-mipi-dsi.c13
-rw-r--r--drivers/phy/mediatek/phy-mtk-tphy.c224
-rw-r--r--drivers/phy/mediatek/phy-mtk-ufs.c44
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp.c321
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp.h18
-rw-r--r--drivers/phy/qualcomm/phy-qcom-usb-hs.c2
-rw-r--r--drivers/phy/renesas/phy-rcar-gen3-usb2.c97
-rw-r--r--drivers/phy/rockchip/phy-rockchip-inno-usb2.c4
-rw-r--r--drivers/phy/samsung/Makefile5
-rw-r--r--drivers/phy/samsung/phy-exynos7-ufs.c (renamed from drivers/phy/samsung/phy-exynos7-ufs.h)8
-rw-r--r--drivers/phy/samsung/phy-exynosautov9-ufs.c67
-rw-r--r--drivers/phy/samsung/phy-samsung-ufs.c3
-rw-r--r--drivers/phy/samsung/phy-samsung-ufs.h13
-rw-r--r--drivers/phy/tegra/xusb.c4
-rw-r--r--drivers/phy/ti/phy-twl4030-usb.c15
-rw-r--r--drivers/phy/xilinx/phy-zynqmp.c3
-rw-r--r--drivers/pinctrl/actions/pinctrl-owl.c5
-rw-r--r--drivers/pinctrl/bcm/pinctrl-bcm2835.c4
-rw-r--r--drivers/pinctrl/bcm/pinctrl-iproc-gpio.c3
-rw-r--r--drivers/pinctrl/bcm/pinctrl-nsp-gpio.c3
-rw-r--r--drivers/pinctrl/intel/pinctrl-baytrail.c7
-rw-r--r--drivers/pinctrl/intel/pinctrl-cherryview.c5
-rw-r--r--drivers/pinctrl/intel/pinctrl-lynxpoint.c8
-rw-r--r--drivers/pinctrl/intel/pinctrl-tigerlake.c26
-rw-r--r--drivers/pinctrl/mediatek/mtk-eint.c5
-rw-r--r--drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c8
-rw-r--r--drivers/pinctrl/nomadik/pinctrl-nomadik.c2
-rw-r--r--drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c2
-rw-r--r--drivers/pinctrl/pinctrl-amd.c9
-rw-r--r--drivers/pinctrl/pinctrl-at91.c6
-rw-r--r--drivers/pinctrl/pinctrl-equilibrium.c2
-rw-r--r--drivers/pinctrl/pinctrl-ingenic.c2
-rw-r--r--drivers/pinctrl/pinctrl-k210.c26
-rw-r--r--drivers/pinctrl/pinctrl-microchip-sgpio.c2
-rw-r--r--drivers/pinctrl/pinctrl-ocelot.c3
-rw-r--r--drivers/pinctrl/pinctrl-oxnas.c2
-rw-r--r--drivers/pinctrl/pinctrl-pic32.c2
-rw-r--r--drivers/pinctrl/pinctrl-pistachio.c2
-rw-r--r--drivers/pinctrl/pinctrl-rockchip.c909
-rw-r--r--drivers/pinctrl/pinctrl-rockchip.h287
-rw-r--r--drivers/pinctrl/pinctrl-single.c4
-rw-r--r--drivers/pinctrl/pinctrl-st.c2
-rw-r--r--drivers/pinctrl/qcom/Kconfig63
-rw-r--r--drivers/pinctrl/qcom/pinctrl-msm.c4
-rw-r--r--drivers/pinctrl/samsung/pinctrl-exynos.c15
-rw-r--r--drivers/pinctrl/samsung/pinctrl-s3c24xx.c25
-rw-r--r--drivers/pinctrl/samsung/pinctrl-s3c64xx.c17
-rw-r--r--drivers/pinctrl/spear/pinctrl-plgpio.c3
-rw-r--r--drivers/pinctrl/sunxi/pinctrl-sunxi.c16
-rw-r--r--drivers/platform/x86/Kconfig3
-rw-r--r--drivers/platform/x86/asus-nb-wmi.c28
-rw-r--r--drivers/platform/x86/dual_accel_detect.h76
-rw-r--r--drivers/platform/x86/gigabyte-wmi.c2
-rw-r--r--drivers/platform/x86/intel-hid.c21
-rw-r--r--drivers/platform/x86/intel-vbtn.c18
-rw-r--r--drivers/platform/x86/pcengines-apuv2.c2
-rw-r--r--drivers/platform/x86/thinkpad_acpi.c3
-rw-r--r--drivers/power/reset/Kconfig6
-rw-r--r--drivers/power/reset/Makefile1
-rw-r--r--drivers/power/reset/linkstation-poweroff.c77
-rw-r--r--drivers/power/reset/tps65086-restart.c98
-rw-r--r--drivers/power/supply/Kconfig26
-rw-r--r--drivers/power/supply/Makefile4
-rw-r--r--drivers/power/supply/ab8500-bm.h219
-rw-r--r--drivers/power/supply/ab8500_bmdata.c34
-rw-r--r--drivers/power/supply/ab8500_btemp.c22
-rw-r--r--drivers/power/supply/ab8500_chargalg.c (renamed from drivers/power/supply/abx500_chargalg.c)601
-rw-r--r--drivers/power/supply/ab8500_charger.c4
-rw-r--r--drivers/power/supply/ab8500_fg.c20
-rw-r--r--drivers/power/supply/axp288_charger.c11
-rw-r--r--drivers/power/supply/axp288_fuel_gauge.c489
-rw-r--r--drivers/power/supply/bq24735-charger.c27
-rw-r--r--drivers/power/supply/cros_peripheral_charger.c386
-rw-r--r--drivers/power/supply/cw2015_battery.c4
-rw-r--r--drivers/power/supply/max17042_battery.c61
-rw-r--r--drivers/power/supply/mt6360_charger.c867
-rw-r--r--drivers/power/supply/power_supply_core.c19
-rw-r--r--drivers/power/supply/qcom_smbb.c5
-rw-r--r--drivers/power/supply/rn5t618_power.c38
-rw-r--r--drivers/power/supply/sbs-battery.c16
-rw-r--r--drivers/power/supply/sc27xx_fuel_gauge.c4
-rw-r--r--drivers/power/supply/smb347-charger.c271
-rw-r--r--drivers/powercap/intel_rapl_common.c50
-rw-r--r--drivers/powercap/intel_rapl_msr.c2
-rw-r--r--drivers/pps/clients/pps_parport.c42
-rw-r--r--drivers/ptp/Kconfig23
-rw-r--r--drivers/ptp/ptp_ocp.c1283
-rw-r--r--drivers/ptp/ptp_sysfs.c2
-rw-r--r--drivers/ptp/ptp_vclock.c2
-rw-r--r--drivers/regulator/Kconfig24
-rw-r--r--drivers/regulator/Makefile2
-rw-r--r--drivers/regulator/bd718x7-regulator.c369
-rw-r--r--drivers/regulator/da9063-regulator.c132
-rw-r--r--drivers/regulator/dbx500-prcmu.c4
-rw-r--r--drivers/regulator/devres.c69
-rw-r--r--drivers/regulator/fixed.c5
-rw-r--r--drivers/regulator/hi6421v600-regulator.c50
-rw-r--r--drivers/regulator/irq_helpers.c2
-rw-r--r--drivers/regulator/mt6358-regulator.c87
-rw-r--r--drivers/regulator/mt6359-regulator.c19
-rw-r--r--drivers/regulator/mt6397-regulator.c15
-rw-r--r--drivers/regulator/rt5033-regulator.c21
-rw-r--r--drivers/regulator/rt6245-regulator.c2
-rw-r--r--drivers/regulator/rtq2134-regulator.c373
-rw-r--r--drivers/regulator/rtq6752-regulator.c289
-rw-r--r--drivers/regulator/sy7636a-regulator.c41
-rw-r--r--drivers/regulator/sy8824x.c16
-rw-r--r--drivers/regulator/sy8827n.c14
-rw-r--r--drivers/regulator/tps65910-regulator.c10
-rw-r--r--drivers/regulator/vctrl-regulator.c73
-rw-r--r--drivers/reset/Kconfig2
-rw-r--r--drivers/reset/reset-zynqmp.c3
-rw-r--r--drivers/s390/block/dasd_diag.c2
-rw-r--r--drivers/s390/block/dasd_eckd.c14
-rw-r--r--drivers/s390/block/dasd_fba.c4
-rw-r--r--drivers/s390/block/dasd_genhd.c7
-rw-r--r--drivers/s390/block/dasd_ioctl.c4
-rw-r--r--drivers/s390/block/dcssblk.c3
-rw-r--r--drivers/s390/char/sclp.c230
-rw-r--r--drivers/s390/char/sclp.h2
-rw-r--r--drivers/s390/char/sclp_cmd.c2
-rw-r--r--drivers/s390/char/sclp_config.c4
-rw-r--r--drivers/s390/char/sclp_early_core.c19
-rw-r--r--drivers/s390/char/zcore.c2
-rw-r--r--drivers/s390/cio/ccwgroup.c22
-rw-r--r--drivers/s390/cio/css.c30
-rw-r--r--drivers/s390/cio/qdio.h40
-rw-r--r--drivers/s390/cio/qdio_debug.c3
-rw-r--r--drivers/s390/cio/qdio_main.c331
-rw-r--r--drivers/s390/cio/qdio_setup.c114
-rw-r--r--drivers/s390/crypto/ap_bus.c32
-rw-r--r--drivers/s390/crypto/ap_bus.h13
-rw-r--r--drivers/s390/crypto/ap_queue.c20
-rw-r--r--drivers/s390/crypto/vfio_ap_ops.c116
-rw-r--r--drivers/s390/crypto/zcrypt_api.c4
-rw-r--r--drivers/s390/crypto/zcrypt_card.c8
-rw-r--r--drivers/s390/crypto/zcrypt_ccamisc.c8
-rw-r--r--drivers/s390/crypto/zcrypt_cex2a.c17
-rw-r--r--drivers/s390/crypto/zcrypt_cex2c.c24
-rw-r--r--drivers/s390/crypto/zcrypt_cex4.c38
-rw-r--r--drivers/s390/crypto/zcrypt_queue.c8
-rw-r--r--drivers/s390/net/Kconfig10
-rw-r--r--drivers/s390/net/ctcm_fsms.c2
-rw-r--r--drivers/s390/net/ctcm_mpc.c2
-rw-r--r--drivers/s390/net/qeth_core.h51
-rw-r--r--drivers/s390/net/qeth_core_main.c199
-rw-r--r--drivers/s390/net/qeth_core_mpc.c3
-rw-r--r--drivers/s390/net/qeth_core_mpc.h23
-rw-r--r--drivers/s390/net/qeth_core_sys.c5
-rw-r--r--drivers/s390/net/qeth_ethtool.c11
-rw-r--r--drivers/s390/net/qeth_l2_main.c418
-rw-r--r--drivers/s390/net/qeth_l3_main.c19
-rw-r--r--drivers/s390/scsi/zfcp_qdio.c5
-rw-r--r--drivers/scsi/cxgbi/cxgb4i/Kconfig1
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c3
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.c2
-rw-r--r--drivers/scsi/scsi_sysfs.c9
-rw-r--r--drivers/scsi/sd.c8
-rw-r--r--drivers/scsi/sg.c32
-rw-r--r--drivers/scsi/sr.c7
-rw-r--r--drivers/scsi/st.c49
-rw-r--r--drivers/scsi/st.h2
-rw-r--r--drivers/scsi/storvsc_drv.c14
-rw-r--r--drivers/slimbus/messaging.c7
-rw-r--r--drivers/slimbus/qcom-ngd-ctrl.c22
-rw-r--r--drivers/soc/fsl/qe/qe_ic.c84
-rw-r--r--drivers/soundwire/bus.c14
-rw-r--r--drivers/soundwire/cadence_master.c229
-rw-r--r--drivers/soundwire/cadence_master.h7
-rw-r--r--drivers/soundwire/dmi-quirks.c36
-rw-r--r--drivers/soundwire/intel.c166
-rw-r--r--drivers/soundwire/intel.h1
-rw-r--r--drivers/soundwire/stream.c5
-rw-r--r--drivers/spi/Kconfig12
-rw-r--r--drivers/spi/Makefile1
-rw-r--r--drivers/spi/spi-altera-dfl.c21
-rw-r--r--drivers/spi/spi-bcm2835aux.c4
-rw-r--r--drivers/spi/spi-coldfire-qspi.c2
-rw-r--r--drivers/spi/spi-davinci.c8
-rw-r--r--drivers/spi/spi-ep93xx.c4
-rw-r--r--drivers/spi/spi-fsi.c125
-rw-r--r--drivers/spi/spi-fsl-dspi.c1
-rw-r--r--drivers/spi/spi-geni-qcom.c6
-rw-r--r--drivers/spi/spi-imx.c21
-rw-r--r--drivers/spi/spi-mt65xx.c159
-rw-r--r--drivers/spi/spi-mxic.c44
-rw-r--r--drivers/spi/spi-orion.c22
-rw-r--r--drivers/spi/spi-pic32.c1
-rw-r--r--drivers/spi/spi-pxa2xx.c35
-rw-r--r--drivers/spi/spi-rockchip-sfc.c694
-rw-r--r--drivers/spi/spi-sprd-adi.c287
-rw-r--r--drivers/spi/spi-stm32.c121
-rw-r--r--drivers/spi/spi-tegra114.c8
-rw-r--r--drivers/spi/spi-tegra20-slink.c77
-rw-r--r--drivers/spi/spi-zynq-qspi.c8
-rw-r--r--drivers/spi/spi.c6
-rw-r--r--drivers/spi/spidev.c1
-rw-r--r--drivers/staging/media/atomisp/pci/atomisp_ioctl.c4
-rw-r--r--drivers/staging/media/av7110/av7110.h7
-rw-r--r--drivers/staging/octeon/ethernet.c12
-rw-r--r--drivers/staging/qlge/qlge_ethtool.c10
-rw-r--r--drivers/staging/qlge/qlge_main.c5
-rw-r--r--drivers/staging/rtl8188eu/include/osdep_intf.h2
-rw-r--r--drivers/staging/rtl8188eu/include/rtw_android.h3
-rw-r--r--drivers/staging/rtl8188eu/os_dep/ioctl_linux.c3
-rw-r--r--drivers/staging/rtl8188eu/os_dep/os_intfs.c1
-rw-r--r--drivers/staging/rtl8188eu/os_dep/rtw_android.c14
-rw-r--r--drivers/staging/rtl8723bs/include/osdep_intf.h2
-rw-r--r--drivers/staging/rtl8723bs/os_dep/ioctl_linux.c18
-rw-r--r--drivers/staging/rtl8723bs/os_dep/os_intfs.c1
-rw-r--r--drivers/staging/wlan-ng/p80211netdev.c76
-rw-r--r--drivers/thermal/intel/therm_throt.c7
-rw-r--r--drivers/thermal/intel/thermal_interrupt.h3
-rw-r--r--drivers/tty/synclink_gt.c19
-rw-r--r--drivers/tty/vt/vt.c31
-rw-r--r--drivers/tty/vt/vt_ioctl.c10
-rw-r--r--drivers/usb/dwc3/gadget.c41
-rw-r--r--drivers/usb/gadget/function/u_audio.c23
-rw-r--r--drivers/usb/host/xhci-pci-renesas.c35
-rw-r--r--drivers/usb/serial/ch341.c1
-rw-r--r--drivers/usb/serial/option.c2
-rw-r--r--drivers/usb/typec/tcpm/tcpm.c94
-rw-r--r--drivers/vdpa/ifcvf/ifcvf_main.c4
-rw-r--r--drivers/vdpa/mlx5/core/mr.c9
-rw-r--r--drivers/vdpa/mlx5/net/mlx5_vnet.c19
-rw-r--r--drivers/vdpa/vdpa_sim/vdpa_sim.c4
-rw-r--r--drivers/vdpa/virtio_pci/vp_vdpa.c4
-rw-r--r--drivers/vhost/net.c2
-rw-r--r--drivers/vhost/vdpa.c3
-rw-r--r--drivers/vhost/vhost.c10
-rw-r--r--drivers/vhost/vringh.c2
-rw-r--r--drivers/virtio/virtio.c1
-rw-r--r--drivers/virtio/virtio_mem.c9
-rw-r--r--drivers/virtio/virtio_pci_common.c7
-rw-r--r--drivers/virtio/virtio_ring.c18
-rw-r--r--drivers/virtio/virtio_vdpa.c3
-rw-r--r--drivers/xen/events/events_base.c22
-rw-r--r--fs/9p/vfs_file.c20
-rw-r--r--fs/Kconfig18
-rw-r--r--fs/Makefile2
-rw-r--r--fs/afs/flock.c4
-rw-r--r--fs/aio.c2
-rw-r--r--fs/block_dev.c263
-rw-r--r--fs/btrfs/Makefile1
-rw-r--r--fs/btrfs/acl.c11
-rw-r--r--fs/btrfs/backref.c6
-rw-r--r--fs/btrfs/backref.h2
-rw-r--r--fs/btrfs/block-group.c114
-rw-r--r--fs/btrfs/btrfs_inode.h27
-rw-r--r--fs/btrfs/check-integrity.c60
-rw-r--r--fs/btrfs/compression.c169
-rw-r--r--fs/btrfs/compression.h5
-rw-r--r--fs/btrfs/ctree.c62
-rw-r--r--fs/btrfs/ctree.h94
-rw-r--r--fs/btrfs/delayed-inode.c227
-rw-r--r--fs/btrfs/dir-item.c76
-rw-r--r--fs/btrfs/disk-io.c13
-rw-r--r--fs/btrfs/extent-tree.c12
-rw-r--r--fs/btrfs/extent_io.c318
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/file-item.c5
-rw-r--r--fs/btrfs/file.c23
-rw-r--r--fs/btrfs/free-space-cache.c26
-rw-r--r--fs/btrfs/inode.c312
-rw-r--r--fs/btrfs/ioctl.c188
-rw-r--r--fs/btrfs/lzo.c236
-rw-r--r--fs/btrfs/ordered-data.c5
-rw-r--r--fs/btrfs/ordered-data.h2
-rw-r--r--fs/btrfs/qgroup.c8
-rw-r--r--fs/btrfs/raid56.c18
-rw-r--r--fs/btrfs/ref-verify.c10
-rw-r--r--fs/btrfs/relocation.c306
-rw-r--r--fs/btrfs/send.c35
-rw-r--r--fs/btrfs/space-info.c98
-rw-r--r--fs/btrfs/struct-funcs.c8
-rw-r--r--fs/btrfs/subpage.c24
-rw-r--r--fs/btrfs/subpage.h3
-rw-r--r--fs/btrfs/super.c56
-rw-r--r--fs/btrfs/sysfs.c108
-rw-r--r--fs/btrfs/tests/qgroup-tests.c30
-rw-r--r--fs/btrfs/tree-checker.c38
-rw-r--r--fs/btrfs/tree-log.c102
-rw-r--r--fs/btrfs/verity.c811
-rw-r--r--fs/btrfs/volumes.c234
-rw-r--r--fs/btrfs/volumes.h29
-rw-r--r--fs/btrfs/zlib.c54
-rw-r--r--fs/btrfs/zoned.c22
-rw-r--r--fs/btrfs/zoned.h1
-rw-r--r--fs/btrfs/zstd.c39
-rw-r--r--fs/buffer.c4
-rw-r--r--fs/ceph/addr.c9
-rw-r--r--fs/ceph/caps.c38
-rw-r--r--fs/ceph/file.c2
-rw-r--r--fs/ceph/locks.c3
-rw-r--r--fs/ceph/mds_client.c32
-rw-r--r--fs/ceph/mdsmap.c8
-rw-r--r--fs/ceph/snap.c37
-rw-r--r--fs/ceph/super.h5
-rw-r--r--fs/cifs/Kconfig30
-rw-r--r--fs/cifs/cifs_debug.c11
-rw-r--r--fs/cifs/cifs_swn.c2
-rw-r--r--fs/cifs/cifs_unicode.c9
-rw-r--r--fs/cifs/cifsencrypt.c89
-rw-r--r--fs/cifs/cifsfs.c8
-rw-r--r--fs/cifs/cifsglob.h37
-rw-r--r--fs/cifs/cifspdu.h28
-rw-r--r--fs/cifs/cifsproto.h10
-rw-r--r--fs/cifs/cifssmb.c107
-rw-r--r--fs/cifs/connect.c32
-rw-r--r--fs/cifs/dir.c2
-rw-r--r--fs/cifs/file.c50
-rw-r--r--fs/cifs/fs_context.c25
-rw-r--r--fs/cifs/fs_context.h3
-rw-r--r--fs/cifs/fscache.c41
-rw-r--r--fs/cifs/fscache.h23
-rw-r--r--fs/cifs/inode.c25
-rw-r--r--fs/cifs/misc.c50
-rw-r--r--fs/cifs/readdir.c23
-rw-r--r--fs/cifs/sess.c257
-rw-r--r--fs/cifs/smb2maperror.c1
-rw-r--r--fs/cifs/smb2ops.c2
-rw-r--r--fs/cifs/smb2pdu.c2
-rw-r--r--fs/cifs/smbencrypt.c139
-rw-r--r--fs/cifs_common/Makefile7
-rw-r--r--fs/cifs_common/arc4.h23
-rw-r--r--fs/cifs_common/cifs_arc4.c87
-rw-r--r--fs/cifs_common/cifs_md4.c197
-rw-r--r--fs/cifs_common/md4.h27
-rw-r--r--fs/configfs/file.c18
-rw-r--r--fs/crypto/fname.c106
-rw-r--r--fs/crypto/hooks.c44
-rw-r--r--fs/dax.c580
-rw-r--r--fs/eventfd.c12
-rw-r--r--fs/exec.c8
-rw-r--r--fs/ext2/Kconfig1
-rw-r--r--fs/ext2/ext2.h11
-rw-r--r--fs/ext2/file.c7
-rw-r--r--fs/ext2/inode.c27
-rw-r--r--fs/ext2/super.c3
-rw-r--r--fs/ext4/ext4.h10
-rw-r--r--fs/ext4/extents.c25
-rw-r--r--fs/ext4/file.c13
-rw-r--r--fs/ext4/inode.c47
-rw-r--r--fs/ext4/ioctl.c4
-rw-r--r--fs/ext4/super.c13
-rw-r--r--fs/ext4/symlink.c12
-rw-r--r--fs/ext4/truncate.h8
-rw-r--r--fs/f2fs/data.c8
-rw-r--r--fs/f2fs/f2fs.h1
-rw-r--r--fs/f2fs/file.c62
-rw-r--r--fs/f2fs/namei.c12
-rw-r--r--fs/f2fs/super.c1
-rw-r--r--fs/f2fs/sysfs.c2
-rw-r--r--fs/fat/fatent.c1
-rw-r--r--fs/fcntl.c18
-rw-r--r--fs/file.c64
-rw-r--r--fs/fs-writeback.c19
-rw-r--r--fs/fuse/dax.c56
-rw-r--r--fs/fuse/dir.c11
-rw-r--r--fs/fuse/file.c10
-rw-r--r--fs/fuse/fuse_i.h7
-rw-r--r--fs/fuse/inode.c1
-rw-r--r--fs/gfs2/aops.c9
-rw-r--r--fs/gfs2/bmap.c5
-rw-r--r--fs/gfs2/file.c3
-rw-r--r--fs/gfs2/glock.c13
-rw-r--r--fs/gfs2/glops.c27
-rw-r--r--fs/gfs2/incore.h2
-rw-r--r--fs/gfs2/lock_dlm.c5
-rw-r--r--fs/gfs2/log.c2
-rw-r--r--fs/gfs2/lops.c44
-rw-r--r--fs/gfs2/meta_io.c7
-rw-r--r--fs/gfs2/ops_fstype.c53
-rw-r--r--fs/gfs2/super.c107
-rw-r--r--fs/gfs2/super.h3
-rw-r--r--fs/gfs2/util.c3
-rw-r--r--fs/gfs2/util.h5
-rw-r--r--fs/hpfs/Kconfig1
-rw-r--r--fs/hpfs/file.c51
-rw-r--r--fs/inode.c2
-rw-r--r--fs/internal.h12
-rw-r--r--fs/io-wq.c232
-rw-r--r--fs/io-wq.h3
-rw-r--r--fs/io_uring.c2003
-rw-r--r--fs/ioctl.c211
-rw-r--r--fs/iomap/Makefile2
-rw-r--r--fs/iomap/apply.c99
-rw-r--r--fs/iomap/buffered-io.c512
-rw-r--r--fs/iomap/direct-io.c172
-rw-r--r--fs/iomap/fiemap.c101
-rw-r--r--fs/iomap/iter.c80
-rw-r--r--fs/iomap/seek.c98
-rw-r--r--fs/iomap/swapfile.c44
-rw-r--r--fs/iomap/trace.h61
-rw-r--r--fs/isofs/inode.c27
-rw-r--r--fs/isofs/isofs.h1
-rw-r--r--fs/isofs/joliet.c4
-rw-r--r--fs/ksmbd/Kconfig68
-rw-r--r--fs/ksmbd/Makefile20
-rw-r--r--fs/ksmbd/asn1.c343
-rw-r--r--fs/ksmbd/asn1.h21
-rw-r--r--fs/ksmbd/auth.c1364
-rw-r--r--fs/ksmbd/auth.h67
-rw-r--r--fs/ksmbd/connection.c413
-rw-r--r--fs/ksmbd/connection.h213
-rw-r--r--fs/ksmbd/crypto_ctx.c282
-rw-r--r--fs/ksmbd/crypto_ctx.h74
-rw-r--r--fs/ksmbd/glob.h49
-rw-r--r--fs/ksmbd/ksmbd_netlink.h395
-rw-r--r--fs/ksmbd/ksmbd_spnego_negtokeninit.asn131
-rw-r--r--fs/ksmbd/ksmbd_spnego_negtokentarg.asn119
-rw-r--r--fs/ksmbd/ksmbd_work.c80
-rw-r--r--fs/ksmbd/ksmbd_work.h117
-rw-r--r--fs/ksmbd/mgmt/ksmbd_ida.c46
-rw-r--r--fs/ksmbd/mgmt/ksmbd_ida.h34
-rw-r--r--fs/ksmbd/mgmt/share_config.c238
-rw-r--r--fs/ksmbd/mgmt/share_config.h81
-rw-r--r--fs/ksmbd/mgmt/tree_connect.c121
-rw-r--r--fs/ksmbd/mgmt/tree_connect.h56
-rw-r--r--fs/ksmbd/mgmt/user_config.c69
-rw-r--r--fs/ksmbd/mgmt/user_config.h66
-rw-r--r--fs/ksmbd/mgmt/user_session.c369
-rw-r--r--fs/ksmbd/mgmt/user_session.h106
-rw-r--r--fs/ksmbd/misc.c338
-rw-r--r--fs/ksmbd/misc.h35
-rw-r--r--fs/ksmbd/ndr.c345
-rw-r--r--fs/ksmbd/ndr.h22
-rw-r--r--fs/ksmbd/nterr.h543
-rw-r--r--fs/ksmbd/ntlmssp.h169
-rw-r--r--fs/ksmbd/oplock.c1709
-rw-r--r--fs/ksmbd/oplock.h131
-rw-r--r--fs/ksmbd/server.c633
-rw-r--r--fs/ksmbd/server.h70
-rw-r--r--fs/ksmbd/smb2misc.c438
-rw-r--r--fs/ksmbd/smb2ops.c312
-rw-r--r--fs/ksmbd/smb2pdu.c8373
-rw-r--r--fs/ksmbd/smb2pdu.h1698
-rw-r--r--fs/ksmbd/smb_common.c674
-rw-r--r--fs/ksmbd/smb_common.h542
-rw-r--r--fs/ksmbd/smbacl.c1366
-rw-r--r--fs/ksmbd/smbacl.h212
-rw-r--r--fs/ksmbd/smbfsctl.h91
-rw-r--r--fs/ksmbd/smbstatus.h1822
-rw-r--r--fs/ksmbd/transport_ipc.c874
-rw-r--r--fs/ksmbd/transport_ipc.h47
-rw-r--r--fs/ksmbd/transport_rdma.c2058
-rw-r--r--fs/ksmbd/transport_rdma.h63
-rw-r--r--fs/ksmbd/transport_tcp.c618
-rw-r--r--fs/ksmbd/transport_tcp.h13
-rw-r--r--fs/ksmbd/unicode.c384
-rw-r--r--fs/ksmbd/unicode.h357
-rw-r--r--fs/ksmbd/uniupr.h268
-rw-r--r--fs/ksmbd/vfs.c1895
-rw-r--r--fs/ksmbd/vfs.h197
-rw-r--r--fs/ksmbd/vfs_cache.c725
-rw-r--r--fs/ksmbd/vfs_cache.h178
-rw-r--r--fs/ksmbd/xattr.h122
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/lockd/svc4proc.c7
-rw-r--r--fs/lockd/svclock.c82
-rw-r--r--fs/lockd/svcproc.c6
-rw-r--r--fs/lockd/svcsubs.c114
-rw-r--r--fs/locks.c117
-rw-r--r--fs/namei.c286
-rw-r--r--fs/namespace.c144
-rw-r--r--fs/nfs/export.c2
-rw-r--r--fs/nfs/file.c5
-rw-r--r--fs/nfsd/lockd.c8
-rw-r--r--fs/nfsd/nfs4state.c34
-rw-r--r--fs/nfsd/nfsproc.c1
-rw-r--r--fs/nfsd/trace.h17
-rw-r--r--fs/nfsd/vfs.c44
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/notify/fanotify/fanotify_user.c268
-rw-r--r--fs/notify/fsnotify.c6
-rw-r--r--fs/notify/fsnotify.h15
-rw-r--r--fs/notify/inotify/inotify_user.c17
-rw-r--r--fs/notify/mark.c52
-rw-r--r--fs/ocfs2/locks.c4
-rw-r--r--fs/open.c8
-rw-r--r--fs/overlayfs/export.c2
-rw-r--r--fs/overlayfs/file.c47
-rw-r--r--fs/overlayfs/readdir.c5
-rw-r--r--fs/pipe.c33
-rw-r--r--fs/read_write.c17
-rw-r--r--fs/remap_range.c12
-rw-r--r--fs/squashfs/block.c7
-rw-r--r--fs/squashfs/lz4_wrapper.c2
-rw-r--r--fs/squashfs/lzo_wrapper.c2
-rw-r--r--fs/squashfs/xz_wrapper.c2
-rw-r--r--fs/squashfs/zlib_wrapper.c2
-rw-r--r--fs/squashfs/zstd_wrapper.c2
-rw-r--r--fs/super.c2
-rw-r--r--fs/timerfd.c16
-rw-r--r--fs/ubifs/file.c13
-rw-r--r--fs/udf/dir.c5
-rw-r--r--fs/udf/ecma_167.h44
-rw-r--r--fs/udf/inode.c3
-rw-r--r--fs/udf/misc.c13
-rw-r--r--fs/udf/namei.c13
-rw-r--r--fs/udf/osta_udf.h22
-rw-r--r--fs/udf/super.c75
-rw-r--r--fs/udf/udf_sb.h2
-rw-r--r--fs/udf/udfdecl.h4
-rw-r--r--fs/udf/unicode.c4
-rw-r--r--fs/xfs/xfs_bmap_util.c15
-rw-r--r--fs/xfs/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_file.c13
-rw-r--r--fs/xfs/xfs_inode.c121
-rw-r--r--fs/xfs/xfs_inode.h3
-rw-r--r--fs/xfs/xfs_super.c2
-rw-r--r--fs/zonefs/super.c23
-rw-r--r--fs/zonefs/zonefs.h7
-rw-r--r--include/acpi/acnames.h1
-rw-r--r--include/acpi/acoutput.h2
-rw-r--r--include/acpi/acpi_io.h8
-rw-r--r--include/acpi/acpixf.h2
-rw-r--r--include/acpi/actbl1.h16
-rw-r--r--include/acpi/actbl2.h170
-rw-r--r--include/acpi/actbl3.h4
-rw-r--r--include/asm-generic/atomic-long.h1014
-rw-r--r--include/asm-generic/bitops/atomic.h32
-rw-r--r--include/asm-generic/bitops/lock.h39
-rw-r--r--include/asm-generic/bitops/non-atomic.h39
-rw-r--r--include/asm-generic/compat.h17
-rw-r--r--include/asm-generic/vmlinux.lds.h1
-rw-r--r--include/crypto/public_key.h4
-rw-r--r--include/crypto/sm4.h25
-rw-r--r--include/dt-bindings/clock/ingenic,sysost.h19
-rw-r--r--include/dt-bindings/interconnect/qcom,sc8180x.h185
-rw-r--r--include/dt-bindings/power/summit,smb347-charger.h4
-rw-r--r--include/linux/acpi.h12
-rw-r--r--include/linux/atomic.h7
-rw-r--r--include/linux/atomic/atomic-arch-fallback.h (renamed from include/linux/atomic-arch-fallback.h)0
-rw-r--r--include/linux/atomic/atomic-instrumented.h (renamed from include/asm-generic/atomic-instrumented.h)586
-rw-r--r--include/linux/atomic/atomic-long.h1014
-rw-r--r--include/linux/backing-dev.h2
-rw-r--r--include/linux/bio.h57
-rw-r--r--include/linux/bitops.h50
-rw-r--r--include/linux/blk-cgroup.h4
-rw-r--r--include/linux/blk-mq.h16
-rw-r--r--include/linux/blk_types.h6
-rw-r--r--include/linux/blkdev.h38
-rw-r--r--include/linux/bpf-cgroup.h230
-rw-r--r--include/linux/bpf.h306
-rw-r--r--include/linux/bpf_types.h3
-rw-r--r--include/linux/bpf_verifier.h19
-rw-r--r--include/linux/bpfptr.h12
-rw-r--r--include/linux/btf.h1
-rw-r--r--include/linux/btf_ids.h9
-rw-r--r--include/linux/bvec.h64
-rw-r--r--include/linux/can/bittiming.h4
-rw-r--r--include/linux/can/dev.h8
-rw-r--r--include/linux/can/platform/flexcan.h23
-rw-r--r--include/linux/can/rx-offload.h8
-rw-r--r--include/linux/cmdline-parser.h46
-rw-r--r--include/linux/compat.h32
-rw-r--r--include/linux/coresight.h9
-rw-r--r--include/linux/cpuhotplug.h3
-rw-r--r--include/linux/cpuset.h8
-rw-r--r--include/linux/debug_locks.h3
-rw-r--r--include/linux/device-mapper.h7
-rw-r--r--include/linux/device.h2
-rw-r--r--include/linux/dfl.h1
-rw-r--r--include/linux/dsa/8021q.h44
-rw-r--r--include/linux/dsa/sja1105.h22
-rw-r--r--include/linux/edac.h3
-rw-r--r--include/linux/energy_model.h16
-rw-r--r--include/linux/errno.h1
-rw-r--r--include/linux/ethtool.h26
-rw-r--r--include/linux/eventfd.h11
-rw-r--r--include/linux/exportfs.h2
-rw-r--r--include/linux/fanotify.h3
-rw-r--r--include/linux/fiemap.h4
-rw-r--r--include/linux/filter.h80
-rw-r--r--include/linux/firmware/xlnx-zynqmp.h10
-rw-r--r--include/linux/fpga/fpga-mgr.h2
-rw-r--r--include/linux/fs.h141
-rw-r--r--include/linux/fscrypt.h116
-rw-r--r--include/linux/fsl/mc.h3
-rw-r--r--include/linux/fsnotify.h9
-rw-r--r--include/linux/ftrace.h16
-rw-r--r--include/linux/genetlink.h23
-rw-r--r--include/linux/genhd.h70
-rw-r--r--include/linux/hdlc.h4
-rw-r--r--include/linux/hdlcdrv.h2
-rw-r--r--include/linux/hrtimer.h8
-rw-r--r--include/linux/ieee80211.h106
-rw-r--r--include/linux/if_bridge.h40
-rw-r--r--include/linux/igmp.h3
-rw-r--r--include/linux/inetdevice.h11
-rw-r--r--include/linux/interrupt.h8
-rw-r--r--include/linux/io_uring.h11
-rw-r--r--include/linux/ioam6.h13
-rw-r--r--include/linux/ioam6_genl.h13
-rw-r--r--include/linux/ioam6_iptunnel.h13
-rw-r--r--include/linux/iomap.h91
-rw-r--r--include/linux/ioprio.h44
-rw-r--r--include/linux/ipv6.h3
-rw-r--r--include/linux/irq.h2
-rw-r--r--include/linux/iscsi_ibft.h18
-rw-r--r--include/linux/kfence.h7
-rw-r--r--include/linux/leds.h12
-rw-r--r--include/linux/libata.h7
-rw-r--r--include/linux/lightnvm.h697
-rw-r--r--include/linux/linear_range.h2
-rw-r--r--include/linux/local_lock_internal.h86
-rw-r--r--include/linux/lockd/bind.h3
-rw-r--r--include/linux/lockd/lockd.h11
-rw-r--r--include/linux/mei_cl_bus.h9
-rw-r--r--include/linux/memcontrol.h32
-rw-r--r--include/linux/mfd/rt5033-private.h4
-rw-r--r--include/linux/mhi.h11
-rw-r--r--include/linux/mii.h2
-rw-r--r--include/linux/mlx5/device.h71
-rw-r--r--include/linux/mlx5/driver.h21
-rw-r--r--include/linux/mlx5/eswitch.h16
-rw-r--r--include/linux/mlx5/fs.h2
-rw-r--r--include/linux/mlx5/mlx5_ifc.h25
-rw-r--r--include/linux/mlx5/mlx5_ifc_vdpa.h10
-rw-r--r--include/linux/mm_types.h18
-rw-r--r--include/linux/mmc/card.h1
-rw-r--r--include/linux/mmc/core.h3
-rw-r--r--include/linux/mmc/host.h3
-rw-r--r--include/linux/mmc/sdio_ids.h1
-rw-r--r--include/linux/mmu_context.h14
-rw-r--r--include/linux/moduleparam.h2
-rw-r--r--include/linux/msi.h14
-rw-r--r--include/linux/mutex.h92
-rw-r--r--include/linux/namei.h1
-rw-r--r--include/linux/netdevice.h92
-rw-r--r--include/linux/netfilter/ipset/ip_set.h3
-rw-r--r--include/linux/netfilter/x_tables.h6
-rw-r--r--include/linux/netfilter_bridge/ebtables.h2
-rw-r--r--include/linux/notifier.h2
-rw-r--r--include/linux/oid_registry.h7
-rw-r--r--include/linux/once.h4
-rw-r--r--include/linux/padata.h3
-rw-r--r--include/linux/pci.h10
-rw-r--r--include/linux/pci_ids.h3
-rw-r--r--include/linux/perf_event.h1
-rw-r--r--include/linux/phy.h1
-rw-r--r--include/linux/pid.h1
-rw-r--r--include/linux/pipe_fs_i.h2
-rw-r--r--include/linux/platform_data/cros_ec_commands.h67
-rw-r--r--include/linux/platform_data/mmc-esdhc-imx.h42
-rw-r--r--include/linux/platform_data/spi-mt65xx.h1
-rw-r--r--include/linux/pm_domain.h1
-rw-r--r--include/linux/posix-timers.h11
-rw-r--r--include/linux/power/max17042_battery.h12
-rw-r--r--include/linux/power_supply.h1
-rw-r--r--include/linux/preempt.h4
-rw-r--r--include/linux/property.h2
-rw-r--r--include/linux/ptp_clock_kernel.h48
-rw-r--r--include/linux/rbtree.h31
-rw-r--r--include/linux/rbtree_types.h34
-rw-r--r--include/linux/rculist.h35
-rw-r--r--include/linux/rcupdate.h4
-rw-r--r--include/linux/rcutiny.h3
-rw-r--r--include/linux/regmap.h7
-rw-r--r--include/linux/regulator/consumer.h15
-rw-r--r--include/linux/regulator/driver.h16
-rw-r--r--include/linux/regulator/machine.h2
-rw-r--r--include/linux/resctrl.h184
-rw-r--r--include/linux/rtmutex.h63
-rw-r--r--include/linux/rwbase_rt.h39
-rw-r--r--include/linux/rwlock_rt.h140
-rw-r--r--include/linux/rwlock_types.h53
-rw-r--r--include/linux/rwsem.h78
-rw-r--r--include/linux/sched.h157
-rw-r--r--include/linux/sched/signal.h6
-rw-r--r--include/linux/sched/sysctl.h18
-rw-r--r--include/linux/sched/wake_q.h7
-rw-r--r--include/linux/security.h3
-rw-r--r--include/linux/skbuff.h21
-rw-r--r--include/linux/socket.h9
-rw-r--r--include/linux/soundwire/sdw.h3
-rw-r--r--include/linux/spi/spi.h26
-rw-r--r--include/linux/spinlock.h15
-rw-r--r--include/linux/spinlock_api_smp.h3
-rw-r--r--include/linux/spinlock_rt.h159
-rw-r--r--include/linux/spinlock_types.h89
-rw-r--r--include/linux/spinlock_types_raw.h73
-rw-r--r--include/linux/srcutiny.h8
-rw-r--r--include/linux/ssb/ssb.h2
-rw-r--r--include/linux/ssb/ssb_driver_extif.h2
-rw-r--r--include/linux/static_call.h33
-rw-r--r--include/linux/sunrpc/msg_prot.h1
-rw-r--r--include/linux/sunrpc/svc.h5
-rw-r--r--include/linux/sunrpc/svc_rdma.h7
-rw-r--r--include/linux/sunrpc/xdr.h1
-rw-r--r--include/linux/sunrpc/xprt.h18
-rw-r--r--include/linux/sysctl.h2
-rw-r--r--include/linux/trace_events.h7
-rw-r--r--include/linux/typecheck.h9
-rw-r--r--include/linux/units.h16
-rw-r--r--include/linux/vdpa.h11
-rw-r--r--include/linux/virtio.h1
-rw-r--r--include/linux/vringh.h1
-rw-r--r--include/linux/wait.h2
-rw-r--r--include/linux/workqueue.h15
-rw-r--r--include/linux/writeback.h5
-rw-r--r--include/linux/ww_mutex.h50
-rw-r--r--include/net/Space.h10
-rw-r--r--include/net/act_api.h22
-rw-r--r--include/net/af_unix.h19
-rw-r--r--include/net/ax88796.h3
-rw-r--r--include/net/bluetooth/hci_core.h21
-rw-r--r--include/net/bond_3ad.h1
-rw-r--r--include/net/bond_options.h1
-rw-r--r--include/net/bonding.h14
-rw-r--r--include/net/cfg80211.h92
-rw-r--r--include/net/compat.h27
-rw-r--r--include/net/devlink.h58
-rw-r--r--include/net/dn_fib.h2
-rw-r--r--include/net/dsa.h72
-rw-r--r--include/net/dst.h2
-rw-r--r--include/net/flow_offload.h13
-rw-r--r--include/net/ieee80211_radiotap.h5
-rw-r--r--include/net/if_inet6.h5
-rw-r--r--include/net/inet_hashtables.h6
-rw-r--r--include/net/ioam6.h67
-rw-r--r--include/net/ip.h22
-rw-r--r--include/net/ip6_fib.h4
-rw-r--r--include/net/ip6_route.h5
-rw-r--r--include/net/ip_fib.h2
-rw-r--r--include/net/ip_tunnels.h3
-rw-r--r--include/net/ipx.h171
-rw-r--r--include/net/lwtunnel.h3
-rw-r--r--include/net/mac80211.h41
-rw-r--r--include/net/mctp.h232
-rw-r--r--include/net/mctpdevice.h35
-rw-r--r--include/net/mptcp.h29
-rw-r--r--include/net/net_namespace.h6
-rw-r--r--include/net/netfilter/nf_conntrack_ecache.h32
-rw-r--r--include/net/netfilter/nf_hooks_lwtunnel.h7
-rw-r--r--include/net/netfilter/nf_queue.h4
-rw-r--r--include/net/netlink.h2
-rw-r--r--include/net/netns/conntrack.h3
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--include/net/netns/ipv6.h3
-rw-r--r--include/net/netns/mctp.h36
-rw-r--r--include/net/netns/netfilter.h1
-rw-r--r--include/net/netns/x_tables.h12
-rw-r--r--include/net/netns/xfrm.h7
-rw-r--r--include/net/nfc/digital.h4
-rw-r--r--include/net/nfc/hci.h6
-rw-r--r--include/net/nfc/nci_core.h30
-rw-r--r--include/net/nfc/nfc.h16
-rw-r--r--include/net/page_pool.h68
-rw-r--r--include/net/pkt_cls.h27
-rw-r--r--include/net/psample.h2
-rw-r--r--include/net/rtnetlink.h3
-rw-r--r--include/net/sch_generic.h2
-rw-r--r--include/net/sock.h18
-rw-r--r--include/net/switchdev.h108
-rw-r--r--include/net/tcp.h1
-rw-r--r--include/net/xdp.h5
-rw-r--r--include/net/xfrm.h36
-rw-r--r--include/soc/mscc/ocelot.h26
-rw-r--r--include/trace/events/btrfs.h21
-rw-r--r--include/trace/events/kyber.h6
-rw-r--r--include/trace/events/mmflags.h4
-rw-r--r--include/trace/events/qdisc.h2
-rw-r--r--include/trace/events/sunrpc.h8
-rw-r--r--include/trace/trace_events.h22
-rw-r--r--include/uapi/asm-generic/socket.h2
-rw-r--r--include/uapi/linux/bpf.h119
-rw-r--r--include/uapi/linux/btrfs.h1
-rw-r--r--include/uapi/linux/btrfs_tree.h35
-rw-r--r--include/uapi/linux/can/j1939.h9
-rw-r--r--include/uapi/linux/dm-ioctl.h6
-rw-r--r--include/uapi/linux/dvb/audio.h (renamed from drivers/staging/media/av7110/audio.h)0
-rw-r--r--include/uapi/linux/dvb/osd.h (renamed from drivers/staging/media/av7110/osd.h)0
-rw-r--r--include/uapi/linux/dvb/video.h (renamed from drivers/staging/media/av7110/video.h)0
-rw-r--r--include/uapi/linux/ethtool.h2
-rw-r--r--include/uapi/linux/ethtool_netlink.h2
-rw-r--r--include/uapi/linux/fanotify.h13
-rw-r--r--include/uapi/linux/fs.h1
-rw-r--r--include/uapi/linux/if_arp.h1
-rw-r--r--include/uapi/linux/if_bridge.h46
-rw-r--r--include/uapi/linux/if_ether.h3
-rw-r--r--include/uapi/linux/if_link.h13
-rw-r--r--include/uapi/linux/in.h42
-rw-r--r--include/uapi/linux/in6.h1
-rw-r--r--include/uapi/linux/io_uring.h22
-rw-r--r--include/uapi/linux/ioam6.h133
-rw-r--r--include/uapi/linux/ioam6_genl.h52
-rw-r--r--include/uapi/linux/ioam6_iptunnel.h20
-rw-r--r--include/uapi/linux/ioprio.h52
-rw-r--r--include/uapi/linux/ipv6.h3
-rw-r--r--include/uapi/linux/ipx.h87
-rw-r--r--include/uapi/linux/lightnvm.h224
-rw-r--r--include/uapi/linux/lwtunnel.h1
-rw-r--r--include/uapi/linux/mctp.h36
-rw-r--r--include/uapi/linux/mount.h3
-rw-r--r--include/uapi/linux/mptcp.h1
-rw-r--r--include/uapi/linux/neighbour.h7
-rw-r--r--include/uapi/linux/netfilter/nfnetlink_conntrack.h1
-rw-r--r--include/uapi/linux/netfilter/nfnetlink_hook.h9
-rw-r--r--include/uapi/linux/nfsd/nfsfh.h1
-rw-r--r--include/uapi/linux/nl80211-vnd-intel.h77
-rw-r--r--include/uapi/linux/nl80211.h43
-rw-r--r--include/uapi/linux/openvswitch.h8
-rw-r--r--include/uapi/linux/pkt_cls.h1
-rw-r--r--include/uapi/linux/prctl.h1
-rw-r--r--include/uapi/linux/socket.h5
-rw-r--r--include/uapi/linux/tc_act/tc_skbmod.h1
-rw-r--r--include/uapi/linux/virtio_i2c.h41
-rw-r--r--include/uapi/linux/virtio_ids.h1
-rw-r--r--include/uapi/linux/xfrm.h11
-rw-r--r--init/do_mounts.c4
-rw-r--r--init/init_task.c1
-rw-r--r--init/main.c15
-rw-r--r--kernel/Kconfig.locks2
-rw-r--r--kernel/audit.h5
-rw-r--r--kernel/audit_tree.c2
-rw-r--r--kernel/bpf/Kconfig2
-rw-r--r--kernel/bpf/arraymap.c21
-rw-r--r--kernel/bpf/bpf_iter.c24
-rw-r--r--kernel/bpf/bpf_struct_ops.c22
-rw-r--r--kernel/bpf/bpf_task_storage.c6
-rw-r--r--kernel/bpf/btf.c84
-rw-r--r--kernel/bpf/cgroup.c198
-rw-r--r--kernel/bpf/core.c38
-rw-r--r--kernel/bpf/cpumap.c116
-rw-r--r--kernel/bpf/devmap.c118
-rw-r--r--kernel/bpf/hashtab.c109
-rw-r--r--kernel/bpf/helpers.c402
-rw-r--r--kernel/bpf/local_storage.c20
-rw-r--r--kernel/bpf/map_in_map.c8
-rw-r--r--kernel/bpf/stackmap.c4
-rw-r--r--kernel/bpf/syscall.c220
-rw-r--r--kernel/bpf/task_iter.c11
-rw-r--r--kernel/bpf/trampoline.c14
-rw-r--r--kernel/bpf/verifier.c394
-rw-r--r--kernel/cfi.c8
-rw-r--r--kernel/cgroup/cgroup-v1.c8
-rw-r--r--kernel/cgroup/cgroup.c27
-rw-r--r--kernel/cgroup/cpuset.c221
-rw-r--r--kernel/cgroup/rstat.c19
-rw-r--r--kernel/cpu.c84
-rw-r--r--kernel/cpu_pm.c50
-rw-r--r--kernel/cred.c12
-rw-r--r--kernel/events/core.c77
-rw-r--r--kernel/events/hw_breakpoint.c4
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c11
-rw-r--r--kernel/futex.c556
-rw-r--r--kernel/irq/affinity.c8
-rw-r--r--kernel/irq/chip.c5
-rw-r--r--kernel/irq/cpuhotplug.c2
-rw-r--r--kernel/irq/generic-chip.c17
-rw-r--r--kernel/irq/ipi.c32
-rw-r--r--kernel/irq/irqdesc.c2
-rw-r--r--kernel/irq/irqdomain.c1
-rw-r--r--kernel/irq/manage.c19
-rw-r--r--kernel/irq/matrix.c3
-rw-r--r--kernel/irq/msi.c166
-rw-r--r--kernel/irq/pm.c2
-rw-r--r--kernel/irq/proc.c2
-rw-r--r--kernel/irq/timings.c7
-rw-r--r--kernel/kcsan/debugfs.c2
-rw-r--r--kernel/locking/Makefile3
-rw-r--r--kernel/locking/locktorture.c25
-rw-r--r--kernel/locking/mutex-debug.c5
-rw-r--r--kernel/locking/mutex-debug.h29
-rw-r--r--kernel/locking/mutex.c541
-rw-r--r--kernel/locking/mutex.h48
-rw-r--r--kernel/locking/rtmutex.c1172
-rw-r--r--kernel/locking/rtmutex_api.c590
-rw-r--r--kernel/locking/rtmutex_common.h135
-rw-r--r--kernel/locking/rwbase_rt.c263
-rw-r--r--kernel/locking/rwsem.c115
-rw-r--r--kernel/locking/semaphore.c4
-rw-r--r--kernel/locking/spinlock.c7
-rw-r--r--kernel/locking/spinlock_debug.c5
-rw-r--r--kernel/locking/spinlock_rt.c263
-rw-r--r--kernel/locking/ww_mutex.h569
-rw-r--r--kernel/locking/ww_rt_mutex.c76
-rw-r--r--kernel/notifier.c19
-rw-r--r--kernel/padata.c35
-rw-r--r--kernel/params.c18
-rw-r--r--kernel/pid.c15
-rw-r--r--kernel/power/energy_model.c4
-rw-r--r--kernel/power/main.c2
-rw-r--r--kernel/power/suspend.c4
-rw-r--r--kernel/power/suspend_test.c2
-rw-r--r--kernel/rcu/rcuscale.c4
-rw-r--r--kernel/rcu/rcutorture.c7
-rw-r--r--kernel/rcu/refscale.c36
-rw-r--r--kernel/rcu/srcutiny.c2
-rw-r--r--kernel/rcu/tasks.h36
-rw-r--r--kernel/rcu/tree.c107
-rw-r--r--kernel/rcu/tree_nocb.h1496
-rw-r--r--kernel/rcu/tree_plugin.h1512
-rw-r--r--kernel/rcu/tree_stall.h111
-rw-r--r--kernel/scftorture.c78
-rw-r--r--kernel/sched/core.c703
-rw-r--r--kernel/sched/cpufreq_schedutil.c16
-rw-r--r--kernel/sched/deadline.c8
-rw-r--r--kernel/sched/debug.c10
-rw-r--r--kernel/sched/fair.c211
-rw-r--r--kernel/sched/sched.h31
-rw-r--r--kernel/sched/topology.c65
-rw-r--r--kernel/seccomp.c2
-rw-r--r--kernel/signal.c15
-rw-r--r--kernel/smp.c14
-rw-r--r--kernel/smpboot.c8
-rw-r--r--kernel/softirq.c2
-rw-r--r--kernel/sys.c3
-rw-r--r--kernel/sysctl.c42
-rw-r--r--kernel/time/clocksource-wdtest.c5
-rw-r--r--kernel/time/clocksource.c6
-rw-r--r--kernel/time/hrtimer.c340
-rw-r--r--kernel/time/jiffies.c21
-rw-r--r--kernel/time/posix-cpu-timers.c90
-rw-r--r--kernel/time/posix-timers.c2
-rw-r--r--kernel/time/tick-common.c7
-rw-r--r--kernel/time/tick-internal.h32
-rw-r--r--kernel/time/timekeeping.c36
-rw-r--r--kernel/torture.c6
-rw-r--r--kernel/trace/Kconfig5
-rw-r--r--kernel/trace/bpf_trace.c125
-rw-r--r--kernel/trace/ftrace.c4
-rw-r--r--kernel/trace/trace.c18
-rw-r--r--kernel/trace/trace.h32
-rw-r--r--kernel/trace/trace_events_hist.c2
-rw-r--r--kernel/trace/trace_osnoise.c56
-rw-r--r--kernel/ucount.c19
-rw-r--r--kernel/workqueue.c186
-rw-r--r--kernel/workqueue_internal.h3
-rw-r--r--lib/Kconfig.debug45
-rw-r--r--lib/crypto/Kconfig3
-rw-r--r--lib/crypto/Makefile3
-rw-r--r--lib/crypto/blake2s.c8
-rw-r--r--lib/crypto/chacha20poly1305.c8
-rw-r--r--lib/crypto/curve25519.c8
-rw-r--r--lib/crypto/sm4.c176
-rw-r--r--lib/debugobjects.c7
-rw-r--r--lib/devmem_is_allowed.c2
-rw-r--r--lib/linear_ranges.c31
-rw-r--r--lib/mpi/mpiutil.c2
-rw-r--r--lib/once.c11
-rw-r--r--lib/string.c16
-rw-r--r--lib/test_bpf.c2332
-rw-r--r--lib/test_lockup.c8
-rw-r--r--mm/backing-dev.c3
-rw-r--r--mm/filemap.c181
-rw-r--r--mm/gup.c7
-rw-r--r--mm/hugetlb.c19
-rw-r--r--mm/kfence/kfence_test.c13
-rw-r--r--mm/kmemleak.c6
-rw-r--r--mm/madvise.c6
-rw-r--r--mm/memcontrol.c32
-rw-r--r--mm/memory-failure.c14
-rw-r--r--mm/memory_hotplug.c1
-rw-r--r--mm/mmap.c6
-rw-r--r--mm/nommu.c3
-rw-r--r--mm/page-writeback.c2
-rw-r--r--mm/page_alloc.c25
-rw-r--r--mm/readahead.c2
-rw-r--r--mm/rmap.c41
-rw-r--r--mm/shmem.c34
-rw-r--r--mm/slub.c25
-rw-r--r--mm/swap_slots.c4
-rw-r--r--mm/swap_state.c7
-rw-r--r--mm/truncate.c9
-rw-r--r--mm/vmscan.c30
-rw-r--r--mm/vmstat.c12
-rw-r--r--net/6lowpan/debugfs.c3
-rw-r--r--net/802/Makefile1
-rw-r--r--net/802/p8023.c60
-rw-r--r--net/8021q/vlan.c2
-rw-r--r--net/8021q/vlan_dev.c6
-rw-r--r--net/Kconfig1
-rw-r--r--net/Makefile1
-rw-r--r--net/appletalk/ddp.c4
-rw-r--r--net/ax25/ax25_ip.c4
-rw-r--r--net/ax25/ax25_out.c13
-rw-r--r--net/ax25/ax25_route.c13
-rw-r--r--net/batman-adv/bat_iv_ogm.c75
-rw-r--r--net/batman-adv/bat_v.c30
-rw-r--r--net/batman-adv/bat_v_elp.c9
-rw-r--r--net/batman-adv/bat_v_ogm.c39
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c39
-rw-r--r--net/batman-adv/distributed-arp-table.c27
-rw-r--r--net/batman-adv/fragmentation.c6
-rw-r--r--net/batman-adv/gateway_client.c60
-rw-r--r--net/batman-adv/gateway_client.h16
-rw-r--r--net/batman-adv/gateway_common.c2
-rw-r--r--net/batman-adv/hard-interface.c21
-rw-r--r--net/batman-adv/hard-interface.h3
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/multicast.c11
-rw-r--r--net/batman-adv/netlink.c6
-rw-r--r--net/batman-adv/network-coding.c24
-rw-r--r--net/batman-adv/originator.c114
-rw-r--r--net/batman-adv/originator.h96
-rw-r--r--net/batman-adv/routing.c39
-rw-r--r--net/batman-adv/send.c33
-rw-r--r--net/batman-adv/soft-interface.c27
-rw-r--r--net/batman-adv/soft-interface.h16
-rw-r--r--net/batman-adv/tp_meter.c27
-rw-r--r--net/batman-adv/translation-table.c109
-rw-r--r--net/batman-adv/translation-table.h18
-rw-r--r--net/batman-adv/tvlv.c9
-rw-r--r--net/bluetooth/cmtp/cmtp.h2
-rw-r--r--net/bluetooth/hci_core.c48
-rw-r--r--net/bluetooth/hci_event.c223
-rw-r--r--net/bluetooth/hci_request.c81
-rw-r--r--net/bluetooth/hci_sysfs.c3
-rw-r--r--net/bluetooth/mgmt.c4
-rw-r--r--net/bluetooth/rfcomm/sock.c8
-rw-r--r--net/bluetooth/sco.c106
-rw-r--r--net/bpf/test_run.c143
-rw-r--r--net/bridge/br.c65
-rw-r--r--net/bridge/br_device.c16
-rw-r--r--net/bridge/br_fdb.c39
-rw-r--r--net/bridge/br_forward.c16
-rw-r--r--net/bridge/br_if.c17
-rw-r--r--net/bridge/br_input.c24
-rw-r--r--net/bridge/br_ioctl.c83
-rw-r--r--net/bridge/br_mdb.c177
-rw-r--r--net/bridge/br_multicast.c1912
-rw-r--r--net/bridge/br_multicast_eht.c92
-rw-r--r--net/bridge/br_netlink.c61
-rw-r--r--net/bridge/br_private.h583
-rw-r--r--net/bridge/br_private_mcast_eht.h3
-rw-r--r--net/bridge/br_private_tunnel.h6
-rw-r--r--net/bridge/br_switchdev.c246
-rw-r--r--net/bridge/br_sysfs_br.c48
-rw-r--r--net/bridge/br_sysfs_if.c4
-rw-r--r--net/bridge/br_vlan.c136
-rw-r--r--net/bridge/br_vlan_options.c427
-rw-r--r--net/bridge/br_vlan_tunnel.c14
-rw-r--r--net/bridge/netfilter/ebtable_broute.c17
-rw-r--r--net/bridge/netfilter/ebtable_filter.c17
-rw-r--r--net/bridge/netfilter/ebtable_nat.c17
-rw-r--r--net/bridge/netfilter/ebtables.c109
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c6
-rw-r--r--net/can/j1939/j1939-priv.h10
-rw-r--r--net/can/j1939/socket.c143
-rw-r--r--net/can/j1939/transport.c70
-rw-r--r--net/can/raw.c8
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/bpf_sk_storage.c4
-rw-r--r--net/core/dev.c342
-rw-r--r--net/core/dev_addr_lists.c144
-rw-r--r--net/core/dev_ioctl.c264
-rw-r--r--net/core/devlink.c680
-rw-r--r--net/core/drop_monitor.c6
-rw-r--r--net/core/dst.c6
-rw-r--r--net/core/fib_rules.c4
-rw-r--r--net/core/filter.c134
-rw-r--r--net/core/flow_dissector.c12
-rw-r--r--net/core/flow_offload.c90
-rw-r--r--net/core/link_watch.c5
-rw-r--r--net/core/lwtunnel.c5
-rw-r--r--net/core/neighbour.c29
-rw-r--r--net/core/net-procfs.c24
-rw-r--r--net/core/net_namespace.c52
-rw-r--r--net/core/page_pool.c124
-rw-r--r--net/core/pktgen.c167
-rw-r--r--net/core/ptp_classifier.c2
-rw-r--r--net/core/rtnetlink.c34
-rw-r--r--net/core/scm.c4
-rw-r--r--net/core/selftests.c12
-rw-r--r--net/core/skbuff.c75
-rw-r--r--net/core/sock.c31
-rw-r--r--net/core/sock_map.c23
-rw-r--r--net/dccp/dccp.h6
-rw-r--r--net/dccp/proto.c2
-rw-r--r--net/decnet/dn_dev.c6
-rw-r--r--net/decnet/dn_fib.c9
-rw-r--r--net/decnet/dn_route.c18
-rw-r--r--net/dsa/Kconfig13
-rw-r--r--net/dsa/Makefile3
-rw-r--r--net/dsa/dsa.c2
-rw-r--r--net/dsa/dsa2.c112
-rw-r--r--net/dsa/dsa_priv.h194
-rw-r--r--net/dsa/master.c6
-rw-r--r--net/dsa/port.c344
-rw-r--r--net/dsa/slave.c300
-rw-r--r--net/dsa/switch.c55
-rw-r--r--net/dsa/tag_8021q.c608
-rw-r--r--net/dsa/tag_ar9331.c3
-rw-r--r--net/dsa/tag_brcm.c34
-rw-r--r--net/dsa/tag_dsa.c95
-rw-r--r--net/dsa/tag_gswip.c3
-rw-r--r--net/dsa/tag_hellcreek.c5
-rw-r--r--net/dsa/tag_ksz.c8
-rw-r--r--net/dsa/tag_lan9303.c24
-rw-r--r--net/dsa/tag_mtk.c19
-rw-r--r--net/dsa/tag_ocelot.c5
-rw-r--r--net/dsa/tag_ocelot_8021q.c9
-rw-r--r--net/dsa/tag_qca.c16
-rw-r--r--net/dsa/tag_rtl4_a.c21
-rw-r--r--net/dsa/tag_sja1105.c284
-rw-r--r--net/dsa/tag_trailer.c3
-rw-r--r--net/dsa/tag_xrs700x.c5
-rw-r--r--net/ethernet/eth.c8
-rw-r--r--net/ethtool/coalesce.c29
-rw-r--r--net/ethtool/ioctl.c172
-rw-r--r--net/ethtool/netlink.c51
-rw-r--r--net/ethtool/netlink.h17
-rw-r--r--net/ieee802154/nl-phy.c3
-rw-r--r--net/ieee802154/nl802154.c3
-rw-r--r--net/ieee802154/socket.c14
-rw-r--r--net/ipv4/af_inet.c12
-rw-r--r--net/ipv4/bpf_tcp_ca.c41
-rw-r--r--net/ipv4/cipso_ipv4.c18
-rw-r--r--net/ipv4/devinet.c21
-rw-r--r--net/ipv4/esp4.c4
-rw-r--r--net/ipv4/fib_semantics.c12
-rw-r--r--net/ipv4/fib_trie.c4
-rw-r--r--net/ipv4/fou.c10
-rw-r--r--net/ipv4/icmp.c3
-rw-r--r--net/ipv4/igmp.c51
-rw-r--r--net/ipv4/inet_connection_sock.c3
-rw-r--r--net/ipv4/ip_gre.c4
-rw-r--r--net/ipv4/ip_output.c39
-rw-r--r--net/ipv4/ip_sockglue.c24
-rw-r--r--net/ipv4/ip_tunnel.c9
-rw-r--r--net/ipv4/ip_vti.c2
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/netfilter/arptable_filter.c23
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c56
-rw-r--r--net/ipv4/netfilter/iptable_filter.c24
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c19
-rw-r--r--net/ipv4/netfilter/iptable_nat.c20
-rw-r--r--net/ipv4/netfilter/iptable_raw.c21
-rw-r--r--net/ipv4/netfilter/iptable_security.c23
-rw-r--r--net/ipv4/route.c91
-rw-r--r--net/ipv4/tcp.c5
-rw-r--r--net/ipv4/tcp_bbr.c2
-rw-r--r--net/ipv4/tcp_fastopen.c20
-rw-r--r--net/ipv4/tcp_input.c54
-rw-r--r--net/ipv4/tcp_ipv4.c411
-rw-r--r--net/ipv4/tcp_output.c3
-rw-r--r--net/ipv4/tcp_recovery.c3
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv4/udp_bpf.c1
-rw-r--r--net/ipv4/udp_offload.c2
-rw-r--r--net/ipv6/Kconfig11
-rw-r--r--net/ipv6/Makefile3
-rw-r--r--net/ipv6/addrconf.c65
-rw-r--r--net/ipv6/af_inet6.c16
-rw-r--r--net/ipv6/exthdrs.c158
-rw-r--r--net/ipv6/ioam6.c910
-rw-r--r--net/ipv6/ioam6_iptunnel.c274
-rw-r--r--net/ipv6/ip6_fib.c6
-rw-r--r--net/ipv6/ip6_gre.c19
-rw-r--r--net/ipv6/ip6_output.c80
-rw-r--r--net/ipv6/ip6_tunnel.c21
-rw-r--r--net/ipv6/ip6_vti.c21
-rw-r--r--net/ipv6/ip6mr.c3
-rw-r--r--net/ipv6/ipv6_sockglue.c18
-rw-r--r--net/ipv6/mcast.c20
-rw-r--r--net/ipv6/ndisc.c17
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c23
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c22
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c16
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c24
-rw-r--r--net/ipv6/netfilter/ip6table_security.c22
-rw-r--r--net/ipv6/route.c50
-rw-r--r--net/ipv6/seg6_iptunnel.c74
-rw-r--r--net/ipv6/seg6_local.c110
-rw-r--r--net/ipv6/sit.c40
-rw-r--r--net/ipv6/sysctl_net_ipv6.c19
-rw-r--r--net/ipv6/udp.c2
-rw-r--r--net/iucv/af_iucv.c72
-rw-r--r--net/iucv/iucv.c60
-rw-r--r--net/llc/af_llc.c6
-rw-r--r--net/mac80211/cfg.c234
-rw-r--r--net/mac80211/driver-ops.h36
-rw-r--r--net/mac80211/ibss.c15
-rw-r--r--net/mac80211/ieee80211_i.h21
-rw-r--r--net/mac80211/iface.c54
-rw-r--r--net/mac80211/main.c4
-rw-r--r--net/mac80211/rx.c102
-rw-r--r--net/mac80211/s1g.c180
-rw-r--r--net/mac80211/sta_info.c2
-rw-r--r--net/mac80211/status.c33
-rw-r--r--net/mac80211/trace.h67
-rw-r--r--net/mac80211/tx.c33
-rw-r--r--net/mac80211/util.c12
-rw-r--r--net/mctp/Kconfig13
-rw-r--r--net/mctp/Makefile3
-rw-r--r--net/mctp/af_mctp.c395
-rw-r--r--net/mctp/device.c423
-rw-r--r--net/mctp/neigh.c342
-rw-r--r--net/mctp/route.c1116
-rw-r--r--net/mpls/af_mpls.c2
-rw-r--r--net/mptcp/ctrl.c26
-rw-r--r--net/mptcp/mib.c4
-rw-r--r--net/mptcp/mib.h4
-rw-r--r--net/mptcp/options.c468
-rw-r--r--net/mptcp/pm.c84
-rw-r--r--net/mptcp/pm_netlink.c247
-rw-r--r--net/mptcp/protocol.c201
-rw-r--r--net/mptcp/protocol.h114
-rw-r--r--net/mptcp/subflow.c69
-rw-r--r--net/netfilter/Makefile3
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c9
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c11
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c16
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c11
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c16
-rw-r--r--net/netfilter/nf_conntrack_core.c71
-rw-r--r--net/netfilter/nf_conntrack_ecache.c211
-rw-r--r--net/netfilter/nf_conntrack_netlink.c132
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c1
-rw-r--r--net/netfilter/nf_conntrack_standalone.c31
-rw-r--r--net/netfilter/nf_flow_table_core.c23
-rw-r--r--net/netfilter/nf_flow_table_offload.c4
-rw-r--r--net/netfilter/nf_hooks_lwtunnel.c53
-rw-r--r--net/netfilter/nf_queue.c43
-rw-r--r--net/netfilter/nf_tables_offload.c1
-rw-r--r--net/netfilter/nfnetlink_hook.c24
-rw-r--r--net/netfilter/nfnetlink_queue.c15
-rw-r--r--net/netfilter/nft_compat.c8
-rw-r--r--net/netfilter/x_tables.c98
-rw-r--r--net/netfilter/xt_CT.c11
-rw-r--r--net/netfilter/xt_bpf.c2
-rw-r--r--net/netlabel/netlabel_cipso_v4.c12
-rw-r--r--net/netlabel/netlabel_unlabeled.c6
-rw-r--r--net/netlink/af_netlink.c4
-rw-r--r--net/netlink/genetlink.c17
-rw-r--r--net/netrom/nr_loopback.c3
-rw-r--r--net/netrom/nr_route.c3
-rw-r--r--net/nfc/af_nfc.c2
-rw-r--r--net/nfc/core.c8
-rw-r--r--net/nfc/digital_core.c4
-rw-r--r--net/nfc/hci/core.c14
-rw-r--r--net/nfc/hci/llc.c4
-rw-r--r--net/nfc/hci/llc.h6
-rw-r--r--net/nfc/hci/llc_nop.c2
-rw-r--r--net/nfc/hci/llc_shdlc.c12
-rw-r--r--net/nfc/llcp.h8
-rw-r--r--net/nfc/llcp_commands.c46
-rw-r--r--net/nfc/llcp_core.c44
-rw-r--r--net/nfc/nci/core.c176
-rw-r--r--net/nfc/nci/data.c12
-rw-r--r--net/nfc/nci/hci.c52
-rw-r--r--net/nfc/nci/ntf.c87
-rw-r--r--net/nfc/nci/rsp.c48
-rw-r--r--net/nfc/nci/spi.c2
-rw-r--r--net/nfc/netlink.c4
-rw-r--r--net/nfc/nfc.h2
-rw-r--r--net/nfc/rawsock.c2
-rw-r--r--net/openvswitch/actions.c8
-rw-r--r--net/openvswitch/datapath.c76
-rw-r--r--net/openvswitch/datapath.h20
-rw-r--r--net/openvswitch/flow.c13
-rw-r--r--net/openvswitch/vport.c1
-rw-r--r--net/packet/af_packet.c15
-rw-r--r--net/phonet/af_phonet.c3
-rw-r--r--net/phonet/pn_dev.c12
-rw-r--r--net/phonet/socket.c3
-rw-r--r--net/qrtr/mhi.c16
-rw-r--r--net/qrtr/qrtr.c12
-rw-r--r--net/rds/ib_frmr.c4
-rw-r--r--net/rxrpc/Kconfig7
-rw-r--r--net/sched/act_api.c73
-rw-r--r--net/sched/act_bpf.c8
-rw-r--r--net/sched/act_connmark.c4
-rw-r--r--net/sched/act_csum.c7
-rw-r--r--net/sched/act_ct.c4
-rw-r--r--net/sched/act_ctinfo.c4
-rw-r--r--net/sched/act_gact.c4
-rw-r--r--net/sched/act_gate.c4
-rw-r--r--net/sched/act_ife.c9
-rw-r--r--net/sched/act_ipt.c21
-rw-r--r--net/sched/act_mirred.c13
-rw-r--r--net/sched/act_mpls.c4
-rw-r--r--net/sched/act_nat.c6
-rw-r--r--net/sched/act_pedit.c4
-rw-r--r--net/sched/act_police.c4
-rw-r--r--net/sched/act_sample.c7
-rw-r--r--net/sched/act_simple.c4
-rw-r--r--net/sched/act_skbedit.c4
-rw-r--r--net/sched/act_skbmod.c47
-rw-r--r--net/sched/act_tunnel_key.c4
-rw-r--r--net/sched/act_vlan.c4
-rw-r--r--net/sched/cls_api.c87
-rw-r--r--net/sched/cls_basic.c10
-rw-r--r--net/sched/cls_bpf.c12
-rw-r--r--net/sched/cls_cgroup.c6
-rw-r--r--net/sched/cls_flow.c6
-rw-r--r--net/sched/cls_flower.c18
-rw-r--r--net/sched/cls_fw.c13
-rw-r--r--net/sched/cls_matchall.c17
-rw-r--r--net/sched/cls_route.c10
-rw-r--r--net/sched/cls_rsvp.h7
-rw-r--r--net/sched/cls_tcindex.c10
-rw-r--r--net/sched/cls_u32.c24
-rw-r--r--net/sched/sch_api.c10
-rw-r--r--net/sched/sch_atm.c2
-rw-r--r--net/sched/sch_cake.c4
-rw-r--r--net/sched/sch_cbq.c4
-rw-r--r--net/sched/sch_drr.c2
-rw-r--r--net/sched/sch_dsmark.c2
-rw-r--r--net/sched/sch_ets.c9
-rw-r--r--net/sched/sch_fq_codel.c2
-rw-r--r--net/sched/sch_fq_pie.c2
-rw-r--r--net/sched/sch_hfsc.c2
-rw-r--r--net/sched/sch_htb.c99
-rw-r--r--net/sched/sch_multiq.c2
-rw-r--r--net/sched/sch_prio.c2
-rw-r--r--net/sched/sch_qfq.c2
-rw-r--r--net/sched/sch_sfb.c2
-rw-r--r--net/sched/sch_sfq.c2
-rw-r--r--net/sched/sch_taprio.c4
-rw-r--r--net/smc/af_smc.c2
-rw-r--r--net/smc/smc_core.c35
-rw-r--r--net/smc/smc_core.h4
-rw-r--r--net/smc/smc_ib.c3
-rw-r--r--net/smc/smc_llc.c10
-rw-r--r--net/smc/smc_pnet.c3
-rw-r--r--net/smc/smc_tx.c18
-rw-r--r--net/smc/smc_wr.c10
-rw-r--r--net/socket.c423
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/debugfs.c73
-rw-r--r--net/sunrpc/fail.h25
-rw-r--r--net/sunrpc/svc.c44
-rw-r--r--net/sunrpc/svc_xprt.c6
-rw-r--r--net/sunrpc/xprt.c14
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c56
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c41
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c11
-rw-r--r--net/sunrpc/xprtsock.c18
-rw-r--r--net/switchdev/switchdev.c308
-rw-r--r--net/tipc/link.c6
-rw-r--r--net/tipc/socket.c36
-rw-r--r--net/unix/Kconfig5
-rw-r--r--net/unix/Makefile1
-rw-r--r--net/unix/af_unix.c441
-rw-r--r--net/unix/unix_bpf.c174
-rw-r--r--net/vmw_vsock/virtio_transport.c7
-rw-r--r--net/wireless/nl80211.c173
-rw-r--r--net/wireless/radiotap.c9
-rw-r--r--net/wireless/rdev-ops.h13
-rw-r--r--net/wireless/reg.c9
-rw-r--r--net/wireless/scan.c3
-rw-r--r--net/wireless/trace.h46
-rw-r--r--net/xfrm/xfrm_policy.c16
-rw-r--r--net/xfrm/xfrm_user.c57
-rw-r--r--samples/bpf/.gitignore2
-rw-r--r--samples/bpf/Makefile109
-rw-r--r--samples/bpf/Makefile.target11
-rw-r--r--samples/bpf/cookie_uid_helper_example.c11
-rw-r--r--samples/bpf/offwaketime_kern.c9
-rwxr-xr-xsamples/bpf/test_override_return.sh1
-rw-r--r--samples/bpf/tracex4_user.c2
-rw-r--r--samples/bpf/tracex7_user.c5
-rw-r--r--samples/bpf/xdp1_kern.c2
-rw-r--r--samples/bpf/xdp2_kern.c2
-rw-r--r--samples/bpf/xdp_monitor.bpf.c8
-rw-r--r--samples/bpf/xdp_monitor_kern.c257
-rw-r--r--samples/bpf/xdp_monitor_user.c798
-rw-r--r--samples/bpf/xdp_redirect.bpf.c49
-rw-r--r--samples/bpf/xdp_redirect_cpu.bpf.c (renamed from samples/bpf/xdp_redirect_cpu_kern.c)393
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c1132
-rw-r--r--samples/bpf/xdp_redirect_kern.c90
-rw-r--r--samples/bpf/xdp_redirect_map.bpf.c (renamed from samples/bpf/xdp_redirect_map_kern.c)89
-rw-r--r--samples/bpf/xdp_redirect_map_multi.bpf.c (renamed from samples/bpf/xdp_redirect_map_multi_kern.c)50
-rw-r--r--samples/bpf/xdp_redirect_map_multi_user.c345
-rw-r--r--samples/bpf/xdp_redirect_map_user.c385
-rw-r--r--samples/bpf/xdp_redirect_user.c270
-rw-r--r--samples/bpf/xdp_sample.bpf.c266
-rw-r--r--samples/bpf/xdp_sample.bpf.h141
-rw-r--r--samples/bpf/xdp_sample_shared.h17
-rw-r--r--samples/bpf/xdp_sample_user.c1673
-rw-r--r--samples/bpf/xdp_sample_user.h108
-rw-r--r--samples/bpf/xdpsock_user.c20
-rw-r--r--samples/mei/mei-amt-version.c51
-rw-r--r--samples/pktgen/functions.sh2
-rwxr-xr-xsamples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh19
-rwxr-xr-xsamples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh19
-rwxr-xr-xsamples/pktgen/pktgen_sample01_simple.sh13
-rwxr-xr-xsamples/pktgen/pktgen_sample02_multiqueue.sh19
-rwxr-xr-xsamples/pktgen/pktgen_sample03_burst_single_flow.sh6
-rwxr-xr-xsamples/pktgen/pktgen_sample04_many_flows.sh12
-rwxr-xr-xsamples/pktgen/pktgen_sample05_flow_per_thread.sh12
-rwxr-xr-xsamples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh19
-rw-r--r--samples/trace_events/trace-events-sample.h27
-rwxr-xr-xscripts/atomic/check-atomics.sh6
-rwxr-xr-xscripts/atomic/fallbacks/acquire4
-rwxr-xr-xscripts/atomic/fallbacks/add_negative6
-rwxr-xr-xscripts/atomic/fallbacks/add_unless6
-rwxr-xr-xscripts/atomic/fallbacks/andnot4
-rwxr-xr-xscripts/atomic/fallbacks/dec4
-rwxr-xr-xscripts/atomic/fallbacks/dec_and_test6
-rwxr-xr-xscripts/atomic/fallbacks/dec_if_positive6
-rwxr-xr-xscripts/atomic/fallbacks/dec_unless_positive6
-rwxr-xr-xscripts/atomic/fallbacks/fence4
-rwxr-xr-xscripts/atomic/fallbacks/fetch_add_unless8
-rwxr-xr-xscripts/atomic/fallbacks/inc4
-rwxr-xr-xscripts/atomic/fallbacks/inc_and_test6
-rwxr-xr-xscripts/atomic/fallbacks/inc_not_zero6
-rwxr-xr-xscripts/atomic/fallbacks/inc_unless_negative6
-rwxr-xr-xscripts/atomic/fallbacks/read_acquire2
-rwxr-xr-xscripts/atomic/fallbacks/release4
-rwxr-xr-xscripts/atomic/fallbacks/set_release2
-rwxr-xr-xscripts/atomic/fallbacks/sub_and_test6
-rwxr-xr-xscripts/atomic/fallbacks/try_cmpxchg4
-rwxr-xr-xscripts/atomic/gen-atomic-fallback.sh68
-rwxr-xr-xscripts/atomic/gen-atomic-instrumented.sh11
-rwxr-xr-xscripts/atomic/gen-atomic-long.sh10
-rwxr-xr-xscripts/atomic/gen-atomics.sh6
-rwxr-xr-xscripts/bpf_doc.py2
-rw-r--r--scripts/spdxcheck-test.sh16
-rw-r--r--security/integrity/ima/ima_main.c1
-rw-r--r--security/integrity/platform_certs/efi_parser.c2
-rw-r--r--security/security.c3
-rw-r--r--security/selinux/hooks.c8
-rw-r--r--security/selinux/include/classmap.h4
-rw-r--r--security/selinux/ss/services.c2
-rw-r--r--security/smack/smack.h2
-rw-r--r--security/smack/smack_access.c17
-rw-r--r--security/smack/smack_lsm.c2
-rw-r--r--sound/core/pcm_native.c5
-rw-r--r--sound/firewire/oxfw/oxfw-stream.c9
-rw-r--r--sound/firewire/oxfw/oxfw.c6
-rw-r--r--sound/firewire/oxfw/oxfw.h5
-rw-r--r--sound/pci/hda/hda_generic.c10
-rw-r--r--sound/pci/hda/hda_intel.c12
-rw-r--r--sound/pci/hda/patch_realtek.c14
-rw-r--r--sound/pci/hda/patch_via.c1
-rw-r--r--sound/soc/Kconfig1
-rw-r--r--sound/soc/amd/acp-da7219-max98357a.c5
-rw-r--r--sound/soc/amd/acp-pcm-dma.c2
-rw-r--r--sound/soc/amd/raven/acp3x-pcm-dma.c2
-rw-r--r--sound/soc/amd/renoir/acp3x-pdm-dma.c2
-rw-r--r--sound/soc/amd/renoir/rn-pci-acp3x.c2
-rw-r--r--sound/soc/codecs/Kconfig19
-rw-r--r--sound/soc/codecs/Makefile7
-rw-r--r--sound/soc/codecs/cs42l42.c104
-rw-r--r--sound/soc/codecs/cs42l42.h3
-rw-r--r--sound/soc/codecs/nau8824.c42
-rw-r--r--sound/soc/codecs/rt5682.c1
-rw-r--r--sound/soc/codecs/sdw-mockup.c312
-rw-r--r--sound/soc/codecs/tlv320aic31xx.c10
-rw-r--r--sound/soc/codecs/tlv320aic32x4.c33
-rw-r--r--sound/soc/codecs/wm_adsp.c1
-rw-r--r--sound/soc/intel/atom/sst-mfld-platform-pcm.c3
-rw-r--r--sound/soc/intel/boards/Kconfig1
-rw-r--r--sound/soc/intel/boards/sof_da7219_max98373.c2
-rw-r--r--sound/soc/intel/boards/sof_sdw.c41
-rw-r--r--sound/soc/intel/common/Makefile3
-rw-r--r--sound/soc/intel/common/soc-acpi-intel-cnl-match.c15
-rw-r--r--sound/soc/intel/common/soc-acpi-intel-sdw-mockup-match.c166
-rw-r--r--sound/soc/intel/common/soc-acpi-intel-sdw-mockup-match.h17
-rw-r--r--sound/soc/intel/common/soc-acpi-intel-tgl-match.c23
-rw-r--r--sound/soc/kirkwood/kirkwood-dma.c26
-rw-r--r--sound/soc/soc-component.c63
-rw-r--r--sound/soc/sof/intel/Kconfig4
-rw-r--r--sound/soc/sof/intel/hda-ipc.c4
-rw-r--r--sound/soc/sof/intel/hda.c12
-rw-r--r--sound/soc/uniphier/aio-dma.c2
-rw-r--r--sound/soc/xilinx/xlnx_formatter_pcm.c4
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-btf.rst48
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst3
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-feature.rst2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-gen.rst9
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-iter.rst2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-link.rst3
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst3
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-net.rst2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-perf.rst2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst36
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool.rst12
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool66
-rw-r--r--tools/bpf/bpftool/btf.c11
-rw-r--r--tools/bpf/bpftool/btf_dumper.c6
-rw-r--r--tools/bpf/bpftool/cgroup.c3
-rw-r--r--tools/bpf/bpftool/common.c6
-rw-r--r--tools/bpf/bpftool/feature.c1
-rw-r--r--tools/bpf/bpftool/gen.c3
-rw-r--r--tools/bpf/bpftool/iter.c2
-rw-r--r--tools/bpf/bpftool/link.c3
-rw-r--r--tools/bpf/bpftool/main.c3
-rw-r--r--tools/bpf/bpftool/main.h3
-rw-r--r--tools/bpf/bpftool/map.c19
-rw-r--r--tools/bpf/bpftool/net.c1
-rw-r--r--tools/bpf/bpftool/perf.c5
-rw-r--r--tools/bpf/bpftool/prog.c37
-rw-r--r--tools/bpf/bpftool/struct_ops.c2
-rw-r--r--tools/bpf/resolve_btfids/main.c13
-rw-r--r--tools/include/nolibc/nolibc.h15
-rw-r--r--tools/include/uapi/linux/bpf.h119
-rw-r--r--tools/include/uapi/linux/ethtool.h53
-rw-r--r--tools/include/uapi/linux/if_link.h2
-rw-r--r--tools/io_uring/io_uring-cp.c31
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/Makefile10
-rw-r--r--tools/lib/bpf/bpf.c32
-rw-r--r--tools/lib/bpf/bpf.h8
-rw-r--r--tools/lib/bpf/btf.c50
-rw-r--r--tools/lib/bpf/btf.h31
-rw-r--r--tools/lib/bpf/btf_dump.c871
-rw-r--r--tools/lib/bpf/libbpf.c1756
-rw-r--r--tools/lib/bpf/libbpf.h76
-rw-r--r--tools/lib/bpf/libbpf.map11
-rw-r--r--tools/lib/bpf/libbpf_internal.h113
-rw-r--r--tools/lib/bpf/libbpf_probes.c4
-rw-r--r--tools/lib/bpf/relo_core.c1295
-rw-r--r--tools/lib/bpf/relo_core.h100
-rw-r--r--tools/perf/util/bpf-event.c11
-rw-r--r--tools/perf/util/bpf_counter.c12
-rw-r--r--tools/testing/nvdimm/test/nfit.c2
-rw-r--r--tools/testing/selftests/Makefile2
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/Makefile7
-rw-r--r--tools/testing/selftests/bpf/README.rst7
-rw-r--r--tools/testing/selftests/bpf/bpf_tcp_helpers.h19
-rw-r--r--tools/testing/selftests/bpf/netcnt_common.h38
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c120
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c98
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c254
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c226
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c106
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c615
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_module.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_autosize.c22
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c55
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_btf.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netcnt.c82
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netns_cookie.c80
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_link.c89
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reference_tracking.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c61
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c445
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c70
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_pt_regs.c47
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c55
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_mim.c77
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bonding.c520
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c105
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c39
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp_release.c26
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter.h8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c72
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_unix.c80
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h10
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_ip_test.c84
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c4
-rw-r--r--tools/testing/selftests/bpf/progs/netcnt_prog.c8
-rw-r--r--tools/testing/selftests/bpf/progs/netns_cookie_prog.c84
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c39
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_sk.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_cookie.c85
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_autosize.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_weak.c56
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_link.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_pt_regs.c29
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c20
-rw-r--r--tools/testing/selftests/bpf/progs/timer.c297
-rw-r--r--tools/testing/selftests/bpf/progs/timer_mim.c88
-rw-r--r--tools/testing/selftests/bpf/progs/timer_mim_reject.c74
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_tx.c2
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool.sh6
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_build.sh2
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_synctypes.py586
-rwxr-xr-xtools/testing/selftests/bpf/test_doc_build.sh10
-rw-r--r--tools/testing/selftests/bpf/test_maps.c90
-rw-r--r--tools/testing/selftests/bpf/test_netcnt.c148
-rw-r--r--tools/testing/selftests/bpf/test_progs.c107
-rw-r--r--tools/testing/selftests/bpf/test_progs.h12
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_tunnel.sh2
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_veth.sh2
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh10
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c87
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h4
-rw-r--r--tools/testing/selftests/bpf/verifier/dead_code.c12
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.c681
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.h63
-rwxr-xr-xtools/testing/selftests/bpf/xsk_prereqs.sh30
-rw-r--r--tools/testing/selftests/filesystems/binderfs/binderfs_test.c17
-rw-r--r--tools/testing/selftests/lkdtm/config2
-rw-r--r--tools/testing/selftests/lkdtm/tests.txt3
-rw-r--r--tools/testing/selftests/move_mount_set_group/.gitignore1
-rw-r--r--tools/testing/selftests/move_mount_set_group/Makefile7
-rw-r--r--tools/testing/selftests/move_mount_set_group/config1
-rw-r--r--tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c375
-rw-r--r--tools/testing/selftests/nci/nci_dev.c416
-rw-r--r--tools/testing/selftests/net/Makefile5
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile5
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c437
-rw-r--r--tools/testing/selftests/net/config1
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh33
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh7
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh15
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh27
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh2
-rw-r--r--tools/testing/selftests/net/gro.c1095
-rwxr-xr-xtools/testing/selftests/net/gro.sh99
-rwxr-xr-xtools/testing/selftests/net/ioam6.sh652
-rw-r--r--tools/testing/selftests/net/ioam6_parser.c720
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh345
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c16
-rw-r--r--tools/testing/selftests/net/psock_fanout.c4
-rwxr-xr-xtools/testing/selftests/net/psock_snd.sh3
-rwxr-xr-xtools/testing/selftests/net/run_afpackettests5
-rwxr-xr-xtools/testing/selftests/net/setup_loopback.sh118
-rw-r--r--tools/testing/selftests/net/setup_veth.sh41
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh9
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh9
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh9
-rw-r--r--tools/testing/selftests/net/toeplitz.c585
-rwxr-xr-xtools/testing/selftests/net/toeplitz.sh199
-rwxr-xr-xtools/testing/selftests/net/toeplitz_client.sh28
-rwxr-xr-xtools/testing/selftests/net/unicast_extensions.sh5
-rwxr-xr-xtools/testing/selftests/net/veth.sh183
-rwxr-xr-xtools/testing/selftests/net/vrf_strict_mode_test.sh9
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh10
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kcsan-collapse.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-again.sh4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh106
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh88
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh5
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh30
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote.sh20
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh24
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh49
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh39
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh37
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/RUDE012
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS012
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS032
-rw-r--r--tools/testing/selftests/sgx/sigstruct.c41
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json24
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json137
-rw-r--r--tools/testing/selftests/tc-testing/tdc_config.py1
-rw-r--r--tools/virtio/Makefile3
-rw-r--r--tools/virtio/linux/spinlock.h56
-rw-r--r--tools/virtio/linux/virtio.h2
3873 files changed, 170936 insertions, 84175 deletions
diff --git a/.mailmap b/.mailmap
index a35ae244dfda..6e849110cb4e 100644
--- a/.mailmap
+++ b/.mailmap
@@ -229,6 +229,7 @@ Matthew Wilcox <willy@infradead.org> <mawilcox@microsoft.com>
Matthew Wilcox <willy@infradead.org> <willy@debian.org>
Matthew Wilcox <willy@infradead.org> <willy@linux.intel.com>
Matthew Wilcox <willy@infradead.org> <willy@parisc-linux.org>
+Matthias Fuchs <socketcan@esd.eu> <matthias.fuchs@esd.eu>
Matthieu CASTET <castet.matthieu@free.fr>
Matt Ranostay <matt.ranostay@konsulko.com> <matt@ranostay.consulting>
Matt Ranostay <mranostay@gmail.com> Matthew Ranostay <mranostay@embeddedalley.com>
@@ -341,6 +342,7 @@ Sumit Semwal <sumit.semwal@ti.com>
Takashi YOSHII <takashi.yoshii.zj@renesas.com>
Tejun Heo <htejun@gmail.com>
Thomas Graf <tgraf@suug.ch>
+Thomas Körper <socketcan@esd.eu> <thomas.koerper@esd.eu>
Thomas Pedersen <twp@codeaurora.org>
Tiezhu Yang <yangtiezhu@loongson.cn> <kernelpatch@126.com>
Todor Tomov <todor.too@gmail.com> <todor.tomov@linaro.org>
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index e34cdeeeb9d4..a0ed87386639 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -28,6 +28,18 @@ Description:
For more details refer to Documentation/admin-guide/iostats.rst
+What: /sys/block/<disk>/diskseq
+Date: February 2021
+Contact: Matteo Croce <mcroce@microsoft.com>
+Description:
+ The /sys/block/<disk>/diskseq file reports the disk
+ sequence number, which is a monotonically increasing
+ number assigned to every drive.
+ Some devices, like the loop device, refresh such number
+ every time the backing file is changed.
+ The value type is 64 bit unsigned.
+
+
What: /sys/block/<disk>/<part>/stat
Date: February 2008
Contact: Jerome Marchand <jmarchan@redhat.com>
diff --git a/Documentation/ABI/testing/sysfs-block-device b/Documentation/ABI/testing/sysfs-block-device
index aa0fb500e3c9..7ac7b19b2f72 100644
--- a/Documentation/ABI/testing/sysfs-block-device
+++ b/Documentation/ABI/testing/sysfs-block-device
@@ -55,6 +55,43 @@ Date: Oct, 2016
KernelVersion: v4.10
Contact: linux-ide@vger.kernel.org
Description:
- (RW) Write to the file to turn on or off the SATA ncq (native
- command queueing) support. By default this feature is turned
- off.
+ (RW) Write to the file to turn on or off the SATA NCQ (native
+ command queueing) priority support. By default this feature is
+ turned off. If the device does not support the SATA NCQ
+ priority feature, writing "1" to this file results in an error
+ (see ncq_prio_supported).
+
+
+What: /sys/block/*/device/sas_ncq_prio_enable
+Date: Oct, 2016
+KernelVersion: v4.10
+Contact: linux-ide@vger.kernel.org
+Description:
+ (RW) This is the equivalent of the ncq_prio_enable attribute
+ file for SATA devices connected to a SAS host-bus-adapter
+ (HBA) implementing support for the SATA NCQ priority feature.
+ This file does not exist if the HBA driver does not implement
+ support for the SATA NCQ priority feature, regardless of the
+ device support for this feature (see sas_ncq_prio_supported).
+
+
+What: /sys/block/*/device/ncq_prio_supported
+Date: Aug, 2021
+KernelVersion: v5.15
+Contact: linux-ide@vger.kernel.org
+Description:
+ (RO) Indicates if the device supports the SATA NCQ (native
+ command queueing) priority feature.
+
+
+What: /sys/block/*/device/sas_ncq_prio_supported
+Date: Aug, 2021
+KernelVersion: v5.15
+Contact: linux-ide@vger.kernel.org
+Description:
+ (RO) This is the equivalent of the ncq_prio_supported attribute
+ file for SATA devices connected to a SAS host-bus-adapter
+ (HBA) implementing support for the SATA NCQ priority feature.
+ This file does not exist if the HBA driver does not implement
+ support for the SATA NCQ priority feature, regardless of the
+ device support for this feature.
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-uncore b/Documentation/ABI/testing/sysfs-bus-event_source-devices-uncore
new file mode 100644
index 000000000000..b56e8f019fd4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-uncore
@@ -0,0 +1,13 @@
+What: /sys/bus/event_source/devices/uncore_*/alias
+Date: June 2021
+KernelVersion: 5.15
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Read-only. An attribute to describe the alias name of
+ the uncore PMU if an alias exists on some platforms.
+ The 'perf(1)' tool should treat both names the same.
+ They both can be used to access the uncore PMU.
+
+ Example:
+
+ $ cat /sys/devices/uncore_cha_2/alias
+ uncore_type_0_2
diff --git a/Documentation/ABI/testing/sysfs-bus-platform b/Documentation/ABI/testing/sysfs-bus-platform
index 194ca700e962..ff30728595ef 100644
--- a/Documentation/ABI/testing/sysfs-bus-platform
+++ b/Documentation/ABI/testing/sysfs-bus-platform
@@ -28,3 +28,17 @@ Description:
value comes from an ACPI _PXM method or a similar firmware
source. Initial users for this file would be devices like
arm smmu which are populated by arm64 acpi_iort.
+
+What: /sys/bus/platform/devices/.../msi_irqs/
+Date: August 2021
+Contact: Barry Song <song.bao.hua@hisilicon.com>
+Description:
+ The /sys/devices/.../msi_irqs directory contains a variable set
+ of files, with each file being named after a corresponding msi
+ irq vector allocated to that device.
+
+What: /sys/bus/platform/devices/.../msi_irqs/<N>
+Date: August 2021
+Contact: Barry Song <song.bao.hua@hisilicon.com>
+Description:
+ This attribute will show "msi" if <N> is a valid msi irq
diff --git a/Documentation/ABI/testing/sysfs-driver-ge-achc b/Documentation/ABI/testing/sysfs-driver-ge-achc
new file mode 100644
index 000000000000..a9e7a079190c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-ge-achc
@@ -0,0 +1,15 @@
+What: /sys/bus/spi/<dev>/update_firmware
+Date: Jul 2021
+Contact: sebastian.reichel@collabora.com
+Description: Write 1 to this file to update the ACHC microcontroller
+ firmware via the EzPort interface. For this the kernel
+ will load "achc.bin" via the firmware API (so usually
+ from /lib/firmware). The write will block until the FW
+ has either been flashed successfully or an error occurred.
+
+What: /sys/bus/spi/<dev>/reset
+Date: Jul 2021
+Contact: sebastian.reichel@collabora.com
+Description: This file represents the microcontroller's reset line.
+ 1 means the reset line is asserted, 0 means it's not
+ asserted. The file is readable and writable.
diff --git a/Documentation/ABI/testing/sysfs-platform-dptf b/Documentation/ABI/testing/sysfs-platform-dptf
index 141834342a4d..53c6b1000320 100644
--- a/Documentation/ABI/testing/sysfs-platform-dptf
+++ b/Documentation/ABI/testing/sysfs-platform-dptf
@@ -111,3 +111,43 @@ Contact: linux-acpi@vger.kernel.org
Description:
(RW) The PCH FIVR (Fully Integrated Voltage Regulator) switching frequency in MHz,
when FIVR clock is 38.4MHz.
+
+What: /sys/bus/platform/devices/INTC1045:00/pch_fivr_switch_frequency/fivr_switching_freq_mhz
+Date: September, 2021
+KernelVersion: v5.15
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) Get the FIVR switching control frequency in MHz.
+
+What: /sys/bus/platform/devices/INTC1045:00/pch_fivr_switch_frequency/fivr_switching_fault_status
+Date: September, 2021
+KernelVersion: v5.15
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) Read the FIVR switching frequency control fault status.
+
+What: /sys/bus/platform/devices/INTC1045:00/pch_fivr_switch_frequency/ssc_clock_info
+Date: September, 2021
+KernelVersion: v5.15
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) Presents SSC (spread spectrum clock) information for EMI
+ (electromagnetic interference) control. This is a bit mask.
+ Bits Description
+ [7:0] Sets clock spectrum spread percentage:
+ 0x00=0.2% , 0x3F=10%
+ 1 LSB = 0.1% increase in spread (for
+ settings 0x01 thru 0x1C)
+ 1 LSB = 0.2% increase in spread (for
+ settings 0x1E thru 0x3F)
+ [8] When set to 1, enables spread
+ spectrum clock
+ [9] 0: Triangle mode. FFC frequency
+ walks around the Fcenter in a linear
+ fashion
+ 1: Random walk mode. FFC frequency
+ changes randomly within the SSC
+ (Spread spectrum clock) range
+ [10] 0: No white noise. 1: Add white noise
+ to spread waveform
+ [11] When 1, future writes are ignored.
diff --git a/Documentation/ABI/testing/sysfs-platform_profile b/Documentation/ABI/testing/sysfs-platform_profile
index dae9c8941905..baf1d125f9f8 100644
--- a/Documentation/ABI/testing/sysfs-platform_profile
+++ b/Documentation/ABI/testing/sysfs-platform_profile
@@ -26,3 +26,10 @@ Contact: Hans de Goede <hdegoede@redhat.com>
Description: Reading this file gives the current selected profile for this
device. Writing this file with one of the strings from
platform_profile_choices changes the profile to the new value.
+
+ This file can be monitored for changes by polling for POLLPRI.
+ POLLPRI will be signalled on any changes, regardless of whether
+ those changes come from a userspace write or from another
+ source, e.g. a hotkey-triggered profile change handled
+ either directly by the embedded-controller or fully handled
+ inside the kernel.
diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
index 11cdab037bff..eeb351296df1 100644
--- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
+++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
@@ -112,6 +112,35 @@ on PowerPC.
The ``smp_mb__after_unlock_lock()`` invocations prevent this
``WARN_ON()`` from triggering.
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| But the chain of rcu_node-structure lock acquisitions guarantees |
+| that new readers will see all of the updater's pre-grace-period |
+| accesses and also guarantees that the updater's post-grace-period |
+| accesses will see all of the old reader's accesses. So why do we |
+| need all of those calls to smp_mb__after_unlock_lock()? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Because we must provide ordering for RCU's polling grace-period |
+| primitives, for example, get_state_synchronize_rcu() and |
+| poll_state_synchronize_rcu(). Consider this code:: |
+| |
+| CPU 0 CPU 1 |
+| ---- ---- |
+| WRITE_ONCE(X, 1) WRITE_ONCE(Y, 1) |
+| g = get_state_synchronize_rcu() smp_mb() |
+| while (!poll_state_synchronize_rcu(g)) r1 = READ_ONCE(X) |
+| continue; |
+| r0 = READ_ONCE(Y) |
+| |
+| RCU guarantees that the outcome r0 == 0 && r1 == 0 will not |
+| happen, even if CPU 1 is in an RCU extended quiescent state |
+| (idle or offline) and thus won't interact directly with the RCU |
+| core processing at all. |
++-----------------------------------------------------------------------+
+
This approach must be extended to include idle CPUs, which need
RCU's grace-period memory ordering guarantee to extend to any
RCU read-side critical sections preceding and following the current
diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
index 38a39476fc24..45278e2974c0 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@@ -362,9 +362,8 @@ do_something_gp() uses rcu_dereference() to fetch from ``gp``:
12 }
The rcu_dereference() uses volatile casts and (for DEC Alpha) memory
-barriers in the Linux kernel. Should a `high-quality implementation of
-C11 ``memory_order_consume``
-[PDF] <http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf>`__
+barriers in the Linux kernel. Should a |high-quality implementation of
+C11 memory_order_consume [PDF]|_
ever appear, then rcu_dereference() could be implemented as a
``memory_order_consume`` load. Regardless of the exact implementation, a
pointer fetched by rcu_dereference() may not be used outside of the
@@ -374,6 +373,9 @@ element has been passed from RCU to some other synchronization
mechanism, most commonly locking or `reference
counting <https://www.kernel.org/doc/Documentation/RCU/rcuref.txt>`__.
+.. |high-quality implementation of C11 memory_order_consume [PDF]| replace:: high-quality implementation of C11 ``memory_order_consume`` [PDF]
+.. _high-quality implementation of C11 memory_order_consume [PDF]: http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf
+
In short, updaters use rcu_assign_pointer() and readers use
rcu_dereference(), and these two RCU API elements work together to
ensure that readers have a consistent view of newly added data elements.
diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst
index 01cc21f17f7b..f4545b7c9a63 100644
--- a/Documentation/RCU/checklist.rst
+++ b/Documentation/RCU/checklist.rst
@@ -37,7 +37,7 @@ over a rather long period of time, but improvements are always welcome!
1. Does the update code have proper mutual exclusion?
- RCU does allow -readers- to run (almost) naked, but -writers- must
+ RCU does allow *readers* to run (almost) naked, but *writers* must
still use some sort of mutual exclusion, such as:
a. locking,
@@ -73,7 +73,7 @@ over a rather long period of time, but improvements are always welcome!
critical section is every bit as bad as letting them leak out
from under a lock. Unless, of course, you have arranged some
other means of protection, such as a lock or a reference count
- -before- letting them out of the RCU read-side critical section.
+ *before* letting them out of the RCU read-side critical section.
3. Does the update code tolerate concurrent accesses?
@@ -101,7 +101,7 @@ over a rather long period of time, but improvements are always welcome!
c. Make updates appear atomic to readers. For example,
pointer updates to properly aligned fields will
appear atomic, as will individual atomic primitives.
- Sequences of operations performed under a lock will -not-
+ Sequences of operations performed under a lock will *not*
appear to be atomic to RCU readers, nor will sequences
of multiple atomic primitives.
@@ -333,7 +333,7 @@ over a rather long period of time, but improvements are always welcome!
for example) may be omitted.
10. Conversely, if you are in an RCU read-side critical section,
- and you don't hold the appropriate update-side lock, you -must-
+ and you don't hold the appropriate update-side lock, you *must*
use the "_rcu()" variants of the list macros. Failing to do so
will break Alpha, cause aggressive compilers to generate bad code,
and confuse people trying to read your code.
@@ -359,12 +359,12 @@ over a rather long period of time, but improvements are always welcome!
callback pending, then that RCU callback will execute on some
surviving CPU. (If this was not the case, a self-spawning RCU
callback would prevent the victim CPU from ever going offline.)
- Furthermore, CPUs designated by rcu_nocbs= might well -always-
+ Furthermore, CPUs designated by rcu_nocbs= might well *always*
have their RCU callbacks executed on some other CPUs, in fact,
for some real-time workloads, this is the whole point of using
the rcu_nocbs= kernel boot parameter.
-13. Unlike other forms of RCU, it -is- permissible to block in an
+13. Unlike other forms of RCU, it *is* permissible to block in an
SRCU read-side critical section (demarked by srcu_read_lock()
and srcu_read_unlock()), hence the "SRCU": "sleepable RCU".
Please note that if you don't need to sleep in read-side critical
@@ -411,16 +411,16 @@ over a rather long period of time, but improvements are always welcome!
14. The whole point of call_rcu(), synchronize_rcu(), and friends
is to wait until all pre-existing readers have finished before
carrying out some otherwise-destructive operation. It is
- therefore critically important to -first- remove any path
+ therefore critically important to *first* remove any path
that readers can follow that could be affected by the
- destructive operation, and -only- -then- invoke call_rcu(),
+ destructive operation, and *only then* invoke call_rcu(),
synchronize_rcu(), or friends.
Because these primitives only wait for pre-existing readers, it
is the caller's responsibility to guarantee that any subsequent
readers will execute safely.
-15. The various RCU read-side primitives do -not- necessarily contain
+15. The various RCU read-side primitives do *not* necessarily contain
memory barriers. You should therefore plan for the CPU
and the compiler to freely reorder code into and out of RCU
read-side critical sections. It is the responsibility of the
@@ -459,8 +459,8 @@ over a rather long period of time, but improvements are always welcome!
pass in a function defined within a loadable module, then it is
necessary to wait for all pending callbacks to be invoked after
the last invocation and before unloading that module. Note that
- it is absolutely -not- sufficient to wait for a grace period!
- The current (say) synchronize_rcu() implementation is -not-
+ it is absolutely *not* sufficient to wait for a grace period!
+ The current (say) synchronize_rcu() implementation is *not*
guaranteed to wait for callbacks registered on other CPUs.
Or even on the current CPU if that CPU recently went offline
and came back online.
@@ -470,7 +470,7 @@ over a rather long period of time, but improvements are always welcome!
- call_rcu() -> rcu_barrier()
- call_srcu() -> srcu_barrier()
- However, these barrier functions are absolutely -not- guaranteed
+ However, these barrier functions are absolutely *not* guaranteed
to wait for a grace period. In fact, if there are no call_rcu()
callbacks waiting anywhere in the system, rcu_barrier() is within
its rights to return immediately.
diff --git a/Documentation/RCU/rcu_dereference.rst b/Documentation/RCU/rcu_dereference.rst
index f3e587acb4de..0b418a5b243c 100644
--- a/Documentation/RCU/rcu_dereference.rst
+++ b/Documentation/RCU/rcu_dereference.rst
@@ -43,7 +43,7 @@ Follow these rules to keep your RCU code working properly:
- Set bits and clear bits down in the must-be-zero low-order
bits of that pointer. This clearly means that the pointer
must have alignment constraints, for example, this does
- -not- work in general for char* pointers.
+ *not* work in general for char* pointers.
- XOR bits to translate pointers, as is done in some
classic buddy-allocator algorithms.
@@ -174,7 +174,7 @@ Follow these rules to keep your RCU code working properly:
Please see the "CONTROL DEPENDENCIES" section of
Documentation/memory-barriers.txt for more details.
- - The pointers are not equal -and- the compiler does
+ - The pointers are not equal *and* the compiler does
not have enough information to deduce the value of the
pointer. Note that the volatile cast in rcu_dereference()
will normally prevent the compiler from knowing too much.
@@ -360,7 +360,7 @@ in turn destroying the ordering between this load and the loads of the
return values. This can result in "p->b" returning pre-initialization
garbage values.
-In short, rcu_dereference() is -not- optional when you are going to
+In short, rcu_dereference() is *not* optional when you are going to
dereference the resulting pointer.
diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst
index 7148e9be08c3..5036df24ae61 100644
--- a/Documentation/RCU/stallwarn.rst
+++ b/Documentation/RCU/stallwarn.rst
@@ -32,7 +32,7 @@ warnings:
- Booting Linux using a console connection that is too slow to
keep up with the boot-time console-message rate. For example,
- a 115Kbaud serial console can be -way- too slow to keep up
+ a 115Kbaud serial console can be *way* too slow to keep up
with boot-time message rates, and will frequently result in
RCU CPU stall warning messages. Especially if you have added
debug printk()s.
@@ -105,7 +105,7 @@ warnings:
leading the realization that the CPU had failed.
The RCU, RCU-sched, and RCU-tasks implementations have CPU stall warning.
-Note that SRCU does -not- have CPU stall warnings. Please note that
+Note that SRCU does *not* have CPU stall warnings. Please note that
RCU only detects CPU stalls when there is a grace period in progress.
No grace period, no CPU stall warnings.
@@ -145,7 +145,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT
this parameter is checked only at the beginning of a cycle.
So if you are 10 seconds into a 40-second stall, setting this
sysfs parameter to (say) five will shorten the timeout for the
- -next- stall, or the following warning for the current stall
+ *next* stall, or the following warning for the current stall
(assuming the stall lasts long enough). It will not affect the
timing of the next warning for the current stall.
@@ -189,8 +189,8 @@ rcupdate.rcu_task_stall_timeout
Interpreting RCU's CPU Stall-Detector "Splats"
==============================================
-For non-RCU-tasks flavors of RCU, when a CPU detects that it is stalling,
-it will print a message similar to the following::
+For non-RCU-tasks flavors of RCU, when a CPU detects that some other
+CPU is stalling, it will print a message similar to the following::
INFO: rcu_sched detected stalls on CPUs/tasks:
2-...: (3 GPs behind) idle=06c/0/0 softirq=1453/1455 fqs=0
@@ -202,8 +202,10 @@ causing stalls, and that the stall was affecting RCU-sched. This message
will normally be followed by stack dumps for each CPU. Please note that
PREEMPT_RCU builds can be stalled by tasks as well as by CPUs, and that
the tasks will be indicated by PID, for example, "P3421". It is even
-possible for an rcu_state stall to be caused by both CPUs -and- tasks,
+possible for an rcu_state stall to be caused by both CPUs *and* tasks,
in which case the offending CPUs and tasks will all be called out in the list.
+In some cases, CPUs will detect themselves stalling, which will result
+in a self-detected stall.
CPU 2's "(3 GPs behind)" indicates that this CPU has not interacted with
the RCU core for the past three grace periods. In contrast, CPU 16's "(0
@@ -224,7 +226,7 @@ is the number that had executed since boot at the time that this CPU
last noted the beginning of a grace period, which might be the current
(stalled) grace period, or it might be some earlier grace period (for
example, if the CPU might have been in dyntick-idle mode for an extended
-time period. The number after the "/" is the number that have executed
+time period). The number after the "/" is the number that have executed
since boot until the current time. If this latter number stays constant
across repeated stall-warning messages, it is possible that RCU's softirq
handlers are no longer able to execute on this CPU. This can happen if
@@ -283,7 +285,8 @@ If the relevant grace-period kthread has been unable to run prior to
the stall warning, as was the case in the "All QSes seen" line above,
the following additional line is printed::
- kthread starved for 23807 jiffies! g7075 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1 ->cpu=5
+ rcu_sched kthread starved for 23807 jiffies! g7075 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1 ->cpu=5
+ Unless rcu_sched kthread gets sufficient CPU time, OOM is now expected behavior.
Starving the grace-period kthreads of CPU time can of course result
in RCU CPU stall warnings even when all CPUs and tasks have passed
@@ -313,15 +316,21 @@ is the current ``TIMER_SOFTIRQ`` count on cpu 4. If this value does not
change on successive RCU CPU stall warnings, there is further reason to
suspect a timer problem.
+These messages are usually followed by stack dumps of the CPUs and tasks
+involved in the stall. These stack traces can help you locate the cause
+of the stall, keeping in mind that the CPU detecting the stall will have
+an interrupt frame that is mainly devoted to detecting the stall.
+
Multiple Warnings From One Stall
================================
-If a stall lasts long enough, multiple stall-warning messages will be
-printed for it. The second and subsequent messages are printed at
+If a stall lasts long enough, multiple stall-warning messages will
+be printed for it. The second and subsequent messages are printed at
longer intervals, so that the time between (say) the first and second
message will be about three times the interval between the beginning
-of the stall and the first message.
+of the stall and the first message. It can be helpful to compare the
+stack dumps for the different messages for the same stalled grace period.
Stall Warnings for Expedited Grace Periods
diff --git a/Documentation/admin-guide/binderfs.rst b/Documentation/admin-guide/binderfs.rst
index 199d84314a14..41a4db00df8d 100644
--- a/Documentation/admin-guide/binderfs.rst
+++ b/Documentation/admin-guide/binderfs.rst
@@ -72,3 +72,16 @@ that the `rm() <rm_>`_ tool can be used to delete them. Note that the
``binder-control`` device cannot be deleted since this would make the binderfs
instance unusable. The ``binder-control`` device will be deleted when the
binderfs instance is unmounted and all references to it have been dropped.
+
+Binder features
+---------------
+
+Assuming an instance of binderfs has been mounted at ``/dev/binderfs``, the
+features supported by the binder driver can be located under
+``/dev/binderfs/features/``. The presence of individual files can be tested
+to determine whether a particular feature is supported by the driver.
+
+Example::
+
+ cat /dev/binderfs/features/oneway_spam_detection
+ 1
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 5c7377b5bd3e..babbe04c8d37 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -2056,6 +2056,17 @@ Cpuset Interface Files
The value of "cpuset.mems" stays constant until the next update
and won't be affected by any memory nodes hotplug events.
+ Setting a non-empty value to "cpuset.mems" causes memory of
+ tasks within the cgroup to be migrated to the designated nodes if
+ they are currently using memory outside of the designated nodes.
+
+ There is a cost for this memory migration. The migration
+ may not be complete and some memory pages may be left behind.
+ So it is recommended that "cpuset.mems" should be set properly
+ before spawning new tasks into the cpuset. Even if there is
+ a need to change "cpuset.mems" with active tasks, it shouldn't
+ be done frequently.
+
cpuset.mems.effective
A read-only multiple values file which exists on all
cpuset-enabled cgroups.
diff --git a/Documentation/admin-guide/device-mapper/dm-ima.rst b/Documentation/admin-guide/device-mapper/dm-ima.rst
new file mode 100644
index 000000000000..a4aa50a828e0
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-ima.rst
@@ -0,0 +1,715 @@
+======
+dm-ima
+======
+
+For a given system, various external services/infrastructure tools
+(including the attestation service) interact with it - both during the
+setup and during the rest of the system run-time. They share sensitive data
+and/or execute critical workload on that system. The external services
+may want to verify the current run-time state of the relevant kernel
+subsystems before fully trusting the system with business-critical
+data/workload.
+
+Device mapper plays a critical role on a given system by providing
+various important functionalities to the block devices using various
+target types like crypt, verity, integrity etc. Each of these target
+types’ functionalities can be configured with various attributes.
+The attributes chosen to configure these target types can significantly
+impact the security profile of the block device, and in turn, of the
+system itself. For instance, the type of encryption algorithm and the
+key size determines the strength of encryption for a given block device.
+
+Therefore, verifying the current state of various block devices as well
+as their various target attributes is crucial for external services before
+fully trusting the system with business-critical data/workload.
+
+IMA kernel subsystem provides the necessary functionality for
+device mapper to measure the state and configuration of
+various block devices -
+
+- by device mapper itself, from within the kernel,
+- in a tamper resistant way,
+- and re-measured - triggered on state/configuration change.
+
+Setting the IMA Policy:
+=======================
+For IMA to measure the data on a given system, the IMA policy on the
+system needs to be updated to have the following line, and the system needs
+to be restarted for the measurements to take effect.
+
+::
+
+ /etc/ima/ima-policy
+ measure func=CRITICAL_DATA label=device-mapper template=ima-buf
+
+The measurements will be reflected in the IMA logs, which are located at:
+
+::
+
+ /sys/kernel/security/integrity/ima/ascii_runtime_measurements
+ /sys/kernel/security/integrity/ima/binary_runtime_measurements
+
+The IMA ASCII measurement log has the following format:
+
+::
+
+ <PCR> <TEMPLATE_DATA_DIGEST> <TEMPLATE_NAME> <TEMPLATE_DATA>
+
+ PCR := Platform Configuration Register, in which the values are registered.
+ This is applicable if TPM chip is in use.
+
+ TEMPLATE_DATA_DIGEST := Template data digest of the IMA record.
+ TEMPLATE_NAME := Template name that registered the integrity value (e.g. ima-buf).
+
+ TEMPLATE_DATA := <ALG> ":" <EVENT_DIGEST> <EVENT_NAME> <EVENT_DATA>
+ It contains data for the specific event to be measured,
+ in a given template data format.
+
+ ALG := Algorithm to compute event digest
+ EVENT_DIGEST := Digest of the event data
+ EVENT_NAME := Description of the event (e.g. 'dm_table_load').
+ EVENT_DATA := The event data to be measured.
+
+|
+
+| *NOTE #1:*
+| The DM target data measured by IMA subsystem can alternatively
+ be queried from userspace by setting DM_IMA_MEASUREMENT_FLAG with
+ DM_TABLE_STATUS_CMD.
+
+|
+
+| *NOTE #2:*
+| The Kernel configuration CONFIG_IMA_DISABLE_HTABLE allows measurement of duplicate records.
+| To support recording duplicate IMA events in the IMA log, the Kernel needs to be configured with
+ CONFIG_IMA_DISABLE_HTABLE=y.
+
+Supported Device States:
+========================
+The following device state changes will trigger IMA measurements:
+
+ 1. Table load
+ #. Device resume
+ #. Device remove
+ #. Table clear
+ #. Device rename
+
+1. Table load:
+---------------
+When a new table is loaded in a device's inactive table slot,
+the device information and target specific details from the
+targets in the table are measured.
+
+The IMA measurement log has the following format for 'dm_table_load':
+
+::
+
+ EVENT_NAME := "dm_table_load"
+ EVENT_DATA := <dm_version_str> ";" <device_metadata> ";" <table_load_data>
+
+ dm_version_str := "dm_version=" <N> "." <N> "." <N>
+ Same as Device Mapper driver version.
+ device_metadata := <device_name> "," <device_uuid> "," <device_major> "," <device_minor> ","
+ <minor_count> "," <num_device_targets> ";"
+
+ device_name := "name=" <dm-device-name>
+ device_uuid := "uuid=" <dm-device-uuid>
+ device_major := "major=" <N>
+ device_minor := "minor=" <N>
+ minor_count := "minor_count=" <N>
+ num_device_targets := "num_targets=" <N>
+ dm-device-name := Name of the device. If it contains special characters like '\', ',', ';',
+ they are prefixed with '\'.
+ dm-device-uuid := UUID of the device. If it contains special characters like '\', ',', ';',
+ they are prefixed with '\'.
+
+ table_load_data := <target_data>
+ Represents the data (as name=value pairs) from various targets in the table,
+ which is being loaded into the DM device's inactive table slot.
+ target_data := <target_data_row> | <target_data><target_data_row>
+
+ target_data_row := <target_index> "," <target_begin> "," <target_len> "," <target_name> ","
+ <target_version> "," <target_attributes> ";"
+ target_index := "target_index=" <N>
+ Represents nth target in the table (from 0 to N-1 targets specified in <num_device_targets>)
+ If all the data for N targets doesn't fit in the given buffer - then the data that fits
+ in the buffer (say from target 0 to x) is measured in a given IMA event.
+ The remaining data from targets x+1 to N-1 is measured in the subsequent IMA events,
+ with the same format as that of 'dm_table_load'
+ i.e. <dm_version_str> ";" <device_metadata> ";" <table_load_data>.
+
+ target_begin := "target_begin=" <N>
+ target_len := "target_len=" <N>
+ target_name := Name of the target. 'linear', 'crypt', 'integrity' etc.
+ The targets that are supported for IMA measurements are documented below in the
+ 'Supported targets' section.
+ target_version := "target_version=" <N> "." <N> "." <N>
+ target_attributes := Data containing comma separated list of name=value pairs of target specific attributes.
+
+ For instance, if a linear device is created with the following table entries,
+ # dmsetup create linear1
+ 0 2 linear /dev/loop0 512
+ 2 2 linear /dev/loop0 512
+ 4 2 linear /dev/loop0 512
+ 6 2 linear /dev/loop0 512
+
+ Then IMA ASCII measurement log will have the following entry:
+ (converted from ASCII to text for readability)
+
+ 10 a8c5ff755561c7a28146389d1514c318592af49a ima-buf sha256:4d73481ecce5eadba8ab084640d85bb9ca899af4d0a122989252a76efadc5b72
+ dm_table_load
+ dm_version=4.45.0;
+ name=linear1,uuid=,major=253,minor=0,minor_count=1,num_targets=4;
+ target_index=0,target_begin=0,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512;
+ target_index=1,target_begin=2,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512;
+ target_index=2,target_begin=4,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512;
+ target_index=3,target_begin=6,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512;
+
+2. Device resume:
+------------------
+When a suspended device is resumed, the device information and the hash of the
+data from previous load of an active table are measured.
+
+The IMA measurement log has the following format for 'dm_device_resume':
+
+::
+
+ EVENT_NAME := "dm_device_resume"
+ EVENT_DATA := <dm_version_str> ";" <device_metadata> ";" <active_table_hash> ";" <current_device_capacity> ";"
+
+ dm_version_str := As described in the 'Table load' section above.
+ device_metadata := As described in the 'Table load' section above.
+ active_table_hash := "active_table_hash=" <table_hash_alg> ":" <table_hash>
+ Represents the hash of the IMA data being measured for the
+ active table for the device.
+ table_hash_alg := Algorithm used to compute the hash.
+ table_hash := Hash of the (<dm_version_str> ";" <device_metadata> ";" <table_load_data> ";")
+ as described in the 'dm_table_load' above.
+ Note: If the table_load data spans across multiple IMA 'dm_table_load'
+ events for a given device, the hash is computed combining all the event data
+ i.e. (<dm_version_str> ";" <device_metadata> ";" <table_load_data> ";")
+ across all those events.
+ current_device_capacity := "current_device_capacity=" <N>
+
+ For instance, if a linear device is resumed with the following command,
+ #dmsetup resume linear1
+
+ then IMA ASCII measurement log will have an entry with:
+ (converted from ASCII to text for readability)
+
+ 10 56c00cc062ffc24ccd9ac2d67d194af3282b934e ima-buf sha256:e7d12c03b958b4e0e53e7363a06376be88d98a1ac191fdbd3baf5e4b77f329b6
+ dm_device_resume
+ dm_version=4.45.0;
+ name=linear1,uuid=,major=253,minor=0,minor_count=1,num_targets=4;
+ active_table_hash=sha256:4d73481ecce5eadba8ab084640d85bb9ca899af4d0a122989252a76efadc5b72;current_device_capacity=8;
+
+3. Device remove:
+------------------
+When a device is removed, the device information and a sha256 hash of the
+data from an active and inactive table are measured.
+
+The IMA measurement log has the following format for 'dm_device_remove':
+
+::
+
+ EVENT_NAME := "dm_device_remove"
+ EVENT_DATA := <dm_version_str> ";" <device_active_metadata> ";" <device_inactive_metadata> ";"
+ <active_table_hash> "," <inactive_table_hash> "," <remove_all> ";" <current_device_capacity> ";"
+
+ dm_version_str := As described in the 'Table load' section above.
+ device_active_metadata := Device metadata that reflects the currently loaded active table.
+ The format is same as 'device_metadata' described in the 'Table load' section above.
+ device_inactive_metadata := Device metadata that reflects the inactive table.
+ The format is same as 'device_metadata' described in the 'Table load' section above.
+ active_table_hash := Hash of the currently loaded active table.
+ The format is same as 'active_table_hash' described in the 'Device resume' section above.
+ inactive_table_hash := Hash of the inactive table.
+ The format is same as 'active_table_hash' described in the 'Device resume' section above.
+ remove_all := "remove_all=" <yes_no>
+ yes_no := "y" | "n"
+ current_device_capacity := "current_device_capacity=" <N>
+
+ For instance, if a linear device is removed with the following command,
+ #dmsetup remove l1
+
+ then IMA ASCII measurement log will have the following entry:
+ (converted from ASCII to text for readability)
+
+ 10 790e830a3a7a31590824ac0642b3b31c2d0e8b38 ima-buf sha256:ab9f3c959367a8f5d4403d6ce9c3627dadfa8f9f0e7ec7899299782388de3840
+ dm_device_remove
+ dm_version=4.45.0;
+ device_active_metadata=name=l1,uuid=,major=253,minor=2,minor_count=1,num_targets=2;
+ device_inactive_metadata=name=l1,uuid=,major=253,minor=2,minor_count=1,num_targets=1;
+ active_table_hash=sha256:4a7e62efaebfc86af755831998b7db6f59b60d23c9534fb16a4455907957953a,
+ inactive_table_hash=sha256:9d79c175bc2302d55a183e8f50ad4bafd60f7692fd6249e5fd213e2464384b86,remove_all=n;
+ current_device_capacity=2048;
+
+4. Table clear:
+----------------
+When an inactive table is cleared from the device, the device information and a sha256 hash of the
+data from an inactive table are measured.
+
+The IMA measurement log has the following format for 'dm_table_clear':
+
+::
+
+ EVENT_NAME := "dm_table_clear"
+ EVENT_DATA := <dm_version_str> ";" <device_inactive_metadata> ";" <inactive_table_hash> ";" <current_device_capacity> ";"
+
+ dm_version_str := As described in the 'Table load' section above.
+ device_inactive_metadata := Device metadata that was captured during the load time inactive table being cleared.
+ The format is same as 'device_metadata' described in the 'Table load' section above.
+ inactive_table_hash := Hash of the inactive table being cleared from the device.
+ The format is same as 'active_table_hash' described in the 'Device resume' section above.
+ current_device_capacity := "current_device_capacity=" <N>
+
+ For instance, if a linear device's inactive table is cleared,
+ #dmsetup clear l1
+
+ then IMA ASCII measurement log will have an entry with:
+ (converted from ASCII to text for readability)
+
+ 10 77d347408f557f68f0041acb0072946bb2367fe5 ima-buf sha256:42f9ca22163fdfa548e6229dece2959bc5ce295c681644240035827ada0e1db5
+ dm_table_clear
+ dm_version=4.45.0;
+ name=l1,uuid=,major=253,minor=2,minor_count=1,num_targets=1;
+ inactive_table_hash=sha256:75c0dc347063bf474d28a9907037eba060bfe39d8847fc0646d75e149045d545;current_device_capacity=1024;
+
+5. Device rename:
+------------------
+When a device's NAME or UUID is changed, the device information and the new NAME and UUID
+are measured.
+
+The IMA measurement log has the following format for 'dm_device_rename':
+
+::
+
+ EVENT_NAME := "dm_device_rename"
+ EVENT_DATA := <dm_version_str> ";" <device_active_metadata> ";" <new_device_name> "," <new_device_uuid> ";" <current_device_capacity> ";"
+
+ dm_version_str := As described in the 'Table load' section above.
+ device_active_metadata := Device metadata that reflects the currently loaded active table.
+ The format is same as 'device_metadata' described in the 'Table load' section above.
+ new_device_name := "new_name=" <dm-device-name>
+ dm-device-name := Same as <dm-device-name> described in 'Table load' section above
+ new_device_uuid := "new_uuid=" <dm-device-uuid>
+ dm-device-uuid := Same as <dm-device-uuid> described in 'Table load' section above
+ current_device_capacity := "current_device_capacity=" <N>
+
+ E.g 1: if a linear device's UUID is changed with the following command,
+ #dmsetup rename linear1 --setuuid 1234-5678
+
+ then IMA ASCII measurement log will have an entry with:
+ (converted from ASCII to text for readability)
+
+ 10 8b0423209b4c66ac1523f4c9848c9b51ee332f48 ima-buf sha256:6847b7258134189531db593e9230b257c84f04038b5a18fd2e1473860e0569ac
+ dm_device_rename
+ dm_version=4.45.0;
+ name=linear1,uuid=,major=253,minor=2,minor_count=1,num_targets=1;new_name=linear1,new_uuid=1234-5678;
+ current_device_capacity=1024;
+
+ E.g 2: if a linear device's name is changed with the following command,
+ # dmsetup rename linear1 linear=2
+
+ then IMA ASCII measurement log will have an entry with:
+ (converted from ASCII to text for readability)
+
+ 10 bef70476b99c2bdf7136fae033aa8627da1bf76f ima-buf sha256:8c6f9f53b9ef9dc8f92a2f2cca8910e622543d0f0d37d484870cb16b95111402
+ dm_device_rename
+ dm_version=4.45.0;
+ name=linear1,uuid=1234-5678,major=253,minor=2,minor_count=1,num_targets=1;
+ new_name=linear\=2,new_uuid=1234-5678;
+ current_device_capacity=1024;
+
+Supported targets:
+==================
+
+The following targets are supported to measure their data using IMA:
+
+ 1. cache
+ #. crypt
+ #. integrity
+ #. linear
+ #. mirror
+ #. multipath
+ #. raid
+ #. snapshot
+ #. striped
+ #. verity
+
+1. cache
+---------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'cache' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <metadata_mode> "," <cache_metadata_device> ","
+ <cache_device> "," <cache_origin_device> "," <writethrough> "," <writeback> ","
+ <passthrough> "," <no_discard_passdown> ";"
+
+ target_name := "target_name=cache"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ metadata_mode := "metadata_mode=" <cache_metadata_mode>
+ cache_metadata_mode := "fail" | "ro" | "rw"
+ cache_device := "cache_device=" <cache_device_name_string>
+ cache_origin_device := "cache_origin_device=" <cache_origin_device_string>
+ writethrough := "writethrough=" <yes_no>
+ writeback := "writeback=" <yes_no>
+ passthrough := "passthrough=" <yes_no>
+ no_discard_passdown := "no_discard_passdown=" <yes_no>
+ yes_no := "y" | "n"
+
+ E.g.
+ When a 'cache' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'cache' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;name=cache1,uuid=cache_uuid,major=253,minor=2,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=28672,target_name=cache,target_version=2.2.0,metadata_mode=rw,
+ cache_metadata_device=253:4,cache_device=253:3,cache_origin_device=253:5,writethrough=y,writeback=n,
+ passthrough=n,metadata2=y,no_discard_passdown=n;
+
+
+2. crypt
+---------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'crypt' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <allow_discards> "," <same_cpu_crypt> ","
+ <submit_from_crypt_cpus> "," <no_read_workqueue> "," <no_write_workqueue> ","
+ <iv_large_sectors> "," [<integrity_tag_size> ","] [<cipher_auth> ","]
+ [<sector_size> ","] [<cipher_string> ","] <key_size> "," <key_parts> ","
+ <key_extra_size> "," <key_mac_size> ";"
+
+ target_name := "target_name=crypt"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ allow_discards := "allow_discards=" <yes_no>
+ same_cpu_crypt := "same_cpu_crypt=" <yes_no>
+ submit_from_crypt_cpus := "submit_from_crypt_cpus=" <yes_no>
+ no_read_workqueue := "no_read_workqueue=" <yes_no>
+ no_write_workqueue := "no_write_workqueue=" <yes_no>
+ iv_large_sectors := "iv_large_sectors=" <yes_no>
+ integrity_tag_size := "integrity_tag_size=" <N>
+ cipher_auth := "cipher_auth=" <string>
+ sector_size := "sector_size=" <N>
+ cipher_string := "cipher_string=" <string>
+ key_size := "key_size=" <N>
+ key_parts := "key_parts=" <N>
+ key_extra_size := "key_extra_size=" <N>
+ key_mac_size := "key_mac_size=" <N>
+ yes_no := "y" | "n"
+
+ E.g.
+ When a 'crypt' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'crypt' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=crypt1,uuid=crypt_uuid1,major=253,minor=0,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=1953125,target_name=crypt,target_version=1.23.0,
+ allow_discards=y,same_cpu=n,submit_from_crypt_cpus=n,no_read_workqueue=n,no_write_workqueue=n,
+ iv_large_sectors=n,cipher_string=aes-xts-plain64,key_size=32,key_parts=1,key_extra_size=0,key_mac_size=0;
+
+3. integrity
+-------------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'integrity' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <dev_name> "," <start> ","
+ <tag_size> "," <mode> "," [<meta_device> ","] [<block_size> ","] <recalculate> ","
+ <allow_discards> "," <fix_padding> "," <fix_hmac> "," <legacy_recalculate> ","
+ <journal_sectors> "," <interleave_sectors> "," <buffer_sectors> ";"
+
+ target_name := "target_name=integrity"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ dev_name := "dev_name=" <device_name_str>
+ start := "start=" <N>
+ tag_size := "tag_size=" <N>
+ mode := "mode=" <integrity_mode_str>
+ integrity_mode_str := "J" | "B" | "D" | "R"
+ meta_device := "meta_device=" <meta_device_str>
+ block_size := "block_size=" <N>
+ recalculate := "recalculate=" <yes_no>
+ allow_discards := "allow_discards=" <yes_no>
+ fix_padding := "fix_padding=" <yes_no>
+ fix_hmac := "fix_hmac=" <yes_no>
+ legacy_recalculate := "legacy_recalculate=" <yes_no>
+ journal_sectors := "journal_sectors=" <N>
+ interleave_sectors := "interleave_sectors=" <N>
+ buffer_sectors := "buffer_sectors=" <N>
+ yes_no := "y" | "n"
+
+ E.g.
+ When an 'integrity' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'integrity' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=integrity1,uuid=,major=253,minor=1,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=7856,target_name=integrity,target_version=1.10.0,
+ dev_name=253:0,start=0,tag_size=32,mode=J,recalculate=n,allow_discards=n,fix_padding=n,
+ fix_hmac=n,legacy_recalculate=n,journal_sectors=88,interleave_sectors=32768,buffer_sectors=128;
+
+
+4. linear
+----------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'linear' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <device_name> "," <start> ";"
+
+ target_name := "target_name=linear"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ device_name := "device_name=" <linear_device_name_str>
+ start := "start=" <N>
+
+ E.g.
+ When a 'linear' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'linear' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=linear1,uuid=linear_uuid1,major=253,minor=2,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=28672,target_name=linear,target_version=1.4.0,
+ device_name=253:1,start=2048;
+
+5. mirror
+----------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'mirror' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <nr_mirrors> ","
+ <mirror_device_data> "," <handle_errors> "," <keep_log> "," <log_type_status> ";"
+
+ target_name := "target_name=mirror"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ nr_mirrors := "nr_mirrors=" <NR>
+ mirror_device_data := <mirror_device_row> | <mirror_device_data><mirror_device_row>
+ mirror_device_row is repeated <NR> times - for <NR> described in <nr_mirrors>.
+ mirror_device_row := <mirror_device_name> "," <mirror_device_status>
+ mirror_device_name := "mirror_device_" <X> "=" <mirror_device_name_str>
+ where <X> ranges from 0 to (<NR> -1) - for <NR> described in <nr_mirrors>.
+ mirror_device_status := "mirror_device_" <X> "_status=" <mirror_device_status_char>
+ where <X> ranges from 0 to (<NR> -1) - for <NR> described in <nr_mirrors>.
+ mirror_device_status_char := "A" | "F" | "D" | "S" | "R" | "U"
+ handle_errors := "handle_errors=" <yes_no>
+ keep_log := "keep_log=" <yes_no>
+ log_type_status := "log_type_status=" <log_type_status_str>
+ yes_no := "y" | "n"
+
+ E.g.
+ When a 'mirror' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'mirror' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=mirror1,uuid=mirror_uuid1,major=253,minor=6,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=2048,target_name=mirror,target_version=1.14.0,nr_mirrors=2,
+ mirror_device_0=253:4,mirror_device_0_status=A,
+ mirror_device_1=253:5,mirror_device_1_status=A,
+ handle_errors=y,keep_log=n,log_type_status=;
+
+6. multipath
+-------------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'multipath' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <nr_priority_groups>
+ ["," <pg_state> "," <priority_groups> "," <priority_group_paths>] ";"
+
+ target_name := "target_name=multipath"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ nr_priority_groups := "nr_priority_groups=" <NPG>
+ priority_groups := <priority_groups_row>|<priority_groups_row><priority_groups>
+ priority_groups_row := "pg_state_" <X> "=" <pg_state_str> "," "nr_pgpaths_" <X> "=" <NPGP> ","
+ "path_selector_name_" <X> "=" <string> "," <priority_group_paths>
+ where <X> ranges from 0 to (<NPG> -1) - for <NPG> described in <nr_priority_groups>.
+ pg_state_str := "E" | "A" | "D"
+ priority_group_paths := <priority_group_paths_row> | <priority_group_paths_row><priority_group_paths>
+ priority_group_paths_row := "path_name_" <X> "_" <Y> "=" <string> "," "is_active_" <X> "_" <Y> "=" <is_active_str> ","
+ "fail_count_" <X> "_" <Y> "=" <N> "," "path_selector_status_" <X> "_" <Y> "=" <path_selector_status_str>
+ where <X> ranges from 0 to (<NPG> -1) - for <NPG> described in <nr_priority_groups>,
+ and <Y> ranges from 0 to (<NPGP> -1) - for <NPGP> described in <priority_groups_row>.
+ is_active_str := "A" | "F"
+
+ E.g.
+ When a 'multipath' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'multipath' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=mp,uuid=,major=253,minor=0,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=2097152,target_name=multipath,target_version=1.14.0,nr_priority_groups=2,
+ pg_state_0=E,nr_pgpaths_0=2,path_selector_name_0=queue-length,
+ path_name_0_0=8:16,is_active_0_0=A,fail_count_0_0=0,path_selector_status_0_0=,
+ path_name_0_1=8:32,is_active_0_1=A,fail_count_0_1=0,path_selector_status_0_1=,
+ pg_state_1=E,nr_pgpaths_1=2,path_selector_name_1=queue-length,
+ path_name_1_0=8:48,is_active_1_0=A,fail_count_1_0=0,path_selector_status_1_0=,
+ path_name_1_1=8:64,is_active_1_1=A,fail_count_1_1=0,path_selector_status_1_1=;
+
+7. raid
+--------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'raid' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <raid_type> "," <raid_disks> "," <raid_state> ","
+ <raid_device_status> ["," <journal_dev_mode>] ";"
+
+ target_name := "target_name=raid"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ raid_type := "raid_type=" <raid_type_str>
+ raid_disks := "raid_disks=" <NRD>
+ raid_state := "raid_state=" <raid_state_str>
+ raid_state_str := "frozen" | "reshape" | "resync" | "check" | "repair" | "recover" | "idle" | "undef"
+ raid_device_status := <raid_device_status_row> | <raid_device_status_row><raid_device_status>
+ <raid_device_status_row> is repeated <NRD> times - for <NRD> described in <raid_disks>.
+ raid_device_status_row := "raid_device_" <X> "_status=" <raid_device_status_str>
+ where <X> ranges from 0 to (<NRD> -1) - for <NRD> described in <raid_disks>.
+ raid_device_status_str := "A" | "D" | "a" | "-"
+ journal_dev_mode := "journal_dev_mode=" <journal_dev_mode_str>
+ journal_dev_mode_str := "writethrough" | "writeback" | "invalid"
+
+ E.g.
+ When a 'raid' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'raid' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=raid_LV1,uuid=uuid_raid_LV1,major=253,minor=12,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=2048,target_name=raid,target_version=1.15.1,
+ raid_type=raid10,raid_disks=4,raid_state=idle,
+ raid_device_0_status=A,
+ raid_device_1_status=A,
+ raid_device_2_status=A,
+ raid_device_3_status=A;
+
+
+8. snapshot
+------------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'snapshot' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <snap_origin_name> ","
+ <snap_cow_name> "," <snap_valid> "," <snap_merge_failed> "," <snapshot_overflowed> ";"
+
+ target_name := "target_name=snapshot"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ snap_origin_name := "snap_origin_name=" <string>
+ snap_cow_name := "snap_cow_name=" <string>
+ snap_valid := "snap_valid=" <yes_no>
+ snap_merge_failed := "snap_merge_failed=" <yes_no>
+ snapshot_overflowed := "snapshot_overflowed=" <yes_no>
+ yes_no := "y" | "n"
+
+ E.g.
+ When a 'snapshot' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'snapshot' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=snap1,uuid=snap_uuid1,major=253,minor=13,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=4096,target_name=snapshot,target_version=1.16.0,
+ snap_origin_name=253:11,snap_cow_name=253:12,snap_valid=y,snap_merge_failed=n,snapshot_overflowed=n;
+
+9. striped
+-----------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'striped' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <stripes> "," <chunk_size> ","
+ <stripe_data> ";"
+
+ target_name := "target_name=striped"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ stripes := "stripes=" <NS>
+ chunk_size := "chunk_size=" <N>
+ stripe_data := <stripe_data_row>|<stripe_data><stripe_data_row>
+ stripe_data_row := <stripe_device_name> "," <stripe_physical_start> "," <stripe_status>
+ stripe_device_name := "stripe_" <X> "_device_name=" <stripe_device_name_str>
+ where <X> ranges from 0 to (<NS> -1) - for <NS> described in <stripes>.
+ stripe_physical_start := "stripe_" <X> "_physical_start=" <N>
+ where <X> ranges from 0 to (<NS> -1) - for <NS> described in <stripes>.
+ stripe_status := "stripe_" <X> "_status=" <stripe_status_str>
+ where <X> ranges from 0 to (<NS> -1) - for <NS> described in <stripes>.
+ stripe_status_str := "D" | "A"
+
+ E.g.
+ When a 'striped' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'striped' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=striped1,uuid=striped_uuid1,major=253,minor=5,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=640,target_name=striped,target_version=1.6.0,stripes=2,chunk_size=64,
+ stripe_0_device_name=253:0,stripe_0_physical_start=2048,stripe_0_status=A,
+ stripe_1_device_name=253:3,stripe_1_physical_start=2048,stripe_1_status=A;
+
+10. verity
+----------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'verity' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <hash_failed> "," <verity_version> ","
+ <data_device_name> "," <hash_device_name> "," <verity_algorithm> "," <root_digest> ","
+ <salt> "," <ignore_zero_blocks> "," <check_at_most_once> ["," <root_hash_sig_key_desc>]
+ ["," <verity_mode>] ";"
+
+ target_name := "target_name=verity"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ hash_failed := "hash_failed=" <hash_failed_str>
+ hash_failed_str := "C" | "V"
+ verity_version := "verity_version=" <verity_version_str>
+ data_device_name := "data_device_name=" <data_device_name_str>
+ hash_device_name := "hash_device_name=" <hash_device_name_str>
+ verity_algorithm := "verity_algorithm=" <verity_algorithm_str>
+ root_digest := "root_digest=" <root_digest_str>
+ salt := "salt=" <salt_str>
+ salt_str := "-" | <verity_salt_str>
+ ignore_zero_blocks := "ignore_zero_blocks=" <yes_no>
+ check_at_most_once := "check_at_most_once=" <yes_no>
+ root_hash_sig_key_desc := "root_hash_sig_key_desc=" <string>
+ verity_mode := "verity_mode=" <verity_mode_str>
+ verity_mode_str := "ignore_corruption" | "restart_on_corruption" | "panic_on_corruption" | "invalid"
+ yes_no := "y" | "n"
+
+ E.g.
+ When a 'verity' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'verity' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=test-verity,uuid=,major=253,minor=2,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=1953120,target_name=verity,target_version=1.8.0,hash_failed=V,
+ verity_version=1,data_device_name=253:1,hash_device_name=253:0,verity_algorithm=sha256,
+ root_digest=29cb87e60ce7b12b443ba6008266f3e41e93e403d7f298f8e3f316b29ff89c5e,
+ salt=e48da609055204e89ae53b655ca2216dd983cf3cb829f34f63a297d106d53e2d,
+ ignore_zero_blocks=n,check_at_most_once=n;
diff --git a/Documentation/admin-guide/device-mapper/index.rst b/Documentation/admin-guide/device-mapper/index.rst
index 6cf8adc86fa8..cde52cc09645 100644
--- a/Documentation/admin-guide/device-mapper/index.rst
+++ b/Documentation/admin-guide/device-mapper/index.rst
@@ -13,6 +13,7 @@ Device Mapper
dm-dust
dm-ebs
dm-flakey
+ dm-ima
dm-init
dm-integrity
dm-io
diff --git a/Documentation/admin-guide/device-mapper/writecache.rst b/Documentation/admin-guide/device-mapper/writecache.rst
index 65427d8dfca6..10429779a91a 100644
--- a/Documentation/admin-guide/device-mapper/writecache.rst
+++ b/Documentation/admin-guide/device-mapper/writecache.rst
@@ -78,13 +78,23 @@ Status:
2. the number of blocks
3. the number of free blocks
4. the number of blocks under writeback
+5. the number of read requests
+6. the number of read requests that hit the cache
+7. the number of write requests
+8. the number of write requests that hit uncommitted block
+9. the number of write requests that hit committed block
+10. the number of write requests that bypass the cache
+11. the number of write requests that are allocated in the cache
+12. the number of write requests that are blocked on the freelist
+13. the number of flush requests
+14. the number of discard requests
Messages:
flush
- flush the cache device. The message returns successfully
+ Flush the cache device. The message returns successfully
if the cache device was flushed without an error
flush_on_suspend
- flush the cache device on next suspend. Use this message
+ Flush the cache device on next suspend. Use this message
when you are going to remove the cache device. The proper
sequence for removing the cache device is:
@@ -98,3 +108,5 @@ Messages:
6. the cache device is now inactive and it can be deleted
cleaner
See above "cleaner" constructor documentation.
+ clear_stats
+ Clear the statistics that are reported on the status line
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index f12cda55538b..8cbc711cda93 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -16,3 +16,4 @@ are configurable at compile, boot or run time.
multihit.rst
special-register-buffer-data-sampling.rst
core-scheduling.rst
+ l1d_flush.rst
diff --git a/Documentation/admin-guide/hw-vuln/l1d_flush.rst b/Documentation/admin-guide/hw-vuln/l1d_flush.rst
new file mode 100644
index 000000000000..210020bc3f56
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/l1d_flush.rst
@@ -0,0 +1,69 @@
+L1D Flushing
+============
+
+With an increasing number of vulnerabilities being reported around data
+leaks from the Level 1 Data cache (L1D) the kernel provides an opt-in
+mechanism to flush the L1D cache on context switch.
+
+This mechanism can be used to address e.g. CVE-2020-0550. It keeps
+applications that opt in safe from vulnerabilities related to leaks from
+(snooping of) the L1D cache.
+
+
+Related CVEs
+------------
+The following CVEs can be addressed by this
+mechanism
+
+ ============= ======================== ==================
+ CVE-2020-0550 Improper Data Forwarding OS related aspects
+ ============= ======================== ==================
+
+Usage Guidelines
+----------------
+
+Please see document: :ref:`Documentation/userspace-api/spec_ctrl.rst
+<set_spec_ctrl>` for details.
+
+**NOTE**: The feature is disabled by default, applications need to
+specifically opt into the feature to enable it.
+
+Mitigation
+----------
+
+When PR_SET_L1D_FLUSH is enabled for a task a flush of the L1D cache is
+performed when the task is scheduled out and the incoming task belongs to a
+different process and therefore to a different address space.
+
+If the underlying CPU supports L1D flushing in hardware, the hardware
+mechanism is used. A software fallback for the mitigation is not supported.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+The kernel command line allows controlling the L1D flush mitigations at boot
+time with the option "l1d_flush=". The valid arguments for this option are:
+
+ ============ =============================================================
+ on Enables the prctl interface; applications trying to use
+ the prctl() will fail with an error if l1d_flush is not
+ enabled
+ ============ =============================================================
+
+By default the mechanism is disabled.
+
+Limitations
+-----------
+
+The mechanism does not mitigate L1D data leaks between tasks belonging to
+different processes which are concurrently executing on sibling threads of
+a physical CPU core when SMT is enabled on the system.
+
+This can be addressed by controlled placement of processes on physical CPU
+cores or by disabling SMT. See the relevant chapter in the L1TF mitigation
+document: :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <smt_control>`.
+
+**NOTE** : The opt-in of a task for L1D flushing works only when the task's
+affinity is limited to cores running in non-SMT mode. If a task which
+requested L1D flushing is scheduled on an SMT-enabled core the kernel sends
+a SIGBUS to the task.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bdb22006f713..2102467faad6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2421,6 +2421,23 @@
feature (tagged TLBs) on capable Intel chips.
Default is 1 (enabled)
+ l1d_flush= [X86,INTEL]
+ Control mitigation for L1D based snooping vulnerability.
+
+ Certain CPUs are vulnerable to an exploit against CPU
+ internal buffers which can forward information to a
+ disclosure gadget under certain conditions.
+
+ In vulnerable processors, the speculatively
+ forwarded data can be used in a cache side channel
+ attack, to access data to which the attacker does
+ not have direct access.
+
+ This parameter controls the mitigation. The
+ options are:
+
+ on - enable the interface for the mitigation
+
l1tf= [X86] Control mitigation of the L1TF vulnerability on
affected CPUs
@@ -4777,7 +4794,7 @@
reboot= [KNL]
Format (x86 or x86_64):
- [w[arm] | c[old] | h[ard] | s[oft] | g[pio]] \
+ [w[arm] | c[old] | h[ard] | s[oft] | g[pio] | d[efault]] \
[[,]s[mp]#### \
[[,]b[ios] | a[cpi] | k[bd] | t[riple] | e[fi] | p[ci]] \
[[,]f[orce]
@@ -4945,8 +4962,6 @@
sa1100ir [NET]
See drivers/net/irda/sa1100_ir.c.
- sbni= [NET] Granch SBNI12 leased line adapter
-
sched_verbose [KNL] Enables verbose scheduler debug messages.
schedstats= [KNL,X86] Enable or disable scheduled statistics.
diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt
index 0f1fdedf36bb..0f1ffa03db09 100644
--- a/Documentation/atomic_t.txt
+++ b/Documentation/atomic_t.txt
@@ -271,3 +271,97 @@ WRITE_ONCE. Thus:
SC *y, t;
is allowed.
+
+
+CMPXCHG vs TRY_CMPXCHG
+----------------------
+
+ int atomic_cmpxchg(atomic_t *ptr, int old, int new);
+ bool atomic_try_cmpxchg(atomic_t *ptr, int *oldp, int new);
+
+Both provide the same functionality, but try_cmpxchg() can lead to more
+compact code. The functions relate like:
+
+ bool atomic_try_cmpxchg(atomic_t *ptr, int *oldp, int new)
+ {
+ int ret, old = *oldp;
+ ret = atomic_cmpxchg(ptr, old, new);
+ if (ret != old)
+ *oldp = ret;
+ return ret == old;
+ }
+
+and:
+
+ int atomic_cmpxchg(atomic_t *ptr, int old, int new)
+ {
+ (void)atomic_try_cmpxchg(ptr, &old, new);
+ return old;
+ }
+
+Usage:
+
+ old = atomic_read(&v); old = atomic_read(&v);
+ for (;;) { do {
+ new = func(old); new = func(old);
+ tmp = atomic_cmpxchg(&v, old, new); } while (!atomic_try_cmpxchg(&v, &old, new));
+ if (tmp == old)
+ break;
+ old = tmp;
+ }
+
+NB. try_cmpxchg() also generates better code on some platforms (notably x86)
+where the function more closely matches the hardware instruction.
+
+
+FORWARD PROGRESS
+----------------
+
+In general strong forward progress is expected of all unconditional atomic
+operations -- those in the Arithmetic and Bitwise classes and xchg(). However
+a fair amount of code also requires forward progress from the conditional
+atomic operations.
+
+Specifically 'simple' cmpxchg() loops are expected to not starve one another
+indefinitely. However, this is not evident on LL/SC architectures, because
+while an LL/SC architecture 'can/should/must' provide forward progress
+guarantees between competing LL/SC sections, such a guarantee does not
+transfer to cmpxchg() implemented using LL/SC. Consider:
+
+ old = atomic_read(&v);
+ do {
+ new = func(old);
+ } while (!atomic_try_cmpxchg(&v, &old, new));
+
+which on LL/SC becomes something like:
+
+ old = atomic_read(&v);
+ do {
+ new = func(old);
+ } while (!({
+ asm volatile ("1: LL %[oldval], %[v]\n"
+ " CMP %[oldval], %[old]\n"
+ " BNE 2f\n"
+ " SC %[new], %[v]\n"
+ " BNE 1b\n"
+ "2:\n"
+ : [oldval] "=&r" (oldval), [v] "m" (v)
+ : [old] "r" (old), [new] "r" (new)
+ : "memory");
+ success = (oldval == old);
+ if (!success)
+ old = oldval;
+ success; }));
+
+However, even the forward branch from the failed compare can cause the LL/SC
+to fail on some architectures, let alone whatever the compiler makes of the C
+loop body. As a result there is no guarantee whatsoever the cacheline
+containing @v will stay on the local CPU and progress is made.
+
+Even native CAS architectures can fail to provide forward progress for their
+primitive (See Sparc64 for an example).
+
+Such implementations are strongly encouraged to add exponential backoff loops
+to a failed CAS in order to ensure some progress. Affected architectures are
+also strongly encouraged to inspect/audit the atomic fallbacks, refcount_t and
+their locking primitives.
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index baea6c2abba5..1ceb5d704a97 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -15,15 +15,7 @@ that goes into great technical depth about the BPF Architecture.
libbpf
======
-Libbpf is a userspace library for loading and interacting with bpf programs.
-
-.. toctree::
- :maxdepth: 1
-
- libbpf/libbpf
- libbpf/libbpf_api
- libbpf/libbpf_build
- libbpf/libbpf_naming_convention
+Documentation/bpf/libbpf/index.rst is a userspace library for loading and interacting with bpf programs.
BPF Type Format (BTF)
=====================
diff --git a/Documentation/bpf/libbpf/libbpf.rst b/Documentation/bpf/libbpf/index.rst
index 1b1e61d5ead1..4f8adfc3ab83 100644
--- a/Documentation/bpf/libbpf/libbpf.rst
+++ b/Documentation/bpf/libbpf/index.rst
@@ -3,6 +3,14 @@
libbpf
======
+For API documentation see the `versioned API documentation site <https://libbpf.readthedocs.io/en/latest/api.html>`_.
+
+.. toctree::
+ :maxdepth: 1
+
+ libbpf_naming_convention
+ libbpf_build
+
This is documentation for libbpf, a userspace library for loading and
interacting with bpf programs.
diff --git a/Documentation/bpf/libbpf/libbpf_api.rst b/Documentation/bpf/libbpf/libbpf_api.rst
deleted file mode 100644
index f07eecd054da..000000000000
--- a/Documentation/bpf/libbpf/libbpf_api.rst
+++ /dev/null
@@ -1,27 +0,0 @@
-.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-API
-===
-
-This documentation is autogenerated from header files in libbpf, tools/lib/bpf
-
-.. kernel-doc:: tools/lib/bpf/libbpf.h
- :internal:
-
-.. kernel-doc:: tools/lib/bpf/bpf.h
- :internal:
-
-.. kernel-doc:: tools/lib/bpf/btf.h
- :internal:
-
-.. kernel-doc:: tools/lib/bpf/xsk.h
- :internal:
-
-.. kernel-doc:: tools/lib/bpf/bpf_tracing.h
- :internal:
-
-.. kernel-doc:: tools/lib/bpf/bpf_core_read.h
- :internal:
-
-.. kernel-doc:: tools/lib/bpf/bpf_endian.h
- :internal: \ No newline at end of file
diff --git a/Documentation/bpf/libbpf/libbpf_naming_convention.rst b/Documentation/bpf/libbpf/libbpf_naming_convention.rst
index 3de1d51e41da..9c68d5014ff1 100644
--- a/Documentation/bpf/libbpf/libbpf_naming_convention.rst
+++ b/Documentation/bpf/libbpf/libbpf_naming_convention.rst
@@ -69,7 +69,7 @@ functions. These can be mixed and matched. Note that these functions
are not reentrant for performance reasons.
ABI
-==========
+---
libbpf can be both linked statically or used as DSO. To avoid possible
conflicts with other libraries an application is linked with, all
@@ -108,7 +108,7 @@ This bump in ABI version is at most once per kernel development cycle.
For example, if current state of ``libbpf.map`` is:
-.. code-block:: c
+.. code-block:: none
LIBBPF_0.0.1 {
global:
@@ -121,7 +121,7 @@ For example, if current state of ``libbpf.map`` is:
, and a new symbol ``bpf_func_c`` is being introduced, then
``libbpf.map`` should be changed like this:
-.. code-block:: c
+.. code-block:: none
LIBBPF_0.0.1 {
global:
diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst
index a2c96bec5ee8..1122cd3044c0 100644
--- a/Documentation/core-api/cpu_hotplug.rst
+++ b/Documentation/core-api/cpu_hotplug.rst
@@ -220,7 +220,7 @@ goes online (offline) and during initial setup (shutdown) of the driver. However
each registration and removal function is also available with a ``_nocalls``
suffix which does not invoke the provided callbacks if the invocation of the
callbacks is not desired. During the manual setup (or teardown) the functions
-``get_online_cpus()`` and ``put_online_cpus()`` should be used to inhibit CPU
+``cpus_read_lock()`` and ``cpus_read_unlock()`` should be used to inhibit CPU
hotplug operations.
diff --git a/Documentation/core-api/irq/irq-domain.rst b/Documentation/core-api/irq/irq-domain.rst
index 53283b3729a1..6979b4af2c1f 100644
--- a/Documentation/core-api/irq/irq-domain.rst
+++ b/Documentation/core-api/irq/irq-domain.rst
@@ -55,8 +55,24 @@ exist then it will allocate a new Linux irq_desc, associate it with
the hwirq, and call the .map() callback so the driver can perform any
required hardware setup.
-When an interrupt is received, irq_find_mapping() function should
-be used to find the Linux IRQ number from the hwirq number.
+Once a mapping has been established, it can be retrieved or used via a
+variety of methods:
+
+- irq_resolve_mapping() returns a pointer to the irq_desc structure
+ for a given domain and hwirq number, and NULL if there was no
+ mapping.
+- irq_find_mapping() returns a Linux IRQ number for a given domain and
+ hwirq number, and 0 if there was no mapping
+- irq_linear_revmap() is now identical to irq_find_mapping(), and is
+ deprecated
+- generic_handle_domain_irq() handles an interrupt described by a
+ domain and a hwirq number
+- handle_domain_irq() does the same thing for root interrupt
+ controllers and deals with the set_irq_regs()/irq_enter() sequences
+ that most architectures require
+
+Note that irq domain lookups must happen in contexts that are
+compatible with an RCU read-side critical section.
The irq_create_mapping() function must be called *at least once*
before any call to irq_find_mapping(), lest the descriptor will not
@@ -137,7 +153,9 @@ required. Calling irq_create_direct_mapping() will allocate a Linux
IRQ number and call the .map() callback so that driver can program the
Linux IRQ number into the hardware.
-Most drivers cannot use this mapping.
+Most drivers cannot use this mapping, and it is now gated on the
+CONFIG_IRQ_DOMAIN_NOMAP option. Please refrain from introducing new
+users of this API.
Legacy
------
@@ -157,6 +175,10 @@ for IRQ numbers that are passed to struct device registrations. In that
case the Linux IRQ numbers cannot be dynamically assigned and the legacy
mapping should be used.
+As the name implies, the *_legacy() functions are deprecated and only
+exist to ease the support of ancient platforms. No new users should be
+added.
+
The legacy map assumes a contiguous range of IRQ numbers has already
been allocated for the controller and that the IRQ number can be
calculated by adding a fixed offset to the hwirq number, and
diff --git a/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt b/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt
deleted file mode 100644
index 18c3aea90df2..000000000000
--- a/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt
+++ /dev/null
@@ -1,44 +0,0 @@
------------------------------------------------------------------
-Device Tree Bindings for the Xilinx Zynq MPSoC Firmware Interface
------------------------------------------------------------------
-
-The zynqmp-firmware node describes the interface to platform firmware.
-ZynqMP has an interface to communicate with secure firmware. Firmware
-driver provides an interface to firmware APIs. Interface APIs can be
-used by any driver to communicate to PMUFW(Platform Management Unit).
-These requests include clock management, pin control, device control,
-power management service, FPGA service and other platform management
-services.
-
-Required properties:
- - compatible: Must contain any of below:
- "xlnx,zynqmp-firmware" for Zynq Ultrascale+ MPSoC
- "xlnx,versal-firmware" for Versal
- - method: The method of calling the PM-API firmware layer.
- Permitted values are:
- - "smc" : SMC #0, following the SMCCC
- - "hvc" : HVC #0, following the SMCCC
-
--------
-Example
--------
-
-Zynq Ultrascale+ MPSoC
-----------------------
-firmware {
- zynqmp_firmware: zynqmp-firmware {
- compatible = "xlnx,zynqmp-firmware";
- method = "smc";
- ...
- };
-};
-
-Versal
-------
-firmware {
- versal_firmware: versal-firmware {
- compatible = "xlnx,versal-firmware";
- method = "smc";
- ...
- };
-};
diff --git a/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.yaml b/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.yaml
new file mode 100644
index 000000000000..f14f7b454f07
--- /dev/null
+++ b/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.yaml
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/firmware/xilinx/xlnx,zynqmp-firmware.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Xilinx firmware driver
+
+maintainers:
+ - Nava kishore Manne <nava.manne@xilinx.com>
+
+description: The zynqmp-firmware node describes the interface to platform
+ firmware. ZynqMP has an interface to communicate with secure firmware.
+ Firmware driver provides an interface to firmware APIs. Interface APIs
+ can be used by any driver to communicate to PMUFW(Platform Management Unit).
+ These requests include clock management, pin control, device control,
+ power management service, FPGA service and other platform management
+ services.
+
+properties:
+ compatible:
+ oneOf:
+ - description: For implementations complying for Zynq Ultrascale+ MPSoC.
+ const: xlnx,zynqmp-firmware
+
+ - description: For implementations complying for Versal.
+ const: xlnx,versal-firmware
+
+ method:
+ description: |
+ The method of calling the PM-API firmware layer.
+ Permitted values are:
+ - "smc" : SMC #0, following the SMCCC
+ - "hvc" : HVC #0, following the SMCCC
+
+ $ref: /schemas/types.yaml#/definitions/string-array
+ enum:
+ - smc
+ - hvc
+
+ versal_fpga:
+ $ref: /schemas/fpga/xlnx,versal-fpga.yaml#
+ description: Compatible of the FPGA device.
+ type: object
+
+ zynqmp-aes:
+ $ref: /schemas/crypto/xlnx,zynqmp-aes.yaml#
+ description: The ZynqMP AES-GCM hardened cryptographic accelerator is
+ used to encrypt or decrypt the data with provided key and initialization
+ vector.
+ type: object
+
+ clock-controller:
+ $ref: /schemas/clock/xlnx,versal-clk.yaml#
+ description: The clock controller is a hardware block of Xilinx versal
+ clock tree. It reads required input clock frequencies from the devicetree
+ and acts as clock provider for all clock consumers of PS clocks. List of
+ clock specifiers which are external input clocks to the given clock
+ controller.
+ type: object
+
+required:
+ - compatible
+
+additionalProperties: false
+
+examples:
+ - |
+ versal-firmware {
+ compatible = "xlnx,versal-firmware";
+ method = "smc";
+
+ versal_fpga: versal_fpga {
+ compatible = "xlnx,versal-fpga";
+ };
+
+ xlnx_aes: zynqmp-aes {
+ compatible = "xlnx,zynqmp-aes";
+ };
+
+ versal_clk: clock-controller {
+ #clock-cells = <1>;
+ compatible = "xlnx,versal-clk";
+ clocks = <&ref>, <&alt_ref>, <&pl_alt_ref>;
+ clock-names = "ref", "alt_ref", "pl_alt_ref";
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/fpga/xlnx,versal-fpga.yaml b/Documentation/devicetree/bindings/fpga/xlnx,versal-fpga.yaml
new file mode 100644
index 000000000000..ac6a207278d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/fpga/xlnx,versal-fpga.yaml
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/fpga/xlnx,versal-fpga.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Xilinx Versal FPGA driver.
+
+maintainers:
+ - Nava kishore Manne <nava.manne@xilinx.com>
+
+description: |
+ Device Tree Versal FPGA bindings for the Versal SoC, controlled
+ using firmware interface.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - xlnx,versal-fpga
+
+required:
+ - compatible
+
+additionalProperties: false
+
+examples:
+ - |
+ versal_fpga: versal_fpga {
+ compatible = "xlnx,versal-fpga";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/fsi/ibm,fsi2spi.yaml b/Documentation/devicetree/bindings/fsi/ibm,fsi2spi.yaml
index e425278653f5..e2ca0b000471 100644
--- a/Documentation/devicetree/bindings/fsi/ibm,fsi2spi.yaml
+++ b/Documentation/devicetree/bindings/fsi/ibm,fsi2spi.yaml
@@ -19,7 +19,6 @@ properties:
compatible:
enum:
- ibm,fsi2spi
- - ibm,fsi2spi-restricted
reg:
items:
diff --git a/Documentation/devicetree/bindings/gpio/rockchip,gpio-bank.yaml b/Documentation/devicetree/bindings/gpio/rockchip,gpio-bank.yaml
index d993e002cebe..0d62c28fb58d 100644
--- a/Documentation/devicetree/bindings/gpio/rockchip,gpio-bank.yaml
+++ b/Documentation/devicetree/bindings/gpio/rockchip,gpio-bank.yaml
@@ -22,7 +22,10 @@ properties:
maxItems: 1
clocks:
- maxItems: 1
+ minItems: 1
+ items:
+ - description: APB interface clock source
+ - description: GPIO debounce reference clock source
gpio-controller: true
diff --git a/Documentation/devicetree/bindings/hwmon/amd,sbrmi.yaml b/Documentation/devicetree/bindings/hwmon/amd,sbrmi.yaml
new file mode 100644
index 000000000000..7598b083979c
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/amd,sbrmi.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/hwmon/amd,sbrmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: >
+ Sideband Remote Management Interface (SB-RMI) compliant
+ AMD SoC power device.
+
+maintainers:
+ - Akshay Gupta <Akshay.Gupta@amd.com>
+
+description: |
+ SB Remote Management Interface (SB-RMI) is an SMBus compatible
+ interface that reports AMD SoC's Power (normalized Power) using
+ Mailbox Service Request and resembles a typical 8-pin remote power
+ sensor's I2C interface to BMC. The power attributes in hwmon
+ report power in microwatts.
+
+properties:
+ compatible:
+ enum:
+ - amd,sbrmi
+
+ reg:
+ maxItems: 1
+ description: |
+ I2C bus address of the device as specified in Section SBI SMBus Address
+ of the SoC register reference. The SB-RMI address is normally 78h for
+ socket 0 and 70h for socket 1, but it could vary based on hardware
+ address select pins.
+ \[open source SoC register reference\]
+ https://www.amd.com/en/support/tech-docs?keyword=55898
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ sbrmi@3c {
+ compatible = "amd,sbrmi";
+ reg = <0x3c>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/hwmon/winbond,w83781d.yaml b/Documentation/devicetree/bindings/hwmon/winbond,w83781d.yaml
new file mode 100644
index 000000000000..31ce77a4b087
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/winbond,w83781d.yaml
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+
+$id: http://devicetree.org/schemas/hwmon/winbond,w83781d.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Winbond W83781 and compatible hardware monitor IC
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+properties:
+ compatible:
+ enum:
+ - winbond,w83781d
+ - winbond,w83781g
+ - winbond,w83782d
+ - winbond,w83783s
+ - asus,as99127f
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ temperature-sensor@28 {
+ compatible = "winbond,w83781d";
+ reg = <0x28>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/iio/st,st-sensors.yaml b/Documentation/devicetree/bindings/iio/st,st-sensors.yaml
index b2a1e42c56fa..71de5631ebae 100644
--- a/Documentation/devicetree/bindings/iio/st,st-sensors.yaml
+++ b/Documentation/devicetree/bindings/iio/st,st-sensors.yaml
@@ -152,47 +152,6 @@ allOf:
maxItems: 1
st,drdy-int-pin: false
- - if:
- properties:
- compatible:
- enum:
- # Two intertial interrupts i.e. accelerometer/gyro interrupts
- - st,h3lis331dl-accel
- - st,l3g4200d-gyro
- - st,l3g4is-gyro
- - st,l3gd20-gyro
- - st,l3gd20h-gyro
- - st,lis2de12
- - st,lis2dw12
- - st,lis2hh12
- - st,lis2dh12-accel
- - st,lis331dl-accel
- - st,lis331dlh-accel
- - st,lis3de
- - st,lis3dh-accel
- - st,lis3dhh
- - st,lis3mdl-magn
- - st,lng2dm-accel
- - st,lps331ap-press
- - st,lsm303agr-accel
- - st,lsm303dlh-accel
- - st,lsm303dlhc-accel
- - st,lsm303dlm-accel
- - st,lsm330-accel
- - st,lsm330-gyro
- - st,lsm330d-accel
- - st,lsm330d-gyro
- - st,lsm330dl-accel
- - st,lsm330dl-gyro
- - st,lsm330dlc-accel
- - st,lsm330dlc-gyro
- - st,lsm9ds0-gyro
- - st,lsm9ds1-magn
- then:
- properties:
- interrupts:
- maxItems: 2
-
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml b/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml
index d6a95c3cb26f..e701524ee811 100644
--- a/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml
+++ b/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml
@@ -18,6 +18,7 @@ properties:
compatible:
enum:
- qcom,sc7180-osm-l3
+ - qcom,sc8180x-osm-l3
- qcom,sdm845-osm-l3
- qcom,sm8150-osm-l3
- qcom,sm8250-epss-l3
diff --git a/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml
index 5accc0d113be..3fd1a134162d 100644
--- a/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml
+++ b/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml
@@ -49,6 +49,17 @@ properties:
- qcom,sc7280-mmss-noc
- qcom,sc7280-nsp-noc
- qcom,sc7280-system-noc
+ - qcom,sc8180x-aggre1-noc
+ - qcom,sc8180x-aggre2-noc
+ - qcom,sc8180x-camnoc-virt
+ - qcom,sc8180x-compute-noc
+ - qcom,sc8180x-config-noc
+ - qcom,sc8180x-dc-noc
+ - qcom,sc8180x-gem-noc
+ - qcom,sc8180x-ipa-virt
+ - qcom,sc8180x-mc-virt
+ - qcom,sc8180x-mmss-noc
+ - qcom,sc8180x-system-noc
- qcom,sdm845-aggre1-noc
- qcom,sdm845-aggre2-noc
- qcom,sdm845-config-noc
diff --git a/Documentation/devicetree/bindings/leds/common.yaml b/Documentation/devicetree/bindings/leds/common.yaml
index b1f363747a62..697102707703 100644
--- a/Documentation/devicetree/bindings/leds/common.yaml
+++ b/Documentation/devicetree/bindings/leds/common.yaml
@@ -128,6 +128,12 @@ properties:
as a panic indicator.
type: boolean
+ retain-state-shutdown:
+ description:
+ This property specifies that the LED should not be turned off or changed
+ when the system shuts down.
+ type: boolean
+
trigger-sources:
description: |
List of devices which should be used as a source triggering this LED
diff --git a/Documentation/devicetree/bindings/misc/ge-achc.txt b/Documentation/devicetree/bindings/misc/ge-achc.txt
deleted file mode 100644
index 77df94d7a32f..000000000000
--- a/Documentation/devicetree/bindings/misc/ge-achc.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-* GE Healthcare USB Management Controller
-
-A device which handles data aquisition from compatible USB based peripherals.
-SPI is used for device management.
-
-Note: This device does not expose the peripherals as USB devices.
-
-Required properties:
-
-- compatible : Should be "ge,achc"
-
-Required SPI properties:
-
-- reg : Should be address of the device chip select within
- the controller.
-
-- spi-max-frequency : Maximum SPI clocking speed of device in Hz, should be
- 1MHz for the GE ACHC.
-
-Example:
-
-spidev0: spi@0 {
- compatible = "ge,achc";
- reg = <0>;
- spi-max-frequency = <1000000>;
-};
diff --git a/Documentation/devicetree/bindings/misc/ge-achc.yaml b/Documentation/devicetree/bindings/misc/ge-achc.yaml
new file mode 100644
index 000000000000..ff07aa62ed57
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/ge-achc.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+# Copyright (C) 2021 GE Inc.
+# Copyright (C) 2021 Collabora Ltd.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/misc/ge-achc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: GE Healthcare USB Management Controller
+
+description: |
+ A device which handles data acquisition from compatible USB based peripherals.
+ SPI is used for device management.
+
+ Note: This device does not expose the peripherals as USB devices.
+
+maintainers:
+ - Sebastian Reichel <sre@kernel.org>
+
+properties:
+ compatible:
+ items:
+ - const: ge,achc
+ - const: nxp,kinetis-k20
+
+ clocks:
+ maxItems: 1
+
+ vdd-supply:
+ description: Digital power supply regulator on VDD pin
+
+ vdda-supply:
+ description: Analog power supply regulator on VDDA pin
+
+ reg:
+ items:
+ - description: Control interface
+ - description: Firmware programming interface
+
+ reset-gpios:
+ description: GPIO used for hardware reset.
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - reg
+ - reset-gpios
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ spi@1 {
+ compatible = "ge,achc", "nxp,kinetis-k20";
+ reg = <1>, <0>;
+ clocks = <&achc_24M>;
+ reset-gpios = <&gpio3 6 GPIO_ACTIVE_LOW>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml
index b5baf439fbac..a3412f221104 100644
--- a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml
+++ b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml
@@ -29,6 +29,7 @@ properties:
- fsl,imx53-esdhc
- fsl,imx6q-usdhc
- fsl,imx6sl-usdhc
+ - fsl,imx6sll-usdhc
- fsl,imx6sx-usdhc
- fsl,imx6ull-usdhc
- fsl,imx7d-usdhc
@@ -115,12 +116,17 @@ properties:
- const: per
pinctrl-names:
- minItems: 1
- items:
- - const: default
- - const: state_100mhz
- - const: state_200mhz
- - const: sleep
+ oneOf:
+ - minItems: 3
+ items:
+ - const: default
+ - const: state_100mhz
+ - const: state_200mhz
+ - const: sleep
+ - minItems: 1
+ items:
+ - const: default
+ - const: sleep
required:
- compatible
diff --git a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.yaml b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.yaml
index e0169a285aa2..9e2396751030 100644
--- a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.yaml
+++ b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.yaml
@@ -11,7 +11,9 @@ maintainers:
properties:
compatible:
- const: mmc-pwrseq-sd8787
+ enum:
+ - mmc-pwrseq-sd8787
+ - mmc-pwrseq-wilc1000
powerdown-gpios:
minItems: 1
diff --git a/Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml b/Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml
index 677989bc5924..9f1e7092cf44 100644
--- a/Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml
+++ b/Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml
@@ -9,9 +9,6 @@ title: Renesas SDHI SD/MMC controller
maintainers:
- Wolfram Sang <wsa+renesas@sang-engineering.com>
-allOf:
- - $ref: "mmc-controller.yaml"
-
properties:
compatible:
oneOf:
@@ -47,19 +44,20 @@ properties:
- const: renesas,sdhi-mmc-r8a77470 # RZ/G1C (SDHI/MMC IP)
- items:
- enum:
- - renesas,sdhi-r8a774a1 # RZ/G2M
- - renesas,sdhi-r8a774b1 # RZ/G2N
- - renesas,sdhi-r8a774c0 # RZ/G2E
- - renesas,sdhi-r8a774e1 # RZ/G2H
- - renesas,sdhi-r8a7795 # R-Car H3
- - renesas,sdhi-r8a7796 # R-Car M3-W
- - renesas,sdhi-r8a77961 # R-Car M3-W+
- - renesas,sdhi-r8a77965 # R-Car M3-N
- - renesas,sdhi-r8a77970 # R-Car V3M
- - renesas,sdhi-r8a77980 # R-Car V3H
- - renesas,sdhi-r8a77990 # R-Car E3
- - renesas,sdhi-r8a77995 # R-Car D3
- - renesas,sdhi-r8a779a0 # R-Car V3U
+ - renesas,sdhi-r8a774a1 # RZ/G2M
+ - renesas,sdhi-r8a774b1 # RZ/G2N
+ - renesas,sdhi-r8a774c0 # RZ/G2E
+ - renesas,sdhi-r8a774e1 # RZ/G2H
+ - renesas,sdhi-r8a7795 # R-Car H3
+ - renesas,sdhi-r8a7796 # R-Car M3-W
+ - renesas,sdhi-r8a77961 # R-Car M3-W+
+ - renesas,sdhi-r8a77965 # R-Car M3-N
+ - renesas,sdhi-r8a77970 # R-Car V3M
+ - renesas,sdhi-r8a77980 # R-Car V3H
+ - renesas,sdhi-r8a77990 # R-Car E3
+ - renesas,sdhi-r8a77995 # R-Car D3
+ - renesas,sdhi-r8a779a0 # R-Car V3U
+ - renesas,sdhi-r9a07g044 # RZ/G2{L,LC}
- const: renesas,rcar-gen3-sdhi # R-Car Gen3 or RZ/G2
reg:
@@ -69,15 +67,9 @@ properties:
minItems: 1
maxItems: 3
- clocks:
- minItems: 1
- maxItems: 2
+ clocks: true
- clock-names:
- minItems: 1
- items:
- - const: core
- - const: cd
+ clock-names: true
dmas:
minItems: 4
@@ -104,14 +96,82 @@ properties:
pinctrl-1:
maxItems: 1
- pinctrl-names:
- minItems: 1
- items:
- - const: default
- - const: state_uhs
+ pinctrl-names: true
max-frequency: true
+allOf:
+ - $ref: "mmc-controller.yaml"
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: renesas,sdhi-r9a07g044
+ then:
+ properties:
+ clocks:
+ items:
+ - description: IMCLK, SDHI channel main clock1.
+ - description: IMCLK2, SDHI channel main clock2. When this clock is
+ turned off, an external SD card cannot be
+ detected.
+ - description: CLK_HS, SDHI channel High speed clock which operates
+ 4 times that of SDHI channel main clock1.
+ - description: ACLK, SDHI channel bus clock.
+ clock-names:
+ items:
+ - const: imclk
+ - const: imclk2
+ - const: clk_hs
+ - const: aclk
+ required:
+ - clock-names
+ - resets
+ else:
+ properties:
+ clocks:
+ minItems: 1
+ maxItems: 2
+ clock-names:
+ minItems: 1
+ items:
+ - const: core
+ - const: cd
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: renesas,sdhi-mmc-r8a77470
+ then:
+ properties:
+ pinctrl-names:
+ items:
+ - const: state_uhs
+ else:
+ properties:
+ pinctrl-names:
+ minItems: 1
+ items:
+ - const: default
+ - const: state_uhs
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - renesas,sdhi-r7s72100
+ - renesas,sdhi-r7s9210
+ then:
+ required:
+ - clock-names
+ description:
+ The internal card detection logic that exists in these controllers is
+ sectioned off to be run by a separate second clock source to allow
+ the main core clock to be turned off to save power.
+
required:
- compatible
- reg
@@ -119,21 +179,6 @@ required:
- clocks
- power-domains
-if:
- properties:
- compatible:
- contains:
- enum:
- - renesas,sdhi-r7s72100
- - renesas,sdhi-r7s9210
-then:
- required:
- - clock-names
- description:
- The internal card detection logic that exists in these controllers is
- sectioned off to be run by a separate second clock source to allow
- the main core clock to be turned off to save power.
-
unevaluatedProperties: false
examples:
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
index 4c7fa6a4ed15..365c3fc122ea 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
@@ -19,6 +19,7 @@ Required properties:
"qcom,msm8996-sdhci", "qcom,sdhci-msm-v4"
"qcom,qcs404-sdhci", "qcom,sdhci-msm-v5"
"qcom,sc7180-sdhci", "qcom,sdhci-msm-v5";
+ "qcom,sc7280-sdhci", "qcom,sdhci-msm-v5";
"qcom,sdm845-sdhci", "qcom,sdhci-msm-v5"
"qcom,sdx55-sdhci", "qcom,sdhci-msm-v5";
"qcom,sm8250-sdhci", "qcom,sdhci-msm-v5"
diff --git a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt
deleted file mode 100644
index e15589f47787..000000000000
--- a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-* Broadcom UniMAC MDIO bus controller
-
-Required properties:
-- compatible: should one from "brcm,genet-mdio-v1", "brcm,genet-mdio-v2",
- "brcm,genet-mdio-v3", "brcm,genet-mdio-v4", "brcm,genet-mdio-v5" or
- "brcm,unimac-mdio"
-- reg: address and length of the register set for the device, first one is the
- base register, and the second one is optional and for indirect accesses to
- larger than 16-bits MDIO transactions
-- reg-names: name(s) of the register must be "mdio" and optional "mdio_indir_rw"
-- #size-cells: must be 1
-- #address-cells: must be 0
-
-Optional properties:
-- interrupts: must be one if the interrupt is shared with the Ethernet MAC or
- Ethernet switch this MDIO block is integrated from, or must be two, if there
- are two separate interrupts, first one must be "mdio done" and second must be
- for "mdio error"
-- interrupt-names: must be "mdio_done_error" when there is a share interrupt fed
- to this hardware block, or must be "mdio_done" for the first interrupt and
- "mdio_error" for the second when there are separate interrupts
-- clocks: A reference to the clock supplying the MDIO bus controller
-- clock-frequency: the MDIO bus clock that must be output by the MDIO bus
- hardware, if absent, the default hardware values are used
-
-Child nodes of this MDIO bus controller node are standard Ethernet PHY device
-nodes as described in Documentation/devicetree/bindings/net/phy.txt
-
-Example:
-
-mdio@403c0 {
- compatible = "brcm,unimac-mdio";
- reg = <0x403c0 0x8 0x40300 0x18>;
- reg-names = "mdio", "mdio_indir_rw";
- #size-cells = <1>;
- #address-cells = <0>;
-
- ...
- phy@0 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <0>;
- };
-};
diff --git a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml
new file mode 100644
index 000000000000..f4f4c37f1d4e
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/brcm,unimac-mdio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom UniMAC MDIO bus controller
+
+maintainers:
+ - Rafał Miłecki <rafal@milecki.pl>
+
+allOf:
+ - $ref: mdio.yaml#
+
+properties:
+ compatible:
+ enum:
+ - brcm,genet-mdio-v1
+ - brcm,genet-mdio-v2
+ - brcm,genet-mdio-v3
+ - brcm,genet-mdio-v4
+ - brcm,genet-mdio-v5
+ - brcm,unimac-mdio
+
+ reg:
+ minItems: 1
+ items:
+ - description: base register
+ - description: indirect accesses for MDIO transactions larger than 16 bits
+
+ reg-names:
+ minItems: 1
+ items:
+ - const: mdio
+ - const: mdio_indir_rw
+
+ interrupts:
+ oneOf:
+ - description: >
+ Interrupt shared with the Ethernet MAC or Ethernet switch this MDIO
+ block is integrated from
+ - items:
+ - description: |
+ "mdio done" interrupt
+ - description: |
+ "mdio error" interrupt
+
+ interrupt-names:
+ oneOf:
+ - const: mdio_done_error
+ - items:
+ - const: mdio_done
+ - const: mdio_error
+
+ clocks:
+ description: A reference to the clock supplying the MDIO bus controller
+
+ clock-frequency:
+ description: >
+ The MDIO bus clock that must be output by the MDIO bus hardware, if
+ absent, the default hardware values are used
+
+unevaluatedProperties: false
+
+required:
+ - reg
+ - reg-names
+ - '#address-cells'
+ - '#size-cells'
+
+examples:
+ - |
+ mdio@403c0 {
+ compatible = "brcm,unimac-mdio";
+ reg = <0x403c0 0x8>, <0x40300 0x18>;
+ reg-names = "mdio", "mdio_indir_rw";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml
new file mode 100644
index 000000000000..2cd145a642f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml
@@ -0,0 +1,119 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/can/bosch,c_can.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Bosch C_CAN/D_CAN controller Device Tree Bindings
+
+description: Bosch C_CAN/D_CAN controller for CAN bus
+
+maintainers:
+ - Dario Binacchi <dariobin@libero.it>
+
+allOf:
+ - $ref: can-controller.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - bosch,c_can
+ - bosch,d_can
+ - ti,dra7-d_can
+ - ti,am3352-d_can
+ - items:
+ - enum:
+ - ti,am4372-d_can
+ - const: ti,am3352-d_can
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ minItems: 1
+ maxItems: 4
+
+ power-domains:
+ description: |
+ Should contain a phandle to a PM domain provider node and an args
+ specifier containing the DCAN device id value. It's mandatory for
+ Keystone 2 66AK2G SoCs only.
+ maxItems: 1
+
+ clocks:
+ description: |
+ CAN functional clock phandle.
+ maxItems: 1
+
+ clock-names:
+ maxItems: 1
+
+ syscon-raminit:
+ description: |
+ Handle to system control region that contains the RAMINIT register,
+ register offset to the RAMINIT register and the CAN instance number (0
+ offset).
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ items:
+ items:
+ - description: The phandle to the system control region.
+ - description: The register offset.
+ - description: The CAN instance number.
+
+ resets:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - bosch,d_can
+
+then:
+ properties:
+ interrupts:
+ minItems: 4
+ maxItems: 4
+ items:
+ - description: Error and status IRQ
+ - description: Message object IRQ
+ - description: RAM ECC correctable error IRQ
+ - description: RAM ECC non-correctable error IRQ
+
+else:
+ properties:
+ interrupts:
+ maxItems: 1
+ items:
+ - description: Error and status IRQ
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/reset/altr,rst-mgr.h>
+
+ can@ffc00000 {
+ compatible = "bosch,d_can";
+ reg = <0xffc00000 0x1000>;
+ interrupts = <0 131 4>, <0 132 4>, <0 133 4>, <0 134 4>;
+ clocks = <&can0_clk>;
+ resets = <&rst CAN0_RESET>;
+ };
+ - |
+ can@0 {
+ compatible = "ti,am3352-d_can";
+ reg = <0x0 0x2000>;
+ clocks = <&dcan1_fck>;
+ clock-names = "fck";
+ syscon-raminit = <&scm_conf 0x644 1>;
+ interrupts = <55>;
+ };
diff --git a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
index f84e31348d80..fb547e26c676 100644
--- a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
+++ b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
@@ -104,9 +104,18 @@ properties:
maximum: 32
maxItems: 1
+ power-domains:
+ description:
+ Power domain provider node and an args specifier containing
+ the can device id value.
+ maxItems: 1
+
can-transceiver:
$ref: can-transceiver.yaml#
+ phys:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/net/can/c_can.txt b/Documentation/devicetree/bindings/net/can/c_can.txt
deleted file mode 100644
index 366479806acb..000000000000
--- a/Documentation/devicetree/bindings/net/can/c_can.txt
+++ /dev/null
@@ -1,65 +0,0 @@
-Bosch C_CAN/D_CAN controller Device Tree Bindings
--------------------------------------------------
-
-Required properties:
-- compatible : Should be "bosch,c_can" for C_CAN controllers and
- "bosch,d_can" for D_CAN controllers.
- Can be "ti,dra7-d_can", "ti,am3352-d_can" or
- "ti,am4372-d_can".
-- reg : physical base address and size of the C_CAN/D_CAN
- registers map
-- interrupts : property with a value describing the interrupt
- number
-
-The following are mandatory properties for DRA7x, AM33xx and AM43xx SoCs only:
-- ti,hwmods : Must be "d_can<n>" or "c_can<n>", n being the
- instance number
-
-The following are mandatory properties for Keystone 2 66AK2G SoCs only:
-- power-domains : Should contain a phandle to a PM domain provider node
- and an args specifier containing the DCAN device id
- value. This property is as per the binding,
- Documentation/devicetree/bindings/soc/ti/sci-pm-domain.yaml
-- clocks : CAN functional clock phandle. This property is as per the
- binding,
- Documentation/devicetree/bindings/clock/ti,sci-clk.yaml
-
-Optional properties:
-- syscon-raminit : Handle to system control region that contains the
- RAMINIT register, register offset to the RAMINIT
- register and the CAN instance number (0 offset).
-
-Note: "ti,hwmods" field is used to fetch the base address and irq
-resources from TI, omap hwmod data base during device registration.
-Future plan is to migrate hwmod data base contents into device tree
-blob so that, all the required data will be used from device tree dts
-file.
-
-Example:
-
-Step1: SoC common .dtsi file
-
- dcan1: d_can@481d0000 {
- compatible = "bosch,d_can";
- reg = <0x481d0000 0x2000>;
- interrupts = <55>;
- interrupt-parent = <&intc>;
- status = "disabled";
- };
-
-(or)
-
- dcan1: d_can@481d0000 {
- compatible = "bosch,d_can";
- ti,hwmods = "d_can1";
- reg = <0x481d0000 0x2000>;
- interrupts = <55>;
- interrupt-parent = <&intc>;
- status = "disabled";
- };
-
-Step 2: board specific .dts file
-
- &dcan1 {
- status = "okay";
- };
diff --git a/Documentation/devicetree/bindings/net/can/can-controller.yaml b/Documentation/devicetree/bindings/net/can/can-controller.yaml
index 9cf2ae097156..1f0e98051074 100644
--- a/Documentation/devicetree/bindings/net/can/can-controller.yaml
+++ b/Documentation/devicetree/bindings/net/can/can-controller.yaml
@@ -13,6 +13,15 @@ properties:
$nodename:
pattern: "^can(@.*)?$"
+ termination-gpios:
+ description: GPIO pin to enable CAN bus termination.
+ maxItems: 1
+
+ termination-ohms:
+ description: The resistance value of the CAN bus termination resistor.
+ minimum: 1
+ maximum: 65535
+
additionalProperties: true
...
diff --git a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml
index 55bff1586b6f..3f0ee17c1461 100644
--- a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml
+++ b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml
@@ -119,6 +119,9 @@ properties:
minimum: 0
maximum: 2
+ termination-gpios: true
+ termination-ohms: true
+
required:
- compatible
- reg
@@ -148,3 +151,17 @@ examples:
fsl,stop-mode = <&gpr 0x34 28>;
fsl,scu-index = /bits/ 8 <1>;
};
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/gpio/gpio.h>
+
+ can@2090000 {
+ compatible = "fsl,imx6q-flexcan";
+ reg = <0x02090000 0x4000>;
+ interrupts = <0 110 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clks 1>, <&clks 2>;
+ clock-names = "ipg", "per";
+ fsl,stop-mode = <&gpr 0x34 28>;
+ termination-gpios = <&gpio1 0 GPIO_ACTIVE_LOW>;
+ termination-ohms = <120>;
+ };
diff --git a/Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml b/Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml
index 0b33ba9ccb47..546c6e6d2fb0 100644
--- a/Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml
+++ b/Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml
@@ -30,13 +30,15 @@ properties:
- renesas,r8a77995-canfd # R-Car D3
- const: renesas,rcar-gen3-canfd # R-Car Gen3 and RZ/G2
+ - items:
+ - enum:
+ - renesas,r9a07g044-canfd # RZ/G2{L,LC}
+ - const: renesas,rzg2l-canfd # RZ/G2L family
+
reg:
maxItems: 1
- interrupts:
- items:
- - description: Channel interrupt
- - description: Global interrupt
+ interrupts: true
clocks:
maxItems: 3
@@ -50,8 +52,7 @@ properties:
power-domains:
maxItems: 1
- resets:
- maxItems: 1
+ resets: true
renesas,no-can-fd:
$ref: /schemas/types.yaml#/definitions/flag
@@ -91,6 +92,62 @@ required:
- channel0
- channel1
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - renesas,rzg2l-canfd
+then:
+ properties:
+ interrupts:
+ items:
+ - description: CAN global error interrupt
+ - description: CAN receive FIFO interrupt
+ - description: CAN0 error interrupt
+ - description: CAN0 transmit interrupt
+ - description: CAN0 transmit/receive FIFO receive completion interrupt
+ - description: CAN1 error interrupt
+ - description: CAN1 transmit interrupt
+ - description: CAN1 transmit/receive FIFO receive completion interrupt
+
+ interrupt-names:
+ items:
+ - const: g_err
+ - const: g_recc
+ - const: ch0_err
+ - const: ch0_rec
+ - const: ch0_trx
+ - const: ch1_err
+ - const: ch1_rec
+ - const: ch1_trx
+
+ resets:
+ maxItems: 2
+
+ reset-names:
+ items:
+ - const: rstp_n
+ - const: rstc_n
+
+ required:
+ - interrupt-names
+ - reset-names
+else:
+ properties:
+ interrupts:
+ items:
+ - description: Channel interrupt
+ - description: Global interrupt
+
+ interrupt-names:
+ items:
+ - const: ch_int
+ - const: g_int
+
+ resets:
+ maxItems: 1
+
unevaluatedProperties: false
examples:
diff --git a/Documentation/devicetree/bindings/net/fsl,fec.yaml b/Documentation/devicetree/bindings/net/fsl,fec.yaml
new file mode 100644
index 000000000000..eca41443fcce
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/fsl,fec.yaml
@@ -0,0 +1,244 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/fsl,fec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale Fast Ethernet Controller (FEC)
+
+maintainers:
+ - Joakim Zhang <qiangqing.zhang@nxp.com>
+
+allOf:
+ - $ref: ethernet-controller.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - fsl,imx25-fec
+ - fsl,imx27-fec
+ - fsl,imx28-fec
+ - fsl,imx6q-fec
+ - fsl,mvf600-fec
+ - items:
+ - enum:
+ - fsl,imx53-fec
+ - fsl,imx6sl-fec
+ - const: fsl,imx25-fec
+ - items:
+ - enum:
+ - fsl,imx35-fec
+ - fsl,imx51-fec
+ - const: fsl,imx27-fec
+ - items:
+ - enum:
+ - fsl,imx6ul-fec
+ - fsl,imx6sx-fec
+ - const: fsl,imx6q-fec
+ - items:
+ - enum:
+ - fsl,imx7d-fec
+ - const: fsl,imx6sx-fec
+ - items:
+ - const: fsl,imx8mq-fec
+ - const: fsl,imx6sx-fec
+ - items:
+ - enum:
+ - fsl,imx8mm-fec
+ - fsl,imx8mn-fec
+ - fsl,imx8mp-fec
+ - const: fsl,imx8mq-fec
+ - const: fsl,imx6sx-fec
+ - items:
+ - const: fsl,imx8qm-fec
+ - const: fsl,imx6sx-fec
+ - items:
+ - enum:
+ - fsl,imx8qxp-fec
+ - const: fsl,imx8qm-fec
+ - const: fsl,imx6sx-fec
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ minItems: 1
+ maxItems: 4
+
+ interrupt-names:
+ oneOf:
+ - items:
+ - const: int0
+ - items:
+ - const: int0
+ - const: pps
+ - items:
+ - const: int0
+ - const: int1
+ - const: int2
+ - items:
+ - const: int0
+ - const: int1
+ - const: int2
+ - const: pps
+
+ clocks:
+ minItems: 2
+ maxItems: 5
+ description:
+ The "ipg", for MAC ipg_clk_s, ipg_clk_mac_s that are for register accessing.
+ The "ahb", for MAC ipg_clk, ipg_clk_mac that are bus clock.
+ The "ptp"(option), for IEEE1588 timer clock that requires the clock.
+ The "enet_clk_ref"(option), for MAC transmit/receiver reference clock like
+ RGMII TXC clock or RMII reference clock. It depends on board design,
+ the clock is required if RGMII TXC and RMII reference clock source from
+ SOC internal PLL.
+ The "enet_out"(option), output clock for external device, like supply clock
+ for PHY. The clock is required if PHY clock source from SOC.
+ The "enet_2x_txclk"(option), for RGMII sampling clock which is fixed at 250MHz.
+ The clock is required if SoC RGMII enable clock delay.
+
+ clock-names:
+ minItems: 2
+ maxItems: 5
+ items:
+ enum:
+ - ipg
+ - ahb
+ - ptp
+ - enet_clk_ref
+ - enet_out
+ - enet_2x_txclk
+
+ phy-mode: true
+
+ phy-handle: true
+
+ fixed-link: true
+
+ local-mac-address: true
+
+ mac-address: true
+
+ tx-internal-delay-ps:
+ enum: [0, 2000]
+
+ rx-internal-delay-ps:
+ enum: [0, 2000]
+
+ phy-supply:
+ description:
+ Regulator that powers the Ethernet PHY.
+
+ fsl,num-tx-queues:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ The property is valid for enet-avb IP, which supports hw multi queues.
+ Should specify the tx queue number, otherwise set tx queue number to 1.
+ enum: [1, 2, 3]
+
+ fsl,num-rx-queues:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ The property is valid for enet-avb IP, which supports hw multi queues.
+ Should specify the rx queue number, otherwise set rx queue number to 1.
+ enum: [1, 2, 3]
+
+ fsl,magic-packet:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ If present, indicates that the hardware supports waking up via magic packet.
+
+ fsl,err006687-workaround-present:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ If present indicates that the system has the hardware workaround for
+ ERR006687 applied and does not need a software workaround.
+
+ fsl,stop-mode:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ description:
+ Register bits of stop mode control, the format is <&gpr req_gpr req_bit>.
+ gpr is the phandle to general purpose register node.
+ req_gpr is the gpr register offset for ENET stop request.
+ req_bit is the gpr bit offset for ENET stop request.
+
+ mdio:
+ type: object
+ description:
+ Specifies the mdio bus in the FEC, used as a container for phy nodes.
+
+ # Deprecated optional properties:
+ # To avoid these, create a phy node according to ethernet-phy.yaml in the same
+ # directory, and point the FEC's "phy-handle" property to it. Then use
+ # the phy's reset binding, again described by ethernet-phy.yaml.
+
+ phy-reset-gpios:
+ deprecated: true
+ description:
+ Should specify the gpio for phy reset.
+
+ phy-reset-duration:
+ deprecated: true
+ description:
+ Reset duration in milliseconds. Should be present only if property
+ "phy-reset-gpios" is available. Missing the property will have the
+ duration be 1 millisecond. Numbers greater than 1000 are invalid
+ and 1 millisecond will be used instead.
+
+ phy-reset-active-high:
+ deprecated: true
+ description:
+ If present then the reset sequence using the GPIO specified in the
+ "phy-reset-gpios" property is reversed (H=reset state, L=operation state).
+
+ phy-reset-post-delay:
+ deprecated: true
+ description:
+ Post reset delay in milliseconds. If present then a delay of phy-reset-post-delay
+ milliseconds will be observed after the phy-reset-gpios has been toggled.
+ Can be omitted thus no delay is observed. Delay is in range of 1ms to 1000ms.
+ Other delays are invalid.
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+# FIXME: We had better set additionalProperties to false to avoid invalid or at
+# least undocumented properties. However, PHY may have a deprecated option to
+# place PHY OF properties in the MAC node, such as Micrel PHY, and we can find
+# these boards which are based on i.MX6QDL.
+additionalProperties: false
+
+examples:
+ - |
+ ethernet@83fec000 {
+ compatible = "fsl,imx51-fec", "fsl,imx27-fec";
+ reg = <0x83fec000 0x4000>;
+ interrupts = <87>;
+ phy-mode = "mii";
+ phy-reset-gpios = <&gpio2 14 0>;
+ phy-supply = <&reg_fec_supply>;
+ };
+
+ ethernet@83fed000 {
+ compatible = "fsl,imx51-fec", "fsl,imx27-fec";
+ reg = <0x83fed000 0x4000>;
+ interrupts = <87>;
+ phy-mode = "mii";
+ phy-reset-gpios = <&gpio2 14 0>;
+ phy-supply = <&reg_fec_supply>;
+ phy-handle = <&ethphy0>;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethphy0: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <0>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt
deleted file mode 100644
index 9b543789cd52..000000000000
--- a/Documentation/devicetree/bindings/net/fsl-fec.txt
+++ /dev/null
@@ -1,95 +0,0 @@
-* Freescale Fast Ethernet Controller (FEC)
-
-Required properties:
-- compatible : Should be "fsl,<soc>-fec"
-- reg : Address and length of the register set for the device
-- interrupts : Should contain fec interrupt
-- phy-mode : See ethernet.txt file in the same directory
-
-Optional properties:
-- phy-supply : regulator that powers the Ethernet PHY.
-- phy-handle : phandle to the PHY device connected to this device.
-- fixed-link : Assume a fixed link. See fixed-link.txt in the same directory.
- Use instead of phy-handle.
-- fsl,num-tx-queues : The property is valid for enet-avb IP, which supports
- hw multi queues. Should specify the tx queue number, otherwise set tx queue
- number to 1.
-- fsl,num-rx-queues : The property is valid for enet-avb IP, which supports
- hw multi queues. Should specify the rx queue number, otherwise set rx queue
- number to 1.
-- fsl,magic-packet : If present, indicates that the hardware supports waking
- up via magic packet.
-- fsl,err006687-workaround-present: If present indicates that the system has
- the hardware workaround for ERR006687 applied and does not need a software
- workaround.
-- fsl,stop-mode: register bits of stop mode control, the format is
- <&gpr req_gpr req_bit>.
- gpr is the phandle to general purpose register node.
- req_gpr is the gpr register offset for ENET stop request.
- req_bit is the gpr bit offset for ENET stop request.
- -interrupt-names: names of the interrupts listed in interrupts property in
- the same order. The defaults if not specified are
- __Number of interrupts__ __Default__
- 1 "int0"
- 2 "int0", "pps"
- 3 "int0", "int1", "int2"
- 4 "int0", "int1", "int2", "pps"
- The order may be changed as long as they correspond to the interrupts
- property. Currently, only i.mx7 uses "int1" and "int2". They correspond to
- tx/rx queues 1 and 2. "int0" will be used for queue 0 and ENET_MII interrupts.
- For imx6sx, "int0" handles all 3 queues and ENET_MII. "pps" is for the pulse
- per second interrupt associated with 1588 precision time protocol(PTP).
-
-Optional subnodes:
-- mdio : specifies the mdio bus in the FEC, used as a container for phy nodes
- according to phy.txt in the same directory
-
-Deprecated optional properties:
- To avoid these, create a phy node according to phy.txt in the same
- directory, and point the fec's "phy-handle" property to it. Then use
- the phy's reset binding, again described by phy.txt.
-- phy-reset-gpios : Should specify the gpio for phy reset
-- phy-reset-duration : Reset duration in milliseconds. Should present
- only if property "phy-reset-gpios" is available. Missing the property
- will have the duration be 1 millisecond. Numbers greater than 1000 are
- invalid and 1 millisecond will be used instead.
-- phy-reset-active-high : If present then the reset sequence using the GPIO
- specified in the "phy-reset-gpios" property is reversed (H=reset state,
- L=operation state).
-- phy-reset-post-delay : Post reset delay in milliseconds. If present then
- a delay of phy-reset-post-delay milliseconds will be observed after the
- phy-reset-gpios has been toggled. Can be omitted thus no delay is
- observed. Delay is in range of 1ms to 1000ms. Other delays are invalid.
-
-Example:
-
-ethernet@83fec000 {
- compatible = "fsl,imx51-fec", "fsl,imx27-fec";
- reg = <0x83fec000 0x4000>;
- interrupts = <87>;
- phy-mode = "mii";
- phy-reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>; /* GPIO2_14 */
- local-mac-address = [00 04 9F 01 1B B9];
- phy-supply = <&reg_fec_supply>;
-};
-
-Example with phy specified:
-
-ethernet@83fec000 {
- compatible = "fsl,imx51-fec", "fsl,imx27-fec";
- reg = <0x83fec000 0x4000>;
- interrupts = <87>;
- phy-mode = "mii";
- phy-reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>; /* GPIO2_14 */
- local-mac-address = [00 04 9F 01 1B B9];
- phy-supply = <&reg_fec_supply>;
- phy-handle = <&ethphy>;
- mdio {
- clock-frequency = <5000000>;
- ethphy: ethernet-phy@6 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <6>;
- max-speed = <100>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml b/Documentation/devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml
new file mode 100644
index 000000000000..8b9b3f915d92
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2018 Linaro Ltd.
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/net/intel,ixp46x-ptp-timer.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Intel IXP46x PTP Timer (TSYNC)
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+ The Intel IXP46x PTP timer is known in the manual as IEEE1588 Hardware
+ Assist and Time Synchronization Hardware Assist (TSYNC). It provides a PTP
+ timer and exists in the Intel IXP45x and IXP46x XScale SoCs.
+
+properties:
+ compatible:
+ const: intel,ixp46x-ptp-timer
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: Interrupt to trigger master mode snapshot from the
+ PRP timer, usually a GPIO interrupt.
+ - description: Interrupt to trigger slave mode snapshot from the
+ PRP timer, usually a GPIO interrupt.
+
+ interrupt-names:
+ items:
+ - const: master
+ - const: slave
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - interrupt-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ ptp-timer@c8010000 {
+ compatible = "intel,ixp46x-ptp-timer";
+ reg = <0xc8010000 0x1000>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <8 IRQ_TYPE_EDGE_FALLING>, <7 IRQ_TYPE_EDGE_FALLING>;
+ interrupt-names = "master", "slave";
+ };
diff --git a/Documentation/devicetree/bindings/net/litex,liteeth.yaml b/Documentation/devicetree/bindings/net/litex,liteeth.yaml
new file mode 100644
index 000000000000..76c164a8199a
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/litex,liteeth.yaml
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/litex,liteeth.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: LiteX LiteETH ethernet device
+
+maintainers:
+ - Joel Stanley <joel@jms.id.au>
+
+description: |
+ LiteETH is a small footprint and configurable Ethernet core for FPGA based
+ system on chips.
+
+ The hardware source is Open Source and can be found at
+ https://github.com/enjoy-digital/liteeth/.
+
+allOf:
+ - $ref: ethernet-controller.yaml#
+
+properties:
+ compatible:
+ const: litex,liteeth
+
+ reg:
+ items:
+ - description: MAC registers
+ - description: MDIO registers
+ - description: Packet buffer
+
+ reg-names:
+ items:
+ - const: mac
+ - const: mdio
+ - const: buffer
+
+ interrupts:
+ maxItems: 1
+
+ litex,rx-slots:
+ description: Number of slots in the receive buffer
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 1
+ default: 2
+
+ litex,tx-slots:
+ description: Number of slots in the transmit buffer
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 1
+ default: 2
+
+ litex,slot-size:
+ description: Size in bytes of a slot in the tx/rx buffer
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0x800
+ default: 0x800
+
+ mac-address: true
+ local-mac-address: true
+ phy-handle: true
+
+ mdio:
+ $ref: mdio.yaml#
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ mac: ethernet@8020000 {
+ compatible = "litex,liteeth";
+ reg = <0x8021000 0x100>,
+ <0x8020800 0x100>,
+ <0x8030000 0x2000>;
+ reg-names = "mac", "mdio", "buffer";
+ litex,rx-slots = <2>;
+ litex,tx-slots = <2>;
+ litex,slot-size = <0x800>;
+ interrupts = <0x11 0x1>;
+ phy-handle = <&eth_phy>;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ eth_phy: ethernet-phy@0 {
+ reg = <0>;
+ };
+ };
+ };
+...
+
+# vim: set ts=2 sw=2 sts=2 tw=80 et cc=80 ft=yaml :
diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt
index a4d547efc32a..af9df2f01a1c 100644
--- a/Documentation/devicetree/bindings/net/macb.txt
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -8,6 +8,7 @@ Required properties:
Use "cdns,np4-macb" for NP4 SoC devices.
Use "cdns,at32ap7000-macb" for other 10/100 usage or use the generic form: "cdns,macb".
Use "atmel,sama5d2-gem" for the GEM IP (10/100) available on Atmel sama5d2 SoCs.
+ Use "atmel,sama5d29-gem" for GEM XL IP (10/100) available on Atmel sama5d29 SoCs.
Use "atmel,sama5d3-macb" for the 10/100Mbit IP available on Atmel sama5d3 SoCs.
Use "atmel,sama5d3-gem" for the Gigabit IP available on Atmel sama5d3 SoCs.
Use "atmel,sama5d4-gem" for the GEM IP (10/100) available on Atmel sama5d4 SoCs.
diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
index ed88ba4b94df..b8a0b392b24e 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
@@ -87,16 +87,24 @@ properties:
- const: ipa-setup-ready
interconnects:
- items:
- - description: Interconnect path between IPA and main memory
- - description: Interconnect path between IPA and internal memory
- - description: Interconnect path between IPA and the AP subsystem
+ oneOf:
+ - items:
+ - description: Path leading to system memory
+ - description: Path between the AP and IPA config space
+ - items:
+ - description: Path leading to system memory
+ - description: Path leading to internal memory
+ - description: Path between the AP and IPA config space
interconnect-names:
- items:
- - const: memory
- - const: imem
- - const: config
+ oneOf:
+ - items:
+ - const: memory
+ - const: config
+ - items:
+ - const: memory
+ - const: imem
+ - const: config
qcom,smem-states:
$ref: /schemas/types.yaml#/definitions/phandle-array
diff --git a/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml b/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml
index 0c973310ada0..2af304341772 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml
@@ -14,7 +14,9 @@ allOf:
properties:
compatible:
- const: qcom,ipq4019-mdio
+ enum:
+ - qcom,ipq4019-mdio
+ - qcom,ipq5018-mdio
"#address-cells":
const: 1
@@ -23,7 +25,18 @@ properties:
const: 0
reg:
+ minItems: 1
+ maxItems: 2
+ description:
+ The first item is the address and length of the register set for the MDIO
+ controller. The second item is the address and length of the register for
+ the ethernet LDO; it is only required by the IPQ50xx platform.
+
+ clocks:
maxItems: 1
+ description: |
+ MDIO clock source, with the frequency fixed to 100 MHz. This clock should be
+ specified by the IPQ807x, IPQ60xx and IPQ50xx platforms.
required:
- compatible
diff --git a/Documentation/devicetree/bindings/nvmem/nintendo-otp.yaml b/Documentation/devicetree/bindings/nvmem/nintendo-otp.yaml
new file mode 100644
index 000000000000..dbe4ffdd644c
--- /dev/null
+++ b/Documentation/devicetree/bindings/nvmem/nintendo-otp.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/nvmem/nintendo-otp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Nintendo Wii and Wii U OTP Device Tree Bindings
+
+description: |
+ This binding represents the OTP memory as found on a Nintendo Wii or Wii U,
+ which contains common and per-console keys, signatures and related data
+ required to access peripherals.
+
+ See https://wiiubrew.org/wiki/Hardware/OTP
+
+maintainers:
+ - Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
+
+allOf:
+ - $ref: "nvmem.yaml#"
+
+properties:
+ compatible:
+ enum:
+ - nintendo,hollywood-otp
+ - nintendo,latte-otp
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ otp@d8001ec {
+ compatible = "nintendo,latte-otp";
+ reg = <0x0d8001ec 0x8>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml
index 861b205016b1..dede8892ee01 100644
--- a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml
+++ b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml
@@ -51,6 +51,9 @@ properties:
vcc-supply:
description: Our power supply.
+ power-domains:
+ maxItems: 1
+
# Needed if any child nodes are present.
"#address-cells":
const: 1
diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.txt b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.txt
deleted file mode 100644
index 7c70f2ad9942..000000000000
--- a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-* Freescale i.MX8MQ USB3 PHY binding
-
-Required properties:
-- compatible: Should be "fsl,imx8mq-usb-phy" or "fsl,imx8mp-usb-phy"
-- #phys-cells: must be 0 (see phy-bindings.txt in this directory)
-- reg: The base address and length of the registers
-- clocks: phandles to the clocks for each clock listed in clock-names
-- clock-names: must contain "phy"
-
-Optional properties:
-- vbus-supply: A phandle to the regulator for USB VBUS.
-
-Example:
- usb3_phy0: phy@381f0040 {
- compatible = "fsl,imx8mq-usb-phy";
- reg = <0x381f0040 0x40>;
- clocks = <&clk IMX8MQ_CLK_USB1_PHY_ROOT>;
- clock-names = "phy";
- #phy-cells = <0>;
- };
diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml
new file mode 100644
index 000000000000..2936f3510a6a
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/fsl,imx8mq-usb-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8MQ USB3 PHY binding
+
+maintainers:
+ - Li Jun <jun.li@nxp.com>
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx8mq-usb-phy
+ - fsl,imx8mp-usb-phy
+
+ reg:
+ maxItems: 1
+
+ "#phy-cells":
+ const: 0
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: phy
+
+ vbus-supply:
+ description:
+ A phandle to the regulator for USB VBUS.
+
+required:
+ - compatible
+ - reg
+ - "#phy-cells"
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx8mq-clock.h>
+ usb3_phy0: phy@381f0040 {
+ compatible = "fsl,imx8mq-usb-phy";
+ reg = <0x381f0040 0x40>;
+ clocks = <&clk IMX8MQ_CLK_USB1_PHY_ROOT>;
+ clock-names = "phy";
+ #phy-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/phy/intel,phy-keembay-usb.yaml b/Documentation/devicetree/bindings/phy/intel,keembay-phy-usb.yaml
index a217bb8ac5bc..52815b6c2b88 100644
--- a/Documentation/devicetree/bindings/phy/intel,phy-keembay-usb.yaml
+++ b/Documentation/devicetree/bindings/phy/intel,keembay-phy-usb.yaml
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
-$id: http://devicetree.org/schemas/phy/intel,phy-keembay-usb.yaml#
+$id: http://devicetree.org/schemas/phy/intel,keembay-phy-usb.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Intel Keem Bay USB PHY bindings
diff --git a/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml b/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml
index ef9d9d4e6875..9e6c0f43f1c6 100644
--- a/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml
+++ b/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml
@@ -15,7 +15,7 @@ description: |
controllers on MediaTek SoCs, includes USB2.0, USB3.0, PCIe and SATA.
Layout differences of banks between T-PHY V1 (mt8173/mt2701) and
- T-PHY V2 (mt2712) when works on USB mode:
+ T-PHY V2 (mt2712) / V3 (mt8195) when works on USB mode:
-----------------------------------
Version 1:
port offset bank
@@ -34,7 +34,7 @@ description: |
u2 port2 0x1800 U2PHY_COM
...
- Version 2:
+ Version 2/3:
port offset bank
u2 port0 0x0000 MISC
0x0100 FMREG
@@ -59,7 +59,8 @@ description: |
SPLLC shared by u3 ports and FMREG shared by u2 ports on V1 are put back
into each port; a new bank MISC for u2 ports and CHIP for u3 ports are
- added on V2.
+ added on V2; the FMREG bank for slew rate calibration is not used anymore
+ and is reserved on V3.
properties:
$nodename:
@@ -79,8 +80,11 @@ properties:
- mediatek,mt2712-tphy
- mediatek,mt7629-tphy
- mediatek,mt8183-tphy
- - mediatek,mt8195-tphy
- const: mediatek,generic-tphy-v2
+ - items:
+ - enum:
+ - mediatek,mt8195-tphy
+ - const: mediatek,generic-tphy-v3
- const: mediatek,mt2701-u3phy
deprecated: true
- const: mediatek,mt2712-u3phy
@@ -91,7 +95,7 @@ properties:
description:
Register shared by multiple ports, exclude port's private register.
It is needed for T-PHY V1, such as mt2701 and mt8173, but not for
- T-PHY V2, such as mt2712.
+ T-PHY V2/V3, such as mt2712.
maxItems: 1
"#address-cells":
@@ -197,6 +201,22 @@ patternProperties:
Specify the flag to enable BC1.2 if support it
type: boolean
+ mediatek,syscon-type:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+ description:
+ A phandle to syscon used to access the register of type switch,
+ the field should always be 3 cells long.
+ items:
+ items:
+ - description:
+ The first cell represents a phandle to syscon
+ - description:
+ The second cell represents the register offset
+ - description:
+ The third cell represents the index of config segment
+ enum: [0, 1, 2, 3]
+
required:
- reg
- "#phy-cells"
diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml
index f0497b8623ad..75be5650a198 100644
--- a/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml
@@ -18,6 +18,7 @@ properties:
compatible:
enum:
- qcom,ipq6018-qmp-pcie-phy
+ - qcom,ipq6018-qmp-usb3-phy
- qcom,ipq8074-qmp-pcie-phy
- qcom,ipq8074-qmp-usb3-phy
- qcom,msm8996-qmp-pcie-phy
@@ -27,6 +28,7 @@ properties:
- qcom,msm8998-qmp-ufs-phy
- qcom,msm8998-qmp-usb3-phy
- qcom,sc7180-qmp-usb3-phy
+ - qcom,sc8180x-qmp-pcie-phy
- qcom,sc8180x-qmp-ufs-phy
- qcom,sc8180x-qmp-usb3-phy
- qcom,sdm845-qhp-pcie-phy
@@ -34,6 +36,7 @@ properties:
- qcom,sdm845-qmp-ufs-phy
- qcom,sdm845-qmp-usb3-phy
- qcom,sdm845-qmp-usb3-uni-phy
+ - qcom,sm6115-qmp-ufs-phy
- qcom,sm8150-qmp-ufs-phy
- qcom,sm8150-qmp-usb3-phy
- qcom,sm8150-qmp-usb3-uni-phy
@@ -326,6 +329,7 @@ allOf:
compatible:
contains:
enum:
+ - qcom,sc8180x-qmp-pcie-phy
- qcom,sdm845-qhp-pcie-phy
- qcom,sdm845-qmp-pcie-phy
- qcom,sdx55-qmp-pcie-phy
diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml
index 217aa6c91893..1d49cc3d4eae 100644
--- a/Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml
@@ -14,6 +14,7 @@ properties:
compatible:
enum:
- qcom,sc7180-qmp-usb3-dp-phy
+ - qcom,sc8180x-qmp-usb3-dp-phy
- qcom,sdm845-qmp-usb3-dp-phy
- qcom,sm8250-qmp-usb3-dp-phy
reg:
diff --git a/Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml b/Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml
index d5dc5a3cdceb..3a6e1165419c 100644
--- a/Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml
@@ -30,6 +30,11 @@ properties:
- renesas,usb2-phy-r8a77995 # R-Car D3
- const: renesas,rcar-gen3-usb2-phy
+ - items:
+ - enum:
+ - renesas,usb2-phy-r9a07g044 # RZ/G2{L,LC}
+ - const: renesas,rzg2l-usb2-phy # RZ/G2L family
+
reg:
maxItems: 1
@@ -91,6 +96,16 @@ required:
- clocks
- '#phy-cells'
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: renesas,rzg2l-usb2-phy
+ then:
+ required:
+ - resets
+
additionalProperties: false
examples:
diff --git a/Documentation/devicetree/bindings/phy/samsung,ufs-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,ufs-phy.yaml
index 636cc501b54f..f6ed1a005e7a 100644
--- a/Documentation/devicetree/bindings/phy/samsung,ufs-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/samsung,ufs-phy.yaml
@@ -16,6 +16,7 @@ properties:
compatible:
enum:
- samsung,exynos7-ufs-phy
+ - samsung,exynosautov9-ufs-phy
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.txt b/Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.txt
deleted file mode 100644
index 64b286d2d398..000000000000
--- a/Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.txt
+++ /dev/null
@@ -1,82 +0,0 @@
-TI AM654 SERDES
-
-Required properties:
- - compatible: Should be "ti,phy-am654-serdes"
- - reg : Address and length of the register set for the device.
- - #phy-cells: determine the number of cells that should be given in the
- phandle while referencing this phy. Should be "2". The 1st cell
- corresponds to the phy type (should be one of the types specified in
- include/dt-bindings/phy/phy.h) and the 2nd cell should be the serdes
- lane function.
- If SERDES0 is referenced 2nd cell should be:
- 0 - USB3
- 1 - PCIe0 Lane0
- 2 - ICSS2 SGMII Lane0
- If SERDES1 is referenced 2nd cell should be:
- 0 - PCIe1 Lane0
- 1 - PCIe0 Lane1
- 2 - ICSS2 SGMII Lane1
- - power-domains: As documented by the generic PM domain bindings in
- Documentation/devicetree/bindings/power/power_domain.txt.
- - clocks: List of clock-specifiers representing the input to the SERDES.
- Should have 3 items representing the left input clock, external
- reference clock and right input clock in that order.
- - clock-output-names: List of clock names for each of the clock outputs of
- SERDES. Should have 3 items for CMU reference clock,
- left output clock and right output clock in that order.
- - assigned-clocks: As defined in
- Documentation/devicetree/bindings/clock/clock-bindings.txt
- - assigned-clock-parents: As defined in
- Documentation/devicetree/bindings/clock/clock-bindings.txt
- - #clock-cells: Should be <1> to choose between the 3 output clocks.
- Defined in Documentation/devicetree/bindings/clock/clock-bindings.txt
-
- The following macros are defined in dt-bindings/phy/phy-am654-serdes.h
- for selecting the correct reference clock. This can be used while
- specifying the clocks created by SERDES.
- => AM654_SERDES_CMU_REFCLK
- => AM654_SERDES_LO_REFCLK
- => AM654_SERDES_RO_REFCLK
-
- - mux-controls: Phandle to the multiplexer that is used to select the lane
- function. See #phy-cells above to see the multiplex values.
-
-Example:
-
-Example for SERDES0 is given below. It has 3 clock inputs;
-left input reference clock as indicated by <&k3_clks 153 4>, external
-reference clock as indicated by <&k3_clks 153 1> and right input
-reference clock as indicated by <&serdes1 AM654_SERDES_LO_REFCLK>. (The
-right input of SERDES0 is connected to the left output of SERDES1).
-
-SERDES0 registers 3 clock outputs as indicated in clock-output-names. The
-first refers to the CMU reference clock, second refers to the left output
-reference clock and the third refers to the right output reference clock.
-
-The assigned-clocks and assigned-clock-parents is used here to set the
-parent of left input reference clock to MAINHSDIV_CLKOUT4 and parent of
-CMU reference clock to left input reference clock.
-
-serdes0: serdes@900000 {
- compatible = "ti,phy-am654-serdes";
- reg = <0x0 0x900000 0x0 0x2000>;
- reg-names = "serdes";
- #phy-cells = <2>;
- power-domains = <&k3_pds 153>;
- clocks = <&k3_clks 153 4>, <&k3_clks 153 1>,
- <&serdes1 AM654_SERDES_LO_REFCLK>;
- clock-output-names = "serdes0_cmu_refclk", "serdes0_lo_refclk",
- "serdes0_ro_refclk";
- assigned-clocks = <&k3_clks 153 4>, <&serdes0 AM654_SERDES_CMU_REFCLK>;
- assigned-clock-parents = <&k3_clks 153 8>, <&k3_clks 153 4>;
- ti,serdes-clk = <&serdes0_clk>;
- mux-controls = <&serdes_mux 0>;
- #clock-cells = <1>;
-};
-
-Example for PCIe consumer node using the SERDES PHY specifier is given below.
-&pcie0_rc {
- num-lanes = <2>;
- phys = <&serdes0 PHY_TYPE_PCIE 1>, <&serdes1 PHY_TYPE_PCIE 1>;
- phy-names = "pcie-phy0", "pcie-phy1";
-};
diff --git a/Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.yaml b/Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.yaml
new file mode 100644
index 000000000000..62dcb84c08aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.yaml
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/ti,phy-am654-serdes.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI AM654 SERDES binding
+
+description:
+ This binding describes the TI AM654 SERDES. AM654 SERDES can be configured
+ to be used with either PCIe or USB or SGMII.
+
+maintainers:
+ - Kishon Vijay Abraham I <kishon@ti.com>
+
+properties:
+ compatible:
+ enum:
+ - ti,phy-am654-serdes
+
+ reg:
+ maxItems: 1
+
+ reg-names:
+ items:
+ - const: serdes
+
+ power-domains:
+ maxItems: 1
+
+ clocks:
+ maxItems: 3
+ description:
+ Three input clocks referring to left input reference clock, refclk and right input reference
+ clock.
+
+ assigned-clocks:
+ $ref: "/schemas/types.yaml#/definitions/phandle-array"
+ assigned-clock-parents:
+ $ref: "/schemas/types.yaml#/definitions/phandle-array"
+
+ '#phy-cells':
+ const: 2
+ description:
+ The 1st cell corresponds to the phy type (should be one of the types specified in
+ include/dt-bindings/phy/phy.h) and the 2nd cell should be the serdes lane function.
+
+ ti,serdes-clk:
+ description: Phandle to the SYSCON entry required for configuring SERDES clock selection.
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ '#clock-cells':
+ const: 1
+
+ mux-controls:
+ maxItems: 1
+ description: Phandle to the SYSCON entry required for configuring SERDES lane function.
+
+ clock-output-names:
+ oneOf:
+ - description: Clock output names for SERDES 0
+ items:
+ - const: serdes0_cmu_refclk
+ - const: serdes0_lo_refclk
+ - const: serdes0_ro_refclk
+ - description: Clock output names for SERDES 1
+ items:
+ - const: serdes1_cmu_refclk
+ - const: serdes1_lo_refclk
+ - const: serdes1_ro_refclk
+
+required:
+ - compatible
+ - reg
+ - power-domains
+ - clocks
+ - assigned-clocks
+ - assigned-clock-parents
+ - ti,serdes-clk
+ - mux-controls
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/phy/phy-am654-serdes.h>
+
+ serdes0: serdes@900000 {
+ compatible = "ti,phy-am654-serdes";
+ reg = <0x900000 0x2000>;
+ reg-names = "serdes";
+ #phy-cells = <2>;
+ power-domains = <&k3_pds 153>;
+ clocks = <&k3_clks 153 4>, <&k3_clks 153 1>,
+ <&serdes1 AM654_SERDES_LO_REFCLK>;
+ clock-output-names = "serdes0_cmu_refclk", "serdes0_lo_refclk", "serdes0_ro_refclk";
+ assigned-clocks = <&k3_clks 153 4>, <&serdes0 AM654_SERDES_CMU_REFCLK>;
+ assigned-clock-parents = <&k3_clks 153 8>, <&k3_clks 153 4>;
+ ti,serdes-clk = <&serdes0_clk>;
+ mux-controls = <&serdes_mux 0>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/power/supply/battery.yaml b/Documentation/devicetree/bindings/power/supply/battery.yaml
index c3b4b7543591..d56ac484fec5 100644
--- a/Documentation/devicetree/bindings/power/supply/battery.yaml
+++ b/Documentation/devicetree/bindings/power/supply/battery.yaml
@@ -31,6 +31,20 @@ properties:
compatible:
const: simple-battery
+ device-chemistry:
+ description: This describes the chemical technology of the battery.
+ oneOf:
+ - const: nickel-cadmium
+ - const: nickel-metal-hydride
+ - const: lithium-ion
+ description: This is a blanket type for all lithium-ion batteries,
+ including those below. If possible, a more precise chemistry value
+ from below should be used, but sometimes it is unknown which specific
+ lithium-ion battery is employed and this generic value can be used.
+ - const: lithium-ion-polymer
+ - const: lithium-ion-iron-phosphate
+ - const: lithium-ion-manganese-oxide
+
over-voltage-threshold-microvolt:
description: battery over-voltage limit
diff --git a/Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml b/Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml
index c70f05ea6d27..971b53c58cc6 100644
--- a/Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml
+++ b/Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml
@@ -19,12 +19,15 @@ properties:
- maxim,max17047
- maxim,max17050
- maxim,max17055
+ - maxim,max77849-battery
reg:
maxItems: 1
interrupts:
maxItems: 1
+ description: |
+ The ALRT pin, an open-drain interrupt.
maxim,rsns-microohm:
$ref: /schemas/types.yaml#/definitions/uint32
diff --git a/Documentation/devicetree/bindings/power/supply/mt6360_charger.yaml b/Documentation/devicetree/bindings/power/supply/mt6360_charger.yaml
new file mode 100644
index 000000000000..b89b15a5bfa4
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/mt6360_charger.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/supply/mt6360_charger.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Battery charger driver for MT6360 PMIC from MediaTek Integrated.
+
+maintainers:
+ - Gene Chen <gene_chen@richtek.com>
+
+description: |
+ This module is part of the MT6360 MFD device.
+ Provides Battery Charger, Boost for OTG devices and BC1.2 detection.
+
+properties:
+ compatible:
+ const: mediatek,mt6360-chg
+
+ richtek,vinovp-microvolt:
+ description: Maximum CHGIN regulation voltage in uV.
+ enum: [ 5500000, 6500000, 11000000, 14500000 ]
+
+
+ usb-otg-vbus-regulator:
+ type: object
+ description: OTG boost regulator.
+ $ref: /schemas/regulator/regulator.yaml#
+
+required:
+ - compatible
+
+additionalProperties: false
+
+examples:
+ - |
+ mt6360_charger: charger {
+ compatible = "mediatek,mt6360-chg";
+ richtek,vinovp-microvolt = <14500000>;
+
+ otg_vbus_regulator: usb-otg-vbus-regulator {
+ regulator-compatible = "usb-otg-vbus";
+ regulator-name = "usb-otg-vbus";
+ regulator-min-microvolt = <4425000>;
+ regulator-max-microvolt = <5825000>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml b/Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml
index 983fc215c1e5..20862cdfc116 100644
--- a/Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml
+++ b/Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml
@@ -73,6 +73,26 @@ properties:
- 1 # SMB3XX_SOFT_TEMP_COMPENSATE_CURRENT Current compensation
- 2 # SMB3XX_SOFT_TEMP_COMPENSATE_VOLTAGE Voltage compensation
+ summit,inok-polarity:
+ description: |
+ Polarity of INOK signal indicating presence of external power supply.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum:
+ - 0 # SMB3XX_SYSOK_INOK_ACTIVE_LOW
+ - 1 # SMB3XX_SYSOK_INOK_ACTIVE_HIGH
+
+ usb-vbus:
+ $ref: "../../regulator/regulator.yaml#"
+ type: object
+
+ properties:
+ summit,needs-inok-toggle:
+ type: boolean
+ description: INOK signal is fixed and polarity needs to be toggled
+ in order to enable/disable output mode.
+
+ unevaluatedProperties: false
+
allOf:
- if:
properties:
@@ -134,6 +154,7 @@ examples:
reg = <0x7f>;
summit,enable-charge-control = <SMB3XX_CHG_ENABLE_PIN_ACTIVE_HIGH>;
+ summit,inok-polarity = <SMB3XX_SYSOK_INOK_ACTIVE_LOW>;
summit,chip-temperature-threshold-celsius = <110>;
summit,mains-current-limit-microamp = <2000000>;
summit,usb-current-limit-microamp = <500000>;
@@ -141,6 +162,15 @@ examples:
summit,enable-mains-charging;
monitored-battery = <&battery>;
+
+ usb-vbus {
+ regulator-name = "usb_vbus";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ regulator-min-microamp = <750000>;
+ regulator-max-microamp = <750000>;
+ summit,needs-inok-toggle;
+ };
};
};
diff --git a/Documentation/devicetree/bindings/power/supply/x-powers,axp20x-ac-power-supply.yaml b/Documentation/devicetree/bindings/power/supply/x-powers,axp20x-ac-power-supply.yaml
index dcda6660b8ed..de6a23aee977 100644
--- a/Documentation/devicetree/bindings/power/supply/x-powers,axp20x-ac-power-supply.yaml
+++ b/Documentation/devicetree/bindings/power/supply/x-powers,axp20x-ac-power-supply.yaml
@@ -21,10 +21,13 @@ allOf:
properties:
compatible:
- enum:
- - x-powers,axp202-ac-power-supply
- - x-powers,axp221-ac-power-supply
- - x-powers,axp813-ac-power-supply
+ oneOf:
+ - const: x-powers,axp202-ac-power-supply
+ - const: x-powers,axp221-ac-power-supply
+ - items:
+ - const: x-powers,axp803-ac-power-supply
+ - const: x-powers,axp813-ac-power-supply
+ - const: x-powers,axp813-ac-power-supply
required:
- compatible
diff --git a/Documentation/devicetree/bindings/power/supply/x-powers,axp20x-battery-power-supply.yaml b/Documentation/devicetree/bindings/power/supply/x-powers,axp20x-battery-power-supply.yaml
index 86e8a713d4e2..d055428ae39f 100644
--- a/Documentation/devicetree/bindings/power/supply/x-powers,axp20x-battery-power-supply.yaml
+++ b/Documentation/devicetree/bindings/power/supply/x-powers,axp20x-battery-power-supply.yaml
@@ -19,10 +19,14 @@ allOf:
properties:
compatible:
- enum:
- - x-powers,axp209-battery-power-supply
- - x-powers,axp221-battery-power-supply
- - x-powers,axp813-battery-power-supply
+ oneOf:
+ - const: x-powers,axp202-battery-power-supply
+ - const: x-powers,axp209-battery-power-supply
+ - const: x-powers,axp221-battery-power-supply
+ - items:
+ - const: x-powers,axp803-battery-power-supply
+ - const: x-powers,axp813-battery-power-supply
+ - const: x-powers,axp813-battery-power-supply
required:
- compatible
diff --git a/Documentation/devicetree/bindings/power/supply/x-powers,axp20x-usb-power-supply.yaml b/Documentation/devicetree/bindings/power/supply/x-powers,axp20x-usb-power-supply.yaml
index 61f1b320c157..0c371b55c9e1 100644
--- a/Documentation/devicetree/bindings/power/supply/x-powers,axp20x-usb-power-supply.yaml
+++ b/Documentation/devicetree/bindings/power/supply/x-powers,axp20x-usb-power-supply.yaml
@@ -20,11 +20,15 @@ allOf:
properties:
compatible:
- enum:
- - x-powers,axp202-usb-power-supply
- - x-powers,axp221-usb-power-supply
- - x-powers,axp223-usb-power-supply
- - x-powers,axp813-usb-power-supply
+ oneOf:
+ - enum:
+ - x-powers,axp202-usb-power-supply
+ - x-powers,axp221-usb-power-supply
+ - x-powers,axp223-usb-power-supply
+ - x-powers,axp813-usb-power-supply
+ - items:
+ - const: x-powers,axp803-usb-power-supply
+ - const: x-powers,axp813-usb-power-supply
required:
diff --git a/Documentation/devicetree/bindings/regulator/richtek,rtq2134-regulator.yaml b/Documentation/devicetree/bindings/regulator/richtek,rtq2134-regulator.yaml
new file mode 100644
index 000000000000..3f47e8e6c4fd
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/richtek,rtq2134-regulator.yaml
@@ -0,0 +1,106 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/richtek,rtq2134-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Richtek RTQ2134 SubPMIC Regulator
+
+maintainers:
+ - ChiYuan Huang <cy_huang@richtek.com>
+
+description: |
+ The RTQ2134 is a multi-phase, programmable power management IC that
+ integrates with four high efficient, synchronous step-down converter cores.
+
+ Datasheet is available at
+ https://www.richtek.com/assets/product_file/RTQ2134-QA/DSQ2134-QA-01.pdf
+
+properties:
+ compatible:
+ enum:
+ - richtek,rtq2134
+
+ reg:
+ maxItems: 1
+
+ regulators:
+ type: object
+
+ patternProperties:
+ "^buck[1-3]$":
+ type: object
+ $ref: regulator.yaml#
+ description: |
+ regulator description for buck[1-3].
+
+ properties:
+ richtek,use-vsel-dvs:
+ type: boolean
+ description: |
+ If specified, buck will listen to 'vsel' pin for dvs config.
+ Else, use dvs0 voltage by default.
+
+ richtek,uv-shutdown:
+ type: boolean
+ description: |
+ If specified, use shutdown as UV action. Else, hiccup by default.
+
+ unevaluatedProperties: false
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - regulators
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ rtq2134@18 {
+ compatible = "richtek,rtq2134";
+ reg = <0x18>;
+
+ regulators {
+ buck1 {
+ regulator-name = "rtq2134-buck1";
+ regulator-min-microvolt = <300000>;
+ regulator-max-microvolt = <1850000>;
+ regulator-always-on;
+ richtek,use-vsel-dvs;
+ regulator-state-mem {
+ regulator-suspend-min-microvolt = <550000>;
+ regulator-suspend-max-microvolt = <550000>;
+ };
+ };
+ buck2 {
+ regulator-name = "rtq2134-buck2";
+ regulator-min-microvolt = <1120000>;
+ regulator-max-microvolt = <1120000>;
+ regulator-always-on;
+ richtek,use-vsel-dvs;
+ regulator-state-mem {
+ regulator-suspend-min-microvolt = <1120000>;
+ regulator-suspend-max-microvolt = <1120000>;
+ };
+ };
+ buck3 {
+ regulator-name = "rtq2134-buck3";
+ regulator-min-microvolt = <600000>;
+ regulator-max-microvolt = <600000>;
+ regulator-always-on;
+ richtek,use-vsel-dvs;
+ regulator-state-mem {
+ regulator-suspend-min-microvolt = <600000>;
+ regulator-suspend-max-microvolt = <600000>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/richtek,rtq6752-regulator.yaml b/Documentation/devicetree/bindings/regulator/richtek,rtq6752-regulator.yaml
new file mode 100644
index 000000000000..e6e5a9a7d940
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/richtek,rtq6752-regulator.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/richtek,rtq6752-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Richtek RTQ6752 TFT LCD Voltage Regulator
+
+maintainers:
+ - ChiYuan Huang <cy_huang@richtek.com>
+
+description: |
+ The RTQ6752 is an I2C interface programmable power management IC. It includes
+ two synchronous boost converter for PAVDD, and one synchronous NAVDD
+ buck-boost. The device is suitable for automotive TFT-LCD panel.
+
+properties:
+ compatible:
+ enum:
+ - richtek,rtq6752
+
+ reg:
+ maxItems: 1
+
+ enable-gpios:
+ description: |
+ A connection of the chip 'enable' gpio line. If not provided, treat it as
+ external pull up.
+ maxItems: 1
+
+ regulators:
+ type: object
+
+ patternProperties:
+ "^(p|n)avdd$":
+ type: object
+ $ref: regulator.yaml#
+ description: |
+ regulator description for pavdd and navdd.
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - regulators
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ rtq6752@6b {
+ compatible = "richtek,rtq6752";
+ reg = <0x6b>;
+ enable-gpios = <&gpio26 2 0>;
+
+ regulators {
+ pavdd {
+ regulator-name = "rtq6752-pavdd";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <7300000>;
+ regulator-boot-on;
+ };
+ navdd {
+ regulator-name = "rtq6752-navdd";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <7300000>;
+ regulator-boot-on;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/socionext,uniphier-regulator.yaml b/Documentation/devicetree/bindings/regulator/socionext,uniphier-regulator.yaml
new file mode 100644
index 000000000000..861d5f3c79e8
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/socionext,uniphier-regulator.yaml
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/socionext,uniphier-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Socionext UniPhier regulator controller
+
+description: |
+ This regulator controls VBUS and belongs to USB3 glue layer. Before using
+ the regulator, it is necessary to control the clocks and resets to enable
+ this layer. These clocks and resets should be described in each property.
+
+maintainers:
+ - Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
+
+allOf:
+ - $ref: "regulator.yaml#"
+
+# USB3 Controller
+
+properties:
+ compatible:
+ enum:
+ - socionext,uniphier-pro4-usb3-regulator
+ - socionext,uniphier-pro5-usb3-regulator
+ - socionext,uniphier-pxs2-usb3-regulator
+ - socionext,uniphier-ld20-usb3-regulator
+ - socionext,uniphier-pxs3-usb3-regulator
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ oneOf:
+ - items: # for Pro4, Pro5
+ - const: gio
+ - const: link
+ - items: # for others
+ - const: link
+
+ resets:
+ minItems: 1
+ maxItems: 2
+
+ reset-names:
+ oneOf:
+ - items: # for Pro4, Pro5
+ - const: gio
+ - const: link
+ - items:
+ - const: link
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - resets
+ - reset-names
+
+examples:
+ - |
+ usb-glue@65b00000 {
+ compatible = "simple-mfd";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0x65b00000 0x400>;
+
+ usb_vbus0: regulators@100 {
+ compatible = "socionext,uniphier-ld20-usb3-regulator";
+ reg = <0x100 0x10>;
+ clock-names = "link";
+ clocks = <&sys_clk 14>;
+ reset-names = "link";
+ resets = <&sys_rst 14>;
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/regulator/uniphier-regulator.txt b/Documentation/devicetree/bindings/regulator/uniphier-regulator.txt
deleted file mode 100644
index 94fd38b0d163..000000000000
--- a/Documentation/devicetree/bindings/regulator/uniphier-regulator.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-Socionext UniPhier Regulator Controller
-
-This describes the devicetree bindings for regulator controller implemented
-on Socionext UniPhier SoCs.
-
-USB3 Controller
----------------
-
-This regulator controls VBUS and belongs to USB3 glue layer. Before using
-the regulator, it is necessary to control the clocks and resets to enable
-this layer. These clocks and resets should be described in each property.
-
-Required properties:
-- compatible: Should be
- "socionext,uniphier-pro4-usb3-regulator" - for Pro4 SoC
- "socionext,uniphier-pro5-usb3-regulator" - for Pro5 SoC
- "socionext,uniphier-pxs2-usb3-regulator" - for PXs2 SoC
- "socionext,uniphier-ld20-usb3-regulator" - for LD20 SoC
- "socionext,uniphier-pxs3-usb3-regulator" - for PXs3 SoC
-- reg: Specifies offset and length of the register set for the device.
-- clocks: A list of phandles to the clock gate for USB3 glue layer.
- According to the clock-names, appropriate clocks are required.
-- clock-names: Should contain
- "gio", "link" - for Pro4 and Pro5 SoCs
- "link" - for others
-- resets: A list of phandles to the reset control for USB3 glue layer.
- According to the reset-names, appropriate resets are required.
-- reset-names: Should contain
- "gio", "link" - for Pro4 and Pro5 SoCs
- "link" - for others
-
-See Documentation/devicetree/bindings/regulator/regulator.txt
-for more details about the regulator properties.
-
-Example:
-
- usb-glue@65b00000 {
- compatible = "socionext,uniphier-ld20-dwc3-glue",
- "simple-mfd";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0 0x65b00000 0x400>;
-
- usb_vbus0: regulators@100 {
- compatible = "socionext,uniphier-ld20-usb3-regulator";
- reg = <0x100 0x10>;
- clock-names = "link";
- clocks = <&sys_clk 14>;
- reset-names = "link";
- resets = <&sys_rst 14>;
- };
-
- phy {
- ...
- phy-supply = <&usb_vbus0>;
- };
- ...
- };
diff --git a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml
index 1d38ff76d18f..2b1f91603897 100644
--- a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml
+++ b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml
@@ -24,10 +24,10 @@ allOf:
select:
properties:
compatible:
- items:
- - enum:
- - sifive,fu540-c000-ccache
- - sifive,fu740-c000-ccache
+ contains:
+ enum:
+ - sifive,fu540-c000-ccache
+ - sifive,fu740-c000-ccache
required:
- compatible
diff --git a/Documentation/devicetree/bindings/spi/omap-spi.txt b/Documentation/devicetree/bindings/spi/omap-spi.txt
deleted file mode 100644
index 487208c256c0..000000000000
--- a/Documentation/devicetree/bindings/spi/omap-spi.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-OMAP2+ McSPI device
-
-Required properties:
-- compatible :
- - "ti,am654-mcspi" for AM654.
- - "ti,omap2-mcspi" for OMAP2 & OMAP3.
- - "ti,omap4-mcspi" for OMAP4+.
-- ti,spi-num-cs : Number of chipselect supported by the instance.
-- ti,hwmods: Name of the hwmod associated to the McSPI
-- ti,pindir-d0-out-d1-in: Select the D0 pin as output and D1 as
- input. The default is D0 as input and
- D1 as output.
-
-Optional properties:
-- dmas: List of DMA specifiers with the controller specific format
- as described in the generic DMA client binding. A tx and rx
- specifier is required for each chip select.
-- dma-names: List of DMA request names. These strings correspond
- 1:1 with the DMA specifiers listed in dmas. The string naming
- is to be "rxN" and "txN" for RX and TX requests,
- respectively, where N equals the chip select number.
-
-Examples:
-
-[hwmod populated DMA resources]
-
-mcspi1: mcspi@1 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "ti,omap4-mcspi";
- ti,hwmods = "mcspi1";
- ti,spi-num-cs = <4>;
-};
-
-[generic DMA request binding]
-
-mcspi1: mcspi@1 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "ti,omap4-mcspi";
- ti,hwmods = "mcspi1";
- ti,spi-num-cs = <2>;
- dmas = <&edma 42
- &edma 43
- &edma 44
- &edma 45>;
- dma-names = "tx0", "rx0", "tx1", "rx1";
-};
diff --git a/Documentation/devicetree/bindings/spi/omap-spi.yaml b/Documentation/devicetree/bindings/spi/omap-spi.yaml
new file mode 100644
index 000000000000..e55538186cf6
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/omap-spi.yaml
@@ -0,0 +1,117 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/omap-spi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SPI controller bindings for OMAP and K3 SoCs
+
+maintainers:
+ - Aswath Govindraju <a-govindraju@ti.com>
+
+allOf:
+ - $ref: spi-controller.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - ti,am654-mcspi
+ - ti,am4372-mcspi
+ - const: ti,omap4-mcspi
+ - items:
+ - enum:
+ - ti,omap2-mcspi
+ - ti,omap4-mcspi
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ ti,spi-num-cs:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Number of chipselect supported by the instance.
+ minimum: 1
+ maximum: 4
+
+ ti,hwmods:
+ $ref: /schemas/types.yaml#/definitions/string
+ description:
+ Must be "mcspi<n>", n being the instance number (1-based).
+ This property is applicable only on legacy platforms mainly omap2/3
+ and ti81xx and should not be used on other platforms.
+ deprecated: true
+
+ ti,pindir-d0-out-d1-in:
+ description:
+ Select the D0 pin as output and D1 as input. The default is D0
+ as input and D1 as output.
+ type: boolean
+
+ dmas:
+ description:
+ List of DMA specifiers with the controller specific format as
+ described in the generic DMA client binding. A tx and rx
+ specifier is required for each chip select.
+ minItems: 1
+ maxItems: 8
+
+ dma-names:
+ description:
+ List of DMA request names. These strings correspond 1:1 with
+ the DMA specifiers listed in dmas. The string names are to be
+ "rxN" and "txN" for RX and TX requests, respectively. Where N
+ is the chip select number.
+ minItems: 1
+ maxItems: 8
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+unevaluatedProperties: false
+
+if:
+ properties:
+ compatible:
+ oneOf:
+ - const: ti,omap2-mcspi
+ - const: ti,omap4-mcspi
+
+then:
+ properties:
+ ti,hwmods:
+ items:
+ - pattern: "^mcspi([1-9])$"
+
+else:
+ properties:
+ ti,hwmods: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/soc/ti,sci_pm_domain.h>
+
+ spi@2100000 {
+ compatible = "ti,am654-mcspi","ti,omap4-mcspi";
+ reg = <0x2100000 0x400>;
+ interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&k3_clks 137 1>;
+ power-domains = <&k3_pds 137 TI_SCI_PD_EXCLUSIVE>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ dmas = <&main_udmap 0xc500>, <&main_udmap 0x4500>;
+ dma-names = "tx0", "rx0";
+ };
diff --git a/Documentation/devicetree/bindings/spi/rockchip-sfc.yaml b/Documentation/devicetree/bindings/spi/rockchip-sfc.yaml
new file mode 100644
index 000000000000..339fb39529f3
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/rockchip-sfc.yaml
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/rockchip-sfc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip Serial Flash Controller (SFC)
+
+maintainers:
+ - Heiko Stuebner <heiko@sntech.de>
+ - Chris Morgan <macromorgan@hotmail.com>
+
+allOf:
+ - $ref: spi-controller.yaml#
+
+properties:
+ compatible:
+ const: rockchip,sfc
+ description:
+ The rockchip sfc controller is a standalone IP with version register,
+ and the driver can handle all the feature difference inside the IP
+ depending on the version register.
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Bus Clock
+ - description: Module Clock
+
+ clock-names:
+ items:
+ - const: clk_sfc
+ - const: hclk_sfc
+
+ power-domains:
+ maxItems: 1
+
+ rockchip,sfc-no-dma:
+ description: Disable DMA and utilize FIFO mode only
+ type: boolean
+
+patternProperties:
+ "^flash@[0-3]$":
+ type: object
+ properties:
+ reg:
+ minimum: 0
+ maximum: 3
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/px30-cru.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/power/px30-power.h>
+
+ sfc: spi@ff3a0000 {
+ compatible = "rockchip,sfc";
+ reg = <0xff3a0000 0x4000>;
+ interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru SCLK_SFC>, <&cru HCLK_SFC>;
+ clock-names = "clk_sfc", "hclk_sfc";
+ pinctrl-0 = <&sfc_clk &sfc_cs &sfc_bus2>;
+ pinctrl-names = "default";
+ power-domains = <&power PX30_PD_MMC_NAND>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <108000000>;
+ spi-rx-bus-width = <2>;
+ spi-tx-bus-width = <2>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/spi/spi-mt65xx.txt b/Documentation/devicetree/bindings/spi/spi-mt65xx.txt
index 4d0e4c15c4ea..2a24969159cc 100644
--- a/Documentation/devicetree/bindings/spi/spi-mt65xx.txt
+++ b/Documentation/devicetree/bindings/spi/spi-mt65xx.txt
@@ -11,6 +11,7 @@ Required properties:
- mediatek,mt8135-spi: for mt8135 platforms
- mediatek,mt8173-spi: for mt8173 platforms
- mediatek,mt8183-spi: for mt8183 platforms
+ - mediatek,mt6893-spi: for mt6893 platforms
- "mediatek,mt8192-spi", "mediatek,mt6765-spi": for mt8192 platforms
- "mediatek,mt8195-spi", "mediatek,mt6765-spi": for mt8195 platforms
- "mediatek,mt8516-spi", "mediatek,mt2712-spi": for mt8516 platforms
diff --git a/Documentation/devicetree/bindings/spi/spi-sprd-adi.txt b/Documentation/devicetree/bindings/spi/spi-sprd-adi.txt
deleted file mode 100644
index 2567c829e2dc..000000000000
--- a/Documentation/devicetree/bindings/spi/spi-sprd-adi.txt
+++ /dev/null
@@ -1,63 +0,0 @@
-Spreadtrum ADI controller
-
-ADI is the abbreviation of Anolog-Digital interface, which is used to access
-analog chip (such as PMIC) from digital chip. ADI controller follows the SPI
-framework for its hardware implementation is alike to SPI bus and its timing
-is compatile to SPI timing.
-
-ADI controller has 50 channels including 2 software read/write channels and
-48 hardware channels to access analog chip. For 2 software read/write channels,
-users should set ADI registers to access analog chip. For hardware channels,
-we can configure them to allow other hardware components to use it independently,
-which means we can just link one analog chip address to one hardware channel,
-then users can access the mapped analog chip address by this hardware channel
-triggered by hardware components instead of ADI software channels.
-
-Thus we introduce one property named "sprd,hw-channels" to configure hardware
-channels, the first value specifies the hardware channel id which is used to
-transfer data triggered by hardware automatically, and the second value specifies
-the analog chip address where user want to access by hardware components.
-
-Since we have multi-subsystems will use unique ADI to access analog chip, when
-one system is reading/writing data by ADI software channels, that should be under
-one hardware spinlock protection to prevent other systems from reading/writing
-data by ADI software channels at the same time, or two parallel routine of setting
-ADI registers will make ADI controller registers chaos to lead incorrect results.
-Then we need one hardware spinlock to synchronize between the multiple subsystems.
-
-The new version ADI controller supplies multiple master channels for different
-subsystem accessing, that means no need to add hardware spinlock to synchronize,
-thus change the hardware spinlock support to be optional to keep backward
-compatibility.
-
-Required properties:
-- compatible: Should be "sprd,sc9860-adi".
-- reg: Offset and length of ADI-SPI controller register space.
-- #address-cells: Number of cells required to define a chip select address
- on the ADI-SPI bus. Should be set to 1.
-- #size-cells: Size of cells required to define a chip select address size
- on the ADI-SPI bus. Should be set to 0.
-
-Optional properties:
-- hwlocks: Reference to a phandle of a hwlock provider node.
-- hwlock-names: Reference to hwlock name strings defined in the same order
- as the hwlocks, should be "adi".
-- sprd,hw-channels: This is an array of channel values up to 49 channels.
- The first value specifies the hardware channel id which is used to
- transfer data triggered by hardware automatically, and the second
- value specifies the analog chip address where user want to access
- by hardware components.
-
-SPI slave nodes must be children of the SPI controller node and can contain
-properties described in Documentation/devicetree/bindings/spi/spi-bus.txt.
-
-Example:
- adi_bus: spi@40030000 {
- compatible = "sprd,sc9860-adi";
- reg = <0 0x40030000 0 0x10000>;
- hwlocks = <&hwlock1 0>;
- hwlock-names = "adi";
- #address-cells = <1>;
- #size-cells = <0>;
- sprd,hw-channels = <30 0x8c20>;
- };
diff --git a/Documentation/devicetree/bindings/spi/sprd,spi-adi.yaml b/Documentation/devicetree/bindings/spi/sprd,spi-adi.yaml
new file mode 100644
index 000000000000..fe014020da69
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/sprd,spi-adi.yaml
@@ -0,0 +1,104 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/spi/sprd,spi-adi.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Spreadtrum ADI controller
+
+maintainers:
+ - Orson Zhai <orsonzhai@gmail.com>
+ - Baolin Wang <baolin.wang7@gmail.com>
+ - Chunyan Zhang <zhang.lyra@gmail.com>
+
+description: |
+ ADI is the abbreviation of Analog-Digital interface, which is used to access
+ analog chip (such as PMIC) from digital chip. ADI controller follows the SPI
+ framework for its hardware implementation is alike to SPI bus and its timing
+ is compatible with SPI timing.
+
+ ADI controller has 50 channels including 2 software read/write channels and
+ 48 hardware channels to access analog chip. For 2 software read/write channels,
+ users should set ADI registers to access analog chip. For hardware channels,
+ we can configure them to allow other hardware components to use it independently,
+ which means we can just link one analog chip address to one hardware channel,
+ then users can access the mapped analog chip address by this hardware channel
+ triggered by hardware components instead of ADI software channels.
+
+ Thus we introduce one property named "sprd,hw-channels" to configure hardware
+ channels, the first value specifies the hardware channel id which is used to
+ transfer data triggered by hardware automatically, and the second value specifies
+ the analog chip address where user want to access by hardware components.
+
+ Since we have multi-subsystems will use unique ADI to access analog chip, when
+ one system is reading/writing data by ADI software channels, that should be under
+ one hardware spinlock protection to prevent other systems from reading/writing
+ data by ADI software channels at the same time, or two parallel routine of setting
+ ADI registers will make ADI controller registers chaos to lead incorrect results.
+ Then we need one hardware spinlock to synchronize between the multiple subsystems.
+
+ The new version ADI controller supplies multiple master channels for different
+ subsystem accessing, that means no need to add hardware spinlock to synchronize,
+ thus change the hardware spinlock support to be optional to keep backward
+ compatibility.
+
+allOf:
+ - $ref: /spi/spi-controller.yaml#
+
+properties:
+ compatible:
+ enum:
+ - sprd,sc9860-adi
+ - sprd,sc9863-adi
+ - sprd,ums512-adi
+
+ reg:
+ maxItems: 1
+
+ hwlocks:
+ maxItems: 1
+
+ hwlock-names:
+ const: adi
+
+ sprd,hw-channels:
+ $ref: /schemas/types.yaml#/definitions/uint32-matrix
+ description: A list of hardware channels
+ minItems: 1
+ maxItems: 48
+ items:
+ items:
+ - description: The hardware channel id which is used to transfer data
+ triggered by hardware automatically, channel id 0-1 are for software
+ use, 2-49 are hardware channels.
+ minimum: 2
+ maximum: 49
+ - description: The analog chip address where user want to access by
+ hardware components.
+
+required:
+ - compatible
+ - reg
+ - '#address-cells'
+ - '#size-cells'
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ aon {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ adi_bus: spi@40030000 {
+ compatible = "sprd,sc9860-adi";
+ reg = <0 0x40030000 0 0x10000>;
+ hwlocks = <&hwlock1 0>;
+ hwlock-names = "adi";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ sprd,hw-channels = <30 0x8c20>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/timer/rockchip,rk-timer.txt b/Documentation/devicetree/bindings/timer/rockchip,rk-timer.txt
deleted file mode 100644
index d65fdce7c7f0..000000000000
--- a/Documentation/devicetree/bindings/timer/rockchip,rk-timer.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-Rockchip rk timer
-
-Required properties:
-- compatible: should be:
- "rockchip,rv1108-timer", "rockchip,rk3288-timer": for Rockchip RV1108
- "rockchip,rk3036-timer", "rockchip,rk3288-timer": for Rockchip RK3036
- "rockchip,rk3066-timer", "rockchip,rk3288-timer": for Rockchip RK3066
- "rockchip,rk3188-timer", "rockchip,rk3288-timer": for Rockchip RK3188
- "rockchip,rk3228-timer", "rockchip,rk3288-timer": for Rockchip RK3228
- "rockchip,rk3229-timer", "rockchip,rk3288-timer": for Rockchip RK3229
- "rockchip,rk3288-timer": for Rockchip RK3288
- "rockchip,rk3368-timer", "rockchip,rk3288-timer": for Rockchip RK3368
- "rockchip,rk3399-timer": for Rockchip RK3399
-- reg: base address of the timer register starting with TIMERS CONTROL register
-- interrupts: should contain the interrupts for Timer0
-- clocks : must contain an entry for each entry in clock-names
-- clock-names : must include the following entries:
- "timer", "pclk"
-
-Example:
- timer: timer@ff810000 {
- compatible = "rockchip,rk3288-timer";
- reg = <0xff810000 0x20>;
- interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&xin24m>, <&cru PCLK_TIMER>;
- clock-names = "timer", "pclk";
- };
diff --git a/Documentation/devicetree/bindings/timer/rockchip,rk-timer.yaml b/Documentation/devicetree/bindings/timer/rockchip,rk-timer.yaml
new file mode 100644
index 000000000000..e26ecb5893ae
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/rockchip,rk-timer.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/rockchip,rk-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip Timer Device Tree Bindings
+
+maintainers:
+ - Daniel Lezcano <daniel.lezcano@linaro.org>
+
+properties:
+ compatible:
+ oneOf:
+ - const: rockchip,rk3288-timer
+ - const: rockchip,rk3399-timer
+ - items:
+ - enum:
+ - rockchip,rv1108-timer
+ - rockchip,rk3036-timer
+ - rockchip,rk3066-timer
+ - rockchip,rk3188-timer
+ - rockchip,rk3228-timer
+ - rockchip,rk3229-timer
+ - rockchip,rk3288-timer
+ - rockchip,rk3368-timer
+ - rockchip,px30-timer
+ - const: rockchip,rk3288-timer
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ minItems: 2
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: pclk
+ - const: timer
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/rk3288-cru.h>
+
+ timer: timer@ff810000 {
+ compatible = "rockchip,rk3288-timer";
+ reg = <0xff810000 0x20>;
+ interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru PCLK_TIMER>, <&xin24m>;
+ clock-names = "pclk", "timer";
+ };
diff --git a/Documentation/driver-api/fpga/fpga-bridge.rst b/Documentation/driver-api/fpga/fpga-bridge.rst
index 198aadafd3e7..8d650b4e2ce6 100644
--- a/Documentation/driver-api/fpga/fpga-bridge.rst
+++ b/Documentation/driver-api/fpga/fpga-bridge.rst
@@ -4,11 +4,11 @@ FPGA Bridge
API to implement a new FPGA bridge
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-* struct fpga_bridge — The FPGA Bridge structure
-* struct fpga_bridge_ops — Low level Bridge driver ops
-* devm_fpga_bridge_create() — Allocate and init a bridge struct
-* fpga_bridge_register() — Register a bridge
-* fpga_bridge_unregister() — Unregister a bridge
+* struct fpga_bridge - The FPGA Bridge structure
+* struct fpga_bridge_ops - Low level Bridge driver ops
+* devm_fpga_bridge_create() - Allocate and init a bridge struct
+* fpga_bridge_register() - Register a bridge
+* fpga_bridge_unregister() - Unregister a bridge
.. kernel-doc:: include/linux/fpga/fpga-bridge.h
:functions: fpga_bridge
diff --git a/Documentation/driver-api/fpga/fpga-mgr.rst b/Documentation/driver-api/fpga/fpga-mgr.rst
index 917ee22db429..4d926b452cb3 100644
--- a/Documentation/driver-api/fpga/fpga-mgr.rst
+++ b/Documentation/driver-api/fpga/fpga-mgr.rst
@@ -101,12 +101,12 @@ in state.
API for implementing a new FPGA Manager driver
----------------------------------------------
-* ``fpga_mgr_states`` — Values for :c:expr:`fpga_manager->state`.
-* struct fpga_manager — the FPGA manager struct
-* struct fpga_manager_ops — Low level FPGA manager driver ops
-* devm_fpga_mgr_create() — Allocate and init a manager struct
-* fpga_mgr_register() — Register an FPGA manager
-* fpga_mgr_unregister() — Unregister an FPGA manager
+* ``fpga_mgr_states`` - Values for :c:expr:`fpga_manager->state`.
+* struct fpga_manager - the FPGA manager struct
+* struct fpga_manager_ops - Low level FPGA manager driver ops
+* devm_fpga_mgr_create() - Allocate and init a manager struct
+* fpga_mgr_register() - Register an FPGA manager
+* fpga_mgr_unregister() - Unregister an FPGA manager
.. kernel-doc:: include/linux/fpga/fpga-mgr.h
:functions: fpga_mgr_states
diff --git a/Documentation/driver-api/fpga/fpga-programming.rst b/Documentation/driver-api/fpga/fpga-programming.rst
index 002392dab04f..fb4da4240e96 100644
--- a/Documentation/driver-api/fpga/fpga-programming.rst
+++ b/Documentation/driver-api/fpga/fpga-programming.rst
@@ -84,10 +84,10 @@ will generate that list. Here's some sample code of what to do next::
API for programming an FPGA
---------------------------
-* fpga_region_program_fpga() — Program an FPGA
-* fpga_image_info() — Specifies what FPGA image to program
-* fpga_image_info_alloc() — Allocate an FPGA image info struct
-* fpga_image_info_free() — Free an FPGA image info struct
+* fpga_region_program_fpga() - Program an FPGA
+* fpga_image_info() - Specifies what FPGA image to program
+* fpga_image_info_alloc() - Allocate an FPGA image info struct
+* fpga_image_info_free() - Free an FPGA image info struct
.. kernel-doc:: drivers/fpga/fpga-region.c
:functions: fpga_region_program_fpga
diff --git a/Documentation/driver-api/fpga/fpga-region.rst b/Documentation/driver-api/fpga/fpga-region.rst
index 363a8171ab0a..2636a27c11b2 100644
--- a/Documentation/driver-api/fpga/fpga-region.rst
+++ b/Documentation/driver-api/fpga/fpga-region.rst
@@ -45,19 +45,19 @@ An example of usage can be seen in the probe function of [#f2]_.
API to add a new FPGA region
----------------------------
-* struct fpga_region — The FPGA region struct
-* devm_fpga_region_create() — Allocate and init a region struct
-* fpga_region_register() — Register an FPGA region
-* fpga_region_unregister() — Unregister an FPGA region
+* struct fpga_region - The FPGA region struct
+* devm_fpga_region_create() - Allocate and init a region struct
+* fpga_region_register() - Register an FPGA region
+* fpga_region_unregister() - Unregister an FPGA region
The FPGA region's probe function will need to get a reference to the FPGA
Manager it will be using to do the programming. This usually would happen
during the region's probe function.
-* fpga_mgr_get() — Get a reference to an FPGA manager, raise ref count
-* of_fpga_mgr_get() — Get a reference to an FPGA manager, raise ref count,
+* fpga_mgr_get() - Get a reference to an FPGA manager, raise ref count
+* of_fpga_mgr_get() - Get a reference to an FPGA manager, raise ref count,
given a device node.
-* fpga_mgr_put() — Put an FPGA manager
+* fpga_mgr_put() - Put an FPGA manager
The FPGA region will need to specify which bridges to control while programming
the FPGA. The region driver can build a list of bridges during probe time
@@ -66,11 +66,11 @@ the list of bridges to program just before programming
(:c:expr:`fpga_region->get_bridges`). The FPGA bridge framework supplies the
following APIs to handle building or tearing down that list.
-* fpga_bridge_get_to_list() — Get a ref of an FPGA bridge, add it to a
+* fpga_bridge_get_to_list() - Get a ref of an FPGA bridge, add it to a
list
-* of_fpga_bridge_get_to_list() — Get a ref of an FPGA bridge, add it to a
+* of_fpga_bridge_get_to_list() - Get a ref of an FPGA bridge, add it to a
list, given a device node
-* fpga_bridges_put() — Given a list of bridges, put them
+* fpga_bridges_put() - Given a list of bridges, put them
.. kernel-doc:: include/linux/fpga/fpga-region.h
:functions: fpga_region
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index f5a3207aa7fa..c57c609ad2eb 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -85,7 +85,6 @@ available subsections can be seen below.
io-mapping
io_ordering
generic-counter
- lightnvm-pblk
memory-devices/index
men-chameleon-bus
ntb
diff --git a/Documentation/driver-api/lightnvm-pblk.rst b/Documentation/driver-api/lightnvm-pblk.rst
deleted file mode 100644
index 1040ed1cec81..000000000000
--- a/Documentation/driver-api/lightnvm-pblk.rst
+++ /dev/null
@@ -1,21 +0,0 @@
-pblk: Physical Block Device Target
-==================================
-
-pblk implements a fully associative, host-based FTL that exposes a traditional
-block I/O interface. Its primary responsibilities are:
-
- - Map logical addresses onto physical addresses (4KB granularity) in a
- logical-to-physical (L2P) table.
- - Maintain the integrity and consistency of the L2P table as well as its
- recovery from normal tear down and power outage.
- - Deal with controller- and media-specific constrains.
- - Handle I/O errors.
- - Implement garbage collection.
- - Maintain consistency across the I/O stack during synchronization points.
-
-For more information please refer to:
-
- http://lightnvm.io
-
-which maintains updated FAQs, manual pages, technical documentation, tools,
-contacts, etc.
diff --git a/Documentation/driver-api/nfc/nfc-hci.rst b/Documentation/driver-api/nfc/nfc-hci.rst
index eb8a1a14e919..f10fe53aa9fe 100644
--- a/Documentation/driver-api/nfc/nfc-hci.rst
+++ b/Documentation/driver-api/nfc/nfc-hci.rst
@@ -181,7 +181,7 @@ xmit_from_hci():
The llc must be registered with nfc before it can be used. Do that by
calling::
- nfc_llc_register(const char *name, struct nfc_llc_ops *ops);
+ nfc_llc_register(const char *name, const struct nfc_llc_ops *ops);
Again, note that the llc does not handle the physical link. It is thus very
easy to mix any physical link with any llc for a given chip driver.
diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst
index f47d05ed0d94..4a25c5eb6f07 100644
--- a/Documentation/fault-injection/fault-injection.rst
+++ b/Documentation/fault-injection/fault-injection.rst
@@ -24,6 +24,10 @@ Available fault injection capabilities
injects futex deadlock and uaddr fault errors.
+- fail_sunrpc
+
+ injects kernel RPC client and server failures.
+
- fail_make_request
injects disk IO errors on devices permitted by setting
@@ -151,6 +155,20 @@ configuration of fault-injection capabilities.
default is 'N', setting it to 'Y' will disable failure injections
when dealing with private (address space) futexes.
+- /sys/kernel/debug/fail_sunrpc/ignore-client-disconnect:
+
+ Format: { 'Y' | 'N' }
+
+ default is 'N', setting it to 'Y' will disable disconnect
+ injection on the RPC client.
+
+- /sys/kernel/debug/fail_sunrpc/ignore-server-disconnect:
+
+ Format: { 'Y' | 'N' }
+
+ default is 'N', setting it to 'Y' will disable disconnect
+ injection on the RPC server.
+
- /sys/kernel/debug/fail_function/inject:
Format: { 'function-name' | '!function-name' | '' }
diff --git a/Documentation/fault-injection/provoke-crashes.rst b/Documentation/fault-injection/provoke-crashes.rst
index a20ba5d93932..3abe84225613 100644
--- a/Documentation/fault-injection/provoke-crashes.rst
+++ b/Documentation/fault-injection/provoke-crashes.rst
@@ -29,8 +29,7 @@ recur_count
cpoint_name
Where in the kernel to trigger the action. It can be
one of INT_HARDWARE_ENTRY, INT_HW_IRQ_EN, INT_TASKLET_ENTRY,
- FS_DEVRW, MEM_SWAPOUT, TIMERADD, SCSI_DISPATCH_CMD,
- IDE_CORE_CP, or DIRECT
+ FS_DEVRW, MEM_SWAPOUT, TIMERADD, SCSI_QUEUE_RQ, or DIRECT.
cpoint_type
Indicates the action to be taken on hitting the crash point.
diff --git a/Documentation/filesystems/cifs/index.rst b/Documentation/filesystems/cifs/index.rst
new file mode 100644
index 000000000000..1c8597a679ab
--- /dev/null
+++ b/Documentation/filesystems/cifs/index.rst
@@ -0,0 +1,10 @@
+===============================
+CIFS
+===============================
+
+
+.. toctree::
+ :maxdepth: 1
+
+ ksmbd
+ cifsroot
diff --git a/Documentation/filesystems/cifs/ksmbd.rst b/Documentation/filesystems/cifs/ksmbd.rst
new file mode 100644
index 000000000000..a1326157d53f
--- /dev/null
+++ b/Documentation/filesystems/cifs/ksmbd.rst
@@ -0,0 +1,165 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+KSMBD - SMB3 Kernel Server
+==========================
+
+KSMBD is a linux kernel server which implements SMB3 protocol in kernel space
+for sharing files over network.
+
+KSMBD architecture
+==================
+
+The subset of performance related operations belong in kernelspace and
+the other subset which belong to operations which are not really related with
+performance in userspace. So, DCE/RPC management that has historically resulted
+into number of buffer overflow issues and dangerous security bugs and user
+account management are implemented in user space as ksmbd.mountd.
+File operations that are related with performance (open/read/write/close etc.)
+in kernel space (ksmbd). This also allows for easier integration with VFS
+interface for all file operations.
+
+ksmbd (kernel daemon)
+---------------------
+
+When the server daemon is started, it starts up a forker thread
+(ksmbd/interface name) at initialization time and opens a dedicated port 445
+for listening to SMB requests. Whenever new clients make requests, the forker
+thread will accept the client connection and fork a new thread for dedicated
+communication channel between the client and the server. It allows for parallel
+processing of SMB requests(commands) from clients as well as allowing for new
+clients to make new connections. Each instance is named ksmbd/1~n(port number)
+to indicate connected clients. Depending on the SMB request types, each new
+thread can decide to pass through the commands to the user space (ksmbd.mountd),
+currently DCE/RPC commands are identified to be handled through the user space.
+To further utilize the linux kernel, it has been chosen to process the commands
+as workitems and to be executed in the handlers of the ksmbd-io kworker threads.
+It allows for multiplexing of the handlers as the kernel takes care of initiating
+extra worker threads if the load is increased and vice versa, if the load is
+decreased it destroys the extra worker threads. So, after a connection is
+established with a client, the dedicated ksmbd/1..n(port number) takes complete
+ownership of receiving/parsing of SMB commands. Each received command is worked
+in parallel, i.e., there can be multiple client commands which are worked in
+parallel. After receiving each command a separated kernel workitem is prepared
+for each command which is further queued to be handled by ksmbd-io kworkers.
+So, each SMB workitem is queued to the kworkers. This allows the benefit of load
+sharing to be managed optimally by the default kernel and optimizing client
+performance by handling client commands in parallel.
+
+ksmbd.mountd (user space daemon)
+--------------------------------
+
+ksmbd.mountd is a userspace process to transfer user account and password that
+are registered using ksmbd.adduser (part of utils for user space). Further it
+allows sharing information parameters that are parsed from smb.conf to ksmbd in
+kernel. For the execution part it has a daemon which is continuously running
+and connected to the kernel interface using netlink socket, it waits for the
+requests(dcerpc and share/user info). It handles RPC calls (at a minimum few
+dozen) that are most important for file server from NetShareEnum and
+NetServerGetInfo. Complete DCE/RPC response is prepared from the user space
+and passed over to the associated kernel thread for the client.
+
+
+KSMBD Feature Status
+====================
+
+============================== =================================================
+Feature name Status
+============================== =================================================
+Dialects Supported. SMB2.1 SMB3.0, SMB3.1.1 dialects
+ (intentionally excludes security vulnerable SMB1
+ dialect).
+Auto Negotiation Supported.
+Compound Request Supported.
+Oplock Cache Mechanism Supported.
+SMB2 leases(v1 lease) Supported.
+Directory leases(v2 lease) Planned for future.
+Multi-credits Supported.
+NTLM/NTLMv2 Supported.
+HMAC-SHA256 Signing Supported.
+Secure negotiate Supported.
+Signing Update Supported.
+Pre-authentication integrity Supported.
+SMB3 encryption(CCM, GCM) Supported. (CCM and GCM128 supported, GCM256 in
+ progress)
+SMB direct(RDMA) Partially Supported. SMB3 Multi-channel is
+ required to connect to Windows client.
+SMB3 Multi-channel Partially Supported. Planned to implement
+ replay/retry mechanisms for future.
+SMB3.1.1 POSIX extension Supported.
+ACLs Partially Supported. only DACLs available, SACLs
+ (auditing) is planned for the future. For
+ ownership (SIDs) ksmbd generates random subauth
+ values(then store it to disk) and use uid/gid
+ get from inode as RID for local domain SID.
+ The current acl implementation is limited to
+ standalone server, not a domain member.
+ Integration with Samba tools is being worked on
+ to allow future support for running as a domain
+ member.
+Kerberos Supported.
+Durable handle v1,v2 Planned for future.
+Persistent handle Planned for future.
+SMB2 notify Planned for future.
+Sparse file support Supported.
+DCE/RPC support Partially Supported. a few calls(NetShareEnumAll,
+ NetServerGetInfo, SAMR, LSARPC) that are needed
+ for file server handled via netlink interface
+ from ksmbd.mountd. Additional integration with
+ Samba tools and libraries via upcall is being
+ investigated to allow support for additional
+ DCE/RPC management calls (and future support
+ for Witness protocol e.g.)
+ksmbd/nfsd interoperability Planned for future. The features that ksmbd
+ support are Leases, Notify, ACLs and Share modes.
+============================== =================================================
+
+
+How to run
+==========
+
+1. Download ksmbd-tools and compile them.
+ - https://github.com/cifsd-team/ksmbd-tools
+
+2. Create user/password for SMB share.
+
+ # mkdir /etc/ksmbd/
+ # ksmbd.adduser -a <Enter USERNAME for SMB share access>
+
+3. Create /etc/ksmbd/smb.conf file, add SMB share in smb.conf file
+ - Refer smb.conf.example and
+ https://github.com/cifsd-team/ksmbd-tools/blob/master/Documentation/configuration.txt
+
+4. Insert ksmbd.ko module
+
+ # insmod ksmbd.ko
+
+5. Start ksmbd user space daemon
+ # ksmbd.mountd
+
+6. Access share from Windows or Linux using CIFS
+
+Shutdown KSMBD
+==============
+
+1. kill user and kernel space daemon
+ # sudo ksmbd.control -s
+
+How to turn debug print on
+==========================
+
+Each layer
+/sys/class/ksmbd-control/debug
+
+1. Enable all component prints
+ # sudo ksmbd.control -d "all"
+
+2. Enable one of components(smb, auth, vfs, oplock, ipc, conn, rdma)
+ # sudo ksmbd.control -d "smb"
+
+3. Show what prints are enabled.
+ # cat /sys/class/ksmbd-control/debug
+ [smb] auth vfs oplock ipc conn [rdma]
+
+4. Disable prints:
+ If you try the selected component once more, it is disabled without brackets.
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index 44b67ebd6e40..0eb799d9d05a 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -1063,11 +1063,6 @@ astute users may notice some differences in behavior:
- DAX (Direct Access) is not supported on encrypted files.
-- The st_size of an encrypted symlink will not necessarily give the
- length of the symlink target as required by POSIX. It will actually
- give the length of the ciphertext, which will be slightly longer
- than the plaintext due to NUL-padding and an extra 2-byte overhead.
-
- The maximum length of an encrypted symlink is 2 bytes shorter than
the maximum length of an unencrypted symlink. For example, on an
EXT4 filesystem with a 4K block size, unencrypted symlinks can be up
@@ -1235,12 +1230,12 @@ the user-supplied name to get the ciphertext.
Lookups without the key are more complicated. The raw ciphertext may
contain the ``\0`` and ``/`` characters, which are illegal in
-filenames. Therefore, readdir() must base64-encode the ciphertext for
-presentation. For most filenames, this works fine; on ->lookup(), the
-filesystem just base64-decodes the user-supplied name to get back to
-the raw ciphertext.
+filenames. Therefore, readdir() must base64url-encode the ciphertext
+for presentation. For most filenames, this works fine; on ->lookup(),
+the filesystem just base64url-decodes the user-supplied name to get
+back to the raw ciphertext.
-However, for very long filenames, base64 encoding would cause the
+However, for very long filenames, base64url encoding would cause the
filename length to exceed NAME_MAX. To prevent this, readdir()
actually presents long filenames in an abbreviated form which encodes
a strong "hash" of the ciphertext filename, along with the optional
diff --git a/Documentation/filesystems/idmappings.rst b/Documentation/filesystems/idmappings.rst
new file mode 100644
index 000000000000..1229a75ec75d
--- /dev/null
+++ b/Documentation/filesystems/idmappings.rst
@@ -0,0 +1,1026 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Idmappings
+==========
+
+Most filesystem developers will have encountered idmappings. They are used when
+reading from or writing ownership to disk, reporting ownership to userspace, or
+for permission checking. This document is aimed at filesystem developers that
+want to know how idmappings work.
+
+Formal notes
+------------
+
+An idmapping is essentially a translation of a range of ids into another or the
+same range of ids. The notational convention for idmappings that is widely used
+in userspace is::
+
+ u:k:r
+
+``u`` indicates the first element in the upper idmapset ``U`` and ``k``
+indicates the first element in the lower idmapset ``K``. The ``r`` parameter
+indicates the range of the idmapping, i.e. how many ids are mapped. From now
+on, we will always prefix ids with ``u`` or ``k`` to make it clear whether
+we're talking about an id in the upper or lower idmapset.
+
+To see what this looks like in practice, let's take the following idmapping::
+
+ u22:k10000:r3
+
+and write down the mappings it will generate::
+
+ u22 -> k10000
+ u23 -> k10001
+ u24 -> k10002
+
+From a mathematical viewpoint ``U`` and ``K`` are well-ordered sets and an
+idmapping is an order isomorphism from ``U`` into ``K``. So ``U`` and ``K`` are
+order isomorphic. In fact, ``U`` and ``K`` are always well-ordered subsets of
+the set of all possible ids useable on a given system.
+
+Looking at this mathematically briefly will help us highlight some properties
+that make it easier to understand how we can translate between idmappings. For
+example, we know that the inverse idmapping is an order isomorphism as well::
+
+ k10000 -> u22
+ k10001 -> u23
+ k10002 -> u24
+
+Given that we are dealing with order isomorphisms plus the fact that we're
+dealing with subsets we can embed idmappings into each other, i.e. we can
+sensibly translate between different idmappings. For example, assume we've been
+given the three idmappings::
+
+ 1. u0:k10000:r10000
+ 2. u0:k20000:r10000
+ 3. u0:k30000:r10000
+
+and id ``k11000`` which has been generated by the first idmapping by mapping
+``u1000`` from the upper idmapset down to ``k11000`` in the lower idmapset.
+
+Because we're dealing with order isomorphic subsets it is meaningful to ask
+what id ``k11000`` corresponds to in the second or third idmapping. The
+straightforward algorithm to use is to apply the inverse of the first idmapping,
+mapping ``k11000`` up to ``u1000``. Afterwards, we can map ``u1000`` down using
+either the second idmapping or the third idmapping. The second
+idmapping would map ``u1000`` down to ``k21000``. The third idmapping would map
+``u1000`` down to ``k31000``.
+
+If we were given the same task for the following three idmappings::
+
+ 1. u0:k10000:r10000
+ 2. u0:k20000:r200
+ 3. u0:k30000:r300
+
+we would fail to translate as the sets aren't order isomorphic over the full
+range of the first idmapping anymore (However they are order isomorphic over
+the full range of the second idmapping.). Neither the second nor the third idmapping
+contains ``u1000`` in the upper idmapset ``U``. This is equivalent to not having
+an id mapped. We can simply say that ``u1000`` is unmapped in the second and
+third idmapping. The kernel will report unmapped ids as the overflowuid
+``(uid_t)-1`` or overflowgid ``(gid_t)-1`` to userspace.
+
+The algorithm to calculate what a given id maps to is pretty simple. First, we
+need to verify that the range can contain our target id. We will skip this step
+for simplicity. After that if we want to know what ``id`` maps to we can do
+simple calculations:
+
+- If we want to map from left to right::
+
+ u:k:r
+ id - u + k = n
+
+- If we want to map from right to left::
+
+ u:k:r
+ id - k + u = n
+
+Instead of "left to right" we can also say "down" and instead of "right to
+left" we can also say "up". Obviously mapping down and up invert each other.
+
+To see whether the simple formulas above work, consider the following two
+idmappings::
+
+ 1. u0:k20000:r10000
+ 2. u500:k30000:r10000
+
+Assume we are given ``k21000`` in the lower idmapset of the first idmapping. We
+want to know what id this was mapped from in the upper idmapset of the first
+idmapping. So we're mapping up in the first idmapping::
+
+ id - k + u = n
+ k21000 - k20000 + u0 = u1000
+
+Now assume we are given the id ``u1100`` in the upper idmapset of the second
+idmapping and we want to know what this id maps down to in the lower idmapset
+of the second idmapping. This means we're mapping down in the second
+idmapping::
+
+ id - u + k = n
+ u1100 - u500 + k30000 = k30600
+
+General notes
+-------------
+
+In the context of the kernel an idmapping can be interpreted as mapping a range
+of userspace ids into a range of kernel ids::
+
+ userspace-id:kernel-id:range
+
+A userspace id is always an element in the upper idmapset of an idmapping of
+type ``uid_t`` or ``gid_t`` and a kernel id is always an element in the lower
+idmapset of an idmapping of type ``kuid_t`` or ``kgid_t``. From now on
+"userspace id" will be used to refer to the well known ``uid_t`` and ``gid_t``
+types and "kernel id" will be used to refer to ``kuid_t`` and ``kgid_t``.
+
+The kernel is mostly concerned with kernel ids. They are used when performing
+permission checks and are stored in an inode's ``i_uid`` and ``i_gid`` field.
+A userspace id on the other hand is an id that is reported to userspace by the
+kernel, or is passed by userspace to the kernel, or a raw device id that is
+written or read from disk.
+
+Note that we are only concerned with idmappings as the kernel stores them not
+how userspace would specify them.
+
+For the rest of this document we will prefix all userspace ids with ``u`` and
+all kernel ids with ``k``. Ranges of idmappings will be prefixed with ``r``. So
+an idmapping will be written as ``u0:k10000:r10000``.
+
+For example, the id ``u1000`` is an id in the upper idmapset or "userspace
+idmapset" starting with ``u0``. And it is mapped to ``k11000`` which is a
+kernel id in the lower idmapset or "kernel idmapset" starting with ``k10000``.
+
+A kernel id is always created by an idmapping. Such idmappings are associated
+with user namespaces. Since we mainly care about how idmappings work we're not
+going to be concerned with how idmappings are created nor how they are used
+outside of the filesystem context. This is best left to an explanation of user
+namespaces.
+
+The initial user namespace is special. It always has an idmapping of the
+following form::
+
+ u0:k0:r4294967295
+
+which is an identity idmapping over the full range of ids available on this
+system.
+
+Other user namespaces usually have non-identity idmappings such as::
+
+ u0:k10000:r10000
+
+When a process creates or wants to change ownership of a file, or when the
+ownership of a file is read from disk by a filesystem, the userspace id is
+immediately translated into a kernel id according to the idmapping associated
+with the relevant user namespace.
+
+For instance, consider a file that is stored on disk by a filesystem as being
+owned by ``u1000``:
+
+- If a filesystem were to be mounted in the initial user namespaces (as most
+ filesystems are) then the initial idmapping will be used. As we saw this is
+ simply the identity idmapping. This would mean id ``u1000`` read from disk
+ would be mapped to id ``k1000``. So an inode's ``i_uid`` and ``i_gid`` field
+ would contain ``k1000``.
+
+- If a filesystem were to be mounted with an idmapping of ``u0:k10000:r10000``
+ then ``u1000`` read from disk would be mapped to ``k11000``. So an inode's
+ ``i_uid`` and ``i_gid`` would contain ``k11000``.
+
+Translation algorithms
+----------------------
+
+We've already seen briefly that it is possible to translate between different
+idmappings. We'll now take a closer look at how that works.
+
+Crossmapping
+~~~~~~~~~~~~
+
+This translation algorithm is used by the kernel in quite a few places. For
+example, it is used when reporting back the ownership of a file to userspace
+via the ``stat()`` system call family.
+
+If we've been given ``k11000`` from one idmapping we can map that id up in
+another idmapping. In order for this to work both idmappings need to contain
+the same kernel id in their kernel idmapsets. For example, consider the
+following idmappings::
+
+ 1. u0:k10000:r10000
+ 2. u20000:k10000:r10000
+
+and we are mapping ``u1000`` down to ``k11000`` in the first idmapping. We can
+then translate ``k11000`` into a userspace id in the second idmapping using the
+kernel idmapset of the second idmapping::
+
+ /* Map the kernel id up into a userspace id in the second idmapping. */
+ from_kuid(u20000:k10000:r10000, k11000) = u21000
+
+Note, how we can get back to the kernel id in the first idmapping by inverting
+the algorithm::
+
+ /* Map the userspace id down into a kernel id in the second idmapping. */
+ make_kuid(u20000:k10000:r10000, u21000) = k11000
+
+ /* Map the kernel id up into a userspace id in the first idmapping. */
+ from_kuid(u0:k10000:r10000, k11000) = u1000
+
+This algorithm allows us to answer the question what userspace id a given
+kernel id corresponds to in a given idmapping. In order to be able to answer
+this question both idmappings need to contain the same kernel id in their
+respective kernel idmapsets.
+
+For example, when the kernel reads a raw userspace id from disk it maps it down
+into a kernel id according to the idmapping associated with the filesystem.
+Let's assume the filesystem was mounted with an idmapping of
+``u0:k20000:r10000`` and it reads a file owned by ``u1000`` from disk. This
+means ``u1000`` will be mapped to ``k21000`` which is what will be stored in
+the inode's ``i_uid`` and ``i_gid`` field.
+
+When someone in userspace calls ``stat()`` or a related function to get
+ownership information about the file the kernel can't simply map the id back up
+according to the filesystem's idmapping as this would give the wrong owner if
+the caller is using an idmapping.
+
+So the kernel will map the id back up in the idmapping of the caller. Let's
+assume the caller has the slightly unconventional idmapping
+``u3000:k20000:r10000`` then ``k21000`` would map back up to ``u4000``.
+Consequently the user would see that this file is owned by ``u4000``.
+
+Remapping
+~~~~~~~~~
+
+It is possible to translate a kernel id from one idmapping to another one via
+the userspace idmapset of the two idmappings. This is equivalent to remapping
+a kernel id.
+
+Let's look at an example. We are given the following two idmappings::
+
+ 1. u0:k10000:r10000
+ 2. u0:k20000:r10000
+
+and we are given ``k11000`` in the first idmapping. In order to translate this
+kernel id in the first idmapping into a kernel id in the second idmapping we
+need to perform two steps:
+
+1. Map the kernel id up into a userspace id in the first idmapping::
+
+ /* Map the kernel id up into a userspace id in the first idmapping. */
+ from_kuid(u0:k10000:r10000, k11000) = u1000
+
+2. Map the userspace id down into a kernel id in the second idmapping::
+
+ /* Map the userspace id down into a kernel id in the second idmapping. */
+ make_kuid(u0:k20000:r10000, u1000) = k21000
+
+As you can see we used the userspace idmapset in both idmappings to translate
+the kernel id in one idmapping to a kernel id in another idmapping.
+
+This allows us to answer the question what kernel id we would need to use to
+get the same userspace id in another idmapping. In order to be able to answer
+this question both idmappings need to contain the same userspace id in their
+respective userspace idmapsets.
+
+Note, how we can easily get back to the kernel id in the first idmapping by
+inverting the algorithm:
+
+1. Map the kernel id up into a userspace id in the second idmapping::
+
+ /* Map the kernel id up into a userspace id in the second idmapping. */
+ from_kuid(u0:k20000:r10000, k21000) = u1000
+
+2. Map the userspace id down into a kernel id in the first idmapping::
+
+ /* Map the userspace id down into a kernel id in the first idmapping. */
+ make_kuid(u0:k10000:r10000, u1000) = k11000
+
+Another way to look at this translation is to treat it as inverting one
+idmapping and applying another idmapping if both idmappings have the relevant
+userspace id mapped. This will come in handy when working with idmapped mounts.
+
+Invalid translations
+~~~~~~~~~~~~~~~~~~~~
+
+It is never valid to use an id in the kernel idmapset of one idmapping as the
+id in the userspace idmapset of another or the same idmapping. While the kernel
+idmapset always indicates an idmapset in the kernel id space the userspace
+idmapset indicates a userspace id. So the following translations are forbidden::
+
+ /* Map the userspace id down into a kernel id in the first idmapping. */
+ make_kuid(u0:k10000:r10000, u1000) = k11000
+
+ /* INVALID: Map the kernel id down into a kernel id in the second idmapping. */
+ make_kuid(u10000:k20000:r10000, k11000) = k21000
+ ~~~~~~
+
+and equally wrong::
+
+ /* Map the kernel id up into a userspace id in the first idmapping. */
+ from_kuid(u0:k10000:r10000, k11000) = u1000
+
+ /* INVALID: Map the userspace id up into a userspace id in the second idmapping. */
+ from_kuid(u20000:k0:r10000, u1000) = k21000
+ ~~~~~
+
+Idmappings when creating filesystem objects
+-------------------------------------------
+
+The concepts of mapping an id down or mapping an id up are expressed in the two
+kernel functions filesystem developers are rather familiar with and which we've
+already used in this document::
+
+ /* Map the userspace id down into a kernel id. */
+ make_kuid(idmapping, uid)
+
+ /* Map the kernel id up into a userspace id. */
+ from_kuid(idmapping, kuid)
+
+We will take an abbreviated look into how idmappings figure into creating
+filesystem objects. For simplicity we will only look at what happens when the
+VFS has already completed path lookup right before it calls into the filesystem
+itself. So we're concerned with what happens when e.g. ``vfs_mkdir()`` is
+called. We will also assume that the directory we're creating filesystem
+objects in is readable and writable for everyone.
+
+When creating a filesystem object the caller will look at the caller's
+filesystem ids. These are just regular ``uid_t`` and ``gid_t`` userspace ids
+but they are exclusively used when determining file ownership which is why they
+are called "filesystem ids". They are usually identical to the uid and gid of
+the caller but can differ. We will just assume they are always identical to not
+get lost in too many details.
+
+When the caller enters the kernel two things happen:
+
+1. Map the caller's userspace ids down into kernel ids in the caller's
+ idmapping.
+ (To be precise, the kernel will simply look at the kernel ids stashed in the
+ credentials of the current task but for our education we'll pretend this
+ translation happens just in time.)
+2. Verify that the caller's kernel ids can be mapped up to userspace ids in the
+ filesystem's idmapping.
+
+The second step is important as regular filesystems will ultimately need to map
+the kernel id back up into a userspace id when writing to disk.
+So with the second step the kernel guarantees that a valid userspace id can be
+written to disk. If it can't the kernel will refuse the creation request to not
+even remotely risk filesystem corruption.
+
+The astute reader will have realized that this is simply a variation of the
+crossmapping algorithm we mentioned above in a previous section. First, the
+kernel maps the caller's userspace id down into a kernel id according to the
+caller's idmapping and then maps that kernel id up according to the
+filesystem's idmapping.
+
+Example 1
+~~~~~~~~~
+
+::
+
+ caller id: u1000
+ caller idmapping: u0:k0:r4294967295
+ filesystem idmapping: u0:k0:r4294967295
+
+Both the caller and the filesystem use the identity idmapping:
+
+1. Map the caller's userspace ids into kernel ids in the caller's idmapping::
+
+ make_kuid(u0:k0:r4294967295, u1000) = k1000
+
+2. Verify that the caller's kernel ids can be mapped to userspace ids in the
+ filesystem's idmapping.
+
+ For this second step the kernel will call the function
+ ``fsuidgid_has_mapping()`` which ultimately boils down to calling
+ ``from_kuid()``::
+
+ from_kuid(u0:k0:r4294967295, k1000) = u1000
+
+In this example both idmappings are the same so there's nothing exciting going
+on. Ultimately the userspace id that lands on disk will be ``u1000``.
+
+Example 2
+~~~~~~~~~
+
+::
+
+ caller id: u1000
+ caller idmapping: u0:k10000:r10000
+ filesystem idmapping: u0:k20000:r10000
+
+1. Map the caller's userspace ids down into kernel ids in the caller's
+ idmapping::
+
+ make_kuid(u0:k10000:r10000, u1000) = k11000
+
+2. Verify that the caller's kernel ids can be mapped up to userspace ids in the
+ filesystem's idmapping::
+
+ from_kuid(u0:k20000:r10000, k11000) = u-1
+
+It's immediately clear that while the caller's userspace id could be
+successfully mapped down into kernel ids in the caller's idmapping the kernel
+ids could not be mapped up according to the filesystem's idmapping. So the
+kernel will deny this creation request.
+
+Note that while this example is less common, because most filesystems can't be
+mounted with non-initial idmappings this is a general problem as we can see in
+the next examples.
+
+Example 3
+~~~~~~~~~
+
+::
+
+ caller id: u1000
+ caller idmapping: u0:k10000:r10000
+ filesystem idmapping: u0:k0:r4294967295
+
+1. Map the caller's userspace ids down into kernel ids in the caller's
+ idmapping::
+
+ make_kuid(u0:k10000:r10000, u1000) = k11000
+
+2. Verify that the caller's kernel ids can be mapped up to userspace ids in the
+ filesystem's idmapping::
+
+ from_kuid(u0:k0:r4294967295, k11000) = u11000
+
+We can see that the translation always succeeds. The userspace id that the
+filesystem will ultimately put to disk will always be identical to the value of
+the kernel id that was created in the caller's idmapping. This has mainly two
+consequences.
+
+First, that we can't allow a caller to ultimately write to disk with another
+userspace id. We could only do this if we were to mount the whole filesystem
+with the caller's or another idmapping. But that solution is limited to a few
+filesystems and not very flexible. But this is a use-case that is pretty
+important in containerized workloads.
+
+Second, the caller will usually not be able to create any files or access
+directories that have stricter permissions because none of the filesystem's
+kernel ids map up into valid userspace ids in the caller's idmapping:
+
+1. Map raw userspace ids down to kernel ids in the filesystem's idmapping::
+
+ make_kuid(u0:k0:r4294967295, u1000) = k1000
+
+2. Map kernel ids up to userspace ids in the caller's idmapping::
+
+ from_kuid(u0:k10000:r10000, k1000) = u-1
+
+Example 4
+~~~~~~~~~
+
+::
+
+ file id: u1000
+ caller idmapping: u0:k10000:r10000
+ filesystem idmapping: u0:k0:r4294967295
+
+In order to report ownership to userspace the kernel uses the crossmapping
+algorithm introduced in a previous section:
+
+1. Map the userspace id on disk down into a kernel id in the filesystem's
+ idmapping::
+
+ make_kuid(u0:k0:r4294967295, u1000) = k1000
+
+2. Map the kernel id up into a userspace id in the caller's idmapping::
+
+ from_kuid(u0:k10000:r10000, k1000) = u-1
+
+The crossmapping algorithm fails in this case because the kernel id in the
+filesystem idmapping cannot be mapped up to a userspace id in the caller's
+idmapping. Thus, the kernel will report the ownership of this file as the
+overflowid.
+
+Example 5
+~~~~~~~~~
+
+::
+
+ file id: u1000
+ caller idmapping: u0:k10000:r10000
+ filesystem idmapping: u0:k20000:r10000
+
+In order to report ownership to userspace the kernel uses the crossmapping
+algorithm introduced in a previous section:
+
+1. Map the userspace id on disk down into a kernel id in the filesystem's
+ idmapping::
+
+ make_kuid(u0:k20000:r10000, u1000) = k21000
+
+2. Map the kernel id up into a userspace id in the caller's idmapping::
+
+ from_kuid(u0:k10000:r10000, k21000) = u-1
+
+Again, the crossmapping algorithm fails in this case because the kernel id in
+the filesystem idmapping cannot be mapped to a userspace id in the caller's
+idmapping. Thus, the kernel will report the ownership of this file as the
+overflowid.
+
+Note how in the last two examples things would be simple if the caller would be
+using the initial idmapping. For a filesystem mounted with the initial
+idmapping it would be trivial. So we only consider a filesystem with an
+idmapping of ``u0:k20000:r10000``:
+
+1. Map the userspace id on disk down into a kernel id in the filesystem's
+ idmapping::
+
+ make_kuid(u0:k20000:r10000, u1000) = k21000
+
+2. Map the kernel id up into a userspace id in the caller's idmapping::
+
+ from_kuid(u0:k0:r4294967295, k21000) = u21000
+
+Idmappings on idmapped mounts
+-----------------------------
+
+The examples we've seen in the previous section where the caller's idmapping
+and the filesystem's idmapping are incompatible cause various issues for
+workloads. For a more complex but common example, consider two containers
+started on the host. To completely prevent the two containers from affecting
+each other, an administrator may often use different non-overlapping idmappings
+for the two containers::
+
+ container1 idmapping: u0:k10000:r10000
+ container2 idmapping: u0:k20000:r10000
+ filesystem idmapping: u0:k30000:r10000
+
+An administrator wanting to provide easy read-write access to the following set
+of files::
+
+ dir id: u0
+ dir/file1 id: u1000
+ dir/file2 id: u2000
+
+to both containers currently can't.
+
+Of course the administrator has the option to recursively change ownership via
+``chown()``. For example, they could change ownership so that ``dir`` and all
+files below it can be crossmapped from the filesystem's into the container's
+idmapping. Let's assume they change ownership so it is compatible with the
+first container's idmapping::
+
+ dir id: u10000
+ dir/file1 id: u11000
+ dir/file2 id: u12000
+
+This would still leave ``dir`` rather useless to the second container. In fact,
+``dir`` and all files below it would continue to appear owned by the overflowid
+for the second container.
+
+Or consider another increasingly popular example. Some service managers such as
+systemd implement a concept called "portable home directories". A user may want
+to use their home directories on different machines where they are assigned
+different login userspace ids. Most users will have ``u1000`` as the login id
+on their machine at home and all files in their home directory will usually be
+owned by ``u1000``. At uni or at work they may have another login id such as
+``u1125``. This makes it rather difficult to interact with their home directory
+on their work machine.
+
+In both cases changing ownership recursively has grave implications. The most
+obvious one is that ownership is changed globally and permanently. In the home
+directory case this change in ownership would even need to happen every time the
+user switches from their home to their work machine. For really large sets of
+files this becomes increasingly costly.
+
+If the user is lucky, they are dealing with a filesystem that is mountable
+inside user namespaces. But this would also change ownership globally and the
+change in ownership is tied to the lifetime of the filesystem mount, i.e. the
+superblock. The only way to change ownership is to completely unmount the
+filesystem and mount it again in another user namespace. This is usually
+impossible because it would mean that all users currently accessing the
+filesystem can't anymore. And it means that ``dir`` still can't be shared
+between two containers with different idmappings.
+But usually the user doesn't even have this option since most filesystems
+aren't mountable inside containers. And not having them mountable might be
+desirable as it doesn't require the filesystem to deal with malicious
+filesystem images.
+
+But the usecases mentioned above and more can be handled by idmapped mounts.
+They allow exposing the same set of dentries with different ownership at
+different mounts. This is achieved by marking the mounts with a user namespace
+through the ``mount_setattr()`` system call. The idmapping associated with it
+is then used to translate from the caller's idmapping to the filesystem's
+idmapping and vice versa using the remapping algorithm we introduced above.
+
+Idmapped mounts make it possible to change ownership in a temporary and
+localized way. The ownership changes are restricted to a specific mount and the
+ownership changes are tied to the lifetime of the mount. All other users and
+locations where the filesystem is exposed are unaffected.
+
+Filesystems that support idmapped mounts don't have any real reason to support
+being mountable inside user namespaces. A filesystem could be exposed
+completely under an idmapped mount to get the same effect. This has the
+advantage that filesystems can leave the creation of the superblock to
+privileged users in the initial user namespace.
+
+However, it is perfectly possible to combine idmapped mounts with filesystems
+mountable inside user namespaces. We will touch on this further below.
+
+Remapping helpers
+~~~~~~~~~~~~~~~~~
+
+Idmapping functions were added that translate between idmappings. They make use
+of the remapping algorithm we've introduced earlier. We're going to look at
+two:
+
+- ``i_uid_into_mnt()`` and ``i_gid_into_mnt()``
+
+ The ``i_*id_into_mnt()`` functions translate filesystem's kernel ids into
+ kernel ids in the mount's idmapping::
+
+ /* Map the filesystem's kernel id up into a userspace id in the filesystem's idmapping. */
+ from_kuid(filesystem, kid) = uid
+
+ /* Map the filesystem's userspace id down into a kernel id in the mount's idmapping. */
+ make_kuid(mount, uid) = kuid
+
+- ``mapped_fsuid()`` and ``mapped_fsgid()``
+
+ The ``mapped_fs*id()`` functions translate the caller's kernel ids into
+ kernel ids in the filesystem's idmapping. This translation is achieved by
+ remapping the caller's kernel ids using the mount's idmapping::
+
+ /* Map the caller's kernel id up into a userspace id in the mount's idmapping. */
+ from_kuid(mount, kid) = uid
+
+ /* Map the mount's userspace id down into a kernel id in the filesystem's idmapping. */
+ make_kuid(filesystem, uid) = kuid
+
+Note that these two functions invert each other. Consider the following
+idmappings::
+
+ caller idmapping: u0:k10000:r10000
+ filesystem idmapping: u0:k20000:r10000
+ mount idmapping: u0:k10000:r10000
+
+Assume a file owned by ``u1000`` is read from disk. The filesystem maps this id
+to ``k21000`` according to its idmapping. This is what is stored in the
+inode's ``i_uid`` and ``i_gid`` fields.
+
+When the caller queries the ownership of this file via ``stat()`` the kernel
+would usually simply use the crossmapping algorithm and map the filesystem's
+kernel id up to a userspace id in the caller's idmapping.
+
+But when the caller is accessing the file on an idmapped mount the kernel will
+first call ``i_uid_into_mnt()`` thereby translating the filesystem's kernel id
+into a kernel id in the mount's idmapping::
+
+ i_uid_into_mnt(k21000):
+ /* Map the filesystem's kernel id up into a userspace id. */
+ from_kuid(u0:k20000:r10000, k21000) = u1000
+
+ /* Map the filesystem's userspace id down into a kernel id in the mount's idmapping. */
+ make_kuid(u0:k10000:r10000, u1000) = k11000
+
+Finally, when the kernel reports the owner to the caller it will turn the
+kernel id in the mount's idmapping into a userspace id in the caller's
+idmapping::
+
+ from_kuid(u0:k10000:r10000, k11000) = u1000
+
+We can test whether this algorithm really works by verifying what happens when
+we create a new file. Let's say the user is creating a file with ``u1000``.
+
+The kernel maps this to ``k11000`` in the caller's idmapping. Usually the
+kernel would now apply the crossmapping, verifying that ``k11000`` can be
+mapped to a userspace id in the filesystem's idmapping. Since ``k11000`` can't
+be mapped up in the filesystem's idmapping directly this creation request
+fails.
+
+But when the caller is accessing the file on an idmapped mount the kernel will
+first call ``mapped_fs*id()`` thereby translating the caller's kernel id into
+a kernel id according to the mount's idmapping::
+
+ mapped_fsuid(k11000):
+ /* Map the caller's kernel id up into a userspace id in the mount's idmapping. */
+ from_kuid(u0:k10000:r10000, k11000) = u1000
+
+ /* Map the mount's userspace id down into a kernel id in the filesystem's idmapping. */
+ make_kuid(u0:k20000:r10000, u1000) = k21000
+
+When finally writing to disk the kernel will then map ``k21000`` up into a
+userspace id in the filesystem's idmapping::
+
+ from_kuid(u0:k20000:r10000, k21000) = u1000
+
+As we can see, we end up with an invertible and therefore information
+preserving algorithm. A file created from ``u1000`` on an idmapped mount will
+also be reported as being owned by ``u1000`` and vice versa.
+
+Let's now briefly reconsider the failing examples from earlier in the context
+of idmapped mounts.
+
+Example 2 reconsidered
+~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ caller id: u1000
+ caller idmapping: u0:k10000:r10000
+ filesystem idmapping: u0:k20000:r10000
+ mount idmapping: u0:k10000:r10000
+
+When the caller is using a non-initial idmapping the common case is to attach
+the same idmapping to the mount. We now perform three steps:
+
+1. Map the caller's userspace ids into kernel ids in the caller's idmapping::
+
+ make_kuid(u0:k10000:r10000, u1000) = k11000
+
+2. Translate the caller's kernel id into a kernel id in the filesystem's
+ idmapping::
+
+ mapped_fsuid(k11000):
+ /* Map the kernel id up into a userspace id in the mount's idmapping. */
+ from_kuid(u0:k10000:r10000, k11000) = u1000
+
+ /* Map the userspace id down into a kernel id in the filesystem's idmapping. */
+ make_kuid(u0:k20000:r10000, u1000) = k21000
+
+3. Verify that the caller's kernel ids can be mapped to userspace ids in the
+ filesystem's idmapping::
+
+ from_kuid(u0:k20000:r10000, k21000) = u1000
+
+So the ownership that lands on disk will be ``u1000``.
+
+Example 3 reconsidered
+~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ caller id: u1000
+ caller idmapping: u0:k10000:r10000
+ filesystem idmapping: u0:k0:r4294967295
+ mount idmapping: u0:k10000:r10000
+
+The same translation algorithm works with the third example.
+
+1. Map the caller's userspace ids into kernel ids in the caller's idmapping::
+
+ make_kuid(u0:k10000:r10000, u1000) = k11000
+
+2. Translate the caller's kernel id into a kernel id in the filesystem's
+ idmapping::
+
+ mapped_fsuid(k11000):
+ /* Map the kernel id up into a userspace id in the mount's idmapping. */
+ from_kuid(u0:k10000:r10000, k11000) = u1000
+
+ /* Map the userspace id down into a kernel id in the filesystem's idmapping. */
+ make_kuid(u0:k0:r4294967295, u1000) = k1000
+
+3. Verify that the caller's kernel ids can be mapped to userspace ids in the
+ filesystem's idmapping::
+
+ from_kuid(u0:k0:r4294967295, k1000) = u1000
+
+So the ownership that lands on disk will be ``u1000``.
+
+Example 4 reconsidered
+~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ file id: u1000
+ caller idmapping: u0:k10000:r10000
+ filesystem idmapping: u0:k0:r4294967295
+ mount idmapping: u0:k10000:r10000
+
+In order to report ownership to userspace the kernel now does three steps using
+the translation algorithm we introduced earlier:
+
+1. Map the userspace id on disk down into a kernel id in the filesystem's
+ idmapping::
+
+ make_kuid(u0:k0:r4294967295, u1000) = k1000
+
+2. Translate the kernel id into a kernel id in the mount's idmapping::
+
+ i_uid_into_mnt(k1000):
+ /* Map the kernel id up into a userspace id in the filesystem's idmapping. */
+ from_kuid(u0:k0:r4294967295, k1000) = u1000
+
+ /* Map the userspace id down into a kernel id in the mount's idmapping. */
+ make_kuid(u0:k10000:r10000, u1000) = k11000
+
+3. Map the kernel id up into a userspace id in the caller's idmapping::
+
+ from_kuid(u0:k10000:r10000, k11000) = u1000
+
+Earlier, the caller's kernel id couldn't be crossmapped in the filesystem's
+idmapping. With the idmapped mount in place it now can be crossmapped into the
+filesystem's idmapping via the mount's idmapping. The file will now be created
+with ``u1000`` according to the mount's idmapping.
+
+Example 5 reconsidered
+~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ file id: u1000
+ caller idmapping: u0:k10000:r10000
+ filesystem idmapping: u0:k20000:r10000
+ mount idmapping: u0:k10000:r10000
+
+Again, in order to report ownership to userspace the kernel now does three
+steps using the translation algorithm we introduced earlier:
+
+1. Map the userspace id on disk down into a kernel id in the filesystem's
+ idmapping::
+
+ make_kuid(u0:k20000:r10000, u1000) = k21000
+
+2. Translate the kernel id into a kernel id in the mount's idmapping::
+
+ i_uid_into_mnt(k21000):
+ /* Map the kernel id up into a userspace id in the filesystem's idmapping. */
+ from_kuid(u0:k20000:r10000, k21000) = u1000
+
+ /* Map the userspace id down into a kernel id in the mount's idmapping. */
+ make_kuid(u0:k10000:r10000, u1000) = k11000
+
+3. Map the kernel id up into a userspace id in the caller's idmapping::
+
+ from_kuid(u0:k10000:r10000, k11000) = u1000
+
+Earlier, the file's kernel id couldn't be crossmapped in the filesystem's
+idmapping. With the idmapped mount in place it now can be crossmapped into the
+filesystem's idmapping via the mount's idmapping. The file is now owned by
+``u1000`` according to the mount's idmapping.
+
+Changing ownership on a home directory
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+We've seen above how idmapped mounts can be used to translate between
+idmappings when either the caller, the filesystem or both use a non-initial
+idmapping. A wide range of usecases exist when the caller is using
+a non-initial idmapping. This mostly happens in the context of containerized
+workloads. The consequence is as we have seen that for both filesystems
+mounted with the initial idmapping and filesystems mounted with non-initial
+idmappings, access to the filesystem isn't working because the kernel ids can't
+be crossmapped between the caller's and the filesystem's idmapping.
+
+As we've seen above idmapped mounts provide a solution to this by remapping the
+caller's or filesystem's idmapping according to the mount's idmapping.
+
+Aside from containerized workloads, idmapped mounts have the advantage that
+they also work when both the caller and the filesystem use the initial
+idmapping which means users on the host can change the ownership of directories
+and files on a per-mount basis.
+
+Consider our previous example where a user has their home directory on portable
+storage. At home they have id ``u1000`` and all files in their home directory
+are owned by ``u1000`` whereas at uni or work they have login id ``u1125``.
+
+Taking their home directory with them becomes problematic. They can't easily
+access their files, they might not be able to write to disk without applying
+lax permissions or ACLs and even if they can, they will end up with an annoying
+mix of files and directories owned by ``u1000`` and ``u1125``.
+
+Idmapped mounts solve this problem. A user can create an idmapped
+mount for their home directory on their work computer or their computer at home
+depending on what ownership they would prefer to end up on the portable storage
+itself.
+
+Let's assume they want all files on disk to belong to ``u1000``. When the user
+plugs in their portable storage at their work station they can set up a job that
+creates an idmapped mount with the minimal idmapping ``u1000:k1125:r1``. So now
+when they create a file the kernel performs the following steps we already know
+from above::
+
+ caller id: u1125
+ caller idmapping: u0:k0:r4294967295
+ filesystem idmapping: u0:k0:r4294967295
+ mount idmapping: u1000:k1125:r1
+
+1. Map the caller's userspace ids into kernel ids in the caller's idmapping::
+
+ make_kuid(u0:k0:r4294967295, u1125) = k1125
+
+2. Translate the caller's kernel id into a kernel id in the filesystem's
+ idmapping::
+
+ mapped_fsuid(k1125):
+ /* Map the kernel id up into a userspace id in the mount's idmapping. */
+ from_kuid(u1000:k1125:r1, k1125) = u1000
+
+ /* Map the userspace id down into a kernel id in the filesystem's idmapping. */
+ make_kuid(u0:k0:r4294967295, u1000) = k1000
+
+3. Verify that the caller's kernel ids can be mapped to userspace ids in the
+ filesystem's idmapping::
+
+ from_kuid(u0:k0:r4294967295, k1000) = u1000
+
+So ultimately the file will be created with ``u1000`` on disk.
+
+Now let's briefly look at what ownership the caller with id ``u1125`` will see
+on their work computer:
+
+::
+
+ file id: u1000
+ caller idmapping: u0:k0:r4294967295
+ filesystem idmapping: u0:k0:r4294967295
+ mount idmapping: u1000:k1125:r1
+
+1. Map the userspace id on disk down into a kernel id in the filesystem's
+ idmapping::
+
+ make_kuid(u0:k0:r4294967295, u1000) = k1000
+
+2. Translate the kernel id into a kernel id in the mount's idmapping::
+
+ i_uid_into_mnt(k1000):
+ /* Map the kernel id up into a userspace id in the filesystem's idmapping. */
+ from_kuid(u0:k0:r4294967295, k1000) = u1000
+
+ /* Map the userspace id down into a kernel id in the mount's idmapping. */
+ make_kuid(u1000:k1125:r1, u1000) = k1125
+
+3. Map the kernel id up into a userspace id in the caller's idmapping::
+
+ from_kuid(u0:k0:r4294967295, k1125) = u1125
+
+So ultimately the caller will be reported that the file belongs to ``u1125``
+which is the caller's userspace id on their workstation in our example.
+
+The raw userspace id that is put on disk is ``u1000`` so when the user takes
+their home directory back to their home computer where they are assigned
+``u1000`` using the initial idmapping and mount the filesystem with the initial
+idmapping they will see all those files owned by ``u1000``.
+
+Shortcircuiting
+---------------
+
+Currently, the implementation of idmapped mounts enforces that the filesystem
+is mounted with the initial idmapping. The reason is simply that none of the
+filesystems that we targeted were mountable with a non-initial idmapping. But
+that might change soon enough. As we've seen above, thanks to the properties of
+idmappings the translation works for both filesystems mounted with the initial
+idmapping and filesystems with non-initial idmappings.
+
+Based on this current restriction to filesystems mounted with the initial
+idmapping two noticeable shortcuts have been taken:
+
+1. We always stash a reference to the initial user namespace in ``struct
+ vfsmount``. Idmapped mounts are thus mounts that have a non-initial user
+ namespace attached to them.
+
+ In order to support idmapped mounts this needs to be changed. Instead of
+ stashing the initial user namespace the user namespace the filesystem was
+ mounted with must be stashed. An idmapped mount is then any mount that has
+ a different user namespace attached than the filesystem was mounted with.
+ This has no user-visible consequences.
+
+2. The translation algorithms in ``mapped_fs*id()`` and ``i_*id_into_mnt()``
+ are simplified.
+
+ Let's consider ``mapped_fs*id()`` first. This function translates the
+ caller's kernel id into a kernel id in the filesystem's idmapping via
+ a mount's idmapping. The full algorithm is::
+
+ mapped_fsuid(kid):
+ /* Map the kernel id up into a userspace id in the mount's idmapping. */
+ from_kuid(mount-idmapping, kid) = uid
+
+ /* Map the userspace id down into a kernel id in the filesystem's idmapping. */
+ make_kuid(filesystem-idmapping, uid) = kuid
+
+ We know that the filesystem is always mounted with the initial idmapping as
+ we enforce this in ``mount_setattr()``. So this can be shortened to::
+
+ mapped_fsuid(kid):
+ /* Map the kernel id up into a userspace id in the mount's idmapping. */
+ from_kuid(mount-idmapping, kid) = uid
+
+ /* Map the userspace id down into a kernel id in the filesystem's idmapping. */
+ KUIDT_INIT(uid) = kuid
+
+ Similarly, for ``i_*id_into_mnt()`` which translates the filesystem's kernel
+ id into a mount's kernel id::
+
+ i_uid_into_mnt(kid):
+ /* Map the kernel id up into a userspace id in the filesystem's idmapping. */
+ from_kuid(filesystem-idmapping, kid) = uid
+
+ /* Map the userspace id down into a kernel id in the mount's idmapping. */
+ make_kuid(mount-idmapping, uid) = kuid
+
+ Again, we know that the filesystem is always mounted with the initial
+ idmapping as we enforce this in ``mount_setattr()``. So this can be
+ shortened to::
+
+ i_uid_into_mnt(kid):
+ /* Map the kernel id up into a userspace id in the filesystem's idmapping. */
+ __kuid_val(kid) = uid
+
+ /* Map the userspace id down into a kernel id in the mount's idmapping. */
+ make_kuid(mount-idmapping, uid) = kuid
+
+Handling filesystems mounted with non-initial idmappings requires that the
+translation functions be converted to their full form. They can still be
+shortcircuited on non-idmapped mounts. This has no user-visible consequences.
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index 246af51b277a..1a2dd4d35717 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -34,6 +34,7 @@ algorithms work.
quota
seq_file
sharedsubtree
+ idmappings
automount-support
@@ -72,7 +73,7 @@ Documentation for filesystem implementations.
befs
bfs
btrfs
- cifs/cifsroot
+ cifs/index
ceph
coda
configfs
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 2183fd8cc350..2a75dd5da7b5 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -271,19 +271,19 @@ prototypes::
locking rules:
All except set_page_dirty and freepage may block
-====================== ======================== =========
-ops PageLocked(page) i_rwsem
-====================== ======================== =========
+====================== ======================== ========= ===============
+ops PageLocked(page) i_rwsem invalidate_lock
+====================== ======================== ========= ===============
writepage: yes, unlocks (see below)
-readpage: yes, unlocks
+readpage: yes, unlocks shared
writepages:
set_page_dirty no
-readahead: yes, unlocks
-readpages: no
+readahead: yes, unlocks shared
+readpages: no shared
write_begin: locks the page exclusive
write_end: yes, unlocks exclusive
bmap:
-invalidatepage: yes
+invalidatepage: yes exclusive
releasepage: yes
freepage: yes
direct_IO:
@@ -295,7 +295,7 @@ is_partially_uptodate: yes
error_remove_page: yes
swap_activate: no
swap_deactivate: no
-====================== ======================== =========
+====================== ======================== ========= ===============
->write_begin(), ->write_end() and ->readpage() may be called from
the request handler (/dev/loop).
@@ -378,7 +378,10 @@ keep it that way and don't breed new callers.
->invalidatepage() is called when the filesystem must attempt to drop
some or all of the buffers from the page when it is being truncated. It
returns zero on success. If ->invalidatepage is zero, the kernel uses
-block_invalidatepage() instead.
+block_invalidatepage() instead. The filesystem must exclusively acquire
+invalidate_lock before invalidating page cache in truncate / hole punch path
+(and thus calling into ->invalidatepage) to block races between page cache
+invalidation and page cache filling functions (fault, read, ...).
->releasepage() is called when the kernel is about to try to drop the
buffers from the page in preparation for freeing it. It returns zero to
@@ -506,6 +509,7 @@ prototypes::
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
+ int (*iopoll) (struct kiocb *kiocb, bool spin);
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
@@ -518,12 +522,6 @@ prototypes::
int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
- ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,
- loff_t *);
- ssize_t (*writev) (struct file *, const struct iovec *, unsigned long,
- loff_t *);
- ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t,
- void __user *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long,
@@ -536,6 +534,14 @@ prototypes::
size_t, unsigned int);
int (*setlease)(struct file *, long, struct file_lock **, void **);
long (*fallocate)(struct file *, int, loff_t, loff_t);
+ void (*show_fdinfo)(struct seq_file *m, struct file *f);
+ unsigned (*mmap_capabilities)(struct file *);
+ ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
+ loff_t, size_t, unsigned int);
+ loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags);
+ int (*fadvise)(struct file *, loff_t, loff_t, int);
locking rules:
All may block.
@@ -570,6 +576,25 @@ in sys_read() and friends.
the lease within the individual filesystem to record the result of the
operation
+->fallocate implementation must be really careful to maintain page cache
+consistency when punching holes or performing other operations that invalidate
+page cache contents. Usually the filesystem needs to call
+truncate_inode_pages_range() to invalidate relevant range of the page cache.
+However the filesystem usually also needs to update its internal (and on disk)
+view of file offset -> disk block mapping. Until this update is finished, the
+filesystem needs to block page faults and reads from reloading now-stale page
+cache contents from the disk. Since VFS acquires mapping->invalidate_lock in
+shared mode when loading pages from disk (filemap_fault(), filemap_read(),
+readahead paths), the fallocate implementation must take the invalidate_lock to
+prevent reloading.
+
+->copy_file_range and ->remap_file_range implementations need to serialize
+against modifications of file data while the operation is running. For
+blocking changes through write(2) and similar operations inode->i_rwsem can be
+used. To block changes to file contents via a memory mapping during the
+operation, the filesystem must take mapping->invalidate_lock to coordinate
+with ->page_mkwrite.
+
dquot_operations
================
@@ -627,11 +652,11 @@ pfn_mkwrite: yes
access: yes
============= ========= ===========================
-->fault() is called when a previously not present pte is about
-to be faulted in. The filesystem must find and return the page associated
-with the passed in "pgoff" in the vm_fault structure. If it is possible that
-the page may be truncated and/or invalidated, then the filesystem must lock
-the page, then ensure it is not already truncated (the page lock will block
+->fault() is called when a previously not present pte is about to be faulted
+in. The filesystem must find and return the page associated with the passed in
+"pgoff" in the vm_fault structure. If it is possible that the page may be
+truncated and/or invalidated, then the filesystem must lock invalidate_lock,
+then ensure the page is not already truncated (invalidate_lock will block
subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
locked. The VM will unlock the page.
@@ -644,12 +669,14 @@ page table entry. Pointer to entry associated with the page is passed in
"pte" field in vm_fault structure. Pointers to entries for other offsets
should be calculated relative to "pte".
-->page_mkwrite() is called when a previously read-only pte is
-about to become writeable. The filesystem again must ensure that there are
-no truncate/invalidate races, and then return with the page locked. If
-the page has been truncated, the filesystem should not look up a new page
-like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
-will cause the VM to retry the fault.
+->page_mkwrite() is called when a previously read-only pte is about to become
+writeable. The filesystem again must ensure that there are no
+truncate/invalidate races or races with operations such as ->remap_file_range
+or ->copy_file_range, and then return with the page locked. Usually
+mapping->invalidate_lock is suitable for proper serialization. If the page has
+been truncated, the filesystem should not look up a new page like the ->fault()
+handler, but simply return with VM_FAULT_NOPAGE, which will cause the VM to
+retry the fault.
->pfn_mkwrite() is the same as page_mkwrite but when the pte is
VM_PFNMAP or VM_MIXEDMAP with a page-less entry. Expected return is
diff --git a/Documentation/filesystems/mandatory-locking.rst b/Documentation/filesystems/mandatory-locking.rst
deleted file mode 100644
index 9ce73544a8f0..000000000000
--- a/Documentation/filesystems/mandatory-locking.rst
+++ /dev/null
@@ -1,188 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-=====================================================
-Mandatory File Locking For The Linux Operating System
-=====================================================
-
- Andy Walker <andy@lysaker.kvaerner.no>
-
- 15 April 1996
-
- (Updated September 2007)
-
-0. Why you should avoid mandatory locking
------------------------------------------
-
-The Linux implementation is prey to a number of difficult-to-fix race
-conditions which in practice make it not dependable:
-
- - The write system call checks for a mandatory lock only once
- at its start. It is therefore possible for a lock request to
- be granted after this check but before the data is modified.
- A process may then see file data change even while a mandatory
- lock was held.
- - Similarly, an exclusive lock may be granted on a file after
- the kernel has decided to proceed with a read, but before the
- read has actually completed, and the reading process may see
- the file data in a state which should not have been visible
- to it.
- - Similar races make the claimed mutual exclusion between lock
- and mmap similarly unreliable.
-
-1. What is mandatory locking?
-------------------------------
-
-Mandatory locking is kernel enforced file locking, as opposed to the more usual
-cooperative file locking used to guarantee sequential access to files among
-processes. File locks are applied using the flock() and fcntl() system calls
-(and the lockf() library routine which is a wrapper around fcntl().) It is
-normally a process' responsibility to check for locks on a file it wishes to
-update, before applying its own lock, updating the file and unlocking it again.
-The most commonly used example of this (and in the case of sendmail, the most
-troublesome) is access to a user's mailbox. The mail user agent and the mail
-transfer agent must guard against updating the mailbox at the same time, and
-prevent reading the mailbox while it is being updated.
-
-In a perfect world all processes would use and honour a cooperative, or
-"advisory" locking scheme. However, the world isn't perfect, and there's
-a lot of poorly written code out there.
-
-In trying to address this problem, the designers of System V UNIX came up
-with a "mandatory" locking scheme, whereby the operating system kernel would
-block attempts by a process to write to a file that another process holds a
-"read" -or- "shared" lock on, and block attempts to both read and write to a
-file that a process holds a "write " -or- "exclusive" lock on.
-
-The System V mandatory locking scheme was intended to have as little impact as
-possible on existing user code. The scheme is based on marking individual files
-as candidates for mandatory locking, and using the existing fcntl()/lockf()
-interface for applying locks just as if they were normal, advisory locks.
-
-.. Note::
-
- 1. In saying "file" in the paragraphs above I am actually not telling
- the whole truth. System V locking is based on fcntl(). The granularity of
- fcntl() is such that it allows the locking of byte ranges in files, in
- addition to entire files, so the mandatory locking rules also have byte
- level granularity.
-
- 2. POSIX.1 does not specify any scheme for mandatory locking, despite
- borrowing the fcntl() locking scheme from System V. The mandatory locking
- scheme is defined by the System V Interface Definition (SVID) Version 3.
-
-2. Marking a file for mandatory locking
----------------------------------------
-
-A file is marked as a candidate for mandatory locking by setting the group-id
-bit in its file mode but removing the group-execute bit. This is an otherwise
-meaningless combination, and was chosen by the System V implementors so as not
-to break existing user programs.
-
-Note that the group-id bit is usually automatically cleared by the kernel when
-a setgid file is written to. This is a security measure. The kernel has been
-modified to recognize the special case of a mandatory lock candidate and to
-refrain from clearing this bit. Similarly the kernel has been modified not
-to run mandatory lock candidates with setgid privileges.
-
-3. Available implementations
-----------------------------
-
-I have considered the implementations of mandatory locking available with
-SunOS 4.1.x, Solaris 2.x and HP-UX 9.x.
-
-Generally I have tried to make the most sense out of the behaviour exhibited
-by these three reference systems. There are many anomalies.
-
-All the reference systems reject all calls to open() for a file on which
-another process has outstanding mandatory locks. This is in direct
-contravention of SVID 3, which states that only calls to open() with the
-O_TRUNC flag set should be rejected. The Linux implementation follows the SVID
-definition, which is the "Right Thing", since only calls with O_TRUNC can
-modify the contents of the file.
-
-HP-UX even disallows open() with O_TRUNC for a file with advisory locks, not
-just mandatory locks. That would appear to contravene POSIX.1.
-
-mmap() is another interesting case. All the operating systems mentioned
-prevent mandatory locks from being applied to an mmap()'ed file, but HP-UX
-also disallows advisory locks for such a file. SVID actually specifies the
-paranoid HP-UX behaviour.
-
-In my opinion only MAP_SHARED mappings should be immune from locking, and then
-only from mandatory locks - that is what is currently implemented.
-
-SunOS is so hopeless that it doesn't even honour the O_NONBLOCK flag for
-mandatory locks, so reads and writes to locked files always block when they
-should return EAGAIN.
-
-I'm afraid that this is such an esoteric area that the semantics described
-below are just as valid as any others, so long as the main points seem to
-agree.
-
-4. Semantics
-------------
-
-1. Mandatory locks can only be applied via the fcntl()/lockf() locking
- interface - in other words the System V/POSIX interface. BSD style
- locks using flock() never result in a mandatory lock.
-
-2. If a process has locked a region of a file with a mandatory read lock, then
- other processes are permitted to read from that region. If any of these
- processes attempts to write to the region it will block until the lock is
- released, unless the process has opened the file with the O_NONBLOCK
- flag in which case the system call will return immediately with the error
- status EAGAIN.
-
-3. If a process has locked a region of a file with a mandatory write lock, all
- attempts to read or write to that region block until the lock is released,
- unless a process has opened the file with the O_NONBLOCK flag in which case
- the system call will return immediately with the error status EAGAIN.
-
-4. Calls to open() with O_TRUNC, or to creat(), on a existing file that has
- any mandatory locks owned by other processes will be rejected with the
- error status EAGAIN.
-
-5. Attempts to apply a mandatory lock to a file that is memory mapped and
- shared (via mmap() with MAP_SHARED) will be rejected with the error status
- EAGAIN.
-
-6. Attempts to create a shared memory map of a file (via mmap() with MAP_SHARED)
- that has any mandatory locks in effect will be rejected with the error status
- EAGAIN.
-
-5. Which system calls are affected?
------------------------------------
-
-Those which modify a file's contents, not just the inode. That gives read(),
-write(), readv(), writev(), open(), creat(), mmap(), truncate() and
-ftruncate(). truncate() and ftruncate() are considered to be "write" actions
-for the purposes of mandatory locking.
-
-The affected region is usually defined as stretching from the current position
-for the total number of bytes read or written. For the truncate calls it is
-defined as the bytes of a file removed or added (we must also consider bytes
-added, as a lock can specify just "the whole file", rather than a specific
-range of bytes.)
-
-Note 3: I may have overlooked some system calls that need mandatory lock
-checking in my eagerness to get this code out the door. Please let me know, or
-better still fix the system calls yourself and submit a patch to me or Linus.
-
-6. Warning!
------------
-
-Not even root can override a mandatory lock, so runaway processes can wreak
-havoc if they lock crucial files. The way around it is to change the file
-permissions (remove the setgid bit) before trying to read or write to it.
-Of course, that might be a bit tricky if the system is hung :-(
-
-7. The "mand" mount option
---------------------------
-Mandatory locking is disabled on all filesystems by default, and must be
-administratively enabled by mounting with "-o mand". That mount option
-is only allowed if the mounting task has the CAP_SYS_ADMIN capability.
-
-Since kernel v4.5, it is possible to disable mandatory locking
-altogether by setting CONFIG_MANDATORY_FILE_LOCKING to "n". A kernel
-with this disabled will reject attempts to mount filesystems with the
-"mand" mount option with the error status EPERM.
diff --git a/Documentation/fpga/dfl.rst b/Documentation/fpga/dfl.rst
index 75df90d1e54c..ef9eec71f6f3 100644
--- a/Documentation/fpga/dfl.rst
+++ b/Documentation/fpga/dfl.rst
@@ -10,7 +10,7 @@ Authors:
- Xu Yilun <yilun.xu@intel.com>
The Device Feature List (DFL) FPGA framework (and drivers according to
-this framework) hides the very details of low layer hardwares and provides
+this framework) hides the very details of low layer hardware and provides
unified interfaces to userspace. Applications could use these interfaces to
configure, enumerate, open and access FPGA accelerators on platforms which
implement the DFL in the device memory. Besides this, the DFL framework
@@ -205,7 +205,7 @@ given Device Feature Lists and create platform devices for feature devices
also abstracts operations for the private features and exposes common ops to
feature device drivers.
-The FPGA DFL Device could be different hardwares, e.g. PCIe device, platform
+The FPGA DFL Device could be different hardware, e.g. PCIe device, platform
device and etc. Its driver module is always loaded first once the device is
created by the system. This driver plays an infrastructural role in the
driver architecture. It locates the DFLs in the device memory, handles them
diff --git a/Documentation/gpu/rfc/i915_gem_lmem.rst b/Documentation/gpu/rfc/i915_gem_lmem.rst
index 675ba8620d66..b421a3c1806e 100644
--- a/Documentation/gpu/rfc/i915_gem_lmem.rst
+++ b/Documentation/gpu/rfc/i915_gem_lmem.rst
@@ -18,114 +18,5 @@ real, with all the uAPI bits is:
* Route shmem backend over to TTM SYSTEM for discrete
* TTM purgeable object support
* Move i915 buddy allocator over to TTM
- * MMAP ioctl mode(see `I915 MMAP`_)
- * SET/GET ioctl caching(see `I915 SET/GET CACHING`_)
* Send RFC(with mesa-dev on cc) for final sign off on the uAPI
* Add pciid for DG1 and turn on uAPI for real
-
-New object placement and region query uAPI
-==========================================
-Starting from DG1 we need to give userspace the ability to allocate buffers from
-device local-memory. Currently the driver supports gem_create, which can place
-buffers in system memory via shmem, and the usual assortment of other
-interfaces, like dumb buffers and userptr.
-
-To support this new capability, while also providing a uAPI which will work
-beyond just DG1, we propose to offer three new bits of uAPI:
-
-DRM_I915_QUERY_MEMORY_REGIONS
------------------------------
-New query ID which allows userspace to discover the list of supported memory
-regions(like system-memory and local-memory) for a given device. We identify
-each region with a class and instance pair, which should be unique. The class
-here would be DEVICE or SYSTEM, and the instance would be zero, on platforms
-like DG1.
-
-Side note: The class/instance design is borrowed from our existing engine uAPI,
-where we describe every physical engine in terms of its class, and the
-particular instance, since we can have more than one per class.
-
-In the future we also want to expose more information which can further
-describe the capabilities of a region.
-
-.. kernel-doc:: include/uapi/drm/i915_drm.h
- :functions: drm_i915_gem_memory_class drm_i915_gem_memory_class_instance drm_i915_memory_region_info drm_i915_query_memory_regions
-
-GEM_CREATE_EXT
---------------
-New ioctl which is basically just gem_create but now allows userspace to provide
-a chain of possible extensions. Note that if we don't provide any extensions and
-set flags=0 then we get the exact same behaviour as gem_create.
-
-Side note: We also need to support PXP[1] in the near future, which is also
-applicable to integrated platforms, and adds its own gem_create_ext extension,
-which basically lets userspace mark a buffer as "protected".
-
-.. kernel-doc:: include/uapi/drm/i915_drm.h
- :functions: drm_i915_gem_create_ext
-
-I915_GEM_CREATE_EXT_MEMORY_REGIONS
-----------------------------------
-Implemented as an extension for gem_create_ext, we would now allow userspace to
-optionally provide an immutable list of preferred placements at creation time,
-in priority order, for a given buffer object. For the placements we expect
-them each to use the class/instance encoding, as per the output of the regions
-query. Having the list in priority order will be useful in the future when
-placing an object, say during eviction.
-
-.. kernel-doc:: include/uapi/drm/i915_drm.h
- :functions: drm_i915_gem_create_ext_memory_regions
-
-One fair criticism here is that this seems a little over-engineered[2]. If we
-just consider DG1 then yes, a simple gem_create.flags or something is totally
-all that's needed to tell the kernel to allocate the buffer in local-memory or
-whatever. However looking to the future we need uAPI which can also support
-upcoming Xe HP multi-tile architecture in a sane way, where there can be
-multiple local-memory instances for a given device, and so using both class and
-instance in our uAPI to describe regions is desirable, although specifically
-for DG1 it's uninteresting, since we only have a single local-memory instance.
-
-Existing uAPI issues
-====================
-Some potential issues we still need to resolve.
-
-I915 MMAP
----------
-In i915 there are multiple ways to MMAP GEM object, including mapping the same
-object using different mapping types(WC vs WB), i.e multiple active mmaps per
-object. TTM expects one MMAP at most for the lifetime of the object. If it
-turns out that we have to backpedal here, there might be some potential
-userspace fallout.
-
-I915 SET/GET CACHING
---------------------
-In i915 we have set/get_caching ioctl. TTM doesn't let us to change this, but
-DG1 doesn't support non-snooped pcie transactions, so we can just always
-allocate as WB for smem-only buffers. If/when our hw gains support for
-non-snooped pcie transactions then we must fix this mode at allocation time as
-a new GEM extension.
-
-This is related to the mmap problem, because in general (meaning, when we're
-not running on intel cpus) the cpu mmap must not, ever, be inconsistent with
-allocation mode.
-
-Possible idea is to let the kernel picks the mmap mode for userspace from the
-following table:
-
-smem-only: WB. Userspace does not need to call clflush.
-
-smem+lmem: We only ever allow a single mode, so simply allocate this as uncached
-memory, and always give userspace a WC mapping. GPU still does snooped access
-here(assuming we can't turn it off like on DG1), which is a bit inefficient.
-
-lmem only: always WC
-
-This means on discrete you only get a single mmap mode, all others must be
-rejected. That's probably going to be a new default mode or something like
-that.
-
-Links
-=====
-[1] https://patchwork.freedesktop.org/series/86798/
-
-[2] https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5599#note_553791
diff --git a/Documentation/hwmon/aquacomputer_d5next.rst b/Documentation/hwmon/aquacomputer_d5next.rst
new file mode 100644
index 000000000000..1f4bb4ba2e4b
--- /dev/null
+++ b/Documentation/hwmon/aquacomputer_d5next.rst
@@ -0,0 +1,61 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+Kernel driver aquacomputer-d5next
+=================================
+
+Supported devices:
+
+* Aquacomputer D5 Next watercooling pump
+
+Author: Aleksa Savic
+
+Description
+-----------
+
+This driver exposes hardware sensors of the Aquacomputer D5 Next watercooling
+pump, which communicates through a proprietary USB HID protocol.
+
+Available sensors are pump and fan speed, power, voltage and current, as
+well as coolant temperature. Also available through debugfs are the serial
+number, firmware version and power-on count.
+
+Attaching a fan is optional and allows it to be controlled using temperature
+curves directly from the pump. If it's not connected, the fan-related sensors
+will report zeroes.
+
+The pump can be configured either through software or via its physical
+interface. Configuring the pump through this driver is not implemented, as it
+seems to require sending it a complete configuration. That includes addressable
+RGB LEDs, for which there is no standard sysfs interface. Thus, that task is
+better suited for userspace tools.
+
+Usage notes
+-----------
+
+The pump communicates via HID reports. The driver is loaded automatically by
+the kernel and supports hotswapping.
+
+Sysfs entries
+-------------
+
+============ =============================================
+temp1_input Coolant temperature (in millidegrees Celsius)
+fan1_input Pump speed (in RPM)
+fan2_input Fan speed (in RPM)
+power1_input Pump power (in micro Watts)
+power2_input Fan power (in micro Watts)
+in0_input Pump voltage (in milli Volts)
+in1_input Fan voltage (in milli Volts)
+in2_input +5V rail voltage (in milli Volts)
+curr1_input Pump current (in milli Amperes)
+curr2_input Fan current (in milli Amperes)
+============ =============================================
+
+Debugfs entries
+---------------
+
+================ ===============================================
+serial_number Serial number of the pump
+firmware_version Version of installed firmware
+power_cycles Count of how many times the pump was powered on
+================ ===============================================
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index bc01601ea81a..f790f1260c33 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -39,6 +39,7 @@ Hardware Monitoring Kernel Drivers
adt7475
aht10
amc6821
+ aquacomputer_d5next
asb100
asc7621
aspeed-pwm-tacho
@@ -160,6 +161,7 @@ Hardware Monitoring Kernel Drivers
pwm-fan
q54sj108a2
raspberrypi-hwmon
+ sbrmi
sbtsi_temp
sch5627
sch5636
diff --git a/Documentation/hwmon/sbrmi.rst b/Documentation/hwmon/sbrmi.rst
new file mode 100644
index 000000000000..296049e13ac9
--- /dev/null
+++ b/Documentation/hwmon/sbrmi.rst
@@ -0,0 +1,79 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+Kernel driver sbrmi
+===================
+
+Supported hardware:
+
+ * Sideband Remote Management Interface (SB-RMI) compliant AMD SoC
+ device connected to the BMC via the APML.
+
+ Prefix: 'sbrmi'
+
+ Addresses scanned: This driver doesn't support address scanning.
+
+ To instantiate this driver on an AMD CPU with SB-RMI
+ support, the i2c bus number would be the bus connected from the board
+ management controller (BMC) to the CPU.
+ The SMBus address is really 7 bits. Some vendors and the SMBus
+ specification show the address as 8 bits, left justified with the R/W
+ bit as a write (0) making bit 0. Some vendors use only the 7 bits
+ to describe the address.
+ As mentioned in AMD's APML specification, The SB-RMI address is
+ normally 78h(0111 100W) or 3Ch(011 1100) for socket 0 and 70h(0111 000W)
+ or 38h(011 1000) for socket 1, but it could vary based on hardware
+ address select pins.
+
+ Datasheet: The SB-RMI interface and protocol along with the Advanced
+ Platform Management Link (APML) Specification is available
+ as part of the open source SoC register reference at:
+
+ https://www.amd.com/en/support/tech-docs?keyword=55898
+
+Author: Akshay Gupta <akshay.gupta@amd.com>
+
+Description
+-----------
+
+The APML provides a way to communicate with the SB Remote Management interface
+(SB-RMI) module from the external SMBus master that can be used to report socket
+power on AMD platforms using mailbox command and resembles a typical 8-pin remote
+power sensor's I2C interface to BMC.
+
+This driver implements current power with power cap and power cap max.
+
+sysfs-Interface
+---------------
+Power sensors can be queried and set via the standard ``hwmon`` interface
+on ``sysfs``, under the directory ``/sys/class/hwmon/hwmonX`` for some value
+of ``X`` (search for the ``X`` such that ``/sys/class/hwmon/hwmonX/name`` has
+content ``sbrmi``)
+
+================ ===== ========================================================
+Name Perm Description
+================ ===== ========================================================
+power1_input RO Current Power consumed
+power1_cap RW Power limit can be set between 0 and power1_cap_max
+power1_cap_max RO Maximum powerlimit calculated and reported by the SMU FW
+================ ===== ========================================================
+
+The following example shows how the 'Power' attribute from the i2c-addresses
+can be monitored using the userspace utilities like ``sensors`` binary::
+
+ # sensors
+ sbrmi-i2c-1-38
+ Adapter: bcm2835 I2C adapter
+ power1: 61.00 W (cap = 225.00 W)
+
+ sbrmi-i2c-1-3c
+ Adapter: bcm2835 I2C adapter
+ power1: 28.39 W (cap = 224.77 W)
+ #
+
+Also, the following shows how to get and set the values from sysfs entries individually::
+ # cat /sys/class/hwmon/hwmon1/power1_cap_max
+ 225000000
+
+ # echo 180000000 > /sys/class/hwmon/hwmon1/power1_cap
+ # cat /sys/class/hwmon/hwmon1/power1_cap
+ 180000000
diff --git a/Documentation/hwmon/scpi-hwmon.rst b/Documentation/hwmon/scpi-hwmon.rst
index eee7022b44db..1e3f83ec0658 100644
--- a/Documentation/hwmon/scpi-hwmon.rst
+++ b/Documentation/hwmon/scpi-hwmon.rst
@@ -32,5 +32,5 @@ Usage Notes
The driver relies on device tree node to indicate the presence of SCPI
support in the kernel. See
-Documentation/devicetree/bindings/arm/arm,scpi.txt for details of the
+Documentation/devicetree/bindings/firmware/arm,scpi.yaml for details of the
devicetree node.
diff --git a/Documentation/hwmon/sht4x.rst b/Documentation/hwmon/sht4x.rst
index 3b37abcd4a46..c318e5582ead 100644
--- a/Documentation/hwmon/sht4x.rst
+++ b/Documentation/hwmon/sht4x.rst
@@ -42,4 +42,4 @@ humidity1_input Measured humidity in %H
update_interval The minimum interval for polling the sensor,
in milliseconds. Writable. Must be at least
2000.
-============== =============================================
+=============== ============================================
diff --git a/Documentation/i2c/index.rst b/Documentation/i2c/index.rst
index 8b76217e370a..6270f1fd7d4e 100644
--- a/Documentation/i2c/index.rst
+++ b/Documentation/i2c/index.rst
@@ -17,6 +17,7 @@ Introduction
busses/index
i2c-topology
muxes/i2c-mux-gpio
+ i2c-sysfs
Writing device drivers
======================
diff --git a/Documentation/leds/well-known-leds.txt b/Documentation/leds/well-known-leds.txt
new file mode 100644
index 000000000000..4a8b9dc4bf52
--- /dev/null
+++ b/Documentation/leds/well-known-leds.txt
@@ -0,0 +1,58 @@
+-*- org -*-
+
+It is somewhat important to provide a consistent interface to the
+userland. LED devices have one problem there, and that is the naming of
+directories in /sys/class/leds. It would be nice if userland would
+just know the right "name" for a given LED function, but the situation
+got more complex.
+
+Anyway, if backwards compatibility is not an issue, new code should
+use one of the "good" names from this list, and you should extend the
+list where applicable.
+
+Legacy names are listed, too; in case you are writing an application that
+wants to use a particular feature, you should probe for the good name first,
+but then try the legacy ones, too.
+
+Notice there's a list of functions in include/dt-bindings/leds/common.h .
+
+* Keyboards
+
+Good: "input*:*:capslock"
+Good: "input*:*:scrolllock"
+Good: "input*:*:numlock"
+Legacy: "shift-key-light" (Motorola Droid 4, capslock)
+
+Set of common keyboard LEDs, going back to PC AT or so.
+
+Legacy: "tpacpi::thinklight" (IBM/Lenovo Thinkpads)
+Legacy: "lp5523:kb{1,2,3,4,5,6}" (Nokia N900)
+
+Frontlight/backlight of main keyboard.
+
+Legacy: "button-backlight" (Motorola Droid 4)
+
+Some phones have touch buttons below screen; it is different from main
+keyboard. And this is their backlight.
+
+* Sound subsystem
+
+Good: "platform:*:mute"
+Good: "platform:*:micmute"
+
+LEDs on notebook body, indicating that sound input / output is muted.
+
+* System notification
+
+Legacy: "status-led:{red,green,blue}" (Motorola Droid 4)
+Legacy: "lp5523:{r,g,b}" (Nokia N900)
+
+Phones usually have multi-color status LED.
+
+* Power management
+
+Good: "platform:*:charging" (allwinner sun50i)
+
+* Screen
+
+Good: ":backlight" (Motorola Droid 4)
diff --git a/Documentation/networking/batman-adv.rst b/Documentation/networking/batman-adv.rst
index 74821d29a22f..b85563ea3682 100644
--- a/Documentation/networking/batman-adv.rst
+++ b/Documentation/networking/batman-adv.rst
@@ -157,7 +157,7 @@ Contact
Please send us comments, experiences, questions, anything :)
IRC:
- #batman on irc.freenode.org
+ #batadv on ircs://irc.hackint.org/
Mailing-list:
b.a.t.m.a.n@open-mesh.org (optional subscription at
https://lists.open-mesh.org/mailman3/postorius/lists/b.a.t.m.a.n.lists.open-mesh.org/)
diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst
index 62f2aab8eaec..31cfd7d674a6 100644
--- a/Documentation/networking/bonding.rst
+++ b/Documentation/networking/bonding.rst
@@ -501,6 +501,18 @@ fail_over_mac
This option was added in bonding version 3.2.0. The "follow"
policy was added in bonding version 3.3.0.
+lacp_active
+ Option specifying whether to send LACPDU frames periodically.
+
+ off or 0
+ LACPDU frames act as "speak when spoken to".
+
+ on or 1
+ LACPDU frames are sent along the configured links
+ periodically. See lacp_rate for more details.
+
+ The default is on.
+
lacp_rate
Option specifying the rate in which we'll ask our link partner
diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/index.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/index.rst
index ee40fcc5ddff..62f4a4aff6ec 100644
--- a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/index.rst
+++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/index.rst
@@ -9,3 +9,4 @@ DPAA2 Documentation
dpio-driver
ethernet-driver
mac-phy-support
+ switch-driver
diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst
new file mode 100644
index 000000000000..8bf411b857d4
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst
@@ -0,0 +1,217 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+===================
+DPAA2 Switch driver
+===================
+
+:Copyright: |copy| 2021 NXP
+
+The DPAA2 Switch driver probes on the Datapath Switch (DPSW) object which can
+be instantiated on the following DPAA2 SoCs and their variants: LS2088A and
+LX2160A.
+
+The driver uses the switch device driver model and exposes each switch port as
+a network interface, which can be included in a bridge or used as a standalone
+interface. Traffic switched between ports is offloaded into the hardware.
+
+The DPSW can have ports connected to DPNIs or to DPMACs for external access.
+::
+
+ [ethA] [ethB] [ethC] [ethD] [ethE] [ethF]
+ : : : : : :
+ : : : : : :
+ [dpaa2-eth] [dpaa2-eth] [ dpaa2-switch ]
+ : : : : : : kernel
+ =============================================================================
+ : : : : : : hardware
+ [DPNI] [DPNI] [============= DPSW =================]
+ | | | | | |
+ | ---------- | [DPMAC] [DPMAC]
+ ------------------------------- | |
+ | |
+ [PHY] [PHY]
+
+Creating an Ethernet Switch
+===========================
+
+The dpaa2-switch driver probes on DPSW devices found on the fsl-mc bus. These
+devices can be either created statically through the boot time configuration
+file - DataPath Layout (DPL) - or at runtime using the DPAA2 object APIs
+(incorporated already into the restool userspace tool).
+
+At the moment, the dpaa2-switch driver imposes the following restrictions on
+the DPSW object that it will probe:
+
+ * The minimum number of FDBs should be at least equal to the number of switch
+ interfaces. This is necessary so that separation of switch ports can be
+ done, ie when not under a bridge, each switch port will have its own FDB.
+ ::
+
+ fsl_dpaa2_switch dpsw.0: The number of FDBs is lower than the number of ports, cannot probe
+
+ * Both the broadcast and flooding configuration should be per FDB. This
+ enables the driver to restrict the broadcast and flooding domains of each
+ FDB depending on the switch ports that are sharing it (aka are under the
+ same bridge).
+ ::
+
+ fsl_dpaa2_switch dpsw.0: Flooding domain is not per FDB, cannot probe
+ fsl_dpaa2_switch dpsw.0: Broadcast domain is not per FDB, cannot probe
+
+ * The control interface of the switch should not be disabled
+ (DPSW_OPT_CTRL_IF_DIS not passed as a create time option). Without the
+ control interface, the driver is not capable of providing proper Rx/Tx traffic
+ support on the switch port netdevices.
+ ::
+
+ fsl_dpaa2_switch dpsw.0: Control Interface is disabled, cannot probe
+
+Besides the configuration of the actual DPSW object, the dpaa2-switch driver
+will need the following DPAA2 objects:
+
+ * 1 DPMCP - A Management Command Portal object is needed for any interaction
+ with the MC firmware.
+
+ * 1 DPBP - A Buffer Pool is used for seeding buffers intended for the Rx path
+ on the control interface.
+
+ * Access to at least one DPIO object (Software Portal) is needed for any
+ enqueue/dequeue operation to be performed on the control interface queues.
+ The DPIO object will be shared, no need for a private one.
+
+Switching features
+==================
+
+The driver supports the configuration of L2 forwarding rules in hardware for
+port bridging as well as standalone usage of the independent switch interfaces.
+
+The hardware is not configurable with respect to VLAN awareness, thus any DPAA2
+switch port should be used only in usecases with a VLAN aware bridge::
+
+ $ ip link add dev br0 type bridge vlan_filtering 1
+
+ $ ip link add dev br1 type bridge
+ $ ip link set dev ethX master br1
+ Error: fsl_dpaa2_switch: Cannot join a VLAN-unaware bridge
+
+Topology and loop detection through STP is supported when ``stp_state 1`` is
+used at bridge create ::
+
+ $ ip link add dev br0 type bridge vlan_filtering 1 stp_state 1
+
+L2 FDB manipulation (add/delete/dump) is supported.
+
+HW FDB learning can be configured on each switch port independently through
+bridge commands. When the HW learning is disabled, a fast age procedure will be
+run and any previously learnt addresses will be removed.
+::
+
+ $ bridge link set dev ethX learning off
+ $ bridge link set dev ethX learning on
+
+Restricting the unknown unicast and multicast flooding domain is supported, but
+not independently of each other::
+
+ $ ip link set dev ethX type bridge_slave flood off mcast_flood off
+ $ ip link set dev ethX type bridge_slave flood off mcast_flood on
+ Error: fsl_dpaa2_switch: Cannot configure multicast flooding independently of unicast.
+
+Broadcast flooding on a switch port can be disabled/enabled through the brport sysfs::
+
+ $ echo 0 > /sys/bus/fsl-mc/devices/dpsw.Y/net/ethX/brport/broadcast_flood
+
+Offloads
+========
+
+Routing actions (redirect, trap, drop)
+--------------------------------------
+
+The DPAA2 switch is able to offload flow-based redirection of packets making
+use of ACL tables. Shared filter blocks are supported by sharing a single ACL
+table between multiple ports.
+
+The following flow keys are supported:
+
+ * Ethernet: dst_mac/src_mac
+ * IPv4: dst_ip/src_ip/ip_proto/tos
+ * VLAN: vlan_id/vlan_prio/vlan_tpid/vlan_dei
+ * L4: dst_port/src_port
+
+Also, the matchall filter can be used to redirect the entire traffic received
+on a port.
+
+As per flow actions, the following are supported:
+
+ * drop
+ * mirred egress redirect
+ * trap
+
+Each ACL entry (filter) can be setup with only one of the listed
+actions.
+
+Example 1: send frames received on eth4 with a SA of 00:01:02:03:04:05 to the
+CPU::
+
+ $ tc qdisc add dev eth4 clsact
+ $ tc filter add dev eth4 ingress flower src_mac 00:01:02:03:04:05 skip_sw action trap
+
+Example 2: drop frames received on eth4 with VID 100 and PCP of 3::
+
+ $ tc filter add dev eth4 ingress protocol 802.1q flower skip_sw vlan_id 100 vlan_prio 3 action drop
+
+Example 3: redirect all frames received on eth4 to eth1::
+
+ $ tc filter add dev eth4 ingress matchall action mirred egress redirect dev eth1
+
+Example 4: Use a single shared filter block on both eth5 and eth6::
+
+ $ tc qdisc add dev eth5 ingress_block 1 clsact
+ $ tc qdisc add dev eth6 ingress_block 1 clsact
+ $ tc filter add block 1 ingress flower dst_mac 00:01:02:03:04:04 skip_sw \
+ action trap
+ $ tc filter add block 1 ingress protocol ipv4 flower src_ip 192.168.1.1 skip_sw \
+ action mirred egress redirect dev eth3
+
+Mirroring
+~~~~~~~~~
+
+The DPAA2 switch supports only per port mirroring and per VLAN mirroring.
+Adding mirroring filters in shared blocks is also supported.
+
+When using the tc-flower classifier with the 802.1q protocol, only the
+``vlan_id`` key will be accepted. Mirroring based on any other fields from the
+802.1q protocol will be rejected::
+
+ $ tc qdisc add dev eth8 ingress_block 1 clsact
+ $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_prio 3 action mirred egress mirror dev eth6
+ Error: fsl_dpaa2_switch: Only matching on VLAN ID supported.
+ We have an error talking to the kernel
+
+If a mirroring VLAN filter is requested on a port, the VLAN must be
+installed on the switch port in question either using ``bridge`` or by creating
+a VLAN upper device if the switch port is used as a standalone interface::
+
+ $ tc qdisc add dev eth8 ingress_block 1 clsact
+ $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_id 200 action mirred egress mirror dev eth6
+ Error: VLAN must be installed on the switch port.
+ We have an error talking to the kernel
+
+ $ bridge vlan add vid 200 dev eth8
+ $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_id 200 action mirred egress mirror dev eth6
+
+ $ ip link add link eth8 name eth8.200 type vlan id 200
+ $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_id 200 action mirred egress mirror dev eth6
+
+Also, it should be noted that the mirrored traffic will be subject to the same
+egress restrictions as any other traffic. This means that when a mirrored
+packet reaches the mirror port, if the VLAN found in the packet is not
+installed on the port it will get dropped.
+
+The DPAA2 switch supports only a single mirroring destination, thus multiple
+mirror rules can be installed but their ``to`` port has to be the same::
+
+ $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_id 200 action mirred egress mirror dev eth6
+ $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_id 100 action mirred egress mirror dev eth7
+ Error: fsl_dpaa2_switch: Multiple mirror ports not supported.
+ We have an error talking to the kernel
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
index ef8cb62e82a1..4b59cf2c599f 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
@@ -656,3 +656,47 @@ Bridge offloads tracepoints:
$ cat /sys/kernel/debug/tracing/trace
...
ip-5387 [000] ...1 573713: mlx5_esw_bridge_vport_cleanup: vport_num=1
+
+Eswitch QoS tracepoints:
+
+- mlx5_esw_vport_qos_create: trace creation of transmit scheduler arbiter for vport::
+
+ $ echo mlx5:mlx5_esw_vport_qos_create >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-23496 [018] .... 73136.838831: mlx5_esw_vport_qos_create: (0000:82:00.0) vport=2 tsar_ix=4 bw_share=0, max_rate=0 group=000000007b576bb3
+
+- mlx5_esw_vport_qos_config: trace configuration of transmit scheduler arbiter for vport::
+
+ $ echo mlx5:mlx5_esw_vport_qos_config >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-26548 [023] .... 75754.223823: mlx5_esw_vport_qos_config: (0000:82:00.0) vport=1 tsar_ix=3 bw_share=34, max_rate=10000 group=000000007b576bb3
+
+- mlx5_esw_vport_qos_destroy: trace deletion of transmit scheduler arbiter for vport::
+
+ $ echo mlx5:mlx5_esw_vport_qos_destroy >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-27418 [004] .... 76546.680901: mlx5_esw_vport_qos_destroy: (0000:82:00.0) vport=1 tsar_ix=3
+
+- mlx5_esw_group_qos_create: trace creation of transmit scheduler arbiter for rate group::
+
+ $ echo mlx5:mlx5_esw_group_qos_create >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-26578 [008] .... 75776.022112: mlx5_esw_group_qos_create: (0000:82:00.0) group=000000008dac63ea tsar_ix=5
+
+- mlx5_esw_group_qos_config: trace configuration of transmit scheduler arbiter for rate group::
+
+ $ echo mlx5:mlx5_esw_group_qos_config >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-27303 [020] .... 76461.455356: mlx5_esw_group_qos_config: (0000:82:00.0) group=000000008dac63ea tsar_ix=5 bw_share=100 max_rate=20000
+
+- mlx5_esw_group_qos_destroy: trace deletion of transmit scheduler arbiter for group::
+
+ $ echo mlx5:mlx5_esw_group_qos_destroy >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-27418 [006] .... 76547.187258: mlx5_esw_group_qos_destroy: (0000:82:00.0) group=000000007b576bb3 tsar_ix=1
diff --git a/Documentation/networking/devlink/devlink-params.rst b/Documentation/networking/devlink/devlink-params.rst
index 54c9f107c4b0..4878907e9232 100644
--- a/Documentation/networking/devlink/devlink-params.rst
+++ b/Documentation/networking/devlink/devlink-params.rst
@@ -97,6 +97,18 @@ own name.
* - ``enable_roce``
- Boolean
- Enable handling of RoCE traffic in the device.
+ * - ``enable_eth``
+ - Boolean
+ - When enabled, the device driver will instantiate Ethernet specific
+ auxiliary device of the devlink device.
+ * - ``enable_rdma``
+ - Boolean
+ - When enabled, the device driver will instantiate RDMA specific
+ auxiliary device of the devlink device.
+ * - ``enable_vnet``
+ - Boolean
+ - When enabled, the device driver will instantiate VDPA networking
+ specific auxiliary device of the devlink device.
* - ``internal_err_reset``
- Boolean
- When enabled, the device driver will reset the device on internal
diff --git a/Documentation/networking/devlink/hns3.rst b/Documentation/networking/devlink/hns3.rst
new file mode 100644
index 000000000000..4562a6e4782f
--- /dev/null
+++ b/Documentation/networking/devlink/hns3.rst
@@ -0,0 +1,25 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+hns3 devlink support
+====================
+
+This document describes the devlink features implemented by the ``hns3``
+device driver.
+
+The ``hns3`` driver supports reloading via ``DEVLINK_CMD_RELOAD``.
+
+Info versions
+=============
+
+The ``hns3`` driver reports the following versions
+
+.. list-table:: devlink info versions implemented
+ :widths: 10 10 80
+
+ * - Name
+ - Type
+ - Description
+ * - ``fw``
+ - running
+ - Used to represent the firmware version.
diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst
index b3b9e0692088..45b5f8b341df 100644
--- a/Documentation/networking/devlink/index.rst
+++ b/Documentation/networking/devlink/index.rst
@@ -34,6 +34,7 @@ parameters, info versions, and other features it supports.
:maxdepth: 1
bnxt
+ hns3
ionic
ice
mlx4
@@ -42,7 +43,6 @@ parameters, info versions, and other features it supports.
mv88e6xxx
netdevsim
nfp
- sja1105
qed
ti-cpsw-switch
am65-nuss-cpsw-switch
diff --git a/Documentation/networking/devlink/sja1105.rst b/Documentation/networking/devlink/sja1105.rst
deleted file mode 100644
index e2679c274085..000000000000
--- a/Documentation/networking/devlink/sja1105.rst
+++ /dev/null
@@ -1,49 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-=======================
-sja1105 devlink support
-=======================
-
-This document describes the devlink features implemented
-by the ``sja1105`` device driver.
-
-Parameters
-==========
-
-.. list-table:: Driver-specific parameters implemented
- :widths: 5 5 5 85
-
- * - Name
- - Type
- - Mode
- - Description
- * - ``best_effort_vlan_filtering``
- - Boolean
- - runtime
- - Allow plain ETH_P_8021Q headers to be used as DSA tags.
-
- Benefits:
-
- - Can terminate untagged traffic over switch net
- devices even when enslaved to a bridge with
- vlan_filtering=1.
- - Can terminate VLAN-tagged traffic over switch net
- devices even when enslaved to a bridge with
- vlan_filtering=1, with some constraints (no more than
- 7 non-pvid VLANs per user port).
- - Can do QoS based on VLAN PCP and VLAN membership
- admission control for autonomously forwarded frames
- (regardless of whether they can be terminated on the
- CPU or not).
-
- Drawbacks:
-
- - User cannot use VLANs in range 1024-3071. If the
- switch receives frames with such VIDs, it will
- misinterpret them as DSA tags.
- - Switch uses Shared VLAN Learning (FDB lookup uses
- only DMAC as key).
- - When VLANs span cross-chip topologies, the total
- number of permitted VLANs may be less than 7 per
- port, due to a maximum number of 32 VLAN retagging
- rules per switch.
diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index 20baacf2bc5c..89bb4fa4c362 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -200,19 +200,6 @@ receive all frames regardless of the value of the MAC DA. This can be done by
setting the ``promisc_on_master`` property of the ``struct dsa_device_ops``.
Note that this assumes a DSA-unaware master driver, which is the norm.
-Hardware manufacturers are strongly discouraged to do this, but some tagging
-protocols might not provide source port information on RX for all packets, but
-e.g. only for control traffic (link-local PDUs). In this case, by implementing
-the ``filter`` method of ``struct dsa_device_ops``, the tagger might select
-which packets are to be redirected on RX towards the virtual DSA user network
-interfaces, and which are to be left in the DSA master's RX data path.
-
-It might also happen (although silicon vendors are strongly discouraged to
-produce hardware like this) that a tagging protocol splits the switch-specific
-information into a header portion and a tail portion, therefore not falling
-cleanly into any of the above 3 categories. DSA does not support this
-configuration.
-
Master network devices
----------------------
@@ -663,6 +650,22 @@ Bridge layer
CPU port, and flooding towards the CPU port should also be enabled, due to a
lack of an explicit address filtering mechanism in the DSA core.
+- ``port_bridge_tx_fwd_offload``: bridge layer function invoked after
+ ``port_bridge_join`` when a driver sets ``ds->num_fwd_offloading_bridges`` to
+ a non-zero value. Returning success in this function activates the TX
+ forwarding offload bridge feature for this port, which enables the tagging
+ protocol driver to inject data plane packets towards the bridging domain that
+ the port is a part of. Data plane packets are subject to FDB lookup, hardware
+ learning on the CPU port, and do not override the port STP state.
+ Additionally, replication of data plane packets (multicast, flooding) is
+ handled in hardware and the bridge driver will transmit a single skb for each
+ packet that needs replication. The method is provided as a configuration
+ point for drivers that need to configure the hardware for enabling this
+ feature.
+
+- ``port_bridge_tx_fwd_unoffload``: bridge layer function invoked when a driver
+ leaves a bridge port which had the TX forwarding offload feature enabled.
+
Bridge VLAN filtering
---------------------
diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst
index da4057ba37f1..564caeebe2b2 100644
--- a/Documentation/networking/dsa/sja1105.rst
+++ b/Documentation/networking/dsa/sja1105.rst
@@ -65,199 +65,6 @@ If that changed setting can be transmitted to the switch through the dynamic
reconfiguration interface, it is; otherwise the switch is reset and
reprogrammed with the updated static configuration.
-Traffic support
-===============
-
-The switches do not have hardware support for DSA tags, except for "slow
-protocols" for switch control as STP and PTP. For these, the switches have two
-programmable filters for link-local destination MACs.
-These are used to trap BPDUs and PTP traffic to the master netdevice, and are
-further used to support STP and 1588 ordinary clock/boundary clock
-functionality. For frames trapped to the CPU, source port and switch ID
-information is encoded by the hardware into the frames.
-
-But by leveraging ``CONFIG_NET_DSA_TAG_8021Q`` (a software-defined DSA tagging
-format based on VLANs), general-purpose traffic termination through the network
-stack can be supported under certain circumstances.
-
-Depending on VLAN awareness state, the following operating modes are possible
-with the switch:
-
-- Mode 1 (VLAN-unaware): a port is in this mode when it is used as a standalone
- net device, or when it is enslaved to a bridge with ``vlan_filtering=0``.
-- Mode 2 (fully VLAN-aware): a port is in this mode when it is enslaved to a
- bridge with ``vlan_filtering=1``. Access to the entire VLAN range is given to
- the user through ``bridge vlan`` commands, but general-purpose (anything
- other than STP, PTP etc) traffic termination is not possible through the
- switch net devices. The other packets can be still by user space processed
- through the DSA master interface (similar to ``DSA_TAG_PROTO_NONE``).
-- Mode 3 (best-effort VLAN-aware): a port is in this mode when enslaved to a
- bridge with ``vlan_filtering=1``, and the devlink property of its parent
- switch named ``best_effort_vlan_filtering`` is set to ``true``. When
- configured like this, the range of usable VIDs is reduced (0 to 1023 and 3072
- to 4094), so is the number of usable VIDs (maximum of 7 non-pvid VLANs per
- port*), and shared VLAN learning is performed (FDB lookup is done only by
- DMAC, not also by VID).
-
-To summarize, in each mode, the following types of traffic are supported over
-the switch net devices:
-
-+-------------+-----------+--------------+------------+
-| | Mode 1 | Mode 2 | Mode 3 |
-+=============+===========+==============+============+
-| Regular | Yes | No | Yes |
-| traffic | | (use master) | |
-+-------------+-----------+--------------+------------+
-| Management | Yes | Yes | Yes |
-| traffic | | | |
-| (BPDU, PTP) | | | |
-+-------------+-----------+--------------+------------+
-
-To configure the switch to operate in Mode 3, the following steps can be
-followed::
-
- ip link add dev br0 type bridge
- # swp2 operates in Mode 1 now
- ip link set dev swp2 master br0
- # swp2 temporarily moves to Mode 2
- ip link set dev br0 type bridge vlan_filtering 1
- [ 61.204770] sja1105 spi0.1: Reset switch and programmed static config. Reason: VLAN filtering
- [ 61.239944] sja1105 spi0.1: Disabled switch tagging
- # swp3 now operates in Mode 3
- devlink dev param set spi/spi0.1 name best_effort_vlan_filtering value true cmode runtime
- [ 64.682927] sja1105 spi0.1: Reset switch and programmed static config. Reason: VLAN filtering
- [ 64.711925] sja1105 spi0.1: Enabled switch tagging
- # Cannot use VLANs in range 1024-3071 while in Mode 3.
- bridge vlan add dev swp2 vid 1025 untagged pvid
- RTNETLINK answers: Operation not permitted
- bridge vlan add dev swp2 vid 100
- bridge vlan add dev swp2 vid 101 untagged
- bridge vlan
- port vlan ids
- swp5 1 PVID Egress Untagged
-
- swp2 1 PVID Egress Untagged
- 100
- 101 Egress Untagged
-
- swp3 1 PVID Egress Untagged
-
- swp4 1 PVID Egress Untagged
-
- br0 1 PVID Egress Untagged
- bridge vlan add dev swp2 vid 102
- bridge vlan add dev swp2 vid 103
- bridge vlan add dev swp2 vid 104
- bridge vlan add dev swp2 vid 105
- bridge vlan add dev swp2 vid 106
- bridge vlan add dev swp2 vid 107
- # Cannot use mode than 7 VLANs per port while in Mode 3.
- [ 3885.216832] sja1105 spi0.1: No more free subvlans
-
-\* "maximum of 7 non-pvid VLANs per port": Decoding VLAN-tagged packets on the
-CPU in mode 3 is possible through VLAN retagging of packets that go from the
-switch to the CPU. In cross-chip topologies, the port that goes to the CPU
-might also go to other switches. In that case, those other switches will see
-only a retagged packet (which only has meaning for the CPU). So if they are
-interested in this VLAN, they need to apply retagging in the reverse direction,
-to recover the original value from it. This consumes extra hardware resources
-for this switch. There is a maximum of 32 entries in the Retagging Table of
-each switch device.
-
-As an example, consider this cross-chip topology::
-
- +-------------------------------------------------+
- | Host SoC |
- | +-------------------------+ |
- | | DSA master for embedded | |
- | | switch (non-sja1105) | |
- | +--------+-------------------------+--------+ |
- | | embedded L2 switch | |
- | | | |
- | | +--------------+ +--------------+ | |
- | | |DSA master for| |DSA master for| | |
- | | | SJA1105 1 | | SJA1105 2 | | |
- +--+---+--------------+-----+--------------+---+--+
-
- +-----------------------+ +-----------------------+
- | SJA1105 switch 1 | | SJA1105 switch 2 |
- +-----+-----+-----+-----+ +-----+-----+-----+-----+
- |sw1p0|sw1p1|sw1p2|sw1p3| |sw2p0|sw2p1|sw2p2|sw2p3|
- +-----+-----+-----+-----+ +-----+-----+-----+-----+
-
-To reach the CPU, SJA1105 switch 1 (spi/spi2.1) uses the same port as is uses
-to reach SJA1105 switch 2 (spi/spi2.2), which would be port 4 (not drawn).
-Similarly for SJA1105 switch 2.
-
-Also consider the following commands, that add VLAN 100 to every sja1105 user
-port::
-
- devlink dev param set spi/spi2.1 name best_effort_vlan_filtering value true cmode runtime
- devlink dev param set spi/spi2.2 name best_effort_vlan_filtering value true cmode runtime
- ip link add dev br0 type bridge
- for port in sw1p0 sw1p1 sw1p2 sw1p3 \
- sw2p0 sw2p1 sw2p2 sw2p3; do
- ip link set dev $port master br0
- done
- ip link set dev br0 type bridge vlan_filtering 1
- for port in sw1p0 sw1p1 sw1p2 sw1p3 \
- sw2p0 sw2p1 sw2p2; do
- bridge vlan add dev $port vid 100
- done
- ip link add link br0 name br0.100 type vlan id 100 && ip link set dev br0.100 up
- ip addr add 192.168.100.3/24 dev br0.100
- bridge vlan add dev br0 vid 100 self
-
- bridge vlan
- port vlan ids
- sw1p0 1 PVID Egress Untagged
- 100
-
- sw1p1 1 PVID Egress Untagged
- 100
-
- sw1p2 1 PVID Egress Untagged
- 100
-
- sw1p3 1 PVID Egress Untagged
- 100
-
- sw2p0 1 PVID Egress Untagged
- 100
-
- sw2p1 1 PVID Egress Untagged
- 100
-
- sw2p2 1 PVID Egress Untagged
- 100
-
- sw2p3 1 PVID Egress Untagged
-
- br0 1 PVID Egress Untagged
- 100
-
-SJA1105 switch 1 consumes 1 retagging entry for each VLAN on each user port
-towards the CPU. It also consumes 1 retagging entry for each non-pvid VLAN that
-it is also interested in, which is configured on any port of any neighbor
-switch.
-
-In this case, SJA1105 switch 1 consumes a total of 11 retagging entries, as
-follows:
-
-- 8 retagging entries for VLANs 1 and 100 installed on its user ports
- (``sw1p0`` - ``sw1p3``)
-- 3 retagging entries for VLAN 100 installed on the user ports of SJA1105
- switch 2 (``sw2p0`` - ``sw2p2``), because it also has ports that are
- interested in it. The VLAN 1 is a pvid on SJA1105 switch 2 and does not need
- reverse retagging.
-
-SJA1105 switch 2 also consumes 11 retagging entries, but organized as follows:
-
-- 7 retagging entries for the bridge VLANs on its user ports (``sw2p0`` -
- ``sw2p3``).
-- 4 retagging entries for VLAN 100 installed on the user ports of SJA1105
- switch 1 (``sw1p0`` - ``sw1p3``).
-
Switching features
==================
@@ -282,33 +89,10 @@ untagged), and therefore this mode is also supported.
Segregating the switch ports in multiple bridges is supported (e.g. 2 + 2), but
all bridges should have the same level of VLAN awareness (either both have
-``vlan_filtering`` 0, or both 1). Also an inevitable limitation of the fact
-that VLAN awareness is global at the switch level is that once a bridge with
-``vlan_filtering`` enslaves at least one switch port, the other un-bridged
-ports are no longer available for standalone traffic termination.
+``vlan_filtering`` 0, or both 1).
Topology and loop detection through STP is supported.
-L2 FDB manipulation (add/delete/dump) is currently possible for the first
-generation devices. Aging time of FDB entries, as well as enabling fully static
-management (no address learning and no flooding of unknown traffic) is not yet
-configurable in the driver.
-
-A special comment about bridging with other netdevices (illustrated with an
-example):
-
-A board has eth0, eth1, swp0@eth1, swp1@eth1, swp2@eth1, swp3@eth1.
-The switch ports (swp0-3) are under br0.
-It is desired that eth0 is turned into another switched port that communicates
-with swp0-3.
-
-If br0 has vlan_filtering 0, then eth0 can simply be added to br0 with the
-intended results.
-If br0 has vlan_filtering 1, then a new br1 interface needs to be created that
-enslaves eth0 and eth1 (the DSA master of the switch ports). This is because in
-this mode, the switch ports beneath br0 are not capable of regular traffic, and
-are only used as a conduit for switchdev operations.
-
Offloads
========
diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index c86628e6a235..d9b55b7a1a4d 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -595,6 +595,14 @@ Link extended substates:
that is not formally
supported, which led to
signal integrity issues
+
+ ``ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST`` The external clock signal for
+ SerDes is too weak or
+ unavailable.
+
+ ``ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS`` The received signal for
+ SerDes is too weak because of
+ analog loss of signal.
================================================================= =============================
Cable issue substates:
@@ -939,12 +947,25 @@ Kernel response contents:
``ETHTOOL_A_COALESCE_TX_USECS_HIGH`` u32 delay (us), high Tx
``ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH`` u32 max packets, high Tx
``ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL`` u32 rate sampling interval
+ ``ETHTOOL_A_COALESCE_USE_CQE_TX`` bool timer reset mode, Tx
+ ``ETHTOOL_A_COALESCE_USE_CQE_RX`` bool timer reset mode, Rx
=========================================== ====== =======================
Attributes are only included in reply if their value is not zero or the
corresponding bit in ``ethtool_ops::supported_coalesce_params`` is set (i.e.
they are declared as supported by driver).
+Timer reset mode (``ETHTOOL_A_COALESCE_USE_CQE_TX`` and
+``ETHTOOL_A_COALESCE_USE_CQE_RX``) controls the interaction between packet
+arrival and the various time based delay parameters. By default timers are
+expected to limit the max delay between any packet arrival/departure and a
+corresponding interrupt. In this mode timer should be started by packet
+arrival (sometimes delivery of previous interrupt) and reset when interrupt
+is delivered.
+Setting the appropriate attribute to 1 will enable ``CQE`` mode, where
+each packet event resets the timer. In this mode timer is used to force
+the interrupt if queue goes idle, while busy queues depend on the packet
+limit to trigger interrupts.
COALESCE_SET
============
@@ -977,6 +998,8 @@ Request contents:
``ETHTOOL_A_COALESCE_TX_USECS_HIGH`` u32 delay (us), high Tx
``ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH`` u32 max packets, high Tx
``ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL`` u32 rate sampling interval
+ ``ETHTOOL_A_COALESCE_USE_CQE_TX`` bool timer reset mode, Tx
+ ``ETHTOOL_A_COALESCE_USE_CQE_RX`` bool timer reset mode, Rx
=========================================== ====== =======================
Request is rejected if it attributes declared as unsupported by driver (i.e.
diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst
index 3e2221f4abe4..ce2b8e8bb9ab 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -320,13 +320,6 @@ Examples for low-level BPF:
ret #-1
drop: ret #0
-**(Accelerated) VLAN w/ id 10**::
-
- ld vlan_tci
- jneq #10, drop
- ret #-1
- drop: ret #0
-
**icmp random packet sampling, 1 in 4**::
ldh [12]
@@ -358,6 +351,22 @@ Examples for low-level BPF:
bad: ret #0 /* SECCOMP_RET_KILL_THREAD */
good: ret #0x7fff0000 /* SECCOMP_RET_ALLOW */
+Examples for low-level BPF extension:
+
+**Packet for interface index 13**::
+
+ ld ifidx
+ jneq #13, drop
+ ret #-1
+ drop: ret #0
+
+**(Accelerated) VLAN w/ id 10**::
+
+ ld vlan_tci
+ jneq #10, drop
+ ret #-1
+ drop: ret #0
+
The above example code can be placed into a file (here called "foo"), and
then be passed to the bpf_asm tool for generating opcodes, output that xt_bpf
and cls_bpf understands and can directly be loaded with. Example with above
@@ -629,8 +638,8 @@ extension, PTP dissector/classifier, and much more. They are all internally
converted by the kernel into the new instruction set representation and run
in the eBPF interpreter. For in-kernel handlers, this all works transparently
by using bpf_prog_create() for setting up the filter, resp.
-bpf_prog_destroy() for destroying it. The macro
-BPF_PROG_RUN(filter, ctx) transparently invokes eBPF interpreter or JITed
+bpf_prog_destroy() for destroying it. The function
+bpf_prog_run(filter, ctx) transparently invokes eBPF interpreter or JITed
code to run the filter. 'filter' is a pointer to struct bpf_prog that we
got from bpf_prog_create(), and 'ctx' the given context (e.g.
skb pointer). All constraints and restrictions from bpf_check_classic() apply
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index e9ce55992aa9..58bc8cd367c6 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -57,6 +57,7 @@ Contents:
gen_stats
gtp
ila
+ ioam6-sysctl
ipddp
ip_dynaddr
ipsec
@@ -68,6 +69,7 @@ Contents:
l2tp
lapb-module
mac80211-injection
+ mctp
mpls-sysctl
mptcp-sysctl
multiqueue
diff --git a/Documentation/networking/ioam6-sysctl.rst b/Documentation/networking/ioam6-sysctl.rst
new file mode 100644
index 000000000000..c18cab2c481a
--- /dev/null
+++ b/Documentation/networking/ioam6-sysctl.rst
@@ -0,0 +1,26 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+IOAM6 Sysctl variables
+======================
+
+
+/proc/sys/net/ipv6/conf/<iface>/ioam6_* variables:
+==================================================
+
+ioam6_enabled - BOOL
+ Accept (= enabled) or ignore (= disabled) IPv6 IOAM options on ingress
+ for this interface.
+
+ * 0 - disabled (default)
+ * 1 - enabled
+
+ioam6_id - SHORT INTEGER
+ Define the IOAM id of this interface.
+
+ Default is ~0.
+
+ioam6_id_wide - INTEGER
+ Define the wide IOAM id of this interface.
+
+ Default is ~0.
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 316c7dfa9693..d91ab28718d4 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1926,6 +1926,23 @@ fib_notify_on_flag_change - INTEGER
- 1 - Emit notifications.
- 2 - Emit notifications only for RTM_F_OFFLOAD_FAILED flag change.
+ioam6_id - INTEGER
+ Define the IOAM id of this node. Uses only 24 bits out of 32 in total.
+
+ Min: 0
+ Max: 0xFFFFFF
+
+ Default: 0xFFFFFF
+
+ioam6_id_wide - LONG INTEGER
+ Define the wide IOAM id of this node. Uses only 56 bits out of 64 in
+ total. Can be different from ioam6_id.
+
+ Min: 0
+ Max: 0xFFFFFFFFFFFFFF
+
+ Default: 0xFFFFFFFFFFFFFF
+
IPv6 Fragmentation:
ip6frag_high_thresh - INTEGER
diff --git a/Documentation/networking/mctp.rst b/Documentation/networking/mctp.rst
new file mode 100644
index 000000000000..6100cdc220f6
--- /dev/null
+++ b/Documentation/networking/mctp.rst
@@ -0,0 +1,213 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================================
+Management Component Transport Protocol (MCTP)
+==============================================
+
+net/mctp/ contains protocol support for MCTP, as defined by DMTF standard
+DSP0236. Physical interface drivers ("bindings" in the specification) are
+provided in drivers/net/mctp/.
+
+The core code provides a socket-based interface to send and receive MCTP
+messages, through an AF_MCTP, SOCK_DGRAM socket.
+
+Structure: interfaces & networks
+================================
+
+The kernel models the local MCTP topology through two items: interfaces and
+networks.
+
+An interface (or "link") is an instance of an MCTP physical transport binding
+(as defined by DSP0236, section 3.2.47), likely connected to a specific hardware
+device. This is represented as a ``struct netdevice``.
+
+A network defines a unique address space for MCTP endpoints by endpoint-ID
+(described by DSP0236, section 3.2.31). A network has a user-visible identifier
+to allow references from userspace. Route definitions are specific to one
+network.
+
+Interfaces are associated with one network. A network may be associated with one
+or more interfaces.
+
+If multiple networks are present, each may contain endpoint IDs (EIDs) that are
+also present on other networks.
+
+Sockets API
+===========
+
+Protocol definitions
+--------------------
+
+MCTP uses ``AF_MCTP`` / ``PF_MCTP`` for the address and protocol families.
+Since MCTP is message-based, only ``SOCK_DGRAM`` sockets are supported.
+
+.. code-block:: C
+
+ int sd = socket(AF_MCTP, SOCK_DGRAM, 0);
+
+The only (current) value for the ``protocol`` argument is 0.
+
+As with all socket address families, source and destination addresses are
+specified with a ``sockaddr`` type, with a single-byte endpoint address:
+
+.. code-block:: C
+
+ typedef __u8 mctp_eid_t;
+
+ struct mctp_addr {
+ mctp_eid_t s_addr;
+ };
+
+ struct sockaddr_mctp {
+ unsigned short int smctp_family;
+ int smctp_network;
+ struct mctp_addr smctp_addr;
+ __u8 smctp_type;
+ __u8 smctp_tag;
+ };
+
+ #define MCTP_NET_ANY 0x0
+ #define MCTP_ADDR_ANY 0xff
+
+
+Syscall behaviour
+-----------------
+
+The following sections describe the MCTP-specific behaviours of the standard
+socket system calls. These behaviours have been chosen to map closely to the
+existing sockets APIs.
+
+``bind()`` : set local socket address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sockets that receive incoming request packets will bind to a local address,
+using the ``bind()`` syscall.
+
+.. code-block:: C
+
+ struct sockaddr_mctp addr;
+
+ addr.smctp_family = AF_MCTP;
+ addr.smctp_network = MCTP_NET_ANY;
+ addr.smctp_addr.s_addr = MCTP_ADDR_ANY;
+ addr.smctp_type = MCTP_TYPE_PLDM;
+ addr.smctp_tag = MCTP_TAG_OWNER;
+
+ int rc = bind(sd, (struct sockaddr *)&addr, sizeof(addr));
+
+This establishes the local address of the socket. Incoming MCTP messages that
+match the network, address, and message type will be received by this socket.
+The reference to 'incoming' is important here; a bound socket will only receive
+messages with the TO bit set, to indicate an incoming request message, rather
+than a response.
+
+The ``smctp_tag`` value will configure the tags accepted from the remote side of
+this socket. Given the above, the only valid value is ``MCTP_TAG_OWNER``, which
+will result in remotely "owned" tags being routed to this socket. Since
+``MCTP_TAG_OWNER`` is set, the 3 least-significant bits of ``smctp_tag`` are not
+used; callers must set them to zero.
+
+A ``smctp_network`` value of ``MCTP_NET_ANY`` will configure the socket to
+receive incoming packets from any locally-connected network. A specific network
+value will cause the socket to only receive incoming messages from that network.
+
+The ``smctp_addr`` field specifies a local address to bind to. A value of
+``MCTP_ADDR_ANY`` configures the socket to receive messages addressed to any
+local destination EID.
+
+The ``smctp_type`` field specifies which message types to receive. Only the
+lower 7 bits of the type are matched on incoming messages (i.e., the
+most-significant IC bit is not part of the match). This results in the socket
+receiving packets with and without a message integrity check footer.
+
+``sendto()``, ``sendmsg()``, ``send()`` : transmit an MCTP message
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+An MCTP message is transmitted using one of the ``sendto()``, ``sendmsg()`` or
+``send()`` syscalls. Using ``sendto()`` as the primary example:
+
+.. code-block:: C
+
+ struct sockaddr_mctp addr;
+ char buf[14];
+ ssize_t len;
+
+ /* set message destination */
+ addr.smctp_family = AF_MCTP;
+ addr.smctp_network = 0;
+ addr.smctp_addr.s_addr = 8;
+ addr.smctp_tag = MCTP_TAG_OWNER;
+ addr.smctp_type = MCTP_TYPE_ECHO;
+
+ /* arbitrary message to send, with message-type header */
+ buf[0] = MCTP_TYPE_ECHO;
+ memcpy(buf + 1, "hello, world!", sizeof(buf) - 1);
+
+ len = sendto(sd, buf, sizeof(buf), 0,
+ (struct sockaddr *)&addr, sizeof(addr));
+
+The network and address fields of ``addr`` define the remote address to send to.
+If ``smctp_tag`` has the ``MCTP_TAG_OWNER`` bit set, the kernel will ignore any bits set
+in ``MCTP_TAG_VALUE``, and generate a tag value suitable for the destination
+EID. If ``MCTP_TAG_OWNER`` is not set, the message will be sent with the tag
+value as specified. If a tag value cannot be allocated, the system call will
+report an errno of ``EAGAIN``.
+
+The application must provide the message type byte as the first byte of the
+message buffer passed to ``sendto()``. If a message integrity check is to be
+included in the transmitted message, it must also be provided in the message
+buffer, and the most-significant bit of the message type byte must be 1.
+
+The ``sendmsg()`` system call allows a more compact argument interface, and the
+message buffer to be specified as a scatter-gather list. At present no ancillary
+message types (used for the ``msg_control`` data passed to ``sendmsg()``) are
+defined.
+
+Transmitting a message on an unconnected socket with ``MCTP_TAG_OWNER``
+specified will cause an allocation of a tag, if no valid tag is already
+allocated for that destination. The (destination-eid,tag) tuple acts as an
+implicit local socket address, to allow the socket to receive responses to this
+outgoing message. If any previous allocation has been performed (for a
+different remote EID), that allocation is lost.
+
+Sockets will only receive responses to requests they have sent (with TO=1) and
+may only respond (with TO=0) to requests they have received.
+
+``recvfrom()``, ``recvmsg()``, ``recv()`` : receive an MCTP message
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+An MCTP message can be received by an application using one of the
+``recvfrom()``, ``recvmsg()``, or ``recv()`` system calls. Using ``recvfrom()``
+as the primary example:
+
+.. code-block:: C
+
+ struct sockaddr_mctp addr;
+ socklen_t addrlen;
+ char buf[14];
+ ssize_t len;
+
+ addrlen = sizeof(addr);
+
+ len = recvfrom(sd, buf, sizeof(buf), 0,
+ (struct sockaddr *)&addr, &addrlen);
+
+ /* We can expect addr to describe an MCTP address */
+ assert(addrlen >= sizeof(addr));
+ assert(addr.smctp_family == AF_MCTP);
+
+ printf("received %zd bytes from remote EID %d\n", len, addr.smctp_addr.s_addr);
+
+The address argument to ``recvfrom`` and ``recvmsg`` is populated with the
+remote address of the incoming message, including tag value (this will be needed
+in order to reply to the message).
+
+The first byte of the message buffer will contain the message type byte. If an
+integrity check follows the message, it will be included in the received buffer.
+
+The ``recv()`` system call behaves in a similar way, but does not provide a
+remote address to the application. Therefore, it is only useful if the
+remote address is already known, or the message does not require a reply.
+
+Like the send calls, sockets will only receive responses to requests they have
+sent (TO=1) and may only respond (TO=0) to requests they have received.
diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst
index 76d939e688b8..b0d4da71e68e 100644
--- a/Documentation/networking/mptcp-sysctl.rst
+++ b/Documentation/networking/mptcp-sysctl.rst
@@ -45,3 +45,15 @@ allow_join_initial_addr_port - BOOLEAN
This is a per-namespace sysctl.
Default: 1
+
+stale_loss_cnt - INTEGER
+ The number of MPTCP-level retransmission intervals with no traffic and
+ pending outstanding data on a given subflow required to declare it stale.
+ The packet scheduler ignores stale subflows.
+ A low stale_loss_cnt value allows for fast active-backup switch-over,
+ a high value maximizes link utilization in edge scenarios, e.g. lossy
+ link with high BER or peer pausing the data processing.
+
+ This is a per-namespace sysctl.
+
+ Default: 4
diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst
index 17bdcb746dcf..9e4cccb90b87 100644
--- a/Documentation/networking/netdevices.rst
+++ b/Documentation/networking/netdevices.rst
@@ -222,6 +222,35 @@ ndo_do_ioctl:
Synchronization: rtnl_lock() semaphore.
Context: process
+ This is only called by network subsystems internally,
+ not by user space calling ioctl as it was before
+ linux-5.14.
+
+ndo_siocbond:
+ Synchronization: rtnl_lock() semaphore.
+ Context: process
+
+ Used by the bonding driver for the SIOCBOND family of
+ ioctl commands.
+
+ndo_siocwandev:
+ Synchronization: rtnl_lock() semaphore.
+ Context: process
+
+ Used by the drivers/net/wan framework to handle
+ the SIOCWANDEV ioctl with the if_settings structure.
+
+ndo_siocdevprivate:
+ Synchronization: rtnl_lock() semaphore.
+ Context: process
+
+ This is used to implement SIOCDEVPRIVATE ioctl helpers.
+ These should not be added to new drivers, so don't use.
+
+ndo_eth_ioctl:
+ Synchronization: rtnl_lock() semaphore.
+ Context: process
+
ndo_get_stats:
Synchronization: rtnl_lock() semaphore, dev_base_lock rwlock, or RCU.
Context: atomic (can't sleep under rwlock or RCU)
diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst
index d31ed6c1cb0d..34ca762ea56f 100644
--- a/Documentation/networking/nf_conntrack-sysctl.rst
+++ b/Documentation/networking/nf_conntrack-sysctl.rst
@@ -184,6 +184,13 @@ nf_conntrack_gre_timeout_stream - INTEGER (seconds)
This extended timeout will be used in case there is an GRE stream
detected.
+nf_hooks_lwtunnel - BOOLEAN
+ - 0 - disabled (default)
+ - not 0 - enabled
+
+ If this option is enabled, the lightweight tunnel netfilter hooks are
+ enabled. This option cannot be disabled once it is enabled.
+
nf_flowtable_tcp_timeout - INTEGER (seconds)
default 30
@@ -191,19 +198,9 @@ nf_flowtable_tcp_timeout - INTEGER (seconds)
TCP connections may be offloaded from nf conntrack to nf flow table.
Once aged, the connection is returned to nf conntrack with tcp pickup timeout.
-nf_flowtable_tcp_pickup - INTEGER (seconds)
- default 120
-
- TCP connection timeout after being aged from nf flow table offload.
-
nf_flowtable_udp_timeout - INTEGER (seconds)
default 30
Control offload timeout for udp connections.
UDP connections may be offloaded from nf conntrack to nf flow table.
Once aged, the connection is returned to nf conntrack with udp pickup timeout.
-
-nf_flowtable_udp_pickup - INTEGER (seconds)
- default 30
-
- UDP connection timeout after being aged from nf flow table offload.
diff --git a/Documentation/networking/pktgen.rst b/Documentation/networking/pktgen.rst
index 7afa1c9f1183..1225f0f63ff0 100644
--- a/Documentation/networking/pktgen.rst
+++ b/Documentation/networking/pktgen.rst
@@ -248,26 +248,24 @@ Usage:::
-i : ($DEV) output interface/device (required)
-s : ($PKT_SIZE) packet size
- -d : ($DEST_IP) destination IP
+ -d : ($DEST_IP) destination IP. CIDR (e.g. 198.18.0.0/15) is also allowed
-m : ($DST_MAC) destination MAC-addr
+ -p : ($DST_PORT) destination PORT. A range (e.g. 433-444) is also allowed
-t : ($THREADS) threads to start
+ -f : ($F_THREAD) index of first thread (zero indexed CPU number)
-c : ($SKB_CLONE) SKB clones send before alloc new SKB
+ -n : ($COUNT) num messages to send per thread, 0 means indefinitely
-b : ($BURST) HW level bursting of SKBs
-v : ($VERBOSE) verbose
-x : ($DEBUG) debug
+ -6 : ($IP6) IPv6
+ -w : ($DELAY) Tx Delay value (ns)
+ -a : ($APPEND) Script will not reset generator's state, but will append its config
The global variables being set are also listed. E.g. the required
interface/device parameter "-i" sets variable $DEV. Copy the
pktgen_sampleXX scripts and modify them to fit your own needs.
-The old scripts::
-
- pktgen.conf-1-2 # 1 CPU 2 dev
- pktgen.conf-1-1-rdos # 1 CPU 1 dev w. route DoS
- pktgen.conf-1-1-ip6 # 1 CPU 1 dev ipv6
- pktgen.conf-1-1-ip6-rdos # 1 CPU 1 dev ipv6 w. route DoS
- pktgen.conf-1-1-flows # 1 CPU 1 dev multiple flows.
-
Interrupt affinity
===================
@@ -398,7 +396,7 @@ Current commands and configuration options
References:
- ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/
-- tp://robur.slu.se/pub/Linux/net-development/pktgen-testing/examples/
+- ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/examples/
Paper from Linux-Kongress in Erlangen 2004.
- ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/pktgen_paper.pdf
diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst
index 7db3985359bc..a722eb30e014 100644
--- a/Documentation/networking/timestamping.rst
+++ b/Documentation/networking/timestamping.rst
@@ -625,7 +625,7 @@ interfaces of a DSA switch to share the same PHC.
By design, PTP timestamping with a DSA switch does not need any special
handling in the driver for the host port it is attached to. However, when the
host port also supports PTP timestamping, DSA will take care of intercepting
-the ``.ndo_do_ioctl`` calls towards the host port, and block attempts to enable
+the ``.ndo_eth_ioctl`` calls towards the host port, and block attempts to enable
hardware timestamping on it. This is because the SO_TIMESTAMPING API does not
allow the delivery of multiple hardware timestamps for the same packet, so
anybody else except for the DSA switch port must be prevented from doing so.
@@ -688,7 +688,7 @@ ethtool ioctl operations for them need to be mediated by their respective MAC
driver. Therefore, as opposed to DSA switches, modifications need to be done
to each individual MAC driver for PHY timestamping support. This entails:
-- Checking, in ``.ndo_do_ioctl``, whether ``phy_has_hwtstamp(netdev->phydev)``
+- Checking, in ``.ndo_eth_ioctl``, whether ``phy_has_hwtstamp(netdev->phydev)``
is true or not. If it is, then the MAC driver should not process this request
but instead pass it on to the PHY using ``phy_mii_ioctl()``.
@@ -747,7 +747,7 @@ For example, a typical driver design for TX timestamping might be to split the
transmission part into 2 portions:
1. "TX": checks whether PTP timestamping has been previously enabled through
- the ``.ndo_do_ioctl`` ("``priv->hwtstamp_tx_enabled == true``") and the
+ the ``.ndo_eth_ioctl`` ("``priv->hwtstamp_tx_enabled == true``") and the
current skb requires a TX timestamp ("``skb_shinfo(skb)->tx_flags &
SKBTX_HW_TSTAMP``"). If this is true, it sets the
"``skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS``" flag. Note: as
diff --git a/Documentation/networking/vrf.rst b/Documentation/networking/vrf.rst
index 0dde145043bc..0a9a6f968cb9 100644
--- a/Documentation/networking/vrf.rst
+++ b/Documentation/networking/vrf.rst
@@ -144,6 +144,19 @@ default VRF are only handled by a socket not bound to any VRF::
netfilter rules on the VRF device can be used to limit access to services
running in the default VRF context as well.
+Using VRF-aware applications (applications which simultaneously create sockets
+outside and inside VRFs) in conjunction with ``net.ipv4.tcp_l3mdev_accept=1``
+is possible but may lead to problems in some situations. With that sysctl
+value, it is unspecified which listening socket will be selected to handle
+connections for VRF traffic; i.e. either a socket bound to the VRF or an unbound
+socket may be used to accept new connections from a VRF. This somewhat
+unexpected behavior can lead to problems if sockets are configured with extra
+options (e.g. TCP MD5 keys) with the expectation that VRF traffic will
+exclusively be handled by sockets bound to VRFs, as would be the case with
+``net.ipv4.tcp_l3mdev_accept=0``. Finally and as a reminder, regardless of
+which listening socket is selected, established sockets will be created in the
+VRF based on the ingress interface, as documented earlier.
+
--------------------------------------------------------------------------------
Using iproute2 for VRFs
diff --git a/Documentation/trace/coresight/coresight-config.rst b/Documentation/trace/coresight/coresight-config.rst
new file mode 100644
index 000000000000..a4e3ef295240
--- /dev/null
+++ b/Documentation/trace/coresight/coresight-config.rst
@@ -0,0 +1,244 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================
+CoreSight System Configuration Manager
+======================================
+
+ :Author: Mike Leach <mike.leach@linaro.org>
+ :Date: October 2020
+
+Introduction
+============
+
+The CoreSight System Configuration manager is an API that allows the
+programming of the CoreSight system with pre-defined configurations that
+can then be easily enabled from sysfs or perf.
+
+Many CoreSight components can be programmed in complex ways - especially ETMs.
+In addition, components can interact across the CoreSight system, often via
+the cross trigger components such as CTI and CTM. These system settings can
+be defined and enabled as named configurations.
+
+
+Basic Concepts
+==============
+
+This section introduces the basic concepts of a CoreSight system configuration.
+
+
+Features
+--------
+
+A feature is a named set of programming for a CoreSight device. The programming
+is device dependent, and can be defined in terms of absolute register values,
+resource usage and parameter values.
+
+The feature is defined using a descriptor. This descriptor is used to load the
+feature onto a matching device, either when the feature is loaded into the
+system, or when the CoreSight device is registered with the configuration manager.
+
+The load process involves interpreting the descriptor into a set of register
+accesses in the driver - the resource usage and parameter descriptions are
+translated into appropriate register accesses. This interpretation makes it easy
+and efficient for the feature to be programmed onto the device when required.
+
+The feature will not be active on the device until the feature is enabled, and
+the device itself is enabled. When the device is enabled then enabled features
+will be programmed into the device hardware.
+
+A feature is enabled as part of a configuration being enabled on the system.
+
+
+Parameter Value
+~~~~~~~~~~~~~~~
+
+A parameter value is a named value that may be set by the user prior to the
+feature being enabled that can adjust the behaviour of the operation programmed
+by the feature.
+
+For example, this could be a count value in a programmed operation that repeats
+at a given rate. When the feature is enabled then the current value of the
+parameter is used in programming the device.
+
+The feature descriptor defines a default value for a parameter, which is used
+if the user does not supply a new value.
+
+Users can update parameter values using the configfs API for the CoreSight
+system - which is described below.
+
+The current value of the parameter is loaded into the device when the feature
+is enabled on that device.
+
+
+Configurations
+--------------
+
+A configuration defines a set of features that are to be used in a trace
+session where the configuration is selected. For any trace session only one
+configuration may be selected.
+
+The features defined may be on any type of device that is registered
+to support system configuration. A configuration may select features to be
+enabled on a class of devices - i.e. any ETMv4, or specific devices, e.g. a
+specific CTI on the system.
+
+As with the feature, a descriptor is used to define the configuration.
+This will define the features that must be enabled as part of the configuration
+as well as any preset values that can be used to override default parameter
+values.
+
+
+Preset Values
+~~~~~~~~~~~~~
+
+Preset values are easily selectable sets of parameter values for the features
+that the configuration uses. The number of values in a single preset set equals
+the total number of parameters in the features used by the configuration.
+
+e.g. a configuration consists of 3 features, one has 2 parameters, one has
+a single parameter, and another has no parameters. A single preset set will
+therefore have 3 values.
+
+Presets are optionally defined by the configuration; up to 15 can be defined.
+If no preset is selected, then the parameter values defined in the feature
+are used as normal.
+
+
+Operation
+~~~~~~~~~
+
+The following steps take place in the operation of a configuration.
+
+1) In this example, the configuration is 'autofdo', which has an
+ associated feature 'strobing' that works on ETMv4 CoreSight Devices.
+
+2) The configuration is enabled. For example 'perf' may select the
+ configuration as part of its command line::
+
+ perf record -e cs_etm/autofdo/ myapp
+
+ which will enable the 'autofdo' configuration.
+
+3) perf starts tracing on the system. As each ETMv4 that perf uses for
+ trace is enabled, the configuration manager will check if the ETMv4
+ has a feature that relates to the currently active configuration.
+ In this case 'strobing' is enabled & programmed into the ETMv4.
+
+4) When the ETMv4 is disabled, any registers marked as needing to be
+ saved will be read back.
+
+5) At the end of the perf session, the configuration will be disabled.
+
+
+Viewing Configurations and Features
+===================================
+
+The set of configurations and features that are currently loaded into the
+system can be viewed using the configfs API.
+
+Mount configfs as normal and the 'cs-syscfg' subsystem will appear::
+
+ $ ls /config
+ cs-syscfg stp-policy
+
+This has two sub-directories::
+
+ $ cd cs-syscfg/
+ $ ls
+ configurations features
+
+The system has the configuration 'autofdo' built in. It may be examined as
+follows::
+
+ $ cd configurations/
+ $ ls
+ autofdo
+ $ cd autofdo/
+ $ ls
+ description preset1 preset3 preset5 preset7 preset9
+ feature_refs preset2 preset4 preset6 preset8
+ $ cat description
+ Setup ETMs with strobing for autofdo
+ $ cat feature_refs
+ strobing
+
+Each preset declared has a preset<n> subdirectory declared. The values for
+the preset can be examined::
+
+ $ cat preset1/values
+ strobing.window = 0x1388 strobing.period = 0x2
+ $ cat preset2/values
+ strobing.window = 0x1388 strobing.period = 0x4
+
+The features referenced by the configuration can be examined in the features
+directory::
+
+ $ cd ../../features/strobing/
+ $ ls
+ description matches nr_params params
+ $ cat description
+ Generate periodic trace capture windows.
+ parameter 'window': a number of CPU cycles (W)
+ parameter 'period': trace enabled for W cycles every period x W cycles
+ $ cat matches
+ SRC_ETMV4
+ $ cat nr_params
+ 2
+
+Move to the params directory to examine and adjust parameters::
+
+ $ cd params
+ $ ls
+ period window
+ $ cd period
+ $ ls
+ value
+ $ cat value
+ 0x2710
+ # echo 15000 > value
+ # cat value
+ 0x3a98
+
+Parameters adjusted in this way are reflected in all device instances that have
+loaded the feature.
+
+
+Using Configurations in perf
+============================
+
+The configurations loaded into the CoreSight configuration management are
+also declared in the perf 'cs_etm' event infrastructure so that they can
+be selected when running trace under perf::
+
+ $ ls /sys/devices/cs_etm
+ configurations format perf_event_mux_interval_ms sinks type
+ events nr_addr_filters power
+
+Key directories here are 'configurations' - which lists the loaded
+configurations, and 'events' - a generic perf directory which allows
+selection on the perf command line::
+
+ $ ls configurations/
+ autofdo
+ $ cat configurations/autofdo
+ 0xa7c3dddd
+
+As with the sinks entries, this provides a hash of the configuration name.
+The entry in the 'events' directory uses perf's built-in syntax generator
+to substitute the syntax for the name when evaluating the command::
+
+ $ ls events/
+ autofdo
+ $ cat events/autofdo
+ configid=0xa7c3dddd
+
+The 'autofdo' configuration may be selected on the perf command line::
+
+ $ perf record -e cs_etm/autofdo/u --per-thread <application>
+
+A preset to override the current parameter values can also be selected::
+
+ $ perf record -e cs_etm/autofdo,preset=1/u --per-thread <application>
+
+When configurations are selected in this way, then the trace sink used is
+automatically selected.
diff --git a/Documentation/trace/coresight/coresight.rst b/Documentation/trace/coresight/coresight.rst
index 1ec8dc35b1d8..a15571d96cc8 100644
--- a/Documentation/trace/coresight/coresight.rst
+++ b/Documentation/trace/coresight/coresight.rst
@@ -620,6 +620,19 @@ channels on the CTM (Cross Trigger Matrix).
A separate documentation file is provided to explain the use of these devices.
(Documentation/trace/coresight/coresight-ect.rst) [#fourth]_.
+CoreSight System Configuration
+------------------------------
+
+CoreSight components can be complex devices with many programming options.
+Furthermore, components can be programmed to interact with each other across the
+complete system.
+
+A CoreSight System Configuration manager is provided to allow these complex programming
+configurations to be selected and used easily from perf and sysfs.
+
+See the separate document for further information.
+(Documentation/trace/coresight/coresight-config.rst) [#fifth]_.
+
.. [#first] Documentation/ABI/testing/sysfs-bus-coresight-devices-stm
@@ -628,3 +641,5 @@ A separate documentation file is provided to explain the use of these devices.
.. [#third] https://github.com/Linaro/perf-opencsd
.. [#fourth] Documentation/trace/coresight/coresight-ect.rst
+
+.. [#fifth] Documentation/trace/coresight/coresight-config.rst
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index cfc81e98e0b8..4e5b26f03d5b 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -2762,7 +2762,7 @@ listed in:
put_prev_task_idle
kmem_cache_create
pick_next_task_rt
- get_online_cpus
+ cpus_read_lock
pick_next_task_fair
mutex_lock
[...]
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 1409e40e6345..b7070d76f076 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -160,7 +160,6 @@ Code Seq# Include File Comments
'K' all linux/kd.h
'L' 00-1F linux/loop.h conflict!
'L' 10-1F drivers/scsi/mpt3sas/mpt3sas_ctl.h conflict!
-'L' 20-2F linux/lightnvm.h
'L' E0-FF linux/ppdd.h encrypted disk device driver
<http://linux01.gwdg.de/~alatham/ppdd.html>
'M' all linux/soundcard.h conflict!
diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index d61219889e49..539e9d4a4860 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -263,7 +263,7 @@ Userspace can also add file descriptors to the notifying process via
``ioctl(SECCOMP_IOCTL_NOTIF_ADDFD)``. The ``id`` member of
``struct seccomp_notif_addfd`` should be the same ``id`` as in
``struct seccomp_notif``. The ``newfd_flags`` flag may be used to set flags
-like O_EXEC on the file descriptor in the notifying process. If the supervisor
+like O_CLOEXEC on the file descriptor in the notifying process. If the supervisor
wants to inject the file descriptor with a specific number, the
``SECCOMP_ADDFD_FLAG_SETFD`` flag can be used, and set the ``newfd`` member to
the specific number to use. If that file descriptor is already open in the
diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst
index 7ddd8f667459..5e8ed9eef9aa 100644
--- a/Documentation/userspace-api/spec_ctrl.rst
+++ b/Documentation/userspace-api/spec_ctrl.rst
@@ -106,3 +106,11 @@ Speculation misfeature controls
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);
+
+- PR_SPEC_L1D_FLUSH: Flush L1D Cache on context switch out of the task
+ (works only when tasks run on non-SMT cores)
+
+ Invocations:
+ * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH, 0, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH, PR_SPEC_ENABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH, PR_SPEC_DISABLE, 0, 0);
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index 35eca377543d..88fa495abbac 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -25,10 +25,10 @@ On x86:
- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
-- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock is
- taken inside kvm->arch.mmu_lock, and cannot be taken without already
- holding kvm->arch.mmu_lock (typically with ``read_lock``, otherwise
- there's no need to take kvm->arch.tdp_mmu_pages_lock at all).
+- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock and
+ kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and
+ cannot be taken without already holding kvm->arch.mmu_lock (typically with
+ ``read_lock`` for the TDP MMU, thus the need for additional spinlocks).
Everything else is a leaf: no other lock is taken inside the critical
sections.
diff --git a/Documentation/x86/x86_64/boot-options.rst b/Documentation/x86/x86_64/boot-options.rst
index 5f62b3b86357..ccb7e86bf8d9 100644
--- a/Documentation/x86/x86_64/boot-options.rst
+++ b/Documentation/x86/x86_64/boot-options.rst
@@ -126,7 +126,7 @@ Idle loop
Rebooting
=========
- reboot=b[ios] | t[riple] | k[bd] | a[cpi] | e[fi] [, [w]arm | [c]old]
+ reboot=b[ios] | t[riple] | k[bd] | a[cpi] | e[fi] | p[ci] [, [w]arm | [c]old]
bios
Use the CPU reboot vector for warm reset
warm
@@ -145,6 +145,8 @@ Rebooting
Use efi reset_system runtime service. If EFI is not configured or
the EFI reset does not work, the reboot path attempts the reset using
the keyboard controller.
+ pci
+ Use a write to the PCI config space register 0xcf9 to trigger reboot.
Using warm reset will be much faster especially on big memory
systems because the BIOS will not go through the memory check.
@@ -155,6 +157,13 @@ Rebooting
Don't stop other CPUs on reboot. This can make reboot more reliable
in some cases.
+ reboot=default
+ There are some built-in platform specific "quirks" - you may see:
+ "reboot: <name> series board detected. Selecting <type> for reboots."
+ In the case where you think the quirk is in error (e.g. you have a
+ newer BIOS, or a newer board) using this option will ignore the built-in
+ quirk table, and use the generic default reboot actions.
+
Non Executable Mappings
=======================
diff --git a/MAINTAINERS b/MAINTAINERS
index b227a96e543e..f6914e5206fd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1316,6 +1316,13 @@ L: linux-media@vger.kernel.org
S: Maintained
F: drivers/media/i2c/aptina-pll.*
+AQUACOMPUTER D5 NEXT PUMP SENSOR DRIVER
+M: Aleksa Savic <savicaleksa83@gmail.com>
+L: linux-hwmon@vger.kernel.org
+S: Maintained
+F: Documentation/hwmon/aquacomputer_d5next.rst
+F: drivers/hwmon/aquacomputer_d5next.c
+
AQUANTIA ETHERNET DRIVER (atlantic)
M: Igor Russkikh <irusskikh@marvell.com>
L: netdev@vger.kernel.org
@@ -2842,7 +2849,7 @@ AS3645A LED FLASH CONTROLLER DRIVER
M: Sakari Ailus <sakari.ailus@iki.fi>
L: linux-leds@vger.kernel.org
S: Maintained
-F: drivers/leds/leds-as3645a.c
+F: drivers/leds/flash/leds-as3645a.c
ASAHI KASEI AK7375 LENS VOICE COIL DRIVER
M: Tianshu Qiu <tian.shu.qiu@intel.com>
@@ -3197,7 +3204,7 @@ S: Maintained
W: https://www.open-mesh.org/
Q: https://patchwork.open-mesh.org/project/batman/list/
B: https://www.open-mesh.org/projects/batman-adv/issues
-C: irc://chat.freenode.net/batman
+C: ircs://irc.hackint.org/batadv
T: git https://git.open-mesh.org/linux-merge.git
F: Documentation/networking/batman-adv.rst
F: include/uapi/linux/batadv_packet.h
@@ -3409,7 +3416,6 @@ F: drivers/net/ethernet/netronome/nfp/bpf/
BPF JIT for POWERPC (32-BIT AND 64-BIT)
M: Naveen N. Rao <naveen.n.rao@linux.ibm.com>
-M: Sandipan Das <sandipan@linux.ibm.com>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
S: Maintained
@@ -3866,6 +3872,16 @@ L: bcm-kernel-feedback-list@broadcom.com
S: Maintained
F: drivers/mtd/nand/raw/brcmnand/
+BROADCOM STB PCIE DRIVER
+M: Jim Quinlan <jim2101024@gmail.com>
+M: Nicolas Saenz Julienne <nsaenz@kernel.org>
+M: Florian Fainelli <f.fainelli@gmail.com>
+L: bcm-kernel-feedback-list@broadcom.com
+L: linux-pci@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml
+F: drivers/pci/controller/pcie-brcmstb.c
+
BROADCOM SYSTEMPORT ETHERNET DRIVER
M: Florian Fainelli <f.fainelli@gmail.com>
L: bcm-kernel-feedback-list@broadcom.com
@@ -4498,7 +4514,7 @@ L: clang-built-linux@googlegroups.com
S: Supported
W: https://clangbuiltlinux.github.io/
B: https://github.com/ClangBuiltLinux/linux/issues
-C: irc://chat.freenode.net/clangbuiltlinux
+C: irc://irc.libera.chat/clangbuiltlinux
F: Documentation/kbuild/llvm.rst
F: include/linux/compiler-clang.h
F: scripts/clang-tools/
@@ -4610,7 +4626,7 @@ F: include/linux/clk/
F: include/linux/of_clk.h
X: drivers/clk/clkdev.c
-COMMON INTERNET FILE SYSTEM (CIFS)
+COMMON INTERNET FILE SYSTEM CLIENT (CIFS)
M: Steve French <sfrench@samba.org>
L: linux-cifs@vger.kernel.org
L: samba-technical@lists.samba.org (moderated for non-subscribers)
@@ -4619,6 +4635,7 @@ W: http://linux-cifs.samba.org/
T: git git://git.samba.org/sfrench/cifs-2.6.git
F: Documentation/admin-guide/cifs/
F: fs/cifs/
+F: fs/cifs_common/
COMPACTPCI HOTPLUG CORE
M: Scott Murray <scott@spiteful.org>
@@ -5684,6 +5701,7 @@ DPAA2 ETHERNET SWITCH DRIVER
M: Ioana Ciornei <ioana.ciornei@nxp.com>
L: netdev@vger.kernel.org
S: Maintained
+F: Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst
F: drivers/net/ethernet/freescale/dpaa2/dpaa2-switch*
F: drivers/net/ethernet/freescale/dpaa2/dpsw*
@@ -6909,6 +6927,12 @@ M: Mark Einon <mark.einon@gmail.com>
S: Odd Fixes
F: drivers/net/ethernet/agere/
+ETAS ES58X CAN/USB DRIVER
+M: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
+L: linux-can@vger.kernel.org
+S: Maintained
+F: drivers/net/can/usb/etas_es58x/
+
ETHERNET BRIDGE
M: Roopa Prabhu <roopa@nvidia.com>
M: Nikolay Aleksandrov <nikolay@nvidia.com>
@@ -6950,7 +6974,7 @@ F: include/uapi/linux/mdio.h
F: include/uapi/linux/mii.h
EXFAT FILE SYSTEM
-M: Namjae Jeon <namjae.jeon@samsung.com>
+M: Namjae Jeon <linkinjeon@kernel.org>
M: Sungjong Seo <sj1557.seo@samsung.com>
L: linux-fsdevel@vger.kernel.org
S: Maintained
@@ -9754,11 +9778,6 @@ M: David Sterba <dsterba@suse.com>
S: Odd Fixes
F: drivers/tty/ipwireless/
-IPX NETWORK LAYER
-L: netdev@vger.kernel.org
-S: Obsolete
-F: include/uapi/linux/ipx.h
-
IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY)
M: Marc Zyngier <maz@kernel.org>
S: Maintained
@@ -10108,6 +10127,17 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git
F: Documentation/dev-tools/kselftest*
F: tools/testing/selftests/
+KERNEL SMB3 SERVER (KSMBD)
+M: Namjae Jeon <linkinjeon@kernel.org>
+M: Sergey Senozhatsky <senozhatsky@chromium.org>
+M: Steve French <sfrench@samba.org>
+M: Hyunchul Lee <hyc.lee@gmail.com>
+L: linux-cifs@vger.kernel.org
+S: Maintained
+T: git git://git.samba.org/ksmbd.git
+F: fs/cifs_common/
+F: fs/ksmbd/
+
KERNEL UNIT TESTING FRAMEWORK (KUnit)
M: Brendan Higgins <brendanhiggins@google.com>
L: linux-kselftest@vger.kernel.org
@@ -10393,6 +10423,7 @@ F: net/core/skmsg.c
F: net/core/sock_map.c
F: net/ipv4/tcp_bpf.c
F: net/ipv4/udp_bpf.c
+F: net/unix/unix_bpf.c
LANDLOCK SECURITY MODULE
M: Mickaël Salaün <mic@digikod.net>
@@ -10614,15 +10645,6 @@ F: LICENSES/
F: scripts/spdxcheck-test.sh
F: scripts/spdxcheck.py
-LIGHTNVM PLATFORM SUPPORT
-M: Matias Bjorling <mb@lightnvm.io>
-L: linux-block@vger.kernel.org
-S: Maintained
-W: http://github/OpenChannelSSD
-F: drivers/lightnvm/
-F: include/linux/lightnvm.h
-F: include/uapi/linux/lightnvm.h
-
LINEAR RANGES HELPERS
M: Mark Brown <broonie@kernel.org>
R: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
@@ -11035,6 +11057,18 @@ F: drivers/mailbox/arm_mhuv2.c
F: include/linux/mailbox/arm_mhuv2_message.h
F: Documentation/devicetree/bindings/mailbox/arm,mhuv2.yaml
+MANAGEMENT COMPONENT TRANSPORT PROTOCOL (MCTP)
+M: Jeremy Kerr <jk@codeconstruct.com.au>
+M: Matt Johnston <matt@codeconstruct.com.au>
+L: netdev@vger.kernel.org
+S: Maintained
+F: Documentation/networking/mctp.rst
+F: drivers/net/mctp/
+F: include/net/mctp.h
+F: include/net/mctpdevice.h
+F: include/net/netns/mctp.h
+F: net/mctp/
+
MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
M: Michael Kerrisk <mtk.manpages@gmail.com>
L: linux-man@vger.kernel.org
@@ -11332,7 +11366,13 @@ W: https://linuxtv.org
T: git git://linuxtv.org/media_tree.git
F: drivers/media/radio/radio-maxiradio*
-MCAB MICROCHIP CAN BUS ANALYZER TOOL DRIVER
+MAXLINEAR ETHERNET PHY DRIVER
+M: Xu Liang <lxu@maxlinear.com>
+L: netdev@vger.kernel.org
+S: Supported
+F: drivers/net/phy/mxl-gpy.c
+
+MCBA MICROCHIP CAN BUS ANALYZER TOOL DRIVER
R: Yasushi SHOJI <yashi@spacecubics.com>
L: linux-can@vger.kernel.org
S: Maintained
@@ -13875,6 +13915,12 @@ F: Documentation/devicetree/
F: arch/*/boot/dts/
F: include/dt-bindings/
+OPENCOMPUTE PTP CLOCK DRIVER
+M: Jonathan Lemon <jonathan.lemon@gmail.com>
+L: netdev@vger.kernel.org
+S: Maintained
+F: drivers/ptp/ptp_ocp.c
+
OPENCORES I2C BUS DRIVER
M: Peter Korsgaard <peter@korsgaard.com>
M: Andrew Lunn <andrew@lunn.ch>
@@ -14435,6 +14481,13 @@ S: Maintained
F: Documentation/devicetree/bindings/pci/hisilicon-histb-pcie.txt
F: drivers/pci/controller/dwc/pcie-histb.c
+PCIE DRIVER FOR INTEL LGM GW SOC
+M: Rahul Tanwar <rtanwar@maxlinear.com>
+L: linux-pci@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/pci/intel-gw-pcie.yaml
+F: drivers/pci/controller/dwc/pcie-intel-gw.c
+
PCIE DRIVER FOR MEDIATEK
M: Ryder Lee <ryder.lee@mediatek.com>
M: Jianjun Wang <jianjun.wang@mediatek.com>
@@ -14931,13 +14984,6 @@ S: Maintained
F: include/linux/printk.h
F: kernel/printk/
-PRISM54 WIRELESS DRIVER
-M: Luis Chamberlain <mcgrof@kernel.org>
-L: linux-wireless@vger.kernel.org
-S: Obsolete
-W: https://wireless.wiki.kernel.org/en/users/Drivers/p54
-F: drivers/net/wireless/intersil/prism54/
-
PROC FILESYSTEM
L: linux-kernel@vger.kernel.org
L: linux-fsdevel@vger.kernel.org
@@ -15808,7 +15854,7 @@ F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml
F: drivers/i2c/busses/i2c-emev2.c
RENESAS ETHERNET DRIVERS
-R: Sergei Shtylyov <sergei.shtylyov@gmail.com>
+R: Sergey Shtylyov <s.shtylyov@omp.ru>
L: netdev@vger.kernel.org
L: linux-renesas-soc@vger.kernel.org
F: Documentation/devicetree/bindings/net/renesas,*.yaml
@@ -17820,7 +17866,7 @@ F: include/linux/sync_file.h
F: include/uapi/linux/sync_file.h
SYNOPSYS ARC ARCHITECTURE
-M: Vineet Gupta <vgupta@synopsys.com>
+M: Vineet Gupta <vgupta@kernel.org>
L: linux-snps-arc@lists.infradead.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc.git
@@ -19721,6 +19767,15 @@ S: Maintained
F: include/uapi/linux/virtio_snd.h
F: sound/virtio/*
+VIRTIO I2C DRIVER
+M: Jie Deng <jie.deng@intel.com>
+M: Viresh Kumar <viresh.kumar@linaro.org>
+L: linux-i2c@vger.kernel.org
+L: virtualization@lists.linux-foundation.org
+S: Maintained
+F: drivers/i2c/busses/i2c-virtio.c
+F: include/uapi/linux/virtio_i2c.h
+
VIRTUAL BOX GUEST DEVICE DRIVER
M: Hans de Goede <hdegoede@redhat.com>
M: Arnd Bergmann <arnd@arndb.de>
@@ -20022,7 +20077,8 @@ F: Documentation/devicetree/bindings/extcon/wlf,arizona.yaml
F: Documentation/devicetree/bindings/mfd/wlf,arizona.yaml
F: Documentation/devicetree/bindings/mfd/wm831x.txt
F: Documentation/devicetree/bindings/regulator/wlf,arizona.yaml
-F: Documentation/devicetree/bindings/sound/wlf,arizona.yaml
+F: Documentation/devicetree/bindings/sound/wlf,*.yaml
+F: Documentation/devicetree/bindings/sound/wm*
F: Documentation/hwmon/wm83??.rst
F: arch/arm/mach-s3c/mach-crag6410*
F: drivers/clk/clk-wm83*.c
diff --git a/Makefile b/Makefile
index eae1314a5b86..61741e9d9c6e 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 14
SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION =
NAME = Opossums on Parade
# *DOCUMENTATION*
diff --git a/arch/Kconfig b/arch/Kconfig
index 129df498a8e1..98db63496bab 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1282,6 +1282,9 @@ config ARCH_SPLIT_ARG64
config ARCH_HAS_ELFCORE_COMPAT
bool
+config ARCH_HAS_PARANOID_L1D_FLUSH
+ bool
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 6b3daba60987..1dd9baf4a6c2 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -129,6 +129,8 @@
#define SO_NETNS_COOKIE 71
+#define SO_BUF_LOCK 72
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index d8f51eb8963b..b5bf68e74732 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -409,7 +409,7 @@ choice
help
Depending on the configuration, CPU can contain DSP registers
(ACC0_GLO, ACC0_GHI, DSP_BFLY0, DSP_CTRL, DSP_FFT_CTRL).
- Bellow is options describing how to handle these registers in
+ Below are options describing how to handle these registers in
interrupt entry / exit and in context switch.
config ARC_DSP_NONE
diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h
index 69debd77cd04..0b485800a392 100644
--- a/arch/arc/include/asm/checksum.h
+++ b/arch/arc/include/asm/checksum.h
@@ -24,7 +24,7 @@
*/
static inline __sum16 csum_fold(__wsum s)
{
- unsigned r = s << 16 | s >> 16; /* ror */
+ unsigned int r = s << 16 | s >> 16; /* ror */
s = ~s;
s -= r;
return s >> 16;
diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h
index 30b9ae511ea9..e1971d34ef30 100644
--- a/arch/arc/include/asm/perf_event.h
+++ b/arch/arc/include/asm/perf_event.h
@@ -123,7 +123,7 @@ static const char * const arc_pmu_ev_hw_map[] = {
#define C(_x) PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED 0xffff
-static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC,
diff --git a/arch/arc/kernel/fpu.c b/arch/arc/kernel/fpu.c
index c67c0f0f5f77..ec640219d989 100644
--- a/arch/arc/kernel/fpu.c
+++ b/arch/arc/kernel/fpu.c
@@ -57,23 +57,26 @@ void fpu_save_restore(struct task_struct *prev, struct task_struct *next)
void fpu_init_task(struct pt_regs *regs)
{
+ const unsigned int fwe = 0x80000000;
+
/* default rounding mode */
write_aux_reg(ARC_REG_FPU_CTRL, 0x100);
- /* set "Write enable" to allow explicit write to exception flags */
- write_aux_reg(ARC_REG_FPU_STATUS, 0x80000000);
+ /* Initialize to zero: setting requires FWE be set */
+ write_aux_reg(ARC_REG_FPU_STATUS, fwe);
}
void fpu_save_restore(struct task_struct *prev, struct task_struct *next)
{
struct arc_fpu *save = &prev->thread.fpu;
struct arc_fpu *restore = &next->thread.fpu;
+ const unsigned int fwe = 0x80000000;
save->ctrl = read_aux_reg(ARC_REG_FPU_CTRL);
save->status = read_aux_reg(ARC_REG_FPU_STATUS);
write_aux_reg(ARC_REG_FPU_CTRL, restore->ctrl);
- write_aux_reg(ARC_REG_FPU_STATUS, restore->status);
+ write_aux_reg(ARC_REG_FPU_STATUS, (fwe | restore->status));
}
#endif
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index abf9398cc333..f9fdb557c263 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -352,7 +352,7 @@ static void idu_cascade_isr(struct irq_desc *desc)
irq_hw_number_t idu_hwirq = core_hwirq - FIRST_EXT_IRQ;
chained_irq_enter(core_chip, desc);
- generic_handle_irq(irq_find_mapping(idu_domain, idu_hwirq));
+ generic_handle_domain_irq(idu_domain, idu_hwirq);
chained_irq_exit(core_chip, desc);
}
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index 47bab67f8649..9e28058cdba8 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -260,7 +260,7 @@ static void init_unwind_hdr(struct unwind_table *table,
{
const u8 *ptr;
unsigned long tableSize = table->size, hdrSize;
- unsigned n;
+ unsigned int n;
const u32 *fde;
struct {
u8 version;
@@ -462,7 +462,7 @@ static uleb128_t get_uleb128(const u8 **pcur, const u8 *end)
{
const u8 *cur = *pcur;
uleb128_t value;
- unsigned shift;
+ unsigned int shift;
for (shift = 0, value = 0; cur < end; shift += 7) {
if (shift + 7 > 8 * sizeof(value)
@@ -483,7 +483,7 @@ static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
{
const u8 *cur = *pcur;
sleb128_t value;
- unsigned shift;
+ unsigned int shift;
for (shift = 0, value = 0; cur < end; shift += 7) {
if (shift + 7 > 8 * sizeof(value)
@@ -609,7 +609,7 @@ static unsigned long read_pointer(const u8 **pLoc, const void *end,
static signed fde_pointer_type(const u32 *cie)
{
const u8 *ptr = (const u8 *)(cie + 2);
- unsigned version = *ptr;
+ unsigned int version = *ptr;
if (*++ptr) {
const char *aug;
@@ -904,7 +904,7 @@ int arc_unwind(struct unwind_frame_info *frame)
const u8 *ptr = NULL, *end = NULL;
unsigned long pc = UNW_PC(frame) - frame->call_frame;
unsigned long startLoc = 0, endLoc = 0, cfa;
- unsigned i;
+ unsigned int i;
signed ptrType = -1;
uleb128_t retAddrReg = 0;
const struct unwind_table *table;
diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S
index e2146a8da195..529ae50f9fe2 100644
--- a/arch/arc/kernel/vmlinux.lds.S
+++ b/arch/arc/kernel/vmlinux.lds.S
@@ -88,6 +88,8 @@ SECTIONS
CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
}
diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi
index 98ccc81ca6d9..8e41c8b7bd70 100644
--- a/arch/arm/boot/dts/imx35.dtsi
+++ b/arch/arm/boot/dts/imx35.dtsi
@@ -189,7 +189,7 @@
status = "disabled";
};
- fec: fec@50038000 {
+ fec: ethernet@50038000 {
compatible = "fsl,imx35-fec", "fsl,imx27-fec";
reg = <0x50038000 0x4000>;
clocks = <&clks 46>, <&clks 8>;
diff --git a/arch/arm/boot/dts/imx53-ppd.dts b/arch/arm/boot/dts/imx53-ppd.dts
index 5a5fa6190a52..37d0cffea99c 100644
--- a/arch/arm/boot/dts/imx53-ppd.dts
+++ b/arch/arm/boot/dts/imx53-ppd.dts
@@ -70,6 +70,12 @@
clock-frequency = <11289600>;
};
+ achc_24M: achc-clock {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <24000000>;
+ };
+
sgtlsound: sound {
compatible = "fsl,imx53-cpuvo-sgtl5000",
"fsl,imx-audio-sgtl5000";
@@ -314,16 +320,13 @@
&gpio4 12 GPIO_ACTIVE_LOW>;
status = "okay";
- spidev0: spi@0 {
- compatible = "ge,achc";
- reg = <0>;
- spi-max-frequency = <1000000>;
- };
-
- spidev1: spi@1 {
- compatible = "ge,achc";
- reg = <1>;
- spi-max-frequency = <1000000>;
+ spidev0: spi@1 {
+ compatible = "ge,achc", "nxp,kinetis-k20";
+ reg = <1>, <0>;
+ vdd-supply = <&reg_3v3>;
+ vdda-supply = <&reg_3v3>;
+ clocks = <&achc_24M>;
+ reset-gpios = <&gpio3 6 GPIO_ACTIVE_LOW>;
};
gpioxra0: gpio@2 {
diff --git a/arch/arm/boot/dts/imx6q-novena.dts b/arch/arm/boot/dts/imx6q-novena.dts
index 52e3567d1859..225cf6b7a7a4 100644
--- a/arch/arm/boot/dts/imx6q-novena.dts
+++ b/arch/arm/boot/dts/imx6q-novena.dts
@@ -222,20 +222,30 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enet_novena>;
phy-mode = "rgmii";
+ phy-handle = <&ethphy>;
phy-reset-gpios = <&gpio3 23 GPIO_ACTIVE_LOW>;
- rxc-skew-ps = <3000>;
- rxdv-skew-ps = <0>;
- txc-skew-ps = <3000>;
- txen-skew-ps = <0>;
- rxd0-skew-ps = <0>;
- rxd1-skew-ps = <0>;
- rxd2-skew-ps = <0>;
- rxd3-skew-ps = <0>;
- txd0-skew-ps = <3000>;
- txd1-skew-ps = <3000>;
- txd2-skew-ps = <3000>;
- txd3-skew-ps = <3000>;
status = "okay";
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethphy: ethernet-phy {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ rxc-skew-ps = <3000>;
+ rxdv-skew-ps = <0>;
+ txc-skew-ps = <3000>;
+ txen-skew-ps = <0>;
+ rxd0-skew-ps = <0>;
+ rxd1-skew-ps = <0>;
+ rxd2-skew-ps = <0>;
+ rxd3-skew-ps = <0>;
+ txd0-skew-ps = <3000>;
+ txd1-skew-ps = <3000>;
+ txd2-skew-ps = <3000>;
+ txd3-skew-ps = <3000>;
+ };
+ };
};
&hdmi {
diff --git a/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi b/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi
index ead7ba27e105..563bf9d44fe0 100644
--- a/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi
@@ -316,12 +316,22 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enet>;
phy-mode = "rgmii";
+ phy-handle = <&ethphy>;
phy-reset-gpios = <&gpio7 18 GPIO_ACTIVE_LOW>;
- txd0-skew-ps = <0>;
- txd1-skew-ps = <0>;
- txd2-skew-ps = <0>;
- txd3-skew-ps = <0>;
status = "okay";
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethphy: ethernet-phy {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ txd0-skew-ps = <0>;
+ txd1-skew-ps = <0>;
+ txd2-skew-ps = <0>;
+ txd3-skew-ps = <0>;
+ };
+ };
};
&gpmi {
diff --git a/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi b/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi
index d526f01a2c52..ac34709e9741 100644
--- a/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi
@@ -190,23 +190,33 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enet>;
phy-mode = "rgmii";
+ phy-handle = <&ethphy>;
phy-reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>;
- txen-skew-ps = <0>;
- txc-skew-ps = <3000>;
- rxdv-skew-ps = <0>;
- rxc-skew-ps = <3000>;
- rxd0-skew-ps = <0>;
- rxd1-skew-ps = <0>;
- rxd2-skew-ps = <0>;
- rxd3-skew-ps = <0>;
- txd0-skew-ps = <0>;
- txd1-skew-ps = <0>;
- txd2-skew-ps = <0>;
- txd3-skew-ps = <0>;
interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>,
<&intc 0 119 IRQ_TYPE_LEVEL_HIGH>;
fsl,err006687-workaround-present;
status = "okay";
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethphy: ethernet-phy {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ txen-skew-ps = <0>;
+ txc-skew-ps = <3000>;
+ rxdv-skew-ps = <0>;
+ rxc-skew-ps = <3000>;
+ rxd0-skew-ps = <0>;
+ rxd1-skew-ps = <0>;
+ rxd2-skew-ps = <0>;
+ rxd3-skew-ps = <0>;
+ txd0-skew-ps = <0>;
+ txd1-skew-ps = <0>;
+ txd2-skew-ps = <0>;
+ txd3-skew-ps = <0>;
+ };
+ };
};
&hdmi {
diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi
index a0917823c244..c96f4d7e1e0d 100644
--- a/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi
@@ -332,23 +332,33 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enet>;
phy-mode = "rgmii";
+ phy-handle = <&ethphy>;
phy-reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>;
- txen-skew-ps = <0>;
- txc-skew-ps = <3000>;
- rxdv-skew-ps = <0>;
- rxc-skew-ps = <3000>;
- rxd0-skew-ps = <0>;
- rxd1-skew-ps = <0>;
- rxd2-skew-ps = <0>;
- rxd3-skew-ps = <0>;
- txd0-skew-ps = <0>;
- txd1-skew-ps = <0>;
- txd2-skew-ps = <0>;
- txd3-skew-ps = <0>;
interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>,
<&intc 0 119 IRQ_TYPE_LEVEL_HIGH>;
fsl,err006687-workaround-present;
status = "okay";
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethphy: ethernet-phy {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ txen-skew-ps = <0>;
+ txc-skew-ps = <3000>;
+ rxdv-skew-ps = <0>;
+ rxc-skew-ps = <3000>;
+ rxd0-skew-ps = <0>;
+ rxd1-skew-ps = <0>;
+ rxd2-skew-ps = <0>;
+ rxd3-skew-ps = <0>;
+ txd0-skew-ps = <0>;
+ txd1-skew-ps = <0>;
+ txd2-skew-ps = <0>;
+ txd3-skew-ps = <0>;
+ };
+ };
};
&hdmi {
diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi
index 1243677b5f97..49da30d7510c 100644
--- a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi
@@ -265,23 +265,33 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enet>;
phy-mode = "rgmii";
+ phy-handle = <&ethphy>;
phy-reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>;
- txen-skew-ps = <0>;
- txc-skew-ps = <3000>;
- rxdv-skew-ps = <0>;
- rxc-skew-ps = <3000>;
- rxd0-skew-ps = <0>;
- rxd1-skew-ps = <0>;
- rxd2-skew-ps = <0>;
- rxd3-skew-ps = <0>;
- txd0-skew-ps = <0>;
- txd1-skew-ps = <0>;
- txd2-skew-ps = <0>;
- txd3-skew-ps = <0>;
interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>,
<&intc 0 119 IRQ_TYPE_LEVEL_HIGH>;
fsl,err006687-workaround-present;
status = "okay";
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethphy: ethernet-phy {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ txen-skew-ps = <0>;
+ txc-skew-ps = <3000>;
+ rxdv-skew-ps = <0>;
+ rxc-skew-ps = <3000>;
+ rxd0-skew-ps = <0>;
+ rxd1-skew-ps = <0>;
+ rxd2-skew-ps = <0>;
+ rxd3-skew-ps = <0>;
+ txd0-skew-ps = <0>;
+ txd1-skew-ps = <0>;
+ txd2-skew-ps = <0>;
+ txd3-skew-ps = <0>;
+ };
+ };
};
&hdmi {
diff --git a/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi b/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi
index fdc3aa9d544d..eb9a0b104f1c 100644
--- a/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi
@@ -324,20 +324,30 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enet>;
phy-mode = "rgmii";
+ phy-handle = <&ethphy>;
phy-reset-gpios = <&gpio3 23 GPIO_ACTIVE_LOW>;
- txen-skew-ps = <0>;
- txc-skew-ps = <3000>;
- rxdv-skew-ps = <0>;
- rxc-skew-ps = <3000>;
- rxd0-skew-ps = <0>;
- rxd1-skew-ps = <0>;
- rxd2-skew-ps = <0>;
- rxd3-skew-ps = <0>;
- txd0-skew-ps = <0>;
- txd1-skew-ps = <0>;
- txd2-skew-ps = <0>;
- txd3-skew-ps = <0>;
status = "okay";
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethphy: ethernet-phy {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ txen-skew-ps = <0>;
+ txc-skew-ps = <3000>;
+ rxdv-skew-ps = <0>;
+ rxc-skew-ps = <3000>;
+ rxd0-skew-ps = <0>;
+ rxd1-skew-ps = <0>;
+ rxd2-skew-ps = <0>;
+ rxd3-skew-ps = <0>;
+ txd0-skew-ps = <0>;
+ txd1-skew-ps = <0>;
+ txd2-skew-ps = <0>;
+ txd3-skew-ps = <0>;
+ };
+ };
};
&hdmi {
diff --git a/arch/arm/boot/dts/imx7-mba7.dtsi b/arch/arm/boot/dts/imx7-mba7.dtsi
index c6d1c63f7905..5e6bef230dc7 100644
--- a/arch/arm/boot/dts/imx7-mba7.dtsi
+++ b/arch/arm/boot/dts/imx7-mba7.dtsi
@@ -216,7 +216,6 @@
phy-mode = "rgmii-id";
phy-reset-gpios = <&gpio7 15 GPIO_ACTIVE_LOW>;
phy-reset-duration = <1>;
- phy-reset-delay = <1>;
phy-supply = <&reg_fec1_pwdn>;
phy-handle = <&ethphy1_0>;
fsl,magic-packet;
diff --git a/arch/arm/boot/dts/imx7d-mba7.dts b/arch/arm/boot/dts/imx7d-mba7.dts
index 23856a8d4b8c..36ef6a3cdb0b 100644
--- a/arch/arm/boot/dts/imx7d-mba7.dts
+++ b/arch/arm/boot/dts/imx7d-mba7.dts
@@ -23,7 +23,6 @@
phy-mode = "rgmii-id";
phy-reset-gpios = <&gpio2 28 GPIO_ACTIVE_LOW>;
phy-reset-duration = <1>;
- phy-reset-delay = <1>;
phy-supply = <&reg_fec2_pwdn>;
phy-handle = <&ethphy2_0>;
fsl,magic-packet;
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 092a2ebc0c28..5367f03beb46 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -196,14 +196,6 @@ static int sa1111_map_irq(struct sa1111 *sachip, irq_hw_number_t hwirq)
return irq_create_mapping(sachip->irqdomain, hwirq);
}
-static void sa1111_handle_irqdomain(struct irq_domain *irqdomain, int irq)
-{
- struct irq_desc *d = irq_to_desc(irq_linear_revmap(irqdomain, irq));
-
- if (d)
- generic_handle_irq_desc(d);
-}
-
/*
* SA1111 interrupt support. Since clearing an IRQ while there are
* active IRQs causes the interrupt output to pulse, the upper levels
@@ -234,11 +226,11 @@ static void sa1111_irq_handler(struct irq_desc *desc)
for (i = 0; stat0; i++, stat0 >>= 1)
if (stat0 & 1)
- sa1111_handle_irqdomain(irqdomain, i);
+ generic_handle_domain_irq(irqdomain, i);
for (i = 32; stat1; i++, stat1 >>= 1)
if (stat1 & 1)
- sa1111_handle_irqdomain(irqdomain, i);
+ generic_handle_domain_irq(irqdomain, i);
/* For level-based interrupts */
desc->irq_data.chip->irq_unmask(&desc->irq_data);
diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig
index 3f35761dc9ff..23595fc5a29a 100644
--- a/arch/arm/configs/nhk8815_defconfig
+++ b/arch/arm/configs/nhk8815_defconfig
@@ -15,8 +15,6 @@ CONFIG_SLAB=y
CONFIG_ARCH_NOMADIK=y
CONFIG_MACH_NOMADIK_8815NHK=y
CONFIG_AEABI=y
-CONFIG_ZBOOT_ROM_TEXT=0x0
-CONFIG_ZBOOT_ROM_BSS=0x0
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
@@ -52,9 +50,9 @@ CONFIG_MTD_BLOCK=y
CONFIG_MTD_ONENAND=y
CONFIG_MTD_ONENAND_VERIFY_WRITE=y
CONFIG_MTD_ONENAND_GENERIC=y
-CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y
CONFIG_MTD_RAW_NAND=y
CONFIG_MTD_NAND_FSMC=y
+CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_CRYPTOLOOP=y
CONFIG_BLK_DEV_RAM=y
@@ -97,6 +95,7 @@ CONFIG_REGULATOR=y
CONFIG_DRM=y
CONFIG_DRM_PANEL_TPO_TPG110=y
CONFIG_DRM_PL111=y
+CONFIG_FB=y
CONFIG_BACKLIGHT_CLASS_DEVICE=y
CONFIG_BACKLIGHT_PWM=y
CONFIG_FRAMEBUFFER_CONSOLE=y
@@ -136,9 +135,8 @@ CONFIG_NLS_ISO8859_15=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_SHA1=y
CONFIG_CRYPTO_DES=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_DEBUG_INFO=y
-# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_DEBUG_FS=y
# CONFIG_SCHED_DEBUG is not set
# CONFIG_DEBUG_PREEMPT is not set
-# CONFIG_DEBUG_BUGVERBOSE is not set
diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c
index 31eb75b6002f..9bdafd57888c 100644
--- a/arch/arm/crypto/curve25519-glue.c
+++ b/arch/arm/crypto/curve25519-glue.c
@@ -112,7 +112,7 @@ static struct kpp_alg curve25519_alg = {
.max_size = curve25519_max_size,
};
-static int __init mod_init(void)
+static int __init arm_curve25519_init(void)
{
if (elf_hwcap & HWCAP_NEON) {
static_branch_enable(&have_neon);
@@ -122,14 +122,14 @@ static int __init mod_init(void)
return 0;
}
-static void __exit mod_exit(void)
+static void __exit arm_curve25519_exit(void)
{
if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON)
crypto_unregister_kpp(&curve25519_alg);
}
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(arm_curve25519_init);
+module_exit(arm_curve25519_exit);
MODULE_ALIAS_CRYPTO("curve25519");
MODULE_ALIAS_CRYPTO("curve25519-neon");
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index cfc9dfd70aad..f673e13e0f94 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -160,10 +160,11 @@ extern unsigned long vectors_base;
/*
* Physical start and end address of the kernel sections. These addresses are
- * 2MB-aligned to match the section mappings placed over the kernel.
+ * 2MB-aligned to match the section mappings placed over the kernel. We use
+ * u64 so that LPAE mappings beyond the 32bit limit will work out as well.
*/
-extern u32 kernel_sec_start;
-extern u32 kernel_sec_end;
+extern u64 kernel_sec_start;
+extern u64 kernel_sec_end;
/*
* Physical vs virtual RAM address space conversion. These are
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 9eb0b4dbcc12..29070eb8df7d 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -49,7 +49,8 @@
/*
* This needs to be assigned at runtime when the linker symbols are
- * resolved.
+ * resolved. These are unsigned 64bit really, but in this assembly code
+ * We store them as 32bit.
*/
.pushsection .data
.align 2
@@ -57,8 +58,10 @@
.globl kernel_sec_end
kernel_sec_start:
.long 0
+ .long 0
kernel_sec_end:
.long 0
+ .long 0
.popsection
.macro pgtbl, rd, phys
@@ -250,7 +253,11 @@ __create_page_tables:
add r0, r4, #KERNEL_OFFSET >> (SECTION_SHIFT - PMD_ORDER)
ldr r6, =(_end - 1)
adr_l r5, kernel_sec_start @ _pa(kernel_sec_start)
- str r8, [r5] @ Save physical start of kernel
+#ifdef CONFIG_CPU_ENDIAN_BE8
+ str r8, [r5, #4] @ Save physical start of kernel (BE)
+#else
+ str r8, [r5] @ Save physical start of kernel (LE)
+#endif
orr r3, r8, r7 @ Add the MMU flags
add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
1: str r3, [r0], #1 << PMD_ORDER
@@ -259,7 +266,11 @@ __create_page_tables:
bls 1b
eor r3, r3, r7 @ Remove the MMU flags
adr_l r5, kernel_sec_end @ _pa(kernel_sec_end)
- str r3, [r5] @ Save physical end of kernel
+#ifdef CONFIG_CPU_ENDIAN_BE8
+ str r3, [r5, #4] @ Save physical end of kernel (BE)
+#else
+ str r3, [r5] @ Save physical end of kernel (LE)
+#endif
#ifdef CONFIG_XIP_KERNEL
/*
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index b5eadd70d903..cdc720f54daa 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -268,9 +268,23 @@ static struct platform_device ixp46x_i2c_controller = {
.resource = ixp46x_i2c_resources
};
+static struct resource ixp46x_ptp_resources[] = {
+ DEFINE_RES_MEM(IXP4XX_TIMESYNC_BASE_PHYS, SZ_4K),
+ DEFINE_RES_IRQ_NAMED(IRQ_IXP4XX_GPIO8, "master"),
+ DEFINE_RES_IRQ_NAMED(IRQ_IXP4XX_GPIO7, "slave"),
+};
+
+static struct platform_device ixp46x_ptp = {
+ .name = "ptp-ixp46x",
+ .id = -1,
+ .resource = ixp46x_ptp_resources,
+ .num_resources = ARRAY_SIZE(ixp46x_ptp_resources),
+};
+
static struct platform_device *ixp46x_devices[] __initdata = {
&ixp46x_hwrandom_device,
&ixp46x_i2c_controller,
+ &ixp46x_ptp,
};
unsigned long ixp4xx_exp_bus_size;
diff --git a/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h b/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h
index abb07f105515..74e63d4531aa 100644
--- a/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h
+++ b/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h
@@ -218,30 +218,30 @@
/*
* PCI Control/Status Registers
*/
-#define IXP4XX_PCI_CSR(x) ((volatile u32 *)(IXP4XX_PCI_CFG_BASE_VIRT+(x)))
-
-#define PCI_NP_AD IXP4XX_PCI_CSR(PCI_NP_AD_OFFSET)
-#define PCI_NP_CBE IXP4XX_PCI_CSR(PCI_NP_CBE_OFFSET)
-#define PCI_NP_WDATA IXP4XX_PCI_CSR(PCI_NP_WDATA_OFFSET)
-#define PCI_NP_RDATA IXP4XX_PCI_CSR(PCI_NP_RDATA_OFFSET)
-#define PCI_CRP_AD_CBE IXP4XX_PCI_CSR(PCI_CRP_AD_CBE_OFFSET)
-#define PCI_CRP_WDATA IXP4XX_PCI_CSR(PCI_CRP_WDATA_OFFSET)
-#define PCI_CRP_RDATA IXP4XX_PCI_CSR(PCI_CRP_RDATA_OFFSET)
-#define PCI_CSR IXP4XX_PCI_CSR(PCI_CSR_OFFSET)
-#define PCI_ISR IXP4XX_PCI_CSR(PCI_ISR_OFFSET)
-#define PCI_INTEN IXP4XX_PCI_CSR(PCI_INTEN_OFFSET)
-#define PCI_DMACTRL IXP4XX_PCI_CSR(PCI_DMACTRL_OFFSET)
-#define PCI_AHBMEMBASE IXP4XX_PCI_CSR(PCI_AHBMEMBASE_OFFSET)
-#define PCI_AHBIOBASE IXP4XX_PCI_CSR(PCI_AHBIOBASE_OFFSET)
-#define PCI_PCIMEMBASE IXP4XX_PCI_CSR(PCI_PCIMEMBASE_OFFSET)
-#define PCI_AHBDOORBELL IXP4XX_PCI_CSR(PCI_AHBDOORBELL_OFFSET)
-#define PCI_PCIDOORBELL IXP4XX_PCI_CSR(PCI_PCIDOORBELL_OFFSET)
-#define PCI_ATPDMA0_AHBADDR IXP4XX_PCI_CSR(PCI_ATPDMA0_AHBADDR_OFFSET)
-#define PCI_ATPDMA0_PCIADDR IXP4XX_PCI_CSR(PCI_ATPDMA0_PCIADDR_OFFSET)
-#define PCI_ATPDMA0_LENADDR IXP4XX_PCI_CSR(PCI_ATPDMA0_LENADDR_OFFSET)
-#define PCI_ATPDMA1_AHBADDR IXP4XX_PCI_CSR(PCI_ATPDMA1_AHBADDR_OFFSET)
-#define PCI_ATPDMA1_PCIADDR IXP4XX_PCI_CSR(PCI_ATPDMA1_PCIADDR_OFFSET)
-#define PCI_ATPDMA1_LENADDR IXP4XX_PCI_CSR(PCI_ATPDMA1_LENADDR_OFFSET)
+#define _IXP4XX_PCI_CSR(x) ((volatile u32 *)(IXP4XX_PCI_CFG_BASE_VIRT+(x)))
+
+#define PCI_NP_AD _IXP4XX_PCI_CSR(PCI_NP_AD_OFFSET)
+#define PCI_NP_CBE _IXP4XX_PCI_CSR(PCI_NP_CBE_OFFSET)
+#define PCI_NP_WDATA _IXP4XX_PCI_CSR(PCI_NP_WDATA_OFFSET)
+#define PCI_NP_RDATA _IXP4XX_PCI_CSR(PCI_NP_RDATA_OFFSET)
+#define PCI_CRP_AD_CBE _IXP4XX_PCI_CSR(PCI_CRP_AD_CBE_OFFSET)
+#define PCI_CRP_WDATA _IXP4XX_PCI_CSR(PCI_CRP_WDATA_OFFSET)
+#define PCI_CRP_RDATA _IXP4XX_PCI_CSR(PCI_CRP_RDATA_OFFSET)
+#define PCI_CSR _IXP4XX_PCI_CSR(PCI_CSR_OFFSET)
+#define PCI_ISR _IXP4XX_PCI_CSR(PCI_ISR_OFFSET)
+#define PCI_INTEN _IXP4XX_PCI_CSR(PCI_INTEN_OFFSET)
+#define PCI_DMACTRL _IXP4XX_PCI_CSR(PCI_DMACTRL_OFFSET)
+#define PCI_AHBMEMBASE _IXP4XX_PCI_CSR(PCI_AHBMEMBASE_OFFSET)
+#define PCI_AHBIOBASE _IXP4XX_PCI_CSR(PCI_AHBIOBASE_OFFSET)
+#define PCI_PCIMEMBASE _IXP4XX_PCI_CSR(PCI_PCIMEMBASE_OFFSET)
+#define PCI_AHBDOORBELL _IXP4XX_PCI_CSR(PCI_AHBDOORBELL_OFFSET)
+#define PCI_PCIDOORBELL _IXP4XX_PCI_CSR(PCI_PCIDOORBELL_OFFSET)
+#define PCI_ATPDMA0_AHBADDR _IXP4XX_PCI_CSR(PCI_ATPDMA0_AHBADDR_OFFSET)
+#define PCI_ATPDMA0_PCIADDR _IXP4XX_PCI_CSR(PCI_ATPDMA0_PCIADDR_OFFSET)
+#define PCI_ATPDMA0_LENADDR _IXP4XX_PCI_CSR(PCI_ATPDMA0_LENADDR_OFFSET)
+#define PCI_ATPDMA1_AHBADDR _IXP4XX_PCI_CSR(PCI_ATPDMA1_AHBADDR_OFFSET)
+#define PCI_ATPDMA1_PCIADDR _IXP4XX_PCI_CSR(PCI_ATPDMA1_PCIADDR_OFFSET)
+#define PCI_ATPDMA1_LENADDR _IXP4XX_PCI_CSR(PCI_ATPDMA1_LENADDR_OFFSET)
/*
* PCI register values and bit definitions
diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c
index bddfc7cd5d40..eda5a47d7fbb 100644
--- a/arch/arm/mach-pxa/pxa_cplds_irqs.c
+++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c
@@ -39,10 +39,8 @@ static irqreturn_t cplds_irq_handler(int in_irq, void *d)
do {
pending = readl(fpga->base + FPGA_IRQ_SET_CLR) & fpga->irq_mask;
- for_each_set_bit(bit, &pending, CPLDS_NB_IRQ) {
- generic_handle_irq(irq_find_mapping(fpga->irqdomain,
- bit));
- }
+ for_each_set_bit(bit, &pending, CPLDS_NB_IRQ)
+ generic_handle_domain_irq(fpga->irqdomain, bit);
} while (pending);
return IRQ_HANDLED;
diff --git a/arch/arm/mach-s3c/irq-s3c24xx.c b/arch/arm/mach-s3c/irq-s3c24xx.c
index 0c631c14a817..3edc5f614eef 100644
--- a/arch/arm/mach-s3c/irq-s3c24xx.c
+++ b/arch/arm/mach-s3c/irq-s3c24xx.c
@@ -298,7 +298,7 @@ static void s3c_irq_demux(struct irq_desc *desc)
struct s3c_irq_data *irq_data = irq_desc_get_chip_data(desc);
struct s3c_irq_intc *intc = irq_data->intc;
struct s3c_irq_intc *sub_intc = irq_data->sub_intc;
- unsigned int n, offset, irq;
+ unsigned int n, offset;
unsigned long src, msk;
/* we're using individual domains for the non-dt case
@@ -318,8 +318,7 @@ static void s3c_irq_demux(struct irq_desc *desc)
while (src) {
n = __ffs(src);
src &= ~(1 << n);
- irq = irq_find_mapping(sub_intc->domain, offset + n);
- generic_handle_irq(irq);
+ generic_handle_domain_irq(sub_intc->domain, offset + n);
}
chained_irq_exit(chip, desc);
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 7583bda5ea7d..a4e006005107 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1609,6 +1609,13 @@ static void __init early_paging_init(const struct machine_desc *mdesc)
return;
/*
+ * Offset the kernel section physical offsets so that the kernel
+ * mapping will work out later on.
+ */
+ kernel_sec_start += offset;
+ kernel_sec_end += offset;
+
+ /*
* Get the address of the remap function in the 1:1 identity
* mapping setup by the early page table assembly code. We
* must get this prior to the pv update. The following barrier
@@ -1716,7 +1723,7 @@ void __init paging_init(const struct machine_desc *mdesc)
{
void *zero_page;
- pr_debug("physical kernel sections: 0x%08x-0x%08x\n",
+ pr_debug("physical kernel sections: 0x%08llx-0x%08llx\n",
kernel_sec_start, kernel_sec_end);
prepare_page_table();
diff --git a/arch/arm/mm/pv-fixup-asm.S b/arch/arm/mm/pv-fixup-asm.S
index 5c5e1952000a..f8e11f7c7880 100644
--- a/arch/arm/mm/pv-fixup-asm.S
+++ b/arch/arm/mm/pv-fixup-asm.S
@@ -29,7 +29,7 @@ ENTRY(lpae_pgtables_remap_asm)
ldr r6, =(_end - 1)
add r7, r2, #0x1000
add r6, r7, r6, lsr #SECTION_SHIFT - L2_ORDER
- add r7, r7, #PAGE_OFFSET >> (SECTION_SHIFT - L2_ORDER)
+ add r7, r7, #KERNEL_OFFSET >> (SECTION_SHIFT - L2_ORDER)
1: ldrd r4, r5, [r7]
adds r4, r4, r0
adc r5, r5, r1
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index fdcd54d39c1e..62c3c1d2190f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -156,6 +156,7 @@ config ARM64
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+ select HAVE_ARCH_PFN_VALID
select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_SECCOMP_FILTER
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 7b668db43261..1110d386f3b4 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -183,6 +183,8 @@ endif
# We use MRPROPER_FILES and CLEAN_FILES now
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
+ $(Q)$(MAKE) $(clean)=arch/arm64/kernel/vdso
+ $(Q)$(MAKE) $(clean)=arch/arm64/kernel/vdso32
ifeq ($(KBUILD_EXTMOD),)
# We need to generate vdso-offsets.h before compiling certain files in kernel/.
diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
index e7648c3b8390..1608a48495b6 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
@@ -920,7 +920,7 @@
};
fec1: ethernet@30be0000 {
- compatible = "fsl,imx8mm-fec", "fsl,imx6sx-fec";
+ compatible = "fsl,imx8mm-fec", "fsl,imx8mq-fec", "fsl,imx6sx-fec";
reg = <0x30be0000 0x10000>;
interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
index d4231e061403..e6de293865b0 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
@@ -923,7 +923,7 @@
};
fec1: ethernet@30be0000 {
- compatible = "fsl,imx8mn-fec", "fsl,imx6sx-fec";
+ compatible = "fsl,imx8mn-fec", "fsl,imx8mq-fec", "fsl,imx6sx-fec";
reg = <0x30be0000 0x10000>;
interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8qxp-ss-conn.dtsi
index f5f58959f65c..46da21af3702 100644
--- a/arch/arm64/boot/dts/freescale/imx8qxp-ss-conn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qxp-ss-conn.dtsi
@@ -17,9 +17,9 @@
};
&fec1 {
- compatible = "fsl,imx8qxp-fec", "fsl,imx6sx-fec";
+ compatible = "fsl,imx8qxp-fec", "fsl,imx8qm-fec", "fsl,imx6sx-fec";
};
&fec2 {
- compatible = "fsl,imx8qxp-fec", "fsl,imx6sx-fec";
+ compatible = "fsl,imx8qxp-fec", "fsl,imx8qm-fec", "fsl,imx6sx-fec";
};
diff --git a/arch/arm64/boot/dts/microchip/sparx5.dtsi b/arch/arm64/boot/dts/microchip/sparx5.dtsi
index ad07fff40544..787ebcec121d 100644
--- a/arch/arm64/boot/dts/microchip/sparx5.dtsi
+++ b/arch/arm64/boot/dts/microchip/sparx5.dtsi
@@ -471,8 +471,9 @@
<0x6 0x10004000 0x7fc000>,
<0x6 0x11010000 0xaf0000>;
reg-names = "cpu", "dev", "gcb";
- interrupt-names = "xtr";
- interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "xtr", "fdma";
+ interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>;
resets = <&reset 0>;
reset-names = "switch";
};
diff --git a/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts b/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts
index 23cdcc9f7c72..1ccca83292ac 100644
--- a/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts
+++ b/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2015, LGE Inc. All rights reserved.
* Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021, Petr Vorel <petr.vorel@gmail.com>
*/
/dts-v1/;
@@ -9,6 +10,9 @@
#include "pm8994.dtsi"
#include "pmi8994.dtsi"
+/* cont_splash_mem has different memory mapping */
+/delete-node/ &cont_splash_mem;
+
/ {
model = "LG Nexus 5X";
compatible = "lg,bullhead", "qcom,msm8992";
@@ -17,6 +21,9 @@
qcom,board-id = <0xb64 0>;
qcom,pmic-id = <0x10009 0x1000A 0x0 0x0>;
+ /* Bullhead firmware doesn't support PSCI */
+ /delete-node/ psci;
+
aliases {
serial0 = &blsp1_uart2;
};
@@ -38,6 +45,11 @@
ftrace-size = <0x10000>;
pmsg-size = <0x20000>;
};
+
+ cont_splash_mem: memory@3400000 {
+ reg = <0 0x03400000 0 0x1200000>;
+ no-map;
+ };
};
};
diff --git a/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts b/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts
index ffe1a9bd8f70..c096b7758aa0 100644
--- a/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts
+++ b/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts
@@ -1,12 +1,16 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2015, Huawei Inc. All rights reserved.
* Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021, Petr Vorel <petr.vorel@gmail.com>
*/
/dts-v1/;
#include "msm8994.dtsi"
+/* Angler's firmware does not report where the memory is allocated */
+/delete-node/ &cont_splash_mem;
+
/ {
model = "Huawei Nexus 6P";
compatible = "huawei,angler", "qcom,msm8994";
diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index a9a052f8c63c..e7f0e5cde424 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -786,6 +786,8 @@
<&aggre1_noc MASTER_QUP_0 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
+ power-domains = <&rpmhpd SC7180_CX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
@@ -838,6 +840,8 @@
<&aggre1_noc MASTER_QUP_0 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
+ power-domains = <&rpmhpd SC7180_CX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
@@ -890,6 +894,8 @@
<&aggre1_noc MASTER_QUP_0 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
+ power-domains = <&rpmhpd SC7180_CX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
@@ -924,6 +930,8 @@
<&aggre1_noc MASTER_QUP_0 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
+ power-domains = <&rpmhpd SC7180_CX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
@@ -976,6 +984,8 @@
<&aggre1_noc MASTER_QUP_0 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
+ power-domains = <&rpmhpd SC7180_CX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
@@ -1010,6 +1020,8 @@
<&aggre1_noc MASTER_QUP_0 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
+ power-domains = <&rpmhpd SC7180_CX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
@@ -1075,6 +1087,8 @@
<&aggre2_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
+ power-domains = <&rpmhpd SC7180_CX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
@@ -1127,6 +1141,8 @@
<&aggre2_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
+ power-domains = <&rpmhpd SC7180_CX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
@@ -1161,6 +1177,8 @@
<&aggre2_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
+ power-domains = <&rpmhpd SC7180_CX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
@@ -1213,6 +1231,8 @@
<&aggre2_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
+ power-domains = <&rpmhpd SC7180_CX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
@@ -1247,6 +1267,8 @@
<&aggre2_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
+ power-domains = <&rpmhpd SC7180_CX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
@@ -1299,6 +1321,8 @@
<&aggre2_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
+ power-domains = <&rpmhpd SC7180_CX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi
index a8c274ad74c4..188c5768a55a 100644
--- a/arch/arm64/boot/dts/qcom/sc7280.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi
@@ -200,7 +200,7 @@
&BIG_CPU_SLEEP_1
&CLUSTER_SLEEP_0>;
next-level-cache = <&L2_700>;
- qcom,freq-domain = <&cpufreq_hw 1>;
+ qcom,freq-domain = <&cpufreq_hw 2>;
#cooling-cells = <2>;
L2_700: l2-cache {
compatible = "cache";
diff --git a/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi b/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi
index 4d052e39b348..eb6b1d15293d 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi
@@ -69,7 +69,7 @@
};
rmtfs_upper_guard: memory@f5d01000 {
no-map;
- reg = <0 0xf5d01000 0 0x2000>;
+ reg = <0 0xf5d01000 0 0x1000>;
};
/*
@@ -78,7 +78,7 @@
*/
removed_region: memory@88f00000 {
no-map;
- reg = <0 0x88f00000 0 0x200000>;
+ reg = <0 0x88f00000 0 0x1c00000>;
};
ramoops: ramoops@ac300000 {
diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
index c2a709a384e9..d7591a4621a2 100644
--- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
+++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
@@ -700,7 +700,7 @@
left_spkr: wsa8810-left{
compatible = "sdw10217211000";
reg = <0 3>;
- powerdown-gpios = <&wcdgpio 2 GPIO_ACTIVE_HIGH>;
+ powerdown-gpios = <&wcdgpio 1 GPIO_ACTIVE_HIGH>;
#thermal-sensor-cells = <0>;
sound-name-prefix = "SpkrLeft";
#sound-dai-cells = <0>;
@@ -708,7 +708,7 @@
right_spkr: wsa8810-right{
compatible = "sdw10217211000";
- powerdown-gpios = <&wcdgpio 3 GPIO_ACTIVE_HIGH>;
+ powerdown-gpios = <&wcdgpio 2 GPIO_ACTIVE_HIGH>;
reg = <0 4>;
#thermal-sensor-cells = <0>;
sound-name-prefix = "SpkrRight";
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index b8eb0453123d..55f19450091b 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -51,7 +51,7 @@ config CRYPTO_SM4_ARM64_CE
tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
- select CRYPTO_SM4
+ select CRYPTO_LIB_SM4
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index 2754c875d39c..9c93cfc4841b 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -17,12 +17,20 @@ MODULE_LICENSE("GPL v2");
asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in);
+static int sm4_ce_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ return sm4_expandkey(ctx, key, key_len);
+}
+
static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
- const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+ const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
if (!crypto_simd_usable()) {
- crypto_sm4_encrypt(tfm, out, in);
+ sm4_crypt_block(ctx->rkey_enc, out, in);
} else {
kernel_neon_begin();
sm4_ce_do_crypt(ctx->rkey_enc, out, in);
@@ -32,10 +40,10 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
- const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+ const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
if (!crypto_simd_usable()) {
- crypto_sm4_decrypt(tfm, out, in);
+ sm4_crypt_block(ctx->rkey_dec, out, in);
} else {
kernel_neon_begin();
sm4_ce_do_crypt(ctx->rkey_dec, out, in);
@@ -49,12 +57,12 @@ static struct crypto_alg sm4_ce_alg = {
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = SM4_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_sm4_ctx),
+ .cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
.cra_u.cipher = {
.cia_min_keysize = SM4_KEY_SIZE,
.cia_max_keysize = SM4_KEY_SIZE,
- .cia_setkey = crypto_sm4_set_key,
+ .cia_setkey = sm4_ce_setkey,
.cia_encrypt = sm4_ce_encrypt,
.cia_decrypt = sm4_ce_decrypt
}
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index bd68e1b7f29f..7535dc7cc5aa 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -50,6 +50,9 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr);
void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size);
#define acpi_os_ioremap acpi_os_ioremap
+void __iomem *acpi_os_memmap(acpi_physical_address phys, acpi_size size);
+#define acpi_os_memmap acpi_os_memmap
+
typedef u64 phys_cpuid_t;
#define PHYS_CPUID_INVALID INVALID_HWID
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 23a9fb73c04f..79c1a750e357 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -5,6 +5,9 @@
#ifndef __ASM_COMPAT_H
#define __ASM_COMPAT_H
+#define compat_mode_t compat_mode_t
+typedef u16 compat_mode_t;
+
#include <asm-generic/compat.h>
#ifdef CONFIG_COMPAT
@@ -27,13 +30,9 @@ typedef u16 __compat_uid_t;
typedef u16 __compat_gid_t;
typedef u16 __compat_uid16_t;
typedef u16 __compat_gid16_t;
-typedef u32 __compat_uid32_t;
-typedef u32 __compat_gid32_t;
-typedef u16 compat_mode_t;
typedef u32 compat_dev_t;
typedef s32 compat_nlink_t;
typedef u16 compat_ipc_pid_t;
-typedef u32 compat_caddr_t;
typedef __kernel_fsid_t compat_fsid_t;
struct compat_stat {
@@ -103,13 +102,6 @@ struct compat_statfs {
#define COMPAT_RLIM_INFINITY 0xffffffff
-typedef u32 compat_old_sigset_t;
-
-#define _COMPAT_NSIG 64
-#define _COMPAT_NSIG_BPW 32
-
-typedef u32 compat_sigset_word;
-
#define COMPAT_OFF_T_MAX 0x7fffffff
#define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current)))
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 21fa330f498d..b83fb24954b7 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -33,8 +33,7 @@
* EL2.
*/
.macro __init_el2_timers
- mrs x0, cnthctl_el2
- orr x0, x0, #3 // Enable EL1 physical timers
+ mov x0, #3 // Enable EL1 physical timers
msr cnthctl_el2, x0
msr cntvoff_el2, xzr // Clear virtual offset
.endm
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 993a27ea6f54..f98c91bbd7c1 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -41,6 +41,7 @@ void tag_clear_highpage(struct page *to);
typedef struct page *pgtable_t;
+int pfn_valid(unsigned long pfn);
int pfn_is_map_memory(unsigned long pfn);
#include <asm/memory.h>
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index f3851724fe35..1c9c2f7a1c04 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -273,7 +273,8 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr)
return __pgprot(PROT_DEVICE_nGnRnE);
}
-void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
+static void __iomem *__acpi_os_ioremap(acpi_physical_address phys,
+ acpi_size size, bool memory)
{
efi_memory_desc_t *md, *region = NULL;
pgprot_t prot;
@@ -299,9 +300,11 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
* It is fine for AML to remap regions that are not represented in the
* EFI memory map at all, as it only describes normal memory, and MMIO
* regions that require a virtual mapping to make them accessible to
- * the EFI runtime services.
+ * the EFI runtime services. Determine the region default
+ * attributes by checking the requested memory semantics.
*/
- prot = __pgprot(PROT_DEVICE_nGnRnE);
+ prot = memory ? __pgprot(PROT_NORMAL_NC) :
+ __pgprot(PROT_DEVICE_nGnRnE);
if (region) {
switch (region->type) {
case EFI_LOADER_CODE:
@@ -361,6 +364,16 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
return __ioremap(phys, size, prot);
}
+void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
+{
+ return __acpi_os_ioremap(phys, size, false);
+}
+
+void __iomem *acpi_os_memmap(acpi_physical_address phys, acpi_size size)
+{
+ return __acpi_os_ioremap(phys, size, true);
+}
+
/*
* Claim Synchronous External Aborts as a firmware first notification.
*
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e9a2b8f27792..0ca72f5cda41 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -94,10 +94,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
kvm->arch.return_nisv_io_abort_to_user = true;
break;
case KVM_CAP_ARM_MTE:
- if (!system_supports_mte() || kvm->created_vcpus)
- return -EINVAL;
- r = 0;
- kvm->arch.mte_enabled = true;
+ mutex_lock(&kvm->lock);
+ if (!system_supports_mte() || kvm->created_vcpus) {
+ r = -EINVAL;
+ } else {
+ r = 0;
+ kvm->arch.mte_enabled = true;
+ }
+ mutex_unlock(&kvm->lock);
break;
default:
r = -EINVAL;
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index d938ce95d3bd..a6ce991b1467 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -193,7 +193,7 @@ static bool range_is_memory(u64 start, u64 end)
{
struct kvm_mem_range r1, r2;
- if (!find_mem_range(start, &r1) || !find_mem_range(end, &r2))
+ if (!find_mem_range(start, &r1) || !find_mem_range(end - 1, &r2))
return false;
if (r1.start != r2.start)
return false;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 8490ed2917ff..1fdb7bb7c198 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -219,6 +219,43 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
free_area_init(max_zone_pfns);
}
+int pfn_valid(unsigned long pfn)
+{
+ phys_addr_t addr = PFN_PHYS(pfn);
+ struct mem_section *ms;
+
+ /*
+ * Ensure the upper PAGE_SHIFT bits are clear in the
+ * pfn. Else it might lead to false positives when
+ * some of the upper bits are set, but the lower bits
+ * match a valid pfn.
+ */
+ if (PHYS_PFN(addr) != pfn)
+ return 0;
+
+ if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
+ return 0;
+
+ ms = __pfn_to_section(pfn);
+ if (!valid_section(ms))
+ return 0;
+
+ /*
+ * ZONE_DEVICE memory does not have the memblock entries.
+ * memblock_is_map_memory() check for ZONE_DEVICE based
+ * addresses will always fail. Even the normal hotplugged
+ * memory will never have MEMBLOCK_NOMAP flag set in their
+ * memblock entries. Skip memblock search for all non early
+ * memory sections covering all of hotplug memory including
+ * both normal and ZONE_DEVICE based.
+ */
+ if (!early_section(ms))
+ return pfn_section_valid(ms, pfn);
+
+ return memblock_is_memory(addr);
+}
+EXPORT_SYMBOL(pfn_valid);
+
int pfn_is_map_memory(unsigned long pfn)
{
phys_addr_t addr = PFN_PHYS(pfn);
diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index 29e946394fdb..277d61a09463 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -26,6 +26,7 @@ config COLDFIRE
bool "Coldfire CPU family support"
select ARCH_HAVE_CUSTOM_GPIO_H
select CPU_HAS_NO_BITFIELDS
+ select CPU_HAS_NO_CAS
select CPU_HAS_NO_MULDIV64
select GENERIC_CSUM
select GPIOLIB
@@ -39,6 +40,7 @@ config M68000
bool
depends on !MMU
select CPU_HAS_NO_BITFIELDS
+ select CPU_HAS_NO_CAS
select CPU_HAS_NO_MULDIV64
select CPU_HAS_NO_UNALIGNED
select GENERIC_CSUM
@@ -54,6 +56,7 @@ config M68000
config MCPU32
bool
select CPU_HAS_NO_BITFIELDS
+ select CPU_HAS_NO_CAS
select CPU_HAS_NO_UNALIGNED
select CPU_NO_EFFICIENT_FFS
help
@@ -383,7 +386,7 @@ config ADVANCED
config RMW_INSNS
bool "Use read-modify-write instructions"
- depends on ADVANCED
+ depends on ADVANCED && !CPU_HAS_NO_CAS
help
This allows to use certain instructions that work with indivisible
read-modify-write bus cycles. While this is faster than the
@@ -450,6 +453,9 @@ config M68K_L2_CACHE
config CPU_HAS_NO_BITFIELDS
bool
+config CPU_HAS_NO_CAS
+ bool
+
config CPU_HAS_NO_MULDIV64
bool
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 0a2cacf7be08..5f536286f5fc 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -84,6 +84,7 @@ CONFIG_IPV6_ILA=m
CONFIG_IPV6_VTI=m
CONFIG_IPV6_GRE=m
CONFIG_NETFILTER=y
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_ZONES=y
# CONFIG_NF_CONNTRACK_PROCFS is not set
@@ -323,7 +324,6 @@ CONFIG_CDROM_PKTCDVD=m
CONFIG_ATA_OVER_ETH=m
CONFIG_DUMMY_IRQ=m
CONFIG_RAID_ATTRS=m
-CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=m
CONFIG_BLK_DEV_SR=y
@@ -502,6 +502,7 @@ CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
CONFIG_NFSD_V3=y
CONFIG_CIFS=m
+# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
CONFIG_CODA_FS=m
CONFIG_NLS_CODEPAGE_437=y
@@ -616,6 +617,7 @@ CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
+CONFIG_GLOB_SELFTEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
@@ -624,7 +626,6 @@ CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_KUNIT=m
CONFIG_KUNIT_ALL_TESTS=m
-CONFIG_TEST_LIST_SORT=m
CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_TEST_DIV64=m
@@ -636,6 +637,7 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 4dc6dcfaf28a..d9568644051a 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -80,6 +80,7 @@ CONFIG_IPV6_ILA=m
CONFIG_IPV6_VTI=m
CONFIG_IPV6_GRE=m
CONFIG_NETFILTER=y
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_ZONES=y
# CONFIG_NF_CONNTRACK_PROCFS is not set
@@ -458,6 +459,7 @@ CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
CONFIG_NFSD_V3=y
CONFIG_CIFS=m
+# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
CONFIG_CODA_FS=m
CONFIG_NLS_CODEPAGE_437=y
@@ -580,7 +582,6 @@ CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_KUNIT=m
CONFIG_KUNIT_ALL_TESTS=m
-CONFIG_TEST_LIST_SORT=m
CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_TEST_DIV64=m
@@ -592,6 +593,7 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 23d910a692ab..dbf1960c6669 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -87,6 +87,7 @@ CONFIG_IPV6_ILA=m
CONFIG_IPV6_VTI=m
CONFIG_IPV6_GRE=m
CONFIG_NETFILTER=y
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_ZONES=y
# CONFIG_NF_CONNTRACK_PROCFS is not set
@@ -324,7 +325,6 @@ CONFIG_CDROM_PKTCDVD=m
CONFIG_ATA_OVER_ETH=m
CONFIG_DUMMY_IRQ=m
CONFIG_RAID_ATTRS=m
-CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=m
CONFIG_BLK_DEV_SR=y
@@ -480,6 +480,7 @@ CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
CONFIG_NFSD_V3=y
CONFIG_CIFS=m
+# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
CONFIG_CODA_FS=m
CONFIG_NLS_CODEPAGE_437=y
@@ -594,6 +595,7 @@ CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
+CONFIG_GLOB_SELFTEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
@@ -602,7 +604,6 @@ CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_KUNIT=m
CONFIG_KUNIT_ALL_TESTS=m
-CONFIG_TEST_LIST_SORT=m
CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_TEST_DIV64=m
@@ -614,6 +615,7 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 2c3f42833846..7620db3e33e7 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -77,6 +77,7 @@ CONFIG_IPV6_ILA=m
CONFIG_IPV6_VTI=m
CONFIG_IPV6_GRE=m
CONFIG_NETFILTER=y
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_ZONES=y
# CONFIG_NF_CONNTRACK_PROCFS is not set
@@ -451,6 +452,7 @@ CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
CONFIG_NFSD_V3=y
CONFIG_CIFS=m
+# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
CONFIG_CODA_FS=m
CONFIG_NLS_CODEPAGE_437=y
@@ -573,7 +575,6 @@ CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_KUNIT=m
CONFIG_KUNIT_ALL_TESTS=m
-CONFIG_TEST_LIST_SORT=m
CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_TEST_DIV64=m
@@ -585,6 +586,7 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 5b1898d4b249..113a02d47ebb 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -79,6 +79,7 @@ CONFIG_IPV6_ILA=m
CONFIG_IPV6_VTI=m
CONFIG_IPV6_GRE=m
CONFIG_NETFILTER=y
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_ZONES=y
# CONFIG_NF_CONNTRACK_PROCFS is not set
@@ -460,6 +461,7 @@ CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
CONFIG_NFSD_V3=y
CONFIG_CIFS=m
+# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
CONFIG_CODA_FS=m
CONFIG_NLS_CODEPAGE_437=y
@@ -582,7 +584,6 @@ CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_KUNIT=m
CONFIG_KUNIT_ALL_TESTS=m
-CONFIG_TEST_LIST_SORT=m
CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_TEST_DIV64=m
@@ -594,6 +595,7 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 9606ccd8dafa..a8e006e8da66 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -78,6 +78,7 @@ CONFIG_IPV6_ILA=m
CONFIG_IPV6_VTI=m
CONFIG_IPV6_GRE=m
CONFIG_NETFILTER=y
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_ZONES=y
# CONFIG_NF_CONNTRACK_PROCFS is not set
@@ -315,7 +316,6 @@ CONFIG_CDROM_PKTCDVD=m
CONFIG_ATA_OVER_ETH=m
CONFIG_DUMMY_IRQ=m
CONFIG_RAID_ATTRS=m
-CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=m
CONFIG_BLK_DEV_SR=y
@@ -482,6 +482,7 @@ CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
CONFIG_NFSD_V3=y
CONFIG_CIFS=m
+# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
CONFIG_CODA_FS=m
CONFIG_NLS_CODEPAGE_437=y
@@ -596,6 +597,7 @@ CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
+CONFIG_GLOB_SELFTEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
@@ -604,7 +606,6 @@ CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_KUNIT=m
CONFIG_KUNIT_ALL_TESTS=m
-CONFIG_TEST_LIST_SORT=m
CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_TEST_DIV64=m
@@ -616,6 +617,7 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 3175ba5007e1..b6655907a1f3 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -98,6 +98,7 @@ CONFIG_IPV6_ILA=m
CONFIG_IPV6_VTI=m
CONFIG_IPV6_GRE=m
CONFIG_NETFILTER=y
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_ZONES=y
# CONFIG_NF_CONNTRACK_PROCFS is not set
@@ -344,7 +345,6 @@ CONFIG_CDROM_PKTCDVD=m
CONFIG_ATA_OVER_ETH=m
CONFIG_DUMMY_IRQ=m
CONFIG_RAID_ATTRS=m
-CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=m
CONFIG_BLK_DEV_SR=y
@@ -567,6 +567,7 @@ CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
CONFIG_NFSD_V3=y
CONFIG_CIFS=m
+# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
CONFIG_CODA_FS=m
CONFIG_NLS_CODEPAGE_437=y
@@ -681,6 +682,7 @@ CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
+CONFIG_GLOB_SELFTEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
@@ -689,7 +691,6 @@ CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_KUNIT=m
CONFIG_KUNIT_ALL_TESTS=m
-CONFIG_TEST_LIST_SORT=m
CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_TEST_DIV64=m
@@ -701,6 +702,7 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 793085f00c99..563ba47db8c6 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -76,6 +76,7 @@ CONFIG_IPV6_ILA=m
CONFIG_IPV6_VTI=m
CONFIG_IPV6_GRE=m
CONFIG_NETFILTER=y
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_ZONES=y
# CONFIG_NF_CONNTRACK_PROCFS is not set
@@ -450,6 +451,7 @@ CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
CONFIG_NFSD_V3=y
CONFIG_CIFS=m
+# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
CONFIG_CODA_FS=m
CONFIG_NLS_CODEPAGE_437=y
@@ -572,7 +574,6 @@ CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_KUNIT=m
CONFIG_KUNIT_ALL_TESTS=m
-CONFIG_TEST_LIST_SORT=m
CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_TEST_DIV64=m
@@ -584,6 +585,7 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 56fbac7943b2..9f1b44de4706 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -77,6 +77,7 @@ CONFIG_IPV6_ILA=m
CONFIG_IPV6_VTI=m
CONFIG_IPV6_GRE=m
CONFIG_NETFILTER=y
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_ZONES=y
# CONFIG_NF_CONNTRACK_PROCFS is not set
@@ -451,6 +452,7 @@ CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
CONFIG_NFSD_V3=y
CONFIG_CIFS=m
+# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
CONFIG_CODA_FS=m
CONFIG_NLS_CODEPAGE_437=y
@@ -573,7 +575,6 @@ CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_KUNIT=m
CONFIG_KUNIT_ALL_TESTS=m
-CONFIG_TEST_LIST_SORT=m
CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_TEST_DIV64=m
@@ -585,6 +586,7 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 0e15431b65e2..1993433d0840 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -78,6 +78,7 @@ CONFIG_IPV6_ILA=m
CONFIG_IPV6_VTI=m
CONFIG_IPV6_GRE=m
CONFIG_NETFILTER=y
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_ZONES=y
# CONFIG_NF_CONNTRACK_PROCFS is not set
@@ -314,7 +315,6 @@ CONFIG_CDROM_PKTCDVD=m
CONFIG_ATA_OVER_ETH=m
CONFIG_DUMMY_IRQ=m
CONFIG_RAID_ATTRS=m
-CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=m
CONFIG_BLK_DEV_SR=y
@@ -469,6 +469,7 @@ CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
CONFIG_NFSD_V3=y
CONFIG_CIFS=m
+# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
CONFIG_CODA_FS=m
CONFIG_NLS_CODEPAGE_437=y
@@ -583,6 +584,7 @@ CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
+CONFIG_GLOB_SELFTEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
@@ -591,7 +593,6 @@ CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_KUNIT=m
CONFIG_KUNIT_ALL_TESTS=m
-CONFIG_TEST_LIST_SORT=m
CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_TEST_DIV64=m
@@ -603,6 +604,7 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig
index d92306472fce..8898ae321779 100644
--- a/arch/m68k/configs/stmark2_defconfig
+++ b/arch/m68k/configs/stmark2_defconfig
@@ -22,7 +22,6 @@ CONFIG_RAMSIZE=0x8000000
CONFIG_VECTORBASE=0x40000000
CONFIG_KERNELBASE=0x40001000
# CONFIG_BLK_DEV_BSG is not set
-CONFIG_BLK_CMDLINE_PARSER=y
CONFIG_BINFMT_FLAT=y
CONFIG_BINFMT_ZFLAT=y
CONFIG_BINFMT_MISC=y
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 3490a05f29b8..56dbc63cef5b 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -74,6 +74,7 @@ CONFIG_IPV6_ILA=m
CONFIG_IPV6_VTI=m
CONFIG_IPV6_GRE=m
CONFIG_NETFILTER=y
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_ZONES=y
# CONFIG_NF_CONNTRACK_PROCFS is not set
@@ -453,6 +454,7 @@ CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
CONFIG_NFSD_V3=y
CONFIG_CIFS=m
+# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
CONFIG_CODA_FS=m
CONFIG_NLS_CODEPAGE_437=y
@@ -574,7 +576,6 @@ CONFIG_TEST_LOCKUP=m
CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_KUNIT=m
CONFIG_KUNIT_ALL_TESTS=m
-CONFIG_TEST_LIST_SORT=m
CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_TEST_DIV64=m
@@ -586,6 +587,7 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 4e92c8c332fc..6bd1bba81ac3 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -74,6 +74,7 @@ CONFIG_IPV6_ILA=m
CONFIG_IPV6_VTI=m
CONFIG_IPV6_GRE=m
CONFIG_NETFILTER=y
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_ZONES=y
# CONFIG_NF_CONNTRACK_PROCFS is not set
@@ -452,6 +453,7 @@ CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
CONFIG_NFSD_V3=y
CONFIG_CIFS=m
+# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
CONFIG_CODA_FS=m
CONFIG_NLS_CODEPAGE_437=y
@@ -574,7 +576,6 @@ CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_KUNIT=m
CONFIG_KUNIT_ALL_TESTS=m
-CONFIG_TEST_LIST_SORT=m
CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_TEST_DIV64=m
@@ -586,6 +587,7 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
diff --git a/arch/m68k/emu/nfeth.c b/arch/m68k/emu/nfeth.c
index d2875e32abfc..79e55421cfb1 100644
--- a/arch/m68k/emu/nfeth.c
+++ b/arch/m68k/emu/nfeth.c
@@ -254,8 +254,8 @@ static void __exit nfeth_cleanup(void)
for (i = 0; i < MAX_UNIT; i++) {
if (nfeth_dev[i]) {
- unregister_netdev(nfeth_dev[0]);
- free_netdev(nfeth_dev[0]);
+ unregister_netdev(nfeth_dev[i]);
+ free_netdev(nfeth_dev[i]);
}
}
free_irq(nfEtherIRQ, nfeth_interrupt);
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index 8637bf8a2f65..cfba83d230fd 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -48,7 +48,7 @@ static inline int arch_atomic_##op##_return(int i, atomic_t *v) \
" casl %2,%1,%0\n" \
" jne 1b" \
: "+m" (*v), "=&d" (t), "=&d" (tmp) \
- : "g" (i), "2" (arch_atomic_read(v))); \
+ : "di" (i), "2" (arch_atomic_read(v))); \
return t; \
}
@@ -63,7 +63,7 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
" casl %2,%1,%0\n" \
" jne 1b" \
: "+m" (*v), "=&d" (t), "=&d" (tmp) \
- : "g" (i), "2" (arch_atomic_read(v))); \
+ : "di" (i), "2" (arch_atomic_read(v))); \
return tmp; \
}
diff --git a/arch/mips/ath25/ar2315.c b/arch/mips/ath25/ar2315.c
index 9dbed7b5ea76..76e43a73ba1b 100644
--- a/arch/mips/ath25/ar2315.c
+++ b/arch/mips/ath25/ar2315.c
@@ -69,24 +69,24 @@ static void ar2315_misc_irq_handler(struct irq_desc *desc)
{
u32 pending = ar2315_rst_reg_read(AR2315_ISR) &
ar2315_rst_reg_read(AR2315_IMR);
- unsigned nr, misc_irq = 0;
+ unsigned nr;
+ int ret = 0;
if (pending) {
struct irq_domain *domain = irq_desc_get_handler_data(desc);
nr = __ffs(pending);
- misc_irq = irq_find_mapping(domain, nr);
- }
- if (misc_irq) {
if (nr == AR2315_MISC_IRQ_GPIO)
ar2315_rst_reg_write(AR2315_ISR, AR2315_ISR_GPIO);
else if (nr == AR2315_MISC_IRQ_WATCHDOG)
ar2315_rst_reg_write(AR2315_ISR, AR2315_ISR_WD);
- generic_handle_irq(misc_irq);
- } else {
- spurious_interrupt();
+
+ ret = generic_handle_domain_irq(domain, nr);
}
+
+ if (!pending || ret)
+ spurious_interrupt();
}
static void ar2315_misc_irq_unmask(struct irq_data *d)
diff --git a/arch/mips/ath25/ar5312.c b/arch/mips/ath25/ar5312.c
index 23c879f4b734..822b639dbd1e 100644
--- a/arch/mips/ath25/ar5312.c
+++ b/arch/mips/ath25/ar5312.c
@@ -73,22 +73,21 @@ static void ar5312_misc_irq_handler(struct irq_desc *desc)
{
u32 pending = ar5312_rst_reg_read(AR5312_ISR) &
ar5312_rst_reg_read(AR5312_IMR);
- unsigned nr, misc_irq = 0;
+ unsigned nr;
+ int ret = 0;
if (pending) {
struct irq_domain *domain = irq_desc_get_handler_data(desc);
nr = __ffs(pending);
- misc_irq = irq_find_mapping(domain, nr);
- }
- if (misc_irq) {
- generic_handle_irq(misc_irq);
+ ret = generic_handle_domain_irq(domain, nr);
if (nr == AR5312_MISC_IRQ_TIMER)
ar5312_rst_reg_read(AR5312_TIMER);
- } else {
- spurious_interrupt();
}
+
+ if (!pending || ret)
+ spurious_interrupt();
}
/* Enable the specified AR5312_MISC_IRQ interrupt */
diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
index 65975712a22d..53f015a1b0a7 100644
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -9,20 +9,25 @@
#include <asm/page.h>
#include <asm/ptrace.h>
+typedef s32 __compat_uid_t;
+typedef s32 __compat_gid_t;
+typedef __compat_uid_t __compat_uid32_t;
+typedef __compat_gid_t __compat_gid32_t;
+#define __compat_uid32_t __compat_uid32_t
+#define __compat_gid32_t __compat_gid32_t
+
+#define _COMPAT_NSIG 128 /* Don't ask !$@#% ... */
+#define _COMPAT_NSIG_BPW 32
+typedef u32 compat_sigset_word;
+
#include <asm-generic/compat.h>
#define COMPAT_USER_HZ 100
#define COMPAT_UTS_MACHINE "mips\0\0\0"
-typedef s32 __compat_uid_t;
-typedef s32 __compat_gid_t;
-typedef __compat_uid_t __compat_uid32_t;
-typedef __compat_gid_t __compat_gid32_t;
-typedef u32 compat_mode_t;
typedef u32 compat_dev_t;
typedef u32 compat_nlink_t;
typedef s32 compat_ipc_pid_t;
-typedef s32 compat_caddr_t;
typedef struct {
s32 val[2];
} compat_fsid_t;
@@ -89,13 +94,6 @@ struct compat_statfs {
#define COMPAT_RLIM_INFINITY 0x7fffffffUL
-typedef u32 compat_old_sigset_t; /* at least 32 bits */
-
-#define _COMPAT_NSIG 128 /* Don't ask !$@#% ... */
-#define _COMPAT_NSIG_BPW 32
-
-typedef u32 compat_sigset_word;
-
#define COMPAT_OFF_T_MAX 0x7fffffff
static inline void __user *arch_compat_alloc_user_space(long len)
diff --git a/arch/mips/include/asm/mach-rc32434/rb.h b/arch/mips/include/asm/mach-rc32434/rb.h
index d502673a4f6c..34d179ca020b 100644
--- a/arch/mips/include/asm/mach-rc32434/rb.h
+++ b/arch/mips/include/asm/mach-rc32434/rb.h
@@ -7,8 +7,6 @@
#ifndef __ASM_RC32434_RB_H
#define __ASM_RC32434_RB_H
-#include <linux/genhd.h>
-
#define REGBASE 0x18000000
#define IDT434_REG_BASE ((volatile void *) KSEG1ADDR(REGBASE))
#define UART0BASE 0x58000
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index cdf404a831b2..1eaf6a1ca561 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -140,6 +140,8 @@
#define SO_NETNS_COOKIE 71
+#define SO_BUF_LOCK 72
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
index acfbdc01b0ac..b732495f138a 100644
--- a/arch/mips/lantiq/irq.c
+++ b/arch/mips/lantiq/irq.c
@@ -300,7 +300,7 @@ static void ltq_hw_irq_handler(struct irq_desc *desc)
*/
irq = __fls(irq);
hwirq = irq + MIPS_CPU_IRQ_CASCADE + (INT_NUM_IM_OFFSET * module);
- generic_handle_irq(irq_linear_revmap(ltq_domain, hwirq));
+ generic_handle_domain_irq(ltq_domain, hwirq);
/* if this is a EBU irq, we need to ack it or get a deadlock */
if (irq == LTQ_ICU_EBU_IRQ && !module && LTQ_EBU_PCC_ISTAT != 0)
diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c
index c1a655aee599..9a4bfb4e63e3 100644
--- a/arch/mips/pci/pci-ar2315.c
+++ b/arch/mips/pci/pci-ar2315.c
@@ -337,14 +337,12 @@ static void ar2315_pci_irq_handler(struct irq_desc *desc)
struct ar2315_pci_ctrl *apc = irq_desc_get_handler_data(desc);
u32 pending = ar2315_pci_reg_read(apc, AR2315_PCI_ISR) &
ar2315_pci_reg_read(apc, AR2315_PCI_IMR);
- unsigned pci_irq = 0;
+ int ret = 0;
if (pending)
- pci_irq = irq_find_mapping(apc->domain, __ffs(pending));
+ ret = generic_handle_domain_irq(apc->domain, __ffs(pending));
- if (pci_irq)
- generic_handle_irq(pci_irq);
- else
+ if (!pending || ret)
spurious_interrupt();
}
diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c
index c48e23cf5b5e..d3c947fa2969 100644
--- a/arch/mips/pci/pci-rt3883.c
+++ b/arch/mips/pci/pci-rt3883.c
@@ -140,10 +140,9 @@ static void rt3883_pci_irq_handler(struct irq_desc *desc)
}
while (pending) {
- unsigned irq, bit = __ffs(pending);
+ unsigned bit = __ffs(pending);
- irq = irq_find_mapping(rpc->irq_domain, bit);
- generic_handle_irq(irq);
+ generic_handle_domain_irq(rpc->irq_domain, bit);
pending &= ~BIT(bit);
}
diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c
index 220ca0cd7945..fa353bc13947 100644
--- a/arch/mips/ralink/irq.c
+++ b/arch/mips/ralink/irq.c
@@ -100,7 +100,7 @@ static void ralink_intc_irq_handler(struct irq_desc *desc)
if (pending) {
struct irq_domain *domain = irq_desc_get_handler_data(desc);
- generic_handle_irq(irq_find_mapping(domain, __ffs(pending)));
+ generic_handle_domain_irq(domain, __ffs(pending));
} else {
spurious_interrupt();
}
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index 95c1bff1ab9f..a0dd3bd2b81b 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -190,7 +190,7 @@ static void ip27_do_irq_mask0(struct irq_desc *desc)
unsigned long *mask = per_cpu(irq_enable_mask, cpu);
struct irq_domain *domain;
u64 pend0;
- int irq;
+ int ret;
/* copied from Irix intpend0() */
pend0 = LOCAL_HUB_L(PI_INT_PEND0);
@@ -216,10 +216,8 @@ static void ip27_do_irq_mask0(struct irq_desc *desc)
#endif
{
domain = irq_desc_get_handler_data(desc);
- irq = irq_linear_revmap(domain, __ffs(pend0));
- if (irq)
- generic_handle_irq(irq);
- else
+ ret = generic_handle_domain_irq(domain, __ffs(pend0));
+ if (ret)
spurious_interrupt();
}
@@ -232,7 +230,7 @@ static void ip27_do_irq_mask1(struct irq_desc *desc)
unsigned long *mask = per_cpu(irq_enable_mask, cpu);
struct irq_domain *domain;
u64 pend1;
- int irq;
+ int ret;
/* copied from Irix intpend0() */
pend1 = LOCAL_HUB_L(PI_INT_PEND1);
@@ -242,10 +240,8 @@ static void ip27_do_irq_mask1(struct irq_desc *desc)
return;
domain = irq_desc_get_handler_data(desc);
- irq = irq_linear_revmap(domain, __ffs(pend1) + 64);
- if (irq)
- generic_handle_irq(irq);
- else
+ ret = generic_handle_domain_irq(domain, __ffs(pend1) + 64);
+ if (ret)
spurious_interrupt();
LOCAL_HUB_L(PI_INT_PEND1);
diff --git a/arch/mips/sgi-ip30/ip30-irq.c b/arch/mips/sgi-ip30/ip30-irq.c
index ba87704073c8..423c32cb66ed 100644
--- a/arch/mips/sgi-ip30/ip30-irq.c
+++ b/arch/mips/sgi-ip30/ip30-irq.c
@@ -99,7 +99,7 @@ static void ip30_normal_irq(struct irq_desc *desc)
int cpu = smp_processor_id();
struct irq_domain *domain;
u64 pend, mask;
- int irq;
+ int ret;
pend = heart_read(&heart_regs->isr);
mask = (heart_read(&heart_regs->imr[cpu]) &
@@ -130,10 +130,8 @@ static void ip30_normal_irq(struct irq_desc *desc)
#endif
{
domain = irq_desc_get_handler_data(desc);
- irq = irq_linear_revmap(domain, __ffs(pend));
- if (irq)
- generic_handle_irq(irq);
- else
+ ret = generic_handle_domain_irq(domain, __ffs(pend));
+ if (ret)
spurious_interrupt();
}
}
diff --git a/arch/nios2/kernel/irq.c b/arch/nios2/kernel/irq.c
index c6a1a9f6ac42..6b7890e5f7af 100644
--- a/arch/nios2/kernel/irq.c
+++ b/arch/nios2/kernel/irq.c
@@ -19,11 +19,9 @@ static u32 ienable;
asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
{
struct pt_regs *oldregs = set_irq_regs(regs);
- int irq;
irq_enter();
- irq = irq_find_mapping(NULL, hwirq);
- generic_handle_irq(irq);
+ generic_handle_domain_irq(NULL, hwirq);
irq_exit();
set_irq_regs(oldregs);
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index 1a609d38f667..b5d90e82b65d 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -8,6 +8,9 @@
#include <linux/sched.h>
#include <linux/thread_info.h>
+#define compat_mode_t compat_mode_t
+typedef u16 compat_mode_t;
+
#include <asm-generic/compat.h>
#define COMPAT_USER_HZ 100
@@ -15,13 +18,9 @@
typedef u32 __compat_uid_t;
typedef u32 __compat_gid_t;
-typedef u32 __compat_uid32_t;
-typedef u32 __compat_gid32_t;
-typedef u16 compat_mode_t;
typedef u32 compat_dev_t;
typedef u16 compat_nlink_t;
typedef u16 compat_ipc_pid_t;
-typedef u32 compat_caddr_t;
struct compat_stat {
compat_dev_t st_dev; /* dev_t is 32 bits on parisc */
@@ -96,13 +95,6 @@ struct compat_sigcontext {
#define COMPAT_RLIM_INFINITY 0xffffffff
-typedef u32 compat_old_sigset_t; /* at least 32 bits */
-
-#define _COMPAT_NSIG 64
-#define _COMPAT_NSIG_BPW 32
-
-typedef u32 compat_sigset_word;
-
#define COMPAT_OFF_T_MAX 0x7fffffff
struct compat_ipc64_perm {
diff --git a/arch/parisc/include/asm/string.h b/arch/parisc/include/asm/string.h
index 4a0c9dbd62fd..f6e1132f4e35 100644
--- a/arch/parisc/include/asm/string.h
+++ b/arch/parisc/include/asm/string.h
@@ -8,19 +8,4 @@ extern void * memset(void *, int, size_t);
#define __HAVE_ARCH_MEMCPY
void * memcpy(void * dest,const void *src,size_t count);
-#define __HAVE_ARCH_STRLEN
-extern size_t strlen(const char *s);
-
-#define __HAVE_ARCH_STRCPY
-extern char *strcpy(char *dest, const char *src);
-
-#define __HAVE_ARCH_STRNCPY
-extern char *strncpy(char *dest, const char *src, size_t count);
-
-#define __HAVE_ARCH_STRCAT
-extern char *strcat(char *dest, const char *src);
-
-#define __HAVE_ARCH_MEMSET
-extern void *memset(void *, int, size_t);
-
#endif
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 5b5351cdcb33..8baaad52d799 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -121,6 +121,8 @@
#define SO_NETNS_COOKIE 0x4045
+#define SO_BUF_LOCK 0x4046
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index 8ed409ecec93..e8a6a751dfd8 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -17,10 +17,6 @@
#include <linux/string.h>
EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(strlen);
-EXPORT_SYMBOL(strcpy);
-EXPORT_SYMBOL(strncpy);
-EXPORT_SYMBOL(strcat);
#include <linux/atomic.h>
EXPORT_SYMBOL(__xchg8);
diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile
index 2d7a9974dbae..7b197667faf6 100644
--- a/arch/parisc/lib/Makefile
+++ b/arch/parisc/lib/Makefile
@@ -3,7 +3,7 @@
# Makefile for parisc-specific library files
#
-lib-y := lusercopy.o bitops.o checksum.o io.o memcpy.o \
- ucmpdi2.o delay.o string.o
+lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \
+ ucmpdi2.o delay.o
obj-y := iomap.o
diff --git a/arch/parisc/lib/memset.c b/arch/parisc/lib/memset.c
new file mode 100644
index 000000000000..133e4809859a
--- /dev/null
+++ b/arch/parisc/lib/memset.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <linux/types.h>
+#include <asm/string.h>
+
+#define OPSIZ (BITS_PER_LONG/8)
+typedef unsigned long op_t;
+/* Fill LEN bytes starting at DSTPP with the low byte of SC and return DSTPP. */
+void *
+memset (void *dstpp, int sc, size_t len)
+{
+ unsigned int c = sc;
+ long int dstp = (long int) dstpp;
+
+ if (len >= 8)
+ {
+ size_t xlen;
+ op_t cccc; /* the fill byte replicated into every byte of an op_t */
+
+ cccc = (unsigned char) c;
+ cccc |= cccc << 8;
+ cccc |= cccc << 16;
+ if (OPSIZ > 4)
+ /* Fill the upper half too; shift in two steps to avoid a warning if long has 32 bits. */
+ cccc |= (cccc << 16) << 16;
+
+ /* Store single bytes until DSTP is OPSIZ-aligned. LEN >= 8 on entry and at most
+ OPSIZ - 1 bytes are written here, so (with OPSIZ <= 8) LEN cannot reach 0 in this loop. */
+ while (dstp % OPSIZ != 0)
+ {
+ ((unsigned char *) dstp)[0] = c;
+ dstp += 1;
+ len -= 1;
+ }
+
+ /* Write 8 `op_t' per iteration until fewer than 8 `op_t' remain. */
+ xlen = len / (OPSIZ * 8);
+ while (xlen > 0)
+ {
+ ((op_t *) dstp)[0] = cccc;
+ ((op_t *) dstp)[1] = cccc;
+ ((op_t *) dstp)[2] = cccc;
+ ((op_t *) dstp)[3] = cccc;
+ ((op_t *) dstp)[4] = cccc;
+ ((op_t *) dstp)[5] = cccc;
+ ((op_t *) dstp)[6] = cccc;
+ ((op_t *) dstp)[7] = cccc;
+ dstp += 8 * OPSIZ;
+ xlen -= 1;
+ }
+ len %= OPSIZ * 8;
+
+ /* Write 1 `op_t' per iteration until fewer than OPSIZ bytes remain. */
+ xlen = len / OPSIZ;
+ while (xlen > 0)
+ {
+ ((op_t *) dstp)[0] = cccc;
+ dstp += OPSIZ;
+ xlen -= 1;
+ }
+ len %= OPSIZ;
+ }
+
+ /* Write the remaining tail bytes one at a time (the whole buffer when LEN < 8). */
+ while (len > 0)
+ {
+ ((unsigned char *) dstp)[0] = c;
+ dstp += 1;
+ len -= 1;
+ }
+
+ return dstpp;
+}
diff --git a/arch/parisc/lib/string.S b/arch/parisc/lib/string.S
deleted file mode 100644
index 4a64264427a6..000000000000
--- a/arch/parisc/lib/string.S
+++ /dev/null
@@ -1,136 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * PA-RISC assembly string functions
- *
- * Copyright (C) 2019 Helge Deller <deller@gmx.de>
- */
-
-#include <asm/assembly.h>
-#include <linux/linkage.h>
-
- .section .text.hot
- .level PA_ASM_LEVEL
-
- t0 = r20
- t1 = r21
- t2 = r22
-
-ENTRY_CFI(strlen, frame=0,no_calls)
- or,COND(<>) arg0,r0,ret0
- b,l,n .Lstrlen_null_ptr,r0
- depwi 0,31,2,ret0
- cmpb,COND(<>) arg0,ret0,.Lstrlen_not_aligned
- ldw,ma 4(ret0),t0
- cmpib,tr 0,r0,.Lstrlen_loop
- uxor,nbz r0,t0,r0
-.Lstrlen_not_aligned:
- uaddcm arg0,ret0,t1
- shladd t1,3,r0,t1
- mtsar t1
- depwi -1,%sar,32,t0
- uxor,nbz r0,t0,r0
-.Lstrlen_loop:
- b,l,n .Lstrlen_end_loop,r0
- ldw,ma 4(ret0),t0
- cmpib,tr 0,r0,.Lstrlen_loop
- uxor,nbz r0,t0,r0
-.Lstrlen_end_loop:
- extrw,u,<> t0,7,8,r0
- addib,tr,n -3,ret0,.Lstrlen_out
- extrw,u,<> t0,15,8,r0
- addib,tr,n -2,ret0,.Lstrlen_out
- extrw,u,<> t0,23,8,r0
- addi -1,ret0,ret0
-.Lstrlen_out:
- bv r0(rp)
- uaddcm ret0,arg0,ret0
-.Lstrlen_null_ptr:
- bv,n r0(rp)
-ENDPROC_CFI(strlen)
-
-
-ENTRY_CFI(strcpy, frame=0,no_calls)
- ldb 0(arg1),t0
- stb t0,0(arg0)
- ldo 0(arg0),ret0
- ldo 1(arg1),t1
- cmpb,= r0,t0,2f
- ldo 1(arg0),t2
-1: ldb 0(t1),arg1
- stb arg1,0(t2)
- ldo 1(t1),t1
- cmpb,<> r0,arg1,1b
- ldo 1(t2),t2
-2: bv,n r0(rp)
-ENDPROC_CFI(strcpy)
-
-
-ENTRY_CFI(strncpy, frame=0,no_calls)
- ldb 0(arg1),t0
- stb t0,0(arg0)
- ldo 1(arg1),t1
- ldo 0(arg0),ret0
- cmpb,= r0,t0,2f
- ldo 1(arg0),arg1
-1: ldo -1(arg2),arg2
- cmpb,COND(=),n r0,arg2,2f
- ldb 0(t1),arg0
- stb arg0,0(arg1)
- ldo 1(t1),t1
- cmpb,<> r0,arg0,1b
- ldo 1(arg1),arg1
-2: bv,n r0(rp)
-ENDPROC_CFI(strncpy)
-
-
-ENTRY_CFI(strcat, frame=0,no_calls)
- ldb 0(arg0),t0
- cmpb,= t0,r0,2f
- ldo 0(arg0),ret0
- ldo 1(arg0),arg0
-1: ldb 0(arg0),t1
- cmpb,<>,n r0,t1,1b
- ldo 1(arg0),arg0
-2: ldb 0(arg1),t2
- stb t2,0(arg0)
- ldo 1(arg0),arg0
- ldb 0(arg1),t0
- cmpb,<> r0,t0,2b
- ldo 1(arg1),arg1
- bv,n r0(rp)
-ENDPROC_CFI(strcat)
-
-
-ENTRY_CFI(memset, frame=0,no_calls)
- copy arg0,ret0
- cmpb,COND(=) r0,arg0,4f
- copy arg0,t2
- cmpb,COND(=) r0,arg2,4f
- ldo -1(arg2),arg3
- subi -1,arg3,t0
- subi 0,t0,t1
- cmpiclr,COND(>=) 0,t1,arg2
- ldo -1(t1),arg2
- extru arg2,31,2,arg0
-2: stb arg1,0(t2)
- ldo 1(t2),t2
- addib,>= -1,arg0,2b
- ldo -1(arg3),arg3
- cmpiclr,COND(<=) 4,arg2,r0
- b,l,n 4f,r0
-#ifdef CONFIG_64BIT
- depd,* r0,63,2,arg2
-#else
- depw r0,31,2,arg2
-#endif
- ldo 1(t2),t2
-3: stb arg1,-1(t2)
- stb arg1,0(t2)
- stb arg1,1(t2)
- stb arg1,2(t2)
- addib,COND(>) -4,arg2,3b
- ldo 4(t2),t2
-4: bv,n r0(rp)
-ENDPROC_CFI(memset)
-
- .end
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 64201125a287..d4b145b279f6 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -4,6 +4,8 @@
#include <asm/bug.h>
#include <asm/book3s/32/mmu-hash.h>
+#include <asm/mmu.h>
+#include <asm/synch.h>
#ifndef __ASSEMBLY__
@@ -28,6 +30,15 @@ static inline void kuep_lock(void)
return;
update_user_segments(mfsr(0) | SR_NX);
+ /*
+ * This isync() shouldn't be necessary as the kernel is not expected to
+ * run any instruction in userspace soon after the update of segments,
+ * but hash based cores (at least G3) seem to exhibit a random
+ * behaviour when the 'isync' is not there. 603 cores don't have this
+ * behaviour so don't do the 'isync' as it saves several CPU cycles.
+ */
+ if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ isync(); /* Context sync required after mtsr() */
}
static inline void kuep_unlock(void)
@@ -36,6 +47,15 @@ static inline void kuep_unlock(void)
return;
update_user_segments(mfsr(0) & ~SR_NX);
+ /*
+ * This isync() shouldn't be necessary as a 'rfi' will soon be executed
+ * to return to userspace, but hash based cores (at least G3) seem to
+ * exhibit a random behaviour when the 'isync' is not there. 603 cores
+ * don't have this behaviour so don't do the 'isync' as it saves several
+ * CPU cycles.
+ */
+ if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ isync(); /* Context sync required after mtsr() */
}
#ifdef CONFIG_PPC_KUAP
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 9191fc29e6ed..e33dcf134cdd 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -19,13 +19,9 @@
typedef u32 __compat_uid_t;
typedef u32 __compat_gid_t;
-typedef u32 __compat_uid32_t;
-typedef u32 __compat_gid32_t;
-typedef u32 compat_mode_t;
typedef u32 compat_dev_t;
typedef s16 compat_nlink_t;
typedef u16 compat_ipc_pid_t;
-typedef u32 compat_caddr_t;
typedef __kernel_fsid_t compat_fsid_t;
struct compat_stat {
@@ -85,13 +81,6 @@ struct compat_statfs {
#define COMPAT_RLIM_INFINITY 0xffffffff
-typedef u32 compat_old_sigset_t;
-
-#define _COMPAT_NSIG 64
-#define _COMPAT_NSIG_BPW 32
-
-typedef u32 compat_sigset_word;
-
#define COMPAT_OFF_T_MAX 0x7fffffff
static inline void __user *arch_compat_alloc_user_space(long len)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index d4bdf7d274ac..6b800d3e2681 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -583,6 +583,9 @@ DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode);
DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException);
+/* irq.c */
+DECLARE_INTERRUPT_HANDLER_ASYNC(do_IRQ);
+
void __noreturn unrecoverable_exception(struct pt_regs *regs);
void replay_system_reset(void);
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 4982f3711fc3..2b3278534bc1 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -52,7 +52,7 @@ extern void *mcheckirq_ctx[NR_CPUS];
extern void *hardirq_ctx[NR_CPUS];
extern void *softirq_ctx[NR_CPUS];
-extern void do_IRQ(struct pt_regs *regs);
+void __do_IRQ(struct pt_regs *regs);
extern void __init init_IRQ(void);
extern void __do_irq(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 3e5d470a6155..14422e851494 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -70,6 +70,22 @@ struct pt_regs
unsigned long __pad[4]; /* Maintain 16 byte interrupt stack alignment */
};
#endif
+#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE)
+ struct { /* Must be a multiple of 16 bytes */
+ unsigned long mas0;
+ unsigned long mas1;
+ unsigned long mas2;
+ unsigned long mas3;
+ unsigned long mas6;
+ unsigned long mas7;
+ unsigned long srr0;
+ unsigned long srr1;
+ unsigned long csrr0;
+ unsigned long csrr1;
+ unsigned long dsrr0;
+ unsigned long dsrr1;
+ };
+#endif
};
#endif
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index a47eefa09bcb..5bee245d832b 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -309,24 +309,21 @@ int main(void)
STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr);
#endif
-#if defined(CONFIG_PPC32)
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
- DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE);
- DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
+#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE)
+ STACK_PT_REGS_OFFSET(MAS0, mas0);
/* we overload MMUCR for 44x on MAS0 since they are mutually exclusive */
- DEFINE(MMUCR, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
- DEFINE(MAS1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas1));
- DEFINE(MAS2, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas2));
- DEFINE(MAS3, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas3));
- DEFINE(MAS6, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas6));
- DEFINE(MAS7, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas7));
- DEFINE(_SRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr0));
- DEFINE(_SRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr1));
- DEFINE(_CSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr0));
- DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1));
- DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0));
- DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1));
-#endif
+ STACK_PT_REGS_OFFSET(MMUCR, mas0);
+ STACK_PT_REGS_OFFSET(MAS1, mas1);
+ STACK_PT_REGS_OFFSET(MAS2, mas2);
+ STACK_PT_REGS_OFFSET(MAS3, mas3);
+ STACK_PT_REGS_OFFSET(MAS6, mas6);
+ STACK_PT_REGS_OFFSET(MAS7, mas7);
+ STACK_PT_REGS_OFFSET(_SRR0, srr0);
+ STACK_PT_REGS_OFFSET(_SRR1, srr1);
+ STACK_PT_REGS_OFFSET(_CSRR0, csrr0);
+ STACK_PT_REGS_OFFSET(_CSRR1, csrr1);
+ STACK_PT_REGS_OFFSET(_DSRR0, dsrr0);
+ STACK_PT_REGS_OFFSET(_DSRR1, dsrr1);
#endif
/* About the CPU features table */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 4aec59a77d4c..37859e62a8dc 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -812,7 +812,6 @@ __start_interrupts:
* syscall register convention is in Documentation/powerpc/syscall64-abi.rst
*/
EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000)
-1:
/* SCV 0 */
mr r9,r13
GET_PACA(r13)
@@ -842,10 +841,12 @@ EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000)
b system_call_vectored_sigill
#endif
.endr
-2:
EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000)
-SOFT_MASK_TABLE(1b, 2b) // Treat scv vectors as soft-masked, see comment above.
+// Treat scv vectors as soft-masked, see comment above.
+// Use absolute values rather than labels here, so they don't get relocated,
+// because this code runs unrelocated.
+SOFT_MASK_TABLE(0xc000000000003000, 0xc000000000004000)
#ifdef CONFIG_RELOCATABLE
TRAMP_VIRT_BEGIN(system_call_vectored_tramp)
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 764edd860ed4..68e5c0a7e99d 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -300,7 +300,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
EXCEPTION_PROLOG_1
EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataAccess handle_dar_dsisr=1
prepare_transfer_to_handler
- lwz r5, _DSISR(r11)
+ lwz r5, _DSISR(r1)
andis. r0, r5, DSISR_DABRMATCH@h
bne- 1f
bl do_page_fault
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 87b806e8eded..e5503420b6c6 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -168,20 +168,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
/* only on e500mc */
#define DBG_STACK_BASE dbgirq_ctx
-#define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE)
-
#ifdef CONFIG_SMP
#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \
mfspr r8,SPRN_PIR; \
slwi r8,r8,2; \
addis r8,r8,level##_STACK_BASE@ha; \
lwz r8,level##_STACK_BASE@l(r8); \
- addi r8,r8,EXC_LVL_FRAME_OVERHEAD;
+ addi r8,r8,THREAD_SIZE - INT_FRAME_SIZE;
#else
#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \
lis r8,level##_STACK_BASE@ha; \
lwz r8,level##_STACK_BASE@l(r8); \
- addi r8,r8,EXC_LVL_FRAME_OVERHEAD;
+ addi r8,r8,THREAD_SIZE - INT_FRAME_SIZE;
#endif
/*
@@ -208,7 +206,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
mtmsr r11; \
mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
lwz r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\
- addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\
+ addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE; /* allocate stack frame */\
beq 1f; \
/* COMING FROM USER MODE */ \
stw r9,_CCR(r11); /* save CR */\
@@ -516,24 +514,5 @@ label:
bl kernel_fp_unavailable_exception; \
b interrupt_return
-#else /* __ASSEMBLY__ */
-struct exception_regs {
- unsigned long mas0;
- unsigned long mas1;
- unsigned long mas2;
- unsigned long mas3;
- unsigned long mas6;
- unsigned long mas7;
- unsigned long srr0;
- unsigned long srr1;
- unsigned long csrr0;
- unsigned long csrr1;
- unsigned long dsrr0;
- unsigned long dsrr1;
-};
-
-/* ensure this structure is always sized to a multiple of the stack alignment */
-#define STACK_EXC_LVL_FRAME_SIZE ALIGN(sizeof (struct exception_regs), 16)
-
#endif /* __ASSEMBLY__ */
#endif /* __HEAD_BOOKE_H__ */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 91e63eac4e8f..551b653228c4 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -750,7 +750,7 @@ void __do_irq(struct pt_regs *regs)
trace_irq_exit(regs);
}
-DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ)
+void __do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
void *cursp, *irqsp, *sirqsp;
@@ -774,6 +774,11 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ)
set_irq_regs(old_regs);
}
+DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ)
+{
+ __do_IRQ(regs);
+}
+
static void *__init alloc_vm_stack(void)
{
return __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, THREADINFO_GFP,
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index cbc28d1a2e1b..7a7cd6bda53e 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -292,7 +292,8 @@ int kprobe_handler(struct pt_regs *regs)
if (user_mode(regs))
return 0;
- if (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR))
+ if (!IS_ENABLED(CONFIG_BOOKE) &&
+ (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR)))
return 0;
/*
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 5ff0e55d0db1..defecb3b1b15 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -1167,7 +1167,7 @@ static int __init topology_init(void)
* CPU. For instance, the boot cpu might never be valid
* for hotplugging.
*/
- if (smp_ops->cpu_offline_self)
+ if (smp_ops && smp_ops->cpu_offline_self)
c->hotpluggable = 1;
#endif
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e45ce427bffb..c487ba5a6e11 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -586,7 +586,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
if (atomic_read(&ppc_n_lost_interrupts) != 0)
- do_IRQ(regs);
+ __do_IRQ(regs);
#endif
old_regs = set_irq_regs(regs);
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index dfbce527c98e..d56254f05e17 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1104,7 +1104,7 @@ DEFINE_INTERRUPT_HANDLER(RunModeException)
_exception(SIGTRAP, regs, TRAP_UNK, 0);
}
-DEFINE_INTERRUPT_HANDLER(single_step_exception)
+static void __single_step_exception(struct pt_regs *regs)
{
clear_single_step(regs);
clear_br_trace(regs);
@@ -1121,6 +1121,11 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception)
_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
}
+DEFINE_INTERRUPT_HANDLER(single_step_exception)
+{
+ __single_step_exception(regs);
+}
+
/*
* After we have successfully emulated an instruction, we have to
* check if the instruction was being single-stepped, and if so,
@@ -1130,7 +1135,7 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception)
static void emulate_single_step(struct pt_regs *regs)
{
if (single_stepping(regs))
- single_step_exception(regs);
+ __single_step_exception(regs);
}
static inline int __parse_fpscr(unsigned long fpscr)
diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c
index 0876216ceee6..edea388e9d3f 100644
--- a/arch/powerpc/mm/pageattr.c
+++ b/arch/powerpc/mm/pageattr.c
@@ -18,16 +18,12 @@
/*
* Updates the attributes of a page in three steps:
*
- * 1. invalidate the page table entry
- * 2. flush the TLB
- * 3. install the new entry with the updated attributes
- *
- * Invalidating the pte means there are situations where this will not work
- * when in theory it should.
- * For example:
- * - removing write from page whilst it is being executed
- * - setting a page read-only whilst it is being read by another CPU
+ * 1. take the page_table_lock
+ * 2. install the new entry with the updated attributes
+ * 3. flush the TLB
*
+ * This sequence is safe against concurrent updates, and also allows updating the
+ * attributes of a page currently being executed or accessed.
*/
static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
{
@@ -36,9 +32,7 @@ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
spin_lock(&init_mm.page_table_lock);
- /* invalidate the PTE so it's safe to modify */
- pte = ptep_get_and_clear(&init_mm, addr, ptep);
- flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+ pte = ptep_get(ptep);
/* modify the PTE bits as desired, then apply */
switch (action) {
@@ -59,11 +53,14 @@ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
break;
}
- set_pte_at(&init_mm, addr, ptep, pte);
+ pte_update(&init_mm, addr, ptep, ~0UL, pte_val(pte), 0);
/* See ptesync comment in radix__set_pte_at() */
if (radix_enabled())
asm volatile("ptesync": : :"memory");
+
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
spin_unlock(&init_mm.page_table_lock);
return 0;
diff --git a/arch/powerpc/platforms/4xx/uic.c b/arch/powerpc/platforms/4xx/uic.c
index 36fb66ce54cf..89e2587b1a59 100644
--- a/arch/powerpc/platforms/4xx/uic.c
+++ b/arch/powerpc/platforms/4xx/uic.c
@@ -198,7 +198,6 @@ static void uic_irq_cascade(struct irq_desc *desc)
struct uic *uic = irq_desc_get_handler_data(desc);
u32 msr;
int src;
- int subvirq;
raw_spin_lock(&desc->lock);
if (irqd_is_level_type(idata))
@@ -213,8 +212,7 @@ static void uic_irq_cascade(struct irq_desc *desc)
src = 32 - ffs(msr);
- subvirq = irq_linear_revmap(uic->irqhost, src);
- generic_handle_irq(subvirq);
+ generic_handle_domain_irq(uic->irqhost, src);
uic_irq_ret:
raw_spin_lock(&desc->lock);
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
index b2981634f1f8..ea46870e5d6e 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
@@ -81,11 +81,10 @@ static struct irq_chip cpld_pic = {
.irq_unmask = cpld_unmask_irq,
};
-static int
+static unsigned int
cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp,
u8 __iomem *maskp)
{
- int cpld_irq;
u8 status = in_8(statusp);
u8 mask = in_8(maskp);
@@ -93,28 +92,26 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp,
status |= (ignore | mask);
if (status == 0xff)
- return 0;
-
- cpld_irq = ffz(status) + offset;
+ return ~0;
- return irq_linear_revmap(cpld_pic_host, cpld_irq);
+ return ffz(status) + offset;
}
static void cpld_pic_cascade(struct irq_desc *desc)
{
- unsigned int irq;
+ unsigned int hwirq;
- irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status,
+ hwirq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status,
&cpld_regs->pci_mask);
- if (irq) {
- generic_handle_irq(irq);
+ if (hwirq != ~0) {
+ generic_handle_domain_irq(cpld_pic_host, hwirq);
return;
}
- irq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status,
+ hwirq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status,
&cpld_regs->misc_mask);
- if (irq) {
- generic_handle_irq(irq);
+ if (hwirq != ~0) {
+ generic_handle_domain_irq(cpld_pic_host, hwirq);
return;
}
}
diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c
index efb8bdecbcc7..110c444f4bc7 100644
--- a/arch/powerpc/platforms/52xx/media5200.c
+++ b/arch/powerpc/platforms/52xx/media5200.c
@@ -78,7 +78,7 @@ static struct irq_chip media5200_irq_chip = {
static void media5200_irq_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
- int sub_virq, val;
+ int val;
u32 status, enable;
/* Mask off the cascaded IRQ */
@@ -92,11 +92,10 @@ static void media5200_irq_cascade(struct irq_desc *desc)
enable = in_be32(media5200_irq.regs + MEDIA5200_IRQ_STATUS);
val = ffs((status & enable) >> MEDIA5200_IRQ_SHIFT);
if (val) {
- sub_virq = irq_linear_revmap(media5200_irq.irqhost, val - 1);
- /* pr_debug("%s: virq=%i s=%.8x e=%.8x hwirq=%i subvirq=%i\n",
- * __func__, virq, status, enable, val - 1, sub_virq);
+ generic_handle_domain_irq(media5200_irq.irqhost, val - 1);
+ /* pr_debug("%s: virq=%i s=%.8x e=%.8x hwirq=%i\n",
+ * __func__, virq, status, enable, val - 1);
*/
- generic_handle_irq(sub_virq);
}
/* Processing done; can reenable the cascade now */
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 3823df235f25..f862b48b4824 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -190,14 +190,11 @@ static struct irq_chip mpc52xx_gpt_irq_chip = {
static void mpc52xx_gpt_irq_cascade(struct irq_desc *desc)
{
struct mpc52xx_gpt_priv *gpt = irq_desc_get_handler_data(desc);
- int sub_virq;
u32 status;
status = in_be32(&gpt->regs->status) & MPC52xx_GPT_STATUS_IRQMASK;
- if (status) {
- sub_virq = irq_linear_revmap(gpt->irqhost, 0);
- generic_handle_irq(sub_virq);
- }
+ if (status)
+ generic_handle_domain_irq(gpt->irqhost, 0);
}
static int mpc52xx_gpt_irq_map(struct irq_domain *h, unsigned int virq,
diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
index f82f75a6085c..285bfe19b798 100644
--- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
+++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
@@ -91,10 +91,8 @@ static void pq2ads_pci_irq_demux(struct irq_desc *desc)
break;
for (bit = 0; pend != 0; ++bit, pend <<= 1) {
- if (pend & 0x80000000) {
- int virq = irq_linear_revmap(priv->host, bit);
- generic_handle_irq(virq);
- }
+ if (pend & 0x80000000)
+ generic_handle_domain_irq(priv->host, bit);
}
}
}
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 6794145603de..a208997ade88 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -98,7 +98,7 @@ config PPC_BOOK3S_64
select PPC_HAVE_PMU_SUPPORT
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
- select ARCH_ENABLE_PMD_SPLIT_PTLOCK
+ select ARCH_ENABLE_SPLIT_PMD_PTLOCK
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_SUPPORTS_HUGETLBFS
select ARCH_SUPPORTS_NUMA_BALANCING
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index c0ab62ba6f16..0873a7a20271 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -106,13 +106,9 @@ static void iic_ioexc_cascade(struct irq_desc *desc)
out_be64(&node_iic->iic_is, ack);
/* handle them */
for (cascade = 63; cascade >= 0; cascade--)
- if (bits & (0x8000000000000000UL >> cascade)) {
- unsigned int cirq =
- irq_linear_revmap(iic_host,
+ if (bits & (0x8000000000000000UL >> cascade))
+ generic_handle_domain_irq(iic_host,
base | cascade);
- if (cirq)
- generic_handle_irq(cirq);
- }
/* post-ack level interrupts */
ack = bits & ~IIC_ISR_EDGE_MASK;
if (ack)
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index 210785f59271..8af75867cb42 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -190,16 +190,11 @@ static void spider_irq_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
struct spider_pic *pic = irq_desc_get_handler_data(desc);
- unsigned int cs, virq;
+ unsigned int cs;
cs = in_be32(pic->regs + TIR_CS) >> 24;
- if (cs == SPIDER_IRQ_INVALID)
- virq = 0;
- else
- virq = irq_linear_revmap(pic->host, cs);
-
- if (virq)
- generic_handle_irq(virq);
+ if (cs != SPIDER_IRQ_INVALID)
+ generic_handle_domain_irq(pic->host, cs);
chip->irq_eoi(&desc->irq_data);
}
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index a1b7f79a8a15..15396333a90b 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -108,7 +108,6 @@ static const struct irq_domain_ops hlwd_irq_domain_ops = {
static unsigned int __hlwd_pic_get_irq(struct irq_domain *h)
{
void __iomem *io_base = h->host_data;
- int irq;
u32 irq_status;
irq_status = in_be32(io_base + HW_BROADWAY_ICR) &
@@ -116,23 +115,22 @@ static unsigned int __hlwd_pic_get_irq(struct irq_domain *h)
if (irq_status == 0)
return 0; /* no more IRQs pending */
- irq = __ffs(irq_status);
- return irq_linear_revmap(h, irq);
+ return __ffs(irq_status);
}
static void hlwd_pic_irq_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
struct irq_domain *irq_domain = irq_desc_get_handler_data(desc);
- unsigned int virq;
+ unsigned int hwirq;
raw_spin_lock(&desc->lock);
chip->irq_mask(&desc->irq_data); /* IRQ_LEVEL */
raw_spin_unlock(&desc->lock);
- virq = __hlwd_pic_get_irq(irq_domain);
- if (virq)
- generic_handle_irq(virq);
+ hwirq = __hlwd_pic_get_irq(irq_domain);
+ if (hwirq)
+ generic_handle_domain_irq(irq_domain, hwirq);
else
pr_err("spurious interrupt!\n");
@@ -190,7 +188,8 @@ static struct irq_domain *hlwd_pic_init(struct device_node *np)
unsigned int hlwd_pic_get_irq(void)
{
- return __hlwd_pic_get_irq(hlwd_irq_host);
+ unsigned int hwirq = __hlwd_pic_get_irq(hlwd_irq_host);
+ return hwirq ? irq_linear_revmap(hlwd_irq_host, hwirq) : 0;
}
/*
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index c164419e254d..d55652b5f6fa 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -46,18 +46,15 @@ void opal_handle_events(void)
e = READ_ONCE(last_outstanding_events) & opal_event_irqchip.mask;
again:
while (e) {
- int virq, hwirq;
+ int hwirq;
hwirq = fls64(e) - 1;
e &= ~BIT_ULL(hwirq);
local_irq_disable();
- virq = irq_find_mapping(opal_event_irqchip.domain, hwirq);
- if (virq) {
- irq_enter();
- generic_handle_irq(virq);
- irq_exit();
- }
+ irq_enter();
+ generic_handle_domain_irq(opal_event_irqchip.domain, hwirq);
+ irq_exit();
local_irq_enable();
cond_resched();
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 6b0886668465..0dfaa6ab44cc 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -539,9 +539,10 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
* H_CPU_BEHAV_FAVOUR_SECURITY_H could be set only if
* H_CPU_BEHAV_FAVOUR_SECURITY is.
*/
- if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))
+ if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) {
security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
- else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H)
+ pseries_security_flavor = 0;
+ } else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H)
pseries_security_flavor = 1;
else
pseries_security_flavor = 2;
diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c
index 5fa5fa215541..9a98bb212922 100644
--- a/arch/powerpc/sysdev/fsl_mpic_err.c
+++ b/arch/powerpc/sysdev/fsl_mpic_err.c
@@ -99,7 +99,6 @@ static irqreturn_t fsl_error_int_handler(int irq, void *data)
struct mpic *mpic = (struct mpic *) data;
u32 eisr, eimr;
int errint;
- unsigned int cascade_irq;
eisr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EISR);
eimr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EIMR);
@@ -108,13 +107,11 @@ static irqreturn_t fsl_error_int_handler(int irq, void *data)
return IRQ_NONE;
while (eisr) {
+ int ret;
errint = __builtin_clz(eisr);
- cascade_irq = irq_linear_revmap(mpic->irqhost,
- mpic->err_int_vecs[errint]);
- WARN_ON(!cascade_irq);
- if (cascade_irq) {
- generic_handle_irq(cascade_irq);
- } else {
+ ret = generic_handle_domain_irq(mpic->irqhost,
+ mpic->err_int_vecs[errint]);
+ if (WARN_ON(ret)) {
eimr |= 1 << (31 - errint);
mpic_fsl_err_write(mpic->err_regs, eimr);
}
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 808e7118abfc..e6b06c3f8197 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -266,7 +266,6 @@ out_free:
static irqreturn_t fsl_msi_cascade(int irq, void *data)
{
- unsigned int cascade_irq;
struct fsl_msi *msi_data;
int msir_index = -1;
u32 msir_value = 0;
@@ -279,9 +278,6 @@ static irqreturn_t fsl_msi_cascade(int irq, void *data)
msir_index = cascade_data->index;
- if (msir_index >= NR_MSI_REG_MAX)
- cascade_irq = 0;
-
switch (msi_data->feature & FSL_PIC_IP_MASK) {
case FSL_PIC_IP_MPIC:
msir_value = fsl_msi_read(msi_data->msi_regs,
@@ -305,15 +301,15 @@ static irqreturn_t fsl_msi_cascade(int irq, void *data)
}
while (msir_value) {
+ int err;
intr_index = ffs(msir_value) - 1;
- cascade_irq = irq_linear_revmap(msi_data->irqhost,
+ err = generic_handle_domain_irq(msi_data->irqhost,
msi_hwirq(msi_data, msir_index,
intr_index + have_shift));
- if (cascade_irq) {
- generic_handle_irq(cascade_irq);
+ if (!err)
ret = IRQ_HANDLED;
- }
+
have_shift += intr_index + 1;
msir_value = msir_value >> (intr_index + 1);
}
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index dbdbbc2f1dc5..8183ca343675 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -67,6 +67,7 @@ static struct irq_domain *xive_irq_domain;
static struct xive_ipi_desc {
unsigned int irq;
char name[16];
+ atomic_t started;
} *xive_ipis;
/*
@@ -1120,7 +1121,7 @@ static const struct irq_domain_ops xive_ipi_irq_domain_ops = {
.alloc = xive_ipi_irq_domain_alloc,
};
-static int __init xive_request_ipi(void)
+static int __init xive_init_ipis(void)
{
struct fwnode_handle *fwnode;
struct irq_domain *ipi_domain;
@@ -1144,10 +1145,6 @@ static int __init xive_request_ipi(void)
struct xive_ipi_desc *xid = &xive_ipis[node];
struct xive_ipi_alloc_info info = { node };
- /* Skip nodes without CPUs */
- if (cpumask_empty(cpumask_of_node(node)))
- continue;
-
/*
* Map one IPI interrupt per node for all cpus of that node.
* Since the HW interrupt number doesn't have any meaning,
@@ -1159,11 +1156,6 @@ static int __init xive_request_ipi(void)
xid->irq = ret;
snprintf(xid->name, sizeof(xid->name), "IPI-%d", node);
-
- ret = request_irq(xid->irq, xive_muxed_ipi_action,
- IRQF_PERCPU | IRQF_NO_THREAD, xid->name, NULL);
-
- WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
}
return ret;
@@ -1178,6 +1170,22 @@ out:
return ret;
}
+static int xive_request_ipi(unsigned int cpu)
+{
+ struct xive_ipi_desc *xid = &xive_ipis[early_cpu_to_node(cpu)];
+ int ret;
+
+ if (atomic_inc_return(&xid->started) > 1)
+ return 0;
+
+ ret = request_irq(xid->irq, xive_muxed_ipi_action,
+ IRQF_PERCPU | IRQF_NO_THREAD,
+ xid->name, NULL);
+
+ WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
+ return ret;
+}
+
static int xive_setup_cpu_ipi(unsigned int cpu)
{
unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
@@ -1192,6 +1200,9 @@ static int xive_setup_cpu_ipi(unsigned int cpu)
if (xc->hw_ipi != XIVE_BAD_IRQ)
return 0;
+ /* Register the IPI */
+ xive_request_ipi(cpu);
+
/* Grab an IPI from the backend, this will populate xc->hw_ipi */
if (xive_ops->get_ipi(cpu, xc))
return -EIO;
@@ -1231,6 +1242,8 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc)
if (xc->hw_ipi == XIVE_BAD_IRQ)
return;
+ /* TODO: clear IPI mapping */
+
/* Mask the IPI */
xive_do_source_set_mask(&xc->ipi_data, true);
@@ -1253,7 +1266,7 @@ void __init xive_smp_probe(void)
smp_ops->cause_ipi = xive_cause_ipi;
/* Register the IPI */
- xive_request_ipi();
+ xive_init_ipis();
/* Allocate and setup IPI for the boot CPU */
xive_setup_cpu_ipi(smp_processor_id());
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
index ec79944065c9..baea7d204639 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
@@ -14,6 +14,10 @@
model = "Microchip PolarFire-SoC Icicle Kit";
compatible = "microchip,mpfs-icicle-kit";
+ aliases {
+ ethernet0 = &emac1;
+ };
+
chosen {
stdout-path = &serial0;
};
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
index b9819570a7d1..9d2fbbc1f777 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
@@ -317,7 +317,7 @@
reg = <0x0 0x20112000 0x0 0x2000>;
interrupt-parent = <&plic>;
interrupts = <70 71 72 73>;
- mac-address = [00 00 00 00 00 00];
+ local-mac-address = [00 00 00 00 00 00];
clocks = <&clkcfg 5>, <&clkcfg 2>;
status = "disabled";
clock-names = "pclk", "hclk";
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 1f2be234b11c..bc68231a8fb7 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -132,7 +132,6 @@ CONFIG_DEBUG_PLIST=y
CONFIG_DEBUG_SG=y
# CONFIG_RCU_TRACE is not set
CONFIG_RCU_EQS_DEBUG=y
-CONFIG_DEBUG_BLOCK_EXT_DEVT=y
# CONFIG_FTRACE is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_MEMTEST=y
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
index 8dd02b842fef..434ef5b64599 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -127,7 +127,6 @@ CONFIG_DEBUG_PLIST=y
CONFIG_DEBUG_SG=y
# CONFIG_RCU_TRACE is not set
CONFIG_RCU_EQS_DEBUG=y
-CONFIG_DEBUG_BLOCK_EXT_DEVT=y
# CONFIG_FTRACE is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_MEMTEST=y
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index d3081e4d9600..3397ddac1a30 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -11,7 +11,7 @@ endif
CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,)
ifdef CONFIG_KEXEC
-AFLAGS_kexec_relocate.o := -mcmodel=medany -mno-relax
+AFLAGS_kexec_relocate.o := -mcmodel=medany $(call cc-option,-mno-relax)
endif
extra-y += head.o
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 1a85305720e8..9c0511119bad 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -10,6 +10,7 @@
#include <asm/ptrace.h>
#include <asm/syscall.h>
#include <asm/thread_info.h>
+#include <asm/switch_to.h>
#include <linux/audit.h>
#include <linux/ptrace.h>
#include <linux/elf.h>
@@ -56,6 +57,9 @@ static int riscv_fpr_get(struct task_struct *target,
{
struct __riscv_d_ext_state *fstate = &target->thread.fstate;
+ if (target == current)
+ fstate_save(current, task_pt_regs(current));
+
membuf_write(&to, fstate, offsetof(struct __riscv_d_ext_state, fcsr));
membuf_store(&to, fstate->fcsr);
return membuf_zero(&to, 4); // explicitly pad
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 18bd0e4bc36c..120b2f6f71bc 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -229,8 +229,8 @@ static void __init init_resources(void)
}
/* Clean-up any unused pre-allocated resources */
- mem_res_sz = (num_resources - res_idx + 1) * sizeof(*mem_res);
- memblock_free(__pa(mem_res), mem_res_sz);
+ if (res_idx >= 0)
+ memblock_free(__pa(mem_res), (res_idx + 1) * sizeof(*mem_res));
return;
error:
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 88134cc288d9..7cb4f391d106 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -197,7 +197,7 @@ static void __init setup_bootmem(void)
* if end of dram is equal to maximum addressable memory. For 64-bit
* kernel, this problem can't happen here as the end of the virtual
* address space is occupied by the kernel mapping then this check must
- * be done in create_kernel_page_table.
+ * be done as soon as the kernel mapping base address is determined.
*/
max_mapped_addr = __pa(~(ulong)0);
if (max_mapped_addr == (phys_ram_end - 1))
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index a0e2130f0100..92c0a1b4c528 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -138,6 +138,8 @@ config S390
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN
select HAVE_ARCH_KASAN_VMALLOC
+ select HAVE_ARCH_KCSAN
+ select HAVE_ARCH_KFENCE
select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_SOFT_DIRTY
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 1e3172877982..17dc4f1ac4fa 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -142,7 +142,8 @@ all: bzImage
KBUILD_IMAGE := $(boot)/bzImage
install:
- $(Q)$(MAKE) $(build)=$(boot) $@
+ sh -x $(srctree)/$(boot)/install.sh $(KERNELRELEASE) $(KBUILD_IMAGE) \
+ System.map "$(INSTALL_PATH)"
bzImage: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 41a64b8dce25..0ba646899131 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -7,6 +7,7 @@ KCOV_INSTRUMENT := n
GCOV_PROFILE := n
UBSAN_SANITIZE := n
KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
@@ -36,7 +37,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
-obj-y += version.o pgm_check_info.o ctype.o text_dma.o
+obj-y += version.o pgm_check_info.o ctype.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
@@ -69,7 +70,3 @@ $(obj)/compressed/vmlinux: $(obj)/startup.a FORCE
$(obj)/startup.a: $(OBJECTS) FORCE
$(call if_changed,ar)
-
-install:
- sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
- System.map "$(INSTALL_PATH)"
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index ae04e1c93764..641ce0fc5c3e 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -2,14 +2,9 @@
#ifndef BOOT_BOOT_H
#define BOOT_BOOT_H
+#include <asm/extable.h>
#include <linux/types.h>
-#define BOOT_STACK_OFFSET 0x8000
-
-#ifndef __ASSEMBLY__
-
-#include <linux/compiler.h>
-
void startup_kernel(void);
unsigned long detect_memory(void);
bool is_ipl_block_dump(void);
@@ -18,17 +13,22 @@ void setup_boot_command_line(void);
void parse_boot_command_line(void);
void verify_facilities(void);
void print_missing_facilities(void);
+void sclp_early_setup_buffer(void);
void print_pgm_check_info(void);
unsigned long get_random_base(unsigned long safe_addr);
void __printf(1, 2) decompressor_printk(const char *fmt, ...);
+/* Symbols defined by linker scripts */
extern const char kernel_version[];
extern unsigned long memory_limit;
extern unsigned long vmalloc_size;
extern int vmalloc_size_set;
extern int kaslr_enabled;
+extern char __boot_data_start[], __boot_data_end[];
+extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
+extern char _decompressor_syms_start[], _decompressor_syms_end[];
+extern char _stack_start[], _stack_end[];
unsigned long read_ipl_report(unsigned long safe_offset);
-#endif /* __ASSEMBLY__ */
#endif /* BOOT_BOOT_H */
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index e30d3fdbbc78..3b860061e84d 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -9,6 +9,7 @@ KCOV_INSTRUMENT := n
GCOV_PROFILE := n
UBSAN_SANITIZE := n
KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/compressed/decompressor.c
index 37a4a8d33c6c..e27c2140d620 100644
--- a/arch/s390/boot/compressed/decompressor.c
+++ b/arch/s390/boot/compressed/decompressor.c
@@ -23,11 +23,6 @@
#define memmove memmove
#define memzero(s, n) memset((s), 0, (n))
-/* Symbols defined by linker scripts */
-extern char _end[];
-extern unsigned char _compressed_start[];
-extern unsigned char _compressed_end[];
-
#ifdef CONFIG_KERNEL_BZIP2
#define BOOT_HEAP_SIZE 0x400000
#elif CONFIG_KERNEL_ZSTD
diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h
index 41f0ad97a4db..a59f75c5b049 100644
--- a/arch/s390/boot/compressed/decompressor.h
+++ b/arch/s390/boot/compressed/decompressor.h
@@ -26,7 +26,12 @@ struct vmlinux_info {
unsigned long rela_dyn_end;
};
+/* Symbols defined by linker scripts */
+extern char _end[];
+extern unsigned char _compressed_start[];
+extern unsigned char _compressed_end[];
extern char _vmlinux_info[];
+
#define vmlinux (*(struct vmlinux_info *)_vmlinux_info)
#endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S
index 27a09c1c78f6..918e05137d4c 100644
--- a/arch/s390/boot/compressed/vmlinux.lds.S
+++ b/arch/s390/boot/compressed/vmlinux.lds.S
@@ -1,6 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm-generic/vmlinux.lds.h>
#include <asm/vmlinux.lds.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/sclp.h>
OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
OUTPUT_ARCH(s390:64-bit)
@@ -34,27 +37,6 @@ SECTIONS
*(.data.*)
_edata = . ;
}
- /*
- * .dma section for code, data, ex_table that need to stay below 2 GB,
- * even when the kernel is relocate: above 2 GB.
- */
- . = ALIGN(PAGE_SIZE);
- _sdma = .;
- .dma.text : {
- _stext_dma = .;
- *(.dma.text)
- . = ALIGN(PAGE_SIZE);
- _etext_dma = .;
- }
- . = ALIGN(16);
- .dma.ex_table : {
- _start_dma_ex_table = .;
- KEEP(*(.dma.ex_table))
- _stop_dma_ex_table = .;
- }
- .dma.data : { *(.dma.data) }
- . = ALIGN(PAGE_SIZE);
- _edma = .;
BOOT_DATA
BOOT_DATA_PRESERVED
@@ -69,6 +51,17 @@ SECTIONS
*(.bss)
*(.bss.*)
*(COMMON)
+ /*
+ * Stacks for the decompressor
+ */
+ . = ALIGN(PAGE_SIZE);
+ _dump_info_stack_start = .;
+ . += PAGE_SIZE;
+ _dump_info_stack_end = .;
+ . = ALIGN(PAGE_SIZE);
+ _stack_start = .;
+ . += BOOT_STACK_SIZE;
+ _stack_end = .;
_ebss = .;
}
diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S
index 51693cfb65c2..40f4cff538b8 100644
--- a/arch/s390/boot/head.S
+++ b/arch/s390/boot/head.S
@@ -25,13 +25,15 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
-#include "boot.h"
+#include <asm/sclp.h>
#define ARCH_OFFSET 4
+#define EP_OFFSET 0x10008
+#define EP_STRING "S390EP"
+
__HEAD
#define IPL_BS 0x730
@@ -275,11 +277,11 @@ iplstart:
.Lcpuid:.fill 8,1,0
#
-# startup-code at 0x10000, running in absolute addressing mode
+# normal startup-code, running in absolute addressing mode
# this is called either by the ipl loader or directly by PSW restart
# or linload or SALIPL
#
- .org 0x10000
+ .org STARTUP_NORMAL_OFFSET
SYM_CODE_START(startup)
j startup_normal
.org EP_OFFSET
@@ -292,9 +294,9 @@ SYM_CODE_START(startup)
.ascii EP_STRING
.byte 0x00,0x01
#
-# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode
+# kdump startup-code, running in 64 bit absolute addressing mode
#
- .org 0x10010
+ .org STARTUP_KDUMP_OFFSET
j startup_kdump
SYM_CODE_END(startup)
SYM_CODE_START_LOCAL(startup_normal)
@@ -315,18 +317,16 @@ SYM_CODE_START_LOCAL(startup_normal)
xc 0x300(256),0x300
xc 0xe00(256),0xe00
xc 0xf00(256),0xf00
- lctlg %c0,%c15,.Lctl-.LPG0(%r13) # load control registers
stcke __LC_BOOT_CLOCK
mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1
spt 6f-.LPG0(%r13)
mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
- l %r15,.Lstack-.LPG0(%r13)
+ larl %r15,_stack_end-STACK_FRAME_OVERHEAD
+ brasl %r14,sclp_early_setup_buffer
brasl %r14,verify_facilities
brasl %r14,startup_kernel
SYM_CODE_END(startup_normal)
-.Lstack:
- .long BOOT_STACK_OFFSET + BOOT_STACK_SIZE - STACK_FRAME_OVERHEAD
.align 8
6: .long 0x7fffffff,0xffffffff
.Lext_new_psw:
@@ -335,35 +335,6 @@ SYM_CODE_END(startup_normal)
.quad 0x0000000180000000,startup_pgm_check_handler
.Lio_new_psw:
.quad 0x0002000180000000,0x1f0 # disabled wait
-.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space
- .quad 0 # cr1: primary space segment table
- .quad .Lduct # cr2: dispatchable unit control table
- .quad 0 # cr3: instruction authorization
- .quad 0xffff # cr4: instruction authorization
- .quad .Lduct # cr5: primary-aste origin
- .quad 0 # cr6: I/O interrupts
- .quad 0 # cr7: secondary space segment table
- .quad 0x0000000000008000 # cr8: access registers translation
- .quad 0 # cr9: tracing off
- .quad 0 # cr10: tracing off
- .quad 0 # cr11: tracing off
- .quad 0 # cr12: tracing off
- .quad 0 # cr13: home space segment table
- .quad 0xc0000000 # cr14: machine check handling off
- .quad .Llinkage_stack # cr15: linkage stack operations
-
- .section .dma.data,"aw",@progbits
-.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0
- .long 0,0,0,0,0,0,0,0
-.Llinkage_stack:
- .long 0,0,0x89000000,0,0,0,0x8a000000,0
- .align 64
-.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0
- .align 128
-.Lduald:.rept 8
- .long 0x80000000,0,0,0 # invalid access-list entries
- .endr
- .previous
#include "head_kdump.S"
@@ -386,15 +357,13 @@ SYM_CODE_START_LOCAL(startup_pgm_check_handler)
oi __LC_RETURN_PSW+1,0x2 # set wait state bit
larl %r9,.Lold_psw_disabled_wait
stg %r9,__LC_PGM_NEW_PSW+8
- l %r15,.Ldump_info_stack-.Lold_psw_disabled_wait(%r9)
+ larl %r15,_dump_info_stack_end-STACK_FRAME_OVERHEAD
brasl %r14,print_pgm_check_info
.Lold_psw_disabled_wait:
la %r8,4095
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8)
lpswe __LC_RETURN_PSW # disabled wait
SYM_CODE_END(startup_pgm_check_handler)
-.Ldump_info_stack:
- .long 0x5000 + PAGE_SIZE - STACK_FRAME_OVERHEAD
#
# params at 10400 (setup.h)
@@ -415,7 +384,4 @@ SYM_DATA_START(parmarea)
.org PARMAREA+__PARMAREA_SIZE
SYM_DATA_END(parmarea)
- .org EARLY_SCCB_OFFSET
- .fill 4096
-
.org HEAD_END
diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
index 0b4965573656..9b14045065b6 100644
--- a/arch/s390/boot/ipl_report.c
+++ b/arch/s390/boot/ipl_report.c
@@ -54,9 +54,9 @@ static unsigned long find_bootdata_space(struct ipl_rb_components *comps,
* not overlap with any component or any certificate.
*/
repeat:
- if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
- intersects(INITRD_START, INITRD_SIZE, safe_addr, size))
- safe_addr = INITRD_START + INITRD_SIZE;
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size &&
+ intersects(initrd_data.start, initrd_data.size, safe_addr, size))
+ safe_addr = initrd_data.start + initrd_data.size;
for_each_rb_entry(comp, comps)
if (intersects(safe_addr, size, comp->addr, comp->len)) {
safe_addr = comp->addr + comp->len;
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index 0dd48fbdbaa4..d8984462071f 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -186,9 +186,9 @@ unsigned long get_random_base(unsigned long safe_addr)
*/
memory_limit -= kasan_estimate_memory_needs(memory_limit);
- if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE) {
- if (safe_addr < INITRD_START + INITRD_SIZE)
- safe_addr = INITRD_START + INITRD_SIZE;
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size) {
+ if (safe_addr < initrd_data.start + initrd_data.size)
+ safe_addr = initrd_data.start + initrd_data.size;
}
safe_addr = ALIGN(safe_addr, THREAD_SIZE);
diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c
index 4e17adbde495..2f949cd9076b 100644
--- a/arch/s390/boot/mem_detect.c
+++ b/arch/s390/boot/mem_detect.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/errno.h>
#include <linux/init.h>
+#include <asm/setup.h>
+#include <asm/processor.h>
#include <asm/sclp.h>
#include <asm/sections.h>
#include <asm/mem_detect.h>
@@ -24,9 +26,9 @@ static void *mem_detect_alloc_extended(void)
{
unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64));
- if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
- INITRD_START < offset + ENTRIES_EXTENDED_MAX)
- offset = ALIGN(INITRD_START + INITRD_SIZE, sizeof(u64));
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size &&
+ initrd_data.start < offset + ENTRIES_EXTENDED_MAX)
+ offset = ALIGN(initrd_data.start + initrd_data.size, sizeof(u64));
return (void *)offset;
}
diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c
index 3a46abed2549..209f6ae5a197 100644
--- a/arch/s390/boot/pgm_check_info.c
+++ b/arch/s390/boot/pgm_check_info.c
@@ -29,7 +29,6 @@ static char *symstart(char *p)
return p + 1;
}
-extern char _decompressor_syms_start[], _decompressor_syms_end[];
static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned short *len)
{
/* symbol entries are in a form "10000 c4 startup\0" */
@@ -126,8 +125,8 @@ out:
static noinline void print_stacktrace(void)
{
- struct stack_info boot_stack = { STACK_TYPE_TASK, BOOT_STACK_OFFSET,
- BOOT_STACK_OFFSET + BOOT_STACK_SIZE };
+ struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start,
+ (unsigned long)_stack_end };
unsigned long sp = S390_lowcore.gpregs_save_area[15];
bool first = true;
diff --git a/arch/s390/boot/sclp_early_core.c b/arch/s390/boot/sclp_early_core.c
index 5a19fd7020b5..6f30646afbd0 100644
--- a/arch/s390/boot/sclp_early_core.c
+++ b/arch/s390/boot/sclp_early_core.c
@@ -1,2 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
+#include "boot.h"
#include "../../../drivers/s390/char/sclp_early_core.c"
+
+/* SCLP early buffer must stay page-aligned and below 2GB */
+static char __sclp_early_sccb[EXT_SCCB_READ_SCP] __aligned(PAGE_SIZE);
+
+void sclp_early_setup_buffer(void)
+{
+ sclp_early_set_buffer(&__sclp_early_sccb);
+}
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index d0cf21641e3a..6dc8d0a53864 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -12,9 +12,8 @@
#include <asm/uv.h>
#include "compressed/decompressor.h"
#include "boot.h"
+#include "uv.h"
-extern char __boot_data_start[], __boot_data_end[];
-extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata_preserved(VMALLOC_START);
unsigned long __bootdata_preserved(VMALLOC_END);
@@ -24,44 +23,11 @@ unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);
unsigned long __bootdata(ident_map_size);
int __bootdata(is_full_image) = 1;
+struct initrd_data __bootdata(initrd_data);
u64 __bootdata_preserved(stfle_fac_list[16]);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);
-
-/*
- * Some code and data needs to stay below 2 GB, even when the kernel would be
- * relocated above 2 GB, because it has to use 31 bit addresses.
- * Such code and data is part of the .dma section, and its location is passed
- * over to the decompressed / relocated kernel via the .boot.preserved.data
- * section.
- */
-extern char _sdma[], _edma[];
-extern char _stext_dma[], _etext_dma[];
-extern struct exception_table_entry _start_dma_ex_table[];
-extern struct exception_table_entry _stop_dma_ex_table[];
-unsigned long __bootdata_preserved(__sdma) = __pa(&_sdma);
-unsigned long __bootdata_preserved(__edma) = __pa(&_edma);
-unsigned long __bootdata_preserved(__stext_dma) = __pa(&_stext_dma);
-unsigned long __bootdata_preserved(__etext_dma) = __pa(&_etext_dma);
-struct exception_table_entry *
- __bootdata_preserved(__start_dma_ex_table) = _start_dma_ex_table;
-struct exception_table_entry *
- __bootdata_preserved(__stop_dma_ex_table) = _stop_dma_ex_table;
-
-int _diag210_dma(struct diag210 *addr);
-int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode);
-int _diag14_dma(unsigned long rx, unsigned long ry1, unsigned long subcode);
-void _diag0c_dma(struct hypfs_diag0c_entry *entry);
-void _diag308_reset_dma(void);
-struct diag_ops __bootdata_preserved(diag_dma_ops) = {
- .diag210 = _diag210_dma,
- .diag26c = _diag26c_dma,
- .diag14 = _diag14_dma,
- .diag0c = _diag0c_dma,
- .diag308_reset = _diag308_reset_dma
-};
-static struct diag210 _diag210_tmp_dma __section(".dma.data");
-struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma;
+struct oldmem_data __bootdata_preserved(oldmem_data);
void error(char *x)
{
@@ -91,12 +57,12 @@ static void rescue_initrd(unsigned long addr)
{
if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
return;
- if (!INITRD_START || !INITRD_SIZE)
+ if (!initrd_data.start || !initrd_data.size)
return;
- if (addr <= INITRD_START)
+ if (addr <= initrd_data.start)
return;
- memmove((void *)addr, (void *)INITRD_START, INITRD_SIZE);
- INITRD_START = addr;
+ memmove((void *)addr, (void *)initrd_data.start, initrd_data.size);
+ initrd_data.start = addr;
}
static void copy_bootdata(void)
@@ -169,9 +135,9 @@ static void setup_ident_map_size(unsigned long max_physmem_end)
ident_map_size = min(ident_map_size, 1UL << MAX_PHYSMEM_BITS);
#ifdef CONFIG_CRASH_DUMP
- if (OLDMEM_BASE) {
+ if (oldmem_data.start) {
kaslr_enabled = 0;
- ident_map_size = min(ident_map_size, OLDMEM_SIZE);
+ ident_map_size = min(ident_map_size, oldmem_data.size);
} else if (ipl_block_valid && is_ipl_block_dump()) {
kaslr_enabled = 0;
if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size)
@@ -282,12 +248,28 @@ static void setup_vmalloc_size(void)
vmalloc_size = max(size, vmalloc_size);
}
+static void offset_vmlinux_info(unsigned long offset)
+{
+ vmlinux.default_lma += offset;
+ *(unsigned long *)(&vmlinux.entry) += offset;
+ vmlinux.bootdata_off += offset;
+ vmlinux.bootdata_preserved_off += offset;
+ vmlinux.rela_dyn_start += offset;
+ vmlinux.rela_dyn_end += offset;
+ vmlinux.dynsym_start += offset;
+}
+
void startup_kernel(void)
{
unsigned long random_lma;
unsigned long safe_addr;
void *img;
+ initrd_data.start = parmarea.initrd_start;
+ initrd_data.size = parmarea.initrd_size;
+ oldmem_data.start = parmarea.oldmem_base;
+ oldmem_data.size = parmarea.oldmem_size;
+
setup_lpp();
store_ipl_parmblock();
safe_addr = mem_safe_offset();
@@ -297,23 +279,17 @@ void startup_kernel(void)
sclp_early_read_info();
setup_boot_command_line();
parse_boot_command_line();
+ sanitize_prot_virt_host();
setup_ident_map_size(detect_memory());
setup_vmalloc_size();
setup_kernel_memory_layout();
- random_lma = __kaslr_offset = 0;
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) {
random_lma = get_random_base(safe_addr);
if (random_lma) {
__kaslr_offset = random_lma - vmlinux.default_lma;
img = (void *)vmlinux.default_lma;
- vmlinux.default_lma += __kaslr_offset;
- vmlinux.entry += __kaslr_offset;
- vmlinux.bootdata_off += __kaslr_offset;
- vmlinux.bootdata_preserved_off += __kaslr_offset;
- vmlinux.rela_dyn_start += __kaslr_offset;
- vmlinux.rela_dyn_end += __kaslr_offset;
- vmlinux.dynsym_start += __kaslr_offset;
+ offset_vmlinux_info(__kaslr_offset);
}
}
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index f6b0c4f43c99..e6be155ab2e5 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -1,8 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include <asm/uv.h>
+#include <asm/boot_data.h>
#include <asm/facility.h>
#include <asm/sections.h>
+#include "boot.h"
+#include "uv.h"
+
/* will be used in arch/s390/kernel/uv.c */
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
@@ -47,26 +51,34 @@ void uv_query_info(void)
}
#if IS_ENABLED(CONFIG_KVM)
-static bool has_uv_sec_stor_limit(void)
+void adjust_to_uv_max(unsigned long *vmax)
{
- /*
- * keep these conditions in line with setup_uv()
- */
- if (!is_prot_virt_host())
- return false;
+ if (is_prot_virt_host() && uv_info.max_sec_stor_addr)
+ *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);
+}
+static int is_prot_virt_host_capable(void)
+{
+ /* disable if no prot_virt=1 given on command-line */
+ if (!is_prot_virt_host())
+ return 0;
+ /* disable if protected guest virtualization is enabled */
if (is_prot_virt_guest())
- return false;
-
+ return 0;
+ /* disable if no hardware support */
if (!test_facility(158))
- return false;
-
- return !!uv_info.max_sec_stor_addr;
+ return 0;
+ /* disable if kdump */
+ if (oldmem_data.start)
+ return 0;
+ /* disable if stand-alone dump */
+ if (ipl_block_valid && is_ipl_block_dump())
+ return 0;
+ return 1;
}
-void adjust_to_uv_max(unsigned long *vmax)
+void sanitize_prot_virt_host(void)
{
- if (has_uv_sec_stor_limit())
- *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);
+ prot_virt_host = is_prot_virt_host_capable();
}
#endif
diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h
new file mode 100644
index 000000000000..690ce019af5a
--- /dev/null
+++ b/arch/s390/boot/uv.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_UV_H
+#define BOOT_UV_H
+
+#if IS_ENABLED(CONFIG_KVM)
+void adjust_to_uv_max(unsigned long *vmax);
+void sanitize_prot_virt_host(void);
+#else
+static inline void adjust_to_uv_max(unsigned long *vmax) {}
+static inline void sanitize_prot_virt_host(void) {}
+#endif
+
+#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
+void uv_query_info(void);
+#else
+static inline void uv_query_info(void) {}
+#endif
+
+#endif /* BOOT_UV_H */
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index b88184019af9..11ffc7c37ada 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -10,7 +10,6 @@ CONFIG_BPF_JIT=y
CONFIG_BPF_JIT_ALWAYS_ON=y
CONFIG_BPF_LSM=y
CONFIG_PREEMPT=y
-CONFIG_SCHED_CORE=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_TASKSTATS=y
@@ -75,7 +74,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_MODULE_SIG_SHA256=y
-CONFIG_BLK_DEV_INTEGRITY=y
CONFIG_BLK_DEV_THROTTLING=y
CONFIG_BLK_WBT=y
CONFIG_BLK_CGROUP_IOLATENCY=y
@@ -466,6 +464,7 @@ CONFIG_DM_FLAKEY=m
CONFIG_DM_VERITY=m
CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
CONFIG_DM_SWITCH=m
+CONFIG_DM_INTEGRITY=m
CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 1667a3cdcf0a..e1642d2cba59 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -8,7 +8,6 @@ CONFIG_BPF_SYSCALL=y
CONFIG_BPF_JIT=y
CONFIG_BPF_JIT_ALWAYS_ON=y
CONFIG_BPF_LSM=y
-CONFIG_SCHED_CORE=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_TASKSTATS=y
diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c
index 6c43d2ba2079..9a2786079e3a 100644
--- a/arch/s390/hypfs/hypfs_diag0c.c
+++ b/arch/s390/hypfs/hypfs_diag0c.c
@@ -21,7 +21,7 @@
static void diag0c_fn(void *data)
{
diag_stat_inc(DIAG_STAT_X00C);
- diag_dma_ops.diag0c(((void **) data)[smp_processor_id()]);
+ diag_amode31_ops.diag0c(((void **)data)[smp_processor_id()]);
}
/*
@@ -33,12 +33,12 @@ static void *diag0c_store(unsigned int *count)
unsigned int cpu_count, cpu, i;
void **cpu_vec;
- get_online_cpus();
+ cpus_read_lock();
cpu_count = num_online_cpus();
cpu_vec = kmalloc_array(num_possible_cpus(), sizeof(*cpu_vec),
GFP_KERNEL);
if (!cpu_vec)
- goto fail_put_online_cpus;
+ goto fail_unlock_cpus;
/* Note: Diag 0c needs 8 byte alignment and real storage */
diag0c_data = kzalloc(struct_size(diag0c_data, entry, cpu_count),
GFP_KERNEL | GFP_DMA);
@@ -54,13 +54,13 @@ static void *diag0c_store(unsigned int *count)
on_each_cpu(diag0c_fn, cpu_vec, 1);
*count = cpu_count;
kfree(cpu_vec);
- put_online_cpus();
+ cpus_read_unlock();
return diag0c_data;
fail_kfree_cpu_vec:
kfree(cpu_vec);
-fail_put_online_cpus:
- put_online_cpus();
+fail_unlock_cpus:
+ cpus_read_unlock();
return ERR_PTR(-ENOMEM);
}
diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
index 20f169b6db4e..36dbf5043fc0 100644
--- a/arch/s390/include/asm/ccwgroup.h
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -53,8 +53,6 @@ extern int ccwgroup_driver_register (struct ccwgroup_driver *cdriver);
extern void ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver);
int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv,
int num_devices, const char *buf);
-struct ccwgroup_device *get_ccwgroupdev_by_busid(struct ccwgroup_driver *gdrv,
- char *bus_id);
extern int ccwgroup_set_online(struct ccwgroup_device *gdev);
extern int ccwgroup_set_offline(struct ccwgroup_device *gdev);
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index f58c92f28701..1effac6a0152 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -5,7 +5,6 @@
#ifndef _ASM_S390_CIO_H_
#define _ASM_S390_CIO_H_
-#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/genalloc.h>
#include <asm/types.h>
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index ea5b9c34b7be..8d49505b4a43 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -9,6 +9,9 @@
#include <linux/sched/task_stack.h>
#include <linux/thread_info.h>
+#define compat_mode_t compat_mode_t
+typedef u16 compat_mode_t;
+
#include <asm-generic/compat.h>
#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p( \
@@ -55,13 +58,9 @@
typedef u16 __compat_uid_t;
typedef u16 __compat_gid_t;
-typedef u32 __compat_uid32_t;
-typedef u32 __compat_gid32_t;
-typedef u16 compat_mode_t;
typedef u16 compat_dev_t;
typedef u16 compat_nlink_t;
typedef u16 compat_ipc_pid_t;
-typedef u32 compat_caddr_t;
typedef __kernel_fsid_t compat_fsid_t;
typedef struct {
@@ -155,13 +154,6 @@ struct compat_statfs64 {
#define COMPAT_RLIM_INFINITY 0xffffffff
-typedef u32 compat_old_sigset_t; /* at least 32 bits */
-
-#define _COMPAT_NSIG 64
-#define _COMPAT_NSIG_BPW 32
-
-typedef u32 compat_sigset_word;
-
#define COMPAT_OFF_T_MAX 0x7fffffff
/*
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index c0f3bfeddcbe..646b12981f20 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -173,17 +173,16 @@ typedef struct { unsigned char bytes[16]; } cpacf_mask_t;
*/
static __always_inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
{
- register unsigned long r0 asm("0") = 0; /* query function */
- register unsigned long r1 asm("1") = (unsigned long) mask;
-
asm volatile(
- " spm 0\n" /* pckmo doesn't change the cc */
+ " lghi 0,0\n" /* query function */
+ " lgr 1,%[mask]\n"
+ " spm 0\n" /* pckmo doesn't change the cc */
/* Parameter regs are ignored, but must be nonzero and unique */
"0: .insn rrf,%[opc] << 16,2,4,6,0\n"
" brc 1,0b\n" /* handle partial completion */
: "=m" (*mask)
- : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (opcode)
- : "cc");
+ : [mask] "d" ((unsigned long)mask), [opc] "i" (opcode)
+ : "cc", "0", "1");
}
static __always_inline int __cpacf_check_opcode(unsigned int opcode)
@@ -249,20 +248,22 @@ static __always_inline int cpacf_query_func(unsigned int opcode, unsigned int fu
static inline int cpacf_km(unsigned long func, void *param,
u8 *dest, const u8 *src, long src_len)
{
- register unsigned long r0 asm("0") = (unsigned long) func;
- register unsigned long r1 asm("1") = (unsigned long) param;
- register unsigned long r2 asm("2") = (unsigned long) src;
- register unsigned long r3 asm("3") = (unsigned long) src_len;
- register unsigned long r4 asm("4") = (unsigned long) dest;
+ union register_pair d, s;
+ d.even = (unsigned long)dest;
+ s.even = (unsigned long)src;
+ s.odd = (unsigned long)src_len;
asm volatile(
+ " lgr 0,%[fc]\n"
+ " lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,%[dst],%[src]\n"
" brc 1,0b\n" /* handle partial completion */
- : [src] "+a" (r2), [len] "+d" (r3), [dst] "+a" (r4)
- : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KM)
- : "cc", "memory");
+ : [src] "+&d" (s.pair), [dst] "+&d" (d.pair)
+ : [fc] "d" (func), [pba] "d" ((unsigned long)param),
+ [opc] "i" (CPACF_KM)
+ : "cc", "memory", "0", "1");
- return src_len - r3;
+ return src_len - s.odd;
}
/**
@@ -279,20 +280,22 @@ static inline int cpacf_km(unsigned long func, void *param,
static inline int cpacf_kmc(unsigned long func, void *param,
u8 *dest, const u8 *src, long src_len)
{
- register unsigned long r0 asm("0") = (unsigned long) func;
- register unsigned long r1 asm("1") = (unsigned long) param;
- register unsigned long r2 asm("2") = (unsigned long) src;
- register unsigned long r3 asm("3") = (unsigned long) src_len;
- register unsigned long r4 asm("4") = (unsigned long) dest;
+ union register_pair d, s;
+ d.even = (unsigned long)dest;
+ s.even = (unsigned long)src;
+ s.odd = (unsigned long)src_len;
asm volatile(
+ " lgr 0,%[fc]\n"
+ " lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,%[dst],%[src]\n"
" brc 1,0b\n" /* handle partial completion */
- : [src] "+a" (r2), [len] "+d" (r3), [dst] "+a" (r4)
- : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMC)
- : "cc", "memory");
+ : [src] "+&d" (s.pair), [dst] "+&d" (d.pair)
+ : [fc] "d" (func), [pba] "d" ((unsigned long)param),
+ [opc] "i" (CPACF_KMC)
+ : "cc", "memory", "0", "1");
- return src_len - r3;
+ return src_len - s.odd;
}
/**
@@ -306,17 +309,19 @@ static inline int cpacf_kmc(unsigned long func, void *param,
static inline void cpacf_kimd(unsigned long func, void *param,
const u8 *src, long src_len)
{
- register unsigned long r0 asm("0") = (unsigned long) func;
- register unsigned long r1 asm("1") = (unsigned long) param;
- register unsigned long r2 asm("2") = (unsigned long) src;
- register unsigned long r3 asm("3") = (unsigned long) src_len;
+ union register_pair s;
+ s.even = (unsigned long)src;
+ s.odd = (unsigned long)src_len;
asm volatile(
+ " lgr 0,%[fc]\n"
+ " lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,0,%[src]\n"
" brc 1,0b\n" /* handle partial completion */
- : [src] "+a" (r2), [len] "+d" (r3)
- : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KIMD)
- : "cc", "memory");
+ : [src] "+&d" (s.pair)
+ : [fc] "d" (func), [pba] "d" ((unsigned long)(param)),
+ [opc] "i" (CPACF_KIMD)
+ : "cc", "memory", "0", "1");
}
/**
@@ -329,17 +334,19 @@ static inline void cpacf_kimd(unsigned long func, void *param,
static inline void cpacf_klmd(unsigned long func, void *param,
const u8 *src, long src_len)
{
- register unsigned long r0 asm("0") = (unsigned long) func;
- register unsigned long r1 asm("1") = (unsigned long) param;
- register unsigned long r2 asm("2") = (unsigned long) src;
- register unsigned long r3 asm("3") = (unsigned long) src_len;
+ union register_pair s;
+ s.even = (unsigned long)src;
+ s.odd = (unsigned long)src_len;
asm volatile(
+ " lgr 0,%[fc]\n"
+ " lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,0,%[src]\n"
" brc 1,0b\n" /* handle partial completion */
- : [src] "+a" (r2), [len] "+d" (r3)
- : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KLMD)
- : "cc", "memory");
+ : [src] "+&d" (s.pair)
+ : [fc] "d" (func), [pba] "d" ((unsigned long)param),
+ [opc] "i" (CPACF_KLMD)
+ : "cc", "memory", "0", "1");
}
/**
@@ -355,19 +362,21 @@ static inline void cpacf_klmd(unsigned long func, void *param,
static inline int cpacf_kmac(unsigned long func, void *param,
const u8 *src, long src_len)
{
- register unsigned long r0 asm("0") = (unsigned long) func;
- register unsigned long r1 asm("1") = (unsigned long) param;
- register unsigned long r2 asm("2") = (unsigned long) src;
- register unsigned long r3 asm("3") = (unsigned long) src_len;
+ union register_pair s;
+ s.even = (unsigned long)src;
+ s.odd = (unsigned long)src_len;
asm volatile(
+ " lgr 0,%[fc]\n"
+ " lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,0,%[src]\n"
" brc 1,0b\n" /* handle partial completion */
- : [src] "+a" (r2), [len] "+d" (r3)
- : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMAC)
- : "cc", "memory");
+ : [src] "+&d" (s.pair)
+ : [fc] "d" (func), [pba] "d" ((unsigned long)param),
+ [opc] "i" (CPACF_KMAC)
+ : "cc", "memory", "0", "1");
- return src_len - r3;
+ return src_len - s.odd;
}
/**
@@ -385,22 +394,24 @@ static inline int cpacf_kmac(unsigned long func, void *param,
static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest,
const u8 *src, long src_len, u8 *counter)
{
- register unsigned long r0 asm("0") = (unsigned long) func;
- register unsigned long r1 asm("1") = (unsigned long) param;
- register unsigned long r2 asm("2") = (unsigned long) src;
- register unsigned long r3 asm("3") = (unsigned long) src_len;
- register unsigned long r4 asm("4") = (unsigned long) dest;
- register unsigned long r6 asm("6") = (unsigned long) counter;
+ union register_pair d, s, c;
+ d.even = (unsigned long)dest;
+ s.even = (unsigned long)src;
+ s.odd = (unsigned long)src_len;
+ c.even = (unsigned long)counter;
asm volatile(
+ " lgr 0,%[fc]\n"
+ " lgr 1,%[pba]\n"
"0: .insn rrf,%[opc] << 16,%[dst],%[src],%[ctr],0\n"
" brc 1,0b\n" /* handle partial completion */
- : [src] "+a" (r2), [len] "+d" (r3),
- [dst] "+a" (r4), [ctr] "+a" (r6)
- : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMCTR)
- : "cc", "memory");
+ : [src] "+&d" (s.pair), [dst] "+&d" (d.pair),
+ [ctr] "+&d" (c.pair)
+ : [fc] "d" (func), [pba] "d" ((unsigned long)param),
+ [opc] "i" (CPACF_KMCTR)
+ : "cc", "memory", "0", "1");
- return src_len - r3;
+ return src_len - s.odd;
}
/**
@@ -417,20 +428,21 @@ static inline void cpacf_prno(unsigned long func, void *param,
u8 *dest, unsigned long dest_len,
const u8 *seed, unsigned long seed_len)
{
- register unsigned long r0 asm("0") = (unsigned long) func;
- register unsigned long r1 asm("1") = (unsigned long) param;
- register unsigned long r2 asm("2") = (unsigned long) dest;
- register unsigned long r3 asm("3") = (unsigned long) dest_len;
- register unsigned long r4 asm("4") = (unsigned long) seed;
- register unsigned long r5 asm("5") = (unsigned long) seed_len;
+ union register_pair d, s;
+ d.even = (unsigned long)dest;
+ d.odd = (unsigned long)dest_len;
+ s.even = (unsigned long)seed;
+ s.odd = (unsigned long)seed_len;
asm volatile (
+ " lgr 0,%[fc]\n"
+ " lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,%[dst],%[seed]\n"
" brc 1,0b\n" /* handle partial completion */
- : [dst] "+a" (r2), [dlen] "+d" (r3)
- : [fc] "d" (r0), [pba] "a" (r1),
- [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PRNO)
- : "cc", "memory");
+ : [dst] "+&d" (d.pair)
+ : [fc] "d" (func), [pba] "d" ((unsigned long)param),
+ [seed] "d" (s.pair), [opc] "i" (CPACF_PRNO)
+ : "cc", "memory", "0", "1");
}
/**
@@ -443,19 +455,19 @@ static inline void cpacf_prno(unsigned long func, void *param,
static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len,
u8 *cbuf, unsigned long cbuf_len)
{
- register unsigned long r0 asm("0") = (unsigned long) CPACF_PRNO_TRNG;
- register unsigned long r2 asm("2") = (unsigned long) ucbuf;
- register unsigned long r3 asm("3") = (unsigned long) ucbuf_len;
- register unsigned long r4 asm("4") = (unsigned long) cbuf;
- register unsigned long r5 asm("5") = (unsigned long) cbuf_len;
+ union register_pair u, c;
+ u.even = (unsigned long)ucbuf;
+ u.odd = (unsigned long)ucbuf_len;
+ c.even = (unsigned long)cbuf;
+ c.odd = (unsigned long)cbuf_len;
asm volatile (
+ " lghi 0,%[fc]\n"
"0: .insn rre,%[opc] << 16,%[ucbuf],%[cbuf]\n"
" brc 1,0b\n" /* handle partial completion */
- : [ucbuf] "+a" (r2), [ucbuflen] "+d" (r3),
- [cbuf] "+a" (r4), [cbuflen] "+d" (r5)
- : [fc] "d" (r0), [opc] "i" (CPACF_PRNO)
- : "cc", "memory");
+ : [ucbuf] "+&d" (u.pair), [cbuf] "+&d" (c.pair)
+ : [fc] "K" (CPACF_PRNO_TRNG), [opc] "i" (CPACF_PRNO)
+ : "cc", "memory", "0");
}
/**
@@ -466,15 +478,15 @@ static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len,
*/
static inline void cpacf_pcc(unsigned long func, void *param)
{
- register unsigned long r0 asm("0") = (unsigned long) func;
- register unsigned long r1 asm("1") = (unsigned long) param;
-
asm volatile(
+ " lgr 0,%[fc]\n"
+ " lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,0,0\n" /* PCC opcode */
" brc 1,0b\n" /* handle partial completion */
:
- : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_PCC)
- : "cc", "memory");
+ : [fc] "d" (func), [pba] "d" ((unsigned long)param),
+ [opc] "i" (CPACF_PCC)
+ : "cc", "memory", "0", "1");
}
/**
@@ -487,14 +499,14 @@ static inline void cpacf_pcc(unsigned long func, void *param)
*/
static inline void cpacf_pckmo(long func, void *param)
{
- register unsigned long r0 asm("0") = (unsigned long) func;
- register unsigned long r1 asm("1") = (unsigned long) param;
-
asm volatile(
+ " lgr 0,%[fc]\n"
+ " lgr 1,%[pba]\n"
" .insn rre,%[opc] << 16,0,0\n" /* PCKMO opcode */
:
- : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_PCKMO)
- : "cc", "memory");
+ : [fc] "d" (func), [pba] "d" ((unsigned long)param),
+ [opc] "i" (CPACF_PCKMO)
+ : "cc", "memory", "0", "1");
}
/**
@@ -512,21 +524,23 @@ static inline void cpacf_kma(unsigned long func, void *param, u8 *dest,
const u8 *src, unsigned long src_len,
const u8 *aad, unsigned long aad_len)
{
- register unsigned long r0 asm("0") = (unsigned long) func;
- register unsigned long r1 asm("1") = (unsigned long) param;
- register unsigned long r2 asm("2") = (unsigned long) src;
- register unsigned long r3 asm("3") = (unsigned long) src_len;
- register unsigned long r4 asm("4") = (unsigned long) aad;
- register unsigned long r5 asm("5") = (unsigned long) aad_len;
- register unsigned long r6 asm("6") = (unsigned long) dest;
+ union register_pair d, s, a;
+ d.even = (unsigned long)dest;
+ s.even = (unsigned long)src;
+ s.odd = (unsigned long)src_len;
+ a.even = (unsigned long)aad;
+ a.odd = (unsigned long)aad_len;
asm volatile(
+ " lgr 0,%[fc]\n"
+ " lgr 1,%[pba]\n"
"0: .insn rrf,%[opc] << 16,%[dst],%[src],%[aad],0\n"
" brc 1,0b\n" /* handle partial completion */
- : [dst] "+a" (r6), [src] "+a" (r2), [slen] "+d" (r3),
- [aad] "+a" (r4), [alen] "+d" (r5)
- : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMA)
- : "cc", "memory");
+ : [dst] "+&d" (d.pair), [src] "+&d" (s.pair),
+ [aad] "+&d" (a.pair)
+ : [fc] "d" (func), [pba] "d" ((unsigned long)param),
+ [opc] "i" (CPACF_KMA)
+ : "cc", "memory", "0", "1");
}
#endif /* _ASM_S390_CPACF_H */
diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h
index 1d007c6ede95..14cfd48d598e 100644
--- a/arch/s390/include/asm/cpufeature.h
+++ b/arch/s390/include/asm/cpufeature.h
@@ -23,7 +23,7 @@
#define MAX_ELF_HWCAP_FEATURES (8 * sizeof(elf_hwcap))
#define MAX_CPU_FEATURES MAX_ELF_HWCAP_FEATURES
-#define cpu_feature(feat) ilog2(HWCAP_S390_ ## feat)
+#define cpu_feature(feat) ilog2(HWCAP_ ## feat)
int cpu_have_feature(unsigned int nr);
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index adc0179fa34e..04dc65f8901d 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -111,6 +111,23 @@ union ctlreg2 {
};
};
+union ctlreg5 {
+ unsigned long val;
+ struct {
+ unsigned long : 33;
+ unsigned long pasteo: 25;
+ unsigned long : 6;
+ };
+};
+
+union ctlreg15 {
+ unsigned long val;
+ struct {
+ unsigned long lsea : 61;
+ unsigned long : 3;
+ };
+};
+
#define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
#define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index c1b82bcc017c..19a55e1e3a0c 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -13,6 +13,7 @@
#include <linux/time.h>
#include <linux/refcount.h>
#include <linux/fs.h>
+#include <linux/init.h>
#define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */
#define DEBUG_OFF_LEVEL -1 /* level where debug is switched off */
@@ -391,38 +392,99 @@ int debug_register_view(debug_info_t *id, struct debug_view *view);
int debug_unregister_view(debug_info_t *id, struct debug_view *view);
+#ifndef MODULE
+
+/*
+ * Note: Initial page and area numbers must be fixed to allow static
+ * initialization. This enables very early tracing. Changes to these values
+ * must be reflected in __DEFINE_STATIC_AREA.
+ */
+#define EARLY_PAGES 8
+#define EARLY_AREAS 1
+
+#define VNAME(var, suffix) __##var##_##suffix
+
/*
- define the debug levels:
- - 0 No debugging output to console or syslog
- - 1 Log internal errors to syslog, ignore check conditions
- - 2 Log internal errors and check conditions to syslog
- - 3 Log internal errors to console, log check conditions to syslog
- - 4 Log internal errors and check conditions to console
- - 5 panic on internal errors, log check conditions to console
- - 6 panic on both, internal errors and check conditions
+ * Define static areas for early trace data. During boot debug_register_static()
+ * will replace these with dynamically allocated areas to allow custom page and
+ * area sizes, and dynamic resizing.
*/
+#define __DEFINE_STATIC_AREA(var) \
+static char VNAME(var, data)[EARLY_PAGES][PAGE_SIZE] __initdata; \
+static debug_entry_t *VNAME(var, pages)[EARLY_PAGES] __initdata = { \
+ (debug_entry_t *)VNAME(var, data)[0], \
+ (debug_entry_t *)VNAME(var, data)[1], \
+ (debug_entry_t *)VNAME(var, data)[2], \
+ (debug_entry_t *)VNAME(var, data)[3], \
+ (debug_entry_t *)VNAME(var, data)[4], \
+ (debug_entry_t *)VNAME(var, data)[5], \
+ (debug_entry_t *)VNAME(var, data)[6], \
+ (debug_entry_t *)VNAME(var, data)[7], \
+}; \
+static debug_entry_t **VNAME(var, areas)[EARLY_AREAS] __initdata = { \
+ (debug_entry_t **)VNAME(var, pages), \
+}; \
+static int VNAME(var, active_pages)[EARLY_AREAS] __initdata; \
+static int VNAME(var, active_entries)[EARLY_AREAS] __initdata
+
+#define __DEBUG_INFO_INIT(var, _name, _buf_size) { \
+ .next = NULL, \
+ .prev = NULL, \
+ .ref_count = REFCOUNT_INIT(1), \
+ .lock = __SPIN_LOCK_UNLOCKED(var.lock), \
+ .level = DEBUG_DEFAULT_LEVEL, \
+ .nr_areas = EARLY_AREAS, \
+ .pages_per_area = EARLY_PAGES, \
+ .buf_size = (_buf_size), \
+ .entry_size = sizeof(debug_entry_t) + (_buf_size), \
+ .areas = VNAME(var, areas), \
+ .active_area = 0, \
+ .active_pages = VNAME(var, active_pages), \
+ .active_entries = VNAME(var, active_entries), \
+ .debugfs_root_entry = NULL, \
+ .debugfs_entries = { NULL }, \
+ .views = { NULL }, \
+ .name = (_name), \
+ .mode = 0600, \
+}
+
+#define __REGISTER_STATIC_DEBUG_INFO(var, name, pages, areas, view) \
+static int __init VNAME(var, reg)(void) \
+{ \
+ debug_register_static(&var, (pages), (areas)); \
+ debug_register_view(&var, (view)); \
+ return 0; \
+} \
+arch_initcall(VNAME(var, reg))
+
+/**
+ * DEFINE_STATIC_DEBUG_INFO - Define static debug_info_t
+ *
+ * @var: Name of debug_info_t variable
+ * @name: Name of debug log (e.g. used for debugfs entry)
+ * @pages: Number of pages per area
+ * @nr_areas: Number of debug areas
+ * @buf_size: Size of data area in each debug entry
+ * @view: Pointer to debug view struct
+ *
+ * Define a static debug_info_t for early tracing. The associated debugfs log
+ * is automatically registered with the specified debug view.
+ *
+ * Important: Users of this macro must not call any of the
+ * debug_register/_unregister() functions for this debug_info_t!
+ *
+ * Note: Tracing will start with a fixed number of initial pages and areas.
+ * The debug area will be changed to use the specified numbers during
+ * arch_initcall.
+ */
+#define DEFINE_STATIC_DEBUG_INFO(var, name, pages, nr_areas, buf_size, view) \
+__DEFINE_STATIC_AREA(var); \
+static debug_info_t __refdata var = \
+ __DEBUG_INFO_INIT(var, (name), (buf_size)); \
+__REGISTER_STATIC_DEBUG_INFO(var, name, pages, nr_areas, view)
+
+void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas);
-#ifndef DEBUG_LEVEL
-#define DEBUG_LEVEL 4
-#endif
-
-#define INTERNAL_ERRMSG(x,y...) "E" __FILE__ "%d: " x, __LINE__, y
-#define INTERNAL_WRNMSG(x,y...) "W" __FILE__ "%d: " x, __LINE__, y
-#define INTERNAL_INFMSG(x,y...) "I" __FILE__ "%d: " x, __LINE__, y
-#define INTERNAL_DEBMSG(x,y...) "D" __FILE__ "%d: " x, __LINE__, y
-
-#if DEBUG_LEVEL > 0
-#define PRINT_DEBUG(x...) printk(KERN_DEBUG PRINTK_HEADER x)
-#define PRINT_INFO(x...) printk(KERN_INFO PRINTK_HEADER x)
-#define PRINT_WARN(x...) printk(KERN_WARNING PRINTK_HEADER x)
-#define PRINT_ERR(x...) printk(KERN_ERR PRINTK_HEADER x)
-#define PRINT_FATAL(x...) panic(PRINTK_HEADER x)
-#else
-#define PRINT_DEBUG(x...) printk(KERN_DEBUG PRINTK_HEADER x)
-#define PRINT_INFO(x...) printk(KERN_DEBUG PRINTK_HEADER x)
-#define PRINT_WARN(x...) printk(KERN_DEBUG PRINTK_HEADER x)
-#define PRINT_ERR(x...) printk(KERN_DEBUG PRINTK_HEADER x)
-#define PRINT_FATAL(x...) printk(KERN_DEBUG PRINTK_HEADER x)
-#endif /* DASD_DEBUG */
+#endif /* MODULE */
#endif /* DEBUG_H */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index ca8f85b53a90..b3a8cb4daed6 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -309,6 +309,10 @@ int diag26c(void *req, void *resp, enum diag26c_sc subcode);
struct hypfs_diag0c_entry;
+/*
+ * This structure must contain only pointers/references into
+ * the AMODE31 text section.
+ */
struct diag_ops {
int (*diag210)(struct diag210 *addr);
int (*diag26c)(void *req, void *resp, enum diag26c_sc subcode);
@@ -317,6 +321,13 @@ struct diag_ops {
void (*diag308_reset)(void);
};
-extern struct diag_ops diag_dma_ops;
-extern struct diag210 *__diag210_tmp_dma;
+extern struct diag_ops diag_amode31_ops;
+extern struct diag210 *__diag210_tmp_amode31;
+
+int _diag210_amode31(struct diag210 *addr);
+int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode);
+int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode);
+void _diag0c_amode31(struct hypfs_diag0c_entry *entry);
+void _diag308_reset_amode31(void);
+
#endif /* _ASM_S390_DIAG_H */
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index bd00c94620d3..70a30ae258b7 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -91,29 +91,57 @@
/* Keep this the last entry. */
#define R_390_NUM 61
-/* Bits present in AT_HWCAP. */
-#define HWCAP_S390_ESAN3 1
-#define HWCAP_S390_ZARCH 2
-#define HWCAP_S390_STFLE 4
-#define HWCAP_S390_MSA 8
-#define HWCAP_S390_LDISP 16
-#define HWCAP_S390_EIMM 32
-#define HWCAP_S390_DFP 64
-#define HWCAP_S390_HPAGE 128
-#define HWCAP_S390_ETF3EH 256
-#define HWCAP_S390_HIGH_GPRS 512
-#define HWCAP_S390_TE 1024
-#define HWCAP_S390_VXRS 2048
-#define HWCAP_S390_VXRS_BCD 4096
-#define HWCAP_S390_VXRS_EXT 8192
-#define HWCAP_S390_GS 16384
-#define HWCAP_S390_VXRS_EXT2 32768
-#define HWCAP_S390_VXRS_PDE 65536
-#define HWCAP_S390_SORT 131072
-#define HWCAP_S390_DFLT 262144
+enum {
+ HWCAP_NR_ESAN3 = 0,
+ HWCAP_NR_ZARCH = 1,
+ HWCAP_NR_STFLE = 2,
+ HWCAP_NR_MSA = 3,
+ HWCAP_NR_LDISP = 4,
+ HWCAP_NR_EIMM = 5,
+ HWCAP_NR_DFP = 6,
+ HWCAP_NR_HPAGE = 7,
+ HWCAP_NR_ETF3EH = 8,
+ HWCAP_NR_HIGH_GPRS = 9,
+ HWCAP_NR_TE = 10,
+ HWCAP_NR_VXRS = 11,
+ HWCAP_NR_VXRS_BCD = 12,
+ HWCAP_NR_VXRS_EXT = 13,
+ HWCAP_NR_GS = 14,
+ HWCAP_NR_VXRS_EXT2 = 15,
+ HWCAP_NR_VXRS_PDE = 16,
+ HWCAP_NR_SORT = 17,
+ HWCAP_NR_DFLT = 18,
+ HWCAP_NR_VXRS_PDE2 = 19,
+ HWCAP_NR_NNPA = 20,
+ HWCAP_NR_PCI_MIO = 21,
+ HWCAP_NR_SIE = 22,
+ HWCAP_NR_MAX
+};
-/* Internal bits, not exposed via elf */
-#define HWCAP_INT_SIE 1UL
+/* Bits present in AT_HWCAP. */
+#define HWCAP_ESAN3 BIT(HWCAP_NR_ESAN3)
+#define HWCAP_ZARCH BIT(HWCAP_NR_ZARCH)
+#define HWCAP_STFLE BIT(HWCAP_NR_STFLE)
+#define HWCAP_MSA BIT(HWCAP_NR_MSA)
+#define HWCAP_LDISP BIT(HWCAP_NR_LDISP)
+#define HWCAP_EIMM BIT(HWCAP_NR_EIMM)
+#define HWCAP_DFP BIT(HWCAP_NR_DFP)
+#define HWCAP_HPAGE BIT(HWCAP_NR_HPAGE)
+#define HWCAP_ETF3EH BIT(HWCAP_NR_ETF3EH)
+#define HWCAP_HIGH_GPRS BIT(HWCAP_NR_HIGH_GPRS)
+#define HWCAP_TE BIT(HWCAP_NR_TE)
+#define HWCAP_VXRS BIT(HWCAP_NR_VXRS)
+#define HWCAP_VXRS_BCD BIT(HWCAP_NR_VXRS_BCD)
+#define HWCAP_VXRS_EXT BIT(HWCAP_NR_VXRS_EXT)
+#define HWCAP_GS BIT(HWCAP_NR_GS)
+#define HWCAP_VXRS_EXT2 BIT(HWCAP_NR_VXRS_EXT2)
+#define HWCAP_VXRS_PDE BIT(HWCAP_NR_VXRS_PDE)
+#define HWCAP_SORT BIT(HWCAP_NR_SORT)
+#define HWCAP_DFLT BIT(HWCAP_NR_DFLT)
+#define HWCAP_VXRS_PDE2 BIT(HWCAP_NR_VXRS_PDE2)
+#define HWCAP_NNPA BIT(HWCAP_NR_NNPA)
+#define HWCAP_PCI_MIO BIT(HWCAP_NR_PCI_MIO)
+#define HWCAP_SIE BIT(HWCAP_NR_SIE)
/*
* These are used to set parameters in the core dumps.
@@ -209,10 +237,6 @@ struct arch_elf_state {
extern unsigned long elf_hwcap;
#define ELF_HWCAP (elf_hwcap)
-/* Internal hardware capabilities, not exposed via elf */
-
-extern unsigned long int_hwcap;
-
/* This yields a string that ld.so will use to load implementation
specific libraries for optimization. This is more specific in
intent than poking at uname or /proc/cpuinfo.
diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h
index 3beb294fd553..16dc57dd90b3 100644
--- a/arch/s390/include/asm/extable.h
+++ b/arch/s390/include/asm/extable.h
@@ -28,8 +28,8 @@ struct exception_table_entry
long handler;
};
-extern struct exception_table_entry *__start_dma_ex_table;
-extern struct exception_table_entry *__stop_dma_ex_table;
+extern struct exception_table_entry *__start_amode31_ex_table;
+extern struct exception_table_entry *__stop_amode31_ex_table;
const struct exception_table_entry *s390_search_extables(unsigned long addr);
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 345cbe982a8b..e8b460f39c58 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -18,7 +18,6 @@
void ftrace_caller(void);
extern char ftrace_graph_caller_end;
-extern unsigned long ftrace_plt;
extern void *ftrace_func;
struct dyn_arch_ftrace { };
@@ -31,10 +30,11 @@ struct dyn_arch_ftrace { };
struct module;
struct dyn_ftrace;
-/*
- * Either -mhotpatch or -mnop-mcount is used - no explicit init is required
- */
-static inline int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) { return 0; }
+
+bool ftrace_need_init_nop(void);
+#define ftrace_need_init_nop ftrace_need_init_nop
+
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
#define ftrace_init_nop ftrace_init_nop
static inline unsigned long ftrace_call_adjust(unsigned long addr)
@@ -42,42 +42,6 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
return addr;
}
-struct ftrace_insn {
- u16 opc;
- s32 disp;
-} __packed;
-
-static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn)
-{
-#ifdef CONFIG_FUNCTION_TRACER
- /* brcl 0,0 */
- insn->opc = 0xc004;
- insn->disp = 0;
-#endif
-}
-
-static inline int is_ftrace_nop(struct ftrace_insn *insn)
-{
-#ifdef CONFIG_FUNCTION_TRACER
- if (insn->disp == 0)
- return 1;
-#endif
- return 0;
-}
-
-static inline void ftrace_generate_call_insn(struct ftrace_insn *insn,
- unsigned long ip)
-{
-#ifdef CONFIG_FUNCTION_TRACER
- unsigned long target;
-
- /* brasl r0,ftrace_caller */
- target = is_module_addr((void *) ip) ? ftrace_plt : FTRACE_ADDR;
- insn->opc = 0xc005;
- insn->disp = (target - ip) / 2;
-#endif
-}
-
/*
* Even though the system call numbers are identical for s390/s390x a
* different system call table is used for compat tasks. This may lead
diff --git a/arch/s390/include/asm/ftrace.lds.h b/arch/s390/include/asm/ftrace.lds.h
new file mode 100644
index 000000000000..968adfd41240
--- /dev/null
+++ b/arch/s390/include/asm/ftrace.lds.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef DIV_ROUND_UP
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#endif
+
+#define SIZEOF_MCOUNT_LOC_ENTRY 8
+#define SIZEOF_FTRACE_HOTPATCH_TRAMPOLINE 24
+#define FTRACE_HOTPATCH_TRAMPOLINES_SIZE(n) \
+ DIV_ROUND_UP(SIZEOF_FTRACE_HOTPATCH_TRAMPOLINE * (n), \
+ SIZEOF_MCOUNT_LOC_ENTRY)
+
+#ifdef CONFIG_FUNCTION_TRACER
+#define FTRACE_HOTPATCH_TRAMPOLINES_TEXT \
+ . = ALIGN(8); \
+ __ftrace_hotpatch_trampolines_start = .; \
+ . = . + FTRACE_HOTPATCH_TRAMPOLINES_SIZE(__stop_mcount_loc - \
+ __start_mcount_loc); \
+ __ftrace_hotpatch_trampolines_end = .;
+#else
+#define FTRACE_HOTPATCH_TRAMPOLINES_TEXT
+#endif
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index a9e2c7295b35..3f8ee257f9aa 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -12,6 +12,7 @@
#include <asm/types.h>
#include <asm/cio.h>
#include <asm/setup.h>
+#include <asm/page.h>
#include <uapi/asm/ipl.h>
struct ipl_parameter_block {
diff --git a/arch/s390/include/asm/kfence.h b/arch/s390/include/asm/kfence.h
new file mode 100644
index 000000000000..d55ba878378b
--- /dev/null
+++ b/arch/s390/include/asm/kfence.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_KFENCE_H
+#define _ASM_S390_KFENCE_H
+
+#include <linux/mm.h>
+#include <linux/kfence.h>
+#include <asm/set_memory.h>
+#include <asm/page.h>
+
+void __kernel_map_pages(struct page *page, int numpages, int enable);
+
+static __always_inline bool arch_kfence_init_pool(void)
+{
+ return true;
+}
+
+#define arch_kfence_test_address(addr) ((addr) & PAGE_MASK)
+
+/*
+ * Do not split kfence pool to 4k mapping with arch_kfence_init_pool(),
+ * but earlier where page table allocations still happen with memblock.
+ * Reason is that arch_kfence_init_pool() gets called when the system
+ * is still in a limbo state - disabling and enabling bottom halves is
+ * not yet allowed, but that is what our page_table_alloc() would do.
+ */
+static __always_inline void kfence_split_mapping(void)
+{
+#ifdef CONFIG_KFENCE
+ unsigned long pool_pages = KFENCE_POOL_SIZE >> PAGE_SHIFT;
+
+ set_memory_4k((unsigned long)__kfence_pool, pool_pages);
+#endif
+}
+
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+ __kernel_map_pages(virt_to_page(addr), 1, !protect);
+ return true;
+}
+
+#endif /* _ASM_S390_KFENCE_H */
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
index cbc7c3a68e4d..df73a052760c 100644
--- a/arch/s390/include/asm/kvm_para.h
+++ b/arch/s390/include/asm/kvm_para.h
@@ -24,162 +24,79 @@
#include <uapi/asm/kvm_para.h>
#include <asm/diag.h>
-static inline long __kvm_hypercall0(unsigned long nr)
-{
- register unsigned long __nr asm("1") = nr;
- register long __rc asm("2");
-
- asm volatile ("diag 2,4,0x500\n"
- : "=d" (__rc) : "d" (__nr): "memory", "cc");
- return __rc;
-}
-
-static inline long kvm_hypercall0(unsigned long nr)
-{
- diag_stat_inc(DIAG_STAT_X500);
- return __kvm_hypercall0(nr);
-}
-
-static inline long __kvm_hypercall1(unsigned long nr, unsigned long p1)
-{
- register unsigned long __nr asm("1") = nr;
- register unsigned long __p1 asm("2") = p1;
- register long __rc asm("2");
-
- asm volatile ("diag 2,4,0x500\n"
- : "=d" (__rc) : "d" (__nr), "0" (__p1) : "memory", "cc");
- return __rc;
-}
-
-static inline long kvm_hypercall1(unsigned long nr, unsigned long p1)
-{
- diag_stat_inc(DIAG_STAT_X500);
- return __kvm_hypercall1(nr, p1);
-}
-
-static inline long __kvm_hypercall2(unsigned long nr, unsigned long p1,
- unsigned long p2)
-{
- register unsigned long __nr asm("1") = nr;
- register unsigned long __p1 asm("2") = p1;
- register unsigned long __p2 asm("3") = p2;
- register long __rc asm("2");
-
- asm volatile ("diag 2,4,0x500\n"
- : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2)
- : "memory", "cc");
- return __rc;
-}
-
-static inline long kvm_hypercall2(unsigned long nr, unsigned long p1,
- unsigned long p2)
-{
- diag_stat_inc(DIAG_STAT_X500);
- return __kvm_hypercall2(nr, p1, p2);
-}
-
-static inline long __kvm_hypercall3(unsigned long nr, unsigned long p1,
- unsigned long p2, unsigned long p3)
-{
- register unsigned long __nr asm("1") = nr;
- register unsigned long __p1 asm("2") = p1;
- register unsigned long __p2 asm("3") = p2;
- register unsigned long __p3 asm("4") = p3;
- register long __rc asm("2");
-
- asm volatile ("diag 2,4,0x500\n"
- : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
- "d" (__p3) : "memory", "cc");
- return __rc;
-}
-
-static inline long kvm_hypercall3(unsigned long nr, unsigned long p1,
- unsigned long p2, unsigned long p3)
-{
- diag_stat_inc(DIAG_STAT_X500);
- return __kvm_hypercall3(nr, p1, p2, p3);
-}
-
-static inline long __kvm_hypercall4(unsigned long nr, unsigned long p1,
- unsigned long p2, unsigned long p3,
- unsigned long p4)
-{
- register unsigned long __nr asm("1") = nr;
- register unsigned long __p1 asm("2") = p1;
- register unsigned long __p2 asm("3") = p2;
- register unsigned long __p3 asm("4") = p3;
- register unsigned long __p4 asm("5") = p4;
- register long __rc asm("2");
-
- asm volatile ("diag 2,4,0x500\n"
- : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
- "d" (__p3), "d" (__p4) : "memory", "cc");
- return __rc;
-}
-
-static inline long kvm_hypercall4(unsigned long nr, unsigned long p1,
- unsigned long p2, unsigned long p3,
- unsigned long p4)
-{
- diag_stat_inc(DIAG_STAT_X500);
- return __kvm_hypercall4(nr, p1, p2, p3, p4);
-}
-
-static inline long __kvm_hypercall5(unsigned long nr, unsigned long p1,
- unsigned long p2, unsigned long p3,
- unsigned long p4, unsigned long p5)
-{
- register unsigned long __nr asm("1") = nr;
- register unsigned long __p1 asm("2") = p1;
- register unsigned long __p2 asm("3") = p2;
- register unsigned long __p3 asm("4") = p3;
- register unsigned long __p4 asm("5") = p4;
- register unsigned long __p5 asm("6") = p5;
- register long __rc asm("2");
-
- asm volatile ("diag 2,4,0x500\n"
- : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
- "d" (__p3), "d" (__p4), "d" (__p5) : "memory", "cc");
- return __rc;
-}
-
-static inline long kvm_hypercall5(unsigned long nr, unsigned long p1,
- unsigned long p2, unsigned long p3,
- unsigned long p4, unsigned long p5)
-{
- diag_stat_inc(DIAG_STAT_X500);
- return __kvm_hypercall5(nr, p1, p2, p3, p4, p5);
-}
-
-static inline long __kvm_hypercall6(unsigned long nr, unsigned long p1,
- unsigned long p2, unsigned long p3,
- unsigned long p4, unsigned long p5,
- unsigned long p6)
-{
- register unsigned long __nr asm("1") = nr;
- register unsigned long __p1 asm("2") = p1;
- register unsigned long __p2 asm("3") = p2;
- register unsigned long __p3 asm("4") = p3;
- register unsigned long __p4 asm("5") = p4;
- register unsigned long __p5 asm("6") = p5;
- register unsigned long __p6 asm("7") = p6;
- register long __rc asm("2");
-
- asm volatile ("diag 2,4,0x500\n"
- : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
- "d" (__p3), "d" (__p4), "d" (__p5), "d" (__p6)
- : "memory", "cc");
- return __rc;
-}
-
-static inline long kvm_hypercall6(unsigned long nr, unsigned long p1,
- unsigned long p2, unsigned long p3,
- unsigned long p4, unsigned long p5,
- unsigned long p6)
-{
- diag_stat_inc(DIAG_STAT_X500);
- return __kvm_hypercall6(nr, p1, p2, p3, p4, p5, p6);
-}
+#define HYPERCALL_FMT_0
+#define HYPERCALL_FMT_1 , "0" (r2)
+#define HYPERCALL_FMT_2 , "d" (r3) HYPERCALL_FMT_1
+#define HYPERCALL_FMT_3 , "d" (r4) HYPERCALL_FMT_2
+#define HYPERCALL_FMT_4 , "d" (r5) HYPERCALL_FMT_3
+#define HYPERCALL_FMT_5 , "d" (r6) HYPERCALL_FMT_4
+#define HYPERCALL_FMT_6 , "d" (r7) HYPERCALL_FMT_5
+
+#define HYPERCALL_PARM_0
+#define HYPERCALL_PARM_1 , unsigned long arg1
+#define HYPERCALL_PARM_2 HYPERCALL_PARM_1, unsigned long arg2
+#define HYPERCALL_PARM_3 HYPERCALL_PARM_2, unsigned long arg3
+#define HYPERCALL_PARM_4 HYPERCALL_PARM_3, unsigned long arg4
+#define HYPERCALL_PARM_5 HYPERCALL_PARM_4, unsigned long arg5
+#define HYPERCALL_PARM_6 HYPERCALL_PARM_5, unsigned long arg6
+
+#define HYPERCALL_REGS_0
+#define HYPERCALL_REGS_1 \
+ register unsigned long r2 asm("2") = arg1
+#define HYPERCALL_REGS_2 \
+ HYPERCALL_REGS_1; \
+ register unsigned long r3 asm("3") = arg2
+#define HYPERCALL_REGS_3 \
+ HYPERCALL_REGS_2; \
+ register unsigned long r4 asm("4") = arg3
+#define HYPERCALL_REGS_4 \
+ HYPERCALL_REGS_3; \
+ register unsigned long r5 asm("5") = arg4
+#define HYPERCALL_REGS_5 \
+ HYPERCALL_REGS_4; \
+ register unsigned long r6 asm("6") = arg5
+#define HYPERCALL_REGS_6 \
+ HYPERCALL_REGS_5; \
+ register unsigned long r7 asm("7") = arg6
+
+#define HYPERCALL_ARGS_0
+#define HYPERCALL_ARGS_1 , arg1
+#define HYPERCALL_ARGS_2 HYPERCALL_ARGS_1, arg2
+#define HYPERCALL_ARGS_3 HYPERCALL_ARGS_2, arg3
+#define HYPERCALL_ARGS_4 HYPERCALL_ARGS_3, arg4
+#define HYPERCALL_ARGS_5 HYPERCALL_ARGS_4, arg5
+#define HYPERCALL_ARGS_6 HYPERCALL_ARGS_5, arg6
+
+#define GENERATE_KVM_HYPERCALL_FUNC(args) \
+static inline \
+long __kvm_hypercall##args(unsigned long nr HYPERCALL_PARM_##args) \
+{ \
+ register unsigned long __nr asm("1") = nr; \
+ register long __rc asm("2"); \
+ HYPERCALL_REGS_##args; \
+ \
+ asm volatile ( \
+ " diag 2,4,0x500\n" \
+ : "=d" (__rc) \
+ : "d" (__nr) HYPERCALL_FMT_##args \
+ : "memory", "cc"); \
+ return __rc; \
+} \
+ \
+static inline \
+long kvm_hypercall##args(unsigned long nr HYPERCALL_PARM_##args) \
+{ \
+ diag_stat_inc(DIAG_STAT_X500); \
+ return __kvm_hypercall##args(nr HYPERCALL_ARGS_##args); \
+}
+
+GENERATE_KVM_HYPERCALL_FUNC(0)
+GENERATE_KVM_HYPERCALL_FUNC(1)
+GENERATE_KVM_HYPERCALL_FUNC(2)
+GENERATE_KVM_HYPERCALL_FUNC(3)
+GENERATE_KVM_HYPERCALL_FUNC(4)
+GENERATE_KVM_HYPERCALL_FUNC(5)
+GENERATE_KVM_HYPERCALL_FUNC(6)
/* kvm on s390 is always paravirtualization enabled */
static inline int kvm_para_available(void)
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
index 24e8fed150cf..1ffea75b8ebc 100644
--- a/arch/s390/include/asm/linkage.h
+++ b/arch/s390/include/asm/linkage.h
@@ -22,7 +22,7 @@
#define EX_TABLE(_fault, _target) \
__EX_TABLE(__ex_table, _fault, _target)
-#define EX_TABLE_DMA(_fault, _target) \
- __EX_TABLE(.dma.ex_table, _fault, _target)
+#define EX_TABLE_AMODE31(_fault, _target) \
+ __EX_TABLE(.amode31.ex_table, _fault, _target)
#endif
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 47bde5a20a41..11213c8bfca5 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -124,7 +124,8 @@ struct lowcore {
/* Restart function and parameter. */
__u64 restart_fn; /* 0x0370 */
__u64 restart_data; /* 0x0378 */
- __u64 restart_source; /* 0x0380 */
+ __u32 restart_source; /* 0x0380 */
+ __u32 restart_flags; /* 0x0384 */
/* Address space pointer. */
__u64 kernel_asce; /* 0x0388 */
diff --git a/arch/s390/include/asm/module.h b/arch/s390/include/asm/module.h
index e0a6d29846e2..9f1eea15872c 100644
--- a/arch/s390/include/asm/module.h
+++ b/arch/s390/include/asm/module.h
@@ -8,16 +8,14 @@
* This file contains the s390 architecture specific module code.
*/
-struct mod_arch_syminfo
-{
+struct mod_arch_syminfo {
unsigned long got_offset;
unsigned long plt_offset;
int got_initialized;
int plt_initialized;
};
-struct mod_arch_specific
-{
+struct mod_arch_specific {
/* Starting offset of got in the module core memory. */
unsigned long got_offset;
/* Starting offset of plt in the module core memory. */
@@ -30,6 +28,14 @@ struct mod_arch_specific
int nsyms;
/* Additional symbol information (got and plt offsets). */
struct mod_arch_syminfo *syminfo;
+#ifdef CONFIG_FUNCTION_TRACER
+ /* Start of memory reserved for ftrace hotpatch trampolines. */
+ struct ftrace_hotpatch_trampoline *trampolines_start;
+ /* End of memory reserved for ftrace hotpatch trampolines. */
+ struct ftrace_hotpatch_trampoline *trampolines_end;
+ /* Next unused ftrace hotpatch trampoline slot. */
+ struct ftrace_hotpatch_trampoline *next_trampoline;
+#endif /* CONFIG_FUNCTION_TRACER */
};
#endif /* _ASM_S390_MODULE_H */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 3ba945c6b9dc..d98d17a36c7b 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -144,9 +144,6 @@ struct page;
void arch_free_page(struct page *page, int order);
void arch_alloc_page(struct page *page, int order);
void arch_set_page_dat(struct page *page, int order);
-void arch_set_page_nodat(struct page *page, int order);
-int arch_test_page_nodat(struct page *page);
-void arch_set_page_states(int make_stable);
static inline int devmem_is_allowed(unsigned long pfn)
{
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 5509b224c2ec..e4803ec51110 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -216,9 +216,10 @@ void zpci_remove_reserved_devices(void);
int clp_setup_writeback_mio(void);
int clp_scan_pci_devices(void);
int clp_query_pci_fn(struct zpci_dev *zdev);
-int clp_enable_fh(struct zpci_dev *, u8);
-int clp_disable_fh(struct zpci_dev *);
+int clp_enable_fh(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as);
+int clp_disable_fh(struct zpci_dev *zdev, u32 *fh);
int clp_get_state(u32 fid, enum zpci_state *state);
+int clp_refresh_fh(u32 fid, u32 *fh);
/* UID */
void update_uid_checking(bool new);
@@ -271,6 +272,8 @@ struct zpci_dev *get_zdev_by_fid(u32);
/* DMA */
int zpci_dma_init(void);
void zpci_dma_exit(void);
+int zpci_dma_init_device(struct zpci_dev *zdev);
+int zpci_dma_exit_device(struct zpci_dev *zdev);
/* IRQ */
int __init zpci_irq_init(void);
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index f62cd3ed2d44..3b8e89d4578a 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -182,8 +182,6 @@ static inline unsigned long *get_st_pto(unsigned long entry)
}
/* Prototypes */
-int zpci_dma_init_device(struct zpci_dev *);
-void zpci_dma_exit_device(struct zpci_dev *);
void dma_free_seg_table(unsigned long);
unsigned long *dma_alloc_cpu_table(void);
void dma_cleanup_tables(unsigned long *);
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index dcac7b2df72c..b61426c9ef17 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -67,15 +67,15 @@ extern unsigned long zero_page_mask;
/* TODO: s390 cannot support io_remap_pfn_range... */
#define pte_ERROR(e) \
- printk("%s:%d: bad pte %p.\n", __FILE__, __LINE__, (void *) pte_val(e))
+ pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
#define pmd_ERROR(e) \
- printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e))
+ pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
#define pud_ERROR(e) \
- printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e))
+ pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
#define p4d_ERROR(e) \
- printk("%s:%d: bad p4d %p.\n", __FILE__, __LINE__, (void *) p4d_val(e))
+ pr_err("%s:%d: bad p4d %016lx.\n", __FILE__, __LINE__, p4d_val(e))
#define pgd_ERROR(e) \
- printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))
+ pr_err("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))
/*
* The vmalloc and module area will always be on the topmost area of the
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index ddc7858bbce4..879b8e3f609c 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -26,6 +26,8 @@
#define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST)
#define _CIF_DEDICATED_CPU BIT(CIF_DEDICATED_CPU)
+#define RESTART_FLAG_CTLREGS _AC(1 << 0, U)
+
#ifndef __ASSEMBLY__
#include <linux/cpumask.h>
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index cb4f73c7228d..25b5dc34db75 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -291,16 +291,15 @@ struct qdio_ssqd_desc {
typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
int, int, unsigned long);
-/* qdio errors reported to the upper-layer program */
+/* qdio errors reported through the queue handlers: */
#define QDIO_ERROR_ACTIVATE 0x0001
#define QDIO_ERROR_GET_BUF_STATE 0x0002
#define QDIO_ERROR_SET_BUF_STATE 0x0004
+
+/* extra info for completed SBALs: */
#define QDIO_ERROR_SLSB_STATE 0x0100
#define QDIO_ERROR_SLSB_PENDING 0x0200
-#define QDIO_ERROR_FATAL 0x00ff
-#define QDIO_ERROR_TEMPORARY 0xff00
-
/* for qdio_cleanup */
#define QDIO_FLAG_CLEANUP_USING_CLEAR 0x01
#define QDIO_FLAG_CLEANUP_USING_HALT 0x02
@@ -312,8 +311,6 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
* @qib_param_field_format: format for qib_parm_field
* @qib_param_field: pointer to 128 bytes or NULL, if no param field
* @qib_rflags: rflags to set
- * @input_slib_elements: pointer to no_input_qs * 128 words of data or NULL
- * @output_slib_elements: pointer to no_output_qs * 128 words of data or NULL
* @no_input_qs: number of input queues
* @no_output_qs: number of output queues
* @input_handler: handler to be called for input queues
@@ -330,27 +327,18 @@ struct qdio_initialize {
unsigned int qib_param_field_format;
unsigned char *qib_param_field;
unsigned char qib_rflags;
- unsigned long *input_slib_elements;
- unsigned long *output_slib_elements;
unsigned int no_input_qs;
unsigned int no_output_qs;
qdio_handler_t *input_handler;
qdio_handler_t *output_handler;
void (*irq_poll)(struct ccw_device *cdev, unsigned long data);
- unsigned int scan_threshold;
unsigned long int_parm;
struct qdio_buffer ***input_sbal_addr_array;
struct qdio_buffer ***output_sbal_addr_array;
};
-#define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */
-#define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */
-#define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */
-#define QDIO_STATE_STOPPED 0x00000010 /* after queues went down */
-
#define QDIO_FLAG_SYNC_INPUT 0x01
#define QDIO_FLAG_SYNC_OUTPUT 0x02
-#define QDIO_FLAG_PCI_OUT 0x10
int qdio_alloc_buffers(struct qdio_buffer **buf, unsigned int count);
void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count);
@@ -367,7 +355,6 @@ extern int do_QDIO(struct ccw_device *cdev, unsigned int callflags, int q_nr,
unsigned int bufnr, unsigned int count, struct qaob *aob);
extern int qdio_start_irq(struct ccw_device *cdev);
extern int qdio_stop_irq(struct ccw_device *cdev);
-extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *);
extern int qdio_inspect_queue(struct ccw_device *cdev, unsigned int nr,
bool is_input, unsigned int *bufnr,
unsigned int *error);
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 5763769a39b6..e3ae937bef1c 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -8,8 +8,6 @@
#define _ASM_S390_SCLP_H
#include <linux/types.h>
-#include <asm/chpid.h>
-#include <asm/cpu.h>
#define SCLP_CHP_INFO_MASK_SIZE 32
#define EARLY_SCCB_SIZE PAGE_SIZE
@@ -19,6 +17,10 @@
/* 24 + 16 * SCLP_MAX_CORES */
#define EXT_SCCB_READ_CPU (3 * PAGE_SIZE)
+#ifndef __ASSEMBLY__
+#include <asm/chpid.h>
+#include <asm/cpu.h>
+
struct sclp_chp_info {
u8 recognized[SCLP_CHP_INFO_MASK_SIZE];
u8 standby[SCLP_CHP_INFO_MASK_SIZE];
@@ -113,6 +115,9 @@ struct zpci_report_error_header {
u8 data[0]; /* Subsequent Data passed verbatim to SCLP ET 24 */
} __packed;
+extern char *sclp_early_sccb;
+
+void sclp_early_set_buffer(void *sccb);
int sclp_early_read_info(void);
int sclp_early_read_storage_info(void);
int sclp_early_get_core_info(struct sclp_core_info *info);
@@ -147,4 +152,5 @@ static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
return _sclp_get_core_info(info);
}
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h
index 0c2151451ba5..85881dd48022 100644
--- a/arch/s390/include/asm/sections.h
+++ b/arch/s390/include/asm/sections.h
@@ -35,7 +35,7 @@ static inline int arch_is_kernel_initmem_freed(unsigned long addr)
*/
#define __bootdata_preserved(var) __section(".boot.preserved.data." #var) var
-extern unsigned long __sdma, __edma;
-extern unsigned long __stext_dma, __etext_dma;
+extern unsigned long __samode31, __eamode31;
+extern unsigned long __stext_amode31, __etext_amode31;
#endif
diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h
index a22a5a81811c..950d87bd997a 100644
--- a/arch/s390/include/asm/set_memory.h
+++ b/arch/s390/include/asm/set_memory.h
@@ -10,6 +10,7 @@ extern struct mutex cpa_mutex;
#define SET_MEMORY_RW 2UL
#define SET_MEMORY_NX 4UL
#define SET_MEMORY_X 8UL
+#define SET_MEMORY_4K 16UL
int __set_memory(unsigned long addr, int numpages, unsigned long flags);
@@ -33,4 +34,9 @@ static inline int set_memory_x(unsigned long addr, int numpages)
return __set_memory(addr, numpages, SET_MEMORY_X);
}
+static inline int set_memory_4k(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, SET_MEMORY_4K);
+}
+
#endif
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 3a77aa96d092..b6606ffd85d8 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -10,11 +10,8 @@
#include <uapi/asm/setup.h>
#include <linux/build_bug.h>
-#define EP_OFFSET 0x10008
-#define EP_STRING "S390EP"
#define PARMAREA 0x10400
-#define EARLY_SCCB_OFFSET 0x11000
-#define HEAD_END 0x12000
+#define HEAD_END 0x11000
/*
* Machine features detected in early.c
@@ -36,6 +33,7 @@
#define MACHINE_FLAG_NX BIT(15)
#define MACHINE_FLAG_GS BIT(16)
#define MACHINE_FLAG_SCC BIT(17)
+#define MACHINE_FLAG_PCI_MIO BIT(18)
#define LPP_MAGIC BIT(31)
#define LPP_PID_MASK _AC(0xffffffff, UL)
@@ -45,28 +43,11 @@
#define STARTUP_NORMAL_OFFSET 0x10000
#define STARTUP_KDUMP_OFFSET 0x10010
-/* Offsets to parameters in kernel/head.S */
-
-#define IPL_DEVICE_OFFSET 0x10400
-#define INITRD_START_OFFSET 0x10408
-#define INITRD_SIZE_OFFSET 0x10410
-#define OLDMEM_BASE_OFFSET 0x10418
-#define OLDMEM_SIZE_OFFSET 0x10420
-#define KERNEL_VERSION_OFFSET 0x10428
-#define COMMAND_LINE_OFFSET 0x10480
-
#ifndef __ASSEMBLY__
#include <asm/lowcore.h>
#include <asm/types.h>
-#define IPL_DEVICE (*(unsigned long *) (IPL_DEVICE_OFFSET))
-#define INITRD_START (*(unsigned long *) (INITRD_START_OFFSET))
-#define INITRD_SIZE (*(unsigned long *) (INITRD_SIZE_OFFSET))
-#define OLDMEM_BASE (*(unsigned long *) (OLDMEM_BASE_OFFSET))
-#define OLDMEM_SIZE (*(unsigned long *) (OLDMEM_SIZE_OFFSET))
-#define COMMAND_LINE ((char *) (COMMAND_LINE_OFFSET))
-
struct parmarea {
unsigned long ipl_device; /* 0x10400 */
unsigned long initrd_start; /* 0x10408 */
@@ -110,6 +91,7 @@ extern unsigned long mio_wb_bit_mask;
#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX)
#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
#define MACHINE_HAS_SCC (S390_lowcore.machine_flags & MACHINE_FLAG_SCC)
+#define MACHINE_HAS_PCI_MIO (S390_lowcore.machine_flags & MACHINE_FLAG_PCI_MIO)
/*
* Console mode. Override with conmode=
@@ -161,20 +143,22 @@ static inline unsigned long kaslr_offset(void)
extern int is_full_image;
+struct initrd_data {
+ unsigned long start;
+ unsigned long size;
+};
+extern struct initrd_data initrd_data;
+
+struct oldmem_data {
+ unsigned long start;
+ unsigned long size;
+};
+extern struct oldmem_data oldmem_data;
+
static inline u32 gen_lpswe(unsigned long addr)
{
BUILD_BUG_ON(addr > 0xfff);
return 0xb2b20000 | addr;
}
-
-#else /* __ASSEMBLY__ */
-
-#define IPL_DEVICE (IPL_DEVICE_OFFSET)
-#define INITRD_START (INITRD_START_OFFSET)
-#define INITRD_SIZE (INITRD_SIZE_OFFSET)
-#define OLDMEM_BASE (OLDMEM_BASE_OFFSET)
-#define OLDMEM_SIZE (OLDMEM_SIZE_OFFSET)
-#define COMMAND_LINE (COMMAND_LINE_OFFSET)
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_SETUP_H */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 9107e3dab68c..b3dd883699e7 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -104,4 +104,63 @@ static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
return false;
}
+#define SYSCALL_FMT_0
+#define SYSCALL_FMT_1 , "0" (r2)
+#define SYSCALL_FMT_2 , "d" (r3) SYSCALL_FMT_1
+#define SYSCALL_FMT_3 , "d" (r4) SYSCALL_FMT_2
+#define SYSCALL_FMT_4 , "d" (r5) SYSCALL_FMT_3
+#define SYSCALL_FMT_5 , "d" (r6) SYSCALL_FMT_4
+#define SYSCALL_FMT_6 , "d" (r7) SYSCALL_FMT_5
+
+#define SYSCALL_PARM_0
+#define SYSCALL_PARM_1 , long arg1
+#define SYSCALL_PARM_2 SYSCALL_PARM_1, long arg2
+#define SYSCALL_PARM_3 SYSCALL_PARM_2, long arg3
+#define SYSCALL_PARM_4 SYSCALL_PARM_3, long arg4
+#define SYSCALL_PARM_5 SYSCALL_PARM_4, long arg5
+#define SYSCALL_PARM_6 SYSCALL_PARM_5, long arg6
+
+#define SYSCALL_REGS_0
+#define SYSCALL_REGS_1 \
+ register long r2 asm("2") = arg1
+#define SYSCALL_REGS_2 \
+ SYSCALL_REGS_1; \
+ register long r3 asm("3") = arg2
+#define SYSCALL_REGS_3 \
+ SYSCALL_REGS_2; \
+ register long r4 asm("4") = arg3
+#define SYSCALL_REGS_4 \
+ SYSCALL_REGS_3; \
+ register long r5 asm("5") = arg4
+#define SYSCALL_REGS_5 \
+ SYSCALL_REGS_4; \
+ register long r6 asm("6") = arg5
+#define SYSCALL_REGS_6 \
+ SYSCALL_REGS_5; \
+ register long r7 asm("7") = arg6
+
+#define GENERATE_SYSCALL_FUNC(nr) \
+static __always_inline \
+long syscall##nr(unsigned long syscall SYSCALL_PARM_##nr) \
+{ \
+ register unsigned long r1 asm ("1") = syscall; \
+ register long rc asm ("2"); \
+ SYSCALL_REGS_##nr; \
+ \
+ asm volatile ( \
+ " svc 0\n" \
+ : "=d" (rc) \
+ : "d" (r1) SYSCALL_FMT_##nr \
+ : "memory"); \
+ return rc; \
+}
+
+GENERATE_SYSCALL_FUNC(0)
+GENERATE_SYSCALL_FUNC(1)
+GENERATE_SYSCALL_FUNC(2)
+GENERATE_SYSCALL_FUNC(3)
+GENERATE_SYSCALL_FUNC(4)
+GENERATE_SYSCALL_FUNC(5)
+GENERATE_SYSCALL_FUNC(6)
+
#endif /* _ASM_SYSCALL_H */
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index 12c5f006c136..fe92a4caf5ec 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -356,11 +356,9 @@ int uv_convert_from_secure(unsigned long paddr);
int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
void setup_uv(void);
-void adjust_to_uv_max(unsigned long *vmax);
#else
#define is_prot_virt_host() 0
static inline void setup_uv(void) {}
-static inline void adjust_to_uv_max(unsigned long *vmax) {}
static inline int uv_destroy_page(unsigned long paddr)
{
@@ -373,10 +371,4 @@ static inline int uv_convert_from_secure(unsigned long paddr)
}
#endif
-#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
-void uv_query_info(void);
-#else
-static inline void uv_query_info(void) {}
-#endif
-
#endif /* _ASM_S390_UV_H */
diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h
index d6465b22ffe3..db84942eb78f 100644
--- a/arch/s390/include/asm/vdso/gettimeofday.h
+++ b/arch/s390/include/asm/vdso/gettimeofday.h
@@ -6,6 +6,7 @@
#define VDSO_HAS_CLOCK_GETRES 1
+#include <asm/syscall.h>
#include <asm/timex.h>
#include <asm/unistd.h>
#include <linux/compiler.h>
@@ -35,35 +36,20 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *
static __always_inline
long clock_gettime_fallback(clockid_t clkid, struct __kernel_timespec *ts)
{
- register unsigned long r1 __asm__("r1") = __NR_clock_gettime;
- register unsigned long r2 __asm__("r2") = (unsigned long)clkid;
- register void *r3 __asm__("r3") = ts;
-
- asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory");
- return r2;
+ return syscall2(__NR_clock_gettime, (long)clkid, (long)ts);
}
static __always_inline
long gettimeofday_fallback(register struct __kernel_old_timeval *tv,
register struct timezone *tz)
{
- register unsigned long r1 __asm__("r1") = __NR_gettimeofday;
- register unsigned long r2 __asm__("r2") = (unsigned long)tv;
- register void *r3 __asm__("r3") = tz;
-
- asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory");
- return r2;
+ return syscall2(__NR_gettimeofday, (long)tv, (long)tz);
}
static __always_inline
long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts)
{
- register unsigned long r1 __asm__("r1") = __NR_clock_getres;
- register unsigned long r2 __asm__("r2") = (unsigned long)clkid;
- register void *r3 __asm__("r3") = ts;
-
- asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory");
- return r2;
+ return syscall2(__NR_clock_getres, (long)clkid, (long)ts);
}
#ifdef CONFIG_TIME_NS
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 4a44ba5a2d73..80f500ffb55c 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -40,7 +40,7 @@ obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y += smp.o
+obj-y += smp.o text_amode31.o
extra-y += head64.o vmlinux.lds
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 77ff2130cb04..b57da9338588 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -116,6 +116,7 @@ int main(void)
OFFSET(__LC_RESTART_FN, lowcore, restart_fn);
OFFSET(__LC_RESTART_DATA, lowcore, restart_data);
OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source);
+ OFFSET(__LC_RESTART_FLAGS, lowcore, restart_flags);
OFFSET(__LC_KERNEL_ASCE, lowcore, kernel_asce);
OFFSET(__LC_USER_ASCE, lowcore, user_asce);
OFFSET(__LC_LPP, lowcore, lpp);
@@ -152,5 +153,12 @@ int main(void)
DEFINE(__KEXEC_SHA_REGION_SIZE, sizeof(struct kexec_sha_region));
/* sizeof kernel parameter area */
DEFINE(__PARMAREA_SIZE, sizeof(struct parmarea));
+ /* kernel parameter area offsets */
+ DEFINE(IPL_DEVICE, PARMAREA + offsetof(struct parmarea, ipl_device));
+ DEFINE(INITRD_START, PARMAREA + offsetof(struct parmarea, initrd_start));
+ DEFINE(INITRD_SIZE, PARMAREA + offsetof(struct parmarea, initrd_size));
+ DEFINE(OLDMEM_BASE, PARMAREA + offsetof(struct parmarea, oldmem_base));
+ DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size));
+ DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line));
return 0;
}
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 0e36dfc9ccd6..d72a6df058d7 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -140,7 +140,7 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count)
while (count) {
from = __pa(src);
- if (!OLDMEM_BASE && from < sclp.hsa_size) {
+ if (!oldmem_data.start && from < sclp.hsa_size) {
/* Copy from zfcp/nvme dump HSA area */
len = min(count, sclp.hsa_size - from);
rc = memcpy_hsa_kernel(dst, from, len);
@@ -148,12 +148,12 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count)
return rc;
} else {
/* Check for swapped kdump oldmem areas */
- if (OLDMEM_BASE && from - OLDMEM_BASE < OLDMEM_SIZE) {
- from -= OLDMEM_BASE;
- len = min(count, OLDMEM_SIZE - from);
- } else if (OLDMEM_BASE && from < OLDMEM_SIZE) {
- len = min(count, OLDMEM_SIZE - from);
- from += OLDMEM_BASE;
+ if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) {
+ from -= oldmem_data.start;
+ len = min(count, oldmem_data.size - from);
+ } else if (oldmem_data.start && from < oldmem_data.size) {
+ len = min(count, oldmem_data.size - from);
+ from += oldmem_data.start;
} else {
len = count;
}
@@ -183,7 +183,7 @@ static int copy_oldmem_user(void __user *dst, void *src, size_t count)
while (count) {
from = __pa(src);
- if (!OLDMEM_BASE && from < sclp.hsa_size) {
+ if (!oldmem_data.start && from < sclp.hsa_size) {
/* Copy from zfcp/nvme dump HSA area */
len = min(count, sclp.hsa_size - from);
rc = memcpy_hsa_user(dst, from, len);
@@ -191,12 +191,12 @@ static int copy_oldmem_user(void __user *dst, void *src, size_t count)
return rc;
} else {
/* Check for swapped kdump oldmem areas */
- if (OLDMEM_BASE && from - OLDMEM_BASE < OLDMEM_SIZE) {
- from -= OLDMEM_BASE;
- len = min(count, OLDMEM_SIZE - from);
- } else if (OLDMEM_BASE && from < OLDMEM_SIZE) {
- len = min(count, OLDMEM_SIZE - from);
- from += OLDMEM_BASE;
+ if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) {
+ from -= oldmem_data.start;
+ len = min(count, oldmem_data.size - from);
+ } else if (oldmem_data.start && from < oldmem_data.size) {
+ len = min(count, oldmem_data.size - from);
+ from += oldmem_data.start;
} else {
len = count;
}
@@ -243,10 +243,10 @@ static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma,
unsigned long size_old;
int rc;
- if (pfn < OLDMEM_SIZE >> PAGE_SHIFT) {
- size_old = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT));
+ if (pfn < oldmem_data.size >> PAGE_SHIFT) {
+ size_old = min(size, oldmem_data.size - (pfn << PAGE_SHIFT));
rc = remap_pfn_range(vma, from,
- pfn + (OLDMEM_BASE >> PAGE_SHIFT),
+ pfn + (oldmem_data.start >> PAGE_SHIFT),
size_old, prot);
if (rc || size == size_old)
return rc;
@@ -288,7 +288,7 @@ static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma,
int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
unsigned long pfn, unsigned long size, pgprot_t prot)
{
- if (OLDMEM_BASE)
+ if (oldmem_data.start)
return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot);
else
return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size,
@@ -633,17 +633,17 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
u64 hdr_off;
/* If we are not in kdump or zfcp/nvme dump mode return */
- if (!OLDMEM_BASE && !is_ipl_type_dump())
+ if (!oldmem_data.start && !is_ipl_type_dump())
return 0;
/* If we cannot get HSA size for zfcp/nvme dump return error */
if (is_ipl_type_dump() && !sclp.hsa_size)
return -ENODEV;
/* For kdump, exclude previous crashkernel memory */
- if (OLDMEM_BASE) {
- oldmem_region.base = OLDMEM_BASE;
- oldmem_region.size = OLDMEM_SIZE;
- oldmem_type.total_size = OLDMEM_SIZE;
+ if (oldmem_data.start) {
+ oldmem_region.base = oldmem_data.start;
+ oldmem_region.size = oldmem_data.size;
+ oldmem_type.total_size = oldmem_data.size;
}
mem_chunk_cnt = get_mem_chunk_cnt();
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 09b6c6402f9b..4331c7e6e1c0 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -24,6 +24,7 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/minmax.h>
#include <linux/debugfs.h>
#include <asm/debug.h>
@@ -92,6 +93,8 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
char *out_buf, const char *in_buf);
static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
char *out_buf, debug_sprintf_entry_t *curr_event);
+static void debug_areas_swap(debug_info_t *a, debug_info_t *b);
+static void debug_events_append(debug_info_t *dest, debug_info_t *src);
/* globals */
@@ -311,24 +314,6 @@ static debug_info_t *debug_info_create(const char *name, int pages_per_area,
goto out;
rc->mode = mode & ~S_IFMT;
-
- /* create root directory */
- rc->debugfs_root_entry = debugfs_create_dir(rc->name,
- debug_debugfs_root_entry);
-
- /* append new element to linked list */
- if (!debug_area_first) {
- /* first element in list */
- debug_area_first = rc;
- rc->prev = NULL;
- } else {
- /* append element to end of list */
- debug_area_last->next = rc;
- rc->prev = debug_area_last;
- }
- debug_area_last = rc;
- rc->next = NULL;
-
refcount_set(&rc->ref_count, 1);
out:
return rc;
@@ -388,27 +373,10 @@ static void debug_info_get(debug_info_t *db_info)
*/
static void debug_info_put(debug_info_t *db_info)
{
- int i;
-
if (!db_info)
return;
- if (refcount_dec_and_test(&db_info->ref_count)) {
- for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
- if (!db_info->views[i])
- continue;
- debugfs_remove(db_info->debugfs_entries[i]);
- }
- debugfs_remove(db_info->debugfs_root_entry);
- if (db_info == debug_area_first)
- debug_area_first = db_info->next;
- if (db_info == debug_area_last)
- debug_area_last = db_info->prev;
- if (db_info->prev)
- db_info->prev->next = db_info->next;
- if (db_info->next)
- db_info->next->prev = db_info->prev;
+ if (refcount_dec_and_test(&db_info->ref_count))
debug_info_free(db_info);
- }
}
/*
@@ -632,6 +600,31 @@ static int debug_close(struct inode *inode, struct file *file)
return 0; /* success */
}
+/* Create debugfs entries and add to internal list. */
+static void _debug_register(debug_info_t *id)
+{
+ /* create root directory */
+ id->debugfs_root_entry = debugfs_create_dir(id->name,
+ debug_debugfs_root_entry);
+
+ /* append new element to linked list */
+ if (!debug_area_first) {
+ /* first element in list */
+ debug_area_first = id;
+ id->prev = NULL;
+ } else {
+ /* append element to end of list */
+ debug_area_last->next = id;
+ id->prev = debug_area_last;
+ }
+ debug_area_last = id;
+ id->next = NULL;
+
+ debug_register_view(id, &debug_level_view);
+ debug_register_view(id, &debug_flush_view);
+ debug_register_view(id, &debug_pages_view);
+}
+
/**
* debug_register_mode() - creates and initializes debug area.
*
@@ -661,19 +654,16 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area,
if ((uid != 0) || (gid != 0))
pr_warn("Root becomes the owner of all s390dbf files in sysfs\n");
BUG_ON(!initialized);
- mutex_lock(&debug_mutex);
/* create new debug_info */
rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode);
- if (!rc)
- goto out;
- debug_register_view(rc, &debug_level_view);
- debug_register_view(rc, &debug_flush_view);
- debug_register_view(rc, &debug_pages_view);
-out:
- if (!rc)
+ if (rc) {
+ mutex_lock(&debug_mutex);
+ _debug_register(rc);
+ mutex_unlock(&debug_mutex);
+ } else {
pr_err("Registering debug feature %s failed\n", name);
- mutex_unlock(&debug_mutex);
+ }
return rc;
}
EXPORT_SYMBOL(debug_register_mode);
@@ -703,6 +693,82 @@ debug_info_t *debug_register(const char *name, int pages_per_area,
EXPORT_SYMBOL(debug_register);
/**
+ * debug_register_static() - registers a static debug area
+ *
+ * @id: Handle for static debug area
+ * @pages_per_area: Number of pages per area
+ * @nr_areas: Number of debug areas
+ *
+ * Register debug_info_t defined using DEFINE_STATIC_DEBUG_INFO.
+ *
+ * Note: This function is called automatically via an initcall generated by
+ * DEFINE_STATIC_DEBUG_INFO.
+ */
+void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas)
+{
+ unsigned long flags;
+ debug_info_t *copy;
+
+ if (!initialized) {
+ pr_err("Tried to register debug feature %s too early\n",
+ id->name);
+ return;
+ }
+
+ copy = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size,
+ id->level, ALL_AREAS);
+ if (!copy) {
+ pr_err("Registering debug feature %s failed\n", id->name);
+
+ /* Clear pointers to prevent tracing into released initdata. */
+ spin_lock_irqsave(&id->lock, flags);
+ id->areas = NULL;
+ id->active_pages = NULL;
+ id->active_entries = NULL;
+ spin_unlock_irqrestore(&id->lock, flags);
+
+ return;
+ }
+
+ /* Replace static trace area with dynamic copy. */
+ spin_lock_irqsave(&id->lock, flags);
+ debug_events_append(copy, id);
+ debug_areas_swap(id, copy);
+ spin_unlock_irqrestore(&id->lock, flags);
+
+ /* Clear pointers to initdata and discard copy. */
+ copy->areas = NULL;
+ copy->active_pages = NULL;
+ copy->active_entries = NULL;
+ debug_info_free(copy);
+
+ mutex_lock(&debug_mutex);
+ _debug_register(id);
+ mutex_unlock(&debug_mutex);
+}
+
+/* Remove debugfs entries and remove from internal list. */
+static void _debug_unregister(debug_info_t *id)
+{
+ int i;
+
+ for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
+ if (!id->views[i])
+ continue;
+ debugfs_remove(id->debugfs_entries[i]);
+ }
+ debugfs_remove(id->debugfs_root_entry);
+ if (id == debug_area_first)
+ debug_area_first = id->next;
+ if (id == debug_area_last)
+ debug_area_last = id->prev;
+ if (id->prev)
+ id->prev->next = id->next;
+ if (id->next)
+ id->next->prev = id->prev;
+}
+
+/**
* debug_unregister() - give back debug area.
*
* @id: handle for debug log
@@ -715,8 +781,10 @@ void debug_unregister(debug_info_t *id)
if (!id)
return;
mutex_lock(&debug_mutex);
- debug_info_put(id);
+ _debug_unregister(id);
mutex_unlock(&debug_mutex);
+
+ debug_info_put(id);
}
EXPORT_SYMBOL(debug_unregister);
@@ -726,35 +794,28 @@ EXPORT_SYMBOL(debug_unregister);
*/
static int debug_set_size(debug_info_t *id, int nr_areas, int pages_per_area)
{
- debug_entry_t ***new_areas;
+ debug_info_t *new_id;
unsigned long flags;
- int rc = 0;
if (!id || (nr_areas <= 0) || (pages_per_area < 0))
return -EINVAL;
- if (pages_per_area > 0) {
- new_areas = debug_areas_alloc(pages_per_area, nr_areas);
- if (!new_areas) {
- pr_info("Allocating memory for %i pages failed\n",
- pages_per_area);
- rc = -ENOMEM;
- goto out;
- }
- } else {
- new_areas = NULL;
+
+ new_id = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size,
+ id->level, ALL_AREAS);
+ if (!new_id) {
+ pr_info("Allocating memory for %i pages failed\n",
+ pages_per_area);
+ return -ENOMEM;
}
+
spin_lock_irqsave(&id->lock, flags);
- debug_areas_free(id);
- id->areas = new_areas;
- id->nr_areas = nr_areas;
- id->pages_per_area = pages_per_area;
- id->active_area = 0;
- memset(id->active_entries, 0, sizeof(int)*id->nr_areas);
- memset(id->active_pages, 0, sizeof(int)*id->nr_areas);
+ debug_events_append(new_id, id);
+ debug_areas_swap(new_id, id);
+ debug_info_free(new_id);
spin_unlock_irqrestore(&id->lock, flags);
pr_info("%s: set new size (%i pages)\n", id->name, pages_per_area);
-out:
- return rc;
+
+ return 0;
}
/**
@@ -772,16 +833,17 @@ void debug_set_level(debug_info_t *id, int new_level)
if (!id)
return;
- spin_lock_irqsave(&id->lock, flags);
+
if (new_level == DEBUG_OFF_LEVEL) {
- id->level = DEBUG_OFF_LEVEL;
pr_info("%s: switched off\n", id->name);
} else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) {
pr_info("%s: level %i is out of range (%i - %i)\n",
id->name, new_level, 0, DEBUG_MAX_LEVEL);
- } else {
- id->level = new_level;
+ return;
}
+
+ spin_lock_irqsave(&id->lock, flags);
+ id->level = new_level;
spin_unlock_irqrestore(&id->lock, flags);
}
EXPORT_SYMBOL(debug_set_level);
@@ -821,6 +883,42 @@ static inline debug_entry_t *get_active_entry(debug_info_t *id)
id->active_entries[id->active_area]);
}
+/* Swap debug areas of a and b. */
+static void debug_areas_swap(debug_info_t *a, debug_info_t *b)
+{
+ swap(a->nr_areas, b->nr_areas);
+ swap(a->pages_per_area, b->pages_per_area);
+ swap(a->areas, b->areas);
+ swap(a->active_area, b->active_area);
+ swap(a->active_pages, b->active_pages);
+ swap(a->active_entries, b->active_entries);
+}
+
+/* Append all debug events in active area from source to destination log. */
+static void debug_events_append(debug_info_t *dest, debug_info_t *src)
+{
+ debug_entry_t *from, *to, *last;
+
+ if (!src->areas || !dest->areas)
+ return;
+
+ /* Loop over all entries in src, starting with oldest. */
+ from = get_active_entry(src);
+ last = from;
+ do {
+ if (from->clock != 0LL) {
+ to = get_active_entry(dest);
+ memset(to, 0, dest->entry_size);
+ memcpy(to, from, min(src->entry_size,
+ dest->entry_size));
+ proceed_active_entry(dest);
+ }
+
+ proceed_active_entry(src);
+ from = get_active_entry(src);
+ } while (from != last);
+}
+
/*
* debug_finish_entry:
* - set timestamp, caller address, cpu number etc.
@@ -1111,16 +1209,17 @@ int debug_register_view(debug_info_t *id, struct debug_view *view)
break;
}
if (i == DEBUG_MAX_VIEWS) {
- pr_err("Registering view %s/%s would exceed the maximum "
- "number of views %i\n", id->name, view->name, i);
rc = -1;
} else {
id->views[i] = view;
id->debugfs_entries[i] = pde;
}
spin_unlock_irqrestore(&id->lock, flags);
- if (rc)
+ if (rc) {
+ pr_err("Registering view %s/%s would exceed the maximum "
+ "number of views %i\n", id->name, view->name, i);
debugfs_remove(pde);
+ }
out:
return rc;
}
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index a3f47464c3f1..76a656b2146f 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -14,6 +14,7 @@
#include <asm/diag.h>
#include <asm/trace/diag.h>
#include <asm/sections.h>
+#include "entry.h"
struct diag_stat {
unsigned int counter[NR_DIAG_STAT];
@@ -50,8 +51,16 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
[DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
};
-struct diag_ops __bootdata_preserved(diag_dma_ops);
-struct diag210 *__bootdata_preserved(__diag210_tmp_dma);
+struct diag_ops __amode31_ref diag_amode31_ops = {
+ .diag210 = _diag210_amode31,
+ .diag26c = _diag26c_amode31,
+ .diag14 = _diag14_amode31,
+ .diag0c = _diag0c_amode31,
+ .diag308_reset = _diag308_reset_amode31
+};
+
+static struct diag210 _diag210_tmp_amode31 __section(".amode31.data");
+struct diag210 __amode31_ref *__diag210_tmp_amode31 = &_diag210_tmp_amode31;
static int show_diag_stat(struct seq_file *m, void *v)
{
@@ -59,7 +68,7 @@ static int show_diag_stat(struct seq_file *m, void *v)
unsigned long n = (unsigned long) v - 1;
int cpu, prec, tmp;
- get_online_cpus();
+ cpus_read_lock();
if (n == 0) {
seq_puts(m, " ");
@@ -78,7 +87,7 @@ static int show_diag_stat(struct seq_file *m, void *v)
}
seq_printf(m, " %s\n", diag_map[n-1].name);
}
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
@@ -135,7 +144,7 @@ EXPORT_SYMBOL(diag_stat_inc_norecursion);
int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
{
diag_stat_inc(DIAG_STAT_X014);
- return diag_dma_ops.diag14(rx, ry1, subcode);
+ return diag_amode31_ops.diag14(rx, ry1, subcode);
}
EXPORT_SYMBOL(diag14);
@@ -172,12 +181,12 @@ int diag210(struct diag210 *addr)
int ccode;
spin_lock_irqsave(&diag210_lock, flags);
- *__diag210_tmp_dma = *addr;
+ *__diag210_tmp_amode31 = *addr;
diag_stat_inc(DIAG_STAT_X210);
- ccode = diag_dma_ops.diag210(__diag210_tmp_dma);
+ ccode = diag_amode31_ops.diag210(__diag210_tmp_amode31);
- *addr = *__diag210_tmp_dma;
+ *addr = *__diag210_tmp_amode31;
spin_unlock_irqrestore(&diag210_lock, flags);
return ccode;
@@ -205,6 +214,6 @@ EXPORT_SYMBOL(diag224);
int diag26c(void *req, void *resp, enum diag26c_sc subcode)
{
diag_stat_inc(DIAG_STAT_X26C);
- return diag_dma_ops.diag26c(req, resp, subcode);
+ return diag_amode31_ops.diag26c(req, resp, subcode);
}
EXPORT_SYMBOL(diag26c);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 5412efe328f8..ec5515423f17 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -312,10 +312,12 @@ static const unsigned char formats[][6] = {
[INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 },
[INSTR_VRR_VV0U] = { V_8, V_12, U4_32, 0, 0, 0 },
[INSTR_VRR_VV0U0U] = { V_8, V_12, U4_32, U4_24, 0, 0 },
+ [INSTR_VRR_VV0U2] = { V_8, V_12, U4_24, 0, 0, 0 },
[INSTR_VRR_VV0UU2] = { V_8, V_12, U4_32, U4_28, 0, 0 },
[INSTR_VRR_VV0UUU] = { V_8, V_12, U4_32, U4_28, U4_24, 0 },
[INSTR_VRR_VVV] = { V_8, V_12, V_16, 0, 0, 0 },
[INSTR_VRR_VVV0U] = { V_8, V_12, V_16, U4_32, 0, 0 },
+ [INSTR_VRR_VVV0U0] = { V_8, V_12, V_16, U4_24, 0, 0 },
[INSTR_VRR_VVV0U0U] = { V_8, V_12, V_16, U4_32, U4_24, 0 },
[INSTR_VRR_VVV0UU] = { V_8, V_12, V_16, U4_32, U4_28, 0 },
[INSTR_VRR_VVV0UUU] = { V_8, V_12, V_16, U4_32, U4_28, U4_24 },
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index fb84e3fc1686..9857cb046726 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -236,6 +236,10 @@ static __init void detect_machine_facilities(void)
clock_comparator_max = -1ULL >> 1;
__ctl_set_bit(0, 53);
}
+ if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) {
+ S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO;
+ /* the control bit is set during PCI initialization */
+ }
}
static inline void save_vector_registers(void)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 5a2f70cbd3a9..b9716a7e326d 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -624,12 +624,15 @@ ENTRY(mcck_int_handler)
4: j 4b
ENDPROC(mcck_int_handler)
-#
-# PSW restart interrupt handler
-#
ENTRY(restart_int_handler)
ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
stg %r15,__LC_SAVE_AREA_RESTART
+ TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4
+ jz 0f
+ la %r15,4095
+ lctlg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r15)
+0: larl %r15,.Lstosm_tmp
+ stosm 0(%r15),0x04 # turn dat on, keep irqs off
lg %r15,__LC_RESTART_STACK
xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
@@ -638,7 +641,7 @@ ENTRY(restart_int_handler)
xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
lg %r1,__LC_RESTART_FN # load fn, parm & source cpu
lg %r2,__LC_RESTART_DATA
- lg %r3,__LC_RESTART_SOURCE
+ lgf %r3,__LC_RESTART_SOURCE
ltgr %r3,%r3 # test source cpu address
jm 1f # negative -> skip source stop
0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 1ab33465382f..7f2696e8d511 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -28,10 +28,8 @@ void do_non_secure_storage_access(struct pt_regs *regs);
void do_secure_storage_violation(struct pt_regs *regs);
void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
void kernel_stack_overflow(struct pt_regs * regs);
-void do_signal(struct pt_regs *regs);
void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
struct pt_regs *regs);
-void do_notify_resume(struct pt_regs *regs);
void __init init_IRQ(void);
void do_io_irq(struct pt_regs *regs);
@@ -64,4 +62,13 @@ void stack_free(unsigned long stack);
extern char kprobes_insn_page[];
+extern char _samode31[], _eamode31[];
+extern char _stext_amode31[], _etext_amode31[];
+extern struct exception_table_entry _start_amode31_ex_table[];
+extern struct exception_table_entry _stop_amode31_ex_table[];
+
+#define __amode31_data __section(".amode31.data")
+#define __amode31_ref __section(".amode31.refs")
+extern long _start_amode31_refs[], _end_amode31_refs[];
+
#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 2d8f595d9196..0a464d328467 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -18,8 +18,11 @@
#include <trace/syscall.h>
#include <asm/asm-offsets.h>
#include <asm/cacheflush.h>
+#include <asm/ftrace.lds.h>
+#include <asm/nospec-branch.h>
#include <asm/set_memory.h>
#include "entry.h"
+#include "ftrace.h"
/*
* To generate function prologue either gcc's hotpatch feature (since gcc 4.8)
@@ -41,7 +44,130 @@
*/
void *ftrace_func __read_mostly = ftrace_stub;
-unsigned long ftrace_plt;
+struct ftrace_insn {
+ u16 opc;
+ s32 disp;
+} __packed;
+
+asm(
+ " .align 16\n"
+ "ftrace_shared_hotpatch_trampoline_br:\n"
+ " lmg %r0,%r1,2(%r1)\n"
+ " br %r1\n"
+ "ftrace_shared_hotpatch_trampoline_br_end:\n"
+);
+
+#ifdef CONFIG_EXPOLINE
+asm(
+ " .align 16\n"
+ "ftrace_shared_hotpatch_trampoline_ex:\n"
+ " lmg %r0,%r1,2(%r1)\n"
+ " ex %r0," __stringify(__LC_BR_R1) "(%r0)\n"
+ " j .\n"
+ "ftrace_shared_hotpatch_trampoline_ex_end:\n"
+);
+
+asm(
+ " .align 16\n"
+ "ftrace_shared_hotpatch_trampoline_exrl:\n"
+ " lmg %r0,%r1,2(%r1)\n"
+ " .insn ril,0xc60000000000,%r0,0f\n" /* exrl */
+ " j .\n"
+ "0: br %r1\n"
+ "ftrace_shared_hotpatch_trampoline_exrl_end:\n"
+);
+#endif /* CONFIG_EXPOLINE */
+
+#ifdef CONFIG_MODULES
+static char *ftrace_plt;
+
+asm(
+ " .data\n"
+ "ftrace_plt_template:\n"
+ " basr %r1,%r0\n"
+ " lg %r1,0f-.(%r1)\n"
+ " br %r1\n"
+ "0: .quad ftrace_caller\n"
+ "ftrace_plt_template_end:\n"
+ " .previous\n"
+);
+#endif /* CONFIG_MODULES */
+
+static const char *ftrace_shared_hotpatch_trampoline(const char **end)
+{
+ const char *tstart, *tend;
+
+ tstart = ftrace_shared_hotpatch_trampoline_br;
+ tend = ftrace_shared_hotpatch_trampoline_br_end;
+#ifdef CONFIG_EXPOLINE
+ if (!nospec_disable) {
+ tstart = ftrace_shared_hotpatch_trampoline_ex;
+ tend = ftrace_shared_hotpatch_trampoline_ex_end;
+ if (test_facility(35)) { /* exrl */
+ tstart = ftrace_shared_hotpatch_trampoline_exrl;
+ tend = ftrace_shared_hotpatch_trampoline_exrl_end;
+ }
+ }
+#endif /* CONFIG_EXPOLINE */
+ if (end)
+ *end = tend;
+ return tstart;
+}
+
+bool ftrace_need_init_nop(void)
+{
+ return ftrace_shared_hotpatch_trampoline(NULL);
+}
+
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+{
+ static struct ftrace_hotpatch_trampoline *next_vmlinux_trampoline =
+ __ftrace_hotpatch_trampolines_start;
+ static const char orig[6] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 };
+ static struct ftrace_hotpatch_trampoline *trampoline;
+ struct ftrace_hotpatch_trampoline **next_trampoline;
+ struct ftrace_hotpatch_trampoline *trampolines_end;
+ struct ftrace_hotpatch_trampoline tmp;
+ struct ftrace_insn *insn;
+ const char *shared;
+ s32 disp;
+
+ BUILD_BUG_ON(sizeof(struct ftrace_hotpatch_trampoline) !=
+ SIZEOF_FTRACE_HOTPATCH_TRAMPOLINE);
+
+ next_trampoline = &next_vmlinux_trampoline;
+ trampolines_end = __ftrace_hotpatch_trampolines_end;
+ shared = ftrace_shared_hotpatch_trampoline(NULL);
+#ifdef CONFIG_MODULES
+ if (mod) {
+ next_trampoline = &mod->arch.next_trampoline;
+ trampolines_end = mod->arch.trampolines_end;
+ shared = ftrace_plt;
+ }
+#endif
+
+ if (WARN_ON_ONCE(*next_trampoline >= trampolines_end))
+ return -ENOMEM;
+ trampoline = (*next_trampoline)++;
+
+ /* Check for the compiler-generated fentry nop (brcl 0, .). */
+ if (WARN_ON_ONCE(memcmp((const void *)rec->ip, &orig, sizeof(orig))))
+ return -EINVAL;
+
+ /* Generate the trampoline. */
+ tmp.brasl_opc = 0xc015; /* brasl %r1, shared */
+ tmp.brasl_disp = (shared - (const char *)&trampoline->brasl_opc) / 2;
+ tmp.interceptor = FTRACE_ADDR;
+ tmp.rest_of_intercepted_function = rec->ip + sizeof(struct ftrace_insn);
+ s390_kernel_write(trampoline, &tmp, sizeof(tmp));
+
+ /* Generate a jump to the trampoline. */
+ disp = ((char *)trampoline - (char *)rec->ip) / 2;
+ insn = (struct ftrace_insn *)rec->ip;
+ s390_kernel_write(&insn->disp, &disp, sizeof(disp));
+
+ return 0;
+}
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
@@ -49,11 +175,45 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
return 0;
}
+static void ftrace_generate_nop_insn(struct ftrace_insn *insn)
+{
+ /* brcl 0,0 */
+ insn->opc = 0xc004;
+ insn->disp = 0;
+}
+
+static void ftrace_generate_call_insn(struct ftrace_insn *insn,
+ unsigned long ip)
+{
+ unsigned long target;
+
+ /* brasl r0,ftrace_caller */
+ target = FTRACE_ADDR;
+#ifdef CONFIG_MODULES
+ if (is_module_addr((void *)ip))
+ target = (unsigned long)ftrace_plt;
+#endif /* CONFIG_MODULES */
+ insn->opc = 0xc005;
+ insn->disp = (target - ip) / 2;
+}
+
+static void brcl_disable(void *brcl)
+{
+ u8 op = 0x04; /* set mask field to zero */
+
+ s390_kernel_write((char *)brcl + 1, &op, sizeof(op));
+}
+
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
struct ftrace_insn orig, new, old;
+ if (ftrace_shared_hotpatch_trampoline(NULL)) {
+ brcl_disable((void *)rec->ip);
+ return 0;
+ }
+
if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old)))
return -EFAULT;
/* Replace ftrace call with a nop. */
@@ -67,10 +227,22 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
return 0;
}
+static void brcl_enable(void *brcl)
+{
+ u8 op = 0xf4; /* set mask field to all ones */
+
+ s390_kernel_write((char *)brcl + 1, &op, sizeof(op));
+}
+
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
struct ftrace_insn orig, new, old;
+ if (ftrace_shared_hotpatch_trampoline(NULL)) {
+ brcl_enable((void *)rec->ip);
+ return 0;
+ }
+
if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old)))
return -EFAULT;
/* Replace nop with an ftrace call. */
@@ -95,22 +267,44 @@ int __init ftrace_dyn_arch_init(void)
return 0;
}
+void arch_ftrace_update_code(int command)
+{
+ if (ftrace_shared_hotpatch_trampoline(NULL))
+ ftrace_modify_all_code(command);
+ else
+ ftrace_run_stop_machine(command);
+}
+
+static void __ftrace_sync(void *dummy)
+{
+}
+
+int ftrace_arch_code_modify_post_process(void)
+{
+ if (ftrace_shared_hotpatch_trampoline(NULL)) {
+ /* Send SIGP to the other CPUs, so they see the new code. */
+ smp_call_function(__ftrace_sync, NULL, 1);
+ }
+ return 0;
+}
+
#ifdef CONFIG_MODULES
static int __init ftrace_plt_init(void)
{
- unsigned int *ip;
+ const char *start, *end;
- ftrace_plt = (unsigned long) module_alloc(PAGE_SIZE);
+ ftrace_plt = module_alloc(PAGE_SIZE);
if (!ftrace_plt)
panic("cannot allocate ftrace plt\n");
- ip = (unsigned int *) ftrace_plt;
- ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */
- ip[1] = 0x100a0004;
- ip[2] = 0x07f10000;
- ip[3] = FTRACE_ADDR >> 32;
- ip[4] = FTRACE_ADDR & 0xffffffff;
- set_memory_ro(ftrace_plt, 1);
+
+ start = ftrace_shared_hotpatch_trampoline(&end);
+ if (!start) {
+ start = ftrace_plt_template;
+ end = ftrace_plt_template_end;
+ }
+ memcpy(ftrace_plt, start, end - start);
+ set_memory_ro((unsigned long)ftrace_plt, 1);
return 0;
}
device_initcall(ftrace_plt_init);
@@ -147,17 +341,13 @@ NOKPROBE_SYMBOL(prepare_ftrace_return);
*/
int ftrace_enable_ftrace_graph_caller(void)
{
- u8 op = 0x04; /* set mask field to zero */
-
- s390_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op));
+ brcl_disable(__va(ftrace_graph_caller));
return 0;
}
int ftrace_disable_ftrace_graph_caller(void)
{
- u8 op = 0xf4; /* set mask field to all ones */
-
- s390_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op));
+ brcl_enable(__va(ftrace_graph_caller));
return 0;
}
diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h
new file mode 100644
index 000000000000..69e416f4c6b0
--- /dev/null
+++ b/arch/s390/kernel/ftrace.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _FTRACE_H
+#define _FTRACE_H
+
+#include <asm/types.h>
+
+struct ftrace_hotpatch_trampoline {
+ u16 brasl_opc;
+ s32 brasl_disp;
+ s16: 16;
+ u64 rest_of_intercepted_function;
+ u64 interceptor;
+} __packed;
+
+extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_start[];
+extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_end[];
+extern const char ftrace_shared_hotpatch_trampoline_br[];
+extern const char ftrace_shared_hotpatch_trampoline_br_end[];
+extern const char ftrace_shared_hotpatch_trampoline_ex[];
+extern const char ftrace_shared_hotpatch_trampoline_ex_end[];
+extern const char ftrace_shared_hotpatch_trampoline_exrl[];
+extern const char ftrace_shared_hotpatch_trampoline_exrl_end[];
+extern const char ftrace_plt_template[];
+extern const char ftrace_plt_template_end[];
+
+#endif /* _FTRACE_H */
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 0c253886da78..114b5490ad8e 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -21,6 +21,7 @@ ENTRY(startup_continue)
larl %r1,tod_clock_base
mvc 0(16,%r1),__LC_BOOT_CLOCK
larl %r13,.LPG1 # get base
+ lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
#
# Setup stack
#
@@ -41,3 +42,19 @@ ENTRY(startup_continue)
.align 16
.LPG1:
.Ldw: .quad 0x0002000180000000,0x0000000000000000
+.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space
+ .quad 0 # cr1: primary space segment table
+ .quad 0 # cr2: dispatchable unit control table
+ .quad 0 # cr3: instruction authorization
+ .quad 0xffff # cr4: instruction authorization
+ .quad 0 # cr5: primary-aste origin
+ .quad 0 # cr6: I/O interrupts
+ .quad 0 # cr7: secondary space segment table
+ .quad 0x0000000000008000 # cr8: access registers translation
+ .quad 0 # cr9: tracing off
+ .quad 0 # cr10: tracing off
+ .quad 0 # cr11: tracing off
+ .quad 0 # cr12: tracing off
+ .quad 0 # cr13: home space segment table
+ .quad 0xc0000000 # cr14: machine check handling off
+ .quad 0 # cr15: linkage stack operations
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 50e2c21e0ec9..e2cc35775b99 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -179,8 +179,6 @@ static inline int __diag308(unsigned long subcode, void *addr)
int diag308(unsigned long subcode, void *addr)
{
- if (IS_ENABLED(CONFIG_KASAN))
- __arch_local_irq_stosm(0x04); /* enable DAT */
diag_stat_inc(DIAG_STAT_X308);
return __diag308(subcode, addr);
}
@@ -1843,7 +1841,6 @@ static struct kobj_attribute on_restart_attr = __ATTR_RW(on_restart);
static void __do_restart(void *ignore)
{
- __arch_local_irq_stosm(0x04); /* enable DAT */
smp_send_stop();
#ifdef CONFIG_CRASH_DUMP
crash_kexec(NULL);
@@ -2082,7 +2079,7 @@ void s390_reset_system(void)
/* Disable lowcore protection */
__ctl_clear_bit(0, 28);
- diag_dma_ops.diag308_reset();
+ diag_amode31_ops.diag308_reset();
}
#ifdef CONFIG_KEXEC_FILE
diff --git a/arch/s390/kernel/ipl_vmparm.c b/arch/s390/kernel/ipl_vmparm.c
index af43535a976d..b5245fadcfb0 100644
--- a/arch/s390/kernel/ipl_vmparm.c
+++ b/arch/s390/kernel/ipl_vmparm.c
@@ -1,4 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/minmax.h>
+#include <linux/string.h>
#include <asm/ebcdic.h>
#include <asm/ipl.h>
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 234d085257eb..3a3145c4a3ba 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -228,7 +228,7 @@ int show_interrupts(struct seq_file *p, void *v)
int index = *(loff_t *) v;
int cpu, irq;
- get_online_cpus();
+ cpus_read_lock();
if (index == 0) {
seq_puts(p, " ");
for_each_online_cpu(cpu)
@@ -258,7 +258,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
}
out:
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index ab584e8e3527..9156653b56f6 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -36,7 +36,7 @@ static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
unsigned char *ipe = (unsigned char *)expected;
unsigned char *ipn = (unsigned char *)new;
- pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc);
+ pr_emerg("Jump label code mismatch at %pS [%px]\n", ipc, ipc);
pr_emerg("Found: %6ph\n", ipc);
pr_emerg("Expected: %6ph\n", ipe);
pr_emerg("New: %6ph\n", ipn);
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 1005a6935fbe..0505e55a6297 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -224,8 +224,8 @@ void arch_crash_save_vmcoreinfo(void)
VMCOREINFO_SYMBOL(lowcore_ptr);
VMCOREINFO_SYMBOL(high_memory);
VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
- vmcoreinfo_append_str("SDMA=%lx\n", __sdma);
- vmcoreinfo_append_str("EDMA=%lx\n", __edma);
+ vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
+ vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
}
@@ -263,7 +263,6 @@ static void __do_machine_kexec(void *data)
*/
static void __machine_kexec(void *data)
{
- __arch_local_irq_stosm(0x04); /* enable DAT */
pfault_fini();
tracing_off();
debug_locks_off();
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 4055f1c49814..b01ba460b7ca 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -14,6 +14,7 @@
#include <linux/elf.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
+#include <linux/ftrace.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/kasan.h>
@@ -23,6 +24,8 @@
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/facility.h>
+#include <asm/ftrace.lds.h>
+#include <asm/set_memory.h>
#if 0
#define DEBUGP printk
@@ -48,6 +51,13 @@ void *module_alloc(unsigned long size)
return p;
}
+#ifdef CONFIG_FUNCTION_TRACER
+void module_arch_cleanup(struct module *mod)
+{ /* Free the trampoline area recorded in mod->arch.trampolines_start via module_memfree(). */
+ module_memfree(mod->arch.trampolines_start);
+}
+#endif /* CONFIG_FUNCTION_TRACER */
+
void module_arch_freeing_init(struct module *mod)
{
if (is_livepatch_module(mod) &&
@@ -466,6 +476,30 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
write);
}
+#ifdef CONFIG_FUNCTION_TRACER
+static int module_alloc_ftrace_hotpatch_trampolines(struct module *me,
+ const Elf_Shdr *s)
+{
+ char *start, *end;
+ int numpages;
+ size_t size;
+ /*
+ * Allocate the trampoline area for section s of module me:
+ * FTRACE_HOTPATCH_TRAMPOLINES_SIZE() turns the section size into the
+ * number of bytes needed, which is rounded up to whole pages for
+ * module_alloc(). The pages are then marked read-only.
+ * Returns 0 on success or -ENOMEM if module_alloc() fails.
+ */
+ size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size);
+ numpages = DIV_ROUND_UP(size, PAGE_SIZE);
+ start = module_alloc(numpages * PAGE_SIZE);
+ if (!start)
+ return -ENOMEM;
+ set_memory_ro((unsigned long)start, numpages); /* NOTE(review): return value is not checked */
+ end = start + size;
+ /*
+ * trampolines_end is start + size, not the page-rounded end of the
+ * allocation; next_trampoline begins at the start of the area.
+ */
+ me->arch.trampolines_start = (struct ftrace_hotpatch_trampoline *)start;
+ me->arch.trampolines_end = (struct ftrace_hotpatch_trampoline *)end;
+ me->arch.next_trampoline = me->arch.trampolines_start;
+
+ return 0;
+}
+#endif /* CONFIG_FUNCTION_TRACER */
+
int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
@@ -473,6 +507,9 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *s;
char *secstrings, *secname;
void *aseg;
+#ifdef CONFIG_FUNCTION_TRACER
+ int ret;
+#endif
if (IS_ENABLED(CONFIG_EXPOLINE) &&
!nospec_disable && me->arch.plt_size) {
@@ -507,6 +544,14 @@ int module_finalize(const Elf_Ehdr *hdr,
if (IS_ENABLED(CONFIG_EXPOLINE) &&
(str_has_prefix(secname, ".s390_return")))
nospec_revert(aseg, aseg + s->sh_size);
+
+#ifdef CONFIG_FUNCTION_TRACER
+ if (!strcmp(FTRACE_CALLSITE_SECTION, secname)) {
+ ret = module_alloc_ftrace_hotpatch_trampolines(me, s);
+ if (ret < 0)
+ return ret;
+ }
+#endif /* CONFIG_FUNCTION_TRACER */
}
jump_label_apply_nops(me);
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index 5a7420b23aa8..4bef35b79b93 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -121,7 +121,7 @@ static void os_info_old_init(void)
if (os_info_init)
return;
- if (!OLDMEM_BASE)
+ if (!oldmem_data.start)
goto fail;
if (copy_oldmem_kernel(&addr, &S390_lowcore.os_info, sizeof(addr)))
goto fail;
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index d7dc36ec0a60..2e3bb633acf6 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -1138,7 +1138,7 @@ static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
int ret;
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&cfset_ctrset_mutex);
switch (cmd) {
case S390_HWCTR_START:
@@ -1155,7 +1155,7 @@ static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
break;
}
mutex_unlock(&cfset_ctrset_mutex);
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 82df39b17bb5..d9d4a806979e 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -11,6 +11,7 @@
#include <linux/cpufeature.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
+#include <linux/random.h>
#include <linux/sched/mm.h>
#include <linux/init.h>
#include <linux/seq_file.h>
@@ -23,8 +24,12 @@
#include <asm/elf.h>
#include <asm/lowcore.h>
#include <asm/param.h>
+#include <asm/sclp.h>
#include <asm/smp.h>
+unsigned long __read_mostly elf_hwcap;
+char elf_platform[ELF_PLATFORM_SIZE];
+
struct cpu_info {
unsigned int cpu_mhz_dynamic;
unsigned int cpu_mhz_static;
@@ -113,15 +118,33 @@ static void show_facilities(struct seq_file *m)
static void show_cpu_summary(struct seq_file *m, void *v)
{
static const char *hwcap_str[] = {
- "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
- "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs",
- "vxe2", "vxp", "sort", "dflt"
- };
- static const char * const int_hwcap_str[] = {
- "sie"
+ [HWCAP_NR_ESAN3] = "esan3",
+ [HWCAP_NR_ZARCH] = "zarch",
+ [HWCAP_NR_STFLE] = "stfle",
+ [HWCAP_NR_MSA] = "msa",
+ [HWCAP_NR_LDISP] = "ldisp",
+ [HWCAP_NR_EIMM] = "eimm",
+ [HWCAP_NR_DFP] = "dfp",
+ [HWCAP_NR_HPAGE] = "edat",
+ [HWCAP_NR_ETF3EH] = "etf3eh",
+ [HWCAP_NR_HIGH_GPRS] = "highgprs",
+ [HWCAP_NR_TE] = "te",
+ [HWCAP_NR_VXRS] = "vx",
+ [HWCAP_NR_VXRS_BCD] = "vxd",
+ [HWCAP_NR_VXRS_EXT] = "vxe",
+ [HWCAP_NR_GS] = "gs",
+ [HWCAP_NR_VXRS_EXT2] = "vxe2",
+ [HWCAP_NR_VXRS_PDE] = "vxp",
+ [HWCAP_NR_SORT] = "sort",
+ [HWCAP_NR_DFLT] = "dflt",
+ [HWCAP_NR_VXRS_PDE2] = "vxp2",
+ [HWCAP_NR_NNPA] = "nnpa",
+ [HWCAP_NR_PCI_MIO] = "pcimio",
+ [HWCAP_NR_SIE] = "sie",
};
int i, cpu;
+ BUILD_BUG_ON(ARRAY_SIZE(hwcap_str) != HWCAP_NR_MAX);
seq_printf(m, "vendor_id : IBM/S390\n"
"# processors : %i\n"
"bogomips per cpu: %lu.%02lu\n",
@@ -132,9 +155,6 @@ static void show_cpu_summary(struct seq_file *m, void *v)
for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
seq_printf(m, "%s ", hwcap_str[i]);
- for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++)
- if (int_hwcap_str[i] && (int_hwcap & (1UL << i)))
- seq_printf(m, "%s ", int_hwcap_str[i]);
seq_puts(m, "\n");
show_facilities(m);
show_cacheinfo(m);
@@ -149,6 +169,141 @@ static void show_cpu_summary(struct seq_file *m, void *v)
}
}
+static int __init setup_hwcaps(void)
+{
+ /*
+ * Collect the hardware capability bits in elf_hwcap: each HWCAP_* flag
+ * below is ORed in when its test_facility(), MACHINE_HAS_* or sclp check
+ * passes. HWCAP_ZARCH and HWCAP_HIGH_GPRS are set unconditionally.
+ * Registered as an arch_initcall; always returns 0.
+ *
+ * First flag: instructions named N3, "backported" to esa-mode
+ */
+ if (test_facility(0))
+ elf_hwcap |= HWCAP_ESAN3;
+
+ /* z/Architecture mode active */
+ elf_hwcap |= HWCAP_ZARCH;
+
+ /* store-facility-list-extended */
+ if (test_facility(7))
+ elf_hwcap |= HWCAP_STFLE;
+
+ /* message-security assist */
+ if (test_facility(17))
+ elf_hwcap |= HWCAP_MSA;
+
+ /* long-displacement */
+ if (test_facility(19))
+ elf_hwcap |= HWCAP_LDISP;
+
+ /* extended-immediate */
+ if (test_facility(21))
+ elf_hwcap |= HWCAP_EIMM;
+
+ /* extended-translation facility 3 enhancement */
+ if (test_facility(22) && test_facility(30))
+ elf_hwcap |= HWCAP_ETF3EH;
+
+ /* decimal floating point & perform floating point operation */
+ if (test_facility(42) && test_facility(44))
+ elf_hwcap |= HWCAP_DFP;
+
+ /* huge page support */
+ if (MACHINE_HAS_EDAT1)
+ elf_hwcap |= HWCAP_HPAGE;
+
+ /* 64-bit register support for 31-bit processes */
+ elf_hwcap |= HWCAP_HIGH_GPRS;
+
+ /* transactional execution */
+ if (MACHINE_HAS_TE)
+ elf_hwcap |= HWCAP_TE;
+
+ /*
+ * Vector extension can be disabled with the "novx" parameter.
+ * Use MACHINE_HAS_VX instead of facility bit 129.
+ * The VXRS_* sub-features are only reported when MACHINE_HAS_VX holds.
+ */
+ if (MACHINE_HAS_VX) {
+ elf_hwcap |= HWCAP_VXRS;
+ if (test_facility(134))
+ elf_hwcap |= HWCAP_VXRS_BCD;
+ if (test_facility(135))
+ elf_hwcap |= HWCAP_VXRS_EXT;
+ if (test_facility(148))
+ elf_hwcap |= HWCAP_VXRS_EXT2;
+ if (test_facility(152))
+ elf_hwcap |= HWCAP_VXRS_PDE;
+ if (test_facility(192))
+ elf_hwcap |= HWCAP_VXRS_PDE2;
+ }
+
+ if (test_facility(150))
+ elf_hwcap |= HWCAP_SORT;
+
+ if (test_facility(151))
+ elf_hwcap |= HWCAP_DFLT;
+
+ if (test_facility(165))
+ elf_hwcap |= HWCAP_NNPA;
+
+ /* guarded storage */
+ if (MACHINE_HAS_GS)
+ elf_hwcap |= HWCAP_GS;
+
+ if (MACHINE_HAS_PCI_MIO)
+ elf_hwcap |= HWCAP_PCI_MIO;
+
+ /* virtualization support */
+ if (sclp.has_sief2)
+ elf_hwcap |= HWCAP_SIE;
+
+ return 0;
+}
+arch_initcall(setup_hwcaps);
+
+static int __init setup_elf_platform(void)
+{
+ struct cpuid cpu_id;
+ /*
+ * Read the CPU id, feed it to add_device_randomness(), then copy the
+ * platform name matching cpu_id.machine into elf_platform. Machine
+ * types not listed below share the "z900" default.
+ * Registered as an arch_initcall; always returns 0.
+ */
+ get_cpu_id(&cpu_id);
+ add_device_randomness(&cpu_id, sizeof(cpu_id));
+ switch (cpu_id.machine) {
+ case 0x2064:
+ case 0x2066:
+ default: /* Use "z900" as default for 64 bit kernels. */
+ strcpy(elf_platform, "z900");
+ break;
+ case 0x2084:
+ case 0x2086:
+ strcpy(elf_platform, "z990");
+ break;
+ case 0x2094:
+ case 0x2096:
+ strcpy(elf_platform, "z9-109");
+ break;
+ case 0x2097:
+ case 0x2098:
+ strcpy(elf_platform, "z10");
+ break;
+ case 0x2817:
+ case 0x2818:
+ strcpy(elf_platform, "z196");
+ break;
+ case 0x2827:
+ case 0x2828:
+ strcpy(elf_platform, "zEC12");
+ break;
+ case 0x2964:
+ case 0x2965:
+ strcpy(elf_platform, "z13");
+ break;
+ case 0x3906:
+ case 0x3907:
+ strcpy(elf_platform, "z14");
+ break;
+ case 0x8561:
+ case 0x8562:
+ strcpy(elf_platform, "z15");
+ break;
+ }
+ return 0;
+}
+arch_initcall(setup_elf_platform);
+
static void show_cpu_topology(struct seq_file *m, unsigned long n)
{
#ifdef CONFIG_SCHED_TOPOLOGY
@@ -210,7 +365,7 @@ static inline void *c_update(loff_t *pos)
static void *c_start(struct seq_file *m, loff_t *pos)
{
- get_online_cpus();
+ cpus_read_lock();
return c_update(pos);
}
@@ -222,7 +377,7 @@ static void *c_next(struct seq_file *m, void *v, loff_t *pos)
static void c_stop(struct seq_file *m, void *v)
{
- put_online_cpus();
+ cpus_read_unlock();
}
const struct seq_operations cpuinfo_op = {
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index ff0f9e838916..fe14beb338e5 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -89,27 +89,71 @@ EXPORT_SYMBOL(console_devno);
unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);
-unsigned long elf_hwcap __read_mostly = 0;
-char elf_platform[ELF_PLATFORM_SIZE];
+/*
+ * Some code and data need to stay below 2 GB, even when the kernel would be
+ * relocated above 2 GB, because it has to use 31 bit addresses.
+ * Such code and data is part of the .amode31 section.
+ */
+unsigned long __amode31_ref __samode31 = __pa(&_samode31);
+unsigned long __amode31_ref __eamode31 = __pa(&_eamode31);
+unsigned long __amode31_ref __stext_amode31 = __pa(&_stext_amode31);
+unsigned long __amode31_ref __etext_amode31 = __pa(&_etext_amode31);
+struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
+struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;
+
+/*
+ * Control registers CR2, CR5 and CR15 are initialized with addresses
+ * of tables that must be placed below 2G which is handled by the AMODE31
+ * sections.
+ * Because the AMODE31 sections are relocated below 2G at startup,
+ * the content of control registers CR2, CR5 and CR15 must be updated
+ * with new addresses after the relocation. The control registers are first
+ * initialized in head64.S and then updated again after AMODE31 relocation.
+ * We must access the relevant AMODE31 tables indirectly via
+ * pointers placed in the .amode31.refs linker section. Those pointers get
+ * updated automatically during AMODE31 relocation and always contain a valid
+ * address within AMODE31 sections.
+ */
+
+static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64);
+
+static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = {
+ [1] = 0xffffffffffffffff
+};
+
+static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = {
+ 0x80000000, 0, 0, 0,
+ 0x80000000, 0, 0, 0,
+ 0x80000000, 0, 0, 0,
+ 0x80000000, 0, 0, 0,
+ 0x80000000, 0, 0, 0,
+ 0x80000000, 0, 0, 0,
+ 0x80000000, 0, 0, 0,
+ 0x80000000, 0, 0, 0
+};
+
+static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = {
+ 0, 0, 0x89000000, 0,
+ 0, 0, 0x8a000000, 0
+};
-unsigned long int_hwcap = 0;
+static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31;
+static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
+static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
+static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
int __bootdata(noexec_disabled);
unsigned long __bootdata(ident_map_size);
struct mem_detect_info __bootdata(mem_detect);
+struct initrd_data __bootdata(initrd_data);
-struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table);
-struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table);
-unsigned long __bootdata_preserved(__stext_dma);
-unsigned long __bootdata_preserved(__etext_dma);
-unsigned long __bootdata_preserved(__sdma);
-unsigned long __bootdata_preserved(__edma);
unsigned long __bootdata_preserved(__kaslr_offset);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);
+struct oldmem_data __bootdata_preserved(oldmem_data);
unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);
@@ -254,7 +298,7 @@ static void __init setup_zfcpdump(void)
{
if (!is_ipl_type_dump())
return;
- if (OLDMEM_BASE)
+ if (oldmem_data.start)
return;
strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
console_loglevel = 2;
@@ -421,7 +465,7 @@ static void __init setup_lowcore_dat_off(void)
lc->restart_stack = (unsigned long) restart_stack;
lc->restart_fn = (unsigned long) do_restart;
lc->restart_data = 0;
- lc->restart_source = -1UL;
+ lc->restart_source = -1U;
mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
if (!mcck_stack)
@@ -450,12 +494,19 @@ static void __init setup_lowcore_dat_off(void)
static void __init setup_lowcore_dat_on(void)
{
+ struct lowcore *lc = lowcore_ptr[0];
+
__ctl_clear_bit(0, 28);
S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
+ __ctl_store(S390_lowcore.cregs_save_area, 0, 15);
__ctl_set_bit(0, 28);
+ mem_assign_absolute(S390_lowcore.restart_flags, RESTART_FLAG_CTLREGS);
+ mem_assign_absolute(S390_lowcore.program_new_psw, lc->program_new_psw);
+ memcpy_absolute(&S390_lowcore.cregs_save_area, lc->cregs_save_area,
+ sizeof(S390_lowcore.cregs_save_area));
}
static struct resource code_resource = {
@@ -610,9 +661,9 @@ static void __init reserve_crashkernel(void)
return;
}
- low = crash_base ?: OLDMEM_BASE;
+ low = crash_base ?: oldmem_data.start;
high = low + crash_size;
- if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) {
+ if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) {
/* The crashkernel fits into OLDMEM, reuse OLDMEM */
crash_base = low;
} else {
@@ -639,7 +690,7 @@ static void __init reserve_crashkernel(void)
if (register_memory_notifier(&kdump_mem_nb))
return;
- if (!OLDMEM_BASE && MACHINE_IS_VM)
+ if (!oldmem_data.start && MACHINE_IS_VM)
diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
@@ -658,11 +709,11 @@ static void __init reserve_crashkernel(void)
static void __init reserve_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
- if (!INITRD_START || !INITRD_SIZE)
+ if (!initrd_data.start || !initrd_data.size)
return;
- initrd_start = INITRD_START;
- initrd_end = initrd_start + INITRD_SIZE;
- memblock_reserve(INITRD_START, INITRD_SIZE);
+ initrd_start = initrd_data.start;
+ initrd_end = initrd_start + initrd_data.size;
+ memblock_reserve(initrd_data.start, initrd_data.size);
#endif
}
@@ -732,10 +783,10 @@ static void __init memblock_add_mem_detect_info(void)
static void __init check_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
- if (INITRD_START && INITRD_SIZE &&
- !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) {
+ if (initrd_data.start && initrd_data.size &&
+ !memblock_is_region_memory(initrd_data.start, initrd_data.size)) {
pr_err("The initial RAM disk does not fit into the memory\n");
- memblock_free(INITRD_START, INITRD_SIZE);
+ memblock_free(initrd_data.start, initrd_data.size);
initrd_start = initrd_end = 0;
}
#endif
@@ -748,10 +799,10 @@ static void __init reserve_kernel(void)
{
unsigned long start_pfn = PFN_UP(__pa(_end));
- memblock_reserve(0, HEAD_END);
+ memblock_reserve(0, STARTUP_NORMAL_OFFSET);
+ memblock_reserve((unsigned long)sclp_early_sccb, EXT_SCCB_READ_SCP);
memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
- (unsigned long)_stext);
- memblock_reserve(__sdma, __edma - __sdma);
}
static void __init setup_memory(void)
@@ -771,152 +822,52 @@ static void __init setup_memory(void)
memblock_enforce_memory_limit(memblock_end_of_DRAM());
}
-/*
- * Setup hardware capabilities.
- */
-static int __init setup_hwcaps(void)
+static void __init relocate_amode31_section(void)
{
- static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
- struct cpuid cpu_id;
- int i;
-
- /*
- * The store facility list bits numbers as found in the principles
- * of operation are numbered with bit 1UL<<31 as number 0 to
- * bit 1UL<<0 as number 31.
- * Bit 0: instructions named N3, "backported" to esa-mode
- * Bit 2: z/Architecture mode is active
- * Bit 7: the store-facility-list-extended facility is installed
- * Bit 17: the message-security assist is installed
- * Bit 19: the long-displacement facility is installed
- * Bit 21: the extended-immediate facility is installed
- * Bit 22: extended-translation facility 3 is installed
- * Bit 30: extended-translation facility 3 enhancement facility
- * These get translated to:
- * HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1,
- * HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3,
- * HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and
- * HWCAP_S390_ETF3EH bit 8 (22 && 30).
- */
- for (i = 0; i < 6; i++)
- if (test_facility(stfl_bits[i]))
- elf_hwcap |= 1UL << i;
-
- if (test_facility(22) && test_facility(30))
- elf_hwcap |= HWCAP_S390_ETF3EH;
-
- /*
- * Check for additional facilities with store-facility-list-extended.
- * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0
- * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information
- * as stored by stfl, bits 32-xxx contain additional facilities.
- * How many facility words are stored depends on the number of
- * doublewords passed to the instruction. The additional facilities
- * are:
- * Bit 42: decimal floating point facility is installed
- * Bit 44: perform floating point operation facility is installed
- * translated to:
- * HWCAP_S390_DFP bit 6 (42 && 44).
- */
- if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44))
- elf_hwcap |= HWCAP_S390_DFP;
-
- /*
- * Huge page support HWCAP_S390_HPAGE is bit 7.
- */
- if (MACHINE_HAS_EDAT1)
- elf_hwcap |= HWCAP_S390_HPAGE;
-
- /*
- * 64-bit register support for 31-bit processes
- * HWCAP_S390_HIGH_GPRS is bit 9.
- */
- elf_hwcap |= HWCAP_S390_HIGH_GPRS;
-
- /*
- * Transactional execution support HWCAP_S390_TE is bit 10.
- */
- if (MACHINE_HAS_TE)
- elf_hwcap |= HWCAP_S390_TE;
-
- /*
- * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension
- * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX
- * instead of facility bit 129.
- */
- if (MACHINE_HAS_VX) {
- elf_hwcap |= HWCAP_S390_VXRS;
- if (test_facility(134))
- elf_hwcap |= HWCAP_S390_VXRS_BCD;
- if (test_facility(135))
- elf_hwcap |= HWCAP_S390_VXRS_EXT;
- if (test_facility(148))
- elf_hwcap |= HWCAP_S390_VXRS_EXT2;
- if (test_facility(152))
- elf_hwcap |= HWCAP_S390_VXRS_PDE;
- }
- if (test_facility(150))
- elf_hwcap |= HWCAP_S390_SORT;
- if (test_facility(151))
- elf_hwcap |= HWCAP_S390_DFLT;
-
- /*
- * Guarded storage support HWCAP_S390_GS is bit 12.
- */
- if (MACHINE_HAS_GS)
- elf_hwcap |= HWCAP_S390_GS;
-
- get_cpu_id(&cpu_id);
- add_device_randomness(&cpu_id, sizeof(cpu_id));
- switch (cpu_id.machine) {
- case 0x2064:
- case 0x2066:
- default: /* Use "z900" as default for 64 bit kernels. */
- strcpy(elf_platform, "z900");
- break;
- case 0x2084:
- case 0x2086:
- strcpy(elf_platform, "z990");
- break;
- case 0x2094:
- case 0x2096:
- strcpy(elf_platform, "z9-109");
- break;
- case 0x2097:
- case 0x2098:
- strcpy(elf_platform, "z10");
- break;
- case 0x2817:
- case 0x2818:
- strcpy(elf_platform, "z196");
- break;
- case 0x2827:
- case 0x2828:
- strcpy(elf_platform, "zEC12");
- break;
- case 0x2964:
- case 0x2965:
- strcpy(elf_platform, "z13");
- break;
- case 0x3906:
- case 0x3907:
- strcpy(elf_platform, "z14");
- break;
- case 0x8561:
- case 0x8562:
- strcpy(elf_platform, "z15");
- break;
- }
-
- /*
- * Virtualization support HWCAP_INT_SIE is bit 0.
- */
- if (sclp.has_sief2)
- int_hwcap |= HWCAP_INT_SIE;
+ unsigned long amode31_addr, amode31_size;
+ long amode31_offset;
+ long *ptr;
+
+ /* Allocate a new AMODE31 capable memory region */
+ amode31_size = __eamode31 - __samode31;
+ pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
+ amode31_addr = (unsigned long)memblock_alloc_low(amode31_size, PAGE_SIZE);
+ if (!amode31_addr)
+ panic("Failed to allocate memory for AMODE31 section\n");
+ amode31_offset = amode31_addr - __samode31;
+
+ /* Move original AMODE31 section to the new one */
+ memmove((void *)amode31_addr, (void *)__samode31, amode31_size);
+ /* Zero out the old AMODE31 section to catch invalid accesses within it */
+ memset((void *)__samode31, 0, amode31_size);
+
+ /* Update all AMODE31 region references */
+ for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
+ *ptr += amode31_offset;
+}
- return 0;
+/*
+ * Repoint control registers CR2, CR5 and CR15 at the tables reached through
+ * the __ctl_duct and __ctl_linkage_stack pointers.
+ * This must be called after AMODE31 relocation.
+ */
+static void __init setup_cr(void)
+{
+ union ctlreg2 cr2;
+ union ctlreg5 cr5;
+ union ctlreg15 cr15;
+ /* Record the __ctl_aste and __ctl_duald addresses in __ctl_duct slots 1, 2 and 4. */
+ __ctl_duct[1] = (unsigned long)__ctl_aste;
+ __ctl_duct[2] = (unsigned long)__ctl_aste;
+ __ctl_duct[4] = (unsigned long)__ctl_duald;
+
+ /*
+ * Update control registers CR2, CR5 and CR15: __ctl_store() presumably
+ * captures the current register contents, so that only the ducto,
+ * pasteo and lsea fields are replaced before __ctl_load() writes the
+ * values back. Both cr2.ducto and cr5.pasteo are derived from
+ * __ctl_duct (shifted right by 6); cr15.lsea is __ctl_linkage_stack
+ * shifted right by 3.
+ */
+ __ctl_store(cr2.val, 2, 2);
+ __ctl_store(cr5.val, 5, 5);
+ __ctl_store(cr15.val, 15, 15);
+ cr2.ducto = (unsigned long)__ctl_duct >> 6;
+ cr5.pasteo = (unsigned long)__ctl_duct >> 6;
+ cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3;
+ __ctl_load(cr2.val, 2, 2);
+ __ctl_load(cr5.val, 5, 5);
+ __ctl_load(cr15.val, 15, 15);
}
-arch_initcall(setup_hwcaps);
/*
* Add system information as device randomness
@@ -1059,6 +1010,9 @@ void __init setup_arch(char **cmdline_p)
free_mem_detect_info();
+ relocate_amode31_section();
+ setup_cr();
+
setup_uv();
setup_memory_end();
setup_memory();
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 78ef53b29958..307f5d99514d 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -533,9 +533,3 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal)
*/
restore_saved_sigmask();
}
-
-void do_notify_resume(struct pt_regs *regs)
-{
- tracehook_notify_resume(regs);
- rseq_handle_notify_resume(NULL, regs);
-}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 8984711f72ed..2a991e43ead3 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -252,6 +252,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
lc->cpu_nr = cpu;
+ lc->restart_flags = RESTART_FLAG_CTLREGS;
lc->spinlock_lockval = arch_spin_lockval(cpu);
lc->spinlock_index = 0;
lc->percpu_offset = __per_cpu_offset[cpu];
@@ -294,10 +295,10 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
- lc->restart_stack = lc->nodat_stack;
+ lc->restart_stack = lc->kernel_stack;
lc->restart_fn = (unsigned long) func;
lc->restart_data = (unsigned long) data;
- lc->restart_source = -1UL;
+ lc->restart_source = -1U;
pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
}
@@ -311,12 +312,12 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data)
func(data); /* should not return */
}
-static void __no_sanitize_address pcpu_delegate(struct pcpu *pcpu,
- pcpu_delegate_fn *func,
- void *data, unsigned long stack)
+static void pcpu_delegate(struct pcpu *pcpu,
+ pcpu_delegate_fn *func,
+ void *data, unsigned long stack)
{
struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
- unsigned long source_cpu = stap();
+ unsigned int source_cpu = stap();
__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
if (pcpu->address == source_cpu) {
@@ -569,6 +570,9 @@ static void smp_ctl_bit_callback(void *info)
__ctl_load(cregs, 0, 15);
}
+static DEFINE_SPINLOCK(ctl_lock);
+static unsigned long ctlreg;
+
/*
* Set a bit in a control register of all cpus
*/
@@ -576,6 +580,11 @@ void smp_ctl_set_bit(int cr, int bit)
{
struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr };
+ spin_lock(&ctl_lock);
+ memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg));
+ __set_bit(bit, &ctlreg);
+ memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg));
+ spin_unlock(&ctl_lock);
on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
EXPORT_SYMBOL(smp_ctl_set_bit);
@@ -587,6 +596,11 @@ void smp_ctl_clear_bit(int cr, int bit)
{
struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr };
+ spin_lock(&ctl_lock);
+ memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg));
+ __clear_bit(bit, &ctlreg);
+ memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg));
+ spin_unlock(&ctl_lock);
on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
EXPORT_SYMBOL(smp_ctl_clear_bit);
@@ -673,7 +687,7 @@ void __init smp_save_dump_cpus(void)
unsigned long page;
bool is_boot_cpu;
- if (!(OLDMEM_BASE || is_ipl_type_dump()))
+ if (!(oldmem_data.start || is_ipl_type_dump()))
/* No previous system present, normal boot. */
return;
/* Allocate a page as dumping area for the store status sigps */
@@ -704,12 +718,12 @@ void __init smp_save_dump_cpus(void)
* these registers an SCLP request is required which is
* done by drivers/s390/char/zcore.c:init_cpu_info()
*/
- if (!is_boot_cpu || OLDMEM_BASE)
+ if (!is_boot_cpu || oldmem_data.start)
/* Get the CPU registers */
smp_save_cpu_regs(sa, addr, is_boot_cpu, page);
}
memblock_free(page, PAGE_SIZE);
- diag_dma_ops.diag308_reset();
+ diag_amode31_ops.diag308_reset();
pcpu_set_smt(0);
}
#endif /* CONFIG_CRASH_DUMP */
@@ -793,7 +807,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
u16 core_id;
int nr, i;
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&smp_cpu_state_mutex);
nr = 0;
cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
@@ -816,7 +830,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
nr += smp_add_core(&info->core[i], &avail, configured, early);
}
mutex_unlock(&smp_cpu_state_mutex);
- put_online_cpus();
+ cpus_read_unlock();
return nr;
}
@@ -868,11 +882,19 @@ void __init smp_detect_cpus(void)
memblock_free_early((unsigned long)info, sizeof(*info));
}
-static void smp_init_secondary(void)
+/*
+ * Activate a secondary processor.
+ */
+static void smp_start_secondary(void *cpuvoid)
{
int cpu = raw_smp_processor_id();
S390_lowcore.last_update_clock = get_tod_clock();
+ S390_lowcore.restart_stack = (unsigned long)restart_stack;
+ S390_lowcore.restart_fn = (unsigned long)do_restart;
+ S390_lowcore.restart_data = 0;
+ S390_lowcore.restart_source = -1U;
+ S390_lowcore.restart_flags = 0;
restore_access_regs(S390_lowcore.access_regs_save_area);
cpu_init();
rcu_cpu_starting(cpu);
@@ -892,20 +914,6 @@ static void smp_init_secondary(void)
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
-/*
- * Activate a secondary processor.
- */
-static void __no_sanitize_address smp_start_secondary(void *cpuvoid)
-{
- S390_lowcore.restart_stack = (unsigned long) restart_stack;
- S390_lowcore.restart_fn = (unsigned long) do_restart;
- S390_lowcore.restart_data = 0;
- S390_lowcore.restart_source = -1UL;
- __ctl_load(S390_lowcore.cregs_save_area, 0, 15);
- __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
- call_on_stack_noreturn(smp_init_secondary, S390_lowcore.kernel_stack);
-}
-
/* Upping and downing of CPUs */
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
@@ -1055,7 +1063,7 @@ static ssize_t cpu_configure_store(struct device *dev,
return -EINVAL;
if (val != 0 && val != 1)
return -EINVAL;
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&smp_cpu_state_mutex);
rc = -EBUSY;
/* disallow configuration changes of online cpus and cpu 0 */
@@ -1104,7 +1112,7 @@ static ssize_t cpu_configure_store(struct device *dev,
}
out:
mutex_unlock(&smp_cpu_state_mutex);
- put_online_cpus();
+ cpus_read_unlock();
return rc ? rc : count;
}
static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
diff --git a/arch/s390/boot/text_dma.S b/arch/s390/kernel/text_amode31.S
index 5ff5fee02801..868e4a604110 100644
--- a/arch/s390/boot/text_dma.S
+++ b/arch/s390/kernel/text_amode31.S
@@ -9,14 +9,14 @@
#include <asm/errno.h>
#include <asm/sigp.h>
- .section .dma.text,"ax"
+ .section .amode31.text,"ax"
/*
* Simplified version of expoline thunk. The normal thunks can not be used here,
* because they might be more than 2 GB away, and not reachable by the relative
* branch. No comdat, exrl, etc. optimizations used here, because it only
* affects a few functions that are not performance-relevant.
*/
- .macro BR_EX_DMA_r14
+ .macro BR_EX_AMODE31_r14
larl %r1,0f
ex 0,0(%r1)
j .
@@ -24,9 +24,9 @@
.endm
/*
- * int _diag14_dma(unsigned long rx, unsigned long ry1, unsigned long subcode)
+ * int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode)
*/
-ENTRY(_diag14_dma)
+ENTRY(_diag14_amode31)
lgr %r1,%r2
lgr %r2,%r3
lgr %r3,%r4
@@ -39,14 +39,14 @@ ENTRY(_diag14_dma)
.Ldiag14_fault:
sam64
lgfr %r2,%r5
- BR_EX_DMA_r14
- EX_TABLE_DMA(.Ldiag14_ex, .Ldiag14_fault)
-ENDPROC(_diag14_dma)
+ BR_EX_AMODE31_r14
+ EX_TABLE_AMODE31(.Ldiag14_ex, .Ldiag14_fault)
+ENDPROC(_diag14_amode31)
/*
- * int _diag210_dma(struct diag210 *addr)
+ * int _diag210_amode31(struct diag210 *addr)
*/
-ENTRY(_diag210_dma)
+ENTRY(_diag210_amode31)
lgr %r1,%r2
lhi %r2,-1
sam31
@@ -57,40 +57,40 @@ ENTRY(_diag210_dma)
.Ldiag210_fault:
sam64
lgfr %r2,%r2
- BR_EX_DMA_r14
- EX_TABLE_DMA(.Ldiag210_ex, .Ldiag210_fault)
-ENDPROC(_diag210_dma)
+ BR_EX_AMODE31_r14
+ EX_TABLE_AMODE31(.Ldiag210_ex, .Ldiag210_fault)
+ENDPROC(_diag210_amode31)
/*
- * int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode)
+ * int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode)
*/
-ENTRY(_diag26c_dma)
+ENTRY(_diag26c_amode31)
lghi %r5,-EOPNOTSUPP
sam31
diag %r2,%r4,0x26c
.Ldiag26c_ex:
sam64
lgfr %r2,%r5
- BR_EX_DMA_r14
- EX_TABLE_DMA(.Ldiag26c_ex, .Ldiag26c_ex)
-ENDPROC(_diag26c_dma)
+ BR_EX_AMODE31_r14
+ EX_TABLE_AMODE31(.Ldiag26c_ex, .Ldiag26c_ex)
+ENDPROC(_diag26c_amode31)
/*
- * void _diag0c_dma(struct hypfs_diag0c_entry *entry)
+ * void _diag0c_amode31(struct hypfs_diag0c_entry *entry)
*/
-ENTRY(_diag0c_dma)
+ENTRY(_diag0c_amode31)
sam31
diag %r2,%r2,0x0c
sam64
- BR_EX_DMA_r14
-ENDPROC(_diag0c_dma)
+ BR_EX_AMODE31_r14
+ENDPROC(_diag0c_amode31)
/*
- * void _diag308_reset_dma(void)
+ * void _diag308_reset_amode31(void)
*
* Calls diag 308 subcode 1 and continues execution
*/
-ENTRY(_diag308_reset_dma)
+ENTRY(_diag308_reset_amode31)
larl %r4,.Lctlregs # Save control registers
stctg %c0,%c15,0(%r4)
lg %r2,0(%r4) # Disable lowcore protection
@@ -107,7 +107,7 @@ ENTRY(_diag308_reset_dma)
larl %r4,.Lcontinue_psw # Save PSW flags
epsw %r2,%r3
stm %r2,%r3,0(%r4)
- larl %r4,restart_part2 # Setup restart PSW at absolute 0
+ larl %r4,.Lrestart_part2 # Setup restart PSW at absolute 0
larl %r3,.Lrestart_diag308_psw
og %r4,0(%r3) # Save PSW
lghi %r3,0
@@ -115,7 +115,7 @@ ENTRY(_diag308_reset_dma)
lghi %r1,1
lghi %r0,0
diag %r0,%r1,0x308
-restart_part2:
+.Lrestart_part2:
lhi %r0,0 # Load r0 with zero
lhi %r1,2 # Use mode 2 = ESAME (dump)
sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode
@@ -127,19 +127,21 @@ restart_part2:
larl %r4,.Lprefix # Restore prefix register
spx 0(%r4)
larl %r4,.Lcontinue_psw # Restore PSW flags
+ larl %r2,.Lcontinue
+ stg %r2,8(%r4)
lpswe 0(%r4)
.Lcontinue:
- BR_EX_DMA_r14
-ENDPROC(_diag308_reset_dma)
+ BR_EX_AMODE31_r14
+ENDPROC(_diag308_reset_amode31)
- .section .dma.data,"aw",@progbits
+ .section .amode31.data,"aw",@progbits
.align 8
.Lrestart_diag308_psw:
.long 0x00080000,0x80000000
.align 8
.Lcontinue_psw:
- .quad 0,.Lcontinue
+ .quad 0,0
.align 8
.Lctlreg0:
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 26aa2614ee35..d2458a29618f 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -406,7 +406,7 @@ static ssize_t dispatching_store(struct device *dev,
if (val != 0 && val != 1)
return -EINVAL;
rc = 0;
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&smp_cpu_state_mutex);
if (cpu_management == val)
goto out;
@@ -417,7 +417,7 @@ static ssize_t dispatching_store(struct device *dev,
topology_expect_change();
out:
mutex_unlock(&smp_cpu_state_mutex);
- put_online_cpus();
+ cpus_read_unlock();
return rc ? rc : count;
}
static DEVICE_ATTR_RW(dispatching);
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 76947275fe8b..bcefc2173de4 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -291,7 +291,7 @@ static void __init test_monitor_call(void)
void __init trap_init(void)
{
- sort_extable(__start_dma_ex_table, __stop_dma_ex_table);
+ sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table);
local_mcck_enable();
test_monitor_call();
}
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index aeb0a15bcbb7..5a656c7b7a67 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -51,24 +51,9 @@ void __init setup_uv(void)
{
unsigned long uv_stor_base;
- /*
- * keep these conditions in line with has_uv_sec_stor_limit()
- */
if (!is_prot_virt_host())
return;
- if (is_prot_virt_guest()) {
- prot_virt_host = 0;
- pr_warn("Protected virtualization not available in protected guests.");
- return;
- }
-
- if (!test_facility(158)) {
- prot_virt_host = 0;
- pr_warn("Protected virtualization not supported by the hardware.");
- return;
- }
-
uv_stor_base = (unsigned long)memblock_alloc_try_nid(
uv_info.uv_base_stor_len, SZ_1M, SZ_2G,
MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE);
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index 3457dcf10396..e3e6ac5686df 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -36,6 +36,7 @@ CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
GCOV_PROFILE := n
UBSAN_SANITIZE := n
KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
# Force dependency (incbin is bad)
$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 2a2092ce19f1..6568de236701 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -39,6 +39,7 @@ CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
GCOV_PROFILE := n
UBSAN_SANITIZE := n
KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
# Force dependency (incbin is bad)
$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 4c0e19145cc6..63bdb9e1bfc1 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -5,6 +5,7 @@
#include <asm/thread_info.h>
#include <asm/page.h>
+#include <asm/ftrace.lds.h>
/*
* Put .bss..swapper_pg_dir as the first thing in .bss. This will
@@ -46,6 +47,7 @@ SECTIONS
KPROBES_TEXT
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
+ FTRACE_HOTPATCH_TRAMPOLINES_TEXT
*(.text.*_indirect_*)
*(.fixup)
*(.gnu.warning)
@@ -71,6 +73,13 @@ SECTIONS
RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE)
BOOT_DATA_PRESERVED
+ . = ALIGN(8);
+ .amode31.refs : {
+ _start_amode31_refs = .;
+ *(.amode31.refs)
+ _end_amode31_refs = .;
+ }
+
_edata = .; /* End of data section */
/* will be freed after init */
@@ -136,6 +145,32 @@ SECTIONS
BOOT_DATA
+ /*
+ * .amode31 section for code, data, ex_table that need to stay
+ * below 2 GB, even when the kernel is relocated above 2 GB.
+ */
+ . = ALIGN(PAGE_SIZE);
+ _samode31 = .;
+ .amode31.text : {
+ _stext_amode31 = .;
+ *(.amode31.text)
+ *(.amode31.text.*_indirect_*)
+ . = ALIGN(PAGE_SIZE);
+ _etext_amode31 = .;
+ }
+ . = ALIGN(16);
+ .amode31.ex_table : {
+ _start_amode31_ex_table = .;
+ KEEP(*(.amode31.ex_table))
+ _stop_amode31_ex_table = .;
+ }
+ . = ALIGN(PAGE_SIZE);
+ .amode31.data : {
+ *(.amode31.data)
+ }
+ . = ALIGN(PAGE_SIZE);
+ _eamode31 = .;
+
/* early.c uses stsi, which requires page aligned data. */
. = ALIGN(PAGE_SIZE);
INIT_DATA_SECTION(0x100)
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index f289afeb3f31..bccbf394ae7e 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -7,17 +7,10 @@
* Heiko Carstens <heiko.carstens@de.ibm.com>,
*/
-#include <linux/sched.h>
+#include <linux/processor.h>
#include <linux/delay.h>
-#include <linux/timex.h>
-#include <linux/export.h>
-#include <linux/irqflags.h>
-#include <linux/interrupt.h>
-#include <linux/jump_label.h>
-#include <linux/irq.h>
-#include <asm/vtimer.h>
#include <asm/div64.h>
-#include <asm/idle.h>
+#include <asm/timex.h>
void __delay(unsigned long loops)
{
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index e40a30647d99..0b0c8c284953 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -4,6 +4,7 @@
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/mm.h>
+#include <linux/kfence.h>
#include <linux/kasan.h>
#include <asm/ptdump.h>
#include <asm/kasan.h>
@@ -21,6 +22,10 @@ enum address_markers_idx {
IDENTITY_BEFORE_END_NR,
KERNEL_START_NR,
KERNEL_END_NR,
+#ifdef CONFIG_KFENCE
+ KFENCE_START_NR,
+ KFENCE_END_NR,
+#endif
IDENTITY_AFTER_NR,
IDENTITY_AFTER_END_NR,
#ifdef CONFIG_KASAN
@@ -40,6 +45,10 @@ static struct addr_marker address_markers[] = {
[IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"},
[KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"},
[KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"},
+#ifdef CONFIG_KFENCE
+ [KFENCE_START_NR] = {0, "KFence Pool Start"},
+ [KFENCE_END_NR] = {0, "KFence Pool End"},
+#endif
[IDENTITY_AFTER_NR] = {(unsigned long)_end, "Identity Mapping Start"},
[IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"},
#ifdef CONFIG_KASAN
@@ -248,6 +257,9 @@ static void sort_address_markers(void)
static int pt_dump_init(void)
{
+#ifdef CONFIG_KFENCE
+ unsigned long kfence_start = (unsigned long)__kfence_pool;
+#endif
/*
* Figure out the maximum virtual address being accessible with the
* kernel ASCE. We need this to keep the page table walker functions
@@ -262,6 +274,10 @@ static int pt_dump_init(void)
address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size;
address_markers[VMALLOC_NR].start_address = VMALLOC_START;
address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
+#ifdef CONFIG_KFENCE
+ address_markers[KFENCE_START_NR].start_address = kfence_start;
+ address_markers[KFENCE_END_NR].start_address = kfence_start + KFENCE_POOL_SIZE;
+#endif
sort_address_markers();
#ifdef CONFIG_PTDUMP_DEBUGFS
debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index e33c43b38afe..212632d57db9 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -31,6 +31,7 @@
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
+#include <linux/kfence.h>
#include <asm/asm-offsets.h>
#include <asm/diag.h>
#include <asm/gmap.h>
@@ -230,8 +231,8 @@ const struct exception_table_entry *s390_search_extables(unsigned long addr)
{
const struct exception_table_entry *fixup;
- fixup = search_extable(__start_dma_ex_table,
- __stop_dma_ex_table - __start_dma_ex_table,
+ fixup = search_extable(__start_amode31_ex_table,
+ __stop_amode31_ex_table - __start_amode31_ex_table,
addr);
if (!fixup)
fixup = search_exception_tables(addr);
@@ -356,6 +357,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
unsigned long address;
unsigned int flags;
vm_fault_t fault;
+ bool is_write;
tsk = current;
/*
@@ -369,6 +371,8 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
mm = tsk->mm;
trans_exc_code = regs->int_parm_long;
+ address = trans_exc_code & __FAIL_ADDR_MASK;
+ is_write = (trans_exc_code & store_indication) == 0x400;
/*
* Verify that the fault happened in user space, that
@@ -379,6 +383,8 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
type = get_fault_type(regs);
switch (type) {
case KERNEL_FAULT:
+ if (kfence_handle_page_fault(address, is_write, regs))
+ return 0;
goto out;
case USER_FAULT:
case GMAP_FAULT:
@@ -387,12 +393,11 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
break;
}
- address = trans_exc_code & __FAIL_ADDR_MASK;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
flags = FAULT_FLAG_DEFAULT;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
- if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
+ if (access == VM_WRITE || is_write)
flags |= FAULT_FLAG_WRITE;
mmap_read_lock(mm);
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 8ac710de1ab1..f3db3caa8447 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -34,6 +34,7 @@
#include <asm/processor.h>
#include <linux/uaccess.h>
#include <asm/pgalloc.h>
+#include <asm/kfence.h>
#include <asm/ptdump.h>
#include <asm/dma.h>
#include <asm/lowcore.h>
@@ -200,7 +201,7 @@ void __init mem_init(void)
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
pv_init();
-
+ kfence_split_mapping();
/* Setup guest page hinting */
cmma_init();
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index a0fdc6dc5f9d..3e4735168019 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -107,6 +107,9 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
}
+ /*
+ * The first 1MB of 1:1 mapping is mapped with 4KB pages
+ */
while (address < end) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
@@ -157,30 +160,26 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
pm_dir = pmd_offset(pu_dir, address);
if (pmd_none(*pm_dir)) {
- if (mode == POPULATE_ZERO_SHADOW &&
- IS_ALIGNED(address, PMD_SIZE) &&
+ if (IS_ALIGNED(address, PMD_SIZE) &&
end - address >= PMD_SIZE) {
- pmd_populate(&init_mm, pm_dir,
- kasan_early_shadow_pte);
- address = (address + PMD_SIZE) & PMD_MASK;
- continue;
- }
- /* the first megabyte of 1:1 is mapped with 4k pages */
- if (has_edat && address && end - address >= PMD_SIZE &&
- mode != POPULATE_ZERO_SHADOW) {
- void *page;
-
- if (mode == POPULATE_ONE2ONE) {
- page = (void *)address;
- } else {
- page = kasan_early_alloc_segment();
- memset(page, 0, _SEGMENT_SIZE);
+ if (mode == POPULATE_ZERO_SHADOW) {
+ pmd_populate(&init_mm, pm_dir, kasan_early_shadow_pte);
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
+ } else if (has_edat && address) {
+ void *page;
+
+ if (mode == POPULATE_ONE2ONE) {
+ page = (void *)address;
+ } else {
+ page = kasan_early_alloc_segment();
+ memset(page, 0, _SEGMENT_SIZE);
+ }
+ pmd_val(*pm_dir) = __pa(page) | sgt_prot;
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
}
- pmd_val(*pm_dir) = __pa(page) | sgt_prot;
- address = (address + PMD_SIZE) & PMD_MASK;
- continue;
}
-
pt_dir = kasan_early_pte_alloc();
pmd_populate(&init_mm, pm_dir, pt_dir);
} else if (pmd_large(*pm_dir)) {
@@ -300,7 +299,7 @@ void __init kasan_early_init(void)
pgalloc_low = round_up((unsigned long)_end, _SEGMENT_SIZE);
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
initrd_end =
- round_up(INITRD_START + INITRD_SIZE, _SEGMENT_SIZE);
+ round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
pgalloc_low = max(pgalloc_low, initrd_end);
}
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index a0f54bd5e98a..9663ce3625bc 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -228,7 +228,7 @@ void *xlate_dev_mem_ptr(phys_addr_t addr)
void *bounce = (void *) addr;
unsigned long size;
- get_online_cpus();
+ cpus_read_lock();
preempt_disable();
if (is_swapped(addr)) {
size = PAGE_SIZE - (addr & ~PAGE_MASK);
@@ -237,7 +237,7 @@ void *xlate_dev_mem_ptr(phys_addr_t addr)
memcpy_absolute(bounce, (void *) addr, size);
}
preempt_enable();
- put_online_cpus();
+ cpus_read_unlock();
return bounce;
}
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 68b153083a92..18a6381097a9 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -228,46 +228,3 @@ void arch_set_page_dat(struct page *page, int order)
return;
set_page_stable_dat(page, order);
}
-
-void arch_set_page_nodat(struct page *page, int order)
-{
- if (cmma_flag < 2)
- return;
- set_page_stable_nodat(page, order);
-}
-
-int arch_test_page_nodat(struct page *page)
-{
- unsigned char state;
-
- if (cmma_flag < 2)
- return 0;
- state = get_page_state(page);
- return !!(state & 0x20);
-}
-
-void arch_set_page_states(int make_stable)
-{
- unsigned long flags, order, t;
- struct list_head *l;
- struct page *page;
- struct zone *zone;
-
- if (!cmma_flag)
- return;
- if (make_stable)
- drain_local_pages(NULL);
- for_each_populated_zone(zone) {
- spin_lock_irqsave(&zone->lock, flags);
- for_each_migratetype_order(order, t) {
- list_for_each(l, &zone->free_area[order].free_list[t]) {
- page = list_entry(l, struct page, lru);
- if (make_stable)
- set_page_stable_dat(page, order);
- else
- set_page_unused(page, order);
- }
- }
- spin_unlock_irqrestore(&zone->lock, flags);
- }
-}
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index ed8e5b3575d5..fdc86c0e4e6c 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -8,6 +8,7 @@
#include <asm/cacheflush.h>
#include <asm/facility.h>
#include <asm/pgalloc.h>
+#include <asm/kfence.h>
#include <asm/page.h>
#include <asm/set_memory.h>
@@ -85,6 +86,8 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
{
pte_t *ptep, new;
+ if (flags == SET_MEMORY_4K)
+ return 0;
ptep = pte_offset_kernel(pmdp, addr);
do {
new = *ptep;
@@ -155,6 +158,7 @@ static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
unsigned long flags)
{
unsigned long next;
+ int need_split;
pmd_t *pmdp;
int rc = 0;
@@ -164,7 +168,10 @@ static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
return -EINVAL;
next = pmd_addr_end(addr, end);
if (pmd_large(*pmdp)) {
- if (addr & ~PMD_MASK || addr + PMD_SIZE > next) {
+ need_split = !!(flags & SET_MEMORY_4K);
+ need_split |= !!(addr & ~PMD_MASK);
+ need_split |= !!(addr + PMD_SIZE > next);
+ if (need_split) {
rc = split_pmd_page(pmdp, addr);
if (rc)
return rc;
@@ -232,6 +239,7 @@ static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
unsigned long flags)
{
unsigned long next;
+ int need_split;
pud_t *pudp;
int rc = 0;
@@ -241,7 +249,10 @@ static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
return -EINVAL;
next = pud_addr_end(addr, end);
if (pud_large(*pudp)) {
- if (addr & ~PUD_MASK || addr + PUD_SIZE > next) {
+ need_split = !!(flags & SET_MEMORY_4K);
+ need_split |= !!(addr & ~PUD_MASK);
+ need_split |= !!(addr + PUD_SIZE > next);
+ if (need_split) {
rc = split_pud_page(pudp, addr);
if (rc)
break;
@@ -316,7 +327,7 @@ int __set_memory(unsigned long addr, int numpages, unsigned long flags)
return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags);
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
static void ipte_range(pte_t *pte, unsigned long address, int nr)
{
@@ -340,7 +351,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
pte_t *pte;
for (i = 0; i < numpages;) {
- address = page_to_phys(page + i);
+ address = (unsigned long)page_to_virt(page + i);
pte = virt_to_kpte(address);
nr = (unsigned long)pte >> ilog2(sizeof(long));
nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 96897fab89dc..2b1c6d916cf9 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -581,7 +581,7 @@ void __init vmem_map_init(void)
__set_memory((unsigned long)_sinittext,
(unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
- __set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT,
+ __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
/* we need lowcore executable for our LPSWE instructions */
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index b0993e05affe..e7e6788d75a8 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -113,13 +113,16 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT);
struct zpci_fib fib = {0};
- u8 status;
+ u8 cc, status;
WARN_ON_ONCE(iota & 0x3fff);
fib.pba = base;
fib.pal = limit;
fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
- return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc)
+ zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
+ return cc;
}
/* Modify PCI: Unregister I/O address translation parameters */
@@ -130,9 +133,9 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
u8 cc, status;
cc = zpci_mod_fc(req, &fib, &status);
- if (cc == 3) /* Function already gone. */
- cc = 0;
- return cc ? -EIO : 0;
+ if (cc)
+ zpci_dbg(3, "unreg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
+ return cc;
}
/* Modify PCI: Set PCI function measurement parameters */
@@ -560,9 +563,12 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
int pcibios_add_device(struct pci_dev *pdev)
{
+ struct zpci_dev *zdev = to_zpci(pdev);
struct resource *res;
int i;
+ /* The pdev has a reference to the zdev via its bus */
+ zpci_zdev_get(zdev);
if (pdev->is_physfn)
pdev->no_vf_scan = 1;
@@ -582,7 +588,10 @@ int pcibios_add_device(struct pci_dev *pdev)
void pcibios_release_device(struct pci_dev *pdev)
{
+ struct zpci_dev *zdev = to_zpci(pdev);
+
zpci_unmap_resources(pdev);
+ zpci_zdev_put(zdev);
}
int pcibios_enable_device(struct pci_dev *pdev, int mask)
@@ -653,32 +662,37 @@ void zpci_free_domain(int domain)
int zpci_enable_device(struct zpci_dev *zdev)
{
- int rc;
-
- rc = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
- if (rc)
- goto out;
-
- rc = zpci_dma_init_device(zdev);
- if (rc)
- goto out_dma;
+ u32 fh = zdev->fh;
+ int rc = 0;
- return 0;
-
-out_dma:
- clp_disable_fh(zdev);
-out:
+ if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES))
+ rc = -EIO;
+ else
+ zdev->fh = fh;
return rc;
}
int zpci_disable_device(struct zpci_dev *zdev)
{
- zpci_dma_exit_device(zdev);
- /*
- * The zPCI function may already be disabled by the platform, this is
- * detected in clp_disable_fh() which becomes a no-op.
- */
- return clp_disable_fh(zdev);
+ u32 fh = zdev->fh;
+ int cc, rc = 0;
+
+ cc = clp_disable_fh(zdev, &fh);
+ if (!cc) {
+ zdev->fh = fh;
+ } else if (cc == CLP_RC_SETPCIFN_ALRDY) {
+ pr_info("Disabling PCI function %08x had no effect as it was already disabled\n",
+ zdev->fid);
+ /* Function is already disabled - update handle */
+ rc = clp_refresh_fh(zdev->fid, &fh);
+ if (!rc) {
+ zdev->fh = fh;
+ rc = -EINVAL;
+ }
+ } else {
+ rc = -EIO;
+ }
+ return rc;
}
/**
@@ -788,6 +802,11 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
if (zdev->zbus->bus)
zpci_bus_remove_device(zdev, false);
+ if (zdev->dma_table) {
+ rc = zpci_dma_exit_device(zdev);
+ if (rc)
+ return rc;
+ }
if (zdev_enabled(zdev)) {
rc = zpci_disable_device(zdev);
if (rc)
@@ -811,6 +830,8 @@ void zpci_release_device(struct kref *kref)
if (zdev->zbus->bus)
zpci_bus_remove_device(zdev, false);
+ if (zdev->dma_table)
+ zpci_dma_exit_device(zdev);
if (zdev_enabled(zdev))
zpci_disable_device(zdev);
@@ -822,7 +843,8 @@ void zpci_release_device(struct kref *kref)
case ZPCI_FN_STATE_STANDBY:
if (zdev->has_hp_slot)
zpci_exit_slot(zdev);
- zpci_cleanup_bus_resources(zdev);
+ if (zdev->has_resources)
+ zpci_cleanup_bus_resources(zdev);
zpci_bus_device_unregister(zdev);
zpci_destroy_iommu(zdev);
fallthrough;
@@ -886,7 +908,6 @@ static void zpci_mem_exit(void)
}
static unsigned int s390_pci_probe __initdata = 1;
-static unsigned int s390_pci_no_mio __initdata;
unsigned int s390_pci_force_floating __initdata;
static unsigned int s390_pci_initialized;
@@ -897,7 +918,7 @@ char * __init pcibios_setup(char *str)
return NULL;
}
if (!strcmp(str, "nomio")) {
- s390_pci_no_mio = 1;
+ S390_lowcore.machine_flags &= ~MACHINE_FLAG_PCI_MIO;
return NULL;
}
if (!strcmp(str, "force_floating")) {
@@ -928,7 +949,7 @@ static int __init pci_base_init(void)
return 0;
}
- if (test_facility(153) && !s390_pci_no_mio) {
+ if (MACHINE_HAS_PCI_MIO) {
static_branch_enable(&have_mio);
ctl_set_bit(2, 5);
}
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 9629f9779c79..5d77acbd1c87 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -49,6 +49,11 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev)
rc = zpci_enable_device(zdev);
if (rc)
return rc;
+ rc = zpci_dma_init_device(zdev);
+ if (rc) {
+ zpci_disable_device(zdev);
+ return rc;
+ }
}
if (!zdev->has_resources) {
@@ -343,11 +348,11 @@ static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
{
int rc = -EINVAL;
- zdev->zbus = zbus;
if (zbus->function[zdev->devfn]) {
pr_err("devfn %04x is already assigned\n", zdev->devfn);
return rc;
}
+ zdev->zbus = zbus;
zbus->function[zdev->devfn] = zdev;
zpci_nb_devices++;
@@ -367,6 +372,7 @@ static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
error:
zbus->function[zdev->devfn] = NULL;
+ zdev->zbus = NULL;
zpci_nb_devices--;
return rc;
}
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
index b877a97e6745..e359d2686178 100644
--- a/arch/s390/pci/pci_bus.h
+++ b/arch/s390/pci/pci_bus.h
@@ -22,6 +22,11 @@ static inline void zpci_zdev_put(struct zpci_dev *zdev)
kref_put(&zdev->kref, zpci_release_device);
}
+static inline void zpci_zdev_get(struct zpci_dev *zdev)
+{
+ kref_get(&zdev->kref);
+}
+
int zpci_alloc_domain(int domain);
void zpci_free_domain(int domain);
int zpci_setup_bus_resources(struct zpci_dev *zdev,
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index d3331596ddbe..51dc2215a2b7 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -212,17 +212,22 @@ out:
return rc;
}
-static int clp_refresh_fh(u32 fid);
-/*
- * Enable/Disable a given PCI function and update its function handle if
- * necessary
+/**
+ * clp_set_pci_fn() - Execute a command on a PCI function
+ * @zdev: Function that will be affected
+ * @fh: Out parameter for updated function handle
+ * @nr_dma_as: DMA address space number
+ * @command: The command code to execute
+ *
+ * Returns: 0 on success, < 0 for Linux errors (e.g. -ENOMEM), and
+ * > 0 for non-success platform responses
*/
-static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command)
+static int clp_set_pci_fn(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as, u8 command)
{
struct clp_req_rsp_set_pci *rrb;
int rc, retries = 100;
- u32 fid = zdev->fid;
+ *fh = 0;
rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
@@ -245,17 +250,13 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command)
}
} while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY);
- if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
+ if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
+ *fh = rrb->response.fh;
+ } else {
zpci_err("Set PCI FN:\n");
zpci_err_clp(rrb->response.hdr.rsp, rc);
- }
-
- if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
- zdev->fh = rrb->response.fh;
- } else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY &&
- rrb->response.fh == 0) {
- /* Function is already in desired state - update handle */
- rc = clp_refresh_fh(fid);
+ if (!rc)
+ rc = rrb->response.hdr.rsp;
}
clp_free_block(rrb);
return rc;
@@ -295,35 +296,62 @@ int clp_setup_writeback_mio(void)
return rc;
}
-int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as)
+int clp_enable_fh(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as)
{
int rc;
- rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_PCI_FN);
- zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
- if (rc)
- goto out;
-
- if (zpci_use_mio(zdev)) {
- rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_MIO);
+ rc = clp_set_pci_fn(zdev, fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN);
+ zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, *fh, rc);
+ if (!rc && zpci_use_mio(zdev)) {
+ rc = clp_set_pci_fn(zdev, fh, nr_dma_as, CLP_SET_ENABLE_MIO);
zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n",
- zdev->fid, zdev->fh, rc);
+ zdev->fid, *fh, rc);
if (rc)
- clp_disable_fh(zdev);
+ clp_disable_fh(zdev, fh);
}
-out:
return rc;
}
-int clp_disable_fh(struct zpci_dev *zdev)
+int clp_disable_fh(struct zpci_dev *zdev, u32 *fh)
{
int rc;
if (!zdev_enabled(zdev))
return 0;
- rc = clp_set_pci_fn(zdev, 0, CLP_SET_DISABLE_PCI_FN);
- zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
+ rc = clp_set_pci_fn(zdev, fh, 0, CLP_SET_DISABLE_PCI_FN);
+ zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, *fh, rc);
+ return rc;
+}
+
+static int clp_list_pci_req(struct clp_req_rsp_list_pci *rrb,
+ u64 *resume_token, int *nentries)
+{
+ int rc;
+
+ memset(rrb, 0, sizeof(*rrb));
+ rrb->request.hdr.len = sizeof(rrb->request);
+ rrb->request.hdr.cmd = CLP_LIST_PCI;
+ /* store as many entries as possible */
+ rrb->response.hdr.len = CLP_BLK_SIZE - LIST_PCI_HDR_LEN;
+ rrb->request.resume_token = *resume_token;
+
+ /* Get PCI function handle list */
+ rc = clp_req(rrb, CLP_LPS_PCI);
+ if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
+ zpci_err("List PCI FN:\n");
+ zpci_err_clp(rrb->response.hdr.rsp, rc);
+ return -EIO;
+ }
+
+ update_uid_checking(rrb->response.uid_checking);
+ WARN_ON_ONCE(rrb->response.entry_size !=
+ sizeof(struct clp_fh_list_entry));
+
+ *nentries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) /
+ rrb->response.entry_size;
+ *resume_token = rrb->response.resume_token;
+
return rc;
}
@@ -331,38 +359,40 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, void *data,
void (*cb)(struct clp_fh_list_entry *, void *))
{
u64 resume_token = 0;
- int entries, i, rc;
+ int nentries, i, rc;
do {
- memset(rrb, 0, sizeof(*rrb));
- rrb->request.hdr.len = sizeof(rrb->request);
- rrb->request.hdr.cmd = CLP_LIST_PCI;
- /* store as many entries as possible */
- rrb->response.hdr.len = CLP_BLK_SIZE - LIST_PCI_HDR_LEN;
- rrb->request.resume_token = resume_token;
-
- /* Get PCI function handle list */
- rc = clp_req(rrb, CLP_LPS_PCI);
- if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
- zpci_err("List PCI FN:\n");
- zpci_err_clp(rrb->response.hdr.rsp, rc);
- rc = -EIO;
- goto out;
- }
+ rc = clp_list_pci_req(rrb, &resume_token, &nentries);
+ if (rc)
+ return rc;
+ for (i = 0; i < nentries; i++)
+ cb(&rrb->response.fh_list[i], data);
+ } while (resume_token);
- update_uid_checking(rrb->response.uid_checking);
- WARN_ON_ONCE(rrb->response.entry_size !=
- sizeof(struct clp_fh_list_entry));
+ return rc;
+}
- entries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) /
- rrb->response.entry_size;
+static int clp_find_pci(struct clp_req_rsp_list_pci *rrb, u32 fid,
+ struct clp_fh_list_entry *entry)
+{
+ struct clp_fh_list_entry *fh_list;
+ u64 resume_token = 0;
+ int nentries, i, rc;
- resume_token = rrb->response.resume_token;
- for (i = 0; i < entries; i++)
- cb(&rrb->response.fh_list[i], data);
+ do {
+ rc = clp_list_pci_req(rrb, &resume_token, &nentries);
+ if (rc)
+ return rc;
+ for (i = 0; i < nentries; i++) {
+ fh_list = rrb->response.fh_list;
+ if (fh_list[i].fid == fid) {
+ *entry = fh_list[i];
+ return 0;
+ }
+ }
} while (resume_token);
-out:
- return rc;
+
+ return -ENODEV;
}
static void __clp_add(struct clp_fh_list_entry *entry, void *data)
@@ -392,67 +422,41 @@ int clp_scan_pci_devices(void)
return rc;
}
-static void __clp_refresh_fh(struct clp_fh_list_entry *entry, void *data)
-{
- struct zpci_dev *zdev;
- u32 fid = *((u32 *)data);
-
- if (!entry->vendor_id || fid != entry->fid)
- return;
-
- zdev = get_zdev_by_fid(fid);
- if (!zdev)
- return;
-
- zdev->fh = entry->fh;
-}
-
/*
- * Refresh the function handle of the function matching @fid
+ * Get the current function handle of the function matching @fid
*/
-static int clp_refresh_fh(u32 fid)
+int clp_refresh_fh(u32 fid, u32 *fh)
{
struct clp_req_rsp_list_pci *rrb;
+ struct clp_fh_list_entry entry;
int rc;
rrb = clp_alloc_block(GFP_NOWAIT);
if (!rrb)
return -ENOMEM;
- rc = clp_list_pci(rrb, &fid, __clp_refresh_fh);
+ rc = clp_find_pci(rrb, fid, &entry);
+ if (!rc)
+ *fh = entry.fh;
clp_free_block(rrb);
return rc;
}
-struct clp_state_data {
- u32 fid;
- enum zpci_state state;
-};
-
-static void __clp_get_state(struct clp_fh_list_entry *entry, void *data)
-{
- struct clp_state_data *sd = data;
-
- if (entry->fid != sd->fid)
- return;
-
- sd->state = entry->config_state;
-}
-
int clp_get_state(u32 fid, enum zpci_state *state)
{
struct clp_req_rsp_list_pci *rrb;
- struct clp_state_data sd = {fid, ZPCI_FN_STATE_RESERVED};
+ struct clp_fh_list_entry entry;
int rc;
+ *state = ZPCI_FN_STATE_RESERVED;
rrb = clp_alloc_block(GFP_ATOMIC);
if (!rrb)
return -ENOMEM;
- rc = clp_list_pci(rrb, &sd, __clp_get_state);
+ rc = clp_find_pci(rrb, fid, &entry);
if (!rc)
- *state = sd.state;
+ *state = entry.config_state;
clp_free_block(rrb);
return rc;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index ebc9a49523aa..58f2f7abea96 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -590,10 +590,11 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
}
}
- rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- (u64) zdev->dma_table);
- if (rc)
+ if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ (u64)zdev->dma_table)) {
+ rc = -EIO;
goto free_bitmap;
+ }
return 0;
free_bitmap:
@@ -608,17 +609,25 @@ out:
return rc;
}
-void zpci_dma_exit_device(struct zpci_dev *zdev)
+int zpci_dma_exit_device(struct zpci_dev *zdev)
{
+ int cc = 0;
+
/*
* At this point, if the device is part of an IOMMU domain, this would
* be a strong hint towards a bug in the IOMMU API (common) code and/or
* simultaneous access via IOMMU and DMA API. So let's issue a warning.
*/
WARN_ON(zdev->s390_domain);
-
- if (zpci_unregister_ioat(zdev, 0))
- return;
+ if (zdev_enabled(zdev))
+ cc = zpci_unregister_ioat(zdev, 0);
+ /*
+ * cc == 3 indicates the function is gone already. This can happen
+ * if the function was deconfigured/disabled suddenly and we have not
+ * received a new handle yet.
+ */
+ if (cc && cc != 3)
+ return -EIO;
dma_cleanup_tables(zdev->dma_table);
zdev->dma_table = NULL;
@@ -626,8 +635,8 @@ void zpci_dma_exit_device(struct zpci_dev *zdev)
zdev->iommu_bitmap = NULL;
vfree(zdev->lazy_bitmap);
zdev->lazy_bitmap = NULL;
-
zdev->next_bit = 0;
+ return 0;
}
static int __init dma_alloc_cpu_table_caches(void)
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index cd447b96b4b1..c856f80cb21b 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -84,7 +84,10 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
/* Even though the device is already gone we still
* need to free zPCI resources as part of the disable.
*/
- zpci_disable_device(zdev);
+ if (zdev->dma_table)
+ zpci_dma_exit_device(zdev);
+ if (zdev_enabled(zdev))
+ zpci_disable_device(zdev);
zdev->state = ZPCI_FN_STATE_STANDBY;
}
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 9c7de9089939..3823e159bf74 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -365,10 +365,6 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
for_each_pci_msi_entry(msi, pdev) {
if (!msi->irq)
continue;
- if (msi->msi_attrib.is_msix)
- __pci_msix_desc_mask_irq(msi, 1);
- else
- __pci_msi_desc_mask_irq(msi, 1, 1);
irq_set_msi_desc(msi->irq, NULL);
irq_free_desc(msi->irq);
msi->msg.address_lo = 0;
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 6e2450c2b9c1..335c281811c7 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -82,13 +82,26 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
pci_lock_rescan_remove();
if (pci_dev_is_added(pdev)) {
pci_stop_and_remove_bus_device(pdev);
- ret = zpci_disable_device(zdev);
- if (ret)
- goto out;
+ if (zdev->dma_table) {
+ ret = zpci_dma_exit_device(zdev);
+ if (ret)
+ goto out;
+ }
+
+ if (zdev_enabled(zdev)) {
+ ret = zpci_disable_device(zdev);
+ if (ret)
+ goto out;
+ }
ret = zpci_enable_device(zdev);
if (ret)
goto out;
+ ret = zpci_dma_init_device(zdev);
+ if (ret) {
+ zpci_disable_device(zdev);
+ goto out;
+ }
pci_rescan_bus(zdev->zbus->bus);
}
out:
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index 21c4ebe29b9a..360ada80d20c 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -19,6 +19,7 @@ KCOV_INSTRUMENT := n
GCOV_PROFILE := n
UBSAN_SANITIZE := n
KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes
KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt
index 0e207c46e8da..6db9820d104a 100644
--- a/arch/s390/tools/opcodes.txt
+++ b/arch/s390/tools/opcodes.txt
@@ -189,6 +189,8 @@ ad stosm SI_URD
ae sigp RS_RRRD
af mc SI_URD
b1 lra RX_RRRD
+b200 lbear S_RD
+b201 stbear S_RD
b202 stidp S_RD
b204 sck S_RD
b205 stck S_RD
@@ -523,6 +525,7 @@ b931 clgfr RRE_RR
b938 sortl RRE_RR
b939 dfltcc RRF_R0RR2
b93a kdsa RRE_RR
+b93b nnpa RRE_00
b93c ppno RRE_RR
b93e kimd RRE_RR
b93f klmd RRE_RR
@@ -562,6 +565,7 @@ b987 dlgr RRE_RR
b988 alcgr RRE_RR
b989 slbgr RRE_RR
b98a cspg RRE_RR
+b98b rdp RRF_RURR2
b98d epsw RRE_RR
b98e idte RRF_RURR2
b98f crdte RRF_RURR2
@@ -876,19 +880,32 @@ e63d vstrl VSI_URDV
e63f vstrlr VRS_RRDV
e649 vlip VRI_V0UU2
e650 vcvb VRR_RV0UU
+e651 vclzdp VRR_VV0U2
e652 vcvbg VRR_RV0UU
+e654 vupkzh VRR_VV0U2
+e655 vcnf VRR_VV0UU2
+e656 vclfnh VRR_VV0UU2
e658 vcvd VRI_VR0UU
e659 vsrp VRI_VVUUU2
e65a vcvdg VRI_VR0UU
e65b vpsop VRI_VVUUU2
+e65c vupkzl VRR_VV0U2
+e65d vcfn VRR_VV0UU2
+e65e vclfnl VRR_VV0UU2
e65f vtp VRR_0V
+e670 vpkzr VRI_VVV0UU2
e671 vap VRI_VVV0UU2
+e672 vsrpr VRI_VVV0UU2
e673 vsp VRI_VVV0UU2
+e674 vschp VRR_VVV0U0U
+e675 vcrnf VRR_VVV0UU
e677 vcp VRR_0VV0U
e678 vmp VRI_VVV0UU2
e679 vmsp VRI_VVV0UU2
e67a vdp VRI_VVV0UU2
e67b vrp VRI_VVV0UU2
+e67c vscshp VRR_VVV
+e67d vcsph VRR_VVV0U0
e67e vsdp VRI_VVV0UU2
e700 vleb VRX_VRRDU
e701 vleh VRX_VRRDU
@@ -1081,6 +1098,7 @@ eb61 stric RSY_RDRU
eb62 mric RSY_RDRU
eb6a asi SIY_IRD
eb6e alsi SIY_IRD
+eb71 lpswey SIY_URD
eb7a agsi SIY_IRD
eb7e algsi SIY_IRD
eb80 icmh RSY_RURD
diff --git a/arch/sh/boards/mach-se/7343/irq.c b/arch/sh/boards/mach-se/7343/irq.c
index 1aedbfe32654..f9f3b14f70d5 100644
--- a/arch/sh/boards/mach-se/7343/irq.c
+++ b/arch/sh/boards/mach-se/7343/irq.c
@@ -38,7 +38,7 @@ static void se7343_irq_demux(struct irq_desc *desc)
mask = ioread16(se7343_irq_regs + PA_CPLD_ST_REG);
for_each_set_bit(bit, &mask, SE7343_FPGA_IRQ_NR)
- generic_handle_irq(irq_linear_revmap(se7343_irq_domain, bit));
+ generic_handle_domain_irq(se7343_irq_domain, bit);
chip->irq_unmask(data);
}
diff --git a/arch/sh/boards/mach-se/7722/irq.c b/arch/sh/boards/mach-se/7722/irq.c
index 6d34592767f8..efa96edd47dc 100644
--- a/arch/sh/boards/mach-se/7722/irq.c
+++ b/arch/sh/boards/mach-se/7722/irq.c
@@ -37,7 +37,7 @@ static void se7722_irq_demux(struct irq_desc *desc)
mask = ioread16(se7722_irq_regs + IRQ01_STS_REG);
for_each_set_bit(bit, &mask, SE7722_FPGA_IRQ_NR)
- generic_handle_irq(irq_linear_revmap(se7722_irq_domain, bit));
+ generic_handle_domain_irq(se7722_irq_domain, bit);
chip->irq_unmask(data);
}
diff --git a/arch/sh/boards/mach-x3proto/gpio.c b/arch/sh/boards/mach-x3proto/gpio.c
index efc992f641a6..f82d3a6a844a 100644
--- a/arch/sh/boards/mach-x3proto/gpio.c
+++ b/arch/sh/boards/mach-x3proto/gpio.c
@@ -68,7 +68,7 @@ static void x3proto_gpio_irq_handler(struct irq_desc *desc)
mask = __raw_readw(KEYDETR);
for_each_set_bit(pin, &mask, NR_BASEBOARD_GPIOS)
- generic_handle_irq(irq_linear_revmap(x3proto_irq_domain, pin));
+ generic_handle_domain_irq(x3proto_irq_domain, pin);
chip->irq_unmask(data);
}
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index b85842cda99f..8b63410e830f 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -6,6 +6,9 @@
*/
#include <linux/types.h>
+#define compat_mode_t compat_mode_t
+typedef u16 compat_mode_t;
+
#include <asm-generic/compat.h>
#define COMPAT_USER_HZ 100
@@ -13,13 +16,9 @@
typedef u16 __compat_uid_t;
typedef u16 __compat_gid_t;
-typedef u32 __compat_uid32_t;
-typedef u32 __compat_gid32_t;
-typedef u16 compat_mode_t;
typedef u16 compat_dev_t;
typedef s16 compat_nlink_t;
typedef u16 compat_ipc_pid_t;
-typedef u32 compat_caddr_t;
typedef __kernel_fsid_t compat_fsid_t;
struct compat_stat {
@@ -115,13 +114,6 @@ struct compat_statfs {
#define COMPAT_RLIM_INFINITY 0x7fffffff
-typedef u32 compat_old_sigset_t;
-
-#define _COMPAT_NSIG 64
-#define _COMPAT_NSIG_BPW 32
-
-typedef u32 compat_sigset_word;
-
#define COMPAT_OFF_T_MAX 0x7fffffff
#ifdef CONFIG_COMPAT
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 92675dc380fa..e80ee8641ac3 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -122,6 +122,8 @@
#define SO_NETNS_COOKIE 0x0050
+#define SO_BUF_LOCK 0x0051
+
#if !defined(__KERNEL__)
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index e497185dd393..cd9dc0556e91 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1268,8 +1268,7 @@ static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
rq_for_each_segment(bvec, req, iter) {
BUG_ON(i >= io_req->desc_cnt);
- io_req->io_desc[i].buffer =
- page_address(bvec.bv_page) + bvec.bv_offset;
+ io_req->io_desc[i].buffer = bvec_virt(&bvec);
io_req->io_desc[i].length = bvec.bv_len;
i++;
}
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index d27a2a9faf3e..cde6db184c26 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -1488,7 +1488,9 @@ static void vector_get_ethtool_stats(struct net_device *dev,
}
static int vector_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct vector_private *vp = netdev_priv(netdev);
@@ -1497,7 +1499,9 @@ static int vector_get_coalesce(struct net_device *netdev,
}
static int vector_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct vector_private *vp = netdev_priv(netdev);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 88fb922c23a0..421fa9e38c60 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -119,6 +119,7 @@ config X86
select ARCH_WANT_HUGE_PMD_SHARE
select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANTS_THP_SWAP if X86_64
+ select ARCH_HAS_PARANOID_L1D_FLUSH
select BUILDTIME_TABLE_SORT
select CLKEVT_I8253
select CLOCKSOURCE_VALIDATE_LAST_CYCLE
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 307fd0000a83..d82d01490dd3 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -31,8 +31,8 @@ REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING \
REALMODE_CFLAGS += -ffreestanding
REALMODE_CFLAGS += -fno-stack-protector
-REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -Wno-address-of-packed-member)
-REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4))
+REALMODE_CFLAGS += -Wno-address-of-packed-member
+REALMODE_CFLAGS += $(cc_stack_align4)
REALMODE_CFLAGS += $(CLANG_FLAGS)
export REALMODE_CFLAGS
@@ -48,8 +48,7 @@ export BITS
#
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
#
-KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
-KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
+KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
# Intel CET isn't enabled in the kernel
KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
@@ -59,9 +58,8 @@ ifeq ($(CONFIG_X86_32),y)
UTS_MACHINE := i386
CHECKFLAGS += -D__i386__
- biarch := $(call cc-option,-m32)
- KBUILD_AFLAGS += $(biarch)
- KBUILD_CFLAGS += $(biarch)
+ KBUILD_AFLAGS += -m32
+ KBUILD_CFLAGS += -m32
KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return
@@ -72,7 +70,7 @@ ifeq ($(CONFIG_X86_32),y)
# Align the stack to the register width instead of using the default
# alignment of 16 bytes. This reduces stack usage and the number of
# alignment instructions.
- KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align4))
+ KBUILD_CFLAGS += $(cc_stack_align4)
# CPU-specific tuning. Anything which can be shared with UML should go here.
include arch/x86/Makefile_32.cpu
@@ -93,7 +91,6 @@ else
UTS_MACHINE := x86_64
CHECKFLAGS += -D__x86_64__
- biarch := -m64
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
@@ -104,7 +101,7 @@ else
KBUILD_CFLAGS += $(call cc-option,-falign-loops=1)
# Don't autogenerate traditional x87 instructions
- KBUILD_CFLAGS += $(call cc-option,-mno-80387)
+ KBUILD_CFLAGS += -mno-80387
KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387)
# By default gcc and clang use a stack alignment of 16 bytes for x86.
@@ -114,20 +111,17 @@ else
# default alignment which keep the stack *mis*aligned.
# Furthermore an alignment to the register width reduces stack usage
# and the number of alignment instructions.
- KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align8))
+ KBUILD_CFLAGS += $(cc_stack_align8)
# Use -mskip-rax-setup if supported.
KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
# FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
- cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
- cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
-
- cflags-$(CONFIG_MCORE2) += \
- $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
- cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
- $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
- cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
+ cflags-$(CONFIG_MK8) += -march=k8
+ cflags-$(CONFIG_MPSC) += -march=nocona
+ cflags-$(CONFIG_MCORE2) += -march=core2
+ cflags-$(CONFIG_MATOM) += -march=atom
+ cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic
KBUILD_CFLAGS += $(cflags-y)
KBUILD_CFLAGS += -mno-red-zone
@@ -158,18 +152,6 @@ export CONFIG_X86_X32_ABI
ifdef CONFIG_FUNCTION_GRAPH_TRACER
ifndef CONFIG_HAVE_FENTRY
ACCUMULATE_OUTGOING_ARGS := 1
- else
- ifeq ($(call cc-option-yn, -mfentry), n)
- ACCUMULATE_OUTGOING_ARGS := 1
-
- # GCC ignores '-maccumulate-outgoing-args' when used with '-Os'.
- # If '-Os' is enabled, disable it and print a warning.
- ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
- undefine CONFIG_CC_OPTIMIZE_FOR_SIZE
- $(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE. Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.)
- endif
-
- endif
endif
endif
@@ -193,7 +175,7 @@ ifdef CONFIG_RETPOLINE
# only been fixed starting from gcc stable version 8.4.0 and
# onwards, but not for older ones. See gcc bug #86952.
ifndef CONFIG_CC_IS_CLANG
- KBUILD_CFLAGS += $(call cc-option,-fno-jump-tables)
+ KBUILD_CFLAGS += -fno-jump-tables
endif
endif
@@ -275,9 +257,10 @@ endif
$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
-PHONY += install bzlilo
-install bzlilo:
- $(Q)$(MAKE) $(build)=$(boot) $@
+PHONY += install
+install:
+ $(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \
+ $(KBUILD_IMAGE) System.map "$(INSTALL_PATH)"
PHONY += vdso_install
vdso_install:
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index dfbc26a8e924..b5aecb524a8a 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -133,7 +133,7 @@ quiet_cmd_genimage = GENIMAGE $3
cmd_genimage = $(BASH) $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \
$(obj)/mtools.conf '$(FDARGS)' $(FDINITRD)
-PHONY += bzdisk fdimage fdimage144 fdimage288 hdimage isoimage install
+PHONY += bzdisk fdimage fdimage144 fdimage288 hdimage isoimage
# This requires write access to /dev/fd0
# All images require syslinux to be installed; hdimage also requires
@@ -156,8 +156,3 @@ hdimage: $(imgdeps)
isoimage: $(imgdeps)
$(call cmd,genimage,isoimage,$(obj)/image.iso)
@$(kecho) 'Kernel: $(obj)/image.iso is ready'
-
-install:
- $(CONFIG_SHELL) $(srctree)/$(src)/install.sh \
- $(KERNELRELEASE) $(obj)/bzImage \
- System.map "$(INSTALL_PATH)"
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
index 95a223b3e56a..8bb92e9f4e97 100644
--- a/arch/x86/boot/compressed/efi_thunk_64.S
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -5,9 +5,8 @@
* Early support for invoking 32-bit EFI services from a 64-bit kernel.
*
* Because this thunking occurs before ExitBootServices() we have to
- * restore the firmware's 32-bit GDT before we make EFI service calls,
- * since the firmware's 32-bit IDT is still currently installed and it
- * needs to be able to service interrupts.
+ * restore the firmware's 32-bit GDT and IDT before we make EFI service
+ * calls.
*
* On the plus side, we don't have to worry about mangling 64-bit
* addresses into 32-bits because we're executing with an identity
@@ -39,7 +38,7 @@ SYM_FUNC_START(__efi64_thunk)
/*
* Convert x86-64 ABI params to i386 ABI
*/
- subq $32, %rsp
+ subq $64, %rsp
movl %esi, 0x0(%rsp)
movl %edx, 0x4(%rsp)
movl %ecx, 0x8(%rsp)
@@ -49,14 +48,19 @@ SYM_FUNC_START(__efi64_thunk)
leaq 0x14(%rsp), %rbx
sgdt (%rbx)
+ addq $16, %rbx
+ sidt (%rbx)
+
/*
- * Switch to gdt with 32-bit segments. This is the firmware GDT
- * that was installed when the kernel started executing. This
- * pointer was saved at the EFI stub entry point in head_64.S.
+ * Switch to IDT and GDT with 32-bit segments. These are the firmware GDT
+ * and IDT that were installed when the kernel started executing. The
+ * pointers were saved at the EFI stub entry point in head_64.S.
*
* Pass the saved DS selector to the 32-bit code, and use far return to
* restore the saved CS selector.
*/
+ leaq efi32_boot_idt(%rip), %rax
+ lidt (%rax)
leaq efi32_boot_gdt(%rip), %rax
lgdt (%rax)
@@ -67,7 +71,7 @@ SYM_FUNC_START(__efi64_thunk)
pushq %rax
lretq
-1: addq $32, %rsp
+1: addq $64, %rsp
movq %rdi, %rax
pop %rbx
@@ -128,10 +132,13 @@ SYM_FUNC_START_LOCAL(efi_enter32)
/*
* Some firmware will return with interrupts enabled. Be sure to
- * disable them before we switch GDTs.
+ * disable them before we switch GDTs and IDTs.
*/
cli
+ lidtl (%ebx)
+ subl $16, %ebx
+
lgdtl (%ebx)
movl %cr4, %eax
@@ -166,6 +173,11 @@ SYM_DATA_START(efi32_boot_gdt)
.quad 0
SYM_DATA_END(efi32_boot_gdt)
+SYM_DATA_START(efi32_boot_idt)
+ .word 0
+ .quad 0
+SYM_DATA_END(efi32_boot_idt)
+
SYM_DATA_START(efi32_boot_cs)
.word 0
SYM_DATA_END(efi32_boot_cs)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index a2347ded77ea..572c535cf45b 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -319,6 +319,9 @@ SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL)
movw %cs, rva(efi32_boot_cs)(%ebp)
movw %ds, rva(efi32_boot_ds)(%ebp)
+ /* Store firmware IDT descriptor */
+ sidtl rva(efi32_boot_idt)(%ebp)
+
/* Disable paging */
movl %cr0, %eax
btrl $X86_CR0_PG_BIT, %eax
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index e36690778497..67c3208b668a 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -668,7 +668,7 @@ static bool process_mem_region(struct mem_vector *region,
if (slot_area_index == MAX_SLOT_AREA) {
debug_putstr("Aborted e820/efi memmap scan when walking immovable regions(slot_areas full)!\n");
- return 1;
+ return true;
}
}
#endif
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index d0959e7b809f..f307c93fc90a 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -88,6 +88,12 @@ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
+obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o
+sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o
+
+obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64) += sm4-aesni-avx2-x86_64.o
+sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o
+
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $< > $@
$(obj)/%.S: $(src)/%.pl FORCE
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 2144e54a6c89..0fc961bef299 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -849,6 +849,8 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt)
return -EINVAL;
err = skcipher_walk_virt(&walk, req, false);
+ if (!walk.nbytes)
+ return err;
if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
@@ -862,7 +864,10 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt)
skcipher_request_set_crypt(&subreq, req->src, req->dst,
blocks * AES_BLOCK_SIZE, req->iv);
req = &subreq;
+
err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
} else {
tail = 0;
}
diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
new file mode 100644
index 000000000000..fa2c3f50aecb
--- /dev/null
+++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
@@ -0,0 +1,589 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4 Cipher Algorithm, AES-NI/AVX optimized.
+ * as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (C) 2018 Markku-Juhani O. Saarinen <mjos@iki.fi>
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at:
+ * https://github.com/mjosaarinen/sm4ni
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define rRIP (%rip)
+
+#define RX0 %xmm0
+#define RX1 %xmm1
+#define MASK_4BIT %xmm2
+#define RTMP0 %xmm3
+#define RTMP1 %xmm4
+#define RTMP2 %xmm5
+#define RTMP3 %xmm6
+#define RTMP4 %xmm7
+
+#define RA0 %xmm8
+#define RA1 %xmm9
+#define RA2 %xmm10
+#define RA3 %xmm11
+
+#define RB0 %xmm12
+#define RB1 %xmm13
+#define RB2 %xmm14
+#define RB3 %xmm15
+
+#define RNOT %xmm0
+#define RBSWAP %xmm1
+
+
+/* Transpose four 32-bit words between 128-bit vectors. */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x1, x0, x0; \
+ \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x2; \
+ \
+ vpunpckhqdq t1, x0, x1; \
+ vpunpcklqdq t1, x0, x0; \
+ \
+ vpunpckhqdq x2, t2, x3; \
+ vpunpcklqdq x2, t2, x2;
+
+/* pre-SubByte transform. */
+#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpand x, mask4bit, tmp0; \
+ vpandn x, mask4bit, x; \
+ vpsrld $4, x, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by
+ * 'vaesenclast' instruction.
+ */
+#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpandn mask4bit, x, tmp0; \
+ vpsrld $4, x, x; \
+ vpand x, mask4bit, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+
+.section .rodata.cst164, "aM", @progbits, 164
+.align 16
+
+/*
+ * Following four affine transform look-up tables are from work by
+ * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
+ *
+ * These allow exposing SM4 S-Box from AES SubByte.
+ */
+
+/* pre-SubByte affine transform, from SM4 field to AES field. */
+.Lpre_tf_lo_s:
+ .quad 0x9197E2E474720701, 0xC7C1B4B222245157
+.Lpre_tf_hi_s:
+ .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012
+
+/* post-SubByte affine transform, from AES field to SM4 field. */
+.Lpost_tf_lo_s:
+ .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
+.Lpost_tf_hi_s:
+ .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF
+
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+ .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+ .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+
+/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_8:
+ .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
+ .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06
+
+/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_16:
+ .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
+ .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09
+
+/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_24:
+ .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
+ .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/* For input word byte-swap */
+.Lbswap32_mask:
+ .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+
+.align 4
+/* 4-bit mask */
+.L0f0f0f0f:
+ .long 0x0f0f0f0f
+
+
+.text
+.align 16
+
+/*
+ * void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
+ * const u8 *src, int nblocks)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx_crypt4)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (1..4 blocks)
+ * %rdx: src (1..4 blocks)
+ * %rcx: num blocks (1..4)
+ */
+ FRAME_BEGIN
+
+ vmovdqu 0*16(%rdx), RA0;
+ vmovdqa RA0, RA1;
+ vmovdqa RA0, RA2;
+ vmovdqa RA0, RA3;
+ cmpq $2, %rcx;
+ jb .Lblk4_load_input_done;
+ vmovdqu 1*16(%rdx), RA1;
+ je .Lblk4_load_input_done;
+ vmovdqu 2*16(%rdx), RA2;
+ cmpq $3, %rcx;
+ je .Lblk4_load_input_done;
+ vmovdqu 3*16(%rdx), RA3;
+
+.Lblk4_load_input_done:
+
+ vmovdqa .Lbswap32_mask rRIP, RTMP2;
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+
+ vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT;
+ vmovdqa .Lpre_tf_lo_s rRIP, RTMP4;
+ vmovdqa .Lpre_tf_hi_s rRIP, RB0;
+ vmovdqa .Lpost_tf_lo_s rRIP, RB1;
+ vmovdqa .Lpost_tf_hi_s rRIP, RB2;
+ vmovdqa .Linv_shift_row rRIP, RB3;
+ vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP2;
+ vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP3;
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+
+#define ROUND(round, s0, s1, s2, s3) \
+ vbroadcastss (4*(round))(%rdi), RX0; \
+ vpxor s1, RX0, RX0; \
+ vpxor s2, RX0, RX0; \
+ vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+ \
+ /* sbox, non-linear part */ \
+ transform_pre(RX0, RTMP4, RB0, MASK_4BIT, RTMP0); \
+ vaesenclast MASK_4BIT, RX0, RX0; \
+ transform_post(RX0, RB1, RB2, MASK_4BIT, RTMP0); \
+ \
+ /* linear part */ \
+ vpshufb RB3, RX0, RTMP0; \
+ vpxor RTMP0, s0, s0; /* s0 ^ x */ \
+ vpshufb RTMP2, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
+ vpshufb RTMP3, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb .Linv_shift_row_rol_24 rRIP, RX0, RTMP1; \
+ vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
+ vpslld $2, RTMP0, RTMP1; \
+ vpsrld $30, RTMP0, RTMP0; \
+ vpxor RTMP0, s0, s0; \
+ /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpxor RTMP1, s0, s0;
+
+ leaq (32*4)(%rdi), %rax;
+.align 16
+.Lroundloop_blk4:
+ ROUND(0, RA0, RA1, RA2, RA3);
+ ROUND(1, RA1, RA2, RA3, RA0);
+ ROUND(2, RA2, RA3, RA0, RA1);
+ ROUND(3, RA3, RA0, RA1, RA2);
+ leaq (4*4)(%rdi), %rdi;
+ cmpq %rax, %rdi;
+ jne .Lroundloop_blk4;
+
+#undef ROUND
+
+ vmovdqa .Lbswap128_mask rRIP, RTMP2;
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+
+ vmovdqu RA0, 0*16(%rsi);
+ cmpq $2, %rcx;
+ jb .Lblk4_store_output_done;
+ vmovdqu RA1, 1*16(%rsi);
+ je .Lblk4_store_output_done;
+ vmovdqu RA2, 2*16(%rsi);
+ cmpq $3, %rcx;
+ je .Lblk4_store_output_done;
+ vmovdqu RA3, 3*16(%rsi);
+
+.Lblk4_store_output_done:
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx_crypt4)
+
+.align 8
+SYM_FUNC_START_LOCAL(__sm4_crypt_blk8)
+ /* input:
+ * %rdi: round key array, CTX
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
+ * plaintext blocks
+ * output:
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
+ * ciphertext blocks
+ */
+ FRAME_BEGIN
+
+ vmovdqa .Lbswap32_mask rRIP, RTMP2;
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+ vpshufb RTMP2, RB0, RB0;
+ vpshufb RTMP2, RB1, RB1;
+ vpshufb RTMP2, RB2, RB2;
+ vpshufb RTMP2, RB3, RB3;
+
+ vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT;
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+
+#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \
+ vbroadcastss (4*(round))(%rdi), RX0; \
+ vmovdqa .Lpre_tf_lo_s rRIP, RTMP4; \
+ vmovdqa .Lpre_tf_hi_s rRIP, RTMP1; \
+ vmovdqa RX0, RX1; \
+ vpxor s1, RX0, RX0; \
+ vpxor s2, RX0, RX0; \
+ vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+ vmovdqa .Lpost_tf_lo_s rRIP, RTMP2; \
+ vmovdqa .Lpost_tf_hi_s rRIP, RTMP3; \
+ vpxor r1, RX1, RX1; \
+ vpxor r2, RX1, RX1; \
+ vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \
+ \
+ /* sbox, non-linear part */ \
+ transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+ transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+ vmovdqa .Linv_shift_row rRIP, RTMP4; \
+ vaesenclast MASK_4BIT, RX0, RX0; \
+ vaesenclast MASK_4BIT, RX1, RX1; \
+ transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+ transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+ \
+ /* linear part */ \
+ vpshufb RTMP4, RX0, RTMP0; \
+ vpxor RTMP0, s0, s0; /* s0 ^ x */ \
+ vpshufb RTMP4, RX1, RTMP2; \
+ vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP4; \
+ vpxor RTMP2, r0, r0; /* r0 ^ x */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP4; \
+ vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vmovdqa .Linv_shift_row_rol_24 rRIP, RTMP4; \
+ vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
+ /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpslld $2, RTMP0, RTMP1; \
+ vpsrld $30, RTMP0, RTMP0; \
+ vpxor RTMP0, s0, s0; \
+ vpxor RTMP1, s0, s0; \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \
+ /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpslld $2, RTMP2, RTMP3; \
+ vpsrld $30, RTMP2, RTMP2; \
+ vpxor RTMP2, r0, r0; \
+ vpxor RTMP3, r0, r0;
+
+ leaq (32*4)(%rdi), %rax;
+.align 16
+.Lroundloop_blk8:
+ ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
+ ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
+ ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
+ ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
+ leaq (4*4)(%rdi), %rdi;
+ cmpq %rax, %rdi;
+ jne .Lroundloop_blk8;
+
+#undef ROUND
+
+ vmovdqa .Lbswap128_mask rRIP, RTMP2;
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+ vpshufb RTMP2, RB0, RB0;
+ vpshufb RTMP2, RB1, RB1;
+ vpshufb RTMP2, RB2, RB2;
+ vpshufb RTMP2, RB3, RB3;
+
+ FRAME_END
+ ret;
+SYM_FUNC_END(__sm4_crypt_blk8)
+
+/*
+ * void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
+ * const u8 *src, int nblocks)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx_crypt8)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (1..8 blocks)
+ * %rdx: src (1..8 blocks)
+ * %rcx: num blocks (1..8)
+ */
+ /* < 5 blocks: jump to crypt4 before FRAME_BEGIN so only one frame is set up. */
+ cmpq $5, %rcx;
+ jb sm4_aesni_avx_crypt4;
+ FRAME_BEGIN
+ vmovdqu (0 * 16)(%rdx), RA0;
+ vmovdqu (1 * 16)(%rdx), RA1;
+ vmovdqu (2 * 16)(%rdx), RA2;
+ vmovdqu (3 * 16)(%rdx), RA3;
+ vmovdqu (4 * 16)(%rdx), RB0;
+ vmovdqa RB0, RB1; /* pre-fill RB1..RB3 with block 4 in case fewer than 8 are given */
+ vmovdqa RB0, RB2;
+ vmovdqa RB0, RB3;
+ je .Lblk8_load_input_done; /* exactly 5 blocks; relies on flags from cmpq $5 above */
+ vmovdqu (5 * 16)(%rdx), RB1;
+ cmpq $7, %rcx;
+ jb .Lblk8_load_input_done; /* exactly 6 blocks */
+ vmovdqu (6 * 16)(%rdx), RB2;
+ je .Lblk8_load_input_done; /* exactly 7 blocks */
+ vmovdqu (7 * 16)(%rdx), RB3;
+
+.Lblk8_load_input_done:
+ call __sm4_crypt_blk8;
+
+ cmpq $6, %rcx; /* flags consumed by the jb/je after the first five stores */
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ jb .Lblk8_store_output_done; /* 5 blocks: nothing more to store */
+ vmovdqu RB1, (5 * 16)(%rsi);
+ je .Lblk8_store_output_done; /* 6 blocks */
+ vmovdqu RB2, (6 * 16)(%rsi);
+ cmpq $7, %rcx;
+ je .Lblk8_store_output_done; /* 7 blocks */
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+.Lblk8_store_output_done:
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx_crypt8)
+
+/*
+ * void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: iv (big endian, 128bit)
+ */
+ FRAME_BEGIN
+
+ /* load IV and byteswap */
+ vmovdqu (%rcx), RA0;
+
+ vmovdqa .Lbswap128_mask rRIP, RBSWAP;
+ vpshufb RBSWAP, RA0, RTMP0; /* be => le */
+
+ vpcmpeqd RNOT, RNOT, RNOT;
+ vpsrldq $8, RNOT, RNOT; /* low: -1, high: 0 */
+
+#define inc_le128(x, minus_one, tmp) \
+ vpcmpeqq minus_one, x, tmp; \
+ vpsubq minus_one, x, x; \
+ vpslldq $8, tmp, tmp; \
+ vpsubq tmp, x, x;
+
+ /* construct IVs */
+ inc_le128(RTMP0, RNOT, RTMP2); /* +1 */
+ vpshufb RBSWAP, RTMP0, RA1;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +2 */
+ vpshufb RBSWAP, RTMP0, RA2;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +3 */
+ vpshufb RBSWAP, RTMP0, RA3;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +4 */
+ vpshufb RBSWAP, RTMP0, RB0;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +5 */
+ vpshufb RBSWAP, RTMP0, RB1;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +6 */
+ vpshufb RBSWAP, RTMP0, RB2;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +7 */
+ vpshufb RBSWAP, RTMP0, RB3;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +8 */
+ vpshufb RBSWAP, RTMP0, RTMP1;
+
+ /* store new IV */
+ vmovdqu RTMP1, (%rcx);
+
+ call __sm4_crypt_blk8;
+
+ vpxor (0 * 16)(%rdx), RA0, RA0;
+ vpxor (1 * 16)(%rdx), RA1, RA1;
+ vpxor (2 * 16)(%rdx), RA2, RA2;
+ vpxor (3 * 16)(%rdx), RA3, RA3;
+ vpxor (4 * 16)(%rdx), RB0, RB0;
+ vpxor (5 * 16)(%rdx), RB1, RB1;
+ vpxor (6 * 16)(%rdx), RB2, RB2;
+ vpxor (7 * 16)(%rdx), RB3, RB3;
+
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ vmovdqu RB1, (5 * 16)(%rsi);
+ vmovdqu RB2, (6 * 16)(%rsi);
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8)
+
+/*
+ * void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: iv
+ */
+ FRAME_BEGIN
+
+ vmovdqu (0 * 16)(%rdx), RA0;
+ vmovdqu (1 * 16)(%rdx), RA1;
+ vmovdqu (2 * 16)(%rdx), RA2;
+ vmovdqu (3 * 16)(%rdx), RA3;
+ vmovdqu (4 * 16)(%rdx), RB0;
+ vmovdqu (5 * 16)(%rdx), RB1;
+ vmovdqu (6 * 16)(%rdx), RB2;
+ vmovdqu (7 * 16)(%rdx), RB3;
+
+ call __sm4_crypt_blk8;
+
+ vmovdqu (7 * 16)(%rdx), RNOT;
+ vpxor (%rcx), RA0, RA0;
+ vpxor (0 * 16)(%rdx), RA1, RA1;
+ vpxor (1 * 16)(%rdx), RA2, RA2;
+ vpxor (2 * 16)(%rdx), RA3, RA3;
+ vpxor (3 * 16)(%rdx), RB0, RB0;
+ vpxor (4 * 16)(%rdx), RB1, RB1;
+ vpxor (5 * 16)(%rdx), RB2, RB2;
+ vpxor (6 * 16)(%rdx), RB3, RB3;
+ vmovdqu RNOT, (%rcx); /* store new IV */
+
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ vmovdqu RB1, (5 * 16)(%rsi);
+ vmovdqu RB2, (6 * 16)(%rsi);
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8)
+
+/*
+ * void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: iv
+ */
+ FRAME_BEGIN
+
+ /* Load input */
+ vmovdqu (%rcx), RA0;
+ vmovdqu 0 * 16(%rdx), RA1;
+ vmovdqu 1 * 16(%rdx), RA2;
+ vmovdqu 2 * 16(%rdx), RA3;
+ vmovdqu 3 * 16(%rdx), RB0;
+ vmovdqu 4 * 16(%rdx), RB1;
+ vmovdqu 5 * 16(%rdx), RB2;
+ vmovdqu 6 * 16(%rdx), RB3;
+
+ /* Update IV */
+ vmovdqu 7 * 16(%rdx), RNOT;
+ vmovdqu RNOT, (%rcx);
+
+ call __sm4_crypt_blk8;
+
+ vpxor (0 * 16)(%rdx), RA0, RA0;
+ vpxor (1 * 16)(%rdx), RA1, RA1;
+ vpxor (2 * 16)(%rdx), RA2, RA2;
+ vpxor (3 * 16)(%rdx), RA3, RA3;
+ vpxor (4 * 16)(%rdx), RB0, RB0;
+ vpxor (5 * 16)(%rdx), RB1, RB1;
+ vpxor (6 * 16)(%rdx), RB2, RB2;
+ vpxor (7 * 16)(%rdx), RB3, RB3;
+
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ vmovdqu RB1, (5 * 16)(%rsi);
+ vmovdqu RB2, (6 * 16)(%rsi);
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx_cfb_dec_blk8)
diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
new file mode 100644
index 000000000000..d2ffd7f76ee2
--- /dev/null
+++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
@@ -0,0 +1,497 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SM4 Cipher Algorithm, AES-NI/AVX2 optimized.
+ * as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (C) 2018 Markku-Juhani O. Saarinen <mjos@iki.fi>
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at:
+ * https://github.com/mjosaarinen/sm4ni
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define rRIP (%rip)
+
+/* vector registers */
+#define RX0 %ymm0
+#define RX1 %ymm1
+#define MASK_4BIT %ymm2
+#define RTMP0 %ymm3
+#define RTMP1 %ymm4
+#define RTMP2 %ymm5
+#define RTMP3 %ymm6
+#define RTMP4 %ymm7
+
+#define RA0 %ymm8
+#define RA1 %ymm9
+#define RA2 %ymm10
+#define RA3 %ymm11
+
+#define RB0 %ymm12
+#define RB1 %ymm13
+#define RB2 %ymm14
+#define RB3 %ymm15
+
+#define RNOT %ymm0
+#define RBSWAP %ymm1
+
+#define RX0x %xmm0
+#define RX1x %xmm1
+#define MASK_4BITx %xmm2
+
+#define RNOTx %xmm0
+#define RBSWAPx %xmm1
+
+#define RTMP0x %xmm3
+#define RTMP1x %xmm4
+#define RTMP2x %xmm5
+#define RTMP3x %xmm6
+#define RTMP4x %xmm7
+
+
+/* helper macros */
+
+/*
+ * Transpose four 32-bit words between 128-bit vector lanes.
+ *
+ * Within each 128-bit lane, x0..x3 are treated as the rows of a 4x4 matrix
+ * of 32-bit words and are replaced by its columns. t1 and t2 are scratch
+ * registers and are overwritten.
+ */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x1, x0, x0; \
+ \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x2; \
+ \
+ vpunpckhqdq t1, x0, x1; \
+ vpunpcklqdq t1, x0, x0; \
+ \
+ vpunpckhqdq x2, t2, x3; \
+ vpunpcklqdq x2, t2, x2;
+
+/*
+ * pre-SubByte transform.
+ *
+ * Each byte of x is split into its low nibble (kept in tmp0) and its high
+ * nibble (shifted down in x). lo_t is looked up with the low nibble, hi_t
+ * with the high nibble, and the two results are XORed into x. mask4bit
+ * must hold 0x0f in every byte; tmp0 is scratch.
+ */
+#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpand x, mask4bit, tmp0; \
+ vpandn x, mask4bit, x; \
+ vpsrld $4, x, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+/*
+ * post-SubByte transform. Note: x has been XOR'ed with mask4bit by the
+ * 'vaesenclast' instruction, so the low nibble is extracted from the
+ * complement of x (vpandn) to undo that XOR. The high nibble is shifted
+ * down and masked as usual. lo_t/hi_t are the nibble look-up tables and
+ * tmp0 is scratch.
+ */
+#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpandn mask4bit, x, tmp0; \
+ vpsrld $4, x, x; \
+ vpand x, mask4bit, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+
+/*
+ * Mergeable read-only constants. The entry size passed to .section must
+ * evenly divide the section size, so the section uses 16-byte entries and
+ * is padded at the end to a multiple of 16 bytes.
+ */
+.section	.rodata.cst16, "aM", @progbits, 16
+.align 16
+
+/*
+ * Following four affine transform look-up tables are from work by
+ * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
+ *
+ * These allow exposing SM4 S-Box from AES SubByte.
+ */
+
+/* pre-SubByte affine transform, from SM4 field to AES field. */
+.Lpre_tf_lo_s:
+	.quad 0x9197E2E474720701, 0xC7C1B4B222245157
+.Lpre_tf_hi_s:
+	.quad 0xE240AB09EB49A200, 0xF052B91BF95BB012
+
+/* post-SubByte affine transform, from AES field to SM4 field. */
+.Lpost_tf_lo_s:
+	.quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
+.Lpost_tf_hi_s:
+	.quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF
+
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+	.byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+	.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+
+/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_8:
+	.byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
+	.byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06
+
+/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_16:
+	.byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
+	.byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09
+
+/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_24:
+	.byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
+	.byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/* For input word byte-swap */
+.Lbswap32_mask:
+	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+
+.align 4
+/* 4-bit mask */
+.L0f0f0f0f:
+	.long 0x0f0f0f0f
+
+/* 12 bytes, only for padding the section to a multiple of 16 bytes */
+.Lpadding_deadbeef:
+	.long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef
+
+.text
+.align 16
+
+/*
+ * Run the SM4 rounds over sixteen blocks held in RA0-RA3 and RB0-RB3,
+ * using the round keys at %rdi in the order they are stored.
+ *
+ * Clobbers %rax and %rdi (%rdi is left pointing just past the round keys
+ * that were consumed), as well as RX0, RX1, MASK_4BIT and RTMP0-RTMP4.
+ */
+.align 8
+SYM_FUNC_START_LOCAL(__sm4_crypt_blk16)
+ /* input:
+ * %rdi: round key array, CTX
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
+ * plaintext blocks
+ * output:
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
+ * ciphertext blocks
+ */
+ FRAME_BEGIN
+
+ /* Byte-swap every 32-bit input word */
+ vbroadcasti128 .Lbswap32_mask rRIP, RTMP2;
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+ vpshufb RTMP2, RB0, RB0;
+ vpshufb RTMP2, RB1, RB1;
+ vpshufb RTMP2, RB2, RB2;
+ vpshufb RTMP2, RB3, RB3;
+
+ /* Regroup so each register holds the same word index of several blocks */
+ vpbroadcastd .L0f0f0f0f rRIP, MASK_4BIT;
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+
+#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \
+ vpbroadcastd (4*(round))(%rdi), RX0; \
+ vbroadcasti128 .Lpre_tf_lo_s rRIP, RTMP4; \
+ vbroadcasti128 .Lpre_tf_hi_s rRIP, RTMP1; \
+ vmovdqa RX0, RX1; \
+ vpxor s1, RX0, RX0; \
+ vpxor s2, RX0, RX0; \
+ vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+ vbroadcasti128 .Lpost_tf_lo_s rRIP, RTMP2; \
+ vbroadcasti128 .Lpost_tf_hi_s rRIP, RTMP3; \
+ vpxor r1, RX1, RX1; \
+ vpxor r2, RX1, RX1; \
+ vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \
+ \
+ /* sbox, non-linear part */ \
+ transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+ transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+ vextracti128 $1, RX0, RTMP4x; \
+ vextracti128 $1, RX1, RTMP0x; \
+ vaesenclast MASK_4BITx, RX0x, RX0x; \
+ vaesenclast MASK_4BITx, RTMP4x, RTMP4x; \
+ vaesenclast MASK_4BITx, RX1x, RX1x; \
+ vaesenclast MASK_4BITx, RTMP0x, RTMP0x; \
+ vinserti128 $1, RTMP4x, RX0, RX0; \
+ vbroadcasti128 .Linv_shift_row rRIP, RTMP4; \
+ vinserti128 $1, RTMP0x, RX1, RX1; \
+ transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+ transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+ \
+ /* linear part */ \
+ vpshufb RTMP4, RX0, RTMP0; \
+ vpxor RTMP0, s0, s0; /* s0 ^ x */ \
+ vpshufb RTMP4, RX1, RTMP2; \
+ vbroadcasti128 .Linv_shift_row_rol_8 rRIP, RTMP4; \
+ vpxor RTMP2, r0, r0; /* r0 ^ x */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vbroadcasti128 .Linv_shift_row_rol_16 rRIP, RTMP4; \
+ vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vbroadcasti128 .Linv_shift_row_rol_24 rRIP, RTMP4; \
+ vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
+ vpslld $2, RTMP0, RTMP1; \
+ vpsrld $30, RTMP0, RTMP0; \
+ vpxor RTMP0, s0, s0; \
+ /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpxor RTMP1, s0, s0; \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \
+ vpslld $2, RTMP2, RTMP3; \
+ vpsrld $30, RTMP2, RTMP2; \
+ vpxor RTMP2, r0, r0; \
+ /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpxor RTMP3, r0, r0;
+
+ /* %rax = end of the 32 round keys (32 * 4 bytes past %rdi) */
+ leaq (32*4)(%rdi), %rax;
+.align 16
+ /* Four rounds per iteration; %rdi advances by four round keys each time */
+.Lroundloop_blk8:
+ ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
+ ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
+ ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
+ ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
+ leaq (4*4)(%rdi), %rdi;
+ cmpq %rax, %rdi;
+ jne .Lroundloop_blk8;
+
+#undef ROUND
+
+ /* .Lbswap128_mask reverses all 16 bytes of each block */
+ vbroadcasti128 .Lbswap128_mask rRIP, RTMP2;
+
+ /* Transpose back to one block per 128-bit lane, then reverse its bytes */
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+ vpshufb RTMP2, RB0, RB0;
+ vpshufb RTMP2, RB1, RB1;
+ vpshufb RTMP2, RB2, RB2;
+ vpshufb RTMP2, RB3, RB3;
+
+ FRAME_END
+ ret;
+SYM_FUNC_END(__sm4_crypt_blk16)
+
+/*
+ * Add 1 to the little-endian 128-bit integer in each 128-bit lane of x.
+ *
+ * minus_one must hold -1 in the low quadword and 0 in the high quadword of
+ * each lane. The low quadword is incremented by subtracting -1; if it was
+ * all-ones beforehand, the compare result is shifted into the high quadword
+ * and subtracted there to propagate the carry. tmp is scratch.
+ */
+#define inc_le128(x, minus_one, tmp) \
+ vpcmpeqq minus_one, x, tmp; \
+ vpsubq minus_one, x, x; \
+ vpslldq $8, tmp, tmp; \
+ vpsubq tmp, x, x;
+
+/*
+ * void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ *
+ * CTR mode over sixteen blocks: builds the counter blocks IV+0 .. IV+15,
+ * runs them through the cipher, XORs the result with src and stores it to
+ * dst. The iv buffer is updated to IV+16.
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv (big endian, 128bit)
+ */
+ FRAME_BEGIN
+
+ /* %rax = low 64 bits of the counter, converted from big endian */
+ movq 8(%rcx), %rax;
+ bswapq %rax;
+
+ vzeroupper;
+
+ vbroadcasti128 .Lbswap128_mask rRIP, RTMP3;
+ vpcmpeqd RNOT, RNOT, RNOT;
+ vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */
+ vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */
+
+ /* load IV and byteswap */
+ vmovdqu (%rcx), RTMP4x;
+ vpshufb RTMP3x, RTMP4x, RTMP4x;
+ vmovdqa RTMP4x, RTMP0x;
+ inc_le128(RTMP4x, RNOTx, RTMP1x);
+ vinserti128 $1, RTMP4x, RTMP0, RTMP0;
+ vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */
+
+ /* check need for handling 64-bit overflow and carry */
+ cmpq $(0xffffffffffffffff - 16), %rax;
+ ja .Lhandle_ctr_carry;
+
+ /*
+ * Fast path: the low 64 bits cannot wrap within the next 16 increments,
+ * so each step is a plain 64-bit add of 2 to both lanes.
+ */
+ /* construct IVs */
+ vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */
+ vpshufb RTMP3, RTMP0, RA1;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */
+ vpshufb RTMP3, RTMP0, RA2;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */
+ vpshufb RTMP3, RTMP0, RA3;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */
+ vpshufb RTMP3, RTMP0, RB0;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */
+ vpshufb RTMP3, RTMP0, RB1;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */
+ vpshufb RTMP3, RTMP0, RB2;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */
+ vpshufb RTMP3, RTMP0, RB3;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +16 */
+ vpshufb RTMP3x, RTMP0x, RTMP0x;
+
+ jmp .Lctr_carry_done;
+
+.Lhandle_ctr_carry:
+ /* Slow path: full 128-bit increments, two per pair of counter blocks */
+ /* construct IVs */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RA3; /* +7 ; +6 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ /* the upper lane now holds +16 */
+ vextracti128 $1, RTMP0, RTMP0x;
+ vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */
+
+.align 4
+.Lctr_carry_done:
+ /* store new IV */
+ vmovdqu RTMP0x, (%rcx);
+
+ call __sm4_crypt_blk16;
+
+ /* XOR the encrypted counter blocks with the source data */
+ vpxor (0 * 32)(%rdx), RA0, RA0;
+ vpxor (1 * 32)(%rdx), RA1, RA1;
+ vpxor (2 * 32)(%rdx), RA2, RA2;
+ vpxor (3 * 32)(%rdx), RA3, RA3;
+ vpxor (4 * 32)(%rdx), RB0, RB0;
+ vpxor (5 * 32)(%rdx), RB1, RB1;
+ vpxor (6 * 32)(%rdx), RB2, RB2;
+ vpxor (7 * 32)(%rdx), RB3, RB3;
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA3, (3 * 32)(%rsi);
+ vmovdqu RB0, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB3, (7 * 32)(%rsi);
+
+ /* Zero all vector registers before returning */
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16)
+
+/*
+ * void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ *
+ * CBC-mode decryption of sixteen 16-byte blocks. Each source block is run
+ * through the cipher and XORed with the previous source block (the IV for
+ * block 0). The iv buffer is overwritten with source block 15.
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv
+ */
+ FRAME_BEGIN
+
+ vzeroupper;
+
+ vmovdqu (0 * 32)(%rdx), RA0;
+ vmovdqu (1 * 32)(%rdx), RA1;
+ vmovdqu (2 * 32)(%rdx), RA2;
+ vmovdqu (3 * 32)(%rdx), RA3;
+ vmovdqu (4 * 32)(%rdx), RB0;
+ vmovdqu (5 * 32)(%rdx), RB1;
+ vmovdqu (6 * 32)(%rdx), RB2;
+ vmovdqu (7 * 32)(%rdx), RB3;
+
+ call __sm4_crypt_blk16;
+
+ /* RNOT = { IV, source block 0 }: the chaining values for blocks 0 and 1 */
+ vmovdqu (%rcx), RNOTx;
+ vinserti128 $1, (%rdx), RNOT, RNOT;
+ vpxor RNOT, RA0, RA0;
+ /* Remaining pairs are chained with src shifted back by one block */
+ vpxor (0 * 32 + 16)(%rdx), RA1, RA1;
+ vpxor (1 * 32 + 16)(%rdx), RA2, RA2;
+ vpxor (2 * 32 + 16)(%rdx), RA3, RA3;
+ vpxor (3 * 32 + 16)(%rdx), RB0, RB0;
+ vpxor (4 * 32 + 16)(%rdx), RB1, RB1;
+ vpxor (5 * 32 + 16)(%rdx), RB2, RB2;
+ vpxor (6 * 32 + 16)(%rdx), RB3, RB3;
+ vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+ vmovdqu RNOTx, (%rcx); /* store new IV */
+
+ /* Every read of src has completed before the first write to dst */
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA3, (3 * 32)(%rsi);
+ vmovdqu RB0, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB3, (7 * 32)(%rsi);
+
+ /* Zero all vector registers before returning */
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16)
+
+/*
+ * void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ *
+ * CFB-mode decryption of sixteen 16-byte blocks. The cipher is run over the
+ * IV followed by source blocks 0..14, and its output is XORed with source
+ * blocks 0..15 to produce dst. The iv buffer is overwritten with source
+ * block 15.
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv
+ */
+ FRAME_BEGIN
+
+ vzeroupper;
+
+ /* Load input */
+ /* RA0 = { IV, source block 0 }; later registers read src shifted by 16 */
+ vmovdqu (%rcx), RNOTx;
+ vinserti128 $1, (%rdx), RNOT, RA0;
+ vmovdqu (0 * 32 + 16)(%rdx), RA1;
+ vmovdqu (1 * 32 + 16)(%rdx), RA2;
+ vmovdqu (2 * 32 + 16)(%rdx), RA3;
+ vmovdqu (3 * 32 + 16)(%rdx), RB0;
+ vmovdqu (4 * 32 + 16)(%rdx), RB1;
+ vmovdqu (5 * 32 + 16)(%rdx), RB2;
+ vmovdqu (6 * 32 + 16)(%rdx), RB3;
+
+ /* Update IV */
+ vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+ vmovdqu RNOTx, (%rcx);
+
+ call __sm4_crypt_blk16;
+
+ /* XOR the cipher output with the matching source blocks */
+ vpxor (0 * 32)(%rdx), RA0, RA0;
+ vpxor (1 * 32)(%rdx), RA1, RA1;
+ vpxor (2 * 32)(%rdx), RA2, RA2;
+ vpxor (3 * 32)(%rdx), RA3, RA3;
+ vpxor (4 * 32)(%rdx), RB0, RB0;
+ vpxor (5 * 32)(%rdx), RB1, RB1;
+ vpxor (6 * 32)(%rdx), RB2, RB2;
+ vpxor (7 * 32)(%rdx), RB3, RB3;
+
+ /* Every read of src has completed before the first write to dst */
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA3, (3 * 32)(%rsi);
+ vmovdqu RB0, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB3, (7 * 32)(%rsi);
+
+ /* Zero all vector registers before returning */
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx2_cfb_dec_blk16)
diff --git a/arch/x86/crypto/sm4-avx.h b/arch/x86/crypto/sm4-avx.h
new file mode 100644
index 000000000000..1bceab7516aa
--- /dev/null
+++ b/arch/x86/crypto/sm4-avx.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef ASM_X86_SM4_AVX_H
+#define ASM_X86_SM4_AVX_H
+
+#include <linux/types.h>
+#include <crypto/sm4.h>
+
+/*
+ * Only pointers to the request are used below. Forward-declare the type so
+ * this header does not rely on the includer having pulled in the skcipher
+ * headers first.
+ */
+struct skcipher_request;
+
+/*
+ * Signature of the multi-block assembly routines: process one full batch of
+ * blocks from src to dst using round keys rk, updating iv in place.
+ */
+typedef void (*sm4_crypt_func)(const u32 *rk, u8 *dst, const u8 *src, u8 *iv);
+
+int sm4_avx_ecb_encrypt(struct skcipher_request *req);
+int sm4_avx_ecb_decrypt(struct skcipher_request *req);
+
+int sm4_cbc_encrypt(struct skcipher_request *req);
+/* bsize is the number of bytes handled by one call to func */
+int sm4_avx_cbc_decrypt(struct skcipher_request *req,
+			unsigned int bsize, sm4_crypt_func func);
+
+int sm4_cfb_encrypt(struct skcipher_request *req);
+/* bsize is the number of bytes handled by one call to func */
+int sm4_avx_cfb_decrypt(struct skcipher_request *req,
+			unsigned int bsize, sm4_crypt_func func);
+
+/* bsize is the number of bytes handled by one call to func */
+int sm4_avx_ctr_crypt(struct skcipher_request *req,
+			unsigned int bsize, sm4_crypt_func func);
+
+#endif
diff --git a/arch/x86/crypto/sm4_aesni_avx2_glue.c b/arch/x86/crypto/sm4_aesni_avx2_glue.c
new file mode 100644
index 000000000000..84bc718f49a3
--- /dev/null
+++ b/arch/x86/crypto/sm4_aesni_avx2_glue.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4 Cipher Algorithm, AES-NI/AVX2 optimized.
+ * as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (c) 2021, Alibaba Group.
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <asm/simd.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/sm4.h>
+#include "sm4-avx.h"
+
+#define SM4_CRYPT16_BLOCK_SIZE (SM4_BLOCK_SIZE * 16)
+
+asmlinkage void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+asmlinkage void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+asmlinkage void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+
+/* Expand the supplied key into the transform's SM4 context. */
+static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			unsigned int key_len)
+{
+	return sm4_expandkey(crypto_skcipher_ctx(tfm), key, key_len);
+}
+
+/* CBC decryption with the 16-block AVX2 routine as the bulk primitive. */
+static int cbc_decrypt(struct skcipher_request *req)
+{
+	sm4_crypt_func blk16 = sm4_aesni_avx2_cbc_dec_blk16;
+
+	return sm4_avx_cbc_decrypt(req, SM4_CRYPT16_BLOCK_SIZE, blk16);
+}
+
+
+/* CFB decryption with the 16-block AVX2 routine as the bulk primitive. */
+static int cfb_decrypt(struct skcipher_request *req)
+{
+	sm4_crypt_func blk16 = sm4_aesni_avx2_cfb_dec_blk16;
+
+	return sm4_avx_cfb_decrypt(req, SM4_CRYPT16_BLOCK_SIZE, blk16);
+}
+
+/* CTR en/decryption with the 16-block AVX2 routine as the bulk primitive. */
+static int ctr_crypt(struct skcipher_request *req)
+{
+	sm4_crypt_func blk16 = sm4_aesni_avx2_ctr_enc_blk16;
+
+	return sm4_avx_ctr_crypt(req, SM4_CRYPT16_BLOCK_SIZE, blk16);
+}
+
+/*
+ * Internal skcipher algorithms provided by this driver: ECB, CBC, CFB and
+ * CTR. All are flagged CRYPTO_ALG_INTERNAL, use priority 500 and a walk
+ * size of sixteen blocks. CFB and CTR are registered with block size 1 and
+ * a chunk size of one SM4 block.
+ */
+static struct skcipher_alg sm4_aesni_avx2_skciphers[] = {
+ {
+ .base = {
+ .cra_name = "__ecb(sm4)",
+ .cra_driver_name = "__ecb-sm4-aesni-avx2",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .walksize = 16 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_avx_ecb_encrypt,
+ .decrypt = sm4_avx_ecb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__cbc(sm4)",
+ .cra_driver_name = "__cbc-sm4-aesni-avx2",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .walksize = 16 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__cfb(sm4)",
+ .cra_driver_name = "__cfb-sm4-aesni-avx2",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .walksize = 16 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_cfb_encrypt,
+ .decrypt = cfb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__ctr(sm4)",
+ .cra_driver_name = "__ctr-sm4-aesni-avx2",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .walksize = 16 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ }
+};
+
+static struct simd_skcipher_alg *
+simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)];
+
+/*
+ * Module init: register the AVX2 SM4 skciphers if the CPU reports AVX,
+ * AVX2, AES-NI and OSXSAVE and the SSE/YMM xfeatures are available.
+ * Returns -ENODEV otherwise.
+ */
+static int __init sm4_init(void)
+{
+	const char *feature_name;
+	bool have_insns;
+
+	have_insns = boot_cpu_has(X86_FEATURE_AVX) &&
+		     boot_cpu_has(X86_FEATURE_AVX2) &&
+		     boot_cpu_has(X86_FEATURE_AES) &&
+		     boot_cpu_has(X86_FEATURE_OSXSAVE);
+	if (!have_insns) {
+		pr_info("AVX2 or AES-NI instructions are not detected.\n");
+		return -ENODEV;
+	}
+
+	if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+			      &feature_name))
+		return simd_register_skciphers_compat(sm4_aesni_avx2_skciphers,
+					ARRAY_SIZE(sm4_aesni_avx2_skciphers),
+					simd_sm4_aesni_avx2_skciphers);
+
+	pr_info("CPU feature '%s' is not supported.\n", feature_name);
+	return -ENODEV;
+}
+
+/* Module exit: unregister everything sm4_init() registered. */
+static void __exit sm4_exit(void)
+{
+	const int nr_algs = ARRAY_SIZE(sm4_aesni_avx2_skciphers);
+
+	simd_unregister_skciphers(sm4_aesni_avx2_skciphers, nr_algs,
+				  simd_sm4_aesni_avx2_skciphers);
+}
+
+module_init(sm4_init);
+module_exit(sm4_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
+MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX2 optimized");
+MODULE_ALIAS_CRYPTO("sm4");
+MODULE_ALIAS_CRYPTO("sm4-aesni-avx2");
diff --git a/arch/x86/crypto/sm4_aesni_avx_glue.c b/arch/x86/crypto/sm4_aesni_avx_glue.c
new file mode 100644
index 000000000000..7800f77d68ad
--- /dev/null
+++ b/arch/x86/crypto/sm4_aesni_avx_glue.c
@@ -0,0 +1,487 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4 Cipher Algorithm, AES-NI/AVX optimized.
+ * as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (c) 2021, Alibaba Group.
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <asm/simd.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/sm4.h>
+#include "sm4-avx.h"
+
+#define SM4_CRYPT8_BLOCK_SIZE (SM4_BLOCK_SIZE * 8)
+
+asmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
+ const u8 *src, int nblocks);
+asmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
+ const u8 *src, int nblocks);
+asmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+asmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+asmlinkage void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+
+/* Expand the supplied key into the transform's SM4 context. */
+static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			unsigned int key_len)
+{
+	return sm4_expandkey(crypto_skcipher_ctx(tfm), key, key_len);
+}
+
+/*
+ * ECB helper shared by encryption and decryption; the direction is chosen
+ * by the round-key array passed in rkey. Whole groups of eight blocks go
+ * through sm4_aesni_avx_crypt8(), the rest through sm4_aesni_avx_crypt4()
+ * up to four blocks at a time. Returns the status of the skcipher walk.
+ */
+static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
+{
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	for (nbytes = walk.nbytes; nbytes > 0; nbytes = walk.nbytes) {
+		const u8 *in = walk.src.virt.addr;
+		u8 *out = walk.dst.virt.addr;
+
+		kernel_fpu_begin();
+
+		/* Bulk path: eight blocks per call. */
+		for (; nbytes >= SM4_CRYPT8_BLOCK_SIZE;
+		     nbytes -= SM4_CRYPT8_BLOCK_SIZE) {
+			sm4_aesni_avx_crypt8(rkey, out, in, 8);
+			out += SM4_CRYPT8_BLOCK_SIZE;
+			in += SM4_CRYPT8_BLOCK_SIZE;
+		}
+
+		/* Remaining whole blocks: at most four per call. */
+		while (nbytes >= SM4_BLOCK_SIZE) {
+			unsigned int nblocks = min(nbytes >> 4, 4u);
+			unsigned int len = nblocks * SM4_BLOCK_SIZE;
+
+			sm4_aesni_avx_crypt4(rkey, out, in, nblocks);
+			out += len;
+			in += len;
+			nbytes -= len;
+		}
+
+		kernel_fpu_end();
+
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+
+/* ECB encryption: run the shared ECB helper with the encryption keys. */
+int sm4_avx_ecb_encrypt(struct skcipher_request *req)
+{
+	struct sm4_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+
+	return ecb_do_crypt(req, ctx->rkey_enc);
+}
+EXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt);
+
+/* ECB decryption: run the shared ECB helper with the decryption keys. */
+int sm4_avx_ecb_decrypt(struct skcipher_request *req)
+{
+	struct sm4_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+
+	return ecb_do_crypt(req, ctx->rkey_dec);
+}
+EXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt);
+
+/*
+ * CBC encryption, one block at a time with sm4_crypt_block(). Each block
+ * is XORed with the previous output block (the IV for the first one)
+ * before being encrypted; the last output block is copied back into the
+ * walk IV. Returns the status of the skcipher walk.
+ */
+int sm4_cbc_encrypt(struct skcipher_request *req)
+{
+	struct sm4_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	for (nbytes = walk.nbytes; nbytes > 0; nbytes = walk.nbytes) {
+		const u8 *in = walk.src.virt.addr;
+		u8 *out = walk.dst.virt.addr;
+		const u8 *chain = walk.iv;
+
+		for (; nbytes >= SM4_BLOCK_SIZE; nbytes -= SM4_BLOCK_SIZE) {
+			crypto_xor_cpy(out, in, chain, SM4_BLOCK_SIZE);
+			sm4_crypt_block(ctx->rkey_enc, out, out);
+			chain = out;
+			in += SM4_BLOCK_SIZE;
+			out += SM4_BLOCK_SIZE;
+		}
+
+		/* Carry the chaining value over to the next walk step. */
+		if (chain != walk.iv)
+			memcpy(walk.iv, chain, SM4_BLOCK_SIZE);
+
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(sm4_cbc_encrypt);
+
+/*
+ * CBC decryption shared by the AVX and AVX2 drivers.
+ * @req:   skcipher request to process
+ * @bsize: number of bytes consumed by one call to @func
+ * @func:  routine that decrypts @bsize bytes and updates the IV
+ *
+ * Full @bsize batches go through @func. The remaining whole blocks are
+ * decrypted up to eight at a time into a stack buffer and then XORed with
+ * the preceding ciphertext block. The XOR runs from the last block to the
+ * first and the next IV is saved beforehand, so a ciphertext block is never
+ * overwritten before it has been used when dst and src are the same buffer.
+ *
+ * Returns the status of the skcipher walk.
+ */
+int sm4_avx_cbc_decrypt(struct skcipher_request *req,
+			unsigned int bsize, sm4_crypt_func func)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while ((nbytes = walk.nbytes) > 0) {
+		const u8 *src = walk.src.virt.addr;
+		u8 *dst = walk.dst.virt.addr;
+
+		kernel_fpu_begin();
+
+		while (nbytes >= bsize) {
+			func(ctx->rkey_dec, dst, src, walk.iv);
+			dst += bsize;
+			src += bsize;
+			nbytes -= bsize;
+		}
+
+		while (nbytes >= SM4_BLOCK_SIZE) {
+			u8 keystream[SM4_BLOCK_SIZE * 8];
+			u8 iv[SM4_BLOCK_SIZE];
+			unsigned int nblocks = min(nbytes >> 4, 8u);
+			unsigned int i;
+
+			sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream,
+					     src, nblocks);
+
+			/* The last ciphertext block becomes the next IV. */
+			memcpy(iv, src + (nblocks - 1) * SM4_BLOCK_SIZE,
+			       SM4_BLOCK_SIZE);
+
+			/*
+			 * Index from the fixed src/dst bases instead of
+			 * walking cursors, so no pointer is ever formed
+			 * before the start of the buffer.
+			 */
+			for (i = nblocks - 1; i > 0; i--) {
+				crypto_xor_cpy(dst + i * SM4_BLOCK_SIZE,
+					       src + (i - 1) * SM4_BLOCK_SIZE,
+					       &keystream[i * SM4_BLOCK_SIZE],
+					       SM4_BLOCK_SIZE);
+			}
+			crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE);
+			memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
+
+			dst += nblocks * SM4_BLOCK_SIZE;
+			src += nblocks * SM4_BLOCK_SIZE;
+			nbytes -= nblocks * SM4_BLOCK_SIZE;
+		}
+
+		kernel_fpu_end();
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt);
+
+/* CBC decryption with the 8-block AVX routine as the bulk primitive. */
+static int cbc_decrypt(struct skcipher_request *req)
+{
+	sm4_crypt_func blk8 = sm4_aesni_avx_cbc_dec_blk8;
+
+	return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE, blk8);
+}
+
+/*
+ * CFB encryption, one block at a time with sm4_crypt_block(). The previous
+ * output block (the IV for the first one) is encrypted to form the
+ * keystream, which is XORed with the input. A trailing partial block is
+ * handled only on the final walk step. Returns the status of the walk.
+ */
+int sm4_cfb_encrypt(struct skcipher_request *req)
+{
+	struct sm4_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	for (nbytes = walk.nbytes; nbytes > 0; nbytes = walk.nbytes) {
+		u8 keystream[SM4_BLOCK_SIZE];
+		const u8 *in = walk.src.virt.addr;
+		u8 *out = walk.dst.virt.addr;
+		const u8 *feedback = walk.iv;
+
+		for (; nbytes >= SM4_BLOCK_SIZE; nbytes -= SM4_BLOCK_SIZE) {
+			sm4_crypt_block(ctx->rkey_enc, keystream, feedback);
+			crypto_xor_cpy(out, in, keystream, SM4_BLOCK_SIZE);
+			feedback = out;
+			in += SM4_BLOCK_SIZE;
+			out += SM4_BLOCK_SIZE;
+		}
+
+		/* Carry the feedback block over to the next walk step. */
+		if (feedback != walk.iv)
+			memcpy(walk.iv, feedback, SM4_BLOCK_SIZE);
+
+		/* tail */
+		if (nbytes > 0 && walk.nbytes == walk.total) {
+			sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
+			crypto_xor_cpy(out, in, keystream, nbytes);
+			nbytes = 0;
+		}
+
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(sm4_cfb_encrypt);
+
+/*
+ * CFB decryption shared by the AVX and AVX2 drivers.
+ * @req:   skcipher request to process
+ * @bsize: number of bytes consumed by one call to @func
+ * @func:  routine that decrypts @bsize bytes and updates the IV
+ *
+ * Full @bsize batches go through @func. Remaining whole blocks are handled
+ * up to eight at a time: the IV and the preceding ciphertext blocks are
+ * encrypted into a stack buffer which is then XORed with the input. A
+ * trailing partial block is handled on the final walk step only, outside
+ * the FPU section. Returns the status of the skcipher walk.
+ */
+int sm4_avx_cfb_decrypt(struct skcipher_request *req,
+			unsigned int bsize, sm4_crypt_func func)
+{
+	struct sm4_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	for (nbytes = walk.nbytes; nbytes > 0; nbytes = walk.nbytes) {
+		const u8 *in = walk.src.virt.addr;
+		u8 *out = walk.dst.virt.addr;
+
+		kernel_fpu_begin();
+
+		for (; nbytes >= bsize; nbytes -= bsize) {
+			func(ctx->rkey_enc, out, in, walk.iv);
+			out += bsize;
+			in += bsize;
+		}
+
+		while (nbytes >= SM4_BLOCK_SIZE) {
+			u8 keystream[SM4_BLOCK_SIZE * 8];
+			unsigned int nblocks = min(nbytes >> 4, 8u);
+			unsigned int len = nblocks * SM4_BLOCK_SIZE;
+
+			/* Cipher input: IV, then ciphertext 0 .. nblocks-2. */
+			memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
+			if (nblocks > 1)
+				memcpy(&keystream[SM4_BLOCK_SIZE], in,
+				       (nblocks - 1) * SM4_BLOCK_SIZE);
+			/* The last ciphertext block becomes the next IV. */
+			memcpy(walk.iv, in + (nblocks - 1) * SM4_BLOCK_SIZE,
+			       SM4_BLOCK_SIZE);
+
+			sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
+					     keystream, nblocks);
+
+			crypto_xor_cpy(out, in, keystream, len);
+			out += len;
+			in += len;
+			nbytes -= len;
+		}
+
+		kernel_fpu_end();
+
+		/* tail */
+		if (nbytes > 0 && walk.nbytes == walk.total) {
+			u8 keystream[SM4_BLOCK_SIZE];
+
+			sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
+			crypto_xor_cpy(out, in, keystream, nbytes);
+			nbytes = 0;
+		}
+
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(sm4_avx_cfb_decrypt);
+
+/* CFB decryption with the 8-block AVX routine as the bulk primitive. */
+static int cfb_decrypt(struct skcipher_request *req)
+{
+	sm4_crypt_func blk8 = sm4_aesni_avx_cfb_dec_blk8;
+
+	return sm4_avx_cfb_decrypt(req, SM4_CRYPT8_BLOCK_SIZE, blk8);
+}
+
+/*
+ * CTR mode shared by the AVX and AVX2 drivers (encryption and decryption
+ * are the same operation).
+ * @req:   skcipher request to process
+ * @bsize: number of bytes consumed by one call to @func
+ * @func:  routine that processes @bsize bytes and advances the counter
+ *
+ * Full @bsize batches go through @func. Remaining whole blocks are handled
+ * up to eight at a time by materialising the counter blocks in a stack
+ * buffer, encrypting them and XORing with the input. A trailing partial
+ * block is handled on the final walk step only, outside the FPU section.
+ * Returns the status of the skcipher walk.
+ */
+int sm4_avx_ctr_crypt(struct skcipher_request *req,
+			unsigned int bsize, sm4_crypt_func func)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while ((nbytes = walk.nbytes) > 0) {
+		const u8 *src = walk.src.virt.addr;
+		u8 *dst = walk.dst.virt.addr;
+
+		kernel_fpu_begin();
+
+		while (nbytes >= bsize) {
+			func(ctx->rkey_enc, dst, src, walk.iv);
+			dst += bsize;
+			src += bsize;
+			nbytes -= bsize;
+		}
+
+		while (nbytes >= SM4_BLOCK_SIZE) {
+			u8 keystream[SM4_BLOCK_SIZE * 8];
+			unsigned int nblocks = min(nbytes >> 4, 8u);
+			unsigned int i;	/* unsigned, to match nblocks */
+
+			/* Lay out nblocks consecutive counter values. */
+			for (i = 0; i < nblocks; i++) {
+				memcpy(&keystream[i * SM4_BLOCK_SIZE],
+				       walk.iv, SM4_BLOCK_SIZE);
+				crypto_inc(walk.iv, SM4_BLOCK_SIZE);
+			}
+			sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
+					     keystream, nblocks);
+
+			crypto_xor_cpy(dst, src, keystream,
+				       nblocks * SM4_BLOCK_SIZE);
+			dst += nblocks * SM4_BLOCK_SIZE;
+			src += nblocks * SM4_BLOCK_SIZE;
+			nbytes -= nblocks * SM4_BLOCK_SIZE;
+		}
+
+		kernel_fpu_end();
+
+		/* tail */
+		if (walk.nbytes == walk.total && nbytes > 0) {
+			u8 keystream[SM4_BLOCK_SIZE];
+
+			memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
+			crypto_inc(walk.iv, SM4_BLOCK_SIZE);
+
+			sm4_crypt_block(ctx->rkey_enc, keystream, keystream);
+
+			/* dst/src are not used after this, so no advance. */
+			crypto_xor_cpy(dst, src, keystream, nbytes);
+			nbytes = 0;
+		}
+
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt);
+
+/* CTR en/decryption with the 8-block AVX routine as the bulk primitive. */
+static int ctr_crypt(struct skcipher_request *req)
+{
+	sm4_crypt_func blk8 = sm4_aesni_avx_ctr_enc_blk8;
+
+	return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE, blk8);
+}
+
+/*
+ * Internal skcipher algorithms provided by this driver: ECB, CBC, CFB and
+ * CTR. All are flagged CRYPTO_ALG_INTERNAL, use priority 400 and a walk
+ * size of eight blocks. CFB and CTR are registered with block size 1 and a
+ * chunk size of one SM4 block.
+ */
+static struct skcipher_alg sm4_aesni_avx_skciphers[] = {
+ {
+ .base = {
+ .cra_name = "__ecb(sm4)",
+ .cra_driver_name = "__ecb-sm4-aesni-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .walksize = 8 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_avx_ecb_encrypt,
+ .decrypt = sm4_avx_ecb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__cbc(sm4)",
+ .cra_driver_name = "__cbc-sm4-aesni-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .walksize = 8 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__cfb(sm4)",
+ .cra_driver_name = "__cfb-sm4-aesni-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .walksize = 8 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_cfb_encrypt,
+ .decrypt = cfb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__ctr(sm4)",
+ .cra_driver_name = "__ctr-sm4-aesni-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .walksize = 8 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ }
+};
+
+static struct simd_skcipher_alg *
+simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)];
+
+/*
+ * Module init: register the AVX SM4 skciphers if the CPU reports AVX,
+ * AES-NI and OSXSAVE and the SSE/YMM xfeatures are available. Returns
+ * -ENODEV otherwise.
+ */
+static int __init sm4_init(void)
+{
+	const char *feature_name;
+	bool have_insns;
+
+	have_insns = boot_cpu_has(X86_FEATURE_AVX) &&
+		     boot_cpu_has(X86_FEATURE_AES) &&
+		     boot_cpu_has(X86_FEATURE_OSXSAVE);
+	if (!have_insns) {
+		pr_info("AVX or AES-NI instructions are not detected.\n");
+		return -ENODEV;
+	}
+
+	if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+			      &feature_name))
+		return simd_register_skciphers_compat(sm4_aesni_avx_skciphers,
+					ARRAY_SIZE(sm4_aesni_avx_skciphers),
+					simd_sm4_aesni_avx_skciphers);
+
+	pr_info("CPU feature '%s' is not supported.\n", feature_name);
+	return -ENODEV;
+}
+
+/* Module exit: unregister everything sm4_init() registered. */
+static void __exit sm4_exit(void)
+{
+	const int nr_algs = ARRAY_SIZE(sm4_aesni_avx_skciphers);
+
+	simd_unregister_skciphers(sm4_aesni_avx_skciphers, nr_algs,
+				  simd_sm4_aesni_avx_skciphers);
+}
+
+module_init(sm4_init);
+module_exit(sm4_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
+MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized");
+MODULE_ALIAS_CRYPTO("sm4");
+MODULE_ALIAS_CRYPTO("sm4-aesni-avx");
diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig
index 39d9ded9e25a..d6cdfe631674 100644
--- a/arch/x86/events/Kconfig
+++ b/arch/x86/events/Kconfig
@@ -34,4 +34,14 @@ config PERF_EVENTS_AMD_POWER
(CPUID Fn8000_0007_EDX[12]) interface to calculate the
average power consumption on Family 15h processors.
+config PERF_EVENTS_AMD_UNCORE
+ tristate "AMD Uncore performance events"
+ depends on PERF_EVENTS && CPU_SUP_AMD
+ default y
+ help
+ Include support for AMD uncore performance events for use with
+ e.g., perf stat -e amd_l3/.../,amd_df/.../.
+
+ To compile this driver as a module, choose M here: the
+ module will be called 'amd-uncore'.
endmenu
diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile
index fe8795a67385..6cbe38d5fd9d 100644
--- a/arch/x86/events/amd/Makefile
+++ b/arch/x86/events/amd/Makefile
@@ -1,8 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_CPU_SUP_AMD) += core.o uncore.o
+obj-$(CONFIG_CPU_SUP_AMD) += core.o
obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o
obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o
+obj-$(CONFIG_PERF_EVENTS_AMD_UNCORE) += amd-uncore.o
+amd-uncore-objs := uncore.o
ifdef CONFIG_AMD_IOMMU
obj-$(CONFIG_CPU_SUP_AMD) += iommu.o
endif
-
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 40669eac9d6d..9739019d4b67 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -26,6 +26,7 @@ static u32 ibs_caps;
#include <linux/hardirq.h>
#include <asm/nmi.h>
+#include <asm/amd-ibs.h>
#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
@@ -90,6 +91,7 @@ struct perf_ibs {
unsigned long offset_mask[1];
int offset_max;
unsigned int fetch_count_reset_broken : 1;
+ unsigned int fetch_ignore_if_zero_rip : 1;
struct cpu_perf_ibs __percpu *pcpu;
struct attribute **format_attrs;
@@ -99,15 +101,6 @@ struct perf_ibs {
u64 (*get_count)(u64 config);
};
-struct perf_ibs_data {
- u32 size;
- union {
- u32 data[0]; /* data buffer starts here */
- u32 caps;
- };
- u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
-};
-
static int
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
{
@@ -328,11 +321,14 @@ static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
static u64 get_ibs_fetch_count(u64 config)
{
- return (config & IBS_FETCH_CNT) >> 12;
+ union ibs_fetch_ctl fetch_ctl = (union ibs_fetch_ctl)config;
+
+ return fetch_ctl.fetch_cnt << 4;
}
static u64 get_ibs_op_count(u64 config)
{
+ union ibs_op_ctl op_ctl = (union ibs_op_ctl)config;
u64 count = 0;
/*
@@ -340,12 +336,12 @@ static u64 get_ibs_op_count(u64 config)
* and the lower 7 bits of CurCnt are randomized.
* Otherwise CurCnt has the full 27-bit current counter value.
*/
- if (config & IBS_OP_VAL) {
- count = (config & IBS_OP_MAX_CNT) << 4;
+ if (op_ctl.op_val) {
+ count = op_ctl.opmaxcnt << 4;
if (ibs_caps & IBS_CAPS_OPCNTEXT)
- count += config & IBS_OP_MAX_CNT_EXT_MASK;
+ count += op_ctl.opmaxcnt_ext << 20;
} else if (ibs_caps & IBS_CAPS_RDWROPCNT) {
- count = (config & IBS_OP_CUR_CNT) >> 32;
+ count = op_ctl.opcurcnt;
}
return count;
@@ -570,6 +566,7 @@ static struct perf_ibs perf_ibs_op = {
.start = perf_ibs_start,
.stop = perf_ibs_stop,
.read = perf_ibs_read,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
},
.msr = MSR_AMD64_IBSOPCTL,
.config_mask = IBS_OP_CONFIG_MASK,
@@ -672,6 +669,10 @@ fail:
if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
regs.flags &= ~PERF_EFLAGS_EXACT;
} else {
+ /* Workaround for erratum #1197 */
+ if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1]))
+ goto out;
+
set_linear_ip(&regs, ibs_data.regs[1]);
regs.flags |= PERF_EFLAGS_EXACT;
}
@@ -769,6 +770,9 @@ static __init void perf_event_ibs_init(void)
if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
perf_ibs_fetch.fetch_count_reset_broken = 1;
+ if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10)
+ perf_ibs_fetch.fetch_ignore_if_zero_rip = 1;
+
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
if (ibs_caps & IBS_CAPS_OPCNT) {
diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
index 16a2369c586e..37d5b380516e 100644
--- a/arch/x86/events/amd/power.c
+++ b/arch/x86/events/amd/power.c
@@ -213,6 +213,7 @@ static struct pmu pmu_class = {
.stop = pmu_event_stop,
.read = pmu_event_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .module = THIS_MODULE,
};
static int power_cpu_exit(unsigned int cpu)
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 582c0ffb5e98..0d04414b97d2 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -12,11 +12,11 @@
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
+#include <linux/cpufeature.h>
+#include <linux/smp.h>
-#include <asm/cpufeature.h>
#include <asm/perf_event.h>
#include <asm/msr.h>
-#include <asm/smp.h>
#define NUM_COUNTERS_NB 4
#define NUM_COUNTERS_L2 4
@@ -347,6 +347,7 @@ static struct pmu amd_nb_pmu = {
.stop = amd_uncore_stop,
.read = amd_uncore_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+ .module = THIS_MODULE,
};
static struct pmu amd_llc_pmu = {
@@ -360,6 +361,7 @@ static struct pmu amd_llc_pmu = {
.stop = amd_uncore_stop,
.read = amd_uncore_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+ .module = THIS_MODULE,
};
static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
@@ -452,7 +454,7 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
if (amd_uncore_llc) {
uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
- uncore->id = per_cpu(cpu_llc_id, cpu);
+ uncore->id = get_llc_id(cpu);
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
*per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
@@ -659,12 +661,34 @@ fail_prep:
fail_llc:
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
perf_pmu_unregister(&amd_nb_pmu);
- if (amd_uncore_llc)
- free_percpu(amd_uncore_llc);
+ free_percpu(amd_uncore_llc);
fail_nb:
- if (amd_uncore_nb)
- free_percpu(amd_uncore_nb);
+ free_percpu(amd_uncore_nb);
return ret;
}
-device_initcall(amd_uncore_init);
+
+/*
+ * Module unload: remove the three uncore CPU hotplug states, then, for each
+ * PMU whose CPU feature bit is set, unregister it and free its per-CPU array.
+ */
+static void __exit amd_uncore_exit(void)
+{
+	/* Hotplug states go first: ONLINE, then STARTING, then PREP. */
+	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE);
+	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
+	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
+
+	/* LLC PMU: unregister, free, and clear the now-stale pointer. */
+	if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
+		perf_pmu_unregister(&amd_llc_pmu);
+		free_percpu(amd_uncore_llc);
+		amd_uncore_llc = NULL;
+	}
+
+	/* NB PMU: same sequence as for the LLC PMU. */
+	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
+		perf_pmu_unregister(&amd_nb_pmu);
+		free_percpu(amd_uncore_nb);
+		amd_uncore_nb = NULL;
+	}
+}
+
+module_init(amd_uncore_init);
+module_exit(amd_uncore_exit);
+
+MODULE_DESCRIPTION("AMD Uncore Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 3092fbf9dbe4..2a57dbed4894 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1087,10 +1087,8 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
* validate an event group (assign == NULL)
*/
if (!unsched && assign) {
- for (i = 0; i < n; i++) {
- e = cpuc->event_list[i];
+ for (i = 0; i < n; i++)
static_call_cond(x86_pmu_commit_scheduling)(cpuc, i, assign[i]);
- }
} else {
for (i = n0; i < n; i++) {
e = cpuc->event_list[i];
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index ac6fd2dabf6a..7011e87be6d0 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -5032,9 +5032,9 @@ static ssize_t freeze_on_smi_store(struct device *cdev,
x86_pmu.attr_freeze_on_smi = val;
- get_online_cpus();
+ cpus_read_lock();
on_each_cpu(flip_smm_bit, &val, 1);
- put_online_cpus();
+ cpus_read_unlock();
done:
mutex_unlock(&freeze_on_smi_mutex);
@@ -5077,9 +5077,9 @@ static ssize_t set_sysctl_tfa(struct device *cdev,
allow_tsx_force_abort = val;
- get_online_cpus();
+ cpus_read_lock();
on_each_cpu(update_tfa_sched, NULL, 1);
- put_online_cpus();
+ cpus_read_unlock();
return count;
}
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 915847655c06..7f406c14715f 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -62,7 +62,7 @@ static struct pt_cap_desc {
PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)),
PT_CAP(output_subsys, 0, CPUID_ECX, BIT(3)),
PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)),
- PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x3),
+ PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x7),
PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000),
PT_CAP(cycle_thresholds, 1, CPUID_EBX, 0xffff),
PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000),
@@ -1708,7 +1708,7 @@ static __init int pt_init(void)
if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
return -ENODEV;
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
u64 ctl;
@@ -1716,7 +1716,7 @@ static __init int pt_init(void)
if (!ret && (ctl & RTIT_CTL_TRACEEN))
prior_warn++;
}
- put_online_cpus();
+ cpus_read_unlock();
if (prior_warn) {
x86_add_exclusive(x86_lbr_exclusive_pt);
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 9bf4dbbc26e2..c72e368dd164 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -842,6 +842,18 @@ static const struct attribute_group uncore_pmu_attr_group = {
.attrs = uncore_pmu_attrs,
};
+/*
+ * Write the generic "uncore_type_<type id>" name for @pmu into @pmu_name.
+ * When the type has more than one box, the box ID taken from
+ * box_ids[pmu->pmu_idx] is appended as a "_<id>" suffix.
+ *
+ * The name is produced with sprintf(), so no length check happens here; the
+ * caller has to supply a buffer that is large enough.
+ */
+void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
+{
+	struct intel_uncore_type *type = pmu->type;
+
+	if (type->num_boxes != 1) {
+		sprintf(pmu_name, "uncore_type_%u_%d",
+			type->type_id, type->box_ids[pmu->pmu_idx]);
+		return;
+	}
+
+	/* Single box: the type ID alone identifies the PMU. */
+	sprintf(pmu_name, "uncore_type_%u", type->type_id);
+}
+
static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
{
struct intel_uncore_type *type = pmu->type;
@@ -851,12 +863,7 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
* Use uncore_type_&typeid_&boxid as name.
*/
if (!type->name) {
- if (type->num_boxes == 1)
- sprintf(pmu->name, "uncore_type_%u", type->type_id);
- else {
- sprintf(pmu->name, "uncore_type_%u_%d",
- type->type_id, type->box_ids[pmu->pmu_idx]);
- }
+ uncore_get_alias_name(pmu->name, pmu);
return;
}
@@ -865,9 +872,13 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
sprintf(pmu->name, "uncore_%s", type->name);
else
sprintf(pmu->name, "uncore");
- } else
- sprintf(pmu->name, "uncore_%s_%d", type->name, pmu->pmu_idx);
-
+ } else {
+ /*
+ * Use the box ID from the discovery table if applicable.
+ */
+ sprintf(pmu->name, "uncore_%s_%d", type->name,
+ type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx);
+ }
}
static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
@@ -1663,6 +1674,7 @@ struct intel_uncore_init_fun {
void (*cpu_init)(void);
int (*pci_init)(void);
void (*mmio_init)(void);
+ bool use_discovery;
};
static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
@@ -1765,6 +1777,13 @@ static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
.mmio_init = snr_uncore_mmio_init,
};
+/*
+ * Init callbacks referenced by the SAPPHIRERAPIDS_X entry of
+ * intel_uncore_match[]. use_discovery is set, so intel_uncore_init() returns
+ * -ENODEV for this model when uncore_no_discover is set or when
+ * intel_uncore_has_discovery_tables() returns false.
+ */
+static const struct intel_uncore_init_fun spr_uncore_init __initconst = {
+ .cpu_init = spr_uncore_cpu_init,
+ .pci_init = spr_uncore_pci_init,
+ .mmio_init = spr_uncore_mmio_init,
+ .use_discovery = true,
+};
+
static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
.cpu_init = intel_uncore_generic_uncore_cpu_init,
.pci_init = intel_uncore_generic_uncore_pci_init,
@@ -1809,6 +1828,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
{},
};
@@ -1832,8 +1852,13 @@ static int __init intel_uncore_init(void)
uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
else
return -ENODEV;
- } else
+ } else {
uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
+ if (uncore_no_discover && uncore_init->use_discovery)
+ return -ENODEV;
+ if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables())
+ return -ENODEV;
+ }
if (uncore_init->pci_init) {
pret = uncore_init->pci_init();
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 187d7287039c..b9687980aab6 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -561,6 +561,7 @@ struct event_constraint *
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event);
void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event);
u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
+void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu);
extern struct intel_uncore_type *empty_uncore[];
extern struct intel_uncore_type **uncore_msr_uncores;
@@ -608,6 +609,9 @@ void snr_uncore_mmio_init(void);
int icx_uncore_pci_init(void);
void icx_uncore_cpu_init(void);
void icx_uncore_mmio_init(void);
+int spr_uncore_pci_init(void);
+void spr_uncore_cpu_init(void);
+void spr_uncore_mmio_init(void);
/* uncore_nhmex.c */
void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c
index aba9bff95413..3049c646fa20 100644
--- a/arch/x86/events/intel/uncore_discovery.c
+++ b/arch/x86/events/intel/uncore_discovery.c
@@ -337,17 +337,17 @@ static const struct attribute_group generic_uncore_format_group = {
.attrs = generic_uncore_formats_attr,
};
-static void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box)
+void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box)
{
wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_INT);
}
-static void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box)
+void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box)
{
wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ);
}
-static void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box)
+void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box)
{
wrmsrl(uncore_msr_box_ctl(box), 0);
}
@@ -377,7 +377,7 @@ static struct intel_uncore_ops generic_uncore_msr_ops = {
.read_counter = uncore_msr_read_counter,
};
-static void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box)
+void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box);
@@ -386,7 +386,7 @@ static void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box)
pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_INT);
}
-static void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box)
+void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box);
@@ -394,7 +394,7 @@ static void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box)
pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_FRZ);
}
-static void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box)
+void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box);
@@ -411,8 +411,8 @@ static void intel_generic_uncore_pci_enable_event(struct intel_uncore_box *box,
pci_write_config_dword(pdev, hwc->config_base, hwc->config);
}
-static void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box,
- struct perf_event *event)
+void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
{
struct pci_dev *pdev = box->pci_dev;
struct hw_perf_event *hwc = &event->hw;
@@ -420,8 +420,8 @@ static void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box,
pci_write_config_dword(pdev, hwc->config_base, 0);
}
-static u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box,
- struct perf_event *event)
+u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box,
+ struct perf_event *event)
{
struct pci_dev *pdev = box->pci_dev;
struct hw_perf_event *hwc = &event->hw;
@@ -454,7 +454,7 @@ static unsigned int generic_uncore_mmio_box_ctl(struct intel_uncore_box *box)
return type->box_ctls[box->dieid] + type->mmio_offsets[box->pmu->pmu_idx];
}
-static void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box)
+void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box)
{
unsigned int box_ctl = generic_uncore_mmio_box_ctl(box);
struct intel_uncore_type *type = box->pmu->type;
@@ -478,7 +478,7 @@ static void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box)
writel(GENERIC_PMON_BOX_CTL_INT, box->io_addr);
}
-static void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box)
+void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box)
{
if (!box->io_addr)
return;
@@ -486,7 +486,7 @@ static void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box)
writel(GENERIC_PMON_BOX_CTL_FRZ, box->io_addr);
}
-static void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box)
+void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box)
{
if (!box->io_addr)
return;
@@ -505,8 +505,8 @@ static void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
writel(hwc->config, box->io_addr + hwc->config_base);
}
-static void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box,
- struct perf_event *event)
+void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -568,8 +568,8 @@ static bool uncore_update_uncore_type(enum uncore_access_type type_id,
return true;
}
-static struct intel_uncore_type **
-intel_uncore_generic_init_uncores(enum uncore_access_type type_id)
+struct intel_uncore_type **
+intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra)
{
struct intel_uncore_discovery_type *type;
struct intel_uncore_type **uncores;
@@ -577,7 +577,7 @@ intel_uncore_generic_init_uncores(enum uncore_access_type type_id)
struct rb_node *node;
int i = 0;
- uncores = kcalloc(num_discovered_types[type_id] + 1,
+ uncores = kcalloc(num_discovered_types[type_id] + num_extra + 1,
sizeof(struct intel_uncore_type *), GFP_KERNEL);
if (!uncores)
return empty_uncore;
@@ -606,17 +606,17 @@ intel_uncore_generic_init_uncores(enum uncore_access_type type_id)
void intel_uncore_generic_uncore_cpu_init(void)
{
- uncore_msr_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MSR);
+ uncore_msr_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MSR, 0);
}
int intel_uncore_generic_uncore_pci_init(void)
{
- uncore_pci_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_PCI);
+ uncore_pci_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_PCI, 0);
return 0;
}
void intel_uncore_generic_uncore_mmio_init(void)
{
- uncore_mmio_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MMIO);
+ uncore_mmio_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MMIO, 0);
}
diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h
index 1d652939a01c..7280c8a3c831 100644
--- a/arch/x86/events/intel/uncore_discovery.h
+++ b/arch/x86/events/intel/uncore_discovery.h
@@ -129,3 +129,24 @@ void intel_uncore_clear_discovery_tables(void);
void intel_uncore_generic_uncore_cpu_init(void);
int intel_uncore_generic_uncore_pci_init(void);
void intel_uncore_generic_uncore_mmio_init(void);
+
+void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box);
+void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box);
+void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box);
+
+void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box);
+void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box);
+void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box);
+void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box,
+ struct perf_event *event);
+
+void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box);
+void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box);
+void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box);
+void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box,
+ struct perf_event *event);
+u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box,
+ struct perf_event *event);
+
+struct intel_uncore_type **
+intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra);
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 609c24aec71a..5ddc0f30db6f 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* SandyBridge-EP/IvyTown uncore support */
#include "uncore.h"
+#include "uncore_discovery.h"
/* SNB-EP pci bus to socket mapping */
#define SNBEP_CPUNODEID 0x40
@@ -454,6 +455,17 @@
#define ICX_NUMBER_IMC_CHN 2
#define ICX_IMC_MEM_STRIDE 0x4
+/* SPR */
+#define SPR_RAW_EVENT_MASK_EXT 0xffffff
+
+/* SPR CHA */
+#define SPR_CHA_PMON_CTL_TID_EN (1 << 16)
+#define SPR_CHA_PMON_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
+ SPR_CHA_PMON_CTL_TID_EN)
+#define SPR_CHA_PMON_BOX_FILTER_TID 0x3ff
+
+#define SPR_C0_MSR_PMON_BOX_FILTER0 0x200e
+
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6");
DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
@@ -466,6 +478,7 @@ DEFINE_UNCORE_FORMAT_ATTR(umask_ext4, umask, "config:8-15,32-55");
DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
+DEFINE_UNCORE_FORMAT_ATTR(tid_en2, tid_en, "config:16");
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
DEFINE_UNCORE_FORMAT_ATTR(thresh9, thresh, "config:24-35");
DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
@@ -3838,26 +3851,32 @@ clear_attr_update:
return ret;
}
-static int skx_iio_set_mapping(struct intel_uncore_type *type)
-{
- return pmu_iio_set_mapping(type, &skx_iio_mapping_group);
-}
-
-static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
+static void
+pmu_iio_cleanup_mapping(struct intel_uncore_type *type, struct attribute_group *ag)
{
- struct attribute **attr = skx_iio_mapping_group.attrs;
+ struct attribute **attr = ag->attrs;
if (!attr)
return;
for (; *attr; attr++)
kfree((*attr)->name);
- kfree(attr_to_ext_attr(*skx_iio_mapping_group.attrs));
- kfree(skx_iio_mapping_group.attrs);
- skx_iio_mapping_group.attrs = NULL;
+ kfree(attr_to_ext_attr(*ag->attrs));
+ kfree(ag->attrs);
+ ag->attrs = NULL;
kfree(type->topology);
}
+/*
+ * Thin wrapper: hand the SKX IIO mapping attribute group to
+ * pmu_iio_set_mapping() and pass its return value straight back.
+ */
+static int skx_iio_set_mapping(struct intel_uncore_type *type)
+{
+	struct attribute_group *group = &skx_iio_mapping_group;
+
+	return pmu_iio_set_mapping(type, group);
+}
+
+/*
+ * Thin wrapper: release the SKX IIO mapping attributes by passing
+ * skx_iio_mapping_group to the shared pmu_iio_cleanup_mapping() helper.
+ */
+static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
+{
+	struct attribute_group *group = &skx_iio_mapping_group;
+
+	pmu_iio_cleanup_mapping(type, group);
+}
+
static struct intel_uncore_type skx_uncore_iio = {
.name = "iio",
.num_counters = 4,
@@ -4501,6 +4520,11 @@ static int snr_iio_set_mapping(struct intel_uncore_type *type)
return pmu_iio_set_mapping(type, &snr_iio_mapping_group);
}
+/*
+ * .cleanup_mapping callback of snr_uncore_iio. Passes SNR's own mapping
+ * group to the shared helper so that the attributes freed are the ones in
+ * snr_iio_mapping_group rather than another platform's group.
+ */
+static void snr_iio_cleanup_mapping(struct intel_uncore_type *type)
+{
+	struct attribute_group *group = &snr_iio_mapping_group;
+
+	pmu_iio_cleanup_mapping(type, group);
+}
+
static struct intel_uncore_type snr_uncore_iio = {
.name = "iio",
.num_counters = 4,
@@ -4517,7 +4541,7 @@ static struct intel_uncore_type snr_uncore_iio = {
.attr_update = snr_iio_attr_update,
.get_topology = snr_iio_get_topology,
.set_mapping = snr_iio_set_mapping,
- .cleanup_mapping = skx_iio_cleanup_mapping,
+ .cleanup_mapping = snr_iio_cleanup_mapping,
};
static struct intel_uncore_type snr_uncore_irp = {
@@ -4783,13 +4807,15 @@ int snr_uncore_pci_init(void)
return 0;
}
-static struct pci_dev *snr_uncore_get_mc_dev(int id)
+#define SNR_MC_DEVICE_ID 0x3451
+
+static struct pci_dev *snr_uncore_get_mc_dev(unsigned int device, int id)
{
struct pci_dev *mc_dev = NULL;
int pkg;
while (1) {
- mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3451, mc_dev);
+ mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, mc_dev);
if (!mc_dev)
break;
pkg = uncore_pcibus_to_dieid(mc_dev->bus);
@@ -4799,19 +4825,20 @@ static struct pci_dev *snr_uncore_get_mc_dev(int id)
return mc_dev;
}
-static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box,
- unsigned int box_ctl, int mem_offset)
+static int snr_uncore_mmio_map(struct intel_uncore_box *box,
+ unsigned int box_ctl, int mem_offset,
+ unsigned int device)
{
- struct pci_dev *pdev = snr_uncore_get_mc_dev(box->dieid);
+ struct pci_dev *pdev = snr_uncore_get_mc_dev(device, box->dieid);
struct intel_uncore_type *type = box->pmu->type;
resource_size_t addr;
u32 pci_dword;
if (!pdev)
- return;
+ return -ENODEV;
pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword);
- addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23;
+ addr = ((resource_size_t)pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23;
pci_read_config_dword(pdev, mem_offset, &pci_dword);
addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12;
@@ -4821,16 +4848,25 @@ static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box,
box->io_addr = ioremap(addr, type->mmio_map_size);
if (!box->io_addr) {
pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
- return;
+ return -EINVAL;
}
- writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr);
+ return 0;
+}
+
+/*
+ * Map the box's MMIO region via snr_uncore_mmio_map() and, only when the
+ * mapping succeeded, write IVBEP_PMON_BOX_CTL_INT to the start of the mapped
+ * area. A mapping failure is not reported to the caller; nothing is written
+ * in that case.
+ */
+static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box,
+				       unsigned int box_ctl, int mem_offset,
+				       unsigned int device)
+{
+	int err = snr_uncore_mmio_map(box, box_ctl, mem_offset, device);
+
+	if (err)
+		return;
+
+	writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr);
+}
static void snr_uncore_mmio_init_box(struct intel_uncore_box *box)
{
__snr_uncore_mmio_init_box(box, uncore_mmio_box_ctl(box),
- SNR_IMC_MMIO_MEM0_OFFSET);
+ SNR_IMC_MMIO_MEM0_OFFSET,
+ SNR_MC_DEVICE_ID);
}
static void snr_uncore_mmio_disable_box(struct intel_uncore_box *box)
@@ -5092,6 +5128,11 @@ static int icx_iio_set_mapping(struct intel_uncore_type *type)
return pmu_iio_set_mapping(type, &icx_iio_mapping_group);
}
+/*
+ * .cleanup_mapping callback of icx_uncore_iio. Passes ICX's own mapping
+ * group to the shared helper so that the attributes freed are the ones in
+ * icx_iio_mapping_group rather than another platform's group.
+ */
+static void icx_iio_cleanup_mapping(struct intel_uncore_type *type)
+{
+	struct attribute_group *group = &icx_iio_mapping_group;
+
+	pmu_iio_cleanup_mapping(type, group);
+}
+
static struct intel_uncore_type icx_uncore_iio = {
.name = "iio",
.num_counters = 4,
@@ -5109,7 +5150,7 @@ static struct intel_uncore_type icx_uncore_iio = {
.attr_update = icx_iio_attr_update,
.get_topology = icx_iio_get_topology,
.set_mapping = icx_iio_set_mapping,
- .cleanup_mapping = skx_iio_cleanup_mapping,
+ .cleanup_mapping = icx_iio_cleanup_mapping,
};
static struct intel_uncore_type icx_uncore_irp = {
@@ -5405,7 +5446,8 @@ static void icx_uncore_imc_init_box(struct intel_uncore_box *box)
int mem_offset = (box->pmu->pmu_idx / ICX_NUMBER_IMC_CHN) * ICX_IMC_MEM_STRIDE +
SNR_IMC_MMIO_MEM0_OFFSET;
- __snr_uncore_mmio_init_box(box, box_ctl, mem_offset);
+ __snr_uncore_mmio_init_box(box, box_ctl, mem_offset,
+ SNR_MC_DEVICE_ID);
}
static struct intel_uncore_ops icx_uncore_mmio_ops = {
@@ -5475,7 +5517,8 @@ static void icx_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
int mem_offset = box->pmu->pmu_idx * ICX_IMC_MEM_STRIDE +
SNR_IMC_MMIO_MEM0_OFFSET;
- __snr_uncore_mmio_init_box(box, uncore_mmio_box_ctl(box), mem_offset);
+ snr_uncore_mmio_map(box, uncore_mmio_box_ctl(box),
+ mem_offset, SNR_MC_DEVICE_ID);
}
static struct intel_uncore_ops icx_uncore_imc_freerunning_ops = {
@@ -5509,3 +5552,497 @@ void icx_uncore_mmio_init(void)
}
/* end of ICX uncore support */
+
+/* SPR uncore support */
+
+/*
+ * Enable an MSR-based uncore event.
+ *
+ * If the event has an extra register assigned (extra_reg.idx is not
+ * EXTRA_REG_NONE), that register is programmed with its config first;
+ * the event's control register (hwc->config_base) is written afterwards.
+ */
+static void spr_uncore_msr_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE)
+ wrmsrl(reg1->reg, reg1->config);
+
+ wrmsrl(hwc->config_base, hwc->config);
+}
+
+/*
+ * Disable an MSR-based uncore event.
+ *
+ * Clears the extra register (if one is assigned to the event) and then
+ * clears the event's control register by writing 0 to both.
+ */
+static void spr_uncore_msr_disable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE)
+ wrmsrl(reg1->reg, 0);
+
+ wrmsrl(hwc->config_base, 0);
+}
+
+/*
+ * Prepare the extra (filter) register of a CHA event.
+ *
+ * When the event config has SPR_CHA_PMON_CTL_TID_EN set, the event's
+ * extra_reg is pointed at this box's FILTER0 MSR (base address plus
+ * HSWEP_CBO_MSR_OFFSET times the box ID) and loaded with the TID bits
+ * taken from attr.config1. Otherwise extra_reg is left untouched.
+ *
+ * Always returns 0.
+ */
+static int spr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct intel_uncore_type *type = box->pmu->type;
+	struct hw_perf_event_extra *filter = &event->hw.extra_reg;
+	bool tid_en = (event->hw.config & SPR_CHA_PMON_CTL_TID_EN) != 0;
+
+	/* No TID filtering requested: nothing to set up. */
+	if (!tid_en)
+		return 0;
+
+	filter->reg = SPR_C0_MSR_PMON_BOX_FILTER0 +
+		      HSWEP_CBO_MSR_OFFSET * type->box_ids[box->pmu->pmu_idx];
+	filter->config = event->attr.config1 & SPR_CHA_PMON_BOX_FILTER_TID;
+	filter->idx = 0;
+
+	return 0;
+}
+
+static struct intel_uncore_ops spr_uncore_chabox_ops = {
+ .init_box = intel_generic_uncore_msr_init_box,
+ .disable_box = intel_generic_uncore_msr_disable_box,
+ .enable_box = intel_generic_uncore_msr_enable_box,
+ .disable_event = spr_uncore_msr_disable_event,
+ .enable_event = spr_uncore_msr_enable_event,
+ .read_counter = uncore_msr_read_counter,
+ .hw_config = spr_cha_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
+static struct attribute *spr_uncore_cha_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask_ext4.attr,
+ &format_attr_tid_en2.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_filter_tid5.attr,
+ NULL,
+};
+static const struct attribute_group spr_uncore_chabox_format_group = {
+ .name = "format",
+ .attrs = spr_uncore_cha_formats_attr,
+};
+
+/*
+ * sysfs show handler for the "alias" device attribute.
+ *
+ * Resolves the uncore PMU behind @dev, builds its alias name with
+ * uncore_get_alias_name() and emits it to @buf followed by a newline.
+ * Returns whatever sysfs_emit() returns.
+ */
+static ssize_t alias_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(dev);
+ char pmu_name[UNCORE_PMU_NAME_LEN];
+
+ uncore_get_alias_name(pmu_name, pmu);
+ return sysfs_emit(buf, "%s\n", pmu_name);
+}
+
+static DEVICE_ATTR_RO(alias);
+
+static struct attribute *uncore_alias_attrs[] = {
+ &dev_attr_alias.attr,
+ NULL
+};
+
+ATTRIBUTE_GROUPS(uncore_alias);
+
+static struct intel_uncore_type spr_uncore_chabox = {
+ .name = "cha",
+ .event_mask = SPR_CHA_PMON_EVENT_MASK,
+ .event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
+ .num_shared_regs = 1,
+ .ops = &spr_uncore_chabox_ops,
+ .format_group = &spr_uncore_chabox_format_group,
+ .attr_update = uncore_alias_groups,
+};
+
+static struct intel_uncore_type spr_uncore_iio = {
+ .name = "iio",
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT,
+ .format_group = &snr_uncore_iio_format_group,
+ .attr_update = uncore_alias_groups,
+};
+
+static struct attribute *spr_uncore_raw_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask_ext4.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static const struct attribute_group spr_uncore_raw_format_group = {
+ .name = "format",
+ .attrs = spr_uncore_raw_formats_attr,
+};
+
+#define SPR_UNCORE_COMMON_FORMAT() \
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \
+ .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, \
+ .format_group = &spr_uncore_raw_format_group, \
+ .attr_update = uncore_alias_groups
+
+static struct intel_uncore_type spr_uncore_irp = {
+ SPR_UNCORE_COMMON_FORMAT(),
+ .name = "irp",
+
+};
+
+static struct intel_uncore_type spr_uncore_m2pcie = {
+ SPR_UNCORE_COMMON_FORMAT(),
+ .name = "m2pcie",
+};
+
+static struct intel_uncore_type spr_uncore_pcu = {
+ .name = "pcu",
+ .attr_update = uncore_alias_groups,
+};
+
+/*
+ * Enable an MMIO-based uncore event.
+ *
+ * Does nothing when the box has no MMIO mapping (box->io_addr is NULL).
+ * A fixed counter is enabled by writing just SNBEP_PMON_CTL_EN to its
+ * control register; any other counter gets the full event config.
+ */
+static void spr_uncore_mmio_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!box->io_addr)
+ return;
+
+ if (uncore_pmc_fixed(hwc->idx))
+ writel(SNBEP_PMON_CTL_EN, box->io_addr + hwc->config_base);
+ else
+ writel(hwc->config, box->io_addr + hwc->config_base);
+}
+
+static struct intel_uncore_ops spr_uncore_mmio_ops = {
+ .init_box = intel_generic_uncore_mmio_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .disable_box = intel_generic_uncore_mmio_disable_box,
+ .enable_box = intel_generic_uncore_mmio_enable_box,
+ .disable_event = intel_generic_uncore_mmio_disable_event,
+ .enable_event = spr_uncore_mmio_enable_event,
+ .read_counter = uncore_mmio_read_counter,
+};
+
+static struct intel_uncore_type spr_uncore_imc = {
+ SPR_UNCORE_COMMON_FORMAT(),
+ .name = "imc",
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
+ .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
+ .ops = &spr_uncore_mmio_ops,
+};
+
+/*
+ * Enable a PCI-based uncore event.
+ *
+ * The 64-bit event config is written to PCI config space as two dwords:
+ * the upper 32 bits go to config_base + 4 first, then the lower 32 bits
+ * go to config_base.
+ */
+static void spr_uncore_pci_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ pci_write_config_dword(pdev, hwc->config_base + 4, (u32)(hwc->config >> 32));
+ pci_write_config_dword(pdev, hwc->config_base, (u32)hwc->config);
+}
+
+static struct intel_uncore_ops spr_uncore_pci_ops = {
+ .init_box = intel_generic_uncore_pci_init_box,
+ .disable_box = intel_generic_uncore_pci_disable_box,
+ .enable_box = intel_generic_uncore_pci_enable_box,
+ .disable_event = intel_generic_uncore_pci_disable_event,
+ .enable_event = spr_uncore_pci_enable_event,
+ .read_counter = intel_generic_uncore_pci_read_counter,
+};
+
+#define SPR_UNCORE_PCI_COMMON_FORMAT() \
+ SPR_UNCORE_COMMON_FORMAT(), \
+ .ops = &spr_uncore_pci_ops
+
+static struct intel_uncore_type spr_uncore_m2m = {
+ SPR_UNCORE_PCI_COMMON_FORMAT(),
+ .name = "m2m",
+};
+
+static struct intel_uncore_type spr_uncore_upi = {
+ SPR_UNCORE_PCI_COMMON_FORMAT(),
+ .name = "upi",
+};
+
+static struct intel_uncore_type spr_uncore_m3upi = {
+ SPR_UNCORE_PCI_COMMON_FORMAT(),
+ .name = "m3upi",
+};
+
+static struct intel_uncore_type spr_uncore_mdf = {
+ SPR_UNCORE_COMMON_FORMAT(),
+ .name = "mdf",
+};
+
+#define UNCORE_SPR_NUM_UNCORE_TYPES 12
+#define UNCORE_SPR_IIO 1
+#define UNCORE_SPR_IMC 6
+
+static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = {
+ &spr_uncore_chabox,
+ &spr_uncore_iio,
+ &spr_uncore_irp,
+ &spr_uncore_m2pcie,
+ &spr_uncore_pcu,
+ NULL,
+ &spr_uncore_imc,
+ &spr_uncore_m2m,
+ &spr_uncore_upi,
+ &spr_uncore_m3upi,
+ NULL,
+ &spr_uncore_mdf,
+};
+
+enum perf_uncore_spr_iio_freerunning_type_id {
+ SPR_IIO_MSR_IOCLK,
+ SPR_IIO_MSR_BW_IN,
+ SPR_IIO_MSR_BW_OUT,
+
+ SPR_IIO_FREERUNNING_TYPE_MAX,
+};
+
+static struct freerunning_counters spr_iio_freerunning[] = {
+ [SPR_IIO_MSR_IOCLK] = { 0x340e, 0x1, 0x10, 1, 48 },
+ [SPR_IIO_MSR_BW_IN] = { 0x3800, 0x1, 0x10, 8, 48 },
+ [SPR_IIO_MSR_BW_OUT] = { 0x3808, 0x1, 0x10, 8, 48 },
+};
+
+static struct uncore_event_desc spr_uncore_iio_freerunning_events[] = {
+ /* Free-Running IIO CLOCKS Counter */
+ INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"),
+ /* Free-Running IIO BANDWIDTH IN Counters */
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"),
+ /* Free-Running IIO BANDWIDTH OUT Counters */
+ INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x30"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x31"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x32"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x33"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port4, "event=0xff,umask=0x34"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port4.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port4.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port5, "event=0xff,umask=0x35"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port5.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port5.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port6, "event=0xff,umask=0x36"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port6.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port6.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port7, "event=0xff,umask=0x37"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port7.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port7.unit, "MiB"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_type spr_uncore_iio_free_running = {
+ .name = "iio_free_running",
+ .num_counters = 17,
+ .num_freerunning_types = SPR_IIO_FREERUNNING_TYPE_MAX,
+ .freerunning = spr_iio_freerunning,
+ .ops = &skx_uncore_iio_freerunning_ops,
+ .event_descs = spr_uncore_iio_freerunning_events,
+ .format_group = &skx_uncore_iio_freerunning_format_group,
+};
+
+enum perf_uncore_spr_imc_freerunning_type_id {
+ SPR_IMC_DCLK,
+ SPR_IMC_PQ_CYCLES,
+
+ SPR_IMC_FREERUNNING_TYPE_MAX,
+};
+
+static struct freerunning_counters spr_imc_freerunning[] = {
+ [SPR_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 },
+ [SPR_IMC_PQ_CYCLES] = { 0x2318, 0x8, 0, 2, 48 },
+};
+
+static struct uncore_event_desc spr_uncore_imc_freerunning_events[] = {
+ INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"),
+
+ INTEL_UNCORE_EVENT_DESC(rpq_cycles, "event=0xff,umask=0x20"),
+ INTEL_UNCORE_EVENT_DESC(wpq_cycles, "event=0xff,umask=0x21"),
+ { /* end: all zeroes */ },
+};
+
+#define SPR_MC_DEVICE_ID 0x3251
+
+/*
+ * Map the MMIO region of an IMC free-running counter box.
+ *
+ * The memory offset is derived from the PMU index (one ICX_IMC_MEM_STRIDE
+ * per index, starting at SNR_IMC_MMIO_MEM0_OFFSET). Only the mapping is
+ * set up here; no box control register is written.
+ */
+static void spr_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+{
+ int mem_offset = box->pmu->pmu_idx * ICX_IMC_MEM_STRIDE + SNR_IMC_MMIO_MEM0_OFFSET;
+
+ snr_uncore_mmio_map(box, uncore_mmio_box_ctl(box),
+ mem_offset, SPR_MC_DEVICE_ID);
+}
+
+static struct intel_uncore_ops spr_uncore_imc_freerunning_ops = {
+ .init_box = spr_uncore_imc_freerunning_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .read_counter = uncore_mmio_read_counter,
+ .hw_config = uncore_freerunning_hw_config,
+};
+
+static struct intel_uncore_type spr_uncore_imc_free_running = {
+ .name = "imc_free_running",
+ .num_counters = 3,
+ .mmio_map_size = SNR_IMC_MMIO_SIZE,
+ .num_freerunning_types = SPR_IMC_FREERUNNING_TYPE_MAX,
+ .freerunning = spr_imc_freerunning,
+ .ops = &spr_uncore_imc_freerunning_ops,
+ .event_descs = spr_uncore_imc_freerunning_events,
+ .format_group = &skx_uncore_iio_freerunning_format_group,
+};
+
+#define UNCORE_SPR_MSR_EXTRA_UNCORES 1
+#define UNCORE_SPR_MMIO_EXTRA_UNCORES 1
+
+static struct intel_uncore_type *spr_msr_uncores[UNCORE_SPR_MSR_EXTRA_UNCORES] = {
+ &spr_uncore_iio_free_running,
+};
+
+static struct intel_uncore_type *spr_mmio_uncores[UNCORE_SPR_MMIO_EXTRA_UNCORES] = {
+ &spr_uncore_imc_free_running,
+};
+
+/*
+ * Overlay the customized settings of @from_type onto @to_type.
+ *
+ * Only fields that are set (non-zero / non-NULL) in @from_type are copied;
+ * every other field of @to_type keeps its current value. Does nothing if
+ * either pointer is NULL.
+ */
+static void uncore_type_customized_copy(struct intel_uncore_type *to_type,
+					struct intel_uncore_type *from_type)
+{
+	if (!to_type || !from_type)
+		return;
+
+	if (from_type->name)
+		to_type->name = from_type->name;
+	if (from_type->fixed_ctr_bits)
+		to_type->fixed_ctr_bits = from_type->fixed_ctr_bits;
+	if (from_type->event_mask)
+		to_type->event_mask = from_type->event_mask;
+	if (from_type->event_mask_ext)
+		to_type->event_mask_ext = from_type->event_mask_ext;
+	if (from_type->fixed_ctr)
+		to_type->fixed_ctr = from_type->fixed_ctr;
+	if (from_type->fixed_ctl)
+		to_type->fixed_ctl = from_type->fixed_ctl;
+	if (from_type->num_shared_regs)
+		to_type->num_shared_regs = from_type->num_shared_regs;
+	if (from_type->constraints)
+		to_type->constraints = from_type->constraints;
+	if (from_type->ops)
+		to_type->ops = from_type->ops;
+	if (from_type->event_descs)
+		to_type->event_descs = from_type->event_descs;
+	if (from_type->format_group)
+		to_type->format_group = from_type->format_group;
+	if (from_type->attr_update)
+		to_type->attr_update = from_type->attr_update;
+}
+
+/*
+ * Build the array of uncore types for one access method.
+ *
+ * Starts from the array returned by intel_uncore_generic_init_uncores(),
+ * overlays the SPR-specific settings from spr_uncores[] on every entry
+ * whose type_id is below UNCORE_SPR_NUM_UNCORE_TYPES, then stores the
+ * @num_extra entries of @extra right after the last generic entry.
+ *
+ * Returns the start of the array.
+ *
+ * NOTE(review): assumes the returned array has room for @num_extra more
+ * entries plus a terminator, since @num_extra is passed to the generic
+ * init helper - confirm against that helper.
+ */
+static struct intel_uncore_type **
+uncore_get_uncores(enum uncore_access_type type_id, int num_extra,
+ struct intel_uncore_type **extra)
+{
+ struct intel_uncore_type **types, **start_types;
+ int i;
+
+ start_types = types = intel_uncore_generic_init_uncores(type_id, num_extra);
+
+ /* Only copy the customized features */
+ for (; *types; types++) {
+ if ((*types)->type_id >= UNCORE_SPR_NUM_UNCORE_TYPES)
+ continue;
+ uncore_type_customized_copy(*types, spr_uncores[(*types)->type_id]);
+ }
+
+ /* types now points at the terminating NULL slot; append the extras there */
+ for (i = 0; i < num_extra; i++, types++)
+ *types = extra[i];
+
+ return start_types;
+}
+
+/*
+ * Search a NULL-terminated array of uncore types for the given type ID.
+ *
+ * Returns the first entry whose type_id equals @type_id, or NULL if the
+ * end of the array is reached without a match.
+ */
+static struct intel_uncore_type *
+uncore_find_type_by_id(struct intel_uncore_type **types, int type_id)
+{
+	struct intel_uncore_type **cursor = types;
+
+	while (*cursor) {
+		if ((*cursor)->type_id == type_id)
+			return *cursor;
+		cursor++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Return one more than the largest box ID of the uncore type identified
+ * by @type_id, or 0 if no such type exists in @types.
+ */
+static int uncore_type_max_boxes(struct intel_uncore_type **types,
+				  int type_id)
+{
+	struct intel_uncore_type *found = uncore_find_type_by_id(types, type_id);
+	int highest = 0;
+	int idx;
+
+	if (!found)
+		return 0;
+
+	/* Scan all boxes of the type for the highest box ID. */
+	for (idx = 0; idx < found->num_boxes; idx++) {
+		if (found->box_ids[idx] > highest)
+			highest = found->box_ids[idx];
+	}
+
+	return highest + 1;
+}
+
+/*
+ * Set up the MSR-based SPR uncore types: the generic types plus the IIO
+ * free-running counters. The number of free-running IIO boxes is derived
+ * from the highest box ID of the UNCORE_SPR_IIO type.
+ */
+void spr_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR,
+ UNCORE_SPR_MSR_EXTRA_UNCORES,
+ spr_msr_uncores);
+
+ spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO);
+}
+
+/*
+ * Set up the PCI-based SPR uncore types; no extra types are appended.
+ * Always returns 0.
+ */
+int spr_uncore_pci_init(void)
+{
+ uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, 0, NULL);
+ return 0;
+}
+
+/*
+ * Set up the MMIO-based SPR uncore types.
+ *
+ * The IMC free-running counters are only added when
+ * snbep_pci2phy_map_init() returns 0; on a non-zero return (presumably a
+ * failure to build the PCI-bus-to-die mapping - confirm) only the generic
+ * MMIO types are registered.
+ */
+void spr_uncore_mmio_init(void)
+{
+ int ret = snbep_pci2phy_map_init(0x3250, SKX_CPUNODEID, SKX_GIDNIDMAP, true);
+
+ if (ret)
+ uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, 0, NULL);
+ else {
+ uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO,
+ UNCORE_SPR_MMIO_EXTRA_UNCORES,
+ spr_mmio_uncores);
+
+ /*
+ * NOTE(review): the "/ 2" presumably reflects two IMC PMON boxes
+ * per free-running counter unit - confirm.
+ */
+ spr_uncore_imc_free_running.num_boxes = uncore_type_max_boxes(uncore_mmio_uncores, UNCORE_SPR_IMC) / 2;
+ }
+}
+
+/* end of SPR uncore support */
diff --git a/arch/x86/include/asm/amd-ibs.h b/arch/x86/include/asm/amd-ibs.h
new file mode 100644
index 000000000000..46e1df45efc0
--- /dev/null
+++ b/arch/x86/include/asm/amd-ibs.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * From PPR Vol 1 for AMD Family 19h Model 01h B1
+ * 55898 Rev 0.35 - Feb 5, 2021
+ */
+
+#include <asm/msr-index.h>
+
+/*
+ * IBS Hardware MSRs
+ */
+
+/* MSR 0xc0011030: IBS Fetch Control */
+union ibs_fetch_ctl {
+ __u64 val;
+ struct {
+ __u64 fetch_maxcnt:16,/* 0-15: instruction fetch max. count */
+ fetch_cnt:16, /* 16-31: instruction fetch count */
+ fetch_lat:16, /* 32-47: instruction fetch latency */
+ fetch_en:1, /* 48: instruction fetch enable */
+ fetch_val:1, /* 49: instruction fetch valid */
+ fetch_comp:1, /* 50: instruction fetch complete */
+ ic_miss:1, /* 51: i-cache miss */
+ phy_addr_valid:1,/* 52: physical address valid */
+ l1tlb_pgsz:2, /* 53-54: i-cache L1TLB page size
+ * (needs IbsPhyAddrValid) */
+ l1tlb_miss:1, /* 55: i-cache fetch missed in L1TLB */
+ l2tlb_miss:1, /* 56: i-cache fetch missed in L2TLB */
+ rand_en:1, /* 57: random tagging enable */
+ fetch_l2_miss:1,/* 58: L2 miss for sampled fetch
+ * (needs IbsFetchComp) */
+ reserved:5; /* 59-63: reserved */
+ };
+};
+
+/* MSR 0xc0011033: IBS Execution Control */
+union ibs_op_ctl {
+ __u64 val;
+ struct {
+ __u64 opmaxcnt:16, /* 0-15: periodic op max. count */
+ reserved0:1, /* 16: reserved */
+ op_en:1, /* 17: op sampling enable */
+ op_val:1, /* 18: op sample valid */
+ cnt_ctl:1, /* 19: periodic op counter control */
+ opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */
+ reserved1:5, /* 27-31: reserved */
+ opcurcnt:27, /* 32-58: periodic op counter current count */
+ reserved2:5; /* 59-63: reserved */
+ };
+};
+
+/* MSR 0xc0011035: IBS Op Data 1 */
+union ibs_op_data {
+ __u64 val;
+ struct {
+ __u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */
+ tag_to_ret_ctr:16, /* 16-31: op tag to retire count */
+ reserved1:2, /* 32-33: reserved */
+ op_return:1, /* 34: return op */
+ op_brn_taken:1, /* 35: taken branch op */
+ op_brn_misp:1, /* 36: mispredicted branch op */
+ op_brn_ret:1, /* 37: branch op retired */
+ op_rip_invalid:1, /* 38: RIP is invalid */
+ op_brn_fuse:1, /* 39: fused branch op */
+ op_microcode:1, /* 40: microcode op */
+ reserved2:23; /* 41-63: reserved */
+ };
+};
+
+/*
+ * MSR 0xc0011036: IBS Op Data 2
+ *
+ * The bit-fields below must add up to exactly 64 bits so that they
+ * overlay the whole of @val.
+ */
+union ibs_op_data2 {
+ __u64 val;
+ struct {
+ __u64 data_src:3, /* 0-2: data source */
+ reserved0:1, /* 3: reserved */
+ rmt_node:1, /* 4: destination node */
+ cache_hit_st:1, /* 5: cache hit state */
+ reserved1:58; /* 6-63: reserved */
+ };
+};
+
+/* MSR 0xc0011037: IBS Op Data 3 */
+union ibs_op_data3 {
+ __u64 val;
+ struct {
+ __u64 ld_op:1, /* 0: load op */
+ st_op:1, /* 1: store op */
+ dc_l1tlb_miss:1, /* 2: data cache L1TLB miss */
+ dc_l2tlb_miss:1, /* 3: data cache L2TLB miss */
+ dc_l1tlb_hit_2m:1, /* 4: data cache L1TLB hit in 2M page */
+ dc_l1tlb_hit_1g:1, /* 5: data cache L1TLB hit in 1G page */
+ dc_l2tlb_hit_2m:1, /* 6: data cache L2TLB hit in 2M page */
+ dc_miss:1, /* 7: data cache miss */
+ dc_mis_acc:1, /* 8: misaligned access */
+ reserved:4, /* 9-12: reserved */
+ dc_wc_mem_acc:1, /* 13: write combining memory access */
+ dc_uc_mem_acc:1, /* 14: uncacheable memory access */
+ dc_locked_op:1, /* 15: locked operation */
+ dc_miss_no_mab_alloc:1, /* 16: DC miss with no MAB allocated */
+ dc_lin_addr_valid:1, /* 17: data cache linear address valid */
+ dc_phy_addr_valid:1, /* 18: data cache physical address valid */
+ dc_l2_tlb_hit_1g:1, /* 19: data cache L2 hit in 1GB page */
+ l2_miss:1, /* 20: L2 cache miss */
+ sw_pf:1, /* 21: software prefetch */
+ op_mem_width:4, /* 22-25: load/store size in bytes */
+ op_dc_miss_open_mem_reqs:6, /* 26-31: outstanding mem reqs on DC fill */
+ dc_miss_lat:16, /* 32-47: data cache miss latency */
+ tlb_refill_lat:16; /* 48-63: L1 TLB refill latency */
+ };
+};
+
+/* MSR 0xc001103c: IBS Fetch Control Extended */
+union ic_ibs_extd_ctl {
+ __u64 val;
+ struct {
+ __u64 itlb_refill_lat:16, /* 0-15: ITLB Refill latency for sampled fetch */
+ reserved:48; /* 16-63: reserved */
+ };
+};
+
+/*
+ * IBS driver related
+ */
+
+struct perf_ibs_data {
+ u32 size;
+ union {
+ u32 data[0]; /* data buffer starts here */
+ u32 caps;
+ };
+ u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
+};
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index be09c7eac89f..4ae01cdb99de 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -12,6 +12,9 @@
#include <asm/user32.h>
#include <asm/unistd.h>
+#define compat_mode_t compat_mode_t
+typedef u16 compat_mode_t;
+
#include <asm-generic/compat.h>
#define COMPAT_USER_HZ 100
@@ -19,13 +22,9 @@
typedef u16 __compat_uid_t;
typedef u16 __compat_gid_t;
-typedef u32 __compat_uid32_t;
-typedef u32 __compat_gid32_t;
-typedef u16 compat_mode_t;
typedef u16 compat_dev_t;
typedef u16 compat_nlink_t;
typedef u16 compat_ipc_pid_t;
-typedef u32 compat_caddr_t;
typedef __kernel_fsid_t compat_fsid_t;
struct compat_stat {
@@ -92,13 +91,6 @@ struct compat_statfs {
#define COMPAT_RLIM_INFINITY 0xffffffff
-typedef u32 compat_old_sigset_t; /* at least 32 bits */
-
-#define _COMPAT_NSIG 64
-#define _COMPAT_NSIG_BPW 32
-
-typedef u32 compat_sigset_word;
-
#define COMPAT_OFF_T_MAX 0x7fffffff
struct compat_ipc64_perm {
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 89789e8c80f6..637fa1df3512 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -19,6 +19,8 @@ extern unsigned int cached_irq_mask;
#define PIC_MASTER_OCW3 PIC_MASTER_ISR
#define PIC_SLAVE_CMD 0xa0
#define PIC_SLAVE_IMR 0xa1
+#define PIC_ELCR1 0x4d0
+#define PIC_ELCR2 0x4d1
/* i8259A PIC related value */
#define PIC_CASCADE_IR 2
diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h
index 05b48b33baf0..ff5c7134a37a 100644
--- a/arch/x86/include/asm/kfence.h
+++ b/arch/x86/include/asm/kfence.h
@@ -8,6 +8,8 @@
#ifndef _ASM_X86_KFENCE_H
#define _ASM_X86_KFENCE_H
+#ifndef MODULE
+
#include <linux/bug.h>
#include <linux/kfence.h>
@@ -66,4 +68,6 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect)
return true;
}
+#endif /* !MODULE */
+
#endif /* _ASM_X86_KFENCE_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 974cbfb1eefe..af6ce8d4c86a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1038,6 +1038,13 @@ struct kvm_arch {
struct list_head lpage_disallowed_mmu_pages;
struct kvm_page_track_notifier_node mmu_sp_tracker;
struct kvm_page_track_notifier_head track_notifier_head;
+ /*
+ * Protects marking pages unsync during page faults, as TDP MMU page
+ * faults only take mmu_lock for read. For simplicity, the unsync
+ * pages lock is always taken when marking pages unsync regardless of
+ * whether mmu_lock is held for read or write.
+ */
+ spinlock_t mmu_unsync_pages_lock;
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 0607ec4f5091..da9321548f6f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -265,6 +265,7 @@ enum mcp_flags {
MCP_TIMESTAMP = BIT(0), /* log time stamp */
MCP_UC = BIT(1), /* log uncorrected errors */
MCP_DONTLOG = BIT(2), /* only clear, don't log */
+ MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */
};
bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 3ad8c6d3cbb3..ec2d5c8c6694 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -252,6 +252,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
DECLARE_STATIC_KEY_FALSE(mds_user_clear);
DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
+DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
+
#include <asm/segment.h>
/**
diff --git a/arch/x86/include/asm/pc-conf-reg.h b/arch/x86/include/asm/pc-conf-reg.h
new file mode 100644
index 000000000000..56bceceacf5f
--- /dev/null
+++ b/arch/x86/include/asm/pc-conf-reg.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for the configuration register space at port I/O locations
+ * 0x22 and 0x23 variously used by PC architectures, e.g. the MP Spec,
+ * Cyrix CPUs, numerous chipsets.
+ */
+#ifndef _ASM_X86_PC_CONF_REG_H
+#define _ASM_X86_PC_CONF_REG_H
+
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#define PC_CONF_INDEX 0x22
+#define PC_CONF_DATA 0x23
+
+#define PC_CONF_MPS_IMCR 0x70
+
+extern raw_spinlock_t pc_conf_lock;
+
+/*
+ * Read configuration register @reg: select it through the index port
+ * (PC_CONF_INDEX), then read its value from the data port (PC_CONF_DATA).
+ *
+ * NOTE(review): the index/data sequence is two separate port accesses;
+ * callers presumably serialize it with pc_conf_lock - confirm.
+ */
+static inline u8 pc_conf_get(u8 reg)
+{
+ outb(reg, PC_CONF_INDEX);
+ return inb(PC_CONF_DATA);
+}
+
+/*
+ * Write @data to configuration register @reg: select it through the index
+ * port (PC_CONF_INDEX), then write the value to the data port
+ * (PC_CONF_DATA).
+ *
+ * NOTE(review): the index/data sequence is two separate port accesses;
+ * callers presumably serialize it with pc_conf_lock - confirm.
+ */
+static inline void pc_conf_set(u8 reg, u8 data)
+{
+ outb(reg, PC_CONF_INDEX);
+ outb(data, PC_CONF_DATA);
+}
+
+#endif /* _ASM_X86_PC_CONF_REG_H */
diff --git a/arch/x86/include/asm/processor-cyrix.h b/arch/x86/include/asm/processor-cyrix.h
index df700a6cc869..efe3e46e454b 100644
--- a/arch/x86/include/asm/processor-cyrix.h
+++ b/arch/x86/include/asm/processor-cyrix.h
@@ -5,14 +5,14 @@
* Access order is always 0x22 (=offset), 0x23 (=value)
*/
+#include <asm/pc-conf-reg.h>
+
static inline u8 getCx86(u8 reg)
{
- outb(reg, 0x22);
- return inb(0x23);
+ return pc_conf_get(reg);
}
static inline void setCx86(u8 reg, u8 data)
{
- outb(reg, 0x22);
- outb(data, 0x23);
+ pc_conf_set(reg, data);
}
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f3020c54e2cb..9ad2acaaae9b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -136,6 +136,8 @@ struct cpuinfo_x86 {
u16 logical_die_id;
/* Index into per_cpu list: */
u16 cpu_index;
+ /* Is SMT active on this core? */
+ bool smt_active;
u32 microcode;
/* Address space bits used by the cache internally */
u8 x86_cache_bits;
@@ -795,6 +797,8 @@ extern int set_tsc_mode(unsigned int val);
DECLARE_PER_CPU(u64, msr_misc_features_shadow);
+extern u16 get_llc_id(unsigned int cpu);
+
#ifdef CONFIG_CPU_SUP_AMD
extern u32 amd_get_nodes_per_socket(void);
extern u32 amd_get_highest_perf(void);
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 6fd8410a3910..2dfb5fea13af 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -29,6 +29,7 @@ typedef struct {
#define SA_X32_ABI 0x01000000u
#ifndef CONFIG_COMPAT
+#define compat_sigset_t compat_sigset_t
typedef sigset_t compat_sigset_t;
#endif
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index e322676039f4..b00dbc5fac2b 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -184,6 +184,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define V_IGN_TPR_SHIFT 20
#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+#define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK)
+
#define V_INTR_MASKING_SHIFT 24
#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index de406d93b515..cf132663c219 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -81,7 +81,7 @@ struct thread_info {
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
#define TIF_SSBD 5 /* Speculative store bypass disable */
#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
-#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */
+#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_PATCH_PENDING 13 /* pending live patching update */
@@ -93,6 +93,7 @@ struct thread_info {
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
+#define TIF_SPEC_FORCE_UPDATE 23 /* Force speculation MSR update in context switch */
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
@@ -104,7 +105,7 @@ struct thread_info {
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_SSBD (1 << TIF_SSBD)
#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
-#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
+#define _TIF_SPEC_L1D_FLUSH (1 << TIF_SPEC_L1D_FLUSH)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
@@ -115,6 +116,7 @@ struct thread_info {
#define _TIF_SLD (1 << TIF_SLD)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
+#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index fa952eadbc2e..b587a9ee9cb2 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -83,7 +83,7 @@ struct tlb_state {
/* Last user mm for optimizing IBPB */
union {
struct mm_struct *last_user_mm;
- unsigned long last_user_mm_ibpb;
+ unsigned long last_user_mm_spec;
};
u16 loaded_mm_asid;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e55e0c1fad8c..14bcd59bcdee 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -558,10 +558,10 @@ acpi_parse_nmi_src(union acpi_subtable_headers * header, const unsigned long end
* If a PIC-mode SCI is not recognized or gives spurious IRQ7's
* it may require Edge Trigger -- use "acpi_sci=edge"
*
- * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers
+ * Port 0x4d0-4d1 are ELCR1 and ELCR2, the Edge/Level Control Registers
* for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge.
- * ECLR1 is IRQs 0-7 (IRQ 0, 1, 2 must be 0)
- * ECLR2 is IRQs 8-15 (IRQ 8, 13 must be 0)
+ * ELCR1 is IRQs 0-7 (IRQ 0, 1, 2 must be 0)
+ * ELCR2 is IRQs 8-15 (IRQ 8, 13 must be 0)
*/
void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
@@ -570,7 +570,7 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
unsigned int old, new;
/* Real old ELCR mask */
- old = inb(0x4d0) | (inb(0x4d1) << 8);
+ old = inb(PIC_ELCR1) | (inb(PIC_ELCR2) << 8);
/*
* If we use ACPI to set PCI IRQs, then we should clear ELCR
@@ -596,8 +596,8 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
return;
pr_warn("setting ELCR to %04x (from %04x)\n", new, old);
- outb(new, 0x4d0);
- outb(new >> 8, 0x4d1);
+ outb(new, PIC_ELCR1);
+ outb(new >> 8, PIC_ELCR2);
}
int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 23dda362dc0f..c92c9c774c0e 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -25,6 +25,8 @@
#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654
+#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5
+#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d
#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e
/* Protect the PCI config register pairs used for SMN and DF indirect access. */
@@ -37,6 +39,7 @@ static const struct pci_device_id amd_root_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_ROOT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) },
{}
};
@@ -58,6 +61,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) },
{}
};
@@ -74,6 +78,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
{}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d262811ce14b..b70344bf6600 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -38,6 +38,7 @@
#include <asm/trace/irq_vectors.h>
#include <asm/irq_remapping.h>
+#include <asm/pc-conf-reg.h>
#include <asm/perf_event.h>
#include <asm/x86_init.h>
#include <linux/atomic.h>
@@ -132,18 +133,14 @@ static int enabled_via_apicbase __ro_after_init;
*/
static inline void imcr_pic_to_apic(void)
{
- /* select IMCR register */
- outb(0x70, 0x22);
/* NMI and 8259 INTR go through APIC */
- outb(0x01, 0x23);
+ pc_conf_set(PC_CONF_MPS_IMCR, 0x01);
}
static inline void imcr_apic_to_pic(void)
{
- /* select IMCR register */
- outb(0x70, 0x22);
/* NMI and 8259 INTR go directly to BSP */
- outb(0x00, 0x23);
+ pc_conf_set(PC_CONF_MPS_IMCR, 0x00);
}
#endif
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index d5c691a3208b..c1bb384935b0 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -764,7 +764,7 @@ static bool irq_active_low(int idx)
static bool EISA_ELCR(unsigned int irq)
{
if (irq < nr_legacy_irqs()) {
- unsigned int port = 0x4d0 + (irq >> 3);
+ unsigned int port = PIC_ELCR1 + (irq >> 3);
return (inb(port) >> (irq & 7)) & 1;
}
apic_printk(APIC_VERBOSE, KERN_INFO
@@ -1986,7 +1986,8 @@ static struct irq_chip ioapic_chip __read_mostly = {
.irq_set_affinity = ioapic_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_get_irqchip_state = ioapic_irq_get_chip_state,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
};
static struct irq_chip ioapic_ir_chip __read_mostly = {
@@ -1999,7 +2000,8 @@ static struct irq_chip ioapic_ir_chip __read_mostly = {
.irq_set_affinity = ioapic_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_get_irqchip_state = ioapic_irq_get_chip_state,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
};
static inline void init_IO_APIC_traps(void)
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 44ebe25e7703..dbacb9ec8843 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -58,11 +58,13 @@ msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force)
* The quirk bit is not set in this case.
* - The new vector is the same as the old vector
* - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up)
+ * - The interrupt is not yet started up
* - The new destination CPU is the same as the old destination CPU
*/
if (!irqd_msi_nomask_quirk(irqd) ||
cfg->vector == old_cfg.vector ||
old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR ||
+ !irqd_is_started(irqd) ||
cfg->dest_apicid == old_cfg.dest_apicid) {
irq_msi_update_msg(irqd, cfg);
return ret;
@@ -150,7 +152,8 @@ static struct irq_chip pci_msi_controller = {
.irq_ack = irq_chip_ack_parent,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_set_affinity = msi_set_affinity,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
};
int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
@@ -219,7 +222,8 @@ static struct irq_chip pci_msi_ir_controller = {
.irq_mask = pci_msi_mask_irq,
.irq_ack = irq_chip_ack_parent,
.irq_retrigger = irq_chip_retrigger_hierarchy,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
};
static struct msi_domain_info pci_msi_ir_domain_info = {
@@ -273,7 +277,8 @@ static struct irq_chip dmar_msi_controller = {
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_compose_msi_msg = dmar_msi_compose_msg,
.irq_write_msi_msg = dmar_msi_write_msg,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
};
static int dmar_msi_init(struct irq_domain *domain,
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index fb67ed5e7e6a..c132daabe615 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -1299,7 +1299,7 @@ static void __init print_PIC(void)
pr_debug("... PIC ISR: %04x\n", v);
- v = inb(0x4d1) << 8 | inb(0x4d0);
+ v = inb(PIC_ELCR2) << 8 | inb(PIC_ELCR1);
pr_debug("... PIC ELCR: %04x\n", v);
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index b7c003013d41..2131af9f2fa2 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -438,7 +438,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
node = numa_cpu_node(cpu);
if (node == NUMA_NO_NODE)
- node = per_cpu(cpu_llc_id, cpu);
+ node = get_llc_id(cpu);
/*
* On multi-fabric platform (e.g. Numascale NumaChip) a
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d41b70fe4918..ecfca3bbcd96 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -43,6 +43,7 @@ static void __init mds_select_mitigation(void);
static void __init mds_print_mitigation(void);
static void __init taa_select_mitigation(void);
static void __init srbds_select_mitigation(void);
+static void __init l1d_flush_select_mitigation(void);
/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
u64 x86_spec_ctrl_base;
@@ -76,6 +77,13 @@ EXPORT_SYMBOL_GPL(mds_user_clear);
DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
EXPORT_SYMBOL_GPL(mds_idle_clear);
+/*
+ * Controls whether l1d flush based mitigations are enabled,
+ * based on hw features and admin setting via boot parameter
+ * defaults to false
+ */
+DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
+
void __init check_bugs(void)
{
identify_boot_cpu();
@@ -111,6 +119,7 @@ void __init check_bugs(void)
mds_select_mitigation();
taa_select_mitigation();
srbds_select_mitigation();
+ l1d_flush_select_mitigation();
/*
* As MDS and TAA mitigations are inter-related, print MDS
@@ -492,6 +501,34 @@ static int __init srbds_parse_cmdline(char *str)
early_param("srbds", srbds_parse_cmdline);
#undef pr_fmt
+#define pr_fmt(fmt) "L1D Flush : " fmt
+
+enum l1d_flush_mitigations {
+ L1D_FLUSH_OFF = 0,
+ L1D_FLUSH_ON,
+};
+
+static enum l1d_flush_mitigations l1d_flush_mitigation __initdata = L1D_FLUSH_OFF;
+
+static void __init l1d_flush_select_mitigation(void)
+{
+ if (!l1d_flush_mitigation || !boot_cpu_has(X86_FEATURE_FLUSH_L1D))
+ return;
+
+ static_branch_enable(&switch_mm_cond_l1d_flush);
+ pr_info("Conditional flush on switch_mm() enabled\n");
+}
+
+static int __init l1d_flush_parse_cmdline(char *str)
+{
+ if (!strcmp(str, "on"))
+ l1d_flush_mitigation = L1D_FLUSH_ON;
+
+ return 0;
+}
+early_param("l1d_flush", l1d_flush_parse_cmdline);
+
+#undef pr_fmt
#define pr_fmt(fmt) "Spectre V1 : " fmt
enum spectre_v1_mitigation {
@@ -1215,6 +1252,24 @@ static void task_update_spec_tif(struct task_struct *tsk)
speculation_ctrl_update_current();
}
+static int l1d_flush_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+
+ if (!static_branch_unlikely(&switch_mm_cond_l1d_flush))
+ return -EPERM;
+
+ switch (ctrl) {
+ case PR_SPEC_ENABLE:
+ set_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH);
+ return 0;
+ case PR_SPEC_DISABLE:
+ clear_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH);
+ return 0;
+ default:
+ return -ERANGE;
+ }
+}
+
static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
{
if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
@@ -1324,6 +1379,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
return ssb_prctl_set(task, ctrl);
case PR_SPEC_INDIRECT_BRANCH:
return ib_prctl_set(task, ctrl);
+ case PR_SPEC_L1D_FLUSH:
+ return l1d_flush_prctl_set(task, ctrl);
default:
return -ENODEV;
}
@@ -1340,6 +1397,17 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
}
#endif
+static int l1d_flush_prctl_get(struct task_struct *task)
+{
+ if (!static_branch_unlikely(&switch_mm_cond_l1d_flush))
+ return PR_SPEC_FORCE_DISABLE;
+
+ if (test_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH))
+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+ else
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+}
+
static int ssb_prctl_get(struct task_struct *task)
{
switch (ssb_mode) {
@@ -1390,6 +1458,8 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
return ssb_prctl_get(task);
case PR_SPEC_INDIRECT_BRANCH:
return ib_prctl_get(task);
+ case PR_SPEC_L1D_FLUSH:
+ return l1d_flush_prctl_get(task);
default:
return -ENODEV;
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 64b805bd6a54..0f8885949e8c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -79,6 +79,12 @@ EXPORT_SYMBOL(smp_num_siblings);
/* Last level cache ID of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
+u16 get_llc_id(unsigned int cpu)
+{
+ return per_cpu(cpu_llc_id, cpu);
+}
+EXPORT_SYMBOL_GPL(get_llc_id);
+
/* correctly size the local cpu masks */
void __init setup_cpu_local_masks(void)
{
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 22791aadc085..8cb7816d03b4 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -817,7 +817,10 @@ log_it:
if (mca_cfg.dont_log_ce && !mce_usable_address(&m))
goto clear_it;
- mce_log(&m);
+ if (flags & MCP_QUEUE_LOG)
+ mce_gen_pool_add(&m);
+ else
+ mce_log(&m);
clear_it:
/*
@@ -1639,10 +1642,12 @@ static void __mcheck_cpu_init_generic(void)
m_fl = MCP_DONTLOG;
/*
- * Log the machine checks left over from the previous reset.
+ * Log the machine checks left over from the previous reset. Log them
+ * only, do not start processing them. That will happen in mcheck_late_init()
+ * when all consumers have been registered on the notifier chain.
*/
bitmap_fill(all_banks, MAX_NR_BANKS);
- machine_check_poll(MCP_UC | m_fl, &all_banks);
+ machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks);
cr4_set_bits(X86_CR4_MCE);
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 4e86d97f9653..0bfc14041bbb 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -235,7 +235,7 @@ static void __maybe_unused raise_mce(struct mce *m)
unsigned long start;
int cpu;
- get_online_cpus();
+ cpus_read_lock();
cpumask_copy(mce_inject_cpumask, cpu_online_mask);
cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
for_each_online_cpu(cpu) {
@@ -269,7 +269,7 @@ static void __maybe_unused raise_mce(struct mce *m)
}
raise_local();
put_cpu();
- put_online_cpus();
+ cpus_read_unlock();
} else {
preempt_disable();
raise_local();
@@ -529,7 +529,7 @@ static void do_inject(void)
cpu = get_nbc_for_node(topology_die_id(cpu));
}
- get_online_cpus();
+ cpus_read_lock();
if (!cpu_online(cpu))
goto err;
@@ -553,7 +553,7 @@ static void do_inject(void)
}
err:
- put_online_cpus();
+ cpus_read_unlock();
}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 6a6318e9590c..efb69be41ab1 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -55,7 +55,7 @@ LIST_HEAD(microcode_cache);
* All non cpu-hotplug-callback call sites use:
*
* - microcode_mutex to synchronize with each other;
- * - get/put_online_cpus() to synchronize with
+ * - cpus_read_lock/unlock() to synchronize with
* the cpu-hotplug-callback call sites.
*
* We guarantee that only a single cpu is being
@@ -431,7 +431,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
return ret;
}
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&microcode_mutex);
if (do_microcode_update(buf, len) == 0)
@@ -441,7 +441,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
perf_check_microcode();
mutex_unlock(&microcode_mutex);
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -629,7 +629,7 @@ static ssize_t reload_store(struct device *dev,
if (val != 1)
return size;
- get_online_cpus();
+ cpus_read_lock();
ret = check_online_cpus();
if (ret)
@@ -644,7 +644,7 @@ static ssize_t reload_store(struct device *dev,
mutex_unlock(&microcode_mutex);
put:
- put_online_cpus();
+ cpus_read_unlock();
if (ret == 0)
ret = size;
@@ -853,14 +853,14 @@ static int __init microcode_init(void)
if (IS_ERR(microcode_pdev))
return PTR_ERR(microcode_pdev);
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&microcode_mutex);
error = subsys_interface_register(&mc_cpu_interface);
if (!error)
perf_check_microcode();
mutex_unlock(&microcode_mutex);
- put_online_cpus();
+ cpus_read_unlock();
if (error)
goto out_pdev;
@@ -892,13 +892,13 @@ static int __init microcode_init(void)
&cpu_root_microcode_group);
out_driver:
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&microcode_mutex);
subsys_interface_unregister(&mc_cpu_interface);
mutex_unlock(&microcode_mutex);
- put_online_cpus();
+ cpus_read_unlock();
out_pdev:
platform_device_unregister(microcode_pdev);
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index a76694bffe86..2746cac9d8a9 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -336,7 +336,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
replace = -1;
/* No CPU hotplug when we change MTRR entries */
- get_online_cpus();
+ cpus_read_lock();
/* Search for existing MTRR */
mutex_lock(&mtrr_mutex);
@@ -398,7 +398,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
error = i;
out:
mutex_unlock(&mtrr_mutex);
- put_online_cpus();
+ cpus_read_unlock();
return error;
}
@@ -485,7 +485,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
max = num_var_ranges;
/* No CPU hotplug when we change MTRR entries */
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&mtrr_mutex);
if (reg < 0) {
/* Search for existing MTRR */
@@ -520,7 +520,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
error = reg;
out:
mutex_unlock(&mtrr_mutex);
- put_online_cpus();
+ cpus_read_unlock();
return error;
}
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 23001ae03e82..4b8813bafffd 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -57,128 +57,57 @@ static void
mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m,
struct rdt_resource *r);
-#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)
+#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains)
-struct rdt_resource rdt_resources_all[] = {
+struct rdt_hw_resource rdt_resources_all[] = {
[RDT_RESOURCE_L3] =
{
- .rid = RDT_RESOURCE_L3,
- .name = "L3",
- .domains = domain_init(RDT_RESOURCE_L3),
- .msr_base = MSR_IA32_L3_CBM_BASE,
- .msr_update = cat_wrmsr,
- .cache_level = 3,
- .cache = {
- .min_cbm_bits = 1,
- .cbm_idx_mult = 1,
- .cbm_idx_offset = 0,
- },
- .parse_ctrlval = parse_cbm,
- .format_str = "%d=%0*x",
- .fflags = RFTYPE_RES_CACHE,
- },
- [RDT_RESOURCE_L3DATA] =
- {
- .rid = RDT_RESOURCE_L3DATA,
- .name = "L3DATA",
- .domains = domain_init(RDT_RESOURCE_L3DATA),
- .msr_base = MSR_IA32_L3_CBM_BASE,
- .msr_update = cat_wrmsr,
- .cache_level = 3,
- .cache = {
- .min_cbm_bits = 1,
- .cbm_idx_mult = 2,
- .cbm_idx_offset = 0,
+ .r_resctrl = {
+ .rid = RDT_RESOURCE_L3,
+ .name = "L3",
+ .cache_level = 3,
+ .cache = {
+ .min_cbm_bits = 1,
+ },
+ .domains = domain_init(RDT_RESOURCE_L3),
+ .parse_ctrlval = parse_cbm,
+ .format_str = "%d=%0*x",
+ .fflags = RFTYPE_RES_CACHE,
},
- .parse_ctrlval = parse_cbm,
- .format_str = "%d=%0*x",
- .fflags = RFTYPE_RES_CACHE,
- },
- [RDT_RESOURCE_L3CODE] =
- {
- .rid = RDT_RESOURCE_L3CODE,
- .name = "L3CODE",
- .domains = domain_init(RDT_RESOURCE_L3CODE),
.msr_base = MSR_IA32_L3_CBM_BASE,
.msr_update = cat_wrmsr,
- .cache_level = 3,
- .cache = {
- .min_cbm_bits = 1,
- .cbm_idx_mult = 2,
- .cbm_idx_offset = 1,
- },
- .parse_ctrlval = parse_cbm,
- .format_str = "%d=%0*x",
- .fflags = RFTYPE_RES_CACHE,
},
[RDT_RESOURCE_L2] =
{
- .rid = RDT_RESOURCE_L2,
- .name = "L2",
- .domains = domain_init(RDT_RESOURCE_L2),
- .msr_base = MSR_IA32_L2_CBM_BASE,
- .msr_update = cat_wrmsr,
- .cache_level = 2,
- .cache = {
- .min_cbm_bits = 1,
- .cbm_idx_mult = 1,
- .cbm_idx_offset = 0,
+ .r_resctrl = {
+ .rid = RDT_RESOURCE_L2,
+ .name = "L2",
+ .cache_level = 2,
+ .cache = {
+ .min_cbm_bits = 1,
+ },
+ .domains = domain_init(RDT_RESOURCE_L2),
+ .parse_ctrlval = parse_cbm,
+ .format_str = "%d=%0*x",
+ .fflags = RFTYPE_RES_CACHE,
},
- .parse_ctrlval = parse_cbm,
- .format_str = "%d=%0*x",
- .fflags = RFTYPE_RES_CACHE,
- },
- [RDT_RESOURCE_L2DATA] =
- {
- .rid = RDT_RESOURCE_L2DATA,
- .name = "L2DATA",
- .domains = domain_init(RDT_RESOURCE_L2DATA),
.msr_base = MSR_IA32_L2_CBM_BASE,
.msr_update = cat_wrmsr,
- .cache_level = 2,
- .cache = {
- .min_cbm_bits = 1,
- .cbm_idx_mult = 2,
- .cbm_idx_offset = 0,
- },
- .parse_ctrlval = parse_cbm,
- .format_str = "%d=%0*x",
- .fflags = RFTYPE_RES_CACHE,
- },
- [RDT_RESOURCE_L2CODE] =
- {
- .rid = RDT_RESOURCE_L2CODE,
- .name = "L2CODE",
- .domains = domain_init(RDT_RESOURCE_L2CODE),
- .msr_base = MSR_IA32_L2_CBM_BASE,
- .msr_update = cat_wrmsr,
- .cache_level = 2,
- .cache = {
- .min_cbm_bits = 1,
- .cbm_idx_mult = 2,
- .cbm_idx_offset = 1,
- },
- .parse_ctrlval = parse_cbm,
- .format_str = "%d=%0*x",
- .fflags = RFTYPE_RES_CACHE,
},
[RDT_RESOURCE_MBA] =
{
- .rid = RDT_RESOURCE_MBA,
- .name = "MB",
- .domains = domain_init(RDT_RESOURCE_MBA),
- .cache_level = 3,
- .parse_ctrlval = parse_bw,
- .format_str = "%d=%*u",
- .fflags = RFTYPE_RES_MB,
+ .r_resctrl = {
+ .rid = RDT_RESOURCE_MBA,
+ .name = "MB",
+ .cache_level = 3,
+ .domains = domain_init(RDT_RESOURCE_MBA),
+ .parse_ctrlval = parse_bw,
+ .format_str = "%d=%*u",
+ .fflags = RFTYPE_RES_MB,
+ },
},
};
-static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid)
-{
- return closid * r->cache.cbm_idx_mult + r->cache.cbm_idx_offset;
-}
-
/*
* cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
* as they do not have CPUID enumeration support for Cache allocation.
@@ -199,7 +128,8 @@ static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid)
*/
static inline void cache_alloc_hsw_probe(void)
{
- struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
+ struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3];
+ struct rdt_resource *r = &hw_res->r_resctrl;
u32 l, h, max_cbm = BIT_MASK(20) - 1;
if (wrmsr_safe(MSR_IA32_L3_CBM_BASE, max_cbm, 0))
@@ -211,7 +141,7 @@ static inline void cache_alloc_hsw_probe(void)
if (l != max_cbm)
return;
- r->num_closid = 4;
+ hw_res->num_closid = 4;
r->default_ctrl = max_cbm;
r->cache.cbm_len = 20;
r->cache.shareable_bits = 0xc0000;
@@ -225,7 +155,7 @@ static inline void cache_alloc_hsw_probe(void)
bool is_mba_sc(struct rdt_resource *r)
{
if (!r)
- return rdt_resources_all[RDT_RESOURCE_MBA].membw.mba_sc;
+ return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc;
return r->membw.mba_sc;
}
@@ -253,12 +183,13 @@ static inline bool rdt_get_mb_table(struct rdt_resource *r)
static bool __get_mem_config_intel(struct rdt_resource *r)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
union cpuid_0x10_3_eax eax;
union cpuid_0x10_x_edx edx;
u32 ebx, ecx, max_delay;
cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
- r->num_closid = edx.split.cos_max + 1;
+ hw_res->num_closid = edx.split.cos_max + 1;
max_delay = eax.split.max_delay + 1;
r->default_ctrl = MAX_MBA_BW;
r->membw.arch_needs_linear = true;
@@ -287,12 +218,13 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
union cpuid_0x10_3_eax eax;
union cpuid_0x10_x_edx edx;
u32 ebx, ecx;
cpuid_count(0x80000020, 1, &eax.full, &ebx, &ecx, &edx.full);
- r->num_closid = edx.split.cos_max + 1;
+ hw_res->num_closid = edx.split.cos_max + 1;
r->default_ctrl = MAX_MBA_BW_AMD;
/* AMD does not use delay */
@@ -317,12 +249,13 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
union cpuid_0x10_1_eax eax;
union cpuid_0x10_x_edx edx;
u32 ebx, ecx;
cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full);
- r->num_closid = edx.split.cos_max + 1;
+ hw_res->num_closid = edx.split.cos_max + 1;
r->cache.cbm_len = eax.split.cbm_len + 1;
r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
r->cache.shareable_bits = ebx & r->default_ctrl;
@@ -331,43 +264,35 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
r->alloc_enabled = true;
}
-static void rdt_get_cdp_config(int level, int type)
+static void rdt_get_cdp_config(int level)
{
- struct rdt_resource *r_l = &rdt_resources_all[level];
- struct rdt_resource *r = &rdt_resources_all[type];
-
- r->num_closid = r_l->num_closid / 2;
- r->cache.cbm_len = r_l->cache.cbm_len;
- r->default_ctrl = r_l->default_ctrl;
- r->cache.shareable_bits = r_l->cache.shareable_bits;
- r->data_width = (r->cache.cbm_len + 3) / 4;
- r->alloc_capable = true;
/*
* By default, CDP is disabled. CDP can be enabled by mount parameter
* "cdp" during resctrl file system mount time.
*/
- r->alloc_enabled = false;
+ rdt_resources_all[level].cdp_enabled = false;
+ rdt_resources_all[level].r_resctrl.cdp_capable = true;
}
static void rdt_get_cdp_l3_config(void)
{
- rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA);
- rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3CODE);
+ rdt_get_cdp_config(RDT_RESOURCE_L3);
}
static void rdt_get_cdp_l2_config(void)
{
- rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA);
- rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE);
+ rdt_get_cdp_config(RDT_RESOURCE_L2);
}
static void
mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
{
unsigned int i;
+ struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
for (i = m->low; i < m->high; i++)
- wrmsrl(r->msr_base + i, d->ctrl_val[i]);
+ wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}
/*
@@ -389,19 +314,23 @@ mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
struct rdt_resource *r)
{
unsigned int i;
+ struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
/* Write the delay values for mba. */
for (i = m->low; i < m->high; i++)
- wrmsrl(r->msr_base + i, delay_bw_map(d->ctrl_val[i], r));
+ wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], r));
}
static void
cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
{
unsigned int i;
+ struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
for (i = m->low; i < m->high; i++)
- wrmsrl(r->msr_base + cbm_idx(r, i), d->ctrl_val[i]);
+ wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}
struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
@@ -417,16 +346,22 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
return NULL;
}
+u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
+{
+ return resctrl_to_arch_res(r)->num_closid;
+}
+
void rdt_ctrl_update(void *arg)
{
struct msr_param *m = arg;
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
struct rdt_resource *r = m->res;
int cpu = smp_processor_id();
struct rdt_domain *d;
d = get_domain_from_cpu(cpu, r);
if (d) {
- r->msr_update(d, m, r);
+ hw_res->msr_update(d, m, r);
return;
}
pr_warn_once("cpu %d not found in any domain for resource %s\n",
@@ -468,6 +403,7 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
int i;
/*
@@ -476,7 +412,7 @@ void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm)
* For Memory Allocation: Set b/w requested to 100%
* and the bandwidth in MBps to U32_MAX
*/
- for (i = 0; i < r->num_closid; i++, dc++, dm++) {
+ for (i = 0; i < hw_res->num_closid; i++, dc++, dm++) {
*dc = r->default_ctrl;
*dm = MBA_MAX_MBPS;
}
@@ -484,26 +420,30 @@ void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm)
static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+ struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
struct msr_param m;
u32 *dc, *dm;
- dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
+ dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val),
+ GFP_KERNEL);
if (!dc)
return -ENOMEM;
- dm = kmalloc_array(r->num_closid, sizeof(*d->mbps_val), GFP_KERNEL);
+ dm = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->mbps_val),
+ GFP_KERNEL);
if (!dm) {
kfree(dc);
return -ENOMEM;
}
- d->ctrl_val = dc;
- d->mbps_val = dm;
+ hw_dom->ctrl_val = dc;
+ hw_dom->mbps_val = dm;
setup_default_ctrlval(r, dc, dm);
m.low = 0;
- m.high = r->num_closid;
- r->msr_update(d, &m, r);
+ m.high = hw_res->num_closid;
+ hw_res->msr_update(d, &m, r);
return 0;
}
@@ -560,6 +500,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
struct list_head *add_pos = NULL;
+ struct rdt_hw_domain *hw_dom;
struct rdt_domain *d;
d = rdt_find_domain(r, id, &add_pos);
@@ -575,10 +516,11 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
return;
}
- d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu));
- if (!d)
+ hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
+ if (!hw_dom)
return;
+ d = &hw_dom->d_resctrl;
d->id = id;
cpumask_set_cpu(cpu, &d->cpu_mask);
@@ -607,6 +549,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
+ struct rdt_hw_domain *hw_dom;
struct rdt_domain *d;
d = rdt_find_domain(r, id, NULL);
@@ -614,6 +557,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
pr_warn("Couldn't find cache id for CPU %d\n", cpu);
return;
}
+ hw_dom = resctrl_to_arch_dom(d);
cpumask_clear_cpu(cpu, &d->cpu_mask);
if (cpumask_empty(&d->cpu_mask)) {
@@ -646,16 +590,16 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
if (d->plr)
d->plr->d = NULL;
- kfree(d->ctrl_val);
- kfree(d->mbps_val);
+ kfree(hw_dom->ctrl_val);
+ kfree(hw_dom->mbps_val);
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
- kfree(d);
+ kfree(hw_dom);
return;
}
- if (r == &rdt_resources_all[RDT_RESOURCE_L3]) {
+ if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
cancel_delayed_work(&d->mbm_over);
mbm_setup_overflow_handler(d, 0);
@@ -732,13 +676,8 @@ static int resctrl_offline_cpu(unsigned int cpu)
static __init void rdt_init_padding(void)
{
struct rdt_resource *r;
- int cl;
for_each_alloc_capable_rdt_resource(r) {
- cl = strlen(r->name);
- if (cl > max_name_width)
- max_name_width = cl;
-
if (r->data_width > max_data_width)
max_data_width = r->data_width;
}
@@ -827,19 +766,22 @@ static bool __init rdt_cpu_has(int flag)
static __init bool get_mem_config(void)
{
+ struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA];
+
if (!rdt_cpu_has(X86_FEATURE_MBA))
return false;
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
- return __get_mem_config_intel(&rdt_resources_all[RDT_RESOURCE_MBA]);
+ return __get_mem_config_intel(&hw_res->r_resctrl);
else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- return __rdt_get_mem_config_amd(&rdt_resources_all[RDT_RESOURCE_MBA]);
+ return __rdt_get_mem_config_amd(&hw_res->r_resctrl);
return false;
}
static __init bool get_rdt_alloc_resources(void)
{
+ struct rdt_resource *r;
bool ret = false;
if (rdt_alloc_capable)
@@ -849,14 +791,16 @@ static __init bool get_rdt_alloc_resources(void)
return false;
if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
- rdt_get_cache_alloc_cfg(1, &rdt_resources_all[RDT_RESOURCE_L3]);
+ r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ rdt_get_cache_alloc_cfg(1, r);
if (rdt_cpu_has(X86_FEATURE_CDP_L3))
rdt_get_cdp_l3_config();
ret = true;
}
if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
/* CPUID 0x10.2 fields are same format at 0x10.1 */
- rdt_get_cache_alloc_cfg(2, &rdt_resources_all[RDT_RESOURCE_L2]);
+ r = &rdt_resources_all[RDT_RESOURCE_L2].r_resctrl;
+ rdt_get_cache_alloc_cfg(2, r);
if (rdt_cpu_has(X86_FEATURE_CDP_L2))
rdt_get_cdp_l2_config();
ret = true;
@@ -870,6 +814,8 @@ static __init bool get_rdt_alloc_resources(void)
static __init bool get_rdt_mon_resources(void)
{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+
if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
@@ -880,7 +826,7 @@ static __init bool get_rdt_mon_resources(void)
if (!rdt_mon_features)
return false;
- return !rdt_get_mon_l3_config(&rdt_resources_all[RDT_RESOURCE_L3]);
+ return !rdt_get_mon_l3_config(r);
}
static __init void __check_quirks_intel(void)
@@ -918,42 +864,40 @@ static __init bool get_rdt_resources(void)
static __init void rdt_init_res_defs_intel(void)
{
+ struct rdt_hw_resource *hw_res;
struct rdt_resource *r;
for_each_rdt_resource(r) {
+ hw_res = resctrl_to_arch_res(r);
+
if (r->rid == RDT_RESOURCE_L3 ||
- r->rid == RDT_RESOURCE_L3DATA ||
- r->rid == RDT_RESOURCE_L3CODE ||
- r->rid == RDT_RESOURCE_L2 ||
- r->rid == RDT_RESOURCE_L2DATA ||
- r->rid == RDT_RESOURCE_L2CODE) {
+ r->rid == RDT_RESOURCE_L2) {
r->cache.arch_has_sparse_bitmaps = false;
r->cache.arch_has_empty_bitmaps = false;
r->cache.arch_has_per_cpu_cfg = false;
} else if (r->rid == RDT_RESOURCE_MBA) {
- r->msr_base = MSR_IA32_MBA_THRTL_BASE;
- r->msr_update = mba_wrmsr_intel;
+ hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE;
+ hw_res->msr_update = mba_wrmsr_intel;
}
}
}
static __init void rdt_init_res_defs_amd(void)
{
+ struct rdt_hw_resource *hw_res;
struct rdt_resource *r;
for_each_rdt_resource(r) {
+ hw_res = resctrl_to_arch_res(r);
+
if (r->rid == RDT_RESOURCE_L3 ||
- r->rid == RDT_RESOURCE_L3DATA ||
- r->rid == RDT_RESOURCE_L3CODE ||
- r->rid == RDT_RESOURCE_L2 ||
- r->rid == RDT_RESOURCE_L2DATA ||
- r->rid == RDT_RESOURCE_L2CODE) {
+ r->rid == RDT_RESOURCE_L2) {
r->cache.arch_has_sparse_bitmaps = true;
r->cache.arch_has_empty_bitmaps = true;
r->cache.arch_has_per_cpu_cfg = true;
} else if (r->rid == RDT_RESOURCE_MBA) {
- r->msr_base = MSR_IA32_MBA_BW_BASE;
- r->msr_update = mba_wrmsr_amd;
+ hw_res->msr_base = MSR_IA32_MBA_BW_BASE;
+ hw_res->msr_update = mba_wrmsr_amd;
}
}
}
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index c877642e8a14..87666275eed9 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -57,20 +57,23 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return true;
}
-int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
struct rdt_domain *d)
{
+ struct resctrl_staged_config *cfg;
+ struct rdt_resource *r = s->res;
unsigned long bw_val;
- if (d->have_new_ctrl) {
+ cfg = &d->staged_config[s->conf_type];
+ if (cfg->have_new_ctrl) {
rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
return -EINVAL;
}
if (!bw_validate(data->buf, &bw_val, r))
return -EINVAL;
- d->new_ctrl = bw_val;
- d->have_new_ctrl = true;
+ cfg->new_ctrl = bw_val;
+ cfg->have_new_ctrl = true;
return 0;
}
@@ -125,13 +128,16 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
* Read one cache bit mask (hex). Check that it is valid for the current
* resource type.
*/
-int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
+int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
struct rdt_domain *d)
{
struct rdtgroup *rdtgrp = data->rdtgrp;
+ struct resctrl_staged_config *cfg;
+ struct rdt_resource *r = s->res;
u32 cbm_val;
- if (d->have_new_ctrl) {
+ cfg = &d->staged_config[s->conf_type];
+ if (cfg->have_new_ctrl) {
rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
return -EINVAL;
}
@@ -160,12 +166,12 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
* The CBM may not overlap with the CBM of another closid if
* either is exclusive.
*/
- if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, true)) {
+ if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, true)) {
rdt_last_cmd_puts("Overlaps with exclusive group\n");
return -EINVAL;
}
- if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, false)) {
+ if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, false)) {
if (rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
rdt_last_cmd_puts("Overlaps with other group\n");
@@ -173,8 +179,8 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
}
}
- d->new_ctrl = cbm_val;
- d->have_new_ctrl = true;
+ cfg->new_ctrl = cbm_val;
+ cfg->have_new_ctrl = true;
return 0;
}
@@ -185,9 +191,12 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
* separated by ";". The "id" is in decimal, and must match one of
* the "id"s for this resource.
*/
-static int parse_line(char *line, struct rdt_resource *r,
+static int parse_line(char *line, struct resctrl_schema *s,
struct rdtgroup *rdtgrp)
{
+ enum resctrl_conf_type t = s->conf_type;
+ struct resctrl_staged_config *cfg;
+ struct rdt_resource *r = s->res;
struct rdt_parse_data data;
char *dom = NULL, *id;
struct rdt_domain *d;
@@ -213,9 +222,10 @@ next:
if (d->id == dom_id) {
data.buf = dom;
data.rdtgrp = rdtgrp;
- if (r->parse_ctrlval(&data, r, d))
+ if (r->parse_ctrlval(&data, s, d))
return -EINVAL;
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+ cfg = &d->staged_config[t];
/*
* In pseudo-locking setup mode and just
* parsed a valid CBM that should be
@@ -224,9 +234,9 @@ next:
* the required initialization for single
* region and return.
*/
- rdtgrp->plr->r = r;
+ rdtgrp->plr->s = s;
rdtgrp->plr->d = d;
- rdtgrp->plr->cbm = d->new_ctrl;
+ rdtgrp->plr->cbm = cfg->new_ctrl;
d->plr = rdtgrp->plr;
return 0;
}
@@ -236,28 +246,72 @@ next:
return -EINVAL;
}
-int update_domains(struct rdt_resource *r, int closid)
+static u32 get_config_index(u32 closid, enum resctrl_conf_type type)
{
+ switch (type) {
+ default:
+ case CDP_NONE:
+ return closid;
+ case CDP_CODE:
+ return closid * 2 + 1;
+ case CDP_DATA:
+ return closid * 2;
+ }
+}
+
+static bool apply_config(struct rdt_hw_domain *hw_dom,
+ struct resctrl_staged_config *cfg, u32 idx,
+ cpumask_var_t cpu_mask, bool mba_sc)
+{
+ struct rdt_domain *dom = &hw_dom->d_resctrl;
+ u32 *dc = !mba_sc ? hw_dom->ctrl_val : hw_dom->mbps_val;
+
+ if (cfg->new_ctrl != dc[idx]) {
+ cpumask_set_cpu(cpumask_any(&dom->cpu_mask), cpu_mask);
+ dc[idx] = cfg->new_ctrl;
+
+ return true;
+ }
+
+ return false;
+}
+
+int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
+{
+ struct resctrl_staged_config *cfg;
+ struct rdt_hw_domain *hw_dom;
struct msr_param msr_param;
+ enum resctrl_conf_type t;
cpumask_var_t cpu_mask;
struct rdt_domain *d;
bool mba_sc;
- u32 *dc;
int cpu;
+ u32 idx;
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
return -ENOMEM;
- msr_param.low = closid;
- msr_param.high = msr_param.low + 1;
- msr_param.res = r;
-
mba_sc = is_mba_sc(r);
+ msr_param.res = NULL;
list_for_each_entry(d, &r->domains, list) {
- dc = !mba_sc ? d->ctrl_val : d->mbps_val;
- if (d->have_new_ctrl && d->new_ctrl != dc[closid]) {
- cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
- dc[closid] = d->new_ctrl;
+ hw_dom = resctrl_to_arch_dom(d);
+ for (t = 0; t < CDP_NUM_TYPES; t++) {
+ cfg = &hw_dom->d_resctrl.staged_config[t];
+ if (!cfg->have_new_ctrl)
+ continue;
+
+ idx = get_config_index(closid, t);
+ if (!apply_config(hw_dom, cfg, idx, cpu_mask, mba_sc))
+ continue;
+
+ if (!msr_param.res) {
+ msr_param.low = idx;
+ msr_param.high = msr_param.low + 1;
+ msr_param.res = r;
+ } else {
+ msr_param.low = min(msr_param.low, idx);
+ msr_param.high = max(msr_param.high, idx + 1);
+ }
}
}
@@ -284,11 +338,11 @@ done:
static int rdtgroup_parse_resource(char *resname, char *tok,
struct rdtgroup *rdtgrp)
{
- struct rdt_resource *r;
+ struct resctrl_schema *s;
- for_each_alloc_enabled_rdt_resource(r) {
- if (!strcmp(resname, r->name) && rdtgrp->closid < r->num_closid)
- return parse_line(tok, r, rdtgrp);
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ if (!strcmp(resname, s->name) && rdtgrp->closid < s->num_closid)
+ return parse_line(tok, s, rdtgrp);
}
rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname);
return -EINVAL;
@@ -297,6 +351,7 @@ static int rdtgroup_parse_resource(char *resname, char *tok,
ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
+ struct resctrl_schema *s;
struct rdtgroup *rdtgrp;
struct rdt_domain *dom;
struct rdt_resource *r;
@@ -327,9 +382,9 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
goto out;
}
- for_each_alloc_enabled_rdt_resource(r) {
- list_for_each_entry(dom, &r->domains, list)
- dom->have_new_ctrl = false;
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ list_for_each_entry(dom, &s->res->domains, list)
+ memset(dom->staged_config, 0, sizeof(dom->staged_config));
}
while ((tok = strsep(&buf, "\n")) != NULL) {
@@ -349,8 +404,9 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
goto out;
}
- for_each_alloc_enabled_rdt_resource(r) {
- ret = update_domains(r, rdtgrp->closid);
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ r = s->res;
+ ret = resctrl_arch_update_domains(r, rdtgrp->closid);
if (ret)
goto out;
}
@@ -371,19 +427,31 @@ out:
return ret ?: nbytes;
}
-static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
+u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
+ u32 closid, enum resctrl_conf_type type)
+{
+ struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ u32 idx = get_config_index(closid, type);
+
+ if (!is_mba_sc(r))
+ return hw_dom->ctrl_val[idx];
+ return hw_dom->mbps_val[idx];
+}
+
+static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid)
{
+ struct rdt_resource *r = schema->res;
struct rdt_domain *dom;
bool sep = false;
u32 ctrl_val;
- seq_printf(s, "%*s:", max_name_width, r->name);
+ seq_printf(s, "%*s:", max_name_width, schema->name);
list_for_each_entry(dom, &r->domains, list) {
if (sep)
seq_puts(s, ";");
- ctrl_val = (!is_mba_sc(r) ? dom->ctrl_val[closid] :
- dom->mbps_val[closid]);
+ ctrl_val = resctrl_arch_get_config(r, dom, closid,
+ schema->conf_type);
seq_printf(s, r->format_str, dom->id, max_data_width,
ctrl_val);
sep = true;
@@ -394,16 +462,17 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
int rdtgroup_schemata_show(struct kernfs_open_file *of,
struct seq_file *s, void *v)
{
+ struct resctrl_schema *schema;
struct rdtgroup *rdtgrp;
- struct rdt_resource *r;
int ret = 0;
u32 closid;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (rdtgrp) {
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
- for_each_alloc_enabled_rdt_resource(r)
- seq_printf(s, "%s:uninitialized\n", r->name);
+ list_for_each_entry(schema, &resctrl_schema_all, list) {
+ seq_printf(s, "%s:uninitialized\n", schema->name);
+ }
} else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
if (!rdtgrp->plr->d) {
rdt_last_cmd_clear();
@@ -411,15 +480,15 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
ret = -ENODEV;
} else {
seq_printf(s, "%s:%d=%x\n",
- rdtgrp->plr->r->name,
+ rdtgrp->plr->s->res->name,
rdtgrp->plr->d->id,
rdtgrp->plr->cbm);
}
} else {
closid = rdtgrp->closid;
- for_each_alloc_enabled_rdt_resource(r) {
- if (closid < r->num_closid)
- show_doms(s, r, closid);
+ list_for_each_entry(schema, &resctrl_schema_all, list) {
+ if (closid < schema->num_closid)
+ show_doms(s, schema, closid);
}
}
} else {
@@ -449,6 +518,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
int rdtgroup_mondata_show(struct seq_file *m, void *arg)
{
struct kernfs_open_file *of = m->private;
+ struct rdt_hw_resource *hw_res;
u32 resid, evtid, domid;
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
@@ -468,7 +538,8 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
domid = md.u.domid;
evtid = md.u.evtid;
- r = &rdt_resources_all[resid];
+ hw_res = &rdt_resources_all[resid];
+ r = &hw_res->r_resctrl;
d = rdt_find_domain(r, domid, NULL);
if (IS_ERR_OR_NULL(d)) {
ret = -ENOENT;
@@ -482,7 +553,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
else if (rr.val & RMID_VAL_UNAVAIL)
seq_puts(m, "Unavailable\n");
else
- seq_printf(m, "%llu\n", rr.val * r->mon_scale);
+ seq_printf(m, "%llu\n", rr.val * hw_res->mon_scale);
out:
rdtgroup_kn_unlock(of->kn);
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 6a5f60a37219..1d647188a43b 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -2,6 +2,7 @@
#ifndef _ASM_X86_RESCTRL_INTERNAL_H
#define _ASM_X86_RESCTRL_INTERNAL_H
+#include <linux/resctrl.h>
#include <linux/sched.h>
#include <linux/kernfs.h>
#include <linux/fs_context.h>
@@ -109,6 +110,7 @@ extern unsigned int resctrl_cqm_threshold;
extern bool rdt_alloc_capable;
extern bool rdt_mon_capable;
extern unsigned int rdt_mon_features;
+extern struct list_head resctrl_schema_all;
enum rdt_group_type {
RDTCTRL_GROUP = 0,
@@ -161,8 +163,8 @@ struct mongroup {
/**
* struct pseudo_lock_region - pseudo-lock region information
- * @r: RDT resource to which this pseudo-locked region
- * belongs
+ * @s: Resctrl schema for the resource to which this
+ * pseudo-locked region belongs
* @d: RDT domain to which this pseudo-locked region
* belongs
* @cbm: bitmask of the pseudo-locked region
@@ -182,7 +184,7 @@ struct mongroup {
* @pm_reqs: Power management QoS requests related to this region
*/
struct pseudo_lock_region {
- struct rdt_resource *r;
+ struct resctrl_schema *s;
struct rdt_domain *d;
u32 cbm;
wait_queue_head_t lock_thread_wq;
@@ -303,44 +305,25 @@ struct mbm_state {
};
/**
- * struct rdt_domain - group of cpus sharing an RDT resource
- * @list: all instances of this resource
- * @id: unique id for this instance
- * @cpu_mask: which cpus share this resource
- * @rmid_busy_llc:
- * bitmap of which limbo RMIDs are above threshold
- * @mbm_total: saved state for MBM total bandwidth
- * @mbm_local: saved state for MBM local bandwidth
- * @mbm_over: worker to periodically read MBM h/w counters
- * @cqm_limbo: worker to periodically read CQM h/w counters
- * @mbm_work_cpu:
- * worker cpu for MBM h/w counters
- * @cqm_work_cpu:
- * worker cpu for CQM h/w counters
+ * struct rdt_hw_domain - Arch private attributes of a set of CPUs that share
+ * a resource
+ * @d_resctrl: Properties exposed to the resctrl file system
* @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
* @mbps_val: When mba_sc is enabled, this holds the bandwidth in MBps
- * @new_ctrl: new ctrl value to be loaded
- * @have_new_ctrl: did user provide new_ctrl for this domain
- * @plr: pseudo-locked region (if any) associated with domain
+ *
+ * Members of this structure are accessed via helpers that provide abstraction.
*/
-struct rdt_domain {
- struct list_head list;
- int id;
- struct cpumask cpu_mask;
- unsigned long *rmid_busy_llc;
- struct mbm_state *mbm_total;
- struct mbm_state *mbm_local;
- struct delayed_work mbm_over;
- struct delayed_work cqm_limbo;
- int mbm_work_cpu;
- int cqm_work_cpu;
+struct rdt_hw_domain {
+ struct rdt_domain d_resctrl;
u32 *ctrl_val;
u32 *mbps_val;
- u32 new_ctrl;
- bool have_new_ctrl;
- struct pseudo_lock_region *plr;
};
+static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r)
+{
+ return container_of(r, struct rdt_hw_domain, d_resctrl);
+}
+
/**
* struct msr_param - set a range of MSRs from a domain
* @res: The resource to use
@@ -349,69 +332,8 @@ struct rdt_domain {
*/
struct msr_param {
struct rdt_resource *res;
- int low;
- int high;
-};
-
-/**
- * struct rdt_cache - Cache allocation related data
- * @cbm_len: Length of the cache bit mask
- * @min_cbm_bits: Minimum number of consecutive bits to be set
- * @cbm_idx_mult: Multiplier of CBM index
- * @cbm_idx_offset: Offset of CBM index. CBM index is computed by:
- * closid * cbm_idx_multi + cbm_idx_offset
- * in a cache bit mask
- * @shareable_bits: Bitmask of shareable resource with other
- * executing entities
- * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid.
- * @arch_has_empty_bitmaps: True if the '0' bitmap is valid.
- * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache
- * level has CPU scope.
- */
-struct rdt_cache {
- unsigned int cbm_len;
- unsigned int min_cbm_bits;
- unsigned int cbm_idx_mult;
- unsigned int cbm_idx_offset;
- unsigned int shareable_bits;
- bool arch_has_sparse_bitmaps;
- bool arch_has_empty_bitmaps;
- bool arch_has_per_cpu_cfg;
-};
-
-/**
- * enum membw_throttle_mode - System's memory bandwidth throttling mode
- * @THREAD_THROTTLE_UNDEFINED: Not relevant to the system
- * @THREAD_THROTTLE_MAX: Memory bandwidth is throttled at the core
- * always using smallest bandwidth percentage
- * assigned to threads, aka "max throttling"
- * @THREAD_THROTTLE_PER_THREAD: Memory bandwidth is throttled at the thread
- */
-enum membw_throttle_mode {
- THREAD_THROTTLE_UNDEFINED = 0,
- THREAD_THROTTLE_MAX,
- THREAD_THROTTLE_PER_THREAD,
-};
-
-/**
- * struct rdt_membw - Memory bandwidth allocation related data
- * @min_bw: Minimum memory bandwidth percentage user can request
- * @bw_gran: Granularity at which the memory bandwidth is allocated
- * @delay_linear: True if memory B/W delay is in linear scale
- * @arch_needs_linear: True if we can't configure non-linear resources
- * @throttle_mode: Bandwidth throttling mode when threads request
- * different memory bandwidths
- * @mba_sc: True if MBA software controller(mba_sc) is enabled
- * @mb_map: Mapping of memory B/W percentage to memory B/W delay
- */
-struct rdt_membw {
- u32 min_bw;
- u32 bw_gran;
- u32 delay_linear;
- bool arch_needs_linear;
- enum membw_throttle_mode throttle_mode;
- bool mba_sc;
- u32 *mb_map;
+ u32 low;
+ u32 high;
};
static inline bool is_llc_occupancy_enabled(void)
@@ -446,111 +368,103 @@ struct rdt_parse_data {
};
/**
- * struct rdt_resource - attributes of an RDT resource
- * @rid: The index of the resource
- * @alloc_enabled: Is allocation enabled on this machine
- * @mon_enabled: Is monitoring enabled for this feature
- * @alloc_capable: Is allocation available on this machine
- * @mon_capable: Is monitor feature available on this machine
- * @name: Name to use in "schemata" file
- * @num_closid: Number of CLOSIDs available
- * @cache_level: Which cache level defines scope of this resource
- * @default_ctrl: Specifies default cache cbm or memory B/W percent.
+ * struct rdt_hw_resource - arch private attributes of a resctrl resource
+ * @r_resctrl: Attributes of the resource used directly by resctrl.
+ * @num_closid: Maximum number of closid this hardware can support,
+ * regardless of CDP. This is exposed via
+ * resctrl_arch_get_num_closid() to avoid confusion
+ * with struct resctrl_schema's property of the same name,
+ * which has been corrected for features like CDP.
* @msr_base: Base MSR address for CBMs
* @msr_update: Function pointer to update QOS MSRs
- * @data_width: Character width of data when displaying
- * @domains: All domains for this resource
- * @cache: Cache allocation related data
- * @membw: If the component has bandwidth controls, their properties.
- * @format_str: Per resource format string to show domain value
- * @parse_ctrlval: Per resource function pointer to parse control values
- * @evt_list: List of monitoring events
- * @num_rmid: Number of RMIDs available
* @mon_scale: cqm counter * mon_scale = occupancy in bytes
* @mbm_width: Monitor width, to detect and correct for overflow.
- * @fflags: flags to choose base and info files
+ * @cdp_enabled: CDP state of this resource
+ *
+ * Members of this structure are either private to the architecture
+ * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g.
+ * msr_update and msr_base.
*/
-struct rdt_resource {
- int rid;
- bool alloc_enabled;
- bool mon_enabled;
- bool alloc_capable;
- bool mon_capable;
- char *name;
- int num_closid;
- int cache_level;
- u32 default_ctrl;
+struct rdt_hw_resource {
+ struct rdt_resource r_resctrl;
+ u32 num_closid;
unsigned int msr_base;
void (*msr_update) (struct rdt_domain *d, struct msr_param *m,
struct rdt_resource *r);
- int data_width;
- struct list_head domains;
- struct rdt_cache cache;
- struct rdt_membw membw;
- const char *format_str;
- int (*parse_ctrlval)(struct rdt_parse_data *data,
- struct rdt_resource *r,
- struct rdt_domain *d);
- struct list_head evt_list;
- int num_rmid;
unsigned int mon_scale;
unsigned int mbm_width;
- unsigned long fflags;
+ bool cdp_enabled;
};
-int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
+static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r)
+{
+ return container_of(r, struct rdt_hw_resource, r_resctrl);
+}
+
+int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
struct rdt_domain *d);
-int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
struct rdt_domain *d);
extern struct mutex rdtgroup_mutex;
-extern struct rdt_resource rdt_resources_all[];
+extern struct rdt_hw_resource rdt_resources_all[];
extern struct rdtgroup rdtgroup_default;
DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
extern struct dentry *debugfs_resctrl;
-enum {
+enum resctrl_res_level {
RDT_RESOURCE_L3,
- RDT_RESOURCE_L3DATA,
- RDT_RESOURCE_L3CODE,
RDT_RESOURCE_L2,
- RDT_RESOURCE_L2DATA,
- RDT_RESOURCE_L2CODE,
RDT_RESOURCE_MBA,
/* Must be the last */
RDT_NUM_RESOURCES,
};
+static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res)
+{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res);
+
+ hw_res++;
+ return &hw_res->r_resctrl;
+}
+
+static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l)
+{
+ return rdt_resources_all[l].cdp_enabled;
+}
+
+int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);
+
+/*
+ * To return the common struct rdt_resource, which is contained in struct
+ * rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource.
+ */
#define for_each_rdt_resource(r) \
- for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
- r++)
+ for (r = &rdt_resources_all[0].r_resctrl; \
+ r <= &rdt_resources_all[RDT_NUM_RESOURCES - 1].r_resctrl; \
+ r = resctrl_inc(r))
#define for_each_capable_rdt_resource(r) \
- for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
- r++) \
+ for_each_rdt_resource(r) \
if (r->alloc_capable || r->mon_capable)
#define for_each_alloc_capable_rdt_resource(r) \
- for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
- r++) \
+ for_each_rdt_resource(r) \
if (r->alloc_capable)
#define for_each_mon_capable_rdt_resource(r) \
- for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
- r++) \
+ for_each_rdt_resource(r) \
if (r->mon_capable)
#define for_each_alloc_enabled_rdt_resource(r) \
- for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
- r++) \
+ for_each_rdt_resource(r) \
if (r->alloc_enabled)
#define for_each_mon_enabled_rdt_resource(r) \
- for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
- r++) \
+ for_each_rdt_resource(r) \
if (r->mon_enabled)
/* CPUID.(EAX=10H, ECX=ResID=1).EAX */
@@ -594,7 +508,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off);
int rdtgroup_schemata_show(struct kernfs_open_file *of,
struct seq_file *s, void *v);
-bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
+bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
unsigned long cbm, int closid, bool exclusive);
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
unsigned long cbm);
@@ -609,7 +523,6 @@ void rdt_pseudo_lock_release(void);
int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
-int update_domains(struct rdt_resource *r, int closid);
int closids_supported(void);
void closid_free(int closid);
int alloc_rmid(void);
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index f07c10b87a87..c9f0f3d63f75 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -174,7 +174,7 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
struct rdt_resource *r;
u32 crmid = 1, nrmid;
- r = &rdt_resources_all[RDT_RESOURCE_L3];
+ r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
/*
* Skip RMID 0 and start from RMID 1 and check all the RMIDs that
@@ -232,7 +232,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
int cpu;
u64 val;
- r = &rdt_resources_all[RDT_RESOURCE_L3];
+ r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
entry->busy = 0;
cpu = get_cpu();
@@ -285,15 +285,15 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
return chunks >>= shift;
}
-static int __mon_event_count(u32 rmid, struct rmid_read *rr)
+static u64 __mon_event_count(u32 rmid, struct rmid_read *rr)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r);
struct mbm_state *m;
u64 chunks, tval;
tval = __rmid_read(rmid, rr->evtid);
if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
- rr->val = tval;
- return -EINVAL;
+ return tval;
}
switch (rr->evtid) {
case QOS_L3_OCCUP_EVENT_ID:
@@ -307,10 +307,10 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
break;
default:
/*
- * Code would never reach here because
- * an invalid event id would fail the __rmid_read.
+ * Code would never reach here because an invalid
+ * event id would fail the __rmid_read.
*/
- return -EINVAL;
+ return RMID_VAL_ERROR;
}
if (rr->first) {
@@ -319,7 +319,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
return 0;
}
- chunks = mbm_overflow_count(m->prev_msr, tval, rr->r->mbm_width);
+ chunks = mbm_overflow_count(m->prev_msr, tval, hw_res->mbm_width);
m->chunks += chunks;
m->prev_msr = tval;
@@ -334,7 +334,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
*/
static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
{
- struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r);
struct mbm_state *m = &rr->d->mbm_local[rmid];
u64 tval, cur_bw, chunks;
@@ -342,8 +342,8 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
return;
- chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
- cur_bw = (get_corrected_mbm_count(rmid, chunks) * r->mon_scale) >> 20;
+ chunks = mbm_overflow_count(m->prev_bw_msr, tval, hw_res->mbm_width);
+ cur_bw = (get_corrected_mbm_count(rmid, chunks) * hw_res->mon_scale) >> 20;
if (m->delta_comp)
m->delta_bw = abs(cur_bw - m->prev_bw);
@@ -361,23 +361,29 @@ void mon_event_count(void *info)
struct rdtgroup *rdtgrp, *entry;
struct rmid_read *rr = info;
struct list_head *head;
+ u64 ret_val;
rdtgrp = rr->rgrp;
- if (__mon_event_count(rdtgrp->mon.rmid, rr))
- return;
+ ret_val = __mon_event_count(rdtgrp->mon.rmid, rr);
/*
- * For Ctrl groups read data from child monitor groups.
+ * For Ctrl groups read data from child monitor groups and
+ * add them together. Count events which are read successfully.
+ * Discard the rmid_read's reporting errors.
*/
head = &rdtgrp->mon.crdtgrp_list;
if (rdtgrp->type == RDTCTRL_GROUP) {
list_for_each_entry(entry, head, mon.crdtgrp_list) {
- if (__mon_event_count(entry->mon.rmid, rr))
- return;
+ if (__mon_event_count(entry->mon.rmid, rr) == 0)
+ ret_val = 0;
}
}
+
+ /* Report error if none of rmid_reads are successful */
+ if (ret_val)
+ rr->val = ret_val;
}
/*
@@ -416,6 +422,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
{
u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
+ struct rdt_hw_resource *hw_r_mba;
+ struct rdt_hw_domain *hw_dom_mba;
u32 cur_bw, delta_bw, user_bw;
struct rdt_resource *r_mba;
struct rdt_domain *dom_mba;
@@ -425,7 +433,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
if (!is_mbm_local_enabled())
return;
- r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
+ hw_r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
+ r_mba = &hw_r_mba->r_resctrl;
closid = rgrp->closid;
rmid = rgrp->mon.rmid;
pmbm_data = &dom_mbm->mbm_local[rmid];
@@ -435,11 +444,16 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
pr_warn_once("Failure to get domain for MBA update\n");
return;
}
+ hw_dom_mba = resctrl_to_arch_dom(dom_mba);
cur_bw = pmbm_data->prev_bw;
- user_bw = dom_mba->mbps_val[closid];
+ user_bw = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
delta_bw = pmbm_data->delta_bw;
- cur_msr_val = dom_mba->ctrl_val[closid];
+ /*
+ * resctrl_arch_get_config() chooses the mbps/ctrl value to return
+ * based on is_mba_sc(). For now, reach into the hw_dom.
+ */
+ cur_msr_val = hw_dom_mba->ctrl_val[closid];
/*
* For Ctrl groups read data from child monitor groups.
@@ -474,9 +488,9 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
return;
}
- cur_msr = r_mba->msr_base + closid;
+ cur_msr = hw_r_mba->msr_base + closid;
wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba));
- dom_mba->ctrl_val[closid] = new_msr_val;
+ hw_dom_mba->ctrl_val[closid] = new_msr_val;
/*
* Delta values are updated dynamically package wise for each
@@ -538,7 +552,7 @@ void cqm_handle_limbo(struct work_struct *work)
mutex_lock(&rdtgroup_mutex);
- r = &rdt_resources_all[RDT_RESOURCE_L3];
+ r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
d = container_of(work, struct rdt_domain, cqm_limbo.work);
__check_limbo(d, false);
@@ -574,7 +588,7 @@ void mbm_handle_overflow(struct work_struct *work)
if (!static_branch_likely(&rdt_mon_enable_key))
goto out_unlock;
- r = &rdt_resources_all[RDT_RESOURCE_L3];
+ r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
d = container_of(work, struct rdt_domain, mbm_over.work);
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
@@ -671,15 +685,16 @@ static void l3_mon_evt_init(struct rdt_resource *r)
int rdt_get_mon_l3_config(struct rdt_resource *r)
{
unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
unsigned int cl_size = boot_cpu_data.x86_cache_size;
int ret;
- r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
+ hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale;
r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
- r->mbm_width = MBM_CNTR_WIDTH_BASE;
+ hw_res->mbm_width = MBM_CNTR_WIDTH_BASE;
if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
- r->mbm_width += mbm_offset;
+ hw_res->mbm_width += mbm_offset;
else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX)
pr_warn("Ignoring impossible MBM counter offset\n");
@@ -693,7 +708,7 @@ int rdt_get_mon_l3_config(struct rdt_resource *r)
resctrl_cqm_threshold = cl_size * 1024 / r->num_rmid;
/* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
- resctrl_cqm_threshold /= r->mon_scale;
+ resctrl_cqm_threshold /= hw_res->mon_scale;
ret = dom_data_init(r);
if (ret)
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 2207916cae65..db813f819ad6 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -250,7 +250,7 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
plr->line_size = 0;
kfree(plr->kmem);
plr->kmem = NULL;
- plr->r = NULL;
+ plr->s = NULL;
if (plr->d)
plr->d->plr = NULL;
plr->d = NULL;
@@ -294,10 +294,10 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
ci = get_cpu_cacheinfo(plr->cpu);
- plr->size = rdtgroup_cbm_to_size(plr->r, plr->d, plr->cbm);
+ plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
for (i = 0; i < ci->num_leaves; i++) {
- if (ci->info_list[i].level == plr->r->cache_level) {
+ if (ci->info_list[i].level == plr->s->res->cache_level) {
plr->line_size = ci->info_list[i].coherency_line_size;
return 0;
}
@@ -688,8 +688,8 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
* resource, the portion of cache used by it should be made
* unavailable to all future allocations from both resources.
*/
- if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled ||
- rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) {
+ if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3) ||
+ resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) {
rdt_last_cmd_puts("CDP enabled\n");
return -EINVAL;
}
@@ -800,7 +800,7 @@ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm
unsigned long cbm_b;
if (d->plr) {
- cbm_len = d->plr->r->cache.cbm_len;
+ cbm_len = d->plr->s->res->cache.cbm_len;
cbm_b = d->plr->cbm;
if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
return true;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 01fd30e7829d..b57b3db9a6a7 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -39,6 +39,9 @@ static struct kernfs_root *rdt_root;
struct rdtgroup rdtgroup_default;
LIST_HEAD(rdt_all_groups);
+/* list of entries for the schemata file */
+LIST_HEAD(resctrl_schema_all);
+
/* Kernel fs node for "info" directory under root */
static struct kernfs_node *kn_info;
@@ -100,12 +103,12 @@ int closids_supported(void)
static void closid_init(void)
{
- struct rdt_resource *r;
- int rdt_min_closid = 32;
+ struct resctrl_schema *s;
+ u32 rdt_min_closid = 32;
/* Compute rdt_min_closid across all resources */
- for_each_alloc_enabled_rdt_resource(r)
- rdt_min_closid = min(rdt_min_closid, r->num_closid);
+ list_for_each_entry(s, &resctrl_schema_all, list)
+ rdt_min_closid = min(rdt_min_closid, s->num_closid);
closid_free_map = BIT_MASK(rdt_min_closid) - 1;
@@ -842,16 +845,17 @@ static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
static int rdt_num_closids_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
- seq_printf(seq, "%d\n", r->num_closid);
+ seq_printf(seq, "%u\n", s->num_closid);
return 0;
}
static int rdt_default_ctrl_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
+ struct rdt_resource *r = s->res;
seq_printf(seq, "%x\n", r->default_ctrl);
return 0;
@@ -860,7 +864,8 @@ static int rdt_default_ctrl_show(struct kernfs_open_file *of,
static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
+ struct rdt_resource *r = s->res;
seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
return 0;
@@ -869,7 +874,8 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
static int rdt_shareable_bits_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
+ struct rdt_resource *r = s->res;
seq_printf(seq, "%x\n", r->cache.shareable_bits);
return 0;
@@ -892,38 +898,40 @@ static int rdt_shareable_bits_show(struct kernfs_open_file *of,
static int rdt_bit_usage_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
/*
* Use unsigned long even though only 32 bits are used to ensure
* test_bit() is used safely.
*/
unsigned long sw_shareable = 0, hw_shareable = 0;
unsigned long exclusive = 0, pseudo_locked = 0;
+ struct rdt_resource *r = s->res;
struct rdt_domain *dom;
int i, hwb, swb, excl, psl;
enum rdtgrp_mode mode;
bool sep = false;
- u32 *ctrl;
+ u32 ctrl_val;
mutex_lock(&rdtgroup_mutex);
hw_shareable = r->cache.shareable_bits;
list_for_each_entry(dom, &r->domains, list) {
if (sep)
seq_putc(seq, ';');
- ctrl = dom->ctrl_val;
sw_shareable = 0;
exclusive = 0;
seq_printf(seq, "%d=", dom->id);
- for (i = 0; i < closids_supported(); i++, ctrl++) {
+ for (i = 0; i < closids_supported(); i++) {
if (!closid_allocated(i))
continue;
+ ctrl_val = resctrl_arch_get_config(r, dom, i,
+ s->conf_type);
mode = rdtgroup_mode_by_closid(i);
switch (mode) {
case RDT_MODE_SHAREABLE:
- sw_shareable |= *ctrl;
+ sw_shareable |= ctrl_val;
break;
case RDT_MODE_EXCLUSIVE:
- exclusive |= *ctrl;
+ exclusive |= ctrl_val;
break;
case RDT_MODE_PSEUDO_LOCKSETUP:
/*
@@ -970,7 +978,8 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
static int rdt_min_bw_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
+ struct rdt_resource *r = s->res;
seq_printf(seq, "%u\n", r->membw.min_bw);
return 0;
@@ -1001,7 +1010,8 @@ static int rdt_mon_features_show(struct kernfs_open_file *of,
static int rdt_bw_gran_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
+ struct rdt_resource *r = s->res;
seq_printf(seq, "%u\n", r->membw.bw_gran);
return 0;
@@ -1010,7 +1020,8 @@ static int rdt_bw_gran_show(struct kernfs_open_file *of,
static int rdt_delay_linear_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
+ struct rdt_resource *r = s->res;
seq_printf(seq, "%u\n", r->membw.delay_linear);
return 0;
@@ -1020,8 +1031,9 @@ static int max_threshold_occ_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
struct rdt_resource *r = of->kn->parent->priv;
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- seq_printf(seq, "%u\n", resctrl_cqm_threshold * r->mon_scale);
+ seq_printf(seq, "%u\n", resctrl_cqm_threshold * hw_res->mon_scale);
return 0;
}
@@ -1029,7 +1041,8 @@ static int max_threshold_occ_show(struct kernfs_open_file *of,
static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
+ struct rdt_resource *r = s->res;
if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD)
seq_puts(seq, "per-thread\n");
@@ -1042,7 +1055,7 @@ static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct rdt_hw_resource *hw_res;
unsigned int bytes;
int ret;
@@ -1053,7 +1066,8 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
if (bytes > (boot_cpu_data.x86_cache_size * 1024))
return -EINVAL;
- resctrl_cqm_threshold = bytes / r->mon_scale;
+ hw_res = resctrl_to_arch_res(of->kn->parent->priv);
+ resctrl_cqm_threshold = bytes / hw_res->mon_scale;
return nbytes;
}
@@ -1078,76 +1092,17 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
return 0;
}
-/**
- * rdt_cdp_peer_get - Retrieve CDP peer if it exists
- * @r: RDT resource to which RDT domain @d belongs
- * @d: Cache instance for which a CDP peer is requested
- * @r_cdp: RDT resource that shares hardware with @r (RDT resource peer)
- * Used to return the result.
- * @d_cdp: RDT domain that shares hardware with @d (RDT domain peer)
- * Used to return the result.
- *
- * RDT resources are managed independently and by extension the RDT domains
- * (RDT resource instances) are managed independently also. The Code and
- * Data Prioritization (CDP) RDT resources, while managed independently,
- * could refer to the same underlying hardware. For example,
- * RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache.
- *
- * When provided with an RDT resource @r and an instance of that RDT
- * resource @d rdt_cdp_peer_get() will return if there is a peer RDT
- * resource and the exact instance that shares the same hardware.
- *
- * Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists.
- * If a CDP peer was found, @r_cdp will point to the peer RDT resource
- * and @d_cdp will point to the peer RDT domain.
- */
-static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d,
- struct rdt_resource **r_cdp,
- struct rdt_domain **d_cdp)
+static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
{
- struct rdt_resource *_r_cdp = NULL;
- struct rdt_domain *_d_cdp = NULL;
- int ret = 0;
-
- switch (r->rid) {
- case RDT_RESOURCE_L3DATA:
- _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE];
- break;
- case RDT_RESOURCE_L3CODE:
- _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3DATA];
- break;
- case RDT_RESOURCE_L2DATA:
- _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2CODE];
- break;
- case RDT_RESOURCE_L2CODE:
- _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2DATA];
- break;
+ switch (my_type) {
+ case CDP_CODE:
+ return CDP_DATA;
+ case CDP_DATA:
+ return CDP_CODE;
default:
- ret = -ENOENT;
- goto out;
- }
-
- /*
- * When a new CPU comes online and CDP is enabled then the new
- * RDT domains (if any) associated with both CDP RDT resources
- * are added in the same CPU online routine while the
- * rdtgroup_mutex is held. It should thus not happen for one
- * RDT domain to exist and be associated with its RDT CDP
- * resource but there is no RDT domain associated with the
- * peer RDT CDP resource. Hence the WARN.
- */
- _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL);
- if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) {
- _r_cdp = NULL;
- _d_cdp = NULL;
- ret = -EINVAL;
+ case CDP_NONE:
+ return CDP_NONE;
}
-
-out:
- *r_cdp = _r_cdp;
- *d_cdp = _d_cdp;
-
- return ret;
}
/**
@@ -1171,11 +1126,11 @@ out:
* Return: false if CBM does not overlap, true if it does.
*/
static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
- unsigned long cbm, int closid, bool exclusive)
+ unsigned long cbm, int closid,
+ enum resctrl_conf_type type, bool exclusive)
{
enum rdtgrp_mode mode;
unsigned long ctrl_b;
- u32 *ctrl;
int i;
/* Check for any overlap with regions used by hardware directly */
@@ -1186,9 +1141,8 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d
}
/* Check for overlap with other resource groups */
- ctrl = d->ctrl_val;
- for (i = 0; i < closids_supported(); i++, ctrl++) {
- ctrl_b = *ctrl;
+ for (i = 0; i < closids_supported(); i++) {
+ ctrl_b = resctrl_arch_get_config(r, d, i, type);
mode = rdtgroup_mode_by_closid(i);
if (closid_allocated(i) && i != closid &&
mode != RDT_MODE_PSEUDO_LOCKSETUP) {
@@ -1208,7 +1162,7 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d
/**
* rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
- * @r: Resource to which domain instance @d belongs.
+ * @s: Schema for the resource to which domain instance @d belongs.
* @d: The domain instance for which @closid is being tested.
* @cbm: Capacity bitmask being tested.
* @closid: Intended closid for @cbm.
@@ -1226,19 +1180,19 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d
*
* Return: true if CBM overlap detected, false if there is no overlap
*/
-bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
+bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
unsigned long cbm, int closid, bool exclusive)
{
- struct rdt_resource *r_cdp;
- struct rdt_domain *d_cdp;
+ enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
+ struct rdt_resource *r = s->res;
- if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive))
+ if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
+ exclusive))
return true;
- if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0)
+ if (!resctrl_arch_get_cdp_enabled(r->rid))
return false;
-
- return __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive);
+ return __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
}
/**
@@ -1256,17 +1210,21 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
{
int closid = rdtgrp->closid;
+ struct resctrl_schema *s;
struct rdt_resource *r;
bool has_cache = false;
struct rdt_domain *d;
+ u32 ctrl;
- for_each_alloc_enabled_rdt_resource(r) {
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ r = s->res;
if (r->rid == RDT_RESOURCE_MBA)
continue;
has_cache = true;
list_for_each_entry(d, &r->domains, list) {
- if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid],
- rdtgrp->closid, false)) {
+ ctrl = resctrl_arch_get_config(r, d, closid,
+ s->conf_type);
+ if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
rdt_last_cmd_puts("Schemata overlaps\n");
return false;
}
@@ -1397,6 +1355,7 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
static int rdtgroup_size_show(struct kernfs_open_file *of,
struct seq_file *s, void *v)
{
+ struct resctrl_schema *schema;
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
struct rdt_domain *d;
@@ -1418,8 +1377,8 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
ret = -ENODEV;
} else {
seq_printf(s, "%*s:", max_name_width,
- rdtgrp->plr->r->name);
- size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
+ rdtgrp->plr->s->name);
+ size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
rdtgrp->plr->d,
rdtgrp->plr->cbm);
seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
@@ -1427,18 +1386,19 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
goto out;
}
- for_each_alloc_enabled_rdt_resource(r) {
+ list_for_each_entry(schema, &resctrl_schema_all, list) {
+ r = schema->res;
sep = false;
- seq_printf(s, "%*s:", max_name_width, r->name);
+ seq_printf(s, "%*s:", max_name_width, schema->name);
list_for_each_entry(d, &r->domains, list) {
if (sep)
seq_putc(s, ';');
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
size = 0;
} else {
- ctrl = (!is_mba_sc(r) ?
- d->ctrl_val[rdtgrp->closid] :
- d->mbps_val[rdtgrp->closid]);
+ ctrl = resctrl_arch_get_config(r, d,
+ rdtgrp->closid,
+ schema->conf_type);
if (r->rid == RDT_RESOURCE_MBA)
size = ctrl;
else
@@ -1757,14 +1717,14 @@ int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
return ret;
}
-static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
+static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
unsigned long fflags)
{
struct kernfs_node *kn_subdir;
int ret;
kn_subdir = kernfs_create_dir(kn_info, name,
- kn_info->mode, r);
+ kn_info->mode, priv);
if (IS_ERR(kn_subdir))
return PTR_ERR(kn_subdir);
@@ -1781,6 +1741,7 @@ static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
{
+ struct resctrl_schema *s;
struct rdt_resource *r;
unsigned long fflags;
char name[32];
@@ -1795,9 +1756,11 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
if (ret)
goto out_destroy;
- for_each_alloc_enabled_rdt_resource(r) {
+ /* loop over enabled controls, these are all alloc_enabled */
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ r = s->res;
fflags = r->fflags | RF_CTRL_INFO;
- ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags);
+ ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
if (ret)
goto out_destroy;
}
@@ -1867,7 +1830,7 @@ static void l2_qos_cfg_update(void *arg)
static inline bool is_mba_linear(void)
{
- return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear;
+ return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear;
}
static int set_cache_qos_cfg(int level, bool enable)
@@ -1888,7 +1851,7 @@ static int set_cache_qos_cfg(int level, bool enable)
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
return -ENOMEM;
- r_l = &rdt_resources_all[level];
+ r_l = &rdt_resources_all[level].r_resctrl;
list_for_each_entry(d, &r_l->domains, list) {
if (r_l->cache.arch_has_per_cpu_cfg)
/* Pick all the CPUs in the domain instance */
@@ -1914,14 +1877,16 @@ static int set_cache_qos_cfg(int level, bool enable)
/* Restore the qos cfg state when a domain comes online */
void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
{
- if (!r->alloc_capable)
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+
+ if (!r->cdp_capable)
return;
- if (r == &rdt_resources_all[RDT_RESOURCE_L2DATA])
- l2_qos_cfg_update(&r->alloc_enabled);
+ if (r->rid == RDT_RESOURCE_L2)
+ l2_qos_cfg_update(&hw_res->cdp_enabled);
- if (r == &rdt_resources_all[RDT_RESOURCE_L3DATA])
- l3_qos_cfg_update(&r->alloc_enabled);
+ if (r->rid == RDT_RESOURCE_L3)
+ l3_qos_cfg_update(&hw_res->cdp_enabled);
}
/*
@@ -1932,7 +1897,8 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
*/
static int set_mba_sc(bool mba_sc)
{
- struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA];
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+ struct rdt_hw_domain *hw_dom;
struct rdt_domain *d;
if (!is_mbm_enabled() || !is_mba_linear() ||
@@ -1940,73 +1906,60 @@ static int set_mba_sc(bool mba_sc)
return -EINVAL;
r->membw.mba_sc = mba_sc;
- list_for_each_entry(d, &r->domains, list)
- setup_default_ctrlval(r, d->ctrl_val, d->mbps_val);
+ list_for_each_entry(d, &r->domains, list) {
+ hw_dom = resctrl_to_arch_dom(d);
+ setup_default_ctrlval(r, hw_dom->ctrl_val, hw_dom->mbps_val);
+ }
return 0;
}
-static int cdp_enable(int level, int data_type, int code_type)
+static int cdp_enable(int level)
{
- struct rdt_resource *r_ldata = &rdt_resources_all[data_type];
- struct rdt_resource *r_lcode = &rdt_resources_all[code_type];
- struct rdt_resource *r_l = &rdt_resources_all[level];
+ struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl;
int ret;
- if (!r_l->alloc_capable || !r_ldata->alloc_capable ||
- !r_lcode->alloc_capable)
+ if (!r_l->alloc_capable)
return -EINVAL;
ret = set_cache_qos_cfg(level, true);
- if (!ret) {
- r_l->alloc_enabled = false;
- r_ldata->alloc_enabled = true;
- r_lcode->alloc_enabled = true;
- }
+ if (!ret)
+ rdt_resources_all[level].cdp_enabled = true;
+
return ret;
}
-static int cdpl3_enable(void)
+static void cdp_disable(int level)
{
- return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA,
- RDT_RESOURCE_L3CODE);
-}
+ struct rdt_hw_resource *r_hw = &rdt_resources_all[level];
-static int cdpl2_enable(void)
-{
- return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA,
- RDT_RESOURCE_L2CODE);
+ if (r_hw->cdp_enabled) {
+ set_cache_qos_cfg(level, false);
+ r_hw->cdp_enabled = false;
+ }
}
-static void cdp_disable(int level, int data_type, int code_type)
+int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable)
{
- struct rdt_resource *r = &rdt_resources_all[level];
+ struct rdt_hw_resource *hw_res = &rdt_resources_all[l];
- r->alloc_enabled = r->alloc_capable;
+ if (!hw_res->r_resctrl.cdp_capable)
+ return -EINVAL;
- if (rdt_resources_all[data_type].alloc_enabled) {
- rdt_resources_all[data_type].alloc_enabled = false;
- rdt_resources_all[code_type].alloc_enabled = false;
- set_cache_qos_cfg(level, false);
- }
-}
+ if (enable)
+ return cdp_enable(l);
-static void cdpl3_disable(void)
-{
- cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE);
-}
+ cdp_disable(l);
-static void cdpl2_disable(void)
-{
- cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE);
+ return 0;
}
static void cdp_disable_all(void)
{
- if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
- cdpl3_disable();
- if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
- cdpl2_disable();
+ if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
+ resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
+ if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
+ resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
}
/*
@@ -2084,10 +2037,10 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx)
int ret = 0;
if (ctx->enable_cdpl2)
- ret = cdpl2_enable();
+ ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true);
if (!ret && ctx->enable_cdpl3)
- ret = cdpl3_enable();
+ ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true);
if (!ret && ctx->enable_mba_mbps)
ret = set_mba_sc(true);
@@ -2095,6 +2048,92 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx)
return ret;
}
+static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type)
+{
+ struct resctrl_schema *s;
+ const char *suffix = "";
+ int ret, cl;
+
+ s = kzalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ s->res = r;
+ s->num_closid = resctrl_arch_get_num_closid(r);
+ if (resctrl_arch_get_cdp_enabled(r->rid))
+ s->num_closid /= 2;
+
+ s->conf_type = type;
+ switch (type) {
+ case CDP_CODE:
+ suffix = "CODE";
+ break;
+ case CDP_DATA:
+ suffix = "DATA";
+ break;
+ case CDP_NONE:
+ suffix = "";
+ break;
+ }
+
+ ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix);
+ if (ret >= sizeof(s->name)) {
+ kfree(s);
+ return -EINVAL;
+ }
+
+ cl = strlen(s->name);
+
+ /*
+ * If CDP is supported by this resource, but not enabled,
+ * include the suffix. This ensures the tabular format of the
+ * schemata file does not change between mounts of the filesystem.
+ */
+ if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid))
+ cl += 4;
+
+ if (cl > max_name_width)
+ max_name_width = cl;
+
+ INIT_LIST_HEAD(&s->list);
+ list_add(&s->list, &resctrl_schema_all);
+
+ return 0;
+}
+
+static int schemata_list_create(void)
+{
+ struct rdt_resource *r;
+ int ret = 0;
+
+ for_each_alloc_enabled_rdt_resource(r) {
+ if (resctrl_arch_get_cdp_enabled(r->rid)) {
+ ret = schemata_list_add(r, CDP_CODE);
+ if (ret)
+ break;
+
+ ret = schemata_list_add(r, CDP_DATA);
+ } else {
+ ret = schemata_list_add(r, CDP_NONE);
+ }
+
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static void schemata_list_destroy(void)
+{
+ struct resctrl_schema *s, *tmp;
+
+ list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) {
+ list_del(&s->list);
+ kfree(s);
+ }
+}
+
static int rdt_get_tree(struct fs_context *fc)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
@@ -2116,11 +2155,17 @@ static int rdt_get_tree(struct fs_context *fc)
if (ret < 0)
goto out_cdp;
+ ret = schemata_list_create();
+ if (ret) {
+ schemata_list_destroy();
+ goto out_mba;
+ }
+
closid_init();
ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
if (ret < 0)
- goto out_mba;
+ goto out_schemata_free;
if (rdt_mon_capable) {
ret = mongroup_create_dir(rdtgroup_default.kn,
@@ -2153,7 +2198,7 @@ static int rdt_get_tree(struct fs_context *fc)
static_branch_enable_cpuslocked(&rdt_enable_key);
if (is_mbm_enabled()) {
- r = &rdt_resources_all[RDT_RESOURCE_L3];
+ r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
list_for_each_entry(dom, &r->domains, list)
mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
}
@@ -2170,6 +2215,8 @@ out_mongrp:
kernfs_remove(kn_mongrp);
out_info:
kernfs_remove(kn_info);
+out_schemata_free:
+ schemata_list_destroy();
out_mba:
if (ctx->enable_mba_mbps)
set_mba_sc(false);
@@ -2257,6 +2304,8 @@ static int rdt_init_fs_context(struct fs_context *fc)
static int reset_all_ctrls(struct rdt_resource *r)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+ struct rdt_hw_domain *hw_dom;
struct msr_param msr_param;
cpumask_var_t cpu_mask;
struct rdt_domain *d;
@@ -2267,7 +2316,7 @@ static int reset_all_ctrls(struct rdt_resource *r)
msr_param.res = r;
msr_param.low = 0;
- msr_param.high = r->num_closid;
+ msr_param.high = hw_res->num_closid;
/*
* Disable resource control for this resource by setting all
@@ -2275,10 +2324,11 @@ static int reset_all_ctrls(struct rdt_resource *r)
* from each domain to update the MSRs below.
*/
list_for_each_entry(d, &r->domains, list) {
+ hw_dom = resctrl_to_arch_dom(d);
cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
- for (i = 0; i < r->num_closid; i++)
- d->ctrl_val[i] = r->default_ctrl;
+ for (i = 0; i < hw_res->num_closid; i++)
+ hw_dom->ctrl_val[i] = r->default_ctrl;
}
cpu = get_cpu();
/* Update CBM on this cpu if it's in cpu_mask. */
@@ -2408,6 +2458,7 @@ static void rdt_kill_sb(struct super_block *sb)
rmdir_all_sub();
rdt_pseudo_lock_release();
rdtgroup_default.mode = RDT_MODE_SHAREABLE;
+ schemata_list_destroy();
static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
static_branch_disable_cpuslocked(&rdt_mon_enable_key);
static_branch_disable_cpuslocked(&rdt_enable_key);
@@ -2642,23 +2693,24 @@ static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
* Set the RDT domain up to start off with all usable allocations. That is,
* all shareable and unused bits. All-zero CBM is invalid.
*/
-static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
+static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
u32 closid)
{
- struct rdt_resource *r_cdp = NULL;
- struct rdt_domain *d_cdp = NULL;
+ enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
+ enum resctrl_conf_type t = s->conf_type;
+ struct resctrl_staged_config *cfg;
+ struct rdt_resource *r = s->res;
u32 used_b = 0, unused_b = 0;
unsigned long tmp_cbm;
enum rdtgrp_mode mode;
- u32 peer_ctl, *ctrl;
+ u32 peer_ctl, ctrl_val;
int i;
- rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp);
- d->have_new_ctrl = false;
- d->new_ctrl = r->cache.shareable_bits;
+ cfg = &d->staged_config[t];
+ cfg->have_new_ctrl = false;
+ cfg->new_ctrl = r->cache.shareable_bits;
used_b = r->cache.shareable_bits;
- ctrl = d->ctrl_val;
- for (i = 0; i < closids_supported(); i++, ctrl++) {
+ for (i = 0; i < closids_supported(); i++) {
if (closid_allocated(i) && i != closid) {
mode = rdtgroup_mode_by_closid(i);
if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
@@ -2673,35 +2725,38 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
* usage to ensure there is no overlap
* with an exclusive group.
*/
- if (d_cdp)
- peer_ctl = d_cdp->ctrl_val[i];
+ if (resctrl_arch_get_cdp_enabled(r->rid))
+ peer_ctl = resctrl_arch_get_config(r, d, i,
+ peer_type);
else
peer_ctl = 0;
- used_b |= *ctrl | peer_ctl;
+ ctrl_val = resctrl_arch_get_config(r, d, i,
+ s->conf_type);
+ used_b |= ctrl_val | peer_ctl;
if (mode == RDT_MODE_SHAREABLE)
- d->new_ctrl |= *ctrl | peer_ctl;
+ cfg->new_ctrl |= ctrl_val | peer_ctl;
}
}
if (d->plr && d->plr->cbm > 0)
used_b |= d->plr->cbm;
unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
- d->new_ctrl |= unused_b;
+ cfg->new_ctrl |= unused_b;
/*
* Force the initial CBM to be valid, user can
* modify the CBM based on system availability.
*/
- d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r);
+ cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r);
/*
* Assign the u32 CBM to an unsigned long to ensure that
* bitmap_weight() does not access out-of-bound memory.
*/
- tmp_cbm = d->new_ctrl;
+ tmp_cbm = cfg->new_ctrl;
if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
- rdt_last_cmd_printf("No space on %s:%d\n", r->name, d->id);
+ rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id);
return -ENOSPC;
}
- d->have_new_ctrl = true;
+ cfg->have_new_ctrl = true;
return 0;
}
@@ -2716,13 +2771,13 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
* If there are no more shareable bits available on any domain then
* the entire allocation will fail.
*/
-static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid)
+static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
{
struct rdt_domain *d;
int ret;
- list_for_each_entry(d, &r->domains, list) {
- ret = __init_one_rdt_domain(d, r, closid);
+ list_for_each_entry(d, &s->res->domains, list) {
+ ret = __init_one_rdt_domain(d, s, closid);
if (ret < 0)
return ret;
}
@@ -2733,30 +2788,34 @@ static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid)
/* Initialize MBA resource with default values. */
static void rdtgroup_init_mba(struct rdt_resource *r)
{
+ struct resctrl_staged_config *cfg;
struct rdt_domain *d;
list_for_each_entry(d, &r->domains, list) {
- d->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl;
- d->have_new_ctrl = true;
+ cfg = &d->staged_config[CDP_NONE];
+ cfg->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl;
+ cfg->have_new_ctrl = true;
}
}
/* Initialize the RDT group's allocations. */
static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
{
+ struct resctrl_schema *s;
struct rdt_resource *r;
int ret;
- for_each_alloc_enabled_rdt_resource(r) {
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ r = s->res;
if (r->rid == RDT_RESOURCE_MBA) {
rdtgroup_init_mba(r);
} else {
- ret = rdtgroup_init_cat(r, rdtgrp->closid);
+ ret = rdtgroup_init_cat(s, rdtgrp->closid);
if (ret < 0)
return ret;
}
- ret = update_domains(r, rdtgrp->closid);
+ ret = resctrl_arch_update_domains(r, rdtgrp->closid);
if (ret < 0) {
rdt_last_cmd_puts("Failed to initialize allocations\n");
return ret;
@@ -3124,13 +3183,13 @@ out:
static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
{
- if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
+ if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
seq_puts(seq, ",cdp");
- if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
+ if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
seq_puts(seq, ",cdpl2");
- if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA]))
+ if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
seq_puts(seq, ",mba_MBps");
return 0;
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 08651a4e6aa0..42fc41dd0e1f 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -508,7 +508,7 @@ static struct irq_chip hpet_msi_controller __ro_after_init = {
.irq_set_affinity = msi_domain_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_write_msi_msg = hpet_msi_write_msg,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_AFFINITY_PRE_STARTUP,
};
static int hpet_msi_init(struct irq_domain *domain,
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 282b4ee1339f..15aefa3f3e18 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -235,15 +235,15 @@ static char irq_trigger[2];
*/
static void restore_ELCR(char *trigger)
{
- outb(trigger[0], 0x4d0);
- outb(trigger[1], 0x4d1);
+ outb(trigger[0], PIC_ELCR1);
+ outb(trigger[1], PIC_ELCR2);
}
static void save_ELCR(char *trigger)
{
/* IRQ 0,1,2,8,13 are marked as reserved */
- trigger[0] = inb(0x4d0) & 0xF8;
- trigger[1] = inb(0x4d1) & 0xDE;
+ trigger[0] = inb(PIC_ELCR1) & 0xF8;
+ trigger[1] = inb(PIC_ELCR2) & 0xDE;
}
static void i8259A_resume(void)
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 8f06449aab27..fed721f90116 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -19,6 +19,7 @@
#include <linux/smp.h>
#include <linux/pci.h>
+#include <asm/i8259.h>
#include <asm/io_apic.h>
#include <asm/acpi.h>
#include <asm/irqdomain.h>
@@ -251,7 +252,7 @@ static int __init ELCR_trigger(unsigned int irq)
{
unsigned int port;
- port = 0x4d0 + (irq >> 3);
+ port = PIC_ELCR1 + (irq >> 3);
return (inb(port) >> (irq & 7)) & 1;
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index ebfb91108232..0a40df66a40d 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -388,10 +388,11 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = {
},
{ /* Handle problems with rebooting on the OptiPlex 990. */
.callback = set_pci_reboot,
- .ident = "Dell OptiPlex 990",
+ .ident = "Dell OptiPlex 990 BIOS A0x",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"),
+ DMI_MATCH(DMI_BIOS_VERSION, "A0"),
},
},
{ /* Handle problems with rebooting on Dell 300's */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index bff3a784aec5..63b20536c8d2 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -572,16 +572,6 @@ void __init reserve_standard_io_resources(void)
}
-static __init void reserve_ibft_region(void)
-{
- unsigned long addr, size = 0;
-
- addr = find_ibft_region(&size);
-
- if (size)
- memblock_reserve(addr, size);
-}
-
static bool __init snb_gfx_workaround_needed(void)
{
#ifdef CONFIG_PCI
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9320285a5e29..85f6e242b6b4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -610,6 +610,9 @@ void set_cpu_sibling_map(int cpu)
if (threads > __max_smt_threads)
__max_smt_threads = threads;
+ for_each_cpu(i, topology_sibling_cpumask(cpu))
+ cpu_data(i).smt_active = threads > 1;
+
/*
* This needs a separate iteration over the cpus because we rely on all
* topology_sibling_cpumask links to be set-up.
@@ -1552,8 +1555,13 @@ static void remove_siblinginfo(int cpu)
for_each_cpu(sibling, topology_die_cpumask(cpu))
cpumask_clear_cpu(cpu, topology_die_cpumask(sibling));
- for_each_cpu(sibling, topology_sibling_cpumask(cpu))
+
+ for_each_cpu(sibling, topology_sibling_cpumask(cpu)) {
cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
+ if (cpumask_weight(topology_sibling_cpumask(sibling)) == 1)
+ cpu_data(sibling).smt_active = false;
+ }
+
for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
cpumask_clear(cpu_llc_shared_mask(cpu));
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 739be5da3bca..fe03bd978761 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -208,30 +208,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_mmu_after_set_cpuid(vcpu);
}
-static int is_efer_nx(void)
-{
- return host_efer & EFER_NX;
-}
-
-static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
-{
- int i;
- struct kvm_cpuid_entry2 *e, *entry;
-
- entry = NULL;
- for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
- e = &vcpu->arch.cpuid_entries[i];
- if (e->function == 0x80000001) {
- entry = e;
- break;
- }
- }
- if (entry && cpuid_entry_has(entry, X86_FEATURE_NX) && !is_efer_nx()) {
- cpuid_entry_clear(entry, X86_FEATURE_NX);
- printk(KERN_INFO "kvm: guest NX capability removed\n");
- }
-}
-
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -302,7 +278,6 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
vcpu->arch.cpuid_entries = e2;
vcpu->arch.cpuid_nent = cpuid->nent;
- cpuid_fix_nx_cap(vcpu);
kvm_update_cpuid_runtime(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
@@ -401,7 +376,6 @@ static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)
void kvm_set_cpu_caps(void)
{
- unsigned int f_nx = is_efer_nx() ? F(NX) : 0;
#ifdef CONFIG_X86_64
unsigned int f_gbpages = F(GBPAGES);
unsigned int f_lm = F(LM);
@@ -515,7 +489,7 @@ void kvm_set_cpu_caps(void)
F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
F(PAT) | F(PSE36) | 0 /* Reserved */ |
- f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
+ F(NX) | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW)
);
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 0b38f944c6b6..41d2a53c5dea 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1933,7 +1933,7 @@ ret_success:
void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *entry;
- struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+ struct kvm_vcpu_hv *hv_vcpu;
entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE, 0);
if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX) {
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 629a09ca9860..0b80263d46d8 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -541,17 +541,17 @@ static int picdev_slave_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
addr, len, val);
}
-static int picdev_eclr_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+static int picdev_elcr_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
gpa_t addr, int len, const void *val)
{
- return picdev_write(container_of(dev, struct kvm_pic, dev_eclr),
+ return picdev_write(container_of(dev, struct kvm_pic, dev_elcr),
addr, len, val);
}
-static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+static int picdev_elcr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
gpa_t addr, int len, void *val)
{
- return picdev_read(container_of(dev, struct kvm_pic, dev_eclr),
+ return picdev_read(container_of(dev, struct kvm_pic, dev_elcr),
addr, len, val);
}
@@ -577,9 +577,9 @@ static const struct kvm_io_device_ops picdev_slave_ops = {
.write = picdev_slave_write,
};
-static const struct kvm_io_device_ops picdev_eclr_ops = {
- .read = picdev_eclr_read,
- .write = picdev_eclr_write,
+static const struct kvm_io_device_ops picdev_elcr_ops = {
+ .read = picdev_elcr_read,
+ .write = picdev_elcr_write,
};
int kvm_pic_init(struct kvm *kvm)
@@ -602,7 +602,7 @@ int kvm_pic_init(struct kvm *kvm)
*/
kvm_iodevice_init(&s->dev_master, &picdev_master_ops);
kvm_iodevice_init(&s->dev_slave, &picdev_slave_ops);
- kvm_iodevice_init(&s->dev_eclr, &picdev_eclr_ops);
+ kvm_iodevice_init(&s->dev_elcr, &picdev_elcr_ops);
mutex_lock(&kvm->slots_lock);
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2,
&s->dev_master);
@@ -613,7 +613,7 @@ int kvm_pic_init(struct kvm *kvm)
if (ret < 0)
goto fail_unreg_2;
- ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev_eclr);
+ ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev_elcr);
if (ret < 0)
goto fail_unreg_1;
@@ -647,7 +647,7 @@ void kvm_pic_destroy(struct kvm *kvm)
mutex_lock(&kvm->slots_lock);
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master);
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave);
- kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr);
+ kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_elcr);
mutex_unlock(&kvm->slots_lock);
kvm->arch.vpic = NULL;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 9b64abf9b3f1..650642b18d15 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -55,7 +55,7 @@ struct kvm_pic {
int output; /* intr from master PIC */
struct kvm_io_device dev_master;
struct kvm_io_device dev_slave;
- struct kvm_io_device dev_eclr;
+ struct kvm_io_device dev_elcr;
void (*ack_notifier)(void *opaque, int irq);
unsigned long irq_states[PIC_NUM_PINS];
};
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c4f4fa23320e..47b765270239 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2535,6 +2535,7 @@ static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync)
{
struct kvm_mmu_page *sp;
+ bool locked = false;
/*
* Force write-protection if the page is being tracked. Note, the page
@@ -2557,9 +2558,34 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync)
if (sp->unsync)
continue;
+ /*
+ * TDP MMU page faults require an additional spinlock as they
+ * run with mmu_lock held for read, not write, and the unsync
+ * logic is not thread safe. Take the spinlock regardless of
+ * the MMU type to avoid extra conditionals/parameters, there's
+ * no meaningful penalty if mmu_lock is held for write.
+ */
+ if (!locked) {
+ locked = true;
+ spin_lock(&vcpu->kvm->arch.mmu_unsync_pages_lock);
+
+ /*
+ * Recheck after taking the spinlock, a different vCPU
+ * may have since marked the page unsync. A false
+ * positive on the unprotected check above is not
+ * possible as clearing sp->unsync _must_ hold mmu_lock
+ * for write, i.e. unsync cannot transition from 1->0
+ * while this CPU holds mmu_lock for read (or write).
+ */
+ if (READ_ONCE(sp->unsync))
+ continue;
+ }
+
WARN_ON(sp->role.level != PG_LEVEL_4K);
kvm_unsync_page(vcpu, sp);
}
+ if (locked)
+ spin_unlock(&vcpu->kvm->arch.mmu_unsync_pages_lock);
/*
* We need to ensure that the marking of unsync pages is visible
@@ -5537,6 +5563,8 @@ void kvm_mmu_init_vm(struct kvm *kvm)
{
struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
+ spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
+
if (!kvm_mmu_init_tdp_mmu(kvm))
/*
* No smp_load/store wrappers needed here as we are in
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 0853370bd811..d80cb122b5f3 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -43,6 +43,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
if (!kvm->arch.tdp_mmu_enabled)
return;
+ WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages));
WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
/*
@@ -81,8 +82,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
bool shared)
{
- gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
-
kvm_lockdep_assert_mmu_lock_held(kvm, shared);
if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
@@ -94,7 +93,7 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
list_del_rcu(&root->link);
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
- zap_gfn_range(kvm, root, 0, max_gfn, false, false, shared);
+ zap_gfn_range(kvm, root, 0, -1ull, false, false, shared);
call_rcu(&root->rcu_head, tdp_mmu_free_sp_rcu_callback);
}
@@ -724,13 +723,29 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t start, gfn_t end, bool can_yield, bool flush,
bool shared)
{
+ gfn_t max_gfn_host = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
+ bool zap_all = (start == 0 && end >= max_gfn_host);
struct tdp_iter iter;
+ /*
+ * No need to try to step down in the iterator when zapping all SPTEs,
+ * zapping the top-level non-leaf SPTEs will recurse on their children.
+ */
+ int min_level = zap_all ? root->role.level : PG_LEVEL_4K;
+
+ /*
+ * Bound the walk at host.MAXPHYADDR, guest accesses beyond that will
+ * hit a #PF(RSVD) and never get to an EPT Violation/Misconfig / #NPF,
+ * and so KVM will never install a SPTE for such addresses.
+ */
+ end = min(end, max_gfn_host);
+
kvm_lockdep_assert_mmu_lock_held(kvm, shared);
rcu_read_lock();
- tdp_root_for_each_pte(iter, root, start, end) {
+ for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
+ min_level, start, end) {
retry:
if (can_yield &&
tdp_mmu_iter_cond_resched(kvm, &iter, flush, shared)) {
@@ -744,9 +759,10 @@ retry:
/*
* If this is a non-last-level SPTE that covers a larger range
* than should be zapped, continue, and zap the mappings at a
- * lower level.
+ * lower level, except when zapping all SPTEs.
*/
- if ((iter.gfn < start ||
+ if (!zap_all &&
+ (iter.gfn < start ||
iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) &&
!is_last_spte(iter.old_spte, iter.level))
continue;
@@ -794,12 +810,11 @@ bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
void kvm_tdp_mmu_zap_all(struct kvm *kvm)
{
- gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
bool flush = false;
int i;
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
- flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, max_gfn,
+ flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull,
flush, false);
if (flush)
@@ -838,7 +853,6 @@ static struct kvm_mmu_page *next_invalidated_root(struct kvm *kvm,
*/
void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
{
- gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
struct kvm_mmu_page *next_root;
struct kvm_mmu_page *root;
bool flush = false;
@@ -854,8 +868,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
rcu_read_unlock();
- flush = zap_gfn_range(kvm, root, 0, max_gfn, true, flush,
- true);
+ flush = zap_gfn_range(kvm, root, 0, -1ull, true, flush, true);
/*
* Put the reference acquired in
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 61738ff8ef33..e5515477c30a 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -158,6 +158,9 @@ void recalc_intercepts(struct vcpu_svm *svm)
/* If SMI is not intercepted, ignore guest SMI intercept as well */
if (!intercept_smi)
vmcb_clr_intercept(c, INTERCEPT_SMI);
+
+ vmcb_set_intercept(c, INTERCEPT_VMLOAD);
+ vmcb_set_intercept(c, INTERCEPT_VMSAVE);
}
static void copy_vmcb_control_area(struct vmcb_control_area *dst,
@@ -503,7 +506,11 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
{
- const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
+ const u32 int_ctl_vmcb01_bits =
+ V_INTR_MASKING_MASK | V_GIF_MASK | V_GIF_ENABLE_MASK;
+
+ const u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
+
struct kvm_vcpu *vcpu = &svm->vcpu;
/*
@@ -535,8 +542,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
vcpu->arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
svm->vmcb->control.int_ctl =
- (svm->nested.ctl.int_ctl & ~mask) |
- (svm->vmcb01.ptr->control.int_ctl & mask);
+ (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
+ (svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits);
svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext;
svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e8ccab50ebf6..69639f9624f5 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1589,17 +1589,18 @@ static void svm_set_vintr(struct vcpu_svm *svm)
static void svm_clear_vintr(struct vcpu_svm *svm)
{
- const u32 mask = V_TPR_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK | V_INTR_MASKING_MASK;
svm_clr_intercept(svm, INTERCEPT_VINTR);
/* Drop int_ctl fields related to VINTR injection. */
- svm->vmcb->control.int_ctl &= mask;
+ svm->vmcb->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK;
if (is_guest_mode(&svm->vcpu)) {
- svm->vmcb01.ptr->control.int_ctl &= mask;
+ svm->vmcb01.ptr->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK;
WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) !=
(svm->nested.ctl.int_ctl & V_TPR_MASK));
- svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask;
+
+ svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl &
+ V_IRQ_INJECTION_BITS_MASK;
}
vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 1a52134b0c42..b3f77d18eb5a 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -330,6 +330,31 @@ void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu)
vcpu_put(vcpu);
}
+#define EPTP_PA_MASK GENMASK_ULL(51, 12)
+
+static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
+{
+ return VALID_PAGE(root_hpa) &&
+ ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
+}
+
+static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp,
+ gpa_t addr)
+{
+ uint i;
+ struct kvm_mmu_root_info *cached_root;
+
+ WARN_ON_ONCE(!mmu_is_nested(vcpu));
+
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+ cached_root = &vcpu->arch.mmu->prev_roots[i];
+
+ if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd,
+ eptp))
+ vcpu->arch.mmu->invlpg(vcpu, addr, cached_root->hpa);
+ }
+}
+
static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
struct x86_exception *fault)
{
@@ -342,10 +367,22 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
vm_exit_reason = EXIT_REASON_PML_FULL;
vmx->nested.pml_full = false;
exit_qualification &= INTR_INFO_UNBLOCK_NMI;
- } else if (fault->error_code & PFERR_RSVD_MASK)
- vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
- else
- vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
+ } else {
+ if (fault->error_code & PFERR_RSVD_MASK)
+ vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
+ else
+ vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
+
+ /*
+ * Although the caller (kvm_inject_emulated_page_fault) would
+ * have already synced the faulting address in the shadow EPT
+ * tables for the current EPTP12, we also need to sync it for
+ * any other cached EPTP02s based on the same EP4TA, since the
+ * TLB associates mappings to the EP4TA rather than the full EPTP.
+ */
+ nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer,
+ fault->address);
+ }
nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
vmcs12->guest_physical_address = fault->address;
@@ -5325,14 +5362,6 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
return nested_vmx_succeed(vcpu);
}
-#define EPTP_PA_MASK GENMASK_ULL(51, 12)
-
-static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
-{
- return VALID_PAGE(root_hpa) &&
- ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
-}
-
/* Emulate the INVEPT instruction */
static int handle_invept(struct kvm_vcpu *vcpu)
{
@@ -5826,7 +5855,8 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
if (is_nmi(intr_info))
return true;
else if (is_page_fault(intr_info))
- return vcpu->arch.apf.host_apf_flags || !enable_ept;
+ return vcpu->arch.apf.host_apf_flags ||
+ vmx_need_pf_intercept(vcpu);
else if (is_debug(intr_info) &&
vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index db88ed4f2121..17a1cb4b059d 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -522,7 +522,7 @@ static inline struct vmcs *alloc_vmcs(bool shadow)
static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
{
- return vmx->secondary_exec_control &
+ return secondary_exec_controls_get(vmx) &
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
}
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index bad4dee4f0e4..c6506c6a7092 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
lib-y := delay.o misc.o cmdline.o cpu.o
lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
+lib-y += pc-conf-reg.o
lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
diff --git a/arch/x86/lib/pc-conf-reg.c b/arch/x86/lib/pc-conf-reg.c
new file mode 100644
index 000000000000..febb52749e8d
--- /dev/null
+++ b/arch/x86/lib/pc-conf-reg.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for the configuration register space at port I/O locations
+ * 0x22 and 0x23 variously used by PC architectures, e.g. the MP Spec,
+ * Cyrix CPUs, numerous chipsets. As the space is indirectly addressed
+ * it may have to be protected with a spinlock, depending on the context.
+ */
+
+#include <linux/spinlock.h>
+
+#include <asm/pc-conf-reg.h>
+
+DEFINE_RAW_SPINLOCK(pc_conf_lock);
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index cd768dafca9e..933a2ebad471 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -376,12 +376,12 @@ static void enter_uniprocessor(void)
goto out;
}
- get_online_cpus();
+ cpus_read_lock();
cpumask_copy(downed_cpus, cpu_online_mask);
cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus);
if (num_online_cpus() > 1)
pr_notice("Disabling non-boot CPUs...\n");
- put_online_cpus();
+ cpus_read_unlock();
for_each_cpu(cpu, downed_cpus) {
err = remove_cpu(cpu);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index cfe6b1e85fa6..59ba2968af1b 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -8,11 +8,13 @@
#include <linux/export.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>
+#include <linux/sched/smt.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/nospec-branch.h>
#include <asm/cache.h>
+#include <asm/cacheflush.h>
#include <asm/apic.h>
#include <asm/perf_event.h>
@@ -43,10 +45,15 @@
*/
/*
- * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
- * stored in cpu_tlb_state.last_user_mm_ibpb.
+ * Bits to mangle the TIF_SPEC_* state into the mm pointer which is
+ * stored in cpu_tlb_state.last_user_mm_spec.
*/
#define LAST_USER_MM_IBPB 0x1UL
+#define LAST_USER_MM_L1D_FLUSH 0x2UL
+#define LAST_USER_MM_SPEC_MASK (LAST_USER_MM_IBPB | LAST_USER_MM_L1D_FLUSH)
+
+/* Bits to set when tlbstate and flush is (re)initialized */
+#define LAST_USER_MM_INIT LAST_USER_MM_IBPB
/*
* The x86 feature is called PCID (Process Context IDentifier). It is similar
@@ -317,20 +324,70 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
local_irq_restore(flags);
}
-static unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
+/*
+ * Invoked from return to user/guest by a task that opted-in to L1D
+ * flushing but ended up running on an SMT enabled core due to wrong
+ * affinity settings or CPU hotplug. This is part of the paranoid L1D flush
+ * contract which this task requested.
+ */
+static void l1d_flush_force_sigbus(struct callback_head *ch)
+{
+ force_sig(SIGBUS);
+}
+
+static void l1d_flush_evaluate(unsigned long prev_mm, unsigned long next_mm,
+ struct task_struct *next)
+{
+ /* Flush L1D if the outgoing task requests it */
+ if (prev_mm & LAST_USER_MM_L1D_FLUSH)
+ wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+
+ /* Check whether the incoming task opted in for L1D flush */
+ if (likely(!(next_mm & LAST_USER_MM_L1D_FLUSH)))
+ return;
+
+ /*
+ * Validate that it is not running on an SMT sibling as this would
+ * make the exercise pointless because the siblings share L1D. If
+ * it runs on a SMT sibling, notify it with SIGBUS on return to
+ * user/guest
+ */
+ if (this_cpu_read(cpu_info.smt_active)) {
+ clear_ti_thread_flag(&next->thread_info, TIF_SPEC_L1D_FLUSH);
+ next->l1d_flush_kill.func = l1d_flush_force_sigbus;
+ task_work_add(next, &next->l1d_flush_kill, TWA_RESUME);
+ }
+}
+
+static unsigned long mm_mangle_tif_spec_bits(struct task_struct *next)
{
unsigned long next_tif = task_thread_info(next)->flags;
- unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
+ unsigned long spec_bits = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK;
- return (unsigned long)next->mm | ibpb;
+ /*
+ * Ensure that the bit shift above works as expected and the two flags
+ * end up in bit 0 and 1.
+ */
+ BUILD_BUG_ON(TIF_SPEC_L1D_FLUSH != TIF_SPEC_IB + 1);
+
+ return (unsigned long)next->mm | spec_bits;
}
-static void cond_ibpb(struct task_struct *next)
+static void cond_mitigation(struct task_struct *next)
{
+ unsigned long prev_mm, next_mm;
+
if (!next || !next->mm)
return;
+ next_mm = mm_mangle_tif_spec_bits(next);
+ prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec);
+
/*
+ * Avoid user/user BTB poisoning by flushing the branch predictor
+ * when switching between processes. This stops one process from
+ * doing Spectre-v2 attacks on another.
+ *
* Both, the conditional and the always IBPB mode use the mm
* pointer to avoid the IBPB when switching between tasks of the
* same process. Using the mm pointer instead of mm->context.ctx_id
@@ -340,8 +397,6 @@ static void cond_ibpb(struct task_struct *next)
* exposed data is not really interesting.
*/
if (static_branch_likely(&switch_mm_cond_ibpb)) {
- unsigned long prev_mm, next_mm;
-
/*
* This is a bit more complex than the always mode because
* it has to handle two cases:
@@ -371,20 +426,14 @@ static void cond_ibpb(struct task_struct *next)
* Optimize this with reasonably small overhead for the
* above cases. Mangle the TIF_SPEC_IB bit into the mm
* pointer of the incoming task which is stored in
- * cpu_tlbstate.last_user_mm_ibpb for comparison.
- */
- next_mm = mm_mangle_tif_spec_ib(next);
- prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
-
- /*
+ * cpu_tlbstate.last_user_mm_spec for comparison.
+ *
* Issue IBPB only if the mm's are different and one or
* both have the IBPB bit set.
*/
if (next_mm != prev_mm &&
(next_mm | prev_mm) & LAST_USER_MM_IBPB)
indirect_branch_prediction_barrier();
-
- this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
}
if (static_branch_unlikely(&switch_mm_always_ibpb)) {
@@ -393,11 +442,22 @@ static void cond_ibpb(struct task_struct *next)
* different context than the user space task which ran
* last on this CPU.
*/
- if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
+ if ((prev_mm & ~LAST_USER_MM_SPEC_MASK) !=
+ (unsigned long)next->mm)
indirect_branch_prediction_barrier();
- this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
- }
}
+
+ if (static_branch_unlikely(&switch_mm_cond_l1d_flush)) {
+ /*
+ * Flush L1D when the outgoing task requested it and/or
+ * check whether the incoming task requested L1D flushing
+ * and ended up on an SMT sibling.
+ */
+ if (unlikely((prev_mm | next_mm) & LAST_USER_MM_L1D_FLUSH))
+ l1d_flush_evaluate(prev_mm, next_mm, next);
+ }
+
+ this_cpu_write(cpu_tlbstate.last_user_mm_spec, next_mm);
}
#ifdef CONFIG_PERF_EVENTS
@@ -531,11 +591,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
need_flush = true;
} else {
/*
- * Avoid user/user BTB poisoning by flushing the branch
- * predictor when switching between processes. This stops
- * one process from doing Spectre-v2 attacks on another.
+ * Apply process to process speculation vulnerability
+ * mitigations if applicable.
*/
- cond_ibpb(tsk);
+ cond_mitigation(tsk);
/*
* Stop remote flushes for the previous mm.
@@ -643,7 +702,7 @@ void initialize_tlbstate_and_flush(void)
write_cr3(build_cr3(mm->pgd, 0));
/* Reinitialize tlbstate. */
- this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
+ this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
this_cpu_write(cpu_tlbstate.next_asid, 1);
this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 16d76f814e9b..0fe6aacef3db 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1961,6 +1961,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (flags & BPF_TRAMP_F_CALL_ORIG)
stack_size += 8; /* room for return value of orig_call */
+ if (flags & BPF_TRAMP_F_IP_ARG)
+ stack_size += 8; /* room for IP address argument */
+
if (flags & BPF_TRAMP_F_SKIP_FRAME)
/* skip patched call instruction and point orig_call to actual
* body of the kernel function.
@@ -1974,6 +1977,22 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
EMIT1(0x53); /* push rbx */
+ if (flags & BPF_TRAMP_F_IP_ARG) {
+ /* Store IP address of the traced function:
+ * mov rax, QWORD PTR [rbp + 8]
+ * sub rax, X86_PATCH_SIZE
+ * mov QWORD PTR [rbp - stack_size], rax
+ */
+ emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
+ EMIT4(0x48, 0x83, 0xe8, X86_PATCH_SIZE);
+ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -stack_size);
+
+ /* Continue with stack_size for regs storage, stack will
+ * be correctly restored with 'leave' instruction.
+ */
+ stack_size -= 8;
+ }
+
save_regs(m, &prog, nr_args, stack_size);
if (flags & BPF_TRAMP_F_CALL_ORIG) {
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index d3a73f9335e1..97b63e35e152 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -13,9 +13,13 @@
#include <linux/dmi.h>
#include <linux/io.h>
#include <linux/smp.h>
+#include <linux/spinlock.h>
#include <asm/io_apic.h>
#include <linux/irq.h>
#include <linux/acpi.h>
+
+#include <asm/i8259.h>
+#include <asm/pc-conf-reg.h>
#include <asm/pci_x86.h>
#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
@@ -47,6 +51,8 @@ struct irq_router {
int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq,
int new);
+ int (*lvl)(struct pci_dev *router, struct pci_dev *dev, int pirq,
+ int irq);
};
struct irq_router_handler {
@@ -153,7 +159,7 @@ static void __init pirq_peer_trick(void)
void elcr_set_level_irq(unsigned int irq)
{
unsigned char mask = 1 << (irq & 7);
- unsigned int port = 0x4d0 + (irq >> 3);
+ unsigned int port = PIC_ELCR1 + (irq >> 3);
unsigned char val;
static u16 elcr_irq_mask;
@@ -170,6 +176,139 @@ void elcr_set_level_irq(unsigned int irq)
}
/*
+ * PIRQ routing for the M1487 ISA Bus Controller (IBC) ASIC used
+ * with the ALi FinALi 486 chipset. The IBC is not decoded in the
+ * PCI configuration space, so we identify it by the accompanying
+ * M1489 Cache-Memory PCI Controller (CMP) ASIC.
+ *
+ * There are four 4-bit mappings provided, spread across two PCI
+ * INTx Routing Table Mapping Registers, available in the port I/O
+ * space accessible indirectly via the index/data register pair at
+ * 0x22/0x23, located at indices 0x42 and 0x43 for the INT1/INT2
+ * and INT3/INT4 lines respectively. The INT1/INT3 and INT2/INT4
+ * lines are mapped in the low and the high 4-bit nibble of the
+ * corresponding register as follows:
+ *
+ * 0000 : Disabled
+ * 0001 : IRQ9
+ * 0010 : IRQ3
+ * 0011 : IRQ10
+ * 0100 : IRQ4
+ * 0101 : IRQ5
+ * 0110 : IRQ7
+ * 0111 : IRQ6
+ * 1000 : Reserved
+ * 1001 : IRQ11
+ * 1010 : Reserved
+ * 1011 : IRQ12
+ * 1100 : Reserved
+ * 1101 : IRQ14
+ * 1110 : Reserved
+ * 1111 : IRQ15
+ *
+ * In addition to the usual ELCR register pair there is a separate
+ * PCI INTx Sensitivity Register at index 0x44 in the same port I/O
+ * space, whose bits 3:0 select the trigger mode for INT[4:1] lines
+ * respectively. Any bit set to 1 causes interrupts coming on the
+ * corresponding line to be passed to ISA as edge-triggered and
+ * otherwise they are passed as level-triggered. Manufacturer's
+ * documentation says this register has to be set consistently with
+ * the relevant ELCR register.
+ *
+ * Accesses to the port I/O space concerned here need to be unlocked
+ * by writing the value of 0xc5 to the Lock Register at index 0x03
+ * beforehand. Any other value written to said register prevents
+ * further accesses from reaching the register file, except for the
+ * Lock Register being written with 0xc5 again.
+ *
+ * References:
+ *
+ * "M1489/M1487: 486 PCI Chip Set", Version 1.2, Acer Laboratories
+ * Inc., July 1997
+ */
+
+#define PC_CONF_FINALI_LOCK 0x03u
+#define PC_CONF_FINALI_PCI_INTX_RT1 0x42u
+#define PC_CONF_FINALI_PCI_INTX_RT2 0x43u
+#define PC_CONF_FINALI_PCI_INTX_SENS 0x44u
+
+#define PC_CONF_FINALI_LOCK_KEY 0xc5u
+
+static u8 read_pc_conf_nybble(u8 base, u8 index)
+{
+ u8 reg = base + (index >> 1);
+ u8 x;
+
+ x = pc_conf_get(reg);
+ return index & 1 ? x >> 4 : x & 0xf;
+}
+
+static void write_pc_conf_nybble(u8 base, u8 index, u8 val)
+{
+ u8 reg = base + (index >> 1);
+ u8 x;
+
+ x = pc_conf_get(reg);
+ x = index & 1 ? (x & 0x0f) | (val << 4) : (x & 0xf0) | val;
+ pc_conf_set(reg, x);
+}
+
+static int pirq_finali_get(struct pci_dev *router, struct pci_dev *dev,
+ int pirq)
+{
+ static const u8 irqmap[16] = {
+ 0, 9, 3, 10, 4, 5, 7, 6, 0, 11, 0, 12, 0, 14, 0, 15
+ };
+ unsigned long flags;
+ u8 x;
+
+ raw_spin_lock_irqsave(&pc_conf_lock, flags);
+ pc_conf_set(PC_CONF_FINALI_LOCK, PC_CONF_FINALI_LOCK_KEY);
+ x = irqmap[read_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, pirq - 1)];
+ pc_conf_set(PC_CONF_FINALI_LOCK, 0);
+ raw_spin_unlock_irqrestore(&pc_conf_lock, flags);
+ return x;
+}
+
+static int pirq_finali_set(struct pci_dev *router, struct pci_dev *dev,
+ int pirq, int irq)
+{
+ static const u8 irqmap[16] = {
+ 0, 0, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15
+ };
+ u8 val = irqmap[irq];
+ unsigned long flags;
+
+ if (!val)
+ return 0;
+
+ raw_spin_lock_irqsave(&pc_conf_lock, flags);
+ pc_conf_set(PC_CONF_FINALI_LOCK, PC_CONF_FINALI_LOCK_KEY);
+ write_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, pirq - 1, val);
+ pc_conf_set(PC_CONF_FINALI_LOCK, 0);
+ raw_spin_unlock_irqrestore(&pc_conf_lock, flags);
+ return 1;
+}
+
+static int pirq_finali_lvl(struct pci_dev *router, struct pci_dev *dev,
+ int pirq, int irq)
+{
+ u8 mask = ~(1u << (pirq - 1));
+ unsigned long flags;
+ u8 trig;
+
+ elcr_set_level_irq(irq);
+ raw_spin_lock_irqsave(&pc_conf_lock, flags);
+ pc_conf_set(PC_CONF_FINALI_LOCK, PC_CONF_FINALI_LOCK_KEY);
+ trig = pc_conf_get(PC_CONF_FINALI_PCI_INTX_SENS);
+ trig &= mask;
+ pc_conf_set(PC_CONF_FINALI_PCI_INTX_SENS, trig);
+ pc_conf_set(PC_CONF_FINALI_LOCK, 0);
+ raw_spin_unlock_irqrestore(&pc_conf_lock, flags);
+ return 1;
+}
+
+/*
* Common IRQ routing practice: nibbles in config space,
* offset by some magic constant.
*/
@@ -220,6 +359,74 @@ static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i
}
/*
+ * PIRQ routing for the 82374EB/82374SB EISA System Component (ESC)
+ * ASIC used with the Intel 82420 and 82430 PCIsets. The ESC is not
+ * decoded in the PCI configuration space, so we identify it by the
+ * accompanying 82375EB/82375SB PCI-EISA Bridge (PCEB) ASIC.
+ *
+ * There are four PIRQ Route Control registers, available in the
+ * port I/O space accessible indirectly via the index/data register
+ * pair at 0x22/0x23, located at indices 0x60/0x61/0x62/0x63 for the
+ * PIRQ0/1/2/3# lines respectively. The semantics is the same as
+ * with the PIIX router.
+ *
+ * Accesses to the port I/O space concerned here need to be unlocked
+ * by writing the value of 0x0f to the ESC ID Register at index 0x02
+ * beforehand. Any other value written to said register prevents
+ * further accesses from reaching the register file, except for the
+ * ESC ID Register being written with 0x0f again.
+ *
+ * References:
+ *
+ * "82374EB/82374SB EISA System Component (ESC)", Intel Corporation,
+ * Order Number: 290476-004, March 1996
+ *
+ * "82375EB/82375SB PCI-EISA Bridge (PCEB)", Intel Corporation, Order
+ * Number: 290477-004, March 1996
+ */
+
+#define PC_CONF_I82374_ESC_ID 0x02u
+#define PC_CONF_I82374_PIRQ_ROUTE_CONTROL 0x60u
+
+#define PC_CONF_I82374_ESC_ID_KEY 0x0fu
+
+static int pirq_esc_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ unsigned long flags;
+ int reg;
+ u8 x;
+
+ reg = pirq;
+ if (reg >= 1 && reg <= 4)
+ reg += PC_CONF_I82374_PIRQ_ROUTE_CONTROL - 1;
+
+ raw_spin_lock_irqsave(&pc_conf_lock, flags);
+ pc_conf_set(PC_CONF_I82374_ESC_ID, PC_CONF_I82374_ESC_ID_KEY);
+ x = pc_conf_get(reg);
+ pc_conf_set(PC_CONF_I82374_ESC_ID, 0);
+ raw_spin_unlock_irqrestore(&pc_conf_lock, flags);
+ return (x < 16) ? x : 0;
+}
+
+static int pirq_esc_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
+ int irq)
+{
+ unsigned long flags;
+ int reg;
+
+ reg = pirq;
+ if (reg >= 1 && reg <= 4)
+ reg += PC_CONF_I82374_PIRQ_ROUTE_CONTROL - 1;
+
+ raw_spin_lock_irqsave(&pc_conf_lock, flags);
+ pc_conf_set(PC_CONF_I82374_ESC_ID, PC_CONF_I82374_ESC_ID_KEY);
+ pc_conf_set(reg, irq);
+ pc_conf_set(PC_CONF_I82374_ESC_ID, 0);
+ raw_spin_unlock_irqrestore(&pc_conf_lock, flags);
+ return 1;
+}
+
+/*
* The Intel PIIX4 pirq rules are fairly simple: "pirq" is
* just a pointer to the config space.
*/
@@ -238,6 +445,50 @@ static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
}
/*
+ * PIRQ routing for the 82426EX ISA Bridge (IB) ASIC used with the
+ * Intel 82420EX PCIset.
+ *
+ * There are only two PIRQ Route Control registers, available in the
+ * combined 82425EX/82426EX PCI configuration space, at 0x66 and 0x67
+ * for the PIRQ0# and PIRQ1# lines respectively. The semantics is
+ * the same as with the PIIX router.
+ *
+ * References:
+ *
+ * "82420EX PCIset Data Sheet, 82425EX PCI System Controller (PSC)
+ * and 82426EX ISA Bridge (IB)", Intel Corporation, Order Number:
+ * 290488-004, December 1995
+ */
+
+#define PCI_I82426EX_PIRQ_ROUTE_CONTROL 0x66u
+
+static int pirq_ib_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ int reg;
+ u8 x;
+
+ reg = pirq;
+ if (reg >= 1 && reg <= 2)
+ reg += PCI_I82426EX_PIRQ_ROUTE_CONTROL - 1;
+
+ pci_read_config_byte(router, reg, &x);
+ return (x < 16) ? x : 0;
+}
+
+static int pirq_ib_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
+ int irq)
+{
+ int reg;
+
+ reg = pirq;
+ if (reg >= 1 && reg <= 2)
+ reg += PCI_I82426EX_PIRQ_ROUTE_CONTROL - 1;
+
+ pci_write_config_byte(router, reg, irq);
+ return 1;
+}
+
+/*
* The VIA pirq rules are nibble-based, like ALI,
* but without the ugly irq number munging.
* However, PIRQD is in the upper instead of lower 4 bits.
@@ -549,6 +800,11 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
return 0;
switch (device) {
+ case PCI_DEVICE_ID_INTEL_82375:
+ r->name = "PCEB/ESC";
+ r->get = pirq_esc_get;
+ r->set = pirq_esc_set;
+ return 1;
case PCI_DEVICE_ID_INTEL_82371FB_0:
case PCI_DEVICE_ID_INTEL_82371SB_0:
case PCI_DEVICE_ID_INTEL_82371AB_0:
@@ -594,6 +850,11 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
r->get = pirq_piix_get;
r->set = pirq_piix_set;
return 1;
+ case PCI_DEVICE_ID_INTEL_82425:
+ r->name = "PSC/IB";
+ r->get = pirq_ib_get;
+ r->set = pirq_ib_set;
+ return 1;
}
if ((device >= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN &&
@@ -745,6 +1006,12 @@ static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router,
static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
{
switch (device) {
+ case PCI_DEVICE_ID_AL_M1489:
+ r->name = "FinALi";
+ r->get = pirq_finali_get;
+ r->set = pirq_finali_set;
+ r->lvl = pirq_finali_lvl;
+ return 1;
case PCI_DEVICE_ID_AL_M1533:
case PCI_DEVICE_ID_AL_M1563:
r->name = "ALI";
@@ -968,11 +1235,17 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
} else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \
((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) {
msg = "found";
- elcr_set_level_irq(irq);
+ if (r->lvl)
+ r->lvl(pirq_router_dev, dev, pirq, irq);
+ else
+ elcr_set_level_irq(irq);
} else if (newirq && r->set &&
(dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
if (r->set(pirq_router_dev, dev, pirq, newirq)) {
- elcr_set_level_irq(newirq);
+ if (r->lvl)
+ r->lvl(pirq_router_dev, dev, pirq, newirq);
+ else
+ elcr_set_level_irq(newirq);
msg = "assigned";
irq = newirq;
}
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 3a070e7cdb8b..6665f8802098 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -58,19 +58,20 @@ static void msr_restore_context(struct saved_context *ctxt)
}
/**
- * __save_processor_state - save CPU registers before creating a
- * hibernation image and before restoring the memory state from it
- * @ctxt - structure to store the registers contents in
+ * __save_processor_state() - Save CPU registers before creating a
+ * hibernation image and before restoring
+ * the memory state from it
+ * @ctxt: Structure to store the registers contents in.
*
- * NOTE: If there is a CPU register the modification of which by the
- * boot kernel (ie. the kernel used for loading the hibernation image)
- * might affect the operations of the restored target kernel (ie. the one
- * saved in the hibernation image), then its contents must be saved by this
- * function. In other words, if kernel A is hibernated and different
- * kernel B is used for loading the hibernation image into memory, the
- * kernel A's __save_processor_state() function must save all registers
- * needed by kernel A, so that it can operate correctly after the resume
- * regardless of what kernel B does in the meantime.
+ * NOTE: If there is a CPU register the modification of which by the
+ * boot kernel (ie. the kernel used for loading the hibernation image)
+ * might affect the operations of the restored target kernel (ie. the one
+ * saved in the hibernation image), then its contents must be saved by this
+ * function. In other words, if kernel A is hibernated and different
+ * kernel B is used for loading the hibernation image into memory, the
+ * kernel A's __save_processor_state() function must save all registers
+ * needed by kernel A, so that it can operate correctly after the resume
+ * regardless of what kernel B does in the meantime.
*/
static void __save_processor_state(struct saved_context *ctxt)
{
@@ -181,9 +182,9 @@ static void fix_processor_context(void)
}
/**
- * __restore_processor_state - restore the contents of CPU registers saved
- * by __save_processor_state()
- * @ctxt - structure to load the registers contents from
+ * __restore_processor_state() - Restore the contents of CPU registers saved
+ * by __save_processor_state()
+ * @ctxt: Structure to load the registers contents from.
*
* The asm code that gets us here will have restored a usable GDT, although
* it will be pointing to the wrong alias.
diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk
index fd1ab80be0de..a4cf678cf5c8 100644
--- a/arch/x86/tools/chkobjdump.awk
+++ b/arch/x86/tools/chkobjdump.awk
@@ -10,6 +10,7 @@ BEGIN {
/^GNU objdump/ {
verstr = ""
+ gsub(/\(.*\)/, "");
for (i = 3; i <= NF; i++)
if (match($(i), "^[0-9]")) {
verstr = $(i);
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 9ba700dc47de..27c82207d387 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -26,6 +26,9 @@ static struct relocs relocs32;
#if ELF_BITS == 64
static struct relocs relocs32neg;
static struct relocs relocs64;
+#define FMT PRIu64
+#else
+#define FMT PRIu32
#endif
struct section {
@@ -389,7 +392,7 @@ static void read_ehdr(FILE *fp)
Elf_Shdr shdr;
if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0)
- die("Seek to %d failed: %s\n", ehdr.e_shoff, strerror(errno));
+ die("Seek to %" FMT " failed: %s\n", ehdr.e_shoff, strerror(errno));
if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
die("Cannot read initial ELF section header: %s\n", strerror(errno));
@@ -412,17 +415,17 @@ static void read_shdrs(FILE *fp)
secs = calloc(shnum, sizeof(struct section));
if (!secs) {
- die("Unable to allocate %d section headers\n",
+ die("Unable to allocate %ld section headers\n",
shnum);
}
if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) {
- die("Seek to %d failed: %s\n",
- ehdr.e_shoff, strerror(errno));
+ die("Seek to %" FMT " failed: %s\n",
+ ehdr.e_shoff, strerror(errno));
}
for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
- die("Cannot read ELF section headers %d/%d: %s\n",
+ die("Cannot read ELF section headers %d/%ld: %s\n",
i, shnum, strerror(errno));
sec->shdr.sh_name = elf_word_to_cpu(shdr.sh_name);
sec->shdr.sh_type = elf_word_to_cpu(shdr.sh_type);
@@ -450,12 +453,12 @@ static void read_strtabs(FILE *fp)
}
sec->strtab = malloc(sec->shdr.sh_size);
if (!sec->strtab) {
- die("malloc of %d bytes for strtab failed\n",
- sec->shdr.sh_size);
+ die("malloc of %" FMT " bytes for strtab failed\n",
+ sec->shdr.sh_size);
}
if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
- die("Seek to %d failed: %s\n",
- sec->shdr.sh_offset, strerror(errno));
+ die("Seek to %" FMT " failed: %s\n",
+ sec->shdr.sh_offset, strerror(errno));
}
if (fread(sec->strtab, 1, sec->shdr.sh_size, fp)
!= sec->shdr.sh_size) {
@@ -475,12 +478,12 @@ static void read_symtabs(FILE *fp)
}
sec->symtab = malloc(sec->shdr.sh_size);
if (!sec->symtab) {
- die("malloc of %d bytes for symtab failed\n",
- sec->shdr.sh_size);
+ die("malloc of %" FMT " bytes for symtab failed\n",
+ sec->shdr.sh_size);
}
if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
- die("Seek to %d failed: %s\n",
- sec->shdr.sh_offset, strerror(errno));
+ die("Seek to %" FMT " failed: %s\n",
+ sec->shdr.sh_offset, strerror(errno));
}
if (fread(sec->symtab, 1, sec->shdr.sh_size, fp)
!= sec->shdr.sh_size) {
@@ -508,12 +511,12 @@ static void read_relocs(FILE *fp)
}
sec->reltab = malloc(sec->shdr.sh_size);
if (!sec->reltab) {
- die("malloc of %d bytes for relocs failed\n",
- sec->shdr.sh_size);
+ die("malloc of %" FMT " bytes for relocs failed\n",
+ sec->shdr.sh_size);
}
if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
- die("Seek to %d failed: %s\n",
- sec->shdr.sh_offset, strerror(errno));
+ die("Seek to %" FMT " failed: %s\n",
+ sec->shdr.sh_offset, strerror(errno));
}
if (fread(sec->reltab, 1, sec->shdr.sh_size, fp)
!= sec->shdr.sh_size) {
diff --git a/arch/x86/tools/relocs.h b/arch/x86/tools/relocs.h
index 43c83c0fd22c..4c49c82446eb 100644
--- a/arch/x86/tools/relocs.h
+++ b/arch/x86/tools/relocs.h
@@ -17,6 +17,7 @@
#include <regex.h>
#include <tools/le_byteshift.h>
+__attribute__((__format__(printf, 1, 2)))
void die(char *fmt, ...) __attribute__((noreturn));
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index a48bf2d10ac2..764b54bef701 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -33,8 +33,6 @@ DECLARE_PER_CPU(unsigned long, nmi_count);
asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
{
- int irq = irq_find_mapping(NULL, hwirq);
-
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/* Debugging check for stack overflow: is there less than 1KB free? */
{
@@ -48,7 +46,7 @@ asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
sp - sizeof(struct thread_info));
}
#endif
- generic_handle_irq(irq);
+ generic_handle_domain_irq(NULL, hwirq);
}
int arch_show_interrupts(struct seq_file *p, int prec)
diff --git a/block/Kconfig b/block/Kconfig
index fd732aede922..bac87d773c54 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -114,16 +114,6 @@ config BLK_DEV_THROTTLING_LOW
Note, this is an experimental interface and could be changed someday.
-config BLK_CMDLINE_PARSER
- bool "Block device command line partition parser"
- help
- Enabling this option allows you to specify the partition layout from
- the kernel boot args. This is typically of use for embedded devices
- which don't otherwise have any standardized method for listing the
- partitions on a block device.
-
- See Documentation/block/cmdline-partition.rst for more information.
-
config BLK_WBT
bool "Enable support for block device writeback throttling"
help
@@ -251,4 +241,8 @@ config BLK_MQ_RDMA
config BLK_PM
def_bool BLOCK && PM
+# do not use in new code
+config BLOCK_HOLDER_DEPRECATED
+ bool
+
source "block/Kconfig.iosched"
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 64053d67a97b..2f2158e05a91 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -9,12 +9,6 @@ config MQ_IOSCHED_DEADLINE
help
MQ version of the deadline IO scheduler.
-config MQ_IOSCHED_DEADLINE_CGROUP
- tristate
- default y
- depends on MQ_IOSCHED_DEADLINE
- depends on BLK_CGROUP
-
config MQ_IOSCHED_KYBER
tristate "Kyber I/O scheduler"
default y
diff --git a/block/Makefile b/block/Makefile
index bfbe4e13ca1e..1d0d466f2182 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -22,13 +22,10 @@ obj-$(CONFIG_BLK_CGROUP_IOPRIO) += blk-ioprio.o
obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o
obj-$(CONFIG_BLK_CGROUP_IOCOST) += blk-iocost.o
obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o
-mq-deadline-y += mq-deadline-main.o
-mq-deadline-$(CONFIG_MQ_IOSCHED_DEADLINE_CGROUP)+= mq-deadline-cgroup.o
obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o
bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
obj-$(CONFIG_IOSCHED_BFQ) += bfq.o
-obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o
obj-$(CONFIG_BLK_DEV_INTEGRITY_T10) += t10-pi.o
obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
@@ -42,3 +39,4 @@ obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o
obj-$(CONFIG_BLK_PM) += blk-pm.o
obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += keyslot-manager.o blk-crypto.o
obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o
+obj-$(CONFIG_BLOCK_HOLDER_DEPRECATED) += holder.o
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 727955918563..480e1a134859 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2361,6 +2361,9 @@ static int bfq_request_merge(struct request_queue *q, struct request **req,
__rq = bfq_find_rq_fmerge(bfqd, bio, q);
if (__rq && elv_bio_merge_ok(__rq, bio)) {
*req = __rq;
+
+ if (blk_discard_mergable(__rq))
+ return ELEVATOR_DISCARD_MERGE;
return ELEVATOR_FRONT_MERGE;
}
@@ -2505,7 +2508,7 @@ void bfq_end_wr_async_queues(struct bfq_data *bfqd,
int i, j;
for (i = 0; i < 2; i++)
- for (j = 0; j < IOPRIO_BE_NR; j++)
+ for (j = 0; j < IOPRIO_NR_LEVELS; j++)
if (bfqg->async_bfqq[i][j])
bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]);
if (bfqg->async_idle_bfqq)
@@ -5266,8 +5269,8 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
switch (ioprio_class) {
default:
pr_err("bdi %s: bfq: bad prio class %d\n",
- bdi_dev_name(bfqq->bfqd->queue->backing_dev_info),
- ioprio_class);
+ bdi_dev_name(bfqq->bfqd->queue->disk->bdi),
+ ioprio_class);
fallthrough;
case IOPRIO_CLASS_NONE:
/*
@@ -5290,10 +5293,10 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
break;
}
- if (bfqq->new_ioprio >= IOPRIO_BE_NR) {
+ if (bfqq->new_ioprio >= IOPRIO_NR_LEVELS) {
pr_crit("bfq_set_next_ioprio_data: new_ioprio %d\n",
bfqq->new_ioprio);
- bfqq->new_ioprio = IOPRIO_BE_NR;
+ bfqq->new_ioprio = IOPRIO_NR_LEVELS - 1;
}
bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio);
@@ -5408,7 +5411,7 @@ static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
case IOPRIO_CLASS_RT:
return &bfqg->async_bfqq[0][ioprio];
case IOPRIO_CLASS_NONE:
- ioprio = IOPRIO_NORM;
+ ioprio = IOPRIO_BE_NORM;
fallthrough;
case IOPRIO_CLASS_BE:
return &bfqg->async_bfqq[1][ioprio];
@@ -6822,7 +6825,7 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
int i, j;
for (i = 0; i < 2; i++)
- for (j = 0; j < IOPRIO_BE_NR; j++)
+ for (j = 0; j < IOPRIO_NR_LEVELS; j++)
__bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]);
__bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 99c2a3cb081e..a73488eec8a4 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -931,7 +931,7 @@ struct bfq_group {
void *bfqd;
- struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
+ struct bfq_queue *async_bfqq[2][IOPRIO_NR_LEVELS];
struct bfq_queue *async_idle_bfqq;
struct bfq_entity *my_entity;
@@ -948,15 +948,13 @@ struct bfq_group {
struct bfq_entity entity;
struct bfq_sched_data sched_data;
- struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
+ struct bfq_queue *async_bfqq[2][IOPRIO_NR_LEVELS];
struct bfq_queue *async_idle_bfqq;
struct rb_root rq_pos_tree;
};
#endif
-struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
-
/* --------------- main algorithm interface ----------------- */
#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 7a462df71f68..b74cc0da118e 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -505,7 +505,7 @@ static void bfq_active_insert(struct bfq_service_tree *st,
*/
unsigned short bfq_ioprio_to_weight(int ioprio)
{
- return (IOPRIO_BE_NR - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF;
+ return (IOPRIO_NR_LEVELS - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF;
}
/**
@@ -514,12 +514,12 @@ unsigned short bfq_ioprio_to_weight(int ioprio)
*
* To preserve as much as possible the old only-ioprio user interface,
* 0 is used as an escape ioprio value for weights (numerically) equal or
- * larger than IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF.
+ * larger than IOPRIO_NR_LEVELS * BFQ_WEIGHT_CONVERSION_COEFF.
*/
static unsigned short bfq_weight_to_ioprio(int weight)
{
return max_t(int, 0,
- IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - weight);
+ IOPRIO_NR_LEVELS * BFQ_WEIGHT_CONVERSION_COEFF - weight);
}
static void bfq_get_entity(struct bfq_entity *entity)
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 4b4eb8964a6f..6b47cddbbca1 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -104,8 +104,7 @@ void bio_integrity_free(struct bio *bio)
struct bio_set *bs = bio->bi_pool;
if (bip->bip_flags & BIP_BLOCK_INTEGRITY)
- kfree(page_address(bip->bip_vec->bv_page) +
- bip->bip_vec->bv_offset);
+ kfree(bvec_virt(bip->bip_vec));
__bio_integrity_free(bs, bip);
bio->bi_integrity = NULL;
@@ -163,27 +162,23 @@ static blk_status_t bio_integrity_process(struct bio *bio,
struct bio_vec bv;
struct bio_integrity_payload *bip = bio_integrity(bio);
blk_status_t ret = BLK_STS_OK;
- void *prot_buf = page_address(bip->bip_vec->bv_page) +
- bip->bip_vec->bv_offset;
iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
iter.interval = 1 << bi->interval_exp;
iter.seed = proc_iter->bi_sector;
- iter.prot_buf = prot_buf;
+ iter.prot_buf = bvec_virt(bip->bip_vec);
__bio_for_each_segment(bv, bio, bviter, *proc_iter) {
- void *kaddr = kmap_atomic(bv.bv_page);
+ void *kaddr = bvec_kmap_local(&bv);
- iter.data_buf = kaddr + bv.bv_offset;
+ iter.data_buf = kaddr;
iter.data_size = bv.bv_len;
-
ret = proc_fn(&iter);
- if (ret) {
- kunmap_atomic(kaddr);
- return ret;
- }
+ kunmap_local(kaddr);
+
+ if (ret)
+ break;
- kunmap_atomic(kaddr);
}
return ret;
}
diff --git a/block/bio.c b/block/bio.c
index 1fab762e079b..e16849f46b0e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -25,6 +25,11 @@
#include "blk.h"
#include "blk-rq-qos.h"
+struct bio_alloc_cache {
+ struct bio_list free_list;
+ unsigned int nr;
+};
+
static struct biovec_slab {
int nr_vecs;
char *name;
@@ -246,12 +251,40 @@ static void bio_free(struct bio *bio)
void bio_init(struct bio *bio, struct bio_vec *table,
unsigned short max_vecs)
{
- memset(bio, 0, sizeof(*bio));
+ bio->bi_next = NULL;
+ bio->bi_bdev = NULL;
+ bio->bi_opf = 0;
+ bio->bi_flags = 0;
+ bio->bi_ioprio = 0;
+ bio->bi_write_hint = 0;
+ bio->bi_status = 0;
+ bio->bi_iter.bi_sector = 0;
+ bio->bi_iter.bi_size = 0;
+ bio->bi_iter.bi_idx = 0;
+ bio->bi_iter.bi_bvec_done = 0;
+ bio->bi_end_io = NULL;
+ bio->bi_private = NULL;
+#ifdef CONFIG_BLK_CGROUP
+ bio->bi_blkg = NULL;
+ bio->bi_issue.value = 0;
+#ifdef CONFIG_BLK_CGROUP_IOCOST
+ bio->bi_iocost_cost = 0;
+#endif
+#endif
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+ bio->bi_crypt_context = NULL;
+#endif
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ bio->bi_integrity = NULL;
+#endif
+ bio->bi_vcnt = 0;
+
atomic_set(&bio->__bi_remaining, 1);
atomic_set(&bio->__bi_cnt, 1);
- bio->bi_io_vec = table;
bio->bi_max_vecs = max_vecs;
+ bio->bi_io_vec = table;
+ bio->bi_pool = NULL;
}
EXPORT_SYMBOL(bio_init);
@@ -495,16 +528,11 @@ EXPORT_SYMBOL(bio_kmalloc);
void zero_fill_bio(struct bio *bio)
{
- unsigned long flags;
struct bio_vec bv;
struct bvec_iter iter;
- bio_for_each_segment(bv, bio, iter) {
- char *data = bvec_kmap_irq(&bv, &flags);
- memset(data, 0, bv.bv_len);
- flush_dcache_page(bv.bv_page);
- bvec_kunmap_irq(data, &flags);
- }
+ bio_for_each_segment(bv, bio, iter)
+ memzero_bvec(&bv);
}
EXPORT_SYMBOL(zero_fill_bio);
@@ -591,6 +619,53 @@ void guard_bio_eod(struct bio *bio)
bio_truncate(bio, maxsector << 9);
}
+#define ALLOC_CACHE_MAX 512
+#define ALLOC_CACHE_SLACK 64
+
+static void bio_alloc_cache_prune(struct bio_alloc_cache *cache,
+ unsigned int nr)
+{
+ unsigned int i = 0;
+ struct bio *bio;
+
+ while ((bio = bio_list_pop(&cache->free_list)) != NULL) {
+ cache->nr--;
+ bio_free(bio);
+ if (++i == nr)
+ break;
+ }
+}
+
+static int bio_cpu_dead(unsigned int cpu, struct hlist_node *node)
+{
+ struct bio_set *bs;
+
+ bs = hlist_entry_safe(node, struct bio_set, cpuhp_dead);
+ if (bs->cache) {
+ struct bio_alloc_cache *cache = per_cpu_ptr(bs->cache, cpu);
+
+ bio_alloc_cache_prune(cache, -1U);
+ }
+ return 0;
+}
+
+static void bio_alloc_cache_destroy(struct bio_set *bs)
+{
+ int cpu;
+
+ if (!bs->cache)
+ return;
+
+ cpuhp_state_remove_instance_nocalls(CPUHP_BIO_DEAD, &bs->cpuhp_dead);
+ for_each_possible_cpu(cpu) {
+ struct bio_alloc_cache *cache;
+
+ cache = per_cpu_ptr(bs->cache, cpu);
+ bio_alloc_cache_prune(cache, -1U);
+ }
+ free_percpu(bs->cache);
+}
+
/**
* bio_put - release a reference to a bio
* @bio: bio to release reference to
@@ -601,16 +676,23 @@ void guard_bio_eod(struct bio *bio)
**/
void bio_put(struct bio *bio)
{
- if (!bio_flagged(bio, BIO_REFFED))
- bio_free(bio);
- else {
+ if (unlikely(bio_flagged(bio, BIO_REFFED))) {
BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
+ if (!atomic_dec_and_test(&bio->__bi_cnt))
+ return;
+ }
- /*
- * last put frees it
- */
- if (atomic_dec_and_test(&bio->__bi_cnt))
- bio_free(bio);
+ if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
+ struct bio_alloc_cache *cache;
+
+ bio_uninit(bio);
+ cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
+ bio_list_add_head(&cache->free_list, bio);
+ if (++cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK)
+ bio_alloc_cache_prune(cache, ALLOC_CACHE_SLACK);
+ put_cpu();
+ } else {
+ bio_free(bio);
}
}
EXPORT_SYMBOL(bio_put);
@@ -979,6 +1061,14 @@ static int bio_iov_bvec_set_append(struct bio *bio, struct iov_iter *iter)
return 0;
}
+static void bio_put_pages(struct page **pages, size_t size, size_t off)
+{
+ size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE);
+
+ for (i = 0; i < nr; i++)
+ put_page(pages[i]);
+}
+
#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *))
/**
@@ -1023,8 +1113,10 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
if (same_page)
put_page(page);
} else {
- if (WARN_ON_ONCE(bio_full(bio, len)))
- return -EINVAL;
+ if (WARN_ON_ONCE(bio_full(bio, len))) {
+ bio_put_pages(pages + i, left, offset);
+ return -EINVAL;
+ }
__bio_add_page(bio, page, len, offset);
}
offset = 0;
@@ -1069,6 +1161,7 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
len = min_t(size_t, PAGE_SIZE - offset, left);
if (bio_add_hw_page(q, bio, page, len, offset,
max_append_sectors, &same_page) != len) {
+ bio_put_pages(pages + i, left, offset);
ret = -EINVAL;
break;
}
@@ -1191,27 +1284,15 @@ EXPORT_SYMBOL(bio_advance);
void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
struct bio *src, struct bvec_iter *src_iter)
{
- struct bio_vec src_bv, dst_bv;
- void *src_p, *dst_p;
- unsigned bytes;
-
while (src_iter->bi_size && dst_iter->bi_size) {
- src_bv = bio_iter_iovec(src, *src_iter);
- dst_bv = bio_iter_iovec(dst, *dst_iter);
-
- bytes = min(src_bv.bv_len, dst_bv.bv_len);
-
- src_p = kmap_atomic(src_bv.bv_page);
- dst_p = kmap_atomic(dst_bv.bv_page);
-
- memcpy(dst_p + dst_bv.bv_offset,
- src_p + src_bv.bv_offset,
- bytes);
+ struct bio_vec src_bv = bio_iter_iovec(src, *src_iter);
+ struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter);
+ unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len);
+ void *src_buf;
- kunmap_atomic(dst_p);
- kunmap_atomic(src_p);
-
- flush_dcache_page(dst_bv.bv_page);
+ src_buf = bvec_kmap_local(&src_bv);
+ memcpy_to_bvec(&dst_bv, src_buf);
+ kunmap_local(src_buf);
bio_advance_iter_single(src, src_iter, bytes);
bio_advance_iter_single(dst, dst_iter, bytes);
@@ -1463,12 +1544,15 @@ EXPORT_SYMBOL(bio_split);
* @bio: bio to trim
* @offset: number of sectors to trim from the front of @bio
* @size: size we want to trim @bio to, in sectors
+ *
+ * This function is typically used for bios that are cloned and submitted
+ * to the underlying device in parts.
*/
-void bio_trim(struct bio *bio, int offset, int size)
+void bio_trim(struct bio *bio, sector_t offset, sector_t size)
{
- /* 'bio' is a cloned bio which we need to trim to match
- * the given offset and size.
- */
+ if (WARN_ON_ONCE(offset > BIO_MAX_SECTORS || size > BIO_MAX_SECTORS ||
+ offset + size > bio->bi_iter.bi_size))
+ return;
size <<= 9;
if (offset == 0 && size == bio->bi_iter.bi_size)
@@ -1479,7 +1563,6 @@ void bio_trim(struct bio *bio, int offset, int size)
if (bio_integrity(bio))
bio_integrity_trim(bio);
-
}
EXPORT_SYMBOL_GPL(bio_trim);
@@ -1502,6 +1585,7 @@ int biovec_init_pool(mempool_t *pool, int pool_entries)
*/
void bioset_exit(struct bio_set *bs)
{
+ bio_alloc_cache_destroy(bs);
if (bs->rescue_workqueue)
destroy_workqueue(bs->rescue_workqueue);
bs->rescue_workqueue = NULL;
@@ -1563,12 +1647,18 @@ int bioset_init(struct bio_set *bs,
biovec_init_pool(&bs->bvec_pool, pool_size))
goto bad;
- if (!(flags & BIOSET_NEED_RESCUER))
- return 0;
-
- bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
- if (!bs->rescue_workqueue)
- goto bad;
+ if (flags & BIOSET_NEED_RESCUER) {
+ bs->rescue_workqueue = alloc_workqueue("bioset",
+ WQ_MEM_RECLAIM, 0);
+ if (!bs->rescue_workqueue)
+ goto bad;
+ }
+ if (flags & BIOSET_PERCPU_CACHE) {
+ bs->cache = alloc_percpu(struct bio_alloc_cache);
+ if (!bs->cache)
+ goto bad;
+ cpuhp_state_add_instance_nocalls(CPUHP_BIO_DEAD, &bs->cpuhp_dead);
+ }
return 0;
bad:
@@ -1595,6 +1685,46 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
}
EXPORT_SYMBOL(bioset_init_from_src);
+/**
+ * bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
+ * @kiocb: kiocb describing the IO
+ * @nr_vecs: number of iovecs to pre-allocate
+ * @bs: bio_set to allocate from
+ *
+ * Description:
+ * Like bio_alloc_bioset(), but pass in the kiocb. The kiocb is only
+ * used to check if we should dip into the per-cpu bio_set allocation
+ * cache. The allocation uses GFP_KERNEL internally. On return, the
+ * bio is marked BIO_PERCPU_CACHE, and the final put of the bio
+ * MUST be done from process context, not hard/soft IRQ.
+ *
+ */
+struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
+ struct bio_set *bs)
+{
+ struct bio_alloc_cache *cache;
+ struct bio *bio;
+
+ if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
+ return bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);
+
+ cache = per_cpu_ptr(bs->cache, get_cpu());
+ bio = bio_list_pop(&cache->free_list);
+ if (bio) {
+ cache->nr--;
+ put_cpu();
+ bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
+ bio->bi_pool = bs;
+ bio_set_flag(bio, BIO_PERCPU_CACHE);
+ return bio;
+ }
+ put_cpu();
+ bio = bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);
+ bio_set_flag(bio, BIO_PERCPU_CACHE);
+ return bio;
+}
+EXPORT_SYMBOL_GPL(bio_alloc_kiocb);
+
static int __init init_bio(void)
{
int i;
@@ -1609,6 +1739,9 @@ static int __init init_bio(void)
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
}
+ cpuhp_setup_state_multi(CPUHP_BIO_DEAD, "block/bio:dead", NULL,
+ bio_cpu_dead);
+
if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
panic("bio: can't allocate bios\n");
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 575d7a2e7203..3c88a79a319b 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -489,10 +489,9 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
const char *blkg_dev_name(struct blkcg_gq *blkg)
{
- /* some drivers (floppy) instantiate a queue w/o disk registered */
- if (blkg->q->backing_dev_info->dev)
- return bdi_dev_name(blkg->q->backing_dev_info);
- return NULL;
+ if (!blkg->q->disk || !blkg->q->disk->bdi->dev)
+ return NULL;
+ return bdi_dev_name(blkg->q->disk->bdi);
}
/**
@@ -790,6 +789,7 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
struct blkcg_gq *parent = blkg->parent;
struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
struct blkg_iostat cur, delta;
+ unsigned long flags;
unsigned int seq;
/* fetch the current per-cpu values */
@@ -799,21 +799,21 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
} while (u64_stats_fetch_retry(&bisc->sync, seq));
/* propagate percpu delta to global */
- u64_stats_update_begin(&blkg->iostat.sync);
+ flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
blkg_iostat_set(&delta, &cur);
blkg_iostat_sub(&delta, &bisc->last);
blkg_iostat_add(&blkg->iostat.cur, &delta);
blkg_iostat_add(&bisc->last, &delta);
- u64_stats_update_end(&blkg->iostat.sync);
+ u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
/* propagate global delta to parent (unless that's root) */
if (parent && parent->parent) {
- u64_stats_update_begin(&parent->iostat.sync);
+ flags = u64_stats_update_begin_irqsave(&parent->iostat.sync);
blkg_iostat_set(&delta, &blkg->iostat.cur);
blkg_iostat_sub(&delta, &blkg->iostat.last);
blkg_iostat_add(&parent->iostat.cur, &delta);
blkg_iostat_add(&blkg->iostat.last, &delta);
- u64_stats_update_end(&parent->iostat.sync);
+ u64_stats_update_end_irqrestore(&parent->iostat.sync, flags);
}
}
@@ -848,6 +848,7 @@ static void blkcg_fill_root_iostats(void)
memset(&tmp, 0, sizeof(tmp));
for_each_possible_cpu(cpu) {
struct disk_stats *cpu_dkstats;
+ unsigned long flags;
cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu);
tmp.ios[BLKG_IOSTAT_READ] +=
@@ -864,104 +865,86 @@ static void blkcg_fill_root_iostats(void)
tmp.bytes[BLKG_IOSTAT_DISCARD] +=
cpu_dkstats->sectors[STAT_DISCARD] << 9;
- u64_stats_update_begin(&blkg->iostat.sync);
+ flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
blkg_iostat_set(&blkg->iostat.cur, &tmp);
- u64_stats_update_end(&blkg->iostat.sync);
+ u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
}
}
}
-static int blkcg_print_stat(struct seq_file *sf, void *v)
+static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
{
- struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
- struct blkcg_gq *blkg;
-
- if (!seq_css(sf)->parent)
- blkcg_fill_root_iostats();
- else
- cgroup_rstat_flush(blkcg->css.cgroup);
-
- rcu_read_lock();
-
- hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
- struct blkg_iostat_set *bis = &blkg->iostat;
- const char *dname;
- char *buf;
- u64 rbytes, wbytes, rios, wios, dbytes, dios;
- size_t size = seq_get_buf(sf, &buf), off = 0;
- int i;
- bool has_stats = false;
- unsigned seq;
+ struct blkg_iostat_set *bis = &blkg->iostat;
+ u64 rbytes, wbytes, rios, wios, dbytes, dios;
+ bool has_stats = false;
+ const char *dname;
+ unsigned seq;
+ int i;
- spin_lock_irq(&blkg->q->queue_lock);
+ if (!blkg->online)
+ return;
- if (!blkg->online)
- goto skip;
+ dname = blkg_dev_name(blkg);
+ if (!dname)
+ return;
- dname = blkg_dev_name(blkg);
- if (!dname)
- goto skip;
+ seq_printf(s, "%s ", dname);
- /*
- * Hooray string manipulation, count is the size written NOT
- * INCLUDING THE \0, so size is now count+1 less than what we
- * had before, but we want to start writing the next bit from
- * the \0 so we only add count to buf.
- */
- off += scnprintf(buf+off, size-off, "%s ", dname);
+ do {
+ seq = u64_stats_fetch_begin(&bis->sync);
+
+ rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
+ wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
+ dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
+ rios = bis->cur.ios[BLKG_IOSTAT_READ];
+ wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
+ dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
+ } while (u64_stats_fetch_retry(&bis->sync, seq));
+
+ if (rbytes || wbytes || rios || wios) {
+ has_stats = true;
+ seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
+ rbytes, wbytes, rios, wios,
+ dbytes, dios);
+ }
- do {
- seq = u64_stats_fetch_begin(&bis->sync);
+ if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
+ has_stats = true;
+ seq_printf(s, " use_delay=%d delay_nsec=%llu",
+ atomic_read(&blkg->use_delay),
+ atomic64_read(&blkg->delay_nsec));
+ }
- rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
- wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
- dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
- rios = bis->cur.ios[BLKG_IOSTAT_READ];
- wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
- dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
- } while (u64_stats_fetch_retry(&bis->sync, seq));
+ for (i = 0; i < BLKCG_MAX_POLS; i++) {
+ struct blkcg_policy *pol = blkcg_policy[i];
- if (rbytes || wbytes || rios || wios) {
- has_stats = true;
- off += scnprintf(buf+off, size-off,
- "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
- rbytes, wbytes, rios, wios,
- dbytes, dios);
- }
+ if (!blkg->pd[i] || !pol->pd_stat_fn)
+ continue;
- if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
+ if (pol->pd_stat_fn(blkg->pd[i], s))
has_stats = true;
- off += scnprintf(buf+off, size-off,
- " use_delay=%d delay_nsec=%llu",
- atomic_read(&blkg->use_delay),
- (unsigned long long)atomic64_read(&blkg->delay_nsec));
- }
+ }
- for (i = 0; i < BLKCG_MAX_POLS; i++) {
- struct blkcg_policy *pol = blkcg_policy[i];
- size_t written;
+ if (has_stats)
+ seq_printf(s, "\n");
+}
- if (!blkg->pd[i] || !pol->pd_stat_fn)
- continue;
+static int blkcg_print_stat(struct seq_file *sf, void *v)
+{
+ struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+ struct blkcg_gq *blkg;
- written = pol->pd_stat_fn(blkg->pd[i], buf+off, size-off);
- if (written)
- has_stats = true;
- off += written;
- }
+ if (!seq_css(sf)->parent)
+ blkcg_fill_root_iostats();
+ else
+ cgroup_rstat_flush(blkcg->css.cgroup);
- if (has_stats) {
- if (off < size - 1) {
- off += scnprintf(buf+off, size-off, "\n");
- seq_commit(sf, off);
- } else {
- seq_commit(sf, -1);
- }
- }
- skip:
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+ spin_lock_irq(&blkg->q->queue_lock);
+ blkcg_print_one_stat(blkg, sf);
spin_unlock_irq(&blkg->q->queue_lock);
}
-
rcu_read_unlock();
return 0;
}
diff --git a/block/blk-core.c b/block/blk-core.c
index 04477697ee4b..5454db2fa263 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -14,7 +14,6 @@
*/
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
@@ -122,7 +121,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->internal_tag = BLK_MQ_NO_TAG;
rq->start_time_ns = ktime_get_ns();
rq->part = NULL;
- refcount_set(&rq->ref, 1);
blk_crypto_rq_set_defaults(rq);
}
EXPORT_SYMBOL(blk_rq_init);
@@ -394,10 +392,7 @@ void blk_cleanup_queue(struct request_queue *q)
/* for synchronous bio-based driver finish in-flight integrity i/o */
blk_flush_integrity();
- /* @q won't process any more request, flush async actions */
- del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
blk_sync_queue(q);
-
if (queue_is_mq(q))
blk_mq_exit_queue(q);
@@ -534,20 +529,14 @@ struct request_queue *blk_alloc_queue(int node_id)
if (ret)
goto fail_id;
- q->backing_dev_info = bdi_alloc(node_id);
- if (!q->backing_dev_info)
- goto fail_split;
-
q->stats = blk_alloc_queue_stats();
if (!q->stats)
- goto fail_stats;
+ goto fail_split;
q->node = node_id;
atomic_set(&q->nr_active_requests_shared_sbitmap, 0);
- timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
- laptop_mode_timer_fn, 0);
timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
INIT_WORK(&q->timeout_work, blk_timeout_work);
INIT_LIST_HEAD(&q->icq_list);
@@ -572,7 +561,7 @@ struct request_queue *blk_alloc_queue(int node_id)
if (percpu_ref_init(&q->q_usage_counter,
blk_queue_usage_counter_release,
PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
- goto fail_bdi;
+ goto fail_stats;
if (blkcg_init_queue(q))
goto fail_ref;
@@ -585,10 +574,8 @@ struct request_queue *blk_alloc_queue(int node_id)
fail_ref:
percpu_ref_exit(&q->q_usage_counter);
-fail_bdi:
- blk_free_queue_stats(q->stats);
fail_stats:
- bdi_put(q->backing_dev_info);
+ blk_free_queue_stats(q->stats);
fail_split:
bioset_exit(&q->bio_split);
fail_id:
@@ -834,7 +821,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
}
if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
- bio->bi_opf &= ~REQ_HIPRI;
+ bio_clear_hipri(bio);
switch (bio_op(bio)) {
case REQ_OP_DISCARD:
diff --git a/block/blk-crypto.c b/block/blk-crypto.c
index c5bdaafffa29..103c2e2d50d6 100644
--- a/block/blk-crypto.c
+++ b/block/blk-crypto.c
@@ -332,7 +332,7 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key,
if (mode->keysize == 0)
return -EINVAL;
- if (dun_bytes == 0 || dun_bytes > BLK_CRYPTO_MAX_IV_SIZE)
+ if (dun_bytes == 0 || dun_bytes > mode->ivsize)
return -EINVAL;
if (!is_power_of_2(data_unit_size))
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 1002f6c58181..4201728bf3a5 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -262,6 +262,11 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
}
+bool is_flush_rq(struct request *rq)
+{
+ return rq->end_io == flush_end_io;
+}
+
/**
* blk_kick_flush - consider issuing flush request
* @q: request_queue being kicked
@@ -329,6 +334,14 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
flush_rq->rq_flags |= RQF_FLUSH_SEQ;
flush_rq->rq_disk = first_rq->rq_disk;
flush_rq->end_io = flush_end_io;
+ /*
+ * Order WRITE ->end_io and WRITE rq->ref, and its pair is the one
+ * implied in refcount_inc_not_zero() called from
+ * blk_mq_find_and_get_req(), which orders WRITE/READ flush_rq->ref
+ * and READ flush_rq->end_io
+ */
+ smp_wmb();
+ refcount_set(&flush_rq->ref, 1);
blk_flush_queue_rq(flush_rq, false);
}
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 410da060d1f5..69a12177dfb6 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -431,13 +431,15 @@ void blk_integrity_unregister(struct gendisk *disk)
}
EXPORT_SYMBOL(blk_integrity_unregister);
-void blk_integrity_add(struct gendisk *disk)
+int blk_integrity_add(struct gendisk *disk)
{
- if (kobject_init_and_add(&disk->integrity_kobj, &integrity_ktype,
- &disk_to_dev(disk)->kobj, "%s", "integrity"))
- return;
+ int ret;
- kobject_uevent(&disk->integrity_kobj, KOBJ_ADD);
+ ret = kobject_init_and_add(&disk->integrity_kobj, &integrity_ktype,
+ &disk_to_dev(disk)->kobj, "%s", "integrity");
+ if (!ret)
+ kobject_uevent(&disk->integrity_kobj, KOBJ_ADD);
+ return ret;
}
void blk_integrity_del(struct gendisk *disk)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 5fac3757e6e0..b3880e4ba22a 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2988,34 +2988,29 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
kfree(iocg);
}
-static size_t ioc_pd_stat(struct blkg_policy_data *pd, char *buf, size_t size)
+static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
struct ioc_gq *iocg = pd_to_iocg(pd);
struct ioc *ioc = iocg->ioc;
- size_t pos = 0;
if (!ioc->enabled)
- return 0;
+ return false;
if (iocg->level == 0) {
unsigned vp10k = DIV64_U64_ROUND_CLOSEST(
ioc->vtime_base_rate * 10000,
VTIME_PER_USEC);
- pos += scnprintf(buf + pos, size - pos, " cost.vrate=%u.%02u",
- vp10k / 100, vp10k % 100);
+ seq_printf(s, " cost.vrate=%u.%02u", vp10k / 100, vp10k % 100);
}
- pos += scnprintf(buf + pos, size - pos, " cost.usage=%llu",
- iocg->last_stat.usage_us);
+ seq_printf(s, " cost.usage=%llu", iocg->last_stat.usage_us);
if (blkcg_debug_stats)
- pos += scnprintf(buf + pos, size - pos,
- " cost.wait=%llu cost.indebt=%llu cost.indelay=%llu",
- iocg->last_stat.wait_us,
- iocg->last_stat.indebt_us,
- iocg->last_stat.indelay_us);
-
- return pos;
+ seq_printf(s, " cost.wait=%llu cost.indebt=%llu cost.indelay=%llu",
+ iocg->last_stat.wait_us,
+ iocg->last_stat.indebt_us,
+ iocg->last_stat.indelay_us);
+ return true;
}
static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
@@ -3061,19 +3056,19 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
if (v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX)
return -EINVAL;
- spin_lock(&blkcg->lock);
+ spin_lock_irq(&blkcg->lock);
iocc->dfl_weight = v * WEIGHT_ONE;
hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
struct ioc_gq *iocg = blkg_to_iocg(blkg);
if (iocg) {
- spin_lock_irq(&iocg->ioc->lock);
+ spin_lock(&iocg->ioc->lock);
ioc_now(iocg->ioc, &now);
weight_updated(iocg, &now);
- spin_unlock_irq(&iocg->ioc->lock);
+ spin_unlock(&iocg->ioc->lock);
}
}
- spin_unlock(&blkcg->lock);
+ spin_unlock_irq(&blkcg->lock);
return nbytes;
}
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index d8b0d8bd132b..c0545f9da549 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -890,8 +890,7 @@ static int iolatency_print_limit(struct seq_file *sf, void *v)
return 0;
}
-static size_t iolatency_ssd_stat(struct iolatency_grp *iolat, char *buf,
- size_t size)
+static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
{
struct latency_stat stat;
int cpu;
@@ -906,39 +905,40 @@ static size_t iolatency_ssd_stat(struct iolatency_grp *iolat, char *buf,
preempt_enable();
if (iolat->rq_depth.max_depth == UINT_MAX)
- return scnprintf(buf, size, " missed=%llu total=%llu depth=max",
- (unsigned long long)stat.ps.missed,
- (unsigned long long)stat.ps.total);
- return scnprintf(buf, size, " missed=%llu total=%llu depth=%u",
- (unsigned long long)stat.ps.missed,
- (unsigned long long)stat.ps.total,
- iolat->rq_depth.max_depth);
+ seq_printf(s, " missed=%llu total=%llu depth=max",
+ (unsigned long long)stat.ps.missed,
+ (unsigned long long)stat.ps.total);
+ else
+ seq_printf(s, " missed=%llu total=%llu depth=%u",
+ (unsigned long long)stat.ps.missed,
+ (unsigned long long)stat.ps.total,
+ iolat->rq_depth.max_depth);
+ return true;
}
-static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
- size_t size)
+static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
struct iolatency_grp *iolat = pd_to_lat(pd);
unsigned long long avg_lat;
unsigned long long cur_win;
if (!blkcg_debug_stats)
- return 0;
+ return false;
if (iolat->ssd)
- return iolatency_ssd_stat(iolat, buf, size);
+ return iolatency_ssd_stat(iolat, s);
avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC);
cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC);
if (iolat->rq_depth.max_depth == UINT_MAX)
- return scnprintf(buf, size, " depth=max avg_lat=%llu win=%llu",
- avg_lat, cur_win);
-
- return scnprintf(buf, size, " depth=%u avg_lat=%llu win=%llu",
- iolat->rq_depth.max_depth, avg_lat, cur_win);
+ seq_printf(s, " depth=max avg_lat=%llu win=%llu",
+ avg_lat, cur_win);
+ else
+ seq_printf(s, " depth=%u avg_lat=%llu win=%llu",
+ iolat->rq_depth.max_depth, avg_lat, cur_win);
+ return true;
}
-
static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
struct request_queue *q,
struct blkcg *blkcg)
diff --git a/block/blk-map.c b/block/blk-map.c
index 3743158ddaeb..d1448aaad980 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -400,7 +400,7 @@ static void bio_copy_kern_endio_read(struct bio *bio)
struct bvec_iter_all iter_all;
bio_for_each_segment_all(bvec, bio, iter_all) {
- memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
+ memcpy_from_bvec(p, bvec);
p += bvec->bv_len;
}
diff --git a/block/blk-merge.c b/block/blk-merge.c
index a11b3b53717e..7a5c81c02c80 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -285,7 +285,7 @@ split:
* iopoll in direct IO routine. Given performance gain of iopoll for
* big IO can be trival, disable iopoll when split needed.
*/
- bio->bi_opf &= ~REQ_HIPRI;
+ bio_clear_hipri(bio);
return bio_split(bio, sectors, GFP_NOIO, bs);
}
@@ -348,6 +348,8 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
trace_block_split(split, (*bio)->bi_iter.bi_sector);
submit_bio_noacct(*bio);
*bio = split;
+
+ blk_throtl_charge_bio_split(*bio);
}
}
@@ -705,22 +707,6 @@ static void blk_account_io_merge_request(struct request *req)
}
}
-/*
- * Two cases of handling DISCARD merge:
- * If max_discard_segments > 1, the driver takes every bio
- * as a range and send them to controller together. The ranges
- * needn't to be contiguous.
- * Otherwise, the bios/requests will be handled as same as
- * others which should be contiguous.
- */
-static inline bool blk_discard_mergable(struct request *req)
-{
- if (req_op(req) == REQ_OP_DISCARD &&
- queue_max_discard_segments(req->q) > 1)
- return true;
- return false;
-}
-
static enum elv_merge blk_try_req_merge(struct request *req,
struct request *next)
{
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 7b52e7657b2d..253c857cba47 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -45,60 +45,12 @@ static void blk_mq_hw_sysfs_release(struct kobject *kobj)
kfree(hctx);
}
-struct blk_mq_ctx_sysfs_entry {
- struct attribute attr;
- ssize_t (*show)(struct blk_mq_ctx *, char *);
- ssize_t (*store)(struct blk_mq_ctx *, const char *, size_t);
-};
-
struct blk_mq_hw_ctx_sysfs_entry {
struct attribute attr;
ssize_t (*show)(struct blk_mq_hw_ctx *, char *);
ssize_t (*store)(struct blk_mq_hw_ctx *, const char *, size_t);
};
-static ssize_t blk_mq_sysfs_show(struct kobject *kobj, struct attribute *attr,
- char *page)
-{
- struct blk_mq_ctx_sysfs_entry *entry;
- struct blk_mq_ctx *ctx;
- struct request_queue *q;
- ssize_t res;
-
- entry = container_of(attr, struct blk_mq_ctx_sysfs_entry, attr);
- ctx = container_of(kobj, struct blk_mq_ctx, kobj);
- q = ctx->queue;
-
- if (!entry->show)
- return -EIO;
-
- mutex_lock(&q->sysfs_lock);
- res = entry->show(ctx, page);
- mutex_unlock(&q->sysfs_lock);
- return res;
-}
-
-static ssize_t blk_mq_sysfs_store(struct kobject *kobj, struct attribute *attr,
- const char *page, size_t length)
-{
- struct blk_mq_ctx_sysfs_entry *entry;
- struct blk_mq_ctx *ctx;
- struct request_queue *q;
- ssize_t res;
-
- entry = container_of(attr, struct blk_mq_ctx_sysfs_entry, attr);
- ctx = container_of(kobj, struct blk_mq_ctx, kobj);
- q = ctx->queue;
-
- if (!entry->store)
- return -EIO;
-
- mutex_lock(&q->sysfs_lock);
- res = entry->store(ctx, page, length);
- mutex_unlock(&q->sysfs_lock);
- return res;
-}
-
static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
struct attribute *attr, char *page)
{
@@ -198,23 +150,16 @@ static struct attribute *default_hw_ctx_attrs[] = {
};
ATTRIBUTE_GROUPS(default_hw_ctx);
-static const struct sysfs_ops blk_mq_sysfs_ops = {
- .show = blk_mq_sysfs_show,
- .store = blk_mq_sysfs_store,
-};
-
static const struct sysfs_ops blk_mq_hw_sysfs_ops = {
.show = blk_mq_hw_sysfs_show,
.store = blk_mq_hw_sysfs_store,
};
static struct kobj_type blk_mq_ktype = {
- .sysfs_ops = &blk_mq_sysfs_ops,
.release = blk_mq_sysfs_release,
};
static struct kobj_type blk_mq_ctx_ktype = {
- .sysfs_ops = &blk_mq_sysfs_ops,
.release = blk_mq_ctx_sysfs_release,
};
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 2c4ac51e54eb..944049982e6e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -525,7 +525,7 @@ void blk_mq_free_request(struct request *rq)
__blk_mq_dec_active_requests(hctx);
if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
- laptop_io_completion(q->backing_dev_info);
+ laptop_io_completion(q->disk->bdi);
rq_qos_done(q, rq);
@@ -606,7 +606,7 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq)
* This is probably worse than completing the request on a different
* cache domain.
*/
- if (force_irqthreads)
+ if (force_irqthreads())
return false;
/* same CPU or cache domain? Complete locally */
@@ -911,7 +911,7 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
void blk_mq_put_rq_ref(struct request *rq)
{
- if (is_flush_rq(rq, rq->mq_hctx))
+ if (is_flush_rq(rq))
rq->end_io(rq, 0);
else if (refcount_dec_and_test(&rq->ref))
__blk_mq_free_request(rq);
@@ -923,34 +923,14 @@ static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
unsigned long *next = priv;
/*
- * Just do a quick check if it is expired before locking the request in
- * so we're not unnecessarilly synchronizing across CPUs.
- */
- if (!blk_mq_req_expired(rq, next))
- return true;
-
- /*
- * We have reason to believe the request may be expired. Take a
- * reference on the request to lock this request lifetime into its
- * currently allocated context to prevent it from being reallocated in
- * the event the completion by-passes this timeout handler.
- *
- * If the reference was already released, then the driver beat the
- * timeout handler to posting a natural completion.
- */
- if (!refcount_inc_not_zero(&rq->ref))
- return true;
-
- /*
- * The request is now locked and cannot be reallocated underneath the
- * timeout handler's processing. Re-verify this exact request is truly
- * expired; if it is not expired, then the request was completed and
- * reallocated as a new request.
+ * blk_mq_queue_tag_busy_iter() has locked the request, so it cannot
+ * be reallocated underneath the timeout handler's processing, then
+ * the expire check is reliable. If the request is not expired, then
+ * it was completed and reallocated as a new request after returning
+ * from blk_mq_check_expired().
*/
if (blk_mq_req_expired(rq, next))
blk_mq_rq_timed_out(rq, reserved);
-
- blk_mq_put_rq_ref(rq);
return true;
}
@@ -2994,10 +2974,12 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared)
int i;
queue_for_each_hw_ctx(q, hctx, i) {
- if (shared)
+ if (shared) {
hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED;
- else
+ } else {
+ blk_mq_tag_idle(hctx);
hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED;
+ }
}
}
@@ -3133,7 +3115,8 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
}
EXPORT_SYMBOL(blk_mq_init_queue);
-struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata)
+struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
+ struct lock_class_key *lkclass)
{
struct request_queue *q;
struct gendisk *disk;
@@ -3142,12 +3125,11 @@ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata)
if (IS_ERR(q))
return ERR_CAST(q);
- disk = __alloc_disk_node(0, set->numa_node);
+ disk = __alloc_disk_node(q, set->numa_node, lkclass);
if (!disk) {
blk_cleanup_queue(q);
return ERR_PTR(-ENOMEM);
}
- disk->queue = q;
return disk;
}
EXPORT_SYMBOL(__blk_mq_alloc_disk);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 902c40d67120..a7c857ad7d10 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -8,6 +8,7 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/pagemap.h>
+#include <linux/backing-dev-defs.h>
#include <linux/gcd.h>
#include <linux/lcm.h>
#include <linux/jiffies.h>
@@ -140,7 +141,9 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
limits->logical_block_size >> SECTOR_SHIFT);
limits->max_sectors = max_sectors;
- q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
+ if (!q->disk)
+ return;
+ q->disk->bdi->io_pages = max_sectors >> (PAGE_SHIFT - 9);
}
EXPORT_SYMBOL(blk_queue_max_hw_sectors);
@@ -380,18 +383,19 @@ void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
}
EXPORT_SYMBOL(blk_queue_alignment_offset);
-void blk_queue_update_readahead(struct request_queue *q)
+void disk_update_readahead(struct gendisk *disk)
{
+ struct request_queue *q = disk->queue;
+
/*
* For read-ahead of large files to be effective, we need to read ahead
* at least twice the optimal I/O size.
*/
- q->backing_dev_info->ra_pages =
+ disk->bdi->ra_pages =
max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
- q->backing_dev_info->io_pages =
- queue_max_sectors(q) >> (PAGE_SHIFT - 9);
+ disk->bdi->io_pages = queue_max_sectors(q) >> (PAGE_SHIFT - 9);
}
-EXPORT_SYMBOL_GPL(blk_queue_update_readahead);
+EXPORT_SYMBOL_GPL(disk_update_readahead);
/**
* blk_limits_io_min - set minimum request size for a device
@@ -471,7 +475,9 @@ EXPORT_SYMBOL(blk_limits_io_opt);
void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
{
blk_limits_io_opt(&q->limits, opt);
- q->backing_dev_info->ra_pages =
+ if (!q->disk)
+ return;
+ q->disk->bdi->ra_pages =
max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
}
EXPORT_SYMBOL(blk_queue_io_opt);
@@ -661,17 +667,11 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
struct request_queue *t = disk->queue;
if (blk_stack_limits(&t->limits, &bdev_get_queue(bdev)->limits,
- get_start_sect(bdev) + (offset >> 9)) < 0) {
- char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
-
- disk_name(disk, 0, top);
- bdevname(bdev, bottom);
-
- printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
- top, bottom);
- }
+ get_start_sect(bdev) + (offset >> 9)) < 0)
+ pr_notice("%s: Warning: Device %pg is misaligned\n",
+ disk->disk_name, bdev);
- blk_queue_update_readahead(disk->queue);
+ disk_update_readahead(disk);
}
EXPORT_SYMBOL(disk_stack_limits);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 370d83c18057..614d9d47de36 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -88,9 +88,11 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
static ssize_t queue_ra_show(struct request_queue *q, char *page)
{
- unsigned long ra_kb = q->backing_dev_info->ra_pages <<
- (PAGE_SHIFT - 10);
+ unsigned long ra_kb;
+ if (!q->disk)
+ return -EINVAL;
+ ra_kb = q->disk->bdi->ra_pages << (PAGE_SHIFT - 10);
return queue_var_show(ra_kb, page);
}
@@ -98,13 +100,14 @@ static ssize_t
queue_ra_store(struct request_queue *q, const char *page, size_t count)
{
unsigned long ra_kb;
- ssize_t ret = queue_var_store(&ra_kb, page, count);
+ ssize_t ret;
+ if (!q->disk)
+ return -EINVAL;
+ ret = queue_var_store(&ra_kb, page, count);
if (ret < 0)
return ret;
-
- q->backing_dev_info->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
-
+ q->disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
return ret;
}
@@ -251,7 +254,8 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
spin_lock_irq(&q->queue_lock);
q->limits.max_sectors = max_sectors_kb << 1;
- q->backing_dev_info->io_pages = max_sectors_kb >> (PAGE_SHIFT - 10);
+ if (q->disk)
+ q->disk->bdi->io_pages = max_sectors_kb >> (PAGE_SHIFT - 10);
spin_unlock_irq(&q->queue_lock);
return ret;
@@ -766,13 +770,6 @@ static void blk_exit_queue(struct request_queue *q)
* e.g. blkcg_print_blkgs() to crash.
*/
blkcg_exit_queue(q);
-
- /*
- * Since the cgroup code may dereference the @q->backing_dev_info
- * pointer, only decrease its reference count after having removed the
- * association with the block cgroup controller.
- */
- bdi_put(q->backing_dev_info);
}
/**
@@ -859,15 +856,6 @@ int blk_register_queue(struct gendisk *disk)
struct device *dev = disk_to_dev(disk);
struct request_queue *q = disk->queue;
- if (WARN_ON(!q))
- return -ENXIO;
-
- WARN_ONCE(blk_queue_registered(q),
- "%s is registering an already registered queue\n",
- kobject_name(&dev->kobj));
-
- blk_queue_update_readahead(q);
-
ret = blk_trace_init_sysfs(dev);
if (ret)
return ret;
@@ -941,7 +929,6 @@ unlock:
return ret;
}
-EXPORT_SYMBOL_GPL(blk_register_queue);
/**
* blk_unregister_queue - counterpart of blk_register_queue()
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index b1b22d863bdf..55c49015e533 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -178,6 +178,9 @@ struct throtl_grp {
unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
unsigned long bio_cnt_reset_time;
+ atomic_t io_split_cnt[2];
+ atomic_t last_io_split_cnt[2];
+
struct blkg_rwstat stat_bytes;
struct blkg_rwstat stat_ios;
};
@@ -777,6 +780,8 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
tg->bytes_disp[rw] = 0;
tg->io_disp[rw] = 0;
+ atomic_set(&tg->io_split_cnt[rw], 0);
+
/*
* Previous slice has expired. We must have trimmed it after last
* bio dispatch. That means since start of last slice, we never used
@@ -799,6 +804,9 @@ static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
tg->io_disp[rw] = 0;
tg->slice_start[rw] = jiffies;
tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
+
+ atomic_set(&tg->io_split_cnt[rw], 0);
+
throtl_log(&tg->service_queue,
"[%c] new slice start=%lu end=%lu jiffies=%lu",
rw == READ ? 'R' : 'W', tg->slice_start[rw],
@@ -1031,6 +1039,9 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
jiffies + tg->td->throtl_slice);
}
+ if (iops_limit != UINT_MAX)
+ tg->io_disp[rw] += atomic_xchg(&tg->io_split_cnt[rw], 0);
+
if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
if (wait)
@@ -2052,12 +2063,14 @@ static void throtl_downgrade_check(struct throtl_grp *tg)
}
if (tg->iops[READ][LIMIT_LOW]) {
+ tg->last_io_disp[READ] += atomic_xchg(&tg->last_io_split_cnt[READ], 0);
iops = tg->last_io_disp[READ] * HZ / elapsed_time;
if (iops >= tg->iops[READ][LIMIT_LOW])
tg->last_low_overflow_time[READ] = now;
}
if (tg->iops[WRITE][LIMIT_LOW]) {
+ tg->last_io_disp[WRITE] += atomic_xchg(&tg->last_io_split_cnt[WRITE], 0);
iops = tg->last_io_disp[WRITE] * HZ / elapsed_time;
if (iops >= tg->iops[WRITE][LIMIT_LOW])
tg->last_low_overflow_time[WRITE] = now;
@@ -2176,6 +2189,25 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
}
#endif
+void blk_throtl_charge_bio_split(struct bio *bio)
+{
+ struct blkcg_gq *blkg = bio->bi_blkg;
+ struct throtl_grp *parent = blkg_to_tg(blkg);
+ struct throtl_service_queue *parent_sq;
+ bool rw = bio_data_dir(bio);
+
+ do {
+ if (!parent->has_rules[rw])
+ break;
+
+ atomic_inc(&parent->io_split_cnt[rw]);
+ atomic_inc(&parent->last_io_split_cnt[rw]);
+
+ parent_sq = parent->service_queue.parent_sq;
+ parent = sq_to_tg(parent_sq);
+ } while (parent);
+}
+
bool blk_throtl_bio(struct bio *bio)
{
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 3ed71b8da887..874c1c37bf0c 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -97,7 +97,7 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
*/
static bool wb_recent_wait(struct rq_wb *rwb)
{
- struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb;
+ struct bdi_writeback *wb = &rwb->rqos.q->disk->bdi->wb;
return time_before(jiffies, wb->dirty_sleep + HZ);
}
@@ -234,7 +234,7 @@ enum {
static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
{
- struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
+ struct backing_dev_info *bdi = rwb->rqos.q->disk->bdi;
struct rq_depth *rqd = &rwb->rq_depth;
u64 thislat;
@@ -287,7 +287,7 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
{
- struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
+ struct backing_dev_info *bdi = rwb->rqos.q->disk->bdi;
struct rq_depth *rqd = &rwb->rq_depth;
trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
@@ -359,7 +359,7 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
status = latency_exceeded(rwb, cb->stat);
- trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step,
+ trace_wbt_timer(rwb->rqos.q->disk->bdi, status, rqd->scale_step,
inflight);
/*
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 86fce751bb17..1d0c76c18fc5 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -360,9 +360,6 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
if (!blk_queue_is_zoned(q))
return -ENOTTY;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
-
if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
return -EFAULT;
@@ -421,9 +418,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
if (!blk_queue_is_zoned(q))
return -ENOTTY;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
-
if (!(mode & FMODE_WRITE))
return -EBADF;
diff --git a/block/blk.h b/block/blk.h
index 4b885c0f6708..8c96b0c90c48 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -44,11 +44,7 @@ static inline void __blk_get_queue(struct request_queue *q)
kobject_get(&q->kobj);
}
-static inline bool
-is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx)
-{
- return hctx->fq->flush_rq == req;
-}
+bool is_flush_rq(struct request *req);
struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
gfp_t flags);
@@ -132,7 +128,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
bip_next->bip_vec[0].bv_offset);
}
-void blk_integrity_add(struct gendisk *);
+int blk_integrity_add(struct gendisk *disk);
void blk_integrity_del(struct gendisk *);
#else /* CONFIG_BLK_DEV_INTEGRITY */
static inline bool blk_integrity_merge_rq(struct request_queue *rq,
@@ -166,8 +162,9 @@ static inline bool bio_integrity_endio(struct bio *bio)
static inline void bio_integrity_free(struct bio *bio)
{
}
-static inline void blk_integrity_add(struct gendisk *disk)
+static inline int blk_integrity_add(struct gendisk *disk)
{
+ return 0;
}
static inline void blk_integrity_del(struct gendisk *disk)
{
@@ -293,11 +290,13 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
extern int blk_throtl_init(struct request_queue *q);
extern void blk_throtl_exit(struct request_queue *q);
extern void blk_throtl_register_queue(struct request_queue *q);
+extern void blk_throtl_charge_bio_split(struct bio *bio);
bool blk_throtl_bio(struct bio *bio);
#else /* CONFIG_BLK_DEV_THROTTLING */
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
static inline void blk_throtl_exit(struct request_queue *q) { }
static inline void blk_throtl_register_queue(struct request_queue *q) { }
+static inline void blk_throtl_charge_bio_split(struct bio *bio) { }
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
#endif /* CONFIG_BLK_DEV_THROTTLING */
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
@@ -344,15 +343,14 @@ static inline void blk_queue_clear_zone_settings(struct request_queue *q) {}
int blk_alloc_ext_minor(void);
void blk_free_ext_minor(unsigned int minor);
-char *disk_name(struct gendisk *hd, int partno, char *buf);
#define ADDPART_FLAG_NONE 0
#define ADDPART_FLAG_RAID 1
#define ADDPART_FLAG_WHOLEDISK 2
-int bdev_add_partition(struct block_device *bdev, int partno,
- sector_t start, sector_t length);
-int bdev_del_partition(struct block_device *bdev, int partno);
-int bdev_resize_partition(struct block_device *bdev, int partno,
- sector_t start, sector_t length);
+int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
+ sector_t length);
+int bdev_del_partition(struct gendisk *disk, int partno);
+int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start,
+ sector_t length);
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
@@ -360,7 +358,7 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct request_queue *blk_alloc_queue(int node_id);
-void disk_alloc_events(struct gendisk *disk);
+int disk_alloc_events(struct gendisk *disk);
void disk_add_events(struct gendisk *disk);
void disk_del_events(struct gendisk *disk);
void disk_release_events(struct gendisk *disk);
@@ -368,4 +366,11 @@ extern struct device_attribute dev_attr_events;
extern struct device_attribute dev_attr_events_async;
extern struct device_attribute dev_attr_events_poll_msecs;
+static inline void bio_clear_hipri(struct bio *bio)
+{
+ /* can't support alloc cache if we turn off polling */
+ bio_clear_flag(bio, BIO_PERCPU_CACHE);
+ bio->bi_opf &= ~REQ_HIPRI;
+}
+
#endif /* BLK_INTERNAL_H */
diff --git a/block/bounce.c b/block/bounce.c
index 94081e013c58..05fc7148489d 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -68,25 +68,12 @@ static __init int init_emergency_pool(void)
__initcall(init_emergency_pool);
/*
- * highmem version, map in to vec
- */
-static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
-{
- unsigned char *vto;
-
- vto = kmap_atomic(to->bv_page);
- memcpy(vto + to->bv_offset, vfrom, to->bv_len);
- kunmap_atomic(vto);
-}
-
-/*
* Simple bounce buffer support for highmem pages. Depending on the
* queue gfp mask set, *to may or may not be a highmem page. kmap it
* always, it will do the Right Thing
*/
static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
{
- unsigned char *vfrom;
struct bio_vec tovec, fromvec;
struct bvec_iter iter;
/*
@@ -104,11 +91,8 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
* been modified by the block layer, so use the original
* copy, bounce_copy_vec already uses tovec->bv_len
*/
- vfrom = page_address(fromvec.bv_page) +
- tovec.bv_offset;
-
- bounce_copy_vec(&tovec, vfrom);
- flush_dcache_page(tovec.bv_page);
+ memcpy_to_bvec(&tovec, page_address(fromvec.bv_page) +
+ tovec.bv_offset);
}
bio_advance_iter(from, &from_iter, tovec.bv_len);
}
@@ -255,24 +239,19 @@ void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
* because the 'bio' is single-page bvec.
*/
for (i = 0, to = bio->bi_io_vec; i < bio->bi_vcnt; to++, i++) {
- struct page *page = to->bv_page;
+ struct page *bounce_page;
- if (!PageHighMem(page))
+ if (!PageHighMem(to->bv_page))
continue;
- to->bv_page = mempool_alloc(&page_pool, GFP_NOIO);
- inc_zone_page_state(to->bv_page, NR_BOUNCE);
+ bounce_page = mempool_alloc(&page_pool, GFP_NOIO);
+ inc_zone_page_state(bounce_page, NR_BOUNCE);
if (rw == WRITE) {
- char *vto, *vfrom;
-
- flush_dcache_page(page);
-
- vto = page_address(to->bv_page) + to->bv_offset;
- vfrom = kmap_atomic(page) + to->bv_offset;
- memcpy(vto, vfrom, to->bv_len);
- kunmap_atomic(vfrom);
+ flush_dcache_page(to->bv_page);
+ memcpy_from_bvec(page_address(bounce_page), to);
}
+ to->bv_page = bounce_page;
}
trace_block_bio_bounce(*bio_orig);
diff --git a/block/cmdline-parser.c b/block/cmdline-parser.c
deleted file mode 100644
index f2a14571882b..000000000000
--- a/block/cmdline-parser.c
+++ /dev/null
@@ -1,255 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Parse command line, get partition information
- *
- * Written by Cai Zhiyong <caizhiyong@huawei.com>
- *
- */
-#include <linux/export.h>
-#include <linux/cmdline-parser.h>
-
-static int parse_subpart(struct cmdline_subpart **subpart, char *partdef)
-{
- int ret = 0;
- struct cmdline_subpart *new_subpart;
-
- *subpart = NULL;
-
- new_subpart = kzalloc(sizeof(struct cmdline_subpart), GFP_KERNEL);
- if (!new_subpart)
- return -ENOMEM;
-
- if (*partdef == '-') {
- new_subpart->size = (sector_t)(~0ULL);
- partdef++;
- } else {
- new_subpart->size = (sector_t)memparse(partdef, &partdef);
- if (new_subpart->size < (sector_t)PAGE_SIZE) {
- pr_warn("cmdline partition size is invalid.");
- ret = -EINVAL;
- goto fail;
- }
- }
-
- if (*partdef == '@') {
- partdef++;
- new_subpart->from = (sector_t)memparse(partdef, &partdef);
- } else {
- new_subpart->from = (sector_t)(~0ULL);
- }
-
- if (*partdef == '(') {
- int length;
- char *next = strchr(++partdef, ')');
-
- if (!next) {
- pr_warn("cmdline partition format is invalid.");
- ret = -EINVAL;
- goto fail;
- }
-
- length = min_t(int, next - partdef,
- sizeof(new_subpart->name) - 1);
- strncpy(new_subpart->name, partdef, length);
- new_subpart->name[length] = '\0';
-
- partdef = ++next;
- } else
- new_subpart->name[0] = '\0';
-
- new_subpart->flags = 0;
-
- if (!strncmp(partdef, "ro", 2)) {
- new_subpart->flags |= PF_RDONLY;
- partdef += 2;
- }
-
- if (!strncmp(partdef, "lk", 2)) {
- new_subpart->flags |= PF_POWERUP_LOCK;
- partdef += 2;
- }
-
- *subpart = new_subpart;
- return 0;
-fail:
- kfree(new_subpart);
- return ret;
-}
-
-static void free_subpart(struct cmdline_parts *parts)
-{
- struct cmdline_subpart *subpart;
-
- while (parts->subpart) {
- subpart = parts->subpart;
- parts->subpart = subpart->next_subpart;
- kfree(subpart);
- }
-}
-
-static int parse_parts(struct cmdline_parts **parts, const char *bdevdef)
-{
- int ret = -EINVAL;
- char *next;
- int length;
- struct cmdline_subpart **next_subpart;
- struct cmdline_parts *newparts;
- char buf[BDEVNAME_SIZE + 32 + 4];
-
- *parts = NULL;
-
- newparts = kzalloc(sizeof(struct cmdline_parts), GFP_KERNEL);
- if (!newparts)
- return -ENOMEM;
-
- next = strchr(bdevdef, ':');
- if (!next) {
- pr_warn("cmdline partition has no block device.");
- goto fail;
- }
-
- length = min_t(int, next - bdevdef, sizeof(newparts->name) - 1);
- strncpy(newparts->name, bdevdef, length);
- newparts->name[length] = '\0';
- newparts->nr_subparts = 0;
-
- next_subpart = &newparts->subpart;
-
- while (next && *(++next)) {
- bdevdef = next;
- next = strchr(bdevdef, ',');
-
- length = (!next) ? (sizeof(buf) - 1) :
- min_t(int, next - bdevdef, sizeof(buf) - 1);
-
- strncpy(buf, bdevdef, length);
- buf[length] = '\0';
-
- ret = parse_subpart(next_subpart, buf);
- if (ret)
- goto fail;
-
- newparts->nr_subparts++;
- next_subpart = &(*next_subpart)->next_subpart;
- }
-
- if (!newparts->subpart) {
- pr_warn("cmdline partition has no valid partition.");
- ret = -EINVAL;
- goto fail;
- }
-
- *parts = newparts;
-
- return 0;
-fail:
- free_subpart(newparts);
- kfree(newparts);
- return ret;
-}
-
-void cmdline_parts_free(struct cmdline_parts **parts)
-{
- struct cmdline_parts *next_parts;
-
- while (*parts) {
- next_parts = (*parts)->next_parts;
- free_subpart(*parts);
- kfree(*parts);
- *parts = next_parts;
- }
-}
-EXPORT_SYMBOL(cmdline_parts_free);
-
-int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline)
-{
- int ret;
- char *buf;
- char *pbuf;
- char *next;
- struct cmdline_parts **next_parts;
-
- *parts = NULL;
-
- next = pbuf = buf = kstrdup(cmdline, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- next_parts = parts;
-
- while (next && *pbuf) {
- next = strchr(pbuf, ';');
- if (next)
- *next = '\0';
-
- ret = parse_parts(next_parts, pbuf);
- if (ret)
- goto fail;
-
- if (next)
- pbuf = ++next;
-
- next_parts = &(*next_parts)->next_parts;
- }
-
- if (!*parts) {
- pr_warn("cmdline partition has no valid partition.");
- ret = -EINVAL;
- goto fail;
- }
-
- ret = 0;
-done:
- kfree(buf);
- return ret;
-
-fail:
- cmdline_parts_free(parts);
- goto done;
-}
-EXPORT_SYMBOL(cmdline_parts_parse);
-
-struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
- const char *bdev)
-{
- while (parts && strncmp(bdev, parts->name, sizeof(parts->name)))
- parts = parts->next_parts;
- return parts;
-}
-EXPORT_SYMBOL(cmdline_parts_find);
-
-/*
- * add_part()
- * 0 success.
- * 1 can not add so many partitions.
- */
-int cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
- int slot,
- int (*add_part)(int, struct cmdline_subpart *, void *),
- void *param)
-{
- sector_t from = 0;
- struct cmdline_subpart *subpart;
-
- for (subpart = parts->subpart; subpart;
- subpart = subpart->next_subpart, slot++) {
- if (subpart->from == (sector_t)(~0ULL))
- subpart->from = from;
- else
- from = subpart->from;
-
- if (from >= disk_size)
- break;
-
- if (subpart->size > (disk_size - from))
- subpart->size = disk_size - from;
-
- from += subpart->size;
-
- if (add_part(slot, subpart, param))
- break;
- }
-
- return slot;
-}
-EXPORT_SYMBOL(cmdline_parts_set);
diff --git a/block/disk-events.c b/block/disk-events.c
index a75931ff5da4..8d5496e7592a 100644
--- a/block/disk-events.c
+++ b/block/disk-events.c
@@ -163,15 +163,31 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
spin_unlock_irq(&ev->lock);
}
+/*
+ * Tell userland about new events. Only the events listed in @disk->events are
+ * reported, and only if DISK_EVENT_FLAG_UEVENT is set. Otherwise, events are
+ * processed internally but never get reported to userland.
+ */
+static void disk_event_uevent(struct gendisk *disk, unsigned int events)
+{
+ char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
+ int nr_events = 0, i;
+
+ for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
+ if (events & disk->events & (1 << i))
+ envp[nr_events++] = disk_uevents[i];
+
+ if (nr_events)
+ kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
+}
+
static void disk_check_events(struct disk_events *ev,
unsigned int *clearing_ptr)
{
struct gendisk *disk = ev->disk;
- char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
unsigned int clearing = *clearing_ptr;
unsigned int events;
unsigned long intv;
- int nr_events = 0, i;
/* check events */
events = disk->fops->check_events(disk, clearing);
@@ -190,19 +206,11 @@ static void disk_check_events(struct disk_events *ev,
spin_unlock_irq(&ev->lock);
- /*
- * Tell userland about new events. Only the events listed in
- * @disk->events are reported, and only if DISK_EVENT_FLAG_UEVENT
- * is set. Otherwise, events are processed internally but never
- * get reported to userland.
- */
- for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
- if ((events & disk->events & (1 << i)) &&
- (disk->event_flags & DISK_EVENT_FLAG_UEVENT))
- envp[nr_events++] = disk_uevents[i];
+ if (events & DISK_EVENT_MEDIA_CHANGE)
+ inc_diskseq(disk);
- if (nr_events)
- kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
+ if (disk->event_flags & DISK_EVENT_FLAG_UEVENT)
+ disk_event_uevent(disk, events);
}
/**
@@ -281,6 +289,32 @@ bool bdev_check_media_change(struct block_device *bdev)
}
EXPORT_SYMBOL(bdev_check_media_change);
+/**
+ * disk_force_media_change - force a media change event
+ * @disk: the disk which will raise the event
+ * @events: the events to raise
+ *
+ * Generate uevents for the disk. If DISK_EVENT_MEDIA_CHANGE is present,
+ * also attempt to free all dentries and inodes and invalidate all block
+ * device page cache entries.
+ *
+ * Returns %true if DISK_EVENT_MEDIA_CHANGE was raised, or %false if not.
+ */
+bool disk_force_media_change(struct gendisk *disk, unsigned int events)
+{
+ disk_event_uevent(disk, events);
+
+ if (!(events & DISK_EVENT_MEDIA_CHANGE))
+ return false;
+
+ if (__invalidate_device(disk->part0, true))
+ pr_warn("VFS: busy inodes on changed media %s\n",
+ disk->disk_name);
+ set_bit(GD_NEED_PART_SCAN, &disk->state);
+ return true;
+}
+EXPORT_SYMBOL_GPL(disk_force_media_change);
+
/*
* Separate this part out so that a different pointer for clearing_ptr can be
* passed in for disk_clear_events.
@@ -410,17 +444,17 @@ module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
/*
* disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
*/
-void disk_alloc_events(struct gendisk *disk)
+int disk_alloc_events(struct gendisk *disk)
{
struct disk_events *ev;
if (!disk->fops->check_events || !disk->events)
- return;
+ return 0;
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
if (!ev) {
pr_warn("%s: failed to initialize events\n", disk->disk_name);
- return;
+ return -ENOMEM;
}
INIT_LIST_HEAD(&ev->node);
@@ -432,6 +466,7 @@ void disk_alloc_events(struct gendisk *disk)
INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
disk->ev = ev;
+ return 0;
}
void disk_add_events(struct gendisk *disk)
diff --git a/block/elevator.c b/block/elevator.c
index 52ada14cfe45..ff45d8388f48 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -336,6 +336,9 @@ enum elv_merge elv_merge(struct request_queue *q, struct request **req,
__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
if (__rq && elv_bio_merge_ok(__rq, bio)) {
*req = __rq;
+
+ if (blk_discard_mergable(__rq))
+ return ELEVATOR_DISCARD_MERGE;
return ELEVATOR_BACK_MERGE;
}
@@ -630,6 +633,9 @@ static inline bool elv_support_iosched(struct request_queue *q)
*/
static struct elevator_type *elevator_get_default(struct request_queue *q)
{
+ if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
+ return NULL;
+
if (q->nr_hw_queues != 1 &&
!blk_mq_is_sbitmap_shared(q->tag_set->flags))
return NULL;
@@ -702,7 +708,6 @@ void elevator_init_mq(struct request_queue *q)
elevator_put(e);
}
}
-EXPORT_SYMBOL_GPL(elevator_init_mq); /* only for dm-rq */
/*
* switch to new_e io scheduler. be careful not to introduce deadlocks -
diff --git a/block/genhd.c b/block/genhd.c
index 298ee78c1bda..567549a011d1 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -29,6 +29,23 @@
static struct kobject *block_depr;
+/*
+ * Unique, monotonically increasing sequential number associated with block
+ * device instances (i.e. incremented each time a device is attached).
+ * Associating uevents with block devices in userspace is difficult and racy:
+ * the uevent netlink socket is lossy, and on slow and overloaded systems has
+ * a very high latency.
+ * Block devices do not have exclusive owners in userspace, any process can set
+ * one up (e.g. loop devices). Moreover, device names can be reused (e.g. loop0
+ * can be reused again and again).
+ * A userspace process setting up a block device and watching for its events
+ * cannot thus reliably tell whether an event relates to the device it just set
+ * up or another earlier instance with the same name.
+ * This sequential number allows userspace processes to solve this problem, and
+ * uniquely associate a uevent with the lifetime of a device.
+ */
+static atomic64_t diskseq;
+
/* for extended dynamic devt allocation, currently only one major is used */
#define NR_EXT_DEVT (1 << MINORBITS)
static DEFINE_IDA(ext_devt_ida);
@@ -60,7 +77,8 @@ bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
* initial capacity during probing.
*/
if (size == capacity ||
- (disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
+ !disk_live(disk) ||
+ (disk->flags & GENHD_FL_HIDDEN))
return false;
pr_info("%s: detected capacity change from %lld to %lld\n",
@@ -78,11 +96,17 @@ bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
EXPORT_SYMBOL_GPL(set_capacity_and_notify);
/*
- * Format the device name of the indicated disk into the supplied buffer and
- * return a pointer to that same buffer for convenience.
+ * Format the device name of the indicated block device into the supplied buffer
+ * and return a pointer to that same buffer for convenience.
+ *
+ * Note: do not use this in new code, use the %pg specifier to sprintf and
+ * printk instead.
*/
-char *disk_name(struct gendisk *hd, int partno, char *buf)
+const char *bdevname(struct block_device *bdev, char *buf)
{
+ struct gendisk *hd = bdev->bd_disk;
+ int partno = bdev->bd_partno;
+
if (!partno)
snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
@@ -92,11 +116,6 @@ char *disk_name(struct gendisk *hd, int partno, char *buf)
return buf;
}
-
-const char *bdevname(struct block_device *bdev, char *buf)
-{
- return disk_name(bdev->bd_disk, bdev->bd_partno, buf);
-}
EXPORT_SYMBOL(bdevname);
static void part_stat_read_all(struct block_device *part,
@@ -294,54 +313,19 @@ void unregister_blkdev(unsigned int major, const char *name)
EXPORT_SYMBOL(unregister_blkdev);
-/**
- * blk_mangle_minor - scatter minor numbers apart
- * @minor: minor number to mangle
- *
- * Scatter consecutively allocated @minor number apart if MANGLE_DEVT
- * is enabled. Mangling twice gives the original value.
- *
- * RETURNS:
- * Mangled value.
- *
- * CONTEXT:
- * Don't care.
- */
-static int blk_mangle_minor(int minor)
-{
-#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
- int i;
-
- for (i = 0; i < MINORBITS / 2; i++) {
- int low = minor & (1 << i);
- int high = minor & (1 << (MINORBITS - 1 - i));
- int distance = MINORBITS - 1 - 2 * i;
-
- minor ^= low | high; /* clear both bits */
- low <<= distance; /* swap the positions */
- high >>= distance;
- minor |= low | high; /* and set */
- }
-#endif
- return minor;
-}
-
int blk_alloc_ext_minor(void)
{
int idx;
idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL);
- if (idx < 0) {
- if (idx == -ENOSPC)
- return -EBUSY;
- return idx;
- }
- return blk_mangle_minor(idx);
+ if (idx == -ENOSPC)
+ return -EBUSY;
+ return idx;
}
void blk_free_ext_minor(unsigned int minor)
{
- ida_free(&ext_devt_ida, blk_mangle_minor(minor));
+ ida_free(&ext_devt_ida, minor);
}
static char *bdevt_str(dev_t devt, char *buf)
@@ -390,78 +374,20 @@ static void disk_scan_partitions(struct gendisk *disk)
blkdev_put(bdev, FMODE_READ);
}
-static void register_disk(struct device *parent, struct gendisk *disk,
- const struct attribute_group **groups)
-{
- struct device *ddev = disk_to_dev(disk);
- int err;
-
- ddev->parent = parent;
-
- dev_set_name(ddev, "%s", disk->disk_name);
-
- /* delay uevents, until we scanned partition table */
- dev_set_uevent_suppress(ddev, 1);
-
- if (groups) {
- WARN_ON(ddev->groups);
- ddev->groups = groups;
- }
- if (device_add(ddev))
- return;
- if (!sysfs_deprecated) {
- err = sysfs_create_link(block_depr, &ddev->kobj,
- kobject_name(&ddev->kobj));
- if (err) {
- device_del(ddev);
- return;
- }
- }
-
- /*
- * avoid probable deadlock caused by allocating memory with
- * GFP_KERNEL in runtime_resume callback of its all ancestor
- * devices
- */
- pm_runtime_set_memalloc_noio(ddev, true);
-
- disk->part0->bd_holder_dir =
- kobject_create_and_add("holders", &ddev->kobj);
- disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
-
- if (disk->flags & GENHD_FL_HIDDEN)
- return;
-
- disk_scan_partitions(disk);
-
- /* announce the disk and partitions after all partitions are created */
- dev_set_uevent_suppress(ddev, 0);
- disk_uevent(disk, KOBJ_ADD);
-
- if (disk->queue->backing_dev_info->dev) {
- err = sysfs_create_link(&ddev->kobj,
- &disk->queue->backing_dev_info->dev->kobj,
- "bdi");
- WARN_ON(err);
- }
-}
-
/**
- * __device_add_disk - add disk information to kernel list
+ * device_add_disk - add disk information to kernel list
* @parent: parent device for the disk
* @disk: per-device partitioning information
* @groups: Additional per-device sysfs groups
- * @register_queue: register the queue if set to true
*
* This function registers the partitioning information in @disk
* with the kernel.
- *
- * FIXME: error handling
*/
-static void __device_add_disk(struct device *parent, struct gendisk *disk,
- const struct attribute_group **groups,
- bool register_queue)
+int device_add_disk(struct device *parent, struct gendisk *disk,
+ const struct attribute_group **groups)
+
{
+ struct device *ddev = disk_to_dev(disk);
int ret;
/*
@@ -470,8 +396,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
* elevator if one is needed, that is, for devices requesting queue
* registration.
*/
- if (register_queue)
- elevator_init_mq(disk->queue);
+ elevator_init_mq(disk->queue);
/*
* If the driver provides an explicit major number it also must provide
@@ -481,7 +406,8 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
* and all partitions from the extended dev_t space.
*/
if (disk->major) {
- WARN_ON(!disk->minors);
+ if (WARN_ON(!disk->minors))
+ return -EINVAL;
if (disk->minors > DISK_MAX_PARTS) {
pr_err("block: can't allocate more than %d partitions\n",
@@ -489,21 +415,65 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
disk->minors = DISK_MAX_PARTS;
}
} else {
- WARN_ON(disk->minors);
+ if (WARN_ON(disk->minors))
+ return -EINVAL;
ret = blk_alloc_ext_minor();
- if (ret < 0) {
- WARN_ON(1);
- return;
- }
+ if (ret < 0)
+ return ret;
disk->major = BLOCK_EXT_MAJOR;
- disk->first_minor = MINOR(ret);
+ disk->first_minor = ret;
disk->flags |= GENHD_FL_EXT_DEVT;
}
- disk->flags |= GENHD_FL_UP;
+ ret = disk_alloc_events(disk);
+ if (ret)
+ goto out_free_ext_minor;
- disk_alloc_events(disk);
+ /* delay uevents, until we scanned partition table */
+ dev_set_uevent_suppress(ddev, 1);
+
+ ddev->parent = parent;
+ ddev->groups = groups;
+ dev_set_name(ddev, "%s", disk->disk_name);
+ if (!(disk->flags & GENHD_FL_HIDDEN))
+ ddev->devt = MKDEV(disk->major, disk->first_minor);
+ ret = device_add(ddev);
+ if (ret)
+ goto out_disk_release_events;
+ if (!sysfs_deprecated) {
+ ret = sysfs_create_link(block_depr, &ddev->kobj,
+ kobject_name(&ddev->kobj));
+ if (ret)
+ goto out_device_del;
+ }
+
+ /*
+ * avoid probable deadlock caused by allocating memory with
+ * GFP_KERNEL in runtime_resume callback of all its ancestor
+ * devices
+ */
+ pm_runtime_set_memalloc_noio(ddev, true);
+
+ ret = blk_integrity_add(disk);
+ if (ret)
+ goto out_del_block_link;
+
+ disk->part0->bd_holder_dir =
+ kobject_create_and_add("holders", &ddev->kobj);
+ if (!disk->part0->bd_holder_dir)
+ goto out_del_integrity;
+ disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
+ if (!disk->slave_dir)
+ goto out_put_holder_dir;
+
+ ret = bd_register_pending_holders(disk);
+ if (ret < 0)
+ goto out_put_slave_dir;
+
+ ret = blk_register_queue(disk);
+ if (ret)
+ goto out_put_slave_dir;
if (disk->flags & GENHD_FL_HIDDEN) {
/*
@@ -513,48 +483,56 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
disk->flags |= GENHD_FL_NO_PART_SCAN;
} else {
- struct backing_dev_info *bdi = disk->queue->backing_dev_info;
- struct device *dev = disk_to_dev(disk);
-
- /* Register BDI before referencing it from bdev */
- dev->devt = MKDEV(disk->major, disk->first_minor);
- ret = bdi_register(bdi, "%u:%u",
+ ret = bdi_register(disk->bdi, "%u:%u",
disk->major, disk->first_minor);
- WARN_ON(ret);
- bdi_set_owner(bdi, dev);
- bdev_add(disk->part0, dev->devt);
- }
- register_disk(parent, disk, groups);
- if (register_queue)
- blk_register_queue(disk);
+ if (ret)
+ goto out_unregister_queue;
+ bdi_set_owner(disk->bdi, ddev);
+ ret = sysfs_create_link(&ddev->kobj,
+ &disk->bdi->dev->kobj, "bdi");
+ if (ret)
+ goto out_unregister_bdi;
- /*
- * Take an extra ref on queue which will be put on disk_release()
- * so that it sticks around as long as @disk is there.
- */
- if (blk_get_queue(disk->queue))
- set_bit(GD_QUEUE_REF, &disk->state);
- else
- WARN_ON_ONCE(1);
+ bdev_add(disk->part0, ddev->devt);
+ disk_scan_partitions(disk);
- disk_add_events(disk);
- blk_integrity_add(disk);
-}
+ /*
+ * Announce the disk and partitions after all partitions are
+ * created. (for hidden disks uevents remain suppressed forever)
+ */
+ dev_set_uevent_suppress(ddev, 0);
+ disk_uevent(disk, KOBJ_ADD);
+ }
-void device_add_disk(struct device *parent, struct gendisk *disk,
- const struct attribute_group **groups)
+ disk_update_readahead(disk);
+ disk_add_events(disk);
+ return 0;
-{
- __device_add_disk(parent, disk, groups, true);
+out_unregister_bdi:
+ if (!(disk->flags & GENHD_FL_HIDDEN))
+ bdi_unregister(disk->bdi);
+out_unregister_queue:
+ blk_unregister_queue(disk);
+out_put_slave_dir:
+ kobject_put(disk->slave_dir);
+out_put_holder_dir:
+ kobject_put(disk->part0->bd_holder_dir);
+out_del_integrity:
+ blk_integrity_del(disk);
+out_del_block_link:
+ if (!sysfs_deprecated)
+ sysfs_remove_link(block_depr, dev_name(ddev));
+out_device_del:
+ device_del(ddev);
+out_disk_release_events:
+ disk_release_events(disk);
+out_free_ext_minor:
+ if (disk->major == BLOCK_EXT_MAJOR)
+ blk_free_ext_minor(disk->first_minor);
+ return WARN_ON_ONCE(ret); /* keep until all callers handle errors */
}
EXPORT_SYMBOL(device_add_disk);
-void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk)
-{
- __device_add_disk(parent, disk, NULL, false);
-}
-EXPORT_SYMBOL(device_add_disk_no_queue_reg);
-
/**
* del_gendisk - remove the gendisk
* @disk: the struct gendisk to remove
@@ -578,26 +556,20 @@ void del_gendisk(struct gendisk *disk)
{
might_sleep();
- if (WARN_ON_ONCE(!disk->queue))
+ if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN)))
return;
blk_integrity_del(disk);
disk_del_events(disk);
mutex_lock(&disk->open_mutex);
- disk->flags &= ~GENHD_FL_UP;
+ remove_inode_hash(disk->part0->bd_inode);
blk_drop_partitions(disk);
mutex_unlock(&disk->open_mutex);
fsync_bdev(disk->part0);
__invalidate_device(disk->part0, true);
- /*
- * Unhash the bdev inode for this device so that it can't be looked
- * up any more even if openers still hold references to it.
- */
- remove_inode_hash(disk->part0->bd_inode);
-
set_capacity(disk, 0);
if (!(disk->flags & GENHD_FL_HIDDEN)) {
@@ -607,7 +579,7 @@ void del_gendisk(struct gendisk *disk)
* Unregister bdi before releasing device numbers (as they can
* get reused and we'd get clashes in sysfs).
*/
- bdi_unregister(disk->queue->backing_dev_info);
+ bdi_unregister(disk->bdi);
}
blk_unregister_queue(disk);
@@ -683,7 +655,6 @@ void __init printk_all_partitions(void)
while ((dev = class_dev_iter_next(&iter))) {
struct gendisk *disk = dev_to_disk(dev);
struct block_device *part;
- char name_buf[BDEVNAME_SIZE];
char devt_buf[BDEVT_SIZE];
unsigned long idx;
@@ -703,11 +674,10 @@ void __init printk_all_partitions(void)
xa_for_each(&disk->part_tbl, idx, part) {
if (!bdev_nr_sectors(part))
continue;
- printk("%s%s %10llu %s %s",
+ printk("%s%s %10llu %pg %s",
bdev_is_partition(part) ? " " : "",
bdevt_str(part->bd_dev, devt_buf),
- bdev_nr_sectors(part) >> 1,
- disk_name(disk, part->bd_partno, name_buf),
+ bdev_nr_sectors(part) >> 1, part,
part->bd_meta_info ?
part->bd_meta_info->uuid : "");
if (bdev_is_partition(part))
@@ -785,7 +755,6 @@ static int show_partition(struct seq_file *seqf, void *v)
struct gendisk *sgp = v;
struct block_device *part;
unsigned long idx;
- char buf[BDEVNAME_SIZE];
/* Don't show non-partitionable removeable devices or empty devices */
if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
@@ -798,10 +767,9 @@ static int show_partition(struct seq_file *seqf, void *v)
xa_for_each(&sgp->part_tbl, idx, part) {
if (!bdev_nr_sectors(part))
continue;
- seq_printf(seqf, "%4d %7d %10llu %s\n",
+ seq_printf(seqf, "%4d %7d %10llu %pg\n",
MAJOR(part->bd_dev), MINOR(part->bd_dev),
- bdev_nr_sectors(part) >> 1,
- disk_name(sgp, part->bd_partno, buf));
+ bdev_nr_sectors(part) >> 1, part);
}
rcu_read_unlock();
return 0;
@@ -968,6 +936,14 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
}
+static ssize_t diskseq_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+
+ return sprintf(buf, "%llu\n", disk->diskseq);
+}
+
static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
@@ -980,6 +956,7 @@ static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
+static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
ssize_t part_fail_show(struct device *dev,
@@ -1025,6 +1002,7 @@ static struct attribute *disk_attrs[] = {
&dev_attr_events.attr,
&dev_attr_events_async.attr,
&dev_attr_events_poll_msecs.attr,
+ &dev_attr_diskseq.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
&dev_attr_fail.attr,
#endif
@@ -1074,17 +1052,24 @@ static void disk_release(struct device *dev)
might_sleep();
- if (MAJOR(dev->devt) == BLOCK_EXT_MAJOR)
- blk_free_ext_minor(MINOR(dev->devt));
disk_release_events(disk);
kfree(disk->random);
xa_destroy(&disk->part_tbl);
- if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue)
- blk_put_queue(disk->queue);
- bdput(disk->part0); /* frees the disk */
+ disk->queue->disk = NULL;
+ blk_put_queue(disk->queue);
+ iput(disk->part0->bd_inode); /* frees the disk */
+}
+
+static int block_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+
+ return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq);
}
+
struct class block_class = {
.name = "block",
+ .dev_uevent = block_uevent,
};
static char *block_devnode(struct device *dev, umode_t *mode,
@@ -1116,7 +1101,6 @@ static int diskstats_show(struct seq_file *seqf, void *v)
{
struct gendisk *gp = v;
struct block_device *hd;
- char buf[BDEVNAME_SIZE];
unsigned int inflight;
struct disk_stats stat;
unsigned long idx;
@@ -1139,15 +1123,14 @@ static int diskstats_show(struct seq_file *seqf, void *v)
else
inflight = part_in_flight(hd);
- seq_printf(seqf, "%4d %7d %s "
+ seq_printf(seqf, "%4d %7d %pg "
"%lu %lu %lu %u "
"%lu %lu %lu %u "
"%u %u %u "
"%lu %lu %lu %u "
"%lu %u"
"\n",
- MAJOR(hd->bd_dev), MINOR(hd->bd_dev),
- disk_name(gp, hd->bd_partno, buf),
+ MAJOR(hd->bd_dev), MINOR(hd->bd_dev), hd,
stat.ios[STAT_READ],
stat.merges[STAT_READ],
stat.sectors[STAT_READ],
@@ -1239,17 +1222,25 @@ dev_t blk_lookup_devt(const char *name, int partno)
return devt;
}
-struct gendisk *__alloc_disk_node(int minors, int node_id)
+struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
+ struct lock_class_key *lkclass)
{
struct gendisk *disk;
+ if (!blk_get_queue(q))
+ return NULL;
+
disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
if (!disk)
- return NULL;
+ goto out_put_queue;
+
+ disk->bdi = bdi_alloc(node_id);
+ if (!disk->bdi)
+ goto out_free_disk;
disk->part0 = bdev_alloc(disk, 0);
if (!disk->part0)
- goto out_free_disk;
+ goto out_free_bdi;
disk->node_id = node_id;
mutex_init(&disk->open_mutex);
@@ -1257,23 +1248,33 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
goto out_destroy_part_tbl;
- disk->minors = minors;
rand_initialize_disk(disk);
disk_to_dev(disk)->class = &block_class;
disk_to_dev(disk)->type = &disk_type;
device_initialize(disk_to_dev(disk));
+ inc_diskseq(disk);
+ disk->queue = q;
+ q->disk = disk;
+ lockdep_init_map(&disk->lockdep_map, "(bio completion)", lkclass, 0);
+#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
+ INIT_LIST_HEAD(&disk->slave_bdevs);
+#endif
return disk;
out_destroy_part_tbl:
xa_destroy(&disk->part_tbl);
- bdput(disk->part0);
+ iput(disk->part0->bd_inode);
+out_free_bdi:
+ bdi_put(disk->bdi);
out_free_disk:
kfree(disk);
+out_put_queue:
+ blk_put_queue(q);
return NULL;
}
EXPORT_SYMBOL(__alloc_disk_node);
-struct gendisk *__blk_alloc_disk(int node)
+struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass)
{
struct request_queue *q;
struct gendisk *disk;
@@ -1282,12 +1283,11 @@ struct gendisk *__blk_alloc_disk(int node)
if (!q)
return NULL;
- disk = __alloc_disk_node(0, node);
+ disk = __alloc_disk_node(q, node, lkclass);
if (!disk) {
blk_cleanup_queue(q);
return NULL;
}
- disk->queue = q;
return disk;
}
EXPORT_SYMBOL(__blk_alloc_disk);
@@ -1362,3 +1362,8 @@ int bdev_read_only(struct block_device *bdev)
return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
}
EXPORT_SYMBOL(bdev_read_only);
+
+void inc_diskseq(struct gendisk *disk)
+{
+ disk->diskseq = atomic64_inc_return(&diskseq);
+}
diff --git a/block/holder.c b/block/holder.c
new file mode 100644
index 000000000000..9dc084182337
--- /dev/null
+++ b/block/holder.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/genhd.h>
+
+struct bd_holder_disk {
+ struct list_head list;
+ struct block_device *bdev;
+ int refcnt;
+};
+
+static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
+ struct gendisk *disk)
+{
+ struct bd_holder_disk *holder;
+
+ list_for_each_entry(holder, &disk->slave_bdevs, list)
+ if (holder->bdev == bdev)
+ return holder;
+ return NULL;
+}
+
+static int add_symlink(struct kobject *from, struct kobject *to)
+{
+ return sysfs_create_link(from, to, kobject_name(to));
+}
+
+static void del_symlink(struct kobject *from, struct kobject *to)
+{
+ sysfs_remove_link(from, kobject_name(to));
+}
+
+static int __link_disk_holder(struct block_device *bdev, struct gendisk *disk)
+{
+ int ret;
+
+ ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
+ if (ret)
+ return ret;
+ ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
+ if (ret)
+ del_symlink(disk->slave_dir, bdev_kobj(bdev));
+ return ret;
+}
+
+/**
+ * bd_link_disk_holder - create symlinks between holding disk and slave bdev
+ * @bdev: the claimed slave bdev
+ * @disk: the holding disk
+ *
+ * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
+ *
+ * This function creates the following sysfs symlinks.
+ *
+ * - from "slaves" directory of the holder @disk to the claimed @bdev
+ * - from "holders" directory of the @bdev to the holder @disk
+ *
+ * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
+ * passed to bd_link_disk_holder(), then:
+ *
+ * /sys/block/dm-0/slaves/sda --> /sys/block/sda
+ * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
+ *
+ * The caller must have claimed @bdev before calling this function and
+ * ensure that both @bdev and @disk are valid during the creation and
+ * lifetime of these symlinks.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
+{
+ struct bd_holder_disk *holder;
+ int ret = 0;
+
+ mutex_lock(&disk->open_mutex);
+
+ WARN_ON_ONCE(!bdev->bd_holder);
+
+ /* FIXME: remove the following once add_disk() handles errors */
+ if (WARN_ON(!bdev->bd_holder_dir))
+ goto out_unlock;
+
+ holder = bd_find_holder_disk(bdev, disk);
+ if (holder) {
+ holder->refcnt++;
+ goto out_unlock;
+ }
+
+ holder = kzalloc(sizeof(*holder), GFP_KERNEL);
+ if (!holder) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ INIT_LIST_HEAD(&holder->list);
+ holder->bdev = bdev;
+ holder->refcnt = 1;
+ if (disk->slave_dir) {
+ ret = __link_disk_holder(bdev, disk);
+ if (ret) {
+ kfree(holder);
+ goto out_unlock;
+ }
+ }
+
+ list_add(&holder->list, &disk->slave_bdevs);
+ /*
+ * del_gendisk drops the initial reference to bd_holder_dir, so we need
+ * to keep our own here to allow for cleanup past that point.
+ */
+ kobject_get(bdev->bd_holder_dir);
+
+out_unlock:
+ mutex_unlock(&disk->open_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(bd_link_disk_holder);
+
+static void __unlink_disk_holder(struct block_device *bdev,
+ struct gendisk *disk)
+{
+ del_symlink(disk->slave_dir, bdev_kobj(bdev));
+ del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
+}
+
+/**
+ * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
+ * @bdev: the claimed slave bdev
+ * @disk: the holding disk
+ *
+ * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
+{
+ struct bd_holder_disk *holder;
+
+ mutex_lock(&disk->open_mutex);
+ holder = bd_find_holder_disk(bdev, disk);
+ if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
+ if (disk->slave_dir)
+ __unlink_disk_holder(bdev, disk);
+ kobject_put(bdev->bd_holder_dir);
+ list_del_init(&holder->list);
+ kfree(holder);
+ }
+ mutex_unlock(&disk->open_mutex);
+}
+EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
+
+int bd_register_pending_holders(struct gendisk *disk)
+{
+ struct bd_holder_disk *holder;
+ int ret;
+
+ mutex_lock(&disk->open_mutex);
+ list_for_each_entry(holder, &disk->slave_bdevs, list) {
+ ret = __link_disk_holder(holder->bdev, disk);
+ if (ret)
+ goto out_undo;
+ }
+ mutex_unlock(&disk->open_mutex);
+ return 0;
+
+out_undo:
+ list_for_each_entry_continue_reverse(holder, &disk->slave_bdevs, list)
+ __unlink_disk_holder(holder->bdev, disk);
+ mutex_unlock(&disk->open_mutex);
+ return ret;
+}
diff --git a/block/ioctl.c b/block/ioctl.c
index 24beec9ca9c9..eb0491e90b9a 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -16,6 +16,7 @@
static int blkpg_do_ioctl(struct block_device *bdev,
struct blkpg_partition __user *upart, int op)
{
+ struct gendisk *disk = bdev->bd_disk;
struct blkpg_partition p;
long long start, length;
@@ -30,7 +31,7 @@ static int blkpg_do_ioctl(struct block_device *bdev,
return -EINVAL;
if (op == BLKPG_DEL_PARTITION)
- return bdev_del_partition(bdev, p.pno);
+ return bdev_del_partition(disk, p.pno);
start = p.start >> SECTOR_SHIFT;
length = p.length >> SECTOR_SHIFT;
@@ -40,9 +41,9 @@ static int blkpg_do_ioctl(struct block_device *bdev,
/* check if partition is aligned to blocksize */
if (p.start & (bdev_logical_block_size(bdev) - 1))
return -EINVAL;
- return bdev_add_partition(bdev, p.pno, start, length);
+ return bdev_add_partition(disk, p.pno, start, length);
case BLKPG_RESIZE_PARTITION:
- return bdev_resize_partition(bdev, p.pno, start, length);
+ return bdev_resize_partition(disk, p.pno, start, length);
default:
return -EINVAL;
}
@@ -469,6 +470,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
BLKDEV_DISCARD_SECURE);
case BLKZEROOUT:
return blk_ioctl_zeroout(bdev, mode, arg);
+ case BLKGETDISKSEQ:
+ return put_u64(argp, bdev->bd_disk->diskseq);
case BLKREPORTZONE:
return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
case BLKRESETZONE:
@@ -504,7 +507,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
case BLKFRASET:
if(!capable(CAP_SYS_ADMIN))
return -EACCES;
- bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE;
+ bdev->bd_disk->bdi->ra_pages = (arg * 512) / PAGE_SIZE;
return 0;
case BLKRRPART:
return blkdev_reread_part(bdev, mode);
@@ -554,7 +557,8 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKFRAGET:
if (!argp)
return -EINVAL;
- return put_long(argp, (bdev->bd_bdi->ra_pages*PAGE_SIZE) / 512);
+ return put_long(argp,
+ (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512);
case BLKGETSIZE:
size = i_size_read(bdev->bd_inode);
if ((size >> 9) > ~0UL)
@@ -626,7 +630,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
if (!argp)
return -EINVAL;
return compat_put_long(argp,
- (bdev->bd_bdi->ra_pages * PAGE_SIZE) / 512);
+ (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512);
case BLKGETSIZE:
size = i_size_read(bdev->bd_inode);
if ((size >> 9) > ~0UL)
diff --git a/block/ioprio.c b/block/ioprio.c
index bee628f9f1b2..0e4ff245f2bf 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -74,9 +74,8 @@ int ioprio_check_cap(int ioprio)
fallthrough;
/* rt has prio field too */
case IOPRIO_CLASS_BE:
- if (data >= IOPRIO_BE_NR || data < 0)
+ if (data >= IOPRIO_NR_LEVELS || data < 0)
return -EINVAL;
-
break;
case IOPRIO_CLASS_IDLE:
break;
@@ -171,7 +170,7 @@ static int get_task_ioprio(struct task_struct *p)
ret = security_task_getioprio(p);
if (ret)
goto out;
- ret = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM);
+ ret = IOPRIO_DEFAULT;
task_lock(p);
if (p->io_context)
ret = p->io_context->ioprio;
@@ -183,9 +182,9 @@ out:
int ioprio_best(unsigned short aprio, unsigned short bprio)
{
if (!ioprio_valid(aprio))
- aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
+ aprio = IOPRIO_DEFAULT;
if (!ioprio_valid(bprio))
- bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
+ bprio = IOPRIO_DEFAULT;
return min(aprio, bprio);
}
diff --git a/block/mq-deadline-cgroup.c b/block/mq-deadline-cgroup.c
deleted file mode 100644
index 3b4bfddec39f..000000000000
--- a/block/mq-deadline-cgroup.c
+++ /dev/null
@@ -1,126 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/blk-cgroup.h>
-#include <linux/ioprio.h>
-
-#include "mq-deadline-cgroup.h"
-
-static struct blkcg_policy dd_blkcg_policy;
-
-static struct blkcg_policy_data *dd_cpd_alloc(gfp_t gfp)
-{
- struct dd_blkcg *pd;
-
- pd = kzalloc(sizeof(*pd), gfp);
- if (!pd)
- return NULL;
- pd->stats = alloc_percpu_gfp(typeof(*pd->stats),
- GFP_KERNEL | __GFP_ZERO);
- if (!pd->stats) {
- kfree(pd);
- return NULL;
- }
- return &pd->cpd;
-}
-
-static void dd_cpd_free(struct blkcg_policy_data *cpd)
-{
- struct dd_blkcg *dd_blkcg = container_of(cpd, typeof(*dd_blkcg), cpd);
-
- free_percpu(dd_blkcg->stats);
- kfree(dd_blkcg);
-}
-
-static struct dd_blkcg *dd_blkcg_from_pd(struct blkg_policy_data *pd)
-{
- return container_of(blkcg_to_cpd(pd->blkg->blkcg, &dd_blkcg_policy),
- struct dd_blkcg, cpd);
-}
-
-/*
- * Convert an association between a block cgroup and a request queue into a
- * pointer to the mq-deadline information associated with a (blkcg, queue) pair.
- */
-struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio)
-{
- struct blkg_policy_data *pd;
-
- pd = blkg_to_pd(bio->bi_blkg, &dd_blkcg_policy);
- if (!pd)
- return NULL;
-
- return dd_blkcg_from_pd(pd);
-}
-
-static size_t dd_pd_stat(struct blkg_policy_data *pd, char *buf, size_t size)
-{
- static const char *const prio_class_name[] = {
- [IOPRIO_CLASS_NONE] = "NONE",
- [IOPRIO_CLASS_RT] = "RT",
- [IOPRIO_CLASS_BE] = "BE",
- [IOPRIO_CLASS_IDLE] = "IDLE",
- };
- struct dd_blkcg *blkcg = dd_blkcg_from_pd(pd);
- int res = 0;
- u8 prio;
-
- for (prio = 0; prio < ARRAY_SIZE(blkcg->stats->stats); prio++)
- res += scnprintf(buf + res, size - res,
- " [%s] dispatched=%u inserted=%u merged=%u",
- prio_class_name[prio],
- ddcg_sum(blkcg, dispatched, prio) +
- ddcg_sum(blkcg, merged, prio) -
- ddcg_sum(blkcg, completed, prio),
- ddcg_sum(blkcg, inserted, prio) -
- ddcg_sum(blkcg, completed, prio),
- ddcg_sum(blkcg, merged, prio));
-
- return res;
-}
-
-static struct blkg_policy_data *dd_pd_alloc(gfp_t gfp, struct request_queue *q,
- struct blkcg *blkcg)
-{
- struct dd_blkg *pd;
-
- pd = kzalloc(sizeof(*pd), gfp);
- if (!pd)
- return NULL;
- return &pd->pd;
-}
-
-static void dd_pd_free(struct blkg_policy_data *pd)
-{
- struct dd_blkg *dd_blkg = container_of(pd, typeof(*dd_blkg), pd);
-
- kfree(dd_blkg);
-}
-
-static struct blkcg_policy dd_blkcg_policy = {
- .cpd_alloc_fn = dd_cpd_alloc,
- .cpd_free_fn = dd_cpd_free,
-
- .pd_alloc_fn = dd_pd_alloc,
- .pd_free_fn = dd_pd_free,
- .pd_stat_fn = dd_pd_stat,
-};
-
-int dd_activate_policy(struct request_queue *q)
-{
- return blkcg_activate_policy(q, &dd_blkcg_policy);
-}
-
-void dd_deactivate_policy(struct request_queue *q)
-{
- blkcg_deactivate_policy(q, &dd_blkcg_policy);
-}
-
-int __init dd_blkcg_init(void)
-{
- return blkcg_policy_register(&dd_blkcg_policy);
-}
-
-void __exit dd_blkcg_exit(void)
-{
- blkcg_policy_unregister(&dd_blkcg_policy);
-}
diff --git a/block/mq-deadline-cgroup.h b/block/mq-deadline-cgroup.h
deleted file mode 100644
index 0143fd74f3ce..000000000000
--- a/block/mq-deadline-cgroup.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#if !defined(_MQ_DEADLINE_CGROUP_H_)
-#define _MQ_DEADLINE_CGROUP_H_
-
-#include <linux/blk-cgroup.h>
-
-struct request_queue;
-
-/**
- * struct io_stats_per_prio - I/O statistics per I/O priority class.
- * @inserted: Number of inserted requests.
- * @merged: Number of merged requests.
- * @dispatched: Number of dispatched requests.
- * @completed: Number of I/O completions.
- */
-struct io_stats_per_prio {
- local_t inserted;
- local_t merged;
- local_t dispatched;
- local_t completed;
-};
-
-/* I/O statistics per I/O cgroup per I/O priority class (IOPRIO_CLASS_*). */
-struct blkcg_io_stats {
- struct io_stats_per_prio stats[4];
-};
-
-/**
- * struct dd_blkcg - Per cgroup data.
- * @cpd: blkcg_policy_data structure.
- * @stats: I/O statistics.
- */
-struct dd_blkcg {
- struct blkcg_policy_data cpd; /* must be the first member */
- struct blkcg_io_stats __percpu *stats;
-};
-
-/*
- * Count one event of type 'event_type' and with I/O priority class
- * 'prio_class'.
- */
-#define ddcg_count(ddcg, event_type, prio_class) do { \
-if (ddcg) { \
- struct blkcg_io_stats *io_stats = get_cpu_ptr((ddcg)->stats); \
- \
- BUILD_BUG_ON(!__same_type((ddcg), struct dd_blkcg *)); \
- BUILD_BUG_ON(!__same_type((prio_class), u8)); \
- local_inc(&io_stats->stats[(prio_class)].event_type); \
- put_cpu_ptr(io_stats); \
-} \
-} while (0)
-
-/*
- * Returns the total number of ddcg_count(ddcg, event_type, prio_class) calls
- * across all CPUs. No locking or barriers since it is fine if the returned
- * sum is slightly outdated.
- */
-#define ddcg_sum(ddcg, event_type, prio) ({ \
- unsigned int cpu; \
- u32 sum = 0; \
- \
- BUILD_BUG_ON(!__same_type((ddcg), struct dd_blkcg *)); \
- BUILD_BUG_ON(!__same_type((prio), u8)); \
- for_each_present_cpu(cpu) \
- sum += local_read(&per_cpu_ptr((ddcg)->stats, cpu)-> \
- stats[(prio)].event_type); \
- sum; \
-})
-
-#ifdef CONFIG_BLK_CGROUP
-
-/**
- * struct dd_blkg - Per (cgroup, request queue) data.
- * @pd: blkg_policy_data structure.
- */
-struct dd_blkg {
- struct blkg_policy_data pd; /* must be the first member */
-};
-
-struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio);
-int dd_activate_policy(struct request_queue *q);
-void dd_deactivate_policy(struct request_queue *q);
-int __init dd_blkcg_init(void);
-void __exit dd_blkcg_exit(void);
-
-#else /* CONFIG_BLK_CGROUP */
-
-static inline struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio)
-{
- return NULL;
-}
-
-static inline int dd_activate_policy(struct request_queue *q)
-{
- return 0;
-}
-
-static inline void dd_deactivate_policy(struct request_queue *q)
-{
-}
-
-static inline int dd_blkcg_init(void)
-{
- return 0;
-}
-
-static inline void dd_blkcg_exit(void)
-{
-}
-
-#endif /* CONFIG_BLK_CGROUP */
-
-#endif /* _MQ_DEADLINE_CGROUP_H_ */
diff --git a/block/mq-deadline-main.c b/block/mq-deadline.c
index 6f612e6dc82b..3c3693c34f06 100644
--- a/block/mq-deadline-main.c
+++ b/block/mq-deadline.c
@@ -25,18 +25,12 @@
#include "blk-mq-debugfs.h"
#include "blk-mq-tag.h"
#include "blk-mq-sched.h"
-#include "mq-deadline-cgroup.h"
/*
* See Documentation/block/deadline-iosched.rst
*/
static const int read_expire = HZ / 2; /* max time before a read is submitted. */
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
-/*
- * Time after which to dispatch lower priority requests even if higher
- * priority requests are pending.
- */
-static const int aging_expire = 10 * HZ;
static const int writes_starved = 2; /* max times reads can starve a write */
static const int fifo_batch = 16; /* # of sequential requests treated as one
by the above parameters. For throughput. */
@@ -57,6 +51,14 @@ enum dd_prio {
enum { DD_PRIO_COUNT = 3 };
+/* I/O statistics per I/O priority. */
+struct io_stats_per_prio {
+ local_t inserted; /* number of inserted requests */
+ local_t merged; /* number of merged requests */
+ local_t dispatched; /* number of dispatched requests */
+ local_t completed; /* number of I/O completions */
+};
+
/* I/O statistics for all I/O priorities (enum dd_prio). */
struct io_stats {
struct io_stats_per_prio stats[DD_PRIO_COUNT];
@@ -79,9 +81,6 @@ struct deadline_data {
* run time data
*/
- /* Request queue that owns this data structure. */
- struct request_queue *queue;
-
struct dd_per_prio per_prio[DD_PRIO_COUNT];
/* Data direction of latest dispatched request. */
@@ -99,7 +98,6 @@ struct deadline_data {
int writes_starved;
int front_merges;
u32 async_depth;
- int aging_expire;
spinlock_t lock;
spinlock_t zone_lock;
@@ -234,10 +232,8 @@ static void dd_merged_requests(struct request_queue *q, struct request *req,
struct deadline_data *dd = q->elevator->elevator_data;
const u8 ioprio_class = dd_rq_ioclass(next);
const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
- struct dd_blkcg *blkcg = next->elv.priv[0];
dd_count(dd, merged, prio);
- ddcg_count(blkcg, merged, ioprio_class);
/*
* if next expires before rq, assign its expire time to rq
@@ -367,15 +363,13 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
/*
* deadline_dispatch_requests selects the best request according to
- * read/write expire, fifo_batch, etc and with a start time <= @latest.
+ * read/write expire, fifo_batch, etc
*/
static struct request *__dd_dispatch_request(struct deadline_data *dd,
- struct dd_per_prio *per_prio,
- u64 latest_start_ns)
+ struct dd_per_prio *per_prio)
{
struct request *rq, *next_rq;
enum dd_data_dir data_dir;
- struct dd_blkcg *blkcg;
enum dd_prio prio;
u8 ioprio_class;
@@ -384,8 +378,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
if (!list_empty(&per_prio->dispatch)) {
rq = list_first_entry(&per_prio->dispatch, struct request,
queuelist);
- if (rq->start_time_ns > latest_start_ns)
- return NULL;
list_del_init(&rq->queuelist);
goto done;
}
@@ -463,8 +455,6 @@ dispatch_find_request:
dd->batching = 0;
dispatch_request:
- if (rq->start_time_ns > latest_start_ns)
- return NULL;
/*
* rq is the selected appropriate request.
*/
@@ -474,8 +464,6 @@ done:
ioprio_class = dd_rq_ioclass(rq);
prio = ioprio_class_to_prio[ioprio_class];
dd_count(dd, dispatched, prio);
- blkcg = rq->elv.priv[0];
- ddcg_count(blkcg, dispatched, ioprio_class);
/*
* If the request needs its target zone locked, do it.
*/
@@ -495,32 +483,15 @@ done:
static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
struct deadline_data *dd = hctx->queue->elevator->elevator_data;
- const u64 now_ns = ktime_get_ns();
- struct request *rq = NULL;
+ struct request *rq;
enum dd_prio prio;
spin_lock(&dd->lock);
- /*
- * Start with dispatching requests whose deadline expired more than
- * aging_expire jiffies ago.
- */
- for (prio = DD_BE_PRIO; prio <= DD_PRIO_MAX; prio++) {
- rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now_ns -
- jiffies_to_nsecs(dd->aging_expire));
- if (rq)
- goto unlock;
- }
- /*
- * Next, dispatch requests in priority order. Ignore lower priority
- * requests if any higher priority requests are pending.
- */
for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
- rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now_ns);
- if (rq || dd_queued(dd, prio))
+ rq = __dd_dispatch_request(dd, &dd->per_prio[prio]);
+ if (rq)
break;
}
-
-unlock:
spin_unlock(&dd->lock);
return rq;
@@ -569,8 +540,6 @@ static void dd_exit_sched(struct elevator_queue *e)
struct deadline_data *dd = e->elevator_data;
enum dd_prio prio;
- dd_deactivate_policy(dd->queue);
-
for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
struct dd_per_prio *per_prio = &dd->per_prio[prio];
@@ -584,7 +553,7 @@ static void dd_exit_sched(struct elevator_queue *e)
}
/*
- * Initialize elevator private data (deadline_data) and associate with blkcg.
+ * initialize elevator private data (deadline_data).
*/
static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
{
@@ -593,12 +562,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
enum dd_prio prio;
int ret = -ENOMEM;
- /*
- * Initialization would be very tricky if the queue is not frozen,
- * hence the warning statement below.
- */
- WARN_ON_ONCE(!percpu_ref_is_zero(&q->q_usage_counter));
-
eq = elevator_alloc(q, e);
if (!eq)
return ret;
@@ -614,8 +577,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
if (!dd->stats)
goto free_dd;
- dd->queue = q;
-
for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
struct dd_per_prio *per_prio = &dd->per_prio[prio];
@@ -631,21 +592,12 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
dd->front_merges = 1;
dd->last_dir = DD_WRITE;
dd->fifo_batch = fifo_batch;
- dd->aging_expire = aging_expire;
spin_lock_init(&dd->lock);
spin_lock_init(&dd->zone_lock);
- ret = dd_activate_policy(q);
- if (ret)
- goto free_stats;
-
- ret = 0;
q->elevator = eq;
return 0;
-free_stats:
- free_percpu(dd->stats);
-
free_dd:
kfree(dd);
@@ -677,6 +629,8 @@ static int dd_request_merge(struct request_queue *q, struct request **rq,
if (elv_bio_merge_ok(__rq, bio)) {
*rq = __rq;
+ if (blk_discard_mergable(__rq))
+ return ELEVATOR_DISCARD_MERGE;
return ELEVATOR_FRONT_MERGE;
}
}
@@ -718,7 +672,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
struct dd_per_prio *per_prio;
enum dd_prio prio;
- struct dd_blkcg *blkcg;
LIST_HEAD(free);
lockdep_assert_held(&dd->lock);
@@ -729,18 +682,9 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
*/
blk_req_zone_write_unlock(rq);
- /*
- * If a block cgroup has been associated with the submitter and if an
- * I/O priority has been set in the associated block cgroup, use the
- * lowest of the cgroup priority and the request priority for the
- * request. If no priority has been set in the request, use the cgroup
- * priority.
- */
prio = ioprio_class_to_prio[ioprio_class];
dd_count(dd, inserted, prio);
- blkcg = dd_blkcg_from_bio(rq->bio);
- ddcg_count(blkcg, inserted, ioprio_class);
- rq->elv.priv[0] = blkcg;
+ rq->elv.priv[0] = (void *)(uintptr_t)1;
if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
blk_mq_free_requests(&free);
@@ -815,13 +759,18 @@ static void dd_finish_request(struct request *rq)
{
struct request_queue *q = rq->q;
struct deadline_data *dd = q->elevator->elevator_data;
- struct dd_blkcg *blkcg = rq->elv.priv[0];
const u8 ioprio_class = dd_rq_ioclass(rq);
const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
struct dd_per_prio *per_prio = &dd->per_prio[prio];
- dd_count(dd, completed, prio);
- ddcg_count(blkcg, completed, ioprio_class);
+ /*
+ * The block layer core may call dd_finish_request() without having
+ * called dd_insert_requests(). Hence only update statistics for
+ * requests for which dd_insert_requests() has been called. See also
+ * blk_mq_request_bypass_insert().
+ */
+ if (rq->elv.priv[0])
+ dd_count(dd, completed, prio);
if (blk_queue_is_zoned(q)) {
unsigned long flags;
@@ -866,7 +815,6 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page) \
#define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR))
SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]);
SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
-SHOW_JIFFIES(deadline_aging_expire_show, dd->aging_expire);
SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
SHOW_INT(deadline_front_merges_show, dd->front_merges);
SHOW_INT(deadline_async_depth_show, dd->front_merges);
@@ -896,7 +844,6 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, msecs_to_jiffies)
STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX);
STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX);
-STORE_JIFFIES(deadline_aging_expire_store, &dd->aging_expire, 0, INT_MAX);
STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
STORE_INT(deadline_async_depth_store, &dd->front_merges, 1, INT_MAX);
@@ -915,7 +862,6 @@ static struct elv_fs_entry deadline_attrs[] = {
DD_ATTR(front_merges),
DD_ATTR(async_depth),
DD_ATTR(fifo_batch),
- DD_ATTR(aging_expire),
__ATTR_NULL
};
@@ -1144,26 +1090,11 @@ MODULE_ALIAS("mq-deadline-iosched");
static int __init deadline_init(void)
{
- int ret;
-
- ret = elv_register(&mq_deadline);
- if (ret)
- goto out;
- ret = dd_blkcg_init();
- if (ret)
- goto unreg;
-
-out:
- return ret;
-
-unreg:
- elv_unregister(&mq_deadline);
- goto out;
+ return elv_register(&mq_deadline);
}
static void __exit deadline_exit(void)
{
- dd_blkcg_exit();
elv_unregister(&mq_deadline);
}
diff --git a/block/partitions/Kconfig b/block/partitions/Kconfig
index 6e2a649669e5..278593b8e4e9 100644
--- a/block/partitions/Kconfig
+++ b/block/partitions/Kconfig
@@ -264,7 +264,6 @@ config SYSV68_PARTITION
config CMDLINE_PARTITION
bool "Command line partition support" if PARTITION_ADVANCED
- select BLK_CMDLINE_PARSER
help
Say Y here if you want to read the partition table from bootargs.
The format for the command line is just like mtdparts.
diff --git a/block/partitions/acorn.c b/block/partitions/acorn.c
index c64c57b958bf..2c381c694c57 100644
--- a/block/partitions/acorn.c
+++ b/block/partitions/acorn.c
@@ -275,7 +275,7 @@ int adfspart_check_ADFS(struct parsed_partitions *state)
/*
* Work out start of non-adfs partition.
*/
- nr_sects = (state->bdev->bd_inode->i_size >> 9) - start_sect;
+ nr_sects = get_capacity(state->disk) - start_sect;
if (start_sect) {
switch (id) {
@@ -540,7 +540,7 @@ int adfspart_check_EESOX(struct parsed_partitions *state)
if (i != 0) {
sector_t size;
- size = get_capacity(state->bdev->bd_disk);
+ size = get_capacity(state->disk);
put_partition(state, slot++, start, size - start);
strlcat(state->pp_buf, "\n", PAGE_SIZE);
}
diff --git a/block/partitions/aix.c b/block/partitions/aix.c
index c7b4fd1a4a97..85f4b967565e 100644
--- a/block/partitions/aix.c
+++ b/block/partitions/aix.c
@@ -67,29 +67,13 @@ struct pvd {
#define LVM_MAXLVS 256
/**
- * last_lba(): return number of last logical block of device
- * @bdev: block device
- *
- * Description: Returns last LBA value on success, 0 on error.
- * This is stored (by sd and ide-geometry) in
- * the part[0] entry for this disk, and is the number of
- * physical sectors available on the disk.
- */
-static u64 last_lba(struct block_device *bdev)
-{
- if (!bdev || !bdev->bd_inode)
- return 0;
- return (bdev->bd_inode->i_size >> 9) - 1ULL;
-}
-
-/**
* read_lba(): Read bytes from disk, starting at given LBA
* @state
* @lba
* @buffer
* @count
*
- * Description: Reads @count bytes from @state->bdev into @buffer.
+ * Description: Reads @count bytes from @state->disk into @buffer.
* Returns number of bytes read on success, 0 on error.
*/
static size_t read_lba(struct parsed_partitions *state, u64 lba, u8 *buffer,
@@ -97,7 +81,7 @@ static size_t read_lba(struct parsed_partitions *state, u64 lba, u8 *buffer,
{
size_t totalreadcount = 0;
- if (!buffer || lba + count / 512 > last_lba(state->bdev))
+ if (!buffer || lba + count / 512 > get_capacity(state->disk) - 1ULL)
return 0;
while (count) {
diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c
index 9526491d9aed..5c8624e26a54 100644
--- a/block/partitions/amiga.c
+++ b/block/partitions/amiga.c
@@ -34,7 +34,6 @@ int amiga_partition(struct parsed_partitions *state)
int start_sect, nr_sects, blk, part, res = 0;
int blksize = 1; /* Multiplier for disk block size */
int slot = 1;
- char b[BDEVNAME_SIZE];
for (blk = 0; ; blk++, put_dev_sector(sect)) {
if (blk == RDB_ALLOCATION_LIMIT)
@@ -42,7 +41,7 @@ int amiga_partition(struct parsed_partitions *state)
data = read_part_sector(state, blk, &sect);
if (!data) {
pr_err("Dev %s: unable to read RDB block %d\n",
- bdevname(state->bdev, b), blk);
+ state->disk->disk_name, blk);
res = -1;
goto rdb_done;
}
@@ -64,7 +63,7 @@ int amiga_partition(struct parsed_partitions *state)
}
pr_err("Dev %s: RDB in block %d has bad checksum\n",
- bdevname(state->bdev, b), blk);
+ state->disk->disk_name, blk);
}
/* blksize is blocks per 512 byte standard block */
@@ -84,7 +83,7 @@ int amiga_partition(struct parsed_partitions *state)
data = read_part_sector(state, blk, &sect);
if (!data) {
pr_err("Dev %s: unable to read partition block %d\n",
- bdevname(state->bdev, b), blk);
+ state->disk->disk_name, blk);
res = -1;
goto rdb_done;
}
diff --git a/block/partitions/atari.c b/block/partitions/atari.c
index 2305840c8522..da5994175416 100644
--- a/block/partitions/atari.c
+++ b/block/partitions/atari.c
@@ -47,7 +47,7 @@ int atari_partition(struct parsed_partitions *state)
* ATARI partition scheme supports 512 lba only. If this is not
* the case, bail early to avoid miscalculating hd_size.
*/
- if (bdev_logical_block_size(state->bdev) != 512)
+ if (queue_logical_block_size(state->disk->queue) != 512)
return 0;
rs = read_part_sector(state, 0, &sect);
@@ -55,7 +55,7 @@ int atari_partition(struct parsed_partitions *state)
return -1;
/* Verify this is an Atari rootsector: */
- hd_size = state->bdev->bd_inode->i_size >> 9;
+ hd_size = get_capacity(state->disk);
if (!VALID_PARTITION(&rs->part[0], hd_size) &&
!VALID_PARTITION(&rs->part[1], hd_size) &&
!VALID_PARTITION(&rs->part[2], hd_size) &&
diff --git a/block/partitions/check.h b/block/partitions/check.h
index c577e9ee67f0..d5b28e309d64 100644
--- a/block/partitions/check.h
+++ b/block/partitions/check.h
@@ -9,7 +9,7 @@
* description.
*/
struct parsed_partitions {
- struct block_device *bdev;
+ struct gendisk *disk;
char name[BDEVNAME_SIZE];
struct {
sector_t from;
diff --git a/block/partitions/cmdline.c b/block/partitions/cmdline.c
index 8f545c36cde4..1af610f0ba8c 100644
--- a/block/partitions/cmdline.c
+++ b/block/partitions/cmdline.c
@@ -14,20 +14,248 @@
* For further information, see "Documentation/block/cmdline-partition.rst"
*
*/
+#include <linux/blkdev.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include "check.h"
-#include <linux/cmdline-parser.h>
-#include "check.h"
+/* partition flags */
+#define PF_RDONLY 0x01 /* Device is read only */
+#define PF_POWERUP_LOCK 0x02 /* Always locked after reset */
+
+struct cmdline_subpart {
+ char name[BDEVNAME_SIZE]; /* partition name, such as 'rootfs' */
+ sector_t from; /* start offset; ~0ULL until cmdline_parts_set() assigns one */
+ sector_t size; /* ~0ULL when given as '-'; clamped by cmdline_parts_set() */
+ int flags; /* PF_* bits */
+ struct cmdline_subpart *next_subpart; /* next entry, NULL at the end of the list */
+};
+
+struct cmdline_parts {
+ char name[BDEVNAME_SIZE]; /* block device, such as 'mmcblk0' */
+ unsigned int nr_subparts; /* number of entries on the subpart list */
+ struct cmdline_subpart *subpart; /* head of the subpartition list */
+ struct cmdline_parts *next_parts; /* next device definition, NULL at the end */
+};
+
+static int parse_subpart(struct cmdline_subpart **subpart, char *partdef)
+{
+ int ret = 0;
+ struct cmdline_subpart *new_subpart;
+
+ *subpart = NULL; /* the caller sees NULL on every failure path */
+
+ new_subpart = kzalloc(sizeof(struct cmdline_subpart), GFP_KERNEL);
+ if (!new_subpart)
+ return -ENOMEM;
+
+ if (*partdef == '-') { /* '-' asks for all remaining space */
+ new_subpart->size = (sector_t)(~0ULL);
+ partdef++;
+ } else {
+ new_subpart->size = (sector_t)memparse(partdef, &partdef);
+ if (new_subpart->size < (sector_t)PAGE_SIZE) { /* sizes below one page are rejected */
+ pr_warn("cmdline partition size is invalid.\n");
+ ret = -EINVAL;
+ goto fail;
+ }
+ }
+
+ if (*partdef == '@') { /* optional "@offset" */
+ partdef++;
+ new_subpart->from = (sector_t)memparse(partdef, &partdef);
+ } else {
+ new_subpart->from = (sector_t)(~0ULL); /* resolved later by cmdline_parts_set() */
+ }
+
+ if (*partdef == '(') { /* optional "(name)" */
+ int length;
+ char *next = strchr(++partdef, ')');
+
+ if (!next) {
+ pr_warn("cmdline partition format is invalid.\n");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ length = min_t(int, next - partdef,
+ sizeof(new_subpart->name) - 1); /* over-long names are truncated */
+ strncpy(new_subpart->name, partdef, length);
+ new_subpart->name[length] = '\0';
+
+ partdef = ++next; /* continue right after ')' */
+ } else
+ new_subpart->name[0] = '\0';
+
+ new_subpart->flags = 0;
+
+ if (!strncmp(partdef, "ro", 2)) { /* flags are optional; "ro" is checked before "lk" */
+ new_subpart->flags |= PF_RDONLY;
+ partdef += 2;
+ }
+
+ if (!strncmp(partdef, "lk", 2)) {
+ new_subpart->flags |= PF_POWERUP_LOCK;
+ partdef += 2;
+ }
+
+ *subpart = new_subpart; /* ownership passes to the caller */
+ return 0;
+fail:
+ kfree(new_subpart);
+ return ret;
+}
+
+static void free_subpart(struct cmdline_parts *parts)
+{
+ struct cmdline_subpart *subpart;
+
+ while (parts->subpart) { /* pop and free from the head until the list is empty */
+ subpart = parts->subpart;
+ parts->subpart = subpart->next_subpart; /* unlink before freeing */
+ kfree(subpart);
+ }
+}
+
+static int parse_parts(struct cmdline_parts **parts, const char *bdevdef)
+{
+ int ret = -EINVAL;
+ char *next;
+ int length;
+ struct cmdline_subpart **next_subpart;
+ struct cmdline_parts *newparts;
+ char buf[BDEVNAME_SIZE + 32 + 4];
+
+ *parts = NULL; /* the caller sees NULL on every failure path */
+
+ newparts = kzalloc(sizeof(struct cmdline_parts), GFP_KERNEL);
+ if (!newparts)
+ return -ENOMEM;
+
+ next = strchr(bdevdef, ':'); /* the "<device>:" prefix is mandatory */
+ if (!next) {
+ pr_warn("cmdline partition has no block device.\n");
+ goto fail;
+ }
+
+ length = min_t(int, next - bdevdef, sizeof(newparts->name) - 1);
+ strncpy(newparts->name, bdevdef, length);
+ newparts->name[length] = '\0';
+ newparts->nr_subparts = 0;
+
+ next_subpart = &newparts->subpart; /* tail pointer used to append in order */
+
+ while (next && *(++next)) { /* one iteration per ','-separated definition */
+ bdevdef = next;
+ next = strchr(bdevdef, ',');
+
+ length = (!next) ? (sizeof(buf) - 1) :
+ min_t(int, next - bdevdef, sizeof(buf) - 1);
+
+ strncpy(buf, bdevdef, length); /* bounded, writable copy for parse_subpart() */
+ buf[length] = '\0';
+
+ ret = parse_subpart(next_subpart, buf);
+ if (ret)
+ goto fail;
+
+ newparts->nr_subparts++;
+ next_subpart = &(*next_subpart)->next_subpart;
+ }
+
+ if (!newparts->subpart) { /* nothing followed the ':' */
+ pr_warn("cmdline partition has no valid partition.\n");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ *parts = newparts; /* ownership passes to the caller */
+
+ return 0;
+fail:
+ free_subpart(newparts); /* drops any subparts parsed before the failure */
+ kfree(newparts);
+ return ret;
+}
+
+static void cmdline_parts_free(struct cmdline_parts **parts)
+{
+ struct cmdline_parts *next_parts;
+
+ while (*parts) { /* leaves *parts == NULL once the whole list is freed */
+ next_parts = (*parts)->next_parts; /* saved before the node is freed */
+ free_subpart(*parts);
+ kfree(*parts);
+ *parts = next_parts;
+ }
+}
+
+static int cmdline_parts_parse(struct cmdline_parts **parts,
+ const char *cmdline)
+{
+ int ret;
+ char *buf;
+ char *pbuf;
+ char *next;
+ struct cmdline_parts **next_parts;
+
+ *parts = NULL;
+
+ next = pbuf = buf = kstrdup(cmdline, GFP_KERNEL); /* private copy: ';' is overwritten below */
+ if (!buf)
+ return -ENOMEM;
+
+ next_parts = parts; /* tail pointer used to append in order */
+
+ while (next && *pbuf) { /* one iteration per ';'-separated device definition */
+ next = strchr(pbuf, ';');
+ if (next)
+ *next = '\0';
+
+ ret = parse_parts(next_parts, pbuf);
+ if (ret)
+ goto fail;
+
+ if (next)
+ pbuf = ++next;
+
+ next_parts = &(*next_parts)->next_parts;
+ }
+
+ if (!*parts) { /* empty command line: nothing was parsed */
+ pr_warn("cmdline partition has no valid partition.\n");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ ret = 0;
+done:
+ kfree(buf);
+ return ret;
+
+fail:
+ cmdline_parts_free(parts); /* frees everything parsed so far and resets *parts to NULL */
+ goto done;
+}
+
+static struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
+ const char *bdev)
+{
+ while (parts && strncmp(bdev, parts->name, sizeof(parts->name))) /* skip entries whose name differs */
+ parts = parts->next_parts;
+ return parts; /* first matching entry, or NULL if none matched */
+}
static char *cmdline;
static struct cmdline_parts *bdev_parts;
-static int add_part(int slot, struct cmdline_subpart *subpart, void *param)
+static int add_part(int slot, struct cmdline_subpart *subpart,
+ struct parsed_partitions *state)
{
int label_min;
struct partition_meta_info *info;
char tmp[sizeof(info->volname) + 4];
- struct parsed_partitions *state = (struct parsed_partitions *)param;
if (slot >= state->limit)
return 1;
@@ -50,6 +278,35 @@ static int add_part(int slot, struct cmdline_subpart *subpart, void *param)
return 0;
}
+static int cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
+ struct parsed_partitions *state)
+{
+ sector_t from = 0; /* running offset: end of the previous partition */
+ struct cmdline_subpart *subpart;
+ int slot = 1;
+
+ for (subpart = parts->subpart; subpart;
+ subpart = subpart->next_subpart, slot++) {
+ if (subpart->from == (sector_t)(~0ULL)) /* no explicit offset: continue after the previous one */
+ subpart->from = from;
+ else
+ from = subpart->from;
+
+ if (from >= disk_size) /* starts beyond the disk: stop here */
+ break;
+
+ if (subpart->size > (disk_size - from)) /* clamp to the space left; also resolves a '-' size */
+ subpart->size = disk_size - from;
+
+ from += subpart->size;
+
+ if (add_part(slot, subpart, state)) /* non-zero return: stop adding partitions */
+ break;
+ }
+
+ return slot; /* first slot that was not filled */
+}
+
static int __init cmdline_parts_setup(char *s)
{
cmdline = s;
@@ -123,7 +380,6 @@ static void cmdline_parts_verifier(int slot, struct parsed_partitions *state)
int cmdline_partition(struct parsed_partitions *state)
{
sector_t disk_size;
- char bdev[BDEVNAME_SIZE];
struct cmdline_parts *parts;
if (cmdline) {
@@ -140,14 +396,13 @@ int cmdline_partition(struct parsed_partitions *state)
if (!bdev_parts)
return 0;
- bdevname(state->bdev, bdev);
- parts = cmdline_parts_find(bdev_parts, bdev);
+ parts = cmdline_parts_find(bdev_parts, state->disk->disk_name);
if (!parts)
return 0;
- disk_size = get_capacity(state->bdev->bd_disk) << 9;
+ disk_size = get_capacity(state->disk) << 9;
- cmdline_parts_set(parts, disk_size, 1, add_part, (void *)state);
+ cmdline_parts_set(parts, disk_size, state);
cmdline_parts_verifier(1, state);
strlcat(state->pp_buf, "\n", PAGE_SIZE);
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 4230d4f71879..58c4c362c94f 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -135,8 +135,8 @@ static struct parsed_partitions *check_partition(struct gendisk *hd)
}
state->pp_buf[0] = '\0';
- state->bdev = hd->part0;
- disk_name(hd, 0, state->name);
+ state->disk = hd;
+ snprintf(state->name, BDEVNAME_SIZE, "%s", hd->disk_name);
snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
if (isdigit(state->name[strlen(state->name)-1]))
sprintf(state->name, "p");
@@ -259,9 +259,8 @@ static const struct attribute_group *part_attr_groups[] = {
static void part_release(struct device *dev)
{
- if (MAJOR(dev->devt) == BLOCK_EXT_MAJOR)
- blk_free_ext_minor(MINOR(dev->devt));
- bdput(dev_to_bdev(dev));
+ put_disk(dev_to_bdev(dev)->bd_disk);
+ iput(dev_to_bdev(dev)->bd_inode);
}
static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
@@ -281,12 +280,10 @@ struct device_type part_type = {
.uevent = part_uevent,
};
-/*
- * Must be called either with open_mutex held, before a disk can be opened or
- * after all disk users are gone.
- */
static void delete_partition(struct block_device *part)
{
+ lockdep_assert_held(&part->bd_disk->open_mutex);
+
fsync_bdev(part);
__invalidate_device(part, true);
@@ -351,20 +348,17 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
if (xa_load(&disk->part_tbl, partno))
return ERR_PTR(-EBUSY);
+ /* ensure we always have a reference to the whole disk */
+ get_device(disk_to_dev(disk));
+
+ err = -ENOMEM;
bdev = bdev_alloc(disk, partno);
if (!bdev)
- return ERR_PTR(-ENOMEM);
+ goto out_put_disk;
bdev->bd_start_sect = start;
bdev_set_nr_sectors(bdev, len);
- if (info) {
- err = -ENOMEM;
- bdev->bd_meta_info = kmemdup(info, sizeof(*info), GFP_KERNEL);
- if (!bdev->bd_meta_info)
- goto out_bdput;
- }
-
pdev = &bdev->bd_device;
dname = dev_name(ddev);
if (isdigit(dname[strlen(dname) - 1]))
@@ -388,6 +382,13 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
}
pdev->devt = devt;
+ if (info) {
+ err = -ENOMEM;
+ bdev->bd_meta_info = kmemdup(info, sizeof(*info), GFP_KERNEL);
+ if (!bdev->bd_meta_info)
+ goto out_put;
+ }
+
/* delay uevent until 'holders' subdir is created */
dev_set_uevent_suppress(pdev, 1);
err = device_add(pdev);
@@ -417,14 +418,13 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
kobject_uevent(&pdev->kobj, KOBJ_ADD);
return bdev;
-out_bdput:
- bdput(bdev);
- return ERR_PTR(err);
out_del:
kobject_put(bdev->bd_holder_dir);
device_del(pdev);
out_put:
put_device(pdev);
+out_put_disk:
+ put_disk(disk);
return ERR_PTR(err);
}
@@ -449,15 +449,14 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start,
return overlap;
}
-int bdev_add_partition(struct block_device *bdev, int partno,
- sector_t start, sector_t length)
+int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
+ sector_t length)
{
struct block_device *part;
- struct gendisk *disk = bdev->bd_disk;
int ret;
mutex_lock(&disk->open_mutex);
- if (!(disk->flags & GENHD_FL_UP)) {
+ if (!disk_live(disk)) {
ret = -ENXIO;
goto out;
}
@@ -475,13 +474,13 @@ out:
return ret;
}
-int bdev_del_partition(struct block_device *bdev, int partno)
+int bdev_del_partition(struct gendisk *disk, int partno)
{
struct block_device *part = NULL;
int ret = -ENXIO;
- mutex_lock(&bdev->bd_disk->open_mutex);
- part = xa_load(&bdev->bd_disk->part_tbl, partno);
+ mutex_lock(&disk->open_mutex);
+ part = xa_load(&disk->part_tbl, partno);
if (!part)
goto out_unlock;
@@ -492,18 +491,18 @@ int bdev_del_partition(struct block_device *bdev, int partno)
delete_partition(part);
ret = 0;
out_unlock:
- mutex_unlock(&bdev->bd_disk->open_mutex);
+ mutex_unlock(&disk->open_mutex);
return ret;
}
-int bdev_resize_partition(struct block_device *bdev, int partno,
- sector_t start, sector_t length)
+int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start,
+ sector_t length)
{
struct block_device *part = NULL;
int ret = -ENXIO;
- mutex_lock(&bdev->bd_disk->open_mutex);
- part = xa_load(&bdev->bd_disk->part_tbl, partno);
+ mutex_lock(&disk->open_mutex);
+ part = xa_load(&disk->part_tbl, partno);
if (!part)
goto out_unlock;
@@ -512,14 +511,14 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
goto out_unlock;
ret = -EBUSY;
- if (partition_overlaps(bdev->bd_disk, start, length, partno))
+ if (partition_overlaps(disk, start, length, partno))
goto out_unlock;
bdev_set_nr_sectors(part, length);
ret = 0;
out_unlock:
- mutex_unlock(&bdev->bd_disk->open_mutex);
+ mutex_unlock(&disk->open_mutex);
return ret;
}
@@ -667,7 +666,7 @@ int bdev_disk_changed(struct gendisk *disk, bool invalidate)
lockdep_assert_held(&disk->open_mutex);
- if (!(disk->flags & GENHD_FL_UP))
+ if (!disk_live(disk))
return -ENXIO;
rescan:
@@ -715,10 +714,10 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed);
void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p)
{
- struct address_space *mapping = state->bdev->bd_inode->i_mapping;
+ struct address_space *mapping = state->disk->part0->bd_inode->i_mapping;
struct page *page;
- if (n >= get_capacity(state->bdev->bd_disk)) {
+ if (n >= get_capacity(state->disk)) {
state->access_beyond_eod = true;
return NULL;
}
diff --git a/block/partitions/efi.c b/block/partitions/efi.c
index e2716792ecc1..7ca5c4c374d4 100644
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -124,19 +124,17 @@ efi_crc32(const void *buf, unsigned long len)
/**
* last_lba(): return number of last logical block of device
- * @bdev: block device
+ * @disk: gendisk of the block device
*
* Description: Returns last LBA value on success, 0 on error.
* This is stored (by sd and ide-geometry) in
* the part[0] entry for this disk, and is the number of
* physical sectors available on the disk.
*/
-static u64 last_lba(struct block_device *bdev)
+static u64 last_lba(struct gendisk *disk)
{
- if (!bdev || !bdev->bd_inode)
- return 0;
- return div_u64(bdev->bd_inode->i_size,
- bdev_logical_block_size(bdev)) - 1ULL;
+ return div_u64(disk->part0->bd_inode->i_size,
+ queue_logical_block_size(disk->queue)) - 1ULL;
}
static inline int pmbr_part_valid(gpt_mbr_record *part)
@@ -231,17 +229,17 @@ done:
* @buffer: destination buffer
* @count: bytes to read
*
- * Description: Reads @count bytes from @state->bdev into @buffer.
+ * Description: Reads @count bytes from @state->disk into @buffer.
* Returns number of bytes read on success, 0 on error.
*/
static size_t read_lba(struct parsed_partitions *state,
u64 lba, u8 *buffer, size_t count)
{
size_t totalreadcount = 0;
- struct block_device *bdev = state->bdev;
- sector_t n = lba * (bdev_logical_block_size(bdev) / 512);
+ sector_t n = lba *
+ (queue_logical_block_size(state->disk->queue) / 512);
- if (!buffer || lba > last_lba(bdev))
+ if (!buffer || lba > last_lba(state->disk))
return 0;
while (count) {
@@ -302,14 +300,14 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state,
* @lba: the Logical Block Address of the partition table
*
* Description: returns GPT header on success, NULL on error. Allocates
- * and fills a GPT header starting at @ from @state->bdev.
+ * and fills a GPT header starting at @lba from @state->disk.
* Note: remember to free gpt when finished with it.
*/
static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state,
u64 lba)
{
gpt_header *gpt;
- unsigned ssz = bdev_logical_block_size(state->bdev);
+ unsigned ssz = queue_logical_block_size(state->disk->queue);
gpt = kmalloc(ssz, GFP_KERNEL);
if (!gpt)
@@ -356,10 +354,10 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
/* Check the GUID Partition Table header size is too big */
if (le32_to_cpu((*gpt)->header_size) >
- bdev_logical_block_size(state->bdev)) {
+ queue_logical_block_size(state->disk->queue)) {
pr_debug("GUID Partition Table Header size is too large: %u > %u\n",
le32_to_cpu((*gpt)->header_size),
- bdev_logical_block_size(state->bdev));
+ queue_logical_block_size(state->disk->queue));
goto fail;
}
@@ -395,7 +393,7 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
/* Check the first_usable_lba and last_usable_lba are
* within the disk.
*/
- lastlba = last_lba(state->bdev);
+ lastlba = last_lba(state->disk);
if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
(unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
@@ -587,13 +585,15 @@ static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt,
gpt_header *pgpt = NULL, *agpt = NULL;
gpt_entry *pptes = NULL, *aptes = NULL;
legacy_mbr *legacymbr;
- sector_t total_sectors = i_size_read(state->bdev->bd_inode) >> 9;
+ struct gendisk *disk = state->disk;
+ const struct block_device_operations *fops = disk->fops;
+ sector_t total_sectors = get_capacity(state->disk);
u64 lastlba;
if (!ptes)
return 0;
- lastlba = last_lba(state->bdev);
+ lastlba = last_lba(state->disk);
if (!force_gpt) {
/* This will be added to the EFI Spec. per Intel after v1.02. */
legacymbr = kzalloc(sizeof(*legacymbr), GFP_KERNEL);
@@ -621,6 +621,16 @@ static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt,
if (!good_agpt && force_gpt)
good_agpt = is_gpt_valid(state, lastlba, &agpt, &aptes);
+ if (!good_agpt && force_gpt && fops->alternative_gpt_sector) {
+ sector_t agpt_sector;
+ int err;
+
+ err = fops->alternative_gpt_sector(disk, &agpt_sector);
+ if (!err)
+ good_agpt = is_gpt_valid(state, agpt_sector,
+ &agpt, &aptes);
+ }
+
/* The obviously unsuccessful case */
if (!good_pgpt && !good_agpt)
goto fail;
@@ -705,7 +715,7 @@ int efi_partition(struct parsed_partitions *state)
gpt_header *gpt = NULL;
gpt_entry *ptes = NULL;
u32 i;
- unsigned ssz = bdev_logical_block_size(state->bdev) / 512;
+ unsigned ssz = queue_logical_block_size(state->disk->queue) / 512;
if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) {
kfree(gpt);
@@ -722,7 +732,7 @@ int efi_partition(struct parsed_partitions *state)
u64 size = le64_to_cpu(ptes[i].ending_lba) -
le64_to_cpu(ptes[i].starting_lba) + 1ULL;
- if (!is_pte_valid(&ptes[i], last_lba(state->bdev)))
+ if (!is_pte_valid(&ptes[i], last_lba(state->disk)))
continue;
put_partition(state, i+1, start * ssz, size * ssz);
diff --git a/block/partitions/ibm.c b/block/partitions/ibm.c
index 4b044e620d35..9bca396aef4a 100644
--- a/block/partitions/ibm.c
+++ b/block/partitions/ibm.c
@@ -290,8 +290,8 @@ static int find_cms1_partitions(struct parsed_partitions *state,
int ibm_partition(struct parsed_partitions *state)
{
int (*fn)(struct gendisk *disk, dasd_information2_t *info);
- struct block_device *bdev = state->bdev;
- struct gendisk *disk = bdev->bd_disk;
+ struct gendisk *disk = state->disk;
+ struct block_device *bdev = disk->part0;
int blocksize, res;
loff_t i_size, offset, size;
dasd_information2_t *info;
diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c
index b8b518d7fb77..27f6c7d9c776 100644
--- a/block/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -304,7 +304,7 @@ static bool ldm_validate_privheads(struct parsed_partitions *state,
}
}
- num_sects = state->bdev->bd_inode->i_size >> 9;
+ num_sects = get_capacity(state->disk);
if ((ph[0]->config_start > num_sects) ||
((ph[0]->config_start + ph[0]->config_size) > num_sects)) {
@@ -339,11 +339,11 @@ out:
/**
* ldm_validate_tocblocks - Validate the table of contents and its backups
* @state: Partition check state including device holding the LDM Database
- * @base: Offset, into @state->bdev, of the database
+ * @base: Offset, into @state->disk, of the database
* @ldb: Cache of the database structures
*
* Find and compare the four tables of contents of the LDM Database stored on
- * @state->bdev and return the parsed information into @toc1.
+ * @state->disk and return the parsed information into @toc1.
*
* The offsets and sizes of the configs are range-checked against a privhead.
*
@@ -486,8 +486,8 @@ out:
* only likely to happen if the underlying device is strange. If that IS
* the case we should return zero to let someone else try.
*
- * Return: 'true' @state->bdev is a dynamic disk
- * 'false' @state->bdev is not a dynamic disk, or an error occurred
+ * Return: 'true' @state->disk is a dynamic disk
+ * 'false' @state->disk is not a dynamic disk, or an error occurred
*/
static bool ldm_validate_partition_table(struct parsed_partitions *state)
{
@@ -1340,7 +1340,7 @@ static bool ldm_frag_commit (struct list_head *frags, struct ldmdb *ldb)
/**
* ldm_get_vblks - Read the on-disk database of VBLKs into memory
* @state: Partition check state including device holding the LDM Database
- * @base: Offset, into @state->bdev, of the database
+ * @base: Offset, into @state->disk, of the database
* @ldb: Cache of the database structures
*
* To use the information from the VBLKs, they need to be read from the disk,
@@ -1432,10 +1432,10 @@ static void ldm_free_vblks (struct list_head *lh)
* example, if the device is hda, we would have: hda1: LDM database, hda2, hda3,
* and so on: the actual data containing partitions.
*
- * Return: 1 Success, @state->bdev is a dynamic disk and we handled it
- * 0 Success, @state->bdev is not a dynamic disk
+ * Return: 1 Success, @state->disk is a dynamic disk and we handled it
+ * 0 Success, @state->disk is not a dynamic disk
* -1 An error occurred before enough information had been read
- * Or @state->bdev is a dynamic disk, but it may be corrupted
+ * Or @state->disk is a dynamic disk, but it may be corrupted
*/
int ldm_partition(struct parsed_partitions *state)
{
diff --git a/block/partitions/mac.c b/block/partitions/mac.c
index b6095335636c..7b521df00a39 100644
--- a/block/partitions/mac.c
+++ b/block/partitions/mac.c
@@ -133,7 +133,7 @@ int mac_partition(struct parsed_partitions *state)
}
#ifdef CONFIG_PPC_PMAC
if (found_root_goodness)
- note_bootable_part(state->bdev->bd_dev, found_root,
+ note_bootable_part(state->disk->part0->bd_dev, found_root,
found_root_goodness);
#endif
diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c
index f5102596a984..b5d5c229cc3b 100644
--- a/block/partitions/msdos.c
+++ b/block/partitions/msdos.c
@@ -135,11 +135,12 @@ static void parse_extended(struct parsed_partitions *state,
Sector sect;
unsigned char *data;
sector_t this_sector, this_size;
- sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
+ sector_t sector_size;
int loopct = 0; /* number of links followed
without finding a data partition */
int i;
+ sector_size = queue_logical_block_size(state->disk->queue) / 512;
this_sector = first_sector;
this_size = first_size;
@@ -579,7 +580,7 @@ static struct {
int msdos_partition(struct parsed_partitions *state)
{
- sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
+ sector_t sector_size;
Sector sect;
unsigned char *data;
struct msdos_partition *p;
@@ -587,6 +588,7 @@ int msdos_partition(struct parsed_partitions *state)
int slot;
u32 disksig;
+ sector_size = queue_logical_block_size(state->disk->queue) / 512;
data = read_part_sector(state, 0, &sect);
if (!data)
return -1;
diff --git a/block/partitions/sgi.c b/block/partitions/sgi.c
index 4273f1bb0515..9cc6b8c1eea4 100644
--- a/block/partitions/sgi.c
+++ b/block/partitions/sgi.c
@@ -43,7 +43,6 @@ int sgi_partition(struct parsed_partitions *state)
Sector sect;
struct sgi_disklabel *label;
struct sgi_partition *p;
- char b[BDEVNAME_SIZE];
label = read_part_sector(state, 0, &sect);
if (!label)
@@ -52,7 +51,7 @@ int sgi_partition(struct parsed_partitions *state)
magic = label->magic_mushroom;
if(be32_to_cpu(magic) != SGI_LABEL_MAGIC) {
/*printk("Dev %s SGI disklabel: bad magic %08x\n",
- bdevname(bdev, b), be32_to_cpu(magic));*/
+ state->disk->disk_name, be32_to_cpu(magic));*/
put_dev_sector(sect);
return 0;
}
@@ -63,7 +62,7 @@ int sgi_partition(struct parsed_partitions *state)
}
if(csum) {
printk(KERN_WARNING "Dev %s SGI disklabel: csum bad, label corrupted\n",
- bdevname(state->bdev, b));
+ state->disk->disk_name);
put_dev_sector(sect);
return 0;
}
diff --git a/block/partitions/sun.c b/block/partitions/sun.c
index 47dc53eccf77..ddf9e6def4b2 100644
--- a/block/partitions/sun.c
+++ b/block/partitions/sun.c
@@ -65,7 +65,6 @@ int sun_partition(struct parsed_partitions *state)
} * label;
struct sun_partition *p;
unsigned long spc;
- char b[BDEVNAME_SIZE];
int use_vtoc;
int nparts;
@@ -76,7 +75,7 @@ int sun_partition(struct parsed_partitions *state)
p = label->partitions;
if (be16_to_cpu(label->magic) != SUN_LABEL_MAGIC) {
/* printk(KERN_INFO "Dev %s Sun disklabel: bad magic %04x\n",
- bdevname(bdev, b), be16_to_cpu(label->magic)); */
+ state->disk->disk_name, be16_to_cpu(label->magic)); */
put_dev_sector(sect);
return 0;
}
@@ -86,7 +85,7 @@ int sun_partition(struct parsed_partitions *state)
csum ^= *ush--;
if (csum) {
printk("Dev %s Sun disklabel: Csum bad, label corrupted\n",
- bdevname(state->bdev, b));
+ state->disk->disk_name);
put_dev_sector(sect);
return 0;
}
diff --git a/block/t10-pi.c b/block/t10-pi.c
index d910534b3a41..00c203b2a921 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -147,11 +147,10 @@ static void t10_pi_type1_prepare(struct request *rq)
break;
bip_for_each_vec(iv, bip, iter) {
- void *p, *pmap;
unsigned int j;
+ void *p;
- pmap = kmap_atomic(iv.bv_page);
- p = pmap + iv.bv_offset;
+ p = bvec_kmap_local(&iv);
for (j = 0; j < iv.bv_len; j += tuple_sz) {
struct t10_pi_tuple *pi = p;
@@ -161,8 +160,7 @@ static void t10_pi_type1_prepare(struct request *rq)
ref_tag++;
p += tuple_sz;
}
-
- kunmap_atomic(pmap);
+ kunmap_local(p);
}
bip->bip_flags |= BIP_MAPPED_INTEGRITY;
@@ -195,11 +193,10 @@ static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
struct bvec_iter iter;
bip_for_each_vec(iv, bip, iter) {
- void *p, *pmap;
unsigned int j;
+ void *p;
- pmap = kmap_atomic(iv.bv_page);
- p = pmap + iv.bv_offset;
+ p = bvec_kmap_local(&iv);
for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
struct t10_pi_tuple *pi = p;
@@ -210,8 +207,7 @@ static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
intervals--;
p += tuple_sz;
}
-
- kunmap_atomic(pmap);
+ kunmap_local(p);
}
}
}
diff --git a/certs/Kconfig b/certs/Kconfig
index f4e61116f94e..ae7f2e876a31 100644
--- a/certs/Kconfig
+++ b/certs/Kconfig
@@ -15,6 +15,32 @@ config MODULE_SIG_KEY
then the kernel will automatically generate the private key and
certificate as described in Documentation/admin-guide/module-signing.rst
+choice
+ prompt "Type of module signing key to be generated"
+ default MODULE_SIG_KEY_TYPE_RSA
+ help
+ The type of module signing key to generate. This option
+ does not apply if a #PKCS11 URI is used.
+
+config MODULE_SIG_KEY_TYPE_RSA
+ bool "RSA"
+ depends on MODULE_SIG || (IMA_APPRAISE_MODSIG && MODULES)
+ help
+ Use an RSA key for module signing.
+
+config MODULE_SIG_KEY_TYPE_ECDSA
+ bool "ECDSA"
+ select CRYPTO_ECDSA
+ depends on MODULE_SIG || (IMA_APPRAISE_MODSIG && MODULES)
+ help
+ Use an elliptic curve key (NIST P384) for module signing. Consider
+ using a strong hash like sha256 or sha384 for hashing modules.
+
+ Note: Remove all ECDSA signing keys, e.g. certs/signing_key.pem,
+ when falling back to building Linux 5.14 and older kernels.
+
+endchoice
+
config SYSTEM_TRUSTED_KEYRING
bool "Provide system-wide ring of trusted keys"
depends on KEYS
diff --git a/certs/Makefile b/certs/Makefile
index 359239a0ee9e..279433783b10 100644
--- a/certs/Makefile
+++ b/certs/Makefile
@@ -57,11 +57,31 @@ endif
redirect_openssl = 2>&1
quiet_redirect_openssl = 2>&1
silent_redirect_openssl = 2>/dev/null
+openssl_available = $(shell openssl help 2>/dev/null && echo yes)
# We do it this way rather than having a boolean option for enabling an
# external private key, because 'make randconfig' might enable such a
# boolean option and we unfortunately can't make it depend on !RANDCONFIG.
ifeq ($(CONFIG_MODULE_SIG_KEY),"certs/signing_key.pem")
+
+ifeq ($(openssl_available),yes)
+X509TEXT=$(shell openssl x509 -in "certs/signing_key.pem" -text 2>/dev/null)
+endif
+
+# Support user changing key type
+ifdef CONFIG_MODULE_SIG_KEY_TYPE_ECDSA
+keytype_openssl = -newkey ec -pkeyopt ec_paramgen_curve:secp384r1
+ifeq ($(openssl_available),yes)
+$(if $(findstring id-ecPublicKey,$(X509TEXT)),,$(shell rm -f "certs/signing_key.pem"))
+endif
+endif # CONFIG_MODULE_SIG_KEY_TYPE_ECDSA
+
+ifdef CONFIG_MODULE_SIG_KEY_TYPE_RSA
+ifeq ($(openssl_available),yes)
+$(if $(findstring rsaEncryption,$(X509TEXT)),,$(shell rm -f "certs/signing_key.pem"))
+endif
+endif # CONFIG_MODULE_SIG_KEY_TYPE_RSA
+
$(obj)/signing_key.pem: $(obj)/x509.genkey
@$(kecho) "###"
@$(kecho) "### Now generating an X.509 key pair to be used for signing modules."
@@ -75,6 +95,7 @@ $(obj)/signing_key.pem: $(obj)/x509.genkey
-batch -x509 -config $(obj)/x509.genkey \
-outform PEM -out $(obj)/signing_key.pem \
-keyout $(obj)/signing_key.pem \
+ $(keytype_openssl) \
$($(quiet)redirect_openssl)
@$(kecho) "###"
@$(kecho) "### Key pair generated."
diff --git a/crypto/Kconfig b/crypto/Kconfig
index ca3b02dcbbfa..536df4b6b825 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1547,6 +1547,7 @@ config CRYPTO_SERPENT_AVX2_X86_64
config CRYPTO_SM4
tristate "SM4 cipher algorithm"
select CRYPTO_ALGAPI
+ select CRYPTO_LIB_SM4
help
SM4 cipher algorithms (OSCCA GB/T 32907-2016).
@@ -1569,6 +1570,49 @@ config CRYPTO_SM4
If unsure, say N.
+config CRYPTO_SM4_AESNI_AVX_X86_64
+ tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX)"
+ depends on X86 && 64BIT
+ select CRYPTO_SKCIPHER
+ select CRYPTO_SIMD
+ select CRYPTO_ALGAPI
+ select CRYPTO_LIB_SM4
+ help
+ SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX).
+
+ SM4 (GBT.32907-2016) is a cryptographic standard issued by the
+ Organization of State Commercial Administration of China (OSCCA)
+ as an authorized cryptographic algorithm for use within China.
+
+ This is SM4 optimized implementation using AES-NI/AVX/x86_64
+ instruction set for block cipher. Through two affine transforms,
+ we can use the AES S-Box to simulate the SM4 S-Box to achieve the
+ effect of instruction acceleration.
+
+ If unsure, say N.
+
+config CRYPTO_SM4_AESNI_AVX2_X86_64
+ tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX2)"
+ depends on X86 && 64BIT
+ select CRYPTO_SKCIPHER
+ select CRYPTO_SIMD
+ select CRYPTO_ALGAPI
+ select CRYPTO_LIB_SM4
+ select CRYPTO_SM4_AESNI_AVX_X86_64
+ help
+ SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX2).
+
+ SM4 (GBT.32907-2016) is a cryptographic standard issued by the
+ Organization of State Commercial Administration of China (OSCCA)
+ as an authorized cryptographic algorithm for use within China.
+
+ This is SM4 optimized implementation using AES-NI/AVX2/x86_64
+ instruction set for block cipher. Through two affine transforms,
+ we can use the AES S-Box to simulate the SM4 S-Box to achieve the
+ effect of instruction acceleration.
+
+ If unsure, say N.
+
config CRYPTO_TEA
tristate "TEA, XTEA and XETA cipher algorithms"
depends on CRYPTO_USER_API_ENABLE_OBSOLETE
@@ -1768,7 +1812,7 @@ config CRYPTO_DRBG_HMAC
bool
default y
select CRYPTO_HMAC
- select CRYPTO_SHA256
+ select CRYPTO_SHA512
config CRYPTO_DRBG_HASH
bool "Enable Hash DRBG"
diff --git a/crypto/Makefile b/crypto/Makefile
index 10526d4559b8..c633f15a0481 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -74,7 +74,6 @@ obj-$(CONFIG_CRYPTO_NULL2) += crypto_null.o
obj-$(CONFIG_CRYPTO_MD4) += md4.o
obj-$(CONFIG_CRYPTO_MD5) += md5.o
obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o
-obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o
obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o
obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o
obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o
diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c
index 967329e0a07b..6592279d839a 100644
--- a/crypto/asymmetric_keys/pkcs7_parser.c
+++ b/crypto/asymmetric_keys/pkcs7_parser.c
@@ -269,6 +269,14 @@ int pkcs7_sig_note_pkey_algo(void *context, size_t hdrlen,
ctx->sinfo->sig->pkey_algo = "rsa";
ctx->sinfo->sig->encoding = "pkcs1";
break;
+ case OID_id_ecdsa_with_sha1:
+ case OID_id_ecdsa_with_sha224:
+ case OID_id_ecdsa_with_sha256:
+ case OID_id_ecdsa_with_sha384:
+ case OID_id_ecdsa_with_sha512:
+ ctx->sinfo->sig->pkey_algo = "ecdsa";
+ ctx->sinfo->sig->encoding = "x962";
+ break;
default:
printk("Unsupported pkey algo: %u\n", ctx->last_oid);
return -ENOPKG;
diff --git a/crypto/ecc.h b/crypto/ecc.h
index a006132646a4..1350e8eb6ac2 100644
--- a/crypto/ecc.h
+++ b/crypto/ecc.h
@@ -27,6 +27,7 @@
#define _CRYPTO_ECC_H
#include <crypto/ecc_curve.h>
+#include <asm/unaligned.h>
/* One digit is u64 qword. */
#define ECC_CURVE_NIST_P192_DIGITS 3
@@ -46,13 +47,13 @@
* @out: Output array
* @ndigits: Number of digits to copy
*/
-static inline void ecc_swap_digits(const u64 *in, u64 *out, unsigned int ndigits)
+static inline void ecc_swap_digits(const void *in, u64 *out, unsigned int ndigits)
{
const __be64 *src = (__force __be64 *)in;
int i;
for (i = 0; i < ndigits; i++)
- out[i] = be64_to_cpu(src[ndigits - 1 - i]);
+ out[i] = get_unaligned_be64(&src[ndigits - 1 - i]);
}
/**
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index c72d72ad828e..be70e76d6d86 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -143,9 +143,6 @@ sha512_transform(u64 *state, const u8 *input)
state[0] += a; state[1] += b; state[2] += c; state[3] += d;
state[4] += e; state[5] += f; state[6] += g; state[7] += h;
-
- /* erase our data */
- a = b = c = d = e = f = g = h = t1 = t2 = 0;
}
static void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src,
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index a15376245416..418211180cee 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -431,7 +431,7 @@ static int skcipher_copy_iv(struct skcipher_walk *walk)
static int skcipher_walk_first(struct skcipher_walk *walk)
{
- if (WARN_ON_ONCE(in_irq()))
+ if (WARN_ON_ONCE(in_hardirq()))
return -EDEADLK;
walk->buffer = NULL;
diff --git a/crypto/sm4_generic.c b/crypto/sm4_generic.c
index 016dbc595705..4a6480a27fee 100644
--- a/crypto/sm4_generic.c
+++ b/crypto/sm4_generic.c
@@ -16,191 +16,43 @@
#include <asm/byteorder.h>
#include <asm/unaligned.h>
-static const u32 fk[4] = {
- 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
-};
-
-static const u8 sbox[256] = {
- 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
- 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
- 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
- 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
- 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
- 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
- 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
- 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
- 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
- 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
- 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
- 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
- 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
- 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
- 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
- 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
- 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
- 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
- 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
- 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
- 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
- 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
- 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
- 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
- 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
- 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
- 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
- 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
- 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
- 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
- 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
- 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
-};
-
-static const u32 ck[] = {
- 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
- 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
- 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
- 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
- 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
- 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
- 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
- 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
-};
-
-static u32 sm4_t_non_lin_sub(u32 x)
-{
- int i;
- u8 *b = (u8 *)&x;
-
- for (i = 0; i < 4; ++i)
- b[i] = sbox[b[i]];
-
- return x;
-}
-
-static u32 sm4_key_lin_sub(u32 x)
-{
- return x ^ rol32(x, 13) ^ rol32(x, 23);
-
-}
-
-static u32 sm4_enc_lin_sub(u32 x)
-{
- return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24);
-}
-
-static u32 sm4_key_sub(u32 x)
-{
- return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
-}
-
-static u32 sm4_enc_sub(u32 x)
-{
- return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
-}
-
-static u32 sm4_round(const u32 *x, const u32 rk)
-{
- return x[0] ^ sm4_enc_sub(x[1] ^ x[2] ^ x[3] ^ rk);
-}
-
-
/**
- * crypto_sm4_expand_key - Expands the SM4 key as described in GB/T 32907-2016
- * @ctx: The location where the computed key will be stored.
- * @in_key: The supplied key.
- * @key_len: The length of the supplied key.
- *
- * Returns 0 on success. The function fails only if an invalid key size (or
- * pointer) is supplied.
- */
-int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
- unsigned int key_len)
-{
- u32 rk[4], t;
- const u32 *key = (u32 *)in_key;
- int i;
-
- if (key_len != SM4_KEY_SIZE)
- return -EINVAL;
-
- for (i = 0; i < 4; ++i)
- rk[i] = get_unaligned_be32(&key[i]) ^ fk[i];
-
- for (i = 0; i < 32; ++i) {
- t = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i]);
- ctx->rkey_enc[i] = t;
- rk[0] = rk[1];
- rk[1] = rk[2];
- rk[2] = rk[3];
- rk[3] = t;
- }
-
- for (i = 0; i < 32; ++i)
- ctx->rkey_dec[i] = ctx->rkey_enc[31 - i];
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_sm4_expand_key);
-
-/**
- * crypto_sm4_set_key - Set the SM4 key.
+ * sm4_setkey - Set the SM4 key.
* @tfm: The %crypto_tfm that is used in the context.
* @in_key: The input key.
* @key_len: The size of the key.
*
- * This function uses crypto_sm4_expand_key() to expand the key.
- * &crypto_sm4_ctx _must_ be the private data embedded in @tfm which is
+ * This function uses sm4_expandkey() to expand the key.
+ * &sm4_ctx _must_ be the private data embedded in @tfm which is
* retrieved with crypto_tfm_ctx().
*
* Return: 0 on success; -EINVAL on failure (only happens for bad key lengths)
*/
-int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int sm4_setkey(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
-
- return crypto_sm4_expand_key(ctx, in_key, key_len);
-}
-EXPORT_SYMBOL_GPL(crypto_sm4_set_key);
-
-static void sm4_do_crypt(const u32 *rk, u32 *out, const u32 *in)
-{
- u32 x[4], i, t;
-
- for (i = 0; i < 4; ++i)
- x[i] = get_unaligned_be32(&in[i]);
-
- for (i = 0; i < 32; ++i) {
- t = sm4_round(x, rk[i]);
- x[0] = x[1];
- x[1] = x[2];
- x[2] = x[3];
- x[3] = t;
- }
+ struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
- for (i = 0; i < 4; ++i)
- put_unaligned_be32(x[3 - i], &out[i]);
+ return sm4_expandkey(ctx, in_key, key_len);
}
/* encrypt a block of text */
-void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
- const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+ const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
- sm4_do_crypt(ctx->rkey_enc, (u32 *)out, (u32 *)in);
+ sm4_crypt_block(ctx->rkey_enc, out, in);
}
-EXPORT_SYMBOL_GPL(crypto_sm4_encrypt);
/* decrypt a block of text */
-void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
- const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+ const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
- sm4_do_crypt(ctx->rkey_dec, (u32 *)out, (u32 *)in);
+ sm4_crypt_block(ctx->rkey_dec, out, in);
}
-EXPORT_SYMBOL_GPL(crypto_sm4_decrypt);
static struct crypto_alg sm4_alg = {
.cra_name = "sm4",
@@ -208,15 +60,15 @@ static struct crypto_alg sm4_alg = {
.cra_priority = 100,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = SM4_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_sm4_ctx),
+ .cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = SM4_KEY_SIZE,
.cia_max_keysize = SM4_KEY_SIZE,
- .cia_setkey = crypto_sm4_set_key,
- .cia_encrypt = crypto_sm4_encrypt,
- .cia_decrypt = crypto_sm4_decrypt
+ .cia_setkey = sm4_setkey,
+ .cia_encrypt = sm4_encrypt,
+ .cia_decrypt = sm4_decrypt
}
}
};
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index f8d06da78e4f..82b0400985a5 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -77,7 +77,7 @@ static const char *check[] = {
NULL
};
-static const int block_sizes[] = { 16, 64, 256, 1024, 1420, 4096, 0 };
+static const int block_sizes[] = { 16, 64, 128, 256, 1024, 1420, 4096, 0 };
static const int aead_sizes[] = { 16, 64, 256, 512, 1024, 1420, 4096, 8192, 0 };
#define XBUFSIZE 8
@@ -290,6 +290,11 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs,
}
ret = crypto_aead_setauthsize(tfm, authsize);
+ if (ret) {
+ pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo,
+ ret);
+ goto out_free_tfm;
+ }
for (i = 0; i < num_mb; ++i)
if (testmgr_alloc_buf(data[i].xbuf)) {
@@ -315,7 +320,7 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs,
for (i = 0; i < num_mb; ++i) {
data[i].req = aead_request_alloc(tfm, GFP_KERNEL);
if (!data[i].req) {
- pr_err("alg: skcipher: Failed to allocate request for %s\n",
+ pr_err("alg: aead: Failed to allocate request for %s\n",
algo);
while (i--)
aead_request_free(data[i].req);
@@ -567,13 +572,19 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
sgout = &sg[9];
tfm = crypto_alloc_aead(algo, 0, 0);
-
if (IS_ERR(tfm)) {
pr_err("alg: aead: Failed to load transform for %s: %ld\n", algo,
PTR_ERR(tfm));
goto out_notfm;
}
+ ret = crypto_aead_setauthsize(tfm, authsize);
+ if (ret) {
+ pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo,
+ ret);
+ goto out_noreq;
+ }
+
crypto_init_wait(&wait);
printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo,
get_driver_name(crypto_aead, tfm), e);
@@ -611,8 +622,13 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
break;
}
}
+
ret = crypto_aead_setkey(tfm, key, *keysize);
- ret = crypto_aead_setauthsize(tfm, authsize);
+ if (ret) {
+ pr_err("setkey() failed flags=%x: %d\n",
+ crypto_aead_get_flags(tfm), ret);
+ goto out;
+ }
iv_len = crypto_aead_ivsize(tfm);
if (iv_len)
@@ -622,15 +638,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ",
i, *keysize * 8, bs);
-
memset(tvmem[0], 0xff, PAGE_SIZE);
- if (ret) {
- pr_err("setkey() failed flags=%x\n",
- crypto_aead_get_flags(tfm));
- goto out;
- }
-
sg_init_aead(sg, xbuf, bs + (enc ? 0 : authsize),
assoc, aad_size);
@@ -1907,6 +1916,14 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
ret += tcrypt_test("streebog512");
break;
+ case 55:
+ ret += tcrypt_test("gcm(sm4)");
+ break;
+
+ case 56:
+ ret += tcrypt_test("ccm(sm4)");
+ break;
+
case 100:
ret += tcrypt_test("hmac(md5)");
break;
@@ -1998,6 +2015,15 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
case 157:
ret += tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))");
break;
+
+ case 158:
+ ret += tcrypt_test("cbcmac(sm4)");
+ break;
+
+ case 159:
+ ret += tcrypt_test("cmac(sm4)");
+ break;
+
case 181:
ret += tcrypt_test("authenc(hmac(sha1),cbc(des))");
break;
@@ -2031,6 +2057,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
case 191:
ret += tcrypt_test("ecb(sm4)");
ret += tcrypt_test("cbc(sm4)");
+ ret += tcrypt_test("cfb(sm4)");
ret += tcrypt_test("ctr(sm4)");
break;
case 200:
@@ -2289,6 +2316,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
speed_template_16);
test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
+ test_cipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_cipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0,
+ speed_template_16);
test_cipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
@@ -2322,6 +2353,34 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
NULL, 0, 16, 8, speed_template_16);
break;
+ case 222:
+ test_aead_speed("gcm(sm4)", ENCRYPT, sec,
+ NULL, 0, 16, 8, speed_template_16);
+ test_aead_speed("gcm(sm4)", DECRYPT, sec,
+ NULL, 0, 16, 8, speed_template_16);
+ break;
+
+ case 223:
+ test_aead_speed("rfc4309(ccm(sm4))", ENCRYPT, sec,
+ NULL, 0, 16, 16, aead_speed_template_19);
+ test_aead_speed("rfc4309(ccm(sm4))", DECRYPT, sec,
+ NULL, 0, 16, 16, aead_speed_template_19);
+ break;
+
+ case 224:
+ test_mb_aead_speed("gcm(sm4)", ENCRYPT, sec, NULL, 0, 16, 8,
+ speed_template_16, num_mb);
+ test_mb_aead_speed("gcm(sm4)", DECRYPT, sec, NULL, 0, 16, 8,
+ speed_template_16, num_mb);
+ break;
+
+ case 225:
+ test_mb_aead_speed("rfc4309(ccm(sm4))", ENCRYPT, sec, NULL, 0,
+ 16, 16, aead_speed_template_19, num_mb);
+ test_mb_aead_speed("rfc4309(ccm(sm4))", DECRYPT, sec, NULL, 0,
+ 16, 16, aead_speed_template_19, num_mb);
+ break;
+
case 300:
if (alg) {
test_hash_speed(alg, sec, generic_hash_speed_template);
@@ -2757,6 +2816,25 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
speed_template_8_32);
break;
+ case 518:
+ test_acipher_speed("ecb(sm4)", ENCRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_acipher_speed("ecb(sm4)", DECRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_acipher_speed("cbc(sm4)", ENCRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_acipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_acipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_acipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_acipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_acipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
+ speed_template_16);
+ break;
+
case 600:
test_mb_skcipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
speed_template_16_24_32, num_mb);
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index c978e41f11a1..70f69f0910c9 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4451,6 +4451,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.hash = __VECS(aes_cbcmac_tv_template)
}
}, {
+ .alg = "cbcmac(sm4)",
+ .test = alg_test_hash,
+ .suite = {
+ .hash = __VECS(sm4_cbcmac_tv_template)
+ }
+ }, {
.alg = "ccm(aes)",
.generic_driver = "ccm_base(ctr(aes-generic),cbcmac(aes-generic))",
.test = alg_test_aead,
@@ -4462,6 +4468,16 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}, {
+ .alg = "ccm(sm4)",
+ .generic_driver = "ccm_base(ctr(sm4-generic),cbcmac(sm4-generic))",
+ .test = alg_test_aead,
+ .suite = {
+ .aead = {
+ ____VECS(sm4_ccm_tv_template),
+ .einval_allowed = 1,
+ }
+ }
+ }, {
.alg = "cfb(aes)",
.test = alg_test_skcipher,
.fips_allowed = 1,
@@ -4495,6 +4511,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.hash = __VECS(des3_ede_cmac64_tv_template)
}
}, {
+ .alg = "cmac(sm4)",
+ .test = alg_test_hash,
+ .suite = {
+ .hash = __VECS(sm4_cmac128_tv_template)
+ }
+ }, {
.alg = "compress_null",
.test = alg_test_null,
}, {
@@ -4968,6 +4990,13 @@ static const struct alg_test_desc alg_test_descs[] = {
.aead = __VECS(aes_gcm_tv_template)
}
}, {
+ .alg = "gcm(sm4)",
+ .generic_driver = "gcm_base(ctr(sm4-generic),ghash-generic)",
+ .test = alg_test_aead,
+ .suite = {
+ .aead = __VECS(sm4_gcm_tv_template)
+ }
+ }, {
.alg = "ghash",
.test = alg_test_hash,
.fips_allowed = 1,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 3ed6ab34ab51..e6fca34b5b25 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -13328,6 +13328,154 @@ static const struct cipher_testvec sm4_cfb_tv_template[] = {
}
};
+static const struct aead_testvec sm4_gcm_tv_template[] = {
+ { /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.1 */
+ .key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+ "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+ .klen = 16,
+ .iv = "\x00\x00\x12\x34\x56\x78\x00\x00"
+ "\x00\x00\xAB\xCD",
+ .ptext = "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
+ "\xBB\xBB\xBB\xBB\xBB\xBB\xBB\xBB"
+ "\xCC\xCC\xCC\xCC\xCC\xCC\xCC\xCC"
+ "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
+ "\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
+ "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
+ "\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
+ "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA",
+ .plen = 64,
+ .assoc = "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
+ "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
+ "\xAB\xAD\xDA\xD2",
+ .alen = 20,
+ .ctext = "\x17\xF3\x99\xF0\x8C\x67\xD5\xEE"
+ "\x19\xD0\xDC\x99\x69\xC4\xBB\x7D"
+ "\x5F\xD4\x6F\xD3\x75\x64\x89\x06"
+ "\x91\x57\xB2\x82\xBB\x20\x07\x35"
+ "\xD8\x27\x10\xCA\x5C\x22\xF0\xCC"
+ "\xFA\x7C\xBF\x93\xD4\x96\xAC\x15"
+ "\xA5\x68\x34\xCB\xCF\x98\xC3\x97"
+ "\xB4\x02\x4A\x26\x91\x23\x3B\x8D"
+ "\x83\xDE\x35\x41\xE4\xC2\xB5\x81"
+ "\x77\xE0\x65\xA9\xBF\x7B\x62\xEC",
+ .clen = 80,
+ }
+};
+
+static const struct aead_testvec sm4_ccm_tv_template[] = {
+ { /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.2 */
+ .key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+ "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+ .klen = 16,
+ .iv = "\x02\x00\x00\x12\x34\x56\x78\x00"
+ "\x00\x00\x00\xAB\xCD\x00\x00\x00",
+ .ptext = "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
+ "\xBB\xBB\xBB\xBB\xBB\xBB\xBB\xBB"
+ "\xCC\xCC\xCC\xCC\xCC\xCC\xCC\xCC"
+ "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
+ "\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
+ "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
+ "\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
+ "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA",
+ .plen = 64,
+ .assoc = "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
+ "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
+ "\xAB\xAD\xDA\xD2",
+ .alen = 20,
+ .ctext = "\x48\xAF\x93\x50\x1F\xA6\x2A\xDB"
+ "\xCD\x41\x4C\xCE\x60\x34\xD8\x95"
+ "\xDD\xA1\xBF\x8F\x13\x2F\x04\x20"
+ "\x98\x66\x15\x72\xE7\x48\x30\x94"
+ "\xFD\x12\xE5\x18\xCE\x06\x2C\x98"
+ "\xAC\xEE\x28\xD9\x5D\xF4\x41\x6B"
+ "\xED\x31\xA2\xF0\x44\x76\xC1\x8B"
+ "\xB4\x0C\x84\xA7\x4B\x97\xDC\x5B"
+ "\x16\x84\x2D\x4F\xA1\x86\xF5\x6A"
+ "\xB3\x32\x56\x97\x1F\xA1\x10\xF4",
+ .clen = 80,
+ }
+};
+
+static const struct hash_testvec sm4_cbcmac_tv_template[] = {
+ {
+ .key = "\xff\xee\xdd\xcc\xbb\xaa\x99\x88"
+ "\x77\x66\x55\x44\x33\x22\x11\x00",
+ .plaintext = "\x01\x23\x45\x67\x89\xab\xcd\xef"
+ "\xfe\xdc\xba\x98\x76\x54\x32\x10",
+ .digest = "\x97\xb4\x75\x8f\x84\x92\x3d\x3f"
+ "\x86\x81\x0e\x0e\xea\x14\x6d\x73",
+ .psize = 16,
+ .ksize = 16,
+ }, {
+ .key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
+ "\xfe\xdc\xBA\x98\x76\x54\x32\x10",
+ .plaintext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb"
+ "\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xee",
+ .digest = "\xc7\xdb\x17\x71\xa1\x5c\x0d\x22"
+ "\xa3\x39\x3a\x31\x88\x91\x49\xa1",
+ .psize = 33,
+ .ksize = 16,
+ }, {
+ .key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
+ "\xfe\xdc\xBA\x98\x76\x54\x32\x10",
+ .plaintext = "\xfb\xd1\xbe\x92\x7e\x50\x3f\x16"
+ "\xf9\xdd\xbe\x91\x73\x53\x37\x1a"
+ "\xfe\xdd\xba\x97\x7e\x53\x3c\x1c"
+ "\xfe\xd7\xbf\x9c\x75\x5f\x3e\x11"
+ "\xf0\xd8\xbc\x96\x73\x5c\x34\x11"
+ "\xf5\xdb\xb1\x99\x7a\x5a\x32\x1f"
+ "\xf6\xdf\xb4\x95\x7f\x5f\x3b\x17"
+ "\xfd\xdb\xb1\x9b\x76\x5c\x37",
+ .digest = "\x9b\x07\x88\x7f\xd5\x95\x23\x12"
+ "\x64\x0a\x66\x7f\x4e\x25\xca\xd0",
+ .psize = 63,
+ .ksize = 16,
+ }
+};
+
+static const struct hash_testvec sm4_cmac128_tv_template[] = {
+ {
+ .key = "\xff\xee\xdd\xcc\xbb\xaa\x99\x88"
+ "\x77\x66\x55\x44\x33\x22\x11\x00",
+ .plaintext = "\x01\x23\x45\x67\x89\xab\xcd\xef"
+ "\xfe\xdc\xba\x98\x76\x54\x32\x10",
+ .digest = "\x00\xd4\x63\xb4\x9a\xf3\x52\xe2"
+ "\x74\xa9\x00\x55\x13\x54\x2a\xd1",
+ .psize = 16,
+ .ksize = 16,
+ }, {
+ .key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
+ "\xfe\xdc\xBA\x98\x76\x54\x32\x10",
+ .plaintext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb"
+ "\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xee",
+ .digest = "\x8a\x8a\xe9\xc0\xc8\x97\x0e\x85"
+ "\x21\x57\x02\x10\x1a\xbf\x9c\xc6",
+ .psize = 33,
+ .ksize = 16,
+ }, {
+ .key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
+ "\xfe\xdc\xBA\x98\x76\x54\x32\x10",
+ .plaintext = "\xfb\xd1\xbe\x92\x7e\x50\x3f\x16"
+ "\xf9\xdd\xbe\x91\x73\x53\x37\x1a"
+ "\xfe\xdd\xba\x97\x7e\x53\x3c\x1c"
+ "\xfe\xd7\xbf\x9c\x75\x5f\x3e\x11"
+ "\xf0\xd8\xbc\x96\x73\x5c\x34\x11"
+ "\xf5\xdb\xb1\x99\x7a\x5a\x32\x1f"
+ "\xf6\xdf\xb4\x95\x7f\x5f\x3b\x17"
+ "\xfd\xdb\xb1\x9b\x76\x5c\x37",
+ .digest = "\x5f\x14\xc9\xa9\x20\xb2\xb4\xf0"
+ "\x76\xe0\xd8\xd6\xdc\x4f\xe1\xbc",
+ .psize = 63,
+ .ksize = 16,
+ }
+};
+
/* Cast6 test vectors from RFC 2612 */
static const struct cipher_testvec cast6_tv_template[] = {
{
diff --git a/crypto/wp512.c b/crypto/wp512.c
index bf79fbb2340f..5e820afa3c78 100644
--- a/crypto/wp512.c
+++ b/crypto/wp512.c
@@ -775,7 +775,7 @@ static const u64 rc[WHIRLPOOL_ROUNDS] = {
0xca2dbf07ad5a8333ULL,
};
-/**
+/*
* The core Whirlpool transform.
*/
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 8bad63417a50..30d2db37cc87 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -51,8 +51,6 @@ source "drivers/net/Kconfig"
source "drivers/isdn/Kconfig"
-source "drivers/lightnvm/Kconfig"
-
# input before char - char/joystick depends on it. As does USB.
source "drivers/input/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 27c018bdf4de..be5d40ae1488 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -70,7 +70,6 @@ obj-$(CONFIG_FB_I810) += video/fbdev/i810/
obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
obj-$(CONFIG_PARPORT) += parport/
-obj-$(CONFIG_NVM) += lightnvm/
obj-y += base/ block/ misc/ mfd/ nfc/
obj-$(CONFIG_LIBNVDIMM) += nvdimm/
obj-$(CONFIG_DAX) += dax/
diff --git a/drivers/accessibility/speakup/i18n.c b/drivers/accessibility/speakup/i18n.c
index bc7b47d1876f..d62079b1661f 100644
--- a/drivers/accessibility/speakup/i18n.c
+++ b/drivers/accessibility/speakup/i18n.c
@@ -90,13 +90,13 @@ static char *speakup_default_msgs[MSG_LAST_INDEX] = {
[MSG_COLOR_YELLOW] = "yellow",
[MSG_COLOR_WHITE] = "white",
[MSG_COLOR_GREY] = "grey",
- [MSG_COLOR_BRIGHTBLUE] "bright blue",
- [MSG_COLOR_BRIGHTGREEN] "bright green",
- [MSG_COLOR_BRIGHTCYAN] "bright cyan",
- [MSG_COLOR_BRIGHTRED] "bright red",
- [MSG_COLOR_BRIGHTMAGENTA] "bright magenta",
- [MSG_COLOR_BRIGHTYELLOW] "bright yellow",
- [MSG_COLOR_BRIGHTWHITE] "bright white",
+ [MSG_COLOR_BRIGHTBLUE] = "bright blue",
+ [MSG_COLOR_BRIGHTGREEN] = "bright green",
+ [MSG_COLOR_BRIGHTCYAN] = "bright cyan",
+ [MSG_COLOR_BRIGHTRED] = "bright red",
+ [MSG_COLOR_BRIGHTMAGENTA] = "bright magenta",
+ [MSG_COLOR_BRIGHTYELLOW] = "bright yellow",
+ [MSG_COLOR_BRIGHTWHITE] = "bright white",
/* Names of key states. */
[MSG_STATE_DOUBLE] = "double",
diff --git a/drivers/accessibility/speakup/speakup_soft.c b/drivers/accessibility/speakup/speakup_soft.c
index c3f97c572fb6..19824e7006fe 100644
--- a/drivers/accessibility/speakup/speakup_soft.c
+++ b/drivers/accessibility/speakup/speakup_soft.c
@@ -153,18 +153,25 @@ static char *get_initstring(void)
static char buf[40];
char *cp;
struct var_t *var;
+ size_t len;
+ size_t n;
memset(buf, 0, sizeof(buf));
cp = buf;
+ len = sizeof(buf);
+
var = synth_soft.vars;
while (var->var_id != MAXVARS) {
if (var->var_id != CAPS_START && var->var_id != CAPS_STOP &&
- var->var_id != PAUSE && var->var_id != DIRECT)
- cp = cp + sprintf(cp, var->u.n.synth_fmt,
- var->u.n.value);
+ var->var_id != PAUSE && var->var_id != DIRECT) {
+ n = scnprintf(cp, len, var->u.n.synth_fmt,
+ var->u.n.value);
+ cp = cp + n;
+ len = len - n;
+ }
var++;
}
- cp = cp + sprintf(cp, "\n");
+ cp = cp + scnprintf(cp, len, "\n");
return buf;
}
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 8f9940f40baa..1da360c51d66 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -280,9 +280,9 @@ config ACPI_CPPC_LIB
config ACPI_PROCESSOR
tristate "Processor"
- depends on X86 || IA64 || ARM64
+ depends on X86 || IA64 || ARM64 || LOONGARCH
select ACPI_PROCESSOR_IDLE
- select ACPI_CPU_FREQ_PSS if X86 || IA64
+ select ACPI_CPU_FREQ_PSS if X86 || IA64 || LOONGARCH
default y
help
This driver adds support for the ACPI Processor package. It is required
diff --git a/drivers/acpi/acpi_configfs.c b/drivers/acpi/acpi_configfs.c
index 76b83b181356..c970792b11a4 100644
--- a/drivers/acpi/acpi_configfs.c
+++ b/drivers/acpi/acpi_configfs.c
@@ -70,7 +70,7 @@ static inline struct acpi_table_header *get_header(struct config_item *cfg)
if (!table->header)
pr_err("table not loaded\n");
- return table->header;
+ return table->header ?: ERR_PTR(-EINVAL);
}
static ssize_t acpi_table_aml_read(struct config_item *cfg,
@@ -78,8 +78,8 @@ static ssize_t acpi_table_aml_read(struct config_item *cfg,
{
struct acpi_table_header *h = get_header(cfg);
- if (!h)
- return -EINVAL;
+ if (IS_ERR(h))
+ return PTR_ERR(h);
if (data)
memcpy(data, h, h->length);
@@ -100,60 +100,60 @@ static ssize_t acpi_table_signature_show(struct config_item *cfg, char *str)
{
struct acpi_table_header *h = get_header(cfg);
- if (!h)
- return -EINVAL;
+ if (IS_ERR(h))
+ return PTR_ERR(h);
- return sprintf(str, "%.*s\n", ACPI_NAMESEG_SIZE, h->signature);
+ return sysfs_emit(str, "%.*s\n", ACPI_NAMESEG_SIZE, h->signature);
}
static ssize_t acpi_table_length_show(struct config_item *cfg, char *str)
{
struct acpi_table_header *h = get_header(cfg);
- if (!h)
- return -EINVAL;
+ if (IS_ERR(h))
+ return PTR_ERR(h);
- return sprintf(str, "%d\n", h->length);
+ return sysfs_emit(str, "%d\n", h->length);
}
static ssize_t acpi_table_revision_show(struct config_item *cfg, char *str)
{
struct acpi_table_header *h = get_header(cfg);
- if (!h)
- return -EINVAL;
+ if (IS_ERR(h))
+ return PTR_ERR(h);
- return sprintf(str, "%d\n", h->revision);
+ return sysfs_emit(str, "%d\n", h->revision);
}
static ssize_t acpi_table_oem_id_show(struct config_item *cfg, char *str)
{
struct acpi_table_header *h = get_header(cfg);
- if (!h)
- return -EINVAL;
+ if (IS_ERR(h))
+ return PTR_ERR(h);
- return sprintf(str, "%.*s\n", ACPI_OEM_ID_SIZE, h->oem_id);
+ return sysfs_emit(str, "%.*s\n", ACPI_OEM_ID_SIZE, h->oem_id);
}
static ssize_t acpi_table_oem_table_id_show(struct config_item *cfg, char *str)
{
struct acpi_table_header *h = get_header(cfg);
- if (!h)
- return -EINVAL;
+ if (IS_ERR(h))
+ return PTR_ERR(h);
- return sprintf(str, "%.*s\n", ACPI_OEM_TABLE_ID_SIZE, h->oem_table_id);
+ return sysfs_emit(str, "%.*s\n", ACPI_OEM_TABLE_ID_SIZE, h->oem_table_id);
}
static ssize_t acpi_table_oem_revision_show(struct config_item *cfg, char *str)
{
struct acpi_table_header *h = get_header(cfg);
- if (!h)
- return -EINVAL;
+ if (IS_ERR(h))
+ return PTR_ERR(h);
- return sprintf(str, "%d\n", h->oem_revision);
+ return sysfs_emit(str, "%d\n", h->oem_revision);
}
static ssize_t acpi_table_asl_compiler_id_show(struct config_item *cfg,
@@ -161,10 +161,10 @@ static ssize_t acpi_table_asl_compiler_id_show(struct config_item *cfg,
{
struct acpi_table_header *h = get_header(cfg);
- if (!h)
- return -EINVAL;
+ if (IS_ERR(h))
+ return PTR_ERR(h);
- return sprintf(str, "%.*s\n", ACPI_NAMESEG_SIZE, h->asl_compiler_id);
+ return sysfs_emit(str, "%.*s\n", ACPI_NAMESEG_SIZE, h->asl_compiler_id);
}
static ssize_t acpi_table_asl_compiler_revision_show(struct config_item *cfg,
@@ -172,10 +172,10 @@ static ssize_t acpi_table_asl_compiler_revision_show(struct config_item *cfg,
{
struct acpi_table_header *h = get_header(cfg);
- if (!h)
- return -EINVAL;
+ if (IS_ERR(h))
+ return PTR_ERR(h);
- return sprintf(str, "%d\n", h->asl_compiler_revision);
+ return sysfs_emit(str, "%d\n", h->asl_compiler_revision);
}
CONFIGFS_ATTR_RO(acpi_table_, signature);
diff --git a/drivers/acpi/acpi_fpdt.c b/drivers/acpi/acpi_fpdt.c
index 4ee2ad234e3d..6922a44b3ce7 100644
--- a/drivers/acpi/acpi_fpdt.c
+++ b/drivers/acpi/acpi_fpdt.c
@@ -220,8 +220,8 @@ static int fpdt_process_subtable(u64 address, u32 subtable_type)
break;
default:
- pr_err(FW_BUG "Invalid record %d found.\n", record_header->type);
- return -EINVAL;
+ /* Other types are reserved in ACPI 6.4 spec. */
+ break;
}
}
return 0;
@@ -254,8 +254,7 @@ static int __init acpi_init_fpdt(void)
subtable->type);
break;
default:
- pr_info(FW_BUG "Invalid subtable type %d found.\n",
- subtable->type);
+ /* Other types are reserved in ACPI 6.4 spec. */
break;
}
offset += sizeof(*subtable);
diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c
index df4adeb335b2..f45979aa2d64 100644
--- a/drivers/acpi/acpi_pad.c
+++ b/drivers/acpi/acpi_pad.c
@@ -249,12 +249,12 @@ static void set_power_saving_task_num(unsigned int num)
static void acpi_pad_idle_cpus(unsigned int num_cpus)
{
- get_online_cpus();
+ cpus_read_lock();
num_cpus = min_t(unsigned int, num_cpus, num_online_cpus());
set_power_saving_task_num(num_cpus);
- put_online_cpus();
+ cpus_read_unlock();
}
static uint32_t acpi_pad_idle_cpus_num(void)
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 2d5bd2a6ddce..6737b1cbf6d6 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -182,7 +182,7 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr)
return -ENODEV;
cpu_maps_update_begin();
- cpu_hotplug_begin();
+ cpus_write_lock();
ret = acpi_map_cpu(pr->handle, pr->phys_id, pr->acpi_id, &pr->id);
if (ret)
@@ -203,7 +203,7 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr)
pr->flags.need_hotplug_init = 1;
out:
- cpu_hotplug_done();
+ cpus_write_unlock();
cpu_maps_update_done();
return ret;
}
@@ -454,13 +454,13 @@ static void acpi_processor_remove(struct acpi_device *device)
per_cpu(processors, pr->id) = NULL;
cpu_maps_update_begin();
- cpu_hotplug_begin();
+ cpus_write_lock();
/* Remove the CPU. */
arch_unregister_cpu(pr->id);
acpi_unmap_cpu(pr->id);
- cpu_hotplug_done();
+ cpus_write_unlock();
cpu_maps_update_done();
try_offline_node(cpu_to_node(pr->id));
diff --git a/drivers/acpi/acpica/dswexec.c b/drivers/acpi/acpica/dswexec.c
index 41ba7773fd10..f2d2267054af 100644
--- a/drivers/acpi/acpica/dswexec.c
+++ b/drivers/acpi/acpica/dswexec.c
@@ -561,11 +561,10 @@ acpi_status acpi_ds_exec_end_op(struct acpi_walk_state *walk_state)
op->common.
node->object,
NULL);
- if ACPI_FAILURE
- (status) {
+ if (ACPI_FAILURE(status)) {
ACPI_EXCEPTION((AE_INFO, status,
"While writing to buffer field"));
- }
+ }
}
ACPI_FREE(namepath);
status = AE_OK;
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index b941555cb5e4..fa923a929224 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -498,24 +498,24 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
acpi_evaluate_ost(handle, type, ost_code, NULL);
}
-static void acpi_device_notify(acpi_handle handle, u32 event, void *data)
+static void acpi_notify_device(acpi_handle handle, u32 event, void *data)
{
struct acpi_device *device = data;
device->driver->ops.notify(device, event);
}
-static void acpi_device_notify_fixed(void *data)
+static void acpi_notify_device_fixed(void *data)
{
struct acpi_device *device = data;
/* Fixed hardware devices have no handles */
- acpi_device_notify(NULL, ACPI_FIXED_HARDWARE_EVENT, device);
+ acpi_notify_device(NULL, ACPI_FIXED_HARDWARE_EVENT, device);
}
static u32 acpi_device_fixed_event(void *data)
{
- acpi_os_execute(OSL_NOTIFY_HANDLER, acpi_device_notify_fixed, data);
+ acpi_os_execute(OSL_NOTIFY_HANDLER, acpi_notify_device_fixed, data);
return ACPI_INTERRUPT_HANDLED;
}
@@ -536,7 +536,7 @@ static int acpi_device_install_notify_handler(struct acpi_device *device)
else
status = acpi_install_notify_handler(device->handle,
ACPI_DEVICE_NOTIFY,
- acpi_device_notify,
+ acpi_notify_device,
device);
if (ACPI_FAILURE(status))
@@ -554,7 +554,7 @@ static void acpi_device_remove_notify_handler(struct acpi_device *device)
acpi_device_fixed_event);
else
acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY,
- acpi_device_notify);
+ acpi_notify_device);
}
/* Handle events targeting \_SB device (at present only graceful shutdown) */
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index f25bd336113b..1f9b9a4c38c7 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -79,6 +79,17 @@ static const struct dmi_system_id dmi_lid_quirks[] = {
},
{
/*
+ * Lenovo Yoga 9 14ITL5, initial notification of the LID device
+ * never happens.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "82BG"),
+ },
+ .driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_OPEN,
+ },
+ {
+ /*
* Medion Akoya E2215T, notification of the LID device only
* happens on close, not on open and _LID always returns closed.
*/
diff --git a/drivers/acpi/dptf/dptf_pch_fivr.c b/drivers/acpi/dptf/dptf_pch_fivr.c
index 550b9081fcbc..f4e9c2ef2f88 100644
--- a/drivers/acpi/dptf/dptf_pch_fivr.c
+++ b/drivers/acpi/dptf/dptf_pch_fivr.c
@@ -90,15 +90,24 @@ static ssize_t name##_store(struct device *dev,\
PCH_FIVR_SHOW(freq_mhz_low_clock, GFC0)
PCH_FIVR_SHOW(freq_mhz_high_clock, GFC1)
+PCH_FIVR_SHOW(ssc_clock_info, GEMI)
+PCH_FIVR_SHOW(fivr_switching_freq_mhz, GFCS)
+PCH_FIVR_SHOW(fivr_switching_fault_status, GFFS)
PCH_FIVR_STORE(freq_mhz_low_clock, RFC0)
PCH_FIVR_STORE(freq_mhz_high_clock, RFC1)
static DEVICE_ATTR_RW(freq_mhz_low_clock);
static DEVICE_ATTR_RW(freq_mhz_high_clock);
+static DEVICE_ATTR_RO(ssc_clock_info);
+static DEVICE_ATTR_RO(fivr_switching_freq_mhz);
+static DEVICE_ATTR_RO(fivr_switching_fault_status);
static struct attribute *fivr_attrs[] = {
&dev_attr_freq_mhz_low_clock.attr,
&dev_attr_freq_mhz_high_clock.attr,
+ &dev_attr_ssc_clock_info.attr,
+ &dev_attr_fivr_switching_freq_mhz.attr,
+ &dev_attr_fivr_switching_fault_status.attr,
NULL
};
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index fce3f3bba714..7a33a6d985f8 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -285,29 +285,27 @@ int acpi_unbind_one(struct device *dev)
}
EXPORT_SYMBOL_GPL(acpi_unbind_one);
-static int acpi_device_notify(struct device *dev)
+void acpi_device_notify(struct device *dev)
{
struct acpi_bus_type *type = acpi_get_bus_type(dev);
struct acpi_device *adev;
int ret;
ret = acpi_bind_one(dev, NULL);
- if (ret && type) {
- struct acpi_device *adev;
+ if (ret) {
+ if (!type)
+ goto err;
adev = type->find_companion(dev);
if (!adev) {
- pr_debug("Unable to get handle for %s\n", dev_name(dev));
- ret = -ENODEV;
- goto out;
+ dev_dbg(dev, "ACPI companion not found\n");
+ goto err;
}
ret = acpi_bind_one(dev, adev);
if (ret)
- goto out;
+ goto err;
}
adev = ACPI_COMPANION(dev);
- if (!adev)
- goto out;
if (dev_is_platform(dev))
acpi_configure_pmsi_domain(dev);
@@ -317,27 +315,22 @@ static int acpi_device_notify(struct device *dev)
else if (adev->handler && adev->handler->bind)
adev->handler->bind(dev);
- out:
- if (!ret) {
- struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ acpi_handle_debug(ACPI_HANDLE(dev), "Bound to device %s\n",
+ dev_name(dev));
- acpi_get_name(ACPI_HANDLE(dev), ACPI_FULL_PATHNAME, &buffer);
- pr_debug("Device %s -> %s\n", dev_name(dev), (char *)buffer.pointer);
- kfree(buffer.pointer);
- } else {
- pr_debug("Device %s -> No ACPI support\n", dev_name(dev));
- }
+ return;
- return ret;
+err:
+ dev_dbg(dev, "No ACPI support\n");
}
-static int acpi_device_notify_remove(struct device *dev)
+void acpi_device_notify_remove(struct device *dev)
{
struct acpi_device *adev = ACPI_COMPANION(dev);
struct acpi_bus_type *type;
if (!adev)
- return 0;
+ return;
type = acpi_get_bus_type(dev);
if (type && type->cleanup)
@@ -346,20 +339,4 @@ static int acpi_device_notify_remove(struct device *dev)
adev->handler->unbind(dev);
acpi_unbind_one(dev);
- return 0;
-}
-
-int acpi_platform_notify(struct device *dev, enum kobject_action action)
-{
- switch (action) {
- case KOBJ_ADD:
- acpi_device_notify(dev);
- break;
- case KOBJ_REMOVE:
- acpi_device_notify_remove(dev);
- break;
- default:
- break;
- }
- return 0;
}
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 23d9a09d7060..a3ef6cce644c 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -3021,6 +3021,9 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
struct nd_mapping_desc *mapping;
+ /* range index 0 == unmapped in SPA or invalid-SPA */
+ if (memdev->range_index == 0 || spa->range_index == 0)
+ continue;
if (memdev->range_index != spa->range_index)
continue;
if (count >= ND_MAX_MAPPINGS) {
diff --git a/drivers/acpi/numa/Kconfig b/drivers/acpi/numa/Kconfig
index fcf2e556d69d..39b1f34c21df 100644
--- a/drivers/acpi/numa/Kconfig
+++ b/drivers/acpi/numa/Kconfig
@@ -2,7 +2,7 @@
config ACPI_NUMA
bool "NUMA support"
depends on NUMA
- depends on (X86 || IA64 || ARM64)
+ depends on (X86 || IA64 || ARM64 || LOONGARCH)
default y if IA64 || ARM64
config ACPI_HMAT
diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c
index 6021a1013442..b8795fc49097 100644
--- a/drivers/acpi/numa/srat.c
+++ b/drivers/acpi/numa/srat.c
@@ -206,7 +206,7 @@ int __init srat_disabled(void)
return acpi_numa < 0;
}
-#if defined(CONFIG_X86) || defined(CONFIG_ARM64)
+#if defined(CONFIG_X86) || defined(CONFIG_ARM64) || defined(CONFIG_LOONGARCH)
/*
* Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for
* I/O localities since SRAT does not list them. I/O localities are
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 45c5c0e45e33..a43f1521efe6 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -284,7 +284,8 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size)
#define should_use_kmap(pfn) page_is_ram(pfn)
#endif
-static void __iomem *acpi_map(acpi_physical_address pg_off, unsigned long pg_sz)
+static void __iomem *acpi_map(acpi_physical_address pg_off, unsigned long pg_sz,
+ bool memory)
{
unsigned long pfn;
@@ -294,7 +295,8 @@ static void __iomem *acpi_map(acpi_physical_address pg_off, unsigned long pg_sz)
return NULL;
return (void __iomem __force *)kmap(pfn_to_page(pfn));
} else
- return acpi_os_ioremap(pg_off, pg_sz);
+ return memory ? acpi_os_memmap(pg_off, pg_sz) :
+ acpi_os_ioremap(pg_off, pg_sz);
}
static void acpi_unmap(acpi_physical_address pg_off, void __iomem *vaddr)
@@ -309,9 +311,10 @@ static void acpi_unmap(acpi_physical_address pg_off, void __iomem *vaddr)
}
/**
- * acpi_os_map_iomem - Get a virtual address for a given physical address range.
+ * __acpi_os_map_iomem - Get a virtual address for a given physical address range.
* @phys: Start of the physical address range to map.
* @size: Size of the physical address range to map.
+ * @memory: true if remapping memory, false if IO
*
* Look up the given physical address range in the list of existing ACPI memory
* mappings. If found, get a reference to it and return a pointer to it (its
@@ -321,8 +324,8 @@ static void acpi_unmap(acpi_physical_address pg_off, void __iomem *vaddr)
* During early init (when acpi_permanent_mmap has not been set yet) this
* routine simply calls __acpi_map_table() to get the job done.
*/
-void __iomem __ref
-*acpi_os_map_iomem(acpi_physical_address phys, acpi_size size)
+static void __iomem __ref
+*__acpi_os_map_iomem(acpi_physical_address phys, acpi_size size, bool memory)
{
struct acpi_ioremap *map;
void __iomem *virt;
@@ -353,7 +356,7 @@ void __iomem __ref
pg_off = round_down(phys, PAGE_SIZE);
pg_sz = round_up(phys + size, PAGE_SIZE) - pg_off;
- virt = acpi_map(phys, size);
+ virt = acpi_map(phys, size, memory);
if (!virt) {
mutex_unlock(&acpi_ioremap_lock);
kfree(map);
@@ -372,11 +375,17 @@ out:
mutex_unlock(&acpi_ioremap_lock);
return map->virt + (phys - map->phys);
}
+
+void __iomem *__ref
+acpi_os_map_iomem(acpi_physical_address phys, acpi_size size)
+{
+ return __acpi_os_map_iomem(phys, size, false);
+}
EXPORT_SYMBOL_GPL(acpi_os_map_iomem);
void *__ref acpi_os_map_memory(acpi_physical_address phys, acpi_size size)
{
- return (void *)acpi_os_map_iomem(phys, size);
+ return (void *)__acpi_os_map_iomem(phys, size, true);
}
EXPORT_SYMBOL_GPL(acpi_os_map_memory);
diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c
index dd2fbf38e414..d418462ab791 100644
--- a/drivers/acpi/platform_profile.c
+++ b/drivers/acpi/platform_profile.c
@@ -106,6 +106,9 @@ static ssize_t platform_profile_store(struct device *dev,
}
err = cur_profile->profile_set(cur_profile, i);
+ if (!err)
+ sysfs_notify(acpi_kobj, NULL, "platform_profile");
+
mutex_unlock(&profile_lock);
if (err)
return err;
diff --git a/drivers/acpi/pmic/intel_pmic_xpower.c b/drivers/acpi/pmic/intel_pmic_xpower.c
index a091d5a8392c..cbe08e600fa3 100644
--- a/drivers/acpi/pmic/intel_pmic_xpower.c
+++ b/drivers/acpi/pmic/intel_pmic_xpower.c
@@ -178,15 +178,17 @@ static int intel_xpower_pmic_update_power(struct regmap *regmap, int reg,
{
int data, ret;
- /* GPIO1 LDO regulator needs special handling */
- if (reg == XPOWER_GPI1_CTRL)
- return regmap_update_bits(regmap, reg, GPI1_LDO_MASK,
- on ? GPI1_LDO_ON : GPI1_LDO_OFF);
-
ret = iosf_mbi_block_punit_i2c_access();
if (ret)
return ret;
+ /* GPIO1 LDO regulator needs special handling */
+ if (reg == XPOWER_GPI1_CTRL) {
+ ret = regmap_update_bits(regmap, reg, GPI1_LDO_MASK,
+ on ? GPI1_LDO_ON : GPI1_LDO_OFF);
+ goto out;
+ }
+
if (regmap_read(regmap, reg, &data)) {
ret = -EIO;
goto out;
@@ -234,6 +236,11 @@ static int intel_xpower_pmic_get_raw_temp(struct regmap *regmap, int reg)
return ret;
if (adc_ts_pin_ctrl & AXP288_ADC_TS_CURRENT_ON_OFF_MASK) {
+ /*
+ * AXP288_ADC_TS_PIN_CTRL reads are cached by the regmap, so
+ * this results in a single I2C-transfer, and thus there is no
+ * need to explicitly call iosf_mbi_block_punit_i2c_access().
+ */
ret = regmap_update_bits(regmap, AXP288_ADC_TS_PIN_CTRL,
AXP288_ADC_TS_CURRENT_ON_OFF_MASK,
AXP288_ADC_TS_CURRENT_ON_ONDEMAND);
@@ -244,6 +251,10 @@ static int intel_xpower_pmic_get_raw_temp(struct regmap *regmap, int reg)
usleep_range(6000, 10000);
}
+ ret = iosf_mbi_block_punit_i2c_access();
+ if (ret)
+ return ret;
+
ret = regmap_bulk_read(regmap, AXP288_GP_ADC_H, buf, 2);
if (ret == 0)
ret = (buf[0] << 4) + ((buf[1] >> 4) & 0x0f);
@@ -254,6 +265,31 @@ static int intel_xpower_pmic_get_raw_temp(struct regmap *regmap, int reg)
AXP288_ADC_TS_CURRENT_ON);
}
+ iosf_mbi_unblock_punit_i2c_access();
+
+ return ret;
+}
+
+static int intel_xpower_exec_mipi_pmic_seq_element(struct regmap *regmap,
+ u16 i2c_address, u32 reg_address,
+ u32 value, u32 mask)
+{
+ int ret;
+
+ if (i2c_address != 0x34) {
+ pr_err("%s: Unexpected i2c-addr: 0x%02x (reg-addr 0x%x value 0x%x mask 0x%x)\n",
+ __func__, i2c_address, reg_address, value, mask);
+ return -ENXIO;
+ }
+
+ ret = iosf_mbi_block_punit_i2c_access();
+ if (ret)
+ return ret;
+
+ ret = regmap_update_bits(regmap, reg_address, mask, value);
+
+ iosf_mbi_unblock_punit_i2c_access();
+
return ret;
}
@@ -261,6 +297,7 @@ static struct intel_pmic_opregion_data intel_xpower_pmic_opregion_data = {
.get_power = intel_xpower_pmic_get_power,
.update_power = intel_xpower_pmic_update_power,
.get_raw_temp = intel_xpower_pmic_get_raw_temp,
+ .exec_mipi_pmic_seq_element = intel_xpower_exec_mipi_pmic_seq_element,
.power_table = power_table,
.power_table_count = ARRAY_SIZE(power_table),
.thermal_table = thermal_table,
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index eba7785047ca..b9863e22b952 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -48,7 +48,6 @@ struct acpi_power_dependent_device {
struct acpi_power_resource {
struct acpi_device device;
struct list_head list_node;
- char *name;
u32 system_level;
u32 order;
unsigned int ref_count;
@@ -70,6 +69,11 @@ static DEFINE_MUTEX(power_resource_list_lock);
Power Resource Management
-------------------------------------------------------------------------- */
+static inline const char *resource_dev_name(struct acpi_power_resource *pr)
+{
+ return dev_name(&pr->device.dev);
+}
+
static inline
struct acpi_power_resource *to_power_resource(struct acpi_device *device)
{
@@ -264,7 +268,8 @@ acpi_power_resource_add_dependent(struct acpi_power_resource *resource,
dep->dev = dev;
list_add_tail(&dep->node, &resource->dependents);
- dev_dbg(dev, "added power dependency to [%s]\n", resource->name);
+ dev_dbg(dev, "added power dependency to [%s]\n",
+ resource_dev_name(resource));
unlock:
mutex_unlock(&resource->resource_lock);
@@ -283,7 +288,7 @@ acpi_power_resource_remove_dependent(struct acpi_power_resource *resource,
list_del(&dep->node);
kfree(dep);
dev_dbg(dev, "removed power dependency to [%s]\n",
- resource->name);
+ resource_dev_name(resource));
break;
}
}
@@ -356,10 +361,11 @@ void acpi_device_power_remove_dependent(struct acpi_device *adev,
static int __acpi_power_on(struct acpi_power_resource *resource)
{
+ acpi_handle handle = resource->device.handle;
struct acpi_power_dependent_device *dep;
acpi_status status = AE_OK;
- status = acpi_evaluate_object(resource->device.handle, "_ON", NULL, NULL);
+ status = acpi_evaluate_object(handle, "_ON", NULL, NULL);
if (ACPI_FAILURE(status)) {
resource->state = ACPI_POWER_RESOURCE_STATE_UNKNOWN;
return -ENODEV;
@@ -367,7 +373,7 @@ static int __acpi_power_on(struct acpi_power_resource *resource)
resource->state = ACPI_POWER_RESOURCE_STATE_ON;
- pr_debug("Power resource [%s] turned on\n", resource->name);
+ acpi_handle_debug(handle, "Power resource turned on\n");
/*
* If there are other dependents on this power resource we need to
@@ -380,7 +386,7 @@ static int __acpi_power_on(struct acpi_power_resource *resource)
list_for_each_entry(dep, &resource->dependents, node) {
dev_dbg(dep->dev, "runtime resuming because [%s] turned on\n",
- resource->name);
+ resource_dev_name(resource));
pm_request_resume(dep->dev);
}
@@ -392,7 +398,8 @@ static int acpi_power_on_unlocked(struct acpi_power_resource *resource)
int result = 0;
if (resource->ref_count++) {
- pr_debug("Power resource [%s] already on\n", resource->name);
+ acpi_handle_debug(resource->device.handle,
+ "Power resource already on\n");
} else {
result = __acpi_power_on(resource);
if (result)
@@ -413,10 +420,10 @@ static int acpi_power_on(struct acpi_power_resource *resource)
static int __acpi_power_off(struct acpi_power_resource *resource)
{
+ acpi_handle handle = resource->device.handle;
acpi_status status;
- status = acpi_evaluate_object(resource->device.handle, "_OFF",
- NULL, NULL);
+ status = acpi_evaluate_object(handle, "_OFF", NULL, NULL);
if (ACPI_FAILURE(status)) {
resource->state = ACPI_POWER_RESOURCE_STATE_UNKNOWN;
return -ENODEV;
@@ -424,7 +431,7 @@ static int __acpi_power_off(struct acpi_power_resource *resource)
resource->state = ACPI_POWER_RESOURCE_STATE_OFF;
- pr_debug("Power resource [%s] turned off\n", resource->name);
+ acpi_handle_debug(handle, "Power resource turned off\n");
return 0;
}
@@ -434,12 +441,14 @@ static int acpi_power_off_unlocked(struct acpi_power_resource *resource)
int result = 0;
if (!resource->ref_count) {
- pr_debug("Power resource [%s] already off\n", resource->name);
+ acpi_handle_debug(resource->device.handle,
+ "Power resource already off\n");
return 0;
}
if (--resource->ref_count) {
- pr_debug("Power resource [%s] still in use\n", resource->name);
+ acpi_handle_debug(resource->device.handle,
+ "Power resource still in use\n");
} else {
result = __acpi_power_off(resource);
if (result)
@@ -949,7 +958,6 @@ struct acpi_device *acpi_add_power_resource(acpi_handle handle)
mutex_init(&resource->resource_lock);
INIT_LIST_HEAD(&resource->list_node);
INIT_LIST_HEAD(&resource->dependents);
- resource->name = device->pnp.bus_id;
strcpy(acpi_device_name(device), ACPI_POWER_DEVICE_NAME);
strcpy(acpi_device_class(device), ACPI_POWER_CLASS);
device->power.state = ACPI_STATE_UNKNOWN;
@@ -1004,7 +1012,7 @@ void acpi_resume_power_resources(void)
if (state == ACPI_POWER_RESOURCE_STATE_OFF
&& resource->ref_count) {
- dev_dbg(&resource->device.dev, "Turning ON\n");
+ acpi_handle_debug(resource->device.handle, "Turning ON\n");
__acpi_power_on(resource);
}
@@ -1034,7 +1042,7 @@ void acpi_turn_off_unused_power_resources(void)
*/
if (!resource->ref_count &&
resource->state != ACPI_POWER_RESOURCE_STATE_OFF) {
- dev_dbg(&resource->device.dev, "Turning OFF\n");
+ acpi_handle_debug(resource->device.handle, "Turning OFF\n");
__acpi_power_off(resource);
}
diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c
index 31cf9aee5edd..1f6007abcf18 100644
--- a/drivers/acpi/prmt.c
+++ b/drivers/acpi/prmt.c
@@ -292,6 +292,12 @@ void __init init_prmt(void)
int mc = acpi_table_parse_entries(ACPI_SIG_PRMT, sizeof(struct acpi_table_prmt) +
sizeof (struct acpi_table_prmt_header),
0, acpi_parse_prmt, 0);
+ /*
+ * Return immediately if PRMT table is not present or no PRM module found.
+ */
+ if (mc <= 0)
+ return;
+
pr_info("PRM: found %u modules\n", mc);
status = acpi_install_address_space_handler(ACPI_ROOT_OBJECT,
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 095c8aca141e..f37fba9e5ba0 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -1301,7 +1301,7 @@ int acpi_processor_power_state_has_changed(struct acpi_processor *pr)
if (pr->id == 0 && cpuidle_get_driver() == &acpi_idle_driver) {
/* Protect against cpu-hotplug */
- get_online_cpus();
+ cpus_read_lock();
cpuidle_pause_and_lock();
/* Disable all cpuidle devices */
@@ -1330,7 +1330,7 @@ int acpi_processor_power_state_has_changed(struct acpi_processor *pr)
}
}
cpuidle_resume_and_unlock();
- put_online_cpus();
+ cpus_read_unlock();
}
return 0;
diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c
index 88460bacd5ae..25c2d0be953e 100644
--- a/drivers/acpi/spcr.c
+++ b/drivers/acpi/spcr.c
@@ -136,6 +136,7 @@ int __init acpi_parse_spcr(bool enable_earlycon, bool enable_console)
break;
case ACPI_DBG2_16550_COMPATIBLE:
case ACPI_DBG2_16550_SUBSET:
+ case ACPI_DBG2_16550_WITH_GAS:
uart = "uart";
break;
default:
diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index fbdbef0ab552..3a308461246a 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -452,7 +452,7 @@ int acpi_s2idle_prepare_late(void)
if (lps0_dsm_func_mask_microsoft > 0) {
acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF,
lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
- acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_EXIT,
+ acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_ENTRY,
lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY,
lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
@@ -479,7 +479,7 @@ void acpi_s2idle_restore_early(void)
if (lps0_dsm_func_mask_microsoft > 0) {
acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT,
lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
- acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_ENTRY,
+ acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_EXIT,
lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON,
lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index bcec598b89f2..d9030cb6b1e4 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2547,8 +2547,8 @@ static void binder_transaction(struct binder_proc *proc,
ref->node, &target_proc,
&return_error);
} else {
- binder_user_error("%d:%d got transaction to invalid handle\n",
- proc->pid, thread->pid);
+ binder_user_error("%d:%d got transaction to invalid handle, %u\n",
+ proc->pid, thread->pid, tr->target.handle);
return_error = BR_FAILED_REPLY;
}
binder_proc_unlock(proc);
diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c
index e80ba93c62a9..e3605cdd4335 100644
--- a/drivers/android/binderfs.c
+++ b/drivers/android/binderfs.c
@@ -58,6 +58,10 @@ enum binderfs_stats_mode {
binderfs_stats_mode_global,
};
+struct binder_features {
+ bool oneway_spam_detection;
+};
+
static const struct constant_table binderfs_param_stats[] = {
{ "global", binderfs_stats_mode_global },
{}
@@ -69,6 +73,10 @@ static const struct fs_parameter_spec binderfs_fs_parameters[] = {
{}
};
+static struct binder_features binder_features = {
+ .oneway_spam_detection = true,
+};
+
static inline struct binderfs_info *BINDERFS_SB(const struct super_block *sb)
{
return sb->s_fs_info;
@@ -583,6 +591,33 @@ out:
return dentry;
}
+static int binder_features_show(struct seq_file *m, void *unused)
+{
+ bool *feature = m->private;
+
+ seq_printf(m, "%d\n", *feature);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(binder_features);
+
+static int init_binder_features(struct super_block *sb)
+{
+ struct dentry *dentry, *dir;
+
+ dir = binderfs_create_dir(sb->s_root, "features");
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+
+ dentry = binderfs_create_file(dir, "oneway_spam_detection",
+ &binder_features_fops,
+ &binder_features.oneway_spam_detection);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ return 0;
+}
+
static int init_binder_logs(struct super_block *sb)
{
struct dentry *binder_logs_root_dir, *dentry, *proc_log_dir;
@@ -723,6 +758,10 @@ static int binderfs_fill_super(struct super_block *sb, struct fs_context *fc)
name++;
}
+ ret = init_binder_features(sb);
+ if (ret)
+ return ret;
+
if (info->mount_opts.stats_mode == binderfs_stats_mode_global)
return init_binder_logs(sb);
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index fec2e9754aed..5b3fa2cbe722 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -125,6 +125,7 @@ EXPORT_SYMBOL_GPL(ahci_shost_attrs);
struct device_attribute *ahci_sdev_attrs[] = {
&dev_attr_sw_activity,
&dev_attr_unload_heads,
+ &dev_attr_ncq_prio_supported,
&dev_attr_ncq_prio_enable,
NULL
};
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 61c762961ca8..b8459c54f739 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -159,6 +159,12 @@ MODULE_DESCRIPTION("Library module for ATA devices");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
+static inline bool ata_dev_print_info(struct ata_device *dev)
+{
+ struct ata_eh_context *ehc = &dev->link->eh_context;
+
+ return ehc->i.flags & ATA_EHI_PRINTINFO;
+}
static bool ata_sstatus_online(u32 sstatus)
{
@@ -706,11 +712,9 @@ int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
if (tf->flags & ATA_TFLAG_FUA)
tf->device |= 1 << 7;
- if (dev->flags & ATA_DFLAG_NCQ_PRIO) {
- if (class == IOPRIO_CLASS_RT)
- tf->hob_nsect |= ATA_PRIO_HIGH <<
- ATA_SHIFT_PRIO;
- }
+ if (dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLE &&
+ class == IOPRIO_CLASS_RT)
+ tf->hob_nsect |= ATA_PRIO_HIGH << ATA_SHIFT_PRIO;
} else if (dev->flags & ATA_DFLAG_LBA) {
tf->flags |= ATA_TFLAG_LBA;
@@ -1266,8 +1270,7 @@ static int ata_set_max_sectors(struct ata_device *dev, u64 new_sectors)
*/
static int ata_hpa_resize(struct ata_device *dev)
{
- struct ata_eh_context *ehc = &dev->link->eh_context;
- int print_info = ehc->i.flags & ATA_EHI_PRINTINFO;
+ bool print_info = ata_dev_print_info(dev);
bool unlock_hpa = ata_ignore_hpa || dev->flags & ATA_DFLAG_UNLOCK_HPA;
u64 sectors = ata_id_n_sectors(dev->id);
u64 native_sectors;
@@ -2023,13 +2026,15 @@ retry:
err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
buf, sectors * ATA_SECT_SIZE, 0);
- if (err_mask && dma) {
- dev->horkage |= ATA_HORKAGE_NO_DMA_LOG;
- ata_dev_warn(dev, "READ LOG DMA EXT failed, trying PIO\n");
- goto retry;
+ if (err_mask) {
+ if (dma) {
+ dev->horkage |= ATA_HORKAGE_NO_DMA_LOG;
+ goto retry;
+ }
+ ata_dev_err(dev, "Read log page 0x%02x failed, Emask 0x%x\n",
+ (unsigned int)page, err_mask);
}
- DPRINTK("EXIT, err_mask=%x\n", err_mask);
return err_mask;
}
@@ -2058,12 +2063,8 @@ static bool ata_identify_page_supported(struct ata_device *dev, u8 page)
*/
err = ata_read_log_page(dev, ATA_LOG_IDENTIFY_DEVICE, 0, ap->sector_buf,
1);
- if (err) {
- ata_dev_info(dev,
- "failed to get Device Identify Log Emask 0x%x\n",
- err);
+ if (err)
return false;
- }
for (i = 0; i < ap->sector_buf[8]; i++) {
if (ap->sector_buf[9 + i] == page)
@@ -2127,11 +2128,7 @@ static void ata_dev_config_ncq_send_recv(struct ata_device *dev)
}
err_mask = ata_read_log_page(dev, ATA_LOG_NCQ_SEND_RECV,
0, ap->sector_buf, 1);
- if (err_mask) {
- ata_dev_dbg(dev,
- "failed to get NCQ Send/Recv Log Emask 0x%x\n",
- err_mask);
- } else {
+ if (!err_mask) {
u8 *cmds = dev->ncq_send_recv_cmds;
dev->flags |= ATA_DFLAG_NCQ_SEND_RECV;
@@ -2157,11 +2154,7 @@ static void ata_dev_config_ncq_non_data(struct ata_device *dev)
}
err_mask = ata_read_log_page(dev, ATA_LOG_NCQ_NON_DATA,
0, ap->sector_buf, 1);
- if (err_mask) {
- ata_dev_dbg(dev,
- "failed to get NCQ Non-Data Log Emask 0x%x\n",
- err_mask);
- } else {
+ if (!err_mask) {
u8 *cmds = dev->ncq_non_data_cmds;
memcpy(cmds, ap->sector_buf, ATA_LOG_NCQ_NON_DATA_SIZE);
@@ -2173,30 +2166,24 @@ static void ata_dev_config_ncq_prio(struct ata_device *dev)
struct ata_port *ap = dev->link->ap;
unsigned int err_mask;
- if (!(dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLE)) {
- dev->flags &= ~ATA_DFLAG_NCQ_PRIO;
- return;
- }
-
err_mask = ata_read_log_page(dev,
ATA_LOG_IDENTIFY_DEVICE,
ATA_LOG_SATA_SETTINGS,
ap->sector_buf,
1);
- if (err_mask) {
- ata_dev_dbg(dev,
- "failed to get Identify Device data, Emask 0x%x\n",
- err_mask);
- return;
- }
+ if (err_mask)
+ goto not_supported;
- if (ap->sector_buf[ATA_LOG_NCQ_PRIO_OFFSET] & BIT(3)) {
- dev->flags |= ATA_DFLAG_NCQ_PRIO;
- } else {
- dev->flags &= ~ATA_DFLAG_NCQ_PRIO;
- ata_dev_dbg(dev, "SATA page does not support priority\n");
- }
+ if (!(ap->sector_buf[ATA_LOG_NCQ_PRIO_OFFSET] & BIT(3)))
+ goto not_supported;
+
+ dev->flags |= ATA_DFLAG_NCQ_PRIO;
+
+ return;
+not_supported:
+ dev->flags &= ~ATA_DFLAG_NCQ_PRIO_ENABLE;
+ dev->flags &= ~ATA_DFLAG_NCQ_PRIO;
}
static int ata_dev_config_ncq(struct ata_device *dev,
@@ -2346,11 +2333,8 @@ static void ata_dev_config_trusted(struct ata_device *dev)
err = ata_read_log_page(dev, ATA_LOG_IDENTIFY_DEVICE, ATA_LOG_SECURITY,
ap->sector_buf, 1);
- if (err) {
- ata_dev_dbg(dev,
- "failed to read Security Log, Emask 0x%x\n", err);
+ if (err)
return;
- }
trusted_cap = get_unaligned_le64(&ap->sector_buf[40]);
if (!(trusted_cap & (1ULL << 63))) {
@@ -2363,6 +2347,106 @@ static void ata_dev_config_trusted(struct ata_device *dev)
dev->flags |= ATA_DFLAG_TRUSTED;
}
+static int ata_dev_config_lba(struct ata_device *dev)
+{
+ struct ata_port *ap = dev->link->ap;
+ const u16 *id = dev->id;
+ const char *lba_desc;
+ char ncq_desc[24];
+ int ret;
+
+ dev->flags |= ATA_DFLAG_LBA;
+
+ if (ata_id_has_lba48(id)) {
+ lba_desc = "LBA48";
+ dev->flags |= ATA_DFLAG_LBA48;
+ if (dev->n_sectors >= (1UL << 28) &&
+ ata_id_has_flush_ext(id))
+ dev->flags |= ATA_DFLAG_FLUSH_EXT;
+ } else {
+ lba_desc = "LBA";
+ }
+
+ /* config NCQ */
+ ret = ata_dev_config_ncq(dev, ncq_desc, sizeof(ncq_desc));
+
+ /* print device info to dmesg */
+ if (ata_msg_drv(ap) && ata_dev_print_info(dev))
+ ata_dev_info(dev,
+ "%llu sectors, multi %u: %s %s\n",
+ (unsigned long long)dev->n_sectors,
+ dev->multi_count, lba_desc, ncq_desc);
+
+ return ret;
+}
+
+static void ata_dev_config_chs(struct ata_device *dev)
+{
+ struct ata_port *ap = dev->link->ap;
+ const u16 *id = dev->id;
+
+ if (ata_id_current_chs_valid(id)) {
+ /* Current CHS translation is valid. */
+ dev->cylinders = id[54];
+ dev->heads = id[55];
+ dev->sectors = id[56];
+ } else {
+ /* Default translation */
+ dev->cylinders = id[1];
+ dev->heads = id[3];
+ dev->sectors = id[6];
+ }
+
+ /* print device info to dmesg */
+ if (ata_msg_drv(ap) && ata_dev_print_info(dev))
+ ata_dev_info(dev,
+ "%llu sectors, multi %u, CHS %u/%u/%u\n",
+ (unsigned long long)dev->n_sectors,
+ dev->multi_count, dev->cylinders,
+ dev->heads, dev->sectors);
+}
+
+static void ata_dev_config_devslp(struct ata_device *dev)
+{
+ u8 *sata_setting = dev->link->ap->sector_buf;
+ unsigned int err_mask;
+ int i, j;
+
+ /*
+ * Check device sleep capability. Get DevSlp timing variables
+ * from SATA Settings page of Identify Device Data Log.
+ */
+ if (!ata_id_has_devslp(dev->id))
+ return;
+
+ err_mask = ata_read_log_page(dev,
+ ATA_LOG_IDENTIFY_DEVICE,
+ ATA_LOG_SATA_SETTINGS,
+ sata_setting, 1);
+ if (err_mask)
+ return;
+
+ dev->flags |= ATA_DFLAG_DEVSLP;
+ for (i = 0; i < ATA_LOG_DEVSLP_SIZE; i++) {
+ j = ATA_LOG_DEVSLP_OFFSET + i;
+ dev->devslp_timing[i] = sata_setting[j];
+ }
+}
+
+static void ata_dev_print_features(struct ata_device *dev)
+{
+ if (!(dev->flags & ATA_DFLAG_FEATURES_MASK))
+ return;
+
+ ata_dev_info(dev,
+ "Features:%s%s%s%s%s\n",
+ dev->flags & ATA_DFLAG_TRUSTED ? " Trust" : "",
+ dev->flags & ATA_DFLAG_DA ? " Dev-Attention" : "",
+ dev->flags & ATA_DFLAG_DEVSLP ? " Dev-Sleep" : "",
+ dev->flags & ATA_DFLAG_NCQ_SEND_RECV ? " NCQ-sndrcv" : "",
+ dev->flags & ATA_DFLAG_NCQ_PRIO ? " NCQ-prio" : "");
+}
+
/**
* ata_dev_configure - Configure the specified ATA/ATAPI device
* @dev: Target device to configure
@@ -2379,8 +2463,7 @@ static void ata_dev_config_trusted(struct ata_device *dev)
int ata_dev_configure(struct ata_device *dev)
{
struct ata_port *ap = dev->link->ap;
- struct ata_eh_context *ehc = &dev->link->eh_context;
- int print_info = ehc->i.flags & ATA_EHI_PRINTINFO;
+ bool print_info = ata_dev_print_info(dev);
const u16 *id = dev->id;
unsigned long xfer_mask;
unsigned int err_mask;
@@ -2507,91 +2590,28 @@ int ata_dev_configure(struct ata_device *dev)
dev->multi_count = cnt;
}
- if (ata_id_has_lba(id)) {
- const char *lba_desc;
- char ncq_desc[24];
-
- lba_desc = "LBA";
- dev->flags |= ATA_DFLAG_LBA;
- if (ata_id_has_lba48(id)) {
- dev->flags |= ATA_DFLAG_LBA48;
- lba_desc = "LBA48";
-
- if (dev->n_sectors >= (1UL << 28) &&
- ata_id_has_flush_ext(id))
- dev->flags |= ATA_DFLAG_FLUSH_EXT;
- }
+ /* print device info to dmesg */
+ if (ata_msg_drv(ap) && print_info)
+ ata_dev_info(dev, "%s: %s, %s, max %s\n",
+ revbuf, modelbuf, fwrevbuf,
+ ata_mode_string(xfer_mask));
- /* config NCQ */
- rc = ata_dev_config_ncq(dev, ncq_desc, sizeof(ncq_desc));
+ if (ata_id_has_lba(id)) {
+ rc = ata_dev_config_lba(dev);
if (rc)
return rc;
-
- /* print device info to dmesg */
- if (ata_msg_drv(ap) && print_info) {
- ata_dev_info(dev, "%s: %s, %s, max %s\n",
- revbuf, modelbuf, fwrevbuf,
- ata_mode_string(xfer_mask));
- ata_dev_info(dev,
- "%llu sectors, multi %u: %s %s\n",
- (unsigned long long)dev->n_sectors,
- dev->multi_count, lba_desc, ncq_desc);
- }
} else {
- /* CHS */
-
- /* Default translation */
- dev->cylinders = id[1];
- dev->heads = id[3];
- dev->sectors = id[6];
-
- if (ata_id_current_chs_valid(id)) {
- /* Current CHS translation is valid. */
- dev->cylinders = id[54];
- dev->heads = id[55];
- dev->sectors = id[56];
- }
-
- /* print device info to dmesg */
- if (ata_msg_drv(ap) && print_info) {
- ata_dev_info(dev, "%s: %s, %s, max %s\n",
- revbuf, modelbuf, fwrevbuf,
- ata_mode_string(xfer_mask));
- ata_dev_info(dev,
- "%llu sectors, multi %u, CHS %u/%u/%u\n",
- (unsigned long long)dev->n_sectors,
- dev->multi_count, dev->cylinders,
- dev->heads, dev->sectors);
- }
+ ata_dev_config_chs(dev);
}
- /* Check and mark DevSlp capability. Get DevSlp timing variables
- * from SATA Settings page of Identify Device Data Log.
- */
- if (ata_id_has_devslp(dev->id)) {
- u8 *sata_setting = ap->sector_buf;
- int i, j;
-
- dev->flags |= ATA_DFLAG_DEVSLP;
- err_mask = ata_read_log_page(dev,
- ATA_LOG_IDENTIFY_DEVICE,
- ATA_LOG_SATA_SETTINGS,
- sata_setting,
- 1);
- if (err_mask)
- ata_dev_dbg(dev,
- "failed to get Identify Device Data, Emask 0x%x\n",
- err_mask);
- else
- for (i = 0; i < ATA_LOG_DEVSLP_SIZE; i++) {
- j = ATA_LOG_DEVSLP_OFFSET + i;
- dev->devslp_timing[i] = sata_setting[j];
- }
- }
+ ata_dev_config_devslp(dev);
ata_dev_config_sense_reporting(dev);
ata_dev_config_zac(dev);
ata_dev_config_trusted(dev);
dev->cdb_len = 32;
+
+ if (ata_msg_drv(ap) && print_info)
+ ata_dev_print_features(dev);
}
/* ATAPI-specific feature tests */
@@ -5573,7 +5593,7 @@ int ata_host_start(struct ata_host *host)
have_stop = 1;
}
- if (host->ops->host_stop)
+ if (host->ops && host->ops->host_stop)
have_stop = 1;
if (have_stop) {
diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c
index 8adeab76dd38..8f3ff830ab0c 100644
--- a/drivers/ata/libata-sata.c
+++ b/drivers/ata/libata-sata.c
@@ -834,28 +834,46 @@ DEVICE_ATTR(link_power_management_policy, S_IRUGO | S_IWUSR,
ata_scsi_lpm_show, ata_scsi_lpm_store);
EXPORT_SYMBOL_GPL(dev_attr_link_power_management_policy);
+static ssize_t ata_ncq_prio_supported_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct scsi_device *sdev = to_scsi_device(device);
+ struct ata_port *ap = ata_shost_to_port(sdev->host);
+ struct ata_device *dev;
+ bool ncq_prio_supported;
+ int rc = 0;
+
+ spin_lock_irq(ap->lock);
+ dev = ata_scsi_find_dev(ap, sdev);
+ if (!dev)
+ rc = -ENODEV;
+ else
+ ncq_prio_supported = dev->flags & ATA_DFLAG_NCQ_PRIO;
+ spin_unlock_irq(ap->lock);
+
+ return rc ? rc : sysfs_emit(buf, "%u\n", ncq_prio_supported);
+}
+
+DEVICE_ATTR(ncq_prio_supported, S_IRUGO, ata_ncq_prio_supported_show, NULL);
+EXPORT_SYMBOL_GPL(dev_attr_ncq_prio_supported);
+
static ssize_t ata_ncq_prio_enable_show(struct device *device,
struct device_attribute *attr,
char *buf)
{
struct scsi_device *sdev = to_scsi_device(device);
- struct ata_port *ap;
+ struct ata_port *ap = ata_shost_to_port(sdev->host);
struct ata_device *dev;
bool ncq_prio_enable;
int rc = 0;
- ap = ata_shost_to_port(sdev->host);
-
spin_lock_irq(ap->lock);
dev = ata_scsi_find_dev(ap, sdev);
- if (!dev) {
+ if (!dev)
rc = -ENODEV;
- goto unlock;
- }
-
- ncq_prio_enable = dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLE;
-
-unlock:
+ else
+ ncq_prio_enable = dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLE;
spin_unlock_irq(ap->lock);
return rc ? rc : snprintf(buf, 20, "%u\n", ncq_prio_enable);
@@ -869,7 +887,7 @@ static ssize_t ata_ncq_prio_enable_store(struct device *device,
struct ata_port *ap;
struct ata_device *dev;
long int input;
- int rc;
+ int rc = 0;
rc = kstrtol(buf, 10, &input);
if (rc)
@@ -883,27 +901,20 @@ static ssize_t ata_ncq_prio_enable_store(struct device *device,
return -ENODEV;
spin_lock_irq(ap->lock);
+
+ if (!(dev->flags & ATA_DFLAG_NCQ_PRIO)) {
+ rc = -EINVAL;
+ goto unlock;
+ }
+
if (input)
dev->flags |= ATA_DFLAG_NCQ_PRIO_ENABLE;
else
dev->flags &= ~ATA_DFLAG_NCQ_PRIO_ENABLE;
- dev->link->eh_info.action |= ATA_EH_REVALIDATE;
- dev->link->eh_info.flags |= ATA_EHI_QUIET;
- ata_port_schedule_eh(ap);
+unlock:
spin_unlock_irq(ap->lock);
- ata_port_wait_eh(ap);
-
- if (input) {
- spin_lock_irq(ap->lock);
- if (!(dev->flags & ATA_DFLAG_NCQ_PRIO)) {
- dev->flags &= ~ATA_DFLAG_NCQ_PRIO_ENABLE;
- rc = -EIO;
- }
- spin_unlock_irq(ap->lock);
- }
-
return rc ? rc : len;
}
@@ -914,6 +925,7 @@ EXPORT_SYMBOL_GPL(dev_attr_ncq_prio_enable);
struct device_attribute *ata_ncq_sdev_attrs[] = {
&dev_attr_unload_heads,
&dev_attr_ncq_prio_enable,
+ &dev_attr_ncq_prio_supported,
NULL
};
EXPORT_SYMBOL_GPL(ata_ncq_sdev_attrs);
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index b9588c52815d..0b7b4624e4df 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1766,53 +1766,6 @@ struct ata_scsi_args {
};
/**
- * ata_scsi_rbuf_get - Map response buffer.
- * @cmd: SCSI command containing buffer to be mapped.
- * @flags: unsigned long variable to store irq enable status
- * @copy_in: copy in from user buffer
- *
- * Prepare buffer for simulated SCSI commands.
- *
- * LOCKING:
- * spin_lock_irqsave(ata_scsi_rbuf_lock) on success
- *
- * RETURNS:
- * Pointer to response buffer.
- */
-static void *ata_scsi_rbuf_get(struct scsi_cmnd *cmd, bool copy_in,
- unsigned long *flags)
-{
- spin_lock_irqsave(&ata_scsi_rbuf_lock, *flags);
-
- memset(ata_scsi_rbuf, 0, ATA_SCSI_RBUF_SIZE);
- if (copy_in)
- sg_copy_to_buffer(scsi_sglist(cmd), scsi_sg_count(cmd),
- ata_scsi_rbuf, ATA_SCSI_RBUF_SIZE);
- return ata_scsi_rbuf;
-}
-
-/**
- * ata_scsi_rbuf_put - Unmap response buffer.
- * @cmd: SCSI command containing buffer to be unmapped.
- * @copy_out: copy out result
- * @flags: @flags passed to ata_scsi_rbuf_get()
- *
- * Returns rbuf buffer. The result is copied to @cmd's buffer if
- * @copy_back is true.
- *
- * LOCKING:
- * Unlocks ata_scsi_rbuf_lock.
- */
-static inline void ata_scsi_rbuf_put(struct scsi_cmnd *cmd, bool copy_out,
- unsigned long *flags)
-{
- if (copy_out)
- sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd),
- ata_scsi_rbuf, ATA_SCSI_RBUF_SIZE);
- spin_unlock_irqrestore(&ata_scsi_rbuf_lock, *flags);
-}
-
-/**
* ata_scsi_rbuf_fill - wrapper for SCSI command simulators
* @args: device IDENTIFY data / SCSI command of interest.
* @actor: Callback hook for desired SCSI command simulator
@@ -1830,14 +1783,19 @@ static inline void ata_scsi_rbuf_put(struct scsi_cmnd *cmd, bool copy_out,
static void ata_scsi_rbuf_fill(struct ata_scsi_args *args,
unsigned int (*actor)(struct ata_scsi_args *args, u8 *rbuf))
{
- u8 *rbuf;
unsigned int rc;
struct scsi_cmnd *cmd = args->cmd;
unsigned long flags;
- rbuf = ata_scsi_rbuf_get(cmd, false, &flags);
- rc = actor(args, rbuf);
- ata_scsi_rbuf_put(cmd, rc == 0, &flags);
+ spin_lock_irqsave(&ata_scsi_rbuf_lock, flags);
+
+ memset(ata_scsi_rbuf, 0, ATA_SCSI_RBUF_SIZE);
+ rc = actor(args, ata_scsi_rbuf);
+ if (rc == 0)
+ sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd),
+ ata_scsi_rbuf, ATA_SCSI_RBUF_SIZE);
+
+ spin_unlock_irqrestore(&ata_scsi_rbuf_lock, flags);
if (rc == 0)
cmd->result = SAM_STAT_GOOD;
diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c
index f0ef844428bb..338c2e50f759 100644
--- a/drivers/ata/sata_dwc_460ex.c
+++ b/drivers/ata/sata_dwc_460ex.c
@@ -1259,24 +1259,20 @@ static int sata_dwc_probe(struct platform_device *ofdev)
irq = irq_of_parse_and_map(np, 0);
if (irq == NO_IRQ) {
dev_err(&ofdev->dev, "no SATA DMA irq\n");
- err = -ENODEV;
- goto error_out;
+ return -ENODEV;
}
#ifdef CONFIG_SATA_DWC_OLD_DMA
if (!of_find_property(np, "dmas", NULL)) {
err = sata_dwc_dma_init_old(ofdev, hsdev);
if (err)
- goto error_out;
+ return err;
}
#endif
hsdev->phy = devm_phy_optional_get(hsdev->dev, "sata-phy");
- if (IS_ERR(hsdev->phy)) {
- err = PTR_ERR(hsdev->phy);
- hsdev->phy = NULL;
- goto error_out;
- }
+ if (IS_ERR(hsdev->phy))
+ return PTR_ERR(hsdev->phy);
err = phy_init(hsdev->phy);
if (err)
diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c
index 4f2951cbe69c..d0e67ec46216 100644
--- a/drivers/atm/horizon.c
+++ b/drivers/atm/horizon.c
@@ -2167,10 +2167,10 @@ static int hrz_open (struct atm_vcc *atm_vcc)
// Part of the job is done by atm_pcr_goal which gives us a PCR
// specification which says: EITHER grab the maximum available PCR
- // (and perhaps a lower bound which we musn't pass), OR grab this
+ // (and perhaps a lower bound which we must not pass), OR grab this
// amount, rounding down if you have to (and perhaps a lower bound
- // which we musn't pass) OR grab this amount, rounding up if you
- // have to (and perhaps an upper bound which we musn't pass). If any
+ // which we must not pass) OR grab this amount, rounding up if you
+ // have to (and perhaps an upper bound which we must not pass). If any
// bounds ARE passed we fail. Note that rounding is only rounding to
// match device limitations, we do not round down to satisfy
// bandwidth availability even if this would not violate any given
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 9e4bd751db79..81ce81a75fc6 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -3536,7 +3536,7 @@ static int idt77252_preset(struct idt77252_dev *card)
return -1;
}
if (!(pci_command & PCI_COMMAND_IO)) {
- printk("%s: PCI_COMMAND: %04x (???)\n",
+ printk("%s: PCI_COMMAND: %04x (?)\n",
card->name, pci_command);
deinit_card(card);
return (-1);
diff --git a/drivers/base/base.h b/drivers/base/base.h
index 404db83ee5ec..2882af26392a 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -202,3 +202,6 @@ int devtmpfs_delete_node(struct device *dev);
static inline int devtmpfs_create_node(struct device *dev) { return 0; }
static inline int devtmpfs_delete_node(struct device *dev) { return 0; }
#endif
+
+void software_node_notify(struct device *dev);
+void software_node_notify_remove(struct device *dev);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index f6360490a4a3..3a72241b87c6 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2002,24 +2002,24 @@ static inline int device_is_not_partition(struct device *dev)
}
#endif
-static int
-device_platform_notify(struct device *dev, enum kobject_action action)
+static void device_platform_notify(struct device *dev)
{
- int ret;
+ acpi_device_notify(dev);
- ret = acpi_platform_notify(dev, action);
- if (ret)
- return ret;
+ software_node_notify(dev);
- ret = software_node_notify(dev, action);
- if (ret)
- return ret;
-
- if (platform_notify && action == KOBJ_ADD)
+ if (platform_notify)
platform_notify(dev);
- else if (platform_notify_remove && action == KOBJ_REMOVE)
+}
+
+static void device_platform_notify_remove(struct device *dev)
+{
+ acpi_device_notify_remove(dev);
+
+ software_node_notify_remove(dev);
+
+ if (platform_notify_remove)
platform_notify_remove(dev);
- return 0;
}
/**
@@ -2837,6 +2837,7 @@ void device_initialize(struct device *dev)
device_pm_init(dev);
set_dev_node(dev, -1);
#ifdef CONFIG_GENERIC_MSI_IRQ
+ raw_spin_lock_init(&dev->msi_lock);
INIT_LIST_HEAD(&dev->msi_list);
#endif
INIT_LIST_HEAD(&dev->links.consumers);
@@ -3291,9 +3292,7 @@ int device_add(struct device *dev)
}
/* notify platform of device entry */
- error = device_platform_notify(dev, KOBJ_ADD);
- if (error)
- goto platform_error;
+ device_platform_notify(dev);
error = device_create_file(dev, &dev_attr_uevent);
if (error)
@@ -3396,8 +3395,7 @@ done:
SymlinkError:
device_remove_file(dev, &dev_attr_uevent);
attrError:
- device_platform_notify(dev, KOBJ_REMOVE);
-platform_error:
+ device_platform_notify_remove(dev);
kobject_uevent(&dev->kobj, KOBJ_REMOVE);
glue_dir = get_glue_dir(dev);
kobject_del(&dev->kobj);
@@ -3542,7 +3540,7 @@ void device_del(struct device *dev)
bus_remove_device(dev);
device_pm_remove(dev);
driver_deferred_probe_del(dev);
- device_platform_notify(dev, KOBJ_REMOVE);
+ device_platform_notify_remove(dev);
device_remove_properties(dev);
device_links_purge(dev);
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 0b72b134a304..3d6c8f9caf43 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -21,11 +21,12 @@
* and the callback to write the MSI message.
*/
struct platform_msi_priv_data {
- struct device *dev;
- void *host_data;
- msi_alloc_info_t arg;
- irq_write_msi_msg_t write_msg;
- int devid;
+ struct device *dev;
+ void *host_data;
+ const struct attribute_group **msi_irq_groups;
+ msi_alloc_info_t arg;
+ irq_write_msi_msg_t write_msg;
+ int devid;
};
/* The devid allocator */
@@ -272,8 +273,16 @@ int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
if (err)
goto out_free_desc;
+ priv_data->msi_irq_groups = msi_populate_sysfs(dev);
+ if (IS_ERR(priv_data->msi_irq_groups)) {
+ err = PTR_ERR(priv_data->msi_irq_groups);
+ goto out_free_irqs;
+ }
+
return 0;
+out_free_irqs:
+ msi_domain_free_irqs(dev->msi_domain, dev);
out_free_desc:
platform_msi_free_descs(dev, 0, nvec);
out_free_priv_data:
@@ -293,6 +302,7 @@ void platform_msi_domain_free_irqs(struct device *dev)
struct msi_desc *desc;
desc = first_msi_entry(dev);
+ msi_destroy_sysfs(dev, desc->platform.msi_priv_data->msi_irq_groups);
platform_msi_free_priv_data(desc->platform.msi_priv_data);
}
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index a934c679e6ce..5db704f02e71 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -435,7 +435,7 @@ static void genpd_restore_performance_state(struct device *dev,
int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state)
{
struct generic_pm_domain *genpd;
- int ret;
+ int ret = 0;
genpd = dev_to_genpd_safe(dev);
if (!genpd)
@@ -446,7 +446,13 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state)
return -EINVAL;
genpd_lock(genpd);
- ret = genpd_set_performance_state(dev, state);
+ if (pm_runtime_suspended(dev)) {
+ dev_gpd_data(dev)->rpm_pstate = state;
+ } else {
+ ret = genpd_set_performance_state(dev, state);
+ if (!ret)
+ dev_gpd_data(dev)->rpm_pstate = 0;
+ }
genpd_unlock(genpd);
return ret;
@@ -2598,6 +2604,12 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off)
dev_dbg(dev, "removing from PM domain %s\n", pd->name);
+ /* Drop the default performance state */
+ if (dev_gpd_data(dev)->default_pstate) {
+ dev_pm_genpd_set_performance_state(dev, 0);
+ dev_gpd_data(dev)->default_pstate = 0;
+ }
+
for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) {
ret = genpd_remove_device(pd, dev);
if (ret != -EAGAIN)
@@ -2637,6 +2649,7 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev,
{
struct of_phandle_args pd_args;
struct generic_pm_domain *pd;
+ int pstate;
int ret;
ret = of_parse_phandle_with_args(dev->of_node, "power-domains",
@@ -2675,10 +2688,29 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev,
genpd_unlock(pd);
}
- if (ret)
+ if (ret) {
genpd_remove_device(pd, dev);
+ return -EPROBE_DEFER;
+ }
- return ret ? -EPROBE_DEFER : 1;
+ /* Set the default performance state */
+ pstate = of_get_required_opp_performance_state(dev->of_node, index);
+ if (pstate < 0 && pstate != -ENODEV && pstate != -EOPNOTSUPP) {
+ ret = pstate;
+ goto err;
+ } else if (pstate > 0) {
+ ret = dev_pm_genpd_set_performance_state(dev, pstate);
+ if (ret)
+ goto err;
+ dev_gpd_data(dev)->default_pstate = pstate;
+ }
+ return 1;
+
+err:
+ dev_err(dev, "failed to set required performance state for power-domain %s: %d\n",
+ pd->name, ret);
+ genpd_remove_device(pd, dev);
+ return ret;
}
/**
diff --git a/drivers/base/property.c b/drivers/base/property.c
index d0874f6c29bb..453918eb7390 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -1033,7 +1033,26 @@ struct fwnode_handle *
fwnode_graph_get_next_endpoint(const struct fwnode_handle *fwnode,
struct fwnode_handle *prev)
{
- return fwnode_call_ptr_op(fwnode, graph_get_next_endpoint, prev);
+ const struct fwnode_handle *parent;
+ struct fwnode_handle *ep;
+
+ /*
+ * If this function is in a loop and the previous iteration returned
+ * an endpoint from fwnode->secondary, then we need to use the secondary
+ * as parent rather than @fwnode.
+ */
+ if (prev)
+ parent = fwnode_graph_get_port_parent(prev);
+ else
+ parent = fwnode;
+
+ ep = fwnode_call_ptr_op(parent, graph_get_next_endpoint, prev);
+
+ if (IS_ERR_OR_NULL(ep) &&
+ !IS_ERR_OR_NULL(parent) && !IS_ERR_OR_NULL(parent->secondary))
+ ep = fwnode_graph_get_next_endpoint(parent->secondary, NULL);
+
+ return ep;
}
EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint);
@@ -1212,14 +1231,7 @@ fwnode_graph_get_endpoint_by_id(const struct fwnode_handle *fwnode,
best_ep_id = fwnode_ep.id;
}
- if (best_ep)
- return best_ep;
-
- if (fwnode && !IS_ERR_OR_NULL(fwnode->secondary))
- return fwnode_graph_get_endpoint_by_id(fwnode->secondary, port,
- endpoint, flags);
-
- return NULL;
+ return best_ep;
}
EXPORT_SYMBOL_GPL(fwnode_graph_get_endpoint_by_id);
diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 0097696c31de..b1905916f7af 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -53,6 +53,10 @@ struct regmap {
spinlock_t spinlock;
unsigned long spinlock_flags;
};
+ struct {
+ raw_spinlock_t raw_spinlock;
+ unsigned long raw_spinlock_flags;
+ };
};
regmap_lock lock;
regmap_unlock unlock;
diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c
index 211a335a608d..ad684d37c2da 100644
--- a/drivers/base/regmap/regmap-debugfs.c
+++ b/drivers/base/regmap/regmap-debugfs.c
@@ -368,7 +368,7 @@ static ssize_t regmap_reg_ranges_read_file(struct file *file,
char *buf;
char *entry;
int ret;
- unsigned entry_len;
+ unsigned int entry_len;
if (*ppos < 0 || !count)
return -EINVAL;
diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c
index f9cd51afb9d2..71f16be7e717 100644
--- a/drivers/base/regmap/regmap-mmio.c
+++ b/drivers/base/regmap/regmap-mmio.c
@@ -15,7 +15,7 @@
struct regmap_mmio_context {
void __iomem *regs;
- unsigned val_bytes;
+ unsigned int val_bytes;
bool relaxed_mmio;
bool attached_clk;
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index fe3e38dd5324..21a0c2562ec0 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -533,6 +533,23 @@ __releases(&map->spinlock)
spin_unlock_irqrestore(&map->spinlock, map->spinlock_flags);
}
+static void regmap_lock_raw_spinlock(void *__map)
+__acquires(&map->raw_spinlock)
+{
+ struct regmap *map = __map;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&map->raw_spinlock, flags);
+ map->raw_spinlock_flags = flags;
+}
+
+static void regmap_unlock_raw_spinlock(void *__map)
+__releases(&map->raw_spinlock)
+{
+ struct regmap *map = __map;
+ raw_spin_unlock_irqrestore(&map->raw_spinlock, map->raw_spinlock_flags);
+}
+
static void dev_get_regmap_release(struct device *dev, void *res)
{
/*
@@ -770,11 +787,19 @@ struct regmap *__regmap_init(struct device *dev,
} else {
if ((bus && bus->fast_io) ||
config->fast_io) {
- spin_lock_init(&map->spinlock);
- map->lock = regmap_lock_spinlock;
- map->unlock = regmap_unlock_spinlock;
- lockdep_set_class_and_name(&map->spinlock,
- lock_key, lock_name);
+ if (config->use_raw_spinlock) {
+ raw_spin_lock_init(&map->raw_spinlock);
+ map->lock = regmap_lock_raw_spinlock;
+ map->unlock = regmap_unlock_raw_spinlock;
+ lockdep_set_class_and_name(&map->raw_spinlock,
+ lock_key, lock_name);
+ } else {
+ spin_lock_init(&map->spinlock);
+ map->lock = regmap_lock_spinlock;
+ map->unlock = regmap_unlock_spinlock;
+ lockdep_set_class_and_name(&map->spinlock,
+ lock_key, lock_name);
+ }
} else {
mutex_init(&map->mutex);
map->lock = regmap_lock_mutex;
@@ -1126,10 +1151,10 @@ skip_format_initialization:
/* Make sure, that this register range has no selector
or data window within its boundary */
for (j = 0; j < config->num_ranges; j++) {
- unsigned sel_reg = config->ranges[j].selector_reg;
- unsigned win_min = config->ranges[j].window_start;
- unsigned win_max = win_min +
- config->ranges[j].window_len - 1;
+ unsigned int sel_reg = config->ranges[j].selector_reg;
+ unsigned int win_min = config->ranges[j].window_start;
+ unsigned int win_max = win_min +
+ config->ranges[j].window_len - 1;
/* Allow data window inside its own virtual range */
if (j == i)
@@ -1298,7 +1323,7 @@ EXPORT_SYMBOL_GPL(devm_regmap_field_alloc);
*/
int regmap_field_bulk_alloc(struct regmap *regmap,
struct regmap_field **rm_field,
- struct reg_field *reg_field,
+ const struct reg_field *reg_field,
int num_fields)
{
struct regmap_field *rf;
@@ -1334,7 +1359,7 @@ EXPORT_SYMBOL_GPL(regmap_field_bulk_alloc);
int devm_regmap_field_bulk_alloc(struct device *dev,
struct regmap *regmap,
struct regmap_field **rm_field,
- struct reg_field *reg_field,
+ const struct reg_field *reg_field,
int num_fields)
{
struct regmap_field *rf;
@@ -1667,7 +1692,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
if (ret) {
dev_err(map->dev,
"Error in caching of register: %x ret: %d\n",
- reg + i, ret);
+ reg + regmap_get_offset(map, i), ret);
return ret;
}
}
diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c
index d1f1a8240120..7bd0f3cfb7eb 100644
--- a/drivers/base/swnode.c
+++ b/drivers/base/swnode.c
@@ -11,6 +11,8 @@
#include <linux/property.h>
#include <linux/slab.h>
+#include "base.h"
+
struct swnode {
struct kobject kobj;
struct fwnode_handle fwnode;
@@ -1053,7 +1055,7 @@ int device_add_software_node(struct device *dev, const struct software_node *nod
* balance.
*/
if (device_is_registered(dev))
- software_node_notify(dev, KOBJ_ADD);
+ software_node_notify(dev);
return 0;
}
@@ -1074,7 +1076,8 @@ void device_remove_software_node(struct device *dev)
return;
if (device_is_registered(dev))
- software_node_notify(dev, KOBJ_REMOVE);
+ software_node_notify_remove(dev);
+
set_secondary_fwnode(dev, NULL);
kobject_put(&swnode->kobj);
}
@@ -1117,44 +1120,44 @@ int device_create_managed_software_node(struct device *dev,
}
EXPORT_SYMBOL_GPL(device_create_managed_software_node);
-int software_node_notify(struct device *dev, unsigned long action)
+void software_node_notify(struct device *dev)
{
struct swnode *swnode;
int ret;
swnode = dev_to_swnode(dev);
if (!swnode)
- return 0;
+ return;
- switch (action) {
- case KOBJ_ADD:
- ret = sysfs_create_link(&dev->kobj, &swnode->kobj, "software_node");
- if (ret)
- break;
+ ret = sysfs_create_link(&dev->kobj, &swnode->kobj, "software_node");
+ if (ret)
+ return;
- ret = sysfs_create_link(&swnode->kobj, &dev->kobj,
- dev_name(dev));
- if (ret) {
- sysfs_remove_link(&dev->kobj, "software_node");
- break;
- }
- kobject_get(&swnode->kobj);
- break;
- case KOBJ_REMOVE:
- sysfs_remove_link(&swnode->kobj, dev_name(dev));
+ ret = sysfs_create_link(&swnode->kobj, &dev->kobj, dev_name(dev));
+ if (ret) {
sysfs_remove_link(&dev->kobj, "software_node");
- kobject_put(&swnode->kobj);
-
- if (swnode->managed) {
- set_secondary_fwnode(dev, NULL);
- kobject_put(&swnode->kobj);
- }
- break;
- default:
- break;
+ return;
}
- return 0;
+ kobject_get(&swnode->kobj);
+}
+
+void software_node_notify_remove(struct device *dev)
+{
+ struct swnode *swnode;
+
+ swnode = dev_to_swnode(dev);
+ if (!swnode)
+ return;
+
+ sysfs_remove_link(&swnode->kobj, dev_name(dev));
+ sysfs_remove_link(&dev->kobj, "software_node");
+ kobject_put(&swnode->kobj);
+
+ if (swnode->managed) {
+ set_secondary_fwnode(dev, NULL);
+ kobject_put(&swnode->kobj);
+ }
}
static int __init software_node_init(void)
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index e076630d17bd..c6d6ba0d00b1 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -236,6 +236,7 @@ EXPORT_SYMBOL(bcma_core_irq);
void bcma_prepare_core(struct bcma_bus *bus, struct bcma_device *core)
{
+ device_initialize(&core->dev);
core->dev.release = bcma_release_core_dev;
core->dev.bus = &bcma_bus_type;
dev_set_name(&core->dev, "bcma%d:%d", bus->num, core->core_index);
@@ -277,11 +278,10 @@ static void bcma_register_core(struct bcma_bus *bus, struct bcma_device *core)
{
int err;
- err = device_register(&core->dev);
+ err = device_add(&core->dev);
if (err) {
bcma_err(bus, "Could not register dev for core 0x%03X\n",
core->id.id);
- put_device(&core->dev);
return;
}
core->dev_registered = true;
@@ -372,7 +372,7 @@ void bcma_unregister_cores(struct bcma_bus *bus)
/* Now noone uses internally-handled cores, we can free them */
list_for_each_entry_safe(core, tmp, &bus->cores, list) {
list_del(&core->list);
- kfree(core);
+ put_device(&core->dev);
}
}
diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c
index d49e7c0de2b6..26d12a7e6ca0 100644
--- a/drivers/bcma/scan.c
+++ b/drivers/bcma/scan.c
@@ -141,8 +141,7 @@ static const char *bcma_device_name(const struct bcma_device_id *id)
return "UNKNOWN";
}
-static u32 bcma_scan_read32(struct bcma_bus *bus, u8 current_coreidx,
- u16 offset)
+static u32 bcma_scan_read32(struct bcma_bus *bus, u16 offset)
{
return readl(bus->mmio + offset);
}
@@ -443,7 +442,7 @@ void bcma_detect_chip(struct bcma_bus *bus)
bcma_scan_switch_core(bus, BCMA_ADDR_BASE);
- tmp = bcma_scan_read32(bus, 0, BCMA_CC_ID);
+ tmp = bcma_scan_read32(bus, BCMA_CC_ID);
chipinfo->id = (tmp & BCMA_CC_ID_ID) >> BCMA_CC_ID_ID_SHIFT;
chipinfo->rev = (tmp & BCMA_CC_ID_REV) >> BCMA_CC_ID_REV_SHIFT;
chipinfo->pkg = (tmp & BCMA_CC_ID_PKG) >> BCMA_CC_ID_PKG_SHIFT;
@@ -465,7 +464,7 @@ int bcma_bus_scan(struct bcma_bus *bus)
if (bus->nr_cores)
return 0;
- erombase = bcma_scan_read32(bus, 0, BCMA_CC_EROM);
+ erombase = bcma_scan_read32(bus, BCMA_CC_EROM);
if (bus->hosttype == BCMA_HOSTTYPE_SOC) {
eromptr = ioremap(erombase, BCMA_CORE_SIZE);
if (!eromptr)
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 63056cfd4b62..fbb3a558139f 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -213,7 +213,7 @@ config BLK_DEV_LOOP_MIN_COUNT
dynamically allocated with the /dev/loop-control interface.
config BLK_DEV_CRYPTOLOOP
- tristate "Cryptoloop Support"
+ tristate "Cryptoloop Support (DEPRECATED)"
select CRYPTO
select CRYPTO_CBC
depends on BLK_DEV_LOOP
@@ -225,7 +225,7 @@ config BLK_DEV_CRYPTOLOOP
WARNING: This device is not safe for journaled file systems like
ext3 or Reiserfs. Please use the Device Mapper crypto module
instead, which can be configured to be on-disk compatible with the
- cryptoloop device.
+ cryptoloop device. cryptoloop support will be removed in Linux 5.16.
source "drivers/block/drbd/Kconfig"
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 95694113e38e..58ec167aa018 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -27,9 +27,6 @@
#include <linux/uaccess.h>
-#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
-#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
-
/*
* Each block ramdisk device has a radix_tree brd_pages of pages that stores
* the pages containing the block device's contents. A brd page's ->index is
diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c
index 3cabc335ae74..f0a91faa43a8 100644
--- a/drivers/block/cryptoloop.c
+++ b/drivers/block/cryptoloop.c
@@ -189,6 +189,8 @@ init_cryptoloop(void)
if (rc)
printk(KERN_ERR "cryptoloop: loop_register_transfer failed\n");
+ else
+ pr_warn("the cryptoloop driver has been deprecated and will be removed in in Linux 5.16\n");
return rc;
}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index e7d0e637e632..44ccf8b4f4b2 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1364,7 +1364,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
if (b) {
blk_stack_limits(&q->limits, &b->limits, 0);
- blk_queue_update_readahead(q);
+ disk_update_readahead(device->vdisk);
}
fixup_discard_if_not_supported(q);
fixup_write_zeroes(device, q);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 13beb98a7c5a..5ca233644d70 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -905,13 +905,12 @@ static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector,
static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t sector,
enum drbd_read_balancing rbm)
{
- struct backing_dev_info *bdi;
int stripe_shift;
switch (rbm) {
case RB_CONGESTED_REMOTE:
- bdi = device->ldev->backing_bdev->bd_disk->queue->backing_dev_info;
- return bdi_read_congested(bdi);
+ return bdi_read_congested(
+ device->ldev->backing_bdev->bd_disk->bdi);
case RB_LEAST_PENDING:
return atomic_read(&device->local_cnt) >
atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 87460e0e5c72..fef79ea52e3e 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4029,23 +4029,23 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
if (fdc_state[FDC(drive)].rawcmd == 1)
fdc_state[FDC(drive)].rawcmd = 2;
- if (mode & (FMODE_READ|FMODE_WRITE)) {
- drive_state[drive].last_checked = 0;
- clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags);
- if (bdev_check_media_change(bdev))
- floppy_revalidate(bdev->bd_disk);
- if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags))
- goto out;
- if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags))
+ if (!(mode & FMODE_NDELAY)) {
+ if (mode & (FMODE_READ|FMODE_WRITE)) {
+ drive_state[drive].last_checked = 0;
+ clear_bit(FD_OPEN_SHOULD_FAIL_BIT,
+ &drive_state[drive].flags);
+ if (bdev_check_media_change(bdev))
+ floppy_revalidate(bdev->bd_disk);
+ if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags))
+ goto out;
+ if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags))
+ goto out;
+ }
+ res = -EROFS;
+ if ((mode & FMODE_WRITE) &&
+ !test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags))
goto out;
}
-
- res = -EROFS;
-
- if ((mode & FMODE_WRITE) &&
- !test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags))
- goto out;
-
mutex_unlock(&open_lock);
mutex_unlock(&floppy_mutex);
return 0;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f0cdff0c5fbf..fa1c298a8cfb 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -774,6 +774,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
goto out_err;
/* and ... switch */
+ disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
blk_mq_freeze_queue(lo->lo_queue);
mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
lo->lo_backing_file = file;
@@ -1257,6 +1258,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
goto out_unlock;
}
+ disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
@@ -1304,10 +1306,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
if (partscan)
lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
- /* Grab the block_device to prevent its destruction after we
- * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
- */
- bdgrab(bdev);
loop_global_unlock(lo, is_loop);
if (partscan)
loop_reread_partitions(lo);
@@ -1398,7 +1396,6 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
blk_queue_physical_block_size(lo->lo_queue, 512);
blk_queue_io_min(lo->lo_queue, 512);
if (bdev) {
- bdput(bdev);
invalidate_bdev(bdev);
bdev->bd_inode->i_mapping->wb_err = 0;
}
@@ -1415,6 +1412,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN && bdev;
lo_number = lo->lo_number;
+ disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
out_unlock:
mutex_unlock(&lo->lo_mutex);
if (partscan) {
@@ -2335,7 +2333,8 @@ static int loop_add(int i)
lo->tag_set.queue_depth = 128;
lo->tag_set.numa_node = NUMA_NO_NODE;
lo->tag_set.cmd_size = sizeof(struct loop_cmd);
- lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING;
+ lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING |
+ BLK_MQ_F_NO_SCHED_BY_DEFAULT;
lo->tag_set.driver_data = lo;
err = blk_mq_alloc_tag_set(&lo->tag_set);
@@ -2391,6 +2390,8 @@ static int loop_add(int i)
disk->fops = &lo_fops;
disk->private_data = lo;
disk->queue = lo->lo_queue;
+ disk->events = DISK_EVENT_MEDIA_CHANGE;
+ disk->event_flags = DISK_EVENT_FLAG_UEVENT;
sprintf(disk->disk_name, "loop%d", i);
add_disk(disk);
mutex_unlock(&loop_ctl_mutex);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index c38317979f74..5170a630778d 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -49,6 +49,7 @@
static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);
+static struct workqueue_struct *nbd_del_wq;
static int nbd_total_devices = 0;
struct nbd_sock {
@@ -113,12 +114,12 @@ struct nbd_device {
struct mutex config_lock;
struct gendisk *disk;
struct workqueue_struct *recv_workq;
+ struct work_struct remove_work;
struct list_head list;
struct task_struct *task_recv;
struct task_struct *task_setup;
- struct completion *destroy_complete;
unsigned long flags;
char *backend;
@@ -237,32 +238,36 @@ static void nbd_dev_remove(struct nbd_device *nbd)
{
struct gendisk *disk = nbd->disk;
- if (disk) {
- del_gendisk(disk);
- blk_cleanup_disk(disk);
- blk_mq_free_tag_set(&nbd->tag_set);
- }
+ del_gendisk(disk);
+ blk_cleanup_disk(disk);
+ blk_mq_free_tag_set(&nbd->tag_set);
/*
- * Place this in the last just before the nbd is freed to
- * make sure that the disk and the related kobject are also
- * totally removed to avoid duplicate creation of the same
- * one.
+ * Remove from idr after del_gendisk() completes, so if the same ID is
+ * reused, the following add_disk() will succeed.
*/
- if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete)
- complete(nbd->destroy_complete);
+ mutex_lock(&nbd_index_mutex);
+ idr_remove(&nbd_index_idr, nbd->index);
+ mutex_unlock(&nbd_index_mutex);
kfree(nbd);
}
+static void nbd_dev_remove_work(struct work_struct *work)
+{
+ nbd_dev_remove(container_of(work, struct nbd_device, remove_work));
+}
+
static void nbd_put(struct nbd_device *nbd)
{
- if (refcount_dec_and_mutex_lock(&nbd->refs,
- &nbd_index_mutex)) {
- idr_remove(&nbd_index_idr, nbd->index);
+ if (!refcount_dec_and_test(&nbd->refs))
+ return;
+
+ /* Call del_gendisk() asynchronously to prevent deadlock */
+ if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
+ queue_work(nbd_del_wq, &nbd->remove_work);
+ else
nbd_dev_remove(nbd);
- mutex_unlock(&nbd_index_mutex);
- }
}
static int nbd_disconnected(struct nbd_config *config)
@@ -818,6 +823,10 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
+ /* don't abort an already completed request */
+ if (blk_mq_request_completed(req))
+ return true;
+
mutex_lock(&cmd->lock);
cmd->status = BLK_STS_IOERR;
mutex_unlock(&cmd->lock);
@@ -1384,6 +1393,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
unsigned int cmd, unsigned long arg)
{
struct nbd_config *config = nbd->config;
+ loff_t bytesize;
switch (cmd) {
case NBD_DISCONNECT:
@@ -1398,8 +1408,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
case NBD_SET_SIZE:
return nbd_set_size(nbd, arg, config->blksize);
case NBD_SET_SIZE_BLOCKS:
- return nbd_set_size(nbd, arg * config->blksize,
- config->blksize);
+ if (check_mul_overflow((loff_t)arg, config->blksize, &bytesize))
+ return -EINVAL;
+ return nbd_set_size(nbd, bytesize, config->blksize);
case NBD_SET_TIMEOUT:
nbd_set_cmd_timeout(nbd, arg);
return 0;
@@ -1661,7 +1672,7 @@ static const struct blk_mq_ops nbd_mq_ops = {
.timeout = nbd_xmit_timeout,
};
-static int nbd_dev_add(int index)
+static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
{
struct nbd_device *nbd;
struct gendisk *disk;
@@ -1679,13 +1690,14 @@ static int nbd_dev_add(int index)
nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
BLK_MQ_F_BLOCKING;
nbd->tag_set.driver_data = nbd;
- nbd->destroy_complete = NULL;
+ INIT_WORK(&nbd->remove_work, nbd_dev_remove_work);
nbd->backend = NULL;
err = blk_mq_alloc_tag_set(&nbd->tag_set);
if (err)
goto out_free_nbd;
+ mutex_lock(&nbd_index_mutex);
if (index >= 0) {
err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
GFP_KERNEL);
@@ -1696,9 +1708,10 @@ static int nbd_dev_add(int index)
if (err >= 0)
index = err;
}
+ nbd->index = index;
+ mutex_unlock(&nbd_index_mutex);
if (err < 0)
goto out_free_tags;
- nbd->index = index;
disk = blk_mq_alloc_disk(&nbd->tag_set, NULL);
if (IS_ERR(disk)) {
@@ -1722,38 +1735,65 @@ static int nbd_dev_add(int index)
mutex_init(&nbd->config_lock);
refcount_set(&nbd->config_refs, 0);
- refcount_set(&nbd->refs, 1);
+ /*
+ * Start out with zero references to keep other threads from using
+ * this device until it is fully initialized.
+ */
+ refcount_set(&nbd->refs, 0);
INIT_LIST_HEAD(&nbd->list);
disk->major = NBD_MAJOR;
+
+ /* A first_minor that is too big can cause duplicate creation of
+ * sysfs files/links, since first_minor will be truncated to a
+ * byte in __device_add_disk().
+ */
disk->first_minor = index << part_shift;
+ if (disk->first_minor > 0xff) {
+ err = -EINVAL;
+ goto out_free_idr;
+ }
+
disk->minors = 1 << part_shift;
disk->fops = &nbd_fops;
disk->private_data = nbd;
sprintf(disk->disk_name, "nbd%d", index);
add_disk(disk);
+
+ /*
+ * Now publish the device.
+ */
+ refcount_set(&nbd->refs, refs);
nbd_total_devices++;
- return index;
+ return nbd;
out_free_idr:
+ mutex_lock(&nbd_index_mutex);
idr_remove(&nbd_index_idr, index);
+ mutex_unlock(&nbd_index_mutex);
out_free_tags:
blk_mq_free_tag_set(&nbd->tag_set);
out_free_nbd:
kfree(nbd);
out:
- return err;
+ return ERR_PTR(err);
}
-static int find_free_cb(int id, void *ptr, void *data)
+static struct nbd_device *nbd_find_get_unused(void)
{
- struct nbd_device *nbd = ptr;
- struct nbd_device **found = data;
+ struct nbd_device *nbd;
+ int id;
- if (!refcount_read(&nbd->config_refs)) {
- *found = nbd;
- return 1;
+ lockdep_assert_held(&nbd_index_mutex);
+
+ idr_for_each_entry(&nbd_index_idr, nbd, id) {
+ if (refcount_read(&nbd->config_refs) ||
+ test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
+ continue;
+ if (refcount_inc_not_zero(&nbd->refs))
+ return nbd;
}
- return 0;
+
+ return NULL;
}
/* Netlink interface. */
@@ -1802,8 +1842,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
{
- DECLARE_COMPLETION_ONSTACK(destroy_complete);
- struct nbd_device *nbd = NULL;
+ struct nbd_device *nbd;
struct nbd_config *config;
int index = -1;
int ret;
@@ -1825,55 +1864,29 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
again:
mutex_lock(&nbd_index_mutex);
if (index == -1) {
- ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd);
- if (ret == 0) {
- int new_index;
- new_index = nbd_dev_add(-1);
- if (new_index < 0) {
- mutex_unlock(&nbd_index_mutex);
- printk(KERN_ERR "nbd: failed to add new device\n");
- return new_index;
- }
- nbd = idr_find(&nbd_index_idr, new_index);
- }
+ nbd = nbd_find_get_unused();
} else {
nbd = idr_find(&nbd_index_idr, index);
- if (!nbd) {
- ret = nbd_dev_add(index);
- if (ret < 0) {
+ if (nbd) {
+ if ((test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
+ test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) ||
+ !refcount_inc_not_zero(&nbd->refs)) {
mutex_unlock(&nbd_index_mutex);
- printk(KERN_ERR "nbd: failed to add new device\n");
- return ret;
+ pr_err("nbd: device at index %d is going down\n",
+ index);
+ return -EINVAL;
}
- nbd = idr_find(&nbd_index_idr, index);
}
}
- if (!nbd) {
- printk(KERN_ERR "nbd: couldn't find device at index %d\n",
- index);
- mutex_unlock(&nbd_index_mutex);
- return -EINVAL;
- }
-
- if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
- test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
- nbd->destroy_complete = &destroy_complete;
- mutex_unlock(&nbd_index_mutex);
-
- /* Wait untill the the nbd stuff is totally destroyed */
- wait_for_completion(&destroy_complete);
- goto again;
- }
+ mutex_unlock(&nbd_index_mutex);
- if (!refcount_inc_not_zero(&nbd->refs)) {
- mutex_unlock(&nbd_index_mutex);
- if (index == -1)
- goto again;
- printk(KERN_ERR "nbd: device at index %d is going down\n",
- index);
- return -EINVAL;
+ if (!nbd) {
+ nbd = nbd_dev_add(index, 2);
+ if (IS_ERR(nbd)) {
+ pr_err("nbd: failed to add new device\n");
+ return PTR_ERR(nbd);
+ }
}
- mutex_unlock(&nbd_index_mutex);
mutex_lock(&nbd->config_lock);
if (refcount_read(&nbd->config_refs)) {
@@ -2004,15 +2017,19 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
{
mutex_lock(&nbd->config_lock);
nbd_disconnect(nbd);
- nbd_clear_sock(nbd);
- mutex_unlock(&nbd->config_lock);
+ sock_shutdown(nbd);
/*
* Make sure recv thread has finished, so it does not drop the last
* config ref and try to destroy the workqueue from inside the work
- * queue.
+ * queue. And this also ensures that we can safely call nbd_clear_que()
+ * to cancel the inflight I/Os.
*/
if (nbd->recv_workq)
flush_workqueue(nbd->recv_workq);
+ nbd_clear_que(nbd);
+ nbd->task_setup = NULL;
+ mutex_unlock(&nbd->config_lock);
+
if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
&nbd->config->runtime_flags))
nbd_config_put(nbd);
@@ -2416,16 +2433,21 @@ static int __init nbd_init(void)
if (register_blkdev(NBD_MAJOR, "nbd"))
return -EIO;
+ nbd_del_wq = alloc_workqueue("nbd-del", WQ_UNBOUND, 0);
+ if (!nbd_del_wq) {
+ unregister_blkdev(NBD_MAJOR, "nbd");
+ return -ENOMEM;
+ }
+
if (genl_register_family(&nbd_genl_family)) {
+ destroy_workqueue(nbd_del_wq);
unregister_blkdev(NBD_MAJOR, "nbd");
return -EINVAL;
}
nbd_dbg_init();
- mutex_lock(&nbd_index_mutex);
for (i = 0; i < nbds_max; i++)
- nbd_dev_add(i);
- mutex_unlock(&nbd_index_mutex);
+ nbd_dev_add(i, 1);
return 0;
}
@@ -2434,7 +2456,10 @@ static int nbd_exit_cb(int id, void *ptr, void *data)
struct list_head *list = (struct list_head *)data;
struct nbd_device *nbd = ptr;
- list_add_tail(&nbd->list, list);
+ /* Skip nbd that is being removed asynchronously */
+ if (refcount_read(&nbd->refs))
+ list_add_tail(&nbd->list, list);
+
return 0;
}
@@ -2457,6 +2482,9 @@ static void __exit nbd_cleanup(void)
nbd_put(nbd);
}
+ /* Also wait for nbd_dev_remove_work() to complete */
+ destroy_workqueue(nbd_del_wq);
+
idr_destroy(&nbd_index_idr);
genl_unregister_family(&nbd_genl_family);
unregister_blkdev(NBD_MAJOR, "nbd");
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index d734e9ee1546..187d779c8ca0 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -11,10 +11,6 @@
#include <linux/init.h>
#include "null_blk.h"
-#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
-#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
-#define SECTOR_MASK (PAGE_SECTORS - 1)
-
#define FREE_BATCH 16
#define TICKS_PER_SEC 50ULL
@@ -1721,8 +1717,7 @@ static int null_gendisk_register(struct nullb *nullb)
return ret;
}
- add_disk(disk);
- return 0;
+ return add_disk(disk);
}
static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 9b3298926356..675327df6aff 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -892,7 +892,7 @@ static void pd_probe_drive(struct pd_unit *disk)
return;
p = blk_mq_alloc_disk(&disk->tag_set, disk);
- if (!p) {
+ if (IS_ERR(p)) {
blk_mq_free_tag_set(&disk->tag_set);
return;
}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 538446b652de..0f26b2510a75 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -1183,10 +1183,8 @@ try_next_bio:
wakeup = (pd->write_congestion_on > 0
&& pd->bio_queue_size <= pd->write_congestion_off);
spin_unlock(&pd->lock);
- if (wakeup) {
- clear_bdi_congested(pd->disk->queue->backing_dev_info,
- BLK_RW_ASYNC);
- }
+ if (wakeup)
+ clear_bdi_congested(pd->disk->bdi, BLK_RW_ASYNC);
pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
pkt_set_state(pkt, PACKET_WAITING_STATE);
@@ -2366,7 +2364,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
spin_lock(&pd->lock);
if (pd->write_congestion_on > 0
&& pd->bio_queue_size >= pd->write_congestion_on) {
- set_bdi_congested(q->backing_dev_info, BLK_RW_ASYNC);
+ set_bdi_congested(bio->bi_bdev->bd_disk->bdi, BLK_RW_ASYNC);
do {
spin_unlock(&pd->lock);
congestion_wait(BLK_RW_ASYNC, HZ);
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index f374ea2c67ce..8d51efbe045d 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -83,26 +83,12 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev,
unsigned int offset = 0;
struct req_iterator iter;
struct bio_vec bvec;
- unsigned int i = 0;
- size_t size;
- void *buf;
rq_for_each_segment(bvec, req, iter) {
- unsigned long flags;
- dev_dbg(&dev->sbd.core, "%s:%u: bio %u: %u sectors from %llu\n",
- __func__, __LINE__, i, bio_sectors(iter.bio),
- iter.bio->bi_iter.bi_sector);
-
- size = bvec.bv_len;
- buf = bvec_kmap_irq(&bvec, &flags);
if (gather)
- memcpy(dev->bounce_buf+offset, buf, size);
+ memcpy_from_bvec(dev->bounce_buf + offset, &bvec);
else
- memcpy(buf, dev->bounce_buf+offset, size);
- offset += size;
- flush_kernel_dcache_page(bvec.bv_page);
- bvec_kunmap_irq(buf, &flags);
- i++;
+ memcpy_to_bvec(&bvec, dev->bounce_buf + offset);
}
}
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 7fbf469651c4..c7b19e128b03 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -541,7 +541,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
bio_for_each_segment(bvec, bio, iter) {
/* PS3 is ppc64, so we don't handle highmem */
- char *ptr = page_address(bvec.bv_page) + bvec.bv_offset;
+ char *ptr = bvec_virt(&bvec);
size_t len = bvec.bv_len, retlen;
dev_dbg(&dev->core, " %s %zu bytes at offset %llu\n", op,
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 90b947c96402..e65c9d706f6f 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1219,24 +1219,13 @@ static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev)
rbd_dev->mapping.size = 0;
}
-static void zero_bvec(struct bio_vec *bv)
-{
- void *buf;
- unsigned long flags;
-
- buf = bvec_kmap_irq(bv, &flags);
- memset(buf, 0, bv->bv_len);
- flush_dcache_page(bv->bv_page);
- bvec_kunmap_irq(buf, &flags);
-}
-
static void zero_bios(struct ceph_bio_iter *bio_pos, u32 off, u32 bytes)
{
struct ceph_bio_iter it = *bio_pos;
ceph_bio_iter_advance(&it, off);
ceph_bio_iter_advance_step(&it, bytes, ({
- zero_bvec(&bv);
+ memzero_bvec(&bv);
}));
}
@@ -1246,7 +1235,7 @@ static void zero_bvecs(struct ceph_bvec_iter *bvec_pos, u32 off, u32 bytes)
ceph_bvec_iter_advance(&it, off);
ceph_bvec_iter_advance_step(&it, bytes, ({
- zero_bvec(&bv);
+ memzero_bvec(&bv);
}));
}
@@ -2997,8 +2986,7 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes)
};
ceph_bvec_iter_advance_step(&it, bytes, ({
- if (memchr_inv(page_address(bv.bv_page) + bv.bv_offset, 0,
- bv.bv_len))
+ if (memchr_inv(bvec_virt(&bv), 0, bv.bv_len))
return false;
}));
return true;
diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c
index 324afdd63a96..4b93fd83bf79 100644
--- a/drivers/block/rnbd/rnbd-clt-sysfs.c
+++ b/drivers/block/rnbd/rnbd-clt-sysfs.c
@@ -227,17 +227,17 @@ static ssize_t state_show(struct kobject *kobj,
switch (dev->dev_state) {
case DEV_STATE_INIT:
- return snprintf(page, PAGE_SIZE, "init\n");
+ return sysfs_emit(page, "init\n");
case DEV_STATE_MAPPED:
/* TODO fix cli tool before changing to proper state */
- return snprintf(page, PAGE_SIZE, "open\n");
+ return sysfs_emit(page, "open\n");
case DEV_STATE_MAPPED_DISCONNECTED:
/* TODO fix cli tool before changing to proper state */
- return snprintf(page, PAGE_SIZE, "closed\n");
+ return sysfs_emit(page, "closed\n");
case DEV_STATE_UNMAPPED:
- return snprintf(page, PAGE_SIZE, "unmapped\n");
+ return sysfs_emit(page, "unmapped\n");
default:
- return snprintf(page, PAGE_SIZE, "unknown\n");
+ return sysfs_emit(page, "unknown\n");
}
}
@@ -263,7 +263,7 @@ static ssize_t mapping_path_show(struct kobject *kobj,
dev = container_of(kobj, struct rnbd_clt_dev, kobj);
- return scnprintf(page, PAGE_SIZE, "%s\n", dev->pathname);
+ return sysfs_emit(page, "%s\n", dev->pathname);
}
static struct kobj_attribute rnbd_clt_mapping_path_attr =
@@ -276,8 +276,7 @@ static ssize_t access_mode_show(struct kobject *kobj,
dev = container_of(kobj, struct rnbd_clt_dev, kobj);
- return snprintf(page, PAGE_SIZE, "%s\n",
- rnbd_access_mode_str(dev->access_mode));
+ return sysfs_emit(page, "%s\n", rnbd_access_mode_str(dev->access_mode));
}
static struct kobj_attribute rnbd_clt_access_mode =
@@ -286,8 +285,8 @@ static struct kobj_attribute rnbd_clt_access_mode =
static ssize_t rnbd_clt_unmap_dev_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return scnprintf(page, PAGE_SIZE, "Usage: echo <normal|force> > %s\n",
- attr->attr.name);
+ return sysfs_emit(page, "Usage: echo <normal|force> > %s\n",
+ attr->attr.name);
}
static ssize_t rnbd_clt_unmap_dev_store(struct kobject *kobj,
@@ -357,9 +356,8 @@ static ssize_t rnbd_clt_resize_dev_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
- return scnprintf(page, PAGE_SIZE,
- "Usage: echo <new size in sectors> > %s\n",
- attr->attr.name);
+ return sysfs_emit(page, "Usage: echo <new size in sectors> > %s\n",
+ attr->attr.name);
}
static ssize_t rnbd_clt_resize_dev_store(struct kobject *kobj,
@@ -390,8 +388,7 @@ static struct kobj_attribute rnbd_clt_resize_dev_attr =
static ssize_t rnbd_clt_remap_dev_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return scnprintf(page, PAGE_SIZE, "Usage: echo <1> > %s\n",
- attr->attr.name);
+ return sysfs_emit(page, "Usage: echo <1> > %s\n", attr->attr.name);
}
static ssize_t rnbd_clt_remap_dev_store(struct kobject *kobj,
@@ -436,7 +433,7 @@ static ssize_t session_show(struct kobject *kobj, struct kobj_attribute *attr,
dev = container_of(kobj, struct rnbd_clt_dev, kobj);
- return scnprintf(page, PAGE_SIZE, "%s\n", dev->sess->sessname);
+ return sysfs_emit(page, "%s\n", dev->sess->sessname);
}
static struct kobj_attribute rnbd_clt_session_attr =
@@ -499,8 +496,8 @@ static ssize_t rnbd_clt_map_device_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
- return scnprintf(page, PAGE_SIZE,
- "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
+ return sysfs_emit(page,
+ "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
attr->attr.name);
}
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index e9cc413495f0..bd4a41afbbfc 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -271,7 +271,7 @@ unlock:
*/
if (cpu_q)
*cpup = cpu_q->cpu;
- put_cpu_var(sess->cpu_rr);
+ put_cpu_ptr(sess->cpu_rr);
if (q)
rnbd_clt_dev_requeue(q);
diff --git a/drivers/block/rnbd/rnbd-srv-sysfs.c b/drivers/block/rnbd/rnbd-srv-sysfs.c
index acf5fced11ef..4db98e0e76f0 100644
--- a/drivers/block/rnbd/rnbd-srv-sysfs.c
+++ b/drivers/block/rnbd/rnbd-srv-sysfs.c
@@ -90,8 +90,8 @@ static ssize_t read_only_show(struct kobject *kobj, struct kobj_attribute *attr,
sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);
- return scnprintf(page, PAGE_SIZE, "%d\n",
- !(sess_dev->open_flags & FMODE_WRITE));
+ return sysfs_emit(page, "%d\n",
+ !(sess_dev->open_flags & FMODE_WRITE));
}
static struct kobj_attribute rnbd_srv_dev_session_ro_attr =
@@ -105,8 +105,8 @@ static ssize_t access_mode_show(struct kobject *kobj,
sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);
- return scnprintf(page, PAGE_SIZE, "%s\n",
- rnbd_access_mode_str(sess_dev->access_mode));
+ return sysfs_emit(page, "%s\n",
+ rnbd_access_mode_str(sess_dev->access_mode));
}
static struct kobj_attribute rnbd_srv_dev_session_access_mode_attr =
@@ -119,7 +119,7 @@ static ssize_t mapping_path_show(struct kobject *kobj,
sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);
- return scnprintf(page, PAGE_SIZE, "%s\n", sess_dev->pathname);
+ return sysfs_emit(page, "%s\n", sess_dev->pathname);
}
static struct kobj_attribute rnbd_srv_dev_session_mapping_path_attr =
@@ -128,8 +128,8 @@ static struct kobj_attribute rnbd_srv_dev_session_mapping_path_attr =
static ssize_t rnbd_srv_dev_session_force_close_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n",
- attr->attr.name);
+ return sysfs_emit(page, "Usage: echo 1 > %s\n",
+ attr->attr.name);
}
static ssize_t rnbd_srv_dev_session_force_close_store(struct kobject *kobj,
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 7b54353ee92b..420cd952ddc4 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -1373,7 +1373,7 @@ static void carm_free_disk(struct carm_host *host, unsigned int port_no)
if (!disk)
return;
- if (disk->flags & GENHD_FL_UP)
+ if (host->state > HST_DEV_ACTIVATE)
del_gendisk(disk);
blk_cleanup_disk(disk);
}
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4b49df2dfd23..57c6ae7debd9 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -166,11 +166,8 @@ static inline void virtblk_request_done(struct request *req)
{
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
- if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
- kfree(page_address(req->special_vec.bv_page) +
- req->special_vec.bv_offset);
- }
-
+ if (req->rq_flags & RQF_SPECIAL_PAYLOAD)
+ kfree(bvec_virt(&req->special_vec));
blk_mq_end_request(req, virtblk_result(vbr));
}
@@ -692,6 +689,28 @@ static const struct blk_mq_ops virtio_mq_ops = {
static unsigned int virtblk_queue_depth;
module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);
+static int virtblk_validate(struct virtio_device *vdev)
+{
+ u32 blk_size;
+
+ if (!vdev->config->get) {
+ dev_err(&vdev->dev, "%s failure: config access disabled\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ if (!virtio_has_feature(vdev, VIRTIO_BLK_F_BLK_SIZE))
+ return 0;
+
+ blk_size = virtio_cread32(vdev,
+ offsetof(struct virtio_blk_config, blk_size));
+
+ if (blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE)
+ __virtio_clear_bit(vdev, VIRTIO_BLK_F_BLK_SIZE);
+
+ return 0;
+}
+
static int virtblk_probe(struct virtio_device *vdev)
{
struct virtio_blk *vblk;
@@ -703,12 +722,6 @@ static int virtblk_probe(struct virtio_device *vdev)
u8 physical_block_exp, alignment_offset;
unsigned int queue_depth;
- if (!vdev->config->get) {
- dev_err(&vdev->dev, "%s failure: config access disabled\n",
- __func__);
- return -EINVAL;
- }
-
err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
GFP_KERNEL);
if (err < 0)
@@ -823,6 +836,14 @@ static int virtblk_probe(struct virtio_device *vdev)
else
blk_size = queue_logical_block_size(q);
+ if (unlikely(blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE)) {
+ dev_err(&vdev->dev,
+ "block size is changed unexpectedly, now is %u\n",
+ blk_size);
+ err = -EINVAL;
+ goto out_cleanup_disk;
+ }
+
/* Use topology information if available */
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
struct virtio_blk_config, physical_block_exp,
@@ -878,9 +899,14 @@ static int virtblk_probe(struct virtio_device *vdev)
virtblk_update_capacity(vblk, false);
virtio_device_ready(vdev);
- device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
+ err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
+ if (err)
+ goto out_cleanup_disk;
+
return 0;
+out_cleanup_disk:
+ blk_cleanup_disk(vblk->disk);
out_free_tags:
blk_mq_free_tag_set(&vblk->tag_set);
out_free_vq:
@@ -983,6 +1009,7 @@ static struct virtio_driver virtio_blk = {
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,
+ .validate = virtblk_validate,
.probe = virtblk_probe,
.remove = virtblk_remove,
.config_changed = virtblk_config_changed,
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index d83fee21f6c5..715bfa8aca7f 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1092,7 +1092,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
err = xlbd_reserve_minors(minor, nr_minors);
if (err)
return err;
- err = -ENODEV;
memset(&info->tag_set, 0, sizeof(info->tag_set));
info->tag_set.ops = &blkfront_mq_ops;
diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index e5d706ed55ea..e4182acee488 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -387,6 +387,7 @@ struct bcm_subver_table {
};
static const struct bcm_subver_table bcm_uart_subver_table[] = {
+ { 0x1111, "BCM4362A2" }, /* 000.017.017 */
{ 0x4103, "BCM4330B1" }, /* 002.001.003 */
{ 0x410d, "BCM4334B0" }, /* 002.001.013 */
{ 0x410e, "BCM43341B0" }, /* 002.001.014 */
diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index e44b6993cf91..f1705b46fc88 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -131,6 +131,26 @@ int btintel_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
}
EXPORT_SYMBOL_GPL(btintel_set_bdaddr);
+static int btintel_set_event_mask(struct hci_dev *hdev, bool debug)
+{
+ u8 mask[8] = { 0x87, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+ struct sk_buff *skb;
+ int err;
+
+ if (debug)
+ mask[1] |= 0x62;
+
+ skb = __hci_cmd_sync(hdev, 0xfc52, 8, mask, HCI_INIT_TIMEOUT);
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ bt_dev_err(hdev, "Setting Intel event mask failed (%d)", err);
+ return err;
+ }
+ kfree_skb(skb);
+
+ return 0;
+}
+
int btintel_set_diag(struct hci_dev *hdev, bool enable)
{
struct sk_buff *skb;
@@ -164,7 +184,7 @@ done:
}
EXPORT_SYMBOL_GPL(btintel_set_diag);
-int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable)
+static int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable)
{
int err, ret;
@@ -180,9 +200,25 @@ int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable)
return ret;
}
-EXPORT_SYMBOL_GPL(btintel_set_diag_mfg);
-void btintel_hw_error(struct hci_dev *hdev, u8 code)
+static int btintel_set_diag_combined(struct hci_dev *hdev, bool enable)
+{
+ int ret;
+
+ /* Legacy ROM device needs to be in the manufacturer mode to apply
+ * diagnostic setting
+ *
+ * This flag is set after reading the Intel version.
+ */
+ if (btintel_test_flag(hdev, INTEL_ROM_LEGACY))
+ ret = btintel_set_diag_mfg(hdev, enable);
+ else
+ ret = btintel_set_diag(hdev, enable);
+
+ return ret;
+}
+
+static void btintel_hw_error(struct hci_dev *hdev, u8 code)
{
struct sk_buff *skb;
u8 type = 0x00;
@@ -214,7 +250,6 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code)
kfree_skb(skb);
}
-EXPORT_SYMBOL_GPL(btintel_hw_error);
int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver)
{
@@ -236,6 +271,8 @@ int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver)
* compatibility options when newer hardware variants come along.
*/
switch (ver->hw_variant) {
+ case 0x07: /* WP - Legacy ROM */
+ case 0x08: /* StP - Legacy ROM */
case 0x0b: /* SfP */
case 0x0c: /* WsP */
case 0x11: /* JfP */
@@ -250,9 +287,15 @@ int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver)
}
switch (ver->fw_variant) {
+ case 0x01:
+ variant = "Legacy ROM 2.5";
+ break;
case 0x06:
variant = "Bootloader";
break;
+ case 0x22:
+ variant = "Legacy ROM 2.x";
+ break;
case 0x23:
variant = "Firmware";
break;
@@ -270,8 +313,8 @@ int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver)
}
EXPORT_SYMBOL_GPL(btintel_version_info);
-int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type, u32 plen,
- const void *param)
+static int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type, u32 plen,
+ const void *param)
{
while (plen > 0) {
struct sk_buff *skb;
@@ -293,7 +336,6 @@ int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type, u32 plen,
return 0;
}
-EXPORT_SYMBOL_GPL(btintel_secure_send);
int btintel_load_ddc_config(struct hci_dev *hdev, const char *ddc_name)
{
@@ -340,27 +382,6 @@ int btintel_load_ddc_config(struct hci_dev *hdev, const char *ddc_name)
}
EXPORT_SYMBOL_GPL(btintel_load_ddc_config);
-int btintel_set_event_mask(struct hci_dev *hdev, bool debug)
-{
- u8 mask[8] = { 0x87, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
- struct sk_buff *skb;
- int err;
-
- if (debug)
- mask[1] |= 0x62;
-
- skb = __hci_cmd_sync(hdev, 0xfc52, 8, mask, HCI_INIT_TIMEOUT);
- if (IS_ERR(skb)) {
- err = PTR_ERR(skb);
- bt_dev_err(hdev, "Setting Intel event mask failed (%d)", err);
- return err;
- }
- kfree_skb(skb);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(btintel_set_event_mask);
-
int btintel_set_event_mask_mfg(struct hci_dev *hdev, bool debug)
{
int err, ret;
@@ -404,7 +425,8 @@ int btintel_read_version(struct hci_dev *hdev, struct intel_version *ver)
}
EXPORT_SYMBOL_GPL(btintel_read_version);
-int btintel_version_info_tlv(struct hci_dev *hdev, struct intel_version_tlv *version)
+static int btintel_version_info_tlv(struct hci_dev *hdev,
+ struct intel_version_tlv *version)
{
const char *variant;
@@ -481,30 +503,11 @@ int btintel_version_info_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver
return 0;
}
-EXPORT_SYMBOL_GPL(btintel_version_info_tlv);
-int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *version)
+static int btintel_parse_version_tlv(struct hci_dev *hdev,
+ struct intel_version_tlv *version,
+ struct sk_buff *skb)
{
- struct sk_buff *skb;
- const u8 param[1] = { 0xFF };
-
- if (!version)
- return -EINVAL;
-
- skb = __hci_cmd_sync(hdev, 0xfc05, 1, param, HCI_CMD_TIMEOUT);
- if (IS_ERR(skb)) {
- bt_dev_err(hdev, "Reading Intel version information failed (%ld)",
- PTR_ERR(skb));
- return PTR_ERR(skb);
- }
-
- if (skb->data[0]) {
- bt_dev_err(hdev, "Intel Read Version command failed (%02x)",
- skb->data[0]);
- kfree_skb(skb);
- return -EIO;
- }
-
/* Consume Command Complete Status field */
skb_pull(skb, 1);
@@ -516,7 +519,16 @@ int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver
while (skb->len) {
struct intel_tlv *tlv;
+ /* Make sure skb has a minimum length of the header */
+ if (skb->len < sizeof(*tlv))
+ return -EINVAL;
+
tlv = (struct intel_tlv *)skb->data;
+
+ /* Make sure skb has enough data */
+ if (skb->len < tlv->len + sizeof(*tlv))
+ return -EINVAL;
+
switch (tlv->type) {
case INTEL_TLV_CNVI_TOP:
version->cnvi_top = get_unaligned_le32(tlv->val);
@@ -580,7 +592,8 @@ int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver
version->sbe_type = tlv->val[0];
break;
case INTEL_TLV_OTP_BDADDR:
- memcpy(&version->otp_bd_addr, tlv->val, tlv->len);
+ memcpy(&version->otp_bd_addr, tlv->val,
+ sizeof(bdaddr_t));
break;
default:
/* Ignore rest of information */
@@ -590,10 +603,37 @@ int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver
skb_pull(skb, tlv->len + sizeof(*tlv));
}
+ return 0;
+}
+
+static int btintel_read_version_tlv(struct hci_dev *hdev,
+ struct intel_version_tlv *version)
+{
+ struct sk_buff *skb;
+ const u8 param[1] = { 0xFF };
+
+ if (!version)
+ return -EINVAL;
+
+ skb = __hci_cmd_sync(hdev, 0xfc05, 1, param, HCI_CMD_TIMEOUT);
+ if (IS_ERR(skb)) {
+ bt_dev_err(hdev, "Reading Intel version information failed (%ld)",
+ PTR_ERR(skb));
+ return PTR_ERR(skb);
+ }
+
+ if (skb->data[0]) {
+ bt_dev_err(hdev, "Intel Read Version command failed (%02x)",
+ skb->data[0]);
+ kfree_skb(skb);
+ return -EIO;
+ }
+
+ btintel_parse_version_tlv(hdev, version, skb);
+
kfree_skb(skb);
return 0;
}
-EXPORT_SYMBOL_GPL(btintel_read_version_tlv);
/* ------- REGMAP IBT SUPPORT ------- */
@@ -1066,10 +1106,10 @@ int btintel_download_firmware(struct hci_dev *hdev,
}
EXPORT_SYMBOL_GPL(btintel_download_firmware);
-int btintel_download_firmware_newgen(struct hci_dev *hdev,
- struct intel_version_tlv *ver,
- const struct firmware *fw, u32 *boot_param,
- u8 hw_variant, u8 sbe_type)
+static int btintel_download_fw_tlv(struct hci_dev *hdev,
+ struct intel_version_tlv *ver,
+ const struct firmware *fw, u32 *boot_param,
+ u8 hw_variant, u8 sbe_type)
{
int err;
u32 css_header_ver;
@@ -1166,9 +1206,8 @@ int btintel_download_firmware_newgen(struct hci_dev *hdev,
}
return 0;
}
-EXPORT_SYMBOL_GPL(btintel_download_firmware_newgen);
-void btintel_reset_to_bootloader(struct hci_dev *hdev)
+static void btintel_reset_to_bootloader(struct hci_dev *hdev)
{
struct intel_reset params;
struct sk_buff *skb;
@@ -1211,10 +1250,9 @@ void btintel_reset_to_bootloader(struct hci_dev *hdev)
*/
msleep(150);
}
-EXPORT_SYMBOL_GPL(btintel_reset_to_bootloader);
-int btintel_read_debug_features(struct hci_dev *hdev,
- struct intel_debug_features *features)
+static int btintel_read_debug_features(struct hci_dev *hdev,
+ struct intel_debug_features *features)
{
struct sk_buff *skb;
u8 page_no = 1;
@@ -1243,9 +1281,8 @@ int btintel_read_debug_features(struct hci_dev *hdev,
kfree_skb(skb);
return 0;
}
-EXPORT_SYMBOL_GPL(btintel_read_debug_features);
-int btintel_set_debug_features(struct hci_dev *hdev,
+static int btintel_set_debug_features(struct hci_dev *hdev,
const struct intel_debug_features *features)
{
u8 mask[11] = { 0x0a, 0x92, 0x02, 0x07, 0x00, 0x00, 0x00, 0x00,
@@ -1270,7 +1307,1154 @@ int btintel_set_debug_features(struct hci_dev *hdev,
kfree_skb(skb);
return 0;
}
-EXPORT_SYMBOL_GPL(btintel_set_debug_features);
+
+static const struct firmware *btintel_legacy_rom_get_fw(struct hci_dev *hdev,
+ struct intel_version *ver)
+{
+ const struct firmware *fw;
+ char fwname[64];
+ int ret;
+
+ snprintf(fwname, sizeof(fwname),
+ "intel/ibt-hw-%x.%x.%x-fw-%x.%x.%x.%x.%x.bseq",
+ ver->hw_platform, ver->hw_variant, ver->hw_revision,
+ ver->fw_variant, ver->fw_revision, ver->fw_build_num,
+ ver->fw_build_ww, ver->fw_build_yy);
+
+ ret = request_firmware(&fw, fwname, &hdev->dev);
+ if (ret < 0) {
+ if (ret == -EINVAL) {
+ bt_dev_err(hdev, "Intel firmware file request failed (%d)",
+ ret);
+ return NULL;
+ }
+
+ bt_dev_err(hdev, "failed to open Intel firmware file: %s (%d)",
+ fwname, ret);
+
+ /* If the correct firmware patch file is not found, use the
+ * default firmware patch file instead
+ */
+ snprintf(fwname, sizeof(fwname), "intel/ibt-hw-%x.%x.bseq",
+ ver->hw_platform, ver->hw_variant);
+ if (request_firmware(&fw, fwname, &hdev->dev) < 0) {
+ bt_dev_err(hdev, "failed to open default fw file: %s",
+ fwname);
+ return NULL;
+ }
+ }
+
+ bt_dev_info(hdev, "Intel Bluetooth firmware file: %s", fwname);
+
+ return fw;
+}
+
+static int btintel_legacy_rom_patching(struct hci_dev *hdev,
+ const struct firmware *fw,
+ const u8 **fw_ptr, int *disable_patch)
+{
+ struct sk_buff *skb;
+ struct hci_command_hdr *cmd;
+ const u8 *cmd_param;
+ struct hci_event_hdr *evt = NULL;
+ const u8 *evt_param = NULL;
+ int remain = fw->size - (*fw_ptr - fw->data);
+
+ /* The first byte indicates the type of the patch command or event.
+ * 0x01 means HCI command and 0x02 means HCI event. If the first byte
+ * in the current firmware buffer is not 0x01 or
+ * the size of the remaining buffer is smaller than the HCI command
+ * header, the firmware file is corrupted and the patching process
+ * should stop.
+ */
+ if (remain > HCI_COMMAND_HDR_SIZE && *fw_ptr[0] != 0x01) {
+ bt_dev_err(hdev, "Intel fw corrupted: invalid cmd read");
+ return -EINVAL;
+ }
+ (*fw_ptr)++;
+ remain--;
+
+ cmd = (struct hci_command_hdr *)(*fw_ptr);
+ *fw_ptr += sizeof(*cmd);
+ remain -= sizeof(*cmd);
+
+ /* Ensure that the remaining firmware data is at least as long as the
+ * command parameters. If not, the firmware file is corrupted.
+ */
+ if (remain < cmd->plen) {
+ bt_dev_err(hdev, "Intel fw corrupted: invalid cmd len");
+ return -EFAULT;
+ }
+
+ /* If there is a command that loads a patch in the firmware
+ * file, then enable the patch upon success, otherwise just
+ * disable the manufacturer mode, for example patch activation
+ * is not required when the default firmware patch file is used
+ * because there are no patch data to load.
+ */
+ if (*disable_patch && le16_to_cpu(cmd->opcode) == 0xfc8e)
+ *disable_patch = 0;
+
+ cmd_param = *fw_ptr;
+ *fw_ptr += cmd->plen;
+ remain -= cmd->plen;
+
+ /* This reads the expected events when the above command is sent to the
+ * device. Some vendor commands expect more than one event, for
+ * example a command status event followed by a vendor specific event.
+ * In this case, only the last expected event is kept, so the command
+ * can be sent with __hci_cmd_sync_ev(), which returns the sk_buff of
+ * the last expected event.
+ */
+ while (remain > HCI_EVENT_HDR_SIZE && *fw_ptr[0] == 0x02) {
+ (*fw_ptr)++;
+ remain--;
+
+ evt = (struct hci_event_hdr *)(*fw_ptr);
+ *fw_ptr += sizeof(*evt);
+ remain -= sizeof(*evt);
+
+ if (remain < evt->plen) {
+ bt_dev_err(hdev, "Intel fw corrupted: invalid evt len");
+ return -EFAULT;
+ }
+
+ evt_param = *fw_ptr;
+ *fw_ptr += evt->plen;
+ remain -= evt->plen;
+ }
+
+ /* Every HCI command in the firmware file has its corresponding event.
+ * If the event is not found or remain is smaller than zero, the firmware
+ * file is corrupted.
+ */
+ if (!evt || !evt_param || remain < 0) {
+ bt_dev_err(hdev, "Intel fw corrupted: invalid evt read");
+ return -EFAULT;
+ }
+
+ skb = __hci_cmd_sync_ev(hdev, le16_to_cpu(cmd->opcode), cmd->plen,
+ cmd_param, evt->evt, HCI_INIT_TIMEOUT);
+ if (IS_ERR(skb)) {
+ bt_dev_err(hdev, "sending Intel patch command (0x%4.4x) failed (%ld)",
+ cmd->opcode, PTR_ERR(skb));
+ return PTR_ERR(skb);
+ }
+
+ /* It ensures that the returned event matches the event data read from
+ * the firmware file. At first, it checks the length and then
+ * the contents of the event.
+ */
+ if (skb->len != evt->plen) {
+ bt_dev_err(hdev, "mismatch event length (opcode 0x%4.4x)",
+ le16_to_cpu(cmd->opcode));
+ kfree_skb(skb);
+ return -EFAULT;
+ }
+
+ if (memcmp(skb->data, evt_param, evt->plen)) {
+ bt_dev_err(hdev, "mismatch event parameter (opcode 0x%4.4x)",
+ le16_to_cpu(cmd->opcode));
+ kfree_skb(skb);
+ return -EFAULT;
+ }
+ kfree_skb(skb);
+
+ return 0;
+}
+
+static int btintel_legacy_rom_setup(struct hci_dev *hdev,
+ struct intel_version *ver)
+{
+ const struct firmware *fw;
+ const u8 *fw_ptr;
+ int disable_patch, err;
+ struct intel_version new_ver;
+
+ BT_DBG("%s", hdev->name);
+
+ /* fw_patch_num indicates the version of the patch the device currently
+ * has. If there is no patch data in the device, it is always 0x00.
+ * So, if it is other than 0x00, no need to patch the device again.
+ */
+ if (ver->fw_patch_num) {
+ bt_dev_info(hdev,
+ "Intel device is already patched. patch num: %02x",
+ ver->fw_patch_num);
+ goto complete;
+ }
+
+ /* Opens the firmware patch file based on the firmware version read
+ * from the controller. If it fails to open the matching firmware
+ * patch file, it tries to open the default firmware patch file.
+ * If no patch file is found, allow the device to operate without
+ * a patch.
+ */
+ fw = btintel_legacy_rom_get_fw(hdev, ver);
+ if (!fw)
+ goto complete;
+ fw_ptr = fw->data;
+
+ /* Enable the manufacturer mode of the controller.
+ * Only while this mode is enabled, the driver can download the
+ * firmware patch data and configuration parameters.
+ */
+ err = btintel_enter_mfg(hdev);
+ if (err) {
+ release_firmware(fw);
+ return err;
+ }
+
+ disable_patch = 1;
+
+ /* The firmware data file consists of list of Intel specific HCI
+ * commands and its expected events. The first byte indicates the
+ * type of the message, either HCI command or HCI event.
+ *
+ * It reads the command and its expected event from the firmware file,
+ * and send to the controller. Once __hci_cmd_sync_ev() returns,
+ * the returned event is compared with the event read from the firmware
+ * file and it will continue until all the messages are downloaded to
+ * the controller.
+ *
+ * Once the firmware patching is completed successfully,
+ * the manufacturer mode is disabled with reset and activating the
+ * downloaded patch.
+ *
+ * If the firmware patching fails, the manufacturer mode is
+ * disabled with reset and deactivating the patch.
+ *
+ * If the default patch file is used, no reset is done when disabling
+ * the manufacturer mode.
+ */
+ while (fw->size > fw_ptr - fw->data) {
+ int ret;
+
+ ret = btintel_legacy_rom_patching(hdev, fw, &fw_ptr,
+ &disable_patch);
+ if (ret < 0)
+ goto exit_mfg_deactivate;
+ }
+
+ release_firmware(fw);
+
+ if (disable_patch)
+ goto exit_mfg_disable;
+
+ /* Patching completed successfully and disable the manufacturer mode
+ * with reset and activate the downloaded firmware patches.
+ */
+ err = btintel_exit_mfg(hdev, true, true);
+ if (err)
+ return err;
+
+ /* Need build number for downloaded fw patches in
+ * every power-on boot
+ */
+ err = btintel_read_version(hdev, &new_ver);
+ if (err)
+ return err;
+
+ bt_dev_info(hdev, "Intel BT fw patch 0x%02x completed & activated",
+ new_ver.fw_patch_num);
+
+ goto complete;
+
+exit_mfg_disable:
+ /* Disable the manufacturer mode without reset */
+ err = btintel_exit_mfg(hdev, false, false);
+ if (err)
+ return err;
+
+ bt_dev_info(hdev, "Intel firmware patch completed");
+
+ goto complete;
+
+exit_mfg_deactivate:
+ release_firmware(fw);
+
+ /* Patching failed. Disable the manufacturer mode with reset and
+ * deactivate the downloaded firmware patches.
+ */
+ err = btintel_exit_mfg(hdev, true, false);
+ if (err)
+ return err;
+
+ bt_dev_info(hdev, "Intel firmware patch completed and deactivated");
+
+complete:
+ /* Set the event mask for Intel specific vendor events. This enables
+ * a few extra events that are useful during general operation.
+ */
+ btintel_set_event_mask_mfg(hdev, false);
+
+ btintel_check_bdaddr(hdev);
+
+ return 0;
+}
+
+/* Wait for the firmware download to be acknowledged by the controller.
+ *
+ * Marks the firmware as loaded (INTEL_FIRMWARE_LOADED), then sleeps
+ * interruptibly until INTEL_DOWNLOADING is cleared or @msec milliseconds
+ * have elapsed. @calltime is the timestamp taken when the download was
+ * started and is only used to report the elapsed time.
+ *
+ * Returns 0 on success, -EINTR if the wait was interrupted, -ETIMEDOUT
+ * for any other wait failure and -ENOEXEC if INTEL_FIRMWARE_FAILED is set
+ * once the wait has finished.
+ */
+static int btintel_download_wait(struct hci_dev *hdev, ktime_t calltime, int msec)
+{
+	unsigned long long elapsed;
+	int ret;
+
+	btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED);
+
+	bt_dev_info(hdev, "Waiting for firmware download to complete");
+
+	ret = btintel_wait_on_flag_timeout(hdev, INTEL_DOWNLOADING,
+					   TASK_INTERRUPTIBLE,
+					   msecs_to_jiffies(msec));
+	switch (ret) {
+	case 0:
+		break;
+	case -EINTR:
+		bt_dev_err(hdev, "Firmware loading interrupted");
+		return -EINTR;
+	default:
+		bt_dev_err(hdev, "Firmware loading timeout");
+		return -ETIMEDOUT;
+	}
+
+	if (btintel_test_flag(hdev, INTEL_FIRMWARE_FAILED)) {
+		bt_dev_err(hdev, "Firmware loading failed");
+		return -ENOEXEC;
+	}
+
+	/* The shift by 10 divides the nanosecond count by 1024, so the
+	 * reported "usecs" value is an approximation.
+	 */
+	elapsed = (unsigned long long)ktime_to_ns(ktime_sub(ktime_get(),
+							    calltime)) >> 10;
+
+	bt_dev_info(hdev, "Firmware loaded in %llu usecs", elapsed);
+
+	return 0;
+}
+
+/* Wait for the controller to finish booting.
+ *
+ * Sleeps interruptibly until INTEL_BOOTING is cleared or @msec
+ * milliseconds have elapsed. @calltime is the timestamp taken when the
+ * boot was requested and is only used to report the elapsed time.
+ *
+ * Returns 0 on success, -EINTR if the wait was interrupted and
+ * -ETIMEDOUT for any other wait failure.
+ */
+static int btintel_boot_wait(struct hci_dev *hdev, ktime_t calltime, int msec)
+{
+	unsigned long long elapsed;
+	int ret;
+
+	bt_dev_info(hdev, "Waiting for device to boot");
+
+	ret = btintel_wait_on_flag_timeout(hdev, INTEL_BOOTING,
+					   TASK_INTERRUPTIBLE,
+					   msecs_to_jiffies(msec));
+	switch (ret) {
+	case 0:
+		break;
+	case -EINTR:
+		bt_dev_err(hdev, "Device boot interrupted");
+		return -EINTR;
+	default:
+		bt_dev_err(hdev, "Device boot timeout");
+		return -ETIMEDOUT;
+	}
+
+	/* The shift by 10 divides the nanosecond count by 1024, so the
+	 * reported "usecs" value is an approximation.
+	 */
+	elapsed = (unsigned long long)ktime_to_ns(ktime_sub(ktime_get(),
+							    calltime)) >> 10;
+
+	bt_dev_info(hdev, "Device booted in %llu usecs", elapsed);
+
+	return 0;
+}
+
+/* Boot the downloaded firmware and wait until the device reports bootup.
+ *
+ * Sets INTEL_BOOTING, sends the Intel reset with @boot_addr and then
+ * waits up to one second for the flag to be cleared again. If sending
+ * the reset fails or the wait times out, the controller is reset back
+ * to the bootloader.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int btintel_boot(struct hci_dev *hdev, u32 boot_addr)
+{
+	ktime_t start = ktime_get();
+	int ret;
+
+	btintel_set_flag(hdev, INTEL_BOOTING);
+
+	ret = btintel_send_intel_reset(hdev, boot_addr);
+	if (ret) {
+		bt_dev_err(hdev, "Intel Soft Reset failed (%d)", ret);
+		goto back_to_bootloader;
+	}
+
+	/* The bootloader itself does not signal readiness; the operational
+	 * firmware sends a bootup notification instead.
+	 *
+	 * Booting should take no longer than 1 second. If it does, fail
+	 * the setup since something went wrong.
+	 */
+	ret = btintel_boot_wait(hdev, start, 1000);
+	if (ret != -ETIMEDOUT)
+		return ret;
+
+back_to_bootloader:
+	btintel_reset_to_bootloader(hdev);
+
+	return ret;
+}
+
+/* Build the firmware file name for a legacy bootloader device.
+ *
+ * The name is written to @fw_name (at most @len bytes) and ends with
+ * @suffix as the file extension. Which fields of @ver and @params are
+ * used depends on the hardware variant.
+ *
+ * Returns 0 on success or -EINVAL for an unknown hardware variant, in
+ * which case @fw_name is left untouched.
+ */
+static int btintel_get_fw_name(struct intel_version *ver,
+			       struct intel_boot_params *params,
+			       char *fw_name, size_t len,
+			       const char *suffix)
+{
+	/* SfP (0x0b) and WsP (0x0c): ibt-<hw_variant>-<dev_revid> */
+	if (ver->hw_variant == 0x0b || ver->hw_variant == 0x0c) {
+		snprintf(fw_name, len, "intel/ibt-%u-%u.%s",
+			 le16_to_cpu(ver->hw_variant),
+			 le16_to_cpu(params->dev_revid),
+			 suffix);
+		return 0;
+	}
+
+	/* JfP (0x11), ThP (0x12), HrP (0x13) and CcP (0x14):
+	 * ibt-<hw_variant>-<hw_revision>-<fw_revision>
+	 */
+	if (ver->hw_variant >= 0x11 && ver->hw_variant <= 0x14) {
+		snprintf(fw_name, len, "intel/ibt-%u-%u-%u.%s",
+			 le16_to_cpu(ver->hw_variant),
+			 le16_to_cpu(ver->hw_revision),
+			 le16_to_cpu(ver->fw_revision),
+			 suffix);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* Locate and download the operational firmware for a legacy bootloader
+ * device, then wait for the download to be acknowledged.
+ *
+ * @ver:        version information previously read from the controller
+ * @params:     filled with the secure boot parameters when the device is
+ *              in bootloader mode; left untouched when the operational
+ *              firmware (fw_variant 0x23) is already running
+ * @boot_param: passed through to btintel_download_firmware()
+ *
+ * Returns 0 on success, and also when the firmware turns out to be
+ * loaded already. Otherwise returns a negative error code: -EINVAL for
+ * NULL arguments, an unsupported loading method or an unsupported
+ * firmware name, -EBADF for a too small firmware file, or the error
+ * reported by one of the helpers.
+ */
+static int btintel_download_fw(struct hci_dev *hdev,
+ struct intel_version *ver,
+ struct intel_boot_params *params,
+ u32 *boot_param)
+{
+ const struct firmware *fw;
+ char fwname[64];
+ int err;
+ ktime_t calltime;
+
+ if (!ver || !params)
+ return -EINVAL;
+
+ /* The firmware variant determines if the device is in bootloader
+ * mode or is running operational firmware. The value 0x06 identifies
+ * the bootloader and the value 0x23 identifies the operational
+ * firmware.
+ *
+ * When the operational firmware is already present, then only
+ * the check for valid Bluetooth device address is needed. This
+ * determines if the device will be added as configured or
+ * unconfigured controller.
+ *
+ * It is not possible to use the Secure Boot Parameters in this
+ * case since that command is only available in bootloader mode.
+ */
+ if (ver->fw_variant == 0x23) {
+ btintel_clear_flag(hdev, INTEL_BOOTLOADER);
+ btintel_check_bdaddr(hdev);
+
+ /* SfP and WsP don't seem to update the firmware version on file
+ * so version checking is currently not possible.
+ */
+ switch (ver->hw_variant) {
+ case 0x0b: /* SfP */
+ case 0x0c: /* WsP */
+ return 0;
+ }
+
+ /* Proceed to download to check if the version matches */
+ goto download;
+ }
+
+ /* Read the secure boot parameters to identify the operating
+ * details of the bootloader.
+ */
+ err = btintel_read_boot_params(hdev, params);
+ if (err)
+ return err;
+
+ /* It is required that every single firmware fragment is acknowledged
+ * with a command complete event. If the boot parameters indicate
+ * that this bootloader does not send them, then abort the setup.
+ */
+ if (params->limited_cce != 0x00) {
+ bt_dev_err(hdev, "Unsupported Intel firmware loading method (%u)",
+ params->limited_cce);
+ return -EINVAL;
+ }
+
+ /* If the OTP has no valid Bluetooth device address, then there will
+ * also be no valid address for the operational firmware.
+ */
+ if (!bacmp(&params->otp_bdaddr, BDADDR_ANY)) {
+ bt_dev_info(hdev, "No device address configured");
+ set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
+ }
+
+download:
+ /* With this Intel bootloader only the hardware variant and device
+ * revision information are used to select the right firmware for SfP
+ * and WsP.
+ *
+ * The firmware filename is ibt-<hw_variant>-<dev_revid>.sfi.
+ *
+ * Currently the supported hardware variants are:
+ * 11 (0x0b) for iBT3.0 (LnP/SfP)
+ * 12 (0x0c) for iBT3.5 (WsP)
+ *
+ * For ThP/JfP and for future SKU's, the FW name varies based on HW
+ * variant, HW revision and FW revision, as these are dependent on CNVi
+ * and RF Combination.
+ *
+ * 17 (0x11) for iBT3.5 (JfP)
+ * 18 (0x12) for iBT3.5 (ThP)
+ *
+ * The firmware file name for these will be
+ * ibt-<hw_variant>-<hw_revision>-<fw_revision>.sfi.
+ *
+ */
+ err = btintel_get_fw_name(ver, params, fwname, sizeof(fwname), "sfi");
+ if (err < 0) {
+ /* INTEL_BOOTLOADER was cleared above when the operational
+ * firmware is already running, so a naming failure is not
+ * fatal in that case.
+ */
+ if (!btintel_test_flag(hdev, INTEL_BOOTLOADER)) {
+ /* Firmware has already been loaded */
+ btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED);
+ return 0;
+ }
+
+ bt_dev_err(hdev, "Unsupported Intel firmware naming");
+ return -EINVAL;
+ }
+
+ err = firmware_request_nowarn(&fw, fwname, &hdev->dev);
+ if (err < 0) {
+ /* A missing file is likewise tolerated when the operational
+ * firmware is already running.
+ */
+ if (!btintel_test_flag(hdev, INTEL_BOOTLOADER)) {
+ /* Firmware has already been loaded */
+ btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED);
+ return 0;
+ }
+
+ bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)",
+ fwname, err);
+ return err;
+ }
+
+ bt_dev_info(hdev, "Found device firmware: %s", fwname);
+
+ /* NOTE(review): 644 is presumably the minimum size of the headers
+ * every firmware file must carry - confirm against the parser in
+ * btintel_download_firmware().
+ */
+ if (fw->size < 644) {
+ bt_dev_err(hdev, "Invalid size of firmware file (%zu)",
+ fw->size);
+ err = -EBADF;
+ goto done;
+ }
+
+ calltime = ktime_get();
+
+ /* Must be set before the download starts; btintel_download_wait()
+ * below waits for this flag to be cleared again.
+ */
+ btintel_set_flag(hdev, INTEL_DOWNLOADING);
+
+ /* Start firmware downloading and get boot parameter */
+ err = btintel_download_firmware(hdev, ver, fw, boot_param);
+ if (err < 0) {
+ if (err == -EALREADY) {
+ /* Firmware has already been loaded */
+ btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED);
+ err = 0;
+ goto done;
+ }
+
+ /* When FW download fails, send Intel Reset to retry
+ * FW download.
+ */
+ btintel_reset_to_bootloader(hdev);
+ goto done;
+ }
+
+ /* Before switching the device into operational mode and with that
+ * booting the loaded firmware, wait for the bootloader notification
+ * that all fragments have been successfully received.
+ *
+ * When the event processing receives the notification, then the
+ * INTEL_DOWNLOADING flag will be cleared.
+ *
+ * The firmware loading should not take longer than 5 seconds
+ * and thus just timeout if that happens and fail the setup
+ * of this device.
+ */
+ err = btintel_download_wait(hdev, calltime, 5000);
+ if (err == -ETIMEDOUT)
+ btintel_reset_to_bootloader(hdev);
+
+done:
+ /* Single exit for every path that obtained the firmware image */
+ release_firmware(fw);
+ return err;
+}
+
+/* Setup routine for legacy bootloader devices.
+ *
+ * Downloads the operational firmware, boots it, applies the DDC
+ * configuration and debug features, reads back the new version and
+ * finally sets the Intel event mask. When the controller already runs
+ * operational firmware (fw_variant 0x23) everything between the download
+ * step and the event mask is skipped.
+ *
+ * Returns 0 on success or the negative error code of the failing step.
+ * Failures of the DDC load, the debug feature handling and the event
+ * mask are not propagated.
+ */
+static int btintel_bootloader_setup(struct hci_dev *hdev,
+ struct intel_version *ver)
+{
+ struct intel_version new_ver;
+ struct intel_boot_params params;
+ u32 boot_param;
+ char ddcname[64];
+ int err;
+ struct intel_debug_features features;
+
+ BT_DBG("%s", hdev->name);
+
+ /* Set the default boot parameter to 0x0 and it is updated to
+ * SKU specific boot parameter after reading Intel_Write_Boot_Params
+ * command while downloading the firmware.
+ */
+ boot_param = 0x00000000;
+
+ /* Assume bootloader mode; btintel_download_fw() clears the flag
+ * again when it finds operational firmware already running.
+ */
+ btintel_set_flag(hdev, INTEL_BOOTLOADER);
+
+ err = btintel_download_fw(hdev, ver, &params, &boot_param);
+ if (err)
+ return err;
+
+ /* controller is already having an operational firmware */
+ if (ver->fw_variant == 0x23)
+ goto finish;
+
+ err = btintel_boot(hdev, boot_param);
+ if (err)
+ return err;
+
+ btintel_clear_flag(hdev, INTEL_BOOTLOADER);
+
+ err = btintel_get_fw_name(ver, &params, ddcname,
+ sizeof(ddcname), "ddc");
+
+ if (err < 0) {
+ bt_dev_err(hdev, "Unsupported Intel firmware naming");
+ } else {
+ /* Once the device is running in operational mode, it needs to
+ * apply the device configuration (DDC) parameters.
+ *
+ * The device can work without DDC parameters, so even if it
+ * fails to load the file, no need to fail the setup.
+ */
+ btintel_load_ddc_config(hdev, ddcname);
+ }
+
+ /* Read the Intel supported features and if new exception formats
+ * supported, need to load the additional DDC config to enable.
+ */
+ err = btintel_read_debug_features(hdev, &features);
+ if (!err) {
+ /* Set DDC mask for available debug features */
+ btintel_set_debug_features(hdev, &features);
+ }
+
+ /* Read the Intel version information after loading the FW */
+ err = btintel_read_version(hdev, &new_ver);
+ if (err)
+ return err;
+
+ btintel_version_info(hdev, &new_ver);
+
+finish:
+ /* Set the event mask for Intel specific vendor events. This enables
+ * a few extra events that are useful during general operation. It
+ * does not enable any debugging related events.
+ *
+ * The device will function correctly without these events enabled
+ * and thus no need to fail the setup.
+ */
+ btintel_set_event_mask(hdev, false);
+
+ return 0;
+}
+
+/* Build the firmware file name for a TLV based (new generation) device.
+ *
+ * The name has the form
+ * intel/ibt-<cnvi_top type+cnvi_top step>-<cnvr_top type+cnvr_top step>.<suffix>
+ * and is written to @fw_name, limited to @len bytes.
+ */
+static void btintel_get_fw_name_tlv(const struct intel_version_tlv *ver,
+				    char *fw_name, size_t len,
+				    const char *suffix)
+{
+	unsigned int cnvi_id;
+	unsigned int cnvr_id;
+
+	/* Each identifier packs the type and step fields of the
+	 * respective *_top value into 16 bits and swaps the two bytes.
+	 */
+	cnvi_id = INTEL_CNVX_TOP_PACK_SWAB(INTEL_CNVX_TOP_TYPE(ver->cnvi_top),
+					   INTEL_CNVX_TOP_STEP(ver->cnvi_top));
+	cnvr_id = INTEL_CNVX_TOP_PACK_SWAB(INTEL_CNVX_TOP_TYPE(ver->cnvr_top),
+					   INTEL_CNVX_TOP_STEP(ver->cnvr_top));
+
+	snprintf(fw_name, len, "intel/ibt-%04x-%04x.%s",
+		 cnvi_id, cnvr_id, suffix);
+}
+
+/* Locate and download the operational firmware for a TLV based device,
+ * then wait for the download to be acknowledged.
+ *
+ * @ver:        TLV version information previously read from the controller
+ * @boot_param: passed through to btintel_download_fw_tlv()
+ *
+ * Returns 0 on success, and also when the firmware turns out to be
+ * loaded already. Otherwise returns a negative error code: -EINVAL for
+ * NULL arguments, -EBADF for a too small firmware file, or the error
+ * reported by one of the helpers.
+ */
+static int btintel_prepare_fw_download_tlv(struct hci_dev *hdev,
+ struct intel_version_tlv *ver,
+ u32 *boot_param)
+{
+ const struct firmware *fw;
+ char fwname[64];
+ int err;
+ ktime_t calltime;
+
+ if (!ver || !boot_param)
+ return -EINVAL;
+
+ /* The image type determines if the device is in bootloader
+ * mode or is running operational firmware. The value 0x03
+ * identifies the operational firmware.
+ *
+ * When the operational firmware is already present, then only
+ * the check for valid Bluetooth device address is needed. This
+ * determines if the device will be added as configured or
+ * unconfigured controller.
+ *
+ * It is not possible to use the Secure Boot Parameters in this
+ * case since that command is only available in bootloader mode.
+ */
+ if (ver->img_type == 0x03) {
+ btintel_clear_flag(hdev, INTEL_BOOTLOADER);
+ btintel_check_bdaddr(hdev);
+ }
+
+ /* If the OTP has no valid Bluetooth device address, then there will
+ * also be no valid address for the operational firmware.
+ */
+ if (!bacmp(&ver->otp_bd_addr, BDADDR_ANY)) {
+ bt_dev_info(hdev, "No device address configured");
+ set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
+ }
+
+ btintel_get_fw_name_tlv(ver, fwname, sizeof(fwname), "sfi");
+ err = firmware_request_nowarn(&fw, fwname, &hdev->dev);
+ if (err < 0) {
+ /* INTEL_BOOTLOADER was cleared above when the operational
+ * firmware is already running, so a missing file is not
+ * fatal in that case.
+ */
+ if (!btintel_test_flag(hdev, INTEL_BOOTLOADER)) {
+ /* Firmware has already been loaded */
+ btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED);
+ return 0;
+ }
+
+ bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)",
+ fwname, err);
+
+ return err;
+ }
+
+ bt_dev_info(hdev, "Found device firmware: %s", fwname);
+
+ /* NOTE(review): 644 is presumably the minimum size of the headers
+ * every firmware file must carry - confirm against the parser in
+ * btintel_download_fw_tlv().
+ */
+ if (fw->size < 644) {
+ bt_dev_err(hdev, "Invalid size of firmware file (%zu)",
+ fw->size);
+ err = -EBADF;
+ goto done;
+ }
+
+ calltime = ktime_get();
+
+ /* Must be set before the download starts; btintel_download_wait()
+ * below waits for this flag to be cleared again.
+ */
+ btintel_set_flag(hdev, INTEL_DOWNLOADING);
+
+ /* Start firmware downloading and get boot parameter */
+ err = btintel_download_fw_tlv(hdev, ver, fw, boot_param,
+ INTEL_HW_VARIANT(ver->cnvi_bt),
+ ver->sbe_type);
+ if (err < 0) {
+ if (err == -EALREADY) {
+ /* Firmware has already been loaded */
+ btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED);
+ err = 0;
+ goto done;
+ }
+
+ /* When FW download fails, send Intel Reset to retry
+ * FW download.
+ */
+ btintel_reset_to_bootloader(hdev);
+ goto done;
+ }
+
+ /* Before switching the device into operational mode and with that
+ * booting the loaded firmware, wait for the bootloader notification
+ * that all fragments have been successfully received.
+ *
+ * When the event processing receives the notification, then the
+ * INTEL_DOWNLOADING flag will be cleared.
+ *
+ * The firmware loading should not take longer than 5 seconds
+ * and thus just timeout if that happens and fail the setup
+ * of this device.
+ */
+ err = btintel_download_wait(hdev, calltime, 5000);
+ if (err == -ETIMEDOUT)
+ btintel_reset_to_bootloader(hdev);
+
+done:
+ /* Single exit for every path that obtained the firmware image */
+ release_firmware(fw);
+ return err;
+}
+
+/* Setup routine for TLV based bootloader devices.
+ *
+ * Downloads the operational firmware, boots it, applies the DDC
+ * configuration and debug features, reads back the new version and
+ * finally sets the Intel event mask. When the controller already runs
+ * operational firmware (img_type 0x03) everything between the download
+ * step and the event mask is skipped.
+ *
+ * Returns 0 on success or the negative error code of the failing step.
+ * Failures of the DDC load, the debug feature handling and the event
+ * mask are not propagated.
+ */
+static int btintel_bootloader_setup_tlv(struct hci_dev *hdev,
+ struct intel_version_tlv *ver)
+{
+ u32 boot_param;
+ char ddcname[64];
+ int err;
+ struct intel_debug_features features;
+ struct intel_version_tlv new_ver;
+
+ bt_dev_dbg(hdev, "");
+
+ /* Set the default boot parameter to 0x0 and it is updated to
+ * SKU specific boot parameter after reading Intel_Write_Boot_Params
+ * command while downloading the firmware.
+ */
+ boot_param = 0x00000000;
+
+ /* Assume bootloader mode; btintel_prepare_fw_download_tlv() clears
+ * the flag again when it finds operational firmware already running.
+ */
+ btintel_set_flag(hdev, INTEL_BOOTLOADER);
+
+ err = btintel_prepare_fw_download_tlv(hdev, ver, &boot_param);
+ if (err)
+ return err;
+
+ /* check if controller is already having an operational firmware */
+ if (ver->img_type == 0x03)
+ goto finish;
+
+ err = btintel_boot(hdev, boot_param);
+ if (err)
+ return err;
+
+ btintel_clear_flag(hdev, INTEL_BOOTLOADER);
+
+ btintel_get_fw_name_tlv(ver, ddcname, sizeof(ddcname), "ddc");
+ /* Once the device is running in operational mode, it needs to
+ * apply the device configuration (DDC) parameters.
+ *
+ * The device can work without DDC parameters, so even if it
+ * fails to load the file, no need to fail the setup.
+ */
+ btintel_load_ddc_config(hdev, ddcname);
+
+ /* Read the Intel supported features and if new exception formats
+ * supported, need to load the additional DDC config to enable.
+ */
+ err = btintel_read_debug_features(hdev, &features);
+ if (!err) {
+ /* Set DDC mask for available debug features */
+ btintel_set_debug_features(hdev, &features);
+ }
+
+ /* Read the Intel version information after loading the FW */
+ err = btintel_read_version_tlv(hdev, &new_ver);
+ if (err)
+ return err;
+
+ btintel_version_info_tlv(hdev, &new_ver);
+
+finish:
+ /* Set the event mask for Intel specific vendor events. This enables
+ * a few extra events that are useful during general operation. It
+ * does not enable any debugging related events.
+ *
+ * The device will function correctly without these events enabled
+ * and thus no need to fail the setup.
+ */
+ btintel_set_event_mask(hdev, false);
+
+ return 0;
+}
+
+/* Register the Microsoft vendor extension opcode for hardware variants
+ * that support it. Other variants are left untouched.
+ */
+static void btintel_set_msft_opcode(struct hci_dev *hdev, u8 hw_variant)
+{
+	switch (hw_variant) {
+	/* Legacy bootloader devices that support the MSFT extension */
+	case 0x11: /* JfP */
+	case 0x12: /* ThP */
+	case 0x13: /* HrP */
+	case 0x14: /* CcP */
+	/* New generation controllers */
+	case 0x17:
+	case 0x18:
+	case 0x19:
+		break;
+	default:
+		/* Not supported */
+		return;
+	}
+
+	/* All supported Intel controllers use 0xFC1E for VsMsftOpCode */
+	hci_set_msft_opcode(hdev, 0xFC1E);
+}
+
+/* Combined setup entry point for all supported Intel controllers.
+ *
+ * Reads the Intel version (sending an initial HCI reset first when
+ * INTEL_BROKEN_INITIAL_NCMD is set), applies the common HCI quirks and
+ * dispatches to the matching setup routine: legacy ROM, legacy
+ * bootloader or TLV based bootloader.
+ *
+ * Returns 0 on success or a negative error code. The Read Version
+ * response is released on every exit path once it has been obtained.
+ */
+static int btintel_setup_combined(struct hci_dev *hdev)
+{
+	const u8 param[1] = { 0xFF };
+	struct intel_version ver;
+	struct intel_version_tlv ver_tlv;
+	struct sk_buff *skb;
+	int err;
+
+	BT_DBG("%s", hdev->name);
+
+	/* Some controllers have a bug with the first HCI command sent to
+	 * them returning number of completed commands as zero. This would
+	 * stall the command processing in the Bluetooth core.
+	 *
+	 * As a workaround, send HCI Reset command first which will reset the
+	 * number of completed commands and allow normal command processing
+	 * from now on.
+	 */
+	if (btintel_test_flag(hdev, INTEL_BROKEN_INITIAL_NCMD)) {
+		skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL,
+				     HCI_INIT_TIMEOUT);
+		if (IS_ERR(skb)) {
+			bt_dev_err(hdev,
+				   "sending initial HCI reset failed (%ld)",
+				   PTR_ERR(skb));
+			return PTR_ERR(skb);
+		}
+		kfree_skb(skb);
+	}
+
+	/* Starting from TyP device, the command parameter and response are
+	 * changed even though the OCF for HCI_Intel_Read_Version command
+	 * remains same. The legacy devices can handle even if the
+	 * command has a parameter and returns a correct version information.
+	 * So, it uses new format to support both legacy and new format.
+	 */
+	skb = __hci_cmd_sync(hdev, 0xfc05, 1, param, HCI_CMD_TIMEOUT);
+	if (IS_ERR(skb)) {
+		bt_dev_err(hdev, "Reading Intel version command failed (%ld)",
+			   PTR_ERR(skb));
+		return PTR_ERR(skb);
+	}
+
+	/* From here on skb is owned by this function; every exit must go
+	 * through exit_error so that it is freed.
+	 */
+
+	/* Check the status */
+	if (skb->data[0]) {
+		bt_dev_err(hdev, "Intel Read Version command failed (%02x)",
+			   skb->data[0]);
+		err = -EIO;
+		goto exit_error;
+	}
+
+	/* Apply the common HCI quirks for Intel device */
+	set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
+	set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+	set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks);
+
+	/* For Legacy device, check the HW platform value and size */
+	if (skb->len == sizeof(ver) && skb->data[1] == 0x37) {
+		bt_dev_dbg(hdev, "Read the legacy Intel version information");
+
+		memcpy(&ver, skb->data, sizeof(ver));
+
+		/* Display version information */
+		btintel_version_info(hdev, &ver);
+
+		/* Check for supported iBT hardware variants of this firmware
+		 * loading method.
+		 *
+		 * This check has been put in place to ensure correct forward
+		 * compatibility options when newer hardware variants come
+		 * along.
+		 */
+		switch (ver.hw_variant) {
+		case 0x07: /* WP */
+		case 0x08: /* StP */
+			/* Legacy ROM product */
+			btintel_set_flag(hdev, INTEL_ROM_LEGACY);
+
+			/* Apply the device specific HCI quirks
+			 *
+			 * WBS for SdP - SdP and StP have the same hw_variant
+			 * but different fw_variant
+			 */
+			if (ver.hw_variant == 0x08 && ver.fw_variant == 0x22)
+				set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
+					&hdev->quirks);
+
+			/* These devices have an issue with LED which doesn't
+			 * go off immediately during shutdown. Set the flag
+			 * here to send the LED OFF command during shutdown.
+			 */
+			btintel_set_flag(hdev, INTEL_BROKEN_LED);
+
+			err = btintel_legacy_rom_setup(hdev, &ver);
+			break;
+		case 0x0b: /* SfP */
+		case 0x0c: /* WsP */
+		case 0x11: /* JfP */
+		case 0x12: /* ThP */
+		case 0x13: /* HrP */
+		case 0x14: /* CcP */
+			/* Apply the device specific HCI quirks
+			 *
+			 * All Legacy bootloader devices support WBS
+			 */
+			set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
+				&hdev->quirks);
+
+			/* Valid LE States quirk for JfP/ThP family */
+			if (ver.hw_variant == 0x11 || ver.hw_variant == 0x12)
+				set_bit(HCI_QUIRK_VALID_LE_STATES,
+					&hdev->quirks);
+
+			/* Setup MSFT Extension support */
+			btintel_set_msft_opcode(hdev, ver.hw_variant);
+
+			err = btintel_bootloader_setup(hdev, &ver);
+			break;
+		default:
+			bt_dev_err(hdev, "Unsupported Intel hw variant (%u)",
+				   ver.hw_variant);
+			err = -EINVAL;
+			break;
+		}
+
+		goto exit_error;
+	}
+
+	/* For TLV type device, parse the tlv data */
+	err = btintel_parse_version_tlv(hdev, &ver_tlv, skb);
+	if (err) {
+		bt_dev_err(hdev, "Failed to parse TLV version information");
+		goto exit_error;
+	}
+
+	if (INTEL_HW_PLATFORM(ver_tlv.cnvi_bt) != 0x37) {
+		bt_dev_err(hdev, "Unsupported Intel hardware platform (0x%2x)",
+			   INTEL_HW_PLATFORM(ver_tlv.cnvi_bt));
+		err = -EINVAL;
+		goto exit_error;
+	}
+
+	/* Check for supported iBT hardware variants of this firmware
+	 * loading method.
+	 *
+	 * This check has been put in place to ensure correct forward
+	 * compatibility options when newer hardware variants come
+	 * along.
+	 */
+	switch (INTEL_HW_VARIANT(ver_tlv.cnvi_bt)) {
+	case 0x11: /* JfP */
+	case 0x12: /* ThP */
+	case 0x13: /* HrP */
+	case 0x14: /* CcP */
+		/* Some legacy bootloader devices from JfP supports both old
+		 * and TLV based HCI_Intel_Read_Version command. But we don't
+		 * want to use the TLV based setup routines for those legacy
+		 * bootloader device.
+		 *
+		 * Also, it is not easy to convert TLV based version from the
+		 * legacy version format.
+		 *
+		 * So, as a workaround for those devices, use the legacy
+		 * HCI_Intel_Read_Version to get the version information and
+		 * run the legacy bootloader setup.
+		 */
+		err = btintel_read_version(hdev, &ver);
+		if (err)
+			break;	/* fall out to exit_error to free skb */
+		err = btintel_bootloader_setup(hdev, &ver);
+		break;
+	case 0x17:
+	case 0x18:
+	case 0x19:
+		/* Display version information of TLV type */
+		btintel_version_info_tlv(hdev, &ver_tlv);
+
+		/* Apply the device specific HCI quirks for TLV based devices
+		 *
+		 * All TLV based devices support WBS
+		 */
+		set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
+
+		/* Valid LE States quirk for GfP */
+		if (INTEL_HW_VARIANT(ver_tlv.cnvi_bt) == 0x18)
+			set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks);
+
+		/* Setup MSFT Extension support */
+		btintel_set_msft_opcode(hdev,
+					INTEL_HW_VARIANT(ver_tlv.cnvi_bt));
+
+		err = btintel_bootloader_setup_tlv(hdev, &ver_tlv);
+		break;
+	default:
+		bt_dev_err(hdev, "Unsupported Intel hw variant (%u)",
+			   INTEL_HW_VARIANT(ver_tlv.cnvi_bt));
+		/* Do not return directly, skb still has to be freed */
+		err = -EINVAL;
+		break;
+	}
+
+exit_error:
+	kfree_skb(skb);
+
+	return err;
+}
+
+/* Shutdown callback shared by all Intel controllers.
+ *
+ * Sends an HCI Reset and, for devices flagged INTEL_BROKEN_LED, the
+ * vendor command 0xfc3f that switches the LED off.
+ *
+ * Returns 0 on success or the error of the failing command.
+ */
+static int btintel_shutdown_combined(struct hci_dev *hdev)
+{
+	struct sk_buff *rsp;
+
+	/* Send HCI Reset to the controller to stop any BT activity which
+	 * was triggered. This helps to save power and keeps host and
+	 * controller in sync.
+	 */
+	rsp = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
+	if (IS_ERR(rsp)) {
+		bt_dev_err(hdev, "HCI reset during shutdown failed");
+		return PTR_ERR(rsp);
+	}
+	kfree_skb(rsp);
+
+	if (!btintel_test_flag(hdev, INTEL_BROKEN_LED))
+		return 0;
+
+	/* On some platforms the BT LED takes 5 seconds to go off when the
+	 * interface is down or the radio is turned off. This command turns
+	 * the LED off immediately.
+	 */
+	rsp = __hci_cmd_sync(hdev, 0xfc3f, 0, NULL, HCI_INIT_TIMEOUT);
+	if (IS_ERR(rsp)) {
+		bt_dev_err(hdev, "turning off Intel device LED failed");
+		return PTR_ERR(rsp);
+	}
+	kfree_skb(rsp);
+
+	return 0;
+}
+
+/* Install the combined Intel callbacks on @hdev and set the manufacturer
+ * id to 2. Always returns 0.
+ */
+int btintel_configure_setup(struct hci_dev *hdev)
+{
+	hdev->manufacturer = 2;
+
+	/* Lifecycle callbacks */
+	hdev->setup	 = btintel_setup_combined;
+	hdev->shutdown	 = btintel_shutdown_combined;
+
+	/* Runtime callbacks */
+	hdev->set_bdaddr = btintel_set_bdaddr;
+	hdev->set_diag	 = btintel_set_diag_combined;
+	hdev->hw_error	 = btintel_hw_error;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(btintel_configure_setup);
+
+/* Handle a bootup notification of @len bytes.
+ *
+ * Events whose length does not match struct intel_bootup are ignored.
+ * Otherwise INTEL_BOOTING is cleared and, if it was set, waiters on that
+ * flag are woken up.
+ */
+void btintel_bootup(struct hci_dev *hdev, const void *ptr, unsigned int len)
+{
+	if (len != sizeof(struct intel_bootup))
+		return;
+
+	if (!btintel_test_and_clear_flag(hdev, INTEL_BOOTING))
+		return;
+
+	btintel_wake_up_flag(hdev, INTEL_BOOTING);
+}
+EXPORT_SYMBOL_GPL(btintel_bootup);
+
+/* Handle a secure send result notification of @len bytes.
+ *
+ * Events whose length does not match struct intel_secure_send_result are
+ * ignored. A non-zero result sets INTEL_FIRMWARE_FAILED. INTEL_DOWNLOADING
+ * is then cleared; waiters on it are only woken up if it was set and
+ * INTEL_FIRMWARE_LOADED is set as well.
+ */
+void btintel_secure_send_result(struct hci_dev *hdev,
+				const void *ptr, unsigned int len)
+{
+	const struct intel_secure_send_result *res = ptr;
+
+	if (len != sizeof(*res))
+		return;
+
+	/* Record the failure before any waiter gets woken up */
+	if (res->result)
+		btintel_set_flag(hdev, INTEL_FIRMWARE_FAILED);
+
+	if (!btintel_test_and_clear_flag(hdev, INTEL_DOWNLOADING))
+		return;
+
+	if (btintel_test_flag(hdev, INTEL_FIRMWARE_LOADED))
+		btintel_wake_up_flag(hdev, INTEL_DOWNLOADING);
+}
+EXPORT_SYMBOL_GPL(btintel_secure_send_result);
MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
MODULE_DESCRIPTION("Bluetooth support for Intel devices ver " VERSION);
diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h
index d184064a5e7c..aa64072bbe68 100644
--- a/drivers/bluetooth/btintel.h
+++ b/drivers/bluetooth/btintel.h
@@ -138,6 +138,49 @@ struct intel_debug_features {
#define INTEL_CNVX_TOP_STEP(cnvx_top) (((cnvx_top) & 0x0f000000) >> 24)
#define INTEL_CNVX_TOP_PACK_SWAB(t, s) __swab16(((__u16)(((t) << 4) | (s))))
+/* Bit numbers of the per-device state flags kept in struct btintel_data */
+enum {
+ INTEL_BOOTLOADER, /* set at start of bootloader setup, cleared once operational firmware runs */
+ INTEL_DOWNLOADING, /* set before a firmware download, cleared by the secure send result event */
+ INTEL_FIRMWARE_LOADED, /* set when waiting for the download or when firmware is already loaded */
+ INTEL_FIRMWARE_FAILED, /* set when the secure send result event reports a non-zero result */
+ INTEL_BOOTING, /* set before the Intel reset, cleared by the bootup event */
+ INTEL_BROKEN_INITIAL_NCMD, /* setup sends an HCI Reset before the first command */
+ INTEL_BROKEN_LED, /* shutdown sends the LED off command */
+ INTEL_ROM_LEGACY, /* set for legacy ROM products */
+
+ __INTEL_NUM_FLAGS, /* number of flags, sizes the bitmap below */
+};
+
+/* Intel specific state, accessed through hci_get_priv() on the hci_dev */
+struct btintel_data {
+ DECLARE_BITMAP(flags, __INTEL_NUM_FLAGS);
+};
+
+/* Set flag bit @nr of @hdev */
+#define btintel_set_flag(hdev, nr) \
+ do { \
+ struct btintel_data *intel = hci_get_priv((hdev)); \
+ set_bit((nr), intel->flags); \
+ } while (0)
+
+/* Clear flag bit @nr of @hdev */
+#define btintel_clear_flag(hdev, nr) \
+ do { \
+ struct btintel_data *intel = hci_get_priv((hdev)); \
+ clear_bit((nr), intel->flags); \
+ } while (0)
+
+/* Wake up waiters sleeping on flag bit @nr of @hdev */
+#define btintel_wake_up_flag(hdev, nr) \
+ do { \
+ struct btintel_data *intel = hci_get_priv((hdev)); \
+ wake_up_bit(intel->flags, (nr)); \
+ } while (0)
+
+/* Evaluates to the flags bitmap of @hdev */
+#define btintel_get_flag(hdev) \
+ (((struct btintel_data *)hci_get_priv(hdev))->flags)
+
+/* Expression-style helpers built on btintel_get_flag() */
+#define btintel_test_flag(hdev, nr) test_bit((nr), btintel_get_flag(hdev))
+#define btintel_test_and_clear_flag(hdev, nr) test_and_clear_bit((nr), btintel_get_flag(hdev))
+#define btintel_wait_on_flag_timeout(hdev, nr, m, to) \
+ wait_on_bit_timeout(btintel_get_flag(hdev), (nr), m, to)
+
+
#if IS_ENABLED(CONFIG_BT_INTEL)
int btintel_check_bdaddr(struct hci_dev *hdev);
@@ -145,19 +188,11 @@ int btintel_enter_mfg(struct hci_dev *hdev);
int btintel_exit_mfg(struct hci_dev *hdev, bool reset, bool patched);
int btintel_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr);
int btintel_set_diag(struct hci_dev *hdev, bool enable);
-int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable);
-void btintel_hw_error(struct hci_dev *hdev, u8 code);
int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver);
-int btintel_version_info_tlv(struct hci_dev *hdev, struct intel_version_tlv *version);
-int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type, u32 plen,
- const void *param);
int btintel_load_ddc_config(struct hci_dev *hdev, const char *ddc_name);
-int btintel_set_event_mask(struct hci_dev *hdev, bool debug);
int btintel_set_event_mask_mfg(struct hci_dev *hdev, bool debug);
int btintel_read_version(struct hci_dev *hdev, struct intel_version *ver);
-int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver);
-
struct regmap *btintel_regmap_init(struct hci_dev *hdev, u16 opcode_read,
u16 opcode_write);
int btintel_send_intel_reset(struct hci_dev *hdev, u32 boot_param);
@@ -165,16 +200,10 @@ int btintel_read_boot_params(struct hci_dev *hdev,
struct intel_boot_params *params);
int btintel_download_firmware(struct hci_dev *dev, struct intel_version *ver,
const struct firmware *fw, u32 *boot_param);
-int btintel_download_firmware_newgen(struct hci_dev *hdev,
- struct intel_version_tlv *ver,
- const struct firmware *fw,
- u32 *boot_param, u8 hw_variant,
- u8 sbe_type);
-void btintel_reset_to_bootloader(struct hci_dev *hdev);
-int btintel_read_debug_features(struct hci_dev *hdev,
- struct intel_debug_features *features);
-int btintel_set_debug_features(struct hci_dev *hdev,
- const struct intel_debug_features *features);
+int btintel_configure_setup(struct hci_dev *hdev);
+void btintel_bootup(struct hci_dev *hdev, const void *ptr, unsigned int len);
+void btintel_secure_send_result(struct hci_dev *hdev,
+ const void *ptr, unsigned int len);
#else
static inline int btintel_check_bdaddr(struct hci_dev *hdev)
@@ -202,44 +231,18 @@ static inline int btintel_set_diag(struct hci_dev *hdev, bool enable)
return -EOPNOTSUPP;
}
-static inline int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable)
-{
- return -EOPNOTSUPP;
-}
-
-static inline void btintel_hw_error(struct hci_dev *hdev, u8 code)
-{
-}
-
static inline int btintel_version_info(struct hci_dev *hdev,
struct intel_version *ver)
{
return -EOPNOTSUPP;
}
-static inline int btintel_version_info_tlv(struct hci_dev *hdev,
- struct intel_version_tlv *version)
-{
- return -EOPNOTSUPP;
-}
-
-static inline int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type,
- u32 plen, const void *param)
-{
- return -EOPNOTSUPP;
-}
-
static inline int btintel_load_ddc_config(struct hci_dev *hdev,
const char *ddc_name)
{
return -EOPNOTSUPP;
}
-static inline int btintel_set_event_mask(struct hci_dev *hdev, bool debug)
-{
- return -EOPNOTSUPP;
-}
-
static inline int btintel_set_event_mask_mfg(struct hci_dev *hdev, bool debug)
{
return -EOPNOTSUPP;
@@ -251,12 +254,6 @@ static inline int btintel_read_version(struct hci_dev *hdev,
return -EOPNOTSUPP;
}
-static inline int btintel_read_version_tlv(struct hci_dev *hdev,
- struct intel_version_tlv *ver)
-{
- return -EOPNOTSUPP;
-}
-
static inline struct regmap *btintel_regmap_init(struct hci_dev *hdev,
u16 opcode_read,
u16 opcode_write)
@@ -283,28 +280,18 @@ static inline int btintel_download_firmware(struct hci_dev *dev,
return -EOPNOTSUPP;
}
-static inline int btintel_download_firmware_newgen(struct hci_dev *hdev,
- const struct firmware *fw,
- u32 *boot_param,
- u8 hw_variant, u8 sbe_type)
-{
- return -EOPNOTSUPP;
-}
-
-static inline void btintel_reset_to_bootloader(struct hci_dev *hdev)
+static inline int btintel_configure_setup(struct hci_dev *hdev)
{
+ return -ENODEV;
}
-static inline int btintel_read_debug_features(struct hci_dev *hdev,
- struct intel_debug_features *features)
+static inline void btintel_bootup(struct hci_dev *hdev,
+ const void *ptr, unsigned int len)
{
- return -EOPNOTSUPP;
}
-static inline int btintel_set_debug_features(struct hci_dev *hdev,
- const struct intel_debug_features *features)
+static inline void btintel_secure_send_result(struct hci_dev *hdev,
+ const void *ptr, unsigned int len)
{
- return -EOPNOTSUPP;
}
-
#endif
diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c
index cddd350beba3..68378b42ea7f 100644
--- a/drivers/bluetooth/btmrvl_sdio.c
+++ b/drivers/bluetooth/btmrvl_sdio.c
@@ -1350,6 +1350,7 @@ static void btmrvl_sdio_coredump(struct device *dev)
u8 *dbg_ptr, *end_ptr, *fw_dump_data, *fw_dump_ptr;
u8 dump_num = 0, idx, i, read_reg, doneflag = 0;
u32 memory_size, fw_dump_len = 0;
+ int size = 0;
card = sdio_get_drvdata(func);
priv = card->priv;
@@ -1478,7 +1479,7 @@ done:
if (fw_dump_len == 0)
return;
- fw_dump_data = vzalloc(fw_dump_len+1);
+ fw_dump_data = vzalloc(fw_dump_len + 1);
if (!fw_dump_data) {
BT_ERR("Vzalloc fw_dump_data fail!");
return;
@@ -1493,20 +1494,18 @@ done:
struct memory_type_mapping *entry = &mem_type_mapping_tbl[idx];
if (entry->mem_ptr) {
- strcpy(fw_dump_ptr, "========Start dump ");
- fw_dump_ptr += strlen("========Start dump ");
-
- strcpy(fw_dump_ptr, entry->mem_name);
- fw_dump_ptr += strlen(entry->mem_name);
-
- strcpy(fw_dump_ptr, "========\n");
- fw_dump_ptr += strlen("========\n");
-
- memcpy(fw_dump_ptr, entry->mem_ptr, entry->mem_size);
- fw_dump_ptr += entry->mem_size;
-
- strcpy(fw_dump_ptr, "\n========End dump========\n");
- fw_dump_ptr += strlen("\n========End dump========\n");
+ size += scnprintf(fw_dump_ptr + size,
+ fw_dump_len + 1 - size,
+ "========Start dump %s========\n",
+ entry->mem_name);
+
+ memcpy(fw_dump_ptr + size, entry->mem_ptr,
+ entry->mem_size);
+ size += entry->mem_size;
+
+ size += scnprintf(fw_dump_ptr + size,
+ fw_dump_len + 1 - size,
+ "\n========End dump========\n");
vfree(mem_type_mapping_tbl[idx].mem_ptr);
mem_type_mapping_tbl[idx].mem_ptr = NULL;
diff --git a/drivers/bluetooth/btrsi.c b/drivers/bluetooth/btrsi.c
index bea1595f6432..8646b6dd11e9 100644
--- a/drivers/bluetooth/btrsi.c
+++ b/drivers/bluetooth/btrsi.c
@@ -1,4 +1,4 @@
-/**
+/*
* Copyright (c) 2017 Redpine Signals Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c
index cce0125ec4fd..1f8afa0244d8 100644
--- a/drivers/bluetooth/btrtl.c
+++ b/drivers/bluetooth/btrtl.c
@@ -681,11 +681,15 @@ out_free:
}
}
- /* RTL8822CE supports the Microsoft vendor extension and uses 0xFCF0
- * for VsMsftOpCode.
+ /* The following chips support the Microsoft vendor extension,
+ * therefore set the corresponding VsMsftOpCode.
*/
- if (lmp_subver == RTL_ROM_LMP_8822B)
+ switch (lmp_subver) {
+ case RTL_ROM_LMP_8822B:
+ case RTL_ROM_LMP_8852A:
hci_set_msft_opcode(hdev, 0xFCF0);
+ break;
+ }
return btrtl_dev;
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index a9855a2dd561..60d2fce59a71 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -43,12 +43,11 @@ static struct usb_driver btusb_driver;
#define BTUSB_BROKEN_ISOC 0x20
#define BTUSB_WRONG_SCO_MTU 0x40
#define BTUSB_ATH3012 0x80
-#define BTUSB_INTEL 0x100
+#define BTUSB_INTEL_COMBINED 0x100
#define BTUSB_INTEL_BOOT 0x200
#define BTUSB_BCM_PATCHRAM 0x400
#define BTUSB_MARVELL 0x800
#define BTUSB_SWAVE 0x1000
-#define BTUSB_INTEL_NEW 0x2000
#define BTUSB_AMP 0x4000
#define BTUSB_QCA_ROME 0x8000
#define BTUSB_BCM_APPLE 0x10000
@@ -60,7 +59,7 @@ static struct usb_driver btusb_driver;
#define BTUSB_WIDEBAND_SPEECH 0x400000
#define BTUSB_VALID_LE_STATES 0x800000
#define BTUSB_QCA_WCN6855 0x1000000
-#define BTUSB_INTEL_NEWGEN 0x2000000
+#define BTUSB_INTEL_BROKEN_INITIAL_NCMD 0x4000000
static const struct usb_device_id btusb_table[] = {
/* Generic Bluetooth USB device */
@@ -119,9 +118,6 @@ static const struct usb_device_id btusb_table[] = {
/* Canyon CN-BTU1 with HID interfaces */
{ USB_DEVICE(0x0c10, 0x0000) },
- /* Broadcom BCM20702A0 */
- { USB_DEVICE(0x413c, 0x8197) },
-
/* Broadcom BCM20702B0 (Dynex/Insignia) */
{ USB_DEVICE(0x19ff, 0x0239), .driver_info = BTUSB_BCM_PATCHRAM },
@@ -297,7 +293,8 @@ static const struct usb_device_id blacklist_table[] = {
/* QCA WCN6855 chipset */
{ USB_DEVICE(0x0cf3, 0xe600), .driver_info = BTUSB_QCA_WCN6855 |
- BTUSB_WIDEBAND_SPEECH },
+ BTUSB_WIDEBAND_SPEECH |
+ BTUSB_VALID_LE_STATES },
/* Broadcom BCM2035 */
{ USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 },
@@ -361,27 +358,18 @@ static const struct usb_device_id blacklist_table[] = {
{ USB_DEVICE(0x1286, 0x204e), .driver_info = BTUSB_MARVELL },
/* Intel Bluetooth devices */
- { USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW |
- BTUSB_WIDEBAND_SPEECH |
- BTUSB_VALID_LE_STATES },
- { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW |
- BTUSB_WIDEBAND_SPEECH },
- { USB_DEVICE(0x8087, 0x0029), .driver_info = BTUSB_INTEL_NEW |
- BTUSB_WIDEBAND_SPEECH },
- { USB_DEVICE(0x8087, 0x0032), .driver_info = BTUSB_INTEL_NEWGEN |
- BTUSB_WIDEBAND_SPEECH},
- { USB_DEVICE(0x8087, 0x0033), .driver_info = BTUSB_INTEL_NEWGEN |
- BTUSB_WIDEBAND_SPEECH},
+ { USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_COMBINED },
+ { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_COMBINED },
+ { USB_DEVICE(0x8087, 0x0029), .driver_info = BTUSB_INTEL_COMBINED },
+ { USB_DEVICE(0x8087, 0x0032), .driver_info = BTUSB_INTEL_COMBINED },
+ { USB_DEVICE(0x8087, 0x0033), .driver_info = BTUSB_INTEL_COMBINED },
{ USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR },
- { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL },
- { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL },
- { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_NEW |
- BTUSB_WIDEBAND_SPEECH },
- { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL |
- BTUSB_WIDEBAND_SPEECH },
- { USB_DEVICE(0x8087, 0x0aaa), .driver_info = BTUSB_INTEL_NEW |
- BTUSB_WIDEBAND_SPEECH |
- BTUSB_VALID_LE_STATES },
+ { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL_COMBINED |
+ BTUSB_INTEL_BROKEN_INITIAL_NCMD },
+ { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL_COMBINED },
+ { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_COMBINED },
+ { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL_COMBINED },
+ { USB_DEVICE(0x8087, 0x0aaa), .driver_info = BTUSB_INTEL_COMBINED },
/* Other Intel Bluetooth devices */
{ USB_VENDOR_AND_INTERFACE_INFO(0x8087, 0xe0, 0x01, 0x01),
@@ -410,10 +398,21 @@ static const struct usb_device_id blacklist_table[] = {
/* Additional MediaTek MT7615E Bluetooth devices */
{ USB_DEVICE(0x13d3, 0x3560), .driver_info = BTUSB_MEDIATEK},
+ /* Additional MediaTek MT7668 Bluetooth devices */
+ { USB_DEVICE(0x043e, 0x3109), .driver_info = BTUSB_MEDIATEK |
+ BTUSB_WIDEBAND_SPEECH |
+ BTUSB_VALID_LE_STATES },
+
/* Additional MediaTek MT7921 Bluetooth devices */
{ USB_DEVICE(0x04ca, 0x3802), .driver_info = BTUSB_MEDIATEK |
BTUSB_WIDEBAND_SPEECH |
BTUSB_VALID_LE_STATES },
+ { USB_DEVICE(0x13d3, 0x3563), .driver_info = BTUSB_MEDIATEK |
+ BTUSB_WIDEBAND_SPEECH |
+ BTUSB_VALID_LE_STATES },
+ { USB_DEVICE(0x0489, 0xe0cd), .driver_info = BTUSB_MEDIATEK |
+ BTUSB_WIDEBAND_SPEECH |
+ BTUSB_VALID_LE_STATES },
/* Additional Realtek 8723AE Bluetooth devices */
{ USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK },
@@ -452,6 +451,10 @@ static const struct usb_device_id blacklist_table[] = {
/* Additional Realtek 8822CE Bluetooth devices */
{ USB_DEVICE(0x04ca, 0x4005), .driver_info = BTUSB_REALTEK |
BTUSB_WIDEBAND_SPEECH },
+ /* Bluetooth component of Realtek 8852AE device */
+ { USB_DEVICE(0x04ca, 0x4006), .driver_info = BTUSB_REALTEK |
+ BTUSB_WIDEBAND_SPEECH },
+
{ USB_DEVICE(0x04c5, 0x161f), .driver_info = BTUSB_REALTEK |
BTUSB_WIDEBAND_SPEECH },
{ USB_DEVICE(0x0b05, 0x18ef), .driver_info = BTUSB_REALTEK |
@@ -524,7 +527,8 @@ static const struct dmi_system_id btusb_needs_reset_resume_table[] = {
#define BTUSB_OOB_WAKE_ENABLED 11
#define BTUSB_HW_RESET_ACTIVE 12
#define BTUSB_TX_WAIT_VND_EVT 13
-#define BTUSB_WAKEUP_DISABLE 14
+#define BTUSB_WAKEUP_AUTOSUSPEND 14
+#define BTUSB_USE_ALT3_FOR_WBS 15
struct btusb_data {
struct hci_dev *hdev;
@@ -575,6 +579,7 @@ struct btusb_data {
int suspend_count;
int (*recv_event)(struct hci_dev *hdev, struct sk_buff *skb);
+ int (*recv_acl)(struct hci_dev *hdev, struct sk_buff *skb);
int (*recv_bulk)(struct btusb_data *data, void *buffer, int count);
int (*setup_on_usb)(struct hci_dev *hdev);
@@ -782,7 +787,7 @@ static int btusb_recv_bulk(struct btusb_data *data, void *buffer, int count)
if (!hci_skb_expect(skb)) {
/* Complete frame */
- hci_recv_frame(data->hdev, skb);
+ data->recv_acl(data->hdev, skb);
skb = NULL;
}
}
@@ -1345,13 +1350,6 @@ static int btusb_open(struct hci_dev *hdev)
data->intf->needs_remote_wakeup = 1;
- /* Disable device remote wakeup when host is suspended
- * For Realtek chips, global suspend without
- * SET_FEATURE (DEVICE_REMOTE_WAKEUP) can save more power in device.
- */
- if (test_bit(BTUSB_WAKEUP_DISABLE, &data->flags))
- device_wakeup_disable(&data->udev->dev);
-
if (test_and_set_bit(BTUSB_INTR_RUNNING, &data->flags))
goto done;
@@ -1418,7 +1416,7 @@ static int btusb_close(struct hci_dev *hdev)
data->intf->needs_remote_wakeup = 0;
/* Enable remote wake up for auto-suspend */
- if (test_bit(BTUSB_WAKEUP_DISABLE, &data->flags))
+ if (test_bit(BTUSB_WAKEUP_AUTOSUSPEND, &data->flags))
data->intf->needs_remote_wakeup = 1;
usb_autopm_put_interface(data->intf);
@@ -1757,16 +1755,20 @@ static void btusb_work(struct work_struct *work)
/* Bluetooth USB spec recommends alt 6 (63 bytes), but
* many adapters do not support it. Alt 1 appears to
* work for all adapters that do not have alt 6, and
- * which work with WBS at all.
+ * which work with WBS at all. Some devices prefer
+ * alt 3 (an HCI payload >= 60 bytes lets the air packet
+ * data satisfy 60 bytes), requiring
+ * MTU >= 3 (packets) * 25 (size) - 3 (headers) = 72;
+ * see also Core spec 5, vol 4, B 2.1.1 & Table 2.1.
*/
- new_alts = btusb_find_altsetting(data, 6) ? 6 : 1;
- /* Because mSBC frames do not need to be aligned to the
- * SCO packet boundary. If support the Alt 3, use the
- * Alt 3 for HCI payload >= 60 Bytes let air packet
- * data satisfy 60 bytes.
- */
- if (new_alts == 1 && btusb_find_altsetting(data, 3))
+ if (btusb_find_altsetting(data, 6))
+ new_alts = 6;
+ else if (btusb_find_altsetting(data, 3) &&
+ hdev->sco_mtu >= 72 &&
+ test_bit(BTUSB_USE_ALT3_FOR_WBS, &data->flags))
new_alts = 3;
+ else
+ new_alts = 1;
}
if (btusb_switch_alt_setting(hdev, new_alts) < 0)
@@ -1890,7 +1892,7 @@ static int btusb_setup_csr(struct hci_dev *hdev)
is_fake = true;
if (is_fake) {
- bt_dev_warn(hdev, "CSR: Unbranded CSR clone detected; adding workarounds...");
+ bt_dev_warn(hdev, "CSR: Unbranded CSR clone detected; adding workarounds and force-suspending once...");
/* Generally these clones have big discrepancies between
* advertised features and what's actually supported.
@@ -1907,361 +1909,53 @@ static int btusb_setup_csr(struct hci_dev *hdev)
clear_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
/*
- * Special workaround for clones with a Barrot 8041a02 chip,
- * these clones are really messed-up:
- * 1. Their bulk rx endpoint will never report any data unless
- * the device was suspended at least once (yes really).
+ * Special workaround for these BT 4.0 chip clones, and potentially more:
+ *
+ * - 0x0134: a Barrot 8041a02 (HCI rev: 0x1012 sub: 0x0810)
+ * - 0x7558: IC markings FR3191AHAL 749H15143 (HCI rev/sub-version: 0x0709)
+ *
+ * These controllers are really messed-up.
+ *
+ * 1. Their bulk RX endpoint will never report any data unless
+ * the device was suspended at least once (yes, really).
* 2. They will not wakeup when autosuspended and receiving data
- * on their bulk rx endpoint from e.g. a keyboard or mouse
+ * on their bulk RX endpoint from e.g. a keyboard or mouse
* (IOW remote-wakeup support is broken for the bulk endpoint).
*
* To fix 1. enable runtime-suspend, force-suspend the
- * hci and then wake-it up by disabling runtime-suspend.
+ * HCI and then wake it up by disabling runtime-suspend.
*
- * To fix 2. clear the hci's can_wake flag, this way the hci
+ * To fix 2. clear the HCI's can_wake flag, this way the HCI
* will still be autosuspended when it is not open.
+ *
+ * --
+ *
+ * Because these are widespread problems we prefer generic solutions; so
+ * apply this initialization quirk to every controller that gets here,
+ * it should be harmless. The alternative is to not work at all.
*/
- if (bcdDevice == 0x8891 &&
- le16_to_cpu(rp->lmp_subver) == 0x1012 &&
- le16_to_cpu(rp->hci_rev) == 0x0810 &&
- le16_to_cpu(rp->hci_ver) == BLUETOOTH_VER_4_0) {
- bt_dev_warn(hdev, "CSR: detected a fake CSR dongle using a Barrot 8041a02 chip, this chip is very buggy and may have issues");
-
- pm_runtime_allow(&data->udev->dev);
-
- ret = pm_runtime_suspend(&data->udev->dev);
- if (ret >= 0)
- msleep(200);
- else
- bt_dev_err(hdev, "Failed to suspend the device for Barrot 8041a02 receive-issue workaround");
-
- pm_runtime_forbid(&data->udev->dev);
-
- device_set_wakeup_capable(&data->udev->dev, false);
- /* Re-enable autosuspend if this was requested */
- if (enable_autosuspend)
- usb_enable_autosuspend(data->udev);
- }
- }
-
- kfree_skb(skb);
-
- return 0;
-}
-
-static const struct firmware *btusb_setup_intel_get_fw(struct hci_dev *hdev,
- struct intel_version *ver)
-{
- const struct firmware *fw;
- char fwname[64];
- int ret;
-
- snprintf(fwname, sizeof(fwname),
- "intel/ibt-hw-%x.%x.%x-fw-%x.%x.%x.%x.%x.bseq",
- ver->hw_platform, ver->hw_variant, ver->hw_revision,
- ver->fw_variant, ver->fw_revision, ver->fw_build_num,
- ver->fw_build_ww, ver->fw_build_yy);
-
- ret = request_firmware(&fw, fwname, &hdev->dev);
- if (ret < 0) {
- if (ret == -EINVAL) {
- bt_dev_err(hdev, "Intel firmware file request failed (%d)",
- ret);
- return NULL;
- }
-
- bt_dev_err(hdev, "failed to open Intel firmware file: %s (%d)",
- fwname, ret);
-
- /* If the correct firmware patch file is not found, use the
- * default firmware patch file instead
- */
- snprintf(fwname, sizeof(fwname), "intel/ibt-hw-%x.%x.bseq",
- ver->hw_platform, ver->hw_variant);
- if (request_firmware(&fw, fwname, &hdev->dev) < 0) {
- bt_dev_err(hdev, "failed to open default fw file: %s",
- fwname);
- return NULL;
- }
- }
+ pm_runtime_allow(&data->udev->dev);
- bt_dev_info(hdev, "Intel Bluetooth firmware file: %s", fwname);
-
- return fw;
-}
-
-static int btusb_setup_intel_patching(struct hci_dev *hdev,
- const struct firmware *fw,
- const u8 **fw_ptr, int *disable_patch)
-{
- struct sk_buff *skb;
- struct hci_command_hdr *cmd;
- const u8 *cmd_param;
- struct hci_event_hdr *evt = NULL;
- const u8 *evt_param = NULL;
- int remain = fw->size - (*fw_ptr - fw->data);
-
- /* The first byte indicates the types of the patch command or event.
- * 0x01 means HCI command and 0x02 is HCI event. If the first bytes
- * in the current firmware buffer doesn't start with 0x01 or
- * the size of remain buffer is smaller than HCI command header,
- * the firmware file is corrupted and it should stop the patching
- * process.
- */
- if (remain > HCI_COMMAND_HDR_SIZE && *fw_ptr[0] != 0x01) {
- bt_dev_err(hdev, "Intel fw corrupted: invalid cmd read");
- return -EINVAL;
- }
- (*fw_ptr)++;
- remain--;
-
- cmd = (struct hci_command_hdr *)(*fw_ptr);
- *fw_ptr += sizeof(*cmd);
- remain -= sizeof(*cmd);
-
- /* Ensure that the remain firmware data is long enough than the length
- * of command parameter. If not, the firmware file is corrupted.
- */
- if (remain < cmd->plen) {
- bt_dev_err(hdev, "Intel fw corrupted: invalid cmd len");
- return -EFAULT;
- }
-
- /* If there is a command that loads a patch in the firmware
- * file, then enable the patch upon success, otherwise just
- * disable the manufacturer mode, for example patch activation
- * is not required when the default firmware patch file is used
- * because there are no patch data to load.
- */
- if (*disable_patch && le16_to_cpu(cmd->opcode) == 0xfc8e)
- *disable_patch = 0;
-
- cmd_param = *fw_ptr;
- *fw_ptr += cmd->plen;
- remain -= cmd->plen;
-
- /* This reads the expected events when the above command is sent to the
- * device. Some vendor commands expects more than one events, for
- * example command status event followed by vendor specific event.
- * For this case, it only keeps the last expected event. so the command
- * can be sent with __hci_cmd_sync_ev() which returns the sk_buff of
- * last expected event.
- */
- while (remain > HCI_EVENT_HDR_SIZE && *fw_ptr[0] == 0x02) {
- (*fw_ptr)++;
- remain--;
-
- evt = (struct hci_event_hdr *)(*fw_ptr);
- *fw_ptr += sizeof(*evt);
- remain -= sizeof(*evt);
-
- if (remain < evt->plen) {
- bt_dev_err(hdev, "Intel fw corrupted: invalid evt len");
- return -EFAULT;
- }
-
- evt_param = *fw_ptr;
- *fw_ptr += evt->plen;
- remain -= evt->plen;
- }
+ ret = pm_runtime_suspend(&data->udev->dev);
+ if (ret >= 0)
+ msleep(200);
+ else
+ bt_dev_err(hdev, "CSR: Failed to suspend the device for our Barrot 8041a02 receive-issue workaround");
- /* Every HCI commands in the firmware file has its correspond event.
- * If event is not found or remain is smaller than zero, the firmware
- * file is corrupted.
- */
- if (!evt || !evt_param || remain < 0) {
- bt_dev_err(hdev, "Intel fw corrupted: invalid evt read");
- return -EFAULT;
- }
+ pm_runtime_forbid(&data->udev->dev);
- skb = __hci_cmd_sync_ev(hdev, le16_to_cpu(cmd->opcode), cmd->plen,
- cmd_param, evt->evt, HCI_INIT_TIMEOUT);
- if (IS_ERR(skb)) {
- bt_dev_err(hdev, "sending Intel patch command (0x%4.4x) failed (%ld)",
- cmd->opcode, PTR_ERR(skb));
- return PTR_ERR(skb);
- }
+ device_set_wakeup_capable(&data->udev->dev, false);
- /* It ensures that the returned event matches the event data read from
- * the firmware file. At fist, it checks the length and then
- * the contents of the event.
- */
- if (skb->len != evt->plen) {
- bt_dev_err(hdev, "mismatch event length (opcode 0x%4.4x)",
- le16_to_cpu(cmd->opcode));
- kfree_skb(skb);
- return -EFAULT;
+ /* Re-enable autosuspend if this was requested */
+ if (enable_autosuspend)
+ usb_enable_autosuspend(data->udev);
}
- if (memcmp(skb->data, evt_param, evt->plen)) {
- bt_dev_err(hdev, "mismatch event parameter (opcode 0x%4.4x)",
- le16_to_cpu(cmd->opcode));
- kfree_skb(skb);
- return -EFAULT;
- }
kfree_skb(skb);
return 0;
}
-static int btusb_setup_intel(struct hci_dev *hdev)
-{
- struct sk_buff *skb;
- const struct firmware *fw;
- const u8 *fw_ptr;
- int disable_patch, err;
- struct intel_version ver;
-
- BT_DBG("%s", hdev->name);
-
- /* The controller has a bug with the first HCI command sent to it
- * returning number of completed commands as zero. This would stall the
- * command processing in the Bluetooth core.
- *
- * As a workaround, send HCI Reset command first which will reset the
- * number of completed commands and allow normal command processing
- * from now on.
- */
- skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
- if (IS_ERR(skb)) {
- bt_dev_err(hdev, "sending initial HCI reset command failed (%ld)",
- PTR_ERR(skb));
- return PTR_ERR(skb);
- }
- kfree_skb(skb);
-
- /* Read Intel specific controller version first to allow selection of
- * which firmware file to load.
- *
- * The returned information are hardware variant and revision plus
- * firmware variant, revision and build number.
- */
- err = btintel_read_version(hdev, &ver);
- if (err)
- return err;
-
- bt_dev_info(hdev, "read Intel version: %02x%02x%02x%02x%02x%02x%02x%02x%02x",
- ver.hw_platform, ver.hw_variant, ver.hw_revision,
- ver.fw_variant, ver.fw_revision, ver.fw_build_num,
- ver.fw_build_ww, ver.fw_build_yy, ver.fw_patch_num);
-
- /* fw_patch_num indicates the version of patch the device currently
- * have. If there is no patch data in the device, it is always 0x00.
- * So, if it is other than 0x00, no need to patch the device again.
- */
- if (ver.fw_patch_num) {
- bt_dev_info(hdev, "Intel device is already patched. "
- "patch num: %02x", ver.fw_patch_num);
- goto complete;
- }
-
- /* Opens the firmware patch file based on the firmware version read
- * from the controller. If it fails to open the matching firmware
- * patch file, it tries to open the default firmware patch file.
- * If no patch file is found, allow the device to operate without
- * a patch.
- */
- fw = btusb_setup_intel_get_fw(hdev, &ver);
- if (!fw)
- goto complete;
- fw_ptr = fw->data;
-
- /* Enable the manufacturer mode of the controller.
- * Only while this mode is enabled, the driver can download the
- * firmware patch data and configuration parameters.
- */
- err = btintel_enter_mfg(hdev);
- if (err) {
- release_firmware(fw);
- return err;
- }
-
- disable_patch = 1;
-
- /* The firmware data file consists of list of Intel specific HCI
- * commands and its expected events. The first byte indicates the
- * type of the message, either HCI command or HCI event.
- *
- * It reads the command and its expected event from the firmware file,
- * and send to the controller. Once __hci_cmd_sync_ev() returns,
- * the returned event is compared with the event read from the firmware
- * file and it will continue until all the messages are downloaded to
- * the controller.
- *
- * Once the firmware patching is completed successfully,
- * the manufacturer mode is disabled with reset and activating the
- * downloaded patch.
- *
- * If the firmware patching fails, the manufacturer mode is
- * disabled with reset and deactivating the patch.
- *
- * If the default patch file is used, no reset is done when disabling
- * the manufacturer.
- */
- while (fw->size > fw_ptr - fw->data) {
- int ret;
-
- ret = btusb_setup_intel_patching(hdev, fw, &fw_ptr,
- &disable_patch);
- if (ret < 0)
- goto exit_mfg_deactivate;
- }
-
- release_firmware(fw);
-
- if (disable_patch)
- goto exit_mfg_disable;
-
- /* Patching completed successfully and disable the manufacturer mode
- * with reset and activate the downloaded firmware patches.
- */
- err = btintel_exit_mfg(hdev, true, true);
- if (err)
- return err;
-
- /* Need build number for downloaded fw patches in
- * every power-on boot
- */
- err = btintel_read_version(hdev, &ver);
- if (err)
- return err;
- bt_dev_info(hdev, "Intel BT fw patch 0x%02x completed & activated",
- ver.fw_patch_num);
-
- goto complete;
-
-exit_mfg_disable:
- /* Disable the manufacturer mode without reset */
- err = btintel_exit_mfg(hdev, false, false);
- if (err)
- return err;
-
- bt_dev_info(hdev, "Intel firmware patch completed");
-
- goto complete;
-
-exit_mfg_deactivate:
- release_firmware(fw);
-
- /* Patching failed. Disable the manufacturer mode with reset and
- * deactivate the downloaded firmware patches.
- */
- err = btintel_exit_mfg(hdev, true, false);
- if (err)
- return err;
-
- bt_dev_info(hdev, "Intel firmware patch completed and deactivated");
-
-complete:
- /* Set the event mask for Intel specific vendor events. This enables
- * a few extra events that are useful during general operation.
- */
- btintel_set_event_mask_mfg(hdev, false);
-
- btintel_check_bdaddr(hdev);
- return 0;
-}
-
static int inject_cmd_complete(struct hci_dev *hdev, __u16 opcode)
{
struct sk_buff *skb;
@@ -2290,49 +1984,21 @@ static int inject_cmd_complete(struct hci_dev *hdev, __u16 opcode)
static int btusb_recv_bulk_intel(struct btusb_data *data, void *buffer,
int count)
{
+ struct hci_dev *hdev = data->hdev;
+
/* When the device is in bootloader mode, then it can send
* events via the bulk endpoint. These events are treated the
* same way as the ones received from the interrupt endpoint.
*/
- if (test_bit(BTUSB_BOOTLOADER, &data->flags))
+ if (btintel_test_flag(hdev, INTEL_BOOTLOADER))
return btusb_recv_intr(data, buffer, count);
return btusb_recv_bulk(data, buffer, count);
}
-static void btusb_intel_bootup(struct btusb_data *data, const void *ptr,
- unsigned int len)
-{
- const struct intel_bootup *evt = ptr;
-
- if (len != sizeof(*evt))
- return;
-
- if (test_and_clear_bit(BTUSB_BOOTING, &data->flags))
- wake_up_bit(&data->flags, BTUSB_BOOTING);
-}
-
-static void btusb_intel_secure_send_result(struct btusb_data *data,
- const void *ptr, unsigned int len)
-{
- const struct intel_secure_send_result *evt = ptr;
-
- if (len != sizeof(*evt))
- return;
-
- if (evt->result)
- set_bit(BTUSB_FIRMWARE_FAILED, &data->flags);
-
- if (test_and_clear_bit(BTUSB_DOWNLOADING, &data->flags) &&
- test_bit(BTUSB_FIRMWARE_LOADED, &data->flags))
- wake_up_bit(&data->flags, BTUSB_DOWNLOADING);
-}
-
static int btusb_recv_event_intel(struct hci_dev *hdev, struct sk_buff *skb)
{
- struct btusb_data *data = hci_get_drvdata(hdev);
-
- if (test_bit(BTUSB_BOOTLOADER, &data->flags)) {
+ if (btintel_test_flag(hdev, INTEL_BOOTLOADER)) {
struct hci_event_hdr *hdr = (void *)skb->data;
if (skb->len > HCI_EVENT_HDR_SIZE && hdr->evt == 0xff &&
@@ -2346,7 +2012,7 @@ static int btusb_recv_event_intel(struct hci_dev *hdev, struct sk_buff *skb)
* the device sends a vendor specific event
* indicating that the bootup completed.
*/
- btusb_intel_bootup(data, ptr, len);
+ btintel_bootup(hdev, ptr, len);
break;
case 0x06:
/* When the firmware loading completes the
@@ -2354,7 +2020,7 @@ static int btusb_recv_event_intel(struct hci_dev *hdev, struct sk_buff *skb)
* indicating the result of the firmware
* loading.
*/
- btusb_intel_secure_send_result(data, ptr, len);
+ btintel_secure_send_result(hdev, ptr, len);
break;
}
}
@@ -2365,14 +2031,13 @@ static int btusb_recv_event_intel(struct hci_dev *hdev, struct sk_buff *skb)
static int btusb_send_frame_intel(struct hci_dev *hdev, struct sk_buff *skb)
{
- struct btusb_data *data = hci_get_drvdata(hdev);
struct urb *urb;
BT_DBG("%s", hdev->name);
switch (hci_skb_pkt_type(skb)) {
case HCI_COMMAND_PKT:
- if (test_bit(BTUSB_BOOTLOADER, &data->flags)) {
+ if (btintel_test_flag(hdev, INTEL_BOOTLOADER)) {
struct hci_command_hdr *cmd = (void *)skb->data;
__u16 opcode = le16_to_cpu(cmd->opcode);
@@ -2424,663 +2089,17 @@ static int btusb_send_frame_intel(struct hci_dev *hdev, struct sk_buff *skb)
return -EILSEQ;
}
-static int btusb_setup_intel_new_get_fw_name(struct intel_version *ver,
- struct intel_boot_params *params,
- char *fw_name, size_t len,
- const char *suffix)
-{
- switch (ver->hw_variant) {
- case 0x0b: /* SfP */
- case 0x0c: /* WsP */
- snprintf(fw_name, len, "intel/ibt-%u-%u.%s",
- le16_to_cpu(ver->hw_variant),
- le16_to_cpu(params->dev_revid),
- suffix);
- break;
- case 0x11: /* JfP */
- case 0x12: /* ThP */
- case 0x13: /* HrP */
- case 0x14: /* CcP */
- snprintf(fw_name, len, "intel/ibt-%u-%u-%u.%s",
- le16_to_cpu(ver->hw_variant),
- le16_to_cpu(ver->hw_revision),
- le16_to_cpu(ver->fw_revision),
- suffix);
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static void btusb_setup_intel_newgen_get_fw_name(const struct intel_version_tlv *ver_tlv,
- char *fw_name, size_t len,
- const char *suffix)
-{
- /* The firmware file name for new generation controllers will be
- * ibt-<cnvi_top type+cnvi_top step>-<cnvr_top type+cnvr_top step>
- */
- snprintf(fw_name, len, "intel/ibt-%04x-%04x.%s",
- INTEL_CNVX_TOP_PACK_SWAB(INTEL_CNVX_TOP_TYPE(ver_tlv->cnvi_top),
- INTEL_CNVX_TOP_STEP(ver_tlv->cnvi_top)),
- INTEL_CNVX_TOP_PACK_SWAB(INTEL_CNVX_TOP_TYPE(ver_tlv->cnvr_top),
- INTEL_CNVX_TOP_STEP(ver_tlv->cnvr_top)),
- suffix);
-}
-
-static int btusb_download_wait(struct hci_dev *hdev, ktime_t calltime, int msec)
-{
- struct btusb_data *data = hci_get_drvdata(hdev);
- ktime_t delta, rettime;
- unsigned long long duration;
- int err;
-
- set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
-
- bt_dev_info(hdev, "Waiting for firmware download to complete");
-
- err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING,
- TASK_INTERRUPTIBLE,
- msecs_to_jiffies(msec));
- if (err == -EINTR) {
- bt_dev_err(hdev, "Firmware loading interrupted");
- return err;
- }
-
- if (err) {
- bt_dev_err(hdev, "Firmware loading timeout");
- return -ETIMEDOUT;
- }
-
- if (test_bit(BTUSB_FIRMWARE_FAILED, &data->flags)) {
- bt_dev_err(hdev, "Firmware loading failed");
- return -ENOEXEC;
- }
-
- rettime = ktime_get();
- delta = ktime_sub(rettime, calltime);
- duration = (unsigned long long)ktime_to_ns(delta) >> 10;
-
- bt_dev_info(hdev, "Firmware loaded in %llu usecs", duration);
-
- return 0;
-}
-
-static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev,
- struct intel_version_tlv *ver,
- u32 *boot_param)
-{
- const struct firmware *fw;
- char fwname[64];
- int err;
- struct btusb_data *data = hci_get_drvdata(hdev);
- ktime_t calltime;
-
- if (!ver || !boot_param)
- return -EINVAL;
-
- /* The firmware variant determines if the device is in bootloader
- * mode or is running operational firmware. The value 0x03 identifies
- * the bootloader and the value 0x23 identifies the operational
- * firmware.
- *
- * When the operational firmware is already present, then only
- * the check for valid Bluetooth device address is needed. This
- * determines if the device will be added as configured or
- * unconfigured controller.
- *
- * It is not possible to use the Secure Boot Parameters in this
- * case since that command is only available in bootloader mode.
- */
- if (ver->img_type == 0x03) {
- clear_bit(BTUSB_BOOTLOADER, &data->flags);
- btintel_check_bdaddr(hdev);
- }
-
- /* If the OTP has no valid Bluetooth device address, then there will
- * also be no valid address for the operational firmware.
- */
- if (!bacmp(&ver->otp_bd_addr, BDADDR_ANY)) {
- bt_dev_info(hdev, "No device address configured");
- set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
- }
-
- btusb_setup_intel_newgen_get_fw_name(ver, fwname, sizeof(fwname), "sfi");
- err = firmware_request_nowarn(&fw, fwname, &hdev->dev);
- if (err < 0) {
- if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) {
- /* Firmware has already been loaded */
- set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
- return 0;
- }
-
- bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)",
- fwname, err);
-
- return err;
- }
-
- bt_dev_info(hdev, "Found device firmware: %s", fwname);
-
- if (fw->size < 644) {
- bt_dev_err(hdev, "Invalid size of firmware file (%zu)",
- fw->size);
- err = -EBADF;
- goto done;
- }
-
- calltime = ktime_get();
-
- set_bit(BTUSB_DOWNLOADING, &data->flags);
-
- /* Start firmware downloading and get boot parameter */
- err = btintel_download_firmware_newgen(hdev, ver, fw, boot_param,
- INTEL_HW_VARIANT(ver->cnvi_bt),
- ver->sbe_type);
- if (err < 0) {
- if (err == -EALREADY) {
- /* Firmware has already been loaded */
- set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
- err = 0;
- goto done;
- }
-
- /* When FW download fails, send Intel Reset to retry
- * FW download.
- */
- btintel_reset_to_bootloader(hdev);
- goto done;
- }
-
- /* Before switching the device into operational mode and with that
- * booting the loaded firmware, wait for the bootloader notification
- * that all fragments have been successfully received.
- *
- * When the event processing receives the notification, then the
- * BTUSB_DOWNLOADING flag will be cleared.
- *
- * The firmware loading should not take longer than 5 seconds
- * and thus just timeout if that happens and fail the setup
- * of this device.
- */
- err = btusb_download_wait(hdev, calltime, 5000);
- if (err == -ETIMEDOUT)
- btintel_reset_to_bootloader(hdev);
-
-done:
- release_firmware(fw);
- return err;
-}
-
-static int btusb_intel_download_firmware(struct hci_dev *hdev,
- struct intel_version *ver,
- struct intel_boot_params *params,
- u32 *boot_param)
-{
- const struct firmware *fw;
- char fwname[64];
- int err;
- struct btusb_data *data = hci_get_drvdata(hdev);
- ktime_t calltime;
-
- if (!ver || !params)
- return -EINVAL;
-
- /* The firmware variant determines if the device is in bootloader
- * mode or is running operational firmware. The value 0x06 identifies
- * the bootloader and the value 0x23 identifies the operational
- * firmware.
- *
- * When the operational firmware is already present, then only
- * the check for valid Bluetooth device address is needed. This
- * determines if the device will be added as configured or
- * unconfigured controller.
- *
- * It is not possible to use the Secure Boot Parameters in this
- * case since that command is only available in bootloader mode.
- */
- if (ver->fw_variant == 0x23) {
- clear_bit(BTUSB_BOOTLOADER, &data->flags);
- btintel_check_bdaddr(hdev);
-
- /* SfP and WsP don't seem to update the firmware version on file
- * so version checking is currently possible.
- */
- switch (ver->hw_variant) {
- case 0x0b: /* SfP */
- case 0x0c: /* WsP */
- return 0;
- }
-
- /* Proceed to download to check if the version matches */
- goto download;
- }
-
- /* Read the secure boot parameters to identify the operating
- * details of the bootloader.
- */
- err = btintel_read_boot_params(hdev, params);
- if (err)
- return err;
-
- /* It is required that every single firmware fragment is acknowledged
- * with a command complete event. If the boot parameters indicate
- * that this bootloader does not send them, then abort the setup.
- */
- if (params->limited_cce != 0x00) {
- bt_dev_err(hdev, "Unsupported Intel firmware loading method (%u)",
- params->limited_cce);
- return -EINVAL;
- }
-
- /* If the OTP has no valid Bluetooth device address, then there will
- * also be no valid address for the operational firmware.
- */
- if (!bacmp(&params->otp_bdaddr, BDADDR_ANY)) {
- bt_dev_info(hdev, "No device address configured");
- set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
- }
-
-download:
- /* With this Intel bootloader only the hardware variant and device
- * revision information are used to select the right firmware for SfP
- * and WsP.
- *
- * The firmware filename is ibt-<hw_variant>-<dev_revid>.sfi.
- *
- * Currently the supported hardware variants are:
- * 11 (0x0b) for iBT3.0 (LnP/SfP)
- * 12 (0x0c) for iBT3.5 (WsP)
- *
- * For ThP/JfP and for future SKU's, the FW name varies based on HW
- * variant, HW revision and FW revision, as these are dependent on CNVi
- * and RF Combination.
- *
- * 17 (0x11) for iBT3.5 (JfP)
- * 18 (0x12) for iBT3.5 (ThP)
- *
- * The firmware file name for these will be
- * ibt-<hw_variant>-<hw_revision>-<fw_revision>.sfi.
- *
- */
- err = btusb_setup_intel_new_get_fw_name(ver, params, fwname,
- sizeof(fwname), "sfi");
- if (err < 0) {
- if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) {
- /* Firmware has already been loaded */
- set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
- return 0;
- }
-
- bt_dev_err(hdev, "Unsupported Intel firmware naming");
- return -EINVAL;
- }
-
- err = firmware_request_nowarn(&fw, fwname, &hdev->dev);
- if (err < 0) {
- if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) {
- /* Firmware has already been loaded */
- set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
- return 0;
- }
-
- bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)",
- fwname, err);
- return err;
- }
-
- bt_dev_info(hdev, "Found device firmware: %s", fwname);
-
- if (fw->size < 644) {
- bt_dev_err(hdev, "Invalid size of firmware file (%zu)",
- fw->size);
- err = -EBADF;
- goto done;
- }
-
- calltime = ktime_get();
-
- set_bit(BTUSB_DOWNLOADING, &data->flags);
-
- /* Start firmware downloading and get boot parameter */
- err = btintel_download_firmware(hdev, ver, fw, boot_param);
- if (err < 0) {
- if (err == -EALREADY) {
- /* Firmware has already been loaded */
- set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
- err = 0;
- goto done;
- }
-
- /* When FW download fails, send Intel Reset to retry
- * FW download.
- */
- btintel_reset_to_bootloader(hdev);
- goto done;
- }
-
- /* Before switching the device into operational mode and with that
- * booting the loaded firmware, wait for the bootloader notification
- * that all fragments have been successfully received.
- *
- * When the event processing receives the notification, then the
- * BTUSB_DOWNLOADING flag will be cleared.
- *
- * The firmware loading should not take longer than 5 seconds
- * and thus just timeout if that happens and fail the setup
- * of this device.
- */
- err = btusb_download_wait(hdev, calltime, 5000);
- if (err == -ETIMEDOUT)
- btintel_reset_to_bootloader(hdev);
-
-done:
- release_firmware(fw);
- return err;
-}
-
-static int btusb_boot_wait(struct hci_dev *hdev, ktime_t calltime, int msec)
-{
- struct btusb_data *data = hci_get_drvdata(hdev);
- ktime_t delta, rettime;
- unsigned long long duration;
- int err;
-
- bt_dev_info(hdev, "Waiting for device to boot");
-
- err = wait_on_bit_timeout(&data->flags, BTUSB_BOOTING,
- TASK_INTERRUPTIBLE,
- msecs_to_jiffies(msec));
- if (err == -EINTR) {
- bt_dev_err(hdev, "Device boot interrupted");
- return -EINTR;
- }
-
- if (err) {
- bt_dev_err(hdev, "Device boot timeout");
- return -ETIMEDOUT;
- }
-
- rettime = ktime_get();
- delta = ktime_sub(rettime, calltime);
- duration = (unsigned long long) ktime_to_ns(delta) >> 10;
-
- bt_dev_info(hdev, "Device booted in %llu usecs", duration);
-
- return 0;
-}
-
-static int btusb_intel_boot(struct hci_dev *hdev, u32 boot_addr)
-{
- struct btusb_data *data = hci_get_drvdata(hdev);
- ktime_t calltime;
- int err;
-
- calltime = ktime_get();
-
- set_bit(BTUSB_BOOTING, &data->flags);
-
- err = btintel_send_intel_reset(hdev, boot_addr);
- if (err) {
- bt_dev_err(hdev, "Intel Soft Reset failed (%d)", err);
- btintel_reset_to_bootloader(hdev);
- return err;
- }
-
- /* The bootloader will not indicate when the device is ready. This
- * is done by the operational firmware sending bootup notification.
- *
- * Booting into operational firmware should not take longer than
- * 1 second. However if that happens, then just fail the setup
- * since something went wrong.
- */
- err = btusb_boot_wait(hdev, calltime, 1000);
- if (err == -ETIMEDOUT)
- btintel_reset_to_bootloader(hdev);
-
- return err;
-}
-
-static int btusb_setup_intel_new(struct hci_dev *hdev)
-{
- struct btusb_data *data = hci_get_drvdata(hdev);
- struct intel_version ver;
- struct intel_boot_params params;
- u32 boot_param;
- char ddcname[64];
- int err;
- struct intel_debug_features features;
-
- BT_DBG("%s", hdev->name);
-
- /* Set the default boot parameter to 0x0 and it is updated to
- * SKU specific boot parameter after reading Intel_Write_Boot_Params
- * command while downloading the firmware.
- */
- boot_param = 0x00000000;
-
- /* Read the Intel version information to determine if the device
- * is in bootloader mode or if it already has operational firmware
- * loaded.
- */
- err = btintel_read_version(hdev, &ver);
- if (err) {
- bt_dev_err(hdev, "Intel Read version failed (%d)", err);
- btintel_reset_to_bootloader(hdev);
- return err;
- }
-
- err = btintel_version_info(hdev, &ver);
- if (err)
- return err;
-
- err = btusb_intel_download_firmware(hdev, &ver, &params, &boot_param);
- if (err)
- return err;
-
- /* controller is already having an operational firmware */
- if (ver.fw_variant == 0x23)
- goto finish;
-
- err = btusb_intel_boot(hdev, boot_param);
- if (err)
- return err;
-
- clear_bit(BTUSB_BOOTLOADER, &data->flags);
-
- err = btusb_setup_intel_new_get_fw_name(&ver, &params, ddcname,
- sizeof(ddcname), "ddc");
-
- if (err < 0) {
- bt_dev_err(hdev, "Unsupported Intel firmware naming");
- } else {
- /* Once the device is running in operational mode, it needs to
- * apply the device configuration (DDC) parameters.
- *
- * The device can work without DDC parameters, so even if it
- * fails to load the file, no need to fail the setup.
- */
- btintel_load_ddc_config(hdev, ddcname);
- }
-
- /* Read the Intel supported features and if new exception formats
- * supported, need to load the additional DDC config to enable.
- */
- btintel_read_debug_features(hdev, &features);
-
- /* Set DDC mask for available debug features */
- btintel_set_debug_features(hdev, &features);
-
- /* Read the Intel version information after loading the FW */
- err = btintel_read_version(hdev, &ver);
- if (err)
- return err;
-
- btintel_version_info(hdev, &ver);
-
-finish:
- /* All Intel controllers that support the Microsoft vendor
- * extension are using 0xFC1E for VsMsftOpCode.
- */
- switch (ver.hw_variant) {
- case 0x11: /* JfP */
- case 0x12: /* ThP */
- case 0x13: /* HrP */
- case 0x14: /* CcP */
- hci_set_msft_opcode(hdev, 0xFC1E);
- break;
- }
-
- /* Set the event mask for Intel specific vendor events. This enables
- * a few extra events that are useful during general operation. It
- * does not enable any debugging related events.
- *
- * The device will function correctly without these events enabled
- * and thus no need to fail the setup.
- */
- btintel_set_event_mask(hdev, false);
-
- return 0;
-}
-
-static int btusb_setup_intel_newgen(struct hci_dev *hdev)
-{
- struct btusb_data *data = hci_get_drvdata(hdev);
- u32 boot_param;
- char ddcname[64];
- int err;
- struct intel_debug_features features;
- struct intel_version_tlv version;
-
- bt_dev_dbg(hdev, "");
-
- /* Set the default boot parameter to 0x0 and it is updated to
- * SKU specific boot parameter after reading Intel_Write_Boot_Params
- * command while downloading the firmware.
- */
- boot_param = 0x00000000;
-
- /* Read the Intel version information to determine if the device
- * is in bootloader mode or if it already has operational firmware
- * loaded.
- */
- err = btintel_read_version_tlv(hdev, &version);
- if (err) {
- bt_dev_err(hdev, "Intel Read version failed (%d)", err);
- btintel_reset_to_bootloader(hdev);
- return err;
- }
-
- err = btintel_version_info_tlv(hdev, &version);
- if (err)
- return err;
-
- err = btusb_intel_download_firmware_newgen(hdev, &version, &boot_param);
- if (err)
- return err;
-
- /* check if controller is already having an operational firmware */
- if (version.img_type == 0x03)
- goto finish;
-
- err = btusb_intel_boot(hdev, boot_param);
- if (err)
- return err;
-
- clear_bit(BTUSB_BOOTLOADER, &data->flags);
-
- btusb_setup_intel_newgen_get_fw_name(&version, ddcname, sizeof(ddcname),
- "ddc");
- /* Once the device is running in operational mode, it needs to
- * apply the device configuration (DDC) parameters.
- *
- * The device can work without DDC parameters, so even if it
- * fails to load the file, no need to fail the setup.
- */
- btintel_load_ddc_config(hdev, ddcname);
-
- /* Read the Intel supported features and if new exception formats
- * supported, need to load the additional DDC config to enable.
- */
- btintel_read_debug_features(hdev, &features);
-
- /* Set DDC mask for available debug features */
- btintel_set_debug_features(hdev, &features);
-
- /* Read the Intel version information after loading the FW */
- err = btintel_read_version_tlv(hdev, &version);
- if (err)
- return err;
-
- btintel_version_info_tlv(hdev, &version);
-
-finish:
- /* Set the event mask for Intel specific vendor events. This enables
- * a few extra events that are useful during general operation. It
- * does not enable any debugging related events.
- *
- * The device will function correctly without these events enabled
- * and thus no need to fail the setup.
- */
- btintel_set_event_mask(hdev, false);
-
- return 0;
-}
-static int btusb_shutdown_intel(struct hci_dev *hdev)
-{
- struct sk_buff *skb;
- long ret;
-
- /* In the shutdown sequence where Bluetooth is turned off followed
- * by WiFi being turned off, turning WiFi back on causes issue with
- * the RF calibration.
- *
- * To ensure that any RF activity has been stopped, issue HCI Reset
- * command to clear all ongoing activity including advertising,
- * scanning etc.
- */
- skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
- if (IS_ERR(skb)) {
- ret = PTR_ERR(skb);
- bt_dev_err(hdev, "HCI reset during shutdown failed");
- return ret;
- }
- kfree_skb(skb);
-
- /* Some platforms have an issue with BT LED when the interface is
- * down or BT radio is turned off, which takes 5 seconds to BT LED
- * goes off. This command turns off the BT LED immediately.
- */
- skb = __hci_cmd_sync(hdev, 0xfc3f, 0, NULL, HCI_INIT_TIMEOUT);
- if (IS_ERR(skb)) {
- ret = PTR_ERR(skb);
- bt_dev_err(hdev, "turning off Intel device LED failed");
- return ret;
- }
- kfree_skb(skb);
-
- return 0;
-}
-
-static int btusb_shutdown_intel_new(struct hci_dev *hdev)
-{
- struct sk_buff *skb;
-
- /* Send HCI Reset to the controller to stop any BT activity which
- * were triggered. This will help to save power and maintain the
- * sync b/w Host and controller
- */
- skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
- if (IS_ERR(skb)) {
- bt_dev_err(hdev, "HCI reset during shutdown failed");
- return PTR_ERR(skb);
- }
- kfree_skb(skb);
-
- return 0;
-}
-
+/* UHW CR mapping */
+#define MTK_BT_MISC 0x70002510
+#define MTK_BT_SUBSYS_RST 0x70002610
+#define MTK_UDMA_INT_STA_BT 0x74000024
+#define MTK_UDMA_INT_STA_BT1 0x74000308
+#define MTK_BT_WDT_STATUS 0x740003A0
+#define MTK_EP_RST_OPT 0x74011890
+#define MTK_EP_RST_IN_OUT_OPT 0x00010001
+#define MTK_BT_RST_DONE 0x00000100
+#define MTK_BT_RESET_WAIT_MS 100
+#define MTK_BT_RESET_NUM_TRIES 10
#define FIRMWARE_MT7663 "mediatek/mt7663pr2h.bin"
#define FIRMWARE_MT7668 "mediatek/mt7668pr2h.bin"
@@ -3655,6 +2674,63 @@ static int btusb_mtk_func_query(struct hci_dev *hdev)
return status;
}
+static int btusb_mtk_uhw_reg_write(struct btusb_data *data, u32 reg, u32 val)
+{
+ struct hci_dev *hdev = data->hdev;
+ int pipe, err;
+ void *buf;
+
+ buf = kzalloc(4, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ put_unaligned_le32(val, buf);
+
+ pipe = usb_sndctrlpipe(data->udev, 0);
+ err = usb_control_msg(data->udev, pipe, 0x02,
+ 0x5E,
+ reg >> 16, reg & 0xffff,
+ buf, 4, USB_CTRL_SET_TIMEOUT);
+ if (err < 0) {
+ bt_dev_err(hdev, "Failed to write uhw reg(%d)", err);
+ goto err_free_buf;
+ }
+
+err_free_buf:
+ kfree(buf);
+
+ return err;
+}
+
+static int btusb_mtk_uhw_reg_read(struct btusb_data *data, u32 reg, u32 *val)
+{
+ struct hci_dev *hdev = data->hdev;
+ int pipe, err;
+ void *buf;
+
+ buf = kzalloc(4, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ pipe = usb_rcvctrlpipe(data->udev, 0);
+ err = usb_control_msg(data->udev, pipe, 0x01,
+ 0xDE,
+ reg >> 16, reg & 0xffff,
+ buf, 4, USB_CTRL_SET_TIMEOUT);
+ if (err < 0) {
+ bt_dev_err(hdev, "Failed to read uhw reg(%d)", err);
+ goto err_free_buf;
+ }
+
+ *val = get_unaligned_le32(buf);
+ bt_dev_dbg(hdev, "reg=%x, value=0x%08x", reg, *val);
+
+err_free_buf:
+ kfree(buf);
+
+ return err;
+}
+
static int btusb_mtk_reg_read(struct btusb_data *data, u32 reg, u32 *val)
{
int pipe, err, size = sizeof(u32);
@@ -3734,6 +2810,9 @@ static int btusb_mtk_setup(struct hci_dev *hdev)
dev_id & 0xffff, (fw_version & 0xff) + 1);
err = btusb_mtk_setup_firmware_79xx(hdev, fw_bin_name);
+ /* Program the device endpoint reset option register */
+ btusb_mtk_uhw_reg_write(data, MTK_EP_RST_OPT, MTK_EP_RST_IN_OUT_OPT);
+
/* Enable Bluetooth protocol */
param = 1;
wmt_params.op = BTMTK_WMT_FUNC_CTRL;
@@ -3747,6 +2826,8 @@ static int btusb_mtk_setup(struct hci_dev *hdev)
bt_dev_err(hdev, "Failed to send wmt func ctrl (%d)", err);
return err;
}
+
+ hci_set_msft_opcode(hdev, 0xFD30);
goto done;
default:
bt_dev_err(hdev, "Unsupported hardware variant (%08x)",
@@ -3857,6 +2938,83 @@ static int btusb_mtk_shutdown(struct hci_dev *hdev)
return 0;
}
+static void btusb_mtk_cmd_timeout(struct hci_dev *hdev)
+{
+ struct btusb_data *data = hci_get_drvdata(hdev);
+ u32 val;
+ int err, retry = 0;
+
+ /* MediaTek-specific Bluetooth reset mechanism via USB */
+ if (test_and_set_bit(BTUSB_HW_RESET_ACTIVE, &data->flags)) {
+ bt_dev_err(hdev, "last reset failed? Not resetting again");
+ return;
+ }
+
+ err = usb_autopm_get_interface(data->intf);
+ if (err < 0)
+ return;
+
+ btusb_stop_traffic(data);
+ usb_kill_anchored_urbs(&data->tx_anchor);
+
+ /* Program the device endpoint reset option register */
+ bt_dev_dbg(hdev, "Initiating reset mechanism via uhw");
+ btusb_mtk_uhw_reg_write(data, MTK_EP_RST_OPT, MTK_EP_RST_IN_OUT_OPT);
+ btusb_mtk_uhw_reg_read(data, MTK_BT_WDT_STATUS, &val);
+
+ /* Reset the bluetooth chip via USB interface. */
+ btusb_mtk_uhw_reg_write(data, MTK_BT_SUBSYS_RST, 1);
+ btusb_mtk_uhw_reg_write(data, MTK_UDMA_INT_STA_BT, 0x000000FF);
+ btusb_mtk_uhw_reg_read(data, MTK_UDMA_INT_STA_BT, &val);
+ btusb_mtk_uhw_reg_write(data, MTK_UDMA_INT_STA_BT1, 0x000000FF);
+ btusb_mtk_uhw_reg_read(data, MTK_UDMA_INT_STA_BT1, &val);
+ /* MT7921 needs a 20 ms delay between setting and clearing the reset bit */
+ msleep(20);
+ btusb_mtk_uhw_reg_write(data, MTK_BT_SUBSYS_RST, 0);
+ btusb_mtk_uhw_reg_read(data, MTK_BT_SUBSYS_RST, &val);
+
+ /* Poll the register until reset is completed */
+ do {
+ btusb_mtk_uhw_reg_read(data, MTK_BT_MISC, &val);
+ if (val & MTK_BT_RST_DONE) {
+ bt_dev_dbg(hdev, "Bluetooth Reset Successfully");
+ break;
+ }
+
+ bt_dev_dbg(hdev, "Polling Bluetooth Reset CR");
+ retry++;
+ msleep(MTK_BT_RESET_WAIT_MS);
+ } while (retry < MTK_BT_RESET_NUM_TRIES);
+
+ btusb_mtk_id_get(data, 0x70010200, &val);
+ if (!val)
+ bt_dev_err(hdev, "Can't get device id, subsys reset fail.");
+
+ usb_queue_reset_device(data->intf);
+
+ clear_bit(BTUSB_HW_RESET_ACTIVE, &data->flags);
+}
+
+static int btusb_recv_acl_mtk(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct btusb_data *data = hci_get_drvdata(hdev);
+ u16 handle = le16_to_cpu(hci_acl_hdr(skb)->handle);
+
+ switch (handle) {
+ case 0xfc6f: /* Firmware dump from device */
+ /* When the firmware hangs, the device can no longer
+ * suspend and thus disable auto-suspend.
+ */
+ usb_disable_autosuspend(data->udev);
+ fallthrough;
+ case 0x05ff: /* Firmware debug logging 1 */
+ case 0x05fe: /* Firmware debug logging 2 */
+ return hci_recv_diag(hdev, skb);
+ }
+
+ return hci_recv_frame(hdev, skb);
+}
+
MODULE_FIRMWARE(FIRMWARE_MT7663);
MODULE_FIRMWARE(FIRMWARE_MT7668);
@@ -4437,9 +3595,6 @@ static bool btusb_prevent_wake(struct hci_dev *hdev)
{
struct btusb_data *data = hci_get_drvdata(hdev);
- if (test_bit(BTUSB_WAKEUP_DISABLE, &data->flags))
- return true;
-
return !device_may_wakeup(&data->udev->dev);
}
@@ -4465,7 +3620,7 @@ static int btusb_probe(struct usb_interface *intf,
struct btusb_data *data;
struct hci_dev *hdev;
unsigned ifnum_base;
- int i, err;
+ int i, err, priv_size;
BT_DBG("intf %p id %p", intf, id);
@@ -4551,16 +3706,23 @@ static int btusb_probe(struct usb_interface *intf,
init_usb_anchor(&data->ctrl_anchor);
spin_lock_init(&data->rxlock);
- if (id->driver_info & BTUSB_INTEL_NEW) {
+ priv_size = 0;
+
+ data->recv_event = hci_recv_frame;
+ data->recv_bulk = btusb_recv_bulk;
+
+ if (id->driver_info & BTUSB_INTEL_COMBINED) {
+ /* Allocate extra space for Intel device */
+ priv_size += sizeof(struct btintel_data);
+
+ /* Override the rx handlers */
data->recv_event = btusb_recv_event_intel;
data->recv_bulk = btusb_recv_bulk_intel;
- set_bit(BTUSB_BOOTLOADER, &data->flags);
- } else {
- data->recv_event = hci_recv_frame;
- data->recv_bulk = btusb_recv_bulk;
}
- hdev = hci_alloc_dev();
+ data->recv_acl = hci_recv_frame;
+
+ hdev = hci_alloc_dev_priv(priv_size);
if (!hdev)
return -ENOMEM;
@@ -4634,48 +3796,18 @@ static int btusb_probe(struct usb_interface *intf,
data->diag = usb_ifnum_to_if(data->udev, ifnum_base + 2);
}
- if (id->driver_info & BTUSB_INTEL) {
- hdev->manufacturer = 2;
- hdev->setup = btusb_setup_intel;
- hdev->shutdown = btusb_shutdown_intel;
- hdev->set_diag = btintel_set_diag_mfg;
- hdev->set_bdaddr = btintel_set_bdaddr;
- hdev->cmd_timeout = btusb_intel_cmd_timeout;
- set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
- set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
- set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks);
- }
-
- if (id->driver_info & BTUSB_INTEL_NEW) {
- hdev->manufacturer = 2;
- hdev->send = btusb_send_frame_intel;
- hdev->setup = btusb_setup_intel_new;
- hdev->shutdown = btusb_shutdown_intel_new;
- hdev->hw_error = btintel_hw_error;
- hdev->set_diag = btintel_set_diag;
- hdev->set_bdaddr = btintel_set_bdaddr;
- hdev->cmd_timeout = btusb_intel_cmd_timeout;
- set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
- set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
- set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks);
- }
+ /* Combined Intel Device setup to support multiple setup routine */
+ if (id->driver_info & BTUSB_INTEL_COMBINED) {
+ err = btintel_configure_setup(hdev);
+ if (err)
+ goto out_free_dev;
- if (id->driver_info & BTUSB_INTEL_NEWGEN) {
- hdev->manufacturer = 2;
+ /* Transport specific configuration */
hdev->send = btusb_send_frame_intel;
- hdev->setup = btusb_setup_intel_newgen;
- hdev->shutdown = btusb_shutdown_intel_new;
- hdev->hw_error = btintel_hw_error;
- hdev->set_diag = btintel_set_diag;
- hdev->set_bdaddr = btintel_set_bdaddr;
hdev->cmd_timeout = btusb_intel_cmd_timeout;
- set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
- set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
- set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks);
- data->recv_event = btusb_recv_event_intel;
- data->recv_bulk = btusb_recv_bulk_intel;
- set_bit(BTUSB_BOOTLOADER, &data->flags);
+ if (id->driver_info & BTUSB_INTEL_BROKEN_INITIAL_NCMD)
+ btintel_set_flag(hdev, INTEL_BROKEN_INITIAL_NCMD);
}
if (id->driver_info & BTUSB_MARVELL)
@@ -4686,7 +3818,9 @@ static int btusb_probe(struct usb_interface *intf,
hdev->setup = btusb_mtk_setup;
hdev->shutdown = btusb_mtk_shutdown;
hdev->manufacturer = 70;
+ hdev->cmd_timeout = btusb_mtk_cmd_timeout;
set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks);
+ data->recv_acl = btusb_recv_acl_mtk;
}
if (id->driver_info & BTUSB_SWAVE) {
@@ -4720,6 +3854,7 @@ static int btusb_probe(struct usb_interface *intf,
hdev->set_bdaddr = btusb_set_bdaddr_wcn6855;
hdev->cmd_timeout = btusb_qca_cmd_timeout;
set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+ hci_set_msft_opcode(hdev, 0xFD70);
}
if (id->driver_info & BTUSB_AMP) {
@@ -4737,11 +3872,9 @@ static int btusb_probe(struct usb_interface *intf,
hdev->shutdown = btrtl_shutdown_realtek;
hdev->cmd_timeout = btusb_rtl_cmd_timeout;
- /* Realtek devices lose their updated firmware over global
- * suspend that means host doesn't send SET_FEATURE
- * (DEVICE_REMOTE_WAKEUP)
- */
- set_bit(BTUSB_WAKEUP_DISABLE, &data->flags);
+ /* Realtek devices need to set remote wakeup on auto-suspend */
+ set_bit(BTUSB_WAKEUP_AUTOSUSPEND, &data->flags);
+ set_bit(BTUSB_USE_ALT3_FOR_WBS, &data->flags);
}
if (!reset)
@@ -4916,12 +4049,15 @@ static int btusb_suspend(struct usb_interface *intf, pm_message_t message)
* Actually, it depends on whether the usb host sends
* set feature (enable wakeup) or not.
*/
- if (test_bit(BTUSB_WAKEUP_DISABLE, &data->flags)) {
+ if (test_bit(BTUSB_WAKEUP_AUTOSUSPEND, &data->flags)) {
if (PMSG_IS_AUTO(message) &&
device_can_wakeup(&data->udev->dev))
data->udev->do_remote_wakeup = 1;
- else if (!PMSG_IS_AUTO(message))
+ else if (!PMSG_IS_AUTO(message) &&
+ !device_may_wakeup(&data->udev->dev)) {
+ data->udev->do_remote_wakeup = 0;
data->udev->reset_resume = 1;
+ }
}
return 0;
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 3cd57fc56ade..ef54afa29357 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -51,6 +51,7 @@
/**
* struct bcm_device_data - device specific data
* @no_early_set_baudrate: Disallow set baudrate before driver setup()
+ * @drive_rts_on_open: drive RTS signal on ->open() when platform requires it
*/
struct bcm_device_data {
bool no_early_set_baudrate;
@@ -77,6 +78,8 @@ struct bcm_device_data {
* @btlp: Apple ACPI method to toggle BT_WAKE pin ("Bluetooth Low Power")
* @btpu: Apple ACPI method to drive BT_REG_ON pin high ("Bluetooth Power Up")
* @btpd: Apple ACPI method to drive BT_REG_ON pin low ("Bluetooth Power Down")
+ * @gpio_count: internal counter for GPIO resources associated with ACPI device
+ * @gpio_int_idx: index in _CRS for GpioInt() resource
* @txco_clk: external reference frequency clock used by Bluetooth device
* @lpo_clk: external LPO clock used by Bluetooth device
* @supplies: VBAT and VDDIO supplies used by Bluetooth device
@@ -88,10 +91,13 @@ struct bcm_device_data {
* set to 0 if @init_speed is already the preferred baudrate
* @irq: interrupt triggered by HOST_WAKE_BT pin
* @irq_active_low: whether @irq is active low
+ * @irq_acquired: flag to show if IRQ handler has been assigned
* @hu: pointer to HCI UART controller struct,
* used to disable flow control during runtime suspend and system sleep
* @is_suspended: whether flow control is currently disabled
* @no_early_set_baudrate: don't set_baudrate before setup()
+ * @drive_rts_on_open: drive RTS signal on ->open() when platform requires it
+ * @pcm_int_params: keep the initial PCM configuration
*/
struct bcm_device {
/* Must be the first member, hci_serdev.c expects this. */
diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c
index e0520639f4ba..0c0dedece59c 100644
--- a/drivers/bluetooth/hci_h5.c
+++ b/drivers/bluetooth/hci_h5.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/mod_devicetable.h>
#include <linux/of_device.h>
+#include <linux/pm_runtime.h>
#include <linux/serdev.h>
#include <linux/skbuff.h>
@@ -21,6 +22,8 @@
#include "btrtl.h"
#include "hci_uart.h"
+#define SUSPEND_TIMEOUT_MS 6000
+
#define HCI_3WIRE_ACK_PKT 0
#define HCI_3WIRE_LINK_PKT 15
@@ -51,8 +54,10 @@
/* H5 state flags */
enum {
- H5_RX_ESC, /* SLIP escape mode */
- H5_TX_ACK_REQ, /* Pending ack to send */
+ H5_RX_ESC, /* SLIP escape mode */
+ H5_TX_ACK_REQ, /* Pending ack to send */
+ H5_WAKEUP_DISABLE, /* Device cannot wake host */
+ H5_HW_FLOW_CONTROL, /* Use HW flow control */
};
struct h5 {
@@ -97,6 +102,10 @@ struct h5 {
struct gpio_desc *device_wake_gpio;
};
+enum h5_driver_info {
+ H5_INFO_WAKEUP_DISABLE = BIT(0),
+};
+
struct h5_vnd {
int (*setup)(struct h5 *h5);
void (*open)(struct h5 *h5);
@@ -106,6 +115,11 @@ struct h5_vnd {
const struct acpi_gpio_mapping *acpi_gpio_map;
};
+struct h5_device_data {
+ uint32_t driver_info;
+ struct h5_vnd *vnd;
+};
+
static void h5_reset_rx(struct h5 *h5);
static void h5_link_control(struct hci_uart *hu, const void *data, size_t len)
@@ -573,6 +587,10 @@ static int h5_recv(struct hci_uart *hu, const void *data, int count)
count -= processed;
}
+ pm_runtime_get(&hu->serdev->dev);
+ pm_runtime_mark_last_busy(&hu->serdev->dev);
+ pm_runtime_put_autosuspend(&hu->serdev->dev);
+
return 0;
}
@@ -609,6 +627,10 @@ static int h5_enqueue(struct hci_uart *hu, struct sk_buff *skb)
break;
}
+ pm_runtime_get_sync(&hu->serdev->dev);
+ pm_runtime_mark_last_busy(&hu->serdev->dev);
+ pm_runtime_put_autosuspend(&hu->serdev->dev);
+
return 0;
}
@@ -791,6 +813,8 @@ static int h5_serdev_probe(struct serdev_device *serdev)
{
struct device *dev = &serdev->dev;
struct h5 *h5;
+ const struct h5_device_data *data;
+ int err;
h5 = devm_kzalloc(dev, sizeof(*h5), GFP_KERNEL);
if (!h5)
@@ -807,20 +831,19 @@ static int h5_serdev_probe(struct serdev_device *serdev)
if (!match)
return -ENODEV;
- h5->vnd = (const struct h5_vnd *)match->driver_data;
+ data = (const struct h5_device_data *)match->driver_data;
+ h5->vnd = data->vnd;
h5->id = (char *)match->id;
if (h5->vnd->acpi_gpio_map)
devm_acpi_dev_add_driver_gpios(dev,
h5->vnd->acpi_gpio_map);
} else {
- const void *data;
-
data = of_device_get_match_data(dev);
if (!data)
return -ENODEV;
- h5->vnd = (const struct h5_vnd *)data;
+ h5->vnd = data->vnd;
}
@@ -833,7 +856,14 @@ static int h5_serdev_probe(struct serdev_device *serdev)
if (IS_ERR(h5->device_wake_gpio))
return PTR_ERR(h5->device_wake_gpio);
- return hci_uart_register_device(&h5->serdev_hu, &h5p);
+ err = hci_uart_register_device(&h5->serdev_hu, &h5p);
+ if (err)
+ return err;
+
+ if (data->driver_info & H5_INFO_WAKEUP_DISABLE)
+ set_bit(H5_WAKEUP_DISABLE, &h5->flags);
+
+ return 0;
}
static void h5_serdev_remove(struct serdev_device *serdev)
@@ -902,6 +932,9 @@ static int h5_btrtl_setup(struct h5 *h5)
serdev_device_set_baudrate(h5->hu->serdev, controller_baudrate);
serdev_device_set_flow_control(h5->hu->serdev, flow_control);
+ if (flow_control)
+ set_bit(H5_HW_FLOW_CONTROL, &h5->flags);
+
err = btrtl_download_firmware(h5->hu->hdev, btrtl_dev);
/* Give the device some time before the hci-core sends it a reset */
usleep_range(10000, 20000);
@@ -916,11 +949,25 @@ out_free:
static void h5_btrtl_open(struct h5 *h5)
{
+ /*
+ * Since h5_btrtl_resume() does a device_reprobe() the suspend handling
+ * done by the hci_suspend_notifier is not necessary; it actually causes
+ * delays and a bunch of errors to get logged, so disable it.
+ */
+ if (test_bit(H5_WAKEUP_DISABLE, &h5->flags))
+ set_bit(HCI_UART_NO_SUSPEND_NOTIFIER, &h5->hu->flags);
+
/* Devices always start with these fixed parameters */
serdev_device_set_flow_control(h5->hu->serdev, false);
serdev_device_set_parity(h5->hu->serdev, SERDEV_PARITY_EVEN);
serdev_device_set_baudrate(h5->hu->serdev, 115200);
+ pm_runtime_set_active(&h5->hu->serdev->dev);
+ pm_runtime_use_autosuspend(&h5->hu->serdev->dev);
+ pm_runtime_set_autosuspend_delay(&h5->hu->serdev->dev,
+ SUSPEND_TIMEOUT_MS);
+ pm_runtime_enable(&h5->hu->serdev->dev);
+
/* The controller needs up to 500ms to wakeup */
gpiod_set_value_cansleep(h5->enable_gpio, 1);
gpiod_set_value_cansleep(h5->device_wake_gpio, 1);
@@ -929,21 +976,26 @@ static void h5_btrtl_open(struct h5 *h5)
static void h5_btrtl_close(struct h5 *h5)
{
+ pm_runtime_disable(&h5->hu->serdev->dev);
+
gpiod_set_value_cansleep(h5->device_wake_gpio, 0);
gpiod_set_value_cansleep(h5->enable_gpio, 0);
}
/* Suspend/resume support. On many devices the RTL BT device loses power during
* suspend/resume, causing it to lose its firmware and all state. So we simply
- * turn it off on suspend and reprobe on resume. This mirrors how RTL devices
- * are handled in the USB driver, where the USB_QUIRK_RESET_RESUME is used which
+ * turn it off on suspend and reprobe on resume. This mirrors how RTL devices
+ * are handled in the USB driver, where the BTUSB_WAKEUP_DISABLE is used which
* also causes a reprobe on resume.
*/
static int h5_btrtl_suspend(struct h5 *h5)
{
serdev_device_set_flow_control(h5->hu->serdev, false);
gpiod_set_value_cansleep(h5->device_wake_gpio, 0);
- gpiod_set_value_cansleep(h5->enable_gpio, 0);
+
+ if (test_bit(H5_WAKEUP_DISABLE, &h5->flags))
+ gpiod_set_value_cansleep(h5->enable_gpio, 0);
+
return 0;
}
@@ -969,17 +1021,25 @@ static void h5_btrtl_reprobe_worker(struct work_struct *work)
static int h5_btrtl_resume(struct h5 *h5)
{
- struct h5_btrtl_reprobe *reprobe;
+ if (test_bit(H5_WAKEUP_DISABLE, &h5->flags)) {
+ struct h5_btrtl_reprobe *reprobe;
- reprobe = kzalloc(sizeof(*reprobe), GFP_KERNEL);
- if (!reprobe)
- return -ENOMEM;
+ reprobe = kzalloc(sizeof(*reprobe), GFP_KERNEL);
+ if (!reprobe)
+ return -ENOMEM;
- __module_get(THIS_MODULE);
+ __module_get(THIS_MODULE);
+
+ INIT_WORK(&reprobe->work, h5_btrtl_reprobe_worker);
+ reprobe->dev = get_device(&h5->hu->serdev->dev);
+ queue_work(system_long_wq, &reprobe->work);
+ } else {
+ gpiod_set_value_cansleep(h5->device_wake_gpio, 1);
+
+ if (test_bit(H5_HW_FLOW_CONTROL, &h5->flags))
+ serdev_device_set_flow_control(h5->hu->serdev, true);
+ }
- INIT_WORK(&reprobe->work, h5_btrtl_reprobe_worker);
- reprobe->dev = get_device(&h5->hu->serdev->dev);
- queue_work(system_long_wq, &reprobe->work);
return 0;
}
@@ -1001,13 +1061,22 @@ static struct h5_vnd rtl_vnd = {
.resume = h5_btrtl_resume,
.acpi_gpio_map = acpi_btrtl_gpios,
};
+
+static const struct h5_device_data h5_data_rtl8822cs = {
+ .vnd = &rtl_vnd,
+};
+
+static const struct h5_device_data h5_data_rtl8723bs = {
+ .driver_info = H5_INFO_WAKEUP_DISABLE,
+ .vnd = &rtl_vnd,
+};
#endif
#ifdef CONFIG_ACPI
static const struct acpi_device_id h5_acpi_match[] = {
#ifdef CONFIG_BT_HCIUART_RTL
- { "OBDA0623", (kernel_ulong_t)&rtl_vnd },
- { "OBDA8723", (kernel_ulong_t)&rtl_vnd },
+ { "OBDA0623", (kernel_ulong_t)&h5_data_rtl8723bs },
+ { "OBDA8723", (kernel_ulong_t)&h5_data_rtl8723bs },
#endif
{ },
};
@@ -1016,16 +1085,17 @@ MODULE_DEVICE_TABLE(acpi, h5_acpi_match);
static const struct dev_pm_ops h5_serdev_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(h5_serdev_suspend, h5_serdev_resume)
+ SET_RUNTIME_PM_OPS(h5_serdev_suspend, h5_serdev_resume, NULL)
};
static const struct of_device_id rtl_bluetooth_of_match[] = {
#ifdef CONFIG_BT_HCIUART_RTL
{ .compatible = "realtek,rtl8822cs-bt",
- .data = (const void *)&rtl_vnd },
+ .data = (const void *)&h5_data_rtl8822cs },
{ .compatible = "realtek,rtl8723bs-bt",
- .data = (const void *)&rtl_vnd },
+ .data = (const void *)&h5_data_rtl8723bs },
{ .compatible = "realtek,rtl8723ds-bt",
- .data = (const void *)&rtl_vnd },
+ .data = (const void *)&h5_data_rtl8723bs },
#endif
{ },
};
diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c
index 9e03402ef1b3..3b00d82d36cf 100644
--- a/drivers/bluetooth/hci_serdev.c
+++ b/drivers/bluetooth/hci_serdev.c
@@ -343,6 +343,9 @@ int hci_uart_register_device(struct hci_uart *hu,
hdev->setup = hci_uart_setup;
SET_HCIDEV_DEV(hdev, &hu->serdev->dev);
+ if (test_bit(HCI_UART_NO_SUSPEND_NOTIFIER, &hu->flags))
+ set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks);
+
if (test_bit(HCI_UART_RAW_DEVICE, &hu->hdev_flags))
set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks);
diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h
index 4e039d7a16f8..fb4a2d0d8cc8 100644
--- a/drivers/bluetooth/hci_uart.h
+++ b/drivers/bluetooth/hci_uart.h
@@ -86,9 +86,10 @@ struct hci_uart {
};
/* HCI_UART proto flag bits */
-#define HCI_UART_PROTO_SET 0
-#define HCI_UART_REGISTERED 1
-#define HCI_UART_PROTO_READY 2
+#define HCI_UART_PROTO_SET 0
+#define HCI_UART_REGISTERED 1
+#define HCI_UART_PROTO_READY 2
+#define HCI_UART_NO_SUSPEND_NOTIFIER 3
/* TX states */
#define HCI_UART_SENDING 1
diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c
index 09c8ab5e0959..8fd4a356a86e 100644
--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
+++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
@@ -63,11 +63,14 @@ struct fsl_mc_addr_translation_range {
#define FSL_MC_GCR1 0x0
#define GCR1_P1_STOP BIT(31)
+#define GCR1_P2_STOP BIT(30)
#define FSL_MC_FAPR 0x28
#define MC_FAPR_PL BIT(18)
#define MC_FAPR_BMT BIT(17)
+static phys_addr_t mc_portal_base_phys_addr;
+
/**
* fsl_mc_bus_match - device to driver matching callback
* @dev: the fsl-mc device to match against
@@ -220,7 +223,7 @@ static int scan_fsl_mc_bus(struct device *dev, void *data)
root_mc_dev = to_fsl_mc_device(dev);
root_mc_bus = to_fsl_mc_bus(root_mc_dev);
mutex_lock(&root_mc_bus->scan_mutex);
- dprc_scan_objects(root_mc_dev, NULL);
+ dprc_scan_objects(root_mc_dev, false);
mutex_unlock(&root_mc_bus->scan_mutex);
exit:
@@ -703,14 +706,30 @@ static int fsl_mc_device_get_mmio_regions(struct fsl_mc_device *mc_dev,
* If base address is in the region_desc use it otherwise
* revert to old mechanism
*/
- if (region_desc.base_address)
+ if (region_desc.base_address) {
regions[i].start = region_desc.base_address +
region_desc.base_offset;
- else
+ } else {
error = translate_mc_addr(mc_dev, mc_region_type,
region_desc.base_offset,
&regions[i].start);
+ /*
+ * Some versions of the MC firmware wrongly report
+ * 0 for register base address of the DPMCP associated
+ * with child DPRC objects thus rendering them unusable.
+ * This is particularly troublesome in ACPI boot
+ * scenarios where the legacy way of extracting this
+ * base address from the device tree does not apply.
+ * Given that DPMCPs share the same base address,
+ * work around this by using the base address extracted
+ * from the root DPRC container.
+ */
+ if (is_fsl_mc_bus_dprc(mc_dev) &&
+ regions[i].start == region_desc.base_offset)
+ regions[i].start += mc_portal_base_phys_addr;
+ }
+
if (error < 0) {
dev_err(parent_dev,
"Invalid MC offset: %#x (for %s.%d\'s region %d)\n",
@@ -895,6 +914,8 @@ error_cleanup_dev:
}
EXPORT_SYMBOL_GPL(fsl_mc_device_add);
+static struct notifier_block fsl_mc_nb;
+
/**
* fsl_mc_device_remove - Remove an fsl-mc device from being visible to
* Linux
@@ -914,7 +935,8 @@ void fsl_mc_device_remove(struct fsl_mc_device *mc_dev)
}
EXPORT_SYMBOL_GPL(fsl_mc_device_remove);
-struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev)
+struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev,
+ u16 if_id)
{
struct fsl_mc_device *mc_bus_dev, *endpoint;
struct fsl_mc_obj_desc endpoint_desc = {{ 0 }};
@@ -925,6 +947,7 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev)
mc_bus_dev = to_fsl_mc_device(mc_dev->dev.parent);
strcpy(endpoint1.type, mc_dev->obj_desc.type);
endpoint1.id = mc_dev->obj_desc.id;
+ endpoint1.if_id = if_id;
err = dprc_get_connection(mc_bus_dev->mc_io, 0,
mc_bus_dev->mc_handle,
@@ -947,10 +970,28 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev)
* We know that the device has an endpoint because we verified by
* interrogating the firmware. This is the case when the device was not
* yet discovered by the fsl-mc bus, thus the lookup returned NULL.
- * Differentiate this case by returning EPROBE_DEFER.
+ * Force a rescan of the devices in this container and retry the lookup.
+ */
+ if (!endpoint) {
+ struct fsl_mc_bus *mc_bus = to_fsl_mc_bus(mc_bus_dev);
+
+ if (mutex_trylock(&mc_bus->scan_mutex)) {
+ err = dprc_scan_objects(mc_bus_dev, true);
+ mutex_unlock(&mc_bus->scan_mutex);
+ }
+
+ if (err < 0)
+ return ERR_PTR(err);
+ }
+
+ endpoint = fsl_mc_device_lookup(&endpoint_desc, mc_bus_dev);
+ /*
+ * This means that the endpoint might reside in a different isolation
+ * context (DPRC/container). Not much to do, so return a permission
+ * error.
*/
if (!endpoint)
- return ERR_PTR(-EPROBE_DEFER);
+ return ERR_PTR(-EPERM);
return endpoint;
}
@@ -1089,17 +1130,6 @@ static int fsl_mc_bus_probe(struct platform_device *pdev)
}
if (mc->fsl_mc_regs) {
- /*
- * Some bootloaders pause the MC firmware before booting the
- * kernel so that MC will not cause faults as soon as the
- * SMMU probes due to the fact that there's no configuration
- * in place for MC.
- * At this point MC should have all its SMMU setup done so make
- * sure it is resumed.
- */
- writel(readl(mc->fsl_mc_regs + FSL_MC_GCR1) & (~GCR1_P1_STOP),
- mc->fsl_mc_regs + FSL_MC_GCR1);
-
if (IS_ENABLED(CONFIG_ACPI) && !dev_of_node(&pdev->dev)) {
mc_stream_id = readl(mc->fsl_mc_regs + FSL_MC_FAPR);
/*
@@ -1113,11 +1143,25 @@ static int fsl_mc_bus_probe(struct platform_device *pdev)
error = acpi_dma_configure_id(&pdev->dev,
DEV_DMA_COHERENT,
&mc_stream_id);
+ if (error == -EPROBE_DEFER)
+ return error;
if (error)
dev_warn(&pdev->dev,
"failed to configure dma: %d.\n",
error);
}
+
+ /*
+ * Some bootloaders pause the MC firmware before booting the
+ * kernel so that MC will not cause faults as soon as the
+ * SMMU probes due to the fact that there's no configuration
+ * in place for MC.
+ * At this point MC should have all its SMMU setup done so make
+ * sure it is resumed.
+ */
+ writel(readl(mc->fsl_mc_regs + FSL_MC_GCR1) &
+ (~(GCR1_P1_STOP | GCR1_P2_STOP)),
+ mc->fsl_mc_regs + FSL_MC_GCR1);
}
/*
@@ -1126,6 +1170,8 @@ static int fsl_mc_bus_probe(struct platform_device *pdev)
plat_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
mc_portal_phys_addr = plat_res->start;
mc_portal_size = resource_size(plat_res);
+ mc_portal_base_phys_addr = mc_portal_phys_addr & ~0x3ffffff;
+
error = fsl_create_mc_io(&pdev->dev, mc_portal_phys_addr,
mc_portal_size, NULL,
FSL_MC_IO_ATOMIC_CONTEXT_PORTAL, &mc_io);
@@ -1199,9 +1245,26 @@ static int fsl_mc_bus_remove(struct platform_device *pdev)
fsl_destroy_mc_io(mc->root_mc_bus_dev->mc_io);
mc->root_mc_bus_dev->mc_io = NULL;
+ bus_unregister_notifier(&fsl_mc_bus_type, &fsl_mc_nb);
+
+ if (mc->fsl_mc_regs) {
+ /*
+ * Pause the MC firmware so that it doesn't crash in certain
+ * scenarios, such as kexec.
+ */
+ writel(readl(mc->fsl_mc_regs + FSL_MC_GCR1) |
+ (GCR1_P1_STOP | GCR1_P2_STOP),
+ mc->fsl_mc_regs + FSL_MC_GCR1);
+ }
+
return 0;
}
+static void fsl_mc_bus_shutdown(struct platform_device *pdev)
+{
+ fsl_mc_bus_remove(pdev);
+}
+
static const struct of_device_id fsl_mc_bus_match_table[] = {
{.compatible = "fsl,qoriq-mc",},
{},
@@ -1224,6 +1287,45 @@ static struct platform_driver fsl_mc_bus_driver = {
},
.probe = fsl_mc_bus_probe,
.remove = fsl_mc_bus_remove,
+ .shutdown = fsl_mc_bus_shutdown,
+};
+
+static int fsl_mc_bus_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+ struct resource *res;
+ void __iomem *fsl_mc_regs;
+
+ if (action != BUS_NOTIFY_ADD_DEVICE)
+ return 0;
+
+ if (!of_match_device(fsl_mc_bus_match_table, dev) &&
+ !acpi_match_device(fsl_mc_bus_acpi_match_table, dev))
+ return 0;
+
+ res = platform_get_resource(to_platform_device(dev), IORESOURCE_MEM, 1);
+ if (!res)
+ return 0;
+
+ fsl_mc_regs = ioremap(res->start, resource_size(res));
+ if (!fsl_mc_regs)
+ return 0;
+
+ /*
+ * Make sure that the MC firmware is paused before the IOMMU setup for
+ * it is done or otherwise the firmware will crash right after the SMMU
+ * gets probed and enabled.
+ */
+ writel(readl(fsl_mc_regs + FSL_MC_GCR1) | (GCR1_P1_STOP | GCR1_P2_STOP),
+ fsl_mc_regs + FSL_MC_GCR1);
+ iounmap(fsl_mc_regs);
+
+ return 0;
+}
+
+static struct notifier_block fsl_mc_nb = {
+ .notifier_call = fsl_mc_bus_notifier,
};
static int __init fsl_mc_bus_driver_init(void)
@@ -1250,7 +1352,7 @@ static int __init fsl_mc_bus_driver_init(void)
if (error < 0)
goto error_cleanup_dprc_driver;
- return 0;
+ return bus_register_notifier(&platform_bus_type, &fsl_mc_nb);
error_cleanup_dprc_driver:
dprc_driver_exit();
diff --git a/drivers/bus/mhi/core/boot.c b/drivers/bus/mhi/core/boot.c
index 8100cf51cd09..0a972620a403 100644
--- a/drivers/bus/mhi/core/boot.c
+++ b/drivers/bus/mhi/core/boot.c
@@ -302,8 +302,8 @@ void mhi_free_bhie_table(struct mhi_controller *mhi_cntrl,
struct mhi_buf *mhi_buf = image_info->mhi_buf;
for (i = 0; i < image_info->entries; i++, mhi_buf++)
- mhi_free_coherent(mhi_cntrl, mhi_buf->len, mhi_buf->buf,
- mhi_buf->dma_addr);
+ dma_free_coherent(mhi_cntrl->cntrl_dev, mhi_buf->len,
+ mhi_buf->buf, mhi_buf->dma_addr);
kfree(image_info->mhi_buf);
kfree(image_info);
@@ -339,8 +339,8 @@ int mhi_alloc_bhie_table(struct mhi_controller *mhi_cntrl,
vec_size = sizeof(struct bhi_vec_entry) * i;
mhi_buf->len = vec_size;
- mhi_buf->buf = mhi_alloc_coherent(mhi_cntrl, vec_size,
- &mhi_buf->dma_addr,
+ mhi_buf->buf = dma_alloc_coherent(mhi_cntrl->cntrl_dev,
+ vec_size, &mhi_buf->dma_addr,
GFP_KERNEL);
if (!mhi_buf->buf)
goto error_alloc_segment;
@@ -354,8 +354,8 @@ int mhi_alloc_bhie_table(struct mhi_controller *mhi_cntrl,
error_alloc_segment:
for (--i, --mhi_buf; i >= 0; i--, mhi_buf--)
- mhi_free_coherent(mhi_cntrl, mhi_buf->len, mhi_buf->buf,
- mhi_buf->dma_addr);
+ dma_free_coherent(mhi_cntrl->cntrl_dev, mhi_buf->len,
+ mhi_buf->buf, mhi_buf->dma_addr);
error_alloc_mhi_buf:
kfree(img_info);
@@ -442,7 +442,8 @@ void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl)
if (size > firmware->size)
size = firmware->size;
- buf = mhi_alloc_coherent(mhi_cntrl, size, &dma_addr, GFP_KERNEL);
+ buf = dma_alloc_coherent(mhi_cntrl->cntrl_dev, size, &dma_addr,
+ GFP_KERNEL);
if (!buf) {
release_firmware(firmware);
goto error_fw_load;
@@ -451,7 +452,7 @@ void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl)
/* Download image using BHI */
memcpy(buf, firmware->data, size);
ret = mhi_fw_load_bhi(mhi_cntrl, dma_addr, size);
- mhi_free_coherent(mhi_cntrl, size, buf, dma_addr);
+ dma_free_coherent(mhi_cntrl->cntrl_dev, size, buf, dma_addr);
/* Error or in EDL mode, we're done */
if (ret) {
diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c
index c81b377fca8f..5aaca6d0f52b 100644
--- a/drivers/bus/mhi/core/init.c
+++ b/drivers/bus/mhi/core/init.c
@@ -129,7 +129,7 @@ static int mhi_alloc_aligned_ring(struct mhi_controller *mhi_cntrl,
u64 len)
{
ring->alloc_size = len + (len - 1);
- ring->pre_aligned = mhi_alloc_coherent(mhi_cntrl, ring->alloc_size,
+ ring->pre_aligned = dma_alloc_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size,
&ring->dma_handle, GFP_KERNEL);
if (!ring->pre_aligned)
return -ENOMEM;
@@ -221,13 +221,13 @@ void mhi_deinit_dev_ctxt(struct mhi_controller *mhi_cntrl)
mhi_cmd = mhi_cntrl->mhi_cmd;
for (i = 0; i < NR_OF_CMD_RINGS; i++, mhi_cmd++) {
ring = &mhi_cmd->ring;
- mhi_free_coherent(mhi_cntrl, ring->alloc_size,
+ dma_free_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size,
ring->pre_aligned, ring->dma_handle);
ring->base = NULL;
ring->iommu_base = 0;
}
- mhi_free_coherent(mhi_cntrl,
+ dma_free_coherent(mhi_cntrl->cntrl_dev,
sizeof(*mhi_ctxt->cmd_ctxt) * NR_OF_CMD_RINGS,
mhi_ctxt->cmd_ctxt, mhi_ctxt->cmd_ctxt_addr);
@@ -237,17 +237,17 @@ void mhi_deinit_dev_ctxt(struct mhi_controller *mhi_cntrl)
continue;
ring = &mhi_event->ring;
- mhi_free_coherent(mhi_cntrl, ring->alloc_size,
+ dma_free_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size,
ring->pre_aligned, ring->dma_handle);
ring->base = NULL;
ring->iommu_base = 0;
}
- mhi_free_coherent(mhi_cntrl, sizeof(*mhi_ctxt->er_ctxt) *
+ dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->er_ctxt) *
mhi_cntrl->total_ev_rings, mhi_ctxt->er_ctxt,
mhi_ctxt->er_ctxt_addr);
- mhi_free_coherent(mhi_cntrl, sizeof(*mhi_ctxt->chan_ctxt) *
+ dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->chan_ctxt) *
mhi_cntrl->max_chan, mhi_ctxt->chan_ctxt,
mhi_ctxt->chan_ctxt_addr);
@@ -275,7 +275,7 @@ int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl)
return -ENOMEM;
/* Setup channel ctxt */
- mhi_ctxt->chan_ctxt = mhi_alloc_coherent(mhi_cntrl,
+ mhi_ctxt->chan_ctxt = dma_alloc_coherent(mhi_cntrl->cntrl_dev,
sizeof(*mhi_ctxt->chan_ctxt) *
mhi_cntrl->max_chan,
&mhi_ctxt->chan_ctxt_addr,
@@ -307,7 +307,7 @@ int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl)
}
/* Setup event context */
- mhi_ctxt->er_ctxt = mhi_alloc_coherent(mhi_cntrl,
+ mhi_ctxt->er_ctxt = dma_alloc_coherent(mhi_cntrl->cntrl_dev,
sizeof(*mhi_ctxt->er_ctxt) *
mhi_cntrl->total_ev_rings,
&mhi_ctxt->er_ctxt_addr,
@@ -354,7 +354,7 @@ int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl)
/* Setup cmd context */
ret = -ENOMEM;
- mhi_ctxt->cmd_ctxt = mhi_alloc_coherent(mhi_cntrl,
+ mhi_ctxt->cmd_ctxt = dma_alloc_coherent(mhi_cntrl->cntrl_dev,
sizeof(*mhi_ctxt->cmd_ctxt) *
NR_OF_CMD_RINGS,
&mhi_ctxt->cmd_ctxt_addr,
@@ -389,10 +389,10 @@ error_alloc_cmd:
for (--i, --mhi_cmd; i >= 0; i--, mhi_cmd--) {
struct mhi_ring *ring = &mhi_cmd->ring;
- mhi_free_coherent(mhi_cntrl, ring->alloc_size,
+ dma_free_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size,
ring->pre_aligned, ring->dma_handle);
}
- mhi_free_coherent(mhi_cntrl,
+ dma_free_coherent(mhi_cntrl->cntrl_dev,
sizeof(*mhi_ctxt->cmd_ctxt) * NR_OF_CMD_RINGS,
mhi_ctxt->cmd_ctxt, mhi_ctxt->cmd_ctxt_addr);
i = mhi_cntrl->total_ev_rings;
@@ -405,15 +405,15 @@ error_alloc_er:
if (mhi_event->offload_ev)
continue;
- mhi_free_coherent(mhi_cntrl, ring->alloc_size,
+ dma_free_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size,
ring->pre_aligned, ring->dma_handle);
}
- mhi_free_coherent(mhi_cntrl, sizeof(*mhi_ctxt->er_ctxt) *
+ dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->er_ctxt) *
mhi_cntrl->total_ev_rings, mhi_ctxt->er_ctxt,
mhi_ctxt->er_ctxt_addr);
error_alloc_er_ctxt:
- mhi_free_coherent(mhi_cntrl, sizeof(*mhi_ctxt->chan_ctxt) *
+ dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->chan_ctxt) *
mhi_cntrl->max_chan, mhi_ctxt->chan_ctxt,
mhi_ctxt->chan_ctxt_addr);
@@ -567,7 +567,7 @@ void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl,
if (!chan_ctxt->rbase) /* Already uninitialized */
return;
- mhi_free_coherent(mhi_cntrl, tre_ring->alloc_size,
+ dma_free_coherent(mhi_cntrl->cntrl_dev, tre_ring->alloc_size,
tre_ring->pre_aligned, tre_ring->dma_handle);
vfree(buf_ring->base);
@@ -610,7 +610,7 @@ int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl,
buf_ring->base = vzalloc(buf_ring->len);
if (!buf_ring->base) {
- mhi_free_coherent(mhi_cntrl, tre_ring->alloc_size,
+ dma_free_coherent(mhi_cntrl->cntrl_dev, tre_ring->alloc_size,
tre_ring->pre_aligned, tre_ring->dma_handle);
return -ENOMEM;
}
@@ -885,7 +885,8 @@ int mhi_register_controller(struct mhi_controller *mhi_cntrl,
if (!mhi_cntrl || !mhi_cntrl->cntrl_dev || !mhi_cntrl->regs ||
!mhi_cntrl->runtime_get || !mhi_cntrl->runtime_put ||
!mhi_cntrl->status_cb || !mhi_cntrl->read_reg ||
- !mhi_cntrl->write_reg || !mhi_cntrl->nr_irqs || !mhi_cntrl->irq)
+ !mhi_cntrl->write_reg || !mhi_cntrl->nr_irqs ||
+ !mhi_cntrl->irq || !mhi_cntrl->reg_len)
return -EINVAL;
ret = parse_config(mhi_cntrl, config);
@@ -1063,7 +1064,7 @@ EXPORT_SYMBOL_GPL(mhi_free_controller);
int mhi_prepare_for_power_up(struct mhi_controller *mhi_cntrl)
{
struct device *dev = &mhi_cntrl->mhi_dev->dev;
- u32 bhie_off;
+ u32 bhi_off, bhie_off;
int ret;
mutex_lock(&mhi_cntrl->pm_mutex);
@@ -1072,29 +1073,51 @@ int mhi_prepare_for_power_up(struct mhi_controller *mhi_cntrl)
if (ret)
goto error_dev_ctxt;
- /*
- * Allocate RDDM table if specified, this table is for debugging purpose
- */
- if (mhi_cntrl->rddm_size) {
- mhi_alloc_bhie_table(mhi_cntrl, &mhi_cntrl->rddm_image,
- mhi_cntrl->rddm_size);
+ ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->regs, BHIOFF, &bhi_off);
+ if (ret) {
+ dev_err(dev, "Error getting BHI offset\n");
+ goto error_reg_offset;
+ }
- /*
- * This controller supports RDDM, so we need to manually clear
- * BHIE RX registers since POR values are undefined.
- */
+ if (bhi_off >= mhi_cntrl->reg_len) {
+ dev_err(dev, "BHI offset: 0x%x is out of range: 0x%zx\n",
+ bhi_off, mhi_cntrl->reg_len);
+ ret = -EINVAL;
+ goto error_reg_offset;
+ }
+ mhi_cntrl->bhi = mhi_cntrl->regs + bhi_off;
+
+ if (mhi_cntrl->fbc_download || mhi_cntrl->rddm_size) {
ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->regs, BHIEOFF,
&bhie_off);
if (ret) {
dev_err(dev, "Error getting BHIE offset\n");
- goto bhie_error;
+ goto error_reg_offset;
}
+ if (bhie_off >= mhi_cntrl->reg_len) {
+ dev_err(dev,
+ "BHIe offset: 0x%x is out of range: 0x%zx\n",
+ bhie_off, mhi_cntrl->reg_len);
+ ret = -EINVAL;
+ goto error_reg_offset;
+ }
mhi_cntrl->bhie = mhi_cntrl->regs + bhie_off;
+ }
+
+ if (mhi_cntrl->rddm_size) {
+ /*
+ * This controller supports RDDM, so we need to manually clear
+ * BHIE RX registers since POR values are undefined.
+ */
memset_io(mhi_cntrl->bhie + BHIE_RXVECADDR_LOW_OFFS,
0, BHIE_RXVECSTATUS_OFFS - BHIE_RXVECADDR_LOW_OFFS +
4);
-
+ /*
+ * Allocate RDDM table for debugging purpose if specified
+ */
+ mhi_alloc_bhie_table(mhi_cntrl, &mhi_cntrl->rddm_image,
+ mhi_cntrl->rddm_size);
if (mhi_cntrl->rddm_image)
mhi_rddm_prepare(mhi_cntrl, mhi_cntrl->rddm_image);
}
@@ -1103,11 +1126,8 @@ int mhi_prepare_for_power_up(struct mhi_controller *mhi_cntrl)
return 0;
-bhie_error:
- if (mhi_cntrl->rddm_image) {
- mhi_free_bhie_table(mhi_cntrl, mhi_cntrl->rddm_image);
- mhi_cntrl->rddm_image = NULL;
- }
+error_reg_offset:
+ mhi_deinit_dev_ctxt(mhi_cntrl);
error_dev_ctxt:
mutex_unlock(&mhi_cntrl->pm_mutex);
@@ -1128,6 +1148,9 @@ void mhi_unprepare_after_power_down(struct mhi_controller *mhi_cntrl)
mhi_cntrl->rddm_image = NULL;
}
+ mhi_cntrl->bhi = NULL;
+ mhi_cntrl->bhie = NULL;
+
mhi_deinit_dev_ctxt(mhi_cntrl);
}
EXPORT_SYMBOL_GPL(mhi_unprepare_after_power_down);
diff --git a/drivers/bus/mhi/core/internal.h b/drivers/bus/mhi/core/internal.h
index bc239a11aa69..3a732afaf73e 100644
--- a/drivers/bus/mhi/core/internal.h
+++ b/drivers/bus/mhi/core/internal.h
@@ -682,7 +682,7 @@ void mhi_rddm_prepare(struct mhi_controller *mhi_cntrl,
struct image_info *img_info);
void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl);
int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
- struct mhi_chan *mhi_chan, unsigned int flags);
+ struct mhi_chan *mhi_chan);
int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl,
struct mhi_chan *mhi_chan);
void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl,
@@ -690,26 +690,6 @@ void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl,
void mhi_reset_chan(struct mhi_controller *mhi_cntrl,
struct mhi_chan *mhi_chan);
-/* Memory allocation methods */
-static inline void *mhi_alloc_coherent(struct mhi_controller *mhi_cntrl,
- size_t size,
- dma_addr_t *dma_handle,
- gfp_t gfp)
-{
- void *buf = dma_alloc_coherent(mhi_cntrl->cntrl_dev, size, dma_handle,
- gfp);
-
- return buf;
-}
-
-static inline void mhi_free_coherent(struct mhi_controller *mhi_cntrl,
- size_t size,
- void *vaddr,
- dma_addr_t dma_handle)
-{
- dma_free_coherent(mhi_cntrl->cntrl_dev, size, vaddr, dma_handle);
-}
-
/* Event processing methods */
void mhi_ctrl_ev_task(unsigned long data);
void mhi_ev_task(unsigned long data);
diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c
index 84448233f64c..b15c5bc37dd4 100644
--- a/drivers/bus/mhi/core/main.c
+++ b/drivers/bus/mhi/core/main.c
@@ -193,7 +193,7 @@ int mhi_map_single_no_bb(struct mhi_controller *mhi_cntrl,
int mhi_map_single_use_bb(struct mhi_controller *mhi_cntrl,
struct mhi_buf_info *buf_info)
{
- void *buf = mhi_alloc_coherent(mhi_cntrl, buf_info->len,
+ void *buf = dma_alloc_coherent(mhi_cntrl->cntrl_dev, buf_info->len,
&buf_info->p_addr, GFP_ATOMIC);
if (!buf)
@@ -220,8 +220,8 @@ void mhi_unmap_single_use_bb(struct mhi_controller *mhi_cntrl,
if (buf_info->dir == DMA_FROM_DEVICE)
memcpy(buf_info->v_addr, buf_info->bb_addr, buf_info->len);
- mhi_free_coherent(mhi_cntrl, buf_info->len, buf_info->bb_addr,
- buf_info->p_addr);
+ dma_free_coherent(mhi_cntrl->cntrl_dev, buf_info->len,
+ buf_info->bb_addr, buf_info->p_addr);
}
static int get_nr_avail_ring_elements(struct mhi_controller *mhi_cntrl,
@@ -1430,7 +1430,7 @@ exit_unprepare_channel:
}
int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
- struct mhi_chan *mhi_chan, unsigned int flags)
+ struct mhi_chan *mhi_chan)
{
int ret = 0;
struct device *dev = &mhi_chan->mhi_dev->dev;
@@ -1455,9 +1455,6 @@ int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
if (ret)
goto error_pm_state;
- if (mhi_chan->dir == DMA_FROM_DEVICE)
- mhi_chan->pre_alloc = !!(flags & MHI_CH_INBOUND_ALLOC_BUFS);
-
/* Pre-allocate buffer for xfer ring */
if (mhi_chan->pre_alloc) {
int nr_el = get_nr_avail_ring_elements(mhi_cntrl,
@@ -1613,7 +1610,7 @@ void mhi_reset_chan(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan)
}
/* Move channel to start state */
-int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, unsigned int flags)
+int mhi_prepare_for_transfer(struct mhi_device *mhi_dev)
{
int ret, dir;
struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
@@ -1624,7 +1621,7 @@ int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, unsigned int flags)
if (!mhi_chan)
continue;
- ret = mhi_prepare_channel(mhi_cntrl, mhi_chan, flags);
+ ret = mhi_prepare_channel(mhi_cntrl, mhi_chan);
if (ret)
goto error_open_chan;
}
diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c
index bbf6cd04861e..fb99e3727155 100644
--- a/drivers/bus/mhi/core/pm.c
+++ b/drivers/bus/mhi/core/pm.c
@@ -1059,28 +1059,8 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
if (ret)
goto error_setup_irq;
- /* Setup BHI offset & INTVEC */
+ /* Setup BHI INTVEC */
write_lock_irq(&mhi_cntrl->pm_lock);
- ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->regs, BHIOFF, &val);
- if (ret) {
- write_unlock_irq(&mhi_cntrl->pm_lock);
- goto error_bhi_offset;
- }
-
- mhi_cntrl->bhi = mhi_cntrl->regs + val;
-
- /* Setup BHIE offset */
- if (mhi_cntrl->fbc_download) {
- ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->regs, BHIEOFF, &val);
- if (ret) {
- write_unlock_irq(&mhi_cntrl->pm_lock);
- dev_err(dev, "Error reading BHIE offset\n");
- goto error_bhi_offset;
- }
-
- mhi_cntrl->bhie = mhi_cntrl->regs + val;
- }
-
mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0);
mhi_cntrl->pm_state = MHI_PM_POR;
mhi_cntrl->ee = MHI_EE_MAX;
@@ -1089,12 +1069,16 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
/* Confirm that the device is in valid exec env */
if (!MHI_IN_PBL(current_ee) && current_ee != MHI_EE_AMSS) {
- dev_err(dev, "Not a valid EE for power on\n");
+ dev_err(dev, "%s is not a valid EE for power on\n",
+ TO_MHI_EXEC_STR(current_ee));
ret = -EIO;
- goto error_bhi_offset;
+ goto error_async_power_up;
}
state = mhi_get_mhi_state(mhi_cntrl);
+ dev_dbg(dev, "Attempting power on with EE: %s, state: %s\n",
+ TO_MHI_EXEC_STR(current_ee), TO_MHI_STATE_STR(state));
+
if (state == MHI_STATE_SYS_ERR) {
mhi_set_mhi_state(mhi_cntrl, MHI_STATE_RESET);
ret = wait_event_timeout(mhi_cntrl->state_event,
@@ -1110,7 +1094,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
if (!ret) {
ret = -EIO;
dev_info(dev, "Failed to reset MHI due to syserr state\n");
- goto error_bhi_offset;
+ goto error_async_power_up;
}
/*
@@ -1132,7 +1116,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
return 0;
-error_bhi_offset:
+error_async_power_up:
mhi_deinit_free_irq(mhi_cntrl);
error_setup_irq:
diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c
index 4dd1077354af..59a4896a8030 100644
--- a/drivers/bus/mhi/pci_generic.c
+++ b/drivers/bus/mhi/pci_generic.c
@@ -32,6 +32,7 @@
* @edl: emergency download mode firmware path (if any)
* @bar_num: PCI base address register to use for MHI MMIO register space
* @dma_data_width: DMA transfer word size (32 or 64 bits)
+ * @mru_default: default MRU size for MBIM network packets
* @sideband_wake: Devices using dedicated sideband GPIO for wakeup instead
* of inband wake support (such as sdx24)
*/
@@ -42,6 +43,7 @@ struct mhi_pci_dev_info {
const char *edl;
unsigned int bar_num;
unsigned int dma_data_width;
+ unsigned int mru_default;
bool sideband_wake;
};
@@ -272,6 +274,7 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = {
.config = &modem_qcom_v1_mhiv_config,
.bar_num = MHI_PCI_DEFAULT_BAR_NUM,
.dma_data_width = 32,
+ .mru_default = 32768,
.sideband_wake = false,
};
@@ -366,6 +369,40 @@ static const struct mhi_pci_dev_info mhi_foxconn_sdx55_info = {
.sideband_wake = false,
};
+static const struct mhi_channel_config mhi_mv31_channels[] = {
+ MHI_CHANNEL_CONFIG_UL(0, "LOOPBACK", 64, 0),
+ MHI_CHANNEL_CONFIG_DL(1, "LOOPBACK", 64, 0),
+ /* MBIM Control Channel */
+ MHI_CHANNEL_CONFIG_UL(12, "MBIM", 64, 0),
+ MHI_CHANNEL_CONFIG_DL(13, "MBIM", 64, 0),
+ /* MBIM Data Channel */
+ MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0_MBIM", 512, 2),
+ MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0_MBIM", 512, 3),
+};
+
+static struct mhi_event_config mhi_mv31_events[] = {
+ MHI_EVENT_CONFIG_CTRL(0, 256),
+ MHI_EVENT_CONFIG_DATA(1, 256),
+ MHI_EVENT_CONFIG_HW_DATA(2, 1024, 100),
+ MHI_EVENT_CONFIG_HW_DATA(3, 1024, 101),
+};
+
+static const struct mhi_controller_config modem_mv31_config = {
+ .max_channels = 128,
+ .timeout_ms = 20000,
+ .num_channels = ARRAY_SIZE(mhi_mv31_channels),
+ .ch_cfg = mhi_mv31_channels,
+ .num_events = ARRAY_SIZE(mhi_mv31_events),
+ .event_cfg = mhi_mv31_events,
+};
+
+static const struct mhi_pci_dev_info mhi_mv31_info = {
+ .name = "cinterion-mv31",
+ .config = &modem_mv31_config,
+ .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
+ .dma_data_width = 32,
+};
+
static const struct pci_device_id mhi_pci_id_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0306),
.driver_data = (kernel_ulong_t) &mhi_qcom_sdx55_info },
@@ -386,6 +423,9 @@ static const struct pci_device_id mhi_pci_id_table[] = {
/* DW5930e (sdx55), Non-eSIM, It's also T99W175 */
{ PCI_DEVICE(PCI_VENDOR_ID_FOXCONN, 0xe0b1),
.driver_data = (kernel_ulong_t) &mhi_foxconn_sdx55_info },
+ /* MV31-W (Cinterion) */
+ { PCI_DEVICE(0x1269, 0x00b3),
+ .driver_data = (kernel_ulong_t) &mhi_mv31_info },
{ }
};
MODULE_DEVICE_TABLE(pci, mhi_pci_id_table);
@@ -487,6 +527,7 @@ static int mhi_pci_claim(struct mhi_controller *mhi_cntrl,
return err;
}
mhi_cntrl->regs = pcim_iomap_table(pdev)[bar_num];
+ mhi_cntrl->reg_len = pci_resource_len(pdev, bar_num);
err = pci_set_dma_mask(pdev, dma_mask);
if (err) {
@@ -664,6 +705,7 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
mhi_cntrl->status_cb = mhi_pci_status_cb;
mhi_cntrl->runtime_get = mhi_pci_runtime_get;
mhi_cntrl->runtime_put = mhi_pci_runtime_put;
+ mhi_cntrl->mru = info->mru_default;
if (info->sideband_wake) {
mhi_cntrl->wake_get = mhi_pci_wake_get_nop;
diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 0ef98e3ba341..148a4dd8cb9a 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -3097,8 +3097,10 @@ static int sysc_probe(struct platform_device *pdev)
return error;
error = sysc_check_active_timer(ddata);
- if (error == -EBUSY)
+ if (error == -ENXIO)
ddata->reserved = true;
+ else if (error)
+ return error;
error = sysc_get_clocks(ddata);
if (error)
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index ea3ead00f30f..740811893c57 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -427,8 +427,6 @@ config ADI
and SSM (Silicon Secured Memory). Intended consumers of this
driver include crash and makedumpfile.
-endmenu
-
config RANDOM_TRUST_CPU
bool "Trust the CPU manufacturer to initialize Linux's CRNG"
depends on ARCH_RANDOM
@@ -452,3 +450,5 @@ config RANDOM_TRUST_BOOTLOADER
booloader is trustworthy so it will be added to the kernel's entropy
pool. Otherwise, say N here so it will be regarded as device input that
only mixes the entropy pool.
+
+endmenu
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 3f166c8a4099..239eca4d6805 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -524,6 +524,20 @@ config HW_RANDOM_XIPHERA
To compile this driver as a module, choose M here: the
module will be called xiphera-trng.
+config HW_RANDOM_ARM_SMCCC_TRNG
+ tristate "Arm SMCCC TRNG firmware interface support"
+ depends on HAVE_ARM_SMCCC_DISCOVERY
+ default HW_RANDOM
+ help
+ Say 'Y' to enable the True Random Number Generator driver using
+ the Arm SMCCC TRNG firmware interface. This reads entropy from
+ higher exception levels (firmware, hypervisor). Uses SMCCC for
+ communicating with the firmware:
+ https://developer.arm.com/documentation/den0098/latest/
+
+ To compile this driver as a module, choose M here: the
+ module will be called arm_smccc_trng.
+
endif # HW_RANDOM
config UML_RANDOM
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index 8933fada74f2..a5a1c765a394 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -45,3 +45,4 @@ obj-$(CONFIG_HW_RANDOM_OPTEE) += optee-rng.o
obj-$(CONFIG_HW_RANDOM_NPCM) += npcm-rng.o
obj-$(CONFIG_HW_RANDOM_CCTRNG) += cctrng.o
obj-$(CONFIG_HW_RANDOM_XIPHERA) += xiphera-trng.o
+obj-$(CONFIG_HW_RANDOM_ARM_SMCCC_TRNG) += arm_smccc_trng.o
diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c
index d8d4ef5214a1..c22d4184bb61 100644
--- a/drivers/char/hw_random/amd-rng.c
+++ b/drivers/char/hw_random/amd-rng.c
@@ -124,7 +124,7 @@ static struct hwrng amd_rng = {
.read = amd_rng_read,
};
-static int __init mod_init(void)
+static int __init amd_rng_mod_init(void)
{
int err;
struct pci_dev *pdev = NULL;
@@ -188,7 +188,7 @@ out:
return err;
}
-static void __exit mod_exit(void)
+static void __exit amd_rng_mod_exit(void)
{
struct amd768_priv *priv;
@@ -203,8 +203,8 @@ static void __exit mod_exit(void)
kfree(priv);
}
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(amd_rng_mod_init);
+module_exit(amd_rng_mod_exit);
MODULE_AUTHOR("The Linux Kernel team");
MODULE_DESCRIPTION("H/W RNG driver for AMD chipsets");
diff --git a/drivers/char/hw_random/arm_smccc_trng.c b/drivers/char/hw_random/arm_smccc_trng.c
new file mode 100644
index 000000000000..b24ac39a903b
--- /dev/null
+++ b/drivers/char/hw_random/arm_smccc_trng.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Randomness driver for the ARM SMCCC TRNG Firmware Interface
+ * https://developer.arm.com/documentation/den0098/latest/
+ *
+ * Copyright (C) 2020 Arm Ltd.
+ *
+ * The ARM TRNG firmware interface specifies a protocol to read entropy
+ * from a higher exception level, to abstract from any machine specific
+ * implementations and allow easier use in hypervisors.
+ *
+ * The firmware interface is realised using the SMCCC specification.
+ */
+
+#include <linux/bits.h>
+#include <linux/device.h>
+#include <linux/hw_random.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/arm-smccc.h>
+
+#ifdef CONFIG_ARM64
+#define ARM_SMCCC_TRNG_RND ARM_SMCCC_TRNG_RND64
+#define MAX_BITS_PER_CALL (3 * 64UL)
+#else
+#define ARM_SMCCC_TRNG_RND ARM_SMCCC_TRNG_RND32
+#define MAX_BITS_PER_CALL (3 * 32UL)
+#endif
+
+/* We don't want to allow the firmware to stall us forever. */
+#define SMCCC_TRNG_MAX_TRIES 20
+
+#define SMCCC_RET_TRNG_INVALID_PARAMETER -2
+#define SMCCC_RET_TRNG_NO_ENTROPY -3
+
+static int copy_from_registers(char *buf, struct arm_smccc_res *res,
+ size_t bytes)
+{
+ unsigned int chunk, copied;
+
+ if (bytes == 0)
+ return 0;
+
+ chunk = min(bytes, sizeof(long));
+ memcpy(buf, &res->a3, chunk);
+ copied = chunk;
+ if (copied >= bytes)
+ return copied;
+
+ chunk = min((bytes - copied), sizeof(long));
+ memcpy(&buf[copied], &res->a2, chunk);
+ copied += chunk;
+ if (copied >= bytes)
+ return copied;
+
+ chunk = min((bytes - copied), sizeof(long));
+ memcpy(&buf[copied], &res->a1, chunk);
+
+ return copied + chunk;
+}
+
+static int smccc_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+ struct arm_smccc_res res;
+ u8 *buf = data;
+ unsigned int copied = 0;
+ int tries = 0;
+
+ while (copied < max) {
+ size_t bits = min_t(size_t, (max - copied) * BITS_PER_BYTE,
+ MAX_BITS_PER_CALL);
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND, bits, &res);
+ if ((int)res.a0 < 0)
+ return (int)res.a0;
+
+ switch ((int)res.a0) {
+ case SMCCC_RET_SUCCESS:
+ copied += copy_from_registers(buf + copied, &res,
+ bits / BITS_PER_BYTE);
+ tries = 0;
+ break;
+ case SMCCC_RET_TRNG_NO_ENTROPY:
+ if (!wait)
+ return copied;
+ tries++;
+ if (tries >= SMCCC_TRNG_MAX_TRIES)
+ return copied;
+ cond_resched();
+ break;
+ }
+ }
+
+ return copied;
+}
+
+static int smccc_trng_probe(struct platform_device *pdev)
+{
+ struct hwrng *trng;
+
+ trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL);
+ if (!trng)
+ return -ENOMEM;
+
+ trng->name = "smccc_trng";
+ trng->read = smccc_trng_read;
+
+ platform_set_drvdata(pdev, trng);
+
+ return devm_hwrng_register(&pdev->dev, trng);
+}
+
+static struct platform_driver smccc_trng_driver = {
+ .driver = {
+ .name = "smccc_trng",
+ },
+ .probe = smccc_trng_probe,
+};
+module_platform_driver(smccc_trng_driver);
+
+MODULE_ALIAS("platform:smccc_trng");
+MODULE_AUTHOR("Andre Przywara");
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/geode-rng.c b/drivers/char/hw_random/geode-rng.c
index e1d421a36a13..138ce434f86b 100644
--- a/drivers/char/hw_random/geode-rng.c
+++ b/drivers/char/hw_random/geode-rng.c
@@ -83,7 +83,7 @@ static struct hwrng geode_rng = {
};
-static int __init mod_init(void)
+static int __init geode_rng_init(void)
{
int err = -ENODEV;
struct pci_dev *pdev = NULL;
@@ -124,7 +124,7 @@ err_unmap:
goto out;
}
-static void __exit mod_exit(void)
+static void __exit geode_rng_exit(void)
{
void __iomem *mem = (void __iomem *)geode_rng.priv;
@@ -132,8 +132,8 @@ static void __exit mod_exit(void)
iounmap(mem);
}
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(geode_rng_init);
+module_exit(geode_rng_exit);
MODULE_DESCRIPTION("H/W RNG driver for AMD Geode LX CPUs");
MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/intel-rng.c b/drivers/char/hw_random/intel-rng.c
index d740b8814bf3..7b171cb3b825 100644
--- a/drivers/char/hw_random/intel-rng.c
+++ b/drivers/char/hw_random/intel-rng.c
@@ -325,7 +325,7 @@ PFX "RNG, try using the 'no_fwh_detect' option.\n";
}
-static int __init mod_init(void)
+static int __init intel_rng_mod_init(void)
{
int err = -ENODEV;
int i;
@@ -403,7 +403,7 @@ out:
}
-static void __exit mod_exit(void)
+static void __exit intel_rng_mod_exit(void)
{
void __iomem *mem = (void __iomem *)intel_rng.priv;
@@ -411,8 +411,8 @@ static void __exit mod_exit(void)
iounmap(mem);
}
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(intel_rng_mod_init);
+module_exit(intel_rng_mod_exit);
MODULE_DESCRIPTION("H/W RNG driver for Intel chipsets");
MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
index 39943bc3651a..7444cc146e86 100644
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
@@ -192,7 +192,7 @@ static struct hwrng via_rng = {
};
-static int __init mod_init(void)
+static int __init via_rng_mod_init(void)
{
int err;
@@ -209,13 +209,13 @@ static int __init mod_init(void)
out:
return err;
}
-module_init(mod_init);
+module_init(via_rng_mod_init);
-static void __exit mod_exit(void)
+static void __exit via_rng_mod_exit(void)
{
hwrng_unregister(&via_rng);
}
-module_exit(mod_exit);
+module_exit(via_rng_mod_exit);
static struct x86_cpu_id __maybe_unused via_rng_cpu_id[] = {
X86_MATCH_FEATURE(X86_FEATURE_XSTORE, NULL),
diff --git a/drivers/char/mwave/tp3780i.c b/drivers/char/mwave/tp3780i.c
index 8588b51202e5..83eaffeb22c8 100644
--- a/drivers/char/mwave/tp3780i.c
+++ b/drivers/char/mwave/tp3780i.c
@@ -470,8 +470,6 @@ int tp3780I_StartDSP(THINKPAD_BD_DATA * pBDData)
int tp3780I_QueryAbilities(THINKPAD_BD_DATA * pBDData, MW_ABILITIES * pAbilities)
{
- int retval = 0;
-
PRINTK_2(TRACE_TP3780I,
"tp3780i::tp3780I_QueryAbilities entry pBDData %p\n", pBDData);
@@ -502,7 +500,7 @@ int tp3780I_QueryAbilities(THINKPAD_BD_DATA * pBDData, MW_ABILITIES * pAbilities
PRINTK_1(TRACE_TP3780I,
"tp3780i::tp3780I_QueryAbilities exit retval=SUCCESSFUL\n");
- return retval;
+ return 0;
}
int tp3780I_ReadWriteDspDStore(THINKPAD_BD_DATA * pBDData, unsigned int uOpcode,
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 6eaefea0520e..5ac53dcb3a6a 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -4050,16 +4050,15 @@ static int hdlcdev_close(struct net_device *dev)
* called by network layer to process IOCTL call to network device
*
* dev pointer to network device structure
- * ifr pointer to network interface request structure
- * cmd IOCTL command code
+ * ifs pointer to network interface settings structure
*
* returns 0 if success, otherwise error code
*/
-static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int hdlcdev_wan_ioctl(struct net_device *dev, struct if_settings *ifs)
{
const size_t size = sizeof(sync_serial_settings);
sync_serial_settings new_line;
- sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync;
+ sync_serial_settings __user *line = ifs->ifs_ifsu.sync;
MGSLPC_INFO *info = dev_to_port(dev);
unsigned int flags;
@@ -4070,17 +4069,14 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (info->port.count)
return -EBUSY;
- if (cmd != SIOCWANDEV)
- return hdlc_ioctl(dev, ifr, cmd);
-
memset(&new_line, 0, size);
- switch(ifr->ifr_settings.type) {
+ switch (ifs->type) {
case IF_GET_IFACE: /* return current sync_serial_settings */
- ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL;
- if (ifr->ifr_settings.size < size) {
- ifr->ifr_settings.size = size; /* data size wanted */
+ ifs->type = IF_IFACE_SYNC_SERIAL;
+ if (ifs->size < size) {
+ ifs->size = size; /* data size wanted */
return -ENOBUFS;
}
@@ -4148,9 +4144,8 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
tty_kref_put(tty);
}
return 0;
-
default:
- return hdlc_ioctl(dev, ifr, cmd);
+ return hdlc_ioctl(dev, ifs);
}
}
@@ -4225,7 +4220,7 @@ static const struct net_device_ops hdlcdev_ops = {
.ndo_open = hdlcdev_open,
.ndo_stop = hdlcdev_close,
.ndo_start_xmit = hdlc_start_xmit,
- .ndo_do_ioctl = hdlcdev_ioctl,
+ .ndo_siocwandev = hdlcdev_wan_ioctl,
.ndo_tx_timeout = hdlcdev_tx_timeout,
};
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index 4308f9ca7a43..d6ba644f6b00 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -89,7 +89,6 @@ config TCG_TIS_SYNQUACER
config TCG_TIS_I2C_CR50
tristate "TPM Interface Specification 2.0 Interface (I2C - CR50)"
depends on I2C
- select TCG_CR50
help
This is a driver for the Google cr50 I2C TPM interface which is a
custom microcontroller and requires a custom i2c protocol interface
diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index 903604769de9..3af4c07a9342 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -106,17 +106,12 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
{
struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev);
u16 len;
- int sig;
if (!ibmvtpm->rtce_buf) {
dev_err(ibmvtpm->dev, "ibmvtpm device is not ready\n");
return 0;
}
- sig = wait_event_interruptible(ibmvtpm->wq, !ibmvtpm->tpm_processing_cmd);
- if (sig)
- return -EINTR;
-
len = ibmvtpm->res_len;
if (count < len) {
@@ -237,7 +232,7 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
* set the processing flag before the Hcall, since we may get the
* result (interrupt) before even being able to check rc.
*/
- ibmvtpm->tpm_processing_cmd = true;
+ ibmvtpm->tpm_processing_cmd = 1;
again:
rc = ibmvtpm_send_crq(ibmvtpm->vdev,
@@ -255,7 +250,7 @@ again:
goto again;
}
dev_err(ibmvtpm->dev, "tpm_ibmvtpm_send failed rc=%d\n", rc);
- ibmvtpm->tpm_processing_cmd = false;
+ ibmvtpm->tpm_processing_cmd = 0;
}
spin_unlock(&ibmvtpm->rtce_lock);
@@ -269,7 +264,9 @@ static void tpm_ibmvtpm_cancel(struct tpm_chip *chip)
static u8 tpm_ibmvtpm_status(struct tpm_chip *chip)
{
- return 0;
+ struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev);
+
+ return ibmvtpm->tpm_processing_cmd;
}
/**
@@ -457,7 +454,7 @@ static const struct tpm_class_ops tpm_ibmvtpm = {
.send = tpm_ibmvtpm_send,
.cancel = tpm_ibmvtpm_cancel,
.status = tpm_ibmvtpm_status,
- .req_complete_mask = 0,
+ .req_complete_mask = 1,
.req_complete_val = 0,
.req_canceled = tpm_ibmvtpm_req_canceled,
};
@@ -550,7 +547,7 @@ static void ibmvtpm_crq_process(struct ibmvtpm_crq *crq,
case VTPM_TPM_COMMAND_RES:
/* len of the data in rtce buffer */
ibmvtpm->res_len = be16_to_cpu(crq->len);
- ibmvtpm->tpm_processing_cmd = false;
+ ibmvtpm->tpm_processing_cmd = 0;
wake_up_interruptible(&ibmvtpm->wq);
return;
default:
@@ -688,8 +685,15 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev,
goto init_irq_cleanup;
}
- if (!strcmp(id->compat, "IBM,vtpm20")) {
+
+ if (!strcmp(id->compat, "IBM,vtpm20"))
chip->flags |= TPM_CHIP_FLAG_TPM2;
+
+ rc = tpm_get_timeouts(chip);
+ if (rc)
+ goto init_irq_cleanup;
+
+ if (chip->flags & TPM_CHIP_FLAG_TPM2) {
rc = tpm2_get_cc_attrs_tbl(chip);
if (rc)
goto init_irq_cleanup;
diff --git a/drivers/char/tpm/tpm_ibmvtpm.h b/drivers/char/tpm/tpm_ibmvtpm.h
index b92aa7d3e93e..51198b137461 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.h
+++ b/drivers/char/tpm/tpm_ibmvtpm.h
@@ -41,7 +41,7 @@ struct ibmvtpm_dev {
wait_queue_head_t wq;
u16 res_len;
u32 vtpm_version;
- bool tpm_processing_cmd;
+ u8 tpm_processing_cmd;
};
#define CRQ_RES_BUF_SIZE PAGE_SIZE
diff --git a/drivers/char/tpm/tpm_tis_i2c_cr50.c b/drivers/char/tpm/tpm_tis_i2c_cr50.c
index 44dde2fbe2fb..c89278103703 100644
--- a/drivers/char/tpm/tpm_tis_i2c_cr50.c
+++ b/drivers/char/tpm/tpm_tis_i2c_cr50.c
@@ -639,12 +639,6 @@ static const struct tpm_class_ops cr50_i2c = {
.req_canceled = &tpm_cr50_i2c_req_canceled,
};
-static const struct i2c_device_id cr50_i2c_table[] = {
- {"cr50_i2c", 0},
- {}
-};
-MODULE_DEVICE_TABLE(i2c, cr50_i2c_table);
-
#ifdef CONFIG_ACPI
static const struct acpi_device_id cr50_i2c_acpi_id[] = {
{ "GOOG0005", 0 },
@@ -670,8 +664,7 @@ MODULE_DEVICE_TABLE(of, of_cr50_i2c_match);
* - 0: Success.
* - -errno: A POSIX error code.
*/
-static int tpm_cr50_i2c_probe(struct i2c_client *client,
- const struct i2c_device_id *id)
+static int tpm_cr50_i2c_probe(struct i2c_client *client)
{
struct tpm_i2c_cr50_priv_data *priv;
struct device *dev = &client->dev;
@@ -774,8 +767,7 @@ static int tpm_cr50_i2c_remove(struct i2c_client *client)
static SIMPLE_DEV_PM_OPS(cr50_i2c_pm, tpm_pm_suspend, tpm_pm_resume);
static struct i2c_driver cr50_i2c_driver = {
- .id_table = cr50_i2c_table,
- .probe = tpm_cr50_i2c_probe,
+ .probe_new = tpm_cr50_i2c_probe,
.remove = tpm_cr50_i2c_remove,
.driver = {
.name = "cr50_i2c",
diff --git a/drivers/clk/imx/clk-imx6q.c b/drivers/clk/imx/clk-imx6q.c
index 496900de0b0b..de36f58d551c 100644
--- a/drivers/clk/imx/clk-imx6q.c
+++ b/drivers/clk/imx/clk-imx6q.c
@@ -974,6 +974,6 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node)
hws[IMX6QDL_CLK_PLL3_USB_OTG]->clk);
}
- imx_register_uart_clocks(1);
+ imx_register_uart_clocks(2);
}
CLK_OF_DECLARE(imx6q, "fsl,imx6q-ccm", imx6q_clocks_init);
diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c
index 51ed640e527b..4ece326ea233 100644
--- a/drivers/clk/qcom/gdsc.c
+++ b/drivers/clk/qcom/gdsc.c
@@ -357,27 +357,43 @@ static int gdsc_init(struct gdsc *sc)
if (on < 0)
return on;
- /*
- * Votable GDSCs can be ON due to Vote from other masters.
- * If a Votable GDSC is ON, make sure we have a Vote.
- */
- if ((sc->flags & VOTABLE) && on)
- gdsc_enable(&sc->pd);
+ if (on) {
+ /* The regulator must be on, sync the kernel state */
+ if (sc->rsupply) {
+ ret = regulator_enable(sc->rsupply);
+ if (ret < 0)
+ return ret;
+ }
- /*
- * Make sure the retain bit is set if the GDSC is already on, otherwise
- * we end up turning off the GDSC and destroying all the register
- * contents that we thought we were saving.
- */
- if ((sc->flags & RETAIN_FF_ENABLE) && on)
- gdsc_retain_ff_on(sc);
+ /*
+ * Votable GDSCs can be ON due to Vote from other masters.
+ * If a Votable GDSC is ON, make sure we have a Vote.
+ */
+ if (sc->flags & VOTABLE) {
+ ret = regmap_update_bits(sc->regmap, sc->gdscr,
+ SW_COLLAPSE_MASK, val);
+ if (ret)
+ return ret;
+ }
+
+ /* Turn on HW trigger mode if supported */
+ if (sc->flags & HW_CTRL) {
+ ret = gdsc_hwctrl(sc, true);
+ if (ret < 0)
+ return ret;
+ }
- /* If ALWAYS_ON GDSCs are not ON, turn them ON */
- if (sc->flags & ALWAYS_ON) {
- if (!on)
- gdsc_enable(&sc->pd);
+ /*
+ * Make sure the retain bit is set if the GDSC is already on,
+ * otherwise we end up turning off the GDSC and destroying all
+ * the register contents that we thought we were saving.
+ */
+ if (sc->flags & RETAIN_FF_ENABLE)
+ gdsc_retain_ff_on(sc);
+ } else if (sc->flags & ALWAYS_ON) {
+ /* If ALWAYS_ON GDSCs are not ON, turn them ON */
+ gdsc_enable(&sc->pd);
on = true;
- sc->pd.flags |= GENPD_FLAG_ALWAYS_ON;
}
if (on || (sc->pwrsts & PWRSTS_RET))
@@ -385,6 +401,8 @@ static int gdsc_init(struct gdsc *sc)
else
gdsc_clear_mem_on(sc);
+ if (sc->flags & ALWAYS_ON)
+ sc->pd.flags |= GENPD_FLAG_ALWAYS_ON;
if (!sc->pd.power_off)
sc->pd.power_off = gdsc_disable;
if (!sc->pd.power_on)
diff --git a/drivers/clk/renesas/rcar-usb2-clock-sel.c b/drivers/clk/renesas/rcar-usb2-clock-sel.c
index 9fb79bd79435..684d8937965e 100644
--- a/drivers/clk/renesas/rcar-usb2-clock-sel.c
+++ b/drivers/clk/renesas/rcar-usb2-clock-sel.c
@@ -187,7 +187,7 @@ static int rcar_usb2_clock_sel_probe(struct platform_device *pdev)
init.ops = &usb2_clock_sel_clock_ops;
priv->hw.init = &init;
- ret = devm_clk_hw_register(NULL, &priv->hw);
+ ret = devm_clk_hw_register(dev, &priv->hw);
if (ret)
goto pm_put;
diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index fabad79baafc..5e3e96d3d1b9 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -51,6 +51,15 @@
#define TICK_BASE_CNT 1
+#ifdef CONFIG_ARM
+/* Use values higher than ARM arch timer. See 6282edb72bed. */
+#define MCT_CLKSOURCE_RATING 450
+#define MCT_CLKEVENTS_RATING 500
+#else
+#define MCT_CLKSOURCE_RATING 350
+#define MCT_CLKEVENTS_RATING 350
+#endif
+
enum {
MCT_INT_SPI,
MCT_INT_PPI
@@ -206,7 +215,7 @@ static void exynos4_frc_resume(struct clocksource *cs)
static struct clocksource mct_frc = {
.name = "mct-frc",
- .rating = 450, /* use value higher than ARM arch timer */
+ .rating = MCT_CLKSOURCE_RATING,
.read = exynos4_frc_read,
.mask = CLOCKSOURCE_MASK(32),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
@@ -456,8 +465,9 @@ static int exynos4_mct_starting_cpu(unsigned int cpu)
evt->set_state_oneshot = set_state_shutdown;
evt->set_state_oneshot_stopped = set_state_shutdown;
evt->tick_resume = set_state_shutdown;
- evt->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
- evt->rating = 500; /* use value higher than ARM arch timer */
+ evt->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT |
+ CLOCK_EVT_FEAT_PERCPU;
+ evt->rating = MCT_CLKEVENTS_RATING;
exynos4_mct_write(TICK_BASE_CNT, mevt->base + MCT_L_TCNTB_OFFSET);
diff --git a/drivers/clocksource/ingenic-sysost.c b/drivers/clocksource/ingenic-sysost.c
index a129840f14f9..cb6fc2f152d4 100644
--- a/drivers/clocksource/ingenic-sysost.c
+++ b/drivers/clocksource/ingenic-sysost.c
@@ -4,6 +4,7 @@
* Copyright (c) 2020 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
*/
+#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/clk.h>
#include <linux/clk-provider.h>
@@ -34,8 +35,6 @@
/* bits within the OSTCCR register */
#define OSTCCR_PRESCALE1_MASK 0x3
#define OSTCCR_PRESCALE2_MASK 0xc
-#define OSTCCR_PRESCALE1_LSB 0
-#define OSTCCR_PRESCALE2_LSB 2
/* bits within the OSTCR register */
#define OSTCR_OST1CLR BIT(0)
@@ -98,7 +97,7 @@ static unsigned long ingenic_ost_percpu_timer_recalc_rate(struct clk_hw *hw,
prescale = readl(ost_clk->ost->base + info->ostccr_reg);
- prescale = (prescale & OSTCCR_PRESCALE1_MASK) >> OSTCCR_PRESCALE1_LSB;
+ prescale = FIELD_GET(OSTCCR_PRESCALE1_MASK, prescale);
return parent_rate >> (prescale * 2);
}
@@ -112,7 +111,7 @@ static unsigned long ingenic_ost_global_timer_recalc_rate(struct clk_hw *hw,
prescale = readl(ost_clk->ost->base + info->ostccr_reg);
- prescale = (prescale & OSTCCR_PRESCALE2_MASK) >> OSTCCR_PRESCALE2_LSB;
+ prescale = FIELD_GET(OSTCCR_PRESCALE2_MASK, prescale);
return parent_rate >> (prescale * 2);
}
@@ -151,7 +150,8 @@ static int ingenic_ost_percpu_timer_set_rate(struct clk_hw *hw, unsigned long re
int val;
val = readl(ost_clk->ost->base + info->ostccr_reg);
- val = (val & ~OSTCCR_PRESCALE1_MASK) | (prescale << OSTCCR_PRESCALE1_LSB);
+ val &= ~OSTCCR_PRESCALE1_MASK;
+ val |= FIELD_PREP(OSTCCR_PRESCALE1_MASK, prescale);
writel(val, ost_clk->ost->base + info->ostccr_reg);
return 0;
@@ -166,7 +166,8 @@ static int ingenic_ost_global_timer_set_rate(struct clk_hw *hw, unsigned long re
int val;
val = readl(ost_clk->ost->base + info->ostccr_reg);
- val = (val & ~OSTCCR_PRESCALE2_MASK) | (prescale << OSTCCR_PRESCALE2_LSB);
+ val &= ~OSTCCR_PRESCALE2_MASK;
+ val |= FIELD_PREP(OSTCCR_PRESCALE2_MASK, prescale);
writel(val, ost_clk->ost->base + info->ostccr_reg);
return 0;
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index d7ed99f0001f..dd0956ad969c 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -579,7 +579,8 @@ static int sh_cmt_start(struct sh_cmt_channel *ch, unsigned long flag)
ch->flags |= flag;
/* setup timeout if no clockevent */
- if ((flag == FLAG_CLOCKSOURCE) && (!(ch->flags & FLAG_CLOCKEVENT)))
+ if (ch->cmt->num_channels == 1 &&
+ flag == FLAG_CLOCKSOURCE && (!(ch->flags & FLAG_CLOCKEVENT)))
__sh_cmt_set_next(ch, ch->max_match_value);
out:
raw_spin_unlock_irqrestore(&ch->lock, flags);
@@ -621,20 +622,25 @@ static struct sh_cmt_channel *cs_to_sh_cmt(struct clocksource *cs)
static u64 sh_cmt_clocksource_read(struct clocksource *cs)
{
struct sh_cmt_channel *ch = cs_to_sh_cmt(cs);
- unsigned long flags;
u32 has_wrapped;
- u64 value;
- u32 raw;
- raw_spin_lock_irqsave(&ch->lock, flags);
- value = ch->total_cycles;
- raw = sh_cmt_get_counter(ch, &has_wrapped);
+ if (ch->cmt->num_channels == 1) {
+ unsigned long flags;
+ u64 value;
+ u32 raw;
- if (unlikely(has_wrapped))
- raw += ch->match_value + 1;
- raw_spin_unlock_irqrestore(&ch->lock, flags);
+ raw_spin_lock_irqsave(&ch->lock, flags);
+ value = ch->total_cycles;
+ raw = sh_cmt_get_counter(ch, &has_wrapped);
+
+ if (unlikely(has_wrapped))
+ raw += ch->match_value + 1;
+ raw_spin_unlock_irqrestore(&ch->lock, flags);
+
+ return value + raw;
+ }
- return value + raw;
+ return sh_cmt_get_counter(ch, &has_wrapped);
}
static int sh_cmt_clocksource_enable(struct clocksource *cs)
@@ -697,7 +703,7 @@ static int sh_cmt_register_clocksource(struct sh_cmt_channel *ch,
cs->disable = sh_cmt_clocksource_disable;
cs->suspend = sh_cmt_clocksource_suspend;
cs->resume = sh_cmt_clocksource_resume;
- cs->mask = CLOCKSOURCE_MASK(sizeof(u64) * 8);
+ cs->mask = CLOCKSOURCE_MASK(ch->cmt->info->width);
cs->flags = CLOCK_SOURCE_IS_CONTINUOUS;
dev_info(&ch->cmt->pdev->dev, "ch%u: used as clock source\n",
diff --git a/drivers/clocksource/timer-fttmr010.c b/drivers/clocksource/timer-fttmr010.c
index edb1d5f193f5..126fb1f259b2 100644
--- a/drivers/clocksource/timer-fttmr010.c
+++ b/drivers/clocksource/timer-fttmr010.c
@@ -271,9 +271,7 @@ static irqreturn_t ast2600_timer_interrupt(int irq, void *dev_id)
}
static int __init fttmr010_common_init(struct device_node *np,
- bool is_aspeed,
- int (*timer_shutdown)(struct clock_event_device *),
- irq_handler_t irq_handler)
+ bool is_aspeed, bool is_ast2600)
{
struct fttmr010 *fttmr010;
int irq;
@@ -374,8 +372,6 @@ static int __init fttmr010_common_init(struct device_node *np,
fttmr010->tick_rate);
}
- fttmr010->timer_shutdown = timer_shutdown;
-
/*
* Setup clockevent timer (interrupt-driven) on timer 1.
*/
@@ -383,8 +379,18 @@ static int __init fttmr010_common_init(struct device_node *np,
writel(0, fttmr010->base + TIMER1_LOAD);
writel(0, fttmr010->base + TIMER1_MATCH1);
writel(0, fttmr010->base + TIMER1_MATCH2);
- ret = request_irq(irq, irq_handler, IRQF_TIMER,
- "FTTMR010-TIMER1", &fttmr010->clkevt);
+
+ if (is_ast2600) {
+ fttmr010->timer_shutdown = ast2600_timer_shutdown;
+ ret = request_irq(irq, ast2600_timer_interrupt,
+ IRQF_TIMER, "FTTMR010-TIMER1",
+ &fttmr010->clkevt);
+ } else {
+ fttmr010->timer_shutdown = fttmr010_timer_shutdown;
+ ret = request_irq(irq, fttmr010_timer_interrupt,
+ IRQF_TIMER, "FTTMR010-TIMER1",
+ &fttmr010->clkevt);
+ }
if (ret) {
pr_err("FTTMR010-TIMER1 no IRQ\n");
goto out_unmap;
@@ -432,23 +438,17 @@ out_disable_clock:
static __init int ast2600_timer_init(struct device_node *np)
{
- return fttmr010_common_init(np, true,
- ast2600_timer_shutdown,
- ast2600_timer_interrupt);
+ return fttmr010_common_init(np, true, true);
}
static __init int aspeed_timer_init(struct device_node *np)
{
- return fttmr010_common_init(np, true,
- fttmr010_timer_shutdown,
- fttmr010_timer_interrupt);
+ return fttmr010_common_init(np, true, false);
}
static __init int fttmr010_timer_init(struct device_node *np)
{
- return fttmr010_common_init(np, false,
- fttmr010_timer_shutdown,
- fttmr010_timer_interrupt);
+ return fttmr010_common_init(np, false, false);
}
TIMER_OF_DECLARE(fttmr010, "faraday,fttmr010", fttmr010_timer_init);
diff --git a/drivers/clocksource/timer-mediatek.c b/drivers/clocksource/timer-mediatek.c
index ab63b95e414f..7bcb4a3f26fb 100644
--- a/drivers/clocksource/timer-mediatek.c
+++ b/drivers/clocksource/timer-mediatek.c
@@ -60,9 +60,9 @@
* SYST_CON_EN: Clock enable. Shall be set to
* - Start timer countdown.
* - Allow timeout ticks being updated.
- * - Allow changing interrupt functions.
+ * - Allow changing interrupt status, like clearing a pending irq.
*
- * SYST_CON_IRQ_EN: Set to allow interrupt.
+ * SYST_CON_IRQ_EN: Set to enable interrupt.
*
* SYST_CON_IRQ_CLR: Set to clear interrupt.
*/
@@ -75,6 +75,7 @@ static void __iomem *gpt_sched_reg __read_mostly;
static void mtk_syst_ack_irq(struct timer_of *to)
{
/* Clear and disable interrupt */
+ writel(SYST_CON_EN, SYST_CON_REG(to));
writel(SYST_CON_IRQ_CLR | SYST_CON_EN, SYST_CON_REG(to));
}
@@ -111,6 +112,9 @@ static int mtk_syst_clkevt_next_event(unsigned long ticks,
static int mtk_syst_clkevt_shutdown(struct clock_event_device *clkevt)
{
+ /* Clear any irq */
+ mtk_syst_ack_irq(to_timer_of(clkevt));
+
/* Disable timer */
writel(0, SYST_CON_REG(to_timer_of(clkevt)));
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 7e7450453714..b49612895c78 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -163,9 +163,9 @@ static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
if (ret || val > 1)
return -EINVAL;
- get_online_cpus();
+ cpus_read_lock();
set_boost(policy, val);
- put_online_cpus();
+ cpus_read_unlock();
return count;
}
diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c
index 3fc98a3ffd91..c10fc33b29b1 100644
--- a/drivers/cpufreq/armada-37xx-cpufreq.c
+++ b/drivers/cpufreq/armada-37xx-cpufreq.c
@@ -104,7 +104,11 @@ struct armada_37xx_dvfs {
};
static struct armada_37xx_dvfs armada_37xx_dvfs[] = {
- {.cpu_freq_max = 1200*1000*1000, .divider = {1, 2, 4, 6} },
+ /*
+ * The cpufreq scaling for 1.2 GHz variant of the SOC is currently
+ * unstable because we do not know how to configure it properly.
+ */
+ /* {.cpu_freq_max = 1200*1000*1000, .divider = {1, 2, 4, 6} }, */
{.cpu_freq_max = 1000*1000*1000, .divider = {1, 2, 4, 5} },
{.cpu_freq_max = 800*1000*1000, .divider = {1, 2, 3, 4} },
{.cpu_freq_max = 600*1000*1000, .divider = {2, 4, 5, 6} },
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index bef7528aecd3..231e585f6ba2 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -139,7 +139,9 @@ static const struct of_device_id blocklist[] __initconst = {
{ .compatible = "qcom,qcs404", },
{ .compatible = "qcom,sc7180", },
{ .compatible = "qcom,sc7280", },
+ { .compatible = "qcom,sc8180x", },
{ .compatible = "qcom,sdm845", },
+ { .compatible = "qcom,sm8150", },
{ .compatible = "st,stih407", },
{ .compatible = "st,stih410", },
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 45f3416988f1..06c526d66dd3 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2654,18 +2654,18 @@ int cpufreq_boost_trigger_state(int state)
cpufreq_driver->boost_enabled = state;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
- get_online_cpus();
+ cpus_read_lock();
for_each_active_policy(policy) {
ret = cpufreq_driver->set_boost(policy, state);
if (ret)
goto err_reset_state;
}
- put_online_cpus();
+ cpus_read_unlock();
return 0;
err_reset_state:
- put_online_cpus();
+ cpus_read_unlock();
write_lock_irqsave(&cpufreq_driver_lock, flags);
cpufreq_driver->boost_enabled = !state;
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index ac361a8b1d3b..eb4320b619c9 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -418,7 +418,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias)
default_powersave_bias = powersave_bias;
cpumask_clear(&done);
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
struct cpufreq_policy *policy;
struct policy_dbs_info *policy_dbs;
@@ -442,7 +442,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias)
od_tuners = dbs_data->tuners;
od_tuners->powersave_bias = default_powersave_bias;
}
- put_online_cpus();
+ cpus_read_unlock();
}
void od_register_powersave_bias_handler(unsigned int (*f)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index bb4549959b11..b4ffe6c8a0d0 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -32,6 +32,7 @@
#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>
#include <asm/intel-family.h>
+#include "../drivers/thermal/intel/thermal_interrupt.h"
#define INTEL_PSTATE_SAMPLING_INTERVAL (10 * NSEC_PER_MSEC)
@@ -219,6 +220,7 @@ struct global_params {
* @sched_flags: Store scheduler flags for possible cross CPU update
* @hwp_boost_min: Last HWP boosted min performance
* @suspended: Whether or not the driver has been suspended.
+ * @hwp_notify_work: workqueue for HWP notifications.
*
* This structure stores per CPU instance data for all CPUs.
*/
@@ -257,6 +259,7 @@ struct cpudata {
unsigned int sched_flags;
u32 hwp_boost_min;
bool suspended;
+ struct delayed_work hwp_notify_work;
};
static struct cpudata **all_cpu_data;
@@ -1625,6 +1628,40 @@ static void intel_pstate_sysfs_hide_hwp_dynamic_boost(void)
/************************** sysfs end ************************/
+/*
+ * Deferred handler for an HWP notification: refresh the cpufreq policy of the
+ * CPU that owns this work item and clear MSR_HWP_STATUS on that CPU, all under
+ * intel_pstate_driver_lock.
+ */
+static void intel_pstate_notify_work(struct work_struct *work)
+{
+	/*
+	 * Identify the CPU from the cpudata embedding this work item instead
+	 * of smp_processor_id(): the worker executing the item is not
+	 * guaranteed to be running on the CPU the item was queued for.
+	 */
+	struct cpudata *cpudata =
+		container_of(to_delayed_work(work), struct cpudata, hwp_notify_work);
+
+	mutex_lock(&intel_pstate_driver_lock);
+	cpufreq_update_policy(cpudata->cpu);
+	/* Write the status MSR on the owning CPU, wherever we are running. */
+	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
+	mutex_unlock(&intel_pstate_driver_lock);
+}
+
+/*
+ * notify_hwp_interrupt - react to an HWP notification on the current CPU.
+ *
+ * Returns without doing anything unless HWP is active, the CPU advertises
+ * X86_FEATURE_HWP_NOTIFY and bit 0 of MSR_HWP_STATUS is set. Otherwise the
+ * actual handling is deferred to this CPU's hwp_notify_work, queued to run
+ * 10 ms later.
+ */
+void notify_hwp_interrupt(void)
+{
+	unsigned int this_cpu = smp_processor_id();
+	struct cpudata *cpudata;
+	u64 value;
+
+	if (!hwp_active || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
+		return;
+
+	rdmsrl(MSR_HWP_STATUS, value);
+	if (!(value & 0x01))
+		return;
+
+	/*
+	 * all_cpu_data starts out NULL and intel_pstate_driver_cleanup()
+	 * resets individual slots to NULL, so a notification can arrive while
+	 * there is no per-CPU data to queue work on. Acknowledge the status
+	 * and bail out rather than dereferencing a NULL pointer.
+	 * NOTE(review): this check is not serialized against cleanup; confirm
+	 * whether additional locking is needed.
+	 */
+	cpudata = all_cpu_data ? all_cpu_data[this_cpu] : NULL;
+	if (!cpudata) {
+		wrmsrl(MSR_HWP_STATUS, 0);
+		return;
+	}
+
+	schedule_delayed_work_on(this_cpu, &cpudata->hwp_notify_work, msecs_to_jiffies(10));
+}
+
+/*
+ * Set up HWP notification handling for @cpudata's CPU: initialise the
+ * deferred work item, then enable the HWP notification interrupt for
+ * guaranteed performance changes. No-op when the CPU lacks
+ * X86_FEATURE_HWP_NOTIFY.
+ */
+static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
+{
+	if (!boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
+		return;
+
+	/* The work item must be ready before the interrupt is enabled. */
+	INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
+	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01);
+}
+
static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
/* First disable HWP notification interrupt as we don't process them */
@@ -1634,6 +1671,8 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
if (cpudata->epp_default == -EINVAL)
cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
+
+ intel_pstate_enable_hwp_interrupt(cpudata);
}
static int atom_get_min_pstate(void)
@@ -2969,7 +3008,7 @@ static void intel_pstate_driver_cleanup(void)
{
unsigned int cpu;
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
if (all_cpu_data[cpu]) {
if (intel_pstate_driver == &intel_pstate)
@@ -2979,7 +3018,7 @@ static void intel_pstate_driver_cleanup(void)
all_cpu_data[cpu] = NULL;
}
}
- put_online_cpus();
+ cpus_read_unlock();
intel_pstate_driver = NULL;
}
diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index b9ccb6a3dad9..12ab4014af71 100644
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -1180,7 +1180,7 @@ static int powernowk8_init(void)
if (!x86_match_cpu(powernow_k8_ids))
return -ENODEV;
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(i) {
smp_call_function_single(i, check_supported_cpu, &ret, 1);
if (!ret)
@@ -1188,10 +1188,10 @@ static int powernowk8_init(void)
}
if (supported_cpus != num_online_cpus()) {
- put_online_cpus();
+ cpus_read_unlock();
return -ENODEV;
}
- put_online_cpus();
+ cpus_read_unlock();
ret = cpufreq_register_driver(&cpufreq_amd64_driver);
if (ret)
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 005600cef273..23a06cba392c 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -918,7 +918,7 @@ static void powernv_cpufreq_work_fn(struct work_struct *work)
unsigned int cpu;
cpumask_t mask;
- get_online_cpus();
+ cpus_read_lock();
cpumask_and(&mask, &chip->mask, cpu_online_mask);
smp_call_function_any(&mask,
powernv_cpufreq_throttle_check, NULL, 0);
@@ -939,7 +939,7 @@ static void powernv_cpufreq_work_fn(struct work_struct *work)
cpufreq_cpu_put(policy);
}
out:
- put_online_cpus();
+ cpus_read_unlock();
}
static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index ec9a87ca2dbb..75f818d04b48 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -134,7 +134,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
}
if (!zalloc_cpumask_var(&opp_shared_cpus, GFP_KERNEL))
- ret = -ENOMEM;
+ return -ENOMEM;
/* Obtain CPUs that share SCMI performance controls */
ret = scmi_get_sharing_cpus(cpu_dev, policy->cpus);
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c
index cd1baee424a1..b3a9bbfb8831 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c
@@ -26,8 +26,7 @@ void sun8i_ce_prng_exit(struct crypto_tfm *tfm)
{
struct sun8i_ce_rng_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
- memzero_explicit(ctx->seed, ctx->slen);
- kfree(ctx->seed);
+ kfree_sensitive(ctx->seed);
ctx->seed = NULL;
ctx->slen = 0;
}
@@ -38,8 +37,7 @@ int sun8i_ce_prng_seed(struct crypto_rng *tfm, const u8 *seed,
struct sun8i_ce_rng_tfm_ctx *ctx = crypto_rng_ctx(tfm);
if (ctx->seed && ctx->slen != slen) {
- memzero_explicit(ctx->seed, ctx->slen);
- kfree(ctx->seed);
+ kfree_sensitive(ctx->seed);
ctx->slen = 0;
ctx->seed = NULL;
}
@@ -157,9 +155,8 @@ err_dst:
memcpy(dst, d, dlen);
memcpy(ctx->seed, d + dlen, ctx->slen);
}
- memzero_explicit(d, todo);
err_iv:
- kfree(d);
+ kfree_sensitive(d);
err_mem:
return err;
}
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c
index 5b7af4498bd5..19cd2e52f89d 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c
@@ -95,9 +95,8 @@ err_pm:
memcpy(data, d, max);
err = max;
}
- memzero_explicit(d, todo);
err_dst:
- kfree(d);
+ kfree_sensitive(d);
return err;
}
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c
index 3191527928e4..246a6782674c 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c
@@ -20,8 +20,7 @@ int sun8i_ss_prng_seed(struct crypto_rng *tfm, const u8 *seed,
struct sun8i_ss_rng_tfm_ctx *ctx = crypto_rng_ctx(tfm);
if (ctx->seed && ctx->slen != slen) {
- memzero_explicit(ctx->seed, ctx->slen);
- kfree(ctx->seed);
+ kfree_sensitive(ctx->seed);
ctx->slen = 0;
ctx->seed = NULL;
}
@@ -48,8 +47,7 @@ void sun8i_ss_prng_exit(struct crypto_tfm *tfm)
{
struct sun8i_ss_rng_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
- memzero_explicit(ctx->seed, ctx->slen);
- kfree(ctx->seed);
+ kfree_sensitive(ctx->seed);
ctx->seed = NULL;
ctx->slen = 0;
}
@@ -167,9 +165,8 @@ err_iv:
/* Update seed */
memcpy(ctx->seed, d + dlen, ctx->slen);
}
- memzero_explicit(d, todo);
err_free:
- kfree(d);
+ kfree_sensitive(d);
return err;
}
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index b1d286004295..9391ccc03382 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -143,6 +143,7 @@ struct atmel_aes_xts_ctx {
struct atmel_aes_base_ctx base;
u32 key2[AES_KEYSIZE_256 / sizeof(u32)];
+ struct crypto_skcipher *fallback_tfm;
};
#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
@@ -155,6 +156,7 @@ struct atmel_aes_authenc_ctx {
struct atmel_aes_reqctx {
unsigned long mode;
u8 lastc[AES_BLOCK_SIZE];
+ struct skcipher_request fallback_req;
};
#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
@@ -418,24 +420,15 @@ static inline size_t atmel_aes_padlen(size_t len, size_t block_size)
return len ? block_size - len : 0;
}
-static struct atmel_aes_dev *atmel_aes_find_dev(struct atmel_aes_base_ctx *ctx)
+static struct atmel_aes_dev *atmel_aes_dev_alloc(struct atmel_aes_base_ctx *ctx)
{
- struct atmel_aes_dev *aes_dd = NULL;
- struct atmel_aes_dev *tmp;
+ struct atmel_aes_dev *aes_dd;
spin_lock_bh(&atmel_aes.lock);
- if (!ctx->dd) {
- list_for_each_entry(tmp, &atmel_aes.dev_list, list) {
- aes_dd = tmp;
- break;
- }
- ctx->dd = aes_dd;
- } else {
- aes_dd = ctx->dd;
- }
-
+ /* One AES IP per SoC. */
+ aes_dd = list_first_entry_or_null(&atmel_aes.dev_list,
+ struct atmel_aes_dev, list);
spin_unlock_bh(&atmel_aes.lock);
-
return aes_dd;
}
@@ -967,7 +960,6 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd,
ctx = crypto_tfm_ctx(areq->tfm);
dd->areq = areq;
- dd->ctx = ctx;
start_async = (areq != new_areq);
dd->is_async = start_async;
@@ -1083,12 +1075,48 @@ static int atmel_aes_ctr_start(struct atmel_aes_dev *dd)
return atmel_aes_ctr_transfer(dd);
}
+static int atmel_aes_xts_fallback(struct skcipher_request *req, bool enc)
+{
+ struct atmel_aes_reqctx *rctx = skcipher_request_ctx(req);
+ struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(
+ crypto_skcipher_reqtfm(req));
+
+ skcipher_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
+ skcipher_request_set_callback(&rctx->fallback_req, req->base.flags,
+ req->base.complete, req->base.data);
+ skcipher_request_set_crypt(&rctx->fallback_req, req->src, req->dst,
+ req->cryptlen, req->iv);
+
+ return enc ? crypto_skcipher_encrypt(&rctx->fallback_req) :
+ crypto_skcipher_decrypt(&rctx->fallback_req);
+}
+
static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode)
{
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
struct atmel_aes_base_ctx *ctx = crypto_skcipher_ctx(skcipher);
struct atmel_aes_reqctx *rctx;
- struct atmel_aes_dev *dd;
+ u32 opmode = mode & AES_FLAGS_OPMODE_MASK;
+
+ if (opmode == AES_FLAGS_XTS) {
+ if (req->cryptlen < XTS_BLOCK_SIZE)
+ return -EINVAL;
+
+ if (!IS_ALIGNED(req->cryptlen, XTS_BLOCK_SIZE))
+ return atmel_aes_xts_fallback(req,
+ mode & AES_FLAGS_ENCRYPT);
+ }
+
+ /*
+ * ECB, CBC, CFB, OFB or CTR mode require the plaintext and ciphertext
+ * to have a positive integer length.
+ */
+ if (!req->cryptlen && opmode != AES_FLAGS_XTS)
+ return 0;
+
+ if ((opmode == AES_FLAGS_ECB || opmode == AES_FLAGS_CBC) &&
+ !IS_ALIGNED(req->cryptlen, crypto_skcipher_blocksize(skcipher)))
+ return -EINVAL;
switch (mode & AES_FLAGS_OPMODE_MASK) {
case AES_FLAGS_CFB8:
@@ -1113,14 +1141,10 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode)
}
ctx->is_aead = false;
- dd = atmel_aes_find_dev(ctx);
- if (!dd)
- return -ENODEV;
-
rctx = skcipher_request_ctx(req);
rctx->mode = mode;
- if ((mode & AES_FLAGS_OPMODE_MASK) != AES_FLAGS_ECB &&
+ if (opmode != AES_FLAGS_ECB &&
!(mode & AES_FLAGS_ENCRYPT) && req->src == req->dst) {
unsigned int ivsize = crypto_skcipher_ivsize(skcipher);
@@ -1130,7 +1154,7 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode)
ivsize, 0);
}
- return atmel_aes_handle_queue(dd, &req->base);
+ return atmel_aes_handle_queue(ctx->dd, &req->base);
}
static int atmel_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
@@ -1242,8 +1266,15 @@ static int atmel_aes_ctr_decrypt(struct skcipher_request *req)
static int atmel_aes_init_tfm(struct crypto_skcipher *tfm)
{
struct atmel_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct atmel_aes_dev *dd;
+
+ dd = atmel_aes_dev_alloc(&ctx->base);
+ if (!dd)
+ return -ENODEV;
crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
+ ctx->base.dd = dd;
+ ctx->base.dd->ctx = &ctx->base;
ctx->base.start = atmel_aes_start;
return 0;
@@ -1252,8 +1283,15 @@ static int atmel_aes_init_tfm(struct crypto_skcipher *tfm)
static int atmel_aes_ctr_init_tfm(struct crypto_skcipher *tfm)
{
struct atmel_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct atmel_aes_dev *dd;
+
+ dd = atmel_aes_dev_alloc(&ctx->base);
+ if (!dd)
+ return -ENODEV;
crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
+ ctx->base.dd = dd;
+ ctx->base.dd->ctx = &ctx->base;
ctx->base.start = atmel_aes_ctr_start;
return 0;
@@ -1290,7 +1328,7 @@ static struct skcipher_alg aes_algs[] = {
{
.base.cra_name = "ofb(aes)",
.base.cra_driver_name = "atmel-ofb-aes",
- .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct atmel_aes_ctx),
.init = atmel_aes_init_tfm,
@@ -1691,20 +1729,15 @@ static int atmel_aes_gcm_crypt(struct aead_request *req,
{
struct atmel_aes_base_ctx *ctx;
struct atmel_aes_reqctx *rctx;
- struct atmel_aes_dev *dd;
ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
ctx->block_size = AES_BLOCK_SIZE;
ctx->is_aead = true;
- dd = atmel_aes_find_dev(ctx);
- if (!dd)
- return -ENODEV;
-
rctx = aead_request_ctx(req);
rctx->mode = AES_FLAGS_GCM | mode;
- return atmel_aes_handle_queue(dd, &req->base);
+ return atmel_aes_handle_queue(ctx->dd, &req->base);
}
static int atmel_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
@@ -1742,8 +1775,15 @@ static int atmel_aes_gcm_decrypt(struct aead_request *req)
static int atmel_aes_gcm_init(struct crypto_aead *tfm)
{
struct atmel_aes_gcm_ctx *ctx = crypto_aead_ctx(tfm);
+ struct atmel_aes_dev *dd;
+
+ dd = atmel_aes_dev_alloc(&ctx->base);
+ if (!dd)
+ return -ENODEV;
crypto_aead_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
+ ctx->base.dd = dd;
+ ctx->base.dd->ctx = &ctx->base;
ctx->base.start = atmel_aes_gcm_start;
return 0;
@@ -1819,12 +1859,8 @@ static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd)
* the order of the ciphered tweak bytes need to be reversed before
* writing them into the ODATARx registers.
*/
- for (i = 0; i < AES_BLOCK_SIZE/2; ++i) {
- u8 tmp = tweak_bytes[AES_BLOCK_SIZE - 1 - i];
-
- tweak_bytes[AES_BLOCK_SIZE - 1 - i] = tweak_bytes[i];
- tweak_bytes[i] = tmp;
- }
+ for (i = 0; i < AES_BLOCK_SIZE/2; ++i)
+ swap(tweak_bytes[i], tweak_bytes[AES_BLOCK_SIZE - 1 - i]);
/* Process the data. */
atmel_aes_write_ctrl(dd, use_dma, NULL);
@@ -1849,6 +1885,13 @@ static int atmel_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
if (err)
return err;
+ crypto_skcipher_clear_flags(ctx->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+ crypto_skcipher_set_flags(ctx->fallback_tfm, tfm->base.crt_flags &
+ CRYPTO_TFM_REQ_MASK);
+ err = crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen);
+ if (err)
+ return err;
+
memcpy(ctx->base.key, key, keylen/2);
memcpy(ctx->key2, key + keylen/2, keylen/2);
ctx->base.keylen = keylen/2;
@@ -1869,18 +1912,40 @@ static int atmel_aes_xts_decrypt(struct skcipher_request *req)
static int atmel_aes_xts_init_tfm(struct crypto_skcipher *tfm)
{
struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct atmel_aes_dev *dd;
+ const char *tfm_name = crypto_tfm_alg_name(&tfm->base);
- crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
+ dd = atmel_aes_dev_alloc(&ctx->base);
+ if (!dd)
+ return -ENODEV;
+
+ ctx->fallback_tfm = crypto_alloc_skcipher(tfm_name, 0,
+ CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(ctx->fallback_tfm))
+ return PTR_ERR(ctx->fallback_tfm);
+
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx) +
+ crypto_skcipher_reqsize(ctx->fallback_tfm));
+ ctx->base.dd = dd;
+ ctx->base.dd->ctx = &ctx->base;
ctx->base.start = atmel_aes_xts_start;
return 0;
}
+static void atmel_aes_xts_exit_tfm(struct crypto_skcipher *tfm)
+{
+ struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ crypto_free_skcipher(ctx->fallback_tfm);
+}
+
static struct skcipher_alg aes_xts_alg = {
.base.cra_name = "xts(aes)",
.base.cra_driver_name = "atmel-xts-aes",
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct atmel_aes_xts_ctx),
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
@@ -1889,6 +1954,7 @@ static struct skcipher_alg aes_xts_alg = {
.encrypt = atmel_aes_xts_encrypt,
.decrypt = atmel_aes_xts_decrypt,
.init = atmel_aes_xts_init_tfm,
+ .exit = atmel_aes_xts_exit_tfm,
};
#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
@@ -2075,6 +2141,11 @@ static int atmel_aes_authenc_init_tfm(struct crypto_aead *tfm,
{
struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm);
unsigned int auth_reqsize = atmel_sha_authenc_get_reqsize();
+ struct atmel_aes_dev *dd;
+
+ dd = atmel_aes_dev_alloc(&ctx->base);
+ if (!dd)
+ return -ENODEV;
ctx->auth = atmel_sha_authenc_spawn(auth_mode);
if (IS_ERR(ctx->auth))
@@ -2082,6 +2153,8 @@ static int atmel_aes_authenc_init_tfm(struct crypto_aead *tfm,
crypto_aead_set_reqsize(tfm, (sizeof(struct atmel_aes_authenc_reqctx) +
auth_reqsize));
+ ctx->base.dd = dd;
+ ctx->base.dd->ctx = &ctx->base;
ctx->base.start = atmel_aes_authenc_start;
return 0;
@@ -2127,7 +2200,6 @@ static int atmel_aes_authenc_crypt(struct aead_request *req,
struct atmel_aes_base_ctx *ctx = crypto_aead_ctx(tfm);
u32 authsize = crypto_aead_authsize(tfm);
bool enc = (mode & AES_FLAGS_ENCRYPT);
- struct atmel_aes_dev *dd;
/* Compute text length. */
if (!enc && req->cryptlen < authsize)
@@ -2146,11 +2218,7 @@ static int atmel_aes_authenc_crypt(struct aead_request *req,
ctx->block_size = AES_BLOCK_SIZE;
ctx->is_aead = true;
- dd = atmel_aes_find_dev(ctx);
- if (!dd)
- return -ENODEV;
-
- return atmel_aes_handle_queue(dd, &req->base);
+ return atmel_aes_handle_queue(ctx->dd, &req->base);
}
static int atmel_aes_authenc_cbc_aes_encrypt(struct aead_request *req)
@@ -2358,7 +2426,7 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd)
static void atmel_aes_crypto_alg_init(struct crypto_alg *alg)
{
- alg->cra_flags = CRYPTO_ALG_ASYNC;
+ alg->cra_flags |= CRYPTO_ALG_ASYNC;
alg->cra_alignmask = 0xf;
alg->cra_priority = ATMEL_AES_PRIORITY;
alg->cra_module = THIS_MODULE;
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index 6f01c51e3c37..e30786ec9f2d 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -196,23 +196,15 @@ static void atmel_tdes_write_n(struct atmel_tdes_dev *dd, u32 offset,
atmel_tdes_write(dd, offset, *value);
}
-static struct atmel_tdes_dev *atmel_tdes_find_dev(struct atmel_tdes_ctx *ctx)
+static struct atmel_tdes_dev *atmel_tdes_dev_alloc(void)
{
- struct atmel_tdes_dev *tdes_dd = NULL;
- struct atmel_tdes_dev *tmp;
+ struct atmel_tdes_dev *tdes_dd;
spin_lock_bh(&atmel_tdes.lock);
- if (!ctx->dd) {
- list_for_each_entry(tmp, &atmel_tdes.dev_list, list) {
- tdes_dd = tmp;
- break;
- }
- ctx->dd = tdes_dd;
- } else {
- tdes_dd = ctx->dd;
- }
+ /* One TDES IP per SoC. */
+ tdes_dd = list_first_entry_or_null(&atmel_tdes.dev_list,
+ struct atmel_tdes_dev, list);
spin_unlock_bh(&atmel_tdes.lock);
-
return tdes_dd;
}
@@ -320,7 +312,7 @@ static int atmel_tdes_crypt_pdc_stop(struct atmel_tdes_dev *dd)
dd->buf_out, dd->buflen, dd->dma_size, 1);
if (count != dd->dma_size) {
err = -EINVAL;
- pr_err("not all data converted: %zu\n", count);
+ dev_dbg(dd->dev, "not all data converted: %zu\n", count);
}
}
@@ -337,24 +329,24 @@ static int atmel_tdes_buff_init(struct atmel_tdes_dev *dd)
dd->buflen &= ~(DES_BLOCK_SIZE - 1);
if (!dd->buf_in || !dd->buf_out) {
- dev_err(dd->dev, "unable to alloc pages.\n");
+ dev_dbg(dd->dev, "unable to alloc pages.\n");
goto err_alloc;
}
/* MAP here */
dd->dma_addr_in = dma_map_single(dd->dev, dd->buf_in,
dd->buflen, DMA_TO_DEVICE);
- if (dma_mapping_error(dd->dev, dd->dma_addr_in)) {
- dev_err(dd->dev, "dma %zd bytes error\n", dd->buflen);
- err = -EINVAL;
+ err = dma_mapping_error(dd->dev, dd->dma_addr_in);
+ if (err) {
+ dev_dbg(dd->dev, "dma %zd bytes error\n", dd->buflen);
goto err_map_in;
}
dd->dma_addr_out = dma_map_single(dd->dev, dd->buf_out,
dd->buflen, DMA_FROM_DEVICE);
- if (dma_mapping_error(dd->dev, dd->dma_addr_out)) {
- dev_err(dd->dev, "dma %zd bytes error\n", dd->buflen);
- err = -EINVAL;
+ err = dma_mapping_error(dd->dev, dd->dma_addr_out);
+ if (err) {
+ dev_dbg(dd->dev, "dma %zd bytes error\n", dd->buflen);
goto err_map_out;
}
@@ -367,8 +359,6 @@ err_map_in:
err_alloc:
free_page((unsigned long)dd->buf_out);
free_page((unsigned long)dd->buf_in);
- if (err)
- pr_err("error: %d\n", err);
return err;
}
@@ -520,14 +510,14 @@ static int atmel_tdes_crypt_start(struct atmel_tdes_dev *dd)
err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
if (!err) {
- dev_err(dd->dev, "dma_map_sg() error\n");
+ dev_dbg(dd->dev, "dma_map_sg() error\n");
return -EINVAL;
}
err = dma_map_sg(dd->dev, dd->out_sg, 1,
DMA_FROM_DEVICE);
if (!err) {
- dev_err(dd->dev, "dma_map_sg() error\n");
+ dev_dbg(dd->dev, "dma_map_sg() error\n");
dma_unmap_sg(dd->dev, dd->in_sg, 1,
DMA_TO_DEVICE);
return -EINVAL;
@@ -646,7 +636,6 @@ static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd,
rctx->mode &= TDES_FLAGS_MODE_MASK;
dd->flags = (dd->flags & ~TDES_FLAGS_MODE_MASK) | rctx->mode;
dd->ctx = ctx;
- ctx->dd = dd;
err = atmel_tdes_write_ctrl(dd);
if (!err)
@@ -679,7 +668,7 @@ static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd)
dd->buf_out, dd->buflen, dd->dma_size, 1);
if (count != dd->dma_size) {
err = -EINVAL;
- pr_err("not all data converted: %zu\n", count);
+ dev_dbg(dd->dev, "not all data converted: %zu\n", count);
}
}
}
@@ -691,11 +680,15 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(skcipher);
struct atmel_tdes_reqctx *rctx = skcipher_request_ctx(req);
+ struct device *dev = ctx->dd->dev;
+
+ if (!req->cryptlen)
+ return 0;
switch (mode & TDES_FLAGS_OPMODE_MASK) {
case TDES_FLAGS_CFB8:
if (!IS_ALIGNED(req->cryptlen, CFB8_BLOCK_SIZE)) {
- pr_err("request size is not exact amount of CFB8 blocks\n");
+ dev_dbg(dev, "request size is not exact amount of CFB8 blocks\n");
return -EINVAL;
}
ctx->block_size = CFB8_BLOCK_SIZE;
@@ -703,7 +696,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
case TDES_FLAGS_CFB16:
if (!IS_ALIGNED(req->cryptlen, CFB16_BLOCK_SIZE)) {
- pr_err("request size is not exact amount of CFB16 blocks\n");
+ dev_dbg(dev, "request size is not exact amount of CFB16 blocks\n");
return -EINVAL;
}
ctx->block_size = CFB16_BLOCK_SIZE;
@@ -711,7 +704,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
case TDES_FLAGS_CFB32:
if (!IS_ALIGNED(req->cryptlen, CFB32_BLOCK_SIZE)) {
- pr_err("request size is not exact amount of CFB32 blocks\n");
+ dev_dbg(dev, "request size is not exact amount of CFB32 blocks\n");
return -EINVAL;
}
ctx->block_size = CFB32_BLOCK_SIZE;
@@ -719,7 +712,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
default:
if (!IS_ALIGNED(req->cryptlen, DES_BLOCK_SIZE)) {
- pr_err("request size is not exact amount of DES blocks\n");
+ dev_dbg(dev, "request size is not exact amount of DES blocks\n");
return -EINVAL;
}
ctx->block_size = DES_BLOCK_SIZE;
@@ -897,14 +890,13 @@ static int atmel_tdes_ofb_decrypt(struct skcipher_request *req)
static int atmel_tdes_init_tfm(struct crypto_skcipher *tfm)
{
struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct atmel_tdes_dev *dd;
-
- crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_tdes_reqctx));
- dd = atmel_tdes_find_dev(ctx);
- if (!dd)
+ ctx->dd = atmel_tdes_dev_alloc();
+ if (!ctx->dd)
return -ENODEV;
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_tdes_reqctx));
+
return 0;
}
@@ -999,7 +991,7 @@ static struct skcipher_alg tdes_algs[] = {
{
.base.cra_name = "ofb(des)",
.base.cra_driver_name = "atmel-ofb-des",
- .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_blocksize = 1,
.base.cra_alignmask = 0x7,
.min_keysize = DES_KEY_SIZE,
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 91808402e0bf..2ecb0e1f65d8 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -300,6 +300,9 @@ static int __sev_platform_shutdown_locked(int *error)
struct sev_device *sev = psp_master->sev_data;
int ret;
+ if (sev->state == SEV_STATE_UNINIT)
+ return 0;
+
ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
if (ret)
return ret;
@@ -1019,6 +1022,20 @@ e_err:
return ret;
}
+static void sev_firmware_shutdown(struct sev_device *sev)
+{
+ sev_platform_shutdown(NULL);
+
+ if (sev_es_tmr) {
+ /* The TMR area was encrypted, flush it from the cache */
+ wbinvd_on_all_cpus();
+
+ free_pages((unsigned long)sev_es_tmr,
+ get_order(SEV_ES_TMR_SIZE));
+ sev_es_tmr = NULL;
+ }
+}
+
void sev_dev_destroy(struct psp_device *psp)
{
struct sev_device *sev = psp->sev_data;
@@ -1026,6 +1043,8 @@ void sev_dev_destroy(struct psp_device *psp)
if (!sev)
return;
+ sev_firmware_shutdown(sev);
+
if (sev->misc)
kref_put(&misc_dev->refcount, sev_exit);
@@ -1056,21 +1075,6 @@ void sev_pci_init(void)
if (sev_get_api_version())
goto err;
- /*
- * If platform is not in UNINIT state then firmware upgrade and/or
- * platform INIT command will fail. These command require UNINIT state.
- *
- * In a normal boot we should never run into case where the firmware
- * is not in UNINIT state on boot. But in case of kexec boot, a reboot
- * may not go through a typical shutdown sequence and may leave the
- * firmware in INIT or WORKING state.
- */
-
- if (sev->state != SEV_STATE_UNINIT) {
- sev_platform_shutdown(NULL);
- sev->state = SEV_STATE_UNINIT;
- }
-
if (sev_version_greater_or_equal(0, 15) &&
sev_update_firmware(sev->dev) == 0)
sev_get_api_version();
@@ -1115,17 +1119,10 @@ err:
void sev_pci_exit(void)
{
- if (!psp_master->sev_data)
- return;
-
- sev_platform_shutdown(NULL);
+ struct sev_device *sev = psp_master->sev_data;
- if (sev_es_tmr) {
- /* The TMR area was encrypted, flush it from the cache */
- wbinvd_on_all_cpus();
+ if (!sev)
+ return;
- free_pages((unsigned long)sev_es_tmr,
- get_order(SEV_ES_TMR_SIZE));
- sev_es_tmr = NULL;
- }
+ sev_firmware_shutdown(sev);
}
diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
index 6fb6ba35f89d..88c672ad27e4 100644
--- a/drivers/crypto/ccp/sp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -241,6 +241,17 @@ e_err:
return ret;
}
+static void sp_pci_shutdown(struct pci_dev *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct sp_device *sp = dev_get_drvdata(dev);
+
+ if (!sp)
+ return;
+
+ sp_destroy(sp);
+}
+
static void sp_pci_remove(struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
@@ -351,6 +362,12 @@ static const struct sp_dev_vdata dev_vdata[] = {
.psp_vdata = &pspv3,
#endif
},
+ { /* 5 */
+ .bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_PSP
+ .psp_vdata = &pspv2,
+#endif
+ },
};
static const struct pci_device_id sp_pci_table[] = {
{ PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&dev_vdata[0] },
@@ -359,6 +376,7 @@ static const struct pci_device_id sp_pci_table[] = {
{ PCI_VDEVICE(AMD, 0x1486), (kernel_ulong_t)&dev_vdata[3] },
{ PCI_VDEVICE(AMD, 0x15DF), (kernel_ulong_t)&dev_vdata[4] },
{ PCI_VDEVICE(AMD, 0x1649), (kernel_ulong_t)&dev_vdata[4] },
+ { PCI_VDEVICE(AMD, 0x14CA), (kernel_ulong_t)&dev_vdata[5] },
/* Last entry must be zero */
{ 0, }
};
@@ -371,6 +389,7 @@ static struct pci_driver sp_pci_driver = {
.id_table = sp_pci_table,
.probe = sp_pci_probe,
.remove = sp_pci_remove,
+ .shutdown = sp_pci_shutdown,
.driver.pm = &sp_pci_pm_ops,
};
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 8b0640fb04be..65a641396c07 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/pm_runtime.h>
#include <linux/topology.h>
#include <linux/uacce.h>
#include "hpre.h"
@@ -81,6 +82,16 @@
#define HPRE_PREFETCH_DISABLE BIT(30)
#define HPRE_SVA_DISABLE_READY (BIT(4) | BIT(8))
+/* clock gate */
+#define HPRE_CLKGATE_CTL 0x301a10
+#define HPRE_PEH_CFG_AUTO_GATE 0x301a2c
+#define HPRE_CLUSTER_DYN_CTL 0x302010
+#define HPRE_CORE_SHB_CFG 0x302088
+#define HPRE_CLKGATE_CTL_EN BIT(0)
+#define HPRE_PEH_CFG_AUTO_GATE_EN BIT(0)
+#define HPRE_CLUSTER_DYN_CTL_EN BIT(0)
+#define HPRE_CORE_GATE_EN (BIT(30) | BIT(31))
+
#define HPRE_AM_OOO_SHUTDOWN_ENB 0x301044
#define HPRE_AM_OOO_SHUTDOWN_ENABLE BIT(0)
#define HPRE_WR_MSI_PORT BIT(2)
@@ -417,12 +428,63 @@ static void hpre_close_sva_prefetch(struct hisi_qm *qm)
pci_err(qm->pdev, "failed to close sva prefetch\n");
}
+static void hpre_enable_clock_gate(struct hisi_qm *qm)
+{
+ u32 val;
+
+ if (qm->ver < QM_HW_V3)
+ return;
+
+ val = readl(qm->io_base + HPRE_CLKGATE_CTL);
+ val |= HPRE_CLKGATE_CTL_EN;
+ writel(val, qm->io_base + HPRE_CLKGATE_CTL);
+
+ val = readl(qm->io_base + HPRE_PEH_CFG_AUTO_GATE);
+ val |= HPRE_PEH_CFG_AUTO_GATE_EN;
+ writel(val, qm->io_base + HPRE_PEH_CFG_AUTO_GATE);
+
+ val = readl(qm->io_base + HPRE_CLUSTER_DYN_CTL);
+ val |= HPRE_CLUSTER_DYN_CTL_EN;
+ writel(val, qm->io_base + HPRE_CLUSTER_DYN_CTL);
+
+ val = readl_relaxed(qm->io_base + HPRE_CORE_SHB_CFG);
+ val |= HPRE_CORE_GATE_EN;
+ writel(val, qm->io_base + HPRE_CORE_SHB_CFG);
+}
+
+static void hpre_disable_clock_gate(struct hisi_qm *qm)
+{
+ u32 val;
+
+ if (qm->ver < QM_HW_V3)
+ return;
+
+ val = readl(qm->io_base + HPRE_CLKGATE_CTL);
+ val &= ~HPRE_CLKGATE_CTL_EN;
+ writel(val, qm->io_base + HPRE_CLKGATE_CTL);
+
+ val = readl(qm->io_base + HPRE_PEH_CFG_AUTO_GATE);
+ val &= ~HPRE_PEH_CFG_AUTO_GATE_EN;
+ writel(val, qm->io_base + HPRE_PEH_CFG_AUTO_GATE);
+
+ val = readl(qm->io_base + HPRE_CLUSTER_DYN_CTL);
+ val &= ~HPRE_CLUSTER_DYN_CTL_EN;
+ writel(val, qm->io_base + HPRE_CLUSTER_DYN_CTL);
+
+ val = readl_relaxed(qm->io_base + HPRE_CORE_SHB_CFG);
+ val &= ~HPRE_CORE_GATE_EN;
+ writel(val, qm->io_base + HPRE_CORE_SHB_CFG);
+}
+
static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
u32 val;
int ret;
+ /* disable dynamic clock gate before sram init */
+ hpre_disable_clock_gate(qm);
+
writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_ARUSER_M_CFG_ENABLE);
writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_AWUSER_M_CFG_ENABLE);
writel_relaxed(HPRE_QM_AXI_CFG_MASK, qm->io_base + QM_AXI_M_CFG);
@@ -473,6 +535,8 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
/* Config data buffer pasid needed by Kunpeng 920 */
hpre_config_pasid(qm);
+ hpre_enable_clock_gate(qm);
+
return ret;
}
@@ -595,10 +659,15 @@ static ssize_t hpre_ctrl_debug_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
struct hpre_debugfs_file *file = filp->private_data;
+ struct hisi_qm *qm = hpre_file_to_qm(file);
char tbuf[HPRE_DBGFS_VAL_MAX_LEN];
u32 val;
int ret;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
spin_lock_irq(&file->lock);
switch (file->type) {
case HPRE_CLEAR_ENABLE:
@@ -608,18 +677,25 @@ static ssize_t hpre_ctrl_debug_read(struct file *filp, char __user *buf,
val = hpre_cluster_inqry_read(file);
break;
default:
- spin_unlock_irq(&file->lock);
- return -EINVAL;
+ goto err_input;
}
spin_unlock_irq(&file->lock);
+
+ hisi_qm_put_dfx_access(qm);
ret = snprintf(tbuf, HPRE_DBGFS_VAL_MAX_LEN, "%u\n", val);
return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+
+err_input:
+ spin_unlock_irq(&file->lock);
+ hisi_qm_put_dfx_access(qm);
+ return -EINVAL;
}
static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct hpre_debugfs_file *file = filp->private_data;
+ struct hisi_qm *qm = hpre_file_to_qm(file);
char tbuf[HPRE_DBGFS_VAL_MAX_LEN];
unsigned long val;
int len, ret;
@@ -639,6 +715,10 @@ static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf,
if (kstrtoul(tbuf, 0, &val))
return -EFAULT;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
spin_lock_irq(&file->lock);
switch (file->type) {
case HPRE_CLEAR_ENABLE:
@@ -655,12 +735,12 @@ static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf,
ret = -EINVAL;
goto err_input;
}
- spin_unlock_irq(&file->lock);
- return count;
+ ret = count;
err_input:
spin_unlock_irq(&file->lock);
+ hisi_qm_put_dfx_access(qm);
return ret;
}
@@ -700,6 +780,24 @@ static int hpre_debugfs_atomic64_set(void *data, u64 val)
DEFINE_DEBUGFS_ATTRIBUTE(hpre_atomic64_ops, hpre_debugfs_atomic64_get,
hpre_debugfs_atomic64_set, "%llu\n");
+static int hpre_com_regs_show(struct seq_file *s, void *unused)
+{
+ hisi_qm_regs_dump(s, s->private);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(hpre_com_regs);
+
+static int hpre_cluster_regs_show(struct seq_file *s, void *unused)
+{
+ hisi_qm_regs_dump(s, s->private);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(hpre_cluster_regs);
+
static int hpre_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir,
enum hpre_ctrl_dbgfs_file type, int indx)
{
@@ -737,8 +835,11 @@ static int hpre_pf_comm_regs_debugfs_init(struct hisi_qm *qm)
regset->regs = hpre_com_dfx_regs;
regset->nregs = ARRAY_SIZE(hpre_com_dfx_regs);
regset->base = qm->io_base;
+ regset->dev = dev;
+
+ debugfs_create_file("regs", 0444, qm->debug.debug_root,
+ regset, &hpre_com_regs_fops);
- debugfs_create_regset32("regs", 0444, qm->debug.debug_root, regset);
return 0;
}
@@ -764,8 +865,10 @@ static int hpre_cluster_debugfs_init(struct hisi_qm *qm)
regset->regs = hpre_cluster_dfx_regs;
regset->nregs = ARRAY_SIZE(hpre_cluster_dfx_regs);
regset->base = qm->io_base + hpre_cluster_offsets[i];
+ regset->dev = dev;
- debugfs_create_regset32("regs", 0444, tmp_d, regset);
+ debugfs_create_file("regs", 0444, tmp_d, regset,
+ &hpre_cluster_regs_fops);
ret = hpre_create_debugfs_file(qm, tmp_d, HPRE_CLUSTER_CTRL,
i + HPRE_CLUSTER_CTRL);
if (ret)
@@ -1017,6 +1120,8 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_with_alg_register;
}
+ hisi_qm_pm_init(qm);
+
return 0;
err_with_alg_register:
@@ -1040,6 +1145,7 @@ static void hpre_remove(struct pci_dev *pdev)
struct hisi_qm *qm = pci_get_drvdata(pdev);
int ret;
+ hisi_qm_pm_uninit(qm);
hisi_qm_wait_task_finish(qm, &hpre_devices);
hisi_qm_alg_unregister(qm, &hpre_devices);
if (qm->fun_type == QM_HW_PF && qm->vfs_num) {
@@ -1062,6 +1168,10 @@ static void hpre_remove(struct pci_dev *pdev)
hisi_qm_uninit(qm);
}
+static const struct dev_pm_ops hpre_pm_ops = {
+ SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL)
+};
+
static const struct pci_error_handlers hpre_err_handler = {
.error_detected = hisi_qm_dev_err_detected,
.slot_reset = hisi_qm_dev_slot_reset,
@@ -1078,6 +1188,7 @@ static struct pci_driver hpre_pci_driver = {
hisi_qm_sriov_configure : NULL,
.err_handler = &hpre_err_handler,
.shutdown = hisi_qm_dev_shutdown,
+ .driver.pm = &hpre_pm_ops,
};
static void hpre_register_debugfs(void)
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 1d67f94a1d56..369562d34d66 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -4,12 +4,12 @@
#include <linux/acpi.h>
#include <linux/aer.h>
#include <linux/bitmap.h>
-#include <linux/debugfs.h>
#include <linux/dma-mapping.h>
#include <linux/idr.h>
#include <linux/io.h>
#include <linux/irqreturn.h>
#include <linux/log2.h>
+#include <linux/pm_runtime.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/uacce.h>
@@ -270,6 +270,8 @@
#define QM_QOS_MAX_CIR_S 11
#define QM_QOS_VAL_MAX_LEN 32
+#define QM_AUTOSUSPEND_DELAY 3000
+
#define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \
(((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \
((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT) | \
@@ -734,6 +736,34 @@ static u32 qm_get_irq_num_v3(struct hisi_qm *qm)
return QM_IRQ_NUM_VF_V3;
}
+static int qm_pm_get_sync(struct hisi_qm *qm)
+{
+ struct device *dev = &qm->pdev->dev;
+ int ret;
+
+ if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3)
+ return 0;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0) {
+ dev_err(dev, "failed to get_sync(%d).\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void qm_pm_put_sync(struct hisi_qm *qm)
+{
+ struct device *dev = &qm->pdev->dev;
+
+ if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3)
+ return;
+
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
+}
+
static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe)
{
u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
@@ -1173,16 +1203,13 @@ static struct hisi_qm *file_to_qm(struct debugfs_file *file)
return container_of(debug, struct hisi_qm, debug);
}
-static u32 current_q_read(struct debugfs_file *file)
+static u32 current_q_read(struct hisi_qm *qm)
{
- struct hisi_qm *qm = file_to_qm(file);
-
return readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) >> QM_DFX_QN_SHIFT;
}
-static int current_q_write(struct debugfs_file *file, u32 val)
+static int current_q_write(struct hisi_qm *qm, u32 val)
{
- struct hisi_qm *qm = file_to_qm(file);
u32 tmp;
if (val >= qm->debug.curr_qm_qp_num)
@@ -1199,18 +1226,14 @@ static int current_q_write(struct debugfs_file *file, u32 val)
return 0;
}
-static u32 clear_enable_read(struct debugfs_file *file)
+static u32 clear_enable_read(struct hisi_qm *qm)
{
- struct hisi_qm *qm = file_to_qm(file);
-
return readl(qm->io_base + QM_DFX_CNT_CLR_CE);
}
/* rd_clr_ctrl 1 enable read clear, otherwise 0 disable it */
-static int clear_enable_write(struct debugfs_file *file, u32 rd_clr_ctrl)
+static int clear_enable_write(struct hisi_qm *qm, u32 rd_clr_ctrl)
{
- struct hisi_qm *qm = file_to_qm(file);
-
if (rd_clr_ctrl > 1)
return -EINVAL;
@@ -1219,16 +1242,13 @@ static int clear_enable_write(struct debugfs_file *file, u32 rd_clr_ctrl)
return 0;
}
-static u32 current_qm_read(struct debugfs_file *file)
+static u32 current_qm_read(struct hisi_qm *qm)
{
- struct hisi_qm *qm = file_to_qm(file);
-
return readl(qm->io_base + QM_DFX_MB_CNT_VF);
}
-static int current_qm_write(struct debugfs_file *file, u32 val)
+static int current_qm_write(struct hisi_qm *qm, u32 val)
{
- struct hisi_qm *qm = file_to_qm(file);
u32 tmp;
if (val > qm->vfs_num)
@@ -1259,29 +1279,39 @@ static ssize_t qm_debug_read(struct file *filp, char __user *buf,
{
struct debugfs_file *file = filp->private_data;
enum qm_debug_file index = file->index;
+ struct hisi_qm *qm = file_to_qm(file);
char tbuf[QM_DBG_TMP_BUF_LEN];
u32 val;
int ret;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
mutex_lock(&file->lock);
switch (index) {
case CURRENT_QM:
- val = current_qm_read(file);
+ val = current_qm_read(qm);
break;
case CURRENT_Q:
- val = current_q_read(file);
+ val = current_q_read(qm);
break;
case CLEAR_ENABLE:
- val = clear_enable_read(file);
+ val = clear_enable_read(qm);
break;
default:
- mutex_unlock(&file->lock);
- return -EINVAL;
+ goto err_input;
}
mutex_unlock(&file->lock);
+ hisi_qm_put_dfx_access(qm);
ret = scnprintf(tbuf, QM_DBG_TMP_BUF_LEN, "%u\n", val);
return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+
+err_input:
+ mutex_unlock(&file->lock);
+ hisi_qm_put_dfx_access(qm);
+ return -EINVAL;
}
static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
@@ -1289,6 +1319,7 @@ static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
{
struct debugfs_file *file = filp->private_data;
enum qm_debug_file index = file->index;
+ struct hisi_qm *qm = file_to_qm(file);
unsigned long val;
char tbuf[QM_DBG_TMP_BUF_LEN];
int len, ret;
@@ -1308,22 +1339,28 @@ static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
if (kstrtoul(tbuf, 0, &val))
return -EFAULT;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
mutex_lock(&file->lock);
switch (index) {
case CURRENT_QM:
- ret = current_qm_write(file, val);
+ ret = current_qm_write(qm, val);
break;
case CURRENT_Q:
- ret = current_q_write(file, val);
+ ret = current_q_write(qm, val);
break;
case CLEAR_ENABLE:
- ret = clear_enable_write(file, val);
+ ret = clear_enable_write(qm, val);
break;
default:
ret = -EINVAL;
}
mutex_unlock(&file->lock);
+ hisi_qm_put_dfx_access(qm);
+
if (ret)
return ret;
@@ -1337,13 +1374,8 @@ static const struct file_operations qm_debug_fops = {
.write = qm_debug_write,
};
-struct qm_dfx_registers {
- char *reg_name;
- u64 reg_offset;
-};
-
#define CNT_CYC_REGS_NUM 10
-static struct qm_dfx_registers qm_dfx_regs[] = {
+static const struct debugfs_reg32 qm_dfx_regs[] = {
/* XXX_CNT are reading clear register */
{"QM_ECC_1BIT_CNT ", 0x104000ull},
{"QM_ECC_MBIT_CNT ", 0x104008ull},
@@ -1369,31 +1401,59 @@ static struct qm_dfx_registers qm_dfx_regs[] = {
{"QM_DFX_FF_ST5 ", 0x1040dcull},
{"QM_DFX_FF_ST6 ", 0x1040e0ull},
{"QM_IN_IDLE_ST ", 0x1040e4ull},
- { NULL, 0}
};
-static struct qm_dfx_registers qm_vf_dfx_regs[] = {
+static const struct debugfs_reg32 qm_vf_dfx_regs[] = {
{"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull},
- { NULL, 0}
};
-static int qm_regs_show(struct seq_file *s, void *unused)
+/**
+ * hisi_qm_regs_dump() - Dump registers's value.
+ * @s: debugfs file handle.
+ * @regset: accelerator registers information.
+ *
+ * Dump accelerator registers.
+ */
+void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset)
{
- struct hisi_qm *qm = s->private;
- struct qm_dfx_registers *regs;
+ struct pci_dev *pdev = to_pci_dev(regset->dev);
+ struct hisi_qm *qm = pci_get_drvdata(pdev);
+ const struct debugfs_reg32 *regs = regset->regs;
+ int regs_len = regset->nregs;
+ int i, ret;
u32 val;
- if (qm->fun_type == QM_HW_PF)
- regs = qm_dfx_regs;
- else
- regs = qm_vf_dfx_regs;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return;
- while (regs->reg_name) {
- val = readl(qm->io_base + regs->reg_offset);
- seq_printf(s, "%s= 0x%08x\n", regs->reg_name, val);
- regs++;
+ for (i = 0; i < regs_len; i++) {
+ val = readl(regset->base + regs[i].offset);
+ seq_printf(s, "%s= 0x%08x\n", regs[i].name, val);
}
+ hisi_qm_put_dfx_access(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_regs_dump);
+
+static int qm_regs_show(struct seq_file *s, void *unused)
+{
+ struct hisi_qm *qm = s->private;
+ struct debugfs_regset32 regset;
+
+ if (qm->fun_type == QM_HW_PF) {
+ regset.regs = qm_dfx_regs;
+ regset.nregs = ARRAY_SIZE(qm_dfx_regs);
+ } else {
+ regset.regs = qm_vf_dfx_regs;
+ regset.nregs = ARRAY_SIZE(qm_vf_dfx_regs);
+ }
+
+ regset.base = qm->io_base;
+ regset.dev = &qm->pdev->dev;
+
+ hisi_qm_regs_dump(s, &regset);
+
return 0;
}
@@ -1823,16 +1883,24 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer,
if (*pos)
return 0;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
/* Judge if the instance is being reset. */
if (unlikely(atomic_read(&qm->status.flags) == QM_STOP))
return 0;
- if (count > QM_DBG_WRITE_LEN)
- return -ENOSPC;
+ if (count > QM_DBG_WRITE_LEN) {
+ ret = -ENOSPC;
+ goto put_dfx_access;
+ }
cmd_buf = memdup_user_nul(buffer, count);
- if (IS_ERR(cmd_buf))
- return PTR_ERR(cmd_buf);
+ if (IS_ERR(cmd_buf)) {
+ ret = PTR_ERR(cmd_buf);
+ goto put_dfx_access;
+ }
cmd_buf_tmp = strchr(cmd_buf, '\n');
if (cmd_buf_tmp) {
@@ -1843,12 +1911,16 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer,
ret = qm_cmd_write_dump(qm, cmd_buf);
if (ret) {
kfree(cmd_buf);
- return ret;
+ goto put_dfx_access;
}
kfree(cmd_buf);
- return count;
+ ret = count;
+
+put_dfx_access:
+ hisi_qm_put_dfx_access(qm);
+ return ret;
}
static const struct file_operations qm_cmd_fops = {
@@ -2445,11 +2517,19 @@ static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type)
struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
{
struct hisi_qp *qp;
+ int ret;
+
+ ret = qm_pm_get_sync(qm);
+ if (ret)
+ return ERR_PTR(ret);
down_write(&qm->qps_lock);
qp = qm_create_qp_nolock(qm, alg_type);
up_write(&qm->qps_lock);
+ if (IS_ERR(qp))
+ qm_pm_put_sync(qm);
+
return qp;
}
EXPORT_SYMBOL_GPL(hisi_qm_create_qp);
@@ -2475,6 +2555,8 @@ void hisi_qm_release_qp(struct hisi_qp *qp)
idr_remove(&qm->qp_idr, qp->qp_id);
up_write(&qm->qps_lock);
+
+ qm_pm_put_sync(qm);
}
EXPORT_SYMBOL_GPL(hisi_qm_release_qp);
@@ -3200,6 +3282,10 @@ static void hisi_qm_pre_init(struct hisi_qm *qm)
init_rwsem(&qm->qps_lock);
qm->qp_in_used = 0;
qm->misc_ctl = false;
+ if (qm->fun_type == QM_HW_PF && qm->ver > QM_HW_V2) {
+ if (!acpi_device_power_manageable(ACPI_COMPANION(&pdev->dev)))
+ dev_info(&pdev->dev, "_PS0 and _PR0 are not defined");
+ }
}
static void qm_cmd_uninit(struct hisi_qm *qm)
@@ -4057,10 +4143,15 @@ static ssize_t qm_algqos_read(struct file *filp, char __user *buf,
u32 qos_val, ir;
int ret;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
/* Mailbox and reset cannot be operated at the same time */
if (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
pci_err(qm->pdev, "dev resetting, read alg qos failed!\n");
- return -EAGAIN;
+ ret = -EAGAIN;
+ goto err_put_dfx_access;
}
if (qm->fun_type == QM_HW_PF) {
@@ -4079,6 +4170,8 @@ static ssize_t qm_algqos_read(struct file *filp, char __user *buf,
err_get_status:
clear_bit(QM_RESETTING, &qm->misc_ctl);
+err_put_dfx_access:
+ hisi_qm_put_dfx_access(qm);
return ret;
}
@@ -4159,15 +4252,23 @@ static ssize_t qm_algqos_write(struct file *filp, const char __user *buf,
fun_index = device * 8 + function;
+ ret = qm_pm_get_sync(qm);
+ if (ret) {
+ ret = -EINVAL;
+ goto err_get_status;
+ }
+
ret = qm_func_shaper_enable(qm, fun_index, val);
if (ret) {
pci_err(qm->pdev, "failed to enable function shaper!\n");
ret = -EINVAL;
- goto err_get_status;
+ goto err_put_sync;
}
- ret = count;
+ ret = count;
+err_put_sync:
+ qm_pm_put_sync(qm);
err_get_status:
clear_bit(QM_RESETTING, &qm->misc_ctl);
return ret;
@@ -4245,7 +4346,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_debug_init);
*/
void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
{
- struct qm_dfx_registers *regs;
+ const struct debugfs_reg32 *regs;
int i;
/* clear current_qm */
@@ -4264,7 +4365,7 @@ void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
regs = qm_dfx_regs;
for (i = 0; i < CNT_CYC_REGS_NUM; i++) {
- readl(qm->io_base + regs->reg_offset);
+ readl(qm->io_base + regs->offset);
regs++;
}
@@ -4287,19 +4388,23 @@ int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs)
struct hisi_qm *qm = pci_get_drvdata(pdev);
int pre_existing_vfs, num_vfs, total_vfs, ret;
+ ret = qm_pm_get_sync(qm);
+ if (ret)
+ return ret;
+
total_vfs = pci_sriov_get_totalvfs(pdev);
pre_existing_vfs = pci_num_vf(pdev);
if (pre_existing_vfs) {
pci_err(pdev, "%d VFs already enabled. Please disable pre-enabled VFs!\n",
pre_existing_vfs);
- return 0;
+ goto err_put_sync;
}
num_vfs = min_t(int, max_vfs, total_vfs);
ret = qm_vf_q_assign(qm, num_vfs);
if (ret) {
pci_err(pdev, "Can't assign queues for VF!\n");
- return ret;
+ goto err_put_sync;
}
qm->vfs_num = num_vfs;
@@ -4308,12 +4413,16 @@ int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs)
if (ret) {
pci_err(pdev, "Can't enable VF!\n");
qm_clear_vft_config(qm);
- return ret;
+ goto err_put_sync;
}
pci_info(pdev, "VF enabled, vfs_num(=%d)!\n", num_vfs);
return num_vfs;
+
+err_put_sync:
+ qm_pm_put_sync(qm);
+ return ret;
}
EXPORT_SYMBOL_GPL(hisi_qm_sriov_enable);
@@ -4328,6 +4437,7 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen)
{
struct hisi_qm *qm = pci_get_drvdata(pdev);
int total_vfs = pci_sriov_get_totalvfs(qm->pdev);
+ int ret;
if (pci_vfs_assigned(pdev)) {
pci_err(pdev, "Failed to disable VFs as VFs are assigned!\n");
@@ -4343,8 +4453,13 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen)
pci_disable_sriov(pdev);
/* clear vf function shaper configure array */
memset(qm->factor + 1, 0, sizeof(struct qm_shaper_factor) * total_vfs);
+ ret = qm_clear_vft_config(qm);
+ if (ret)
+ return ret;
- return qm_clear_vft_config(qm);
+ qm_pm_put_sync(qm);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(hisi_qm_sriov_disable);
@@ -5164,11 +5279,18 @@ static void hisi_qm_controller_reset(struct work_struct *rst_work)
struct hisi_qm *qm = container_of(rst_work, struct hisi_qm, rst_work);
int ret;
+ ret = qm_pm_get_sync(qm);
+ if (ret) {
+ clear_bit(QM_RST_SCHED, &qm->misc_ctl);
+ return;
+ }
+
/* reset pcie device controller */
ret = qm_controller_reset(qm);
if (ret)
dev_err(&qm->pdev->dev, "controller reset failed (%d)\n", ret);
+ qm_pm_put_sync(qm);
}
static void qm_pf_reset_vf_prepare(struct hisi_qm *qm,
@@ -5680,6 +5802,194 @@ err_pci_init:
}
EXPORT_SYMBOL_GPL(hisi_qm_init);
+/**
+ * hisi_qm_get_dfx_access() - Try to get dfx access.
+ * @qm: pointer to accelerator device.
+ *
+ * Try to get dfx access, then user can get message.
+ *
+ * If device is in suspended, return failure, otherwise
+ * bump up the runtime PM usage counter.
+ */
+int hisi_qm_get_dfx_access(struct hisi_qm *qm)
+{
+ struct device *dev = &qm->pdev->dev;
+
+ if (pm_runtime_suspended(dev)) {
+ dev_info(dev, "can not read/write - device in suspended.\n");
+ return -EAGAIN;
+ }
+
+ return qm_pm_get_sync(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_get_dfx_access);
+
+/**
+ * hisi_qm_put_dfx_access() - Put dfx access.
+ * @qm: pointer to accelerator device.
+ *
+ * Put dfx access, drop runtime PM usage counter.
+ */
+void hisi_qm_put_dfx_access(struct hisi_qm *qm)
+{
+ qm_pm_put_sync(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_put_dfx_access);
+
+/**
+ * hisi_qm_pm_init() - Initialize qm runtime PM.
+ * @qm: pointer to accelerator device.
+ *
+ * Function that initialize qm runtime PM.
+ */
+void hisi_qm_pm_init(struct hisi_qm *qm)
+{
+ struct device *dev = &qm->pdev->dev;
+
+ if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3)
+ return;
+
+ pm_runtime_set_autosuspend_delay(dev, QM_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_put_noidle(dev);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_pm_init);
+
+/**
+ * hisi_qm_pm_uninit() - Uninitialize qm runtime PM.
+ * @qm: pointer to accelerator device.
+ *
+ * Function that uninitialize qm runtime PM.
+ */
+void hisi_qm_pm_uninit(struct hisi_qm *qm)
+{
+ struct device *dev = &qm->pdev->dev;
+
+ if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3)
+ return;
+
+ pm_runtime_get_noresume(dev);
+ pm_runtime_dont_use_autosuspend(dev);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_pm_uninit);
+
+static int qm_prepare_for_suspend(struct hisi_qm *qm)
+{
+ struct pci_dev *pdev = qm->pdev;
+ int ret;
+ u32 val;
+
+ ret = qm->ops->set_msi(qm, false);
+ if (ret) {
+ pci_err(pdev, "failed to disable MSI before suspending!\n");
+ return ret;
+ }
+
+ /* shutdown OOO register */
+ writel(ACC_MASTER_GLOBAL_CTRL_SHUTDOWN,
+ qm->io_base + ACC_MASTER_GLOBAL_CTRL);
+
+ ret = readl_relaxed_poll_timeout(qm->io_base + ACC_MASTER_TRANS_RETURN,
+ val,
+ (val == ACC_MASTER_TRANS_RETURN_RW),
+ POLL_PERIOD, POLL_TIMEOUT);
+ if (ret) {
+ pci_emerg(pdev, "Bus lock! Please reset system.\n");
+ return ret;
+ }
+
+ ret = qm_set_pf_mse(qm, false);
+ if (ret)
+ pci_err(pdev, "failed to disable MSE before suspending!\n");
+
+ return ret;
+}
+
+static int qm_rebuild_for_resume(struct hisi_qm *qm)
+{
+ struct pci_dev *pdev = qm->pdev;
+ int ret;
+
+ ret = qm_set_pf_mse(qm, true);
+ if (ret) {
+ pci_err(pdev, "failed to enable MSE after resuming!\n");
+ return ret;
+ }
+
+ ret = qm->ops->set_msi(qm, true);
+ if (ret) {
+ pci_err(pdev, "failed to enable MSI after resuming!\n");
+ return ret;
+ }
+
+ ret = qm_dev_hw_init(qm);
+ if (ret) {
+ pci_err(pdev, "failed to init device after resuming\n");
+ return ret;
+ }
+
+ qm_cmd_init(qm);
+ hisi_qm_dev_err_init(qm);
+
+ return 0;
+}
+
+/**
+ * hisi_qm_suspend() - Runtime suspend of given device.
+ * @dev: device to suspend.
+ *
+ * Function that suspend the device.
+ */
+int hisi_qm_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct hisi_qm *qm = pci_get_drvdata(pdev);
+ int ret;
+
+ pci_info(pdev, "entering suspended state\n");
+
+ ret = hisi_qm_stop(qm, QM_NORMAL);
+ if (ret) {
+ pci_err(pdev, "failed to stop qm(%d)\n", ret);
+ return ret;
+ }
+
+ ret = qm_prepare_for_suspend(qm);
+ if (ret)
+ pci_err(pdev, "failed to prepare suspended(%d)\n", ret);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_suspend);
+
+/**
+ * hisi_qm_resume() - Runtime resume of given device.
+ * @dev: device to resume.
+ *
+ * Function that resume the device.
+ */
+int hisi_qm_resume(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct hisi_qm *qm = pci_get_drvdata(pdev);
+ int ret;
+
+ pci_info(pdev, "resuming from suspend state\n");
+
+ ret = qm_rebuild_for_resume(qm);
+ if (ret) {
+ pci_err(pdev, "failed to rebuild resume(%d)\n", ret);
+ return ret;
+ }
+
+ ret = hisi_qm_start(qm);
+ if (ret)
+ pci_err(pdev, "failed to start qm(%d)\n", ret);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_resume);
+
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>");
MODULE_DESCRIPTION("HiSilicon Accelerator queue manager driver");
diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
index 035eaf8c442d..3068093229a5 100644
--- a/drivers/crypto/hisilicon/qm.h
+++ b/drivers/crypto/hisilicon/qm.h
@@ -4,6 +4,7 @@
#define HISI_ACC_QM_H
#include <linux/bitfield.h>
+#include <linux/debugfs.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/pci.h>
@@ -430,4 +431,11 @@ void hisi_qm_dev_shutdown(struct pci_dev *pdev);
void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
+int hisi_qm_resume(struct device *dev);
+int hisi_qm_suspend(struct device *dev);
+void hisi_qm_pm_uninit(struct hisi_qm *qm);
+void hisi_qm_pm_init(struct hisi_qm *qm);
+int hisi_qm_get_dfx_access(struct hisi_qm *qm);
+void hisi_qm_put_dfx_access(struct hisi_qm *qm);
+void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset);
#endif
diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index 018415b9840a..d97cf02b1df7 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -157,11 +157,6 @@ struct sec_ctx {
struct device *dev;
};
-enum sec_endian {
- SEC_LE = 0,
- SEC_32BE,
- SEC_64BE
-};
enum sec_debug_file_index {
SEC_CLEAR_ENABLE,
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 490db7bccf61..90551bf38b52 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/pm_runtime.h>
#include <linux/seq_file.h>
#include <linux/topology.h>
#include <linux/uacce.h>
@@ -57,10 +58,16 @@
#define SEC_MEM_START_INIT_REG 0x301100
#define SEC_MEM_INIT_DONE_REG 0x301104
+/* clock gating */
#define SEC_CONTROL_REG 0x301200
-#define SEC_TRNG_EN_SHIFT 8
+#define SEC_DYNAMIC_GATE_REG 0x30121c
+#define SEC_CORE_AUTO_GATE 0x30212c
+#define SEC_DYNAMIC_GATE_EN 0x7bff
+#define SEC_CORE_AUTO_GATE_EN GENMASK(3, 0)
#define SEC_CLK_GATE_ENABLE BIT(3)
#define SEC_CLK_GATE_DISABLE (~BIT(3))
+
+#define SEC_TRNG_EN_SHIFT 8
#define SEC_AXI_SHUTDOWN_ENABLE BIT(12)
#define SEC_AXI_SHUTDOWN_DISABLE 0xFFFFEFFF
@@ -312,31 +319,20 @@ static const struct pci_device_id sec_dev_ids[] = {
};
MODULE_DEVICE_TABLE(pci, sec_dev_ids);
-static u8 sec_get_endian(struct hisi_qm *qm)
+static void sec_set_endian(struct hisi_qm *qm)
{
u32 reg;
- /*
- * As for VF, it is a wrong way to get endian setting by
- * reading a register of the engine
- */
- if (qm->pdev->is_virtfn) {
- dev_err_ratelimited(&qm->pdev->dev,
- "cannot access a register in VF!\n");
- return SEC_LE;
- }
reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
- /* BD little endian mode */
- if (!(reg & BIT(0)))
- return SEC_LE;
+ reg &= ~(BIT(1) | BIT(0));
+ if (!IS_ENABLED(CONFIG_64BIT))
+ reg |= BIT(1);
- /* BD 32-bits big endian mode */
- else if (!(reg & BIT(1)))
- return SEC_32BE;
- /* BD 64-bits big endian mode */
- else
- return SEC_64BE;
+ if (!IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+ reg |= BIT(0);
+
+ writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
}
static void sec_open_sva_prefetch(struct hisi_qm *qm)
@@ -378,15 +374,43 @@ static void sec_close_sva_prefetch(struct hisi_qm *qm)
pci_err(qm->pdev, "failed to close sva prefetch\n");
}
+static void sec_enable_clock_gate(struct hisi_qm *qm)
+{
+ u32 val;
+
+ if (qm->ver < QM_HW_V3)
+ return;
+
+ val = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
+ val |= SEC_CLK_GATE_ENABLE;
+ writel_relaxed(val, qm->io_base + SEC_CONTROL_REG);
+
+ val = readl(qm->io_base + SEC_DYNAMIC_GATE_REG);
+ val |= SEC_DYNAMIC_GATE_EN;
+ writel(val, qm->io_base + SEC_DYNAMIC_GATE_REG);
+
+ val = readl(qm->io_base + SEC_CORE_AUTO_GATE);
+ val |= SEC_CORE_AUTO_GATE_EN;
+ writel(val, qm->io_base + SEC_CORE_AUTO_GATE);
+}
+
+static void sec_disable_clock_gate(struct hisi_qm *qm)
+{
+ u32 val;
+
+ /* Kunpeng920 needs to close clock gating */
+ val = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
+ val &= SEC_CLK_GATE_DISABLE;
+ writel_relaxed(val, qm->io_base + SEC_CONTROL_REG);
+}
+
static int sec_engine_init(struct hisi_qm *qm)
{
int ret;
u32 reg;
- /* disable clock gate control */
- reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
- reg &= SEC_CLK_GATE_DISABLE;
- writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
+ /* disable clock gate control before mem init */
+ sec_disable_clock_gate(qm);
writel_relaxed(0x1, qm->io_base + SEC_MEM_START_INIT_REG);
@@ -429,9 +453,9 @@ static int sec_engine_init(struct hisi_qm *qm)
qm->io_base + SEC_BD_ERR_CHK_EN_REG3);
/* config endian */
- reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
- reg |= sec_get_endian(qm);
- writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
+ sec_set_endian(qm);
+
+ sec_enable_clock_gate(qm);
return 0;
}
@@ -533,17 +557,14 @@ static void sec_hw_error_disable(struct hisi_qm *qm)
writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_NFE_REG);
}
-static u32 sec_clear_enable_read(struct sec_debug_file *file)
+static u32 sec_clear_enable_read(struct hisi_qm *qm)
{
- struct hisi_qm *qm = file->qm;
-
return readl(qm->io_base + SEC_CTRL_CNT_CLR_CE) &
SEC_CTRL_CNT_CLR_CE_BIT;
}
-static int sec_clear_enable_write(struct sec_debug_file *file, u32 val)
+static int sec_clear_enable_write(struct hisi_qm *qm, u32 val)
{
- struct hisi_qm *qm = file->qm;
u32 tmp;
if (val != 1 && val)
@@ -561,24 +582,34 @@ static ssize_t sec_debug_read(struct file *filp, char __user *buf,
{
struct sec_debug_file *file = filp->private_data;
char tbuf[SEC_DBGFS_VAL_MAX_LEN];
+ struct hisi_qm *qm = file->qm;
u32 val;
int ret;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
spin_lock_irq(&file->lock);
switch (file->index) {
case SEC_CLEAR_ENABLE:
- val = sec_clear_enable_read(file);
+ val = sec_clear_enable_read(qm);
break;
default:
- spin_unlock_irq(&file->lock);
- return -EINVAL;
+ goto err_input;
}
spin_unlock_irq(&file->lock);
- ret = snprintf(tbuf, SEC_DBGFS_VAL_MAX_LEN, "%u\n", val);
+ hisi_qm_put_dfx_access(qm);
+ ret = snprintf(tbuf, SEC_DBGFS_VAL_MAX_LEN, "%u\n", val);
return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+
+err_input:
+ spin_unlock_irq(&file->lock);
+ hisi_qm_put_dfx_access(qm);
+ return -EINVAL;
}
static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
@@ -586,6 +617,7 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
{
struct sec_debug_file *file = filp->private_data;
char tbuf[SEC_DBGFS_VAL_MAX_LEN];
+ struct hisi_qm *qm = file->qm;
unsigned long val;
int len, ret;
@@ -604,11 +636,15 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
if (kstrtoul(tbuf, 0, &val))
return -EFAULT;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
spin_lock_irq(&file->lock);
switch (file->index) {
case SEC_CLEAR_ENABLE:
- ret = sec_clear_enable_write(file, val);
+ ret = sec_clear_enable_write(qm, val);
if (ret)
goto err_input;
break;
@@ -617,12 +653,11 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
goto err_input;
}
- spin_unlock_irq(&file->lock);
-
- return count;
+ ret = count;
err_input:
spin_unlock_irq(&file->lock);
+ hisi_qm_put_dfx_access(qm);
return ret;
}
@@ -653,6 +688,15 @@ static int sec_debugfs_atomic64_set(void *data, u64 val)
DEFINE_DEBUGFS_ATTRIBUTE(sec_atomic64_ops, sec_debugfs_atomic64_get,
sec_debugfs_atomic64_set, "%lld\n");
+static int sec_regs_show(struct seq_file *s, void *unused)
+{
+ hisi_qm_regs_dump(s, s->private);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(sec_regs);
+
static int sec_core_debug_init(struct hisi_qm *qm)
{
struct sec_dev *sec = container_of(qm, struct sec_dev, qm);
@@ -671,9 +715,10 @@ static int sec_core_debug_init(struct hisi_qm *qm)
regset->regs = sec_dfx_regs;
regset->nregs = ARRAY_SIZE(sec_dfx_regs);
regset->base = qm->io_base;
+ regset->dev = dev;
if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID)
- debugfs_create_regset32("regs", 0444, tmp_d, regset);
+ debugfs_create_file("regs", 0444, tmp_d, regset, &sec_regs_fops);
for (i = 0; i < ARRAY_SIZE(sec_dfx_labels); i++) {
atomic64_t *data = (atomic64_t *)((uintptr_t)dfx +
@@ -981,10 +1026,13 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_alg_unregister;
}
+ hisi_qm_pm_init(qm);
+
return 0;
err_alg_unregister:
- hisi_qm_alg_unregister(qm, &sec_devices);
+ if (qm->qp_num >= ctx_q_num)
+ hisi_qm_alg_unregister(qm, &sec_devices);
err_qm_stop:
sec_debugfs_exit(qm);
hisi_qm_stop(qm, QM_NORMAL);
@@ -999,6 +1047,7 @@ static void sec_remove(struct pci_dev *pdev)
{
struct hisi_qm *qm = pci_get_drvdata(pdev);
+ hisi_qm_pm_uninit(qm);
hisi_qm_wait_task_finish(qm, &sec_devices);
if (qm->qp_num >= ctx_q_num)
hisi_qm_alg_unregister(qm, &sec_devices);
@@ -1018,6 +1067,10 @@ static void sec_remove(struct pci_dev *pdev)
sec_qm_uninit(qm);
}
+static const struct dev_pm_ops sec_pm_ops = {
+ SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL)
+};
+
static const struct pci_error_handlers sec_err_handler = {
.error_detected = hisi_qm_dev_err_detected,
.slot_reset = hisi_qm_dev_slot_reset,
@@ -1033,6 +1086,7 @@ static struct pci_driver sec_pci_driver = {
.err_handler = &sec_err_handler,
.sriov_configure = hisi_qm_sriov_configure,
.shutdown = hisi_qm_dev_shutdown,
+ .driver.pm = &sec_pm_ops,
};
static void sec_register_debugfs(void)
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index f8482ceebf2a..7148201ce76e 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/pm_runtime.h>
#include <linux/seq_file.h>
#include <linux/topology.h>
#include <linux/uacce.h>
@@ -107,6 +108,14 @@
#define HZIP_DELAY_1_US 1
#define HZIP_POLL_TIMEOUT_US 1000
+/* clock gating */
+#define HZIP_PEH_CFG_AUTO_GATE 0x3011A8
+#define HZIP_PEH_CFG_AUTO_GATE_EN BIT(0)
+#define HZIP_CORE_GATED_EN GENMASK(15, 8)
+#define HZIP_CORE_GATED_OOO_EN BIT(29)
+#define HZIP_CLOCK_GATED_EN (HZIP_CORE_GATED_EN | \
+ HZIP_CORE_GATED_OOO_EN)
+
static const char hisi_zip_name[] = "hisi_zip";
static struct dentry *hzip_debugfs_root;
@@ -312,6 +321,22 @@ static void hisi_zip_close_sva_prefetch(struct hisi_qm *qm)
pci_err(qm->pdev, "failed to close sva prefetch\n");
}
+static void hisi_zip_enable_clock_gate(struct hisi_qm *qm)
+{
+ u32 val;
+
+ if (qm->ver < QM_HW_V3)
+ return;
+
+ val = readl(qm->io_base + HZIP_CLOCK_GATE_CTRL);
+ val |= HZIP_CLOCK_GATED_EN;
+ writel(val, qm->io_base + HZIP_CLOCK_GATE_CTRL);
+
+ val = readl(qm->io_base + HZIP_PEH_CFG_AUTO_GATE);
+ val |= HZIP_PEH_CFG_AUTO_GATE_EN;
+ writel(val, qm->io_base + HZIP_PEH_CFG_AUTO_GATE);
+}
+
static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm)
{
void __iomem *base = qm->io_base;
@@ -359,6 +384,8 @@ static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm)
CQC_CACHE_WB_ENABLE | FIELD_PREP(SQC_CACHE_WB_THRD, 1) |
FIELD_PREP(CQC_CACHE_WB_THRD, 1), base + QM_CACHE_CTL);
+ hisi_zip_enable_clock_gate(qm);
+
return 0;
}
@@ -423,17 +450,14 @@ static inline struct hisi_qm *file_to_qm(struct ctrl_debug_file *file)
return &hisi_zip->qm;
}
-static u32 clear_enable_read(struct ctrl_debug_file *file)
+static u32 clear_enable_read(struct hisi_qm *qm)
{
- struct hisi_qm *qm = file_to_qm(file);
-
return readl(qm->io_base + HZIP_SOFT_CTRL_CNT_CLR_CE) &
HZIP_SOFT_CTRL_CNT_CLR_CE_BIT;
}
-static int clear_enable_write(struct ctrl_debug_file *file, u32 val)
+static int clear_enable_write(struct hisi_qm *qm, u32 val)
{
- struct hisi_qm *qm = file_to_qm(file);
u32 tmp;
if (val != 1 && val != 0)
@@ -450,22 +474,33 @@ static ssize_t hisi_zip_ctrl_debug_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
struct ctrl_debug_file *file = filp->private_data;
+ struct hisi_qm *qm = file_to_qm(file);
char tbuf[HZIP_BUF_SIZE];
u32 val;
int ret;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
spin_lock_irq(&file->lock);
switch (file->index) {
case HZIP_CLEAR_ENABLE:
- val = clear_enable_read(file);
+ val = clear_enable_read(qm);
break;
default:
- spin_unlock_irq(&file->lock);
- return -EINVAL;
+ goto err_input;
}
spin_unlock_irq(&file->lock);
+
+ hisi_qm_put_dfx_access(qm);
ret = scnprintf(tbuf, sizeof(tbuf), "%u\n", val);
return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+
+err_input:
+ spin_unlock_irq(&file->lock);
+ hisi_qm_put_dfx_access(qm);
+ return -EINVAL;
}
static ssize_t hisi_zip_ctrl_debug_write(struct file *filp,
@@ -473,6 +508,7 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp,
size_t count, loff_t *pos)
{
struct ctrl_debug_file *file = filp->private_data;
+ struct hisi_qm *qm = file_to_qm(file);
char tbuf[HZIP_BUF_SIZE];
unsigned long val;
int len, ret;
@@ -491,10 +527,14 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp,
if (kstrtoul(tbuf, 0, &val))
return -EFAULT;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
spin_lock_irq(&file->lock);
switch (file->index) {
case HZIP_CLEAR_ENABLE:
- ret = clear_enable_write(file, val);
+ ret = clear_enable_write(qm, val);
if (ret)
goto err_input;
break;
@@ -502,12 +542,12 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp,
ret = -EINVAL;
goto err_input;
}
- spin_unlock_irq(&file->lock);
- return count;
+ ret = count;
err_input:
spin_unlock_irq(&file->lock);
+ hisi_qm_put_dfx_access(qm);
return ret;
}
@@ -538,6 +578,15 @@ static int zip_debugfs_atomic64_get(void *data, u64 *val)
DEFINE_DEBUGFS_ATTRIBUTE(zip_atomic64_ops, zip_debugfs_atomic64_get,
zip_debugfs_atomic64_set, "%llu\n");
+static int hisi_zip_regs_show(struct seq_file *s, void *unused)
+{
+ hisi_qm_regs_dump(s, s->private);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(hisi_zip_regs);
+
static int hisi_zip_core_debug_init(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
@@ -560,9 +609,11 @@ static int hisi_zip_core_debug_init(struct hisi_qm *qm)
regset->regs = hzip_dfx_regs;
regset->nregs = ARRAY_SIZE(hzip_dfx_regs);
regset->base = qm->io_base + core_offsets[i];
+ regset->dev = dev;
tmp_d = debugfs_create_dir(buf, qm->debug.debug_root);
- debugfs_create_regset32("regs", 0444, tmp_d, regset);
+ debugfs_create_file("regs", 0444, tmp_d, regset,
+ &hisi_zip_regs_fops);
}
return 0;
@@ -898,6 +949,8 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_qm_alg_unregister;
}
+ hisi_qm_pm_init(qm);
+
return 0;
err_qm_alg_unregister:
@@ -920,6 +973,7 @@ static void hisi_zip_remove(struct pci_dev *pdev)
{
struct hisi_qm *qm = pci_get_drvdata(pdev);
+ hisi_qm_pm_uninit(qm);
hisi_qm_wait_task_finish(qm, &zip_devices);
hisi_qm_alg_unregister(qm, &zip_devices);
@@ -932,6 +986,10 @@ static void hisi_zip_remove(struct pci_dev *pdev)
hisi_zip_qm_uninit(qm);
}
+static const struct dev_pm_ops hisi_zip_pm_ops = {
+ SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL)
+};
+
static const struct pci_error_handlers hisi_zip_err_handler = {
.error_detected = hisi_qm_dev_err_detected,
.slot_reset = hisi_qm_dev_slot_reset,
@@ -948,6 +1006,7 @@ static struct pci_driver hisi_zip_pci_driver = {
hisi_qm_sriov_configure : NULL,
.err_handler = &hisi_zip_err_handler,
.shutdown = hisi_qm_dev_shutdown,
+ .driver.pm = &hisi_zip_pm_ops,
};
static void hisi_zip_register_debugfs(void)
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index d6a7784d2988..d19e5ffb5104 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -170,15 +170,19 @@ static struct dcp *global_sdcp;
static int mxs_dcp_start_dma(struct dcp_async_ctx *actx)
{
+ int dma_err;
struct dcp *sdcp = global_sdcp;
const int chan = actx->chan;
uint32_t stat;
unsigned long ret;
struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
-
dma_addr_t desc_phys = dma_map_single(sdcp->dev, desc, sizeof(*desc),
DMA_TO_DEVICE);
+ dma_err = dma_mapping_error(sdcp->dev, desc_phys);
+ if (dma_err)
+ return dma_err;
+
reinit_completion(&sdcp->completion[chan]);
/* Clear status register. */
@@ -216,18 +220,29 @@ static int mxs_dcp_start_dma(struct dcp_async_ctx *actx)
static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
struct skcipher_request *req, int init)
{
+ dma_addr_t key_phys, src_phys, dst_phys;
struct dcp *sdcp = global_sdcp;
struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
struct dcp_aes_req_ctx *rctx = skcipher_request_ctx(req);
int ret;
- dma_addr_t key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key,
- 2 * AES_KEYSIZE_128,
- DMA_TO_DEVICE);
- dma_addr_t src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf,
- DCP_BUF_SZ, DMA_TO_DEVICE);
- dma_addr_t dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf,
- DCP_BUF_SZ, DMA_FROM_DEVICE);
+ key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key,
+ 2 * AES_KEYSIZE_128, DMA_TO_DEVICE);
+ ret = dma_mapping_error(sdcp->dev, key_phys);
+ if (ret)
+ return ret;
+
+ src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf,
+ DCP_BUF_SZ, DMA_TO_DEVICE);
+ ret = dma_mapping_error(sdcp->dev, src_phys);
+ if (ret)
+ goto err_src;
+
+ dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf,
+ DCP_BUF_SZ, DMA_FROM_DEVICE);
+ ret = dma_mapping_error(sdcp->dev, dst_phys);
+ if (ret)
+ goto err_dst;
if (actx->fill % AES_BLOCK_SIZE) {
dev_err(sdcp->dev, "Invalid block size!\n");
@@ -265,10 +280,12 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
ret = mxs_dcp_start_dma(actx);
aes_done_run:
+ dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE);
+err_dst:
+ dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE);
+err_src:
dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128,
DMA_TO_DEVICE);
- dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE);
- dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE);
return ret;
}
@@ -283,21 +300,20 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
struct scatterlist *dst = req->dst;
struct scatterlist *src = req->src;
- const int nents = sg_nents(req->src);
+ int dst_nents = sg_nents(dst);
const int out_off = DCP_BUF_SZ;
uint8_t *in_buf = sdcp->coh->aes_in_buf;
uint8_t *out_buf = sdcp->coh->aes_out_buf;
- uint8_t *out_tmp, *src_buf, *dst_buf = NULL;
uint32_t dst_off = 0;
+ uint8_t *src_buf = NULL;
uint32_t last_out_len = 0;
uint8_t *key = sdcp->coh->aes_key;
int ret = 0;
- int split = 0;
- unsigned int i, len, clen, rem = 0, tlen = 0;
+ unsigned int i, len, clen, tlen = 0;
int init = 0;
bool limit_hit = false;
@@ -315,7 +331,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
memset(key + AES_KEYSIZE_128, 0, AES_KEYSIZE_128);
}
- for_each_sg(req->src, src, nents, i) {
+ for_each_sg(req->src, src, sg_nents(src), i) {
src_buf = sg_virt(src);
len = sg_dma_len(src);
tlen += len;
@@ -340,34 +356,17 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
* submit the buffer.
*/
if (actx->fill == out_off || sg_is_last(src) ||
- limit_hit) {
+ limit_hit) {
ret = mxs_dcp_run_aes(actx, req, init);
if (ret)
return ret;
init = 0;
- out_tmp = out_buf;
+ sg_pcopy_from_buffer(dst, dst_nents, out_buf,
+ actx->fill, dst_off);
+ dst_off += actx->fill;
last_out_len = actx->fill;
- while (dst && actx->fill) {
- if (!split) {
- dst_buf = sg_virt(dst);
- dst_off = 0;
- }
- rem = min(sg_dma_len(dst) - dst_off,
- actx->fill);
-
- memcpy(dst_buf + dst_off, out_tmp, rem);
- out_tmp += rem;
- dst_off += rem;
- actx->fill -= rem;
-
- if (dst_off == sg_dma_len(dst)) {
- dst = sg_next(dst);
- split = 0;
- } else {
- split = 1;
- }
- }
+ actx->fill = 0;
}
} while (len);
@@ -557,6 +556,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req)
dma_addr_t buf_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_in_buf,
DCP_BUF_SZ, DMA_TO_DEVICE);
+ ret = dma_mapping_error(sdcp->dev, buf_phys);
+ if (ret)
+ return ret;
+
/* Fill in the DMA descriptor. */
desc->control0 = MXS_DCP_CONTROL0_DECR_SEMAPHORE |
MXS_DCP_CONTROL0_INTERRUPT |
@@ -589,6 +592,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req)
if (rctx->fini) {
digest_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_out_buf,
DCP_SHA_PAY_SZ, DMA_FROM_DEVICE);
+ ret = dma_mapping_error(sdcp->dev, digest_phys);
+ if (ret)
+ goto done_run;
+
desc->control0 |= MXS_DCP_CONTROL0_HASH_TERM;
desc->payload = digest_phys;
}
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index 0dd4c6b157de..9b968ac4ee7b 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -1175,9 +1175,9 @@ static int omap_aes_probe(struct platform_device *pdev)
spin_lock_init(&dd->lock);
INIT_LIST_HEAD(&dd->list);
- spin_lock(&list_lock);
+ spin_lock_bh(&list_lock);
list_add_tail(&dd->list, &dev_list);
- spin_unlock(&list_lock);
+ spin_unlock_bh(&list_lock);
/* Initialize crypto engine */
dd->engine = crypto_engine_alloc_init(dev, 1);
@@ -1264,9 +1264,9 @@ static int omap_aes_remove(struct platform_device *pdev)
if (!dd)
return -ENODEV;
- spin_lock(&list_lock);
+ spin_lock_bh(&list_lock);
list_del(&dd->list);
- spin_unlock(&list_lock);
+ spin_unlock_bh(&list_lock);
for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) {
diff --git a/drivers/crypto/omap-crypto.c b/drivers/crypto/omap-crypto.c
index 31bdb1d76d11..a4cc6bf146ec 100644
--- a/drivers/crypto/omap-crypto.c
+++ b/drivers/crypto/omap-crypto.c
@@ -210,7 +210,7 @@ void omap_crypto_cleanup(struct scatterlist *sg, struct scatterlist *orig,
buf = sg_virt(sg);
pages = get_order(len);
- if (orig && (flags & OMAP_CRYPTO_COPY_MASK))
+ if (orig && (flags & OMAP_CRYPTO_DATA_COPIED))
omap_crypto_copy_data(sg, orig, offset, len);
if (flags & OMAP_CRYPTO_DATA_COPIED)
diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c
index bc8631363d72..be77656864e3 100644
--- a/drivers/crypto/omap-des.c
+++ b/drivers/crypto/omap-des.c
@@ -1033,9 +1033,9 @@ static int omap_des_probe(struct platform_device *pdev)
INIT_LIST_HEAD(&dd->list);
- spin_lock(&list_lock);
+ spin_lock_bh(&list_lock);
list_add_tail(&dd->list, &dev_list);
- spin_unlock(&list_lock);
+ spin_unlock_bh(&list_lock);
/* Initialize des crypto engine */
dd->engine = crypto_engine_alloc_init(dev, 1);
@@ -1094,9 +1094,9 @@ static int omap_des_remove(struct platform_device *pdev)
if (!dd)
return -ENODEV;
- spin_lock(&list_lock);
+ spin_lock_bh(&list_lock);
list_del(&dd->list);
- spin_unlock(&list_lock);
+ spin_unlock_bh(&list_lock);
for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index dd53ad9987b0..f6bf53c00b61 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -105,7 +105,6 @@
#define FLAGS_FINAL 1
#define FLAGS_DMA_ACTIVE 2
#define FLAGS_OUTPUT_READY 3
-#define FLAGS_INIT 4
#define FLAGS_CPU 5
#define FLAGS_DMA_READY 6
#define FLAGS_AUTO_XOR 7
@@ -368,24 +367,6 @@ static void omap_sham_copy_ready_hash(struct ahash_request *req)
hash[i] = le32_to_cpup((__le32 *)in + i);
}
-static int omap_sham_hw_init(struct omap_sham_dev *dd)
-{
- int err;
-
- err = pm_runtime_resume_and_get(dd->dev);
- if (err < 0) {
- dev_err(dd->dev, "failed to get sync: %d\n", err);
- return err;
- }
-
- if (!test_bit(FLAGS_INIT, &dd->flags)) {
- set_bit(FLAGS_INIT, &dd->flags);
- dd->err = 0;
- }
-
- return 0;
-}
-
static void omap_sham_write_ctrl_omap2(struct omap_sham_dev *dd, size_t length,
int final, int dma)
{
@@ -1093,11 +1074,14 @@ static int omap_sham_hash_one_req(struct crypto_engine *engine, void *areq)
dev_dbg(dd->dev, "hash-one: op: %u, total: %u, digcnt: %zd, final: %d",
ctx->op, ctx->total, ctx->digcnt, final);
- dd->req = req;
-
- err = omap_sham_hw_init(dd);
- if (err)
+ err = pm_runtime_resume_and_get(dd->dev);
+ if (err < 0) {
+ dev_err(dd->dev, "failed to get sync: %d\n", err);
return err;
+ }
+
+ dd->err = 0;
+ dd->req = req;
if (ctx->digcnt)
dd->pdata->copy_hash(req, 0);
@@ -1736,7 +1720,7 @@ static void omap_sham_done_task(unsigned long data)
if (test_and_clear_bit(FLAGS_OUTPUT_READY, &dd->flags))
goto finish;
} else if (test_bit(FLAGS_DMA_READY, &dd->flags)) {
- if (test_and_clear_bit(FLAGS_DMA_ACTIVE, &dd->flags)) {
+ if (test_bit(FLAGS_DMA_ACTIVE, &dd->flags)) {
omap_sham_update_dma_stop(dd);
if (dd->err) {
err = dd->err;
@@ -2129,7 +2113,6 @@ static int omap_sham_probe(struct platform_device *pdev)
dd->fallback_sz = OMAP_SHA_DMA_THRESHOLD;
pm_runtime_enable(dev);
- pm_runtime_irq_safe(dev);
err = pm_runtime_get_sync(dev);
if (err < 0) {
@@ -2144,9 +2127,9 @@ static int omap_sham_probe(struct platform_device *pdev)
(rev & dd->pdata->major_mask) >> dd->pdata->major_shift,
(rev & dd->pdata->minor_mask) >> dd->pdata->minor_shift);
- spin_lock(&sham.lock);
+ spin_lock_bh(&sham.lock);
list_add_tail(&dd->list, &sham.dev_list);
- spin_unlock(&sham.lock);
+ spin_unlock_bh(&sham.lock);
dd->engine = crypto_engine_alloc_init(dev, 1);
if (!dd->engine) {
@@ -2194,10 +2177,11 @@ err_algs:
err_engine_start:
crypto_engine_exit(dd->engine);
err_engine:
- spin_lock(&sham.lock);
+ spin_lock_bh(&sham.lock);
list_del(&dd->list);
- spin_unlock(&sham.lock);
+ spin_unlock_bh(&sham.lock);
err_pm:
+ pm_runtime_dont_use_autosuspend(dev);
pm_runtime_disable(dev);
if (!dd->polling_mode)
dma_release_channel(dd->dma_lch);
@@ -2215,9 +2199,9 @@ static int omap_sham_remove(struct platform_device *pdev)
dd = platform_get_drvdata(pdev);
if (!dd)
return -ENODEV;
- spin_lock(&sham.lock);
+ spin_lock_bh(&sham.lock);
list_del(&dd->list);
- spin_unlock(&sham.lock);
+ spin_unlock_bh(&sham.lock);
for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) {
crypto_unregister_ahash(
@@ -2225,6 +2209,7 @@ static int omap_sham_remove(struct platform_device *pdev)
dd->pdata->algs_info[i].registered--;
}
tasklet_kill(&dd->done_task);
+ pm_runtime_dont_use_autosuspend(&pdev->dev);
pm_runtime_disable(&pdev->dev);
if (!dd->polling_mode)
@@ -2235,32 +2220,11 @@ static int omap_sham_remove(struct platform_device *pdev)
return 0;
}
-#ifdef CONFIG_PM_SLEEP
-static int omap_sham_suspend(struct device *dev)
-{
- pm_runtime_put_sync(dev);
- return 0;
-}
-
-static int omap_sham_resume(struct device *dev)
-{
- int err = pm_runtime_resume_and_get(dev);
- if (err < 0) {
- dev_err(dev, "failed to get sync: %d\n", err);
- return err;
- }
- return 0;
-}
-#endif
-
-static SIMPLE_DEV_PM_OPS(omap_sham_pm_ops, omap_sham_suspend, omap_sham_resume);
-
static struct platform_driver omap_sham_driver = {
.probe = omap_sham_probe,
.remove = omap_sham_remove,
.driver = {
.name = "omap-sham",
- .pm = &omap_sham_pm_ops,
.of_match_table = omap_sham_of_match,
},
};
diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
index 3524ddd48930..33d8e50dcbda 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -161,7 +161,7 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
ADF_CSR_WR(addr, ADF_4XXX_SMIAPF_MASK_OFFSET, 0);
}
-static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
+static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
{
return 0;
}
@@ -210,21 +210,21 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data)
hw_data->fw_mmp_name = ADF_4XXX_MMP;
hw_data->init_admin_comms = adf_init_admin_comms;
hw_data->exit_admin_comms = adf_exit_admin_comms;
- hw_data->disable_iov = adf_disable_sriov;
hw_data->send_admin_init = adf_send_admin_init;
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
hw_data->enable_ints = adf_enable_ints;
- hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
hw_data->reset_device = adf_reset_flr;
- hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
hw_data->admin_ae_mask = ADF_4XXX_ADMIN_AE_MASK;
hw_data->uof_get_num_objs = uof_get_num_objs;
hw_data->uof_get_name = uof_get_name;
hw_data->uof_get_ae_mask = uof_get_ae_mask;
hw_data->set_msix_rttable = set_msix_default_rttable;
hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer;
+ hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
+ hw_data->disable_iov = adf_disable_sriov;
+ hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
adf_gen4_init_hw_csr_ops(&hw_data->csr_ops);
}
diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c
index a8805c815d16..359fb7989dfb 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_drv.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c
@@ -221,16 +221,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* Set DMA identifier */
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
- dev_err(&pdev->dev, "No usable DMA configuration.\n");
- ret = -EFAULT;
- goto out_err;
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- }
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (ret) {
+ dev_err(&pdev->dev, "No usable DMA configuration.\n");
+ goto out_err;
}
/* Get accelerator capabilities mask */
diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c
index 1dd64af22bea..3027c01bc89e 100644
--- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c
+++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c
@@ -111,11 +111,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_C3XXX_PF2VF_OFFSET(i);
}
-static u32 get_vintmsk_offset(u32 i)
-{
- return ADF_C3XXX_VINTMSK_OFFSET(i);
-}
-
static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
{
struct adf_hw_device_data *hw_device = accel_dev->hw_device;
@@ -159,8 +154,10 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
ADF_C3XXX_SMIA1_MASK);
}
-static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
+static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
{
+ spin_lock_init(&accel_dev->pf.vf2pf_ints_lock);
+
return 0;
}
@@ -193,8 +190,6 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data)
hw_data->get_sram_bar_id = get_sram_bar_id;
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
- hw_data->get_pf2vf_offset = get_pf2vf_offset;
- hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_admin_info = adf_gen2_get_admin_info;
hw_data->get_arb_info = adf_gen2_get_arb_info;
hw_data->get_sku = get_sku;
@@ -203,16 +198,18 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data)
hw_data->init_admin_comms = adf_init_admin_comms;
hw_data->exit_admin_comms = adf_exit_admin_comms;
hw_data->configure_iov_threads = configure_iov_threads;
- hw_data->disable_iov = adf_disable_sriov;
hw_data->send_admin_init = adf_send_admin_init;
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
hw_data->enable_ints = adf_enable_ints;
- hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
hw_data->reset_device = adf_reset_flr;
- hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer;
+ hw_data->get_pf2vf_offset = get_pf2vf_offset;
+ hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
+ hw_data->disable_iov = adf_disable_sriov;
+ hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
+
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
}
diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h
index fece8e38025a..86ee02a86789 100644
--- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h
+++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h
@@ -29,7 +29,6 @@
#define ADF_C3XXX_ERRSSMSH_EN BIT(3)
#define ADF_C3XXX_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04))
-#define ADF_C3XXX_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04))
/* AE to function mapping */
#define ADF_C3XXX_AE2FUNC_MAP_GRP_A_NUM_REGS 48
diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c
index 7fb3343ae8b0..cc6e75dc60de 100644
--- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c
+++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c
@@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
- dev_err(&pdev->dev, "No usable DMA configuration\n");
- ret = -EFAULT;
- goto out_err_disable;
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- }
-
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+ if (ret) {
+ dev_err(&pdev->dev, "No usable DMA configuration\n");
+ goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_C3XXX_DEVICE_NAME)) {
@@ -208,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (pci_save_state(pdev)) {
dev_err(&pdev->dev, "Failed to save pci state\n");
ret = -ENOMEM;
- goto out_err_free_reg;
+ goto out_err_disable_aer;
}
ret = qat_crypto_dev_config(accel_dev);
if (ret)
- goto out_err_free_reg;
+ goto out_err_disable_aer;
ret = adf_dev_init(accel_dev);
if (ret)
@@ -229,6 +222,8 @@ out_err_dev_stop:
adf_dev_stop(accel_dev);
out_err_dev_shutdown:
adf_dev_shutdown(accel_dev);
+out_err_disable_aer:
+ adf_disable_aer(accel_dev);
out_err_free_reg:
pci_release_regions(accel_pci_dev->pci_dev);
out_err_disable:
diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
index 15f6b9bdfb22..3e69b520e82f 100644
--- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
+++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
@@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_C3XXXIOV_PF2VF_OFFSET;
}
-static u32 get_vintmsk_offset(u32 i)
-{
- return ADF_C3XXXIOV_VINTMSK_OFFSET;
-}
-
static int adf_vf_int_noop(struct adf_accel_dev *accel_dev)
{
return 0;
@@ -81,10 +76,10 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data)
hw_data->enable_error_correction = adf_vf_void_noop;
hw_data->init_admin_comms = adf_vf_int_noop;
hw_data->exit_admin_comms = adf_vf_void_noop;
- hw_data->send_admin_init = adf_vf2pf_init;
+ hw_data->send_admin_init = adf_vf2pf_notify_init;
hw_data->init_arb = adf_vf_int_noop;
hw_data->exit_arb = adf_vf_void_noop;
- hw_data->disable_iov = adf_vf2pf_shutdown;
+ hw_data->disable_iov = adf_vf2pf_notify_shutdown;
hw_data->get_accel_mask = get_accel_mask;
hw_data->get_ae_mask = get_ae_mask;
hw_data->get_num_accels = get_num_accels;
@@ -92,11 +87,10 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data)
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
hw_data->get_pf2vf_offset = get_pf2vf_offset;
- hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_sku = get_sku;
hw_data->enable_ints = adf_vf_void_noop;
- hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms;
- hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
+ hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms;
+ hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
hw_data->dev_class->instances++;
adf_devmgr_update_class_index(hw_data);
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h
index 7945a9cd1c60..f5de4ce66014 100644
--- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h
+++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h
@@ -13,7 +13,6 @@
#define ADF_C3XXXIOV_ETR_BAR 0
#define ADF_C3XXXIOV_ETR_MAX_BANKS 1
#define ADF_C3XXXIOV_PF2VF_OFFSET 0x200
-#define ADF_C3XXXIOV_VINTMSK_OFFSET 0x208
void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data);
void adf_clean_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data);
diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c
index 067ca5e17d38..1df1b868978d 100644
--- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c
@@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
- dev_err(&pdev->dev, "No usable DMA configuration\n");
- ret = -EFAULT;
- goto out_err_disable;
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- }
-
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+ if (ret) {
+ dev_err(&pdev->dev, "No usable DMA configuration\n");
+ goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_C3XXXVF_DEVICE_NAME)) {
@@ -218,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev)
pr_err("QAT: Driver removal failed\n");
return;
}
+ adf_flush_vf_wq(accel_dev);
adf_dev_stop(accel_dev);
adf_dev_shutdown(accel_dev);
adf_cleanup_accel(accel_dev);
diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c
index 30337390513c..b023c80873bb 100644
--- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c
+++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c
@@ -113,11 +113,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_C62X_PF2VF_OFFSET(i);
}
-static u32 get_vintmsk_offset(u32 i)
-{
- return ADF_C62X_VINTMSK_OFFSET(i);
-}
-
static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
{
struct adf_hw_device_data *hw_device = accel_dev->hw_device;
@@ -161,8 +156,10 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
ADF_C62X_SMIA1_MASK);
}
-static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
+static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
{
+ spin_lock_init(&accel_dev->pf.vf2pf_ints_lock);
+
return 0;
}
@@ -195,8 +192,6 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data)
hw_data->get_sram_bar_id = get_sram_bar_id;
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
- hw_data->get_pf2vf_offset = get_pf2vf_offset;
- hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_admin_info = adf_gen2_get_admin_info;
hw_data->get_arb_info = adf_gen2_get_arb_info;
hw_data->get_sku = get_sku;
@@ -205,16 +200,18 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data)
hw_data->init_admin_comms = adf_init_admin_comms;
hw_data->exit_admin_comms = adf_exit_admin_comms;
hw_data->configure_iov_threads = configure_iov_threads;
- hw_data->disable_iov = adf_disable_sriov;
hw_data->send_admin_init = adf_send_admin_init;
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
hw_data->enable_ints = adf_enable_ints;
- hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
hw_data->reset_device = adf_reset_flr;
- hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer;
+ hw_data->get_pf2vf_offset = get_pf2vf_offset;
+ hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
+ hw_data->disable_iov = adf_disable_sriov;
+ hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
+
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
}
diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h
index 53d3cb577f5b..e6664bd20c91 100644
--- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h
+++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h
@@ -30,7 +30,6 @@
#define ADF_C62X_ERRSSMSH_EN BIT(3)
#define ADF_C62X_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04))
-#define ADF_C62X_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04))
/* AE to function mapping */
#define ADF_C62X_AE2FUNC_MAP_GRP_A_NUM_REGS 80
diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c
index 1f5de442e1e6..bf251dfe74b3 100644
--- a/drivers/crypto/qat/qat_c62x/adf_drv.c
+++ b/drivers/crypto/qat/qat_c62x/adf_drv.c
@@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
- dev_err(&pdev->dev, "No usable DMA configuration\n");
- ret = -EFAULT;
- goto out_err_disable;
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- }
-
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+ if (ret) {
+ dev_err(&pdev->dev, "No usable DMA configuration\n");
+ goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_C62X_DEVICE_NAME)) {
@@ -208,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (pci_save_state(pdev)) {
dev_err(&pdev->dev, "Failed to save pci state\n");
ret = -ENOMEM;
- goto out_err_free_reg;
+ goto out_err_disable_aer;
}
ret = qat_crypto_dev_config(accel_dev);
if (ret)
- goto out_err_free_reg;
+ goto out_err_disable_aer;
ret = adf_dev_init(accel_dev);
if (ret)
@@ -229,6 +222,8 @@ out_err_dev_stop:
adf_dev_stop(accel_dev);
out_err_dev_shutdown:
adf_dev_shutdown(accel_dev);
+out_err_disable_aer:
+ adf_disable_aer(accel_dev);
out_err_free_reg:
pci_release_regions(accel_pci_dev->pci_dev);
out_err_disable:
diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c
index d231583428c9..3bee3e467363 100644
--- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c
+++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c
@@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_C62XIOV_PF2VF_OFFSET;
}
-static u32 get_vintmsk_offset(u32 i)
-{
- return ADF_C62XIOV_VINTMSK_OFFSET;
-}
-
static int adf_vf_int_noop(struct adf_accel_dev *accel_dev)
{
return 0;
@@ -81,10 +76,10 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data)
hw_data->enable_error_correction = adf_vf_void_noop;
hw_data->init_admin_comms = adf_vf_int_noop;
hw_data->exit_admin_comms = adf_vf_void_noop;
- hw_data->send_admin_init = adf_vf2pf_init;
+ hw_data->send_admin_init = adf_vf2pf_notify_init;
hw_data->init_arb = adf_vf_int_noop;
hw_data->exit_arb = adf_vf_void_noop;
- hw_data->disable_iov = adf_vf2pf_shutdown;
+ hw_data->disable_iov = adf_vf2pf_notify_shutdown;
hw_data->get_accel_mask = get_accel_mask;
hw_data->get_ae_mask = get_ae_mask;
hw_data->get_num_accels = get_num_accels;
@@ -92,11 +87,10 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data)
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
hw_data->get_pf2vf_offset = get_pf2vf_offset;
- hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_sku = get_sku;
hw_data->enable_ints = adf_vf_void_noop;
- hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms;
- hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
+ hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms;
+ hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
hw_data->dev_class->instances++;
adf_devmgr_update_class_index(hw_data);
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h
index a6c04cf7a43c..794778c48678 100644
--- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h
+++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h
@@ -13,7 +13,6 @@
#define ADF_C62XIOV_ETR_BAR 0
#define ADF_C62XIOV_ETR_MAX_BANKS 1
#define ADF_C62XIOV_PF2VF_OFFSET 0x200
-#define ADF_C62XIOV_VINTMSK_OFFSET 0x208
void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data);
void adf_clean_hw_data_c62xiov(struct adf_hw_device_data *hw_data);
diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c
index 51ea88c0b17d..8103bd81d617 100644
--- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c
@@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
- dev_err(&pdev->dev, "No usable DMA configuration\n");
- ret = -EFAULT;
- goto out_err_disable;
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- }
-
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+ if (ret) {
+ dev_err(&pdev->dev, "No usable DMA configuration\n");
+ goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_C62XVF_DEVICE_NAME)) {
@@ -218,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev)
pr_err("QAT: Driver removal failed\n");
return;
}
+ adf_flush_vf_wq(accel_dev);
adf_dev_stop(accel_dev);
adf_dev_shutdown(accel_dev);
adf_cleanup_accel(accel_dev);
diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h
index ac435b44f1d2..38c0af6d4e43 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h
@@ -18,8 +18,6 @@
#define ADF_4XXX_DEVICE_NAME "4xxx"
#define ADF_4XXX_PCI_DEVICE_ID 0x4940
#define ADF_4XXXIOV_PCI_DEVICE_ID 0x4941
-#define ADF_ERRSOU3 (0x3A000 + 0x0C)
-#define ADF_ERRSOU5 (0x3A000 + 0xD8)
#define ADF_DEVICE_FUSECTL_OFFSET 0x40
#define ADF_DEVICE_LEGFUSE_OFFSET 0x4C
#define ADF_DEVICE_FUSECTL_MASK 0x80000000
@@ -156,7 +154,6 @@ struct adf_hw_device_data {
u32 (*get_num_aes)(struct adf_hw_device_data *self);
u32 (*get_num_accels)(struct adf_hw_device_data *self);
u32 (*get_pf2vf_offset)(u32 i);
- u32 (*get_vintmsk_offset)(u32 i);
void (*get_arb_info)(struct arb_info *arb_csrs_info);
void (*get_admin_info)(struct admin_info *admin_csrs_info);
enum dev_sku_info (*get_sku)(struct adf_hw_device_data *self);
@@ -174,7 +171,7 @@ struct adf_hw_device_data {
bool enable);
void (*enable_ints)(struct adf_accel_dev *accel_dev);
void (*set_ssm_wdtimer)(struct adf_accel_dev *accel_dev);
- int (*enable_vf2pf_comms)(struct adf_accel_dev *accel_dev);
+ int (*enable_pfvf_comms)(struct adf_accel_dev *accel_dev);
void (*reset_device)(struct adf_accel_dev *accel_dev);
void (*set_msix_rttable)(struct adf_accel_dev *accel_dev);
char *(*uof_get_name)(u32 obj_num);
@@ -227,7 +224,6 @@ struct adf_fw_loader_data {
struct adf_accel_vf_info {
struct adf_accel_dev *accel_dev;
- struct tasklet_struct vf2pf_bh_tasklet;
struct mutex pf2vf_lock; /* protect CSR access for PF2VF messages */
struct ratelimit_state vf2pf_ratelimit;
u32 vf_nr;
@@ -249,6 +245,8 @@ struct adf_accel_dev {
struct adf_accel_pci accel_pci_dev;
union {
struct {
+ /* protects VF2PF interrupts access */
+ spinlock_t vf2pf_ints_lock;
/* vf_info is non-zero when SR-IOV is init'ed */
struct adf_accel_vf_info *vf_info;
} pf;
diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c
index d2ae293d0df6..ed3e40bc56eb 100644
--- a/drivers/crypto/qat/qat_common/adf_aer.c
+++ b/drivers/crypto/qat/qat_common/adf_aer.c
@@ -194,7 +194,7 @@ int adf_enable_aer(struct adf_accel_dev *accel_dev)
EXPORT_SYMBOL_GPL(adf_enable_aer);
/**
- * adf_disable_aer() - Enable Advance Error Reporting for acceleration device
+ * adf_disable_aer() - Disable Advance Error Reporting for acceleration device
* @accel_dev: Pointer to acceleration device.
*
* Function disables PCI Advance Error Reporting for the
diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index c61476553728..4261749fae8d 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -193,22 +193,23 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
void adf_disable_sriov(struct adf_accel_dev *accel_dev);
void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
u32 vf_mask);
+void adf_disable_vf2pf_interrupts_irq(struct adf_accel_dev *accel_dev,
+ u32 vf_mask);
void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
u32 vf_mask);
void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev);
void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev);
+void adf_schedule_vf2pf_handler(struct adf_accel_vf_info *vf_info);
-int adf_vf2pf_init(struct adf_accel_dev *accel_dev);
-void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev);
+int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev);
+void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev);
int adf_init_pf_wq(void);
void adf_exit_pf_wq(void);
int adf_init_vf_wq(void);
void adf_exit_vf_wq(void);
+void adf_flush_vf_wq(struct adf_accel_dev *accel_dev);
#else
-static inline int adf_sriov_configure(struct pci_dev *pdev, int numvfs)
-{
- return 0;
-}
+#define adf_sriov_configure NULL
static inline void adf_disable_sriov(struct adf_accel_dev *accel_dev)
{
@@ -222,12 +223,12 @@ static inline void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
{
}
-static inline int adf_vf2pf_init(struct adf_accel_dev *accel_dev)
+static inline int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev)
{
return 0;
}
-static inline void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev)
+static inline void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev)
{
}
@@ -249,5 +250,9 @@ static inline void adf_exit_vf_wq(void)
{
}
+static inline void adf_flush_vf_wq(struct adf_accel_dev *accel_dev)
+{
+}
+
#endif
#endif
diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c
index 744c40351428..60bc7b991d35 100644
--- a/drivers/crypto/qat/qat_common/adf_init.c
+++ b/drivers/crypto/qat/qat_common/adf_init.c
@@ -61,6 +61,7 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
struct service_hndl *service;
struct list_head *list_itr;
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ int ret;
if (!hw_data) {
dev_err(&GET_DEV(accel_dev),
@@ -88,8 +89,6 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
return -EFAULT;
}
- hw_data->enable_ints(accel_dev);
-
if (adf_ae_init(accel_dev)) {
dev_err(&GET_DEV(accel_dev),
"Failed to initialise Acceleration Engine\n");
@@ -110,6 +109,13 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
}
set_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status);
+ hw_data->enable_ints(accel_dev);
+ hw_data->enable_error_correction(accel_dev);
+
+ ret = hw_data->enable_pfvf_comms(accel_dev);
+ if (ret)
+ return ret;
+
/*
* Subservice initialisation is divided into two stages: init and start.
* This is to facilitate any ordering dependencies between services
@@ -126,9 +132,6 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
set_bit(accel_dev->accel_id, service->init_status);
}
- hw_data->enable_error_correction(accel_dev);
- hw_data->enable_vf2pf_comms(accel_dev);
-
return 0;
}
EXPORT_SYMBOL_GPL(adf_dev_init);
diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c
index e3ad5587be49..c678d5c531aa 100644
--- a/drivers/crypto/qat/qat_common/adf_isr.c
+++ b/drivers/crypto/qat/qat_common/adf_isr.c
@@ -15,6 +15,14 @@
#include "adf_transport_access_macros.h"
#include "adf_transport_internal.h"
+#define ADF_MAX_NUM_VFS 32
+#define ADF_ERRSOU3 (0x3A000 + 0x0C)
+#define ADF_ERRSOU5 (0x3A000 + 0xD8)
+#define ADF_ERRMSK3 (0x3A000 + 0x1C)
+#define ADF_ERRMSK5 (0x3A000 + 0xDC)
+#define ADF_ERR_REG_VF2PF_L(vf_src) (((vf_src) & 0x01FFFE00) >> 9)
+#define ADF_ERR_REG_VF2PF_U(vf_src) (((vf_src) & 0x0000FFFF) << 16)
+
static int adf_enable_msix(struct adf_accel_dev *accel_dev)
{
struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev;
@@ -71,14 +79,23 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
struct adf_bar *pmisc =
&GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)];
- void __iomem *pmisc_bar_addr = pmisc->virt_addr;
- u32 vf_mask;
+ void __iomem *pmisc_addr = pmisc->virt_addr;
+ u32 errsou3, errsou5, errmsk3, errmsk5;
+ unsigned long vf_mask;
/* Get the interrupt sources triggered by VFs */
- vf_mask = ((ADF_CSR_RD(pmisc_bar_addr, ADF_ERRSOU5) &
- 0x0000FFFF) << 16) |
- ((ADF_CSR_RD(pmisc_bar_addr, ADF_ERRSOU3) &
- 0x01FFFE00) >> 9);
+ errsou3 = ADF_CSR_RD(pmisc_addr, ADF_ERRSOU3);
+ errsou5 = ADF_CSR_RD(pmisc_addr, ADF_ERRSOU5);
+ vf_mask = ADF_ERR_REG_VF2PF_L(errsou3);
+ vf_mask |= ADF_ERR_REG_VF2PF_U(errsou5);
+
+ /* To avoid adding duplicate entries to work queue, clear
+ * vf_int_mask_sets bits that are already masked in ERRMSK register.
+ */
+ errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_ERRMSK3);
+ errmsk5 = ADF_CSR_RD(pmisc_addr, ADF_ERRMSK5);
+ vf_mask &= ~ADF_ERR_REG_VF2PF_L(errmsk3);
+ vf_mask &= ~ADF_ERR_REG_VF2PF_U(errmsk5);
if (vf_mask) {
struct adf_accel_vf_info *vf_info;
@@ -86,15 +103,13 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
int i;
/* Disable VF2PF interrupts for VFs with pending ints */
- adf_disable_vf2pf_interrupts(accel_dev, vf_mask);
+ adf_disable_vf2pf_interrupts_irq(accel_dev, vf_mask);
/*
- * Schedule tasklets to handle VF2PF interrupt BHs
- * unless the VF is malicious and is attempting to
- * flood the host OS with VF2PF interrupts.
+ * Handle VF2PF interrupt unless the VF is malicious and
+ * is attempting to flood the host OS with VF2PF interrupts.
*/
- for_each_set_bit(i, (const unsigned long *)&vf_mask,
- (sizeof(vf_mask) * BITS_PER_BYTE)) {
+ for_each_set_bit(i, &vf_mask, ADF_MAX_NUM_VFS) {
vf_info = accel_dev->pf.vf_info + i;
if (!__ratelimit(&vf_info->vf2pf_ratelimit)) {
@@ -104,8 +119,7 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
continue;
}
- /* Tasklet will re-enable ints from this VF */
- tasklet_hi_schedule(&vf_info->vf2pf_bh_tasklet);
+ adf_schedule_vf2pf_handler(vf_info);
irq_handled = true;
}
diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c
index a1b77bd7a894..976b9ab7617c 100644
--- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c
+++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c
@@ -11,28 +11,8 @@
#define ADF_DH895XCC_ERRMSK5 (ADF_DH895XCC_EP_OFFSET + 0xDC)
#define ADF_DH895XCC_ERRMSK5_VF2PF_U_MASK(vf_mask) (vf_mask >> 16)
-void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
-{
- struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
- struct adf_hw_device_data *hw_data = accel_dev->hw_device;
- void __iomem *pmisc_bar_addr =
- pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
-
- ADF_CSR_WR(pmisc_bar_addr, hw_data->get_vintmsk_offset(0), 0x0);
-}
-
-void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
-{
- struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
- struct adf_hw_device_data *hw_data = accel_dev->hw_device;
- void __iomem *pmisc_bar_addr =
- pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
-
- ADF_CSR_WR(pmisc_bar_addr, hw_data->get_vintmsk_offset(0), 0x2);
-}
-
-void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
- u32 vf_mask)
+static void __adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
+ u32 vf_mask)
{
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
struct adf_bar *pmisc =
@@ -55,7 +35,17 @@ void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
}
}
-void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
+void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags);
+ __adf_enable_vf2pf_interrupts(accel_dev, vf_mask);
+ spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
+}
+
+static void __adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
+ u32 vf_mask)
{
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
struct adf_bar *pmisc =
@@ -78,6 +68,22 @@ void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
}
}
+void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags);
+ __adf_disable_vf2pf_interrupts(accel_dev, vf_mask);
+ spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
+}
+
+void adf_disable_vf2pf_interrupts_irq(struct adf_accel_dev *accel_dev, u32 vf_mask)
+{
+ spin_lock(&accel_dev->pf.vf2pf_ints_lock);
+ __adf_disable_vf2pf_interrupts(accel_dev, vf_mask);
+ spin_unlock(&accel_dev->pf.vf2pf_ints_lock);
+}
+
static int __adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr)
{
struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
@@ -186,7 +192,6 @@ int adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr)
return ret;
}
-EXPORT_SYMBOL_GPL(adf_iov_putmsg);
void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
{
@@ -216,7 +221,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
resp = (ADF_PF2VF_MSGORIGIN_SYSTEM |
(ADF_PF2VF_MSGTYPE_VERSION_RESP <<
ADF_PF2VF_MSGTYPE_SHIFT) |
- (ADF_PFVF_COMPATIBILITY_VERSION <<
+ (ADF_PFVF_COMPAT_THIS_VERSION <<
ADF_PF2VF_VERSION_RESP_VERS_SHIFT));
dev_dbg(&GET_DEV(accel_dev),
@@ -226,19 +231,19 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
if (vf_compat_ver < hw_data->min_iov_compat_ver) {
dev_err(&GET_DEV(accel_dev),
"VF (vers %d) incompatible with PF (vers %d)\n",
- vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION);
+ vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
resp |= ADF_PF2VF_VF_INCOMPATIBLE <<
ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
- } else if (vf_compat_ver > ADF_PFVF_COMPATIBILITY_VERSION) {
+ } else if (vf_compat_ver > ADF_PFVF_COMPAT_THIS_VERSION) {
dev_err(&GET_DEV(accel_dev),
"VF (vers %d) compat with PF (vers %d) unkn.\n",
- vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION);
+ vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
resp |= ADF_PF2VF_VF_COMPAT_UNKNOWN <<
ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
} else {
dev_dbg(&GET_DEV(accel_dev),
"VF (vers %d) compatible with PF (vers %d)\n",
- vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION);
+ vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
resp |= ADF_PF2VF_VF_COMPATIBLE <<
ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
}
@@ -251,7 +256,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
resp = (ADF_PF2VF_MSGORIGIN_SYSTEM |
(ADF_PF2VF_MSGTYPE_VERSION_RESP <<
ADF_PF2VF_MSGTYPE_SHIFT) |
- (ADF_PFVF_COMPATIBILITY_VERSION <<
+ (ADF_PFVF_COMPAT_THIS_VERSION <<
ADF_PF2VF_VERSION_RESP_VERS_SHIFT));
resp |= ADF_PF2VF_VF_COMPATIBLE <<
ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
@@ -284,6 +289,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
/* re-enable interrupt on PF from this VF */
adf_enable_vf2pf_interrupts(accel_dev, (1 << vf_nr));
+
return;
err:
dev_dbg(&GET_DEV(accel_dev), "Unknown message from VF%d (0x%x);\n",
@@ -313,8 +319,10 @@ static int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev)
msg = ADF_VF2PF_MSGORIGIN_SYSTEM;
msg |= ADF_VF2PF_MSGTYPE_COMPAT_VER_REQ << ADF_VF2PF_MSGTYPE_SHIFT;
- msg |= ADF_PFVF_COMPATIBILITY_VERSION << ADF_VF2PF_COMPAT_VER_REQ_SHIFT;
- BUILD_BUG_ON(ADF_PFVF_COMPATIBILITY_VERSION > 255);
+ msg |= ADF_PFVF_COMPAT_THIS_VERSION << ADF_VF2PF_COMPAT_VER_REQ_SHIFT;
+ BUILD_BUG_ON(ADF_PFVF_COMPAT_THIS_VERSION > 255);
+
+ reinit_completion(&accel_dev->vf.iov_msg_completion);
/* Send request from VF to PF */
ret = adf_iov_putmsg(accel_dev, msg, 0);
@@ -338,14 +346,16 @@ static int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev)
break;
case ADF_PF2VF_VF_COMPAT_UNKNOWN:
/* VF is newer than PF and decides whether it is compatible */
- if (accel_dev->vf.pf_version >= hw_data->min_iov_compat_ver)
+ if (accel_dev->vf.pf_version >= hw_data->min_iov_compat_ver) {
+ accel_dev->vf.compatible = ADF_PF2VF_VF_COMPATIBLE;
break;
+ }
fallthrough;
case ADF_PF2VF_VF_INCOMPATIBLE:
dev_err(&GET_DEV(accel_dev),
"PF (vers %d) and VF (vers %d) are not compatible\n",
accel_dev->vf.pf_version,
- ADF_PFVF_COMPATIBILITY_VERSION);
+ ADF_PFVF_COMPAT_THIS_VERSION);
return -EINVAL;
default:
dev_err(&GET_DEV(accel_dev),
diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.h b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.h
index 0690c031bfce..ffd43aa50b57 100644
--- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.h
+++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.h
@@ -52,7 +52,7 @@
* IN_USE_BY pattern as part of a collision control scheme (see adf_iov_putmsg).
*/
-#define ADF_PFVF_COMPATIBILITY_VERSION 0x1 /* PF<->VF compat */
+#define ADF_PFVF_COMPAT_THIS_VERSION 0x1 /* PF<->VF compat */
/* PF->VF messages */
#define ADF_PF2VF_INT BIT(0)
diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c
index 8c822c2861c2..90ec057f9183 100644
--- a/drivers/crypto/qat/qat_common/adf_sriov.c
+++ b/drivers/crypto/qat/qat_common/adf_sriov.c
@@ -24,9 +24,8 @@ static void adf_iov_send_resp(struct work_struct *work)
kfree(pf2vf_resp);
}
-static void adf_vf2pf_bh_handler(void *data)
+void adf_schedule_vf2pf_handler(struct adf_accel_vf_info *vf_info)
{
- struct adf_accel_vf_info *vf_info = (struct adf_accel_vf_info *)data;
struct adf_pf2vf_resp *pf2vf_resp;
pf2vf_resp = kzalloc(sizeof(*pf2vf_resp), GFP_ATOMIC);
@@ -52,9 +51,6 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev)
vf_info->accel_dev = accel_dev;
vf_info->vf_nr = i;
- tasklet_init(&vf_info->vf2pf_bh_tasklet,
- (void *)adf_vf2pf_bh_handler,
- (unsigned long)vf_info);
mutex_init(&vf_info->pf2vf_lock);
ratelimit_state_init(&vf_info->vf2pf_ratelimit,
DEFAULT_RATELIMIT_INTERVAL,
@@ -110,8 +106,6 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
hw_data->configure_iov_threads(accel_dev, false);
for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) {
- tasklet_disable(&vf->vf2pf_bh_tasklet);
- tasklet_kill(&vf->vf2pf_bh_tasklet);
mutex_destroy(&vf->pf2vf_lock);
}
diff --git a/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c b/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c
index e85bd62d134a..3e25fac051b2 100644
--- a/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c
+++ b/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c
@@ -5,14 +5,14 @@
#include "adf_pf2vf_msg.h"
/**
- * adf_vf2pf_init() - send init msg to PF
+ * adf_vf2pf_notify_init() - send init msg to PF
* @accel_dev: Pointer to acceleration VF device.
*
* Function sends an init message from the VF to a PF
*
* Return: 0 on success, error code otherwise.
*/
-int adf_vf2pf_init(struct adf_accel_dev *accel_dev)
+int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev)
{
u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM |
(ADF_VF2PF_MSGTYPE_INIT << ADF_VF2PF_MSGTYPE_SHIFT));
@@ -25,17 +25,17 @@ int adf_vf2pf_init(struct adf_accel_dev *accel_dev)
set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status);
return 0;
}
-EXPORT_SYMBOL_GPL(adf_vf2pf_init);
+EXPORT_SYMBOL_GPL(adf_vf2pf_notify_init);
/**
- * adf_vf2pf_shutdown() - send shutdown msg to PF
+ * adf_vf2pf_notify_shutdown() - send shutdown msg to PF
* @accel_dev: Pointer to acceleration VF device.
*
* Function sends a shutdown message from the VF to a PF
*
* Return: void
*/
-void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev)
+void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev)
{
u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM |
(ADF_VF2PF_MSGTYPE_SHUTDOWN << ADF_VF2PF_MSGTYPE_SHIFT));
@@ -45,4 +45,4 @@ void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev)
dev_err(&GET_DEV(accel_dev),
"Failed to send Shutdown event to PF\n");
}
-EXPORT_SYMBOL_GPL(adf_vf2pf_shutdown);
+EXPORT_SYMBOL_GPL(adf_vf2pf_notify_shutdown);
diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c
index 888388acb6bd..7828a6573f3e 100644
--- a/drivers/crypto/qat/qat_common/adf_vf_isr.c
+++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c
@@ -18,6 +18,7 @@
#include "adf_pf2vf_msg.h"
#define ADF_VINTSOU_OFFSET 0x204
+#define ADF_VINTMSK_OFFSET 0x208
#define ADF_VINTSOU_BUN BIT(0)
#define ADF_VINTSOU_PF2VF BIT(1)
@@ -28,6 +29,27 @@ struct adf_vf_stop_data {
struct work_struct work;
};
+void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
+{
+ struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ void __iomem *pmisc_bar_addr =
+ pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
+
+ ADF_CSR_WR(pmisc_bar_addr, ADF_VINTMSK_OFFSET, 0x0);
+}
+
+void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
+{
+ struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ void __iomem *pmisc_bar_addr =
+ pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
+
+ ADF_CSR_WR(pmisc_bar_addr, ADF_VINTMSK_OFFSET, 0x2);
+}
+EXPORT_SYMBOL_GPL(adf_disable_pf2vf_interrupts);
+
static int adf_enable_msi(struct adf_accel_dev *accel_dev)
{
struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev;
@@ -160,11 +182,21 @@ static irqreturn_t adf_isr(int irq, void *privdata)
struct adf_bar *pmisc =
&GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)];
void __iomem *pmisc_bar_addr = pmisc->virt_addr;
- u32 v_int;
+ bool handled = false;
+ u32 v_int, v_mask;
/* Read VF INT source CSR to determine the source of VF interrupt */
v_int = ADF_CSR_RD(pmisc_bar_addr, ADF_VINTSOU_OFFSET);
+ /* Read VF INT mask CSR to determine which sources are masked */
+ v_mask = ADF_CSR_RD(pmisc_bar_addr, ADF_VINTMSK_OFFSET);
+
+ /*
+ * Recompute v_int ignoring sources that are masked. This is to
+ * avoid rescheduling the tasklet for interrupts already handled
+ */
+ v_int &= ~v_mask;
+
/* Check for PF2VF interrupt */
if (v_int & ADF_VINTSOU_PF2VF) {
/* Disable PF to VF interrupt */
@@ -172,7 +204,7 @@ static irqreturn_t adf_isr(int irq, void *privdata)
/* Schedule tasklet to handle interrupt BH */
tasklet_hi_schedule(&accel_dev->vf.pf2vf_bh_tasklet);
- return IRQ_HANDLED;
+ handled = true;
}
/* Check bundle interrupt */
@@ -184,10 +216,10 @@ static irqreturn_t adf_isr(int irq, void *privdata)
csr_ops->write_csr_int_flag_and_col(bank->csr_addr,
bank->bank_number, 0);
tasklet_hi_schedule(&bank->resp_handler);
- return IRQ_HANDLED;
+ handled = true;
}
- return IRQ_NONE;
+ return handled ? IRQ_HANDLED : IRQ_NONE;
}
static int adf_request_msi_irq(struct adf_accel_dev *accel_dev)
@@ -285,6 +317,30 @@ err_out:
}
EXPORT_SYMBOL_GPL(adf_vf_isr_resource_alloc);
+/**
+ * adf_flush_vf_wq() - Flush workqueue for VF
+ * @accel_dev: Pointer to acceleration device.
+ *
+ * Function disables the PF/VF interrupts on the VF so that no new messages
+ * are received and flushes the workqueue 'adf_vf_stop_wq'.
+ *
+ * Return: void.
+ */
+void adf_flush_vf_wq(struct adf_accel_dev *accel_dev)
+{
+ adf_disable_pf2vf_interrupts(accel_dev);
+
+ flush_workqueue(adf_vf_stop_wq);
+}
+EXPORT_SYMBOL_GPL(adf_flush_vf_wq);
+
+/**
+ * adf_init_vf_wq() - Init workqueue for VF
+ *
+ * Function init workqueue 'adf_vf_stop_wq' for VF.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
int __init adf_init_vf_wq(void)
{
adf_vf_stop_wq = alloc_workqueue("adf_vf_stop_wq", WQ_MEM_RECLAIM, 0);
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
index 7dd7cd6c3ef8..0a9ce365a544 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
@@ -131,11 +131,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_DH895XCC_PF2VF_OFFSET(i);
}
-static u32 get_vintmsk_offset(u32 i)
-{
- return ADF_DH895XCC_VINTMSK_OFFSET(i);
-}
-
static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
{
struct adf_hw_device_data *hw_device = accel_dev->hw_device;
@@ -180,8 +175,10 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
ADF_DH895XCC_SMIA1_MASK);
}
-static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
+static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
{
+ spin_lock_init(&accel_dev->pf.vf2pf_ints_lock);
+
return 0;
}
@@ -213,8 +210,6 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
hw_data->get_num_aes = get_num_aes;
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
- hw_data->get_pf2vf_offset = get_pf2vf_offset;
- hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_admin_info = adf_gen2_get_admin_info;
hw_data->get_arb_info = adf_gen2_get_arb_info;
hw_data->get_sram_bar_id = get_sram_bar_id;
@@ -224,15 +219,17 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
hw_data->init_admin_comms = adf_init_admin_comms;
hw_data->exit_admin_comms = adf_exit_admin_comms;
hw_data->configure_iov_threads = configure_iov_threads;
- hw_data->disable_iov = adf_disable_sriov;
hw_data->send_admin_init = adf_send_admin_init;
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
hw_data->enable_ints = adf_enable_ints;
- hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
hw_data->reset_device = adf_reset_sbr;
- hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
+ hw_data->get_pf2vf_offset = get_pf2vf_offset;
+ hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
+ hw_data->disable_iov = adf_disable_sriov;
+ hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
+
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
}
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
index 4d613923d155..f99319cd4543 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
@@ -35,7 +35,6 @@
#define ADF_DH895XCC_ERRSSMSH_EN BIT(3)
#define ADF_DH895XCC_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04))
-#define ADF_DH895XCC_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04))
/* AE to function mapping */
#define ADF_DH895XCC_AE2FUNC_MAP_GRP_A_NUM_REGS 96
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
index a9ec4357144c..3976a81bd99b 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
@@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
- dev_err(&pdev->dev, "No usable DMA configuration\n");
- ret = -EFAULT;
- goto out_err_disable;
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- }
-
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+ if (ret) {
+ dev_err(&pdev->dev, "No usable DMA configuration\n");
+ goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_DH895XCC_DEVICE_NAME)) {
@@ -208,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (pci_save_state(pdev)) {
dev_err(&pdev->dev, "Failed to save pci state\n");
ret = -ENOMEM;
- goto out_err_free_reg;
+ goto out_err_disable_aer;
}
ret = qat_crypto_dev_config(accel_dev);
if (ret)
- goto out_err_free_reg;
+ goto out_err_disable_aer;
ret = adf_dev_init(accel_dev);
if (ret)
@@ -229,6 +222,8 @@ out_err_dev_stop:
adf_dev_stop(accel_dev);
out_err_dev_shutdown:
adf_dev_shutdown(accel_dev);
+out_err_disable_aer:
+ adf_disable_aer(accel_dev);
out_err_free_reg:
pci_release_regions(accel_pci_dev->pci_dev);
out_err_disable:
diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
index f14fb82ed6df..7c6ed6bc8abf 100644
--- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
+++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
@@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_DH895XCCIOV_PF2VF_OFFSET;
}
-static u32 get_vintmsk_offset(u32 i)
-{
- return ADF_DH895XCCIOV_VINTMSK_OFFSET;
-}
-
static int adf_vf_int_noop(struct adf_accel_dev *accel_dev)
{
return 0;
@@ -81,10 +76,10 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data)
hw_data->enable_error_correction = adf_vf_void_noop;
hw_data->init_admin_comms = adf_vf_int_noop;
hw_data->exit_admin_comms = adf_vf_void_noop;
- hw_data->send_admin_init = adf_vf2pf_init;
+ hw_data->send_admin_init = adf_vf2pf_notify_init;
hw_data->init_arb = adf_vf_int_noop;
hw_data->exit_arb = adf_vf_void_noop;
- hw_data->disable_iov = adf_vf2pf_shutdown;
+ hw_data->disable_iov = adf_vf2pf_notify_shutdown;
hw_data->get_accel_mask = get_accel_mask;
hw_data->get_ae_mask = get_ae_mask;
hw_data->get_num_accels = get_num_accels;
@@ -92,11 +87,10 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data)
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
hw_data->get_pf2vf_offset = get_pf2vf_offset;
- hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_sku = get_sku;
hw_data->enable_ints = adf_vf_void_noop;
- hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms;
- hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
+ hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms;
+ hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
hw_data->dev_class->instances++;
adf_devmgr_update_class_index(hw_data);
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h
index 2bfcc67f8f39..306ebb71a408 100644
--- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h
+++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h
@@ -13,7 +13,6 @@
#define ADF_DH895XCCIOV_ETR_BAR 0
#define ADF_DH895XCCIOV_ETR_MAX_BANKS 1
#define ADF_DH895XCCIOV_PF2VF_OFFSET 0x200
-#define ADF_DH895XCCIOV_VINTMSK_OFFSET 0x208
void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data);
void adf_clean_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data);
diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
index 29999da716cc..99d90f3ea2b7 100644
--- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
@@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
- dev_err(&pdev->dev, "No usable DMA configuration\n");
- ret = -EFAULT;
- goto out_err_disable;
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- }
-
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+ if (ret) {
+ dev_err(&pdev->dev, "No usable DMA configuration\n");
+ goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_DH895XCCVF_DEVICE_NAME)) {
@@ -218,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev)
pr_err("QAT: Driver removal failed\n");
return;
}
+ adf_flush_vf_wq(accel_dev);
adf_dev_stop(accel_dev);
adf_dev_shutdown(accel_dev);
adf_cleanup_accel(accel_dev);
diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c
index 080955a1dd9c..e2375d992308 100644
--- a/drivers/crypto/virtio/virtio_crypto_core.c
+++ b/drivers/crypto/virtio/virtio_crypto_core.c
@@ -187,9 +187,9 @@ static int virtcrypto_init_vqs(struct virtio_crypto *vi)
if (ret)
goto err_free;
- get_online_cpus();
+ cpus_read_lock();
virtcrypto_set_affinity(vi);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 5fa6ae9dbc8b..44736cbd446e 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -313,7 +313,7 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
return -ENXIO;
if (nr_pages < 0)
- return nr_pages;
+ return -EINVAL;
avail = dax_dev->ops->direct_access(dax_dev, pgoff, nr_pages,
kaddr, pfn);
diff --git a/drivers/dio/dio.c b/drivers/dio/dio.c
index 193b40e7aec0..4c06c93c93d3 100644
--- a/drivers/dio/dio.c
+++ b/drivers/dio/dio.c
@@ -219,7 +219,7 @@ static int __init dio_init(void)
/* Found a board, allocate it an entry in the list */
dev = kzalloc(sizeof(struct dio_dev), GFP_KERNEL);
if (!dev)
- return 0;
+ return -ENOMEM;
dev->bus = &dio_bus;
dev->dev.parent = &dio_bus.dev;
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 61c21bd880a4..3a6d2416cb0f 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -539,10 +539,18 @@ module_platform_driver(altr_edac_driver);
* trigger testing are different for each memory.
*/
+#ifdef CONFIG_EDAC_ALTERA_OCRAM
static const struct edac_device_prv_data ocramecc_data;
+#endif
+#ifdef CONFIG_EDAC_ALTERA_L2C
static const struct edac_device_prv_data l2ecc_data;
+#endif
+#ifdef CONFIG_EDAC_ALTERA_OCRAM
static const struct edac_device_prv_data a10_ocramecc_data;
+#endif
+#ifdef CONFIG_EDAC_ALTERA_L2C
static const struct edac_device_prv_data a10_l2ecc_data;
+#endif
static irqreturn_t altr_edac_device_handler(int irq, void *dev_id)
{
@@ -569,9 +577,9 @@ static irqreturn_t altr_edac_device_handler(int irq, void *dev_id)
return ret_value;
}
-static ssize_t altr_edac_device_trig(struct file *file,
- const char __user *user_buf,
- size_t count, loff_t *ppos)
+static ssize_t __maybe_unused
+altr_edac_device_trig(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
{
u32 *ptemp, i, error_mask;
@@ -640,27 +648,27 @@ static ssize_t altr_edac_device_trig(struct file *file,
return count;
}
-static const struct file_operations altr_edac_device_inject_fops = {
+static const struct file_operations altr_edac_device_inject_fops __maybe_unused = {
.open = simple_open,
.write = altr_edac_device_trig,
.llseek = generic_file_llseek,
};
-static ssize_t altr_edac_a10_device_trig(struct file *file,
- const char __user *user_buf,
- size_t count, loff_t *ppos);
+static ssize_t __maybe_unused
+altr_edac_a10_device_trig(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos);
-static const struct file_operations altr_edac_a10_device_inject_fops = {
+static const struct file_operations altr_edac_a10_device_inject_fops __maybe_unused = {
.open = simple_open,
.write = altr_edac_a10_device_trig,
.llseek = generic_file_llseek,
};
-static ssize_t altr_edac_a10_device_trig2(struct file *file,
- const char __user *user_buf,
- size_t count, loff_t *ppos);
+static ssize_t __maybe_unused
+altr_edac_a10_device_trig2(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos);
-static const struct file_operations altr_edac_a10_device_inject2_fops = {
+static const struct file_operations altr_edac_a10_device_inject2_fops __maybe_unused = {
.open = simple_open,
.write = altr_edac_a10_device_trig2,
.llseek = generic_file_llseek,
@@ -1697,9 +1705,9 @@ MODULE_DEVICE_TABLE(of, altr_edac_a10_device_of_match);
* Based on xgene_edac.c peripheral code.
*/
-static ssize_t altr_edac_a10_device_trig(struct file *file,
- const char __user *user_buf,
- size_t count, loff_t *ppos)
+static ssize_t __maybe_unused
+altr_edac_a10_device_trig(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
{
struct edac_device_ctl_info *edac_dci = file->private_data;
struct altr_edac_device_dev *drvdata = edac_dci->pvt_info;
@@ -1729,9 +1737,9 @@ static ssize_t altr_edac_a10_device_trig(struct file *file,
* slightly. A few Arria10 peripherals can use this injection function.
* Inject the error into the memory and then readback to trigger the IRQ.
*/
-static ssize_t altr_edac_a10_device_trig2(struct file *file,
- const char __user *user_buf,
- size_t count, loff_t *ppos)
+static ssize_t __maybe_unused
+altr_edac_a10_device_trig2(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
{
struct edac_device_ctl_info *edac_dci = file->private_data;
struct altr_edac_device_dev *drvdata = edac_dci->pvt_info;
@@ -1804,11 +1812,8 @@ static void altr_edac_a10_irq_handler(struct irq_desc *desc)
regmap_read(edac->ecc_mgr_map, sm_offset, &irq_status);
bits = irq_status;
- for_each_set_bit(bit, &bits, 32) {
- irq = irq_linear_revmap(edac->domain, dberr * 32 + bit);
- if (irq)
- generic_handle_irq(irq);
- }
+ for_each_set_bit(bit, &bits, 32)
+ generic_handle_domain_irq(edac->domain, dberr * 32 + bit);
chained_irq_exit(chip, desc);
}
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index f0d8f60acee1..99b06a3e8fb1 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -571,8 +571,8 @@ EDAC_DCT_ATTR_SHOW(dbam0);
EDAC_DCT_ATTR_SHOW(top_mem);
EDAC_DCT_ATTR_SHOW(top_mem2);
-static ssize_t hole_show(struct device *dev, struct device_attribute *mattr,
- char *data)
+static ssize_t dram_hole_show(struct device *dev, struct device_attribute *mattr,
+ char *data)
{
struct mem_ctl_info *mci = to_mci(dev);
@@ -593,7 +593,7 @@ static DEVICE_ATTR(dhar, S_IRUGO, dhar_show, NULL);
static DEVICE_ATTR(dbam, S_IRUGO, dbam0_show, NULL);
static DEVICE_ATTR(topmem, S_IRUGO, top_mem_show, NULL);
static DEVICE_ATTR(topmem2, S_IRUGO, top_mem2_show, NULL);
-static DEVICE_ATTR(dram_hole, S_IRUGO, hole_show, NULL);
+static DEVICE_ATTR_RO(dram_hole);
static struct attribute *dbg_attrs[] = {
&dev_attr_dhar.attr,
@@ -802,16 +802,11 @@ static ssize_t inject_write_store(struct device *dev,
* update NUM_INJ_ATTRS in case you add new members
*/
-static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR,
- inject_section_show, inject_section_store);
-static DEVICE_ATTR(inject_word, S_IRUGO | S_IWUSR,
- inject_word_show, inject_word_store);
-static DEVICE_ATTR(inject_ecc_vector, S_IRUGO | S_IWUSR,
- inject_ecc_vector_show, inject_ecc_vector_store);
-static DEVICE_ATTR(inject_write, S_IWUSR,
- NULL, inject_write_store);
-static DEVICE_ATTR(inject_read, S_IWUSR,
- NULL, inject_read_store);
+static DEVICE_ATTR_RW(inject_section);
+static DEVICE_ATTR_RW(inject_word);
+static DEVICE_ATTR_RW(inject_ecc_vector);
+static DEVICE_ATTR_WO(inject_write);
+static DEVICE_ATTR_WO(inject_read);
static struct attribute *inj_attrs[] = {
&dev_attr_inject_section.attr,
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index f6d462d0be2d..2c5975674723 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -166,6 +166,7 @@ const char * const edac_mem_types[] = {
[MEM_DDR5] = "Unbuffered-DDR5",
[MEM_NVDIMM] = "Non-volatile-RAM",
[MEM_WIO2] = "Wide-IO-2",
+ [MEM_HBM2] = "High-bandwidth-memory-Gen2",
};
EXPORT_SYMBOL_GPL(edac_mem_types);
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index 6ce0ed2ffaaf..83345bfac246 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -33,15 +33,21 @@
#define I10NM_GET_DIMMMTR(m, i, j) \
readl((m)->mbase + ((m)->hbm_mc ? 0x80c : 0x2080c) + \
(i) * (m)->chan_mmio_sz + (j) * 4)
-#define I10NM_GET_MCDDRTCFG(m, i, j) \
+#define I10NM_GET_MCDDRTCFG(m, i) \
readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \
- (i) * (m)->chan_mmio_sz + (j) * 4)
+ (i) * (m)->chan_mmio_sz)
#define I10NM_GET_MCMTR(m, i) \
readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : 0x20ef8) + \
(i) * (m)->chan_mmio_sz)
#define I10NM_GET_AMAP(m, i) \
readl((m)->mbase + ((m)->hbm_mc ? 0x814 : 0x20814) + \
(i) * (m)->chan_mmio_sz)
+#define I10NM_GET_REG32(m, i, offset) \
+ readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
+#define I10NM_GET_REG64(m, i, offset) \
+ readq((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
+#define I10NM_SET_REG32(m, i, offset, v) \
+ writel(v, (m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
#define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23)
#define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12)
@@ -58,8 +64,125 @@
#define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0)
#define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5)
+#define RETRY_RD_ERR_LOG_UC BIT(1)
+#define RETRY_RD_ERR_LOG_NOOVER BIT(14)
+#define RETRY_RD_ERR_LOG_EN BIT(15)
+#define RETRY_RD_ERR_LOG_NOOVER_UC (BIT(14) | BIT(1))
+#define RETRY_RD_ERR_LOG_OVER_UC_V (BIT(2) | BIT(1) | BIT(0))
+
static struct list_head *i10nm_edac_list;
+static struct res_config *res_cfg;
+static int retry_rd_err_log;
+
+static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8};
+static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8};
+static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0};
+static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0};
+
+static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable)
+{
+ u32 s, d;
+
+ if (!imc->mbase)
+ return;
+
+ s = I10NM_GET_REG32(imc, chan, res_cfg->offsets_scrub[0]);
+ d = I10NM_GET_REG32(imc, chan, res_cfg->offsets_demand[0]);
+
+ if (enable) {
+ /* Save default configurations */
+ imc->chan[chan].retry_rd_err_log_s = s;
+ imc->chan[chan].retry_rd_err_log_d = d;
+
+ s &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
+ s |= RETRY_RD_ERR_LOG_EN;
+ d &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
+ d |= RETRY_RD_ERR_LOG_EN;
+ } else {
+ /* Restore default configurations */
+ if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC)
+ s |= RETRY_RD_ERR_LOG_UC;
+ if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER)
+ s |= RETRY_RD_ERR_LOG_NOOVER;
+ if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN))
+ s &= ~RETRY_RD_ERR_LOG_EN;
+ if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC)
+ d |= RETRY_RD_ERR_LOG_UC;
+ if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER)
+ d |= RETRY_RD_ERR_LOG_NOOVER;
+ if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN))
+ d &= ~RETRY_RD_ERR_LOG_EN;
+ }
+
+ I10NM_SET_REG32(imc, chan, res_cfg->offsets_scrub[0], s);
+ I10NM_SET_REG32(imc, chan, res_cfg->offsets_demand[0], d);
+}
+
+static void enable_retry_rd_err_log(bool enable)
+{
+ struct skx_dev *d;
+ int i, j;
+
+ edac_dbg(2, "\n");
+
+ list_for_each_entry(d, i10nm_edac_list, list)
+ for (i = 0; i < I10NM_NUM_IMC; i++)
+ for (j = 0; j < I10NM_NUM_CHANNELS; j++)
+ __enable_retry_rd_err_log(&d->imc[i], j, enable);
+}
+
+static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
+ int len, bool scrub_err)
+{
+ struct skx_imc *imc = &res->dev->imc[res->imc];
+ u32 log0, log1, log2, log3, log4;
+ u32 corr0, corr1, corr2, corr3;
+ u64 log2a, log5;
+ u32 *offsets;
+ int n;
+
+ if (!imc->mbase)
+ return;
+
+ offsets = scrub_err ? res_cfg->offsets_scrub : res_cfg->offsets_demand;
+
+ log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]);
+ log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]);
+ log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]);
+ log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]);
+ log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]);
+
+ if (res_cfg->type == SPR) {
+ log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]);
+ n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx]",
+ log0, log1, log2a, log3, log4, log5);
+ } else {
+ log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]);
+ n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]",
+ log0, log1, log2, log3, log4, log5);
+ }
+
+ corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18);
+ corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c);
+ corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20);
+ corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24);
+
+ if (len - n > 0)
+ snprintf(msg + n, len - n,
+ " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]",
+ corr0 & 0xffff, corr0 >> 16,
+ corr1 & 0xffff, corr1 >> 16,
+ corr2 & 0xffff, corr2 >> 16,
+ corr3 & 0xffff, corr3 >> 16);
+
+ /* Clear status bits */
+ if (retry_rd_err_log == 2 && (log0 & RETRY_RD_ERR_LOG_OVER_UC_V)) {
+ log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
+ I10NM_SET_REG32(imc, res->channel, offsets[0], log0);
+ }
+}
+
static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
unsigned int dev, unsigned int fun)
{
@@ -263,6 +386,8 @@ static struct res_config i10nm_cfg0 = {
.ddr_chan_mmio_sz = 0x4000,
.sad_all_devfn = PCI_DEVFN(29, 0),
.sad_all_offset = 0x108,
+ .offsets_scrub = offsets_scrub_icx,
+ .offsets_demand = offsets_demand_icx,
};
static struct res_config i10nm_cfg1 = {
@@ -272,6 +397,8 @@ static struct res_config i10nm_cfg1 = {
.ddr_chan_mmio_sz = 0x4000,
.sad_all_devfn = PCI_DEVFN(29, 0),
.sad_all_offset = 0x108,
+ .offsets_scrub = offsets_scrub_icx,
+ .offsets_demand = offsets_demand_icx,
};
static struct res_config spr_cfg = {
@@ -283,6 +410,8 @@ static struct res_config spr_cfg = {
.support_ddr5 = true,
.sad_all_devfn = PCI_DEVFN(10, 0),
.sad_all_offset = 0x300,
+ .offsets_scrub = offsets_scrub_spr,
+ .offsets_demand = offsets_demand_spr,
};
static const struct x86_cpu_id i10nm_cpuids[] = {
@@ -321,10 +450,10 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
ndimms = 0;
amap = I10NM_GET_AMAP(imc, i);
+ mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i);
for (j = 0; j < imc->num_dimms; j++) {
dimm = edac_get_dimm(mci, i, j, 0);
mtr = I10NM_GET_DIMMMTR(imc, i, j);
- mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i, j);
edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n",
mtr, mcddrtcfg, imc->mc, i, j);
@@ -422,6 +551,7 @@ static int __init i10nm_init(void)
return -ENODEV;
cfg = (struct res_config *)id->driver_data;
+ res_cfg = cfg;
rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);
if (rc)
@@ -486,6 +616,12 @@ static int __init i10nm_init(void)
mce_register_decode_chain(&i10nm_mce_dec);
setup_i10nm_debug();
+ if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
+ skx_set_decode(NULL, show_retry_rd_err_log);
+ if (retry_rd_err_log == 2)
+ enable_retry_rd_err_log(true);
+ }
+
i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION);
return 0;
@@ -497,6 +633,13 @@ fail:
static void __exit i10nm_exit(void)
{
edac_dbg(2, "\n");
+
+ if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
+ skx_set_decode(NULL, NULL);
+ if (retry_rd_err_log == 2)
+ enable_retry_rd_err_log(false);
+ }
+
teardown_i10nm_debug();
mce_unregister_decode_chain(&i10nm_mce_dec);
skx_adxl_put();
@@ -506,5 +649,8 @@ static void __exit i10nm_exit(void)
module_init(i10nm_init);
module_exit(i10nm_exit);
+module_param(retry_rd_err_log, int, 0444);
+MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)");
+
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors");
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 27d56920b469..67dbf4c31271 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1246,6 +1246,9 @@ static int __init mce_amd_init(void)
c->x86_vendor != X86_VENDOR_HYGON)
return -ENODEV;
+ if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+ return -ENODEV;
+
if (boot_cpu_has(X86_FEATURE_SMCA)) {
xec_mask = 0x3f;
goto out;
diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c
index 4dbd46575bfb..1abc020d49ab 100644
--- a/drivers/edac/skx_base.c
+++ b/drivers/edac/skx_base.c
@@ -230,7 +230,8 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg)
#define SKX_ILV_TARGET(tgt) ((tgt) & 7)
static void skx_show_retry_rd_err_log(struct decoded_addr *res,
- char *msg, int len)
+ char *msg, int len,
+ bool scrub_err)
{
u32 log0, log1, log2, log3, log4;
u32 corr0, corr1, corr2, corr3;
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 5e83f59bef8a..19c17c5198c5 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -345,7 +345,10 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
rows = numrow(mtr);
cols = imc->hbm_mc ? 6 : numcol(mtr);
- if (cfg->support_ddr5 && ((amap & 0x8) || imc->hbm_mc)) {
+ if (imc->hbm_mc) {
+ banks = 32;
+ mtype = MEM_HBM2;
+ } else if (cfg->support_ddr5 && (amap & 0x8)) {
banks = 32;
mtype = MEM_DDR5;
} else {
@@ -529,6 +532,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
bool overflow = GET_BITFIELD(m->status, 62, 62);
bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
+ bool scrub_err = false;
bool recoverable;
int len;
u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
@@ -580,6 +584,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
break;
case 4:
optype = "memory scrubbing error";
+ scrub_err = true;
break;
default:
optype = "reserved";
@@ -602,7 +607,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
}
if (skx_show_retry_rd_err_log)
- skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len);
+ skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len, scrub_err);
edac_dbg(0, "%s\n", skx_msg);
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 01f67e731766..03ac067a80b9 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -80,6 +80,8 @@ struct skx_dev {
struct skx_channel {
struct pci_dev *cdev;
struct pci_dev *edev;
+ u32 retry_rd_err_log_s;
+ u32 retry_rd_err_log_d;
struct skx_dimm {
u8 close_pg;
u8 bank_xor_enable;
@@ -150,12 +152,15 @@ struct res_config {
/* SAD device number and function number */
unsigned int sad_all_devfn;
int sad_all_offset;
+ /* Offsets of retry_rd_err_log registers */
+ u32 *offsets_scrub;
+ u32 *offsets_demand;
};
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
struct res_config *cfg);
typedef bool (*skx_decode_f)(struct decoded_addr *res);
-typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len);
+typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err);
int __init skx_adxl_get(void);
void __exit skx_adxl_put(void);
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index ea7ca74fc173..73bdbd207e7a 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -221,7 +221,7 @@ static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
return 0;
n = 0;
- len = CPER_REC_LEN - 1;
+ len = CPER_REC_LEN;
if (mem->validation_bits & CPER_MEM_VALID_NODE)
n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
if (mem->validation_bits & CPER_MEM_VALID_CARD)
@@ -258,13 +258,12 @@ static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
mem->responder_id);
if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
- scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
- mem->target_id);
+ n += scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
+ mem->target_id);
if (mem->validation_bits & CPER_MEM_VALID_CHIP_ID)
- scnprintf(msg + n, len - n, "chip_id: %d ",
- mem->extended >> CPER_MEM_CHIP_ID_SHIFT);
+ n += scnprintf(msg + n, len - n, "chip_id: %d ",
+ mem->extended >> CPER_MEM_CHIP_ID_SHIFT);
- msg[n] = '\0';
return n;
}
@@ -633,7 +632,7 @@ int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
data_len = estatus->data_length;
apei_estatus_for_each_section(estatus, gdata) {
- if (sizeof(struct acpi_hest_generic_data) > data_len)
+ if (acpi_hest_get_size(gdata) > data_len)
return -EINVAL;
record_size = acpi_hest_get_record_size(gdata);
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index 7bf0a7acae5e..2363fee9211c 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -35,15 +35,48 @@ efi_status_t check_platform_features(void)
}
/*
- * Although relocatable kernels can fix up the misalignment with respect to
- * MIN_KIMG_ALIGN, the resulting virtual text addresses are subtly out of
- * sync with those recorded in the vmlinux when kaslr is disabled but the
- * image required relocation anyway. Therefore retain 2M alignment unless
- * KASLR is in use.
+ * Distro versions of GRUB may ignore the BSS allocation entirely (i.e., fail
+ * to provide space, and fail to zero it). Check for this condition by double
+ * checking that the first and the last byte of the image are covered by the
+ * same EFI memory map entry.
*/
-static u64 min_kimg_align(void)
+static bool check_image_region(u64 base, u64 size)
{
- return efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN;
+ unsigned long map_size, desc_size, buff_size;
+ efi_memory_desc_t *memory_map;
+ struct efi_boot_memmap map;
+ efi_status_t status;
+ bool ret = false;
+ int map_offset;
+
+ map.map = &memory_map;
+ map.map_size = &map_size;
+ map.desc_size = &desc_size;
+ map.desc_ver = NULL;
+ map.key_ptr = NULL;
+ map.buff_size = &buff_size;
+
+ status = efi_get_memory_map(&map);
+ if (status != EFI_SUCCESS)
+ return false;
+
+ for (map_offset = 0; map_offset < map_size; map_offset += desc_size) {
+ efi_memory_desc_t *md = (void *)memory_map + map_offset;
+ u64 end = md->phys_addr + md->num_pages * EFI_PAGE_SIZE;
+
+ /*
+ * Find the region that covers base, and return whether
+ * it covers base+size bytes.
+ */
+ if (base >= md->phys_addr && base < end) {
+ ret = (base + size) <= end;
+ break;
+ }
+ }
+
+ efi_bs_call(free_pool, memory_map);
+
+ return ret;
}
efi_status_t handle_kernel_image(unsigned long *image_addr,
@@ -56,6 +89,16 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
unsigned long kernel_size, kernel_memsize = 0;
u32 phys_seed = 0;
+ /*
+ * Although relocatable kernels can fix up the misalignment with
+ * respect to MIN_KIMG_ALIGN, the resulting virtual text addresses are
+ * subtly out of sync with those recorded in the vmlinux when kaslr is
+ * disabled but the image required relocation anyway. Therefore retain
+ * 2M alignment if KASLR was explicitly disabled, even if it was not
+ * going to be activated to begin with.
+ */
+ u64 min_kimg_align = efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN;
+
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
if (!efi_nokaslr) {
status = efi_get_random_bytes(sizeof(phys_seed),
@@ -76,6 +119,10 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
if (image->image_base != _text)
efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n");
+ if (!IS_ALIGNED((u64)_text, EFI_KIMG_ALIGN))
+ efi_err("FIRMWARE BUG: kernel image not aligned on %ldk boundary\n",
+ EFI_KIMG_ALIGN >> 10);
+
kernel_size = _edata - _text;
kernel_memsize = kernel_size + (_end - _edata);
*reserve_size = kernel_memsize;
@@ -85,14 +132,18 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
* If KASLR is enabled, and we have some randomness available,
* locate the kernel at a randomized offset in physical memory.
*/
- status = efi_random_alloc(*reserve_size, min_kimg_align(),
+ status = efi_random_alloc(*reserve_size, min_kimg_align,
reserve_addr, phys_seed);
+ if (status != EFI_SUCCESS)
+ efi_warn("efi_random_alloc() failed: 0x%lx\n", status);
} else {
status = EFI_OUT_OF_RESOURCES;
}
if (status != EFI_SUCCESS) {
- if (IS_ALIGNED((u64)_text, min_kimg_align())) {
+ if (!check_image_region((u64)_text, kernel_memsize)) {
+ efi_err("FIRMWARE BUG: Image BSS overlaps adjacent EFI memory region\n");
+ } else if (IS_ALIGNED((u64)_text, min_kimg_align)) {
/*
* Just execute from wherever we were loaded by the
* UEFI PE/COFF loader if the alignment is suitable.
@@ -103,7 +154,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
}
status = efi_allocate_pages_aligned(*reserve_size, reserve_addr,
- ULONG_MAX, min_kimg_align());
+ ULONG_MAX, min_kimg_align);
if (status != EFI_SUCCESS) {
efi_err("Failed to relocate kernel\n");
diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c
index a408df474d83..724155b9e10d 100644
--- a/drivers/firmware/efi/libstub/randomalloc.c
+++ b/drivers/firmware/efi/libstub/randomalloc.c
@@ -30,6 +30,8 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1,
(u64)ULONG_MAX);
+ if (region_end < size)
+ return 0;
first_slot = round_up(md->phys_addr, align);
last_slot = round_down(region_end - size + 1, align);
diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c
index 7127a04bca19..612a59e213df 100644
--- a/drivers/firmware/iscsi_ibft.c
+++ b/drivers/firmware/iscsi_ibft.c
@@ -84,8 +84,10 @@ MODULE_DESCRIPTION("sysfs interface to BIOS iBFT information");
MODULE_LICENSE("GPL");
MODULE_VERSION(IBFT_ISCSI_VERSION);
+static struct acpi_table_ibft *ibft_addr;
+
#ifndef CONFIG_ISCSI_IBFT_FIND
-struct acpi_table_ibft *ibft_addr;
+phys_addr_t ibft_phys_addr;
#endif
struct ibft_hdr {
@@ -858,11 +860,13 @@ static int __init ibft_init(void)
int rc = 0;
/*
- As on UEFI systems the setup_arch()/find_ibft_region()
+ As on UEFI systems the setup_arch()/reserve_ibft_region()
is called before ACPI tables are parsed and it only does
legacy finding.
*/
- if (!ibft_addr)
+ if (ibft_phys_addr)
+ ibft_addr = isa_bus_to_virt(ibft_phys_addr);
+ else
acpi_find_ibft_region();
if (ibft_addr) {
diff --git a/drivers/firmware/iscsi_ibft_find.c b/drivers/firmware/iscsi_ibft_find.c
index 64bb94523281..94b49ccd23ac 100644
--- a/drivers/firmware/iscsi_ibft_find.c
+++ b/drivers/firmware/iscsi_ibft_find.c
@@ -31,8 +31,8 @@
/*
* Physical location of iSCSI Boot Format Table.
*/
-struct acpi_table_ibft *ibft_addr;
-EXPORT_SYMBOL_GPL(ibft_addr);
+phys_addr_t ibft_phys_addr;
+EXPORT_SYMBOL_GPL(ibft_phys_addr);
static const struct {
char *sign;
@@ -47,13 +47,24 @@ static const struct {
#define VGA_MEM 0xA0000 /* VGA buffer */
#define VGA_SIZE 0x20000 /* 128kB */
-static int __init find_ibft_in_mem(void)
+/*
+ * Routine used to find and reserve the iSCSI Boot Format Table
+ */
+void __init reserve_ibft_region(void)
{
unsigned long pos;
unsigned int len = 0;
void *virt;
int i;
+ ibft_phys_addr = 0;
+
+ /* iBFT 1.03 section 1.4.3.1 mandates that UEFI machines will
+ * only use ACPI for this
+ */
+ if (efi_enabled(EFI_BOOT))
+ return;
+
for (pos = IBFT_START; pos < IBFT_END; pos += 16) {
/* The table can't be inside the VGA BIOS reserved space,
* so skip that area */
@@ -70,35 +81,12 @@ static int __init find_ibft_in_mem(void)
/* if the length of the table extends past 1M,
* the table cannot be valid. */
if (pos + len <= (IBFT_END-1)) {
- ibft_addr = (struct acpi_table_ibft *)virt;
- pr_info("iBFT found at 0x%lx.\n", pos);
- goto done;
+ ibft_phys_addr = pos;
+ memblock_reserve(ibft_phys_addr, PAGE_ALIGN(len));
+ pr_info("iBFT found at %pa.\n", &ibft_phys_addr);
+ return;
}
}
}
}
-done:
- return len;
-}
-/*
- * Routine used to find the iSCSI Boot Format Table. The logical
- * kernel address is set in the ibft_addr global variable.
- */
-unsigned long __init find_ibft_region(unsigned long *sizep)
-{
- ibft_addr = NULL;
-
- /* iBFT 1.03 section 1.4.3.1 mandates that UEFI machines will
- * only use ACPI for this */
-
- if (!efi_enabled(EFI_BOOT))
- find_ibft_in_mem();
-
- if (ibft_addr) {
- *sizep = PAGE_ALIGN(ibft_addr->header.length);
- return (u64)virt_to_phys(ibft_addr);
- }
-
- *sizep = 0;
- return 0;
}
diff --git a/drivers/firmware/raspberrypi.c b/drivers/firmware/raspberrypi.c
index 250e01680742..4b8978b254f9 100644
--- a/drivers/firmware/raspberrypi.c
+++ b/drivers/firmware/raspberrypi.c
@@ -329,12 +329,18 @@ struct rpi_firmware *rpi_firmware_get(struct device_node *firmware_node)
fw = platform_get_drvdata(pdev);
if (!fw)
- return NULL;
+ goto err_put_device;
if (!kref_get_unless_zero(&fw->consumers))
- return NULL;
+ goto err_put_device;
+
+ put_device(&pdev->dev);
return fw;
+
+err_put_device:
+ put_device(&pdev->dev);
+ return NULL;
}
EXPORT_SYMBOL_GPL(rpi_firmware_get);
diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c
index 9f937b125ab0..60ccf3e90d7d 100644
--- a/drivers/firmware/smccc/smccc.c
+++ b/drivers/firmware/smccc/smccc.c
@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/arm-smccc.h>
#include <linux/kernel.h>
+#include <linux/platform_device.h>
#include <asm/archrandom.h>
static u32 smccc_version = ARM_SMCCC_VERSION_1_0;
@@ -42,3 +43,19 @@ u32 arm_smccc_get_version(void)
return smccc_version;
}
EXPORT_SYMBOL_GPL(arm_smccc_get_version);
+
+static int __init smccc_devices_init(void)
+{
+ struct platform_device *pdev;
+
+ if (smccc_trng_available) {
+ pdev = platform_device_register_simple("smccc_trng", -1,
+ NULL, 0);
+ if (IS_ERR(pdev))
+ pr_err("smccc_trng: could not register device: %ld\n",
+ PTR_ERR(pdev));
+ }
+
+ return 0;
+}
+device_initcall(smccc_devices_init);
diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index 15b138326ecc..a3cadbaf3cba 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -664,7 +664,7 @@ int zynqmp_pm_write_ggs(u32 index, u32 value)
EXPORT_SYMBOL_GPL(zynqmp_pm_write_ggs);
/**
- * zynqmp_pm_write_ggs() - PM API for reading global general storage (ggs)
+ * zynqmp_pm_read_ggs() - PM API for reading global general storage (ggs)
* @index: GGS register index
* @value: Register value to be written
*
@@ -697,7 +697,7 @@ int zynqmp_pm_write_pggs(u32 index, u32 value)
EXPORT_SYMBOL_GPL(zynqmp_pm_write_pggs);
/**
- * zynqmp_pm_write_pggs() - PM API for reading persistent global general
+ * zynqmp_pm_read_pggs() - PM API for reading persistent global general
* storage (pggs)
* @index: PGGS register index
* @value: Register value to be written
@@ -1012,7 +1012,24 @@ int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities,
EXPORT_SYMBOL_GPL(zynqmp_pm_set_requirement);
/**
- * zynqmp_pm_aes - Access AES hardware to encrypt/decrypt the data using
+ * zynqmp_pm_load_pdi - Load and process PDI
+ * @src: Source device where PDI is located
+ * @address: PDI src address
+ *
+ * This function provides support to load PDI from linux
+ *
+ * Return: Returns status, either success or error+reason
+ */
+int zynqmp_pm_load_pdi(const u32 src, const u64 address)
+{
+ return zynqmp_pm_invoke_fn(PM_LOAD_PDI, src,
+ lower_32_bits(address),
+ upper_32_bits(address), 0, NULL);
+}
+EXPORT_SYMBOL_GPL(zynqmp_pm_load_pdi);
+
+/**
+ * zynqmp_pm_aes_engine - Access AES hardware to encrypt/decrypt the data using
* AES-GCM core.
* @address: Address of the AesParams structure.
* @out: Returned output value
diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig
index 8cd454ee20c0..991b3f361ec9 100644
--- a/drivers/fpga/Kconfig
+++ b/drivers/fpga/Kconfig
@@ -119,7 +119,7 @@ config XILINX_PR_DECOUPLER
depends on HAS_IOMEM
help
Say Y to enable drivers for Xilinx LogiCORE PR Decoupler
- or Xilinx Dynamic Function eXchnage AIX Shutdown Manager.
+ or Xilinx Dynamic Function eXchange AIX Shutdown Manager.
The PR Decoupler exists in the FPGA fabric to isolate one
region of the FPGA from the busses while that region is
being reprogrammed during partial reconfig.
@@ -234,4 +234,13 @@ config FPGA_MGR_ZYNQMP_FPGA
to configure the programmable logic(PL) through PS
on ZynqMP SoC.
+config FPGA_MGR_VERSAL_FPGA
+ tristate "Xilinx Versal FPGA"
+ depends on ARCH_ZYNQMP || COMPILE_TEST
+ help
+ Select this option to enable FPGA manager driver support for
+ Xilinx Versal SoC. This driver uses the firmware interface to
+ configure the programmable logic(PL).
+
+ To compile this as a module, choose M here.
endif # FPGA
diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
index 18dc9885883a..0bff783d1b61 100644
--- a/drivers/fpga/Makefile
+++ b/drivers/fpga/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_FPGA_MGR_TS73XX) += ts73xx-fpga.o
obj-$(CONFIG_FPGA_MGR_XILINX_SPI) += xilinx-spi.o
obj-$(CONFIG_FPGA_MGR_ZYNQ_FPGA) += zynq-fpga.o
obj-$(CONFIG_FPGA_MGR_ZYNQMP_FPGA) += zynqmp-fpga.o
+obj-$(CONFIG_FPGA_MGR_VERSAL_FPGA) += versal-fpga.o
obj-$(CONFIG_ALTERA_PR_IP_CORE) += altera-pr-ip-core.o
obj-$(CONFIG_ALTERA_PR_IP_CORE_PLAT) += altera-pr-ip-core-plat.o
diff --git a/drivers/fpga/altera-cvp.c b/drivers/fpga/altera-cvp.c
index 4e0edb60bfba..ccf4546eff29 100644
--- a/drivers/fpga/altera-cvp.c
+++ b/drivers/fpga/altera-cvp.c
@@ -346,7 +346,7 @@ static int altera_cvp_write_init(struct fpga_manager *mgr,
}
if (val & VSE_CVP_STATUS_CFG_RDY) {
- dev_warn(&mgr->dev, "CvP already started, teardown first\n");
+ dev_warn(&mgr->dev, "CvP already started, tear down first\n");
ret = altera_cvp_teardown(mgr, info);
if (ret)
return ret;
diff --git a/drivers/fpga/altera-freeze-bridge.c b/drivers/fpga/altera-freeze-bridge.c
index dd58c4aea92e..7d22a44d652e 100644
--- a/drivers/fpga/altera-freeze-bridge.c
+++ b/drivers/fpga/altera-freeze-bridge.c
@@ -198,11 +198,13 @@ static const struct fpga_bridge_ops altera_freeze_br_br_ops = {
.enable_show = altera_freeze_br_enable_show,
};
+#ifdef CONFIG_OF
static const struct of_device_id altera_freeze_br_of_match[] = {
{ .compatible = "altr,freeze-bridge-controller", },
{},
};
MODULE_DEVICE_TABLE(of, altera_freeze_br_of_match);
+#endif
static int altera_freeze_br_probe(struct platform_device *pdev)
{
diff --git a/drivers/fpga/dfl-fme-mgr.c b/drivers/fpga/dfl-fme-mgr.c
index d5861d13b306..313420405d5e 100644
--- a/drivers/fpga/dfl-fme-mgr.c
+++ b/drivers/fpga/dfl-fme-mgr.c
@@ -252,11 +252,6 @@ static int fme_mgr_write_complete(struct fpga_manager *mgr,
return 0;
}
-static enum fpga_mgr_states fme_mgr_state(struct fpga_manager *mgr)
-{
- return FPGA_MGR_STATE_UNKNOWN;
-}
-
static u64 fme_mgr_status(struct fpga_manager *mgr)
{
struct fme_mgr_priv *priv = mgr->priv;
@@ -268,7 +263,6 @@ static const struct fpga_manager_ops fme_mgr_ops = {
.write_init = fme_mgr_write_init,
.write = fme_mgr_write,
.write_complete = fme_mgr_write_complete,
- .state = fme_mgr_state,
.status = fme_mgr_status,
};
diff --git a/drivers/fpga/dfl-fme-pr.c b/drivers/fpga/dfl-fme-pr.c
index 1194c0e850e0..d61ce9a18879 100644
--- a/drivers/fpga/dfl-fme-pr.c
+++ b/drivers/fpga/dfl-fme-pr.c
@@ -148,7 +148,7 @@ static int fme_pr(struct platform_device *pdev, unsigned long arg)
/*
* it allows userspace to reset the PR region's logic by disabling and
- * reenabling the bridge to clear things out between accleration runs.
+ * reenabling the bridge to clear things out between acceleration runs.
* so no need to hold the bridges after partial reconfiguration.
*/
if (region->get_bridges)
diff --git a/drivers/fpga/dfl-n3000-nios.c b/drivers/fpga/dfl-n3000-nios.c
index 7a95366f6516..9ddf1d1d392f 100644
--- a/drivers/fpga/dfl-n3000-nios.c
+++ b/drivers/fpga/dfl-n3000-nios.c
@@ -461,7 +461,7 @@ static int n3000_nios_poll_stat_timeout(void __iomem *base, u64 *v)
* We don't use the time based timeout here for performance.
*
* The regbus read/write is on the critical path of Intel PAC N3000
- * image programing. The time based timeout checking will add too much
+ * image programming. The time based timeout checking will add too much
* overhead on it. Usually the state changes in 1 or 2 loops on the
* test server, and we set 10000 times loop here for safety.
*/
diff --git a/drivers/fpga/dfl-pci.c b/drivers/fpga/dfl-pci.c
index b44523ea8c91..4d68719e608f 100644
--- a/drivers/fpga/dfl-pci.c
+++ b/drivers/fpga/dfl-pci.c
@@ -74,6 +74,9 @@ static void cci_pci_free_irq(struct pci_dev *pcidev)
#define PCIE_DEVICE_ID_PF_DSC_1_X 0x09C4
#define PCIE_DEVICE_ID_INTEL_PAC_N3000 0x0B30
#define PCIE_DEVICE_ID_INTEL_PAC_D5005 0x0B2B
+#define PCIE_DEVICE_ID_SILICOM_PAC_N5010 0x1000
+#define PCIE_DEVICE_ID_SILICOM_PAC_N5011 0x1001
+
/* VF Device */
#define PCIE_DEVICE_ID_VF_INT_5_X 0xBCBF
#define PCIE_DEVICE_ID_VF_INT_6_X 0xBCC1
@@ -90,6 +93,8 @@ static struct pci_device_id cci_pcie_id_tbl[] = {
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCIE_DEVICE_ID_INTEL_PAC_N3000),},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCIE_DEVICE_ID_INTEL_PAC_D5005),},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCIE_DEVICE_ID_INTEL_PAC_D5005_VF),},
+ {PCI_DEVICE(PCI_VENDOR_ID_SILICOM_DENMARK, PCIE_DEVICE_ID_SILICOM_PAC_N5010),},
+ {PCI_DEVICE(PCI_VENDOR_ID_SILICOM_DENMARK, PCIE_DEVICE_ID_SILICOM_PAC_N5011),},
{0,}
};
MODULE_DEVICE_TABLE(pci, cci_pcie_id_tbl);
diff --git a/drivers/fpga/dfl.c b/drivers/fpga/dfl.c
index 1ae6779a0dd6..c99b78ee008a 100644
--- a/drivers/fpga/dfl.c
+++ b/drivers/fpga/dfl.c
@@ -379,6 +379,7 @@ dfl_dev_add(struct dfl_feature_platform_data *pdata,
ddev->type = feature_dev_id_type(pdev);
ddev->feature_id = feature->id;
+ ddev->revision = feature->revision;
ddev->cdev = pdata->dfl_cdev;
/* add mmio resource */
@@ -715,6 +716,7 @@ struct build_feature_devs_info {
*/
struct dfl_feature_info {
u16 fid;
+ u8 revision;
struct resource mmio_res;
void __iomem *ioaddr;
struct list_head node;
@@ -794,6 +796,7 @@ static int build_info_commit_dev(struct build_feature_devs_info *binfo)
/* save resource information for each feature */
feature->dev = fdev;
feature->id = finfo->fid;
+ feature->revision = finfo->revision;
/*
* the FIU header feature has some fundamental functions (sriov
@@ -908,19 +911,17 @@ static void build_info_free(struct build_feature_devs_info *binfo)
devm_kfree(binfo->dev, binfo);
}
-static inline u32 feature_size(void __iomem *start)
+static inline u32 feature_size(u64 value)
{
- u64 v = readq(start + DFH);
- u32 ofst = FIELD_GET(DFH_NEXT_HDR_OFST, v);
+ u32 ofst = FIELD_GET(DFH_NEXT_HDR_OFST, value);
/* workaround for private features with invalid size, use 4K instead */
return ofst ? ofst : 4096;
}
-static u16 feature_id(void __iomem *start)
+static u16 feature_id(u64 value)
{
- u64 v = readq(start + DFH);
- u16 id = FIELD_GET(DFH_ID, v);
- u8 type = FIELD_GET(DFH_TYPE, v);
+ u16 id = FIELD_GET(DFH_ID, value);
+ u8 type = FIELD_GET(DFH_TYPE, value);
if (type == DFH_TYPE_FIU)
return FEATURE_ID_FIU_HEADER;
@@ -1019,10 +1020,15 @@ create_feature_instance(struct build_feature_devs_info *binfo,
unsigned int irq_base, nr_irqs;
struct dfl_feature_info *finfo;
int ret;
+ u8 revision;
+ u64 v;
+
+ v = readq(binfo->ioaddr + ofst);
+ revision = FIELD_GET(DFH_REVISION, v);
/* read feature size and id if inputs are invalid */
- size = size ? size : feature_size(binfo->ioaddr + ofst);
- fid = fid ? fid : feature_id(binfo->ioaddr + ofst);
+ size = size ? size : feature_size(v);
+ fid = fid ? fid : feature_id(v);
if (binfo->len - ofst < size)
return -EINVAL;
@@ -1036,6 +1042,7 @@ create_feature_instance(struct build_feature_devs_info *binfo,
return -ENOMEM;
finfo->fid = fid;
+ finfo->revision = revision;
finfo->mmio_res.start = binfo->start + ofst;
finfo->mmio_res.end = finfo->mmio_res.start + size - 1;
finfo->mmio_res.flags = IORESOURCE_MEM;
@@ -1164,7 +1171,7 @@ static int parse_feature_private(struct build_feature_devs_info *binfo,
{
if (!is_feature_dev_detected(binfo)) {
dev_err(binfo->dev, "the private feature 0x%x does not belong to any AFU.\n",
- feature_id(binfo->ioaddr + ofst));
+ feature_id(readq(binfo->ioaddr + ofst)));
return -EINVAL;
}
diff --git a/drivers/fpga/dfl.h b/drivers/fpga/dfl.h
index 2b82c96ba56c..53572c7aced0 100644
--- a/drivers/fpga/dfl.h
+++ b/drivers/fpga/dfl.h
@@ -232,7 +232,7 @@ struct dfl_feature_irq_ctx {
* @id: sub feature id.
* @resource_index: each sub feature has one mmio resource for its registers.
* this index is used to find its mmio resource from the
- * feature dev (platform device)'s reources.
+ * feature dev (platform device)'s resources.
* @ioaddr: mapped mmio resource address.
* @irq_ctx: interrupt context list.
* @nr_irqs: number of interrupt contexts.
@@ -243,6 +243,7 @@ struct dfl_feature_irq_ctx {
struct dfl_feature {
struct platform_device *dev;
u16 id;
+ u8 revision;
int resource_index;
void __iomem *ioaddr;
struct dfl_feature_irq_ctx *irq_ctx;
diff --git a/drivers/fpga/fpga-bridge.c b/drivers/fpga/fpga-bridge.c
index 2bfb2ff86930..798f55670646 100644
--- a/drivers/fpga/fpga-bridge.c
+++ b/drivers/fpga/fpga-bridge.c
@@ -228,9 +228,9 @@ EXPORT_SYMBOL_GPL(fpga_bridges_put);
* @info: fpga image specific information
* @bridge_list: list of FPGA bridges
*
- * Get an exclusive reference to the bridge and and it to the list.
+ * Get an exclusive reference to the bridge and add it to the list.
*
- * Return 0 for success, error code from of_fpga_bridge_get() othewise.
+ * Return 0 for success, error code from of_fpga_bridge_get() otherwise.
*/
int of_fpga_bridge_get_to_list(struct device_node *np,
struct fpga_image_info *info,
@@ -258,9 +258,9 @@ EXPORT_SYMBOL_GPL(of_fpga_bridge_get_to_list);
* @info: fpga image specific information
* @bridge_list: list of FPGA bridges
*
- * Get an exclusive reference to the bridge and and it to the list.
+ * Get an exclusive reference to the bridge and add it to the list.
*
- * Return 0 for success, error code from fpga_bridge_get() othewise.
+ * Return 0 for success, error code from fpga_bridge_get() otherwise.
*/
int fpga_bridge_get_to_list(struct device *dev,
struct fpga_image_info *info,
diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c
index ecb4c3c795fa..aa30889e2320 100644
--- a/drivers/fpga/fpga-mgr.c
+++ b/drivers/fpga/fpga-mgr.c
@@ -25,6 +25,72 @@ struct fpga_mgr_devres {
struct fpga_manager *mgr;
};
+static inline void fpga_mgr_fpga_remove(struct fpga_manager *mgr)
+{
+ if (mgr->mops->fpga_remove)
+ mgr->mops->fpga_remove(mgr);
+}
+
+static inline enum fpga_mgr_states fpga_mgr_state(struct fpga_manager *mgr)
+{
+ if (mgr->mops->state)
+ return mgr->mops->state(mgr);
+ return FPGA_MGR_STATE_UNKNOWN;
+}
+
+static inline u64 fpga_mgr_status(struct fpga_manager *mgr)
+{
+ if (mgr->mops->status)
+ return mgr->mops->status(mgr);
+ return 0;
+}
+
+static inline int fpga_mgr_write(struct fpga_manager *mgr, const char *buf, size_t count)
+{
+ if (mgr->mops->write)
+ return mgr->mops->write(mgr, buf, count);
+ return -EOPNOTSUPP;
+}
+
+/*
+ * After all the FPGA image has been written, do the device specific steps to
+ * finish and set the FPGA into operating mode.
+ */
+static inline int fpga_mgr_write_complete(struct fpga_manager *mgr,
+ struct fpga_image_info *info)
+{
+ int ret = 0;
+
+ mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE;
+ if (mgr->mops->write_complete)
+ ret = mgr->mops->write_complete(mgr, info);
+ if (ret) {
+ dev_err(&mgr->dev, "Error after writing image data to FPGA\n");
+ mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE_ERR;
+ return ret;
+ }
+ mgr->state = FPGA_MGR_STATE_OPERATING;
+
+ return 0;
+}
+
+static inline int fpga_mgr_write_init(struct fpga_manager *mgr,
+ struct fpga_image_info *info,
+ const char *buf, size_t count)
+{
+ if (mgr->mops->write_init)
+ return mgr->mops->write_init(mgr, info, buf, count);
+ return 0;
+}
+
+static inline int fpga_mgr_write_sg(struct fpga_manager *mgr,
+ struct sg_table *sgt)
+{
+ if (mgr->mops->write_sg)
+ return mgr->mops->write_sg(mgr, sgt);
+ return -EOPNOTSUPP;
+}
+
/**
* fpga_image_info_alloc - Allocate an FPGA image info struct
* @dev: owning device
@@ -83,9 +149,9 @@ static int fpga_mgr_write_init_buf(struct fpga_manager *mgr,
mgr->state = FPGA_MGR_STATE_WRITE_INIT;
if (!mgr->mops->initial_header_size)
- ret = mgr->mops->write_init(mgr, info, NULL, 0);
+ ret = fpga_mgr_write_init(mgr, info, NULL, 0);
else
- ret = mgr->mops->write_init(
+ ret = fpga_mgr_write_init(
mgr, info, buf, min(mgr->mops->initial_header_size, count));
if (ret) {
@@ -137,27 +203,6 @@ static int fpga_mgr_write_init_sg(struct fpga_manager *mgr,
return ret;
}
-/*
- * After all the FPGA image has been written, do the device specific steps to
- * finish and set the FPGA into operating mode.
- */
-static int fpga_mgr_write_complete(struct fpga_manager *mgr,
- struct fpga_image_info *info)
-{
- int ret;
-
- mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE;
- ret = mgr->mops->write_complete(mgr, info);
- if (ret) {
- dev_err(&mgr->dev, "Error after writing image data to FPGA\n");
- mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE_ERR;
- return ret;
- }
- mgr->state = FPGA_MGR_STATE_OPERATING;
-
- return 0;
-}
-
/**
* fpga_mgr_buf_load_sg - load fpga from image in buffer from a scatter list
* @mgr: fpga manager
@@ -188,13 +233,13 @@ static int fpga_mgr_buf_load_sg(struct fpga_manager *mgr,
/* Write the FPGA image to the FPGA. */
mgr->state = FPGA_MGR_STATE_WRITE;
if (mgr->mops->write_sg) {
- ret = mgr->mops->write_sg(mgr, sgt);
+ ret = fpga_mgr_write_sg(mgr, sgt);
} else {
struct sg_mapping_iter miter;
sg_miter_start(&miter, sgt->sgl, sgt->nents, SG_MITER_FROM_SG);
while (sg_miter_next(&miter)) {
- ret = mgr->mops->write(mgr, miter.addr, miter.length);
+ ret = fpga_mgr_write(mgr, miter.addr, miter.length);
if (ret)
break;
}
@@ -224,7 +269,7 @@ static int fpga_mgr_buf_load_mapped(struct fpga_manager *mgr,
* Write the FPGA image to the FPGA.
*/
mgr->state = FPGA_MGR_STATE_WRITE;
- ret = mgr->mops->write(mgr, buf, count);
+ ret = fpga_mgr_write(mgr, buf, count);
if (ret) {
dev_err(&mgr->dev, "Error while writing image data to FPGA\n");
mgr->state = FPGA_MGR_STATE_WRITE_ERR;
@@ -417,10 +462,7 @@ static ssize_t status_show(struct device *dev,
u64 status;
int len = 0;
- if (!mgr->mops->status)
- return -ENOENT;
-
- status = mgr->mops->status(mgr);
+ status = fpga_mgr_status(mgr);
if (status & FPGA_MGR_STATUS_OPERATION_ERR)
len += sprintf(buf + len, "reconfig operation error\n");
@@ -568,9 +610,7 @@ struct fpga_manager *fpga_mgr_create(struct device *parent, const char *name,
struct fpga_manager *mgr;
int id, ret;
- if (!mops || !mops->write_complete || !mops->state ||
- !mops->write_init || (!mops->write && !mops->write_sg) ||
- (mops->write && mops->write_sg)) {
+ if (!mops) {
dev_err(parent, "Attempt to register without fpga_manager_ops\n");
return NULL;
}
@@ -688,7 +728,7 @@ int fpga_mgr_register(struct fpga_manager *mgr)
* from device. FPGA may be in reset mode or may have been programmed
* by bootloader or EEPROM.
*/
- mgr->state = mgr->mops->state(mgr);
+ mgr->state = fpga_mgr_state(mgr);
ret = device_add(&mgr->dev);
if (ret)
@@ -719,8 +759,7 @@ void fpga_mgr_unregister(struct fpga_manager *mgr)
* If the low level driver provides a method for putting fpga into
* a desired state upon unregister, do it.
*/
- if (mgr->mops->fpga_remove)
- mgr->mops->fpga_remove(mgr);
+ fpga_mgr_fpga_remove(mgr);
device_unregister(&mgr->dev);
}
diff --git a/drivers/fpga/stratix10-soc.c b/drivers/fpga/stratix10-soc.c
index a2cea500f7cc..047fd7f23706 100644
--- a/drivers/fpga/stratix10-soc.c
+++ b/drivers/fpga/stratix10-soc.c
@@ -388,13 +388,7 @@ static int s10_ops_write_complete(struct fpga_manager *mgr,
return ret;
}
-static enum fpga_mgr_states s10_ops_state(struct fpga_manager *mgr)
-{
- return FPGA_MGR_STATE_UNKNOWN;
-}
-
static const struct fpga_manager_ops s10_ops = {
- .state = s10_ops_state,
.write_init = s10_ops_write_init,
.write = s10_ops_write,
.write_complete = s10_ops_write_complete,
diff --git a/drivers/fpga/ts73xx-fpga.c b/drivers/fpga/ts73xx-fpga.c
index 101f016c6ed8..167abb0b08d4 100644
--- a/drivers/fpga/ts73xx-fpga.c
+++ b/drivers/fpga/ts73xx-fpga.c
@@ -32,11 +32,6 @@ struct ts73xx_fpga_priv {
struct device *dev;
};
-static enum fpga_mgr_states ts73xx_fpga_state(struct fpga_manager *mgr)
-{
- return FPGA_MGR_STATE_UNKNOWN;
-}
-
static int ts73xx_fpga_write_init(struct fpga_manager *mgr,
struct fpga_image_info *info,
const char *buf, size_t count)
@@ -98,7 +93,6 @@ static int ts73xx_fpga_write_complete(struct fpga_manager *mgr,
}
static const struct fpga_manager_ops ts73xx_fpga_ops = {
- .state = ts73xx_fpga_state,
.write_init = ts73xx_fpga_write_init,
.write = ts73xx_fpga_write,
.write_complete = ts73xx_fpga_write_complete,
diff --git a/drivers/fpga/versal-fpga.c b/drivers/fpga/versal-fpga.c
new file mode 100644
index 000000000000..5b0dda304bd2
--- /dev/null
+++ b/drivers/fpga/versal-fpga.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019-2021 Xilinx, Inc.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/fpga/fpga-mgr.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/string.h>
+#include <linux/firmware/xlnx-zynqmp.h>
+
+static int versal_fpga_ops_write_init(struct fpga_manager *mgr,
+ struct fpga_image_info *info,
+ const char *buf, size_t size)
+{
+ return 0;
+}
+
+static int versal_fpga_ops_write(struct fpga_manager *mgr,
+ const char *buf, size_t size)
+{
+ dma_addr_t dma_addr = 0;
+ char *kbuf;
+ int ret;
+
+ kbuf = dma_alloc_coherent(mgr->dev.parent, size, &dma_addr, GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
+
+ memcpy(kbuf, buf, size);
+ ret = zynqmp_pm_load_pdi(PDI_SRC_DDR, dma_addr);
+ dma_free_coherent(mgr->dev.parent, size, kbuf, dma_addr);
+
+ return ret;
+}
+
+static const struct fpga_manager_ops versal_fpga_ops = {
+ .write_init = versal_fpga_ops_write_init,
+ .write = versal_fpga_ops_write,
+};
+
+static int versal_fpga_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct fpga_manager *mgr;
+ int ret;
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ if (ret < 0) {
+ dev_err(dev, "no usable DMA configuration\n");
+ return ret;
+ }
+
+ mgr = devm_fpga_mgr_create(dev, "Xilinx Versal FPGA Manager",
+ &versal_fpga_ops, NULL);
+ if (!mgr)
+ return -ENOMEM;
+
+ return devm_fpga_mgr_register(dev, mgr);
+}
+
+static const struct of_device_id versal_fpga_of_match[] = {
+ { .compatible = "xlnx,versal-fpga", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, versal_fpga_of_match);
+
+static struct platform_driver versal_fpga_driver = {
+ .probe = versal_fpga_probe,
+ .driver = {
+ .name = "versal_fpga_manager",
+ .of_match_table = of_match_ptr(versal_fpga_of_match),
+ },
+};
+module_platform_driver(versal_fpga_driver);
+
+MODULE_AUTHOR("Nava kishore Manne <nava.manne@xilinx.com>");
+MODULE_AUTHOR("Appana Durga Kedareswara rao <appanad.durga.rao@xilinx.com>");
+MODULE_DESCRIPTION("Xilinx Versal FPGA Manager");
+MODULE_LICENSE("GPL");
diff --git a/drivers/fpga/xilinx-pr-decoupler.c b/drivers/fpga/xilinx-pr-decoupler.c
index ea2bde6e5bc4..e986ed47c4ed 100644
--- a/drivers/fpga/xilinx-pr-decoupler.c
+++ b/drivers/fpga/xilinx-pr-decoupler.c
@@ -81,6 +81,7 @@ static const struct fpga_bridge_ops xlnx_pr_decoupler_br_ops = {
.enable_show = xlnx_pr_decoupler_enable_show,
};
+#ifdef CONFIG_OF
static const struct xlnx_config_data decoupler_config = {
.name = "Xilinx PR Decoupler",
};
@@ -99,6 +100,7 @@ static const struct of_device_id xlnx_pr_decoupler_of_match[] = {
{},
};
MODULE_DEVICE_TABLE(of, xlnx_pr_decoupler_of_match);
+#endif
static int xlnx_pr_decoupler_probe(struct platform_device *pdev)
{
diff --git a/drivers/fpga/xilinx-spi.c b/drivers/fpga/xilinx-spi.c
index fee4d0abf6bf..b6bcf1d9233d 100644
--- a/drivers/fpga/xilinx-spi.c
+++ b/drivers/fpga/xilinx-spi.c
@@ -256,11 +256,13 @@ static int xilinx_spi_probe(struct spi_device *spi)
return devm_fpga_mgr_register(&spi->dev, mgr);
}
+#ifdef CONFIG_OF
static const struct of_device_id xlnx_spi_of_match[] = {
{ .compatible = "xlnx,fpga-slave-serial", },
{}
};
MODULE_DEVICE_TABLE(of, xlnx_spi_of_match);
+#endif
static struct spi_driver xilinx_slave_spi_driver = {
.driver = {
diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c
index 07fa8d9ec675..9b75bd4f93d8 100644
--- a/drivers/fpga/zynq-fpga.c
+++ b/drivers/fpga/zynq-fpga.c
@@ -192,7 +192,7 @@ static void zynq_step_dma(struct zynq_fpga_priv *priv)
/* Once the first transfer is queued we can turn on the ISR, future
* calls to zynq_step_dma will happen from the ISR context. The
- * dma_lock spinlock guarentees this handover is done coherently, the
+ * dma_lock spinlock guarantees this handover is done coherently, the
* ISR enable is put at the end to avoid another CPU spinning in the
* ISR on this lock.
*/
@@ -267,7 +267,7 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr,
ctrl = zynq_fpga_read(priv, CTRL_OFFSET);
if (!(ctrl & CTRL_SEC_EN_MASK)) {
dev_err(&mgr->dev,
- "System not secure, can't use crypted bitstreams\n");
+ "System not secure, can't use encrypted bitstreams\n");
err = -EINVAL;
goto out_err;
}
@@ -344,7 +344,7 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr,
/* set configuration register with following options:
* - enable PCAP interface
- * - set throughput for maximum speed (if bistream not crypted)
+ * - set throughput for maximum speed (if bitstream not encrypted)
* - set CPU in user mode
*/
ctrl = zynq_fpga_read(priv, CTRL_OFFSET);
diff --git a/drivers/fpga/zynqmp-fpga.c b/drivers/fpga/zynqmp-fpga.c
index 125743c9797f..7d3d5650c322 100644
--- a/drivers/fpga/zynqmp-fpga.c
+++ b/drivers/fpga/zynqmp-fpga.c
@@ -66,12 +66,6 @@ static int zynqmp_fpga_ops_write(struct fpga_manager *mgr,
return ret;
}
-static int zynqmp_fpga_ops_write_complete(struct fpga_manager *mgr,
- struct fpga_image_info *info)
-{
- return 0;
-}
-
static enum fpga_mgr_states zynqmp_fpga_ops_state(struct fpga_manager *mgr)
{
u32 status = 0;
@@ -87,7 +81,6 @@ static const struct fpga_manager_ops zynqmp_fpga_ops = {
.state = zynqmp_fpga_ops_state,
.write_init = zynqmp_fpga_ops_write_init,
.write = zynqmp_fpga_ops_write,
- .write_complete = zynqmp_fpga_ops_write_complete,
};
static int zynqmp_fpga_probe(struct platform_device *pdev)
@@ -110,12 +103,13 @@ static int zynqmp_fpga_probe(struct platform_device *pdev)
return devm_fpga_mgr_register(dev, mgr);
}
+#ifdef CONFIG_OF
static const struct of_device_id zynqmp_fpga_of_match[] = {
{ .compatible = "xlnx,zynqmp-pcap-fpga", },
{},
};
-
MODULE_DEVICE_TABLE(of, zynqmp_fpga_of_match);
+#endif
static struct platform_driver zynqmp_fpga_driver = {
.probe = zynqmp_fpga_probe,
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index fab571016adf..81abd890b364 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -520,6 +520,14 @@ config GPIO_REG
A 32-bit single register GPIO fixed in/out implementation. This
can be used to represent any register as a set of GPIO signals.
+config GPIO_ROCKCHIP
+ tristate "Rockchip GPIO support"
+ depends on ARCH_ROCKCHIP || COMPILE_TEST
+ select GPIOLIB_IRQCHIP
+ default ARCH_ROCKCHIP
+ help
+ Say yes here to support GPIO on Rockchip SoCs.
+
config GPIO_SAMA5D2_PIOBU
tristate "SAMA5D2 PIOBU GPIO support"
depends on MFD_SYSCON
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 32a32659866a..5243e2d1c207 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -128,6 +128,7 @@ obj-$(CONFIG_GPIO_RDA) += gpio-rda.o
obj-$(CONFIG_GPIO_RDC321X) += gpio-rdc321x.o
obj-$(CONFIG_GPIO_REALTEK_OTTO) += gpio-realtek-otto.o
obj-$(CONFIG_GPIO_REG) += gpio-reg.o
+obj-$(CONFIG_GPIO_ROCKCHIP) += gpio-rockchip.o
obj-$(CONFIG_ARCH_SA1100) += gpio-sa1100.o
obj-$(CONFIG_GPIO_SAMA5D2_PIOBU) += gpio-sama5d2-piobu.o
obj-$(CONFIG_GPIO_SCH311X) += gpio-sch311x.o
diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c
index 71c0bea34d7b..6bf41040c41f 100644
--- a/drivers/gpio/gpio-104-dio-48e.c
+++ b/drivers/gpio/gpio-104-dio-48e.c
@@ -336,8 +336,8 @@ static irqreturn_t dio48e_irq_handler(int irq, void *dev_id)
unsigned long gpio;
for_each_set_bit(gpio, &irq_mask, 2)
- generic_handle_irq(irq_find_mapping(chip->irq.domain,
- 19 + gpio*24));
+ generic_handle_domain_irq(chip->irq.domain,
+ 19 + gpio*24);
raw_spin_lock(&dio48egpio->lock);
diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c
index b132afaf7d99..34be7dd9f5b9 100644
--- a/drivers/gpio/gpio-104-idi-48.c
+++ b/drivers/gpio/gpio-104-idi-48.c
@@ -223,8 +223,8 @@ static irqreturn_t idi_48_irq_handler(int irq, void *dev_id)
for_each_set_bit(bit_num, &irq_mask, 8) {
gpio = bit_num + boundary * 8;
- generic_handle_irq(irq_find_mapping(chip->irq.domain,
- gpio));
+ generic_handle_domain_irq(chip->irq.domain,
+ gpio);
}
}
diff --git a/drivers/gpio/gpio-104-idio-16.c b/drivers/gpio/gpio-104-idio-16.c
index 55b40299ebfa..c68ed1a135fa 100644
--- a/drivers/gpio/gpio-104-idio-16.c
+++ b/drivers/gpio/gpio-104-idio-16.c
@@ -208,7 +208,7 @@ static irqreturn_t idio_16_irq_handler(int irq, void *dev_id)
int gpio;
for_each_set_bit(gpio, &idio16gpio->irq_mask, chip->ngpio)
- generic_handle_irq(irq_find_mapping(chip->irq.domain, gpio));
+ generic_handle_domain_irq(chip->irq.domain, gpio);
raw_spin_lock(&idio16gpio->lock);
diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c
index b7932ecc3b61..b59fae993626 100644
--- a/drivers/gpio/gpio-altera.c
+++ b/drivers/gpio/gpio-altera.c
@@ -201,9 +201,8 @@ static void altera_gpio_irq_edge_handler(struct irq_desc *desc)
(readl(mm_gc->regs + ALTERA_GPIO_EDGE_CAP) &
readl(mm_gc->regs + ALTERA_GPIO_IRQ_MASK)))) {
writel(status, mm_gc->regs + ALTERA_GPIO_EDGE_CAP);
- for_each_set_bit(i, &status, mm_gc->gc.ngpio) {
- generic_handle_irq(irq_find_mapping(irqdomain, i));
- }
+ for_each_set_bit(i, &status, mm_gc->gc.ngpio)
+ generic_handle_domain_irq(irqdomain, i);
}
chained_irq_exit(chip, desc);
@@ -228,9 +227,9 @@ static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc)
status = readl(mm_gc->regs + ALTERA_GPIO_DATA);
status &= readl(mm_gc->regs + ALTERA_GPIO_IRQ_MASK);
- for_each_set_bit(i, &status, mm_gc->gc.ngpio) {
- generic_handle_irq(irq_find_mapping(irqdomain, i));
- }
+ for_each_set_bit(i, &status, mm_gc->gc.ngpio)
+ generic_handle_domain_irq(irqdomain, i);
+
chained_irq_exit(chip, desc);
}
diff --git a/drivers/gpio/gpio-aspeed-sgpio.c b/drivers/gpio/gpio-aspeed-sgpio.c
index 64e54f8c30d2..a99ece15db95 100644
--- a/drivers/gpio/gpio-aspeed-sgpio.c
+++ b/drivers/gpio/gpio-aspeed-sgpio.c
@@ -392,7 +392,7 @@ static void aspeed_sgpio_irq_handler(struct irq_desc *desc)
struct gpio_chip *gc = irq_desc_get_handler_data(desc);
struct irq_chip *ic = irq_desc_get_chip(desc);
struct aspeed_sgpio *data = gpiochip_get_data(gc);
- unsigned int i, p, girq;
+ unsigned int i, p;
unsigned long reg;
chained_irq_enter(ic, desc);
@@ -402,11 +402,8 @@ static void aspeed_sgpio_irq_handler(struct irq_desc *desc)
reg = ioread32(bank_reg(data, bank, reg_irq_status));
- for_each_set_bit(p, &reg, 32) {
- girq = irq_find_mapping(gc->irq.domain, i * 32 + p);
- generic_handle_irq(girq);
- }
-
+ for_each_set_bit(p, &reg, 32)
+ generic_handle_domain_irq(gc->irq.domain, i * 32 + p);
}
chained_irq_exit(ic, desc);
diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
index b966f5e28ebf..3c8f20c57695 100644
--- a/drivers/gpio/gpio-aspeed.c
+++ b/drivers/gpio/gpio-aspeed.c
@@ -661,7 +661,7 @@ static void aspeed_gpio_irq_handler(struct irq_desc *desc)
struct gpio_chip *gc = irq_desc_get_handler_data(desc);
struct irq_chip *ic = irq_desc_get_chip(desc);
struct aspeed_gpio *data = gpiochip_get_data(gc);
- unsigned int i, p, girq, banks;
+ unsigned int i, p, banks;
unsigned long reg;
struct aspeed_gpio *gpio = gpiochip_get_data(gc);
@@ -673,11 +673,8 @@ static void aspeed_gpio_irq_handler(struct irq_desc *desc)
reg = ioread32(bank_reg(data, bank, reg_irq_status));
- for_each_set_bit(p, &reg, 32) {
- girq = irq_find_mapping(gc->irq.domain, i * 32 + p);
- generic_handle_irq(girq);
- }
-
+ for_each_set_bit(p, &reg, 32)
+ generic_handle_domain_irq(gc->irq.domain, i * 32 + p);
}
chained_irq_exit(ic, desc);
diff --git a/drivers/gpio/gpio-ath79.c b/drivers/gpio/gpio-ath79.c
index 9b780dc5d390..3958c6d97639 100644
--- a/drivers/gpio/gpio-ath79.c
+++ b/drivers/gpio/gpio-ath79.c
@@ -204,11 +204,8 @@ static void ath79_gpio_irq_handler(struct irq_desc *desc)
raw_spin_unlock_irqrestore(&ctrl->lock, flags);
- if (pending) {
- for_each_set_bit(irq, &pending, gc->ngpio)
- generic_handle_irq(
- irq_linear_revmap(gc->irq.domain, irq));
- }
+ for_each_set_bit(irq, &pending, gc->ngpio)
+ generic_handle_domain_irq(gc->irq.domain, irq);
chained_irq_exit(irqchip, desc);
}
diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c
index 1e6b427f2c4a..d329a143f5ec 100644
--- a/drivers/gpio/gpio-bcm-kona.c
+++ b/drivers/gpio/gpio-bcm-kona.c
@@ -466,9 +466,6 @@ static void bcm_kona_gpio_irq_handler(struct irq_desc *desc)
(~(readl(reg_base + GPIO_INT_MASK(bank_id)))))) {
for_each_set_bit(bit, &sta, 32) {
int hwirq = GPIO_PER_BANK * bank_id + bit;
- int child_irq =
- irq_find_mapping(bank->kona_gpio->irq_domain,
- hwirq);
/*
* Clear interrupt before handler is called so we don't
* miss any interrupt occurred during executing them.
@@ -476,7 +473,8 @@ static void bcm_kona_gpio_irq_handler(struct irq_desc *desc)
writel(readl(reg_base + GPIO_INT_STATUS(bank_id)) |
BIT(bit), reg_base + GPIO_INT_STATUS(bank_id));
/* Invoke interrupt handler */
- generic_handle_irq(child_irq);
+ generic_handle_domain_irq(bank->kona_gpio->irq_domain,
+ hwirq);
}
}
diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c
index fcfc1a1f1a5c..74b7c91c3d1a 100644
--- a/drivers/gpio/gpio-brcmstb.c
+++ b/drivers/gpio/gpio-brcmstb.c
@@ -277,15 +277,14 @@ static void brcmstb_gpio_irq_bank_handler(struct brcmstb_gpio_bank *bank)
unsigned long status;
while ((status = brcmstb_gpio_get_active_irqs(bank))) {
- unsigned int irq, offset;
+ unsigned int offset;
for_each_set_bit(offset, &status, 32) {
if (offset >= bank->width)
dev_warn(&priv->pdev->dev,
"IRQ for invalid GPIO (bank=%d, offset=%d)\n",
bank->id, offset);
- irq = irq_linear_revmap(domain, hwbase + offset);
- generic_handle_irq(irq);
+ generic_handle_domain_irq(domain, hwbase + offset);
}
}
}
diff --git a/drivers/gpio/gpio-cadence.c b/drivers/gpio/gpio-cadence.c
index 4ab3fcd9b9ba..562f8f7e7d1f 100644
--- a/drivers/gpio/gpio-cadence.c
+++ b/drivers/gpio/gpio-cadence.c
@@ -133,7 +133,7 @@ static void cdns_gpio_irq_handler(struct irq_desc *desc)
~ioread32(cgpio->regs + CDNS_GPIO_IRQ_MASK);
for_each_set_bit(hwirq, &status, chip->ngpio)
- generic_handle_irq(irq_find_mapping(chip->irq.domain, hwirq));
+ generic_handle_domain_irq(chip->irq.domain, hwirq);
chained_irq_exit(irqchip, desc);
}
diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index 6f2138503726..cb5afaa7ed48 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -369,8 +369,7 @@ static void gpio_irq_handler(struct irq_desc *desc)
*/
hw_irq = (bank_num / 2) * 32 + bit;
- generic_handle_irq(
- irq_find_mapping(d->irq_domain, hw_irq));
+ generic_handle_domain_irq(d->irq_domain, hw_irq);
}
}
chained_irq_exit(irq_desc_get_chip(desc), desc);
diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c
index 4c5f6d0c8d74..026903e3ef54 100644
--- a/drivers/gpio/gpio-dln2.c
+++ b/drivers/gpio/gpio-dln2.c
@@ -395,7 +395,7 @@ static struct irq_chip dln2_gpio_irqchip = {
static void dln2_gpio_event(struct platform_device *pdev, u16 echo,
const void *data, int len)
{
- int pin, irq;
+ int pin, ret;
const struct {
__le16 count;
@@ -416,24 +416,20 @@ static void dln2_gpio_event(struct platform_device *pdev, u16 echo,
return;
}
- irq = irq_find_mapping(dln2->gpio.irq.domain, pin);
- if (!irq) {
- dev_err(dln2->gpio.parent, "pin %d not mapped to IRQ\n", pin);
- return;
- }
-
switch (dln2->irq_type[pin]) {
case DLN2_GPIO_EVENT_CHANGE_RISING:
- if (event->value)
- generic_handle_irq(irq);
+ if (!event->value)
+ return;
break;
case DLN2_GPIO_EVENT_CHANGE_FALLING:
- if (!event->value)
- generic_handle_irq(irq);
+ if (event->value)
+ return;
break;
- default:
- generic_handle_irq(irq);
}
+
+ ret = generic_handle_domain_irq(dln2->gpio.irq.domain, pin);
+ if (unlikely(ret))
+ dev_err(dln2->gpio.parent, "pin %d not mapped to IRQ\n", pin);
}
static int dln2_gpio_probe(struct platform_device *pdev)
diff --git a/drivers/gpio/gpio-em.c b/drivers/gpio/gpio-em.c
index 17a243c528ad..90b336e6ee27 100644
--- a/drivers/gpio/gpio-em.c
+++ b/drivers/gpio/gpio-em.c
@@ -173,7 +173,7 @@ static irqreturn_t em_gio_irq_handler(int irq, void *dev_id)
while ((pending = em_gio_read(p, GIO_MST))) {
offset = __ffs(pending);
em_gio_write(p, GIO_IIR, BIT(offset));
- generic_handle_irq(irq_find_mapping(p->irq_domain, offset));
+ generic_handle_domain_irq(p->irq_domain, offset);
irqs_handled++;
}
diff --git a/drivers/gpio/gpio-ep93xx.c b/drivers/gpio/gpio-ep93xx.c
index ef148b26b587..2e1779709113 100644
--- a/drivers/gpio/gpio-ep93xx.c
+++ b/drivers/gpio/gpio-ep93xx.c
@@ -128,13 +128,13 @@ static void ep93xx_gpio_ab_irq_handler(struct irq_desc *desc)
*/
stat = readb(epg->base + EP93XX_GPIO_A_INT_STATUS);
for_each_set_bit(offset, &stat, 8)
- generic_handle_irq(irq_find_mapping(epg->gc[0].gc.irq.domain,
- offset));
+ generic_handle_domain_irq(epg->gc[0].gc.irq.domain,
+ offset);
stat = readb(epg->base + EP93XX_GPIO_B_INT_STATUS);
for_each_set_bit(offset, &stat, 8)
- generic_handle_irq(irq_find_mapping(epg->gc[1].gc.irq.domain,
- offset));
+ generic_handle_domain_irq(epg->gc[1].gc.irq.domain,
+ offset);
chained_irq_exit(irqchip, desc);
}
diff --git a/drivers/gpio/gpio-ftgpio010.c b/drivers/gpio/gpio-ftgpio010.c
index 4031164780f7..b90a45c939a4 100644
--- a/drivers/gpio/gpio-ftgpio010.c
+++ b/drivers/gpio/gpio-ftgpio010.c
@@ -149,8 +149,7 @@ static void ftgpio_gpio_irq_handler(struct irq_desc *desc)
stat = readl(g->base + GPIO_INT_STAT_RAW);
if (stat)
for_each_set_bit(offset, &stat, gc->ngpio)
- generic_handle_irq(irq_find_mapping(gc->irq.domain,
- offset));
+ generic_handle_domain_irq(gc->irq.domain, offset);
chained_irq_exit(irqchip, desc);
}
diff --git a/drivers/gpio/gpio-hisi.c b/drivers/gpio/gpio-hisi.c
index ad3d4da25160..3caabef5c7a2 100644
--- a/drivers/gpio/gpio-hisi.c
+++ b/drivers/gpio/gpio-hisi.c
@@ -186,8 +186,8 @@ static void hisi_gpio_irq_handler(struct irq_desc *desc)
chained_irq_enter(irq_c, desc);
for_each_set_bit(hwirq, &irq_msk, HISI_GPIO_LINE_NUM_MAX)
- generic_handle_irq(irq_find_mapping(hisi_gpio->chip.irq.domain,
- hwirq));
+ generic_handle_domain_irq(hisi_gpio->chip.irq.domain,
+ hwirq);
chained_irq_exit(irq_c, desc);
}
diff --git a/drivers/gpio/gpio-hlwd.c b/drivers/gpio/gpio-hlwd.c
index 4a17599f6d44..641719a96a1a 100644
--- a/drivers/gpio/gpio-hlwd.c
+++ b/drivers/gpio/gpio-hlwd.c
@@ -97,11 +97,8 @@ static void hlwd_gpio_irqhandler(struct irq_desc *desc)
chained_irq_enter(chip, desc);
- for_each_set_bit(hwirq, &pending, 32) {
- int irq = irq_find_mapping(hlwd->gpioc.irq.domain, hwirq);
-
- generic_handle_irq(irq);
- }
+ for_each_set_bit(hwirq, &pending, 32)
+ generic_handle_domain_irq(hlwd->gpioc.irq.domain, hwirq);
chained_irq_exit(chip, desc);
}
diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c
index 22f3ce218f5d..42c4d9d0cd50 100644
--- a/drivers/gpio/gpio-merrifield.c
+++ b/drivers/gpio/gpio-merrifield.c
@@ -359,12 +359,8 @@ static void mrfld_irq_handler(struct irq_desc *desc)
/* Only interrupts that are enabled */
pending &= enabled;
- for_each_set_bit(gpio, &pending, 32) {
- unsigned int irq;
-
- irq = irq_find_mapping(gc->irq.domain, base + gpio);
- generic_handle_irq(irq);
- }
+ for_each_set_bit(gpio, &pending, 32)
+ generic_handle_domain_irq(gc->irq.domain, base + gpio);
}
chained_irq_exit(irqchip, desc);
diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c
index 50b321a1ab1b..67dc38976ab6 100644
--- a/drivers/gpio/gpio-mpc8xxx.c
+++ b/drivers/gpio/gpio-mpc8xxx.c
@@ -120,7 +120,7 @@ static irqreturn_t mpc8xxx_gpio_irq_cascade(int irq, void *data)
mask = gc->read_reg(mpc8xxx_gc->regs + GPIO_IER)
& gc->read_reg(mpc8xxx_gc->regs + GPIO_IMR);
for_each_set_bit(i, &mask, 32)
- generic_handle_irq(irq_linear_revmap(mpc8xxx_gc->irq, 31 - i));
+ generic_handle_domain_irq(mpc8xxx_gc->irq, 31 - i);
return IRQ_HANDLED;
}
diff --git a/drivers/gpio/gpio-mt7621.c b/drivers/gpio/gpio-mt7621.c
index 82fb20dca53a..10c0a9bc5ea1 100644
--- a/drivers/gpio/gpio-mt7621.c
+++ b/drivers/gpio/gpio-mt7621.c
@@ -95,9 +95,7 @@ mediatek_gpio_irq_handler(int irq, void *data)
pending = mtk_gpio_r32(rg, GPIO_REG_STAT);
for_each_set_bit(bit, &pending, MTK_BANK_WIDTH) {
- u32 map = irq_find_mapping(gc->irq.domain, bit);
-
- generic_handle_irq(map);
+ generic_handle_domain_irq(gc->irq.domain, bit);
mtk_gpio_w32(rg, GPIO_REG_STAT, BIT(bit));
ret |= IRQ_HANDLED;
}
diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c
index b9fdf05d7669..c871602fc5ba 100644
--- a/drivers/gpio/gpio-mxc.c
+++ b/drivers/gpio/gpio-mxc.c
@@ -241,7 +241,7 @@ static void mxc_gpio_irq_handler(struct mxc_gpio_port *port, u32 irq_stat)
if (port->both_edges & (1 << irqoffset))
mxc_flip_edge(port, irqoffset);
- generic_handle_irq(irq_find_mapping(port->domain, irqoffset));
+ generic_handle_domain_irq(port->domain, irqoffset);
irq_stat &= ~(1 << irqoffset);
}
diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c
index 31a336b86ff2..c5166cd47c9c 100644
--- a/drivers/gpio/gpio-mxs.c
+++ b/drivers/gpio/gpio-mxs.c
@@ -157,7 +157,7 @@ static void mxs_gpio_irq_handler(struct irq_desc *desc)
if (port->both_edges & (1 << irqoffset))
mxs_flip_edge(port, irqoffset);
- generic_handle_irq(irq_find_mapping(port->domain, irqoffset));
+ generic_handle_domain_irq(port->domain, irqoffset);
irq_stat &= ~(1 << irqoffset);
}
}
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index ca23f72165ca..415e8df89d6f 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -611,8 +611,7 @@ static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank)
raw_spin_lock_irqsave(&bank->wa_lock, wa_lock_flags);
- generic_handle_irq(irq_find_mapping(bank->chip.irq.domain,
- bit));
+ generic_handle_domain_irq(bank->chip.irq.domain, bit);
raw_spin_unlock_irqrestore(&bank->wa_lock,
wa_lock_flags);
diff --git a/drivers/gpio/gpio-pci-idio-16.c b/drivers/gpio/gpio-pci-idio-16.c
index 9acec76e0b51..71a13a394050 100644
--- a/drivers/gpio/gpio-pci-idio-16.c
+++ b/drivers/gpio/gpio-pci-idio-16.c
@@ -260,7 +260,7 @@ static irqreturn_t idio_16_irq_handler(int irq, void *dev_id)
return IRQ_NONE;
for_each_set_bit(gpio, &idio16gpio->irq_mask, chip->ngpio)
- generic_handle_irq(irq_find_mapping(chip->irq.domain, gpio));
+ generic_handle_domain_irq(chip->irq.domain, gpio);
raw_spin_lock(&idio16gpio->lock);
diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c
index 2a07fd96707e..8a9b98fa418f 100644
--- a/drivers/gpio/gpio-pcie-idio-24.c
+++ b/drivers/gpio/gpio-pcie-idio-24.c
@@ -468,8 +468,7 @@ static irqreturn_t idio_24_irq_handler(int irq, void *dev_id)
irq_mask = idio24gpio->irq_mask & irq_status;
for_each_set_bit(gpio, &irq_mask, chip->ngpio - 24)
- generic_handle_irq(irq_find_mapping(chip->irq.domain,
- gpio + 24));
+ generic_handle_domain_irq(chip->irq.domain, gpio + 24);
raw_spin_lock(&idio24gpio->lock);
diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c
index f1b53dd1df1a..4ecab700f23f 100644
--- a/drivers/gpio/gpio-pl061.c
+++ b/drivers/gpio/gpio-pl061.c
@@ -223,8 +223,8 @@ static void pl061_irq_handler(struct irq_desc *desc)
pending = readb(pl061->base + GPIOMIS);
if (pending) {
for_each_set_bit(offset, &pending, PL061_GPIO_NR)
- generic_handle_irq(irq_find_mapping(gc->irq.domain,
- offset));
+ generic_handle_domain_irq(gc->irq.domain,
+ offset);
}
chained_irq_exit(irqchip, desc);
diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c
index 0cb6600b8eee..382468e294e1 100644
--- a/drivers/gpio/gpio-pxa.c
+++ b/drivers/gpio/gpio-pxa.c
@@ -455,9 +455,8 @@ static irqreturn_t pxa_gpio_demux_handler(int in_irq, void *d)
for_each_set_bit(n, &gedr, BITS_PER_LONG) {
loop = 1;
- generic_handle_irq(
- irq_find_mapping(pchip->irqdomain,
- gpio + n));
+ generic_handle_domain_irq(pchip->irqdomain,
+ gpio + n);
}
}
handled += loop;
@@ -471,9 +470,9 @@ static irqreturn_t pxa_gpio_direct_handler(int in_irq, void *d)
struct pxa_gpio_chip *pchip = d;
if (in_irq == pchip->irq0) {
- generic_handle_irq(irq_find_mapping(pchip->irqdomain, 0));
+ generic_handle_domain_irq(pchip->irqdomain, 0);
} else if (in_irq == pchip->irq1) {
- generic_handle_irq(irq_find_mapping(pchip->irqdomain, 1));
+ generic_handle_domain_irq(pchip->irqdomain, 1);
} else {
pr_err("%s() unknown irq %d\n", __func__, in_irq);
return IRQ_NONE;
diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c
index e7092d5fe700..b378aba32602 100644
--- a/drivers/gpio/gpio-rcar.c
+++ b/drivers/gpio/gpio-rcar.c
@@ -213,8 +213,8 @@ static irqreturn_t gpio_rcar_irq_handler(int irq, void *dev_id)
gpio_rcar_read(p, INTMSK))) {
offset = __ffs(pending);
gpio_rcar_write(p, INTCLR, BIT(offset));
- generic_handle_irq(irq_find_mapping(p->gpio_chip.irq.domain,
- offset));
+ generic_handle_domain_irq(p->gpio_chip.irq.domain,
+ offset);
irqs_handled++;
}
diff --git a/drivers/gpio/gpio-rda.c b/drivers/gpio/gpio-rda.c
index 28dcbb58b76b..463846431183 100644
--- a/drivers/gpio/gpio-rda.c
+++ b/drivers/gpio/gpio-rda.c
@@ -181,7 +181,7 @@ static void rda_gpio_irq_handler(struct irq_desc *desc)
struct irq_chip *ic = irq_desc_get_chip(desc);
struct rda_gpio *rda_gpio = gpiochip_get_data(chip);
unsigned long status;
- u32 n, girq;
+ u32 n;
chained_irq_enter(ic, desc);
@@ -189,10 +189,8 @@ static void rda_gpio_irq_handler(struct irq_desc *desc)
/* Only lower 8 bits are capable of generating interrupts */
status &= RDA_GPIO_IRQ_MASK;
- for_each_set_bit(n, &status, RDA_GPIO_BANK_NR) {
- girq = irq_find_mapping(chip->irq.domain, n);
- generic_handle_irq(girq);
- }
+ for_each_set_bit(n, &status, RDA_GPIO_BANK_NR)
+ generic_handle_domain_irq(chip->irq.domain, n);
chained_irq_exit(ic, desc);
}
diff --git a/drivers/gpio/gpio-realtek-otto.c b/drivers/gpio/gpio-realtek-otto.c
index cb64fb5a51aa..eeeb39bc171d 100644
--- a/drivers/gpio/gpio-realtek-otto.c
+++ b/drivers/gpio/gpio-realtek-otto.c
@@ -196,7 +196,6 @@ static void realtek_gpio_irq_handler(struct irq_desc *desc)
struct irq_chip *irq_chip = irq_desc_get_chip(desc);
unsigned int lines_done;
unsigned int port_pin_count;
- unsigned int irq;
unsigned long status;
int offset;
@@ -205,10 +204,8 @@ static void realtek_gpio_irq_handler(struct irq_desc *desc)
for (lines_done = 0; lines_done < gc->ngpio; lines_done += 8) {
status = realtek_gpio_read_isr(ctrl, lines_done / 8);
port_pin_count = min(gc->ngpio - lines_done, 8U);
- for_each_set_bit(offset, &status, port_pin_count) {
- irq = irq_find_mapping(gc->irq.domain, offset);
- generic_handle_irq(irq);
- }
+ for_each_set_bit(offset, &status, port_pin_count)
+ generic_handle_domain_irq(gc->irq.domain, offset);
}
chained_irq_exit(irq_chip, desc);
diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c
new file mode 100644
index 000000000000..036b2d959503
--- /dev/null
+++ b/drivers/gpio/gpio-rockchip.c
@@ -0,0 +1,771 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2013 MundoReader S.L.
+ * Author: Heiko Stuebner <heiko@sntech.de>
+ *
+ * Copyright (c) 2021 Rockchip Electronics Co. Ltd.
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/gpio/driver.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/regmap.h>
+
+#include "../pinctrl/core.h"
+#include "../pinctrl/pinctrl-rockchip.h"
+
+#define GPIO_TYPE_V1 (0) /* GPIO Version ID reserved */
+#define GPIO_TYPE_V2 (0x01000C2B) /* GPIO Version ID 0x01000C2B */
+
+/*
+ * Register offsets of the v1 GPIO controller. Members that are not named
+ * here (int_bothedge, dbclk_div_en, dbclk_div_con, version_id) are left
+ * zero-initialized.
+ */
+static const struct rockchip_gpio_regs gpio_regs_v1 = {
+	/* data, direction and pin level */
+	.port_dr	= 0x00,
+	.port_ddr	= 0x04,
+	.ext_port	= 0x50,
+	/* interrupt configuration */
+	.int_en		= 0x30,
+	.int_mask	= 0x34,
+	.int_type	= 0x38,
+	.int_polarity	= 0x3c,
+	/* interrupt status and acknowledge */
+	.int_status	= 0x40,
+	.int_rawstatus	= 0x44,
+	.port_eoi	= 0x4c,
+	/* debounce */
+	.debounce	= 0x48,
+};
+
+/*
+ * Register offsets of the v2 GPIO controller. Compared with v1 the table
+ * additionally provides a both-edge register, debounce clock divider
+ * registers and a version ID register.
+ */
+static const struct rockchip_gpio_regs gpio_regs_v2 = {
+	/* data, direction and pin level */
+	.port_dr	= 0x00,
+	.port_ddr	= 0x08,
+	.ext_port	= 0x70,
+	/* interrupt configuration */
+	.int_en		= 0x10,
+	.int_mask	= 0x18,
+	.int_type	= 0x20,
+	.int_polarity	= 0x28,
+	.int_bothedge	= 0x30,
+	/* interrupt status and acknowledge */
+	.int_status	= 0x50,
+	.int_rawstatus	= 0x58,
+	.port_eoi	= 0x60,
+	/* debounce and its clock divider */
+	.debounce	= 0x38,
+	.dbclk_div_en	= 0x40,
+	.dbclk_div_con	= 0x48,
+	/* controller identification */
+	.version_id	= 0x78,
+};
+
+/*
+ * Write a 32-bit value to a v2 register pair: bits 15:0 of @val go to @reg
+ * and bits 31:16 go to @reg + 4. The upper half of each written word is
+ * set to all ones (presumably a per-bit write-enable mask - confirm against
+ * the controller documentation).
+ */
+static inline void gpio_writel_v2(u32 val, void __iomem *reg)
+{
+	const u32 upper_ones = 0xffff0000;
+	u32 lo = val & 0xffff;
+	u32 hi = val >> 16;
+
+	writel(lo | upper_ones, reg);
+	writel(hi | upper_ones, reg + 0x4);
+}
+
+/*
+ * Read a v2 register pair and combine it into one 32-bit value: the word
+ * at @reg + 4 is shifted into bits 31:16 and OR-ed with the word at @reg.
+ */
+static inline u32 gpio_readl_v2(void __iomem *reg)
+{
+	u32 hi = readl(reg + 0x4);
+	u32 lo = readl(reg);
+
+	return hi << 16 | lo;
+}
+
+/*
+ * Write @value to the bank register at @offset, using the split v2 access
+ * for GPIO_TYPE_V2 banks and a plain writel() otherwise.
+ */
+static inline void rockchip_gpio_writel(struct rockchip_pin_bank *bank,
+					u32 value, unsigned int offset)
+{
+	void __iomem *addr = bank->reg_base + offset;
+
+	if (bank->gpio_type != GPIO_TYPE_V2) {
+		writel(value, addr);
+		return;
+	}
+
+	gpio_writel_v2(value, addr);
+}
+
+/*
+ * Read the bank register at @offset, using the split v2 access for
+ * GPIO_TYPE_V2 banks and a plain readl() otherwise.
+ */
+static inline u32 rockchip_gpio_readl(struct rockchip_pin_bank *bank,
+				      unsigned int offset)
+{
+	void __iomem *addr = bank->reg_base + offset;
+
+	if (bank->gpio_type == GPIO_TYPE_V2)
+		return gpio_readl_v2(addr);
+
+	return readl(addr);
+}
+
+/*
+ * Set (@value != 0) or clear (@value == 0) a single @bit of the bank
+ * register at @offset.
+ *
+ * v1 banks do a read-modify-write of the 32-bit register. v2 banks write
+ * one word to the low (bits 0-15) or high (bits 16-31) half register with
+ * bit (@bit % 16) + 16 always set and bit (@bit % 16) carrying the value.
+ */
+static inline void rockchip_gpio_writel_bit(struct rockchip_pin_bank *bank,
+					    u32 bit, u32 value,
+					    unsigned int offset)
+{
+	void __iomem *addr = bank->reg_base + offset;
+	u32 word;
+
+	if (bank->gpio_type != GPIO_TYPE_V2) {
+		word = readl(addr);
+		if (value)
+			word |= BIT(bit);
+		else
+			word &= ~BIT(bit);
+		writel(word, addr);
+		return;
+	}
+
+	word = BIT(bit % 16 + 16);
+	if (value)
+		word |= BIT(bit % 16);
+	if (bit >= 16)
+		addr += 0x4;
+	writel(word, addr);
+}
+
+/*
+ * Return the state (0 or 1) of a single @bit of the bank register at
+ * @offset. On v2 banks bits 16-31 are read from the half register at
+ * @offset + 4.
+ */
+static inline u32 rockchip_gpio_readl_bit(struct rockchip_pin_bank *bank,
+					  u32 bit, unsigned int offset)
+{
+	void __iomem *addr = bank->reg_base + offset;
+	u32 shift = bit;
+
+	if (bank->gpio_type == GPIO_TYPE_V2) {
+		if (bit >= 16)
+			addr += 0x4;
+		shift = bit % 16;
+	}
+
+	return (readl(addr) >> shift) & (0x1);
+}
+
+/*
+ * gpiolib get_direction callback.
+ *
+ * Reads the line's bit in the port_ddr register and returns
+ * GPIO_LINE_DIRECTION_OUT when it is set, GPIO_LINE_DIRECTION_IN otherwise.
+ */
+static int rockchip_gpio_get_direction(struct gpio_chip *chip,
+				       unsigned int offset)
+{
+	struct rockchip_pin_bank *bank = gpiochip_get_data(chip);
+	u32 data;
+
+	/*
+	 * rockchip_gpio_readl_bit() already shifts the requested bit down
+	 * and returns 0 or 1, so it must not be masked with BIT(offset)
+	 * again: that would report "input" for every line except offset 0.
+	 */
+	data = rockchip_gpio_readl_bit(bank, offset, bank->gpio_regs->port_ddr);
+	if (data)
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
+}
+
+/*
+ * Program the port_ddr bit of line @offset under the bank spinlock:
+ * 0 when @input is true, 1 otherwise. Always returns 0.
+ */
+static int rockchip_gpio_set_direction(struct gpio_chip *chip,
+				       unsigned int offset, bool input)
+{
+	struct rockchip_pin_bank *bank = gpiochip_get_data(chip);
+	const u32 ddr_bit = input ? 0 : 1;
+	unsigned long irqflags;
+
+	raw_spin_lock_irqsave(&bank->slock, irqflags);
+	rockchip_gpio_writel_bit(bank, offset, ddr_bit,
+				 bank->gpio_regs->port_ddr);
+	raw_spin_unlock_irqrestore(&bank->slock, irqflags);
+
+	return 0;
+}
+
+/*
+ * gpiolib set callback: write @value into the port_dr bit of line @offset
+ * while holding the bank spinlock.
+ */
+static void rockchip_gpio_set(struct gpio_chip *gc, unsigned int offset,
+			      int value)
+{
+	struct rockchip_pin_bank *bank = gpiochip_get_data(gc);
+	unsigned long irqflags;
+
+	raw_spin_lock_irqsave(&bank->slock, irqflags);
+	rockchip_gpio_writel_bit(bank, offset, value,
+				 bank->gpio_regs->port_dr);
+	raw_spin_unlock_irqrestore(&bank->slock, irqflags);
+}
+
+/*
+ * gpiolib get callback: return bit @offset (0 or 1) of the ext_port
+ * register.
+ */
+static int rockchip_gpio_get(struct gpio_chip *gc, unsigned int offset)
+{
+	struct rockchip_pin_bank *bank = gpiochip_get_data(gc);
+	u32 port = readl(bank->reg_base + bank->gpio_regs->ext_port);
+
+	return (port >> offset) & 1;
+}
+
+/*
+ * Enable or disable debouncing for one line.
+ *
+ * @gc:       gpio chip of the bank
+ * @offset:   line offset inside the bank
+ * @debounce: requested debounce time in microseconds; 0 disables debounce
+ *
+ * On v2 banks with a usable debounce clock the clock divider is programmed
+ * as well (the divider only ever grows, so that it satisfies the largest
+ * request seen so far) and the debounce clock is enabled or disabled.
+ * Other banks only toggle the per-line debounce bit.
+ *
+ * Returns 0 on success, -EINVAL if the request cannot be represented by
+ * the divider or the debounce clock reports a rate of 0, or the error from
+ * clk_prepare_enable().
+ */
+static int rockchip_gpio_set_debounce(struct gpio_chip *gc,
+				      unsigned int offset,
+				      unsigned int debounce)
+{
+	struct rockchip_pin_bank *bank = gpiochip_get_data(gc);
+	const struct rockchip_gpio_regs *reg = bank->gpio_regs;
+	unsigned long flags, div_reg = 0, freq;
+	bool div_debounce_support = false;
+	unsigned int cur_div_reg;
+	u64 div, max_debounce;
+
+	/*
+	 * Only v2 has the dbclk divider registers; they are absent from
+	 * gpio_regs_v1, where the zero offsets would alias port_dr. A NULL
+	 * db_clk is treated like a missing clock.
+	 */
+	if (bank->gpio_type == GPIO_TYPE_V2 && !IS_ERR_OR_NULL(bank->db_clk)) {
+		freq = clk_get_rate(bank->db_clk);
+		if (!freq)
+			return -EINVAL;
+
+		/* 64-bit math: 2^24 * 2 * 10^6 overflows a 32-bit long */
+		div = (u64)(GENMASK(23, 0) + 1) * 2 * USEC_PER_SEC;
+		max_debounce = DIV_ROUND_CLOSEST_ULL(div, freq);
+		if (debounce > max_debounce)
+			return -EINVAL;
+
+		/* widen before multiplying to avoid 32-bit overflow */
+		div = (u64)debounce * freq;
+		div = DIV_ROUND_CLOSEST_ULL(div, 2 * USEC_PER_SEC);
+		/* do not let a rounded-down 0 wrap around to -1 */
+		div_reg = div ? div - 1 : 0;
+		div_debounce_support = true;
+	}
+
+	raw_spin_lock_irqsave(&bank->slock, flags);
+
+	/* Only the v2 needs to configure div_en and div_con for dbclk */
+	if (debounce) {
+		if (div_debounce_support) {
+			/* Configure the max debounce from consumers */
+			cur_div_reg = readl(bank->reg_base +
+					    reg->dbclk_div_con);
+			if (cur_div_reg < div_reg)
+				writel(div_reg, bank->reg_base +
+				       reg->dbclk_div_con);
+			rockchip_gpio_writel_bit(bank, offset, 1,
+						 reg->dbclk_div_en);
+		}
+
+		rockchip_gpio_writel_bit(bank, offset, 1, reg->debounce);
+	} else {
+		if (div_debounce_support)
+			rockchip_gpio_writel_bit(bank, offset, 0,
+						 reg->dbclk_div_en);
+
+		rockchip_gpio_writel_bit(bank, offset, 0, reg->debounce);
+	}
+
+	raw_spin_unlock_irqrestore(&bank->slock, flags);
+
+	/*
+	 * Enable or disable dbclk at last.
+	 * NOTE(review): enable/disable calls are not balanced per line; the
+	 * clock enable count follows the sequence of requests.
+	 */
+	if (div_debounce_support) {
+		if (debounce)
+			return clk_prepare_enable(bank->db_clk);
+
+		clk_disable_unprepare(bank->db_clk);
+	}
+
+	return 0;
+}
+
+/* gpiolib direction_input callback: switch line @offset to input. */
+static int rockchip_gpio_direction_input(struct gpio_chip *gc,
+					 unsigned int offset)
+{
+	const bool input = true;
+
+	return rockchip_gpio_set_direction(gc, offset, input);
+}
+
+/*
+ * gpiolib direction_output callback: latch @value into the data register
+ * first, then switch line @offset to output.
+ */
+static int rockchip_gpio_direction_output(struct gpio_chip *gc,
+					  unsigned int offset, int value)
+{
+	int ret;
+
+	rockchip_gpio_set(gc, offset, value);
+	ret = rockchip_gpio_set_direction(gc, offset, false);
+
+	return ret;
+}
+
+/*
+ * gpiolib set_config callback function. The setting of the pin
+ * mux function as 'gpio output' will be handled by the pinctrl subsystem
+ * interface.
+ *
+ * Only PIN_CONFIG_INPUT_DEBOUNCE is acted upon; the requested debounce
+ * time is taken from the packed config argument, so an argument of 0
+ * disables debouncing. Every request, including debounce, returns
+ * -ENOTSUPP.
+ */
+static int rockchip_gpio_set_config(struct gpio_chip *gc, unsigned int offset,
+				    unsigned long config)
+{
+	enum pin_config_param param = pinconf_to_config_param(config);
+	unsigned int debounce;
+
+	switch (param) {
+	case PIN_CONFIG_INPUT_DEBOUNCE:
+		/* forward the caller's value instead of a constant "true" */
+		debounce = pinconf_to_config_argument(config);
+		rockchip_gpio_set_debounce(gc, offset, debounce);
+		/*
+		 * Rockchip's gpio could only support up to one period
+		 * of the debounce clock(pclk), which is far away from
+		 * satisfying the requirement, as pclk is usually near
+		 * 100MHz shared by all peripherals. So the fact is it
+		 * has crippled debounce capability could only be useful
+		 * to prevent any spurious glitches from waking up the system
+		 * if the gpio is configured as wakeup interrupt source. Let's
+		 * still return -ENOTSUPP as before, to make sure the caller
+		 * of gpiod_set_debounce won't change its behaviour.
+		 */
+		return -ENOTSUPP;
+	default:
+		return -ENOTSUPP;
+	}
+}
+
+/*
+ * gpiolib gpio_to_irq callback function. Creates a mapping between a GPIO pin
+ * and a virtual IRQ, if not already present.
+ *
+ * Returns the virtual IRQ number, or -ENXIO when the bank has no irq domain
+ * or no mapping could be created.
+ */
+static int rockchip_gpio_to_irq(struct gpio_chip *gc, unsigned int offset)
+{
+	struct rockchip_pin_bank *bank = gpiochip_get_data(gc);
+	unsigned int mapped;
+
+	if (!bank->domain)
+		return -ENXIO;
+
+	mapped = irq_create_mapping(bank->domain, offset);
+	if (!mapped)
+		return -ENXIO;
+
+	return mapped;
+}
+
+/*
+ * Template gpio_chip; rockchip_gpiolib_register() copies it into each bank
+ * and fills in the per-bank fields.
+ */
+static const struct gpio_chip rockchip_gpiolib_chip = {
+	.owner			= THIS_MODULE,
+	/* line request/release */
+	.request		= gpiochip_generic_request,
+	.free			= gpiochip_generic_free,
+	/* direction */
+	.get_direction		= rockchip_gpio_get_direction,
+	.direction_input	= rockchip_gpio_direction_input,
+	.direction_output	= rockchip_gpio_direction_output,
+	/* value */
+	.get			= rockchip_gpio_get,
+	.set			= rockchip_gpio_set,
+	/* configuration and interrupts */
+	.set_config		= rockchip_gpio_set_config,
+	.to_irq			= rockchip_gpio_to_irq,
+};
+
+/*
+ * Chained handler of the bank interrupt: reads int_status once and
+ * dispatches every pending line to its mapped virtual IRQ, lowest bit
+ * first. Lines in toggle_edge_mode get their polarity flipped beforehand
+ * so that the opposite edge triggers next.
+ */
+static void rockchip_irq_demux(struct irq_desc *desc)
+{
+	struct rockchip_pin_bank *bank = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	u32 pending;
+
+	dev_dbg(bank->dev, "got irq for bank %s\n", bank->name);
+
+	chained_irq_enter(chip, desc);
+
+	pending = readl_relaxed(bank->reg_base + bank->gpio_regs->int_status);
+
+	while (pending) {
+		unsigned int irq = __ffs(pending);
+		unsigned int virq = irq_find_mapping(bank->domain, irq);
+		u32 bit = BIT(irq);
+
+		pending &= ~bit;
+
+		if (!virq) {
+			dev_err(bank->dev, "unmapped irq %d\n", irq);
+			continue;
+		}
+
+		dev_dbg(bank->dev, "handling irq %d\n", irq);
+
+		/*
+		 * Triggering IRQ on both rising and falling edge
+		 * needs manual intervention.
+		 */
+		if (bank->toggle_edge_mode & bit) {
+			void __iomem *ext_port = bank->reg_base +
+						 bank->gpio_regs->ext_port;
+			void __iomem *int_pol = bank->reg_base +
+						bank->gpio_regs->int_polarity;
+			u32 level, prev_level, pol;
+			unsigned long flags;
+
+			level = readl_relaxed(ext_port);
+			do {
+				raw_spin_lock_irqsave(&bank->slock, flags);
+
+				/* line high -> expect falling, else rising */
+				pol = readl_relaxed(int_pol);
+				if (level & bit)
+					pol &= ~bit;
+				else
+					pol |= bit;
+				writel(pol, int_pol);
+
+				raw_spin_unlock_irqrestore(&bank->slock, flags);
+
+				/* retry if the line changed in the meantime */
+				prev_level = level;
+				level = readl_relaxed(ext_port);
+			} while ((level ^ prev_level) & bit);
+		}
+
+		generic_handle_irq(virq);
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+/*
+ * irq_set_type callback.
+ *
+ * Switches the line to input, selects the edge or level flow handler and
+ * programs the int_type (edge when set) and int_polarity (rising/high when
+ * set) bits of the line. Both-edge triggering uses the int_bothedge
+ * register on v2; on v1 it is emulated through toggle_edge_mode by
+ * starting with the polarity opposite to the current pin level.
+ *
+ * Returns 0 on success or -EINVAL for an unsupported @type.
+ */
+static int rockchip_irq_set_type(struct irq_data *d, unsigned int type)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	struct rockchip_pin_bank *bank = gc->private;
+	const struct rockchip_gpio_regs *regs = bank->gpio_regs;
+	u32 mask = BIT(d->hwirq);
+	unsigned long flags;
+	u32 polarity, level, data;
+	int ret = 0;
+
+	/* an interrupt line has to be an input */
+	raw_spin_lock_irqsave(&bank->slock, flags);
+	rockchip_gpio_writel_bit(bank, d->hwirq, 0, regs->port_ddr);
+	raw_spin_unlock_irqrestore(&bank->slock, flags);
+
+	if (type & IRQ_TYPE_EDGE_BOTH)
+		irq_set_handler_locked(d, handle_edge_irq);
+	else
+		irq_set_handler_locked(d, handle_level_irq);
+
+	raw_spin_lock_irqsave(&bank->slock, flags);
+
+	level = rockchip_gpio_readl(bank, regs->int_type);
+	polarity = rockchip_gpio_readl(bank, regs->int_polarity);
+
+	switch (type) {
+	case IRQ_TYPE_EDGE_BOTH:
+		if (bank->gpio_type == GPIO_TYPE_V2) {
+			bank->toggle_edge_mode &= ~mask;
+			rockchip_gpio_writel_bit(bank, d->hwirq, 1,
+						 regs->int_bothedge);
+			goto out;
+		}
+
+		bank->toggle_edge_mode |= mask;
+		level |= mask;
+
+		/*
+		 * Determine gpio state. If 1 next interrupt should be
+		 * falling otherwise rising.
+		 */
+		data = readl(bank->reg_base + regs->ext_port);
+		if (data & mask)
+			polarity &= ~mask;
+		else
+			polarity |= mask;
+		break;
+	case IRQ_TYPE_EDGE_RISING:
+	case IRQ_TYPE_EDGE_FALLING:
+	case IRQ_TYPE_LEVEL_HIGH:
+	case IRQ_TYPE_LEVEL_LOW:
+		bank->toggle_edge_mode &= ~mask;
+
+		/* int_type bit: set for edge, clear for level */
+		if (type & IRQ_TYPE_EDGE_BOTH)
+			level |= mask;
+		else
+			level &= ~mask;
+
+		/* int_polarity bit: set for rising/high, clear otherwise */
+		if (type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_LEVEL_HIGH))
+			polarity |= mask;
+		else
+			polarity &= ~mask;
+		break;
+	default:
+		ret = -EINVAL;
+		goto out;
+	}
+
+	rockchip_gpio_writel(bank, level, regs->int_type);
+	rockchip_gpio_writel(bank, polarity, regs->int_polarity);
+out:
+	raw_spin_unlock_irqrestore(&bank->slock, flags);
+
+	return ret;
+}
+
+/*
+ * irq_suspend callback: remember the current int_mask value in the bank
+ * and then write the inverse of gc->wake_active to int_mask.
+ */
+static void rockchip_irq_suspend(struct irq_data *d)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	struct rockchip_pin_bank *bank = gc->private;
+	const struct rockchip_gpio_regs *regs = bank->gpio_regs;
+
+	bank->saved_masks = irq_reg_readl(gc, regs->int_mask);
+	irq_reg_writel(gc, ~gc->wake_active, regs->int_mask);
+}
+
+/*
+ * irq_resume callback: write the int_mask value saved by
+ * rockchip_irq_suspend() back to the register.
+ */
+static void rockchip_irq_resume(struct irq_data *d)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	struct rockchip_pin_bank *bank = gc->private;
+	const struct rockchip_gpio_regs *regs = bank->gpio_regs;
+
+	irq_reg_writel(gc, bank->saved_masks, regs->int_mask);
+}
+
+/* irq_enable callback: delegates to irq_gc_mask_clr_bit(). */
+static void rockchip_irq_enable(struct irq_data *d)
+{
+	irq_gc_mask_clr_bit(d);
+}
+
+/* irq_disable callback: delegates to irq_gc_mask_set_bit(). */
+static void rockchip_irq_disable(struct irq_data *d)
+{
+	irq_gc_mask_set_bit(d);
+}
+
+/*
+ * Set up interrupt handling for one bank: create a 32-entry linear irq
+ * domain backed by a single generic chip, wire up the chip callbacks and
+ * register offsets, mask and acknowledge all lines, and install
+ * rockchip_irq_demux() as chained handler of the bank interrupt.
+ *
+ * Returns 0 on success or -EINVAL if the domain or the generic chip could
+ * not be set up.
+ */
+static int rockchip_interrupts_register(struct rockchip_pin_bank *bank)
+{
+	const struct rockchip_gpio_regs *regs = bank->gpio_regs;
+	unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
+	struct irq_chip_generic *gc;
+	struct irq_chip_type *ct;
+	int ret;
+
+	bank->domain = irq_domain_add_linear(bank->of_node, 32,
+					     &irq_generic_chip_ops, NULL);
+	if (!bank->domain) {
+		dev_warn(bank->dev, "could not init irq domain for bank %s\n",
+			 bank->name);
+		return -EINVAL;
+	}
+
+	ret = irq_alloc_domain_generic_chips(bank->domain, 32, 1,
+					     "rockchip_gpio_irq",
+					     handle_level_irq,
+					     clr, 0, 0);
+	if (ret) {
+		dev_err(bank->dev, "could not alloc generic chips for bank %s\n",
+			bank->name);
+		irq_domain_remove(bank->domain);
+		return -EINVAL;
+	}
+
+	gc = irq_get_domain_generic_chip(bank->domain, 0);
+
+	/* v2 registers need the split accessors */
+	if (bank->gpio_type == GPIO_TYPE_V2) {
+		gc->reg_writel = gpio_writel_v2;
+		gc->reg_readl = gpio_readl_v2;
+	}
+
+	gc->reg_base = bank->reg_base;
+	gc->private = bank;
+	gc->wake_enabled = IRQ_MSK(bank->nr_pins);
+
+	ct = &gc->chip_types[0];
+	ct->regs.mask = regs->int_mask;
+	ct->regs.ack = regs->port_eoi;
+	ct->chip.irq_ack = irq_gc_ack_set_bit;
+	ct->chip.irq_mask = irq_gc_mask_set_bit;
+	ct->chip.irq_unmask = irq_gc_mask_clr_bit;
+	ct->chip.irq_enable = rockchip_irq_enable;
+	ct->chip.irq_disable = rockchip_irq_disable;
+	ct->chip.irq_set_wake = irq_gc_set_wake;
+	ct->chip.irq_suspend = rockchip_irq_suspend;
+	ct->chip.irq_resume = rockchip_irq_resume;
+	ct->chip.irq_set_type = rockchip_irq_set_type;
+
+	/*
+	 * Linux assumes that all interrupts start out disabled/masked.
+	 * Our driver only uses the concept of masked and always keeps
+	 * things enabled, so for us that's all masked and all enabled.
+	 */
+	rockchip_gpio_writel(bank, 0xffffffff, regs->int_mask);
+	rockchip_gpio_writel(bank, 0xffffffff, regs->port_eoi);
+	rockchip_gpio_writel(bank, 0xffffffff, regs->int_en);
+	gc->mask_cache = 0xffffffff;
+
+	irq_set_chained_handler_and_data(bank->irq,
+					 rockchip_irq_demux, bank);
+
+	return 0;
+}
+
+/*
+ * Register the gpio chip of one bank, add its pin range when the device
+ * tree does not provide "gpio-ranges", and set up the bank's interrupts.
+ *
+ * Returns 0 on success or a negative errno. If anything fails after the
+ * chip has been added, the chip is removed again before returning.
+ */
+static int rockchip_gpiolib_register(struct rockchip_pin_bank *bank)
+{
+	struct gpio_chip *gc;
+	int ret;
+
+	bank->gpio_chip = rockchip_gpiolib_chip;
+
+	gc = &bank->gpio_chip;
+	gc->base = bank->pin_base;
+	gc->ngpio = bank->nr_pins;
+	gc->label = bank->name;
+	gc->parent = bank->dev;
+#ifdef CONFIG_OF_GPIO
+	gc->of_node = of_node_get(bank->of_node);
+#endif
+
+	ret = gpiochip_add_data(gc, bank);
+	if (ret) {
+		dev_err(bank->dev, "failed to add gpiochip %s, %d\n",
+			gc->label, ret);
+		return ret;
+	}
+
+	/*
+	 * For DeviceTree-supported systems, the gpio core checks the
+	 * pinctrl's device node for the "gpio-ranges" property.
+	 * If it is present, it takes care of adding the pin ranges
+	 * for the driver. In this case the driver can skip ahead.
+	 *
+	 * In order to remain compatible with older, existing DeviceTree
+	 * files which don't set the "gpio-ranges" property or systems that
+	 * utilize ACPI the driver has to call gpiochip_add_pin_range().
+	 */
+	if (!of_property_read_bool(bank->of_node, "gpio-ranges")) {
+		struct device_node *pctlnp = of_get_parent(bank->of_node);
+		struct pinctrl_dev *pctldev = NULL;
+
+		if (!pctlnp) {
+			ret = -ENODATA;
+			goto fail;
+		}
+
+		pctldev = of_pinctrl_get(pctlnp);
+		/* drop the reference taken by of_get_parent() */
+		of_node_put(pctlnp);
+		if (!pctldev) {
+			ret = -ENODEV;
+			goto fail;
+		}
+
+		ret = gpiochip_add_pin_range(gc, dev_name(pctldev->dev), 0,
+					     gc->base, gc->ngpio);
+		if (ret) {
+			dev_err(bank->dev, "Failed to add pin range\n");
+			goto fail;
+		}
+	}
+
+	ret = rockchip_interrupts_register(bank);
+	if (ret) {
+		dev_err(bank->dev, "failed to register interrupt, %d\n", ret);
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	/* do not leave a registered chip behind for a failed probe */
+	gpiochip_remove(&bank->gpio_chip);
+
+	return ret;
+}
+
+/*
+ * Collect the per-bank resources from the device tree node: map the
+ * registers, map the interrupt, get and enable the bank clock, and detect
+ * the controller version from the version_id register. v2 banks
+ * additionally get the debounce clock (clock index 1).
+ *
+ * Returns 0 on success or a negative errno. The bank clock is left
+ * enabled on success; on a failure inside this function it is disabled and
+ * released again.
+ */
+static int rockchip_get_bank_data(struct rockchip_pin_bank *bank)
+{
+	struct resource res;
+	int id = 0;
+	int ret;
+
+	if (of_address_to_resource(bank->of_node, 0, &res)) {
+		dev_err(bank->dev, "cannot find IO resource for bank\n");
+		return -ENOENT;
+	}
+
+	bank->reg_base = devm_ioremap_resource(bank->dev, &res);
+	if (IS_ERR(bank->reg_base))
+		return PTR_ERR(bank->reg_base);
+
+	bank->irq = irq_of_parse_and_map(bank->of_node, 0);
+	if (!bank->irq)
+		return -EINVAL;
+
+	bank->clk = of_clk_get(bank->of_node, 0);
+	if (IS_ERR(bank->clk))
+		return PTR_ERR(bank->clk);
+
+	/* the registers must not be touched with the clock gated */
+	ret = clk_prepare_enable(bank->clk);
+	if (ret) {
+		dev_err(bank->dev, "cannot enable bank clk\n");
+		clk_put(bank->clk);
+		return ret;
+	}
+
+	id = readl(bank->reg_base + gpio_regs_v2.version_id);
+
+	/* If not gpio v2, that is default to v1. */
+	if (id == GPIO_TYPE_V2) {
+		bank->gpio_regs = &gpio_regs_v2;
+		bank->gpio_type = GPIO_TYPE_V2;
+		bank->db_clk = of_clk_get(bank->of_node, 1);
+		if (IS_ERR(bank->db_clk)) {
+			/* keep the real error, e.g. -EPROBE_DEFER */
+			ret = PTR_ERR(bank->db_clk);
+			dev_err(bank->dev, "cannot find debounce clk\n");
+			clk_disable_unprepare(bank->clk);
+			clk_put(bank->clk);
+			return ret;
+		}
+	} else {
+		bank->gpio_regs = &gpio_regs_v1;
+		bank->gpio_type = GPIO_TYPE_V1;
+	}
+
+	return 0;
+}
+
+/*
+ * Look up the pin bank whose bank_num equals @id in the pin controller's
+ * bank array. Returns the bank or NULL if there is none.
+ */
+static struct rockchip_pin_bank *
+rockchip_gpio_find_bank(struct pinctrl_dev *pctldev, int id)
+{
+	struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
+	struct rockchip_pin_bank *banks = info->ctrl->pin_banks;
+	int i;
+
+	for (i = 0; i < info->ctrl->nr_banks; i++) {
+		if (banks[i].bank_num == id)
+			return &banks[i];
+	}
+
+	return NULL;
+}
+
+/*
+ * Platform probe: find the pin bank that belongs to this device node in
+ * the parent pin controller, collect its resources and register the gpio
+ * chip and interrupts.
+ *
+ * The bank is selected through the "gpio" alias id of the node; nodes
+ * without an alias are numbered in probe order.
+ *
+ * Returns 0 on success, -EPROBE_DEFER while the pin controller is not
+ * available yet, or another negative errno.
+ */
+static int rockchip_gpio_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct device_node *pctlnp = of_get_parent(np);
+	struct pinctrl_dev *pctldev = NULL;
+	struct rockchip_pin_bank *bank = NULL;
+	static int gpio;
+	int id, ret;
+
+	if (!np || !pctlnp)
+		return -ENODEV;
+
+	pctldev = of_pinctrl_get(pctlnp);
+	/* drop the reference taken by of_get_parent() */
+	of_node_put(pctlnp);
+	if (!pctldev)
+		return -EPROBE_DEFER;
+
+	id = of_alias_get_id(np, "gpio");
+	if (id < 0)
+		id = gpio++;
+
+	bank = rockchip_gpio_find_bank(pctldev, id);
+	if (!bank)
+		return -EINVAL;
+
+	bank->dev = dev;
+	bank->of_node = np;
+
+	raw_spin_lock_init(&bank->slock);
+
+	ret = rockchip_get_bank_data(bank);
+	if (ret)
+		return ret;
+
+	ret = rockchip_gpiolib_register(bank);
+	if (ret) {
+		clk_disable_unprepare(bank->clk);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, bank);
+	dev_info(dev, "probed %pOF\n", np);
+
+	return 0;
+}
+
+/*
+ * Platform remove: unregister the gpio chip and then gate the bank clock.
+ *
+ * The chip is removed first so that none of its callbacks can touch the
+ * registers after the clock has been disabled; this mirrors the probe
+ * order in reverse.
+ */
+static int rockchip_gpio_remove(struct platform_device *pdev)
+{
+	struct rockchip_pin_bank *bank = platform_get_drvdata(pdev);
+
+	gpiochip_remove(&bank->gpio_chip);
+	clk_disable_unprepare(bank->clk);
+
+	return 0;
+}
+
+/* Device tree compatibles handled by this driver. */
+static const struct of_device_id rockchip_gpio_match[] = {
+	{
+		.compatible = "rockchip,gpio-bank",
+	},
+	{
+		.compatible = "rockchip,rk3188-gpio-bank0",
+	},
+	{ /* sentinel */ },
+};
+
+/* Platform driver glue for one GPIO bank device. */
+static struct platform_driver rockchip_gpio_driver = {
+	.driver = {
+		.name		= "rockchip-gpio",
+		.of_match_table	= rockchip_gpio_match,
+	},
+	.probe	= rockchip_gpio_probe,
+	.remove	= rockchip_gpio_remove,
+};
+
+/* Register the platform driver; run at postcore initcall level. */
+static int __init rockchip_gpio_init(void)
+{
+	int ret = platform_driver_register(&rockchip_gpio_driver);
+
+	return ret;
+}
+postcore_initcall(rockchip_gpio_init);
+
+/* Unregister the platform driver on module unload. */
+static void __exit rockchip_gpio_exit(void)
+{
+	platform_driver_unregister(&rockchip_gpio_driver);
+}
+module_exit(rockchip_gpio_exit);
+
+MODULE_DESCRIPTION("Rockchip gpio driver");
+MODULE_ALIAS("platform:rockchip-gpio");
+MODULE_LICENSE("GPL v2");
+MODULE_DEVICE_TABLE(of, rockchip_gpio_match);
diff --git a/drivers/gpio/gpio-sch.c b/drivers/gpio/gpio-sch.c
index a6f0421d6e50..0600f71462b5 100644
--- a/drivers/gpio/gpio-sch.c
+++ b/drivers/gpio/gpio-sch.c
@@ -259,7 +259,7 @@ static u32 sch_gpio_gpe_handler(acpi_handle gpe_device, u32 gpe, void *context)
pending = (resume_status << sch->resume_base) | core_status;
for_each_set_bit(offset, &pending, sch->chip.ngpio)
- generic_handle_irq(irq_find_mapping(gc->irq.domain, offset));
+ generic_handle_domain_irq(gc->irq.domain, offset);
/* Set returning value depending on whether we handled an interrupt */
ret = pending ? ACPI_INTERRUPT_HANDLED : ACPI_INTERRUPT_NOT_HANDLED;
diff --git a/drivers/gpio/gpio-sodaville.c b/drivers/gpio/gpio-sodaville.c
index aed988e78251..c2a2c76c1652 100644
--- a/drivers/gpio/gpio-sodaville.c
+++ b/drivers/gpio/gpio-sodaville.c
@@ -84,7 +84,7 @@ static irqreturn_t sdv_gpio_pub_irq_handler(int irq, void *data)
return IRQ_NONE;
for_each_set_bit(irq_bit, &irq_stat, 32)
- generic_handle_irq(irq_find_mapping(sd->id, irq_bit));
+ generic_handle_domain_irq(sd->id, irq_bit);
return IRQ_HANDLED;
}
diff --git a/drivers/gpio/gpio-sprd.c b/drivers/gpio/gpio-sprd.c
index 25c37edcbc6c..9dd9dabb579e 100644
--- a/drivers/gpio/gpio-sprd.c
+++ b/drivers/gpio/gpio-sprd.c
@@ -189,7 +189,7 @@ static void sprd_gpio_irq_handler(struct irq_desc *desc)
struct gpio_chip *chip = irq_desc_get_handler_data(desc);
struct irq_chip *ic = irq_desc_get_chip(desc);
struct sprd_gpio *sprd_gpio = gpiochip_get_data(chip);
- u32 bank, n, girq;
+ u32 bank, n;
chained_irq_enter(ic, desc);
@@ -198,13 +198,9 @@ static void sprd_gpio_irq_handler(struct irq_desc *desc)
unsigned long reg = readl_relaxed(base + SPRD_GPIO_MIS) &
SPRD_GPIO_BANK_MASK;
- for_each_set_bit(n, &reg, SPRD_GPIO_BANK_NR) {
- girq = irq_find_mapping(chip->irq.domain,
- bank * SPRD_GPIO_BANK_NR + n);
-
- generic_handle_irq(girq);
- }
-
+ for_each_set_bit(n, &reg, SPRD_GPIO_BANK_NR)
+ generic_handle_domain_irq(chip->irq.domain,
+ bank * SPRD_GPIO_BANK_NR + n);
}
chained_irq_exit(ic, desc);
}
diff --git a/drivers/gpio/gpio-tb10x.c b/drivers/gpio/gpio-tb10x.c
index 866201cf5f65..718a508d3b2f 100644
--- a/drivers/gpio/gpio-tb10x.c
+++ b/drivers/gpio/gpio-tb10x.c
@@ -100,7 +100,7 @@ static irqreturn_t tb10x_gpio_irq_cascade(int irq, void *data)
int i;
for_each_set_bit(i, &bits, 32)
- generic_handle_irq(irq_find_mapping(tb10x_gpio->domain, i));
+ generic_handle_domain_irq(tb10x_gpio->domain, i);
return IRQ_HANDLED;
}
diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index 0025f613d9b3..7f5bc10a6479 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -408,6 +408,8 @@ static void tegra_gpio_irq_handler(struct irq_desc *desc)
lvl = tegra_gpio_readl(tgi, GPIO_INT_LVL(tgi, gpio));
for_each_set_bit(pin, &sta, 8) {
+ int ret;
+
tegra_gpio_writel(tgi, 1 << pin,
GPIO_INT_CLR(tgi, gpio));
@@ -420,11 +422,8 @@ static void tegra_gpio_irq_handler(struct irq_desc *desc)
chained_irq_exit(chip, desc);
}
- irq = irq_find_mapping(domain, gpio + pin);
- if (WARN_ON(irq == 0))
- continue;
-
- generic_handle_irq(irq);
+ ret = generic_handle_domain_irq(domain, gpio + pin);
+ WARN_RATELIMIT(ret, "hwirq = %d", gpio + pin);
}
}
diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c
index d38980b9923a..05c90d76cb22 100644
--- a/drivers/gpio/gpio-tegra186.c
+++ b/drivers/gpio/gpio-tegra186.c
@@ -456,7 +456,7 @@ static void tegra186_gpio_irq(struct irq_desc *desc)
for (i = 0; i < gpio->soc->num_ports; i++) {
const struct tegra_gpio_port *port = &gpio->soc->ports[i];
- unsigned int pin, irq;
+ unsigned int pin;
unsigned long value;
void __iomem *base;
@@ -469,11 +469,8 @@ static void tegra186_gpio_irq(struct irq_desc *desc)
value = readl(base + TEGRA186_GPIO_INTERRUPT_STATUS(1));
for_each_set_bit(pin, &value, port->pins) {
- irq = irq_find_mapping(domain, offset + pin);
- if (WARN_ON(irq == 0))
- continue;
-
- generic_handle_irq(irq);
+ int ret = generic_handle_domain_irq(domain, offset + pin);
+ WARN_RATELIMIT(ret, "hwirq = %d", offset + pin);
}
skip:
diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c
index 0f5d17f343f1..5b103221b58d 100644
--- a/drivers/gpio/gpio-tqmx86.c
+++ b/drivers/gpio/gpio-tqmx86.c
@@ -183,7 +183,7 @@ static void tqmx86_gpio_irq_handler(struct irq_desc *desc)
struct tqmx86_gpio_data *gpio = gpiochip_get_data(chip);
struct irq_chip *irq_chip = irq_desc_get_chip(desc);
unsigned long irq_bits;
- int i = 0, child_irq;
+ int i = 0;
u8 irq_status;
chained_irq_enter(irq_chip, desc);
@@ -192,11 +192,9 @@ static void tqmx86_gpio_irq_handler(struct irq_desc *desc)
tqmx86_gpio_write(gpio, irq_status, TQMX86_GPIIS);
irq_bits = irq_status;
- for_each_set_bit(i, &irq_bits, TQMX86_NGPI) {
- child_irq = irq_find_mapping(gpio->chip.irq.domain,
- i + TQMX86_NGPO);
- generic_handle_irq(child_irq);
- }
+ for_each_set_bit(i, &irq_bits, TQMX86_NGPI)
+ generic_handle_domain_irq(gpio->chip.irq.domain,
+ i + TQMX86_NGPO);
chained_irq_exit(irq_chip, desc);
}
diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c
index 58776f2d69ff..e0f2b67558e7 100644
--- a/drivers/gpio/gpio-vf610.c
+++ b/drivers/gpio/gpio-vf610.c
@@ -149,7 +149,7 @@ static void vf610_gpio_irq_handler(struct irq_desc *desc)
for_each_set_bit(pin, &irq_isfr, VF610_GPIO_PER_PORT) {
vf610_gpio_writel(BIT(pin), port->base + PORT_ISFR);
- generic_handle_irq(irq_find_mapping(port->gc.irq.domain, pin));
+ generic_handle_domain_irq(port->gc.irq.domain, pin);
}
chained_irq_exit(chip, desc);
diff --git a/drivers/gpio/gpio-ws16c48.c b/drivers/gpio/gpio-ws16c48.c
index 2d89d0529135..bb02a82e22f4 100644
--- a/drivers/gpio/gpio-ws16c48.c
+++ b/drivers/gpio/gpio-ws16c48.c
@@ -339,8 +339,8 @@ static irqreturn_t ws16c48_irq_handler(int irq, void *dev_id)
for_each_set_bit(port, &int_pending, 3) {
int_id = inb(ws16c48gpio->base + 8 + port);
for_each_set_bit(gpio, &int_id, 8)
- generic_handle_irq(irq_find_mapping(
- chip->irq.domain, gpio + 8*port));
+ generic_handle_domain_irq(chip->irq.domain,
+ gpio + 8*port);
}
int_pending = inb(ws16c48gpio->base + 6) & 0x7;
diff --git a/drivers/gpio/gpio-xgs-iproc.c b/drivers/gpio/gpio-xgs-iproc.c
index ad5489a65d54..fa9b4d8c3ff5 100644
--- a/drivers/gpio/gpio-xgs-iproc.c
+++ b/drivers/gpio/gpio-xgs-iproc.c
@@ -185,7 +185,7 @@ static irqreturn_t iproc_gpio_irq_handler(int irq, void *data)
int_bits = level | event;
for_each_set_bit(bit, &int_bits, gc->ngpio)
- generic_handle_irq(irq_linear_revmap(gc->irq.domain, bit));
+ generic_handle_domain_irq(gc->irq.domain, bit);
}
return int_bits ? IRQ_HANDLED : IRQ_NONE;
diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c
index c329c3a606e8..a1b66338d077 100644
--- a/drivers/gpio/gpio-xilinx.c
+++ b/drivers/gpio/gpio-xilinx.c
@@ -538,7 +538,7 @@ static void xgpio_irqhandler(struct irq_desc *desc)
for_each_set_bit(bit, all, 64) {
irq_offset = xgpio_from_bit(chip, bit);
- generic_handle_irq(irq_find_mapping(gc->irq.domain, irq_offset));
+ generic_handle_domain_irq(gc->irq.domain, irq_offset);
}
chained_irq_exit(irqchip, desc);
diff --git a/drivers/gpio/gpio-xlp.c b/drivers/gpio/gpio-xlp.c
index d7b16bb9e4e4..0d94d3aef752 100644
--- a/drivers/gpio/gpio-xlp.c
+++ b/drivers/gpio/gpio-xlp.c
@@ -216,8 +216,7 @@ static void xlp_gpio_generic_handler(struct irq_desc *desc)
}
if (gpio_stat & BIT(gpio % XLP_GPIO_REGSZ))
- generic_handle_irq(irq_find_mapping(
- priv->chip.irq.domain, gpio));
+ generic_handle_domain_irq(priv->chip.irq.domain, gpio);
}
chained_irq_exit(irqchip, desc);
}
diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c
index f0cb8ccd03ed..06c6401f02b8 100644
--- a/drivers/gpio/gpio-zynq.c
+++ b/drivers/gpio/gpio-zynq.c
@@ -628,12 +628,8 @@ static void zynq_gpio_handle_bank_irq(struct zynq_gpio *gpio,
if (!pending)
return;
- for_each_set_bit(offset, &pending, 32) {
- unsigned int gpio_irq;
-
- gpio_irq = irq_find_mapping(irqdomain, offset + bank_offset);
- generic_handle_irq(gpio_irq);
- }
+ for_each_set_bit(offset, &pending, 32)
+ generic_handle_domain_irq(irqdomain, offset + bank_offset);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 4137e848f6a2..a9ce3b20d371 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1040,7 +1040,7 @@ void amdgpu_acpi_detect(void)
*/
bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
{
-#if IS_ENABLED(CONFIG_AMD_PMC) && IS_ENABLED(CONFIG_PM_SLEEP)
+#if IS_ENABLED(CONFIG_AMD_PMC) && IS_ENABLED(CONFIG_SUSPEND)
if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
if (adev->flags & AMD_IS_APU)
return pm_suspend_target_state == PM_SUSPEND_TO_IDLE;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 3b5d13189073..8f53837d4d3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -468,6 +468,46 @@ bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *ade
return (fw_cap & ATOM_FIRMWARE_CAP_DYNAMIC_BOOT_CFG_ENABLE) ? true : false;
}
+/*
+ * Helper function to query RAS EEPROM address
+ *
+ * @adev: amdgpu_device pointer
+ * @i2c_address: output; receives the ras_rom_i2c_slave_addr field of the
+ *               firmware_info table, or 0 when it is not available
+ *
+ * Return true if a non-zero address was read from a firmware_info table of
+ * revision 3.4 or newer, false otherwise (including a NULL @i2c_address).
+ */
+bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address)
+{
+	struct amdgpu_mode_info *mode_info = &adev->mode_info;
+	union firmware_info *fw_info;
+	u16 data_offset, size;
+	u8 frev, crev;
+	int index;
+
+	if (!i2c_address)
+		return false;
+
+	*i2c_address = 0;
+
+	index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+					    firmwareinfo);
+
+	if (!amdgpu_atom_parse_data_header(mode_info->atom_context,
+					   index, &size, &frev, &crev, &data_offset))
+		return false;
+
+	/* support firmware_info 3.4 + */
+	if (frev < 3 || (frev == 3 && crev < 4))
+		return false;
+
+	fw_info = (union firmware_info *)
+		(mode_info->atom_context->bios + data_offset);
+	*i2c_address = fw_info->v34.ras_rom_i2c_slave_addr;
+
+	return *i2c_address != 0;
+}
+
+
union smu_info {
struct atom_smu_info_v3_1 v31;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 1bbbb195015d..751248b253de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -36,6 +36,7 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev);
+bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address);
bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f3fd5ec710b6..f944ed858f3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2777,12 +2777,11 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
- mutex_lock(&adev->gfx.gfx_off_mutex);
- if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
- if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
- adev->gfx.gfx_off_state = true;
- }
- mutex_unlock(&adev->gfx.gfx_off_mutex);
+ WARN_ON_ONCE(adev->gfx.gfx_off_state);
+ WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
+
+ if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
+ adev->gfx.gfx_off_state = true;
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 43e7b61d1c5c..ada7bc19118a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -299,6 +299,9 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
ip->major, ip->minor,
ip->revision);
+ if (le16_to_cpu(ip->hw_id) == VCN_HWID)
+ adev->vcn.num_vcn_inst++;
+
for (k = 0; k < num_base_address; k++) {
/*
* convert the endianness of base addresses in place,
@@ -385,7 +388,7 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
{
struct binary_header *bhdr;
struct harvest_table *harvest_info;
- int i;
+ int i, vcn_harvest_count = 0;
bhdr = (struct binary_header *)adev->mman.discovery_bin;
harvest_info = (struct harvest_table *)(adev->mman.discovery_bin +
@@ -397,8 +400,7 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
switch (le32_to_cpu(harvest_info->list[i].hw_id)) {
case VCN_HWID:
- adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
- adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
+ vcn_harvest_count++;
break;
case DMU_HWID:
adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
@@ -407,6 +409,10 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
break;
}
}
+ if (vcn_harvest_count == adev->vcn.num_vcn_inst) {
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
+ }
}
int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 5ed8381ae0f5..971c5b8e75dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1571,6 +1571,8 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
pci_ignore_hotplug(pdev);
pci_set_power_state(pdev, PCI_D3cold);
drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
+ } else if (amdgpu_device_supports_boco(drm_dev)) {
+ /* nothing to do */
} else if (amdgpu_device_supports_baco(drm_dev)) {
amdgpu_device_baco_enter(drm_dev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a0be0772c8b3..b4ced45301be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -563,24 +563,38 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
mutex_lock(&adev->gfx.gfx_off_mutex);
- if (!enable)
- adev->gfx.gfx_off_req_count++;
- else if (adev->gfx.gfx_off_req_count > 0)
+ if (enable) {
+ /* If the count is already 0, it means there's an imbalance bug somewhere.
+ * Note that the bug may be in a different caller than the one which triggers the
+ * WARN_ON_ONCE.
+ */
+ if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
+ goto unlock;
+
adev->gfx.gfx_off_req_count--;
- if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
- schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
- } else if (!enable && adev->gfx.gfx_off_state) {
- if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
- adev->gfx.gfx_off_state = false;
+ if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state)
+ schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
+ } else {
+ if (adev->gfx.gfx_off_req_count == 0) {
+ cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
+
+ if (adev->gfx.gfx_off_state &&
+ !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
+ adev->gfx.gfx_off_state = false;
- if (adev->gfx.funcs->init_spm_golden) {
- dev_dbg(adev->dev, "GFXOFF is disabled, re-init SPM golden settings\n");
- amdgpu_gfx_init_spm_golden(adev);
+ if (adev->gfx.funcs->init_spm_golden) {
+ dev_dbg(adev->dev,
+ "GFXOFF is disabled, re-init SPM golden settings\n");
+ amdgpu_gfx_init_spm_golden(adev);
+ }
}
}
+
+ adev->gfx.gfx_off_req_count++;
}
+unlock:
mutex_unlock(&adev->gfx.gfx_off_mutex);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 83af307e97cd..cd2e18f072fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -502,7 +502,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
} else if ((client_id == AMDGPU_IRQ_CLIENTID_LEGACY) &&
adev->irq.virq[src_id]) {
- generic_handle_irq(irq_find_mapping(adev->irq.domain, src_id));
+ generic_handle_domain_irq(adev->irq.domain, src_id);
} else if (!adev->irq.client[client_id].sources) {
DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 795fa7445abe..92c8e6e7f346 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -920,11 +920,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return -EINVAL;
}
- /* This assumes only APU display buffers are pinned with (VRAM|GTT).
- * See function amdgpu_display_supported_domains()
- */
- domain = amdgpu_bo_get_preferred_pin_domain(adev, domain);
-
if (bo->tbo.pin_count) {
uint32_t mem_type = bo->tbo.resource->mem_type;
uint32_t mem_flags = bo->tbo.resource->placement;
@@ -949,6 +944,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return 0;
}
+ /* This assumes only APU display buffers are pinned with (VRAM|GTT).
+ * See function amdgpu_display_supported_domains()
+ */
+ domain = amdgpu_bo_get_preferred_pin_domain(adev, domain);
+
if (bo->tbo.base.import_attach)
dma_buf_pin(bo->tbo.base.import_attach);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index f40c871da0c6..38222de921d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -26,6 +26,7 @@
#include "amdgpu_ras.h"
#include <linux/bits.h>
#include "atom.h"
+#include "amdgpu_atomfirmware.h"
#define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0
#define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8
@@ -96,6 +97,9 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
if (!i2c_addr)
return false;
+ if (amdgpu_atomfirmware_ras_rom_addr(adev, (uint8_t*)i2c_addr))
+ return true;
+
switch (adev->asic_type) {
case CHIP_VEGA20:
*i2c_addr = EEPROM_I2C_TARGET_ADDR_VEGA20;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index c7b364e4a287..e883731c3f8f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -3026,6 +3026,14 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", &p->svms, start,
start + size - 1, nattr);
+ /* Flush pending deferred work to avoid racing with deferred actions from
+ * previous memory map changes (e.g. munmap). Concurrent memory map changes
+ * can still race with get_attr because we don't hold the mmap lock. But that
+ * would be a race condition in the application anyway, and undefined
+ * behaviour is acceptable in that case.
+ */
+ flush_work(&p->svms.deferred_list_work);
+
mmap_read_lock(mm);
if (!svm_range_is_valid(mm, start, size)) {
pr_debug("invalid range\n");
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index c0ae73b0691c..afa96c8f721b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9605,7 +9605,12 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
} else if (amdgpu_freesync_vid_mode && aconnector &&
is_freesync_video_mode(&new_crtc_state->mode,
aconnector)) {
- set_freesync_fixed_config(dm_new_crtc_state);
+ struct drm_display_mode *high_mode;
+
+ high_mode = get_highest_refresh_rate_mode(aconnector, false);
+ if (!drm_mode_equal(&new_crtc_state->mode, high_mode)) {
+ set_freesync_fixed_config(dm_new_crtc_state);
+ }
}
ret = dm_atomic_get_state(state, &dm_state);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
index 40f617bbb86f..4aba0e8c84f8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -584,7 +584,7 @@ static void amdgpu_dm_irq_schedule_work(struct amdgpu_device *adev,
handler_data = container_of(handler_list->next, struct amdgpu_dm_irq_handler_data, list);
/*allocate a new amdgpu_dm_irq_handler_data*/
- handler_data_add = kzalloc(sizeof(*handler_data), GFP_KERNEL);
+ handler_data_add = kzalloc(sizeof(*handler_data), GFP_ATOMIC);
if (!handler_data_add) {
DRM_ERROR("DM_IRQ: failed to allocate irq handler!\n");
return;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 605e297b7a59..a30283fa5173 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1530,6 +1530,12 @@ void dc_z10_restore(struct dc *dc)
if (dc->hwss.z10_restore)
dc->hwss.z10_restore(dc);
}
+
+void dc_z10_save_init(struct dc *dc) /* forward to the hwss z10_save_init hook */
+{
+ if (dc->hwss.z10_save_init) /* hook may be unset; then this call is a no-op */
+ dc->hwss.z10_save_init(dc);
+}
#endif
/*
* Applies given context to HW and copy it into current context.
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c b/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c
index f2b39ec35c89..cde8ed2560b3 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c
@@ -47,6 +47,9 @@ int dc_setup_system_context(struct dc *dc, struct dc_phy_addr_space_config *pa_c
*/
memcpy(&dc->vm_pa_config, pa_config, sizeof(struct dc_phy_addr_space_config));
dc->vm_pa_config.valid = true;
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+ dc_z10_save_init(dc);
+#endif
}
return num_vmids;
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index af7b60108e9d..21d78289b048 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -1338,6 +1338,7 @@ void dc_hardware_release(struct dc *dc);
bool dc_set_psr_allow_active(struct dc *dc, bool enable);
#if defined(CONFIG_DRM_AMD_DC_DCN)
void dc_z10_restore(struct dc *dc);
+void dc_z10_save_init(struct dc *dc);
#endif
bool dc_enable_dmub_notifications(struct dc *dc);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
index 253654d605c2..28e15ebf2f43 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
@@ -1788,7 +1788,6 @@ static bool dcn30_split_stream_for_mpc_or_odm(
}
pri_pipe->next_odm_pipe = sec_pipe;
sec_pipe->prev_odm_pipe = pri_pipe;
- ASSERT(sec_pipe->top_pipe == NULL);
if (!sec_pipe->top_pipe)
sec_pipe->stream_res.opp = pool->opps[pipe_idx];
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
index 9776d1737818..912285fdce18 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
@@ -1622,106 +1622,12 @@ static void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *b
dml_init_instance(&dc->dml, &dcn3_01_soc, &dcn3_01_ip, DML_PROJECT_DCN30);
}
-static void calculate_wm_set_for_vlevel(
- int vlevel,
- struct wm_range_table_entry *table_entry,
- struct dcn_watermarks *wm_set,
- struct display_mode_lib *dml,
- display_e2e_pipe_params_st *pipes,
- int pipe_cnt)
-{
- double dram_clock_change_latency_cached = dml->soc.dram_clock_change_latency_us;
-
- ASSERT(vlevel < dml->soc.num_states);
- /* only pipe 0 is read for voltage and dcf/soc clocks */
- pipes[0].clks_cfg.voltage = vlevel;
- pipes[0].clks_cfg.dcfclk_mhz = dml->soc.clock_limits[vlevel].dcfclk_mhz;
- pipes[0].clks_cfg.socclk_mhz = dml->soc.clock_limits[vlevel].socclk_mhz;
-
- dml->soc.dram_clock_change_latency_us = table_entry->pstate_latency_us;
- dml->soc.sr_exit_time_us = table_entry->sr_exit_time_us;
- dml->soc.sr_enter_plus_exit_time_us = table_entry->sr_enter_plus_exit_time_us;
-
- wm_set->urgent_ns = get_wm_urgent(dml, pipes, pipe_cnt) * 1000;
- wm_set->cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(dml, pipes, pipe_cnt) * 1000;
- wm_set->cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(dml, pipes, pipe_cnt) * 1000;
- wm_set->cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
- wm_set->pte_meta_urgent_ns = get_wm_memory_trip(dml, pipes, pipe_cnt) * 1000;
- wm_set->frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(dml, pipes, pipe_cnt) * 1000;
- wm_set->frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(dml, pipes, pipe_cnt) * 1000;
- wm_set->urgent_latency_ns = get_urgent_latency(dml, pipes, pipe_cnt) * 1000;
- dml->soc.dram_clock_change_latency_us = dram_clock_change_latency_cached;
-
-}
-
-static void dcn301_calculate_wm_and_dlg(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int pipe_cnt,
- int vlevel_req)
-{
- int i, pipe_idx;
- int vlevel, vlevel_max;
- struct wm_range_table_entry *table_entry;
- struct clk_bw_params *bw_params = dc->clk_mgr->bw_params;
-
- ASSERT(bw_params);
-
- vlevel_max = bw_params->clk_table.num_entries - 1;
-
- /* WM Set D */
- table_entry = &bw_params->wm_table.entries[WM_D];
- if (table_entry->wm_type == WM_TYPE_RETRAINING)
- vlevel = 0;
- else
- vlevel = vlevel_max;
- calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.d,
- &context->bw_ctx.dml, pipes, pipe_cnt);
- /* WM Set C */
- table_entry = &bw_params->wm_table.entries[WM_C];
- vlevel = min(max(vlevel_req, 2), vlevel_max);
- calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.c,
- &context->bw_ctx.dml, pipes, pipe_cnt);
- /* WM Set B */
- table_entry = &bw_params->wm_table.entries[WM_B];
- vlevel = min(max(vlevel_req, 1), vlevel_max);
- calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.b,
- &context->bw_ctx.dml, pipes, pipe_cnt);
-
- /* WM Set A */
- table_entry = &bw_params->wm_table.entries[WM_A];
- vlevel = min(vlevel_req, vlevel_max);
- calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.a,
- &context->bw_ctx.dml, pipes, pipe_cnt);
-
- for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
-
- pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
- pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
-
- if (dc->config.forced_clocks) {
- pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
- pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
- }
- if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
- pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
- if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
- pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
-
- pipe_idx++;
- }
-
- dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
-}
-
static struct resource_funcs dcn301_res_pool_funcs = {
.destroy = dcn301_destroy_resource_pool,
.link_enc_create = dcn301_link_encoder_create,
.panel_cntl_create = dcn301_panel_cntl_create,
.validate_bandwidth = dcn30_validate_bandwidth,
- .calculate_wm_and_dlg = dcn301_calculate_wm_and_dlg,
+ .calculate_wm_and_dlg = dcn30_calculate_wm_and_dlg,
.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
.populate_dml_pipes = dcn30_populate_dml_pipes_from_context,
.acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
index 6ac6faf0c533..8a2119d8ca0d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
@@ -404,6 +404,18 @@ void dcn31_update_info_frame(struct pipe_ctx *pipe_ctx)
&pipe_ctx->stream_res.encoder_info_frame);
}
}
+void dcn31_z10_save_init(struct dc *dc) /* send IDLE_OPT / DCN_SAVE_INIT to DMUB */
+{
+ union dmub_rb_cmd cmd;
+
+ memset(&cmd, 0, sizeof(cmd)); /* all fields other than the header stay zero */
+ cmd.dcn_restore.header.type = DMUB_CMD__IDLE_OPT; /* header set via the dcn_restore member */
+ cmd.dcn_restore.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_SAVE_INIT;
+
+ dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
+ dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
+ dc_dmub_srv_wait_idle(dc->ctx->dmub_srv); /* NOTE(review): presumably blocks until DMUB is done - confirm */
+}
void dcn31_z10_restore(struct dc *dc)
{
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h
index 40dfebe78fdd..140435e4f7ff 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h
@@ -44,6 +44,7 @@ void dcn31_enable_power_gating_plane(
void dcn31_update_info_frame(struct pipe_ctx *pipe_ctx);
void dcn31_z10_restore(struct dc *dc);
+void dcn31_z10_save_init(struct dc *dc);
void dcn31_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on);
int dcn31_init_sys_ctx(struct dce_hwseq *hws, struct dc *dc, struct dc_phy_addr_space_config *pa_config);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
index aaf2dbd095fe..b30d923471cb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
@@ -97,6 +97,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
.set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
.set_pipe = dcn21_set_pipe,
.z10_restore = dcn31_z10_restore,
+ .z10_save_init = dcn31_z10_save_init,
.is_abm_supported = dcn31_is_abm_supported,
.set_disp_pattern_generator = dcn30_set_disp_pattern_generator,
.update_visual_confirm_color = dcn20_update_visual_confirm_color,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
index 5ab008e62b82..ad5f2adcc40d 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
@@ -237,6 +237,7 @@ struct hw_sequencer_funcs {
int width, int height, int offset);
void (*z10_restore)(struct dc *dc);
+ void (*z10_save_init)(struct dc *dc);
void (*update_visual_confirm_color)(struct dc *dc,
struct pipe_ctx *pipe_ctx,
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 7c4734f905d9..7fafb8d6c1da 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -856,6 +856,11 @@ enum dmub_cmd_idle_opt_type {
* DCN hardware restore.
*/
DMUB_CMD__IDLE_OPT_DCN_RESTORE = 0,
+
+ /**
+ * DCN hardware save.
+ */
+ DMUB_CMD__IDLE_OPT_DCN_SAVE_INIT = 1
};
/**
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index 3811e58dd857..44955458fe38 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -590,7 +590,7 @@ struct atom_firmware_info_v3_4 {
uint8_t board_i2c_feature_id; // enum of atom_board_i2c_feature_id_def
uint8_t board_i2c_feature_gpio_id; // i2c id find in gpio_lut data table gpio_id
uint8_t board_i2c_feature_slave_addr;
- uint8_t reserved3;
+ uint8_t ras_rom_i2c_slave_addr;
uint16_t bootup_mvddq_mv;
uint16_t bootup_mvpp_mv;
uint32_t zfbstartaddrin16mb;
diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_pmfw.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_pmfw.h
index 5627de734246..c5e26d619bf0 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_pmfw.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_pmfw.h
@@ -111,7 +111,9 @@ typedef struct {
uint32_t InWhisperMode : 1;
uint32_t spare0 : 1;
uint32_t ZstateStatus : 4;
- uint32_t spare1 :12;
+ uint32_t spare1 : 4;
+ uint32_t DstateFun : 4;
+ uint32_t DstateDev : 4;
// MP1_EXT_SCRATCH2
uint32_t P2JobHandler :24;
uint32_t RsmuPmiP2FinishedCnt : 8;
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
index 25979106fd25..02e8c6e5448d 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
@@ -5127,6 +5127,13 @@ static int vega10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf)
return size;
}
+static bool vega10_get_power_profile_mode_quirks(struct pp_hwmgr *hwmgr)
+{
+ struct amdgpu_device *adev = hwmgr->adev;
+
+ return (adev->pdev->device == 0x6860); /* quirk only for device 0x6860 (presumably the PCI device ID - confirm) */
+}
+
static int vega10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint32_t size)
{
struct vega10_hwmgr *data = hwmgr->backend;
@@ -5163,9 +5170,15 @@ static int vega10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, ui
}
out:
- smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask,
+ if (vega10_get_power_profile_mode_quirks(hwmgr))
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask,
+ 1 << power_profile_mode,
+ NULL);
+ else
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask,
(!power_profile_mode) ? 0 : 1 << (power_profile_mode - 1),
NULL);
+
hwmgr->power_profile_mode = power_profile_mode;
return 0;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index c751f717a0da..d92dd2c7448e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -353,8 +353,7 @@ static void sienna_cichlid_check_bxco_support(struct smu_context *smu)
struct amdgpu_device *adev = smu->adev;
uint32_t val;
- if (powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_BACO ||
- powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_MACO) {
+ if (powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_BACO) {
val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0);
smu_baco->platform_support =
(val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? true :
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 18681dc458da..bcaaa086fc2f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -256,7 +256,7 @@ static int vangogh_tables_init(struct smu_context *smu)
return 0;
err3_out:
- kfree(smu_table->clocks_table);
+ kfree(smu_table->watermarks_table);
err2_out:
kfree(smu_table->gpu_metrics_table);
err1_out:
diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c
index d29907955ff7..5d82891c3222 100644
--- a/drivers/gpu/drm/drm_ioc32.c
+++ b/drivers/gpu/drm/drm_ioc32.c
@@ -855,8 +855,6 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd,
req.request.sequence = req32.request.sequence;
req.request.signal = req32.request.signal;
err = drm_ioctl_kernel(file, drm_wait_vblank_ioctl, &req, DRM_UNLOCKED);
- if (err)
- return err;
req32.reply.type = req.reply.type;
req32.reply.sequence = req.reply.sequence;
@@ -865,7 +863,7 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd,
if (copy_to_user(argp, &req32, sizeof(req32)))
return -EFAULT;
- return 0;
+ return err;
}
#if defined(CONFIG_X86)
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index be716b56e8e0..00dade49665b 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -2463,6 +2463,15 @@ static void intel_ddi_power_up_lanes(struct intel_encoder *encoder,
}
}
+/* Pipe mask for eDP MSO splitter enable: pipes A and B on Alder Lake P, pipe A otherwise. */
+static u8 intel_ddi_splitter_pipe_mask(struct drm_i915_private *i915)
+{
+ if (IS_ALDERLAKE_P(i915))
+ return BIT(PIPE_A) | BIT(PIPE_B);
+ else
+ return BIT(PIPE_A);
+}
+
static void intel_ddi_mso_get_config(struct intel_encoder *encoder,
struct intel_crtc_state *pipe_config)
{
@@ -2480,8 +2489,7 @@ static void intel_ddi_mso_get_config(struct intel_encoder *encoder,
if (!pipe_config->splitter.enable)
return;
- /* Splitter enable is supported for pipe A only. */
- if (drm_WARN_ON(&i915->drm, pipe != PIPE_A)) {
+ if (drm_WARN_ON(&i915->drm, !(intel_ddi_splitter_pipe_mask(i915) & BIT(pipe)))) {
pipe_config->splitter.enable = false;
return;
}
@@ -2513,10 +2521,6 @@ static void intel_ddi_mso_configure(const struct intel_crtc_state *crtc_state)
return;
if (crtc_state->splitter.enable) {
- /* Splitter enable is supported for pipe A only. */
- if (drm_WARN_ON(&i915->drm, pipe != PIPE_A))
- return;
-
dss1 |= SPLITTER_ENABLE;
dss1 |= OVERLAP_PIXELS(crtc_state->splitter.pixel_overlap);
if (crtc_state->splitter.link_count == 2)
@@ -4743,12 +4747,8 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
dig_port->hpd_pulse = intel_dp_hpd_pulse;
- /* Splitter enable for eDP MSO is limited to certain pipes. */
- if (dig_port->dp.mso_link_count) {
- encoder->pipe_mask = BIT(PIPE_A);
- if (IS_ALDERLAKE_P(dev_priv))
- encoder->pipe_mask |= BIT(PIPE_B);
- }
+ if (dig_port->dp.mso_link_count)
+ encoder->pipe_mask = intel_ddi_splitter_pipe_mask(dev_priv);
}
/* In theory we don't need the encoder->type check, but leave it just in
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 2d5d21740c25..0a8a2395c8ac 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -5746,16 +5746,18 @@ static void bdw_set_pipemisc(const struct intel_crtc_state *crtc_state)
switch (crtc_state->pipe_bpp) {
case 18:
- val |= PIPEMISC_DITHER_6_BPC;
+ val |= PIPEMISC_6_BPC;
break;
case 24:
- val |= PIPEMISC_DITHER_8_BPC;
+ val |= PIPEMISC_8_BPC;
break;
case 30:
- val |= PIPEMISC_DITHER_10_BPC;
+ val |= PIPEMISC_10_BPC;
break;
case 36:
- val |= PIPEMISC_DITHER_12_BPC;
+ /* Port output 12BPC defined for ADLP+ */
+ if (DISPLAY_VER(dev_priv) > 12)
+ val |= PIPEMISC_12_BPC_ADLP;
break;
default:
MISSING_CASE(crtc_state->pipe_bpp);
@@ -5808,15 +5810,27 @@ int bdw_get_pipemisc_bpp(struct intel_crtc *crtc)
tmp = intel_de_read(dev_priv, PIPEMISC(crtc->pipe));
- switch (tmp & PIPEMISC_DITHER_BPC_MASK) {
- case PIPEMISC_DITHER_6_BPC:
+ switch (tmp & PIPEMISC_BPC_MASK) {
+ case PIPEMISC_6_BPC:
return 18;
- case PIPEMISC_DITHER_8_BPC:
+ case PIPEMISC_8_BPC:
return 24;
- case PIPEMISC_DITHER_10_BPC:
+ case PIPEMISC_10_BPC:
return 30;
- case PIPEMISC_DITHER_12_BPC:
- return 36;
+ /*
+ * PORT OUTPUT 12 BPC defined for ADLP+.
+ *
+ * TODO:
+ * For previous platforms with DSI interface, bits 5:7
+ * are used for storing pipe_bpp irrespective of dithering.
+ * Since the value of 12 BPC is not defined for these bits
+ * on older platforms, need to find a workaround for 12 BPC
+ * MIPI DSI HW readout.
+ */
+ case PIPEMISC_12_BPC_ADLP:
+ if (DISPLAY_VER(dev_priv) > 12)
+ return 36;
+ fallthrough;
default:
MISSING_CASE(tmp);
return 0;
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index 4298ae684d7d..86b7ac7b65ec 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -6387,13 +6387,13 @@ void intel_display_power_suspend_late(struct drm_i915_private *i915)
if (DISPLAY_VER(i915) >= 11 || IS_GEMINILAKE(i915) ||
IS_BROXTON(i915)) {
bxt_enable_dc9(i915);
- /* Tweaked Wa_14010685332:icp,jsp,mcc */
- if (INTEL_PCH_TYPE(i915) >= PCH_ICP && INTEL_PCH_TYPE(i915) <= PCH_MCC)
- intel_de_rmw(i915, SOUTH_CHICKEN1,
- SBCLK_RUN_REFCLK_DIS, SBCLK_RUN_REFCLK_DIS);
} else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) {
hsw_enable_pc8(i915);
}
+
+ /* Tweaked Wa_14010685332:cnp,icp,jsp,mcc,tgp,adp */
+ if (INTEL_PCH_TYPE(i915) >= PCH_CNP && INTEL_PCH_TYPE(i915) < PCH_DG1)
+ intel_de_rmw(i915, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, SBCLK_RUN_REFCLK_DIS);
}
void intel_display_power_resume_early(struct drm_i915_private *i915)
@@ -6402,13 +6402,13 @@ void intel_display_power_resume_early(struct drm_i915_private *i915)
IS_BROXTON(i915)) {
gen9_sanitize_dc_state(i915);
bxt_disable_dc9(i915);
- /* Tweaked Wa_14010685332:icp,jsp,mcc */
- if (INTEL_PCH_TYPE(i915) >= PCH_ICP && INTEL_PCH_TYPE(i915) <= PCH_MCC)
- intel_de_rmw(i915, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, 0);
-
} else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) {
hsw_disable_pc8(i915);
}
+
+ /* Tweaked Wa_14010685332:cnp,icp,jsp,mcc,tgp,adp */
+ if (INTEL_PCH_TYPE(i915) >= PCH_CNP && INTEL_PCH_TYPE(i915) < PCH_DG1)
+ intel_de_rmw(i915, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, 0);
}
void intel_display_power_suspend(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 6cc03b9e4321..862c1df69cc2 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -3850,23 +3850,18 @@ static void intel_dp_check_device_service_irq(struct intel_dp *intel_dp)
static void intel_dp_check_link_service_irq(struct intel_dp *intel_dp)
{
- struct drm_i915_private *i915 = dp_to_i915(intel_dp);
u8 val;
if (intel_dp->dpcd[DP_DPCD_REV] < 0x11)
return;
if (drm_dp_dpcd_readb(&intel_dp->aux,
- DP_LINK_SERVICE_IRQ_VECTOR_ESI0, &val) != 1 || !val) {
- drm_dbg_kms(&i915->drm, "Error in reading link service irq vector\n");
+ DP_LINK_SERVICE_IRQ_VECTOR_ESI0, &val) != 1 || !val)
return;
- }
if (drm_dp_dpcd_writeb(&intel_dp->aux,
- DP_LINK_SERVICE_IRQ_VECTOR_ESI0, val) != 1) {
- drm_dbg_kms(&i915->drm, "Error in writing link service irq vector\n");
+ DP_LINK_SERVICE_IRQ_VECTOR_ESI0, val) != 1)
return;
- }
if (val & HDMI_LINK_STATUS_CHANGED)
intel_dp_handle_hdmi_link_status_change(intel_dp);
diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
index 08bceae40aa8..053a3c2f7267 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
@@ -206,7 +206,6 @@ int intel_dp_init_lttpr_and_dprx_caps(struct intel_dp *intel_dp)
return lttpr_count;
}
-EXPORT_SYMBOL(intel_dp_init_lttpr_and_dprx_caps);
static u8 dp_voltage_max(u8 preemph)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index c4a126c8caef..1257f4f11e66 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -127,6 +127,15 @@ static void intel_timeline_fini(struct rcu_head *rcu)
i915_vma_put(timeline->hwsp_ggtt);
i915_active_fini(&timeline->active);
+
+ /*
+ * A small race exists between intel_gt_retire_requests_timeout and
+ * intel_timeline_exit which could result in the syncmap not getting
+ * free'd. Rather than work too hard to seal this race, simply clean up
+ * the syncmap on fini.
+ */
+ i915_syncmap_free(&timeline->sync);
+
kfree(timeline);
}
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 06024d321a1a..cde0a477fb49 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -3149,6 +3149,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt)
MMIO_DFH(_MMIO(0xb100), D_BDW, F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(_MMIO(0xb10c), D_BDW, F_CMD_ACCESS, NULL, NULL);
MMIO_D(_MMIO(0xb110), D_BDW);
+ MMIO_D(GEN9_SCRATCH_LNCF1, D_BDW_PLUS);
MMIO_F(_MMIO(0x24d0), 48, F_CMD_ACCESS | F_CMD_WRITE_PATCH, 0, 0,
D_BDW_PLUS, NULL, force_nonpriv_write);
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index b8ac80765461..f776c470914d 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -105,6 +105,8 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
{RCS0, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */
{RCS0, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */
{RCS0, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */
+ {RCS0, GEN9_SCRATCH1, 0, false}, /* 0xb11c */
+ {RCS0, GEN9_SCRATCH_LNCF1, 0, false}, /* 0xb008 */
{RCS0, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */
{RCS0, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */
{RCS0, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 35c97c39f125..966664610c8c 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -727,9 +727,18 @@ static void err_print_gt(struct drm_i915_error_state_buf *m,
if (GRAPHICS_VER(m->i915) >= 12) {
int i;
- for (i = 0; i < GEN12_SFC_DONE_MAX; i++)
+ for (i = 0; i < GEN12_SFC_DONE_MAX; i++) {
+ /*
+ * SFC_DONE resides in the VD forcewake domain, so it
+ * only exists if the corresponding VCS engine is
+ * present.
+ */
+ if (!HAS_ENGINE(gt->_gt, _VCS(i * 2)))
+ continue;
+
err_printf(m, " SFC_DONE[%d]: 0x%08x\n", i,
gt->sfc_done[i]);
+ }
err_printf(m, " GAM_DONE: 0x%08x\n", gt->gam_done);
}
@@ -1581,6 +1590,14 @@ static void gt_record_regs(struct intel_gt_coredump *gt)
if (GRAPHICS_VER(i915) >= 12) {
for (i = 0; i < GEN12_SFC_DONE_MAX; i++) {
+ /*
+ * SFC_DONE resides in the VD forcewake domain, so it
+ * only exists if the corresponding VCS engine is
+ * present.
+ */
+ if (!HAS_ENGINE(gt->_gt, _VCS(i * 2)))
+ continue;
+
gt->sfc_done[i] =
intel_uncore_read(uncore, GEN12_SFC_DONE(i));
}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c03943198089..c3816f5c6900 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3064,24 +3064,6 @@ static void valleyview_irq_reset(struct drm_i915_private *dev_priv)
spin_unlock_irq(&dev_priv->irq_lock);
}
-static void cnp_display_clock_wa(struct drm_i915_private *dev_priv)
-{
- struct intel_uncore *uncore = &dev_priv->uncore;
-
- /*
- * Wa_14010685332:cnp/cmp,tgp,adp
- * TODO: Clarify which platforms this applies to
- * TODO: Figure out if this workaround can be applied in the s0ix suspend/resume handlers as
- * on earlier platforms and whether the workaround is also needed for runtime suspend/resume
- */
- if (INTEL_PCH_TYPE(dev_priv) == PCH_CNP ||
- (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP && INTEL_PCH_TYPE(dev_priv) < PCH_DG1)) {
- intel_uncore_rmw(uncore, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS,
- SBCLK_RUN_REFCLK_DIS);
- intel_uncore_rmw(uncore, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, 0);
- }
-}
-
static void gen8_display_irq_reset(struct drm_i915_private *dev_priv)
{
struct intel_uncore *uncore = &dev_priv->uncore;
@@ -3115,7 +3097,6 @@ static void gen8_irq_reset(struct drm_i915_private *dev_priv)
if (HAS_PCH_SPLIT(dev_priv))
ibx_irq_reset(dev_priv);
- cnp_display_clock_wa(dev_priv);
}
static void gen11_display_irq_reset(struct drm_i915_private *dev_priv)
@@ -3159,8 +3140,6 @@ static void gen11_display_irq_reset(struct drm_i915_private *dev_priv)
if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
GEN3_IRQ_RESET(uncore, SDE);
-
- cnp_display_clock_wa(dev_priv);
}
static void gen11_irq_reset(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 41186c1f771e..476bb3b9ad11 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -6163,11 +6163,17 @@ enum {
#define PIPEMISC_HDR_MODE_PRECISION (1 << 23) /* icl+ */
#define PIPEMISC_OUTPUT_COLORSPACE_YUV (1 << 11)
#define PIPEMISC_PIXEL_ROUNDING_TRUNC REG_BIT(8) /* tgl+ */
-#define PIPEMISC_DITHER_BPC_MASK (7 << 5)
-#define PIPEMISC_DITHER_8_BPC (0 << 5)
-#define PIPEMISC_DITHER_10_BPC (1 << 5)
-#define PIPEMISC_DITHER_6_BPC (2 << 5)
-#define PIPEMISC_DITHER_12_BPC (3 << 5)
+/*
+ * For Display < 13, Bits 5-7 of PIPE MISC represent DITHER BPC with
+ * valid values of: 6, 8, 10 BPC.
+ * ADLP+, the bits 5-7 represent PORT OUTPUT BPC with valid values of:
+ * 6, 8, 10, 12 BPC.
+ */
+#define PIPEMISC_BPC_MASK (7 << 5)
+#define PIPEMISC_8_BPC (0 << 5)
+#define PIPEMISC_10_BPC (1 << 5)
+#define PIPEMISC_6_BPC (2 << 5)
+#define PIPEMISC_12_BPC_ADLP (4 << 5) /* adlp+ */
#define PIPEMISC_DITHER_ENABLE (1 << 4)
#define PIPEMISC_DITHER_TYPE_MASK (3 << 2)
#define PIPEMISC_DITHER_TYPE_SP (0 << 2)
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index 8710f55d2579..bd1f9f0366d3 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -683,7 +683,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
break;
}
- ipu_dmfc_config_wait4eot(ipu_plane->dmfc, drm_rect_width(dst));
+ ipu_dmfc_config_wait4eot(ipu_plane->dmfc, ALIGN(drm_rect_width(dst), 8));
width = ipu_src_rect_width(new_state);
height = drm_rect_height(&new_state->src) >> 16;
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_color.c b/drivers/gpu/drm/mediatek/mtk_disp_color.c
index 6f4c80bbc0eb..473f5bb5cbad 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_color.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_color.c
@@ -133,6 +133,8 @@ static int mtk_disp_color_probe(struct platform_device *pdev)
static int mtk_disp_color_remove(struct platform_device *pdev)
{
+ component_del(&pdev->dev, &mtk_disp_color_component_ops);
+
return 0;
}
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
index fa9d79963cd3..5326989d5206 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
@@ -423,6 +423,8 @@ static int mtk_disp_ovl_probe(struct platform_device *pdev)
static int mtk_disp_ovl_remove(struct platform_device *pdev)
{
+ component_del(&pdev->dev, &mtk_disp_ovl_component_ops);
+
return 0;
}
diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c
index bced555648b0..e94738fe4db8 100644
--- a/drivers/gpu/drm/mediatek/mtk_dpi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dpi.c
@@ -605,11 +605,15 @@ static int mtk_dpi_bridge_atomic_check(struct drm_bridge *bridge,
struct drm_crtc_state *crtc_state,
struct drm_connector_state *conn_state)
{
- struct mtk_dpi *dpi = bridge->driver_private;
+ struct mtk_dpi *dpi = bridge_to_dpi(bridge);
unsigned int out_bus_format;
out_bus_format = bridge_state->output_bus_cfg.format;
+ if (out_bus_format == MEDIA_BUS_FMT_FIXED)
+ if (dpi->conf->num_output_fmts)
+ out_bus_format = dpi->conf->output_fmts[0];
+
dev_dbg(dpi->dev, "input format 0x%04x, output format 0x%04x\n",
bridge_state->input_bus_cfg.format,
bridge_state->output_bus_cfg.format);
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index 474efb844249..735efe79f075 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -532,13 +532,10 @@ void mtk_drm_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane,
struct drm_atomic_state *state)
{
struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
- const struct drm_plane_helper_funcs *plane_helper_funcs =
- plane->helper_private;
if (!mtk_crtc->enabled)
return;
- plane_helper_funcs->atomic_update(plane, state);
mtk_drm_crtc_update_config(mtk_crtc, false);
}
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
index 75bc00e17fc4..50d20562e612 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
@@ -34,6 +34,7 @@
#define DISP_AAL_EN 0x0000
#define DISP_AAL_SIZE 0x0030
+#define DISP_AAL_OUTPUT_SIZE 0x04d8
#define DISP_DITHER_EN 0x0000
#define DITHER_EN BIT(0)
@@ -197,6 +198,7 @@ static void mtk_aal_config(struct device *dev, unsigned int w,
struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev);
mtk_ddp_write(cmdq_pkt, w << 16 | h, &priv->cmdq_reg, priv->regs, DISP_AAL_SIZE);
+ mtk_ddp_write(cmdq_pkt, w << 16 | h, &priv->cmdq_reg, priv->regs, DISP_AAL_OUTPUT_SIZE);
}
static void mtk_aal_gamma_set(struct device *dev, struct drm_crtc_state *state)
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
index b5582dcf564c..e6dcb34d3052 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
@@ -110,6 +110,35 @@ static int mtk_plane_atomic_async_check(struct drm_plane *plane,
true, true);
}
+static void mtk_plane_update_new_state(struct drm_plane_state *new_state,
+ struct mtk_plane_state *mtk_plane_state)
+{
+ struct drm_framebuffer *fb = new_state->fb;
+ struct drm_gem_object *gem;
+ struct mtk_drm_gem_obj *mtk_gem;
+ unsigned int pitch, format;
+ dma_addr_t addr;
+
+ gem = fb->obj[0];
+ mtk_gem = to_mtk_gem_obj(gem);
+ addr = mtk_gem->dma_addr;
+ pitch = fb->pitches[0];
+ format = fb->format->format;
+
+ addr += (new_state->src.x1 >> 16) * fb->format->cpp[0];
+ addr += (new_state->src.y1 >> 16) * pitch;
+
+ mtk_plane_state->pending.enable = true;
+ mtk_plane_state->pending.pitch = pitch;
+ mtk_plane_state->pending.format = format;
+ mtk_plane_state->pending.addr = addr;
+ mtk_plane_state->pending.x = new_state->dst.x1;
+ mtk_plane_state->pending.y = new_state->dst.y1;
+ mtk_plane_state->pending.width = drm_rect_width(&new_state->dst);
+ mtk_plane_state->pending.height = drm_rect_height(&new_state->dst);
+ mtk_plane_state->pending.rotation = new_state->rotation;
+}
+
static void mtk_plane_atomic_async_update(struct drm_plane *plane,
struct drm_atomic_state *state)
{
@@ -126,8 +155,10 @@ static void mtk_plane_atomic_async_update(struct drm_plane *plane,
plane->state->src_h = new_state->src_h;
plane->state->src_w = new_state->src_w;
swap(plane->state->fb, new_state->fb);
- new_plane_state->pending.async_dirty = true;
+ mtk_plane_update_new_state(new_state, new_plane_state);
+ wmb(); /* Make sure the above parameters are set before update */
+ new_plane_state->pending.async_dirty = true;
mtk_drm_crtc_async_update(new_state->crtc, plane, state);
}
@@ -189,14 +220,8 @@ static void mtk_plane_atomic_update(struct drm_plane *plane,
struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state,
plane);
struct mtk_plane_state *mtk_plane_state = to_mtk_plane_state(new_state);
- struct drm_crtc *crtc = new_state->crtc;
- struct drm_framebuffer *fb = new_state->fb;
- struct drm_gem_object *gem;
- struct mtk_drm_gem_obj *mtk_gem;
- unsigned int pitch, format;
- dma_addr_t addr;
- if (!crtc || WARN_ON(!fb))
+ if (!new_state->crtc || WARN_ON(!new_state->fb))
return;
if (!new_state->visible) {
@@ -204,24 +229,7 @@ static void mtk_plane_atomic_update(struct drm_plane *plane,
return;
}
- gem = fb->obj[0];
- mtk_gem = to_mtk_gem_obj(gem);
- addr = mtk_gem->dma_addr;
- pitch = fb->pitches[0];
- format = fb->format->format;
-
- addr += (new_state->src.x1 >> 16) * fb->format->cpp[0];
- addr += (new_state->src.y1 >> 16) * pitch;
-
- mtk_plane_state->pending.enable = true;
- mtk_plane_state->pending.pitch = pitch;
- mtk_plane_state->pending.format = format;
- mtk_plane_state->pending.addr = addr;
- mtk_plane_state->pending.x = new_state->dst.x1;
- mtk_plane_state->pending.y = new_state->dst.y1;
- mtk_plane_state->pending.width = drm_rect_width(&new_state->dst);
- mtk_plane_state->pending.height = drm_rect_height(&new_state->dst);
- mtk_plane_state->pending.rotation = new_state->rotation;
+ mtk_plane_update_new_state(new_state, mtk_plane_state);
wmb(); /* Make sure the above parameters are set before update */
mtk_plane_state->pending.dirty = true;
}
diff --git a/drivers/gpu/drm/meson/meson_registers.h b/drivers/gpu/drm/meson/meson_registers.h
index 446e7961da48..0f3cafab8860 100644
--- a/drivers/gpu/drm/meson/meson_registers.h
+++ b/drivers/gpu/drm/meson/meson_registers.h
@@ -634,6 +634,11 @@
#define VPP_WRAP_OSD3_MATRIX_PRE_OFFSET2 0x3dbc
#define VPP_WRAP_OSD3_MATRIX_EN_CTRL 0x3dbd
+/* osd1 HDR */
+#define OSD1_HDR2_CTRL 0x38a0
+#define OSD1_HDR2_CTRL_VDIN0_HDR2_TOP_EN BIT(13)
+#define OSD1_HDR2_CTRL_REG_ONLY_MAT BIT(16)
+
/* osd2 scaler */
#define OSD2_VSC_PHASE_STEP 0x3d00
#define OSD2_VSC_INI_PHASE 0x3d01
diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c
index aede0c67a57f..259f3e6bec90 100644
--- a/drivers/gpu/drm/meson/meson_viu.c
+++ b/drivers/gpu/drm/meson/meson_viu.c
@@ -425,9 +425,14 @@ void meson_viu_init(struct meson_drm *priv)
if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) ||
meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXL))
meson_viu_load_matrix(priv);
- else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
+ else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) {
meson_viu_set_g12a_osd1_matrix(priv, RGB709_to_YUV709l_coeff,
true);
+ /* fix green/pink color distortion from vendor u-boot */
+ writel_bits_relaxed(OSD1_HDR2_CTRL_REG_ONLY_MAT |
+ OSD1_HDR2_CTRL_VDIN0_HDR2_TOP_EN, 0,
+ priv->io_base + _REG(OSD1_HDR2_CTRL));
+ }
/* Initialize OSD1 fifo control register */
reg = VIU_OSD_DDR_PRIORITY_URGENT |
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
index 6b0a7bc87eb7..b466784d9822 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
@@ -45,20 +45,13 @@ static void dpu_mdss_irq(struct irq_desc *desc)
while (interrupts) {
irq_hw_number_t hwirq = fls(interrupts) - 1;
- unsigned int mapping;
int rc;
- mapping = irq_find_mapping(dpu_mdss->irq_controller.domain,
- hwirq);
- if (mapping == 0) {
- DRM_ERROR("couldn't find irq mapping for %lu\n", hwirq);
- break;
- }
-
- rc = generic_handle_irq(mapping);
+ rc = generic_handle_domain_irq(dpu_mdss->irq_controller.domain,
+ hwirq);
if (rc < 0) {
- DRM_ERROR("handle irq fail: irq=%lu mapping=%u rc=%d\n",
- hwirq, mapping, rc);
+ DRM_ERROR("handle irq fail: irq=%lu rc=%d\n",
+ hwirq, rc);
break;
}
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mdss.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mdss.c
index 09bd46ad820b..2f4895bcb0b0 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mdss.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mdss.c
@@ -50,8 +50,7 @@ static irqreturn_t mdss_irq(int irq, void *arg)
while (intr) {
irq_hw_number_t hwirq = fls(intr) - 1;
- generic_handle_irq(irq_find_mapping(
- mdp5_mdss->irqcontroller.domain, hwirq));
+ generic_handle_domain_irq(mdp5_mdss->irqcontroller.domain, hwirq);
intr &= ~(1 << hwirq);
}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index f949767698fc..bcb0310a41b6 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -2237,6 +2237,33 @@ nv50_disp_atomic_commit_tail(struct drm_atomic_state *state)
interlock[NV50_DISP_INTERLOCK_CORE] = 0;
}
+ /* Finish updating head(s)...
+ *
+ * NVD is rather picky about both where window assignments can change,
+ * *and* about certain core and window channel states matching.
+ *
+ * The EFI GOP driver on newer GPUs configures window channels with a
+ * different output format to what we do, and the core channel update
+ * in the assign_windows case above would result in a state mismatch.
+ *
+ * Delay some of the head update until after that point to workaround
+ * the issue. This only affects the initial modeset.
+ *
+ * TODO: handle this better when adding flexible window mapping
+ */
+ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+ struct nv50_head_atom *asyh = nv50_head_atom(new_crtc_state);
+ struct nv50_head *head = nv50_head(crtc);
+
+ NV_ATOMIC(drm, "%s: set %04x (clr %04x)\n", crtc->name,
+ asyh->set.mask, asyh->clr.mask);
+
+ if (asyh->set.mask) {
+ nv50_head_flush_set_wndw(head, asyh);
+ interlock[NV50_DISP_INTERLOCK_CORE] = 1;
+ }
+ }
+
/* Update plane(s). */
for_each_new_plane_in_state(state, plane, new_plane_state, i) {
struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c
index ec361d17e900..d66f97280282 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/head.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/head.c
@@ -50,11 +50,8 @@ nv50_head_flush_clr(struct nv50_head *head,
}
void
-nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh)
+nv50_head_flush_set_wndw(struct nv50_head *head, struct nv50_head_atom *asyh)
{
- if (asyh->set.view ) head->func->view (head, asyh);
- if (asyh->set.mode ) head->func->mode (head, asyh);
- if (asyh->set.core ) head->func->core_set(head, asyh);
if (asyh->set.olut ) {
asyh->olut.offset = nv50_lut_load(&head->olut,
asyh->olut.buffer,
@@ -62,6 +59,14 @@ nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh)
asyh->olut.load);
head->func->olut_set(head, asyh);
}
+}
+
+void
+nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+ if (asyh->set.view ) head->func->view (head, asyh);
+ if (asyh->set.mode ) head->func->mode (head, asyh);
+ if (asyh->set.core ) head->func->core_set(head, asyh);
if (asyh->set.curs ) head->func->curs_set(head, asyh);
if (asyh->set.base ) head->func->base (head, asyh);
if (asyh->set.ovly ) head->func->ovly (head, asyh);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.h b/drivers/gpu/drm/nouveau/dispnv50/head.h
index dae841dc05fd..0bac6be9ba34 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/head.h
+++ b/drivers/gpu/drm/nouveau/dispnv50/head.h
@@ -21,6 +21,7 @@ struct nv50_head {
struct nv50_head *nv50_head_create(struct drm_device *, int index);
void nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh);
+void nv50_head_flush_set_wndw(struct nv50_head *head, struct nv50_head_atom *asyh);
void nv50_head_flush_clr(struct nv50_head *head,
struct nv50_head_atom *asyh, bool flush);
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
index 0b86c44878e0..59759c4fb62e 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
@@ -4,7 +4,8 @@
struct nv_device_v0 {
__u8 version;
- __u8 pad01[7];
+ __u8 priv;
+ __u8 pad02[6];
__u64 device; /* device identifier, ~0 for client default */
};
diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h
index ba2c28ea43d2..c68cc957248e 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/class.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/class.h
@@ -61,8 +61,6 @@
#define NV10_CHANNEL_DMA /* cl506b.h */ 0x0000006e
#define NV17_CHANNEL_DMA /* cl506b.h */ 0x0000176e
#define NV40_CHANNEL_DMA /* cl506b.h */ 0x0000406e
-#define NV50_CHANNEL_DMA /* cl506e.h */ 0x0000506e
-#define G82_CHANNEL_DMA /* cl826e.h */ 0x0000826e
#define NV50_CHANNEL_GPFIFO /* cl506f.h */ 0x0000506f
#define G82_CHANNEL_GPFIFO /* cl826f.h */ 0x0000826f
diff --git a/drivers/gpu/drm/nouveau/include/nvif/client.h b/drivers/gpu/drm/nouveau/include/nvif/client.h
index 347d2c020bd1..5d9395e651b6 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/client.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/client.h
@@ -9,7 +9,6 @@ struct nvif_client {
const struct nvif_driver *driver;
u64 version;
u8 route;
- bool super;
};
int nvif_client_ctor(struct nvif_client *parent, const char *name, u64 device,
diff --git a/drivers/gpu/drm/nouveau/include/nvif/driver.h b/drivers/gpu/drm/nouveau/include/nvif/driver.h
index 8e85b936eaa0..7a3af05f7f98 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/driver.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/driver.h
@@ -11,7 +11,7 @@ struct nvif_driver {
void (*fini)(void *priv);
int (*suspend)(void *priv);
int (*resume)(void *priv);
- int (*ioctl)(void *priv, bool super, void *data, u32 size, void **hack);
+ int (*ioctl)(void *priv, void *data, u32 size, void **hack);
void __iomem *(*map)(void *priv, u64 handle, u32 size);
void (*unmap)(void *priv, void __iomem *ptr, u32 size);
bool keep;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
index 5d7017fe5039..2f86606e708c 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
@@ -13,7 +13,6 @@ struct nvkm_client {
struct nvkm_client_notify *notify[32];
struct rb_root objroot;
- bool super;
void *data;
int (*ntfy)(const void *, u32, const void *, u32);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/ioctl.h b/drivers/gpu/drm/nouveau/include/nvkm/core/ioctl.h
index 71ed147ad077..f52918a43246 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/ioctl.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/ioctl.h
@@ -4,5 +4,5 @@
#include <core/os.h>
struct nvkm_client;
-int nvkm_ioctl(struct nvkm_client *, bool, void *, u32, void **);
+int nvkm_ioctl(struct nvkm_client *, void *, u32, void **);
#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
index 0911e73f7424..70e7887ef4b4 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
@@ -15,7 +15,6 @@ struct nvkm_vma {
u8 refd:3; /* Current page type (index, or NONE for unreferenced). */
bool used:1; /* Region allocated. */
bool part:1; /* Region was split from an allocated region by map(). */
- bool user:1; /* Region user-allocated. */
bool busy:1; /* Region busy (for temporarily preventing user access). */
bool mapped:1; /* Region contains valid pages. */
struct nvkm_memory *memory; /* Memory currently mapped into VMA. */
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index b45ec3086285..4107b7006539 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -570,11 +570,9 @@ nouveau_abi16_ioctl_notifierobj_alloc(ABI16_IOCTL_ARGS)
}
client->route = NVDRM_OBJECT_ABI16;
- client->super = true;
ret = nvif_object_ctor(&chan->chan->user, "abi16Ntfy", info->handle,
NV_DMA_IN_MEMORY, &args, sizeof(args),
&ntfy->object);
- client->super = false;
client->route = NVDRM_OBJECT_NVIF;
if (ret)
goto done;
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index 40362600eed2..80099ef75702 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -86,12 +86,6 @@ nouveau_channel_del(struct nouveau_channel **pchan)
struct nouveau_channel *chan = *pchan;
if (chan) {
struct nouveau_cli *cli = (void *)chan->user.client;
- bool super;
-
- if (cli) {
- super = cli->base.super;
- cli->base.super = true;
- }
if (chan->fence)
nouveau_fence(chan->drm)->context_del(chan);
@@ -111,9 +105,6 @@ nouveau_channel_del(struct nouveau_channel **pchan)
nouveau_bo_unpin(chan->push.buffer);
nouveau_bo_ref(NULL, &chan->push.buffer);
kfree(chan);
-
- if (cli)
- cli->base.super = super;
}
*pchan = NULL;
}
@@ -512,20 +503,16 @@ nouveau_channel_new(struct nouveau_drm *drm, struct nvif_device *device,
struct nouveau_channel **pchan)
{
struct nouveau_cli *cli = (void *)device->object.client;
- bool super;
int ret;
/* hack until fencenv50 is fixed, and agp access relaxed */
- super = cli->base.super;
- cli->base.super = true;
-
ret = nouveau_channel_ind(drm, device, arg0, priv, pchan);
if (ret) {
NV_PRINTK(dbg, cli, "ib channel create, %d\n", ret);
ret = nouveau_channel_dma(drm, device, pchan);
if (ret) {
NV_PRINTK(dbg, cli, "dma channel create, %d\n", ret);
- goto done;
+ return ret;
}
}
@@ -533,15 +520,13 @@ nouveau_channel_new(struct nouveau_drm *drm, struct nvif_device *device,
if (ret) {
NV_PRINTK(err, cli, "channel failed to initialise, %d\n", ret);
nouveau_channel_del(pchan);
- goto done;
+ return ret;
}
ret = nouveau_svmm_join((*pchan)->vmm->svmm, (*pchan)->inst);
if (ret)
nouveau_channel_del(pchan);
-done:
- cli->base.super = super;
return ret;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index a616cf4573b8..ba4cd5f83725 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -244,6 +244,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
ret = nvif_device_ctor(&cli->base.object, "drmDevice", 0, NV_DEVICE,
&(struct nv_device_v0) {
.device = ~0,
+ .priv = true,
}, sizeof(struct nv_device_v0),
&cli->device);
if (ret) {
@@ -1086,8 +1087,6 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv)
if (ret)
goto done;
- cli->base.super = false;
-
fpriv->driver_priv = cli;
mutex_lock(&drm->client.mutex);
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 0de6549fb875..2ca3207c13fc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -41,8 +41,6 @@ nouveau_mem_map(struct nouveau_mem *mem,
struct gf100_vmm_map_v0 gf100;
} args;
u32 argc = 0;
- bool super;
- int ret;
switch (vmm->object.oclass) {
case NVIF_CLASS_VMM_NV04:
@@ -73,12 +71,7 @@ nouveau_mem_map(struct nouveau_mem *mem,
return -ENOSYS;
}
- super = vmm->object.client->super;
- vmm->object.client->super = true;
- ret = nvif_vmm_map(vmm, vma->addr, mem->mem.size, &args, argc,
- &mem->mem, 0);
- vmm->object.client->super = super;
- return ret;
+ return nvif_vmm_map(vmm, vma->addr, mem->mem.size, &args, argc, &mem->mem, 0);
}
void
@@ -99,7 +92,6 @@ nouveau_mem_host(struct ttm_resource *reg, struct ttm_tt *tt)
struct nouveau_drm *drm = cli->drm;
struct nvif_mmu *mmu = &cli->mmu;
struct nvif_mem_ram_v0 args = {};
- bool super = cli->base.super;
u8 type;
int ret;
@@ -122,11 +114,9 @@ nouveau_mem_host(struct ttm_resource *reg, struct ttm_tt *tt)
args.dma = tt->dma_address;
mutex_lock(&drm->master.lock);
- cli->base.super = true;
ret = nvif_mem_ctor_type(mmu, "ttmHostMem", cli->mem->oclass, type, PAGE_SHIFT,
reg->num_pages << PAGE_SHIFT,
&args, sizeof(args), &mem->mem);
- cli->base.super = super;
mutex_unlock(&drm->master.lock);
return ret;
}
@@ -138,12 +128,10 @@ nouveau_mem_vram(struct ttm_resource *reg, bool contig, u8 page)
struct nouveau_cli *cli = mem->cli;
struct nouveau_drm *drm = cli->drm;
struct nvif_mmu *mmu = &cli->mmu;
- bool super = cli->base.super;
u64 size = ALIGN(reg->num_pages << PAGE_SHIFT, 1 << page);
int ret;
mutex_lock(&drm->master.lock);
- cli->base.super = true;
switch (cli->mem->oclass) {
case NVIF_CLASS_MEM_GF100:
ret = nvif_mem_ctor_type(mmu, "ttmVram", cli->mem->oclass,
@@ -167,7 +155,6 @@ nouveau_mem_vram(struct ttm_resource *reg, bool contig, u8 page)
WARN_ON(1);
break;
}
- cli->base.super = super;
mutex_unlock(&drm->master.lock);
reg->start = mem->mem.addr >> PAGE_SHIFT;
diff --git a/drivers/gpu/drm/nouveau/nouveau_nvif.c b/drivers/gpu/drm/nouveau/nouveau_nvif.c
index b3f29b1ce9ea..52f5793b7274 100644
--- a/drivers/gpu/drm/nouveau/nouveau_nvif.c
+++ b/drivers/gpu/drm/nouveau/nouveau_nvif.c
@@ -52,9 +52,9 @@ nvkm_client_map(void *priv, u64 handle, u32 size)
}
static int
-nvkm_client_ioctl(void *priv, bool super, void *data, u32 size, void **hack)
+nvkm_client_ioctl(void *priv, void *data, u32 size, void **hack)
{
- return nvkm_ioctl(priv, super, data, size, hack);
+ return nvkm_ioctl(priv, data, size, hack);
}
static int
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 82b583f5fca8..b0c3422cb01f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -237,14 +237,11 @@ void
nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit)
{
if (limit > start) {
- bool super = svmm->vmm->vmm.object.client->super;
- svmm->vmm->vmm.object.client->super = true;
nvif_object_mthd(&svmm->vmm->vmm.object, NVIF_VMM_V0_PFNCLR,
&(struct nvif_vmm_pfnclr_v0) {
.addr = start,
.size = limit - start,
}, sizeof(struct nvif_vmm_pfnclr_v0));
- svmm->vmm->vmm.object.client->super = super;
}
}
@@ -634,9 +631,7 @@ static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm,
NVIF_VMM_PFNMAP_V0_A |
NVIF_VMM_PFNMAP_V0_HOST;
- svmm->vmm->vmm.object.client->super = true;
ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL);
- svmm->vmm->vmm.object.client->super = false;
mutex_unlock(&svmm->mutex);
unlock_page(page);
@@ -702,9 +697,7 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm,
nouveau_hmm_convert_pfn(drm, &range, args);
- svmm->vmm->vmm.object.client->super = true;
ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL);
- svmm->vmm->vmm.object.client->super = false;
mutex_unlock(&svmm->mutex);
out:
@@ -928,10 +921,8 @@ nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm,
mutex_lock(&svmm->mutex);
- svmm->vmm->vmm.object.client->super = true;
ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, sizeof(*args) +
npages * sizeof(args->p.phys[0]), NULL);
- svmm->vmm->vmm.object.client->super = false;
mutex_unlock(&svmm->mutex);
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_usif.c b/drivers/gpu/drm/nouveau/nouveau_usif.c
index 9dc10b17ad34..5da1f4d223d7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_usif.c
+++ b/drivers/gpu/drm/nouveau/nouveau_usif.c
@@ -32,6 +32,9 @@
#include <nvif/event.h>
#include <nvif/ioctl.h>
+#include <nvif/class.h>
+#include <nvif/cl0080.h>
+
struct usif_notify_p {
struct drm_pending_event base;
struct {
@@ -261,7 +264,7 @@ usif_object_dtor(struct usif_object *object)
}
static int
-usif_object_new(struct drm_file *f, void *data, u32 size, void *argv, u32 argc)
+usif_object_new(struct drm_file *f, void *data, u32 size, void *argv, u32 argc, bool parent_abi16)
{
struct nouveau_cli *cli = nouveau_cli(f);
struct nvif_client *client = &cli->base;
@@ -271,23 +274,48 @@ usif_object_new(struct drm_file *f, void *data, u32 size, void *argv, u32 argc)
struct usif_object *object;
int ret = -ENOSYS;
+ if ((ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true)))
+ return ret;
+
+ switch (args->v0.oclass) {
+ case NV_DMA_FROM_MEMORY:
+ case NV_DMA_TO_MEMORY:
+ case NV_DMA_IN_MEMORY:
+ return -EINVAL;
+ case NV_DEVICE: {
+ union {
+ struct nv_device_v0 v0;
+ } *args = data;
+
+ if ((ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false)))
+ return ret;
+
+ args->v0.priv = false;
+ break;
+ }
+ default:
+ if (!parent_abi16)
+ return -EINVAL;
+ break;
+ }
+
if (!(object = kmalloc(sizeof(*object), GFP_KERNEL)))
return -ENOMEM;
list_add(&object->head, &cli->objects);
- if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) {
- object->route = args->v0.route;
- object->token = args->v0.token;
- args->v0.route = NVDRM_OBJECT_USIF;
- args->v0.token = (unsigned long)(void *)object;
- ret = nvif_client_ioctl(client, argv, argc);
- args->v0.token = object->token;
- args->v0.route = object->route;
+ object->route = args->v0.route;
+ object->token = args->v0.token;
+ args->v0.route = NVDRM_OBJECT_USIF;
+ args->v0.token = (unsigned long)(void *)object;
+ ret = nvif_client_ioctl(client, argv, argc);
+ if (ret) {
+ usif_object_dtor(object);
+ return ret;
}
- if (ret)
- usif_object_dtor(object);
- return ret;
+ args->v0.token = object->token;
+ args->v0.route = object->route;
+ return 0;
}
int
@@ -301,6 +329,7 @@ usif_ioctl(struct drm_file *filp, void __user *user, u32 argc)
struct nvif_ioctl_v0 v0;
} *argv = data;
struct usif_object *object;
+ bool abi16 = false;
u8 owner;
int ret;
@@ -331,11 +360,13 @@ usif_ioctl(struct drm_file *filp, void __user *user, u32 argc)
mutex_unlock(&cli->mutex);
goto done;
}
+
+ abi16 = true;
}
switch (argv->v0.type) {
case NVIF_IOCTL_V0_NEW:
- ret = usif_object_new(filp, data, size, argv, argc);
+ ret = usif_object_new(filp, data, size, argv, argc, abi16);
break;
case NVIF_IOCTL_V0_NTFY_NEW:
ret = usif_notify_new(filp, data, size, argv, argc);
diff --git a/drivers/gpu/drm/nouveau/nvif/client.c b/drivers/gpu/drm/nouveau/nvif/client.c
index 12644f811b3e..a3264a0e933a 100644
--- a/drivers/gpu/drm/nouveau/nvif/client.c
+++ b/drivers/gpu/drm/nouveau/nvif/client.c
@@ -32,7 +32,7 @@
int
nvif_client_ioctl(struct nvif_client *client, void *data, u32 size)
{
- return client->driver->ioctl(client->object.priv, client->super, data, size, NULL);
+ return client->driver->ioctl(client->object.priv, data, size, NULL);
}
int
@@ -80,7 +80,6 @@ nvif_client_ctor(struct nvif_client *parent, const char *name, u64 device,
client->object.client = client;
client->object.handle = ~0;
client->route = NVIF_IOCTL_V0_ROUTE_NVIF;
- client->super = true;
client->driver = parent->driver;
if (ret == 0) {
diff --git a/drivers/gpu/drm/nouveau/nvif/object.c b/drivers/gpu/drm/nouveau/nvif/object.c
index 671a5c0199e0..dce1ecee2af5 100644
--- a/drivers/gpu/drm/nouveau/nvif/object.c
+++ b/drivers/gpu/drm/nouveau/nvif/object.c
@@ -44,8 +44,7 @@ nvif_object_ioctl(struct nvif_object *object, void *data, u32 size, void **hack)
} else
return -ENOSYS;
- return client->driver->ioctl(client->object.priv, client->super,
- data, size, hack);
+ return client->driver->ioctl(client->object.priv, data, size, hack);
}
void
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
index d777df5a64e6..735cb6816f10 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
@@ -426,8 +426,7 @@ nvkm_ioctl_path(struct nvkm_client *client, u64 handle, u32 type,
}
int
-nvkm_ioctl(struct nvkm_client *client, bool supervisor,
- void *data, u32 size, void **hack)
+nvkm_ioctl(struct nvkm_client *client, void *data, u32 size, void **hack)
{
struct nvkm_object *object = &client->object;
union {
@@ -435,7 +434,6 @@ nvkm_ioctl(struct nvkm_client *client, bool supervisor,
} *args = data;
int ret = -ENOSYS;
- client->super = supervisor;
nvif_ioctl(object, "size %d\n", size);
if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index b930f539feec..93ddf63d1114 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2624,6 +2624,26 @@ nv174_chipset = {
.dma = { 0x00000001, gv100_dma_new },
};
+static const struct nvkm_device_chip
+nv177_chipset = {
+ .name = "GA107",
+ .bar = { 0x00000001, tu102_bar_new },
+ .bios = { 0x00000001, nvkm_bios_new },
+ .devinit = { 0x00000001, ga100_devinit_new },
+ .fb = { 0x00000001, ga102_fb_new },
+ .gpio = { 0x00000001, ga102_gpio_new },
+ .i2c = { 0x00000001, gm200_i2c_new },
+ .imem = { 0x00000001, nv50_instmem_new },
+ .mc = { 0x00000001, ga100_mc_new },
+ .mmu = { 0x00000001, tu102_mmu_new },
+ .pci = { 0x00000001, gp100_pci_new },
+ .privring = { 0x00000001, gm200_privring_new },
+ .timer = { 0x00000001, gk20a_timer_new },
+ .top = { 0x00000001, ga100_top_new },
+ .disp = { 0x00000001, ga102_disp_new },
+ .dma = { 0x00000001, gv100_dma_new },
+};
+
static int
nvkm_device_event_ctor(struct nvkm_object *object, void *data, u32 size,
struct nvkm_notify *notify)
@@ -3049,6 +3069,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
case 0x168: device->chip = &nv168_chipset; break;
case 0x172: device->chip = &nv172_chipset; break;
case 0x174: device->chip = &nv174_chipset; break;
+ case 0x177: device->chip = &nv177_chipset; break;
default:
if (nvkm_boolopt(device->cfgopt, "NvEnableUnsupportedChipsets", false)) {
switch (device->chipset) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
index fea9d8f2b10c..f28894fdede9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
@@ -397,7 +397,7 @@ nvkm_udevice_new(const struct nvkm_oclass *oclass, void *data, u32 size,
return ret;
/* give priviledged clients register access */
- if (client->super)
+ if (args->v0.priv)
func = &nvkm_udevice_super;
else
func = &nvkm_udevice;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
index 55fbfe28c6dc..9669472a2749 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
@@ -440,7 +440,7 @@ nvkm_dp_train(struct nvkm_dp *dp, u32 dataKBps)
return ret;
}
-static void
+void
nvkm_dp_disable(struct nvkm_outp *outp, struct nvkm_ior *ior)
{
struct nvkm_dp *dp = nvkm_dp(outp);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h
index 428b3f488f03..e484d0c3b0d4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h
@@ -32,6 +32,7 @@ struct nvkm_dp {
int nvkm_dp_new(struct nvkm_disp *, int index, struct dcb_output *,
struct nvkm_outp **);
+void nvkm_dp_disable(struct nvkm_outp *, struct nvkm_ior *);
/* DPCD Receiver Capabilities */
#define DPCD_RC00_DPCD_REV 0x00000
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c
index dffcac249211..129982fef7ef 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c
@@ -22,6 +22,7 @@
* Authors: Ben Skeggs
*/
#include "outp.h"
+#include "dp.h"
#include "ior.h"
#include <subdev/bios.h>
@@ -257,6 +258,14 @@ nvkm_outp_init_route(struct nvkm_outp *outp)
if (!ior->arm.head || ior->arm.proto != proto) {
OUTP_DBG(outp, "no heads (%x %d %d)", ior->arm.head,
ior->arm.proto, proto);
+
+ /* The EFI GOP driver on Ampere can leave unused DP links routed,
+ * which we don't expect. The DisableLT IED script *should* get
+ * us back to where we need to be.
+ */
+ if (ior->func->route.get && !ior->arm.head && outp->info.type == DCB_OUTPUT_DP)
+ nvkm_dp_disable(outp, ior);
+
return;
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c
index d20cc0681a88..797131ed7d67 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c
@@ -26,7 +26,6 @@
#include <core/client.h>
#include <core/gpuobj.h>
#include <subdev/fb.h>
-#include <subdev/instmem.h>
#include <nvif/cl0002.h>
#include <nvif/unpack.h>
@@ -72,11 +71,7 @@ nvkm_dmaobj_ctor(const struct nvkm_dmaobj_func *func, struct nvkm_dma *dma,
union {
struct nv_dma_v0 v0;
} *args = *pdata;
- struct nvkm_device *device = dma->engine.subdev.device;
- struct nvkm_client *client = oclass->client;
struct nvkm_object *parent = oclass->parent;
- struct nvkm_instmem *instmem = device->imem;
- struct nvkm_fb *fb = device->fb;
void *data = *pdata;
u32 size = *psize;
int ret = -ENOSYS;
@@ -109,23 +104,13 @@ nvkm_dmaobj_ctor(const struct nvkm_dmaobj_func *func, struct nvkm_dma *dma,
dmaobj->target = NV_MEM_TARGET_VM;
break;
case NV_DMA_V0_TARGET_VRAM:
- if (!client->super) {
- if (dmaobj->limit >= fb->ram->size - instmem->reserved)
- return -EACCES;
- if (device->card_type >= NV_50)
- return -EACCES;
- }
dmaobj->target = NV_MEM_TARGET_VRAM;
break;
case NV_DMA_V0_TARGET_PCI:
- if (!client->super)
- return -EACCES;
dmaobj->target = NV_MEM_TARGET_PCI;
break;
case NV_DMA_V0_TARGET_PCI_US:
case NV_DMA_V0_TARGET_AGP:
- if (!client->super)
- return -EACCES;
dmaobj->target = NV_MEM_TARGET_PCI_NOSNOOP;
break;
default:
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
index 90e9a0972a44..3209eb7af65f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
@@ -27,8 +27,6 @@ nvkm-y += nvkm/engine/fifo/dmanv04.o
nvkm-y += nvkm/engine/fifo/dmanv10.o
nvkm-y += nvkm/engine/fifo/dmanv17.o
nvkm-y += nvkm/engine/fifo/dmanv40.o
-nvkm-y += nvkm/engine/fifo/dmanv50.o
-nvkm-y += nvkm/engine/fifo/dmag84.o
nvkm-y += nvkm/engine/fifo/gpfifonv50.o
nvkm-y += nvkm/engine/fifo/gpfifog84.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h
index af8bdf275552..3a95730d7ff5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h
@@ -48,8 +48,6 @@ void nv50_fifo_chan_object_dtor(struct nvkm_fifo_chan *, int);
int g84_fifo_chan_ctor(struct nv50_fifo *, u64 vmm, u64 push,
const struct nvkm_oclass *, struct nv50_fifo_chan *);
-extern const struct nvkm_fifo_chan_oclass nv50_fifo_dma_oclass;
extern const struct nvkm_fifo_chan_oclass nv50_fifo_gpfifo_oclass;
-extern const struct nvkm_fifo_chan_oclass g84_fifo_dma_oclass;
extern const struct nvkm_fifo_chan_oclass g84_fifo_gpfifo_oclass;
#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c
deleted file mode 100644
index fc34cddcd2f5..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "channv50.h"
-
-#include <core/client.h>
-#include <core/ramht.h>
-
-#include <nvif/class.h>
-#include <nvif/cl826e.h>
-#include <nvif/unpack.h>
-
-static int
-g84_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
- void *data, u32 size, struct nvkm_object **pobject)
-{
- struct nvkm_object *parent = oclass->parent;
- union {
- struct g82_channel_dma_v0 v0;
- } *args = data;
- struct nv50_fifo *fifo = nv50_fifo(base);
- struct nv50_fifo_chan *chan;
- int ret = -ENOSYS;
-
- nvif_ioctl(parent, "create channel dma size %d\n", size);
- if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
- nvif_ioctl(parent, "create channel dma vers %d vmm %llx "
- "pushbuf %llx offset %016llx\n",
- args->v0.version, args->v0.vmm, args->v0.pushbuf,
- args->v0.offset);
- if (!args->v0.pushbuf)
- return -EINVAL;
- } else
- return ret;
-
- if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
- return -ENOMEM;
- *pobject = &chan->base.object;
-
- ret = g84_fifo_chan_ctor(fifo, args->v0.vmm, args->v0.pushbuf,
- oclass, chan);
- if (ret)
- return ret;
-
- args->v0.chid = chan->base.chid;
-
- nvkm_kmap(chan->ramfc);
- nvkm_wo32(chan->ramfc, 0x08, lower_32_bits(args->v0.offset));
- nvkm_wo32(chan->ramfc, 0x0c, upper_32_bits(args->v0.offset));
- nvkm_wo32(chan->ramfc, 0x10, lower_32_bits(args->v0.offset));
- nvkm_wo32(chan->ramfc, 0x14, upper_32_bits(args->v0.offset));
- nvkm_wo32(chan->ramfc, 0x3c, 0x003f6078);
- nvkm_wo32(chan->ramfc, 0x44, 0x01003fff);
- nvkm_wo32(chan->ramfc, 0x48, chan->base.push->node->offset >> 4);
- nvkm_wo32(chan->ramfc, 0x4c, 0xffffffff);
- nvkm_wo32(chan->ramfc, 0x60, 0x7fffffff);
- nvkm_wo32(chan->ramfc, 0x78, 0x00000000);
- nvkm_wo32(chan->ramfc, 0x7c, 0x30000001);
- nvkm_wo32(chan->ramfc, 0x80, ((chan->ramht->bits - 9) << 27) |
- (4 << 24) /* SEARCH_FULL */ |
- (chan->ramht->gpuobj->node->offset >> 4));
- nvkm_wo32(chan->ramfc, 0x88, chan->cache->addr >> 10);
- nvkm_wo32(chan->ramfc, 0x98, chan->base.inst->addr >> 12);
- nvkm_done(chan->ramfc);
- return 0;
-}
-
-const struct nvkm_fifo_chan_oclass
-g84_fifo_dma_oclass = {
- .base.oclass = G82_CHANNEL_DMA,
- .base.minver = 0,
- .base.maxver = 0,
- .ctor = g84_fifo_dma_new,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c
deleted file mode 100644
index 8043718ad150..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "channv50.h"
-
-#include <core/client.h>
-#include <core/ramht.h>
-
-#include <nvif/class.h>
-#include <nvif/cl506e.h>
-#include <nvif/unpack.h>
-
-static int
-nv50_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
- void *data, u32 size, struct nvkm_object **pobject)
-{
- struct nvkm_object *parent = oclass->parent;
- union {
- struct nv50_channel_dma_v0 v0;
- } *args = data;
- struct nv50_fifo *fifo = nv50_fifo(base);
- struct nv50_fifo_chan *chan;
- int ret = -ENOSYS;
-
- nvif_ioctl(parent, "create channel dma size %d\n", size);
- if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
- nvif_ioctl(parent, "create channel dma vers %d vmm %llx "
- "pushbuf %llx offset %016llx\n",
- args->v0.version, args->v0.vmm, args->v0.pushbuf,
- args->v0.offset);
- if (!args->v0.pushbuf)
- return -EINVAL;
- } else
- return ret;
-
- if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
- return -ENOMEM;
- *pobject = &chan->base.object;
-
- ret = nv50_fifo_chan_ctor(fifo, args->v0.vmm, args->v0.pushbuf,
- oclass, chan);
- if (ret)
- return ret;
-
- args->v0.chid = chan->base.chid;
-
- nvkm_kmap(chan->ramfc);
- nvkm_wo32(chan->ramfc, 0x08, lower_32_bits(args->v0.offset));
- nvkm_wo32(chan->ramfc, 0x0c, upper_32_bits(args->v0.offset));
- nvkm_wo32(chan->ramfc, 0x10, lower_32_bits(args->v0.offset));
- nvkm_wo32(chan->ramfc, 0x14, upper_32_bits(args->v0.offset));
- nvkm_wo32(chan->ramfc, 0x3c, 0x003f6078);
- nvkm_wo32(chan->ramfc, 0x44, 0x01003fff);
- nvkm_wo32(chan->ramfc, 0x48, chan->base.push->node->offset >> 4);
- nvkm_wo32(chan->ramfc, 0x4c, 0xffffffff);
- nvkm_wo32(chan->ramfc, 0x60, 0x7fffffff);
- nvkm_wo32(chan->ramfc, 0x78, 0x00000000);
- nvkm_wo32(chan->ramfc, 0x7c, 0x30000001);
- nvkm_wo32(chan->ramfc, 0x80, ((chan->ramht->bits - 9) << 27) |
- (4 << 24) /* SEARCH_FULL */ |
- (chan->ramht->gpuobj->node->offset >> 4));
- nvkm_done(chan->ramfc);
- return 0;
-}
-
-const struct nvkm_fifo_chan_oclass
-nv50_fifo_dma_oclass = {
- .base.oclass = NV50_CHANNEL_DMA,
- .base.minver = 0,
- .base.maxver = 0,
- .ctor = nv50_fifo_dma_new,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c
index c0a7d0f21dac..3885c3830b94 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c
@@ -119,7 +119,6 @@ g84_fifo = {
.uevent_init = g84_fifo_uevent_init,
.uevent_fini = g84_fifo_uevent_fini,
.chan = {
- &g84_fifo_dma_oclass,
&g84_fifo_gpfifo_oclass,
NULL
},
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
index b6900a52bcce..ae6c4d846eb5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
@@ -341,8 +341,6 @@ gk104_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass,
"runlist %016llx priv %d\n",
args->v0.version, args->v0.vmm, args->v0.ioffset,
args->v0.ilength, args->v0.runlist, args->v0.priv);
- if (args->v0.priv && !oclass->client->super)
- return -EINVAL;
return gk104_fifo_gpfifo_new_(fifo,
&args->v0.runlist,
&args->v0.chid,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c
index ee4967b706a7..743791c514fe 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c
@@ -226,8 +226,6 @@ gv100_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass,
"runlist %016llx priv %d\n",
args->v0.version, args->v0.vmm, args->v0.ioffset,
args->v0.ilength, args->v0.runlist, args->v0.priv);
- if (args->v0.priv && !oclass->client->super)
- return -EINVAL;
return gv100_fifo_gpfifo_new_(&gv100_fifo_gpfifo, fifo,
&args->v0.runlist,
&args->v0.chid,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c
index abef7fb6e2d3..99aafa103a31 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c
@@ -65,8 +65,6 @@ tu102_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass,
"runlist %016llx priv %d\n",
args->v0.version, args->v0.vmm, args->v0.ioffset,
args->v0.ilength, args->v0.runlist, args->v0.priv);
- if (args->v0.priv && !oclass->client->super)
- return -EINVAL;
return gv100_fifo_gpfifo_new_(&tu102_fifo_gpfifo, fifo,
&args->v0.runlist,
&args->v0.chid,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c
index be94156ea248..a08742cf425a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c
@@ -136,7 +136,6 @@ nv50_fifo = {
.pause = nv04_fifo_pause,
.start = nv04_fifo_start,
.chan = {
- &nv50_fifo_dma_oclass,
&nv50_fifo_gpfifo_oclass,
NULL
},
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c
index fac2f9a45ea6..e530bb8b3b17 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c
@@ -41,7 +41,7 @@ nvkm_umem_search(struct nvkm_client *client, u64 handle)
object = nvkm_object_search(client, handle, &nvkm_umem);
if (IS_ERR(object)) {
- if (client->super && client != master) {
+ if (client != master) {
spin_lock(&master->lock);
list_for_each_entry(umem, &master->umem, head) {
if (umem->object.object == handle) {
@@ -53,8 +53,7 @@ nvkm_umem_search(struct nvkm_client *client, u64 handle)
}
} else {
umem = nvkm_umem(object);
- if (!umem->priv || client->super)
- memory = nvkm_memory_ref(umem->memory);
+ memory = nvkm_memory_ref(umem->memory);
}
return memory ? memory : ERR_PTR(-ENOENT);
@@ -167,7 +166,6 @@ nvkm_umem_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
nvkm_object_ctor(&nvkm_umem, oclass, &umem->object);
umem->mmu = mmu;
umem->type = mmu->type[type].type;
- umem->priv = oclass->client->super;
INIT_LIST_HEAD(&umem->head);
*pobject = &umem->object;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h
index 85cf692d620a..d56a594016cc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h
@@ -8,7 +8,6 @@ struct nvkm_umem {
struct nvkm_object object;
struct nvkm_mmu *mmu;
u8 type:8;
- bool priv:1;
bool mappable:1;
bool io:1;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c
index 0e4b8941da37..6870fda4b188 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c
@@ -34,7 +34,7 @@ nvkm_ummu_sclass(struct nvkm_object *object, int index,
{
struct nvkm_mmu *mmu = nvkm_ummu(object)->mmu;
- if (mmu->func->mem.user.oclass && oclass->client->super) {
+ if (mmu->func->mem.user.oclass) {
if (index-- == 0) {
oclass->base = mmu->func->mem.user;
oclass->ctor = nvkm_umem_new;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c
index c43b8248c682..d6a1f8d04c09 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c
@@ -45,7 +45,6 @@ nvkm_uvmm_search(struct nvkm_client *client, u64 handle)
static int
nvkm_uvmm_mthd_pfnclr(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
{
- struct nvkm_client *client = uvmm->object.client;
union {
struct nvif_vmm_pfnclr_v0 v0;
} *args = argv;
@@ -59,9 +58,6 @@ nvkm_uvmm_mthd_pfnclr(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
} else
return ret;
- if (!client->super)
- return -ENOENT;
-
if (size) {
mutex_lock(&vmm->mutex);
ret = nvkm_vmm_pfn_unmap(vmm, addr, size);
@@ -74,7 +70,6 @@ nvkm_uvmm_mthd_pfnclr(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
static int
nvkm_uvmm_mthd_pfnmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
{
- struct nvkm_client *client = uvmm->object.client;
union {
struct nvif_vmm_pfnmap_v0 v0;
} *args = argv;
@@ -93,9 +88,6 @@ nvkm_uvmm_mthd_pfnmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
} else
return ret;
- if (!client->super)
- return -ENOENT;
-
if (size) {
mutex_lock(&vmm->mutex);
ret = nvkm_vmm_pfn_map(vmm, page, addr, size, phys);
@@ -108,7 +100,6 @@ nvkm_uvmm_mthd_pfnmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
static int
nvkm_uvmm_mthd_unmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
{
- struct nvkm_client *client = uvmm->object.client;
union {
struct nvif_vmm_unmap_v0 v0;
} *args = argv;
@@ -130,9 +121,8 @@ nvkm_uvmm_mthd_unmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
goto done;
}
- if (ret = -ENOENT, (!vma->user && !client->super) || vma->busy) {
- VMM_DEBUG(vmm, "denied %016llx: %d %d %d", addr,
- vma->user, !client->super, vma->busy);
+ if (ret = -ENOENT, vma->busy) {
+ VMM_DEBUG(vmm, "denied %016llx: %d", addr, vma->busy);
goto done;
}
@@ -181,9 +171,8 @@ nvkm_uvmm_mthd_map(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
goto fail;
}
- if (ret = -ENOENT, (!vma->user && !client->super) || vma->busy) {
- VMM_DEBUG(vmm, "denied %016llx: %d %d %d", addr,
- vma->user, !client->super, vma->busy);
+ if (ret = -ENOENT, vma->busy) {
+ VMM_DEBUG(vmm, "denied %016llx: %d", addr, vma->busy);
goto fail;
}
@@ -230,7 +219,6 @@ fail:
static int
nvkm_uvmm_mthd_put(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
{
- struct nvkm_client *client = uvmm->object.client;
union {
struct nvif_vmm_put_v0 v0;
} *args = argv;
@@ -252,9 +240,8 @@ nvkm_uvmm_mthd_put(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
goto done;
}
- if (ret = -ENOENT, (!vma->user && !client->super) || vma->busy) {
- VMM_DEBUG(vmm, "denied %016llx: %d %d %d", addr,
- vma->user, !client->super, vma->busy);
+ if (ret = -ENOENT, vma->busy) {
+ VMM_DEBUG(vmm, "denied %016llx: %d", addr, vma->busy);
goto done;
}
@@ -268,7 +255,6 @@ done:
static int
nvkm_uvmm_mthd_get(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
{
- struct nvkm_client *client = uvmm->object.client;
union {
struct nvif_vmm_get_v0 v0;
} *args = argv;
@@ -297,7 +283,6 @@ nvkm_uvmm_mthd_get(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
return ret;
args->v0.addr = vma->addr;
- vma->user = !client->super;
return ret;
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
index 710f3f8dc7c9..8bf00b396ec1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
@@ -774,7 +774,6 @@ nvkm_vma_tail(struct nvkm_vma *vma, u64 tail)
new->refd = vma->refd;
new->used = vma->used;
new->part = vma->part;
- new->user = vma->user;
new->busy = vma->busy;
new->mapped = vma->mapped;
list_add(&new->head, &vma->head);
@@ -951,7 +950,7 @@ nvkm_vmm_node_split(struct nvkm_vmm *vmm,
static void
nvkm_vma_dump(struct nvkm_vma *vma)
{
- printk(KERN_ERR "%016llx %016llx %c%c%c%c%c%c%c%c%c %p\n",
+ printk(KERN_ERR "%016llx %016llx %c%c%c%c%c%c%c%c %p\n",
vma->addr, (u64)vma->size,
vma->used ? '-' : 'F',
vma->mapref ? 'R' : '-',
@@ -959,7 +958,6 @@ nvkm_vma_dump(struct nvkm_vma *vma)
vma->page != NVKM_VMA_PAGE_NONE ? '0' + vma->page : '-',
vma->refd != NVKM_VMA_PAGE_NONE ? '0' + vma->refd : '-',
vma->part ? 'P' : '-',
- vma->user ? 'U' : '-',
vma->busy ? 'B' : '-',
vma->mapped ? 'M' : '-',
vma->memory);
@@ -1024,7 +1022,6 @@ nvkm_vmm_ctor_managed(struct nvkm_vmm *vmm, u64 addr, u64 size)
vma->mapref = true;
vma->sparse = false;
vma->used = true;
- vma->user = true;
nvkm_vmm_node_insert(vmm, vma);
list_add_tail(&vma->head, &vmm->list);
return 0;
@@ -1615,7 +1612,6 @@ nvkm_vmm_put_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
vma->page = NVKM_VMA_PAGE_NONE;
vma->refd = NVKM_VMA_PAGE_NONE;
vma->used = false;
- vma->user = false;
nvkm_vmm_put_region(vmm, vma);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
index f02abd9cb4dd..b5e733783b5b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
@@ -534,15 +534,13 @@ int
gp100_vmm_mthd(struct nvkm_vmm *vmm,
struct nvkm_client *client, u32 mthd, void *argv, u32 argc)
{
- if (client->super) {
- switch (mthd) {
- case GP100_VMM_VN_FAULT_REPLAY:
- return gp100_vmm_fault_replay(vmm, argv, argc);
- case GP100_VMM_VN_FAULT_CANCEL:
- return gp100_vmm_fault_cancel(vmm, argv, argc);
- default:
- break;
- }
+ switch (mthd) {
+ case GP100_VMM_VN_FAULT_REPLAY:
+ return gp100_vmm_fault_replay(vmm, argv, argc);
+ case GP100_VMM_VN_FAULT_CANCEL:
+ return gp100_vmm_fault_cancel(vmm, argv, argc);
+ default:
+ break;
}
return -EINVAL;
}
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 74e3b460132b..2df59b3c2ea1 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -78,9 +78,7 @@ static int ttm_global_init(void)
ttm_debugfs_root = debugfs_create_dir("ttm", NULL);
if (IS_ERR(ttm_debugfs_root)) {
- ret = PTR_ERR(ttm_debugfs_root);
ttm_debugfs_root = NULL;
- goto out;
}
/* Limit the number of pages in the pool to about 50% of the total
diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index d166ee262ce4..118318513e2d 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -1003,19 +1003,16 @@ err_cpmem:
static void ipu_irq_handle(struct ipu_soc *ipu, const int *regs, int num_regs)
{
unsigned long status;
- int i, bit, irq;
+ int i, bit;
for (i = 0; i < num_regs; i++) {
status = ipu_cm_read(ipu, IPU_INT_STAT(regs[i]));
status &= ipu_cm_read(ipu, IPU_INT_CTRL(regs[i]));
- for_each_set_bit(bit, &status, 32) {
- irq = irq_linear_revmap(ipu->domain,
- regs[i] * 32 + bit);
- if (irq)
- generic_handle_irq(irq);
- }
+ for_each_set_bit(bit, &status, 32)
+ generic_handle_domain_irq(ipu->domain,
+ regs[i] * 32 + bit);
}
}
diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c
index a1c85d1521f5..82b244cb313e 100644
--- a/drivers/gpu/ipu-v3/ipu-cpmem.c
+++ b/drivers/gpu/ipu-v3/ipu-cpmem.c
@@ -585,21 +585,21 @@ static const struct ipu_rgb def_bgra_16 = {
.bits_per_pixel = 16,
};
-#define Y_OFFSET(pix, x, y) ((x) + pix->width * (y))
-#define U_OFFSET(pix, x, y) ((pix->width * pix->height) + \
- (pix->width * ((y) / 2) / 2) + (x) / 2)
-#define V_OFFSET(pix, x, y) ((pix->width * pix->height) + \
- (pix->width * pix->height / 4) + \
- (pix->width * ((y) / 2) / 2) + (x) / 2)
-#define U2_OFFSET(pix, x, y) ((pix->width * pix->height) + \
- (pix->width * (y) / 2) + (x) / 2)
-#define V2_OFFSET(pix, x, y) ((pix->width * pix->height) + \
- (pix->width * pix->height / 2) + \
- (pix->width * (y) / 2) + (x) / 2)
-#define UV_OFFSET(pix, x, y) ((pix->width * pix->height) + \
- (pix->width * ((y) / 2)) + (x))
-#define UV2_OFFSET(pix, x, y) ((pix->width * pix->height) + \
- (pix->width * y) + (x))
+#define Y_OFFSET(pix, x, y) ((x) + pix->bytesperline * (y))
+#define U_OFFSET(pix, x, y) ((pix->bytesperline * pix->height) + \
+ (pix->bytesperline * ((y) / 2) / 2) + (x) / 2)
+#define V_OFFSET(pix, x, y) ((pix->bytesperline * pix->height) + \
+ (pix->bytesperline * pix->height / 4) + \
+ (pix->bytesperline * ((y) / 2) / 2) + (x) / 2)
+#define U2_OFFSET(pix, x, y) ((pix->bytesperline * pix->height) + \
+ (pix->bytesperline * (y) / 2) + (x) / 2)
+#define V2_OFFSET(pix, x, y) ((pix->bytesperline * pix->height) + \
+ (pix->bytesperline * pix->height / 2) + \
+ (pix->bytesperline * (y) / 2) + (x) / 2)
+#define UV_OFFSET(pix, x, y) ((pix->bytesperline * pix->height) + \
+ (pix->bytesperline * ((y) / 2)) + (x))
+#define UV2_OFFSET(pix, x, y) ((pix->bytesperline * pix->height) + \
+ (pix->bytesperline * y) + (x))
#define NUM_ALPHA_CHANNELS 7
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index e3675377bc5d..c4578e8f34bb 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -254,6 +254,16 @@ config SENSORS_AHT10
This driver can also be built as a module. If so, the module
will be called aht10.
+config SENSORS_AQUACOMPUTER_D5NEXT
+ tristate "Aquacomputer D5 Next watercooling pump"
+ depends on USB_HID
+ help
+ If you say yes here you get support for the Aquacomputer D5 Next
+ watercooling pump sensors.
+
+ This driver can also be built as a module. If so, the module
+ will be called aquacomputer_d5next.
+
config SENSORS_AS370
tristate "Synaptics AS370 SoC hardware monitoring driver"
help
@@ -1551,6 +1561,16 @@ config SENSORS_SBTSI
This driver can also be built as a module. If so, the module will
be called sbtsi_temp.
+config SENSORS_SBRMI
+ tristate "Emulated SB-RMI sensor"
+ depends on I2C
+ help
+ If you say yes here you get support for emulated RMI
+ sensors on AMD SoCs with APML interface connected to a BMC device.
+
+ This driver can also be built as a module. If so, the module will
+ be called sbrmi.
+
config SENSORS_SHT15
tristate "Sensiron humidity and temperature sensors. SHT15 and compat."
depends on GPIOLIB || COMPILE_TEST
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index d712c61c1f5e..162940270661 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -45,8 +45,8 @@ obj-$(CONFIG_SENSORS_ADT7462) += adt7462.o
obj-$(CONFIG_SENSORS_ADT7470) += adt7470.o
obj-$(CONFIG_SENSORS_ADT7475) += adt7475.o
obj-$(CONFIG_SENSORS_AHT10) += aht10.o
-obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
obj-$(CONFIG_SENSORS_APPLESMC) += applesmc.o
+obj-$(CONFIG_SENSORS_AQUACOMPUTER_D5NEXT) += aquacomputer_d5next.o
obj-$(CONFIG_SENSORS_ARM_SCMI) += scmi-hwmon.o
obj-$(CONFIG_SENSORS_ARM_SCPI) += scpi-hwmon.o
obj-$(CONFIG_SENSORS_AS370) += as370-hwmon.o
@@ -164,6 +164,7 @@ obj-$(CONFIG_SENSORS_PWM_FAN) += pwm-fan.o
obj-$(CONFIG_SENSORS_RASPBERRYPI_HWMON) += raspberrypi-hwmon.o
obj-$(CONFIG_SENSORS_S3C) += s3c-hwmon.o
obj-$(CONFIG_SENSORS_SBTSI) += sbtsi_temp.o
+obj-$(CONFIG_SENSORS_SBRMI) += sbrmi.o
obj-$(CONFIG_SENSORS_SCH56XX_COMMON)+= sch56xx-common.o
obj-$(CONFIG_SENSORS_SCH5627) += sch5627.o
obj-$(CONFIG_SENSORS_SCH5636) += sch5636.o
diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c
index 2e8feacccf84..d519aca4a9d6 100644
--- a/drivers/hwmon/adt7470.c
+++ b/drivers/hwmon/adt7470.c
@@ -18,6 +18,7 @@
#include <linux/delay.h>
#include <linux/log2.h>
#include <linux/kthread.h>
+#include <linux/regmap.h>
#include <linux/slab.h>
#include <linux/util_macros.h>
@@ -35,7 +36,10 @@ static const unsigned short normal_i2c[] = { 0x2C, 0x2E, 0x2F, I2C_CLIENT_END };
#define ADT7470_REG_PWM_MAX_BASE_ADDR 0x38
#define ADT7470_REG_PWM_MAX_MAX_ADDR 0x3B
#define ADT7470_REG_CFG 0x40
+#define ADT7470_STRT_MASK 0x01
+#define ADT7470_TEST_MASK 0x02
#define ADT7470_FSPD_MASK 0x04
+#define ADT7470_T05_STB_MASK 0x80
#define ADT7470_REG_ALARM1 0x41
#define ADT7470_R1T_ALARM 0x01
#define ADT7470_R2T_ALARM 0x02
@@ -137,7 +141,7 @@ static const unsigned short normal_i2c[] = { 0x2C, 0x2E, 0x2F, I2C_CLIENT_END };
#define ADT7470_FREQ_SHIFT 4
struct adt7470_data {
- struct i2c_client *client;
+ struct regmap *regmap;
struct mutex lock;
char sensors_valid;
char limits_valid;
@@ -171,51 +175,76 @@ struct adt7470_data {
* 16-bit registers on the ADT7470 are low-byte first. The data sheet says
* that the low byte must be read before the high byte.
*/
-static inline int adt7470_read_word_data(struct i2c_client *client, u8 reg)
+static inline int adt7470_read_word_data(struct adt7470_data *data, unsigned int reg,
+ unsigned int *val)
{
- u16 foo;
- foo = i2c_smbus_read_byte_data(client, reg);
- foo |= ((u16)i2c_smbus_read_byte_data(client, reg + 1) << 8);
- return foo;
+ u8 regval[2];
+ int err;
+
+ err = regmap_bulk_read(data->regmap, reg, &regval, 2);
+ if (err < 0)
+ return err;
+
+ *val = regval[0] | (regval[1] << 8);
+
+ return 0;
}
-static inline int adt7470_write_word_data(struct i2c_client *client, u8 reg,
- u16 value)
+static inline int adt7470_write_word_data(struct adt7470_data *data, unsigned int reg,
+ unsigned int val)
{
- return i2c_smbus_write_byte_data(client, reg, value & 0xFF)
- || i2c_smbus_write_byte_data(client, reg + 1, value >> 8);
+ u8 regval[2];
+
+ regval[0] = val & 0xFF;
+ regval[1] = val >> 8;
+
+ return regmap_bulk_write(data->regmap, reg, &regval, 2);
}
/* Probe for temperature sensors. Assumes lock is held */
-static int adt7470_read_temperatures(struct i2c_client *client,
- struct adt7470_data *data)
+static int adt7470_read_temperatures(struct adt7470_data *data)
{
unsigned long res;
+ unsigned int pwm_cfg[2];
+ int err;
int i;
- u8 cfg, pwm[4], pwm_cfg[2];
+ u8 pwm[ADT7470_FAN_COUNT];
/* save pwm[1-4] config register */
- pwm_cfg[0] = i2c_smbus_read_byte_data(client, ADT7470_REG_PWM_CFG(0));
- pwm_cfg[1] = i2c_smbus_read_byte_data(client, ADT7470_REG_PWM_CFG(2));
+ err = regmap_read(data->regmap, ADT7470_REG_PWM_CFG(0), &pwm_cfg[0]);
+ if (err < 0)
+ return err;
+ err = regmap_read(data->regmap, ADT7470_REG_PWM_CFG(2), &pwm_cfg[1]);
+ if (err < 0)
+ return err;
/* set manual pwm to whatever it is set to now */
- for (i = 0; i < ADT7470_FAN_COUNT; i++)
- pwm[i] = i2c_smbus_read_byte_data(client, ADT7470_REG_PWM(i));
+ err = regmap_bulk_read(data->regmap, ADT7470_REG_PWM(0), &pwm[0],
+ ADT7470_PWM_COUNT);
+ if (err < 0)
+ return err;
/* put pwm in manual mode */
- i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(0),
- pwm_cfg[0] & ~(ADT7470_PWM_AUTO_MASK));
- i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(2),
- pwm_cfg[1] & ~(ADT7470_PWM_AUTO_MASK));
+ err = regmap_update_bits(data->regmap, ADT7470_REG_PWM_CFG(0),
+ ADT7470_PWM_AUTO_MASK, 0);
+ if (err < 0)
+ return err;
+ err = regmap_update_bits(data->regmap, ADT7470_REG_PWM_CFG(2),
+ ADT7470_PWM_AUTO_MASK, 0);
+ if (err < 0)
+ return err;
/* write pwm control to whatever it was */
- for (i = 0; i < ADT7470_FAN_COUNT; i++)
- i2c_smbus_write_byte_data(client, ADT7470_REG_PWM(i), pwm[i]);
+ err = regmap_bulk_write(data->regmap, ADT7470_REG_PWM(0), &pwm[0],
+ ADT7470_PWM_COUNT);
+ if (err < 0)
+ return err;
/* start reading temperature sensors */
- cfg = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG);
- cfg |= 0x80;
- i2c_smbus_write_byte_data(client, ADT7470_REG_CFG, cfg);
+ err = regmap_update_bits(data->regmap, ADT7470_REG_CFG,
+ ADT7470_T05_STB_MASK, ADT7470_T05_STB_MASK);
+ if (err < 0)
+ return err;
/* Delay is 200ms * number of temp sensors. */
res = msleep_interruptible((data->num_temp_sensors >= 0 ?
@@ -223,26 +252,31 @@ static int adt7470_read_temperatures(struct i2c_client *client,
TEMP_COLLECTION_TIME));
/* done reading temperature sensors */
- cfg = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG);
- cfg &= ~0x80;
- i2c_smbus_write_byte_data(client, ADT7470_REG_CFG, cfg);
+ err = regmap_update_bits(data->regmap, ADT7470_REG_CFG,
+ ADT7470_T05_STB_MASK, 0);
+ if (err < 0)
+ return err;
/* restore pwm[1-4] config registers */
- i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(0), pwm_cfg[0]);
- i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(2), pwm_cfg[1]);
-
- if (res) {
- pr_err("ha ha, interrupted\n");
+ err = regmap_write(data->regmap, ADT7470_REG_PWM_CFG(0), pwm_cfg[0]);
+ if (err < 0)
+ return err;
+ err = regmap_write(data->regmap, ADT7470_REG_PWM_CFG(2), pwm_cfg[1]);
+ if (err < 0)
+ return err;
+
+ if (res)
return -EAGAIN;
- }
/* Only count fans if we have to */
if (data->num_temp_sensors >= 0)
return 0;
+ err = regmap_bulk_read(data->regmap, ADT7470_TEMP_REG(0), &data->temp[0],
+ ADT7470_TEMP_COUNT);
+ if (err < 0)
+ return err;
for (i = 0; i < ADT7470_TEMP_COUNT; i++) {
- data->temp[i] = i2c_smbus_read_byte_data(client,
- ADT7470_TEMP_REG(i));
if (data->temp[i])
data->num_temp_sensors = i + 1;
}
@@ -257,7 +291,7 @@ static int adt7470_update_thread(void *p)
while (!kthread_should_stop()) {
mutex_lock(&data->lock);
- adt7470_read_temperatures(client, data);
+ adt7470_read_temperatures(data);
mutex_unlock(&data->lock);
set_current_state(TASK_INTERRUPTIBLE);
@@ -272,89 +306,116 @@ static int adt7470_update_thread(void *p)
static int adt7470_update_sensors(struct adt7470_data *data)
{
- struct i2c_client *client = data->client;
- u8 cfg;
+ unsigned int val;
+ int err;
int i;
if (!data->temperatures_probed)
- adt7470_read_temperatures(client, data);
+ err = adt7470_read_temperatures(data);
else
- for (i = 0; i < ADT7470_TEMP_COUNT; i++)
- data->temp[i] = i2c_smbus_read_byte_data(client,
- ADT7470_TEMP_REG(i));
+ err = regmap_bulk_read(data->regmap, ADT7470_TEMP_REG(0), &data->temp[0],
+ ADT7470_TEMP_COUNT);
+ if (err < 0)
+ return err;
- for (i = 0; i < ADT7470_FAN_COUNT; i++)
- data->fan[i] = adt7470_read_word_data(client,
- ADT7470_REG_FAN(i));
+ for (i = 0; i < ADT7470_FAN_COUNT; i++) {
+ err = adt7470_read_word_data(data, ADT7470_REG_FAN(i), &val);
+ if (err < 0)
+ return err;
+ data->fan[i] = val;
+ }
- for (i = 0; i < ADT7470_PWM_COUNT; i++) {
- int reg;
- int reg_mask;
+ err = regmap_bulk_read(data->regmap, ADT7470_REG_PWM(0), &data->pwm[0], ADT7470_PWM_COUNT);
+ if (err < 0)
+ return err;
- data->pwm[i] = i2c_smbus_read_byte_data(client,
- ADT7470_REG_PWM(i));
+ for (i = 0; i < ADT7470_PWM_COUNT; i++) {
+ unsigned int mask;
if (i % 2)
- reg_mask = ADT7470_PWM2_AUTO_MASK;
+ mask = ADT7470_PWM2_AUTO_MASK;
else
- reg_mask = ADT7470_PWM1_AUTO_MASK;
+ mask = ADT7470_PWM1_AUTO_MASK;
- reg = ADT7470_REG_PWM_CFG(i);
- if (i2c_smbus_read_byte_data(client, reg) & reg_mask)
- data->pwm_automatic[i] = 1;
- else
- data->pwm_automatic[i] = 0;
+ err = regmap_read(data->regmap, ADT7470_REG_PWM_CFG(i), &val);
+ if (err < 0)
+ return err;
+ data->pwm_automatic[i] = !!(val & mask);
- reg = ADT7470_REG_PWM_AUTO_TEMP(i);
- cfg = i2c_smbus_read_byte_data(client, reg);
+ err = regmap_read(data->regmap, ADT7470_REG_PWM_AUTO_TEMP(i), &val);
+ if (err < 0)
+ return err;
if (!(i % 2))
- data->pwm_auto_temp[i] = cfg >> 4;
+ data->pwm_auto_temp[i] = val >> 4;
else
- data->pwm_auto_temp[i] = cfg & 0xF;
+ data->pwm_auto_temp[i] = val & 0xF;
}
- if (i2c_smbus_read_byte_data(client, ADT7470_REG_CFG) &
- ADT7470_FSPD_MASK)
- data->force_pwm_max = 1;
- else
- data->force_pwm_max = 0;
+ err = regmap_read(data->regmap, ADT7470_REG_CFG, &val);
+ if (err < 0)
+ return err;
+ data->force_pwm_max = !!(val & ADT7470_FSPD_MASK);
+
+ err = regmap_read(data->regmap, ADT7470_REG_ALARM1, &val);
+ if (err < 0)
+ return err;
+ data->alarm = val;
+ if (data->alarm & ADT7470_OOL_ALARM) {
+ err = regmap_read(data->regmap, ADT7470_REG_ALARM2, &val);
+ if (err < 0)
+ return err;
+ data->alarm |= ALARM2(val);
+ }
- data->alarm = i2c_smbus_read_byte_data(client, ADT7470_REG_ALARM1);
- if (data->alarm & ADT7470_OOL_ALARM)
- data->alarm |= ALARM2(i2c_smbus_read_byte_data(client,
- ADT7470_REG_ALARM2));
- data->alarms_mask = adt7470_read_word_data(client,
- ADT7470_REG_ALARM1_MASK);
+ err = adt7470_read_word_data(data, ADT7470_REG_ALARM1_MASK, &val);
+ if (err < 0)
+ return err;
+ data->alarms_mask = val;
return 0;
}
static int adt7470_update_limits(struct adt7470_data *data)
{
- struct i2c_client *client = data->client;
+ unsigned int val;
+ int err;
int i;
for (i = 0; i < ADT7470_TEMP_COUNT; i++) {
- data->temp_min[i] = i2c_smbus_read_byte_data(client,
- ADT7470_TEMP_MIN_REG(i));
- data->temp_max[i] = i2c_smbus_read_byte_data(client,
- ADT7470_TEMP_MAX_REG(i));
+ err = regmap_read(data->regmap, ADT7470_TEMP_MIN_REG(i), &val);
+ if (err < 0)
+ return err;
+ data->temp_min[i] = (s8)val;
+ err = regmap_read(data->regmap, ADT7470_TEMP_MAX_REG(i), &val);
+ if (err < 0)
+ return err;
+ data->temp_max[i] = (s8)val;
}
for (i = 0; i < ADT7470_FAN_COUNT; i++) {
- data->fan_min[i] = adt7470_read_word_data(client,
- ADT7470_REG_FAN_MIN(i));
- data->fan_max[i] = adt7470_read_word_data(client,
- ADT7470_REG_FAN_MAX(i));
+ err = adt7470_read_word_data(data, ADT7470_REG_FAN_MIN(i), &val);
+ if (err < 0)
+ return err;
+ data->fan_min[i] = val;
+ err = adt7470_read_word_data(data, ADT7470_REG_FAN_MAX(i), &val);
+ if (err < 0)
+ return err;
+ data->fan_max[i] = val;
}
for (i = 0; i < ADT7470_PWM_COUNT; i++) {
- data->pwm_max[i] = i2c_smbus_read_byte_data(client,
- ADT7470_REG_PWM_MAX(i));
- data->pwm_min[i] = i2c_smbus_read_byte_data(client,
- ADT7470_REG_PWM_MIN(i));
- data->pwm_tmin[i] = i2c_smbus_read_byte_data(client,
- ADT7470_REG_PWM_TMIN(i));
+ err = regmap_read(data->regmap, ADT7470_REG_PWM_MAX(i), &val);
+ if (err < 0)
+ return err;
+ data->pwm_max[i] = val;
+ err = regmap_read(data->regmap, ADT7470_REG_PWM_MIN(i), &val);
+ if (err < 0)
+ return err;
+ data->pwm_min[i] = val;
+ err = regmap_read(data->regmap, ADT7470_REG_PWM_TMIN(i), &val);
+ if (err < 0)
+ return err;
+ data->pwm_tmin[i] = (s8)val;
}
return 0;
@@ -472,93 +533,63 @@ static ssize_t num_temp_sensors_store(struct device *dev,
return count;
}
-static ssize_t temp_min_show(struct device *dev,
- struct device_attribute *devattr, char *buf)
+static int adt7470_temp_read(struct device *dev, u32 attr, int channel, long *val)
{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct adt7470_data *data = adt7470_update_device(dev);
if (IS_ERR(data))
return PTR_ERR(data);
- return sprintf(buf, "%d\n", 1000 * data->temp_min[attr->index]);
-}
-
-static ssize_t temp_min_store(struct device *dev,
- struct device_attribute *devattr,
- const char *buf, size_t count)
-{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct adt7470_data *data = dev_get_drvdata(dev);
- struct i2c_client *client = data->client;
- long temp;
-
- if (kstrtol(buf, 10, &temp))
- return -EINVAL;
-
- temp = clamp_val(temp, -128000, 127000);
- temp = DIV_ROUND_CLOSEST(temp, 1000);
-
- mutex_lock(&data->lock);
- data->temp_min[attr->index] = temp;
- i2c_smbus_write_byte_data(client, ADT7470_TEMP_MIN_REG(attr->index),
- temp);
- mutex_unlock(&data->lock);
-
- return count;
-}
-
-static ssize_t temp_max_show(struct device *dev,
- struct device_attribute *devattr, char *buf)
-{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct adt7470_data *data = adt7470_update_device(dev);
-
- if (IS_ERR(data))
- return PTR_ERR(data);
+ switch (attr) {
+ case hwmon_temp_input:
+ *val = 1000 * data->temp[channel];
+ break;
+ case hwmon_temp_min:
+ *val = 1000 * data->temp_min[channel];
+ break;
+ case hwmon_temp_max:
+ *val = 1000 * data->temp_max[channel];
+ break;
+ case hwmon_temp_alarm:
+ *val = !!(data->alarm & channel);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
- return sprintf(buf, "%d\n", 1000 * data->temp_max[attr->index]);
+ return 0;
}
-static ssize_t temp_max_store(struct device *dev,
- struct device_attribute *devattr,
- const char *buf, size_t count)
+static int adt7470_temp_write(struct device *dev, u32 attr, int channel, long val)
{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct adt7470_data *data = dev_get_drvdata(dev);
- struct i2c_client *client = data->client;
- long temp;
-
- if (kstrtol(buf, 10, &temp))
- return -EINVAL;
-
- temp = clamp_val(temp, -128000, 127000);
- temp = DIV_ROUND_CLOSEST(temp, 1000);
-
- mutex_lock(&data->lock);
- data->temp_max[attr->index] = temp;
- i2c_smbus_write_byte_data(client, ADT7470_TEMP_MAX_REG(attr->index),
- temp);
- mutex_unlock(&data->lock);
-
- return count;
-}
+ int err;
-static ssize_t temp_show(struct device *dev, struct device_attribute *devattr,
- char *buf)
-{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct adt7470_data *data = adt7470_update_device(dev);
+ val = clamp_val(val, -128000, 127000);
+ val = DIV_ROUND_CLOSEST(val, 1000);
- if (IS_ERR(data))
- return PTR_ERR(data);
+ switch (attr) {
+ case hwmon_temp_min:
+ mutex_lock(&data->lock);
+ data->temp_min[channel] = val;
+ err = regmap_write(data->regmap, ADT7470_TEMP_MIN_REG(channel), val);
+ mutex_unlock(&data->lock);
+ break;
+ case hwmon_temp_max:
+ mutex_lock(&data->lock);
+ data->temp_max[channel] = val;
+ err = regmap_write(data->regmap, ADT7470_TEMP_MAX_REG(channel), val);
+ mutex_unlock(&data->lock);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
- return sprintf(buf, "%d\n", 1000 * data->temp[attr->index]);
+ return err;
}
static ssize_t alarm_mask_show(struct device *dev,
- struct device_attribute *devattr,
- char *buf)
+ struct device_attribute *devattr, char *buf)
{
struct adt7470_data *data = adt7470_update_device(dev);
@@ -574,6 +605,7 @@ static ssize_t alarm_mask_store(struct device *dev,
{
struct adt7470_data *data = dev_get_drvdata(dev);
long mask;
+ int err;
if (kstrtoul(buf, 0, &mask))
return -EINVAL;
@@ -583,104 +615,74 @@ static ssize_t alarm_mask_store(struct device *dev,
mutex_lock(&data->lock);
data->alarms_mask = mask;
- adt7470_write_word_data(data->client, ADT7470_REG_ALARM1_MASK, mask);
+ err = adt7470_write_word_data(data, ADT7470_REG_ALARM1_MASK, mask);
mutex_unlock(&data->lock);
- return count;
+ return err < 0 ? err : count;
}
-static ssize_t fan_max_show(struct device *dev,
- struct device_attribute *devattr, char *buf)
+static int adt7470_fan_read(struct device *dev, u32 attr, int channel, long *val)
{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct adt7470_data *data = adt7470_update_device(dev);
if (IS_ERR(data))
return PTR_ERR(data);
- if (FAN_DATA_VALID(data->fan_max[attr->index]))
- return sprintf(buf, "%d\n",
- FAN_PERIOD_TO_RPM(data->fan_max[attr->index]));
- else
- return sprintf(buf, "0\n");
-}
-
-static ssize_t fan_max_store(struct device *dev,
- struct device_attribute *devattr,
- const char *buf, size_t count)
-{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct adt7470_data *data = dev_get_drvdata(dev);
- struct i2c_client *client = data->client;
- long temp;
-
- if (kstrtol(buf, 10, &temp) || !temp)
- return -EINVAL;
-
- temp = FAN_RPM_TO_PERIOD(temp);
- temp = clamp_val(temp, 1, 65534);
-
- mutex_lock(&data->lock);
- data->fan_max[attr->index] = temp;
- adt7470_write_word_data(client, ADT7470_REG_FAN_MAX(attr->index), temp);
- mutex_unlock(&data->lock);
-
- return count;
-}
-
-static ssize_t fan_min_show(struct device *dev,
- struct device_attribute *devattr, char *buf)
-{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct adt7470_data *data = adt7470_update_device(dev);
-
- if (IS_ERR(data))
- return PTR_ERR(data);
+ switch (attr) {
+ case hwmon_fan_input:
+ if (FAN_DATA_VALID(data->fan[channel]))
+ *val = FAN_PERIOD_TO_RPM(data->fan[channel]);
+ else
+ *val = 0;
+ break;
+ case hwmon_fan_min:
+ if (FAN_DATA_VALID(data->fan_min[channel]))
+ *val = FAN_PERIOD_TO_RPM(data->fan_min[channel]);
+ else
+ *val = 0;
+ break;
+ case hwmon_fan_max:
+ if (FAN_DATA_VALID(data->fan_max[channel]))
+ *val = FAN_PERIOD_TO_RPM(data->fan_max[channel]);
+ else
+ *val = 0;
+ break;
+ case hwmon_fan_alarm:
+ *val = !!(data->alarm & (1 << (12 + channel)));
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
- if (FAN_DATA_VALID(data->fan_min[attr->index]))
- return sprintf(buf, "%d\n",
- FAN_PERIOD_TO_RPM(data->fan_min[attr->index]));
- else
- return sprintf(buf, "0\n");
+ return 0;
}
-static ssize_t fan_min_store(struct device *dev,
- struct device_attribute *devattr,
- const char *buf, size_t count)
+static int adt7470_fan_write(struct device *dev, u32 attr, int channel, long val)
{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct adt7470_data *data = dev_get_drvdata(dev);
- struct i2c_client *client = data->client;
- long temp;
-
- if (kstrtol(buf, 10, &temp) || !temp)
- return -EINVAL;
-
- temp = FAN_RPM_TO_PERIOD(temp);
- temp = clamp_val(temp, 1, 65534);
-
- mutex_lock(&data->lock);
- data->fan_min[attr->index] = temp;
- adt7470_write_word_data(client, ADT7470_REG_FAN_MIN(attr->index), temp);
- mutex_unlock(&data->lock);
-
- return count;
-}
+ int err;
-static ssize_t fan_show(struct device *dev, struct device_attribute *devattr,
- char *buf)
-{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct adt7470_data *data = adt7470_update_device(dev);
+ val = FAN_RPM_TO_PERIOD(val);
+ val = clamp_val(val, 1, 65534);
- if (IS_ERR(data))
- return PTR_ERR(data);
+ switch (attr) {
+ case hwmon_fan_min:
+ mutex_lock(&data->lock);
+ data->fan_min[channel] = val;
+ err = adt7470_write_word_data(data, ADT7470_REG_FAN_MIN(channel), val);
+ mutex_unlock(&data->lock);
+ break;
+ case hwmon_fan_max:
+ mutex_lock(&data->lock);
+ data->fan_max[channel] = val;
+ err = adt7470_write_word_data(data, ADT7470_REG_FAN_MAX(channel), val);
+ mutex_unlock(&data->lock);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
- if (FAN_DATA_VALID(data->fan[attr->index]))
- return sprintf(buf, "%d\n",
- FAN_PERIOD_TO_RPM(data->fan[attr->index]));
- else
- return sprintf(buf, "0\n");
+ return err;
}
static ssize_t force_pwm_max_show(struct device *dev,
@@ -699,57 +701,20 @@ static ssize_t force_pwm_max_store(struct device *dev,
const char *buf, size_t count)
{
struct adt7470_data *data = dev_get_drvdata(dev);
- struct i2c_client *client = data->client;
long temp;
- u8 reg;
+ int err;
if (kstrtol(buf, 10, &temp))
return -EINVAL;
mutex_lock(&data->lock);
data->force_pwm_max = temp;
- reg = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG);
- if (temp)
- reg |= ADT7470_FSPD_MASK;
- else
- reg &= ~ADT7470_FSPD_MASK;
- i2c_smbus_write_byte_data(client, ADT7470_REG_CFG, reg);
+ err = regmap_update_bits(data->regmap, ADT7470_REG_CFG,
+ ADT7470_FSPD_MASK,
+ temp ? ADT7470_FSPD_MASK : 0);
mutex_unlock(&data->lock);
- return count;
-}
-
-static ssize_t pwm_show(struct device *dev, struct device_attribute *devattr,
- char *buf)
-{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct adt7470_data *data = adt7470_update_device(dev);
-
- if (IS_ERR(data))
- return PTR_ERR(data);
-
- return sprintf(buf, "%d\n", data->pwm[attr->index]);
-}
-
-static ssize_t pwm_store(struct device *dev, struct device_attribute *devattr,
- const char *buf, size_t count)
-{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct adt7470_data *data = dev_get_drvdata(dev);
- struct i2c_client *client = data->client;
- long temp;
-
- if (kstrtol(buf, 10, &temp))
- return -EINVAL;
-
- temp = clamp_val(temp, 0, 255);
-
- mutex_lock(&data->lock);
- data->pwm[attr->index] = temp;
- i2c_smbus_write_byte_data(client, ADT7470_REG_PWM(attr->index), temp);
- mutex_unlock(&data->lock);
-
- return count;
+ return err < 0 ? err : count;
}
/* These are the valid PWM frequencies to the nearest Hz */
@@ -757,17 +722,20 @@ static const int adt7470_freq_map[] = {
11, 15, 22, 29, 35, 44, 59, 88, 1400, 22500
};
-static ssize_t pwm1_freq_show(struct device *dev,
- struct device_attribute *devattr, char *buf)
+static int pwm1_freq_get(struct device *dev)
{
- struct adt7470_data *data = adt7470_update_device(dev);
- unsigned char cfg_reg_1;
- unsigned char cfg_reg_2;
+ struct adt7470_data *data = dev_get_drvdata(dev);
+ unsigned int cfg_reg_1, cfg_reg_2;
int index;
+ int err;
mutex_lock(&data->lock);
- cfg_reg_1 = i2c_smbus_read_byte_data(data->client, ADT7470_REG_CFG);
- cfg_reg_2 = i2c_smbus_read_byte_data(data->client, ADT7470_REG_CFG_2);
+ err = regmap_read(data->regmap, ADT7470_REG_CFG, &cfg_reg_1);
+ if (err < 0)
+ goto out;
+ err = regmap_read(data->regmap, ADT7470_REG_CFG_2, &cfg_reg_2);
+ if (err < 0)
+ goto out;
mutex_unlock(&data->lock);
index = (cfg_reg_2 & ADT7470_FREQ_MASK) >> ADT7470_FREQ_SHIFT;
@@ -776,22 +744,43 @@ static ssize_t pwm1_freq_show(struct device *dev,
if (index >= ARRAY_SIZE(adt7470_freq_map))
index = ARRAY_SIZE(adt7470_freq_map) - 1;
- return scnprintf(buf, PAGE_SIZE, "%d\n", adt7470_freq_map[index]);
+ return adt7470_freq_map[index];
+
+out:
+ mutex_unlock(&data->lock);
+ return err;
+}
+
+static int adt7470_pwm_read(struct device *dev, u32 attr, int channel, long *val)
+{
+ struct adt7470_data *data = adt7470_update_device(dev);
+
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ switch (attr) {
+ case hwmon_pwm_input:
+ *val = data->pwm[channel];
+ break;
+ case hwmon_pwm_enable:
+ *val = 1 + data->pwm_automatic[channel];
+ break;
+ case hwmon_pwm_freq:
+ *val = pwm1_freq_get(dev);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
}
-static ssize_t pwm1_freq_store(struct device *dev,
- struct device_attribute *devattr,
- const char *buf, size_t count)
+static int pwm1_freq_set(struct device *dev, long freq)
{
struct adt7470_data *data = dev_get_drvdata(dev);
- struct i2c_client *client = data->client;
- long freq;
+ unsigned int low_freq = ADT7470_CFG_LF;
int index;
- int low_freq = ADT7470_CFG_LF;
- unsigned char val;
-
- if (kstrtol(buf, 10, &freq))
- return -EINVAL;
+ int err;
/* Round the user value given to the closest available frequency */
index = find_closest(freq, adt7470_freq_map,
@@ -804,16 +793,61 @@ static ssize_t pwm1_freq_store(struct device *dev,
mutex_lock(&data->lock);
/* Configuration Register 1 */
- val = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG);
- i2c_smbus_write_byte_data(client, ADT7470_REG_CFG,
- (val & ~ADT7470_CFG_LF) | low_freq);
+ err = regmap_update_bits(data->regmap, ADT7470_REG_CFG,
+ ADT7470_CFG_LF, low_freq);
+ if (err < 0)
+ goto out;
+
/* Configuration Register 2 */
- val = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG_2);
- i2c_smbus_write_byte_data(client, ADT7470_REG_CFG_2,
- (val & ~ADT7470_FREQ_MASK) | (index << ADT7470_FREQ_SHIFT));
+ err = regmap_update_bits(data->regmap, ADT7470_REG_CFG_2,
+ ADT7470_FREQ_MASK,
+ index << ADT7470_FREQ_SHIFT);
+out:
mutex_unlock(&data->lock);
- return count;
+ return err;
+}
+
+static int adt7470_pwm_write(struct device *dev, u32 attr, int channel, long val)
+{
+ struct adt7470_data *data = dev_get_drvdata(dev);
+ unsigned int pwm_auto_reg_mask;
+ int err;
+
+ switch (attr) {
+ case hwmon_pwm_input:
+ val = clamp_val(val, 0, 255);
+ mutex_lock(&data->lock);
+ data->pwm[channel] = val;
+ err = regmap_write(data->regmap, ADT7470_REG_PWM(channel),
+ data->pwm[channel]);
+ mutex_unlock(&data->lock);
+ break;
+ case hwmon_pwm_enable:
+ if (channel % 2)
+ pwm_auto_reg_mask = ADT7470_PWM2_AUTO_MASK;
+ else
+ pwm_auto_reg_mask = ADT7470_PWM1_AUTO_MASK;
+
+ if (val != 2 && val != 1)
+ return -EINVAL;
+ val--;
+
+ mutex_lock(&data->lock);
+ data->pwm_automatic[channel] = val;
+ err = regmap_update_bits(data->regmap, ADT7470_REG_PWM_CFG(channel),
+ pwm_auto_reg_mask,
+ val ? pwm_auto_reg_mask : 0);
+ mutex_unlock(&data->lock);
+ break;
+ case hwmon_pwm_freq:
+ err = pwm1_freq_set(dev, val);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return err;
}
static ssize_t pwm_max_show(struct device *dev,
@@ -834,8 +868,8 @@ static ssize_t pwm_max_store(struct device *dev,
{
struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct adt7470_data *data = dev_get_drvdata(dev);
- struct i2c_client *client = data->client;
long temp;
+ int err;
if (kstrtol(buf, 10, &temp))
return -EINVAL;
@@ -844,11 +878,11 @@ static ssize_t pwm_max_store(struct device *dev,
mutex_lock(&data->lock);
data->pwm_max[attr->index] = temp;
- i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_MAX(attr->index),
- temp);
+ err = regmap_write(data->regmap, ADT7470_REG_PWM_MAX(attr->index),
+ temp);
mutex_unlock(&data->lock);
- return count;
+ return err < 0 ? err : count;
}
static ssize_t pwm_min_show(struct device *dev,
@@ -869,8 +903,8 @@ static ssize_t pwm_min_store(struct device *dev,
{
struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct adt7470_data *data = dev_get_drvdata(dev);
- struct i2c_client *client = data->client;
long temp;
+ int err;
if (kstrtol(buf, 10, &temp))
return -EINVAL;
@@ -879,11 +913,11 @@ static ssize_t pwm_min_store(struct device *dev,
mutex_lock(&data->lock);
data->pwm_min[attr->index] = temp;
- i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_MIN(attr->index),
- temp);
+ err = regmap_write(data->regmap, ADT7470_REG_PWM_MIN(attr->index),
+ temp);
mutex_unlock(&data->lock);
- return count;
+ return err < 0 ? err : count;
}
static ssize_t pwm_tmax_show(struct device *dev,
@@ -917,8 +951,8 @@ static ssize_t pwm_tmin_store(struct device *dev,
{
struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct adt7470_data *data = dev_get_drvdata(dev);
- struct i2c_client *client = data->client;
long temp;
+ int err;
if (kstrtol(buf, 10, &temp))
return -EINVAL;
@@ -928,60 +962,11 @@ static ssize_t pwm_tmin_store(struct device *dev,
mutex_lock(&data->lock);
data->pwm_tmin[attr->index] = temp;
- i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_TMIN(attr->index),
- temp);
- mutex_unlock(&data->lock);
-
- return count;
-}
-
-static ssize_t pwm_auto_show(struct device *dev,
- struct device_attribute *devattr, char *buf)
-{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct adt7470_data *data = adt7470_update_device(dev);
-
- if (IS_ERR(data))
- return PTR_ERR(data);
-
- return sprintf(buf, "%d\n", 1 + data->pwm_automatic[attr->index]);
-}
-
-static ssize_t pwm_auto_store(struct device *dev,
- struct device_attribute *devattr,
- const char *buf, size_t count)
-{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct adt7470_data *data = dev_get_drvdata(dev);
- struct i2c_client *client = data->client;
- int pwm_auto_reg = ADT7470_REG_PWM_CFG(attr->index);
- int pwm_auto_reg_mask;
- long temp;
- u8 reg;
-
- if (kstrtol(buf, 10, &temp))
- return -EINVAL;
-
- if (attr->index % 2)
- pwm_auto_reg_mask = ADT7470_PWM2_AUTO_MASK;
- else
- pwm_auto_reg_mask = ADT7470_PWM1_AUTO_MASK;
-
- if (temp != 2 && temp != 1)
- return -EINVAL;
- temp--;
-
- mutex_lock(&data->lock);
- data->pwm_automatic[attr->index] = temp;
- reg = i2c_smbus_read_byte_data(client, pwm_auto_reg);
- if (temp)
- reg |= pwm_auto_reg_mask;
- else
- reg &= ~pwm_auto_reg_mask;
- i2c_smbus_write_byte_data(client, pwm_auto_reg, reg);
+ err = regmap_write(data->regmap, ADT7470_REG_PWM_TMIN(attr->index),
+ temp);
mutex_unlock(&data->lock);
- return count;
+ return err < 0 ? err : count;
}
static ssize_t pwm_auto_temp_show(struct device *dev,
@@ -1016,10 +1001,10 @@ static ssize_t pwm_auto_temp_store(struct device *dev,
{
struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct adt7470_data *data = dev_get_drvdata(dev);
- struct i2c_client *client = data->client;
int pwm_auto_reg = ADT7470_REG_PWM_AUTO_TEMP(attr->index);
+ unsigned int mask, val;
long temp;
- u8 reg;
+ int err;
if (kstrtol(buf, 10, &temp))
return -EINVAL;
@@ -1030,111 +1015,27 @@ static ssize_t pwm_auto_temp_store(struct device *dev,
mutex_lock(&data->lock);
data->pwm_automatic[attr->index] = temp;
- reg = i2c_smbus_read_byte_data(client, pwm_auto_reg);
if (!(attr->index % 2)) {
- reg &= 0xF;
- reg |= (temp << 4) & 0xF0;
+ mask = 0xF0;
+ val = (temp << 4) & 0xF0;
} else {
- reg &= 0xF0;
- reg |= temp & 0xF;
+ mask = 0x0F;
+ val = temp & 0x0F;
}
- i2c_smbus_write_byte_data(client, pwm_auto_reg, reg);
+ err = regmap_update_bits(data->regmap, pwm_auto_reg, mask, val);
mutex_unlock(&data->lock);
- return count;
-}
-
-static ssize_t alarm_show(struct device *dev,
- struct device_attribute *devattr, char *buf)
-{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct adt7470_data *data = adt7470_update_device(dev);
-
- if (data->alarm & attr->index)
- return sprintf(buf, "1\n");
- else
- return sprintf(buf, "0\n");
+ return err < 0 ? err : count;
}
static DEVICE_ATTR_RW(alarm_mask);
static DEVICE_ATTR_RW(num_temp_sensors);
static DEVICE_ATTR_RW(auto_update_interval);
-static SENSOR_DEVICE_ATTR_RW(temp1_max, temp_max, 0);
-static SENSOR_DEVICE_ATTR_RW(temp2_max, temp_max, 1);
-static SENSOR_DEVICE_ATTR_RW(temp3_max, temp_max, 2);
-static SENSOR_DEVICE_ATTR_RW(temp4_max, temp_max, 3);
-static SENSOR_DEVICE_ATTR_RW(temp5_max, temp_max, 4);
-static SENSOR_DEVICE_ATTR_RW(temp6_max, temp_max, 5);
-static SENSOR_DEVICE_ATTR_RW(temp7_max, temp_max, 6);
-static SENSOR_DEVICE_ATTR_RW(temp8_max, temp_max, 7);
-static SENSOR_DEVICE_ATTR_RW(temp9_max, temp_max, 8);
-static SENSOR_DEVICE_ATTR_RW(temp10_max, temp_max, 9);
-
-static SENSOR_DEVICE_ATTR_RW(temp1_min, temp_min, 0);
-static SENSOR_DEVICE_ATTR_RW(temp2_min, temp_min, 1);
-static SENSOR_DEVICE_ATTR_RW(temp3_min, temp_min, 2);
-static SENSOR_DEVICE_ATTR_RW(temp4_min, temp_min, 3);
-static SENSOR_DEVICE_ATTR_RW(temp5_min, temp_min, 4);
-static SENSOR_DEVICE_ATTR_RW(temp6_min, temp_min, 5);
-static SENSOR_DEVICE_ATTR_RW(temp7_min, temp_min, 6);
-static SENSOR_DEVICE_ATTR_RW(temp8_min, temp_min, 7);
-static SENSOR_DEVICE_ATTR_RW(temp9_min, temp_min, 8);
-static SENSOR_DEVICE_ATTR_RW(temp10_min, temp_min, 9);
-
-static SENSOR_DEVICE_ATTR_RO(temp1_input, temp, 0);
-static SENSOR_DEVICE_ATTR_RO(temp2_input, temp, 1);
-static SENSOR_DEVICE_ATTR_RO(temp3_input, temp, 2);
-static SENSOR_DEVICE_ATTR_RO(temp4_input, temp, 3);
-static SENSOR_DEVICE_ATTR_RO(temp5_input, temp, 4);
-static SENSOR_DEVICE_ATTR_RO(temp6_input, temp, 5);
-static SENSOR_DEVICE_ATTR_RO(temp7_input, temp, 6);
-static SENSOR_DEVICE_ATTR_RO(temp8_input, temp, 7);
-static SENSOR_DEVICE_ATTR_RO(temp9_input, temp, 8);
-static SENSOR_DEVICE_ATTR_RO(temp10_input, temp, 9);
-
-static SENSOR_DEVICE_ATTR_RO(temp1_alarm, alarm, ADT7470_R1T_ALARM);
-static SENSOR_DEVICE_ATTR_RO(temp2_alarm, alarm, ADT7470_R2T_ALARM);
-static SENSOR_DEVICE_ATTR_RO(temp3_alarm, alarm, ADT7470_R3T_ALARM);
-static SENSOR_DEVICE_ATTR_RO(temp4_alarm, alarm, ADT7470_R4T_ALARM);
-static SENSOR_DEVICE_ATTR_RO(temp5_alarm, alarm, ADT7470_R5T_ALARM);
-static SENSOR_DEVICE_ATTR_RO(temp6_alarm, alarm, ADT7470_R6T_ALARM);
-static SENSOR_DEVICE_ATTR_RO(temp7_alarm, alarm, ADT7470_R7T_ALARM);
-static SENSOR_DEVICE_ATTR_RO(temp8_alarm, alarm, ALARM2(ADT7470_R8T_ALARM));
-static SENSOR_DEVICE_ATTR_RO(temp9_alarm, alarm, ALARM2(ADT7470_R9T_ALARM));
-static SENSOR_DEVICE_ATTR_RO(temp10_alarm, alarm, ALARM2(ADT7470_R10T_ALARM));
-
-static SENSOR_DEVICE_ATTR_RW(fan1_max, fan_max, 0);
-static SENSOR_DEVICE_ATTR_RW(fan2_max, fan_max, 1);
-static SENSOR_DEVICE_ATTR_RW(fan3_max, fan_max, 2);
-static SENSOR_DEVICE_ATTR_RW(fan4_max, fan_max, 3);
-
-static SENSOR_DEVICE_ATTR_RW(fan1_min, fan_min, 0);
-static SENSOR_DEVICE_ATTR_RW(fan2_min, fan_min, 1);
-static SENSOR_DEVICE_ATTR_RW(fan3_min, fan_min, 2);
-static SENSOR_DEVICE_ATTR_RW(fan4_min, fan_min, 3);
-
-static SENSOR_DEVICE_ATTR_RO(fan1_input, fan, 0);
-static SENSOR_DEVICE_ATTR_RO(fan2_input, fan, 1);
-static SENSOR_DEVICE_ATTR_RO(fan3_input, fan, 2);
-static SENSOR_DEVICE_ATTR_RO(fan4_input, fan, 3);
-
-static SENSOR_DEVICE_ATTR_RO(fan1_alarm, alarm, ALARM2(ADT7470_FAN1_ALARM));
-static SENSOR_DEVICE_ATTR_RO(fan2_alarm, alarm, ALARM2(ADT7470_FAN2_ALARM));
-static SENSOR_DEVICE_ATTR_RO(fan3_alarm, alarm, ALARM2(ADT7470_FAN3_ALARM));
-static SENSOR_DEVICE_ATTR_RO(fan4_alarm, alarm, ALARM2(ADT7470_FAN4_ALARM));
-
static SENSOR_DEVICE_ATTR_RW(force_pwm_max, force_pwm_max, 0);
-static SENSOR_DEVICE_ATTR_RW(pwm1, pwm, 0);
-static SENSOR_DEVICE_ATTR_RW(pwm2, pwm, 1);
-static SENSOR_DEVICE_ATTR_RW(pwm3, pwm, 2);
-static SENSOR_DEVICE_ATTR_RW(pwm4, pwm, 3);
-
-static DEVICE_ATTR_RW(pwm1_freq);
-
static SENSOR_DEVICE_ATTR_RW(pwm1_auto_point1_pwm, pwm_min, 0);
static SENSOR_DEVICE_ATTR_RW(pwm2_auto_point1_pwm, pwm_min, 1);
static SENSOR_DEVICE_ATTR_RW(pwm3_auto_point1_pwm, pwm_min, 2);
@@ -1155,11 +1056,6 @@ static SENSOR_DEVICE_ATTR_RO(pwm2_auto_point2_temp, pwm_tmax, 1);
static SENSOR_DEVICE_ATTR_RO(pwm3_auto_point2_temp, pwm_tmax, 2);
static SENSOR_DEVICE_ATTR_RO(pwm4_auto_point2_temp, pwm_tmax, 3);
-static SENSOR_DEVICE_ATTR_RW(pwm1_enable, pwm_auto, 0);
-static SENSOR_DEVICE_ATTR_RW(pwm2_enable, pwm_auto, 1);
-static SENSOR_DEVICE_ATTR_RW(pwm3_enable, pwm_auto, 2);
-static SENSOR_DEVICE_ATTR_RW(pwm4_enable, pwm_auto, 3);
-
static SENSOR_DEVICE_ATTR_RW(pwm1_auto_channels_temp, pwm_auto_temp, 0);
static SENSOR_DEVICE_ATTR_RW(pwm2_auto_channels_temp, pwm_auto_temp, 1);
static SENSOR_DEVICE_ATTR_RW(pwm3_auto_channels_temp, pwm_auto_temp, 2);
@@ -1169,68 +1065,7 @@ static struct attribute *adt7470_attrs[] = {
&dev_attr_alarm_mask.attr,
&dev_attr_num_temp_sensors.attr,
&dev_attr_auto_update_interval.attr,
- &sensor_dev_attr_temp1_max.dev_attr.attr,
- &sensor_dev_attr_temp2_max.dev_attr.attr,
- &sensor_dev_attr_temp3_max.dev_attr.attr,
- &sensor_dev_attr_temp4_max.dev_attr.attr,
- &sensor_dev_attr_temp5_max.dev_attr.attr,
- &sensor_dev_attr_temp6_max.dev_attr.attr,
- &sensor_dev_attr_temp7_max.dev_attr.attr,
- &sensor_dev_attr_temp8_max.dev_attr.attr,
- &sensor_dev_attr_temp9_max.dev_attr.attr,
- &sensor_dev_attr_temp10_max.dev_attr.attr,
- &sensor_dev_attr_temp1_min.dev_attr.attr,
- &sensor_dev_attr_temp2_min.dev_attr.attr,
- &sensor_dev_attr_temp3_min.dev_attr.attr,
- &sensor_dev_attr_temp4_min.dev_attr.attr,
- &sensor_dev_attr_temp5_min.dev_attr.attr,
- &sensor_dev_attr_temp6_min.dev_attr.attr,
- &sensor_dev_attr_temp7_min.dev_attr.attr,
- &sensor_dev_attr_temp8_min.dev_attr.attr,
- &sensor_dev_attr_temp9_min.dev_attr.attr,
- &sensor_dev_attr_temp10_min.dev_attr.attr,
- &sensor_dev_attr_temp1_input.dev_attr.attr,
- &sensor_dev_attr_temp2_input.dev_attr.attr,
- &sensor_dev_attr_temp3_input.dev_attr.attr,
- &sensor_dev_attr_temp4_input.dev_attr.attr,
- &sensor_dev_attr_temp5_input.dev_attr.attr,
- &sensor_dev_attr_temp6_input.dev_attr.attr,
- &sensor_dev_attr_temp7_input.dev_attr.attr,
- &sensor_dev_attr_temp8_input.dev_attr.attr,
- &sensor_dev_attr_temp9_input.dev_attr.attr,
- &sensor_dev_attr_temp10_input.dev_attr.attr,
- &sensor_dev_attr_temp1_alarm.dev_attr.attr,
- &sensor_dev_attr_temp2_alarm.dev_attr.attr,
- &sensor_dev_attr_temp3_alarm.dev_attr.attr,
- &sensor_dev_attr_temp4_alarm.dev_attr.attr,
- &sensor_dev_attr_temp5_alarm.dev_attr.attr,
- &sensor_dev_attr_temp6_alarm.dev_attr.attr,
- &sensor_dev_attr_temp7_alarm.dev_attr.attr,
- &sensor_dev_attr_temp8_alarm.dev_attr.attr,
- &sensor_dev_attr_temp9_alarm.dev_attr.attr,
- &sensor_dev_attr_temp10_alarm.dev_attr.attr,
- &sensor_dev_attr_fan1_max.dev_attr.attr,
- &sensor_dev_attr_fan2_max.dev_attr.attr,
- &sensor_dev_attr_fan3_max.dev_attr.attr,
- &sensor_dev_attr_fan4_max.dev_attr.attr,
- &sensor_dev_attr_fan1_min.dev_attr.attr,
- &sensor_dev_attr_fan2_min.dev_attr.attr,
- &sensor_dev_attr_fan3_min.dev_attr.attr,
- &sensor_dev_attr_fan4_min.dev_attr.attr,
- &sensor_dev_attr_fan1_input.dev_attr.attr,
- &sensor_dev_attr_fan2_input.dev_attr.attr,
- &sensor_dev_attr_fan3_input.dev_attr.attr,
- &sensor_dev_attr_fan4_input.dev_attr.attr,
- &sensor_dev_attr_fan1_alarm.dev_attr.attr,
- &sensor_dev_attr_fan2_alarm.dev_attr.attr,
- &sensor_dev_attr_fan3_alarm.dev_attr.attr,
- &sensor_dev_attr_fan4_alarm.dev_attr.attr,
&sensor_dev_attr_force_pwm_max.dev_attr.attr,
- &sensor_dev_attr_pwm1.dev_attr.attr,
- &dev_attr_pwm1_freq.attr,
- &sensor_dev_attr_pwm2.dev_attr.attr,
- &sensor_dev_attr_pwm3.dev_attr.attr,
- &sensor_dev_attr_pwm4.dev_attr.attr,
&sensor_dev_attr_pwm1_auto_point1_pwm.dev_attr.attr,
&sensor_dev_attr_pwm2_auto_point1_pwm.dev_attr.attr,
&sensor_dev_attr_pwm3_auto_point1_pwm.dev_attr.attr,
@@ -1247,10 +1082,6 @@ static struct attribute *adt7470_attrs[] = {
&sensor_dev_attr_pwm2_auto_point2_temp.dev_attr.attr,
&sensor_dev_attr_pwm3_auto_point2_temp.dev_attr.attr,
&sensor_dev_attr_pwm4_auto_point2_temp.dev_attr.attr,
- &sensor_dev_attr_pwm1_enable.dev_attr.attr,
- &sensor_dev_attr_pwm2_enable.dev_attr.attr,
- &sensor_dev_attr_pwm3_enable.dev_attr.attr,
- &sensor_dev_attr_pwm4_enable.dev_attr.attr,
&sensor_dev_attr_pwm1_auto_channels_temp.dev_attr.attr,
&sensor_dev_attr_pwm2_auto_channels_temp.dev_attr.attr,
&sensor_dev_attr_pwm3_auto_channels_temp.dev_attr.attr,
@@ -1260,6 +1091,129 @@ static struct attribute *adt7470_attrs[] = {
ATTRIBUTE_GROUPS(adt7470);
+/*
+ * hwmon core read callback: forward the request to the temperature, fan or
+ * PWM helper matching @type and return that helper's result. Any other
+ * sensor type yields -EOPNOTSUPP.
+ */
+static int adt7470_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+			int channel, long *val)
+{
+	if (type == hwmon_temp)
+		return adt7470_temp_read(dev, attr, channel, val);
+
+	if (type == hwmon_fan)
+		return adt7470_fan_read(dev, attr, channel, val);
+
+	if (type == hwmon_pwm)
+		return adt7470_pwm_read(dev, attr, channel, val);
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * hwmon core write callback: forward the new value to the temperature, fan
+ * or PWM helper matching @type and return that helper's result. Any other
+ * sensor type yields -EOPNOTSUPP.
+ */
+static int adt7470_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+			 int channel, long val)
+{
+	if (type == hwmon_temp)
+		return adt7470_temp_write(dev, attr, channel, val);
+
+	if (type == hwmon_fan)
+		return adt7470_fan_write(dev, attr, channel, val);
+
+	if (type == hwmon_pwm)
+		return adt7470_pwm_write(dev, attr, channel, val);
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Report the sysfs file mode for one attribute of one channel.
+ *
+ * Measurements and alarm flags are read-only (0444); temperature and fan
+ * limits, the PWM duty cycle and the PWM enable setting are read-write
+ * (0644). The PWM frequency attribute is only offered on channel 0.
+ * Every other type/attribute combination returns 0.
+ */
+static umode_t adt7470_is_visible(const void *_data, enum hwmon_sensor_types type,
+				  u32 attr, int channel)
+{
+	switch (type) {
+	case hwmon_temp:
+		switch (attr) {
+		/*
+		 * @attr is a temperature attribute, so it must be compared
+		 * with hwmon_temp_input, not with the hwmon_temp sensor type.
+		 */
+		case hwmon_temp_input:
+		case hwmon_temp_alarm:
+			return 0444;
+		case hwmon_temp_min:
+		case hwmon_temp_max:
+			return 0644;
+		default:
+			break;
+		}
+		break;
+	case hwmon_fan:
+		switch (attr) {
+		case hwmon_fan_input:
+		case hwmon_fan_alarm:
+			return 0444;
+		case hwmon_fan_min:
+		case hwmon_fan_max:
+			return 0644;
+		default:
+			break;
+		}
+		break;
+	case hwmon_pwm:
+		switch (attr) {
+		case hwmon_pwm_input:
+		case hwmon_pwm_enable:
+			return 0644;
+		case hwmon_pwm_freq:
+			/* only the first PWM channel carries the frequency file */
+			if (channel == 0)
+				return 0644;
+			break;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static const struct hwmon_ops adt7470_hwmon_ops = { /* callbacks referenced by adt7470_chip_info */
+ .is_visible = adt7470_is_visible,
+ .read = adt7470_read,
+ .write = adt7470_write,
+};
+
+static const struct hwmon_channel_info *adt7470_info[] = { /* NULL-terminated list of channel descriptions */
+ HWMON_CHANNEL_INFO(temp, /* ten temperature channels: input, min, max, alarm */
+ HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | HWMON_T_ALARM,
+ HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | HWMON_T_ALARM,
+ HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | HWMON_T_ALARM,
+ HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | HWMON_T_ALARM,
+ HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | HWMON_T_ALARM,
+ HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | HWMON_T_ALARM,
+ HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | HWMON_T_ALARM,
+ HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | HWMON_T_ALARM,
+ HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | HWMON_T_ALARM,
+ HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | HWMON_T_ALARM),
+ HWMON_CHANNEL_INFO(fan, /* four fan channels; NOTE(review): adt7470_is_visible() returns 0 for the DIV attribute */
+ HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_MAX | HWMON_F_DIV | HWMON_F_ALARM,
+ HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_MAX | HWMON_F_DIV | HWMON_F_ALARM,
+ HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_MAX | HWMON_F_DIV | HWMON_F_ALARM,
+ HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_MAX | HWMON_F_DIV | HWMON_F_ALARM),
+ HWMON_CHANNEL_INFO(pwm, /* four PWM channels; only the first one lists the frequency attribute */
+ HWMON_PWM_INPUT | HWMON_PWM_ENABLE | HWMON_PWM_FREQ,
+ HWMON_PWM_INPUT | HWMON_PWM_ENABLE,
+ HWMON_PWM_INPUT | HWMON_PWM_ENABLE,
+ HWMON_PWM_INPUT | HWMON_PWM_ENABLE),
+ NULL
+};
+
+static const struct hwmon_chip_info adt7470_chip_info = { /* bundle passed to the hwmon registration call in adt7470_probe() */
+ .ops = &adt7470_hwmon_ops,
+ .info = adt7470_info,
+};
+
/* Return 0 if detection is successful, -ENODEV otherwise */
static int adt7470_detect(struct i2c_client *client,
struct i2c_board_info *info)
@@ -1282,28 +1236,24 @@ static int adt7470_detect(struct i2c_client *client,
if (revision != ADT7470_REVISION)
return -ENODEV;
- strlcpy(info->type, "adt7470", I2C_NAME_SIZE);
+ strscpy(info->type, "adt7470", I2C_NAME_SIZE);
return 0;
}
-static void adt7470_init_client(struct i2c_client *client)
-{
- int reg = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG);
-
- if (reg < 0) {
- dev_err(&client->dev, "cannot read configuration register\n");
- } else {
- /* start monitoring (and do a self-test) */
- i2c_smbus_write_byte_data(client, ADT7470_REG_CFG, reg | 3);
- }
-}
+static const struct regmap_config adt7470_regmap_config = { /* used by adt7470_probe() to create the I2C regmap */
+ .reg_bits = 8, /* 8-bit register addresses */
+ .val_bits = 8, /* 8-bit register values */
+ .use_single_read = true, /* presumably makes regmap read one register per transfer - confirm against regmap docs */
+ .use_single_write = true, /* presumably makes regmap write one register per transfer - confirm against regmap docs */
+};
static int adt7470_probe(struct i2c_client *client)
{
struct device *dev = &client->dev;
struct adt7470_data *data;
struct device *hwmon_dev;
+ int err;
data = devm_kzalloc(dev, sizeof(struct adt7470_data), GFP_KERNEL);
if (!data)
@@ -1311,29 +1261,34 @@ static int adt7470_probe(struct i2c_client *client)
data->num_temp_sensors = -1;
data->auto_update_interval = AUTO_UPDATE_INTERVAL;
+ data->regmap = devm_regmap_init_i2c(client, &adt7470_regmap_config);
+ if (IS_ERR(data->regmap))
+ return PTR_ERR(data->regmap);
i2c_set_clientdata(client, data);
- data->client = client;
mutex_init(&data->lock);
dev_info(&client->dev, "%s chip found\n", client->name);
/* Initialize the ADT7470 chip */
- adt7470_init_client(client);
+ err = regmap_update_bits(data->regmap, ADT7470_REG_CFG,
+ ADT7470_STRT_MASK | ADT7470_TEST_MASK,
+ ADT7470_STRT_MASK | ADT7470_TEST_MASK);
+ if (err < 0)
+ return err;
/* Register sysfs hooks */
- hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
- data,
- adt7470_groups);
+ hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name, data,
+ &adt7470_chip_info,
+ adt7470_groups);
if (IS_ERR(hwmon_dev))
return PTR_ERR(hwmon_dev);
data->auto_update = kthread_run(adt7470_update_thread, client, "%s",
dev_name(hwmon_dev));
- if (IS_ERR(data->auto_update)) {
+ if (IS_ERR(data->auto_update))
return PTR_ERR(data->auto_update);
- }
return 0;
}
diff --git a/drivers/hwmon/aquacomputer_d5next.c b/drivers/hwmon/aquacomputer_d5next.c
new file mode 100644
index 000000000000..fb9341a53051
--- /dev/null
+++ b/drivers/hwmon/aquacomputer_d5next.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * hwmon driver for Aquacomputer D5 Next watercooling pump
+ *
+ * The D5 Next sends HID reports (with ID 0x01) every second to report sensor values
+ * (coolant temperature, pump and fan speed, voltage, current and power). It responds to
+ * Get_Report requests, but returns a dummy value of no use.
+ *
+ * Copyright 2021 Aleksa Savic <savicaleksa83@gmail.com>
+ */
+
+#include <asm/unaligned.h>
+#include <linux/debugfs.h>
+#include <linux/hid.h>
+#include <linux/hwmon.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+
+#define DRIVER_NAME "aquacomputer-d5next"
+
+#define D5NEXT_STATUS_REPORT_ID 0x01
+#define D5NEXT_STATUS_UPDATE_INTERVAL (2 * HZ) /* In jiffies (two seconds) */
+
+/* Register offsets for the D5 Next pump */
+
+#define D5NEXT_SERIAL_FIRST_PART 3
+#define D5NEXT_SERIAL_SECOND_PART 5
+#define D5NEXT_FIRMWARE_VERSION 13
+#define D5NEXT_POWER_CYCLES 24
+
+#define D5NEXT_COOLANT_TEMP 87
+
+#define D5NEXT_PUMP_SPEED 116
+#define D5NEXT_FAN_SPEED 103
+
+#define D5NEXT_PUMP_POWER 114
+#define D5NEXT_FAN_POWER 101
+
+#define D5NEXT_PUMP_VOLTAGE 110
+#define D5NEXT_FAN_VOLTAGE 97
+#define D5NEXT_5V_VOLTAGE 57
+
+#define D5NEXT_PUMP_CURRENT 112
+#define D5NEXT_FAN_CURRENT 99
+
+/* Labels for provided values */
+
+#define L_COOLANT_TEMP "Coolant temp"
+
+#define L_PUMP_SPEED "Pump speed"
+#define L_FAN_SPEED "Fan speed"
+
+#define L_PUMP_POWER "Pump power"
+#define L_FAN_POWER "Fan power"
+
+#define L_PUMP_VOLTAGE "Pump voltage"
+#define L_FAN_VOLTAGE "Fan voltage"
+#define L_5V_VOLTAGE "+5V voltage"
+
+#define L_PUMP_CURRENT "Pump current"
+#define L_FAN_CURRENT "Fan current"
+
+static const char *const label_speeds[] = { /* fan labels, indexed by hwmon channel in d5next_read_string() */
+ L_PUMP_SPEED,
+ L_FAN_SPEED,
+};
+
+static const char *const label_power[] = { /* power labels, indexed by hwmon channel */
+ L_PUMP_POWER,
+ L_FAN_POWER,
+};
+
+static const char *const label_voltages[] = { /* voltage labels, indexed by hwmon channel */
+ L_PUMP_VOLTAGE,
+ L_FAN_VOLTAGE,
+ L_5V_VOLTAGE,
+};
+
+static const char *const label_current[] = { /* current labels, indexed by hwmon channel */
+ L_PUMP_CURRENT,
+ L_FAN_CURRENT,
+};
+
+struct d5next_data { /* per-device state; sensor fields are refreshed by d5next_raw_event() */
+ struct hid_device *hdev;
+ struct device *hwmon_dev; /* handle returned by hwmon registration in d5next_probe() */
+ struct dentry *debugfs; /* per-device debugfs directory */
+ s32 temp_input; /* raw 16-bit reading multiplied by 10 */
+ u16 speed_input[2]; /* [0] pump, [1] fan */
+ u32 power_input[2]; /* [0] pump, [1] fan; raw 16-bit reading multiplied by 10000 */
+ u16 voltage_input[3]; /* [0] pump, [1] fan, [2] +5V; NOTE(review): raw 16-bit reading times 10 stored in a u16 - large readings would wrap */
+ u16 current_input[2]; /* [0] pump, [1] fan */
+ u32 serial_number[2]; /* two 16-bit parts, printed as "%05u-%05u" in debugfs */
+ u16 firmware_version;
+ u32 power_cycles; /* How many times the device was powered on */
+ unsigned long updated; /* jiffies value of the last parsed status report */
+};
+
+/* Every attribute of every channel is exposed with read-only mode 0444. */
+static umode_t d5next_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr,
+				 int channel)
+{
+	const umode_t read_only = 0444;
+
+	return read_only;
+}
+
+/*
+ * hwmon core read callback: return the value cached from the most recent
+ * status report.
+ *
+ * Returns -ENODATA when the cache is older than
+ * D5NEXT_STATUS_UPDATE_INTERVAL, -EOPNOTSUPP for sensor types the driver
+ * does not provide, and 0 with *val filled in otherwise.
+ */
+static int d5next_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel,
+		       long *val)
+{
+	const struct d5next_data *cache = dev_get_drvdata(dev);
+	unsigned long expires = cache->updated + D5NEXT_STATUS_UPDATE_INTERVAL;
+
+	if (time_after(jiffies, expires))
+		return -ENODATA;
+
+	switch (type) {
+	case hwmon_temp:
+		*val = cache->temp_input;
+		return 0;
+	case hwmon_fan:
+		*val = cache->speed_input[channel];
+		return 0;
+	case hwmon_power:
+		*val = cache->power_input[channel];
+		return 0;
+	case hwmon_in:
+		*val = cache->voltage_input[channel];
+		return 0;
+	case hwmon_curr:
+		*val = cache->current_input[channel];
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/*
+ * hwmon core read_string callback: hand back the label of the requested
+ * channel. The temperature sensor has one fixed label; the other sensor
+ * types look theirs up by channel number. Returns -EOPNOTSUPP for sensor
+ * types the driver does not provide.
+ */
+static int d5next_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+			      int channel, const char **str)
+{
+	const char *label;
+
+	if (type == hwmon_temp)
+		label = L_COOLANT_TEMP;
+	else if (type == hwmon_fan)
+		label = label_speeds[channel];
+	else if (type == hwmon_power)
+		label = label_power[channel];
+	else if (type == hwmon_in)
+		label = label_voltages[channel];
+	else if (type == hwmon_curr)
+		label = label_current[channel];
+	else
+		return -EOPNOTSUPP;
+
+	*str = label;
+
+	return 0;
+}
+
+static const struct hwmon_ops d5next_hwmon_ops = { /* read-only device: no write callback is provided */
+ .is_visible = d5next_is_visible,
+ .read = d5next_read,
+ .read_string = d5next_read_string,
+};
+
+static const struct hwmon_channel_info *d5next_info[] = { /* NULL-terminated; every channel has an input and a label */
+ HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_LABEL), /* one temperature channel */
+ HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT | HWMON_F_LABEL, HWMON_F_INPUT | HWMON_F_LABEL), /* two channels, matching label_speeds[] */
+ HWMON_CHANNEL_INFO(power, HWMON_P_INPUT | HWMON_P_LABEL, HWMON_P_INPUT | HWMON_P_LABEL), /* two channels, matching label_power[] */
+ HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL, /* three channels, matching label_voltages[] */
+ HWMON_I_INPUT | HWMON_I_LABEL),
+ HWMON_CHANNEL_INFO(curr, HWMON_C_INPUT | HWMON_C_LABEL, HWMON_C_INPUT | HWMON_C_LABEL), /* two channels, matching label_current[] */
+ NULL
+};
+
+static const struct hwmon_chip_info d5next_chip_info = { /* bundle passed to hwmon_device_register_with_info() in d5next_probe() */
+ .ops = &d5next_hwmon_ops,
+ .info = d5next_info,
+};
+
+/*
+ * Parse a raw HID report from the pump.
+ *
+ * Only the status report (D5NEXT_STATUS_REPORT_ID) is decoded: its
+ * big-endian fields are copied into the cached values in struct d5next_data
+ * and the cache timestamp is refreshed. Reports with another ID, and status
+ * reports too short to contain every field read below, leave the cache
+ * untouched.
+ *
+ * Always returns 0.
+ */
+static int d5next_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size)
+{
+	/* D5NEXT_PUMP_SPEED is the highest offset parsed and holds 16 bits */
+	const int min_size = D5NEXT_PUMP_SPEED + 2;
+	struct d5next_data *priv;
+
+	if (report->id != D5NEXT_STATUS_REPORT_ID)
+		return 0;
+
+	/* never read past the end of a truncated report */
+	if (size < min_size)
+		return 0;
+
+	priv = hid_get_drvdata(hdev);
+
+	/* Info provided with every report */
+
+	priv->serial_number[0] = get_unaligned_be16(data + D5NEXT_SERIAL_FIRST_PART);
+	priv->serial_number[1] = get_unaligned_be16(data + D5NEXT_SERIAL_SECOND_PART);
+
+	priv->firmware_version = get_unaligned_be16(data + D5NEXT_FIRMWARE_VERSION);
+	priv->power_cycles = get_unaligned_be32(data + D5NEXT_POWER_CYCLES);
+
+	/* Sensor readings */
+
+	priv->temp_input = get_unaligned_be16(data + D5NEXT_COOLANT_TEMP) * 10;
+
+	priv->speed_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_SPEED);
+	priv->speed_input[1] = get_unaligned_be16(data + D5NEXT_FAN_SPEED);
+
+	priv->power_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_POWER) * 10000;
+	priv->power_input[1] = get_unaligned_be16(data + D5NEXT_FAN_POWER) * 10000;
+
+	priv->voltage_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_VOLTAGE) * 10;
+	priv->voltage_input[1] = get_unaligned_be16(data + D5NEXT_FAN_VOLTAGE) * 10;
+	priv->voltage_input[2] = get_unaligned_be16(data + D5NEXT_5V_VOLTAGE) * 10;
+
+	priv->current_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_CURRENT);
+	priv->current_input[1] = get_unaligned_be16(data + D5NEXT_FAN_CURRENT);
+
+	/* d5next_read() compares against this to detect stale values */
+	priv->updated = jiffies;
+
+	return 0;
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+/* debugfs: print the two cached serial number parts as "xxxxx-yyyyy". */
+static int serial_number_show(struct seq_file *seqf, void *unused)
+{
+	const struct d5next_data *d5 = seqf->private;
+	const u32 *serial = d5->serial_number;
+
+	seq_printf(seqf, "%05u-%05u\n", serial[0], serial[1]);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(serial_number);
+
+/* debugfs: print the cached firmware version as an unsigned decimal. */
+static int firmware_version_show(struct seq_file *seqf, void *unused)
+{
+	const struct d5next_data *d5 = seqf->private;
+	u16 version = d5->firmware_version;
+
+	seq_printf(seqf, "%u\n", version);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(firmware_version);
+
+/* debugfs: print the cached power-cycle counter as an unsigned decimal. */
+static int power_cycles_show(struct seq_file *seqf, void *unused)
+{
+	const struct d5next_data *d5 = seqf->private;
+	u32 cycles = d5->power_cycles;
+
+	seq_printf(seqf, "%u\n", cycles);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(power_cycles);
+
+/*
+ * Create a per-device debugfs directory named "<DRIVER_NAME>-<device name>"
+ * holding read-only files for the serial number, the firmware version and
+ * the power-cycle counter.
+ */
+static void d5next_debugfs_init(struct d5next_data *priv)
+{
+	/*
+	 * DRIVER_NAME plus the separator already takes 20 characters and HID
+	 * device names are typically 19 characters long (for example
+	 * "0003:0C70:F00E.0001"), so a 32-byte buffer would cut off the
+	 * instance suffix that keeps the directory name unique per device.
+	 */
+	char name[64];
+
+	scnprintf(name, sizeof(name), "%s-%s", DRIVER_NAME, dev_name(&priv->hdev->dev));
+
+	priv->debugfs = debugfs_create_dir(name, NULL);
+	debugfs_create_file("serial_number", 0444, priv->debugfs, priv, &serial_number_fops);
+	debugfs_create_file("firmware_version", 0444, priv->debugfs, priv, &firmware_version_fops);
+	debugfs_create_file("power_cycles", 0444, priv->debugfs, priv, &power_cycles_fops);
+}
+
+#else
+
+static void d5next_debugfs_init(struct d5next_data *priv) /* empty stub used when CONFIG_DEBUG_FS is not defined */
+{
+}
+
+#endif
+
+static int d5next_probe(struct hid_device *hdev, const struct hid_device_id *id)
+{ /* Bind to a matching HID device: allocate state, start the HID hardware, register hwmon and debugfs. Returns 0 or a negative errno. */
+ struct d5next_data *priv;
+ int ret;
+
+ priv = devm_kzalloc(&hdev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->hdev = hdev;
+ hid_set_drvdata(hdev, priv); /* d5next_raw_event() and d5next_remove() fetch priv from here */
+
+ priv->updated = jiffies - D5NEXT_STATUS_UPDATE_INTERVAL; /* backdate so the zeroed cache counts as stale until a status report arrives */
+
+ ret = hid_parse(hdev);
+ if (ret)
+ return ret;
+
+ ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW);
+ if (ret)
+ return ret;
+
+ ret = hid_hw_open(hdev);
+ if (ret)
+ goto fail_and_stop;
+
+ priv->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, "d5next", priv,
+ &d5next_chip_info, NULL);
+
+ if (IS_ERR(priv->hwmon_dev)) {
+ ret = PTR_ERR(priv->hwmon_dev);
+ goto fail_and_close;
+ }
+
+ d5next_debugfs_init(priv);
+
+ return 0;
+
+fail_and_close: /* unwind in reverse order of the setup above */
+ hid_hw_close(hdev);
+fail_and_stop:
+ hid_hw_stop(hdev);
+ return ret;
+}
+
+static void d5next_remove(struct hid_device *hdev)
+{ /* Undo d5next_probe(): remove debugfs and hwmon first, then close and stop the HID hardware. */
+ struct d5next_data *priv = hid_get_drvdata(hdev);
+
+ debugfs_remove_recursive(priv->debugfs);
+ hwmon_device_unregister(priv->hwmon_dev);
+
+ hid_hw_close(hdev);
+ hid_hw_stop(hdev);
+}
+
+static const struct hid_device_id d5next_table[] = { /* USB IDs this driver binds to */
+ { HID_USB_DEVICE(0x0c70, 0xf00e) }, /* Aquacomputer D5 Next */
+ {}, /* empty terminating entry */
+};
+
+MODULE_DEVICE_TABLE(hid, d5next_table);
+
+static struct hid_driver d5next_driver = { /* registered in d5next_init(), unregistered in d5next_exit() */
+ .name = DRIVER_NAME,
+ .id_table = d5next_table,
+ .probe = d5next_probe,
+ .remove = d5next_remove,
+ .raw_event = d5next_raw_event, /* receives the status reports */
+};
+
+/* Module entry point: register the HID driver and pass on the result. */
+static int __init d5next_init(void)
+{
+	int ret = hid_register_driver(&d5next_driver);
+
+	return ret;
+}
+
+static void __exit d5next_exit(void) /* module exit point: unregister the HID driver */
+{
+ hid_unregister_driver(&d5next_driver);
+}
+
+/* Request to initialize after the HID bus to ensure it's not being loaded before */
+
+late_initcall(d5next_init);
+module_exit(d5next_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Aleksa Savic <savicaleksa83@gmail.com>");
+MODULE_DESCRIPTION("Hwmon driver for Aquacomputer D5 Next pump");
diff --git a/drivers/hwmon/axi-fan-control.c b/drivers/hwmon/axi-fan-control.c
index e3f6b03e6764..d2092c17d993 100644
--- a/drivers/hwmon/axi-fan-control.c
+++ b/drivers/hwmon/axi-fan-control.c
@@ -8,6 +8,7 @@
#include <linux/clk.h>
#include <linux/fpga/adi-axi-common.h>
#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/kernel.h>
@@ -23,6 +24,14 @@
#define ADI_REG_PWM_PERIOD 0x00c0
#define ADI_REG_TACH_MEASUR 0x00c4
#define ADI_REG_TEMPERATURE 0x00c8
+#define ADI_REG_TEMP_00_H 0x0100
+#define ADI_REG_TEMP_25_L 0x0104
+#define ADI_REG_TEMP_25_H 0x0108
+#define ADI_REG_TEMP_50_L 0x010c
+#define ADI_REG_TEMP_50_H 0x0110
+#define ADI_REG_TEMP_75_L 0x0114
+#define ADI_REG_TEMP_75_H 0x0118
+#define ADI_REG_TEMP_100_L 0x011c
#define ADI_REG_IRQ_MASK 0x0040
#define ADI_REG_IRQ_PENDING 0x0044
@@ -62,6 +71,39 @@ static inline u32 axi_ioread(const u32 reg,
return ioread32(ctl->base + reg);
}
+/*
+ * The core calculates the temperature as:
+ * T = (raw * 509.3140064 / 65535) - 280.2308787
+ */
+/*
+ * sysfs show callback: read the threshold register selected by the
+ * attribute's index, convert the raw value to a temperature and print it as
+ * a signed decimal.
+ */
+static ssize_t axi_fan_control_show(struct device *dev, struct device_attribute *da, char *buf)
+{
+	struct axi_fan_control_data *ctl = dev_get_drvdata(dev);
+	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+	u32 raw = axi_ioread(attr->index, ctl);
+	long long temp;
+
+	/*
+	 * Small raw values convert to temperatures below zero, so the result
+	 * has to live in a signed type; in a u32 it would wrap around and be
+	 * printed as a huge positive number.
+	 */
+	temp = (long long)DIV_ROUND_CLOSEST_ULL(raw * 509314ULL, 65535) - 280230;
+
+	return sprintf(buf, "%lld\n", temp);
+}
+
+/*
+ * sysfs store callback: parse an unsigned decimal temperature from @buf,
+ * convert it to the raw register representation and write it to the
+ * threshold register selected by the attribute's index.
+ *
+ * Returns @count on success or the kstrtou32() error code.
+ */
+static ssize_t axi_fan_control_store(struct device *dev, struct device_attribute *da,
+				     const char *buf, size_t count)
+{
+	struct axi_fan_control_data *ctl = dev_get_drvdata(dev);
+	u32 reg = to_sensor_dev_attr(da)->index;
+	u32 value;
+	u32 raw;
+	int err;
+
+	err = kstrtou32(buf, 10, &value);
+	if (err)
+		return err;
+
+	/* inverse of the conversion used when showing the attribute */
+	raw = DIV_ROUND_CLOSEST_ULL((value + 280230) * 65535ULL, 509314);
+	axi_iowrite(raw, reg, ctl);
+
+	return count;
+}
+
static long axi_fan_control_get_pwm_duty(const struct axi_fan_control_data *ctl)
{
u32 pwm_width = axi_ioread(ADI_REG_PWM_WIDTH, ctl);
@@ -283,18 +325,9 @@ static irqreturn_t axi_fan_control_irq_handler(int irq, void *data)
u32 irq_pending = axi_ioread(ADI_REG_IRQ_PENDING, ctl);
u32 clear_mask;
- if (irq_pending & ADI_IRQ_SRC_NEW_MEASUR) {
- if (ctl->update_tacho_params) {
- u32 new_tach = axi_ioread(ADI_REG_TACH_MEASUR, ctl);
-
- /* get 25% tolerance */
- u32 tach_tol = DIV_ROUND_CLOSEST(new_tach * 25, 100);
- /* set new tacho parameters */
- axi_iowrite(new_tach, ADI_REG_TACH_PERIOD, ctl);
- axi_iowrite(tach_tol, ADI_REG_TACH_TOLERANCE, ctl);
- ctl->update_tacho_params = false;
- }
- }
+ if (irq_pending & ADI_IRQ_SRC_TEMP_INCREASE)
+ /* hardware requested a new pwm */
+ ctl->hw_pwm_req = true;
if (irq_pending & ADI_IRQ_SRC_PWM_CHANGED) {
/*
@@ -310,9 +343,18 @@ static irqreturn_t axi_fan_control_irq_handler(int irq, void *data)
}
}
- if (irq_pending & ADI_IRQ_SRC_TEMP_INCREASE)
- /* hardware requested a new pwm */
- ctl->hw_pwm_req = true;
+ if (irq_pending & ADI_IRQ_SRC_NEW_MEASUR) {
+ if (ctl->update_tacho_params) {
+ u32 new_tach = axi_ioread(ADI_REG_TACH_MEASUR, ctl);
+ /* get 25% tolerance */
+ u32 tach_tol = DIV_ROUND_CLOSEST(new_tach * 25, 100);
+
+ /* set new tacho parameters */
+ axi_iowrite(new_tach, ADI_REG_TACH_PERIOD, ctl);
+ axi_iowrite(tach_tol, ADI_REG_TACH_TOLERANCE, ctl);
+ ctl->update_tacho_params = false;
+ }
+ }
if (irq_pending & ADI_IRQ_SRC_TACH_ERR)
ctl->fan_fault = 1;
@@ -351,6 +393,11 @@ static int axi_fan_control_init(struct axi_fan_control_data *ctl,
return ret;
}
+/* devm action registered in probe: disable and unprepare the clock passed in @clk. */
+static void axi_fan_control_clk_disable(void *clk)
+{
+	void *core_clk = clk;
+
+	clk_disable_unprepare(core_clk);
+}
+
static const struct hwmon_channel_info *axi_fan_control_info[] = {
HWMON_CHANNEL_INFO(pwm, HWMON_PWM_INPUT),
HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT | HWMON_F_FAULT | HWMON_F_LABEL),
@@ -370,6 +417,36 @@ static const struct hwmon_chip_info axi_chip_info = {
.info = axi_fan_control_info,
};
+/* temperature threshold below which PWM should be 0% */
+static SENSOR_DEVICE_ATTR_RW(pwm1_auto_point1_temp_hyst, axi_fan_control, ADI_REG_TEMP_00_H);
+/* temperature threshold above which PWM should be 25% */
+static SENSOR_DEVICE_ATTR_RW(pwm1_auto_point1_temp, axi_fan_control, ADI_REG_TEMP_25_L);
+/* temperature threshold below which PWM should be 25% */
+static SENSOR_DEVICE_ATTR_RW(pwm1_auto_point2_temp_hyst, axi_fan_control, ADI_REG_TEMP_25_H);
+/* temperature threshold above which PWM should be 50% */
+static SENSOR_DEVICE_ATTR_RW(pwm1_auto_point2_temp, axi_fan_control, ADI_REG_TEMP_50_L);
+/* temperature threshold below which PWM should be 50% */
+static SENSOR_DEVICE_ATTR_RW(pwm1_auto_point3_temp_hyst, axi_fan_control, ADI_REG_TEMP_50_H);
+/* temperature threshold above which PWM should be 75% */
+static SENSOR_DEVICE_ATTR_RW(pwm1_auto_point3_temp, axi_fan_control, ADI_REG_TEMP_75_L);
+/* temperature threshold below which PWM should be 75% */
+static SENSOR_DEVICE_ATTR_RW(pwm1_auto_point4_temp_hyst, axi_fan_control, ADI_REG_TEMP_75_H);
+/* temperature threshold above which PWM should be 100% */
+static SENSOR_DEVICE_ATTR_RW(pwm1_auto_point4_temp, axi_fan_control, ADI_REG_TEMP_100_L);
+
+static struct attribute *axi_fan_control_attrs[] = { /* extra sysfs files for the temperature thresholds, hysteresis/point pairs for points 1-4 */
+ &sensor_dev_attr_pwm1_auto_point1_temp_hyst.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point1_temp.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point2_temp_hyst.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point2_temp.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point3_temp_hyst.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point3_temp.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point4_temp_hyst.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point4_temp.dev_attr.attr,
+ NULL, /* list terminator */
+};
+ATTRIBUTE_GROUPS(axi_fan_control); /* provides axi_fan_control_groups, which probe passes to the hwmon registration call */
+
static const u32 version_1_0_0 = ADI_AXI_PCORE_VER(1, 0, 'a');
static const struct of_device_id axi_fan_control_of_match[] = {
@@ -406,6 +483,14 @@ static int axi_fan_control_probe(struct platform_device *pdev)
return PTR_ERR(clk);
}
+ ret = clk_prepare_enable(clk);
+ if (ret)
+ return ret;
+
+ ret = devm_add_action_or_reset(&pdev->dev, axi_fan_control_clk_disable, clk);
+ if (ret)
+ return ret;
+
ctl->clk_rate = clk_get_rate(clk);
if (!ctl->clk_rate)
return -EINVAL;
@@ -446,7 +531,7 @@ static int axi_fan_control_probe(struct platform_device *pdev)
name,
ctl,
&axi_chip_info,
- NULL);
+ axi_fan_control_groups);
return PTR_ERR_OR_ZERO(ctl->hdev);
}
diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index f2221ca0aa7b..774c1b0715d9 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -14,7 +14,9 @@
#include <linux/cpu.h>
#include <linux/delay.h>
+#include <linux/err.h>
#include <linux/module.h>
+#include <linux/platform_device.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
@@ -23,7 +25,6 @@
#include <linux/capability.h>
#include <linux/mutex.h>
#include <linux/hwmon.h>
-#include <linux/hwmon-sysfs.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/sched.h>
@@ -58,32 +59,24 @@
#define I8K_POWER_AC 0x05
#define I8K_POWER_BATTERY 0x01
-static DEFINE_MUTEX(i8k_mutex);
-static char bios_version[4];
-static char bios_machineid[16];
-static struct device *i8k_hwmon_dev;
-static u32 i8k_hwmon_flags;
-static uint i8k_fan_mult = I8K_FAN_MULT;
-static uint i8k_pwm_mult;
-static uint i8k_fan_max = I8K_FAN_HIGH;
-static bool disallow_fan_type_call;
-static bool disallow_fan_support;
-static unsigned int manual_fan;
-static unsigned int auto_fan;
-
-#define I8K_HWMON_HAVE_TEMP1 (1 << 0)
-#define I8K_HWMON_HAVE_TEMP2 (1 << 1)
-#define I8K_HWMON_HAVE_TEMP3 (1 << 2)
-#define I8K_HWMON_HAVE_TEMP4 (1 << 3)
-#define I8K_HWMON_HAVE_TEMP5 (1 << 4)
-#define I8K_HWMON_HAVE_TEMP6 (1 << 5)
-#define I8K_HWMON_HAVE_TEMP7 (1 << 6)
-#define I8K_HWMON_HAVE_TEMP8 (1 << 7)
-#define I8K_HWMON_HAVE_TEMP9 (1 << 8)
-#define I8K_HWMON_HAVE_TEMP10 (1 << 9)
-#define I8K_HWMON_HAVE_FAN1 (1 << 10)
-#define I8K_HWMON_HAVE_FAN2 (1 << 11)
-#define I8K_HWMON_HAVE_FAN3 (1 << 12)
+#define DELL_SMM_NO_TEMP 10
+#define DELL_SMM_NO_FANS 3
+
+struct dell_smm_data { /* per-device state replacing the former file-scope globals */
+ struct mutex i8k_mutex; /* lock for sensors writes */
+ char bios_version[4];
+ char bios_machineid[16];
+ uint i8k_fan_mult; /* multiplier applied to the raw fan speed returned by the SMM call */
+ uint i8k_pwm_mult;
+ uint i8k_fan_max; /* upper clamp for the speed level in i8k_set_fan() */
+ bool disallow_fan_type_call; /* when set, _i8k_get_fan_type() returns -EINVAL */
+ bool disallow_fan_support; /* when set, the fan SMM helpers return -EINVAL */
+ unsigned int manual_fan; /* SMM command loaded into eax to disable automatic fan control */
+ unsigned int auto_fan; /* SMM command loaded into eax to enable automatic fan control */
+ int temp_type[DELL_SMM_NO_TEMP];
+ bool fan[DELL_SMM_NO_FANS];
+ int fan_type[DELL_SMM_NO_FANS]; /* cache for i8k_get_fan_type(); INT_MIN means not queried yet */
+};
MODULE_AUTHOR("Massimo Dal Zotto (dz@debian.org)");
MODULE_AUTHOR("Pali Rohár <pali@kernel.org>");
@@ -126,7 +119,34 @@ struct smm_regs {
unsigned int edi __packed;
};
-static inline const char *i8k_get_dmi_data(int field)
+static const char * const temp_labels[] = { /* temperature sensor labels; presumably indexed by the sensor type code - confirm against the lookup code */
+ "CPU",
+ "GPU",
+ "SODIMM",
+ "Other",
+ "Ambient",
+ "Other",
+};
+
+static const char * const fan_labels[] = { /* fan labels; presumably indexed by the fan type code - confirm against the lookup code */
+ "Processor Fan",
+ "Motherboard Fan",
+ "Video Fan",
+ "Power Supply Fan",
+ "Chipset Fan",
+ "Other Fan",
+};
+
+static const char * const docking_labels[] = { /* same order as fan_labels[], with a "Docking" prefix on every entry */
+ "Docking Processor Fan",
+ "Docking Motherboard Fan",
+ "Docking Video Fan",
+ "Docking Power Supply Fan",
+ "Docking Chipset Fan",
+ "Docking Other Fan",
+};
+
+static inline const char __init *i8k_get_dmi_data(int field)
{
const char *dmi_data = dmi_get_system_info(field);
@@ -138,17 +158,12 @@ static inline const char *i8k_get_dmi_data(int field)
*/
static int i8k_smm_func(void *par)
{
- int rc;
+ ktime_t calltime = ktime_get();
struct smm_regs *regs = par;
int eax = regs->eax;
-
-#ifdef DEBUG
int ebx = regs->ebx;
- unsigned long duration;
- ktime_t calltime, delta, rettime;
-
- calltime = ktime_get();
-#endif
+ long long duration;
+ int rc;
/* SMM requires CPU 0 */
if (smp_processor_id() != 0)
@@ -210,13 +225,9 @@ static int i8k_smm_func(void *par)
if (rc != 0 || (regs->eax & 0xffff) == 0xffff || regs->eax == eax)
rc = -EINVAL;
-#ifdef DEBUG
- rettime = ktime_get();
- delta = ktime_sub(rettime, calltime);
- duration = ktime_to_ns(delta) >> 10;
- pr_debug("smm(0x%.4x 0x%.4x) = 0x%.4x (took %7lu usecs)\n", eax, ebx,
- (rc ? 0xffff : regs->eax & 0xffff), duration);
-#endif
+ duration = ktime_us_delta(ktime_get(), calltime);
+ pr_debug("smm(0x%.4x 0x%.4x) = 0x%.4x (took %7lld usecs)\n", eax, ebx,
+ (rc ? 0xffff : regs->eax & 0xffff), duration);
return rc;
}
@@ -228,9 +239,9 @@ static int i8k_smm(struct smm_regs *regs)
{
int ret;
- get_online_cpus();
+ cpus_read_lock();
ret = smp_call_on_cpu(0, i8k_smm_func, regs, true);
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -238,11 +249,11 @@ static int i8k_smm(struct smm_regs *regs)
/*
* Read the fan status.
*/
-static int i8k_get_fan_status(int fan)
+static int i8k_get_fan_status(const struct dell_smm_data *data, int fan)
{
struct smm_regs regs = { .eax = I8K_SMM_GET_FAN, };
- if (disallow_fan_support)
+ if (data->disallow_fan_support)
return -EINVAL;
regs.ebx = fan & 0xff;
@@ -252,87 +263,85 @@ static int i8k_get_fan_status(int fan)
/*
* Read the fan speed in RPM.
*/
-static int i8k_get_fan_speed(int fan)
+static int i8k_get_fan_speed(const struct dell_smm_data *data, int fan)
{
struct smm_regs regs = { .eax = I8K_SMM_GET_SPEED, };
- if (disallow_fan_support)
+ if (data->disallow_fan_support)
return -EINVAL;
regs.ebx = fan & 0xff;
- return i8k_smm(&regs) ? : (regs.eax & 0xffff) * i8k_fan_mult;
+ return i8k_smm(&regs) ? : (regs.eax & 0xffff) * data->i8k_fan_mult;
}
/*
* Read the fan type.
*/
-static int _i8k_get_fan_type(int fan)
+static int _i8k_get_fan_type(const struct dell_smm_data *data, int fan)
{
struct smm_regs regs = { .eax = I8K_SMM_GET_FAN_TYPE, };
- if (disallow_fan_support || disallow_fan_type_call)
+ if (data->disallow_fan_support || data->disallow_fan_type_call)
return -EINVAL;
regs.ebx = fan & 0xff;
return i8k_smm(&regs) ? : regs.eax & 0xff;
}
-static int i8k_get_fan_type(int fan)
+static int i8k_get_fan_type(struct dell_smm_data *data, int fan)
{
/* I8K_SMM_GET_FAN_TYPE SMM call is expensive, so cache values */
- static int types[3] = { INT_MIN, INT_MIN, INT_MIN };
-
- if (types[fan] == INT_MIN)
- types[fan] = _i8k_get_fan_type(fan);
+ if (data->fan_type[fan] == INT_MIN)
+ data->fan_type[fan] = _i8k_get_fan_type(data, fan);
- return types[fan];
+ return data->fan_type[fan];
}
/*
* Read the fan nominal rpm for specific fan speed.
*/
-static int i8k_get_fan_nominal_speed(int fan, int speed)
+static int __init i8k_get_fan_nominal_speed(const struct dell_smm_data *data, int fan, int speed)
{
struct smm_regs regs = { .eax = I8K_SMM_GET_NOM_SPEED, };
- if (disallow_fan_support)
+ if (data->disallow_fan_support)
return -EINVAL;
regs.ebx = (fan & 0xff) | (speed << 8);
- return i8k_smm(&regs) ? : (regs.eax & 0xffff) * i8k_fan_mult;
+ return i8k_smm(&regs) ? : (regs.eax & 0xffff) * data->i8k_fan_mult;
}
/*
* Enable or disable automatic BIOS fan control support
*/
-static int i8k_enable_fan_auto_mode(bool enable)
+static int i8k_enable_fan_auto_mode(const struct dell_smm_data *data, bool enable)
{
struct smm_regs regs = { };
- if (disallow_fan_support)
+ if (data->disallow_fan_support)
return -EINVAL;
- regs.eax = enable ? auto_fan : manual_fan;
+ regs.eax = enable ? data->auto_fan : data->manual_fan;
return i8k_smm(&regs);
}
/*
* Set the fan speed (off, low, high). Returns the new fan status.
*/
-static int i8k_set_fan(int fan, int speed)
+static int i8k_set_fan(const struct dell_smm_data *data, int fan, int speed)
{
struct smm_regs regs = { .eax = I8K_SMM_SET_FAN, };
- if (disallow_fan_support)
+ if (data->disallow_fan_support)
return -EINVAL;
- speed = (speed < 0) ? 0 : ((speed > i8k_fan_max) ? i8k_fan_max : speed);
+ speed = (speed < 0) ? 0 : ((speed > data->i8k_fan_max) ? data->i8k_fan_max : speed);
regs.ebx = (fan & 0xff) | (speed << 8);
- return i8k_smm(&regs) ? : i8k_get_fan_status(fan);
+ return i8k_smm(&regs) ? : i8k_get_fan_status(data, fan);
}
-static int i8k_get_temp_type(int sensor)
+static int __init i8k_get_temp_type(int sensor)
{
struct smm_regs regs = { .eax = I8K_SMM_GET_TEMP_TYPE, };
@@ -382,7 +391,7 @@ static int i8k_get_temp(int sensor)
return temp;
}
-static int i8k_get_dell_signature(int req_fn)
+static int __init i8k_get_dell_signature(int req_fn)
{
struct smm_regs regs = { .eax = req_fn, };
int rc;
@@ -440,7 +449,7 @@ static int i8k_get_power_status(void)
*/
static int
-i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg)
+i8k_ioctl_unlocked(struct file *fp, struct dell_smm_data *data, unsigned int cmd, unsigned long arg)
{
int val = 0;
int speed;
@@ -452,12 +461,12 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg)
switch (cmd) {
case I8K_BIOS_VERSION:
- if (!isdigit(bios_version[0]) || !isdigit(bios_version[1]) ||
- !isdigit(bios_version[2]))
+ if (!isdigit(data->bios_version[0]) || !isdigit(data->bios_version[1]) ||
+ !isdigit(data->bios_version[2]))
return -EINVAL;
- val = (bios_version[0] << 16) |
- (bios_version[1] << 8) | bios_version[2];
+ val = (data->bios_version[0] << 16) |
+ (data->bios_version[1] << 8) | data->bios_version[2];
break;
case I8K_MACHINE_ID:
@@ -465,7 +474,7 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg)
return -EPERM;
memset(buff, 0, sizeof(buff));
- strlcpy(buff, bios_machineid, sizeof(buff));
+ strscpy(buff, data->bios_machineid, sizeof(buff));
break;
case I8K_FN_STATUS:
@@ -484,14 +493,14 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg)
if (copy_from_user(&val, argp, sizeof(int)))
return -EFAULT;
- val = i8k_get_fan_speed(val);
+ val = i8k_get_fan_speed(data, val);
break;
case I8K_GET_FAN:
if (copy_from_user(&val, argp, sizeof(int)))
return -EFAULT;
- val = i8k_get_fan_status(val);
+ val = i8k_get_fan_status(data, val);
break;
case I8K_SET_FAN:
@@ -504,7 +513,7 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg)
if (copy_from_user(&speed, argp + 1, sizeof(int)))
return -EFAULT;
- val = i8k_set_fan(val, speed);
+ val = i8k_set_fan(data, val, speed);
break;
default:
@@ -537,11 +546,12 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg)
static long i8k_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
{
+ struct dell_smm_data *data = PDE_DATA(file_inode(fp));
long ret;
- mutex_lock(&i8k_mutex);
- ret = i8k_ioctl_unlocked(fp, cmd, arg);
- mutex_unlock(&i8k_mutex);
+ mutex_lock(&data->i8k_mutex);
+ ret = i8k_ioctl_unlocked(fp, data, cmd, arg);
+ mutex_unlock(&data->i8k_mutex);
return ret;
}
@@ -551,17 +561,18 @@ static long i8k_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
*/
static int i8k_proc_show(struct seq_file *seq, void *offset)
{
+ struct dell_smm_data *data = seq->private;
int fn_key, cpu_temp, ac_power;
int left_fan, right_fan, left_speed, right_speed;
- cpu_temp = i8k_get_temp(0); /* 11100 µs */
- left_fan = i8k_get_fan_status(I8K_FAN_LEFT); /* 580 µs */
- right_fan = i8k_get_fan_status(I8K_FAN_RIGHT); /* 580 µs */
- left_speed = i8k_get_fan_speed(I8K_FAN_LEFT); /* 580 µs */
- right_speed = i8k_get_fan_speed(I8K_FAN_RIGHT); /* 580 µs */
- fn_key = i8k_get_fn_status(); /* 750 µs */
+ cpu_temp = i8k_get_temp(0); /* 11100 µs */
+ left_fan = i8k_get_fan_status(data, I8K_FAN_LEFT); /* 580 µs */
+ right_fan = i8k_get_fan_status(data, I8K_FAN_RIGHT); /* 580 µs */
+ left_speed = i8k_get_fan_speed(data, I8K_FAN_LEFT); /* 580 µs */
+ right_speed = i8k_get_fan_speed(data, I8K_FAN_RIGHT); /* 580 µs */
+ fn_key = i8k_get_fn_status(); /* 750 µs */
if (power_status)
- ac_power = i8k_get_power_status(); /* 14700 µs */
+ ac_power = i8k_get_power_status(); /* 14700 µs */
else
ac_power = -1;
@@ -581,8 +592,8 @@ static int i8k_proc_show(struct seq_file *seq, void *offset)
*/
seq_printf(seq, "%s %s %s %d %d %d %d %d %d %d\n",
I8K_PROC_FMT,
- bios_version,
- (restricted && !capable(CAP_SYS_ADMIN)) ? "-1" : bios_machineid,
+ data->bios_version,
+ (restricted && !capable(CAP_SYS_ADMIN)) ? "-1" : data->bios_machineid,
cpu_temp,
left_fan, right_fan, left_speed, right_speed,
ac_power, fn_key);
@@ -592,7 +603,7 @@ static int i8k_proc_show(struct seq_file *seq, void *offset)
static int i8k_open_fs(struct inode *inode, struct file *file)
{
- return single_open(file, i8k_proc_show, NULL);
+ return single_open(file, i8k_proc_show, PDE_DATA(inode));
}
static const struct proc_ops i8k_proc_ops = {
@@ -603,24 +614,24 @@ static const struct proc_ops i8k_proc_ops = {
.proc_ioctl = i8k_ioctl,
};
-static void __init i8k_init_procfs(void)
+static void i8k_exit_procfs(void *param)
{
- /* Register the proc entry */
- proc_create("i8k", 0, NULL, &i8k_proc_ops);
+ remove_proc_entry("i8k", NULL);
}
-static void __exit i8k_exit_procfs(void)
+static void __init i8k_init_procfs(struct device *dev)
{
- remove_proc_entry("i8k", NULL);
-}
+ struct dell_smm_data *data = dev_get_drvdata(dev);
-#else
+ /* Register the proc entry */
+ proc_create_data("i8k", 0, NULL, &i8k_proc_ops, data);
-static inline void __init i8k_init_procfs(void)
-{
+ devm_add_action_or_reset(dev, i8k_exit_procfs, NULL);
}
-static inline void __exit i8k_exit_procfs(void)
+#else
+
+static void __init i8k_init_procfs(struct device *dev)
{
}
@@ -630,341 +641,299 @@ static inline void __exit i8k_exit_procfs(void)
* Hwmon interface
*/
-static ssize_t i8k_hwmon_temp_label_show(struct device *dev,
- struct device_attribute *devattr,
- char *buf)
+static umode_t dell_smm_is_visible(const void *drvdata, enum hwmon_sensor_types type, u32 attr,
+ int channel)
{
- static const char * const labels[] = {
- "CPU",
- "GPU",
- "SODIMM",
- "Other",
- "Ambient",
- "Other",
- };
- int index = to_sensor_dev_attr(devattr)->index;
- int type;
+ const struct dell_smm_data *data = drvdata;
- type = i8k_get_temp_type(index);
- if (type < 0)
- return type;
- if (type >= ARRAY_SIZE(labels))
- type = ARRAY_SIZE(labels) - 1;
- return sprintf(buf, "%s\n", labels[type]);
+ switch (type) {
+ case hwmon_temp:
+ switch (attr) {
+ case hwmon_temp_input:
+ case hwmon_temp_label:
+ if (data->temp_type[channel] >= 0)
+ return 0444;
+
+ break;
+ default:
+ break;
+ }
+ break;
+ case hwmon_fan:
+ if (data->disallow_fan_support)
+ break;
+
+ switch (attr) {
+ case hwmon_fan_input:
+ if (data->fan[channel])
+ return 0444;
+
+ break;
+ case hwmon_fan_label:
+ if (data->fan[channel] && !data->disallow_fan_type_call)
+ return 0444;
+
+ break;
+ default:
+ break;
+ }
+ break;
+ case hwmon_pwm:
+ if (data->disallow_fan_support)
+ break;
+
+ switch (attr) {
+ case hwmon_pwm_input:
+ if (data->fan[channel])
+ return 0644;
+
+ break;
+ case hwmon_pwm_enable:
+ if (data->auto_fan)
+ /*
+ * There is no command for retrieve the current status
+ * from BIOS, and userspace/firmware itself can change
+ * it.
+ * Thus we can only provide write-only access for now.
+ */
+ return 0200;
+
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
}
-static ssize_t i8k_hwmon_temp_show(struct device *dev,
- struct device_attribute *devattr,
- char *buf)
+static int dell_smm_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel,
+ long *val)
{
- int index = to_sensor_dev_attr(devattr)->index;
- int temp;
+ struct dell_smm_data *data = dev_get_drvdata(dev);
+ int ret;
+
+ switch (type) {
+ case hwmon_temp:
+ switch (attr) {
+ case hwmon_temp_input:
+ ret = i8k_get_temp(channel);
+ if (ret < 0)
+ return ret;
+
+ *val = ret * 1000;
+
+ return 0;
+ default:
+ break;
+ }
+ break;
+ case hwmon_fan:
+ switch (attr) {
+ case hwmon_fan_input:
+ ret = i8k_get_fan_speed(data, channel);
+ if (ret < 0)
+ return ret;
- temp = i8k_get_temp(index);
- if (temp < 0)
- return temp;
- return sprintf(buf, "%d\n", temp * 1000);
+ *val = ret;
+
+ return 0;
+ default:
+ break;
+ }
+ break;
+ case hwmon_pwm:
+ switch (attr) {
+ case hwmon_pwm_input:
+ ret = i8k_get_fan_status(data, channel);
+ if (ret < 0)
+ return ret;
+
+ *val = clamp_val(ret * data->i8k_pwm_mult, 0, 255);
+
+ return 0;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return -EOPNOTSUPP;
}
-static ssize_t i8k_hwmon_fan_label_show(struct device *dev,
- struct device_attribute *devattr,
- char *buf)
+static const char *dell_smm_fan_label(struct dell_smm_data *data, int channel)
{
- static const char * const labels[] = {
- "Processor Fan",
- "Motherboard Fan",
- "Video Fan",
- "Power Supply Fan",
- "Chipset Fan",
- "Other Fan",
- };
- int index = to_sensor_dev_attr(devattr)->index;
bool dock = false;
- int type;
+ int type = i8k_get_fan_type(data, channel);
- type = i8k_get_fan_type(index);
if (type < 0)
- return type;
+ return ERR_PTR(type);
if (type & 0x10) {
dock = true;
type &= 0x0F;
}
- if (type >= ARRAY_SIZE(labels))
- type = (ARRAY_SIZE(labels) - 1);
-
- return sprintf(buf, "%s%s\n", (dock ? "Docking " : ""), labels[type]);
-}
-
-static ssize_t i8k_hwmon_fan_show(struct device *dev,
- struct device_attribute *devattr, char *buf)
-{
- int index = to_sensor_dev_attr(devattr)->index;
- int fan_speed;
+ if (type >= ARRAY_SIZE(fan_labels))
+ type = ARRAY_SIZE(fan_labels) - 1;
- fan_speed = i8k_get_fan_speed(index);
- if (fan_speed < 0)
- return fan_speed;
- return sprintf(buf, "%d\n", fan_speed);
+ return dock ? docking_labels[type] : fan_labels[type];
}
-static ssize_t i8k_hwmon_pwm_show(struct device *dev,
- struct device_attribute *devattr, char *buf)
+static int dell_smm_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+ int channel, const char **str)
{
- int index = to_sensor_dev_attr(devattr)->index;
- int status;
+ struct dell_smm_data *data = dev_get_drvdata(dev);
+
+ switch (type) {
+ case hwmon_temp:
+ switch (attr) {
+ case hwmon_temp_label:
+ *str = temp_labels[data->temp_type[channel]];
+ return 0;
+ default:
+ break;
+ }
+ break;
+ case hwmon_fan:
+ switch (attr) {
+ case hwmon_fan_label:
+ *str = dell_smm_fan_label(data, channel);
+ return PTR_ERR_OR_ZERO(*str);
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
- status = i8k_get_fan_status(index);
- if (status < 0)
- return -EIO;
- return sprintf(buf, "%d\n", clamp_val(status * i8k_pwm_mult, 0, 255));
+ return -EOPNOTSUPP;
}
-static ssize_t i8k_hwmon_pwm_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static int dell_smm_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel,
+ long val)
{
- int index = to_sensor_dev_attr(attr)->index;
- unsigned long val;
+ struct dell_smm_data *data = dev_get_drvdata(dev);
+ unsigned long pwm;
+ bool enable;
int err;
- err = kstrtoul(buf, 10, &val);
- if (err)
- return err;
- val = clamp_val(DIV_ROUND_CLOSEST(val, i8k_pwm_mult), 0, i8k_fan_max);
+ switch (type) {
+ case hwmon_pwm:
+ switch (attr) {
+ case hwmon_pwm_input:
+ pwm = clamp_val(DIV_ROUND_CLOSEST(val, data->i8k_pwm_mult), 0,
+ data->i8k_fan_max);
- mutex_lock(&i8k_mutex);
- err = i8k_set_fan(index, val);
- mutex_unlock(&i8k_mutex);
+ mutex_lock(&data->i8k_mutex);
+ err = i8k_set_fan(data, channel, pwm);
+ mutex_unlock(&data->i8k_mutex);
- return err < 0 ? -EIO : count;
-}
+ if (err < 0)
+ return err;
-static ssize_t i8k_hwmon_pwm_enable_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- int err;
- bool enable;
- unsigned long val;
+ return 0;
+ case hwmon_pwm_enable:
+ if (!val)
+ return -EINVAL;
- if (!auto_fan)
- return -ENODEV;
+ if (val == 1)
+ enable = false;
+ else
+ enable = true;
- err = kstrtoul(buf, 10, &val);
- if (err)
- return err;
+ mutex_lock(&data->i8k_mutex);
+ err = i8k_enable_fan_auto_mode(data, enable);
+ mutex_unlock(&data->i8k_mutex);
- if (val == 1)
- enable = false;
- else if (val == 2)
- enable = true;
- else
- return -EINVAL;
+ if (err < 0)
+ return err;
- mutex_lock(&i8k_mutex);
- err = i8k_enable_fan_auto_mode(enable);
- mutex_unlock(&i8k_mutex);
+ return 0;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
- return err ? err : count;
+ return -EOPNOTSUPP;
}
-static SENSOR_DEVICE_ATTR_RO(temp1_input, i8k_hwmon_temp, 0);
-static SENSOR_DEVICE_ATTR_RO(temp1_label, i8k_hwmon_temp_label, 0);
-static SENSOR_DEVICE_ATTR_RO(temp2_input, i8k_hwmon_temp, 1);
-static SENSOR_DEVICE_ATTR_RO(temp2_label, i8k_hwmon_temp_label, 1);
-static SENSOR_DEVICE_ATTR_RO(temp3_input, i8k_hwmon_temp, 2);
-static SENSOR_DEVICE_ATTR_RO(temp3_label, i8k_hwmon_temp_label, 2);
-static SENSOR_DEVICE_ATTR_RO(temp4_input, i8k_hwmon_temp, 3);
-static SENSOR_DEVICE_ATTR_RO(temp4_label, i8k_hwmon_temp_label, 3);
-static SENSOR_DEVICE_ATTR_RO(temp5_input, i8k_hwmon_temp, 4);
-static SENSOR_DEVICE_ATTR_RO(temp5_label, i8k_hwmon_temp_label, 4);
-static SENSOR_DEVICE_ATTR_RO(temp6_input, i8k_hwmon_temp, 5);
-static SENSOR_DEVICE_ATTR_RO(temp6_label, i8k_hwmon_temp_label, 5);
-static SENSOR_DEVICE_ATTR_RO(temp7_input, i8k_hwmon_temp, 6);
-static SENSOR_DEVICE_ATTR_RO(temp7_label, i8k_hwmon_temp_label, 6);
-static SENSOR_DEVICE_ATTR_RO(temp8_input, i8k_hwmon_temp, 7);
-static SENSOR_DEVICE_ATTR_RO(temp8_label, i8k_hwmon_temp_label, 7);
-static SENSOR_DEVICE_ATTR_RO(temp9_input, i8k_hwmon_temp, 8);
-static SENSOR_DEVICE_ATTR_RO(temp9_label, i8k_hwmon_temp_label, 8);
-static SENSOR_DEVICE_ATTR_RO(temp10_input, i8k_hwmon_temp, 9);
-static SENSOR_DEVICE_ATTR_RO(temp10_label, i8k_hwmon_temp_label, 9);
-static SENSOR_DEVICE_ATTR_RO(fan1_input, i8k_hwmon_fan, 0);
-static SENSOR_DEVICE_ATTR_RO(fan1_label, i8k_hwmon_fan_label, 0);
-static SENSOR_DEVICE_ATTR_RW(pwm1, i8k_hwmon_pwm, 0);
-static SENSOR_DEVICE_ATTR_WO(pwm1_enable, i8k_hwmon_pwm_enable, 0);
-static SENSOR_DEVICE_ATTR_RO(fan2_input, i8k_hwmon_fan, 1);
-static SENSOR_DEVICE_ATTR_RO(fan2_label, i8k_hwmon_fan_label, 1);
-static SENSOR_DEVICE_ATTR_RW(pwm2, i8k_hwmon_pwm, 1);
-static SENSOR_DEVICE_ATTR_RO(fan3_input, i8k_hwmon_fan, 2);
-static SENSOR_DEVICE_ATTR_RO(fan3_label, i8k_hwmon_fan_label, 2);
-static SENSOR_DEVICE_ATTR_RW(pwm3, i8k_hwmon_pwm, 2);
-
-static struct attribute *i8k_attrs[] = {
- &sensor_dev_attr_temp1_input.dev_attr.attr, /* 0 */
- &sensor_dev_attr_temp1_label.dev_attr.attr, /* 1 */
- &sensor_dev_attr_temp2_input.dev_attr.attr, /* 2 */
- &sensor_dev_attr_temp2_label.dev_attr.attr, /* 3 */
- &sensor_dev_attr_temp3_input.dev_attr.attr, /* 4 */
- &sensor_dev_attr_temp3_label.dev_attr.attr, /* 5 */
- &sensor_dev_attr_temp4_input.dev_attr.attr, /* 6 */
- &sensor_dev_attr_temp4_label.dev_attr.attr, /* 7 */
- &sensor_dev_attr_temp5_input.dev_attr.attr, /* 8 */
- &sensor_dev_attr_temp5_label.dev_attr.attr, /* 9 */
- &sensor_dev_attr_temp6_input.dev_attr.attr, /* 10 */
- &sensor_dev_attr_temp6_label.dev_attr.attr, /* 11 */
- &sensor_dev_attr_temp7_input.dev_attr.attr, /* 12 */
- &sensor_dev_attr_temp7_label.dev_attr.attr, /* 13 */
- &sensor_dev_attr_temp8_input.dev_attr.attr, /* 14 */
- &sensor_dev_attr_temp8_label.dev_attr.attr, /* 15 */
- &sensor_dev_attr_temp9_input.dev_attr.attr, /* 16 */
- &sensor_dev_attr_temp9_label.dev_attr.attr, /* 17 */
- &sensor_dev_attr_temp10_input.dev_attr.attr, /* 18 */
- &sensor_dev_attr_temp10_label.dev_attr.attr, /* 19 */
- &sensor_dev_attr_fan1_input.dev_attr.attr, /* 20 */
- &sensor_dev_attr_fan1_label.dev_attr.attr, /* 21 */
- &sensor_dev_attr_pwm1.dev_attr.attr, /* 22 */
- &sensor_dev_attr_pwm1_enable.dev_attr.attr, /* 23 */
- &sensor_dev_attr_fan2_input.dev_attr.attr, /* 24 */
- &sensor_dev_attr_fan2_label.dev_attr.attr, /* 25 */
- &sensor_dev_attr_pwm2.dev_attr.attr, /* 26 */
- &sensor_dev_attr_fan3_input.dev_attr.attr, /* 27 */
- &sensor_dev_attr_fan3_label.dev_attr.attr, /* 28 */
- &sensor_dev_attr_pwm3.dev_attr.attr, /* 29 */
+static const struct hwmon_ops dell_smm_ops = {
+ .is_visible = dell_smm_is_visible,
+ .read = dell_smm_read,
+ .read_string = dell_smm_read_string,
+ .write = dell_smm_write,
+};
+
+static const struct hwmon_channel_info *dell_smm_info[] = {
+ HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ),
+ HWMON_CHANNEL_INFO(temp,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL
+ ),
+ HWMON_CHANNEL_INFO(fan,
+ HWMON_F_INPUT | HWMON_F_LABEL,
+ HWMON_F_INPUT | HWMON_F_LABEL,
+ HWMON_F_INPUT | HWMON_F_LABEL
+ ),
+ HWMON_CHANNEL_INFO(pwm,
+ HWMON_PWM_INPUT | HWMON_PWM_ENABLE,
+ HWMON_PWM_INPUT,
+ HWMON_PWM_INPUT
+ ),
NULL
};
-static umode_t i8k_is_visible(struct kobject *kobj, struct attribute *attr,
- int index)
-{
- if (disallow_fan_support && index >= 20)
- return 0;
- if (disallow_fan_type_call &&
- (index == 21 || index == 25 || index == 28))
- return 0;
- if (index >= 0 && index <= 1 &&
- !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP1))
- return 0;
- if (index >= 2 && index <= 3 &&
- !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP2))
- return 0;
- if (index >= 4 && index <= 5 &&
- !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP3))
- return 0;
- if (index >= 6 && index <= 7 &&
- !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP4))
- return 0;
- if (index >= 8 && index <= 9 &&
- !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP5))
- return 0;
- if (index >= 10 && index <= 11 &&
- !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP6))
- return 0;
- if (index >= 12 && index <= 13 &&
- !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP7))
- return 0;
- if (index >= 14 && index <= 15 &&
- !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP8))
- return 0;
- if (index >= 16 && index <= 17 &&
- !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP9))
- return 0;
- if (index >= 18 && index <= 19 &&
- !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP10))
- return 0;
+static const struct hwmon_chip_info dell_smm_chip_info = {
+ .ops = &dell_smm_ops,
+ .info = dell_smm_info,
+};
- if (index >= 20 && index <= 23 &&
- !(i8k_hwmon_flags & I8K_HWMON_HAVE_FAN1))
- return 0;
- if (index >= 24 && index <= 26 &&
- !(i8k_hwmon_flags & I8K_HWMON_HAVE_FAN2))
- return 0;
- if (index >= 27 && index <= 29 &&
- !(i8k_hwmon_flags & I8K_HWMON_HAVE_FAN3))
- return 0;
+static int __init dell_smm_init_hwmon(struct device *dev)
+{
+ struct dell_smm_data *data = dev_get_drvdata(dev);
+ struct device *dell_smm_hwmon_dev;
+ int i, err;
- if (index == 23 && !auto_fan)
- return 0;
+ for (i = 0; i < DELL_SMM_NO_TEMP; i++) {
+ data->temp_type[i] = i8k_get_temp_type(i);
+ if (data->temp_type[i] < 0)
+ continue;
- return attr->mode;
-}
+ if (data->temp_type[i] >= ARRAY_SIZE(temp_labels))
+ data->temp_type[i] = ARRAY_SIZE(temp_labels) - 1;
+ }
-static const struct attribute_group i8k_group = {
- .attrs = i8k_attrs,
- .is_visible = i8k_is_visible,
-};
-__ATTRIBUTE_GROUPS(i8k);
+ for (i = 0; i < DELL_SMM_NO_FANS; i++) {
+ data->fan_type[i] = INT_MIN;
+ err = i8k_get_fan_status(data, i);
+ if (err < 0)
+ err = i8k_get_fan_type(data, i);
+ if (err >= 0)
+ data->fan[i] = true;
+ }
-static int __init i8k_init_hwmon(void)
-{
- int err;
+ dell_smm_hwmon_dev = devm_hwmon_device_register_with_info(dev, "dell_smm", data,
+ &dell_smm_chip_info, NULL);
- i8k_hwmon_flags = 0;
-
- /* CPU temperature attributes, if temperature type is OK */
- err = i8k_get_temp_type(0);
- if (err >= 0)
- i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP1;
- /* check for additional temperature sensors */
- err = i8k_get_temp_type(1);
- if (err >= 0)
- i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP2;
- err = i8k_get_temp_type(2);
- if (err >= 0)
- i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP3;
- err = i8k_get_temp_type(3);
- if (err >= 0)
- i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP4;
- err = i8k_get_temp_type(4);
- if (err >= 0)
- i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP5;
- err = i8k_get_temp_type(5);
- if (err >= 0)
- i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP6;
- err = i8k_get_temp_type(6);
- if (err >= 0)
- i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP7;
- err = i8k_get_temp_type(7);
- if (err >= 0)
- i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP8;
- err = i8k_get_temp_type(8);
- if (err >= 0)
- i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP9;
- err = i8k_get_temp_type(9);
- if (err >= 0)
- i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP10;
-
- /* First fan attributes, if fan status or type is OK */
- err = i8k_get_fan_status(0);
- if (err < 0)
- err = i8k_get_fan_type(0);
- if (err >= 0)
- i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN1;
-
- /* Second fan attributes, if fan status or type is OK */
- err = i8k_get_fan_status(1);
- if (err < 0)
- err = i8k_get_fan_type(1);
- if (err >= 0)
- i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN2;
-
- /* Third fan attributes, if fan status or type is OK */
- err = i8k_get_fan_status(2);
- if (err < 0)
- err = i8k_get_fan_type(2);
- if (err >= 0)
- i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN3;
-
- i8k_hwmon_dev = hwmon_device_register_with_groups(NULL, "dell_smm",
- NULL, i8k_groups);
- if (IS_ERR(i8k_hwmon_dev)) {
- err = PTR_ERR(i8k_hwmon_dev);
- i8k_hwmon_dev = NULL;
- pr_err("hwmon registration failed (%d)\n", err);
- return err;
- }
- return 0;
+ return PTR_ERR_OR_ZERO(dell_smm_hwmon_dev);
}
struct i8k_config_data {
@@ -979,7 +948,7 @@ enum i8k_configs {
DELL_XPS,
};
-static const struct i8k_config_data i8k_config_data[] = {
+static const struct i8k_config_data i8k_config_data[] __initconst = {
[DELL_LATITUDE_D520] = {
.fan_mult = 1,
.fan_max = I8K_FAN_TURBO,
@@ -1137,7 +1106,7 @@ static const struct dmi_system_id i8k_blacklist_fan_type_dmi_table[] __initconst
* support for affected blacklisted Dell machines stay disabled.
* See bug: https://bugzilla.kernel.org/show_bug.cgi?id=195751
*/
-static struct dmi_system_id i8k_blacklist_fan_support_dmi_table[] __initdata = {
+static const struct dmi_system_id i8k_blacklist_fan_support_dmi_table[] __initconst = {
{
.ident = "Dell Inspiron 7720",
.matches = {
@@ -1178,22 +1147,14 @@ enum i8k_fan_controls {
I8K_FAN_34A3_35A3,
};
-static const struct i8k_fan_control_data i8k_fan_control_data[] = {
+static const struct i8k_fan_control_data i8k_fan_control_data[] __initconst = {
[I8K_FAN_34A3_35A3] = {
.manual_fan = 0x34a3,
.auto_fan = 0x35a3,
},
};
-static struct dmi_system_id i8k_whitelist_fan_control[] __initdata = {
- {
- .ident = "Dell Precision 5530",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Precision 5530"),
- },
- .driver_data = (void *)&i8k_fan_control_data[I8K_FAN_34A3_35A3],
- },
+static const struct dmi_system_id i8k_whitelist_fan_control[] __initconst = {
{
.ident = "Dell Latitude 5480",
.matches = {
@@ -1218,57 +1179,56 @@ static struct dmi_system_id i8k_whitelist_fan_control[] __initdata = {
},
.driver_data = (void *)&i8k_fan_control_data[I8K_FAN_34A3_35A3],
},
+ {
+ .ident = "Dell Precision 5530",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Precision 5530"),
+ },
+ .driver_data = (void *)&i8k_fan_control_data[I8K_FAN_34A3_35A3],
+ },
+ {
+ .ident = "Dell Precision 7510",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Precision 7510"),
+ },
+ .driver_data = (void *)&i8k_fan_control_data[I8K_FAN_34A3_35A3],
+ },
{ }
};
-/*
- * Probe for the presence of a supported laptop.
- */
-static int __init i8k_probe(void)
+static int __init dell_smm_probe(struct platform_device *pdev)
{
+ struct dell_smm_data *data;
const struct dmi_system_id *id, *fan_control;
int fan, ret;
- /*
- * Get DMI information
- */
- if (!dmi_check_system(i8k_dmi_table)) {
- if (!ignore_dmi && !force)
- return -ENODEV;
+ data = devm_kzalloc(&pdev->dev, sizeof(struct dell_smm_data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
- pr_info("not running on a supported Dell system.\n");
- pr_info("vendor=%s, model=%s, version=%s\n",
- i8k_get_dmi_data(DMI_SYS_VENDOR),
- i8k_get_dmi_data(DMI_PRODUCT_NAME),
- i8k_get_dmi_data(DMI_BIOS_VERSION));
- }
+ mutex_init(&data->i8k_mutex);
+ data->i8k_fan_mult = I8K_FAN_MULT;
+ data->i8k_fan_max = I8K_FAN_HIGH;
+ platform_set_drvdata(pdev, data);
if (dmi_check_system(i8k_blacklist_fan_support_dmi_table)) {
- pr_warn("broken Dell BIOS detected, disallow fan support\n");
+ dev_warn(&pdev->dev, "broken Dell BIOS detected, disallow fan support\n");
if (!force)
- disallow_fan_support = true;
+ data->disallow_fan_support = true;
}
if (dmi_check_system(i8k_blacklist_fan_type_dmi_table)) {
- pr_warn("broken Dell BIOS detected, disallow fan type call\n");
+ dev_warn(&pdev->dev, "broken Dell BIOS detected, disallow fan type call\n");
if (!force)
- disallow_fan_type_call = true;
+ data->disallow_fan_type_call = true;
}
- strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION),
- sizeof(bios_version));
- strlcpy(bios_machineid, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
- sizeof(bios_machineid));
-
- /*
- * Get SMM Dell signature
- */
- if (i8k_get_dell_signature(I8K_SMM_GET_DELL_SIG1) &&
- i8k_get_dell_signature(I8K_SMM_GET_DELL_SIG2)) {
- pr_err("unable to get SMM Dell signature\n");
- if (!force)
- return -ENODEV;
- }
+ strscpy(data->bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION),
+ sizeof(data->bios_version));
+ strscpy(data->bios_machineid, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
+ sizeof(data->bios_machineid));
/*
* Set fan multiplier and maximal fan speed from dmi config
@@ -1277,22 +1237,24 @@ static int __init i8k_probe(void)
id = dmi_first_match(i8k_dmi_table);
if (id && id->driver_data) {
const struct i8k_config_data *conf = id->driver_data;
+
if (!fan_mult && conf->fan_mult)
fan_mult = conf->fan_mult;
+
if (!fan_max && conf->fan_max)
fan_max = conf->fan_max;
}
- i8k_fan_max = fan_max ? : I8K_FAN_HIGH; /* Must not be 0 */
- i8k_pwm_mult = DIV_ROUND_UP(255, i8k_fan_max);
+ data->i8k_fan_max = fan_max ? : I8K_FAN_HIGH; /* Must not be 0 */
+ data->i8k_pwm_mult = DIV_ROUND_UP(255, data->i8k_fan_max);
fan_control = dmi_first_match(i8k_whitelist_fan_control);
if (fan_control && fan_control->driver_data) {
- const struct i8k_fan_control_data *data = fan_control->driver_data;
+ const struct i8k_fan_control_data *control = fan_control->driver_data;
- manual_fan = data->manual_fan;
- auto_fan = data->auto_fan;
- pr_info("enabling support for setting automatic/manual fan control\n");
+ data->manual_fan = control->manual_fan;
+ data->auto_fan = control->auto_fan;
+ dev_info(&pdev->dev, "enabling support for setting automatic/manual fan control\n");
}
if (!fan_mult) {
@@ -1300,42 +1262,76 @@ static int __init i8k_probe(void)
* Autodetect fan multiplier based on nominal rpm
* If fan reports rpm value too high then set multiplier to 1
*/
- for (fan = 0; fan < 2; ++fan) {
- ret = i8k_get_fan_nominal_speed(fan, i8k_fan_max);
+ for (fan = 0; fan < DELL_SMM_NO_FANS; ++fan) {
+ ret = i8k_get_fan_nominal_speed(data, fan, data->i8k_fan_max);
if (ret < 0)
continue;
+
if (ret > I8K_FAN_MAX_RPM)
- i8k_fan_mult = 1;
+ data->i8k_fan_mult = 1;
break;
}
} else {
/* Fan multiplier was specified in module param or in dmi */
- i8k_fan_mult = fan_mult;
+ data->i8k_fan_mult = fan_mult;
}
+ ret = dell_smm_init_hwmon(&pdev->dev);
+ if (ret)
+ return ret;
+
+ i8k_init_procfs(&pdev->dev);
+
return 0;
}
+static struct platform_driver dell_smm_driver = {
+ .driver = {
+ .name = KBUILD_MODNAME,
+ },
+};
+
+static struct platform_device *dell_smm_device;
+
+/*
+ * Probe for the presence of a supported laptop.
+ */
static int __init i8k_init(void)
{
- int err;
+ /*
+ * Get DMI information
+ */
+ if (!dmi_check_system(i8k_dmi_table)) {
+ if (!ignore_dmi && !force)
+ return -ENODEV;
- /* Are we running on an supported laptop? */
- if (i8k_probe())
- return -ENODEV;
+ pr_info("not running on a supported Dell system.\n");
+ pr_info("vendor=%s, model=%s, version=%s\n",
+ i8k_get_dmi_data(DMI_SYS_VENDOR),
+ i8k_get_dmi_data(DMI_PRODUCT_NAME),
+ i8k_get_dmi_data(DMI_BIOS_VERSION));
+ }
- err = i8k_init_hwmon();
- if (err)
- return err;
+ /*
+ * Get SMM Dell signature
+ */
+ if (i8k_get_dell_signature(I8K_SMM_GET_DELL_SIG1) &&
+ i8k_get_dell_signature(I8K_SMM_GET_DELL_SIG2)) {
+ pr_err("unable to get SMM Dell signature\n");
+ if (!force)
+ return -ENODEV;
+ }
- i8k_init_procfs();
- return 0;
+ dell_smm_device = platform_create_bundle(&dell_smm_driver, dell_smm_probe, NULL, 0, NULL,
+ 0);
+
+ return PTR_ERR_OR_ZERO(dell_smm_device);
}
static void __exit i8k_exit(void)
{
- hwmon_device_unregister(i8k_hwmon_dev);
- i8k_exit_procfs();
+ platform_device_unregister(dell_smm_device);
+ platform_driver_unregister(&dell_smm_driver);
}
module_init(i8k_init);
diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index 29f5fed28c2a..521534d5c1e5 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -166,7 +166,7 @@ static int read_registers(struct fam15h_power_data *data)
memset(data->cu_on, 0, sizeof(int) * MAX_CUS);
- get_online_cpus();
+ cpus_read_lock();
/*
* Choose the first online core of each compute unit, and then
@@ -190,7 +190,7 @@ static int read_registers(struct fam15h_power_data *data)
on_each_cpu_mask(mask, do_read_registers_on_cu, data, true);
- put_online_cpus();
+ cpus_read_unlock();
free_cpumask_var(mask);
return 0;
diff --git a/drivers/hwmon/intel-m10-bmc-hwmon.c b/drivers/hwmon/intel-m10-bmc-hwmon.c
index bd7ed2ed3a1e..7a08e4c44a4b 100644
--- a/drivers/hwmon/intel-m10-bmc-hwmon.c
+++ b/drivers/hwmon/intel-m10-bmc-hwmon.c
@@ -228,6 +228,118 @@ static const struct m10bmc_hwmon_board_data d5005bmc_hwmon_bdata = {
.hinfo = d5005bmc_hinfo,
};
+static const struct m10bmc_sdata n5010bmc_temp_tbl[] = {
+ { 0x100, 0x0, 0x104, 0x0, 0x0, 1000, "Board Local Temperature" },
+ { 0x108, 0x0, 0x10c, 0x0, 0x0, 1000, "FPGA 1 Temperature" },
+ { 0x110, 0x0, 0x114, 0x0, 0x0, 1000, "FPGA 2 Temperature" },
+ { 0x118, 0x0, 0x0, 0x0, 0x0, 1000, "Card Top Temperature" },
+ { 0x11c, 0x0, 0x0, 0x0, 0x0, 1000, "Card Bottom Temperature" },
+ { 0x128, 0x0, 0x0, 0x0, 0x0, 1000, "FPGA 1.2V Temperature" },
+ { 0x134, 0x0, 0x0, 0x0, 0x0, 1000, "FPGA 5V Temperature" },
+ { 0x140, 0x0, 0x0, 0x0, 0x0, 1000, "FPGA 0.9V Temperature" },
+ { 0x14c, 0x0, 0x0, 0x0, 0x0, 1000, "FPGA 0.85V Temperature" },
+ { 0x158, 0x0, 0x0, 0x0, 0x0, 1000, "AUX 12V Temperature" },
+ { 0x164, 0x0, 0x0, 0x0, 0x0, 1000, "Backplane 12V Temperature" },
+ { 0x1a8, 0x0, 0x0, 0x0, 0x0, 1000, "QSFP28-1 Temperature" },
+ { 0x1ac, 0x0, 0x0, 0x0, 0x0, 1000, "QSFP28-2 Temperature" },
+ { 0x1b0, 0x0, 0x0, 0x0, 0x0, 1000, "QSFP28-3 Temperature" },
+ { 0x1b4, 0x0, 0x0, 0x0, 0x0, 1000, "QSFP28-4 Temperature" },
+ { 0x1b8, 0x0, 0x0, 0x0, 0x0, 1000, "CVL1 Internal Temperature" },
+ { 0x1bc, 0x0, 0x0, 0x0, 0x0, 1000, "CVL2 Internal Temperature" },
+};
+
+static const struct m10bmc_sdata n5010bmc_in_tbl[] = {
+ { 0x120, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 1.2V Voltage" },
+ { 0x12c, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 5V Voltage" },
+ { 0x138, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 0.9V Voltage" },
+ { 0x144, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 0.85V Voltage" },
+ { 0x150, 0x0, 0x0, 0x0, 0x0, 1, "AUX 12V Voltage" },
+ { 0x15c, 0x0, 0x0, 0x0, 0x0, 1, "Backplane 12V Voltage" },
+ { 0x16c, 0x0, 0x0, 0x0, 0x0, 1, "DDR4 1.2V Voltage" },
+ { 0x17c, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 1.8V Voltage" },
+ { 0x184, 0x0, 0x0, 0x0, 0x0, 1, "QDR 1.3V Voltage" },
+ { 0x18c, 0x0, 0x0, 0x0, 0x0, 1, "CVL1 0.8V Voltage" },
+ { 0x194, 0x0, 0x0, 0x0, 0x0, 1, "CVL1 1.05V Voltage" },
+ { 0x19c, 0x0, 0x0, 0x0, 0x0, 1, "CVL2 1.05V Voltage" },
+ { 0x1a4, 0x0, 0x0, 0x0, 0x0, 1, "CVL2 0.8V Voltage" },
+};
+
+static const struct m10bmc_sdata n5010bmc_curr_tbl[] = {
+ { 0x124, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 1.2V Current" },
+ { 0x130, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 5V Current" },
+ { 0x13c, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 0.9V Current" },
+ { 0x148, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 0.85V Current" },
+ { 0x154, 0x0, 0x0, 0x0, 0x0, 1, "AUX 12V Current" },
+ { 0x160, 0x0, 0x0, 0x0, 0x0, 1, "Backplane 12V Current" },
+ { 0x168, 0x0, 0x0, 0x0, 0x0, 1, "DDR4 1.2V Current" },
+ { 0x178, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 1.8V Current" },
+ { 0x180, 0x0, 0x0, 0x0, 0x0, 1, "QDR 1.3V Current" },
+ { 0x188, 0x0, 0x0, 0x0, 0x0, 1, "CVL1 0.8V Current" },
+ { 0x190, 0x0, 0x0, 0x0, 0x0, 1, "CVL1 1.05V Current" },
+ { 0x198, 0x0, 0x0, 0x0, 0x0, 1, "CVL2 1.05V Current" },
+ { 0x1a0, 0x0, 0x0, 0x0, 0x0, 1, "CVL2 0.8V Current" },
+};
+
+static const struct hwmon_channel_info *n5010bmc_hinfo[] = {
+ HWMON_CHANNEL_INFO(temp,
+ HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL),
+ HWMON_CHANNEL_INFO(in,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL),
+ HWMON_CHANNEL_INFO(curr,
+ HWMON_C_INPUT | HWMON_C_LABEL,
+ HWMON_C_INPUT | HWMON_C_LABEL,
+ HWMON_C_INPUT | HWMON_C_LABEL,
+ HWMON_C_INPUT | HWMON_C_LABEL,
+ HWMON_C_INPUT | HWMON_C_LABEL,
+ HWMON_C_INPUT | HWMON_C_LABEL,
+ HWMON_C_INPUT | HWMON_C_LABEL,
+ HWMON_C_INPUT | HWMON_C_LABEL,
+ HWMON_C_INPUT | HWMON_C_LABEL,
+ HWMON_C_INPUT | HWMON_C_LABEL,
+ HWMON_C_INPUT | HWMON_C_LABEL,
+ HWMON_C_INPUT | HWMON_C_LABEL,
+ HWMON_C_INPUT | HWMON_C_LABEL),
+ NULL
+};
+
+static const struct m10bmc_hwmon_board_data n5010bmc_hwmon_bdata = {
+ .tables = {
+ [hwmon_temp] = n5010bmc_temp_tbl,
+ [hwmon_in] = n5010bmc_in_tbl,
+ [hwmon_curr] = n5010bmc_curr_tbl,
+ },
+
+ .hinfo = n5010bmc_hinfo,
+};
+
static umode_t
m10bmc_hwmon_is_visible(const void *data, enum hwmon_sensor_types type,
u32 attr, int channel)
@@ -438,6 +550,10 @@ static const struct platform_device_id intel_m10bmc_hwmon_ids[] = {
.name = "d5005bmc-hwmon",
.driver_data = (unsigned long)&d5005bmc_hwmon_bdata,
},
+ {
+ .name = "n5010bmc-hwmon",
+ .driver_data = (unsigned long)&n5010bmc_hwmon_bdata,
+ },
{ }
};
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index 5ff3669c2b60..38bc35ac8135 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -65,10 +65,11 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
#define F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET 0xd8200c64
#define F15H_M60H_REPORTED_TEMP_CTRL_OFFSET 0xd8200ca4
-/* Common for Zen CPU families (Family 17h and 18h) */
-#define ZEN_REPORTED_TEMP_CTRL_OFFSET 0x00059800
+/* Common for Zen CPU families (Family 17h, 18h and 19h) */
+#define ZEN_REPORTED_TEMP_CTRL_BASE 0x00059800
-#define ZEN_CCD_TEMP(x) (0x00059954 + ((x) * 4))
+#define ZEN_CCD_TEMP(offset, x) (ZEN_REPORTED_TEMP_CTRL_BASE + \
+ (offset) + ((x) * 4))
#define ZEN_CCD_TEMP_VALID BIT(11)
#define ZEN_CCD_TEMP_MASK GENMASK(10, 0)
@@ -103,6 +104,7 @@ struct k10temp_data {
u32 temp_adjust_mask;
u32 show_temp;
bool is_zen;
+ u32 ccd_offset;
};
#define TCTL_BIT 0
@@ -163,7 +165,7 @@ static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval)
static void read_tempreg_nb_zen(struct pci_dev *pdev, u32 *regval)
{
amd_smn_read(amd_pci_dev_to_node_id(pdev),
- ZEN_REPORTED_TEMP_CTRL_OFFSET, regval);
+ ZEN_REPORTED_TEMP_CTRL_BASE, regval);
}
static long get_raw_temp(struct k10temp_data *data)
@@ -226,7 +228,8 @@ static int k10temp_read_temp(struct device *dev, u32 attr, int channel,
break;
case 2 ... 9: /* Tccd{1-8} */
amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
- ZEN_CCD_TEMP(channel - 2), &regval);
+ ZEN_CCD_TEMP(data->ccd_offset, channel - 2),
+ &regval);
*val = (regval & ZEN_CCD_TEMP_MASK) * 125 - 49000;
break;
default:
@@ -387,7 +390,7 @@ static void k10temp_get_ccd_support(struct pci_dev *pdev,
for (i = 0; i < limit; i++) {
amd_smn_read(amd_pci_dev_to_node_id(pdev),
- ZEN_CCD_TEMP(i), &regval);
+ ZEN_CCD_TEMP(data->ccd_offset, i), &regval);
if (regval & ZEN_CCD_TEMP_VALID)
data->show_temp |= BIT(TCCD_BIT(i));
}
@@ -426,7 +429,6 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
} else if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) {
data->temp_adjust_mask = ZEN_CUR_TEMP_RANGE_SEL_MASK;
data->read_tempreg = read_tempreg_nb_zen;
- data->show_temp |= BIT(TDIE_BIT); /* show Tdie */
data->is_zen = true;
switch (boot_cpu_data.x86_model) {
@@ -434,22 +436,31 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
case 0x8: /* Zen+ */
case 0x11: /* Zen APU */
case 0x18: /* Zen+ APU */
+ data->ccd_offset = 0x154;
k10temp_get_ccd_support(pdev, data, 4);
break;
case 0x31: /* Zen2 Threadripper */
+ case 0x60: /* Renoir */
+ case 0x68: /* Lucienne */
case 0x71: /* Zen2 */
+ data->ccd_offset = 0x154;
k10temp_get_ccd_support(pdev, data, 8);
break;
}
} else if (boot_cpu_data.x86 == 0x19) {
data->temp_adjust_mask = ZEN_CUR_TEMP_RANGE_SEL_MASK;
data->read_tempreg = read_tempreg_nb_zen;
- data->show_temp |= BIT(TDIE_BIT);
data->is_zen = true;
switch (boot_cpu_data.x86_model) {
case 0x0 ... 0x1: /* Zen3 SP3/TR */
case 0x21: /* Zen3 Ryzen Desktop */
+ case 0x50 ... 0x5f: /* Green Sardine */
+ data->ccd_offset = 0x154;
+ k10temp_get_ccd_support(pdev, data, 8);
+ break;
+ case 0x40 ... 0x4f: /* Yellow Carp */
+ data->ccd_offset = 0x300;
k10temp_get_ccd_support(pdev, data, 8);
break;
}
@@ -463,6 +474,7 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (boot_cpu_data.x86 == entry->model &&
strstr(boot_cpu_data.x86_model_id, entry->id)) {
+ data->show_temp |= BIT(TDIE_BIT); /* show Tdie */
data->temp_offset = entry->offset;
break;
}
@@ -491,6 +503,8 @@ static const struct pci_device_id k10temp_id_table[] = {
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
+ { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F3) },
+ { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) },
{ PCI_VDEVICE(HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) },
{}
};
diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c
index 18fd6f12ca16..cf26c44f2b88 100644
--- a/drivers/hwmon/ntc_thermistor.c
+++ b/drivers/hwmon/ntc_thermistor.c
@@ -13,6 +13,7 @@
#include <linux/err.h>
#include <linux/of.h>
#include <linux/of_device.h>
+#include <linux/fixp-arith.h>
#include <linux/platform_data/ntc_thermistor.h>
@@ -549,15 +550,16 @@ static int get_temp_mc(struct ntc_data *data, unsigned int ohm)
int temp;
lookup_comp(data, ohm, &low, &high);
- if (low == high) {
- /* Unable to use linear approximation */
- temp = data->comp[low].temp_c * 1000;
- } else {
- temp = data->comp[low].temp_c * 1000 +
- ((data->comp[high].temp_c - data->comp[low].temp_c) *
- 1000 * ((int)ohm - (int)data->comp[low].ohm)) /
- ((int)data->comp[high].ohm - (int)data->comp[low].ohm);
- }
+ /*
+ * First multiplying the table temperatures with 1000 to get to
+ * millicentigrades (which is what we want) and then interpolating
+ * will give the best precision.
+ */
+ temp = fixp_linear_interpolate(data->comp[low].ohm,
+ data->comp[low].temp_c * 1000,
+ data->comp[high].ohm,
+ data->comp[high].temp_c * 1000,
+ ohm);
return temp;
}
diff --git a/drivers/hwmon/pmbus/bpa-rs600.c b/drivers/hwmon/pmbus/bpa-rs600.c
index 2be69fedfa36..f2d4e378a775 100644
--- a/drivers/hwmon/pmbus/bpa-rs600.c
+++ b/drivers/hwmon/pmbus/bpa-rs600.c
@@ -12,14 +12,7 @@
#include <linux/pmbus.h>
#include "pmbus.h"
-#define BPARS600_MFR_VIN_MIN 0xa0
-#define BPARS600_MFR_VIN_MAX 0xa1
-#define BPARS600_MFR_IIN_MAX 0xa2
-#define BPARS600_MFR_PIN_MAX 0xa3
-#define BPARS600_MFR_VOUT_MIN 0xa4
-#define BPARS600_MFR_VOUT_MAX 0xa5
-#define BPARS600_MFR_IOUT_MAX 0xa6
-#define BPARS600_MFR_POUT_MAX 0xa7
+enum chips { bpa_rs600, bpd_rs600 };
static int bpa_rs600_read_byte_data(struct i2c_client *client, int page, int reg)
{
@@ -72,6 +65,26 @@ static int bpa_rs600_read_vin(struct i2c_client *client)
return ret;
}
+/*
+ * Read MFR_PIN_MAX while papering over a firmware defect: firmware V5.70
+ * incorrectly reports 1640W, which is replaced by a sensible 700W. Any
+ * other reading, and any negative error code returned by the read, is
+ * handed back untouched.
+ */
+static int bpa_rs600_read_pin_max(struct i2c_client *client)
+{
+	int pin_max = pmbus_read_word_data(client, 0, 0xff, PMBUS_MFR_PIN_MAX);
+
+	/*
+	 * 0x0b34 is the invalid 1640W and 0x095e the substituted 700W, both
+	 * in linear encoding. A negative error code can never compare equal
+	 * to 0x0b34, so errors pass through this expression unchanged.
+	 */
+	return pin_max == 0x0b34 ? 0x095e : pin_max;
+}
+
static int bpa_rs600_read_word_data(struct i2c_client *client, int page, int phase, int reg)
{
int ret;
@@ -81,29 +94,13 @@ static int bpa_rs600_read_word_data(struct i2c_client *client, int page, int pha
switch (reg) {
case PMBUS_VIN_UV_WARN_LIMIT:
- ret = pmbus_read_word_data(client, 0, 0xff, BPARS600_MFR_VIN_MIN);
- break;
case PMBUS_VIN_OV_WARN_LIMIT:
- ret = pmbus_read_word_data(client, 0, 0xff, BPARS600_MFR_VIN_MAX);
- break;
case PMBUS_VOUT_UV_WARN_LIMIT:
- ret = pmbus_read_word_data(client, 0, 0xff, BPARS600_MFR_VOUT_MIN);
- break;
case PMBUS_VOUT_OV_WARN_LIMIT:
- ret = pmbus_read_word_data(client, 0, 0xff, BPARS600_MFR_VOUT_MAX);
- break;
case PMBUS_IIN_OC_WARN_LIMIT:
- ret = pmbus_read_word_data(client, 0, 0xff, BPARS600_MFR_IIN_MAX);
- break;
case PMBUS_IOUT_OC_WARN_LIMIT:
- ret = pmbus_read_word_data(client, 0, 0xff, BPARS600_MFR_IOUT_MAX);
- break;
case PMBUS_PIN_OP_WARN_LIMIT:
- ret = pmbus_read_word_data(client, 0, 0xff, BPARS600_MFR_PIN_MAX);
- break;
case PMBUS_POUT_OP_WARN_LIMIT:
- ret = pmbus_read_word_data(client, 0, 0xff, BPARS600_MFR_POUT_MAX);
- break;
case PMBUS_VIN_UV_FAULT_LIMIT:
case PMBUS_VIN_OV_FAULT_LIMIT:
case PMBUS_VOUT_UV_FAULT_LIMIT:
@@ -114,6 +111,9 @@ static int bpa_rs600_read_word_data(struct i2c_client *client, int page, int pha
case PMBUS_READ_VIN:
ret = bpa_rs600_read_vin(client);
break;
+ case PMBUS_MFR_PIN_MAX:
+ ret = bpa_rs600_read_pin_max(client);
+ break;
default:
if (reg >= PMBUS_VIRT_BASE)
ret = -ENXIO;
@@ -146,11 +146,19 @@ static struct pmbus_driver_info bpa_rs600_info = {
.read_word_data = bpa_rs600_read_word_data,
};
+static const struct i2c_device_id bpa_rs600_id[] = {
+ { "bpa-rs600", bpa_rs600 },
+ { "bpd-rs600", bpd_rs600 },
+ {},
+};
+MODULE_DEVICE_TABLE(i2c, bpa_rs600_id);
+
static int bpa_rs600_probe(struct i2c_client *client)
{
struct device *dev = &client->dev;
u8 buf[I2C_SMBUS_BLOCK_MAX + 1];
int ret;
+ const struct i2c_device_id *mid;
if (!i2c_check_functionality(client->adapter,
I2C_FUNC_SMBUS_READ_BYTE_DATA
@@ -164,7 +172,11 @@ static int bpa_rs600_probe(struct i2c_client *client)
return ret;
}
- if (strncmp(buf, "BPA-RS600", 8)) {
+ for (mid = bpa_rs600_id; mid->name[0]; mid++) {
+ if (!strncasecmp(buf, mid->name, strlen(mid->name)))
+ break;
+ }
+ if (!mid->name[0]) {
buf[ret] = '\0';
dev_err(dev, "Unsupported Manufacturer Model '%s'\n", buf);
return -ENODEV;
@@ -173,12 +185,6 @@ static int bpa_rs600_probe(struct i2c_client *client)
return pmbus_do_probe(client, &bpa_rs600_info);
}
-static const struct i2c_device_id bpa_rs600_id[] = {
- { "bpars600", 0 },
- {},
-};
-MODULE_DEVICE_TABLE(i2c, bpa_rs600_id);
-
static const struct of_device_id __maybe_unused bpa_rs600_of_match[] = {
{ .compatible = "blutek,bpa-rs600" },
{},
diff --git a/drivers/hwmon/pmbus/ibm-cffps.c b/drivers/hwmon/pmbus/ibm-cffps.c
index 5668d8305b78..df712ce4b164 100644
--- a/drivers/hwmon/pmbus/ibm-cffps.c
+++ b/drivers/hwmon/pmbus/ibm-cffps.c
@@ -50,9 +50,9 @@
#define CFFPS_MFR_VAUX_FAULT BIT(6)
#define CFFPS_MFR_CURRENT_SHARE_WARNING BIT(7)
-#define CFFPS_LED_BLINK BIT(0)
-#define CFFPS_LED_ON BIT(1)
-#define CFFPS_LED_OFF BIT(2)
+#define CFFPS_LED_BLINK (BIT(0) | BIT(6))
+#define CFFPS_LED_ON (BIT(1) | BIT(6))
+#define CFFPS_LED_OFF (BIT(2) | BIT(6))
#define CFFPS_BLINK_RATE_MS 250
enum {
diff --git a/drivers/hwmon/sbrmi.c b/drivers/hwmon/sbrmi.c
new file mode 100644
index 000000000000..7bf0c3fba75f
--- /dev/null
+++ b/drivers/hwmon/sbrmi.c
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * sbrmi.c - hwmon driver for a SB-RMI mailbox
+ * compliant AMD SoC device.
+ *
+ * Copyright (C) 2020-2021 Advanced Micro Devices, Inc.
+ */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/hwmon.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+
+/* Lower clamp bound for a requested power limit: negative requests are raised to this value */
+#define SBRMI_PWR_MIN 0
+/* Status register bit[1]: polled for command completion, then written back to clear the alert */
+#define SW_ALERT_MASK 0x2
+
+/* Written to SBRMI_INBNDMSG7 to indicate to firmware that a command is to be serviced */
+#define START_CMD 0x80
+/* Written to SBRMI_SW_INTERRUPT to notify firmware to perform the requested command */
+#define TRIGGER_MAILBOX 0x01
+
+/*
+ * SB-RMI message IDs.
+ *
+ * SB-RMI offers a soft mailbox service: requests to MP1 (the power
+ * management firmware) travel through the SBRMI inbound/outbound message
+ * registers, and each request is identified by one of the IDs below.
+ */
+enum sbrmi_msg_id {
+	SBRMI_READ_PKG_PWR_CONSUMPTION	= 0x1,
+	SBRMI_WRITE_PKG_PWR_LIMIT	= 0x2,
+	SBRMI_READ_PKG_PWR_LIMIT	= 0x3,
+	SBRMI_READ_PKG_MAX_PWR_LIMIT	= 0x4,
+};
+
+/*
+ * SB-RMI registers. The outbound message bank occupies 0x30-0x37, the
+ * inbound message bank 0x38-0x3f, and the software interrupt register
+ * follows directly at 0x40.
+ */
+enum sbrmi_reg {
+	SBRMI_CTRL		= 0x01,
+	SBRMI_STATUS		= 0x02,
+	SBRMI_OUTBNDMSG0	= 0x30,
+	SBRMI_OUTBNDMSG1	= 0x31,
+	SBRMI_OUTBNDMSG2	= 0x32,
+	SBRMI_OUTBNDMSG3	= 0x33,
+	SBRMI_OUTBNDMSG4	= 0x34,
+	SBRMI_OUTBNDMSG5	= 0x35,
+	SBRMI_OUTBNDMSG6	= 0x36,
+	SBRMI_OUTBNDMSG7	= 0x37,
+	SBRMI_INBNDMSG0		= 0x38,
+	SBRMI_INBNDMSG1		= 0x39,
+	SBRMI_INBNDMSG2		= 0x3a,
+	SBRMI_INBNDMSG3		= 0x3b,
+	SBRMI_INBNDMSG4		= 0x3c,
+	SBRMI_INBNDMSG5		= 0x3d,
+	SBRMI_INBNDMSG6		= 0x3e,
+	SBRMI_INBNDMSG7		= 0x3f,
+	SBRMI_SW_INTERRUPT	= 0x40,
+};
+
+/* Each client has this additional data */
+struct sbrmi_data {
+ /* I2C client used for every SMBus byte transfer to the device */
+ struct i2c_client *client;
+ /* Held by rmi_mailbox_xfer() for the duration of one mailbox transaction */
+ struct mutex lock;
+ /*
+ * Maximum package power limit, cached at probe time; used as the upper
+ * clamp bound for limit writes and reported as power cap_max
+ */
+ u32 pwr_limit_max;
+};
+
+/* One mailbox request and its response, as consumed by rmi_mailbox_xfer() */
+struct sbrmi_mailbox_msg {
+ /* Command byte written to SBRMI_INBNDMSG0 */
+ u8 cmd;
+ /* When true, the response is read back into data_out after completion */
+ bool read;
+ /* Command Data In[31:0], written byte-wise (LSB first) to SBRMI_INBNDMSG1..4 */
+ u32 data_in;
+ /* Command Data Out[31:0], assembled byte-wise (LSB first) from SBRMI_OUTBNDMSG1..4 */
+ u32 data_out;
+};
+
+/*
+ * Enable the SB-RMI software alert status, which is done by clearing
+ * bit 4 of the Control register (0x1). The register is written back only
+ * when that bit is currently set.
+ *
+ * Returns a negative error code if an SMBus transfer fails, otherwise 0
+ * when no write was needed or the result of the write.
+ */
+static int sbrmi_enable_alert(struct i2c_client *client)
+{
+	int reg = i2c_smbus_read_byte_data(client, SBRMI_CTRL);
+
+	if (reg < 0)
+		return reg;
+
+	/* Bit 4 already clear: the alert is enabled, nothing to write */
+	if (!(reg & 0x10))
+		return 0;
+
+	return i2c_smbus_write_byte_data(client, SBRMI_CTRL, reg & ~0x10);
+}
+
+/*
+ * Run one mailbox transaction against the firmware.
+ *
+ * @data: per-client state; data->lock serialises the whole transaction
+ * @msg:  request to send. msg->cmd and msg->data_in are always written;
+ *        when msg->read is set, msg->data_out is overwritten with the
+ *        32-bit response.
+ *
+ * The sequence is: announce a command, write the command byte and the
+ * four data bytes, trigger the software interrupt, poll the status
+ * register until firmware flags completion, optionally read the response
+ * and finally clear the alert.
+ *
+ * Returns the result of the final status write on success, a negative
+ * error code from a failed SMBus transfer, or -EIO if firmware does not
+ * signal completion within the polling budget.
+ */
+static int rmi_mailbox_xfer(struct sbrmi_data *data,
+			    struct sbrmi_mailbox_msg *msg)
+{
+	int i, ret, retry = 10;
+	int sw_status;
+	u8 byte;
+
+	mutex_lock(&data->lock);
+
+	/* Indicate to firmware that a command is to be serviced */
+	ret = i2c_smbus_write_byte_data(data->client,
+					SBRMI_INBNDMSG7, START_CMD);
+	if (ret < 0)
+		goto exit_unlock;
+
+	/* Write the command to SBRMI::InBndMsg_inst0 */
+	ret = i2c_smbus_write_byte_data(data->client,
+					SBRMI_INBNDMSG0, msg->cmd);
+	if (ret < 0)
+		goto exit_unlock;
+
+	/*
+	 * For both read and write the initiator (BMC) writes
+	 * Command Data In[31:0] to SBRMI::InBndMsg_inst[4:1]
+	 * SBRMI_x3C(MSB):SBRMI_x39(LSB)
+	 */
+	for (i = 0; i < 4; i++) {
+		byte = (msg->data_in >> i * 8) & 0xff;
+		ret = i2c_smbus_write_byte_data(data->client,
+						SBRMI_INBNDMSG1 + i, byte);
+		if (ret < 0)
+			goto exit_unlock;
+	}
+
+	/*
+	 * Write 0x01 to SBRMI::SoftwareInterrupt to notify firmware to
+	 * perform the requested read or write command
+	 */
+	ret = i2c_smbus_write_byte_data(data->client,
+					SBRMI_SW_INTERRUPT, TRIGGER_MAILBOX);
+	if (ret < 0)
+		goto exit_unlock;
+
+	/*
+	 * Firmware will write SBRMI::Status[SwAlertSts]=1 to generate
+	 * an ALERT (if enabled) to initiator (BMC) to indicate completion
+	 * of the requested command
+	 */
+	do {
+		sw_status = i2c_smbus_read_byte_data(data->client,
+						     SBRMI_STATUS);
+		if (sw_status < 0) {
+			ret = sw_status;
+			goto exit_unlock;
+		}
+		if (sw_status & SW_ALERT_MASK)
+			break;
+		usleep_range(50, 100);
+	} while (retry--);
+
+	/* retry only drops below zero when every poll came back unfinished */
+	if (retry < 0) {
+		dev_err(&data->client->dev,
+			"Firmware fail to indicate command completion\n");
+		ret = -EIO;
+		goto exit_unlock;
+	}
+
+	/*
+	 * For a read operation, the initiator (BMC) reads the firmware
+	 * response Command Data Out[31:0] from SBRMI::OutBndMsg_inst[4:1]
+	 * {SBRMI_x34(MSB):SBRMI_x31(LSB)}.
+	 */
+	if (msg->read) {
+		/* Do not rely on the caller having zeroed the result field */
+		msg->data_out = 0;
+		for (i = 0; i < 4; i++) {
+			ret = i2c_smbus_read_byte_data(data->client,
+						       SBRMI_OUTBNDMSG1 + i);
+			if (ret < 0)
+				goto exit_unlock;
+			/*
+			 * Widen before shifting: a byte of 0x80 or more
+			 * shifted left by 24 does not fit in a signed int.
+			 */
+			msg->data_out |= (u32)ret << (i * 8);
+		}
+	}
+
+	/*
+	 * BMC must write 1'b1 to SBRMI::Status[SwAlertSts] to clear the
+	 * ALERT to initiator
+	 */
+	ret = i2c_smbus_write_byte_data(data->client, SBRMI_STATUS,
+					sw_status | SW_ALERT_MASK);
+
+exit_unlock:
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+/*
+ * hwmon read callback. Only power attributes are served: the current
+ * package power and the current power limit are fetched through the
+ * mailbox, while the maximum power limit comes from the value cached in
+ * the driver data. Every reading is multiplied by 1000 because hwmon
+ * power attributes are in microWatt.
+ *
+ * Returns -EINVAL for an unsupported type or attribute, a negative error
+ * code if the mailbox transfer fails, and a non-negative value otherwise.
+ */
+static int sbrmi_read(struct device *dev, enum hwmon_sensor_types type,
+		      u32 attr, int channel, long *val)
+{
+	struct sbrmi_data *data = dev_get_drvdata(dev);
+	struct sbrmi_mailbox_msg msg = { .read = true };
+	int ret;
+
+	if (type != hwmon_power)
+		return -EINVAL;
+
+	switch (attr) {
+	case hwmon_power_cap_max:
+		/* Served from the cached value; no mailbox traffic needed */
+		*val = (long)data->pwr_limit_max * 1000;
+		return 0;
+	case hwmon_power_input:
+		msg.cmd = SBRMI_READ_PKG_PWR_CONSUMPTION;
+		break;
+	case hwmon_power_cap:
+		msg.cmd = SBRMI_READ_PKG_PWR_LIMIT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = rmi_mailbox_xfer(data, &msg);
+	if (ret < 0)
+		return ret;
+
+	/* hwmon power attributes are in microWatt */
+	*val = (long)msg.data_out * 1000;
+	return ret;
+}
+
+/*
+ * hwmon write callback: set the package power limit.
+ *
+ * Only the power cap attribute is writable; any other type or attribute
+ * is rejected with -EINVAL. The requested value is converted from
+ * microWatt to the mailbox unit and clamped to
+ * [SBRMI_PWR_MIN, data->pwr_limit_max] before being sent.
+ *
+ * Returns the result of the mailbox transfer, or -EINVAL as described.
+ */
+static int sbrmi_write(struct device *dev, enum hwmon_sensor_types type,
+		       u32 attr, int channel, long val)
+{
+	struct sbrmi_data *data = dev_get_drvdata(dev);
+	struct sbrmi_mailbox_msg msg = { 0 };
+
+	/*
+	 * Reject unless BOTH the type and the attribute match. The two
+	 * tests must be joined with ||: with && a mismatch in only one of
+	 * them would slip through and be treated as a power limit write.
+	 */
+	if (type != hwmon_power || attr != hwmon_power_cap)
+		return -EINVAL;
+	/*
+	 * hwmon power attributes are in microWatt
+	 * mailbox read/write is in mWatt
+	 */
+	val /= 1000;
+
+	val = clamp_val(val, SBRMI_PWR_MIN, data->pwr_limit_max);
+
+	msg.cmd = SBRMI_WRITE_PKG_PWR_LIMIT;
+	msg.data_in = val;
+	msg.read = false;
+
+	return rmi_mailbox_xfer(data, &msg);
+}
+
+/*
+ * hwmon visibility callback. The power input and cap_max attributes are
+ * read-only (0444), the power cap attribute is read-write (0644), and
+ * everything else is hidden (0).
+ */
+static umode_t sbrmi_is_visible(const void *data,
+				enum hwmon_sensor_types type,
+				u32 attr, int channel)
+{
+	if (type != hwmon_power)
+		return 0;
+
+	switch (attr) {
+	case hwmon_power_cap:
+		return 0644;
+	case hwmon_power_input:
+	case hwmon_power_cap_max:
+		return 0444;
+	default:
+		return 0;
+	}
+}
+
+/* One power channel exposing input, cap and cap_max; NULL-terminated list */
+static const struct hwmon_channel_info *sbrmi_info[] = {
+ HWMON_CHANNEL_INFO(power,
+ HWMON_P_INPUT | HWMON_P_CAP | HWMON_P_CAP_MAX),
+ NULL
+};
+
+/* Callbacks handed to the hwmon core: attribute visibility, read and write */
+static const struct hwmon_ops sbrmi_hwmon_ops = {
+ .is_visible = sbrmi_is_visible,
+ .read = sbrmi_read,
+ .write = sbrmi_write,
+};
+
+/* Chip description passed to devm_hwmon_device_register_with_info() in probe */
+static const struct hwmon_chip_info sbrmi_chip_info = {
+ .ops = &sbrmi_hwmon_ops,
+ .info = sbrmi_info,
+};
+
+/*
+ * Query the maximum package power limit through the mailbox and cache it
+ * in data->pwr_limit_max. The cached value is left untouched when the
+ * transfer fails.
+ *
+ * Returns the result of the mailbox transfer (negative on failure).
+ */
+static int sbrmi_get_max_pwr_limit(struct sbrmi_data *data)
+{
+	struct sbrmi_mailbox_msg req = {
+		.cmd = SBRMI_READ_PKG_MAX_PWR_LIMIT,
+		.read = true,
+	};
+	int err = rmi_mailbox_xfer(data, &req);
+
+	if (err < 0)
+		return err;
+
+	data->pwr_limit_max = req.data_out;
+	return err;
+}
+
+/*
+ * Probe an SB-RMI device: allocate the device-managed per-client state,
+ * enable the software alert, cache the maximum power limit and register
+ * the hwmon device under the client's name.
+ *
+ * Returns 0 on success, -ENOMEM if the state cannot be allocated, or the
+ * negative error code of whichever setup step failed.
+ */
+static int sbrmi_probe(struct i2c_client *client,
+		       const struct i2c_device_id *id)
+{
+	struct device *dev = &client->dev;
+	struct sbrmi_data *priv;
+	struct device *hwmon;
+	int err;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->client = client;
+	mutex_init(&priv->lock);
+
+	/* Enable alert for SB-RMI sequence */
+	err = sbrmi_enable_alert(client);
+	if (err < 0)
+		return err;
+
+	/* Cache maximum power limit */
+	err = sbrmi_get_max_pwr_limit(priv);
+	if (err < 0)
+		return err;
+
+	hwmon = devm_hwmon_device_register_with_info(dev, client->name, priv,
+						     &sbrmi_chip_info, NULL);
+	if (IS_ERR(hwmon))
+		return PTR_ERR(hwmon);
+
+	return 0;
+}
+
+/* I2C device names this driver binds to; the empty entry terminates the table */
+static const struct i2c_device_id sbrmi_id[] = {
+ {"sbrmi", 0},
+ {}
+};
+MODULE_DEVICE_TABLE(i2c, sbrmi_id);
+
+/* Device tree compatible strings this driver binds to; the empty entry terminates the table */
+static const struct of_device_id __maybe_unused sbrmi_of_match[] = {
+ {
+ .compatible = "amd,sbrmi",
+ },
+ { },
+};
+MODULE_DEVICE_TABLE(of, sbrmi_of_match);
+
+/* I2C driver description tying together the probe routine and both match tables */
+static struct i2c_driver sbrmi_driver = {
+ .class = I2C_CLASS_HWMON,
+ .driver = {
+ .name = "sbrmi",
+ .of_match_table = of_match_ptr(sbrmi_of_match),
+ },
+ .probe = sbrmi_probe,
+ .id_table = sbrmi_id,
+};
+
+module_i2c_driver(sbrmi_driver);
+
+MODULE_AUTHOR("Akshay Gupta <akshay.gupta@amd.com>");
+MODULE_DESCRIPTION("Hwmon driver for AMD SB-RMI emulated sensor");
+MODULE_LICENSE("GPL");
diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c
index 8618aaf32350..705a59663d42 100644
--- a/drivers/hwmon/w83627ehf.c
+++ b/drivers/hwmon/w83627ehf.c
@@ -372,12 +372,10 @@ struct w83627ehf_data {
u8 temp3_val_only:1;
u8 have_vid:1;
-#ifdef CONFIG_PM
/* Remember extra register values over suspend/resume */
u8 vbat;
u8 fandiv1;
u8 fandiv2;
-#endif
};
struct w83627ehf_sio_data {
@@ -1083,7 +1081,7 @@ cpu0_vid_show(struct device *dev, struct device_attribute *attr, char *buf)
struct w83627ehf_data *data = dev_get_drvdata(dev);
return sprintf(buf, "%d\n", vid_from_reg(data->vid, data->vrm));
}
-DEVICE_ATTR_RO(cpu0_vid);
+static DEVICE_ATTR_RO(cpu0_vid);
/* Case open detection */
@@ -1694,7 +1692,7 @@ static const struct hwmon_chip_info w83627ehf_chip_info = {
.info = w83627ehf_info,
};
-static int w83627ehf_probe(struct platform_device *pdev)
+static int __init w83627ehf_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct w83627ehf_sio_data *sio_data = dev_get_platdata(dev);
@@ -1705,20 +1703,12 @@ static int w83627ehf_probe(struct platform_device *pdev)
struct device *hwmon_dev;
res = platform_get_resource(pdev, IORESOURCE_IO, 0);
- if (!request_region(res->start, IOREGION_LENGTH, DRVNAME)) {
- err = -EBUSY;
- dev_err(dev, "Failed to request region 0x%lx-0x%lx\n",
- (unsigned long)res->start,
- (unsigned long)res->start + IOREGION_LENGTH - 1);
- goto exit;
- }
+ if (!devm_request_region(dev, res->start, IOREGION_LENGTH, DRVNAME))
+ return -EBUSY;
- data = devm_kzalloc(&pdev->dev, sizeof(struct w83627ehf_data),
- GFP_KERNEL);
- if (!data) {
- err = -ENOMEM;
- goto exit_release;
- }
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
data->addr = res->start;
mutex_init(&data->lock);
@@ -1882,7 +1872,7 @@ static int w83627ehf_probe(struct platform_device *pdev)
err = superio_enter(sio_data->sioreg);
if (err)
- goto exit_release;
+ return err;
/* Read VID value */
if (sio_data->kind == w83667hg || sio_data->kind == w83667hg_b) {
@@ -1951,30 +1941,10 @@ static int w83627ehf_probe(struct platform_device *pdev)
data,
&w83627ehf_chip_info,
w83627ehf_groups);
- if (IS_ERR(hwmon_dev)) {
- err = PTR_ERR(hwmon_dev);
- goto exit_release;
- }
-
- return 0;
-
-exit_release:
- release_region(res->start, IOREGION_LENGTH);
-exit:
- return err;
+ return PTR_ERR_OR_ZERO(hwmon_dev);
}
-static int w83627ehf_remove(struct platform_device *pdev)
-{
- struct w83627ehf_data *data = platform_get_drvdata(pdev);
-
- release_region(data->addr, IOREGION_LENGTH);
-
- return 0;
-}
-
-#ifdef CONFIG_PM
-static int w83627ehf_suspend(struct device *dev)
+static int __maybe_unused w83627ehf_suspend(struct device *dev)
{
struct w83627ehf_data *data = w83627ehf_update_device(dev);
@@ -1985,7 +1955,7 @@ static int w83627ehf_suspend(struct device *dev)
return 0;
}
-static int w83627ehf_resume(struct device *dev)
+static int __maybe_unused w83627ehf_resume(struct device *dev)
{
struct w83627ehf_data *data = dev_get_drvdata(dev);
int i;
@@ -2040,25 +2010,13 @@ static int w83627ehf_resume(struct device *dev)
return 0;
}
-static const struct dev_pm_ops w83627ehf_dev_pm_ops = {
- .suspend = w83627ehf_suspend,
- .resume = w83627ehf_resume,
- .freeze = w83627ehf_suspend,
- .restore = w83627ehf_resume,
-};
-
-#define W83627EHF_DEV_PM_OPS (&w83627ehf_dev_pm_ops)
-#else
-#define W83627EHF_DEV_PM_OPS NULL
-#endif /* CONFIG_PM */
+static SIMPLE_DEV_PM_OPS(w83627ehf_dev_pm_ops, w83627ehf_suspend, w83627ehf_resume);
static struct platform_driver w83627ehf_driver = {
.driver = {
.name = DRVNAME,
- .pm = W83627EHF_DEV_PM_OPS,
+ .pm = &w83627ehf_dev_pm_ops,
},
- .probe = w83627ehf_probe,
- .remove = w83627ehf_remove,
};
/* w83627ehf_find() looks for a '627 in the Super-I/O config space */
@@ -2150,8 +2108,7 @@ static int __init w83627ehf_find(int sioaddr, unsigned short *addr,
/*
* when Super-I/O functions move to a separate file, the Super-I/O
* bus will manage the lifetime of the device and this module will only keep
- * track of the w83627ehf driver. But since we platform_device_alloc(), we
- * must keep track of the device
+ * track of the w83627ehf driver.
*/
static struct platform_device *pdev;
@@ -2159,7 +2116,10 @@ static int __init sensors_w83627ehf_init(void)
{
int err;
unsigned short address;
- struct resource res;
+ struct resource res = {
+ .name = DRVNAME,
+ .flags = IORESOURCE_IO,
+ };
struct w83627ehf_sio_data sio_data;
/*
@@ -2173,55 +2133,17 @@ static int __init sensors_w83627ehf_init(void)
w83627ehf_find(0x4e, &address, &sio_data))
return -ENODEV;
- err = platform_driver_register(&w83627ehf_driver);
- if (err)
- goto exit;
-
- pdev = platform_device_alloc(DRVNAME, address);
- if (!pdev) {
- err = -ENOMEM;
- pr_err("Device allocation failed\n");
- goto exit_unregister;
- }
-
- err = platform_device_add_data(pdev, &sio_data,
- sizeof(struct w83627ehf_sio_data));
- if (err) {
- pr_err("Platform data allocation failed\n");
- goto exit_device_put;
- }
-
- memset(&res, 0, sizeof(res));
- res.name = DRVNAME;
res.start = address + IOREGION_OFFSET;
res.end = address + IOREGION_OFFSET + IOREGION_LENGTH - 1;
- res.flags = IORESOURCE_IO;
err = acpi_check_resource_conflict(&res);
if (err)
- goto exit_device_put;
-
- err = platform_device_add_resources(pdev, &res, 1);
- if (err) {
- pr_err("Device resource addition failed (%d)\n", err);
- goto exit_device_put;
- }
-
- /* platform_device_add calls probe() */
- err = platform_device_add(pdev);
- if (err) {
- pr_err("Device addition failed (%d)\n", err);
- goto exit_device_put;
- }
+ return err;
- return 0;
+ pdev = platform_create_bundle(&w83627ehf_driver, w83627ehf_probe, &res, 1, &sio_data,
+ sizeof(struct w83627ehf_sio_data));
-exit_device_put:
- platform_device_put(pdev);
-exit_unregister:
- platform_driver_unregister(&w83627ehf_driver);
-exit:
- return err;
+ return PTR_ERR_OR_ZERO(pdev);
}
static void __exit sensors_w83627ehf_exit(void)
diff --git a/drivers/hwmon/w83781d.c b/drivers/hwmon/w83781d.c
index e84aa5604e64..ce8e2c10e854 100644
--- a/drivers/hwmon/w83781d.c
+++ b/drivers/hwmon/w83781d.c
@@ -1571,10 +1571,21 @@ static const struct i2c_device_id w83781d_ids[] = {
};
MODULE_DEVICE_TABLE(i2c, w83781d_ids);
+static const struct of_device_id w83781d_of_match[] = {
+ { .compatible = "winbond,w83781d" },
+ { .compatible = "winbond,w83781g" },
+ { .compatible = "winbond,w83782d" },
+ { .compatible = "winbond,w83783s" },
+ { .compatible = "asus,as99127f" },
+ { },
+};
+MODULE_DEVICE_TABLE(of, w83781d_of_match);
+
static struct i2c_driver w83781d_driver = {
.class = I2C_CLASS_HWMON,
.driver = {
.name = "w83781d",
+ .of_match_table = w83781d_of_match,
},
.probe_new = w83781d_probe,
.remove = w83781d_remove,
diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig
index 84530fd80998..f026e5c0e777 100644
--- a/drivers/hwtracing/coresight/Kconfig
+++ b/drivers/hwtracing/coresight/Kconfig
@@ -8,6 +8,7 @@ menuconfig CORESIGHT
depends on OF || ACPI
select ARM_AMBA
select PERF_EVENTS
+ select CONFIGFS_FS
help
This framework provides a kernel interface for the CoreSight debug
and trace drivers to register themselves with. It's intended to build
diff --git a/drivers/hwtracing/coresight/Makefile b/drivers/hwtracing/coresight/Makefile
index d60816509755..b6c4a48140ec 100644
--- a/drivers/hwtracing/coresight/Makefile
+++ b/drivers/hwtracing/coresight/Makefile
@@ -4,7 +4,9 @@
#
obj-$(CONFIG_CORESIGHT) += coresight.o
coresight-y := coresight-core.o coresight-etm-perf.o coresight-platform.o \
- coresight-sysfs.o
+ coresight-sysfs.o coresight-syscfg.o coresight-config.o \
+ coresight-cfg-preload.o coresight-cfg-afdo.o \
+ coresight-syscfg-configfs.o
obj-$(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) += coresight-tmc.o
coresight-tmc-y := coresight-tmc-core.o coresight-tmc-etf.o \
coresight-tmc-etr.o
@@ -16,7 +18,8 @@ obj-$(CONFIG_CORESIGHT_SOURCE_ETM3X) += coresight-etm3x.o
coresight-etm3x-y := coresight-etm3x-core.o coresight-etm-cp14.o \
coresight-etm3x-sysfs.o
obj-$(CONFIG_CORESIGHT_SOURCE_ETM4X) += coresight-etm4x.o
-coresight-etm4x-y := coresight-etm4x-core.o coresight-etm4x-sysfs.o
+coresight-etm4x-y := coresight-etm4x-core.o coresight-etm4x-sysfs.o \
+ coresight-etm4x-cfg.o
obj-$(CONFIG_CORESIGHT_STM) += coresight-stm.o
obj-$(CONFIG_CORESIGHT_CPU_DEBUG) += coresight-cpu-debug.o
obj-$(CONFIG_CORESIGHT_CATU) += coresight-catu.o
diff --git a/drivers/hwtracing/coresight/coresight-cfg-afdo.c b/drivers/hwtracing/coresight/coresight-cfg-afdo.c
new file mode 100644
index 000000000000..84b31184252b
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-cfg-afdo.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2020 Linaro Limited. All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include "coresight-config.h"
+
+/* ETMv4 includes and features */
+#if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X)
+#include "coresight-etm4x-cfg.h"
+
+/* preload configurations and features */
+
+/* preload in features for ETMv4 */
+
+/* strobe feature: default parameter values; array index is the parameter index referenced by the VAL_PARAM entries in strobe_regs */
+static struct cscfg_parameter_desc strobe_params[] = {
+ {
+ .name = "window",
+ .value = 5000,
+ },
+ {
+ .name = "period",
+ .value = 10000,
+ },
+};
+
+static struct cscfg_regval_desc strobe_regs[] = {	/* register programming for the strobing feature */
+	/* resource selectors */
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE,
+		.offset = TRCRSCTLRn(2),
+		.hw_info = ETM4_CFG_RES_SEL,
+		.val32 = 0x20001,
+	},
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE,
+		.offset = TRCRSCTLRn(3),
+		.hw_info = ETM4_CFG_RES_SEQ,
+		.val32 = 0x20002,
+	},
+	/* strobe window counter 0 - reload value comes from strobe_params[0] ("window") */
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE | CS_CFG_REG_TYPE_VAL_SAVE,
+		.offset = TRCCNTVRn(0),
+		.hw_info = ETM4_CFG_RES_CTR,
+	},
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE | CS_CFG_REG_TYPE_VAL_PARAM,
+		.offset = TRCCNTRLDVRn(0),
+		.hw_info = ETM4_CFG_RES_CTR,
+		.param_idx = 0,
+	},
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE,
+		.offset = TRCCNTCTLRn(0),
+		.hw_info = ETM4_CFG_RES_CTR,
+		.val32 = 0x10001,
+	},
+	/* strobe period counter 1 - reload value comes from strobe_params[1] ("period") */
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE | CS_CFG_REG_TYPE_VAL_SAVE,
+		.offset = TRCCNTVRn(1),
+		.hw_info = ETM4_CFG_RES_CTR,
+	},
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE | CS_CFG_REG_TYPE_VAL_PARAM,
+		.offset = TRCCNTRLDVRn(1),
+		.hw_info = ETM4_CFG_RES_CTR,
+		.param_idx = 1,
+	},
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE,
+		.offset = TRCCNTCTLRn(1),
+		.hw_info = ETM4_CFG_RES_CTR,
+		.val32 = 0x8102,
+	},
+	/* sequencer */
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE,
+		.offset = TRCSEQEVRn(0),
+		.hw_info = ETM4_CFG_RES_SEQ,
+		.val32 = 0x0081,
+	},
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE,
+		.offset = TRCSEQEVRn(1),
+		.hw_info = ETM4_CFG_RES_SEQ,
+		.val32 = 0x0000,
+	},
+	/* view-inst: masked write, only the bits in mask32 are changed */
+	{
+		.type = CS_CFG_REG_TYPE_STD | CS_CFG_REG_TYPE_VAL_MASK,
+		.offset = TRCVICTLR,
+		.val32 = 0x0003,
+		.mask32 = 0x0003,
+	},
+	/* end of regs */
+};
+
+struct cscfg_feature_desc strobe_etm4x = {	/* ETMv4 strobing feature; listed in preload_feats[] */
+	.name = "strobing",
+	.match_flags = CS_CFG_MATCH_CLASS_SRC_ETM4,
+	.description = "Generate periodic trace capture windows.\n"
+		       "parameter \'window\': a number of CPU cycles (W)\n"
+		       "parameter \'period\': trace enabled for W cycles every period x W cycles\n",
+	.params_desc = strobe_params,
+	.nr_params = ARRAY_SIZE(strobe_params),
+	.regs_desc = strobe_regs,
+	.nr_regs = ARRAY_SIZE(strobe_regs),
+};
+
+/* create an autofdo configuration built on the strobing feature */
+
+/* number of preset rows supplied in afdo_presets[] */
+#define AFDO_NR_PRESETS 9
+/* total number of parameters over all features used - only strobing here */
+#define AFDO_NR_PARAMS ARRAY_SIZE(strobe_params)
+
+static const char *afdo_ref_names[] = {
+ "strobing", /* same string as strobe_etm4x.name; presumably resolved by name at load - confirm in syscfg */
+};
+
+/*
+ * read-only preset table, one row per preset: { window, period }. The window is
+ * held constant while the period varies to allow mark / space experiments
+ */
+static const u64 afdo_presets[AFDO_NR_PRESETS][AFDO_NR_PARAMS] = {
+	{ 5000, 2 },
+	{ 5000, 4 },
+	{ 5000, 8 },
+	{ 5000, 16 },
+	{ 5000, 64 },
+	{ 5000, 128 },
+	{ 5000, 512 },
+	{ 5000, 1024 },
+	{ 5000, 4096 },
+};
+
+struct cscfg_config_desc afdo_etm4x = {	/* "autofdo" configuration; listed in preload_cfgs[] */
+	.name = "autofdo",
+	.description = "Setup ETMs with strobing for autofdo\n"
+		       "Supplied presets allow experimentation with mark-space ratio for various loads\n",
+	.feat_ref_names = afdo_ref_names,
+	.nr_feat_refs = ARRAY_SIZE(afdo_ref_names),
+	.presets = &afdo_presets[0][0],
+	.nr_presets = AFDO_NR_PRESETS,
+	.nr_total_params = AFDO_NR_PARAMS,
+};
+
+/* end of ETM4x configurations */
+#endif /* IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X) */
diff --git a/drivers/hwtracing/coresight/coresight-cfg-preload.c b/drivers/hwtracing/coresight/coresight-cfg-preload.c
new file mode 100644
index 000000000000..751af3710d56
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-cfg-preload.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2020 Linaro Limited. All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include "coresight-cfg-preload.h"
+#include "coresight-config.h"
+#include "coresight-syscfg.h"
+
+/* Built-in feature and configuration descriptors handed to cscfg_load_config_sets() by cscfg_preload() */
+
+static struct cscfg_feature_desc *preload_feats[] = {
+#if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X)
+ &strobe_etm4x,
+#endif
+ NULL /* last entry; the only entry when ETM4X support is not built */
+};
+
+static struct cscfg_config_desc *preload_cfgs[] = {
+#if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X)
+ &afdo_etm4x,
+#endif
+ NULL /* last entry; the only entry when ETM4X support is not built */
+};
+
+/* preload called on initialisation: loads the built-in tables above, returns cscfg_load_config_sets() result */
+int cscfg_preload(void)
+{
+ return cscfg_load_config_sets(preload_cfgs, preload_feats);
+}
diff --git a/drivers/hwtracing/coresight/coresight-cfg-preload.h b/drivers/hwtracing/coresight/coresight-cfg-preload.h
new file mode 100644
index 000000000000..21299e175477
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-cfg-preload.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(C) 2020 Linaro Limited. All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+/* declare preloaded configurations and features */
+
+/* from coresight-cfg-afdo.c - etm 4x features */
+#if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X)
+extern struct cscfg_feature_desc strobe_etm4x;
+extern struct cscfg_config_desc afdo_etm4x;
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-config.c b/drivers/hwtracing/coresight/coresight-config.c
new file mode 100644
index 000000000000..4723bf7402a2
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-config.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2020 Linaro Limited. All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include <linux/sysfs.h>
+#include "coresight-config.h"
+#include "coresight-priv.h"
+
+/*
+ * This provides a set of generic functions that operate on configurations
+ * and features to manage the handling of parameters, the programming and
+ * saving of registers used by features on devices.
+ */
+
+/*
+ * Copy a register descriptor value into the driver internal memory location:
+ * whole 64 bit, whole 32 bit, or a masked 32 bit read-modify-write.
+ */
+static void cscfg_set_reg(struct cscfg_regval_csdev *reg_csdev)
+{
+	const struct cscfg_regval_desc *desc = &reg_csdev->reg_desc;
+	u32 *drv32 = reg_csdev->driver_regval;
+	u32 newval = desc->val32;
+
+	if (desc->type & CS_CFG_REG_TYPE_VAL_64BIT) {
+		*(u64 *)reg_csdev->driver_regval = desc->val64;
+		return;
+	}
+
+	/* masked update: only the bits set in mask32 are replaced */
+	if (desc->type & CS_CFG_REG_TYPE_VAL_MASK)
+		newval = (*drv32 & ~desc->mask32) | (desc->val32 & desc->mask32);
+
+	*drv32 = newval;
+}
+
+/* Copy the driver value back into the descriptor, but only for regs flagged VAL_SAVE. */
+static void cscfg_save_reg(struct cscfg_regval_csdev *reg_csdev)
+{
+	struct cscfg_regval_desc *desc = &reg_csdev->reg_desc;
+
+	if (!(desc->type & CS_CFG_REG_TYPE_VAL_SAVE))
+		return;
+	if (!(desc->type & CS_CFG_REG_TYPE_VAL_64BIT))
+		desc->val32 = *(u32 *)reg_csdev->driver_regval;
+	else
+		desc->val64 = *(u64 *)reg_csdev->driver_regval;
+}
+
+/*
+ * Link a parameter to the register it drives and seed that register from it.
+ */
+static void cscfg_init_reg_param(struct cscfg_feature_csdev *feat_csdev,
+				 struct cscfg_regval_desc *reg_desc,
+				 struct cscfg_regval_csdev *reg_csdev)
+{
+	const bool is_64bit = reg_csdev->reg_desc.type & CS_CFG_REG_TYPE_VAL_64BIT;
+	struct cscfg_parameter_csdev *param;
+
+	/* for param, load routines have validated the index */
+	param = &feat_csdev->params_csdev[reg_desc->param_idx];
+	param->val64 = is_64bit;
+	param->reg_csdev = reg_csdev;
+
+	if (!is_64bit)
+		reg_csdev->reg_desc.val32 = (u32)param->current_value;
+	else
+		reg_csdev->reg_desc.val64 = param->current_value;
+}
+
+/* with the driver spinlock held, write every register value of the feature to its driver location */
+static int cscfg_set_on_enable(struct cscfg_feature_csdev *feat_csdev)
+{
+	unsigned long irq_flags;
+	int idx = 0;
+
+	spin_lock_irqsave(feat_csdev->drv_spinlock, irq_flags);
+	while (idx < feat_csdev->nr_regs)
+		cscfg_set_reg(&feat_csdev->regs_csdev[idx++]);
+	spin_unlock_irqrestore(feat_csdev->drv_spinlock, irq_flags);
+	dev_dbg(&feat_csdev->csdev->dev, "Feature %s: %s",
+		feat_csdev->feat_desc->name, "set on enable");
+	return 0;
+}
+
+/* with the driver spinlock held, offer every register of the feature to cscfg_save_reg() */
+static void cscfg_save_on_disable(struct cscfg_feature_csdev *feat_csdev)
+{
+	unsigned long irq_flags;
+	int idx = 0;
+
+	spin_lock_irqsave(feat_csdev->drv_spinlock, irq_flags);
+	while (idx < feat_csdev->nr_regs)
+		cscfg_save_reg(&feat_csdev->regs_csdev[idx++]);
+	spin_unlock_irqrestore(feat_csdev->drv_spinlock, irq_flags);
+	dev_dbg(&feat_csdev->csdev->dev, "Feature %s: %s",
+		feat_csdev->feat_desc->name, "save on disable");
+}
+
+/* default reset - reload parameter and register values from the static descriptor */
+void cscfg_reset_feat(struct cscfg_feature_csdev *feat_csdev)
+{
+	const struct cscfg_feature_desc *desc = feat_csdev->feat_desc;
+	struct cscfg_regval_desc *src;
+	struct cscfg_regval_csdev *dst;
+	int n;
+
+	/* parameters first: registers of VAL_PARAM type are seeded from them */
+	for (n = 0; n < feat_csdev->nr_params; n++)
+		feat_csdev->params_csdev[n].current_value =
+			desc->params_desc[n].value;
+
+	for (n = 0; n < feat_csdev->nr_regs; n++) {
+		src = &desc->regs_desc[n];
+		dst = &feat_csdev->regs_csdev[n];
+		dst->reg_desc.type = src->type;
+
+		if (src->type & CS_CFG_REG_TYPE_VAL_PARAM) {
+			/* value comes from the parameter, not the descriptor */
+			cscfg_init_reg_param(feat_csdev, src, dst);
+			continue;
+		}
+
+		/*
+		 * val64 overlays val32 + mask32 in the descriptor union, so one
+		 * 64 bit copy initialises whichever form is in use
+		 */
+		dst->reg_desc.val64 = src->val64;
+	}
+}
+
+/*
+ * For the selected preset, set the register associated with each parameter to
+ * the value held for that parameter in the preset row.
+ *
+ * Returns 0 on success, -EINVAL if @preset is outside 1..nr_presets.
+ */
+static int cscfg_update_presets(struct cscfg_config_csdev *config_csdev, int preset)
+{
+	int i, j, val_idx = 0, nr_cfg_params;
+	struct cscfg_parameter_csdev *param_csdev;
+	struct cscfg_feature_csdev *feat_csdev;
+	const struct cscfg_config_desc *config_desc = config_csdev->config_desc;
+	const char *name;
+	const u64 *preset_base;
+	u64 val;
+
+	/* preset in range 1 to nr_presets */
+	if (preset < 1 || preset > config_desc->nr_presets)
+		return -EINVAL;
+	/*
+	 * Walk the features in order, handing out consecutive preset values to
+	 * their parameters. A preset row holds nr_total_params values; never
+	 * read past the end of the row even if the features declare more.
+	 */
+	nr_cfg_params = config_desc->nr_total_params;
+	preset_base = &config_desc->presets[(preset - 1) * nr_cfg_params];
+	for (i = 0; i < config_csdev->nr_feat; i++) {
+		feat_csdev = config_csdev->feats_csdev[i];
+		if (!feat_csdev->nr_params)
+			continue;
+
+		for (j = 0; j < feat_csdev->nr_params && val_idx < nr_cfg_params; j++) {
+			param_csdev = &feat_csdev->params_csdev[j];
+			name = feat_csdev->feat_desc->params_desc[j].name;
+			val = preset_base[val_idx++];
+			if (param_csdev->val64) {
+				dev_dbg(&config_csdev->csdev->dev,
+					"set param %s (%llu)", name, val);
+				param_csdev->reg_csdev->reg_desc.val64 = val;
+			} else {
+				param_csdev->reg_csdev->reg_desc.val32 = (u32)val;
+				dev_dbg(&config_csdev->csdev->dev,
+					"set param %s (%u)", name, (u32)val);
+			}
+		}
+
+		/* exit early if all params filled */
+		if (val_idx >= nr_cfg_params)
+			break;
+	}
+	return 0;
+}
+
+/*
+ * When no preset is selected, push the current value of every feature
+ * parameter into the register associated with that parameter.
+ * Always returns 0.
+ */
+static int cscfg_update_curr_params(struct cscfg_config_csdev *config_csdev)
+{
+	int i, j;
+	struct cscfg_feature_csdev *feat_csdev;
+	struct cscfg_parameter_csdev *param_csdev;
+	const char *name;
+	u64 val;
+
+	for (i = 0; i < config_csdev->nr_feat; i++) {
+		feat_csdev = config_csdev->feats_csdev[i];
+		if (!feat_csdev->nr_params)
+			continue;
+		for (j = 0; j < feat_csdev->nr_params; j++) {
+			param_csdev = &feat_csdev->params_csdev[j];
+			name = feat_csdev->feat_desc->params_desc[j].name;
+			val = param_csdev->current_value;
+			if (param_csdev->val64) {
+				dev_dbg(&config_csdev->csdev->dev,
+					"set param %s (%llu)", name, val);
+				param_csdev->reg_csdev->reg_desc.val64 = val;
+			} else {
+				param_csdev->reg_csdev->reg_desc.val32 = (u32)val;
+				dev_dbg(&config_csdev->csdev->dev,
+					"set param %s (%u)", name, (u32)val);
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * Walk every feature of the configuration: on enable write values into the
+ * driver locations, on disable read them back. Stops at the first error.
+ */
+static int cscfg_prog_config(struct cscfg_config_csdev *config_csdev, bool enable)
+{
+	const char *action = enable ? "enable" : "disable";
+	struct cscfg_feature_csdev *feat;
+	int idx, ret;
+
+	for (idx = 0; idx < config_csdev->nr_feat; idx++) {
+		feat = config_csdev->feats_csdev[idx];
+		dev_dbg(&feat->csdev->dev, "cfg %s; %s feature:%s", config_csdev->config_desc->name,
+			action, feat->feat_desc->name);
+
+		if (!enable) {
+			cscfg_save_on_disable(feat);
+			continue;
+		}
+
+		ret = cscfg_set_on_enable(feat);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Enable configuration for the device: refresh the parameter-driven register
+ * values, then update the internal driver data ready for programming.
+ *
+ * @config_csdev: config_csdev to set.
+ * @preset: preset values to use - 0 for current parameter values.
+ */
+int cscfg_csdev_enable_config(struct cscfg_config_csdev *config_csdev, int preset)
+{
+	int err;
+
+	err = preset ? cscfg_update_presets(config_csdev, preset) :
+		       cscfg_update_curr_params(config_csdev);
+	if (err)
+		return err;
+
+	/* parameter values are in place - write them to the driver locations */
+	return cscfg_prog_config(config_csdev, true);
+}
+
+void cscfg_csdev_disable_config(struct cscfg_config_csdev *config_csdev)
+{
+ cscfg_prog_config(config_csdev, false); /* enable == false: save-on-disable path; result is not propagated */
+}
diff --git a/drivers/hwtracing/coresight/coresight-config.h b/drivers/hwtracing/coresight/coresight-config.h
new file mode 100644
index 000000000000..25eb6c632692
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-config.h
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2020 Linaro Limited, All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#ifndef _CORESIGHT_CORESIGHT_CONFIG_H
+#define _CORESIGHT_CORESIGHT_CONFIG_H
+
+#include <linux/coresight.h>
+#include <linux/types.h>
+
+/* CoreSight Configuration Management - component and system wide configuration */
+
+/*
+ * Register type flags, OR-ed together in the 8 bit 'type' field of
+ * struct cscfg_regval_desc: describe how the value is interpreted and handled.
+ */
+#define CS_CFG_REG_TYPE_STD 0x80 /* reg is standard reg */
+#define CS_CFG_REG_TYPE_RESOURCE 0x40 /* reg is a resource */
+#define CS_CFG_REG_TYPE_VAL_PARAM 0x08 /* reg value taken from a parameter (param_idx) */
+#define CS_CFG_REG_TYPE_VAL_MASK 0x04 /* 32 bit reg value applied through mask32 */
+#define CS_CFG_REG_TYPE_VAL_64BIT 0x02 /* reg value 64 bit */
+#define CS_CFG_REG_TYPE_VAL_SAVE 0x01 /* reg value save on disable */
+
+/*
+ * flags defining what device class a feature will match to when processing a
+ * system configuration - used by config data and devices.
+ */
+#define CS_CFG_MATCH_CLASS_SRC_ALL 0x0001 /* match any source */
+#define CS_CFG_MATCH_CLASS_SRC_ETM4 0x0002 /* match any ETMv4 device */
+
+/* flags defining device instance matching - used in config match desc data. */
+#define CS_CFG_MATCH_INST_ANY 0x80000000 /* any instance of a class */
+
+/*
+ * Limit on the number of presets in a configuration.
+ * This is related to the number of bits (4) we use to select the preset on
+ * the perf command line. Preset 0 always means no preset selected.
+ * See PMU_FORMAT_ATTR(preset, "config:0-3") in coresight-etm-perf.c
+ */
+#define CS_CFG_CONFIG_PRESET_MAX 15
+
+/**
+ * struct cscfg_parameter_desc - Parameter descriptor for a device feature.
+ *
+ * @name: Name of parameter.
+ * @value: Initial or default value.
+ */
+struct cscfg_parameter_desc {
+ const char *name;
+ u64 value;
+};
+
+/**
+ * struct cscfg_regval_desc - Register value and descriptor of register usage.
+ *
+ * Used as a descriptor in the feature descriptors.
+ * Used to hold the value when a feature is loaded into a csdev.
+ *
+ * Supports full 64 bit register value, or 32 bit value with optional mask
+ * value. The value members share storage (union).
+ *
+ * @type: CS_CFG_REG_TYPE_* flags defining register usage and interpretation.
+ * @offset: the address offset for register in the hardware device (per device specification).
+ * @hw_info: optional hardware device type specific information. (ETM / CTI specific etc)
+ * @val64: 64 bit value.
+ * @val32: 32 bit value.
+ * @mask32: 32 bit mask when using 32 bit value to access device register - if mask type.
+ * @param_idx: parameter index value into parameter array if param type; overlays @val32.
+ */
+struct cscfg_regval_desc {
+ struct {
+ u32 type:8;
+ u32 offset:12;
+ u32 hw_info:12;
+ };
+ union {
+ u64 val64;
+ struct {
+ u32 val32;
+ u32 mask32;
+ };
+ u32 param_idx;
+ };
+};
+
+/**
+ * struct cscfg_feature_desc - Device feature descriptor: combination of registers
+ * and parameters to program a device to implement a specific complex function.
+ *
+ * @name: feature name.
+ * @description: brief description of the feature.
+ * @item: List entry.
+ * @match_flags: CS_CFG_MATCH_CLASS_* matching information if loading into a device.
+ * @nr_params: number of entries in @params_desc.
+ * @params_desc: array of parameters used.
+ * @nr_regs: number of entries in @regs_desc.
+ * @regs_desc: array of registers used.
+ */
+struct cscfg_feature_desc {
+ const char *name;
+ const char *description;
+ struct list_head item;
+ u32 match_flags;
+ int nr_params;
+ struct cscfg_parameter_desc *params_desc;
+ int nr_regs;
+ struct cscfg_regval_desc *regs_desc;
+};
+
+/**
+ * struct cscfg_config_desc - describes a selectable system configuration.
+ *
+ * A configuration describes device features in use, and may provide preset
+ * values for the parameters in those features.
+ *
+ * A single set of presets holds one value per parameter declared by
+ * all the features in use - that count is @nr_total_params.
+ *
+ * @name: name of the configuration - used for selection.
+ * @description: description of the purpose of the configuration.
+ * @item: list entry.
+ * @nr_feat_refs: Number of features used in this configuration.
+ * @feat_ref_names: references to features used in this configuration.
+ * @nr_presets: Number of sets of presets supplied by this configuration.
+ * @nr_total_params: Sum of all parameters declared by used features
+ * @presets: Preset values: @nr_presets consecutive rows of @nr_total_params.
+ * @event_ea: Extended attribute for perf event value
+ * @active_cnt: ref count for activate on this configuration.
+ *
+ */
+struct cscfg_config_desc {
+ const char *name;
+ const char *description;
+ struct list_head item;
+ int nr_feat_refs;
+ const char **feat_ref_names;
+ int nr_presets;
+ int nr_total_params;
+ const u64 *presets; /* nr_presets * nr_total_params */
+ struct dev_ext_attribute *event_ea;
+ atomic_t active_cnt;
+};
+
+/**
+ * struct cscfg_regval_csdev - config register instance, part of a loaded
+ * feature. Maps register values to csdev driver structures.
+ *
+ * @reg_desc: value to use when setting feature on device / store for
+ * readback of volatile values.
+ * @driver_regval: pointer to internal driver element used to set the value
+ * in hardware; accessed as u32 or u64 according to the type flags.
+ */
+struct cscfg_regval_csdev {
+ struct cscfg_regval_desc reg_desc;
+ void *driver_regval;
+};
+
+/**
+ * struct cscfg_parameter_csdev - config parameter instance, part of a loaded feature.
+ *
+ * @feat_csdev: parent feature
+ * @reg_csdev: register value updated by this parameter.
+ * @current_value: current value of parameter - may be set by user via
+ * sysfs, or modified during device operation.
+ * @val64: true if the associated register takes a 64 bit value
+ */
+struct cscfg_parameter_csdev {
+ struct cscfg_feature_csdev *feat_csdev;
+ struct cscfg_regval_csdev *reg_csdev;
+ u64 current_value;
+ bool val64;
+};
+
+/**
+ * struct cscfg_feature_csdev - Feature instance loaded into a CoreSight device.
+ *
+ * When a feature is loaded into a specific device, then this structure holds
+ * the connections between the register / parameter values used and the
+ * internal data structures that are written when the feature is enabled.
+ *
+ * Since applying a feature modifies internal data structures in the device,
+ * then we have a reference to the device spinlock to protect access to these
+ * structures (@drv_spinlock).
+ *
+ * @feat_desc: pointer to the static descriptor for this feature.
+ * @csdev: parent CoreSight device instance.
+ * @node: list entry into feature list for this device.
+ * @drv_spinlock: device spinlock for access to driver register data.
+ * @nr_params: number of entries in @params_csdev.
+ * @params_csdev: current parameter values on this device
+ * @nr_regs: number of entries in @regs_csdev.
+ * @regs_csdev: Programming details for the registers
+ */
+struct cscfg_feature_csdev {
+ const struct cscfg_feature_desc *feat_desc;
+ struct coresight_device *csdev;
+ struct list_head node;
+ spinlock_t *drv_spinlock;
+ int nr_params;
+ struct cscfg_parameter_csdev *params_csdev;
+ int nr_regs;
+ struct cscfg_regval_csdev *regs_csdev;
+};
+
+/**
+ * struct cscfg_config_csdev - Configuration instance loaded into a CoreSight device.
+ *
+ * The instance contains references to loaded features on this device that are
+ * used by the configuration.
+ *
+ * @config_desc:reference to the descriptor for this configuration
+ * @csdev:	parent coresight device for this configuration instance.
+ * @enabled:	true if configuration is enabled on this device.
+ * @node:	list entry within the coresight device
+ * @nr_feat:	Number of features on this device that are used in the
+ *		configuration.
+ * @feats_csdev:flexible array of @nr_feat references to the device features to enable.
+ */
+struct cscfg_config_csdev {
+	const struct cscfg_config_desc *config_desc;
+	struct coresight_device *csdev;
+	bool enabled;
+	struct list_head node;
+	int nr_feat;
+	struct cscfg_feature_csdev *feats_csdev[];
+};
+
+/**
+ * struct cscfg_csdev_feat_ops - Coresight device operations.
+ *
+ * Registered coresight devices provide these operations to manage feature
+ * instances compatible with the device hardware and drivers
+ *
+ * @load_feat: Pass a feature descriptor into the device and create the
+ * loaded feature instance (struct cscfg_feature_csdev).
+ */
+struct cscfg_csdev_feat_ops {
+ int (*load_feat)(struct coresight_device *csdev,
+ struct cscfg_feature_csdev *feat_csdev);
+};
+
+/* coresight config helper functions*/
+
+/* enable / disable config on a device - called with appropriate locks set.*/
+int cscfg_csdev_enable_config(struct cscfg_config_csdev *config_csdev, int preset);
+void cscfg_csdev_disable_config(struct cscfg_config_csdev *config_csdev);
+
+/* reset a feature to default values */
+void cscfg_reset_feat(struct cscfg_feature_csdev *feat_csdev);
+
+#endif /* _CORESIGHT_CORESIGHT_CONFIG_H */
diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
index 1002605db8ba..8a18c71df37a 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -21,6 +21,7 @@
#include "coresight-etm-perf.h"
#include "coresight-priv.h"
+#include "coresight-syscfg.h"
static DEFINE_MUTEX(coresight_mutex);
static DEFINE_PER_CPU(struct coresight_device *, csdev_sink);
@@ -1763,13 +1764,22 @@ static int __init coresight_init(void)
ret = etm_perf_init();
if (ret)
- bus_unregister(&coresight_bustype);
+ goto exit_bus_unregister;
+ /* initialise the coresight syscfg API */
+ ret = cscfg_init();
+ if (!ret)
+ return 0;
+
+ etm_perf_exit();
+exit_bus_unregister:
+ bus_unregister(&coresight_bustype);
return ret;
}
static void __exit coresight_exit(void)
{
+ cscfg_exit();
etm_perf_exit();
bus_unregister(&coresight_bustype);
}
diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c
index 9731d3a96073..00de46565bc4 100644
--- a/drivers/hwtracing/coresight/coresight-cpu-debug.c
+++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c
@@ -588,11 +588,11 @@ static int debug_probe(struct amba_device *adev, const struct amba_id *id)
drvdata->base = base;
- get_online_cpus();
+ cpus_read_lock();
per_cpu(debug_drvdata, drvdata->cpu) = drvdata;
ret = smp_call_function_single(drvdata->cpu, debug_init_arch_data,
drvdata, 1);
- put_online_cpus();
+ cpus_read_unlock();
if (ret) {
dev_err(dev, "CPU%d debug arch init failed\n", drvdata->cpu);
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 6f398377fec9..8ebd728d3a80 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -18,8 +18,10 @@
#include <linux/types.h>
#include <linux/workqueue.h>
+#include "coresight-config.h"
#include "coresight-etm-perf.h"
#include "coresight-priv.h"
+#include "coresight-syscfg.h"
static struct pmu etm_pmu;
static bool etm_perf_up;
@@ -57,8 +59,13 @@ PMU_FORMAT_ATTR(contextid1, "config:" __stringify(ETM_OPT_CTXTID));
PMU_FORMAT_ATTR(contextid2, "config:" __stringify(ETM_OPT_CTXTID2));
PMU_FORMAT_ATTR(timestamp, "config:" __stringify(ETM_OPT_TS));
PMU_FORMAT_ATTR(retstack, "config:" __stringify(ETM_OPT_RETSTK));
+/* preset - if sink ID is used as a configuration selector */
+PMU_FORMAT_ATTR(preset, "config:0-3");
/* Sink ID - same for all ETMs */
PMU_FORMAT_ATTR(sinkid, "config2:0-31");
+/* config ID - set if a system configuration is selected */
+PMU_FORMAT_ATTR(configid, "config2:32-63");
+
/*
* contextid always traces the "PID". The PID is in CONTEXTIDR_EL1
@@ -88,6 +95,8 @@ static struct attribute *etm_config_formats_attr[] = {
&format_attr_timestamp.attr,
&format_attr_retstack.attr,
&format_attr_sinkid.attr,
+ &format_attr_preset.attr,
+ &format_attr_configid.attr,
NULL,
};
@@ -105,9 +114,19 @@ static const struct attribute_group etm_pmu_sinks_group = {
.attrs = etm_config_sinks_attr,
};
+static struct attribute *etm_config_events_attr[] = {
+ NULL, /* empty at build time; etm_perf_add_symlink_cscfg() adds files to "events" at runtime */
+};
+
+static const struct attribute_group etm_pmu_events_group = {
+ .name = "events",
+ .attrs = etm_config_events_attr,
+};
+
static const struct attribute_group *etm_pmu_attr_groups[] = {
&etm_pmu_format_group,
&etm_pmu_sinks_group,
+ &etm_pmu_events_group,
NULL,
};
@@ -196,6 +215,10 @@ static void free_event_data(struct work_struct *work)
/* Free the sink buffers, if there are any */
free_sink_buffer(event_data);
+ /* clear any configuration we were using */
+ if (event_data->cfg_hash)
+ cscfg_deactivate_config(event_data->cfg_hash);
+
for_each_cpu(cpu, mask) {
struct list_head **ppath;
@@ -273,7 +296,7 @@ static bool sinks_compatible(struct coresight_device *a,
static void *etm_setup_aux(struct perf_event *event, void **pages,
int nr_pages, bool overwrite)
{
- u32 id;
+ u32 id, cfg_hash;
int cpu = event->cpu;
cpumask_t *mask;
struct coresight_device *sink = NULL;
@@ -286,11 +309,19 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
INIT_WORK(&event_data->work, free_event_data);
/* First get the selected sink from user space. */
- if (event->attr.config2) {
+ if (event->attr.config2 & GENMASK_ULL(31, 0)) {
id = (u32)event->attr.config2;
sink = user_sink = coresight_get_sink_by_id(id);
}
+ /* check if user wants a coresight configuration selected */
+ cfg_hash = (u32)((event->attr.config2 & GENMASK_ULL(63, 32)) >> 32);
+ if (cfg_hash) {
+ if (cscfg_activate_config(cfg_hash))
+ goto err;
+ event_data->cfg_hash = cfg_hash;
+ }
+
mask = &event_data->mask;
/*
@@ -658,68 +689,127 @@ static ssize_t etm_perf_sink_name_show(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)(ea->var));
}
-int etm_perf_add_symlink_sink(struct coresight_device *csdev)
+static struct dev_ext_attribute *
+etm_perf_add_symlink_group(struct device *dev, const char *name, const char *group_name)
{
- int ret;
+ struct dev_ext_attribute *ea;
unsigned long hash;
- const char *name;
+ int ret;
struct device *pmu_dev = etm_pmu.dev;
- struct device *dev = &csdev->dev;
- struct dev_ext_attribute *ea;
-
- if (csdev->type != CORESIGHT_DEV_TYPE_SINK &&
- csdev->type != CORESIGHT_DEV_TYPE_LINKSINK)
- return -EINVAL;
-
- if (csdev->ea != NULL)
- return -EINVAL;
if (!etm_perf_up)
- return -EPROBE_DEFER;
+ return ERR_PTR(-EPROBE_DEFER);
ea = devm_kzalloc(dev, sizeof(*ea), GFP_KERNEL);
if (!ea)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- name = dev_name(dev);
- /* See function coresight_get_sink_by_id() to know where this is used */
+ /*
+ * If this function is called adding a sink then the hash is used for
+ * sink selection - see function coresight_get_sink_by_id().
+ * If adding a configuration then the hash is used for selection in
+ * cscfg_activate_config()
+ */
hash = hashlen_hash(hashlen_string(NULL, name));
sysfs_attr_init(&ea->attr.attr);
ea->attr.attr.name = devm_kstrdup(dev, name, GFP_KERNEL);
if (!ea->attr.attr.name)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
ea->attr.attr.mode = 0444;
- ea->attr.show = etm_perf_sink_name_show;
ea->var = (unsigned long *)hash;
ret = sysfs_add_file_to_group(&pmu_dev->kobj,
- &ea->attr.attr, "sinks");
+ &ea->attr.attr, group_name);
- if (!ret)
- csdev->ea = ea;
+ return ret ? ERR_PTR(ret) : ea;
+}
- return ret;
+int etm_perf_add_symlink_sink(struct coresight_device *csdev)
+{
+ const char *name;
+ struct device *dev = &csdev->dev;
+ int err = 0;
+
+ if (csdev->type != CORESIGHT_DEV_TYPE_SINK &&
+ csdev->type != CORESIGHT_DEV_TYPE_LINKSINK)
+ return -EINVAL;
+
+ if (csdev->ea != NULL)
+ return -EINVAL;
+
+ name = dev_name(dev);
+ csdev->ea = etm_perf_add_symlink_group(dev, name, "sinks");
+ if (IS_ERR(csdev->ea)) {
+ err = PTR_ERR(csdev->ea);
+ csdev->ea = NULL;
+ } else
+ csdev->ea->attr.show = etm_perf_sink_name_show;
+
+ return err;
}
-void etm_perf_del_symlink_sink(struct coresight_device *csdev)
+static void etm_perf_del_symlink_group(struct dev_ext_attribute *ea, const char *group_name)
{
struct device *pmu_dev = etm_pmu.dev;
- struct dev_ext_attribute *ea = csdev->ea;
+ sysfs_remove_file_from_group(&pmu_dev->kobj,
+ &ea->attr.attr, group_name);
+}
+
+void etm_perf_del_symlink_sink(struct coresight_device *csdev)
+{
if (csdev->type != CORESIGHT_DEV_TYPE_SINK &&
csdev->type != CORESIGHT_DEV_TYPE_LINKSINK)
return;
- if (!ea)
+ if (!csdev->ea)
return;
- sysfs_remove_file_from_group(&pmu_dev->kobj,
- &ea->attr.attr, "sinks");
+ etm_perf_del_symlink_group(csdev->ea, "sinks");
csdev->ea = NULL;
}
+static ssize_t etm_perf_cscfg_event_show(struct device *dev,
+					 struct device_attribute *dattr,
+					 char *buf)
+{
+	struct dev_ext_attribute *ea = container_of(dattr, struct dev_ext_attribute, attr);
+
+	/* sysfs show: emit "configid=<hash>"; ea->var carries the hash itself, not a pointer */
+	return sysfs_emit(buf, "configid=0x%lx\n", (unsigned long)(ea->var));
+}
+
+int etm_perf_add_symlink_cscfg(struct device *dev, struct cscfg_config_desc *config_desc)
+{
+	struct dev_ext_attribute *ea;
+
+	/* an attribute has already been created for this configuration */
+	if (config_desc->event_ea)
+		return 0;
+
+	ea = etm_perf_add_symlink_group(dev, config_desc->name, "events");
+	if (IS_ERR(ea)) {
+		config_desc->event_ea = NULL;
+		return PTR_ERR(ea);
+	}
+
+	/* set the show function to the custom cscfg event */
+	ea->attr.show = etm_perf_cscfg_event_show;
+	config_desc->event_ea = ea;
+	return 0;
+}
+
+void etm_perf_del_symlink_cscfg(struct cscfg_config_desc *config_desc)
+{
+	/* nothing to remove when no "events" attribute is recorded */
+	if (config_desc->event_ea) {
+		etm_perf_del_symlink_group(config_desc->event_ea, "events");
+		config_desc->event_ea = NULL;
+	}
+}
+
int __init etm_perf_init(void)
{
int ret;
@@ -748,7 +838,7 @@ int __init etm_perf_init(void)
return ret;
}
-void __exit etm_perf_exit(void)
+void etm_perf_exit(void)
{
perf_pmu_unregister(&etm_pmu);
}
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h
index 3e4f2ad5e193..468f7799ab4f 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.h
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.h
@@ -11,6 +11,7 @@
#include "coresight-priv.h"
struct coresight_device;
+struct cscfg_config_desc;
/*
* In both ETMv3 and v4 the maximum number of address comparator implentable
@@ -48,12 +49,14 @@ struct etm_filters {
* @work: Handle to free allocated memory outside IRQ context.
* @mask: Hold the CPU(s) this event was set for.
* @snk_config: The sink configuration.
+ * @cfg_hash: The hash id of any coresight config selected.
* @path: An array of path, each slot for one CPU.
*/
struct etm_event_data {
struct work_struct work;
cpumask_t mask;
void *snk_config;
+ u32 cfg_hash;
struct list_head * __percpu *path;
};
@@ -69,6 +72,9 @@ static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
return data->snk_config;
return NULL;
}
+int etm_perf_add_symlink_cscfg(struct device *dev,
+ struct cscfg_config_desc *config_desc);
+void etm_perf_del_symlink_cscfg(struct cscfg_config_desc *config_desc);
#else
static inline int etm_perf_symlink(struct coresight_device *csdev, bool link)
{ return -EINVAL; }
@@ -79,10 +85,14 @@ static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
{
return NULL;
}
+/*
+ * Stubs used when CONFIG_CORESIGHT is not set. They are static inline, like
+ * the etm_perf_symlink() stub in this branch, so that each file including
+ * this header gets a private copy instead of a duplicate external definition.
+ */
+static inline int etm_perf_add_symlink_cscfg(struct device *dev,
+					     struct cscfg_config_desc *config_desc)
+{ return -EINVAL; }
+static inline void
+etm_perf_del_symlink_cscfg(struct cscfg_config_desc *config_desc) {}
#endif /* CONFIG_CORESIGHT */
int __init etm_perf_init(void);
-void __exit etm_perf_exit(void);
+void etm_perf_exit(void);
#endif
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-cfg.c b/drivers/hwtracing/coresight/coresight-etm4x-cfg.c
new file mode 100644
index 000000000000..d2ea903231b2
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm4x-cfg.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2020 Linaro Limited. All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include "coresight-etm4x.h"
+#include "coresight-etm4x-cfg.h"
+#include "coresight-priv.h"
+#include "coresight-syscfg.h"
+
+/*
+ * Defines to associate register IDs with driver data locations.
+ *
+ * Both macros expand in place and pick up names from the expansion site
+ * rather than from their arguments: reg_csdev, drvcfg and err (CHECKREG
+ * additionally reads offset). On a match they point
+ * reg_csdev->driver_regval at the selected drvcfg element, set err to 0 and
+ * execute "break", so they must be expanded inside a loop or a
+ * do { } while (0) block that the break can leave.
+ */
+#define CHECKREG(cval, elem) \
+ { \
+ if (offset == cval) { \
+ reg_csdev->driver_regval = &drvcfg->elem; \
+ err = 0; \
+ break; \
+ } \
+ }
+
+#define CHECKREGIDX(cval, elem, off_idx, mask) \
+ { \
+ if (mask == cval) { \
+ reg_csdev->driver_regval = &drvcfg->elem[off_idx]; \
+ err = 0; \
+ break; \
+ } \
+ }
+
+/**
+ * etm4_cfg_map_reg_offset - validate and map the register offset into a
+ * location in the driver config struct.
+ *
+ * Limits the number of registers that can be accessed and programmed in
+ * features, to those which are used to control the trace capture parameters.
+ *
+ * Omits or limits access to those which the driver must use exclusively.
+ *
+ * Invalid offsets will result in fail code return and feature load failure.
+ *
+ * @drvdata: driver data to map into.
+ * @reg_csdev: register to map; on success its driver_regval is pointed at
+ * the matching element of the driver config.
+ * @offset: device offset for the register
+ *
+ * Return: 0 if @offset was mapped, -EINVAL otherwise.
+ */
+static int etm4_cfg_map_reg_offset(struct etmv4_drvdata *drvdata,
+ struct cscfg_regval_csdev *reg_csdev, u32 offset)
+{
+ int err = -EINVAL, idx;
+ struct etmv4_config *drvcfg = &drvdata->config;
+ u32 off_mask;
+
+ if (((offset >= TRCEVENTCTL0R) && (offset <= TRCVIPCSSCTLR)) ||
+ ((offset >= TRCSEQRSTEVR) && (offset <= TRCEXTINSELR)) ||
+ ((offset >= TRCCIDCCTLR0) && (offset <= TRCVMIDCCTLR1))) {
+ do {
+ CHECKREG(TRCEVENTCTL0R, eventctrl0);
+ CHECKREG(TRCEVENTCTL1R, eventctrl1);
+ CHECKREG(TRCSTALLCTLR, stall_ctrl);
+ CHECKREG(TRCTSCTLR, ts_ctrl);
+ CHECKREG(TRCSYNCPR, syncfreq);
+ CHECKREG(TRCCCCTLR, ccctlr);
+ CHECKREG(TRCBBCTLR, bb_ctrl);
+ CHECKREG(TRCVICTLR, vinst_ctrl);
+ CHECKREG(TRCVIIECTLR, viiectlr);
+ CHECKREG(TRCVISSCTLR, vissctlr);
+ CHECKREG(TRCVIPCSSCTLR, vipcssctlr);
+ CHECKREG(TRCSEQRSTEVR, seq_rst);
+ CHECKREG(TRCSEQSTR, seq_state);
+ CHECKREG(TRCEXTINSELR, ext_inp);
+ CHECKREG(TRCCIDCCTLR0, ctxid_mask0);
+ CHECKREG(TRCCIDCCTLR1, ctxid_mask1);
+ CHECKREG(TRCVMIDCCTLR0, vmid_mask0);
+ CHECKREG(TRCVMIDCCTLR1, vmid_mask1);
+ } while (0);
+ } else if ((offset & GENMASK(11, 4)) == TRCSEQEVRn(0)) {
+ /* sequencer state control registers */
+ idx = (offset & GENMASK(3, 0)) / 4;
+ if (idx < ETM_MAX_SEQ_STATES) {
+ reg_csdev->driver_regval = &drvcfg->seq_ctrl[idx];
+ err = 0;
+ }
+ } else if ((offset >= TRCSSCCRn(0)) && (offset <= TRCSSPCICRn(7))) {
+ /* 32 bit, 8 off indexed register sets */
+ idx = (offset & GENMASK(4, 0)) / 4;
+ off_mask = (offset & GENMASK(11, 5));
+ do {
+ CHECKREGIDX(TRCSSCCRn(0), ss_ctrl, idx, off_mask);
+ CHECKREGIDX(TRCSSCSRn(0), ss_status, idx, off_mask);
+ CHECKREGIDX(TRCSSPCICRn(0), ss_pe_cmp, idx, off_mask);
+ } while (0);
+ } else if ((offset >= TRCCIDCVRn(0)) && (offset <= TRCVMIDCVRn(7))) {
+ /* 64 bit, 8 off indexed register sets */
+ idx = (offset & GENMASK(5, 0)) / 8;
+ off_mask = (offset & GENMASK(11, 6));
+ do {
+ CHECKREGIDX(TRCCIDCVRn(0), ctxid_pid, idx, off_mask);
+ CHECKREGIDX(TRCVMIDCVRn(0), vmid_val, idx, off_mask);
+ } while (0);
+ } else if ((offset >= TRCRSCTLRn(2)) &&
+ (offset <= TRCRSCTLRn((ETM_MAX_RES_SEL - 1)))) {
+ /* 32 bit resource selection regs, 32 off, skip fixed 0,1 */
+ idx = (offset & GENMASK(6, 0)) / 4;
+ if (idx < ETM_MAX_RES_SEL) {
+ reg_csdev->driver_regval = &drvcfg->res_ctrl[idx];
+ err = 0;
+ }
+ } else if ((offset >= TRCACVRn(0)) &&
+ (offset <= TRCACATRn((ETM_MAX_SINGLE_ADDR_CMP - 1)))) {
+ /* 64 bit addr cmp regs, 16 off */
+ idx = (offset & GENMASK(6, 0)) / 8;
+ off_mask = offset & GENMASK(11, 7);
+ do {
+ CHECKREGIDX(TRCACVRn(0), addr_val, idx, off_mask);
+ CHECKREGIDX(TRCACATRn(0), addr_acc, idx, off_mask);
+ } while (0);
+ } else if ((offset >= TRCCNTRLDVRn(0)) &&
+ (offset <= TRCCNTVRn((ETMv4_MAX_CNTR - 1)))) {
+ /* 32 bit counter regs, 4 off (ETMv4_MAX_CNTR - 1) */
+ idx = (offset & GENMASK(3, 0)) / 4;
+ off_mask = offset & GENMASK(11, 4);
+ do {
+ CHECKREGIDX(TRCCNTRLDVRn(0), cntrldvr, idx, off_mask);
+ CHECKREGIDX(TRCCNTCTLRn(0), cntr_ctrl, idx, off_mask);
+ CHECKREGIDX(TRCCNTVRn(0), cntr_val, idx, off_mask);
+ } while (0);
+ }
+ return err;
+}
+
+/**
+ * etm4_cfg_load_feature - load a feature into a device instance.
+ *
+ * @csdev:	An ETMv4 CoreSight device.
+ * @feat_csdev:	The feature instance to be loaded.
+ *
+ * Every register description of the feature is handed to
+ * etm4_cfg_map_reg_offset(), which checks the offset and records where in the
+ * driver config the register value lives. The walk stops at the first
+ * register that cannot be mapped.
+ *
+ * The feature spinlock pointer is initialised to the same spinlock
+ * that the driver uses to protect the internal register values.
+ *
+ * Return: 0 on success, otherwise the error from etm4_cfg_map_reg_offset().
+ */
+static int etm4_cfg_load_feature(struct coresight_device *csdev,
+				 struct cscfg_feature_csdev *feat_csdev)
+{
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	const struct cscfg_feature_desc *feat_desc = feat_csdev->feat_desc;
+	int reg_idx, ret;
+
+	/*
+	 * essential we set the device spinlock - this is used in the generic
+	 * programming routines when copying values into the drvdata structures
+	 * via the pointers setup in etm4_cfg_map_reg_offset().
+	 */
+	feat_csdev->drv_spinlock = &drvdata->spinlock;
+
+	/* process the register descriptions, bailing out on the first failure */
+	for (reg_idx = 0; reg_idx < feat_csdev->nr_regs; reg_idx++) {
+		ret = etm4_cfg_map_reg_offset(drvdata,
+					      &feat_csdev->regs_csdev[reg_idx],
+					      feat_desc->regs_desc[reg_idx].offset);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/* match information when loading configurations */
+#define CS_CFG_ETM4_MATCH_FLAGS (CS_CFG_MATCH_CLASS_SRC_ALL | \
+ CS_CFG_MATCH_CLASS_SRC_ETM4)
+
+/*
+ * Register an ETMv4 device with the configuration infrastructure, supplying
+ * the ETMv4 match flags and the ETMv4 feature load callback.
+ *
+ * Returns the result of cscfg_register_csdev().
+ */
+int etm4_cscfg_register(struct coresight_device *csdev)
+{
+	/*
+	 * Designated initializer: any member of the ops structure that is not
+	 * named here is zeroed instead of carrying stack garbage into
+	 * cscfg_register_csdev().
+	 */
+	struct cscfg_csdev_feat_ops ops = {
+		.load_feat = &etm4_cfg_load_feature,
+	};
+
+	return cscfg_register_csdev(csdev, CS_CFG_ETM4_MATCH_FLAGS, &ops);
+}
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-cfg.h b/drivers/hwtracing/coresight/coresight-etm4x-cfg.h
new file mode 100644
index 000000000000..32dab34c1dac
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm4x-cfg.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2014-2020, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _CORESIGHT_ETM4X_CFG_H
+#define _CORESIGHT_ETM4X_CFG_H
+
+#include "coresight-config.h"
+#include "coresight-etm4x.h"
+
+/* ETMv4 specific config defines */
+
+/* resource IDs */
+
+#define ETM4_CFG_RES_CTR 0x001
+#define ETM4_CFG_RES_CMP 0x002
+#define ETM4_CFG_RES_CMP_PAIR0 0x003
+#define ETM4_CFG_RES_CMP_PAIR1 0x004
+#define ETM4_CFG_RES_SEL 0x005
+#define ETM4_CFG_RES_SEL_PAIR0 0x006
+#define ETM4_CFG_RES_SEL_PAIR1 0x007
+#define ETM4_CFG_RES_SEQ 0x008
+#define ETM4_CFG_RES_TS 0x009
+#define ETM4_CFG_RES_MASK 0x00F
+
+/* ETMv4 specific config functions */
+int etm4_cscfg_register(struct coresight_device *csdev);
+
+#endif /* _CORESIGHT_ETM4X_CFG_H */
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index da27cd4a3c38..e24252eaf8e4 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -39,6 +39,8 @@
#include "coresight-etm4x.h"
#include "coresight-etm-perf.h"
+#include "coresight-etm4x-cfg.h"
+#include "coresight-syscfg.h"
static int boot_enable;
module_param(boot_enable, int, 0444);
@@ -561,12 +563,15 @@ out:
return ret;
}
-static int etm4_parse_event_config(struct etmv4_drvdata *drvdata,
+static int etm4_parse_event_config(struct coresight_device *csdev,
struct perf_event *event)
{
int ret = 0;
+ struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
struct etmv4_config *config = &drvdata->config;
struct perf_event_attr *attr = &event->attr;
+ unsigned long cfg_hash;
+ int preset;
/* Clear configuration from previous run */
memset(config, 0, sizeof(struct etmv4_config));
@@ -632,6 +637,20 @@ static int etm4_parse_event_config(struct etmv4_drvdata *drvdata,
/* bit[12], Return stack enable bit */
config->cfg |= BIT(12);
+ /*
+ * Set any selected configuration and preset.
+ *
+ * This extracts the values of PMU_FORMAT_ATTR(configid) and PMU_FORMAT_ATTR(preset)
+ * in the perf attributes defined in coresight-etm-perf.c.
+ * configid uses bits 63:32 of attr->config2, preset uses bits 3:0 of attr->config.
+ * A zero configid means no configuration active, preset = 0 means no preset selected.
+ */
+ if (attr->config2 & GENMASK_ULL(63, 32)) {
+ cfg_hash = (u32)(attr->config2 >> 32);
+ preset = attr->config & 0xF;
+ ret = cscfg_csdev_enable_active_config(csdev, cfg_hash, preset);
+ }
+
out:
return ret;
}
@@ -648,7 +667,7 @@ static int etm4_enable_perf(struct coresight_device *csdev,
}
/* Configure the tracer based on the session's specifics */
- ret = etm4_parse_event_config(drvdata, event);
+ ret = etm4_parse_event_config(csdev, event);
if (ret)
goto out;
/* And enable it */
@@ -794,11 +813,18 @@ static int etm4_disable_perf(struct coresight_device *csdev,
u32 control;
struct etm_filters *filters = event->hw.addr_filters;
struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+ struct perf_event_attr *attr = &event->attr;
if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id()))
return -EINVAL;
etm4_disable_hw(drvdata);
+ /*
+ * The config_id occupies bits 63:32 of the config2 perf event attr
+ * field. If this is non-zero then we will have enabled a config.
+ */
+ if (attr->config2 & GENMASK_ULL(63, 32))
+ cscfg_csdev_disable_active_config(csdev);
/*
* Check if the start/stop logic was active when the unit was stopped.
@@ -1939,6 +1965,13 @@ static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid)
return ret;
}
+ /* register with config infrastructure & load any current features */
+ ret = etm4_cscfg_register(drvdata->csdev);
+ if (ret) {
+ coresight_unregister(drvdata->csdev);
+ return ret;
+ }
+
etmdrvdata[drvdata->cpu] = drvdata;
dev_info(&drvdata->csdev->dev, "CPU%d: %s v%d.%d initialized\n",
@@ -2025,6 +2058,7 @@ static int __exit etm4_remove_dev(struct etmv4_drvdata *drvdata)
cpus_read_unlock();
+ cscfg_unregister_csdev(drvdata->csdev);
coresight_unregister(drvdata->csdev);
return 0;
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
index 007bad9e7ad8..a0640fa5c55b 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -9,6 +9,7 @@
#include <linux/sysfs.h>
#include "coresight-etm4x.h"
#include "coresight-priv.h"
+#include "coresight-syscfg.h"
static int etm4_set_mode_exclude(struct etmv4_drvdata *drvdata, bool exclude)
{
@@ -269,6 +270,8 @@ static ssize_t reset_store(struct device *dev,
spin_unlock(&drvdata->spinlock);
+ cscfg_csdev_reset_feats(to_coresight_device(dev));
+
return size;
}
static DEVICE_ATTR_WO(reset);
diff --git a/drivers/hwtracing/coresight/coresight-syscfg-configfs.c b/drivers/hwtracing/coresight/coresight-syscfg-configfs.c
new file mode 100644
index 000000000000..c547816b9000
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-syscfg-configfs.c
@@ -0,0 +1,396 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2020 Linaro Limited, All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include <linux/configfs.h>
+
+#include "coresight-syscfg-configfs.h"
+
+/*
+ * Create a default ci_type: a zeroed, device-managed config_item_type whose
+ * owner is this module. Returns NULL if the allocation fails.
+ */
+static inline struct config_item_type *cscfg_create_ci_type(void)
+{
+	struct config_item_type *type;
+
+	type = devm_kzalloc(cscfg_device(), sizeof(*type), GFP_KERNEL);
+	if (!type)
+		return NULL;
+
+	type->ct_owner = THIS_MODULE;
+	return type;
+}
+
+/* configurations sub-group */
+
+/* attributes for the config view group */
+/* show the description string of the configuration behind this item */
+static ssize_t cscfg_cfg_description_show(struct config_item *item, char *page)
+{
+	struct config_group *grp = to_config_group(item);
+	struct cscfg_fs_config *cfg_view =
+		container_of(grp, struct cscfg_fs_config, group);
+
+	return scnprintf(page, PAGE_SIZE, "%s", cfg_view->config_desc->description);
+}
+CONFIGFS_ATTR_RO(cscfg_cfg_, description);
+
+/* show the names of the features a configuration references, one per line */
+static ssize_t cscfg_cfg_feature_refs_show(struct config_item *item, char *page)
+{
+	const struct cscfg_config_desc *config_desc;
+	ssize_t len = 0;
+	int ref;
+
+	config_desc = container_of(to_config_group(item),
+				   struct cscfg_fs_config, group)->config_desc;
+
+	for (ref = 0; ref < config_desc->nr_feat_refs; ref++) {
+		len += scnprintf(page + len, PAGE_SIZE - len,
+				 "%s\n", config_desc->feat_ref_names[ref]);
+	}
+	return len;
+}
+CONFIGFS_ATTR_RO(cscfg_cfg_, feature_refs);
+
+/*
+ * List the values of one preset, in order of features and params, as
+ * "<feature>.<param> = 0x<value> " pairs followed by a newline.
+ *
+ * Returns the number of characters written to @page, or 0 when the
+ * configuration has no presets.
+ */
+static ssize_t cscfg_cfg_values_show(struct config_item *item, char *page)
+{
+	const struct cscfg_feature_desc *feat_desc;
+	const struct cscfg_config_desc *config_desc;
+	struct cscfg_fs_preset *fs_preset;
+	int i, j, val_idx, preset_idx;
+	ssize_t used = 0;
+
+	fs_preset = container_of(to_config_group(item), struct cscfg_fs_preset, group);
+	config_desc = fs_preset->config_desc;
+
+	if (!config_desc->nr_presets)
+		return 0;
+
+	/* preset directories are numbered from 1, the value array from 0 */
+	preset_idx = fs_preset->preset_num - 1;
+
+	/* start index on the correct array line */
+	val_idx = config_desc->nr_total_params * preset_idx;
+
+	/*
+	 * A set of presets is the sum of all params in used features,
+	 * in order of declaration of features and params in the features
+	 */
+	for (i = 0; i < config_desc->nr_feat_refs; i++) {
+		feat_desc = cscfg_get_named_feat_desc(config_desc->feat_ref_names[i]);
+		/*
+		 * NOTE(review): the lookup is by name and presumably yields
+		 * NULL when no such feature exists - confirm against
+		 * cscfg_get_named_feat_desc(). Without the descriptor the
+		 * param count is unknown, so the remaining values cannot be
+		 * attributed: stop here instead of dereferencing NULL.
+		 */
+		if (!feat_desc)
+			break;
+
+		for (j = 0; j < feat_desc->nr_params; j++) {
+			used += scnprintf(page + used, PAGE_SIZE - used,
+					  "%s.%s = 0x%llx ",
+					  feat_desc->name,
+					  feat_desc->params_desc[j].name,
+					  config_desc->presets[val_idx++]);
+		}
+	}
+	used += scnprintf(page + used, PAGE_SIZE - used, "\n");
+
+	return used;
+}
+CONFIGFS_ATTR_RO(cscfg_cfg_, values);
+
+static struct configfs_attribute *cscfg_config_view_attrs[] = {
+ &cscfg_cfg_attr_description,
+ &cscfg_cfg_attr_feature_refs,
+ NULL,
+};
+
+static struct config_item_type cscfg_config_view_type = {
+ .ct_owner = THIS_MODULE,
+ .ct_attrs = cscfg_config_view_attrs,
+};
+
+static struct configfs_attribute *cscfg_config_preset_attrs[] = {
+ &cscfg_cfg_attr_values,
+ NULL,
+};
+
+static struct config_item_type cscfg_config_preset_type = {
+ .ct_owner = THIS_MODULE,
+ .ct_attrs = cscfg_config_preset_attrs,
+};
+
+/*
+ * Add one "preset<n>" default group (n counted from 1) under a configuration
+ * view for each preset the configuration declares.
+ *
+ * Returns 0 on success or when there are no presets, -ENOMEM if a preset
+ * container cannot be allocated.
+ */
+static int cscfg_add_preset_groups(struct cscfg_fs_config *cfg_view)
+{
+	struct cscfg_config_desc *config_desc = cfg_view->config_desc;
+	struct cscfg_fs_preset *preset_view;
+	char dir_name[CONFIGFS_ITEM_NAME_LEN];
+	int n;
+
+	if (!config_desc->nr_presets)
+		return 0;
+
+	for (n = 1; n <= config_desc->nr_presets; n++) {
+		preset_view = devm_kzalloc(cscfg_device(), sizeof(*preset_view),
+					   GFP_KERNEL);
+		if (!preset_view)
+			return -ENOMEM;
+
+		preset_view->preset_num = n;
+		preset_view->config_desc = config_desc;
+
+		snprintf(dir_name, sizeof(dir_name), "preset%d", n);
+		config_group_init_type_name(&preset_view->group, dir_name,
+					    &cscfg_config_preset_type);
+		configfs_add_default_group(&preset_view->group, &cfg_view->group);
+	}
+	return 0;
+}
+
+/*
+ * Build the configfs group that presents one configuration, including its
+ * preset<n> sub-directories.
+ *
+ * Returns the new group, or an ERR_PTR: -EINVAL when cscfg_device() yields no
+ * device, -ENOMEM on allocation failure, or the error from
+ * cscfg_add_preset_groups().
+ */
+static struct config_group *cscfg_create_config_group(struct cscfg_config_desc *config_desc)
+{
+	struct device *dev = cscfg_device();
+	struct cscfg_fs_config *view;
+	int ret;
+
+	if (!dev)
+		return ERR_PTR(-EINVAL);
+
+	view = devm_kzalloc(dev, sizeof(*view), GFP_KERNEL);
+	if (!view)
+		return ERR_PTR(-ENOMEM);
+
+	view->config_desc = config_desc;
+	config_group_init_type_name(&view->group, config_desc->name,
+				    &cscfg_config_view_type);
+
+	/* add in a preset<n> dir for each preset */
+	ret = cscfg_add_preset_groups(view);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return &view->group;
+}
+
+/* attributes for features view */
+
+/* show the description string of the feature behind this item */
+static ssize_t cscfg_feat_description_show(struct config_item *item, char *page)
+{
+	struct config_group *grp = to_config_group(item);
+	struct cscfg_fs_feature *feat_view =
+		container_of(grp, struct cscfg_fs_feature, group);
+
+	return scnprintf(page, PAGE_SIZE, "%s", feat_view->feat_desc->description);
+}
+CONFIGFS_ATTR_RO(cscfg_feat_, description);
+
+/*
+ * Show the match classes of a feature as space separated tags
+ * ("SRC_ALL ", "SRC_ETMV4 ") terminated by a newline.
+ */
+static ssize_t cscfg_feat_matches_show(struct config_item *item, char *page)
+{
+	struct cscfg_fs_feature *feat_view =
+		container_of(to_config_group(item), struct cscfg_fs_feature, group);
+	u32 flags = feat_view->feat_desc->match_flags;
+	int len = 0;
+
+	if (flags & CS_CFG_MATCH_CLASS_SRC_ALL)
+		len += scnprintf(page + len, PAGE_SIZE - len, "SRC_ALL ");
+
+	if (flags & CS_CFG_MATCH_CLASS_SRC_ETM4)
+		len += scnprintf(page + len, PAGE_SIZE - len, "SRC_ETMV4 ");
+
+	len += scnprintf(page + len, PAGE_SIZE - len, "\n");
+	return len;
+}
+CONFIGFS_ATTR_RO(cscfg_feat_, matches);
+
+/* show the number of parameters the feature declares, in decimal */
+static ssize_t cscfg_feat_nr_params_show(struct config_item *item, char *page)
+{
+	struct config_group *grp = to_config_group(item);
+	struct cscfg_fs_feature *feat_view =
+		container_of(grp, struct cscfg_fs_feature, group);
+
+	return scnprintf(page, PAGE_SIZE, "%d\n", feat_view->feat_desc->nr_params);
+}
+CONFIGFS_ATTR_RO(cscfg_feat_, nr_params);
+
+/* base feature desc attrib structures */
+static struct configfs_attribute *cscfg_feature_view_attrs[] = {
+ &cscfg_feat_attr_description,
+ &cscfg_feat_attr_matches,
+ &cscfg_feat_attr_nr_params,
+ NULL,
+};
+
+static struct config_item_type cscfg_feature_view_type = {
+ .ct_owner = THIS_MODULE,
+ .ct_attrs = cscfg_feature_view_attrs,
+};
+
+/* show the current value of one feature parameter as 0x<hex> */
+static ssize_t cscfg_param_value_show(struct config_item *item, char *page)
+{
+	struct cscfg_fs_param *param_view =
+		container_of(to_config_group(item), struct cscfg_fs_param, group);
+	int idx = param_view->param_idx;
+	u64 current_val = param_view->feat_desc->params_desc[idx].value;
+
+	return scnprintf(page, PAGE_SIZE, "0x%llx\n", current_val);
+}
+
+/*
+ * Parse a new value for one feature parameter (base chosen by kstrtoull from
+ * the input prefix) and pass it to cscfg_update_feat_param_val().
+ *
+ * Returns @size on success, otherwise the parse or update error.
+ */
+static ssize_t cscfg_param_value_store(struct config_item *item,
+				       const char *page, size_t size)
+{
+	struct cscfg_fs_param *param_view =
+		container_of(to_config_group(item), struct cscfg_fs_param, group);
+	u64 new_val;
+	int ret;
+
+	ret = kstrtoull(page, 0, &new_val);
+	if (ret)
+		return ret;
+
+	ret = cscfg_update_feat_param_val(param_view->feat_desc,
+					  param_view->param_idx, new_val);
+	if (ret)
+		return ret;
+
+	return size;
+}
+CONFIGFS_ATTR(cscfg_param_, value);
+
+static struct configfs_attribute *cscfg_param_view_attrs[] = {
+ &cscfg_param_attr_value,
+ NULL,
+};
+
+static struct config_item_type cscfg_param_view_type = {
+ .ct_owner = THIS_MODULE,
+ .ct_attrs = cscfg_param_view_attrs,
+};
+
+/*
+ * configfs offers far less support than sysfs for adding attributes
+ * dynamically, and its show and store functions receive only the enclosing
+ * config_item, so the individual attribute cannot be identified. Each
+ * parameter is therefore added as its own group directory holding a value
+ * attribute.
+ *
+ * Returns 0 on success, -ENOMEM if a parameter container cannot be allocated.
+ */
+static int cscfg_create_params_group_items(struct cscfg_feature_desc *feat_desc,
+					   struct config_group *params_group)
+{
+	struct device *dev = cscfg_device();
+	struct cscfg_fs_param *param_view;
+	int idx;
+
+	/* one group per parameter, named after the parameter */
+	for (idx = 0; idx < feat_desc->nr_params; idx++) {
+		param_view = devm_kzalloc(dev, sizeof(*param_view), GFP_KERNEL);
+		if (!param_view)
+			return -ENOMEM;
+
+		param_view->param_idx = idx;
+		param_view->feat_desc = feat_desc;
+		config_group_init_type_name(&param_view->group,
+					    feat_desc->params_desc[idx].name,
+					    &cscfg_param_view_type);
+		configfs_add_default_group(&param_view->group, params_group);
+	}
+	return 0;
+}
+
+/*
+ * Build the configfs group that presents one feature. When the feature has
+ * parameters, a "params" default group is added with one sub-group per
+ * parameter.
+ *
+ * Returns the new group, or an ERR_PTR: -EINVAL when cscfg_device() yields no
+ * device, -ENOMEM on allocation failure, or the error from
+ * cscfg_create_params_group_items().
+ */
+static struct config_group *cscfg_create_feature_group(struct cscfg_feature_desc *feat_desc)
+{
+	struct device *dev = cscfg_device();
+	struct config_item_type *params_type = NULL;
+	struct config_group *params_grp = NULL;
+	struct cscfg_fs_feature *view;
+	int ret;
+
+	if (!dev)
+		return ERR_PTR(-EINVAL);
+
+	view = devm_kzalloc(dev, sizeof(*view), GFP_KERNEL);
+	if (!view)
+		return ERR_PTR(-ENOMEM);
+
+	/* the params group and its type are only needed with parameters */
+	if (feat_desc->nr_params) {
+		params_grp = devm_kzalloc(dev, sizeof(*params_grp), GFP_KERNEL);
+		if (!params_grp)
+			return ERR_PTR(-ENOMEM);
+
+		params_type = cscfg_create_ci_type();
+		if (!params_type)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	view->feat_desc = feat_desc;
+	config_group_init_type_name(&view->group, feat_desc->name,
+				    &cscfg_feature_view_type);
+	if (!params_grp)
+		return &view->group;
+
+	config_group_init_type_name(params_grp, "params", params_type);
+	configfs_add_default_group(params_grp, &view->group);
+	ret = cscfg_create_params_group_items(feat_desc, params_grp);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return &view->group;
+}
+
+static struct config_item_type cscfg_configs_type = {
+ .ct_owner = THIS_MODULE,
+};
+
+static struct config_group cscfg_configs_grp = {
+ .cg_item = {
+ .ci_namebuf = "configurations",
+ .ci_type = &cscfg_configs_type,
+ },
+};
+
+/*
+ * Add a configuration to the "configurations" group.
+ *
+ * Returns the error from building the group, otherwise the result of
+ * configfs_register_group().
+ */
+int cscfg_configfs_add_config(struct cscfg_config_desc *config_desc)
+{
+	struct config_group *cfg_group = cscfg_create_config_group(config_desc);
+
+	if (IS_ERR(cfg_group))
+		return PTR_ERR(cfg_group);
+
+	return configfs_register_group(&cscfg_configs_grp, cfg_group);
+}
+
+static struct config_item_type cscfg_features_type = {
+ .ct_owner = THIS_MODULE,
+};
+
+static struct config_group cscfg_features_grp = {
+ .cg_item = {
+ .ci_namebuf = "features",
+ .ci_type = &cscfg_features_type,
+ },
+};
+
+/*
+ * Add a feature to the "features" group.
+ *
+ * Returns the error from building the group, otherwise the result of
+ * configfs_register_group().
+ */
+int cscfg_configfs_add_feature(struct cscfg_feature_desc *feat_desc)
+{
+	struct config_group *feat_group = cscfg_create_feature_group(feat_desc);
+
+	if (IS_ERR(feat_group))
+		return PTR_ERR(feat_group);
+
+	return configfs_register_group(&cscfg_features_grp, feat_group);
+}
+
+/*
+ * Set up the configfs subsystem held in the manager, with "configurations"
+ * and "features" as default groups, and register it.
+ *
+ * Returns -EINVAL for a NULL manager, -ENOMEM if the subsystem item type
+ * cannot be allocated, otherwise the result of configfs_register_subsystem().
+ */
+int cscfg_configfs_init(struct cscfg_manager *cscfg_mgr)
+{
+	struct config_item_type *subsys_type;
+	struct configfs_subsystem *subsys;
+	struct config_group *root;
+
+	if (!cscfg_mgr)
+		return -EINVAL;
+
+	subsys_type = cscfg_create_ci_type();
+	if (!subsys_type)
+		return -ENOMEM;
+
+	subsys = &cscfg_mgr->cfgfs_subsys;
+	root = &subsys->su_group;
+
+	config_item_set_name(&root->cg_item, CSCFG_FS_SUBSYS_NAME);
+	root->cg_item.ci_type = subsys_type;
+
+	config_group_init(root);
+	mutex_init(&subsys->su_mutex);
+
+	/* Add default groups to subsystem */
+	config_group_init(&cscfg_configs_grp);
+	configfs_add_default_group(&cscfg_configs_grp, root);
+
+	config_group_init(&cscfg_features_grp);
+	configfs_add_default_group(&cscfg_features_grp, root);
+
+	return configfs_register_subsystem(subsys);
+}
+
+/* unregister the configfs subsystem held in the manager */
+void cscfg_configfs_release(struct cscfg_manager *cscfg_mgr)
+{
+	struct configfs_subsystem *subsys = &cscfg_mgr->cfgfs_subsys;
+
+	configfs_unregister_subsystem(subsys);
+}
diff --git a/drivers/hwtracing/coresight/coresight-syscfg-configfs.h b/drivers/hwtracing/coresight/coresight-syscfg-configfs.h
new file mode 100644
index 000000000000..7d6ffe35ca4c
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-syscfg-configfs.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Coresight system configuration driver - support for configfs.
+ */
+
+#ifndef CORESIGHT_SYSCFG_CONFIGFS_H
+#define CORESIGHT_SYSCFG_CONFIGFS_H
+
+#include <linux/configfs.h>
+#include "coresight-syscfg.h"
+
+#define CSCFG_FS_SUBSYS_NAME "cs-syscfg"
+
+/* container for configuration view */
+struct cscfg_fs_config {
+ struct cscfg_config_desc *config_desc;
+ struct config_group group;
+};
+
+/* container for feature view */
+struct cscfg_fs_feature {
+ struct cscfg_feature_desc *feat_desc;
+ struct config_group group;
+};
+
+/* container for parameter view */
+struct cscfg_fs_param {
+ int param_idx;
+ struct cscfg_feature_desc *feat_desc;
+ struct config_group group;
+};
+
+/* container for preset view */
+struct cscfg_fs_preset {
+ int preset_num;
+ struct cscfg_config_desc *config_desc;
+ struct config_group group;
+};
+
+int cscfg_configfs_init(struct cscfg_manager *cscfg_mgr);
+void cscfg_configfs_release(struct cscfg_manager *cscfg_mgr);
+int cscfg_configfs_add_config(struct cscfg_config_desc *config_desc);
+int cscfg_configfs_add_feature(struct cscfg_feature_desc *feat_desc);
+
+#endif /* CORESIGHT_SYSCFG_CONFIGFS_H */
diff --git a/drivers/hwtracing/coresight/coresight-syscfg.c b/drivers/hwtracing/coresight/coresight-syscfg.c
new file mode 100644
index 000000000000..fc0760f55c53
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-syscfg.c
@@ -0,0 +1,847 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2020 Linaro Limited, All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include <linux/platform_device.h>
+
+#include "coresight-config.h"
+#include "coresight-etm-perf.h"
+#include "coresight-syscfg.h"
+#include "coresight-syscfg-configfs.h"
+
+/*
+ * cscfg_ API manages configurations and features for the entire coresight
+ * infrastructure.
+ *
+ * It allows the loading of configurations and features, and loads these into
+ * coresight devices as appropriate.
+ */
+
+/* protect the cscfg_mgr data and device */
+static DEFINE_MUTEX(cscfg_mutex);
+
+/* only one of these */
+static struct cscfg_manager *cscfg_mgr;
+
+/* load features and configurations into the lists */
+
+/*
+ * Find the feature instance loaded on @csdev whose descriptor name equals
+ * @name. Returns the instance, or NULL when the device list has no match.
+ */
+static struct cscfg_feature_csdev *
+cscfg_get_feat_csdev(struct coresight_device *csdev, const char *name)
+{
+	struct cscfg_feature_csdev *iter, *found = NULL;
+
+	list_for_each_entry(iter, &csdev->feature_csdev_list, node) {
+		if (!strcmp(iter->feat_desc->name, name)) {
+			found = iter;
+			break;
+		}
+	}
+	return found;
+}
+
+/*
+ * Allocate a device config instance with room for @nr_feats entries in the
+ * trailing feats_csdev[] array. Returns NULL if the allocation fails.
+ */
+static struct cscfg_config_csdev *
+cscfg_alloc_csdev_cfg(struct coresight_device *csdev, int nr_feats)
+{
+	struct cscfg_config_csdev *cfg;
+
+	/* devm managed allocation against the parent of the coresight device */
+	cfg = devm_kzalloc(csdev->dev.parent,
+			   offsetof(struct cscfg_config_csdev, feats_csdev[nr_feats]),
+			   GFP_KERNEL);
+	if (cfg)
+		cfg->csdev = csdev;
+
+	return cfg;
+}
+
+/*
+ * Load a config into a device if at least one feature named by the config
+ * is loaded on that device.
+ *
+ * Returns 0 on success (including the no-match case) or -ENOMEM if the
+ * device config instance cannot be allocated.
+ */
+static int cscfg_add_csdev_cfg(struct coresight_device *csdev,
+			       struct cscfg_config_desc *config_desc)
+{
+	struct cscfg_config_csdev *cfg_inst = NULL;
+	struct cscfg_feature_csdev *match;
+	unsigned long irqflags;
+	int idx;
+
+	for (idx = 0; idx < config_desc->nr_feat_refs; idx++) {
+		/* skip feature references that are not loaded on this device */
+		match = cscfg_get_feat_csdev(csdev, config_desc->feat_ref_names[idx]);
+		if (!match)
+			continue;
+
+		/*
+		 * First match: create the instance, sized for every feature
+		 * the config references.
+		 */
+		if (!cfg_inst) {
+			cfg_inst = cscfg_alloc_csdev_cfg(csdev, config_desc->nr_feat_refs);
+			if (!cfg_inst)
+				return -ENOMEM;
+			cfg_inst->config_desc = config_desc;
+		}
+		cfg_inst->feats_csdev[cfg_inst->nr_feat] = match;
+		cfg_inst->nr_feat++;
+	}
+
+	/* no matching feature - this device ignores the config */
+	if (!cfg_inst)
+		return 0;
+
+	spin_lock_irqsave(&csdev->cscfg_csdev_lock, irqflags);
+	list_add(&cfg_inst->node, &csdev->config_csdev_list);
+	spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * Offer the config to every registered device - call with mutex locked.
+ * A device that has one or more of the config's features loads it; other
+ * devices ignore it. Stops at, and returns, the first error.
+ */
+static int cscfg_add_cfg_to_csdevs(struct cscfg_config_desc *config_desc)
+{
+	struct cscfg_registered_csdev *reg;
+	int ret = 0;
+
+	list_for_each_entry(reg, &cscfg_mgr->csdev_desc_list, item) {
+		ret = cscfg_add_csdev_cfg(reg->csdev, config_desc);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+/*
+ * Allocate a feature instance for loading into a csdev.
+ * All memory is devm managed against the parent of the csdev device.
+ * Returns NULL if any allocation fails.
+ */
+static struct cscfg_feature_csdev *
+cscfg_alloc_csdev_feat(struct coresight_device *csdev, struct cscfg_feature_desc *feat_desc)
+{
+	struct device *parent = csdev->dev.parent;
+	struct cscfg_feature_csdev *inst;
+	int p;
+
+	inst = devm_kzalloc(parent, sizeof(*inst), GFP_KERNEL);
+	if (!inst)
+		return NULL;
+
+	/*
+	 * Parameters are optional. When present, zero-allocate the array and
+	 * set only the back-reference to the owning feature here; the csdev
+	 * driver's load routine fills in the remaining fields.
+	 */
+	inst->nr_params = feat_desc->nr_params;
+	if (inst->nr_params) {
+		inst->params_csdev = devm_kcalloc(parent, inst->nr_params,
+						  sizeof(struct cscfg_parameter_csdev),
+						  GFP_KERNEL);
+		if (!inst->params_csdev)
+			return NULL;
+
+		for (p = 0; p < inst->nr_params; p++)
+			inst->params_csdev[p].feat_csdev = inst;
+	}
+
+	/* register array - contents are filled in by the csdev load routine */
+	inst->nr_regs = feat_desc->nr_regs;
+	inst->regs_csdev = devm_kcalloc(parent, inst->nr_regs,
+					sizeof(struct cscfg_regval_csdev),
+					GFP_KERNEL);
+	if (!inst->regs_csdev)
+		return NULL;
+
+	/* link the instance to its descriptor and device */
+	inst->feat_desc = feat_desc;
+	inst->csdev = csdev;
+
+	return inst;
+}
+
+/*
+ * Load one feature into one coresight device.
+ *
+ * Returns -EINVAL if the device supplies no load_feat op, -ENOMEM if the
+ * feature instance cannot be allocated, or the error from the load op.
+ */
+static int cscfg_load_feat_csdev(struct coresight_device *csdev,
+				 struct cscfg_feature_desc *feat_desc,
+				 struct cscfg_csdev_feat_ops *ops)
+{
+	struct cscfg_feature_csdev *inst;
+	unsigned long irqflags;
+	int ret;
+
+	if (!ops->load_feat)
+		return -EINVAL;
+
+	inst = cscfg_alloc_csdev_feat(csdev, feat_desc);
+	if (!inst)
+		return -ENOMEM;
+
+	/* let the device driver load the feature */
+	ret = ops->load_feat(csdev, inst);
+	if (!ret) {
+		/* initialise via the reset call, then publish on the device list */
+		cscfg_reset_feat(inst);
+		spin_lock_irqsave(&csdev->cscfg_csdev_lock, irqflags);
+		list_add(&inst->node, &csdev->feature_csdev_list);
+		spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, irqflags);
+	}
+
+	return ret;
+}
+
+/*
+ * Add the feature to every matching device - call with mutex locked.
+ * A device matches when its match_flags share at least one bit with the
+ * feature's match_flags. Returns the first load error, 0 otherwise.
+ */
+static int cscfg_add_feat_to_csdevs(struct cscfg_feature_desc *feat_desc)
+{
+	struct cscfg_registered_csdev *reg;
+	int ret;
+
+	list_for_each_entry(reg, &cscfg_mgr->csdev_desc_list, item) {
+		if (!(reg->match_flags & feat_desc->match_flags))
+			continue;
+
+		ret = cscfg_load_feat_csdev(reg->csdev, feat_desc, &reg->ops);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Report whether a feature called @name is on the manager's feature list -
+ * call with mutex locked.
+ */
+static bool cscfg_match_list_feat(const char *name)
+{
+	struct cscfg_feature_desc *iter;
+	bool found = false;
+
+	list_for_each_entry(iter, &cscfg_mgr->feat_desc_list, item) {
+		if (!strcmp(iter->name, name)) {
+			found = true;
+			break;
+		}
+	}
+	return found;
+}
+
+/*
+ * Verify every feature referenced by the config is on the feature list -
+ * call with mutex locked. Returns -EINVAL on the first missing feature.
+ */
+static int cscfg_check_feat_for_cfg(struct cscfg_config_desc *config_desc)
+{
+	int idx = 0;
+
+	while (idx < config_desc->nr_feat_refs) {
+		if (!cscfg_match_list_feat(config_desc->feat_ref_names[idx]))
+			return -EINVAL;
+		idx++;
+	}
+	return 0;
+}
+
+/*
+ * Load a feature: push it to any matching registered devices and, if that
+ * succeeds, add it to the manager's feature list.
+ */
+static int cscfg_load_feat(struct cscfg_feature_desc *feat_desc)
+{
+	int ret = cscfg_add_feat_to_csdevs(feat_desc);
+
+	if (!ret)
+		list_add(&feat_desc->item, &cscfg_mgr->feat_desc_list);
+
+	return ret;
+}
+
+/*
+ * Load a config into the system. In order: validate that the features it
+ * uses are loaded, add it to matching registered devices, add its perf
+ * symlink, then put it on the config list with a zero active count.
+ * The first failing step's error is returned and later steps are skipped.
+ */
+static int cscfg_load_config(struct cscfg_config_desc *config_desc)
+{
+	int ret;
+
+	ret = cscfg_check_feat_for_cfg(config_desc);
+	if (!ret)
+		ret = cscfg_add_cfg_to_csdevs(config_desc);
+	if (!ret)
+		ret = etm_perf_add_symlink_cscfg(cscfg_device(), config_desc);
+	if (ret)
+		return ret;
+
+	list_add(&config_desc->item, &cscfg_mgr->config_desc_list);
+	atomic_set(&config_desc->active_cnt, 0);
+	return 0;
+}
+
+/*
+ * Look up a feature descriptor by name on the manager's feature list.
+ * Takes cscfg_mutex for the search; returns NULL if no descriptor matches.
+ */
+const struct cscfg_feature_desc *cscfg_get_named_feat_desc(const char *name)
+{
+	const struct cscfg_feature_desc *iter, *found = NULL;
+
+	mutex_lock(&cscfg_mutex);
+
+	list_for_each_entry(iter, &cscfg_mgr->feat_desc_list, item) {
+		if (strcmp(iter->name, name) != 0)
+			continue;
+		found = iter;
+		break;
+	}
+
+	mutex_unlock(&cscfg_mutex);
+	return found;
+}
+
+/*
+ * Find the feature instance on @csdev that was created from @feat_desc
+ * (pointer comparison). Called with cscfg_mutex held.
+ */
+static struct cscfg_feature_csdev *
+cscfg_csdev_get_feat_from_desc(struct coresight_device *csdev,
+			       struct cscfg_feature_desc *feat_desc)
+{
+	struct cscfg_feature_csdev *iter, *found = NULL;
+
+	list_for_each_entry(iter, &csdev->feature_csdev_list, node) {
+		if (iter->feat_desc == feat_desc) {
+			found = iter;
+			break;
+		}
+	}
+	return found;
+}
+
+/*
+ * Set parameter @param_idx of @feat_desc to @value, in the descriptor and
+ * in every instance of the feature loaded on a registered device.
+ *
+ * Returns -EBUSY if any configuration is active, -EINVAL if @param_idx is
+ * out of range for the feature, 0 on success.
+ */
+int cscfg_update_feat_param_val(struct cscfg_feature_desc *feat_desc,
+				int param_idx, u64 value)
+{
+	struct cscfg_registered_csdev *reg;
+	struct cscfg_feature_csdev *inst;
+	int ret = 0;
+
+	mutex_lock(&cscfg_mutex);
+
+	if (atomic_read(&cscfg_mgr->sys_active_cnt)) {
+		/* no changes while any config is active */
+		ret = -EBUSY;
+	} else if (param_idx < 0 || param_idx >= feat_desc->nr_params) {
+		ret = -EINVAL;
+	} else {
+		feat_desc->params_desc[param_idx].value = value;
+
+		/* propagate to the loaded instances */
+		list_for_each_entry(reg, &cscfg_mgr->csdev_desc_list, item) {
+			inst = cscfg_csdev_get_feat_from_desc(reg->csdev, feat_desc);
+			if (inst)
+				inst->params_csdev[param_idx].current_value = value;
+		}
+	}
+
+	mutex_unlock(&cscfg_mutex);
+	return ret;
+}
+
+/**
+ * cscfg_load_config_sets - API function to load feature and config sets.
+ *
+ * Take a 0 terminated array of feature descriptors and/or configuration
+ * descriptors and load into the system.
+ * Features are loaded first to ensure configuration dependencies can be met.
+ * Either array pointer may be NULL, in which case that set is skipped.
+ * Loading stops at the first failure; items already loaded are not unloaded.
+ *
+ * @config_descs: 0 terminated array of configuration descriptors.
+ * @feat_descs: 0 terminated array of feature descriptors.
+ *
+ * Return: 0 on success (including when there is nothing to load), otherwise
+ * the error from the first feature or configuration that failed to load.
+ */
+int cscfg_load_config_sets(struct cscfg_config_desc **config_descs,
+			   struct cscfg_feature_desc **feat_descs)
+{
+	/* err must start at 0: both arrays may be NULL or empty */
+	int err = 0, i;
+
+	mutex_lock(&cscfg_mutex);
+
+	/* load features first */
+	if (feat_descs) {
+		for (i = 0; feat_descs[i]; i++) {
+			err = cscfg_load_feat(feat_descs[i]);
+			if (!err)
+				err = cscfg_configfs_add_feature(feat_descs[i]);
+			if (err) {
+				pr_err("coresight-syscfg: Failed to load feature %s\n",
+				       feat_descs[i]->name);
+				goto exit_unlock;
+			}
+		}
+	}
+
+	/* next any configurations to check feature dependencies */
+	if (config_descs) {
+		for (i = 0; config_descs[i]; i++) {
+			err = cscfg_load_config(config_descs[i]);
+			if (!err)
+				err = cscfg_configfs_add_config(config_descs[i]);
+			if (err) {
+				pr_err("coresight-syscfg: Failed to load configuration %s\n",
+				       config_descs[i]->name);
+				goto exit_unlock;
+			}
+		}
+	}
+
+exit_unlock:
+	mutex_unlock(&cscfg_mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(cscfg_load_config_sets);
+
+/* Handle coresight device registration and add configs and features to devices */
+
+/*
+ * Walk the manager's config list and load each config that matches the
+ * device. Returns the first error, 0 otherwise.
+ */
+static int cscfg_add_cfgs_csdev(struct coresight_device *csdev)
+{
+	struct cscfg_config_desc *cfg;
+	int ret;
+
+	list_for_each_entry(cfg, &cscfg_mgr->config_desc_list, item) {
+		ret = cscfg_add_csdev_cfg(csdev, cfg);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Walk the manager's feature list and load onto the device each feature
+ * whose match_flags share a bit with @match_flags.
+ * Returns -EINVAL if @ops has no load_feat, or the first load error.
+ */
+static int cscfg_add_feats_csdev(struct coresight_device *csdev,
+				 u32 match_flags,
+				 struct cscfg_csdev_feat_ops *ops)
+{
+	struct cscfg_feature_desc *feat;
+	int ret;
+
+	if (!ops->load_feat)
+		return -EINVAL;
+
+	list_for_each_entry(feat, &cscfg_mgr->feat_desc_list, item) {
+		if (!(feat->match_flags & match_flags))
+			continue;
+
+		ret = cscfg_load_feat_csdev(csdev, feat, ops);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Record a coresight device on the manager's registered device list, copying
+ * its match flags and load_feat op, and initialise the device's feature list,
+ * config list and cscfg spinlock. Returns -ENOMEM if the entry cannot be
+ * allocated.
+ */
+static int cscfg_list_add_csdev(struct coresight_device *csdev,
+				u32 match_flags,
+				struct cscfg_csdev_feat_ops *ops)
+{
+	struct cscfg_registered_csdev *reg;
+
+	reg = kzalloc(sizeof(*reg), GFP_KERNEL);
+	if (!reg)
+		return -ENOMEM;
+
+	reg->csdev = csdev;
+	reg->match_flags = match_flags;
+	reg->ops.load_feat = ops->load_feat;
+	list_add(&reg->item, &cscfg_mgr->csdev_desc_list);
+
+	/* per-device cscfg state */
+	INIT_LIST_HEAD(&csdev->feature_csdev_list);
+	INIT_LIST_HEAD(&csdev->config_csdev_list);
+	spin_lock_init(&csdev->cscfg_csdev_lock);
+
+	return 0;
+}
+
+/*
+ * Remove the first registered-device entry that refers to @csdev and free
+ * it. Does nothing if the device is not on the list.
+ */
+static void cscfg_list_remove_csdev(struct coresight_device *csdev)
+{
+	struct cscfg_registered_csdev *iter, *found = NULL;
+
+	list_for_each_entry(iter, &cscfg_mgr->csdev_desc_list, item) {
+		if (iter->csdev == csdev) {
+			found = iter;
+			break;
+		}
+	}
+
+	if (found) {
+		list_del(&found->item);
+		kfree(found);
+	}
+}
+
+/**
+ * cscfg_register_csdev - register a coresight device with the syscfg manager.
+ *
+ * Registers the coresight device with the system. @match_flags used to check
+ * if the device is a match for registered features. Any currently registered
+ * configurations and features that match the device will be loaded onto it.
+ * If loading fails the device is removed from the registered list again.
+ *
+ * @csdev: The coresight device to register.
+ * @match_flags: Matching information to load features.
+ * @ops: Standard operations supported by the device.
+ *
+ * Return: 0 on success, -ENOMEM if the list entry cannot be allocated, or the
+ * error from loading features or configurations onto the device.
+ */
+int cscfg_register_csdev(struct coresight_device *csdev,
+			 u32 match_flags,
+			 struct cscfg_csdev_feat_ops *ops)
+{
+	int ret = 0;
+
+	mutex_lock(&cscfg_mutex);
+
+	/* add device to list of registered devices */
+	ret = cscfg_list_add_csdev(csdev, match_flags, ops);
+	if (ret)
+		goto reg_csdev_unlock;
+
+	/* now load any registered features and configs matching the device. */
+	ret = cscfg_add_feats_csdev(csdev, match_flags, ops);
+	if (ret) {
+		cscfg_list_remove_csdev(csdev);
+		goto reg_csdev_unlock;
+	}
+
+	ret = cscfg_add_cfgs_csdev(csdev);
+	if (ret) {
+		cscfg_list_remove_csdev(csdev);
+		goto reg_csdev_unlock;
+	}
+
+	/* log messages are newline terminated */
+	pr_info("CSCFG registered %s\n", dev_name(&csdev->dev));
+
+reg_csdev_unlock:
+	mutex_unlock(&cscfg_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cscfg_register_csdev);
+
+/**
+ * cscfg_unregister_csdev - remove coresight device from syscfg manager.
+ *
+ * Takes cscfg_mutex and removes the device's entry from the manager's list
+ * of registered devices. The feature and config lists held on the device
+ * itself are not touched by this function.
+ *
+ * @csdev: Device to remove.
+ */
+void cscfg_unregister_csdev(struct coresight_device *csdev)
+{
+ mutex_lock(&cscfg_mutex);
+ cscfg_list_remove_csdev(csdev);
+ mutex_unlock(&cscfg_mutex);
+}
+EXPORT_SYMBOL_GPL(cscfg_unregister_csdev);
+
+/**
+ * cscfg_csdev_reset_feats - reset features for a CoreSight device.
+ *
+ * Calls cscfg_reset_feat() on every feature loaded into @csdev while
+ * holding the device's cscfg spinlock. A device with no loaded features
+ * is left untouched.
+ *
+ * @csdev: The CoreSight device.
+ */
+void cscfg_csdev_reset_feats(struct coresight_device *csdev)
+{
+	struct cscfg_feature_csdev *feat;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&csdev->cscfg_csdev_lock, irqflags);
+
+	/* iterating an empty list is a no-op, so no separate empty check */
+	list_for_each_entry(feat, &csdev->feature_csdev_list, node)
+		cscfg_reset_feat(feat);
+
+	spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, irqflags);
+}
+EXPORT_SYMBOL_GPL(cscfg_csdev_reset_feats);
+
+/**
+ * cscfg_activate_config - Mark a configuration descriptor as active.
+ *
+ * Searches the loaded configurations for the one whose event attribute
+ * value equals @cfg_hash. On a match, increments both the global active
+ * count and the descriptor's own active count, under cscfg_mutex.
+ *
+ * Only activated configurations can be enabled on individual devices, and
+ * a non-zero global active count blocks parameter updates.
+ *
+ * @cfg_hash: Hash value of the selected configuration name.
+ *
+ * Return: 0 if a configuration was activated, -EINVAL if none matched.
+ */
+int cscfg_activate_config(unsigned long cfg_hash)
+{
+	struct cscfg_config_desc *cfg;
+	int ret = -EINVAL;
+
+	mutex_lock(&cscfg_mutex);
+
+	list_for_each_entry(cfg, &cscfg_mgr->config_desc_list, item) {
+		if ((unsigned long)cfg->event_ea->var != cfg_hash)
+			continue;
+
+		/* global count - controls changes to active configurations */
+		atomic_inc(&cscfg_mgr->sys_active_cnt);
+
+		/* per-descriptor count - checked when enabling on a device */
+		atomic_inc(&cfg->active_cnt);
+
+		ret = 0;
+		dev_dbg(cscfg_device(), "Activate config %s.\n", cfg->name);
+		break;
+	}
+
+	mutex_unlock(&cscfg_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cscfg_activate_config);
+
+/**
+ * cscfg_deactivate_config - Mark a config descriptor as inactive.
+ *
+ * Finds the loaded configuration whose event attribute value equals
+ * @cfg_hash and decrements its active count and the global active count.
+ * Does nothing if no configuration matches.
+ *
+ * @cfg_hash: Hash value of the selected configuration name.
+ */
+void cscfg_deactivate_config(unsigned long cfg_hash)
+{
+	struct cscfg_config_desc *cfg;
+
+	mutex_lock(&cscfg_mutex);
+
+	list_for_each_entry(cfg, &cscfg_mgr->config_desc_list, item) {
+		if ((unsigned long)cfg->event_ea->var != cfg_hash)
+			continue;
+
+		atomic_dec(&cfg->active_cnt);
+		atomic_dec(&cscfg_mgr->sys_active_cnt);
+		dev_dbg(cscfg_device(), "Deactivate config %s.\n", cfg->name);
+		break;
+	}
+
+	mutex_unlock(&cscfg_mutex);
+}
+EXPORT_SYMBOL_GPL(cscfg_deactivate_config);
+
+/**
+ * cscfg_csdev_enable_active_config - Enable matching active configuration for device.
+ *
+ * Enables the configuration selected by @cfg_hash if the configuration is supported
+ * on the device and has been activated.
+ *
+ * If active and supported the CoreSight device @csdev will be programmed with the
+ * configuration, using @preset parameters.
+ *
+ * Should be called before driver hardware enable for the requested device, prior to
+ * programming and enabling the physical hardware.
+ *
+ * @csdev: CoreSight device to program.
+ * @cfg_hash: Selector for the configuration.
+ * @preset: Preset parameter values to use, 0 for current / default values.
+ *
+ * Return: 0 if no configuration is active system wide, if no active
+ * configuration on @csdev matches @cfg_hash, or if the configuration was
+ * enabled; the error from cscfg_csdev_enable_config() if programming fails;
+ * -EBUSY if the active context pointer was cleared before the configuration
+ * could be marked enabled.
+ */
+int cscfg_csdev_enable_active_config(struct coresight_device *csdev,
+ unsigned long cfg_hash, int preset)
+{
+ struct cscfg_config_csdev *config_csdev_active = NULL, *config_csdev_item;
+ const struct cscfg_config_desc *config_desc;
+ unsigned long flags;
+ int err = 0;
+
+ /* quickly check global count */
+ if (!atomic_read(&cscfg_mgr->sys_active_cnt))
+ return 0;
+
+ /*
+ * Look for matching configuration - set the active configuration
+ * context if found.
+ * The search and the context pointer update are both done under the
+ * device's cscfg spinlock.
+ */
+ spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags);
+ list_for_each_entry(config_csdev_item, &csdev->config_csdev_list, node) {
+ config_desc = config_csdev_item->config_desc;
+ if ((atomic_read(&config_desc->active_cnt)) &&
+ ((unsigned long)config_desc->event_ea->var == cfg_hash)) {
+ config_csdev_active = config_csdev_item;
+ csdev->active_cscfg_ctxt = (void *)config_csdev_active;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags);
+
+ /*
+ * If found, attempt to enable
+ */
+ if (config_csdev_active) {
+ /*
+ * Call the generic routine that will program up the internal
+ * driver structures prior to programming up the hardware.
+ * This routine takes the driver spinlock saved in the configs.
+ * It is called here with cscfg_csdev_lock released.
+ */
+ err = cscfg_csdev_enable_config(config_csdev_active, preset);
+ if (!err) {
+ /*
+ * Successful programming. Check the active_cscfg_ctxt
+ * pointer to ensure no pre-emption disabled it via
+ * cscfg_csdev_disable_active_config() before
+ * we could start.
+ *
+ * Set enabled if OK, err if not.
+ */
+ spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags);
+ if (csdev->active_cscfg_ctxt)
+ config_csdev_active->enabled = true;
+ else
+ err = -EBUSY;
+ spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags);
+ }
+ }
+ return err;
+}
+EXPORT_SYMBOL_GPL(cscfg_csdev_enable_active_config);
+
+/**
+ * cscfg_csdev_disable_active_config - disable an active config on the device.
+ *
+ * Disables the active configuration on the CoreSight device @csdev.
+ * Disable will save the values of any registers marked in the configurations
+ * as save on disable.
+ *
+ * Should be called after driver hardware disable for the requested device,
+ * after disabling the physical hardware and reading back registers.
+ *
+ * The active context pointer is always cleared, under the device's cscfg
+ * spinlock. cscfg_csdev_disable_config() is then called with the lock
+ * released, and only if the configuration had been marked enabled.
+ *
+ * @csdev: The CoreSight device.
+ */
+void cscfg_csdev_disable_active_config(struct coresight_device *csdev)
+{
+ struct cscfg_config_csdev *config_csdev;
+ unsigned long flags;
+
+ /*
+ * Check if we have an active config, and that it was successfully enabled.
+ * If it was not enabled, we have no work to do, otherwise mark as disabled.
+ * Clear the active config pointer.
+ */
+ spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags);
+ config_csdev = (struct cscfg_config_csdev *)csdev->active_cscfg_ctxt;
+ if (config_csdev) {
+ if (!config_csdev->enabled)
+ config_csdev = NULL;
+ else
+ config_csdev->enabled = false;
+ }
+ csdev->active_cscfg_ctxt = NULL;
+ spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags);
+
+ /* true if there was an enabled active config */
+ if (config_csdev)
+ cscfg_csdev_disable_config(config_csdev);
+}
+EXPORT_SYMBOL_GPL(cscfg_csdev_disable_active_config);
+
+/* Initialise system configuration management device. */
+
+/* Return the device embedded in the manager, or NULL if no manager exists. */
+struct device *cscfg_device(void)
+{
+	if (!cscfg_mgr)
+		return NULL;
+
+	return &cscfg_mgr->dev;
+}
+
+/*
+ * Must have a release function or the kernel will complain on module unload
+ *
+ * Frees the global manager structure and clears the global pointer. The
+ * @dev argument is not used directly.
+ */
+static void cscfg_dev_release(struct device *dev)
+{
+ kfree(cscfg_mgr);
+ cscfg_mgr = NULL;
+}
+
+/*
+ * a device is needed to "own" some kernel elements such as sysfs entries.
+ *
+ * Allocates the global manager structure and registers its embedded device.
+ * Returns -EINVAL if a manager already exists, -ENOMEM if allocation fails,
+ * or the error from device_register().
+ */
+static int cscfg_create_device(void)
+{
+	struct device *dev;
+	int err = -ENOMEM;
+
+	mutex_lock(&cscfg_mutex);
+	if (cscfg_mgr) {
+		err = -EINVAL;
+		goto create_dev_exit_unlock;
+	}
+
+	cscfg_mgr = kzalloc(sizeof(struct cscfg_manager), GFP_KERNEL);
+	if (!cscfg_mgr)
+		goto create_dev_exit_unlock;
+
+	/* setup the device */
+	dev = cscfg_device();
+	dev->release = cscfg_dev_release;
+	dev->init_name = "cs_system_cfg";
+
+	/*
+	 * device_register() initialises the device before trying to add it,
+	 * so on failure the reference must be dropped with put_device()
+	 * rather than freeing the structure directly. Dropping the last
+	 * reference invokes cscfg_dev_release(), which frees cscfg_mgr.
+	 */
+	err = device_register(dev);
+	if (err)
+		put_device(dev);
+
+create_dev_exit_unlock:
+	mutex_unlock(&cscfg_mutex);
+	return err;
+}
+
+/*
+ * Tear down the manager: remove the perf symlink of every loaded config,
+ * release the configfs subsystem and unregister the device, all under
+ * cscfg_mutex.
+ */
+static void cscfg_clear_device(void)
+{
+	struct cscfg_config_desc *cfg;
+
+	mutex_lock(&cscfg_mutex);
+
+	list_for_each_entry(cfg, &cscfg_mgr->config_desc_list, item)
+		etm_perf_del_symlink_cscfg(cfg);
+
+	cscfg_configfs_release(cscfg_mgr);
+	device_unregister(cscfg_device());
+
+	mutex_unlock(&cscfg_mutex);
+}
+
+/*
+ * Initialise system config management API device.
+ *
+ * Creates the manager device, initialises its lists and active count,
+ * sets up configfs and preloads the built-in configurations.
+ * Returns 0 on success or the error from the first step that fails.
+ */
+int __init cscfg_init(void)
+{
+	int err = 0;
+
+	err = cscfg_create_device();
+	if (err)
+		return err;
+
+	/*
+	 * Initialise the lists before anything that can fail: the error
+	 * path in cscfg_clear_device() walks config_desc_list.
+	 */
+	INIT_LIST_HEAD(&cscfg_mgr->csdev_desc_list);
+	INIT_LIST_HEAD(&cscfg_mgr->feat_desc_list);
+	INIT_LIST_HEAD(&cscfg_mgr->config_desc_list);
+	atomic_set(&cscfg_mgr->sys_active_cnt, 0);
+
+	err = cscfg_configfs_init(cscfg_mgr);
+	if (err)
+		goto exit_err_dev;
+
+	/* preload built-in configurations */
+	err = cscfg_preload();
+	if (err)
+		goto exit_err;
+
+	dev_info(cscfg_device(), "CoreSight Configuration manager initialised\n");
+	return 0;
+
+exit_err:
+	cscfg_clear_device();
+	return err;
+
+exit_err_dev:
+	/*
+	 * configfs init failed, so the subsystem was not registered and must
+	 * not be unregistered; only the device needs to be dropped.
+	 */
+	mutex_lock(&cscfg_mutex);
+	device_unregister(cscfg_device());
+	mutex_unlock(&cscfg_mutex);
+	return err;
+}
+
+void cscfg_exit(void)
+{
+ cscfg_clear_device(); /* removes perf symlinks, releases configfs, unregisters the device */
+}
diff --git a/drivers/hwtracing/coresight/coresight-syscfg.h b/drivers/hwtracing/coresight/coresight-syscfg.h
new file mode 100644
index 000000000000..8d018efd6ead
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-syscfg.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Coresight system configuration driver.
+ */
+
+#ifndef CORESIGHT_SYSCFG_H
+#define CORESIGHT_SYSCFG_H
+
+#include <linux/configfs.h>
+#include <linux/coresight.h>
+#include <linux/device.h>
+
+#include "coresight-config.h"
+
+/**
+ * struct cscfg_manager - System configuration manager device.
+ *
+ * Contains lists of the loaded configurations and features, plus a list of CoreSight devices
+ * registered with the system as supporting configuration management.
+ *
+ * Need a device to 'own' some coresight system wide sysfs entries in
+ * perf events, configfs etc.
+ *
+ * @dev: The device.
+ * @csdev_desc_list: List of coresight devices registered with the configuration manager.
+ * @feat_desc_list: List of feature descriptors to load into registered devices.
+ * @config_desc_list: List of system configuration descriptors to load into registered devices.
+ * @sys_active_cnt: Total number of active config descriptor references.
+ * @cfgfs_subsys: configfs subsystem used to manage configurations.
+ */
+struct cscfg_manager {
+ struct device dev;
+ struct list_head csdev_desc_list;
+ struct list_head feat_desc_list;
+ struct list_head config_desc_list;
+ atomic_t sys_active_cnt;
+ struct configfs_subsystem cfgfs_subsys;
+};
+
+/* get reference to dev in cscfg_manager */
+struct device *cscfg_device(void);
+
+/**
+ * struct cscfg_registered_csdev - List entry for Coresight devices that are
+ * registered as supporting complex config operations.
+ *
+ * @csdev: The registered device.
+ * @match_flags: The matching type information for adding features.
+ * @ops: Operations supported by the registered device.
+ * @item: list entry.
+ */
+struct cscfg_registered_csdev {
+ struct coresight_device *csdev;
+ u32 match_flags;
+ struct cscfg_csdev_feat_ops ops;
+ struct list_head item;
+};
+
+/* internal core operations for cscfg */
+int __init cscfg_init(void);
+void cscfg_exit(void);
+int cscfg_preload(void);
+const struct cscfg_feature_desc *cscfg_get_named_feat_desc(const char *name);
+int cscfg_update_feat_param_val(struct cscfg_feature_desc *feat_desc,
+ int param_idx, u64 value);
+
+
+/* syscfg manager external API */
+int cscfg_load_config_sets(struct cscfg_config_desc **cfg_descs,
+ struct cscfg_feature_desc **feat_descs);
+int cscfg_register_csdev(struct coresight_device *csdev, u32 match_flags,
+ struct cscfg_csdev_feat_ops *ops);
+void cscfg_unregister_csdev(struct coresight_device *csdev);
+int cscfg_activate_config(unsigned long cfg_hash);
+void cscfg_deactivate_config(unsigned long cfg_hash);
+void cscfg_csdev_reset_feats(struct coresight_device *csdev);
+int cscfg_csdev_enable_active_config(struct coresight_device *csdev,
+ unsigned long cfg_hash, int preset);
+void cscfg_csdev_disable_active_config(struct coresight_device *csdev);
+
+#endif /* CORESIGHT_SYSCFG_H */
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 10acece9d7b9..e17790fe35a7 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -866,15 +866,6 @@ config I2C_PCA_PLATFORM
This driver can also be built as a module. If so, the module
will be called i2c-pca-platform.
-config I2C_PMCMSP
- tristate "PMC MSP I2C TWI Controller"
- depends on PMC_MSP || COMPILE_TEST
- help
- This driver supports the PMC TWI controller on MSP devices.
-
- This driver can also be built as module. If so, the module
- will be called i2c-pmcmsp.
-
config I2C_PNX
tristate "I2C bus support for Philips PNX and NXP LPC targets"
depends on ARCH_LPC32XX || COMPILE_TEST
@@ -1402,4 +1393,15 @@ config I2C_FSI
This driver can also be built as a module. If so, the module will be
called as i2c-fsi.
+config I2C_VIRTIO
+ tristate "Virtio I2C Adapter"
+ select VIRTIO
+ help
+ If you say yes to this option, support will be included for the virtio
+ I2C adapter driver. The hardware can be emulated by any device model
+ software according to the virtio protocol.
+
+ This driver can also be built as a module. If so, the module
+ will be called i2c-virtio.
+
endmenu
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 69e9963615f6..1336b04f40e2 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -86,7 +86,6 @@ obj-$(CONFIG_I2C_OMAP) += i2c-omap.o
obj-$(CONFIG_I2C_OWL) += i2c-owl.o
obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi.o
obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o
-obj-$(CONFIG_I2C_PMCMSP) += i2c-pmcmsp.o
obj-$(CONFIG_I2C_PNX) += i2c-pnx.o
obj-$(CONFIG_I2C_PXA) += i2c-pxa.o
obj-$(CONFIG_I2C_PXA_PCI) += i2c-pxa-pci.o
@@ -146,5 +145,6 @@ obj-$(CONFIG_I2C_SIBYTE) += i2c-sibyte.o
obj-$(CONFIG_I2C_XGENE_SLIMPRO) += i2c-xgene-slimpro.o
obj-$(CONFIG_SCx200_ACB) += scx200_acb.o
obj-$(CONFIG_I2C_FSI) += i2c-fsi.o
+obj-$(CONFIG_I2C_VIRTIO) += i2c-virtio.o
ccflags-$(CONFIG_I2C_DEBUG_BUS) := -DDEBUG
diff --git a/drivers/i2c/busses/i2c-at91-core.c b/drivers/i2c/busses/i2c-at91-core.c
index e14edd236108..2df9df585131 100644
--- a/drivers/i2c/busses/i2c-at91-core.c
+++ b/drivers/i2c/busses/i2c-at91-core.c
@@ -286,9 +286,7 @@ static int at91_twi_remove(struct platform_device *pdev)
return 0;
}
-#ifdef CONFIG_PM
-
-static int at91_twi_runtime_suspend(struct device *dev)
+static int __maybe_unused at91_twi_runtime_suspend(struct device *dev)
{
struct at91_twi_dev *twi_dev = dev_get_drvdata(dev);
@@ -299,7 +297,7 @@ static int at91_twi_runtime_suspend(struct device *dev)
return 0;
}
-static int at91_twi_runtime_resume(struct device *dev)
+static int __maybe_unused at91_twi_runtime_resume(struct device *dev)
{
struct at91_twi_dev *twi_dev = dev_get_drvdata(dev);
@@ -308,7 +306,7 @@ static int at91_twi_runtime_resume(struct device *dev)
return clk_prepare_enable(twi_dev->clk);
}
-static int at91_twi_suspend_noirq(struct device *dev)
+static int __maybe_unused at91_twi_suspend_noirq(struct device *dev)
{
if (!pm_runtime_status_suspended(dev))
at91_twi_runtime_suspend(dev);
@@ -316,7 +314,7 @@ static int at91_twi_suspend_noirq(struct device *dev)
return 0;
}
-static int at91_twi_resume_noirq(struct device *dev)
+static int __maybe_unused at91_twi_resume_noirq(struct device *dev)
{
struct at91_twi_dev *twi_dev = dev_get_drvdata(dev);
int ret;
@@ -335,18 +333,13 @@ static int at91_twi_resume_noirq(struct device *dev)
return 0;
}
-static const struct dev_pm_ops at91_twi_pm = {
+static const struct dev_pm_ops __maybe_unused at91_twi_pm = {
.suspend_noirq = at91_twi_suspend_noirq,
.resume_noirq = at91_twi_resume_noirq,
.runtime_suspend = at91_twi_runtime_suspend,
.runtime_resume = at91_twi_runtime_resume,
};
-#define at91_twi_pm_ops (&at91_twi_pm)
-#else
-#define at91_twi_pm_ops NULL
-#endif
-
static struct platform_driver at91_twi_driver = {
.probe = at91_twi_probe,
.remove = at91_twi_remove,
@@ -354,7 +347,7 @@ static struct platform_driver at91_twi_driver = {
.driver = {
.name = "at91_i2c",
.of_match_table = of_match_ptr(atmel_twi_dt_ids),
- .pm = at91_twi_pm_ops,
+ .pm = pm_ptr(&at91_twi_pm),
},
};
diff --git a/drivers/i2c/busses/i2c-at91-master.c b/drivers/i2c/busses/i2c-at91-master.c
index 1cceb6866689..b0eae94909f4 100644
--- a/drivers/i2c/busses/i2c-at91-master.c
+++ b/drivers/i2c/busses/i2c-at91-master.c
@@ -138,9 +138,9 @@ static void at91_twi_dma_cleanup(struct at91_twi_dev *dev)
if (dma->xfer_in_progress) {
if (dma->direction == DMA_FROM_DEVICE)
- dmaengine_terminate_all(dma->chan_rx);
+ dmaengine_terminate_sync(dma->chan_rx);
else
- dmaengine_terminate_all(dma->chan_tx);
+ dmaengine_terminate_sync(dma->chan_tx);
dma->xfer_in_progress = false;
}
if (dma->buf_mapped) {
diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c
index cceaf69279a9..6304d1dd2dd6 100644
--- a/drivers/i2c/busses/i2c-bcm-iproc.c
+++ b/drivers/i2c/busses/i2c-bcm-iproc.c
@@ -1224,14 +1224,14 @@ static int bcm_iproc_i2c_unreg_slave(struct i2c_client *slave)
disable_irq(iproc_i2c->irq);
+ tasklet_kill(&iproc_i2c->slave_rx_tasklet);
+
/* disable all slave interrupts */
tmp = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET);
tmp &= ~(IE_S_ALL_INTERRUPT_MASK <<
IE_S_ALL_INTERRUPT_SHIFT);
iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, tmp);
- tasklet_kill(&iproc_i2c->slave_rx_tasklet);
-
/* Erase the slave address programmed */
tmp = iproc_i2c_rd_reg(iproc_i2c, S_CFG_SMBUS_ADDR_OFFSET);
tmp &= ~BIT(S_CFG_EN_NIC_SMB_ADDR3_SHIFT);
diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c
index 20aa3398e642..805c77143a0f 100644
--- a/drivers/i2c/busses/i2c-cadence.c
+++ b/drivers/i2c/busses/i2c-cadence.c
@@ -178,6 +178,7 @@ enum cdns_i2c_slave_state {
* @clk: Pointer to struct clk
* @clk_rate_change_nb: Notifier block for clock rate changes
* @quirks: flag for broken hold bit usage in r1p10
+ * @ctrl_reg: Cached value of the control register.
* @ctrl_reg_diva_divb: value of fields DIV_A and DIV_B from CR register
* @slave: Registered slave instance.
* @dev_mode: I2C operating role(master/slave).
@@ -202,6 +203,7 @@ struct cdns_i2c {
struct clk *clk;
struct notifier_block clk_rate_change_nb;
u32 quirks;
+ u32 ctrl_reg;
#if IS_ENABLED(CONFIG_I2C_SLAVE)
u16 ctrl_reg_diva_divb;
struct i2c_client *slave;
@@ -1071,10 +1073,11 @@ static int cdns_i2c_setclk(unsigned long clk_in, struct cdns_i2c *id)
if (ret)
return ret;
- ctrl_reg = cdns_i2c_readreg(CDNS_I2C_CR_OFFSET);
+ ctrl_reg = id->ctrl_reg;
ctrl_reg &= ~(CDNS_I2C_CR_DIVA_MASK | CDNS_I2C_CR_DIVB_MASK);
ctrl_reg |= ((div_a << CDNS_I2C_CR_DIVA_SHIFT) |
(div_b << CDNS_I2C_CR_DIVB_SHIFT));
+ id->ctrl_reg = ctrl_reg;
cdns_i2c_writereg(ctrl_reg, CDNS_I2C_CR_OFFSET);
#if IS_ENABLED(CONFIG_I2C_SLAVE)
id->ctrl_reg_diva_divb = ctrl_reg & (CDNS_I2C_CR_DIVA_MASK |
@@ -1163,6 +1166,26 @@ static int __maybe_unused cdns_i2c_runtime_suspend(struct device *dev)
}
/**
+ * cdns_i2c_init - Controller initialisation
+ * @id: Device private data structure
+ *
+ * Initialise the i2c controller.
+ *
+ */
+static void cdns_i2c_init(struct cdns_i2c *id)
+{
+ cdns_i2c_writereg(id->ctrl_reg, CDNS_I2C_CR_OFFSET);
+ /*
+ * Cadence I2C controller has a bug wherein it generates
+ * invalid read transaction after HW timeout in master receiver mode.
+ * HW timeout is not used by this driver and the interrupt is disabled.
+ * But the feature itself cannot be disabled. Hence maximum value
+ * is written to this register to reduce the chances of error.
+ */
+ cdns_i2c_writereg(CDNS_I2C_TIMEOUT_MAX, CDNS_I2C_TIME_OUT_OFFSET);
+}
+
+/**
* cdns_i2c_runtime_resume - Runtime resume
* @dev: Address of the platform_device structure
*
@@ -1180,6 +1203,7 @@ static int __maybe_unused cdns_i2c_runtime_resume(struct device *dev)
dev_err(dev, "Cannot enable clock.\n");
return ret;
}
+ cdns_i2c_init(xi2c);
return 0;
}
@@ -1279,7 +1303,7 @@ static int cdns_i2c_probe(struct platform_device *pdev)
id->dev_mode = CDNS_I2C_MODE_MASTER;
id->slave_state = CDNS_I2C_SLAVE_STATE_IDLE;
#endif
- cdns_i2c_writereg(CDNS_I2C_CR_MASTER_EN_MASK, CDNS_I2C_CR_OFFSET);
+ id->ctrl_reg = CDNS_I2C_CR_ACK_EN | CDNS_I2C_CR_NEA | CDNS_I2C_CR_MS;
ret = cdns_i2c_setclk(id->input_clk, id);
if (ret) {
@@ -1294,15 +1318,7 @@ static int cdns_i2c_probe(struct platform_device *pdev)
dev_err(&pdev->dev, "cannot get irq %d\n", id->irq);
goto err_clk_dis;
}
-
- /*
- * Cadence I2C controller has a bug wherein it generates
- * invalid read transaction after HW timeout in master receiver mode.
- * HW timeout is not used by this driver and the interrupt is disabled.
- * But the feature itself cannot be disabled. Hence maximum value
- * is written to this register to reduce the chances of error.
- */
- cdns_i2c_writereg(CDNS_I2C_TIMEOUT_MAX, CDNS_I2C_TIME_OUT_OFFSET);
+ cdns_i2c_init(id);
ret = i2c_add_adapter(&id->adap);
if (ret < 0)
diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c
index fdc34d9e3702..bf2a4920638a 100644
--- a/drivers/i2c/busses/i2c-designware-common.c
+++ b/drivers/i2c/busses/i2c-designware-common.c
@@ -24,6 +24,7 @@
#include <linux/regmap.h>
#include <linux/swab.h>
#include <linux/types.h>
+#include <linux/units.h>
#include "i2c-designware-core.h"
@@ -350,7 +351,7 @@ u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset)
*
* If your hardware is free from tHD;STA issue, try this one.
*/
- return (ic_clk * tSYMBOL + 500000) / 1000000 - 8 + offset;
+ return DIV_ROUND_CLOSEST(ic_clk * tSYMBOL, MICRO) - 8 + offset;
else
/*
* Conditional expression:
@@ -366,8 +367,7 @@ u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset)
* The reason why we need to take into account "tf" here,
* is the same as described in i2c_dw_scl_lcnt().
*/
- return (ic_clk * (tSYMBOL + tf) + 500000) / 1000000
- - 3 + offset;
+ return DIV_ROUND_CLOSEST(ic_clk * (tSYMBOL + tf), MICRO) - 3 + offset;
}
u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset)
@@ -383,7 +383,7 @@ u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset)
* account the fall time of SCL signal (tf). Default tf value
* should be 0.3 us, for safety.
*/
- return ((ic_clk * (tLOW + tf) + 500000) / 1000000) - 1 + offset;
+ return DIV_ROUND_CLOSEST(ic_clk * (tLOW + tf), MICRO) - 1 + offset;
}
int i2c_dw_set_sda_hold(struct dw_i2c_dev *dev)
diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index 6a53f75abf7c..60a2e750cee9 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -117,7 +117,7 @@
#define DW_IC_ERR_TX_ABRT 0x1
-#define DW_IC_TAR_10BITADDR_MASTER BIT(12)
+#define DW_IC_TAR_10BITADDR_MASTER BIT(12)
#define DW_IC_COMP_PARAM_1_SPEED_MODE_HIGH (BIT(2) | BIT(3))
#define DW_IC_COMP_PARAM_1_SPEED_MODE_MASK GENMASK(3, 2)
@@ -245,7 +245,7 @@ struct dw_i2c_dev {
struct clk *clk;
struct clk *pclk;
struct reset_control *rst;
- struct i2c_client *slave;
+ struct i2c_client *slave;
u32 (*get_clk_rate_khz) (struct dw_i2c_dev *dev);
int cmd_err;
struct i2c_msg *msgs;
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 4b37f28ec0c6..21113665ddea 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -31,12 +31,13 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/suspend.h>
+#include <linux/units.h>
#include "i2c-designware-core.h"
static u32 i2c_dw_get_clk_rate_khz(struct dw_i2c_dev *dev)
{
- return clk_get_rate(dev->clk)/1000;
+ return clk_get_rate(dev->clk) / KILO;
}
#ifdef CONFIG_ACPI
@@ -270,7 +271,7 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
if (!dev->sda_hold_time && t->sda_hold_ns)
dev->sda_hold_time =
- div_u64(clk_khz * t->sda_hold_ns + 500000, 1000000);
+ DIV_S64_ROUND_CLOSEST(clk_khz * t->sda_hold_ns, MICRO);
}
adap = &dev->adapter;
diff --git a/drivers/i2c/busses/i2c-highlander.c b/drivers/i2c/busses/i2c-highlander.c
index 803dad70e2a7..a2add128d084 100644
--- a/drivers/i2c/busses/i2c-highlander.c
+++ b/drivers/i2c/busses/i2c-highlander.c
@@ -379,7 +379,7 @@ static int highlander_i2c_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, dev);
dev->irq = platform_get_irq(pdev, 0);
- if (iic_force_poll)
+ if (dev->irq < 0 || iic_force_poll)
dev->irq = 0;
if (dev->irq) {
diff --git a/drivers/i2c/busses/i2c-hix5hd2.c b/drivers/i2c/busses/i2c-hix5hd2.c
index aa00ba8bcb70..61ae58f57047 100644
--- a/drivers/i2c/busses/i2c-hix5hd2.c
+++ b/drivers/i2c/busses/i2c-hix5hd2.c
@@ -413,7 +413,7 @@ static int hix5hd2_i2c_probe(struct platform_device *pdev)
return PTR_ERR(priv->regs);
irq = platform_get_irq(pdev, 0);
- if (irq <= 0)
+ if (irq < 0)
return irq;
priv->clk = devm_clk_get(&pdev->dev, NULL);
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index aa3f60e69230..89ae78ef1a1c 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -110,6 +110,7 @@
#include <linux/platform_device.h>
#include <linux/platform_data/itco_wdt.h>
#include <linux/pm_runtime.h>
+#include <linux/mutex.h>
#if IS_ENABLED(CONFIG_I2C_MUX_GPIO) && defined CONFIG_DMI
#include <linux/gpio/machine.h>
@@ -503,19 +504,16 @@ static int i801_transaction(struct i801_priv *priv, int xact)
static int i801_block_transaction_by_block(struct i801_priv *priv,
union i2c_smbus_data *data,
- char read_write, int command,
- int hwpec)
+ char read_write, int command)
{
- int i, len;
- int status;
- int xact = hwpec ? SMBHSTCNT_PEC_EN : 0;
+ int i, len, status, xact;
switch (command) {
case I2C_SMBUS_BLOCK_PROC_CALL:
- xact |= I801_BLOCK_PROC_CALL;
+ xact = I801_BLOCK_PROC_CALL;
break;
case I2C_SMBUS_BLOCK_DATA:
- xact |= I801_BLOCK_DATA;
+ xact = I801_BLOCK_DATA;
break;
default:
return -EOPNOTSUPP;
@@ -561,10 +559,6 @@ static void i801_isr_byte_done(struct i801_priv *priv)
priv->len);
/* FIXME: Recover */
priv->len = I2C_SMBUS_BLOCK_MAX;
- } else {
- dev_dbg(&priv->pci_dev->dev,
- "SMBus block read size is %d\n",
- priv->len);
}
priv->data[-1] = priv->len;
}
@@ -665,8 +659,7 @@ static irqreturn_t i801_isr(int irq, void *dev_id)
*/
static int i801_block_transaction_byte_by_byte(struct i801_priv *priv,
union i2c_smbus_data *data,
- char read_write, int command,
- int hwpec)
+ char read_write, int command)
{
int i, len;
int smbcmd;
@@ -764,9 +757,8 @@ static int i801_set_block_buffer_mode(struct i801_priv *priv)
}
/* Block transaction function */
-static int i801_block_transaction(struct i801_priv *priv,
- union i2c_smbus_data *data, char read_write,
- int command, int hwpec)
+static int i801_block_transaction(struct i801_priv *priv, union i2c_smbus_data *data,
+ char read_write, int command)
{
int result = 0;
unsigned char hostc;
@@ -802,11 +794,11 @@ static int i801_block_transaction(struct i801_priv *priv,
&& i801_set_block_buffer_mode(priv) == 0)
result = i801_block_transaction_by_block(priv, data,
read_write,
- command, hwpec);
+ command);
else
result = i801_block_transaction_byte_by_byte(priv, data,
read_write,
- command, hwpec);
+ command);
if (command == I2C_SMBUS_I2C_BLOCK_DATA
&& read_write == I2C_SMBUS_WRITE) {
@@ -917,8 +909,7 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
SMBAUXCTL(priv));
if (block)
- ret = i801_block_transaction(priv, data, read_write, size,
- hwpec);
+ ret = i801_block_transaction(priv, data, read_write, size);
else
ret = i801_transaction(priv, xact);
@@ -1498,12 +1489,11 @@ static const struct itco_wdt_platform_data spt_tco_platform_data = {
.version = 4,
};
-static DEFINE_SPINLOCK(p2sb_spinlock);
-
static struct platform_device *
i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
struct resource *tco_res)
{
+ static DEFINE_MUTEX(p2sb_mutex);
struct resource *res;
unsigned int devfn;
u64 base64_addr;
@@ -1516,7 +1506,7 @@ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
* enumerated by the PCI subsystem, so we need to unhide/hide it
* to lookup the P2SB BAR.
*/
- spin_lock(&p2sb_spinlock);
+ mutex_lock(&p2sb_mutex);
devfn = PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 1);
@@ -1534,7 +1524,7 @@ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
/* Hide the P2SB device, if it was hidden before */
if (hidden)
pci_bus_write_config_byte(pci_dev->bus, devfn, 0xe1, hidden);
- spin_unlock(&p2sb_spinlock);
+ mutex_unlock(&p2sb_mutex);
res = &tco_res[1];
if (pci_dev->device == PCI_DEVICE_ID_INTEL_DNV_SMBUS)
@@ -1634,7 +1624,7 @@ i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits,
* BIOS is accessing the host controller so prevent it from
* suspending automatically from now on.
*/
- pm_runtime_get_sync(&pdev->dev);
+ pm_runtime_set_autosuspend_delay(&pdev->dev, -1);
}
if ((function & ACPI_IO_MASK) == ACPI_READ)
@@ -1674,11 +1664,6 @@ static void i801_acpi_remove(struct i801_priv *priv)
acpi_remove_address_space_handler(adev->handle,
ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler);
-
- mutex_lock(&priv->acpi_lock);
- if (priv->acpi_reserved)
- pm_runtime_put(&priv->pci_dev->dev);
- mutex_unlock(&priv->acpi_lock);
}
#else
static inline int i801_acpi_probe(struct i801_priv *priv) { return 0; }
@@ -1690,6 +1675,7 @@ static void i801_setup_hstcfg(struct i801_priv *priv)
unsigned char hstcfg = priv->original_hstcfg;
hstcfg &= ~SMBHSTCFG_I2C_EN; /* SMBus timing */
+ hstcfg &= ~SMBHSTCNT_PEC_EN; /* Disable software PEC */
hstcfg |= SMBHSTCFG_HST_EN;
pci_write_config_byte(priv->pci_dev, SMBHSTCFG, hstcfg);
}
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index d5b5f084a27d..3576b63a6c03 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -423,7 +423,7 @@ static int i2c_imx_dma_xfer(struct imx_i2c_struct *i2c_imx,
return 0;
err_submit:
- dmaengine_terminate_all(dma->chan_using);
+ dmaengine_terminate_sync(dma->chan_using);
err_desc:
dma_unmap_single(chan_dev, dma->dma_buf,
dma->dma_len, dma->dma_data_dir);
@@ -894,7 +894,7 @@ static int i2c_imx_dma_write(struct imx_i2c_struct *i2c_imx,
&i2c_imx->dma->cmd_complete,
msecs_to_jiffies(DMA_TIMEOUT));
if (time_left == 0) {
- dmaengine_terminate_all(dma->chan_using);
+ dmaengine_terminate_sync(dma->chan_using);
return -ETIMEDOUT;
}
@@ -949,7 +949,7 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx,
&i2c_imx->dma->cmd_complete,
msecs_to_jiffies(DMA_TIMEOUT));
if (time_left == 0) {
- dmaengine_terminate_all(dma->chan_using);
+ dmaengine_terminate_sync(dma->chan_using);
return -ETIMEDOUT;
}
diff --git a/drivers/i2c/busses/i2c-iop3xx.c b/drivers/i2c/busses/i2c-iop3xx.c
index cfecaf18ccbb..4a6ff54d87fe 100644
--- a/drivers/i2c/busses/i2c-iop3xx.c
+++ b/drivers/i2c/busses/i2c-iop3xx.c
@@ -469,16 +469,14 @@ iop3xx_i2c_probe(struct platform_device *pdev)
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
- ret = -ENXIO;
+ ret = irq;
goto unmap;
}
ret = request_irq(irq, iop3xx_i2c_irq_handler, 0,
pdev->name, adapter_data);
- if (ret) {
- ret = -EIO;
+ if (ret)
goto unmap;
- }
memcpy(new_adapter->name, pdev->name, strlen(pdev->name));
new_adapter->owner = THIS_MODULE;
diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c
index 4ca716e09149..477480d1de6b 100644
--- a/drivers/i2c/busses/i2c-mt65xx.c
+++ b/drivers/i2c/busses/i2c-mt65xx.c
@@ -1211,7 +1211,7 @@ static int mtk_i2c_probe(struct platform_device *pdev)
return PTR_ERR(i2c->pdmabase);
irq = platform_get_irq(pdev, 0);
- if (irq <= 0)
+ if (irq < 0)
return irq;
init_completion(&i2c->msg_complete);
diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c
index f97243f02231..864a3f1bd4e1 100644
--- a/drivers/i2c/busses/i2c-mxs.c
+++ b/drivers/i2c/busses/i2c-mxs.c
@@ -290,14 +290,14 @@ read_init_dma_fail:
select_init_dma_fail:
dma_unmap_sg(i2c->dev, &i2c->sg_io[0], 1, DMA_TO_DEVICE);
select_init_pio_fail:
- dmaengine_terminate_all(i2c->dmach);
+ dmaengine_terminate_sync(i2c->dmach);
return -EINVAL;
/* Write failpath. */
write_init_dma_fail:
dma_unmap_sg(i2c->dev, i2c->sg_io, 2, DMA_TO_DEVICE);
write_init_pio_fail:
- dmaengine_terminate_all(i2c->dmach);
+ dmaengine_terminate_sync(i2c->dmach);
return -EINVAL;
}
diff --git a/drivers/i2c/busses/i2c-parport.c b/drivers/i2c/busses/i2c-parport.c
index a535889acca6..231145c48728 100644
--- a/drivers/i2c/busses/i2c-parport.c
+++ b/drivers/i2c/busses/i2c-parport.c
@@ -267,6 +267,16 @@ static void i2c_parport_attach(struct parport *port)
int i;
struct pardev_cb i2c_parport_cb;
+ if (type < 0) {
+ pr_warn("adapter type unspecified\n");
+ return;
+ }
+
+ if (type >= ARRAY_SIZE(adapter_parm)) {
+ pr_warn("invalid type (%d)\n", type);
+ return;
+ }
+
for (i = 0; i < MAX_DEVICE; i++) {
if (parport[i] == -1)
continue;
@@ -392,32 +402,8 @@ static struct parport_driver i2c_parport_driver = {
.detach = i2c_parport_detach,
.devmodel = true,
};
-
-/* ----- Module loading, unloading and information ------------------------ */
-
-static int __init i2c_parport_init(void)
-{
- if (type < 0) {
- pr_warn("adapter type unspecified\n");
- return -ENODEV;
- }
-
- if (type >= ARRAY_SIZE(adapter_parm)) {
- pr_warn("invalid type (%d)\n", type);
- return -ENODEV;
- }
-
- return parport_register_driver(&i2c_parport_driver);
-}
-
-static void __exit i2c_parport_exit(void)
-{
- parport_unregister_driver(&i2c_parport_driver);
-}
+module_parport_driver(i2c_parport_driver);
MODULE_AUTHOR("Jean Delvare <jdelvare@suse.de>");
MODULE_DESCRIPTION("I2C bus over parallel port");
MODULE_LICENSE("GPL");
-
-module_init(i2c_parport_init);
-module_exit(i2c_parport_exit);
diff --git a/drivers/i2c/busses/i2c-pmcmsp.c b/drivers/i2c/busses/i2c-pmcmsp.c
deleted file mode 100644
index 5d89c7c1b3a8..000000000000
--- a/drivers/i2c/busses/i2c-pmcmsp.c
+++ /dev/null
@@ -1,600 +0,0 @@
-/*
- * Specific bus support for PMC-TWI compliant implementation on MSP71xx.
- *
- * Copyright 2005-2007 PMC-Sierra, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/i2c.h>
-#include <linux/interrupt.h>
-#include <linux/completion.h>
-#include <linux/mutex.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-
-#define DRV_NAME "pmcmsptwi"
-
-#define MSP_TWI_SF_CLK_REG_OFFSET 0x00
-#define MSP_TWI_HS_CLK_REG_OFFSET 0x04
-#define MSP_TWI_CFG_REG_OFFSET 0x08
-#define MSP_TWI_CMD_REG_OFFSET 0x0c
-#define MSP_TWI_ADD_REG_OFFSET 0x10
-#define MSP_TWI_DAT_0_REG_OFFSET 0x14
-#define MSP_TWI_DAT_1_REG_OFFSET 0x18
-#define MSP_TWI_INT_STS_REG_OFFSET 0x1c
-#define MSP_TWI_INT_MSK_REG_OFFSET 0x20
-#define MSP_TWI_BUSY_REG_OFFSET 0x24
-
-#define MSP_TWI_INT_STS_DONE (1 << 0)
-#define MSP_TWI_INT_STS_LOST_ARBITRATION (1 << 1)
-#define MSP_TWI_INT_STS_NO_RESPONSE (1 << 2)
-#define MSP_TWI_INT_STS_DATA_COLLISION (1 << 3)
-#define MSP_TWI_INT_STS_BUSY (1 << 4)
-#define MSP_TWI_INT_STS_ALL 0x1f
-
-#define MSP_MAX_BYTES_PER_RW 8
-#define MSP_MAX_POLL 5
-#define MSP_POLL_DELAY 10
-#define MSP_IRQ_TIMEOUT (MSP_MAX_POLL * MSP_POLL_DELAY)
-
-/* IO Operation macros */
-#define pmcmsptwi_readl __raw_readl
-#define pmcmsptwi_writel __raw_writel
-
-/* TWI command type */
-enum pmcmsptwi_cmd_type {
- MSP_TWI_CMD_WRITE = 0, /* Write only */
- MSP_TWI_CMD_READ = 1, /* Read only */
- MSP_TWI_CMD_WRITE_READ = 2, /* Write then Read */
-};
-
-/* The possible results of the xferCmd */
-enum pmcmsptwi_xfer_result {
- MSP_TWI_XFER_OK = 0,
- MSP_TWI_XFER_TIMEOUT,
- MSP_TWI_XFER_BUSY,
- MSP_TWI_XFER_DATA_COLLISION,
- MSP_TWI_XFER_NO_RESPONSE,
- MSP_TWI_XFER_LOST_ARBITRATION,
-};
-
-/* Corresponds to a PMCTWI clock configuration register */
-struct pmcmsptwi_clock {
- u8 filter; /* Bits 15:12, default = 0x03 */
- u16 clock; /* Bits 9:0, default = 0x001f */
-};
-
-struct pmcmsptwi_clockcfg {
- struct pmcmsptwi_clock standard; /* The standard/fast clock config */
- struct pmcmsptwi_clock highspeed; /* The highspeed clock config */
-};
-
-/* Corresponds to the main TWI configuration register */
-struct pmcmsptwi_cfg {
- u8 arbf; /* Bits 15:12, default=0x03 */
- u8 nak; /* Bits 11:8, default=0x03 */
- u8 add10; /* Bit 7, default=0x00 */
- u8 mst_code; /* Bits 6:4, default=0x00 */
- u8 arb; /* Bit 1, default=0x01 */
- u8 highspeed; /* Bit 0, default=0x00 */
-};
-
-/* A single pmctwi command to issue */
-struct pmcmsptwi_cmd {
- u16 addr; /* The slave address (7 or 10 bits) */
- enum pmcmsptwi_cmd_type type; /* The command type */
- u8 write_len; /* Number of bytes in the write buffer */
- u8 read_len; /* Number of bytes in the read buffer */
- u8 *write_data; /* Buffer of characters to send */
- u8 *read_data; /* Buffer to fill with incoming data */
-};
-
-/* The private data */
-struct pmcmsptwi_data {
- void __iomem *iobase; /* iomapped base for IO */
- int irq; /* IRQ to use (0 disables) */
- struct completion wait; /* Completion for xfer */
- struct mutex lock; /* Used for threadsafeness */
- enum pmcmsptwi_xfer_result last_result; /* result of last xfer */
-};
-
-/* The default settings */
-static const struct pmcmsptwi_clockcfg pmcmsptwi_defclockcfg = {
- .standard = {
- .filter = 0x3,
- .clock = 0x1f,
- },
- .highspeed = {
- .filter = 0x3,
- .clock = 0x1f,
- },
-};
-
-static const struct pmcmsptwi_cfg pmcmsptwi_defcfg = {
- .arbf = 0x03,
- .nak = 0x03,
- .add10 = 0x00,
- .mst_code = 0x00,
- .arb = 0x01,
- .highspeed = 0x00,
-};
-
-static struct pmcmsptwi_data pmcmsptwi_data;
-
-static struct i2c_adapter pmcmsptwi_adapter;
-
-/* inline helper functions */
-static inline u32 pmcmsptwi_clock_to_reg(
- const struct pmcmsptwi_clock *clock)
-{
- return ((clock->filter & 0xf) << 12) | (clock->clock & 0x03ff);
-}
-
-static inline u32 pmcmsptwi_cfg_to_reg(const struct pmcmsptwi_cfg *cfg)
-{
- return ((cfg->arbf & 0xf) << 12) |
- ((cfg->nak & 0xf) << 8) |
- ((cfg->add10 & 0x1) << 7) |
- ((cfg->mst_code & 0x7) << 4) |
- ((cfg->arb & 0x1) << 1) |
- (cfg->highspeed & 0x1);
-}
-
-static inline void pmcmsptwi_reg_to_cfg(u32 reg, struct pmcmsptwi_cfg *cfg)
-{
- cfg->arbf = (reg >> 12) & 0xf;
- cfg->nak = (reg >> 8) & 0xf;
- cfg->add10 = (reg >> 7) & 0x1;
- cfg->mst_code = (reg >> 4) & 0x7;
- cfg->arb = (reg >> 1) & 0x1;
- cfg->highspeed = reg & 0x1;
-}
-
-/*
- * Sets the current clock configuration
- */
-static void pmcmsptwi_set_clock_config(const struct pmcmsptwi_clockcfg *cfg,
- struct pmcmsptwi_data *data)
-{
- mutex_lock(&data->lock);
- pmcmsptwi_writel(pmcmsptwi_clock_to_reg(&cfg->standard),
- data->iobase + MSP_TWI_SF_CLK_REG_OFFSET);
- pmcmsptwi_writel(pmcmsptwi_clock_to_reg(&cfg->highspeed),
- data->iobase + MSP_TWI_HS_CLK_REG_OFFSET);
- mutex_unlock(&data->lock);
-}
-
-/*
- * Gets the current TWI bus configuration
- */
-static void pmcmsptwi_get_twi_config(struct pmcmsptwi_cfg *cfg,
- struct pmcmsptwi_data *data)
-{
- mutex_lock(&data->lock);
- pmcmsptwi_reg_to_cfg(pmcmsptwi_readl(
- data->iobase + MSP_TWI_CFG_REG_OFFSET), cfg);
- mutex_unlock(&data->lock);
-}
-
-/*
- * Sets the current TWI bus configuration
- */
-static void pmcmsptwi_set_twi_config(const struct pmcmsptwi_cfg *cfg,
- struct pmcmsptwi_data *data)
-{
- mutex_lock(&data->lock);
- pmcmsptwi_writel(pmcmsptwi_cfg_to_reg(cfg),
- data->iobase + MSP_TWI_CFG_REG_OFFSET);
- mutex_unlock(&data->lock);
-}
-
-/*
- * Parses the 'int_sts' register and returns a well-defined error code
- */
-static enum pmcmsptwi_xfer_result pmcmsptwi_get_result(u32 reg)
-{
- if (reg & MSP_TWI_INT_STS_LOST_ARBITRATION) {
- dev_dbg(&pmcmsptwi_adapter.dev,
- "Result: Lost arbitration\n");
- return MSP_TWI_XFER_LOST_ARBITRATION;
- } else if (reg & MSP_TWI_INT_STS_NO_RESPONSE) {
- dev_dbg(&pmcmsptwi_adapter.dev,
- "Result: No response\n");
- return MSP_TWI_XFER_NO_RESPONSE;
- } else if (reg & MSP_TWI_INT_STS_DATA_COLLISION) {
- dev_dbg(&pmcmsptwi_adapter.dev,
- "Result: Data collision\n");
- return MSP_TWI_XFER_DATA_COLLISION;
- } else if (reg & MSP_TWI_INT_STS_BUSY) {
- dev_dbg(&pmcmsptwi_adapter.dev,
- "Result: Bus busy\n");
- return MSP_TWI_XFER_BUSY;
- }
-
- dev_dbg(&pmcmsptwi_adapter.dev, "Result: Operation succeeded\n");
- return MSP_TWI_XFER_OK;
-}
-
-/*
- * In interrupt mode, handle the interrupt.
- * NOTE: Assumes data->lock is held.
- */
-static irqreturn_t pmcmsptwi_interrupt(int irq, void *ptr)
-{
- struct pmcmsptwi_data *data = ptr;
-
- u32 reason = pmcmsptwi_readl(data->iobase +
- MSP_TWI_INT_STS_REG_OFFSET);
- pmcmsptwi_writel(reason, data->iobase + MSP_TWI_INT_STS_REG_OFFSET);
-
- dev_dbg(&pmcmsptwi_adapter.dev, "Got interrupt 0x%08x\n", reason);
- if (!(reason & MSP_TWI_INT_STS_DONE))
- return IRQ_NONE;
-
- data->last_result = pmcmsptwi_get_result(reason);
- complete(&data->wait);
-
- return IRQ_HANDLED;
-}
-
-/*
- * Probe for and register the device and return 0 if there is one.
- */
-static int pmcmsptwi_probe(struct platform_device *pldev)
-{
- struct resource *res;
- int rc = -ENODEV;
-
- /* get the static platform resources */
- res = platform_get_resource(pldev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(&pldev->dev, "IOMEM resource not found\n");
- goto ret_err;
- }
-
- /* reserve the memory region */
- if (!request_mem_region(res->start, resource_size(res),
- pldev->name)) {
- dev_err(&pldev->dev,
- "Unable to get memory/io address region %pap\n",
- &res->start);
- rc = -EBUSY;
- goto ret_err;
- }
-
- /* remap the memory */
- pmcmsptwi_data.iobase = ioremap(res->start,
- resource_size(res));
- if (!pmcmsptwi_data.iobase) {
- dev_err(&pldev->dev,
- "Unable to ioremap address %pap\n", &res->start);
- rc = -EIO;
- goto ret_unreserve;
- }
-
- /* request the irq */
- pmcmsptwi_data.irq = platform_get_irq(pldev, 0);
- if (pmcmsptwi_data.irq) {
- rc = request_irq(pmcmsptwi_data.irq, &pmcmsptwi_interrupt,
- IRQF_SHARED, pldev->name, &pmcmsptwi_data);
- if (rc == 0) {
- /*
- * Enable 'DONE' interrupt only.
- *
- * If you enable all interrupts, you will get one on
- * error and another when the operation completes.
- * This way you only have to handle one interrupt,
- * but you can still check all result flags.
- */
- pmcmsptwi_writel(MSP_TWI_INT_STS_DONE,
- pmcmsptwi_data.iobase +
- MSP_TWI_INT_MSK_REG_OFFSET);
- } else {
- dev_warn(&pldev->dev,
- "Could not assign TWI IRQ handler "
- "to irq %d (continuing with poll)\n",
- pmcmsptwi_data.irq);
- pmcmsptwi_data.irq = 0;
- }
- }
-
- init_completion(&pmcmsptwi_data.wait);
- mutex_init(&pmcmsptwi_data.lock);
-
- pmcmsptwi_set_clock_config(&pmcmsptwi_defclockcfg, &pmcmsptwi_data);
- pmcmsptwi_set_twi_config(&pmcmsptwi_defcfg, &pmcmsptwi_data);
-
- printk(KERN_INFO DRV_NAME ": Registering MSP71xx I2C adapter\n");
-
- pmcmsptwi_adapter.dev.parent = &pldev->dev;
- platform_set_drvdata(pldev, &pmcmsptwi_adapter);
- i2c_set_adapdata(&pmcmsptwi_adapter, &pmcmsptwi_data);
-
- rc = i2c_add_adapter(&pmcmsptwi_adapter);
- if (rc)
- goto ret_unmap;
-
- return 0;
-
-ret_unmap:
- if (pmcmsptwi_data.irq) {
- pmcmsptwi_writel(0,
- pmcmsptwi_data.iobase + MSP_TWI_INT_MSK_REG_OFFSET);
- free_irq(pmcmsptwi_data.irq, &pmcmsptwi_data);
- }
-
- iounmap(pmcmsptwi_data.iobase);
-
-ret_unreserve:
- release_mem_region(res->start, resource_size(res));
-
-ret_err:
- return rc;
-}
-
-/*
- * Release the device and return 0 if there is one.
- */
-static int pmcmsptwi_remove(struct platform_device *pldev)
-{
- struct resource *res;
-
- i2c_del_adapter(&pmcmsptwi_adapter);
-
- if (pmcmsptwi_data.irq) {
- pmcmsptwi_writel(0,
- pmcmsptwi_data.iobase + MSP_TWI_INT_MSK_REG_OFFSET);
- free_irq(pmcmsptwi_data.irq, &pmcmsptwi_data);
- }
-
- iounmap(pmcmsptwi_data.iobase);
-
- res = platform_get_resource(pldev, IORESOURCE_MEM, 0);
- release_mem_region(res->start, resource_size(res));
-
- return 0;
-}
-
-/*
- * Polls the 'busy' register until the command is complete.
- * NOTE: Assumes data->lock is held.
- */
-static void pmcmsptwi_poll_complete(struct pmcmsptwi_data *data)
-{
- int i;
-
- for (i = 0; i < MSP_MAX_POLL; i++) {
- u32 val = pmcmsptwi_readl(data->iobase +
- MSP_TWI_BUSY_REG_OFFSET);
- if (val == 0) {
- u32 reason = pmcmsptwi_readl(data->iobase +
- MSP_TWI_INT_STS_REG_OFFSET);
- pmcmsptwi_writel(reason, data->iobase +
- MSP_TWI_INT_STS_REG_OFFSET);
- data->last_result = pmcmsptwi_get_result(reason);
- return;
- }
- udelay(MSP_POLL_DELAY);
- }
-
- dev_dbg(&pmcmsptwi_adapter.dev, "Result: Poll timeout\n");
- data->last_result = MSP_TWI_XFER_TIMEOUT;
-}
-
-/*
- * Do the transfer (low level):
- * May use interrupt-driven or polling, depending on if an IRQ is
- * presently registered.
- * NOTE: Assumes data->lock is held.
- */
-static enum pmcmsptwi_xfer_result pmcmsptwi_do_xfer(
- u32 reg, struct pmcmsptwi_data *data)
-{
- dev_dbg(&pmcmsptwi_adapter.dev, "Writing cmd reg 0x%08x\n", reg);
- pmcmsptwi_writel(reg, data->iobase + MSP_TWI_CMD_REG_OFFSET);
- if (data->irq) {
- unsigned long timeleft = wait_for_completion_timeout(
- &data->wait, MSP_IRQ_TIMEOUT);
- if (timeleft == 0) {
- dev_dbg(&pmcmsptwi_adapter.dev,
- "Result: IRQ timeout\n");
- complete(&data->wait);
- data->last_result = MSP_TWI_XFER_TIMEOUT;
- }
- } else
- pmcmsptwi_poll_complete(data);
-
- return data->last_result;
-}
-
-/*
- * Helper routine, converts 'pmctwi_cmd' struct to register format
- */
-static inline u32 pmcmsptwi_cmd_to_reg(const struct pmcmsptwi_cmd *cmd)
-{
- return ((cmd->type & 0x3) << 8) |
- (((cmd->write_len - 1) & 0x7) << 4) |
- ((cmd->read_len - 1) & 0x7);
-}
-
-/*
- * Do the transfer (high level)
- */
-static enum pmcmsptwi_xfer_result pmcmsptwi_xfer_cmd(
- struct pmcmsptwi_cmd *cmd,
- struct pmcmsptwi_data *data)
-{
- enum pmcmsptwi_xfer_result retval;
-
- mutex_lock(&data->lock);
- dev_dbg(&pmcmsptwi_adapter.dev,
- "Setting address to 0x%04x\n", cmd->addr);
- pmcmsptwi_writel(cmd->addr, data->iobase + MSP_TWI_ADD_REG_OFFSET);
-
- if (cmd->type == MSP_TWI_CMD_WRITE ||
- cmd->type == MSP_TWI_CMD_WRITE_READ) {
- u64 tmp = be64_to_cpup((__be64 *)cmd->write_data);
- tmp >>= (MSP_MAX_BYTES_PER_RW - cmd->write_len) * 8;
- dev_dbg(&pmcmsptwi_adapter.dev, "Writing 0x%016llx\n", tmp);
- pmcmsptwi_writel(tmp & 0x00000000ffffffffLL,
- data->iobase + MSP_TWI_DAT_0_REG_OFFSET);
- if (cmd->write_len > 4)
- pmcmsptwi_writel(tmp >> 32,
- data->iobase + MSP_TWI_DAT_1_REG_OFFSET);
- }
-
- retval = pmcmsptwi_do_xfer(pmcmsptwi_cmd_to_reg(cmd), data);
- if (retval != MSP_TWI_XFER_OK)
- goto xfer_err;
-
- if (cmd->type == MSP_TWI_CMD_READ ||
- cmd->type == MSP_TWI_CMD_WRITE_READ) {
- int i;
- u64 rmsk = ~(0xffffffffffffffffLL << (cmd->read_len * 8));
- u64 tmp = (u64)pmcmsptwi_readl(data->iobase +
- MSP_TWI_DAT_0_REG_OFFSET);
- if (cmd->read_len > 4)
- tmp |= (u64)pmcmsptwi_readl(data->iobase +
- MSP_TWI_DAT_1_REG_OFFSET) << 32;
- tmp &= rmsk;
- dev_dbg(&pmcmsptwi_adapter.dev, "Read 0x%016llx\n", tmp);
-
- for (i = 0; i < cmd->read_len; i++)
- cmd->read_data[i] = tmp >> i;
- }
-
-xfer_err:
- mutex_unlock(&data->lock);
-
- return retval;
-}
-
-/* -- Algorithm functions -- */
-
-/*
- * Sends an i2c command out on the adapter
- */
-static int pmcmsptwi_master_xfer(struct i2c_adapter *adap,
- struct i2c_msg *msg, int num)
-{
- struct pmcmsptwi_data *data = i2c_get_adapdata(adap);
- struct pmcmsptwi_cmd cmd;
- struct pmcmsptwi_cfg oldcfg, newcfg;
- int ret;
-
- if (num == 2) {
- struct i2c_msg *nextmsg = msg + 1;
-
- cmd.type = MSP_TWI_CMD_WRITE_READ;
- cmd.write_len = msg->len;
- cmd.write_data = msg->buf;
- cmd.read_len = nextmsg->len;
- cmd.read_data = nextmsg->buf;
- } else if (msg->flags & I2C_M_RD) {
- cmd.type = MSP_TWI_CMD_READ;
- cmd.read_len = msg->len;
- cmd.read_data = msg->buf;
- cmd.write_len = 0;
- cmd.write_data = NULL;
- } else {
- cmd.type = MSP_TWI_CMD_WRITE;
- cmd.read_len = 0;
- cmd.read_data = NULL;
- cmd.write_len = msg->len;
- cmd.write_data = msg->buf;
- }
-
- cmd.addr = msg->addr;
-
- if (msg->flags & I2C_M_TEN) {
- pmcmsptwi_get_twi_config(&newcfg, data);
- memcpy(&oldcfg, &newcfg, sizeof(oldcfg));
-
- /* Set the special 10-bit address flag */
- newcfg.add10 = 1;
-
- pmcmsptwi_set_twi_config(&newcfg, data);
- }
-
- /* Execute the command */
- ret = pmcmsptwi_xfer_cmd(&cmd, data);
-
- if (msg->flags & I2C_M_TEN)
- pmcmsptwi_set_twi_config(&oldcfg, data);
-
- dev_dbg(&adap->dev, "I2C %s of %d bytes %s\n",
- (msg->flags & I2C_M_RD) ? "read" : "write", msg->len,
- (ret == MSP_TWI_XFER_OK) ? "succeeded" : "failed");
-
- if (ret != MSP_TWI_XFER_OK) {
- /*
- * TODO: We could potentially loop and retry in the case
- * of MSP_TWI_XFER_TIMEOUT.
- */
- return -EIO;
- }
-
- return num;
-}
-
-static u32 pmcmsptwi_i2c_func(struct i2c_adapter *adapter)
-{
- return I2C_FUNC_I2C | I2C_FUNC_10BIT_ADDR |
- I2C_FUNC_SMBUS_BYTE | I2C_FUNC_SMBUS_BYTE_DATA |
- I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_PROC_CALL;
-}
-
-static const struct i2c_adapter_quirks pmcmsptwi_i2c_quirks = {
- .flags = I2C_AQ_COMB_WRITE_THEN_READ | I2C_AQ_NO_ZERO_LEN,
- .max_write_len = MSP_MAX_BYTES_PER_RW,
- .max_read_len = MSP_MAX_BYTES_PER_RW,
- .max_comb_1st_msg_len = MSP_MAX_BYTES_PER_RW,
- .max_comb_2nd_msg_len = MSP_MAX_BYTES_PER_RW,
-};
-
-/* -- Initialization -- */
-
-static const struct i2c_algorithm pmcmsptwi_algo = {
- .master_xfer = pmcmsptwi_master_xfer,
- .functionality = pmcmsptwi_i2c_func,
-};
-
-static struct i2c_adapter pmcmsptwi_adapter = {
- .owner = THIS_MODULE,
- .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
- .algo = &pmcmsptwi_algo,
- .quirks = &pmcmsptwi_i2c_quirks,
- .name = DRV_NAME,
-};
-
-static struct platform_driver pmcmsptwi_driver = {
- .probe = pmcmsptwi_probe,
- .remove = pmcmsptwi_remove,
- .driver = {
- .name = DRV_NAME,
- },
-};
-
-module_platform_driver(pmcmsptwi_driver);
-
-MODULE_DESCRIPTION("PMC MSP TWI/SMBus/I2C driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c
index 61dc20fd1191..fcd35e8de83c 100644
--- a/drivers/i2c/busses/i2c-qup.c
+++ b/drivers/i2c/busses/i2c-qup.c
@@ -778,7 +778,7 @@ static int qup_i2c_bam_schedule_desc(struct qup_i2c_dev *qup)
ret = -EINVAL;
/* abort TX descriptors */
- dmaengine_terminate_all(qup->btx.dma);
+ dmaengine_terminate_sync(qup->btx.dma);
goto desc_err;
}
diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index 4d82761e1585..b49a1b170bb2 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -1137,7 +1137,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev)
*/
if (!(i2c->quirks & QUIRK_POLL)) {
i2c->irq = ret = platform_get_irq(pdev, 0);
- if (ret <= 0) {
+ if (ret < 0) {
dev_err(&pdev->dev, "cannot find IRQ\n");
clk_unprepare(i2c->clk);
return ret;
diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index 2d2e630fd438..db8fa4186814 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c
@@ -458,9 +458,9 @@ static void sh_mobile_i2c_cleanup_dma(struct sh_mobile_i2c_data *pd)
if (pd->dma_direction == DMA_NONE)
return;
else if (pd->dma_direction == DMA_FROM_DEVICE)
- dmaengine_terminate_all(pd->dma_rx);
+ dmaengine_terminate_sync(pd->dma_rx);
else if (pd->dma_direction == DMA_TO_DEVICE)
- dmaengine_terminate_all(pd->dma_tx);
+ dmaengine_terminate_sync(pd->dma_tx);
sh_mobile_i2c_dma_unmap(pd);
}
diff --git a/drivers/i2c/busses/i2c-sun6i-p2wi.c b/drivers/i2c/busses/i2c-sun6i-p2wi.c
index 2f6f6468214d..9e3483f507ff 100644
--- a/drivers/i2c/busses/i2c-sun6i-p2wi.c
+++ b/drivers/i2c/busses/i2c-sun6i-p2wi.c
@@ -234,7 +234,7 @@ static int p2wi_probe(struct platform_device *pdev)
if (IS_ERR(p2wi->regs))
return PTR_ERR(p2wi->regs);
- strlcpy(p2wi->adapter.name, pdev->name, sizeof(p2wi->adapter.name));
+ strscpy(p2wi->adapter.name, pdev->name, sizeof(p2wi->adapter.name));
irq = platform_get_irq(pdev, 0);
if (irq < 0)
return irq;
diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c
index 31be1811d5e6..e4026c5416b1 100644
--- a/drivers/i2c/busses/i2c-synquacer.c
+++ b/drivers/i2c/busses/i2c-synquacer.c
@@ -578,7 +578,7 @@ static int synquacer_i2c_probe(struct platform_device *pdev)
i2c->irq = platform_get_irq(pdev, 0);
if (i2c->irq < 0)
- return -ENODEV;
+ return i2c->irq;
ret = devm_request_irq(&pdev->dev, i2c->irq, synquacer_i2c_isr,
0, dev_name(&pdev->dev), i2c);
diff --git a/drivers/i2c/busses/i2c-virtio.c b/drivers/i2c/busses/i2c-virtio.c
new file mode 100644
index 000000000000..f10a603b13fb
--- /dev/null
+++ b/drivers/i2c/busses/i2c-virtio.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Virtio I2C Bus Driver
+ *
+ * The Virtio I2C Specification:
+ * https://raw.githubusercontent.com/oasis-tcs/virtio-spec/master/virtio-i2c.tex
+ *
+ * Copyright (c) 2021 Intel Corporation. All rights reserved.
+ */
+
+#include <linux/acpi.h>
+#include <linux/completion.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/virtio.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_i2c.h>
+
+/**
+ * struct virtio_i2c - virtio I2C data
+ * @vdev: virtio device for this controller
+ * @completion: completion of virtio I2C message
+ * @adap: I2C adapter for this controller
+ * @vq: the virtio virtqueue for communication
+ */
+struct virtio_i2c {
+ struct virtio_device *vdev;
+ struct completion completion;
+ struct i2c_adapter adap;
+ struct virtqueue *vq;
+};
+
+/**
+ * struct virtio_i2c_req - the virtio I2C request structure
+ * @out_hdr: the OUT header of the virtio I2C message
+ * @buf: the buffer into which data is read, or from which it's written
+ * @in_hdr: the IN header of the virtio I2C message
+ */
+struct virtio_i2c_req {
+ struct virtio_i2c_out_hdr out_hdr ____cacheline_aligned;
+ uint8_t *buf ____cacheline_aligned;
+ struct virtio_i2c_in_hdr in_hdr ____cacheline_aligned;
+};
+
+static void virtio_i2c_msg_done(struct virtqueue *vq)
+{
+ struct virtio_i2c *vi = vq->vdev->priv;
+
+ complete(&vi->completion);
+}
+
+static int virtio_i2c_prepare_reqs(struct virtqueue *vq,
+ struct virtio_i2c_req *reqs,
+ struct i2c_msg *msgs, int num)
+{
+ struct scatterlist *sgs[3], out_hdr, msg_buf, in_hdr;
+ int i;
+
+ for (i = 0; i < num; i++) {
+ int outcnt = 0, incnt = 0;
+
+ /*
+ * We don't support 0 length messages and so filter out
+ * 0 length transfers by using i2c_adapter_quirks.
+ */
+ if (!msgs[i].len)
+ break;
+
+ /*
+ * Only 7-bit mode supported for this moment. For the address
+ * format, Please check the Virtio I2C Specification.
+ */
+ reqs[i].out_hdr.addr = cpu_to_le16(msgs[i].addr << 1);
+
+ if (i != num - 1)
+ reqs[i].out_hdr.flags = cpu_to_le32(VIRTIO_I2C_FLAGS_FAIL_NEXT);
+
+ sg_init_one(&out_hdr, &reqs[i].out_hdr, sizeof(reqs[i].out_hdr));
+ sgs[outcnt++] = &out_hdr;
+
+ reqs[i].buf = i2c_get_dma_safe_msg_buf(&msgs[i], 1);
+ if (!reqs[i].buf)
+ break;
+
+ sg_init_one(&msg_buf, reqs[i].buf, msgs[i].len);
+
+ if (msgs[i].flags & I2C_M_RD)
+ sgs[outcnt + incnt++] = &msg_buf;
+ else
+ sgs[outcnt++] = &msg_buf;
+
+ sg_init_one(&in_hdr, &reqs[i].in_hdr, sizeof(reqs[i].in_hdr));
+ sgs[outcnt + incnt++] = &in_hdr;
+
+ if (virtqueue_add_sgs(vq, sgs, outcnt, incnt, &reqs[i], GFP_KERNEL)) {
+ i2c_put_dma_safe_msg_buf(reqs[i].buf, &msgs[i], false);
+ break;
+ }
+ }
+
+ return i;
+}
+
+static int virtio_i2c_complete_reqs(struct virtqueue *vq,
+ struct virtio_i2c_req *reqs,
+ struct i2c_msg *msgs, int num,
+ bool timedout)
+{
+ struct virtio_i2c_req *req;
+ bool failed = timedout;
+ unsigned int len;
+ int i, j = 0;
+
+ for (i = 0; i < num; i++) {
+ /* Detach the ith request from the vq */
+ req = virtqueue_get_buf(vq, &len);
+
+ /*
+ * Condition req == &reqs[i] should always meet since we have
+ * total num requests in the vq. reqs[i] can never be NULL here.
+ */
+ if (!failed && (WARN_ON(req != &reqs[i]) ||
+ req->in_hdr.status != VIRTIO_I2C_MSG_OK))
+ failed = true;
+
+ i2c_put_dma_safe_msg_buf(reqs[i].buf, &msgs[i], !failed);
+
+ if (!failed)
+ j++;
+ }
+
+ return timedout ? -ETIMEDOUT : j;
+}
+
+static int virtio_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+ int num)
+{
+ struct virtio_i2c *vi = i2c_get_adapdata(adap);
+ struct virtqueue *vq = vi->vq;
+ struct virtio_i2c_req *reqs;
+ unsigned long time_left;
+ int count;
+
+ reqs = kcalloc(num, sizeof(*reqs), GFP_KERNEL);
+ if (!reqs)
+ return -ENOMEM;
+
+ count = virtio_i2c_prepare_reqs(vq, reqs, msgs, num);
+ if (!count)
+ goto err_free;
+
+ /*
+ * For the case where count < num, i.e. we weren't able to queue all the
+ * msgs, ideally we should abort right away and return early, but some
+ * of the messages are already sent to the remote I2C controller and the
+ * virtqueue will be left in undefined state in that case. We kick the
+ * remote here to clear the virtqueue, so we can try another set of
+ * messages later on.
+ */
+
+ reinit_completion(&vi->completion);
+ virtqueue_kick(vq);
+
+ time_left = wait_for_completion_timeout(&vi->completion, adap->timeout);
+ if (!time_left)
+ dev_err(&adap->dev, "virtio i2c backend timeout.\n");
+
+ count = virtio_i2c_complete_reqs(vq, reqs, msgs, count, !time_left);
+
+err_free:
+ kfree(reqs);
+ return count;
+}
+
+static void virtio_i2c_del_vqs(struct virtio_device *vdev)
+{
+ vdev->config->reset(vdev);
+ vdev->config->del_vqs(vdev);
+}
+
+static int virtio_i2c_setup_vqs(struct virtio_i2c *vi)
+{
+ struct virtio_device *vdev = vi->vdev;
+
+ vi->vq = virtio_find_single_vq(vdev, virtio_i2c_msg_done, "msg");
+ return PTR_ERR_OR_ZERO(vi->vq);
+}
+
+static u32 virtio_i2c_func(struct i2c_adapter *adap)
+{
+ return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK);
+}
+
+static struct i2c_algorithm virtio_algorithm = {
+ .master_xfer = virtio_i2c_xfer,
+ .functionality = virtio_i2c_func,
+};
+
+static const struct i2c_adapter_quirks virtio_i2c_quirks = {
+ .flags = I2C_AQ_NO_ZERO_LEN,
+};
+
+static int virtio_i2c_probe(struct virtio_device *vdev)
+{
+ struct virtio_i2c *vi;
+ int ret;
+
+ vi = devm_kzalloc(&vdev->dev, sizeof(*vi), GFP_KERNEL);
+ if (!vi)
+ return -ENOMEM;
+
+ vdev->priv = vi;
+ vi->vdev = vdev;
+
+ init_completion(&vi->completion);
+
+ ret = virtio_i2c_setup_vqs(vi);
+ if (ret)
+ return ret;
+
+ vi->adap.owner = THIS_MODULE;
+ snprintf(vi->adap.name, sizeof(vi->adap.name),
+ "i2c_virtio at virtio bus %d", vdev->index);
+ vi->adap.algo = &virtio_algorithm;
+ vi->adap.quirks = &virtio_i2c_quirks;
+ vi->adap.dev.parent = &vdev->dev;
+ vi->adap.dev.of_node = vdev->dev.of_node;
+ i2c_set_adapdata(&vi->adap, vi);
+
+ /*
+ * Setup ACPI node for controlled devices which will be probed through
+ * ACPI.
+ */
+ ACPI_COMPANION_SET(&vi->adap.dev, ACPI_COMPANION(vdev->dev.parent));
+
+ ret = i2c_add_adapter(&vi->adap);
+ if (ret)
+ virtio_i2c_del_vqs(vdev);
+
+ return ret;
+}
+
+static void virtio_i2c_remove(struct virtio_device *vdev)
+{
+ struct virtio_i2c *vi = vdev->priv;
+
+ i2c_del_adapter(&vi->adap);
+ virtio_i2c_del_vqs(vdev);
+}
+
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_I2C_ADAPTER, VIRTIO_DEV_ANY_ID },
+ {}
+};
+MODULE_DEVICE_TABLE(virtio, id_table);
+
+#ifdef CONFIG_PM_SLEEP
+static int virtio_i2c_freeze(struct virtio_device *vdev)
+{
+ virtio_i2c_del_vqs(vdev);
+ return 0;
+}
+
+static int virtio_i2c_restore(struct virtio_device *vdev)
+{
+ return virtio_i2c_setup_vqs(vdev->priv);
+}
+#endif
+
+static struct virtio_driver virtio_i2c_driver = {
+ .id_table = id_table,
+ .probe = virtio_i2c_probe,
+ .remove = virtio_i2c_remove,
+ .driver = {
+ .name = "i2c_virtio",
+ },
+#ifdef CONFIG_PM_SLEEP
+ .freeze = virtio_i2c_freeze,
+ .restore = virtio_i2c_restore,
+#endif
+};
+module_virtio_driver(virtio_i2c_driver);
+
+MODULE_AUTHOR("Jie Deng <jie.deng@intel.com>");
+MODULE_AUTHOR("Conghui Chen <conghui.chen@intel.com>");
+MODULE_DESCRIPTION("Virtio i2c bus driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/i2c/busses/i2c-xlp9xx.c b/drivers/i2c/busses/i2c-xlp9xx.c
index f2241cedf5d3..6d24dc385522 100644
--- a/drivers/i2c/busses/i2c-xlp9xx.c
+++ b/drivers/i2c/busses/i2c-xlp9xx.c
@@ -517,7 +517,7 @@ static int xlp9xx_i2c_probe(struct platform_device *pdev)
return PTR_ERR(priv->base);
priv->irq = platform_get_irq(pdev, 0);
- if (priv->irq <= 0)
+ if (priv->irq < 0)
return priv->irq;
/* SMBAlert irq */
priv->alert_data.irq = platform_get_irq(pdev, 1);
diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index cb64fe649390..bce0e8bb7852 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -14,6 +14,8 @@
/* The I2C_RDWR ioctl code is written by Kolja Waschk <waschk@telos.de> */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
@@ -68,8 +70,7 @@ static struct i2c_dev *get_free_i2c_dev(struct i2c_adapter *adap)
struct i2c_dev *i2c_dev;
if (adap->nr >= I2C_MINORS) {
- printk(KERN_ERR "i2c-dev: Out of device minors (%d)\n",
- adap->nr);
+ pr_err("Out of device minors (%d)\n", adap->nr);
return ERR_PTR(-ENODEV);
}
@@ -101,7 +102,7 @@ static ssize_t name_show(struct device *dev,
if (!i2c_dev)
return -ENODEV;
- return sprintf(buf, "%s\n", i2c_dev->adap->name);
+ return sysfs_emit(buf, "%s\n", i2c_dev->adap->name);
}
static DEVICE_ATTR_RO(name);
@@ -141,16 +142,16 @@ static ssize_t i2cdev_read(struct file *file, char __user *buf, size_t count,
if (count > 8192)
count = 8192;
- tmp = kmalloc(count, GFP_KERNEL);
+ tmp = kzalloc(count, GFP_KERNEL);
if (tmp == NULL)
return -ENOMEM;
- pr_debug("i2c-dev: i2c-%d reading %zu bytes.\n",
- iminor(file_inode(file)), count);
+ pr_debug("i2c-%d reading %zu bytes.\n", iminor(file_inode(file)), count);
ret = i2c_master_recv(client, tmp, count);
if (ret >= 0)
- ret = copy_to_user(buf, tmp, count) ? -EFAULT : ret;
+ if (copy_to_user(buf, tmp, ret))
+ ret = -EFAULT;
kfree(tmp);
return ret;
}
@@ -169,8 +170,7 @@ static ssize_t i2cdev_write(struct file *file, const char __user *buf,
if (IS_ERR(tmp))
return PTR_ERR(tmp);
- pr_debug("i2c-dev: i2c-%d writing %zu bytes.\n",
- iminor(file_inode(file)), count);
+ pr_debug("i2c-%d writing %zu bytes.\n", iminor(file_inode(file)), count);
ret = i2c_master_send(client, tmp, count);
kfree(tmp);
@@ -673,8 +673,7 @@ static int i2cdev_attach_adapter(struct device *dev, void *dummy)
return res;
}
- pr_debug("i2c-dev: adapter [%s] registered as minor %d\n",
- adap->name, adap->nr);
+ pr_debug("adapter [%s] registered as minor %d\n", adap->name, adap->nr);
return 0;
}
@@ -693,7 +692,7 @@ static int i2cdev_detach_adapter(struct device *dev, void *dummy)
put_i2c_dev(i2c_dev, true);
- pr_debug("i2c-dev: adapter [%s] unregistered\n", adap->name);
+ pr_debug("adapter [%s] unregistered\n", adap->name);
return 0;
}
@@ -726,7 +725,7 @@ static int __init i2c_dev_init(void)
{
int res;
- printk(KERN_INFO "i2c /dev entries driver\n");
+ pr_info("i2c /dev entries driver\n");
res = register_chrdev_region(MKDEV(I2C_MAJOR, 0), I2C_MINORS, "i2c");
if (res)
@@ -754,7 +753,7 @@ out_unreg_class:
out_unreg_chrdev:
unregister_chrdev_region(MKDEV(I2C_MAJOR, 0), I2C_MINORS);
out:
- printk(KERN_ERR "%s: Driver Initialisation failed\n", __FILE__);
+ pr_err("Driver Initialisation failed\n");
return res;
}
diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig
index 0e56ace61103..8d8b1ba42ff8 100644
--- a/drivers/iio/accel/Kconfig
+++ b/drivers/iio/accel/Kconfig
@@ -231,6 +231,7 @@ config DMARD10
config FXLS8962AF
tristate
+ depends on I2C || !I2C # cannot be built-in for modular I2C
config FXLS8962AF_I2C
tristate "NXP FXLS8962AF/FXLS8964AF Accelerometer I2C Driver"
@@ -247,6 +248,7 @@ config FXLS8962AF_I2C
config FXLS8962AF_SPI
tristate "NXP FXLS8962AF/FXLS8964AF Accelerometer SPI Driver"
depends on SPI
+ depends on I2C || !I2C
select FXLS8962AF
select REGMAP_SPI
help
diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c
index 078d87865fde..0019f1ea7df2 100644
--- a/drivers/iio/accel/fxls8962af-core.c
+++ b/drivers/iio/accel/fxls8962af-core.c
@@ -637,7 +637,7 @@ static int fxls8962af_i2c_raw_read_errata3(struct fxls8962af_data *data,
return ret;
}
- return ret;
+ return 0;
}
static int fxls8962af_fifo_transfer(struct fxls8962af_data *data,
diff --git a/drivers/iio/adc/palmas_gpadc.c b/drivers/iio/adc/palmas_gpadc.c
index 6ef09609be9f..f9c8385c72d3 100644
--- a/drivers/iio/adc/palmas_gpadc.c
+++ b/drivers/iio/adc/palmas_gpadc.c
@@ -664,8 +664,8 @@ static int palmas_adc_wakeup_configure(struct palmas_gpadc *adc)
adc_period = adc->auto_conversion_period;
for (i = 0; i < 16; ++i) {
- if (((1000 * (1 << i)) / 32) < adc_period)
- continue;
+ if (((1000 * (1 << i)) / 32) >= adc_period)
+ break;
}
if (i > 0)
i--;
diff --git a/drivers/iio/adc/rn5t618-adc.c b/drivers/iio/adc/rn5t618-adc.c
index 7010c4276947..c56fccb2c8e1 100644
--- a/drivers/iio/adc/rn5t618-adc.c
+++ b/drivers/iio/adc/rn5t618-adc.c
@@ -16,6 +16,8 @@
#include <linux/completion.h>
#include <linux/regmap.h>
#include <linux/iio/iio.h>
+#include <linux/iio/driver.h>
+#include <linux/iio/machine.h>
#include <linux/slab.h>
#define RN5T618_ADC_CONVERSION_TIMEOUT (msecs_to_jiffies(500))
@@ -189,6 +191,19 @@ static const struct iio_chan_spec rn5t618_adc_iio_channels[] = {
RN5T618_ADC_CHANNEL(AIN0, IIO_VOLTAGE, "AIN0")
};
+static struct iio_map rn5t618_maps[] = {
+ IIO_MAP("VADP", "rn5t618-power", "vadp"),
+ IIO_MAP("VUSB", "rn5t618-power", "vusb"),
+ { /* sentinel */ }
+};
+
+static void unregister_map(void *data)
+{
+ struct iio_dev *iio_dev = (struct iio_dev *) data;
+
+ iio_map_array_unregister(iio_dev);
+}
+
static int rn5t618_adc_probe(struct platform_device *pdev)
{
int ret;
@@ -239,6 +254,14 @@ static int rn5t618_adc_probe(struct platform_device *pdev)
return ret;
}
+ ret = iio_map_array_register(iio_dev, rn5t618_maps);
+ if (ret < 0)
+ return ret;
+
+ ret = devm_add_action_or_reset(adc->dev, unregister_map, iio_dev);
+ if (ret < 0)
+ return ret;
+
return devm_iio_device_register(adc->dev, iio_dev);
}
diff --git a/drivers/iio/adc/ti-ads7950.c b/drivers/iio/adc/ti-ads7950.c
index 2383eacada87..a2b83f0bd526 100644
--- a/drivers/iio/adc/ti-ads7950.c
+++ b/drivers/iio/adc/ti-ads7950.c
@@ -568,7 +568,6 @@ static int ti_ads7950_probe(struct spi_device *spi)
st->ring_xfer.tx_buf = &st->tx_buf[0];
st->ring_xfer.rx_buf = &st->rx_buf[0];
/* len will be set later */
- st->ring_xfer.cs_change = true;
spi_message_add_tail(&st->ring_xfer, &st->ring_msg);
diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c
index 2a957f19048e..9e0fce917ce4 100644
--- a/drivers/iio/humidity/hdc100x.c
+++ b/drivers/iio/humidity/hdc100x.c
@@ -25,6 +25,8 @@
#include <linux/iio/trigger_consumer.h>
#include <linux/iio/triggered_buffer.h>
+#include <linux/time.h>
+
#define HDC100X_REG_TEMP 0x00
#define HDC100X_REG_HUMIDITY 0x01
@@ -166,7 +168,7 @@ static int hdc100x_get_measurement(struct hdc100x_data *data,
struct iio_chan_spec const *chan)
{
struct i2c_client *client = data->client;
- int delay = data->adc_int_us[chan->address];
+ int delay = data->adc_int_us[chan->address] + 1*USEC_PER_MSEC;
int ret;
__be16 val;
@@ -316,7 +318,7 @@ static irqreturn_t hdc100x_trigger_handler(int irq, void *p)
struct iio_dev *indio_dev = pf->indio_dev;
struct hdc100x_data *data = iio_priv(indio_dev);
struct i2c_client *client = data->client;
- int delay = data->adc_int_us[0] + data->adc_int_us[1];
+ int delay = data->adc_int_us[0] + data->adc_int_us[1] + 2*USEC_PER_MSEC;
int ret;
/* dual read starts at temp register */
diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c
index a5b421f42287..b9a06ca29bee 100644
--- a/drivers/iio/imu/adis.c
+++ b/drivers/iio/imu/adis.c
@@ -411,12 +411,11 @@ int __adis_initial_startup(struct adis *adis)
int ret;
/* check if the device has rst pin low */
- gpio = devm_gpiod_get_optional(&adis->spi->dev, "reset", GPIOD_ASIS);
+ gpio = devm_gpiod_get_optional(&adis->spi->dev, "reset", GPIOD_OUT_HIGH);
if (IS_ERR(gpio))
return PTR_ERR(gpio);
if (gpio) {
- gpiod_set_value_cansleep(gpio, 1);
msleep(10);
/* bring device out of reset */
gpiod_set_value_cansleep(gpio, 0);
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index f782d5e1aa25..03e1db5d1e8c 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -249,6 +249,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_REG_DMABUF_MR)(
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&mr->res, NULL);
+ rdma_restrack_add(&mr->res);
uobj->object = mr;
uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DMABUF_MR_HANDLE);
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 283b6b81563c..ea0054c60fbc 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -1681,6 +1681,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
if (nq)
nq->budget++;
atomic_inc(&rdev->srq_count);
+ spin_lock_init(&srq->lock);
return 0;
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index a8688a92c760..4678bd6ec7d6 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1397,7 +1397,6 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode)
memset(&rattr, 0, sizeof(rattr));
rc = bnxt_re_register_netdev(rdev);
if (rc) {
- rtnl_unlock();
ibdev_err(&rdev->ibdev,
"Failed to register with netedev: %#x\n", rc);
return -EINVAL;
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 203e6ddcacbc..be4a07bd268a 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -357,6 +357,7 @@ static int efa_enable_msix(struct efa_dev *dev)
}
if (irq_num != msix_vecs) {
+ efa_disable_msix(dev);
dev_err(&dev->pdev->dev,
"Allocated %d MSI-X (out of %d requested)\n",
irq_num, msix_vecs);
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index eb15c310d63d..e83dc562629e 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -3055,6 +3055,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
int i;
+ struct sdma_desc *descp;
/* Handle last descriptor */
if (unlikely((tx->num_desc == (MAX_DESC - 1)))) {
@@ -3075,12 +3076,10 @@ static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
if (unlikely(tx->num_desc == MAX_DESC))
goto enomem;
- tx->descp = kmalloc_array(
- MAX_DESC,
- sizeof(struct sdma_desc),
- GFP_ATOMIC);
- if (!tx->descp)
+ descp = kmalloc_array(MAX_DESC, sizeof(struct sdma_desc), GFP_ATOMIC);
+ if (!descp)
goto enomem;
+ tx->descp = descp;
/* reserve last descriptor for coalescing */
tx->desc_limit = MAX_DESC - 1;
diff --git a/drivers/infiniband/hw/irdma/Kconfig b/drivers/infiniband/hw/irdma/Kconfig
index dab88286d549..b6f9c41bca51 100644
--- a/drivers/infiniband/hw/irdma/Kconfig
+++ b/drivers/infiniband/hw/irdma/Kconfig
@@ -6,7 +6,7 @@ config INFINIBAND_IRDMA
depends on PCI
depends on ICE && I40E
select GENERIC_ALLOCATOR
- select CONFIG_AUXILIARY_BUS
+ select AUXILIARY_BUS
help
This is an Intel(R) Ethernet Protocol Driver for RDMA driver
that support E810 (iWARP/RoCE) and X722 (iWARP) network devices.
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 7abeb576b3c5..a190fb581591 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -945,7 +945,6 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
u32 *cqb = NULL;
void *cqc;
int cqe_size;
- unsigned int irqn;
int eqn;
int err;
@@ -984,7 +983,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
}
- err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
+ err = mlx5_vector2eqn(dev->mdev, vector, &eqn);
if (err)
goto err_cqb;
@@ -997,7 +996,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
MLX5_SET(cqc, cqc, uar_page, index);
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
MLX5_SET(cqc, cqc, oi, 1);
@@ -1007,7 +1006,6 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
goto err_cqb;
mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
- cq->mcq.irqn = irqn;
if (udata)
cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp;
else
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index eb9b0a2707f8..e95967aefe78 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -975,7 +975,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
struct mlx5_ib_dev *dev;
int user_vector;
int dev_eqn;
- unsigned int irqn;
int err;
if (uverbs_copy_from(&user_vector, attrs,
@@ -987,7 +986,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
return PTR_ERR(c);
dev = to_mdev(c->ibucontext.device);
- err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn);
+ err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn);
if (err < 0)
return err;
@@ -1437,11 +1436,10 @@ out:
rcu_read_unlock();
}
-static bool is_apu_thread_cq(struct mlx5_ib_dev *dev, const void *in)
+static bool is_apu_cq(struct mlx5_ib_dev *dev, const void *in)
{
if (!MLX5_CAP_GEN(dev->mdev, apu) ||
- !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
- apu_thread_cq))
+ !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), apu_cq))
return false;
return true;
@@ -1501,7 +1499,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in,
cmd_in_len, cmd_out, cmd_out_len);
} else if (opcode == MLX5_CMD_OP_CREATE_CQ &&
- !is_apu_thread_cq(dev, cmd_in)) {
+ !is_apu_cq(dev, cmd_in)) {
obj->flags |= DEVX_OBJ_FLAGS_CQ;
obj->core_cq.comp = devx_cq_comp;
err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index b25e0b33a11a..52821485371a 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -8,13 +8,15 @@
#include "srq.h"
static int
-mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
+ struct mlx5_eswitch_rep *rep,
+ int vport_index)
{
struct mlx5_ib_dev *ibdev;
- int vport_index;
ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
- vport_index = rep->vport_index;
+ if (!ibdev)
+ return -EINVAL;
ibdev->port[vport_index].rep = rep;
rep->rep_data[REP_IB].priv = ibdev;
@@ -26,19 +28,39 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
return 0;
}
+static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
+
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
u32 num_ports = mlx5_eswitch_get_total_vports(dev);
const struct mlx5_ib_profile *profile;
+ struct mlx5_core_dev *peer_dev;
struct mlx5_ib_dev *ibdev;
+ u32 peer_num_ports;
int vport_index;
int ret;
+ vport_index = rep->vport_index;
+
+ if (mlx5_lag_is_shared_fdb(dev)) {
+ peer_dev = mlx5_lag_get_peer_mdev(dev);
+ peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
+ if (mlx5_lag_is_master(dev)) {
+ /* Only 1 ib port is the representor for both uplinks */
+ num_ports += peer_num_ports - 1;
+ } else {
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ return 0;
+ vport_index += peer_num_ports;
+ dev = peer_dev;
+ }
+ }
+
if (rep->vport == MLX5_VPORT_UPLINK)
profile = &raw_eth_profile;
else
- return mlx5_ib_set_vport_rep(dev, rep);
+ return mlx5_ib_set_vport_rep(dev, rep, vport_index);
ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
if (!ibdev)
@@ -64,6 +86,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
goto fail_add;
rep->rep_data[REP_IB].priv = ibdev;
+ if (mlx5_lag_is_shared_fdb(dev))
+ mlx5_ib_register_peer_vport_reps(dev);
return 0;
@@ -82,18 +106,45 @@ static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
static void
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{
+ struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
+ int vport_index = rep->vport_index;
struct mlx5_ib_port *port;
- port = &dev->port[rep->vport_index];
+ if (WARN_ON(!mdev))
+ return;
+
+ if (mlx5_lag_is_shared_fdb(mdev) &&
+ !mlx5_lag_is_master(mdev)) {
+ struct mlx5_core_dev *peer_mdev;
+
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ return;
+ peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+ vport_index += mlx5_eswitch_get_total_vports(peer_mdev);
+ }
+
+ if (!dev)
+ return;
+
+ port = &dev->port[vport_index];
write_lock(&port->roce.netdev_lock);
port->roce.netdev = NULL;
write_unlock(&port->roce.netdev_lock);
rep->rep_data[REP_IB].priv = NULL;
port->rep = NULL;
- if (rep->vport == MLX5_VPORT_UPLINK)
+ if (rep->vport == MLX5_VPORT_UPLINK) {
+ struct mlx5_core_dev *peer_mdev;
+ struct mlx5_eswitch *esw;
+
+ if (mlx5_lag_is_shared_fdb(mdev)) {
+ peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+ esw = peer_mdev->priv.eswitch;
+ mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+ }
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+ }
}
static const struct mlx5_eswitch_rep_ops rep_ops = {
@@ -102,6 +153,18 @@ static const struct mlx5_eswitch_rep_ops rep_ops = {
.get_proto_dev = mlx5_ib_rep_to_dev,
};
+static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+ struct mlx5_eswitch *esw;
+
+ if (!peer_mdev)
+ return;
+
+ esw = peer_mdev->priv.eswitch;
+ mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+}
+
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
u16 vport_num)
{
@@ -123,7 +186,7 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
rep = dev->port[port - 1].rep;
- return mlx5_eswitch_add_send_to_vport_rule(esw, rep, sq->base.mqp.qpn);
+ return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn);
}
static int mlx5r_rep_probe(struct auxiliary_device *adev,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 094c976b1eed..466f0a521940 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -126,6 +126,7 @@ static int get_port_state(struct ib_device *ibdev,
static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
struct net_device *ndev,
+ struct net_device *upper,
u32 *port_num)
{
struct net_device *rep_ndev;
@@ -137,6 +138,14 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
if (!port->rep)
continue;
+ if (upper == ndev && port->rep->vport == MLX5_VPORT_UPLINK) {
+ *port_num = i + 1;
+ return &port->roce;
+ }
+
+ if (upper && port->rep->vport == MLX5_VPORT_UPLINK)
+ continue;
+
read_lock(&port->roce.netdev_lock);
rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw,
port->rep->vport);
@@ -196,11 +205,12 @@ static int mlx5_netdev_event(struct notifier_block *this,
}
if (ibdev->is_rep)
- roce = mlx5_get_rep_roce(ibdev, ndev, &port_num);
+ roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num);
if (!roce)
return NOTIFY_DONE;
- if ((upper == ndev || (!upper && ndev == roce->netdev))
- && ibdev->ib_active) {
+ if ((upper == ndev ||
+ ((!upper || ibdev->is_rep) && ndev == roce->netdev)) &&
+ ibdev->ib_active) {
struct ib_event ibev = { };
enum ib_port_state port_state;
@@ -3012,7 +3022,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
struct mlx5_flow_table *ft;
int err;
- if (!ns || !mlx5_lag_is_roce(mdev))
+ if (!ns || !mlx5_lag_is_active(mdev))
return 0;
err = mlx5_cmd_create_vport_lag(mdev);
@@ -3074,9 +3084,11 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
{
int err;
- err = mlx5_nic_vport_enable_roce(dev->mdev);
- if (err)
- return err;
+ if (!dev->is_rep && dev->profile != &raw_eth_profile) {
+ err = mlx5_nic_vport_enable_roce(dev->mdev);
+ if (err)
+ return err;
+ }
err = mlx5_eth_lag_init(dev);
if (err)
@@ -3085,7 +3097,8 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
return 0;
err_disable_roce:
- mlx5_nic_vport_disable_roce(dev->mdev);
+ if (!dev->is_rep && dev->profile != &raw_eth_profile)
+ mlx5_nic_vport_disable_roce(dev->mdev);
return err;
}
@@ -3093,7 +3106,8 @@ err_disable_roce:
static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
{
mlx5_eth_lag_cleanup(dev);
- mlx5_nic_vport_disable_roce(dev->mdev);
+ if (!dev->is_rep && dev->profile != &raw_eth_profile)
+ mlx5_nic_vport_disable_roce(dev->mdev);
}
static int mlx5_ib_rn_get_params(struct ib_device *device, u32 port_num,
@@ -3950,12 +3964,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
/* Register only for native ports */
err = mlx5_add_netdev_notifier(dev, port_num);
- if (err || dev->is_rep || !mlx5_is_roce_init_enabled(mdev))
- /*
- * We don't enable ETH interface for
- * 1. IB representors
- * 2. User disabled ROCE through devlink interface
- */
+ if (err)
return err;
err = mlx5_enable_eth(dev);
@@ -3980,8 +3989,7 @@ static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- if (!dev->is_rep)
- mlx5_disable_eth(dev);
+ mlx5_disable_eth(dev);
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
mlx5_remove_netdev_notifier(dev, port_num);
@@ -4037,7 +4045,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
const char *name;
- if (!mlx5_lag_is_roce(dev->mdev))
+ if (!mlx5_lag_is_active(dev->mdev))
name = "mlx5_%d";
else
name = "mlx5_bond_%d";
@@ -4454,7 +4462,8 @@ static void mlx5r_mp_remove(struct auxiliary_device *adev)
mutex_lock(&mlx5_ib_multiport_mutex);
if (mpi->ibdev)
mlx5_ib_unbind_slave_port(mpi->ibdev, mpi);
- list_del(&mpi->list);
+ else
+ list_del(&mpi->list);
mutex_unlock(&mlx5_ib_multiport_mutex);
kfree(mpi);
}
diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c
index c0ddf7b3c6e2..bbfcce3bdc84 100644
--- a/drivers/infiniband/hw/mlx5/std_types.c
+++ b/drivers/infiniband/hw/mlx5/std_types.c
@@ -114,14 +114,18 @@ out:
static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num,
struct mlx5_ib_uapi_query_port *info)
{
- struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_eswitch_rep *rep;
+ struct mlx5_core_dev *mdev;
int err;
rep = dev->port[port_num - 1].rep;
if (!rep)
return -EOPNOTSUPP;
+ mdev = mlx5_eswitch_get_core_dev(rep->esw);
+ if (!mdev)
+ return -EINVAL;
+
info->vport = rep->vport;
info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT;
@@ -138,9 +142,9 @@ static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num,
if (err)
return err;
- if (mlx5_eswitch_vport_match_metadata_enabled(mdev->priv.eswitch)) {
+ if (mlx5_eswitch_vport_match_metadata_enabled(rep->esw)) {
info->reg_c0.value = mlx5_eswitch_get_vport_metadata_for_match(
- mdev->priv.eswitch, rep->vport);
+ rep->esw, rep->vport);
info->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask();
info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_REG_C0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index 0ea9a5aa4ec0..1c1d1b53312d 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -85,7 +85,7 @@ int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
goto out;
}
- elem = rxe_alloc(&rxe->mc_elem_pool);
+ elem = rxe_alloc_locked(&rxe->mc_elem_pool);
if (!elem) {
err = -ENOMEM;
goto out;
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c
index 85b812586ed4..72d95398e604 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.c
+++ b/drivers/infiniband/sw/rxe/rxe_queue.c
@@ -63,7 +63,7 @@ struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem,
if (*num_elem < 0)
goto err1;
- q = kmalloc(sizeof(*q), GFP_KERNEL);
+ q = kzalloc(sizeof(*q), GFP_KERNEL);
if (!q)
goto err1;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 823f6831e7ea..a09ca21f7dff 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -72,7 +72,9 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
}
static int ipoib_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -83,7 +85,9 @@ static int ipoib_get_coalesce(struct net_device *dev,
}
static int ipoib_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index abf60f4d9203..0aa8629fdf62 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1745,10 +1745,10 @@ static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr,
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- if (!priv->rn_ops->ndo_do_ioctl)
+ if (!priv->rn_ops->ndo_eth_ioctl)
return -EOPNOTSUPP;
- return priv->rn_ops->ndo_do_ioctl(dev, ifr, cmd);
+ return priv->rn_ops->ndo_eth_ioctl(dev, ifr, cmd);
}
static int ipoib_dev_init(struct net_device *dev)
@@ -2078,7 +2078,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
.ndo_set_vf_guid = ipoib_set_vf_guid,
.ndo_set_mac_address = ipoib_set_mac,
.ndo_get_stats64 = ipoib_get_stats,
- .ndo_do_ioctl = ipoib_ioctl,
+ .ndo_eth_ioctl = ipoib_ioctl,
};
static const struct net_device_ops ipoib_netdev_ops_vf = {
@@ -2093,7 +2093,7 @@ static const struct net_device_ops ipoib_netdev_ops_vf = {
.ndo_set_rx_mode = ipoib_set_mcast_list,
.ndo_get_iflink = ipoib_get_iflink,
.ndo_get_stats64 = ipoib_get_stats,
- .ndo_do_ioctl = ipoib_ioctl,
+ .ndo_eth_ioctl = ipoib_ioctl,
};
static const struct net_device_ops ipoib_netdev_default_pf = {
diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 7887941730db..9050ca1f4285 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -959,6 +959,9 @@ EXPORT_SYMBOL_GPL(icc_link_destroy);
*/
void icc_node_add(struct icc_node *node, struct icc_provider *provider)
{
+ if (WARN_ON(node->provider))
+ return;
+
mutex_lock(&icc_lock);
node->provider = provider;
diff --git a/drivers/interconnect/qcom/Kconfig b/drivers/interconnect/qcom/Kconfig
index 0d7a2500d0b8..daf1e25f6042 100644
--- a/drivers/interconnect/qcom/Kconfig
+++ b/drivers/interconnect/qcom/Kconfig
@@ -83,6 +83,15 @@ config INTERCONNECT_QCOM_SC7280
This is a driver for the Qualcomm Network-on-Chip on sc7280-based
platforms.
+config INTERCONNECT_QCOM_SC8180X
+ tristate "Qualcomm SC8180X interconnect driver"
+ depends on INTERCONNECT_QCOM_RPMH_POSSIBLE
+ select INTERCONNECT_QCOM_RPMH
+ select INTERCONNECT_QCOM_BCM_VOTER
+ help
+ This is a driver for the Qualcomm Network-on-Chip on sc8180x-based
+ platforms.
+
config INTERCONNECT_QCOM_SDM660
tristate "Qualcomm SDM660 interconnect driver"
depends on INTERCONNECT_QCOM
diff --git a/drivers/interconnect/qcom/Makefile b/drivers/interconnect/qcom/Makefile
index 2880129a6fe4..69300b1d48ef 100644
--- a/drivers/interconnect/qcom/Makefile
+++ b/drivers/interconnect/qcom/Makefile
@@ -9,6 +9,7 @@ qnoc-qcs404-objs := qcs404.o
icc-rpmh-obj := icc-rpmh.o
qnoc-sc7180-objs := sc7180.o
qnoc-sc7280-objs := sc7280.o
+qnoc-sc8180x-objs := sc8180x.o
qnoc-sdm660-objs := sdm660.o
qnoc-sdm845-objs := sdm845.o
qnoc-sdx55-objs := sdx55.o
@@ -26,6 +27,7 @@ obj-$(CONFIG_INTERCONNECT_QCOM_QCS404) += qnoc-qcs404.o
obj-$(CONFIG_INTERCONNECT_QCOM_RPMH) += icc-rpmh.o
obj-$(CONFIG_INTERCONNECT_QCOM_SC7180) += qnoc-sc7180.o
obj-$(CONFIG_INTERCONNECT_QCOM_SC7280) += qnoc-sc7280.o
+obj-$(CONFIG_INTERCONNECT_QCOM_SC8180X) += qnoc-sc8180x.o
obj-$(CONFIG_INTERCONNECT_QCOM_SDM660) += qnoc-sdm660.o
obj-$(CONFIG_INTERCONNECT_QCOM_SDM845) += qnoc-sdm845.o
obj-$(CONFIG_INTERCONNECT_QCOM_SDX55) += qnoc-sdx55.o
diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c
index 27cc5f03611c..3eb7936d2cf6 100644
--- a/drivers/interconnect/qcom/icc-rpmh.c
+++ b/drivers/interconnect/qcom/icc-rpmh.c
@@ -7,6 +7,7 @@
#include <linux/interconnect-provider.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/of_device.h>
#include <linux/slab.h>
#include "bcm-voter.h"
@@ -20,18 +21,13 @@ void qcom_icc_pre_aggregate(struct icc_node *node)
{
size_t i;
struct qcom_icc_node *qn;
- struct qcom_icc_provider *qp;
qn = node->data;
- qp = to_qcom_provider(node->provider);
for (i = 0; i < QCOM_ICC_NUM_BUCKETS; i++) {
qn->sum_avg[i] = 0;
qn->max_peak[i] = 0;
}
-
- for (i = 0; i < qn->num_bcms; i++)
- qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]);
}
EXPORT_SYMBOL_GPL(qcom_icc_pre_aggregate);
@@ -49,8 +45,10 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
{
size_t i;
struct qcom_icc_node *qn;
+ struct qcom_icc_provider *qp;
qn = node->data;
+ qp = to_qcom_provider(node->provider);
if (!tag)
tag = QCOM_ICC_TAG_ALWAYS;
@@ -70,6 +68,9 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
*agg_avg += avg_bw;
*agg_peak = max_t(u32, *agg_peak, peak_bw);
+ for (i = 0; i < qn->num_bcms; i++)
+ qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]);
+
return 0;
}
EXPORT_SYMBOL_GPL(qcom_icc_aggregate);
@@ -182,4 +183,96 @@ int qcom_icc_bcm_init(struct qcom_icc_bcm *bcm, struct device *dev)
}
EXPORT_SYMBOL_GPL(qcom_icc_bcm_init);
+int qcom_icc_rpmh_probe(struct platform_device *pdev)
+{
+ const struct qcom_icc_desc *desc;
+ struct device *dev = &pdev->dev;
+ struct icc_onecell_data *data;
+ struct icc_provider *provider;
+ struct qcom_icc_node **qnodes, *qn;
+ struct qcom_icc_provider *qp;
+ struct icc_node *node;
+ size_t num_nodes, i, j;
+ int ret;
+
+ desc = of_device_get_match_data(dev);
+ if (!desc)
+ return -EINVAL;
+
+ qnodes = desc->nodes;
+ num_nodes = desc->num_nodes;
+
+ qp = devm_kzalloc(dev, sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return -ENOMEM;
+
+ data = devm_kzalloc(dev, struct_size(data, nodes, num_nodes), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ provider = &qp->provider;
+ provider->dev = dev;
+ provider->set = qcom_icc_set;
+ provider->pre_aggregate = qcom_icc_pre_aggregate;
+ provider->aggregate = qcom_icc_aggregate;
+ provider->xlate_extended = qcom_icc_xlate_extended;
+ INIT_LIST_HEAD(&provider->nodes);
+ provider->data = data;
+
+ qp->dev = dev;
+ qp->bcms = desc->bcms;
+ qp->num_bcms = desc->num_bcms;
+
+ qp->voter = of_bcm_voter_get(qp->dev, NULL);
+ if (IS_ERR(qp->voter))
+ return PTR_ERR(qp->voter);
+
+ ret = icc_provider_add(provider);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < qp->num_bcms; i++)
+ qcom_icc_bcm_init(qp->bcms[i], dev);
+
+ for (i = 0; i < num_nodes; i++) {
+ qn = qnodes[i];
+ if (!qn)
+ continue;
+
+ node = icc_node_create(qn->id);
+ if (IS_ERR(node)) {
+ ret = PTR_ERR(node);
+ goto err;
+ }
+
+ node->name = qn->name;
+ node->data = qn;
+ icc_node_add(node, provider);
+
+ for (j = 0; j < qn->num_links; j++)
+ icc_link_create(node, qn->links[j]);
+
+ data->nodes[i] = node;
+ }
+
+ data->num_nodes = num_nodes;
+ platform_set_drvdata(pdev, qp);
+
+ return 0;
+err:
+ icc_nodes_remove(provider);
+ icc_provider_del(provider);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(qcom_icc_rpmh_probe);
+
+int qcom_icc_rpmh_remove(struct platform_device *pdev)
+{
+ struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
+
+ icc_nodes_remove(&qp->provider);
+ return icc_provider_del(&qp->provider);
+}
+EXPORT_SYMBOL_GPL(qcom_icc_rpmh_remove);
+
MODULE_LICENSE("GPL v2");
diff --git a/drivers/interconnect/qcom/icc-rpmh.h b/drivers/interconnect/qcom/icc-rpmh.h
index e5f61ab989e7..4bfc060529ba 100644
--- a/drivers/interconnect/qcom/icc-rpmh.h
+++ b/drivers/interconnect/qcom/icc-rpmh.h
@@ -134,5 +134,7 @@ int qcom_icc_set(struct icc_node *src, struct icc_node *dst);
struct icc_node_data *qcom_icc_xlate_extended(struct of_phandle_args *spec, void *data);
int qcom_icc_bcm_init(struct qcom_icc_bcm *bcm, struct device *dev);
void qcom_icc_pre_aggregate(struct icc_node *node);
+int qcom_icc_rpmh_probe(struct platform_device *pdev);
+int qcom_icc_rpmh_remove(struct platform_device *pdev);
#endif
diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c
index 695f28789e98..c7af143980de 100644
--- a/drivers/interconnect/qcom/osm-l3.c
+++ b/drivers/interconnect/qcom/osm-l3.c
@@ -15,6 +15,7 @@
#include <dt-bindings/interconnect/qcom,osm-l3.h>
#include "sc7180.h"
+#include "sc8180x.h"
#include "sdm845.h"
#include "sm8150.h"
#include "sm8250.h"
@@ -37,7 +38,7 @@
#define OSM_L3_MAX_LINKS 1
-#define to_qcom_provider(_provider) \
+#define to_osm_l3_provider(_provider) \
container_of(_provider, struct qcom_osm_l3_icc_provider, provider)
struct qcom_osm_l3_icc_provider {
@@ -49,14 +50,14 @@ struct qcom_osm_l3_icc_provider {
};
/**
- * struct qcom_icc_node - Qualcomm specific interconnect nodes
+ * struct qcom_osm_l3_node - Qualcomm specific interconnect nodes
* @name: the node name used in debugfs
* @links: an array of nodes where we can go next while traversing
* @id: a unique node identifier
* @num_links: the total number of @links
* @buswidth: width of the interconnect between a node and the bus
*/
-struct qcom_icc_node {
+struct qcom_osm_l3_node {
const char *name;
u16 links[OSM_L3_MAX_LINKS];
u16 id;
@@ -64,8 +65,8 @@ struct qcom_icc_node {
u16 buswidth;
};
-struct qcom_icc_desc {
- const struct qcom_icc_node **nodes;
+struct qcom_osm_l3_desc {
+ const struct qcom_osm_l3_node **nodes;
size_t num_nodes;
unsigned int lut_row_size;
unsigned int reg_freq_lut;
@@ -73,7 +74,7 @@ struct qcom_icc_desc {
};
#define DEFINE_QNODE(_name, _id, _buswidth, ...) \
- static const struct qcom_icc_node _name = { \
+ static const struct qcom_osm_l3_node _name = { \
.name = #_name, \
.id = _id, \
.buswidth = _buswidth, \
@@ -84,12 +85,12 @@ struct qcom_icc_desc {
DEFINE_QNODE(sdm845_osm_apps_l3, SDM845_MASTER_OSM_L3_APPS, 16, SDM845_SLAVE_OSM_L3);
DEFINE_QNODE(sdm845_osm_l3, SDM845_SLAVE_OSM_L3, 16);
-static const struct qcom_icc_node *sdm845_osm_l3_nodes[] = {
+static const struct qcom_osm_l3_node *sdm845_osm_l3_nodes[] = {
[MASTER_OSM_L3_APPS] = &sdm845_osm_apps_l3,
[SLAVE_OSM_L3] = &sdm845_osm_l3,
};
-static const struct qcom_icc_desc sdm845_icc_osm_l3 = {
+static const struct qcom_osm_l3_desc sdm845_icc_osm_l3 = {
.nodes = sdm845_osm_l3_nodes,
.num_nodes = ARRAY_SIZE(sdm845_osm_l3_nodes),
.lut_row_size = OSM_LUT_ROW_SIZE,
@@ -100,12 +101,12 @@ static const struct qcom_icc_desc sdm845_icc_osm_l3 = {
DEFINE_QNODE(sc7180_osm_apps_l3, SC7180_MASTER_OSM_L3_APPS, 16, SC7180_SLAVE_OSM_L3);
DEFINE_QNODE(sc7180_osm_l3, SC7180_SLAVE_OSM_L3, 16);
-static const struct qcom_icc_node *sc7180_osm_l3_nodes[] = {
+static const struct qcom_osm_l3_node *sc7180_osm_l3_nodes[] = {
[MASTER_OSM_L3_APPS] = &sc7180_osm_apps_l3,
[SLAVE_OSM_L3] = &sc7180_osm_l3,
};
-static const struct qcom_icc_desc sc7180_icc_osm_l3 = {
+static const struct qcom_osm_l3_desc sc7180_icc_osm_l3 = {
.nodes = sc7180_osm_l3_nodes,
.num_nodes = ARRAY_SIZE(sc7180_osm_l3_nodes),
.lut_row_size = OSM_LUT_ROW_SIZE,
@@ -113,15 +114,31 @@ static const struct qcom_icc_desc sc7180_icc_osm_l3 = {
.reg_perf_state = OSM_REG_PERF_STATE,
};
+DEFINE_QNODE(sc8180x_osm_apps_l3, SC8180X_MASTER_OSM_L3_APPS, 32, SC8180X_SLAVE_OSM_L3);
+DEFINE_QNODE(sc8180x_osm_l3, SC8180X_SLAVE_OSM_L3, 32);
+
+static const struct qcom_osm_l3_node *sc8180x_osm_l3_nodes[] = {
+ [MASTER_OSM_L3_APPS] = &sc8180x_osm_apps_l3,
+ [SLAVE_OSM_L3] = &sc8180x_osm_l3,
+};
+
+static const struct qcom_osm_l3_desc sc8180x_icc_osm_l3 = {
+ .nodes = sc8180x_osm_l3_nodes,
+ .num_nodes = ARRAY_SIZE(sc8180x_osm_l3_nodes),
+ .lut_row_size = OSM_LUT_ROW_SIZE,
+ .reg_freq_lut = OSM_REG_FREQ_LUT,
+ .reg_perf_state = OSM_REG_PERF_STATE,
+};
+
DEFINE_QNODE(sm8150_osm_apps_l3, SM8150_MASTER_OSM_L3_APPS, 32, SM8150_SLAVE_OSM_L3);
DEFINE_QNODE(sm8150_osm_l3, SM8150_SLAVE_OSM_L3, 32);
-static const struct qcom_icc_node *sm8150_osm_l3_nodes[] = {
+static const struct qcom_osm_l3_node *sm8150_osm_l3_nodes[] = {
[MASTER_OSM_L3_APPS] = &sm8150_osm_apps_l3,
[SLAVE_OSM_L3] = &sm8150_osm_l3,
};
-static const struct qcom_icc_desc sm8150_icc_osm_l3 = {
+static const struct qcom_osm_l3_desc sm8150_icc_osm_l3 = {
.nodes = sm8150_osm_l3_nodes,
.num_nodes = ARRAY_SIZE(sm8150_osm_l3_nodes),
.lut_row_size = OSM_LUT_ROW_SIZE,
@@ -132,12 +149,12 @@ static const struct qcom_icc_desc sm8150_icc_osm_l3 = {
DEFINE_QNODE(sm8250_epss_apps_l3, SM8250_MASTER_EPSS_L3_APPS, 32, SM8250_SLAVE_EPSS_L3);
DEFINE_QNODE(sm8250_epss_l3, SM8250_SLAVE_EPSS_L3, 32);
-static const struct qcom_icc_node *sm8250_epss_l3_nodes[] = {
+static const struct qcom_osm_l3_node *sm8250_epss_l3_nodes[] = {
[MASTER_EPSS_L3_APPS] = &sm8250_epss_apps_l3,
[SLAVE_EPSS_L3_SHARED] = &sm8250_epss_l3,
};
-static const struct qcom_icc_desc sm8250_icc_epss_l3 = {
+static const struct qcom_osm_l3_desc sm8250_icc_epss_l3 = {
.nodes = sm8250_epss_l3_nodes,
.num_nodes = ARRAY_SIZE(sm8250_epss_l3_nodes),
.lut_row_size = EPSS_LUT_ROW_SIZE,
@@ -145,11 +162,11 @@ static const struct qcom_icc_desc sm8250_icc_epss_l3 = {
.reg_perf_state = EPSS_REG_PERF_STATE,
};
-static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
+static int qcom_osm_l3_set(struct icc_node *src, struct icc_node *dst)
{
struct qcom_osm_l3_icc_provider *qp;
struct icc_provider *provider;
- const struct qcom_icc_node *qn;
+ const struct qcom_osm_l3_node *qn;
struct icc_node *n;
unsigned int index;
u32 agg_peak = 0;
@@ -158,7 +175,7 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
qn = src->data;
provider = src->provider;
- qp = to_qcom_provider(provider);
+ qp = to_osm_l3_provider(provider);
list_for_each_entry(n, &provider->nodes, node_list)
provider->aggregate(n, 0, n->avg_bw, n->peak_bw,
@@ -191,10 +208,10 @@ static int qcom_osm_l3_probe(struct platform_device *pdev)
u32 info, src, lval, i, prev_freq = 0, freq;
static unsigned long hw_rate, xo_rate;
struct qcom_osm_l3_icc_provider *qp;
- const struct qcom_icc_desc *desc;
+ const struct qcom_osm_l3_desc *desc;
struct icc_onecell_data *data;
struct icc_provider *provider;
- const struct qcom_icc_node **qnodes;
+ const struct qcom_osm_l3_node **qnodes;
struct icc_node *node;
size_t num_nodes;
struct clk *clk;
@@ -264,7 +281,7 @@ static int qcom_osm_l3_probe(struct platform_device *pdev)
provider = &qp->provider;
provider->dev = &pdev->dev;
- provider->set = qcom_icc_set;
+ provider->set = qcom_osm_l3_set;
provider->aggregate = icc_std_aggregate;
provider->xlate = of_icc_xlate_onecell;
INIT_LIST_HEAD(&provider->nodes);
@@ -286,7 +303,7 @@ static int qcom_osm_l3_probe(struct platform_device *pdev)
}
node->name = qnodes[i]->name;
- /* Cast away const and add it back in qcom_icc_set() */
+ /* Cast away const and add it back in qcom_osm_l3_set() */
node->data = (void *)qnodes[i];
icc_node_add(node, provider);
@@ -311,6 +328,7 @@ static const struct of_device_id osm_l3_of_match[] = {
{ .compatible = "qcom,sc7180-osm-l3", .data = &sc7180_icc_osm_l3 },
{ .compatible = "qcom,sdm845-osm-l3", .data = &sdm845_icc_osm_l3 },
{ .compatible = "qcom,sm8150-osm-l3", .data = &sm8150_icc_osm_l3 },
+ { .compatible = "qcom,sc8180x-osm-l3", .data = &sc8180x_icc_osm_l3 },
{ .compatible = "qcom,sm8250-epss-l3", .data = &sm8250_icc_epss_l3 },
{ }
};
diff --git a/drivers/interconnect/qcom/sc7180.c b/drivers/interconnect/qcom/sc7180.c
index 8d9044ed18ab..12d59c36df53 100644
--- a/drivers/interconnect/qcom/sc7180.c
+++ b/drivers/interconnect/qcom/sc7180.c
@@ -504,98 +504,6 @@ static struct qcom_icc_desc sc7180_system_noc = {
.num_bcms = ARRAY_SIZE(system_noc_bcms),
};
-static int qnoc_probe(struct platform_device *pdev)
-{
- const struct qcom_icc_desc *desc;
- struct icc_onecell_data *data;
- struct icc_provider *provider;
- struct qcom_icc_node **qnodes;
- struct qcom_icc_provider *qp;
- struct icc_node *node;
- size_t num_nodes, i;
- int ret;
-
- desc = device_get_match_data(&pdev->dev);
- if (!desc)
- return -EINVAL;
-
- qnodes = desc->nodes;
- num_nodes = desc->num_nodes;
-
- qp = devm_kzalloc(&pdev->dev, sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return -ENOMEM;
-
- data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- provider = &qp->provider;
- provider->dev = &pdev->dev;
- provider->set = qcom_icc_set;
- provider->pre_aggregate = qcom_icc_pre_aggregate;
- provider->aggregate = qcom_icc_aggregate;
- provider->xlate_extended = qcom_icc_xlate_extended;
- INIT_LIST_HEAD(&provider->nodes);
- provider->data = data;
-
- qp->dev = &pdev->dev;
- qp->bcms = desc->bcms;
- qp->num_bcms = desc->num_bcms;
-
- qp->voter = of_bcm_voter_get(qp->dev, NULL);
- if (IS_ERR(qp->voter))
- return PTR_ERR(qp->voter);
-
- ret = icc_provider_add(provider);
- if (ret) {
- dev_err(&pdev->dev, "error adding interconnect provider\n");
- return ret;
- }
-
- for (i = 0; i < qp->num_bcms; i++)
- qcom_icc_bcm_init(qp->bcms[i], &pdev->dev);
-
- for (i = 0; i < num_nodes; i++) {
- size_t j;
-
- if (!qnodes[i])
- continue;
-
- node = icc_node_create(qnodes[i]->id);
- if (IS_ERR(node)) {
- ret = PTR_ERR(node);
- goto err;
- }
-
- node->name = qnodes[i]->name;
- node->data = qnodes[i];
- icc_node_add(node, provider);
-
- for (j = 0; j < qnodes[i]->num_links; j++)
- icc_link_create(node, qnodes[i]->links[j]);
-
- data->nodes[i] = node;
- }
- data->num_nodes = num_nodes;
-
- platform_set_drvdata(pdev, qp);
-
- return 0;
-err:
- icc_nodes_remove(provider);
- icc_provider_del(provider);
- return ret;
-}
-
-static int qnoc_remove(struct platform_device *pdev)
-{
- struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
-
- icc_nodes_remove(&qp->provider);
- return icc_provider_del(&qp->provider);
-}
-
static const struct of_device_id qnoc_of_match[] = {
{ .compatible = "qcom,sc7180-aggre1-noc",
.data = &sc7180_aggre1_noc},
@@ -628,8 +536,8 @@ static const struct of_device_id qnoc_of_match[] = {
MODULE_DEVICE_TABLE(of, qnoc_of_match);
static struct platform_driver qnoc_driver = {
- .probe = qnoc_probe,
- .remove = qnoc_remove,
+ .probe = qcom_icc_rpmh_probe,
+ .remove = qcom_icc_rpmh_remove,
.driver = {
.name = "qnoc-sc7180",
.of_match_table = qnoc_of_match,
diff --git a/drivers/interconnect/qcom/sc7280.c b/drivers/interconnect/qcom/sc7280.c
index 8d1b55c3705c..f8b34f6cbb0d 100644
--- a/drivers/interconnect/qcom/sc7280.c
+++ b/drivers/interconnect/qcom/sc7280.c
@@ -1802,98 +1802,6 @@ static struct qcom_icc_desc sc7280_system_noc = {
.num_bcms = ARRAY_SIZE(system_noc_bcms),
};
-static int qnoc_probe(struct platform_device *pdev)
-{
- const struct qcom_icc_desc *desc;
- struct icc_onecell_data *data;
- struct icc_provider *provider;
- struct qcom_icc_node **qnodes;
- struct qcom_icc_provider *qp;
- struct icc_node *node;
- size_t num_nodes, i;
- int ret;
-
- desc = device_get_match_data(&pdev->dev);
- if (!desc)
- return -EINVAL;
-
- qnodes = desc->nodes;
- num_nodes = desc->num_nodes;
-
- qp = devm_kzalloc(&pdev->dev, sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return -ENOMEM;
-
- data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- provider = &qp->provider;
- provider->dev = &pdev->dev;
- provider->set = qcom_icc_set;
- provider->pre_aggregate = qcom_icc_pre_aggregate;
- provider->aggregate = qcom_icc_aggregate;
- provider->xlate_extended = qcom_icc_xlate_extended;
- INIT_LIST_HEAD(&provider->nodes);
- provider->data = data;
-
- qp->dev = &pdev->dev;
- qp->bcms = desc->bcms;
- qp->num_bcms = desc->num_bcms;
-
- qp->voter = of_bcm_voter_get(qp->dev, NULL);
- if (IS_ERR(qp->voter))
- return PTR_ERR(qp->voter);
-
- ret = icc_provider_add(provider);
- if (ret) {
- dev_err(&pdev->dev, "error adding interconnect provider\n");
- return ret;
- }
-
- for (i = 0; i < qp->num_bcms; i++)
- qcom_icc_bcm_init(qp->bcms[i], &pdev->dev);
-
- for (i = 0; i < num_nodes; i++) {
- size_t j;
-
- if (!qnodes[i])
- continue;
-
- node = icc_node_create(qnodes[i]->id);
- if (IS_ERR(node)) {
- ret = PTR_ERR(node);
- goto err;
- }
-
- node->name = qnodes[i]->name;
- node->data = qnodes[i];
- icc_node_add(node, provider);
-
- for (j = 0; j < qnodes[i]->num_links; j++)
- icc_link_create(node, qnodes[i]->links[j]);
-
- data->nodes[i] = node;
- }
- data->num_nodes = num_nodes;
-
- platform_set_drvdata(pdev, qp);
-
- return 0;
-err:
- icc_nodes_remove(provider);
- icc_provider_del(provider);
- return ret;
-}
-
-static int qnoc_remove(struct platform_device *pdev)
-{
- struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
-
- icc_nodes_remove(&qp->provider);
- return icc_provider_del(&qp->provider);
-}
-
static const struct of_device_id qnoc_of_match[] = {
{ .compatible = "qcom,sc7280-aggre1-noc",
.data = &sc7280_aggre1_noc},
@@ -1924,8 +1832,8 @@ static const struct of_device_id qnoc_of_match[] = {
MODULE_DEVICE_TABLE(of, qnoc_of_match);
static struct platform_driver qnoc_driver = {
- .probe = qnoc_probe,
- .remove = qnoc_remove,
+ .probe = qcom_icc_rpmh_probe,
+ .remove = qcom_icc_rpmh_remove,
.driver = {
.name = "qnoc-sc7280",
.of_match_table = qnoc_of_match,
diff --git a/drivers/interconnect/qcom/sc8180x.c b/drivers/interconnect/qcom/sc8180x.c
new file mode 100644
index 000000000000..e9adf05b9330
--- /dev/null
+++ b/drivers/interconnect/qcom/sc8180x.c
@@ -0,0 +1,626 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021, Linaro Ltd.
+ */
+
+#include <linux/device.h>
+#include <linux/interconnect-provider.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+
+#include <dt-bindings/interconnect/qcom,sc8180x.h>
+
+#include "bcm-voter.h"
+#include "icc-rpmh.h"
+#include "sc8180x.h"
+
+DEFINE_QNODE(mas_qhm_a1noc_cfg, SC8180X_MASTER_A1NOC_CFG, 1, 4, SC8180X_SLAVE_SERVICE_A1NOC);
+DEFINE_QNODE(mas_xm_ufs_card, SC8180X_MASTER_UFS_CARD, 1, 8, SC8180X_A1NOC_SNOC_SLV);
+DEFINE_QNODE(mas_xm_ufs_g4, SC8180X_MASTER_UFS_GEN4, 1, 8, SC8180X_A1NOC_SNOC_SLV);
+DEFINE_QNODE(mas_xm_ufs_mem, SC8180X_MASTER_UFS_MEM, 1, 8, SC8180X_A1NOC_SNOC_SLV);
+DEFINE_QNODE(mas_xm_usb3_0, SC8180X_MASTER_USB3, 1, 8, SC8180X_A1NOC_SNOC_SLV);
+DEFINE_QNODE(mas_xm_usb3_1, SC8180X_MASTER_USB3_1, 1, 8, SC8180X_A1NOC_SNOC_SLV);
+DEFINE_QNODE(mas_xm_usb3_2, SC8180X_MASTER_USB3_2, 1, 16, SC8180X_A1NOC_SNOC_SLV);
+DEFINE_QNODE(mas_qhm_a2noc_cfg, SC8180X_MASTER_A2NOC_CFG, 1, 4, SC8180X_SLAVE_SERVICE_A2NOC);
+DEFINE_QNODE(mas_qhm_qdss_bam, SC8180X_MASTER_QDSS_BAM, 1, 4, SC8180X_A2NOC_SNOC_SLV);
+DEFINE_QNODE(mas_qhm_qspi, SC8180X_MASTER_QSPI_0, 1, 4, SC8180X_A2NOC_SNOC_SLV);
+DEFINE_QNODE(mas_qhm_qspi1, SC8180X_MASTER_QSPI_1, 1, 4, SC8180X_A2NOC_SNOC_SLV);
+DEFINE_QNODE(mas_qhm_qup0, SC8180X_MASTER_QUP_0, 1, 4, SC8180X_A2NOC_SNOC_SLV);
+DEFINE_QNODE(mas_qhm_qup1, SC8180X_MASTER_QUP_1, 1, 4, SC8180X_A2NOC_SNOC_SLV);
+DEFINE_QNODE(mas_qhm_qup2, SC8180X_MASTER_QUP_2, 1, 4, SC8180X_A2NOC_SNOC_SLV);
+DEFINE_QNODE(mas_qhm_sensorss_ahb, SC8180X_MASTER_SENSORS_AHB, 1, 4, SC8180X_A2NOC_SNOC_SLV);
+DEFINE_QNODE(mas_qxm_crypto, SC8180X_MASTER_CRYPTO_CORE_0, 1, 8, SC8180X_A2NOC_SNOC_SLV);
+DEFINE_QNODE(mas_qxm_ipa, SC8180X_MASTER_IPA, 1, 8, SC8180X_A2NOC_SNOC_SLV);
+DEFINE_QNODE(mas_xm_emac, SC8180X_MASTER_EMAC, 1, 8, SC8180X_A2NOC_SNOC_SLV);
+DEFINE_QNODE(mas_xm_pcie3_0, SC8180X_MASTER_PCIE, 1, 8, SC8180X_SLAVE_ANOC_PCIE_GEM_NOC);
+DEFINE_QNODE(mas_xm_pcie3_1, SC8180X_MASTER_PCIE_1, 1, 16, SC8180X_SLAVE_ANOC_PCIE_GEM_NOC);
+DEFINE_QNODE(mas_xm_pcie3_2, SC8180X_MASTER_PCIE_2, 1, 8, SC8180X_SLAVE_ANOC_PCIE_GEM_NOC);
+DEFINE_QNODE(mas_xm_pcie3_3, SC8180X_MASTER_PCIE_3, 1, 16, SC8180X_SLAVE_ANOC_PCIE_GEM_NOC);
+DEFINE_QNODE(mas_xm_qdss_etr, SC8180X_MASTER_QDSS_ETR, 1, 8, SC8180X_A2NOC_SNOC_SLV);
+DEFINE_QNODE(mas_xm_sdc2, SC8180X_MASTER_SDCC_2, 1, 8, SC8180X_A2NOC_SNOC_SLV);
+DEFINE_QNODE(mas_xm_sdc4, SC8180X_MASTER_SDCC_4, 1, 8, SC8180X_A2NOC_SNOC_SLV);
+DEFINE_QNODE(mas_qxm_camnoc_hf0_uncomp, SC8180X_MASTER_CAMNOC_HF0_UNCOMP, 1, 32, SC8180X_SLAVE_CAMNOC_UNCOMP);
+DEFINE_QNODE(mas_qxm_camnoc_hf1_uncomp, SC8180X_MASTER_CAMNOC_HF1_UNCOMP, 1, 32, SC8180X_SLAVE_CAMNOC_UNCOMP);
+DEFINE_QNODE(mas_qxm_camnoc_sf_uncomp, SC8180X_MASTER_CAMNOC_SF_UNCOMP, 1, 32, SC8180X_SLAVE_CAMNOC_UNCOMP);
+DEFINE_QNODE(mas_qnm_npu, SC8180X_MASTER_NPU, 1, 32, SC8180X_SLAVE_CDSP_MEM_NOC);
+DEFINE_QNODE(mas_qnm_snoc, SC8180X_SNOC_CNOC_MAS, 1, 8, SC8180X_SLAVE_TLMM_SOUTH, SC8180X_SLAVE_CDSP_CFG, SC8180X_SLAVE_SPSS_CFG, SC8180X_SLAVE_CAMERA_CFG, SC8180X_SLAVE_SDCC_4, SC8180X_SLAVE_AHB2PHY_CENTER, SC8180X_SLAVE_SDCC_2, SC8180X_SLAVE_PCIE_2_CFG, SC8180X_SLAVE_CNOC_MNOC_CFG, SC8180X_SLAVE_EMAC_CFG, SC8180X_SLAVE_QSPI_0, SC8180X_SLAVE_QSPI_1, SC8180X_SLAVE_TLMM_EAST, SC8180X_SLAVE_SNOC_CFG, SC8180X_SLAVE_AHB2PHY_EAST, SC8180X_SLAVE_GLM, SC8180X_SLAVE_PDM, SC8180X_SLAVE_PCIE_1_CFG, SC8180X_SLAVE_A2NOC_CFG, SC8180X_SLAVE_QDSS_CFG, SC8180X_SLAVE_DISPLAY_CFG, SC8180X_SLAVE_TCSR, SC8180X_SLAVE_UFS_MEM_0_CFG, SC8180X_SLAVE_CNOC_DDRSS, SC8180X_SLAVE_PCIE_0_CFG, SC8180X_SLAVE_QUP_1, SC8180X_SLAVE_QUP_2, SC8180X_SLAVE_NPU_CFG, SC8180X_SLAVE_CRYPTO_0_CFG, SC8180X_SLAVE_GRAPHICS_3D_CFG, SC8180X_SLAVE_VENUS_CFG, SC8180X_SLAVE_TSIF, SC8180X_SLAVE_IPA_CFG, SC8180X_SLAVE_CLK_CTL, SC8180X_SLAVE_SECURITY, SC8180X_SLAVE_AOP, SC8180X_SLAVE_AHB2PHY_WEST, SC8180X_SLAVE_AHB2PHY_SOUTH, SC8180X_SLAVE_SERVICE_CNOC, SC8180X_SLAVE_UFS_CARD_CFG, SC8180X_SLAVE_USB3_1, SC8180X_SLAVE_USB3_2, SC8180X_SLAVE_PCIE_3_CFG, SC8180X_SLAVE_RBCPR_CX_CFG, SC8180X_SLAVE_TLMM_WEST, SC8180X_SLAVE_A1NOC_CFG, SC8180X_SLAVE_AOSS, SC8180X_SLAVE_PRNG, SC8180X_SLAVE_VSENSE_CTRL_CFG, SC8180X_SLAVE_QUP_0, SC8180X_SLAVE_USB3, SC8180X_SLAVE_RBCPR_MMCX_CFG, SC8180X_SLAVE_PIMEM_CFG, SC8180X_SLAVE_UFS_MEM_1_CFG, SC8180X_SLAVE_RBCPR_MX_CFG, SC8180X_SLAVE_IMEM_CFG);
+DEFINE_QNODE(mas_qhm_cnoc_dc_noc, SC8180X_MASTER_CNOC_DC_NOC, 1, 4, SC8180X_SLAVE_LLCC_CFG, SC8180X_SLAVE_GEM_NOC_CFG);
+DEFINE_QNODE(mas_acm_apps, SC8180X_MASTER_AMPSS_M0, 4, 64, SC8180X_SLAVE_ECC, SC8180X_SLAVE_LLCC, SC8180X_SLAVE_GEM_NOC_SNOC);
+DEFINE_QNODE(mas_acm_gpu_tcu, SC8180X_MASTER_GPU_TCU, 1, 8, SC8180X_SLAVE_LLCC, SC8180X_SLAVE_GEM_NOC_SNOC);
+DEFINE_QNODE(mas_acm_sys_tcu, SC8180X_MASTER_SYS_TCU, 1, 8, SC8180X_SLAVE_LLCC, SC8180X_SLAVE_GEM_NOC_SNOC);
+DEFINE_QNODE(mas_qhm_gemnoc_cfg, SC8180X_MASTER_GEM_NOC_CFG, 1, 4, SC8180X_SLAVE_SERVICE_GEM_NOC_1, SC8180X_SLAVE_SERVICE_GEM_NOC, SC8180X_SLAVE_MSS_PROC_MS_MPU_CFG);
+DEFINE_QNODE(mas_qnm_cmpnoc, SC8180X_MASTER_COMPUTE_NOC, 2, 32, SC8180X_SLAVE_ECC, SC8180X_SLAVE_LLCC, SC8180X_SLAVE_GEM_NOC_SNOC);
+DEFINE_QNODE(mas_qnm_gpu, SC8180X_MASTER_GRAPHICS_3D, 4, 32, SC8180X_SLAVE_LLCC, SC8180X_SLAVE_GEM_NOC_SNOC);
+DEFINE_QNODE(mas_qnm_mnoc_hf, SC8180X_MASTER_MNOC_HF_MEM_NOC, 2, 32, SC8180X_SLAVE_LLCC);
+DEFINE_QNODE(mas_qnm_mnoc_sf, SC8180X_MASTER_MNOC_SF_MEM_NOC, 1, 32, SC8180X_SLAVE_LLCC, SC8180X_SLAVE_GEM_NOC_SNOC);
+DEFINE_QNODE(mas_qnm_pcie, SC8180X_MASTER_GEM_NOC_PCIE_SNOC, 1, 32, SC8180X_SLAVE_LLCC, SC8180X_SLAVE_GEM_NOC_SNOC);
+DEFINE_QNODE(mas_qnm_snoc_gc, SC8180X_MASTER_SNOC_GC_MEM_NOC, 1, 8, SC8180X_SLAVE_LLCC);
+DEFINE_QNODE(mas_qnm_snoc_sf, SC8180X_MASTER_SNOC_SF_MEM_NOC, 1, 32, SC8180X_SLAVE_LLCC);
+DEFINE_QNODE(mas_qxm_ecc, SC8180X_MASTER_ECC, 2, 32, SC8180X_SLAVE_LLCC);
+DEFINE_QNODE(mas_ipa_core_master, SC8180X_MASTER_IPA_CORE, 1, 8, SC8180X_SLAVE_IPA_CORE);
+DEFINE_QNODE(mas_llcc_mc, SC8180X_MASTER_LLCC, 8, 4, SC8180X_SLAVE_EBI_CH0);
+DEFINE_QNODE(mas_qhm_mnoc_cfg, SC8180X_MASTER_CNOC_MNOC_CFG, 1, 4, SC8180X_SLAVE_SERVICE_MNOC);
+DEFINE_QNODE(mas_qxm_camnoc_hf0, SC8180X_MASTER_CAMNOC_HF0, 1, 32, SC8180X_SLAVE_MNOC_HF_MEM_NOC);
+DEFINE_QNODE(mas_qxm_camnoc_hf1, SC8180X_MASTER_CAMNOC_HF1, 1, 32, SC8180X_SLAVE_MNOC_HF_MEM_NOC);
+DEFINE_QNODE(mas_qxm_camnoc_sf, SC8180X_MASTER_CAMNOC_SF, 1, 32, SC8180X_SLAVE_MNOC_SF_MEM_NOC);
+DEFINE_QNODE(mas_qxm_mdp0, SC8180X_MASTER_MDP_PORT0, 1, 32, SC8180X_SLAVE_MNOC_HF_MEM_NOC);
+DEFINE_QNODE(mas_qxm_mdp1, SC8180X_MASTER_MDP_PORT1, 1, 32, SC8180X_SLAVE_MNOC_HF_MEM_NOC);
+DEFINE_QNODE(mas_qxm_rot, SC8180X_MASTER_ROTATOR, 1, 32, SC8180X_SLAVE_MNOC_SF_MEM_NOC);
+DEFINE_QNODE(mas_qxm_venus0, SC8180X_MASTER_VIDEO_P0, 1, 32, SC8180X_SLAVE_MNOC_SF_MEM_NOC);
+DEFINE_QNODE(mas_qxm_venus1, SC8180X_MASTER_VIDEO_P1, 1, 32, SC8180X_SLAVE_MNOC_SF_MEM_NOC);
+DEFINE_QNODE(mas_qxm_venus_arm9, SC8180X_MASTER_VIDEO_PROC, 1, 8, SC8180X_SLAVE_MNOC_SF_MEM_NOC);
+DEFINE_QNODE(mas_qhm_snoc_cfg, SC8180X_MASTER_SNOC_CFG, 1, 4, SC8180X_SLAVE_SERVICE_SNOC);
+DEFINE_QNODE(mas_qnm_aggre1_noc, SC8180X_A1NOC_SNOC_MAS, 1, 32, SC8180X_SLAVE_SNOC_GEM_NOC_SF, SC8180X_SLAVE_PIMEM, SC8180X_SLAVE_OCIMEM, SC8180X_SLAVE_APPSS, SC8180X_SNOC_CNOC_SLV, SC8180X_SLAVE_QDSS_STM);
+DEFINE_QNODE(mas_qnm_aggre2_noc, SC8180X_A2NOC_SNOC_MAS, 1, 16, SC8180X_SLAVE_SNOC_GEM_NOC_SF, SC8180X_SLAVE_PIMEM, SC8180X_SLAVE_PCIE_3, SC8180X_SLAVE_OCIMEM, SC8180X_SLAVE_APPSS, SC8180X_SLAVE_PCIE_2, SC8180X_SNOC_CNOC_SLV, SC8180X_SLAVE_PCIE_0, SC8180X_SLAVE_PCIE_1, SC8180X_SLAVE_TCU, SC8180X_SLAVE_QDSS_STM);
+DEFINE_QNODE(mas_qnm_gemnoc, SC8180X_MASTER_GEM_NOC_SNOC, 1, 8, SC8180X_SLAVE_PIMEM, SC8180X_SLAVE_OCIMEM, SC8180X_SLAVE_APPSS, SC8180X_SNOC_CNOC_SLV, SC8180X_SLAVE_TCU, SC8180X_SLAVE_QDSS_STM);
+DEFINE_QNODE(mas_qxm_pimem, SC8180X_MASTER_PIMEM, 1, 8, SC8180X_SLAVE_SNOC_GEM_NOC_GC, SC8180X_SLAVE_OCIMEM);
+DEFINE_QNODE(mas_xm_gic, SC8180X_MASTER_GIC, 1, 8, SC8180X_SLAVE_SNOC_GEM_NOC_GC, SC8180X_SLAVE_OCIMEM);
+DEFINE_QNODE(slv_qns_a1noc_snoc, SC8180X_A1NOC_SNOC_SLV, 1, 32, SC8180X_A1NOC_SNOC_MAS);
+DEFINE_QNODE(slv_srvc_aggre1_noc, SC8180X_SLAVE_SERVICE_A1NOC, 1, 4);
+DEFINE_QNODE(slv_qns_a2noc_snoc, SC8180X_A2NOC_SNOC_SLV, 1, 16, SC8180X_A2NOC_SNOC_MAS);
+DEFINE_QNODE(slv_qns_pcie_mem_noc, SC8180X_SLAVE_ANOC_PCIE_GEM_NOC, 1, 32, SC8180X_MASTER_GEM_NOC_PCIE_SNOC);
+DEFINE_QNODE(slv_srvc_aggre2_noc, SC8180X_SLAVE_SERVICE_A2NOC, 1, 4);
+DEFINE_QNODE(slv_qns_camnoc_uncomp, SC8180X_SLAVE_CAMNOC_UNCOMP, 1, 32);
+DEFINE_QNODE(slv_qns_cdsp_mem_noc, SC8180X_SLAVE_CDSP_MEM_NOC, 2, 32, SC8180X_MASTER_COMPUTE_NOC);
+DEFINE_QNODE(slv_qhs_a1_noc_cfg, SC8180X_SLAVE_A1NOC_CFG, 1, 4, SC8180X_MASTER_A1NOC_CFG);
+DEFINE_QNODE(slv_qhs_a2_noc_cfg, SC8180X_SLAVE_A2NOC_CFG, 1, 4, SC8180X_MASTER_A2NOC_CFG);
+DEFINE_QNODE(slv_qhs_ahb2phy_refgen_center, SC8180X_SLAVE_AHB2PHY_CENTER, 1, 4);
+DEFINE_QNODE(slv_qhs_ahb2phy_refgen_east, SC8180X_SLAVE_AHB2PHY_EAST, 1, 4);
+DEFINE_QNODE(slv_qhs_ahb2phy_refgen_west, SC8180X_SLAVE_AHB2PHY_WEST, 1, 4);
+DEFINE_QNODE(slv_qhs_ahb2phy_south, SC8180X_SLAVE_AHB2PHY_SOUTH, 1, 4);
+DEFINE_QNODE(slv_qhs_aop, SC8180X_SLAVE_AOP, 1, 4);
+DEFINE_QNODE(slv_qhs_aoss, SC8180X_SLAVE_AOSS, 1, 4);
+DEFINE_QNODE(slv_qhs_camera_cfg, SC8180X_SLAVE_CAMERA_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_clk_ctl, SC8180X_SLAVE_CLK_CTL, 1, 4);
+DEFINE_QNODE(slv_qhs_compute_dsp, SC8180X_SLAVE_CDSP_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_cpr_cx, SC8180X_SLAVE_RBCPR_CX_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_cpr_mmcx, SC8180X_SLAVE_RBCPR_MMCX_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_cpr_mx, SC8180X_SLAVE_RBCPR_MX_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_crypto0_cfg, SC8180X_SLAVE_CRYPTO_0_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_ddrss_cfg, SC8180X_SLAVE_CNOC_DDRSS, 1, 4, SC8180X_MASTER_CNOC_DC_NOC);
+DEFINE_QNODE(slv_qhs_display_cfg, SC8180X_SLAVE_DISPLAY_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_emac_cfg, SC8180X_SLAVE_EMAC_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_glm, SC8180X_SLAVE_GLM, 1, 4);
+DEFINE_QNODE(slv_qhs_gpuss_cfg, SC8180X_SLAVE_GRAPHICS_3D_CFG, 1, 8);
+DEFINE_QNODE(slv_qhs_imem_cfg, SC8180X_SLAVE_IMEM_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_ipa, SC8180X_SLAVE_IPA_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_mnoc_cfg, SC8180X_SLAVE_CNOC_MNOC_CFG, 1, 4, SC8180X_MASTER_CNOC_MNOC_CFG);
+DEFINE_QNODE(slv_qhs_npu_cfg, SC8180X_SLAVE_NPU_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_pcie0_cfg, SC8180X_SLAVE_PCIE_0_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_pcie1_cfg, SC8180X_SLAVE_PCIE_1_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_pcie2_cfg, SC8180X_SLAVE_PCIE_2_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_pcie3_cfg, SC8180X_SLAVE_PCIE_3_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_pdm, SC8180X_SLAVE_PDM, 1, 4);
+DEFINE_QNODE(slv_qhs_pimem_cfg, SC8180X_SLAVE_PIMEM_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_prng, SC8180X_SLAVE_PRNG, 1, 4);
+DEFINE_QNODE(slv_qhs_qdss_cfg, SC8180X_SLAVE_QDSS_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_qspi_0, SC8180X_SLAVE_QSPI_0, 1, 4);
+DEFINE_QNODE(slv_qhs_qspi_1, SC8180X_SLAVE_QSPI_1, 1, 4);
+DEFINE_QNODE(slv_qhs_qupv3_east0, SC8180X_SLAVE_QUP_1, 1, 4);
+DEFINE_QNODE(slv_qhs_qupv3_east1, SC8180X_SLAVE_QUP_2, 1, 4);
+DEFINE_QNODE(slv_qhs_qupv3_west, SC8180X_SLAVE_QUP_0, 1, 4);
+DEFINE_QNODE(slv_qhs_sdc2, SC8180X_SLAVE_SDCC_2, 1, 4);
+DEFINE_QNODE(slv_qhs_sdc4, SC8180X_SLAVE_SDCC_4, 1, 4);
+DEFINE_QNODE(slv_qhs_security, SC8180X_SLAVE_SECURITY, 1, 4);
+DEFINE_QNODE(slv_qhs_snoc_cfg, SC8180X_SLAVE_SNOC_CFG, 1, 4, SC8180X_MASTER_SNOC_CFG);
+DEFINE_QNODE(slv_qhs_spss_cfg, SC8180X_SLAVE_SPSS_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_tcsr, SC8180X_SLAVE_TCSR, 1, 4);
+DEFINE_QNODE(slv_qhs_tlmm_east, SC8180X_SLAVE_TLMM_EAST, 1, 4);
+DEFINE_QNODE(slv_qhs_tlmm_south, SC8180X_SLAVE_TLMM_SOUTH, 1, 4);
+DEFINE_QNODE(slv_qhs_tlmm_west, SC8180X_SLAVE_TLMM_WEST, 1, 4);
+DEFINE_QNODE(slv_qhs_tsif, SC8180X_SLAVE_TSIF, 1, 4);
+DEFINE_QNODE(slv_qhs_ufs_card_cfg, SC8180X_SLAVE_UFS_CARD_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_ufs_mem0_cfg, SC8180X_SLAVE_UFS_MEM_0_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_ufs_mem1_cfg, SC8180X_SLAVE_UFS_MEM_1_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_usb3_0, SC8180X_SLAVE_USB3, 1, 4);
+DEFINE_QNODE(slv_qhs_usb3_1, SC8180X_SLAVE_USB3_1, 1, 4);
+DEFINE_QNODE(slv_qhs_usb3_2, SC8180X_SLAVE_USB3_2, 1, 4);
+DEFINE_QNODE(slv_qhs_venus_cfg, SC8180X_SLAVE_VENUS_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_vsense_ctrl_cfg, SC8180X_SLAVE_VSENSE_CTRL_CFG, 1, 4);
+DEFINE_QNODE(slv_srvc_cnoc, SC8180X_SLAVE_SERVICE_CNOC, 1, 4);
+DEFINE_QNODE(slv_qhs_gemnoc, SC8180X_SLAVE_GEM_NOC_CFG, 1, 4, SC8180X_MASTER_GEM_NOC_CFG);
+DEFINE_QNODE(slv_qhs_llcc, SC8180X_SLAVE_LLCC_CFG, 1, 4);
+DEFINE_QNODE(slv_qhs_mdsp_ms_mpu_cfg, SC8180X_SLAVE_MSS_PROC_MS_MPU_CFG, 1, 4);
+DEFINE_QNODE(slv_qns_ecc, SC8180X_SLAVE_ECC, 1, 32);
+DEFINE_QNODE(slv_qns_gem_noc_snoc, SC8180X_SLAVE_GEM_NOC_SNOC, 1, 8, SC8180X_MASTER_GEM_NOC_SNOC);
+DEFINE_QNODE(slv_qns_llcc, SC8180X_SLAVE_LLCC, 8, 16, SC8180X_MASTER_LLCC);
+DEFINE_QNODE(slv_srvc_gemnoc, SC8180X_SLAVE_SERVICE_GEM_NOC, 1, 4);
+DEFINE_QNODE(slv_srvc_gemnoc1, SC8180X_SLAVE_SERVICE_GEM_NOC_1, 1, 4);
+DEFINE_QNODE(slv_ipa_core_slave, SC8180X_SLAVE_IPA_CORE, 1, 8);
+DEFINE_QNODE(slv_ebi, SC8180X_SLAVE_EBI_CH0, 8, 4);
+DEFINE_QNODE(slv_qns2_mem_noc, SC8180X_SLAVE_MNOC_SF_MEM_NOC, 1, 32, SC8180X_MASTER_MNOC_SF_MEM_NOC);
+DEFINE_QNODE(slv_qns_mem_noc_hf, SC8180X_SLAVE_MNOC_HF_MEM_NOC, 2, 32, SC8180X_MASTER_MNOC_HF_MEM_NOC);
+DEFINE_QNODE(slv_srvc_mnoc, SC8180X_SLAVE_SERVICE_MNOC, 1, 4);
+DEFINE_QNODE(slv_qhs_apss, SC8180X_SLAVE_APPSS, 1, 8);
+DEFINE_QNODE(slv_qns_cnoc, SC8180X_SNOC_CNOC_SLV, 1, 8, SC8180X_SNOC_CNOC_MAS);
+DEFINE_QNODE(slv_qns_gemnoc_gc, SC8180X_SLAVE_SNOC_GEM_NOC_GC, 1, 8, SC8180X_MASTER_SNOC_GC_MEM_NOC);
+DEFINE_QNODE(slv_qns_gemnoc_sf, SC8180X_SLAVE_SNOC_GEM_NOC_SF, 1, 32, SC8180X_MASTER_SNOC_SF_MEM_NOC);
+DEFINE_QNODE(slv_qxs_imem, SC8180X_SLAVE_OCIMEM, 1, 8);
+DEFINE_QNODE(slv_qxs_pimem, SC8180X_SLAVE_PIMEM, 1, 8);
+DEFINE_QNODE(slv_srvc_snoc, SC8180X_SLAVE_SERVICE_SNOC, 1, 4);
+DEFINE_QNODE(slv_xs_pcie_0, SC8180X_SLAVE_PCIE_0, 1, 8);
+DEFINE_QNODE(slv_xs_pcie_1, SC8180X_SLAVE_PCIE_1, 1, 8);
+DEFINE_QNODE(slv_xs_pcie_2, SC8180X_SLAVE_PCIE_2, 1, 8);
+DEFINE_QNODE(slv_xs_pcie_3, SC8180X_SLAVE_PCIE_3, 1, 8);
+DEFINE_QNODE(slv_xs_qdss_stm, SC8180X_SLAVE_QDSS_STM, 1, 4);
+DEFINE_QNODE(slv_xs_sys_tcu_cfg, SC8180X_SLAVE_TCU, 1, 8);
+
+DEFINE_QBCM(bcm_acv, "ACV", false, &slv_ebi);
+DEFINE_QBCM(bcm_mc0, "MC0", false, &slv_ebi);
+DEFINE_QBCM(bcm_sh0, "SH0", false, &slv_qns_llcc);
+DEFINE_QBCM(bcm_mm0, "MM0", false, &slv_qns_mem_noc_hf);
+DEFINE_QBCM(bcm_co0, "CO0", false, &slv_qns_cdsp_mem_noc);
+DEFINE_QBCM(bcm_ce0, "CE0", false, &mas_qxm_crypto);
+DEFINE_QBCM(bcm_cn0, "CN0", false, &mas_qnm_snoc, &slv_qhs_a1_noc_cfg, &slv_qhs_a2_noc_cfg, &slv_qhs_ahb2phy_refgen_center, &slv_qhs_ahb2phy_refgen_east, &slv_qhs_ahb2phy_refgen_west, &slv_qhs_ahb2phy_south, &slv_qhs_aop, &slv_qhs_aoss, &slv_qhs_camera_cfg, &slv_qhs_clk_ctl, &slv_qhs_compute_dsp, &slv_qhs_cpr_cx, &slv_qhs_cpr_mmcx, &slv_qhs_cpr_mx, &slv_qhs_crypto0_cfg, &slv_qhs_ddrss_cfg, &slv_qhs_display_cfg, &slv_qhs_emac_cfg, &slv_qhs_glm, &slv_qhs_gpuss_cfg, &slv_qhs_imem_cfg, &slv_qhs_ipa, &slv_qhs_mnoc_cfg, &slv_qhs_npu_cfg, &slv_qhs_pcie0_cfg, &slv_qhs_pcie1_cfg, &slv_qhs_pcie2_cfg, &slv_qhs_pcie3_cfg, &slv_qhs_pdm, &slv_qhs_pimem_cfg, &slv_qhs_prng, &slv_qhs_qdss_cfg, &slv_qhs_qspi_0, &slv_qhs_qspi_1, &slv_qhs_qupv3_east0, &slv_qhs_qupv3_east1, &slv_qhs_qupv3_west, &slv_qhs_sdc2, &slv_qhs_sdc4, &slv_qhs_security, &slv_qhs_snoc_cfg, &slv_qhs_spss_cfg, &slv_qhs_tcsr, &slv_qhs_tlmm_east, &slv_qhs_tlmm_south, &slv_qhs_tlmm_west, &slv_qhs_tsif, &slv_qhs_ufs_card_cfg, &slv_qhs_ufs_mem0_cfg, &slv_qhs_ufs_mem1_cfg, &slv_qhs_usb3_0, &slv_qhs_usb3_1, &slv_qhs_usb3_2, &slv_qhs_venus_cfg, &slv_qhs_vsense_ctrl_cfg, &slv_srvc_cnoc);
+DEFINE_QBCM(bcm_mm1, "MM1", false, &mas_qxm_camnoc_hf0_uncomp, &mas_qxm_camnoc_hf1_uncomp, &mas_qxm_camnoc_sf_uncomp, &mas_qxm_camnoc_hf0, &mas_qxm_camnoc_hf1, &mas_qxm_mdp0, &mas_qxm_mdp1);
+DEFINE_QBCM(bcm_qup0, "QUP0", false, &mas_qhm_qup0, &mas_qhm_qup1, &mas_qhm_qup2);
+DEFINE_QBCM(bcm_sh2, "SH2", false, &slv_qns_gem_noc_snoc);
+DEFINE_QBCM(bcm_mm2, "MM2", false, &mas_qxm_camnoc_sf, &mas_qxm_rot, &mas_qxm_venus0, &mas_qxm_venus1, &mas_qxm_venus_arm9, &slv_qns2_mem_noc);
+DEFINE_QBCM(bcm_sh3, "SH3", false, &mas_acm_apps);
+DEFINE_QBCM(bcm_sn0, "SN0", false, &slv_qns_gemnoc_sf);
+DEFINE_QBCM(bcm_sn1, "SN1", false, &slv_qxs_imem);
+DEFINE_QBCM(bcm_sn2, "SN2", false, &slv_qns_gemnoc_gc);
+DEFINE_QBCM(bcm_co2, "CO2", false, &mas_qnm_npu);
+DEFINE_QBCM(bcm_ip0, "IP0", false, &slv_ipa_core_slave);
+DEFINE_QBCM(bcm_sn3, "SN3", false, &slv_srvc_aggre1_noc, &slv_qns_cnoc);
+DEFINE_QBCM(bcm_sn4, "SN4", false, &slv_qxs_pimem);
+DEFINE_QBCM(bcm_sn8, "SN8", false, &slv_xs_pcie_0, &slv_xs_pcie_1, &slv_xs_pcie_2, &slv_xs_pcie_3);
+DEFINE_QBCM(bcm_sn9, "SN9", false, &mas_qnm_aggre1_noc);
+DEFINE_QBCM(bcm_sn11, "SN11", false, &mas_qnm_aggre2_noc);
+DEFINE_QBCM(bcm_sn14, "SN14", false, &slv_qns_pcie_mem_noc);
+DEFINE_QBCM(bcm_sn15, "SN15", false, &mas_qnm_gemnoc);
+
+static struct qcom_icc_bcm *aggre1_noc_bcms[] = {
+ &bcm_sn3,
+ &bcm_ce0,
+ &bcm_qup0,
+};
+
+static struct qcom_icc_bcm *aggre2_noc_bcms[] = {
+ &bcm_sn14,
+ &bcm_ce0,
+ &bcm_qup0,
+};
+
+static struct qcom_icc_bcm *camnoc_virt_bcms[] = {
+ &bcm_mm1,
+};
+
+static struct qcom_icc_bcm *compute_noc_bcms[] = {
+ &bcm_co0,
+ &bcm_co2,
+};
+
+static struct qcom_icc_bcm *config_noc_bcms[] = {
+ &bcm_cn0,
+};
+
+static struct qcom_icc_bcm *gem_noc_bcms[] = {
+ &bcm_sh0,
+ &bcm_sh2,
+ &bcm_sh3,
+};
+
+static struct qcom_icc_bcm *ipa_virt_bcms[] = {
+ &bcm_ip0,
+};
+
+static struct qcom_icc_bcm *mc_virt_bcms[] = {
+ &bcm_mc0,
+ &bcm_acv,
+};
+
+static struct qcom_icc_bcm *mmss_noc_bcms[] = {
+ &bcm_mm0,
+ &bcm_mm1,
+ &bcm_mm2,
+};
+
+static struct qcom_icc_bcm *system_noc_bcms[] = {
+ &bcm_sn0,
+ &bcm_sn1,
+ &bcm_sn2,
+ &bcm_sn3,
+ &bcm_sn4,
+ &bcm_sn8,
+ &bcm_sn9,
+ &bcm_sn11,
+ &bcm_sn15,
+};
+
+static struct qcom_icc_node *aggre1_noc_nodes[] = {
+ [MASTER_A1NOC_CFG] = &mas_qhm_a1noc_cfg,
+ [MASTER_UFS_CARD] = &mas_xm_ufs_card,
+ [MASTER_UFS_GEN4] = &mas_xm_ufs_g4,
+ [MASTER_UFS_MEM] = &mas_xm_ufs_mem,
+ [MASTER_USB3] = &mas_xm_usb3_0,
+ [MASTER_USB3_1] = &mas_xm_usb3_1,
+ [MASTER_USB3_2] = &mas_xm_usb3_2,
+ [A1NOC_SNOC_SLV] = &slv_qns_a1noc_snoc,
+ [SLAVE_SERVICE_A1NOC] = &slv_srvc_aggre1_noc,
+};
+
+static struct qcom_icc_node *aggre2_noc_nodes[] = {
+ [MASTER_A2NOC_CFG] = &mas_qhm_a2noc_cfg,
+ [MASTER_QDSS_BAM] = &mas_qhm_qdss_bam,
+ [MASTER_QSPI_0] = &mas_qhm_qspi,
+ [MASTER_QSPI_1] = &mas_qhm_qspi1,
+ [MASTER_QUP_0] = &mas_qhm_qup0,
+ [MASTER_QUP_1] = &mas_qhm_qup1,
+ [MASTER_QUP_2] = &mas_qhm_qup2,
+ [MASTER_SENSORS_AHB] = &mas_qhm_sensorss_ahb,
+ [MASTER_CRYPTO_CORE_0] = &mas_qxm_crypto,
+ [MASTER_IPA] = &mas_qxm_ipa,
+ [MASTER_EMAC] = &mas_xm_emac,
+ [MASTER_PCIE] = &mas_xm_pcie3_0,
+ [MASTER_PCIE_1] = &mas_xm_pcie3_1,
+ [MASTER_PCIE_2] = &mas_xm_pcie3_2,
+ [MASTER_PCIE_3] = &mas_xm_pcie3_3,
+ [MASTER_QDSS_ETR] = &mas_xm_qdss_etr,
+ [MASTER_SDCC_2] = &mas_xm_sdc2,
+ [MASTER_SDCC_4] = &mas_xm_sdc4,
+ [A2NOC_SNOC_SLV] = &slv_qns_a2noc_snoc,
+ [SLAVE_ANOC_PCIE_GEM_NOC] = &slv_qns_pcie_mem_noc,
+ [SLAVE_SERVICE_A2NOC] = &slv_srvc_aggre2_noc,
+};
+
+static struct qcom_icc_node *camnoc_virt_nodes[] = {
+ [MASTER_CAMNOC_HF0_UNCOMP] = &mas_qxm_camnoc_hf0_uncomp,
+ [MASTER_CAMNOC_HF1_UNCOMP] = &mas_qxm_camnoc_hf1_uncomp,
+ [MASTER_CAMNOC_SF_UNCOMP] = &mas_qxm_camnoc_sf_uncomp,
+ [SLAVE_CAMNOC_UNCOMP] = &slv_qns_camnoc_uncomp,
+};
+
+static struct qcom_icc_node *compute_noc_nodes[] = {
+ [MASTER_NPU] = &mas_qnm_npu,
+ [SLAVE_CDSP_MEM_NOC] = &slv_qns_cdsp_mem_noc,
+};
+
+static struct qcom_icc_node *config_noc_nodes[] = {
+ [SNOC_CNOC_MAS] = &mas_qnm_snoc,
+ [SLAVE_A1NOC_CFG] = &slv_qhs_a1_noc_cfg,
+ [SLAVE_A2NOC_CFG] = &slv_qhs_a2_noc_cfg,
+ [SLAVE_AHB2PHY_CENTER] = &slv_qhs_ahb2phy_refgen_center,
+ [SLAVE_AHB2PHY_EAST] = &slv_qhs_ahb2phy_refgen_east,
+ [SLAVE_AHB2PHY_WEST] = &slv_qhs_ahb2phy_refgen_west,
+ [SLAVE_AHB2PHY_SOUTH] = &slv_qhs_ahb2phy_south,
+ [SLAVE_AOP] = &slv_qhs_aop,
+ [SLAVE_AOSS] = &slv_qhs_aoss,
+ [SLAVE_CAMERA_CFG] = &slv_qhs_camera_cfg,
+ [SLAVE_CLK_CTL] = &slv_qhs_clk_ctl,
+ [SLAVE_CDSP_CFG] = &slv_qhs_compute_dsp,
+ [SLAVE_RBCPR_CX_CFG] = &slv_qhs_cpr_cx,
+ [SLAVE_RBCPR_MMCX_CFG] = &slv_qhs_cpr_mmcx,
+ [SLAVE_RBCPR_MX_CFG] = &slv_qhs_cpr_mx,
+ [SLAVE_CRYPTO_0_CFG] = &slv_qhs_crypto0_cfg,
+ [SLAVE_CNOC_DDRSS] = &slv_qhs_ddrss_cfg,
+ [SLAVE_DISPLAY_CFG] = &slv_qhs_display_cfg,
+ [SLAVE_EMAC_CFG] = &slv_qhs_emac_cfg,
+ [SLAVE_GLM] = &slv_qhs_glm,
+ [SLAVE_GRAPHICS_3D_CFG] = &slv_qhs_gpuss_cfg,
+ [SLAVE_IMEM_CFG] = &slv_qhs_imem_cfg,
+ [SLAVE_IPA_CFG] = &slv_qhs_ipa,
+ [SLAVE_CNOC_MNOC_CFG] = &slv_qhs_mnoc_cfg,
+ [SLAVE_NPU_CFG] = &slv_qhs_npu_cfg,
+ [SLAVE_PCIE_0_CFG] = &slv_qhs_pcie0_cfg,
+ [SLAVE_PCIE_1_CFG] = &slv_qhs_pcie1_cfg,
+ [SLAVE_PCIE_2_CFG] = &slv_qhs_pcie2_cfg,
+ [SLAVE_PCIE_3_CFG] = &slv_qhs_pcie3_cfg,
+ [SLAVE_PDM] = &slv_qhs_pdm,
+ [SLAVE_PIMEM_CFG] = &slv_qhs_pimem_cfg,
+ [SLAVE_PRNG] = &slv_qhs_prng,
+ [SLAVE_QDSS_CFG] = &slv_qhs_qdss_cfg,
+ [SLAVE_QSPI_0] = &slv_qhs_qspi_0,
+ [SLAVE_QSPI_1] = &slv_qhs_qspi_1,
+ [SLAVE_QUP_1] = &slv_qhs_qupv3_east0,
+ [SLAVE_QUP_2] = &slv_qhs_qupv3_east1,
+ [SLAVE_QUP_0] = &slv_qhs_qupv3_west,
+ [SLAVE_SDCC_2] = &slv_qhs_sdc2,
+ [SLAVE_SDCC_4] = &slv_qhs_sdc4,
+ [SLAVE_SECURITY] = &slv_qhs_security,
+ [SLAVE_SNOC_CFG] = &slv_qhs_snoc_cfg,
+ [SLAVE_SPSS_CFG] = &slv_qhs_spss_cfg,
+ [SLAVE_TCSR] = &slv_qhs_tcsr,
+ [SLAVE_TLMM_EAST] = &slv_qhs_tlmm_east,
+ [SLAVE_TLMM_SOUTH] = &slv_qhs_tlmm_south,
+ [SLAVE_TLMM_WEST] = &slv_qhs_tlmm_west,
+ [SLAVE_TSIF] = &slv_qhs_tsif,
+ [SLAVE_UFS_CARD_CFG] = &slv_qhs_ufs_card_cfg,
+ [SLAVE_UFS_MEM_0_CFG] = &slv_qhs_ufs_mem0_cfg,
+ [SLAVE_UFS_MEM_1_CFG] = &slv_qhs_ufs_mem1_cfg,
+ [SLAVE_USB3] = &slv_qhs_usb3_0,
+ [SLAVE_USB3_1] = &slv_qhs_usb3_1,
+ [SLAVE_USB3_2] = &slv_qhs_usb3_2,
+ [SLAVE_VENUS_CFG] = &slv_qhs_venus_cfg,
+ [SLAVE_VSENSE_CTRL_CFG] = &slv_qhs_vsense_ctrl_cfg,
+ [SLAVE_SERVICE_CNOC] = &slv_srvc_cnoc,
+};
+
+static struct qcom_icc_node *dc_noc_nodes[] = {
+ [MASTER_CNOC_DC_NOC] = &mas_qhm_cnoc_dc_noc,
+ [SLAVE_GEM_NOC_CFG] = &slv_qhs_gemnoc,
+ [SLAVE_LLCC_CFG] = &slv_qhs_llcc,
+};
+
+static struct qcom_icc_node *gem_noc_nodes[] = {
+ [MASTER_AMPSS_M0] = &mas_acm_apps,
+ [MASTER_GPU_TCU] = &mas_acm_gpu_tcu,
+ [MASTER_SYS_TCU] = &mas_acm_sys_tcu,
+ [MASTER_GEM_NOC_CFG] = &mas_qhm_gemnoc_cfg,
+ [MASTER_COMPUTE_NOC] = &mas_qnm_cmpnoc,
+ [MASTER_GRAPHICS_3D] = &mas_qnm_gpu,
+ [MASTER_MNOC_HF_MEM_NOC] = &mas_qnm_mnoc_hf,
+ [MASTER_MNOC_SF_MEM_NOC] = &mas_qnm_mnoc_sf,
+ [MASTER_GEM_NOC_PCIE_SNOC] = &mas_qnm_pcie,
+ [MASTER_SNOC_GC_MEM_NOC] = &mas_qnm_snoc_gc,
+ [MASTER_SNOC_SF_MEM_NOC] = &mas_qnm_snoc_sf,
+ [MASTER_ECC] = &mas_qxm_ecc,
+ [SLAVE_MSS_PROC_MS_MPU_CFG] = &slv_qhs_mdsp_ms_mpu_cfg,
+ [SLAVE_ECC] = &slv_qns_ecc,
+ [SLAVE_GEM_NOC_SNOC] = &slv_qns_gem_noc_snoc,
+ [SLAVE_LLCC] = &slv_qns_llcc,
+ [SLAVE_SERVICE_GEM_NOC] = &slv_srvc_gemnoc,
+ [SLAVE_SERVICE_GEM_NOC_1] = &slv_srvc_gemnoc1,
+};
+
+static struct qcom_icc_node *ipa_virt_nodes[] = {
+ [MASTER_IPA_CORE] = &mas_ipa_core_master,
+ [SLAVE_IPA_CORE] = &slv_ipa_core_slave,
+};
+
+static struct qcom_icc_node *mc_virt_nodes[] = {
+ [MASTER_LLCC] = &mas_llcc_mc,
+ [SLAVE_EBI_CH0] = &slv_ebi,
+};
+
+static struct qcom_icc_node *mmss_noc_nodes[] = {
+ [MASTER_CNOC_MNOC_CFG] = &mas_qhm_mnoc_cfg,
+ [MASTER_CAMNOC_HF0] = &mas_qxm_camnoc_hf0,
+ [MASTER_CAMNOC_HF1] = &mas_qxm_camnoc_hf1,
+ [MASTER_CAMNOC_SF] = &mas_qxm_camnoc_sf,
+ [MASTER_MDP_PORT0] = &mas_qxm_mdp0,
+ [MASTER_MDP_PORT1] = &mas_qxm_mdp1,
+ [MASTER_ROTATOR] = &mas_qxm_rot,
+ [MASTER_VIDEO_P0] = &mas_qxm_venus0,
+ [MASTER_VIDEO_P1] = &mas_qxm_venus1,
+ [MASTER_VIDEO_PROC] = &mas_qxm_venus_arm9,
+ [SLAVE_MNOC_SF_MEM_NOC] = &slv_qns2_mem_noc,
+ [SLAVE_MNOC_HF_MEM_NOC] = &slv_qns_mem_noc_hf,
+ [SLAVE_SERVICE_MNOC] = &slv_srvc_mnoc,
+};
+
+static struct qcom_icc_node *system_noc_nodes[] = {
+ [MASTER_SNOC_CFG] = &mas_qhm_snoc_cfg,
+ [A1NOC_SNOC_MAS] = &mas_qnm_aggre1_noc,
+ [A2NOC_SNOC_MAS] = &mas_qnm_aggre2_noc,
+ [MASTER_GEM_NOC_SNOC] = &mas_qnm_gemnoc,
+ [MASTER_PIMEM] = &mas_qxm_pimem,
+ [MASTER_GIC] = &mas_xm_gic,
+ [SLAVE_APPSS] = &slv_qhs_apss,
+ [SNOC_CNOC_SLV] = &slv_qns_cnoc,
+ [SLAVE_SNOC_GEM_NOC_GC] = &slv_qns_gemnoc_gc,
+ [SLAVE_SNOC_GEM_NOC_SF] = &slv_qns_gemnoc_sf,
+ [SLAVE_OCIMEM] = &slv_qxs_imem,
+ [SLAVE_PIMEM] = &slv_qxs_pimem,
+ [SLAVE_SERVICE_SNOC] = &slv_srvc_snoc,
+ [SLAVE_QDSS_STM] = &slv_xs_qdss_stm,
+ [SLAVE_TCU] = &slv_xs_sys_tcu_cfg,
+};
+
+static const struct qcom_icc_desc sc8180x_aggre1_noc = {
+ .nodes = aggre1_noc_nodes,
+ .num_nodes = ARRAY_SIZE(aggre1_noc_nodes),
+ .bcms = aggre1_noc_bcms,
+ .num_bcms = ARRAY_SIZE(aggre1_noc_bcms),
+};
+
+static const struct qcom_icc_desc sc8180x_aggre2_noc = {
+ .nodes = aggre2_noc_nodes,
+ .num_nodes = ARRAY_SIZE(aggre2_noc_nodes),
+ .bcms = aggre2_noc_bcms,
+ .num_bcms = ARRAY_SIZE(aggre2_noc_bcms),
+};
+
+static const struct qcom_icc_desc sc8180x_camnoc_virt = {
+ .nodes = camnoc_virt_nodes,
+ .num_nodes = ARRAY_SIZE(camnoc_virt_nodes),
+ .bcms = camnoc_virt_bcms,
+ .num_bcms = ARRAY_SIZE(camnoc_virt_bcms),
+};
+
+static const struct qcom_icc_desc sc8180x_compute_noc = {
+ .nodes = compute_noc_nodes,
+ .num_nodes = ARRAY_SIZE(compute_noc_nodes),
+ .bcms = compute_noc_bcms,
+ .num_bcms = ARRAY_SIZE(compute_noc_bcms),
+};
+
+static const struct qcom_icc_desc sc8180x_config_noc = {
+ .nodes = config_noc_nodes,
+ .num_nodes = ARRAY_SIZE(config_noc_nodes),
+ .bcms = config_noc_bcms,
+ .num_bcms = ARRAY_SIZE(config_noc_bcms),
+};
+
+static const struct qcom_icc_desc sc8180x_dc_noc = {
+ .nodes = dc_noc_nodes,
+ .num_nodes = ARRAY_SIZE(dc_noc_nodes),
+};
+
+static const struct qcom_icc_desc sc8180x_gem_noc = {
+ .nodes = gem_noc_nodes,
+ .num_nodes = ARRAY_SIZE(gem_noc_nodes),
+ .bcms = gem_noc_bcms,
+ .num_bcms = ARRAY_SIZE(gem_noc_bcms),
+};
+
+static const struct qcom_icc_desc sc8180x_ipa_virt = {
+ .nodes = ipa_virt_nodes,
+ .num_nodes = ARRAY_SIZE(ipa_virt_nodes),
+ .bcms = ipa_virt_bcms,
+ .num_bcms = ARRAY_SIZE(ipa_virt_bcms),
+};
+
+static const struct qcom_icc_desc sc8180x_mc_virt = {
+ .nodes = mc_virt_nodes,
+ .num_nodes = ARRAY_SIZE(mc_virt_nodes),
+ .bcms = mc_virt_bcms,
+ .num_bcms = ARRAY_SIZE(mc_virt_bcms),
+};
+
+static const struct qcom_icc_desc sc8180x_mmss_noc = {
+ .nodes = mmss_noc_nodes,
+ .num_nodes = ARRAY_SIZE(mmss_noc_nodes),
+ .bcms = mmss_noc_bcms,
+ .num_bcms = ARRAY_SIZE(mmss_noc_bcms),
+};
+
+static const struct qcom_icc_desc sc8180x_system_noc = {
+ .nodes = system_noc_nodes,
+ .num_nodes = ARRAY_SIZE(system_noc_nodes),
+ .bcms = system_noc_bcms,
+ .num_bcms = ARRAY_SIZE(system_noc_bcms),
+};
+
+/* Probe one sc8180x NoC: register an interconnect provider and create a node plus links for each populated descriptor entry; returns 0 or a negative errno. */
+static int qnoc_probe(struct platform_device *pdev)
+{
+	const struct qcom_icc_desc *desc;
+	struct icc_onecell_data *data;
+	struct icc_provider *provider;
+	struct qcom_icc_node **qnodes;
+	struct qcom_icc_provider *qp;
+	struct icc_node *node;
+	size_t num_nodes, i, j;
+	int ret;
+
+	desc = device_get_match_data(&pdev->dev);
+	if (!desc)
+		return -EINVAL;
+
+	qnodes = desc->nodes;
+	num_nodes = desc->num_nodes;
+
+	qp = devm_kzalloc(&pdev->dev, sizeof(*qp), GFP_KERNEL);
+	if (!qp)
+		return -ENOMEM;
+
+	/* data is a header followed by num_nodes node pointers (data->nodes[]), not num_nodes whole icc_node structs. */
+	data = devm_kzalloc(&pdev->dev, struct_size(data, nodes, num_nodes), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	provider = &qp->provider;
+	provider->dev = &pdev->dev;
+	provider->set = qcom_icc_set;
+	provider->pre_aggregate = qcom_icc_pre_aggregate;
+	provider->aggregate = qcom_icc_aggregate;
+	provider->xlate = of_icc_xlate_onecell;
+	INIT_LIST_HEAD(&provider->nodes);
+	provider->data = data;
+
+	qp->dev = &pdev->dev;
+	qp->bcms = desc->bcms;
+	qp->num_bcms = desc->num_bcms;
+
+	qp->voter = of_bcm_voter_get(qp->dev, NULL);
+	if (IS_ERR(qp->voter))
+		return PTR_ERR(qp->voter);
+
+	ret = icc_provider_add(provider);
+	if (ret) {
+		dev_err(&pdev->dev, "error adding interconnect provider\n");
+		return ret;
+	}
+
+	for (i = 0; i < qp->num_bcms; i++)
+		qcom_icc_bcm_init(qp->bcms[i], &pdev->dev);
+
+	for (i = 0; i < num_nodes; i++) {
+		if (!qnodes[i])
+			continue;
+
+		node = icc_node_create(qnodes[i]->id);
+		if (IS_ERR(node)) {
+			ret = PTR_ERR(node);
+			goto err;
+		}
+
+		node->name = qnodes[i]->name;
+		node->data = qnodes[i];
+		icc_node_add(node, provider);
+
+		for (j = 0; j < qnodes[i]->num_links; j++)
+			icc_link_create(node, qnodes[i]->links[j]);
+
+		data->nodes[i] = node;
+	}
+	data->num_nodes = num_nodes;
+
+	platform_set_drvdata(pdev, qp);
+
+	return 0;
+err:
+	icc_nodes_remove(provider);
+	icc_provider_del(provider);
+	return ret;
+}
+
+static int qnoc_remove(struct platform_device *pdev)
+{
+ struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
+
+ icc_nodes_remove(&qp->provider);
+ return icc_provider_del(&qp->provider);
+}
+
+static const struct of_device_id qnoc_of_match[] = {
+ { .compatible = "qcom,sc8180x-aggre1-noc", .data = &sc8180x_aggre1_noc },
+ { .compatible = "qcom,sc8180x-aggre2-noc", .data = &sc8180x_aggre2_noc },
+ { .compatible = "qcom,sc8180x-camnoc-virt", .data = &sc8180x_camnoc_virt },
+ { .compatible = "qcom,sc8180x-compute-noc", .data = &sc8180x_compute_noc, },
+ { .compatible = "qcom,sc8180x-config-noc", .data = &sc8180x_config_noc },
+ { .compatible = "qcom,sc8180x-dc-noc", .data = &sc8180x_dc_noc },
+ { .compatible = "qcom,sc8180x-gem-noc", .data = &sc8180x_gem_noc },
+ { .compatible = "qcom,sc8180x-ipa-virt", .data = &sc8180x_ipa_virt },
+ { .compatible = "qcom,sc8180x-mc-virt", .data = &sc8180x_mc_virt },
+ { .compatible = "qcom,sc8180x-mmss-noc", .data = &sc8180x_mmss_noc },
+ { .compatible = "qcom,sc8180x-system-noc", .data = &sc8180x_system_noc },
+ { }
+};
+MODULE_DEVICE_TABLE(of, qnoc_of_match);
+
+static struct platform_driver qnoc_driver = {
+ .probe = qnoc_probe,
+ .remove = qnoc_remove,
+ .driver = {
+ .name = "qnoc-sc8180x",
+ .of_match_table = qnoc_of_match,
+ .sync_state = icc_sync_state,
+ },
+};
+module_platform_driver(qnoc_driver);
+
+MODULE_DESCRIPTION("Qualcomm sc8180x NoC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/interconnect/qcom/sc8180x.h b/drivers/interconnect/qcom/sc8180x.h
new file mode 100644
index 000000000000..e70cf7032f80
--- /dev/null
+++ b/drivers/interconnect/qcom/sc8180x.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Qualcomm SC8180X interconnect IDs
+ *
+ * Copyright (c) 2020, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef __DRIVERS_INTERCONNECT_QCOM_SC8180X_H
+#define __DRIVERS_INTERCONNECT_QCOM_SC8180X_H
+
+#define SC8180X_MASTER_A1NOC_CFG 1
+#define SC8180X_MASTER_UFS_CARD 2
+#define SC8180X_MASTER_UFS_GEN4 3
+#define SC8180X_MASTER_UFS_MEM 4
+#define SC8180X_MASTER_USB3 5
+#define SC8180X_MASTER_USB3_1 6
+#define SC8180X_MASTER_USB3_2 7
+#define SC8180X_MASTER_A2NOC_CFG 8
+#define SC8180X_MASTER_QDSS_BAM 9
+#define SC8180X_MASTER_QSPI_0 10
+#define SC8180X_MASTER_QSPI_1 11
+#define SC8180X_MASTER_QUP_0 12
+#define SC8180X_MASTER_QUP_1 13
+#define SC8180X_MASTER_QUP_2 14
+#define SC8180X_MASTER_SENSORS_AHB 15
+#define SC8180X_MASTER_CRYPTO_CORE_0 16
+#define SC8180X_MASTER_IPA 17
+#define SC8180X_MASTER_EMAC 18
+#define SC8180X_MASTER_PCIE 19
+#define SC8180X_MASTER_PCIE_1 20
+#define SC8180X_MASTER_PCIE_2 21
+#define SC8180X_MASTER_PCIE_3 22
+#define SC8180X_MASTER_QDSS_ETR 23
+#define SC8180X_MASTER_SDCC_2 24
+#define SC8180X_MASTER_SDCC_4 25
+#define SC8180X_MASTER_CAMNOC_HF0_UNCOMP 26
+#define SC8180X_MASTER_CAMNOC_HF1_UNCOMP 27
+#define SC8180X_MASTER_CAMNOC_SF_UNCOMP 28
+#define SC8180X_MASTER_NPU 29
+#define SC8180X_SNOC_CNOC_MAS 30
+#define SC8180X_MASTER_CNOC_DC_NOC 31
+#define SC8180X_MASTER_AMPSS_M0 32
+#define SC8180X_MASTER_GPU_TCU 33
+#define SC8180X_MASTER_SYS_TCU 34
+#define SC8180X_MASTER_GEM_NOC_CFG 35
+#define SC8180X_MASTER_COMPUTE_NOC 36
+#define SC8180X_MASTER_GRAPHICS_3D 37
+#define SC8180X_MASTER_MNOC_HF_MEM_NOC 38
+#define SC8180X_MASTER_MNOC_SF_MEM_NOC 39
+#define SC8180X_MASTER_GEM_NOC_PCIE_SNOC 40
+#define SC8180X_MASTER_SNOC_GC_MEM_NOC 41
+#define SC8180X_MASTER_SNOC_SF_MEM_NOC 42
+#define SC8180X_MASTER_ECC 43
+#define SC8180X_MASTER_IPA_CORE 44
+#define SC8180X_MASTER_LLCC 45
+#define SC8180X_MASTER_CNOC_MNOC_CFG 46
+#define SC8180X_MASTER_CAMNOC_HF0 47
+#define SC8180X_MASTER_CAMNOC_HF1 48
+#define SC8180X_MASTER_CAMNOC_SF 49
+#define SC8180X_MASTER_MDP_PORT0 50
+#define SC8180X_MASTER_MDP_PORT1 51
+#define SC8180X_MASTER_ROTATOR 52
+#define SC8180X_MASTER_VIDEO_P0 53
+#define SC8180X_MASTER_VIDEO_P1 54
+#define SC8180X_MASTER_VIDEO_PROC 55
+#define SC8180X_MASTER_SNOC_CFG 56
+#define SC8180X_A1NOC_SNOC_MAS 57
+#define SC8180X_A2NOC_SNOC_MAS 58
+#define SC8180X_MASTER_GEM_NOC_SNOC 59
+#define SC8180X_MASTER_PIMEM 60
+#define SC8180X_MASTER_GIC 61
+#define SC8180X_MASTER_MNOC_HF_MEM_NOC_DISPLAY 62
+#define SC8180X_MASTER_MNOC_SF_MEM_NOC_DISPLAY 63
+#define SC8180X_MASTER_LLCC_DISPLAY 64
+#define SC8180X_MASTER_MDP_PORT0_DISPLAY 65
+#define SC8180X_MASTER_MDP_PORT1_DISPLAY 66
+#define SC8180X_MASTER_ROTATOR_DISPLAY 67
+#define SC8180X_A1NOC_SNOC_SLV 68
+#define SC8180X_SLAVE_SERVICE_A1NOC 69
+#define SC8180X_A2NOC_SNOC_SLV 70
+#define SC8180X_SLAVE_ANOC_PCIE_GEM_NOC 71
+#define SC8180X_SLAVE_SERVICE_A2NOC 72
+#define SC8180X_SLAVE_CAMNOC_UNCOMP 73
+#define SC8180X_SLAVE_CDSP_MEM_NOC 74
+#define SC8180X_SLAVE_A1NOC_CFG 75
+#define SC8180X_SLAVE_A2NOC_CFG 76
+#define SC8180X_SLAVE_AHB2PHY_CENTER 77
+#define SC8180X_SLAVE_AHB2PHY_EAST 78
+#define SC8180X_SLAVE_AHB2PHY_WEST 79
+#define SC8180X_SLAVE_AHB2PHY_SOUTH 80
+#define SC8180X_SLAVE_AOP 81
+#define SC8180X_SLAVE_AOSS 82
+#define SC8180X_SLAVE_CAMERA_CFG 83
+#define SC8180X_SLAVE_CLK_CTL 84
+#define SC8180X_SLAVE_CDSP_CFG 85
+#define SC8180X_SLAVE_RBCPR_CX_CFG 86
+#define SC8180X_SLAVE_RBCPR_MMCX_CFG 87
+#define SC8180X_SLAVE_RBCPR_MX_CFG 88
+#define SC8180X_SLAVE_CRYPTO_0_CFG 89
+#define SC8180X_SLAVE_CNOC_DDRSS 90
+#define SC8180X_SLAVE_DISPLAY_CFG 91
+#define SC8180X_SLAVE_EMAC_CFG 92
+#define SC8180X_SLAVE_GLM 93
+#define SC8180X_SLAVE_GRAPHICS_3D_CFG 94
+#define SC8180X_SLAVE_IMEM_CFG 95
+#define SC8180X_SLAVE_IPA_CFG 96
+#define SC8180X_SLAVE_CNOC_MNOC_CFG 97
+#define SC8180X_SLAVE_NPU_CFG 98
+#define SC8180X_SLAVE_PCIE_0_CFG 99
+#define SC8180X_SLAVE_PCIE_1_CFG 100
+#define SC8180X_SLAVE_PCIE_2_CFG 101
+#define SC8180X_SLAVE_PCIE_3_CFG 102
+#define SC8180X_SLAVE_PDM 103
+#define SC8180X_SLAVE_PIMEM_CFG 104
+#define SC8180X_SLAVE_PRNG 105
+#define SC8180X_SLAVE_QDSS_CFG 106
+#define SC8180X_SLAVE_QSPI_0 107
+#define SC8180X_SLAVE_QSPI_1 108
+#define SC8180X_SLAVE_QUP_1 109
+#define SC8180X_SLAVE_QUP_2 110
+#define SC8180X_SLAVE_QUP_0 111
+#define SC8180X_SLAVE_SDCC_2 112
+#define SC8180X_SLAVE_SDCC_4 113
+#define SC8180X_SLAVE_SECURITY 114
+#define SC8180X_SLAVE_SNOC_CFG 115
+#define SC8180X_SLAVE_SPSS_CFG 116
+#define SC8180X_SLAVE_TCSR 117
+#define SC8180X_SLAVE_TLMM_EAST 118
+#define SC8180X_SLAVE_TLMM_SOUTH 119
+#define SC8180X_SLAVE_TLMM_WEST 120
+#define SC8180X_SLAVE_TSIF 121
+#define SC8180X_SLAVE_UFS_CARD_CFG 122
+#define SC8180X_SLAVE_UFS_MEM_0_CFG 123
+#define SC8180X_SLAVE_UFS_MEM_1_CFG 124
+#define SC8180X_SLAVE_USB3 125
+#define SC8180X_SLAVE_USB3_1 126
+#define SC8180X_SLAVE_USB3_2 127
+#define SC8180X_SLAVE_VENUS_CFG 128
+#define SC8180X_SLAVE_VSENSE_CTRL_CFG 129
+#define SC8180X_SLAVE_SERVICE_CNOC 130
+#define SC8180X_SLAVE_GEM_NOC_CFG 131
+#define SC8180X_SLAVE_LLCC_CFG 132
+#define SC8180X_SLAVE_MSS_PROC_MS_MPU_CFG 133
+#define SC8180X_SLAVE_ECC 134
+#define SC8180X_SLAVE_GEM_NOC_SNOC 135
+#define SC8180X_SLAVE_LLCC 136
+#define SC8180X_SLAVE_SERVICE_GEM_NOC 137
+#define SC8180X_SLAVE_SERVICE_GEM_NOC_1 138
+#define SC8180X_SLAVE_IPA_CORE 139
+#define SC8180X_SLAVE_EBI_CH0 140
+#define SC8180X_SLAVE_MNOC_SF_MEM_NOC 141
+#define SC8180X_SLAVE_MNOC_HF_MEM_NOC 142
+#define SC8180X_SLAVE_SERVICE_MNOC 143
+#define SC8180X_SLAVE_APPSS 144
+#define SC8180X_SNOC_CNOC_SLV 145
+#define SC8180X_SLAVE_SNOC_GEM_NOC_GC 146
+#define SC8180X_SLAVE_SNOC_GEM_NOC_SF 147
+#define SC8180X_SLAVE_OCIMEM 148
+#define SC8180X_SLAVE_PIMEM 149
+#define SC8180X_SLAVE_SERVICE_SNOC 150
+#define SC8180X_SLAVE_PCIE_0 151
+#define SC8180X_SLAVE_PCIE_1 152
+#define SC8180X_SLAVE_PCIE_2 153
+#define SC8180X_SLAVE_PCIE_3 154
+#define SC8180X_SLAVE_QDSS_STM 155
+#define SC8180X_SLAVE_TCU 156
+#define SC8180X_SLAVE_LLCC_DISPLAY 157
+#define SC8180X_SLAVE_EBI_CH0_DISPLAY 158
+#define SC8180X_SLAVE_MNOC_SF_MEM_NOC_DISPLAY 159
+#define SC8180X_SLAVE_MNOC_HF_MEM_NOC_DISPLAY 160
+#define SC8180X_MASTER_OSM_L3_APPS 161
+#define SC8180X_SLAVE_OSM_L3 162
+
+#endif
diff --git a/drivers/interconnect/qcom/sdm845.c b/drivers/interconnect/qcom/sdm845.c
index 366870150cbd..d2195079c228 100644
--- a/drivers/interconnect/qcom/sdm845.c
+++ b/drivers/interconnect/qcom/sdm845.c
@@ -440,101 +440,6 @@ static const struct qcom_icc_desc sdm845_system_noc = {
.num_bcms = ARRAY_SIZE(system_noc_bcms),
};
-static int qnoc_probe(struct platform_device *pdev)
-{
- const struct qcom_icc_desc *desc;
- struct icc_onecell_data *data;
- struct icc_provider *provider;
- struct qcom_icc_node **qnodes;
- struct qcom_icc_provider *qp;
- struct icc_node *node;
- size_t num_nodes, i;
- int ret;
-
- desc = device_get_match_data(&pdev->dev);
- if (!desc)
- return -EINVAL;
-
- qnodes = desc->nodes;
- num_nodes = desc->num_nodes;
-
- qp = devm_kzalloc(&pdev->dev, sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return -ENOMEM;
-
- data = devm_kzalloc(&pdev->dev, struct_size(data, nodes, num_nodes),
- GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- provider = &qp->provider;
- provider->dev = &pdev->dev;
- provider->set = qcom_icc_set;
- provider->pre_aggregate = qcom_icc_pre_aggregate;
- provider->aggregate = qcom_icc_aggregate;
- provider->xlate_extended = qcom_icc_xlate_extended;
- INIT_LIST_HEAD(&provider->nodes);
- provider->data = data;
-
- qp->dev = &pdev->dev;
- qp->bcms = desc->bcms;
- qp->num_bcms = desc->num_bcms;
-
- qp->voter = of_bcm_voter_get(qp->dev, NULL);
- if (IS_ERR(qp->voter)) {
- dev_err(&pdev->dev, "bcm_voter err:%ld\n", PTR_ERR(qp->voter));
- return PTR_ERR(qp->voter);
- }
-
- ret = icc_provider_add(provider);
- if (ret) {
- dev_err(&pdev->dev, "error adding interconnect provider\n");
- return ret;
- }
-
- for (i = 0; i < qp->num_bcms; i++)
- qcom_icc_bcm_init(qp->bcms[i], &pdev->dev);
-
- for (i = 0; i < num_nodes; i++) {
- size_t j;
-
- if (!qnodes[i])
- continue;
-
- node = icc_node_create(qnodes[i]->id);
- if (IS_ERR(node)) {
- ret = PTR_ERR(node);
- goto err;
- }
-
- node->name = qnodes[i]->name;
- node->data = qnodes[i];
- icc_node_add(node, provider);
-
- for (j = 0; j < qnodes[i]->num_links; j++)
- icc_link_create(node, qnodes[i]->links[j]);
-
- data->nodes[i] = node;
- }
- data->num_nodes = num_nodes;
-
- platform_set_drvdata(pdev, qp);
-
- return 0;
-err:
- icc_nodes_remove(provider);
- icc_provider_del(provider);
- return ret;
-}
-
-static int qnoc_remove(struct platform_device *pdev)
-{
- struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
-
- icc_nodes_remove(&qp->provider);
- return icc_provider_del(&qp->provider);
-}
-
static const struct of_device_id qnoc_of_match[] = {
{ .compatible = "qcom,sdm845-aggre1-noc",
.data = &sdm845_aggre1_noc},
@@ -557,8 +462,8 @@ static const struct of_device_id qnoc_of_match[] = {
MODULE_DEVICE_TABLE(of, qnoc_of_match);
static struct platform_driver qnoc_driver = {
- .probe = qnoc_probe,
- .remove = qnoc_remove,
+ .probe = qcom_icc_rpmh_probe,
+ .remove = qcom_icc_rpmh_remove,
.driver = {
.name = "qnoc-sdm845",
.of_match_table = qnoc_of_match,
diff --git a/drivers/interconnect/qcom/sdx55.c b/drivers/interconnect/qcom/sdx55.c
index a5a122ee3d21..03d604f84cc5 100644
--- a/drivers/interconnect/qcom/sdx55.c
+++ b/drivers/interconnect/qcom/sdx55.c
@@ -235,98 +235,6 @@ static const struct qcom_icc_desc sdx55_ipa_virt = {
.num_bcms = ARRAY_SIZE(ipa_virt_bcms),
};
-static int qnoc_probe(struct platform_device *pdev)
-{
- const struct qcom_icc_desc *desc;
- struct icc_onecell_data *data;
- struct icc_provider *provider;
- struct qcom_icc_node **qnodes;
- struct qcom_icc_provider *qp;
- struct icc_node *node;
- size_t num_nodes, i;
- int ret;
-
- desc = device_get_match_data(&pdev->dev);
- if (!desc)
- return -EINVAL;
-
- qnodes = desc->nodes;
- num_nodes = desc->num_nodes;
-
- qp = devm_kzalloc(&pdev->dev, sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return -ENOMEM;
-
- data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- provider = &qp->provider;
- provider->dev = &pdev->dev;
- provider->set = qcom_icc_set;
- provider->pre_aggregate = qcom_icc_pre_aggregate;
- provider->aggregate = qcom_icc_aggregate;
- provider->xlate = of_icc_xlate_onecell;
- INIT_LIST_HEAD(&provider->nodes);
- provider->data = data;
-
- qp->dev = &pdev->dev;
- qp->bcms = desc->bcms;
- qp->num_bcms = desc->num_bcms;
-
- qp->voter = of_bcm_voter_get(qp->dev, NULL);
- if (IS_ERR(qp->voter))
- return PTR_ERR(qp->voter);
-
- ret = icc_provider_add(provider);
- if (ret) {
- dev_err(&pdev->dev, "error adding interconnect provider\n");
- return ret;
- }
-
- for (i = 0; i < qp->num_bcms; i++)
- qcom_icc_bcm_init(qp->bcms[i], &pdev->dev);
-
- for (i = 0; i < num_nodes; i++) {
- size_t j;
-
- if (!qnodes[i])
- continue;
-
- node = icc_node_create(qnodes[i]->id);
- if (IS_ERR(node)) {
- ret = PTR_ERR(node);
- goto err;
- }
-
- node->name = qnodes[i]->name;
- node->data = qnodes[i];
- icc_node_add(node, provider);
-
- for (j = 0; j < qnodes[i]->num_links; j++)
- icc_link_create(node, qnodes[i]->links[j]);
-
- data->nodes[i] = node;
- }
- data->num_nodes = num_nodes;
-
- platform_set_drvdata(pdev, qp);
-
- return 0;
-err:
- icc_nodes_remove(provider);
- icc_provider_del(provider);
- return ret;
-}
-
-static int qnoc_remove(struct platform_device *pdev)
-{
- struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
-
- icc_nodes_remove(&qp->provider);
- return icc_provider_del(&qp->provider);
-}
-
static const struct of_device_id qnoc_of_match[] = {
{ .compatible = "qcom,sdx55-mc-virt",
.data = &sdx55_mc_virt},
@@ -341,8 +249,8 @@ static const struct of_device_id qnoc_of_match[] = {
MODULE_DEVICE_TABLE(of, qnoc_of_match);
static struct platform_driver qnoc_driver = {
- .probe = qnoc_probe,
- .remove = qnoc_remove,
+ .probe = qcom_icc_rpmh_probe,
+ .remove = qcom_icc_rpmh_remove,
.driver = {
.name = "qnoc-sdx55",
.of_match_table = qnoc_of_match,
diff --git a/drivers/interconnect/qcom/sm8150.c b/drivers/interconnect/qcom/sm8150.c
index c76b2c7f9b10..2a85f53802b5 100644
--- a/drivers/interconnect/qcom/sm8150.c
+++ b/drivers/interconnect/qcom/sm8150.c
@@ -502,98 +502,6 @@ static struct qcom_icc_desc sm8150_system_noc = {
.num_bcms = ARRAY_SIZE(system_noc_bcms),
};
-static int qnoc_probe(struct platform_device *pdev)
-{
- const struct qcom_icc_desc *desc;
- struct icc_onecell_data *data;
- struct icc_provider *provider;
- struct qcom_icc_node **qnodes;
- struct qcom_icc_provider *qp;
- struct icc_node *node;
- size_t num_nodes, i;
- int ret;
-
- desc = device_get_match_data(&pdev->dev);
- if (!desc)
- return -EINVAL;
-
- qnodes = desc->nodes;
- num_nodes = desc->num_nodes;
-
- qp = devm_kzalloc(&pdev->dev, sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return -ENOMEM;
-
- data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- provider = &qp->provider;
- provider->dev = &pdev->dev;
- provider->set = qcom_icc_set;
- provider->pre_aggregate = qcom_icc_pre_aggregate;
- provider->aggregate = qcom_icc_aggregate;
- provider->xlate = of_icc_xlate_onecell;
- INIT_LIST_HEAD(&provider->nodes);
- provider->data = data;
-
- qp->dev = &pdev->dev;
- qp->bcms = desc->bcms;
- qp->num_bcms = desc->num_bcms;
-
- qp->voter = of_bcm_voter_get(qp->dev, NULL);
- if (IS_ERR(qp->voter))
- return PTR_ERR(qp->voter);
-
- ret = icc_provider_add(provider);
- if (ret) {
- dev_err(&pdev->dev, "error adding interconnect provider\n");
- return ret;
- }
-
- for (i = 0; i < qp->num_bcms; i++)
- qcom_icc_bcm_init(qp->bcms[i], &pdev->dev);
-
- for (i = 0; i < num_nodes; i++) {
- size_t j;
-
- if (!qnodes[i])
- continue;
-
- node = icc_node_create(qnodes[i]->id);
- if (IS_ERR(node)) {
- ret = PTR_ERR(node);
- goto err;
- }
-
- node->name = qnodes[i]->name;
- node->data = qnodes[i];
- icc_node_add(node, provider);
-
- for (j = 0; j < qnodes[i]->num_links; j++)
- icc_link_create(node, qnodes[i]->links[j]);
-
- data->nodes[i] = node;
- }
- data->num_nodes = num_nodes;
-
- platform_set_drvdata(pdev, qp);
-
- return 0;
-err:
- icc_nodes_remove(provider);
- icc_provider_del(provider);
- return ret;
-}
-
-static int qnoc_remove(struct platform_device *pdev)
-{
- struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
-
- icc_nodes_remove(&qp->provider);
- return icc_provider_del(&qp->provider);
-}
-
static const struct of_device_id qnoc_of_match[] = {
{ .compatible = "qcom,sm8150-aggre1-noc",
.data = &sm8150_aggre1_noc},
@@ -622,8 +530,8 @@ static const struct of_device_id qnoc_of_match[] = {
MODULE_DEVICE_TABLE(of, qnoc_of_match);
static struct platform_driver qnoc_driver = {
- .probe = qnoc_probe,
- .remove = qnoc_remove,
+ .probe = qcom_icc_rpmh_probe,
+ .remove = qcom_icc_rpmh_remove,
.driver = {
.name = "qnoc-sm8150",
.of_match_table = qnoc_of_match,
diff --git a/drivers/interconnect/qcom/sm8250.c b/drivers/interconnect/qcom/sm8250.c
index cc558fec74e3..8dfb5dea562a 100644
--- a/drivers/interconnect/qcom/sm8250.c
+++ b/drivers/interconnect/qcom/sm8250.c
@@ -518,98 +518,6 @@ static struct qcom_icc_desc sm8250_system_noc = {
.num_bcms = ARRAY_SIZE(system_noc_bcms),
};
-static int qnoc_probe(struct platform_device *pdev)
-{
- const struct qcom_icc_desc *desc;
- struct icc_onecell_data *data;
- struct icc_provider *provider;
- struct qcom_icc_node **qnodes;
- struct qcom_icc_provider *qp;
- struct icc_node *node;
- size_t num_nodes, i;
- int ret;
-
- desc = device_get_match_data(&pdev->dev);
- if (!desc)
- return -EINVAL;
-
- qnodes = desc->nodes;
- num_nodes = desc->num_nodes;
-
- qp = devm_kzalloc(&pdev->dev, sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return -ENOMEM;
-
- data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- provider = &qp->provider;
- provider->dev = &pdev->dev;
- provider->set = qcom_icc_set;
- provider->pre_aggregate = qcom_icc_pre_aggregate;
- provider->aggregate = qcom_icc_aggregate;
- provider->xlate = of_icc_xlate_onecell;
- INIT_LIST_HEAD(&provider->nodes);
- provider->data = data;
-
- qp->dev = &pdev->dev;
- qp->bcms = desc->bcms;
- qp->num_bcms = desc->num_bcms;
-
- qp->voter = of_bcm_voter_get(qp->dev, NULL);
- if (IS_ERR(qp->voter))
- return PTR_ERR(qp->voter);
-
- ret = icc_provider_add(provider);
- if (ret) {
- dev_err(&pdev->dev, "error adding interconnect provider\n");
- return ret;
- }
-
- for (i = 0; i < qp->num_bcms; i++)
- qcom_icc_bcm_init(qp->bcms[i], &pdev->dev);
-
- for (i = 0; i < num_nodes; i++) {
- size_t j;
-
- if (!qnodes[i])
- continue;
-
- node = icc_node_create(qnodes[i]->id);
- if (IS_ERR(node)) {
- ret = PTR_ERR(node);
- goto err;
- }
-
- node->name = qnodes[i]->name;
- node->data = qnodes[i];
- icc_node_add(node, provider);
-
- for (j = 0; j < qnodes[i]->num_links; j++)
- icc_link_create(node, qnodes[i]->links[j]);
-
- data->nodes[i] = node;
- }
- data->num_nodes = num_nodes;
-
- platform_set_drvdata(pdev, qp);
-
- return 0;
-err:
- icc_nodes_remove(provider);
- icc_provider_del(provider);
- return ret;
-}
-
-static int qnoc_remove(struct platform_device *pdev)
-{
- struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
-
- icc_nodes_remove(&qp->provider);
- return icc_provider_del(&qp->provider);
-}
-
static const struct of_device_id qnoc_of_match[] = {
{ .compatible = "qcom,sm8250-aggre1-noc",
.data = &sm8250_aggre1_noc},
@@ -638,8 +546,8 @@ static const struct of_device_id qnoc_of_match[] = {
MODULE_DEVICE_TABLE(of, qnoc_of_match);
static struct platform_driver qnoc_driver = {
- .probe = qnoc_probe,
- .remove = qnoc_remove,
+ .probe = qcom_icc_rpmh_probe,
+ .remove = qcom_icc_rpmh_remove,
.driver = {
.name = "qnoc-sm8250",
.of_match_table = qnoc_of_match,
diff --git a/drivers/interconnect/qcom/sm8350.c b/drivers/interconnect/qcom/sm8350.c
index 579b6ce8e046..3e26a2175b28 100644
--- a/drivers/interconnect/qcom/sm8350.c
+++ b/drivers/interconnect/qcom/sm8350.c
@@ -510,99 +510,6 @@ static struct qcom_icc_desc sm8350_system_noc = {
.num_bcms = ARRAY_SIZE(system_noc_bcms),
};
-static int qnoc_probe(struct platform_device *pdev)
-{
- const struct qcom_icc_desc *desc;
- struct icc_onecell_data *data;
- struct icc_provider *provider;
- struct qcom_icc_node **qnodes;
- struct qcom_icc_provider *qp;
- struct icc_node *node;
- size_t num_nodes, i;
- int ret;
-
- desc = of_device_get_match_data(&pdev->dev);
- if (!desc)
- return -EINVAL;
-
- qnodes = desc->nodes;
- num_nodes = desc->num_nodes;
-
- qp = devm_kzalloc(&pdev->dev, sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return -ENOMEM;
-
- data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- provider = &qp->provider;
- provider->dev = &pdev->dev;
- provider->set = qcom_icc_set;
- provider->pre_aggregate = qcom_icc_pre_aggregate;
- provider->aggregate = qcom_icc_aggregate;
- provider->xlate = of_icc_xlate_onecell;
- INIT_LIST_HEAD(&provider->nodes);
- provider->data = data;
-
- qp->dev = &pdev->dev;
- qp->bcms = desc->bcms;
- qp->num_bcms = desc->num_bcms;
-
- qp->voter = of_bcm_voter_get(qp->dev, NULL);
- if (IS_ERR(qp->voter))
- return PTR_ERR(qp->voter);
-
- ret = icc_provider_add(provider);
- if (ret) {
- dev_err(&pdev->dev, "error adding interconnect provider\n");
- return ret;
- }
-
- for (i = 0; i < qp->num_bcms; i++)
- qcom_icc_bcm_init(qp->bcms[i], &pdev->dev);
-
- for (i = 0; i < num_nodes; i++) {
- size_t j;
-
- if (!qnodes[i])
- continue;
-
- node = icc_node_create(qnodes[i]->id);
- if (IS_ERR(node)) {
- ret = PTR_ERR(node);
- goto err;
- }
-
- node->name = qnodes[i]->name;
- node->data = qnodes[i];
- icc_node_add(node, provider);
-
- for (j = 0; j < qnodes[i]->num_links; j++)
- icc_link_create(node, qnodes[i]->links[j]);
-
- data->nodes[i] = node;
- }
- data->num_nodes = num_nodes;
-
- platform_set_drvdata(pdev, qp);
-
- return ret;
-
-err:
- icc_nodes_remove(provider);
- icc_provider_del(provider);
- return ret;
-}
-
-static int qnoc_remove(struct platform_device *pdev)
-{
- struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
-
- icc_nodes_remove(&qp->provider);
- return icc_provider_del(&qp->provider);
-}
-
static const struct of_device_id qnoc_of_match[] = {
{ .compatible = "qcom,sm8350-aggre1-noc", .data = &sm8350_aggre1_noc},
{ .compatible = "qcom,sm8350-aggre2-noc", .data = &sm8350_aggre2_noc},
@@ -619,8 +526,8 @@ static const struct of_device_id qnoc_of_match[] = {
MODULE_DEVICE_TABLE(of, qnoc_of_match);
static struct platform_driver qnoc_driver = {
- .probe = qnoc_probe,
- .remove = qnoc_remove,
+ .probe = qcom_icc_rpmh_probe,
+ .remove = qcom_icc_rpmh_remove,
.driver = {
.name = "qnoc-sm8350",
.of_match_table = qnoc_of_match,
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 98ba927aee1a..6f0df629353f 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -768,6 +768,7 @@ static void iommu_dma_free_noncontiguous(struct device *dev, size_t size,
__iommu_dma_unmap(dev, sgt->sgl->dma_address, size);
__iommu_dma_free_pages(sh->pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
sg_free_table(&sh->sgt);
+ kfree(sh);
}
#endif /* CONFIG_DMA_REMAP */
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index c6cf44a6c923..9ec374e17469 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -511,7 +511,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
u32 pasid, bool fault_ignore)
{
struct pasid_entry *pte;
- u16 did;
+ u16 did, pgtt;
pte = intel_pasid_get_entry(dev, pasid);
if (WARN_ON(!pte))
@@ -521,13 +521,19 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
return;
did = pasid_get_domain_id(pte);
+ pgtt = pasid_pte_get_pgtt(pte);
+
intel_pasid_clear_entry(dev, pasid, fault_ignore);
if (!ecap_coherent(iommu->ecap))
clflush_cache_range(pte, sizeof(*pte));
pasid_cache_invalidation_with_pasid(iommu, did, pasid);
- qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
+
+ if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
+ qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
+ else
+ iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
/* Device IOTLB doesn't need to be flushed in caching mode. */
if (!cap_caching_mode(iommu->cap))
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index 5ff61c3d401f..c11bc8b833b8 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -99,6 +99,12 @@ static inline bool pasid_pte_is_present(struct pasid_entry *pte)
return READ_ONCE(pte->val[0]) & PASID_PTE_PRESENT;
}
+/* Get PGTT field of a PASID table entry */
+static inline u16 pasid_pte_get_pgtt(struct pasid_entry *pte)
+{
+ return (u16)((READ_ONCE(pte->val[0]) >> 6) & 0x7);
+}
+
extern unsigned int intel_pasid_max_id;
int intel_pasid_alloc_table(struct device *dev);
void intel_pasid_free_table(struct device *dev);
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 9b0f22bc0514..4b9b3f35ba0e 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -675,7 +675,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
kfree_rcu(sdev, rcu);
if (list_empty(&svm->devs)) {
- intel_svm_free_pasid(mm);
if (svm->notifier.ops) {
mmu_notifier_unregister(&svm->notifier, mm);
/* Clear mm's pasid. */
@@ -690,6 +689,8 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
kfree(svm);
}
}
+ /* Drop a PASID reference and free it if no references remain. */
+ intel_svm_free_pasid(mm);
}
out:
return ret;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 5419c4b9f27a..63f0af10c403 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -924,6 +924,9 @@ void iommu_group_remove_device(struct device *dev)
struct iommu_group *group = dev->iommu_group;
struct group_device *tmp_device, *device = NULL;
+ if (!group)
+ return;
+
dev_info(dev, "Removing from iommu group %d\n", group->id);
/* Pre-notify listeners that a device is being removed. */
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 6019e58ce4fb..83df387e70a3 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -90,7 +90,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
struct zpci_dev *zdev = to_zpci_dev(dev);
struct s390_domain_device *domain_device;
unsigned long flags;
- int rc;
+ int cc, rc;
if (!zdev)
return -ENODEV;
@@ -99,14 +99,21 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
if (!domain_device)
return -ENOMEM;
- if (zdev->dma_table)
- zpci_dma_exit_device(zdev);
+ if (zdev->dma_table) {
+ cc = zpci_dma_exit_device(zdev);
+ if (cc) {
+ rc = -EIO;
+ goto out_free;
+ }
+ }
zdev->dma_table = s390_domain->dma_table;
- rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
(u64) zdev->dma_table);
- if (rc)
+ if (cc) {
+ rc = -EIO;
goto out_restore;
+ }
spin_lock_irqsave(&s390_domain->list_lock, flags);
/* First device defines the DMA range limits */
@@ -130,6 +137,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
out_restore:
zpci_dma_init_device(zdev);
+out_free:
kfree(domain_device);
return rc;
diff --git a/drivers/ipack/carriers/tpci200.c b/drivers/ipack/carriers/tpci200.c
index 3461b0a7dc62..cbfdadecb23b 100644
--- a/drivers/ipack/carriers/tpci200.c
+++ b/drivers/ipack/carriers/tpci200.c
@@ -89,16 +89,13 @@ static void tpci200_unregister(struct tpci200_board *tpci200)
free_irq(tpci200->info->pdev->irq, (void *) tpci200);
pci_iounmap(tpci200->info->pdev, tpci200->info->interface_regs);
- pci_iounmap(tpci200->info->pdev, tpci200->info->cfg_regs);
pci_release_region(tpci200->info->pdev, TPCI200_IP_INTERFACE_BAR);
pci_release_region(tpci200->info->pdev, TPCI200_IO_ID_INT_SPACES_BAR);
pci_release_region(tpci200->info->pdev, TPCI200_MEM16_SPACE_BAR);
pci_release_region(tpci200->info->pdev, TPCI200_MEM8_SPACE_BAR);
- pci_release_region(tpci200->info->pdev, TPCI200_CFG_MEM_BAR);
pci_disable_device(tpci200->info->pdev);
- pci_dev_put(tpci200->info->pdev);
}
static void tpci200_enable_irq(struct tpci200_board *tpci200,
@@ -257,7 +254,7 @@ static int tpci200_register(struct tpci200_board *tpci200)
"(bn 0x%X, sn 0x%X) failed to allocate PCI resource for BAR 2 !",
tpci200->info->pdev->bus->number,
tpci200->info->pdev->devfn);
- goto out_disable_pci;
+ goto err_disable_device;
}
/* Request IO ID INT space (Bar 3) */
@@ -269,7 +266,7 @@ static int tpci200_register(struct tpci200_board *tpci200)
"(bn 0x%X, sn 0x%X) failed to allocate PCI resource for BAR 3 !",
tpci200->info->pdev->bus->number,
tpci200->info->pdev->devfn);
- goto out_release_ip_space;
+ goto err_ip_interface_bar;
}
/* Request MEM8 space (Bar 5) */
@@ -280,7 +277,7 @@ static int tpci200_register(struct tpci200_board *tpci200)
"(bn 0x%X, sn 0x%X) failed to allocate PCI resource for BAR 5!",
tpci200->info->pdev->bus->number,
tpci200->info->pdev->devfn);
- goto out_release_ioid_int_space;
+ goto err_io_id_int_spaces_bar;
}
/* Request MEM16 space (Bar 4) */
@@ -291,7 +288,7 @@ static int tpci200_register(struct tpci200_board *tpci200)
"(bn 0x%X, sn 0x%X) failed to allocate PCI resource for BAR 4!",
tpci200->info->pdev->bus->number,
tpci200->info->pdev->devfn);
- goto out_release_mem8_space;
+ goto err_mem8_space_bar;
}
/* Map internal tpci200 driver user space */
@@ -305,7 +302,7 @@ static int tpci200_register(struct tpci200_board *tpci200)
tpci200->info->pdev->bus->number,
tpci200->info->pdev->devfn);
res = -ENOMEM;
- goto out_release_mem8_space;
+ goto err_mem16_space_bar;
}
/* Initialize lock that protects interface_regs */
@@ -344,18 +341,22 @@ static int tpci200_register(struct tpci200_board *tpci200)
"(bn 0x%X, sn 0x%X) unable to register IRQ !",
tpci200->info->pdev->bus->number,
tpci200->info->pdev->devfn);
- goto out_release_ioid_int_space;
+ goto err_interface_regs;
}
return 0;
-out_release_mem8_space:
+err_interface_regs:
+ pci_iounmap(tpci200->info->pdev, tpci200->info->interface_regs);
+err_mem16_space_bar:
+ pci_release_region(tpci200->info->pdev, TPCI200_MEM16_SPACE_BAR);
+err_mem8_space_bar:
pci_release_region(tpci200->info->pdev, TPCI200_MEM8_SPACE_BAR);
-out_release_ioid_int_space:
+err_io_id_int_spaces_bar:
pci_release_region(tpci200->info->pdev, TPCI200_IO_ID_INT_SPACES_BAR);
-out_release_ip_space:
+err_ip_interface_bar:
pci_release_region(tpci200->info->pdev, TPCI200_IP_INTERFACE_BAR);
-out_disable_pci:
+err_disable_device:
pci_disable_device(tpci200->info->pdev);
return res;
}
@@ -527,7 +528,7 @@ static int tpci200_pci_probe(struct pci_dev *pdev,
tpci200->info = kzalloc(sizeof(struct tpci200_infos), GFP_KERNEL);
if (!tpci200->info) {
ret = -ENOMEM;
- goto out_err_info;
+ goto err_tpci200;
}
pci_dev_get(pdev);
@@ -538,7 +539,7 @@ static int tpci200_pci_probe(struct pci_dev *pdev,
if (ret) {
dev_err(&pdev->dev, "Failed to allocate PCI Configuration Memory");
ret = -EBUSY;
- goto out_err_pci_request;
+ goto err_tpci200_info;
}
tpci200->info->cfg_regs = ioremap(
pci_resource_start(pdev, TPCI200_CFG_MEM_BAR),
@@ -546,7 +547,7 @@ static int tpci200_pci_probe(struct pci_dev *pdev,
if (!tpci200->info->cfg_regs) {
dev_err(&pdev->dev, "Failed to map PCI Configuration Memory");
ret = -EFAULT;
- goto out_err_ioremap;
+ goto err_request_region;
}
/* Disable byte swapping for 16 bit IP module access. This will ensure
@@ -569,7 +570,7 @@ static int tpci200_pci_probe(struct pci_dev *pdev,
if (ret) {
dev_err(&pdev->dev, "error during tpci200 install\n");
ret = -ENODEV;
- goto out_err_install;
+ goto err_cfg_regs;
}
/* Register the carrier in the industry pack bus driver */
@@ -581,7 +582,7 @@ static int tpci200_pci_probe(struct pci_dev *pdev,
dev_err(&pdev->dev,
"error registering the carrier on ipack driver\n");
ret = -EFAULT;
- goto out_err_bus_register;
+ goto err_tpci200_install;
}
/* save the bus number given by ipack to logging purpose */
@@ -592,19 +593,16 @@ static int tpci200_pci_probe(struct pci_dev *pdev,
tpci200_create_device(tpci200, i);
return 0;
-out_err_bus_register:
+err_tpci200_install:
tpci200_uninstall(tpci200);
- /* tpci200->info->cfg_regs is unmapped in tpci200_uninstall */
- tpci200->info->cfg_regs = NULL;
-out_err_install:
- if (tpci200->info->cfg_regs)
- iounmap(tpci200->info->cfg_regs);
-out_err_ioremap:
+err_cfg_regs:
+ pci_iounmap(tpci200->info->pdev, tpci200->info->cfg_regs);
+err_request_region:
pci_release_region(pdev, TPCI200_CFG_MEM_BAR);
-out_err_pci_request:
- pci_dev_put(pdev);
+err_tpci200_info:
kfree(tpci200->info);
-out_err_info:
+ pci_dev_put(pdev);
+err_tpci200:
kfree(tpci200);
return ret;
}
@@ -614,6 +612,12 @@ static void __tpci200_pci_remove(struct tpci200_board *tpci200)
ipack_bus_unregister(tpci200->info->ipack_bus);
tpci200_uninstall(tpci200);
+ pci_iounmap(tpci200->info->pdev, tpci200->info->cfg_regs);
+
+ pci_release_region(tpci200->info->pdev, TPCI200_CFG_MEM_BAR);
+
+ pci_dev_put(tpci200->info->pdev);
+
kfree(tpci200->info);
kfree(tpci200);
}
diff --git a/drivers/irqchip/irq-alpine-msi.c b/drivers/irqchip/irq-alpine-msi.c
index ede02dc2bcd0..5ddb8e578ac6 100644
--- a/drivers/irqchip/irq-alpine-msi.c
+++ b/drivers/irqchip/irq-alpine-msi.c
@@ -267,9 +267,7 @@ static int alpine_msix_init(struct device_node *node,
goto err_priv;
}
- priv->msi_map = kcalloc(BITS_TO_LONGS(priv->num_spis),
- sizeof(*priv->msi_map),
- GFP_KERNEL);
+ priv->msi_map = bitmap_zalloc(priv->num_spis, GFP_KERNEL);
if (!priv->msi_map) {
ret = -ENOMEM;
goto err_priv;
@@ -285,7 +283,7 @@ static int alpine_msix_init(struct device_node *node,
return 0;
err_map:
- kfree(priv->msi_map);
+ bitmap_free(priv->msi_map);
err_priv:
kfree(priv);
return ret;
diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index b8c06bd8659e..6fc145aacaf0 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -226,7 +226,7 @@ static void aic_irq_eoi(struct irq_data *d)
* Reading the interrupt reason automatically acknowledges and masks
* the IRQ, so we just unmask it here if needed.
*/
- if (!irqd_irq_disabled(d) && !irqd_irq_masked(d))
+ if (!irqd_irq_masked(d))
aic_irq_unmask(d);
}
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index be9ea6fd6f8b..9349fc68b81a 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -269,7 +269,7 @@ static void gicv2m_teardown(void)
list_for_each_entry_safe(v2m, tmp, &v2m_nodes, entry) {
list_del(&v2m->entry);
- kfree(v2m->bm);
+ bitmap_free(v2m->bm);
iounmap(v2m->base);
of_node_put(to_of_node(v2m->fwnode));
if (is_fwnode_irqchip(v2m->fwnode))
@@ -386,8 +386,7 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode,
break;
}
}
- v2m->bm = kcalloc(BITS_TO_LONGS(v2m->nr_spis), sizeof(long),
- GFP_KERNEL);
+ v2m->bm = bitmap_zalloc(v2m->nr_spis, GFP_KERNEL);
if (!v2m->bm) {
ret = -ENOMEM;
goto err_iounmap;
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index ba39668c3e08..7f40dca8cda5 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2140,7 +2140,7 @@ static unsigned long *its_lpi_alloc(int nr_irqs, u32 *base, int *nr_ids)
if (err)
goto out;
- bitmap = kcalloc(BITS_TO_LONGS(nr_irqs), sizeof (long), GFP_ATOMIC);
+ bitmap = bitmap_zalloc(nr_irqs, GFP_ATOMIC);
if (!bitmap)
goto out;
@@ -2156,7 +2156,7 @@ out:
static void its_lpi_free(unsigned long *bitmap, u32 base, u32 nr_ids)
{
WARN_ON(free_lpi_range(base, nr_ids));
- kfree(bitmap);
+ bitmap_free(bitmap);
}
static void gic_reset_prop_table(void *va)
@@ -3387,7 +3387,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
if (!dev || !itt || !col_map || (!lpi_map && alloc_lpis)) {
kfree(dev);
kfree(itt);
- kfree(lpi_map);
+ bitmap_free(lpi_map);
kfree(col_map);
return NULL;
}
diff --git a/drivers/irqchip/irq-gic-v3-mbi.c b/drivers/irqchip/irq-gic-v3-mbi.c
index e81e89a81cb5..b84c9c2eccdc 100644
--- a/drivers/irqchip/irq-gic-v3-mbi.c
+++ b/drivers/irqchip/irq-gic-v3-mbi.c
@@ -290,8 +290,7 @@ int __init mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent)
if (ret)
goto err_free_mbi;
- mbi_ranges[n].bm = kcalloc(BITS_TO_LONGS(mbi_ranges[n].nr_spis),
- sizeof(long), GFP_KERNEL);
+ mbi_ranges[n].bm = bitmap_zalloc(mbi_ranges[n].nr_spis, GFP_KERNEL);
if (!mbi_ranges[n].bm) {
ret = -ENOMEM;
goto err_free_mbi;
@@ -329,7 +328,7 @@ int __init mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent)
err_free_mbi:
if (mbi_ranges) {
for (n = 0; n < mbi_range_nr; n++)
- kfree(mbi_ranges[n].bm);
+ bitmap_free(mbi_ranges[n].bm);
kfree(mbi_ranges);
}
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index e0f4debe64e1..fd4e9a37fea6 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -100,6 +100,27 @@ EXPORT_SYMBOL(gic_pmr_sync);
DEFINE_STATIC_KEY_FALSE(gic_nonsecure_priorities);
EXPORT_SYMBOL(gic_nonsecure_priorities);
+/*
+ * When the Non-secure world has access to group 0 interrupts (as a
+ * consequence of SCR_EL3.FIQ == 0), reading the ICC_RPR_EL1 register will
+ * return the Distributor's view of the interrupt priority.
+ *
+ * When GIC security is enabled (GICD_CTLR.DS == 0), the interrupt priority
+ * written by software is moved to the Non-secure range by the Distributor.
+ *
+ * If both are true (which is when gic_nonsecure_priorities gets enabled),
+ * we need to shift down the priority programmed by software to match it
+ * against the value returned by ICC_RPR_EL1.
+ */
+#define GICD_INT_RPR_PRI(priority) \
+ ({ \
+ u32 __priority = (priority); \
+ if (static_branch_unlikely(&gic_nonsecure_priorities)) \
+ __priority = 0x80 | (__priority >> 1); \
+ \
+ __priority; \
+ })
+
/* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */
static refcount_t *ppi_nmi_refs;
@@ -446,18 +467,23 @@ static void gic_irq_set_prio(struct irq_data *d, u8 prio)
writeb_relaxed(prio, base + offset + index);
}
-static u32 gic_get_ppi_index(struct irq_data *d)
+static u32 __gic_get_ppi_index(irq_hw_number_t hwirq)
{
- switch (get_intid_range(d)) {
+ switch (__get_intid_range(hwirq)) {
case PPI_RANGE:
- return d->hwirq - 16;
+ return hwirq - 16;
case EPPI_RANGE:
- return d->hwirq - EPPI_BASE_INTID + 16;
+ return hwirq - EPPI_BASE_INTID + 16;
default:
unreachable();
}
}
+static u32 gic_get_ppi_index(struct irq_data *d)
+{
+ return __gic_get_ppi_index(d->hwirq);
+}
+
static int gic_irq_nmi_setup(struct irq_data *d)
{
struct irq_desc *desc = irq_to_desc(d->irq);
@@ -687,7 +713,7 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
return;
if (gic_supports_nmi() &&
- unlikely(gic_read_rpr() == GICD_INT_NMI_PRI)) {
+ unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI))) {
gic_handle_nmi(irqnr, regs);
return;
}
@@ -1467,10 +1493,34 @@ static void gic_irq_domain_free(struct irq_domain *domain, unsigned int virq,
}
}
+static bool fwspec_is_partitioned_ppi(struct irq_fwspec *fwspec,
+ irq_hw_number_t hwirq)
+{
+ enum gic_intid_range range;
+
+ if (!gic_data.ppi_descs)
+ return false;
+
+ if (!is_of_node(fwspec->fwnode))
+ return false;
+
+ if (fwspec->param_count < 4 || !fwspec->param[3])
+ return false;
+
+ range = __get_intid_range(hwirq);
+ if (range != PPI_RANGE && range != EPPI_RANGE)
+ return false;
+
+ return true;
+}
+
static int gic_irq_domain_select(struct irq_domain *d,
struct irq_fwspec *fwspec,
enum irq_domain_bus_token bus_token)
{
+ unsigned int type, ret, ppi_idx;
+ irq_hw_number_t hwirq;
+
/* Not for us */
if (fwspec->fwnode != d->fwnode)
return 0;
@@ -1479,16 +1529,19 @@ static int gic_irq_domain_select(struct irq_domain *d,
if (!is_of_node(fwspec->fwnode))
return 1;
+ ret = gic_irq_domain_translate(d, fwspec, &hwirq, &type);
+ if (WARN_ON_ONCE(ret))
+ return 0;
+
+ if (!fwspec_is_partitioned_ppi(fwspec, hwirq))
+ return d == gic_data.domain;
+
/*
* If this is a PPI and we have a 4th (non-null) parameter,
* then we need to match the partition domain.
*/
- if (fwspec->param_count >= 4 &&
- fwspec->param[0] == 1 && fwspec->param[3] != 0 &&
- gic_data.ppi_descs)
- return d == partition_get_domain(gic_data.ppi_descs[fwspec->param[1]]);
-
- return d == gic_data.domain;
+ ppi_idx = __gic_get_ppi_index(hwirq);
+ return d == partition_get_domain(gic_data.ppi_descs[ppi_idx]);
}
static const struct irq_domain_ops gic_irq_domain_ops = {
@@ -1503,7 +1556,9 @@ static int partition_domain_translate(struct irq_domain *d,
unsigned long *hwirq,
unsigned int *type)
{
+ unsigned long ppi_intid;
struct device_node *np;
+ unsigned int ppi_idx;
int ret;
if (!gic_data.ppi_descs)
@@ -1513,7 +1568,12 @@ static int partition_domain_translate(struct irq_domain *d,
if (WARN_ON(!np))
return -EINVAL;
- ret = partition_translate_id(gic_data.ppi_descs[fwspec->param[1]],
+ ret = gic_irq_domain_translate(d, fwspec, &ppi_intid, type);
+ if (WARN_ON_ONCE(ret))
+ return 0;
+
+ ppi_idx = __gic_get_ppi_index(ppi_intid);
+ ret = partition_translate_id(gic_data.ppi_descs[ppi_idx],
of_node_to_fwnode(np));
if (ret < 0)
return ret;
diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c
index f790ca6d78aa..a4eb8a2181c7 100644
--- a/drivers/irqchip/irq-loongson-pch-pic.c
+++ b/drivers/irqchip/irq-loongson-pch-pic.c
@@ -92,18 +92,22 @@ static int pch_pic_set_type(struct irq_data *d, unsigned int type)
case IRQ_TYPE_EDGE_RISING:
pch_pic_bitset(priv, PCH_PIC_EDGE, d->hwirq);
pch_pic_bitclr(priv, PCH_PIC_POL, d->hwirq);
+ irq_set_handler_locked(d, handle_edge_irq);
break;
case IRQ_TYPE_EDGE_FALLING:
pch_pic_bitset(priv, PCH_PIC_EDGE, d->hwirq);
pch_pic_bitset(priv, PCH_PIC_POL, d->hwirq);
+ irq_set_handler_locked(d, handle_edge_irq);
break;
case IRQ_TYPE_LEVEL_HIGH:
pch_pic_bitclr(priv, PCH_PIC_EDGE, d->hwirq);
pch_pic_bitclr(priv, PCH_PIC_POL, d->hwirq);
+ irq_set_handler_locked(d, handle_level_irq);
break;
case IRQ_TYPE_LEVEL_LOW:
pch_pic_bitclr(priv, PCH_PIC_EDGE, d->hwirq);
pch_pic_bitset(priv, PCH_PIC_POL, d->hwirq);
+ irq_set_handler_locked(d, handle_level_irq);
break;
default:
ret = -EINVAL;
@@ -113,11 +117,24 @@ static int pch_pic_set_type(struct irq_data *d, unsigned int type)
return ret;
}
+static void pch_pic_ack_irq(struct irq_data *d)
+{
+ unsigned int reg;
+ struct pch_pic *priv = irq_data_get_irq_chip_data(d);
+
+ reg = readl(priv->base + PCH_PIC_EDGE + PIC_REG_IDX(d->hwirq) * 4);
+ if (reg & BIT(PIC_REG_BIT(d->hwirq))) {
+ writel(BIT(PIC_REG_BIT(d->hwirq)),
+ priv->base + PCH_PIC_CLR + PIC_REG_IDX(d->hwirq) * 4);
+ }
+ irq_chip_ack_parent(d);
+}
+
static struct irq_chip pch_pic_irq_chip = {
.name = "PCH PIC",
.irq_mask = pch_pic_mask_irq,
.irq_unmask = pch_pic_unmask_irq,
- .irq_ack = irq_chip_ack_parent,
+ .irq_ack = pch_pic_ack_irq,
.irq_set_affinity = irq_chip_set_affinity_parent,
.irq_set_type = pch_pic_set_type,
};
diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c
index 55322da51c56..b4927e425f7b 100644
--- a/drivers/irqchip/irq-ls-scfg-msi.c
+++ b/drivers/irqchip/irq-ls-scfg-msi.c
@@ -362,10 +362,7 @@ static int ls_scfg_msi_probe(struct platform_device *pdev)
msi_data->irqs_num = MSI_IRQS_PER_MSIR *
(1 << msi_data->cfg->ibs_shift);
- msi_data->used = devm_kcalloc(&pdev->dev,
- BITS_TO_LONGS(msi_data->irqs_num),
- sizeof(*msi_data->used),
- GFP_KERNEL);
+ msi_data->used = devm_bitmap_zalloc(&pdev->dev, msi_data->irqs_num, GFP_KERNEL);
if (!msi_data->used)
return -ENOMEM;
/*
diff --git a/drivers/irqchip/irq-mtk-sysirq.c b/drivers/irqchip/irq-mtk-sysirq.c
index 6ff98b87e5c0..586e52d5442b 100644
--- a/drivers/irqchip/irq-mtk-sysirq.c
+++ b/drivers/irqchip/irq-mtk-sysirq.c
@@ -65,6 +65,7 @@ static struct irq_chip mtk_sysirq_chip = {
.irq_set_type = mtk_sysirq_set_type,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_set_affinity = irq_chip_set_affinity_parent,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
};
static int mtk_sysirq_domain_translate(struct irq_domain *d,
diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c
index 3be5c5dba1da..fe88a782173d 100644
--- a/drivers/irqchip/irq-mvebu-gicp.c
+++ b/drivers/irqchip/irq-mvebu-gicp.c
@@ -210,9 +210,7 @@ static int mvebu_gicp_probe(struct platform_device *pdev)
gicp->spi_cnt += gicp->spi_ranges[i].count;
}
- gicp->spi_bitmap = devm_kcalloc(&pdev->dev,
- BITS_TO_LONGS(gicp->spi_cnt), sizeof(long),
- GFP_KERNEL);
+ gicp->spi_bitmap = devm_bitmap_zalloc(&pdev->dev, gicp->spi_cnt, GFP_KERNEL);
if (!gicp->spi_bitmap)
return -ENOMEM;
diff --git a/drivers/irqchip/irq-mvebu-odmi.c b/drivers/irqchip/irq-mvebu-odmi.c
index b4d367868dbb..dc4145abdd6f 100644
--- a/drivers/irqchip/irq-mvebu-odmi.c
+++ b/drivers/irqchip/irq-mvebu-odmi.c
@@ -171,8 +171,7 @@ static int __init mvebu_odmi_init(struct device_node *node,
if (!odmis)
return -ENOMEM;
- odmis_bm = kcalloc(BITS_TO_LONGS(odmis_count * NODMIS_PER_FRAME),
- sizeof(long), GFP_KERNEL);
+ odmis_bm = bitmap_zalloc(odmis_count * NODMIS_PER_FRAME, GFP_KERNEL);
if (!odmis_bm) {
ret = -ENOMEM;
goto err_alloc;
@@ -227,7 +226,7 @@ err_unmap:
if (odmi->base && !IS_ERR(odmi->base))
iounmap(odmis[i].base);
}
- kfree(odmis_bm);
+ bitmap_free(odmis_bm);
err_alloc:
kfree(odmis);
return ret;
diff --git a/drivers/irqchip/irq-partition-percpu.c b/drivers/irqchip/irq-partition-percpu.c
index 89c23a1566dc..8e76d2913e6b 100644
--- a/drivers/irqchip/irq-partition-percpu.c
+++ b/drivers/irqchip/irq-partition-percpu.c
@@ -215,8 +215,7 @@ struct partition_desc *partition_create_desc(struct fwnode_handle *fwnode,
goto out;
desc->domain = d;
- desc->bitmap = kcalloc(BITS_TO_LONGS(nr_parts), sizeof(long),
- GFP_KERNEL);
+ desc->bitmap = bitmap_zalloc(nr_parts, GFP_KERNEL);
if (WARN_ON(!desc->bitmap))
goto out;
diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
index 32d59202d408..173e6520e06e 100644
--- a/drivers/irqchip/qcom-pdc.c
+++ b/drivers/irqchip/qcom-pdc.c
@@ -53,26 +53,6 @@ static u32 pdc_reg_read(int reg, u32 i)
return readl_relaxed(pdc_base + reg + i * sizeof(u32));
}
-static int qcom_pdc_gic_get_irqchip_state(struct irq_data *d,
- enum irqchip_irq_state which,
- bool *state)
-{
- if (d->hwirq == GPIO_NO_WAKE_IRQ)
- return 0;
-
- return irq_chip_get_parent_state(d, which, state);
-}
-
-static int qcom_pdc_gic_set_irqchip_state(struct irq_data *d,
- enum irqchip_irq_state which,
- bool value)
-{
- if (d->hwirq == GPIO_NO_WAKE_IRQ)
- return 0;
-
- return irq_chip_set_parent_state(d, which, value);
-}
-
static void pdc_enable_intr(struct irq_data *d, bool on)
{
int pin_out = d->hwirq;
@@ -91,38 +71,16 @@ static void pdc_enable_intr(struct irq_data *d, bool on)
static void qcom_pdc_gic_disable(struct irq_data *d)
{
- if (d->hwirq == GPIO_NO_WAKE_IRQ)
- return;
-
pdc_enable_intr(d, false);
irq_chip_disable_parent(d);
}
static void qcom_pdc_gic_enable(struct irq_data *d)
{
- if (d->hwirq == GPIO_NO_WAKE_IRQ)
- return;
-
pdc_enable_intr(d, true);
irq_chip_enable_parent(d);
}
-static void qcom_pdc_gic_mask(struct irq_data *d)
-{
- if (d->hwirq == GPIO_NO_WAKE_IRQ)
- return;
-
- irq_chip_mask_parent(d);
-}
-
-static void qcom_pdc_gic_unmask(struct irq_data *d)
-{
- if (d->hwirq == GPIO_NO_WAKE_IRQ)
- return;
-
- irq_chip_unmask_parent(d);
-}
-
/*
* GIC does not handle falling edge or active low. To allow falling edge and
* active low interrupts to be handled at GIC, PDC has an inverter that inverts
@@ -159,14 +117,10 @@ enum pdc_irq_config_bits {
*/
static int qcom_pdc_gic_set_type(struct irq_data *d, unsigned int type)
{
- int pin_out = d->hwirq;
enum pdc_irq_config_bits pdc_type;
enum pdc_irq_config_bits old_pdc_type;
int ret;
- if (pin_out == GPIO_NO_WAKE_IRQ)
- return 0;
-
switch (type) {
case IRQ_TYPE_EDGE_RISING:
pdc_type = PDC_EDGE_RISING;
@@ -191,8 +145,8 @@ static int qcom_pdc_gic_set_type(struct irq_data *d, unsigned int type)
return -EINVAL;
}
- old_pdc_type = pdc_reg_read(IRQ_i_CFG, pin_out);
- pdc_reg_write(IRQ_i_CFG, pin_out, pdc_type);
+ old_pdc_type = pdc_reg_read(IRQ_i_CFG, d->hwirq);
+ pdc_reg_write(IRQ_i_CFG, d->hwirq, pdc_type);
ret = irq_chip_set_type_parent(d, type);
if (ret)
@@ -216,12 +170,12 @@ static int qcom_pdc_gic_set_type(struct irq_data *d, unsigned int type)
static struct irq_chip qcom_pdc_gic_chip = {
.name = "PDC",
.irq_eoi = irq_chip_eoi_parent,
- .irq_mask = qcom_pdc_gic_mask,
- .irq_unmask = qcom_pdc_gic_unmask,
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
.irq_disable = qcom_pdc_gic_disable,
.irq_enable = qcom_pdc_gic_enable,
- .irq_get_irqchip_state = qcom_pdc_gic_get_irqchip_state,
- .irq_set_irqchip_state = qcom_pdc_gic_set_irqchip_state,
+ .irq_get_irqchip_state = irq_chip_get_parent_state,
+ .irq_set_irqchip_state = irq_chip_set_parent_state,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_set_type = qcom_pdc_gic_set_type,
.flags = IRQCHIP_MASK_ON_SUSPEND |
@@ -282,7 +236,7 @@ static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq,
parent_hwirq = get_parent_hwirq(hwirq);
if (parent_hwirq == PDC_NO_PARENT_IRQ)
- return 0;
+ return irq_domain_disconnect_hierarchy(domain->parent, virq);
if (type & IRQ_TYPE_EDGE_BOTH)
type = IRQ_TYPE_EDGE_RISING;
@@ -319,17 +273,17 @@ static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq,
if (ret)
return ret;
+ if (hwirq == GPIO_NO_WAKE_IRQ)
+ return irq_domain_disconnect_hierarchy(domain, virq);
+
ret = irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
&qcom_pdc_gic_chip, NULL);
if (ret)
return ret;
- if (hwirq == GPIO_NO_WAKE_IRQ)
- return 0;
-
parent_hwirq = get_parent_hwirq(hwirq);
if (parent_hwirq == PDC_NO_PARENT_IRQ)
- return 0;
+ return irq_domain_disconnect_hierarchy(domain->parent, virq);
if (type & IRQ_TYPE_EDGE_BOTH)
type = IRQ_TYPE_EDGE_RISING;
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index bdf16180f5ff..ed800f5da7d8 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -59,16 +59,6 @@ config LEDS_88PM860X
This option enables support for on-chip LED drivers found on Marvell
Semiconductor 88PM8606 PMIC.
-config LEDS_AAT1290
- tristate "LED support for the AAT1290"
- depends on LEDS_CLASS_FLASH
- depends on V4L2_FLASH_LED_CLASS || !V4L2_FLASH_LED_CLASS
- depends on GPIOLIB || COMPILE_TEST
- depends on OF
- depends on PINCTRL
- help
- This option enables support for the LEDs on the AAT1290.
-
config LEDS_AN30259A
tristate "LED support for Panasonic AN30259A"
depends on LEDS_CLASS && I2C && OF
@@ -104,15 +94,6 @@ config LEDS_ARIEL
Say Y to if your machine is a Dell Wyse 3020 thin client.
-config LEDS_AS3645A
- tristate "AS3645A and LM3555 LED flash controllers support"
- depends on I2C && LEDS_CLASS_FLASH
- depends on V4L2_FLASH_LED_CLASS || !V4L2_FLASH_LED_CLASS
- help
- Enable LED flash class support for AS3645A LED flash
- controller. V4L2 flash API is provided as well if
- CONFIG_V4L2_FLASH_API is enabled.
-
config LEDS_AW2013
tristate "LED support for Awinic AW2013"
depends on LEDS_CLASS && I2C && OF
@@ -239,15 +220,6 @@ config LEDS_LM3692X
This option enables support for the TI LM3692x family
of white LED string drivers used for backlighting.
-config LEDS_LM3601X
- tristate "LED support for LM3601x Chips"
- depends on LEDS_CLASS && I2C
- depends on LEDS_CLASS_FLASH
- select REGMAP_I2C
- help
- This option enables support for the TI LM3601x family
- of flash, torch and indicator classes.
-
config LEDS_LOCOMO
tristate "LED Support for Locomo device"
depends on LEDS_CLASS
@@ -397,7 +369,7 @@ config LEDS_LP3952
module will be called leds-lp3952.
config LEDS_LP50XX
- tristate "LED Support for TI LP5036/30/24/18/12/9 LED driver chip"
+ tristate "LED Support for TI LP5036/30/24/18/12/09 LED driver chip"
depends on LEDS_CLASS && REGMAP_I2C
depends on LEDS_CLASS_MULTICOLOR || !LEDS_CLASS_MULTICOLOR
help
@@ -699,17 +671,6 @@ config LEDS_MAX77650
help
LEDs driver for MAX77650 family of PMICs from Maxim Integrated.
-config LEDS_MAX77693
- tristate "LED support for MAX77693 Flash"
- depends on LEDS_CLASS_FLASH
- depends on V4L2_FLASH_LED_CLASS || !V4L2_FLASH_LED_CLASS
- depends on MFD_MAX77693
- depends on OF
- help
- This option enables support for the flash part of the MAX77693
- multifunction device. It has build in control for two leds in flash
- and torch mode.
-
config LEDS_MAX8997
tristate "LED support for MAX8997 PMIC"
depends on LEDS_CLASS && MFD_MAX8997
@@ -741,16 +702,6 @@ config LEDS_MENF21BMC
This driver can also be built as a module. If so the module
will be called leds-menf21bmc.
-config LEDS_KTD2692
- tristate "LED support for KTD2692 flash LED controller"
- depends on LEDS_CLASS_FLASH && OF
- depends on GPIOLIB || COMPILE_TEST
- help
- This option enables support for KTD2692 LED flash connected
- through ExpressWire interface.
-
- Say Y to enable this driver.
-
config LEDS_IS31FL319X
tristate "LED Support for ISSI IS31FL319x I2C LED controller family"
depends on LEDS_CLASS && I2C && OF
@@ -913,14 +864,6 @@ config LEDS_IP30
To compile this driver as a module, choose M here: the module
will be called leds-ip30.
-config LEDS_SGM3140
- tristate "LED support for the SGM3140"
- depends on LEDS_CLASS_FLASH
- depends on V4L2_FLASH_LED_CLASS || !V4L2_FLASH_LED_CLASS
- help
- This option enables support for the SGM3140 500mA Buck/Boost Charge
- Pump LED Driver.
-
config LEDS_ACER_A500
tristate "Power button LED support for Acer Iconia Tab A500"
depends on LEDS_CLASS && MFD_ACER_A500_EC
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 7e604d3028c8..c636ec069612 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -9,13 +9,11 @@ obj-$(CONFIG_LEDS_TRIGGERS) += led-triggers.o
# LED Platform Drivers (keep this sorted, M-| sort)
obj-$(CONFIG_LEDS_88PM860X) += leds-88pm860x.o
-obj-$(CONFIG_LEDS_AAT1290) += leds-aat1290.o
obj-$(CONFIG_LEDS_ACER_A500) += leds-acer-a500.o
obj-$(CONFIG_LEDS_ADP5520) += leds-adp5520.o
obj-$(CONFIG_LEDS_AN30259A) += leds-an30259a.o
obj-$(CONFIG_LEDS_APU) += leds-apu.o
obj-$(CONFIG_LEDS_ARIEL) += leds-ariel.o
-obj-$(CONFIG_LEDS_AS3645A) += leds-as3645a.o
obj-$(CONFIG_LEDS_ASIC3) += leds-asic3.o
obj-$(CONFIG_LEDS_AW2013) += leds-aw2013.o
obj-$(CONFIG_LEDS_BCM6328) += leds-bcm6328.o
@@ -37,12 +35,10 @@ obj-$(CONFIG_LEDS_IP30) += leds-ip30.o
obj-$(CONFIG_LEDS_IPAQ_MICRO) += leds-ipaq-micro.o
obj-$(CONFIG_LEDS_IS31FL319X) += leds-is31fl319x.o
obj-$(CONFIG_LEDS_IS31FL32XX) += leds-is31fl32xx.o
-obj-$(CONFIG_LEDS_KTD2692) += leds-ktd2692.o
obj-$(CONFIG_LEDS_LM3530) += leds-lm3530.o
obj-$(CONFIG_LEDS_LM3532) += leds-lm3532.o
obj-$(CONFIG_LEDS_LM3533) += leds-lm3533.o
obj-$(CONFIG_LEDS_LM355x) += leds-lm355x.o
-obj-$(CONFIG_LEDS_LM3601X) += leds-lm3601x.o
obj-$(CONFIG_LEDS_LM36274) += leds-lm36274.o
obj-$(CONFIG_LEDS_LM3642) += leds-lm3642.o
obj-$(CONFIG_LEDS_LM3692X) += leds-lm3692x.o
@@ -60,7 +56,6 @@ obj-$(CONFIG_LEDS_LP8788) += leds-lp8788.o
obj-$(CONFIG_LEDS_LP8860) += leds-lp8860.o
obj-$(CONFIG_LEDS_LT3593) += leds-lt3593.o
obj-$(CONFIG_LEDS_MAX77650) += leds-max77650.o
-obj-$(CONFIG_LEDS_MAX77693) += leds-max77693.o
obj-$(CONFIG_LEDS_MAX8997) += leds-max8997.o
obj-$(CONFIG_LEDS_MC13783) += leds-mc13783.o
obj-$(CONFIG_LEDS_MENF21BMC) += leds-menf21bmc.o
@@ -82,7 +77,6 @@ obj-$(CONFIG_LEDS_PWM) += leds-pwm.o
obj-$(CONFIG_LEDS_REGULATOR) += leds-regulator.o
obj-$(CONFIG_LEDS_S3C24XX) += leds-s3c24xx.o
obj-$(CONFIG_LEDS_SC27XX_BLTC) += leds-sc27xx-bltc.o
-obj-$(CONFIG_LEDS_SGM3140) += leds-sgm3140.o
obj-$(CONFIG_LEDS_SUNFIRE) += leds-sunfire.o
obj-$(CONFIG_LEDS_SYSCON) += leds-syscon.o
obj-$(CONFIG_LEDS_TCA6507) += leds-tca6507.o
diff --git a/drivers/leds/blink/leds-lgm-sso.c b/drivers/leds/blink/leds-lgm-sso.c
index 7eb2f44f16be..fd8b7573285a 100644
--- a/drivers/leds/blink/leds-lgm-sso.c
+++ b/drivers/leds/blink/leds-lgm-sso.c
@@ -611,9 +611,6 @@ static void sso_led_shutdown(struct sso_led *led)
if (led->desc.hw_trig)
regmap_update_bits(priv->mmap, SSO_CON3, BIT(led->desc.pin), 0);
- if (led->gpiod)
- devm_gpiod_put(priv->dev, led->gpiod);
-
led->priv = NULL;
}
@@ -624,15 +621,16 @@ __sso_led_dt_parse(struct sso_led_priv *priv, struct fwnode_handle *fw_ssoled)
struct device *dev = priv->dev;
struct sso_led_desc *desc;
struct sso_led *led;
- struct list_head *p;
const char *tmp;
u32 prop;
int ret;
fwnode_for_each_child_node(fw_ssoled, fwnode_child) {
led = devm_kzalloc(dev, sizeof(*led), GFP_KERNEL);
- if (!led)
- return -ENOMEM;
+ if (!led) {
+ ret = -ENOMEM;
+ goto __dt_err;
+ }
INIT_LIST_HEAD(&led->list);
led->priv = priv;
@@ -642,7 +640,7 @@ __sso_led_dt_parse(struct sso_led_priv *priv, struct fwnode_handle *fw_ssoled)
fwnode_child,
GPIOD_ASIS, NULL);
if (IS_ERR(led->gpiod)) {
- dev_err(dev, "led: get gpio fail!\n");
+ ret = dev_err_probe(dev, PTR_ERR(led->gpiod), "led: get gpio fail!\n");
goto __dt_err;
}
@@ -662,8 +660,11 @@ __sso_led_dt_parse(struct sso_led_priv *priv, struct fwnode_handle *fw_ssoled)
desc->panic_indicator = 1;
ret = fwnode_property_read_u32(fwnode_child, "reg", &prop);
- if (ret != 0 || prop >= SSO_LED_MAX_NUM) {
+ if (ret)
+ goto __dt_err;
+ if (prop >= SSO_LED_MAX_NUM) {
dev_err(dev, "invalid LED pin:%u\n", prop);
+ ret = -EINVAL;
goto __dt_err;
}
desc->pin = prop;
@@ -699,21 +700,20 @@ __sso_led_dt_parse(struct sso_led_priv *priv, struct fwnode_handle *fw_ssoled)
desc->brightness = LED_FULL;
}
- if (sso_create_led(priv, led, fwnode_child))
+ ret = sso_create_led(priv, led, fwnode_child);
+ if (ret)
goto __dt_err;
}
- fwnode_handle_put(fw_ssoled);
return 0;
+
__dt_err:
- fwnode_handle_put(fw_ssoled);
+ fwnode_handle_put(fwnode_child);
/* unregister leds */
- list_for_each(p, &priv->led_list) {
- led = list_entry(p, struct sso_led, list);
+ list_for_each_entry(led, &priv->led_list, list)
sso_led_shutdown(led);
- }
- return -EINVAL;
+ return ret;
}
static int sso_led_dt_parse(struct sso_led_priv *priv)
@@ -731,6 +731,7 @@ static int sso_led_dt_parse(struct sso_led_priv *priv)
fw_ssoled = fwnode_get_named_child_node(fwnode, "ssoled");
if (fw_ssoled) {
ret = __sso_led_dt_parse(priv, fw_ssoled);
+ fwnode_handle_put(fw_ssoled);
if (ret)
return ret;
}
@@ -841,14 +842,12 @@ static int intel_sso_led_probe(struct platform_device *pdev)
static int intel_sso_led_remove(struct platform_device *pdev)
{
struct sso_led_priv *priv;
- struct list_head *pos, *n;
- struct sso_led *led;
+ struct sso_led *led, *n;
priv = platform_get_drvdata(pdev);
- list_for_each_safe(pos, n, &priv->led_list) {
- list_del(pos);
- led = list_entry(pos, struct sso_led, list);
+ list_for_each_entry_safe(led, n, &priv->led_list, list) {
+ list_del(&led->list);
sso_led_shutdown(led);
}
diff --git a/drivers/leds/flash/Kconfig b/drivers/leds/flash/Kconfig
index 3f49f3edbffb..b230f3d65eb0 100644
--- a/drivers/leds/flash/Kconfig
+++ b/drivers/leds/flash/Kconfig
@@ -2,6 +2,52 @@
if LEDS_CLASS_FLASH
+config LEDS_AAT1290
+ tristate "LED support for the AAT1290"
+ depends on V4L2_FLASH_LED_CLASS || !V4L2_FLASH_LED_CLASS
+ depends on GPIOLIB || COMPILE_TEST
+ depends on OF
+ depends on PINCTRL
+ help
+ This option enables support for the LEDs on the AAT1290.
+
+config LEDS_AS3645A
+ tristate "AS3645A and LM3555 LED flash controllers support"
+ depends on I2C
+ depends on V4L2_FLASH_LED_CLASS || !V4L2_FLASH_LED_CLASS
+ help
+ Enable LED flash class support for AS3645A LED flash
+ controller. V4L2 flash API is provided as well if
+ CONFIG_V4L2_FLASH_API is enabled.
+
+config LEDS_KTD2692
+ tristate "LED support for Kinetic KTD2692 flash LED controller"
+ depends on OF
+ depends on GPIOLIB || COMPILE_TEST
+ help
+ This option enables support for Kinetic KTD2692 LED flash connected
+ through ExpressWire interface.
+
+ Say Y to enable this driver.
+
+config LEDS_LM3601X
+ tristate "LED support for LM3601x Chips"
+ depends on LEDS_CLASS && I2C
+ select REGMAP_I2C
+ help
+ This option enables support for the TI LM3601x family
+ of flash, torch and indicator classes.
+
+config LEDS_MAX77693
+ tristate "LED support for MAX77693 Flash"
+ depends on V4L2_FLASH_LED_CLASS || !V4L2_FLASH_LED_CLASS
+ depends on MFD_MAX77693
+ depends on OF
+ help
+ This option enables support for the flash part of the MAX77693
+ multifunction device. It has build in control for two leds in flash
+ and torch mode.
+
config LEDS_RT4505
tristate "LED support for RT4505 flashlight controller"
depends on I2C && OF
@@ -24,4 +70,11 @@ config LEDS_RT8515
To compile this driver as a module, choose M here: the module
will be called leds-rt8515.
+config LEDS_SGM3140
+ tristate "LED support for the SGM3140"
+ depends on V4L2_FLASH_LED_CLASS || !V4L2_FLASH_LED_CLASS
+ help
+ This option enables support for the SGM3140 500mA Buck/Boost Charge
+ Pump LED Driver.
+
endif # LEDS_CLASS_FLASH
diff --git a/drivers/leds/flash/Makefile b/drivers/leds/flash/Makefile
index 09aee561f769..ebea42f9c37e 100644
--- a/drivers/leds/flash/Makefile
+++ b/drivers/leds/flash/Makefile
@@ -1,4 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_LEDS_AAT1290) += leds-aat1290.o
+obj-$(CONFIG_LEDS_AS3645A) += leds-as3645a.o
+obj-$(CONFIG_LEDS_KTD2692) += leds-ktd2692.o
+obj-$(CONFIG_LEDS_LM3601X) += leds-lm3601x.o
+obj-$(CONFIG_LEDS_MAX77693) += leds-max77693.o
obj-$(CONFIG_LEDS_RT4505) += leds-rt4505.o
obj-$(CONFIG_LEDS_RT8515) += leds-rt8515.o
+obj-$(CONFIG_LEDS_SGM3140) += leds-sgm3140.o
diff --git a/drivers/leds/leds-aat1290.c b/drivers/leds/flash/leds-aat1290.c
index 589484b22c79..589484b22c79 100644
--- a/drivers/leds/leds-aat1290.c
+++ b/drivers/leds/flash/leds-aat1290.c
diff --git a/drivers/leds/leds-as3645a.c b/drivers/leds/flash/leds-as3645a.c
index aa3f82be0a9c..aa3f82be0a9c 100644
--- a/drivers/leds/leds-as3645a.c
+++ b/drivers/leds/flash/leds-as3645a.c
diff --git a/drivers/leds/leds-ktd2692.c b/drivers/leds/flash/leds-ktd2692.c
index f341da1503a4..f341da1503a4 100644
--- a/drivers/leds/leds-ktd2692.c
+++ b/drivers/leds/flash/leds-ktd2692.c
diff --git a/drivers/leds/leds-lm3601x.c b/drivers/leds/flash/leds-lm3601x.c
index d0e1d4814042..d0e1d4814042 100644
--- a/drivers/leds/leds-lm3601x.c
+++ b/drivers/leds/flash/leds-lm3601x.c
diff --git a/drivers/leds/leds-max77693.c b/drivers/leds/flash/leds-max77693.c
index 5c1faeb55a31..5c1faeb55a31 100644
--- a/drivers/leds/leds-max77693.c
+++ b/drivers/leds/flash/leds-max77693.c
diff --git a/drivers/leds/flash/leds-rt8515.c b/drivers/leds/flash/leds-rt8515.c
index 590bfa180d10..44904fdee3cc 100644
--- a/drivers/leds/flash/leds-rt8515.c
+++ b/drivers/leds/flash/leds-rt8515.c
@@ -343,8 +343,9 @@ static int rt8515_probe(struct platform_device *pdev)
ret = devm_led_classdev_flash_register_ext(dev, fled, &init_data);
if (ret) {
- dev_err(dev, "can't register LED %s\n", led->name);
+ fwnode_handle_put(child);
mutex_destroy(&rt->lock);
+ dev_err(dev, "can't register LED %s\n", led->name);
return ret;
}
@@ -362,6 +363,7 @@ static int rt8515_probe(struct platform_device *pdev)
*/
}
+ fwnode_handle_put(child);
return 0;
}
diff --git a/drivers/leds/leds-sgm3140.c b/drivers/leds/flash/leds-sgm3140.c
index f4f831570f11..f4f831570f11 100644
--- a/drivers/leds/leds-sgm3140.c
+++ b/drivers/leds/flash/leds-sgm3140.c
diff --git a/drivers/leds/led-class-flash.c b/drivers/leds/led-class-flash.c
index 6eeb9effcf65..185e17055317 100644
--- a/drivers/leds/led-class-flash.c
+++ b/drivers/leds/led-class-flash.c
@@ -92,14 +92,12 @@ static ssize_t flash_strobe_store(struct device *dev,
struct led_classdev *led_cdev = dev_get_drvdata(dev);
struct led_classdev_flash *fled_cdev = lcdev_to_flcdev(led_cdev);
unsigned long state;
- ssize_t ret = -EINVAL;
+ ssize_t ret = -EBUSY;
mutex_lock(&led_cdev->led_access);
- if (led_sysfs_is_disabled(led_cdev)) {
- ret = -EBUSY;
+ if (led_sysfs_is_disabled(led_cdev))
goto unlock;
- }
ret = kstrtoul(buf, 10, &state);
if (ret)
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index f704391d57a8..f4bb02f6e042 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -350,10 +350,15 @@ int led_classdev_register_ext(struct device *parent,
if (ret < 0)
return ret;
- if (init_data->fwnode)
+ if (init_data->fwnode) {
fwnode_property_read_string(init_data->fwnode,
"linux,default-trigger",
&led_cdev->default_trigger);
+
+ if (fwnode_property_present(init_data->fwnode,
+ "retain-state-shutdown"))
+ led_cdev->flags |= LED_RETAIN_AT_SHUTDOWN;
+ }
} else {
proposed_name = led_cdev->name;
}
@@ -444,7 +449,8 @@ void led_classdev_unregister(struct led_classdev *led_cdev)
/* Stop blinking */
led_stop_software_blink(led_cdev);
- led_set_brightness(led_cdev, LED_OFF);
+ if (!(led_cdev->flags & LED_RETAIN_AT_SHUTDOWN))
+ led_set_brightness(led_cdev, LED_OFF);
flush_work(&led_cdev->set_brightness_work);
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index 8eb8054ef9c6..4a97cb745788 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -477,3 +477,18 @@ int led_compose_name(struct device *dev, struct led_init_data *init_data,
return 0;
}
EXPORT_SYMBOL_GPL(led_compose_name);
+
+enum led_default_state led_init_default_state_get(struct fwnode_handle *fwnode)
+{
+ const char *state = NULL;
+
+ if (!fwnode_property_read_string(fwnode, "default-state", &state)) {
+ if (!strcmp(state, "keep"))
+ return LEDS_DEFSTATE_KEEP;
+ if (!strcmp(state, "on"))
+ return LEDS_DEFSTATE_ON;
+ }
+
+ return LEDS_DEFSTATE_OFF;
+}
+EXPORT_SYMBOL_GPL(led_init_default_state_get);
diff --git a/drivers/leds/leds-el15203000.c b/drivers/leds/leds-el15203000.c
index 76b455e87574..f9eb59a25570 100644
--- a/drivers/leds/leds-el15203000.c
+++ b/drivers/leds/leds-el15203000.c
@@ -4,8 +4,9 @@
#include <linux/delay.h>
#include <linux/leds.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/property.h>
#include <linux/spi/spi.h>
/*
diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
index b5d5e22d2d1e..092eb59a7d32 100644
--- a/drivers/leds/leds-gpio.c
+++ b/drivers/leds/leds-gpio.c
@@ -16,6 +16,7 @@
#include <linux/platform_device.h>
#include <linux/property.h>
#include <linux/slab.h>
+#include "leds.h"
struct gpio_led_data {
struct led_classdev cdev;
@@ -144,7 +145,6 @@ static struct gpio_leds_priv *gpio_leds_create(struct platform_device *pdev)
device_for_each_child_node(dev, child) {
struct gpio_led_data *led_dat = &priv->leds[priv->num_leds];
struct gpio_led led = {};
- const char *state = NULL;
/*
* Acquire gpiod from DT with uninitialized label, which
@@ -161,15 +161,7 @@ static struct gpio_leds_priv *gpio_leds_create(struct platform_device *pdev)
led_dat->gpiod = led.gpiod;
- if (!fwnode_property_read_string(child, "default-state",
- &state)) {
- if (!strcmp(state, "keep"))
- led.default_state = LEDS_GPIO_DEFSTATE_KEEP;
- else if (!strcmp(state, "on"))
- led.default_state = LEDS_GPIO_DEFSTATE_ON;
- else
- led.default_state = LEDS_GPIO_DEFSTATE_OFF;
- }
+ led.default_state = led_init_default_state_get(child);
if (fwnode_property_present(child, "retain-state-suspended"))
led.retain_state_suspended = 1;
diff --git a/drivers/leds/leds-is31fl32xx.c b/drivers/leds/leds-is31fl32xx.c
index 3b55af9a8c58..22c092a4394a 100644
--- a/drivers/leds/leds-is31fl32xx.c
+++ b/drivers/leds/leds-is31fl32xx.c
@@ -386,6 +386,7 @@ static int is31fl32xx_parse_dt(struct device *dev,
dev_err(dev,
"Node %pOF 'reg' conflicts with another LED\n",
child);
+ ret = -EINVAL;
goto err;
}
diff --git a/drivers/leds/leds-lm3692x.c b/drivers/leds/leds-lm3692x.c
index a02756d7ed8f..afe6fb297855 100644
--- a/drivers/leds/leds-lm3692x.c
+++ b/drivers/leds/leds-lm3692x.c
@@ -7,10 +7,9 @@
#include <linux/init.h>
#include <linux/leds.h>
#include <linux/log2.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/mutex.h>
-#include <linux/of.h>
-#include <linux/of_gpio.h>
#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
#include <linux/slab.h>
diff --git a/drivers/leds/leds-lm3697.c b/drivers/leds/leds-lm3697.c
index 970a4f34791b..a8c9322558cc 100644
--- a/drivers/leds/leds-lm3697.c
+++ b/drivers/leds/leds-lm3697.c
@@ -2,11 +2,16 @@
// TI LM3697 LED chip family driver
// Copyright (C) 2018 Texas Instruments Incorporated - https://www.ti.com/
+#include <linux/bits.h>
#include <linux/gpio/consumer.h>
#include <linux/i2c.h>
-#include <linux/of.h>
-#include <linux/of_gpio.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
+#include <linux/types.h>
+
#include <linux/leds-ti-lmu-common.h>
#define LM3697_REV 0x0
@@ -221,14 +226,12 @@ static int lm3697_probe_dt(struct lm3697 *priv)
ret = fwnode_property_read_u32(child, "reg", &control_bank);
if (ret) {
dev_err(dev, "reg property missing\n");
- fwnode_handle_put(child);
goto child_out;
}
if (control_bank > LM3697_CONTROL_B) {
dev_err(dev, "reg property is invalid\n");
ret = -EINVAL;
- fwnode_handle_put(child);
goto child_out;
}
@@ -259,7 +262,6 @@ static int lm3697_probe_dt(struct lm3697 *priv)
led->num_leds);
if (ret) {
dev_err(dev, "led-sources property missing\n");
- fwnode_handle_put(child);
goto child_out;
}
@@ -284,14 +286,16 @@ static int lm3697_probe_dt(struct lm3697 *priv)
&init_data);
if (ret) {
dev_err(dev, "led register err: %d\n", ret);
- fwnode_handle_put(child);
goto child_out;
}
i++;
}
+ return ret;
+
child_out:
+ fwnode_handle_put(child);
return ret;
}
diff --git a/drivers/leds/leds-lt3593.c b/drivers/leds/leds-lt3593.c
index 3bb52d3165d9..d0160fde0f94 100644
--- a/drivers/leds/leds-lt3593.c
+++ b/drivers/leds/leds-lt3593.c
@@ -97,10 +97,9 @@ static int lt3593_led_probe(struct platform_device *pdev)
init_data.default_label = ":";
ret = devm_led_classdev_register_ext(dev, &led_data->cdev, &init_data);
- if (ret < 0) {
- fwnode_handle_put(child);
+ fwnode_handle_put(child);
+ if (ret < 0)
return ret;
- }
platform_set_drvdata(pdev, led_data);
diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c
index 7087ca4592fc..a6b5699aeae4 100644
--- a/drivers/leds/leds-pca955x.c
+++ b/drivers/leds/leds-pca955x.c
@@ -127,9 +127,9 @@ struct pca955x_led {
struct pca955x *pca955x;
struct led_classdev led_cdev;
int led_num; /* 0 .. 15 potentially */
- char name[32];
u32 type;
- const char *default_trigger;
+ int default_state;
+ struct fwnode_handle *fwnode;
};
struct pca955x_platform_data {
@@ -166,11 +166,10 @@ static inline u8 pca955x_ledsel(u8 oldval, int led_num, int state)
static int pca955x_write_psc(struct i2c_client *client, int n, u8 val)
{
struct pca955x *pca955x = i2c_get_clientdata(client);
+ u8 cmd = pca95xx_num_input_regs(pca955x->chipdef->bits) + (2 * n);
int ret;
- ret = i2c_smbus_write_byte_data(client,
- pca95xx_num_input_regs(pca955x->chipdef->bits) + 2*n,
- val);
+ ret = i2c_smbus_write_byte_data(client, cmd, val);
if (ret < 0)
dev_err(&client->dev, "%s: reg 0x%x, val 0x%x, err %d\n",
__func__, n, val, ret);
@@ -187,11 +186,10 @@ static int pca955x_write_psc(struct i2c_client *client, int n, u8 val)
static int pca955x_write_pwm(struct i2c_client *client, int n, u8 val)
{
struct pca955x *pca955x = i2c_get_clientdata(client);
+ u8 cmd = pca95xx_num_input_regs(pca955x->chipdef->bits) + 1 + (2 * n);
int ret;
- ret = i2c_smbus_write_byte_data(client,
- pca95xx_num_input_regs(pca955x->chipdef->bits) + 1 + 2*n,
- val);
+ ret = i2c_smbus_write_byte_data(client, cmd, val);
if (ret < 0)
dev_err(&client->dev, "%s: reg 0x%x, val 0x%x, err %d\n",
__func__, n, val, ret);
@@ -205,11 +203,10 @@ static int pca955x_write_pwm(struct i2c_client *client, int n, u8 val)
static int pca955x_write_ls(struct i2c_client *client, int n, u8 val)
{
struct pca955x *pca955x = i2c_get_clientdata(client);
+ u8 cmd = pca95xx_num_input_regs(pca955x->chipdef->bits) + 4 + n;
int ret;
- ret = i2c_smbus_write_byte_data(client,
- pca95xx_num_input_regs(pca955x->chipdef->bits) + 4 + n,
- val);
+ ret = i2c_smbus_write_byte_data(client, cmd, val);
if (ret < 0)
dev_err(&client->dev, "%s: reg 0x%x, val 0x%x, err %d\n",
__func__, n, val, ret);
@@ -223,10 +220,10 @@ static int pca955x_write_ls(struct i2c_client *client, int n, u8 val)
static int pca955x_read_ls(struct i2c_client *client, int n, u8 *val)
{
struct pca955x *pca955x = i2c_get_clientdata(client);
+ u8 cmd = pca95xx_num_input_regs(pca955x->chipdef->bits) + 4 + n;
int ret;
- ret = i2c_smbus_read_byte_data(client,
- pca95xx_num_input_regs(pca955x->chipdef->bits) + 4 + n);
+ ret = i2c_smbus_read_byte_data(client, cmd);
if (ret < 0) {
dev_err(&client->dev, "%s: reg 0x%x, err %d\n",
__func__, n, ret);
@@ -236,6 +233,57 @@ static int pca955x_read_ls(struct i2c_client *client, int n, u8 *val)
return 0;
}
+static int pca955x_read_pwm(struct i2c_client *client, int n, u8 *val)
+{
+ struct pca955x *pca955x = i2c_get_clientdata(client);
+ u8 cmd = pca95xx_num_input_regs(pca955x->chipdef->bits) + 1 + (2 * n);
+ int ret;
+
+ ret = i2c_smbus_read_byte_data(client, cmd);
+ if (ret < 0) {
+ dev_err(&client->dev, "%s: reg 0x%x, err %d\n",
+ __func__, n, ret);
+ return ret;
+ }
+ *val = (u8)ret;
+ return 0;
+}
+
+static enum led_brightness pca955x_led_get(struct led_classdev *led_cdev)
+{
+ struct pca955x_led *pca955x_led = container_of(led_cdev,
+ struct pca955x_led,
+ led_cdev);
+ struct pca955x *pca955x = pca955x_led->pca955x;
+ u8 ls, pwm;
+ int ret;
+
+ ret = pca955x_read_ls(pca955x->client, pca955x_led->led_num / 4, &ls);
+ if (ret)
+ return ret;
+
+ ls = (ls >> ((pca955x_led->led_num % 4) << 1)) & 0x3;
+ switch (ls) {
+ case PCA955X_LS_LED_ON:
+ ret = LED_FULL;
+ break;
+ case PCA955X_LS_LED_OFF:
+ ret = LED_OFF;
+ break;
+ case PCA955X_LS_BLINK0:
+ ret = LED_HALF;
+ break;
+ case PCA955X_LS_BLINK1:
+ ret = pca955x_read_pwm(pca955x->client, 1, &pwm);
+ if (ret)
+ return ret;
+ ret = 255 - pwm;
+ break;
+ }
+
+ return ret;
+}
+
static int pca955x_led_set(struct led_classdev *led_cdev,
enum led_brightness value)
{
@@ -371,6 +419,7 @@ static struct pca955x_platform_data *
pca955x_get_pdata(struct i2c_client *client, struct pca955x_chipdef *chip)
{
struct pca955x_platform_data *pdata;
+ struct pca955x_led *led;
struct fwnode_handle *child;
int count;
@@ -389,7 +438,7 @@ pca955x_get_pdata(struct i2c_client *client, struct pca955x_chipdef *chip)
return ERR_PTR(-ENOMEM);
device_for_each_child_node(&client->dev, child) {
- const char *name;
+ const char *state;
u32 reg;
int res;
@@ -397,17 +446,22 @@ pca955x_get_pdata(struct i2c_client *client, struct pca955x_chipdef *chip)
if ((res != 0) || (reg >= chip->bits))
continue;
- res = fwnode_property_read_string(child, "label", &name);
- if ((res != 0) && is_of_node(child))
- name = to_of_node(child)->name;
-
- snprintf(pdata->leds[reg].name, sizeof(pdata->leds[reg].name),
- "%s", name);
-
- pdata->leds[reg].type = PCA955X_TYPE_LED;
- fwnode_property_read_u32(child, "type", &pdata->leds[reg].type);
- fwnode_property_read_string(child, "linux,default-trigger",
- &pdata->leds[reg].default_trigger);
+ led = &pdata->leds[reg];
+ led->type = PCA955X_TYPE_LED;
+ led->fwnode = child;
+ fwnode_property_read_u32(child, "type", &led->type);
+
+ if (!fwnode_property_read_string(child, "default-state",
+ &state)) {
+ if (!strcmp(state, "keep"))
+ led->default_state = LEDS_GPIO_DEFSTATE_KEEP;
+ else if (!strcmp(state, "on"))
+ led->default_state = LEDS_GPIO_DEFSTATE_ON;
+ else
+ led->default_state = LEDS_GPIO_DEFSTATE_OFF;
+ } else {
+ led->default_state = LEDS_GPIO_DEFSTATE_OFF;
+ }
}
pdata->num_leds = chip->bits;
@@ -425,18 +479,38 @@ static const struct of_device_id of_pca955x_match[] = {
};
MODULE_DEVICE_TABLE(of, of_pca955x_match);
-static int pca955x_probe(struct i2c_client *client,
- const struct i2c_device_id *id)
+static int pca955x_probe(struct i2c_client *client)
{
struct pca955x *pca955x;
struct pca955x_led *pca955x_led;
struct pca955x_chipdef *chip;
+ struct led_classdev *led;
+ struct led_init_data init_data;
struct i2c_adapter *adapter;
int i, err;
struct pca955x_platform_data *pdata;
int ngpios = 0;
+ bool set_default_label = false;
+ bool keep_pwm = false;
+ char default_label[8];
+ enum pca955x_type chip_type;
+ const void *md = device_get_match_data(&client->dev);
+
+ if (md) {
+ chip_type = (enum pca955x_type)md;
+ } else {
+ const struct i2c_device_id *id = i2c_match_id(pca955x_id,
+ client);
+
+ if (id) {
+ chip_type = (enum pca955x_type)id->driver_data;
+ } else {
+ dev_err(&client->dev, "unknown chip\n");
+ return -ENODEV;
+ }
+ }
- chip = &pca955x_chipdefs[id->driver_data];
+ chip = &pca955x_chipdefs[chip_type];
adapter = client->adapter;
pdata = dev_get_platdata(&client->dev);
if (!pdata) {
@@ -449,13 +523,13 @@ static int pca955x_probe(struct i2c_client *client,
if ((client->addr & ~((1 << chip->slv_addr_shift) - 1)) !=
chip->slv_addr) {
dev_err(&client->dev, "invalid slave address %02x\n",
- client->addr);
+ client->addr);
return -ENODEV;
}
dev_info(&client->dev, "leds-pca955x: Using %s %d-bit LED driver at "
- "slave address 0x%02x\n",
- client->name, chip->bits, client->addr);
+ "slave address 0x%02x\n", client->name, chip->bits,
+ client->addr);
if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
return -EIO;
@@ -471,8 +545,8 @@ static int pca955x_probe(struct i2c_client *client,
if (!pca955x)
return -ENOMEM;
- pca955x->leds = devm_kcalloc(&client->dev,
- chip->bits, sizeof(*pca955x_led), GFP_KERNEL);
+ pca955x->leds = devm_kcalloc(&client->dev, chip->bits,
+ sizeof(*pca955x_led), GFP_KERNEL);
if (!pca955x->leds)
return -ENOMEM;
@@ -482,6 +556,9 @@ static int pca955x_probe(struct i2c_client *client,
pca955x->client = client;
pca955x->chipdef = chip;
+ init_data.devname_mandatory = false;
+ init_data.devicename = "pca955x";
+
for (i = 0; i < chip->bits; i++) {
pca955x_led = &pca955x->leds[i];
pca955x_led->led_num = i;
@@ -495,35 +572,60 @@ static int pca955x_probe(struct i2c_client *client,
ngpios++;
break;
case PCA955X_TYPE_LED:
- /*
- * Platform data can specify LED names and
- * default triggers
- */
- if (pdata->leds[i].name[0] == '\0')
- snprintf(pdata->leds[i].name,
- sizeof(pdata->leds[i].name), "%d", i);
-
- snprintf(pca955x_led->name,
- sizeof(pca955x_led->name), "pca955x:%s",
- pdata->leds[i].name);
-
- if (pdata->leds[i].default_trigger)
- pca955x_led->led_cdev.default_trigger =
- pdata->leds[i].default_trigger;
-
- pca955x_led->led_cdev.name = pca955x_led->name;
- pca955x_led->led_cdev.brightness_set_blocking =
- pca955x_led_set;
-
- err = devm_led_classdev_register(&client->dev,
- &pca955x_led->led_cdev);
+ led = &pca955x_led->led_cdev;
+ led->brightness_set_blocking = pca955x_led_set;
+ led->brightness_get = pca955x_led_get;
+
+ if (pdata->leds[i].default_state ==
+ LEDS_GPIO_DEFSTATE_OFF) {
+ err = pca955x_led_set(led, LED_OFF);
+ if (err)
+ return err;
+ } else if (pdata->leds[i].default_state ==
+ LEDS_GPIO_DEFSTATE_ON) {
+ err = pca955x_led_set(led, LED_FULL);
+ if (err)
+ return err;
+ }
+
+ init_data.fwnode = pdata->leds[i].fwnode;
+
+ if (is_of_node(init_data.fwnode)) {
+ if (to_of_node(init_data.fwnode)->name[0] ==
+ '\0')
+ set_default_label = true;
+ else
+ set_default_label = false;
+ } else {
+ set_default_label = true;
+ }
+
+ if (set_default_label) {
+ snprintf(default_label, sizeof(default_label),
+ "%d", i);
+ init_data.default_label = default_label;
+ } else {
+ init_data.default_label = NULL;
+ }
+
+ err = devm_led_classdev_register_ext(&client->dev, led,
+ &init_data);
if (err)
return err;
- /* Turn off LED */
- err = pca955x_led_set(&pca955x_led->led_cdev, LED_OFF);
- if (err)
- return err;
+ /*
+ * For default-state == "keep", let the core update the
+ * brightness from the hardware, then check the
+ * brightness to see if it's using PWM1. If so, PWM1
+ * should not be written below.
+ */
+ if (pdata->leds[i].default_state ==
+ LEDS_GPIO_DEFSTATE_KEEP) {
+ if (led->brightness != LED_FULL &&
+ led->brightness != LED_OFF &&
+ led->brightness != LED_HALF)
+ keep_pwm = true;
+ }
}
}
@@ -532,10 +634,12 @@ static int pca955x_probe(struct i2c_client *client,
if (err)
return err;
- /* PWM1 is used for variable brightness, default to OFF */
- err = pca955x_write_pwm(client, 1, 0);
- if (err)
- return err;
+ if (!keep_pwm) {
+ /* PWM1 is used for variable brightness, default to OFF */
+ err = pca955x_write_pwm(client, 1, 0);
+ if (err)
+ return err;
+ }
/* Set to fast frequency so we do not see flashing */
err = pca955x_write_psc(client, 0, 0);
@@ -581,7 +685,7 @@ static struct i2c_driver pca955x_driver = {
.name = "leds-pca955x",
.of_match_table = of_pca955x_match,
},
- .probe = pca955x_probe,
+ .probe_new = pca955x_probe,
.id_table = pca955x_id,
};
diff --git a/drivers/leds/leds-pwm.c b/drivers/leds/leds-pwm.c
index d71e9fa5c8de..6832180c1c54 100644
--- a/drivers/leds/leds-pwm.c
+++ b/drivers/leds/leds-pwm.c
@@ -17,10 +17,12 @@
#include <linux/err.h>
#include <linux/pwm.h>
#include <linux/slab.h>
+#include "leds.h"
struct led_pwm {
const char *name;
u8 active_low;
+ u8 default_state;
unsigned int max_brightness;
};
@@ -77,7 +79,38 @@ static int led_pwm_add(struct device *dev, struct led_pwm_priv *priv,
led_data->cdev.brightness_set_blocking = led_pwm_set;
- pwm_init_state(led_data->pwm, &led_data->pwmstate);
+ /* init PWM state */
+ switch (led->default_state) {
+ case LEDS_DEFSTATE_KEEP:
+ pwm_get_state(led_data->pwm, &led_data->pwmstate);
+ if (led_data->pwmstate.period)
+ break;
+ led->default_state = LEDS_DEFSTATE_OFF;
+ dev_warn(dev,
+ "failed to read period for %s, default to off",
+ led->name);
+ fallthrough;
+ default:
+ pwm_init_state(led_data->pwm, &led_data->pwmstate);
+ break;
+ }
+
+ /* set brightness */
+ switch (led->default_state) {
+ case LEDS_DEFSTATE_ON:
+ led_data->cdev.brightness = led->max_brightness;
+ break;
+ case LEDS_DEFSTATE_KEEP:
+ {
+ uint64_t brightness;
+
+ brightness = led->max_brightness;
+ brightness *= led_data->pwmstate.duty_cycle;
+ do_div(brightness, led_data->pwmstate.period);
+ led_data->cdev.brightness = brightness;
+ }
+ break;
+ }
ret = devm_led_classdev_register_ext(dev, &led_data->cdev, &init_data);
if (ret) {
@@ -86,11 +119,13 @@ static int led_pwm_add(struct device *dev, struct led_pwm_priv *priv,
return ret;
}
- ret = led_pwm_set(&led_data->cdev, led_data->cdev.brightness);
- if (ret) {
- dev_err(dev, "failed to set led PWM value for %s: %d",
- led->name, ret);
- return ret;
+ if (led->default_state != LEDS_DEFSTATE_KEEP) {
+ ret = led_pwm_set(&led_data->cdev, led_data->cdev.brightness);
+ if (ret) {
+ dev_err(dev, "failed to set led PWM value for %s: %d",
+ led->name, ret);
+ return ret;
+ }
}
priv->num_leds++;
@@ -120,6 +155,8 @@ static int led_pwm_create_fwnode(struct device *dev, struct led_pwm_priv *priv)
fwnode_property_read_u32(fwnode, "max-brightness",
&led.max_brightness);
+ led.default_state = led_init_default_state_get(fwnode);
+
ret = led_pwm_add(dev, priv, &led, fwnode);
if (ret)
goto err_child_out;
diff --git a/drivers/leds/leds.h b/drivers/leds/leds.h
index 345062ccabda..aa64757a4d89 100644
--- a/drivers/leds/leds.h
+++ b/drivers/leds/leds.h
@@ -27,6 +27,7 @@ ssize_t led_trigger_read(struct file *filp, struct kobject *kobj,
ssize_t led_trigger_write(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf,
loff_t pos, size_t count);
+enum led_default_state led_init_default_state_get(struct fwnode_handle *fwnode);
extern struct rw_semaphore leds_list_lock;
extern struct list_head leds_list;
diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig
index b77a01bd27f4..1f1d57288085 100644
--- a/drivers/leds/trigger/Kconfig
+++ b/drivers/leds/trigger/Kconfig
@@ -34,7 +34,7 @@ config LEDS_TRIGGER_ONESHOT
config LEDS_TRIGGER_DISK
bool "LED Disk Trigger"
- depends on IDE_GD_ATA || ATA
+ depends on ATA
help
This allows LEDs to be controlled by disk activity.
If unsure, say Y.
diff --git a/drivers/leds/trigger/ledtrig-audio.c b/drivers/leds/trigger/ledtrig-audio.c
index f76621e88482..c6b437e6369b 100644
--- a/drivers/leds/trigger/ledtrig-audio.c
+++ b/drivers/leds/trigger/ledtrig-audio.c
@@ -6,10 +6,33 @@
#include <linux/kernel.h>
#include <linux/leds.h>
#include <linux/module.h>
+#include "../leds.h"
-static struct led_trigger *ledtrig_audio[NUM_AUDIO_LEDS];
static enum led_brightness audio_state[NUM_AUDIO_LEDS];
+static int ledtrig_audio_mute_activate(struct led_classdev *led_cdev)
+{
+ led_set_brightness_nosleep(led_cdev, audio_state[LED_AUDIO_MUTE]);
+ return 0;
+}
+
+static int ledtrig_audio_micmute_activate(struct led_classdev *led_cdev)
+{
+ led_set_brightness_nosleep(led_cdev, audio_state[LED_AUDIO_MICMUTE]);
+ return 0;
+}
+
+static struct led_trigger ledtrig_audio[NUM_AUDIO_LEDS] = {
+ [LED_AUDIO_MUTE] = {
+ .name = "audio-mute",
+ .activate = ledtrig_audio_mute_activate,
+ },
+ [LED_AUDIO_MICMUTE] = {
+ .name = "audio-micmute",
+ .activate = ledtrig_audio_micmute_activate,
+ },
+};
+
enum led_brightness ledtrig_audio_get(enum led_audio type)
{
return audio_state[type];
@@ -19,24 +42,22 @@ EXPORT_SYMBOL_GPL(ledtrig_audio_get);
void ledtrig_audio_set(enum led_audio type, enum led_brightness state)
{
audio_state[type] = state;
- led_trigger_event(ledtrig_audio[type], state);
+ led_trigger_event(&ledtrig_audio[type], state);
}
EXPORT_SYMBOL_GPL(ledtrig_audio_set);
static int __init ledtrig_audio_init(void)
{
- led_trigger_register_simple("audio-mute",
- &ledtrig_audio[LED_AUDIO_MUTE]);
- led_trigger_register_simple("audio-micmute",
- &ledtrig_audio[LED_AUDIO_MICMUTE]);
+ led_trigger_register(&ledtrig_audio[LED_AUDIO_MUTE]);
+ led_trigger_register(&ledtrig_audio[LED_AUDIO_MICMUTE]);
return 0;
}
module_init(ledtrig_audio_init);
static void __exit ledtrig_audio_exit(void)
{
- led_trigger_unregister_simple(ledtrig_audio[LED_AUDIO_MUTE]);
- led_trigger_unregister_simple(ledtrig_audio[LED_AUDIO_MICMUTE]);
+ led_trigger_unregister(&ledtrig_audio[LED_AUDIO_MUTE]);
+ led_trigger_unregister(&ledtrig_audio[LED_AUDIO_MICMUTE]);
}
module_exit(ledtrig_audio_exit);
diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig
deleted file mode 100644
index 04caa0f2d445..000000000000
--- a/drivers/lightnvm/Kconfig
+++ /dev/null
@@ -1,44 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Open-Channel SSD NVM configuration
-#
-
-menuconfig NVM
- bool "Open-Channel SSD target support (DEPRECATED)"
- depends on BLOCK
- help
- Say Y here to get to enable Open-channel SSDs.
-
- Open-Channel SSDs implement a set of extension to SSDs, that
- exposes direct access to the underlying non-volatile memory.
-
- If you say N, all options in this submenu will be skipped and disabled
- only do this if you know what you are doing.
-
- This code is deprecated and will be removed in Linux 5.15.
-
-if NVM
-
-config NVM_PBLK
- tristate "Physical Block Device Open-Channel SSD target"
- select CRC32
- help
- Allows an open-channel SSD to be exposed as a block device to the
- host. The target assumes the device exposes raw flash and must be
- explicitly managed by the host.
-
- Please note the disk format is considered EXPERIMENTAL for now.
-
-if NVM_PBLK
-
-config NVM_PBLK_DEBUG
- bool "PBlk Debug Support"
- default n
- help
- Enables debug support for pblk. This includes extra checks, more
- vocal error messages, and extra tracking fields in the pblk sysfs
- entries.
-
-endif # NVM_PBLK_DEBUG
-
-endif # NVM
diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile
deleted file mode 100644
index 97d9d7c71550..000000000000
--- a/drivers/lightnvm/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for Open-Channel SSDs.
-#
-
-obj-$(CONFIG_NVM) := core.o
-obj-$(CONFIG_NVM_PBLK) += pblk.o
-pblk-y := pblk-init.o pblk-core.o pblk-rb.o \
- pblk-write.o pblk-cache.o pblk-read.o \
- pblk-gc.o pblk-recovery.o pblk-map.o \
- pblk-rl.o pblk-sysfs.o
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
deleted file mode 100644
index cf8a75494833..000000000000
--- a/drivers/lightnvm/core.c
+++ /dev/null
@@ -1,1440 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2015 IT University of Copenhagen. All rights reserved.
- * Initial release: Matias Bjorling <m@bjorling.me>
- */
-
-#define pr_fmt(fmt) "nvm: " fmt
-
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/sem.h>
-#include <linux/bitmap.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/miscdevice.h>
-#include <linux/lightnvm.h>
-#include <linux/sched/sysctl.h>
-
-static LIST_HEAD(nvm_tgt_types);
-static DECLARE_RWSEM(nvm_tgtt_lock);
-static LIST_HEAD(nvm_devices);
-static DECLARE_RWSEM(nvm_lock);
-
-/* Map between virtual and physical channel and lun */
-struct nvm_ch_map {
- int ch_off;
- int num_lun;
- int *lun_offs;
-};
-
-struct nvm_dev_map {
- struct nvm_ch_map *chnls;
- int num_ch;
-};
-
-static void nvm_free(struct kref *ref);
-
-static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name)
-{
- struct nvm_target *tgt;
-
- list_for_each_entry(tgt, &dev->targets, list)
- if (!strcmp(name, tgt->disk->disk_name))
- return tgt;
-
- return NULL;
-}
-
-static bool nvm_target_exists(const char *name)
-{
- struct nvm_dev *dev;
- struct nvm_target *tgt;
- bool ret = false;
-
- down_write(&nvm_lock);
- list_for_each_entry(dev, &nvm_devices, devices) {
- mutex_lock(&dev->mlock);
- list_for_each_entry(tgt, &dev->targets, list) {
- if (!strcmp(name, tgt->disk->disk_name)) {
- ret = true;
- mutex_unlock(&dev->mlock);
- goto out;
- }
- }
- mutex_unlock(&dev->mlock);
- }
-
-out:
- up_write(&nvm_lock);
- return ret;
-}
-
-static int nvm_reserve_luns(struct nvm_dev *dev, int lun_begin, int lun_end)
-{
- int i;
-
- for (i = lun_begin; i <= lun_end; i++) {
- if (test_and_set_bit(i, dev->lun_map)) {
- pr_err("lun %d already allocated\n", i);
- goto err;
- }
- }
-
- return 0;
-err:
- while (--i >= lun_begin)
- clear_bit(i, dev->lun_map);
-
- return -EBUSY;
-}
-
-static void nvm_release_luns_err(struct nvm_dev *dev, int lun_begin,
- int lun_end)
-{
- int i;
-
- for (i = lun_begin; i <= lun_end; i++)
- WARN_ON(!test_and_clear_bit(i, dev->lun_map));
-}
-
-static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev, int clear)
-{
- struct nvm_dev *dev = tgt_dev->parent;
- struct nvm_dev_map *dev_map = tgt_dev->map;
- int i, j;
-
- for (i = 0; i < dev_map->num_ch; i++) {
- struct nvm_ch_map *ch_map = &dev_map->chnls[i];
- int *lun_offs = ch_map->lun_offs;
- int ch = i + ch_map->ch_off;
-
- if (clear) {
- for (j = 0; j < ch_map->num_lun; j++) {
- int lun = j + lun_offs[j];
- int lunid = (ch * dev->geo.num_lun) + lun;
-
- WARN_ON(!test_and_clear_bit(lunid,
- dev->lun_map));
- }
- }
-
- kfree(ch_map->lun_offs);
- }
-
- kfree(dev_map->chnls);
- kfree(dev_map);
-
- kfree(tgt_dev->luns);
- kfree(tgt_dev);
-}
-
-static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev,
- u16 lun_begin, u16 lun_end,
- u16 op)
-{
- struct nvm_tgt_dev *tgt_dev = NULL;
- struct nvm_dev_map *dev_rmap = dev->rmap;
- struct nvm_dev_map *dev_map;
- struct ppa_addr *luns;
- int num_lun = lun_end - lun_begin + 1;
- int luns_left = num_lun;
- int num_ch = num_lun / dev->geo.num_lun;
- int num_ch_mod = num_lun % dev->geo.num_lun;
- int bch = lun_begin / dev->geo.num_lun;
- int blun = lun_begin % dev->geo.num_lun;
- int lunid = 0;
- int lun_balanced = 1;
- int sec_per_lun, prev_num_lun;
- int i, j;
-
- num_ch = (num_ch_mod == 0) ? num_ch : num_ch + 1;
-
- dev_map = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL);
- if (!dev_map)
- goto err_dev;
-
- dev_map->chnls = kcalloc(num_ch, sizeof(struct nvm_ch_map), GFP_KERNEL);
- if (!dev_map->chnls)
- goto err_chnls;
-
- luns = kcalloc(num_lun, sizeof(struct ppa_addr), GFP_KERNEL);
- if (!luns)
- goto err_luns;
-
- prev_num_lun = (luns_left > dev->geo.num_lun) ?
- dev->geo.num_lun : luns_left;
- for (i = 0; i < num_ch; i++) {
- struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[i + bch];
- int *lun_roffs = ch_rmap->lun_offs;
- struct nvm_ch_map *ch_map = &dev_map->chnls[i];
- int *lun_offs;
- int luns_in_chnl = (luns_left > dev->geo.num_lun) ?
- dev->geo.num_lun : luns_left;
-
- if (lun_balanced && prev_num_lun != luns_in_chnl)
- lun_balanced = 0;
-
- ch_map->ch_off = ch_rmap->ch_off = bch;
- ch_map->num_lun = luns_in_chnl;
-
- lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
- if (!lun_offs)
- goto err_ch;
-
- for (j = 0; j < luns_in_chnl; j++) {
- luns[lunid].ppa = 0;
- luns[lunid].a.ch = i;
- luns[lunid++].a.lun = j;
-
- lun_offs[j] = blun;
- lun_roffs[j + blun] = blun;
- }
-
- ch_map->lun_offs = lun_offs;
-
- /* when starting a new channel, lun offset is reset */
- blun = 0;
- luns_left -= luns_in_chnl;
- }
-
- dev_map->num_ch = num_ch;
-
- tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL);
- if (!tgt_dev)
- goto err_ch;
-
- /* Inherit device geometry from parent */
- memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo));
-
- /* Target device only owns a portion of the physical device */
- tgt_dev->geo.num_ch = num_ch;
- tgt_dev->geo.num_lun = (lun_balanced) ? prev_num_lun : -1;
- tgt_dev->geo.all_luns = num_lun;
- tgt_dev->geo.all_chunks = num_lun * dev->geo.num_chk;
-
- tgt_dev->geo.op = op;
-
- sec_per_lun = dev->geo.clba * dev->geo.num_chk;
- tgt_dev->geo.total_secs = num_lun * sec_per_lun;
-
- tgt_dev->q = dev->q;
- tgt_dev->map = dev_map;
- tgt_dev->luns = luns;
- tgt_dev->parent = dev;
-
- return tgt_dev;
-err_ch:
- while (--i >= 0)
- kfree(dev_map->chnls[i].lun_offs);
- kfree(luns);
-err_luns:
- kfree(dev_map->chnls);
-err_chnls:
- kfree(dev_map);
-err_dev:
- return tgt_dev;
-}
-
-static struct nvm_tgt_type *__nvm_find_target_type(const char *name)
-{
- struct nvm_tgt_type *tt;
-
- list_for_each_entry(tt, &nvm_tgt_types, list)
- if (!strcmp(name, tt->name))
- return tt;
-
- return NULL;
-}
-
-static struct nvm_tgt_type *nvm_find_target_type(const char *name)
-{
- struct nvm_tgt_type *tt;
-
- down_write(&nvm_tgtt_lock);
- tt = __nvm_find_target_type(name);
- up_write(&nvm_tgtt_lock);
-
- return tt;
-}
-
-static int nvm_config_check_luns(struct nvm_geo *geo, int lun_begin,
- int lun_end)
-{
- if (lun_begin > lun_end || lun_end >= geo->all_luns) {
- pr_err("lun out of bound (%u:%u > %u)\n",
- lun_begin, lun_end, geo->all_luns - 1);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int __nvm_config_simple(struct nvm_dev *dev,
- struct nvm_ioctl_create_simple *s)
-{
- struct nvm_geo *geo = &dev->geo;
-
- if (s->lun_begin == -1 && s->lun_end == -1) {
- s->lun_begin = 0;
- s->lun_end = geo->all_luns - 1;
- }
-
- return nvm_config_check_luns(geo, s->lun_begin, s->lun_end);
-}
-
-static int __nvm_config_extended(struct nvm_dev *dev,
- struct nvm_ioctl_create_extended *e)
-{
- if (e->lun_begin == 0xFFFF && e->lun_end == 0xFFFF) {
- e->lun_begin = 0;
- e->lun_end = dev->geo.all_luns - 1;
- }
-
- /* op not set falls into target's default */
- if (e->op == 0xFFFF) {
- e->op = NVM_TARGET_DEFAULT_OP;
- } else if (e->op < NVM_TARGET_MIN_OP || e->op > NVM_TARGET_MAX_OP) {
- pr_err("invalid over provisioning value\n");
- return -EINVAL;
- }
-
- return nvm_config_check_luns(&dev->geo, e->lun_begin, e->lun_end);
-}
-
-static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
-{
- struct nvm_ioctl_create_extended e;
- struct gendisk *tdisk;
- struct nvm_tgt_type *tt;
- struct nvm_target *t;
- struct nvm_tgt_dev *tgt_dev;
- void *targetdata;
- unsigned int mdts;
- int ret;
-
- switch (create->conf.type) {
- case NVM_CONFIG_TYPE_SIMPLE:
- ret = __nvm_config_simple(dev, &create->conf.s);
- if (ret)
- return ret;
-
- e.lun_begin = create->conf.s.lun_begin;
- e.lun_end = create->conf.s.lun_end;
- e.op = NVM_TARGET_DEFAULT_OP;
- break;
- case NVM_CONFIG_TYPE_EXTENDED:
- ret = __nvm_config_extended(dev, &create->conf.e);
- if (ret)
- return ret;
-
- e = create->conf.e;
- break;
- default:
- pr_err("config type not valid\n");
- return -EINVAL;
- }
-
- tt = nvm_find_target_type(create->tgttype);
- if (!tt) {
- pr_err("target type %s not found\n", create->tgttype);
- return -EINVAL;
- }
-
- if ((tt->flags & NVM_TGT_F_HOST_L2P) != (dev->geo.dom & NVM_RSP_L2P)) {
- pr_err("device is incompatible with target L2P type.\n");
- return -EINVAL;
- }
-
- if (nvm_target_exists(create->tgtname)) {
- pr_err("target name already exists (%s)\n",
- create->tgtname);
- return -EINVAL;
- }
-
- ret = nvm_reserve_luns(dev, e.lun_begin, e.lun_end);
- if (ret)
- return ret;
-
- t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
- if (!t) {
- ret = -ENOMEM;
- goto err_reserve;
- }
-
- tgt_dev = nvm_create_tgt_dev(dev, e.lun_begin, e.lun_end, e.op);
- if (!tgt_dev) {
- pr_err("could not create target device\n");
- ret = -ENOMEM;
- goto err_t;
- }
-
- tdisk = blk_alloc_disk(dev->q->node);
- if (!tdisk) {
- ret = -ENOMEM;
- goto err_dev;
- }
-
- strlcpy(tdisk->disk_name, create->tgtname, sizeof(tdisk->disk_name));
- tdisk->major = 0;
- tdisk->first_minor = 0;
- tdisk->fops = tt->bops;
-
- targetdata = tt->init(tgt_dev, tdisk, create->flags);
- if (IS_ERR(targetdata)) {
- ret = PTR_ERR(targetdata);
- goto err_init;
- }
-
- tdisk->private_data = targetdata;
- tdisk->queue->queuedata = targetdata;
-
- mdts = (dev->geo.csecs >> 9) * NVM_MAX_VLBA;
- if (dev->geo.mdts) {
- mdts = min_t(u32, dev->geo.mdts,
- (dev->geo.csecs >> 9) * NVM_MAX_VLBA);
- }
- blk_queue_max_hw_sectors(tdisk->queue, mdts);
-
- set_capacity(tdisk, tt->capacity(targetdata));
- add_disk(tdisk);
-
- if (tt->sysfs_init && tt->sysfs_init(tdisk)) {
- ret = -ENOMEM;
- goto err_sysfs;
- }
-
- t->type = tt;
- t->disk = tdisk;
- t->dev = tgt_dev;
-
- mutex_lock(&dev->mlock);
- list_add_tail(&t->list, &dev->targets);
- mutex_unlock(&dev->mlock);
-
- __module_get(tt->owner);
-
- return 0;
-err_sysfs:
- if (tt->exit)
- tt->exit(targetdata, true);
-err_init:
- blk_cleanup_disk(tdisk);
-err_dev:
- nvm_remove_tgt_dev(tgt_dev, 0);
-err_t:
- kfree(t);
-err_reserve:
- nvm_release_luns_err(dev, e.lun_begin, e.lun_end);
- return ret;
-}
-
-static void __nvm_remove_target(struct nvm_target *t, bool graceful)
-{
- struct nvm_tgt_type *tt = t->type;
- struct gendisk *tdisk = t->disk;
-
- del_gendisk(tdisk);
-
- if (tt->sysfs_exit)
- tt->sysfs_exit(tdisk);
-
- if (tt->exit)
- tt->exit(tdisk->private_data, graceful);
-
- nvm_remove_tgt_dev(t->dev, 1);
- blk_cleanup_disk(tdisk);
- module_put(t->type->owner);
-
- list_del(&t->list);
- kfree(t);
-}
-
-/**
- * nvm_remove_tgt - Removes a target from the media manager
- * @remove: ioctl structure with target name to remove.
- *
- * Returns:
- * 0: on success
- * 1: on not found
- * <0: on error
- */
-static int nvm_remove_tgt(struct nvm_ioctl_remove *remove)
-{
- struct nvm_target *t = NULL;
- struct nvm_dev *dev;
-
- down_read(&nvm_lock);
- list_for_each_entry(dev, &nvm_devices, devices) {
- mutex_lock(&dev->mlock);
- t = nvm_find_target(dev, remove->tgtname);
- if (t) {
- mutex_unlock(&dev->mlock);
- break;
- }
- mutex_unlock(&dev->mlock);
- }
- up_read(&nvm_lock);
-
- if (!t) {
- pr_err("failed to remove target %s\n",
- remove->tgtname);
- return 1;
- }
-
- __nvm_remove_target(t, true);
- kref_put(&dev->ref, nvm_free);
-
- return 0;
-}
-
-static int nvm_register_map(struct nvm_dev *dev)
-{
- struct nvm_dev_map *rmap;
- int i, j;
-
- rmap = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL);
- if (!rmap)
- goto err_rmap;
-
- rmap->chnls = kcalloc(dev->geo.num_ch, sizeof(struct nvm_ch_map),
- GFP_KERNEL);
- if (!rmap->chnls)
- goto err_chnls;
-
- for (i = 0; i < dev->geo.num_ch; i++) {
- struct nvm_ch_map *ch_rmap;
- int *lun_roffs;
- int luns_in_chnl = dev->geo.num_lun;
-
- ch_rmap = &rmap->chnls[i];
-
- ch_rmap->ch_off = -1;
- ch_rmap->num_lun = luns_in_chnl;
-
- lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
- if (!lun_roffs)
- goto err_ch;
-
- for (j = 0; j < luns_in_chnl; j++)
- lun_roffs[j] = -1;
-
- ch_rmap->lun_offs = lun_roffs;
- }
-
- dev->rmap = rmap;
-
- return 0;
-err_ch:
- while (--i >= 0)
- kfree(rmap->chnls[i].lun_offs);
-err_chnls:
- kfree(rmap);
-err_rmap:
- return -ENOMEM;
-}
-
-static void nvm_unregister_map(struct nvm_dev *dev)
-{
- struct nvm_dev_map *rmap = dev->rmap;
- int i;
-
- for (i = 0; i < dev->geo.num_ch; i++)
- kfree(rmap->chnls[i].lun_offs);
-
- kfree(rmap->chnls);
- kfree(rmap);
-}
-
-static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
-{
- struct nvm_dev_map *dev_map = tgt_dev->map;
- struct nvm_ch_map *ch_map = &dev_map->chnls[p->a.ch];
- int lun_off = ch_map->lun_offs[p->a.lun];
-
- p->a.ch += ch_map->ch_off;
- p->a.lun += lun_off;
-}
-
-static void nvm_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
-{
- struct nvm_dev *dev = tgt_dev->parent;
- struct nvm_dev_map *dev_rmap = dev->rmap;
- struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[p->a.ch];
- int lun_roff = ch_rmap->lun_offs[p->a.lun];
-
- p->a.ch -= ch_rmap->ch_off;
- p->a.lun -= lun_roff;
-}
-
-static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev,
- struct ppa_addr *ppa_list, int nr_ppas)
-{
- int i;
-
- for (i = 0; i < nr_ppas; i++) {
- nvm_map_to_dev(tgt_dev, &ppa_list[i]);
- ppa_list[i] = generic_to_dev_addr(tgt_dev->parent, ppa_list[i]);
- }
-}
-
-static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev,
- struct ppa_addr *ppa_list, int nr_ppas)
-{
- int i;
-
- for (i = 0; i < nr_ppas; i++) {
- ppa_list[i] = dev_to_generic_addr(tgt_dev->parent, ppa_list[i]);
- nvm_map_to_tgt(tgt_dev, &ppa_list[i]);
- }
-}
-
-static void nvm_rq_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
-{
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
-
- nvm_ppa_tgt_to_dev(tgt_dev, ppa_list, rqd->nr_ppas);
-}
-
-static void nvm_rq_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
-{
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
-
- nvm_ppa_dev_to_tgt(tgt_dev, ppa_list, rqd->nr_ppas);
-}
-
-int nvm_register_tgt_type(struct nvm_tgt_type *tt)
-{
- int ret = 0;
-
- down_write(&nvm_tgtt_lock);
- if (__nvm_find_target_type(tt->name))
- ret = -EEXIST;
- else
- list_add(&tt->list, &nvm_tgt_types);
- up_write(&nvm_tgtt_lock);
-
- return ret;
-}
-EXPORT_SYMBOL(nvm_register_tgt_type);
-
-void nvm_unregister_tgt_type(struct nvm_tgt_type *tt)
-{
- if (!tt)
- return;
-
- down_write(&nvm_tgtt_lock);
- list_del(&tt->list);
- up_write(&nvm_tgtt_lock);
-}
-EXPORT_SYMBOL(nvm_unregister_tgt_type);
-
-void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
- dma_addr_t *dma_handler)
-{
- return dev->ops->dev_dma_alloc(dev, dev->dma_pool, mem_flags,
- dma_handler);
-}
-EXPORT_SYMBOL(nvm_dev_dma_alloc);
-
-void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler)
-{
- dev->ops->dev_dma_free(dev->dma_pool, addr, dma_handler);
-}
-EXPORT_SYMBOL(nvm_dev_dma_free);
-
-static struct nvm_dev *nvm_find_nvm_dev(const char *name)
-{
- struct nvm_dev *dev;
-
- list_for_each_entry(dev, &nvm_devices, devices)
- if (!strcmp(name, dev->name))
- return dev;
-
- return NULL;
-}
-
-static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
- const struct ppa_addr *ppas, int nr_ppas)
-{
- struct nvm_dev *dev = tgt_dev->parent;
- struct nvm_geo *geo = &tgt_dev->geo;
- int i, plane_cnt, pl_idx;
- struct ppa_addr ppa;
-
- if (geo->pln_mode == NVM_PLANE_SINGLE && nr_ppas == 1) {
- rqd->nr_ppas = nr_ppas;
- rqd->ppa_addr = ppas[0];
-
- return 0;
- }
-
- rqd->nr_ppas = nr_ppas;
- rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
- if (!rqd->ppa_list) {
- pr_err("failed to allocate dma memory\n");
- return -ENOMEM;
- }
-
- plane_cnt = geo->pln_mode;
- rqd->nr_ppas *= plane_cnt;
-
- for (i = 0; i < nr_ppas; i++) {
- for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) {
- ppa = ppas[i];
- ppa.g.pl = pl_idx;
- rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppa;
- }
- }
-
- return 0;
-}
-
-static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
- struct nvm_rq *rqd)
-{
- if (!rqd->ppa_list)
- return;
-
- nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
-}
-
-static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
-{
- int flags = 0;
-
- if (geo->version == NVM_OCSSD_SPEC_20)
- return 0;
-
- if (rqd->is_seq)
- flags |= geo->pln_mode >> 1;
-
- if (rqd->opcode == NVM_OP_PREAD)
- flags |= (NVM_IO_SCRAMBLE_ENABLE | NVM_IO_SUSPEND);
- else if (rqd->opcode == NVM_OP_PWRITE)
- flags |= NVM_IO_SCRAMBLE_ENABLE;
-
- return flags;
-}
-
-int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, void *buf)
-{
- struct nvm_dev *dev = tgt_dev->parent;
- int ret;
-
- if (!dev->ops->submit_io)
- return -ENODEV;
-
- nvm_rq_tgt_to_dev(tgt_dev, rqd);
-
- rqd->dev = tgt_dev;
- rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd);
-
- /* In case of error, fail with right address format */
- ret = dev->ops->submit_io(dev, rqd, buf);
- if (ret)
- nvm_rq_dev_to_tgt(tgt_dev, rqd);
- return ret;
-}
-EXPORT_SYMBOL(nvm_submit_io);
-
-static void nvm_sync_end_io(struct nvm_rq *rqd)
-{
- struct completion *waiting = rqd->private;
-
- complete(waiting);
-}
-
-static int nvm_submit_io_wait(struct nvm_dev *dev, struct nvm_rq *rqd,
- void *buf)
-{
- DECLARE_COMPLETION_ONSTACK(wait);
- int ret = 0;
-
- rqd->end_io = nvm_sync_end_io;
- rqd->private = &wait;
-
- ret = dev->ops->submit_io(dev, rqd, buf);
- if (ret)
- return ret;
-
- wait_for_completion_io(&wait);
-
- return 0;
-}
-
-int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
- void *buf)
-{
- struct nvm_dev *dev = tgt_dev->parent;
- int ret;
-
- if (!dev->ops->submit_io)
- return -ENODEV;
-
- nvm_rq_tgt_to_dev(tgt_dev, rqd);
-
- rqd->dev = tgt_dev;
- rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd);
-
- ret = nvm_submit_io_wait(dev, rqd, buf);
-
- return ret;
-}
-EXPORT_SYMBOL(nvm_submit_io_sync);
-
-void nvm_end_io(struct nvm_rq *rqd)
-{
- struct nvm_tgt_dev *tgt_dev = rqd->dev;
-
- /* Convert address space */
- if (tgt_dev)
- nvm_rq_dev_to_tgt(tgt_dev, rqd);
-
- if (rqd->end_io)
- rqd->end_io(rqd);
-}
-EXPORT_SYMBOL(nvm_end_io);
-
-static int nvm_submit_io_sync_raw(struct nvm_dev *dev, struct nvm_rq *rqd)
-{
- if (!dev->ops->submit_io)
- return -ENODEV;
-
- rqd->dev = NULL;
- rqd->flags = nvm_set_flags(&dev->geo, rqd);
-
- return nvm_submit_io_wait(dev, rqd, NULL);
-}
-
-static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa)
-{
- struct nvm_rq rqd = { NULL };
- struct bio bio;
- struct bio_vec bio_vec;
- struct page *page;
- int ret;
-
- page = alloc_page(GFP_KERNEL);
- if (!page)
- return -ENOMEM;
-
- bio_init(&bio, &bio_vec, 1);
- bio_add_page(&bio, page, PAGE_SIZE, 0);
- bio_set_op_attrs(&bio, REQ_OP_READ, 0);
-
- rqd.bio = &bio;
- rqd.opcode = NVM_OP_PREAD;
- rqd.is_seq = 1;
- rqd.nr_ppas = 1;
- rqd.ppa_addr = generic_to_dev_addr(dev, ppa);
-
- ret = nvm_submit_io_sync_raw(dev, &rqd);
- __free_page(page);
- if (ret)
- return ret;
-
- return rqd.error;
-}
-
-/*
- * Scans a 1.2 chunk first and last page to determine if its state.
- * If the chunk is found to be open, also scan it to update the write
- * pointer.
- */
-static int nvm_bb_chunk_scan(struct nvm_dev *dev, struct ppa_addr ppa,
- struct nvm_chk_meta *meta)
-{
- struct nvm_geo *geo = &dev->geo;
- int ret, pg, pl;
-
- /* sense first page */
- ret = nvm_bb_chunk_sense(dev, ppa);
- if (ret < 0) /* io error */
- return ret;
- else if (ret == 0) /* valid data */
- meta->state = NVM_CHK_ST_OPEN;
- else if (ret > 0) {
- /*
- * If empty page, the chunk is free, else it is an
- * actual io error. In that case, mark it offline.
- */
- switch (ret) {
- case NVM_RSP_ERR_EMPTYPAGE:
- meta->state = NVM_CHK_ST_FREE;
- return 0;
- case NVM_RSP_ERR_FAILCRC:
- case NVM_RSP_ERR_FAILECC:
- case NVM_RSP_WARN_HIGHECC:
- meta->state = NVM_CHK_ST_OPEN;
- goto scan;
- default:
- return -ret; /* other io error */
- }
- }
-
- /* sense last page */
- ppa.g.pg = geo->num_pg - 1;
- ppa.g.pl = geo->num_pln - 1;
-
- ret = nvm_bb_chunk_sense(dev, ppa);
- if (ret < 0) /* io error */
- return ret;
- else if (ret == 0) { /* Chunk fully written */
- meta->state = NVM_CHK_ST_CLOSED;
- meta->wp = geo->clba;
- return 0;
- } else if (ret > 0) {
- switch (ret) {
- case NVM_RSP_ERR_EMPTYPAGE:
- case NVM_RSP_ERR_FAILCRC:
- case NVM_RSP_ERR_FAILECC:
- case NVM_RSP_WARN_HIGHECC:
- meta->state = NVM_CHK_ST_OPEN;
- break;
- default:
- return -ret; /* other io error */
- }
- }
-
-scan:
- /*
- * chunk is open, we scan sequentially to update the write pointer.
- * We make the assumption that targets write data across all planes
- * before moving to the next page.
- */
- for (pg = 0; pg < geo->num_pg; pg++) {
- for (pl = 0; pl < geo->num_pln; pl++) {
- ppa.g.pg = pg;
- ppa.g.pl = pl;
-
- ret = nvm_bb_chunk_sense(dev, ppa);
- if (ret < 0) /* io error */
- return ret;
- else if (ret == 0) {
- meta->wp += geo->ws_min;
- } else if (ret > 0) {
- switch (ret) {
- case NVM_RSP_ERR_EMPTYPAGE:
- return 0;
- case NVM_RSP_ERR_FAILCRC:
- case NVM_RSP_ERR_FAILECC:
- case NVM_RSP_WARN_HIGHECC:
- meta->wp += geo->ws_min;
- break;
- default:
- return -ret; /* other io error */
- }
- }
- }
- }
-
- return 0;
-}
-
-/*
- * folds a bad block list from its plane representation to its
- * chunk representation.
- *
- * If any of the planes status are bad or grown bad, the chunk is marked
- * offline. If not bad, the first plane state acts as the chunk state.
- */
-static int nvm_bb_to_chunk(struct nvm_dev *dev, struct ppa_addr ppa,
- u8 *blks, int nr_blks, struct nvm_chk_meta *meta)
-{
- struct nvm_geo *geo = &dev->geo;
- int ret, blk, pl, offset, blktype;
-
- for (blk = 0; blk < geo->num_chk; blk++) {
- offset = blk * geo->pln_mode;
- blktype = blks[offset];
-
- for (pl = 0; pl < geo->pln_mode; pl++) {
- if (blks[offset + pl] &
- (NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) {
- blktype = blks[offset + pl];
- break;
- }
- }
-
- ppa.g.blk = blk;
-
- meta->wp = 0;
- meta->type = NVM_CHK_TP_W_SEQ;
- meta->wi = 0;
- meta->slba = generic_to_dev_addr(dev, ppa).ppa;
- meta->cnlb = dev->geo.clba;
-
- if (blktype == NVM_BLK_T_FREE) {
- ret = nvm_bb_chunk_scan(dev, ppa, meta);
- if (ret)
- return ret;
- } else {
- meta->state = NVM_CHK_ST_OFFLINE;
- }
-
- meta++;
- }
-
- return 0;
-}
-
-static int nvm_get_bb_meta(struct nvm_dev *dev, sector_t slba,
- int nchks, struct nvm_chk_meta *meta)
-{
- struct nvm_geo *geo = &dev->geo;
- struct ppa_addr ppa;
- u8 *blks;
- int ch, lun, nr_blks;
- int ret = 0;
-
- ppa.ppa = slba;
- ppa = dev_to_generic_addr(dev, ppa);
-
- if (ppa.g.blk != 0)
- return -EINVAL;
-
- if ((nchks % geo->num_chk) != 0)
- return -EINVAL;
-
- nr_blks = geo->num_chk * geo->pln_mode;
-
- blks = kmalloc(nr_blks, GFP_KERNEL);
- if (!blks)
- return -ENOMEM;
-
- for (ch = ppa.g.ch; ch < geo->num_ch; ch++) {
- for (lun = ppa.g.lun; lun < geo->num_lun; lun++) {
- struct ppa_addr ppa_gen, ppa_dev;
-
- if (!nchks)
- goto done;
-
- ppa_gen.ppa = 0;
- ppa_gen.g.ch = ch;
- ppa_gen.g.lun = lun;
- ppa_dev = generic_to_dev_addr(dev, ppa_gen);
-
- ret = dev->ops->get_bb_tbl(dev, ppa_dev, blks);
- if (ret)
- goto done;
-
- ret = nvm_bb_to_chunk(dev, ppa_gen, blks, nr_blks,
- meta);
- if (ret)
- goto done;
-
- meta += geo->num_chk;
- nchks -= geo->num_chk;
- }
- }
-done:
- kfree(blks);
- return ret;
-}
-
-int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa,
- int nchks, struct nvm_chk_meta *meta)
-{
- struct nvm_dev *dev = tgt_dev->parent;
-
- nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1);
-
- if (dev->geo.version == NVM_OCSSD_SPEC_12)
- return nvm_get_bb_meta(dev, (sector_t)ppa.ppa, nchks, meta);
-
- return dev->ops->get_chk_meta(dev, (sector_t)ppa.ppa, nchks, meta);
-}
-EXPORT_SYMBOL_GPL(nvm_get_chunk_meta);
-
-int nvm_set_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
- int nr_ppas, int type)
-{
- struct nvm_dev *dev = tgt_dev->parent;
- struct nvm_rq rqd;
- int ret;
-
- if (dev->geo.version == NVM_OCSSD_SPEC_20)
- return 0;
-
- if (nr_ppas > NVM_MAX_VLBA) {
- pr_err("unable to update all blocks atomically\n");
- return -EINVAL;
- }
-
- memset(&rqd, 0, sizeof(struct nvm_rq));
-
- nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas);
- nvm_rq_tgt_to_dev(tgt_dev, &rqd);
-
- ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
- nvm_free_rqd_ppalist(tgt_dev, &rqd);
- if (ret)
- return -EINVAL;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(nvm_set_chunk_meta);
-
-static int nvm_core_init(struct nvm_dev *dev)
-{
- struct nvm_geo *geo = &dev->geo;
- int ret;
-
- dev->lun_map = kcalloc(BITS_TO_LONGS(geo->all_luns),
- sizeof(unsigned long), GFP_KERNEL);
- if (!dev->lun_map)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&dev->area_list);
- INIT_LIST_HEAD(&dev->targets);
- mutex_init(&dev->mlock);
- spin_lock_init(&dev->lock);
-
- ret = nvm_register_map(dev);
- if (ret)
- goto err_fmtype;
-
- return 0;
-err_fmtype:
- kfree(dev->lun_map);
- return ret;
-}
-
-static void nvm_free(struct kref *ref)
-{
- struct nvm_dev *dev = container_of(ref, struct nvm_dev, ref);
-
- if (dev->dma_pool)
- dev->ops->destroy_dma_pool(dev->dma_pool);
-
- if (dev->rmap)
- nvm_unregister_map(dev);
-
- kfree(dev->lun_map);
- kfree(dev);
-}
-
-static int nvm_init(struct nvm_dev *dev)
-{
- struct nvm_geo *geo = &dev->geo;
- int ret = -EINVAL;
-
- if (dev->ops->identity(dev)) {
- pr_err("device could not be identified\n");
- goto err;
- }
-
- pr_debug("ver:%u.%u nvm_vendor:%x\n", geo->major_ver_id,
- geo->minor_ver_id, geo->vmnt);
-
- ret = nvm_core_init(dev);
- if (ret) {
- pr_err("could not initialize core structures.\n");
- goto err;
- }
-
- pr_info("registered %s [%u/%u/%u/%u/%u]\n",
- dev->name, dev->geo.ws_min, dev->geo.ws_opt,
- dev->geo.num_chk, dev->geo.all_luns,
- dev->geo.num_ch);
- return 0;
-err:
- pr_err("failed to initialize nvm\n");
- return ret;
-}
-
-struct nvm_dev *nvm_alloc_dev(int node)
-{
- struct nvm_dev *dev;
-
- dev = kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node);
- if (dev)
- kref_init(&dev->ref);
-
- return dev;
-}
-EXPORT_SYMBOL(nvm_alloc_dev);
-
-int nvm_register(struct nvm_dev *dev)
-{
- int ret, exp_pool_size;
-
- pr_warn_once("lightnvm support is deprecated and will be removed in Linux 5.15.\n");
-
- if (!dev->q || !dev->ops) {
- kref_put(&dev->ref, nvm_free);
- return -EINVAL;
- }
-
- ret = nvm_init(dev);
- if (ret) {
- kref_put(&dev->ref, nvm_free);
- return ret;
- }
-
- exp_pool_size = max_t(int, PAGE_SIZE,
- (NVM_MAX_VLBA * (sizeof(u64) + dev->geo.sos)));
- exp_pool_size = round_up(exp_pool_size, PAGE_SIZE);
-
- dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist",
- exp_pool_size);
- if (!dev->dma_pool) {
- pr_err("could not create dma pool\n");
- kref_put(&dev->ref, nvm_free);
- return -ENOMEM;
- }
-
- /* register device with a supported media manager */
- down_write(&nvm_lock);
- list_add(&dev->devices, &nvm_devices);
- up_write(&nvm_lock);
-
- return 0;
-}
-EXPORT_SYMBOL(nvm_register);
-
-void nvm_unregister(struct nvm_dev *dev)
-{
- struct nvm_target *t, *tmp;
-
- mutex_lock(&dev->mlock);
- list_for_each_entry_safe(t, tmp, &dev->targets, list) {
- if (t->dev->parent != dev)
- continue;
- __nvm_remove_target(t, false);
- kref_put(&dev->ref, nvm_free);
- }
- mutex_unlock(&dev->mlock);
-
- down_write(&nvm_lock);
- list_del(&dev->devices);
- up_write(&nvm_lock);
-
- kref_put(&dev->ref, nvm_free);
-}
-EXPORT_SYMBOL(nvm_unregister);
-
-static int __nvm_configure_create(struct nvm_ioctl_create *create)
-{
- struct nvm_dev *dev;
- int ret;
-
- down_write(&nvm_lock);
- dev = nvm_find_nvm_dev(create->dev);
- up_write(&nvm_lock);
-
- if (!dev) {
- pr_err("device not found\n");
- return -EINVAL;
- }
-
- kref_get(&dev->ref);
- ret = nvm_create_tgt(dev, create);
- if (ret)
- kref_put(&dev->ref, nvm_free);
-
- return ret;
-}
-
-static long nvm_ioctl_info(struct file *file, void __user *arg)
-{
- struct nvm_ioctl_info *info;
- struct nvm_tgt_type *tt;
- int tgt_iter = 0;
-
- info = memdup_user(arg, sizeof(struct nvm_ioctl_info));
- if (IS_ERR(info))
- return PTR_ERR(info);
-
- info->version[0] = NVM_VERSION_MAJOR;
- info->version[1] = NVM_VERSION_MINOR;
- info->version[2] = NVM_VERSION_PATCH;
-
- down_write(&nvm_tgtt_lock);
- list_for_each_entry(tt, &nvm_tgt_types, list) {
- struct nvm_ioctl_info_tgt *tgt = &info->tgts[tgt_iter];
-
- tgt->version[0] = tt->version[0];
- tgt->version[1] = tt->version[1];
- tgt->version[2] = tt->version[2];
- strncpy(tgt->tgtname, tt->name, NVM_TTYPE_NAME_MAX);
-
- tgt_iter++;
- }
-
- info->tgtsize = tgt_iter;
- up_write(&nvm_tgtt_lock);
-
- if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info))) {
- kfree(info);
- return -EFAULT;
- }
-
- kfree(info);
- return 0;
-}
-
-static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
-{
- struct nvm_ioctl_get_devices *devices;
- struct nvm_dev *dev;
- int i = 0;
-
- devices = kzalloc(sizeof(struct nvm_ioctl_get_devices), GFP_KERNEL);
- if (!devices)
- return -ENOMEM;
-
- down_write(&nvm_lock);
- list_for_each_entry(dev, &nvm_devices, devices) {
- struct nvm_ioctl_device_info *info = &devices->info[i];
-
- strlcpy(info->devname, dev->name, sizeof(info->devname));
-
- /* kept for compatibility */
- info->bmversion[0] = 1;
- info->bmversion[1] = 0;
- info->bmversion[2] = 0;
- strlcpy(info->bmname, "gennvm", sizeof(info->bmname));
- i++;
-
- if (i >= ARRAY_SIZE(devices->info)) {
- pr_err("max %zd devices can be reported.\n",
- ARRAY_SIZE(devices->info));
- break;
- }
- }
- up_write(&nvm_lock);
-
- devices->nr_devices = i;
-
- if (copy_to_user(arg, devices,
- sizeof(struct nvm_ioctl_get_devices))) {
- kfree(devices);
- return -EFAULT;
- }
-
- kfree(devices);
- return 0;
-}
-
-static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
-{
- struct nvm_ioctl_create create;
-
- if (copy_from_user(&create, arg, sizeof(struct nvm_ioctl_create)))
- return -EFAULT;
-
- if (create.conf.type == NVM_CONFIG_TYPE_EXTENDED &&
- create.conf.e.rsv != 0) {
- pr_err("reserved config field in use\n");
- return -EINVAL;
- }
-
- create.dev[DISK_NAME_LEN - 1] = '\0';
- create.tgttype[NVM_TTYPE_NAME_MAX - 1] = '\0';
- create.tgtname[DISK_NAME_LEN - 1] = '\0';
-
- if (create.flags != 0) {
- __u32 flags = create.flags;
-
- /* Check for valid flags */
- if (flags & NVM_TARGET_FACTORY)
- flags &= ~NVM_TARGET_FACTORY;
-
- if (flags) {
- pr_err("flag not supported\n");
- return -EINVAL;
- }
- }
-
- return __nvm_configure_create(&create);
-}
-
-static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
-{
- struct nvm_ioctl_remove remove;
-
- if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove)))
- return -EFAULT;
-
- remove.tgtname[DISK_NAME_LEN - 1] = '\0';
-
- if (remove.flags != 0) {
- pr_err("no flags supported\n");
- return -EINVAL;
- }
-
- return nvm_remove_tgt(&remove);
-}
-
-/* kept for compatibility reasons */
-static long nvm_ioctl_dev_init(struct file *file, void __user *arg)
-{
- struct nvm_ioctl_dev_init init;
-
- if (copy_from_user(&init, arg, sizeof(struct nvm_ioctl_dev_init)))
- return -EFAULT;
-
- if (init.flags != 0) {
- pr_err("no flags supported\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-/* Kept for compatibility reasons */
-static long nvm_ioctl_dev_factory(struct file *file, void __user *arg)
-{
- struct nvm_ioctl_dev_factory fact;
-
- if (copy_from_user(&fact, arg, sizeof(struct nvm_ioctl_dev_factory)))
- return -EFAULT;
-
- fact.dev[DISK_NAME_LEN - 1] = '\0';
-
- if (fact.flags & ~(NVM_FACTORY_NR_BITS - 1))
- return -EINVAL;
-
- return 0;
-}
-
-static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg)
-{
- void __user *argp = (void __user *)arg;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- switch (cmd) {
- case NVM_INFO:
- return nvm_ioctl_info(file, argp);
- case NVM_GET_DEVICES:
- return nvm_ioctl_get_devices(file, argp);
- case NVM_DEV_CREATE:
- return nvm_ioctl_dev_create(file, argp);
- case NVM_DEV_REMOVE:
- return nvm_ioctl_dev_remove(file, argp);
- case NVM_DEV_INIT:
- return nvm_ioctl_dev_init(file, argp);
- case NVM_DEV_FACTORY:
- return nvm_ioctl_dev_factory(file, argp);
- }
- return 0;
-}
-
-static const struct file_operations _ctl_fops = {
- .open = nonseekable_open,
- .unlocked_ioctl = nvm_ctl_ioctl,
- .owner = THIS_MODULE,
- .llseek = noop_llseek,
-};
-
-static struct miscdevice _nvm_misc = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = "lightnvm",
- .nodename = "lightnvm/control",
- .fops = &_ctl_fops,
-};
-builtin_misc_device(_nvm_misc);
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
deleted file mode 100644
index f185f1a00008..000000000000
--- a/drivers/lightnvm/pblk-cache.c
+++ /dev/null
@@ -1,137 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-cache.c - pblk's write cache
- */
-
-#include "pblk.h"
-
-void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
- unsigned long flags)
-{
- struct pblk_w_ctx w_ctx;
- sector_t lba = pblk_get_lba(bio);
- unsigned long start_time;
- unsigned int bpos, pos;
- int nr_entries = pblk_get_secs(bio);
- int i, ret;
-
- start_time = bio_start_io_acct(bio);
-
- /* Update the write buffer head (mem) with the entries that we can
- * write. The write in itself cannot fail, so there is no need to
- * rollback from here on.
- */
-retry:
- ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
- switch (ret) {
- case NVM_IO_REQUEUE:
- io_schedule();
- goto retry;
- case NVM_IO_ERR:
- pblk_pipeline_stop(pblk);
- bio_io_error(bio);
- goto out;
- }
-
- pblk_ppa_set_empty(&w_ctx.ppa);
- w_ctx.flags = flags;
- if (bio->bi_opf & REQ_PREFLUSH) {
- w_ctx.flags |= PBLK_FLUSH_ENTRY;
- pblk_write_kick(pblk);
- }
-
- if (unlikely(!bio_has_data(bio)))
- goto out;
-
- for (i = 0; i < nr_entries; i++) {
- void *data = bio_data(bio);
-
- w_ctx.lba = lba + i;
-
- pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + i);
- pblk_rb_write_entry_user(&pblk->rwb, data, w_ctx, pos);
-
- bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
- }
-
- atomic64_add(nr_entries, &pblk->user_wa);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(nr_entries, &pblk->inflight_writes);
- atomic_long_add(nr_entries, &pblk->req_writes);
-#endif
-
- pblk_rl_inserted(&pblk->rl, nr_entries);
-
-out:
- bio_end_io_acct(bio, start_time);
- pblk_write_should_kick(pblk);
-
- if (ret == NVM_IO_DONE)
- bio_endio(bio);
-}
-
-/*
- * On GC the incoming lbas are not necessarily sequential. Also, some of the
- * lbas might not be valid entries, which are marked as empty by the GC thread
- */
-int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
-{
- struct pblk_w_ctx w_ctx;
- unsigned int bpos, pos;
- void *data = gc_rq->data;
- int i, valid_entries;
-
- /* Update the write buffer head (mem) with the entries that we can
- * write. The write in itself cannot fail, so there is no need to
- * rollback from here on.
- */
-retry:
- if (!pblk_rb_may_write_gc(&pblk->rwb, gc_rq->secs_to_gc, &bpos)) {
- io_schedule();
- goto retry;
- }
-
- w_ctx.flags = PBLK_IOTYPE_GC;
- pblk_ppa_set_empty(&w_ctx.ppa);
-
- for (i = 0, valid_entries = 0; i < gc_rq->nr_secs; i++) {
- if (gc_rq->lba_list[i] == ADDR_EMPTY)
- continue;
-
- w_ctx.lba = gc_rq->lba_list[i];
-
- pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + valid_entries);
- pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_rq->line,
- gc_rq->paddr_list[i], pos);
-
- data += PBLK_EXPOSED_PAGE_SIZE;
- valid_entries++;
- }
-
- WARN_ONCE(gc_rq->secs_to_gc != valid_entries,
- "pblk: inconsistent GC write\n");
-
- atomic64_add(valid_entries, &pblk->gc_wa);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(valid_entries, &pblk->inflight_writes);
- atomic_long_add(valid_entries, &pblk->recov_gc_writes);
-#endif
-
- pblk_write_should_kick(pblk);
- return NVM_IO_OK;
-}
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
deleted file mode 100644
index 33d39d3dd343..000000000000
--- a/drivers/lightnvm/pblk-core.c
+++ /dev/null
@@ -1,2151 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-core.c - pblk's core functionality
- *
- */
-
-#define CREATE_TRACE_POINTS
-
-#include "pblk.h"
-#include "pblk-trace.h"
-
-static void pblk_line_mark_bb(struct work_struct *work)
-{
- struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
- ws);
- struct pblk *pblk = line_ws->pblk;
- struct nvm_tgt_dev *dev = pblk->dev;
- struct ppa_addr *ppa = line_ws->priv;
- int ret;
-
- ret = nvm_set_chunk_meta(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
- if (ret) {
- struct pblk_line *line;
- int pos;
-
- line = pblk_ppa_to_line(pblk, *ppa);
- pos = pblk_ppa_to_pos(&dev->geo, *ppa);
-
- pblk_err(pblk, "failed to mark bb, line:%d, pos:%d\n",
- line->id, pos);
- }
-
- kfree(ppa);
- mempool_free(line_ws, &pblk->gen_ws_pool);
-}
-
-static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
- struct ppa_addr ppa_addr)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct ppa_addr *ppa;
- int pos = pblk_ppa_to_pos(geo, ppa_addr);
-
- pblk_debug(pblk, "erase failed: line:%d, pos:%d\n", line->id, pos);
- atomic_long_inc(&pblk->erase_failed);
-
- atomic_dec(&line->blk_in_line);
- if (test_and_set_bit(pos, line->blk_bitmap))
- pblk_err(pblk, "attempted to erase bb: line:%d, pos:%d\n",
- line->id, pos);
-
- /* Not necessary to mark bad blocks on 2.0 spec. */
- if (geo->version == NVM_OCSSD_SPEC_20)
- return;
-
- ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC);
- if (!ppa)
- return;
-
- *ppa = ppa_addr;
- pblk_gen_run_ws(pblk, NULL, ppa, pblk_line_mark_bb,
- GFP_ATOMIC, pblk->bb_wq);
-}
-
-static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct nvm_chk_meta *chunk;
- struct pblk_line *line;
- int pos;
-
- line = pblk_ppa_to_line(pblk, rqd->ppa_addr);
- pos = pblk_ppa_to_pos(geo, rqd->ppa_addr);
- chunk = &line->chks[pos];
-
- atomic_dec(&line->left_seblks);
-
- if (rqd->error) {
- trace_pblk_chunk_reset(pblk_disk_name(pblk),
- &rqd->ppa_addr, PBLK_CHUNK_RESET_FAILED);
-
- chunk->state = NVM_CHK_ST_OFFLINE;
- pblk_mark_bb(pblk, line, rqd->ppa_addr);
- } else {
- trace_pblk_chunk_reset(pblk_disk_name(pblk),
- &rqd->ppa_addr, PBLK_CHUNK_RESET_DONE);
-
- chunk->state = NVM_CHK_ST_FREE;
- }
-
- trace_pblk_chunk_state(pblk_disk_name(pblk), &rqd->ppa_addr,
- chunk->state);
-
- atomic_dec(&pblk->inflight_io);
-}
-
-/* Erase completion assumes that only one block is erased at the time */
-static void pblk_end_io_erase(struct nvm_rq *rqd)
-{
- struct pblk *pblk = rqd->private;
-
- __pblk_end_io_erase(pblk, rqd);
- mempool_free(rqd, &pblk->e_rq_pool);
-}
-
-/*
- * Get information for all chunks from the device.
- *
- * The caller is responsible for freeing (vmalloc) the returned structure
- */
-struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct nvm_chk_meta *meta;
- struct ppa_addr ppa;
- unsigned long len;
- int ret;
-
- ppa.ppa = 0;
-
- len = geo->all_chunks * sizeof(*meta);
- meta = vzalloc(len);
- if (!meta)
- return ERR_PTR(-ENOMEM);
-
- ret = nvm_get_chunk_meta(dev, ppa, geo->all_chunks, meta);
- if (ret) {
- vfree(meta);
- return ERR_PTR(-EIO);
- }
-
- return meta;
-}
-
-struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk,
- struct nvm_chk_meta *meta,
- struct ppa_addr ppa)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int ch_off = ppa.m.grp * geo->num_chk * geo->num_lun;
- int lun_off = ppa.m.pu * geo->num_chk;
- int chk_off = ppa.m.chk;
-
- return meta + ch_off + lun_off + chk_off;
-}
-
-void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct list_head *move_list = NULL;
-
- /* Lines being reclaimed (GC'ed) cannot be invalidated. Before the L2P
- * table is modified with reclaimed sectors, a check is done to endure
- * that newer updates are not overwritten.
- */
- spin_lock(&line->lock);
- WARN_ON(line->state == PBLK_LINESTATE_FREE);
-
- if (test_and_set_bit(paddr, line->invalid_bitmap)) {
- WARN_ONCE(1, "pblk: double invalidate\n");
- spin_unlock(&line->lock);
- return;
- }
- le32_add_cpu(line->vsc, -1);
-
- if (line->state == PBLK_LINESTATE_CLOSED)
- move_list = pblk_line_gc_list(pblk, line);
- spin_unlock(&line->lock);
-
- if (move_list) {
- spin_lock(&l_mg->gc_lock);
- spin_lock(&line->lock);
- /* Prevent moving a line that has just been chosen for GC */
- if (line->state == PBLK_LINESTATE_GC) {
- spin_unlock(&line->lock);
- spin_unlock(&l_mg->gc_lock);
- return;
- }
- spin_unlock(&line->lock);
-
- list_move_tail(&line->list, move_list);
- spin_unlock(&l_mg->gc_lock);
- }
-}
-
-void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
-{
- struct pblk_line *line;
- u64 paddr;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Callers must ensure that the ppa points to a device address */
- BUG_ON(pblk_addr_in_cache(ppa));
- BUG_ON(pblk_ppa_empty(ppa));
-#endif
-
- line = pblk_ppa_to_line(pblk, ppa);
- paddr = pblk_dev_ppa_to_line_addr(pblk, ppa);
-
- __pblk_map_invalidate(pblk, line, paddr);
-}
-
-static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
- unsigned int nr_secs)
-{
- sector_t lba;
-
- spin_lock(&pblk->trans_lock);
- for (lba = slba; lba < slba + nr_secs; lba++) {
- struct ppa_addr ppa;
-
- ppa = pblk_trans_map_get(pblk, lba);
-
- if (!pblk_addr_in_cache(ppa) && !pblk_ppa_empty(ppa))
- pblk_map_invalidate(pblk, ppa);
-
- pblk_ppa_set_empty(&ppa);
- pblk_trans_map_set(pblk, lba, ppa);
- }
- spin_unlock(&pblk->trans_lock);
-}
-
-int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
-
- rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd->dma_meta_list);
- if (!rqd->meta_list)
- return -ENOMEM;
-
- if (rqd->nr_ppas == 1)
- return 0;
-
- rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size(pblk);
- rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size(pblk);
-
- return 0;
-}
-
-void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
-
- if (rqd->meta_list)
- nvm_dev_dma_free(dev->parent, rqd->meta_list,
- rqd->dma_meta_list);
-}
-
-/* Caller must guarantee that the request is a valid type */
-struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type)
-{
- mempool_t *pool;
- struct nvm_rq *rqd;
- int rq_size;
-
- switch (type) {
- case PBLK_WRITE:
- case PBLK_WRITE_INT:
- pool = &pblk->w_rq_pool;
- rq_size = pblk_w_rq_size;
- break;
- case PBLK_READ:
- pool = &pblk->r_rq_pool;
- rq_size = pblk_g_rq_size;
- break;
- default:
- pool = &pblk->e_rq_pool;
- rq_size = pblk_g_rq_size;
- }
-
- rqd = mempool_alloc(pool, GFP_KERNEL);
- memset(rqd, 0, rq_size);
-
- return rqd;
-}
-
-/* Typically used on completion path. Cannot guarantee request consistency */
-void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type)
-{
- mempool_t *pool;
-
- switch (type) {
- case PBLK_WRITE:
- kfree(((struct pblk_c_ctx *)nvm_rq_to_pdu(rqd))->lun_bitmap);
- fallthrough;
- case PBLK_WRITE_INT:
- pool = &pblk->w_rq_pool;
- break;
- case PBLK_READ:
- pool = &pblk->r_rq_pool;
- break;
- case PBLK_ERASE:
- pool = &pblk->e_rq_pool;
- break;
- default:
- pblk_err(pblk, "trying to free unknown rqd type\n");
- return;
- }
-
- pblk_free_rqd_meta(pblk, rqd);
- mempool_free(rqd, pool);
-}
-
-void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
- int nr_pages)
-{
- struct bio_vec *bv;
- struct page *page;
- int i, e, nbv = 0;
-
- for (i = 0; i < bio->bi_vcnt; i++) {
- bv = &bio->bi_io_vec[i];
- page = bv->bv_page;
- for (e = 0; e < bv->bv_len; e += PBLK_EXPOSED_PAGE_SIZE, nbv++)
- if (nbv >= off)
- mempool_free(page++, &pblk->page_bio_pool);
- }
-}
-
-int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
- int nr_pages)
-{
- struct request_queue *q = pblk->dev->q;
- struct page *page;
- int i, ret;
-
- for (i = 0; i < nr_pages; i++) {
- page = mempool_alloc(&pblk->page_bio_pool, flags);
-
- ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0);
- if (ret != PBLK_EXPOSED_PAGE_SIZE) {
- pblk_err(pblk, "could not add page to bio\n");
- mempool_free(page, &pblk->page_bio_pool);
- goto err;
- }
- }
-
- return 0;
-err:
- pblk_bio_free_pages(pblk, bio, (bio->bi_vcnt - i), i);
- return -1;
-}
-
-void pblk_write_kick(struct pblk *pblk)
-{
- wake_up_process(pblk->writer_ts);
- mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000));
-}
-
-void pblk_write_timer_fn(struct timer_list *t)
-{
- struct pblk *pblk = from_timer(pblk, t, wtimer);
-
- /* kick the write thread every tick to flush outstanding data */
- pblk_write_kick(pblk);
-}
-
-void pblk_write_should_kick(struct pblk *pblk)
-{
- unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb);
-
- if (secs_avail >= pblk->min_write_pgs_data)
- pblk_write_kick(pblk);
-}
-
-static void pblk_wait_for_meta(struct pblk *pblk)
-{
- do {
- if (!atomic_read(&pblk->inflight_io))
- break;
-
- schedule();
- } while (1);
-}
-
-static void pblk_flush_writer(struct pblk *pblk)
-{
- pblk_rb_flush(&pblk->rwb);
- do {
- if (!pblk_rb_sync_count(&pblk->rwb))
- break;
-
- pblk_write_kick(pblk);
- schedule();
- } while (1);
-}
-
-struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct list_head *move_list = NULL;
- int packed_meta = (le32_to_cpu(*line->vsc) / pblk->min_write_pgs_data)
- * (pblk->min_write_pgs - pblk->min_write_pgs_data);
- int vsc = le32_to_cpu(*line->vsc) + packed_meta;
-
- lockdep_assert_held(&line->lock);
-
- if (line->w_err_gc->has_write_err) {
- if (line->gc_group != PBLK_LINEGC_WERR) {
- line->gc_group = PBLK_LINEGC_WERR;
- move_list = &l_mg->gc_werr_list;
- pblk_rl_werr_line_in(&pblk->rl);
- }
- } else if (!vsc) {
- if (line->gc_group != PBLK_LINEGC_FULL) {
- line->gc_group = PBLK_LINEGC_FULL;
- move_list = &l_mg->gc_full_list;
- }
- } else if (vsc < lm->high_thrs) {
- if (line->gc_group != PBLK_LINEGC_HIGH) {
- line->gc_group = PBLK_LINEGC_HIGH;
- move_list = &l_mg->gc_high_list;
- }
- } else if (vsc < lm->mid_thrs) {
- if (line->gc_group != PBLK_LINEGC_MID) {
- line->gc_group = PBLK_LINEGC_MID;
- move_list = &l_mg->gc_mid_list;
- }
- } else if (vsc < line->sec_in_line) {
- if (line->gc_group != PBLK_LINEGC_LOW) {
- line->gc_group = PBLK_LINEGC_LOW;
- move_list = &l_mg->gc_low_list;
- }
- } else if (vsc == line->sec_in_line) {
- if (line->gc_group != PBLK_LINEGC_EMPTY) {
- line->gc_group = PBLK_LINEGC_EMPTY;
- move_list = &l_mg->gc_empty_list;
- }
- } else {
- line->state = PBLK_LINESTATE_CORRUPT;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
-
- line->gc_group = PBLK_LINEGC_NONE;
- move_list = &l_mg->corrupt_list;
- pblk_err(pblk, "corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
- line->id, vsc,
- line->sec_in_line,
- lm->high_thrs, lm->mid_thrs);
- }
-
- return move_list;
-}
-
-void pblk_discard(struct pblk *pblk, struct bio *bio)
-{
- sector_t slba = pblk_get_lba(bio);
- sector_t nr_secs = pblk_get_secs(bio);
-
- pblk_invalidate_range(pblk, slba, nr_secs);
-}
-
-void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd)
-{
- atomic_long_inc(&pblk->write_failed);
-#ifdef CONFIG_NVM_PBLK_DEBUG
- pblk_print_failed_rqd(pblk, rqd, rqd->error);
-#endif
-}
-
-void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
-{
- /* Empty page read is not necessarily an error (e.g., L2P recovery) */
- if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
- atomic_long_inc(&pblk->read_empty);
- return;
- }
-
- switch (rqd->error) {
- case NVM_RSP_WARN_HIGHECC:
- atomic_long_inc(&pblk->read_high_ecc);
- break;
- case NVM_RSP_ERR_FAILECC:
- case NVM_RSP_ERR_FAILCRC:
- atomic_long_inc(&pblk->read_failed);
- break;
- default:
- pblk_err(pblk, "unknown read error:%d\n", rqd->error);
- }
-#ifdef CONFIG_NVM_PBLK_DEBUG
- pblk_print_failed_rqd(pblk, rqd, rqd->error);
-#endif
-}
-
-void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
-{
- pblk->sec_per_write = sec_per_write;
-}
-
-int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd, void *buf)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
-
- atomic_inc(&pblk->inflight_io);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- if (pblk_check_io(pblk, rqd))
- return NVM_IO_ERR;
-#endif
-
- return nvm_submit_io(dev, rqd, buf);
-}
-
-void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
-
- int i;
-
- for (i = 0; i < rqd->nr_ppas; i++) {
- struct ppa_addr *ppa = &ppa_list[i];
- struct nvm_chk_meta *chunk = pblk_dev_ppa_to_chunk(pblk, *ppa);
- u64 caddr = pblk_dev_ppa_to_chunk_addr(pblk, *ppa);
-
- if (caddr == 0)
- trace_pblk_chunk_state(pblk_disk_name(pblk),
- ppa, NVM_CHK_ST_OPEN);
- else if (caddr == (chunk->cnlb - 1))
- trace_pblk_chunk_state(pblk_disk_name(pblk),
- ppa, NVM_CHK_ST_CLOSED);
- }
-}
-
-int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd, void *buf)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- int ret;
-
- atomic_inc(&pblk->inflight_io);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- if (pblk_check_io(pblk, rqd))
- return NVM_IO_ERR;
-#endif
-
- ret = nvm_submit_io_sync(dev, rqd, buf);
-
- if (trace_pblk_chunk_state_enabled() && !ret &&
- rqd->opcode == NVM_OP_PWRITE)
- pblk_check_chunk_state_update(pblk, rqd);
-
- return ret;
-}
-
-static int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd,
- void *buf)
-{
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
- int ret;
-
- pblk_down_chunk(pblk, ppa_list[0]);
- ret = pblk_submit_io_sync(pblk, rqd, buf);
- pblk_up_chunk(pblk, ppa_list[0]);
-
- return ret;
-}
-
-int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
- unsigned long secs_to_flush, bool skip_meta)
-{
- int max = pblk->sec_per_write;
- int min = pblk->min_write_pgs;
- int secs_to_sync = 0;
-
- if (skip_meta && pblk->min_write_pgs_data != pblk->min_write_pgs)
- min = max = pblk->min_write_pgs_data;
-
- if (secs_avail >= max)
- secs_to_sync = max;
- else if (secs_avail >= min)
- secs_to_sync = min * (secs_avail / min);
- else if (secs_to_flush)
- secs_to_sync = min;
-
- return secs_to_sync;
-}
-
-void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
-{
- u64 addr;
- int i;
-
- spin_lock(&line->lock);
- addr = find_next_zero_bit(line->map_bitmap,
- pblk->lm.sec_per_line, line->cur_sec);
- line->cur_sec = addr - nr_secs;
-
- for (i = 0; i < nr_secs; i++, line->cur_sec--)
- WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
- spin_unlock(&line->lock);
-}
-
-u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
-{
- u64 addr;
- int i;
-
- lockdep_assert_held(&line->lock);
-
- /* logic error: ppa out-of-bounds. Prevent generating bad address */
- if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
- WARN(1, "pblk: page allocation out of bounds\n");
- nr_secs = pblk->lm.sec_per_line - line->cur_sec;
- }
-
- line->cur_sec = addr = find_next_zero_bit(line->map_bitmap,
- pblk->lm.sec_per_line, line->cur_sec);
- for (i = 0; i < nr_secs; i++, line->cur_sec++)
- WARN_ON(test_and_set_bit(line->cur_sec, line->map_bitmap));
-
- return addr;
-}
-
-u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
-{
- u64 addr;
-
- /* Lock needed in case a write fails and a recovery needs to remap
- * failed write buffer entries
- */
- spin_lock(&line->lock);
- addr = __pblk_alloc_page(pblk, line, nr_secs);
- line->left_msecs -= nr_secs;
- WARN(line->left_msecs < 0, "pblk: page allocation out of bounds\n");
- spin_unlock(&line->lock);
-
- return addr;
-}
-
-u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
-{
- u64 paddr;
-
- spin_lock(&line->lock);
- paddr = find_next_zero_bit(line->map_bitmap,
- pblk->lm.sec_per_line, line->cur_sec);
- spin_unlock(&line->lock);
-
- return paddr;
-}
-
-u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- int bit;
-
- /* This usually only happens on bad lines */
- bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
- if (bit >= lm->blk_per_line)
- return -1;
-
- return bit * geo->ws_opt;
-}
-
-int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct ppa_addr *ppa_list;
- struct nvm_rq rqd;
- u64 paddr = pblk_line_smeta_start(pblk, line);
- int i, ret;
-
- memset(&rqd, 0, sizeof(struct nvm_rq));
-
- ret = pblk_alloc_rqd_meta(pblk, &rqd);
- if (ret)
- return ret;
-
- rqd.opcode = NVM_OP_PREAD;
- rqd.nr_ppas = lm->smeta_sec;
- rqd.is_seq = 1;
- ppa_list = nvm_rq_to_ppa_list(&rqd);
-
- for (i = 0; i < lm->smeta_sec; i++, paddr++)
- ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
-
- ret = pblk_submit_io_sync(pblk, &rqd, line->smeta);
- if (ret) {
- pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
- goto clear_rqd;
- }
-
- atomic_dec(&pblk->inflight_io);
-
- if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) {
- pblk_log_read_err(pblk, &rqd);
- ret = -EIO;
- }
-
-clear_rqd:
- pblk_free_rqd_meta(pblk, &rqd);
- return ret;
-}
-
-static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
- u64 paddr)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct ppa_addr *ppa_list;
- struct nvm_rq rqd;
- __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
- __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
- int i, ret;
-
- memset(&rqd, 0, sizeof(struct nvm_rq));
-
- ret = pblk_alloc_rqd_meta(pblk, &rqd);
- if (ret)
- return ret;
-
- rqd.opcode = NVM_OP_PWRITE;
- rqd.nr_ppas = lm->smeta_sec;
- rqd.is_seq = 1;
- ppa_list = nvm_rq_to_ppa_list(&rqd);
-
- for (i = 0; i < lm->smeta_sec; i++, paddr++) {
- struct pblk_sec_meta *meta = pblk_get_meta(pblk,
- rqd.meta_list, i);
-
- ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
- meta->lba = lba_list[paddr] = addr_empty;
- }
-
- ret = pblk_submit_io_sync_sem(pblk, &rqd, line->smeta);
- if (ret) {
- pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
- goto clear_rqd;
- }
-
- atomic_dec(&pblk->inflight_io);
-
- if (rqd.error) {
- pblk_log_write_err(pblk, &rqd);
- ret = -EIO;
- }
-
-clear_rqd:
- pblk_free_rqd_meta(pblk, &rqd);
- return ret;
-}
-
-int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
- void *emeta_buf)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- void *ppa_list_buf, *meta_list;
- struct ppa_addr *ppa_list;
- struct nvm_rq rqd;
- u64 paddr = line->emeta_ssec;
- dma_addr_t dma_ppa_list, dma_meta_list;
- int min = pblk->min_write_pgs;
- int left_ppas = lm->emeta_sec[0];
- int line_id = line->id;
- int rq_ppas, rq_len;
- int i, j;
- int ret;
-
- meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &dma_meta_list);
- if (!meta_list)
- return -ENOMEM;
-
- ppa_list_buf = meta_list + pblk_dma_meta_size(pblk);
- dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
-
-next_rq:
- memset(&rqd, 0, sizeof(struct nvm_rq));
-
- rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
- rq_len = rq_ppas * geo->csecs;
-
- rqd.meta_list = meta_list;
- rqd.ppa_list = ppa_list_buf;
- rqd.dma_meta_list = dma_meta_list;
- rqd.dma_ppa_list = dma_ppa_list;
- rqd.opcode = NVM_OP_PREAD;
- rqd.nr_ppas = rq_ppas;
- ppa_list = nvm_rq_to_ppa_list(&rqd);
-
- for (i = 0; i < rqd.nr_ppas; ) {
- struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id);
- int pos = pblk_ppa_to_pos(geo, ppa);
-
- if (pblk_io_aligned(pblk, rq_ppas))
- rqd.is_seq = 1;
-
- while (test_bit(pos, line->blk_bitmap)) {
- paddr += min;
- if (pblk_boundary_paddr_checks(pblk, paddr)) {
- ret = -EINTR;
- goto free_rqd_dma;
- }
-
- ppa = addr_to_gen_ppa(pblk, paddr, line_id);
- pos = pblk_ppa_to_pos(geo, ppa);
- }
-
- if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
- ret = -EINTR;
- goto free_rqd_dma;
- }
-
- for (j = 0; j < min; j++, i++, paddr++)
- ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id);
- }
-
- ret = pblk_submit_io_sync(pblk, &rqd, emeta_buf);
- if (ret) {
- pblk_err(pblk, "emeta I/O submission failed: %d\n", ret);
- goto free_rqd_dma;
- }
-
- atomic_dec(&pblk->inflight_io);
-
- if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) {
- pblk_log_read_err(pblk, &rqd);
- ret = -EIO;
- goto free_rqd_dma;
- }
-
- emeta_buf += rq_len;
- left_ppas -= rq_ppas;
- if (left_ppas)
- goto next_rq;
-
-free_rqd_dma:
- nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
- return ret;
-}
-
-static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
- struct ppa_addr ppa)
-{
- rqd->opcode = NVM_OP_ERASE;
- rqd->ppa_addr = ppa;
- rqd->nr_ppas = 1;
- rqd->is_seq = 1;
- rqd->bio = NULL;
-}
-
-static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
-{
- struct nvm_rq rqd = {NULL};
- int ret;
-
- trace_pblk_chunk_reset(pblk_disk_name(pblk), &ppa,
- PBLK_CHUNK_RESET_START);
-
- pblk_setup_e_rq(pblk, &rqd, ppa);
-
- /* The write thread schedules erases so that it minimizes disturbances
- * with writes. Thus, there is no need to take the LUN semaphore.
- */
- ret = pblk_submit_io_sync(pblk, &rqd, NULL);
- rqd.private = pblk;
- __pblk_end_io_erase(pblk, &rqd);
-
- return ret;
-}
-
-int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct ppa_addr ppa;
- int ret, bit = -1;
-
- /* Erase only good blocks, one at a time */
- do {
- spin_lock(&line->lock);
- bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line,
- bit + 1);
- if (bit >= lm->blk_per_line) {
- spin_unlock(&line->lock);
- break;
- }
-
- ppa = pblk->luns[bit].bppa; /* set ch and lun */
- ppa.a.blk = line->id;
-
- atomic_dec(&line->left_eblks);
- WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
- spin_unlock(&line->lock);
-
- ret = pblk_blk_erase_sync(pblk, ppa);
- if (ret) {
- pblk_err(pblk, "failed to erase line %d\n", line->id);
- return ret;
- }
- } while (1);
-
- return 0;
-}
-
-static void pblk_line_setup_metadata(struct pblk_line *line,
- struct pblk_line_mgmt *l_mg,
- struct pblk_line_meta *lm)
-{
- int meta_line;
-
- lockdep_assert_held(&l_mg->free_lock);
-
-retry_meta:
- meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- if (meta_line == PBLK_DATA_LINES) {
- spin_unlock(&l_mg->free_lock);
- io_schedule();
- spin_lock(&l_mg->free_lock);
- goto retry_meta;
- }
-
- set_bit(meta_line, &l_mg->meta_bitmap);
- line->meta_line = meta_line;
-
- line->smeta = l_mg->sline_meta[meta_line];
- line->emeta = l_mg->eline_meta[meta_line];
-
- memset(line->smeta, 0, lm->smeta_len);
- memset(line->emeta->buf, 0, lm->emeta_len[0]);
-
- line->emeta->mem = 0;
- atomic_set(&line->emeta->sync, 0);
-}
-
-/* For now lines are always assumed full lines. Thus, smeta former and current
- * lun bitmaps are omitted.
- */
-static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
- struct pblk_line *cur)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_emeta *emeta = line->emeta;
- struct line_emeta *emeta_buf = emeta->buf;
- struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
- int nr_blk_line;
-
- /* After erasing the line, new bad blocks might appear and we risk
- * having an invalid line
- */
- nr_blk_line = lm->blk_per_line -
- bitmap_weight(line->blk_bitmap, lm->blk_per_line);
- if (nr_blk_line < lm->min_blk_line) {
- spin_lock(&l_mg->free_lock);
- spin_lock(&line->lock);
- line->state = PBLK_LINESTATE_BAD;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- spin_unlock(&line->lock);
-
- list_add_tail(&line->list, &l_mg->bad_list);
- spin_unlock(&l_mg->free_lock);
-
- pblk_debug(pblk, "line %d is bad\n", line->id);
-
- return 0;
- }
-
- /* Run-time metadata */
- line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);
-
- /* Mark LUNs allocated in this line (all for now) */
- bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
-
- smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
- export_guid(smeta_buf->header.uuid, &pblk->instance_uuid);
- smeta_buf->header.id = cpu_to_le32(line->id);
- smeta_buf->header.type = cpu_to_le16(line->type);
- smeta_buf->header.version_major = SMETA_VERSION_MAJOR;
- smeta_buf->header.version_minor = SMETA_VERSION_MINOR;
-
- /* Start metadata */
- smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
- smeta_buf->window_wr_lun = cpu_to_le32(geo->all_luns);
-
- /* Fill metadata among lines */
- if (cur) {
- memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
- smeta_buf->prev_id = cpu_to_le32(cur->id);
- cur->emeta->buf->next_id = cpu_to_le32(line->id);
- } else {
- smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
- }
-
- /* All smeta must be set at this point */
- smeta_buf->header.crc = cpu_to_le32(
- pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
- smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));
-
- /* End metadata */
- memcpy(&emeta_buf->header, &smeta_buf->header,
- sizeof(struct line_header));
-
- emeta_buf->header.version_major = EMETA_VERSION_MAJOR;
- emeta_buf->header.version_minor = EMETA_VERSION_MINOR;
- emeta_buf->header.crc = cpu_to_le32(
- pblk_calc_meta_header_crc(pblk, &emeta_buf->header));
-
- emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
- emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
- emeta_buf->nr_valid_lbas = cpu_to_le64(0);
- emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
- emeta_buf->crc = cpu_to_le32(0);
- emeta_buf->prev_id = smeta_buf->prev_id;
-
- return 1;
-}
-
-static int pblk_line_alloc_bitmaps(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-
- line->map_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL);
- if (!line->map_bitmap)
- return -ENOMEM;
-
- memset(line->map_bitmap, 0, lm->sec_bitmap_len);
-
- /* will be initialized using bb info from map_bitmap */
- line->invalid_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL);
- if (!line->invalid_bitmap) {
- mempool_free(line->map_bitmap, l_mg->bitmap_pool);
- line->map_bitmap = NULL;
- return -ENOMEM;
- }
-
- return 0;
-}
-
-/* For now lines are always assumed full lines. Thus, smeta former and current
- * lun bitmaps are omitted.
- */
-static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
- int init)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- u64 off;
- int bit = -1;
- int emeta_secs;
-
- line->sec_in_line = lm->sec_per_line;
-
- /* Capture bad block information on line mapping bitmaps */
- while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line,
- bit + 1)) < lm->blk_per_line) {
- off = bit * geo->ws_opt;
- bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off,
- lm->sec_per_line);
- bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux,
- lm->sec_per_line);
- line->sec_in_line -= geo->clba;
- }
-
- /* Mark smeta metadata sectors as bad sectors */
- bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
- off = bit * geo->ws_opt;
- bitmap_set(line->map_bitmap, off, lm->smeta_sec);
- line->sec_in_line -= lm->smeta_sec;
- line->cur_sec = off + lm->smeta_sec;
-
- if (init && pblk_line_smeta_write(pblk, line, off)) {
- pblk_debug(pblk, "line smeta I/O failed. Retry\n");
- return 0;
- }
-
- bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
-
- /* Mark emeta metadata sectors as bad sectors. We need to consider bad
- * blocks to make sure that there are enough sectors to store emeta
- */
- emeta_secs = lm->emeta_sec[0];
- off = lm->sec_per_line;
- while (emeta_secs) {
- off -= geo->ws_opt;
- if (!test_bit(off, line->invalid_bitmap)) {
- bitmap_set(line->invalid_bitmap, off, geo->ws_opt);
- emeta_secs -= geo->ws_opt;
- }
- }
-
- line->emeta_ssec = off;
- line->sec_in_line -= lm->emeta_sec[0];
- line->nr_valid_lbas = 0;
- line->left_msecs = line->sec_in_line;
- *line->vsc = cpu_to_le32(line->sec_in_line);
-
- if (lm->sec_per_line - line->sec_in_line !=
- bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
- spin_lock(&line->lock);
- line->state = PBLK_LINESTATE_BAD;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- spin_unlock(&line->lock);
-
- list_add_tail(&line->list, &l_mg->bad_list);
- pblk_err(pblk, "unexpected line %d is bad\n", line->id);
-
- return 0;
- }
-
- return 1;
-}
-
-static int pblk_prepare_new_line(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int blk_to_erase = atomic_read(&line->blk_in_line);
- int i;
-
- for (i = 0; i < lm->blk_per_line; i++) {
- struct pblk_lun *rlun = &pblk->luns[i];
- int pos = pblk_ppa_to_pos(geo, rlun->bppa);
- int state = line->chks[pos].state;
-
- /* Free chunks should not be erased */
- if (state & NVM_CHK_ST_FREE) {
- set_bit(pblk_ppa_to_pos(geo, rlun->bppa),
- line->erase_bitmap);
- blk_to_erase--;
- }
- }
-
- return blk_to_erase;
-}
-
-static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- int blk_in_line = atomic_read(&line->blk_in_line);
- int blk_to_erase;
-
- /* Bad blocks do not need to be erased */
- bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
-
- spin_lock(&line->lock);
-
- /* If we have not written to this line, we need to mark up free chunks
- * as already erased
- */
- if (line->state == PBLK_LINESTATE_NEW) {
- blk_to_erase = pblk_prepare_new_line(pblk, line);
- line->state = PBLK_LINESTATE_FREE;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- } else {
- blk_to_erase = blk_in_line;
- }
-
- if (blk_in_line < lm->min_blk_line) {
- spin_unlock(&line->lock);
- return -EAGAIN;
- }
-
- if (line->state != PBLK_LINESTATE_FREE) {
- WARN(1, "pblk: corrupted line %d, state %d\n",
- line->id, line->state);
- spin_unlock(&line->lock);
- return -EINTR;
- }
-
- line->state = PBLK_LINESTATE_OPEN;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
-
- atomic_set(&line->left_eblks, blk_to_erase);
- atomic_set(&line->left_seblks, blk_to_erase);
-
- line->meta_distance = lm->meta_distance;
- spin_unlock(&line->lock);
-
- kref_init(&line->ref);
- atomic_set(&line->sec_to_update, 0);
-
- return 0;
-}
-
-/* Line allocations in the recovery path are always single threaded */
-int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- int ret;
-
- spin_lock(&l_mg->free_lock);
- l_mg->data_line = line;
- list_del(&line->list);
-
- ret = pblk_line_prepare(pblk, line);
- if (ret) {
- list_add(&line->list, &l_mg->free_list);
- spin_unlock(&l_mg->free_lock);
- return ret;
- }
- spin_unlock(&l_mg->free_lock);
-
- ret = pblk_line_alloc_bitmaps(pblk, line);
- if (ret)
- goto fail;
-
- if (!pblk_line_init_bb(pblk, line, 0)) {
- ret = -EINTR;
- goto fail;
- }
-
- pblk_rl_free_lines_dec(&pblk->rl, line, true);
- return 0;
-
-fail:
- spin_lock(&l_mg->free_lock);
- list_add(&line->list, &l_mg->free_list);
- spin_unlock(&l_mg->free_lock);
-
- return ret;
-}
-
-void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-
- mempool_free(line->map_bitmap, l_mg->bitmap_pool);
- line->map_bitmap = NULL;
- line->smeta = NULL;
- line->emeta = NULL;
-}
-
-static void pblk_line_reinit(struct pblk_line *line)
-{
- *line->vsc = cpu_to_le32(EMPTY_ENTRY);
-
- line->map_bitmap = NULL;
- line->invalid_bitmap = NULL;
- line->smeta = NULL;
- line->emeta = NULL;
-}
-
-void pblk_line_free(struct pblk_line *line)
-{
- struct pblk *pblk = line->pblk;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-
- mempool_free(line->map_bitmap, l_mg->bitmap_pool);
- mempool_free(line->invalid_bitmap, l_mg->bitmap_pool);
-
- pblk_line_reinit(line);
-}
-
-struct pblk_line *pblk_line_get(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *line;
- int ret, bit;
-
- lockdep_assert_held(&l_mg->free_lock);
-
-retry:
- if (list_empty(&l_mg->free_list)) {
- pblk_err(pblk, "no free lines\n");
- return NULL;
- }
-
- line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
- list_del(&line->list);
- l_mg->nr_free_lines--;
-
- bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
- if (unlikely(bit >= lm->blk_per_line)) {
- spin_lock(&line->lock);
- line->state = PBLK_LINESTATE_BAD;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- spin_unlock(&line->lock);
-
- list_add_tail(&line->list, &l_mg->bad_list);
-
- pblk_debug(pblk, "line %d is bad\n", line->id);
- goto retry;
- }
-
- ret = pblk_line_prepare(pblk, line);
- if (ret) {
- switch (ret) {
- case -EAGAIN:
- list_add(&line->list, &l_mg->bad_list);
- goto retry;
- case -EINTR:
- list_add(&line->list, &l_mg->corrupt_list);
- goto retry;
- default:
- pblk_err(pblk, "failed to prepare line %d\n", line->id);
- list_add(&line->list, &l_mg->free_list);
- l_mg->nr_free_lines++;
- return NULL;
- }
- }
-
- return line;
-}
-
-static struct pblk_line *pblk_line_retry(struct pblk *pblk,
- struct pblk_line *line)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *retry_line;
-
-retry:
- spin_lock(&l_mg->free_lock);
- retry_line = pblk_line_get(pblk);
- if (!retry_line) {
- l_mg->data_line = NULL;
- spin_unlock(&l_mg->free_lock);
- return NULL;
- }
-
- retry_line->map_bitmap = line->map_bitmap;
- retry_line->invalid_bitmap = line->invalid_bitmap;
- retry_line->smeta = line->smeta;
- retry_line->emeta = line->emeta;
- retry_line->meta_line = line->meta_line;
-
- pblk_line_reinit(line);
-
- l_mg->data_line = retry_line;
- spin_unlock(&l_mg->free_lock);
-
- pblk_rl_free_lines_dec(&pblk->rl, line, false);
-
- if (pblk_line_erase(pblk, retry_line))
- goto retry;
-
- return retry_line;
-}
-
-static void pblk_set_space_limit(struct pblk *pblk)
-{
- struct pblk_rl *rl = &pblk->rl;
-
- atomic_set(&rl->rb_space, 0);
-}
-
-struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *line;
-
- spin_lock(&l_mg->free_lock);
- line = pblk_line_get(pblk);
- if (!line) {
- spin_unlock(&l_mg->free_lock);
- return NULL;
- }
-
- line->seq_nr = l_mg->d_seq_nr++;
- line->type = PBLK_LINETYPE_DATA;
- l_mg->data_line = line;
-
- pblk_line_setup_metadata(line, l_mg, &pblk->lm);
-
- /* Allocate next line for preparation */
- l_mg->data_next = pblk_line_get(pblk);
- if (!l_mg->data_next) {
- /* If we cannot get a new line, we need to stop the pipeline.
- * Only allow as many writes in as we can store safely and then
- * fail gracefully
- */
- pblk_set_space_limit(pblk);
-
- l_mg->data_next = NULL;
- } else {
- l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
- l_mg->data_next->type = PBLK_LINETYPE_DATA;
- }
- spin_unlock(&l_mg->free_lock);
-
- if (pblk_line_alloc_bitmaps(pblk, line))
- return NULL;
-
- if (pblk_line_erase(pblk, line)) {
- line = pblk_line_retry(pblk, line);
- if (!line)
- return NULL;
- }
-
-retry_setup:
- if (!pblk_line_init_metadata(pblk, line, NULL)) {
- line = pblk_line_retry(pblk, line);
- if (!line)
- return NULL;
-
- goto retry_setup;
- }
-
- if (!pblk_line_init_bb(pblk, line, 1)) {
- line = pblk_line_retry(pblk, line);
- if (!line)
- return NULL;
-
- goto retry_setup;
- }
-
- pblk_rl_free_lines_dec(&pblk->rl, line, true);
-
- return line;
-}
-
-void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa)
-{
- struct pblk_line *line;
-
- line = pblk_ppa_to_line(pblk, ppa);
- kref_put(&line->ref, pblk_line_put_wq);
-}
-
-void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
- int i;
-
- for (i = 0; i < rqd->nr_ppas; i++)
- pblk_ppa_to_line_put(pblk, ppa_list[i]);
-}
-
-static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line)
-{
- lockdep_assert_held(&pblk->l_mg.free_lock);
-
- pblk_set_space_limit(pblk);
- pblk->state = PBLK_STATE_STOPPING;
- trace_pblk_state(pblk_disk_name(pblk), pblk->state);
-}
-
-static void pblk_line_close_meta_sync(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *line, *tline;
- LIST_HEAD(list);
-
- spin_lock(&l_mg->close_lock);
- if (list_empty(&l_mg->emeta_list)) {
- spin_unlock(&l_mg->close_lock);
- return;
- }
-
- list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev);
- spin_unlock(&l_mg->close_lock);
-
- list_for_each_entry_safe(line, tline, &list, list) {
- struct pblk_emeta *emeta = line->emeta;
-
- while (emeta->mem < lm->emeta_len[0]) {
- int ret;
-
- ret = pblk_submit_meta_io(pblk, line);
- if (ret) {
- pblk_err(pblk, "sync meta line %d failed (%d)\n",
- line->id, ret);
- return;
- }
- }
- }
-
- pblk_wait_for_meta(pblk);
- flush_workqueue(pblk->close_wq);
-}
-
-void __pblk_pipeline_flush(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- int ret;
-
- spin_lock(&l_mg->free_lock);
- if (pblk->state == PBLK_STATE_RECOVERING ||
- pblk->state == PBLK_STATE_STOPPED) {
- spin_unlock(&l_mg->free_lock);
- return;
- }
- pblk->state = PBLK_STATE_RECOVERING;
- trace_pblk_state(pblk_disk_name(pblk), pblk->state);
- spin_unlock(&l_mg->free_lock);
-
- pblk_flush_writer(pblk);
- pblk_wait_for_meta(pblk);
-
- ret = pblk_recov_pad(pblk);
- if (ret) {
- pblk_err(pblk, "could not close data on teardown(%d)\n", ret);
- return;
- }
-
- flush_workqueue(pblk->bb_wq);
- pblk_line_close_meta_sync(pblk);
-}
-
-void __pblk_pipeline_stop(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-
- spin_lock(&l_mg->free_lock);
- pblk->state = PBLK_STATE_STOPPED;
- trace_pblk_state(pblk_disk_name(pblk), pblk->state);
- l_mg->data_line = NULL;
- l_mg->data_next = NULL;
- spin_unlock(&l_mg->free_lock);
-}
-
-void pblk_pipeline_stop(struct pblk *pblk)
-{
- __pblk_pipeline_flush(pblk);
- __pblk_pipeline_stop(pblk);
-}
-
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *cur, *new = NULL;
- unsigned int left_seblks;
-
- new = l_mg->data_next;
- if (!new)
- goto out;
-
- spin_lock(&l_mg->free_lock);
- cur = l_mg->data_line;
- l_mg->data_line = new;
-
- pblk_line_setup_metadata(new, l_mg, &pblk->lm);
- spin_unlock(&l_mg->free_lock);
-
-retry_erase:
- left_seblks = atomic_read(&new->left_seblks);
- if (left_seblks) {
- /* If line is not fully erased, erase it */
- if (atomic_read(&new->left_eblks)) {
- if (pblk_line_erase(pblk, new))
- goto out;
- } else {
- io_schedule();
- }
- goto retry_erase;
- }
-
- if (pblk_line_alloc_bitmaps(pblk, new))
- return NULL;
-
-retry_setup:
- if (!pblk_line_init_metadata(pblk, new, cur)) {
- new = pblk_line_retry(pblk, new);
- if (!new)
- goto out;
-
- goto retry_setup;
- }
-
- if (!pblk_line_init_bb(pblk, new, 1)) {
- new = pblk_line_retry(pblk, new);
- if (!new)
- goto out;
-
- goto retry_setup;
- }
-
- pblk_rl_free_lines_dec(&pblk->rl, new, true);
-
- /* Allocate next line for preparation */
- spin_lock(&l_mg->free_lock);
- l_mg->data_next = pblk_line_get(pblk);
- if (!l_mg->data_next) {
- /* If we cannot get a new line, we need to stop the pipeline.
- * Only allow as many writes in as we can store safely and then
- * fail gracefully
- */
- pblk_stop_writes(pblk, new);
- l_mg->data_next = NULL;
- } else {
- l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
- l_mg->data_next->type = PBLK_LINETYPE_DATA;
- }
- spin_unlock(&l_mg->free_lock);
-
-out:
- return new;
-}
-
-static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_gc *gc = &pblk->gc;
-
- spin_lock(&line->lock);
- WARN_ON(line->state != PBLK_LINESTATE_GC);
- if (line->w_err_gc->has_gc_err) {
- spin_unlock(&line->lock);
- pblk_err(pblk, "line %d had errors during GC\n", line->id);
- pblk_put_line_back(pblk, line);
- line->w_err_gc->has_gc_err = 0;
- return;
- }
-
- line->state = PBLK_LINESTATE_FREE;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- line->gc_group = PBLK_LINEGC_NONE;
- pblk_line_free(line);
-
- if (line->w_err_gc->has_write_err) {
- pblk_rl_werr_line_out(&pblk->rl);
- line->w_err_gc->has_write_err = 0;
- }
-
- spin_unlock(&line->lock);
- atomic_dec(&gc->pipeline_gc);
-
- spin_lock(&l_mg->free_lock);
- list_add_tail(&line->list, &l_mg->free_list);
- l_mg->nr_free_lines++;
- spin_unlock(&l_mg->free_lock);
-
- pblk_rl_free_lines_inc(&pblk->rl, line);
-}
-
-static void pblk_line_put_ws(struct work_struct *work)
-{
- struct pblk_line_ws *line_put_ws = container_of(work,
- struct pblk_line_ws, ws);
- struct pblk *pblk = line_put_ws->pblk;
- struct pblk_line *line = line_put_ws->line;
-
- __pblk_line_put(pblk, line);
- mempool_free(line_put_ws, &pblk->gen_ws_pool);
-}
-
-void pblk_line_put(struct kref *ref)
-{
- struct pblk_line *line = container_of(ref, struct pblk_line, ref);
- struct pblk *pblk = line->pblk;
-
- __pblk_line_put(pblk, line);
-}
-
-void pblk_line_put_wq(struct kref *ref)
-{
- struct pblk_line *line = container_of(ref, struct pblk_line, ref);
- struct pblk *pblk = line->pblk;
- struct pblk_line_ws *line_put_ws;
-
- line_put_ws = mempool_alloc(&pblk->gen_ws_pool, GFP_ATOMIC);
- if (!line_put_ws)
- return;
-
- line_put_ws->pblk = pblk;
- line_put_ws->line = line;
- line_put_ws->priv = NULL;
-
- INIT_WORK(&line_put_ws->ws, pblk_line_put_ws);
- queue_work(pblk->r_end_wq, &line_put_ws->ws);
-}
-
-int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
-{
- struct nvm_rq *rqd;
- int err;
-
- rqd = pblk_alloc_rqd(pblk, PBLK_ERASE);
-
- pblk_setup_e_rq(pblk, rqd, ppa);
-
- rqd->end_io = pblk_end_io_erase;
- rqd->private = pblk;
-
- trace_pblk_chunk_reset(pblk_disk_name(pblk),
- &ppa, PBLK_CHUNK_RESET_START);
-
- /* The write thread schedules erases so that it minimizes disturbances
- * with writes. Thus, there is no need to take the LUN semaphore.
- */
- err = pblk_submit_io(pblk, rqd, NULL);
- if (err) {
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
-
- pblk_err(pblk, "could not async erase line:%d,blk:%d\n",
- pblk_ppa_to_line_id(ppa),
- pblk_ppa_to_pos(geo, ppa));
- }
-
- return err;
-}
-
-struct pblk_line *pblk_line_get_data(struct pblk *pblk)
-{
- return pblk->l_mg.data_line;
-}
-
-/* For now, always erase next line */
-struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
-{
- return pblk->l_mg.data_next;
-}
-
-int pblk_line_is_full(struct pblk_line *line)
-{
- return (line->left_msecs == 0);
-}
-
-static void pblk_line_should_sync_meta(struct pblk *pblk)
-{
- if (pblk_rl_is_limit(&pblk->rl))
- pblk_line_close_meta_sync(pblk);
-}
-
-void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct list_head *move_list;
- int i;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
- "pblk: corrupt closed line %d\n", line->id);
-#endif
-
- spin_lock(&l_mg->free_lock);
- WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
- spin_unlock(&l_mg->free_lock);
-
- spin_lock(&l_mg->gc_lock);
- spin_lock(&line->lock);
- WARN_ON(line->state != PBLK_LINESTATE_OPEN);
- line->state = PBLK_LINESTATE_CLOSED;
- move_list = pblk_line_gc_list(pblk, line);
- list_add_tail(&line->list, move_list);
-
- mempool_free(line->map_bitmap, l_mg->bitmap_pool);
- line->map_bitmap = NULL;
- line->smeta = NULL;
- line->emeta = NULL;
-
- for (i = 0; i < lm->blk_per_line; i++) {
- struct pblk_lun *rlun = &pblk->luns[i];
- int pos = pblk_ppa_to_pos(geo, rlun->bppa);
- int state = line->chks[pos].state;
-
- if (!(state & NVM_CHK_ST_OFFLINE))
- state = NVM_CHK_ST_CLOSED;
- }
-
- spin_unlock(&line->lock);
- spin_unlock(&l_mg->gc_lock);
-
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
-}
-
-void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_emeta *emeta = line->emeta;
- struct line_emeta *emeta_buf = emeta->buf;
- struct wa_counters *wa = emeta_to_wa(lm, emeta_buf);
-
- /* No need for exact vsc value; avoid a big line lock and take aprox. */
- memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
- memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
-
- wa->user = cpu_to_le64(atomic64_read(&pblk->user_wa));
- wa->pad = cpu_to_le64(atomic64_read(&pblk->pad_wa));
- wa->gc = cpu_to_le64(atomic64_read(&pblk->gc_wa));
-
- if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC) {
- emeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
- export_guid(emeta_buf->header.uuid, &pblk->instance_uuid);
- emeta_buf->header.id = cpu_to_le32(line->id);
- emeta_buf->header.type = cpu_to_le16(line->type);
- emeta_buf->header.version_major = EMETA_VERSION_MAJOR;
- emeta_buf->header.version_minor = EMETA_VERSION_MINOR;
- emeta_buf->header.crc = cpu_to_le32(
- pblk_calc_meta_header_crc(pblk, &emeta_buf->header));
- }
-
- emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
- emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
-
- spin_lock(&l_mg->close_lock);
- spin_lock(&line->lock);
-
- /* Update the in-memory start address for emeta, in case it has
- * shifted due to write errors
- */
- if (line->emeta_ssec != line->cur_sec)
- line->emeta_ssec = line->cur_sec;
-
- list_add_tail(&line->list, &l_mg->emeta_list);
- spin_unlock(&line->lock);
- spin_unlock(&l_mg->close_lock);
-
- pblk_line_should_sync_meta(pblk);
-}
-
-static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- unsigned int lba_list_size = lm->emeta_len[2];
- struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
- struct pblk_emeta *emeta = line->emeta;
-
- w_err_gc->lba_list = kvmalloc(lba_list_size, GFP_KERNEL);
- memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
- lba_list_size);
-}
-
-void pblk_line_close_ws(struct work_struct *work)
-{
- struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
- ws);
- struct pblk *pblk = line_ws->pblk;
- struct pblk_line *line = line_ws->line;
- struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
-
- /* Write errors makes the emeta start address stored in smeta invalid,
- * so keep a copy of the lba list until we've gc'd the line
- */
- if (w_err_gc->has_write_err)
- pblk_save_lba_list(pblk, line);
-
- pblk_line_close(pblk, line);
- mempool_free(line_ws, &pblk->gen_ws_pool);
-}
-
-void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
- void (*work)(struct work_struct *), gfp_t gfp_mask,
- struct workqueue_struct *wq)
-{
- struct pblk_line_ws *line_ws;
-
- line_ws = mempool_alloc(&pblk->gen_ws_pool, gfp_mask);
- if (!line_ws) {
- pblk_err(pblk, "pblk: could not allocate memory\n");
- return;
- }
-
- line_ws->pblk = pblk;
- line_ws->line = line;
- line_ws->priv = priv;
-
- INIT_WORK(&line_ws->ws, work);
- queue_work(wq, &line_ws->ws);
-}
-
-static void __pblk_down_chunk(struct pblk *pblk, int pos)
-{
- struct pblk_lun *rlun = &pblk->luns[pos];
- int ret;
-
- /*
- * Only send one inflight I/O per LUN. Since we map at a page
- * granurality, all ppas in the I/O will map to the same LUN
- */
-
- ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000));
- if (ret == -ETIME || ret == -EINTR)
- pblk_err(pblk, "taking lun semaphore timed out: err %d\n",
- -ret);
-}
-
-void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int pos = pblk_ppa_to_pos(geo, ppa);
-
- __pblk_down_chunk(pblk, pos);
-}
-
-void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa,
- unsigned long *lun_bitmap)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int pos = pblk_ppa_to_pos(geo, ppa);
-
- /* If the LUN has been locked for this same request, do no attempt to
- * lock it again
- */
- if (test_and_set_bit(pos, lun_bitmap))
- return;
-
- __pblk_down_chunk(pblk, pos);
-}
-
-void pblk_up_chunk(struct pblk *pblk, struct ppa_addr ppa)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_lun *rlun;
- int pos = pblk_ppa_to_pos(geo, ppa);
-
- rlun = &pblk->luns[pos];
- up(&rlun->wr_sem);
-}
-
-void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_lun *rlun;
- int num_lun = geo->all_luns;
- int bit = -1;
-
- while ((bit = find_next_bit(lun_bitmap, num_lun, bit + 1)) < num_lun) {
- rlun = &pblk->luns[bit];
- up(&rlun->wr_sem);
- }
-}
-
-void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
-{
- struct ppa_addr ppa_l2p;
-
- /* logic error: lba out-of-bounds. Ignore update */
- if (!(lba < pblk->capacity)) {
- WARN(1, "pblk: corrupted L2P map request\n");
- return;
- }
-
- spin_lock(&pblk->trans_lock);
- ppa_l2p = pblk_trans_map_get(pblk, lba);
-
- if (!pblk_addr_in_cache(ppa_l2p) && !pblk_ppa_empty(ppa_l2p))
- pblk_map_invalidate(pblk, ppa_l2p);
-
- pblk_trans_map_set(pblk, lba, ppa);
- spin_unlock(&pblk->trans_lock);
-}
-
-void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
-{
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Callers must ensure that the ppa points to a cache address */
- BUG_ON(!pblk_addr_in_cache(ppa));
- BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
-#endif
-
- pblk_update_map(pblk, lba, ppa);
-}
-
-int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa_new,
- struct pblk_line *gc_line, u64 paddr_gc)
-{
- struct ppa_addr ppa_l2p, ppa_gc;
- int ret = 1;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Callers must ensure that the ppa points to a cache address */
- BUG_ON(!pblk_addr_in_cache(ppa_new));
- BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa_new)));
-#endif
-
- /* logic error: lba out-of-bounds. Ignore update */
- if (!(lba < pblk->capacity)) {
- WARN(1, "pblk: corrupted L2P map request\n");
- return 0;
- }
-
- spin_lock(&pblk->trans_lock);
- ppa_l2p = pblk_trans_map_get(pblk, lba);
- ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, gc_line->id);
-
- if (!pblk_ppa_comp(ppa_l2p, ppa_gc)) {
- spin_lock(&gc_line->lock);
- WARN(!test_bit(paddr_gc, gc_line->invalid_bitmap),
- "pblk: corrupted GC update");
- spin_unlock(&gc_line->lock);
-
- ret = 0;
- goto out;
- }
-
- pblk_trans_map_set(pblk, lba, ppa_new);
-out:
- spin_unlock(&pblk->trans_lock);
- return ret;
-}
-
-void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
- struct ppa_addr ppa_mapped, struct ppa_addr ppa_cache)
-{
- struct ppa_addr ppa_l2p;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Callers must ensure that the ppa points to a device address */
- BUG_ON(pblk_addr_in_cache(ppa_mapped));
-#endif
- /* Invalidate and discard padded entries */
- if (lba == ADDR_EMPTY) {
- atomic64_inc(&pblk->pad_wa);
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_inc(&pblk->padded_wb);
-#endif
- if (!pblk_ppa_empty(ppa_mapped))
- pblk_map_invalidate(pblk, ppa_mapped);
- return;
- }
-
- /* logic error: lba out-of-bounds. Ignore update */
- if (!(lba < pblk->capacity)) {
- WARN(1, "pblk: corrupted L2P map request\n");
- return;
- }
-
- spin_lock(&pblk->trans_lock);
- ppa_l2p = pblk_trans_map_get(pblk, lba);
-
- /* Do not update L2P if the cacheline has been updated. In this case,
- * the mapped ppa must be invalidated
- */
- if (!pblk_ppa_comp(ppa_l2p, ppa_cache)) {
- if (!pblk_ppa_empty(ppa_mapped))
- pblk_map_invalidate(pblk, ppa_mapped);
- goto out;
- }
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- WARN_ON(!pblk_addr_in_cache(ppa_l2p) && !pblk_ppa_empty(ppa_l2p));
-#endif
-
- pblk_trans_map_set(pblk, lba, ppa_mapped);
-out:
- spin_unlock(&pblk->trans_lock);
-}
-
-int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
- sector_t blba, int nr_secs, bool *from_cache)
-{
- int i;
-
- spin_lock(&pblk->trans_lock);
- for (i = 0; i < nr_secs; i++) {
- struct ppa_addr ppa;
-
- ppa = ppas[i] = pblk_trans_map_get(pblk, blba + i);
-
- /* If the L2P entry maps to a line, the reference is valid */
- if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) {
- struct pblk_line *line = pblk_ppa_to_line(pblk, ppa);
-
- if (i > 0 && *from_cache)
- break;
- *from_cache = false;
-
- kref_get(&line->ref);
- } else {
- if (i > 0 && !*from_cache)
- break;
- *from_cache = true;
- }
- }
- spin_unlock(&pblk->trans_lock);
- return i;
-}
-
-void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
- u64 *lba_list, int nr_secs)
-{
- u64 lba;
- int i;
-
- spin_lock(&pblk->trans_lock);
- for (i = 0; i < nr_secs; i++) {
- lba = lba_list[i];
- if (lba != ADDR_EMPTY) {
- /* logic error: lba out-of-bounds. Ignore update */
- if (!(lba < pblk->capacity)) {
- WARN(1, "pblk: corrupted L2P map request\n");
- continue;
- }
- ppas[i] = pblk_trans_map_get(pblk, lba);
- }
- }
- spin_unlock(&pblk->trans_lock);
-}
-
-void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd)
-{
- void *buffer;
-
- if (pblk_is_oob_meta_supported(pblk)) {
- /* Just use OOB metadata buffer as always */
- buffer = rqd->meta_list;
- } else {
- /* We need to reuse last page of request (packed metadata)
- * in similar way as traditional oob metadata
- */
- buffer = page_to_virt(
- rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page);
- }
-
- return buffer;
-}
-
-void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd)
-{
- void *meta_list = rqd->meta_list;
- void *page;
- int i = 0;
-
- if (pblk_is_oob_meta_supported(pblk))
- return;
-
- page = page_to_virt(rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page);
- /* We need to fill oob meta buffer with data from packed metadata */
- for (; i < rqd->nr_ppas; i++)
- memcpy(pblk_get_meta(pblk, meta_list, i),
- page + (i * sizeof(struct pblk_sec_meta)),
- sizeof(struct pblk_sec_meta));
-}
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
deleted file mode 100644
index b31658be35a7..000000000000
--- a/drivers/lightnvm/pblk-gc.c
+++ /dev/null
@@ -1,726 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-gc.c - pblk's garbage collector
- */
-
-#include "pblk.h"
-#include "pblk-trace.h"
-#include <linux/delay.h>
-
-
-static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
-{
- vfree(gc_rq->data);
- kfree(gc_rq);
-}
-
-static int pblk_gc_write(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
- struct pblk_gc_rq *gc_rq, *tgc_rq;
- LIST_HEAD(w_list);
-
- spin_lock(&gc->w_lock);
- if (list_empty(&gc->w_list)) {
- spin_unlock(&gc->w_lock);
- return 1;
- }
-
- list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
- gc->w_entries = 0;
- spin_unlock(&gc->w_lock);
-
- list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
- pblk_write_gc_to_cache(pblk, gc_rq);
- list_del(&gc_rq->list);
- kref_put(&gc_rq->line->ref, pblk_line_put);
- pblk_gc_free_gc_rq(gc_rq);
- }
-
- return 0;
-}
-
-static void pblk_gc_writer_kick(struct pblk_gc *gc)
-{
- wake_up_process(gc->gc_writer_ts);
-}
-
-void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct list_head *move_list;
-
- spin_lock(&l_mg->gc_lock);
- spin_lock(&line->lock);
- WARN_ON(line->state != PBLK_LINESTATE_GC);
- line->state = PBLK_LINESTATE_CLOSED;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
-
- /* We need to reset gc_group in order to ensure that
- * pblk_line_gc_list will return proper move_list
- * since right now current line is not on any of the
- * gc lists.
- */
- line->gc_group = PBLK_LINEGC_NONE;
- move_list = pblk_line_gc_list(pblk, line);
- spin_unlock(&line->lock);
- list_add_tail(&line->list, move_list);
- spin_unlock(&l_mg->gc_lock);
-}
-
-static void pblk_gc_line_ws(struct work_struct *work)
-{
- struct pblk_line_ws *gc_rq_ws = container_of(work,
- struct pblk_line_ws, ws);
- struct pblk *pblk = gc_rq_ws->pblk;
- struct pblk_gc *gc = &pblk->gc;
- struct pblk_line *line = gc_rq_ws->line;
- struct pblk_gc_rq *gc_rq = gc_rq_ws->priv;
- int ret;
-
- up(&gc->gc_sem);
-
- /* Read from GC victim block */
- ret = pblk_submit_read_gc(pblk, gc_rq);
- if (ret) {
- line->w_err_gc->has_gc_err = 1;
- goto out;
- }
-
- if (!gc_rq->secs_to_gc)
- goto out;
-
-retry:
- spin_lock(&gc->w_lock);
- if (gc->w_entries >= PBLK_GC_RQ_QD) {
- spin_unlock(&gc->w_lock);
- pblk_gc_writer_kick(&pblk->gc);
- usleep_range(128, 256);
- goto retry;
- }
- gc->w_entries++;
- list_add_tail(&gc_rq->list, &gc->w_list);
- spin_unlock(&gc->w_lock);
-
- pblk_gc_writer_kick(&pblk->gc);
-
- kfree(gc_rq_ws);
- return;
-
-out:
- pblk_gc_free_gc_rq(gc_rq);
- kref_put(&line->ref, pblk_line_put);
- kfree(gc_rq_ws);
-}
-
-static __le64 *get_lba_list_from_emeta(struct pblk *pblk,
- struct pblk_line *line)
-{
- struct line_emeta *emeta_buf;
- struct pblk_line_meta *lm = &pblk->lm;
- unsigned int lba_list_size = lm->emeta_len[2];
- __le64 *lba_list;
- int ret;
-
- emeta_buf = kvmalloc(lm->emeta_len[0], GFP_KERNEL);
- if (!emeta_buf)
- return NULL;
-
- ret = pblk_line_emeta_read(pblk, line, emeta_buf);
- if (ret) {
- pblk_err(pblk, "line %d read emeta failed (%d)\n",
- line->id, ret);
- kvfree(emeta_buf);
- return NULL;
- }
-
- /* If this read fails, it means that emeta is corrupted.
- * For now, leave the line untouched.
- * TODO: Implement a recovery routine that scans and moves
- * all sectors on the line.
- */
-
- ret = pblk_recov_check_emeta(pblk, emeta_buf);
- if (ret) {
- pblk_err(pblk, "inconsistent emeta (line %d)\n",
- line->id);
- kvfree(emeta_buf);
- return NULL;
- }
-
- lba_list = kvmalloc(lba_list_size, GFP_KERNEL);
-
- if (lba_list)
- memcpy(lba_list, emeta_to_lbas(pblk, emeta_buf), lba_list_size);
-
- kvfree(emeta_buf);
-
- return lba_list;
-}
-
-static void pblk_gc_line_prepare_ws(struct work_struct *work)
-{
- struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
- ws);
- struct pblk *pblk = line_ws->pblk;
- struct pblk_line *line = line_ws->line;
- struct pblk_line_meta *lm = &pblk->lm;
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_gc *gc = &pblk->gc;
- struct pblk_line_ws *gc_rq_ws;
- struct pblk_gc_rq *gc_rq;
- __le64 *lba_list;
- unsigned long *invalid_bitmap;
- int sec_left, nr_secs, bit;
-
- invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
- if (!invalid_bitmap)
- goto fail_free_ws;
-
- if (line->w_err_gc->has_write_err) {
- lba_list = line->w_err_gc->lba_list;
- line->w_err_gc->lba_list = NULL;
- } else {
- lba_list = get_lba_list_from_emeta(pblk, line);
- if (!lba_list) {
- pblk_err(pblk, "could not interpret emeta (line %d)\n",
- line->id);
- goto fail_free_invalid_bitmap;
- }
- }
-
- spin_lock(&line->lock);
- bitmap_copy(invalid_bitmap, line->invalid_bitmap, lm->sec_per_line);
- sec_left = pblk_line_vsc(line);
- spin_unlock(&line->lock);
-
- if (sec_left < 0) {
- pblk_err(pblk, "corrupted GC line (%d)\n", line->id);
- goto fail_free_lba_list;
- }
-
- bit = -1;
-next_rq:
- gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
- if (!gc_rq)
- goto fail_free_lba_list;
-
- nr_secs = 0;
- do {
- bit = find_next_zero_bit(invalid_bitmap, lm->sec_per_line,
- bit + 1);
- if (bit > line->emeta_ssec)
- break;
-
- gc_rq->paddr_list[nr_secs] = bit;
- gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
- } while (nr_secs < pblk->max_write_pgs);
-
- if (unlikely(!nr_secs)) {
- kfree(gc_rq);
- goto out;
- }
-
- gc_rq->nr_secs = nr_secs;
- gc_rq->line = line;
-
- gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs));
- if (!gc_rq->data)
- goto fail_free_gc_rq;
-
- gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
- if (!gc_rq_ws)
- goto fail_free_gc_data;
-
- gc_rq_ws->pblk = pblk;
- gc_rq_ws->line = line;
- gc_rq_ws->priv = gc_rq;
-
- /* The write GC path can be much slower than the read GC one due to
- * the budget imposed by the rate-limiter. Balance in case that we get
- * back pressure from the write GC path.
- */
- while (down_timeout(&gc->gc_sem, msecs_to_jiffies(30000)))
- io_schedule();
-
- kref_get(&line->ref);
-
- INIT_WORK(&gc_rq_ws->ws, pblk_gc_line_ws);
- queue_work(gc->gc_line_reader_wq, &gc_rq_ws->ws);
-
- sec_left -= nr_secs;
- if (sec_left > 0)
- goto next_rq;
-
-out:
- kvfree(lba_list);
- kfree(line_ws);
- kfree(invalid_bitmap);
-
- kref_put(&line->ref, pblk_line_put);
- atomic_dec(&gc->read_inflight_gc);
-
- return;
-
-fail_free_gc_data:
- vfree(gc_rq->data);
-fail_free_gc_rq:
- kfree(gc_rq);
-fail_free_lba_list:
- kvfree(lba_list);
-fail_free_invalid_bitmap:
- kfree(invalid_bitmap);
-fail_free_ws:
- kfree(line_ws);
-
- /* Line goes back to closed state, so we cannot release additional
- * reference for line, since we do that only when we want to do
- * gc to free line state transition.
- */
- pblk_put_line_back(pblk, line);
- atomic_dec(&gc->read_inflight_gc);
-
- pblk_err(pblk, "failed to GC line %d\n", line->id);
-}
-
-static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_gc *gc = &pblk->gc;
- struct pblk_line_ws *line_ws;
-
- pblk_debug(pblk, "line '%d' being reclaimed for GC\n", line->id);
-
- line_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
- if (!line_ws)
- return -ENOMEM;
-
- line_ws->pblk = pblk;
- line_ws->line = line;
-
- atomic_inc(&gc->pipeline_gc);
- INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
- queue_work(gc->gc_reader_wq, &line_ws->ws);
-
- return 0;
-}
-
-static void pblk_gc_reader_kick(struct pblk_gc *gc)
-{
- wake_up_process(gc->gc_reader_ts);
-}
-
-static void pblk_gc_kick(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- pblk_gc_writer_kick(gc);
- pblk_gc_reader_kick(gc);
-
- /* If we're shutting down GC, let's not start it up again */
- if (gc->gc_enabled) {
- wake_up_process(gc->gc_ts);
- mod_timer(&gc->gc_timer,
- jiffies + msecs_to_jiffies(GC_TIME_MSECS));
- }
-}
-
-static int pblk_gc_read(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
- struct pblk_line *line;
-
- spin_lock(&gc->r_lock);
- if (list_empty(&gc->r_list)) {
- spin_unlock(&gc->r_lock);
- return 1;
- }
-
- line = list_first_entry(&gc->r_list, struct pblk_line, list);
- list_del(&line->list);
- spin_unlock(&gc->r_lock);
-
- pblk_gc_kick(pblk);
-
- if (pblk_gc_line(pblk, line)) {
- pblk_err(pblk, "failed to GC line %d\n", line->id);
- /* rollback */
- spin_lock(&gc->r_lock);
- list_add_tail(&line->list, &gc->r_list);
- spin_unlock(&gc->r_lock);
- }
-
- return 0;
-}
-
-static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
- struct list_head *group_list)
-{
- struct pblk_line *line, *victim;
- unsigned int line_vsc = ~0x0L, victim_vsc = ~0x0L;
-
- victim = list_first_entry(group_list, struct pblk_line, list);
-
- list_for_each_entry(line, group_list, list) {
- if (!atomic_read(&line->sec_to_update))
- line_vsc = le32_to_cpu(*line->vsc);
- if (line_vsc < victim_vsc) {
- victim = line;
- victim_vsc = le32_to_cpu(*victim->vsc);
- }
- }
-
- if (victim_vsc == ~0x0)
- return NULL;
-
- return victim;
-}
-
-static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
-{
- unsigned int nr_blocks_free, nr_blocks_need;
- unsigned int werr_lines = atomic_read(&rl->werr_lines);
-
- nr_blocks_need = pblk_rl_high_thrs(rl);
- nr_blocks_free = pblk_rl_nr_free_blks(rl);
-
- /* This is not critical, no need to take lock here */
- return ((werr_lines > 0) ||
- ((gc->gc_active) && (nr_blocks_need > nr_blocks_free)));
-}
-
-void pblk_gc_free_full_lines(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_gc *gc = &pblk->gc;
- struct pblk_line *line;
-
- do {
- spin_lock(&l_mg->gc_lock);
- if (list_empty(&l_mg->gc_full_list)) {
- spin_unlock(&l_mg->gc_lock);
- return;
- }
-
- line = list_first_entry(&l_mg->gc_full_list,
- struct pblk_line, list);
-
- spin_lock(&line->lock);
- WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
- line->state = PBLK_LINESTATE_GC;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- spin_unlock(&line->lock);
-
- list_del(&line->list);
- spin_unlock(&l_mg->gc_lock);
-
- atomic_inc(&gc->pipeline_gc);
- kref_put(&line->ref, pblk_line_put);
- } while (1);
-}
-
-/*
- * Lines with no valid sectors will be returned to the free list immediately. If
- * GC is activated - either because the free block count is under the determined
- * threshold, or because it is being forced from user space - only lines with a
- * high count of invalid sectors will be recycled.
- */
-static void pblk_gc_run(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_gc *gc = &pblk->gc;
- struct pblk_line *line;
- struct list_head *group_list;
- bool run_gc;
- int read_inflight_gc, gc_group = 0, prev_group = 0;
-
- pblk_gc_free_full_lines(pblk);
-
- run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
- if (!run_gc || (atomic_read(&gc->read_inflight_gc) >= PBLK_GC_L_QD))
- return;
-
-next_gc_group:
- group_list = l_mg->gc_lists[gc_group++];
-
- do {
- spin_lock(&l_mg->gc_lock);
-
- line = pblk_gc_get_victim_line(pblk, group_list);
- if (!line) {
- spin_unlock(&l_mg->gc_lock);
- break;
- }
-
- spin_lock(&line->lock);
- WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
- line->state = PBLK_LINESTATE_GC;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- spin_unlock(&line->lock);
-
- list_del(&line->list);
- spin_unlock(&l_mg->gc_lock);
-
- spin_lock(&gc->r_lock);
- list_add_tail(&line->list, &gc->r_list);
- spin_unlock(&gc->r_lock);
-
- read_inflight_gc = atomic_inc_return(&gc->read_inflight_gc);
- pblk_gc_reader_kick(gc);
-
- prev_group = 1;
-
- /* No need to queue up more GC lines than we can handle */
- run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
- if (!run_gc || read_inflight_gc >= PBLK_GC_L_QD)
- break;
- } while (1);
-
- if (!prev_group && pblk->rl.rb_state > gc_group &&
- gc_group < PBLK_GC_NR_LISTS)
- goto next_gc_group;
-}
-
-static void pblk_gc_timer(struct timer_list *t)
-{
- struct pblk *pblk = from_timer(pblk, t, gc.gc_timer);
-
- pblk_gc_kick(pblk);
-}
-
-static int pblk_gc_ts(void *data)
-{
- struct pblk *pblk = data;
-
- while (!kthread_should_stop()) {
- pblk_gc_run(pblk);
- set_current_state(TASK_INTERRUPTIBLE);
- io_schedule();
- }
-
- return 0;
-}
-
-static int pblk_gc_writer_ts(void *data)
-{
- struct pblk *pblk = data;
-
- while (!kthread_should_stop()) {
- if (!pblk_gc_write(pblk))
- continue;
- set_current_state(TASK_INTERRUPTIBLE);
- io_schedule();
- }
-
- return 0;
-}
-
-static int pblk_gc_reader_ts(void *data)
-{
- struct pblk *pblk = data;
- struct pblk_gc *gc = &pblk->gc;
-
- while (!kthread_should_stop()) {
- if (!pblk_gc_read(pblk))
- continue;
- set_current_state(TASK_INTERRUPTIBLE);
- io_schedule();
- }
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- pblk_info(pblk, "flushing gc pipeline, %d lines left\n",
- atomic_read(&gc->pipeline_gc));
-#endif
-
- do {
- if (!atomic_read(&gc->pipeline_gc))
- break;
-
- schedule();
- } while (1);
-
- return 0;
-}
-
-static void pblk_gc_start(struct pblk *pblk)
-{
- pblk->gc.gc_active = 1;
- pblk_debug(pblk, "gc start\n");
-}
-
-void pblk_gc_should_start(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- if (gc->gc_enabled && !gc->gc_active) {
- pblk_gc_start(pblk);
- pblk_gc_kick(pblk);
- }
-}
-
-void pblk_gc_should_stop(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- if (gc->gc_active && !gc->gc_forced)
- gc->gc_active = 0;
-}
-
-void pblk_gc_should_kick(struct pblk *pblk)
-{
- pblk_rl_update_rates(&pblk->rl);
-}
-
-void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
- int *gc_active)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- spin_lock(&gc->lock);
- *gc_enabled = gc->gc_enabled;
- *gc_active = gc->gc_active;
- spin_unlock(&gc->lock);
-}
-
-int pblk_gc_sysfs_force(struct pblk *pblk, int force)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- if (force < 0 || force > 1)
- return -EINVAL;
-
- spin_lock(&gc->lock);
- gc->gc_forced = force;
-
- if (force)
- gc->gc_enabled = 1;
- else
- gc->gc_enabled = 0;
- spin_unlock(&gc->lock);
-
- pblk_gc_should_start(pblk);
-
- return 0;
-}
-
-int pblk_gc_init(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
- int ret;
-
- gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
- if (IS_ERR(gc->gc_ts)) {
- pblk_err(pblk, "could not allocate GC main kthread\n");
- return PTR_ERR(gc->gc_ts);
- }
-
- gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
- "pblk-gc-writer-ts");
- if (IS_ERR(gc->gc_writer_ts)) {
- pblk_err(pblk, "could not allocate GC writer kthread\n");
- ret = PTR_ERR(gc->gc_writer_ts);
- goto fail_free_main_kthread;
- }
-
- gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
- "pblk-gc-reader-ts");
- if (IS_ERR(gc->gc_reader_ts)) {
- pblk_err(pblk, "could not allocate GC reader kthread\n");
- ret = PTR_ERR(gc->gc_reader_ts);
- goto fail_free_writer_kthread;
- }
-
- timer_setup(&gc->gc_timer, pblk_gc_timer, 0);
- mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
-
- gc->gc_active = 0;
- gc->gc_forced = 0;
- gc->gc_enabled = 1;
- gc->w_entries = 0;
- atomic_set(&gc->read_inflight_gc, 0);
- atomic_set(&gc->pipeline_gc, 0);
-
- /* Workqueue that reads valid sectors from a line and submit them to the
- * GC writer to be recycled.
- */
- gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
- if (!gc->gc_line_reader_wq) {
- pblk_err(pblk, "could not allocate GC line reader workqueue\n");
- ret = -ENOMEM;
- goto fail_free_reader_kthread;
- }
-
- /* Workqueue that prepare lines for GC */
- gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
- if (!gc->gc_reader_wq) {
- pblk_err(pblk, "could not allocate GC reader workqueue\n");
- ret = -ENOMEM;
- goto fail_free_reader_line_wq;
- }
-
- spin_lock_init(&gc->lock);
- spin_lock_init(&gc->w_lock);
- spin_lock_init(&gc->r_lock);
-
- sema_init(&gc->gc_sem, PBLK_GC_RQ_QD);
-
- INIT_LIST_HEAD(&gc->w_list);
- INIT_LIST_HEAD(&gc->r_list);
-
- return 0;
-
-fail_free_reader_line_wq:
- destroy_workqueue(gc->gc_line_reader_wq);
-fail_free_reader_kthread:
- kthread_stop(gc->gc_reader_ts);
-fail_free_writer_kthread:
- kthread_stop(gc->gc_writer_ts);
-fail_free_main_kthread:
- kthread_stop(gc->gc_ts);
-
- return ret;
-}
-
-void pblk_gc_exit(struct pblk *pblk, bool graceful)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- gc->gc_enabled = 0;
- del_timer_sync(&gc->gc_timer);
- gc->gc_active = 0;
-
- if (gc->gc_ts)
- kthread_stop(gc->gc_ts);
-
- if (gc->gc_reader_ts)
- kthread_stop(gc->gc_reader_ts);
-
- if (graceful) {
- flush_workqueue(gc->gc_reader_wq);
- flush_workqueue(gc->gc_line_reader_wq);
- }
-
- destroy_workqueue(gc->gc_reader_wq);
- destroy_workqueue(gc->gc_line_reader_wq);
-
- if (gc->gc_writer_ts)
- kthread_stop(gc->gc_writer_ts);
-}
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
deleted file mode 100644
index 5924f09c217b..000000000000
--- a/drivers/lightnvm/pblk-init.c
+++ /dev/null
@@ -1,1324 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2015 IT University of Copenhagen (rrpc.c)
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Implementation of a physical block-device target for Open-channel SSDs.
- *
- * pblk-init.c - pblk's initialization.
- */
-
-#include "pblk.h"
-#include "pblk-trace.h"
-
-static unsigned int write_buffer_size;
-
-module_param(write_buffer_size, uint, 0644);
-MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer");
-
-struct pblk_global_caches {
- struct kmem_cache *ws;
- struct kmem_cache *rec;
- struct kmem_cache *g_rq;
- struct kmem_cache *w_rq;
-
- struct kref kref;
-
- struct mutex mutex; /* Ensures consistency between
- * caches and kref
- */
-};
-
-static struct pblk_global_caches pblk_caches = {
- .mutex = __MUTEX_INITIALIZER(pblk_caches.mutex),
- .kref = KREF_INIT(0),
-};
-
-struct bio_set pblk_bio_set;
-
-static blk_qc_t pblk_submit_bio(struct bio *bio)
-{
- struct pblk *pblk = bio->bi_bdev->bd_disk->queue->queuedata;
-
- if (bio_op(bio) == REQ_OP_DISCARD) {
- pblk_discard(pblk, bio);
- if (!(bio->bi_opf & REQ_PREFLUSH)) {
- bio_endio(bio);
- return BLK_QC_T_NONE;
- }
- }
-
- /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
- * constraint. Writes can be of arbitrary size.
- */
- if (bio_data_dir(bio) == READ) {
- blk_queue_split(&bio);
- pblk_submit_read(pblk, bio);
- } else {
- /* Prevent deadlock in the case of a modest LUN configuration
- * and large user I/Os. Unless stalled, the rate limiter
- * leaves at least 256KB available for user I/O.
- */
- if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
- blk_queue_split(&bio);
-
- pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
- }
-
- return BLK_QC_T_NONE;
-}
-
-static const struct block_device_operations pblk_bops = {
- .owner = THIS_MODULE,
- .submit_bio = pblk_submit_bio,
-};
-
-
-static size_t pblk_trans_map_size(struct pblk *pblk)
-{
- int entry_size = 8;
-
- if (pblk->addrf_len < 32)
- entry_size = 4;
-
- return entry_size * pblk->capacity;
-}
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-static u32 pblk_l2p_crc(struct pblk *pblk)
-{
- size_t map_size;
- u32 crc = ~(u32)0;
-
- map_size = pblk_trans_map_size(pblk);
- crc = crc32_le(crc, pblk->trans_map, map_size);
- return crc;
-}
-#endif
-
-static void pblk_l2p_free(struct pblk *pblk)
-{
- vfree(pblk->trans_map);
-}
-
-static int pblk_l2p_recover(struct pblk *pblk, bool factory_init)
-{
- struct pblk_line *line = NULL;
-
- if (factory_init) {
- guid_gen(&pblk->instance_uuid);
- } else {
- line = pblk_recov_l2p(pblk);
- if (IS_ERR(line)) {
- pblk_err(pblk, "could not recover l2p table\n");
- return -EFAULT;
- }
- }
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- pblk_info(pblk, "init: L2P CRC: %x\n", pblk_l2p_crc(pblk));
-#endif
-
- /* Free full lines directly as GC has not been started yet */
- pblk_gc_free_full_lines(pblk);
-
- if (!line) {
- /* Configure next line for user data */
- line = pblk_line_get_first_data(pblk);
- if (!line)
- return -EFAULT;
- }
-
- return 0;
-}
-
-static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
-{
- sector_t i;
- struct ppa_addr ppa;
- size_t map_size;
- int ret = 0;
-
- map_size = pblk_trans_map_size(pblk);
- pblk->trans_map = __vmalloc(map_size, GFP_KERNEL | __GFP_NOWARN |
- __GFP_RETRY_MAYFAIL | __GFP_HIGHMEM);
- if (!pblk->trans_map) {
- pblk_err(pblk, "failed to allocate L2P (need %zu of memory)\n",
- map_size);
- return -ENOMEM;
- }
-
- pblk_ppa_set_empty(&ppa);
-
- for (i = 0; i < pblk->capacity; i++)
- pblk_trans_map_set(pblk, i, ppa);
-
- ret = pblk_l2p_recover(pblk, factory_init);
- if (ret)
- vfree(pblk->trans_map);
-
- return ret;
-}
-
-static void pblk_rwb_free(struct pblk *pblk)
-{
- if (pblk_rb_tear_down_check(&pblk->rwb))
- pblk_err(pblk, "write buffer error on tear down\n");
-
- pblk_rb_free(&pblk->rwb);
-}
-
-static int pblk_rwb_init(struct pblk *pblk)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- unsigned long buffer_size;
- int pgs_in_buffer, threshold;
-
- threshold = geo->mw_cunits * geo->all_luns;
- pgs_in_buffer = (max(geo->mw_cunits, geo->ws_opt) + geo->ws_opt)
- * geo->all_luns;
-
- if (write_buffer_size && (write_buffer_size > pgs_in_buffer))
- buffer_size = write_buffer_size;
- else
- buffer_size = pgs_in_buffer;
-
- return pblk_rb_init(&pblk->rwb, buffer_size, threshold, geo->csecs);
-}
-
-static int pblk_set_addrf_12(struct pblk *pblk, struct nvm_geo *geo,
- struct nvm_addrf_12 *dst)
-{
- struct nvm_addrf_12 *src = (struct nvm_addrf_12 *)&geo->addrf;
- int power_len;
-
- /* Re-calculate channel and lun format to adapt to configuration */
- power_len = get_count_order(geo->num_ch);
- if (1 << power_len != geo->num_ch) {
- pblk_err(pblk, "supports only power-of-two channel config.\n");
- return -EINVAL;
- }
- dst->ch_len = power_len;
-
- power_len = get_count_order(geo->num_lun);
- if (1 << power_len != geo->num_lun) {
- pblk_err(pblk, "supports only power-of-two LUN config.\n");
- return -EINVAL;
- }
- dst->lun_len = power_len;
-
- dst->blk_len = src->blk_len;
- dst->pg_len = src->pg_len;
- dst->pln_len = src->pln_len;
- dst->sec_len = src->sec_len;
-
- dst->sec_offset = 0;
- dst->pln_offset = dst->sec_len;
- dst->ch_offset = dst->pln_offset + dst->pln_len;
- dst->lun_offset = dst->ch_offset + dst->ch_len;
- dst->pg_offset = dst->lun_offset + dst->lun_len;
- dst->blk_offset = dst->pg_offset + dst->pg_len;
-
- dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
- dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset;
- dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
- dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
- dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset;
- dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset;
-
- return dst->blk_offset + src->blk_len;
-}
-
-static int pblk_set_addrf_20(struct nvm_geo *geo, struct nvm_addrf *adst,
- struct pblk_addrf *udst)
-{
- struct nvm_addrf *src = &geo->addrf;
-
- adst->ch_len = get_count_order(geo->num_ch);
- adst->lun_len = get_count_order(geo->num_lun);
- adst->chk_len = src->chk_len;
- adst->sec_len = src->sec_len;
-
- adst->sec_offset = 0;
- adst->ch_offset = adst->sec_len;
- adst->lun_offset = adst->ch_offset + adst->ch_len;
- adst->chk_offset = adst->lun_offset + adst->lun_len;
-
- adst->sec_mask = ((1ULL << adst->sec_len) - 1) << adst->sec_offset;
- adst->chk_mask = ((1ULL << adst->chk_len) - 1) << adst->chk_offset;
- adst->lun_mask = ((1ULL << adst->lun_len) - 1) << adst->lun_offset;
- adst->ch_mask = ((1ULL << adst->ch_len) - 1) << adst->ch_offset;
-
- udst->sec_stripe = geo->ws_opt;
- udst->ch_stripe = geo->num_ch;
- udst->lun_stripe = geo->num_lun;
-
- udst->sec_lun_stripe = udst->sec_stripe * udst->ch_stripe;
- udst->sec_ws_stripe = udst->sec_lun_stripe * udst->lun_stripe;
-
- return adst->chk_offset + adst->chk_len;
-}
-
-static int pblk_set_addrf(struct pblk *pblk)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int mod;
-
- switch (geo->version) {
- case NVM_OCSSD_SPEC_12:
- div_u64_rem(geo->clba, pblk->min_write_pgs, &mod);
- if (mod) {
- pblk_err(pblk, "bad configuration of sectors/pages\n");
- return -EINVAL;
- }
-
- pblk->addrf_len = pblk_set_addrf_12(pblk, geo,
- (void *)&pblk->addrf);
- break;
- case NVM_OCSSD_SPEC_20:
- pblk->addrf_len = pblk_set_addrf_20(geo, (void *)&pblk->addrf,
- &pblk->uaddrf);
- break;
- default:
- pblk_err(pblk, "OCSSD revision not supported (%d)\n",
- geo->version);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int pblk_create_global_caches(void)
-{
-
- pblk_caches.ws = kmem_cache_create("pblk_blk_ws",
- sizeof(struct pblk_line_ws), 0, 0, NULL);
- if (!pblk_caches.ws)
- return -ENOMEM;
-
- pblk_caches.rec = kmem_cache_create("pblk_rec",
- sizeof(struct pblk_rec_ctx), 0, 0, NULL);
- if (!pblk_caches.rec)
- goto fail_destroy_ws;
-
- pblk_caches.g_rq = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
- 0, 0, NULL);
- if (!pblk_caches.g_rq)
- goto fail_destroy_rec;
-
- pblk_caches.w_rq = kmem_cache_create("pblk_w_rq", pblk_w_rq_size,
- 0, 0, NULL);
- if (!pblk_caches.w_rq)
- goto fail_destroy_g_rq;
-
- return 0;
-
-fail_destroy_g_rq:
- kmem_cache_destroy(pblk_caches.g_rq);
-fail_destroy_rec:
- kmem_cache_destroy(pblk_caches.rec);
-fail_destroy_ws:
- kmem_cache_destroy(pblk_caches.ws);
-
- return -ENOMEM;
-}
-
-static int pblk_get_global_caches(void)
-{
- int ret = 0;
-
- mutex_lock(&pblk_caches.mutex);
-
- if (kref_get_unless_zero(&pblk_caches.kref))
- goto out;
-
- ret = pblk_create_global_caches();
- if (!ret)
- kref_init(&pblk_caches.kref);
-
-out:
- mutex_unlock(&pblk_caches.mutex);
- return ret;
-}
-
-static void pblk_destroy_global_caches(struct kref *ref)
-{
- struct pblk_global_caches *c;
-
- c = container_of(ref, struct pblk_global_caches, kref);
-
- kmem_cache_destroy(c->ws);
- kmem_cache_destroy(c->rec);
- kmem_cache_destroy(c->g_rq);
- kmem_cache_destroy(c->w_rq);
-}
-
-static void pblk_put_global_caches(void)
-{
- mutex_lock(&pblk_caches.mutex);
- kref_put(&pblk_caches.kref, pblk_destroy_global_caches);
- mutex_unlock(&pblk_caches.mutex);
-}
-
-static int pblk_core_init(struct pblk *pblk)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int ret, max_write_ppas;
-
- atomic64_set(&pblk->user_wa, 0);
- atomic64_set(&pblk->pad_wa, 0);
- atomic64_set(&pblk->gc_wa, 0);
- pblk->user_rst_wa = 0;
- pblk->pad_rst_wa = 0;
- pblk->gc_rst_wa = 0;
-
- atomic64_set(&pblk->nr_flush, 0);
- pblk->nr_flush_rst = 0;
-
- pblk->min_write_pgs = geo->ws_opt;
- pblk->min_write_pgs_data = pblk->min_write_pgs;
- max_write_ppas = pblk->min_write_pgs * geo->all_luns;
- pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA);
- pblk->max_write_pgs = min_t(int, pblk->max_write_pgs,
- queue_max_hw_sectors(dev->q) / (geo->csecs >> SECTOR_SHIFT));
- pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
-
- pblk->oob_meta_size = geo->sos;
- if (!pblk_is_oob_meta_supported(pblk)) {
- /* For drives which does not have OOB metadata feature
- * in order to support recovery feature we need to use
- * so called packed metadata. Packed metada will store
- * the same information as OOB metadata (l2p table mapping,
- * but in the form of the single page at the end of
- * every write request.
- */
- if (pblk->min_write_pgs
- * sizeof(struct pblk_sec_meta) > PAGE_SIZE) {
- /* We want to keep all the packed metadata on single
- * page per write requests. So we need to ensure that
- * it will fit.
- *
- * This is more like sanity check, since there is
- * no device with such a big minimal write size
- * (above 1 metabytes).
- */
- pblk_err(pblk, "Not supported min write size\n");
- return -EINVAL;
- }
- /* For packed meta approach we do some simplification.
- * On read path we always issue requests which size
- * equal to max_write_pgs, with all pages filled with
- * user payload except of last one page which will be
- * filled with packed metadata.
- */
- pblk->max_write_pgs = pblk->min_write_pgs;
- pblk->min_write_pgs_data = pblk->min_write_pgs - 1;
- }
-
- pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t),
- GFP_KERNEL);
- if (!pblk->pad_dist)
- return -ENOMEM;
-
- if (pblk_get_global_caches())
- goto fail_free_pad_dist;
-
- /* Internal bios can be at most the sectors signaled by the device. */
- ret = mempool_init_page_pool(&pblk->page_bio_pool, NVM_MAX_VLBA, 0);
- if (ret)
- goto free_global_caches;
-
- ret = mempool_init_slab_pool(&pblk->gen_ws_pool, PBLK_GEN_WS_POOL_SIZE,
- pblk_caches.ws);
- if (ret)
- goto free_page_bio_pool;
-
- ret = mempool_init_slab_pool(&pblk->rec_pool, geo->all_luns,
- pblk_caches.rec);
- if (ret)
- goto free_gen_ws_pool;
-
- ret = mempool_init_slab_pool(&pblk->r_rq_pool, geo->all_luns,
- pblk_caches.g_rq);
- if (ret)
- goto free_rec_pool;
-
- ret = mempool_init_slab_pool(&pblk->e_rq_pool, geo->all_luns,
- pblk_caches.g_rq);
- if (ret)
- goto free_r_rq_pool;
-
- ret = mempool_init_slab_pool(&pblk->w_rq_pool, geo->all_luns,
- pblk_caches.w_rq);
- if (ret)
- goto free_e_rq_pool;
-
- pblk->close_wq = alloc_workqueue("pblk-close-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
- if (!pblk->close_wq)
- goto free_w_rq_pool;
-
- pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
- if (!pblk->bb_wq)
- goto free_close_wq;
-
- pblk->r_end_wq = alloc_workqueue("pblk-read-end-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
- if (!pblk->r_end_wq)
- goto free_bb_wq;
-
- if (pblk_set_addrf(pblk))
- goto free_r_end_wq;
-
- INIT_LIST_HEAD(&pblk->compl_list);
- INIT_LIST_HEAD(&pblk->resubmit_list);
-
- return 0;
-
-free_r_end_wq:
- destroy_workqueue(pblk->r_end_wq);
-free_bb_wq:
- destroy_workqueue(pblk->bb_wq);
-free_close_wq:
- destroy_workqueue(pblk->close_wq);
-free_w_rq_pool:
- mempool_exit(&pblk->w_rq_pool);
-free_e_rq_pool:
- mempool_exit(&pblk->e_rq_pool);
-free_r_rq_pool:
- mempool_exit(&pblk->r_rq_pool);
-free_rec_pool:
- mempool_exit(&pblk->rec_pool);
-free_gen_ws_pool:
- mempool_exit(&pblk->gen_ws_pool);
-free_page_bio_pool:
- mempool_exit(&pblk->page_bio_pool);
-free_global_caches:
- pblk_put_global_caches();
-fail_free_pad_dist:
- kfree(pblk->pad_dist);
- return -ENOMEM;
-}
-
-static void pblk_core_free(struct pblk *pblk)
-{
- if (pblk->close_wq)
- destroy_workqueue(pblk->close_wq);
-
- if (pblk->r_end_wq)
- destroy_workqueue(pblk->r_end_wq);
-
- if (pblk->bb_wq)
- destroy_workqueue(pblk->bb_wq);
-
- mempool_exit(&pblk->page_bio_pool);
- mempool_exit(&pblk->gen_ws_pool);
- mempool_exit(&pblk->rec_pool);
- mempool_exit(&pblk->r_rq_pool);
- mempool_exit(&pblk->e_rq_pool);
- mempool_exit(&pblk->w_rq_pool);
-
- pblk_put_global_caches();
- kfree(pblk->pad_dist);
-}
-
-static void pblk_line_mg_free(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- int i;
-
- kfree(l_mg->bb_template);
- kfree(l_mg->bb_aux);
- kfree(l_mg->vsc_list);
-
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- kfree(l_mg->sline_meta[i]);
- kvfree(l_mg->eline_meta[i]->buf);
- kfree(l_mg->eline_meta[i]);
- }
-
- mempool_destroy(l_mg->bitmap_pool);
- kmem_cache_destroy(l_mg->bitmap_cache);
-}
-
-static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg,
- struct pblk_line *line)
-{
- struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
-
- kfree(line->blk_bitmap);
- kfree(line->erase_bitmap);
- kfree(line->chks);
-
- kvfree(w_err_gc->lba_list);
- kfree(w_err_gc);
-}
-
-static void pblk_lines_free(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *line;
- int i;
-
- for (i = 0; i < l_mg->nr_lines; i++) {
- line = &pblk->lines[i];
-
- pblk_line_free(line);
- pblk_line_meta_free(l_mg, line);
- }
-
- pblk_line_mg_free(pblk);
-
- kfree(pblk->luns);
- kfree(pblk->lines);
-}
-
-static int pblk_luns_init(struct pblk *pblk)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_lun *rlun;
- int i;
-
- /* TODO: Implement unbalanced LUN support */
- if (geo->num_lun < 0) {
- pblk_err(pblk, "unbalanced LUN config.\n");
- return -EINVAL;
- }
-
- pblk->luns = kcalloc(geo->all_luns, sizeof(struct pblk_lun),
- GFP_KERNEL);
- if (!pblk->luns)
- return -ENOMEM;
-
- for (i = 0; i < geo->all_luns; i++) {
- /* Stripe across channels */
- int ch = i % geo->num_ch;
- int lun_raw = i / geo->num_ch;
- int lunid = lun_raw + ch * geo->num_lun;
-
- rlun = &pblk->luns[i];
- rlun->bppa = dev->luns[lunid];
-
- sema_init(&rlun->wr_sem, 1);
- }
-
- return 0;
-}
-
-/* See comment over struct line_emeta definition */
-static unsigned int calc_emeta_len(struct pblk *pblk)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
-
- /* Round to sector size so that lba_list starts on its own sector */
- lm->emeta_sec[1] = DIV_ROUND_UP(
- sizeof(struct line_emeta) + lm->blk_bitmap_len +
- sizeof(struct wa_counters), geo->csecs);
- lm->emeta_len[1] = lm->emeta_sec[1] * geo->csecs;
-
- /* Round to sector size so that vsc_list starts on its own sector */
- lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
- lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
- geo->csecs);
- lm->emeta_len[2] = lm->emeta_sec[2] * geo->csecs;
-
- lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
- geo->csecs);
- lm->emeta_len[3] = lm->emeta_sec[3] * geo->csecs;
-
- lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);
-
- return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
-}
-
-static int pblk_set_provision(struct pblk *pblk, int nr_free_chks)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
- struct nvm_geo *geo = &dev->geo;
- sector_t provisioned;
- int sec_meta, blk_meta, clba;
- int minimum;
-
- if (geo->op == NVM_TARGET_DEFAULT_OP)
- pblk->op = PBLK_DEFAULT_OP;
- else
- pblk->op = geo->op;
-
- minimum = pblk_get_min_chks(pblk);
- provisioned = nr_free_chks;
- provisioned *= (100 - pblk->op);
- sector_div(provisioned, 100);
-
- if ((nr_free_chks - provisioned) < minimum) {
- if (geo->op != NVM_TARGET_DEFAULT_OP) {
- pblk_err(pblk, "OP too small to create a sane instance\n");
- return -EINTR;
- }
-
- /* If the user did not specify an OP value, and PBLK_DEFAULT_OP
- * is not enough, calculate and set sane value
- */
-
- provisioned = nr_free_chks - minimum;
- pblk->op = (100 * minimum) / nr_free_chks;
- pblk_info(pblk, "Default OP insufficient, adjusting OP to %d\n",
- pblk->op);
- }
-
- pblk->op_blks = nr_free_chks - provisioned;
-
- /* Internally pblk manages all free blocks, but all calculations based
- * on user capacity consider only provisioned blocks
- */
- pblk->rl.total_blocks = nr_free_chks;
-
- /* Consider sectors used for metadata */
- sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
- blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
-
- clba = (geo->clba / pblk->min_write_pgs) * pblk->min_write_pgs_data;
- pblk->capacity = (provisioned - blk_meta) * clba;
-
- atomic_set(&pblk->rl.free_blocks, nr_free_chks);
- atomic_set(&pblk->rl.free_user_blocks, nr_free_chks);
-
- return 0;
-}
-
-static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line,
- struct nvm_chk_meta *meta)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- int i, nr_bad_chks = 0;
-
- for (i = 0; i < lm->blk_per_line; i++) {
- struct pblk_lun *rlun = &pblk->luns[i];
- struct nvm_chk_meta *chunk;
- struct nvm_chk_meta *chunk_meta;
- struct ppa_addr ppa;
- int pos;
-
- ppa = rlun->bppa;
- pos = pblk_ppa_to_pos(geo, ppa);
- chunk = &line->chks[pos];
-
- ppa.m.chk = line->id;
- chunk_meta = pblk_chunk_get_off(pblk, meta, ppa);
-
- chunk->state = chunk_meta->state;
- chunk->type = chunk_meta->type;
- chunk->wi = chunk_meta->wi;
- chunk->slba = chunk_meta->slba;
- chunk->cnlb = chunk_meta->cnlb;
- chunk->wp = chunk_meta->wp;
-
- trace_pblk_chunk_state(pblk_disk_name(pblk), &ppa,
- chunk->state);
-
- if (chunk->type & NVM_CHK_TP_SZ_SPEC) {
- WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n");
- continue;
- }
-
- if (!(chunk->state & NVM_CHK_ST_OFFLINE))
- continue;
-
- set_bit(pos, line->blk_bitmap);
- nr_bad_chks++;
- }
-
- return nr_bad_chks;
-}
-
-static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line,
- void *chunk_meta, int line_id)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
- long nr_bad_chks, chk_in_line;
-
- line->pblk = pblk;
- line->id = line_id;
- line->type = PBLK_LINETYPE_FREE;
- line->state = PBLK_LINESTATE_NEW;
- line->gc_group = PBLK_LINEGC_NONE;
- line->vsc = &l_mg->vsc_list[line_id];
- spin_lock_init(&line->lock);
-
- nr_bad_chks = pblk_setup_line_meta_chk(pblk, line, chunk_meta);
-
- chk_in_line = lm->blk_per_line - nr_bad_chks;
- if (nr_bad_chks < 0 || nr_bad_chks > lm->blk_per_line ||
- chk_in_line < lm->min_blk_line) {
- line->state = PBLK_LINESTATE_BAD;
- list_add_tail(&line->list, &l_mg->bad_list);
- return 0;
- }
-
- atomic_set(&line->blk_in_line, chk_in_line);
- list_add_tail(&line->list, &l_mg->free_list);
- l_mg->nr_free_lines++;
-
- return chk_in_line;
-}
-
-static int pblk_alloc_line_meta(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
-
- line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
- if (!line->blk_bitmap)
- return -ENOMEM;
-
- line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
- if (!line->erase_bitmap)
- goto free_blk_bitmap;
-
-
- line->chks = kmalloc_array(lm->blk_per_line,
- sizeof(struct nvm_chk_meta), GFP_KERNEL);
- if (!line->chks)
- goto free_erase_bitmap;
-
- line->w_err_gc = kzalloc(sizeof(struct pblk_w_err_gc), GFP_KERNEL);
- if (!line->w_err_gc)
- goto free_chks;
-
- return 0;
-
-free_chks:
- kfree(line->chks);
-free_erase_bitmap:
- kfree(line->erase_bitmap);
-free_blk_bitmap:
- kfree(line->blk_bitmap);
- return -ENOMEM;
-}
-
-static int pblk_line_mg_init(struct pblk *pblk)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
- int i, bb_distance;
-
- l_mg->nr_lines = geo->num_chk;
- l_mg->log_line = l_mg->data_line = NULL;
- l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
- l_mg->nr_free_lines = 0;
- bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
-
- INIT_LIST_HEAD(&l_mg->free_list);
- INIT_LIST_HEAD(&l_mg->corrupt_list);
- INIT_LIST_HEAD(&l_mg->bad_list);
- INIT_LIST_HEAD(&l_mg->gc_full_list);
- INIT_LIST_HEAD(&l_mg->gc_high_list);
- INIT_LIST_HEAD(&l_mg->gc_mid_list);
- INIT_LIST_HEAD(&l_mg->gc_low_list);
- INIT_LIST_HEAD(&l_mg->gc_empty_list);
- INIT_LIST_HEAD(&l_mg->gc_werr_list);
-
- INIT_LIST_HEAD(&l_mg->emeta_list);
-
- l_mg->gc_lists[0] = &l_mg->gc_werr_list;
- l_mg->gc_lists[1] = &l_mg->gc_high_list;
- l_mg->gc_lists[2] = &l_mg->gc_mid_list;
- l_mg->gc_lists[3] = &l_mg->gc_low_list;
-
- spin_lock_init(&l_mg->free_lock);
- spin_lock_init(&l_mg->close_lock);
- spin_lock_init(&l_mg->gc_lock);
-
- l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
- if (!l_mg->vsc_list)
- goto fail;
-
- l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
- if (!l_mg->bb_template)
- goto fail_free_vsc_list;
-
- l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
- if (!l_mg->bb_aux)
- goto fail_free_bb_template;
-
- /* smeta is always small enough to fit on a kmalloc memory allocation,
- * emeta depends on the number of LUNs allocated to the pblk instance
- */
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
- if (!l_mg->sline_meta[i])
- goto fail_free_smeta;
- }
-
- l_mg->bitmap_cache = kmem_cache_create("pblk_lm_bitmap",
- lm->sec_bitmap_len, 0, 0, NULL);
- if (!l_mg->bitmap_cache)
- goto fail_free_smeta;
-
- /* the bitmap pool is used for both valid and map bitmaps */
- l_mg->bitmap_pool = mempool_create_slab_pool(PBLK_DATA_LINES * 2,
- l_mg->bitmap_cache);
- if (!l_mg->bitmap_pool)
- goto fail_destroy_bitmap_cache;
-
- /* emeta allocates three different buffers for managing metadata with
- * in-memory and in-media layouts
- */
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- struct pblk_emeta *emeta;
-
- emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
- if (!emeta)
- goto fail_free_emeta;
-
- emeta->buf = kvmalloc(lm->emeta_len[0], GFP_KERNEL);
- if (!emeta->buf) {
- kfree(emeta);
- goto fail_free_emeta;
- }
-
- emeta->nr_entries = lm->emeta_sec[0];
- l_mg->eline_meta[i] = emeta;
- }
-
- for (i = 0; i < l_mg->nr_lines; i++)
- l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);
-
- bb_distance = (geo->all_luns) * geo->ws_opt;
- for (i = 0; i < lm->sec_per_line; i += bb_distance)
- bitmap_set(l_mg->bb_template, i, geo->ws_opt);
-
- return 0;
-
-fail_free_emeta:
- while (--i >= 0) {
- kvfree(l_mg->eline_meta[i]->buf);
- kfree(l_mg->eline_meta[i]);
- }
-
- mempool_destroy(l_mg->bitmap_pool);
-fail_destroy_bitmap_cache:
- kmem_cache_destroy(l_mg->bitmap_cache);
-fail_free_smeta:
- for (i = 0; i < PBLK_DATA_LINES; i++)
- kfree(l_mg->sline_meta[i]);
- kfree(l_mg->bb_aux);
-fail_free_bb_template:
- kfree(l_mg->bb_template);
-fail_free_vsc_list:
- kfree(l_mg->vsc_list);
-fail:
- return -ENOMEM;
-}
-
-static int pblk_line_meta_init(struct pblk *pblk)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- unsigned int smeta_len, emeta_len;
- int i;
-
- lm->sec_per_line = geo->clba * geo->all_luns;
- lm->blk_per_line = geo->all_luns;
- lm->blk_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long);
- lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long);
- lm->lun_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long);
- lm->mid_thrs = lm->sec_per_line / 2;
- lm->high_thrs = lm->sec_per_line / 4;
- lm->meta_distance = (geo->all_luns / 2) * pblk->min_write_pgs;
-
- /* Calculate necessary pages for smeta. See comment over struct
- * line_smeta definition
- */
- i = 1;
-add_smeta_page:
- lm->smeta_sec = i * geo->ws_opt;
- lm->smeta_len = lm->smeta_sec * geo->csecs;
-
- smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
- if (smeta_len > lm->smeta_len) {
- i++;
- goto add_smeta_page;
- }
-
- /* Calculate necessary pages for emeta. See comment over struct
- * line_emeta definition
- */
- i = 1;
-add_emeta_page:
- lm->emeta_sec[0] = i * geo->ws_opt;
- lm->emeta_len[0] = lm->emeta_sec[0] * geo->csecs;
-
- emeta_len = calc_emeta_len(pblk);
- if (emeta_len > lm->emeta_len[0]) {
- i++;
- goto add_emeta_page;
- }
-
- lm->emeta_bb = geo->all_luns > i ? geo->all_luns - i : 0;
-
- lm->min_blk_line = 1;
- if (geo->all_luns > 1)
- lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec +
- lm->emeta_sec[0], geo->clba);
-
- if (lm->min_blk_line > lm->blk_per_line) {
- pblk_err(pblk, "config. not supported. Min. LUN in line:%d\n",
- lm->blk_per_line);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int pblk_lines_init(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *line;
- void *chunk_meta;
- int nr_free_chks = 0;
- int i, ret;
-
- ret = pblk_line_meta_init(pblk);
- if (ret)
- return ret;
-
- ret = pblk_line_mg_init(pblk);
- if (ret)
- return ret;
-
- ret = pblk_luns_init(pblk);
- if (ret)
- goto fail_free_meta;
-
- chunk_meta = pblk_get_chunk_meta(pblk);
- if (IS_ERR(chunk_meta)) {
- ret = PTR_ERR(chunk_meta);
- goto fail_free_luns;
- }
-
- pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
- GFP_KERNEL);
- if (!pblk->lines) {
- ret = -ENOMEM;
- goto fail_free_chunk_meta;
- }
-
- for (i = 0; i < l_mg->nr_lines; i++) {
- line = &pblk->lines[i];
-
- ret = pblk_alloc_line_meta(pblk, line);
- if (ret)
- goto fail_free_lines;
-
- nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i);
-
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- }
-
- if (!nr_free_chks) {
- pblk_err(pblk, "too many bad blocks prevent for sane instance\n");
- ret = -EINTR;
- goto fail_free_lines;
- }
-
- ret = pblk_set_provision(pblk, nr_free_chks);
- if (ret)
- goto fail_free_lines;
-
- vfree(chunk_meta);
- return 0;
-
-fail_free_lines:
- while (--i >= 0)
- pblk_line_meta_free(l_mg, &pblk->lines[i]);
- kfree(pblk->lines);
-fail_free_chunk_meta:
- vfree(chunk_meta);
-fail_free_luns:
- kfree(pblk->luns);
-fail_free_meta:
- pblk_line_mg_free(pblk);
-
- return ret;
-}
-
-static int pblk_writer_init(struct pblk *pblk)
-{
- pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t");
- if (IS_ERR(pblk->writer_ts)) {
- int err = PTR_ERR(pblk->writer_ts);
-
- if (err != -EINTR)
- pblk_err(pblk, "could not allocate writer kthread (%d)\n",
- err);
- return err;
- }
-
- timer_setup(&pblk->wtimer, pblk_write_timer_fn, 0);
- mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100));
-
- return 0;
-}
-
-static void pblk_writer_stop(struct pblk *pblk)
-{
- /* The pipeline must be stopped and the write buffer emptied before the
- * write thread is stopped
- */
- WARN(pblk_rb_read_count(&pblk->rwb),
- "Stopping not fully persisted write buffer\n");
-
- WARN(pblk_rb_sync_count(&pblk->rwb),
- "Stopping not fully synced write buffer\n");
-
- del_timer_sync(&pblk->wtimer);
- if (pblk->writer_ts)
- kthread_stop(pblk->writer_ts);
-}
-
-static void pblk_free(struct pblk *pblk)
-{
- pblk_lines_free(pblk);
- pblk_l2p_free(pblk);
- pblk_rwb_free(pblk);
- pblk_core_free(pblk);
-
- kfree(pblk);
-}
-
-static void pblk_tear_down(struct pblk *pblk, bool graceful)
-{
- if (graceful)
- __pblk_pipeline_flush(pblk);
- __pblk_pipeline_stop(pblk);
- pblk_writer_stop(pblk);
- pblk_rb_sync_l2p(&pblk->rwb);
- pblk_rl_free(&pblk->rl);
-
- pblk_debug(pblk, "consistent tear down (graceful:%d)\n", graceful);
-}
-
-static void pblk_exit(void *private, bool graceful)
-{
- struct pblk *pblk = private;
-
- pblk_gc_exit(pblk, graceful);
- pblk_tear_down(pblk, graceful);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- pblk_info(pblk, "exit: L2P CRC: %x\n", pblk_l2p_crc(pblk));
-#endif
-
- pblk_free(pblk);
-}
-
-static sector_t pblk_capacity(void *private)
-{
- struct pblk *pblk = private;
-
- return pblk->capacity * NR_PHY_IN_LOG;
-}
-
-static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
- int flags)
-{
- struct nvm_geo *geo = &dev->geo;
- struct request_queue *bqueue = dev->q;
- struct request_queue *tqueue = tdisk->queue;
- struct pblk *pblk;
- int ret;
-
- pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL);
- if (!pblk)
- return ERR_PTR(-ENOMEM);
-
- pblk->dev = dev;
- pblk->disk = tdisk;
- pblk->state = PBLK_STATE_RUNNING;
- trace_pblk_state(pblk_disk_name(pblk), pblk->state);
- pblk->gc.gc_enabled = 0;
-
- if (!(geo->version == NVM_OCSSD_SPEC_12 ||
- geo->version == NVM_OCSSD_SPEC_20)) {
- pblk_err(pblk, "OCSSD version not supported (%u)\n",
- geo->version);
- kfree(pblk);
- return ERR_PTR(-EINVAL);
- }
-
- if (geo->ext) {
- pblk_err(pblk, "extended metadata not supported\n");
- kfree(pblk);
- return ERR_PTR(-EINVAL);
- }
-
- spin_lock_init(&pblk->resubmit_lock);
- spin_lock_init(&pblk->trans_lock);
- spin_lock_init(&pblk->lock);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_set(&pblk->inflight_writes, 0);
- atomic_long_set(&pblk->padded_writes, 0);
- atomic_long_set(&pblk->padded_wb, 0);
- atomic_long_set(&pblk->req_writes, 0);
- atomic_long_set(&pblk->sub_writes, 0);
- atomic_long_set(&pblk->sync_writes, 0);
- atomic_long_set(&pblk->inflight_reads, 0);
- atomic_long_set(&pblk->cache_reads, 0);
- atomic_long_set(&pblk->sync_reads, 0);
- atomic_long_set(&pblk->recov_writes, 0);
- atomic_long_set(&pblk->recov_writes, 0);
- atomic_long_set(&pblk->recov_gc_writes, 0);
- atomic_long_set(&pblk->recov_gc_reads, 0);
-#endif
-
- atomic_long_set(&pblk->read_failed, 0);
- atomic_long_set(&pblk->read_empty, 0);
- atomic_long_set(&pblk->read_high_ecc, 0);
- atomic_long_set(&pblk->read_failed_gc, 0);
- atomic_long_set(&pblk->write_failed, 0);
- atomic_long_set(&pblk->erase_failed, 0);
-
- ret = pblk_core_init(pblk);
- if (ret) {
- pblk_err(pblk, "could not initialize core\n");
- goto fail;
- }
-
- ret = pblk_lines_init(pblk);
- if (ret) {
- pblk_err(pblk, "could not initialize lines\n");
- goto fail_free_core;
- }
-
- ret = pblk_rwb_init(pblk);
- if (ret) {
- pblk_err(pblk, "could not initialize write buffer\n");
- goto fail_free_lines;
- }
-
- ret = pblk_l2p_init(pblk, flags & NVM_TARGET_FACTORY);
- if (ret) {
- pblk_err(pblk, "could not initialize maps\n");
- goto fail_free_rwb;
- }
-
- ret = pblk_writer_init(pblk);
- if (ret) {
- if (ret != -EINTR)
- pblk_err(pblk, "could not initialize write thread\n");
- goto fail_free_l2p;
- }
-
- ret = pblk_gc_init(pblk);
- if (ret) {
- pblk_err(pblk, "could not initialize gc\n");
- goto fail_stop_writer;
- }
-
- /* inherit the size from the underlying device */
- blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
- blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));
-
- blk_queue_write_cache(tqueue, true, false);
-
- tqueue->limits.discard_granularity = geo->clba * geo->csecs;
- tqueue->limits.discard_alignment = 0;
- blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, tqueue);
-
- pblk_info(pblk, "luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
- geo->all_luns, pblk->l_mg.nr_lines,
- (unsigned long long)pblk->capacity,
- pblk->rwb.nr_entries);
-
- wake_up_process(pblk->writer_ts);
-
- /* Check if we need to start GC */
- pblk_gc_should_kick(pblk);
-
- return pblk;
-
-fail_stop_writer:
- pblk_writer_stop(pblk);
-fail_free_l2p:
- pblk_l2p_free(pblk);
-fail_free_rwb:
- pblk_rwb_free(pblk);
-fail_free_lines:
- pblk_lines_free(pblk);
-fail_free_core:
- pblk_core_free(pblk);
-fail:
- kfree(pblk);
- return ERR_PTR(ret);
-}
-
-/* physical block device target */
-static struct nvm_tgt_type tt_pblk = {
- .name = "pblk",
- .version = {1, 0, 0},
-
- .bops = &pblk_bops,
- .capacity = pblk_capacity,
-
- .init = pblk_init,
- .exit = pblk_exit,
-
- .sysfs_init = pblk_sysfs_init,
- .sysfs_exit = pblk_sysfs_exit,
- .owner = THIS_MODULE,
-};
-
-static int __init pblk_module_init(void)
-{
- int ret;
-
- ret = bioset_init(&pblk_bio_set, BIO_POOL_SIZE, 0, 0);
- if (ret)
- return ret;
- ret = nvm_register_tgt_type(&tt_pblk);
- if (ret)
- bioset_exit(&pblk_bio_set);
- return ret;
-}
-
-static void pblk_module_exit(void)
-{
- bioset_exit(&pblk_bio_set);
- nvm_unregister_tgt_type(&tt_pblk);
-}
-
-module_init(pblk_module_init);
-module_exit(pblk_module_exit);
-MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>");
-MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>");
-MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs");
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
deleted file mode 100644
index 5408e32b2f13..000000000000
--- a/drivers/lightnvm/pblk-map.c
+++ /dev/null
@@ -1,210 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-map.c - pblk's lba-ppa mapping strategy
- *
- */
-
-#include "pblk.h"
-
-static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
- struct ppa_addr *ppa_list,
- unsigned long *lun_bitmap,
- void *meta_list,
- unsigned int valid_secs)
-{
- struct pblk_line *line = pblk_line_get_data(pblk);
- struct pblk_emeta *emeta;
- struct pblk_w_ctx *w_ctx;
- __le64 *lba_list;
- u64 paddr;
- int nr_secs = pblk->min_write_pgs;
- int i;
-
- if (!line)
- return -ENOSPC;
-
- if (pblk_line_is_full(line)) {
- struct pblk_line *prev_line = line;
-
- /* If we cannot allocate a new line, make sure to store metadata
- * on current line and then fail
- */
- line = pblk_line_replace_data(pblk);
- pblk_line_close_meta(pblk, prev_line);
-
- if (!line) {
- pblk_pipeline_stop(pblk);
- return -ENOSPC;
- }
-
- }
-
- emeta = line->emeta;
- lba_list = emeta_to_lbas(pblk, emeta->buf);
-
- paddr = pblk_alloc_page(pblk, line, nr_secs);
-
- for (i = 0; i < nr_secs; i++, paddr++) {
- struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
- __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
-
- /* ppa to be sent to the device */
- ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
-
- /* Write context for target bio completion on write buffer. Note
- * that the write buffer is protected by the sync backpointer,
- * and a single writer thread have access to each specific entry
- * at a time. Thus, it is safe to modify the context for the
- * entry we are setting up for submission without taking any
- * lock or memory barrier.
- */
- if (i < valid_secs) {
- kref_get(&line->ref);
- atomic_inc(&line->sec_to_update);
- w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
- w_ctx->ppa = ppa_list[i];
- meta->lba = cpu_to_le64(w_ctx->lba);
- lba_list[paddr] = cpu_to_le64(w_ctx->lba);
- if (lba_list[paddr] != addr_empty)
- line->nr_valid_lbas++;
- else
- atomic64_inc(&pblk->pad_wa);
- } else {
- lba_list[paddr] = addr_empty;
- meta->lba = addr_empty;
- __pblk_map_invalidate(pblk, line, paddr);
- }
- }
-
- pblk_down_rq(pblk, ppa_list[0], lun_bitmap);
- return 0;
-}
-
-int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
- unsigned long *lun_bitmap, unsigned int valid_secs,
- unsigned int off)
-{
- void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
- void *meta_buffer;
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
- unsigned int map_secs;
- int min = pblk->min_write_pgs;
- int i;
- int ret;
-
- for (i = off; i < rqd->nr_ppas; i += min) {
- map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
- meta_buffer = pblk_get_meta(pblk, meta_list, i);
-
- ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
- lun_bitmap, meta_buffer, map_secs);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
-/* only if erase_ppa is set, acquire erase semaphore */
-int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
- unsigned int sentry, unsigned long *lun_bitmap,
- unsigned int valid_secs, struct ppa_addr *erase_ppa)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
- void *meta_buffer;
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
- struct pblk_line *e_line, *d_line;
- unsigned int map_secs;
- int min = pblk->min_write_pgs;
- int i, erase_lun;
- int ret;
-
-
- for (i = 0; i < rqd->nr_ppas; i += min) {
- map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
- meta_buffer = pblk_get_meta(pblk, meta_list, i);
-
- ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
- lun_bitmap, meta_buffer, map_secs);
- if (ret)
- return ret;
-
- erase_lun = pblk_ppa_to_pos(geo, ppa_list[i]);
-
- /* line can change after page map. We might also be writing the
- * last line.
- */
- e_line = pblk_line_get_erase(pblk);
- if (!e_line)
- return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
- valid_secs, i + min);
-
- spin_lock(&e_line->lock);
- if (!test_bit(erase_lun, e_line->erase_bitmap)) {
- set_bit(erase_lun, e_line->erase_bitmap);
- atomic_dec(&e_line->left_eblks);
-
- *erase_ppa = ppa_list[i];
- erase_ppa->a.blk = e_line->id;
- erase_ppa->a.reserved = 0;
-
- spin_unlock(&e_line->lock);
-
- /* Avoid evaluating e_line->left_eblks */
- return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
- valid_secs, i + min);
- }
- spin_unlock(&e_line->lock);
- }
-
- d_line = pblk_line_get_data(pblk);
-
- /* line can change after page map. We might also be writing the
- * last line.
- */
- e_line = pblk_line_get_erase(pblk);
- if (!e_line)
- return -ENOSPC;
-
- /* Erase blocks that are bad in this line but might not be in next */
- if (unlikely(pblk_ppa_empty(*erase_ppa)) &&
- bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
- int bit = -1;
-
-retry:
- bit = find_next_bit(d_line->blk_bitmap,
- lm->blk_per_line, bit + 1);
- if (bit >= lm->blk_per_line)
- return 0;
-
- spin_lock(&e_line->lock);
- if (test_bit(bit, e_line->erase_bitmap)) {
- spin_unlock(&e_line->lock);
- goto retry;
- }
- spin_unlock(&e_line->lock);
-
- set_bit(bit, e_line->erase_bitmap);
- atomic_dec(&e_line->left_eblks);
- *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
- erase_ppa->a.blk = e_line->id;
- }
-
- return 0;
-}
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
deleted file mode 100644
index 5abb1705b039..000000000000
--- a/drivers/lightnvm/pblk-rb.c
+++ /dev/null
@@ -1,858 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- *
- * Based upon the circular ringbuffer.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-rb.c - pblk's write buffer
- */
-
-#include <linux/circ_buf.h>
-
-#include "pblk.h"
-
-static DECLARE_RWSEM(pblk_rb_lock);
-
-static void pblk_rb_data_free(struct pblk_rb *rb)
-{
- struct pblk_rb_pages *p, *t;
-
- down_write(&pblk_rb_lock);
- list_for_each_entry_safe(p, t, &rb->pages, list) {
- free_pages((unsigned long)page_address(p->pages), p->order);
- list_del(&p->list);
- kfree(p);
- }
- up_write(&pblk_rb_lock);
-}
-
-void pblk_rb_free(struct pblk_rb *rb)
-{
- pblk_rb_data_free(rb);
- vfree(rb->entries);
-}
-
-/*
- * pblk_rb_calculate_size -- calculate the size of the write buffer
- */
-static unsigned int pblk_rb_calculate_size(unsigned int nr_entries,
- unsigned int threshold)
-{
- unsigned int thr_sz = 1 << (get_count_order(threshold + NVM_MAX_VLBA));
- unsigned int max_sz = max(thr_sz, nr_entries);
- unsigned int max_io;
-
- /* Alloc a write buffer that can (i) fit at least two split bios
- * (considering max I/O size NVM_MAX_VLBA, and (ii) guarantee that the
- * threshold will be respected
- */
- max_io = (1 << max((int)(get_count_order(max_sz)),
- (int)(get_count_order(NVM_MAX_VLBA << 1))));
- if ((threshold + NVM_MAX_VLBA) >= max_io)
- max_io <<= 1;
-
- return max_io;
-}
-
-/*
- * Initialize ring buffer. The data and metadata buffers must be previously
- * allocated and their size must be a power of two
- * (Documentation/core-api/circular-buffers.rst)
- */
-int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold,
- unsigned int seg_size)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- struct pblk_rb_entry *entries;
- unsigned int init_entry = 0;
- unsigned int max_order = MAX_ORDER - 1;
- unsigned int power_size, power_seg_sz;
- unsigned int alloc_order, order, iter;
- unsigned int nr_entries;
-
- nr_entries = pblk_rb_calculate_size(size, threshold);
- entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry)));
- if (!entries)
- return -ENOMEM;
-
- power_size = get_count_order(nr_entries);
- power_seg_sz = get_count_order(seg_size);
-
- down_write(&pblk_rb_lock);
- rb->entries = entries;
- rb->seg_size = (1 << power_seg_sz);
- rb->nr_entries = (1 << power_size);
- rb->mem = rb->subm = rb->sync = rb->l2p_update = 0;
- rb->back_thres = threshold;
- rb->flush_point = EMPTY_ENTRY;
-
- spin_lock_init(&rb->w_lock);
- spin_lock_init(&rb->s_lock);
-
- INIT_LIST_HEAD(&rb->pages);
-
- alloc_order = power_size;
- if (alloc_order >= max_order) {
- order = max_order;
- iter = (1 << (alloc_order - max_order));
- } else {
- order = alloc_order;
- iter = 1;
- }
-
- do {
- struct pblk_rb_entry *entry;
- struct pblk_rb_pages *page_set;
- void *kaddr;
- unsigned long set_size;
- int i;
-
- page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL);
- if (!page_set) {
- up_write(&pblk_rb_lock);
- vfree(entries);
- return -ENOMEM;
- }
-
- page_set->order = order;
- page_set->pages = alloc_pages(GFP_KERNEL, order);
- if (!page_set->pages) {
- kfree(page_set);
- pblk_rb_data_free(rb);
- up_write(&pblk_rb_lock);
- vfree(entries);
- return -ENOMEM;
- }
- kaddr = page_address(page_set->pages);
-
- entry = &rb->entries[init_entry];
- entry->data = kaddr;
- entry->cacheline = pblk_cacheline_to_addr(init_entry++);
- entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
-
- set_size = (1 << order);
- for (i = 1; i < set_size; i++) {
- entry = &rb->entries[init_entry];
- entry->cacheline = pblk_cacheline_to_addr(init_entry++);
- entry->data = kaddr + (i * rb->seg_size);
- entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
- bio_list_init(&entry->w_ctx.bios);
- }
-
- list_add_tail(&page_set->list, &rb->pages);
- iter--;
- } while (iter > 0);
- up_write(&pblk_rb_lock);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_set(&rb->inflight_flush_point, 0);
-#endif
-
- /*
- * Initialize rate-limiter, which controls access to the write buffer
- * by user and GC I/O
- */
- pblk_rl_init(&pblk->rl, rb->nr_entries, threshold);
-
- return 0;
-}
-
-static void clean_wctx(struct pblk_w_ctx *w_ctx)
-{
- int flags;
-
- flags = READ_ONCE(w_ctx->flags);
- WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY),
- "pblk: overwriting unsubmitted data\n");
-
- /* Release flags on context. Protect from writes and reads */
- smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
- pblk_ppa_set_empty(&w_ctx->ppa);
- w_ctx->lba = ADDR_EMPTY;
-}
-
-#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
-#define pblk_rb_ring_space(rb, head, tail, size) \
- (CIRC_SPACE(head, tail, size))
-
-/*
- * Buffer space is calculated with respect to the back pointer signaling
- * synchronized entries to the media.
- */
-static unsigned int pblk_rb_space(struct pblk_rb *rb)
-{
- unsigned int mem = READ_ONCE(rb->mem);
- unsigned int sync = READ_ONCE(rb->sync);
-
- return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries);
-}
-
-unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p,
- unsigned int nr_entries)
-{
- return (p + nr_entries) & (rb->nr_entries - 1);
-}
-
-/*
- * Buffer count is calculated with respect to the submission entry signaling the
- * entries that are available to send to the media
- */
-unsigned int pblk_rb_read_count(struct pblk_rb *rb)
-{
- unsigned int mem = READ_ONCE(rb->mem);
- unsigned int subm = READ_ONCE(rb->subm);
-
- return pblk_rb_ring_count(mem, subm, rb->nr_entries);
-}
-
-unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
-{
- unsigned int mem = READ_ONCE(rb->mem);
- unsigned int sync = READ_ONCE(rb->sync);
-
- return pblk_rb_ring_count(mem, sync, rb->nr_entries);
-}
-
-unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
-{
- unsigned int subm;
-
- subm = READ_ONCE(rb->subm);
- /* Commit read means updating submission pointer */
- smp_store_release(&rb->subm, pblk_rb_ptr_wrap(rb, subm, nr_entries));
-
- return subm;
-}
-
-static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- struct pblk_line *line;
- struct pblk_rb_entry *entry;
- struct pblk_w_ctx *w_ctx;
- unsigned int user_io = 0, gc_io = 0;
- unsigned int i;
- int flags;
-
- for (i = 0; i < to_update; i++) {
- entry = &rb->entries[rb->l2p_update];
- w_ctx = &entry->w_ctx;
-
- flags = READ_ONCE(entry->w_ctx.flags);
- if (flags & PBLK_IOTYPE_USER)
- user_io++;
- else if (flags & PBLK_IOTYPE_GC)
- gc_io++;
- else
- WARN(1, "pblk: unknown IO type\n");
-
- pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
- entry->cacheline);
-
- line = pblk_ppa_to_line(pblk, w_ctx->ppa);
- atomic_dec(&line->sec_to_update);
- kref_put(&line->ref, pblk_line_put);
- clean_wctx(w_ctx);
- rb->l2p_update = pblk_rb_ptr_wrap(rb, rb->l2p_update, 1);
- }
-
- pblk_rl_out(&pblk->rl, user_io, gc_io);
-
- return 0;
-}
-
-/*
- * When we move the l2p_update pointer, we update the l2p table - lookups will
- * point to the physical address instead of to the cacheline in the write buffer
- * from this moment on.
- */
-static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries,
- unsigned int mem, unsigned int sync)
-{
- unsigned int space, count;
- int ret = 0;
-
- lockdep_assert_held(&rb->w_lock);
-
- /* Update l2p only as buffer entries are being overwritten */
- space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries);
- if (space > nr_entries)
- goto out;
-
- count = nr_entries - space;
- /* l2p_update used exclusively under rb->w_lock */
- ret = __pblk_rb_update_l2p(rb, count);
-
-out:
- return ret;
-}
-
-/*
- * Update the l2p entry for all sectors stored on the write buffer. This means
- * that all future lookups to the l2p table will point to a device address, not
- * to the cacheline in the write buffer.
- */
-void pblk_rb_sync_l2p(struct pblk_rb *rb)
-{
- unsigned int sync;
- unsigned int to_update;
-
- spin_lock(&rb->w_lock);
-
- /* Protect from reads and writes */
- sync = smp_load_acquire(&rb->sync);
-
- to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries);
- __pblk_rb_update_l2p(rb, to_update);
-
- spin_unlock(&rb->w_lock);
-}
-
-/*
- * Write @nr_entries to ring buffer from @data buffer if there is enough space.
- * Typically, 4KB data chunks coming from a bio will be copied to the ring
- * buffer, thus the write will fail if not all incoming data can be copied.
- *
- */
-static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data,
- struct pblk_w_ctx w_ctx,
- struct pblk_rb_entry *entry)
-{
- memcpy(entry->data, data, rb->seg_size);
-
- entry->w_ctx.lba = w_ctx.lba;
- entry->w_ctx.ppa = w_ctx.ppa;
-}
-
-void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
- struct pblk_w_ctx w_ctx, unsigned int ring_pos)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- struct pblk_rb_entry *entry;
- int flags;
-
- entry = &rb->entries[ring_pos];
- flags = READ_ONCE(entry->w_ctx.flags);
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Caller must guarantee that the entry is free */
- BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
-#endif
-
- __pblk_rb_write_entry(rb, data, w_ctx, entry);
-
- pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline);
- flags = w_ctx.flags | PBLK_WRITTEN_DATA;
-
- /* Release flags on write context. Protect from writes */
- smp_store_release(&entry->w_ctx.flags, flags);
-}
-
-void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
- struct pblk_w_ctx w_ctx, struct pblk_line *line,
- u64 paddr, unsigned int ring_pos)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- struct pblk_rb_entry *entry;
- int flags;
-
- entry = &rb->entries[ring_pos];
- flags = READ_ONCE(entry->w_ctx.flags);
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Caller must guarantee that the entry is free */
- BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
-#endif
-
- __pblk_rb_write_entry(rb, data, w_ctx, entry);
-
- if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, line, paddr))
- entry->w_ctx.lba = ADDR_EMPTY;
-
- flags = w_ctx.flags | PBLK_WRITTEN_DATA;
-
- /* Release flags on write context. Protect from writes */
- smp_store_release(&entry->w_ctx.flags, flags);
-}
-
-static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
- unsigned int pos)
-{
- struct pblk_rb_entry *entry;
- unsigned int sync, flush_point;
-
- pblk_rb_sync_init(rb, NULL);
- sync = READ_ONCE(rb->sync);
-
- if (pos == sync) {
- pblk_rb_sync_end(rb, NULL);
- return 0;
- }
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_inc(&rb->inflight_flush_point);
-#endif
-
- flush_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1);
- entry = &rb->entries[flush_point];
-
- /* Protect flush points */
- smp_store_release(&rb->flush_point, flush_point);
-
- if (bio)
- bio_list_add(&entry->w_ctx.bios, bio);
-
- pblk_rb_sync_end(rb, NULL);
-
- return bio ? 1 : 0;
-}
-
-static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
- unsigned int *pos)
-{
- unsigned int mem;
- unsigned int sync;
- unsigned int threshold;
-
- sync = READ_ONCE(rb->sync);
- mem = READ_ONCE(rb->mem);
-
- threshold = nr_entries + rb->back_thres;
-
- if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < threshold)
- return 0;
-
- if (pblk_rb_update_l2p(rb, nr_entries, mem, sync))
- return 0;
-
- *pos = mem;
-
- return 1;
-}
-
-static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
- unsigned int *pos)
-{
- if (!__pblk_rb_may_write(rb, nr_entries, pos))
- return 0;
-
- /* Protect from read count */
- smp_store_release(&rb->mem, pblk_rb_ptr_wrap(rb, *pos, nr_entries));
- return 1;
-}
-
-void pblk_rb_flush(struct pblk_rb *rb)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- unsigned int mem = READ_ONCE(rb->mem);
-
- if (pblk_rb_flush_point_set(rb, NULL, mem))
- return;
-
- pblk_write_kick(pblk);
-}
-
-static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
- unsigned int *pos, struct bio *bio,
- int *io_ret)
-{
- unsigned int mem;
-
- if (!__pblk_rb_may_write(rb, nr_entries, pos))
- return 0;
-
- mem = pblk_rb_ptr_wrap(rb, *pos, nr_entries);
- *io_ret = NVM_IO_DONE;
-
- if (bio->bi_opf & REQ_PREFLUSH) {
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
-
- atomic64_inc(&pblk->nr_flush);
- if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem))
- *io_ret = NVM_IO_OK;
- }
-
- /* Protect from read count */
- smp_store_release(&rb->mem, mem);
-
- return 1;
-}
-
-/*
- * Atomically check that (i) there is space on the write buffer for the
- * incoming I/O, and (ii) the current I/O type has enough budget in the write
- * buffer (rate-limiter).
- */
-int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
- unsigned int nr_entries, unsigned int *pos)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- int io_ret;
-
- spin_lock(&rb->w_lock);
- io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
- if (io_ret) {
- spin_unlock(&rb->w_lock);
- return io_ret;
- }
-
- if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
- spin_unlock(&rb->w_lock);
- return NVM_IO_REQUEUE;
- }
-
- pblk_rl_user_in(&pblk->rl, nr_entries);
- spin_unlock(&rb->w_lock);
-
- return io_ret;
-}
-
-/*
- * Look at pblk_rb_may_write_user comment
- */
-int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
- unsigned int *pos)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
-
- spin_lock(&rb->w_lock);
- if (!pblk_rl_gc_may_insert(&pblk->rl, nr_entries)) {
- spin_unlock(&rb->w_lock);
- return 0;
- }
-
- if (!pblk_rb_may_write(rb, nr_entries, pos)) {
- spin_unlock(&rb->w_lock);
- return 0;
- }
-
- pblk_rl_gc_in(&pblk->rl, nr_entries);
- spin_unlock(&rb->w_lock);
-
- return 1;
-}
-
-/*
- * Read available entries on rb and add them to the given bio. To avoid a memory
- * copy, a page reference to the write buffer is used to be added to the bio.
- *
- * This function is used by the write thread to form the write bio that will
- * persist data on the write buffer to the media.
- */
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
- unsigned int pos, unsigned int nr_entries,
- unsigned int count)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- struct request_queue *q = pblk->dev->q;
- struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
- struct bio *bio = rqd->bio;
- struct pblk_rb_entry *entry;
- struct page *page;
- unsigned int pad = 0, to_read = nr_entries;
- unsigned int i;
- int flags;
-
- if (count < nr_entries) {
- pad = nr_entries - count;
- to_read = count;
- }
-
- /* Add space for packed metadata if in use*/
- pad += (pblk->min_write_pgs - pblk->min_write_pgs_data);
-
- c_ctx->sentry = pos;
- c_ctx->nr_valid = to_read;
- c_ctx->nr_padded = pad;
-
- for (i = 0; i < to_read; i++) {
- entry = &rb->entries[pos];
-
- /* A write has been allowed into the buffer, but data is still
- * being copied to it. It is ok to busy wait.
- */
-try:
- flags = READ_ONCE(entry->w_ctx.flags);
- if (!(flags & PBLK_WRITTEN_DATA)) {
- io_schedule();
- goto try;
- }
-
- page = virt_to_page(entry->data);
- if (!page) {
- pblk_err(pblk, "could not allocate write bio page\n");
- flags &= ~PBLK_WRITTEN_DATA;
- flags |= PBLK_SUBMITTED_ENTRY;
- /* Release flags on context. Protect from writes */
- smp_store_release(&entry->w_ctx.flags, flags);
- return NVM_IO_ERR;
- }
-
- if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
- rb->seg_size) {
- pblk_err(pblk, "could not add page to write bio\n");
- flags &= ~PBLK_WRITTEN_DATA;
- flags |= PBLK_SUBMITTED_ENTRY;
- /* Release flags on context. Protect from writes */
- smp_store_release(&entry->w_ctx.flags, flags);
- return NVM_IO_ERR;
- }
-
- flags &= ~PBLK_WRITTEN_DATA;
- flags |= PBLK_SUBMITTED_ENTRY;
-
- /* Release flags on context. Protect from writes */
- smp_store_release(&entry->w_ctx.flags, flags);
-
- pos = pblk_rb_ptr_wrap(rb, pos, 1);
- }
-
- if (pad) {
- if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
- pblk_err(pblk, "could not pad page in write bio\n");
- return NVM_IO_ERR;
- }
-
- if (pad < pblk->min_write_pgs)
- atomic64_inc(&pblk->pad_dist[pad - 1]);
- else
- pblk_warn(pblk, "padding more than min. sectors\n");
-
- atomic64_add(pad, &pblk->pad_wa);
- }
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(pad, &pblk->padded_writes);
-#endif
-
- return NVM_IO_OK;
-}
-
-/*
- * Copy to bio only if the lba matches the one on the given cache entry.
- * Otherwise, it means that the entry has been overwritten, and the bio should
- * be directed to disk.
- */
-int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
- struct ppa_addr ppa)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- struct pblk_rb_entry *entry;
- struct pblk_w_ctx *w_ctx;
- struct ppa_addr l2p_ppa;
- u64 pos = pblk_addr_to_cacheline(ppa);
- void *data;
- int flags;
- int ret = 1;
-
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Caller must ensure that the access will not cause an overflow */
- BUG_ON(pos >= rb->nr_entries);
-#endif
- entry = &rb->entries[pos];
- w_ctx = &entry->w_ctx;
- flags = READ_ONCE(w_ctx->flags);
-
- spin_lock(&rb->w_lock);
- spin_lock(&pblk->trans_lock);
- l2p_ppa = pblk_trans_map_get(pblk, lba);
- spin_unlock(&pblk->trans_lock);
-
- /* Check if the entry has been overwritten or is scheduled to be */
- if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
- flags & PBLK_WRITABLE_ENTRY) {
- ret = 0;
- goto out;
- }
- data = bio_data(bio);
- memcpy(data, entry->data, rb->seg_size);
-
-out:
- spin_unlock(&rb->w_lock);
- return ret;
-}
-
-struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos)
-{
- unsigned int entry = pblk_rb_ptr_wrap(rb, pos, 0);
-
- return &rb->entries[entry].w_ctx;
-}
-
-unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags)
- __acquires(&rb->s_lock)
-{
- if (flags)
- spin_lock_irqsave(&rb->s_lock, *flags);
- else
- spin_lock_irq(&rb->s_lock);
-
- return rb->sync;
-}
-
-void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags)
- __releases(&rb->s_lock)
-{
- lockdep_assert_held(&rb->s_lock);
-
- if (flags)
- spin_unlock_irqrestore(&rb->s_lock, *flags);
- else
- spin_unlock_irq(&rb->s_lock);
-}
-
-unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries)
-{
- unsigned int sync, flush_point;
- lockdep_assert_held(&rb->s_lock);
-
- sync = READ_ONCE(rb->sync);
- flush_point = READ_ONCE(rb->flush_point);
-
- if (flush_point != EMPTY_ENTRY) {
- unsigned int secs_to_flush;
-
- secs_to_flush = pblk_rb_ring_count(flush_point, sync,
- rb->nr_entries);
- if (secs_to_flush < nr_entries) {
- /* Protect flush points */
- smp_store_release(&rb->flush_point, EMPTY_ENTRY);
- }
- }
-
- sync = pblk_rb_ptr_wrap(rb, sync, nr_entries);
-
- /* Protect from counts */
- smp_store_release(&rb->sync, sync);
-
- return sync;
-}
-
-/* Calculate how many sectors to submit up to the current flush point. */
-unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb)
-{
- unsigned int subm, sync, flush_point;
- unsigned int submitted, to_flush;
-
- /* Protect flush points */
- flush_point = smp_load_acquire(&rb->flush_point);
- if (flush_point == EMPTY_ENTRY)
- return 0;
-
- /* Protect syncs */
- sync = smp_load_acquire(&rb->sync);
-
- subm = READ_ONCE(rb->subm);
- submitted = pblk_rb_ring_count(subm, sync, rb->nr_entries);
-
- /* The sync point itself counts as a sector to sync */
- to_flush = pblk_rb_ring_count(flush_point, sync, rb->nr_entries) + 1;
-
- return (submitted < to_flush) ? (to_flush - submitted) : 0;
-}
-
-int pblk_rb_tear_down_check(struct pblk_rb *rb)
-{
- struct pblk_rb_entry *entry;
- int i;
- int ret = 0;
-
- spin_lock(&rb->w_lock);
- spin_lock_irq(&rb->s_lock);
-
- if ((rb->mem == rb->subm) && (rb->subm == rb->sync) &&
- (rb->sync == rb->l2p_update) &&
- (rb->flush_point == EMPTY_ENTRY)) {
- goto out;
- }
-
- if (!rb->entries) {
- ret = 1;
- goto out;
- }
-
- for (i = 0; i < rb->nr_entries; i++) {
- entry = &rb->entries[i];
-
- if (!entry->data) {
- ret = 1;
- goto out;
- }
- }
-
-out:
- spin_unlock_irq(&rb->s_lock);
- spin_unlock(&rb->w_lock);
-
- return ret;
-}
-
-unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos)
-{
- return (pos & (rb->nr_entries - 1));
-}
-
-int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos)
-{
- return (pos >= rb->nr_entries);
-}
-
-ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- struct pblk_c_ctx *c;
- ssize_t offset;
- int queued_entries = 0;
-
- spin_lock_irq(&rb->s_lock);
- list_for_each_entry(c, &pblk->compl_list, list)
- queued_entries++;
- spin_unlock_irq(&rb->s_lock);
-
- if (rb->flush_point != EMPTY_ENTRY)
- offset = scnprintf(buf, PAGE_SIZE,
- "%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n",
- rb->nr_entries,
- rb->mem,
- rb->subm,
- rb->sync,
- rb->l2p_update,
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_read(&rb->inflight_flush_point),
-#else
- 0,
-#endif
- rb->flush_point,
- pblk_rb_read_count(rb),
- pblk_rb_space(rb),
- pblk_rb_flush_point_count(rb),
- queued_entries);
- else
- offset = scnprintf(buf, PAGE_SIZE,
- "%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n",
- rb->nr_entries,
- rb->mem,
- rb->subm,
- rb->sync,
- rb->l2p_update,
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_read(&rb->inflight_flush_point),
-#else
- 0,
-#endif
- pblk_rb_read_count(rb),
- pblk_rb_space(rb),
- pblk_rb_flush_point_count(rb),
- queued_entries);
-
- return offset;
-}
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
deleted file mode 100644
index c28537a489bc..000000000000
--- a/drivers/lightnvm/pblk-read.c
+++ /dev/null
@@ -1,474 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-read.c - pblk's read path
- */
-
-#include "pblk.h"
-
-/*
- * There is no guarantee that the value read from cache has not been updated and
- * resides at another location in the cache. We guarantee though that if the
- * value is read from the cache, it belongs to the mapped lba. In order to
- * guarantee and order between writes and reads are ordered, a flush must be
- * issued.
- */
-static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
- sector_t lba, struct ppa_addr ppa)
-{
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Callers must ensure that the ppa points to a cache address */
- BUG_ON(pblk_ppa_empty(ppa));
- BUG_ON(!pblk_addr_in_cache(ppa));
-#endif
-
- return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa);
-}
-
-static int pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
- struct bio *bio, sector_t blba,
- bool *from_cache)
-{
- void *meta_list = rqd->meta_list;
- int nr_secs, i;
-
-retry:
- nr_secs = pblk_lookup_l2p_seq(pblk, rqd->ppa_list, blba, rqd->nr_ppas,
- from_cache);
-
- if (!*from_cache)
- goto end;
-
- for (i = 0; i < nr_secs; i++) {
- struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
- sector_t lba = blba + i;
-
- if (pblk_ppa_empty(rqd->ppa_list[i])) {
- __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
-
- meta->lba = addr_empty;
- } else if (pblk_addr_in_cache(rqd->ppa_list[i])) {
- /*
- * Try to read from write buffer. The address is later
- * checked on the write buffer to prevent retrieving
- * overwritten data.
- */
- if (!pblk_read_from_cache(pblk, bio, lba,
- rqd->ppa_list[i])) {
- if (i == 0) {
- /*
- * We didn't call with bio_advance()
- * yet, so we can just retry.
- */
- goto retry;
- } else {
- /*
- * We already call bio_advance()
- * so we cannot retry and we need
- * to quit that function in order
- * to allow caller to handle the bio
- * splitting in the current sector
- * position.
- */
- nr_secs = i;
- goto end;
- }
- }
- meta->lba = cpu_to_le64(lba);
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_inc(&pblk->cache_reads);
-#endif
- }
- bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
- }
-
-end:
- if (pblk_io_aligned(pblk, nr_secs))
- rqd->is_seq = 1;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(nr_secs, &pblk->inflight_reads);
-#endif
-
- return nr_secs;
-}
-
-
-static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
- sector_t blba)
-{
- void *meta_list = rqd->meta_list;
- int nr_lbas = rqd->nr_ppas;
- int i;
-
- if (!pblk_is_oob_meta_supported(pblk))
- return;
-
- for (i = 0; i < nr_lbas; i++) {
- struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
- u64 lba = le64_to_cpu(meta->lba);
-
- if (lba == ADDR_EMPTY)
- continue;
-
- if (lba != blba + i) {
-#ifdef CONFIG_NVM_PBLK_DEBUG
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
-
- print_ppa(pblk, &ppa_list[i], "seq", i);
-#endif
- pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n",
- lba, (u64)blba + i);
- WARN_ON(1);
- }
- }
-}
-
-/*
- * There can be holes in the lba list.
- */
-static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
- u64 *lba_list, int nr_lbas)
-{
- void *meta_lba_list = rqd->meta_list;
- int i, j;
-
- if (!pblk_is_oob_meta_supported(pblk))
- return;
-
- for (i = 0, j = 0; i < nr_lbas; i++) {
- struct pblk_sec_meta *meta = pblk_get_meta(pblk,
- meta_lba_list, j);
- u64 lba = lba_list[i];
- u64 meta_lba;
-
- if (lba == ADDR_EMPTY)
- continue;
-
- meta_lba = le64_to_cpu(meta->lba);
-
- if (lba != meta_lba) {
-#ifdef CONFIG_NVM_PBLK_DEBUG
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
-
- print_ppa(pblk, &ppa_list[j], "rnd", j);
-#endif
- pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n",
- meta_lba, lba);
- WARN_ON(1);
- }
-
- j++;
- }
-
- WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n");
-}
-
-static void pblk_end_user_read(struct bio *bio, int error)
-{
- if (error && error != NVM_RSP_WARN_HIGHECC)
- bio_io_error(bio);
- else
- bio_endio(bio);
-}
-
-static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
- bool put_line)
-{
- struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
- struct bio *int_bio = rqd->bio;
- unsigned long start_time = r_ctx->start_time;
-
- bio_end_io_acct(int_bio, start_time);
-
- if (rqd->error)
- pblk_log_read_err(pblk, rqd);
-
- pblk_read_check_seq(pblk, rqd, r_ctx->lba);
- bio_put(int_bio);
-
- if (put_line)
- pblk_rq_to_line_put(pblk, rqd);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(rqd->nr_ppas, &pblk->sync_reads);
- atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads);
-#endif
-
- pblk_free_rqd(pblk, rqd, PBLK_READ);
- atomic_dec(&pblk->inflight_io);
-}
-
-static void pblk_end_io_read(struct nvm_rq *rqd)
-{
- struct pblk *pblk = rqd->private;
- struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
- struct bio *bio = (struct bio *)r_ctx->private;
-
- pblk_end_user_read(bio, rqd->error);
- __pblk_end_io_read(pblk, rqd, true);
-}
-
-static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
- sector_t lba, bool *from_cache)
-{
- struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0);
- struct ppa_addr ppa;
-
- pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_inc(&pblk->inflight_reads);
-#endif
-
-retry:
- if (pblk_ppa_empty(ppa)) {
- __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
-
- meta->lba = addr_empty;
- return;
- }
-
- /* Try to read from write buffer. The address is later checked on the
- * write buffer to prevent retrieving overwritten data.
- */
- if (pblk_addr_in_cache(ppa)) {
- if (!pblk_read_from_cache(pblk, bio, lba, ppa)) {
- pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
- goto retry;
- }
-
- meta->lba = cpu_to_le64(lba);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_inc(&pblk->cache_reads);
-#endif
- } else {
- rqd->ppa_addr = ppa;
- }
-}
-
-void pblk_submit_read(struct pblk *pblk, struct bio *bio)
-{
- sector_t blba = pblk_get_lba(bio);
- unsigned int nr_secs = pblk_get_secs(bio);
- bool from_cache;
- struct pblk_g_ctx *r_ctx;
- struct nvm_rq *rqd;
- struct bio *int_bio, *split_bio;
- unsigned long start_time;
-
- start_time = bio_start_io_acct(bio);
-
- rqd = pblk_alloc_rqd(pblk, PBLK_READ);
-
- rqd->opcode = NVM_OP_PREAD;
- rqd->nr_ppas = nr_secs;
- rqd->private = pblk;
- rqd->end_io = pblk_end_io_read;
-
- r_ctx = nvm_rq_to_pdu(rqd);
- r_ctx->start_time = start_time;
- r_ctx->lba = blba;
-
- if (pblk_alloc_rqd_meta(pblk, rqd)) {
- bio_io_error(bio);
- pblk_free_rqd(pblk, rqd, PBLK_READ);
- return;
- }
-
- /* Clone read bio to deal internally with:
- * -read errors when reading from drive
- * -bio_advance() calls during cache reads
- */
- int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
-
- if (nr_secs > 1)
- nr_secs = pblk_read_ppalist_rq(pblk, rqd, int_bio, blba,
- &from_cache);
- else
- pblk_read_rq(pblk, rqd, int_bio, blba, &from_cache);
-
-split_retry:
- r_ctx->private = bio; /* original bio */
- rqd->bio = int_bio; /* internal bio */
-
- if (from_cache && nr_secs == rqd->nr_ppas) {
- /* All data was read from cache, we can complete the IO. */
- pblk_end_user_read(bio, 0);
- atomic_inc(&pblk->inflight_io);
- __pblk_end_io_read(pblk, rqd, false);
- } else if (nr_secs != rqd->nr_ppas) {
- /* The read bio request could be partially filled by the write
- * buffer, but there are some holes that need to be read from
- * the drive. In order to handle this, we will use block layer
- * mechanism to split this request in to smaller ones and make
- * a chain of it.
- */
- split_bio = bio_split(bio, nr_secs * NR_PHY_IN_LOG, GFP_KERNEL,
- &pblk_bio_set);
- bio_chain(split_bio, bio);
- submit_bio_noacct(bio);
-
- /* New bio contains first N sectors of the previous one, so
- * we can continue to use existing rqd, but we need to shrink
- * the number of PPAs in it. New bio is also guaranteed that
- * it contains only either data from cache or from drive, newer
- * mix of them.
- */
- bio = split_bio;
- rqd->nr_ppas = nr_secs;
- if (rqd->nr_ppas == 1)
- rqd->ppa_addr = rqd->ppa_list[0];
-
- /* Recreate int_bio - existing might have some needed internal
- * fields modified already.
- */
- bio_put(int_bio);
- int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
- goto split_retry;
- } else if (pblk_submit_io(pblk, rqd, NULL)) {
- /* Submitting IO to drive failed, let's report an error */
- rqd->error = -ENODEV;
- pblk_end_io_read(rqd);
- }
-}
-
-static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
- struct pblk_line *line, u64 *lba_list,
- u64 *paddr_list_gc, unsigned int nr_secs)
-{
- struct ppa_addr ppa_list_l2p[NVM_MAX_VLBA];
- struct ppa_addr ppa_gc;
- int valid_secs = 0;
- int i;
-
- pblk_lookup_l2p_rand(pblk, ppa_list_l2p, lba_list, nr_secs);
-
- for (i = 0; i < nr_secs; i++) {
- if (lba_list[i] == ADDR_EMPTY)
- continue;
-
- ppa_gc = addr_to_gen_ppa(pblk, paddr_list_gc[i], line->id);
- if (!pblk_ppa_comp(ppa_list_l2p[i], ppa_gc)) {
- paddr_list_gc[i] = lba_list[i] = ADDR_EMPTY;
- continue;
- }
-
- rqd->ppa_list[valid_secs++] = ppa_list_l2p[i];
- }
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(valid_secs, &pblk->inflight_reads);
-#endif
-
- return valid_secs;
-}
-
-static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
- struct pblk_line *line, sector_t lba,
- u64 paddr_gc)
-{
- struct ppa_addr ppa_l2p, ppa_gc;
- int valid_secs = 0;
-
- if (lba == ADDR_EMPTY)
- goto out;
-
- /* logic error: lba out-of-bounds */
- if (lba >= pblk->capacity) {
- WARN(1, "pblk: read lba out of bounds\n");
- goto out;
- }
-
- spin_lock(&pblk->trans_lock);
- ppa_l2p = pblk_trans_map_get(pblk, lba);
- spin_unlock(&pblk->trans_lock);
-
- ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, line->id);
- if (!pblk_ppa_comp(ppa_l2p, ppa_gc))
- goto out;
-
- rqd->ppa_addr = ppa_l2p;
- valid_secs = 1;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_inc(&pblk->inflight_reads);
-#endif
-
-out:
- return valid_secs;
-}
-
-int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
-{
- struct nvm_rq rqd;
- int ret = NVM_IO_OK;
-
- memset(&rqd, 0, sizeof(struct nvm_rq));
-
- ret = pblk_alloc_rqd_meta(pblk, &rqd);
- if (ret)
- return ret;
-
- if (gc_rq->nr_secs > 1) {
- gc_rq->secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, gc_rq->line,
- gc_rq->lba_list,
- gc_rq->paddr_list,
- gc_rq->nr_secs);
- if (gc_rq->secs_to_gc == 1)
- rqd.ppa_addr = rqd.ppa_list[0];
- } else {
- gc_rq->secs_to_gc = read_rq_gc(pblk, &rqd, gc_rq->line,
- gc_rq->lba_list[0],
- gc_rq->paddr_list[0]);
- }
-
- if (!(gc_rq->secs_to_gc))
- goto out;
-
- rqd.opcode = NVM_OP_PREAD;
- rqd.nr_ppas = gc_rq->secs_to_gc;
-
- if (pblk_submit_io_sync(pblk, &rqd, gc_rq->data)) {
- ret = -EIO;
- goto err_free_dma;
- }
-
- pblk_read_check_rand(pblk, &rqd, gc_rq->lba_list, gc_rq->nr_secs);
-
- atomic_dec(&pblk->inflight_io);
-
- if (rqd.error) {
- atomic_long_inc(&pblk->read_failed_gc);
-#ifdef CONFIG_NVM_PBLK_DEBUG
- pblk_print_failed_rqd(pblk, &rqd, rqd.error);
-#endif
- }
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(gc_rq->secs_to_gc, &pblk->sync_reads);
- atomic_long_add(gc_rq->secs_to_gc, &pblk->recov_gc_reads);
- atomic_long_sub(gc_rq->secs_to_gc, &pblk->inflight_reads);
-#endif
-
-out:
- pblk_free_rqd_meta(pblk, &rqd);
- return ret;
-
-err_free_dma:
- pblk_free_rqd_meta(pblk, &rqd);
- return ret;
-}
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
deleted file mode 100644
index 0e6f0c76e930..000000000000
--- a/drivers/lightnvm/pblk-recovery.c
+++ /dev/null
@@ -1,874 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial: Javier Gonzalez <javier@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-recovery.c - pblk's recovery path
- *
- * The L2P recovery path is single threaded as the L2P table is updated in order
- * following the line sequence ID.
- */
-
-#include "pblk.h"
-#include "pblk-trace.h"
-
-int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf)
-{
- u32 crc;
-
- crc = pblk_calc_emeta_crc(pblk, emeta_buf);
- if (le32_to_cpu(emeta_buf->crc) != crc)
- return 1;
-
- if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
- return 1;
-
- return 0;
-}
-
-static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_emeta *emeta = line->emeta;
- struct line_emeta *emeta_buf = emeta->buf;
- __le64 *lba_list;
- u64 data_start, data_end;
- u64 nr_valid_lbas, nr_lbas = 0;
- u64 i;
-
- lba_list = emeta_to_lbas(pblk, emeta_buf);
- if (!lba_list)
- return 1;
-
- data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
- data_end = line->emeta_ssec;
- nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
-
- for (i = data_start; i < data_end; i++) {
- struct ppa_addr ppa;
- int pos;
-
- ppa = addr_to_gen_ppa(pblk, i, line->id);
- pos = pblk_ppa_to_pos(geo, ppa);
-
- /* Do not update bad blocks */
- if (test_bit(pos, line->blk_bitmap))
- continue;
-
- if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) {
- spin_lock(&line->lock);
- if (test_and_set_bit(i, line->invalid_bitmap))
- WARN_ONCE(1, "pblk: rec. double invalidate:\n");
- else
- le32_add_cpu(line->vsc, -1);
- spin_unlock(&line->lock);
-
- continue;
- }
-
- pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa);
- nr_lbas++;
- }
-
- if (nr_valid_lbas != nr_lbas)
- pblk_err(pblk, "line %d - inconsistent lba list(%llu/%llu)\n",
- line->id, nr_valid_lbas, nr_lbas);
-
- line->left_msecs = 0;
-
- return 0;
-}
-
-static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line,
- u64 written_secs)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- int i;
-
- for (i = 0; i < written_secs; i += pblk->min_write_pgs)
- __pblk_alloc_page(pblk, line, pblk->min_write_pgs);
-
- spin_lock(&l_mg->free_lock);
- if (written_secs > line->left_msecs) {
- /*
- * We have all data sectors written
- * and some emeta sectors written too.
- */
- line->left_msecs = 0;
- } else {
- /* We have only some data sectors written. */
- line->left_msecs -= written_secs;
- }
- spin_unlock(&l_mg->free_lock);
-}
-
-static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
- u64 written_secs = 0;
- int valid_chunks = 0;
- int i;
-
- for (i = 0; i < lm->blk_per_line; i++) {
- struct nvm_chk_meta *chunk = &line->chks[i];
-
- if (chunk->state & NVM_CHK_ST_OFFLINE)
- continue;
-
- written_secs += chunk->wp;
- valid_chunks++;
- }
-
- if (lm->blk_per_line - nr_bb != valid_chunks)
- pblk_err(pblk, "recovery line %d is bad\n", line->id);
-
- pblk_update_line_wp(pblk, line, written_secs - lm->smeta_sec);
-
- return written_secs;
-}
-
-struct pblk_recov_alloc {
- struct ppa_addr *ppa_list;
- void *meta_list;
- struct nvm_rq *rqd;
- void *data;
- dma_addr_t dma_ppa_list;
- dma_addr_t dma_meta_list;
-};
-
-static void pblk_recov_complete(struct kref *ref)
-{
- struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
-
- complete(&pad_rq->wait);
-}
-
-static void pblk_end_io_recov(struct nvm_rq *rqd)
-{
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
- struct pblk_pad_rq *pad_rq = rqd->private;
- struct pblk *pblk = pad_rq->pblk;
-
- pblk_up_chunk(pblk, ppa_list[0]);
-
- pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
-
- atomic_dec(&pblk->inflight_io);
- kref_put(&pad_rq->ref, pblk_recov_complete);
-}
-
-/* pad line using line bitmap. */
-static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
- int left_ppas)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- void *meta_list;
- struct pblk_pad_rq *pad_rq;
- struct nvm_rq *rqd;
- struct ppa_addr *ppa_list;
- void *data;
- __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
- u64 w_ptr = line->cur_sec;
- int left_line_ppas, rq_ppas;
- int i, j;
- int ret = 0;
-
- spin_lock(&line->lock);
- left_line_ppas = line->left_msecs;
- spin_unlock(&line->lock);
-
- pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
- if (!pad_rq)
- return -ENOMEM;
-
- data = vzalloc(array_size(pblk->max_write_pgs, geo->csecs));
- if (!data) {
- ret = -ENOMEM;
- goto free_rq;
- }
-
- pad_rq->pblk = pblk;
- init_completion(&pad_rq->wait);
- kref_init(&pad_rq->ref);
-
-next_pad_rq:
- rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
- if (rq_ppas < pblk->min_write_pgs) {
- pblk_err(pblk, "corrupted pad line %d\n", line->id);
- goto fail_complete;
- }
-
- rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
-
- ret = pblk_alloc_rqd_meta(pblk, rqd);
- if (ret) {
- pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
- goto fail_complete;
- }
-
- rqd->bio = NULL;
- rqd->opcode = NVM_OP_PWRITE;
- rqd->is_seq = 1;
- rqd->nr_ppas = rq_ppas;
- rqd->end_io = pblk_end_io_recov;
- rqd->private = pad_rq;
-
- ppa_list = nvm_rq_to_ppa_list(rqd);
- meta_list = rqd->meta_list;
-
- for (i = 0; i < rqd->nr_ppas; ) {
- struct ppa_addr ppa;
- int pos;
-
- w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
- ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
- pos = pblk_ppa_to_pos(geo, ppa);
-
- while (test_bit(pos, line->blk_bitmap)) {
- w_ptr += pblk->min_write_pgs;
- ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
- pos = pblk_ppa_to_pos(geo, ppa);
- }
-
- for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
- struct ppa_addr dev_ppa;
- struct pblk_sec_meta *meta;
- __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
-
- dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
-
- pblk_map_invalidate(pblk, dev_ppa);
- lba_list[w_ptr] = addr_empty;
- meta = pblk_get_meta(pblk, meta_list, i);
- meta->lba = addr_empty;
- ppa_list[i] = dev_ppa;
- }
- }
-
- kref_get(&pad_rq->ref);
- pblk_down_chunk(pblk, ppa_list[0]);
-
- ret = pblk_submit_io(pblk, rqd, data);
- if (ret) {
- pblk_err(pblk, "I/O submission failed: %d\n", ret);
- pblk_up_chunk(pblk, ppa_list[0]);
- kref_put(&pad_rq->ref, pblk_recov_complete);
- pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
- goto fail_complete;
- }
-
- left_line_ppas -= rq_ppas;
- left_ppas -= rq_ppas;
- if (left_ppas && left_line_ppas)
- goto next_pad_rq;
-
-fail_complete:
- kref_put(&pad_rq->ref, pblk_recov_complete);
- wait_for_completion(&pad_rq->wait);
-
- if (!pblk_line_is_full(line))
- pblk_err(pblk, "corrupted padded line: %d\n", line->id);
-
- vfree(data);
-free_rq:
- kfree(pad_rq);
- return ret;
-}
-
-static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int distance = geo->mw_cunits * geo->all_luns * geo->ws_opt;
-
- return (distance > line->left_msecs) ? line->left_msecs : distance;
-}
-
-/* Return a chunk belonging to a line by stripe(write order) index */
-static struct nvm_chk_meta *pblk_get_stripe_chunk(struct pblk *pblk,
- struct pblk_line *line,
- int index)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_lun *rlun;
- struct ppa_addr ppa;
- int pos;
-
- rlun = &pblk->luns[index];
- ppa = rlun->bppa;
- pos = pblk_ppa_to_pos(geo, ppa);
-
- return &line->chks[pos];
-}
-
-static int pblk_line_wps_are_unbalanced(struct pblk *pblk,
- struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- int blk_in_line = lm->blk_per_line;
- struct nvm_chk_meta *chunk;
- u64 max_wp, min_wp;
- int i;
-
- i = find_first_zero_bit(line->blk_bitmap, blk_in_line);
-
- /* If there is one or zero good chunks in the line,
- * the write pointers can't be unbalanced.
- */
- if (i >= (blk_in_line - 1))
- return 0;
-
- chunk = pblk_get_stripe_chunk(pblk, line, i);
- max_wp = chunk->wp;
- if (max_wp > pblk->max_write_pgs)
- min_wp = max_wp - pblk->max_write_pgs;
- else
- min_wp = 0;
-
- i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
- while (i < blk_in_line) {
- chunk = pblk_get_stripe_chunk(pblk, line, i);
- if (chunk->wp > max_wp || chunk->wp < min_wp)
- return 1;
-
- i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
- }
-
- return 0;
-}
-
-static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
- struct pblk_recov_alloc p)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct pblk_line_meta *lm = &pblk->lm;
- struct nvm_geo *geo = &dev->geo;
- struct ppa_addr *ppa_list;
- void *meta_list;
- struct nvm_rq *rqd;
- void *data;
- dma_addr_t dma_ppa_list, dma_meta_list;
- __le64 *lba_list;
- u64 paddr = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
- bool padded = false;
- int rq_ppas;
- int i, j;
- int ret;
- u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec;
-
- if (pblk_line_wps_are_unbalanced(pblk, line))
- pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
-
- ppa_list = p.ppa_list;
- meta_list = p.meta_list;
- rqd = p.rqd;
- data = p.data;
- dma_ppa_list = p.dma_ppa_list;
- dma_meta_list = p.dma_meta_list;
-
- lba_list = emeta_to_lbas(pblk, line->emeta->buf);
-
-next_rq:
- memset(rqd, 0, pblk_g_rq_size);
-
- rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
- if (!rq_ppas)
- rq_ppas = pblk->min_write_pgs;
-
-retry_rq:
- rqd->bio = NULL;
- rqd->opcode = NVM_OP_PREAD;
- rqd->meta_list = meta_list;
- rqd->nr_ppas = rq_ppas;
- rqd->ppa_list = ppa_list;
- rqd->dma_ppa_list = dma_ppa_list;
- rqd->dma_meta_list = dma_meta_list;
- ppa_list = nvm_rq_to_ppa_list(rqd);
-
- if (pblk_io_aligned(pblk, rq_ppas))
- rqd->is_seq = 1;
-
- for (i = 0; i < rqd->nr_ppas; ) {
- struct ppa_addr ppa;
- int pos;
-
- ppa = addr_to_gen_ppa(pblk, paddr, line->id);
- pos = pblk_ppa_to_pos(geo, ppa);
-
- while (test_bit(pos, line->blk_bitmap)) {
- paddr += pblk->min_write_pgs;
- ppa = addr_to_gen_ppa(pblk, paddr, line->id);
- pos = pblk_ppa_to_pos(geo, ppa);
- }
-
- for (j = 0; j < pblk->min_write_pgs; j++, i++)
- ppa_list[i] =
- addr_to_gen_ppa(pblk, paddr + j, line->id);
- }
-
- ret = pblk_submit_io_sync(pblk, rqd, data);
- if (ret) {
- pblk_err(pblk, "I/O submission failed: %d\n", ret);
- return ret;
- }
-
- atomic_dec(&pblk->inflight_io);
-
- /* If a read fails, do a best effort by padding the line and retrying */
- if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) {
- int pad_distance, ret;
-
- if (padded) {
- pblk_log_read_err(pblk, rqd);
- return -EINTR;
- }
-
- pad_distance = pblk_pad_distance(pblk, line);
- ret = pblk_recov_pad_line(pblk, line, pad_distance);
- if (ret) {
- return ret;
- }
-
- padded = true;
- goto retry_rq;
- }
-
- pblk_get_packed_meta(pblk, rqd);
-
- for (i = 0; i < rqd->nr_ppas; i++) {
- struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
- u64 lba = le64_to_cpu(meta->lba);
-
- lba_list[paddr++] = cpu_to_le64(lba);
-
- if (lba == ADDR_EMPTY || lba >= pblk->capacity)
- continue;
-
- line->nr_valid_lbas++;
- pblk_update_map(pblk, lba, ppa_list[i]);
- }
-
- left_ppas -= rq_ppas;
- if (left_ppas > 0)
- goto next_rq;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- WARN_ON(padded && !pblk_line_is_full(line));
-#endif
-
- return 0;
-}
-
-/* Scan line for lbas on out of bound area */
-static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct nvm_rq *rqd;
- struct ppa_addr *ppa_list;
- void *meta_list;
- struct pblk_recov_alloc p;
- void *data;
- dma_addr_t dma_ppa_list, dma_meta_list;
- int ret = 0;
-
- meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
- if (!meta_list)
- return -ENOMEM;
-
- ppa_list = (void *)(meta_list) + pblk_dma_meta_size(pblk);
- dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
-
- data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
- if (!data) {
- ret = -ENOMEM;
- goto free_meta_list;
- }
-
- rqd = mempool_alloc(&pblk->r_rq_pool, GFP_KERNEL);
- memset(rqd, 0, pblk_g_rq_size);
-
- p.ppa_list = ppa_list;
- p.meta_list = meta_list;
- p.rqd = rqd;
- p.data = data;
- p.dma_ppa_list = dma_ppa_list;
- p.dma_meta_list = dma_meta_list;
-
- ret = pblk_recov_scan_oob(pblk, line, p);
- if (ret) {
- pblk_err(pblk, "could not recover L2P form OOB\n");
- goto out;
- }
-
- if (pblk_line_is_full(line))
- pblk_line_recov_close(pblk, line);
-
-out:
- mempool_free(rqd, &pblk->r_rq_pool);
- kfree(data);
-free_meta_list:
- nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
-
- return ret;
-}
-
-/* Insert lines ordered by sequence number (seq_num) on list */
-static void pblk_recov_line_add_ordered(struct list_head *head,
- struct pblk_line *line)
-{
- struct pblk_line *t = NULL;
-
- list_for_each_entry(t, head, list)
- if (t->seq_nr > line->seq_nr)
- break;
-
- __list_add(&line->list, t->list.prev, &t->list);
-}
-
-static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- unsigned int emeta_secs;
- u64 emeta_start;
- struct ppa_addr ppa;
- int pos;
-
- emeta_secs = lm->emeta_sec[0];
- emeta_start = lm->sec_per_line;
-
- while (emeta_secs) {
- emeta_start--;
- ppa = addr_to_gen_ppa(pblk, emeta_start, line->id);
- pos = pblk_ppa_to_pos(geo, ppa);
- if (!test_bit(pos, line->blk_bitmap))
- emeta_secs--;
- }
-
- return emeta_start;
-}
-
-static int pblk_recov_check_line_version(struct pblk *pblk,
- struct line_emeta *emeta)
-{
- struct line_header *header = &emeta->header;
-
- if (header->version_major != EMETA_VERSION_MAJOR) {
- pblk_err(pblk, "line major version mismatch: %d, expected: %d\n",
- header->version_major, EMETA_VERSION_MAJOR);
- return 1;
- }
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- if (header->version_minor > EMETA_VERSION_MINOR)
- pblk_info(pblk, "newer line minor version found: %d\n",
- header->version_minor);
-#endif
-
- return 0;
-}
-
-static void pblk_recov_wa_counters(struct pblk *pblk,
- struct line_emeta *emeta)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct line_header *header = &emeta->header;
- struct wa_counters *wa = emeta_to_wa(lm, emeta);
-
- /* WA counters were introduced in emeta version 0.2 */
- if (header->version_major > 0 || header->version_minor >= 2) {
- u64 user = le64_to_cpu(wa->user);
- u64 pad = le64_to_cpu(wa->pad);
- u64 gc = le64_to_cpu(wa->gc);
-
- atomic64_set(&pblk->user_wa, user);
- atomic64_set(&pblk->pad_wa, pad);
- atomic64_set(&pblk->gc_wa, gc);
-
- pblk->user_rst_wa = user;
- pblk->pad_rst_wa = pad;
- pblk->gc_rst_wa = gc;
- }
-}
-
-static int pblk_line_was_written(struct pblk_line *line,
- struct pblk *pblk)
-{
-
- struct pblk_line_meta *lm = &pblk->lm;
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct nvm_chk_meta *chunk;
- struct ppa_addr bppa;
- int smeta_blk;
-
- if (line->state == PBLK_LINESTATE_BAD)
- return 0;
-
- smeta_blk = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
- if (smeta_blk >= lm->blk_per_line)
- return 0;
-
- bppa = pblk->luns[smeta_blk].bppa;
- chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)];
-
- if (chunk->state & NVM_CHK_ST_CLOSED ||
- (chunk->state & NVM_CHK_ST_OPEN
- && chunk->wp >= lm->smeta_sec))
- return 1;
-
- return 0;
-}
-
-static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- int i;
-
- for (i = 0; i < lm->blk_per_line; i++)
- if (line->chks[i].state & NVM_CHK_ST_OPEN)
- return true;
-
- return false;
-}
-
-struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *line, *tline, *data_line = NULL;
- struct pblk_smeta *smeta;
- struct pblk_emeta *emeta;
- struct line_smeta *smeta_buf;
- int found_lines = 0, recovered_lines = 0, open_lines = 0;
- int is_next = 0;
- int meta_line;
- int i, valid_uuid = 0;
- LIST_HEAD(recov_list);
-
- /* TODO: Implement FTL snapshot */
-
- /* Scan recovery - takes place when FTL snapshot fails */
- spin_lock(&l_mg->free_lock);
- meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- set_bit(meta_line, &l_mg->meta_bitmap);
- smeta = l_mg->sline_meta[meta_line];
- emeta = l_mg->eline_meta[meta_line];
- smeta_buf = (struct line_smeta *)smeta;
- spin_unlock(&l_mg->free_lock);
-
- /* Order data lines using their sequence number */
- for (i = 0; i < l_mg->nr_lines; i++) {
- u32 crc;
-
- line = &pblk->lines[i];
-
- memset(smeta, 0, lm->smeta_len);
- line->smeta = smeta;
- line->lun_bitmap = ((void *)(smeta_buf)) +
- sizeof(struct line_smeta);
-
- if (!pblk_line_was_written(line, pblk))
- continue;
-
- /* Lines that cannot be read are assumed as not written here */
- if (pblk_line_smeta_read(pblk, line))
- continue;
-
- crc = pblk_calc_smeta_crc(pblk, smeta_buf);
- if (le32_to_cpu(smeta_buf->crc) != crc)
- continue;
-
- if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
- continue;
-
- if (smeta_buf->header.version_major != SMETA_VERSION_MAJOR) {
- pblk_err(pblk, "found incompatible line version %u\n",
- smeta_buf->header.version_major);
- return ERR_PTR(-EINVAL);
- }
-
- /* The first valid instance uuid is used for initialization */
- if (!valid_uuid) {
- import_guid(&pblk->instance_uuid, smeta_buf->header.uuid);
- valid_uuid = 1;
- }
-
- if (!guid_equal(&pblk->instance_uuid,
- (guid_t *)&smeta_buf->header.uuid)) {
- pblk_debug(pblk, "ignore line %u due to uuid mismatch\n",
- i);
- continue;
- }
-
- /* Update line metadata */
- spin_lock(&line->lock);
- line->id = le32_to_cpu(smeta_buf->header.id);
- line->type = le16_to_cpu(smeta_buf->header.type);
- line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
- spin_unlock(&line->lock);
-
- /* Update general metadata */
- spin_lock(&l_mg->free_lock);
- if (line->seq_nr >= l_mg->d_seq_nr)
- l_mg->d_seq_nr = line->seq_nr + 1;
- l_mg->nr_free_lines--;
- spin_unlock(&l_mg->free_lock);
-
- if (pblk_line_recov_alloc(pblk, line))
- goto out;
-
- pblk_recov_line_add_ordered(&recov_list, line);
- found_lines++;
- pblk_debug(pblk, "recovering data line %d, seq:%llu\n",
- line->id, smeta_buf->seq_nr);
- }
-
- if (!found_lines) {
- guid_gen(&pblk->instance_uuid);
-
- spin_lock(&l_mg->free_lock);
- WARN_ON_ONCE(!test_and_clear_bit(meta_line,
- &l_mg->meta_bitmap));
- spin_unlock(&l_mg->free_lock);
-
- goto out;
- }
-
- /* Verify closed blocks and recover this portion of L2P table*/
- list_for_each_entry_safe(line, tline, &recov_list, list) {
- recovered_lines++;
-
- line->emeta_ssec = pblk_line_emeta_start(pblk, line);
- line->emeta = emeta;
- memset(line->emeta->buf, 0, lm->emeta_len[0]);
-
- if (pblk_line_is_open(pblk, line)) {
- pblk_recov_l2p_from_oob(pblk, line);
- goto next;
- }
-
- if (pblk_line_emeta_read(pblk, line, line->emeta->buf)) {
- pblk_recov_l2p_from_oob(pblk, line);
- goto next;
- }
-
- if (pblk_recov_check_emeta(pblk, line->emeta->buf)) {
- pblk_recov_l2p_from_oob(pblk, line);
- goto next;
- }
-
- if (pblk_recov_check_line_version(pblk, line->emeta->buf))
- return ERR_PTR(-EINVAL);
-
- pblk_recov_wa_counters(pblk, line->emeta->buf);
-
- if (pblk_recov_l2p_from_emeta(pblk, line))
- pblk_recov_l2p_from_oob(pblk, line);
-
-next:
- if (pblk_line_is_full(line)) {
- struct list_head *move_list;
-
- spin_lock(&line->lock);
- line->state = PBLK_LINESTATE_CLOSED;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- move_list = pblk_line_gc_list(pblk, line);
- spin_unlock(&line->lock);
-
- spin_lock(&l_mg->gc_lock);
- list_move_tail(&line->list, move_list);
- spin_unlock(&l_mg->gc_lock);
-
- mempool_free(line->map_bitmap, l_mg->bitmap_pool);
- line->map_bitmap = NULL;
- line->smeta = NULL;
- line->emeta = NULL;
- } else {
- spin_lock(&line->lock);
- line->state = PBLK_LINESTATE_OPEN;
- spin_unlock(&line->lock);
-
- line->emeta->mem = 0;
- atomic_set(&line->emeta->sync, 0);
-
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
-
- data_line = line;
- line->meta_line = meta_line;
-
- open_lines++;
- }
- }
-
- if (!open_lines) {
- spin_lock(&l_mg->free_lock);
- WARN_ON_ONCE(!test_and_clear_bit(meta_line,
- &l_mg->meta_bitmap));
- spin_unlock(&l_mg->free_lock);
- } else {
- spin_lock(&l_mg->free_lock);
- l_mg->data_line = data_line;
- /* Allocate next line for preparation */
- l_mg->data_next = pblk_line_get(pblk);
- if (l_mg->data_next) {
- l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
- l_mg->data_next->type = PBLK_LINETYPE_DATA;
- is_next = 1;
- }
- spin_unlock(&l_mg->free_lock);
- }
-
- if (is_next)
- pblk_line_erase(pblk, l_mg->data_next);
-
-out:
- if (found_lines != recovered_lines)
- pblk_err(pblk, "failed to recover all found lines %d/%d\n",
- found_lines, recovered_lines);
-
- return data_line;
-}
-
-/*
- * Pad current line
- */
-int pblk_recov_pad(struct pblk *pblk)
-{
- struct pblk_line *line;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- int left_msecs;
- int ret = 0;
-
- spin_lock(&l_mg->free_lock);
- line = l_mg->data_line;
- left_msecs = line->left_msecs;
- spin_unlock(&l_mg->free_lock);
-
- ret = pblk_recov_pad_line(pblk, line, left_msecs);
- if (ret) {
- pblk_err(pblk, "tear down padding failed (%d)\n", ret);
- return ret;
- }
-
- pblk_line_close_meta(pblk, line);
- return ret;
-}
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
deleted file mode 100644
index a5f8bc2defbc..000000000000
--- a/drivers/lightnvm/pblk-rl.c
+++ /dev/null
@@ -1,254 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-rl.c - pblk's rate limiter for user I/O
- *
- */
-
-#include "pblk.h"
-
-static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
-{
- mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
-}
-
-int pblk_rl_is_limit(struct pblk_rl *rl)
-{
- int rb_space;
-
- rb_space = atomic_read(&rl->rb_space);
-
- return (rb_space == 0);
-}
-
-int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
-{
- int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
- int rb_space = atomic_read(&rl->rb_space);
-
- if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0))
- return NVM_IO_ERR;
-
- if (rb_user_cnt >= rl->rb_user_max)
- return NVM_IO_REQUEUE;
-
- return NVM_IO_OK;
-}
-
-void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries)
-{
- int rb_space = atomic_read(&rl->rb_space);
-
- if (unlikely(rb_space >= 0))
- atomic_sub(nr_entries, &rl->rb_space);
-}
-
-int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
-{
- int rb_gc_cnt = atomic_read(&rl->rb_gc_cnt);
- int rb_user_active;
-
- /* If there is no user I/O let GC take over space on the write buffer */
- rb_user_active = READ_ONCE(rl->rb_user_active);
- return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
-}
-
-void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
-{
- atomic_add(nr_entries, &rl->rb_user_cnt);
-
- /* Release user I/O state. Protect from GC */
- smp_store_release(&rl->rb_user_active, 1);
- pblk_rl_kick_u_timer(rl);
-}
-
-void pblk_rl_werr_line_in(struct pblk_rl *rl)
-{
- atomic_inc(&rl->werr_lines);
-}
-
-void pblk_rl_werr_line_out(struct pblk_rl *rl)
-{
- atomic_dec(&rl->werr_lines);
-}
-
-void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries)
-{
- atomic_add(nr_entries, &rl->rb_gc_cnt);
-}
-
-void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc)
-{
- atomic_sub(nr_user, &rl->rb_user_cnt);
- atomic_sub(nr_gc, &rl->rb_gc_cnt);
-}
-
-unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl)
-{
- return atomic_read(&rl->free_blocks);
-}
-
-unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl)
-{
- return atomic_read(&rl->free_user_blocks);
-}
-
-static void __pblk_rl_update_rates(struct pblk_rl *rl,
- unsigned long free_blocks)
-{
- struct pblk *pblk = container_of(rl, struct pblk, rl);
- int max = rl->rb_budget;
- int werr_gc_needed = atomic_read(&rl->werr_lines);
-
- if (free_blocks >= rl->high) {
- if (werr_gc_needed) {
- /* Allocate a small budget for recovering
- * lines with write errors
- */
- rl->rb_gc_max = 1 << rl->rb_windows_pw;
- rl->rb_user_max = max - rl->rb_gc_max;
- rl->rb_state = PBLK_RL_WERR;
- } else {
- rl->rb_user_max = max;
- rl->rb_gc_max = 0;
- rl->rb_state = PBLK_RL_OFF;
- }
- } else if (free_blocks < rl->high) {
- int shift = rl->high_pw - rl->rb_windows_pw;
- int user_windows = free_blocks >> shift;
- int user_max = user_windows << ilog2(NVM_MAX_VLBA);
-
- rl->rb_user_max = user_max;
- rl->rb_gc_max = max - user_max;
-
- if (free_blocks <= rl->rsv_blocks) {
- rl->rb_user_max = 0;
- rl->rb_gc_max = max;
- }
-
- /* In the worst case, we will need to GC lines in the low list
- * (high valid sector count). If there are lines to GC on high
- * or mid lists, these will be prioritized
- */
- rl->rb_state = PBLK_RL_LOW;
- }
-
- if (rl->rb_state != PBLK_RL_OFF)
- pblk_gc_should_start(pblk);
- else
- pblk_gc_should_stop(pblk);
-}
-
-void pblk_rl_update_rates(struct pblk_rl *rl)
-{
- __pblk_rl_update_rates(rl, pblk_rl_nr_user_free_blks(rl));
-}
-
-void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
-{
- int blk_in_line = atomic_read(&line->blk_in_line);
- int free_blocks;
-
- atomic_add(blk_in_line, &rl->free_blocks);
- free_blocks = atomic_add_return(blk_in_line, &rl->free_user_blocks);
-
- __pblk_rl_update_rates(rl, free_blocks);
-}
-
-void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
- bool used)
-{
- int blk_in_line = atomic_read(&line->blk_in_line);
- int free_blocks;
-
- atomic_sub(blk_in_line, &rl->free_blocks);
-
- if (used)
- free_blocks = atomic_sub_return(blk_in_line,
- &rl->free_user_blocks);
- else
- free_blocks = atomic_read(&rl->free_user_blocks);
-
- __pblk_rl_update_rates(rl, free_blocks);
-}
-
-int pblk_rl_high_thrs(struct pblk_rl *rl)
-{
- return rl->high;
-}
-
-int pblk_rl_max_io(struct pblk_rl *rl)
-{
- return rl->rb_max_io;
-}
-
-static void pblk_rl_u_timer(struct timer_list *t)
-{
- struct pblk_rl *rl = from_timer(rl, t, u_timer);
-
- /* Release user I/O state. Protect from GC */
- smp_store_release(&rl->rb_user_active, 0);
-}
-
-void pblk_rl_free(struct pblk_rl *rl)
-{
- del_timer(&rl->u_timer);
-}
-
-void pblk_rl_init(struct pblk_rl *rl, int budget, int threshold)
-{
- struct pblk *pblk = container_of(rl, struct pblk, rl);
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
- int sec_meta, blk_meta;
- unsigned int rb_windows;
-
- /* Consider sectors used for metadata */
- sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
- blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
-
- rl->high = pblk->op_blks - blk_meta - lm->blk_per_line;
- rl->high_pw = get_count_order(rl->high);
-
- rl->rsv_blocks = pblk_get_min_chks(pblk);
-
- /* This will always be a power-of-2 */
- rb_windows = budget / NVM_MAX_VLBA;
- rl->rb_windows_pw = get_count_order(rb_windows);
-
- /* To start with, all buffer is available to user I/O writers */
- rl->rb_budget = budget;
- rl->rb_user_max = budget;
- rl->rb_gc_max = 0;
- rl->rb_state = PBLK_RL_HIGH;
-
- /* Maximize I/O size and ansure that back threshold is respected */
- if (threshold)
- rl->rb_max_io = budget - pblk->min_write_pgs_data - threshold;
- else
- rl->rb_max_io = budget - pblk->min_write_pgs_data - 1;
-
- atomic_set(&rl->rb_user_cnt, 0);
- atomic_set(&rl->rb_gc_cnt, 0);
- atomic_set(&rl->rb_space, -1);
- atomic_set(&rl->werr_lines, 0);
-
- timer_setup(&rl->u_timer, pblk_rl_u_timer, 0);
-
- rl->rb_user_active = 0;
- rl->rb_gc_active = 0;
-}
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
deleted file mode 100644
index 6387302b03f2..000000000000
--- a/drivers/lightnvm/pblk-sysfs.c
+++ /dev/null
@@ -1,728 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Implementation of a physical block-device target for Open-channel SSDs.
- *
- * pblk-sysfs.c - pblk's sysfs
- *
- */
-
-#include "pblk.h"
-
-static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_lun *rlun;
- ssize_t sz = 0;
- int i;
-
- for (i = 0; i < geo->all_luns; i++) {
- int active = 1;
-
- rlun = &pblk->luns[i];
- if (!down_trylock(&rlun->wr_sem)) {
- active = 0;
- up(&rlun->wr_sem);
- }
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "pblk: pos:%d, ch:%d, lun:%d - %d\n",
- i,
- rlun->bppa.a.ch,
- rlun->bppa.a.lun,
- active);
- }
-
- return sz;
-}
-
-static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
-{
- int free_blocks, free_user_blocks, total_blocks;
- int rb_user_max, rb_user_cnt;
- int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;
-
- free_blocks = pblk_rl_nr_free_blks(&pblk->rl);
- free_user_blocks = pblk_rl_nr_user_free_blks(&pblk->rl);
- rb_user_max = pblk->rl.rb_user_max;
- rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
- rb_gc_max = pblk->rl.rb_gc_max;
- rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
- rb_budget = pblk->rl.rb_budget;
- rb_state = pblk->rl.rb_state;
-
- total_blocks = pblk->rl.total_blocks;
-
- return snprintf(page, PAGE_SIZE,
- "u:%u/%u,gc:%u/%u(%u)(stop:<%u,full:>%u,free:%d/%d/%d)-%d\n",
- rb_user_cnt,
- rb_user_max,
- rb_gc_cnt,
- rb_gc_max,
- rb_state,
- rb_budget,
- pblk->rl.high,
- free_blocks,
- free_user_blocks,
- total_blocks,
- READ_ONCE(pblk->rl.rb_user_active));
-}
-
-static ssize_t pblk_sysfs_gc_state_show(struct pblk *pblk, char *page)
-{
- int gc_enabled, gc_active;
-
- pblk_gc_sysfs_state_show(pblk, &gc_enabled, &gc_active);
- return snprintf(page, PAGE_SIZE, "gc_enabled=%d, gc_active=%d\n",
- gc_enabled, gc_active);
-}
-
-static ssize_t pblk_sysfs_stats(struct pblk *pblk, char *page)
-{
- ssize_t sz;
-
- sz = snprintf(page, PAGE_SIZE,
- "read_failed=%lu, read_high_ecc=%lu, read_empty=%lu, read_failed_gc=%lu, write_failed=%lu, erase_failed=%lu\n",
- atomic_long_read(&pblk->read_failed),
- atomic_long_read(&pblk->read_high_ecc),
- atomic_long_read(&pblk->read_empty),
- atomic_long_read(&pblk->read_failed_gc),
- atomic_long_read(&pblk->write_failed),
- atomic_long_read(&pblk->erase_failed));
-
- return sz;
-}
-
-static ssize_t pblk_sysfs_write_buffer(struct pblk *pblk, char *page)
-{
- return pblk_rb_sysfs(&pblk->rwb, page);
-}
-
-static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- ssize_t sz = 0;
-
- if (geo->version == NVM_OCSSD_SPEC_12) {
- struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
- struct nvm_addrf_12 *gppaf = (struct nvm_addrf_12 *)&geo->addrf;
-
- sz = scnprintf(page, PAGE_SIZE,
- "g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
- pblk->addrf_len,
- ppaf->blk_offset, ppaf->blk_len,
- ppaf->pg_offset, ppaf->pg_len,
- ppaf->lun_offset, ppaf->lun_len,
- ppaf->ch_offset, ppaf->ch_len,
- ppaf->pln_offset, ppaf->pln_len,
- ppaf->sec_offset, ppaf->sec_len);
-
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
- gppaf->blk_offset, gppaf->blk_len,
- gppaf->pg_offset, gppaf->pg_len,
- gppaf->lun_offset, gppaf->lun_len,
- gppaf->ch_offset, gppaf->ch_len,
- gppaf->pln_offset, gppaf->pln_len,
- gppaf->sec_offset, gppaf->sec_len);
- } else {
- struct nvm_addrf *ppaf = &pblk->addrf;
- struct nvm_addrf *gppaf = &geo->addrf;
-
- sz = scnprintf(page, PAGE_SIZE,
- "pblk:(s:%d)ch:%d/%d,lun:%d/%d,chk:%d/%d/sec:%d/%d\n",
- pblk->addrf_len,
- ppaf->ch_offset, ppaf->ch_len,
- ppaf->lun_offset, ppaf->lun_len,
- ppaf->chk_offset, ppaf->chk_len,
- ppaf->sec_offset, ppaf->sec_len);
-
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "device:ch:%d/%d,lun:%d/%d,chk:%d/%d,sec:%d/%d\n",
- gppaf->ch_offset, gppaf->ch_len,
- gppaf->lun_offset, gppaf->lun_len,
- gppaf->chk_offset, gppaf->chk_len,
- gppaf->sec_offset, gppaf->sec_len);
- }
-
- return sz;
-}
-
-static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *line;
- ssize_t sz = 0;
- int nr_free_lines;
- int cur_data, cur_log;
- int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
- int d_line_cnt = 0, l_line_cnt = 0;
- int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
- int gc_werr = 0;
-
- int bad = 0, cor = 0;
- int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
- int map_weight = 0, meta_weight = 0;
-
- spin_lock(&l_mg->free_lock);
- cur_data = (l_mg->data_line) ? l_mg->data_line->id : -1;
- cur_log = (l_mg->log_line) ? l_mg->log_line->id : -1;
- nr_free_lines = l_mg->nr_free_lines;
-
- list_for_each_entry(line, &l_mg->free_list, list)
- free_line_cnt++;
- spin_unlock(&l_mg->free_lock);
-
- spin_lock(&l_mg->close_lock);
- list_for_each_entry(line, &l_mg->emeta_list, list)
- emeta_line_cnt++;
- spin_unlock(&l_mg->close_lock);
-
- spin_lock(&l_mg->gc_lock);
- list_for_each_entry(line, &l_mg->gc_full_list, list) {
- if (line->type == PBLK_LINETYPE_DATA)
- d_line_cnt++;
- else if (line->type == PBLK_LINETYPE_LOG)
- l_line_cnt++;
- closed_line_cnt++;
- gc_full++;
- }
-
- list_for_each_entry(line, &l_mg->gc_high_list, list) {
- if (line->type == PBLK_LINETYPE_DATA)
- d_line_cnt++;
- else if (line->type == PBLK_LINETYPE_LOG)
- l_line_cnt++;
- closed_line_cnt++;
- gc_high++;
- }
-
- list_for_each_entry(line, &l_mg->gc_mid_list, list) {
- if (line->type == PBLK_LINETYPE_DATA)
- d_line_cnt++;
- else if (line->type == PBLK_LINETYPE_LOG)
- l_line_cnt++;
- closed_line_cnt++;
- gc_mid++;
- }
-
- list_for_each_entry(line, &l_mg->gc_low_list, list) {
- if (line->type == PBLK_LINETYPE_DATA)
- d_line_cnt++;
- else if (line->type == PBLK_LINETYPE_LOG)
- l_line_cnt++;
- closed_line_cnt++;
- gc_low++;
- }
-
- list_for_each_entry(line, &l_mg->gc_empty_list, list) {
- if (line->type == PBLK_LINETYPE_DATA)
- d_line_cnt++;
- else if (line->type == PBLK_LINETYPE_LOG)
- l_line_cnt++;
- closed_line_cnt++;
- gc_empty++;
- }
-
- list_for_each_entry(line, &l_mg->gc_werr_list, list) {
- if (line->type == PBLK_LINETYPE_DATA)
- d_line_cnt++;
- else if (line->type == PBLK_LINETYPE_LOG)
- l_line_cnt++;
- closed_line_cnt++;
- gc_werr++;
- }
-
- list_for_each_entry(line, &l_mg->bad_list, list)
- bad++;
- list_for_each_entry(line, &l_mg->corrupt_list, list)
- cor++;
- spin_unlock(&l_mg->gc_lock);
-
- spin_lock(&l_mg->free_lock);
- if (l_mg->data_line) {
- cur_sec = l_mg->data_line->cur_sec;
- msecs = l_mg->data_line->left_msecs;
- vsc = le32_to_cpu(*l_mg->data_line->vsc);
- sec_in_line = l_mg->data_line->sec_in_line;
- meta_weight = bitmap_weight(&l_mg->meta_bitmap,
- PBLK_DATA_LINES);
-
- spin_lock(&l_mg->data_line->lock);
- if (l_mg->data_line->map_bitmap)
- map_weight = bitmap_weight(l_mg->data_line->map_bitmap,
- lm->sec_per_line);
- else
- map_weight = 0;
- spin_unlock(&l_mg->data_line->lock);
- }
- spin_unlock(&l_mg->free_lock);
-
- if (nr_free_lines != free_line_cnt)
- pblk_err(pblk, "corrupted free line list:%d/%d\n",
- nr_free_lines, free_line_cnt);
-
- sz = scnprintf(page, PAGE_SIZE - sz,
- "line: nluns:%d, nblks:%d, nsecs:%d\n",
- geo->all_luns, lm->blk_per_line, lm->sec_per_line);
-
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
- cur_data, cur_log,
- nr_free_lines,
- emeta_line_cnt, meta_weight,
- closed_line_cnt,
- bad, cor,
- d_line_cnt, l_line_cnt,
- l_mg->nr_lines);
-
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n",
- gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr,
- atomic_read(&pblk->gc.read_inflight_gc));
-
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
- cur_data, cur_sec, msecs, vsc, sec_in_line,
- map_weight, lm->sec_per_line,
- atomic_read(&pblk->inflight_io));
-
- return sz;
-}
-
-static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- ssize_t sz = 0;
-
- sz = scnprintf(page, PAGE_SIZE - sz,
- "smeta - len:%d, secs:%d\n",
- lm->smeta_len, lm->smeta_sec);
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "emeta - len:%d, sec:%d, bb_start:%d\n",
- lm->emeta_len[0], lm->emeta_sec[0],
- lm->emeta_bb);
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "bitmap lengths: sec:%d, blk:%d, lun:%d\n",
- lm->sec_bitmap_len,
- lm->blk_bitmap_len,
- lm->lun_bitmap_len);
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "blk_line:%d, sec_line:%d, sec_blk:%d\n",
- lm->blk_per_line,
- lm->sec_per_line,
- geo->clba);
-
- return sz;
-}
-
-static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
-{
- return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
-}
-
-static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad,
- char *page)
-{
- int sz;
-
- sz = scnprintf(page, PAGE_SIZE,
- "user:%lld gc:%lld pad:%lld WA:",
- user, gc, pad);
-
- if (!user) {
- sz += scnprintf(page + sz, PAGE_SIZE - sz, "NaN\n");
- } else {
- u64 wa_int;
- u32 wa_frac;
-
- wa_int = (user + gc + pad) * 100000;
- wa_int = div64_u64(wa_int, user);
- wa_int = div_u64_rem(wa_int, 100000, &wa_frac);
-
- sz += scnprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n",
- wa_int, wa_frac);
- }
-
- return sz;
-}
-
-static ssize_t pblk_sysfs_get_write_amp_mileage(struct pblk *pblk, char *page)
-{
- return pblk_get_write_amp(atomic64_read(&pblk->user_wa),
- atomic64_read(&pblk->gc_wa), atomic64_read(&pblk->pad_wa),
- page);
-}
-
-static ssize_t pblk_sysfs_get_write_amp_trip(struct pblk *pblk, char *page)
-{
- return pblk_get_write_amp(
- atomic64_read(&pblk->user_wa) - pblk->user_rst_wa,
- atomic64_read(&pblk->gc_wa) - pblk->gc_rst_wa,
- atomic64_read(&pblk->pad_wa) - pblk->pad_rst_wa, page);
-}
-
-static long long bucket_percentage(unsigned long long bucket,
- unsigned long long total)
-{
- int p = bucket * 100;
-
- p = div_u64(p, total);
-
- return p;
-}
-
-static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
-{
- int sz = 0;
- unsigned long long total;
- unsigned long long total_buckets = 0;
- int buckets = pblk->min_write_pgs - 1;
- int i;
-
- total = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst;
- if (!total) {
- for (i = 0; i < (buckets + 1); i++)
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "%d:0 ", i);
- sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n");
-
- return sz;
- }
-
- for (i = 0; i < buckets; i++)
- total_buckets += atomic64_read(&pblk->pad_dist[i]);
-
- sz += scnprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ",
- bucket_percentage(total - total_buckets, total));
-
- for (i = 0; i < buckets; i++) {
- unsigned long long p;
-
- p = bucket_percentage(atomic64_read(&pblk->pad_dist[i]),
- total);
- sz += scnprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ",
- i + 1, p);
- }
- sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n");
-
- return sz;
-}
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
-{
- return snprintf(page, PAGE_SIZE,
- "%lu\t%lu\t%ld\t%llu\t%ld\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
- atomic_long_read(&pblk->inflight_writes),
- atomic_long_read(&pblk->inflight_reads),
- atomic_long_read(&pblk->req_writes),
- (u64)atomic64_read(&pblk->nr_flush),
- atomic_long_read(&pblk->padded_writes),
- atomic_long_read(&pblk->padded_wb),
- atomic_long_read(&pblk->sub_writes),
- atomic_long_read(&pblk->sync_writes),
- atomic_long_read(&pblk->recov_writes),
- atomic_long_read(&pblk->recov_gc_writes),
- atomic_long_read(&pblk->recov_gc_reads),
- atomic_long_read(&pblk->cache_reads),
- atomic_long_read(&pblk->sync_reads));
-}
-#endif
-
-static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
- size_t len)
-{
- size_t c_len;
- int force;
-
- c_len = strcspn(page, "\n");
- if (c_len >= len)
- return -EINVAL;
-
- if (kstrtouint(page, 0, &force))
- return -EINVAL;
-
- pblk_gc_sysfs_force(pblk, force);
-
- return len;
-}
-
-static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
- const char *page, size_t len)
-{
- size_t c_len;
- int sec_per_write;
-
- c_len = strcspn(page, "\n");
- if (c_len >= len)
- return -EINVAL;
-
- if (kstrtouint(page, 0, &sec_per_write))
- return -EINVAL;
-
- if (!pblk_is_oob_meta_supported(pblk)) {
- /* For packed metadata case it is
- * not allowed to change sec_per_write.
- */
- return -EINVAL;
- }
-
- if (sec_per_write < pblk->min_write_pgs
- || sec_per_write > pblk->max_write_pgs
- || sec_per_write % pblk->min_write_pgs != 0)
- return -EINVAL;
-
- pblk_set_sec_per_write(pblk, sec_per_write);
-
- return len;
-}
-
-static ssize_t pblk_sysfs_set_write_amp_trip(struct pblk *pblk,
- const char *page, size_t len)
-{
- size_t c_len;
- int reset_value;
-
- c_len = strcspn(page, "\n");
- if (c_len >= len)
- return -EINVAL;
-
- if (kstrtouint(page, 0, &reset_value))
- return -EINVAL;
-
- if (reset_value != 0)
- return -EINVAL;
-
- pblk->user_rst_wa = atomic64_read(&pblk->user_wa);
- pblk->pad_rst_wa = atomic64_read(&pblk->pad_wa);
- pblk->gc_rst_wa = atomic64_read(&pblk->gc_wa);
-
- return len;
-}
-
-
-static ssize_t pblk_sysfs_set_padding_dist(struct pblk *pblk,
- const char *page, size_t len)
-{
- size_t c_len;
- int reset_value;
- int buckets = pblk->min_write_pgs - 1;
- int i;
-
- c_len = strcspn(page, "\n");
- if (c_len >= len)
- return -EINVAL;
-
- if (kstrtouint(page, 0, &reset_value))
- return -EINVAL;
-
- if (reset_value != 0)
- return -EINVAL;
-
- for (i = 0; i < buckets; i++)
- atomic64_set(&pblk->pad_dist[i], 0);
-
- pblk->nr_flush_rst = atomic64_read(&pblk->nr_flush);
-
- return len;
-}
-
-static struct attribute sys_write_luns = {
- .name = "write_luns",
- .mode = 0444,
-};
-
-static struct attribute sys_rate_limiter_attr = {
- .name = "rate_limiter",
- .mode = 0444,
-};
-
-static struct attribute sys_gc_state = {
- .name = "gc_state",
- .mode = 0444,
-};
-
-static struct attribute sys_errors_attr = {
- .name = "errors",
- .mode = 0444,
-};
-
-static struct attribute sys_rb_attr = {
- .name = "write_buffer",
- .mode = 0444,
-};
-
-static struct attribute sys_stats_ppaf_attr = {
- .name = "ppa_format",
- .mode = 0444,
-};
-
-static struct attribute sys_lines_attr = {
- .name = "lines",
- .mode = 0444,
-};
-
-static struct attribute sys_lines_info_attr = {
- .name = "lines_info",
- .mode = 0444,
-};
-
-static struct attribute sys_gc_force = {
- .name = "gc_force",
- .mode = 0200,
-};
-
-static struct attribute sys_max_sec_per_write = {
- .name = "max_sec_per_write",
- .mode = 0644,
-};
-
-static struct attribute sys_write_amp_mileage = {
- .name = "write_amp_mileage",
- .mode = 0444,
-};
-
-static struct attribute sys_write_amp_trip = {
- .name = "write_amp_trip",
- .mode = 0644,
-};
-
-static struct attribute sys_padding_dist = {
- .name = "padding_dist",
- .mode = 0644,
-};
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-static struct attribute sys_stats_debug_attr = {
- .name = "stats",
- .mode = 0444,
-};
-#endif
-
-static struct attribute *pblk_attrs[] = {
- &sys_write_luns,
- &sys_rate_limiter_attr,
- &sys_errors_attr,
- &sys_gc_state,
- &sys_gc_force,
- &sys_max_sec_per_write,
- &sys_rb_attr,
- &sys_stats_ppaf_attr,
- &sys_lines_attr,
- &sys_lines_info_attr,
- &sys_write_amp_mileage,
- &sys_write_amp_trip,
- &sys_padding_dist,
-#ifdef CONFIG_NVM_PBLK_DEBUG
- &sys_stats_debug_attr,
-#endif
- NULL,
-};
-
-static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct pblk *pblk = container_of(kobj, struct pblk, kobj);
-
- if (strcmp(attr->name, "rate_limiter") == 0)
- return pblk_sysfs_rate_limiter(pblk, buf);
- else if (strcmp(attr->name, "write_luns") == 0)
- return pblk_sysfs_luns_show(pblk, buf);
- else if (strcmp(attr->name, "gc_state") == 0)
- return pblk_sysfs_gc_state_show(pblk, buf);
- else if (strcmp(attr->name, "errors") == 0)
- return pblk_sysfs_stats(pblk, buf);
- else if (strcmp(attr->name, "write_buffer") == 0)
- return pblk_sysfs_write_buffer(pblk, buf);
- else if (strcmp(attr->name, "ppa_format") == 0)
- return pblk_sysfs_ppaf(pblk, buf);
- else if (strcmp(attr->name, "lines") == 0)
- return pblk_sysfs_lines(pblk, buf);
- else if (strcmp(attr->name, "lines_info") == 0)
- return pblk_sysfs_lines_info(pblk, buf);
- else if (strcmp(attr->name, "max_sec_per_write") == 0)
- return pblk_sysfs_get_sec_per_write(pblk, buf);
- else if (strcmp(attr->name, "write_amp_mileage") == 0)
- return pblk_sysfs_get_write_amp_mileage(pblk, buf);
- else if (strcmp(attr->name, "write_amp_trip") == 0)
- return pblk_sysfs_get_write_amp_trip(pblk, buf);
- else if (strcmp(attr->name, "padding_dist") == 0)
- return pblk_sysfs_get_padding_dist(pblk, buf);
-#ifdef CONFIG_NVM_PBLK_DEBUG
- else if (strcmp(attr->name, "stats") == 0)
- return pblk_sysfs_stats_debug(pblk, buf);
-#endif
- return 0;
-}
-
-static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t len)
-{
- struct pblk *pblk = container_of(kobj, struct pblk, kobj);
-
- if (strcmp(attr->name, "gc_force") == 0)
- return pblk_sysfs_gc_force(pblk, buf, len);
- else if (strcmp(attr->name, "max_sec_per_write") == 0)
- return pblk_sysfs_set_sec_per_write(pblk, buf, len);
- else if (strcmp(attr->name, "write_amp_trip") == 0)
- return pblk_sysfs_set_write_amp_trip(pblk, buf, len);
- else if (strcmp(attr->name, "padding_dist") == 0)
- return pblk_sysfs_set_padding_dist(pblk, buf, len);
- return 0;
-}
-
-static const struct sysfs_ops pblk_sysfs_ops = {
- .show = pblk_sysfs_show,
- .store = pblk_sysfs_store,
-};
-
-static struct kobj_type pblk_ktype = {
- .sysfs_ops = &pblk_sysfs_ops,
- .default_attrs = pblk_attrs,
-};
-
-int pblk_sysfs_init(struct gendisk *tdisk)
-{
- struct pblk *pblk = tdisk->private_data;
- struct device *parent_dev = disk_to_dev(pblk->disk);
- int ret;
-
- ret = kobject_init_and_add(&pblk->kobj, &pblk_ktype,
- kobject_get(&parent_dev->kobj),
- "%s", "pblk");
- if (ret) {
- pblk_err(pblk, "could not register\n");
- return ret;
- }
-
- kobject_uevent(&pblk->kobj, KOBJ_ADD);
- return 0;
-}
-
-void pblk_sysfs_exit(struct gendisk *tdisk)
-{
- struct pblk *pblk = tdisk->private_data;
-
- kobject_uevent(&pblk->kobj, KOBJ_REMOVE);
- kobject_del(&pblk->kobj);
- kobject_put(&pblk->kobj);
-}
diff --git a/drivers/lightnvm/pblk-trace.h b/drivers/lightnvm/pblk-trace.h
deleted file mode 100644
index 47b67c6bff7a..000000000000
--- a/drivers/lightnvm/pblk-trace.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM pblk
-
-#if !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_PBLK_H
-
-#include <linux/tracepoint.h>
-
-struct ppa_addr;
-
-#define show_chunk_flags(state) __print_flags(state, "", \
- { NVM_CHK_ST_FREE, "FREE", }, \
- { NVM_CHK_ST_CLOSED, "CLOSED", }, \
- { NVM_CHK_ST_OPEN, "OPEN", }, \
- { NVM_CHK_ST_OFFLINE, "OFFLINE", })
-
-#define show_line_state(state) __print_symbolic(state, \
- { PBLK_LINESTATE_NEW, "NEW", }, \
- { PBLK_LINESTATE_FREE, "FREE", }, \
- { PBLK_LINESTATE_OPEN, "OPEN", }, \
- { PBLK_LINESTATE_CLOSED, "CLOSED", }, \
- { PBLK_LINESTATE_GC, "GC", }, \
- { PBLK_LINESTATE_BAD, "BAD", }, \
- { PBLK_LINESTATE_CORRUPT, "CORRUPT" })
-
-
-#define show_pblk_state(state) __print_symbolic(state, \
- { PBLK_STATE_RUNNING, "RUNNING", }, \
- { PBLK_STATE_STOPPING, "STOPPING", }, \
- { PBLK_STATE_RECOVERING, "RECOVERING", }, \
- { PBLK_STATE_STOPPED, "STOPPED" })
-
-#define show_chunk_erase_state(state) __print_symbolic(state, \
- { PBLK_CHUNK_RESET_START, "START", }, \
- { PBLK_CHUNK_RESET_DONE, "OK", }, \
- { PBLK_CHUNK_RESET_FAILED, "FAILED" })
-
-
-TRACE_EVENT(pblk_chunk_reset,
-
- TP_PROTO(const char *name, struct ppa_addr *ppa, int state),
-
- TP_ARGS(name, ppa, state),
-
- TP_STRUCT__entry(
- __string(name, name)
- __field(u64, ppa)
- __field(int, state)
- ),
-
- TP_fast_assign(
- __assign_str(name, name);
- __entry->ppa = ppa->ppa;
- __entry->state = state;
- ),
-
- TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name),
- (u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp),
- (u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu),
- (u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk),
- show_chunk_erase_state((int)__entry->state))
-
-);
-
-TRACE_EVENT(pblk_chunk_state,
-
- TP_PROTO(const char *name, struct ppa_addr *ppa, int state),
-
- TP_ARGS(name, ppa, state),
-
- TP_STRUCT__entry(
- __string(name, name)
- __field(u64, ppa)
- __field(int, state)
- ),
-
- TP_fast_assign(
- __assign_str(name, name);
- __entry->ppa = ppa->ppa;
- __entry->state = state;
- ),
-
- TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name),
- (u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp),
- (u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu),
- (u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk),
- show_chunk_flags((int)__entry->state))
-
-);
-
-TRACE_EVENT(pblk_line_state,
-
- TP_PROTO(const char *name, int line, int state),
-
- TP_ARGS(name, line, state),
-
- TP_STRUCT__entry(
- __string(name, name)
- __field(int, line)
- __field(int, state)
- ),
-
- TP_fast_assign(
- __assign_str(name, name);
- __entry->line = line;
- __entry->state = state;
- ),
-
- TP_printk("dev=%s line=%d state=%s", __get_str(name),
- (int)__entry->line,
- show_line_state((int)__entry->state))
-
-);
-
-TRACE_EVENT(pblk_state,
-
- TP_PROTO(const char *name, int state),
-
- TP_ARGS(name, state),
-
- TP_STRUCT__entry(
- __string(name, name)
- __field(int, state)
- ),
-
- TP_fast_assign(
- __assign_str(name, name);
- __entry->state = state;
- ),
-
- TP_printk("dev=%s state=%s", __get_str(name),
- show_pblk_state((int)__entry->state))
-
-);
-
-#endif /* !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) */
-
-/* This part must be outside protection */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH ../../drivers/lightnvm
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE pblk-trace
-#include <trace/define_trace.h>
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
deleted file mode 100644
index b9a2aeba95ab..000000000000
--- a/drivers/lightnvm/pblk-write.c
+++ /dev/null
@@ -1,665 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-write.c - pblk's write path from write buffer to media
- */
-
-#include "pblk.h"
-#include "pblk-trace.h"
-
-static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
- struct pblk_c_ctx *c_ctx)
-{
- struct bio *original_bio;
- struct pblk_rb *rwb = &pblk->rwb;
- unsigned long ret;
- int i;
-
- for (i = 0; i < c_ctx->nr_valid; i++) {
- struct pblk_w_ctx *w_ctx;
- int pos = c_ctx->sentry + i;
- int flags;
-
- w_ctx = pblk_rb_w_ctx(rwb, pos);
- flags = READ_ONCE(w_ctx->flags);
-
- if (flags & PBLK_FLUSH_ENTRY) {
- flags &= ~PBLK_FLUSH_ENTRY;
- /* Release flags on context. Protect from writes */
- smp_store_release(&w_ctx->flags, flags);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_dec(&rwb->inflight_flush_point);
-#endif
- }
-
- while ((original_bio = bio_list_pop(&w_ctx->bios)))
- bio_endio(original_bio);
- }
-
- if (c_ctx->nr_padded)
- pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid,
- c_ctx->nr_padded);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(rqd->nr_ppas, &pblk->sync_writes);
-#endif
-
- ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
-
- bio_put(rqd->bio);
- pblk_free_rqd(pblk, rqd, PBLK_WRITE);
-
- return ret;
-}
-
-static unsigned long pblk_end_queued_w_bio(struct pblk *pblk,
- struct nvm_rq *rqd,
- struct pblk_c_ctx *c_ctx)
-{
- list_del(&c_ctx->list);
- return pblk_end_w_bio(pblk, rqd, c_ctx);
-}
-
-static void pblk_complete_write(struct pblk *pblk, struct nvm_rq *rqd,
- struct pblk_c_ctx *c_ctx)
-{
- struct pblk_c_ctx *c, *r;
- unsigned long flags;
- unsigned long pos;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_sub(c_ctx->nr_valid, &pblk->inflight_writes);
-#endif
- pblk_up_rq(pblk, c_ctx->lun_bitmap);
-
- pos = pblk_rb_sync_init(&pblk->rwb, &flags);
- if (pos == c_ctx->sentry) {
- pos = pblk_end_w_bio(pblk, rqd, c_ctx);
-
-retry:
- list_for_each_entry_safe(c, r, &pblk->compl_list, list) {
- rqd = nvm_rq_from_c_ctx(c);
- if (c->sentry == pos) {
- pos = pblk_end_queued_w_bio(pblk, rqd, c);
- goto retry;
- }
- }
- } else {
- WARN_ON(nvm_rq_from_c_ctx(c_ctx) != rqd);
- list_add_tail(&c_ctx->list, &pblk->compl_list);
- }
- pblk_rb_sync_end(&pblk->rwb, &flags);
-}
-
-/* Map remaining sectors in chunk, starting from ppa */
-static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa,
- int rqd_ppas)
-{
- struct pblk_line *line;
- struct ppa_addr map_ppa = *ppa;
- __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
- __le64 *lba_list;
- u64 paddr;
- int done = 0;
- int n = 0;
-
- line = pblk_ppa_to_line(pblk, *ppa);
- lba_list = emeta_to_lbas(pblk, line->emeta->buf);
-
- spin_lock(&line->lock);
-
- while (!done) {
- paddr = pblk_dev_ppa_to_line_addr(pblk, map_ppa);
-
- if (!test_and_set_bit(paddr, line->map_bitmap))
- line->left_msecs--;
-
- if (n < rqd_ppas && lba_list[paddr] != addr_empty)
- line->nr_valid_lbas--;
-
- lba_list[paddr] = addr_empty;
-
- if (!test_and_set_bit(paddr, line->invalid_bitmap))
- le32_add_cpu(line->vsc, -1);
-
- done = nvm_next_ppa_in_chk(pblk->dev, &map_ppa);
-
- n++;
- }
-
- line->w_err_gc->has_write_err = 1;
- spin_unlock(&line->lock);
-}
-
-static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry,
- unsigned int nr_entries)
-{
- struct pblk_rb *rb = &pblk->rwb;
- struct pblk_rb_entry *entry;
- struct pblk_line *line;
- struct pblk_w_ctx *w_ctx;
- struct ppa_addr ppa_l2p;
- int flags;
- unsigned int i;
-
- spin_lock(&pblk->trans_lock);
- for (i = 0; i < nr_entries; i++) {
- entry = &rb->entries[pblk_rb_ptr_wrap(rb, sentry, i)];
- w_ctx = &entry->w_ctx;
-
- /* Check if the lba has been overwritten */
- if (w_ctx->lba != ADDR_EMPTY) {
- ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba);
- if (!pblk_ppa_comp(ppa_l2p, entry->cacheline))
- w_ctx->lba = ADDR_EMPTY;
- }
-
- /* Mark up the entry as submittable again */
- flags = READ_ONCE(w_ctx->flags);
- flags |= PBLK_WRITTEN_DATA;
- /* Release flags on write context. Protect from writes */
- smp_store_release(&w_ctx->flags, flags);
-
- /* Decrease the reference count to the line as we will
- * re-map these entries
- */
- line = pblk_ppa_to_line(pblk, w_ctx->ppa);
- atomic_dec(&line->sec_to_update);
- kref_put(&line->ref, pblk_line_put);
- }
- spin_unlock(&pblk->trans_lock);
-}
-
-static void pblk_queue_resubmit(struct pblk *pblk, struct pblk_c_ctx *c_ctx)
-{
- struct pblk_c_ctx *r_ctx;
-
- r_ctx = kzalloc(sizeof(struct pblk_c_ctx), GFP_KERNEL);
- if (!r_ctx)
- return;
-
- r_ctx->lun_bitmap = NULL;
- r_ctx->sentry = c_ctx->sentry;
- r_ctx->nr_valid = c_ctx->nr_valid;
- r_ctx->nr_padded = c_ctx->nr_padded;
-
- spin_lock(&pblk->resubmit_lock);
- list_add_tail(&r_ctx->list, &pblk->resubmit_list);
- spin_unlock(&pblk->resubmit_lock);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(c_ctx->nr_valid, &pblk->recov_writes);
-#endif
-}
-
-static void pblk_submit_rec(struct work_struct *work)
-{
- struct pblk_rec_ctx *recovery =
- container_of(work, struct pblk_rec_ctx, ws_rec);
- struct pblk *pblk = recovery->pblk;
- struct nvm_rq *rqd = recovery->rqd;
- struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
-
- pblk_log_write_err(pblk, rqd);
-
- pblk_map_remaining(pblk, ppa_list, rqd->nr_ppas);
- pblk_queue_resubmit(pblk, c_ctx);
-
- pblk_up_rq(pblk, c_ctx->lun_bitmap);
- if (c_ctx->nr_padded)
- pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid,
- c_ctx->nr_padded);
- bio_put(rqd->bio);
- pblk_free_rqd(pblk, rqd, PBLK_WRITE);
- mempool_free(recovery, &pblk->rec_pool);
-
- atomic_dec(&pblk->inflight_io);
- pblk_write_kick(pblk);
-}
-
-
-static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct pblk_rec_ctx *recovery;
-
- recovery = mempool_alloc(&pblk->rec_pool, GFP_ATOMIC);
- if (!recovery) {
- pblk_err(pblk, "could not allocate recovery work\n");
- return;
- }
-
- recovery->pblk = pblk;
- recovery->rqd = rqd;
-
- INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
- queue_work(pblk->close_wq, &recovery->ws_rec);
-}
-
-static void pblk_end_io_write(struct nvm_rq *rqd)
-{
- struct pblk *pblk = rqd->private;
- struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
-
- if (rqd->error) {
- pblk_end_w_fail(pblk, rqd);
- return;
- } else {
- if (trace_pblk_chunk_state_enabled())
- pblk_check_chunk_state_update(pblk, rqd);
-#ifdef CONFIG_NVM_PBLK_DEBUG
- WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
-#endif
- }
-
- pblk_complete_write(pblk, rqd, c_ctx);
- atomic_dec(&pblk->inflight_io);
-}
-
-static void pblk_end_io_write_meta(struct nvm_rq *rqd)
-{
- struct pblk *pblk = rqd->private;
- struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
- struct pblk_line *line = m_ctx->private;
- struct pblk_emeta *emeta = line->emeta;
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
- int sync;
-
- pblk_up_chunk(pblk, ppa_list[0]);
-
- if (rqd->error) {
- pblk_log_write_err(pblk, rqd);
- pblk_err(pblk, "metadata I/O failed. Line %d\n", line->id);
- line->w_err_gc->has_write_err = 1;
- } else {
- if (trace_pblk_chunk_state_enabled())
- pblk_check_chunk_state_update(pblk, rqd);
- }
-
- sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
- if (sync == emeta->nr_entries)
- pblk_gen_run_ws(pblk, line, NULL, pblk_line_close_ws,
- GFP_ATOMIC, pblk->close_wq);
-
- pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
-
- atomic_dec(&pblk->inflight_io);
-}
-
-static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
- unsigned int nr_secs, nvm_end_io_fn(*end_io))
-{
- /* Setup write request */
- rqd->opcode = NVM_OP_PWRITE;
- rqd->nr_ppas = nr_secs;
- rqd->is_seq = 1;
- rqd->private = pblk;
- rqd->end_io = end_io;
-
- return pblk_alloc_rqd_meta(pblk, rqd);
-}
-
-static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
- struct ppa_addr *erase_ppa)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *e_line = pblk_line_get_erase(pblk);
- struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
- unsigned int valid = c_ctx->nr_valid;
- unsigned int padded = c_ctx->nr_padded;
- unsigned int nr_secs = valid + padded;
- unsigned long *lun_bitmap;
- int ret;
-
- lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
- if (!lun_bitmap)
- return -ENOMEM;
- c_ctx->lun_bitmap = lun_bitmap;
-
- ret = pblk_alloc_w_rq(pblk, rqd, nr_secs, pblk_end_io_write);
- if (ret) {
- kfree(lun_bitmap);
- return ret;
- }
-
- if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
- ret = pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
- valid, 0);
- else
- ret = pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
- valid, erase_ppa);
-
- return ret;
-}
-
-static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
- unsigned int secs_to_flush)
-{
- int secs_to_sync;
-
- secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush, true);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- if ((!secs_to_sync && secs_to_flush)
- || (secs_to_sync < 0)
- || (secs_to_sync > secs_avail && !secs_to_flush)) {
- pblk_err(pblk, "bad sector calculation (a:%d,s:%d,f:%d)\n",
- secs_avail, secs_to_sync, secs_to_flush);
- }
-#endif
-
- return secs_to_sync;
-}
-
-int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_emeta *emeta = meta_line->emeta;
- struct ppa_addr *ppa_list;
- struct pblk_g_ctx *m_ctx;
- struct nvm_rq *rqd;
- void *data;
- u64 paddr;
- int rq_ppas = pblk->min_write_pgs;
- int id = meta_line->id;
- int rq_len;
- int i, j;
- int ret;
-
- rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
-
- m_ctx = nvm_rq_to_pdu(rqd);
- m_ctx->private = meta_line;
-
- rq_len = rq_ppas * geo->csecs;
- data = ((void *)emeta->buf) + emeta->mem;
-
- ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta);
- if (ret)
- goto fail_free_rqd;
-
- ppa_list = nvm_rq_to_ppa_list(rqd);
- for (i = 0; i < rqd->nr_ppas; ) {
- spin_lock(&meta_line->lock);
- paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas);
- spin_unlock(&meta_line->lock);
- for (j = 0; j < rq_ppas; j++, i++, paddr++)
- ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
- }
-
- spin_lock(&l_mg->close_lock);
- emeta->mem += rq_len;
- if (emeta->mem >= lm->emeta_len[0])
- list_del(&meta_line->list);
- spin_unlock(&l_mg->close_lock);
-
- pblk_down_chunk(pblk, ppa_list[0]);
-
- ret = pblk_submit_io(pblk, rqd, data);
- if (ret) {
- pblk_err(pblk, "emeta I/O submission failed: %d\n", ret);
- goto fail_rollback;
- }
-
- return NVM_IO_OK;
-
-fail_rollback:
- pblk_up_chunk(pblk, ppa_list[0]);
- spin_lock(&l_mg->close_lock);
- pblk_dealloc_page(pblk, meta_line, rq_ppas);
- list_add(&meta_line->list, &meta_line->list);
- spin_unlock(&l_mg->close_lock);
-fail_free_rqd:
- pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
- return ret;
-}
-
-static inline bool pblk_valid_meta_ppa(struct pblk *pblk,
- struct pblk_line *meta_line,
- struct nvm_rq *data_rqd)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_c_ctx *data_c_ctx = nvm_rq_to_pdu(data_rqd);
- struct pblk_line *data_line = pblk_line_get_data(pblk);
- struct ppa_addr ppa, ppa_opt;
- u64 paddr;
- int pos_opt;
-
- /* Schedule a metadata I/O that is half the distance from the data I/O
- * with regards to the number of LUNs forming the pblk instance. This
- * balances LUN conflicts across every I/O.
- *
- * When the LUN configuration changes (e.g., due to GC), this distance
- * can align, which would result on metadata and data I/Os colliding. In
- * this case, modify the distance to not be optimal, but move the
- * optimal in the right direction.
- */
- paddr = pblk_lookup_page(pblk, meta_line);
- ppa = addr_to_gen_ppa(pblk, paddr, 0);
- ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
- pos_opt = pblk_ppa_to_pos(geo, ppa_opt);
-
- if (test_bit(pos_opt, data_c_ctx->lun_bitmap) ||
- test_bit(pos_opt, data_line->blk_bitmap))
- return true;
-
- if (unlikely(pblk_ppa_comp(ppa_opt, ppa)))
- data_line->meta_distance--;
-
- return false;
-}
-
-static struct pblk_line *pblk_should_submit_meta_io(struct pblk *pblk,
- struct nvm_rq *data_rqd)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *meta_line;
-
- spin_lock(&l_mg->close_lock);
- if (list_empty(&l_mg->emeta_list)) {
- spin_unlock(&l_mg->close_lock);
- return NULL;
- }
- meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
- if (meta_line->emeta->mem >= lm->emeta_len[0]) {
- spin_unlock(&l_mg->close_lock);
- return NULL;
- }
- spin_unlock(&l_mg->close_lock);
-
- if (!pblk_valid_meta_ppa(pblk, meta_line, data_rqd))
- return NULL;
-
- return meta_line;
-}
-
-static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct ppa_addr erase_ppa;
- struct pblk_line *meta_line;
- int err;
-
- pblk_ppa_set_empty(&erase_ppa);
-
- /* Assign lbas to ppas and populate request structure */
- err = pblk_setup_w_rq(pblk, rqd, &erase_ppa);
- if (err) {
- pblk_err(pblk, "could not setup write request: %d\n", err);
- return NVM_IO_ERR;
- }
-
- meta_line = pblk_should_submit_meta_io(pblk, rqd);
-
- /* Submit data write for current data line */
- err = pblk_submit_io(pblk, rqd, NULL);
- if (err) {
- pblk_err(pblk, "data I/O submission failed: %d\n", err);
- return NVM_IO_ERR;
- }
-
- if (!pblk_ppa_empty(erase_ppa)) {
- /* Submit erase for next data line */
- if (pblk_blk_erase_async(pblk, erase_ppa)) {
- struct pblk_line *e_line = pblk_line_get_erase(pblk);
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int bit;
-
- atomic_inc(&e_line->left_eblks);
- bit = pblk_ppa_to_pos(geo, erase_ppa);
- WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
- }
- }
-
- if (meta_line) {
- /* Submit metadata write for previous data line */
- err = pblk_submit_meta_io(pblk, meta_line);
- if (err) {
- pblk_err(pblk, "metadata I/O submission failed: %d",
- err);
- return NVM_IO_ERR;
- }
- }
-
- return NVM_IO_OK;
-}
-
-static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
- struct bio *bio = rqd->bio;
-
- if (c_ctx->nr_padded)
- pblk_bio_free_pages(pblk, bio, c_ctx->nr_valid,
- c_ctx->nr_padded);
-}
-
-static int pblk_submit_write(struct pblk *pblk, int *secs_left)
-{
- struct bio *bio;
- struct nvm_rq *rqd;
- unsigned int secs_avail, secs_to_sync, secs_to_com;
- unsigned int secs_to_flush, packed_meta_pgs;
- unsigned long pos;
- unsigned int resubmit;
-
- *secs_left = 0;
-
- spin_lock(&pblk->resubmit_lock);
- resubmit = !list_empty(&pblk->resubmit_list);
- spin_unlock(&pblk->resubmit_lock);
-
- /* Resubmit failed writes first */
- if (resubmit) {
- struct pblk_c_ctx *r_ctx;
-
- spin_lock(&pblk->resubmit_lock);
- r_ctx = list_first_entry(&pblk->resubmit_list,
- struct pblk_c_ctx, list);
- list_del(&r_ctx->list);
- spin_unlock(&pblk->resubmit_lock);
-
- secs_avail = r_ctx->nr_valid;
- pos = r_ctx->sentry;
-
- pblk_prepare_resubmit(pblk, pos, secs_avail);
- secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
- secs_avail);
-
- kfree(r_ctx);
- } else {
- /* If there are no sectors in the cache,
- * flushes (bios without data) will be cleared on
- * the cache threads
- */
- secs_avail = pblk_rb_read_count(&pblk->rwb);
- if (!secs_avail)
- return 0;
-
- secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb);
- if (!secs_to_flush && secs_avail < pblk->min_write_pgs_data)
- return 0;
-
- secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
- secs_to_flush);
- if (secs_to_sync > pblk->max_write_pgs) {
- pblk_err(pblk, "bad buffer sync calculation\n");
- return 0;
- }
-
- secs_to_com = (secs_to_sync > secs_avail) ?
- secs_avail : secs_to_sync;
- pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
- }
-
- packed_meta_pgs = (pblk->min_write_pgs - pblk->min_write_pgs_data);
- bio = bio_alloc(GFP_KERNEL, secs_to_sync + packed_meta_pgs);
-
- bio->bi_iter.bi_sector = 0; /* internal bio */
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-
- rqd = pblk_alloc_rqd(pblk, PBLK_WRITE);
- rqd->bio = bio;
-
- if (pblk_rb_read_to_bio(&pblk->rwb, rqd, pos, secs_to_sync,
- secs_avail)) {
- pblk_err(pblk, "corrupted write bio\n");
- goto fail_put_bio;
- }
-
- if (pblk_submit_io_set(pblk, rqd))
- goto fail_free_bio;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(secs_to_sync, &pblk->sub_writes);
-#endif
-
- *secs_left = 1;
- return 0;
-
-fail_free_bio:
- pblk_free_write_rqd(pblk, rqd);
-fail_put_bio:
- bio_put(bio);
- pblk_free_rqd(pblk, rqd, PBLK_WRITE);
-
- return -EINTR;
-}
-
-int pblk_write_ts(void *data)
-{
- struct pblk *pblk = data;
- int secs_left;
- int write_failure = 0;
-
- while (!kthread_should_stop()) {
- if (!write_failure) {
- write_failure = pblk_submit_write(pblk, &secs_left);
-
- if (secs_left)
- continue;
- }
- set_current_state(TASK_INTERRUPTIBLE);
- io_schedule();
- }
-
- return 0;
-}
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
deleted file mode 100644
index 86ffa875bfe1..000000000000
--- a/drivers/lightnvm/pblk.h
+++ /dev/null
@@ -1,1358 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2015 IT University of Copenhagen (rrpc.h)
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Matias Bjorling <matias@cnexlabs.com>
- * Write buffering: Javier Gonzalez <javier@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Implementation of a Physical Block-device target for Open-channel SSDs.
- *
- */
-
-#ifndef PBLK_H_
-#define PBLK_H_
-
-#include <linux/blkdev.h>
-#include <linux/blk-mq.h>
-#include <linux/bio.h>
-#include <linux/module.h>
-#include <linux/kthread.h>
-#include <linux/vmalloc.h>
-#include <linux/crc32.h>
-#include <linux/uuid.h>
-
-#include <linux/lightnvm.h>
-
-/* Run only GC if less than 1/X blocks are free */
-#define GC_LIMIT_INVERSE 5
-#define GC_TIME_MSECS 1000
-
-#define PBLK_SECTOR (512)
-#define PBLK_EXPOSED_PAGE_SIZE (4096)
-
-#define PBLK_NR_CLOSE_JOBS (4)
-
-#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
-
-/* Max 512 LUNs per device */
-#define PBLK_MAX_LUNS_BITMAP (4)
-
-#define NR_PHY_IN_LOG (PBLK_EXPOSED_PAGE_SIZE / PBLK_SECTOR)
-
-/* Static pool sizes */
-#define PBLK_GEN_WS_POOL_SIZE (2)
-
-#define PBLK_DEFAULT_OP (11)
-
-enum {
- PBLK_READ = READ,
- PBLK_WRITE = WRITE,/* Write from write buffer */
- PBLK_WRITE_INT, /* Internal write - no write buffer */
- PBLK_READ_RECOV, /* Recovery read - errors allowed */
- PBLK_ERASE,
-};
-
-enum {
- /* IO Types */
- PBLK_IOTYPE_USER = 1 << 0,
- PBLK_IOTYPE_GC = 1 << 1,
-
- /* Write buffer flags */
- PBLK_FLUSH_ENTRY = 1 << 2,
- PBLK_WRITTEN_DATA = 1 << 3,
- PBLK_SUBMITTED_ENTRY = 1 << 4,
- PBLK_WRITABLE_ENTRY = 1 << 5,
-};
-
-enum {
- PBLK_BLK_ST_OPEN = 0x1,
- PBLK_BLK_ST_CLOSED = 0x2,
-};
-
-enum {
- PBLK_CHUNK_RESET_START,
- PBLK_CHUNK_RESET_DONE,
- PBLK_CHUNK_RESET_FAILED,
-};
-
-struct pblk_sec_meta {
- u64 reserved;
- __le64 lba;
-};
-
-/* The number of GC lists and the rate-limiter states go together. This way the
- * rate-limiter can dictate how much GC is needed based on resource utilization.
- */
-#define PBLK_GC_NR_LISTS 4
-
-enum {
- PBLK_RL_OFF = 0,
- PBLK_RL_WERR = 1,
- PBLK_RL_HIGH = 2,
- PBLK_RL_MID = 3,
- PBLK_RL_LOW = 4
-};
-
-#define pblk_dma_ppa_size (sizeof(u64) * NVM_MAX_VLBA)
-
-/* write buffer completion context */
-struct pblk_c_ctx {
- struct list_head list; /* Head for out-of-order completion */
-
- unsigned long *lun_bitmap; /* Luns used on current request */
- unsigned int sentry;
- unsigned int nr_valid;
- unsigned int nr_padded;
-};
-
-/* read context */
-struct pblk_g_ctx {
- void *private;
- unsigned long start_time;
- u64 lba;
-};
-
-/* Pad context */
-struct pblk_pad_rq {
- struct pblk *pblk;
- struct completion wait;
- struct kref ref;
-};
-
-/* Recovery context */
-struct pblk_rec_ctx {
- struct pblk *pblk;
- struct nvm_rq *rqd;
- struct work_struct ws_rec;
-};
-
-/* Write context */
-struct pblk_w_ctx {
- struct bio_list bios; /* Original bios - used for completion
- * in REQ_FUA, REQ_FLUSH case
- */
- u64 lba; /* Logic addr. associated with entry */
- struct ppa_addr ppa; /* Physic addr. associated with entry */
- int flags; /* Write context flags */
-};
-
-struct pblk_rb_entry {
- struct ppa_addr cacheline; /* Cacheline for this entry */
- void *data; /* Pointer to data on this entry */
- struct pblk_w_ctx w_ctx; /* Context for this entry */
- struct list_head index; /* List head to enable indexes */
-};
-
-#define EMPTY_ENTRY (~0U)
-
-struct pblk_rb_pages {
- struct page *pages;
- int order;
- struct list_head list;
-};
-
-struct pblk_rb {
- struct pblk_rb_entry *entries; /* Ring buffer entries */
- unsigned int mem; /* Write offset - points to next
- * writable entry in memory
- */
- unsigned int subm; /* Read offset - points to last entry
- * that has been submitted to the media
- * to be persisted
- */
- unsigned int sync; /* Synced - backpointer that signals
- * the last submitted entry that has
- * been successfully persisted to media
- */
- unsigned int flush_point; /* Sync point - last entry that must be
- * flushed to the media. Used with
- * REQ_FLUSH and REQ_FUA
- */
- unsigned int l2p_update; /* l2p update point - next entry for
- * which l2p mapping will be updated to
- * contain a device ppa address (instead
- * of a cacheline
- */
- unsigned int nr_entries; /* Number of entries in write buffer -
- * must be a power of two
- */
- unsigned int seg_size; /* Size of the data segments being
- * stored on each entry. Typically this
- * will be 4KB
- */
-
- unsigned int back_thres; /* Threshold that shall be maintained by
- * the backpointer in order to respect
- * geo->mw_cunits on a per chunk basis
- */
-
- struct list_head pages; /* List of data pages */
-
- spinlock_t w_lock; /* Write lock */
- spinlock_t s_lock; /* Sync lock */
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_t inflight_flush_point; /* Not served REQ_FLUSH | REQ_FUA */
-#endif
-};
-
-#define PBLK_RECOVERY_SECTORS 16
-
-struct pblk_lun {
- struct ppa_addr bppa;
- struct semaphore wr_sem;
-};
-
-struct pblk_gc_rq {
- struct pblk_line *line;
- void *data;
- u64 paddr_list[NVM_MAX_VLBA];
- u64 lba_list[NVM_MAX_VLBA];
- int nr_secs;
- int secs_to_gc;
- struct list_head list;
-};
-
-struct pblk_gc {
- /* These states are not protected by a lock since (i) they are in the
- * fast path, and (ii) they are not critical.
- */
- int gc_active;
- int gc_enabled;
- int gc_forced;
-
- struct task_struct *gc_ts;
- struct task_struct *gc_writer_ts;
- struct task_struct *gc_reader_ts;
-
- struct workqueue_struct *gc_line_reader_wq;
- struct workqueue_struct *gc_reader_wq;
-
- struct timer_list gc_timer;
-
- struct semaphore gc_sem;
- atomic_t read_inflight_gc; /* Number of lines with inflight GC reads */
- atomic_t pipeline_gc; /* Number of lines in the GC pipeline -
- * started reads to finished writes
- */
- int w_entries;
-
- struct list_head w_list;
- struct list_head r_list;
-
- spinlock_t lock;
- spinlock_t w_lock;
- spinlock_t r_lock;
-};
-
-struct pblk_rl {
- unsigned int high; /* Upper threshold for rate limiter (free run -
- * user I/O rate limiter
- */
- unsigned int high_pw; /* High rounded up as a power of 2 */
-
-#define PBLK_USER_HIGH_THRS 8 /* Begin write limit at 12% available blks */
-#define PBLK_USER_LOW_THRS 10 /* Aggressive GC at 10% available blocks */
-
- int rb_windows_pw; /* Number of rate windows in the write buffer
- * given as a power-of-2. This guarantees that
- * when user I/O is being rate limited, there
- * will be reserved enough space for the GC to
- * place its payload. A window is of
- * pblk->max_write_pgs size, which in NVMe is
- * 64, i.e., 256kb.
- */
- int rb_budget; /* Total number of entries available for I/O */
- int rb_user_max; /* Max buffer entries available for user I/O */
- int rb_gc_max; /* Max buffer entries available for GC I/O */
- int rb_gc_rsv; /* Reserved buffer entries for GC I/O */
- int rb_state; /* Rate-limiter current state */
- int rb_max_io; /* Maximum size for an I/O giving the config */
-
- atomic_t rb_user_cnt; /* User I/O buffer counter */
- atomic_t rb_gc_cnt; /* GC I/O buffer counter */
- atomic_t rb_space; /* Space limit in case of reaching capacity */
-
- int rsv_blocks; /* Reserved blocks for GC */
-
- int rb_user_active;
- int rb_gc_active;
-
- atomic_t werr_lines; /* Number of write error lines that needs gc */
-
- struct timer_list u_timer;
-
- unsigned long total_blocks;
-
- atomic_t free_blocks; /* Total number of free blocks (+ OP) */
- atomic_t free_user_blocks; /* Number of user free blocks (no OP) */
-};
-
-#define PBLK_LINE_EMPTY (~0U)
-
-enum {
- /* Line Types */
- PBLK_LINETYPE_FREE = 0,
- PBLK_LINETYPE_LOG = 1,
- PBLK_LINETYPE_DATA = 2,
-
- /* Line state */
- PBLK_LINESTATE_NEW = 9,
- PBLK_LINESTATE_FREE = 10,
- PBLK_LINESTATE_OPEN = 11,
- PBLK_LINESTATE_CLOSED = 12,
- PBLK_LINESTATE_GC = 13,
- PBLK_LINESTATE_BAD = 14,
- PBLK_LINESTATE_CORRUPT = 15,
-
- /* GC group */
- PBLK_LINEGC_NONE = 20,
- PBLK_LINEGC_EMPTY = 21,
- PBLK_LINEGC_LOW = 22,
- PBLK_LINEGC_MID = 23,
- PBLK_LINEGC_HIGH = 24,
- PBLK_LINEGC_FULL = 25,
- PBLK_LINEGC_WERR = 26
-};
-
-#define PBLK_MAGIC 0x70626c6b /*pblk*/
-
-/* emeta/smeta persistent storage format versions:
- * Changes in major version requires offline migration.
- * Changes in minor version are handled automatically during
- * recovery.
- */
-
-#define SMETA_VERSION_MAJOR (0)
-#define SMETA_VERSION_MINOR (1)
-
-#define EMETA_VERSION_MAJOR (0)
-#define EMETA_VERSION_MINOR (2)
-
-struct line_header {
- __le32 crc;
- __le32 identifier; /* pblk identifier */
- __u8 uuid[16]; /* instance uuid */
- __le16 type; /* line type */
- __u8 version_major; /* version major */
- __u8 version_minor; /* version minor */
- __le32 id; /* line id for current line */
-};
-
-struct line_smeta {
- struct line_header header;
-
- __le32 crc; /* Full structure including struct crc */
- /* Previous line metadata */
- __le32 prev_id; /* Line id for previous line */
-
- /* Current line metadata */
- __le64 seq_nr; /* Sequence number for current line */
-
- /* Active writers */
- __le32 window_wr_lun; /* Number of parallel LUNs to write */
-
- __le32 rsvd[2];
-
- __le64 lun_bitmap[];
-};
-
-
-/*
- * Metadata layout in media:
- * First sector:
- * 1. struct line_emeta
- * 2. bad block bitmap (u64 * window_wr_lun)
- * 3. write amplification counters
- * Mid sectors (start at lbas_sector):
- * 3. nr_lbas (u64) forming lba list
- * Last sectors (start at vsc_sector):
- * 4. u32 valid sector count (vsc) for all lines (~0U: free line)
- */
-struct line_emeta {
- struct line_header header;
-
- __le32 crc; /* Full structure including struct crc */
-
- /* Previous line metadata */
- __le32 prev_id; /* Line id for prev line */
-
- /* Current line metadata */
- __le64 seq_nr; /* Sequence number for current line */
-
- /* Active writers */
- __le32 window_wr_lun; /* Number of parallel LUNs to write */
-
- /* Bookkeeping for recovery */
- __le32 next_id; /* Line id for next line */
- __le64 nr_lbas; /* Number of lbas mapped in line */
- __le64 nr_valid_lbas; /* Number of valid lbas mapped in line */
- __le64 bb_bitmap[]; /* Updated bad block bitmap for line */
-};
-
-
-/* Write amplification counters stored on media */
-struct wa_counters {
- __le64 user; /* Number of user written sectors */
- __le64 gc; /* Number of sectors written by GC*/
- __le64 pad; /* Number of padded sectors */
-};
-
-struct pblk_emeta {
- struct line_emeta *buf; /* emeta buffer in media format */
- int mem; /* Write offset - points to next
- * writable entry in memory
- */
- atomic_t sync; /* Synced - backpointer that signals the
- * last entry that has been successfully
- * persisted to media
- */
- unsigned int nr_entries; /* Number of emeta entries */
-};
-
-struct pblk_smeta {
- struct line_smeta *buf; /* smeta buffer in persistent format */
-};
-
-struct pblk_w_err_gc {
- int has_write_err;
- int has_gc_err;
- __le64 *lba_list;
-};
-
-struct pblk_line {
- struct pblk *pblk;
- unsigned int id; /* Line number corresponds to the
- * block line
- */
- unsigned int seq_nr; /* Unique line sequence number */
-
- int state; /* PBLK_LINESTATE_X */
- int type; /* PBLK_LINETYPE_X */
- int gc_group; /* PBLK_LINEGC_X */
- struct list_head list; /* Free, GC lists */
-
- unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */
-
- struct nvm_chk_meta *chks; /* Chunks forming line */
-
- struct pblk_smeta *smeta; /* Start metadata */
- struct pblk_emeta *emeta; /* End medatada */
-
- int meta_line; /* Metadata line id */
- int meta_distance; /* Distance between data and metadata */
-
- u64 emeta_ssec; /* Sector where emeta starts */
-
- unsigned int sec_in_line; /* Number of usable secs in line */
-
- atomic_t blk_in_line; /* Number of good blocks in line */
- unsigned long *blk_bitmap; /* Bitmap for valid/invalid blocks */
- unsigned long *erase_bitmap; /* Bitmap for erased blocks */
-
- unsigned long *map_bitmap; /* Bitmap for mapped sectors in line */
- unsigned long *invalid_bitmap; /* Bitmap for invalid sectors in line */
-
- atomic_t left_eblks; /* Blocks left for erasing */
- atomic_t left_seblks; /* Blocks left for sync erasing */
-
- int left_msecs; /* Sectors left for mapping */
- unsigned int cur_sec; /* Sector map pointer */
- unsigned int nr_valid_lbas; /* Number of valid lbas in line */
-
- __le32 *vsc; /* Valid sector count in line */
-
- struct kref ref; /* Write buffer L2P references */
- atomic_t sec_to_update; /* Outstanding L2P updates to ppa */
-
- struct pblk_w_err_gc *w_err_gc; /* Write error gc recovery metadata */
-
- spinlock_t lock; /* Necessary for invalid_bitmap only */
-};
-
-#define PBLK_DATA_LINES 4
-
-enum {
- PBLK_EMETA_TYPE_HEADER = 1, /* struct line_emeta first sector */
- PBLK_EMETA_TYPE_LLBA = 2, /* lba list - type: __le64 */
- PBLK_EMETA_TYPE_VSC = 3, /* vsc list - type: __le32 */
-};
-
-struct pblk_line_mgmt {
- int nr_lines; /* Total number of full lines */
- int nr_free_lines; /* Number of full lines in free list */
-
- /* Free lists - use free_lock */
- struct list_head free_list; /* Full lines ready to use */
- struct list_head corrupt_list; /* Full lines corrupted */
- struct list_head bad_list; /* Full lines bad */
-
- /* GC lists - use gc_lock */
- struct list_head *gc_lists[PBLK_GC_NR_LISTS];
- struct list_head gc_high_list; /* Full lines ready to GC, high isc */
- struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */
- struct list_head gc_low_list; /* Full lines ready to GC, low isc */
-
- struct list_head gc_werr_list; /* Write err recovery list */
-
- struct list_head gc_full_list; /* Full lines ready to GC, no valid */
- struct list_head gc_empty_list; /* Full lines close, all valid */
-
- struct pblk_line *log_line; /* Current FTL log line */
- struct pblk_line *data_line; /* Current data line */
- struct pblk_line *log_next; /* Next FTL log line */
- struct pblk_line *data_next; /* Next data line */
-
- struct list_head emeta_list; /* Lines queued to schedule emeta */
-
- __le32 *vsc_list; /* Valid sector counts for all lines */
-
- /* Pre-allocated metadata for data lines */
- struct pblk_smeta *sline_meta[PBLK_DATA_LINES];
- struct pblk_emeta *eline_meta[PBLK_DATA_LINES];
- unsigned long meta_bitmap;
-
- /* Cache and mempool for map/invalid bitmaps */
- struct kmem_cache *bitmap_cache;
- mempool_t *bitmap_pool;
-
- /* Helpers for fast bitmap calculations */
- unsigned long *bb_template;
- unsigned long *bb_aux;
-
- unsigned long d_seq_nr; /* Data line unique sequence number */
- unsigned long l_seq_nr; /* Log line unique sequence number */
-
- spinlock_t free_lock;
- spinlock_t close_lock;
- spinlock_t gc_lock;
-};
-
-struct pblk_line_meta {
- unsigned int smeta_len; /* Total length for smeta */
- unsigned int smeta_sec; /* Sectors needed for smeta */
-
- unsigned int emeta_len[4]; /* Lengths for emeta:
- * [0]: Total
- * [1]: struct line_emeta +
- * bb_bitmap + struct wa_counters
- * [2]: L2P portion
- * [3]: vsc
- */
- unsigned int emeta_sec[4]; /* Sectors needed for emeta. Same layout
- * as emeta_len
- */
-
- unsigned int emeta_bb; /* Boundary for bb that affects emeta */
-
- unsigned int vsc_list_len; /* Length for vsc list */
- unsigned int sec_bitmap_len; /* Length for sector bitmap in line */
- unsigned int blk_bitmap_len; /* Length for block bitmap in line */
- unsigned int lun_bitmap_len; /* Length for lun bitmap in line */
-
- unsigned int blk_per_line; /* Number of blocks in a full line */
- unsigned int sec_per_line; /* Number of sectors in a line */
- unsigned int dsec_per_line; /* Number of data sectors in a line */
- unsigned int min_blk_line; /* Min. number of good blocks in line */
-
- unsigned int mid_thrs; /* Threshold for GC mid list */
- unsigned int high_thrs; /* Threshold for GC high list */
-
- unsigned int meta_distance; /* Distance between data and metadata */
-};
-
-enum {
- PBLK_STATE_RUNNING = 0,
- PBLK_STATE_STOPPING = 1,
- PBLK_STATE_RECOVERING = 2,
- PBLK_STATE_STOPPED = 3,
-};
-
-/* Internal format to support not power-of-2 device formats */
-struct pblk_addrf {
- /* gen to dev */
- int sec_stripe;
- int ch_stripe;
- int lun_stripe;
-
- /* dev to gen */
- int sec_lun_stripe;
- int sec_ws_stripe;
-};
-
-struct pblk {
- struct nvm_tgt_dev *dev;
- struct gendisk *disk;
-
- struct kobject kobj;
-
- struct pblk_lun *luns;
-
- struct pblk_line *lines; /* Line array */
- struct pblk_line_mgmt l_mg; /* Line management */
- struct pblk_line_meta lm; /* Line metadata */
-
- struct nvm_addrf addrf; /* Aligned address format */
- struct pblk_addrf uaddrf; /* Unaligned address format */
- int addrf_len;
-
- struct pblk_rb rwb;
-
- int state; /* pblk line state */
-
- int min_write_pgs; /* Minimum amount of pages required by controller */
- int min_write_pgs_data; /* Minimum amount of payload pages */
- int max_write_pgs; /* Maximum amount of pages supported by controller */
- int oob_meta_size; /* Size of OOB sector metadata */
-
- sector_t capacity; /* Device capacity when bad blocks are subtracted */
-
- int op; /* Percentage of device used for over-provisioning */
- int op_blks; /* Number of blocks used for over-provisioning */
-
- /* pblk provisioning values. Used by rate limiter */
- struct pblk_rl rl;
-
- int sec_per_write;
-
- guid_t instance_uuid;
-
- /* Persistent write amplification counters, 4kb sector I/Os */
- atomic64_t user_wa; /* Sectors written by user */
- atomic64_t gc_wa; /* Sectors written by GC */
- atomic64_t pad_wa; /* Padded sectors written */
-
- /* Reset values for delta write amplification measurements */
- u64 user_rst_wa;
- u64 gc_rst_wa;
- u64 pad_rst_wa;
-
- /* Counters used for calculating padding distribution */
- atomic64_t *pad_dist; /* Padding distribution buckets */
- u64 nr_flush_rst; /* Flushes reset value for pad dist.*/
- atomic64_t nr_flush; /* Number of flush/fua I/O */
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Non-persistent debug counters, 4kb sector I/Os */
- atomic_long_t inflight_writes; /* Inflight writes (user and gc) */
- atomic_long_t padded_writes; /* Sectors padded due to flush/fua */
- atomic_long_t padded_wb; /* Sectors padded in write buffer */
- atomic_long_t req_writes; /* Sectors stored on write buffer */
- atomic_long_t sub_writes; /* Sectors submitted from buffer */
- atomic_long_t sync_writes; /* Sectors synced to media */
- atomic_long_t inflight_reads; /* Inflight sector read requests */
- atomic_long_t cache_reads; /* Read requests that hit the cache */
- atomic_long_t sync_reads; /* Completed sector read requests */
- atomic_long_t recov_writes; /* Sectors submitted from recovery */
- atomic_long_t recov_gc_writes; /* Sectors submitted from write GC */
- atomic_long_t recov_gc_reads; /* Sectors submitted from read GC */
-#endif
-
- spinlock_t lock;
-
- atomic_long_t read_failed;
- atomic_long_t read_empty;
- atomic_long_t read_high_ecc;
- atomic_long_t read_failed_gc;
- atomic_long_t write_failed;
- atomic_long_t erase_failed;
-
- atomic_t inflight_io; /* General inflight I/O counter */
-
- struct task_struct *writer_ts;
-
- /* Simple translation map of logical addresses to physical addresses.
- * The logical addresses is known by the host system, while the physical
- * addresses are used when writing to the disk block device.
- */
- unsigned char *trans_map;
- spinlock_t trans_lock;
-
- struct list_head compl_list;
-
- spinlock_t resubmit_lock; /* Resubmit list lock */
- struct list_head resubmit_list; /* Resubmit list for failed writes*/
-
- mempool_t page_bio_pool;
- mempool_t gen_ws_pool;
- mempool_t rec_pool;
- mempool_t r_rq_pool;
- mempool_t w_rq_pool;
- mempool_t e_rq_pool;
-
- struct workqueue_struct *close_wq;
- struct workqueue_struct *bb_wq;
- struct workqueue_struct *r_end_wq;
-
- struct timer_list wtimer;
-
- struct pblk_gc gc;
-};
-
-struct pblk_line_ws {
- struct pblk *pblk;
- struct pblk_line *line;
- void *priv;
- struct work_struct ws;
-};
-
-#define pblk_g_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_g_ctx))
-#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx))
-
-#define pblk_err(pblk, fmt, ...) \
- pr_err("pblk %s: " fmt, pblk->disk->disk_name, ##__VA_ARGS__)
-#define pblk_info(pblk, fmt, ...) \
- pr_info("pblk %s: " fmt, pblk->disk->disk_name, ##__VA_ARGS__)
-#define pblk_warn(pblk, fmt, ...) \
- pr_warn("pblk %s: " fmt, pblk->disk->disk_name, ##__VA_ARGS__)
-#define pblk_debug(pblk, fmt, ...) \
- pr_debug("pblk %s: " fmt, pblk->disk->disk_name, ##__VA_ARGS__)
-
-/*
- * pblk ring buffer operations
- */
-int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold,
- unsigned int seg_sz);
-int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
- unsigned int nr_entries, unsigned int *pos);
-int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
- unsigned int *pos);
-void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
- struct pblk_w_ctx w_ctx, unsigned int pos);
-void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
- struct pblk_w_ctx w_ctx, struct pblk_line *line,
- u64 paddr, unsigned int pos);
-struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos);
-void pblk_rb_flush(struct pblk_rb *rb);
-
-void pblk_rb_sync_l2p(struct pblk_rb *rb);
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
- unsigned int pos, unsigned int nr_entries,
- unsigned int count);
-int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
- struct ppa_addr ppa);
-unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
-
-unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
-unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries);
-unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p,
- unsigned int nr_entries);
-void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
-unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb);
-
-unsigned int pblk_rb_read_count(struct pblk_rb *rb);
-unsigned int pblk_rb_sync_count(struct pblk_rb *rb);
-unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);
-
-int pblk_rb_tear_down_check(struct pblk_rb *rb);
-int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos);
-void pblk_rb_free(struct pblk_rb *rb);
-ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf);
-
-/*
- * pblk core
- */
-struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type);
-void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type);
-int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd);
-void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd);
-void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write);
-int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
- struct pblk_c_ctx *c_ctx);
-void pblk_discard(struct pblk *pblk, struct bio *bio);
-struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk);
-struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk,
- struct nvm_chk_meta *lp,
- struct ppa_addr ppa);
-void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
-void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
-int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd, void *buf);
-int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd, void *buf);
-int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
-void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd);
-struct pblk_line *pblk_line_get(struct pblk *pblk);
-struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
-void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa);
-void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd);
-int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
-void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
-struct pblk_line *pblk_line_get_data(struct pblk *pblk);
-struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
-int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
-int pblk_line_is_full(struct pblk_line *line);
-void pblk_line_free(struct pblk_line *line);
-void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line);
-void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
-void pblk_line_close_ws(struct work_struct *work);
-void pblk_pipeline_stop(struct pblk *pblk);
-void __pblk_pipeline_stop(struct pblk *pblk);
-void __pblk_pipeline_flush(struct pblk *pblk);
-void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
- void (*work)(struct work_struct *), gfp_t gfp_mask,
- struct workqueue_struct *wq);
-u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line);
-int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line);
-int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
- void *emeta_buf);
-int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
-void pblk_line_put(struct kref *ref);
-void pblk_line_put_wq(struct kref *ref);
-struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
-u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line);
-void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
-u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
-u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
-int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
- unsigned long secs_to_flush, bool skip_meta);
-void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa,
- unsigned long *lun_bitmap);
-void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa);
-void pblk_up_chunk(struct pblk *pblk, struct ppa_addr ppa);
-void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap);
-int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
- int nr_pages);
-void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
- int nr_pages);
-void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa);
-void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr);
-void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa);
-void pblk_update_map_cache(struct pblk *pblk, sector_t lba,
- struct ppa_addr ppa);
-void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
- struct ppa_addr ppa, struct ppa_addr entry_line);
-int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
- struct pblk_line *gc_line, u64 paddr);
-void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
- u64 *lba_list, int nr_secs);
-int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
- sector_t blba, int nr_secs, bool *from_cache);
-void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd);
-void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd);
-
-/*
- * pblk user I/O write path
- */
-void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
- unsigned long flags);
-int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq);
-
-/*
- * pblk map
- */
-int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
- unsigned int sentry, unsigned long *lun_bitmap,
- unsigned int valid_secs, struct ppa_addr *erase_ppa);
-int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
- unsigned long *lun_bitmap, unsigned int valid_secs,
- unsigned int off);
-
-/*
- * pblk write thread
- */
-int pblk_write_ts(void *data);
-void pblk_write_timer_fn(struct timer_list *t);
-void pblk_write_should_kick(struct pblk *pblk);
-void pblk_write_kick(struct pblk *pblk);
-
-/*
- * pblk read path
- */
-extern struct bio_set pblk_bio_set;
-void pblk_submit_read(struct pblk *pblk, struct bio *bio);
-int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq);
-/*
- * pblk recovery
- */
-struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
-int pblk_recov_pad(struct pblk *pblk);
-int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta);
-
-/*
- * pblk gc
- */
-#define PBLK_GC_MAX_READERS 8 /* Max number of outstanding GC reader jobs */
-#define PBLK_GC_RQ_QD 128 /* Queue depth for inflight GC requests */
-#define PBLK_GC_L_QD 4 /* Queue depth for inflight GC lines */
-
-int pblk_gc_init(struct pblk *pblk);
-void pblk_gc_exit(struct pblk *pblk, bool graceful);
-void pblk_gc_should_start(struct pblk *pblk);
-void pblk_gc_should_stop(struct pblk *pblk);
-void pblk_gc_should_kick(struct pblk *pblk);
-void pblk_gc_free_full_lines(struct pblk *pblk);
-void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
- int *gc_active);
-int pblk_gc_sysfs_force(struct pblk *pblk, int force);
-void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line);
-
-/*
- * pblk rate limiter
- */
-void pblk_rl_init(struct pblk_rl *rl, int budget, int threshold);
-void pblk_rl_free(struct pblk_rl *rl);
-void pblk_rl_update_rates(struct pblk_rl *rl);
-int pblk_rl_high_thrs(struct pblk_rl *rl);
-unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
-unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl);
-int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
-void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries);
-void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
-int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
-void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
-void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
-int pblk_rl_max_io(struct pblk_rl *rl);
-void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
-void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
- bool used);
-int pblk_rl_is_limit(struct pblk_rl *rl);
-
-void pblk_rl_werr_line_in(struct pblk_rl *rl);
-void pblk_rl_werr_line_out(struct pblk_rl *rl);
-
-/*
- * pblk sysfs
- */
-int pblk_sysfs_init(struct gendisk *tdisk);
-void pblk_sysfs_exit(struct gendisk *tdisk);
-
-static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx)
-{
- return c_ctx - sizeof(struct nvm_rq);
-}
-
-static inline void *emeta_to_bb(struct line_emeta *emeta)
-{
- return emeta->bb_bitmap;
-}
-
-static inline void *emeta_to_wa(struct pblk_line_meta *lm,
- struct line_emeta *emeta)
-{
- return emeta->bb_bitmap + lm->blk_bitmap_len;
-}
-
-static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta)
-{
- return ((void *)emeta + pblk->lm.emeta_len[1]);
-}
-
-static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta)
-{
- return (emeta_to_lbas(pblk, emeta) + pblk->lm.emeta_len[2]);
-}
-
-static inline int pblk_line_vsc(struct pblk_line *line)
-{
- return le32_to_cpu(*line->vsc);
-}
-
-static inline int pblk_ppa_to_line_id(struct ppa_addr p)
-{
- return p.a.blk;
-}
-
-static inline struct pblk_line *pblk_ppa_to_line(struct pblk *pblk,
- struct ppa_addr p)
-{
- return &pblk->lines[pblk_ppa_to_line_id(p)];
-}
-
-static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p)
-{
- return p.a.lun * geo->num_ch + p.a.ch;
-}
-
-static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr,
- u64 line_id)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct ppa_addr ppa;
-
- if (geo->version == NVM_OCSSD_SPEC_12) {
- struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
-
- ppa.ppa = 0;
- ppa.g.blk = line_id;
- ppa.g.pg = (paddr & ppaf->pg_mask) >> ppaf->pg_offset;
- ppa.g.lun = (paddr & ppaf->lun_mask) >> ppaf->lun_offset;
- ppa.g.ch = (paddr & ppaf->ch_mask) >> ppaf->ch_offset;
- ppa.g.pl = (paddr & ppaf->pln_mask) >> ppaf->pln_offset;
- ppa.g.sec = (paddr & ppaf->sec_mask) >> ppaf->sec_offset;
- } else {
- struct pblk_addrf *uaddrf = &pblk->uaddrf;
- int secs, chnls, luns;
-
- ppa.ppa = 0;
-
- ppa.m.chk = line_id;
-
- paddr = div_u64_rem(paddr, uaddrf->sec_stripe, &secs);
- ppa.m.sec = secs;
-
- paddr = div_u64_rem(paddr, uaddrf->ch_stripe, &chnls);
- ppa.m.grp = chnls;
-
- paddr = div_u64_rem(paddr, uaddrf->lun_stripe, &luns);
- ppa.m.pu = luns;
-
- ppa.m.sec += uaddrf->sec_stripe * paddr;
- }
-
- return ppa;
-}
-
-static inline struct nvm_chk_meta *pblk_dev_ppa_to_chunk(struct pblk *pblk,
- struct ppa_addr p)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line *line = pblk_ppa_to_line(pblk, p);
- int pos = pblk_ppa_to_pos(geo, p);
-
- return &line->chks[pos];
-}
-
-static inline u64 pblk_dev_ppa_to_chunk_addr(struct pblk *pblk,
- struct ppa_addr p)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
-
- return dev_to_chunk_addr(dev->parent, &pblk->addrf, p);
-}
-
-static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk,
- struct ppa_addr p)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- u64 paddr;
-
- if (geo->version == NVM_OCSSD_SPEC_12) {
- struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
-
- paddr = (u64)p.g.ch << ppaf->ch_offset;
- paddr |= (u64)p.g.lun << ppaf->lun_offset;
- paddr |= (u64)p.g.pg << ppaf->pg_offset;
- paddr |= (u64)p.g.pl << ppaf->pln_offset;
- paddr |= (u64)p.g.sec << ppaf->sec_offset;
- } else {
- struct pblk_addrf *uaddrf = &pblk->uaddrf;
- u64 secs = p.m.sec;
- int sec_stripe;
-
- paddr = (u64)p.m.grp * uaddrf->sec_stripe;
- paddr += (u64)p.m.pu * uaddrf->sec_lun_stripe;
-
- secs = div_u64_rem(secs, uaddrf->sec_stripe, &sec_stripe);
- paddr += secs * uaddrf->sec_ws_stripe;
- paddr += sec_stripe;
- }
-
- return paddr;
-}
-
-static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
-
- return nvm_ppa32_to_ppa64(dev->parent, &pblk->addrf, ppa32);
-}
-
-static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
-
- return nvm_ppa64_to_ppa32(dev->parent, &pblk->addrf, ppa64);
-}
-
-static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk,
- sector_t lba)
-{
- struct ppa_addr ppa;
-
- if (pblk->addrf_len < 32) {
- u32 *map = (u32 *)pblk->trans_map;
-
- ppa = pblk_ppa32_to_ppa64(pblk, map[lba]);
- } else {
- struct ppa_addr *map = (struct ppa_addr *)pblk->trans_map;
-
- ppa = map[lba];
- }
-
- return ppa;
-}
-
-static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba,
- struct ppa_addr ppa)
-{
- if (pblk->addrf_len < 32) {
- u32 *map = (u32 *)pblk->trans_map;
-
- map[lba] = pblk_ppa64_to_ppa32(pblk, ppa);
- } else {
- u64 *map = (u64 *)pblk->trans_map;
-
- map[lba] = ppa.ppa;
- }
-}
-
-static inline int pblk_ppa_empty(struct ppa_addr ppa_addr)
-{
- return (ppa_addr.ppa == ADDR_EMPTY);
-}
-
-static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr)
-{
- ppa_addr->ppa = ADDR_EMPTY;
-}
-
-static inline bool pblk_ppa_comp(struct ppa_addr lppa, struct ppa_addr rppa)
-{
- return (lppa.ppa == rppa.ppa);
-}
-
-static inline int pblk_addr_in_cache(struct ppa_addr ppa)
-{
- return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached);
-}
-
-static inline int pblk_addr_to_cacheline(struct ppa_addr ppa)
-{
- return ppa.c.line;
-}
-
-static inline struct ppa_addr pblk_cacheline_to_addr(int addr)
-{
- struct ppa_addr p;
-
- p.c.line = addr;
- p.c.is_cached = 1;
-
- return p;
-}
-
-static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk,
- struct line_header *header)
-{
- u32 crc = ~(u32)0;
-
- crc = crc32_le(crc, (unsigned char *)header + sizeof(crc),
- sizeof(struct line_header) - sizeof(crc));
-
- return crc;
-}
-
-static inline u32 pblk_calc_smeta_crc(struct pblk *pblk,
- struct line_smeta *smeta)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- u32 crc = ~(u32)0;
-
- crc = crc32_le(crc, (unsigned char *)smeta +
- sizeof(struct line_header) + sizeof(crc),
- lm->smeta_len -
- sizeof(struct line_header) - sizeof(crc));
-
- return crc;
-}
-
-static inline u32 pblk_calc_emeta_crc(struct pblk *pblk,
- struct line_emeta *emeta)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- u32 crc = ~(u32)0;
-
- crc = crc32_le(crc, (unsigned char *)emeta +
- sizeof(struct line_header) + sizeof(crc),
- lm->emeta_len[0] -
- sizeof(struct line_header) - sizeof(crc));
-
- return crc;
-}
-
-static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs)
-{
- return !(nr_secs % pblk->min_write_pgs);
-}
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-static inline void print_ppa(struct pblk *pblk, struct ppa_addr *p,
- char *msg, int error)
-{
- struct nvm_geo *geo = &pblk->dev->geo;
-
- if (p->c.is_cached) {
- pblk_err(pblk, "ppa: (%s: %x) cache line: %llu\n",
- msg, error, (u64)p->c.line);
- } else if (geo->version == NVM_OCSSD_SPEC_12) {
- pblk_err(pblk, "ppa: (%s: %x):ch:%d,lun:%d,blk:%d,pg:%d,pl:%d,sec:%d\n",
- msg, error,
- p->g.ch, p->g.lun, p->g.blk,
- p->g.pg, p->g.pl, p->g.sec);
- } else {
- pblk_err(pblk, "ppa: (%s: %x):ch:%d,lun:%d,chk:%d,sec:%d\n",
- msg, error,
- p->m.grp, p->m.pu, p->m.chk, p->m.sec);
- }
-}
-
-static inline void pblk_print_failed_rqd(struct pblk *pblk, struct nvm_rq *rqd,
- int error)
-{
- int bit = -1;
-
- if (rqd->nr_ppas == 1) {
- print_ppa(pblk, &rqd->ppa_addr, "rqd", error);
- return;
- }
-
- while ((bit = find_next_bit((void *)&rqd->ppa_status, rqd->nr_ppas,
- bit + 1)) < rqd->nr_ppas) {
- print_ppa(pblk, &rqd->ppa_list[bit], "rqd", error);
- }
-
- pblk_err(pblk, "error:%d, ppa_status:%llx\n", error, rqd->ppa_status);
-}
-
-static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev,
- struct ppa_addr *ppas, int nr_ppas)
-{
- struct nvm_geo *geo = &tgt_dev->geo;
- struct ppa_addr *ppa;
- int i;
-
- for (i = 0; i < nr_ppas; i++) {
- ppa = &ppas[i];
-
- if (geo->version == NVM_OCSSD_SPEC_12) {
- if (!ppa->c.is_cached &&
- ppa->g.ch < geo->num_ch &&
- ppa->g.lun < geo->num_lun &&
- ppa->g.pl < geo->num_pln &&
- ppa->g.blk < geo->num_chk &&
- ppa->g.pg < geo->num_pg &&
- ppa->g.sec < geo->ws_min)
- continue;
- } else {
- if (!ppa->c.is_cached &&
- ppa->m.grp < geo->num_ch &&
- ppa->m.pu < geo->num_lun &&
- ppa->m.chk < geo->num_chk &&
- ppa->m.sec < geo->clba)
- continue;
- }
-
- print_ppa(tgt_dev->q->queuedata, ppa, "boundary", i);
-
- return 1;
- }
- return 0;
-}
-
-static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
-
- if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) {
- WARN_ON(1);
- return -EINVAL;
- }
-
- if (rqd->opcode == NVM_OP_PWRITE) {
- struct pblk_line *line;
- int i;
-
- for (i = 0; i < rqd->nr_ppas; i++) {
- line = pblk_ppa_to_line(pblk, ppa_list[i]);
-
- spin_lock(&line->lock);
- if (line->state != PBLK_LINESTATE_OPEN) {
- pblk_err(pblk, "bad ppa: line:%d,state:%d\n",
- line->id, line->state);
- WARN_ON(1);
- spin_unlock(&line->lock);
- return -EINVAL;
- }
- spin_unlock(&line->lock);
- }
- }
-
- return 0;
-}
-#endif
-
-static inline int pblk_boundary_paddr_checks(struct pblk *pblk, u64 paddr)
-{
- struct pblk_line_meta *lm = &pblk->lm;
-
- if (paddr > lm->sec_per_line)
- return 1;
-
- return 0;
-}
-
-static inline unsigned int pblk_get_bi_idx(struct bio *bio)
-{
- return bio->bi_iter.bi_idx;
-}
-
-static inline sector_t pblk_get_lba(struct bio *bio)
-{
- return bio->bi_iter.bi_sector / NR_PHY_IN_LOG;
-}
-
-static inline unsigned int pblk_get_secs(struct bio *bio)
-{
- return bio->bi_iter.bi_size / PBLK_EXPOSED_PAGE_SIZE;
-}
-
-static inline char *pblk_disk_name(struct pblk *pblk)
-{
- struct gendisk *disk = pblk->disk;
-
- return disk->disk_name;
-}
-
-static inline unsigned int pblk_get_min_chks(struct pblk *pblk)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- /* In a worst-case scenario every line will have OP invalid sectors.
- * We will then need a minimum of 1/OP lines to free up a single line
- */
-
- return DIV_ROUND_UP(100, pblk->op) * lm->blk_per_line;
-}
-
-static inline struct pblk_sec_meta *pblk_get_meta(struct pblk *pblk,
- void *meta, int index)
-{
- return meta +
- max_t(int, sizeof(struct pblk_sec_meta), pblk->oob_meta_size)
- * index;
-}
-
-static inline int pblk_dma_meta_size(struct pblk *pblk)
-{
- return max_t(int, sizeof(struct pblk_sec_meta), pblk->oob_meta_size)
- * NVM_MAX_VLBA;
-}
-
-static inline int pblk_is_oob_meta_supported(struct pblk *pblk)
-{
- return pblk->oob_meta_size >= sizeof(struct pblk_sec_meta);
-}
-#endif /* PBLK_H_ */
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 0602e82a9516..f45fb372e51b 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -15,6 +15,7 @@ if MD
config BLK_DEV_MD
tristate "RAID support"
+ select BLOCK_HOLDER_DEPRECATED if SYSFS
help
This driver lets you combine several hard disk partitions into one
logical block device. This can be used to simply append one
@@ -201,6 +202,7 @@ config BLK_DEV_DM_BUILTIN
config BLK_DEV_DM
tristate "Device mapper support"
+ select BLOCK_HOLDER_DEPRECATED if SYSFS
select BLK_DEV_DM_BUILTIN
depends on DAX || DAX=n
help
@@ -340,7 +342,7 @@ config DM_WRITECACHE
config DM_EBS
tristate "Emulated block size target (EXPERIMENTAL)"
- depends on BLK_DEV_DM
+ depends on BLK_DEV_DM && !HIGHMEM
select DM_BUFIO
help
dm-ebs emulates smaller logical block size on backing devices
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index a74aaf8b1445..816945eeed7f 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -96,6 +96,10 @@ ifeq ($(CONFIG_BLK_DEV_ZONED),y)
dm-mod-objs += dm-zone.o
endif
+ifeq ($(CONFIG_IMA),y)
+dm-mod-objs += dm-ima.o
+endif
+
ifeq ($(CONFIG_DM_VERITY_FEC),y)
dm-verity-objs += dm-verity-fec.o
endif
diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
index d1ca4d059c20..cf3e8096942a 100644
--- a/drivers/md/bcache/Kconfig
+++ b/drivers/md/bcache/Kconfig
@@ -2,6 +2,7 @@
config BCACHE
tristate "Block device as cache"
+ select BLOCK_HOLDER_DEPRECATED if SYSFS
select CRC64
help
Allows a block device to be used as cache for other devices; uses
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 183a58c89377..0595559de174 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -378,7 +378,7 @@ static void do_btree_node_write(struct btree *b)
struct bvec_iter_all iter_all;
bio_for_each_segment_all(bv, b->bio, iter_all) {
- memcpy(page_address(bv->bv_page), addr, PAGE_SIZE);
+ memcpy(bvec_virt(bv), addr, PAGE_SIZE);
addr += PAGE_SIZE;
}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 185246a0d855..f2874c77ff79 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -885,11 +885,6 @@ static void bcache_device_free(struct bcache_device *d)
bcache_device_detach(d);
if (disk) {
- bool disk_added = (disk->flags & GENHD_FL_UP) != 0;
-
- if (disk_added)
- del_gendisk(disk);
-
blk_cleanup_disk(disk);
ida_simple_remove(&bcache_device_idx,
first_minor_to_idx(disk->first_minor));
@@ -931,20 +926,20 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long);
d->full_dirty_stripes = kvzalloc(n, GFP_KERNEL);
if (!d->full_dirty_stripes)
- return -ENOMEM;
+ goto out_free_stripe_sectors_dirty;
idx = ida_simple_get(&bcache_device_idx, 0,
BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);
if (idx < 0)
- return idx;
+ goto out_free_full_dirty_stripes;
if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio),
BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
- goto err;
+ goto out_ida_remove;
d->disk = blk_alloc_disk(NUMA_NO_NODE);
if (!d->disk)
- goto err;
+ goto out_bioset_exit;
set_capacity(d->disk, sectors);
snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);
@@ -987,8 +982,14 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
return 0;
-err:
+out_bioset_exit:
+ bioset_exit(&d->bio_split);
+out_ida_remove:
ida_simple_remove(&bcache_device_idx, idx);
+out_free_full_dirty_stripes:
+ kvfree(d->full_dirty_stripes);
+out_free_stripe_sectors_dirty:
+ kvfree(d->stripe_sectors_dirty);
return -ENOMEM;
}
@@ -1365,8 +1366,10 @@ static void cached_dev_free(struct closure *cl)
mutex_lock(&bch_register_lock);
- if (atomic_read(&dc->running))
+ if (atomic_read(&dc->running)) {
bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
+ del_gendisk(dc->disk.disk);
+ }
bcache_device_free(&dc->disk);
list_del(&dc->list);
@@ -1512,6 +1515,7 @@ static void flash_dev_free(struct closure *cl)
mutex_lock(&bch_register_lock);
atomic_long_sub(bcache_dev_sectors_dirty(d),
&d->c->flash_dev_dirty_sectors);
+ del_gendisk(d->disk);
bcache_device_free(d);
mutex_unlock(&bch_register_lock);
kobject_put(&d->kobj);
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index bca4a7c97da7..b64460a76267 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -15,8 +15,6 @@
#include "closure.h"
-#define PAGE_SECTORS (PAGE_SIZE / 512)
-
struct closure;
#ifdef CONFIG_BCACHE_DEBUG
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 8e4ced5a2516..bdd500447dea 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -3122,6 +3122,30 @@ static void cache_status(struct dm_target *ti, status_type_t type,
DMEMIT(" %s", cache->ctr_args[i]);
if (cache->nr_ctr_args)
DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
+ break;
+
+ case STATUSTYPE_IMA:
+ DMEMIT_TARGET_NAME_VERSION(ti->type);
+ if (get_cache_mode(cache) == CM_FAIL)
+ DMEMIT(",metadata_mode=fail");
+ else if (get_cache_mode(cache) == CM_READ_ONLY)
+ DMEMIT(",metadata_mode=ro");
+ else
+ DMEMIT(",metadata_mode=rw");
+
+ format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
+ DMEMIT(",cache_metadata_device=%s", buf);
+ format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
+ DMEMIT(",cache_device=%s", buf);
+ format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
+ DMEMIT(",cache_origin_device=%s", buf);
+ DMEMIT(",writethrough=%c", writethrough_mode(cache) ? 'y' : 'n');
+ DMEMIT(",writeback=%c", writeback_mode(cache) ? 'y' : 'n');
+ DMEMIT(",passthrough=%c", passthrough_mode(cache) ? 'y' : 'n');
+ DMEMIT(",metadata2=%c", cache->features.metadata_version == 2 ? 'y' : 'n');
+ DMEMIT(",no_discard_passdown=%c", cache->features.discard_passdown ? 'n' : 'y');
+ DMEMIT(";");
+ break;
}
return;
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index a90bdf9b2ca6..84dbe08ad205 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -1499,6 +1499,11 @@ static void clone_status(struct dm_target *ti, status_type_t type,
for (i = 0; i < clone->nr_ctr_args; i++)
DMEMIT(" %s", clone->ctr_args[i]);
+ break;
+
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
return;
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index edc1553c4eea..55dccdfbcb22 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -18,6 +18,7 @@
#include <trace/events/block.h>
#include "dm.h"
+#include "dm-ima.h"
#define DM_RESERVED_MAX_IOS 1024
@@ -119,6 +120,10 @@ struct mapped_device {
unsigned int nr_zones;
unsigned int *zwp_offset;
#endif
+
+#ifdef CONFIG_IMA
+ struct dm_ima_measurements ima;
+#endif
};
/*
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 50f4cbd600d5..916b7da16de2 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -2223,11 +2223,11 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io)
if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) ||
(bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) {
/*
- * in_irq(): Crypto API's skcipher_walk_first() refuses to work in hard IRQ context.
+ * in_hardirq(): Crypto API's skcipher_walk_first() refuses to work in hard IRQ context.
* irqs_disabled(): the kernel may run some IO completion from the idle thread, but
* it is being executed with irqs disabled.
*/
- if (in_irq() || irqs_disabled()) {
+ if (in_hardirq() || irqs_disabled()) {
tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work);
tasklet_schedule(&io->tasklet);
return;
@@ -2661,7 +2661,12 @@ static void *crypt_page_alloc(gfp_t gfp_mask, void *pool_data)
struct crypt_config *cc = pool_data;
struct page *page;
- if (unlikely(percpu_counter_compare(&cc->n_allocated_pages, dm_crypt_pages_per_client) >= 0) &&
+ /*
+ * Note, percpu_counter_read_positive() may over (and under) estimate
+ * the current usage by at most (batch - 1) * num_online_cpus() pages,
+ * but avoids potential spinlock contention of an exact result.
+ */
+ if (unlikely(percpu_counter_read_positive(&cc->n_allocated_pages) >= dm_crypt_pages_per_client) &&
likely(gfp_mask & __GFP_NORETRY))
return NULL;
@@ -3485,7 +3490,34 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags))
DMEMIT(" iv_large_sectors");
}
+ break;
+ case STATUSTYPE_IMA:
+ DMEMIT_TARGET_NAME_VERSION(ti->type);
+ DMEMIT(",allow_discards=%c", ti->num_discard_bios ? 'y' : 'n');
+ DMEMIT(",same_cpu_crypt=%c", test_bit(DM_CRYPT_SAME_CPU, &cc->flags) ? 'y' : 'n');
+ DMEMIT(",submit_from_crypt_cpus=%c", test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags) ?
+ 'y' : 'n');
+ DMEMIT(",no_read_workqueue=%c", test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags) ?
+ 'y' : 'n');
+ DMEMIT(",no_write_workqueue=%c", test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags) ?
+ 'y' : 'n');
+ DMEMIT(",iv_large_sectors=%c", test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags) ?
+ 'y' : 'n');
+
+ if (cc->on_disk_tag_size)
+ DMEMIT(",integrity_tag_size=%u,cipher_auth=%s",
+ cc->on_disk_tag_size, cc->cipher_auth);
+ if (cc->sector_size != (1 << SECTOR_SHIFT))
+ DMEMIT(",sector_size=%d", cc->sector_size);
+ if (cc->cipher_string)
+ DMEMIT(",cipher_string=%s", cc->cipher_string);
+
+ DMEMIT(",key_size=%u", cc->key_size);
+ DMEMIT(",key_parts=%u", cc->key_parts);
+ DMEMIT(",key_extra_size=%u", cc->key_extra_size);
+ DMEMIT(",key_mac_size=%u", cc->key_mac_size);
+ DMEMIT(";");
break;
}
}
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 2628a832787b..59e51d285b0e 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -326,6 +326,10 @@ static void delay_status(struct dm_target *ti, status_type_t type,
DMEMIT_DELAY_CLASS(&dc->flush);
}
break;
+
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
}
diff --git a/drivers/md/dm-dust.c b/drivers/md/dm-dust.c
index cbe1058ee589..3163e2b1418e 100644
--- a/drivers/md/dm-dust.c
+++ b/drivers/md/dm-dust.c
@@ -527,6 +527,10 @@ static void dust_status(struct dm_target *ti, status_type_t type,
DMEMIT("%s %llu %u", dd->dev->name,
(unsigned long long)dd->start, dd->blksz);
break;
+
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
}
diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c
index 71475a2410be..d25989660a76 100644
--- a/drivers/md/dm-ebs-target.c
+++ b/drivers/md/dm-ebs-target.c
@@ -74,7 +74,7 @@ static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bv
if (unlikely(!bv->bv_page || !bv_len))
return -EIO;
- pa = page_address(bv->bv_page) + bv->bv_offset;
+ pa = bvec_virt(bv);
/* Handle overlapping page <-> blocks */
while (bv_len) {
@@ -401,6 +401,9 @@ static void ebs_status(struct dm_target *ti, status_type_t type,
snprintf(result, maxlen, ec->u_bs_set ? "%s %llu %u %u" : "%s %llu %u",
ec->dev->name, (unsigned long long) ec->start, ec->e_bs, ec->u_bs);
break;
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
}
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index 3b748393fca5..2a78f6874143 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -1644,6 +1644,10 @@ static void era_status(struct dm_target *ti, status_type_t type,
format_dev_t(buf, era->origin_dev->bdev->bd_dev);
DMEMIT("%s %u", buf, era->sectors_per_block);
break;
+
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
return;
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 5877220c01ed..4b94ffe6f2d4 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -440,6 +440,10 @@ static void flakey_status(struct dm_target *ti, status_type_t type,
fc->corrupt_bio_value, fc->corrupt_bio_flags);
break;
+
+ case STATUSTYPE_IMA:
+ result[0] = '\0';
+ break;
}
}
diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c
new file mode 100644
index 000000000000..3fd69ab12a8e
--- /dev/null
+++ b/drivers/md/dm-ima.c
@@ -0,0 +1,750 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Microsoft Corporation
+ *
+ * Author: Tushar Sugandhi <tusharsu@linux.microsoft.com>
+ *
+ * File: dm-ima.c
+ * Enables IMA measurements for DM targets
+ */
+
+#include "dm-core.h"
+#include "dm-ima.h"
+
+#include <linux/ima.h>
+#include <crypto/hash.h>
+#include <linux/crypto.h>
+#include <crypto/hash_info.h>
+
+#define DM_MSG_PREFIX "ima"
+
+/*
+ * Escape the characters that carry meaning in the key1=val1,key2=val2;
+ * measurement format. Every backslash, ';', '=' and ',' found in *buf gets a
+ * backslash inserted in front of it, so that consumers can still split the
+ * resulting key-value pairs unambiguously.
+ *
+ * The string is expanded in place, so the buffer must have room for one extra
+ * byte per escaped character (the callers in this file allocate twice the
+ * maximum name/uuid length).
+ */
+static void fix_separator_chars(char **buf)
+{
+	char *s = *buf;
+	int len = strlen(s);
+	int extra = 0;
+	int src, dst;
+
+	/* First pass: count how many characters need an escape prefix. */
+	for (src = 0; src < len; src++) {
+		switch (s[src]) {
+		case '\\':
+		case ';':
+		case '=':
+		case ',':
+			extra++;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (extra == 0)
+		return;
+
+	/*
+	 * Second pass: walk backwards so that every character is moved to its
+	 * final position before the slot it came from is overwritten.
+	 */
+	dst = len - 1 + extra;
+	for (src = len - 1; src >= 0; src--) {
+		char c = s[src];
+
+		s[dst--] = c;
+		if (c == '\\' || c == ';' || c == '=' || c == ',')
+			s[dst--] = '\\';
+	}
+}
+
+/*
+ * Zeroed allocation helper for the IMA measurement buffers.
+ * When @noio is set the allocation is wrapped in a
+ * memalloc_noio_save()/memalloc_noio_restore() pair.
+ */
+static void *dm_ima_alloc(size_t len, gfp_t flags, bool noio)
+{
+	unsigned int saved_flags;
+	void *mem;
+
+	if (!noio)
+		return kzalloc(len, flags);
+
+	saved_flags = memalloc_noio_save();
+	mem = kzalloc(len, flags);
+	memalloc_noio_restore(saved_flags);
+
+	return mem;
+}
+
+/*
+ * Allocate buffers for the device name and uuid, fill them in from @md and
+ * escape any separator characters they contain.
+ *
+ * Returns 0 on success, in which case the caller owns *dev_name and *dev_uuid.
+ * On failure both buffers are freed, both pointers are set to NULL and a
+ * non-zero error code is returned.
+ */
+static int dm_ima_alloc_and_copy_name_uuid(struct mapped_device *md, char **dev_name,
+					   char **dev_uuid, bool noio)
+{
+	int ret = -ENOMEM;
+
+	*dev_name = dm_ima_alloc(DM_NAME_LEN*2, GFP_KERNEL, noio);
+	if (*dev_name == NULL)
+		goto fail;
+
+	*dev_uuid = dm_ima_alloc(DM_UUID_LEN*2, GFP_KERNEL, noio);
+	if (*dev_uuid == NULL)
+		goto fail;
+
+	ret = dm_copy_name_and_uuid(md, *dev_name, *dev_uuid);
+	if (ret != 0)
+		goto fail;
+
+	fix_separator_chars(dev_name);
+	fix_separator_chars(dev_uuid);
+
+	return 0;
+
+fail:
+	kfree(*dev_name);
+	kfree(*dev_uuid);
+	*dev_name = NULL;
+	*dev_uuid = NULL;
+	return ret;
+}
+
+/*
+ * Allocate *device_data and format the per-device record
+ * "name=...,uuid=...,major=...,minor=...,minor_count=...,num_targets=...;"
+ * into it.
+ *
+ * Returns 0 on success (the caller owns *device_data) or a non-zero error
+ * code if the name/uuid lookup or an allocation fails.
+ */
+static int dm_ima_alloc_and_copy_device_data(struct mapped_device *md, char **device_data,
+					     unsigned int num_targets, bool noio)
+{
+	char *name = NULL, *uuid = NULL;
+	int ret;
+
+	ret = dm_ima_alloc_and_copy_name_uuid(md, &name, &uuid, noio);
+	if (ret)
+		return ret;
+
+	*device_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, GFP_KERNEL, noio);
+	if (*device_data)
+		scnprintf(*device_data, DM_IMA_DEVICE_BUF_LEN,
+			  "name=%s,uuid=%s,major=%d,minor=%d,minor_count=%d,num_targets=%u;",
+			  name, uuid, md->disk->major, md->disk->first_minor,
+			  md->disk->minors, num_targets);
+	else
+		ret = -ENOMEM;
+
+	/* The temporary name/uuid copies are no longer needed either way. */
+	kfree(name);
+	kfree(uuid);
+	return ret;
+}
+
+/*
+ * Hand @buf_len bytes of @buf to IMA as a critical-data measurement for
+ * DM_NAME under @event_name. When @noio is set the call is wrapped in a
+ * memalloc_noio_save()/memalloc_noio_restore() pair.
+ */
+static void dm_ima_measure_data(const char *event_name, const void *buf, size_t buf_len,
+				bool noio)
+{
+	unsigned int saved_flags;
+
+	if (!noio) {
+		ima_measure_critical_data(DM_NAME, event_name, buf, buf_len, false);
+		return;
+	}
+
+	saved_flags = memalloc_noio_save();
+	ima_measure_critical_data(DM_NAME, event_name, buf, buf_len, false);
+	memalloc_noio_restore(saved_flags);
+}
+
+/*
+ * Internal function to allocate and copy current device capacity for IMA measurements.
+ *
+ * On success *capacity_str points to a newly allocated, NUL-terminated
+ * "current_device_capacity=<get_capacity(md->disk)>;" string that the caller
+ * must kfree(). Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int dm_ima_alloc_and_copy_capacity_str(struct mapped_device *md, char **capacity_str,
+					      bool noio)
+{
+	sector_t capacity;
+
+	capacity = get_capacity(md->disk);
+
+	*capacity_str = dm_ima_alloc(DM_IMA_DEVICE_CAPACITY_BUF_LEN, GFP_KERNEL, noio);
+	if (!(*capacity_str))
+		return -ENOMEM;
+
+	/* Bound the write by the size that was actually allocated above. */
+	scnprintf(*capacity_str, DM_IMA_DEVICE_CAPACITY_BUF_LEN, "current_device_capacity=%llu;",
+		  capacity);
+
+	return 0;
+}
+
+/*
+ * Put the per-device IMA bookkeeping into its initial state: everything
+ * zeroed, with the cached length of DM_IMA_VERSION_STR filled in.
+ */
+void dm_ima_reset_data(struct mapped_device *md)
+{
+	struct dm_ima_measurements *ima = &md->ima;
+
+	memset(ima, 0, sizeof(*ima));
+	ima->dm_version_str_len = strlen(DM_IMA_VERSION_STR);
+}
+
+/*
+ * Build up the IMA data for each target, and finally measure.
+ *
+ * For every target in @table the target metadata (index, begin, len) and the
+ * output of the target's status() callback for STATUSTYPE_IMA are appended to
+ * a DM_IMA_MEASUREMENT_BUF_LEN buffer that starts with the DM version string
+ * and the device data. The buffer is measured as a "dm_table_load" event and
+ * fed into a DM_IMA_TABLE_HASH_ALG hash. The resulting "<alg>:<hex digest>"
+ * string and the device data are stored in table->md->ima.inactive_table.
+ *
+ * Any failure returns without updating the stored inactive-table state;
+ * records that were already measured are not retracted.
+ */
+void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags)
+{
+ size_t device_data_buf_len, target_metadata_buf_len, target_data_buf_len, l = 0;
+ char *target_metadata_buf = NULL, *target_data_buf = NULL, *digest_buf = NULL;
+ char *ima_buf = NULL, *device_data_buf = NULL;
+ int digest_size, last_target_measured = -1, r;
+ status_type_t type = STATUSTYPE_IMA;
+ size_t cur_total_buf_len = 0;
+ unsigned int num_targets, i;
+ SHASH_DESC_ON_STACK(shash, NULL);
+ struct crypto_shash *tfm = NULL;
+ u8 *digest = NULL;
+ bool noio = false;
+ /*
+ * In below hash_alg_prefix_len assignment +1 is for the additional char (':'),
+ * when prefixing the hash value with the hash algorithm name. e.g. sha256:<hash_value>.
+ */
+ const size_t hash_alg_prefix_len = strlen(DM_IMA_TABLE_HASH_ALG) + 1;
+ char table_load_event_name[] = "dm_table_load";
+
+ ima_buf = dm_ima_alloc(DM_IMA_MEASUREMENT_BUF_LEN, GFP_KERNEL, noio);
+ if (!ima_buf)
+ return;
+
+ target_metadata_buf = dm_ima_alloc(DM_IMA_TARGET_METADATA_BUF_LEN, GFP_KERNEL, noio);
+ if (!target_metadata_buf)
+ goto error;
+
+ target_data_buf = dm_ima_alloc(DM_IMA_TARGET_DATA_BUF_LEN, GFP_KERNEL, noio);
+ if (!target_data_buf)
+ goto error;
+
+ num_targets = dm_table_get_num_targets(table);
+
+ if (dm_ima_alloc_and_copy_device_data(table->md, &device_data_buf, num_targets, noio))
+ goto error;
+
+ tfm = crypto_alloc_shash(DM_IMA_TABLE_HASH_ALG, 0, 0);
+ if (IS_ERR(tfm))
+ goto error;
+
+ shash->tfm = tfm;
+ digest_size = crypto_shash_digestsize(tfm);
+ digest = dm_ima_alloc(digest_size, GFP_KERNEL, noio);
+ if (!digest)
+ goto error;
+
+ r = crypto_shash_init(shash);
+ if (r)
+ goto error;
+
+ /* Every record starts with the DM version string followed by the device data. */
+ memcpy(ima_buf + l, DM_IMA_VERSION_STR, table->md->ima.dm_version_str_len);
+ l += table->md->ima.dm_version_str_len;
+
+ device_data_buf_len = strlen(device_data_buf);
+ memcpy(ima_buf + l, device_data_buf, device_data_buf_len);
+ l += device_data_buf_len;
+
+ for (i = 0; i < num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(table, i);
+
+ if (!ti)
+ goto error;
+
+ last_target_measured = 0;
+
+ /*
+ * First retrieve the target metadata.
+ */
+ scnprintf(target_metadata_buf, DM_IMA_TARGET_METADATA_BUF_LEN,
+ "target_index=%d,target_begin=%llu,target_len=%llu,",
+ i, ti->begin, ti->len);
+ target_metadata_buf_len = strlen(target_metadata_buf);
+
+ /*
+ * Then retrieve the actual target data.
+ */
+ if (ti->type->status)
+ ti->type->status(ti, type, status_flags, target_data_buf,
+ DM_IMA_TARGET_DATA_BUF_LEN);
+ else
+ target_data_buf[0] = '\0';
+
+ target_data_buf_len = strlen(target_data_buf);
+
+ /*
+ * Check if the total data can fit into the IMA buffer.
+ */
+ cur_total_buf_len = l + target_metadata_buf_len + target_data_buf_len;
+
+ /*
+ * IMA measurements for DM targets are best-effort.
+ * If the total data buffered so far, including the current target,
+ * is too large to fit into DM_IMA_MEASUREMENT_BUF_LEN, measure what
+ * we have in the current buffer, and continue measuring the remaining
+ * targets by prefixing the device metadata again.
+ */
+ if (unlikely(cur_total_buf_len >= DM_IMA_MEASUREMENT_BUF_LEN)) {
+ dm_ima_measure_data(table_load_event_name, ima_buf, l, noio);
+ r = crypto_shash_update(shash, (const u8 *)ima_buf, l);
+ if (r < 0)
+ goto error;
+
+ memset(ima_buf, 0, DM_IMA_MEASUREMENT_BUF_LEN);
+ l = 0;
+
+ /*
+ * Each new "dm_table_load" entry in IMA log should have device data
+ * prefix, so that multiple records from the same "dm_table_load" for
+ * a given device can be linked together.
+ */
+ memcpy(ima_buf + l, DM_IMA_VERSION_STR, table->md->ima.dm_version_str_len);
+ l += table->md->ima.dm_version_str_len;
+
+ memcpy(ima_buf + l, device_data_buf, device_data_buf_len);
+ l += device_data_buf_len;
+
+ /*
+ * If this iteration of the for loop turns out to be the last target
+ * in the table, dm_ima_measure_data("dm_table_load", ...) doesn't need
+ * to be called again, just the hash needs to be finalized.
+ * "last_target_measured" tracks this state.
+ */
+ last_target_measured = 1;
+ }
+
+ /*
+ * Fill-in all the target metadata, so that multiple targets for the same
+ * device can be linked together.
+ */
+ memcpy(ima_buf + l, target_metadata_buf, target_metadata_buf_len);
+ l += target_metadata_buf_len;
+
+ memcpy(ima_buf + l, target_data_buf, target_data_buf_len);
+ l += target_data_buf_len;
+ }
+
+ /*
+ * last_target_measured is 0 only when the final loop iteration did not
+ * flush the buffer; it stays -1 when the table has no targets.
+ */
+ if (!last_target_measured) {
+ dm_ima_measure_data(table_load_event_name, ima_buf, l, noio);
+
+ r = crypto_shash_update(shash, (const u8 *)ima_buf, l);
+ if (r < 0)
+ goto error;
+ }
+
+ /*
+ * Finalize the table hash, and store it in table->md->ima.inactive_table.hash,
+ * so that the table data can be verified against the future device state change
+ * events, e.g. resume, rename, remove, table-clear etc.
+ */
+ r = crypto_shash_final(shash, digest);
+ if (r < 0)
+ goto error;
+
+ /* Two hex characters per digest byte, plus the "<alg>:" prefix and the terminating NUL. */
+ digest_buf = dm_ima_alloc((digest_size*2) + hash_alg_prefix_len + 1, GFP_KERNEL, noio);
+
+ if (!digest_buf)
+ goto error;
+
+ snprintf(digest_buf, hash_alg_prefix_len + 1, "%s:", DM_IMA_TABLE_HASH_ALG);
+
+ for (i = 0; i < digest_size; i++)
+ snprintf((digest_buf + hash_alg_prefix_len + (i*2)), 3, "%02x", digest[i]);
+
+ /* The inactive slot may share its pointer with the active slot; free it only when it does not. */
+ if (table->md->ima.active_table.hash != table->md->ima.inactive_table.hash)
+ kfree(table->md->ima.inactive_table.hash);
+
+ table->md->ima.inactive_table.hash = digest_buf;
+ table->md->ima.inactive_table.hash_len = strlen(digest_buf);
+ table->md->ima.inactive_table.num_targets = num_targets;
+
+ if (table->md->ima.active_table.device_metadata !=
+ table->md->ima.inactive_table.device_metadata)
+ kfree(table->md->ima.inactive_table.device_metadata);
+
+ table->md->ima.inactive_table.device_metadata = device_data_buf;
+ table->md->ima.inactive_table.device_metadata_len = device_data_buf_len;
+
+ /* digest_buf and device_data_buf are now owned by md->ima; skip the frees under "error". */
+ goto exit;
+error:
+ kfree(digest_buf);
+ kfree(device_data_buf);
+exit:
+ kfree(digest);
+ if (tfm)
+ crypto_free_shash(tfm);
+ kfree(ima_buf);
+ kfree(target_metadata_buf);
+ kfree(target_data_buf);
+}
+
+/*
+ * Measure IMA data on device resume.
+ *
+ * @md:   mapped device that was resumed.
+ * @swap: when true, the inactive table's hash and device metadata are moved
+ *        into the active slot (and the inactive slot is cleared) before the
+ *        measurement is built.
+ *
+ * The measured "dm_device_resume" record is the DM version string, followed
+ * by the active device metadata and "active_table_hash=<hash>;" when they are
+ * present, or by "name=...,uuid=...;device_resume=no_data;" when neither is
+ * available, followed by the current device capacity. Allocation failures
+ * silently skip the measurement.
+ */
+void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap)
+{
+	char *device_table_data, *dev_name = NULL, *dev_uuid = NULL, *capacity_str = NULL;
+	char active[] = "active_table_hash=";
+	unsigned int active_len = strlen(active), capacity_len = 0;
+	unsigned int l = 0;
+	bool noio = true;
+	bool nodata = true;
+	int r;
+
+	device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, GFP_KERNEL, noio);
+	if (!device_table_data)
+		return;
+
+	r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio);
+	if (r)
+		goto error;
+
+	memcpy(device_table_data + l, DM_IMA_VERSION_STR, md->ima.dm_version_str_len);
+	l += md->ima.dm_version_str_len;
+
+	if (swap) {
+		/* The two slots may share a pointer; free the active one only when it is not shared. */
+		if (md->ima.active_table.hash != md->ima.inactive_table.hash)
+			kfree(md->ima.active_table.hash);
+
+		md->ima.active_table.hash = NULL;
+		md->ima.active_table.hash_len = 0;
+
+		if (md->ima.active_table.device_metadata !=
+		    md->ima.inactive_table.device_metadata)
+			kfree(md->ima.active_table.device_metadata);
+
+		md->ima.active_table.device_metadata = NULL;
+		md->ima.active_table.device_metadata_len = 0;
+		md->ima.active_table.num_targets = 0;
+
+		/* Promote the inactive table's state to the active slot. */
+		if (md->ima.inactive_table.hash) {
+			md->ima.active_table.hash = md->ima.inactive_table.hash;
+			md->ima.active_table.hash_len = md->ima.inactive_table.hash_len;
+			md->ima.inactive_table.hash = NULL;
+			md->ima.inactive_table.hash_len = 0;
+		}
+
+		if (md->ima.inactive_table.device_metadata) {
+			md->ima.active_table.device_metadata =
+				md->ima.inactive_table.device_metadata;
+			md->ima.active_table.device_metadata_len =
+				md->ima.inactive_table.device_metadata_len;
+			md->ima.active_table.num_targets = md->ima.inactive_table.num_targets;
+			md->ima.inactive_table.device_metadata = NULL;
+			md->ima.inactive_table.device_metadata_len = 0;
+			md->ima.inactive_table.num_targets = 0;
+		}
+	}
+
+	if (md->ima.active_table.device_metadata) {
+		memcpy(device_table_data + l, md->ima.active_table.device_metadata,
+		       md->ima.active_table.device_metadata_len);
+		l += md->ima.active_table.device_metadata_len;
+
+		nodata = false;
+	}
+
+	if (md->ima.active_table.hash) {
+		memcpy(device_table_data + l, active, active_len);
+		l += active_len;
+
+		memcpy(device_table_data + l, md->ima.active_table.hash,
+		       md->ima.active_table.hash_len);
+		l += md->ima.active_table.hash_len;
+
+		memcpy(device_table_data + l, ";", 1);
+		l++;
+
+		nodata = false;
+	}
+
+	if (nodata) {
+		r = dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio);
+		if (r)
+			goto error;
+
+		/*
+		 * The no_data record is formatted from the start of the buffer
+		 * and already contains the version string, so its length
+		 * replaces (rather than adds to) the length accumulated so far.
+		 * Adding it would leave a gap of NUL bytes before the capacity.
+		 */
+		l = scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
+			      "%sname=%s,uuid=%s;device_resume=no_data;",
+			      DM_IMA_VERSION_STR, dev_name, dev_uuid);
+	}
+
+	capacity_len = strlen(capacity_str);
+	memcpy(device_table_data + l, capacity_str, capacity_len);
+	l += capacity_len;
+
+	dm_ima_measure_data("dm_device_resume", device_table_data, l, noio);
+
+	kfree(dev_name);
+	kfree(dev_uuid);
+error:
+	kfree(capacity_str);
+	kfree(device_table_data);
+}
+
+/*
+ * Measure IMA data on remove.
+ *
+ * @md:         mapped device being removed.
+ * @remove_all: recorded in the measurement as "remove_all=y;" or "remove_all=n;".
+ *
+ * The measured "dm_device_remove" record is the DM version string, followed
+ * by whichever of the active/inactive device metadata and table hashes are
+ * present, or by "name=...,uuid=...;device_remove=no_data;" when none are,
+ * followed by the remove_all flag and the current device capacity.
+ *
+ * Whether or not the measurement could be built, all IMA state stored in
+ * md->ima is freed and reset before returning.
+ */
+void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all)
+{
+	char *device_table_data, *dev_name = NULL, *dev_uuid = NULL, *capacity_str = NULL;
+	char active_table_str[] = "active_table_hash=";
+	char inactive_table_str[] = "inactive_table_hash=";
+	char device_active_str[] = "device_active_metadata=";
+	char device_inactive_str[] = "device_inactive_metadata=";
+	char remove_all_str[] = "remove_all=";
+	unsigned int active_table_len = strlen(active_table_str);
+	unsigned int inactive_table_len = strlen(inactive_table_str);
+	unsigned int device_active_len = strlen(device_active_str);
+	unsigned int device_inactive_len = strlen(device_inactive_str);
+	unsigned int remove_all_len = strlen(remove_all_str);
+	unsigned int capacity_len = 0;
+	unsigned int l = 0;
+	bool noio = true;
+	bool nodata = true;
+	int r;
+
+	device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN*2, GFP_KERNEL, noio);
+	if (!device_table_data)
+		goto exit;
+
+	r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio);
+	if (r) {
+		kfree(device_table_data);
+		goto exit;
+	}
+
+	memcpy(device_table_data + l, DM_IMA_VERSION_STR, md->ima.dm_version_str_len);
+	l += md->ima.dm_version_str_len;
+
+	if (md->ima.active_table.device_metadata) {
+		memcpy(device_table_data + l, device_active_str, device_active_len);
+		l += device_active_len;
+
+		memcpy(device_table_data + l, md->ima.active_table.device_metadata,
+		       md->ima.active_table.device_metadata_len);
+		l += md->ima.active_table.device_metadata_len;
+
+		nodata = false;
+	}
+
+	if (md->ima.inactive_table.device_metadata) {
+		memcpy(device_table_data + l, device_inactive_str, device_inactive_len);
+		l += device_inactive_len;
+
+		memcpy(device_table_data + l, md->ima.inactive_table.device_metadata,
+		       md->ima.inactive_table.device_metadata_len);
+		l += md->ima.inactive_table.device_metadata_len;
+
+		nodata = false;
+	}
+
+	if (md->ima.active_table.hash) {
+		memcpy(device_table_data + l, active_table_str, active_table_len);
+		l += active_table_len;
+
+		memcpy(device_table_data + l, md->ima.active_table.hash,
+		       md->ima.active_table.hash_len);
+		l += md->ima.active_table.hash_len;
+
+		memcpy(device_table_data + l, ",", 1);
+		l++;
+
+		nodata = false;
+	}
+
+	if (md->ima.inactive_table.hash) {
+		memcpy(device_table_data + l, inactive_table_str, inactive_table_len);
+		l += inactive_table_len;
+
+		memcpy(device_table_data + l, md->ima.inactive_table.hash,
+		       md->ima.inactive_table.hash_len);
+		l += md->ima.inactive_table.hash_len;
+
+		memcpy(device_table_data + l, ",", 1);
+		l++;
+
+		nodata = false;
+	}
+	/*
+	 * In case both active and inactive tables, and corresponding
+	 * device metadata is cleared/missing - record the name and uuid
+	 * in IMA measurements.
+	 */
+	if (nodata) {
+		if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio))
+			goto error;
+
+		/*
+		 * The no_data record is formatted from the start of the buffer
+		 * and already contains the version string, so its length
+		 * replaces (rather than adds to) the length accumulated so far.
+		 * Adding it would leave a gap of NUL bytes before "remove_all=".
+		 */
+		l = scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
+			      "%sname=%s,uuid=%s;device_remove=no_data;",
+			      DM_IMA_VERSION_STR, dev_name, dev_uuid);
+	}
+
+	memcpy(device_table_data + l, remove_all_str, remove_all_len);
+	l += remove_all_len;
+	memcpy(device_table_data + l, remove_all ? "y;" : "n;", 2);
+	l += 2;
+
+	capacity_len = strlen(capacity_str);
+	memcpy(device_table_data + l, capacity_str, capacity_len);
+	l += capacity_len;
+
+	dm_ima_measure_data("dm_device_remove", device_table_data, l, noio);
+
+error:
+	kfree(device_table_data);
+	kfree(capacity_str);
+exit:
+	/*
+	 * Drop everything stored for this device. The active and inactive
+	 * slots may share a pointer, so the inactive one is freed only when
+	 * it differs from the active one.
+	 */
+	kfree(md->ima.active_table.device_metadata);
+
+	if (md->ima.active_table.device_metadata !=
+	    md->ima.inactive_table.device_metadata)
+		kfree(md->ima.inactive_table.device_metadata);
+
+	kfree(md->ima.active_table.hash);
+
+	if (md->ima.active_table.hash != md->ima.inactive_table.hash)
+		kfree(md->ima.inactive_table.hash);
+
+	dm_ima_reset_data(md);
+
+	kfree(dev_name);
+	kfree(dev_uuid);
+}
+
+/*
+ * Measure ima data on table clear.
+ *
+ * @md:      mapped device whose inactive table was cleared.
+ * @new_map: when true, after measuring, the inactive slot's hash and device
+ *           metadata are released and the slot is re-pointed at the active
+ *           table's hash and metadata (the two slots then share pointers).
+ *
+ * The measured "dm_table_clear" record is the DM version string, followed by
+ * the inactive device metadata and "inactive_table_hash=<hash>;" when both
+ * are present, or by "name=...,uuid=...;table_clear=no_data;" otherwise,
+ * followed by the current device capacity. Allocation failures silently skip
+ * both the measurement and the @new_map bookkeeping.
+ */
+void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map)
+{
+	unsigned int l = 0, capacity_len = 0;
+	char *device_table_data = NULL, *dev_name = NULL, *dev_uuid = NULL, *capacity_str = NULL;
+	char inactive_str[] = "inactive_table_hash=";
+	unsigned int inactive_len = strlen(inactive_str);
+	bool noio = true;
+	bool nodata = true;
+	int r;
+
+	device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, GFP_KERNEL, noio);
+	if (!device_table_data)
+		return;
+
+	r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio);
+	if (r)
+		goto error1;
+
+	memcpy(device_table_data + l, DM_IMA_VERSION_STR, md->ima.dm_version_str_len);
+	l += md->ima.dm_version_str_len;
+
+	if (md->ima.inactive_table.device_metadata_len &&
+	    md->ima.inactive_table.hash_len) {
+		memcpy(device_table_data + l, md->ima.inactive_table.device_metadata,
+		       md->ima.inactive_table.device_metadata_len);
+		l += md->ima.inactive_table.device_metadata_len;
+
+		memcpy(device_table_data + l, inactive_str, inactive_len);
+		l += inactive_len;
+
+		memcpy(device_table_data + l, md->ima.inactive_table.hash,
+		       md->ima.inactive_table.hash_len);
+
+		l += md->ima.inactive_table.hash_len;
+
+		memcpy(device_table_data + l, ";", 1);
+		l++;
+
+		nodata = false;
+	}
+
+	if (nodata) {
+		if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio))
+			goto error2;
+
+		/*
+		 * The no_data record is formatted from the start of the buffer
+		 * and already contains the version string, so its length
+		 * replaces (rather than adds to) the length accumulated so far.
+		 * Adding it would leave a gap of NUL bytes before the capacity.
+		 */
+		l = scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
+			      "%sname=%s,uuid=%s;table_clear=no_data;",
+			      DM_IMA_VERSION_STR, dev_name, dev_uuid);
+	}
+
+	capacity_len = strlen(capacity_str);
+	memcpy(device_table_data + l, capacity_str, capacity_len);
+	l += capacity_len;
+
+	dm_ima_measure_data("dm_table_clear", device_table_data, l, noio);
+
+	if (new_map) {
+		/* Release the inactive slot unless it already shares its pointers with the active slot. */
+		if (md->ima.inactive_table.hash &&
+		    md->ima.inactive_table.hash != md->ima.active_table.hash)
+			kfree(md->ima.inactive_table.hash);
+
+		md->ima.inactive_table.hash = NULL;
+		md->ima.inactive_table.hash_len = 0;
+
+		if (md->ima.inactive_table.device_metadata &&
+		    md->ima.inactive_table.device_metadata != md->ima.active_table.device_metadata)
+			kfree(md->ima.inactive_table.device_metadata);
+
+		md->ima.inactive_table.device_metadata = NULL;
+		md->ima.inactive_table.device_metadata_len = 0;
+		md->ima.inactive_table.num_targets = 0;
+
+		/* Point the inactive slot at the active table's data (shared, not copied). */
+		if (md->ima.active_table.hash) {
+			md->ima.inactive_table.hash = md->ima.active_table.hash;
+			md->ima.inactive_table.hash_len = md->ima.active_table.hash_len;
+		}
+
+		if (md->ima.active_table.device_metadata) {
+			md->ima.inactive_table.device_metadata =
+				md->ima.active_table.device_metadata;
+			md->ima.inactive_table.device_metadata_len =
+				md->ima.active_table.device_metadata_len;
+			md->ima.inactive_table.num_targets =
+				md->ima.active_table.num_targets;
+		}
+	}
+
+	kfree(dev_name);
+	kfree(dev_uuid);
+error2:
+	kfree(capacity_str);
+error1:
+	kfree(device_table_data);
+}
+
+/*
+ * Record a "dm_device_rename" measurement.
+ *
+ * Fresh device metadata is built from the device's current name and uuid and
+ * installed as the active table's device metadata. The measured record is the
+ * DM version string, the previous device metadata, the new name and uuid, and
+ * the current device capacity. If any allocation fails, nothing is measured
+ * and the stored metadata is left untouched.
+ */
+void dm_ima_measure_on_device_rename(struct mapped_device *md)
+{
+	char *prev_meta = NULL, *cur_meta = NULL, *event_buf = NULL;
+	char *name = NULL, *uuid = NULL, *cap = NULL;
+	bool noio = true;
+
+	if (dm_ima_alloc_and_copy_device_data(md, &cur_meta,
+					      md->ima.active_table.num_targets, noio))
+		return;
+
+	if (dm_ima_alloc_and_copy_name_uuid(md, &name, &uuid, noio))
+		goto drop_new;
+
+	event_buf = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN * 2, GFP_KERNEL, noio);
+	if (!event_buf)
+		goto drop_new;
+
+	if (dm_ima_alloc_and_copy_capacity_str(md, &cap, noio))
+		goto drop_new;
+
+	/* Install the new metadata; the old buffer is freed after it has been measured. */
+	prev_meta = md->ima.active_table.device_metadata;
+	md->ima.active_table.device_metadata = cur_meta;
+	md->ima.active_table.device_metadata_len = strlen(cur_meta);
+
+	scnprintf(event_buf, DM_IMA_DEVICE_BUF_LEN * 2,
+		  "%s%snew_name=%s,new_uuid=%s;%s", DM_IMA_VERSION_STR, prev_meta,
+		  name, uuid, cap);
+
+	dm_ima_measure_data("dm_device_rename", event_buf, strlen(event_buf), noio);
+
+	goto release;
+
+drop_new:
+	/* The new metadata was never installed, so it is still ours to free. */
+	kfree(cur_meta);
+release:
+	kfree(cap);
+	kfree(event_buf);
+	kfree(prev_meta);
+	kfree(name);
+	kfree(uuid);
+}
diff --git a/drivers/md/dm-ima.h b/drivers/md/dm-ima.h
new file mode 100644
index 000000000000..b8c3b614670b
--- /dev/null
+++ b/drivers/md/dm-ima.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2021 Microsoft Corporation
+ *
+ * Author: Tushar Sugandhi <tusharsu@linux.microsoft.com>
+ *
+ * File: dm-ima.h
+ * Header file for device mapper IMA measurements.
+ */
+
+#ifndef DM_IMA_H
+#define DM_IMA_H
+
+#define DM_IMA_MEASUREMENT_BUF_LEN 4096
+#define DM_IMA_DEVICE_BUF_LEN 1024
+#define DM_IMA_TARGET_METADATA_BUF_LEN 128
+#define DM_IMA_TARGET_DATA_BUF_LEN 2048
+#define DM_IMA_DEVICE_CAPACITY_BUF_LEN 128
+#define DM_IMA_TABLE_HASH_ALG "sha256"
+
+#define __dm_ima_stringify(s) #s
+#define __dm_ima_str(s) __dm_ima_stringify(s)
+
+#define DM_IMA_VERSION_STR "dm_version=" \
+ __dm_ima_str(DM_VERSION_MAJOR) "." \
+ __dm_ima_str(DM_VERSION_MINOR) "." \
+ __dm_ima_str(DM_VERSION_PATCHLEVEL) ";"
+
+#ifdef CONFIG_IMA
+
+struct dm_ima_device_table_metadata { /* IMA state kept for one table slot (active or inactive) */
+ /*
+ * Contains data specific to the device which is common across
+ * all the targets in the table (e.g. name, uuid, major, minor, etc).
+ * The values are stored in comma separated list of key1=val1,key2=val2;
+ * pairs delimited by a semicolon at the end of the list.
+ */
+ char *device_metadata;
+ unsigned int device_metadata_len; /* length of device_metadata, excluding the terminating NUL */
+ unsigned int num_targets; /* number of targets in the table this metadata was built for */
+
+ /*
+ * Contains the sha256 hashes of the IMA measurements of the target
+ * attributes' key-value pairs from the active/inactive tables.
+ */
+ char *hash; /* stored as a "<hash alg>:<hex digest>" string */
+ unsigned int hash_len; /* length of hash, excluding the terminating NUL */
+};
+
+/*
+ * This structure contains device metadata, and table hash for
+ * active and inactive tables for ima measurements.
+ */
+struct dm_ima_measurements {
+ struct dm_ima_device_table_metadata active_table;
+ struct dm_ima_device_table_metadata inactive_table;
+ unsigned int dm_version_str_len; /* strlen(DM_IMA_VERSION_STR), set by dm_ima_reset_data() */
+};
+
+void dm_ima_reset_data(struct mapped_device *md);
+void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags);
+void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap);
+void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all);
+void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map);
+void dm_ima_measure_on_device_rename(struct mapped_device *md);
+
+#else
+
+static inline void dm_ima_reset_data(struct mapped_device *md) {}
+static inline void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags) {}
+static inline void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap) {}
+static inline void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all) {}
+static inline void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map) {}
+static inline void dm_ima_measure_on_device_rename(struct mapped_device *md) {}
+
+#endif /* CONFIG_IMA */
+
+#endif /* DM_IMA_H */
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 20f2510db1f6..dc03b70f6e65 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1819,7 +1819,7 @@ again:
unsigned this_len;
BUG_ON(PageHighMem(biv.bv_page));
- tag = lowmem_page_address(biv.bv_page) + biv.bv_offset;
+ tag = bvec_virt(&biv);
this_len = min(biv.bv_len, data_to_process);
r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset,
this_len, dio->op == REQ_OP_READ ? TAG_READ : TAG_WRITE);
@@ -2006,7 +2006,7 @@ retry_kmap:
unsigned tag_now = min(biv.bv_len, tag_todo);
char *tag_addr;
BUG_ON(PageHighMem(biv.bv_page));
- tag_addr = lowmem_page_address(biv.bv_page) + biv.bv_offset;
+ tag_addr = bvec_virt(&biv);
if (likely(dio->op == REQ_OP_WRITE))
memcpy(tag_ptr, tag_addr, tag_now);
else
@@ -3306,6 +3306,30 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
EMIT_ALG(journal_mac_alg, "journal_mac");
break;
}
+ case STATUSTYPE_IMA:
+ DMEMIT_TARGET_NAME_VERSION(ti->type);
+ DMEMIT(",dev_name=%s,start=%llu,tag_size=%u,mode=%c",
+ ic->dev->name, ic->start, ic->tag_size, ic->mode);
+
+ if (ic->meta_dev)
+ DMEMIT(",meta_device=%s", ic->meta_dev->name);
+ if (ic->sectors_per_block != 1)
+ DMEMIT(",block_size=%u", ic->sectors_per_block << SECTOR_SHIFT);
+
+ DMEMIT(",recalculate=%c", (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) ?
+ 'y' : 'n');
+ DMEMIT(",allow_discards=%c", ic->discard ? 'y' : 'n');
+ DMEMIT(",fix_padding=%c",
+ ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) ? 'y' : 'n');
+ DMEMIT(",fix_hmac=%c",
+ ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0) ? 'y' : 'n');
+ DMEMIT(",legacy_recalculate=%c", ic->legacy_recalculate ? 'y' : 'n');
+
+ DMEMIT(",journal_sectors=%u", ic->initial_sectors - SB_SECTORS);
+ DMEMIT(",interleave_sectors=%u", 1U << ic->sb->log2_interleave_sectors);
+ DMEMIT(",buffer_sectors=%u", 1U << ic->log2_buffer_sectors);
+ DMEMIT(";");
+ break;
}
}
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 2209cbcd84db..21fe8652b095 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -6,7 +6,7 @@
*/
#include "dm-core.h"
-
+#include "dm-ima.h"
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/miscdevice.h>
@@ -20,6 +20,7 @@
#include <linux/compat.h>
#include <linux/uaccess.h>
+#include <linux/ima.h>
#define DM_MSG_PREFIX "ioctl"
#define DM_DRIVER_EMAIL "dm-devel@redhat.com"
@@ -347,6 +348,7 @@ retry:
dm_sync_table(md);
dm_table_destroy(t);
}
+ dm_ima_measure_on_device_remove(md, true);
dm_put(md);
if (likely(keep_open_devices))
dm_destroy(md);
@@ -483,6 +485,9 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param,
param->flags |= DM_UEVENT_GENERATED_FLAG;
md = hc->md;
+
+ dm_ima_measure_on_device_rename(md);
+
up_write(&_hash_lock);
kfree(old_name);
@@ -981,6 +986,8 @@ static int dev_remove(struct file *filp, struct dm_ioctl *param, size_t param_si
param->flags &= ~DM_DEFERRED_REMOVE;
+ dm_ima_measure_on_device_remove(md, false);
+
if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr))
param->flags |= DM_UEVENT_GENERATED_FLAG;
@@ -1159,8 +1166,12 @@ static int do_resume(struct dm_ioctl *param)
if (dm_suspended_md(md)) {
r = dm_resume(md);
- if (!r && !dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr))
- param->flags |= DM_UEVENT_GENERATED_FLAG;
+ if (!r) {
+ dm_ima_measure_on_device_resume(md, new_map ? true : false);
+
+ if (!dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr))
+ param->flags |= DM_UEVENT_GENERATED_FLAG;
+ }
}
/*
@@ -1224,6 +1235,8 @@ static void retrieve_status(struct dm_table *table,
if (param->flags & DM_STATUS_TABLE_FLAG)
type = STATUSTYPE_TABLE;
+ else if (param->flags & DM_IMA_MEASUREMENT_FLAG)
+ type = STATUSTYPE_IMA;
else
type = STATUSTYPE_INFO;
@@ -1425,6 +1438,8 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si
if (r)
goto err_unlock_md_type;
+ dm_ima_measure_on_table_load(t, STATUSTYPE_IMA);
+
immutable_target_type = dm_get_immutable_target_type(md);
if (immutable_target_type &&
(immutable_target_type != dm_table_get_immutable_target_type(t)) &&
@@ -1436,9 +1451,6 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si
}
if (dm_get_md_type(md) == DM_TYPE_NONE) {
- /* Initial table load: acquire type of table. */
- dm_set_md_type(md, dm_table_get_type(t));
-
/* setup md->queue to reflect md's type (may block) */
r = dm_setup_md_queue(md, t);
if (r) {
@@ -1496,6 +1508,7 @@ static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_s
struct hash_cell *hc;
struct mapped_device *md;
struct dm_table *old_map = NULL;
+ bool has_new_map = false;
down_write(&_hash_lock);
@@ -1509,6 +1522,7 @@ static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_s
if (hc->new_map) {
old_map = hc->new_map;
hc->new_map = NULL;
+ has_new_map = true;
}
param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
@@ -1520,6 +1534,7 @@ static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_s
dm_sync_table(md);
dm_table_destroy(old_map);
}
+ dm_ima_measure_on_table_clear(md, has_new_map);
dm_put(md);
return 0;
@@ -2187,7 +2202,6 @@ int __init dm_early_create(struct dm_ioctl *dmi,
if (r)
goto err_destroy_table;
- md->type = dm_table_get_type(t);
/* setup md->queue to reflect md's type (may block) */
r = dm_setup_md_queue(md, t);
if (r) {
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index c91f1e2e2f65..679b4c0a2eea 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -106,6 +106,7 @@ static void linear_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen)
{
struct linear_c *lc = (struct linear_c *) ti->private;
+ size_t sz = 0;
switch (type) {
case STATUSTYPE_INFO:
@@ -113,8 +114,13 @@ static void linear_status(struct dm_target *ti, status_type_t type,
break;
case STATUSTYPE_TABLE:
- snprintf(result, maxlen, "%s %llu", lc->dev->name,
- (unsigned long long)lc->start);
+ DMEMIT("%s %llu", lc->dev->name, (unsigned long long)lc->start);
+ break;
+
+ case STATUSTYPE_IMA:
+ DMEMIT_TARGET_NAME_VERSION(ti->type);
+ DMEMIT(",device_name=%s,start=%llu;", lc->dev->name,
+ (unsigned long long)lc->start);
break;
}
}
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index 52090bee17c2..9ab93ebea889 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -820,6 +820,9 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
DMEMIT("integrated_flush ");
DMEMIT("%s ", table_args);
break;
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
return (r) ? 0 : (int)sz;
}
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 57882654ffee..d93a4db23512 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -834,6 +834,10 @@ static void log_writes_status(struct dm_target *ti, status_type_t type,
case STATUSTYPE_TABLE:
DMEMIT("%s %s", lc->dev->name, lc->logdev->name);
break;
+
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
}
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 33e71ea6cc14..1ecf75ef276a 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -793,6 +793,11 @@ static int core_status(struct dm_dirty_log *log, status_type_t status,
DMEMIT("%s %u %u ", log->type->name,
lc->sync == DEFAULTSYNC ? 1 : 2, lc->region_size);
DMEMIT_SYNC;
+ break;
+
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
return sz;
@@ -817,6 +822,11 @@ static int disk_status(struct dm_dirty_log *log, status_type_t status,
lc->sync == DEFAULTSYNC ? 2 : 3, lc->log_dev->name,
lc->region_size);
DMEMIT_SYNC;
+ break;
+
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
return sz;
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index bced42f082b0..694aaca4eea2 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1790,7 +1790,7 @@ static void multipath_resume(struct dm_target *ti)
static void multipath_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen)
{
- int sz = 0;
+ int sz = 0, pg_counter, pgpath_counter;
unsigned long flags;
struct multipath *m = ti->private;
struct priority_group *pg;
@@ -1904,6 +1904,44 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
}
}
break;
+
+ case STATUSTYPE_IMA:
+ sz = 0; /*reset the result pointer*/
+
+ DMEMIT_TARGET_NAME_VERSION(ti->type);
+ DMEMIT(",nr_priority_groups=%u", m->nr_priority_groups);
+
+ pg_counter = 0;
+ list_for_each_entry(pg, &m->priority_groups, list) {
+ if (pg->bypassed)
+ state = 'D'; /* Disabled */
+ else if (pg == m->current_pg)
+ state = 'A'; /* Currently Active */
+ else
+ state = 'E'; /* Enabled */
+ DMEMIT(",pg_state_%d=%c", pg_counter, state);
+ DMEMIT(",nr_pgpaths_%d=%u", pg_counter, pg->nr_pgpaths);
+ DMEMIT(",path_selector_name_%d=%s", pg_counter, pg->ps.type->name);
+
+ pgpath_counter = 0;
+ list_for_each_entry(p, &pg->pgpaths, list) {
+ DMEMIT(",path_name_%d_%d=%s,is_active_%d_%d=%c,fail_count_%d_%d=%u",
+ pg_counter, pgpath_counter, p->path.dev->name,
+ pg_counter, pgpath_counter, p->is_active ? 'A' : 'F',
+ pg_counter, pgpath_counter, p->fail_count);
+ if (pg->ps.type->status) {
+ DMEMIT(",path_selector_status_%d_%d=",
+ pg_counter, pgpath_counter);
+ sz += pg->ps.type->status(&pg->ps, &p->path,
+ type, result + sz,
+ maxlen - sz);
+ }
+ pgpath_counter++;
+ }
+ pg_counter++;
+ }
+ DMEMIT(";");
+ break;
}
spin_unlock_irqrestore(&m->lock, flags);
diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c
index 186f91e2752c..1856a1b125cc 100644
--- a/drivers/md/dm-ps-historical-service-time.c
+++ b/drivers/md/dm-ps-historical-service-time.c
@@ -255,6 +255,9 @@ static int hst_status(struct path_selector *ps, struct dm_path *path,
case STATUSTYPE_TABLE:
DMEMIT("0 ");
break;
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
}
diff --git a/drivers/md/dm-ps-io-affinity.c b/drivers/md/dm-ps-io-affinity.c
index cb8e83bfb1a7..f74501e65a8e 100644
--- a/drivers/md/dm-ps-io-affinity.c
+++ b/drivers/md/dm-ps-io-affinity.c
@@ -170,6 +170,9 @@ static int ioa_status(struct path_selector *ps, struct dm_path *path,
pi = path->pscontext;
DMEMIT("%*pb ", cpumask_pr_args(pi->cpumask));
break;
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
return sz;
diff --git a/drivers/md/dm-ps-queue-length.c b/drivers/md/dm-ps-queue-length.c
index 5fd018d18418..cef70657bbbc 100644
--- a/drivers/md/dm-ps-queue-length.c
+++ b/drivers/md/dm-ps-queue-length.c
@@ -102,6 +102,9 @@ static int ql_status(struct path_selector *ps, struct dm_path *path,
case STATUSTYPE_TABLE:
DMEMIT("%u ", pi->repeat_count);
break;
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
}
diff --git a/drivers/md/dm-ps-round-robin.c b/drivers/md/dm-ps-round-robin.c
index bdbb7e6e8212..27f44c5fa04e 100644
--- a/drivers/md/dm-ps-round-robin.c
+++ b/drivers/md/dm-ps-round-robin.c
@@ -100,6 +100,10 @@ static int rr_status(struct path_selector *ps, struct dm_path *path,
pi = path->pscontext;
DMEMIT("%u ", pi->repeat_count);
break;
+
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
}
diff --git a/drivers/md/dm-ps-service-time.c b/drivers/md/dm-ps-service-time.c
index 9cfda665e9eb..3ec9c33265c5 100644
--- a/drivers/md/dm-ps-service-time.c
+++ b/drivers/md/dm-ps-service-time.c
@@ -99,6 +99,9 @@ static int st_status(struct path_selector *ps, struct dm_path *path,
DMEMIT("%u %u ", pi->repeat_count,
pi->relative_throughput);
break;
+ case STATUSTYPE_IMA:
+ result[0] = '\0';
+ break;
}
}
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index bf4a467fc73a..d9ef52159a22 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3671,6 +3671,45 @@ static void raid_status(struct dm_target *ti, status_type_t type,
for (i = 0; i < rs->raid_disks; i++)
DMEMIT(" %s %s", __get_dev_name(rs->dev[i].meta_dev),
__get_dev_name(rs->dev[i].data_dev));
+ break;
+
+ case STATUSTYPE_IMA:
+ rt = get_raid_type_by_ll(mddev->new_level, mddev->new_layout);
+ if (!rt)
+ return;
+
+ DMEMIT_TARGET_NAME_VERSION(ti->type);
+ DMEMIT(",raid_type=%s,raid_disks=%d", rt->name, mddev->raid_disks);
+
+ /* Access most recent mddev properties for status output */
+ smp_rmb();
+ recovery = rs->md.recovery;
+ state = decipher_sync_action(mddev, recovery);
+ DMEMIT(",raid_state=%s", sync_str(state));
+
+ for (i = 0; i < rs->raid_disks; i++) {
+ DMEMIT(",raid_device_%d_status=", i);
+ DMEMIT(__raid_dev_status(rs, &rs->dev[i].rdev));
+ }
+
+ if (rt_is_raid456(rt)) {
+ DMEMIT(",journal_dev_mode=");
+ switch (rs->journal_dev.mode) {
+ case R5C_JOURNAL_MODE_WRITE_THROUGH:
+ DMEMIT("%s",
+ _raid456_journal_mode[R5C_JOURNAL_MODE_WRITE_THROUGH].param);
+ break;
+ case R5C_JOURNAL_MODE_WRITE_BACK:
+ DMEMIT("%s",
+ _raid456_journal_mode[R5C_JOURNAL_MODE_WRITE_BACK].param);
+ break;
+ default:
+ DMEMIT("invalid");
+ break;
+ }
+ }
+ DMEMIT(";");
+ break;
}
}
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index ebb4810cc3b4..8811d484fdd1 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1435,6 +1435,23 @@ static void mirror_status(struct dm_target *ti, status_type_t type,
}
break;
+
+ case STATUSTYPE_IMA:
+ DMEMIT_TARGET_NAME_VERSION(ti->type);
+ DMEMIT(",nr_mirrors=%d", ms->nr_mirrors);
+ for (m = 0; m < ms->nr_mirrors; m++) {
+ DMEMIT(",mirror_device_%d=%s", m, ms->mirror[m].dev->name);
+ DMEMIT(",mirror_device_%d_status=%c",
+ m, device_status_char(&(ms->mirror[m])));
+ }
+
+ DMEMIT(",handle_errors=%c", errors_handled(ms) ? 'y' : 'n');
+ DMEMIT(",keep_log=%c", keep_log(ms) ? 'y' : 'n');
+
+ DMEMIT(",log_type_status=");
+ sz += log->type->status(log, type, result+sz, maxlen-sz);
+ DMEMIT(";");
+ break;
}
}
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 0dbd48cbdff9..5b95eea517d1 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -559,7 +559,6 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
err = blk_mq_init_allocated_queue(md->tag_set, md->queue);
if (err)
goto out_tag_set;
- elevator_init_mq(md->queue);
return 0;
out_tag_set:
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 9ab4bf651ca9..3bb5cff5d6fc 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -908,6 +908,10 @@ static unsigned persistent_status(struct dm_exception_store *store,
case STATUSTYPE_TABLE:
DMEMIT(" %s %llu", store->userspace_supports_overflow ? "PO" : "P",
(unsigned long long)store->chunk_size);
+ break;
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
return sz;
diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c
index 4d50a12cf00c..0e0ae4c36b37 100644
--- a/drivers/md/dm-snap-transient.c
+++ b/drivers/md/dm-snap-transient.c
@@ -95,6 +95,10 @@ static unsigned transient_status(struct dm_exception_store *store,
break;
case STATUSTYPE_TABLE:
DMEMIT(" N %llu", (unsigned long long)store->chunk_size);
+ break;
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
return sz;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 751ec5ea1dbb..dcf34c6b05ad 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -2390,6 +2390,16 @@ static void snapshot_status(struct dm_target *ti, status_type_t type,
DMEMIT(" discard_passdown_origin");
}
break;
+
+ case STATUSTYPE_IMA:
+ DMEMIT_TARGET_NAME_VERSION(ti->type);
+ DMEMIT(",snap_origin_name=%s", snap->origin->name);
+ DMEMIT(",snap_cow_name=%s", snap->cow->name);
+ DMEMIT(",snap_valid=%c", snap->valid ? 'y' : 'n');
+ DMEMIT(",snap_merge_failed=%c", snap->merge_failed ? 'y' : 'n');
+ DMEMIT(",snapshot_overflowed=%c", snap->snapshot_overflowed ? 'y' : 'n');
+ DMEMIT(";");
+ break;
}
}
@@ -2734,6 +2744,9 @@ static void origin_status(struct dm_target *ti, status_type_t type,
case STATUSTYPE_TABLE:
snprintf(result, maxlen, "%s", o->dev->name);
break;
+ case STATUSTYPE_IMA:
+ result[0] = '\0';
+ break;
}
}
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index df359d33cda8..6660b6b53d5b 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -428,6 +428,21 @@ static void stripe_status(struct dm_target *ti, status_type_t type,
DMEMIT(" %s %llu", sc->stripe[i].dev->name,
(unsigned long long)sc->stripe[i].physical_start);
break;
+
+ case STATUSTYPE_IMA:
+ DMEMIT_TARGET_NAME_VERSION(ti->type);
+ DMEMIT(",stripes=%d,chunk_size=%llu", sc->stripes,
+ (unsigned long long)sc->chunk_size);
+
+ for (i = 0; i < sc->stripes; i++) {
+ DMEMIT(",stripe_%d_device_name=%s", i, sc->stripe[i].dev->name);
+ DMEMIT(",stripe_%d_physical_start=%llu", i,
+ (unsigned long long)sc->stripe[i].physical_start);
+ DMEMIT(",stripe_%d_status=%c", i,
+ atomic_read(&(sc->stripe[i].error_count)) ? 'D' : 'A');
+ }
+ DMEMIT(";");
+ break;
}
}
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index 262e2b0fd975..028a92ff6d57 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -504,6 +504,10 @@ static void switch_status(struct dm_target *ti, status_type_t type,
DMEMIT(" %s %llu", sctx->path_list[path_nr].dmdev->name,
(unsigned long long)sctx->path_list[path_nr].start);
break;
+
+ case STATUSTYPE_IMA:
+ result[0] = '\0';
+ break;
}
}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 0543cdf89e92..b03eabc1ed7c 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -2076,7 +2076,7 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
}
dm_update_keyslot_manager(q, t);
- blk_queue_update_readahead(q);
+ disk_update_readahead(t->md->disk);
return 0;
}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 985baee3a678..4c67b77c23c1 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -4012,6 +4012,10 @@ static void pool_status(struct dm_target *ti, status_type_t type,
(unsigned long long)pt->low_water_blocks);
emit_flags(&pt->requested_pf, result, sz, maxlen);
break;
+
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
return;
@@ -4423,6 +4427,10 @@ static void thin_status(struct dm_target *ti, status_type_t type,
if (tc->origin_dev)
DMEMIT(" %s", format_dev_t(buf, tc->origin_dev->bdev->bd_dev));
break;
+
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
}
diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c
index 7357c1bd5863..fdc8921e5c19 100644
--- a/drivers/md/dm-unstripe.c
+++ b/drivers/md/dm-unstripe.c
@@ -156,6 +156,10 @@ static void unstripe_status(struct dm_target *ti, status_type_t type,
uc->stripes, (unsigned long long)uc->chunk_size, uc->unstripe,
uc->dev->name, (unsigned long long)uc->physical_start);
break;
+
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
}
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index d3e76aefc1a6..22a5ac82446a 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -772,6 +772,49 @@ static void verity_status(struct dm_target *ti, status_type_t type,
DMEMIT(" " DM_VERITY_ROOT_HASH_VERIFICATION_OPT_SIG_KEY
" %s", v->signature_key_desc);
break;
+
+ case STATUSTYPE_IMA:
+ DMEMIT_TARGET_NAME_VERSION(ti->type);
+ DMEMIT(",hash_failed=%c", v->hash_failed ? 'C' : 'V');
+ DMEMIT(",verity_version=%u", v->version);
+ DMEMIT(",data_device_name=%s", v->data_dev->name);
+ DMEMIT(",hash_device_name=%s", v->hash_dev->name);
+ DMEMIT(",verity_algorithm=%s", v->alg_name);
+
+ DMEMIT(",root_digest=");
+ for (x = 0; x < v->digest_size; x++)
+ DMEMIT("%02x", v->root_digest[x]);
+
+ DMEMIT(",salt=");
+ if (!v->salt_size)
+ DMEMIT("-");
+ else
+ for (x = 0; x < v->salt_size; x++)
+ DMEMIT("%02x", v->salt[x]);
+
+ DMEMIT(",ignore_zero_blocks=%c", v->zero_digest ? 'y' : 'n');
+ DMEMIT(",check_at_most_once=%c", v->validated_blocks ? 'y' : 'n');
+ if (v->signature_key_desc)
+ DMEMIT(",root_hash_sig_key_desc=%s", v->signature_key_desc);
+
+ if (v->mode != DM_VERITY_MODE_EIO) {
+ DMEMIT(",verity_mode=");
+ switch (v->mode) {
+ case DM_VERITY_MODE_LOGGING:
+ DMEMIT(DM_VERITY_OPT_LOGGING);
+ break;
+ case DM_VERITY_MODE_RESTART:
+ DMEMIT(DM_VERITY_OPT_RESTART);
+ break;
+ case DM_VERITY_MODE_PANIC:
+ DMEMIT(DM_VERITY_OPT_PANIC);
+ break;
+ default:
+ DMEMIT("invalid");
+ }
+ }
+ DMEMIT(";");
+ break;
}
}
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index e21e29e81bbf..18320444fb0a 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -206,6 +206,19 @@ struct dm_writecache {
struct bio_set bio_set;
mempool_t copy_pool;
+
+ struct {
+ unsigned long long reads;
+ unsigned long long read_hits;
+ unsigned long long writes;
+ unsigned long long write_hits_uncommitted;
+ unsigned long long write_hits_committed;
+ unsigned long long writes_around;
+ unsigned long long writes_allocate;
+ unsigned long long writes_blocked_on_freelist;
+ unsigned long long flushes;
+ unsigned long long discards;
+ } stats;
};
#define WB_LIST_INLINE 16
@@ -1157,6 +1170,18 @@ static int process_cleaner_mesg(unsigned argc, char **argv, struct dm_writecache
return 0;
}
+static int process_clear_stats_mesg(unsigned argc, char **argv, struct dm_writecache *wc)
+{
+ if (argc != 1)
+ return -EINVAL;
+
+ wc_lock(wc);
+ memset(&wc->stats, 0, sizeof wc->stats);
+ wc_unlock(wc);
+
+ return 0;
+}
+
static int writecache_message(struct dm_target *ti, unsigned argc, char **argv,
char *result, unsigned maxlen)
{
@@ -1169,6 +1194,8 @@ static int writecache_message(struct dm_target *ti, unsigned argc, char **argv,
r = process_flush_on_suspend_mesg(argc, argv, wc);
else if (!strcasecmp(argv[0], "cleaner"))
r = process_cleaner_mesg(argc, argv, wc);
+ else if (!strcasecmp(argv[0], "clear_stats"))
+ r = process_clear_stats_mesg(argc, argv, wc);
else
DMERR("unrecognised message received: %s", argv[0]);
@@ -1214,14 +1241,13 @@ static void memcpy_flushcache_optimized(void *dest, void *source, size_t size)
static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data)
{
void *buf;
- unsigned long flags;
unsigned size;
int rw = bio_data_dir(bio);
unsigned remaining_size = wc->block_size;
do {
struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter);
- buf = bvec_kmap_irq(&bv, &flags);
+ buf = bvec_kmap_local(&bv);
size = bv.bv_len;
if (unlikely(size > remaining_size))
size = remaining_size;
@@ -1239,7 +1265,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data
memcpy_flushcache_optimized(data, buf, size);
}
- bvec_kunmap_irq(buf, &flags);
+ kunmap_local(buf);
data = (char *)data + size;
remaining_size -= size;
@@ -1294,216 +1320,278 @@ static void writecache_offload_bio(struct dm_writecache *wc, struct bio *bio)
bio_list_add(&wc->flush_list, bio);
}
-static int writecache_map(struct dm_target *ti, struct bio *bio)
+enum wc_map_op {
+ WC_MAP_SUBMIT,
+ WC_MAP_REMAP,
+ WC_MAP_REMAP_ORIGIN,
+ WC_MAP_RETURN,
+ WC_MAP_ERROR,
+};
+
+static enum wc_map_op writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio,
+ struct wc_entry *e)
{
- struct wc_entry *e;
- struct dm_writecache *wc = ti->private;
+ if (e) {
+ sector_t next_boundary =
+ read_original_sector(wc, e) - bio->bi_iter.bi_sector;
+ if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT)
+ dm_accept_partial_bio(bio, next_boundary);
+ }
- bio->bi_private = NULL;
+ return WC_MAP_REMAP_ORIGIN;
+}
- wc_lock(wc);
+static enum wc_map_op writecache_map_read(struct dm_writecache *wc, struct bio *bio)
+{
+ enum wc_map_op map_op;
+ struct wc_entry *e;
- if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
- if (writecache_has_error(wc))
- goto unlock_error;
+read_next_block:
+ wc->stats.reads++;
+ e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
+ if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) {
+ wc->stats.read_hits++;
if (WC_MODE_PMEM(wc)) {
- writecache_flush(wc);
- if (writecache_has_error(wc))
- goto unlock_error;
- if (unlikely(wc->cleaner) || unlikely(wc->metadata_only))
- goto unlock_remap_origin;
- goto unlock_submit;
+ bio_copy_block(wc, bio, memory_data(wc, e));
+ if (bio->bi_iter.bi_size)
+ goto read_next_block;
+ map_op = WC_MAP_SUBMIT;
} else {
- if (dm_bio_get_target_bio_nr(bio))
- goto unlock_remap_origin;
- writecache_offload_bio(wc, bio);
- goto unlock_return;
+ dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT);
+ bio_set_dev(bio, wc->ssd_dev->bdev);
+ bio->bi_iter.bi_sector = cache_sector(wc, e);
+ if (!writecache_entry_is_committed(wc, e))
+ writecache_wait_for_ios(wc, WRITE);
+ map_op = WC_MAP_REMAP;
}
+ } else {
+ map_op = writecache_map_remap_origin(wc, bio, e);
}
- bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
+ return map_op;
+}
- if (unlikely((((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
- (wc->block_size / 512 - 1)) != 0)) {
- DMERR("I/O is not aligned, sector %llu, size %u, block size %u",
- (unsigned long long)bio->bi_iter.bi_sector,
- bio->bi_iter.bi_size, wc->block_size);
- goto unlock_error;
- }
+static enum wc_map_op writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio,
+ struct wc_entry *e, bool search_used)
+{
+ unsigned bio_size = wc->block_size;
+ sector_t start_cache_sec = cache_sector(wc, e);
+ sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT);
- if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
- if (writecache_has_error(wc))
- goto unlock_error;
- if (WC_MODE_PMEM(wc)) {
- writecache_discard(wc, bio->bi_iter.bi_sector, bio_end_sector(bio));
- goto unlock_remap_origin;
+ while (bio_size < bio->bi_iter.bi_size) {
+ if (!search_used) {
+ struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec);
+ if (!f)
+ break;
+ write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector +
+ (bio_size >> SECTOR_SHIFT), wc->seq_count);
+ writecache_insert_entry(wc, f);
+ wc->uncommitted_blocks++;
} else {
- writecache_offload_bio(wc, bio);
- goto unlock_return;
+ struct wc_entry *f;
+ struct rb_node *next = rb_next(&e->rb_node);
+ if (!next)
+ break;
+ f = container_of(next, struct wc_entry, rb_node);
+ if (f != e + 1)
+ break;
+ if (read_original_sector(wc, f) !=
+ read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT))
+ break;
+ if (unlikely(f->write_in_progress))
+ break;
+ if (writecache_entry_is_committed(wc, f))
+ wc->overwrote_committed = true;
+ e = f;
}
+ bio_size += wc->block_size;
+ current_cache_sec += wc->block_size >> SECTOR_SHIFT;
}
- if (bio_data_dir(bio) == READ) {
-read_next_block:
- e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
- if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) {
- if (WC_MODE_PMEM(wc)) {
- bio_copy_block(wc, bio, memory_data(wc, e));
- if (bio->bi_iter.bi_size)
- goto read_next_block;
- goto unlock_submit;
- } else {
- dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT);
- bio_set_dev(bio, wc->ssd_dev->bdev);
- bio->bi_iter.bi_sector = cache_sector(wc, e);
- if (!writecache_entry_is_committed(wc, e))
- writecache_wait_for_ios(wc, WRITE);
- goto unlock_remap;
+ bio_set_dev(bio, wc->ssd_dev->bdev);
+ bio->bi_iter.bi_sector = start_cache_sec;
+ dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT);
+
+ if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
+ wc->uncommitted_blocks = 0;
+ queue_work(wc->writeback_wq, &wc->flush_work);
+ } else {
+ writecache_schedule_autocommit(wc);
+ }
+
+ return WC_MAP_REMAP;
+}
+
+static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio *bio)
+{
+ struct wc_entry *e;
+
+ do {
+ bool found_entry = false;
+ bool search_used = false;
+ wc->stats.writes++;
+ if (writecache_has_error(wc))
+ return WC_MAP_ERROR;
+ e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0);
+ if (e) {
+ if (!writecache_entry_is_committed(wc, e)) {
+ wc->stats.write_hits_uncommitted++;
+ search_used = true;
+ goto bio_copy;
}
- } else {
- if (e) {
- sector_t next_boundary =
- read_original_sector(wc, e) - bio->bi_iter.bi_sector;
- if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) {
- dm_accept_partial_bio(bio, next_boundary);
- }
+ wc->stats.write_hits_committed++;
+ if (!WC_MODE_PMEM(wc) && !e->write_in_progress) {
+ wc->overwrote_committed = true;
+ search_used = true;
+ goto bio_copy;
}
- goto unlock_remap_origin;
+ found_entry = true;
+ } else {
+ if (unlikely(wc->cleaner) ||
+ (wc->metadata_only && !(bio->bi_opf & REQ_META)))
+ goto direct_write;
}
- } else {
- do {
- bool found_entry = false;
- bool search_used = false;
- if (writecache_has_error(wc))
- goto unlock_error;
- e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0);
- if (e) {
- if (!writecache_entry_is_committed(wc, e)) {
- search_used = true;
- goto bio_copy;
- }
- if (!WC_MODE_PMEM(wc) && !e->write_in_progress) {
- wc->overwrote_committed = true;
- search_used = true;
- goto bio_copy;
- }
- found_entry = true;
- } else {
- if (unlikely(wc->cleaner) ||
- (wc->metadata_only && !(bio->bi_opf & REQ_META)))
- goto direct_write;
- }
- e = writecache_pop_from_freelist(wc, (sector_t)-1);
- if (unlikely(!e)) {
- if (!WC_MODE_PMEM(wc) && !found_entry) {
+ e = writecache_pop_from_freelist(wc, (sector_t)-1);
+ if (unlikely(!e)) {
+ if (!WC_MODE_PMEM(wc) && !found_entry) {
direct_write:
- e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
- if (e) {
- sector_t next_boundary = read_original_sector(wc, e) - bio->bi_iter.bi_sector;
- BUG_ON(!next_boundary);
- if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) {
- dm_accept_partial_bio(bio, next_boundary);
- }
- }
- goto unlock_remap_origin;
- }
- writecache_wait_on_freelist(wc);
- continue;
+ wc->stats.writes_around++;
+ e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
+ return writecache_map_remap_origin(wc, bio, e);
}
- write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count);
- writecache_insert_entry(wc, e);
- wc->uncommitted_blocks++;
+ wc->stats.writes_blocked_on_freelist++;
+ writecache_wait_on_freelist(wc);
+ continue;
+ }
+ write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count);
+ writecache_insert_entry(wc, e);
+ wc->uncommitted_blocks++;
+ wc->stats.writes_allocate++;
bio_copy:
- if (WC_MODE_PMEM(wc)) {
- bio_copy_block(wc, bio, memory_data(wc, e));
- } else {
- unsigned bio_size = wc->block_size;
- sector_t start_cache_sec = cache_sector(wc, e);
- sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT);
-
- while (bio_size < bio->bi_iter.bi_size) {
- if (!search_used) {
- struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec);
- if (!f)
- break;
- write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector +
- (bio_size >> SECTOR_SHIFT), wc->seq_count);
- writecache_insert_entry(wc, f);
- wc->uncommitted_blocks++;
- } else {
- struct wc_entry *f;
- struct rb_node *next = rb_next(&e->rb_node);
- if (!next)
- break;
- f = container_of(next, struct wc_entry, rb_node);
- if (f != e + 1)
- break;
- if (read_original_sector(wc, f) !=
- read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT))
- break;
- if (unlikely(f->write_in_progress))
- break;
- if (writecache_entry_is_committed(wc, f))
- wc->overwrote_committed = true;
- e = f;
- }
- bio_size += wc->block_size;
- current_cache_sec += wc->block_size >> SECTOR_SHIFT;
- }
+ if (WC_MODE_PMEM(wc))
+ bio_copy_block(wc, bio, memory_data(wc, e));
+ else
+ return writecache_bio_copy_ssd(wc, bio, e, search_used);
+ } while (bio->bi_iter.bi_size);
- bio_set_dev(bio, wc->ssd_dev->bdev);
- bio->bi_iter.bi_sector = start_cache_sec;
- dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT);
+ if (unlikely(bio->bi_opf & REQ_FUA || wc->uncommitted_blocks >= wc->autocommit_blocks))
+ writecache_flush(wc);
+ else
+ writecache_schedule_autocommit(wc);
- if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
- wc->uncommitted_blocks = 0;
- queue_work(wc->writeback_wq, &wc->flush_work);
- } else {
- writecache_schedule_autocommit(wc);
- }
- goto unlock_remap;
- }
- } while (bio->bi_iter.bi_size);
+ return WC_MAP_SUBMIT;
+}
- if (unlikely(bio->bi_opf & REQ_FUA ||
- wc->uncommitted_blocks >= wc->autocommit_blocks))
- writecache_flush(wc);
- else
- writecache_schedule_autocommit(wc);
- goto unlock_submit;
+static enum wc_map_op writecache_map_flush(struct dm_writecache *wc, struct bio *bio)
+{
+ if (writecache_has_error(wc))
+ return WC_MAP_ERROR;
+
+ if (WC_MODE_PMEM(wc)) {
+ wc->stats.flushes++;
+ writecache_flush(wc);
+ if (writecache_has_error(wc))
+ return WC_MAP_ERROR;
+ else if (unlikely(wc->cleaner) || unlikely(wc->metadata_only))
+ return WC_MAP_REMAP_ORIGIN;
+ return WC_MAP_SUBMIT;
}
+ /* SSD: */
+ if (dm_bio_get_target_bio_nr(bio))
+ return WC_MAP_REMAP_ORIGIN;
+ wc->stats.flushes++;
+ writecache_offload_bio(wc, bio);
+ return WC_MAP_RETURN;
+}
-unlock_remap_origin:
- if (likely(wc->pause != 0)) {
- if (bio_op(bio) == REQ_OP_WRITE) {
- dm_iot_io_begin(&wc->iot, 1);
- bio->bi_private = (void *)2;
- }
+static enum wc_map_op writecache_map_discard(struct dm_writecache *wc, struct bio *bio)
+{
+ wc->stats.discards++;
+
+ if (writecache_has_error(wc))
+ return WC_MAP_ERROR;
+
+ if (WC_MODE_PMEM(wc)) {
+ writecache_discard(wc, bio->bi_iter.bi_sector, bio_end_sector(bio));
+ return WC_MAP_REMAP_ORIGIN;
}
- bio_set_dev(bio, wc->dev->bdev);
- wc_unlock(wc);
- return DM_MAPIO_REMAPPED;
+ /* SSD: */
+ writecache_offload_bio(wc, bio);
+ return WC_MAP_RETURN;
+}
-unlock_remap:
- /* make sure that writecache_end_io decrements bio_in_progress: */
- bio->bi_private = (void *)1;
- atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]);
- wc_unlock(wc);
- return DM_MAPIO_REMAPPED;
+static int writecache_map(struct dm_target *ti, struct bio *bio)
+{
+ struct dm_writecache *wc = ti->private;
+ enum wc_map_op map_op;
-unlock_submit:
- wc_unlock(wc);
- bio_endio(bio);
- return DM_MAPIO_SUBMITTED;
+ bio->bi_private = NULL;
-unlock_return:
- wc_unlock(wc);
- return DM_MAPIO_SUBMITTED;
+ wc_lock(wc);
-unlock_error:
- wc_unlock(wc);
- bio_io_error(bio);
- return DM_MAPIO_SUBMITTED;
+ if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
+ map_op = writecache_map_flush(wc, bio);
+ goto done;
+ }
+
+ bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
+
+ if (unlikely((((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
+ (wc->block_size / 512 - 1)) != 0)) {
+ DMERR("I/O is not aligned, sector %llu, size %u, block size %u",
+ (unsigned long long)bio->bi_iter.bi_sector,
+ bio->bi_iter.bi_size, wc->block_size);
+ map_op = WC_MAP_ERROR;
+ goto done;
+ }
+
+ if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+ map_op = writecache_map_discard(wc, bio);
+ goto done;
+ }
+
+ if (bio_data_dir(bio) == READ)
+ map_op = writecache_map_read(wc, bio);
+ else
+ map_op = writecache_map_write(wc, bio);
+done:
+ switch (map_op) {
+ case WC_MAP_REMAP_ORIGIN:
+ if (likely(wc->pause != 0)) {
+ if (bio_op(bio) == REQ_OP_WRITE) {
+ dm_iot_io_begin(&wc->iot, 1);
+ bio->bi_private = (void *)2;
+ }
+ }
+ bio_set_dev(bio, wc->dev->bdev);
+ wc_unlock(wc);
+ return DM_MAPIO_REMAPPED;
+
+ case WC_MAP_REMAP:
+ /* make sure that writecache_end_io decrements bio_in_progress: */
+ bio->bi_private = (void *)1;
+ atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]);
+ wc_unlock(wc);
+ return DM_MAPIO_REMAPPED;
+
+ case WC_MAP_SUBMIT:
+ wc_unlock(wc);
+ bio_endio(bio);
+ return DM_MAPIO_SUBMITTED;
+
+ case WC_MAP_RETURN:
+ wc_unlock(wc);
+ return DM_MAPIO_SUBMITTED;
+
+ case WC_MAP_ERROR:
+ wc_unlock(wc);
+ bio_io_error(bio);
+ return DM_MAPIO_SUBMITTED;
+
+ default:
+ BUG();
+ return -1;
+ }
}
static int writecache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status)
@@ -2569,9 +2657,20 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
switch (type) {
case STATUSTYPE_INFO:
- DMEMIT("%ld %llu %llu %llu", writecache_has_error(wc),
+ DMEMIT("%ld %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
+ writecache_has_error(wc),
(unsigned long long)wc->n_blocks, (unsigned long long)wc->freelist_size,
- (unsigned long long)wc->writeback_size);
+ (unsigned long long)wc->writeback_size,
+ wc->stats.reads,
+ wc->stats.read_hits,
+ wc->stats.writes,
+ wc->stats.write_hits_uncommitted,
+ wc->stats.write_hits_committed,
+ wc->stats.writes_around,
+ wc->stats.writes_allocate,
+ wc->stats.writes_blocked_on_freelist,
+ wc->stats.flushes,
+ wc->stats.discards);
break;
case STATUSTYPE_TABLE:
DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's',
@@ -2624,12 +2723,15 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
if (wc->pause_set)
DMEMIT(" pause_writeback %u", wc->pause_value);
break;
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
}
static struct target_type writecache_target = {
.name = "writecache",
- .version = {1, 5, 0},
+ .version = {1, 6, 0},
.module = THIS_MODULE,
.ctr = writecache_ctr,
.dtr = writecache_dtr,
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 7e88df64d197..ae1bc48c0043 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -1119,6 +1119,9 @@ static void dmz_status(struct dm_target *ti, status_type_t type,
DMEMIT(" %s", buf);
}
break;
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
return;
}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 2c5f9e585211..84e9145b1714 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -8,6 +8,7 @@
#include "dm-core.h"
#include "dm-rq.h"
#include "dm-uevent.h"
+#include "dm-ima.h"
#include <linux/init.h>
#include <linux/module.h>
@@ -261,9 +262,13 @@ static void (*_exits[])(void) = {
static int __init dm_init(void)
{
const int count = ARRAY_SIZE(_inits);
-
int r, i;
+#if (IS_ENABLED(CONFIG_IMA) && !IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE))
+ DMWARN("CONFIG_IMA_DISABLE_HTABLE is disabled."
+ " Duplicate IMA measurements will not be recorded in the IMA log.");
+#endif
+
for (i = 0; i < count; i++) {
r = _inits[i]();
if (r)
@@ -271,8 +276,7 @@ static int __init dm_init(void)
}
return 0;
-
- bad:
+bad:
while (i--)
_exits[i]();
@@ -1693,14 +1697,13 @@ static void cleanup_mapped_device(struct mapped_device *md)
spin_lock(&_minor_lock);
md->disk->private_data = NULL;
spin_unlock(&_minor_lock);
- del_gendisk(md->disk);
- }
-
- if (md->queue)
+ if (dm_get_md_type(md) != DM_TYPE_NONE) {
+ dm_sysfs_exit(md);
+ del_gendisk(md->disk);
+ }
dm_queue_destroy_keyslot_manager(md->queue);
-
- if (md->disk)
blk_cleanup_disk(md->disk);
+ }
cleanup_srcu_struct(&md->io_barrier);
@@ -1792,7 +1795,6 @@ static struct mapped_device *alloc_dev(int minor)
goto bad;
}
- add_disk_no_queue_reg(md->disk);
format_dev_t(md->name, MKDEV(_major, minor));
md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
@@ -1993,18 +1995,13 @@ static struct dm_table *__unbind(struct mapped_device *md)
*/
int dm_create(int minor, struct mapped_device **result)
{
- int r;
struct mapped_device *md;
md = alloc_dev(minor);
if (!md)
return -ENXIO;
- r = dm_sysfs_init(md);
- if (r) {
- free_dev(md);
- return r;
- }
+ dm_ima_reset_data(md);
*result = md;
return 0;
@@ -2056,9 +2053,9 @@ EXPORT_SYMBOL_GPL(dm_get_queue_limits);
*/
int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
{
- int r;
+ enum dm_queue_mode type = dm_table_get_type(t);
struct queue_limits limits;
- enum dm_queue_mode type = dm_get_md_type(md);
+ int r;
switch (type) {
case DM_TYPE_REQUEST_BASED:
@@ -2086,8 +2083,14 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
if (r)
return r;
- blk_register_queue(md->disk);
+ add_disk(md->disk);
+ r = dm_sysfs_init(md);
+ if (r) {
+ del_gendisk(md->disk);
+ return r;
+ }
+ md->type = type;
return 0;
}
@@ -2193,7 +2196,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
dm_device_name(md), atomic_read(&md->holders));
- dm_sysfs_exit(md);
dm_table_destroy(__unbind(md));
free_dev(md);
}
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 832547cf038f..4c96c36bd01a 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -764,9 +764,7 @@ struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
{
- int flags = rdev->bdev->bd_disk->flags;
-
- if (!(flags & GENHD_FL_UP)) {
+ if (!disk_live(rdev->bdev->bd_disk)) {
if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
pr_warn("md: %s: %s array has a missing/failed member\n",
mdname(rdev->mddev), md_type);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 3c44c4bb40fc..19598bd38939 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1329,6 +1329,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
struct raid1_plug_cb *plug = NULL;
int first_clone;
int max_sectors;
+ bool write_behind = false;
if (mddev_is_clustered(mddev) &&
md_cluster_ops->area_resyncing(mddev, WRITE,
@@ -1381,6 +1382,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
max_sectors = r1_bio->sectors;
for (i = 0; i < disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+
+ /*
+ * The write-behind io is only attempted on drives marked as
+ * write-mostly, which means we could allocate write behind
+ * bio later.
+ */
+ if (rdev && test_bit(WriteMostly, &rdev->flags))
+ write_behind = true;
+
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
atomic_inc(&rdev->nr_pending);
blocked_rdev = rdev;
@@ -1454,6 +1464,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
goto retry_write;
}
+ /*
+ * When using a bitmap, we may call alloc_behind_master_bio below.
+ * alloc_behind_master_bio allocates a copy of the data payload a page
+ * at a time and thus needs a new bio that can fit the whole payload
+ * this bio in page sized chunks.
+ */
+ if (write_behind && bitmap)
+ max_sectors = min_t(int, max_sectors,
+ BIO_MAX_VECS * (PAGE_SIZE >> 9));
if (max_sectors < bio_sectors(bio)) {
struct bio *split = bio_split(bio, max_sectors,
GFP_NOIO, &conf->bio_split);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 07119d7e0fdf..aa2636582841 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1712,6 +1712,11 @@ retry_discard:
} else
r10_bio->master_bio = (struct bio *)first_r10bio;
+ /*
+ * first select target devices under rcu_lock and
+ * inc refcount on their rdev. Record them by setting
+ * bios[x] to bio
+ */
rcu_read_lock();
for (disk = 0; disk < geo->raid_disks; disk++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
@@ -1743,9 +1748,6 @@ retry_discard:
for (disk = 0; disk < geo->raid_disks; disk++) {
sector_t dev_start, dev_end;
struct bio *mbio, *rbio = NULL;
- struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
- struct md_rdev *rrdev = rcu_dereference(
- conf->mirrors[disk].replacement);
/*
* Now start to calculate the start and end address for each disk.
@@ -1775,9 +1777,12 @@ retry_discard:
/*
* It only handles discard bio which size is >= stripe size, so
- * dev_end > dev_start all the time
+ * dev_end > dev_start all the time.
+ * It doesn't need to use rcu lock to get rdev here. We already
+ * add rdev->nr_pending in the first loop.
*/
if (r10_bio->devs[disk].bio) {
+ struct md_rdev *rdev = conf->mirrors[disk].rdev;
mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
mbio->bi_end_io = raid10_end_discard_request;
mbio->bi_private = r10_bio;
@@ -1790,6 +1795,7 @@ retry_discard:
bio_endio(mbio);
}
if (r10_bio->devs[disk].repl_bio) {
+ struct md_rdev *rrdev = conf->mirrors[disk].replacement;
rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
rbio->bi_end_io = raid10_end_discard_request;
rbio->bi_private = r10_bio;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b8436e4930ed..02ed53b20654 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2437,7 +2437,7 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
conf->scribble_sectors >= new_sectors)
return 0;
mddev_suspend(conf->mddev);
- get_online_cpus();
+ cpus_read_lock();
for_each_present_cpu(cpu) {
struct raid5_percpu *percpu;
@@ -2449,7 +2449,7 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
break;
}
- put_online_cpus();
+ cpus_read_unlock();
mddev_resume(conf->mddev);
if (!err) {
conf->scribble_disks = new_disks;
diff --git a/drivers/media/pci/intel/ipu3/cio2-bridge.c b/drivers/media/pci/intel/ipu3/cio2-bridge.c
index 59a36f922675..30d29b96a339 100644
--- a/drivers/media/pci/intel/ipu3/cio2-bridge.c
+++ b/drivers/media/pci/intel/ipu3/cio2-bridge.c
@@ -226,7 +226,7 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg,
err_free_swnodes:
software_node_unregister_nodes(sensor->swnodes);
err_put_adev:
- acpi_dev_put(sensor->adev);
+ acpi_dev_put(adev);
return ret;
}
diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index afae0afe3f81..3eff08d7b8e5 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -160,7 +160,7 @@ static int lirc_bpf_attach(struct rc_dev *rcdev, struct bpf_prog *prog)
goto unlock;
}
- ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
+ ret = bpf_prog_array_copy(old_array, NULL, prog, 0, &new_array);
if (ret < 0)
goto unlock;
@@ -193,7 +193,7 @@ static int lirc_bpf_detach(struct rc_dev *rcdev, struct bpf_prog *prog)
}
old_array = lirc_rcu_dereference(raw->progs);
- ret = bpf_prog_array_copy(old_array, prog, NULL, &new_array);
+ ret = bpf_prog_array_copy(old_array, prog, NULL, 0, &new_array);
/*
* Do not use bpf_prog_array_delete_safe() as we would end up
* with a dummy entry in the array, and the we would free the
@@ -217,7 +217,7 @@ void lirc_bpf_run(struct rc_dev *rcdev, u32 sample)
raw->bpf_sample = sample;
if (raw->progs)
- BPF_PROG_RUN_ARRAY(raw->progs, &raw->bpf_sample, BPF_PROG_RUN);
+ BPF_PROG_RUN_ARRAY(raw->progs, &raw->bpf_sample, bpf_prog_run);
}
/*
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 4a4573fa7b0f..acf36676e388 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -1105,7 +1105,7 @@ static u16 msb_get_free_block(struct msb_data *msb, int zone)
dbg_verbose("result of the free blocks scan: pba %d", pba);
if (pba == msb->block_count || (msb_get_zone_from_pba(pba)) != zone) {
- pr_err("BUG: cant get a free block");
+ pr_err("BUG: can't get a free block");
msb->read_only = true;
return MS_BLOCK_INVALID;
}
diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c
index 615a83782e55..e79a0218c492 100644
--- a/drivers/memstick/host/r592.c
+++ b/drivers/memstick/host/r592.c
@@ -293,7 +293,7 @@ static int r592_transfer_fifo_dma(struct r592_device *dev)
/* TODO: hidden assumption about nenth beeing always 1 */
sg_count = dma_map_sg(&dev->pci_dev->dev, &dev->req->sg, 1, is_write ?
- PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
+ DMA_TO_DEVICE : DMA_FROM_DEVICE);
if (sg_count != 1 || sg_dma_len(&dev->req->sg) < R592_LFIFO_SIZE) {
message("problem in dma_map_sg");
@@ -310,8 +310,7 @@ static int r592_transfer_fifo_dma(struct r592_device *dev)
}
dma_unmap_sg(&dev->pci_dev->dev, &dev->req->sg, 1, is_write ?
- PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
-
+ DMA_TO_DEVICE : DMA_FROM_DEVICE);
return dev->dma_error;
}
@@ -877,7 +876,7 @@ static SIMPLE_DEV_PM_OPS(r592_pm_ops, r592_suspend, r592_resume);
MODULE_DEVICE_TABLE(pci, r592_pci_id_tbl);
-static struct pci_driver r852_pci_driver = {
+static struct pci_driver r592_pci_driver = {
.name = DRV_NAME,
.id_table = r592_pci_id_tbl,
.probe = r592_probe,
@@ -885,7 +884,7 @@ static struct pci_driver r852_pci_driver = {
.driver.pm = &r592_pm_ops,
};
-module_pci_driver(r852_pci_driver);
+module_pci_driver(r592_pci_driver);
module_param_named(enable_dma, r592_enable_dma, bool, S_IRUGO);
MODULE_PARM_DESC(enable_dma, "Enable usage of the DMA (default)");
diff --git a/drivers/memstick/host/tifm_ms.c b/drivers/memstick/host/tifm_ms.c
index 57145374f6ac..c272453670be 100644
--- a/drivers/memstick/host/tifm_ms.c
+++ b/drivers/memstick/host/tifm_ms.c
@@ -279,8 +279,8 @@ static int tifm_ms_issue_cmd(struct tifm_ms *host)
if (host->use_dma) {
if (1 != tifm_map_sg(sock, &host->req->sg, 1,
host->req->data_dir == READ
- ? PCI_DMA_FROMDEVICE
- : PCI_DMA_TODEVICE)) {
+ ? DMA_FROM_DEVICE
+ : DMA_TO_DEVICE)) {
host->req->error = -ENOMEM;
return host->req->error;
}
@@ -350,8 +350,8 @@ static void tifm_ms_complete_cmd(struct tifm_ms *host)
if (host->use_dma) {
tifm_unmap_sg(sock, &host->req->sg, 1,
host->req->data_dir == READ
- ? PCI_DMA_FROMDEVICE
- : PCI_DMA_TODEVICE);
+ ? DMA_FROM_DEVICE
+ : DMA_TO_DEVICE);
}
writel((~TIFM_CTRL_LED) & readl(sock->addr + SOCK_CONTROL),
@@ -607,8 +607,8 @@ static void tifm_ms_remove(struct tifm_dev *sock)
if (host->use_dma)
tifm_unmap_sg(sock, &host->req->sg, 1,
host->req->data_dir == READ
- ? PCI_DMA_TODEVICE
- : PCI_DMA_FROMDEVICE);
+ ? DMA_TO_DEVICE
+ : DMA_FROM_DEVICE);
host->req->error = -ETIME;
do {
diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index 3bde7fda755f..287da20f1231 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c
@@ -2364,7 +2364,7 @@ static bool read_mailbox_0(void)
for (n = 0; n < NUM_PRCMU_WAKEUPS; n++) {
if (ev & prcmu_irq_bit[n])
- generic_handle_irq(irq_find_mapping(db8500_irq_domain, n));
+ generic_handle_domain_irq(db8500_irq_domain, n);
}
r = true;
break;
diff --git a/drivers/mfd/fsl-imx25-tsadc.c b/drivers/mfd/fsl-imx25-tsadc.c
index 5f6f0a83e1c5..37e5e02a1d05 100644
--- a/drivers/mfd/fsl-imx25-tsadc.c
+++ b/drivers/mfd/fsl-imx25-tsadc.c
@@ -35,10 +35,10 @@ static void mx25_tsadc_irq_handler(struct irq_desc *desc)
regmap_read(tsadc->regs, MX25_TSC_TGSR, &status);
if (status & MX25_TGSR_GCQ_INT)
- generic_handle_irq(irq_find_mapping(tsadc->domain, 1));
+ generic_handle_domain_irq(tsadc->domain, 1);
if (status & MX25_TGSR_TCQ_INT)
- generic_handle_irq(irq_find_mapping(tsadc->domain, 0));
+ generic_handle_domain_irq(tsadc->domain, 0);
chained_irq_exit(chip, desc);
}
diff --git a/drivers/mfd/ioc3.c b/drivers/mfd/ioc3.c
index 99b9c113f964..58656837b7c6 100644
--- a/drivers/mfd/ioc3.c
+++ b/drivers/mfd/ioc3.c
@@ -105,19 +105,15 @@ static void ioc3_irq_handler(struct irq_desc *desc)
struct ioc3_priv_data *ipd = domain->host_data;
struct ioc3 __iomem *regs = ipd->regs;
u32 pending, mask;
- unsigned int irq;
pending = readl(&regs->sio_ir);
mask = readl(&regs->sio_ies);
pending &= mask; /* Mask off not enabled interrupts */
- if (pending) {
- irq = irq_find_mapping(domain, __ffs(pending));
- if (irq)
- generic_handle_irq(irq);
- } else {
+ if (pending)
+ generic_handle_domain_irq(domain, __ffs(pending));
+ else
spurious_interrupt();
- }
}
/*
diff --git a/drivers/mfd/qcom-pm8xxx.c b/drivers/mfd/qcom-pm8xxx.c
index acd172ddcbd6..ec18a04de355 100644
--- a/drivers/mfd/qcom-pm8xxx.c
+++ b/drivers/mfd/qcom-pm8xxx.c
@@ -122,7 +122,7 @@ bail:
static int pm8xxx_irq_block_handler(struct pm_irq_chip *chip, int block)
{
- int pmirq, irq, i, ret = 0;
+ int pmirq, i, ret = 0;
unsigned int bits;
ret = pm8xxx_read_block_irq(chip, block, &bits);
@@ -139,8 +139,7 @@ static int pm8xxx_irq_block_handler(struct pm_irq_chip *chip, int block)
for (i = 0; i < 8; i++) {
if (bits & (1 << i)) {
pmirq = block * 8 + i;
- irq = irq_find_mapping(chip->irqdomain, pmirq);
- generic_handle_irq(irq);
+ generic_handle_domain_irq(chip->irqdomain, pmirq);
}
}
return 0;
@@ -199,7 +198,7 @@ static void pm8xxx_irq_handler(struct irq_desc *desc)
static void pm8821_irq_block_handler(struct pm_irq_chip *chip,
int master, int block)
{
- int pmirq, irq, i, ret;
+ int pmirq, i, ret;
unsigned int bits;
ret = regmap_read(chip->regmap,
@@ -216,8 +215,7 @@ static void pm8821_irq_block_handler(struct pm_irq_chip *chip,
for (i = 0; i < 8; i++) {
if (bits & BIT(i)) {
pmirq = block * 8 + i;
- irq = irq_find_mapping(chip->irqdomain, pmirq);
- generic_handle_irq(irq);
+ generic_handle_domain_irq(chip->irqdomain, pmirq);
}
}
}
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index f4fb5c52b863..a420b59917db 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -208,6 +208,18 @@ config CS5535_CLOCK_EVENT_SRC
MFGPTs have a better resolution and max interval than the
generic PIT, and are suitable for use as high-res timers.
+config GEHC_ACHC
+ tristate "GEHC ACHC support"
+ depends on SPI && SYSFS
+ depends on SOC_IMX53 || COMPILE_TEST
+ select FW_LOADER
+ help
+ Support for GE ACHC microcontroller, that is part of the GE
+ PPD device.
+
+ To compile this driver as a module, choose M here: the
+ module will be called gehc-achc.
+
config HP_ILO
tristate "Channel interface driver for the HP iLO processor"
depends on PCI
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index e92a56d4442f..68b7b0736f16 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_KGDB_TESTS) += kgdbts.o
obj-$(CONFIG_SGI_XP) += sgi-xp/
obj-$(CONFIG_SGI_GRU) += sgi-gru/
obj-$(CONFIG_CS5535_MFGPT) += cs5535-mfgpt.o
+obj-$(CONFIG_GEHC_ACHC) += gehc-achc.o
obj-$(CONFIG_HP_ILO) += hpilo.o
obj-$(CONFIG_APDS9802ALS) += apds9802als.o
obj-$(CONFIG_ISL29003) += isl29003.o
diff --git a/drivers/misc/gehc-achc.c b/drivers/misc/gehc-achc.c
new file mode 100644
index 000000000000..02f33bc60c56
--- /dev/null
+++ b/drivers/misc/gehc-achc.c
@@ -0,0 +1,565 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * datasheet: https://www.nxp.com/docs/en/data-sheet/K20P144M120SF3.pdf
+ *
+ * Copyright (C) 2018-2021 Collabora
+ * Copyright (C) 2018-2021 GE Healthcare
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/gpio/consumer.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/spi/spi.h>
+
+#define ACHC_MAX_FREQ_HZ 300000
+#define ACHC_FAST_READ_FREQ_HZ 1000000
+
+/*
+ * Per-device driver state, allocated in gehc_achc_probe() and stored as the
+ * SPI device's driver data.
+ */
+struct achc_data {
+ /* the SPI device this driver was probed for */
+ struct spi_device *main;
+ /* ancillary SPI device created in probe from the second "reg" entry */
+ struct spi_device *ezport;
+ /* "reset" GPIO requested in probe; toggled by ezport_reset() and sysfs */
+ struct gpio_desc *reset;
+
+ struct mutex device_lock; /* avoid concurrent device access */
+};
+
+#define EZPORT_RESET_DELAY_MS 100
+#define EZPORT_STARTUP_DELAY_MS 200
+#define EZPORT_WRITE_WAIT_MS 10
+#define EZPORT_TRANSFER_SIZE 2048
+
+#define EZPORT_CMD_SP 0x02 /* flash section program */
+#define EZPORT_CMD_RDSR 0x05 /* read status register */
+#define EZPORT_CMD_WREN 0x06 /* write enable */
+#define EZPORT_CMD_FAST_READ 0x0b /* flash read data at high speed */
+#define EZPORT_CMD_RESET 0xb9 /* reset chip */
+#define EZPORT_CMD_BE 0xc7 /* bulk erase */
+#define EZPORT_CMD_SE 0xd8 /* sector erase */
+
+#define EZPORT_SECTOR_SIZE 4096
+#define EZPORT_SECTOR_MASK (EZPORT_SECTOR_SIZE - 1)
+
+#define EZPORT_STATUS_WIP BIT(0) /* write in progress */
+#define EZPORT_STATUS_WEN BIT(1) /* write enable */
+#define EZPORT_STATUS_BEDIS BIT(2) /* bulk erase disable */
+#define EZPORT_STATUS_FLEXRAM BIT(3) /* FlexRAM mode */
+#define EZPORT_STATUS_WEF BIT(6) /* write error flag */
+#define EZPORT_STATUS_FS BIT(7) /* flash security */
+
+/*
+ * Pulse the reset GPIO: drive it to 1, sleep EZPORT_RESET_DELAY_MS, drive it
+ * back to 0 and then sleep EZPORT_STARTUP_DELAY_MS before returning.
+ * Sleeps, so it must not be called from atomic context.
+ */
+static void ezport_reset(struct gpio_desc *reset)
+{
+ gpiod_set_value(reset, 1);
+ msleep(EZPORT_RESET_DELAY_MS);
+ gpiod_set_value(reset, 0);
+ msleep(EZPORT_STARTUP_DELAY_MS);
+}
+
+/*
+ * Switch the device into EzPort programming mode.
+ *
+ * With the SPI bus locked for the whole sequence, an empty message is sent to
+ * assert chip select, the device is reset via @reset while chip select is
+ * asserted, and a second empty message releases chip select again.
+ *
+ * Returns 0 on success or the error returned by spi_sync_locked(). The bus
+ * lock is dropped on every path.
+ */
+static int ezport_start_programming(struct spi_device *spi, struct gpio_desc *reset)
+{
+ struct spi_message msg;
+ /*
+ * NOTE(review): cs_change on the only transfer of the message presumably
+ * leaves chip select asserted once the message completes -- confirm
+ * against the SPI core documentation.
+ */
+ struct spi_transfer assert_cs = {
+ .cs_change = 1,
+ };
+ struct spi_transfer release_cs = { };
+ int ret;
+
+ spi_bus_lock(spi->master);
+
+ /* assert chip select */
+ spi_message_init(&msg);
+ spi_message_add_tail(&assert_cs, &msg);
+ ret = spi_sync_locked(spi, &msg);
+ if (ret)
+ goto fail;
+
+ msleep(EZPORT_STARTUP_DELAY_MS);
+
+ /* reset with asserted chip select to switch into programming mode */
+ ezport_reset(reset);
+
+ /* release chip select */
+ spi_message_init(&msg);
+ spi_message_add_tail(&release_cs, &msg);
+ ret = spi_sync_locked(spi, &msg);
+
+fail:
+ spi_bus_unlock(spi->master);
+ return ret;
+}
+
+/*
+ * Leave EzPort programming mode by resetting the device via @reset. The SPI
+ * bus lock is held around the reset pulse; no SPI message is sent here.
+ */
+static void ezport_stop_programming(struct spi_device *spi, struct gpio_desc *reset)
+{
+ /* reset without asserted chip select to return into normal mode */
+ spi_bus_lock(spi->master);
+ ezport_reset(reset);
+ spi_bus_unlock(spi->master);
+}
+
+/*
+ * Issue an RDSR command and read back the one-byte EzPort status register.
+ *
+ * Returns the status byte on success, the negative errno from spi_w8r8() on
+ * transfer failure, or -EINVAL (with an error logged) when the byte read
+ * back is 0xff.
+ */
+static int ezport_get_status_register(struct spi_device *spi)
+{
+	int status = spi_w8r8(spi, EZPORT_CMD_RDSR);
+
+	if (status < 0)
+		return status;
+
+	if (status != 0xff)
+		return status;
+
+	dev_err(&spi->dev, "Invalid EzPort status, EzPort is not functional!\n");
+	return -EINVAL;
+}
+
+/*
+ * Send the single-byte EzPort RESET command and, if the write succeeded,
+ * sleep EZPORT_STARTUP_DELAY_MS.
+ *
+ * Returns 0 on success or the negative errno from spi_write().
+ */
+static int ezport_soft_reset(struct spi_device *spi)
+{
+	u8 reset_cmd = EZPORT_CMD_RESET;
+	int err = spi_write(spi, &reset_cmd, 1);
+
+	if (err < 0)
+		return err;
+
+	msleep(EZPORT_STARTUP_DELAY_MS);
+	return 0;
+}
+
+/*
+ * Write the one-byte command @cmd and then read the status register.
+ *
+ * Returns the negative errno from spi_write() if the write fails, otherwise
+ * whatever ezport_get_status_register() returns.
+ */
+static int ezport_send_simple(struct spi_device *spi, u8 cmd)
+{
+	int err = spi_write(spi, &cmd, 1);
+
+	if (err >= 0)
+		return ezport_get_status_register(spi);
+
+	return err;
+}
+
+/*
+ * Poll the status register until the write-in-progress (WIP) bit clears.
+ *
+ * The register is read up to @retries times, sleeping EZPORT_WRITE_WAIT_MS
+ * after every read that failed or still had WIP set.
+ *
+ * Returns the result of the last status read: a status byte (which may still
+ * have WIP set if all retries were used up, so callers must check it) or a
+ * negative errno. With @retries == 0 no read happens and -ETIMEDOUT is
+ * returned instead of an uninitialized value.
+ */
+static int ezport_wait_write(struct spi_device *spi, u32 retries)
+{
+	int ret = -ETIMEDOUT;
+	u32 i;
+
+	for (i = 0; i < retries; i++) {
+		ret = ezport_get_status_register(spi);
+		if (ret >= 0 && !(ret & EZPORT_STATUS_WIP))
+			break;
+		msleep(EZPORT_WRITE_WAIT_MS);
+	}
+
+	return ret;
+}
+
+/*
+ * Send the WREN command until the status register reports write enable.
+ *
+ * The command is attempted up to three times. Returns 0 once a valid status
+ * with EZPORT_STATUS_WEN set has been read. If the last attempt failed with
+ * an error, that negative errno is returned; if the status was read but WEN
+ * never got set, -ETIMEDOUT is returned.
+ */
+static int ezport_write_enable(struct spi_device *spi)
+{
+	int ret = 0, retries;
+
+	for (retries = 0; retries < 3; retries++) {
+		ret = ezport_send_simple(spi, EZPORT_CMD_WREN);
+		if (ret > 0 && ret & EZPORT_STATUS_WEN)
+			return 0;
+	}
+
+	/*
+	 * Errors must be handled before looking at status bits: a negative
+	 * errno such as -EINVAL has the WEN bit position set and would
+	 * otherwise be mistaken for a successful write enable.
+	 */
+	if (ret < 0) {
+		dev_err(&spi->dev, "EzPort write enable failed: %d\n", ret);
+		return ret;
+	}
+
+	dev_err(&spi->dev, "EzPort write enable timed out\n");
+	return -ETIMEDOUT;
+}
+
+/*
+ * Erase the whole flash: enable writes, send the bulk erase command and poll
+ * the status register (up to 1000 times) for the erase to finish.
+ *
+ * Returns 0 when no step reported an error, otherwise the negative errno of
+ * the failing step. The final status byte itself is not returned.
+ */
+static int ezport_bulk_erase(struct spi_device *spi)
+{
+	static const u8 erase_cmd = EZPORT_CMD_BE;
+	int err;
+
+	dev_dbg(&spi->dev, "EzPort bulk erase...\n");
+
+	err = ezport_write_enable(spi);
+	if (err < 0)
+		return err;
+
+	err = spi_write(spi, &erase_cmd, 1);
+	if (err < 0)
+		return err;
+
+	err = ezport_wait_write(spi, 1000);
+
+	return err < 0 ? err : 0;
+}
+
+/*
+ * Erase the flash sector starting at @address.
+ *
+ * @address must be aligned to EZPORT_SECTOR_SIZE, otherwise -EINVAL is
+ * returned. On success the return value is the result of
+ * ezport_wait_write() (last status byte or negative errno); errors from
+ * write enable or the SPI write are returned as negative errno.
+ */
+static int ezport_section_erase(struct spi_device *spi, u32 address)
+{
+	u8 query[4];
+	int err;
+
+	dev_dbg(&spi->dev, "Ezport section erase @ 0x%06x...\n", address);
+
+	if (address & EZPORT_SECTOR_MASK)
+		return -EINVAL;
+
+	err = ezport_write_enable(spi);
+	if (err < 0)
+		return err;
+
+	/* command byte followed by the 24-bit address, most significant first */
+	query[0] = EZPORT_CMD_SE;
+	query[1] = (address >> 16) & 0xff;
+	query[2] = (address >> 8) & 0xff;
+	query[3] = address & 0xff;
+
+	err = spi_write(spi, query, sizeof(query));
+	if (err < 0)
+		return err;
+
+	return ezport_wait_write(spi, 200);
+}
+
+/*
+ * Program @payload_size bytes from @payload into flash at @address.
+ *
+ * Enables writes, then sends one SPI message made of a 4-byte header
+ * (section program command plus 24-bit address) followed by the payload,
+ * and finally polls for completion.
+ *
+ * Returns the result of ezport_wait_write() (last status byte or negative
+ * errno) or the negative errno of an earlier failing step.
+ */
+static int ezport_flash_transfer(struct spi_device *spi, u32 address,
+				 const u8 *payload, size_t payload_size)
+{
+	struct spi_transfer xfers[2] = {};
+	u8 *header;
+	int err;
+
+	dev_dbg(&spi->dev, "EzPort write %zu bytes @ 0x%06x...\n", payload_size, address);
+
+	err = ezport_write_enable(spi);
+	if (err < 0)
+		return err;
+
+	header = kmalloc(4, GFP_KERNEL | GFP_DMA);
+	if (!header)
+		return -ENOMEM;
+
+	header[0] = EZPORT_CMD_SP;
+	header[1] = address >> 16;
+	header[2] = address >> 8;
+	header[3] = address >> 0;
+
+	xfers[0].len = 4;
+	xfers[0].tx_buf = header;
+
+	xfers[1].len = payload_size;
+	xfers[1].tx_buf = payload;
+
+	err = spi_sync_transfer(spi, xfers, 2);
+	kfree(header);
+
+	if (err >= 0)
+		return ezport_wait_write(spi, 40);
+
+	return err;
+}
+
+/*
+ * Read @payload_size bytes of flash starting at @address with FAST_READ and
+ * compare them with @payload.
+ *
+ * One buffer holds the 4-byte command header, the dummy byte the device
+ * sends before the data, and the data itself. Both transfers run at
+ * ACHC_FAST_READ_FREQ_HZ.
+ *
+ * Returns 0 if the data matches, -EBADMSG on mismatch (both sides are hex
+ * dumped), -ENOMEM on allocation failure, or the error returned by
+ * spi_sync_transfer().
+ */
+static int ezport_flash_compare(struct spi_device *spi, u32 address,
+				const u8 *payload, size_t payload_size)
+{
+	struct spi_transfer xfers[2] = {};
+	u8 *buffer;
+	int ret;
+
+	buffer = kmalloc(payload_size + 5, GFP_KERNEL | GFP_DMA);
+	if (!buffer)
+		return -ENOMEM;
+
+	buffer[0] = EZPORT_CMD_FAST_READ;
+	buffer[1] = address >> 16;
+	buffer[2] = address >> 8;
+	buffer[3] = address >> 0;
+
+	xfers[0].tx_buf = buffer;
+	xfers[0].len = 4;
+	xfers[0].speed_hz = ACHC_FAST_READ_FREQ_HZ;
+
+	xfers[1].rx_buf = buffer + 4;
+	xfers[1].len = payload_size + 1;
+	xfers[1].speed_hz = ACHC_FAST_READ_FREQ_HZ;
+
+	ret = spi_sync_transfer(spi, xfers, 2);
+	if (ret)
+		goto err;
+
+	/* FAST_READ receives one dummy byte before the real data */
+	ret = memcmp(payload, buffer + 4 + 1, payload_size);
+	if (ret) {
+		ret = -EBADMSG;
+		dev_dbg(&spi->dev, "Verification failure @ %06x\n", address);
+		print_hex_dump_bytes("fw: ", DUMP_PREFIX_OFFSET, payload, payload_size);
+		/* skip the dummy byte so the dump lines up with the fw dump */
+		print_hex_dump_bytes("dev: ", DUMP_PREFIX_OFFSET, buffer + 4 + 1, payload_size);
+	}
+
+err:
+	kfree(buffer);
+	return ret;
+}
+
+/*
+ * Verify that the flash content matches @data (@size bytes, starting at
+ * flash address 0), comparing in chunks of at most EZPORT_TRANSFER_SIZE.
+ *
+ * Returns 0 on match, -EACCES when the status register has the flash
+ * security bit set (read-back is then not attempted), or the error of the
+ * failing status read / chunk comparison.
+ */
+static int ezport_firmware_compare_data(struct spi_device *spi,
+					const u8 *data, size_t size)
+{
+	size_t offset;
+	size_t chunk;
+	int status;
+
+	dev_dbg(&spi->dev, "EzPort compare data with %zu bytes...\n", size);
+
+	status = ezport_get_status_register(spi);
+	if (status < 0)
+		return status;
+
+	if (status & EZPORT_STATUS_FS) {
+		dev_info(&spi->dev, "Device is in secure mode (status=0x%02x)!\n", status);
+		dev_info(&spi->dev, "FW verification is not possible\n");
+		return -EACCES;
+	}
+
+	for (offset = 0; offset < size; offset += chunk) {
+		chunk = min((size_t) EZPORT_TRANSFER_SIZE, size - offset);
+
+		status = ezport_flash_compare(spi, offset, data + offset, chunk);
+		if (status)
+			return status;
+	}
+
+	return 0;
+}
+
+/*
+ * Write @data (@size bytes) to flash starting at address 0, verify it and
+ * soft-reset the device.
+ *
+ * If the status register reports flash security, the whole flash is bulk
+ * erased first. Each sector is erased when the write position reaches its
+ * start, and data is programmed in chunks of at most EZPORT_TRANSFER_SIZE.
+ * After programming, the content is read back and compared; -EACCES from
+ * the comparison (device in secure mode) is tolerated. The soft reset is
+ * issued regardless of the verification result.
+ *
+ * Returns 0 on success. A verification failure is returned in preference to
+ * a soft-reset failure, so that a bad flash is never reported as success.
+ * Other errors are returned as negative errno as soon as they occur.
+ */
+static int ezport_firmware_flash_data(struct spi_device *spi,
+				      const u8 *data, size_t size)
+{
+	int ret, reset_ret;
+	size_t address = 0;
+	size_t transfer_size;
+
+	dev_dbg(&spi->dev, "EzPort flash data with %zu bytes...\n", size);
+
+	ret = ezport_get_status_register(spi);
+	if (ret < 0)
+		return ret;
+
+	if (ret & EZPORT_STATUS_FS) {
+		/* ezport_bulk_erase() returns 0 or a negative errno only */
+		ret = ezport_bulk_erase(spi);
+		if (ret < 0)
+			return ret;
+	}
+
+	while (size - address > 0) {
+		/* erase each sector right before its first chunk is written */
+		if (!(address & EZPORT_SECTOR_MASK)) {
+			ret = ezport_section_erase(spi, address);
+			if (ret < 0)
+				return ret;
+			if (ret & EZPORT_STATUS_WIP || ret & EZPORT_STATUS_WEF)
+				return -EIO;
+		}
+
+		transfer_size = min((size_t) EZPORT_TRANSFER_SIZE, size - address);
+
+		ret = ezport_flash_transfer(spi, address,
+					    data+address, transfer_size);
+		if (ret < 0)
+			return ret;
+		else if (ret & EZPORT_STATUS_WIP)
+			return -ETIMEDOUT;
+		else if (ret & EZPORT_STATUS_WEF)
+			return -EIO;
+
+		address += transfer_size;
+	}
+
+	dev_dbg(&spi->dev, "EzPort verify flashed data...\n");
+	ret = ezport_firmware_compare_data(spi, data, size);
+
+	/* allow missing FW verification in secure mode */
+	if (ret == -EACCES)
+		ret = 0;
+
+	if (ret < 0)
+		dev_err(&spi->dev, "Failed to verify flashed data: %d\n", ret);
+
+	/* keep the verification result; do not let the reset status hide it */
+	reset_ret = ezport_soft_reset(spi);
+	if (reset_ret < 0)
+		dev_warn(&spi->dev, "EzPort reset failed!\n");
+
+	return ret ? ret : reset_ret;
+}
+
+/*
+ * Request the firmware image @fwname and flash it through @spi.
+ *
+ * Returns the error from request_firmware() (after logging it) or the result
+ * of ezport_firmware_flash_data(). The firmware is released before
+ * returning.
+ */
+static int ezport_firmware_load(struct spi_device *spi, const char *fwname)
+{
+	const struct firmware *fw;
+	int err = request_firmware(&fw, fwname, &spi->dev);
+
+	if (err) {
+		dev_err(&spi->dev, "Could not get firmware: %d\n", err);
+		return err;
+	}
+
+	err = ezport_firmware_flash_data(spi, fw->data, fw->size);
+	release_firmware(fw);
+
+	return err;
+}
+
+/**
+ * ezport_flash - flash device firmware
+ * @spi: SPI device for NXP EzPort interface
+ * @reset: the gpio connected to the device reset pin
+ * @fwname: filename of the firmware that should be flashed
+ *
+ * Enters programming mode, loads and flashes @fwname, then leaves
+ * programming mode again whether or not flashing succeeded.
+ *
+ * Context: can sleep
+ *
+ * Return: 0 on success; negative errno on failure
+ */
+static int ezport_flash(struct spi_device *spi, struct gpio_desc *reset, const char *fwname)
+{
+	int err = ezport_start_programming(spi, reset);
+
+	if (err)
+		return err;
+
+	err = ezport_firmware_load(spi, fwname);
+
+	/* always return the device to normal mode */
+	ezport_stop_programming(spi, reset);
+
+	if (!err)
+		dev_dbg(&spi->dev, "Finished FW flashing!\n");
+	else
+		dev_err(&spi->dev, "Failed to flash firmware: %d\n", err);
+
+	return err;
+}
+
+/*
+ * sysfs "update_firmware" (write-only): writing the value 1 flashes
+ * "achc.bin" through the EzPort device under device_lock. Any other input
+ * is rejected with -EINVAL.
+ *
+ * Returns @count on success or the negative errno from ezport_flash().
+ */
+static ssize_t update_firmware_store(struct device *dev, struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct achc_data *achc = dev_get_drvdata(dev);
+	unsigned long request;
+	int err;
+
+	if (kstrtoul(buf, 0, &request) < 0 || request != 1)
+		return -EINVAL;
+
+	mutex_lock(&achc->device_lock);
+	err = ezport_flash(achc->ezport, achc->reset, "achc.bin");
+	mutex_unlock(&achc->device_lock);
+
+	if (err < 0)
+		return err;
+
+	return count;
+}
+static DEVICE_ATTR_WO(update_firmware);
+
+/*
+ * sysfs "reset" read: report the current value of the reset GPIO, read
+ * under device_lock, as a decimal number followed by a newline.
+ *
+ * Returns the number of bytes emitted or the negative value returned by
+ * gpiod_get_value().
+ */
+static ssize_t reset_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct achc_data *achc = dev_get_drvdata(dev);
+	int level;
+
+	mutex_lock(&achc->device_lock);
+	level = gpiod_get_value(achc->reset);
+	mutex_unlock(&achc->device_lock);
+
+	return level < 0 ? level : sysfs_emit(buf, "%d\n", level);
+}
+
+/*
+ * sysfs "reset" write: set the reset GPIO to 0 or 1 under device_lock.
+ * Input that does not parse, or any value greater than 1, yields -EINVAL.
+ *
+ * Returns @count on success.
+ */
+static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	struct achc_data *achc = dev_get_drvdata(dev);
+	unsigned long level;
+
+	if (kstrtoul(buf, 0, &level) < 0 || level > 1)
+		return -EINVAL;
+
+	mutex_lock(&achc->device_lock);
+	gpiod_set_value(achc->reset, level);
+	mutex_unlock(&achc->device_lock);
+
+	return count;
+}
+static DEVICE_ATTR_RW(reset);
+
+/*
+ * sysfs attributes of the device: "update_firmware" (write-only) and
+ * "reset" (read/write). ATTRIBUTE_GROUPS() generates gehc_achc_groups,
+ * which the SPI driver below installs through .dev_groups.
+ */
+static struct attribute *gehc_achc_attrs[] = {
+ &dev_attr_update_firmware.attr,
+ &dev_attr_reset.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(gehc_achc);
+
+/*
+ * devm action registered in probe: unregister the ancillary EzPort SPI
+ * device passed in @data.
+ */
+static void unregister_ezport(void *data)
+{
+	spi_unregister_device(data);
+}
+
+/*
+ * Probe the ACHC device.
+ *
+ * Configures the main SPI device (ACHC_MAX_FREQ_HZ, 8 bits per word, mode 0),
+ * allocates the driver state, creates the ancillary EzPort SPI device from
+ * the second "reg" entry of the device tree node and requests the "reset"
+ * GPIO, initially driven low. The ancillary device is unregistered through
+ * a devm action when the driver is unbound or probe fails later on.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int gehc_achc_probe(struct spi_device *spi)
+{
+	struct achc_data *achc;
+	/* must be u32: of_property_read_u32_index() writes through a u32 * */
+	u32 ezport_reg;
+	int ret;
+
+	spi->max_speed_hz = ACHC_MAX_FREQ_HZ;
+	spi->bits_per_word = 8;
+	spi->mode = SPI_MODE_0;
+
+	achc = devm_kzalloc(&spi->dev, sizeof(*achc), GFP_KERNEL);
+	if (!achc)
+		return -ENOMEM;
+	spi_set_drvdata(spi, achc);
+	achc->main = spi;
+
+	mutex_init(&achc->device_lock);
+
+	/* the second "reg" entry is the chip select of the EzPort interface */
+	ret = of_property_read_u32_index(spi->dev.of_node, "reg", 1, &ezport_reg);
+	if (ret)
+		return dev_err_probe(&spi->dev, ret, "missing second reg entry!\n");
+
+	achc->ezport = spi_new_ancillary_device(spi, ezport_reg);
+	if (IS_ERR(achc->ezport))
+		return PTR_ERR(achc->ezport);
+
+	ret = devm_add_action_or_reset(&spi->dev, unregister_ezport, achc->ezport);
+	if (ret)
+		return ret;
+
+	achc->reset = devm_gpiod_get(&spi->dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(achc->reset))
+		return dev_err_probe(&spi->dev, PTR_ERR(achc->reset), "Could not get reset gpio\n");
+
+	return 0;
+}
+
+/* SPI device ID table; exported for module autoloading. */
+static const struct spi_device_id gehc_achc_id[] = {
+ { "ge,achc", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(spi, gehc_achc_id);
+
+/* Device tree match table: binds to nodes compatible with "ge,achc". */
+static const struct of_device_id gehc_achc_of_match[] = {
+ { .compatible = "ge,achc" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, gehc_achc_of_match);
+
+/*
+ * SPI driver definition. The sysfs attributes are attached through
+ * .dev_groups; no .remove callback is set, teardown relies on the devm
+ * resources registered in gehc_achc_probe().
+ */
+static struct spi_driver gehc_achc_spi_driver = {
+ .driver = {
+ .name = "gehc-achc",
+ .of_match_table = gehc_achc_of_match,
+ .dev_groups = gehc_achc_groups,
+ },
+ .probe = gehc_achc_probe,
+ .id_table = gehc_achc_id,
+};
+module_spi_driver(gehc_achc_spi_driver);
+
+MODULE_DESCRIPTION("GEHC ACHC driver");
+MODULE_AUTHOR("Sebastian Reichel <sebastian.reichel@collabora.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index 88c218a9f8b3..4282b625200f 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -267,6 +267,7 @@ void lkdtm_ARRAY_BOUNDS(void)
kfree(not_checked);
kfree(checked);
pr_err("FAIL: survived array bounds overflow!\n");
+ pr_expected_config(CONFIG_UBSAN_BOUNDS);
}
void lkdtm_CORRUPT_LIST_ADD(void)
@@ -506,53 +507,3 @@ noinline void lkdtm_CORRUPT_PAC(void)
pr_err("XFAIL: this test is arm64-only\n");
#endif
}
-
-void lkdtm_FORTIFY_OBJECT(void)
-{
- struct target {
- char a[10];
- } target[2] = {};
- int result;
-
- /*
- * Using volatile prevents the compiler from determining the value of
- * 'size' at compile time. Without that, we would get a compile error
- * rather than a runtime error.
- */
- volatile int size = 11;
-
- pr_info("trying to read past the end of a struct\n");
-
- result = memcmp(&target[0], &target[1], size);
-
- /* Print result to prevent the code from being eliminated */
- pr_err("FAIL: fortify did not catch an object overread!\n"
- "\"%d\" was the memcmp result.\n", result);
-}
-
-void lkdtm_FORTIFY_SUBOBJECT(void)
-{
- struct target {
- char a[10];
- char b[10];
- } target;
- char *src;
-
- src = kmalloc(20, GFP_KERNEL);
- strscpy(src, "over ten bytes", 20);
-
- pr_info("trying to strcpy past the end of a member of a struct\n");
-
- /*
- * strncpy(target.a, src, 20); will hit a compile error because the
- * compiler knows at build time that target.a < 20 bytes. Use strcpy()
- * to force a runtime error.
- */
- strcpy(target.a, src);
-
- /* Use target.a to prevent the code from being eliminated */
- pr_err("FAIL: fortify did not catch an sub-object overrun!\n"
- "\"%s\" was copied.\n", target.a);
-
- kfree(src);
-}
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 9dda87c6b54a..95b1c6800a22 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -26,7 +26,6 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
-#include <linux/init.h>
#define DEFAULT_COUNT 10
@@ -82,8 +81,7 @@ static struct crashpoint crashpoints[] = {
CRASHPOINT("FS_DEVRW", "ll_rw_block"),
CRASHPOINT("MEM_SWAPOUT", "shrink_inactive_list"),
CRASHPOINT("TIMERADD", "hrtimer_start"),
- CRASHPOINT("SCSI_DISPATCH_CMD", "scsi_dispatch_cmd"),
- CRASHPOINT("IDE_CORE_CP", "generic_ide_ioctl"),
+ CRASHPOINT("SCSI_QUEUE_RQ", "scsi_queue_rq"),
#endif
};
@@ -119,8 +117,6 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(UNSET_SMEP),
CRASHTYPE(CORRUPT_PAC),
CRASHTYPE(UNALIGNED_LOAD_STORE_WRITE),
- CRASHTYPE(FORTIFY_OBJECT),
- CRASHTYPE(FORTIFY_SUBOBJECT),
CRASHTYPE(SLAB_LINEAR_OVERFLOW),
CRASHTYPE(VMALLOC_LINEAR_OVERFLOW),
CRASHTYPE(WRITE_AFTER_FREE),
@@ -180,6 +176,8 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(USERCOPY_KERNEL),
CRASHTYPE(STACKLEAK_ERASING),
CRASHTYPE(CFI_FORWARD_PROTO),
+ CRASHTYPE(FORTIFIED_OBJECT),
+ CRASHTYPE(FORTIFIED_SUBOBJECT),
CRASHTYPE(FORTIFIED_STRSCPY),
CRASHTYPE(DOUBLE_FAULT),
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/drivers/misc/lkdtm/fortify.c b/drivers/misc/lkdtm/fortify.c
index 0f51d31b57ca..d06458a4858e 100644
--- a/drivers/misc/lkdtm/fortify.c
+++ b/drivers/misc/lkdtm/fortify.c
@@ -8,6 +8,59 @@
#include <linux/string.h>
#include <linux/slab.h>
+static volatile int fortify_scratch_space;
+
+/*
+ * FORTIFIED_OBJECT: memcmp() two adjacent 10-byte structs using a length of
+ * 11, i.e. one byte past the end of each object. The FAIL report at the
+ * bottom is only reached if that overread was allowed to complete.
+ */
+void lkdtm_FORTIFIED_OBJECT(void)
+{
+	struct target {
+		char a[10];
+	};
+	struct target objs[2] = {};
+	/*
+	 * The length is volatile so the compiler cannot know its value at
+	 * build time; a known constant would turn this into a compile error
+	 * instead of a runtime one.
+	 */
+	volatile int len = 11;
+	int cmp;
+
+	pr_info("trying to read past the end of a struct\n");
+
+	cmp = memcmp(&objs[0], &objs[1], len);
+
+	/* Park the result in a global so the comparison is not eliminated */
+	fortify_scratch_space = cmp;
+
+	pr_err("FAIL: fortify did not block an object overread!\n");
+	pr_expected_config(CONFIG_FORTIFY_SOURCE);
+}
+
+/*
+ * FORTIFIED_SUBOBJECT: memcpy() a 15-byte string ("over ten bytes" plus its
+ * NUL) into the 10-byte member 'a' of a struct, overrunning into member 'b'.
+ * The FAIL report at the bottom is only reached if the overrun was allowed
+ * to complete.
+ */
+void lkdtm_FORTIFIED_SUBOBJECT(void)
+{
+	struct target {
+		char a[10];
+		char b[10];
+	} target;
+	volatile int size = 20;
+	char *src;
+
+	src = kmalloc(size, GFP_KERNEL);
+	if (!src) {
+		/*
+		 * Bail out rather than hand NULL to strscpy(): the crash we
+		 * want to provoke is the fortify trap, not a NULL dereference.
+		 */
+		pr_info("Unable to allocate source buffer\n");
+		return;
+	}
+	strscpy(src, "over ten bytes", size);
+	size = strlen(src) + 1;
+
+	pr_info("trying to memcpy past the end of a member of a struct\n");
+
+	/*
+	 * memcpy(target.a, src, 20); will hit a compile error because the
+	 * compiler knows at build time that target.a < 20 bytes. Use a
+	 * volatile to force a runtime error.
+	 */
+	memcpy(target.a, src, size);
+
+	/* Store result to global to prevent the code from being eliminated */
+	fortify_scratch_space = target.a[3];
+
+	pr_err("FAIL: fortify did not block an sub-object overrun!\n");
+	pr_expected_config(CONFIG_FORTIFY_SOURCE);
+
+	kfree(src);
+}
/*
* Calls fortified strscpy to test that it returns the same result as vanilla
diff --git a/drivers/misc/lkdtm/heap.c b/drivers/misc/lkdtm/heap.c
index 3d9aae5821a0..8a92f5a800fa 100644
--- a/drivers/misc/lkdtm/heap.c
+++ b/drivers/misc/lkdtm/heap.c
@@ -13,6 +13,13 @@ static struct kmem_cache *a_cache;
static struct kmem_cache *b_cache;
/*
+ * Using volatile here means the compiler cannot ever make assumptions
+ * about this value. This means compile-time length checks involving
+ * this variable cannot be performed; only run-time checks.
+ */
+static volatile int __offset = 1;
+
+/*
* If there aren't guard pages, it's likely that a consecutive allocation will
* let us overflow into the second allocation without overwriting something real.
*/
@@ -24,7 +31,7 @@ void lkdtm_VMALLOC_LINEAR_OVERFLOW(void)
two = vzalloc(PAGE_SIZE);
pr_info("Attempting vmalloc linear overflow ...\n");
- memset(one, 0xAA, PAGE_SIZE + 1);
+ memset(one, 0xAA, PAGE_SIZE + __offset);
vfree(two);
vfree(one);
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 6a30b60519f3..d7d64d9765eb 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -5,13 +5,17 @@
#define pr_fmt(fmt) "lkdtm: " fmt
#include <linux/kernel.h>
+#include <generated/compile.h>
+#include <generated/utsrelease.h>
+
+#define LKDTM_KERNEL "kernel (" UTS_RELEASE " " UTS_MACHINE ")"
#define pr_expected_config(kconfig) \
{ \
if (IS_ENABLED(kconfig)) \
- pr_err("Unexpected! This kernel was built with " #kconfig "=y\n"); \
+ pr_err("Unexpected! This " LKDTM_KERNEL " was built with " #kconfig "=y\n"); \
else \
- pr_warn("This is probably expected, since this kernel was built *without* " #kconfig "=y\n"); \
+ pr_warn("This is probably expected, since this " LKDTM_KERNEL " was built *without* " #kconfig "=y\n"); \
}
#ifndef MODULE
@@ -21,24 +25,24 @@ int lkdtm_check_bool_cmdline(const char *param);
if (IS_ENABLED(kconfig)) { \
switch (lkdtm_check_bool_cmdline(param)) { \
case 0: \
- pr_warn("This is probably expected, since this kernel was built with " #kconfig "=y but booted with '" param "=N'\n"); \
+ pr_warn("This is probably expected, since this " LKDTM_KERNEL " was built with " #kconfig "=y but booted with '" param "=N'\n"); \
break; \
case 1: \
- pr_err("Unexpected! This kernel was built with " #kconfig "=y and booted with '" param "=Y'\n"); \
+ pr_err("Unexpected! This " LKDTM_KERNEL " was built with " #kconfig "=y and booted with '" param "=Y'\n"); \
break; \
default: \
- pr_err("Unexpected! This kernel was built with " #kconfig "=y (and booted without '" param "' specified)\n"); \
+ pr_err("Unexpected! This " LKDTM_KERNEL " was built with " #kconfig "=y (and booted without '" param "' specified)\n"); \
} \
} else { \
switch (lkdtm_check_bool_cmdline(param)) { \
case 0: \
- pr_warn("This is probably expected, as kernel was built *without* " #kconfig "=y and booted with '" param "=N'\n"); \
+ pr_warn("This is probably expected, as this " LKDTM_KERNEL " was built *without* " #kconfig "=y and booted with '" param "=N'\n"); \
break; \
case 1: \
- pr_err("Unexpected! This kernel was built *without* " #kconfig "=y but booted with '" param "=Y'\n"); \
+ pr_err("Unexpected! This " LKDTM_KERNEL " was built *without* " #kconfig "=y but booted with '" param "=Y'\n"); \
break; \
default: \
- pr_err("This is probably expected, since this kernel was built *without* " #kconfig "=y (and booted without '" param "' specified)\n"); \
+ pr_err("This is probably expected, since this " LKDTM_KERNEL " was built *without* " #kconfig "=y (and booted without '" param "' specified)\n"); \
break; \
} \
} \
@@ -74,8 +78,6 @@ void lkdtm_STACK_GUARD_PAGE_TRAILING(void);
void lkdtm_UNSET_SMEP(void);
void lkdtm_DOUBLE_FAULT(void);
void lkdtm_CORRUPT_PAC(void);
-void lkdtm_FORTIFY_OBJECT(void);
-void lkdtm_FORTIFY_SUBOBJECT(void);
/* heap.c */
void __init lkdtm_heap_init(void);
@@ -150,6 +152,8 @@ void lkdtm_STACKLEAK_ERASING(void);
void lkdtm_CFI_FORWARD_PROTO(void);
/* fortify.c */
+void lkdtm_FORTIFIED_OBJECT(void);
+void lkdtm_FORTIFIED_SUBOBJECT(void);
void lkdtm_FORTIFIED_STRSCPY(void);
/* powerpc.c */
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 3bf2bb4fd152..44bac4ad687c 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -31,7 +31,7 @@
*
* Return: written size bytes or < 0 on error
*/
-ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, u8 vtag,
+ssize_t __mei_cl_send(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag,
unsigned int mode)
{
struct mei_device *bus;
@@ -232,8 +232,8 @@ out:
* * < 0 on error
*/
-ssize_t mei_cldev_send_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length,
- u8 vtag)
+ssize_t mei_cldev_send_vtag(struct mei_cl_device *cldev, const u8 *buf,
+ size_t length, u8 vtag)
{
struct mei_cl *cl = cldev->cl;
@@ -296,7 +296,7 @@ EXPORT_SYMBOL_GPL(mei_cldev_recv_nonblock_vtag);
* * written size in bytes
* * < 0 on error
*/
-ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length)
+ssize_t mei_cldev_send(struct mei_cl_device *cldev, const u8 *buf, size_t length)
{
return mei_cldev_send_vtag(cldev, buf, length, 0);
}
@@ -552,7 +552,7 @@ EXPORT_SYMBOL_GPL(mei_cldev_ver);
*
* Return: true if me client is initialized and connected
*/
-bool mei_cldev_enabled(struct mei_cl_device *cldev)
+bool mei_cldev_enabled(const struct mei_cl_device *cldev)
{
return mei_cl_is_connected(cldev->cl);
}
@@ -771,8 +771,8 @@ EXPORT_SYMBOL_GPL(mei_cldev_disable);
* Return: id on success; NULL if no id is matching
*/
static const
-struct mei_cl_device_id *mei_cl_device_find(struct mei_cl_device *cldev,
- struct mei_cl_driver *cldrv)
+struct mei_cl_device_id *mei_cl_device_find(const struct mei_cl_device *cldev,
+ const struct mei_cl_driver *cldrv)
{
const struct mei_cl_device_id *id;
const uuid_le *uuid;
@@ -815,8 +815,8 @@ struct mei_cl_device_id *mei_cl_device_find(struct mei_cl_device *cldev,
*/
static int mei_cl_device_match(struct device *dev, struct device_driver *drv)
{
- struct mei_cl_device *cldev = to_mei_cl_device(dev);
- struct mei_cl_driver *cldrv = to_mei_cl_driver(drv);
+ const struct mei_cl_device *cldev = to_mei_cl_device(dev);
+ const struct mei_cl_driver *cldrv = to_mei_cl_driver(drv);
const struct mei_cl_device_id *found_id;
if (!cldev)
diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h
index b12cdcde9436..418056fb1489 100644
--- a/drivers/misc/mei/client.h
+++ b/drivers/misc/mei/client.h
@@ -160,7 +160,7 @@ int mei_cl_vt_support_check(const struct mei_cl *cl);
*
* Return: true if the host client is connected
*/
-static inline bool mei_cl_is_connected(struct mei_cl *cl)
+static inline bool mei_cl_is_connected(const struct mei_cl *cl)
{
return cl->state == MEI_FILE_CONNECTED;
}
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
index b7b6ef344e80..694f866f87ef 100644
--- a/drivers/misc/mei/mei_dev.h
+++ b/drivers/misc/mei/mei_dev.h
@@ -356,7 +356,7 @@ struct mei_hw_ops {
/* MEI bus API*/
void mei_cl_bus_rescan_work(struct work_struct *work);
void mei_cl_bus_dev_fixup(struct mei_cl_device *dev);
-ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, u8 vtag,
+ssize_t __mei_cl_send(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag,
unsigned int mode);
ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length, u8 *vtag,
unsigned int mode, unsigned long timeout);
diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c
index 1b2868ca4f2a..d1137a95ad02 100644
--- a/drivers/misc/pci_endpoint_test.c
+++ b/drivers/misc/pci_endpoint_test.c
@@ -862,6 +862,7 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
err = -ENOMEM;
goto err_release_irq;
}
+ misc_device->parent = &pdev->dev;
misc_device->fops = &pci_endpoint_test_fops,
err = misc_register(misc_device);
diff --git a/drivers/misc/pvpanic/pvpanic-pci.c b/drivers/misc/pvpanic/pvpanic-pci.c
index a43c401017ae..741116b3d995 100644
--- a/drivers/misc/pvpanic/pvpanic-pci.c
+++ b/drivers/misc/pvpanic/pvpanic-pci.c
@@ -108,4 +108,6 @@ static struct pci_driver pvpanic_pci_driver = {
},
};
+MODULE_DEVICE_TABLE(pci, pvpanic_pci_id_tbl);
+
module_pci_driver(pvpanic_pci_driver);
diff --git a/drivers/misc/pvpanic/pvpanic.c b/drivers/misc/pvpanic/pvpanic.c
index 02b807c788c9..bb7aa6368538 100644
--- a/drivers/misc/pvpanic/pvpanic.c
+++ b/drivers/misc/pvpanic/pvpanic.c
@@ -85,6 +85,8 @@ int devm_pvpanic_probe(struct device *dev, struct pvpanic_instance *pi)
list_add(&pi->list, &pvpanic_list);
spin_unlock(&pvpanic_lock);
+ dev_set_drvdata(dev, pi);
+
return devm_add_action_or_reset(dev, pvpanic_remove, pi);
}
EXPORT_SYMBOL_GPL(devm_pvpanic_probe);
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index 40ac59dd018c..9afda47efbf2 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -282,7 +282,7 @@ static void gru_unload_mm_tracker(struct gru_state *gru,
*/
void gts_drop(struct gru_thread_state *gts)
{
- if (gts && atomic_dec_return(&gts->ts_refcnt) == 0) {
+ if (gts && refcount_dec_and_test(&gts->ts_refcnt)) {
if (gts->ts_gms)
gru_drop_mmu_notifier(gts->ts_gms);
kfree(gts);
@@ -323,7 +323,7 @@ struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
STAT(gts_alloc);
memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */
- atomic_set(&gts->ts_refcnt, 1);
+ refcount_set(&gts->ts_refcnt, 1);
mutex_init(&gts->ts_ctxlock);
gts->ts_cbr_au_count = cbr_au_count;
gts->ts_dsr_au_count = dsr_au_count;
@@ -888,7 +888,7 @@ again:
gts->ts_gru = gru;
gts->ts_blade = gru->gs_blade_id;
gts->ts_ctxnum = gru_assign_context_number(gru);
- atomic_inc(&gts->ts_refcnt);
+ refcount_inc(&gts->ts_refcnt);
gru->gs_gts[gts->ts_ctxnum] = gts;
spin_unlock(&gru->gs_lock);
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
index 5ce8f3081e96..e4c067c61251 100644
--- a/drivers/misc/sgi-gru/grutables.h
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -129,6 +129,7 @@
*
*/
+#include <linux/refcount.h>
#include <linux/rmap.h>
#include <linux/interrupt.h>
#include <linux/mutex.h>
@@ -358,7 +359,7 @@ struct gru_thread_state {
enabled */
int ts_ctxnum; /* context number where the
context is loaded */
- atomic_t ts_refcnt; /* reference count GTS */
+ refcount_t ts_refcnt; /* reference count GTS */
unsigned char ts_dsr_au_count;/* Number of DSR resources
required for contest */
unsigned char ts_cbr_au_count;/* Number of CBR resources
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index 7791bde81a36..ba9ae0e2df0f 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -1742,7 +1742,7 @@ xpc_init_mq_node(int nid)
{
int cpu;
- get_online_cpus();
+ cpus_read_lock();
for_each_cpu(cpu, cpumask_of_node(nid)) {
xpc_activate_mq_uv =
@@ -1753,7 +1753,7 @@ xpc_init_mq_node(int nid)
break;
}
if (IS_ERR(xpc_activate_mq_uv)) {
- put_online_cpus();
+ cpus_read_unlock();
return PTR_ERR(xpc_activate_mq_uv);
}
@@ -1767,11 +1767,11 @@ xpc_init_mq_node(int nid)
}
if (IS_ERR(xpc_notify_mq_uv)) {
xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
- put_online_cpus();
+ cpus_read_unlock();
return PTR_ERR(xpc_notify_mq_uv);
}
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
diff --git a/drivers/misc/sram.c b/drivers/misc/sram.c
index 93638ae2753a..4c26b19f5154 100644
--- a/drivers/misc/sram.c
+++ b/drivers/misc/sram.c
@@ -97,7 +97,24 @@ static int sram_add_partition(struct sram_dev *sram, struct sram_reserve *block,
struct sram_partition *part = &sram->partition[sram->partitions];
mutex_init(&part->lock);
- part->base = sram->virt_base + block->start;
+
+ if (sram->config && sram->config->map_only_reserved) {
+ void __iomem *virt_base;
+
+ if (sram->no_memory_wc)
+ virt_base = devm_ioremap_resource(sram->dev, &block->res);
+ else
+ virt_base = devm_ioremap_resource_wc(sram->dev, &block->res);
+
+ if (IS_ERR(virt_base)) {
+ dev_err(sram->dev, "could not map SRAM at %pr\n", &block->res);
+ return PTR_ERR(virt_base);
+ }
+
+ part->base = virt_base;
+ } else {
+ part->base = sram->virt_base + block->start;
+ }
if (block->pool) {
ret = sram_add_pool(sram, block, start, part);
@@ -198,6 +215,7 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
block->start = child_res.start - res->start;
block->size = resource_size(&child_res);
+ block->res = child_res;
list_add_tail(&block->list, &reserve_list);
if (of_find_property(child, "export", NULL))
@@ -295,15 +313,17 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
*/
cur_size = block->start - cur_start;
- dev_dbg(sram->dev, "adding chunk 0x%lx-0x%lx\n",
- cur_start, cur_start + cur_size);
+ if (sram->pool) {
+ dev_dbg(sram->dev, "adding chunk 0x%lx-0x%lx\n",
+ cur_start, cur_start + cur_size);
- ret = gen_pool_add_virt(sram->pool,
- (unsigned long)sram->virt_base + cur_start,
- res->start + cur_start, cur_size, -1);
- if (ret < 0) {
- sram_free_partitions(sram);
- goto err_chunks;
+ ret = gen_pool_add_virt(sram->pool,
+ (unsigned long)sram->virt_base + cur_start,
+ res->start + cur_start, cur_size, -1);
+ if (ret < 0) {
+ sram_free_partitions(sram);
+ goto err_chunks;
+ }
}
/* next allocation after this reserved block */
@@ -331,40 +351,63 @@ static int atmel_securam_wait(void)
10000, 500000);
}
+static const struct sram_config atmel_securam_config = {
+ .init = atmel_securam_wait,
+};
+
+/*
+ * SYSRAM contains areas that are not accessible by the
+ * kernel, such as the first 256K that is reserved for TZ.
+ * Accesses to those areas (including speculative accesses)
+ * trigger SErrors. As such we must map only the areas of
+ * SYSRAM specified in the device tree.
+ */
+static const struct sram_config tegra_sysram_config = {
+ .map_only_reserved = true,
+};
+
static const struct of_device_id sram_dt_ids[] = {
{ .compatible = "mmio-sram" },
- { .compatible = "atmel,sama5d2-securam", .data = atmel_securam_wait },
+ { .compatible = "atmel,sama5d2-securam", .data = &atmel_securam_config },
+ { .compatible = "nvidia,tegra186-sysram", .data = &tegra_sysram_config },
+ { .compatible = "nvidia,tegra194-sysram", .data = &tegra_sysram_config },
{}
};
static int sram_probe(struct platform_device *pdev)
{
+ const struct sram_config *config;
struct sram_dev *sram;
int ret;
struct resource *res;
- int (*init_func)(void);
+
+ config = of_device_get_match_data(&pdev->dev);
sram = devm_kzalloc(&pdev->dev, sizeof(*sram), GFP_KERNEL);
if (!sram)
return -ENOMEM;
sram->dev = &pdev->dev;
+ sram->no_memory_wc = of_property_read_bool(pdev->dev.of_node, "no-memory-wc");
+ sram->config = config;
+
+ if (!config || !config->map_only_reserved) {
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (sram->no_memory_wc)
+ sram->virt_base = devm_ioremap_resource(&pdev->dev, res);
+ else
+ sram->virt_base = devm_ioremap_resource_wc(&pdev->dev, res);
+ if (IS_ERR(sram->virt_base)) {
+ dev_err(&pdev->dev, "could not map SRAM registers\n");
+ return PTR_ERR(sram->virt_base);
+ }
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (of_property_read_bool(pdev->dev.of_node, "no-memory-wc"))
- sram->virt_base = devm_ioremap_resource(&pdev->dev, res);
- else
- sram->virt_base = devm_ioremap_resource_wc(&pdev->dev, res);
- if (IS_ERR(sram->virt_base)) {
- dev_err(&pdev->dev, "could not map SRAM registers\n");
- return PTR_ERR(sram->virt_base);
+ sram->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY),
+ NUMA_NO_NODE, NULL);
+ if (IS_ERR(sram->pool))
+ return PTR_ERR(sram->pool);
}
- sram->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY),
- NUMA_NO_NODE, NULL);
- if (IS_ERR(sram->pool))
- return PTR_ERR(sram->pool);
-
sram->clk = devm_clk_get(sram->dev, NULL);
if (IS_ERR(sram->clk))
sram->clk = NULL;
@@ -378,15 +421,15 @@ static int sram_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, sram);
- init_func = of_device_get_match_data(&pdev->dev);
- if (init_func) {
- ret = init_func();
+ if (config && config->init) {
+ ret = config->init();
if (ret)
goto err_free_partitions;
}
- dev_dbg(sram->dev, "SRAM pool: %zu KiB @ 0x%p\n",
- gen_pool_size(sram->pool) / 1024, sram->virt_base);
+ if (sram->pool)
+ dev_dbg(sram->dev, "SRAM pool: %zu KiB @ 0x%p\n",
+ gen_pool_size(sram->pool) / 1024, sram->virt_base);
return 0;
@@ -405,7 +448,7 @@ static int sram_remove(struct platform_device *pdev)
sram_free_partitions(sram);
- if (gen_pool_avail(sram->pool) < gen_pool_size(sram->pool))
+ if (sram->pool && gen_pool_avail(sram->pool) < gen_pool_size(sram->pool))
dev_err(sram->dev, "removed while SRAM allocated\n");
if (sram->clk)
diff --git a/drivers/misc/sram.h b/drivers/misc/sram.h
index 9c1d21ff7347..d2058d8c8f1d 100644
--- a/drivers/misc/sram.h
+++ b/drivers/misc/sram.h
@@ -5,6 +5,11 @@
#ifndef __SRAM_H
#define __SRAM_H
+/*
+ * Per-compatible configuration, attached as OF match data in sram.c.
+ */
+struct sram_config {
+ /* Optional hook run by sram_probe(); a non-zero return fails the probe. */
+ int (*init)(void);
+ /*
+ * When true, sram_probe() does not map the whole resource or create the
+ * pool; each partition maps its own reserved region instead.
+ */
+ bool map_only_reserved;
+};
+
struct sram_partition {
void __iomem *base;
@@ -15,8 +20,11 @@ struct sram_partition {
};
struct sram_dev {
+ const struct sram_config *config;
+
struct device *dev;
void __iomem *virt_base;
+ bool no_memory_wc;
struct gen_pool *pool;
struct clk *clk;
@@ -29,6 +37,7 @@ struct sram_reserve {
struct list_head list;
u32 start;
u32 size;
+ struct resource res;
bool export;
bool pool;
bool protect_exec;
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index 880c33ab9f47..94ebf7f3fd58 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -2243,7 +2243,8 @@ int vmci_qp_broker_map(struct vmci_handle handle,
result = VMCI_SUCCESS;
- if (context_id != VMCI_HOST_CONTEXT_ID) {
+ if (context_id != VMCI_HOST_CONTEXT_ID &&
+ !QPBROKERSTATE_HAS_MEM(entry)) {
struct vmci_qp_page_store page_store;
page_store.pages = guest_mem;
@@ -2350,7 +2351,8 @@ int vmci_qp_broker_unmap(struct vmci_handle handle,
goto out;
}
- if (context_id != VMCI_HOST_CONTEXT_ID) {
+ if (context_id != VMCI_HOST_CONTEXT_ID &&
+ QPBROKERSTATE_HAS_MEM(entry)) {
qp_acquire_queue_mutex(entry->produce_q);
result = qp_save_headers(entry);
if (result < VMCI_SUCCESS)
diff --git a/drivers/mmc/core/Kconfig b/drivers/mmc/core/Kconfig
index ae8b69aee619..6f25c34e4fec 100644
--- a/drivers/mmc/core/Kconfig
+++ b/drivers/mmc/core/Kconfig
@@ -15,7 +15,7 @@ config PWRSEQ_EMMC
config PWRSEQ_SD8787
tristate "HW reset support for SD8787 BT + Wifi module"
- depends on OF && (MWIFIEX || BT_MRVL_SDIO || LIBERTAS_SDIO)
+ depends on OF && (MWIFIEX || BT_MRVL_SDIO || LIBERTAS_SDIO || WILC1000_SDIO)
help
This selects hardware reset support for the SD8787 BT + Wifi
module. By default this option is set to n.
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index ce8aed562929..431af5e8be2f 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -98,6 +98,11 @@ static int max_devices;
static DEFINE_IDA(mmc_blk_ida);
static DEFINE_IDA(mmc_rpmb_ida);
+/*
+ * Context passed to mmc_blk_busy_cb() while polling for busy.
+ */
+struct mmc_blk_busy_data {
+ /* Card whose status is queried on each poll. */
+ struct mmc_card *card;
+ /* OR of every status word read so far (accumulated error bits). */
+ u32 status;
+};
+
/*
* There is one mmc_blk_data per slot.
*/
@@ -128,8 +133,6 @@ struct mmc_blk_data {
* track of the current selected device partition.
*/
unsigned int part_curr;
- struct device_attribute force_ro;
- struct device_attribute power_ro_lock;
int area_type;
/* debugfs files (only in main mmc_blk_data) */
@@ -281,6 +284,9 @@ out_put:
return count;
}
+static DEVICE_ATTR(ro_lock_until_next_power_on, 0,
+ power_ro_lock_show, power_ro_lock_store);
+
static ssize_t force_ro_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -313,6 +319,44 @@ out:
return ret;
}
+static DEVICE_ATTR(force_ro, 0644, force_ro_show, force_ro_store);
+
+static struct attribute *mmc_disk_attrs[] = {
+ &dev_attr_force_ro.attr,
+ &dev_attr_ro_lock_until_next_power_on.attr,
+ NULL,
+};
+
+/*
+ * sysfs .is_visible callback for the mmc disk attribute group.
+ *
+ * Every attribute keeps its declared mode, except
+ * ro_lock_until_next_power_on on a boot area of a card whose boot partitions
+ * are lockable: that one becomes world-readable, and additionally
+ * owner-writable unless power-on write protection has been disabled.
+ *
+ * Returns the mode to use for attribute @a.
+ */
+static umode_t mmc_disk_attrs_is_visible(struct kobject *kobj,
+					 struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	umode_t mode = a->mode;
+	bool is_lock_attr = (a == &dev_attr_ro_lock_until_next_power_on.attr);
+
+	/* md is only looked at for the lock attribute, as before */
+	if (is_lock_attr && (md->area_type & MMC_BLK_DATA_AREA_BOOT)) {
+		struct mmc_card *card = md->queue.card;
+
+		if (card->ext_csd.boot_ro_lockable) {
+			if (card->ext_csd.boot_ro_lock &
+			    EXT_CSD_BOOT_WP_B_PWR_WP_DIS)
+				mode = S_IRUGO;
+			else
+				mode = S_IRUGO | S_IWUSR;
+		}
+	}
+
+	mmc_blk_put(md);
+	return mode;
+}
+
+static const struct attribute_group mmc_disk_attr_group = {
+ .is_visible = mmc_disk_attrs_is_visible,
+ .attrs = mmc_disk_attrs,
+};
+
+static const struct attribute_group *mmc_disk_attr_groups[] = {
+ &mmc_disk_attr_group,
+ NULL,
+};
+
static int mmc_blk_open(struct block_device *bdev, fmode_t mode)
{
struct mmc_blk_data *md = mmc_blk_get(bdev->bd_disk);
@@ -417,42 +461,6 @@ static int mmc_blk_ioctl_copy_to_user(struct mmc_ioc_cmd __user *ic_ptr,
return 0;
}
-static int card_busy_detect(struct mmc_card *card, unsigned int timeout_ms,
- u32 *resp_errs)
-{
- unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
- int err = 0;
- u32 status;
-
- do {
- bool done = time_after(jiffies, timeout);
-
- err = __mmc_send_status(card, &status, 5);
- if (err) {
- dev_err(mmc_dev(card->host),
- "error %d requesting status\n", err);
- return err;
- }
-
- /* Accumulate any response error bits seen */
- if (resp_errs)
- *resp_errs |= status;
-
- /*
- * Timeout if the device never becomes ready for data and never
- * leaves the program state.
- */
- if (done) {
- dev_err(mmc_dev(card->host),
- "Card stuck in wrong state! %s status: %#x\n",
- __func__, status);
- return -ETIMEDOUT;
- }
- } while (!mmc_ready_for_data(status));
-
- return err;
-}
-
static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md,
struct mmc_blk_ioc_data *idata)
{
@@ -549,6 +557,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md,
return mmc_sanitize(card, idata->ic.cmd_timeout_ms);
mmc_wait_for_req(card->host, &mrq);
+ memcpy(&idata->ic.response, cmd.resp, sizeof(cmd.resp));
if (cmd.error) {
dev_err(mmc_dev(card->host), "%s: cmd error %d\n",
@@ -598,14 +607,13 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md,
if (idata->ic.postsleep_min_us)
usleep_range(idata->ic.postsleep_min_us, idata->ic.postsleep_max_us);
- memcpy(&(idata->ic.response), cmd.resp, sizeof(cmd.resp));
-
if (idata->rpmb || (cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B) {
/*
* Ensure RPMB/R1B command has completed by polling CMD13
* "Send Status".
*/
- err = card_busy_detect(card, MMC_BLK_TIMEOUT_MS, NULL);
+ err = mmc_poll_for_busy(card, MMC_BLK_TIMEOUT_MS, false,
+ MMC_BUSY_IO);
}
return err;
@@ -792,6 +800,26 @@ static int mmc_blk_compat_ioctl(struct block_device *bdev, fmode_t mode,
}
#endif
+/*
+ * block_device_operations.alternative_gpt_sector hook: forwards the query to
+ * mmc_card_alternative_gpt_sector() for the card behind @disk.
+ *
+ * Returns -EINVAL if no mmc_blk_data reference can be taken for @disk,
+ * -ENODEV if that data has no card, otherwise whatever the card helper
+ * returns.
+ */
+static int mmc_blk_alternative_gpt_sector(struct gendisk *disk,
+					  sector_t *sector)
+{
+	struct mmc_blk_data *blk = mmc_blk_get(disk);
+	int rc = -ENODEV;
+
+	if (!blk)
+		return -EINVAL;
+
+	if (blk->queue.card)
+		rc = mmc_card_alternative_gpt_sector(blk->queue.card, sector);
+
+	/* Drop the reference taken by mmc_blk_get() above */
+	mmc_blk_put(blk);
+
+	return rc;
+}
+
static const struct block_device_operations mmc_bdops = {
.open = mmc_blk_open,
.release = mmc_blk_release,
@@ -801,6 +829,7 @@ static const struct block_device_operations mmc_bdops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = mmc_blk_compat_ioctl,
#endif
+ .alternative_gpt_sector = mmc_blk_alternative_gpt_sector,
};
static int mmc_blk_part_switch_pre(struct mmc_card *card,
@@ -1636,7 +1665,7 @@ static int mmc_blk_fix_state(struct mmc_card *card, struct request *req)
mmc_blk_send_stop(card, timeout);
- err = card_busy_detect(card, timeout, NULL);
+ err = mmc_poll_for_busy(card, timeout, false, MMC_BUSY_IO);
mmc_retune_release(card->host);
@@ -1851,28 +1880,48 @@ static inline bool mmc_blk_rq_error(struct mmc_blk_request *brq)
brq->data.error || brq->cmd.resp[0] & CMD_ERRORS;
}
+/*
+ * Busy-poll callback: read the card status once, fold it into the
+ * accumulated status in @cb_data (a struct mmc_blk_busy_data), and report
+ * through @busy whether the card is still not ready for data.
+ *
+ * Returns 0 on success or the error from mmc_send_status(); on error neither
+ * the accumulated status nor @busy is touched.
+ */
+static int mmc_blk_busy_cb(void *cb_data, bool *busy)
+{
+	struct mmc_blk_busy_data *ctx = cb_data;
+	u32 card_status = 0;
+	int ret;
+
+	ret = mmc_send_status(ctx->card, &card_status);
+	if (!ret) {
+		/* Accumulate response error bits. */
+		ctx->status |= card_status;
+		*busy = !mmc_ready_for_data(card_status);
+	}
+
+	return ret;
+}
+
static int mmc_blk_card_busy(struct mmc_card *card, struct request *req)
{
struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);
- u32 status = 0;
+ struct mmc_blk_busy_data cb_data;
int err;
if (mmc_host_is_spi(card->host) || rq_data_dir(req) == READ)
return 0;
- err = card_busy_detect(card, MMC_BLK_TIMEOUT_MS, &status);
+ cb_data.card = card;
+ cb_data.status = 0;
+ err = __mmc_poll_for_busy(card, MMC_BLK_TIMEOUT_MS, &mmc_blk_busy_cb,
+ &cb_data);
/*
* Do not assume data transferred correctly if there are any error bits
* set.
*/
- if (status & mmc_blk_stop_err_bits(&mqrq->brq)) {
+ if (cb_data.status & mmc_blk_stop_err_bits(&mqrq->brq)) {
mqrq->brq.data.bytes_xfered = 0;
err = err ? err : -EIO;
}
/* Copy the exception bit so it will be seen later on */
- if (mmc_card_mmc(card) && status & R1_EXCEPTION_EVENT)
+ if (mmc_card_mmc(card) && cb_data.status & R1_EXCEPTION_EVENT)
mqrq->brq.cmd.resp[0] |= R1_EXCEPTION_EVENT;
return err;
@@ -2289,7 +2338,8 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
sector_t size,
bool default_ro,
const char *subname,
- int area_type)
+ int area_type,
+ unsigned int part_type)
{
struct mmc_blk_data *md;
int devidx, ret;
@@ -2336,6 +2386,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
kref_init(&md->kref);
md->queue.blkdata = md;
+ md->part_type = part_type;
md->disk->major = MMC_BLOCK_MAJOR;
md->disk->minors = perdev_minors;
@@ -2388,6 +2439,10 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
md->disk->disk_name, mmc_card_id(card), mmc_card_name(card),
cap_str, md->read_only ? "(ro)" : "");
+ /* used in ->open, must be set before add_disk: */
+ if (area_type == MMC_BLK_DATA_AREA_MAIN)
+ dev_set_drvdata(&card->dev, md);
+ device_add_disk(md->parent, md->disk, mmc_disk_attr_groups);
return md;
err_kfree:
@@ -2417,7 +2472,7 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
}
return mmc_blk_alloc_req(card, &card->dev, size, false, NULL,
- MMC_BLK_DATA_AREA_MAIN);
+ MMC_BLK_DATA_AREA_MAIN, 0);
}
static int mmc_blk_alloc_part(struct mmc_card *card,
@@ -2431,10 +2486,9 @@ static int mmc_blk_alloc_part(struct mmc_card *card,
struct mmc_blk_data *part_md;
part_md = mmc_blk_alloc_req(card, disk_to_dev(md->disk), size, default_ro,
- subname, area_type);
+ subname, area_type, part_type);
if (IS_ERR(part_md))
return PTR_ERR(part_md);
- part_md->part_type = part_type;
list_add(&part_md->part, &md->part);
return 0;
@@ -2635,27 +2689,13 @@ static int mmc_blk_alloc_parts(struct mmc_card *card, struct mmc_blk_data *md)
static void mmc_blk_remove_req(struct mmc_blk_data *md)
{
- struct mmc_card *card;
-
- if (md) {
- /*
- * Flush remaining requests and free queues. It
- * is freeing the queue that stops new requests
- * from being accepted.
- */
- card = md->queue.card;
- if (md->disk->flags & GENHD_FL_UP) {
- device_remove_file(disk_to_dev(md->disk), &md->force_ro);
- if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
- card->ext_csd.boot_ro_lockable)
- device_remove_file(disk_to_dev(md->disk),
- &md->power_ro_lock);
-
- del_gendisk(md->disk);
- }
- mmc_cleanup_queue(&md->queue);
- mmc_blk_put(md);
- }
+ /*
+ * Flush remaining requests and free queues. It is freeing the queue
+ * that stops new requests from being accepted.
+ */
+ del_gendisk(md->disk);
+ mmc_cleanup_queue(&md->queue);
+ mmc_blk_put(md);
}
static void mmc_blk_remove_parts(struct mmc_card *card,
@@ -2679,51 +2719,6 @@ static void mmc_blk_remove_parts(struct mmc_card *card,
}
}
-static int mmc_add_disk(struct mmc_blk_data *md)
-{
- int ret;
- struct mmc_card *card = md->queue.card;
-
- device_add_disk(md->parent, md->disk, NULL);
- md->force_ro.show = force_ro_show;
- md->force_ro.store = force_ro_store;
- sysfs_attr_init(&md->force_ro.attr);
- md->force_ro.attr.name = "force_ro";
- md->force_ro.attr.mode = S_IRUGO | S_IWUSR;
- ret = device_create_file(disk_to_dev(md->disk), &md->force_ro);
- if (ret)
- goto force_ro_fail;
-
- if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
- card->ext_csd.boot_ro_lockable) {
- umode_t mode;
-
- if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PWR_WP_DIS)
- mode = S_IRUGO;
- else
- mode = S_IRUGO | S_IWUSR;
-
- md->power_ro_lock.show = power_ro_lock_show;
- md->power_ro_lock.store = power_ro_lock_store;
- sysfs_attr_init(&md->power_ro_lock.attr);
- md->power_ro_lock.attr.mode = mode;
- md->power_ro_lock.attr.name =
- "ro_lock_until_next_power_on";
- ret = device_create_file(disk_to_dev(md->disk),
- &md->power_ro_lock);
- if (ret)
- goto power_ro_lock_fail;
- }
- return ret;
-
-power_ro_lock_fail:
- device_remove_file(disk_to_dev(md->disk), &md->force_ro);
-force_ro_fail:
- del_gendisk(md->disk);
-
- return ret;
-}
-
#ifdef CONFIG_DEBUG_FS
static int mmc_dbg_card_status_get(void *data, u64 *val)
@@ -2889,7 +2884,7 @@ static void mmc_blk_remove_debugfs(struct mmc_card *card,
static int mmc_blk_probe(struct mmc_card *card)
{
- struct mmc_blk_data *md, *part_md;
+ struct mmc_blk_data *md;
int ret = 0;
/*
@@ -2917,18 +2912,6 @@ static int mmc_blk_probe(struct mmc_card *card)
if (ret)
goto out;
- dev_set_drvdata(&card->dev, md);
-
- ret = mmc_add_disk(md);
- if (ret)
- goto out;
-
- list_for_each_entry(part_md, &md->part, part) {
- ret = mmc_add_disk(part_md);
- if (ret)
- goto out;
- }
-
/* Add two debugfs entries */
mmc_blk_add_debugfs(card, md);
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 95fedcf56e4a..240c5af793dc 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -936,15 +936,16 @@ int mmc_execute_tuning(struct mmc_card *card)
opcode = MMC_SEND_TUNING_BLOCK;
err = host->ops->execute_tuning(host, opcode);
+ if (!err) {
+ mmc_retune_clear(host);
+ mmc_retune_enable(host);
+ return 0;
+ }
- if (err) {
+ /* Only print error when we don't check for card removal */
+ if (!host->detect_change)
pr_err("%s: tuning execution failed: %d\n",
mmc_hostname(host), err);
- } else {
- host->retune_now = 0;
- host->need_retune = 0;
- mmc_retune_enable(host);
- }
return err;
}
@@ -2149,6 +2150,41 @@ int mmc_detect_card_removed(struct mmc_host *host)
}
EXPORT_SYMBOL(mmc_detect_card_removed);
+int mmc_card_alternative_gpt_sector(struct mmc_card *card, sector_t *gpt_sector)
+{
+ unsigned int boot_sectors_num;
+
+ if ((!(card->host->caps2 & MMC_CAP2_ALT_GPT_TEGRA)))
+ return -EOPNOTSUPP;
+
+ /* filter out unrelated cards */
+ if (card->ext_csd.rev < 3 ||
+ !mmc_card_mmc(card) ||
+ !mmc_card_is_blockaddr(card) ||
+ mmc_card_is_removable(card->host))
+ return -ENOENT;
+
+ /*
+ * eMMC storage has two special boot partitions in addition to the
+ * main one. NVIDIA's bootloader linearizes eMMC boot0->boot1->main
+ * accesses; this means that the partition table addresses are shifted
+ * by the size of boot partitions. In accordance with the eMMC
+ * specification, the boot partition size is calculated as follows:
+ *
+ * boot partition size = 128K byte x BOOT_SIZE_MULT
+ *
+ * Calculate the number of sectors occupied by both boot partitions.
+ */
+ boot_sectors_num = card->ext_csd.raw_boot_mult * SZ_128K /
+ SZ_512 * MMC_NUM_BOOT_PARTITION;
+
+ /* Defined by NVIDIA and used by Android devices. */
+ *gpt_sector = card->ext_csd.sectors - boot_sectors_num - 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(mmc_card_alternative_gpt_sector);
+
void mmc_rescan(struct work_struct *work)
{
struct mmc_host *host =
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 0c4de2030b3f..7931a4f0137d 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -119,6 +119,8 @@ void mmc_release_host(struct mmc_host *host);
void mmc_get_card(struct mmc_card *card, struct mmc_ctx *ctx);
void mmc_put_card(struct mmc_card *card, struct mmc_ctx *ctx);
+int mmc_card_alternative_gpt_sector(struct mmc_card *card, sector_t *sector);
+
/**
* mmc_claim_host - exclusively claim a host
* @host: mmc host to claim
diff --git a/drivers/mmc/core/crypto.c b/drivers/mmc/core/crypto.c
index 419a368f8402..67557808cada 100644
--- a/drivers/mmc/core/crypto.c
+++ b/drivers/mmc/core/crypto.c
@@ -31,18 +31,11 @@ void mmc_crypto_prepare_req(struct mmc_queue_req *mqrq)
struct request *req = mmc_queue_req_to_req(mqrq);
struct mmc_request *mrq = &mqrq->brq.mrq;
- if (!req->crypt_keyslot)
+ if (!req->crypt_ctx)
return;
- mrq->crypto_enabled = true;
- mrq->crypto_key_slot = blk_ksm_get_slot_idx(req->crypt_keyslot);
-
- /*
- * For now we assume that all MMC drivers set max_dun_bytes_supported=4,
- * which is the limit for CQHCI crypto. So all DUNs should be 32-bit.
- */
- WARN_ON_ONCE(req->crypt_ctx->bc_dun[0] > U32_MAX);
-
- mrq->data_unit_num = req->crypt_ctx->bc_dun[0];
+ mrq->crypto_ctx = req->crypt_ctx;
+ if (req->crypt_keyslot)
+ mrq->crypto_key_slot = blk_ksm_get_slot_idx(req->crypt_keyslot);
}
EXPORT_SYMBOL_GPL(mmc_crypto_prepare_req);
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 0475d96047c4..d4683b1d263f 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -96,6 +96,10 @@ void mmc_unregister_host_class(void)
class_unregister(&mmc_host_class);
}
+/**
+ * mmc_retune_enable() - enter a transfer mode that requires retuning
+ * @host: host which should retune now
+ */
void mmc_retune_enable(struct mmc_host *host)
{
host->can_retune = 1;
@@ -127,13 +131,18 @@ void mmc_retune_unpause(struct mmc_host *host)
}
EXPORT_SYMBOL(mmc_retune_unpause);
+/**
+ * mmc_retune_disable() - exit a transfer mode that requires retuning
+ * @host: host which should not retune anymore
+ *
+ * It is not meant for temporarily preventing retuning!
+ */
void mmc_retune_disable(struct mmc_host *host)
{
mmc_retune_unpause(host);
host->can_retune = 0;
del_timer_sync(&host->retune_timer);
- host->retune_now = 0;
- host->need_retune = 0;
+ mmc_retune_clear(host);
}
void mmc_retune_timer_stop(struct mmc_host *host)
diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
index ba407617ed23..48c4952512a5 100644
--- a/drivers/mmc/core/host.h
+++ b/drivers/mmc/core/host.h
@@ -21,6 +21,12 @@ int mmc_retune(struct mmc_host *host);
void mmc_retune_pause(struct mmc_host *host);
void mmc_retune_unpause(struct mmc_host *host);
+static inline void mmc_retune_clear(struct mmc_host *host)
+{
+ host->retune_now = 0;
+ host->need_retune = 0;
+}
+
static inline void mmc_retune_hold_now(struct mmc_host *host)
{
host->retune_now = 0;
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 838726b68ff3..29e58ffae379 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -418,6 +418,8 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd)
ext_csd[EXT_CSD_ERASE_TIMEOUT_MULT];
card->ext_csd.raw_hc_erase_grp_size =
ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE];
+ card->ext_csd.raw_boot_mult =
+ ext_csd[EXT_CSD_BOOT_MULT];
if (card->ext_csd.rev >= 3) {
u8 sa_shift = ext_csd[EXT_CSD_S_A_TIMEOUT];
card->ext_csd.part_config = ext_csd[EXT_CSD_PART_CONFIG];
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 973756ed4016..0c54858e89c0 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -435,7 +435,7 @@ static int mmc_busy_cb(void *cb_data, bool *busy)
u32 status = 0;
int err;
- if (host->ops->card_busy) {
+ if (data->busy_cmd != MMC_BUSY_IO && host->ops->card_busy) {
*busy = host->ops->card_busy(host);
return 0;
}
@@ -457,6 +457,7 @@ static int mmc_busy_cb(void *cb_data, bool *busy)
break;
case MMC_BUSY_HPI:
case MMC_BUSY_EXTR_SINGLE:
+ case MMC_BUSY_IO:
break;
default:
err = -EINVAL;
@@ -509,6 +510,7 @@ int __mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
return 0;
}
+EXPORT_SYMBOL_GPL(__mmc_poll_for_busy);
int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
bool retry_crc_err, enum mmc_busy_cmd busy_cmd)
@@ -521,6 +523,7 @@ int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
return __mmc_poll_for_busy(card, timeout_ms, &mmc_busy_cb, &cb_data);
}
+EXPORT_SYMBOL_GPL(mmc_poll_for_busy);
bool mmc_prepare_busy_cmd(struct mmc_host *host, struct mmc_command *cmd,
unsigned int timeout_ms)
@@ -956,8 +959,15 @@ void mmc_run_bkops(struct mmc_card *card)
*/
err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
EXT_CSD_BKOPS_START, 1, MMC_BKOPS_TIMEOUT_MS);
- if (err)
- pr_warn("%s: Error %d starting bkops\n",
+ /*
+ * If the BKOPS timed out, the card is probably still busy in
+ * R1_STATE_PRG. Rather than continuing to wait, let's try to abort
+ * it with an HPI command to get back into R1_STATE_TRAN.
+ */
+ if (err == -ETIMEDOUT && !mmc_interrupt_hpi(card))
+ pr_warn("%s: BKOPS aborted\n", mmc_hostname(card->host));
+ else if (err)
+ pr_warn("%s: Error %d running bkops\n",
mmc_hostname(card->host), err);
mmc_retune_release(card->host);
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index 41ab4f573a31..ae25ffc2e870 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -15,6 +15,7 @@ enum mmc_busy_cmd {
MMC_BUSY_ERASE,
MMC_BUSY_HPI,
MMC_BUSY_EXTR_SINGLE,
+ MMC_BUSY_IO,
};
struct mmc_host;
diff --git a/drivers/mmc/core/pwrseq_sd8787.c b/drivers/mmc/core/pwrseq_sd8787.c
index 68a826f1c0a1..2e120ad83020 100644
--- a/drivers/mmc/core/pwrseq_sd8787.c
+++ b/drivers/mmc/core/pwrseq_sd8787.c
@@ -14,6 +14,7 @@
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/err.h>
@@ -27,6 +28,7 @@ struct mmc_pwrseq_sd8787 {
struct mmc_pwrseq pwrseq;
struct gpio_desc *reset_gpio;
struct gpio_desc *pwrdn_gpio;
+ u32 reset_pwrdwn_delay_ms;
};
#define to_pwrseq_sd8787(p) container_of(p, struct mmc_pwrseq_sd8787, pwrseq)
@@ -37,7 +39,7 @@ static void mmc_pwrseq_sd8787_pre_power_on(struct mmc_host *host)
gpiod_set_value_cansleep(pwrseq->reset_gpio, 1);
- msleep(300);
+ msleep(pwrseq->reset_pwrdwn_delay_ms);
gpiod_set_value_cansleep(pwrseq->pwrdn_gpio, 1);
}
@@ -54,8 +56,12 @@ static const struct mmc_pwrseq_ops mmc_pwrseq_sd8787_ops = {
.power_off = mmc_pwrseq_sd8787_power_off,
};
+static const u32 sd8787_delay_ms = 300;
+static const u32 wilc1000_delay_ms = 5;
+
static const struct of_device_id mmc_pwrseq_sd8787_of_match[] = {
- { .compatible = "mmc-pwrseq-sd8787",},
+ { .compatible = "mmc-pwrseq-sd8787", .data = &sd8787_delay_ms },
+ { .compatible = "mmc-pwrseq-wilc1000", .data = &wilc1000_delay_ms },
{/* sentinel */},
};
MODULE_DEVICE_TABLE(of, mmc_pwrseq_sd8787_of_match);
@@ -64,11 +70,15 @@ static int mmc_pwrseq_sd8787_probe(struct platform_device *pdev)
{
struct mmc_pwrseq_sd8787 *pwrseq;
struct device *dev = &pdev->dev;
+ const struct of_device_id *match;
pwrseq = devm_kzalloc(dev, sizeof(*pwrseq), GFP_KERNEL);
if (!pwrseq)
return -ENOMEM;
+ match = of_match_node(mmc_pwrseq_sd8787_of_match, pdev->dev.of_node);
+ pwrseq->reset_pwrdwn_delay_ms = *(u32 *)match->data;
+
pwrseq->pwrdn_gpio = devm_gpiod_get(dev, "powerdown", GPIOD_OUT_LOW);
if (IS_ERR(pwrseq->pwrdn_gpio))
return PTR_ERR(pwrseq->pwrdn_gpio);
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index cc3261777637..b15c034b42fb 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -163,7 +163,7 @@ static void mmc_mq_recovery_handler(struct work_struct *work)
blk_mq_run_hw_queues(q, true);
}
-static struct scatterlist *mmc_alloc_sg(int sg_len, gfp_t gfp)
+static struct scatterlist *mmc_alloc_sg(unsigned short sg_len, gfp_t gfp)
{
struct scatterlist *sg;
@@ -193,33 +193,29 @@ static void mmc_queue_setup_discard(struct request_queue *q,
blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
}
-static unsigned int mmc_get_max_segments(struct mmc_host *host)
+static unsigned short mmc_get_max_segments(struct mmc_host *host)
{
return host->can_dma_map_merge ? MMC_DMA_MAP_MERGE_SEGMENTS :
host->max_segs;
}
-/**
- * mmc_init_request() - initialize the MMC-specific per-request data
- * @mq: the request queue
- * @req: the request
- * @gfp: memory allocation policy
- */
-static int __mmc_init_request(struct mmc_queue *mq, struct request *req,
- gfp_t gfp)
+static int mmc_mq_init_request(struct blk_mq_tag_set *set, struct request *req,
+ unsigned int hctx_idx, unsigned int numa_node)
{
struct mmc_queue_req *mq_rq = req_to_mmc_queue_req(req);
+ struct mmc_queue *mq = set->driver_data;
struct mmc_card *card = mq->card;
struct mmc_host *host = card->host;
- mq_rq->sg = mmc_alloc_sg(mmc_get_max_segments(host), gfp);
+ mq_rq->sg = mmc_alloc_sg(mmc_get_max_segments(host), GFP_KERNEL);
if (!mq_rq->sg)
return -ENOMEM;
return 0;
}
-static void mmc_exit_request(struct request_queue *q, struct request *req)
+static void mmc_mq_exit_request(struct blk_mq_tag_set *set, struct request *req,
+ unsigned int hctx_idx)
{
struct mmc_queue_req *mq_rq = req_to_mmc_queue_req(req);
@@ -227,20 +223,6 @@ static void mmc_exit_request(struct request_queue *q, struct request *req)
mq_rq->sg = NULL;
}
-static int mmc_mq_init_request(struct blk_mq_tag_set *set, struct request *req,
- unsigned int hctx_idx, unsigned int numa_node)
-{
- return __mmc_init_request(set->driver_data, req, GFP_KERNEL);
-}
-
-static void mmc_mq_exit_request(struct blk_mq_tag_set *set, struct request *req,
- unsigned int hctx_idx)
-{
- struct mmc_queue *mq = set->driver_data;
-
- mmc_exit_request(mq->queue, req);
-}
-
static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
diff --git a/drivers/mmc/core/sdio_cis.c b/drivers/mmc/core/sdio_cis.c
index b23773583179..a705ba6eff5b 100644
--- a/drivers/mmc/core/sdio_cis.c
+++ b/drivers/mmc/core/sdio_cis.c
@@ -330,13 +330,25 @@ static int sdio_read_cis(struct mmc_card *card, struct sdio_func *func)
prev = &this->next;
if (ret == -ENOENT) {
+
if (time_after(jiffies, timeout))
break;
- /* warn about unknown tuples */
- pr_warn_ratelimited("%s: queuing unknown"
- " CIS tuple 0x%02x (%u bytes)\n",
- mmc_hostname(card->host),
- tpl_code, tpl_link);
+
+#define FMT(type) "%s: queuing " type " CIS tuple 0x%02x [%*ph] (%u bytes)\n"
+ /*
+ * Tuples in this range are reserved for
+ * vendors, so don't warn about them
+ */
+ if (tpl_code >= 0x80 && tpl_code <= 0x8f)
+ pr_debug_ratelimited(FMT("vendor"),
+ mmc_hostname(card->host),
+ tpl_code, tpl_link, this->data,
+ tpl_link);
+ else
+ pr_warn_ratelimited(FMT("unknown"),
+ mmc_hostname(card->host),
+ tpl_code, tpl_link, this->data,
+ tpl_link);
}
/* keep on analyzing tuples */
diff --git a/drivers/mmc/host/cqhci-crypto.h b/drivers/mmc/host/cqhci-crypto.h
index 60b58ee0e625..d7fb084f563b 100644
--- a/drivers/mmc/host/cqhci-crypto.h
+++ b/drivers/mmc/host/cqhci-crypto.h
@@ -22,12 +22,15 @@ int cqhci_crypto_init(struct cqhci_host *host);
*/
static inline u64 cqhci_crypto_prep_task_desc(struct mmc_request *mrq)
{
- if (!mrq->crypto_enabled)
+ if (!mrq->crypto_ctx)
return 0;
+ /* We set max_dun_bytes_supported=4, so all DUNs should be 32-bit. */
+ WARN_ON_ONCE(mrq->crypto_ctx->bc_dun[0] > U32_MAX);
+
return CQHCI_CRYPTO_ENABLE_BIT |
CQHCI_CRYPTO_KEYSLOT(mrq->crypto_key_slot) |
- mrq->data_unit_num;
+ mrq->crypto_ctx->bc_dun[0];
}
#else /* CONFIG_MMC_CRYPTO */
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index d333130d1531..6578cc64ae9e 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -17,9 +17,11 @@
#include <linux/interrupt.h>
#include <linux/iopoll.h>
#include <linux/ioport.h>
+#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
+#include <linux/prandom.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/stat.h>
@@ -181,6 +183,9 @@ static void dw_mci_init_debugfs(struct dw_mci_slot *slot)
&host->pending_events);
debugfs_create_xul("completed_events", S_IRUSR, root,
&host->completed_events);
+#ifdef CONFIG_FAULT_INJECTION
+ fault_create_debugfs_attr("fail_data_crc", root, &host->fail_data_crc);
+#endif
}
#endif /* defined(CONFIG_DEBUG_FS) */
@@ -782,6 +787,7 @@ static int dw_mci_edmac_start_dma(struct dw_mci *host,
int ret = 0;
/* Set external dma config: burst size, burst width */
+ memset(&cfg, 0, sizeof(cfg));
cfg.dst_addr = host->phy_regs + fifo_offset;
cfg.src_addr = cfg.dst_addr;
cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
@@ -1788,6 +1794,68 @@ static const struct mmc_host_ops dw_mci_ops = {
.prepare_hs400_tuning = dw_mci_prepare_hs400_tuning,
};
+#ifdef CONFIG_FAULT_INJECTION
+static enum hrtimer_restart dw_mci_fault_timer(struct hrtimer *t)
+{
+ struct dw_mci *host = container_of(t, struct dw_mci, fault_timer);
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->irq_lock, flags);
+
+ if (!host->data_status)
+ host->data_status = SDMMC_INT_DCRC;
+ set_bit(EVENT_DATA_ERROR, &host->pending_events);
+ tasklet_schedule(&host->tasklet);
+
+ spin_unlock_irqrestore(&host->irq_lock, flags);
+
+ return HRTIMER_NORESTART;
+}
+
+static void dw_mci_start_fault_timer(struct dw_mci *host)
+{
+ struct mmc_data *data = host->data;
+
+ if (!data || data->blocks <= 1)
+ return;
+
+ if (!should_fail(&host->fail_data_crc, 1))
+ return;
+
+ /*
+ * Try to inject the error at random points during the data transfer.
+ */
+ hrtimer_start(&host->fault_timer,
+ ms_to_ktime(prandom_u32() % 25),
+ HRTIMER_MODE_REL);
+}
+
+static void dw_mci_stop_fault_timer(struct dw_mci *host)
+{
+ hrtimer_cancel(&host->fault_timer);
+}
+
+static void dw_mci_init_fault(struct dw_mci *host)
+{
+ host->fail_data_crc = (struct fault_attr) FAULT_ATTR_INITIALIZER;
+
+ hrtimer_init(&host->fault_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ host->fault_timer.function = dw_mci_fault_timer;
+}
+#else
+static void dw_mci_init_fault(struct dw_mci *host)
+{
+}
+
+static void dw_mci_start_fault_timer(struct dw_mci *host)
+{
+}
+
+static void dw_mci_stop_fault_timer(struct dw_mci *host)
+{
+}
+#endif
+
static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq)
__releases(&host->lock)
__acquires(&host->lock)
@@ -2018,8 +2086,8 @@ static void dw_mci_tasklet_func(struct tasklet_struct *t)
continue;
}
- dw_mci_stop_dma(host);
send_stop_abort(host, data);
+ dw_mci_stop_dma(host);
state = STATE_SENDING_STOP;
break;
}
@@ -2043,10 +2111,10 @@ static void dw_mci_tasklet_func(struct tasklet_struct *t)
*/
if (test_and_clear_bit(EVENT_DATA_ERROR,
&host->pending_events)) {
- dw_mci_stop_dma(host);
if (!(host->data_status & (SDMMC_INT_DRTO |
SDMMC_INT_EBE)))
send_stop_abort(host, data);
+ dw_mci_stop_dma(host);
state = STATE_DATA_ERROR;
break;
}
@@ -2079,10 +2147,10 @@ static void dw_mci_tasklet_func(struct tasklet_struct *t)
*/
if (test_and_clear_bit(EVENT_DATA_ERROR,
&host->pending_events)) {
- dw_mci_stop_dma(host);
if (!(host->data_status & (SDMMC_INT_DRTO |
SDMMC_INT_EBE)))
send_stop_abort(host, data);
+ dw_mci_stop_dma(host);
state = STATE_DATA_ERROR;
break;
}
@@ -2102,6 +2170,7 @@ static void dw_mci_tasklet_func(struct tasklet_struct *t)
break;
}
+ dw_mci_stop_fault_timer(host);
host->data = NULL;
set_bit(EVENT_DATA_COMPLETE, &host->completed_events);
err = dw_mci_data_complete(host, data);
@@ -2151,6 +2220,7 @@ static void dw_mci_tasklet_func(struct tasklet_struct *t)
if (mrq->cmd->error && mrq->data)
dw_mci_reset(host);
+ dw_mci_stop_fault_timer(host);
host->cmd = NULL;
host->data = NULL;
@@ -2600,6 +2670,8 @@ static void dw_mci_cmd_interrupt(struct dw_mci *host, u32 status)
set_bit(EVENT_CMD_COMPLETE, &host->pending_events);
tasklet_schedule(&host->tasklet);
+
+ dw_mci_start_fault_timer(host);
}
static void dw_mci_handle_cd(struct dw_mci *host)
@@ -3223,6 +3295,8 @@ int dw_mci_probe(struct dw_mci *host)
spin_lock_init(&host->irq_lock);
INIT_LIST_HEAD(&host->queue);
+ dw_mci_init_fault(host);
+
/*
* Get the host data width - this assumes that HCON has been set with
* the correct values.
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index da5923a92e60..ce05d81477d9 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -14,6 +14,8 @@
#include <linux/mmc/core.h>
#include <linux/dmaengine.h>
#include <linux/reset.h>
+#include <linux/fault-inject.h>
+#include <linux/hrtimer.h>
#include <linux/interrupt.h>
enum dw_mci_state {
@@ -230,6 +232,11 @@ struct dw_mci {
struct timer_list cmd11_timer;
struct timer_list cto_timer;
struct timer_list dto_timer;
+
+#ifdef CONFIG_FAULT_INJECTION
+ struct fault_attr fail_data_crc;
+ struct hrtimer fault_timer;
+#endif
};
/* DMA ops for Internal/External DMAC interface */
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 65c65bb5737f..a1bcde3395a6 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -180,7 +180,7 @@ static int mmc_spi_skip(struct mmc_spi_host *host, unsigned long timeout,
u8 *cp = host->data->status;
unsigned long start = jiffies;
- while (1) {
+ do {
int status;
unsigned i;
@@ -193,16 +193,9 @@ static int mmc_spi_skip(struct mmc_spi_host *host, unsigned long timeout,
return cp[i];
}
- if (time_is_before_jiffies(start + timeout))
- break;
-
- /* If we need long timeouts, we may release the CPU.
- * We use jiffies here because we want to have a relation
- * between elapsed time and the blocking of the scheduler.
- */
- if (time_is_before_jiffies(start + 1))
- schedule();
- }
+ /* If we need long timeouts, we may release the CPU */
+ cond_resched();
+ } while (time_is_after_jiffies(start + timeout));
return -ETIMEDOUT;
}
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 984d35055156..3765e2f4ad98 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -2126,6 +2126,9 @@ static int mmci_probe(struct amba_device *dev,
ret = PTR_ERR(host->rst);
goto clk_disable;
}
+ ret = reset_control_deassert(host->rst);
+ if (ret)
+ dev_err(mmc_dev(mmc), "failed to de-assert reset\n");
/* Get regulators and the supported OCR mask */
ret = mmc_regulator_get_supply(mmc);
diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c
index 51db30acf4dc..fdaa11f92fe6 100644
--- a/drivers/mmc/host/mmci_stm32_sdmmc.c
+++ b/drivers/mmc/host/mmci_stm32_sdmmc.c
@@ -479,8 +479,9 @@ static int sdmmc_post_sig_volt_switch(struct mmci_host *host,
u32 status;
int ret = 0;
- if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180) {
- spin_lock_irqsave(&host->lock, flags);
+ spin_lock_irqsave(&host->lock, flags);
+ if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180 &&
+ host->pwr_reg & MCI_STM32_VSWITCHEN) {
mmci_write_pwrreg(host, host->pwr_reg | MCI_STM32_VSWITCH);
spin_unlock_irqrestore(&host->lock, flags);
@@ -492,9 +493,11 @@ static int sdmmc_post_sig_volt_switch(struct mmci_host *host,
writel_relaxed(MCI_STM32_VSWENDC | MCI_STM32_CKSTOPC,
host->base + MMCICLEAR);
+ spin_lock_irqsave(&host->lock, flags);
mmci_write_pwrreg(host, host->pwr_reg &
~(MCI_STM32_VSWITCHEN | MCI_STM32_VSWITCH));
}
+ spin_unlock_irqrestore(&host->lock, flags);
return ret;
}
diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
index bde298887579..6c9d38132f74 100644
--- a/drivers/mmc/host/moxart-mmc.c
+++ b/drivers/mmc/host/moxart-mmc.c
@@ -628,6 +628,7 @@ static int moxart_probe(struct platform_device *pdev)
host->dma_chan_tx, host->dma_chan_rx);
host->have_dma = true;
+ memset(&cfg, 0, sizeof(cfg));
cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
diff --git a/drivers/mmc/host/renesas_sdhi.h b/drivers/mmc/host/renesas_sdhi.h
index 53eded81a53e..0c45e82ff0de 100644
--- a/drivers/mmc/host/renesas_sdhi.h
+++ b/drivers/mmc/host/renesas_sdhi.h
@@ -42,6 +42,11 @@ struct renesas_sdhi_quirks {
const u8 (*hs400_calib_table)[SDHI_CALIB_TABLE_MAX];
};
+struct renesas_sdhi_of_data_with_quirks {
+ const struct renesas_sdhi_of_data *of_data;
+ const struct renesas_sdhi_quirks *quirks;
+};
+
struct tmio_mmc_dma {
enum dma_slave_buswidth dma_buswidth;
bool (*filter)(struct dma_chan *chan, void *arg);
@@ -78,6 +83,8 @@ struct renesas_sdhi {
container_of((host)->pdata, struct renesas_sdhi, mmc_data)
int renesas_sdhi_probe(struct platform_device *pdev,
- const struct tmio_mmc_dma_ops *dma_ops);
+ const struct tmio_mmc_dma_ops *dma_ops,
+ const struct renesas_sdhi_of_data *of_data,
+ const struct renesas_sdhi_quirks *quirks);
int renesas_sdhi_remove(struct platform_device *pdev);
#endif
diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index e49ca0f7fe9a..6fc4cf3c9dce 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -305,27 +305,6 @@ static int renesas_sdhi_start_signal_voltage_switch(struct mmc_host *mmc,
#define SH_MOBILE_SDHI_SCC_TMPPORT_CALIB_CODE_MASK 0x1f
#define SH_MOBILE_SDHI_SCC_TMPPORT_MANUAL_MODE BIT(7)
-static const u8 r8a7796_es13_calib_table[2][SDHI_CALIB_TABLE_MAX] = {
- { 3, 3, 3, 3, 3, 3, 3, 4, 4, 5, 6, 7, 8, 9, 10, 15,
- 16, 16, 16, 16, 16, 16, 17, 18, 18, 19, 20, 21, 22, 23, 24, 25 },
- { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 8, 11,
- 12, 17, 18, 18, 18, 18, 18, 18, 18, 19, 20, 21, 22, 23, 25, 25 }
-};
-
-static const u8 r8a77965_calib_table[2][SDHI_CALIB_TABLE_MAX] = {
- { 1, 2, 6, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15, 16,
- 17, 18, 19, 20, 21, 22, 23, 24, 25, 25, 26, 27, 28, 29, 30, 31 },
- { 2, 3, 4, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17,
- 17, 17, 20, 21, 22, 23, 24, 25, 27, 28, 29, 30, 31, 31, 31, 31 }
-};
-
-static const u8 r8a77990_calib_table[2][SDHI_CALIB_TABLE_MAX] = {
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 0, 0, 0, 1, 2, 3, 3, 4, 4, 4, 5, 5, 6, 8, 9, 10,
- 11, 12, 13, 15, 16, 17, 17, 18, 18, 19, 20, 22, 24, 25, 26, 26 }
-};
-
static inline u32 sd_scc_read32(struct tmio_mmc_host *host,
struct renesas_sdhi *priv, int addr)
{
@@ -895,69 +874,12 @@ static void renesas_sdhi_enable_dma(struct tmio_mmc_host *host, bool enable)
renesas_sdhi_sdbuf_width(host, enable ? width : 16);
}
-static const struct renesas_sdhi_quirks sdhi_quirks_4tap_nohs400 = {
- .hs400_disabled = true,
- .hs400_4taps = true,
-};
-
-static const struct renesas_sdhi_quirks sdhi_quirks_4tap = {
- .hs400_4taps = true,
- .hs400_bad_taps = BIT(2) | BIT(3) | BIT(6) | BIT(7),
-};
-
-static const struct renesas_sdhi_quirks sdhi_quirks_nohs400 = {
- .hs400_disabled = true,
-};
-
-static const struct renesas_sdhi_quirks sdhi_quirks_bad_taps1357 = {
- .hs400_bad_taps = BIT(1) | BIT(3) | BIT(5) | BIT(7),
-};
-
-static const struct renesas_sdhi_quirks sdhi_quirks_bad_taps2367 = {
- .hs400_bad_taps = BIT(2) | BIT(3) | BIT(6) | BIT(7),
-};
-
-static const struct renesas_sdhi_quirks sdhi_quirks_r8a7796_es13 = {
- .hs400_4taps = true,
- .hs400_bad_taps = BIT(2) | BIT(3) | BIT(6) | BIT(7),
- .hs400_calib_table = r8a7796_es13_calib_table,
-};
-
-static const struct renesas_sdhi_quirks sdhi_quirks_r8a77965 = {
- .hs400_bad_taps = BIT(2) | BIT(3) | BIT(6) | BIT(7),
- .hs400_calib_table = r8a77965_calib_table,
-};
-
-static const struct renesas_sdhi_quirks sdhi_quirks_r8a77990 = {
- .hs400_calib_table = r8a77990_calib_table,
-};
-
-/*
- * Note for r8a7796 / r8a774a1: we can't distinguish ES1.1 and 1.2 as of now.
- * So, we want to treat them equally and only have a match for ES1.2 to enforce
- * this if there ever will be a way to distinguish ES1.2.
- */
-static const struct soc_device_attribute sdhi_quirks_match[] = {
- { .soc_id = "r8a774a1", .revision = "ES1.[012]", .data = &sdhi_quirks_4tap_nohs400 },
- { .soc_id = "r8a7795", .revision = "ES1.*", .data = &sdhi_quirks_4tap_nohs400 },
- { .soc_id = "r8a7795", .revision = "ES2.0", .data = &sdhi_quirks_4tap },
- { .soc_id = "r8a7795", .revision = "ES3.*", .data = &sdhi_quirks_bad_taps2367 },
- { .soc_id = "r8a7796", .revision = "ES1.[012]", .data = &sdhi_quirks_4tap_nohs400 },
- { .soc_id = "r8a7796", .revision = "ES1.*", .data = &sdhi_quirks_r8a7796_es13 },
- { .soc_id = "r8a77961", .data = &sdhi_quirks_bad_taps1357 },
- { .soc_id = "r8a77965", .data = &sdhi_quirks_r8a77965 },
- { .soc_id = "r8a77980", .data = &sdhi_quirks_nohs400 },
- { .soc_id = "r8a77990", .data = &sdhi_quirks_r8a77990 },
- { /* Sentinel. */ },
-};
-
int renesas_sdhi_probe(struct platform_device *pdev,
- const struct tmio_mmc_dma_ops *dma_ops)
+ const struct tmio_mmc_dma_ops *dma_ops,
+ const struct renesas_sdhi_of_data *of_data,
+ const struct renesas_sdhi_quirks *quirks)
{
struct tmio_mmc_data *mmd = pdev->dev.platform_data;
- const struct renesas_sdhi_quirks *quirks = NULL;
- const struct renesas_sdhi_of_data *of_data;
- const struct soc_device_attribute *attr;
struct tmio_mmc_data *mmc_data;
struct tmio_mmc_dma *dma_priv;
struct tmio_mmc_host *host;
@@ -966,12 +888,6 @@ int renesas_sdhi_probe(struct platform_device *pdev,
struct resource *res;
u16 ver;
- of_data = of_device_get_match_data(&pdev->dev);
-
- attr = soc_device_match(sdhi_quirks_match);
- if (attr)
- quirks = attr->data;
-
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res)
return -EINVAL;
diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
index e8f4863d8f1a..7660f7ea74dd 100644
--- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
@@ -15,6 +15,7 @@
#include <linux/mmc/host.h>
#include <linux/mod_devicetable.h>
#include <linux/module.h>
+#include <linux/of_device.h>
#include <linux/pagemap.h>
#include <linux/scatterlist.h>
#include <linux/sys_soc.h>
@@ -92,7 +93,7 @@ static struct renesas_sdhi_scc rcar_gen3_scc_taps[] = {
},
};
-static const struct renesas_sdhi_of_data of_rza2_compatible = {
+static const struct renesas_sdhi_of_data of_data_rza2 = {
.tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_CLK_ACTUAL |
TMIO_MMC_HAVE_CBSY,
.tmio_ocr_mask = MMC_VDD_32_33,
@@ -107,7 +108,11 @@ static const struct renesas_sdhi_of_data of_rza2_compatible = {
.max_segs = 1,
};
-static const struct renesas_sdhi_of_data of_rcar_gen3_compatible = {
+static const struct renesas_sdhi_of_data_with_quirks of_rza2_compatible = {
+ .of_data = &of_data_rza2,
+};
+
+static const struct renesas_sdhi_of_data of_data_rcar_gen3 = {
.tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_CLK_ACTUAL |
TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2,
.capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
@@ -122,11 +127,116 @@ static const struct renesas_sdhi_of_data of_rcar_gen3_compatible = {
.max_segs = 1,
};
+static const u8 r8a7796_es13_calib_table[2][SDHI_CALIB_TABLE_MAX] = {
+ { 3, 3, 3, 3, 3, 3, 3, 4, 4, 5, 6, 7, 8, 9, 10, 15,
+ 16, 16, 16, 16, 16, 16, 17, 18, 18, 19, 20, 21, 22, 23, 24, 25 },
+ { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 8, 11,
+ 12, 17, 18, 18, 18, 18, 18, 18, 18, 19, 20, 21, 22, 23, 25, 25 }
+};
+
+static const u8 r8a77965_calib_table[2][SDHI_CALIB_TABLE_MAX] = {
+ { 1, 2, 6, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 25, 26, 27, 28, 29, 30, 31 },
+ { 2, 3, 4, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 17, 17, 20, 21, 22, 23, 24, 25, 27, 28, 29, 30, 31, 31, 31, 31 }
+};
+
+static const u8 r8a77990_calib_table[2][SDHI_CALIB_TABLE_MAX] = {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 1, 2, 3, 3, 4, 4, 4, 5, 5, 6, 8, 9, 10,
+ 11, 12, 13, 15, 16, 17, 17, 18, 18, 19, 20, 22, 24, 25, 26, 26 }
+};
+
+static const struct renesas_sdhi_quirks sdhi_quirks_4tap_nohs400 = {
+ .hs400_disabled = true,
+ .hs400_4taps = true,
+};
+
+static const struct renesas_sdhi_quirks sdhi_quirks_4tap = {
+ .hs400_4taps = true,
+ .hs400_bad_taps = BIT(2) | BIT(3) | BIT(6) | BIT(7),
+};
+
+static const struct renesas_sdhi_quirks sdhi_quirks_nohs400 = {
+ .hs400_disabled = true,
+};
+
+static const struct renesas_sdhi_quirks sdhi_quirks_bad_taps1357 = {
+ .hs400_bad_taps = BIT(1) | BIT(3) | BIT(5) | BIT(7),
+};
+
+static const struct renesas_sdhi_quirks sdhi_quirks_bad_taps2367 = {
+ .hs400_bad_taps = BIT(2) | BIT(3) | BIT(6) | BIT(7),
+};
+
+static const struct renesas_sdhi_quirks sdhi_quirks_r8a7796_es13 = {
+ .hs400_4taps = true,
+ .hs400_bad_taps = BIT(2) | BIT(3) | BIT(6) | BIT(7),
+ .hs400_calib_table = r8a7796_es13_calib_table,
+};
+
+static const struct renesas_sdhi_quirks sdhi_quirks_r8a77965 = {
+ .hs400_bad_taps = BIT(2) | BIT(3) | BIT(6) | BIT(7),
+ .hs400_calib_table = r8a77965_calib_table,
+};
+
+static const struct renesas_sdhi_quirks sdhi_quirks_r8a77990 = {
+ .hs400_calib_table = r8a77990_calib_table,
+};
+
+/*
+ * Note for r8a7796 / r8a774a1: we can't distinguish ES1.1 and 1.2 as of now.
+ * So, we want to treat them equally and only have a match for ES1.2 to enforce
+ * this if there ever will be a way to distinguish ES1.2.
+ */
+static const struct soc_device_attribute sdhi_quirks_match[] = {
+ { .soc_id = "r8a774a1", .revision = "ES1.[012]", .data = &sdhi_quirks_4tap_nohs400 },
+ { .soc_id = "r8a7795", .revision = "ES1.*", .data = &sdhi_quirks_4tap_nohs400 },
+ { .soc_id = "r8a7795", .revision = "ES2.0", .data = &sdhi_quirks_4tap },
+ { .soc_id = "r8a7796", .revision = "ES1.[012]", .data = &sdhi_quirks_4tap_nohs400 },
+ { .soc_id = "r8a7796", .revision = "ES1.*", .data = &sdhi_quirks_r8a7796_es13 },
+ { /* Sentinel. */ },
+};
+
+static const struct renesas_sdhi_of_data_with_quirks of_r8a7795_compatible = {
+ .of_data = &of_data_rcar_gen3,
+ .quirks = &sdhi_quirks_bad_taps2367,
+};
+
+static const struct renesas_sdhi_of_data_with_quirks of_r8a77961_compatible = {
+ .of_data = &of_data_rcar_gen3,
+ .quirks = &sdhi_quirks_bad_taps1357,
+};
+
+static const struct renesas_sdhi_of_data_with_quirks of_r8a77965_compatible = {
+ .of_data = &of_data_rcar_gen3,
+ .quirks = &sdhi_quirks_r8a77965,
+};
+
+static const struct renesas_sdhi_of_data_with_quirks of_r8a77980_compatible = {
+ .of_data = &of_data_rcar_gen3,
+ .quirks = &sdhi_quirks_nohs400,
+};
+
+static const struct renesas_sdhi_of_data_with_quirks of_r8a77990_compatible = {
+ .of_data = &of_data_rcar_gen3,
+ .quirks = &sdhi_quirks_r8a77990,
+};
+
+static const struct renesas_sdhi_of_data_with_quirks of_rcar_gen3_compatible = {
+ .of_data = &of_data_rcar_gen3,
+};
+
static const struct of_device_id renesas_sdhi_internal_dmac_of_match[] = {
{ .compatible = "renesas,sdhi-r7s9210", .data = &of_rza2_compatible, },
{ .compatible = "renesas,sdhi-mmc-r8a77470", .data = &of_rcar_gen3_compatible, },
- { .compatible = "renesas,sdhi-r8a7795", .data = &of_rcar_gen3_compatible, },
+ { .compatible = "renesas,sdhi-r8a7795", .data = &of_r8a7795_compatible, },
{ .compatible = "renesas,sdhi-r8a7796", .data = &of_rcar_gen3_compatible, },
+ { .compatible = "renesas,sdhi-r8a77961", .data = &of_r8a77961_compatible, },
+ { .compatible = "renesas,sdhi-r8a77965", .data = &of_r8a77965_compatible, },
+ { .compatible = "renesas,sdhi-r8a77980", .data = &of_r8a77980_compatible, },
+ { .compatible = "renesas,sdhi-r8a77990", .data = &of_r8a77990_compatible, },
{ .compatible = "renesas,rcar-gen3-sdhi", .data = &of_rcar_gen3_compatible, },
{},
};
@@ -405,16 +515,27 @@ static const struct soc_device_attribute soc_dma_quirks[] = {
static int renesas_sdhi_internal_dmac_probe(struct platform_device *pdev)
{
- const struct soc_device_attribute *soc = soc_device_match(soc_dma_quirks);
+ const struct soc_device_attribute *attr;
+ const struct renesas_sdhi_of_data_with_quirks *of_data_quirks;
+ const struct renesas_sdhi_quirks *quirks;
struct device *dev = &pdev->dev;
- if (soc)
- global_flags |= (unsigned long)soc->data;
+ of_data_quirks = of_device_get_match_data(&pdev->dev);
+ quirks = of_data_quirks->quirks;
+
+ attr = soc_device_match(soc_dma_quirks);
+ if (attr)
+ global_flags |= (unsigned long)attr->data;
+
+ attr = soc_device_match(sdhi_quirks_match);
+ if (attr)
+ quirks = attr->data;
/* value is max of SD_SECCNT. Confirmed by HW engineers */
dma_set_max_seg_size(dev, 0xffffffff);
- return renesas_sdhi_probe(pdev, &renesas_sdhi_internal_dmac_dma_ops);
+ return renesas_sdhi_probe(pdev, &renesas_sdhi_internal_dmac_dma_ops,
+ of_data_quirks->of_data, quirks);
}
static const struct dev_pm_ops renesas_sdhi_internal_dmac_dev_pm_ops = {
diff --git a/drivers/mmc/host/renesas_sdhi_sys_dmac.c b/drivers/mmc/host/renesas_sdhi_sys_dmac.c
index ffa64211f4de..99e3426df702 100644
--- a/drivers/mmc/host/renesas_sdhi_sys_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_sys_dmac.c
@@ -108,9 +108,9 @@ static void renesas_sdhi_sys_dmac_abort_dma(struct tmio_mmc_host *host)
renesas_sdhi_sys_dmac_enable_dma(host, false);
if (host->chan_rx)
- dmaengine_terminate_all(host->chan_rx);
+ dmaengine_terminate_sync(host->chan_rx);
if (host->chan_tx)
- dmaengine_terminate_all(host->chan_tx);
+ dmaengine_terminate_sync(host->chan_tx);
renesas_sdhi_sys_dmac_enable_dma(host, true);
}
@@ -451,7 +451,8 @@ static const struct tmio_mmc_dma_ops renesas_sdhi_sys_dmac_dma_ops = {
static int renesas_sdhi_sys_dmac_probe(struct platform_device *pdev)
{
- return renesas_sdhi_probe(pdev, &renesas_sdhi_sys_dmac_dma_ops);
+ return renesas_sdhi_probe(pdev, &renesas_sdhi_sys_dmac_dma_ops,
+ of_device_get_match_data(&pdev->dev), NULL);
}
static const struct dev_pm_ops renesas_sdhi_sys_dmac_dev_pm_ops = {
diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
index 4ca937415734..58cfaffa3c2d 100644
--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
+++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
@@ -542,9 +542,22 @@ static int sd_write_long_data(struct realtek_pci_sdmmc *host,
return 0;
}
+static inline void sd_enable_initial_mode(struct realtek_pci_sdmmc *host)
+{
+ rtsx_pci_write_register(host->pcr, SD_CFG1,
+ SD_CLK_DIVIDE_MASK, SD_CLK_DIVIDE_128);
+}
+
+static inline void sd_disable_initial_mode(struct realtek_pci_sdmmc *host)
+{
+ rtsx_pci_write_register(host->pcr, SD_CFG1,
+ SD_CLK_DIVIDE_MASK, SD_CLK_DIVIDE_0);
+}
+
static int sd_rw_multi(struct realtek_pci_sdmmc *host, struct mmc_request *mrq)
{
struct mmc_data *data = mrq->data;
+ int err;
if (host->sg_count < 0) {
data->error = host->sg_count;
@@ -553,22 +566,19 @@ static int sd_rw_multi(struct realtek_pci_sdmmc *host, struct mmc_request *mrq)
return data->error;
}
- if (data->flags & MMC_DATA_READ)
- return sd_read_long_data(host, mrq);
+ if (data->flags & MMC_DATA_READ) {
+ if (host->initial_mode)
+ sd_disable_initial_mode(host);
- return sd_write_long_data(host, mrq);
-}
+ err = sd_read_long_data(host, mrq);
-static inline void sd_enable_initial_mode(struct realtek_pci_sdmmc *host)
-{
- rtsx_pci_write_register(host->pcr, SD_CFG1,
- SD_CLK_DIVIDE_MASK, SD_CLK_DIVIDE_128);
-}
+ if (host->initial_mode)
+ sd_enable_initial_mode(host);
-static inline void sd_disable_initial_mode(struct realtek_pci_sdmmc *host)
-{
- rtsx_pci_write_register(host->pcr, SD_CFG1,
- SD_CLK_DIVIDE_MASK, SD_CLK_DIVIDE_0);
+ return err;
+ }
+
+ return sd_write_long_data(host, mrq);
}
static void sd_normal_rw(struct realtek_pci_sdmmc *host,
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 72c0bf0c1887..f18d169bc8ff 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -24,7 +24,6 @@
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/pinctrl/consumer.h>
-#include <linux/platform_data/mmc-esdhc-imx.h>
#include <linux/pm_runtime.h>
#include "sdhci-pltfm.h"
#include "sdhci-esdhc.h"
@@ -95,6 +94,11 @@
#define ESDHC_VEND_SPEC2 0xc8
#define ESDHC_VEND_SPEC2_EN_BUSY_IRQ (1 << 8)
+#define ESDHC_VEND_SPEC2_AUTO_TUNE_8BIT_EN (1 << 4)
+#define ESDHC_VEND_SPEC2_AUTO_TUNE_4BIT_EN (0 << 4)
+#define ESDHC_VEND_SPEC2_AUTO_TUNE_1BIT_EN (2 << 4)
+#define ESDHC_VEND_SPEC2_AUTO_TUNE_CMD_EN (1 << 6)
+#define ESDHC_VEND_SPEC2_AUTO_TUNE_MODE_MASK (7 << 4)
#define ESDHC_TUNING_CTRL 0xcc
#define ESDHC_STD_TUNING_EN (1 << 24)
@@ -115,6 +119,7 @@
#define ESDHC_CTRL_4BITBUS (0x1 << 1)
#define ESDHC_CTRL_8BITBUS (0x2 << 1)
#define ESDHC_CTRL_BUSWIDTH_MASK (0x3 << 1)
+#define USDHC_GET_BUSWIDTH(c) (c & ESDHC_CTRL_BUSWIDTH_MASK)
/*
* There is an INT DMA ERR mismatch between eSDHC and STD SDHC SPEC:
@@ -191,6 +196,38 @@
*/
#define ESDHC_FLAG_BROKEN_AUTO_CMD23 BIT(16)
+enum wp_types {
+ ESDHC_WP_NONE, /* no WP, neither controller nor gpio */
+ ESDHC_WP_CONTROLLER, /* mmc controller internal WP */
+ ESDHC_WP_GPIO, /* external gpio pin for WP */
+};
+
+enum cd_types {
+ ESDHC_CD_NONE, /* no CD, neither controller nor gpio */
+ ESDHC_CD_CONTROLLER, /* mmc controller internal CD */
+ ESDHC_CD_GPIO, /* external gpio pin for CD */
+ ESDHC_CD_PERMANENT, /* no CD, card permanently wired to host */
+};
+
+/*
+ * struct esdhc_platform_data - platform data for esdhc on i.MX
+ *
+ * ESDHC_WP(CD)_CONTROLLER type is not available on i.MX25/35.
+ *
+ * @wp_type: type of write_protect method (see wp_types enum above)
+ * @cd_type: type of card_detect method (see cd_types enum above)
+ */
+
+struct esdhc_platform_data {
+ enum wp_types wp_type;
+ enum cd_types cd_type;
+ int max_bus_width;
+ unsigned int delay_line;
+ unsigned int tuning_step; /* The delay cell steps in tuning procedure */
+ unsigned int tuning_start_tap; /* The start delay cell point in tuning procedure */
+ unsigned int strobe_dll_delay_target; /* The delay cell for strobe pad (read clock) */
+};
+
struct esdhc_soc_data {
u32 flags;
};
@@ -376,6 +413,30 @@ static inline void esdhc_wait_for_card_clock_gate_off(struct sdhci_host *host)
dev_warn(mmc_dev(host->mmc), "%s: card clock still not gate off in 100us!.\n", __func__);
}
+/* Enable the auto tuning circuit to check the CMD line and BUS line */
+static inline void usdhc_auto_tuning_mode_sel(struct sdhci_host *host)
+{
+ u32 buswidth, auto_tune_buswidth;
+
+ buswidth = USDHC_GET_BUSWIDTH(readl(host->ioaddr + SDHCI_HOST_CONTROL));
+
+ switch (buswidth) {
+ case ESDHC_CTRL_8BITBUS:
+ auto_tune_buswidth = ESDHC_VEND_SPEC2_AUTO_TUNE_8BIT_EN;
+ break;
+ case ESDHC_CTRL_4BITBUS:
+ auto_tune_buswidth = ESDHC_VEND_SPEC2_AUTO_TUNE_4BIT_EN;
+ break;
+ default: /* 1BITBUS */
+ auto_tune_buswidth = ESDHC_VEND_SPEC2_AUTO_TUNE_1BIT_EN;
+ break;
+ }
+
+ esdhc_clrset_le(host, ESDHC_VEND_SPEC2_AUTO_TUNE_MODE_MASK,
+ auto_tune_buswidth | ESDHC_VEND_SPEC2_AUTO_TUNE_CMD_EN,
+ ESDHC_VEND_SPEC2);
+}
+
static u32 esdhc_readl_le(struct sdhci_host *host, int reg)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -597,17 +658,7 @@ static void esdhc_writew_le(struct sdhci_host *host, u16 val, int reg)
else
new_val &= ~ESDHC_VENDOR_SPEC_VSELECT;
writel(new_val, host->ioaddr + ESDHC_VENDOR_SPEC);
- if (imx_data->socdata->flags & ESDHC_FLAG_MAN_TUNING) {
- new_val = readl(host->ioaddr + ESDHC_MIX_CTRL);
- if (val & SDHCI_CTRL_TUNED_CLK) {
- new_val |= ESDHC_MIX_CTRL_SMPCLK_SEL;
- new_val |= ESDHC_MIX_CTRL_AUTO_TUNE_EN;
- } else {
- new_val &= ~ESDHC_MIX_CTRL_SMPCLK_SEL;
- new_val &= ~ESDHC_MIX_CTRL_AUTO_TUNE_EN;
- }
- writel(new_val , host->ioaddr + ESDHC_MIX_CTRL);
- } else if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) {
+ if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) {
u32 v = readl(host->ioaddr + SDHCI_AUTO_CMD_STATUS);
u32 m = readl(host->ioaddr + ESDHC_MIX_CTRL);
if (val & SDHCI_CTRL_TUNED_CLK) {
@@ -622,6 +673,7 @@ static void esdhc_writew_le(struct sdhci_host *host, u16 val, int reg)
v |= ESDHC_MIX_CTRL_EXE_TUNE;
m |= ESDHC_MIX_CTRL_FBCLK_SEL;
m |= ESDHC_MIX_CTRL_AUTO_TUNE_EN;
+ usdhc_auto_tuning_mode_sel(host);
} else {
v &= ~ESDHC_MIX_CTRL_EXE_TUNE;
}
@@ -991,6 +1043,8 @@ static void esdhc_post_tuning(struct sdhci_host *host)
{
u32 reg;
+ usdhc_auto_tuning_mode_sel(host);
+
reg = readl(host->ioaddr + ESDHC_MIX_CTRL);
reg &= ~ESDHC_MIX_CTRL_EXE_TUNE;
reg |= ESDHC_MIX_CTRL_AUTO_TUNE_EN;
diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index cce390fe9cf3..032bf852397f 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -173,6 +173,23 @@ static unsigned int sdhci_iproc_get_max_clock(struct sdhci_host *host)
return pltfm_host->clock;
}
+/*
+ * There is a known bug on BCM2711's SDHCI core integration where the
+ * controller will hang when the difference between the core clock and the bus
+ * clock is too great. Specifically this can be reproduced under the following
+ * conditions:
+ *
+ * - No SD card plugged in, polling thread is running, probing cards at
+ * 100 kHz.
+ * - BCM2711's core clock configured at 500MHz or more
+ *
+ * So we set 200kHz as the minimum clock frequency available for that SoC.
+ */
+static unsigned int sdhci_iproc_bcm2711_get_min_clock(struct sdhci_host *host)
+{
+ return 200000;
+}
+
static const struct sdhci_ops sdhci_iproc_ops = {
.set_clock = sdhci_set_clock,
.get_max_clock = sdhci_iproc_get_max_clock,
@@ -271,6 +288,7 @@ static const struct sdhci_ops sdhci_iproc_bcm2711_ops = {
.set_clock = sdhci_set_clock,
.set_power = sdhci_set_power_and_bus_voltage,
.get_max_clock = sdhci_iproc_get_max_clock,
+ .get_min_clock = sdhci_iproc_bcm2711_get_min_clock,
.set_bus_width = sdhci_set_bus_width,
.reset = sdhci_reset,
.set_uhs_signaling = sdhci_set_uhs_signaling,
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index e44b7a66b73c..50c71e0ba5e4 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -2089,6 +2089,23 @@ static void sdhci_msm_cqe_disable(struct mmc_host *mmc, bool recovery)
sdhci_cqe_disable(mmc, recovery);
}
+static void sdhci_msm_set_timeout(struct sdhci_host *host, struct mmc_command *cmd)
+{
+ u32 count, start = 15;
+
+ __sdhci_set_timeout(host, cmd);
+ count = sdhci_readb(host, SDHCI_TIMEOUT_CONTROL);
+ /*
+ * Update software timeout value if its value is less than hardware data
+ * timeout value. Qcom SoC hardware data timeout value was calculated
+ * using 4 * MCLK * 2^(count + 13). where MCLK = 1 / host->clock.
+ */
+ if (cmd && cmd->data && host->clock > 400000 &&
+ host->clock <= 50000000 &&
+ ((1 << (count + start)) > (10 * host->clock)))
+ host->data_timeout = 22LL * NSEC_PER_SEC;
+}
+
static const struct cqhci_host_ops sdhci_msm_cqhci_ops = {
.enable = sdhci_msm_cqe_enable,
.disable = sdhci_msm_cqe_disable,
@@ -2438,6 +2455,7 @@ static const struct sdhci_ops sdhci_msm_ops = {
.irq = sdhci_msm_cqe_irq,
.dump_vendor_regs = sdhci_msm_dump_vendor_regs,
.set_power = sdhci_set_power_noreg,
+ .set_timeout = sdhci_msm_set_timeout,
};
static const struct sdhci_pltfm_data sdhci_msm_pdata = {
@@ -2696,6 +2714,9 @@ static int sdhci_msm_probe(struct platform_device *pdev)
msm_host->mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_NEED_RSP_BUSY;
+ /* Set the timeout value to max possible */
+ host->max_timeout_count = 0xF;
+
pm_runtime_get_noresume(&pdev->dev);
pm_runtime_set_active(&pdev->dev);
pm_runtime_enable(&pdev->dev);
diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index 0e7c07ed9690..737e2bfdedc2 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -159,6 +159,12 @@ struct sdhci_arasan_data {
/* Controller immediately reports SDHCI_CLOCK_INT_STABLE after enabling the
* internal clock even when the clock isn't stable */
#define SDHCI_ARASAN_QUIRK_CLOCK_UNSTABLE BIT(1)
+/*
+ * Some of the Arasan variations might not have timing requirements
+ * met at 25MHz for Default Speed mode, those controllers work at
+ * 19MHz instead
+ */
+#define SDHCI_ARASAN_QUIRK_CLOCK_25_BROKEN BIT(2)
};
struct sdhci_arasan_of_data {
@@ -267,7 +273,12 @@ static void sdhci_arasan_set_clock(struct sdhci_host *host, unsigned int clock)
* through low speeds without power cycling.
*/
sdhci_set_clock(host, host->max_clk);
- phy_power_on(sdhci_arasan->phy);
+ if (phy_power_on(sdhci_arasan->phy)) {
+ pr_err("%s: Cannot power on phy.\n",
+ mmc_hostname(host->mmc));
+ return;
+ }
+
sdhci_arasan->is_phy_on = true;
/*
@@ -290,6 +301,16 @@ static void sdhci_arasan_set_clock(struct sdhci_host *host, unsigned int clock)
sdhci_arasan->is_phy_on = false;
}
+ if (sdhci_arasan->quirks & SDHCI_ARASAN_QUIRK_CLOCK_25_BROKEN) {
+ /*
+ * Some of the Arasan variations might not have timing
+ * requirements met at 25MHz for Default Speed mode,
+ * those controllers work at 19MHz instead.
+ */
+ if (clock == DEFAULT_SPEED_MAX_DTR)
+ clock = (DEFAULT_SPEED_MAX_DTR * 19) / 25;
+ }
+
/* Set the Input and Output Clock Phase Delays */
if (clk_data->set_clk_delays)
clk_data->set_clk_delays(host);
@@ -307,7 +328,12 @@ static void sdhci_arasan_set_clock(struct sdhci_host *host, unsigned int clock)
msleep(20);
if (ctrl_phy) {
- phy_power_on(sdhci_arasan->phy);
+ if (phy_power_on(sdhci_arasan->phy)) {
+ pr_err("%s: Cannot power on phy.\n",
+ mmc_hostname(host->mmc));
+ return;
+ }
+
sdhci_arasan->is_phy_on = true;
}
}
@@ -463,7 +489,9 @@ static int sdhci_arasan_suspend(struct device *dev)
ret = phy_power_off(sdhci_arasan->phy);
if (ret) {
dev_err(dev, "Cannot power off phy.\n");
- sdhci_resume_host(host);
+ if (sdhci_resume_host(host))
+ dev_err(dev, "Cannot resume host.\n");
+
return ret;
}
sdhci_arasan->is_phy_on = false;
@@ -878,6 +906,10 @@ static int arasan_zynqmp_execute_tuning(struct mmc_host *mmc, u32 opcode)
NODE_SD_1;
int err;
+ /* ZynqMP SD controller does not perform auto tuning in DDR50 mode */
+ if (mmc->ios.timing == MMC_TIMING_UHS_DDR50)
+ return 0;
+
arasan_zynqmp_dll_reset(host, device_id);
err = sdhci_execute_tuning(mmc, opcode);
@@ -952,7 +984,7 @@ static void sdhci_arasan_update_baseclkfreq(struct sdhci_host *host)
struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
const struct sdhci_arasan_soc_ctl_map *soc_ctl_map =
sdhci_arasan->soc_ctl_map;
- u32 mhz = DIV_ROUND_CLOSEST(clk_get_rate(pltfm_host->clk), 1000000);
+ u32 mhz = DIV_ROUND_CLOSEST_ULL(clk_get_rate(pltfm_host->clk), 1000000);
/* Having a map is optional */
if (!soc_ctl_map)
@@ -986,14 +1018,16 @@ static void arasan_dt_read_clk_phase(struct device *dev,
{
struct device_node *np = dev->of_node;
- int clk_phase[2] = {0};
+ u32 clk_phase[2] = {0};
+ int ret;
/*
* Read Tap Delay values from DT, if the DT does not contain the
* Tap Values then use the pre-defined values.
*/
- if (of_property_read_variable_u32_array(np, prop, &clk_phase[0],
- 2, 0)) {
+ ret = of_property_read_variable_u32_array(np, prop, &clk_phase[0],
+ 2, 0);
+ if (ret < 0) {
dev_dbg(dev, "Using predefined clock phase for %s = %d %d\n",
prop, clk_data->clk_phase_in[timing],
clk_data->clk_phase_out[timing]);
@@ -1608,6 +1642,9 @@ static int sdhci_arasan_probe(struct platform_device *pdev)
if (of_device_is_compatible(np, "xlnx,zynqmp-8.9a")) {
host->mmc_host_ops.execute_tuning =
arasan_zynqmp_execute_tuning;
+
+ sdhci_arasan->quirks |= SDHCI_ARASAN_QUIRK_CLOCK_25_BROKEN;
+ host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12;
}
arasan_dt_parse_clk_phases(dev, &sdhci_arasan->clk_data);
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 387ce9cdbd7c..a5001875876b 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -116,6 +116,8 @@
*/
#define NVQUIRK_HAS_TMCLK BIT(10)
+#define NVQUIRK_HAS_ANDROID_GPT_SECTOR BIT(11)
+
/* SDMMC CQE Base Address for Tegra Host Ver 4.1 and Higher */
#define SDHCI_TEGRA_CQE_BASE_ADDR 0xF000
@@ -1361,6 +1363,7 @@ static const struct sdhci_tegra_soc_data soc_data_tegra20 = {
.pdata = &sdhci_tegra20_pdata,
.dma_mask = DMA_BIT_MASK(32),
.nvquirks = NVQUIRK_FORCE_SDHCI_SPEC_200 |
+ NVQUIRK_HAS_ANDROID_GPT_SECTOR |
NVQUIRK_ENABLE_BLOCK_GAP_DET,
};
@@ -1390,6 +1393,7 @@ static const struct sdhci_tegra_soc_data soc_data_tegra30 = {
.nvquirks = NVQUIRK_ENABLE_SDHCI_SPEC_300 |
NVQUIRK_ENABLE_SDR50 |
NVQUIRK_ENABLE_SDR104 |
+ NVQUIRK_HAS_ANDROID_GPT_SECTOR |
NVQUIRK_HAS_PADCALIB,
};
@@ -1422,6 +1426,7 @@ static const struct sdhci_pltfm_data sdhci_tegra114_pdata = {
static const struct sdhci_tegra_soc_data soc_data_tegra114 = {
.pdata = &sdhci_tegra114_pdata,
.dma_mask = DMA_BIT_MASK(32),
+ .nvquirks = NVQUIRK_HAS_ANDROID_GPT_SECTOR,
};
static const struct sdhci_pltfm_data sdhci_tegra124_pdata = {
@@ -1438,6 +1443,7 @@ static const struct sdhci_pltfm_data sdhci_tegra124_pdata = {
static const struct sdhci_tegra_soc_data soc_data_tegra124 = {
.pdata = &sdhci_tegra124_pdata,
.dma_mask = DMA_BIT_MASK(34),
+ .nvquirks = NVQUIRK_HAS_ANDROID_GPT_SECTOR,
};
static const struct sdhci_ops tegra210_sdhci_ops = {
@@ -1616,6 +1622,9 @@ static int sdhci_tegra_probe(struct platform_device *pdev)
tegra_host->pad_control_available = false;
tegra_host->soc_data = soc_data;
+ if (soc_data->nvquirks & NVQUIRK_HAS_ANDROID_GPT_SECTOR)
+ host->mmc->caps2 |= MMC_CAP2_ALT_GPT_TEGRA;
+
if (soc_data->nvquirks & NVQUIRK_NEEDS_PAD_CONTROL) {
rc = tegra_sdhci_init_pinctrl_info(&pdev->dev, tegra_host);
if (rc == 0)
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index aba6e10b8605..8eefa7d5fe85 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -934,21 +934,21 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd,
/*
* If the host controller provides us with an incorrect timeout
- * value, just skip the check and use 0xE. The hardware may take
+ * value, just skip the check and use the maximum. The hardware may take
* longer to time out, but that's much better than having a too-short
* timeout value.
*/
if (host->quirks & SDHCI_QUIRK_BROKEN_TIMEOUT_VAL)
- return 0xE;
+ return host->max_timeout_count;
/* Unspecified command, asume max */
if (cmd == NULL)
- return 0xE;
+ return host->max_timeout_count;
data = cmd->data;
/* Unspecified timeout, assume max */
if (!data && !cmd->busy_timeout)
- return 0xE;
+ return host->max_timeout_count;
/* timeout in us */
target_timeout = sdhci_target_timeout(host, cmd, data);
@@ -968,15 +968,15 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd,
while (current_timeout < target_timeout) {
count++;
current_timeout <<= 1;
- if (count >= 0xF)
+ if (count > host->max_timeout_count)
break;
}
- if (count >= 0xF) {
+ if (count > host->max_timeout_count) {
if (!(host->quirks2 & SDHCI_QUIRK2_DISABLE_HW_TIMEOUT))
DBG("Too large timeout 0x%x requested for CMD%d!\n",
count, cmd->opcode);
- count = 0xE;
+ count = host->max_timeout_count;
} else {
*too_big = false;
}
@@ -1222,6 +1222,7 @@ static int sdhci_external_dma_setup(struct sdhci_host *host,
if (!host->mapbase)
return -EINVAL;
+ memset(&cfg, 0, sizeof(cfg));
cfg.src_addr = host->mapbase + SDHCI_BUFFER;
cfg.dst_addr = host->mapbase + SDHCI_BUFFER;
cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
@@ -3278,8 +3279,14 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
{
u32 command;
- /* CMD19 generates _only_ Buffer Read Ready interrupt */
- if (intmask & SDHCI_INT_DATA_AVAIL) {
+ /*
+ * CMD19 generates _only_ Buffer Read Ready interrupt if
+ * use sdhci_send_tuning.
+ * Need to exclude this case: PIO mode and use mmc_send_tuning,
+ * If not, sdhci_transfer_pio will never be called, make the
+ * SDHCI_INT_DATA_AVAIL always there, stuck in irq storm.
+ */
+ if (intmask & SDHCI_INT_DATA_AVAIL && !host->data) {
command = SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND));
if (command == MMC_SEND_TUNING_BLOCK ||
command == MMC_SEND_TUNING_BLOCK_HS200) {
@@ -3940,6 +3947,8 @@ struct sdhci_host *sdhci_alloc_host(struct device *dev,
*/
host->adma_table_cnt = SDHCI_MAX_SEGS * 2 + 1;
+ host->max_timeout_count = 0xE;
+
return host;
}
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 074dc182b184..e8d04e42a5af 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -517,6 +517,7 @@ struct sdhci_host {
unsigned int max_clk; /* Max possible freq (MHz) */
unsigned int timeout_clk; /* Timeout freq (KHz) */
+ u8 max_timeout_count; /* Vendor specific max timeout count */
unsigned int clk_mul; /* Clock Muliplier value */
unsigned int clock; /* Current clock (MHz) */
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index e5e457037235..bcc595c70a9f 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -1164,9 +1164,9 @@ static bool sh_mmcif_end_cmd(struct sh_mmcif_host *host)
data->bytes_xfered = 0;
/* Abort DMA */
if (data->flags & MMC_DATA_READ)
- dmaengine_terminate_all(host->chan_rx);
+ dmaengine_terminate_sync(host->chan_rx);
else
- dmaengine_terminate_all(host->chan_tx);
+ dmaengine_terminate_sync(host->chan_tx);
}
return false;
diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c
index 9fdf7ea06e3f..63917070b1a7 100644
--- a/drivers/mmc/host/tifm_sd.c
+++ b/drivers/mmc/host/tifm_sd.c
@@ -669,8 +669,8 @@ static void tifm_sd_request(struct mmc_host *mmc, struct mmc_request *mrq)
if(1 != tifm_map_sg(sock, &host->bounce_buf, 1,
r_data->flags & MMC_DATA_WRITE
- ? PCI_DMA_TODEVICE
- : PCI_DMA_FROMDEVICE)) {
+ ? DMA_TO_DEVICE
+ : DMA_FROM_DEVICE)) {
pr_err("%s : scatterlist map failed\n",
dev_name(&sock->dev));
mrq->cmd->error = -ENOMEM;
@@ -680,15 +680,15 @@ static void tifm_sd_request(struct mmc_host *mmc, struct mmc_request *mrq)
r_data->sg_len,
r_data->flags
& MMC_DATA_WRITE
- ? PCI_DMA_TODEVICE
- : PCI_DMA_FROMDEVICE);
+ ? DMA_TO_DEVICE
+ : DMA_FROM_DEVICE);
if (host->sg_len < 1) {
pr_err("%s : scatterlist map failed\n",
dev_name(&sock->dev));
tifm_unmap_sg(sock, &host->bounce_buf, 1,
r_data->flags & MMC_DATA_WRITE
- ? PCI_DMA_TODEVICE
- : PCI_DMA_FROMDEVICE);
+ ? DMA_TO_DEVICE
+ : DMA_FROM_DEVICE);
mrq->cmd->error = -ENOMEM;
goto err_out;
}
@@ -762,10 +762,10 @@ static void tifm_sd_end_cmd(struct tasklet_struct *t)
} else {
tifm_unmap_sg(sock, &host->bounce_buf, 1,
(r_data->flags & MMC_DATA_WRITE)
- ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
+ ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
tifm_unmap_sg(sock, r_data->sg, r_data->sg_len,
(r_data->flags & MMC_DATA_WRITE)
- ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
+ ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
}
r_data->bytes_xfered = r_data->blocks
diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c
index b9b79b1089a0..99515be6e5e5 100644
--- a/drivers/mmc/host/usdhi6rol0.c
+++ b/drivers/mmc/host/usdhi6rol0.c
@@ -631,9 +631,9 @@ static void usdhi6_dma_kill(struct usdhi6_host *host)
__func__, data->sg_len, data->blocks, data->blksz);
/* Abort DMA */
if (data->flags & MMC_DATA_READ)
- dmaengine_terminate_all(host->chan_rx);
+ dmaengine_terminate_sync(host->chan_rx);
else
- dmaengine_terminate_all(host->chan_tx);
+ dmaengine_terminate_sync(host->chan_tx);
}
static void usdhi6_dma_check_error(struct usdhi6_host *host)
@@ -1186,6 +1186,15 @@ static int usdhi6_sig_volt_switch(struct mmc_host *mmc, struct mmc_ios *ios)
return ret;
}
+static int usdhi6_card_busy(struct mmc_host *mmc)
+{
+ struct usdhi6_host *host = mmc_priv(mmc);
+ u32 tmp = usdhi6_read(host, USDHI6_SD_INFO2);
+
+ /* Card is busy if it is pulling dat[0] low */
+ return !(tmp & USDHI6_SD_INFO2_SDDAT0);
+}
+
static const struct mmc_host_ops usdhi6_ops = {
.request = usdhi6_request,
.set_ios = usdhi6_set_ios,
@@ -1193,6 +1202,7 @@ static const struct mmc_host_ops usdhi6_ops = {
.get_ro = usdhi6_get_ro,
.enable_sdio_irq = usdhi6_enable_sdio_irq,
.start_signal_voltage_switch = usdhi6_sig_volt_switch,
+ .card_busy = usdhi6_card_busy,
};
/* State machine handlers */
diff --git a/drivers/mmc/host/via-sdmmc.c b/drivers/mmc/host/via-sdmmc.c
index c32df5530b94..88662a90ed96 100644
--- a/drivers/mmc/host/via-sdmmc.c
+++ b/drivers/mmc/host/via-sdmmc.c
@@ -491,7 +491,7 @@ static void via_sdc_preparedata(struct via_crdr_mmc_host *host,
count = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
((data->flags & MMC_DATA_READ) ?
- PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE));
+ DMA_FROM_DEVICE : DMA_TO_DEVICE));
BUG_ON(count != 1);
via_set_ddma(host, sg_dma_address(data->sg), sg_dma_len(data->sg),
@@ -638,7 +638,7 @@ static void via_sdc_finish_data(struct via_crdr_mmc_host *host)
dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
((data->flags & MMC_DATA_READ) ?
- PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE));
+ DMA_FROM_DEVICE : DMA_TO_DEVICE));
if (data->stop)
via_sdc_send_command(host, data->stop);
diff --git a/drivers/most/most_cdev.c b/drivers/most/most_cdev.c
index 8908b9363a96..3722f9abd7b9 100644
--- a/drivers/most/most_cdev.c
+++ b/drivers/most/most_cdev.c
@@ -486,7 +486,7 @@ static struct cdev_component comp = {
},
};
-static int __init mod_init(void)
+static int __init most_cdev_init(void)
{
int err;
@@ -518,7 +518,7 @@ dest_ida:
return err;
}
-static void __exit mod_exit(void)
+static void __exit most_cdev_exit(void)
{
struct comp_channel *c, *tmp;
@@ -534,8 +534,8 @@ static void __exit mod_exit(void)
class_destroy(comp.class);
}
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(most_cdev_init);
+module_exit(most_cdev_exit);
MODULE_AUTHOR("Christian Gromm <christian.gromm@microchip.com>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("character device component for mostcore");
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 3097e93787f7..a761134fd3be 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -119,7 +119,7 @@ static int cfi_use_status_reg(struct cfi_private *cfi)
struct cfi_pri_amdstd *extp = cfi->cmdset_priv;
u8 poll_mask = CFI_POLL_STATUS_REG | CFI_POLL_DQ;
- return extp->MinorVersion >= '5' &&
+ return extp && extp->MinorVersion >= '5' &&
(extp->SoftwareFeatures & poll_mask) == CFI_POLL_STATUS_REG;
}
diff --git a/drivers/mtd/devices/mchp48l640.c b/drivers/mtd/devices/mchp48l640.c
index efc2003bd13a..99400d0fb8c1 100644
--- a/drivers/mtd/devices/mchp48l640.c
+++ b/drivers/mtd/devices/mchp48l640.c
@@ -229,7 +229,7 @@ static int mchp48l640_write(struct mtd_info *mtd, loff_t to, size_t len,
woff += ws;
}
- return ret;
+ return 0;
}
static int mchp48l640_read_page(struct mtd_info *mtd, loff_t from, size_t len,
@@ -255,6 +255,7 @@ static int mchp48l640_read_page(struct mtd_info *mtd, loff_t from, size_t len,
if (!ret)
*retlen += len;
+ kfree(cmd);
return ret;
fail:
@@ -286,7 +287,7 @@ static int mchp48l640_read(struct mtd_info *mtd, loff_t from, size_t len,
woff += ws;
}
- return ret;
+ return 0;
};
static const struct mchp48_caps mchp48l640_caps = {
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 6ce4bc57f919..44bea3f65060 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -419,6 +419,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
if (tr->discard) {
blk_queue_flag_set(QUEUE_FLAG_DISCARD, new->rq);
blk_queue_max_discard_sectors(new->rq, UINT_MAX);
+ new->rq->limits.discard_granularity = tr->blksize;
}
gd->queue = new->rq;
@@ -525,14 +526,10 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
if (!blktrans_notifier.list.next)
register_mtd_user(&blktrans_notifier);
-
- mutex_lock(&mtd_table_mutex);
-
ret = register_blkdev(tr->major, tr->name);
if (ret < 0) {
printk(KERN_WARNING "Unable to register %s block device on major %d: %d\n",
tr->name, tr->major, ret);
- mutex_unlock(&mtd_table_mutex);
return ret;
}
@@ -542,12 +539,12 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
tr->blkshift = ffs(tr->blksize) - 1;
INIT_LIST_HEAD(&tr->devs);
- list_add(&tr->list, &blktrans_majors);
+ mutex_lock(&mtd_table_mutex);
+ list_add(&tr->list, &blktrans_majors);
mtd_for_each_device(mtd)
if (mtd->type != MTD_ABSENT)
tr->add_mtd(tr, mtd);
-
mutex_unlock(&mtd_table_mutex);
return 0;
}
@@ -564,8 +561,8 @@ int deregister_mtd_blktrans(struct mtd_blktrans_ops *tr)
list_for_each_entry_safe(dev, next, &tr->devs, list)
tr->remove_dev(dev);
- unregister_blkdev(tr->major, tr->name);
mutex_unlock(&mtd_table_mutex);
+ unregister_blkdev(tr->major, tr->name);
BUG_ON(!list_empty(&tr->devs));
return 0;
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index b5ccd3037788..c8fd7f758938 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -806,7 +806,9 @@ static ssize_t mtd_otp_size(struct mtd_info *mtd, bool is_user)
err:
kfree(info);
- return ret;
+
+ /* ENODATA means there is no OTP region. */
+ return ret == -ENODATA ? 0 : ret;
}
static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd,
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 57a583149cc0..3d6c6e880520 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5228,12 +5228,18 @@ static bool of_get_nand_on_flash_bbt(struct device_node *np)
static int of_get_nand_secure_regions(struct nand_chip *chip)
{
struct device_node *dn = nand_get_flash_node(chip);
+ struct property *prop;
int nr_elem, i, j;
- nr_elem = of_property_count_elems_of_size(dn, "secure-regions", sizeof(u64));
- if (!nr_elem)
+ /* Only proceed if the "secure-regions" property is present in DT */
+ prop = of_find_property(dn, "secure-regions", NULL);
+ if (!prop)
return 0;
+ nr_elem = of_property_count_elems_of_size(dn, "secure-regions", sizeof(u64));
+ if (nr_elem <= 0)
+ return nr_elem;
+
chip->nr_secure_regions = nr_elem / 2;
chip->secure_regions = kcalloc(chip->nr_secure_regions, sizeof(*chip->secure_regions),
GFP_KERNEL);
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 6977f8248df7..f37b1c56f7c4 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -431,10 +431,10 @@ config VSOCKMON
config MHI_NET
tristate "MHI network driver"
depends on MHI_BUS
- select WWAN
help
This is the network driver for MHI bus. It can be used with
- QCOM based WWAN modems (like SDX55). Say Y or M.
+ QCOM based WWAN modems for IP or QMAP/rmnet protocol (like SDX55).
+ Say Y or M.
endif # NET_CORE
@@ -483,6 +483,8 @@ config NET_SB1000
source "drivers/net/phy/Kconfig"
+source "drivers/net/mctp/Kconfig"
+
source "drivers/net/mdio/Kconfig"
source "drivers/net/pcs/Kconfig"
@@ -549,8 +551,8 @@ config VMXNET3
tristate "VMware VMXNET3 ethernet driver"
depends on PCI && INET
depends on !(PAGE_SIZE_64KB || ARM64_64K_PAGES || \
- IA64_PAGE_SIZE_64KB || MICROBLAZE_64K_PAGES || \
- PARISC_PAGE_SIZE_64KB || PPC_64K_PAGES)
+ IA64_PAGE_SIZE_64KB || PARISC_PAGE_SIZE_64KB || \
+ PPC_64K_PAGES)
help
This driver supports VMware's vmxnet3 virtual ethernet NIC.
To compile this driver as a module, choose M here: the
@@ -604,4 +606,11 @@ config NET_FAILOVER
a VM with direct attached VF by failing over to the paravirtual
datapath when the VF is unplugged.
+config NETDEV_LEGACY_INIT
+ bool
+ depends on ISA
+ help
+ Drivers that call netdev_boot_setup_check() should select this
+ symbol, everything else no longer needs it.
+
endif # NETDEVICES
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 7ffd2d03efaf..739838623cf6 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -18,7 +18,8 @@ obj-$(CONFIG_MACVLAN) += macvlan.o
obj-$(CONFIG_MACVTAP) += macvtap.o
obj-$(CONFIG_MII) += mii.o
obj-$(CONFIG_MDIO) += mdio.o
-obj-$(CONFIG_NET) += Space.o loopback.o
+obj-$(CONFIG_NET) += loopback.o
+obj-$(CONFIG_NETDEV_LEGACY_INIT) += Space.o
obj-$(CONFIG_NETCONSOLE) += netconsole.o
obj-y += phy/
obj-y += mdio/
@@ -36,7 +37,7 @@ obj-$(CONFIG_GTP) += gtp.o
obj-$(CONFIG_NLMON) += nlmon.o
obj-$(CONFIG_NET_VRF) += vrf.o
obj-$(CONFIG_VSOCKMON) += vsockmon.o
-obj-$(CONFIG_MHI_NET) += mhi/
+obj-$(CONFIG_MHI_NET) += mhi_net.o
#
# Networking Drivers
@@ -69,6 +70,7 @@ obj-$(CONFIG_WAN) += wan/
obj-$(CONFIG_WLAN) += wireless/
obj-$(CONFIG_IEEE802154) += ieee802154/
obj-$(CONFIG_WWAN) += wwan/
+obj-$(CONFIG_MCTP) += mctp/
obj-$(CONFIG_VMXNET3) += vmxnet3/
obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
diff --git a/drivers/net/Space.c b/drivers/net/Space.c
index df79e7370bcc..49e67c9fb5a4 100644
--- a/drivers/net/Space.c
+++ b/drivers/net/Space.c
@@ -30,6 +30,148 @@
#include <linux/netlink.h>
#include <net/Space.h>
+/*
+ * This structure holds boot-time configured netdevice settings. They
+ * are then used in the device probing.
+ */
+struct netdev_boot_setup {
+ char name[IFNAMSIZ];
+ struct ifmap map;
+};
+#define NETDEV_BOOT_SETUP_MAX 8
+
+
+/******************************************************************************
+ *
+ * Device Boot-time Settings Routines
+ *
+ ******************************************************************************/
+
+/* Boot time configuration table */
+static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
+
+/**
+ * netdev_boot_setup_add - add new setup entry
+ * @name: name of the device
+ * @map: configured settings for the device
+ *
+ * Adds new setup entry to the dev_boot_setup list. The function
+ * returns 0 on error and 1 on success. This is a generic routine to
+ * all netdevices.
+ */
+static int netdev_boot_setup_add(char *name, struct ifmap *map)
+{
+ struct netdev_boot_setup *s;
+ int i;
+
+ s = dev_boot_setup;
+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+ if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
+ memset(s[i].name, 0, sizeof(s[i].name));
+ strlcpy(s[i].name, name, IFNAMSIZ);
+ memcpy(&s[i].map, map, sizeof(s[i].map));
+ break;
+ }
+ }
+
+ return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
+}
+
+/**
+ * netdev_boot_setup_check - check boot time settings
+ * @dev: the netdevice
+ *
+ * Check boot time settings for the device.
+ * The found settings are set for the device to be used
+ * later in the device probing.
+ * Returns 0 if no settings found, 1 if they are.
+ */
+int netdev_boot_setup_check(struct net_device *dev)
+{
+ struct netdev_boot_setup *s = dev_boot_setup;
+ int i;
+
+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+ if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
+ !strcmp(dev->name, s[i].name)) {
+ dev->irq = s[i].map.irq;
+ dev->base_addr = s[i].map.base_addr;
+ dev->mem_start = s[i].map.mem_start;
+ dev->mem_end = s[i].map.mem_end;
+ return 1;
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL(netdev_boot_setup_check);
+
+/**
+ * netdev_boot_base - get address from boot time settings
+ * @prefix: prefix for network device
+ * @unit: id for network device
+ *
+ * Check boot time settings for the base address of device.
+ * The found settings are set for the device to be used
+ * later in the device probing.
+ * Returns 0 if no settings found.
+ */
+static unsigned long netdev_boot_base(const char *prefix, int unit)
+{
+ const struct netdev_boot_setup *s = dev_boot_setup;
+ char name[IFNAMSIZ];
+ int i;
+
+ sprintf(name, "%s%d", prefix, unit);
+
+ /*
+ * If device already registered then return base of 1
+ * to indicate not to probe for this interface
+ */
+ if (__dev_get_by_name(&init_net, name))
+ return 1;
+
+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
+ if (!strcmp(name, s[i].name))
+ return s[i].map.base_addr;
+ return 0;
+}
+
+/*
+ * Saves at boot time configured settings for any netdevice.
+ */
+static int __init netdev_boot_setup(char *str)
+{
+ int ints[5];
+ struct ifmap map;
+
+ str = get_options(str, ARRAY_SIZE(ints), ints);
+ if (!str || !*str)
+ return 0;
+
+ /* Save settings */
+ memset(&map, 0, sizeof(map));
+ if (ints[0] > 0)
+ map.irq = ints[1];
+ if (ints[0] > 1)
+ map.base_addr = ints[2];
+ if (ints[0] > 2)
+ map.mem_start = ints[3];
+ if (ints[0] > 3)
+ map.mem_end = ints[4];
+
+ /* Add new entry to the list */
+ return netdev_boot_setup_add(str, &map);
+}
+
+__setup("netdev=", netdev_boot_setup);
+
+static int __init ether_boot_setup(char *str)
+{
+ return netdev_boot_setup(str);
+}
+__setup("ether=", ether_boot_setup);
+
+
/* A unified ethernet device probe. This is the easiest way to have every
* ethernet adaptor have the name "eth[0123...]".
*/
@@ -77,39 +219,15 @@ static struct devprobe2 isa_probes[] __initdata = {
#ifdef CONFIG_SMC9194
{smc_init, 0},
#endif
-#ifdef CONFIG_CS89x0
-#ifndef CONFIG_CS89x0_PLATFORM
+#ifdef CONFIG_CS89x0_ISA
{cs89x0_probe, 0},
#endif
-#endif
-#if defined(CONFIG_MVME16x_NET) || defined(CONFIG_BVME6000_NET) /* Intel */
- {i82596_probe, 0}, /* I82596 */
-#endif
#ifdef CONFIG_NI65
{ni65_probe, 0},
#endif
{NULL, 0},
};
-static struct devprobe2 m68k_probes[] __initdata = {
-#ifdef CONFIG_ATARILANCE /* Lance-based Atari ethernet boards */
- {atarilance_probe, 0},
-#endif
-#ifdef CONFIG_SUN3LANCE /* sun3 onboard Lance chip */
- {sun3lance_probe, 0},
-#endif
-#ifdef CONFIG_SUN3_82586 /* sun3 onboard Intel 82586 chip */
- {sun3_82586_probe, 0},
-#endif
-#ifdef CONFIG_APNE /* A1200 PCMCIA NE2000 */
- {apne_probe, 0},
-#endif
-#ifdef CONFIG_MVME147_NET /* MVME147 internal Ethernet */
- {mvme147lance_probe, 0},
-#endif
- {NULL, 0},
-};
-
/* Unified ethernet device probe, segmented per architecture and
* per bus interface. This drives the legacy devices only for now.
*/
@@ -121,8 +239,7 @@ static void __init ethif_probe2(int unit)
if (base_addr == 1)
return;
- (void)(probe_list2(unit, m68k_probes, base_addr == 0) &&
- probe_list2(unit, isa_probes, base_addr == 0));
+ probe_list2(unit, isa_probes, base_addr == 0);
}
/* Statically configured drivers -- order matters here. */
@@ -130,10 +247,6 @@ static int __init net_olddevs_init(void)
{
int num;
-#ifdef CONFIG_SBNI
- for (num = 0; num < 8; ++num)
- sbni_probe(num);
-#endif
for (num = 0; num < 8; ++num)
ethif_probe2(num);
@@ -142,9 +255,6 @@ static int __init net_olddevs_init(void)
cops_probe(1);
cops_probe(2);
#endif
-#ifdef CONFIG_LTPC
- ltpc_probe();
-#endif
return 0;
}
diff --git a/drivers/net/appletalk/Kconfig b/drivers/net/appletalk/Kconfig
index 43918398f0d3..90b9f1d6eda9 100644
--- a/drivers/net/appletalk/Kconfig
+++ b/drivers/net/appletalk/Kconfig
@@ -52,7 +52,9 @@ config LTPC
config COPS
tristate "COPS LocalTalk PC support"
- depends on DEV_APPLETALK && (ISA || EISA)
+ depends on DEV_APPLETALK && ISA
+ depends on NETDEVICES
+ select NETDEV_LEGACY_INIT
help
This allows you to use COPS AppleTalk cards to connect to LocalTalk
networks. You also need version 1.3.3 or later of the netatalk
diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c
index 51cf5eca9c7f..5566daefbff4 100644
--- a/drivers/net/appletalk/ipddp.c
+++ b/drivers/net/appletalk/ipddp.c
@@ -54,11 +54,12 @@ static netdev_tx_t ipddp_xmit(struct sk_buff *skb,
static int ipddp_create(struct ipddp_route *new_rt);
static int ipddp_delete(struct ipddp_route *rt);
static struct ipddp_route* __ipddp_find_route(struct ipddp_route *rt);
-static int ipddp_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+static int ipddp_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd);
static const struct net_device_ops ipddp_netdev_ops = {
.ndo_start_xmit = ipddp_xmit,
- .ndo_do_ioctl = ipddp_ioctl,
+ .ndo_siocdevprivate = ipddp_siocdevprivate,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
};
@@ -268,15 +269,18 @@ static struct ipddp_route* __ipddp_find_route(struct ipddp_route *rt)
return NULL;
}
-static int ipddp_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int ipddp_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
- struct ipddp_route __user *rt = ifr->ifr_data;
struct ipddp_route rcp, rcp2, *rp;
+ if (in_compat_syscall())
+ return -EOPNOTSUPP;
+
if(!capable(CAP_NET_ADMIN))
return -EPERM;
- if(copy_from_user(&rcp, rt, sizeof(rcp)))
+ if (copy_from_user(&rcp, data, sizeof(rcp)))
return -EFAULT;
switch(cmd)
@@ -296,7 +300,7 @@ static int ipddp_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
spin_unlock_bh(&ipddp_route_lock);
if (rp) {
- if (copy_to_user(rt, &rcp2,
+ if (copy_to_user(data, &rcp2,
sizeof(struct ipddp_route)))
return -EFAULT;
return 0;
diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index 69c270885ff0..1f8925e75b3f 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -1015,7 +1015,7 @@ static const struct net_device_ops ltpc_netdev = {
.ndo_set_rx_mode = set_multicast_list,
};
-struct net_device * __init ltpc_probe(void)
+static struct net_device * __init ltpc_probe(void)
{
struct net_device *dev;
int err = -ENOMEM;
@@ -1221,12 +1221,10 @@ static int __init ltpc_setup(char *str)
}
__setup("ltpc=", ltpc_setup);
-#endif /* MODULE */
+#endif
static struct net_device *dev_ltpc;
-#ifdef MODULE
-
MODULE_LICENSE("GPL");
module_param(debug, int, 0);
module_param_hw(io, int, ioport, 0);
@@ -1244,7 +1242,6 @@ static int __init ltpc_module_init(void)
return PTR_ERR_OR_ZERO(dev_ltpc);
}
module_init(ltpc_module_init);
-#endif
static void __exit ltpc_cleanup(void)
{
diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c
index a7ee0af1af90..54e321a695ce 100644
--- a/drivers/net/bareudp.c
+++ b/drivers/net/bareudp.c
@@ -71,12 +71,18 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
family = AF_INET6;
if (bareudp->ethertype == htons(ETH_P_IP)) {
- struct iphdr *iphdr;
+ __u8 ipversion;
- iphdr = (struct iphdr *)(skb->data + BAREUDP_BASE_HLEN);
- if (iphdr->version == 4) {
- proto = bareudp->ethertype;
- } else if (bareudp->multi_proto_mode && (iphdr->version == 6)) {
+ if (skb_copy_bits(skb, BAREUDP_BASE_HLEN, &ipversion,
+ sizeof(ipversion))) {
+ bareudp->dev->stats.rx_dropped++;
+ goto drop;
+ }
+ ipversion >>= 4;
+
+ if (ipversion == 4) {
+ proto = htons(ETH_P_IP);
+ } else if (ipversion == 6 && bareudp->multi_proto_mode) {
proto = htons(ETH_P_IPV6);
} else {
bareudp->dev->stats.rx_dropped++;
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 6908822d9773..a4a202b9a0a2 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -96,7 +96,7 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker);
static void ad_mux_machine(struct port *port, bool *update_slave_arr);
static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
static void ad_tx_machine(struct port *port);
-static void ad_periodic_machine(struct port *port);
+static void ad_periodic_machine(struct port *port, struct bond_params bond_params);
static void ad_port_selection_logic(struct port *port, bool *update_slave_arr);
static void ad_agg_selection_logic(struct aggregator *aggregator,
bool *update_slave_arr);
@@ -1294,10 +1294,11 @@ static void ad_tx_machine(struct port *port)
/**
* ad_periodic_machine - handle a port's periodic state machine
* @port: the port we're looking at
+ * @bond_params: bond parameters we will use
*
* Turn ntt flag on priodically to perform periodic transmission of lacpdu's.
*/
-static void ad_periodic_machine(struct port *port)
+static void ad_periodic_machine(struct port *port, struct bond_params bond_params)
{
periodic_states_t last_state;
@@ -1306,8 +1307,8 @@ static void ad_periodic_machine(struct port *port)
/* check if port was reinitialized */
if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) ||
- (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY))
- ) {
+ (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) ||
+ !bond_params.lacp_active) {
port->sm_periodic_state = AD_NO_PERIODIC;
}
/* check if state machine should change state */
@@ -2341,7 +2342,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
}
ad_rx_machine(NULL, port);
- ad_periodic_machine(port);
+ ad_periodic_machine(port, bond->params);
ad_port_selection_logic(port, &update_slave_arr);
ad_mux_machine(port, &update_slave_arr);
ad_tx_machine(port);
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 22e5632089ac..7d3752cbf761 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -17,7 +17,6 @@
#include <linux/if_bonding.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
-#include <net/ipx.h>
#include <net/arp.h>
#include <net/ipv6.h>
#include <asm/byteorder.h>
@@ -1351,8 +1350,6 @@ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
if (!is_multicast_ether_addr(eth_data->h_dest)) {
switch (skb->protocol) {
case htons(ETH_P_IP):
- case htons(ETH_P_IPX):
- /* In case of IPX, it will falback to L2 hash */
case htons(ETH_P_IPV6):
hash_index = bond_xmit_hash(bond, skb);
if (bond->params.tlb_dynamic_lb) {
@@ -1454,35 +1451,6 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
hash_size = sizeof(ip6hdr->daddr);
break;
}
- case ETH_P_IPX: {
- const struct ipxhdr *ipxhdr;
-
- if (pskb_network_may_pull(skb, sizeof(*ipxhdr))) {
- do_tx_balance = false;
- break;
- }
- ipxhdr = (struct ipxhdr *)skb_network_header(skb);
-
- if (ipxhdr->ipx_checksum != IPX_NO_CHECKSUM) {
- /* something is wrong with this packet */
- do_tx_balance = false;
- break;
- }
-
- if (ipxhdr->ipx_type != IPX_TYPE_NCP) {
- /* The only protocol worth balancing in
- * this family since it has an "ARP" like
- * mechanism
- */
- do_tx_balance = false;
- break;
- }
-
- eth_data = eth_hdr(skb);
- hash_start = (char *)eth_data->h_dest;
- hash_size = ETH_ALEN;
- break;
- }
case ETH_P_ARP:
do_tx_balance = false;
if (bond_info->rlb_enabled)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 31730efa7538..b0966e733926 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -317,6 +317,25 @@ bool bond_sk_check(struct bonding *bond)
}
}
+static bool bond_xdp_check(struct bonding *bond)
+{
+ switch (BOND_MODE(bond)) {
+ case BOND_MODE_ROUNDROBIN:
+ case BOND_MODE_ACTIVEBACKUP:
+ return true;
+ case BOND_MODE_8023AD:
+ case BOND_MODE_XOR:
+ /* vlan+srcmac is not supported with XDP as in most cases the 802.1q
+ * payload is not in the packet due to hardware offload.
+ */
+ if (bond->params.xmit_policy != BOND_XMIT_POLICY_VLAN_SRCMAC)
+ return true;
+ fallthrough;
+ default:
+ return false;
+ }
+}
+
/*---------------------------------- VLAN -----------------------------------*/
/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
@@ -732,7 +751,7 @@ static int bond_check_dev_link(struct bonding *bond,
BMSR_LSTATUS : 0;
/* Ethtool can't be used, fallback to MII ioctls. */
- ioctl = slave_ops->ndo_do_ioctl;
+ ioctl = slave_ops->ndo_eth_ioctl;
if (ioctl) {
/* TODO: set pointer to correct ioctl on a per team member
* bases to make this more efficient. that is, once
@@ -756,7 +775,7 @@ static int bond_check_dev_link(struct bonding *bond,
}
}
- /* If reporting, report that either there's no dev->do_ioctl,
+ /* If reporting, report that either there's no ndo_eth_ioctl,
* or both SIOCGMIIREG and get_link failed (meaning that we
* cannot report link status). If not reporting, pretend
* we're ok.
@@ -1712,6 +1731,20 @@ void bond_lower_state_changed(struct slave *slave)
netdev_lower_state_changed(slave->dev, &info);
}
+#define BOND_NL_ERR(bond_dev, extack, errmsg) do { \
+ if (extack) \
+ NL_SET_ERR_MSG(extack, errmsg); \
+ else \
+ netdev_err(bond_dev, "Error: %s\n", errmsg); \
+} while (0)
+
+#define SLAVE_NL_ERR(bond_dev, slave_dev, extack, errmsg) do { \
+ if (extack) \
+ NL_SET_ERR_MSG(extack, errmsg); \
+ else \
+ slave_err(bond_dev, slave_dev, "Error: %s\n", errmsg); \
+} while (0)
+
/* enslave device <slave> to bond device <master> */
int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
struct netlink_ext_ack *extack)
@@ -1725,29 +1758,26 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
if (slave_dev->flags & IFF_MASTER &&
!netif_is_bond_master(slave_dev)) {
- NL_SET_ERR_MSG(extack, "Device with IFF_MASTER cannot be enslaved");
- netdev_err(bond_dev,
- "Error: Device with IFF_MASTER cannot be enslaved\n");
+ BOND_NL_ERR(bond_dev, extack,
+ "Device type (master device) cannot be enslaved");
return -EPERM;
}
if (!bond->params.use_carrier &&
slave_dev->ethtool_ops->get_link == NULL &&
- slave_ops->ndo_do_ioctl == NULL) {
+ slave_ops->ndo_eth_ioctl == NULL) {
slave_warn(bond_dev, slave_dev, "no link monitoring support\n");
}
/* already in-use? */
if (netdev_is_rx_handler_busy(slave_dev)) {
- NL_SET_ERR_MSG(extack, "Device is in use and cannot be enslaved");
- slave_err(bond_dev, slave_dev,
- "Error: Device is in use and cannot be enslaved\n");
+ SLAVE_NL_ERR(bond_dev, slave_dev, extack,
+ "Device is in use and cannot be enslaved");
return -EBUSY;
}
if (bond_dev == slave_dev) {
- NL_SET_ERR_MSG(extack, "Cannot enslave bond to itself.");
- netdev_err(bond_dev, "cannot enslave bond to itself.\n");
+ BOND_NL_ERR(bond_dev, extack, "Cannot enslave bond to itself.");
return -EPERM;
}
@@ -1756,8 +1786,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) {
slave_dbg(bond_dev, slave_dev, "is NETIF_F_VLAN_CHALLENGED\n");
if (vlan_uses_dev(bond_dev)) {
- NL_SET_ERR_MSG(extack, "Can not enslave VLAN challenged device to VLAN enabled bond");
- slave_err(bond_dev, slave_dev, "Error: cannot enslave VLAN challenged slave on VLAN enabled bond\n");
+ SLAVE_NL_ERR(bond_dev, slave_dev, extack,
+ "Can not enslave VLAN challenged device to VLAN enabled bond");
return -EPERM;
} else {
slave_warn(bond_dev, slave_dev, "enslaved VLAN challenged slave. Adding VLANs will be blocked as long as it is part of bond.\n");
@@ -1775,8 +1805,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
* enslaving it; the old ifenslave will not.
*/
if (slave_dev->flags & IFF_UP) {
- NL_SET_ERR_MSG(extack, "Device can not be enslaved while up");
- slave_err(bond_dev, slave_dev, "slave is up - this may be due to an out of date ifenslave\n");
+ SLAVE_NL_ERR(bond_dev, slave_dev, extack,
+ "Device can not be enslaved while up");
return -EPERM;
}
@@ -1815,17 +1845,15 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
bond_dev);
}
} else if (bond_dev->type != slave_dev->type) {
- NL_SET_ERR_MSG(extack, "Device type is different from other slaves");
- slave_err(bond_dev, slave_dev, "ether type (%d) is different from other slaves (%d), can not enslave it\n",
- slave_dev->type, bond_dev->type);
+ SLAVE_NL_ERR(bond_dev, slave_dev, extack,
+ "Device type is different from other slaves");
return -EINVAL;
}
if (slave_dev->type == ARPHRD_INFINIBAND &&
BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
- NL_SET_ERR_MSG(extack, "Only active-backup mode is supported for infiniband slaves");
- slave_warn(bond_dev, slave_dev, "Type (%d) supports only active-backup mode\n",
- slave_dev->type);
+ SLAVE_NL_ERR(bond_dev, slave_dev, extack,
+ "Only active-backup mode is supported for infiniband slaves");
res = -EOPNOTSUPP;
goto err_undo_flags;
}
@@ -1839,8 +1867,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
bond->params.fail_over_mac = BOND_FOM_ACTIVE;
slave_warn(bond_dev, slave_dev, "Setting fail_over_mac to active for active-backup mode\n");
} else {
- NL_SET_ERR_MSG(extack, "Slave device does not support setting the MAC address, but fail_over_mac is not set to active");
- slave_err(bond_dev, slave_dev, "The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n");
+ SLAVE_NL_ERR(bond_dev, slave_dev, extack,
+ "Slave device does not support setting the MAC address, but fail_over_mac is not set to active");
res = -EOPNOTSUPP;
goto err_undo_flags;
}
@@ -2133,6 +2161,39 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
bond_update_slave_arr(bond, NULL);
+ if (!slave_dev->netdev_ops->ndo_bpf ||
+ !slave_dev->netdev_ops->ndo_xdp_xmit) {
+ if (bond->xdp_prog) {
+ SLAVE_NL_ERR(bond_dev, slave_dev, extack,
+ "Slave does not support XDP");
+ res = -EOPNOTSUPP;
+ goto err_sysfs_del;
+ }
+ } else {
+ struct netdev_bpf xdp = {
+ .command = XDP_SETUP_PROG,
+ .flags = 0,
+ .prog = bond->xdp_prog,
+ .extack = extack,
+ };
+
+ if (dev_xdp_prog_count(slave_dev) > 0) {
+ SLAVE_NL_ERR(bond_dev, slave_dev, extack,
+ "Slave has XDP program loaded, please unload before enslaving");
+ res = -EOPNOTSUPP;
+ goto err_sysfs_del;
+ }
+
+ res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+ if (res < 0) {
+ /* ndo_bpf() sets extack error message */
+ slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res);
+ goto err_sysfs_del;
+ }
+ if (bond->xdp_prog)
+ bpf_prog_inc(bond->xdp_prog);
+ }
+
slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n",
bond_is_active_slave(new_slave) ? "an active" : "a backup",
new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");
@@ -2252,7 +2313,17 @@ static int __bond_release_one(struct net_device *bond_dev,
/* recompute stats just before removing the slave */
bond_get_stats(bond->dev, &bond->bond_stats);
- bond_upper_dev_unlink(bond, slave);
+ if (bond->xdp_prog) {
+ struct netdev_bpf xdp = {
+ .command = XDP_SETUP_PROG,
+ .flags = 0,
+ .prog = NULL,
+ .extack = NULL,
+ };
+ if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp))
+ slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n");
+ }
+
/* unregister rx_handler early so bond_handle_frame wouldn't be called
* for this slave anymore.
*/
@@ -2261,6 +2332,8 @@ static int __bond_release_one(struct net_device *bond_dev,
if (BOND_MODE(bond) == BOND_MODE_8023AD)
bond_3ad_unbind_slave(slave);
+ bond_upper_dev_unlink(bond, slave);
+
if (bond_mode_can_use_xmit_hash(bond))
bond_update_slave_arr(bond, slave);
@@ -3613,90 +3686,112 @@ static struct notifier_block bond_netdev_notifier = {
/*---------------------------- Hashing Policies -----------------------------*/
+/* Helper to access data in a packet, with or without a backing skb.
+ * If skb is given the data is linearized if necessary via pskb_may_pull.
+ */
+static inline const void *bond_pull_data(struct sk_buff *skb,
+ const void *data, int hlen, int n)
+{
+ if (likely(n <= hlen))
+ return data;
+ else if (skb && likely(pskb_may_pull(skb, n)))
+ return skb->head;
+
+ return NULL;
+}
+
/* L2 hash helper */
-static inline u32 bond_eth_hash(struct sk_buff *skb)
+static inline u32 bond_eth_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
{
- struct ethhdr *ep, hdr_tmp;
+ struct ethhdr *ep;
- ep = skb_header_pointer(skb, 0, sizeof(hdr_tmp), &hdr_tmp);
- if (ep)
- return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto;
- return 0;
+ data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
+ if (!data)
+ return 0;
+
+ ep = (struct ethhdr *)(data + mhoff);
+ return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto);
}
-static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk,
- int *noff, int *proto, bool l34)
+static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *data,
+ int hlen, __be16 l2_proto, int *nhoff, int *ip_proto, bool l34)
{
const struct ipv6hdr *iph6;
const struct iphdr *iph;
- if (skb->protocol == htons(ETH_P_IP)) {
- if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph))))
+ if (l2_proto == htons(ETH_P_IP)) {
+ data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph));
+ if (!data)
return false;
- iph = (const struct iphdr *)(skb->data + *noff);
+
+ iph = (const struct iphdr *)(data + *nhoff);
iph_to_flow_copy_v4addrs(fk, iph);
- *noff += iph->ihl << 2;
+ *nhoff += iph->ihl << 2;
if (!ip_is_fragment(iph))
- *proto = iph->protocol;
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
- if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6))))
+ *ip_proto = iph->protocol;
+ } else if (l2_proto == htons(ETH_P_IPV6)) {
+ data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph6));
+ if (!data)
return false;
- iph6 = (const struct ipv6hdr *)(skb->data + *noff);
+
+ iph6 = (const struct ipv6hdr *)(data + *nhoff);
iph_to_flow_copy_v6addrs(fk, iph6);
- *noff += sizeof(*iph6);
- *proto = iph6->nexthdr;
+ *nhoff += sizeof(*iph6);
+ *ip_proto = iph6->nexthdr;
} else {
return false;
}
- if (l34 && *proto >= 0)
- fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto);
+ if (l34 && *ip_proto >= 0)
+ fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen);
return true;
}
-static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
+static u32 bond_vlan_srcmac_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
{
- struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
u32 srcmac_vendor = 0, srcmac_dev = 0;
- u16 vlan;
+ struct ethhdr *mac_hdr;
+ u16 vlan = 0;
int i;
+ data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
+ if (!data)
+ return 0;
+ mac_hdr = (struct ethhdr *)(data + mhoff);
+
for (i = 0; i < 3; i++)
srcmac_vendor = (srcmac_vendor << 8) | mac_hdr->h_source[i];
for (i = 3; i < ETH_ALEN; i++)
srcmac_dev = (srcmac_dev << 8) | mac_hdr->h_source[i];
- if (!skb_vlan_tag_present(skb))
- return srcmac_vendor ^ srcmac_dev;
-
- vlan = skb_vlan_tag_get(skb);
+ if (skb && skb_vlan_tag_present(skb))
+ vlan = skb_vlan_tag_get(skb);
return vlan ^ srcmac_vendor ^ srcmac_dev;
}
/* Extract the appropriate headers based on bond's xmit policy */
-static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
- struct flow_keys *fk)
+static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const void *data,
+ __be16 l2_proto, int nhoff, int hlen, struct flow_keys *fk)
{
bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34;
- int noff, proto = -1;
+ int ip_proto = -1;
switch (bond->params.xmit_policy) {
case BOND_XMIT_POLICY_ENCAP23:
case BOND_XMIT_POLICY_ENCAP34:
memset(fk, 0, sizeof(*fk));
return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
- fk, NULL, 0, 0, 0, 0);
+ fk, data, l2_proto, nhoff, hlen, 0);
default:
break;
}
fk->ports.ports = 0;
memset(&fk->icmp, 0, sizeof(fk->icmp));
- noff = skb_network_offset(skb);
- if (!bond_flow_ip(skb, fk, &noff, &proto, l34))
+ if (!bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34))
return false;
/* ICMP error packets contains at least 8 bytes of the header
@@ -3704,22 +3799,20 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
* to correlate ICMP error packets within the same flow which
* generated the error.
*/
- if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
- skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data,
- skb_transport_offset(skb),
- skb_headlen(skb));
- if (proto == IPPROTO_ICMP) {
+ if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
+ skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen);
+ if (ip_proto == IPPROTO_ICMP) {
if (!icmp_is_err(fk->icmp.type))
return true;
- noff += sizeof(struct icmphdr);
- } else if (proto == IPPROTO_ICMPV6) {
+ nhoff += sizeof(struct icmphdr);
+ } else if (ip_proto == IPPROTO_ICMPV6) {
if (!icmpv6_is_err(fk->icmp.type))
return true;
- noff += sizeof(struct icmp6hdr);
+ nhoff += sizeof(struct icmp6hdr);
}
- return bond_flow_ip(skb, fk, &noff, &proto, l34);
+ return bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34);
}
return true;
@@ -3735,33 +3828,26 @@ static u32 bond_ip_hash(u32 hash, struct flow_keys *flow)
return hash >> 1;
}
-/**
- * bond_xmit_hash - generate a hash value based on the xmit policy
- * @bond: bonding device
- * @skb: buffer to use for headers
- *
- * This function will extract the necessary headers from the skb buffer and use
- * them to generate a hash based on the xmit_policy set in the bonding device
+/* Generate hash based on xmit policy. If @skb is given it is used to linearize
+ * the data as required, but this function can be used without it if the data is
+ * known to be linear (e.g. with xdp_buff).
*/
-u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const void *data,
+ __be16 l2_proto, int mhoff, int nhoff, int hlen)
{
struct flow_keys flow;
u32 hash;
- if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
- skb->l4_hash)
- return skb->hash;
-
if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC)
- return bond_vlan_srcmac_hash(skb);
+ return bond_vlan_srcmac_hash(skb, data, mhoff, hlen);
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
- !bond_flow_dissect(bond, skb, &flow))
- return bond_eth_hash(skb);
+ !bond_flow_dissect(bond, skb, data, l2_proto, nhoff, hlen, &flow))
+ return bond_eth_hash(skb, data, mhoff, hlen);
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
- hash = bond_eth_hash(skb);
+ hash = bond_eth_hash(skb, data, mhoff, hlen);
} else {
if (flow.icmp.id)
memcpy(&hash, &flow.icmp, sizeof(hash));
@@ -3772,6 +3858,45 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
return bond_ip_hash(hash, &flow);
}
+/**
+ * bond_xmit_hash - generate a hash value based on the xmit policy
+ * @bond: bonding device
+ * @skb: buffer to use for headers
+ *
+ * This function will extract the necessary headers from the skb buffer and use
+ * them to generate a hash based on the xmit_policy set in the bonding device
+ */
+u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+{
+ if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
+ skb->l4_hash)
+ return skb->hash;
+
+ return __bond_xmit_hash(bond, skb, skb->head, skb->protocol,
+ skb->mac_header, skb->network_header,
+ skb_headlen(skb));
+}
+
+/**
+ * bond_xmit_hash_xdp - generate a hash value based on the xmit policy
+ * @bond: bonding device
+ * @xdp: buffer to use for headers
+ *
+ * The XDP variant of bond_xmit_hash.
+ */
+static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp)
+{
+ struct ethhdr *eth;
+
+ if (xdp->data + sizeof(struct ethhdr) > xdp->data_end)
+ return 0;
+
+ eth = (struct ethhdr *)xdp->data;
+
+ return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0,
+ sizeof(struct ethhdr), xdp->data_end - xdp->data);
+}
+
/*-------------------------- Device entry points ----------------------------*/
void bond_work_init_all(struct bonding *bond)
@@ -3962,20 +4087,13 @@ static void bond_get_stats(struct net_device *bond_dev,
rcu_read_unlock();
}
-static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
+static int bond_eth_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
{
struct bonding *bond = netdev_priv(bond_dev);
- struct net_device *slave_dev = NULL;
- struct ifbond k_binfo;
- struct ifbond __user *u_binfo = NULL;
- struct ifslave k_sinfo;
- struct ifslave __user *u_sinfo = NULL;
struct mii_ioctl_data *mii = NULL;
- struct bond_opt_value newval;
- struct net *net;
- int res = 0;
+ int res;
- netdev_dbg(bond_dev, "bond_ioctl: cmd=%d\n", cmd);
+ netdev_dbg(bond_dev, "bond_eth_ioctl: cmd=%d\n", cmd);
switch (cmd) {
case SIOCGMIIPHY:
@@ -4000,7 +4118,28 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
}
return 0;
- case BOND_INFO_QUERY_OLD:
+ default:
+ res = -EOPNOTSUPP;
+ }
+
+ return res;
+}
+
+static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
+{
+ struct bonding *bond = netdev_priv(bond_dev);
+ struct net_device *slave_dev = NULL;
+ struct ifbond k_binfo;
+ struct ifbond __user *u_binfo = NULL;
+ struct ifslave k_sinfo;
+ struct ifslave __user *u_sinfo = NULL;
+ struct bond_opt_value newval;
+ struct net *net;
+ int res = 0;
+
+ netdev_dbg(bond_dev, "bond_ioctl: cmd=%d\n", cmd);
+
+ switch (cmd) {
case SIOCBONDINFOQUERY:
u_binfo = (struct ifbond __user *)ifr->ifr_data;
@@ -4012,7 +4151,6 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
return -EFAULT;
return 0;
- case BOND_SLAVE_INFO_QUERY_OLD:
case SIOCBONDSLAVEINFOQUERY:
u_sinfo = (struct ifslave __user *)ifr->ifr_data;
@@ -4042,19 +4180,15 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
return -ENODEV;
switch (cmd) {
- case BOND_ENSLAVE_OLD:
case SIOCBONDENSLAVE:
res = bond_enslave(bond_dev, slave_dev, NULL);
break;
- case BOND_RELEASE_OLD:
case SIOCBONDRELEASE:
res = bond_release(bond_dev, slave_dev);
break;
- case BOND_SETHWADDR_OLD:
case SIOCBONDSETHWADDR:
res = bond_set_dev_addr(bond_dev, slave_dev);
break;
- case BOND_CHANGE_ACTIVE_OLD:
case SIOCBONDCHANGEACTIVE:
bond_opt_initstr(&newval, slave_dev->name);
res = __bond_opt_set_notify(bond, BOND_OPT_ACTIVE_SLAVE,
@@ -4067,6 +4201,29 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
return res;
}
+static int bond_siocdevprivate(struct net_device *bond_dev, struct ifreq *ifr,
+ void __user *data, int cmd)
+{
+ struct ifreq ifrdata = { .ifr_data = data };
+
+ switch (cmd) {
+ case BOND_INFO_QUERY_OLD:
+ return bond_do_ioctl(bond_dev, &ifrdata, SIOCBONDINFOQUERY);
+ case BOND_SLAVE_INFO_QUERY_OLD:
+ return bond_do_ioctl(bond_dev, &ifrdata, SIOCBONDSLAVEINFOQUERY);
+ case BOND_ENSLAVE_OLD:
+ return bond_do_ioctl(bond_dev, ifr, SIOCBONDENSLAVE);
+ case BOND_RELEASE_OLD:
+ return bond_do_ioctl(bond_dev, ifr, SIOCBONDRELEASE);
+ case BOND_SETHWADDR_OLD:
+ return bond_do_ioctl(bond_dev, ifr, SIOCBONDSETHWADDR);
+ case BOND_CHANGE_ACTIVE_OLD:
+ return bond_do_ioctl(bond_dev, ifr, SIOCBONDCHANGEACTIVE);
+ }
+
+ return -EOPNOTSUPP;
+}
+
static void bond_change_rx_flags(struct net_device *bond_dev, int change)
{
struct bonding *bond = netdev_priv(bond_dev);
@@ -4388,6 +4545,47 @@ non_igmp:
return NULL;
}
+static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond,
+ struct xdp_buff *xdp)
+{
+ struct slave *slave;
+ int slave_cnt;
+ u32 slave_id;
+ const struct ethhdr *eth;
+ void *data = xdp->data;
+
+ if (data + sizeof(struct ethhdr) > xdp->data_end)
+ goto non_igmp;
+
+ eth = (struct ethhdr *)data;
+ data += sizeof(struct ethhdr);
+
+ /* See comment on IGMP in bond_xmit_roundrobin_slave_get() */
+ if (eth->h_proto == htons(ETH_P_IP)) {
+ const struct iphdr *iph;
+
+ if (data + sizeof(struct iphdr) > xdp->data_end)
+ goto non_igmp;
+
+ iph = (struct iphdr *)data;
+
+ if (iph->protocol == IPPROTO_IGMP) {
+ slave = rcu_dereference(bond->curr_active_slave);
+ if (slave)
+ return slave;
+ return bond_get_slave_by_id(bond, 0);
+ }
+ }
+
+non_igmp:
+ slave_cnt = READ_ONCE(bond->slave_cnt);
+ if (likely(slave_cnt)) {
+ slave_id = bond_rr_gen_slave_id(bond) % slave_cnt;
+ return bond_get_slave_by_id(bond, slave_id);
+ }
+ return NULL;
+}
+
static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
struct net_device *bond_dev)
{
@@ -4401,8 +4599,7 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
return bond_tx_drop(bond_dev, skb);
}
-static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond,
- struct sk_buff *skb)
+static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond)
{
return rcu_dereference(bond->curr_active_slave);
}
@@ -4416,7 +4613,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb,
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave;
- slave = bond_xmit_activebackup_slave_get(bond, skb);
+ slave = bond_xmit_activebackup_slave_get(bond);
if (slave)
return bond_dev_queue_xmit(bond, skb, slave->dev);
@@ -4604,6 +4801,22 @@ static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond,
return slave;
}
+static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond,
+ struct xdp_buff *xdp)
+{
+ struct bond_up_slave *slaves;
+ unsigned int count;
+ u32 hash;
+
+ hash = bond_xmit_hash_xdp(bond, xdp);
+ slaves = rcu_dereference(bond->usable_slaves);
+ count = slaves ? READ_ONCE(slaves->count) : 0;
+ if (unlikely(!count))
+ return NULL;
+
+ return slaves->arr[hash % count];
+}
+
/* Use this Xmit function for 3AD as well as XOR modes. The current
* usable slave array is formed in the control path. The xmit function
* just calculates hash and sends the packet out.
@@ -4714,7 +4927,7 @@ static struct net_device *bond_xmit_get_slave(struct net_device *master_dev,
slave = bond_xmit_roundrobin_slave_get(bond, skb);
break;
case BOND_MODE_ACTIVEBACKUP:
- slave = bond_xmit_activebackup_slave_get(bond, skb);
+ slave = bond_xmit_activebackup_slave_get(bond);
break;
case BOND_MODE_8023AD:
case BOND_MODE_XOR:
@@ -4888,6 +5101,172 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
return ret;
}
+static struct net_device *
+bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp)
+{
+ struct bonding *bond = netdev_priv(bond_dev);
+ struct slave *slave;
+
+ /* Caller needs to hold rcu_read_lock() */
+
+ switch (BOND_MODE(bond)) {
+ case BOND_MODE_ROUNDROBIN:
+ slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp);
+ break;
+
+ case BOND_MODE_ACTIVEBACKUP:
+ slave = bond_xmit_activebackup_slave_get(bond);
+ break;
+
+ case BOND_MODE_8023AD:
+ case BOND_MODE_XOR:
+ slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp);
+ break;
+
+ default:
+ /* Should never happen. Mode guarded by bond_xdp_check() */
+ netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond));
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+
+ if (slave)
+ return slave->dev;
+
+ return NULL;
+}
+
+static int bond_xdp_xmit(struct net_device *bond_dev,
+ int n, struct xdp_frame **frames, u32 flags)
+{
+ int nxmit, err = -ENXIO;
+
+ rcu_read_lock();
+
+ for (nxmit = 0; nxmit < n; nxmit++) {
+ struct xdp_frame *frame = frames[nxmit];
+ struct xdp_frame *frames1[] = {frame};
+ struct net_device *slave_dev;
+ struct xdp_buff xdp;
+
+ xdp_convert_frame_to_buff(frame, &xdp);
+
+ slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp);
+ if (!slave_dev) {
+ err = -ENXIO;
+ break;
+ }
+
+ err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, frames1, flags);
+ if (err < 1)
+ break;
+ }
+
+ rcu_read_unlock();
+
+ /* If error happened on the first frame then we can pass the error up, otherwise
+ * report the number of frames that were xmitted.
+ */
+ if (err < 0)
+ return (nxmit == 0 ? err : nxmit);
+
+ return nxmit;
+}
+
+static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ struct bonding *bond = netdev_priv(dev);
+ struct list_head *iter;
+ struct slave *slave, *rollback_slave;
+ struct bpf_prog *old_prog;
+ struct netdev_bpf xdp = {
+ .command = XDP_SETUP_PROG,
+ .flags = 0,
+ .prog = prog,
+ .extack = extack,
+ };
+ int err;
+
+ ASSERT_RTNL();
+
+ if (!bond_xdp_check(bond))
+ return -EOPNOTSUPP;
+
+ old_prog = bond->xdp_prog;
+ bond->xdp_prog = prog;
+
+ bond_for_each_slave(bond, slave, iter) {
+ struct net_device *slave_dev = slave->dev;
+
+ if (!slave_dev->netdev_ops->ndo_bpf ||
+ !slave_dev->netdev_ops->ndo_xdp_xmit) {
+ SLAVE_NL_ERR(dev, slave_dev, extack,
+ "Slave device does not support XDP");
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ if (dev_xdp_prog_count(slave_dev) > 0) {
+ SLAVE_NL_ERR(dev, slave_dev, extack,
+ "Slave has XDP program loaded, please unload before enslaving");
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+ if (err < 0) {
+ /* ndo_bpf() sets extack error message */
+ slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err);
+ goto err;
+ }
+ if (prog)
+ bpf_prog_inc(prog);
+ }
+
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ if (prog)
+ static_branch_inc(&bpf_master_redirect_enabled_key);
+ else
+ static_branch_dec(&bpf_master_redirect_enabled_key);
+
+ return 0;
+
+err:
+ /* unwind the program changes */
+ bond->xdp_prog = old_prog;
+ xdp.prog = old_prog;
+ xdp.extack = NULL; /* do not overwrite original error */
+
+ bond_for_each_slave(bond, rollback_slave, iter) {
+ struct net_device *slave_dev = rollback_slave->dev;
+ int err_unwind;
+
+ if (slave == rollback_slave)
+ break;
+
+ err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+ if (err_unwind < 0)
+ slave_err(dev, slave_dev,
+ "Error %d when unwinding XDP program change\n", err_unwind);
+ else if (xdp.prog)
+ bpf_prog_inc(xdp.prog);
+ }
+ return err;
+}
+
+static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return bond_xdp_set(dev, xdp->prog, xdp->extack);
+ default:
+ return -EINVAL;
+ }
+}
+
static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed)
{
if (speed == 0 || speed == SPEED_UNKNOWN)
@@ -4955,7 +5334,9 @@ static const struct net_device_ops bond_netdev_ops = {
.ndo_start_xmit = bond_start_xmit,
.ndo_select_queue = bond_select_queue,
.ndo_get_stats64 = bond_get_stats,
- .ndo_do_ioctl = bond_do_ioctl,
+ .ndo_eth_ioctl = bond_eth_ioctl,
+ .ndo_siocbond = bond_do_ioctl,
+ .ndo_siocdevprivate = bond_siocdevprivate,
.ndo_change_rx_flags = bond_change_rx_flags,
.ndo_set_rx_mode = bond_set_rx_mode,
.ndo_change_mtu = bond_change_mtu,
@@ -4974,6 +5355,9 @@ static const struct net_device_ops bond_netdev_ops = {
.ndo_features_check = passthru_features_check,
.ndo_get_xmit_slave = bond_xmit_get_slave,
.ndo_sk_get_lower_dev = bond_sk_get_lower_dev,
+ .ndo_bpf = bond_xdp,
+ .ndo_xdp_xmit = bond_xdp_xmit,
+ .ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave,
};
static const struct device_type bond_type = {
@@ -5443,6 +5827,7 @@ static int bond_check_params(struct bond_params *params)
params->downdelay = downdelay;
params->peer_notif_delay = 0;
params->use_carrier = use_carrier;
+ params->lacp_active = 1;
params->lacp_fast = lacp_fast;
params->primary[0] = 0;
params->primary_reselect = primary_reselect_value;
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index 0561ece1ba45..5d54e11d18fa 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -100,6 +100,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
[IFLA_BOND_MIN_LINKS] = { .type = NLA_U32 },
[IFLA_BOND_LP_INTERVAL] = { .type = NLA_U32 },
[IFLA_BOND_PACKETS_PER_SLAVE] = { .type = NLA_U32 },
+ [IFLA_BOND_AD_LACP_ACTIVE] = { .type = NLA_U8 },
[IFLA_BOND_AD_LACP_RATE] = { .type = NLA_U8 },
[IFLA_BOND_AD_SELECT] = { .type = NLA_U8 },
[IFLA_BOND_AD_INFO] = { .type = NLA_NESTED },
@@ -387,6 +388,16 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[],
if (err)
return err;
}
+
+ if (data[IFLA_BOND_AD_LACP_ACTIVE]) {
+ int lacp_active = nla_get_u8(data[IFLA_BOND_AD_LACP_ACTIVE]);
+
+ bond_opt_initval(&newval, lacp_active);
+ err = __bond_opt_set(bond, BOND_OPT_LACP_ACTIVE, &newval);
+ if (err)
+ return err;
+ }
+
if (data[IFLA_BOND_AD_LACP_RATE]) {
int lacp_rate =
nla_get_u8(data[IFLA_BOND_AD_LACP_RATE]);
@@ -490,6 +501,7 @@ static size_t bond_get_size(const struct net_device *bond_dev)
nla_total_size(sizeof(u32)) + /* IFLA_BOND_MIN_LINKS */
nla_total_size(sizeof(u32)) + /* IFLA_BOND_LP_INTERVAL */
nla_total_size(sizeof(u32)) + /* IFLA_BOND_PACKETS_PER_SLAVE */
+ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_ACTIVE */
nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_RATE */
nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_SELECT */
nla_total_size(sizeof(struct nlattr)) + /* IFLA_BOND_AD_INFO */
@@ -622,6 +634,10 @@ static int bond_fill_info(struct sk_buff *skb,
packets_per_slave))
goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_BOND_AD_LACP_ACTIVE,
+ bond->params.lacp_active))
+ goto nla_put_failure;
+
if (nla_put_u8(skb, IFLA_BOND_AD_LACP_RATE,
bond->params.lacp_fast))
goto nla_put_failure;
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 0cf25de6f46d..a8fde3bc458f 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -58,6 +58,8 @@ static int bond_option_lp_interval_set(struct bonding *bond,
const struct bond_opt_value *newval);
static int bond_option_pps_set(struct bonding *bond,
const struct bond_opt_value *newval);
+static int bond_option_lacp_active_set(struct bonding *bond,
+ const struct bond_opt_value *newval);
static int bond_option_lacp_rate_set(struct bonding *bond,
const struct bond_opt_value *newval);
static int bond_option_ad_select_set(struct bonding *bond,
@@ -135,6 +137,12 @@ static const struct bond_opt_value bond_intmax_tbl[] = {
{ NULL, -1, 0}
};
+static const struct bond_opt_value bond_lacp_active[] = {
+ { "off", 0, 0},
+ { "on", 1, BOND_VALFLAG_DEFAULT},
+ { NULL, -1, 0}
+};
+
static const struct bond_opt_value bond_lacp_rate_tbl[] = {
{ "slow", AD_LACP_SLOW, 0},
{ "fast", AD_LACP_FAST, 0},
@@ -283,6 +291,15 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
.values = bond_intmax_tbl,
.set = bond_option_updelay_set
},
+ [BOND_OPT_LACP_ACTIVE] = {
+ .id = BOND_OPT_LACP_ACTIVE,
+ .name = "lacp_active",
+ .desc = "Send LACPDU frames with configured lacp rate or acts as speak when spoken to",
+ .flags = BOND_OPTFLAG_IFDOWN,
+ .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)),
+ .values = bond_lacp_active,
+ .set = bond_option_lacp_active_set
+ },
[BOND_OPT_LACP_RATE] = {
.id = BOND_OPT_LACP_RATE,
.name = "lacp_rate",
@@ -1333,6 +1350,16 @@ static int bond_option_pps_set(struct bonding *bond,
return 0;
}
+static int bond_option_lacp_active_set(struct bonding *bond,
+ const struct bond_opt_value *newval)
+{
+ netdev_dbg(bond->dev, "Setting LACP active to %s (%llu)\n",
+ newval->string, newval->value);
+ bond->params.lacp_active = newval->value;
+
+ return 0;
+}
+
static int bond_option_lacp_rate_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index 0fb1da361bb1..f3e3bfd72556 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -133,6 +133,8 @@ static void bond_info_show_master(struct seq_file *seq)
struct ad_info ad_info;
seq_puts(seq, "\n802.3ad info\n");
+ seq_printf(seq, "LACP active: %s\n",
+ (bond->params.lacp_active) ? "on" : "off");
seq_printf(seq, "LACP rate: %s\n",
(bond->params.lacp_fast) ? "fast" : "slow");
seq_printf(seq, "Min links: %d\n", bond->params.min_links);
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 5f9e9a240226..b9e9842fed94 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -339,10 +339,24 @@ static ssize_t bonding_show_peer_notif_delay(struct device *d,
static DEVICE_ATTR(peer_notif_delay, 0644,
bonding_show_peer_notif_delay, bonding_sysfs_store_option);
-/* Show the LACP interval. */
-static ssize_t bonding_show_lacp(struct device *d,
- struct device_attribute *attr,
- char *buf)
+/* Show the LACP activity and interval. */
+static ssize_t bonding_show_lacp_active(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct bonding *bond = to_bond(d);
+ const struct bond_opt_value *val;
+
+ val = bond_opt_get_val(BOND_OPT_LACP_ACTIVE, bond->params.lacp_active);
+
+ return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_active);
+}
+static DEVICE_ATTR(lacp_active, 0644,
+ bonding_show_lacp_active, bonding_sysfs_store_option);
+
+static ssize_t bonding_show_lacp_rate(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
{
struct bonding *bond = to_bond(d);
const struct bond_opt_value *val;
@@ -352,7 +366,7 @@ static ssize_t bonding_show_lacp(struct device *d,
return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_fast);
}
static DEVICE_ATTR(lacp_rate, 0644,
- bonding_show_lacp, bonding_sysfs_store_option);
+ bonding_show_lacp_rate, bonding_sysfs_store_option);
static ssize_t bonding_show_min_links(struct device *d,
struct device_attribute *attr,
@@ -738,6 +752,7 @@ static struct attribute *per_bond_attrs[] = {
&dev_attr_downdelay.attr,
&dev_attr_updelay.attr,
&dev_attr_peer_notif_delay.attr,
+ &dev_attr_lacp_active.attr,
&dev_attr_lacp_rate.attr,
&dev_attr_ad_select.attr,
&dev_attr_xmit_hash_policy.attr,
diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index e355d3974977..fff259247d52 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -97,7 +97,8 @@ config CAN_AT91
config CAN_FLEXCAN
tristate "Support for Freescale FLEXCAN based chips"
- depends on OF && HAS_IOMEM
+ depends on OF || COLDFIRE || COMPILE_TEST
+ depends on HAS_IOMEM
help
Say Y here if you want to support for Freescale FlexCAN.
diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index 04d0bb3ffe89..b06af90a9964 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -43,14 +43,14 @@ enum at91_reg {
};
/* Mailbox registers (0 <= i <= 15) */
-#define AT91_MMR(i) (enum at91_reg)(0x200 + ((i) * 0x20))
-#define AT91_MAM(i) (enum at91_reg)(0x204 + ((i) * 0x20))
-#define AT91_MID(i) (enum at91_reg)(0x208 + ((i) * 0x20))
-#define AT91_MFID(i) (enum at91_reg)(0x20C + ((i) * 0x20))
-#define AT91_MSR(i) (enum at91_reg)(0x210 + ((i) * 0x20))
-#define AT91_MDL(i) (enum at91_reg)(0x214 + ((i) * 0x20))
-#define AT91_MDH(i) (enum at91_reg)(0x218 + ((i) * 0x20))
-#define AT91_MCR(i) (enum at91_reg)(0x21C + ((i) * 0x20))
+#define AT91_MMR(i) ((enum at91_reg)(0x200 + ((i) * 0x20)))
+#define AT91_MAM(i) ((enum at91_reg)(0x204 + ((i) * 0x20)))
+#define AT91_MID(i) ((enum at91_reg)(0x208 + ((i) * 0x20)))
+#define AT91_MFID(i) ((enum at91_reg)(0x20C + ((i) * 0x20)))
+#define AT91_MSR(i) ((enum at91_reg)(0x210 + ((i) * 0x20)))
+#define AT91_MDL(i) ((enum at91_reg)(0x214 + ((i) * 0x20)))
+#define AT91_MDH(i) ((enum at91_reg)(0x218 + ((i) * 0x20)))
+#define AT91_MCR(i) ((enum at91_reg)(0x21C + ((i) * 0x20)))
/* Register bits */
#define AT91_MR_CANEN BIT(0)
@@ -87,19 +87,19 @@ enum at91_mb_mode {
};
/* Interrupt mask bits */
-#define AT91_IRQ_ERRA (1 << 16)
-#define AT91_IRQ_WARN (1 << 17)
-#define AT91_IRQ_ERRP (1 << 18)
-#define AT91_IRQ_BOFF (1 << 19)
-#define AT91_IRQ_SLEEP (1 << 20)
-#define AT91_IRQ_WAKEUP (1 << 21)
-#define AT91_IRQ_TOVF (1 << 22)
-#define AT91_IRQ_TSTP (1 << 23)
-#define AT91_IRQ_CERR (1 << 24)
-#define AT91_IRQ_SERR (1 << 25)
-#define AT91_IRQ_AERR (1 << 26)
-#define AT91_IRQ_FERR (1 << 27)
-#define AT91_IRQ_BERR (1 << 28)
+#define AT91_IRQ_ERRA BIT(16)
+#define AT91_IRQ_WARN BIT(17)
+#define AT91_IRQ_ERRP BIT(18)
+#define AT91_IRQ_BOFF BIT(19)
+#define AT91_IRQ_SLEEP BIT(20)
+#define AT91_IRQ_WAKEUP BIT(21)
+#define AT91_IRQ_TOVF BIT(22)
+#define AT91_IRQ_TSTP BIT(23)
+#define AT91_IRQ_CERR BIT(24)
+#define AT91_IRQ_SERR BIT(25)
+#define AT91_IRQ_AERR BIT(26)
+#define AT91_IRQ_FERR BIT(27)
+#define AT91_IRQ_BERR BIT(28)
#define AT91_IRQ_ERR_ALL (0x1fff0000)
#define AT91_IRQ_ERR_FRAME (AT91_IRQ_CERR | AT91_IRQ_SERR | \
@@ -163,7 +163,7 @@ static const struct can_bittiming_const at91_bittiming_const = {
.tseg2_min = 2,
.tseg2_max = 8,
.sjw_max = 4,
- .brp_min = 2,
+ .brp_min = 2,
.brp_max = 128,
.brp_inc = 1,
};
@@ -281,19 +281,20 @@ static inline u32 at91_read(const struct at91_priv *priv, enum at91_reg reg)
}
static inline void at91_write(const struct at91_priv *priv, enum at91_reg reg,
- u32 value)
+ u32 value)
{
writel_relaxed(value, priv->reg_base + reg);
}
static inline void set_mb_mode_prio(const struct at91_priv *priv,
- unsigned int mb, enum at91_mb_mode mode, int prio)
+ unsigned int mb, enum at91_mb_mode mode,
+ int prio)
{
at91_write(priv, AT91_MMR(mb), (mode << 24) | (prio << 16));
}
static inline void set_mb_mode(const struct at91_priv *priv, unsigned int mb,
- enum at91_mb_mode mode)
+ enum at91_mb_mode mode)
{
set_mb_mode_prio(priv, mb, mode, 0);
}
@@ -316,8 +317,7 @@ static void at91_setup_mailboxes(struct net_device *dev)
unsigned int i;
u32 reg_mid;
- /*
- * Due to a chip bug (errata 50.2.6.3 & 50.3.5.3) the first
+ /* Due to a chip bug (errata 50.2.6.3 & 50.3.5.3) the first
* mailbox is disabled. The next 11 mailboxes are used as a
* reception FIFO. The last mailbox is configured with
* overwrite option. The overwrite flag indicates a FIFO
@@ -368,7 +368,7 @@ static int at91_set_bittiming(struct net_device *dev)
}
static int at91_get_berr_counter(const struct net_device *dev,
- struct can_berr_counter *bec)
+ struct can_berr_counter *bec)
{
const struct at91_priv *priv = netdev_priv(dev);
u32 reg_ecr = at91_read(priv, AT91_ECR);
@@ -423,8 +423,7 @@ static void at91_chip_stop(struct net_device *dev, enum can_state state)
priv->can.state = state;
}
-/*
- * theory of operation:
+/* theory of operation:
*
* According to the datasheet priority 0 is the highest priority, 15
* is the lowest. If two mailboxes have the same priority level the
@@ -486,8 +485,7 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev)
/* _NOTE_: subtract AT91_MB_TX_FIRST offset from mb! */
can_put_echo_skb(skb, dev, mb - get_mb_tx_first(priv), 0);
- /*
- * we have to stop the queue and deliver all messages in case
+ /* we have to stop the queue and deliver all messages in case
* of a prio+mb counter wrap around. This is the case if
* tx_next buffer prio and mailbox equals 0.
*
@@ -515,6 +513,7 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev)
static inline void at91_activate_rx_low(const struct at91_priv *priv)
{
u32 mask = get_mb_rx_low_mask(priv);
+
at91_write(priv, AT91_TCR, mask);
}
@@ -526,9 +525,10 @@ static inline void at91_activate_rx_low(const struct at91_priv *priv)
* Reenables given mailbox for reception of new CAN messages
*/
static inline void at91_activate_rx_mb(const struct at91_priv *priv,
- unsigned int mb)
+ unsigned int mb)
{
u32 mask = 1 << mb;
+
at91_write(priv, AT91_TCR, mask);
}
@@ -568,7 +568,7 @@ static void at91_rx_overflow_err(struct net_device *dev)
* given can frame. "mb" and "cf" must be valid.
*/
static void at91_read_mb(struct net_device *dev, unsigned int mb,
- struct can_frame *cf)
+ struct can_frame *cf)
{
const struct at91_priv *priv = netdev_priv(dev);
u32 reg_msr, reg_mid;
@@ -582,9 +582,9 @@ static void at91_read_mb(struct net_device *dev, unsigned int mb,
reg_msr = at91_read(priv, AT91_MSR(mb));
cf->len = can_cc_dlc2len((reg_msr >> 16) & 0xf);
- if (reg_msr & AT91_MSR_MRTR)
+ if (reg_msr & AT91_MSR_MRTR) {
cf->can_id |= CAN_RTR_FLAG;
- else {
+ } else {
*(u32 *)(cf->data + 0) = at91_read(priv, AT91_MDL(mb));
*(u32 *)(cf->data + 4) = at91_read(priv, AT91_MDH(mb));
}
@@ -685,7 +685,7 @@ static int at91_poll_rx(struct net_device *dev, int quota)
if (priv->rx_next > get_mb_rx_low_last(priv) &&
reg_sr & get_mb_rx_low_mask(priv))
netdev_info(dev,
- "order of incoming frames cannot be guaranteed\n");
+ "order of incoming frames cannot be guaranteed\n");
again:
for (mb = find_next_bit(addr, get_mb_tx_first(priv), priv->rx_next);
@@ -718,7 +718,7 @@ static int at91_poll_rx(struct net_device *dev, int quota)
}
static void at91_poll_err_frame(struct net_device *dev,
- struct can_frame *cf, u32 reg_sr)
+ struct can_frame *cf, u32 reg_sr)
{
struct at91_priv *priv = netdev_priv(dev);
@@ -796,8 +796,7 @@ static int at91_poll(struct napi_struct *napi, int quota)
if (reg_sr & get_irq_mb_rx(priv))
work_done += at91_poll_rx(dev, quota - work_done);
- /*
- * The error bits are clear on read,
+ /* The error bits are clear on read,
* so use saved value from irq handler.
*/
reg_sr |= priv->reg_sr;
@@ -807,6 +806,7 @@ static int at91_poll(struct napi_struct *napi, int quota)
if (work_done < quota) {
/* enable IRQs for frame errors and all mailboxes >= rx_next */
u32 reg_ier = AT91_IRQ_ERR_FRAME;
+
reg_ier |= get_irq_mb_rx(priv) & ~AT91_MB_MASK(priv->rx_next);
napi_complete_done(napi, work_done);
@@ -816,8 +816,7 @@ static int at91_poll(struct napi_struct *napi, int quota)
return work_done;
}
-/*
- * theory of operation:
+/* theory of operation:
*
* priv->tx_echo holds the number of the oldest can_frame put for
* transmission into the hardware, but not yet ACKed by the CAN tx
@@ -846,8 +845,7 @@ static void at91_irq_tx(struct net_device *dev, u32 reg_sr)
/* Disable irq for this TX mailbox */
at91_write(priv, AT91_IDR, 1 << mb);
- /*
- * only echo if mailbox signals us a transfer
+ /* only echo if mailbox signals us a transfer
* complete (MSR_MRDY). Otherwise it's a tansfer
* abort. "can_bus_off()" takes care about the skbs
* parked in the echo queue.
@@ -862,8 +860,7 @@ static void at91_irq_tx(struct net_device *dev, u32 reg_sr)
}
}
- /*
- * restart queue if we don't have a wrap around but restart if
+ /* restart queue if we don't have a wrap around but restart if
* we get a TX int for the last can frame directly before a
* wrap around.
*/
@@ -873,7 +870,7 @@ static void at91_irq_tx(struct net_device *dev, u32 reg_sr)
}
static void at91_irq_err_state(struct net_device *dev,
- struct can_frame *cf, enum can_state new_state)
+ struct can_frame *cf, enum can_state new_state)
{
struct at91_priv *priv = netdev_priv(dev);
u32 reg_idr = 0, reg_ier = 0;
@@ -883,8 +880,7 @@ static void at91_irq_err_state(struct net_device *dev,
switch (priv->can.state) {
case CAN_STATE_ERROR_ACTIVE:
- /*
- * from: ERROR_ACTIVE
+ /* from: ERROR_ACTIVE
* to : ERROR_WARNING, ERROR_PASSIVE, BUS_OFF
* => : there was a warning int
*/
@@ -900,8 +896,7 @@ static void at91_irq_err_state(struct net_device *dev,
}
fallthrough;
case CAN_STATE_ERROR_WARNING:
- /*
- * from: ERROR_ACTIVE, ERROR_WARNING
+ /* from: ERROR_ACTIVE, ERROR_WARNING
* to : ERROR_PASSIVE, BUS_OFF
* => : error passive int
*/
@@ -917,8 +912,7 @@ static void at91_irq_err_state(struct net_device *dev,
}
break;
case CAN_STATE_BUS_OFF:
- /*
- * from: BUS_OFF
+ /* from: BUS_OFF
* to : ERROR_ACTIVE, ERROR_WARNING, ERROR_PASSIVE
*/
if (new_state <= CAN_STATE_ERROR_PASSIVE) {
@@ -935,12 +929,10 @@ static void at91_irq_err_state(struct net_device *dev,
break;
}
-
/* process state changes depending on the new state */
switch (new_state) {
case CAN_STATE_ERROR_ACTIVE:
- /*
- * actually we want to enable AT91_IRQ_WARN here, but
+ /* actually we want to enable AT91_IRQ_WARN here, but
* it screws up the system under certain
* circumstances. so just enable AT91_IRQ_ERRP, thus
* the "fallthrough"
@@ -983,7 +975,7 @@ static void at91_irq_err_state(struct net_device *dev,
}
static int at91_get_state_by_bec(const struct net_device *dev,
- enum can_state *state)
+ enum can_state *state)
{
struct can_berr_counter bec;
int err;
@@ -1004,7 +996,6 @@ static int at91_get_state_by_bec(const struct net_device *dev,
return 0;
}
-
static void at91_irq_err(struct net_device *dev)
{
struct at91_priv *priv = netdev_priv(dev);
@@ -1018,15 +1009,15 @@ static void at91_irq_err(struct net_device *dev)
reg_sr = at91_read(priv, AT91_SR);
/* we need to look at the unmasked reg_sr */
- if (unlikely(reg_sr & AT91_IRQ_BOFF))
+ if (unlikely(reg_sr & AT91_IRQ_BOFF)) {
new_state = CAN_STATE_BUS_OFF;
- else if (unlikely(reg_sr & AT91_IRQ_ERRP))
+ } else if (unlikely(reg_sr & AT91_IRQ_ERRP)) {
new_state = CAN_STATE_ERROR_PASSIVE;
- else if (unlikely(reg_sr & AT91_IRQ_WARN))
+ } else if (unlikely(reg_sr & AT91_IRQ_WARN)) {
new_state = CAN_STATE_ERROR_WARNING;
- else if (likely(reg_sr & AT91_IRQ_ERRA))
+ } else if (likely(reg_sr & AT91_IRQ_ERRA)) {
new_state = CAN_STATE_ERROR_ACTIVE;
- else {
+ } else {
netdev_err(dev, "BUG! hardware in undefined state\n");
return;
}
@@ -1053,8 +1044,7 @@ static void at91_irq_err(struct net_device *dev)
priv->can.state = new_state;
}
-/*
- * interrupt handler
+/* interrupt handler
*/
static irqreturn_t at91_irq(int irq, void *dev_id)
{
@@ -1075,8 +1065,7 @@ static irqreturn_t at91_irq(int irq, void *dev_id)
/* Receive or error interrupt? -> napi */
if (reg_sr & (get_irq_mb_rx(priv) | AT91_IRQ_ERR_FRAME)) {
- /*
- * The error bits are clear on read,
+ /* The error bits are clear on read,
* save for later use.
*/
priv->reg_sr = reg_sr;
@@ -1133,8 +1122,7 @@ static int at91_open(struct net_device *dev)
return err;
}
-/*
- * stop CAN bus activity
+/* stop CAN bus activity
*/
static int at91_close(struct net_device *dev)
{
@@ -1176,8 +1164,8 @@ static const struct net_device_ops at91_netdev_ops = {
.ndo_change_mtu = can_change_mtu,
};
-static ssize_t at91_sysfs_show_mb0_id(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t mb0_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct at91_priv *priv = netdev_priv(to_net_dev(dev));
@@ -1187,8 +1175,9 @@ static ssize_t at91_sysfs_show_mb0_id(struct device *dev,
return snprintf(buf, PAGE_SIZE, "0x%03x\n", priv->mb0_id);
}
-static ssize_t at91_sysfs_set_mb0_id(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
+static ssize_t mb0_id_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct net_device *ndev = to_net_dev(dev);
struct at91_priv *priv = netdev_priv(ndev);
@@ -1222,7 +1211,7 @@ static ssize_t at91_sysfs_set_mb0_id(struct device *dev,
return ret;
}
-static DEVICE_ATTR(mb0_id, 0644, at91_sysfs_show_mb0_id, at91_sysfs_set_mb0_id);
+static DEVICE_ATTR_RW(mb0_id);
static struct attribute *at91_sysfs_attrs[] = {
&dev_attr_mb0_id.attr,
diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h
index 4247ff80a29c..08b6efa7a1a7 100644
--- a/drivers/net/can/c_can/c_can.h
+++ b/drivers/net/can/c_can/c_can.h
@@ -176,6 +176,13 @@ struct c_can_raminit {
bool needs_pulse;
};
+/* c_can tx ring structure */
+struct c_can_tx_ring {
+ unsigned int head;
+ unsigned int tail;
+ unsigned int obj_num;
+};
+
/* c_can private data structure */
struct c_can_priv {
struct can_priv can; /* must be the first member */
@@ -190,17 +197,16 @@ struct c_can_priv {
unsigned int msg_obj_tx_first;
unsigned int msg_obj_tx_last;
u32 msg_obj_rx_mask;
- atomic_t tx_active;
atomic_t sie_pending;
unsigned long tx_dir;
int last_status;
+ struct c_can_tx_ring tx;
u16 (*read_reg)(const struct c_can_priv *priv, enum reg index);
void (*write_reg)(const struct c_can_priv *priv, enum reg index, u16 val);
u32 (*read_reg32)(const struct c_can_priv *priv, enum reg index);
void (*write_reg32)(const struct c_can_priv *priv, enum reg index, u32 val);
void __iomem *base;
const u16 *regs;
- void *priv; /* for board-specific data */
enum c_can_dev_id type;
struct c_can_raminit raminit_sys; /* RAMINIT via syscon regmap */
void (*raminit)(const struct c_can_priv *priv, bool enable);
@@ -220,4 +226,19 @@ int c_can_power_down(struct net_device *dev);
void c_can_set_ethtool_ops(struct net_device *dev);
+static inline u8 c_can_get_tx_head(const struct c_can_tx_ring *ring)
+{
+ return ring->head & (ring->obj_num - 1);
+}
+
+static inline u8 c_can_get_tx_tail(const struct c_can_tx_ring *ring)
+{
+ return ring->tail & (ring->obj_num - 1);
+}
+
+static inline u8 c_can_get_tx_free(const struct c_can_tx_ring *ring)
+{
+ return ring->obj_num - (ring->head - ring->tail);
+}
+
#endif /* C_CAN_H */
diff --git a/drivers/net/can/c_can/c_can_main.c b/drivers/net/can/c_can/c_can_main.c
index 7588f70ca0fe..52671d1ea17d 100644
--- a/drivers/net/can/c_can/c_can_main.c
+++ b/drivers/net/can/c_can/c_can_main.c
@@ -160,8 +160,8 @@
#define IF_MCONT_TX (IF_MCONT_TXIE | IF_MCONT_EOB)
-/* Use IF1 for RX and IF2 for TX */
-#define IF_RX 0
+/* Use IF1 in NAPI path and IF2 in TX path */
+#define IF_NAPI 0
#define IF_TX 1
/* minimum timeout for checking BUSY status */
@@ -427,24 +427,51 @@ static void c_can_setup_receive_object(struct net_device *dev, int iface,
c_can_object_put(dev, iface, obj, IF_COMM_RCV_SETUP);
}
+static bool c_can_tx_busy(const struct c_can_priv *priv,
+ const struct c_can_tx_ring *tx_ring)
+{
+ if (c_can_get_tx_free(tx_ring) > 0)
+ return false;
+
+ netif_stop_queue(priv->dev);
+
+ /* Memory barrier before checking tx_free (head and tail) */
+ smp_mb();
+
+ if (c_can_get_tx_free(tx_ring) == 0) {
+ netdev_dbg(priv->dev,
+ "Stopping tx-queue (tx_head=0x%08x, tx_tail=0x%08x, len=%d).\n",
+ tx_ring->head, tx_ring->tail,
+ tx_ring->head - tx_ring->tail);
+ return true;
+ }
+
+ netif_start_queue(priv->dev);
+ return false;
+}
+
static netdev_tx_t c_can_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct can_frame *frame = (struct can_frame *)skb->data;
struct c_can_priv *priv = netdev_priv(dev);
- u32 idx, obj;
+ struct c_can_tx_ring *tx_ring = &priv->tx;
+ u32 idx, obj, cmd = IF_COMM_TX;
if (can_dropped_invalid_skb(dev, skb))
return NETDEV_TX_OK;
- /* This is not a FIFO. C/D_CAN sends out the buffers
- * prioritized. The lowest buffer number wins.
- */
- idx = fls(atomic_read(&priv->tx_active));
- obj = idx + priv->msg_obj_tx_first;
- /* If this is the last buffer, stop the xmit queue */
- if (idx == priv->msg_obj_tx_num - 1)
+ if (c_can_tx_busy(priv, tx_ring))
+ return NETDEV_TX_BUSY;
+
+ idx = c_can_get_tx_head(tx_ring);
+ tx_ring->head++;
+ if (c_can_get_tx_free(tx_ring) == 0)
netif_stop_queue(dev);
+
+ if (idx < c_can_get_tx_tail(tx_ring))
+ cmd &= ~IF_COMM_TXRQST; /* Cache the message */
+
/* Store the message in the interface so we can call
* can_put_echo_skb(). We must do this before we enable
* transmit as we might race against do_tx().
@@ -452,11 +479,8 @@ static netdev_tx_t c_can_start_xmit(struct sk_buff *skb,
c_can_setup_tx_object(dev, IF_TX, frame, idx);
priv->dlc[idx] = frame->len;
can_put_echo_skb(skb, dev, idx, 0);
-
- /* Update the active bits */
- atomic_add(BIT(idx), &priv->tx_active);
- /* Start transmission */
- c_can_object_put(dev, IF_TX, obj, IF_COMM_TX);
+ obj = idx + priv->msg_obj_tx_first;
+ c_can_object_put(dev, IF_TX, obj, cmd);
return NETDEV_TX_OK;
}
@@ -529,13 +553,13 @@ static void c_can_configure_msg_objects(struct net_device *dev)
/* first invalidate all message objects */
for (i = priv->msg_obj_rx_first; i <= priv->msg_obj_num; i++)
- c_can_inval_msg_object(dev, IF_RX, i);
+ c_can_inval_msg_object(dev, IF_NAPI, i);
/* setup receive message objects */
for (i = priv->msg_obj_rx_first; i < priv->msg_obj_rx_last; i++)
- c_can_setup_receive_object(dev, IF_RX, i, 0, 0, IF_MCONT_RCV);
+ c_can_setup_receive_object(dev, IF_NAPI, i, 0, 0, IF_MCONT_RCV);
- c_can_setup_receive_object(dev, IF_RX, priv->msg_obj_rx_last, 0, 0,
+ c_can_setup_receive_object(dev, IF_NAPI, priv->msg_obj_rx_last, 0, 0,
IF_MCONT_RCV_EOB);
}
@@ -567,6 +591,7 @@ static int c_can_software_reset(struct net_device *dev)
static int c_can_chip_config(struct net_device *dev)
{
struct c_can_priv *priv = netdev_priv(dev);
+ struct c_can_tx_ring *tx_ring = &priv->tx;
int err;
err = c_can_software_reset(dev);
@@ -598,7 +623,8 @@ static int c_can_chip_config(struct net_device *dev)
priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED);
/* Clear all internal status */
- atomic_set(&priv->tx_active, 0);
+ tx_ring->head = 0;
+ tx_ring->tail = 0;
priv->tx_dir = 0;
/* set bittiming params */
@@ -696,40 +722,57 @@ static int c_can_get_berr_counter(const struct net_device *dev,
static void c_can_do_tx(struct net_device *dev)
{
struct c_can_priv *priv = netdev_priv(dev);
+ struct c_can_tx_ring *tx_ring = &priv->tx;
struct net_device_stats *stats = &dev->stats;
- u32 idx, obj, pkts = 0, bytes = 0, pend, clr;
+ u32 idx, obj, pkts = 0, bytes = 0, pend;
+ u8 tail;
if (priv->msg_obj_tx_last > 32)
pend = priv->read_reg32(priv, C_CAN_INTPND3_REG);
else
pend = priv->read_reg(priv, C_CAN_INTPND2_REG);
- clr = pend;
while ((idx = ffs(pend))) {
idx--;
pend &= ~BIT(idx);
obj = idx + priv->msg_obj_tx_first;
- /* We use IF_RX interface instead of IF_TX because we
+ /* We use IF_NAPI interface instead of IF_TX because we
* are called from c_can_poll(), which runs inside
- * NAPI. We are not trasmitting.
+ * NAPI. We are not transmitting.
*/
- c_can_inval_tx_object(dev, IF_RX, obj);
+ c_can_inval_tx_object(dev, IF_NAPI, obj);
can_get_echo_skb(dev, idx, NULL);
bytes += priv->dlc[idx];
pkts++;
}
- /* Clear the bits in the tx_active mask */
- atomic_sub(clr, &priv->tx_active);
+ if (!pkts)
+ return;
- if (clr & BIT(priv->msg_obj_tx_num - 1))
- netif_wake_queue(dev);
+ tx_ring->tail += pkts;
+ if (c_can_get_tx_free(tx_ring)) {
+ /* Make sure that anybody stopping the queue after
+ * this sees the new tx_ring->tail.
+ */
+ smp_mb();
+ netif_wake_queue(priv->dev);
+ }
- if (pkts) {
- stats->tx_bytes += bytes;
- stats->tx_packets += pkts;
- can_led_event(dev, CAN_LED_EVENT_TX);
+ stats->tx_bytes += bytes;
+ stats->tx_packets += pkts;
+ can_led_event(dev, CAN_LED_EVENT_TX);
+
+ tail = c_can_get_tx_tail(tx_ring);
+
+ if (tail == 0) {
+ u8 head = c_can_get_tx_head(tx_ring);
+
+ /* Start transmission for all cached messages */
+ for (idx = tail; idx < head; idx++) {
+ obj = idx + priv->msg_obj_tx_first;
+ c_can_object_put(dev, IF_NAPI, obj, IF_COMM_TXRQST);
+ }
}
}
@@ -766,14 +809,14 @@ static u32 c_can_adjust_pending(u32 pend, u32 rx_mask)
static inline void c_can_rx_object_get(struct net_device *dev,
struct c_can_priv *priv, u32 obj)
{
- c_can_object_get(dev, IF_RX, obj, priv->comm_rcv_high);
+ c_can_object_get(dev, IF_NAPI, obj, priv->comm_rcv_high);
}
static inline void c_can_rx_finalize(struct net_device *dev,
struct c_can_priv *priv, u32 obj)
{
if (priv->type != BOSCH_D_CAN)
- c_can_object_get(dev, IF_RX, obj, IF_COMM_CLR_NEWDAT);
+ c_can_object_get(dev, IF_NAPI, obj, IF_COMM_CLR_NEWDAT);
}
static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
@@ -785,10 +828,12 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
pend &= ~BIT(obj - 1);
c_can_rx_object_get(dev, priv, obj);
- ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_RX));
+ ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_NAPI));
if (ctrl & IF_MCONT_MSGLST) {
- int n = c_can_handle_lost_msg_obj(dev, IF_RX, obj, ctrl);
+ int n;
+
+ n = c_can_handle_lost_msg_obj(dev, IF_NAPI, obj, ctrl);
pkts += n;
quota -= n;
@@ -803,7 +848,7 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
continue;
/* read the data from the message object */
- c_can_read_msg_object(dev, IF_RX, ctrl);
+ c_can_read_msg_object(dev, IF_NAPI, ctrl);
c_can_rx_finalize(dev, priv, obj);
@@ -1205,6 +1250,10 @@ struct net_device *alloc_c_can_dev(int msg_obj_num)
priv->msg_obj_tx_last =
priv->msg_obj_tx_first + priv->msg_obj_tx_num - 1;
+ priv->tx.head = 0;
+ priv->tx.tail = 0;
+ priv->tx.obj_num = msg_obj_tx_num;
+
netif_napi_add(dev, &priv->napi, c_can_poll, priv->msg_obj_rx_num);
priv->dev = dev;
diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c
index 36950363682f..86e95e9d6533 100644
--- a/drivers/net/can/c_can/c_can_platform.c
+++ b/drivers/net/can/c_can/c_can_platform.c
@@ -385,7 +385,6 @@ static int c_can_plat_probe(struct platform_device *pdev)
priv->base = addr;
priv->device = &pdev->dev;
priv->can.clock.freq = clk_get_rate(clk);
- priv->priv = clk;
priv->type = drvdata->id;
platform_set_drvdata(pdev, dev);
diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c
index 311d8564d611..e3d840b81357 100644
--- a/drivers/net/can/dev/dev.c
+++ b/drivers/net/can/dev/dev.c
@@ -15,6 +15,7 @@
#include <linux/can/dev.h>
#include <linux/can/skb.h>
#include <linux/can/led.h>
+#include <linux/gpio/consumer.h>
#include <linux/of.h>
#define MOD_DESC "CAN device driver interface"
@@ -400,10 +401,69 @@ void close_candev(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(close_candev);
+static int can_set_termination(struct net_device *ndev, u16 term)
+{
+ struct can_priv *priv = netdev_priv(ndev);
+ int set;
+
+ if (term == priv->termination_gpio_ohms[CAN_TERMINATION_GPIO_ENABLED])
+ set = 1;
+ else
+ set = 0;
+
+ gpiod_set_value(priv->termination_gpio, set);
+
+ return 0;
+}
+
+static int can_get_termination(struct net_device *ndev)
+{
+ struct can_priv *priv = netdev_priv(ndev);
+ struct device *dev = ndev->dev.parent;
+ struct gpio_desc *gpio;
+ u32 term;
+ int ret;
+
+ /* Disabling termination by default is the safe choice: Else if many
+ * bus participants enable it, no communication is possible at all.
+ */
+ gpio = devm_gpiod_get_optional(dev, "termination", GPIOD_OUT_LOW);
+ if (IS_ERR(gpio))
+ return dev_err_probe(dev, PTR_ERR(gpio),
+ "Cannot get termination-gpios\n");
+
+ if (!gpio)
+ return 0;
+
+ ret = device_property_read_u32(dev, "termination-ohms", &term);
+ if (ret) {
+ netdev_err(ndev, "Cannot get termination-ohms: %pe\n",
+ ERR_PTR(ret));
+ return ret;
+ }
+
+ if (term > U16_MAX) {
+ netdev_err(ndev, "Invalid termination-ohms value (%u > %u)\n",
+ term, U16_MAX);
+ return -EINVAL;
+ }
+
+ priv->termination_const_cnt = ARRAY_SIZE(priv->termination_gpio_ohms);
+ priv->termination_const = priv->termination_gpio_ohms;
+ priv->termination_gpio = gpio;
+ priv->termination_gpio_ohms[CAN_TERMINATION_GPIO_DISABLED] =
+ CAN_TERMINATION_DISABLED;
+ priv->termination_gpio_ohms[CAN_TERMINATION_GPIO_ENABLED] = term;
+ priv->do_set_termination = can_set_termination;
+
+ return 0;
+}
+
/* Register the CAN network device */
int register_candev(struct net_device *dev)
{
struct can_priv *priv = netdev_priv(dev);
+ int err;
/* Ensure termination_const, termination_const_cnt and
* do_set_termination consistency. All must be either set or
@@ -419,6 +479,12 @@ int register_candev(struct net_device *dev)
if (!priv->data_bitrate_const != !priv->data_bitrate_const_cnt)
return -EINVAL;
+ if (!priv->termination_const) {
+ err = can_get_termination(dev);
+ if (err)
+ return err;
+ }
+
dev->rtnl_link_ops = &can_link_ops;
netif_carrier_off(dev);
diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c
index e38c2566aff4..80425636049d 100644
--- a/drivers/net/can/dev/netlink.c
+++ b/drivers/net/can/dev/netlink.c
@@ -47,7 +47,7 @@ static int can_validate(struct nlattr *tb[], struct nlattr *data[],
}
if (data[IFLA_CAN_DATA_BITTIMING]) {
- if (!is_can_fd || !data[IFLA_CAN_BITTIMING])
+ if (!is_can_fd)
return -EOPNOTSUPP;
}
@@ -116,7 +116,7 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[],
maskedflags = cm->flags & cm->mask;
/* check whether provided bits are allowed to be passed */
- if (cm->mask & ~(priv->ctrlmode_supported | ctrlstatic))
+ if (maskedflags & ~(priv->ctrlmode_supported | ctrlstatic))
return -EOPNOTSUPP;
/* do not check for static fd-non-iso if 'fd' is disabled */
@@ -132,10 +132,13 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[],
priv->ctrlmode |= maskedflags;
/* CAN_CTRLMODE_FD can only be set when driver supports FD */
- if (priv->ctrlmode & CAN_CTRLMODE_FD)
+ if (priv->ctrlmode & CAN_CTRLMODE_FD) {
dev->mtu = CANFD_MTU;
- else
+ } else {
dev->mtu = CAN_MTU;
+ memset(&priv->data_bittiming, 0,
+ sizeof(priv->data_bittiming));
+ }
}
if (data[IFLA_CAN_RESTART_MS]) {
diff --git a/drivers/net/can/dev/rx-offload.c b/drivers/net/can/dev/rx-offload.c
index ab2c1543786c..37b0cc65237b 100644
--- a/drivers/net/can/dev/rx-offload.c
+++ b/drivers/net/can/dev/rx-offload.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2014 Protonic Holland,
* David Jander
- * Copyright (C) 2014-2017 Pengutronix,
+ * Copyright (C) 2014-2021 Pengutronix,
* Marc Kleine-Budde <kernel@pengutronix.de>
*/
@@ -174,10 +174,8 @@ can_rx_offload_offload_one(struct can_rx_offload *offload, unsigned int n)
int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload,
u64 pending)
{
- struct sk_buff_head skb_queue;
unsigned int i;
-
- __skb_queue_head_init(&skb_queue);
+ int received = 0;
for (i = offload->mb_first;
can_rx_offload_le(offload, i, offload->mb_last);
@@ -191,26 +189,12 @@ int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload,
if (IS_ERR_OR_NULL(skb))
continue;
- __skb_queue_add_sort(&skb_queue, skb, can_rx_offload_compare);
- }
-
- if (!skb_queue_empty(&skb_queue)) {
- unsigned long flags;
- u32 queue_len;
-
- spin_lock_irqsave(&offload->skb_queue.lock, flags);
- skb_queue_splice_tail(&skb_queue, &offload->skb_queue);
- spin_unlock_irqrestore(&offload->skb_queue.lock, flags);
-
- queue_len = skb_queue_len(&offload->skb_queue);
- if (queue_len > offload->skb_queue_len_max / 8)
- netdev_dbg(offload->dev, "%s: queue_len=%d\n",
- __func__, queue_len);
-
- can_rx_offload_schedule(offload);
+ __skb_queue_add_sort(&offload->skb_irq_queue, skb,
+ can_rx_offload_compare);
+ received++;
}
- return skb_queue_len(&skb_queue);
+ return received;
}
EXPORT_SYMBOL_GPL(can_rx_offload_irq_offload_timestamp);
@@ -226,13 +210,10 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload)
if (!skb)
break;
- skb_queue_tail(&offload->skb_queue, skb);
+ __skb_queue_tail(&offload->skb_irq_queue, skb);
received++;
}
- if (received)
- can_rx_offload_schedule(offload);
-
return received;
}
EXPORT_SYMBOL_GPL(can_rx_offload_irq_offload_fifo);
@@ -241,7 +222,6 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
struct sk_buff *skb, u32 timestamp)
{
struct can_rx_offload_cb *cb;
- unsigned long flags;
if (skb_queue_len(&offload->skb_queue) >
offload->skb_queue_len_max) {
@@ -252,11 +232,8 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
cb = can_rx_offload_get_cb(skb);
cb->timestamp = timestamp;
- spin_lock_irqsave(&offload->skb_queue.lock, flags);
- __skb_queue_add_sort(&offload->skb_queue, skb, can_rx_offload_compare);
- spin_unlock_irqrestore(&offload->skb_queue.lock, flags);
-
- can_rx_offload_schedule(offload);
+ __skb_queue_add_sort(&offload->skb_irq_queue, skb,
+ can_rx_offload_compare);
return 0;
}
@@ -295,13 +272,56 @@ int can_rx_offload_queue_tail(struct can_rx_offload *offload,
return -ENOBUFS;
}
- skb_queue_tail(&offload->skb_queue, skb);
- can_rx_offload_schedule(offload);
+ __skb_queue_tail(&offload->skb_irq_queue, skb);
return 0;
}
EXPORT_SYMBOL_GPL(can_rx_offload_queue_tail);
+void can_rx_offload_irq_finish(struct can_rx_offload *offload)
+{
+ unsigned long flags;
+ int queue_len;
+
+ if (skb_queue_empty_lockless(&offload->skb_irq_queue))
+ return;
+
+ spin_lock_irqsave(&offload->skb_queue.lock, flags);
+ skb_queue_splice_tail_init(&offload->skb_irq_queue, &offload->skb_queue);
+ spin_unlock_irqrestore(&offload->skb_queue.lock, flags);
+
+ queue_len = skb_queue_len(&offload->skb_queue);
+ if (queue_len > offload->skb_queue_len_max / 8)
+ netdev_dbg(offload->dev, "%s: queue_len=%d\n",
+ __func__, queue_len);
+
+ napi_schedule(&offload->napi);
+}
+EXPORT_SYMBOL_GPL(can_rx_offload_irq_finish);
+
+void can_rx_offload_threaded_irq_finish(struct can_rx_offload *offload)
+{
+ unsigned long flags;
+ int queue_len;
+
+ if (skb_queue_empty_lockless(&offload->skb_irq_queue))
+ return;
+
+ spin_lock_irqsave(&offload->skb_queue.lock, flags);
+ skb_queue_splice_tail_init(&offload->skb_irq_queue, &offload->skb_queue);
+ spin_unlock_irqrestore(&offload->skb_queue.lock, flags);
+
+ queue_len = skb_queue_len(&offload->skb_queue);
+ if (queue_len > offload->skb_queue_len_max / 8)
+ netdev_dbg(offload->dev, "%s: queue_len=%d\n",
+ __func__, queue_len);
+
+ local_bh_disable();
+ napi_schedule(&offload->napi);
+ local_bh_enable();
+}
+EXPORT_SYMBOL_GPL(can_rx_offload_threaded_irq_finish);
+
static int can_rx_offload_init_queue(struct net_device *dev,
struct can_rx_offload *offload,
unsigned int weight)
@@ -312,6 +332,7 @@ static int can_rx_offload_init_queue(struct net_device *dev,
offload->skb_queue_len_max = 2 << fls(weight);
offload->skb_queue_len_max *= 4;
skb_queue_head_init(&offload->skb_queue);
+ __skb_queue_head_init(&offload->skb_irq_queue);
netif_napi_add(dev, &offload->napi, can_rx_offload_napi_poll, weight);
@@ -373,5 +394,6 @@ void can_rx_offload_del(struct can_rx_offload *offload)
{
netif_napi_del(&offload->napi);
skb_queue_purge(&offload->skb_queue);
+ __skb_queue_purge(&offload->skb_irq_queue);
}
EXPORT_SYMBOL_GPL(can_rx_offload_del);
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 57f3635ad8d7..7734229aa078 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -28,6 +28,7 @@
#include <linux/of_device.h>
#include <linux/pinctrl/consumer.h>
#include <linux/platform_device.h>
+#include <linux/can/platform/flexcan.h>
#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
@@ -208,18 +209,19 @@
/* FLEXCAN hardware feature flags
*
* Below is some version info we got:
- * SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR rece- FD Mode
+ * SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR rece- FD Mode MB
* Filter? connected? Passive detection ption in MB Supported?
- * MX25 FlexCAN2 03.00.00.00 no no no no no no
- * MX28 FlexCAN2 03.00.04.00 yes yes no no no no
- * MX35 FlexCAN2 03.00.00.00 no no no no no no
- * MX53 FlexCAN2 03.00.00.00 yes no no no no no
- * MX6s FlexCAN3 10.00.12.00 yes yes no no yes no
- * MX8QM FlexCAN3 03.00.23.00 yes yes no no yes yes
- * MX8MP FlexCAN3 03.00.17.01 yes yes no yes yes yes
- * VF610 FlexCAN3 ? no yes no yes yes? no
- * LS1021A FlexCAN2 03.00.04.00 no yes no no yes no
- * LX2160A FlexCAN3 03.00.23.00 no yes no yes yes yes
+ * MCF5441X FlexCAN2 ? no yes no no yes no 16
+ * MX25 FlexCAN2 03.00.00.00 no no no no no no 64
+ * MX28 FlexCAN2 03.00.04.00 yes yes no no no no 64
+ * MX35 FlexCAN2 03.00.00.00 no no no no no no 64
+ * MX53 FlexCAN2 03.00.00.00 yes no no no no no 64
+ * MX6s FlexCAN3 10.00.12.00 yes yes no no yes no 64
+ * MX8QM FlexCAN3 03.00.23.00 yes yes no no yes yes 64
+ * MX8MP FlexCAN3 03.00.17.01 yes yes no yes yes yes 64
+ * VF610 FlexCAN3 ? no yes no yes yes? no 64
+ * LS1021A FlexCAN2 03.00.04.00 no yes no no yes no 64
+ * LX2160A FlexCAN3 03.00.23.00 no yes no yes yes yes 64
*
* Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected.
*/
@@ -246,6 +248,10 @@
#define FLEXCAN_QUIRK_SUPPORT_ECC BIT(10)
/* Setup stop mode with SCU firmware to support wakeup */
#define FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW BIT(11)
+/* Setup 3 separate interrupts, main, boff and err */
+#define FLEXCAN_QUIRK_NR_IRQ_3 BIT(12)
+/* Setup 16 mailboxes */
+#define FLEXCAN_QUIRK_NR_MB_16 BIT(13)
/* Structure of the message buffer */
struct flexcan_mb {
@@ -363,6 +369,9 @@ struct flexcan_priv {
struct regulator *reg_xceiver;
struct flexcan_stop_mode stm;
+ int irq_boff;
+ int irq_err;
+
/* IPC handle when setup stop mode by System Controller firmware(scfw) */
struct imx_sc_ipc *sc_ipc_handle;
@@ -371,6 +380,11 @@ struct flexcan_priv {
void (*write)(u32 val, void __iomem *addr);
};
+static const struct flexcan_devtype_data fsl_mcf5441x_devtype_data = {
+ .quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+ FLEXCAN_QUIRK_NR_IRQ_3 | FLEXCAN_QUIRK_NR_MB_16,
+};
+
static const struct flexcan_devtype_data fsl_p1010_devtype_data = {
.quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE |
FLEXCAN_QUIRK_BROKEN_PERR_STATE |
@@ -635,15 +649,19 @@ static inline void flexcan_error_irq_disable(const struct flexcan_priv *priv)
static int flexcan_clks_enable(const struct flexcan_priv *priv)
{
- int err;
+ int err = 0;
- err = clk_prepare_enable(priv->clk_ipg);
- if (err)
- return err;
+ if (priv->clk_ipg) {
+ err = clk_prepare_enable(priv->clk_ipg);
+ if (err)
+ return err;
+ }
- err = clk_prepare_enable(priv->clk_per);
- if (err)
- clk_disable_unprepare(priv->clk_ipg);
+ if (priv->clk_per) {
+ err = clk_prepare_enable(priv->clk_per);
+ if (err)
+ clk_disable_unprepare(priv->clk_ipg);
+ }
return err;
}
@@ -1198,6 +1216,9 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
}
}
+ if (handled)
+ can_rx_offload_irq_finish(&priv->offload);
+
return handled;
}
@@ -1401,8 +1422,12 @@ static int flexcan_rx_offload_setup(struct net_device *dev)
priv->mb_size = sizeof(struct flexcan_mb) + CANFD_MAX_DLEN;
else
priv->mb_size = sizeof(struct flexcan_mb) + CAN_MAX_DLEN;
- priv->mb_count = (sizeof(priv->regs->mb[0]) / priv->mb_size) +
- (sizeof(priv->regs->mb[1]) / priv->mb_size);
+
+ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_MB_16)
+ priv->mb_count = 16;
+ else
+ priv->mb_count = (sizeof(priv->regs->mb[0]) / priv->mb_size) +
+ (sizeof(priv->regs->mb[1]) / priv->mb_size);
if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)
priv->tx_mb_reserved =
@@ -1774,6 +1799,18 @@ static int flexcan_open(struct net_device *dev)
if (err)
goto out_can_rx_offload_disable;
+ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
+ err = request_irq(priv->irq_boff,
+ flexcan_irq, IRQF_SHARED, dev->name, dev);
+ if (err)
+ goto out_free_irq;
+
+ err = request_irq(priv->irq_err,
+ flexcan_irq, IRQF_SHARED, dev->name, dev);
+ if (err)
+ goto out_free_irq_boff;
+ }
+
flexcan_chip_interrupts_enable(dev);
can_led_event(dev, CAN_LED_EVENT_OPEN);
@@ -1782,6 +1819,10 @@ static int flexcan_open(struct net_device *dev)
return 0;
+ out_free_irq_boff:
+ free_irq(priv->irq_boff, dev);
+ out_free_irq:
+ free_irq(dev->irq, dev);
out_can_rx_offload_disable:
can_rx_offload_disable(&priv->offload);
flexcan_chip_stop(dev);
@@ -1803,6 +1844,12 @@ static int flexcan_close(struct net_device *dev)
netif_stop_queue(dev);
flexcan_chip_interrupts_disable(dev);
+
+ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
+ free_irq(priv->irq_err, dev);
+ free_irq(priv->irq_boff, dev);
+ }
+
free_irq(dev->irq, dev);
can_rx_offload_disable(&priv->offload);
flexcan_chip_stop_disable_on_error(dev);
@@ -2039,14 +2086,26 @@ static const struct of_device_id flexcan_of_match[] = {
};
MODULE_DEVICE_TABLE(of, flexcan_of_match);
+static const struct platform_device_id flexcan_id_table[] = {
+ {
+ .name = "flexcan-mcf5441x",
+ .driver_data = (kernel_ulong_t)&fsl_mcf5441x_devtype_data,
+ }, {
+ /* sentinel */
+ },
+};
+MODULE_DEVICE_TABLE(platform, flexcan_id_table);
+
static int flexcan_probe(struct platform_device *pdev)
{
+ const struct of_device_id *of_id;
const struct flexcan_devtype_data *devtype_data;
struct net_device *dev;
struct flexcan_priv *priv;
struct regulator *reg_xceiver;
struct clk *clk_ipg = NULL, *clk_per = NULL;
struct flexcan_regs __iomem *regs;
+ struct flexcan_platform_data *pdata;
int err, irq;
u8 clk_src = 1;
u32 clock_freq = 0;
@@ -2064,6 +2123,12 @@ static int flexcan_probe(struct platform_device *pdev)
"clock-frequency", &clock_freq);
of_property_read_u8(pdev->dev.of_node,
"fsl,clk-source", &clk_src);
+ } else {
+ pdata = dev_get_platdata(&pdev->dev);
+ if (pdata) {
+ clock_freq = pdata->clock_frequency;
+ clk_src = pdata->clk_src;
+ }
}
if (!clock_freq) {
@@ -2089,7 +2154,14 @@ static int flexcan_probe(struct platform_device *pdev)
if (IS_ERR(regs))
return PTR_ERR(regs);
- devtype_data = of_device_get_match_data(&pdev->dev);
+ of_id = of_match_device(flexcan_of_match, &pdev->dev);
+ if (of_id)
+ devtype_data = of_id->data;
+ else if (platform_get_device_id(pdev)->driver_data)
+ devtype_data = (struct flexcan_devtype_data *)
+ platform_get_device_id(pdev)->driver_data;
+ else
+ return -ENODEV;
if ((devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_FD) &&
!(devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)) {
@@ -2133,6 +2205,19 @@ static int flexcan_probe(struct platform_device *pdev)
priv->devtype_data = devtype_data;
priv->reg_xceiver = reg_xceiver;
+ if (devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
+ priv->irq_boff = platform_get_irq(pdev, 1);
+ if (priv->irq_boff <= 0) {
+ err = -ENODEV;
+ goto failed_platform_get_irq;
+ }
+ priv->irq_err = platform_get_irq(pdev, 2);
+ if (priv->irq_err <= 0) {
+ err = -ENODEV;
+ goto failed_platform_get_irq;
+ }
+ }
+
if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_FD) {
priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD |
CAN_CTRLMODE_FD_NON_ISO;
@@ -2170,6 +2255,7 @@ static int flexcan_probe(struct platform_device *pdev)
failed_register:
pm_runtime_put_noidle(&pdev->dev);
pm_runtime_disable(&pdev->dev);
+ failed_platform_get_irq:
free_candev(dev);
return err;
}
@@ -2322,6 +2408,7 @@ static struct platform_driver flexcan_driver = {
},
.probe = flexcan_probe,
.remove = flexcan_remove,
+ .id_table = flexcan_id_table,
};
module_platform_driver(flexcan_driver);
diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c
index 2a6c918186c0..c68ad56628bd 100644
--- a/drivers/net/can/janz-ican3.c
+++ b/drivers/net/can/janz-ican3.c
@@ -1815,9 +1815,9 @@ static int ican3_get_berr_counter(const struct net_device *ndev,
* Sysfs Attributes
*/
-static ssize_t ican3_sysfs_show_term(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t termination_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
struct ican3_dev *mod = netdev_priv(to_net_dev(dev));
int ret;
@@ -1834,9 +1834,9 @@ static ssize_t ican3_sysfs_show_term(struct device *dev,
return snprintf(buf, PAGE_SIZE, "%u\n", mod->termination_enabled);
}
-static ssize_t ican3_sysfs_set_term(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t termination_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct ican3_dev *mod = netdev_priv(to_net_dev(dev));
unsigned long enable;
@@ -1852,18 +1852,17 @@ static ssize_t ican3_sysfs_set_term(struct device *dev,
return count;
}
-static ssize_t ican3_sysfs_show_fwinfo(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t fwinfo_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
struct ican3_dev *mod = netdev_priv(to_net_dev(dev));
return scnprintf(buf, PAGE_SIZE, "%s\n", mod->fwinfo);
}
-static DEVICE_ATTR(termination, 0644, ican3_sysfs_show_term,
- ican3_sysfs_set_term);
-static DEVICE_ATTR(fwinfo, 0444, ican3_sysfs_show_fwinfo, NULL);
+static DEVICE_ATTR_RW(termination);
+static DEVICE_ATTR_RO(fwinfo);
static struct attribute *ican3_sysfs_attrs[] = {
&dev_attr_termination.attr,
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index bba2a449ac70..2470c47b2e31 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -21,6 +21,7 @@
#include <linux/iopoll.h>
#include <linux/can/dev.h>
#include <linux/pinctrl/consumer.h>
+#include <linux/phy/phy.h>
#include "m_can.h"
@@ -278,7 +279,7 @@ enum m_can_reg {
/* Message RAM Elements */
#define M_CAN_FIFO_ID 0x0
#define M_CAN_FIFO_DLC 0x4
-#define M_CAN_FIFO_DATA(n) (0x8 + ((n) << 2))
+#define M_CAN_FIFO_DATA 0x8
/* Rx Buffer Element */
/* R0 */
@@ -308,6 +309,15 @@ enum m_can_reg {
#define TX_EVENT_MM_MASK GENMASK(31, 24)
#define TX_EVENT_TXTS_MASK GENMASK(15, 0)
+/* The ID and DLC registers are adjacent in M_CAN FIFO memory,
+ * and we can save a (potentially slow) bus round trip by combining
+ * reads and writes to them.
+ */
+struct id_and_dlc {
+ u32 id;
+ u32 dlc;
+};
+
static inline u32 m_can_read(struct m_can_classdev *cdev, enum m_can_reg reg)
{
return cdev->ops->read_reg(cdev, reg);
@@ -319,36 +329,39 @@ static inline void m_can_write(struct m_can_classdev *cdev, enum m_can_reg reg,
cdev->ops->write_reg(cdev, reg, val);
}
-static u32 m_can_fifo_read(struct m_can_classdev *cdev,
- u32 fgi, unsigned int offset)
+static int
+m_can_fifo_read(struct m_can_classdev *cdev,
+ u32 fgi, unsigned int offset, void *val, size_t val_count)
{
u32 addr_offset = cdev->mcfg[MRAM_RXF0].off + fgi * RXF0_ELEMENT_SIZE +
offset;
- return cdev->ops->read_fifo(cdev, addr_offset);
+ return cdev->ops->read_fifo(cdev, addr_offset, val, val_count);
}
-static void m_can_fifo_write(struct m_can_classdev *cdev,
- u32 fpi, unsigned int offset, u32 val)
+static int
+m_can_fifo_write(struct m_can_classdev *cdev,
+ u32 fpi, unsigned int offset, const void *val, size_t val_count)
{
u32 addr_offset = cdev->mcfg[MRAM_TXB].off + fpi * TXB_ELEMENT_SIZE +
offset;
- cdev->ops->write_fifo(cdev, addr_offset, val);
+ return cdev->ops->write_fifo(cdev, addr_offset, val, val_count);
}
-static inline void m_can_fifo_write_no_off(struct m_can_classdev *cdev,
- u32 fpi, u32 val)
+static inline int m_can_fifo_write_no_off(struct m_can_classdev *cdev,
+ u32 fpi, u32 val)
{
- cdev->ops->write_fifo(cdev, fpi, val);
+ return cdev->ops->write_fifo(cdev, fpi, &val, 1);
}
-static u32 m_can_txe_fifo_read(struct m_can_classdev *cdev, u32 fgi, u32 offset)
+static int
+m_can_txe_fifo_read(struct m_can_classdev *cdev, u32 fgi, u32 offset, u32 *val)
{
u32 addr_offset = cdev->mcfg[MRAM_TXE].off + fgi * TXE_ELEMENT_SIZE +
offset;
- return cdev->ops->read_fifo(cdev, addr_offset);
+ return cdev->ops->read_fifo(cdev, addr_offset, val, 1);
}
static inline bool m_can_tx_fifo_full(struct m_can_classdev *cdev)
@@ -436,7 +449,7 @@ static void m_can_clean(struct net_device *net)
* napi. For non-peripherals, RX is done in napi already, so push
* directly. timestamp is used to ensure good skb ordering in
* rx-offload and is ignored for non-peripherals.
-*/
+ */
static void m_can_receive_skb(struct m_can_classdev *cdev,
struct sk_buff *skb,
u32 timestamp)
@@ -454,54 +467,57 @@ static void m_can_receive_skb(struct m_can_classdev *cdev,
}
}
-static void m_can_read_fifo(struct net_device *dev, u32 rxfs)
+static int m_can_read_fifo(struct net_device *dev, u32 rxfs)
{
struct net_device_stats *stats = &dev->stats;
struct m_can_classdev *cdev = netdev_priv(dev);
struct canfd_frame *cf;
struct sk_buff *skb;
- u32 id, fgi, dlc;
+ struct id_and_dlc fifo_header;
+ u32 fgi;
u32 timestamp = 0;
- int i;
+ int err;
/* calculate the fifo get index for where to read data */
fgi = FIELD_GET(RXFS_FGI_MASK, rxfs);
- dlc = m_can_fifo_read(cdev, fgi, M_CAN_FIFO_DLC);
- if (dlc & RX_BUF_FDF)
+ err = m_can_fifo_read(cdev, fgi, M_CAN_FIFO_ID, &fifo_header, 2);
+ if (err)
+ goto out_fail;
+
+ if (fifo_header.dlc & RX_BUF_FDF)
skb = alloc_canfd_skb(dev, &cf);
else
skb = alloc_can_skb(dev, (struct can_frame **)&cf);
if (!skb) {
stats->rx_dropped++;
- return;
+ return 0;
}
- if (dlc & RX_BUF_FDF)
- cf->len = can_fd_dlc2len((dlc >> 16) & 0x0F);
+ if (fifo_header.dlc & RX_BUF_FDF)
+ cf->len = can_fd_dlc2len((fifo_header.dlc >> 16) & 0x0F);
else
- cf->len = can_cc_dlc2len((dlc >> 16) & 0x0F);
+ cf->len = can_cc_dlc2len((fifo_header.dlc >> 16) & 0x0F);
- id = m_can_fifo_read(cdev, fgi, M_CAN_FIFO_ID);
- if (id & RX_BUF_XTD)
- cf->can_id = (id & CAN_EFF_MASK) | CAN_EFF_FLAG;
+ if (fifo_header.id & RX_BUF_XTD)
+ cf->can_id = (fifo_header.id & CAN_EFF_MASK) | CAN_EFF_FLAG;
else
- cf->can_id = (id >> 18) & CAN_SFF_MASK;
+ cf->can_id = (fifo_header.id >> 18) & CAN_SFF_MASK;
- if (id & RX_BUF_ESI) {
+ if (fifo_header.id & RX_BUF_ESI) {
cf->flags |= CANFD_ESI;
netdev_dbg(dev, "ESI Error\n");
}
- if (!(dlc & RX_BUF_FDF) && (id & RX_BUF_RTR)) {
+ if (!(fifo_header.dlc & RX_BUF_FDF) && (fifo_header.id & RX_BUF_RTR)) {
cf->can_id |= CAN_RTR_FLAG;
} else {
- if (dlc & RX_BUF_BRS)
+ if (fifo_header.dlc & RX_BUF_BRS)
cf->flags |= CANFD_BRS;
- for (i = 0; i < cf->len; i += 4)
- *(u32 *)(cf->data + i) =
- m_can_fifo_read(cdev, fgi,
- M_CAN_FIFO_DATA(i / 4));
+ err = m_can_fifo_read(cdev, fgi, M_CAN_FIFO_DATA,
+ cf->data, DIV_ROUND_UP(cf->len, 4));
+ if (err)
+ goto out_fail;
}
/* acknowledge rx fifo 0 */
@@ -510,9 +526,15 @@ static void m_can_read_fifo(struct net_device *dev, u32 rxfs)
stats->rx_packets++;
stats->rx_bytes += cf->len;
- timestamp = FIELD_GET(RX_BUF_RXTS_MASK, dlc);
+ timestamp = FIELD_GET(RX_BUF_RXTS_MASK, fifo_header.dlc);
m_can_receive_skb(cdev, skb, timestamp);
+
+ return 0;
+
+out_fail:
+ netdev_err(dev, "FIFO read returned %d\n", err);
+ return err;
}
static int m_can_do_rx_poll(struct net_device *dev, int quota)
@@ -520,6 +542,7 @@ static int m_can_do_rx_poll(struct net_device *dev, int quota)
struct m_can_classdev *cdev = netdev_priv(dev);
u32 pkts = 0;
u32 rxfs;
+ int err;
rxfs = m_can_read(cdev, M_CAN_RXF0S);
if (!(rxfs & RXFS_FFL_MASK)) {
@@ -528,7 +551,9 @@ static int m_can_do_rx_poll(struct net_device *dev, int quota)
}
while ((rxfs & RXFS_FFL_MASK) && (quota > 0)) {
- m_can_read_fifo(dev, rxfs);
+ err = m_can_read_fifo(dev, rxfs);
+ if (err)
+ return err;
quota--;
pkts++;
@@ -874,6 +899,7 @@ static int m_can_handle_bus_errors(struct net_device *dev, u32 irqstatus,
static int m_can_rx_handler(struct net_device *dev, int quota)
{
struct m_can_classdev *cdev = netdev_priv(dev);
+ int rx_work_or_err;
int work_done = 0;
u32 irqstatus, psr;
@@ -910,8 +936,13 @@ static int m_can_rx_handler(struct net_device *dev, int quota)
if (irqstatus & IR_ERR_BUS_30X)
work_done += m_can_handle_bus_errors(dev, irqstatus, psr);
- if (irqstatus & IR_RF0N)
- work_done += m_can_do_rx_poll(dev, (quota - work_done));
+ if (irqstatus & IR_RF0N) {
+ rx_work_or_err = m_can_do_rx_poll(dev, (quota - work_done));
+ if (rx_work_or_err < 0)
+ return rx_work_or_err;
+
+ work_done += rx_work_or_err;
+ }
end:
return work_done;
}
@@ -919,12 +950,17 @@ end:
static int m_can_rx_peripheral(struct net_device *dev)
{
struct m_can_classdev *cdev = netdev_priv(dev);
+ int work_done;
- m_can_rx_handler(dev, M_CAN_NAPI_WEIGHT);
+ work_done = m_can_rx_handler(dev, M_CAN_NAPI_WEIGHT);
- m_can_enable_all_interrupts(cdev);
+ /* Don't re-enable interrupts if the driver had a fatal error
+ * (e.g., FIFO read failure).
+ */
+ if (work_done >= 0)
+ m_can_enable_all_interrupts(cdev);
- return 0;
+ return work_done;
}
static int m_can_poll(struct napi_struct *napi, int quota)
@@ -934,7 +970,11 @@ static int m_can_poll(struct napi_struct *napi, int quota)
int work_done;
work_done = m_can_rx_handler(dev, quota);
- if (work_done < quota) {
+
+ /* Don't re-enable interrupts if the driver had a fatal error
+ * (e.g., FIFO read failure).
+ */
+ if (work_done >= 0 && work_done < quota) {
napi_complete_done(napi, work_done);
m_can_enable_all_interrupts(cdev);
}
@@ -945,7 +985,7 @@ static int m_can_poll(struct napi_struct *napi, int quota)
/* Echo tx skb and update net stats. Peripherals use rx-offload for
* echo. timestamp is used for peripherals to ensure correct ordering
* by rx-offload, and is ignored for non-peripherals.
-*/
+ */
static void m_can_tx_update_stats(struct m_can_classdev *cdev,
unsigned int msg_mark,
u32 timestamp)
@@ -965,7 +1005,7 @@ static void m_can_tx_update_stats(struct m_can_classdev *cdev,
stats->tx_packets++;
}
-static void m_can_echo_tx_event(struct net_device *dev)
+static int m_can_echo_tx_event(struct net_device *dev)
{
u32 txe_count = 0;
u32 m_can_txefs;
@@ -984,12 +1024,18 @@ static void m_can_echo_tx_event(struct net_device *dev)
/* Get and process all sent elements */
for (i = 0; i < txe_count; i++) {
u32 txe, timestamp = 0;
+ int err;
/* retrieve get index */
fgi = FIELD_GET(TXEFS_EFGI_MASK, m_can_read(cdev, M_CAN_TXEFS));
/* get message marker, timestamp */
- txe = m_can_txe_fifo_read(cdev, fgi, 4);
+ err = m_can_txe_fifo_read(cdev, fgi, 4, &txe);
+ if (err) {
+ netdev_err(dev, "TXE FIFO read returned %d\n", err);
+ return err;
+ }
+
msg_mark = FIELD_GET(TX_EVENT_MM_MASK, txe);
timestamp = FIELD_GET(TX_EVENT_TXTS_MASK, txe);
@@ -1000,6 +1046,8 @@ static void m_can_echo_tx_event(struct net_device *dev)
/* update stats */
m_can_tx_update_stats(cdev, msg_mark, timestamp);
}
+
+ return 0;
}
static irqreturn_t m_can_isr(int irq, void *dev_id)
@@ -1031,8 +1079,8 @@ static irqreturn_t m_can_isr(int irq, void *dev_id)
m_can_disable_all_interrupts(cdev);
if (!cdev->is_peripheral)
napi_schedule(&cdev->napi);
- else
- m_can_rx_peripheral(dev);
+ else if (m_can_rx_peripheral(dev) < 0)
+ goto out_fail;
}
if (cdev->version == 30) {
@@ -1050,7 +1098,9 @@ static irqreturn_t m_can_isr(int irq, void *dev_id)
} else {
if (ir & IR_TEFN) {
/* New TX FIFO Element arrived */
- m_can_echo_tx_event(dev);
+ if (m_can_echo_tx_event(dev) != 0)
+ goto out_fail;
+
can_led_event(dev, CAN_LED_EVENT_TX);
if (netif_queue_stopped(dev) &&
!m_can_tx_fifo_full(cdev))
@@ -1058,6 +1108,13 @@ static irqreturn_t m_can_isr(int irq, void *dev_id)
}
}
+ if (cdev->is_peripheral)
+ can_rx_offload_threaded_irq_finish(&cdev->offload);
+
+ return IRQ_HANDLED;
+
+out_fail:
+ m_can_disable_all_interrupts(cdev);
return IRQ_HANDLED;
}
@@ -1164,10 +1221,10 @@ static int m_can_set_bittiming(struct net_device *dev)
FIELD_PREP(TDCR_TDCO_MASK, tdco));
}
- reg_btp = FIELD_PREP(NBTP_NBRP_MASK, brp) |
- FIELD_PREP(NBTP_NSJW_MASK, sjw) |
- FIELD_PREP(NBTP_NTSEG1_MASK, tseg1) |
- FIELD_PREP(NBTP_NTSEG2_MASK, tseg2);
+ reg_btp |= FIELD_PREP(DBTP_DBRP_MASK, brp) |
+ FIELD_PREP(DBTP_DSJW_MASK, sjw) |
+ FIELD_PREP(DBTP_DTSEG1_MASK, tseg1) |
+ FIELD_PREP(DBTP_DTSEG2_MASK, tseg2);
m_can_write(cdev, M_CAN_DBTP, reg_btp);
}
@@ -1302,7 +1359,8 @@ static void m_can_chip_config(struct net_device *dev)
m_can_set_bittiming(dev);
/* enable internal timestamp generation, with a prescalar of 16. The
- * prescalar is applied to the nominal bit timing */
+ * prescalar is applied to the nominal bit timing
+ */
m_can_write(cdev, M_CAN_TSCC, FIELD_PREP(TSCC_TCP_MASK, 0xf));
m_can_config_endisable(cdev, false);
@@ -1436,32 +1494,20 @@ static int m_can_dev_setup(struct m_can_classdev *cdev)
case 30:
/* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.0.x */
can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO);
- cdev->can.bittiming_const = cdev->bit_timing ?
- cdev->bit_timing : &m_can_bittiming_const_30X;
-
- cdev->can.data_bittiming_const = cdev->data_timing ?
- cdev->data_timing :
- &m_can_data_bittiming_const_30X;
+ cdev->can.bittiming_const = &m_can_bittiming_const_30X;
+ cdev->can.data_bittiming_const = &m_can_data_bittiming_const_30X;
break;
case 31:
/* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.1.x */
can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO);
- cdev->can.bittiming_const = cdev->bit_timing ?
- cdev->bit_timing : &m_can_bittiming_const_31X;
-
- cdev->can.data_bittiming_const = cdev->data_timing ?
- cdev->data_timing :
- &m_can_data_bittiming_const_31X;
+ cdev->can.bittiming_const = &m_can_bittiming_const_31X;
+ cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X;
break;
case 32:
case 33:
/* Support both MCAN version v3.2.x and v3.3.0 */
- cdev->can.bittiming_const = cdev->bit_timing ?
- cdev->bit_timing : &m_can_bittiming_const_31X;
-
- cdev->can.data_bittiming_const = cdev->data_timing ?
- cdev->data_timing :
- &m_can_data_bittiming_const_31X;
+ cdev->can.bittiming_const = &m_can_bittiming_const_31X;
+ cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X;
cdev->can.ctrlmode_supported |=
(m_can_niso_supported(cdev) ?
@@ -1518,6 +1564,8 @@ static int m_can_close(struct net_device *dev)
close_candev(dev);
can_led_event(dev, CAN_LED_EVENT_STOP);
+ phy_power_off(cdev->transceiver);
+
return 0;
}
@@ -1540,8 +1588,9 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
struct canfd_frame *cf = (struct canfd_frame *)cdev->tx_skb->data;
struct net_device *dev = cdev->net;
struct sk_buff *skb = cdev->tx_skb;
- u32 id, cccr, fdflags;
- int i;
+ struct id_and_dlc fifo_header;
+ u32 cccr, fdflags;
+ int err;
int putidx;
cdev->tx_skb = NULL;
@@ -1549,27 +1598,29 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
/* Generate ID field for TX buffer Element */
/* Common to all supported M_CAN versions */
if (cf->can_id & CAN_EFF_FLAG) {
- id = cf->can_id & CAN_EFF_MASK;
- id |= TX_BUF_XTD;
+ fifo_header.id = cf->can_id & CAN_EFF_MASK;
+ fifo_header.id |= TX_BUF_XTD;
} else {
- id = ((cf->can_id & CAN_SFF_MASK) << 18);
+ fifo_header.id = ((cf->can_id & CAN_SFF_MASK) << 18);
}
if (cf->can_id & CAN_RTR_FLAG)
- id |= TX_BUF_RTR;
+ fifo_header.id |= TX_BUF_RTR;
if (cdev->version == 30) {
netif_stop_queue(dev);
- /* message ram configuration */
- m_can_fifo_write(cdev, 0, M_CAN_FIFO_ID, id);
- m_can_fifo_write(cdev, 0, M_CAN_FIFO_DLC,
- can_fd_len2dlc(cf->len) << 16);
+ fifo_header.dlc = can_fd_len2dlc(cf->len) << 16;
+
+ /* Write the frame ID, DLC, and payload to the FIFO element. */
+ err = m_can_fifo_write(cdev, 0, M_CAN_FIFO_ID, &fifo_header, 2);
+ if (err)
+ goto out_fail;
- for (i = 0; i < cf->len; i += 4)
- m_can_fifo_write(cdev, 0,
- M_CAN_FIFO_DATA(i / 4),
- *(u32 *)(cf->data + i));
+ err = m_can_fifo_write(cdev, 0, M_CAN_FIFO_DATA,
+ cf->data, DIV_ROUND_UP(cf->len, 4));
+ if (err)
+ goto out_fail;
can_put_echo_skb(skb, dev, 0, 0);
@@ -1613,8 +1664,11 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
/* get put index for frame */
putidx = FIELD_GET(TXFQS_TFQPI_MASK,
m_can_read(cdev, M_CAN_TXFQS));
- /* Write ID Field to FIFO Element */
- m_can_fifo_write(cdev, putidx, M_CAN_FIFO_ID, id);
+
+ /* Construct DLC Field, with CAN-FD configuration.
+ * Use the put index of the fifo as the message marker,
+ * used in the TX interrupt for sending the correct echo frame.
+ */
/* get CAN FD configuration of frame */
fdflags = 0;
@@ -1624,20 +1678,17 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
fdflags |= TX_BUF_BRS;
}
- /* Construct DLC Field. Also contains CAN-FD configuration
- * use put index of fifo as message marker
- * it is used in TX interrupt for
- * sending the correct echo frame
- */
- m_can_fifo_write(cdev, putidx, M_CAN_FIFO_DLC,
- FIELD_PREP(TX_BUF_MM_MASK, putidx) |
- FIELD_PREP(TX_BUF_DLC_MASK,
- can_fd_len2dlc(cf->len)) |
- fdflags | TX_BUF_EFC);
+ fifo_header.dlc = FIELD_PREP(TX_BUF_MM_MASK, putidx) |
+ FIELD_PREP(TX_BUF_DLC_MASK, can_fd_len2dlc(cf->len)) |
+ fdflags | TX_BUF_EFC;
+ err = m_can_fifo_write(cdev, putidx, M_CAN_FIFO_ID, &fifo_header, 2);
+ if (err)
+ goto out_fail;
- for (i = 0; i < cf->len; i += 4)
- m_can_fifo_write(cdev, putidx, M_CAN_FIFO_DATA(i / 4),
- *(u32 *)(cf->data + i));
+ err = m_can_fifo_write(cdev, putidx, M_CAN_FIFO_DATA,
+ cf->data, DIV_ROUND_UP(cf->len, 4));
+ if (err)
+ goto out_fail;
/* Push loopback echo.
* Will be looped back on TX interrupt based on message marker
@@ -1654,6 +1705,11 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
}
return NETDEV_TX_OK;
+
+out_fail:
+ netdev_err(dev, "FIFO write returned %d\n", err);
+ m_can_disable_all_interrupts(cdev);
+ return NETDEV_TX_BUSY;
}
static void m_can_tx_work_queue(struct work_struct *ws)
@@ -1703,10 +1759,14 @@ static int m_can_open(struct net_device *dev)
struct m_can_classdev *cdev = netdev_priv(dev);
int err;
- err = m_can_clk_start(cdev);
+ err = phy_power_on(cdev->transceiver);
if (err)
return err;
+ err = m_can_clk_start(cdev);
+ if (err)
+ goto out_phy_power_off;
+
/* open the can device */
err = open_candev(dev);
if (err) {
@@ -1763,6 +1823,8 @@ out_wq_fail:
close_candev(dev);
exit_disable_clks:
m_can_clk_stop(cdev);
+out_phy_power_off:
+ phy_power_off(cdev->transceiver);
return err;
}
@@ -1819,9 +1881,10 @@ static void m_can_of_parse_mram(struct m_can_classdev *cdev,
cdev->mcfg[MRAM_TXB].off, cdev->mcfg[MRAM_TXB].num);
}
-void m_can_init_ram(struct m_can_classdev *cdev)
+int m_can_init_ram(struct m_can_classdev *cdev)
{
int end, i, start;
+ int err = 0;
/* initialize the entire Message RAM in use to avoid possible
* ECC/parity checksum errors when reading an uninitialized buffer
@@ -1830,8 +1893,13 @@ void m_can_init_ram(struct m_can_classdev *cdev)
end = cdev->mcfg[MRAM_TXB].off +
cdev->mcfg[MRAM_TXB].num * TXB_ELEMENT_SIZE;
- for (i = start; i < end; i += 4)
- m_can_fifo_write_no_off(cdev, i, 0x0);
+ for (i = start; i < end; i += 4) {
+ err = m_can_fifo_write_no_off(cdev, i, 0x0);
+ if (err)
+ break;
+ }
+
+ return err;
}
EXPORT_SYMBOL_GPL(m_can_init_ram);
diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h
index ace071c3e58c..d18b515e6ccc 100644
--- a/drivers/net/can/m_can/m_can.h
+++ b/drivers/net/can/m_can/m_can.h
@@ -28,6 +28,7 @@
#include <linux/iopoll.h>
#include <linux/can/dev.h>
#include <linux/pinctrl/consumer.h>
+#include <linux/phy/phy.h>
/* m_can lec values */
enum m_can_lec_type {
@@ -64,9 +65,9 @@ struct m_can_ops {
int (*clear_interrupts)(struct m_can_classdev *cdev);
u32 (*read_reg)(struct m_can_classdev *cdev, int reg);
int (*write_reg)(struct m_can_classdev *cdev, int reg, int val);
- u32 (*read_fifo)(struct m_can_classdev *cdev, int addr_offset);
+ int (*read_fifo)(struct m_can_classdev *cdev, int addr_offset, void *val, size_t val_count);
int (*write_fifo)(struct m_can_classdev *cdev, int addr_offset,
- int val);
+ const void *val, size_t val_count);
int (*init)(struct m_can_classdev *cdev);
};
@@ -82,9 +83,7 @@ struct m_can_classdev {
struct workqueue_struct *tx_wq;
struct work_struct tx_work;
struct sk_buff *tx_skb;
-
- struct can_bittiming_const *bit_timing;
- struct can_bittiming_const *data_timing;
+ struct phy *transceiver;
struct m_can_ops *ops;
@@ -102,7 +101,7 @@ void m_can_class_free_dev(struct net_device *net);
int m_can_class_register(struct m_can_classdev *cdev);
void m_can_class_unregister(struct m_can_classdev *cdev);
int m_can_class_get_clocks(struct m_can_classdev *cdev);
-void m_can_init_ram(struct m_can_classdev *priv);
+int m_can_init_ram(struct m_can_classdev *priv);
int m_can_class_suspend(struct device *dev);
int m_can_class_resume(struct device *dev);
diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c
index 128808605c3f..89cc3d41e952 100644
--- a/drivers/net/can/m_can/m_can_pci.c
+++ b/drivers/net/can/m_can/m_can_pci.c
@@ -39,11 +39,13 @@ static u32 iomap_read_reg(struct m_can_classdev *cdev, int reg)
return readl(priv->base + reg);
}
-static u32 iomap_read_fifo(struct m_can_classdev *cdev, int offset)
+static int iomap_read_fifo(struct m_can_classdev *cdev, int offset, void *val, size_t val_count)
{
struct m_can_pci_priv *priv = cdev_to_priv(cdev);
- return readl(priv->base + offset);
+ ioread32_rep(priv->base + offset, val, val_count);
+
+ return 0;
}
static int iomap_write_reg(struct m_can_classdev *cdev, int reg, int val)
@@ -55,11 +57,12 @@ static int iomap_write_reg(struct m_can_classdev *cdev, int reg, int val)
return 0;
}
-static int iomap_write_fifo(struct m_can_classdev *cdev, int offset, int val)
+static int iomap_write_fifo(struct m_can_classdev *cdev, int offset,
+ const void *val, size_t val_count)
{
struct m_can_pci_priv *priv = cdev_to_priv(cdev);
- writel(val, priv->base + offset);
+ iowrite32_rep(priv->base + offset, val, val_count);
return 0;
}
diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c
index 599de0e08cd7..308d4f2fff00 100644
--- a/drivers/net/can/m_can/m_can_platform.c
+++ b/drivers/net/can/m_can/m_can_platform.c
@@ -6,6 +6,7 @@
// Copyright (C) 2018-19 Texas Instruments Incorporated - http://www.ti.com/
#include <linux/platform_device.h>
+#include <linux/phy/phy.h>
#include "m_can.h"
@@ -28,11 +29,13 @@ static u32 iomap_read_reg(struct m_can_classdev *cdev, int reg)
return readl(priv->base + reg);
}
-static u32 iomap_read_fifo(struct m_can_classdev *cdev, int offset)
+static int iomap_read_fifo(struct m_can_classdev *cdev, int offset, void *val, size_t val_count)
{
struct m_can_plat_priv *priv = cdev_to_priv(cdev);
- return readl(priv->mram_base + offset);
+ ioread32_rep(priv->mram_base + offset, val, val_count);
+
+ return 0;
}
static int iomap_write_reg(struct m_can_classdev *cdev, int reg, int val)
@@ -44,11 +47,12 @@ static int iomap_write_reg(struct m_can_classdev *cdev, int reg, int val)
return 0;
}
-static int iomap_write_fifo(struct m_can_classdev *cdev, int offset, int val)
+static int iomap_write_fifo(struct m_can_classdev *cdev, int offset,
+ const void *val, size_t val_count)
{
struct m_can_plat_priv *priv = cdev_to_priv(cdev);
- writel(val, priv->mram_base + offset);
+ iowrite32_rep(priv->base + offset, val, val_count);
return 0;
}
@@ -67,6 +71,7 @@ static int m_can_plat_probe(struct platform_device *pdev)
struct resource *res;
void __iomem *addr;
void __iomem *mram_addr;
+ struct phy *transceiver;
int irq, ret = 0;
mcan_class = m_can_class_allocate_dev(&pdev->dev,
@@ -80,8 +85,7 @@ static int m_can_plat_probe(struct platform_device *pdev)
if (ret)
goto probe_fail;
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "m_can");
- addr = devm_ioremap_resource(&pdev->dev, res);
+ addr = devm_platform_ioremap_resource_byname(pdev, "m_can");
irq = platform_get_irq_byname(pdev, "int0");
if (IS_ERR(addr) || irq < 0) {
ret = -EINVAL;
@@ -101,6 +105,16 @@ static int m_can_plat_probe(struct platform_device *pdev)
goto probe_fail;
}
+ transceiver = devm_phy_optional_get(&pdev->dev, NULL);
+ if (IS_ERR(transceiver)) {
+ ret = PTR_ERR(transceiver);
+ dev_err_probe(&pdev->dev, ret, "failed to get phy\n");
+ goto probe_fail;
+ }
+
+ if (transceiver)
+ mcan_class->can.bitrate_max = transceiver->attrs.max_link_rate;
+
priv->base = addr;
priv->mram_base = mram_addr;
@@ -108,6 +122,7 @@ static int m_can_plat_probe(struct platform_device *pdev)
mcan_class->pm_clock_support = 1;
mcan_class->can.clock.freq = clk_get_rate(mcan_class->cclk);
mcan_class->dev = &pdev->dev;
+ mcan_class->transceiver = transceiver;
mcan_class->ops = &m_can_plat_ops;
@@ -115,7 +130,9 @@ static int m_can_plat_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, mcan_class);
- m_can_init_ram(mcan_class);
+ ret = m_can_init_ram(mcan_class);
+ if (ret)
+ goto probe_fail;
pm_runtime_enable(mcan_class->dev);
ret = m_can_class_register(mcan_class);
diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c
index 4147cecfbbd6..04687b15b250 100644
--- a/drivers/net/can/m_can/tcan4x5x-core.c
+++ b/drivers/net/can/m_can/tcan4x5x-core.c
@@ -105,7 +105,6 @@
static inline struct tcan4x5x_priv *cdev_to_priv(struct m_can_classdev *cdev)
{
return container_of(cdev, struct tcan4x5x_priv, cdev);
-
}
static void tcan4x5x_check_wake(struct tcan4x5x_priv *priv)
@@ -154,14 +153,12 @@ static u32 tcan4x5x_read_reg(struct m_can_classdev *cdev, int reg)
return val;
}
-static u32 tcan4x5x_read_fifo(struct m_can_classdev *cdev, int addr_offset)
+static int tcan4x5x_read_fifo(struct m_can_classdev *cdev, int addr_offset,
+ void *val, size_t val_count)
{
struct tcan4x5x_priv *priv = cdev_to_priv(cdev);
- u32 val;
-
- regmap_read(priv->regmap, TCAN4X5X_MRAM_START + addr_offset, &val);
- return val;
+ return regmap_bulk_read(priv->regmap, TCAN4X5X_MRAM_START + addr_offset, val, val_count);
}
static int tcan4x5x_write_reg(struct m_can_classdev *cdev, int reg, int val)
@@ -172,11 +169,11 @@ static int tcan4x5x_write_reg(struct m_can_classdev *cdev, int reg, int val)
}
static int tcan4x5x_write_fifo(struct m_can_classdev *cdev,
- int addr_offset, int val)
+ int addr_offset, const void *val, size_t val_count)
{
struct tcan4x5x_priv *priv = cdev_to_priv(cdev);
- return regmap_write(priv->regmap, TCAN4X5X_MRAM_START + addr_offset, val);
+ return regmap_bulk_write(priv->regmap, TCAN4X5X_MRAM_START + addr_offset, val, val_count);
}
static int tcan4x5x_power_enable(struct regulator *reg, int enable)
@@ -238,7 +235,9 @@ static int tcan4x5x_init(struct m_can_classdev *cdev)
return ret;
/* Zero out the MCAN buffers */
- m_can_init_ram(cdev);
+ ret = m_can_init_ram(cdev);
+ if (ret)
+ return ret;
ret = regmap_update_bits(tcan4x5x->regmap, TCAN4X5X_CONFIG,
TCAN4X5X_MODE_SEL_MASK, TCAN4X5X_MODE_NORMAL);
diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c
index e254e04ae257..35892c1efef0 100644
--- a/drivers/net/can/mscan/mpc5xxx_can.c
+++ b/drivers/net/can/mscan/mpc5xxx_can.c
@@ -279,7 +279,6 @@ static u32 mpc512x_can_get_clock(struct platform_device *ofdev,
static const struct of_device_id mpc5xxx_can_table[];
static int mpc5xxx_can_probe(struct platform_device *ofdev)
{
- const struct of_device_id *match;
const struct mpc5xxx_can_data *data;
struct device_node *np = ofdev->dev.of_node;
struct net_device *dev;
@@ -289,10 +288,9 @@ static int mpc5xxx_can_probe(struct platform_device *ofdev)
int irq, mscan_clksrc = 0;
int err = -ENOMEM;
- match = of_match_device(mpc5xxx_can_table, &ofdev->dev);
- if (!match)
+ data = of_device_get_match_data(&ofdev->dev);
+ if (!data)
return -EINVAL;
- data = match->data;
base = of_iomap(np, 0);
if (!base) {
@@ -319,7 +317,6 @@ static int mpc5xxx_can_probe(struct platform_device *ofdev)
clock_name = of_get_property(np, "fsl,mscan-clock-source", NULL);
- BUG_ON(!data);
priv->type = data->type;
priv->can.clock.freq = data->get_clock(ofdev, clock_name,
&mscan_clksrc);
diff --git a/drivers/net/can/rcar/Kconfig b/drivers/net/can/rcar/Kconfig
index 29cabc20109e..56320a7f828b 100644
--- a/drivers/net/can/rcar/Kconfig
+++ b/drivers/net/can/rcar/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
config CAN_RCAR
tristate "Renesas R-Car and RZ/G CAN controller"
- depends on ARCH_RENESAS || ARM
+ depends on ARCH_RENESAS || ARM || COMPILE_TEST
help
Say Y here if you want to use CAN controller found on Renesas R-Car
or RZ/G SoCs.
@@ -11,7 +11,7 @@ config CAN_RCAR
config CAN_RCAR_CANFD
tristate "Renesas R-Car CAN FD controller"
- depends on ARCH_RENESAS || ARM
+ depends on ARCH_RENESAS || ARM || COMPILE_TEST
help
Say Y here if you want to use CAN FD controller found on
Renesas R-Car SoCs. The driver puts the controller in CAN FD only
diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c
index 311e6ca3bdc4..c47988d3674e 100644
--- a/drivers/net/can/rcar/rcar_canfd.c
+++ b/drivers/net/can/rcar/rcar_canfd.c
@@ -37,9 +37,15 @@
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/iopoll.h>
+#include <linux/reset.h>
#define RCANFD_DRV_NAME "rcar_canfd"
+enum rcanfd_chip_id {
+ RENESAS_RCAR_GEN3 = 0,
+ RENESAS_RZG2L,
+};
+
/* Global register bits */
/* RSCFDnCFDGRMCFG */
@@ -513,6 +519,9 @@ struct rcar_canfd_global {
enum rcar_canfd_fcanclk fcan; /* CANFD or Ext clock */
unsigned long channels_mask; /* Enabled channels mask */
bool fdmode; /* CAN FD or Classical CAN only mode */
+ struct reset_control *rstc1;
+ struct reset_control *rstc2;
+ enum rcanfd_chip_id chip_id;
};
/* CAN FD mode nominal rate constants */
@@ -1070,38 +1079,70 @@ static void rcar_canfd_tx_done(struct net_device *ndev)
can_led_event(ndev, CAN_LED_EVENT_TX);
}
+static void rcar_canfd_handle_global_err(struct rcar_canfd_global *gpriv, u32 ch)
+{
+ struct rcar_canfd_channel *priv = gpriv->ch[ch];
+ struct net_device *ndev = priv->ndev;
+ u32 gerfl;
+
+ /* Handle global error interrupts */
+ gerfl = rcar_canfd_read(priv->base, RCANFD_GERFL);
+ if (unlikely(RCANFD_GERFL_ERR(gpriv, gerfl)))
+ rcar_canfd_global_error(ndev);
+}
+
+static irqreturn_t rcar_canfd_global_err_interrupt(int irq, void *dev_id)
+{
+ struct rcar_canfd_global *gpriv = dev_id;
+ u32 ch;
+
+ for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS)
+ rcar_canfd_handle_global_err(gpriv, ch);
+
+ return IRQ_HANDLED;
+}
+
+static void rcar_canfd_handle_global_receive(struct rcar_canfd_global *gpriv, u32 ch)
+{
+ struct rcar_canfd_channel *priv = gpriv->ch[ch];
+ u32 ridx = ch + RCANFD_RFFIFO_IDX;
+ u32 sts;
+
+ /* Handle Rx interrupts */
+ sts = rcar_canfd_read(priv->base, RCANFD_RFSTS(ridx));
+ if (likely(sts & RCANFD_RFSTS_RFIF)) {
+ if (napi_schedule_prep(&priv->napi)) {
+ /* Disable Rx FIFO interrupts */
+ rcar_canfd_clear_bit(priv->base,
+ RCANFD_RFCC(ridx),
+ RCANFD_RFCC_RFIE);
+ __napi_schedule(&priv->napi);
+ }
+ }
+}
+
+static irqreturn_t rcar_canfd_global_receive_fifo_interrupt(int irq, void *dev_id)
+{
+ struct rcar_canfd_global *gpriv = dev_id;
+ u32 ch;
+
+ for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS)
+ rcar_canfd_handle_global_receive(gpriv, ch);
+
+ return IRQ_HANDLED;
+}
+
static irqreturn_t rcar_canfd_global_interrupt(int irq, void *dev_id)
{
struct rcar_canfd_global *gpriv = dev_id;
- struct net_device *ndev;
- struct rcar_canfd_channel *priv;
- u32 sts, gerfl;
- u32 ch, ridx;
+ u32 ch;
/* Global error interrupts still indicate a condition specific
* to a channel. RxFIFO interrupt is a global interrupt.
*/
for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS) {
- priv = gpriv->ch[ch];
- ndev = priv->ndev;
- ridx = ch + RCANFD_RFFIFO_IDX;
-
- /* Global error interrupts */
- gerfl = rcar_canfd_read(priv->base, RCANFD_GERFL);
- if (unlikely(RCANFD_GERFL_ERR(gpriv, gerfl)))
- rcar_canfd_global_error(ndev);
-
- /* Handle Rx interrupts */
- sts = rcar_canfd_read(priv->base, RCANFD_RFSTS(ridx));
- if (likely(sts & RCANFD_RFSTS_RFIF)) {
- if (napi_schedule_prep(&priv->napi)) {
- /* Disable Rx FIFO interrupts */
- rcar_canfd_clear_bit(priv->base,
- RCANFD_RFCC(ridx),
- RCANFD_RFCC_RFIE);
- __napi_schedule(&priv->napi);
- }
- }
+ rcar_canfd_handle_global_err(gpriv, ch);
+ rcar_canfd_handle_global_receive(gpriv, ch);
}
return IRQ_HANDLED;
}
@@ -1139,38 +1180,73 @@ static void rcar_canfd_state_change(struct net_device *ndev,
}
}
-static irqreturn_t rcar_canfd_channel_interrupt(int irq, void *dev_id)
+static void rcar_canfd_handle_channel_tx(struct rcar_canfd_global *gpriv, u32 ch)
+{
+ struct rcar_canfd_channel *priv = gpriv->ch[ch];
+ struct net_device *ndev = priv->ndev;
+ u32 sts;
+
+ /* Handle Tx interrupts */
+ sts = rcar_canfd_read(priv->base,
+ RCANFD_CFSTS(ch, RCANFD_CFFIFO_IDX));
+ if (likely(sts & RCANFD_CFSTS_CFTXIF))
+ rcar_canfd_tx_done(ndev);
+}
+
+static irqreturn_t rcar_canfd_channel_tx_interrupt(int irq, void *dev_id)
{
struct rcar_canfd_global *gpriv = dev_id;
- struct net_device *ndev;
- struct rcar_canfd_channel *priv;
- u32 sts, ch, cerfl;
+ u32 ch;
+
+ for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS)
+ rcar_canfd_handle_channel_tx(gpriv, ch);
+
+ return IRQ_HANDLED;
+}
+
+static void rcar_canfd_handle_channel_err(struct rcar_canfd_global *gpriv, u32 ch)
+{
+ struct rcar_canfd_channel *priv = gpriv->ch[ch];
+ struct net_device *ndev = priv->ndev;
u16 txerr, rxerr;
+ u32 sts, cerfl;
+
+ /* Handle channel error interrupts */
+ cerfl = rcar_canfd_read(priv->base, RCANFD_CERFL(ch));
+ sts = rcar_canfd_read(priv->base, RCANFD_CSTS(ch));
+ txerr = RCANFD_CSTS_TECCNT(sts);
+ rxerr = RCANFD_CSTS_RECCNT(sts);
+ if (unlikely(RCANFD_CERFL_ERR(cerfl)))
+ rcar_canfd_error(ndev, cerfl, txerr, rxerr);
+
+ /* Handle state change to lower states */
+ if (unlikely(priv->can.state != CAN_STATE_ERROR_ACTIVE &&
+ priv->can.state != CAN_STATE_BUS_OFF))
+ rcar_canfd_state_change(ndev, txerr, rxerr);
+}
+
+static irqreturn_t rcar_canfd_channel_err_interrupt(int irq, void *dev_id)
+{
+ struct rcar_canfd_global *gpriv = dev_id;
+ u32 ch;
+
+ for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS)
+ rcar_canfd_handle_channel_err(gpriv, ch);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t rcar_canfd_channel_interrupt(int irq, void *dev_id)
+{
+ struct rcar_canfd_global *gpriv = dev_id;
+ u32 ch;
/* Common FIFO is a per channel resource */
for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS) {
- priv = gpriv->ch[ch];
- ndev = priv->ndev;
-
- /* Channel error interrupts */
- cerfl = rcar_canfd_read(priv->base, RCANFD_CERFL(ch));
- sts = rcar_canfd_read(priv->base, RCANFD_CSTS(ch));
- txerr = RCANFD_CSTS_TECCNT(sts);
- rxerr = RCANFD_CSTS_RECCNT(sts);
- if (unlikely(RCANFD_CERFL_ERR(cerfl)))
- rcar_canfd_error(ndev, cerfl, txerr, rxerr);
-
- /* Handle state change to lower states */
- if (unlikely((priv->can.state != CAN_STATE_ERROR_ACTIVE) &&
- (priv->can.state != CAN_STATE_BUS_OFF)))
- rcar_canfd_state_change(ndev, txerr, rxerr);
-
- /* Handle Tx interrupts */
- sts = rcar_canfd_read(priv->base,
- RCANFD_CFSTS(ch, RCANFD_CFFIFO_IDX));
- if (likely(sts & RCANFD_CFSTS_CFTXIF))
- rcar_canfd_tx_done(ndev);
+ rcar_canfd_handle_channel_err(gpriv, ch);
+ rcar_canfd_handle_channel_tx(gpriv, ch);
}
+
return IRQ_HANDLED;
}
@@ -1577,6 +1653,53 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch,
priv->can.clock.freq = fcan_freq;
dev_info(&pdev->dev, "can_clk rate is %u\n", priv->can.clock.freq);
+ if (gpriv->chip_id == RENESAS_RZG2L) {
+ char *irq_name;
+ int err_irq;
+ int tx_irq;
+
+ err_irq = platform_get_irq_byname(pdev, ch == 0 ? "ch0_err" : "ch1_err");
+ if (err_irq < 0) {
+ err = err_irq;
+ goto fail;
+ }
+
+ tx_irq = platform_get_irq_byname(pdev, ch == 0 ? "ch0_trx" : "ch1_trx");
+ if (tx_irq < 0) {
+ err = tx_irq;
+ goto fail;
+ }
+
+ irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+ "canfd.ch%d_err", ch);
+ if (!irq_name) {
+ err = -ENOMEM;
+ goto fail;
+ }
+ err = devm_request_irq(&pdev->dev, err_irq,
+ rcar_canfd_channel_err_interrupt, 0,
+ irq_name, gpriv);
+ if (err) {
+ dev_err(&pdev->dev, "devm_request_irq CH Err(%d) failed, error %d\n",
+ err_irq, err);
+ goto fail;
+ }
+ irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+ "canfd.ch%d_trx", ch);
+ if (!irq_name) {
+ err = -ENOMEM;
+ goto fail;
+ }
+ err = devm_request_irq(&pdev->dev, tx_irq,
+ rcar_canfd_channel_tx_interrupt, 0,
+ irq_name, gpriv);
+ if (err) {
+ dev_err(&pdev->dev, "devm_request_irq Tx (%d) failed, error %d\n",
+ tx_irq, err);
+ goto fail;
+ }
+ }
+
if (gpriv->fdmode) {
priv->can.bittiming_const = &rcar_canfd_nom_bittiming_const;
priv->can.data_bittiming_const =
@@ -1636,7 +1759,11 @@ static int rcar_canfd_probe(struct platform_device *pdev)
struct device_node *of_child;
unsigned long channels_mask = 0;
int err, ch_irq, g_irq;
+ int g_err_irq, g_recc_irq;
bool fdmode = true; /* CAN FD only mode - default */
+ enum rcanfd_chip_id chip_id;
+
+ chip_id = (uintptr_t)of_device_get_match_data(&pdev->dev);
if (of_property_read_bool(pdev->dev.of_node, "renesas,no-can-fd"))
fdmode = false; /* Classical CAN only mode */
@@ -1649,16 +1776,30 @@ static int rcar_canfd_probe(struct platform_device *pdev)
if (of_child && of_device_is_available(of_child))
channels_mask |= BIT(1); /* Channel 1 */
- ch_irq = platform_get_irq(pdev, 0);
- if (ch_irq < 0) {
- err = ch_irq;
- goto fail_dev;
- }
+ if (chip_id == RENESAS_RCAR_GEN3) {
+ ch_irq = platform_get_irq_byname_optional(pdev, "ch_int");
+ if (ch_irq < 0) {
+ /* For backward compatibility get irq by index */
+ ch_irq = platform_get_irq(pdev, 0);
+ if (ch_irq < 0)
+ return ch_irq;
+ }
- g_irq = platform_get_irq(pdev, 1);
- if (g_irq < 0) {
- err = g_irq;
- goto fail_dev;
+ g_irq = platform_get_irq_byname_optional(pdev, "g_int");
+ if (g_irq < 0) {
+ /* For backward compatibility get irq by index */
+ g_irq = platform_get_irq(pdev, 1);
+ if (g_irq < 0)
+ return g_irq;
+ }
+ } else {
+ g_err_irq = platform_get_irq_byname(pdev, "g_err");
+ if (g_err_irq < 0)
+ return g_err_irq;
+
+ g_recc_irq = platform_get_irq_byname(pdev, "g_recc");
+ if (g_recc_irq < 0)
+ return g_recc_irq;
}
/* Global controller context */
@@ -1670,6 +1811,19 @@ static int rcar_canfd_probe(struct platform_device *pdev)
gpriv->pdev = pdev;
gpriv->channels_mask = channels_mask;
gpriv->fdmode = fdmode;
+ gpriv->chip_id = chip_id;
+
+ if (gpriv->chip_id == RENESAS_RZG2L) {
+ gpriv->rstc1 = devm_reset_control_get_exclusive(&pdev->dev, "rstp_n");
+ if (IS_ERR(gpriv->rstc1))
+ return dev_err_probe(&pdev->dev, PTR_ERR(gpriv->rstc1),
+ "failed to get rstp_n\n");
+
+ gpriv->rstc2 = devm_reset_control_get_exclusive(&pdev->dev, "rstc_n");
+ if (IS_ERR(gpriv->rstc2))
+ return dev_err_probe(&pdev->dev, PTR_ERR(gpriv->rstc2),
+ "failed to get rstc_n\n");
+ }
/* Peripheral clock */
gpriv->clkp = devm_clk_get(&pdev->dev, "fck");
@@ -1699,7 +1853,7 @@ static int rcar_canfd_probe(struct platform_device *pdev)
}
fcan_freq = clk_get_rate(gpriv->can_clk);
- if (gpriv->fcan == RCANFD_CANFDCLK)
+ if (gpriv->fcan == RCANFD_CANFDCLK && gpriv->chip_id == RENESAS_RCAR_GEN3)
/* CANFD clock is further divided by (1/2) within the IP */
fcan_freq /= 2;
@@ -1711,20 +1865,51 @@ static int rcar_canfd_probe(struct platform_device *pdev)
gpriv->base = addr;
/* Request IRQ that's common for both channels */
- err = devm_request_irq(&pdev->dev, ch_irq,
- rcar_canfd_channel_interrupt, 0,
- "canfd.chn", gpriv);
- if (err) {
- dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n",
- ch_irq, err);
- goto fail_dev;
+ if (gpriv->chip_id == RENESAS_RCAR_GEN3) {
+ err = devm_request_irq(&pdev->dev, ch_irq,
+ rcar_canfd_channel_interrupt, 0,
+ "canfd.ch_int", gpriv);
+ if (err) {
+ dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n",
+ ch_irq, err);
+ goto fail_dev;
+ }
+
+ err = devm_request_irq(&pdev->dev, g_irq,
+ rcar_canfd_global_interrupt, 0,
+ "canfd.g_int", gpriv);
+ if (err) {
+ dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n",
+ g_irq, err);
+ goto fail_dev;
+ }
+ } else {
+ err = devm_request_irq(&pdev->dev, g_recc_irq,
+ rcar_canfd_global_receive_fifo_interrupt, 0,
+ "canfd.g_recc", gpriv);
+
+ if (err) {
+ dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n",
+ g_recc_irq, err);
+ goto fail_dev;
+ }
+
+ err = devm_request_irq(&pdev->dev, g_err_irq,
+ rcar_canfd_global_err_interrupt, 0,
+ "canfd.g_err", gpriv);
+ if (err) {
+ dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n",
+ g_err_irq, err);
+ goto fail_dev;
+ }
}
- err = devm_request_irq(&pdev->dev, g_irq,
- rcar_canfd_global_interrupt, 0,
- "canfd.gbl", gpriv);
+
+ err = reset_control_reset(gpriv->rstc1);
+ if (err)
+ goto fail_dev;
+ err = reset_control_reset(gpriv->rstc2);
if (err) {
- dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n",
- g_irq, err);
+ reset_control_assert(gpriv->rstc1);
goto fail_dev;
}
@@ -1733,7 +1918,7 @@ static int rcar_canfd_probe(struct platform_device *pdev)
if (err) {
dev_err(&pdev->dev,
"failed to enable peripheral clock, error %d\n", err);
- goto fail_dev;
+ goto fail_reset;
}
err = rcar_canfd_reset_controller(gpriv);
@@ -1790,6 +1975,9 @@ fail_mode:
rcar_canfd_disable_global_interrupts(gpriv);
fail_clk:
clk_disable_unprepare(gpriv->clkp);
+fail_reset:
+ reset_control_assert(gpriv->rstc1);
+ reset_control_assert(gpriv->rstc2);
fail_dev:
return err;
}
@@ -1810,6 +1998,9 @@ static int rcar_canfd_remove(struct platform_device *pdev)
/* Enter global sleep mode */
rcar_canfd_set_bit(gpriv->base, RCANFD_GCTR, RCANFD_GCTR_GSLPR);
clk_disable_unprepare(gpriv->clkp);
+ reset_control_assert(gpriv->rstc1);
+ reset_control_assert(gpriv->rstc2);
+
return 0;
}
@@ -1827,7 +2018,8 @@ static SIMPLE_DEV_PM_OPS(rcar_canfd_pm_ops, rcar_canfd_suspend,
rcar_canfd_resume);
static const struct of_device_id rcar_canfd_of_table[] = {
- { .compatible = "renesas,rcar-gen3-canfd" },
+ { .compatible = "renesas,rcar-gen3-canfd", .data = (void *)RENESAS_RCAR_GEN3 },
+ { .compatible = "renesas,rzg2l-canfd", .data = (void *)RENESAS_RZG2L },
{ }
};
diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c
index 84eac8cb8686..6db90dc4bc9d 100644
--- a/drivers/net/can/sja1000/peak_pci.c
+++ b/drivers/net/can/sja1000/peak_pci.c
@@ -28,6 +28,10 @@ MODULE_LICENSE("GPL v2");
#define DRV_NAME "peak_pci"
+/* FPGA cards FW version registers */
+#define PEAK_VER_REG1 0x40
+#define PEAK_VER_REG2 0x44
+
struct peak_pciec_card;
struct peak_pci_chan {
void __iomem *cfg_base; /* Common for all channels */
@@ -41,9 +45,7 @@ struct peak_pci_chan {
#define PEAK_PCI_CDR (CDR_CBP | CDR_CLKOUT_MASK)
#define PEAK_PCI_OCR OCR_TX0_PUSHPULL
-/*
- * Important PITA registers
- */
+/* Important PITA registers */
#define PITA_ICR 0x00 /* Interrupt control register */
#define PITA_GPIOICR 0x18 /* GPIO interface control register */
#define PITA_MISC 0x1C /* Miscellaneous register */
@@ -70,27 +72,47 @@ static const u16 peak_pci_icr_masks[PEAK_PCI_CHAN_MAX] = {
};
static const struct pci_device_id peak_pci_tbl[] = {
- {PEAK_PCI_VENDOR_ID, PEAK_PCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
- {PEAK_PCI_VENDOR_ID, PEAK_PCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
- {PEAK_PCI_VENDOR_ID, PEAK_MPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
- {PEAK_PCI_VENDOR_ID, PEAK_MPCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
- {PEAK_PCI_VENDOR_ID, PEAK_PC_104P_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
- {PEAK_PCI_VENDOR_ID, PEAK_PCI_104E_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
- {PEAK_PCI_VENDOR_ID, PEAK_CPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
- {PEAK_PCI_VENDOR_ID, PEAK_PCIE_OEM_ID, PCI_ANY_ID, PCI_ANY_ID,},
+ {
+ PEAK_PCI_VENDOR_ID, PEAK_PCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)"PCAN-PCI",
+ }, {
+ PEAK_PCI_VENDOR_ID, PEAK_PCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)"PCAN-PCI Express",
+ }, {
+ PEAK_PCI_VENDOR_ID, PEAK_MPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)"PCAN-miniPCI",
+ }, {
+ PEAK_PCI_VENDOR_ID, PEAK_MPCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)"PCAN-miniPCIe",
+ }, {
+ PEAK_PCI_VENDOR_ID, PEAK_PC_104P_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)"PCAN-PC/104-Plus Quad",
+ }, {
+ PEAK_PCI_VENDOR_ID, PEAK_PCI_104E_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)"PCAN-PCI/104-Express",
+ }, {
+ PEAK_PCI_VENDOR_ID, PEAK_CPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)"PCAN-cPCI",
+ }, {
+ PEAK_PCI_VENDOR_ID, PEAK_PCIE_OEM_ID, PCI_ANY_ID, PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)"PCAN-Chip PCIe",
+ },
#ifdef CONFIG_CAN_PEAK_PCIEC
- {PEAK_PCI_VENDOR_ID, PEAK_PCIEC_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
- {PEAK_PCI_VENDOR_ID, PEAK_PCIEC34_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
+ {
+ PEAK_PCI_VENDOR_ID, PEAK_PCIEC_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)"PCAN-ExpressCard",
+ }, {
+ PEAK_PCI_VENDOR_ID, PEAK_PCIEC34_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)"PCAN-ExpressCard 34",
+ },
#endif
- {0,}
+ { /* sentinel */ }
};
MODULE_DEVICE_TABLE(pci, peak_pci_tbl);
#ifdef CONFIG_CAN_PEAK_PCIEC
-/*
- * PCAN-ExpressCard needs I2C bit-banging configuration option.
- */
+/* PCAN-ExpressCard needs I2C bit-banging configuration option. */
/* GPIOICR byte access offsets */
#define PITA_GPOUT 0x18 /* GPx output value */
@@ -156,12 +178,14 @@ static void peak_pci_write_reg(const struct sja1000_priv *priv,
static inline void pita_set_scl_highz(struct peak_pciec_card *card)
{
u8 gp_outen = readb(card->cfg_base + PITA_GPOEN) & ~PITA_GPIN_SCL;
+
writeb(gp_outen, card->cfg_base + PITA_GPOEN);
}
static inline void pita_set_sda_highz(struct peak_pciec_card *card)
{
u8 gp_outen = readb(card->cfg_base + PITA_GPOEN) & ~PITA_GPIN_SDA;
+
writeb(gp_outen, card->cfg_base + PITA_GPOEN);
}
@@ -230,9 +254,7 @@ static int pita_getscl(void *data)
return (readb(card->cfg_base + PITA_GPIN) & PITA_GPIN_SCL) ? 1 : 0;
}
-/*
- * write commands to the LED chip though the I2C-bus of the PCAN-PCIeC
- */
+/* write commands to the LED chip though the I2C-bus of the PCAN-PCIeC */
static int peak_pciec_write_pca9553(struct peak_pciec_card *card,
u8 offset, u8 data)
{
@@ -248,7 +270,7 @@ static int peak_pciec_write_pca9553(struct peak_pciec_card *card,
int ret;
/* cache led mask */
- if ((offset == 5) && (data == card->led_cache))
+ if (offset == 5 && data == card->led_cache)
return 0;
ret = i2c_transfer(&card->led_chip, &msg, 1);
@@ -261,9 +283,7 @@ static int peak_pciec_write_pca9553(struct peak_pciec_card *card,
return 0;
}
-/*
- * delayed work callback used to control the LEDs
- */
+/* delayed work callback used to control the LEDs */
static void peak_pciec_led_work(struct work_struct *work)
{
struct peak_pciec_card *card =
@@ -309,9 +329,7 @@ static void peak_pciec_led_work(struct work_struct *work)
schedule_delayed_work(&card->led_work, HZ);
}
-/*
- * set LEDs blinking state
- */
+/* set LEDs blinking state */
static void peak_pciec_set_leds(struct peak_pciec_card *card, u8 led_mask, u8 s)
{
u8 new_led = card->led_cache;
@@ -328,25 +346,19 @@ static void peak_pciec_set_leds(struct peak_pciec_card *card, u8 led_mask, u8 s)
peak_pciec_write_pca9553(card, 5, new_led);
}
-/*
- * start one second delayed work to control LEDs
- */
+/* start one second delayed work to control LEDs */
static void peak_pciec_start_led_work(struct peak_pciec_card *card)
{
schedule_delayed_work(&card->led_work, HZ);
}
-/*
- * stop LEDs delayed work
- */
+/* stop LEDs delayed work */
static void peak_pciec_stop_led_work(struct peak_pciec_card *card)
{
cancel_delayed_work_sync(&card->led_work);
}
-/*
- * initialize the PCA9553 4-bit I2C-bus LED chip
- */
+/* initialize the PCA9553 4-bit I2C-bus LED chip */
static int peak_pciec_init_leds(struct peak_pciec_card *card)
{
int err;
@@ -375,17 +387,14 @@ static int peak_pciec_init_leds(struct peak_pciec_card *card)
return peak_pciec_write_pca9553(card, 5, PCA9553_LS0_INIT);
}
-/*
- * restore LEDs state to off peak_pciec_leds_exit
- */
+/* restore LEDs state to off peak_pciec_leds_exit */
static void peak_pciec_leds_exit(struct peak_pciec_card *card)
{
/* switch LEDs to off */
peak_pciec_write_pca9553(card, 5, PCA9553_LED_OFF_ALL);
}
-/*
- * normal write sja1000 register method overloaded to catch when controller
+/* normal write sja1000 register method overloaded to catch when controller
* is started or stopped, to control leds
*/
static void peak_pciec_write_reg(const struct sja1000_priv *priv,
@@ -443,7 +452,7 @@ static int peak_pciec_probe(struct pci_dev *pdev, struct net_device *dev)
/* channel is the first one: do the init part */
} else {
/* create the bit banging I2C adapter structure */
- card = kzalloc(sizeof(struct peak_pciec_card), GFP_KERNEL);
+ card = kzalloc(sizeof(*card), GFP_KERNEL);
if (!card)
return -ENOMEM;
@@ -506,9 +515,7 @@ static void peak_pciec_remove(struct peak_pciec_card *card)
#else /* CONFIG_CAN_PEAK_PCIEC */
-/*
- * Placebo functions when PCAN-ExpressCard support is not selected
- */
+/* Placebo functions when PCAN-ExpressCard support is not selected */
static inline int peak_pciec_probe(struct pci_dev *pdev, struct net_device *dev)
{
return -ENODEV;
@@ -549,6 +556,7 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
void __iomem *cfg_base, *reg_base;
u16 sub_sys_id, icr;
int i, err, channels;
+ char fw_str[14] = "";
err = pci_enable_device(pdev);
if (err)
@@ -602,6 +610,21 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Leave parport mux mode */
writeb(0x04, cfg_base + PITA_MISC + 3);
+ /* FPGA equipped card if not 0 */
+ if (readl(cfg_base + PEAK_VER_REG1)) {
+ /* FPGA card: display version of the running firmware */
+ u32 fw_ver = readl(cfg_base + PEAK_VER_REG2);
+
+ snprintf(fw_str, sizeof(fw_str), " FW v%u.%u.%u",
+ (fw_ver >> 12) & 0xf,
+ (fw_ver >> 8) & 0xf,
+ (fw_ver >> 4) & 0xf);
+ }
+
+ /* Display commercial name (and, eventually, FW version) of the card */
+ dev_info(&pdev->dev, "%ux CAN %s%s\n",
+ channels, (const char *)ent->driver_data, fw_str);
+
icr = readw(cfg_base + PITA_ICR + 2);
for (i = 0; i < channels; i++) {
@@ -642,8 +665,7 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
chan->prev_dev = pci_get_drvdata(pdev);
pci_set_drvdata(pdev, dev);
- /*
- * PCAN-ExpressCard needs some additional i2c init.
+ /* PCAN-ExpressCard needs some additional i2c init.
* This must be done *before* register_sja1000dev() but
* *after* devices linkage
*/
@@ -709,7 +731,8 @@ failure_disable_pci:
/* pci_xxx_config_word() return positive PCIBIOS_xxx error codes while
* the probe() function must return a negative errno in case of failure
- * (err is unchanged if negative) */
+ * (err is unchanged if negative)
+ */
return pcibios_err_to_errno(err);
}
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 9ae48072b6c6..673861ab665a 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -15,10 +15,10 @@
#include <linux/bitfield.h>
#include <linux/clk.h>
#include <linux/device.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/pm_runtime.h>
+#include <linux/property.h>
#include <asm/unaligned.h>
@@ -1456,7 +1456,7 @@ mcp251xfd_rx_ring_update(const struct mcp251xfd_priv *priv,
}
static void
-mcp251xfd_hw_rx_obj_to_skb(struct mcp251xfd_priv *priv,
+mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv,
const struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj,
struct sk_buff *skb)
{
@@ -2195,8 +2195,10 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
FIELD_GET(MCP251XFD_REG_INT_IE_MASK,
priv->regs_status.intf);
- if (!(intf_pending))
+ if (!(intf_pending)) {
+ can_rx_offload_threaded_irq_finish(&priv->offload);
return handled;
+ }
/* Some interrupts must be ACKed in the
* MCP251XFD_REG_INT register.
@@ -2296,6 +2298,8 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
} while (1);
out_fail:
+ can_rx_offload_threaded_irq_finish(&priv->offload);
+
netdev_err(priv->ndev, "IRQ handler returned %d (intf=0x%08x).\n",
err, priv->regs_status.intf);
mcp251xfd_dump(priv);
@@ -2524,8 +2528,8 @@ static int mcp251xfd_open(struct net_device *ndev)
can_rx_offload_enable(&priv->offload);
err = request_threaded_irq(spi->irq, NULL, mcp251xfd_irq,
- IRQF_ONESHOT, dev_name(&spi->dev),
- priv);
+ IRQF_SHARED | IRQF_ONESHOT,
+ dev_name(&spi->dev), priv);
if (err)
goto out_can_rx_offload_disable;
@@ -2857,7 +2861,7 @@ static int mcp251xfd_probe(struct spi_device *spi)
struct gpio_desc *rx_int;
struct regulator *reg_vdd, *reg_xceiver;
struct clk *clk;
- u32 freq;
+ u32 freq = 0;
int err;
if (!spi->irq)
@@ -2884,11 +2888,19 @@ static int mcp251xfd_probe(struct spi_device *spi)
return dev_err_probe(&spi->dev, PTR_ERR(reg_xceiver),
"Failed to get Transceiver regulator!\n");
- clk = devm_clk_get(&spi->dev, NULL);
+ clk = devm_clk_get_optional(&spi->dev, NULL);
if (IS_ERR(clk))
return dev_err_probe(&spi->dev, PTR_ERR(clk),
"Failed to get Oscillator (clock)!\n");
- freq = clk_get_rate(clk);
+ if (clk) {
+ freq = clk_get_rate(clk);
+ } else {
+ err = device_property_read_u32(&spi->dev, "clock-frequency",
+ &freq);
+ if (err)
+ return dev_err_probe(&spi->dev, err,
+ "Failed to get clock-frequency!\n");
+ }
/* Sanity check */
if (freq < MCP251XFD_SYSCLOCK_HZ_MIN ||
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c
index ed3169274d24..712e09186987 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c
@@ -13,7 +13,7 @@
static u64 mcp251xfd_timestamp_read(const struct cyclecounter *cc)
{
- struct mcp251xfd_priv *priv;
+ const struct mcp251xfd_priv *priv;
u32 timestamp = 0;
int err;
@@ -39,7 +39,7 @@ static void mcp251xfd_timestamp_work(struct work_struct *work)
MCP251XFD_TIMESTAMP_WORK_DELAY_SEC * HZ);
}
-void mcp251xfd_skb_set_timestamp(struct mcp251xfd_priv *priv,
+void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv,
struct sk_buff *skb, u32 timestamp)
{
struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
index 1002f3902ad2..0f322dabaf65 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
@@ -853,7 +853,7 @@ int mcp251xfd_regmap_init(struct mcp251xfd_priv *priv);
u16 mcp251xfd_crc16_compute2(const void *cmd, size_t cmd_size,
const void *data, size_t data_size);
u16 mcp251xfd_crc16_compute(const void *data, size_t data_size);
-void mcp251xfd_skb_set_timestamp(struct mcp251xfd_priv *priv,
+void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv,
struct sk_buff *skb, u32 timestamp);
void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv);
void mcp251xfd_timestamp_stop(struct mcp251xfd_priv *priv);
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index 73245d8836a9..353062ead98f 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -786,6 +786,8 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id)
int_status = hecc_read(priv, HECC_CANGIF0);
}
+ can_rx_offload_irq_finish(&priv->offload);
+
return IRQ_HANDLED;
}
diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c
index 66fa8b07c2e6..c6068a251fbe 100644
--- a/drivers/net/can/usb/esd_usb2.c
+++ b/drivers/net/can/usb/esd_usb2.c
@@ -224,8 +224,8 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv,
if (id == ESD_EV_CAN_ERROR_EXT) {
u8 state = msg->msg.rx.data[0];
u8 ecc = msg->msg.rx.data[1];
- u8 txerr = msg->msg.rx.data[2];
- u8 rxerr = msg->msg.rx.data[3];
+ u8 rxerr = msg->msg.rx.data[2];
+ u8 txerr = msg->msg.rx.data[3];
skb = alloc_can_err_skb(priv->netdev, &cf);
if (skb == NULL) {
@@ -476,7 +476,7 @@ static void esd_usb2_write_bulk_callback(struct urb *urb)
netif_trans_update(netdev);
}
-static ssize_t show_firmware(struct device *d,
+static ssize_t firmware_show(struct device *d,
struct device_attribute *attr, char *buf)
{
struct usb_interface *intf = to_usb_interface(d);
@@ -487,9 +487,9 @@ static ssize_t show_firmware(struct device *d,
(dev->version >> 8) & 0xf,
dev->version & 0xff);
}
-static DEVICE_ATTR(firmware, 0444, show_firmware, NULL);
+static DEVICE_ATTR_RO(firmware);
-static ssize_t show_hardware(struct device *d,
+static ssize_t hardware_show(struct device *d,
struct device_attribute *attr, char *buf)
{
struct usb_interface *intf = to_usb_interface(d);
@@ -500,9 +500,9 @@ static ssize_t show_hardware(struct device *d,
(dev->version >> 24) & 0xf,
(dev->version >> 16) & 0xff);
}
-static DEVICE_ATTR(hardware, 0444, show_hardware, NULL);
+static DEVICE_ATTR_RO(hardware);
-static ssize_t show_nets(struct device *d,
+static ssize_t nets_show(struct device *d,
struct device_attribute *attr, char *buf)
{
struct usb_interface *intf = to_usb_interface(d);
@@ -510,7 +510,7 @@ static ssize_t show_nets(struct device *d,
return sprintf(buf, "%d", dev->net_count);
}
-static DEVICE_ATTR(nets, 0444, show_nets, NULL);
+static DEVICE_ATTR_RO(nets);
static int esd_usb2_send_msg(struct esd_usb2 *dev, struct esd_usb2_msg *msg)
{
diff --git a/drivers/net/can/usb/etas_es58x/es581_4.c b/drivers/net/can/usb/etas_es58x/es581_4.c
index 1985f772fc3c..14e360c9f2c9 100644
--- a/drivers/net/can/usb/etas_es58x/es581_4.c
+++ b/drivers/net/can/usb/etas_es58x/es581_4.c
@@ -355,7 +355,7 @@ static int es581_4_tx_can_msg(struct es58x_priv *priv,
return -EMSGSIZE;
if (priv->tx_can_msg_cnt == 0) {
- msg_len = 1; /* struct es581_4_bulk_tx_can_msg:num_can_msg */
+ msg_len = sizeof(es581_4_urb_cmd->bulk_tx_can_msg.num_can_msg);
es581_4_fill_urb_header(urb_cmd, ES581_4_CAN_COMMAND_TYPE,
ES581_4_CMD_ID_TX_MSG,
priv->channel_idx, msg_len);
@@ -371,8 +371,7 @@ static int es581_4_tx_can_msg(struct es58x_priv *priv,
return ret;
/* Fill message contents. */
- tx_can_msg = (struct es581_4_tx_can_msg *)
- &es581_4_urb_cmd->bulk_tx_can_msg.tx_can_msg_buf[msg_len - 1];
+ tx_can_msg = (typeof(tx_can_msg))&es581_4_urb_cmd->raw_msg[msg_len];
put_unaligned_le32(es58x_get_raw_can_id(cf), &tx_can_msg->can_id);
put_unaligned_le32(priv->tx_head, &tx_can_msg->packet_idx);
put_unaligned_le16((u16)es58x_get_flags(skb), &tx_can_msg->flags);
diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c
index 8e9102482c52..96a13c770e4a 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_core.c
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.c
@@ -19,7 +19,7 @@
#include "es58x_core.h"
#define DRV_VERSION "1.00"
-MODULE_AUTHOR("Mailhol Vincent <mailhol.vincent@wanadoo.fr>");
+MODULE_AUTHOR("Vincent Mailhol <mailhol.vincent@wanadoo.fr>");
MODULE_AUTHOR("Arunachalam Santhanam <arunachalam.santhanam@in.bosch.com>");
MODULE_DESCRIPTION("Socket CAN driver for ETAS ES58X USB adapters");
MODULE_VERSION(DRV_VERSION);
@@ -70,7 +70,7 @@ MODULE_DEVICE_TABLE(usb, es58x_id_table);
* bytes (the start of frame) are skipped and the CRC calculation
* starts on the third byte.
*/
-#define ES58X_CRC_CALC_OFFSET 2
+#define ES58X_CRC_CALC_OFFSET sizeof_field(union es58x_urb_cmd, sof)
/**
* es58x_calculate_crc() - Compute the crc16 of a given URB.
@@ -2108,6 +2108,25 @@ static int es58x_init_netdev(struct es58x_device *es58x_dev, int channel_idx)
}
/**
+ * es58x_free_netdevs() - Release all network resources of the device.
+ * @es58x_dev: ES58X device.
+ */
+static void es58x_free_netdevs(struct es58x_device *es58x_dev)
+{
+ int i;
+
+ for (i = 0; i < es58x_dev->num_can_ch; i++) {
+ struct net_device *netdev = es58x_dev->netdev[i];
+
+ if (!netdev)
+ continue;
+ unregister_candev(netdev);
+ es58x_dev->netdev[i] = NULL;
+ free_candev(netdev);
+ }
+}
+
+/**
* es58x_get_product_info() - Get the product information and print them.
* @es58x_dev: ES58X device.
*
@@ -2152,14 +2171,13 @@ static int es58x_get_product_info(struct es58x_device *es58x_dev)
/**
* es58x_init_es58x_dev() - Initialize the ES58X device.
* @intf: USB interface.
- * @p_es58x_dev: pointer to the address of the ES58X device.
* @driver_info: Quirks of the device.
*
- * Return: zero on success, errno when any error occurs.
+ * Return: pointer to an ES58X device on success, error pointer when
+ * any error occurs.
*/
-static int es58x_init_es58x_dev(struct usb_interface *intf,
- struct es58x_device **p_es58x_dev,
- kernel_ulong_t driver_info)
+static struct es58x_device *es58x_init_es58x_dev(struct usb_interface *intf,
+ kernel_ulong_t driver_info)
{
struct device *dev = &intf->dev;
struct es58x_device *es58x_dev;
@@ -2176,7 +2194,7 @@ static int es58x_init_es58x_dev(struct usb_interface *intf,
ret = usb_find_common_endpoints(intf->cur_altsetting, &ep_in, &ep_out,
NULL, NULL);
if (ret)
- return ret;
+ return ERR_PTR(ret);
if (driver_info & ES58X_FD_FAMILY) {
param = &es58x_fd_param;
@@ -2186,9 +2204,10 @@ static int es58x_init_es58x_dev(struct usb_interface *intf,
ops = &es581_4_ops;
}
- es58x_dev = kzalloc(es58x_sizeof_es58x_device(param), GFP_KERNEL);
+ es58x_dev = devm_kzalloc(dev, es58x_sizeof_es58x_device(param),
+ GFP_KERNEL);
if (!es58x_dev)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
es58x_dev->param = param;
es58x_dev->ops = ops;
@@ -2213,9 +2232,7 @@ static int es58x_init_es58x_dev(struct usb_interface *intf,
ep_out->bEndpointAddress);
es58x_dev->rx_max_packet_size = le16_to_cpu(ep_in->wMaxPacketSize);
- *p_es58x_dev = es58x_dev;
-
- return 0;
+ return es58x_dev;
}
/**
@@ -2232,30 +2249,21 @@ static int es58x_probe(struct usb_interface *intf,
struct es58x_device *es58x_dev;
int ch_idx, ret;
- ret = es58x_init_es58x_dev(intf, &es58x_dev, id->driver_info);
- if (ret)
- return ret;
+ es58x_dev = es58x_init_es58x_dev(intf, id->driver_info);
+ if (IS_ERR(es58x_dev))
+ return PTR_ERR(es58x_dev);
ret = es58x_get_product_info(es58x_dev);
if (ret)
- goto cleanup_es58x_dev;
+ return ret;
for (ch_idx = 0; ch_idx < es58x_dev->num_can_ch; ch_idx++) {
ret = es58x_init_netdev(es58x_dev, ch_idx);
- if (ret)
- goto cleanup_candev;
- }
-
- return ret;
-
- cleanup_candev:
- for (ch_idx = 0; ch_idx < es58x_dev->num_can_ch; ch_idx++)
- if (es58x_dev->netdev[ch_idx]) {
- unregister_candev(es58x_dev->netdev[ch_idx]);
- free_candev(es58x_dev->netdev[ch_idx]);
+ if (ret) {
+ es58x_free_netdevs(es58x_dev);
+ return ret;
}
- cleanup_es58x_dev:
- kfree(es58x_dev);
+ }
return ret;
}
@@ -2270,24 +2278,12 @@ static int es58x_probe(struct usb_interface *intf,
static void es58x_disconnect(struct usb_interface *intf)
{
struct es58x_device *es58x_dev = usb_get_intfdata(intf);
- struct net_device *netdev;
- int i;
dev_info(&intf->dev, "Disconnecting %s %s\n",
es58x_dev->udev->manufacturer, es58x_dev->udev->product);
- for (i = 0; i < es58x_dev->num_can_ch; i++) {
- netdev = es58x_dev->netdev[i];
- if (!netdev)
- continue;
- unregister_candev(netdev);
- es58x_dev->netdev[i] = NULL;
- free_candev(netdev);
- }
-
+ es58x_free_netdevs(es58x_dev);
es58x_free_urbs(es58x_dev);
-
- kfree(es58x_dev);
usb_set_intfdata(intf, NULL);
}
diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.h b/drivers/net/can/usb/etas_es58x/es58x_core.h
index fcf219e727bf..826a15871573 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_core.h
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.h
@@ -287,7 +287,7 @@ struct es58x_priv {
* @rx_urb_cmd_max_len: Maximum length of a RX URB command.
* @fifo_mask: Bit mask to quickly convert the tx_tail and tx_head
* field of the struct es58x_priv into echo_skb
- * indexes. Properties: @fifo_mask = echos_skb_max - 1 where
+ * indexes. Properties: @fifo_mask = echo_skb_max - 1 where
* echo_skb_max must be a power of two. Also, echo_skb_max must
* not exceed the maximum size of the device internal TX FIFO
* length. This parameter is used to control the network queue
diff --git a/drivers/net/can/usb/etas_es58x/es58x_fd.c b/drivers/net/can/usb/etas_es58x/es58x_fd.c
index 1a2779d383a4..af042aa55f59 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_fd.c
+++ b/drivers/net/can/usb/etas_es58x/es58x_fd.c
@@ -357,8 +357,7 @@ static int es58x_fd_tx_can_msg(struct es58x_priv *priv,
return ret;
/* Fill message contents. */
- tx_can_msg = (struct es58x_fd_tx_can_msg *)
- &es58x_fd_urb_cmd->tx_can_msg_buf[msg_len];
+ tx_can_msg = (typeof(tx_can_msg))&es58x_fd_urb_cmd->raw_msg[msg_len];
tx_can_msg->packet_idx = (u8)priv->tx_head;
put_unaligned_le32(es58x_get_raw_can_id(cf), &tx_can_msg->can_id);
tx_can_msg->flags = (u8)es58x_get_flags(skb);
@@ -463,9 +462,9 @@ static int es58x_fd_get_timestamp(struct es58x_device *es58x_dev)
}
/* Nominal bittiming constants for ES582.1 and ES584.1 as specified in
- * the microcontroller datasheet: "SAM E701/S70/V70/V71 Family"
- * section 49.6.8 "MCAN Nominal Bit Timing and Prescaler Register"
- * from Microchip.
+ * the microcontroller datasheet: "SAM E70/S70/V70/V71 Family" section
+ * 49.6.8 "MCAN Nominal Bit Timing and Prescaler Register" from
+ * Microchip.
*
* The values from the specification are the hardware register
* values. To convert them to the functional values, all ranges were
@@ -484,8 +483,8 @@ static const struct can_bittiming_const es58x_fd_nom_bittiming_const = {
};
/* Data bittiming constants for ES582.1 and ES584.1 as specified in
- * the microcontroller datasheet: "SAM E701/S70/V70/V71 Family"
- * section 49.6.4 "MCAN Data Bit Timing and Prescaler Register" from
+ * the microcontroller datasheet: "SAM E70/S70/V70/V71 Family" section
+ * 49.6.4 "MCAN Data Bit Timing and Prescaler Register" from
* Microchip.
*/
static const struct can_bittiming_const es58x_fd_data_bittiming_const = {
@@ -501,9 +500,9 @@ static const struct can_bittiming_const es58x_fd_data_bittiming_const = {
};
/* Transmission Delay Compensation constants for ES582.1 and ES584.1
- * as specified in the microcontroller datasheet: "SAM
- * E701/S70/V70/V71 Family" section 49.6.15 "MCAN Transmitter Delay
- * Compensation Register" from Microchip.
+ * as specified in the microcontroller datasheet: "SAM E70/S70/V70/V71
+ * Family" section 49.6.15 "MCAN Transmitter Delay Compensation
+ * Register" from Microchip.
*/
static const struct can_tdc_const es58x_tdc_const = {
.tdcv_max = 0, /* Manual mode not supported. */
diff --git a/drivers/net/can/usb/etas_es58x/es58x_fd.h b/drivers/net/can/usb/etas_es58x/es58x_fd.h
index ee18a87e40c0..a191891b8777 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_fd.h
+++ b/drivers/net/can/usb/etas_es58x/es58x_fd.h
@@ -96,23 +96,14 @@ struct es58x_fd_bittiming {
* @ctrlmode: type enum es58x_fd_ctrlmode.
* @canfd_enabled: boolean (0: Classical CAN, 1: CAN and/or CANFD).
* @data_bittiming: Bittiming for flexible data-rate transmission.
- * @tdc_enabled: Transmitter Delay Compensation switch (0: disabled,
- * 1: enabled). On very high bitrates, the delay between when the
- * bit is sent and received on the CANTX and CANRX pins of the
- * transceiver start to be significant enough for errors to occur
- * and thus need to be compensated.
- * @tdco: Transmitter Delay Compensation Offset. Offset value, in time
- * quanta, defining the delay between the start of the bit
- * reception on the CANRX pin of the transceiver and the SSP
- * (Secondary Sample Point). Valid values: 0 to 127.
- * @tdcf: Transmitter Delay Compensation Filter window. Defines the
- * minimum value for the SSP position, in time quanta. The
- * feature is enabled when TDCF is configured to a value greater
- * than TDCO. Valid values: 0 to 127.
+ * @tdc_enabled: Transmitter Delay Compensation switch (0: TDC is
+ * disabled, 1: TDC is enabled).
+ * @tdco: Transmitter Delay Compensation Offset.
+ * @tdcf: Transmitter Delay Compensation Filter window.
*
- * Please refer to the microcontroller datasheet: "SAM
- * E701/S70/V70/V71 Family" section 49 "Controller Area Network
- * (MCAN)" for additional information.
+ * Please refer to the microcontroller datasheet: "SAM E70/S70/V70/V71
+ * Family" section 49 "Controller Area Network (MCAN)" for additional
+ * information.
*/
struct es58x_fd_tx_conf_msg {
struct es58x_fd_bittiming nominal_bittiming;
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
index 899a3d21b77f..837b3fecd71e 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -63,6 +63,8 @@
#define PCAN_USB_MSG_HEADER_LEN 2
+#define PCAN_USB_MSG_TX_CAN 2 /* Tx msg is a CAN frame */
+
/* PCAN-USB adapter internal clock (MHz) */
#define PCAN_USB_CRYSTAL_HZ 16000000
@@ -73,6 +75,10 @@
#define PCAN_USB_STATUSLEN_RTR (1 << 4)
#define PCAN_USB_STATUSLEN_DLC (0xf)
+/* PCAN-USB 4.1 CAN Id tx extended flags */
+#define PCAN_USB_TX_SRR 0x01 /* SJA1000 SRR command */
+#define PCAN_USB_TX_AT 0x02 /* SJA1000 AT command */
+
/* PCAN-USB error flags */
#define PCAN_USB_ERROR_TXFULL 0x01
#define PCAN_USB_ERROR_RXQOVR 0x02
@@ -385,7 +391,8 @@ static int pcan_usb_get_device_id(struct peak_usb_device *dev, u32 *device_id)
if (err)
netdev_err(dev->netdev, "getting device id failure: %d\n", err);
- *device_id = args[0];
+ else
+ *device_id = args[0];
return err;
}
@@ -446,145 +453,65 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n,
{
struct sk_buff *skb;
struct can_frame *cf;
- enum can_state new_state;
+ enum can_state new_state = CAN_STATE_ERROR_ACTIVE;
/* ignore this error until 1st ts received */
if (n == PCAN_USB_ERROR_QOVR)
if (!mc->pdev->time_ref.tick_count)
return 0;
- new_state = mc->pdev->dev.can.state;
-
- switch (mc->pdev->dev.can.state) {
- case CAN_STATE_ERROR_ACTIVE:
- if (n & PCAN_USB_ERROR_BUS_LIGHT) {
- new_state = CAN_STATE_ERROR_WARNING;
- break;
- }
- fallthrough;
-
- case CAN_STATE_ERROR_WARNING:
- if (n & PCAN_USB_ERROR_BUS_HEAVY) {
- new_state = CAN_STATE_ERROR_PASSIVE;
- break;
- }
- if (n & PCAN_USB_ERROR_BUS_OFF) {
- new_state = CAN_STATE_BUS_OFF;
- break;
- }
- if (n & ~PCAN_USB_ERROR_BUS) {
- /*
- * trick to bypass next comparison and process other
- * errors
- */
- new_state = CAN_STATE_MAX;
- break;
- }
- if ((n & PCAN_USB_ERROR_BUS_LIGHT) == 0) {
- /* no error (back to active state) */
- new_state = CAN_STATE_ERROR_ACTIVE;
- break;
- }
- break;
-
- case CAN_STATE_ERROR_PASSIVE:
- if (n & PCAN_USB_ERROR_BUS_OFF) {
- new_state = CAN_STATE_BUS_OFF;
- break;
- }
- if (n & PCAN_USB_ERROR_BUS_LIGHT) {
- new_state = CAN_STATE_ERROR_WARNING;
- break;
- }
- if (n & ~PCAN_USB_ERROR_BUS) {
- /*
- * trick to bypass next comparison and process other
- * errors
- */
- new_state = CAN_STATE_MAX;
- break;
- }
-
- if ((n & PCAN_USB_ERROR_BUS_HEAVY) == 0) {
- /* no error (back to warning state) */
- new_state = CAN_STATE_ERROR_WARNING;
- break;
- }
- break;
-
- default:
- /* do nothing waiting for restart */
- return 0;
- }
-
- /* donot post any error if current state didn't change */
- if (mc->pdev->dev.can.state == new_state)
- return 0;
-
/* allocate an skb to store the error frame */
skb = alloc_can_err_skb(mc->netdev, &cf);
- if (!skb)
- return -ENOMEM;
-
- switch (new_state) {
- case CAN_STATE_BUS_OFF:
- cf->can_id |= CAN_ERR_BUSOFF;
- mc->pdev->dev.can.can_stats.bus_off++;
- can_bus_off(mc->netdev);
- break;
-
- case CAN_STATE_ERROR_PASSIVE:
- cf->can_id |= CAN_ERR_CRTL;
- cf->data[1] = (mc->pdev->bec.txerr > mc->pdev->bec.rxerr) ?
- CAN_ERR_CRTL_TX_PASSIVE :
- CAN_ERR_CRTL_RX_PASSIVE;
- cf->data[6] = mc->pdev->bec.txerr;
- cf->data[7] = mc->pdev->bec.rxerr;
-
- mc->pdev->dev.can.can_stats.error_passive++;
- break;
-
- case CAN_STATE_ERROR_WARNING:
- cf->can_id |= CAN_ERR_CRTL;
- cf->data[1] = (mc->pdev->bec.txerr > mc->pdev->bec.rxerr) ?
- CAN_ERR_CRTL_TX_WARNING :
- CAN_ERR_CRTL_RX_WARNING;
- cf->data[6] = mc->pdev->bec.txerr;
- cf->data[7] = mc->pdev->bec.rxerr;
-
- mc->pdev->dev.can.can_stats.error_warning++;
- break;
- case CAN_STATE_ERROR_ACTIVE:
- cf->can_id |= CAN_ERR_CRTL;
- cf->data[1] = CAN_ERR_CRTL_ACTIVE;
-
- /* sync local copies of rxerr/txerr counters */
- mc->pdev->bec.txerr = 0;
- mc->pdev->bec.rxerr = 0;
- break;
-
- default:
- /* CAN_STATE_MAX (trick to handle other errors) */
- if (n & PCAN_USB_ERROR_TXQFULL)
- netdev_dbg(mc->netdev, "device Tx queue full)\n");
-
- if (n & PCAN_USB_ERROR_RXQOVR) {
- netdev_dbg(mc->netdev, "data overrun interrupt\n");
+ if (n & PCAN_USB_ERROR_RXQOVR) {
+ /* data overrun interrupt */
+ netdev_dbg(mc->netdev, "data overrun interrupt\n");
+ mc->netdev->stats.rx_over_errors++;
+ mc->netdev->stats.rx_errors++;
+ if (cf) {
cf->can_id |= CAN_ERR_CRTL;
cf->data[1] |= CAN_ERR_CRTL_RX_OVERFLOW;
- mc->netdev->stats.rx_over_errors++;
- mc->netdev->stats.rx_errors++;
}
+ }
- cf->data[6] = mc->pdev->bec.txerr;
- cf->data[7] = mc->pdev->bec.rxerr;
+ if (n & PCAN_USB_ERROR_TXQFULL)
+ netdev_dbg(mc->netdev, "device Tx queue full)\n");
- new_state = mc->pdev->dev.can.state;
- break;
+ if (n & PCAN_USB_ERROR_BUS_OFF) {
+ new_state = CAN_STATE_BUS_OFF;
+ } else if (n & PCAN_USB_ERROR_BUS_HEAVY) {
+ new_state = ((mc->pdev->bec.txerr >= 128) ||
+ (mc->pdev->bec.rxerr >= 128)) ?
+ CAN_STATE_ERROR_PASSIVE :
+ CAN_STATE_ERROR_WARNING;
+ } else {
+ new_state = CAN_STATE_ERROR_ACTIVE;
}
- mc->pdev->dev.can.state = new_state;
+ /* handle change of state */
+ if (new_state != mc->pdev->dev.can.state) {
+ enum can_state tx_state =
+ (mc->pdev->bec.txerr >= mc->pdev->bec.rxerr) ?
+ new_state : 0;
+ enum can_state rx_state =
+ (mc->pdev->bec.txerr <= mc->pdev->bec.rxerr) ?
+ new_state : 0;
+
+ can_change_state(mc->netdev, cf, tx_state, rx_state);
+
+ if (new_state == CAN_STATE_BUS_OFF) {
+ can_bus_off(mc->netdev);
+ } else if (cf && (cf->can_id & CAN_ERR_CRTL)) {
+ /* Supply TX/RX error counters in case of
+ * controller error.
+ */
+ cf->data[6] = mc->pdev->bec.txerr;
+ cf->data[7] = mc->pdev->bec.rxerr;
+ }
+ }
+
+ if (!skb)
+ return -ENOMEM;
if (status_len & PCAN_USB_STATUSLEN_TIMESTAMP) {
struct skb_shared_hwtstamps *hwts = skb_hwtstamps(skb);
@@ -706,6 +633,7 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len)
struct sk_buff *skb;
struct can_frame *cf;
struct skb_shared_hwtstamps *hwts;
+ u32 can_id_flags;
skb = alloc_can_skb(mc->netdev, &cf);
if (!skb)
@@ -715,13 +643,15 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len)
if ((mc->ptr + 4) > mc->end)
goto decode_failed;
- cf->can_id = get_unaligned_le32(mc->ptr) >> 3 | CAN_EFF_FLAG;
+ can_id_flags = get_unaligned_le32(mc->ptr);
+ cf->can_id = can_id_flags >> 3 | CAN_EFF_FLAG;
mc->ptr += 4;
} else {
if ((mc->ptr + 2) > mc->end)
goto decode_failed;
- cf->can_id = get_unaligned_le16(mc->ptr) >> 5;
+ can_id_flags = get_unaligned_le16(mc->ptr);
+ cf->can_id = can_id_flags >> 5;
mc->ptr += 2;
}
@@ -744,6 +674,10 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len)
memcpy(cf->data, mc->ptr, cf->len);
mc->ptr += rec_len;
+
+ /* Ignore next byte (client private id) if SRR bit is set */
+ if (can_id_flags & PCAN_USB_TX_SRR)
+ mc->ptr++;
}
/* convert timestamp into kernel time */
@@ -821,10 +755,11 @@ static int pcan_usb_encode_msg(struct peak_usb_device *dev, struct sk_buff *skb,
struct net_device *netdev = dev->netdev;
struct net_device_stats *stats = &netdev->stats;
struct can_frame *cf = (struct can_frame *)skb->data;
+ u32 can_id_flags = cf->can_id & CAN_ERR_MASK;
u8 *pc;
- obuf[0] = 2;
- obuf[1] = 1;
+ obuf[0] = PCAN_USB_MSG_TX_CAN;
+ obuf[1] = 1; /* only one CAN frame is stored in the packet */
pc = obuf + PCAN_USB_MSG_HEADER_LEN;
@@ -839,12 +774,28 @@ static int pcan_usb_encode_msg(struct peak_usb_device *dev, struct sk_buff *skb,
*pc |= PCAN_USB_STATUSLEN_EXT_ID;
pc++;
- put_unaligned_le32((cf->can_id & CAN_ERR_MASK) << 3, pc);
+ can_id_flags <<= 3;
+
+ if (dev->can.ctrlmode & CAN_CTRLMODE_LOOPBACK)
+ can_id_flags |= PCAN_USB_TX_SRR;
+
+ if (dev->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT)
+ can_id_flags |= PCAN_USB_TX_AT;
+
+ put_unaligned_le32(can_id_flags, pc);
pc += 4;
} else {
pc++;
- put_unaligned_le16((cf->can_id & CAN_ERR_MASK) << 5, pc);
+ can_id_flags <<= 5;
+
+ if (dev->can.ctrlmode & CAN_CTRLMODE_LOOPBACK)
+ can_id_flags |= PCAN_USB_TX_SRR;
+
+ if (dev->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT)
+ can_id_flags |= PCAN_USB_TX_AT;
+
+ put_unaligned_le16(can_id_flags, pc);
pc += 2;
}
@@ -854,6 +805,10 @@ static int pcan_usb_encode_msg(struct peak_usb_device *dev, struct sk_buff *skb,
pc += cf->len;
}
+ /* SRR bit needs a writer id (useless here) */
+ if (can_id_flags & PCAN_USB_TX_SRR)
+ *pc++ = 0x80;
+
obuf[(*size)-1] = (u8)(stats->tx_packets & 0xff);
return 0;
@@ -928,6 +883,19 @@ static int pcan_usb_init(struct peak_usb_device *dev)
return err;
}
+ /* Since rev 4.1, PCAN-USB is able to make single-shot as well as
+ * looped back frames.
+ */
+ if (dev->device_rev >= 41) {
+ struct can_priv *priv = netdev_priv(dev->netdev);
+
+ priv->ctrlmode_supported |= CAN_CTRLMODE_ONE_SHOT |
+ CAN_CTRLMODE_LOOPBACK;
+ } else {
+ dev_info(dev->netdev->dev.parent,
+ "Firmware update available. Please contact support@peak-system.com\n");
+ }
+
dev_info(dev->netdev->dev.parent,
"PEAK-System %s adapter hwrev %u serial %08X (%u channel)\n",
pcan_usb.name, dev->device_rev, serial_number,
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index b23e3488695b..bd1417a66cbf 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -2016,15 +2016,6 @@ int b53_br_flags(struct dsa_switch *ds, int port,
}
EXPORT_SYMBOL(b53_br_flags);
-int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
- struct netlink_ext_ack *extack)
-{
- b53_port_set_mcast_flood(ds->priv, port, mrouter);
-
- return 0;
-}
-EXPORT_SYMBOL(b53_set_mrouter);
-
static bool b53_possible_cpu_port(struct dsa_switch *ds, int port)
{
/* Broadcom switches will accept enabling Broadcom tags on the
@@ -2268,7 +2259,6 @@ static const struct dsa_switch_ops b53_switch_ops = {
.port_bridge_leave = b53_br_leave,
.port_pre_bridge_flags = b53_br_flags_pre,
.port_bridge_flags = b53_br_flags,
- .port_set_mrouter = b53_set_mrouter,
.port_stp_state_set = b53_br_set_stp_state,
.port_fast_age = b53_br_fast_age,
.port_vlan_filtering = b53_vlan_filtering,
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 82700a5714c1..9bf8319342b0 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -328,8 +328,6 @@ int b53_br_flags_pre(struct dsa_switch *ds, int port,
int b53_br_flags(struct dsa_switch *ds, int port,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack);
-int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
- struct netlink_ext_ack *extack);
int b53_setup_devlink_resources(struct dsa_switch *ds);
void b53_port_event(struct dsa_switch *ds, int port);
void b53_phylink_validate(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 3b018fcf4412..6ce9ec1283e0 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -1199,7 +1199,6 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
.port_pre_bridge_flags = b53_br_flags_pre,
.port_bridge_flags = b53_br_flags,
.port_stp_state_set = b53_br_set_stp_state,
- .port_set_mrouter = b53_set_mrouter,
.port_fast_age = b53_br_fast_age,
.port_vlan_filtering = b53_vlan_filtering,
.port_vlan_add = b53_vlan_add,
diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
index 9fdcc4bde480..542cfc4ccb08 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.c
+++ b/drivers/net/dsa/hirschmann/hellcreek.c
@@ -912,6 +912,7 @@ static int hellcreek_fdb_dump(struct dsa_switch *ds, int port,
{
struct hellcreek *hellcreek = ds->priv;
u16 entries;
+ int ret = 0;
size_t i;
mutex_lock(&hellcreek->reg_lock);
@@ -943,12 +944,14 @@ static int hellcreek_fdb_dump(struct dsa_switch *ds, int port,
if (!(entry.portmask & BIT(port)))
continue;
- cb(entry.mac, 0, entry.is_static, data);
+ ret = cb(entry.mac, 0, entry.is_static, data);
+ if (ret)
+ break;
}
mutex_unlock(&hellcreek->reg_lock);
- return 0;
+ return ret;
}
static int hellcreek_vlan_filtering(struct dsa_switch *ds, int port,
@@ -1342,6 +1345,7 @@ static int hellcreek_setup(struct dsa_switch *ds)
* filtering setups are not supported.
*/
ds->vlan_filtering_is_global = true;
+ ds->needs_standalone_vlan_filtering = true;
/* Intercept _all_ PTP multicast traffic */
ret = hellcreek_setup_fdb(hellcreek);
@@ -1469,9 +1473,6 @@ static void hellcreek_setup_gcl(struct hellcreek *hellcreek, int port,
u16 data;
u8 gates;
- cur++;
- next++;
-
if (i == schedule->num_entries)
gates = initial->gate_mask ^
cur->gate_mask;
@@ -1500,6 +1501,9 @@ static void hellcreek_setup_gcl(struct hellcreek *hellcreek, int port,
(initial->gate_mask <<
TR_GCLCMD_INIT_GATE_STATES_SHIFT);
hellcreek_write(hellcreek, data, TR_GCLCMD);
+
+ cur++;
+ next++;
}
}
@@ -1547,7 +1551,7 @@ static bool hellcreek_schedule_startable(struct hellcreek *hellcreek, int port)
/* Calculate difference to admin base time */
base_time_ns = ktime_to_ns(hellcreek_port->current_schedule->base_time);
- return base_time_ns - current_ns < (s64)8 * NSEC_PER_SEC;
+ return base_time_ns - current_ns < (s64)4 * NSEC_PER_SEC;
}
static void hellcreek_start_schedule(struct hellcreek *hellcreek, int port)
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index 344374025426..d7ce281570b5 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -557,12 +557,12 @@ static int lan9303_alr_make_entry_raw(struct lan9303 *chip, u32 dat0, u32 dat1)
return 0;
}
-typedef void alr_loop_cb_t(struct lan9303 *chip, u32 dat0, u32 dat1,
- int portmap, void *ctx);
+typedef int alr_loop_cb_t(struct lan9303 *chip, u32 dat0, u32 dat1,
+ int portmap, void *ctx);
-static void lan9303_alr_loop(struct lan9303 *chip, alr_loop_cb_t *cb, void *ctx)
+static int lan9303_alr_loop(struct lan9303 *chip, alr_loop_cb_t *cb, void *ctx)
{
- int i;
+ int ret = 0, i;
mutex_lock(&chip->alr_mutex);
lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD,
@@ -582,13 +582,17 @@ static void lan9303_alr_loop(struct lan9303 *chip, alr_loop_cb_t *cb, void *ctx)
LAN9303_ALR_DAT1_PORT_BITOFFS;
portmap = alrport_2_portmap[alrport];
- cb(chip, dat0, dat1, portmap, ctx);
+ ret = cb(chip, dat0, dat1, portmap, ctx);
+ if (ret)
+ break;
lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD,
LAN9303_ALR_CMD_GET_NEXT);
lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, 0);
}
mutex_unlock(&chip->alr_mutex);
+
+ return ret;
}
static void alr_reg_to_mac(u32 dat0, u32 dat1, u8 mac[6])
@@ -606,18 +610,20 @@ struct del_port_learned_ctx {
};
/* Clear learned (non-static) entry on given port */
-static void alr_loop_cb_del_port_learned(struct lan9303 *chip, u32 dat0,
- u32 dat1, int portmap, void *ctx)
+static int alr_loop_cb_del_port_learned(struct lan9303 *chip, u32 dat0,
+ u32 dat1, int portmap, void *ctx)
{
struct del_port_learned_ctx *del_ctx = ctx;
int port = del_ctx->port;
if (((BIT(port) & portmap) == 0) || (dat1 & LAN9303_ALR_DAT1_STATIC))
- return;
+ return 0;
/* learned entries has only one port, we can just delete */
dat1 &= ~LAN9303_ALR_DAT1_VALID; /* delete entry */
lan9303_alr_make_entry_raw(chip, dat0, dat1);
+
+ return 0;
}
struct port_fdb_dump_ctx {
@@ -626,19 +632,19 @@ struct port_fdb_dump_ctx {
dsa_fdb_dump_cb_t *cb;
};
-static void alr_loop_cb_fdb_port_dump(struct lan9303 *chip, u32 dat0,
- u32 dat1, int portmap, void *ctx)
+static int alr_loop_cb_fdb_port_dump(struct lan9303 *chip, u32 dat0,
+ u32 dat1, int portmap, void *ctx)
{
struct port_fdb_dump_ctx *dump_ctx = ctx;
u8 mac[ETH_ALEN];
bool is_static;
if ((BIT(dump_ctx->port) & portmap) == 0)
- return;
+ return 0;
alr_reg_to_mac(dat0, dat1, mac);
is_static = !!(dat1 & LAN9303_ALR_DAT1_STATIC);
- dump_ctx->cb(mac, 0, is_static, dump_ctx->data);
+ return dump_ctx->cb(mac, 0, is_static, dump_ctx->data);
}
/* Set a static ALR entry. Delete entry if port_map is zero */
@@ -1210,9 +1216,7 @@ static int lan9303_port_fdb_dump(struct dsa_switch *ds, int port,
};
dev_dbg(chip->dev, "%s(%d)\n", __func__, port);
- lan9303_alr_loop(chip, alr_loop_cb_fdb_port_dump, &dump_ctx);
-
- return 0;
+ return lan9303_alr_loop(chip, alr_loop_cb_fdb_port_dump, &dump_ctx);
}
static int lan9303_port_mdb_prepare(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 314ae78bbdd6..e78026ef6d8c 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -1404,11 +1404,17 @@ static int gswip_port_fdb_dump(struct dsa_switch *ds, int port,
addr[1] = mac_bridge.key[2] & 0xff;
addr[0] = (mac_bridge.key[2] >> 8) & 0xff;
if (mac_bridge.val[1] & GSWIP_TABLE_MAC_BRIDGE_STATIC) {
- if (mac_bridge.val[0] & BIT(port))
- cb(addr, 0, true, data);
+ if (mac_bridge.val[0] & BIT(port)) {
+ err = cb(addr, 0, true, data);
+ if (err)
+ return err;
+ }
} else {
- if (((mac_bridge.val[0] & GENMASK(7, 4)) >> 4) == port)
- cb(addr, 0, false, data);
+ if (((mac_bridge.val[0] & GENMASK(7, 4)) >> 4) == port) {
+ err = cb(addr, 0, false, data);
+ if (err)
+ return err;
+ }
}
}
return 0;
diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index 560f6843bb65..c5142f86a3c7 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -687,8 +687,8 @@ static void ksz8_r_vlan_entries(struct ksz_device *dev, u16 addr)
shifts = ksz8->shifts;
ksz8_r_table(dev, TABLE_VLAN, addr, &data);
- addr *= dev->phy_port_cnt;
- for (i = 0; i < dev->phy_port_cnt; i++) {
+ addr *= 4;
+ for (i = 0; i < 4; i++) {
dev->vlan_cache[addr + i].table[0] = (u16)data;
data >>= shifts[VLAN_TABLE];
}
@@ -702,7 +702,7 @@ static void ksz8_r_vlan_table(struct ksz_device *dev, u16 vid, u16 *vlan)
u64 buf;
data = (u16 *)&buf;
- addr = vid / dev->phy_port_cnt;
+ addr = vid / 4;
index = vid & 3;
ksz8_r_table(dev, TABLE_VLAN, addr, &buf);
*vlan = data[index];
@@ -716,7 +716,7 @@ static void ksz8_w_vlan_table(struct ksz_device *dev, u16 vid, u16 vlan)
u64 buf;
data = (u16 *)&buf;
- addr = vid / dev->phy_port_cnt;
+ addr = vid / 4;
index = vid & 3;
ksz8_r_table(dev, TABLE_VLAN, addr, &buf);
data[index] = vlan;
@@ -1119,24 +1119,67 @@ static int ksz8_port_vlan_filtering(struct dsa_switch *ds, int port, bool flag,
if (ksz_is_ksz88x3(dev))
return -ENOTSUPP;
+ /* Discard packets with VID not enabled on the switch */
ksz_cfg(dev, S_MIRROR_CTRL, SW_VLAN_ENABLE, flag);
+ /* Discard packets with VID not enabled on the ingress port */
+ for (port = 0; port < dev->phy_port_cnt; ++port)
+ ksz_port_cfg(dev, port, REG_PORT_CTRL_2, PORT_INGRESS_FILTER,
+ flag);
+
return 0;
}
+static void ksz8_port_enable_pvid(struct ksz_device *dev, int port, bool state)
+{
+ if (ksz_is_ksz88x3(dev)) {
+ ksz_cfg(dev, REG_SW_INSERT_SRC_PVID,
+ 0x03 << (4 - 2 * port), state);
+ } else {
+ ksz_pwrite8(dev, port, REG_PORT_CTRL_12, state ? 0x0f : 0x00);
+ }
+}
+
static int ksz8_port_vlan_add(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan,
struct netlink_ext_ack *extack)
{
bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
struct ksz_device *dev = ds->priv;
+ struct ksz_port *p = &dev->ports[port];
u16 data, new_pvid = 0;
u8 fid, member, valid;
if (ksz_is_ksz88x3(dev))
return -ENOTSUPP;
- ksz_port_cfg(dev, port, P_TAG_CTRL, PORT_REMOVE_TAG, untagged);
+ /* If a VLAN is added with untagged flag different from the
+ * port's Remove Tag flag, we need to change the latter.
+ * Ignore VID 0, which is always untagged.
+ * Ignore CPU port, which will always be tagged.
+ */
+ if (untagged != p->remove_tag && vlan->vid != 0 &&
+ port != dev->cpu_port) {
+ unsigned int vid;
+
+ /* Reject attempts to add a VLAN that requires the
+ * Remove Tag flag to be changed, unless there are no
+ * other VLANs currently configured.
+ */
+ for (vid = 1; vid < dev->num_vlans; ++vid) {
+ /* Skip the VID we are going to add or reconfigure */
+ if (vid == vlan->vid)
+ continue;
+
+ ksz8_from_vlan(dev, dev->vlan_cache[vid].table[0],
+ &fid, &member, &valid);
+ if (valid && (member & BIT(port)))
+ return -EINVAL;
+ }
+
+ ksz_port_cfg(dev, port, P_TAG_CTRL, PORT_REMOVE_TAG, untagged);
+ p->remove_tag = untagged;
+ }
ksz8_r_vlan_table(dev, vlan->vid, &data);
ksz8_from_vlan(dev, data, &fid, &member, &valid);
@@ -1160,9 +1203,11 @@ static int ksz8_port_vlan_add(struct dsa_switch *ds, int port,
u16 vid;
ksz_pread16(dev, port, REG_PORT_CTRL_VID, &vid);
- vid &= 0xfff;
+ vid &= ~VLAN_VID_MASK;
vid |= new_pvid;
ksz_pwrite16(dev, port, REG_PORT_CTRL_VID, vid);
+
+ ksz8_port_enable_pvid(dev, port, true);
}
return 0;
@@ -1171,9 +1216,8 @@ static int ksz8_port_vlan_add(struct dsa_switch *ds, int port,
static int ksz8_port_vlan_del(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan)
{
- bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
struct ksz_device *dev = ds->priv;
- u16 data, pvid, new_pvid = 0;
+ u16 data, pvid;
u8 fid, member, valid;
if (ksz_is_ksz88x3(dev))
@@ -1182,8 +1226,6 @@ static int ksz8_port_vlan_del(struct dsa_switch *ds, int port,
ksz_pread16(dev, port, REG_PORT_CTRL_VID, &pvid);
pvid = pvid & 0xFFF;
- ksz_port_cfg(dev, port, P_TAG_CTRL, PORT_REMOVE_TAG, untagged);
-
ksz8_r_vlan_table(dev, vlan->vid, &data);
ksz8_from_vlan(dev, data, &fid, &member, &valid);
@@ -1195,14 +1237,11 @@ static int ksz8_port_vlan_del(struct dsa_switch *ds, int port,
valid = 0;
}
- if (pvid == vlan->vid)
- new_pvid = 1;
-
ksz8_to_vlan(dev, fid, member, valid, &data);
ksz8_w_vlan_table(dev, vlan->vid, data);
- if (new_pvid != pvid)
- ksz_pwrite16(dev, port, REG_PORT_CTRL_VID, pvid);
+ if (pvid == vlan->vid)
+ ksz8_port_enable_pvid(dev, port, false);
return 0;
}
@@ -1435,6 +1474,9 @@ static int ksz8_setup(struct dsa_switch *ds)
ksz_cfg(dev, S_MIRROR_CTRL, SW_MIRROR_RX_TX, false);
+ if (!ksz_is_ksz88x3(dev))
+ ksz_cfg(dev, REG_SW_CTRL_19, SW_INS_TAG_ENABLE, true);
+
/* set broadcast storm protection 10% rate */
regmap_update_bits(dev->regmap[1], S_REPLACE_VID_CTRL,
BROADCAST_STORM_RATE,
@@ -1717,6 +1759,16 @@ static int ksz8_switch_init(struct ksz_device *dev)
/* set the real number of ports */
dev->ds->num_ports = dev->port_cnt;
+ /* We rely on software untagging on the CPU port, so that we
+ * can support both tagged and untagged VLANs
+ */
+ dev->ds->untag_bridge_pvid = true;
+
+ /* VLAN filtering is partly controlled by the global VLAN
+ * Enable flag
+ */
+ dev->ds->vlan_filtering_is_global = true;
+
return 0;
}
diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h
index a32355624f31..6b40bc25f7ff 100644
--- a/drivers/net/dsa/microchip/ksz8795_reg.h
+++ b/drivers/net/dsa/microchip/ksz8795_reg.h
@@ -631,6 +631,10 @@
#define REG_PORT_4_OUT_RATE_3 0xEE
#define REG_PORT_5_OUT_RATE_3 0xFE
+/* 88x3 specific */
+
+#define REG_SW_INSERT_SRC_PVID 0xC2
+
/* PME */
#define SW_PME_OUTPUT_ENABLE BIT(1)
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index 2e6bfd333f50..1597c63988b4 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -27,6 +27,7 @@ struct ksz_port_mib {
struct ksz_port {
u16 member;
u16 vid_member;
+ bool remove_tag; /* Remove Tag flag set, for ksz8795 only */
int stp_state;
struct phy_device phydev;
@@ -205,12 +206,8 @@ static inline int ksz_read64(struct ksz_device *dev, u32 reg, u64 *val)
int ret;
ret = regmap_bulk_read(dev->regmap[2], reg, value, 2);
- if (!ret) {
- /* Ick! ToDo: Add 64bit R/W to regmap on 32bit systems */
- value[0] = swab32(value[0]);
- value[1] = swab32(value[1]);
- *val = swab64((u64)*value);
- }
+ if (!ret)
+ *val = (u64)value[0] << 32 | value[1];
return ret;
}
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 69f21b71614c..d0cba2d1cd68 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -47,6 +47,7 @@ static const struct mt7530_mib_desc mt7530_mib[] = {
MIB_DESC(2, 0x48, "TxBytes"),
MIB_DESC(1, 0x60, "RxDrop"),
MIB_DESC(1, 0x64, "RxFiltering"),
+ MIB_DESC(1, 0x68, "RxUnicast"),
MIB_DESC(1, 0x6c, "RxMulticast"),
MIB_DESC(1, 0x70, "RxBroadcast"),
MIB_DESC(1, 0x74, "RxAlignErr"),
@@ -366,8 +367,8 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid,
int i;
reg[1] |= vid & CVID_MASK;
- if (vid > 1)
- reg[1] |= ATA2_IVL;
+ reg[1] |= ATA2_IVL;
+ reg[1] |= ATA2_FID(FID_BRIDGED);
reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER;
reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP;
/* STATIC_ENT indicate that entry is static wouldn't
@@ -1021,6 +1022,10 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port)
mt7530_write(priv, MT7530_PCR_P(port),
PCR_MATRIX(dsa_user_ports(priv->ds)));
+ /* Set to fallback mode for independent VLAN learning */
+ mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+ MT7530_PORT_FALLBACK_MODE);
+
return 0;
}
@@ -1143,7 +1148,8 @@ mt7530_stp_state_set(struct dsa_switch *ds, int port, u8 state)
break;
}
- mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK, stp_state);
+ mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK(FID_BRIDGED),
+ FID_PST(FID_BRIDGED, stp_state));
}
static int
@@ -1185,18 +1191,6 @@ mt7530_port_bridge_flags(struct dsa_switch *ds, int port,
}
static int
-mt7530_port_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
- struct netlink_ext_ack *extack)
-{
- struct mt7530_priv *priv = ds->priv;
-
- mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)),
- mrouter ? UNM_FFP(BIT(port)) : 0);
-
- return 0;
-}
-
-static int
mt7530_port_bridge_join(struct dsa_switch *ds, int port,
struct net_device *bridge)
{
@@ -1229,6 +1223,10 @@ mt7530_port_bridge_join(struct dsa_switch *ds, int port,
PCR_MATRIX_MASK, PCR_MATRIX(port_bitmap));
priv->ports[port].pm |= PCR_MATRIX(port_bitmap);
+ /* Set to fallback mode for independent VLAN learning */
+ mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+ MT7530_PORT_FALLBACK_MODE);
+
mutex_unlock(&priv->reg_mutex);
return 0;
@@ -1241,15 +1239,22 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port)
bool all_user_ports_removed = true;
int i;
- /* When a port is removed from the bridge, the port would be set up
- * back to the default as is at initial boot which is a VLAN-unaware
- * port.
+ /* This is called after .port_bridge_leave when leaving a VLAN-aware
+ * bridge. Don't set standalone ports to fallback mode.
*/
- mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
- MT7530_PORT_MATRIX_MODE);
- mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK | PVC_EG_TAG_MASK,
+ if (dsa_to_port(ds, port)->bridge_dev)
+ mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+ MT7530_PORT_FALLBACK_MODE);
+
+ mt7530_rmw(priv, MT7530_PVC_P(port),
+ VLAN_ATTR_MASK | PVC_EG_TAG_MASK | ACC_FRM_MASK,
VLAN_ATTR(MT7530_VLAN_TRANSPARENT) |
- PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
+ PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT) |
+ MT7530_VLAN_ACC_ALL);
+
+ /* Set PVID to 0 */
+ mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+ G0_PORT_VID_DEF);
for (i = 0; i < MT7530_NUM_PORTS; i++) {
if (dsa_is_user_port(ds, i) &&
@@ -1276,15 +1281,19 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port)
struct mt7530_priv *priv = ds->priv;
/* Trapped into security mode allows packet forwarding through VLAN
- * table lookup. CPU port is set to fallback mode to let untagged
- * frames pass through.
+ * table lookup.
*/
- if (dsa_is_cpu_port(ds, port))
- mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
- MT7530_PORT_FALLBACK_MODE);
- else
+ if (dsa_is_user_port(ds, port)) {
mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
MT7530_PORT_SECURITY_MODE);
+ mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+ G0_PORT_VID(priv->ports[port].pvid));
+
+ /* Only accept tagged frames if PVID is not set */
+ if (!priv->ports[port].pvid)
+ mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+ MT7530_VLAN_ACC_TAGGED);
+ }
/* Set the port as a user port which is to be able to recognize VID
* from incoming packets before fetching entry within the VLAN table.
@@ -1307,11 +1316,8 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
/* Remove this port from the port matrix of the other ports
* in the same bridge. If the port is disabled, port matrix
* is kept and not being setup until the port becomes enabled.
- * And the other port's port matrix cannot be broken when the
- * other port is still a VLAN-aware port.
*/
- if (dsa_is_user_port(ds, i) && i != port &&
- !dsa_port_is_vlan_filtering(dsa_to_port(ds, i))) {
+ if (dsa_is_user_port(ds, i) && i != port) {
if (dsa_to_port(ds, i)->bridge_dev != bridge)
continue;
if (priv->ports[i].enable)
@@ -1329,6 +1335,13 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
PCR_MATRIX(BIT(MT7530_CPU_PORT)));
priv->ports[port].pm = PCR_MATRIX(BIT(MT7530_CPU_PORT));
+ /* When a port is removed from the bridge, the port would be set up
+ * back to the default as is at initial boot which is a VLAN-unaware
+ * port.
+ */
+ mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+ MT7530_PORT_MATRIX_MODE);
+
mutex_unlock(&priv->reg_mutex);
}
@@ -1511,7 +1524,8 @@ mt7530_hw_vlan_add(struct mt7530_priv *priv,
/* Validate the entry with independent learning, create egress tag per
* VLAN and joining the port as one of the port members.
*/
- val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | VLAN_VALID;
+ val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | FID(FID_BRIDGED) |
+ VLAN_VALID;
mt7530_write(priv, MT7530_VAWD1, val);
/* Decide whether adding tag or not for those outgoing packets from the
@@ -1586,6 +1600,21 @@ mt7530_hw_vlan_update(struct mt7530_priv *priv, u16 vid,
}
static int
+mt7530_setup_vlan0(struct mt7530_priv *priv)
+{
+ u32 val;
+
+ /* Validate the entry with independent learning, keep the original
+ * ingress tag attribute.
+ */
+ val = IVL_MAC | EG_CON | PORT_MEM(MT7530_ALL_MEMBERS) | FID(FID_BRIDGED) |
+ VLAN_VALID;
+ mt7530_write(priv, MT7530_VAWD1, val);
+
+ return mt7530_vlan_cmd(priv, MT7530_VTCR_WR_VID, 0);
+}
+
+static int
mt7530_port_vlan_add(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan,
struct netlink_ext_ack *extack)
@@ -1601,9 +1630,28 @@ mt7530_port_vlan_add(struct dsa_switch *ds, int port,
mt7530_hw_vlan_update(priv, vlan->vid, &new_entry, mt7530_hw_vlan_add);
if (pvid) {
- mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
- G0_PORT_VID(vlan->vid));
priv->ports[port].pvid = vlan->vid;
+
+ /* Accept all frames if PVID is set */
+ mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+ MT7530_VLAN_ACC_ALL);
+
+ /* Only configure PVID if VLAN filtering is enabled */
+ if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+ mt7530_rmw(priv, MT7530_PPBV1_P(port),
+ G0_PORT_VID_MASK,
+ G0_PORT_VID(vlan->vid));
+ } else if (vlan->vid && priv->ports[port].pvid == vlan->vid) {
+ /* This VLAN is overwritten without PVID, so unset it */
+ priv->ports[port].pvid = G0_PORT_VID_DEF;
+
+ /* Only accept tagged frames if the port is VLAN-aware */
+ if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+ mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+ MT7530_VLAN_ACC_TAGGED);
+
+ mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+ G0_PORT_VID_DEF);
}
mutex_unlock(&priv->reg_mutex);
@@ -1617,11 +1665,9 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port,
{
struct mt7530_hw_vlan_entry target_entry;
struct mt7530_priv *priv = ds->priv;
- u16 pvid;
mutex_lock(&priv->reg_mutex);
- pvid = priv->ports[port].pvid;
mt7530_hw_vlan_entry_init(&target_entry, port, 0);
mt7530_hw_vlan_update(priv, vlan->vid, &target_entry,
mt7530_hw_vlan_del);
@@ -1629,11 +1675,18 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port,
/* PVID is being restored to the default whenever the PVID port
* is being removed from the VLAN.
*/
- if (pvid == vlan->vid)
- pvid = G0_PORT_VID_DEF;
+ if (priv->ports[port].pvid == vlan->vid) {
+ priv->ports[port].pvid = G0_PORT_VID_DEF;
+
+ /* Only accept tagged frames if the port is VLAN-aware */
+ if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+ mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+ MT7530_VLAN_ACC_TAGGED);
+
+ mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+ G0_PORT_VID_DEF);
+ }
- mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, pvid);
- priv->ports[port].pvid = pvid;
mutex_unlock(&priv->reg_mutex);
@@ -1717,15 +1770,7 @@ static enum dsa_tag_protocol
mtk_get_tag_protocol(struct dsa_switch *ds, int port,
enum dsa_tag_protocol mp)
{
- struct mt7530_priv *priv = ds->priv;
-
- if (port != MT7530_CPU_PORT) {
- dev_warn(priv->dev,
- "port not matched with tagging CPU port\n");
- return DSA_TAG_PROTO_NONE;
- } else {
- return DSA_TAG_PROTO_MTK;
- }
+ return DSA_TAG_PROTO_MTK;
}
#ifdef CONFIG_GPIOLIB
@@ -2054,6 +2099,7 @@ mt7530_setup(struct dsa_switch *ds)
* as two netdev instances.
*/
dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent;
+ ds->assisted_learning_on_cpu_port = true;
ds->mtu_enforcement_ingress = true;
if (priv->id == ID_MT7530) {
@@ -2124,6 +2170,9 @@ mt7530_setup(struct dsa_switch *ds)
mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK,
PCR_MATRIX_CLR);
+ /* Disable learning by default on all ports */
+ mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+
if (dsa_is_cpu_port(ds, i)) {
ret = mt753x_cpu_port_enable(ds, i);
if (ret)
@@ -2131,14 +2180,20 @@ mt7530_setup(struct dsa_switch *ds)
} else {
mt7530_port_disable(ds, i);
- /* Disable learning by default on all user ports */
- mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+ /* Set default PVID to 0 on all user ports */
+ mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK,
+ G0_PORT_VID_DEF);
}
/* Enable consistent egress tag */
mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK,
PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
}
+ /* Setup VLAN ID 0 for VLAN-unaware bridges */
+ ret = mt7530_setup_vlan0(priv);
+ if (ret)
+ return ret;
+
/* Setup port 5 */
priv->p5_intf_sel = P5_DISABLED;
interface = PHY_INTERFACE_MODE_NA;
@@ -2289,6 +2344,9 @@ mt7531_setup(struct dsa_switch *ds)
mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK,
PCR_MATRIX_CLR);
+ /* Disable learning by default on all ports */
+ mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+
mt7530_set(priv, MT7531_DBG_CNT(i), MT7531_DIS_CLR);
if (dsa_is_cpu_port(ds, i)) {
@@ -2298,8 +2356,9 @@ mt7531_setup(struct dsa_switch *ds)
} else {
mt7530_port_disable(ds, i);
- /* Disable learning by default on all user ports */
- mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+ /* Set default PVID to 0 on all user ports */
+ mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK,
+ G0_PORT_VID_DEF);
}
/* Enable consistent egress tag */
@@ -2307,6 +2366,12 @@ mt7531_setup(struct dsa_switch *ds)
PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
}
+ /* Setup VLAN ID 0 for VLAN-unaware bridges */
+ ret = mt7530_setup_vlan0(priv);
+ if (ret)
+ return ret;
+
+ ds->assisted_learning_on_cpu_port = true;
ds->mtu_enforcement_ingress = true;
/* Flush the FDB table */
@@ -3060,7 +3125,6 @@ static const struct dsa_switch_ops mt7530_switch_ops = {
.port_stp_state_set = mt7530_stp_state_set,
.port_pre_bridge_flags = mt7530_port_pre_bridge_flags,
.port_bridge_flags = mt7530_port_bridge_flags,
- .port_set_mrouter = mt7530_port_set_mrouter,
.port_bridge_join = mt7530_port_bridge_join,
.port_bridge_leave = mt7530_port_bridge_leave,
.port_fdb_add = mt7530_port_fdb_add,
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index b19b389ff10a..91508e2feef9 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -80,6 +80,7 @@ enum mt753x_bpdu_port_fw {
#define STATIC_ENT 3
#define MT7530_ATA2 0x78
#define ATA2_IVL BIT(15)
+#define ATA2_FID(x) (((x) & 0x7) << 12)
/* Register for address table write data */
#define MT7530_ATWD 0x7c
@@ -144,15 +145,24 @@ enum mt7530_vlan_cmd {
#define PORT_STAG BIT(31)
/* Independent VLAN Learning */
#define IVL_MAC BIT(30)
+/* Egress Tag Consistent */
+#define EG_CON BIT(29)
/* Per VLAN Egress Tag Control */
#define VTAG_EN BIT(28)
/* VLAN Member Control */
#define PORT_MEM(x) (((x) & 0xff) << 16)
+/* Filter ID */
+#define FID(x) (((x) & 0x7) << 1)
/* VLAN Entry Valid */
#define VLAN_VALID BIT(0)
#define PORT_MEM_SHFT 16
#define PORT_MEM_MASK 0xff
+enum mt7530_fid {
+ FID_STANDALONE = 0,
+ FID_BRIDGED = 1,
+};
+
#define MT7530_VAWD2 0x98
/* Egress Tag Control */
#define ETAG_CTRL_P(p, x) (((x) & 0x3) << ((p) << 1))
@@ -179,8 +189,8 @@ enum mt7530_vlan_egress_attr {
/* Register for port STP state control */
#define MT7530_SSP_P(x) (0x2000 + ((x) * 0x100))
-#define FID_PST(x) ((x) & 0x3)
-#define FID_PST_MASK FID_PST(0x3)
+#define FID_PST(fid, state) (((state) & 0x3) << ((fid) * 2))
+#define FID_PST_MASK(fid) FID_PST(fid, 0x3)
enum mt7530_stp_state {
MT7530_STP_DISABLED = 0,
@@ -230,6 +240,7 @@ enum mt7530_port_mode {
#define PVC_EG_TAG_MASK PVC_EG_TAG(7)
#define VLAN_ATTR(x) (((x) & 0x3) << 6)
#define VLAN_ATTR_MASK VLAN_ATTR(3)
+#define ACC_FRM_MASK GENMASK(1, 0)
enum mt7530_vlan_port_eg_tag {
MT7530_VLAN_EG_DISABLED = 0,
@@ -241,13 +252,19 @@ enum mt7530_vlan_port_attr {
MT7530_VLAN_TRANSPARENT = 3,
};
+enum mt7530_vlan_port_acc_frm {
+ MT7530_VLAN_ACC_ALL = 0,
+ MT7530_VLAN_ACC_TAGGED = 1,
+ MT7530_VLAN_ACC_UNTAGGED = 2,
+};
+
#define STAG_VPID (((x) & 0xffff) << 16)
/* Register for port port-and-protocol based vlan 1 control */
#define MT7530_PPBV1_P(x) (0x2014 + ((x) * 0x100))
#define G0_PORT_VID(x) (((x) & 0xfff) << 0)
#define G0_PORT_VID_MASK G0_PORT_VID(0xfff)
-#define G0_PORT_VID_DEF G0_PORT_VID(1)
+#define G0_PORT_VID_DEF G0_PORT_VID(0)
/* Register for port MAC control register */
#define MT7530_PMCR_P(x) (0x3000 + ((x) * 0x100))
diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig
index 634a48e6616b..7a2445a34eb7 100644
--- a/drivers/net/dsa/mv88e6xxx/Kconfig
+++ b/drivers/net/dsa/mv88e6xxx/Kconfig
@@ -2,6 +2,7 @@
config NET_DSA_MV88E6XXX
tristate "Marvell 88E6xxx Ethernet switch fabric support"
depends on NET_DSA
+ depends on PTP_1588_CLOCK_OPTIONAL
select IRQ_DOMAIN
select NET_DSA_TAG_EDSA
select NET_DSA_TAG_DSA
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 272b0535d946..c45ca2473743 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1221,14 +1221,36 @@ static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port)
bool found = false;
u16 pvlan;
- list_for_each_entry(dp, &dst->ports, list) {
- if (dp->ds->index == dev && dp->index == port) {
+ /* dev is a physical switch */
+ if (dev <= dst->last_switch) {
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->ds->index == dev && dp->index == port) {
+ /* dp might be a DSA link or a user port, so it
+ * might or might not have a bridge_dev
+ * pointer. Use the "found" variable for both
+ * cases.
+ */
+ br = dp->bridge_dev;
+ found = true;
+ break;
+ }
+ }
+ /* dev is a virtual bridge */
+ } else {
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->bridge_num < 0)
+ continue;
+
+ if (dp->bridge_num + 1 + dst->last_switch != dev)
+ continue;
+
+ br = dp->bridge_dev;
found = true;
break;
}
}
- /* Prevent frames from unknown switch or port */
+ /* Prevent frames from unknown switch or virtual bridge */
if (!found)
return 0;
@@ -1236,7 +1258,6 @@ static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port)
if (dp->type == DSA_PORT_TYPE_CPU || dp->type == DSA_PORT_TYPE_DSA)
return mv88e6xxx_port_mask(chip);
- br = dp->bridge_dev;
pvlan = 0;
/* Frames from user ports can egress any local DSA links and CPU ports,
@@ -2422,6 +2443,44 @@ static void mv88e6xxx_crosschip_bridge_leave(struct dsa_switch *ds,
mv88e6xxx_reg_unlock(chip);
}
+/* Treat the software bridge as a virtual single-port switch behind the
+ * CPU and map in the PVT. First dst->last_switch elements are taken by
+ * physical switches, so start from beyond that range.
+ */
+static int mv88e6xxx_map_virtual_bridge_to_pvt(struct dsa_switch *ds,
+ int bridge_num)
+{
+ u8 dev = bridge_num + ds->dst->last_switch + 1;
+ struct mv88e6xxx_chip *chip = ds->priv;
+ int err;
+
+ mv88e6xxx_reg_lock(chip);
+ err = mv88e6xxx_pvt_map(chip, dev, 0);
+ mv88e6xxx_reg_unlock(chip);
+
+ return err;
+}
+
+static int mv88e6xxx_bridge_tx_fwd_offload(struct dsa_switch *ds, int port,
+ struct net_device *br,
+ int bridge_num)
+{
+ return mv88e6xxx_map_virtual_bridge_to_pvt(ds, bridge_num);
+}
+
+static void mv88e6xxx_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port,
+ struct net_device *br,
+ int bridge_num)
+{
+ int err;
+
+ err = mv88e6xxx_map_virtual_bridge_to_pvt(ds, bridge_num);
+ if (err) {
+ dev_err(ds->dev, "failed to remap cross-chip Port VLAN: %pe\n",
+ ERR_PTR(err));
+ }
+}
+
static int mv88e6xxx_software_reset(struct mv88e6xxx_chip *chip)
{
if (chip->info->ops->reset)
@@ -3025,6 +3084,15 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
chip->ds = ds;
ds->slave_mii_bus = mv88e6xxx_default_mdio_bus(chip);
+ /* Since virtual bridges are mapped in the PVT, the number we support
+ * depends on the physical switch topology. We need to let DSA figure
+ * that out and therefore we cannot set this at dsa_register_switch()
+ * time.
+ */
+ if (mv88e6xxx_has_pvt(chip))
+ ds->num_fwd_offloading_bridges = MV88E6XXX_MAX_PVT_SWITCHES -
+ ds->dst->last_switch - 1;
+
mv88e6xxx_reg_lock(chip);
if (chip->info->ops->setup_errata) {
@@ -5729,7 +5797,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
struct netlink_ext_ack *extack)
{
struct mv88e6xxx_chip *chip = ds->priv;
- bool do_fast_age = false;
int err = -EOPNOTSUPP;
mv88e6xxx_reg_lock(chip);
@@ -5741,9 +5808,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
err = mv88e6xxx_port_set_assoc_vector(chip, port, pav);
if (err)
goto out;
-
- if (!learning)
- do_fast_age = true;
}
if (flags.mask & BR_FLOOD) {
@@ -5775,26 +5839,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
out:
mv88e6xxx_reg_unlock(chip);
- if (do_fast_age)
- mv88e6xxx_port_fast_age(ds, port);
-
- return err;
-}
-
-static int mv88e6xxx_port_set_mrouter(struct dsa_switch *ds, int port,
- bool mrouter,
- struct netlink_ext_ack *extack)
-{
- struct mv88e6xxx_chip *chip = ds->priv;
- int err;
-
- if (!chip->info->ops->port_set_mcast_flood)
- return -EOPNOTSUPP;
-
- mv88e6xxx_reg_lock(chip);
- err = chip->info->ops->port_set_mcast_flood(chip, port, mrouter);
- mv88e6xxx_reg_unlock(chip);
-
return err;
}
@@ -6099,7 +6143,6 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
.port_bridge_leave = mv88e6xxx_port_bridge_leave,
.port_pre_bridge_flags = mv88e6xxx_port_pre_bridge_flags,
.port_bridge_flags = mv88e6xxx_port_bridge_flags,
- .port_set_mrouter = mv88e6xxx_port_set_mrouter,
.port_stp_state_set = mv88e6xxx_port_stp_state_set,
.port_fast_age = mv88e6xxx_port_fast_age,
.port_vlan_filtering = mv88e6xxx_port_vlan_filtering,
@@ -6128,6 +6171,8 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
.crosschip_lag_change = mv88e6xxx_crosschip_lag_change,
.crosschip_lag_join = mv88e6xxx_crosschip_lag_join,
.crosschip_lag_leave = mv88e6xxx_crosschip_lag_leave,
+ .port_bridge_tx_fwd_offload = mv88e6xxx_bridge_tx_fwd_offload,
+ .port_bridge_tx_fwd_unoffload = mv88e6xxx_bridge_tx_fwd_unoffload,
};
static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip)
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index b1d46dd8eaab..6ea003678798 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -1277,15 +1277,16 @@ static int mv88e6393x_serdes_port_errata(struct mv88e6xxx_chip *chip, int lane)
int err;
/* mv88e6393x family errata 4.6:
- * Cannot clear PwrDn bit on SERDES on port 0 if device is configured
- * CPU_MGD mode or P0_mode is configured for [x]MII.
- * Workaround: Set Port0 SERDES register 4.F002 bit 5=0 and bit 15=1.
+ * Cannot clear PwrDn bit on SERDES if device is configured CPU_MGD
+ * mode or P0_mode is configured for [x]MII.
+ * Workaround: Set SERDES register 4.F002 bit 5=0 and bit 15=1.
*
* It seems that after this workaround the SERDES is automatically
* powered up (the bit is cleared), so power it down.
*/
- if (lane == MV88E6393X_PORT0_LANE) {
- err = mv88e6390_serdes_read(chip, MV88E6393X_PORT0_LANE,
+ if (lane == MV88E6393X_PORT0_LANE || lane == MV88E6393X_PORT9_LANE ||
+ lane == MV88E6393X_PORT10_LANE) {
+ err = mv88e6390_serdes_read(chip, lane,
MDIO_MMD_PHYXS,
MV88E6393X_SERDES_POC, &reg);
if (err)
diff --git a/drivers/net/dsa/ocelot/Kconfig b/drivers/net/dsa/ocelot/Kconfig
index 932b6b6fe817..9948544ba1c4 100644
--- a/drivers/net/dsa/ocelot/Kconfig
+++ b/drivers/net/dsa/ocelot/Kconfig
@@ -5,6 +5,7 @@ config NET_DSA_MSCC_FELIX
depends on NET_VENDOR_MICROSEMI
depends on NET_VENDOR_FREESCALE
depends on HAS_IOMEM
+ depends on PTP_1588_CLOCK_OPTIONAL
select MSCC_OCELOT_SWITCH_LIB
select NET_DSA_TAG_OCELOT_8021Q
select NET_DSA_TAG_OCELOT
@@ -19,6 +20,7 @@ config NET_DSA_MSCC_SEVILLE
depends on NET_DSA
depends on NET_VENDOR_MICROSEMI
depends on HAS_IOMEM
+ depends on PTP_1588_CLOCK_OPTIONAL
select MSCC_OCELOT_SWITCH_LIB
select NET_DSA_TAG_OCELOT_8021Q
select NET_DSA_TAG_OCELOT
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index a2a15919b960..3656e67af789 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -231,11 +231,6 @@ static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid)
return 0;
}
-static const struct dsa_8021q_ops felix_tag_8021q_ops = {
- .vlan_add = felix_tag_8021q_vlan_add,
- .vlan_del = felix_tag_8021q_vlan_del,
-};
-
/* Alternatively to using the NPI functionality, that same hardware MAC
* connected internally to the enetc or fman DSA master can be configured to
* use the software-defined tag_8021q frame format. As far as the hardware is
@@ -425,29 +420,18 @@ static int felix_setup_tag_8021q(struct dsa_switch *ds, int cpu)
ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_MC);
ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_BC);
- felix->dsa_8021q_ctx = kzalloc(sizeof(*felix->dsa_8021q_ctx),
- GFP_KERNEL);
- if (!felix->dsa_8021q_ctx)
- return -ENOMEM;
-
- felix->dsa_8021q_ctx->ops = &felix_tag_8021q_ops;
- felix->dsa_8021q_ctx->proto = htons(ETH_P_8021AD);
- felix->dsa_8021q_ctx->ds = ds;
-
- err = dsa_8021q_setup(felix->dsa_8021q_ctx, true);
+ err = dsa_tag_8021q_register(ds, htons(ETH_P_8021AD));
if (err)
- goto out_free_dsa_8021_ctx;
+ return err;
err = felix_setup_mmio_filtering(felix);
if (err)
- goto out_teardown_dsa_8021q;
+ goto out_tag_8021q_unregister;
return 0;
-out_teardown_dsa_8021q:
- dsa_8021q_setup(felix->dsa_8021q_ctx, false);
-out_free_dsa_8021_ctx:
- kfree(felix->dsa_8021q_ctx);
+out_tag_8021q_unregister:
+ dsa_tag_8021q_unregister(ds);
return err;
}
@@ -462,11 +446,7 @@ static void felix_teardown_tag_8021q(struct dsa_switch *ds, int cpu)
dev_err(ds->dev, "felix_teardown_mmio_filtering returned %d",
err);
- err = dsa_8021q_setup(felix->dsa_8021q_ctx, false);
- if (err)
- dev_err(ds->dev, "dsa_8021q_setup returned %d", err);
-
- kfree(felix->dsa_8021q_ctx);
+ dsa_tag_8021q_unregister(ds);
for (port = 0; port < ds->num_ports; port++) {
if (dsa_is_unused_port(ds, port))
@@ -762,7 +742,8 @@ static int felix_lag_change(struct dsa_switch *ds, int port)
}
static int felix_vlan_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan)
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack)
{
struct ocelot *ocelot = ds->priv;
u16 flags = vlan->flags;
@@ -780,7 +761,8 @@ static int felix_vlan_prepare(struct dsa_switch *ds, int port,
return ocelot_vlan_prepare(ocelot, port, vlan->vid,
flags & BRIDGE_VLAN_INFO_PVID,
- flags & BRIDGE_VLAN_INFO_UNTAGGED);
+ flags & BRIDGE_VLAN_INFO_UNTAGGED,
+ extack);
}
static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
@@ -788,7 +770,7 @@ static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
{
struct ocelot *ocelot = ds->priv;
- return ocelot_port_vlan_filtering(ocelot, port, enabled);
+ return ocelot_port_vlan_filtering(ocelot, port, enabled, extack);
}
static int felix_vlan_add(struct dsa_switch *ds, int port,
@@ -799,7 +781,7 @@ static int felix_vlan_add(struct dsa_switch *ds, int port,
u16 flags = vlan->flags;
int err;
- err = felix_vlan_prepare(ds, port, vlan);
+ err = felix_vlan_prepare(ds, port, vlan, extack);
if (err)
return err;
@@ -816,23 +798,6 @@ static int felix_vlan_del(struct dsa_switch *ds, int port,
return ocelot_vlan_del(ocelot, port, vlan->vid);
}
-static int felix_port_enable(struct dsa_switch *ds, int port,
- struct phy_device *phy)
-{
- struct ocelot *ocelot = ds->priv;
-
- ocelot_port_enable(ocelot, port, phy);
-
- return 0;
-}
-
-static void felix_port_disable(struct dsa_switch *ds, int port)
-{
- struct ocelot *ocelot = ds->priv;
-
- return ocelot_port_disable(ocelot, port);
-}
-
static void felix_phylink_validate(struct dsa_switch *ds, int port,
unsigned long *supported,
struct phylink_link_state *state)
@@ -861,25 +826,9 @@ static void felix_phylink_mac_link_down(struct dsa_switch *ds, int port,
phy_interface_t interface)
{
struct ocelot *ocelot = ds->priv;
- struct ocelot_port *ocelot_port = ocelot->ports[port];
- int err;
-
- ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA,
- DEV_MAC_ENA_CFG);
- ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 0);
-
- err = ocelot_port_flush(ocelot, port);
- if (err)
- dev_err(ocelot->dev, "failed to flush port %d: %d\n",
- port, err);
-
- /* Put the port in reset. */
- ocelot_port_writel(ocelot_port,
- DEV_CLOCK_CFG_MAC_TX_RST |
- DEV_CLOCK_CFG_MAC_RX_RST |
- DEV_CLOCK_CFG_LINK_SPEED(OCELOT_SPEED_1000),
- DEV_CLOCK_CFG);
+ ocelot_phylink_mac_link_down(ocelot, port, link_an_mode, interface,
+ FELIX_MAC_QUIRKS);
}
static void felix_phylink_mac_link_up(struct dsa_switch *ds, int port,
@@ -890,75 +839,11 @@ static void felix_phylink_mac_link_up(struct dsa_switch *ds, int port,
bool tx_pause, bool rx_pause)
{
struct ocelot *ocelot = ds->priv;
- struct ocelot_port *ocelot_port = ocelot->ports[port];
struct felix *felix = ocelot_to_felix(ocelot);
- u32 mac_fc_cfg;
-
- /* Take port out of reset by clearing the MAC_TX_RST, MAC_RX_RST and
- * PORT_RST bits in DEV_CLOCK_CFG. Note that the way this system is
- * integrated is that the MAC speed is fixed and it's the PCS who is
- * performing the rate adaptation, so we have to write "1000Mbps" into
- * the LINK_SPEED field of DEV_CLOCK_CFG (which is also its default
- * value).
- */
- ocelot_port_writel(ocelot_port,
- DEV_CLOCK_CFG_LINK_SPEED(OCELOT_SPEED_1000),
- DEV_CLOCK_CFG);
-
- switch (speed) {
- case SPEED_10:
- mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(3);
- break;
- case SPEED_100:
- mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(2);
- break;
- case SPEED_1000:
- case SPEED_2500:
- mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(1);
- break;
- default:
- dev_err(ocelot->dev, "Unsupported speed on port %d: %d\n",
- port, speed);
- return;
- }
-
- /* handle Rx pause in all cases, with 2500base-X this is used for rate
- * adaptation.
- */
- mac_fc_cfg |= SYS_MAC_FC_CFG_RX_FC_ENA;
-
- if (tx_pause)
- mac_fc_cfg |= SYS_MAC_FC_CFG_TX_FC_ENA |
- SYS_MAC_FC_CFG_PAUSE_VAL_CFG(0xffff) |
- SYS_MAC_FC_CFG_FC_LATENCY_CFG(0x7) |
- SYS_MAC_FC_CFG_ZERO_PAUSE_ENA;
-
- /* Flow control. Link speed is only used here to evaluate the time
- * specification in incoming pause frames.
- */
- ocelot_write_rix(ocelot, mac_fc_cfg, SYS_MAC_FC_CFG, port);
-
- ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port);
-
- ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, tx_pause);
-
- /* Undo the effects of felix_phylink_mac_link_down:
- * enable MAC module
- */
- ocelot_port_writel(ocelot_port, DEV_MAC_ENA_CFG_RX_ENA |
- DEV_MAC_ENA_CFG_TX_ENA, DEV_MAC_ENA_CFG);
-
- /* Enable receiving frames on the port, and activate auto-learning of
- * MAC addresses.
- */
- ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_LEARNAUTO |
- ANA_PORT_PORT_CFG_RECV_ENA |
- ANA_PORT_PORT_CFG_PORTID_VAL(port),
- ANA_PORT_PORT_CFG, port);
- /* Core: Enable port for frame transfer */
- ocelot_fields_write(ocelot, port,
- QSYS_SWITCH_PORT_MODE_PORT_ENA, 1);
+ ocelot_phylink_mac_link_up(ocelot, port, phydev, link_an_mode,
+ interface, speed, duplex, tx_pause, rx_pause,
+ FELIX_MAC_QUIRKS);
if (felix->info->port_sched_speed_set)
felix->info->port_sched_speed_set(ocelot, port, speed);
@@ -1635,8 +1520,6 @@ const struct dsa_switch_ops felix_switch_ops = {
.phylink_mac_config = felix_phylink_mac_config,
.phylink_mac_link_down = felix_phylink_mac_link_down,
.phylink_mac_link_up = felix_phylink_mac_link_up,
- .port_enable = felix_port_enable,
- .port_disable = felix_port_disable,
.port_fdb_dump = felix_fdb_dump,
.port_fdb_add = felix_fdb_add,
.port_fdb_del = felix_fdb_del,
@@ -1679,6 +1562,8 @@ const struct dsa_switch_ops felix_switch_ops = {
.port_mrp_del = felix_mrp_del,
.port_mrp_add_ring_role = felix_mrp_add_ring_role,
.port_mrp_del_ring_role = felix_mrp_del_ring_role,
+ .tag_8021q_vlan_add = felix_tag_8021q_vlan_add,
+ .tag_8021q_vlan_del = felix_tag_8021q_vlan_del,
};
struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port)
diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h
index 4d96cad815d5..5854bab43327 100644
--- a/drivers/net/dsa/ocelot/felix.h
+++ b/drivers/net/dsa/ocelot/felix.h
@@ -5,6 +5,7 @@
#define _MSCC_FELIX_H
#define ocelot_to_felix(o) container_of((o), struct felix, ocelot)
+#define FELIX_MAC_QUIRKS OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION
/* Platform-specific information */
struct felix_info {
@@ -60,7 +61,6 @@ struct felix {
struct lynx_pcs **pcs;
resource_size_t switch_base;
resource_size_t imdio_base;
- struct dsa_8021q_context *dsa_8021q_ctx;
enum dsa_tag_protocol tag_proto;
};
diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c
index 6686192e1883..563d8a279030 100644
--- a/drivers/net/dsa/qca/ar9331.c
+++ b/drivers/net/dsa/qca/ar9331.c
@@ -101,6 +101,23 @@
AR9331_SW_PORT_STATUS_RX_FLOW_EN | AR9331_SW_PORT_STATUS_TX_FLOW_EN | \
AR9331_SW_PORT_STATUS_SPEED_M)
+#define AR9331_SW_REG_PORT_CTRL(_port) (0x104 + (_port) * 0x100)
+#define AR9331_SW_PORT_CTRL_HEAD_EN BIT(11)
+#define AR9331_SW_PORT_CTRL_PORT_STATE GENMASK(2, 0)
+#define AR9331_SW_PORT_CTRL_PORT_STATE_DISABLED 0
+#define AR9331_SW_PORT_CTRL_PORT_STATE_BLOCKING 1
+#define AR9331_SW_PORT_CTRL_PORT_STATE_LISTENING 2
+#define AR9331_SW_PORT_CTRL_PORT_STATE_LEARNING 3
+#define AR9331_SW_PORT_CTRL_PORT_STATE_FORWARD 4
+
+#define AR9331_SW_REG_PORT_VLAN(_port) (0x108 + (_port) * 0x100)
+#define AR9331_SW_PORT_VLAN_8021Q_MODE GENMASK(31, 30)
+#define AR9331_SW_8021Q_MODE_SECURE 3
+#define AR9331_SW_8021Q_MODE_CHECK 2
+#define AR9331_SW_8021Q_MODE_FALLBACK 1
+#define AR9331_SW_8021Q_MODE_NONE 0
+#define AR9331_SW_PORT_VLAN_PORT_VID_MEMBER GENMASK(25, 16)
+
/* MIB registers */
#define AR9331_MIB_COUNTER(x) (0x20000 + ((x) * 0x100))
@@ -371,12 +388,60 @@ static int ar9331_sw_mbus_init(struct ar9331_sw_priv *priv)
return 0;
}
-static int ar9331_sw_setup(struct dsa_switch *ds)
+static int ar9331_sw_setup_port(struct dsa_switch *ds, int port)
{
struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv;
struct regmap *regmap = priv->regmap;
+ u32 port_mask, port_ctrl, val;
int ret;
+ /* Generate default port settings */
+ port_ctrl = FIELD_PREP(AR9331_SW_PORT_CTRL_PORT_STATE,
+ AR9331_SW_PORT_CTRL_PORT_STATE_FORWARD);
+
+ if (dsa_is_cpu_port(ds, port)) {
+ /* CPU port should be allowed to communicate with all user
+ * ports.
+ */
+ port_mask = dsa_user_ports(ds);
+ /* Enable Atheros header on CPU port. This will allow us
+ * communicate with each port separately
+ */
+ port_ctrl |= AR9331_SW_PORT_CTRL_HEAD_EN;
+ } else if (dsa_is_user_port(ds, port)) {
+ /* User ports should communicate only with the CPU port.
+ */
+ port_mask = BIT(dsa_upstream_port(ds, port));
+ } else {
+ /* Other ports do not need to communicate at all */
+ port_mask = 0;
+ }
+
+ val = FIELD_PREP(AR9331_SW_PORT_VLAN_8021Q_MODE,
+ AR9331_SW_8021Q_MODE_NONE) |
+ FIELD_PREP(AR9331_SW_PORT_VLAN_PORT_VID_MEMBER, port_mask);
+
+ ret = regmap_write(regmap, AR9331_SW_REG_PORT_VLAN(port), val);
+ if (ret)
+ goto error;
+
+ ret = regmap_write(regmap, AR9331_SW_REG_PORT_CTRL(port), port_ctrl);
+ if (ret)
+ goto error;
+
+ return 0;
+error:
+ dev_err(priv->dev, "%s: error: %i\n", __func__, ret);
+
+ return ret;
+}
+
+static int ar9331_sw_setup(struct dsa_switch *ds)
+{
+ struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv;
+ struct regmap *regmap = priv->regmap;
+ int ret, i;
+
ret = ar9331_sw_reset(priv);
if (ret)
return ret;
@@ -402,6 +467,12 @@ static int ar9331_sw_setup(struct dsa_switch *ds)
if (ret)
goto error;
+ for (i = 0; i < ds->num_ports; i++) {
+ ret = ar9331_sw_setup_port(ds, i);
+ if (ret)
+ goto error;
+ }
+
ds->configure_vlan_while_not_filtering = false;
return 0;
diff --git a/drivers/net/dsa/sja1105/Kconfig b/drivers/net/dsa/sja1105/Kconfig
index b29d41e5e1e7..1291bba3f3b6 100644
--- a/drivers/net/dsa/sja1105/Kconfig
+++ b/drivers/net/dsa/sja1105/Kconfig
@@ -2,6 +2,7 @@
config NET_DSA_SJA1105
tristate "NXP SJA1105 Ethernet switch family support"
depends on NET_DSA && SPI
+ depends on PTP_1588_CLOCK_OPTIONAL
select NET_DSA_TAG_SJA1105
select PCS_XPCS
select PACKING
diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 221c7abdef0e..5e5d24e7c02b 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -115,12 +115,6 @@ struct sja1105_info {
const struct sja1105_dynamic_table_ops *dyn_ops;
const struct sja1105_table_ops *static_ops;
const struct sja1105_regs *regs;
- /* Both E/T and P/Q/R/S have quirks when it comes to popping the S-Tag
- * from double-tagged frames. E/T will pop it only when it's equal to
- * TPID from the General Parameters Table, while P/Q/R/S will only
- * pop it when it's equal to TPID2.
- */
- u16 qinq_tpid;
bool can_limit_mcast_flood;
int (*reset_cmd)(struct dsa_switch *ds);
int (*setup_rgmii_delay)(const void *ctx, int port);
@@ -226,28 +220,13 @@ struct sja1105_flow_block {
int num_virtual_links;
};
-struct sja1105_bridge_vlan {
- struct list_head list;
- int port;
- u16 vid;
- bool pvid;
- bool untagged;
-};
-
-enum sja1105_vlan_state {
- SJA1105_VLAN_UNAWARE,
- SJA1105_VLAN_BEST_EFFORT,
- SJA1105_VLAN_FILTERING_FULL,
-};
-
struct sja1105_private {
struct sja1105_static_config static_config;
bool rgmii_rx_delay[SJA1105_MAX_NUM_PORTS];
bool rgmii_tx_delay[SJA1105_MAX_NUM_PORTS];
phy_interface_t phy_mode[SJA1105_MAX_NUM_PORTS];
bool fixed_link[SJA1105_MAX_NUM_PORTS];
- bool best_effort_vlan_filtering;
- unsigned long learn_ena;
+ bool vlan_aware;
unsigned long ucast_egress_floods;
unsigned long bcast_egress_floods;
const struct sja1105_info *info;
@@ -255,16 +234,14 @@ struct sja1105_private {
struct gpio_desc *reset_gpio;
struct spi_device *spidev;
struct dsa_switch *ds;
- struct list_head dsa_8021q_vlans;
- struct list_head bridge_vlans;
+ u16 bridge_pvid[SJA1105_MAX_NUM_PORTS];
+ u16 tag_8021q_pvid[SJA1105_MAX_NUM_PORTS];
struct sja1105_flow_block flow_block;
struct sja1105_port ports[SJA1105_MAX_NUM_PORTS];
/* Serializes transmission of management frames so that
* the switch doesn't confuse them with one another.
*/
struct mutex mgmt_lock;
- struct dsa_8021q_context *dsa_8021q_ctx;
- enum sja1105_vlan_state vlan_state;
struct devlink_region **regions;
struct sja1105_cbs_entry *cbs;
struct mii_bus *mdio_base_t1;
@@ -311,10 +288,6 @@ int sja1110_pcs_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val);
/* From sja1105_devlink.c */
int sja1105_devlink_setup(struct dsa_switch *ds);
void sja1105_devlink_teardown(struct dsa_switch *ds);
-int sja1105_devlink_param_get(struct dsa_switch *ds, u32 id,
- struct devlink_param_gset_ctx *ctx);
-int sja1105_devlink_param_set(struct dsa_switch *ds, u32 id,
- struct devlink_param_gset_ctx *ctx);
int sja1105_devlink_info_get(struct dsa_switch *ds,
struct devlink_info_req *req,
struct netlink_ext_ack *extack);
diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c
index b6a4a16b8c7e..05c7f4ca3b1a 100644
--- a/drivers/net/dsa/sja1105/sja1105_devlink.c
+++ b/drivers/net/dsa/sja1105/sja1105_devlink.c
@@ -115,105 +115,6 @@ static void sja1105_teardown_devlink_regions(struct dsa_switch *ds)
kfree(priv->regions);
}
-static int sja1105_best_effort_vlan_filtering_get(struct sja1105_private *priv,
- bool *be_vlan)
-{
- *be_vlan = priv->best_effort_vlan_filtering;
-
- return 0;
-}
-
-static int sja1105_best_effort_vlan_filtering_set(struct sja1105_private *priv,
- bool be_vlan)
-{
- struct dsa_switch *ds = priv->ds;
- bool vlan_filtering;
- int port;
- int rc;
-
- priv->best_effort_vlan_filtering = be_vlan;
-
- rtnl_lock();
- for (port = 0; port < ds->num_ports; port++) {
- struct dsa_port *dp;
-
- if (!dsa_is_user_port(ds, port))
- continue;
-
- dp = dsa_to_port(ds, port);
- vlan_filtering = dsa_port_is_vlan_filtering(dp);
-
- rc = sja1105_vlan_filtering(ds, port, vlan_filtering, NULL);
- if (rc)
- break;
- }
- rtnl_unlock();
-
- return rc;
-}
-
-enum sja1105_devlink_param_id {
- SJA1105_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
- SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING,
-};
-
-int sja1105_devlink_param_get(struct dsa_switch *ds, u32 id,
- struct devlink_param_gset_ctx *ctx)
-{
- struct sja1105_private *priv = ds->priv;
- int err;
-
- switch (id) {
- case SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING:
- err = sja1105_best_effort_vlan_filtering_get(priv,
- &ctx->val.vbool);
- break;
- default:
- err = -EOPNOTSUPP;
- break;
- }
-
- return err;
-}
-
-int sja1105_devlink_param_set(struct dsa_switch *ds, u32 id,
- struct devlink_param_gset_ctx *ctx)
-{
- struct sja1105_private *priv = ds->priv;
- int err;
-
- switch (id) {
- case SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING:
- err = sja1105_best_effort_vlan_filtering_set(priv,
- ctx->val.vbool);
- break;
- default:
- err = -EOPNOTSUPP;
- break;
- }
-
- return err;
-}
-
-static const struct devlink_param sja1105_devlink_params[] = {
- DSA_DEVLINK_PARAM_DRIVER(SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING,
- "best_effort_vlan_filtering",
- DEVLINK_PARAM_TYPE_BOOL,
- BIT(DEVLINK_PARAM_CMODE_RUNTIME)),
-};
-
-static int sja1105_setup_devlink_params(struct dsa_switch *ds)
-{
- return dsa_devlink_params_register(ds, sja1105_devlink_params,
- ARRAY_SIZE(sja1105_devlink_params));
-}
-
-static void sja1105_teardown_devlink_params(struct dsa_switch *ds)
-{
- dsa_devlink_params_unregister(ds, sja1105_devlink_params,
- ARRAY_SIZE(sja1105_devlink_params));
-}
-
int sja1105_devlink_info_get(struct dsa_switch *ds,
struct devlink_info_req *req,
struct netlink_ext_ack *extack)
@@ -233,23 +134,10 @@ int sja1105_devlink_info_get(struct dsa_switch *ds,
int sja1105_devlink_setup(struct dsa_switch *ds)
{
- int rc;
-
- rc = sja1105_setup_devlink_params(ds);
- if (rc)
- return rc;
-
- rc = sja1105_setup_devlink_regions(ds);
- if (rc < 0) {
- sja1105_teardown_devlink_params(ds);
- return rc;
- }
-
- return 0;
+ return sja1105_setup_devlink_regions(ds);
}
void sja1105_devlink_teardown(struct dsa_switch *ds)
{
- sja1105_teardown_devlink_params(ds);
sja1105_teardown_devlink_regions(ds);
}
diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index 147709131c13..f2049f52833c 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -1355,14 +1355,14 @@ u8 sja1105et_fdb_hash(struct sja1105_private *priv, const u8 *addr, u16 vid)
{
struct sja1105_l2_lookup_params_entry *l2_lookup_params =
priv->static_config.tables[BLK_IDX_L2_LOOKUP_PARAMS].entries;
- u64 poly_koopman = l2_lookup_params->poly;
+ u64 input, poly_koopman = l2_lookup_params->poly;
/* Convert polynomial from Koopman to 'normal' notation */
u8 poly = (u8)(1 + (poly_koopman << 1));
- u64 vlanid = l2_lookup_params->shared_learn ? 0 : vid;
- u64 input = (vlanid << 48) | ether_addr_to_u64(addr);
u8 crc = 0; /* seed */
int i;
+ input = ((u64)vid << 48) | ether_addr_to_u64(addr);
+
/* Mask the eight bytes starting from MSB one at a time */
for (i = 56; i >= 0; i -= 8) {
u8 byte = (input & (0xffull << i)) >> i;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 8667c9754330..2f8cc6686c38 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -26,9 +26,6 @@
#include "sja1105_tas.h"
#define SJA1105_UNKNOWN_MULTICAST 0x010000000000ull
-#define SJA1105_DEFAULT_VLAN (VLAN_N_VID - 1)
-
-static const struct dsa_switch_ops sja1105_switch_ops;
static void sja1105_hw_reset(struct gpio_desc *gpio, unsigned int pulse_len,
unsigned int startup_delay)
@@ -57,6 +54,93 @@ static bool sja1105_can_forward(struct sja1105_l2_forwarding_entry *l2_fwd,
return !!(l2_fwd[from].reach_port & BIT(to));
}
+static int sja1105_is_vlan_configured(struct sja1105_private *priv, u16 vid)
+{
+ struct sja1105_vlan_lookup_entry *vlan;
+ int count, i;
+
+ vlan = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entries;
+ count = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entry_count;
+
+ for (i = 0; i < count; i++)
+ if (vlan[i].vlanid == vid)
+ return i;
+
+ /* Return an invalid entry index if not found */
+ return -1;
+}
+
+static int sja1105_drop_untagged(struct dsa_switch *ds, int port, bool drop)
+{
+ struct sja1105_private *priv = ds->priv;
+ struct sja1105_mac_config_entry *mac;
+
+ mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
+
+ if (mac[port].drpuntag == drop)
+ return 0;
+
+ mac[port].drpuntag = drop;
+
+ return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
+ &mac[port], true);
+}
+
+static int sja1105_pvid_apply(struct sja1105_private *priv, int port, u16 pvid)
+{
+ struct sja1105_mac_config_entry *mac;
+
+ mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
+
+ if (mac[port].vlanid == pvid)
+ return 0;
+
+ mac[port].vlanid = pvid;
+
+ return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
+ &mac[port], true);
+}
+
+static int sja1105_commit_pvid(struct dsa_switch *ds, int port)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct sja1105_private *priv = ds->priv;
+ struct sja1105_vlan_lookup_entry *vlan;
+ bool drop_untagged = false;
+ int match, rc;
+ u16 pvid;
+
+ if (dp->bridge_dev && br_vlan_enabled(dp->bridge_dev))
+ pvid = priv->bridge_pvid[port];
+ else
+ pvid = priv->tag_8021q_pvid[port];
+
+ rc = sja1105_pvid_apply(priv, port, pvid);
+ if (rc)
+ return rc;
+
+ /* Only force dropping of untagged packets when the port is under a
+ * VLAN-aware bridge. When the tag_8021q pvid is used, we are
+ * deliberately removing the RX VLAN from the port's VMEMB_PORT list,
+ * to prevent DSA tag spoofing from the link partner. Untagged packets
+ * are the only ones that should be received with tag_8021q, so
+ * definitely don't drop them.
+ */
+ if (pvid == priv->bridge_pvid[port]) {
+ vlan = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entries;
+
+ match = sja1105_is_vlan_configured(priv, pvid);
+
+ if (match < 0 || !(vlan[match].vmemb_port & BIT(port)))
+ drop_untagged = true;
+ }
+
+ if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+ drop_untagged = true;
+
+ return sja1105_drop_untagged(ds, port, drop_untagged);
+}
+
static int sja1105_init_mac_settings(struct sja1105_private *priv)
{
struct sja1105_mac_config_entry default_mac = {
@@ -101,7 +185,7 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv)
struct sja1105_mac_config_entry *mac;
struct dsa_switch *ds = priv->ds;
struct sja1105_table *table;
- int i;
+ struct dsa_port *dp;
table = &priv->static_config.tables[BLK_IDX_MAC_CONFIG];
@@ -120,14 +204,27 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv)
mac = table->entries;
- for (i = 0; i < ds->num_ports; i++) {
- mac[i] = default_mac;
+ list_for_each_entry(dp, &ds->dst->ports, list) {
+ if (dp->ds != ds)
+ continue;
+
+ mac[dp->index] = default_mac;
/* Let sja1105_bridge_stp_state_set() keep address learning
- * enabled for the CPU port.
+ * enabled for the DSA ports. CPU ports use software-assisted
+ * learning to ensure that only FDB entries belonging to the
+ * bridge are learned, and that they are learned towards all
+ * CPU ports in a cross-chip topology if multiple CPU ports
+ * exist.
+ */
+ if (dsa_port_is_dsa(dp))
+ dp->learning = true;
+
+ /* Disallow untagged packets from being received on the
+ * CPU and DSA ports.
*/
- if (dsa_is_cpu_port(ds, i))
- priv->learn_ena |= BIT(i);
+ if (dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))
+ mac[dp->index].drpuntag = true;
}
return 0;
@@ -378,8 +475,6 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
table->entry_count = 1;
for (port = 0; port < ds->num_ports; port++) {
- struct sja1105_bridge_vlan *v;
-
if (dsa_is_unused_port(ds, port))
continue;
@@ -387,22 +482,10 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
pvid.vlan_bc |= BIT(port);
pvid.tag_port &= ~BIT(port);
- v = kzalloc(sizeof(*v), GFP_KERNEL);
- if (!v)
- return -ENOMEM;
-
- v->port = port;
- v->vid = SJA1105_DEFAULT_VLAN;
- v->untagged = true;
- if (dsa_is_cpu_port(ds, port))
- v->pvid = true;
- list_add(&v->list, &priv->dsa_8021q_vlans);
-
- v = kmemdup(v, sizeof(*v), GFP_KERNEL);
- if (!v)
- return -ENOMEM;
-
- list_add(&v->list, &priv->bridge_vlans);
+ if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
+ priv->tag_8021q_pvid[port] = SJA1105_DEFAULT_VLAN;
+ priv->bridge_pvid[port] = SJA1105_DEFAULT_VLAN;
+ }
}
((struct sja1105_vlan_lookup_entry *)table->entries)[0] = pvid;
@@ -413,8 +496,11 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
{
struct sja1105_l2_forwarding_entry *l2fwd;
struct dsa_switch *ds = priv->ds;
+ struct dsa_switch_tree *dst;
struct sja1105_table *table;
- int i, j;
+ struct dsa_link *dl;
+ int port, tc;
+ int from, to;
table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING];
@@ -432,47 +518,109 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
l2fwd = table->entries;
- /* First 5 entries define the forwarding rules */
- for (i = 0; i < ds->num_ports; i++) {
- unsigned int upstream = dsa_upstream_port(priv->ds, i);
+ /* First 5 entries in the L2 Forwarding Table define the forwarding
+ * rules and the VLAN PCP to ingress queue mapping.
+ * Set up the ingress queue mapping first.
+ */
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_is_unused_port(ds, port))
+ continue;
+
+ for (tc = 0; tc < SJA1105_NUM_TC; tc++)
+ l2fwd[port].vlan_pmap[tc] = tc;
+ }
- if (dsa_is_unused_port(ds, i))
+ /* Then manage the forwarding domain for user ports. These can forward
+ * only to the always-on domain (CPU port and DSA links)
+ */
+ for (from = 0; from < ds->num_ports; from++) {
+ if (!dsa_is_user_port(ds, from))
continue;
- for (j = 0; j < SJA1105_NUM_TC; j++)
- l2fwd[i].vlan_pmap[j] = j;
+ for (to = 0; to < ds->num_ports; to++) {
+ if (!dsa_is_cpu_port(ds, to) &&
+ !dsa_is_dsa_port(ds, to))
+ continue;
- /* All ports start up with egress flooding enabled,
- * including the CPU port.
- */
- priv->ucast_egress_floods |= BIT(i);
- priv->bcast_egress_floods |= BIT(i);
+ l2fwd[from].bc_domain |= BIT(to);
+ l2fwd[from].fl_domain |= BIT(to);
- if (i == upstream)
+ sja1105_port_allow_traffic(l2fwd, from, to, true);
+ }
+ }
+
+ /* Then manage the forwarding domain for DSA links and CPU ports (the
+ * always-on domain). These can send packets to any enabled port except
+ * themselves.
+ */
+ for (from = 0; from < ds->num_ports; from++) {
+ if (!dsa_is_cpu_port(ds, from) && !dsa_is_dsa_port(ds, from))
continue;
- sja1105_port_allow_traffic(l2fwd, i, upstream, true);
- sja1105_port_allow_traffic(l2fwd, upstream, i, true);
+ for (to = 0; to < ds->num_ports; to++) {
+ if (dsa_is_unused_port(ds, to))
+ continue;
- l2fwd[i].bc_domain = BIT(upstream);
- l2fwd[i].fl_domain = BIT(upstream);
+ if (from == to)
+ continue;
- l2fwd[upstream].bc_domain |= BIT(i);
- l2fwd[upstream].fl_domain |= BIT(i);
+ l2fwd[from].bc_domain |= BIT(to);
+ l2fwd[from].fl_domain |= BIT(to);
+
+ sja1105_port_allow_traffic(l2fwd, from, to, true);
+ }
+ }
+
+ /* In odd topologies ("H" connections where there is a DSA link to
+ * another switch which also has its own CPU port), TX packets can loop
+ * back into the system (they are flooded from CPU port 1 to the DSA
+ * link, and from there to CPU port 2). Prevent this from happening by
+ * cutting RX from DSA links towards our CPU port, if the remote switch
+ * has its own CPU port and therefore doesn't need ours for network
+ * stack termination.
+ */
+ dst = ds->dst;
+
+ list_for_each_entry(dl, &dst->rtable, list) {
+ if (dl->dp->ds != ds || dl->link_dp->cpu_dp == dl->dp->cpu_dp)
+ continue;
+
+ from = dl->dp->index;
+ to = dsa_upstream_port(ds, from);
+
+ dev_warn(ds->dev,
+ "H topology detected, cutting RX from DSA link %d to CPU port %d to prevent TX packet loops\n",
+ from, to);
+
+ sja1105_port_allow_traffic(l2fwd, from, to, false);
+
+ l2fwd[from].bc_domain &= ~BIT(to);
+ l2fwd[from].fl_domain &= ~BIT(to);
+ }
+
+ /* Finally, manage the egress flooding domain. All ports start up with
+ * flooding enabled, including the CPU port and DSA links.
+ */
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_is_unused_port(ds, port))
+ continue;
+
+ priv->ucast_egress_floods |= BIT(port);
+ priv->bcast_egress_floods |= BIT(port);
}
/* Next 8 entries define VLAN PCP mapping from ingress to egress.
* Create a one-to-one mapping.
*/
- for (i = 0; i < SJA1105_NUM_TC; i++) {
- for (j = 0; j < ds->num_ports; j++) {
- if (dsa_is_unused_port(ds, j))
+ for (tc = 0; tc < SJA1105_NUM_TC; tc++) {
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_is_unused_port(ds, port))
continue;
- l2fwd[ds->num_ports + i].vlan_pmap[j] = i;
+ l2fwd[ds->num_ports + tc].vlan_pmap[port] = tc;
}
- l2fwd[ds->num_ports + i].type_egrpcp2outputq = true;
+ l2fwd[ds->num_ports + tc].type_egrpcp2outputq = true;
}
return 0;
@@ -551,18 +699,11 @@ void sja1105_frame_memory_partitioning(struct sja1105_private *priv)
{
struct sja1105_l2_forwarding_params_entry *l2_fwd_params;
struct sja1105_vl_forwarding_params_entry *vl_fwd_params;
- int max_mem = priv->info->max_frame_mem;
struct sja1105_table *table;
- /* VLAN retagging is implemented using a loopback port that consumes
- * frame buffers. That leaves less for us.
- */
- if (priv->vlan_state == SJA1105_VLAN_BEST_EFFORT)
- max_mem -= SJA1105_FRAME_MEMORY_RETAGGING_OVERHEAD;
-
table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING_PARAMS];
l2_fwd_params = table->entries;
- l2_fwd_params->part_spc[0] = max_mem;
+ l2_fwd_params->part_spc[0] = SJA1105_MAX_FRAME_MEMORY;
/* If we have any critical-traffic virtual links, we need to reserve
* some frame buffer memory for them. At the moment, hardcode the value
@@ -634,6 +775,72 @@ static void sja1110_select_tdmaconfigidx(struct sja1105_private *priv)
general_params->tdmaconfigidx = tdmaconfigidx;
}
+static int sja1105_init_topology(struct sja1105_private *priv,
+ struct sja1105_general_params_entry *general_params)
+{
+ struct dsa_switch *ds = priv->ds;
+ int port;
+
+ /* The host port is the destination for traffic matching mac_fltres1
+ * and mac_fltres0 on all ports except itself. Default to an invalid
+ * value.
+ */
+ general_params->host_port = ds->num_ports;
+
+ /* Link-local traffic received on casc_port will be forwarded
+ * to host_port without embedding the source port and device ID
+ * info in the destination MAC address, and no RX timestamps will be
+ * taken either (presumably because it is a cascaded port and a
+ * downstream SJA switch already did that).
+ * To disable the feature, we need to do different things depending on
+ * switch generation. On SJA1105 we need to set an invalid port, while
+ * on SJA1110 which support multiple cascaded ports, this field is a
+ * bitmask so it must be left zero.
+ */
+ if (!priv->info->multiple_cascade_ports)
+ general_params->casc_port = ds->num_ports;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ bool is_upstream = dsa_is_upstream_port(ds, port);
+ bool is_dsa_link = dsa_is_dsa_port(ds, port);
+
+ /* Upstream ports can be dedicated CPU ports or
+ * upstream-facing DSA links
+ */
+ if (is_upstream) {
+ if (general_params->host_port == ds->num_ports) {
+ general_params->host_port = port;
+ } else {
+ dev_err(ds->dev,
+ "Port %llu is already a host port, configuring %d as one too is not supported\n",
+ general_params->host_port, port);
+ return -EINVAL;
+ }
+ }
+
+ /* Cascade ports are downstream-facing DSA links */
+ if (is_dsa_link && !is_upstream) {
+ if (priv->info->multiple_cascade_ports) {
+ general_params->casc_port |= BIT(port);
+ } else if (general_params->casc_port == ds->num_ports) {
+ general_params->casc_port = port;
+ } else {
+ dev_err(ds->dev,
+ "Port %llu is already a cascade port, configuring %d as one too is not supported\n",
+ general_params->casc_port, port);
+ return -EINVAL;
+ }
+ }
+ }
+
+ if (general_params->host_port == ds->num_ports) {
+ dev_err(ds->dev, "No host port configured\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int sja1105_init_general_params(struct sja1105_private *priv)
{
struct sja1105_general_params_entry default_general_params = {
@@ -652,12 +859,6 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
.mac_flt0 = SJA1105_LINKLOCAL_FILTER_B_MASK,
.incl_srcpt0 = false,
.send_meta0 = false,
- /* The destination for traffic matching mac_fltres1 and
- * mac_fltres0 on all ports except host_port. Such traffic
- * receieved on host_port itself would be dropped, except
- * by installing a temporary 'management route'
- */
- .host_port = priv->ds->num_ports,
/* Default to an invalid value */
.mirr_port = priv->ds->num_ports,
/* No TTEthernet */
@@ -677,16 +878,12 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
.header_type = ETH_P_SJA1110,
};
struct sja1105_general_params_entry *general_params;
- struct dsa_switch *ds = priv->ds;
struct sja1105_table *table;
- int port;
+ int rc;
- for (port = 0; port < ds->num_ports; port++) {
- if (dsa_is_cpu_port(ds, port)) {
- default_general_params.host_port = port;
- break;
- }
- }
+ rc = sja1105_init_topology(priv, &default_general_params);
+ if (rc)
+ return rc;
table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
@@ -709,19 +906,6 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
sja1110_select_tdmaconfigidx(priv);
- /* Link-local traffic received on casc_port will be forwarded
- * to host_port without embedding the source port and device ID
- * info in the destination MAC address, and no RX timestamps will be
- * taken either (presumably because it is a cascaded port and a
- * downstream SJA switch already did that).
- * To disable the feature, we need to do different things depending on
- * switch generation. On SJA1105 we need to set an invalid port, while
- * on SJA1110 which support multiple cascaded ports, this field is a
- * bitmask so it must be left zero.
- */
- if (!priv->info->multiple_cascade_ports)
- general_params->casc_port = ds->num_ports;
-
return 0;
}
@@ -849,7 +1033,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv)
for (port = 0; port < ds->num_ports; port++) {
int mtu = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN;
- if (dsa_is_cpu_port(priv->ds, port))
+ if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
mtu += VLAN_HLEN;
policing[port].smax = 65535; /* Burst size in bytes */
@@ -1568,18 +1752,6 @@ static int sja1105_fdb_add(struct dsa_switch *ds, int port,
{
struct sja1105_private *priv = ds->priv;
- /* dsa_8021q is in effect when the bridge's vlan_filtering isn't,
- * so the switch still does some VLAN processing internally.
- * But Shared VLAN Learning (SVL) is also active, and it will take
- * care of autonomous forwarding between the unique pvid's of each
- * port. Here we just make sure that users can't add duplicate FDB
- * entries when in this mode - the actual VID doesn't matter except
- * for what gets printed in 'bridge fdb show'. In the case of zero,
- * no VID gets printed at all.
- */
- if (priv->vlan_state != SJA1105_VLAN_FILTERING_FULL)
- vid = 0;
-
return priv->info->fdb_add_cmd(ds, port, addr, vid);
}
@@ -1588,9 +1760,6 @@ static int sja1105_fdb_del(struct dsa_switch *ds, int port,
{
struct sja1105_private *priv = ds->priv;
- if (priv->vlan_state != SJA1105_VLAN_FILTERING_FULL)
- vid = 0;
-
return priv->info->fdb_del_cmd(ds, port, addr, vid);
}
@@ -1633,13 +1802,55 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
u64_to_ether_addr(l2_lookup.macaddr, macaddr);
/* We need to hide the dsa_8021q VLANs from the user. */
- if (priv->vlan_state == SJA1105_VLAN_UNAWARE)
+ if (!priv->vlan_aware)
l2_lookup.vlanid = 0;
- cb(macaddr, l2_lookup.vlanid, l2_lookup.lockeds, data);
+ rc = cb(macaddr, l2_lookup.vlanid, l2_lookup.lockeds, data);
+ if (rc)
+ return rc;
}
return 0;
}
+static void sja1105_fast_age(struct dsa_switch *ds, int port)
+{
+ struct sja1105_private *priv = ds->priv;
+ int i;
+
+ for (i = 0; i < SJA1105_MAX_L2_LOOKUP_COUNT; i++) {
+ struct sja1105_l2_lookup_entry l2_lookup = {0};
+ u8 macaddr[ETH_ALEN];
+ int rc;
+
+ rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
+ i, &l2_lookup);
+ /* No fdb entry at i, not an issue */
+ if (rc == -ENOENT)
+ continue;
+ if (rc) {
+ dev_err(ds->dev, "Failed to read FDB: %pe\n",
+ ERR_PTR(rc));
+ return;
+ }
+
+ if (!(l2_lookup.destports & BIT(port)))
+ continue;
+
+ /* Don't delete static FDB entries */
+ if (l2_lookup.lockeds)
+ continue;
+
+ u64_to_ether_addr(l2_lookup.macaddr, macaddr);
+
+ rc = sja1105_fdb_del(ds, port, macaddr, l2_lookup.vlanid);
+ if (rc) {
+ dev_err(ds->dev,
+ "Failed to delete FDB entry %pM vid %lld: %pe\n",
+ macaddr, l2_lookup.vlanid, ERR_PTR(rc));
+ return;
+ }
+ }
+}
+
static int sja1105_mdb_add(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_mdb *mdb)
{
@@ -1738,12 +1949,17 @@ static int sja1105_bridge_member(struct dsa_switch *ds, int port,
if (rc)
return rc;
+ rc = sja1105_commit_pvid(ds, port);
+ if (rc)
+ return rc;
+
return sja1105_manage_flood_domains(priv);
}
static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port,
u8 state)
{
+ struct dsa_port *dp = dsa_to_port(ds, port);
struct sja1105_private *priv = ds->priv;
struct sja1105_mac_config_entry *mac;
@@ -1769,12 +1985,12 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port,
case BR_STATE_LEARNING:
mac[port].ingress = true;
mac[port].egress = false;
- mac[port].dyn_learn = !!(priv->learn_ena & BIT(port));
+ mac[port].dyn_learn = dp->learning;
break;
case BR_STATE_FORWARDING:
mac[port].ingress = true;
mac[port].egress = true;
- mac[port].dyn_learn = !!(priv->learn_ena & BIT(port));
+ mac[port].dyn_learn = dp->learning;
break;
default:
dev_err(ds->dev, "invalid STP state: %d\n", state);
@@ -2037,97 +2253,6 @@ out:
return rc;
}
-static int sja1105_pvid_apply(struct sja1105_private *priv, int port, u16 pvid)
-{
- struct sja1105_mac_config_entry *mac;
-
- mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
-
- mac[port].vlanid = pvid;
-
- return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
- &mac[port], true);
-}
-
-static int sja1105_crosschip_bridge_join(struct dsa_switch *ds,
- int tree_index, int sw_index,
- int other_port, struct net_device *br)
-{
- struct dsa_switch *other_ds = dsa_switch_find(tree_index, sw_index);
- struct sja1105_private *other_priv = other_ds->priv;
- struct sja1105_private *priv = ds->priv;
- int port, rc;
-
- if (other_ds->ops != &sja1105_switch_ops)
- return 0;
-
- for (port = 0; port < ds->num_ports; port++) {
- if (!dsa_is_user_port(ds, port))
- continue;
- if (dsa_to_port(ds, port)->bridge_dev != br)
- continue;
-
- rc = dsa_8021q_crosschip_bridge_join(priv->dsa_8021q_ctx,
- port,
- other_priv->dsa_8021q_ctx,
- other_port);
- if (rc)
- return rc;
-
- rc = dsa_8021q_crosschip_bridge_join(other_priv->dsa_8021q_ctx,
- other_port,
- priv->dsa_8021q_ctx,
- port);
- if (rc)
- return rc;
- }
-
- return 0;
-}
-
-static void sja1105_crosschip_bridge_leave(struct dsa_switch *ds,
- int tree_index, int sw_index,
- int other_port,
- struct net_device *br)
-{
- struct dsa_switch *other_ds = dsa_switch_find(tree_index, sw_index);
- struct sja1105_private *other_priv = other_ds->priv;
- struct sja1105_private *priv = ds->priv;
- int port;
-
- if (other_ds->ops != &sja1105_switch_ops)
- return;
-
- for (port = 0; port < ds->num_ports; port++) {
- if (!dsa_is_user_port(ds, port))
- continue;
- if (dsa_to_port(ds, port)->bridge_dev != br)
- continue;
-
- dsa_8021q_crosschip_bridge_leave(priv->dsa_8021q_ctx, port,
- other_priv->dsa_8021q_ctx,
- other_port);
-
- dsa_8021q_crosschip_bridge_leave(other_priv->dsa_8021q_ctx,
- other_port,
- priv->dsa_8021q_ctx, port);
- }
-}
-
-static int sja1105_setup_8021q_tagging(struct dsa_switch *ds, bool enabled)
-{
- struct sja1105_private *priv = ds->priv;
- int rc;
-
- rc = dsa_8021q_setup(priv->dsa_8021q_ctx, enabled);
- if (rc)
- return rc;
-
- dev_info(ds->dev, "%s switch tagging\n",
- enabled ? "Enabled" : "Disabled");
- return 0;
-}
-
static enum dsa_tag_protocol
sja1105_get_tag_protocol(struct dsa_switch *ds, int port,
enum dsa_tag_protocol mp)
@@ -2137,669 +2262,6 @@ sja1105_get_tag_protocol(struct dsa_switch *ds, int port,
return priv->info->tag_proto;
}
-static int sja1105_find_free_subvlan(u16 *subvlan_map, bool pvid)
-{
- int subvlan;
-
- if (pvid)
- return 0;
-
- for (subvlan = 1; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++)
- if (subvlan_map[subvlan] == VLAN_N_VID)
- return subvlan;
-
- return -1;
-}
-
-static int sja1105_find_subvlan(u16 *subvlan_map, u16 vid)
-{
- int subvlan;
-
- for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++)
- if (subvlan_map[subvlan] == vid)
- return subvlan;
-
- return -1;
-}
-
-static int sja1105_find_committed_subvlan(struct sja1105_private *priv,
- int port, u16 vid)
-{
- struct sja1105_port *sp = &priv->ports[port];
-
- return sja1105_find_subvlan(sp->subvlan_map, vid);
-}
-
-static void sja1105_init_subvlan_map(u16 *subvlan_map)
-{
- int subvlan;
-
- for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++)
- subvlan_map[subvlan] = VLAN_N_VID;
-}
-
-static void sja1105_commit_subvlan_map(struct sja1105_private *priv, int port,
- u16 *subvlan_map)
-{
- struct sja1105_port *sp = &priv->ports[port];
- int subvlan;
-
- for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++)
- sp->subvlan_map[subvlan] = subvlan_map[subvlan];
-}
-
-static int sja1105_is_vlan_configured(struct sja1105_private *priv, u16 vid)
-{
- struct sja1105_vlan_lookup_entry *vlan;
- int count, i;
-
- vlan = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entries;
- count = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entry_count;
-
- for (i = 0; i < count; i++)
- if (vlan[i].vlanid == vid)
- return i;
-
- /* Return an invalid entry index if not found */
- return -1;
-}
-
-static int
-sja1105_find_retagging_entry(struct sja1105_retagging_entry *retagging,
- int count, int from_port, u16 from_vid,
- u16 to_vid)
-{
- int i;
-
- for (i = 0; i < count; i++)
- if (retagging[i].ing_port == BIT(from_port) &&
- retagging[i].vlan_ing == from_vid &&
- retagging[i].vlan_egr == to_vid)
- return i;
-
- /* Return an invalid entry index if not found */
- return -1;
-}
-
-static int sja1105_commit_vlans(struct sja1105_private *priv,
- struct sja1105_vlan_lookup_entry *new_vlan,
- struct sja1105_retagging_entry *new_retagging,
- int num_retagging)
-{
- struct sja1105_retagging_entry *retagging;
- struct sja1105_vlan_lookup_entry *vlan;
- struct sja1105_table *table;
- int num_vlans = 0;
- int rc, i, k = 0;
-
- /* VLAN table */
- table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
- vlan = table->entries;
-
- for (i = 0; i < VLAN_N_VID; i++) {
- int match = sja1105_is_vlan_configured(priv, i);
-
- if (new_vlan[i].vlanid != VLAN_N_VID)
- num_vlans++;
-
- if (new_vlan[i].vlanid == VLAN_N_VID && match >= 0) {
- /* Was there before, no longer is. Delete */
- dev_dbg(priv->ds->dev, "Deleting VLAN %d\n", i);
- rc = sja1105_dynamic_config_write(priv,
- BLK_IDX_VLAN_LOOKUP,
- i, &vlan[match], false);
- if (rc < 0)
- return rc;
- } else if (new_vlan[i].vlanid != VLAN_N_VID) {
- /* Nothing changed, don't do anything */
- if (match >= 0 &&
- vlan[match].vlanid == new_vlan[i].vlanid &&
- vlan[match].tag_port == new_vlan[i].tag_port &&
- vlan[match].vlan_bc == new_vlan[i].vlan_bc &&
- vlan[match].vmemb_port == new_vlan[i].vmemb_port)
- continue;
- /* Update entry */
- dev_dbg(priv->ds->dev, "Updating VLAN %d\n", i);
- rc = sja1105_dynamic_config_write(priv,
- BLK_IDX_VLAN_LOOKUP,
- i, &new_vlan[i],
- true);
- if (rc < 0)
- return rc;
- }
- }
-
- if (table->entry_count)
- kfree(table->entries);
-
- table->entries = kcalloc(num_vlans, table->ops->unpacked_entry_size,
- GFP_KERNEL);
- if (!table->entries)
- return -ENOMEM;
-
- table->entry_count = num_vlans;
- vlan = table->entries;
-
- for (i = 0; i < VLAN_N_VID; i++) {
- if (new_vlan[i].vlanid == VLAN_N_VID)
- continue;
- vlan[k++] = new_vlan[i];
- }
-
- /* VLAN Retagging Table */
- table = &priv->static_config.tables[BLK_IDX_RETAGGING];
- retagging = table->entries;
-
- for (i = 0; i < table->entry_count; i++) {
- rc = sja1105_dynamic_config_write(priv, BLK_IDX_RETAGGING,
- i, &retagging[i], false);
- if (rc)
- return rc;
- }
-
- if (table->entry_count)
- kfree(table->entries);
-
- table->entries = kcalloc(num_retagging, table->ops->unpacked_entry_size,
- GFP_KERNEL);
- if (!table->entries)
- return -ENOMEM;
-
- table->entry_count = num_retagging;
- retagging = table->entries;
-
- for (i = 0; i < num_retagging; i++) {
- retagging[i] = new_retagging[i];
-
- /* Update entry */
- rc = sja1105_dynamic_config_write(priv, BLK_IDX_RETAGGING,
- i, &retagging[i], true);
- if (rc < 0)
- return rc;
- }
-
- return 0;
-}
-
-struct sja1105_crosschip_vlan {
- struct list_head list;
- u16 vid;
- bool untagged;
- int port;
- int other_port;
- struct dsa_8021q_context *other_ctx;
-};
-
-struct sja1105_crosschip_switch {
- struct list_head list;
- struct dsa_8021q_context *other_ctx;
-};
-
-static int sja1105_commit_pvid(struct sja1105_private *priv)
-{
- struct sja1105_bridge_vlan *v;
- struct list_head *vlan_list;
- int rc = 0;
-
- if (priv->vlan_state == SJA1105_VLAN_FILTERING_FULL)
- vlan_list = &priv->bridge_vlans;
- else
- vlan_list = &priv->dsa_8021q_vlans;
-
- list_for_each_entry(v, vlan_list, list) {
- if (v->pvid) {
- rc = sja1105_pvid_apply(priv, v->port, v->vid);
- if (rc)
- break;
- }
- }
-
- return rc;
-}
-
-static int
-sja1105_build_bridge_vlans(struct sja1105_private *priv,
- struct sja1105_vlan_lookup_entry *new_vlan)
-{
- struct sja1105_bridge_vlan *v;
-
- if (priv->vlan_state == SJA1105_VLAN_UNAWARE)
- return 0;
-
- list_for_each_entry(v, &priv->bridge_vlans, list) {
- int match = v->vid;
-
- new_vlan[match].vlanid = v->vid;
- new_vlan[match].vmemb_port |= BIT(v->port);
- new_vlan[match].vlan_bc |= BIT(v->port);
- if (!v->untagged)
- new_vlan[match].tag_port |= BIT(v->port);
- new_vlan[match].type_entry = SJA1110_VLAN_D_TAG;
- }
-
- return 0;
-}
-
-static int
-sja1105_build_dsa_8021q_vlans(struct sja1105_private *priv,
- struct sja1105_vlan_lookup_entry *new_vlan)
-{
- struct sja1105_bridge_vlan *v;
-
- if (priv->vlan_state == SJA1105_VLAN_FILTERING_FULL)
- return 0;
-
- list_for_each_entry(v, &priv->dsa_8021q_vlans, list) {
- int match = v->vid;
-
- new_vlan[match].vlanid = v->vid;
- new_vlan[match].vmemb_port |= BIT(v->port);
- new_vlan[match].vlan_bc |= BIT(v->port);
- if (!v->untagged)
- new_vlan[match].tag_port |= BIT(v->port);
- new_vlan[match].type_entry = SJA1110_VLAN_D_TAG;
- }
-
- return 0;
-}
-
-static int sja1105_build_subvlans(struct sja1105_private *priv,
- u16 subvlan_map[][DSA_8021Q_N_SUBVLAN],
- struct sja1105_vlan_lookup_entry *new_vlan,
- struct sja1105_retagging_entry *new_retagging,
- int *num_retagging)
-{
- struct sja1105_bridge_vlan *v;
- int k = *num_retagging;
-
- if (priv->vlan_state != SJA1105_VLAN_BEST_EFFORT)
- return 0;
-
- list_for_each_entry(v, &priv->bridge_vlans, list) {
- int upstream = dsa_upstream_port(priv->ds, v->port);
- int match, subvlan;
- u16 rx_vid;
-
- /* Only sub-VLANs on user ports need to be applied.
- * Bridge VLANs also include VLANs added automatically
- * by DSA on the CPU port.
- */
- if (!dsa_is_user_port(priv->ds, v->port))
- continue;
-
- subvlan = sja1105_find_subvlan(subvlan_map[v->port],
- v->vid);
- if (subvlan < 0) {
- subvlan = sja1105_find_free_subvlan(subvlan_map[v->port],
- v->pvid);
- if (subvlan < 0) {
- dev_err(priv->ds->dev, "No more free subvlans\n");
- return -ENOSPC;
- }
- }
-
- rx_vid = dsa_8021q_rx_vid_subvlan(priv->ds, v->port, subvlan);
-
- /* @v->vid on @v->port needs to be retagged to @rx_vid
- * on @upstream. Assume @v->vid on @v->port and on
- * @upstream was already configured by the previous
- * iteration over bridge_vlans.
- */
- match = rx_vid;
- new_vlan[match].vlanid = rx_vid;
- new_vlan[match].vmemb_port |= BIT(v->port);
- new_vlan[match].vmemb_port |= BIT(upstream);
- new_vlan[match].vlan_bc |= BIT(v->port);
- new_vlan[match].vlan_bc |= BIT(upstream);
- /* The "untagged" flag is set the same as for the
- * original VLAN
- */
- if (!v->untagged)
- new_vlan[match].tag_port |= BIT(v->port);
- /* But it's always tagged towards the CPU */
- new_vlan[match].tag_port |= BIT(upstream);
- new_vlan[match].type_entry = SJA1110_VLAN_D_TAG;
-
- /* The Retagging Table generates packet *clones* with
- * the new VLAN. This is a very odd hardware quirk
- * which we need to suppress by dropping the original
- * packet.
- * Deny egress of the original VLAN towards the CPU
- * port. This will force the switch to drop it, and
- * we'll see only the retagged packets.
- */
- match = v->vid;
- new_vlan[match].vlan_bc &= ~BIT(upstream);
-
- /* And the retagging itself */
- new_retagging[k].vlan_ing = v->vid;
- new_retagging[k].vlan_egr = rx_vid;
- new_retagging[k].ing_port = BIT(v->port);
- new_retagging[k].egr_port = BIT(upstream);
- if (k++ == SJA1105_MAX_RETAGGING_COUNT) {
- dev_err(priv->ds->dev, "No more retagging rules\n");
- return -ENOSPC;
- }
-
- subvlan_map[v->port][subvlan] = v->vid;
- }
-
- *num_retagging = k;
-
- return 0;
-}
-
-/* Sadly, in crosschip scenarios where the CPU port is also the link to another
- * switch, we should retag backwards (the dsa_8021q vid to the original vid) on
- * the CPU port of neighbour switches.
- */
-static int
-sja1105_build_crosschip_subvlans(struct sja1105_private *priv,
- struct sja1105_vlan_lookup_entry *new_vlan,
- struct sja1105_retagging_entry *new_retagging,
- int *num_retagging)
-{
- struct sja1105_crosschip_vlan *tmp, *pos;
- struct dsa_8021q_crosschip_link *c;
- struct sja1105_bridge_vlan *v, *w;
- struct list_head crosschip_vlans;
- int k = *num_retagging;
- int rc = 0;
-
- if (priv->vlan_state != SJA1105_VLAN_BEST_EFFORT)
- return 0;
-
- INIT_LIST_HEAD(&crosschip_vlans);
-
- list_for_each_entry(c, &priv->dsa_8021q_ctx->crosschip_links, list) {
- struct sja1105_private *other_priv = c->other_ctx->ds->priv;
-
- if (other_priv->vlan_state == SJA1105_VLAN_FILTERING_FULL)
- continue;
-
- /* Crosschip links are also added to the CPU ports.
- * Ignore those.
- */
- if (!dsa_is_user_port(priv->ds, c->port))
- continue;
- if (!dsa_is_user_port(c->other_ctx->ds, c->other_port))
- continue;
-
- /* Search for VLANs on the remote port */
- list_for_each_entry(v, &other_priv->bridge_vlans, list) {
- bool already_added = false;
- bool we_have_it = false;
-
- if (v->port != c->other_port)
- continue;
-
- /* If @v is a pvid on @other_ds, it does not need
- * re-retagging, because its SVL field is 0 and we
- * already allow that, via the dsa_8021q crosschip
- * links.
- */
- if (v->pvid)
- continue;
-
- /* Search for the VLAN on our local port */
- list_for_each_entry(w, &priv->bridge_vlans, list) {
- if (w->port == c->port && w->vid == v->vid) {
- we_have_it = true;
- break;
- }
- }
-
- if (!we_have_it)
- continue;
-
- list_for_each_entry(tmp, &crosschip_vlans, list) {
- if (tmp->vid == v->vid &&
- tmp->untagged == v->untagged &&
- tmp->port == c->port &&
- tmp->other_port == v->port &&
- tmp->other_ctx == c->other_ctx) {
- already_added = true;
- break;
- }
- }
-
- if (already_added)
- continue;
-
- tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
- if (!tmp) {
- dev_err(priv->ds->dev, "Failed to allocate memory\n");
- rc = -ENOMEM;
- goto out;
- }
- tmp->vid = v->vid;
- tmp->port = c->port;
- tmp->other_port = v->port;
- tmp->other_ctx = c->other_ctx;
- tmp->untagged = v->untagged;
- list_add(&tmp->list, &crosschip_vlans);
- }
- }
-
- list_for_each_entry(tmp, &crosschip_vlans, list) {
- struct sja1105_private *other_priv = tmp->other_ctx->ds->priv;
- int upstream = dsa_upstream_port(priv->ds, tmp->port);
- int match, subvlan;
- u16 rx_vid;
-
- subvlan = sja1105_find_committed_subvlan(other_priv,
- tmp->other_port,
- tmp->vid);
- /* If this happens, it's a bug. The neighbour switch does not
- * have a subvlan for tmp->vid on tmp->other_port, but it
- * should, since we already checked for its vlan_state.
- */
- if (WARN_ON(subvlan < 0)) {
- rc = -EINVAL;
- goto out;
- }
-
- rx_vid = dsa_8021q_rx_vid_subvlan(tmp->other_ctx->ds,
- tmp->other_port,
- subvlan);
-
- /* The @rx_vid retagged from @tmp->vid on
- * {@tmp->other_ds, @tmp->other_port} needs to be
- * re-retagged to @tmp->vid on the way back to us.
- *
- * Assume the original @tmp->vid is already configured
- * on this local switch, otherwise we wouldn't be
- * retagging its subvlan on the other switch in the
- * first place. We just need to add a reverse retagging
- * rule for @rx_vid and install @rx_vid on our ports.
- */
- match = rx_vid;
- new_vlan[match].vlanid = rx_vid;
- new_vlan[match].vmemb_port |= BIT(tmp->port);
- new_vlan[match].vmemb_port |= BIT(upstream);
- /* The "untagged" flag is set the same as for the
- * original VLAN. And towards the CPU, it doesn't
- * really matter, because @rx_vid will only receive
- * traffic on that port. For consistency with other dsa_8021q
- * VLANs, we'll keep the CPU port tagged.
- */
- if (!tmp->untagged)
- new_vlan[match].tag_port |= BIT(tmp->port);
- new_vlan[match].tag_port |= BIT(upstream);
- new_vlan[match].type_entry = SJA1110_VLAN_D_TAG;
- /* Deny egress of @rx_vid towards our front-panel port.
- * This will force the switch to drop it, and we'll see
- * only the re-retagged packets (having the original,
- * pre-initial-retagging, VLAN @tmp->vid).
- */
- new_vlan[match].vlan_bc &= ~BIT(tmp->port);
-
- /* On reverse retagging, the same ingress VLAN goes to multiple
- * ports. So we have an opportunity to create composite rules
- * to not waste the limited space in the retagging table.
- */
- k = sja1105_find_retagging_entry(new_retagging, *num_retagging,
- upstream, rx_vid, tmp->vid);
- if (k < 0) {
- if (*num_retagging == SJA1105_MAX_RETAGGING_COUNT) {
- dev_err(priv->ds->dev, "No more retagging rules\n");
- rc = -ENOSPC;
- goto out;
- }
- k = (*num_retagging)++;
- }
- /* And the retagging itself */
- new_retagging[k].vlan_ing = rx_vid;
- new_retagging[k].vlan_egr = tmp->vid;
- new_retagging[k].ing_port = BIT(upstream);
- new_retagging[k].egr_port |= BIT(tmp->port);
- }
-
-out:
- list_for_each_entry_safe(tmp, pos, &crosschip_vlans, list) {
- list_del(&tmp->list);
- kfree(tmp);
- }
-
- return rc;
-}
-
-static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify);
-
-static int sja1105_notify_crosschip_switches(struct sja1105_private *priv)
-{
- struct sja1105_crosschip_switch *s, *pos;
- struct list_head crosschip_switches;
- struct dsa_8021q_crosschip_link *c;
- int rc = 0;
-
- INIT_LIST_HEAD(&crosschip_switches);
-
- list_for_each_entry(c, &priv->dsa_8021q_ctx->crosschip_links, list) {
- bool already_added = false;
-
- list_for_each_entry(s, &crosschip_switches, list) {
- if (s->other_ctx == c->other_ctx) {
- already_added = true;
- break;
- }
- }
-
- if (already_added)
- continue;
-
- s = kzalloc(sizeof(*s), GFP_KERNEL);
- if (!s) {
- dev_err(priv->ds->dev, "Failed to allocate memory\n");
- rc = -ENOMEM;
- goto out;
- }
- s->other_ctx = c->other_ctx;
- list_add(&s->list, &crosschip_switches);
- }
-
- list_for_each_entry(s, &crosschip_switches, list) {
- struct sja1105_private *other_priv = s->other_ctx->ds->priv;
-
- rc = sja1105_build_vlan_table(other_priv, false);
- if (rc)
- goto out;
- }
-
-out:
- list_for_each_entry_safe(s, pos, &crosschip_switches, list) {
- list_del(&s->list);
- kfree(s);
- }
-
- return rc;
-}
-
-static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify)
-{
- u16 subvlan_map[SJA1105_MAX_NUM_PORTS][DSA_8021Q_N_SUBVLAN];
- struct sja1105_retagging_entry *new_retagging;
- struct sja1105_vlan_lookup_entry *new_vlan;
- struct sja1105_table *table;
- int i, num_retagging = 0;
- int rc;
-
- table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
- new_vlan = kcalloc(VLAN_N_VID,
- table->ops->unpacked_entry_size, GFP_KERNEL);
- if (!new_vlan)
- return -ENOMEM;
-
- table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
- new_retagging = kcalloc(SJA1105_MAX_RETAGGING_COUNT,
- table->ops->unpacked_entry_size, GFP_KERNEL);
- if (!new_retagging) {
- kfree(new_vlan);
- return -ENOMEM;
- }
-
- for (i = 0; i < VLAN_N_VID; i++)
- new_vlan[i].vlanid = VLAN_N_VID;
-
- for (i = 0; i < SJA1105_MAX_RETAGGING_COUNT; i++)
- new_retagging[i].vlan_ing = VLAN_N_VID;
-
- for (i = 0; i < priv->ds->num_ports; i++)
- sja1105_init_subvlan_map(subvlan_map[i]);
-
- /* Bridge VLANs */
- rc = sja1105_build_bridge_vlans(priv, new_vlan);
- if (rc)
- goto out;
-
- /* VLANs necessary for dsa_8021q operation, given to us by tag_8021q.c:
- * - RX VLANs
- * - TX VLANs
- * - Crosschip links
- */
- rc = sja1105_build_dsa_8021q_vlans(priv, new_vlan);
- if (rc)
- goto out;
-
- /* Private VLANs necessary for dsa_8021q operation, which we need to
- * determine on our own:
- * - Sub-VLANs
- * - Sub-VLANs of crosschip switches
- */
- rc = sja1105_build_subvlans(priv, subvlan_map, new_vlan, new_retagging,
- &num_retagging);
- if (rc)
- goto out;
-
- rc = sja1105_build_crosschip_subvlans(priv, new_vlan, new_retagging,
- &num_retagging);
- if (rc)
- goto out;
-
- rc = sja1105_commit_vlans(priv, new_vlan, new_retagging, num_retagging);
- if (rc)
- goto out;
-
- rc = sja1105_commit_pvid(priv);
- if (rc)
- goto out;
-
- for (i = 0; i < priv->ds->num_ports; i++)
- sja1105_commit_subvlan_map(priv, i, subvlan_map[i]);
-
- if (notify) {
- rc = sja1105_notify_crosschip_switches(priv);
- if (rc)
- goto out;
- }
-
-out:
- kfree(new_vlan);
- kfree(new_retagging);
-
- return rc;
-}
-
/* The TPID setting belongs to the General Parameters table,
* which can only be partially reconfigured at runtime (and not the TPID).
* So a switch reset is required.
@@ -2810,10 +2272,8 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
struct sja1105_l2_lookup_params_entry *l2_lookup_params;
struct sja1105_general_params_entry *general_params;
struct sja1105_private *priv = ds->priv;
- enum sja1105_vlan_state state;
struct sja1105_table *table;
struct sja1105_rule *rule;
- bool want_tagging;
u16 tpid, tpid2;
int rc;
@@ -2835,28 +2295,10 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
tpid2 = ETH_P_SJA1105;
}
- for (port = 0; port < ds->num_ports; port++) {
- struct sja1105_port *sp = &priv->ports[port];
-
- if (enabled)
- sp->xmit_tpid = priv->info->qinq_tpid;
- else
- sp->xmit_tpid = ETH_P_SJA1105;
- }
-
- if (!enabled)
- state = SJA1105_VLAN_UNAWARE;
- else if (priv->best_effort_vlan_filtering)
- state = SJA1105_VLAN_BEST_EFFORT;
- else
- state = SJA1105_VLAN_FILTERING_FULL;
-
- if (priv->vlan_state == state)
+ if (priv->vlan_aware == enabled)
return 0;
- priv->vlan_state = state;
- want_tagging = (state == SJA1105_VLAN_UNAWARE ||
- state == SJA1105_VLAN_BEST_EFFORT);
+ priv->vlan_aware = enabled;
table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
general_params = table->entries;
@@ -2870,8 +2312,6 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
general_params->incl_srcpt1 = enabled;
general_params->incl_srcpt0 = enabled;
- want_tagging = priv->best_effort_vlan_filtering || !enabled;
-
/* VLAN filtering => independent VLAN learning.
* No VLAN filtering (or best effort) => shared VLAN learning.
*
@@ -2892,313 +2332,205 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
*/
table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP_PARAMS];
l2_lookup_params = table->entries;
- l2_lookup_params->shared_learn = want_tagging;
+ l2_lookup_params->shared_learn = !priv->vlan_aware;
- sja1105_frame_memory_partitioning(priv);
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_is_unused_port(ds, port))
+ continue;
- rc = sja1105_build_vlan_table(priv, false);
- if (rc)
- return rc;
+ rc = sja1105_commit_pvid(ds, port);
+ if (rc)
+ return rc;
+ }
rc = sja1105_static_config_reload(priv, SJA1105_VLAN_FILTERING);
if (rc)
NL_SET_ERR_MSG_MOD(extack, "Failed to change VLAN Ethertype");
- /* Switch port identification based on 802.1Q is only passable
- * if we are not under a vlan_filtering bridge. So make sure
- * the two configurations are mutually exclusive (of course, the
- * user may know better, i.e. best_effort_vlan_filtering).
- */
- return sja1105_setup_8021q_tagging(ds, want_tagging);
+ return rc;
}
-/* Returns number of VLANs added (0 or 1) on success,
- * or a negative error code.
- */
-static int sja1105_vlan_add_one(struct dsa_switch *ds, int port, u16 vid,
- u16 flags, struct list_head *vlan_list)
-{
- bool untagged = flags & BRIDGE_VLAN_INFO_UNTAGGED;
- bool pvid = flags & BRIDGE_VLAN_INFO_PVID;
- struct sja1105_bridge_vlan *v;
-
- list_for_each_entry(v, vlan_list, list) {
- if (v->port == port && v->vid == vid) {
- /* Already added */
- if (v->untagged == untagged && v->pvid == pvid)
- /* Nothing changed */
- return 0;
-
- /* It's the same VLAN, but some of the flags changed
- * and the user did not bother to delete it first.
- * Update it and trigger sja1105_build_vlan_table.
- */
- v->untagged = untagged;
- v->pvid = pvid;
- return 1;
- }
- }
+static int sja1105_vlan_add(struct sja1105_private *priv, int port, u16 vid,
+ u16 flags, bool allowed_ingress)
+{
+ struct sja1105_vlan_lookup_entry *vlan;
+ struct sja1105_table *table;
+ int match, rc;
- v = kzalloc(sizeof(*v), GFP_KERNEL);
- if (!v) {
- dev_err(ds->dev, "Out of memory while storing VLAN\n");
- return -ENOMEM;
+ table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
+
+ match = sja1105_is_vlan_configured(priv, vid);
+ if (match < 0) {
+ rc = sja1105_table_resize(table, table->entry_count + 1);
+ if (rc)
+ return rc;
+ match = table->entry_count - 1;
}
- v->port = port;
- v->vid = vid;
- v->untagged = untagged;
- v->pvid = pvid;
- list_add(&v->list, vlan_list);
+ /* Assign pointer after the resize (it's new memory) */
+ vlan = table->entries;
+
+ vlan[match].type_entry = SJA1110_VLAN_D_TAG;
+ vlan[match].vlanid = vid;
+ vlan[match].vlan_bc |= BIT(port);
- return 1;
+ if (allowed_ingress)
+ vlan[match].vmemb_port |= BIT(port);
+ else
+ vlan[match].vmemb_port &= ~BIT(port);
+
+ if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
+ vlan[match].tag_port &= ~BIT(port);
+ else
+ vlan[match].tag_port |= BIT(port);
+
+ return sja1105_dynamic_config_write(priv, BLK_IDX_VLAN_LOOKUP, vid,
+ &vlan[match], true);
}
-/* Returns number of VLANs deleted (0 or 1) */
-static int sja1105_vlan_del_one(struct dsa_switch *ds, int port, u16 vid,
- struct list_head *vlan_list)
+static int sja1105_vlan_del(struct sja1105_private *priv, int port, u16 vid)
{
- struct sja1105_bridge_vlan *v, *n;
+ struct sja1105_vlan_lookup_entry *vlan;
+ struct sja1105_table *table;
+ bool keep = true;
+ int match, rc;
- list_for_each_entry_safe(v, n, vlan_list, list) {
- if (v->port == port && v->vid == vid) {
- list_del(&v->list);
- kfree(v);
- return 1;
- }
- }
+ table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
+
+ match = sja1105_is_vlan_configured(priv, vid);
+ /* Can't delete a missing entry. */
+ if (match < 0)
+ return 0;
+
+ /* Assign pointer after the resize (it's new memory) */
+ vlan = table->entries;
+
+ vlan[match].vlanid = vid;
+ vlan[match].vlan_bc &= ~BIT(port);
+ vlan[match].vmemb_port &= ~BIT(port);
+ /* Also unset tag_port, just so we don't have a confusing bitmap
+ * (no practical purpose).
+ */
+ vlan[match].tag_port &= ~BIT(port);
+
+ /* If there's no port left as member of this VLAN,
+ * it's time for it to go.
+ */
+ if (!vlan[match].vmemb_port)
+ keep = false;
+
+ rc = sja1105_dynamic_config_write(priv, BLK_IDX_VLAN_LOOKUP, vid,
+ &vlan[match], keep);
+ if (rc < 0)
+ return rc;
+
+ if (!keep)
+ return sja1105_table_delete_entry(table, match);
return 0;
}
-static int sja1105_vlan_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct netlink_ext_ack *extack)
+static int sja1105_bridge_vlan_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack)
{
struct sja1105_private *priv = ds->priv;
- bool vlan_table_changed = false;
+ u16 flags = vlan->flags;
int rc;
- /* If the user wants best-effort VLAN filtering (aka vlan_filtering
- * bridge plus tagging), be sure to at least deny alterations to the
- * configuration done by dsa_8021q.
+ /* Be sure to deny alterations to the configuration done by tag_8021q.
*/
- if (priv->vlan_state != SJA1105_VLAN_FILTERING_FULL &&
- vid_is_dsa_8021q(vlan->vid)) {
+ if (vid_is_dsa_8021q(vlan->vid)) {
NL_SET_ERR_MSG_MOD(extack,
"Range 1024-3071 reserved for dsa_8021q operation");
return -EBUSY;
}
- rc = sja1105_vlan_add_one(ds, port, vlan->vid, vlan->flags,
- &priv->bridge_vlans);
- if (rc < 0)
+ /* Always install bridge VLANs as egress-tagged on CPU and DSA ports */
+ if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+ flags = 0;
+
+ rc = sja1105_vlan_add(priv, port, vlan->vid, flags, true);
+ if (rc)
return rc;
- if (rc > 0)
- vlan_table_changed = true;
- if (!vlan_table_changed)
- return 0;
+ if (vlan->flags & BRIDGE_VLAN_INFO_PVID)
+ priv->bridge_pvid[port] = vlan->vid;
- return sja1105_build_vlan_table(priv, true);
+ return sja1105_commit_pvid(ds, port);
}
-static int sja1105_vlan_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan)
+static int sja1105_bridge_vlan_del(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan)
{
struct sja1105_private *priv = ds->priv;
- bool vlan_table_changed = false;
int rc;
- rc = sja1105_vlan_del_one(ds, port, vlan->vid, &priv->bridge_vlans);
- if (rc > 0)
- vlan_table_changed = true;
-
- if (!vlan_table_changed)
- return 0;
+ rc = sja1105_vlan_del(priv, port, vlan->vid);
+ if (rc)
+ return rc;
- return sja1105_build_vlan_table(priv, true);
+ /* In case the pvid was deleted, make sure that untagged packets will
+ * be dropped.
+ */
+ return sja1105_commit_pvid(ds, port);
}
static int sja1105_dsa_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid,
u16 flags)
{
struct sja1105_private *priv = ds->priv;
+ bool allowed_ingress = true;
int rc;
- rc = sja1105_vlan_add_one(ds, port, vid, flags, &priv->dsa_8021q_vlans);
- if (rc <= 0)
+ /* Prevent attackers from trying to inject a DSA tag from
+ * the outside world.
+ */
+ if (dsa_is_user_port(ds, port))
+ allowed_ingress = false;
+
+ rc = sja1105_vlan_add(priv, port, vid, flags, allowed_ingress);
+ if (rc)
return rc;
- return sja1105_build_vlan_table(priv, true);
+ if (flags & BRIDGE_VLAN_INFO_PVID)
+ priv->tag_8021q_pvid[port] = vid;
+
+ return sja1105_commit_pvid(ds, port);
}
static int sja1105_dsa_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid)
{
struct sja1105_private *priv = ds->priv;
- int rc;
-
- rc = sja1105_vlan_del_one(ds, port, vid, &priv->dsa_8021q_vlans);
- if (!rc)
- return 0;
- return sja1105_build_vlan_table(priv, true);
+ return sja1105_vlan_del(priv, port, vid);
}
-static const struct dsa_8021q_ops sja1105_dsa_8021q_ops = {
- .vlan_add = sja1105_dsa_8021q_vlan_add,
- .vlan_del = sja1105_dsa_8021q_vlan_del,
-};
-
-/* The programming model for the SJA1105 switch is "all-at-once" via static
- * configuration tables. Some of these can be dynamically modified at runtime,
- * but not the xMII mode parameters table.
- * Furthermode, some PHYs may not have crystals for generating their clocks
- * (e.g. RMII). Instead, their 50MHz clock is supplied via the SJA1105 port's
- * ref_clk pin. So port clocking needs to be initialized early, before
- * connecting to PHYs is attempted, otherwise they won't respond through MDIO.
- * Setting correct PHY link speed does not matter now.
- * But dsa_slave_phy_setup is called later than sja1105_setup, so the PHY
- * bindings are not yet parsed by DSA core. We need to parse early so that we
- * can populate the xMII mode parameters table.
- */
-static int sja1105_setup(struct dsa_switch *ds)
+static int sja1105_prechangeupper(struct dsa_switch *ds, int port,
+ struct netdev_notifier_changeupper_info *info)
{
- struct sja1105_private *priv = ds->priv;
- int rc;
+ struct netlink_ext_ack *extack = info->info.extack;
+ struct net_device *upper = info->upper_dev;
+ struct dsa_switch_tree *dst = ds->dst;
+ struct dsa_port *dp;
- rc = sja1105_parse_dt(priv);
- if (rc < 0) {
- dev_err(ds->dev, "Failed to parse DT: %d\n", rc);
- return rc;
- }
-
- /* Error out early if internal delays are required through DT
- * and we can't apply them.
- */
- rc = sja1105_parse_rgmii_delays(priv);
- if (rc < 0) {
- dev_err(ds->dev, "RGMII delay not supported\n");
- return rc;
- }
-
- rc = sja1105_ptp_clock_register(ds);
- if (rc < 0) {
- dev_err(ds->dev, "Failed to register PTP clock: %d\n", rc);
- return rc;
- }
-
- rc = sja1105_mdiobus_register(ds);
- if (rc < 0) {
- dev_err(ds->dev, "Failed to register MDIO bus: %pe\n",
- ERR_PTR(rc));
- goto out_ptp_clock_unregister;
- }
-
- if (priv->info->disable_microcontroller) {
- rc = priv->info->disable_microcontroller(priv);
- if (rc < 0) {
- dev_err(ds->dev,
- "Failed to disable microcontroller: %pe\n",
- ERR_PTR(rc));
- goto out_mdiobus_unregister;
- }
- }
-
- /* Create and send configuration down to device */
- rc = sja1105_static_config_load(priv);
- if (rc < 0) {
- dev_err(ds->dev, "Failed to load static config: %d\n", rc);
- goto out_mdiobus_unregister;
+ if (is_vlan_dev(upper)) {
+ NL_SET_ERR_MSG_MOD(extack, "8021q uppers are not supported");
+ return -EBUSY;
}
- /* Configure the CGU (PHY link modes and speeds) */
- if (priv->info->clocking_setup) {
- rc = priv->info->clocking_setup(priv);
- if (rc < 0) {
- dev_err(ds->dev,
- "Failed to configure MII clocking: %pe\n",
- ERR_PTR(rc));
- goto out_static_config_free;
+ if (netif_is_bridge_master(upper)) {
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->bridge_dev && dp->bridge_dev != upper &&
+ br_vlan_enabled(dp->bridge_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Only one VLAN-aware bridge is supported");
+ return -EBUSY;
+ }
}
}
- /* On SJA1105, VLAN filtering per se is always enabled in hardware.
- * The only thing we can do to disable it is lie about what the 802.1Q
- * EtherType is.
- * So it will still try to apply VLAN filtering, but all ingress
- * traffic (except frames received with EtherType of ETH_P_SJA1105)
- * will be internally tagged with a distorted VLAN header where the
- * TPID is ETH_P_SJA1105, and the VLAN ID is the port pvid.
- */
- ds->vlan_filtering_is_global = true;
-
- /* Advertise the 8 egress queues */
- ds->num_tx_queues = SJA1105_NUM_TC;
-
- ds->mtu_enforcement_ingress = true;
-
- priv->best_effort_vlan_filtering = true;
-
- rc = sja1105_devlink_setup(ds);
- if (rc < 0)
- goto out_static_config_free;
-
- /* The DSA/switchdev model brings up switch ports in standalone mode by
- * default, and that means vlan_filtering is 0 since they're not under
- * a bridge, so it's safe to set up switch tagging at this time.
- */
- rtnl_lock();
- rc = sja1105_setup_8021q_tagging(ds, true);
- rtnl_unlock();
- if (rc)
- goto out_devlink_teardown;
-
return 0;
-
-out_devlink_teardown:
- sja1105_devlink_teardown(ds);
-out_mdiobus_unregister:
- sja1105_mdiobus_unregister(ds);
-out_ptp_clock_unregister:
- sja1105_ptp_clock_unregister(ds);
-out_static_config_free:
- sja1105_static_config_free(&priv->static_config);
-
- return rc;
-}
-
-static void sja1105_teardown(struct dsa_switch *ds)
-{
- struct sja1105_private *priv = ds->priv;
- struct sja1105_bridge_vlan *v, *n;
- int port;
-
- for (port = 0; port < ds->num_ports; port++) {
- struct sja1105_port *sp = &priv->ports[port];
-
- if (!dsa_is_user_port(ds, port))
- continue;
-
- if (sp->xmit_worker)
- kthread_destroy_worker(sp->xmit_worker);
- }
-
- sja1105_devlink_teardown(ds);
- sja1105_flower_teardown(ds);
- sja1105_tas_teardown(ds);
- sja1105_ptp_clock_unregister(ds);
- sja1105_static_config_free(&priv->static_config);
-
- list_for_each_entry_safe(v, n, &priv->dsa_8021q_vlans, list) {
- list_del(&v->list);
- kfree(v);
- }
-
- list_for_each_entry_safe(v, n, &priv->bridge_vlans, list) {
- list_del(&v->list);
- kfree(v);
- }
}
static void sja1105_port_disable(struct dsa_switch *ds, int port)
@@ -3334,7 +2666,7 @@ static int sja1105_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
new_mtu += VLAN_ETH_HLEN + ETH_FCS_LEN;
- if (dsa_is_cpu_port(ds, port))
+ if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
new_mtu += VLAN_HLEN;
policing = priv->static_config.tables[BLK_IDX_L2_POLICING].entries;
@@ -3481,23 +2813,13 @@ static int sja1105_port_set_learning(struct sja1105_private *priv, int port,
bool enabled)
{
struct sja1105_mac_config_entry *mac;
- int rc;
mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
mac[port].dyn_learn = enabled;
- rc = sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
- &mac[port], true);
- if (rc)
- return rc;
-
- if (enabled)
- priv->learn_ena |= BIT(port);
- else
- priv->learn_ena &= ~BIT(port);
-
- return 0;
+ return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
+ &mac[port], true);
}
static int sja1105_port_ucast_bcast_flood(struct sja1105_private *priv, int to,
@@ -3613,7 +2935,189 @@ static int sja1105_port_bridge_flags(struct dsa_switch *ds, int port,
return 0;
}
-static const struct dsa_switch_ops sja1105_switch_ops = {
+static void sja1105_teardown_ports(struct sja1105_private *priv)
+{
+ struct dsa_switch *ds = priv->ds;
+ int port;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ struct sja1105_port *sp = &priv->ports[port];
+
+ if (sp->xmit_worker)
+ kthread_destroy_worker(sp->xmit_worker);
+ }
+}
+
+static int sja1105_setup_ports(struct sja1105_private *priv)
+{
+ struct sja1105_tagger_data *tagger_data = &priv->tagger_data;
+ struct dsa_switch *ds = priv->ds;
+ int port, rc;
+
+ /* Connections between dsa_port and sja1105_port */
+ for (port = 0; port < ds->num_ports; port++) {
+ struct sja1105_port *sp = &priv->ports[port];
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct kthread_worker *worker;
+ struct net_device *slave;
+
+ if (!dsa_port_is_user(dp))
+ continue;
+
+ dp->priv = sp;
+ sp->dp = dp;
+ sp->data = tagger_data;
+ slave = dp->slave;
+ kthread_init_work(&sp->xmit_work, sja1105_port_deferred_xmit);
+ worker = kthread_create_worker(0, "%s_xmit", slave->name);
+ if (IS_ERR(worker)) {
+ rc = PTR_ERR(worker);
+ dev_err(ds->dev,
+ "failed to create deferred xmit thread: %d\n",
+ rc);
+ goto out_destroy_workers;
+ }
+ sp->xmit_worker = worker;
+ skb_queue_head_init(&sp->xmit_queue);
+ }
+
+ return 0;
+
+out_destroy_workers:
+ sja1105_teardown_ports(priv);
+ return rc;
+}
+
+/* The programming model for the SJA1105 switch is "all-at-once" via static
+ * configuration tables. Some of these can be dynamically modified at runtime,
+ * but not the xMII mode parameters table.
+ * Furthermode, some PHYs may not have crystals for generating their clocks
+ * (e.g. RMII). Instead, their 50MHz clock is supplied via the SJA1105 port's
+ * ref_clk pin. So port clocking needs to be initialized early, before
+ * connecting to PHYs is attempted, otherwise they won't respond through MDIO.
+ * Setting correct PHY link speed does not matter now.
+ * But dsa_slave_phy_setup is called later than sja1105_setup, so the PHY
+ * bindings are not yet parsed by DSA core. We need to parse early so that we
+ * can populate the xMII mode parameters table.
+ */
+static int sja1105_setup(struct dsa_switch *ds)
+{
+ struct sja1105_private *priv = ds->priv;
+ int rc;
+
+ if (priv->info->disable_microcontroller) {
+ rc = priv->info->disable_microcontroller(priv);
+ if (rc < 0) {
+ dev_err(ds->dev,
+ "Failed to disable microcontroller: %pe\n",
+ ERR_PTR(rc));
+ return rc;
+ }
+ }
+
+ /* Create and send configuration down to device */
+ rc = sja1105_static_config_load(priv);
+ if (rc < 0) {
+ dev_err(ds->dev, "Failed to load static config: %d\n", rc);
+ return rc;
+ }
+
+ /* Configure the CGU (PHY link modes and speeds) */
+ if (priv->info->clocking_setup) {
+ rc = priv->info->clocking_setup(priv);
+ if (rc < 0) {
+ dev_err(ds->dev,
+ "Failed to configure MII clocking: %pe\n",
+ ERR_PTR(rc));
+ goto out_static_config_free;
+ }
+ }
+
+ rc = sja1105_setup_ports(priv);
+ if (rc)
+ goto out_static_config_free;
+
+ sja1105_tas_setup(ds);
+ sja1105_flower_setup(ds);
+
+ rc = sja1105_ptp_clock_register(ds);
+ if (rc < 0) {
+ dev_err(ds->dev, "Failed to register PTP clock: %d\n", rc);
+ goto out_flower_teardown;
+ }
+
+ rc = sja1105_mdiobus_register(ds);
+ if (rc < 0) {
+ dev_err(ds->dev, "Failed to register MDIO bus: %pe\n",
+ ERR_PTR(rc));
+ goto out_ptp_clock_unregister;
+ }
+
+ rc = sja1105_devlink_setup(ds);
+ if (rc < 0)
+ goto out_mdiobus_unregister;
+
+ rtnl_lock();
+ rc = dsa_tag_8021q_register(ds, htons(ETH_P_8021Q));
+ rtnl_unlock();
+ if (rc)
+ goto out_devlink_teardown;
+
+ /* On SJA1105, VLAN filtering per se is always enabled in hardware.
+ * The only thing we can do to disable it is lie about what the 802.1Q
+ * EtherType is.
+ * So it will still try to apply VLAN filtering, but all ingress
+ * traffic (except frames received with EtherType of ETH_P_SJA1105)
+ * will be internally tagged with a distorted VLAN header where the
+ * TPID is ETH_P_SJA1105, and the VLAN ID is the port pvid.
+ */
+ ds->vlan_filtering_is_global = true;
+ ds->untag_bridge_pvid = true;
+ /* tag_8021q has 3 bits for the VBID, and the value 0 is reserved */
+ ds->num_fwd_offloading_bridges = 7;
+
+ /* Advertise the 8 egress queues */
+ ds->num_tx_queues = SJA1105_NUM_TC;
+
+ ds->mtu_enforcement_ingress = true;
+ ds->assisted_learning_on_cpu_port = true;
+
+ return 0;
+
+out_devlink_teardown:
+ sja1105_devlink_teardown(ds);
+out_mdiobus_unregister:
+ sja1105_mdiobus_unregister(ds);
+out_ptp_clock_unregister:
+ sja1105_ptp_clock_unregister(ds);
+out_flower_teardown:
+ sja1105_flower_teardown(ds);
+ sja1105_tas_teardown(ds);
+ sja1105_teardown_ports(priv);
+out_static_config_free:
+ sja1105_static_config_free(&priv->static_config);
+
+ return rc;
+}
+
+static void sja1105_teardown(struct dsa_switch *ds)
+{
+ struct sja1105_private *priv = ds->priv;
+
+ rtnl_lock();
+ dsa_tag_8021q_unregister(ds);
+ rtnl_unlock();
+
+ sja1105_devlink_teardown(ds);
+ sja1105_mdiobus_unregister(ds);
+ sja1105_ptp_clock_unregister(ds);
+ sja1105_flower_teardown(ds);
+ sja1105_tas_teardown(ds);
+ sja1105_teardown_ports(priv);
+ sja1105_static_config_free(&priv->static_config);
+}
+
+const struct dsa_switch_ops sja1105_switch_ops = {
.get_tag_protocol = sja1105_get_tag_protocol,
.setup = sja1105_setup,
.teardown = sja1105_teardown,
@@ -3632,14 +3136,15 @@ static const struct dsa_switch_ops sja1105_switch_ops = {
.port_fdb_dump = sja1105_fdb_dump,
.port_fdb_add = sja1105_fdb_add,
.port_fdb_del = sja1105_fdb_del,
+ .port_fast_age = sja1105_fast_age,
.port_bridge_join = sja1105_bridge_join,
.port_bridge_leave = sja1105_bridge_leave,
.port_pre_bridge_flags = sja1105_port_pre_bridge_flags,
.port_bridge_flags = sja1105_port_bridge_flags,
.port_stp_state_set = sja1105_bridge_stp_state_set,
.port_vlan_filtering = sja1105_vlan_filtering,
- .port_vlan_add = sja1105_vlan_add,
- .port_vlan_del = sja1105_vlan_del,
+ .port_vlan_add = sja1105_bridge_vlan_add,
+ .port_vlan_del = sja1105_bridge_vlan_del,
.port_mdb_add = sja1105_mdb_add,
.port_mdb_del = sja1105_mdb_del,
.port_hwtstamp_get = sja1105_hwtstamp_get,
@@ -3654,12 +3159,14 @@ static const struct dsa_switch_ops sja1105_switch_ops = {
.cls_flower_add = sja1105_cls_flower_add,
.cls_flower_del = sja1105_cls_flower_del,
.cls_flower_stats = sja1105_cls_flower_stats,
- .crosschip_bridge_join = sja1105_crosschip_bridge_join,
- .crosschip_bridge_leave = sja1105_crosschip_bridge_leave,
- .devlink_param_get = sja1105_devlink_param_get,
- .devlink_param_set = sja1105_devlink_param_set,
.devlink_info_get = sja1105_devlink_info_get,
+ .tag_8021q_vlan_add = sja1105_dsa_8021q_vlan_add,
+ .tag_8021q_vlan_del = sja1105_dsa_8021q_vlan_del,
+ .port_prechangeupper = sja1105_prechangeupper,
+ .port_bridge_tx_fwd_offload = dsa_tag_8021q_bridge_tx_fwd_offload,
+ .port_bridge_tx_fwd_unoffload = dsa_tag_8021q_bridge_tx_fwd_unoffload,
};
+EXPORT_SYMBOL_GPL(sja1105_switch_ops);
static const struct of_device_id sja1105_dt_ids[];
@@ -3712,12 +3219,11 @@ static int sja1105_check_device_id(struct sja1105_private *priv)
static int sja1105_probe(struct spi_device *spi)
{
- struct sja1105_tagger_data *tagger_data;
struct device *dev = &spi->dev;
struct sja1105_private *priv;
size_t max_xfer, max_msg;
struct dsa_switch *ds;
- int rc, port;
+ int rc;
if (!dev->of_node) {
dev_err(dev, "No DTS bindings for SJA1105 driver\n");
@@ -3797,95 +3303,42 @@ static int sja1105_probe(struct spi_device *spi)
ds->priv = priv;
priv->ds = ds;
- tagger_data = &priv->tagger_data;
-
mutex_init(&priv->ptp_data.lock);
mutex_init(&priv->mgmt_lock);
- priv->dsa_8021q_ctx = devm_kzalloc(dev, sizeof(*priv->dsa_8021q_ctx),
- GFP_KERNEL);
- if (!priv->dsa_8021q_ctx)
- return -ENOMEM;
-
- priv->dsa_8021q_ctx->ops = &sja1105_dsa_8021q_ops;
- priv->dsa_8021q_ctx->proto = htons(ETH_P_8021Q);
- priv->dsa_8021q_ctx->ds = ds;
-
- INIT_LIST_HEAD(&priv->dsa_8021q_ctx->crosschip_links);
- INIT_LIST_HEAD(&priv->bridge_vlans);
- INIT_LIST_HEAD(&priv->dsa_8021q_vlans);
-
- sja1105_tas_setup(ds);
- sja1105_flower_setup(ds);
+ rc = sja1105_parse_dt(priv);
+ if (rc < 0) {
+ dev_err(ds->dev, "Failed to parse DT: %d\n", rc);
+ return rc;
+ }
- rc = dsa_register_switch(priv->ds);
- if (rc)
+ /* Error out early if internal delays are required through DT
+ * and we can't apply them.
+ */
+ rc = sja1105_parse_rgmii_delays(priv);
+ if (rc < 0) {
+ dev_err(ds->dev, "RGMII delay not supported\n");
return rc;
+ }
if (IS_ENABLED(CONFIG_NET_SCH_CBS)) {
priv->cbs = devm_kcalloc(dev, priv->info->num_cbs_shapers,
sizeof(struct sja1105_cbs_entry),
GFP_KERNEL);
- if (!priv->cbs) {
- rc = -ENOMEM;
- goto out_unregister_switch;
- }
- }
-
- /* Connections between dsa_port and sja1105_port */
- for (port = 0; port < ds->num_ports; port++) {
- struct sja1105_port *sp = &priv->ports[port];
- struct dsa_port *dp = dsa_to_port(ds, port);
- struct net_device *slave;
- int subvlan;
-
- if (!dsa_is_user_port(ds, port))
- continue;
-
- dp->priv = sp;
- sp->dp = dp;
- sp->data = tagger_data;
- slave = dp->slave;
- kthread_init_work(&sp->xmit_work, sja1105_port_deferred_xmit);
- sp->xmit_worker = kthread_create_worker(0, "%s_xmit",
- slave->name);
- if (IS_ERR(sp->xmit_worker)) {
- rc = PTR_ERR(sp->xmit_worker);
- dev_err(ds->dev,
- "failed to create deferred xmit thread: %d\n",
- rc);
- goto out_destroy_workers;
- }
- skb_queue_head_init(&sp->xmit_queue);
- sp->xmit_tpid = ETH_P_SJA1105;
-
- for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++)
- sp->subvlan_map[subvlan] = VLAN_N_VID;
- }
-
- return 0;
-
-out_destroy_workers:
- while (port-- > 0) {
- struct sja1105_port *sp = &priv->ports[port];
-
- if (!dsa_is_user_port(ds, port))
- continue;
-
- kthread_destroy_worker(sp->xmit_worker);
+ if (!priv->cbs)
+ return -ENOMEM;
}
-out_unregister_switch:
- dsa_unregister_switch(ds);
-
- return rc;
+ return dsa_register_switch(priv->ds);
}
static int sja1105_remove(struct spi_device *spi)
{
struct sja1105_private *priv = spi_get_drvdata(spi);
+ struct dsa_switch *ds = priv->ds;
+
+ dsa_unregister_switch(ds);
- dsa_unregister_switch(priv->ds);
return 0;
}
diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c
index 19aea8fb76f6..705d3900e43a 100644
--- a/drivers/net/dsa/sja1105/sja1105_mdio.c
+++ b/drivers/net/dsa/sja1105/sja1105_mdio.c
@@ -284,8 +284,7 @@ static int sja1105_mdiobus_base_tx_register(struct sja1105_private *priv,
struct mii_bus *bus;
int rc = 0;
- np = of_find_compatible_node(mdio_node, NULL,
- "nxp,sja1110-base-tx-mdio");
+ np = of_get_compatible_child(mdio_node, "nxp,sja1110-base-tx-mdio");
if (!np)
return 0;
@@ -339,8 +338,7 @@ static int sja1105_mdiobus_base_t1_register(struct sja1105_private *priv,
struct mii_bus *bus;
int rc = 0;
- np = of_find_compatible_node(mdio_node, NULL,
- "nxp,sja1110-base-t1-mdio");
+ np = of_get_compatible_child(mdio_node, "nxp,sja1110-base-t1-mdio");
if (!np)
return 0;
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 08cc5dbf2fa6..d60a530d0272 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -575,7 +575,6 @@ const struct sja1105_info sja1105e_info = {
.part_no = SJA1105ET_PART_NO,
.static_ops = sja1105e_table_ops,
.dyn_ops = sja1105et_dyn_ops,
- .qinq_tpid = ETH_P_8021Q,
.tag_proto = DSA_TAG_PROTO_SJA1105,
.can_limit_mcast_flood = false,
.ptp_ts_bits = 24,
@@ -608,7 +607,6 @@ const struct sja1105_info sja1105t_info = {
.part_no = SJA1105ET_PART_NO,
.static_ops = sja1105t_table_ops,
.dyn_ops = sja1105et_dyn_ops,
- .qinq_tpid = ETH_P_8021Q,
.tag_proto = DSA_TAG_PROTO_SJA1105,
.can_limit_mcast_flood = false,
.ptp_ts_bits = 24,
@@ -641,7 +639,6 @@ const struct sja1105_info sja1105p_info = {
.part_no = SJA1105P_PART_NO,
.static_ops = sja1105p_table_ops,
.dyn_ops = sja1105pqrs_dyn_ops,
- .qinq_tpid = ETH_P_8021AD,
.tag_proto = DSA_TAG_PROTO_SJA1105,
.can_limit_mcast_flood = true,
.ptp_ts_bits = 32,
@@ -675,7 +672,6 @@ const struct sja1105_info sja1105q_info = {
.part_no = SJA1105Q_PART_NO,
.static_ops = sja1105q_table_ops,
.dyn_ops = sja1105pqrs_dyn_ops,
- .qinq_tpid = ETH_P_8021AD,
.tag_proto = DSA_TAG_PROTO_SJA1105,
.can_limit_mcast_flood = true,
.ptp_ts_bits = 32,
@@ -709,7 +705,6 @@ const struct sja1105_info sja1105r_info = {
.part_no = SJA1105R_PART_NO,
.static_ops = sja1105r_table_ops,
.dyn_ops = sja1105pqrs_dyn_ops,
- .qinq_tpid = ETH_P_8021AD,
.tag_proto = DSA_TAG_PROTO_SJA1105,
.can_limit_mcast_flood = true,
.ptp_ts_bits = 32,
@@ -747,7 +742,6 @@ const struct sja1105_info sja1105s_info = {
.static_ops = sja1105s_table_ops,
.dyn_ops = sja1105pqrs_dyn_ops,
.regs = &sja1105pqrs_regs,
- .qinq_tpid = ETH_P_8021AD,
.tag_proto = DSA_TAG_PROTO_SJA1105,
.can_limit_mcast_flood = true,
.ptp_ts_bits = 32,
@@ -784,7 +778,6 @@ const struct sja1105_info sja1110a_info = {
.static_ops = sja1110_table_ops,
.dyn_ops = sja1110_dyn_ops,
.regs = &sja1110_regs,
- .qinq_tpid = ETH_P_8021AD,
.tag_proto = DSA_TAG_PROTO_SJA1110,
.can_limit_mcast_flood = true,
.multiple_cascade_ports = true,
@@ -835,7 +828,6 @@ const struct sja1105_info sja1110b_info = {
.static_ops = sja1110_table_ops,
.dyn_ops = sja1110_dyn_ops,
.regs = &sja1110_regs,
- .qinq_tpid = ETH_P_8021AD,
.tag_proto = DSA_TAG_PROTO_SJA1110,
.can_limit_mcast_flood = true,
.multiple_cascade_ports = true,
@@ -886,7 +878,6 @@ const struct sja1105_info sja1110c_info = {
.static_ops = sja1110_table_ops,
.dyn_ops = sja1110_dyn_ops,
.regs = &sja1110_regs,
- .qinq_tpid = ETH_P_8021AD,
.tag_proto = DSA_TAG_PROTO_SJA1110,
.can_limit_mcast_flood = true,
.multiple_cascade_ports = true,
@@ -937,7 +928,6 @@ const struct sja1105_info sja1110d_info = {
.static_ops = sja1110_table_ops,
.dyn_ops = sja1110_dyn_ops,
.regs = &sja1110_regs,
- .qinq_tpid = ETH_P_8021AD,
.tag_proto = DSA_TAG_PROTO_SJA1110,
.can_limit_mcast_flood = true,
.multiple_cascade_ports = true,
diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c
index f6e13e6c6a18..ec7b65daec20 100644
--- a/drivers/net/dsa/sja1105/sja1105_vl.c
+++ b/drivers/net/dsa/sja1105/sja1105_vl.c
@@ -496,14 +496,11 @@ int sja1105_vl_redirect(struct sja1105_private *priv, int port,
struct sja1105_rule *rule = sja1105_rule_find(priv, cookie);
int rc;
- if (priv->vlan_state == SJA1105_VLAN_UNAWARE &&
- key->type != SJA1105_KEY_VLAN_UNAWARE_VL) {
+ if (!priv->vlan_aware && key->type != SJA1105_KEY_VLAN_UNAWARE_VL) {
NL_SET_ERR_MSG_MOD(extack,
"Can only redirect based on DMAC");
return -EOPNOTSUPP;
- } else if ((priv->vlan_state == SJA1105_VLAN_BEST_EFFORT ||
- priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) &&
- key->type != SJA1105_KEY_VLAN_AWARE_VL) {
+ } else if (priv->vlan_aware && key->type != SJA1105_KEY_VLAN_AWARE_VL) {
NL_SET_ERR_MSG_MOD(extack,
"Can only redirect based on {DMAC, VID, PCP}");
return -EOPNOTSUPP;
@@ -595,14 +592,11 @@ int sja1105_vl_gate(struct sja1105_private *priv, int port,
return -ERANGE;
}
- if (priv->vlan_state == SJA1105_VLAN_UNAWARE &&
- key->type != SJA1105_KEY_VLAN_UNAWARE_VL) {
+ if (!priv->vlan_aware && key->type != SJA1105_KEY_VLAN_UNAWARE_VL) {
NL_SET_ERR_MSG_MOD(extack,
"Can only gate based on DMAC");
return -EOPNOTSUPP;
- } else if ((priv->vlan_state == SJA1105_VLAN_BEST_EFFORT ||
- priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) &&
- key->type != SJA1105_KEY_VLAN_AWARE_VL) {
+ } else if (priv->vlan_aware && key->type != SJA1105_KEY_VLAN_AWARE_VL) {
NL_SET_ERR_MSG_MOD(extack,
"Can only gate based on {DMAC, VID, PCP}");
return -EOPNOTSUPP;
diff --git a/drivers/net/eql.c b/drivers/net/eql.c
index 74263f8efe1a..8ef34901c2d8 100644
--- a/drivers/net/eql.c
+++ b/drivers/net/eql.c
@@ -113,6 +113,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/compat.h>
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/kernel.h>
@@ -131,7 +132,8 @@
static int eql_open(struct net_device *dev);
static int eql_close(struct net_device *dev);
-static int eql_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+static int eql_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd);
static netdev_tx_t eql_slave_xmit(struct sk_buff *skb, struct net_device *dev);
#define eql_is_slave(dev) ((dev->flags & IFF_SLAVE) == IFF_SLAVE)
@@ -170,7 +172,7 @@ static const char version[] __initconst =
static const struct net_device_ops eql_netdev_ops = {
.ndo_open = eql_open,
.ndo_stop = eql_close,
- .ndo_do_ioctl = eql_ioctl,
+ .ndo_siocdevprivate = eql_siocdevprivate,
.ndo_start_xmit = eql_slave_xmit,
};
@@ -268,25 +270,29 @@ static int eql_s_slave_cfg(struct net_device *dev, slave_config_t __user *sc);
static int eql_g_master_cfg(struct net_device *dev, master_config_t __user *mc);
static int eql_s_master_cfg(struct net_device *dev, master_config_t __user *mc);
-static int eql_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int eql_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
if (cmd != EQL_GETMASTRCFG && cmd != EQL_GETSLAVECFG &&
!capable(CAP_NET_ADMIN))
return -EPERM;
+ if (in_compat_syscall()) /* to be implemented */
+ return -EOPNOTSUPP;
+
switch (cmd) {
case EQL_ENSLAVE:
- return eql_enslave(dev, ifr->ifr_data);
+ return eql_enslave(dev, data);
case EQL_EMANCIPATE:
- return eql_emancipate(dev, ifr->ifr_data);
+ return eql_emancipate(dev, data);
case EQL_GETSLAVECFG:
- return eql_g_slave_cfg(dev, ifr->ifr_data);
+ return eql_g_slave_cfg(dev, data);
case EQL_SETSLAVECFG:
- return eql_s_slave_cfg(dev, ifr->ifr_data);
+ return eql_s_slave_cfg(dev, data);
case EQL_GETMASTRCFG:
- return eql_g_master_cfg(dev, ifr->ifr_data);
+ return eql_g_master_cfg(dev, data);
case EQL_SETMASTRCFG:
- return eql_s_master_cfg(dev, ifr->ifr_data);
+ return eql_s_master_cfg(dev, data);
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
index 96cc5fc36eb5..87c906e744fb 100644
--- a/drivers/net/ethernet/3com/3c509.c
+++ b/drivers/net/ethernet/3com/3c509.c
@@ -302,7 +302,6 @@ static int el3_isa_match(struct device *pdev, unsigned int ndev)
return -ENOMEM;
SET_NETDEV_DEV(dev, pdev);
- netdev_boot_setup_check(dev);
if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509-isa")) {
free_netdev(dev);
@@ -421,7 +420,6 @@ static int el3_pnp_probe(struct pnp_dev *pdev, const struct pnp_device_id *id)
return -ENOMEM;
}
SET_NETDEV_DEV(dev, &pdev->dev);
- netdev_boot_setup_check(dev);
el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_PNP);
pnp_set_drvdata(pdev, dev);
@@ -514,7 +512,9 @@ static int el3_common_init(struct net_device *dev)
{
struct el3_private *lp = netdev_priv(dev);
int err;
- const char *if_names[] = {"10baseT", "AUI", "undefined", "BNC"};
+ static const char * const if_names[] = {
+ "10baseT", "AUI", "undefined", "BNC"
+ };
spin_lock_init(&lp->lock);
@@ -588,7 +588,6 @@ static int el3_eisa_probe(struct device *device)
}
SET_NETDEV_DEV(dev, device);
- netdev_boot_setup_check(dev);
el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_EISA);
eisa_set_drvdata (edev, dev);
diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c
index 47b4215bb93b..8d90fed5d33e 100644
--- a/drivers/net/ethernet/3com/3c515.c
+++ b/drivers/net/ethernet/3com/3c515.c
@@ -407,7 +407,7 @@ MODULE_PARM_DESC(max_interrupt_work, "3c515 maximum events handled per interrupt
/* we will need locking (and refcounting) if we ever use it for more */
static LIST_HEAD(root_corkscrew_dev);
-int init_module(void)
+static int corkscrew_init_module(void)
{
int found = 0;
if (debug >= 0)
@@ -416,6 +416,7 @@ int init_module(void)
found++;
return found ? 0 : -ENODEV;
}
+module_init(corkscrew_init_module);
#else
struct net_device *tc515_probe(int unit)
diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c
index f66e7fb9a2bb..dd4d3c48b98d 100644
--- a/drivers/net/ethernet/3com/3c574_cs.c
+++ b/drivers/net/ethernet/3com/3c574_cs.c
@@ -252,7 +252,7 @@ static const struct net_device_ops el3_netdev_ops = {
.ndo_start_xmit = el3_start_xmit,
.ndo_tx_timeout = el3_tx_timeout,
.ndo_get_stats = el3_get_stats,
- .ndo_do_ioctl = el3_ioctl,
+ .ndo_eth_ioctl = el3_ioctl,
.ndo_set_rx_mode = set_multicast_list,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 7d7d3ffe25c3..17c16333a412 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -1052,7 +1052,7 @@ static const struct net_device_ops boomrang_netdev_ops = {
.ndo_tx_timeout = vortex_tx_timeout,
.ndo_get_stats = vortex_get_stats,
#ifdef CONFIG_PCI
- .ndo_do_ioctl = vortex_ioctl,
+ .ndo_eth_ioctl = vortex_ioctl,
#endif
.ndo_set_rx_mode = set_rx_mode,
.ndo_set_mac_address = eth_mac_addr,
@@ -1069,7 +1069,7 @@ static const struct net_device_ops vortex_netdev_ops = {
.ndo_tx_timeout = vortex_tx_timeout,
.ndo_get_stats = vortex_get_stats,
#ifdef CONFIG_PCI
- .ndo_do_ioctl = vortex_ioctl,
+ .ndo_eth_ioctl = vortex_ioctl,
#endif
.ndo_set_rx_mode = set_rx_mode,
.ndo_set_mac_address = eth_mac_addr,
diff --git a/drivers/net/ethernet/3com/Kconfig b/drivers/net/ethernet/3com/Kconfig
index a52a3740f0c9..706bd59bf645 100644
--- a/drivers/net/ethernet/3com/Kconfig
+++ b/drivers/net/ethernet/3com/Kconfig
@@ -34,6 +34,7 @@ config EL3
config 3C515
tristate "3c515 ISA \"Fast EtherLink\""
depends on ISA && ISA_DMA_API && !PPC32
+ select NETDEV_LEGACY_INIT
help
If you have a 3Com ISA EtherLink XL "Corkscrew" 3c515 Fast Ethernet
network card, say Y here.
diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig
index 9f4b302fd2ce..a4130e643342 100644
--- a/drivers/net/ethernet/8390/Kconfig
+++ b/drivers/net/ethernet/8390/Kconfig
@@ -102,6 +102,7 @@ config MCF8390
config NE2000
tristate "NE2000/NE1000 support"
depends on (ISA || (Q40 && m) || MACH_TX49XX || ATARI_ETHERNEC)
+ select NETDEV_LEGACY_INIT if ISA
select CRC32
help
If you have a network (Ethernet) card of this type, say Y here.
@@ -169,6 +170,7 @@ config STNIC
config ULTRA
tristate "SMC Ultra support"
depends on ISA
+ select NETDEV_LEGACY_INIT
select CRC32
help
If you have a network (Ethernet) card of this type, say Y here.
@@ -186,6 +188,7 @@ config ULTRA
config WD80x3
tristate "WD80*3 support"
depends on ISA
+ select NETDEV_LEGACY_INIT
select CRC32
help
If you have a network (Ethernet) card of this type, say Y here.
diff --git a/drivers/net/ethernet/8390/apne.c b/drivers/net/ethernet/8390/apne.c
index fe6c834c422e..da1ae37a9d73 100644
--- a/drivers/net/ethernet/8390/apne.c
+++ b/drivers/net/ethernet/8390/apne.c
@@ -75,7 +75,6 @@
#define NESM_STOP_PG 0x80 /* Last page +1 of RX ring */
-struct net_device * __init apne_probe(int unit);
static int apne_probe1(struct net_device *dev, int ioaddr);
static void apne_reset_8390(struct net_device *dev);
@@ -120,7 +119,7 @@ static u32 apne_msg_enable;
module_param_named(msg_enable, apne_msg_enable, uint, 0444);
MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)");
-struct net_device * __init apne_probe(int unit)
+static struct net_device * __init apne_probe(void)
{
struct net_device *dev;
struct ei_device *ei_local;
@@ -150,10 +149,6 @@ struct net_device * __init apne_probe(int unit)
dev = alloc_ei_netdev();
if (!dev)
return ERR_PTR(-ENOMEM);
- if (unit >= 0) {
- sprintf(dev->name, "eth%d", unit);
- netdev_boot_setup_check(dev);
- }
ei_local = netdev_priv(dev);
ei_local->msg_enable = apne_msg_enable;
@@ -554,12 +549,11 @@ static irqreturn_t apne_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-#ifdef MODULE
static struct net_device *apne_dev;
static int __init apne_module_init(void)
{
- apne_dev = apne_probe(-1);
+ apne_dev = apne_probe();
return PTR_ERR_OR_ZERO(apne_dev);
}
@@ -579,7 +573,6 @@ static void __exit apne_module_exit(void)
}
module_init(apne_module_init);
module_exit(apne_module_exit);
-#endif
static int init_pcmcia(void)
{
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index 172947fc051a..6c6bdd5913ec 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -101,6 +101,13 @@ static inline struct ax_device *to_ax_dev(struct net_device *dev)
return (struct ax_device *)(ei_local + 1);
}
+void ax_NS8390_reinit(struct net_device *dev)
+{
+ ax_NS8390_init(dev, 1);
+}
+
+EXPORT_SYMBOL_GPL(ax_NS8390_reinit);
+
/*
* ax_initial_check
*
@@ -635,7 +642,7 @@ static void ax_eeprom_register_write(struct eeprom_93cx6 *eeprom)
static const struct net_device_ops ax_netdev_ops = {
.ndo_open = ax_open,
.ndo_stop = ax_close,
- .ndo_do_ioctl = ax_ioctl,
+ .ndo_eth_ioctl = ax_ioctl,
.ndo_start_xmit = ax_ei_start_xmit,
.ndo_tx_timeout = ax_ei_tx_timeout,
diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c
index 8c321dfc7b3b..3c370e686ec3 100644
--- a/drivers/net/ethernet/8390/axnet_cs.c
+++ b/drivers/net/ethernet/8390/axnet_cs.c
@@ -128,7 +128,7 @@ static inline struct axnet_dev *PRIV(struct net_device *dev)
static const struct net_device_ops axnet_netdev_ops = {
.ndo_open = axnet_open,
.ndo_stop = axnet_close,
- .ndo_do_ioctl = axnet_ioctl,
+ .ndo_eth_ioctl = axnet_ioctl,
.ndo_start_xmit = axnet_start_xmit,
.ndo_tx_timeout = axnet_tx_timeout,
.ndo_get_stats = get_stats,
diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c
index e9756d0ea5b8..53660bc8d6ff 100644
--- a/drivers/net/ethernet/8390/ne.c
+++ b/drivers/net/ethernet/8390/ne.c
@@ -923,7 +923,7 @@ static void __init ne_add_devices(void)
}
#ifdef MODULE
-int __init init_module(void)
+static int __init ne_init(void)
{
int retval;
ne_add_devices();
@@ -940,6 +940,7 @@ int __init init_module(void)
ne_loop_rm_unreg(0);
return retval;
}
+module_init(ne_init);
#else /* MODULE */
static int __init ne_init(void)
{
@@ -951,6 +952,7 @@ static int __init ne_init(void)
}
module_init(ne_init);
+#ifdef CONFIG_NETDEV_LEGACY_INIT
struct net_device * __init ne_probe(int unit)
{
int this_dev;
@@ -991,6 +993,7 @@ struct net_device * __init ne_probe(int unit)
return ERR_PTR(-ENODEV);
}
+#endif
#endif /* MODULE */
static void __exit ne_exit(void)
diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c
index cac036706382..96ad72abd373 100644
--- a/drivers/net/ethernet/8390/pcnet_cs.c
+++ b/drivers/net/ethernet/8390/pcnet_cs.c
@@ -223,7 +223,7 @@ static const struct net_device_ops pcnet_netdev_ops = {
.ndo_set_config = set_config,
.ndo_start_xmit = ei_start_xmit,
.ndo_get_stats = ei_get_stats,
- .ndo_do_ioctl = ei_ioctl,
+ .ndo_eth_ioctl = ei_ioctl,
.ndo_set_rx_mode = ei_set_multicast_list,
.ndo_tx_timeout = ei_tx_timeout,
.ndo_set_mac_address = eth_mac_addr,
diff --git a/drivers/net/ethernet/8390/smc-ultra.c b/drivers/net/ethernet/8390/smc-ultra.c
index 1d8ed7357b7f..0890fa493f70 100644
--- a/drivers/net/ethernet/8390/smc-ultra.c
+++ b/drivers/net/ethernet/8390/smc-ultra.c
@@ -522,7 +522,6 @@ static void ultra_pio_input(struct net_device *dev, int count,
/* We know skbuffs are padded to at least word alignment. */
insw(ioaddr + IOPD, buf, (count+1)>>1);
}
-
static void ultra_pio_output(struct net_device *dev, int count,
const unsigned char *buf, const int start_page)
{
@@ -572,8 +571,7 @@ MODULE_LICENSE("GPL");
/* This is set up so that only a single autoprobe takes place per call.
ISA device autoprobes on a running machine are not recommended. */
-int __init
-init_module(void)
+static int __init ultra_init_module(void)
{
struct net_device *dev;
int this_dev, found = 0;
@@ -600,6 +598,7 @@ init_module(void)
return 0;
return -ENXIO;
}
+module_init(ultra_init_module);
static void cleanup_card(struct net_device *dev)
{
@@ -613,8 +612,7 @@ static void cleanup_card(struct net_device *dev)
iounmap(ei_status.mem);
}
-void __exit
-cleanup_module(void)
+static void __exit ultra_cleanup_module(void)
{
int this_dev;
@@ -627,4 +625,5 @@ cleanup_module(void)
}
}
}
+module_exit(ultra_cleanup_module);
#endif /* MODULE */
diff --git a/drivers/net/ethernet/8390/wd.c b/drivers/net/ethernet/8390/wd.c
index c834123560f1..263a942d81fa 100644
--- a/drivers/net/ethernet/8390/wd.c
+++ b/drivers/net/ethernet/8390/wd.c
@@ -519,7 +519,7 @@ MODULE_LICENSE("GPL");
/* This is set up so that only a single autoprobe takes place per call.
ISA device autoprobes on a running machine are not recommended. */
-int __init init_module(void)
+static int __init wd_init_module(void)
{
struct net_device *dev;
int this_dev, found = 0;
@@ -548,6 +548,7 @@ int __init init_module(void)
return 0;
return -ENXIO;
}
+module_init(wd_init_module);
static void cleanup_card(struct net_device *dev)
{
@@ -556,8 +557,7 @@ static void cleanup_card(struct net_device *dev)
iounmap(ei_status.mem);
}
-void __exit
-cleanup_module(void)
+static void __exit wd_cleanup_module(void)
{
int this_dev;
@@ -570,4 +570,5 @@ cleanup_module(void)
}
}
}
+module_exit(wd_cleanup_module);
#endif /* MODULE */
diff --git a/drivers/net/ethernet/8390/xsurf100.c b/drivers/net/ethernet/8390/xsurf100.c
index e2c963821ffe..fe7a74707aa4 100644
--- a/drivers/net/ethernet/8390/xsurf100.c
+++ b/drivers/net/ethernet/8390/xsurf100.c
@@ -22,8 +22,6 @@
#define XS100_8390_DATA_WRITE32_BASE 0x0C80
#define XS100_8390_DATA_AREA_SIZE 0x80
-#define __NS8390_init ax_NS8390_init
-
/* force unsigned long back to 'void __iomem *' */
#define ax_convert_addr(_a) ((void __force __iomem *)(_a))
@@ -42,10 +40,7 @@
/* Ensure we have our RCR base value */
#define AX88796_PLATFORM
-static unsigned char version[] =
- "ax88796.c: Copyright 2005,2007 Simtec Electronics\n";
-
-#include "lib8390.c"
+#include "8390.h"
/* from ne.c */
#define NE_CMD EI_SHIFT(0x00)
@@ -232,7 +227,7 @@ static void xs100_block_output(struct net_device *dev, int count,
if (jiffies - dma_start > 2 * HZ / 100) { /* 20ms */
netdev_warn(dev, "timeout waiting for Tx RDC.\n");
ei_local->reset_8390(dev);
- ax_NS8390_init(dev, 1);
+ ax_NS8390_reinit(dev);
break;
}
}
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 1cdff1dca790..d796684ec9ca 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -118,6 +118,7 @@ config LANTIQ_XRX200
Support for the PMAC of the Gigabit switch (GSWIP) inside the
Lantiq / Intel VRX200 VDSL SoC
+source "drivers/net/ethernet/litex/Kconfig"
source "drivers/net/ethernet/marvell/Kconfig"
source "drivers/net/ethernet/mediatek/Kconfig"
source "drivers/net/ethernet/mellanox/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index cb3f9084a21b..aaa5078cd7d1 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_JME) += jme.o
obj-$(CONFIG_KORINA) += korina.o
obj-$(CONFIG_LANTIQ_ETOP) += lantiq_etop.o
obj-$(CONFIG_LANTIQ_XRX200) += lantiq_xrx200.o
+obj-$(CONFIG_NET_VENDOR_LITEX) += litex/
obj-$(CONFIG_NET_VENDOR_MARVELL) += marvell/
obj-$(CONFIG_NET_VENDOR_MEDIATEK) += mediatek/
obj-$(CONFIG_NET_VENDOR_MELLANOX) += mellanox/
diff --git a/drivers/net/ethernet/actions/Kconfig b/drivers/net/ethernet/actions/Kconfig
index ccad6a3f4d6f..f630cac2ab6c 100644
--- a/drivers/net/ethernet/actions/Kconfig
+++ b/drivers/net/ethernet/actions/Kconfig
@@ -2,8 +2,8 @@
config NET_VENDOR_ACTIONS
bool "Actions Semi devices"
- default y
- depends on ARCH_ACTIONS
+ depends on ARCH_ACTIONS || COMPILE_TEST
+ default ARCH_ACTIONS
help
If you have a network (Ethernet) card belonging to this class, say Y.
diff --git a/drivers/net/ethernet/actions/owl-emac.c b/drivers/net/ethernet/actions/owl-emac.c
index b8e771c2bc40..c4ecf4fcadf8 100644
--- a/drivers/net/ethernet/actions/owl-emac.c
+++ b/drivers/net/ethernet/actions/owl-emac.c
@@ -1179,8 +1179,8 @@ static int owl_emac_ndo_set_mac_addr(struct net_device *netdev, void *addr)
return owl_emac_setup_frame_xmit(netdev_priv(netdev));
}
-static int owl_emac_ndo_do_ioctl(struct net_device *netdev,
- struct ifreq *req, int cmd)
+static int owl_emac_ndo_eth_ioctl(struct net_device *netdev,
+ struct ifreq *req, int cmd)
{
if (!netif_running(netdev))
return -EINVAL;
@@ -1224,7 +1224,7 @@ static const struct net_device_ops owl_emac_netdev_ops = {
.ndo_set_rx_mode = owl_emac_ndo_set_rx_mode,
.ndo_set_mac_address = owl_emac_ndo_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = owl_emac_ndo_do_ioctl,
+ .ndo_eth_ioctl = owl_emac_ndo_eth_ioctl,
.ndo_tx_timeout = owl_emac_ndo_tx_timeout,
.ndo_get_stats = owl_emac_ndo_get_stats,
};
diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c
index 7965e5e3c985..e0f6cc910bd2 100644
--- a/drivers/net/ethernet/adaptec/starfire.c
+++ b/drivers/net/ethernet/adaptec/starfire.c
@@ -625,7 +625,7 @@ static const struct net_device_ops netdev_ops = {
.ndo_tx_timeout = tx_timeout,
.ndo_get_stats = get_stats,
.ndo_set_rx_mode = set_rx_mode,
- .ndo_do_ioctl = netdev_ioctl,
+ .ndo_eth_ioctl = netdev_ioctl,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
#ifdef VLAN_SUPPORT
diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c
index 41f8821f792d..920633161174 100644
--- a/drivers/net/ethernet/agere/et131x.c
+++ b/drivers/net/ethernet/agere/et131x.c
@@ -3882,7 +3882,7 @@ static const struct net_device_ops et131x_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_get_stats = et131x_stats,
- .ndo_do_ioctl = phy_do_ioctl,
+ .ndo_eth_ioctl = phy_do_ioctl,
};
static int et131x_pci_setup(struct pci_dev *pdev,
diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c
index f99ae317c188..037baea1c738 100644
--- a/drivers/net/ethernet/allwinner/sun4i-emac.c
+++ b/drivers/net/ethernet/allwinner/sun4i-emac.c
@@ -774,7 +774,7 @@ static const struct net_device_ops emac_netdev_ops = {
.ndo_start_xmit = emac_start_xmit,
.ndo_tx_timeout = emac_timeout,
.ndo_set_rx_mode = emac_set_rx_mode,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = emac_set_mac_address,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 27dae632efcb..13e745cf3781 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -357,7 +357,9 @@ static int ena_get_link_ksettings(struct net_device *netdev,
}
static int ena_get_coalesce(struct net_device *net_dev,
- struct ethtool_coalesce *coalesce)
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct ena_adapter *adapter = netdev_priv(net_dev);
struct ena_com_dev *ena_dev = adapter->ena_dev;
@@ -402,7 +404,9 @@ static void ena_update_rx_rings_nonadaptive_intr_moderation(struct ena_adapter *
}
static int ena_set_coalesce(struct net_device *net_dev,
- struct ethtool_coalesce *coalesce)
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct ena_adapter *adapter = netdev_priv(net_dev);
struct ena_com_dev *ena_dev = adapter->ena_dev;
diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig
index d0b0609bbe23..4786f0504691 100644
--- a/drivers/net/ethernet/amd/Kconfig
+++ b/drivers/net/ethernet/amd/Kconfig
@@ -46,6 +46,7 @@ config AMD8111_ETH
config LANCE
tristate "AMD LANCE and PCnet (AT1500 and NE2100) support"
depends on ISA && ISA_DMA_API && !ARM && !PPC32
+ select NETDEV_LEGACY_INIT
help
If you have a network (Ethernet) card of this type, say Y here.
Some LinkSys cards are of this type.
@@ -132,6 +133,7 @@ config PCMCIA_NMCLAN
config NI65
tristate "NI6510 support"
depends on ISA && ISA_DMA_API && !ARM && !PPC32
+ select NETDEV_LEGACY_INIT
help
If you have a network (Ethernet) card of this type, say Y here.
@@ -168,11 +170,11 @@ config AMD_XGBE
tristate "AMD 10GbE Ethernet driver"
depends on ((OF_NET && OF_ADDRESS) || ACPI || PCI) && HAS_IOMEM
depends on X86 || ARM64 || COMPILE_TEST
+ depends on PTP_1588_CLOCK_OPTIONAL
select BITREVERSE
select CRC32
select PHYLIB
select AMD_XGBE_HAVE_ECC if X86
- imply PTP_1588_CLOCK
help
This driver supports the AMD 10GbE Ethernet device found on an
AMD SoC.
diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index 9cac5aa75a73..92e4246dc359 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -1729,7 +1729,7 @@ static const struct net_device_ops amd8111e_netdev_ops = {
.ndo_set_rx_mode = amd8111e_set_multicast_list,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = amd8111e_set_mac_address,
- .ndo_do_ioctl = amd8111e_ioctl,
+ .ndo_eth_ioctl = amd8111e_ioctl,
.ndo_change_mtu = amd8111e_change_mtu,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = amd8111e_poll,
diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c
index 36f54d13a2eb..9d2f49fd945e 100644
--- a/drivers/net/ethernet/amd/atarilance.c
+++ b/drivers/net/ethernet/amd/atarilance.c
@@ -367,7 +367,7 @@ static void *slow_memcpy( void *dst, const void *src, size_t len )
}
-struct net_device * __init atarilance_probe(int unit)
+struct net_device * __init atarilance_probe(void)
{
int i;
static int found;
@@ -382,10 +382,6 @@ struct net_device * __init atarilance_probe(int unit)
dev = alloc_etherdev(sizeof(struct lance_private));
if (!dev)
return ERR_PTR(-ENOMEM);
- if (unit >= 0) {
- sprintf(dev->name, "eth%d", unit);
- netdev_boot_setup_check(dev);
- }
for( i = 0; i < N_LANCE_ADDR; ++i ) {
if (lance_probe1( dev, &lance_addr_list[i] )) {
@@ -1137,13 +1133,11 @@ static int lance_set_mac_address( struct net_device *dev, void *addr )
return 0;
}
-
-#ifdef MODULE
static struct net_device *atarilance_dev;
static int __init atarilance_module_init(void)
{
- atarilance_dev = atarilance_probe(-1);
+ atarilance_dev = atarilance_probe();
return PTR_ERR_OR_ZERO(atarilance_dev);
}
@@ -1155,4 +1149,3 @@ static void __exit atarilance_module_exit(void)
}
module_init(atarilance_module_init);
module_exit(atarilance_module_exit);
-#endif /* MODULE */
diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index 19e195420e24..9c1636222b99 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -1051,7 +1051,7 @@ static const struct net_device_ops au1000_netdev_ops = {
.ndo_stop = au1000_close,
.ndo_start_xmit = au1000_tx,
.ndo_set_rx_mode = au1000_multicast_list,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_tx_timeout = au1000_tx_timeout,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c
index 2178e6b89dbd..945bf1d87507 100644
--- a/drivers/net/ethernet/amd/lance.c
+++ b/drivers/net/ethernet/amd/lance.c
@@ -327,7 +327,7 @@ MODULE_PARM_DESC(dma, "LANCE/PCnet ISA DMA channel (ignored for some devices)");
MODULE_PARM_DESC(irq, "LANCE/PCnet IRQ number (ignored for some devices)");
MODULE_PARM_DESC(lance_debug, "LANCE/PCnet debug level (0-7)");
-int __init init_module(void)
+static int __init lance_init_module(void)
{
struct net_device *dev;
int this_dev, found = 0;
@@ -356,6 +356,7 @@ int __init init_module(void)
return 0;
return -ENXIO;
}
+module_init(lance_init_module);
static void cleanup_card(struct net_device *dev)
{
@@ -368,7 +369,7 @@ static void cleanup_card(struct net_device *dev)
kfree(lp);
}
-void __exit cleanup_module(void)
+static void __exit lance_cleanup_module(void)
{
int this_dev;
@@ -381,6 +382,7 @@ void __exit cleanup_module(void)
}
}
}
+module_exit(lance_cleanup_module);
#endif /* MODULE */
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/amd/mvme147.c b/drivers/net/ethernet/amd/mvme147.c
index 3f2e4cdd0b83..da97fccea9ea 100644
--- a/drivers/net/ethernet/amd/mvme147.c
+++ b/drivers/net/ethernet/amd/mvme147.c
@@ -68,7 +68,7 @@ static const struct net_device_ops lance_netdev_ops = {
};
/* Initialise the one and only on-board 7990 */
-struct net_device * __init mvme147lance_probe(int unit)
+static struct net_device * __init mvme147lance_probe(void)
{
struct net_device *dev;
static int called;
@@ -86,9 +86,6 @@ struct net_device * __init mvme147lance_probe(int unit)
if (!dev)
return ERR_PTR(-ENOMEM);
- if (unit >= 0)
- sprintf(dev->name, "eth%d", unit);
-
/* Fill the dev fields */
dev->base_addr = (unsigned long)MVME147_LANCE_BASE;
dev->netdev_ops = &lance_netdev_ops;
@@ -179,22 +176,21 @@ static int m147lance_close(struct net_device *dev)
return 0;
}
-#ifdef MODULE
MODULE_LICENSE("GPL");
static struct net_device *dev_mvme147_lance;
-int __init init_module(void)
+static int __init m147lance_init(void)
{
- dev_mvme147_lance = mvme147lance_probe(-1);
+ dev_mvme147_lance = mvme147lance_probe();
return PTR_ERR_OR_ZERO(dev_mvme147_lance);
}
+module_init(m147lance_init);
-void __exit cleanup_module(void)
+static void __exit m147lance_exit(void)
{
struct m147lance_private *lp = netdev_priv(dev_mvme147_lance);
unregister_netdev(dev_mvme147_lance);
free_pages(lp->ram, 3);
free_netdev(dev_mvme147_lance);
}
-
-#endif /* MODULE */
+module_exit(m147lance_exit);
diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c
index 5c1cfb0c4a42..b5df7ad5a83f 100644
--- a/drivers/net/ethernet/amd/ni65.c
+++ b/drivers/net/ethernet/amd/ni65.c
@@ -1230,18 +1230,20 @@ MODULE_PARM_DESC(irq, "ni6510 IRQ number (ignored for some cards)");
MODULE_PARM_DESC(io, "ni6510 I/O base address");
MODULE_PARM_DESC(dma, "ni6510 ISA DMA channel (ignored for some cards)");
-int __init init_module(void)
+static int __init ni65_init_module(void)
{
dev_ni65 = ni65_probe(-1);
return PTR_ERR_OR_ZERO(dev_ni65);
}
+module_init(ni65_init_module);
-void __exit cleanup_module(void)
+static void __exit ni65_cleanup_module(void)
{
unregister_netdev(dev_ni65);
cleanup_card(dev_ni65);
free_netdev(dev_ni65);
}
+module_exit(ni65_cleanup_module);
#endif /* MODULE */
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index 4100ab07e6b7..70d76fdb9f56 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -1572,7 +1572,7 @@ static const struct net_device_ops pcnet32_netdev_ops = {
.ndo_tx_timeout = pcnet32_tx_timeout,
.ndo_get_stats = pcnet32_get_stats,
.ndo_set_rx_mode = pcnet32_set_multicast_list,
- .ndo_do_ioctl = pcnet32_ioctl,
+ .ndo_eth_ioctl = pcnet32_ioctl,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/amd/sun3lance.c b/drivers/net/ethernet/amd/sun3lance.c
index f8d7a9387a56..4a845bc071b2 100644
--- a/drivers/net/ethernet/amd/sun3lance.c
+++ b/drivers/net/ethernet/amd/sun3lance.c
@@ -245,7 +245,7 @@ static void set_multicast_list( struct net_device *dev );
/************************* End of Prototypes **************************/
-struct net_device * __init sun3lance_probe(int unit)
+static struct net_device * __init sun3lance_probe(void)
{
struct net_device *dev;
static int found;
@@ -272,10 +272,6 @@ struct net_device * __init sun3lance_probe(int unit)
dev = alloc_etherdev(sizeof(struct lance_private));
if (!dev)
return ERR_PTR(-ENOMEM);
- if (unit >= 0) {
- sprintf(dev->name, "eth%d", unit);
- netdev_boot_setup_check(dev);
- }
if (!lance_probe(dev))
goto out;
@@ -924,17 +920,16 @@ static void set_multicast_list( struct net_device *dev )
}
-#ifdef MODULE
-
static struct net_device *sun3lance_dev;
-int __init init_module(void)
+static int __init sun3lance_init(void)
{
- sun3lance_dev = sun3lance_probe(-1);
+ sun3lance_dev = sun3lance_probe();
return PTR_ERR_OR_ZERO(sun3lance_dev);
}
+module_init(sun3lance_init);
-void __exit cleanup_module(void)
+static void __exit sun3lance_cleanup(void)
{
unregister_netdev(sun3lance_dev);
#ifdef CONFIG_SUN3
@@ -942,6 +937,4 @@ void __exit cleanup_module(void)
#endif
free_netdev(sun3lance_dev);
}
-
-#endif /* MODULE */
-
+module_exit(sun3lance_cleanup);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 4f714f874c4f..17a585adfb49 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -2284,7 +2284,7 @@ static const struct net_device_ops xgbe_netdev_ops = {
.ndo_set_rx_mode = xgbe_set_rx_mode,
.ndo_set_mac_address = xgbe_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = xgbe_ioctl,
+ .ndo_eth_ioctl = xgbe_ioctl,
.ndo_change_mtu = xgbe_change_mtu,
.ndo_tx_timeout = xgbe_tx_timeout,
.ndo_get_stats64 = xgbe_get_stats64,
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index 61f39a0e04f9..bafc51c34e0b 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -428,7 +428,9 @@ static void xgbe_set_msglevel(struct net_device *netdev, u32 msglevel)
}
static int xgbe_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct xgbe_prv_data *pdata = netdev_priv(netdev);
@@ -443,7 +445,9 @@ static int xgbe_get_coalesce(struct net_device *netdev,
}
static int xgbe_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct xgbe_prv_data *pdata = netdev_priv(netdev);
struct xgbe_hw_if *hw_if = &pdata->hw_if;
diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c b/drivers/net/ethernet/apm/xgene-v2/main.c
index 860c18fb7aae..80399c8980bd 100644
--- a/drivers/net/ethernet/apm/xgene-v2/main.c
+++ b/drivers/net/ethernet/apm/xgene-v2/main.c
@@ -677,11 +677,13 @@ static int xge_probe(struct platform_device *pdev)
ret = register_netdev(ndev);
if (ret) {
netdev_err(ndev, "Failed to register netdev\n");
- goto err;
+ goto err_mdio_remove;
}
return 0;
+err_mdio_remove:
+ xge_mdio_remove(ndev);
err:
free_netdev(ndev);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index de2a9348bc3f..a9ef0544e30f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -547,7 +547,9 @@ static int aq_ethtool_set_rxnfc(struct net_device *ndev,
}
static int aq_ethtool_get_coalesce(struct net_device *ndev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct aq_nic_s *aq_nic = netdev_priv(ndev);
struct aq_nic_cfg_s *cfg;
@@ -571,7 +573,9 @@ static int aq_ethtool_get_coalesce(struct net_device *ndev,
}
static int aq_ethtool_set_coalesce(struct net_device *ndev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct aq_nic_s *aq_nic = netdev_priv(ndev);
struct aq_nic_cfg_s *cfg;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
index 4af0cd9530de..e22935ce9573 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
@@ -421,7 +421,7 @@ static const struct net_device_ops aq_ndev_ops = {
.ndo_change_mtu = aq_ndev_change_mtu,
.ndo_set_mac_address = aq_ndev_set_mac_address,
.ndo_set_features = aq_ndev_set_features,
- .ndo_do_ioctl = aq_ndev_ioctl,
+ .ndo_eth_ioctl = aq_ndev_ioctl,
.ndo_vlan_rx_add_vid = aq_ndo_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = aq_ndo_vlan_rx_kill_vid,
.ndo_setup_tc = aq_ndo_setup_tc,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index 59253846e885..dee9ff74d6d6 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -119,16 +119,10 @@ static int aq_pci_func_init(struct pci_dev *pdev)
{
int err;
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
- if (!err)
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (err)
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (err) {
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
- if (!err)
- err = pci_set_consistent_dma_mask(pdev,
- DMA_BIT_MASK(32));
- }
- if (err != 0) {
err = -ENOSR;
goto err_exit;
}
@@ -417,6 +411,9 @@ static int atl_resume_common(struct device *dev, bool deep)
pci_restore_state(pdev);
if (deep) {
+ /* Reinitialize Nic/Vecs objects */
+ aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol);
+
ret = aq_nic_init(nic);
if (ret)
goto err_exit;
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index 67b8113a2b53..38c288ec9059 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -844,7 +844,7 @@ static const struct net_device_ops arc_emac_netdev_ops = {
.ndo_set_mac_address = arc_emac_set_address,
.ndo_get_stats = arc_emac_stats,
.ndo_set_rx_mode = arc_emac_set_rx_mode,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = arc_emac_poll_controller,
#endif
diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
index 1ba81b1eb6fd..02ae98aabf91 100644
--- a/drivers/net/ethernet/atheros/ag71xx.c
+++ b/drivers/net/ethernet/atheros/ag71xx.c
@@ -1851,7 +1851,7 @@ static const struct net_device_ops ag71xx_netdev_ops = {
.ndo_open = ag71xx_open,
.ndo_stop = ag71xx_stop,
.ndo_start_xmit = ag71xx_hard_start_xmit,
- .ndo_do_ioctl = phy_do_ioctl,
+ .ndo_eth_ioctl = phy_do_ioctl,
.ndo_tx_timeout = ag71xx_tx_timeout,
.ndo_change_mtu = ag71xx_change_mtu,
.ndo_set_mac_address = eth_mac_addr,
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 11ef1fbe7aee..4ea157efca86 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -1701,7 +1701,7 @@ static const struct net_device_ops alx_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = alx_set_mac_address,
.ndo_change_mtu = alx_change_mtu,
- .ndo_do_ioctl = alx_ioctl,
+ .ndo_eth_ioctl = alx_ioctl,
.ndo_tx_timeout = alx_tx_timeout,
.ndo_fix_features = alx_fix_features,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 1c6246a5dc22..3b51b172b317 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -2609,7 +2609,7 @@ static const struct net_device_ops atl1c_netdev_ops = {
.ndo_change_mtu = atl1c_change_mtu,
.ndo_fix_features = atl1c_fix_features,
.ndo_set_features = atl1c_set_features,
- .ndo_do_ioctl = atl1c_ioctl,
+ .ndo_eth_ioctl = atl1c_ioctl,
.ndo_tx_timeout = atl1c_tx_timeout,
.ndo_get_stats = atl1c_get_stats,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
index 2eb0a2ab69f6..753973ac922e 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -2247,7 +2247,7 @@ static const struct net_device_ops atl1e_netdev_ops = {
.ndo_fix_features = atl1e_fix_features,
.ndo_set_features = atl1e_set_features,
.ndo_change_mtu = atl1e_change_mtu,
- .ndo_do_ioctl = atl1e_ioctl,
+ .ndo_eth_ioctl = atl1e_ioctl,
.ndo_tx_timeout = atl1e_tx_timeout,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = atl1e_netpoll,
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c
index c67201a13cf5..68f6c0bbd945 100644
--- a/drivers/net/ethernet/atheros/atlx/atl1.c
+++ b/drivers/net/ethernet/atheros/atlx/atl1.c
@@ -2885,7 +2885,7 @@ static const struct net_device_ops atl1_netdev_ops = {
.ndo_change_mtu = atl1_change_mtu,
.ndo_fix_features = atlx_fix_features,
.ndo_set_features = atlx_set_features,
- .ndo_do_ioctl = atlx_ioctl,
+ .ndo_eth_ioctl = atlx_ioctl,
.ndo_tx_timeout = atlx_tx_timeout,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = atl1_poll_controller,
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index 0cc0db04c27d..b69298ddb647 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -1293,7 +1293,7 @@ static const struct net_device_ops atl2_netdev_ops = {
.ndo_change_mtu = atl2_change_mtu,
.ndo_fix_features = atl2_fix_features,
.ndo_set_features = atl2_set_features,
- .ndo_do_ioctl = atl2_ioctl,
+ .ndo_eth_ioctl = atl2_ioctl,
.ndo_tx_timeout = atl2_tx_timeout,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = atl2_poll_controller,
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 1a02ca600b71..56e0fb07aec7 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -122,8 +122,8 @@ config SB1250_MAC
config TIGON3
tristate "Broadcom Tigon3 support"
depends on PCI
+ depends on PTP_1588_CLOCK_OPTIONAL
select PHYLIB
- imply PTP_1588_CLOCK
help
This driver supports Broadcom Tigon3 based gigabit Ethernet cards.
@@ -140,7 +140,7 @@ config TIGON3_HWMON
config BNX2X
tristate "Broadcom NetXtremeII 10Gb support"
depends on PCI
- imply PTP_1588_CLOCK
+ depends on PTP_1588_CLOCK_OPTIONAL
select FW_LOADER
select ZLIB_INFLATE
select LIBCRC32C
@@ -206,7 +206,7 @@ config SYSTEMPORT
config BNXT
tristate "Broadcom NetXtreme-C/E support"
depends on PCI
- imply PTP_1588_CLOCK
+ depends on PTP_1588_CLOCK_OPTIONAL
select FW_LOADER
select LIBCRC32C
select NET_DEVLINK
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index ad2655efe423..fa784953c601 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -2198,7 +2198,7 @@ static const struct net_device_ops b44_netdev_ops = {
.ndo_set_rx_mode = b44_set_rx_mode,
.ndo_set_mac_address = b44_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = b44_ioctl,
+ .ndo_eth_ioctl = b44_ioctl,
.ndo_tx_timeout = b44_tx_timeout,
.ndo_change_mtu = b44_change_mtu,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 977f097fc7bf..d56886300ecf 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -1699,7 +1699,7 @@ static const struct net_device_ops bcm_enet_ops = {
.ndo_start_xmit = bcm_enet_start_xmit,
.ndo_set_mac_address = bcm_enet_set_mac_address,
.ndo_set_rx_mode = bcm_enet_set_multicast_list,
- .ndo_do_ioctl = bcm_enet_ioctl,
+ .ndo_eth_ioctl = bcm_enet_ioctl,
.ndo_change_mtu = bcm_enet_change_mtu,
};
@@ -2446,7 +2446,7 @@ static const struct net_device_ops bcm_enetsw_ops = {
.ndo_stop = bcm_enetsw_stop,
.ndo_start_xmit = bcm_enet_start_xmit,
.ndo_change_mtu = bcm_enet_change_mtu,
- .ndo_do_ioctl = bcm_enetsw_ioctl,
+ .ndo_eth_ioctl = bcm_enetsw_ioctl,
};
@@ -2649,7 +2649,6 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
if (!res_mem || irq_rx < 0)
return -ENODEV;
- ret = 0;
dev = alloc_etherdev(sizeof(*priv));
if (!dev)
return -ENOMEM;
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index d9f0f0df8f7b..7fa1b695400d 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -607,7 +607,9 @@ static void bcm_sysport_set_tx_coalesce(struct bcm_sysport_tx_ring *ring,
}
static int bcm_sysport_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct bcm_sysport_priv *priv = netdev_priv(dev);
u32 reg;
@@ -627,7 +629,9 @@ static int bcm_sysport_get_coalesce(struct net_device *dev,
}
static int bcm_sysport_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct bcm_sysport_priv *priv = netdev_priv(dev);
struct dim_cq_moder moder;
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 075f6e146b29..fe4d99abd548 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1263,7 +1263,7 @@ static const struct net_device_ops bgmac_netdev_ops = {
.ndo_set_rx_mode = bgmac_set_rx_mode,
.ndo_set_mac_address = bgmac_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_change_mtu = bgmac_change_mtu,
};
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index bee6cfad9fc6..a705e2615307 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -2730,7 +2730,7 @@ bnx2_alloc_rx_page(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index, gf
if (!page)
return -ENOMEM;
mapping = dma_map_page(&bp->pdev->dev, page, 0, PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
if (dma_mapping_error(&bp->pdev->dev, mapping)) {
__free_page(page);
return -EIO;
@@ -2753,7 +2753,7 @@ bnx2_free_rx_page(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index)
return;
dma_unmap_page(&bp->pdev->dev, dma_unmap_addr(rx_pg, mapping),
- PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ PAGE_SIZE, DMA_FROM_DEVICE);
__free_page(page);
rx_pg->page = NULL;
@@ -2775,7 +2775,7 @@ bnx2_alloc_rx_data(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index, gf
mapping = dma_map_single(&bp->pdev->dev,
get_l2_fhdr(data),
bp->rx_buf_use_size,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
if (dma_mapping_error(&bp->pdev->dev, mapping)) {
kfree(data);
return -EIO;
@@ -2881,7 +2881,7 @@ bnx2_tx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
}
dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(tx_buf, mapping),
- skb_headlen(skb), PCI_DMA_TODEVICE);
+ skb_headlen(skb), DMA_TO_DEVICE);
tx_buf->skb = NULL;
last = tx_buf->nr_frags;
@@ -2895,7 +2895,7 @@ bnx2_tx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
dma_unmap_page(&bp->pdev->dev,
dma_unmap_addr(tx_buf, mapping),
skb_frag_size(&skb_shinfo(skb)->frags[i]),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
}
sw_cons = BNX2_NEXT_TX_BD(sw_cons);
@@ -3003,7 +3003,7 @@ bnx2_reuse_rx_data(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr,
dma_sync_single_for_device(&bp->pdev->dev,
dma_unmap_addr(cons_rx_buf, mapping),
- BNX2_RX_OFFSET + BNX2_RX_COPY_THRESH, PCI_DMA_FROMDEVICE);
+ BNX2_RX_OFFSET + BNX2_RX_COPY_THRESH, DMA_FROM_DEVICE);
rxr->rx_prod_bseq += bp->rx_buf_use_size;
@@ -3044,7 +3044,7 @@ error:
}
dma_unmap_single(&bp->pdev->dev, dma_addr, bp->rx_buf_use_size,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
skb = build_skb(data, 0);
if (!skb) {
kfree(data);
@@ -3110,7 +3110,7 @@ error:
}
dma_unmap_page(&bp->pdev->dev, mapping_old,
- PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ PAGE_SIZE, DMA_FROM_DEVICE);
frag_size -= frag_len;
skb->data_len += frag_len;
@@ -3180,7 +3180,7 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
dma_sync_single_for_cpu(&bp->pdev->dev, dma_addr,
BNX2_RX_OFFSET + BNX2_RX_COPY_THRESH,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
next_ring_idx = BNX2_RX_RING_IDX(BNX2_NEXT_RX_BD(sw_cons));
next_rx_buf = &rxr->rx_buf_ring[next_ring_idx];
@@ -5449,7 +5449,7 @@ bnx2_free_tx_skbs(struct bnx2 *bp)
dma_unmap_single(&bp->pdev->dev,
dma_unmap_addr(tx_buf, mapping),
skb_headlen(skb),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
tx_buf->skb = NULL;
@@ -5460,7 +5460,7 @@ bnx2_free_tx_skbs(struct bnx2 *bp)
dma_unmap_page(&bp->pdev->dev,
dma_unmap_addr(tx_buf, mapping),
skb_frag_size(&skb_shinfo(skb)->frags[k]),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
}
dev_kfree_skb(skb);
}
@@ -5491,7 +5491,7 @@ bnx2_free_rx_skbs(struct bnx2 *bp)
dma_unmap_single(&bp->pdev->dev,
dma_unmap_addr(rx_buf, mapping),
bp->rx_buf_use_size,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
rx_buf->data = NULL;
@@ -5843,7 +5843,7 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
packet[i] = (unsigned char) (i & 0xff);
map = dma_map_single(&bp->pdev->dev, skb->data, pkt_size,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
if (dma_mapping_error(&bp->pdev->dev, map)) {
dev_kfree_skb(skb);
return -EIO;
@@ -5882,7 +5882,7 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
udelay(5);
- dma_unmap_single(&bp->pdev->dev, map, pkt_size, PCI_DMA_TODEVICE);
+ dma_unmap_single(&bp->pdev->dev, map, pkt_size, DMA_TO_DEVICE);
dev_kfree_skb(skb);
if (bnx2_get_hw_tx_cons(tx_napi) != txr->tx_prod)
@@ -5901,7 +5901,7 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
dma_sync_single_for_cpu(&bp->pdev->dev,
dma_unmap_addr(rx_buf, mapping),
- bp->rx_buf_use_size, PCI_DMA_FROMDEVICE);
+ bp->rx_buf_use_size, DMA_FROM_DEVICE);
if (rx_hdr->l2_fhdr_status &
(L2_FHDR_ERRORS_BAD_CRC |
@@ -6660,7 +6660,8 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
} else
mss = 0;
- mapping = dma_map_single(&bp->pdev->dev, skb->data, len, PCI_DMA_TODEVICE);
+ mapping = dma_map_single(&bp->pdev->dev, skb->data, len,
+ DMA_TO_DEVICE);
if (dma_mapping_error(&bp->pdev->dev, mapping)) {
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
@@ -6741,7 +6742,7 @@ dma_error:
tx_buf = &txr->tx_buf_ring[ring_prod];
tx_buf->skb = NULL;
dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(tx_buf, mapping),
- skb_headlen(skb), PCI_DMA_TODEVICE);
+ skb_headlen(skb), DMA_TO_DEVICE);
/* unmap remaining mapped pages */
for (i = 0; i < last_frag; i++) {
@@ -6750,7 +6751,7 @@ dma_error:
tx_buf = &txr->tx_buf_ring[ring_prod];
dma_unmap_page(&bp->pdev->dev, dma_unmap_addr(tx_buf, mapping),
skb_frag_size(&skb_shinfo(skb)->frags[i]),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
}
dev_kfree_skb_any(skb);
@@ -7241,8 +7242,10 @@ bnx2_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
return rc;
}
-static int
-bnx2_get_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
+static int bnx2_get_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct bnx2 *bp = netdev_priv(dev);
@@ -7263,8 +7266,10 @@ bnx2_get_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
return 0;
}
-static int
-bnx2_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
+static int bnx2_set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct bnx2 *bp = netdev_priv(dev);
@@ -8041,21 +8046,16 @@ bnx2_read_vpd_fw_ver(struct bnx2 *bp)
#define BNX2_VPD_LEN 128
#define BNX2_MAX_VER_SLEN 30
- data = kmalloc(256, GFP_KERNEL);
+ data = kmalloc(BNX2_VPD_LEN, GFP_KERNEL);
if (!data)
return;
- rc = bnx2_nvram_read(bp, BNX2_VPD_NVRAM_OFFSET, data + BNX2_VPD_LEN,
- BNX2_VPD_LEN);
+ rc = bnx2_nvram_read(bp, BNX2_VPD_NVRAM_OFFSET, data, BNX2_VPD_LEN);
if (rc)
goto vpd_done;
- for (i = 0; i < BNX2_VPD_LEN; i += 4) {
- data[i] = data[i + BNX2_VPD_LEN + 3];
- data[i + 1] = data[i + BNX2_VPD_LEN + 2];
- data[i + 2] = data[i + BNX2_VPD_LEN + 1];
- data[i + 3] = data[i + BNX2_VPD_LEN];
- }
+ for (i = 0; i < BNX2_VPD_LEN; i += 4)
+ swab32s((u32 *)&data[i]);
i = pci_vpd_find_tag(data, BNX2_VPD_LEN, PCI_VPD_LRDT_RO_DATA);
if (i < 0)
@@ -8224,15 +8224,15 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
persist_dma_mask = dma_mask = DMA_BIT_MASK(64);
/* Configure DMA attributes. */
- if (pci_set_dma_mask(pdev, dma_mask) == 0) {
+ if (dma_set_mask(&pdev->dev, dma_mask) == 0) {
dev->features |= NETIF_F_HIGHDMA;
- rc = pci_set_consistent_dma_mask(pdev, persist_dma_mask);
+ rc = dma_set_coherent_mask(&pdev->dev, persist_dma_mask);
if (rc) {
dev_err(&pdev->dev,
"pci_set_consistent_dma_mask failed, aborting\n");
goto err_out_unmap;
}
- } else if ((rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) != 0) {
+ } else if ((rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) != 0) {
dev_err(&pdev->dev, "System does not support DMA, aborting\n");
goto err_out_unmap;
}
@@ -8546,7 +8546,7 @@ static const struct net_device_ops bnx2_netdev_ops = {
.ndo_stop = bnx2_close,
.ndo_get_stats64 = bnx2_get_stats64,
.ndo_set_rx_mode = bnx2_set_rx_mode,
- .ndo_do_ioctl = bnx2_ioctl,
+ .ndo_eth_ioctl = bnx2_ioctl,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = bnx2_change_mac_addr,
.ndo_change_mtu = bnx2_change_mtu,
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 32245bbe88a8..472a3a478038 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -1878,7 +1878,9 @@ static int bnx2x_set_eeprom(struct net_device *dev,
}
static int bnx2x_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct bnx2x *bp = netdev_priv(dev);
@@ -1891,7 +1893,9 @@ static int bnx2x_get_coalesce(struct net_device *dev,
}
static int bnx2x_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct bnx2x *bp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 2acbc73dcd18..6d98134913cd 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -13048,7 +13048,7 @@ static const struct net_device_ops bnx2x_netdev_ops = {
.ndo_set_rx_mode = bnx2x_set_rx_mode,
.ndo_set_mac_address = bnx2x_change_mac_addr,
.ndo_validate_addr = bnx2x_validate_addr,
- .ndo_do_ioctl = bnx2x_ioctl,
+ .ndo_eth_ioctl = bnx2x_ioctl,
.ndo_change_mtu = bnx2x_change_mtu,
.ndo_fix_features = bnx2x_fix_features,
.ndo_set_features = bnx2x_set_features,
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 27943b0446c2..f255fd0b16db 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -1858,7 +1858,6 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
{
int i;
int first_queue_query_index, num_queues_req;
- dma_addr_t cur_data_offset;
struct stats_query_entry *cur_query_entry;
u8 stats_count = 0;
bool is_fcoe = false;
@@ -1879,10 +1878,6 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
BNX2X_NUM_ETH_QUEUES(bp), is_fcoe, first_queue_query_index,
first_queue_query_index + num_queues_req);
- cur_data_offset = bp->fw_stats_data_mapping +
- offsetof(struct bnx2x_fw_stats_data, queue_stats) +
- num_queues_req * sizeof(struct per_queue_stats);
-
cur_query_entry = &bp->fw_stats_req->
query[first_queue_query_index + num_queues_req];
@@ -1933,7 +1928,6 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
cur_query_entry->funcID,
j, cur_query_entry->index);
cur_query_entry++;
- cur_data_offset += sizeof(struct per_queue_stats);
stats_count++;
/* all stats are coalesced to the leading queue */
diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile
index 2b8ae687b3c1..c6ef7ec2c115 100644
--- a/drivers/net/ethernet/broadcom/bnxt/Makefile
+++ b/drivers/net/ethernet/broadcom/bnxt/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_BNXT) += bnxt_en.o
-bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_ptp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o
+bnxt_en-y := bnxt.o bnxt_hwrm.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_ptp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o
bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o
bnxt_en-$(CONFIG_DEBUG_FS) += bnxt_debugfs.o
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 89606587b156..627f85ee3922 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -60,6 +60,7 @@
#include "bnxt_hsi.h"
#include "bnxt.h"
+#include "bnxt_hwrm.h"
#include "bnxt_ulp.h"
#include "bnxt_sriov.h"
#include "bnxt_ethtool.h"
@@ -72,7 +73,8 @@
#include "bnxt_debugfs.h"
#define BNXT_TX_TIMEOUT (5 * HZ)
-#define BNXT_DEF_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_HW)
+#define BNXT_DEF_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_HW | \
+ NETIF_MSG_TX_ERR)
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Broadcom BCM573xx network driver");
@@ -275,8 +277,11 @@ static const u16 bnxt_async_events_arr[] = {
ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY,
ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY,
ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION,
+ ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE,
ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG,
ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST,
+ ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP,
+ ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT,
};
static struct workqueue_struct *bnxt_pf_wq;
@@ -365,6 +370,33 @@ static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb)
return md_dst->u.port_info.port_id;
}
+static void bnxt_txr_db_kick(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
+ u16 prod)
+{
+ bnxt_db_write(bp, &txr->tx_db, prod);
+ txr->kick_pending = 0;
+}
+
+static bool bnxt_txr_netif_try_stop_queue(struct bnxt *bp,
+ struct bnxt_tx_ring_info *txr,
+ struct netdev_queue *txq)
+{
+ netif_tx_stop_queue(txq);
+
+ /* netif_tx_stop_queue() must be done before checking
+ * tx index in bnxt_tx_avail() below, because in
+ * bnxt_tx_int(), we update tx index before checking for
+ * netif_tx_queue_stopped().
+ */
+ smp_mb();
+ if (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh) {
+ netif_tx_wake_queue(txq);
+ return false;
+ }
+
+ return true;
+}
+
static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct bnxt *bp = netdev_priv(dev);
@@ -384,6 +416,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
i = skb_get_queue_mapping(skb);
if (unlikely(i >= bp->tx_nr_rings)) {
dev_kfree_skb_any(skb);
+ atomic_long_inc(&dev->tx_dropped);
return NETDEV_TX_OK;
}
@@ -393,8 +426,12 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
free_size = bnxt_tx_avail(bp, txr);
if (unlikely(free_size < skb_shinfo(skb)->nr_frags + 2)) {
- netif_tx_stop_queue(txq);
- return NETDEV_TX_BUSY;
+ /* We must have raced with NAPI cleanup */
+ if (net_ratelimit() && txr->kick_pending)
+ netif_warn(bp, tx_err, dev,
+ "bnxt: ring busy w/ flush pending!\n");
+ if (bnxt_txr_netif_try_stop_queue(bp, txr, txq))
+ return NETDEV_TX_BUSY;
}
length = skb->len;
@@ -426,7 +463,10 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (ptp && ptp->tx_tstamp_en && !skb_is_gso(skb) &&
atomic_dec_if_positive(&ptp->tx_avail) >= 0) {
- if (!bnxt_ptp_parse(skb, &ptp->tx_seqid)) {
+ if (!bnxt_ptp_parse(skb, &ptp->tx_seqid,
+ &ptp->tx_hdr_off)) {
+ if (vlan_tag_flags)
+ ptp->tx_hdr_off += VLAN_HLEN;
lflags |= cpu_to_le32(TX_BD_FLAGS_STAMP);
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
} else {
@@ -514,21 +554,16 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
normal_tx:
if (length < BNXT_MIN_PKT_SIZE) {
pad = BNXT_MIN_PKT_SIZE - length;
- if (skb_pad(skb, pad)) {
+ if (skb_pad(skb, pad))
/* SKB already freed. */
- tx_buf->skb = NULL;
- return NETDEV_TX_OK;
- }
+ goto tx_kick_pending;
length = BNXT_MIN_PKT_SIZE;
}
mapping = dma_map_single(&pdev->dev, skb->data, len, DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(&pdev->dev, mapping))) {
- dev_kfree_skb_any(skb);
- tx_buf->skb = NULL;
- return NETDEV_TX_OK;
- }
+ if (unlikely(dma_mapping_error(&pdev->dev, mapping)))
+ goto tx_free;
dma_unmap_addr_set(tx_buf, mapping, mapping);
flags = (len << TX_BD_LEN_SHIFT) | TX_BD_TYPE_LONG_TX_BD |
@@ -615,24 +650,17 @@ normal_tx:
txr->tx_prod = prod;
if (!netdev_xmit_more() || netif_xmit_stopped(txq))
- bnxt_db_write(bp, &txr->tx_db, prod);
+ bnxt_txr_db_kick(bp, txr, prod);
+ else
+ txr->kick_pending = 1;
tx_done:
if (unlikely(bnxt_tx_avail(bp, txr) <= MAX_SKB_FRAGS + 1)) {
if (netdev_xmit_more() && !tx_buf->is_push)
- bnxt_db_write(bp, &txr->tx_db, prod);
-
- netif_tx_stop_queue(txq);
+ bnxt_txr_db_kick(bp, txr, prod);
- /* netif_tx_stop_queue() must be done before checking
- * tx index in bnxt_tx_avail() below, because in
- * bnxt_tx_int(), we update tx index before checking for
- * netif_tx_queue_stopped().
- */
- smp_mb();
- if (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh)
- netif_tx_wake_queue(txq);
+ bnxt_txr_netif_try_stop_queue(bp, txr, txq);
}
return NETDEV_TX_OK;
@@ -645,9 +673,8 @@ tx_dma_error:
/* start back at beginning and unmap skb */
prod = txr->tx_prod;
tx_buf = &txr->tx_buf_ring[prod];
- tx_buf->skb = NULL;
dma_unmap_single(&pdev->dev, dma_unmap_addr(tx_buf, mapping),
- skb_headlen(skb), PCI_DMA_TODEVICE);
+ skb_headlen(skb), DMA_TO_DEVICE);
prod = NEXT_TX(prod);
/* unmap remaining mapped pages */
@@ -656,10 +683,16 @@ tx_dma_error:
tx_buf = &txr->tx_buf_ring[prod];
dma_unmap_page(&pdev->dev, dma_unmap_addr(tx_buf, mapping),
skb_frag_size(&skb_shinfo(skb)->frags[i]),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
}
+tx_free:
dev_kfree_skb_any(skb);
+tx_kick_pending:
+ if (txr->kick_pending)
+ bnxt_txr_db_kick(bp, txr, txr->tx_prod);
+ txr->tx_buf_ring[txr->tx_prod].skb = NULL;
+ atomic_long_inc(&dev->tx_dropped);
return NETDEV_TX_OK;
}
@@ -689,7 +722,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
}
dma_unmap_single(&pdev->dev, dma_unmap_addr(tx_buf, mapping),
- skb_headlen(skb), PCI_DMA_TODEVICE);
+ skb_headlen(skb), DMA_TO_DEVICE);
last = tx_buf->nr_frags;
for (j = 0; j < last; j++) {
@@ -699,7 +732,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
&pdev->dev,
dma_unmap_addr(tx_buf, mapping),
skb_frag_size(&skb_shinfo(skb)->frags[j]),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
}
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
if (bp->flags & BNXT_FLAG_CHIP_P5) {
@@ -729,14 +762,9 @@ next_tx_int:
smp_mb();
if (unlikely(netif_tx_queue_stopped(txq)) &&
- (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh)) {
- __netif_tx_lock(txq, smp_processor_id());
- if (netif_tx_queue_stopped(txq) &&
- bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh &&
- txr->dev_state != BNXT_DEV_STATE_CLOSING)
- netif_tx_wake_queue(txq);
- __netif_tx_unlock(txq);
- }
+ bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh &&
+ READ_ONCE(txr->dev_state) != BNXT_DEV_STATE_CLOSING)
+ netif_tx_wake_queue(txq);
}
static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
@@ -877,7 +905,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp,
}
mapping = dma_map_page_attrs(&pdev->dev, page, offset,
- BNXT_RX_PAGE_SIZE, PCI_DMA_FROMDEVICE,
+ BNXT_RX_PAGE_SIZE, DMA_FROM_DEVICE,
DMA_ATTR_WEAK_ORDERING);
if (dma_mapping_error(&pdev->dev, mapping)) {
__free_page(page);
@@ -1117,7 +1145,7 @@ static struct sk_buff *bnxt_rx_pages(struct bnxt *bp,
}
dma_unmap_page_attrs(&pdev->dev, mapping, BNXT_RX_PAGE_SIZE,
- PCI_DMA_FROMDEVICE,
+ DMA_FROM_DEVICE,
DMA_ATTR_WEAK_ORDERING);
skb->data_len += frag_len;
@@ -1625,6 +1653,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping);
if (!skb) {
bnxt_abort_tpa(cpr, idx, agg_bufs);
+ cpr->sw_stats.rx.rx_oom_discards += 1;
return NULL;
}
} else {
@@ -1634,6 +1663,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
new_data = __bnxt_alloc_rx_data(bp, &new_mapping, GFP_ATOMIC);
if (!new_data) {
bnxt_abort_tpa(cpr, idx, agg_bufs);
+ cpr->sw_stats.rx.rx_oom_discards += 1;
return NULL;
}
@@ -1649,6 +1679,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
if (!skb) {
kfree(data);
bnxt_abort_tpa(cpr, idx, agg_bufs);
+ cpr->sw_stats.rx.rx_oom_discards += 1;
return NULL;
}
skb_reserve(skb, bp->rx_offset);
@@ -1659,6 +1690,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
skb = bnxt_rx_pages(bp, cpr, skb, idx, agg_bufs, true);
if (!skb) {
/* Page reuse already handled by bnxt_rx_pages(). */
+ cpr->sw_stats.rx.rx_oom_discards += 1;
return NULL;
}
}
@@ -1764,6 +1796,10 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
return -EBUSY;
+ /* The valid test of the entry must be done first before
+ * reading any further.
+ */
+ dma_rmb();
prod = rxr->rx_prod;
if (cmp_type == CMP_TYPE_RX_L2_TPA_START_CMP) {
@@ -1858,6 +1894,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
if (agg_bufs)
bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0,
agg_bufs, false);
+ cpr->sw_stats.rx.rx_oom_discards += 1;
rc = -ENOMEM;
goto next_rx;
}
@@ -1871,6 +1908,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
skb = bp->rx_skb_func(bp, rxr, cons, data, data_ptr, dma_addr,
payload | len);
if (!skb) {
+ cpr->sw_stats.rx.rx_oom_discards += 1;
rc = -ENOMEM;
goto next_rx;
}
@@ -1879,6 +1917,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
if (agg_bufs) {
skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs, false);
if (!skb) {
+ cpr->sw_stats.rx.rx_oom_discards += 1;
rc = -ENOMEM;
goto next_rx;
}
@@ -1973,6 +2012,7 @@ static int bnxt_force_rx_discard(struct bnxt *bp,
struct rx_cmp *rxcmp;
u16 cp_cons;
u8 cmp_type;
+ int rc;
cp_cons = RING_CMP(tmp_raw_cons);
rxcmp = (struct rx_cmp *)
@@ -1986,6 +2026,10 @@ static int bnxt_force_rx_discard(struct bnxt *bp,
if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
return -EBUSY;
+ /* The valid test of the entry must be done first before
+ * reading any further.
+ */
+ dma_rmb();
cmp_type = RX_CMP_TYPE(rxcmp);
if (cmp_type == CMP_TYPE_RX_L2_CMP) {
rxcmp1->rx_cmp_cfa_code_errors_v2 |=
@@ -1997,7 +2041,10 @@ static int bnxt_force_rx_discard(struct bnxt *bp,
tpa_end1->rx_tpa_end_cmp_errors_v2 |=
cpu_to_le32(RX_TPA_END_CMP_ERRORS);
}
- return bnxt_rx_pkt(bp, cpr, raw_cons, event);
+ rc = bnxt_rx_pkt(bp, cpr, raw_cons, event);
+ if (rc && rc != -EBUSY)
+ cpr->sw_stats.rx.rx_netpoll_discards += 1;
+ return rc;
}
u32 bnxt_fw_health_readl(struct bnxt *bp, int reg_idx)
@@ -2042,6 +2089,19 @@ static u16 bnxt_agg_ring_id_to_grp_idx(struct bnxt *bp, u16 ring_id)
return INVALID_HW_RING_ID;
}
+static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
+{
+ switch (BNXT_EVENT_ERROR_REPORT_TYPE(data1)) {
+ case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL:
+ netdev_err(bp->dev, "1PPS: Received invalid signal on pin%lu from the external source. Please fix the signal and reconfigure the pin\n",
+ BNXT_EVENT_INVALID_SIGNAL_DATA(data2));
+ break;
+ default:
+ netdev_err(bp->dev, "FW reported unknown error type\n");
+ break;
+ }
+}
+
#define BNXT_GET_EVENT_PORT(data) \
((data) & \
ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK)
@@ -2202,6 +2262,20 @@ static int bnxt_async_event_process(struct bnxt *bp,
}
goto async_event_process_exit;
}
+ case ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP: {
+ bnxt_ptp_pps_event(bp, data1, data2);
+ goto async_event_process_exit;
+ }
+ case ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT: {
+ bnxt_event_error_report(bp, data1, data2);
+ goto async_event_process_exit;
+ }
+ case ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE: {
+ u16 seq_id = le32_to_cpu(cmpl->event_data2) & 0xffff;
+
+ hwrm_update_token(bp, seq_id, BNXT_HWRM_DEFERRED);
+ goto async_event_process_exit;
+ }
default:
goto async_event_process_exit;
}
@@ -2221,10 +2295,7 @@ static int bnxt_hwrm_handler(struct bnxt *bp, struct tx_cmp *txcmp)
switch (cmpl_type) {
case CMPL_BASE_TYPE_HWRM_DONE:
seq_id = le16_to_cpu(h_cmpl->sequence_id);
- if (seq_id == bp->hwrm_intr_seq_id)
- bp->hwrm_intr_seq_id = (u16)~bp->hwrm_intr_seq_id;
- else
- netdev_err(bp->dev, "Invalid hwrm seq id %d\n", seq_id);
+ hwrm_update_token(bp, seq_id, BNXT_HWRM_COMPLETE);
break;
case CMPL_BASE_TYPE_HWRM_FWD_REQ:
@@ -2451,6 +2522,10 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget)
if (!TX_CMP_VALID(txcmp, raw_cons))
break;
+ /* The valid test of the entry must be done first before
+ * reading any further.
+ */
+ dma_rmb();
if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) {
tmp_raw_cons = NEXT_RAW_CMP(raw_cons);
cp_cons = RING_CMP(tmp_raw_cons);
@@ -2654,7 +2729,7 @@ static void bnxt_free_tx_skbs(struct bnxt *bp)
dma_unmap_single(&pdev->dev,
dma_unmap_addr(tx_buf, mapping),
dma_unmap_len(tx_buf, len),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
xdp_return_frame(tx_buf->xdpf);
tx_buf->action = 0;
tx_buf->xdpf = NULL;
@@ -2679,7 +2754,7 @@ static void bnxt_free_tx_skbs(struct bnxt *bp)
dma_unmap_single(&pdev->dev,
dma_unmap_addr(tx_buf, mapping),
skb_headlen(skb),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
last = tx_buf->nr_frags;
j += 2;
@@ -2691,7 +2766,7 @@ static void bnxt_free_tx_skbs(struct bnxt *bp)
dma_unmap_page(
&pdev->dev,
dma_unmap_addr(tx_buf, mapping),
- skb_frag_size(frag), PCI_DMA_TODEVICE);
+ skb_frag_size(frag), DMA_TO_DEVICE);
}
dev_kfree_skb(skb);
}
@@ -2758,7 +2833,7 @@ skip_rx_tpa_free:
continue;
dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping,
- BNXT_RX_PAGE_SIZE, PCI_DMA_FROMDEVICE,
+ BNXT_RX_PAGE_SIZE, DMA_FROM_DEVICE,
DMA_ATTR_WEAK_ORDERING);
rx_agg_buf->page = NULL;
@@ -3140,6 +3215,58 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
return 0;
}
+static void bnxt_free_cp_arrays(struct bnxt_cp_ring_info *cpr)
+{
+ kfree(cpr->cp_desc_ring);
+ cpr->cp_desc_ring = NULL;
+ kfree(cpr->cp_desc_mapping);
+ cpr->cp_desc_mapping = NULL;
+}
+
+static int bnxt_alloc_cp_arrays(struct bnxt_cp_ring_info *cpr, int n)
+{
+ cpr->cp_desc_ring = kcalloc(n, sizeof(*cpr->cp_desc_ring), GFP_KERNEL);
+ if (!cpr->cp_desc_ring)
+ return -ENOMEM;
+ cpr->cp_desc_mapping = kcalloc(n, sizeof(*cpr->cp_desc_mapping),
+ GFP_KERNEL);
+ if (!cpr->cp_desc_mapping)
+ return -ENOMEM;
+ return 0;
+}
+
+static void bnxt_free_all_cp_arrays(struct bnxt *bp)
+{
+ int i;
+
+ if (!bp->bnapi)
+ return;
+ for (i = 0; i < bp->cp_nr_rings; i++) {
+ struct bnxt_napi *bnapi = bp->bnapi[i];
+
+ if (!bnapi)
+ continue;
+ bnxt_free_cp_arrays(&bnapi->cp_ring);
+ }
+}
+
+static int bnxt_alloc_all_cp_arrays(struct bnxt *bp)
+{
+ int i, n = bp->cp_nr_pages;
+
+ for (i = 0; i < bp->cp_nr_rings; i++) {
+ struct bnxt_napi *bnapi = bp->bnapi[i];
+ int rc;
+
+ if (!bnapi)
+ continue;
+ rc = bnxt_alloc_cp_arrays(&bnapi->cp_ring, n);
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
static void bnxt_free_cp_rings(struct bnxt *bp)
{
int i;
@@ -3167,6 +3294,7 @@ static void bnxt_free_cp_rings(struct bnxt *bp)
if (cpr2) {
ring = &cpr2->cp_ring_struct;
bnxt_free_ring(bp, &ring->ring_mem);
+ bnxt_free_cp_arrays(cpr2);
kfree(cpr2);
cpr->cp_ring_arr[j] = NULL;
}
@@ -3185,6 +3313,12 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp)
if (!cpr)
return NULL;
+ rc = bnxt_alloc_cp_arrays(cpr, bp->cp_nr_pages);
+ if (rc) {
+ bnxt_free_cp_arrays(cpr);
+ kfree(cpr);
+ return NULL;
+ }
ring = &cpr->cp_ring_struct;
rmem = &ring->ring_mem;
rmem->nr_pages = bp->cp_nr_pages;
@@ -3195,6 +3329,7 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp)
rc = bnxt_alloc_ring(bp, rmem);
if (rc) {
bnxt_free_ring(bp, rmem);
+ bnxt_free_cp_arrays(cpr);
kfree(cpr);
cpr = NULL;
}
@@ -3627,9 +3762,15 @@ void bnxt_set_ring_params(struct bnxt *bp)
if (jumbo_factor > agg_factor)
agg_factor = jumbo_factor;
}
- agg_ring_size = ring_size * agg_factor;
+ if (agg_factor) {
+ if (ring_size > BNXT_MAX_RX_DESC_CNT_JUM_ENA) {
+ ring_size = BNXT_MAX_RX_DESC_CNT_JUM_ENA;
+ netdev_warn(bp->dev, "RX ring size reduced from %d to %d because the jumbo ring is now enabled\n",
+ bp->rx_ring_size, ring_size);
+ bp->rx_ring_size = ring_size;
+ }
+ agg_ring_size = ring_size * agg_factor;
- if (agg_ring_size) {
bp->rx_agg_nr_pages = bnxt_calc_nr_ring_pages(agg_ring_size,
RX_DESC_CNT);
if (bp->rx_agg_nr_pages > MAX_RX_AGG_PAGES) {
@@ -3819,77 +3960,26 @@ out:
static void bnxt_free_hwrm_resources(struct bnxt *bp)
{
- struct pci_dev *pdev = bp->pdev;
-
- if (bp->hwrm_cmd_resp_addr) {
- dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr,
- bp->hwrm_cmd_resp_dma_addr);
- bp->hwrm_cmd_resp_addr = NULL;
- }
-
- if (bp->hwrm_cmd_kong_resp_addr) {
- dma_free_coherent(&pdev->dev, PAGE_SIZE,
- bp->hwrm_cmd_kong_resp_addr,
- bp->hwrm_cmd_kong_resp_dma_addr);
- bp->hwrm_cmd_kong_resp_addr = NULL;
- }
-}
-
-static int bnxt_alloc_kong_hwrm_resources(struct bnxt *bp)
-{
- struct pci_dev *pdev = bp->pdev;
-
- if (bp->hwrm_cmd_kong_resp_addr)
- return 0;
+ struct bnxt_hwrm_wait_token *token;
- bp->hwrm_cmd_kong_resp_addr =
- dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
- &bp->hwrm_cmd_kong_resp_dma_addr,
- GFP_KERNEL);
- if (!bp->hwrm_cmd_kong_resp_addr)
- return -ENOMEM;
+ dma_pool_destroy(bp->hwrm_dma_pool);
+ bp->hwrm_dma_pool = NULL;
- return 0;
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(token, &bp->hwrm_pending_list, node)
+ WRITE_ONCE(token->state, BNXT_HWRM_CANCELLED);
+ rcu_read_unlock();
}
static int bnxt_alloc_hwrm_resources(struct bnxt *bp)
{
- struct pci_dev *pdev = bp->pdev;
-
- bp->hwrm_cmd_resp_addr = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
- &bp->hwrm_cmd_resp_dma_addr,
- GFP_KERNEL);
- if (!bp->hwrm_cmd_resp_addr)
+ bp->hwrm_dma_pool = dma_pool_create("bnxt_hwrm", &bp->pdev->dev,
+ BNXT_HWRM_DMA_SIZE,
+ BNXT_HWRM_DMA_ALIGN, 0);
+ if (!bp->hwrm_dma_pool)
return -ENOMEM;
- return 0;
-}
-
-static void bnxt_free_hwrm_short_cmd_req(struct bnxt *bp)
-{
- if (bp->hwrm_short_cmd_req_addr) {
- struct pci_dev *pdev = bp->pdev;
-
- dma_free_coherent(&pdev->dev, bp->hwrm_max_ext_req_len,
- bp->hwrm_short_cmd_req_addr,
- bp->hwrm_short_cmd_req_dma_addr);
- bp->hwrm_short_cmd_req_addr = NULL;
- }
-}
-
-static int bnxt_alloc_hwrm_short_cmd_req(struct bnxt *bp)
-{
- struct pci_dev *pdev = bp->pdev;
-
- if (bp->hwrm_short_cmd_req_addr)
- return 0;
-
- bp->hwrm_short_cmd_req_addr =
- dma_alloc_coherent(&pdev->dev, bp->hwrm_max_ext_req_len,
- &bp->hwrm_short_cmd_req_dma_addr,
- GFP_KERNEL);
- if (!bp->hwrm_short_cmd_req_addr)
- return -ENOMEM;
+ INIT_HLIST_HEAD(&bp->hwrm_pending_list);
return 0;
}
@@ -3950,8 +4040,8 @@ static void bnxt_copy_hw_masks(u64 *mask_arr, __le64 *hw_mask_arr, int count)
static int bnxt_hwrm_func_qstat_ext(struct bnxt *bp,
struct bnxt_stats_mem *stats)
{
- struct hwrm_func_qstats_ext_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_func_qstats_ext_input req = {0};
+ struct hwrm_func_qstats_ext_output *resp;
+ struct hwrm_func_qstats_ext_input *req;
__le64 *hw_masks;
int rc;
@@ -3959,19 +4049,20 @@ static int bnxt_hwrm_func_qstat_ext(struct bnxt *bp,
!(bp->flags & BNXT_FLAG_CHIP_P5))
return -EOPNOTSUPP;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QSTATS_EXT, -1, -1);
- req.fid = cpu_to_le16(0xffff);
- req.flags = FUNC_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK;
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_QSTATS_EXT);
if (rc)
- goto qstat_exit;
+ return rc;
- hw_masks = &resp->rx_ucast_pkts;
- bnxt_copy_hw_masks(stats->hw_masks, hw_masks, stats->len / 8);
+ req->fid = cpu_to_le16(0xffff);
+ req->flags = FUNC_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK;
-qstat_exit:
- mutex_unlock(&bp->hwrm_cmd_lock);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
+ if (!rc) {
+ hw_masks = &resp->rx_ucast_pkts;
+ bnxt_copy_hw_masks(stats->hw_masks, hw_masks, stats->len / 8);
+ }
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -4230,6 +4321,7 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init)
bnxt_free_tx_rings(bp);
bnxt_free_rx_rings(bp);
bnxt_free_cp_rings(bp);
+ bnxt_free_all_cp_arrays(bp);
bnxt_free_ntp_fltrs(bp, irq_re_init);
if (irq_re_init) {
bnxt_free_ring_stats(bp);
@@ -4350,6 +4442,10 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init)
goto alloc_mem_err;
}
+ rc = bnxt_alloc_all_cp_arrays(bp);
+ if (rc)
+ goto alloc_mem_err;
+
bnxt_init_ring_struct(bp);
rc = bnxt_alloc_rx_rings(bp);
@@ -4432,313 +4528,38 @@ static void bnxt_enable_int(struct bnxt *bp)
}
}
-void bnxt_hwrm_cmd_hdr_init(struct bnxt *bp, void *request, u16 req_type,
- u16 cmpl_ring, u16 target_id)
-{
- struct input *req = request;
-
- req->req_type = cpu_to_le16(req_type);
- req->cmpl_ring = cpu_to_le16(cmpl_ring);
- req->target_id = cpu_to_le16(target_id);
- if (bnxt_kong_hwrm_message(bp, req))
- req->resp_addr = cpu_to_le64(bp->hwrm_cmd_kong_resp_dma_addr);
- else
- req->resp_addr = cpu_to_le64(bp->hwrm_cmd_resp_dma_addr);
-}
-
-static int bnxt_hwrm_to_stderr(u32 hwrm_err)
-{
- switch (hwrm_err) {
- case HWRM_ERR_CODE_SUCCESS:
- return 0;
- case HWRM_ERR_CODE_RESOURCE_LOCKED:
- return -EROFS;
- case HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED:
- return -EACCES;
- case HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR:
- return -ENOSPC;
- case HWRM_ERR_CODE_INVALID_PARAMS:
- case HWRM_ERR_CODE_INVALID_FLAGS:
- case HWRM_ERR_CODE_INVALID_ENABLES:
- case HWRM_ERR_CODE_UNSUPPORTED_TLV:
- case HWRM_ERR_CODE_UNSUPPORTED_OPTION_ERR:
- return -EINVAL;
- case HWRM_ERR_CODE_NO_BUFFER:
- return -ENOMEM;
- case HWRM_ERR_CODE_HOT_RESET_PROGRESS:
- case HWRM_ERR_CODE_BUSY:
- return -EAGAIN;
- case HWRM_ERR_CODE_CMD_NOT_SUPPORTED:
- return -EOPNOTSUPP;
- default:
- return -EIO;
- }
-}
-
-static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
- int timeout, bool silent)
-{
- int i, intr_process, rc, tmo_count;
- struct input *req = msg;
- u32 *data = msg;
- u8 *valid;
- u16 cp_ring_id, len = 0;
- struct hwrm_err_output *resp = bp->hwrm_cmd_resp_addr;
- u16 max_req_len = BNXT_HWRM_MAX_REQ_LEN;
- struct hwrm_short_input short_input = {0};
- u32 doorbell_offset = BNXT_GRCPF_REG_CHIMP_COMM_TRIGGER;
- u32 bar_offset = BNXT_GRCPF_REG_CHIMP_COMM;
- u16 dst = BNXT_HWRM_CHNL_CHIMP;
-
- if (BNXT_NO_FW_ACCESS(bp) &&
- le16_to_cpu(req->req_type) != HWRM_FUNC_RESET)
- return -EBUSY;
-
- if (msg_len > BNXT_HWRM_MAX_REQ_LEN) {
- if (msg_len > bp->hwrm_max_ext_req_len ||
- !bp->hwrm_short_cmd_req_addr)
- return -EINVAL;
- }
-
- if (bnxt_hwrm_kong_chnl(bp, req)) {
- dst = BNXT_HWRM_CHNL_KONG;
- bar_offset = BNXT_GRCPF_REG_KONG_COMM;
- doorbell_offset = BNXT_GRCPF_REG_KONG_COMM_TRIGGER;
- resp = bp->hwrm_cmd_kong_resp_addr;
- }
-
- memset(resp, 0, PAGE_SIZE);
- cp_ring_id = le16_to_cpu(req->cmpl_ring);
- intr_process = (cp_ring_id == INVALID_HW_RING_ID) ? 0 : 1;
-
- req->seq_id = cpu_to_le16(bnxt_get_hwrm_seq_id(bp, dst));
- /* currently supports only one outstanding message */
- if (intr_process)
- bp->hwrm_intr_seq_id = le16_to_cpu(req->seq_id);
-
- if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
- msg_len > BNXT_HWRM_MAX_REQ_LEN) {
- void *short_cmd_req = bp->hwrm_short_cmd_req_addr;
- u16 max_msg_len;
-
- /* Set boundary for maximum extended request length for short
- * cmd format. If passed up from device use the max supported
- * internal req length.
- */
- max_msg_len = bp->hwrm_max_ext_req_len;
-
- memcpy(short_cmd_req, req, msg_len);
- if (msg_len < max_msg_len)
- memset(short_cmd_req + msg_len, 0,
- max_msg_len - msg_len);
-
- short_input.req_type = req->req_type;
- short_input.signature =
- cpu_to_le16(SHORT_REQ_SIGNATURE_SHORT_CMD);
- short_input.size = cpu_to_le16(msg_len);
- short_input.req_addr =
- cpu_to_le64(bp->hwrm_short_cmd_req_dma_addr);
-
- data = (u32 *)&short_input;
- msg_len = sizeof(short_input);
-
- /* Sync memory write before updating doorbell */
- wmb();
-
- max_req_len = BNXT_HWRM_SHORT_REQ_LEN;
- }
-
- /* Write request msg to hwrm channel */
- __iowrite32_copy(bp->bar0 + bar_offset, data, msg_len / 4);
-
- for (i = msg_len; i < max_req_len; i += 4)
- writel(0, bp->bar0 + bar_offset + i);
-
- /* Ring channel doorbell */
- writel(1, bp->bar0 + doorbell_offset);
-
- if (!pci_is_enabled(bp->pdev))
- return -ENODEV;
-
- if (!timeout)
- timeout = DFLT_HWRM_CMD_TIMEOUT;
- /* Limit timeout to an upper limit */
- timeout = min(timeout, HWRM_CMD_MAX_TIMEOUT);
- /* convert timeout to usec */
- timeout *= 1000;
-
- i = 0;
- /* Short timeout for the first few iterations:
- * number of loops = number of loops for short timeout +
- * number of loops for standard timeout.
- */
- tmo_count = HWRM_SHORT_TIMEOUT_COUNTER;
- timeout = timeout - HWRM_SHORT_MIN_TIMEOUT * HWRM_SHORT_TIMEOUT_COUNTER;
- tmo_count += DIV_ROUND_UP(timeout, HWRM_MIN_TIMEOUT);
-
- if (intr_process) {
- u16 seq_id = bp->hwrm_intr_seq_id;
-
- /* Wait until hwrm response cmpl interrupt is processed */
- while (bp->hwrm_intr_seq_id != (u16)~seq_id &&
- i++ < tmo_count) {
- /* Abort the wait for completion if the FW health
- * check has failed.
- */
- if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
- return -EBUSY;
- /* on first few passes, just barely sleep */
- if (i < HWRM_SHORT_TIMEOUT_COUNTER) {
- usleep_range(HWRM_SHORT_MIN_TIMEOUT,
- HWRM_SHORT_MAX_TIMEOUT);
- } else {
- if (HWRM_WAIT_MUST_ABORT(bp, req))
- break;
- usleep_range(HWRM_MIN_TIMEOUT,
- HWRM_MAX_TIMEOUT);
- }
- }
-
- if (bp->hwrm_intr_seq_id != (u16)~seq_id) {
- if (!silent)
- netdev_err(bp->dev, "Resp cmpl intr err msg: 0x%x\n",
- le16_to_cpu(req->req_type));
- return -EBUSY;
- }
- len = le16_to_cpu(resp->resp_len);
- valid = ((u8 *)resp) + len - 1;
- } else {
- int j;
-
- /* Check if response len is updated */
- for (i = 0; i < tmo_count; i++) {
- /* Abort the wait for completion if the FW health
- * check has failed.
- */
- if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
- return -EBUSY;
- len = le16_to_cpu(resp->resp_len);
- if (len)
- break;
- /* on first few passes, just barely sleep */
- if (i < HWRM_SHORT_TIMEOUT_COUNTER) {
- usleep_range(HWRM_SHORT_MIN_TIMEOUT,
- HWRM_SHORT_MAX_TIMEOUT);
- } else {
- if (HWRM_WAIT_MUST_ABORT(bp, req))
- goto timeout_abort;
- usleep_range(HWRM_MIN_TIMEOUT,
- HWRM_MAX_TIMEOUT);
- }
- }
-
- if (i >= tmo_count) {
-timeout_abort:
- if (!silent)
- netdev_err(bp->dev, "Error (timeout: %d) msg {0x%x 0x%x} len:%d\n",
- HWRM_TOTAL_TIMEOUT(i),
- le16_to_cpu(req->req_type),
- le16_to_cpu(req->seq_id), len);
- return -EBUSY;
- }
-
- /* Last byte of resp contains valid bit */
- valid = ((u8 *)resp) + len - 1;
- for (j = 0; j < HWRM_VALID_BIT_DELAY_USEC; j++) {
- /* make sure we read from updated DMA memory */
- dma_rmb();
- if (*valid)
- break;
- usleep_range(1, 5);
- }
-
- if (j >= HWRM_VALID_BIT_DELAY_USEC) {
- if (!silent)
- netdev_err(bp->dev, "Error (timeout: %d) msg {0x%x 0x%x} len:%d v:%d\n",
- HWRM_TOTAL_TIMEOUT(i),
- le16_to_cpu(req->req_type),
- le16_to_cpu(req->seq_id), len,
- *valid);
- return -EBUSY;
- }
- }
-
- /* Zero valid bit for compatibility. Valid bit in an older spec
- * may become a new field in a newer spec. We must make sure that
- * a new field not implemented by old spec will read zero.
- */
- *valid = 0;
- rc = le16_to_cpu(resp->error_code);
- if (rc && !silent)
- netdev_err(bp->dev, "hwrm req_type 0x%x seq id 0x%x error 0x%x\n",
- le16_to_cpu(resp->req_type),
- le16_to_cpu(resp->seq_id), rc);
- return bnxt_hwrm_to_stderr(rc);
-}
-
-int _hwrm_send_message(struct bnxt *bp, void *msg, u32 msg_len, int timeout)
-{
- return bnxt_hwrm_do_send_msg(bp, msg, msg_len, timeout, false);
-}
-
-int _hwrm_send_message_silent(struct bnxt *bp, void *msg, u32 msg_len,
- int timeout)
-{
- return bnxt_hwrm_do_send_msg(bp, msg, msg_len, timeout, true);
-}
-
-int hwrm_send_message(struct bnxt *bp, void *msg, u32 msg_len, int timeout)
-{
- int rc;
-
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, msg, msg_len, timeout);
- mutex_unlock(&bp->hwrm_cmd_lock);
- return rc;
-}
-
-int hwrm_send_message_silent(struct bnxt *bp, void *msg, u32 msg_len,
- int timeout)
-{
- int rc;
-
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = bnxt_hwrm_do_send_msg(bp, msg, msg_len, timeout, true);
- mutex_unlock(&bp->hwrm_cmd_lock);
- return rc;
-}
-
int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size,
bool async_only)
{
- struct hwrm_func_drv_rgtr_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_func_drv_rgtr_input req = {0};
DECLARE_BITMAP(async_events_bmap, 256);
u32 *events = (u32 *)async_events_bmap;
+ struct hwrm_func_drv_rgtr_output *resp;
+ struct hwrm_func_drv_rgtr_input *req;
u32 flags;
int rc, i;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_DRV_RGTR, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_DRV_RGTR);
+ if (rc)
+ return rc;
- req.enables =
- cpu_to_le32(FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE |
- FUNC_DRV_RGTR_REQ_ENABLES_VER |
- FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD);
+ req->enables = cpu_to_le32(FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE |
+ FUNC_DRV_RGTR_REQ_ENABLES_VER |
+ FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD);
- req.os_type = cpu_to_le16(FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX);
+ req->os_type = cpu_to_le16(FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX);
flags = FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE;
if (bp->fw_cap & BNXT_FW_CAP_HOT_RESET)
flags |= FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT;
if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)
flags |= FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT |
FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT;
- req.flags = cpu_to_le32(flags);
- req.ver_maj_8b = DRV_VER_MAJ;
- req.ver_min_8b = DRV_VER_MIN;
- req.ver_upd_8b = DRV_VER_UPD;
- req.ver_maj = cpu_to_le16(DRV_VER_MAJ);
- req.ver_min = cpu_to_le16(DRV_VER_MIN);
- req.ver_upd = cpu_to_le16(DRV_VER_UPD);
+ req->flags = cpu_to_le32(flags);
+ req->ver_maj_8b = DRV_VER_MAJ;
+ req->ver_min_8b = DRV_VER_MIN;
+ req->ver_upd_8b = DRV_VER_UPD;
+ req->ver_maj = cpu_to_le16(DRV_VER_MAJ);
+ req->ver_min = cpu_to_le16(DRV_VER_MIN);
+ req->ver_upd = cpu_to_le16(DRV_VER_UPD);
if (BNXT_PF(bp)) {
u32 data[8];
@@ -4755,14 +4576,14 @@ int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size,
}
for (i = 0; i < 8; i++)
- req.vf_req_fwd[i] = cpu_to_le32(data[i]);
+ req->vf_req_fwd[i] = cpu_to_le32(data[i]);
- req.enables |=
+ req->enables |=
cpu_to_le32(FUNC_DRV_RGTR_REQ_ENABLES_VF_REQ_FWD);
}
if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
- req.flags |= cpu_to_le32(
+ req->flags |= cpu_to_le32(
FUNC_DRV_RGTR_REQ_FLAGS_FLOW_HANDLE_64BIT_MODE);
memset(async_events_bmap, 0, sizeof(async_events_bmap));
@@ -4781,57 +4602,63 @@ int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size,
}
}
for (i = 0; i < 8; i++)
- req.async_event_fwd[i] |= cpu_to_le32(events[i]);
+ req->async_event_fwd[i] |= cpu_to_le32(events[i]);
if (async_only)
- req.enables =
+ req->enables =
cpu_to_le32(FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc) {
set_bit(BNXT_STATE_DRV_REGISTERED, &bp->state);
if (resp->flags &
cpu_to_le32(FUNC_DRV_RGTR_RESP_FLAGS_IF_CHANGE_SUPPORTED))
bp->fw_cap |= BNXT_FW_CAP_IF_CHANGE;
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
static int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp)
{
- struct hwrm_func_drv_unrgtr_input req = {0};
+ struct hwrm_func_drv_unrgtr_input *req;
+ int rc;
if (!test_and_clear_bit(BNXT_STATE_DRV_REGISTERED, &bp->state))
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_DRV_UNRGTR, -1, -1);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_DRV_UNRGTR);
+ if (rc)
+ return rc;
+ return hwrm_req_send(bp, req);
}
static int bnxt_hwrm_tunnel_dst_port_free(struct bnxt *bp, u8 tunnel_type)
{
- u32 rc = 0;
- struct hwrm_tunnel_dst_port_free_input req = {0};
+ struct hwrm_tunnel_dst_port_free_input *req;
+ int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TUNNEL_DST_PORT_FREE, -1, -1);
- req.tunnel_type = tunnel_type;
+ rc = hwrm_req_init(bp, req, HWRM_TUNNEL_DST_PORT_FREE);
+ if (rc)
+ return rc;
+
+ req->tunnel_type = tunnel_type;
switch (tunnel_type) {
case TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN:
- req.tunnel_dst_port_id = cpu_to_le16(bp->vxlan_fw_dst_port_id);
+ req->tunnel_dst_port_id = cpu_to_le16(bp->vxlan_fw_dst_port_id);
bp->vxlan_fw_dst_port_id = INVALID_HW_RING_ID;
break;
case TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE:
- req.tunnel_dst_port_id = cpu_to_le16(bp->nge_fw_dst_port_id);
+ req->tunnel_dst_port_id = cpu_to_le16(bp->nge_fw_dst_port_id);
bp->nge_fw_dst_port_id = INVALID_HW_RING_ID;
break;
default:
break;
}
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, req);
if (rc)
netdev_err(bp->dev, "hwrm_tunnel_dst_port_free failed. rc:%d\n",
rc);
@@ -4841,17 +4668,19 @@ static int bnxt_hwrm_tunnel_dst_port_free(struct bnxt *bp, u8 tunnel_type)
static int bnxt_hwrm_tunnel_dst_port_alloc(struct bnxt *bp, __be16 port,
u8 tunnel_type)
{
- u32 rc = 0;
- struct hwrm_tunnel_dst_port_alloc_input req = {0};
- struct hwrm_tunnel_dst_port_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+ struct hwrm_tunnel_dst_port_alloc_output *resp;
+ struct hwrm_tunnel_dst_port_alloc_input *req;
+ int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TUNNEL_DST_PORT_ALLOC, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_TUNNEL_DST_PORT_ALLOC);
+ if (rc)
+ return rc;
- req.tunnel_type = tunnel_type;
- req.tunnel_dst_port_val = port;
+ req->tunnel_type = tunnel_type;
+ req->tunnel_dst_port_val = port;
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (rc) {
netdev_err(bp->dev, "hwrm_tunnel_dst_port_alloc failed. rc:%d\n",
rc);
@@ -4871,33 +4700,40 @@ static int bnxt_hwrm_tunnel_dst_port_alloc(struct bnxt *bp, __be16 port,
}
err_out:
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
static int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, u16 vnic_id)
{
- struct hwrm_cfa_l2_set_rx_mask_input req = {0};
+ struct hwrm_cfa_l2_set_rx_mask_input *req;
struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
+ int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_L2_SET_RX_MASK, -1, -1);
- req.vnic_id = cpu_to_le32(vnic->fw_vnic_id);
+ rc = hwrm_req_init(bp, req, HWRM_CFA_L2_SET_RX_MASK);
+ if (rc)
+ return rc;
- req.num_mc_entries = cpu_to_le32(vnic->mc_list_count);
- req.mc_tbl_addr = cpu_to_le64(vnic->mc_list_mapping);
- req.mask = cpu_to_le32(vnic->rx_mask);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->vnic_id = cpu_to_le32(vnic->fw_vnic_id);
+ req->num_mc_entries = cpu_to_le32(vnic->mc_list_count);
+ req->mc_tbl_addr = cpu_to_le64(vnic->mc_list_mapping);
+ req->mask = cpu_to_le32(vnic->rx_mask);
+ return hwrm_req_send_silent(bp, req);
}
#ifdef CONFIG_RFS_ACCEL
static int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp,
struct bnxt_ntuple_filter *fltr)
{
- struct hwrm_cfa_ntuple_filter_free_input req = {0};
+ struct hwrm_cfa_ntuple_filter_free_input *req;
+ int rc;
+
+ rc = hwrm_req_init(bp, req, HWRM_CFA_NTUPLE_FILTER_FREE);
+ if (rc)
+ return rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_NTUPLE_FILTER_FREE, -1, -1);
- req.ntuple_filter_id = fltr->filter_id;
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->ntuple_filter_id = fltr->filter_id;
+ return hwrm_req_send(bp, req);
}
#define BNXT_NTP_FLTR_FLAGS \
@@ -4922,69 +4758,70 @@ static int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp,
static int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp,
struct bnxt_ntuple_filter *fltr)
{
- struct hwrm_cfa_ntuple_filter_alloc_input req = {0};
struct hwrm_cfa_ntuple_filter_alloc_output *resp;
+ struct hwrm_cfa_ntuple_filter_alloc_input *req;
struct flow_keys *keys = &fltr->fkeys;
struct bnxt_vnic_info *vnic;
u32 flags = 0;
- int rc = 0;
+ int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_NTUPLE_FILTER_ALLOC, -1, -1);
- req.l2_filter_id = bp->vnic_info[0].fw_l2_filter_id[fltr->l2_fltr_idx];
+ rc = hwrm_req_init(bp, req, HWRM_CFA_NTUPLE_FILTER_ALLOC);
+ if (rc)
+ return rc;
+
+ req->l2_filter_id = bp->vnic_info[0].fw_l2_filter_id[fltr->l2_fltr_idx];
if (bp->fw_cap & BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2) {
flags = CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DEST_RFS_RING_IDX;
- req.dst_id = cpu_to_le16(fltr->rxq);
+ req->dst_id = cpu_to_le16(fltr->rxq);
} else {
vnic = &bp->vnic_info[fltr->rxq + 1];
- req.dst_id = cpu_to_le16(vnic->fw_vnic_id);
+ req->dst_id = cpu_to_le16(vnic->fw_vnic_id);
}
- req.flags = cpu_to_le32(flags);
- req.enables = cpu_to_le32(BNXT_NTP_FLTR_FLAGS);
+ req->flags = cpu_to_le32(flags);
+ req->enables = cpu_to_le32(BNXT_NTP_FLTR_FLAGS);
- req.ethertype = htons(ETH_P_IP);
- memcpy(req.src_macaddr, fltr->src_mac_addr, ETH_ALEN);
- req.ip_addr_type = CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
- req.ip_protocol = keys->basic.ip_proto;
+ req->ethertype = htons(ETH_P_IP);
+ memcpy(req->src_macaddr, fltr->src_mac_addr, ETH_ALEN);
+ req->ip_addr_type = CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
+ req->ip_protocol = keys->basic.ip_proto;
if (keys->basic.n_proto == htons(ETH_P_IPV6)) {
int i;
- req.ethertype = htons(ETH_P_IPV6);
- req.ip_addr_type =
+ req->ethertype = htons(ETH_P_IPV6);
+ req->ip_addr_type =
CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6;
- *(struct in6_addr *)&req.src_ipaddr[0] =
+ *(struct in6_addr *)&req->src_ipaddr[0] =
keys->addrs.v6addrs.src;
- *(struct in6_addr *)&req.dst_ipaddr[0] =
+ *(struct in6_addr *)&req->dst_ipaddr[0] =
keys->addrs.v6addrs.dst;
for (i = 0; i < 4; i++) {
- req.src_ipaddr_mask[i] = cpu_to_be32(0xffffffff);
- req.dst_ipaddr_mask[i] = cpu_to_be32(0xffffffff);
+ req->src_ipaddr_mask[i] = cpu_to_be32(0xffffffff);
+ req->dst_ipaddr_mask[i] = cpu_to_be32(0xffffffff);
}
} else {
- req.src_ipaddr[0] = keys->addrs.v4addrs.src;
- req.src_ipaddr_mask[0] = cpu_to_be32(0xffffffff);
- req.dst_ipaddr[0] = keys->addrs.v4addrs.dst;
- req.dst_ipaddr_mask[0] = cpu_to_be32(0xffffffff);
+ req->src_ipaddr[0] = keys->addrs.v4addrs.src;
+ req->src_ipaddr_mask[0] = cpu_to_be32(0xffffffff);
+ req->dst_ipaddr[0] = keys->addrs.v4addrs.dst;
+ req->dst_ipaddr_mask[0] = cpu_to_be32(0xffffffff);
}
if (keys->control.flags & FLOW_DIS_ENCAPSULATION) {
- req.enables |= cpu_to_le32(BNXT_NTP_TUNNEL_FLTR_FLAG);
- req.tunnel_type =
+ req->enables |= cpu_to_le32(BNXT_NTP_TUNNEL_FLTR_FLAG);
+ req->tunnel_type =
CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL;
}
- req.src_port = keys->ports.src;
- req.src_port_mask = cpu_to_be16(0xffff);
- req.dst_port = keys->ports.dst;
- req.dst_port_mask = cpu_to_be16(0xffff);
+ req->src_port = keys->ports.src;
+ req->src_port_mask = cpu_to_be16(0xffff);
+ req->dst_port = keys->ports.dst;
+ req->dst_port_mask = cpu_to_be16(0xffff);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
- if (!rc) {
- resp = bnxt_get_hwrm_resp_addr(bp, &req);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
+ if (!rc)
fltr->filter_id = resp->ntuple_filter_id;
- }
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
#endif
@@ -4992,62 +4829,62 @@ static int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp,
static int bnxt_hwrm_set_vnic_filter(struct bnxt *bp, u16 vnic_id, u16 idx,
u8 *mac_addr)
{
- u32 rc = 0;
- struct hwrm_cfa_l2_filter_alloc_input req = {0};
- struct hwrm_cfa_l2_filter_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+ struct hwrm_cfa_l2_filter_alloc_output *resp;
+ struct hwrm_cfa_l2_filter_alloc_input *req;
+ int rc;
+
+ rc = hwrm_req_init(bp, req, HWRM_CFA_L2_FILTER_ALLOC);
+ if (rc)
+ return rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_L2_FILTER_ALLOC, -1, -1);
- req.flags = cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_RX);
+ req->flags = cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_RX);
if (!BNXT_CHIP_TYPE_NITRO_A0(bp))
- req.flags |=
+ req->flags |=
cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_OUTERMOST);
- req.dst_id = cpu_to_le16(bp->vnic_info[vnic_id].fw_vnic_id);
- req.enables =
+ req->dst_id = cpu_to_le16(bp->vnic_info[vnic_id].fw_vnic_id);
+ req->enables =
cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR |
CFA_L2_FILTER_ALLOC_REQ_ENABLES_DST_ID |
CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR_MASK);
- memcpy(req.l2_addr, mac_addr, ETH_ALEN);
- req.l2_addr_mask[0] = 0xff;
- req.l2_addr_mask[1] = 0xff;
- req.l2_addr_mask[2] = 0xff;
- req.l2_addr_mask[3] = 0xff;
- req.l2_addr_mask[4] = 0xff;
- req.l2_addr_mask[5] = 0xff;
-
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ memcpy(req->l2_addr, mac_addr, ETH_ALEN);
+ req->l2_addr_mask[0] = 0xff;
+ req->l2_addr_mask[1] = 0xff;
+ req->l2_addr_mask[2] = 0xff;
+ req->l2_addr_mask[3] = 0xff;
+ req->l2_addr_mask[4] = 0xff;
+ req->l2_addr_mask[5] = 0xff;
+
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc)
bp->vnic_info[vnic_id].fw_l2_filter_id[idx] =
resp->l2_filter_id;
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
static int bnxt_hwrm_clear_vnic_filter(struct bnxt *bp)
{
+ struct hwrm_cfa_l2_filter_free_input *req;
u16 i, j, num_of_vnics = 1; /* only vnic 0 supported */
- int rc = 0;
+ int rc;
/* Any associated ntuple filters will also be cleared by firmware. */
- mutex_lock(&bp->hwrm_cmd_lock);
+ rc = hwrm_req_init(bp, req, HWRM_CFA_L2_FILTER_FREE);
+ if (rc)
+ return rc;
+ hwrm_req_hold(bp, req);
for (i = 0; i < num_of_vnics; i++) {
struct bnxt_vnic_info *vnic = &bp->vnic_info[i];
for (j = 0; j < vnic->uc_filter_count; j++) {
- struct hwrm_cfa_l2_filter_free_input req = {0};
-
- bnxt_hwrm_cmd_hdr_init(bp, &req,
- HWRM_CFA_L2_FILTER_FREE, -1, -1);
+ req->l2_filter_id = vnic->fw_l2_filter_id[j];
- req.l2_filter_id = vnic->fw_l2_filter_id[j];
-
- rc = _hwrm_send_message(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, req);
}
vnic->uc_filter_count = 0;
}
- mutex_unlock(&bp->hwrm_cmd_lock);
-
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -5055,12 +4892,15 @@ static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags)
{
struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
u16 max_aggs = VNIC_TPA_CFG_REQ_MAX_AGGS_MAX;
- struct hwrm_vnic_tpa_cfg_input req = {0};
+ struct hwrm_vnic_tpa_cfg_input *req;
+ int rc;
if (vnic->fw_vnic_id == INVALID_HW_RING_ID)
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_TPA_CFG, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_VNIC_TPA_CFG);
+ if (rc)
+ return rc;
if (tpa_flags) {
u16 mss = bp->dev->mtu - 40;
@@ -5074,9 +4914,9 @@ static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags)
if (tpa_flags & BNXT_FLAG_GRO)
flags |= VNIC_TPA_CFG_REQ_FLAGS_GRO;
- req.flags = cpu_to_le32(flags);
+ req->flags = cpu_to_le32(flags);
- req.enables =
+ req->enables =
cpu_to_le32(VNIC_TPA_CFG_REQ_ENABLES_MAX_AGG_SEGS |
VNIC_TPA_CFG_REQ_ENABLES_MAX_AGGS |
VNIC_TPA_CFG_REQ_ENABLES_MIN_AGG_LEN);
@@ -5100,14 +4940,14 @@ static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags)
} else {
segs = ilog2(nsegs);
}
- req.max_agg_segs = cpu_to_le16(segs);
- req.max_aggs = cpu_to_le16(max_aggs);
+ req->max_agg_segs = cpu_to_le16(segs);
+ req->max_aggs = cpu_to_le16(max_aggs);
- req.min_agg_len = cpu_to_le32(512);
+ req->min_agg_len = cpu_to_le32(512);
}
- req.vnic_id = cpu_to_le16(vnic->fw_vnic_id);
+ req->vnic_id = cpu_to_le16(vnic->fw_vnic_id);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ return hwrm_req_send(bp, req);
}
static u16 bnxt_cp_ring_from_grp(struct bnxt *bp, struct bnxt_ring_struct *ring)
@@ -5251,86 +5091,102 @@ static void bnxt_fill_hw_rss_tbl(struct bnxt *bp, struct bnxt_vnic_info *vnic)
static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss)
{
struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
- struct hwrm_vnic_rss_cfg_input req = {0};
+ struct hwrm_vnic_rss_cfg_input *req;
+ int rc;
if ((bp->flags & BNXT_FLAG_CHIP_P5) ||
vnic->fw_rss_cos_lb_ctx[0] == INVALID_HW_RING_ID)
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_CFG, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_VNIC_RSS_CFG);
+ if (rc)
+ return rc;
+
if (set_rss) {
bnxt_fill_hw_rss_tbl(bp, vnic);
- req.hash_type = cpu_to_le32(bp->rss_hash_cfg);
- req.hash_mode_flags = VNIC_RSS_CFG_REQ_HASH_MODE_FLAGS_DEFAULT;
- req.ring_grp_tbl_addr = cpu_to_le64(vnic->rss_table_dma_addr);
- req.hash_key_tbl_addr =
+ req->hash_type = cpu_to_le32(bp->rss_hash_cfg);
+ req->hash_mode_flags = VNIC_RSS_CFG_REQ_HASH_MODE_FLAGS_DEFAULT;
+ req->ring_grp_tbl_addr = cpu_to_le64(vnic->rss_table_dma_addr);
+ req->hash_key_tbl_addr =
cpu_to_le64(vnic->rss_hash_key_dma_addr);
}
- req.rss_ctx_idx = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[0]);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->rss_ctx_idx = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[0]);
+ return hwrm_req_send(bp, req);
}
static int bnxt_hwrm_vnic_set_rss_p5(struct bnxt *bp, u16 vnic_id, bool set_rss)
{
struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
- struct hwrm_vnic_rss_cfg_input req = {0};
+ struct hwrm_vnic_rss_cfg_input *req;
dma_addr_t ring_tbl_map;
u32 i, nr_ctxs;
+ int rc;
+
+ rc = hwrm_req_init(bp, req, HWRM_VNIC_RSS_CFG);
+ if (rc)
+ return rc;
+
+ req->vnic_id = cpu_to_le16(vnic->fw_vnic_id);
+ if (!set_rss)
+ return hwrm_req_send(bp, req);
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_CFG, -1, -1);
- req.vnic_id = cpu_to_le16(vnic->fw_vnic_id);
- if (!set_rss) {
- hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
- return 0;
- }
bnxt_fill_hw_rss_tbl(bp, vnic);
- req.hash_type = cpu_to_le32(bp->rss_hash_cfg);
- req.hash_mode_flags = VNIC_RSS_CFG_REQ_HASH_MODE_FLAGS_DEFAULT;
- req.hash_key_tbl_addr = cpu_to_le64(vnic->rss_hash_key_dma_addr);
+ req->hash_type = cpu_to_le32(bp->rss_hash_cfg);
+ req->hash_mode_flags = VNIC_RSS_CFG_REQ_HASH_MODE_FLAGS_DEFAULT;
+ req->hash_key_tbl_addr = cpu_to_le64(vnic->rss_hash_key_dma_addr);
ring_tbl_map = vnic->rss_table_dma_addr;
nr_ctxs = bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings);
- for (i = 0; i < nr_ctxs; ring_tbl_map += BNXT_RSS_TABLE_SIZE_P5, i++) {
- int rc;
- req.ring_grp_tbl_addr = cpu_to_le64(ring_tbl_map);
- req.ring_table_pair_index = i;
- req.rss_ctx_idx = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[i]);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ hwrm_req_hold(bp, req);
+ for (i = 0; i < nr_ctxs; ring_tbl_map += BNXT_RSS_TABLE_SIZE_P5, i++) {
+ req->ring_grp_tbl_addr = cpu_to_le64(ring_tbl_map);
+ req->ring_table_pair_index = i;
+ req->rss_ctx_idx = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[i]);
+ rc = hwrm_req_send(bp, req);
if (rc)
- return rc;
+ goto exit;
}
- return 0;
+
+exit:
+ hwrm_req_drop(bp, req);
+ return rc;
}
static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, u16 vnic_id)
{
struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
- struct hwrm_vnic_plcmodes_cfg_input req = {0};
+ struct hwrm_vnic_plcmodes_cfg_input *req;
+ int rc;
+
+ rc = hwrm_req_init(bp, req, HWRM_VNIC_PLCMODES_CFG);
+ if (rc)
+ return rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_PLCMODES_CFG, -1, -1);
- req.flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT |
- VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 |
- VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6);
- req.enables =
+ req->flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT |
+ VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 |
+ VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6);
+ req->enables =
cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_JUMBO_THRESH_VALID |
VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID);
/* thresholds not implemented in firmware yet */
- req.jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh);
- req.hds_threshold = cpu_to_le16(bp->rx_copy_thresh);
- req.vnic_id = cpu_to_le32(vnic->fw_vnic_id);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh);
+ req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh);
+ req->vnic_id = cpu_to_le32(vnic->fw_vnic_id);
+ return hwrm_req_send(bp, req);
}
static void bnxt_hwrm_vnic_ctx_free_one(struct bnxt *bp, u16 vnic_id,
u16 ctx_idx)
{
- struct hwrm_vnic_rss_cos_lb_ctx_free_input req = {0};
+ struct hwrm_vnic_rss_cos_lb_ctx_free_input *req;
+
+ if (hwrm_req_init(bp, req, HWRM_VNIC_RSS_COS_LB_CTX_FREE))
+ return;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_COS_LB_CTX_FREE, -1, -1);
- req.rss_cos_lb_ctx_id =
+ req->rss_cos_lb_ctx_id =
cpu_to_le16(bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx]);
- hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ hwrm_req_send(bp, req);
bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx] = INVALID_HW_RING_ID;
}
@@ -5351,20 +5207,20 @@ static void bnxt_hwrm_vnic_ctx_free(struct bnxt *bp)
static int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, u16 vnic_id, u16 ctx_idx)
{
+ struct hwrm_vnic_rss_cos_lb_ctx_alloc_output *resp;
+ struct hwrm_vnic_rss_cos_lb_ctx_alloc_input *req;
int rc;
- struct hwrm_vnic_rss_cos_lb_ctx_alloc_input req = {0};
- struct hwrm_vnic_rss_cos_lb_ctx_alloc_output *resp =
- bp->hwrm_cmd_resp_addr;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_COS_LB_CTX_ALLOC, -1,
- -1);
+ rc = hwrm_req_init(bp, req, HWRM_VNIC_RSS_COS_LB_CTX_ALLOC);
+ if (rc)
+ return rc;
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc)
bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx] =
le16_to_cpu(resp->rss_cos_lb_ctx_id);
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -5378,47 +5234,50 @@ static u32 bnxt_get_roce_vnic_mode(struct bnxt *bp)
int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id)
{
- unsigned int ring = 0, grp_idx;
struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
- struct hwrm_vnic_cfg_input req = {0};
+ struct hwrm_vnic_cfg_input *req;
+ unsigned int ring = 0, grp_idx;
u16 def_vlan = 0;
+ int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_CFG, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_VNIC_CFG);
+ if (rc)
+ return rc;
if (bp->flags & BNXT_FLAG_CHIP_P5) {
struct bnxt_rx_ring_info *rxr = &bp->rx_ring[0];
- req.default_rx_ring_id =
+ req->default_rx_ring_id =
cpu_to_le16(rxr->rx_ring_struct.fw_ring_id);
- req.default_cmpl_ring_id =
+ req->default_cmpl_ring_id =
cpu_to_le16(bnxt_cp_ring_for_rx(bp, rxr));
- req.enables =
+ req->enables =
cpu_to_le32(VNIC_CFG_REQ_ENABLES_DEFAULT_RX_RING_ID |
VNIC_CFG_REQ_ENABLES_DEFAULT_CMPL_RING_ID);
goto vnic_mru;
}
- req.enables = cpu_to_le32(VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP);
+ req->enables = cpu_to_le32(VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP);
/* Only RSS support for now TBD: COS & LB */
if (vnic->fw_rss_cos_lb_ctx[0] != INVALID_HW_RING_ID) {
- req.rss_rule = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[0]);
- req.enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_RSS_RULE |
+ req->rss_rule = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[0]);
+ req->enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_RSS_RULE |
VNIC_CFG_REQ_ENABLES_MRU);
} else if (vnic->flags & BNXT_VNIC_RFS_NEW_RSS_FLAG) {
- req.rss_rule =
+ req->rss_rule =
cpu_to_le16(bp->vnic_info[0].fw_rss_cos_lb_ctx[0]);
- req.enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_RSS_RULE |
+ req->enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_RSS_RULE |
VNIC_CFG_REQ_ENABLES_MRU);
- req.flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_RSS_DFLT_CR_MODE);
+ req->flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_RSS_DFLT_CR_MODE);
} else {
- req.rss_rule = cpu_to_le16(0xffff);
+ req->rss_rule = cpu_to_le16(0xffff);
}
if (BNXT_CHIP_TYPE_NITRO_A0(bp) &&
(vnic->fw_rss_cos_lb_ctx[0] != INVALID_HW_RING_ID)) {
- req.cos_rule = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[1]);
- req.enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_COS_RULE);
+ req->cos_rule = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[1]);
+ req->enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_COS_RULE);
} else {
- req.cos_rule = cpu_to_le16(0xffff);
+ req->cos_rule = cpu_to_le16(0xffff);
}
if (vnic->flags & BNXT_VNIC_RSS_FLAG)
@@ -5429,34 +5288,36 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id)
ring = bp->rx_nr_rings - 1;
grp_idx = bp->rx_ring[ring].bnapi->index;
- req.dflt_ring_grp = cpu_to_le16(bp->grp_info[grp_idx].fw_grp_id);
- req.lb_rule = cpu_to_le16(0xffff);
+ req->dflt_ring_grp = cpu_to_le16(bp->grp_info[grp_idx].fw_grp_id);
+ req->lb_rule = cpu_to_le16(0xffff);
vnic_mru:
- req.mru = cpu_to_le16(bp->dev->mtu + ETH_HLEN + VLAN_HLEN);
+ req->mru = cpu_to_le16(bp->dev->mtu + ETH_HLEN + VLAN_HLEN);
- req.vnic_id = cpu_to_le16(vnic->fw_vnic_id);
+ req->vnic_id = cpu_to_le16(vnic->fw_vnic_id);
#ifdef CONFIG_BNXT_SRIOV
if (BNXT_VF(bp))
def_vlan = bp->vf.vlan;
#endif
if ((bp->flags & BNXT_FLAG_STRIP_VLAN) || def_vlan)
- req.flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_VLAN_STRIP_MODE);
+ req->flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_VLAN_STRIP_MODE);
if (!vnic_id && bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP))
- req.flags |= cpu_to_le32(bnxt_get_roce_vnic_mode(bp));
+ req->flags |= cpu_to_le32(bnxt_get_roce_vnic_mode(bp));
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ return hwrm_req_send(bp, req);
}
static void bnxt_hwrm_vnic_free_one(struct bnxt *bp, u16 vnic_id)
{
if (bp->vnic_info[vnic_id].fw_vnic_id != INVALID_HW_RING_ID) {
- struct hwrm_vnic_free_input req = {0};
+ struct hwrm_vnic_free_input *req;
+
+ if (hwrm_req_init(bp, req, HWRM_VNIC_FREE))
+ return;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_FREE, -1, -1);
- req.vnic_id =
+ req->vnic_id =
cpu_to_le32(bp->vnic_info[vnic_id].fw_vnic_id);
- hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ hwrm_req_send(bp, req);
bp->vnic_info[vnic_id].fw_vnic_id = INVALID_HW_RING_ID;
}
}
@@ -5473,11 +5334,15 @@ static int bnxt_hwrm_vnic_alloc(struct bnxt *bp, u16 vnic_id,
unsigned int start_rx_ring_idx,
unsigned int nr_rings)
{
- int rc = 0;
unsigned int i, j, grp_idx, end_idx = start_rx_ring_idx + nr_rings;
- struct hwrm_vnic_alloc_input req = {0};
- struct hwrm_vnic_alloc_output *resp = bp->hwrm_cmd_resp_addr;
struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
+ struct hwrm_vnic_alloc_output *resp;
+ struct hwrm_vnic_alloc_input *req;
+ int rc;
+
+ rc = hwrm_req_init(bp, req, HWRM_VNIC_ALLOC);
+ if (rc)
+ return rc;
if (bp->flags & BNXT_FLAG_CHIP_P5)
goto vnic_no_ring_grps;
@@ -5497,22 +5362,20 @@ vnic_no_ring_grps:
for (i = 0; i < BNXT_MAX_CTX_PER_VNIC; i++)
vnic->fw_rss_cos_lb_ctx[i] = INVALID_HW_RING_ID;
if (vnic_id == 0)
- req.flags = cpu_to_le32(VNIC_ALLOC_REQ_FLAGS_DEFAULT);
+ req->flags = cpu_to_le32(VNIC_ALLOC_REQ_FLAGS_DEFAULT);
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_ALLOC, -1, -1);
-
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc)
vnic->fw_vnic_id = le32_to_cpu(resp->vnic_id);
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp)
{
- struct hwrm_vnic_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_vnic_qcaps_input req = {0};
+ struct hwrm_vnic_qcaps_output *resp;
+ struct hwrm_vnic_qcaps_input *req;
int rc;
bp->hw_ring_stats_size = sizeof(struct ctx_hw_stats);
@@ -5520,9 +5383,12 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp)
if (bp->hwrm_spec_code < 0x10600)
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_QCAPS, -1, -1);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_VNIC_QCAPS);
+ if (rc)
+ return rc;
+
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc) {
u32 flags = le32_to_cpu(resp->flags);
@@ -5548,92 +5414,96 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp)
bp->hw_ring_stats_size = BNXT_RING_STATS_SIZE_P5_SR2;
}
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
static int bnxt_hwrm_ring_grp_alloc(struct bnxt *bp)
{
+ struct hwrm_ring_grp_alloc_output *resp;
+ struct hwrm_ring_grp_alloc_input *req;
+ int rc;
u16 i;
- u32 rc = 0;
if (bp->flags & BNXT_FLAG_CHIP_P5)
return 0;
- mutex_lock(&bp->hwrm_cmd_lock);
+ rc = hwrm_req_init(bp, req, HWRM_RING_GRP_ALLOC);
+ if (rc)
+ return rc;
+
+ resp = hwrm_req_hold(bp, req);
for (i = 0; i < bp->rx_nr_rings; i++) {
- struct hwrm_ring_grp_alloc_input req = {0};
- struct hwrm_ring_grp_alloc_output *resp =
- bp->hwrm_cmd_resp_addr;
unsigned int grp_idx = bp->rx_ring[i].bnapi->index;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_GRP_ALLOC, -1, -1);
+ req->cr = cpu_to_le16(bp->grp_info[grp_idx].cp_fw_ring_id);
+ req->rr = cpu_to_le16(bp->grp_info[grp_idx].rx_fw_ring_id);
+ req->ar = cpu_to_le16(bp->grp_info[grp_idx].agg_fw_ring_id);
+ req->sc = cpu_to_le16(bp->grp_info[grp_idx].fw_stats_ctx);
- req.cr = cpu_to_le16(bp->grp_info[grp_idx].cp_fw_ring_id);
- req.rr = cpu_to_le16(bp->grp_info[grp_idx].rx_fw_ring_id);
- req.ar = cpu_to_le16(bp->grp_info[grp_idx].agg_fw_ring_id);
- req.sc = cpu_to_le16(bp->grp_info[grp_idx].fw_stats_ctx);
+ rc = hwrm_req_send(bp, req);
- rc = _hwrm_send_message(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
if (rc)
break;
bp->grp_info[grp_idx].fw_grp_id =
le32_to_cpu(resp->ring_group_id);
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
static void bnxt_hwrm_ring_grp_free(struct bnxt *bp)
{
+ struct hwrm_ring_grp_free_input *req;
u16 i;
- struct hwrm_ring_grp_free_input req = {0};
if (!bp->grp_info || (bp->flags & BNXT_FLAG_CHIP_P5))
return;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_GRP_FREE, -1, -1);
+ if (hwrm_req_init(bp, req, HWRM_RING_GRP_FREE))
+ return;
- mutex_lock(&bp->hwrm_cmd_lock);
+ hwrm_req_hold(bp, req);
for (i = 0; i < bp->cp_nr_rings; i++) {
if (bp->grp_info[i].fw_grp_id == INVALID_HW_RING_ID)
continue;
- req.ring_group_id =
+ req->ring_group_id =
cpu_to_le32(bp->grp_info[i].fw_grp_id);
- _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ hwrm_req_send(bp, req);
bp->grp_info[i].fw_grp_id = INVALID_HW_RING_ID;
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
}
static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
struct bnxt_ring_struct *ring,
u32 ring_type, u32 map_index)
{
- int rc = 0, err = 0;
- struct hwrm_ring_alloc_input req = {0};
- struct hwrm_ring_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+ struct hwrm_ring_alloc_output *resp;
+ struct hwrm_ring_alloc_input *req;
struct bnxt_ring_mem_info *rmem = &ring->ring_mem;
struct bnxt_ring_grp_info *grp_info;
+ int rc, err = 0;
u16 ring_id;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_ALLOC, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_RING_ALLOC);
+ if (rc)
+ goto exit;
- req.enables = 0;
+ req->enables = 0;
if (rmem->nr_pages > 1) {
- req.page_tbl_addr = cpu_to_le64(rmem->pg_tbl_map);
+ req->page_tbl_addr = cpu_to_le64(rmem->pg_tbl_map);
/* Page size is in log2 units */
- req.page_size = BNXT_PAGE_SHIFT;
- req.page_tbl_depth = 1;
+ req->page_size = BNXT_PAGE_SHIFT;
+ req->page_tbl_depth = 1;
} else {
- req.page_tbl_addr = cpu_to_le64(rmem->dma_arr[0]);
+ req->page_tbl_addr = cpu_to_le64(rmem->dma_arr[0]);
}
- req.fbo = 0;
+ req->fbo = 0;
/* Association of ring index with doorbell index and MSIX number */
- req.logical_id = cpu_to_le16(map_index);
+ req->logical_id = cpu_to_le16(map_index);
switch (ring_type) {
case HWRM_RING_ALLOC_TX: {
@@ -5641,67 +5511,67 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
txr = container_of(ring, struct bnxt_tx_ring_info,
tx_ring_struct);
- req.ring_type = RING_ALLOC_REQ_RING_TYPE_TX;
+ req->ring_type = RING_ALLOC_REQ_RING_TYPE_TX;
/* Association of transmit ring with completion ring */
grp_info = &bp->grp_info[ring->grp_idx];
- req.cmpl_ring_id = cpu_to_le16(bnxt_cp_ring_for_tx(bp, txr));
- req.length = cpu_to_le32(bp->tx_ring_mask + 1);
- req.stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
- req.queue_id = cpu_to_le16(ring->queue_id);
+ req->cmpl_ring_id = cpu_to_le16(bnxt_cp_ring_for_tx(bp, txr));
+ req->length = cpu_to_le32(bp->tx_ring_mask + 1);
+ req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
+ req->queue_id = cpu_to_le16(ring->queue_id);
break;
}
case HWRM_RING_ALLOC_RX:
- req.ring_type = RING_ALLOC_REQ_RING_TYPE_RX;
- req.length = cpu_to_le32(bp->rx_ring_mask + 1);
+ req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX;
+ req->length = cpu_to_le32(bp->rx_ring_mask + 1);
if (bp->flags & BNXT_FLAG_CHIP_P5) {
u16 flags = 0;
/* Association of rx ring with stats context */
grp_info = &bp->grp_info[ring->grp_idx];
- req.rx_buf_size = cpu_to_le16(bp->rx_buf_use_size);
- req.stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
- req.enables |= cpu_to_le32(
+ req->rx_buf_size = cpu_to_le16(bp->rx_buf_use_size);
+ req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
+ req->enables |= cpu_to_le32(
RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID);
if (NET_IP_ALIGN == 2)
flags = RING_ALLOC_REQ_FLAGS_RX_SOP_PAD;
- req.flags = cpu_to_le16(flags);
+ req->flags = cpu_to_le16(flags);
}
break;
case HWRM_RING_ALLOC_AGG:
if (bp->flags & BNXT_FLAG_CHIP_P5) {
- req.ring_type = RING_ALLOC_REQ_RING_TYPE_RX_AGG;
+ req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX_AGG;
/* Association of agg ring with rx ring */
grp_info = &bp->grp_info[ring->grp_idx];
- req.rx_ring_id = cpu_to_le16(grp_info->rx_fw_ring_id);
- req.rx_buf_size = cpu_to_le16(BNXT_RX_PAGE_SIZE);
- req.stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
- req.enables |= cpu_to_le32(
+ req->rx_ring_id = cpu_to_le16(grp_info->rx_fw_ring_id);
+ req->rx_buf_size = cpu_to_le16(BNXT_RX_PAGE_SIZE);
+ req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
+ req->enables |= cpu_to_le32(
RING_ALLOC_REQ_ENABLES_RX_RING_ID_VALID |
RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID);
} else {
- req.ring_type = RING_ALLOC_REQ_RING_TYPE_RX;
+ req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX;
}
- req.length = cpu_to_le32(bp->rx_agg_ring_mask + 1);
+ req->length = cpu_to_le32(bp->rx_agg_ring_mask + 1);
break;
case HWRM_RING_ALLOC_CMPL:
- req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
- req.length = cpu_to_le32(bp->cp_ring_mask + 1);
+ req->ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
+ req->length = cpu_to_le32(bp->cp_ring_mask + 1);
if (bp->flags & BNXT_FLAG_CHIP_P5) {
/* Association of cp ring with nq */
grp_info = &bp->grp_info[map_index];
- req.nq_ring_id = cpu_to_le16(grp_info->cp_fw_ring_id);
- req.cq_handle = cpu_to_le64(ring->handle);
- req.enables |= cpu_to_le32(
+ req->nq_ring_id = cpu_to_le16(grp_info->cp_fw_ring_id);
+ req->cq_handle = cpu_to_le64(ring->handle);
+ req->enables |= cpu_to_le32(
RING_ALLOC_REQ_ENABLES_NQ_RING_ID_VALID);
} else if (bp->flags & BNXT_FLAG_USING_MSIX) {
- req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+ req->int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
}
break;
case HWRM_RING_ALLOC_NQ:
- req.ring_type = RING_ALLOC_REQ_RING_TYPE_NQ;
- req.length = cpu_to_le32(bp->cp_ring_mask + 1);
+ req->ring_type = RING_ALLOC_REQ_RING_TYPE_NQ;
+ req->length = cpu_to_le32(bp->cp_ring_mask + 1);
if (bp->flags & BNXT_FLAG_USING_MSIX)
- req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+ req->int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
break;
default:
netdev_err(bp->dev, "hwrm alloc invalid ring type %d\n",
@@ -5709,12 +5579,13 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
return -1;
}
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
err = le16_to_cpu(resp->error_code);
ring_id = le16_to_cpu(resp->ring_id);
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
+exit:
if (rc || err) {
netdev_err(bp->dev, "hwrm_ring_alloc type %d failed. rc:%x err:%x\n",
ring_type, rc, err);
@@ -5729,23 +5600,28 @@ static int bnxt_hwrm_set_async_event_cr(struct bnxt *bp, int idx)
int rc;
if (BNXT_PF(bp)) {
- struct hwrm_func_cfg_input req = {0};
+ struct hwrm_func_cfg_input *req;
+
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+ if (rc)
+ return rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
- req.fid = cpu_to_le16(0xffff);
- req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_ASYNC_EVENT_CR);
- req.async_event_cr = cpu_to_le16(idx);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->fid = cpu_to_le16(0xffff);
+ req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_ASYNC_EVENT_CR);
+ req->async_event_cr = cpu_to_le16(idx);
+ return hwrm_req_send(bp, req);
} else {
- struct hwrm_func_vf_cfg_input req = {0};
+ struct hwrm_func_vf_cfg_input *req;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1);
- req.enables =
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_VF_CFG);
+ if (rc)
+ return rc;
+
+ req->enables =
cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_ASYNC_EVENT_CR);
- req.async_event_cr = cpu_to_le16(idx);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->async_event_cr = cpu_to_le16(idx);
+ return hwrm_req_send(bp, req);
}
- return rc;
}
static void bnxt_set_db(struct bnxt *bp, struct bnxt_db_info *db, u32 ring_type,
@@ -5916,23 +5792,27 @@ static int hwrm_ring_free_send_msg(struct bnxt *bp,
struct bnxt_ring_struct *ring,
u32 ring_type, int cmpl_ring_id)
{
+ struct hwrm_ring_free_output *resp;
+ struct hwrm_ring_free_input *req;
+ u16 error_code = 0;
int rc;
- struct hwrm_ring_free_input req = {0};
- struct hwrm_ring_free_output *resp = bp->hwrm_cmd_resp_addr;
- u16 error_code;
if (BNXT_NO_FW_ACCESS(bp))
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_FREE, cmpl_ring_id, -1);
- req.ring_type = ring_type;
- req.ring_id = cpu_to_le16(ring->fw_ring_id);
+ rc = hwrm_req_init(bp, req, HWRM_RING_FREE);
+ if (rc)
+ goto exit;
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
- error_code = le16_to_cpu(resp->error_code);
- mutex_unlock(&bp->hwrm_cmd_lock);
+ req->cmpl_ring = cpu_to_le16(cmpl_ring_id);
+ req->ring_type = ring_type;
+ req->ring_id = cpu_to_le16(ring->fw_ring_id);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
+ error_code = le16_to_cpu(resp->error_code);
+ hwrm_req_drop(bp, req);
+exit:
if (rc || error_code) {
netdev_err(bp->dev, "hwrm_ring_free type %d failed. rc:%x err:%x\n",
ring_type, rc, error_code);
@@ -6047,20 +5927,23 @@ static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max,
static int bnxt_hwrm_get_rings(struct bnxt *bp)
{
- struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
- struct hwrm_func_qcfg_input req = {0};
+ struct hwrm_func_qcfg_output *resp;
+ struct hwrm_func_qcfg_input *req;
int rc;
if (bp->hwrm_spec_code < 0x10601)
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
- req.fid = cpu_to_le16(0xffff);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_QCFG);
+ if (rc)
+ return rc;
+
+ req->fid = cpu_to_le16(0xffff);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (rc) {
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -6094,39 +5977,45 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp)
hw_resc->resv_cp_rings = cp;
hw_resc->resv_stat_ctxs = stats;
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return 0;
}
-/* Caller must hold bp->hwrm_cmd_lock */
int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings)
{
- struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_func_qcfg_input req = {0};
+ struct hwrm_func_qcfg_output *resp;
+ struct hwrm_func_qcfg_input *req;
int rc;
if (bp->hwrm_spec_code < 0x10601)
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
- req.fid = cpu_to_le16(fid);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_QCFG);
+ if (rc)
+ return rc;
+
+ req->fid = cpu_to_le16(fid);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc)
*tx_rings = le16_to_cpu(resp->alloc_tx_rings);
+ hwrm_req_drop(bp, req);
return rc;
}
static bool bnxt_rfs_supported(struct bnxt *bp);
-static void
-__bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req,
- int tx_rings, int rx_rings, int ring_grps,
- int cp_rings, int stats, int vnics)
+static struct hwrm_func_cfg_input *
+__bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
+ int ring_grps, int cp_rings, int stats, int vnics)
{
+ struct hwrm_func_cfg_input *req;
u32 enables = 0;
- bnxt_hwrm_cmd_hdr_init(bp, req, HWRM_FUNC_CFG, -1, -1);
+ if (hwrm_req_init(bp, req, HWRM_FUNC_CFG))
+ return NULL;
+
req->fid = cpu_to_le16(0xffff);
enables |= tx_rings ? FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
req->num_tx_rings = cpu_to_le16(tx_rings);
@@ -6167,17 +6056,19 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req,
req->num_vnics = cpu_to_le16(vnics);
}
req->enables = cpu_to_le32(enables);
+ return req;
}
-static void
-__bnxt_hwrm_reserve_vf_rings(struct bnxt *bp,
- struct hwrm_func_vf_cfg_input *req, int tx_rings,
- int rx_rings, int ring_grps, int cp_rings,
- int stats, int vnics)
+static struct hwrm_func_vf_cfg_input *
+__bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
+ int ring_grps, int cp_rings, int stats, int vnics)
{
+ struct hwrm_func_vf_cfg_input *req;
u32 enables = 0;
- bnxt_hwrm_cmd_hdr_init(bp, req, HWRM_FUNC_VF_CFG, -1, -1);
+ if (hwrm_req_init(bp, req, HWRM_FUNC_VF_CFG))
+ return NULL;
+
enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS |
FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
@@ -6209,21 +6100,27 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp,
req->num_vnics = cpu_to_le16(vnics);
req->enables = cpu_to_le32(enables);
+ return req;
}
static int
bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
int ring_grps, int cp_rings, int stats, int vnics)
{
- struct hwrm_func_cfg_input req = {0};
+ struct hwrm_func_cfg_input *req;
int rc;
- __bnxt_hwrm_reserve_pf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
- cp_rings, stats, vnics);
- if (!req.enables)
+ req = __bnxt_hwrm_reserve_pf_rings(bp, tx_rings, rx_rings, ring_grps,
+ cp_rings, stats, vnics);
+ if (!req)
+ return -ENOMEM;
+
+ if (!req->enables) {
+ hwrm_req_drop(bp, req);
return 0;
+ }
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, req);
if (rc)
return rc;
@@ -6237,7 +6134,7 @@ static int
bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
int ring_grps, int cp_rings, int stats, int vnics)
{
- struct hwrm_func_vf_cfg_input req = {0};
+ struct hwrm_func_vf_cfg_input *req;
int rc;
if (!BNXT_NEW_RM(bp)) {
@@ -6245,9 +6142,12 @@ bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
return 0;
}
- __bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
- cp_rings, stats, vnics);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req = __bnxt_hwrm_reserve_vf_rings(bp, tx_rings, rx_rings, ring_grps,
+ cp_rings, stats, vnics);
+ if (!req)
+ return -ENOMEM;
+
+ rc = hwrm_req_send(bp, req);
if (rc)
return rc;
@@ -6448,14 +6348,14 @@ static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
int ring_grps, int cp_rings, int stats,
int vnics)
{
- struct hwrm_func_vf_cfg_input req = {0};
+ struct hwrm_func_vf_cfg_input *req;
u32 flags;
if (!BNXT_NEW_RM(bp))
return 0;
- __bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
- cp_rings, stats, vnics);
+ req = __bnxt_hwrm_reserve_vf_rings(bp, tx_rings, rx_rings, ring_grps,
+ cp_rings, stats, vnics);
flags = FUNC_VF_CFG_REQ_FLAGS_TX_ASSETS_TEST |
FUNC_VF_CFG_REQ_FLAGS_RX_ASSETS_TEST |
FUNC_VF_CFG_REQ_FLAGS_CMPL_ASSETS_TEST |
@@ -6465,20 +6365,19 @@ static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
if (!(bp->flags & BNXT_FLAG_CHIP_P5))
flags |= FUNC_VF_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST;
- req.flags = cpu_to_le32(flags);
- return hwrm_send_message_silent(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
+ req->flags = cpu_to_le32(flags);
+ return hwrm_req_send_silent(bp, req);
}
static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
int ring_grps, int cp_rings, int stats,
int vnics)
{
- struct hwrm_func_cfg_input req = {0};
+ struct hwrm_func_cfg_input *req;
u32 flags;
- __bnxt_hwrm_reserve_pf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
- cp_rings, stats, vnics);
+ req = __bnxt_hwrm_reserve_pf_rings(bp, tx_rings, rx_rings, ring_grps,
+ cp_rings, stats, vnics);
flags = FUNC_CFG_REQ_FLAGS_TX_ASSETS_TEST;
if (BNXT_NEW_RM(bp)) {
flags |= FUNC_CFG_REQ_FLAGS_RX_ASSETS_TEST |
@@ -6492,9 +6391,8 @@ static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
flags |= FUNC_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST;
}
- req.flags = cpu_to_le32(flags);
- return hwrm_send_message_silent(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
+ req->flags = cpu_to_le32(flags);
+ return hwrm_req_send_silent(bp, req);
}
static int bnxt_hwrm_check_rings(struct bnxt *bp, int tx_rings, int rx_rings,
@@ -6515,9 +6413,9 @@ static int bnxt_hwrm_check_rings(struct bnxt *bp, int tx_rings, int rx_rings,
static void bnxt_hwrm_coal_params_qcaps(struct bnxt *bp)
{
- struct hwrm_ring_aggint_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
struct bnxt_coal_cap *coal_cap = &bp->coal_cap;
- struct hwrm_ring_aggint_qcaps_input req = {0};
+ struct hwrm_ring_aggint_qcaps_output *resp;
+ struct hwrm_ring_aggint_qcaps_input *req;
int rc;
coal_cap->cmpl_params = BNXT_LEGACY_COAL_CMPL_PARAMS;
@@ -6533,9 +6431,11 @@ static void bnxt_hwrm_coal_params_qcaps(struct bnxt *bp)
if (bp->hwrm_spec_code < 0x10902)
return;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_AGGINT_QCAPS, -1, -1);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ if (hwrm_req_init(bp, req, HWRM_RING_AGGINT_QCAPS))
+ return;
+
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send_silent(bp, req);
if (!rc) {
coal_cap->cmpl_params = le32_to_cpu(resp->cmpl_params);
coal_cap->nq_params = le32_to_cpu(resp->nq_params);
@@ -6555,7 +6455,7 @@ static void bnxt_hwrm_coal_params_qcaps(struct bnxt *bp)
le16_to_cpu(resp->num_cmpl_aggr_int_max);
coal_cap->timer_units = le16_to_cpu(resp->timer_units);
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
}
static u16 bnxt_usec_to_coal_tmr(struct bnxt *bp, u16 usec)
@@ -6623,37 +6523,40 @@ static void bnxt_hwrm_set_coal_params(struct bnxt *bp,
req->enables |= cpu_to_le16(BNXT_COAL_CMPL_ENABLES);
}
-/* Caller holds bp->hwrm_cmd_lock */
static int __bnxt_hwrm_set_coal_nq(struct bnxt *bp, struct bnxt_napi *bnapi,
struct bnxt_coal *hw_coal)
{
- struct hwrm_ring_cmpl_ring_cfg_aggint_params_input req = {0};
+ struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req;
struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
struct bnxt_coal_cap *coal_cap = &bp->coal_cap;
u32 nq_params = coal_cap->nq_params;
u16 tmr;
+ int rc;
if (!(nq_params & RING_AGGINT_QCAPS_RESP_NQ_PARAMS_INT_LAT_TMR_MIN))
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS,
- -1, -1);
- req.ring_id = cpu_to_le16(cpr->cp_ring_struct.fw_ring_id);
- req.flags =
+ rc = hwrm_req_init(bp, req, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS);
+ if (rc)
+ return rc;
+
+ req->ring_id = cpu_to_le16(cpr->cp_ring_struct.fw_ring_id);
+ req->flags =
cpu_to_le16(RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_IS_NQ);
tmr = bnxt_usec_to_coal_tmr(bp, hw_coal->coal_ticks) / 2;
tmr = clamp_t(u16, tmr, 1, coal_cap->int_lat_tmr_min_max);
- req.int_lat_tmr_min = cpu_to_le16(tmr);
- req.enables |= cpu_to_le16(BNXT_COAL_CMPL_MIN_TMR_ENABLE);
- return _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->int_lat_tmr_min = cpu_to_le16(tmr);
+ req->enables |= cpu_to_le16(BNXT_COAL_CMPL_MIN_TMR_ENABLE);
+ return hwrm_req_send(bp, req);
}
int bnxt_hwrm_set_ring_coal(struct bnxt *bp, struct bnxt_napi *bnapi)
{
- struct hwrm_ring_cmpl_ring_cfg_aggint_params_input req_rx = {0};
+ struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req_rx;
struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
struct bnxt_coal coal;
+ int rc;
/* Tick values in micro seconds.
* 1 coal_buf x bufs_per_record = 1 completion record.
@@ -6666,48 +6569,53 @@ int bnxt_hwrm_set_ring_coal(struct bnxt *bp, struct bnxt_napi *bnapi)
if (!bnapi->rx_ring)
return -ENODEV;
- bnxt_hwrm_cmd_hdr_init(bp, &req_rx,
- HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS, -1, -1);
+ rc = hwrm_req_init(bp, req_rx, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS);
+ if (rc)
+ return rc;
- bnxt_hwrm_set_coal_params(bp, &coal, &req_rx);
+ bnxt_hwrm_set_coal_params(bp, &coal, req_rx);
- req_rx.ring_id = cpu_to_le16(bnxt_cp_ring_for_rx(bp, bnapi->rx_ring));
+ req_rx->ring_id = cpu_to_le16(bnxt_cp_ring_for_rx(bp, bnapi->rx_ring));
- return hwrm_send_message(bp, &req_rx, sizeof(req_rx),
- HWRM_CMD_TIMEOUT);
+ return hwrm_req_send(bp, req_rx);
}
int bnxt_hwrm_set_coal(struct bnxt *bp)
{
- int i, rc = 0;
- struct hwrm_ring_cmpl_ring_cfg_aggint_params_input req_rx = {0},
- req_tx = {0}, *req;
+ struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req_rx, *req_tx,
+ *req;
+ int i, rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req_rx,
- HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS, -1, -1);
- bnxt_hwrm_cmd_hdr_init(bp, &req_tx,
- HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS, -1, -1);
+ rc = hwrm_req_init(bp, req_rx, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS);
+ if (rc)
+ return rc;
+
+ rc = hwrm_req_init(bp, req_tx, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS);
+ if (rc) {
+ hwrm_req_drop(bp, req_rx);
+ return rc;
+ }
- bnxt_hwrm_set_coal_params(bp, &bp->rx_coal, &req_rx);
- bnxt_hwrm_set_coal_params(bp, &bp->tx_coal, &req_tx);
+ bnxt_hwrm_set_coal_params(bp, &bp->rx_coal, req_rx);
+ bnxt_hwrm_set_coal_params(bp, &bp->tx_coal, req_tx);
- mutex_lock(&bp->hwrm_cmd_lock);
+ hwrm_req_hold(bp, req_rx);
+ hwrm_req_hold(bp, req_tx);
for (i = 0; i < bp->cp_nr_rings; i++) {
struct bnxt_napi *bnapi = bp->bnapi[i];
struct bnxt_coal *hw_coal;
u16 ring_id;
- req = &req_rx;
+ req = req_rx;
if (!bnapi->rx_ring) {
ring_id = bnxt_cp_ring_for_tx(bp, bnapi->tx_ring);
- req = &req_tx;
+ req = req_tx;
} else {
ring_id = bnxt_cp_ring_for_rx(bp, bnapi->rx_ring);
}
req->ring_id = cpu_to_le16(ring_id);
- rc = _hwrm_send_message(bp, req, sizeof(*req),
- HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, req);
if (rc)
break;
@@ -6715,11 +6623,10 @@ int bnxt_hwrm_set_coal(struct bnxt *bp)
continue;
if (bnapi->rx_ring && bnapi->tx_ring) {
- req = &req_tx;
+ req = req_tx;
ring_id = bnxt_cp_ring_for_tx(bp, bnapi->tx_ring);
req->ring_id = cpu_to_le16(ring_id);
- rc = _hwrm_send_message(bp, req, sizeof(*req),
- HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, req);
if (rc)
break;
}
@@ -6729,14 +6636,15 @@ int bnxt_hwrm_set_coal(struct bnxt *bp)
hw_coal = &bp->tx_coal;
__bnxt_hwrm_set_coal_nq(bp, bnapi, hw_coal);
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req_rx);
+ hwrm_req_drop(bp, req_tx);
return rc;
}
static void bnxt_hwrm_stat_ctx_free(struct bnxt *bp)
{
- struct hwrm_stat_ctx_clr_stats_input req0 = {0};
- struct hwrm_stat_ctx_free_input req = {0};
+ struct hwrm_stat_ctx_clr_stats_input *req0 = NULL;
+ struct hwrm_stat_ctx_free_input *req;
int i;
if (!bp->bnapi)
@@ -6745,53 +6653,60 @@ static void bnxt_hwrm_stat_ctx_free(struct bnxt *bp)
if (BNXT_CHIP_TYPE_NITRO_A0(bp))
return;
- bnxt_hwrm_cmd_hdr_init(bp, &req0, HWRM_STAT_CTX_CLR_STATS, -1, -1);
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_STAT_CTX_FREE, -1, -1);
-
- mutex_lock(&bp->hwrm_cmd_lock);
+ if (hwrm_req_init(bp, req, HWRM_STAT_CTX_FREE))
+ return;
+ if (BNXT_FW_MAJ(bp) <= 20) {
+ if (hwrm_req_init(bp, req0, HWRM_STAT_CTX_CLR_STATS)) {
+ hwrm_req_drop(bp, req);
+ return;
+ }
+ hwrm_req_hold(bp, req0);
+ }
+ hwrm_req_hold(bp, req);
for (i = 0; i < bp->cp_nr_rings; i++) {
struct bnxt_napi *bnapi = bp->bnapi[i];
struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
if (cpr->hw_stats_ctx_id != INVALID_STATS_CTX_ID) {
- req.stat_ctx_id = cpu_to_le32(cpr->hw_stats_ctx_id);
- if (BNXT_FW_MAJ(bp) <= 20) {
- req0.stat_ctx_id = req.stat_ctx_id;
- _hwrm_send_message(bp, &req0, sizeof(req0),
- HWRM_CMD_TIMEOUT);
+ req->stat_ctx_id = cpu_to_le32(cpr->hw_stats_ctx_id);
+ if (req0) {
+ req0->stat_ctx_id = req->stat_ctx_id;
+ hwrm_req_send(bp, req0);
}
- _hwrm_send_message(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
+ hwrm_req_send(bp, req);
cpr->hw_stats_ctx_id = INVALID_STATS_CTX_ID;
}
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
+ if (req0)
+ hwrm_req_drop(bp, req0);
}
static int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp)
{
- int rc = 0, i;
- struct hwrm_stat_ctx_alloc_input req = {0};
- struct hwrm_stat_ctx_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+ struct hwrm_stat_ctx_alloc_output *resp;
+ struct hwrm_stat_ctx_alloc_input *req;
+ int rc, i;
if (BNXT_CHIP_TYPE_NITRO_A0(bp))
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_STAT_CTX_ALLOC, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_STAT_CTX_ALLOC);
+ if (rc)
+ return rc;
- req.stats_dma_length = cpu_to_le16(bp->hw_ring_stats_size);
- req.update_period_ms = cpu_to_le32(bp->stats_coal_ticks / 1000);
+ req->stats_dma_length = cpu_to_le16(bp->hw_ring_stats_size);
+ req->update_period_ms = cpu_to_le32(bp->stats_coal_ticks / 1000);
- mutex_lock(&bp->hwrm_cmd_lock);
+ resp = hwrm_req_hold(bp, req);
for (i = 0; i < bp->cp_nr_rings; i++) {
struct bnxt_napi *bnapi = bp->bnapi[i];
struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
- req.stats_dma_addr = cpu_to_le64(cpr->stats.hw_stats_map);
+ req->stats_dma_addr = cpu_to_le64(cpr->stats.hw_stats_map);
- rc = _hwrm_send_message(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, req);
if (rc)
break;
@@ -6799,22 +6714,25 @@ static int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp)
bp->grp_info[i].fw_stats_ctx = cpr->hw_stats_ctx_id;
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
{
- struct hwrm_func_qcfg_input req = {0};
- struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+ struct hwrm_func_qcfg_output *resp;
+ struct hwrm_func_qcfg_input *req;
u32 min_db_offset = 0;
u16 flags;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
- req.fid = cpu_to_le16(0xffff);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_QCFG);
+ if (rc)
+ return rc;
+
+ req->fid = cpu_to_le16(0xffff);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (rc)
goto func_qcfg_exit;
@@ -6874,7 +6792,7 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
bp->db_size = pci_resource_len(bp->pdev, 2);
func_qcfg_exit:
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -6913,17 +6831,19 @@ static void bnxt_init_ctx_initializer(struct bnxt_ctx_mem_info *ctx,
static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp)
{
- struct hwrm_func_backing_store_qcaps_input req = {0};
- struct hwrm_func_backing_store_qcaps_output *resp =
- bp->hwrm_cmd_resp_addr;
+ struct hwrm_func_backing_store_qcaps_output *resp;
+ struct hwrm_func_backing_store_qcaps_input *req;
int rc;
if (bp->hwrm_spec_code < 0x10902 || BNXT_VF(bp) || bp->ctx)
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_BACKING_STORE_QCAPS, -1, -1);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_BACKING_STORE_QCAPS);
+ if (rc)
+ return rc;
+
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send_silent(bp, req);
if (!rc) {
struct bnxt_ctx_pg_info *ctx_pg;
struct bnxt_ctx_mem_info *ctx;
@@ -6988,7 +6908,7 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp)
rc = 0;
}
ctx_err:
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -7019,15 +6939,17 @@ static void bnxt_hwrm_set_pg_attr(struct bnxt_ring_mem_info *rmem, u8 *pg_attr,
static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables)
{
- struct hwrm_func_backing_store_cfg_input req = {0};
+ struct hwrm_func_backing_store_cfg_input *req;
struct bnxt_ctx_mem_info *ctx = bp->ctx;
struct bnxt_ctx_pg_info *ctx_pg;
- u32 req_len = sizeof(req);
+ void **__req = (void **)&req;
+ u32 req_len = sizeof(*req);
__le32 *num_entries;
__le64 *pg_dir;
u32 flags = 0;
u8 *pg_attr;
u32 ena;
+ int rc;
int i;
if (!ctx)
@@ -7035,90 +6957,93 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables)
if (req_len > bp->hwrm_max_ext_req_len)
req_len = BNXT_BACKING_STORE_CFG_LEGACY_LEN;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_BACKING_STORE_CFG, -1, -1);
- req.enables = cpu_to_le32(enables);
+ rc = __hwrm_req_init(bp, __req, HWRM_FUNC_BACKING_STORE_CFG, req_len);
+ if (rc)
+ return rc;
+ req->enables = cpu_to_le32(enables);
if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP) {
ctx_pg = &ctx->qp_mem;
- req.qp_num_entries = cpu_to_le32(ctx_pg->entries);
- req.qp_num_qp1_entries = cpu_to_le16(ctx->qp_min_qp1_entries);
- req.qp_num_l2_entries = cpu_to_le16(ctx->qp_max_l2_entries);
- req.qp_entry_size = cpu_to_le16(ctx->qp_entry_size);
+ req->qp_num_entries = cpu_to_le32(ctx_pg->entries);
+ req->qp_num_qp1_entries = cpu_to_le16(ctx->qp_min_qp1_entries);
+ req->qp_num_l2_entries = cpu_to_le16(ctx->qp_max_l2_entries);
+ req->qp_entry_size = cpu_to_le16(ctx->qp_entry_size);
bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
- &req.qpc_pg_size_qpc_lvl,
- &req.qpc_page_dir);
+ &req->qpc_pg_size_qpc_lvl,
+ &req->qpc_page_dir);
}
if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ) {
ctx_pg = &ctx->srq_mem;
- req.srq_num_entries = cpu_to_le32(ctx_pg->entries);
- req.srq_num_l2_entries = cpu_to_le16(ctx->srq_max_l2_entries);
- req.srq_entry_size = cpu_to_le16(ctx->srq_entry_size);
+ req->srq_num_entries = cpu_to_le32(ctx_pg->entries);
+ req->srq_num_l2_entries = cpu_to_le16(ctx->srq_max_l2_entries);
+ req->srq_entry_size = cpu_to_le16(ctx->srq_entry_size);
bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
- &req.srq_pg_size_srq_lvl,
- &req.srq_page_dir);
+ &req->srq_pg_size_srq_lvl,
+ &req->srq_page_dir);
}
if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ) {
ctx_pg = &ctx->cq_mem;
- req.cq_num_entries = cpu_to_le32(ctx_pg->entries);
- req.cq_num_l2_entries = cpu_to_le16(ctx->cq_max_l2_entries);
- req.cq_entry_size = cpu_to_le16(ctx->cq_entry_size);
- bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req.cq_pg_size_cq_lvl,
- &req.cq_page_dir);
+ req->cq_num_entries = cpu_to_le32(ctx_pg->entries);
+ req->cq_num_l2_entries = cpu_to_le16(ctx->cq_max_l2_entries);
+ req->cq_entry_size = cpu_to_le16(ctx->cq_entry_size);
+ bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
+ &req->cq_pg_size_cq_lvl,
+ &req->cq_page_dir);
}
if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC) {
ctx_pg = &ctx->vnic_mem;
- req.vnic_num_vnic_entries =
+ req->vnic_num_vnic_entries =
cpu_to_le16(ctx->vnic_max_vnic_entries);
- req.vnic_num_ring_table_entries =
+ req->vnic_num_ring_table_entries =
cpu_to_le16(ctx->vnic_max_ring_table_entries);
- req.vnic_entry_size = cpu_to_le16(ctx->vnic_entry_size);
+ req->vnic_entry_size = cpu_to_le16(ctx->vnic_entry_size);
bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
- &req.vnic_pg_size_vnic_lvl,
- &req.vnic_page_dir);
+ &req->vnic_pg_size_vnic_lvl,
+ &req->vnic_page_dir);
}
if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT) {
ctx_pg = &ctx->stat_mem;
- req.stat_num_entries = cpu_to_le32(ctx->stat_max_entries);
- req.stat_entry_size = cpu_to_le16(ctx->stat_entry_size);
+ req->stat_num_entries = cpu_to_le32(ctx->stat_max_entries);
+ req->stat_entry_size = cpu_to_le16(ctx->stat_entry_size);
bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
- &req.stat_pg_size_stat_lvl,
- &req.stat_page_dir);
+ &req->stat_pg_size_stat_lvl,
+ &req->stat_page_dir);
}
if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV) {
ctx_pg = &ctx->mrav_mem;
- req.mrav_num_entries = cpu_to_le32(ctx_pg->entries);
+ req->mrav_num_entries = cpu_to_le32(ctx_pg->entries);
if (ctx->mrav_num_entries_units)
flags |=
FUNC_BACKING_STORE_CFG_REQ_FLAGS_MRAV_RESERVATION_SPLIT;
- req.mrav_entry_size = cpu_to_le16(ctx->mrav_entry_size);
+ req->mrav_entry_size = cpu_to_le16(ctx->mrav_entry_size);
bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
- &req.mrav_pg_size_mrav_lvl,
- &req.mrav_page_dir);
+ &req->mrav_pg_size_mrav_lvl,
+ &req->mrav_page_dir);
}
if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM) {
ctx_pg = &ctx->tim_mem;
- req.tim_num_entries = cpu_to_le32(ctx_pg->entries);
- req.tim_entry_size = cpu_to_le16(ctx->tim_entry_size);
+ req->tim_num_entries = cpu_to_le32(ctx_pg->entries);
+ req->tim_entry_size = cpu_to_le16(ctx->tim_entry_size);
bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
- &req.tim_pg_size_tim_lvl,
- &req.tim_page_dir);
+ &req->tim_pg_size_tim_lvl,
+ &req->tim_page_dir);
}
- for (i = 0, num_entries = &req.tqm_sp_num_entries,
- pg_attr = &req.tqm_sp_pg_size_tqm_sp_lvl,
- pg_dir = &req.tqm_sp_page_dir,
+ for (i = 0, num_entries = &req->tqm_sp_num_entries,
+ pg_attr = &req->tqm_sp_pg_size_tqm_sp_lvl,
+ pg_dir = &req->tqm_sp_page_dir,
ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP;
i < BNXT_MAX_TQM_RINGS;
i++, num_entries++, pg_attr++, pg_dir++, ena <<= 1) {
if (!(enables & ena))
continue;
- req.tqm_entry_size = cpu_to_le16(ctx->tqm_entry_size);
+ req->tqm_entry_size = cpu_to_le16(ctx->tqm_entry_size);
ctx_pg = ctx->tqm_mem[i];
*num_entries = cpu_to_le32(ctx_pg->entries);
bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, pg_attr, pg_dir);
}
- req.flags = cpu_to_le32(flags);
- return hwrm_send_message(bp, &req, req_len, HWRM_CMD_TIMEOUT);
+ req->flags = cpu_to_le32(flags);
+ return hwrm_req_send(bp, req);
}
static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp,
@@ -7398,17 +7323,18 @@ skip_rdma:
int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all)
{
- struct hwrm_func_resource_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_func_resource_qcaps_input req = {0};
+ struct hwrm_func_resource_qcaps_output *resp;
+ struct hwrm_func_resource_qcaps_input *req;
struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_RESOURCE_QCAPS, -1, -1);
- req.fid = cpu_to_le16(0xffff);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_RESOURCE_QCAPS);
+ if (rc)
+ return rc;
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message_silent(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
+ req->fid = cpu_to_le16(0xffff);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send_silent(bp, req);
if (rc)
goto hwrm_func_resc_qcaps_exit;
@@ -7449,15 +7375,14 @@ int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all)
pf->vf_resv_strategy = BNXT_VF_RESV_STRATEGY_MAXIMAL;
}
hwrm_func_resc_qcaps_exit:
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
-/* bp->hwrm_cmd_lock already held. */
static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
{
- struct hwrm_port_mac_ptp_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_port_mac_ptp_qcfg_input req = {0};
+ struct hwrm_port_mac_ptp_qcfg_output *resp;
+ struct hwrm_port_mac_ptp_qcfg_input *req;
struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
u8 flags;
int rc;
@@ -7467,21 +7392,27 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
goto no_ptp;
}
- req.port_id = cpu_to_le16(bp->pf.port_id);
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_MAC_PTP_QCFG, -1, -1);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_PTP_QCFG);
if (rc)
goto no_ptp;
+ req->port_id = cpu_to_le16(bp->pf.port_id);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
+ if (rc)
+ goto exit;
+
flags = resp->flags;
if (!(flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS)) {
rc = -ENODEV;
- goto no_ptp;
+ goto exit;
}
if (!ptp) {
ptp = kzalloc(sizeof(*ptp), GFP_KERNEL);
- if (!ptp)
- return -ENOMEM;
+ if (!ptp) {
+ rc = -ENOMEM;
+ goto exit;
+ }
ptp->bp = bp;
bp->ptp_cfg = ptp;
}
@@ -7493,11 +7424,18 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
ptp->refclk_regs[1] = BNXT_TS_REG_TIMESYNC_TS0_UPPER;
} else {
rc = -ENODEV;
- goto no_ptp;
+ goto exit;
}
- return 0;
+ rc = bnxt_ptp_init(bp);
+ if (rc)
+ netdev_warn(bp->dev, "PTP initialization failed.\n");
+exit:
+ hwrm_req_drop(bp, req);
+ if (!rc)
+ return 0;
no_ptp:
+ bnxt_ptp_clear(bp);
kfree(ptp);
bp->ptp_cfg = NULL;
return rc;
@@ -7505,17 +7443,19 @@ no_ptp:
static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
{
- int rc = 0;
- struct hwrm_func_qcaps_input req = {0};
- struct hwrm_func_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
+ struct hwrm_func_qcaps_output *resp;
+ struct hwrm_func_qcaps_input *req;
struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
u32 flags, flags_ext;
+ int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCAPS, -1, -1);
- req.fid = cpu_to_le16(0xffff);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_QCAPS);
+ if (rc)
+ return rc;
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->fid = cpu_to_le16(0xffff);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (rc)
goto hwrm_func_qcaps_exit;
@@ -7540,6 +7480,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
flags_ext = le32_to_cpu(resp->flags_ext);
if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_EXT_HW_STATS_SUPPORTED)
bp->fw_cap |= BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED;
+ if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_PPS_SUPPORTED))
+ bp->fw_cap |= BNXT_FW_CAP_PTP_PPS;
bp->tx_push_thresh = 0;
if ((flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) &&
@@ -7577,6 +7519,7 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
if (flags & FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED) {
__bnxt_hwrm_ptp_qcfg(bp);
} else {
+ bnxt_ptp_clear(bp);
kfree(bp->ptp_cfg);
bp->ptp_cfg = NULL;
}
@@ -7590,7 +7533,7 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
}
hwrm_func_qcaps_exit:
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -7621,19 +7564,20 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp)
static int bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(struct bnxt *bp)
{
- struct hwrm_cfa_adv_flow_mgnt_qcaps_input req = {0};
struct hwrm_cfa_adv_flow_mgnt_qcaps_output *resp;
- int rc = 0;
+ struct hwrm_cfa_adv_flow_mgnt_qcaps_input *req;
u32 flags;
+ int rc;
if (!(bp->fw_cap & BNXT_FW_CAP_CFA_ADV_FLOW))
return 0;
- resp = bp->hwrm_cmd_resp_addr;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ADV_FLOW_MGNT_QCAPS, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_CFA_ADV_FLOW_MGNT_QCAPS);
+ if (rc)
+ return rc;
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (rc)
goto hwrm_cfa_adv_qcaps_exit;
@@ -7643,7 +7587,7 @@ static int bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(struct bnxt *bp)
bp->fw_cap |= BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2;
hwrm_cfa_adv_qcaps_exit:
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -7786,17 +7730,20 @@ static int bnxt_map_fw_health_regs(struct bnxt *bp)
static int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
{
- struct hwrm_error_recovery_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
struct bnxt_fw_health *fw_health = bp->fw_health;
- struct hwrm_error_recovery_qcfg_input req = {0};
+ struct hwrm_error_recovery_qcfg_output *resp;
+ struct hwrm_error_recovery_qcfg_input *req;
int rc, i;
if (!(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_ERROR_RECOVERY_QCFG, -1, -1);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_ERROR_RECOVERY_QCFG);
+ if (rc)
+ return rc;
+
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (rc)
goto err_recovery_out;
fw_health->flags = le32_to_cpu(resp->flags);
@@ -7838,7 +7785,7 @@ static int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
resp->delay_after_reset[i];
}
err_recovery_out:
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
if (!rc)
rc = bnxt_map_fw_health_regs(bp);
if (rc)
@@ -7848,12 +7795,16 @@ err_recovery_out:
static int bnxt_hwrm_func_reset(struct bnxt *bp)
{
- struct hwrm_func_reset_input req = {0};
+ struct hwrm_func_reset_input *req;
+ int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_RESET, -1, -1);
- req.enables = 0;
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_RESET);
+ if (rc)
+ return rc;
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_RESET_TIMEOUT);
+ req->enables = 0;
+ hwrm_req_timeout(bp, req, HWRM_RESET_TIMEOUT);
+ return hwrm_req_send(bp, req);
}
static void bnxt_nvm_cfg_ver_get(struct bnxt *bp)
@@ -7868,16 +7819,18 @@ static void bnxt_nvm_cfg_ver_get(struct bnxt *bp)
static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp)
{
- int rc = 0;
- struct hwrm_queue_qportcfg_input req = {0};
- struct hwrm_queue_qportcfg_output *resp = bp->hwrm_cmd_resp_addr;
+ struct hwrm_queue_qportcfg_output *resp;
+ struct hwrm_queue_qportcfg_input *req;
u8 i, j, *qptr;
bool no_rdma;
+ int rc = 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_QPORTCFG, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_QUEUE_QPORTCFG);
+ if (rc)
+ return rc;
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (rc)
goto qportcfg_exit;
@@ -7911,35 +7864,48 @@ static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp)
bp->max_lltc = bp->max_tc;
qportcfg_exit:
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
-static int __bnxt_hwrm_ver_get(struct bnxt *bp, bool silent)
+static int bnxt_hwrm_poll(struct bnxt *bp)
{
- struct hwrm_ver_get_input req = {0};
+ struct hwrm_ver_get_input *req;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VER_GET, -1, -1);
- req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
- req.hwrm_intf_min = HWRM_VERSION_MINOR;
- req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
+ rc = hwrm_req_init(bp, req, HWRM_VER_GET);
+ if (rc)
+ return rc;
+
+ req->hwrm_intf_maj = HWRM_VERSION_MAJOR;
+ req->hwrm_intf_min = HWRM_VERSION_MINOR;
+ req->hwrm_intf_upd = HWRM_VERSION_UPDATE;
- rc = bnxt_hwrm_do_send_msg(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT,
- silent);
+ hwrm_req_flags(bp, req, BNXT_HWRM_CTX_SILENT | BNXT_HWRM_FULL_WAIT);
+ rc = hwrm_req_send(bp, req);
return rc;
}
static int bnxt_hwrm_ver_get(struct bnxt *bp)
{
- struct hwrm_ver_get_output *resp = bp->hwrm_cmd_resp_addr;
+ struct hwrm_ver_get_output *resp;
+ struct hwrm_ver_get_input *req;
u16 fw_maj, fw_min, fw_bld, fw_rsv;
u32 dev_caps_cfg, hwrm_ver;
int rc, len;
+ rc = hwrm_req_init(bp, req, HWRM_VER_GET);
+ if (rc)
+ return rc;
+
+ hwrm_req_flags(bp, req, BNXT_HWRM_FULL_WAIT);
bp->hwrm_max_req_len = HWRM_MAX_REQ_LEN;
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = __bnxt_hwrm_ver_get(bp, false);
+ req->hwrm_intf_maj = HWRM_VERSION_MAJOR;
+ req->hwrm_intf_min = HWRM_VERSION_MINOR;
+ req->hwrm_intf_upd = HWRM_VERSION_UPDATE;
+
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (rc)
goto hwrm_ver_get_exit;
@@ -8031,29 +7997,33 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp)
bp->fw_cap |= BNXT_FW_CAP_CFA_ADV_FLOW;
hwrm_ver_get_exit:
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
int bnxt_hwrm_fw_set_time(struct bnxt *bp)
{
- struct hwrm_fw_set_time_input req = {0};
+ struct hwrm_fw_set_time_input *req;
struct tm tm;
time64_t now = ktime_get_real_seconds();
+ int rc;
if ((BNXT_VF(bp) && bp->hwrm_spec_code < 0x10901) ||
bp->hwrm_spec_code < 0x10400)
return -EOPNOTSUPP;
time64_to_tm(now, 0, &tm);
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_SET_TIME, -1, -1);
- req.year = cpu_to_le16(1900 + tm.tm_year);
- req.month = 1 + tm.tm_mon;
- req.day = tm.tm_mday;
- req.hour = tm.tm_hour;
- req.minute = tm.tm_min;
- req.second = tm.tm_sec;
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_FW_SET_TIME);
+ if (rc)
+ return rc;
+
+ req->year = cpu_to_le16(1900 + tm.tm_year);
+ req->month = 1 + tm.tm_mon;
+ req->day = tm.tm_mday;
+ req->hour = tm.tm_hour;
+ req->minute = tm.tm_min;
+ req->second = tm.tm_sec;
+ return hwrm_req_send(bp, req);
}
static void bnxt_add_one_ctr(u64 hw, u64 *sw, u64 mask)
@@ -8141,8 +8111,9 @@ static void bnxt_accumulate_all_stats(struct bnxt *bp)
static int bnxt_hwrm_port_qstats(struct bnxt *bp, u8 flags)
{
+ struct hwrm_port_qstats_input *req;
struct bnxt_pf_info *pf = &bp->pf;
- struct hwrm_port_qstats_input req = {0};
+ int rc;
if (!(bp->flags & BNXT_FLAG_PORT_STATS))
return 0;
@@ -8150,20 +8121,24 @@ static int bnxt_hwrm_port_qstats(struct bnxt *bp, u8 flags)
if (flags && !(bp->fw_cap & BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED))
return -EOPNOTSUPP;
- req.flags = flags;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_QSTATS, -1, -1);
- req.port_id = cpu_to_le16(pf->port_id);
- req.tx_stat_host_addr = cpu_to_le64(bp->port_stats.hw_stats_map +
+ rc = hwrm_req_init(bp, req, HWRM_PORT_QSTATS);
+ if (rc)
+ return rc;
+
+ req->flags = flags;
+ req->port_id = cpu_to_le16(pf->port_id);
+ req->tx_stat_host_addr = cpu_to_le64(bp->port_stats.hw_stats_map +
BNXT_TX_PORT_STATS_BYTE_OFFSET);
- req.rx_stat_host_addr = cpu_to_le64(bp->port_stats.hw_stats_map);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->rx_stat_host_addr = cpu_to_le64(bp->port_stats.hw_stats_map);
+ return hwrm_req_send(bp, req);
}
static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp, u8 flags)
{
- struct hwrm_port_qstats_ext_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_queue_pri2cos_qcfg_input req2 = {0};
- struct hwrm_port_qstats_ext_input req = {0};
+ struct hwrm_queue_pri2cos_qcfg_output *resp_qc;
+ struct hwrm_queue_pri2cos_qcfg_input *req_qc;
+ struct hwrm_port_qstats_ext_output *resp_qs;
+ struct hwrm_port_qstats_ext_input *req_qs;
struct bnxt_pf_info *pf = &bp->pf;
u32 tx_stat_size;
int rc;
@@ -8174,46 +8149,53 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp, u8 flags)
if (flags && !(bp->fw_cap & BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED))
return -EOPNOTSUPP;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_QSTATS_EXT, -1, -1);
- req.flags = flags;
- req.port_id = cpu_to_le16(pf->port_id);
- req.rx_stat_size = cpu_to_le16(sizeof(struct rx_port_stats_ext));
- req.rx_stat_host_addr = cpu_to_le64(bp->rx_port_stats_ext.hw_stats_map);
+ rc = hwrm_req_init(bp, req_qs, HWRM_PORT_QSTATS_EXT);
+ if (rc)
+ return rc;
+
+ req_qs->flags = flags;
+ req_qs->port_id = cpu_to_le16(pf->port_id);
+ req_qs->rx_stat_size = cpu_to_le16(sizeof(struct rx_port_stats_ext));
+ req_qs->rx_stat_host_addr = cpu_to_le64(bp->rx_port_stats_ext.hw_stats_map);
tx_stat_size = bp->tx_port_stats_ext.hw_stats ?
sizeof(struct tx_port_stats_ext) : 0;
- req.tx_stat_size = cpu_to_le16(tx_stat_size);
- req.tx_stat_host_addr = cpu_to_le64(bp->tx_port_stats_ext.hw_stats_map);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req_qs->tx_stat_size = cpu_to_le16(tx_stat_size);
+ req_qs->tx_stat_host_addr = cpu_to_le64(bp->tx_port_stats_ext.hw_stats_map);
+ resp_qs = hwrm_req_hold(bp, req_qs);
+ rc = hwrm_req_send(bp, req_qs);
if (!rc) {
- bp->fw_rx_stats_ext_size = le16_to_cpu(resp->rx_stat_size) / 8;
+ bp->fw_rx_stats_ext_size =
+ le16_to_cpu(resp_qs->rx_stat_size) / 8;
bp->fw_tx_stats_ext_size = tx_stat_size ?
- le16_to_cpu(resp->tx_stat_size) / 8 : 0;
+ le16_to_cpu(resp_qs->tx_stat_size) / 8 : 0;
} else {
bp->fw_rx_stats_ext_size = 0;
bp->fw_tx_stats_ext_size = 0;
}
+ hwrm_req_drop(bp, req_qs);
+
if (flags)
- goto qstats_done;
+ return rc;
if (bp->fw_tx_stats_ext_size <=
offsetof(struct tx_port_stats_ext, pfc_pri0_tx_duration_us) / 8) {
- mutex_unlock(&bp->hwrm_cmd_lock);
bp->pri2cos_valid = 0;
return rc;
}
- bnxt_hwrm_cmd_hdr_init(bp, &req2, HWRM_QUEUE_PRI2COS_QCFG, -1, -1);
- req2.flags = cpu_to_le32(QUEUE_PRI2COS_QCFG_REQ_FLAGS_IVLAN);
+ rc = hwrm_req_init(bp, req_qc, HWRM_QUEUE_PRI2COS_QCFG);
+ if (rc)
+ return rc;
+
+ req_qc->flags = cpu_to_le32(QUEUE_PRI2COS_QCFG_REQ_FLAGS_IVLAN);
- rc = _hwrm_send_message(bp, &req2, sizeof(req2), HWRM_CMD_TIMEOUT);
+ resp_qc = hwrm_req_hold(bp, req_qc);
+ rc = hwrm_req_send(bp, req_qc);
if (!rc) {
- struct hwrm_queue_pri2cos_qcfg_output *resp2;
u8 *pri2cos;
int i, j;
- resp2 = bp->hwrm_cmd_resp_addr;
- pri2cos = &resp2->pri0_cos_queue_id;
+ pri2cos = &resp_qc->pri0_cos_queue_id;
for (i = 0; i < 8; i++) {
u8 queue_id = pri2cos[i];
u8 queue_idx;
@@ -8222,17 +8204,18 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp, u8 flags)
queue_idx = queue_id % 10;
if (queue_idx > BNXT_MAX_QUEUE) {
bp->pri2cos_valid = false;
- goto qstats_done;
+ hwrm_req_drop(bp, req_qc);
+ return rc;
}
for (j = 0; j < bp->max_q; j++) {
if (bp->q_ids[j] == queue_id)
bp->pri2cos_idx[i] = queue_idx;
}
}
- bp->pri2cos_valid = 1;
+ bp->pri2cos_valid = true;
}
-qstats_done:
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req_qc);
+
return rc;
}
@@ -8307,35 +8290,46 @@ static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path,
static int bnxt_hwrm_set_br_mode(struct bnxt *bp, u16 br_mode)
{
- struct hwrm_func_cfg_input req = {0};
+ struct hwrm_func_cfg_input *req;
+ u8 evb_mode;
+ int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
- req.fid = cpu_to_le16(0xffff);
- req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_EVB_MODE);
if (br_mode == BRIDGE_MODE_VEB)
- req.evb_mode = FUNC_CFG_REQ_EVB_MODE_VEB;
+ evb_mode = FUNC_CFG_REQ_EVB_MODE_VEB;
else if (br_mode == BRIDGE_MODE_VEPA)
- req.evb_mode = FUNC_CFG_REQ_EVB_MODE_VEPA;
+ evb_mode = FUNC_CFG_REQ_EVB_MODE_VEPA;
else
return -EINVAL;
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+ if (rc)
+ return rc;
+
+ req->fid = cpu_to_le16(0xffff);
+ req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_EVB_MODE);
+ req->evb_mode = evb_mode;
+ return hwrm_req_send(bp, req);
}
static int bnxt_hwrm_set_cache_line_size(struct bnxt *bp, int size)
{
- struct hwrm_func_cfg_input req = {0};
+ struct hwrm_func_cfg_input *req;
+ int rc;
if (BNXT_VF(bp) || bp->hwrm_spec_code < 0x10803)
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
- req.fid = cpu_to_le16(0xffff);
- req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_CACHE_LINESIZE);
- req.options = FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_64;
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+ if (rc)
+ return rc;
+
+ req->fid = cpu_to_le16(0xffff);
+ req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_CACHE_LINESIZE);
+ req->options = FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_64;
if (size == 128)
- req.options = FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_128;
+ req->options = FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_128;
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ return hwrm_req_send(bp, req);
}
static int __bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id)
@@ -9125,10 +9119,9 @@ static void bnxt_disable_napi(struct bnxt *bp)
for (i = 0; i < bp->cp_nr_rings; i++) {
struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring;
+ napi_disable(&bp->bnapi[i]->napi);
if (bp->bnapi[i]->rx_ring)
cancel_work_sync(&cpr->dim.work);
-
- napi_disable(&bp->bnapi[i]->napi);
}
}
@@ -9162,9 +9155,11 @@ void bnxt_tx_disable(struct bnxt *bp)
if (bp->tx_ring) {
for (i = 0; i < bp->tx_nr_rings; i++) {
txr = &bp->tx_ring[i];
- txr->dev_state = BNXT_DEV_STATE_CLOSING;
+ WRITE_ONCE(txr->dev_state, BNXT_DEV_STATE_CLOSING);
}
}
+ /* Make sure napi polls see @dev_state change */
+ synchronize_net();
/* Drop carrier first to prevent TX timeout */
netif_carrier_off(bp->dev);
/* Stop all TX queues */
@@ -9178,8 +9173,10 @@ void bnxt_tx_enable(struct bnxt *bp)
for (i = 0; i < bp->tx_nr_rings; i++) {
txr = &bp->tx_ring[i];
- txr->dev_state = 0;
+ WRITE_ONCE(txr->dev_state, 0);
}
+ /* Make sure napi polls see @dev_state change */
+ synchronize_net();
netif_tx_wake_all_queues(bp->dev);
if (bp->link_info.link_up)
netif_carrier_on(bp->dev);
@@ -9280,18 +9277,20 @@ static bool bnxt_phy_qcaps_no_speed(struct hwrm_port_phy_qcaps_output *resp)
static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
{
- int rc = 0;
- struct hwrm_port_phy_qcaps_input req = {0};
- struct hwrm_port_phy_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
struct bnxt_link_info *link_info = &bp->link_info;
+ struct hwrm_port_phy_qcaps_output *resp;
+ struct hwrm_port_phy_qcaps_input *req;
+ int rc = 0;
if (bp->hwrm_spec_code < 0x10201)
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_QCAPS, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_QCAPS);
+ if (rc)
+ return rc;
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (rc)
goto hwrm_phy_qcaps_exit;
@@ -9329,7 +9328,7 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
bp->port_count = resp->port_cnt;
hwrm_phy_qcaps_exit:
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -9342,19 +9341,21 @@ static bool bnxt_support_dropped(u16 advertising, u16 supported)
int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
{
- int rc = 0;
struct bnxt_link_info *link_info = &bp->link_info;
- struct hwrm_port_phy_qcfg_input req = {0};
- struct hwrm_port_phy_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+ struct hwrm_port_phy_qcfg_output *resp;
+ struct hwrm_port_phy_qcfg_input *req;
u8 link_up = link_info->link_up;
bool support_changed = false;
+ int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_QCFG, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_QCFG);
+ if (rc)
+ return rc;
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (rc) {
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -9449,7 +9450,7 @@ int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
/* alwasy link down if not require to update link state */
link_info->link_up = 0;
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
if (!BNXT_PHY_CFG_ABLE(bp))
return 0;
@@ -9559,18 +9560,20 @@ static void bnxt_hwrm_set_link_common(struct bnxt *bp, struct hwrm_port_phy_cfg_
int bnxt_hwrm_set_pause(struct bnxt *bp)
{
- struct hwrm_port_phy_cfg_input req = {0};
+ struct hwrm_port_phy_cfg_input *req;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1);
- bnxt_hwrm_set_pause_common(bp, &req);
+ rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_CFG);
+ if (rc)
+ return rc;
+
+ bnxt_hwrm_set_pause_common(bp, req);
if ((bp->link_info.autoneg & BNXT_AUTONEG_FLOW_CTRL) ||
bp->link_info.force_link_chng)
- bnxt_hwrm_set_link_common(bp, &req);
+ bnxt_hwrm_set_link_common(bp, req);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, req);
if (!rc && !(bp->link_info.autoneg & BNXT_AUTONEG_FLOW_CTRL)) {
/* since changing of pause setting doesn't trigger any link
* change event, the driver needs to update the current pause
@@ -9583,7 +9586,6 @@ int bnxt_hwrm_set_pause(struct bnxt *bp)
bnxt_report_link(bp);
}
bp->link_info.force_link_chng = false;
- mutex_unlock(&bp->hwrm_cmd_lock);
return rc;
}
@@ -9612,22 +9614,27 @@ static void bnxt_hwrm_set_eee(struct bnxt *bp,
int bnxt_hwrm_set_link_setting(struct bnxt *bp, bool set_pause, bool set_eee)
{
- struct hwrm_port_phy_cfg_input req = {0};
+ struct hwrm_port_phy_cfg_input *req;
+ int rc;
+
+ rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_CFG);
+ if (rc)
+ return rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1);
if (set_pause)
- bnxt_hwrm_set_pause_common(bp, &req);
+ bnxt_hwrm_set_pause_common(bp, req);
- bnxt_hwrm_set_link_common(bp, &req);
+ bnxt_hwrm_set_link_common(bp, req);
if (set_eee)
- bnxt_hwrm_set_eee(bp, &req);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ bnxt_hwrm_set_eee(bp, req);
+ return hwrm_req_send(bp, req);
}
static int bnxt_hwrm_shutdown_link(struct bnxt *bp)
{
- struct hwrm_port_phy_cfg_input req = {0};
+ struct hwrm_port_phy_cfg_input *req;
+ int rc;
if (!BNXT_SINGLE_PF(bp))
return 0;
@@ -9636,9 +9643,12 @@ static int bnxt_hwrm_shutdown_link(struct bnxt *bp)
!(bp->phy_flags & BNXT_PHY_FL_FW_MANAGED_LKDN))
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1);
- req.flags = cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DWN);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_CFG);
+ if (rc)
+ return rc;
+
+ req->flags = cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DWN);
+ return hwrm_req_send(bp, req);
}
static int bnxt_fw_init_one(struct bnxt *bp);
@@ -9664,16 +9674,14 @@ static int bnxt_try_recover_fw(struct bnxt *bp)
int retry = 0, rc;
u32 sts;
- mutex_lock(&bp->hwrm_cmd_lock);
do {
sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
- rc = __bnxt_hwrm_ver_get(bp, true);
+ rc = bnxt_hwrm_poll(bp);
if (!BNXT_FW_IS_BOOTING(sts) &&
!BNXT_FW_IS_RECOVERING(sts))
break;
retry++;
} while (rc == -EBUSY && retry < BNXT_FW_RETRY);
- mutex_unlock(&bp->hwrm_cmd_lock);
if (!BNXT_FW_IS_HEALTHY(sts)) {
netdev_err(bp->dev,
@@ -9693,8 +9701,8 @@ static int bnxt_try_recover_fw(struct bnxt *bp)
static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
{
- struct hwrm_func_drv_if_change_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_func_drv_if_change_input req = {0};
+ struct hwrm_func_drv_if_change_output *resp;
+ struct hwrm_func_drv_if_change_input *req;
bool fw_reset = !bp->irq_tbl;
bool resc_reinit = false;
int rc, retry = 0;
@@ -9703,29 +9711,34 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
if (!(bp->fw_cap & BNXT_FW_CAP_IF_CHANGE))
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_DRV_IF_CHANGE, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_DRV_IF_CHANGE);
+ if (rc)
+ return rc;
+
if (up)
- req.flags = cpu_to_le32(FUNC_DRV_IF_CHANGE_REQ_FLAGS_UP);
- mutex_lock(&bp->hwrm_cmd_lock);
+ req->flags = cpu_to_le32(FUNC_DRV_IF_CHANGE_REQ_FLAGS_UP);
+ resp = hwrm_req_hold(bp, req);
+
+ hwrm_req_flags(bp, req, BNXT_HWRM_FULL_WAIT);
while (retry < BNXT_FW_IF_RETRY) {
- rc = _hwrm_send_message(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, req);
if (rc != -EAGAIN)
break;
msleep(50);
retry++;
}
- if (!rc)
- flags = le32_to_cpu(resp->flags);
- mutex_unlock(&bp->hwrm_cmd_lock);
- if (rc == -EAGAIN)
+ if (rc == -EAGAIN) {
+ hwrm_req_drop(bp, req);
return rc;
- if (rc && up) {
+ } else if (!rc) {
+ flags = le32_to_cpu(resp->flags);
+ } else if (up) {
rc = bnxt_try_recover_fw(bp);
fw_reset = true;
}
+ hwrm_req_drop(bp, req);
if (rc)
return rc;
@@ -9794,8 +9807,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
static int bnxt_hwrm_port_led_qcaps(struct bnxt *bp)
{
- struct hwrm_port_led_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_port_led_qcaps_input req = {0};
+ struct hwrm_port_led_qcaps_output *resp;
+ struct hwrm_port_led_qcaps_input *req;
struct bnxt_pf_info *pf = &bp->pf;
int rc;
@@ -9803,12 +9816,15 @@ static int bnxt_hwrm_port_led_qcaps(struct bnxt *bp)
if (BNXT_VF(bp) || bp->hwrm_spec_code < 0x10601)
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_LED_QCAPS, -1, -1);
- req.port_id = cpu_to_le16(pf->port_id);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_PORT_LED_QCAPS);
+ if (rc)
+ return rc;
+
+ req->port_id = cpu_to_le16(pf->port_id);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (rc) {
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
if (resp->num_leds > 0 && resp->num_leds < BNXT_MAX_LED) {
@@ -9828,52 +9844,64 @@ static int bnxt_hwrm_port_led_qcaps(struct bnxt *bp)
}
}
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return 0;
}
int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp)
{
- struct hwrm_wol_filter_alloc_input req = {0};
- struct hwrm_wol_filter_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+ struct hwrm_wol_filter_alloc_output *resp;
+ struct hwrm_wol_filter_alloc_input *req;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_WOL_FILTER_ALLOC, -1, -1);
- req.port_id = cpu_to_le16(bp->pf.port_id);
- req.wol_type = WOL_FILTER_ALLOC_REQ_WOL_TYPE_MAGICPKT;
- req.enables = cpu_to_le32(WOL_FILTER_ALLOC_REQ_ENABLES_MAC_ADDRESS);
- memcpy(req.mac_address, bp->dev->dev_addr, ETH_ALEN);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_WOL_FILTER_ALLOC);
+ if (rc)
+ return rc;
+
+ req->port_id = cpu_to_le16(bp->pf.port_id);
+ req->wol_type = WOL_FILTER_ALLOC_REQ_WOL_TYPE_MAGICPKT;
+ req->enables = cpu_to_le32(WOL_FILTER_ALLOC_REQ_ENABLES_MAC_ADDRESS);
+ memcpy(req->mac_address, bp->dev->dev_addr, ETH_ALEN);
+
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc)
bp->wol_filter_id = resp->wol_filter_id;
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
int bnxt_hwrm_free_wol_fltr(struct bnxt *bp)
{
- struct hwrm_wol_filter_free_input req = {0};
+ struct hwrm_wol_filter_free_input *req;
+ int rc;
+
+ rc = hwrm_req_init(bp, req, HWRM_WOL_FILTER_FREE);
+ if (rc)
+ return rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_WOL_FILTER_FREE, -1, -1);
- req.port_id = cpu_to_le16(bp->pf.port_id);
- req.enables = cpu_to_le32(WOL_FILTER_FREE_REQ_ENABLES_WOL_FILTER_ID);
- req.wol_filter_id = bp->wol_filter_id;
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->port_id = cpu_to_le16(bp->pf.port_id);
+ req->enables = cpu_to_le32(WOL_FILTER_FREE_REQ_ENABLES_WOL_FILTER_ID);
+ req->wol_filter_id = bp->wol_filter_id;
+
+ return hwrm_req_send(bp, req);
}
static u16 bnxt_hwrm_get_wol_fltrs(struct bnxt *bp, u16 handle)
{
- struct hwrm_wol_filter_qcfg_input req = {0};
- struct hwrm_wol_filter_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+ struct hwrm_wol_filter_qcfg_output *resp;
+ struct hwrm_wol_filter_qcfg_input *req;
u16 next_handle = 0;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_WOL_FILTER_QCFG, -1, -1);
- req.port_id = cpu_to_le16(bp->pf.port_id);
- req.handle = cpu_to_le16(handle);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_WOL_FILTER_QCFG);
+ if (rc)
+ return rc;
+
+ req->port_id = cpu_to_le16(bp->pf.port_id);
+ req->handle = cpu_to_le16(handle);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc) {
next_handle = le16_to_cpu(resp->next_handle);
if (next_handle != 0) {
@@ -9884,7 +9912,7 @@ static u16 bnxt_hwrm_get_wol_fltrs(struct bnxt *bp, u16 handle)
}
}
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return next_handle;
}
@@ -9905,19 +9933,20 @@ static void bnxt_get_wol_settings(struct bnxt *bp)
static ssize_t bnxt_show_temp(struct device *dev,
struct device_attribute *devattr, char *buf)
{
- struct hwrm_temp_monitor_query_input req = {0};
struct hwrm_temp_monitor_query_output *resp;
+ struct hwrm_temp_monitor_query_input *req;
struct bnxt *bp = dev_get_drvdata(dev);
u32 len = 0;
int rc;
- resp = bp->hwrm_cmd_resp_addr;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TEMP_MONITOR_QUERY, -1, -1);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_TEMP_MONITOR_QUERY);
+ if (rc)
+ return rc;
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc)
len = sprintf(buf, "%u\n", resp->temp * 1000); /* display millidegree */
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
if (rc)
return rc;
return len;
@@ -9940,12 +9969,13 @@ static void bnxt_hwmon_close(struct bnxt *bp)
static void bnxt_hwmon_open(struct bnxt *bp)
{
- struct hwrm_temp_monitor_query_input req = {0};
+ struct hwrm_temp_monitor_query_input *req;
struct pci_dev *pdev = bp->pdev;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TEMP_MONITOR_QUERY, -1, -1);
- rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_TEMP_MONITOR_QUERY);
+ if (!rc)
+ rc = hwrm_req_send_silent(bp, req);
if (rc == -EACCES || rc == -EOPNOTSUPP) {
bnxt_hwmon_close(bp);
return;
@@ -10170,7 +10200,9 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
bnxt_tx_enable(bp);
mod_timer(&bp->timer, jiffies + bp->current_interval);
/* Poll link status and check for SFP+ module status */
+ mutex_lock(&bp->link_lock);
bnxt_get_port_module_status(bp);
+ mutex_unlock(&bp->link_lock);
/* VF-reps may need to be re-opened after the PF is re-opened */
if (BNXT_PF(bp))
@@ -10277,15 +10309,9 @@ static int bnxt_open(struct net_device *dev)
if (rc)
return rc;
- if (bnxt_ptp_init(bp)) {
- netdev_warn(dev, "PTP initialization failed.\n");
- kfree(bp->ptp_cfg);
- bp->ptp_cfg = NULL;
- }
rc = __bnxt_open_nic(bp, true, true);
if (rc) {
bnxt_hwrm_if_change(bp, false);
- bnxt_ptp_clear(bp);
} else {
if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) {
if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
@@ -10376,7 +10402,6 @@ static int bnxt_close(struct net_device *dev)
{
struct bnxt *bp = netdev_priv(dev);
- bnxt_ptp_clear(bp);
bnxt_hwmon_close(bp);
bnxt_close_nic(bp, true, true);
bnxt_hwrm_shutdown_link(bp);
@@ -10387,53 +10412,60 @@ static int bnxt_close(struct net_device *dev)
static int bnxt_hwrm_port_phy_read(struct bnxt *bp, u16 phy_addr, u16 reg,
u16 *val)
{
- struct hwrm_port_phy_mdio_read_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_port_phy_mdio_read_input req = {0};
+ struct hwrm_port_phy_mdio_read_output *resp;
+ struct hwrm_port_phy_mdio_read_input *req;
int rc;
if (bp->hwrm_spec_code < 0x10a00)
return -EOPNOTSUPP;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_MDIO_READ, -1, -1);
- req.port_id = cpu_to_le16(bp->pf.port_id);
- req.phy_addr = phy_addr;
- req.reg_addr = cpu_to_le16(reg & 0x1f);
+ rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_MDIO_READ);
+ if (rc)
+ return rc;
+
+ req->port_id = cpu_to_le16(bp->pf.port_id);
+ req->phy_addr = phy_addr;
+ req->reg_addr = cpu_to_le16(reg & 0x1f);
if (mdio_phy_id_is_c45(phy_addr)) {
- req.cl45_mdio = 1;
- req.phy_addr = mdio_phy_id_prtad(phy_addr);
- req.dev_addr = mdio_phy_id_devad(phy_addr);
- req.reg_addr = cpu_to_le16(reg);
+ req->cl45_mdio = 1;
+ req->phy_addr = mdio_phy_id_prtad(phy_addr);
+ req->dev_addr = mdio_phy_id_devad(phy_addr);
+ req->reg_addr = cpu_to_le16(reg);
}
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc)
*val = le16_to_cpu(resp->reg_data);
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
static int bnxt_hwrm_port_phy_write(struct bnxt *bp, u16 phy_addr, u16 reg,
u16 val)
{
- struct hwrm_port_phy_mdio_write_input req = {0};
+ struct hwrm_port_phy_mdio_write_input *req;
+ int rc;
if (bp->hwrm_spec_code < 0x10a00)
return -EOPNOTSUPP;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_MDIO_WRITE, -1, -1);
- req.port_id = cpu_to_le16(bp->pf.port_id);
- req.phy_addr = phy_addr;
- req.reg_addr = cpu_to_le16(reg & 0x1f);
+ rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_MDIO_WRITE);
+ if (rc)
+ return rc;
+
+ req->port_id = cpu_to_le16(bp->pf.port_id);
+ req->phy_addr = phy_addr;
+ req->reg_addr = cpu_to_le16(reg & 0x1f);
if (mdio_phy_id_is_c45(phy_addr)) {
- req.cl45_mdio = 1;
- req.phy_addr = mdio_phy_id_prtad(phy_addr);
- req.dev_addr = mdio_phy_id_devad(phy_addr);
- req.reg_addr = cpu_to_le16(reg);
+ req->cl45_mdio = 1;
+ req->phy_addr = mdio_phy_id_prtad(phy_addr);
+ req->dev_addr = mdio_phy_id_devad(phy_addr);
+ req->reg_addr = cpu_to_le16(reg);
}
- req.reg_data = cpu_to_le16(val);
+ req->reg_data = cpu_to_le16(val);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ return hwrm_req_send(bp, req);
}
/* rtnl_lock held */
@@ -10512,6 +10544,10 @@ static void bnxt_get_ring_stats(struct bnxt *bp,
stats->multicast += BNXT_GET_RING_STATS64(sw, rx_mcast_pkts);
stats->tx_dropped += BNXT_GET_RING_STATS64(sw, tx_error_pkts);
+
+ stats->rx_dropped +=
+ cpr->sw_stats.rx.rx_netpoll_discards +
+ cpr->sw_stats.rx.rx_oom_discards;
}
}
@@ -10526,6 +10562,7 @@ static void bnxt_add_prev_stats(struct bnxt *bp,
stats->tx_bytes += prev_stats->tx_bytes;
stats->rx_missed_errors += prev_stats->rx_missed_errors;
stats->multicast += prev_stats->multicast;
+ stats->rx_dropped += prev_stats->rx_dropped;
stats->tx_dropped += prev_stats->tx_dropped;
}
@@ -10670,6 +10707,7 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp)
{
struct net_device *dev = bp->dev;
struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
+ struct hwrm_cfa_l2_filter_free_input *req;
struct netdev_hw_addr *ha;
int i, off = 0, rc;
bool uc_update;
@@ -10681,19 +10719,16 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp)
if (!uc_update)
goto skip_uc;
- mutex_lock(&bp->hwrm_cmd_lock);
+ rc = hwrm_req_init(bp, req, HWRM_CFA_L2_FILTER_FREE);
+ if (rc)
+ return rc;
+ hwrm_req_hold(bp, req);
for (i = 1; i < vnic->uc_filter_count; i++) {
- struct hwrm_cfa_l2_filter_free_input req = {0};
-
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_L2_FILTER_FREE, -1,
- -1);
-
- req.l2_filter_id = vnic->fw_l2_filter_id[i];
+ req->l2_filter_id = vnic->fw_l2_filter_id[i];
- rc = _hwrm_send_message(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, req);
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
vnic->uc_filter_count = 1;
@@ -10765,6 +10800,9 @@ static bool bnxt_rfs_supported(struct bnxt *bp)
return true;
return false;
}
+ /* 212 firmware is broken for aRFS */
+ if (BNXT_FW_MAJ(bp) == 212)
+ return false;
if (BNXT_PF(bp) && !BNXT_CHIP_TYPE_NITRO_A0(bp))
return true;
if (bp->flags & BNXT_FLAG_NEW_RSS_CAP)
@@ -11042,22 +11080,30 @@ static netdev_features_t bnxt_features_check(struct sk_buff *skb,
int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words,
u32 *reg_buf)
{
- struct hwrm_dbg_read_direct_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_dbg_read_direct_input req = {0};
+ struct hwrm_dbg_read_direct_output *resp;
+ struct hwrm_dbg_read_direct_input *req;
__le32 *dbg_reg_buf;
dma_addr_t mapping;
int rc, i;
- dbg_reg_buf = dma_alloc_coherent(&bp->pdev->dev, num_words * 4,
- &mapping, GFP_KERNEL);
- if (!dbg_reg_buf)
- return -ENOMEM;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_DBG_READ_DIRECT, -1, -1);
- req.host_dest_addr = cpu_to_le64(mapping);
- req.read_addr = cpu_to_le32(reg_off + CHIMP_REG_VIEW_ADDR);
- req.read_len32 = cpu_to_le32(num_words);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_DBG_READ_DIRECT);
+ if (rc)
+ return rc;
+
+ dbg_reg_buf = hwrm_req_dma_slice(bp, req, num_words * 4,
+ &mapping);
+ if (!dbg_reg_buf) {
+ rc = -ENOMEM;
+ goto dbg_rd_reg_exit;
+ }
+
+ req->host_dest_addr = cpu_to_le64(mapping);
+
+ resp = hwrm_req_hold(bp, req);
+ req->read_addr = cpu_to_le32(reg_off + CHIMP_REG_VIEW_ADDR);
+ req->read_len32 = cpu_to_le32(num_words);
+
+ rc = hwrm_req_send(bp, req);
if (rc || resp->error_code) {
rc = -EIO;
goto dbg_rd_reg_exit;
@@ -11066,28 +11112,30 @@ int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words,
reg_buf[i] = le32_to_cpu(dbg_reg_buf[i]);
dbg_rd_reg_exit:
- mutex_unlock(&bp->hwrm_cmd_lock);
- dma_free_coherent(&bp->pdev->dev, num_words * 4, dbg_reg_buf, mapping);
+ hwrm_req_drop(bp, req);
return rc;
}
static int bnxt_dbg_hwrm_ring_info_get(struct bnxt *bp, u8 ring_type,
u32 ring_id, u32 *prod, u32 *cons)
{
- struct hwrm_dbg_ring_info_get_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_dbg_ring_info_get_input req = {0};
+ struct hwrm_dbg_ring_info_get_output *resp;
+ struct hwrm_dbg_ring_info_get_input *req;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_DBG_RING_INFO_GET, -1, -1);
- req.ring_type = ring_type;
- req.fw_ring_id = cpu_to_le32(ring_id);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_DBG_RING_INFO_GET);
+ if (rc)
+ return rc;
+
+ req->ring_type = ring_type;
+ req->fw_ring_id = cpu_to_le32(ring_id);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc) {
*prod = le32_to_cpu(resp->producer_index);
*cons = le32_to_cpu(resp->consumer_index);
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -11145,18 +11193,22 @@ static void bnxt_dbg_dump_states(struct bnxt *bp)
static int bnxt_hwrm_rx_ring_reset(struct bnxt *bp, int ring_nr)
{
struct bnxt_rx_ring_info *rxr = &bp->rx_ring[ring_nr];
- struct hwrm_ring_reset_input req = {0};
+ struct hwrm_ring_reset_input *req;
struct bnxt_napi *bnapi = rxr->bnapi;
struct bnxt_cp_ring_info *cpr;
u16 cp_ring_id;
+ int rc;
+
+ rc = hwrm_req_init(bp, req, HWRM_RING_RESET);
+ if (rc)
+ return rc;
cpr = &bnapi->cp_ring;
cp_ring_id = cpr->cp_ring_struct.fw_ring_id;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_RESET, cp_ring_id, -1);
- req.ring_type = RING_RESET_REQ_RING_TYPE_RX_RING_GRP;
- req.ring_id = cpu_to_le16(bp->grp_info[bnapi->index].fw_grp_id);
- return hwrm_send_message_silent(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
+ req->cmpl_ring = cpu_to_le16(cp_ring_id);
+ req->ring_type = RING_RESET_REQ_RING_TYPE_RX_RING_GRP;
+ req->ring_id = cpu_to_le16(bp->grp_info[bnapi->index].fw_grp_id);
+ return hwrm_req_send_silent(bp, req);
}
static void bnxt_reset_task(struct bnxt *bp, bool silent)
@@ -11363,7 +11415,6 @@ static void bnxt_fw_reset_close(struct bnxt *bp)
bnxt_clear_int_mode(bp);
pci_disable_device(bp->pdev);
}
- bnxt_ptp_clear(bp);
__bnxt_close_nic(bp, true, false);
bnxt_vf_reps_free(bp);
bnxt_clear_int_mode(bp);
@@ -11399,13 +11450,20 @@ static bool is_bnxt_fw_ok(struct bnxt *bp)
static void bnxt_force_fw_reset(struct bnxt *bp)
{
struct bnxt_fw_health *fw_health = bp->fw_health;
+ struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
u32 wait_dsecs;
if (!test_bit(BNXT_STATE_OPEN, &bp->state) ||
test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
return;
- set_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+ if (ptp) {
+ spin_lock_bh(&ptp->ptp_lock);
+ set_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+ spin_unlock_bh(&ptp->ptp_lock);
+ } else {
+ set_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+ }
bnxt_fw_reset_close(bp);
wait_dsecs = fw_health->master_func_wait_dsecs;
if (fw_health->master) {
@@ -11461,9 +11519,16 @@ void bnxt_fw_reset(struct bnxt *bp)
bnxt_rtnl_lock_sp(bp);
if (test_bit(BNXT_STATE_OPEN, &bp->state) &&
!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
+ struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
int n = 0, tmo;
- set_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+ if (ptp) {
+ spin_lock_bh(&ptp->ptp_lock);
+ set_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+ spin_unlock_bh(&ptp->ptp_lock);
+ } else {
+ set_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+ }
if (bp->pf.active_vfs &&
!test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
n = bnxt_get_registered_vfs(bp);
@@ -11572,12 +11637,15 @@ static void bnxt_init_ethtool_link_settings(struct bnxt *bp)
static void bnxt_fw_echo_reply(struct bnxt *bp)
{
struct bnxt_fw_health *fw_health = bp->fw_health;
- struct hwrm_func_echo_response_input req = {0};
+ struct hwrm_func_echo_response_input *req;
+ int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_ECHO_RESPONSE, -1, -1);
- req.event_data1 = cpu_to_le32(fw_health->echo_req_data1);
- req.event_data2 = cpu_to_le32(fw_health->echo_req_data2);
- hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_ECHO_RESPONSE);
+ if (rc)
+ return;
+ req->event_data1 = cpu_to_le32(fw_health->echo_req_data1);
+ req->event_data2 = cpu_to_le32(fw_health->echo_req_data2);
+ hwrm_req_send(bp, req);
}
static void bnxt_sp_task(struct work_struct *work)
@@ -11782,18 +11850,6 @@ static int bnxt_fw_init_one_p1(struct bnxt *bp)
return rc;
}
- if (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL) {
- rc = bnxt_alloc_kong_hwrm_resources(bp);
- if (rc)
- bp->fw_cap &= ~BNXT_FW_CAP_KONG_MB_CHNL;
- }
-
- if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
- bp->hwrm_max_ext_req_len > BNXT_HWRM_MAX_REQ_LEN) {
- rc = bnxt_alloc_hwrm_short_cmd_req(bp);
- if (rc)
- return rc;
- }
bnxt_nvm_cfg_ver_get(bp);
rc = bnxt_hwrm_func_reset(bp);
@@ -11968,14 +12024,16 @@ static void bnxt_reset_all(struct bnxt *bp)
for (i = 0; i < fw_health->fw_reset_seq_cnt; i++)
bnxt_fw_reset_writel(bp, i);
} else if (fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU) {
- struct hwrm_fw_reset_input req = {0};
-
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET, -1, -1);
- req.resp_addr = cpu_to_le64(bp->hwrm_cmd_kong_resp_dma_addr);
- req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP;
- req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP;
- req.flags = FW_RESET_REQ_FLAGS_RESET_GRACEFUL;
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ struct hwrm_fw_reset_input *req;
+
+ rc = hwrm_req_init(bp, req, HWRM_FW_RESET);
+ if (!rc) {
+ req->target_id = cpu_to_le16(HWRM_TARGET_ID_KONG);
+ req->embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP;
+ req->selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP;
+ req->flags = FW_RESET_REQ_FLAGS_RESET_GRACEFUL;
+ rc = hwrm_req_send(bp, req);
+ }
if (rc != -ENODEV)
netdev_warn(bp->dev, "Unable to reset FW rc=%d\n", rc);
}
@@ -12102,7 +12160,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
fallthrough;
case BNXT_FW_RESET_STATE_POLL_FW:
bp->hwrm_cmd_timeout = SHORT_HWRM_CMD_TIMEOUT;
- rc = __bnxt_hwrm_ver_get(bp, true);
+ rc = bnxt_hwrm_poll(bp);
if (rc) {
if (bnxt_fw_reset_timeout(bp)) {
netdev_err(bp->dev, "Firmware reset aborted\n");
@@ -12135,6 +12193,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
bnxt_reenable_sriov(bp);
bnxt_vf_reps_alloc(bp);
bnxt_vf_reps_open(bp);
+ bnxt_ptp_reapply_pps(bp);
bnxt_dl_health_recovery_done(bp);
bnxt_dl_health_status_update(bp, true);
rtnl_unlock();
@@ -12666,7 +12725,7 @@ static const struct net_device_ops bnxt_netdev_ops = {
.ndo_stop = bnxt_close,
.ndo_get_stats64 = bnxt_get_stats64,
.ndo_set_rx_mode = bnxt_set_rx_mode,
- .ndo_do_ioctl = bnxt_ioctl,
+ .ndo_eth_ioctl = bnxt_ioctl,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = bnxt_change_mac_addr,
.ndo_change_mtu = bnxt_change_mtu,
@@ -12705,6 +12764,7 @@ static void bnxt_remove_one(struct pci_dev *pdev)
if (BNXT_PF(bp))
devlink_port_type_clear(&bp->dl_port);
+ bnxt_ptp_clear(bp);
pci_disable_pcie_error_reporting(pdev);
unregister_netdev(dev);
clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
@@ -12720,7 +12780,6 @@ static void bnxt_remove_one(struct pci_dev *pdev)
bnxt_clear_int_mode(bp);
bnxt_hwrm_func_drv_unrgtr(bp);
bnxt_free_hwrm_resources(bp);
- bnxt_free_hwrm_short_cmd_req(bp);
bnxt_ethtool_free(bp);
bnxt_dcb_free(bp);
kfree(bp->edev);
@@ -12758,8 +12817,10 @@ static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt)
if (!fw_dflt)
return 0;
+ mutex_lock(&bp->link_lock);
rc = bnxt_update_link(bp, false);
if (rc) {
+ mutex_unlock(&bp->link_lock);
netdev_err(bp->dev, "Probe phy can't update link (rc: %x)\n",
rc);
return rc;
@@ -12772,6 +12833,7 @@ static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt)
link_info->support_auto_speeds = link_info->support_speeds;
bnxt_init_ethtool_link_settings(bp);
+ mutex_unlock(&bp->link_lock);
return 0;
}
@@ -13043,6 +13105,12 @@ static void bnxt_vpd_read_info(struct bnxt *bp)
goto exit;
}
+ i = pci_vpd_find_tag(vpd_data, vpd_size, PCI_VPD_LRDT_RO_DATA);
+ if (i < 0) {
+ netdev_err(bp->dev, "VPD READ-Only not found\n");
+ goto exit;
+ }
+
ro_size = pci_vpd_lrdt_size(&vpd_data[i]);
i += PCI_VPD_LRDT_TAG_SIZE;
if (i + ro_size > vpd_size)
@@ -13314,9 +13382,9 @@ init_err_cleanup:
init_err_pci_clean:
bnxt_hwrm_func_drv_unrgtr(bp);
- bnxt_free_hwrm_short_cmd_req(bp);
bnxt_free_hwrm_resources(bp);
bnxt_ethtool_free(bp);
+ bnxt_ptp_clear(bp);
kfree(bp->ptp_cfg);
bp->ptp_cfg = NULL;
kfree(bp->fw_health);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index bcf8d00b8c80..a8212dcdad5f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -496,6 +496,16 @@ struct rx_tpa_end_cmp_ext {
!!((data1) & \
ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_RECOVERY_ENABLED)
+#define BNXT_EVENT_ERROR_REPORT_TYPE(data1) \
+ (((data1) & \
+ ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK) >>\
+ ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT)
+
+#define BNXT_EVENT_INVALID_SIGNAL_DATA(data2) \
+ (((data2) & \
+ ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA2_PIN_ID_MASK) >>\
+ ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA2_PIN_ID_SFT)
+
struct nqe_cn {
__le16 type;
#define NQ_CN_TYPE_MASK 0x3fUL
@@ -586,15 +596,17 @@ struct nqe_cn {
#define MAX_TPA_SEGS_P5 0x3f
#if (BNXT_PAGE_SHIFT == 16)
-#define MAX_RX_PAGES 1
+#define MAX_RX_PAGES_AGG_ENA 1
+#define MAX_RX_PAGES 4
#define MAX_RX_AGG_PAGES 4
#define MAX_TX_PAGES 1
-#define MAX_CP_PAGES 8
+#define MAX_CP_PAGES 16
#else
-#define MAX_RX_PAGES 8
+#define MAX_RX_PAGES_AGG_ENA 8
+#define MAX_RX_PAGES 32
#define MAX_RX_AGG_PAGES 32
#define MAX_TX_PAGES 8
-#define MAX_CP_PAGES 64
+#define MAX_CP_PAGES 128
#endif
#define RX_DESC_CNT (BNXT_PAGE_SIZE / sizeof(struct rx_bd))
@@ -612,6 +624,7 @@ struct nqe_cn {
#define HW_CMPD_RING_SIZE (sizeof(struct tx_cmp) * CP_DESC_CNT)
#define BNXT_MAX_RX_DESC_CNT (RX_DESC_CNT * MAX_RX_PAGES - 1)
+#define BNXT_MAX_RX_DESC_CNT_JUM_ENA (RX_DESC_CNT * MAX_RX_PAGES_AGG_ENA - 1)
#define BNXT_MAX_RX_JUM_DESC_CNT (RX_DESC_CNT * MAX_RX_AGG_PAGES - 1)
#define BNXT_MAX_TX_DESC_CNT (TX_DESC_CNT * MAX_TX_PAGES - 1)
@@ -656,37 +669,7 @@ struct nqe_cn {
#define RING_CMP(idx) ((idx) & bp->cp_ring_mask)
#define NEXT_CMP(idx) RING_CMP(ADV_RAW_CMP(idx, 1))
-#define BNXT_HWRM_MAX_REQ_LEN (bp->hwrm_max_req_len)
-#define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input)
#define DFLT_HWRM_CMD_TIMEOUT 500
-#define HWRM_CMD_MAX_TIMEOUT 40000
-#define SHORT_HWRM_CMD_TIMEOUT 20
-#define HWRM_CMD_TIMEOUT (bp->hwrm_cmd_timeout)
-#define HWRM_RESET_TIMEOUT ((HWRM_CMD_TIMEOUT) * 4)
-#define HWRM_COREDUMP_TIMEOUT ((HWRM_CMD_TIMEOUT) * 12)
-#define BNXT_HWRM_REQ_MAX_SIZE 128
-#define BNXT_HWRM_REQS_PER_PAGE (BNXT_PAGE_SIZE / \
- BNXT_HWRM_REQ_MAX_SIZE)
-#define HWRM_SHORT_MIN_TIMEOUT 3
-#define HWRM_SHORT_MAX_TIMEOUT 10
-#define HWRM_SHORT_TIMEOUT_COUNTER 5
-
-#define HWRM_MIN_TIMEOUT 25
-#define HWRM_MAX_TIMEOUT 40
-
-#define HWRM_WAIT_MUST_ABORT(bp, req) \
- (le16_to_cpu((req)->req_type) != HWRM_VER_GET && \
- !bnxt_is_fw_healthy(bp))
-
-#define HWRM_TOTAL_TIMEOUT(n) (((n) <= HWRM_SHORT_TIMEOUT_COUNTER) ? \
- ((n) * HWRM_SHORT_MIN_TIMEOUT) : \
- (HWRM_SHORT_TIMEOUT_COUNTER * HWRM_SHORT_MIN_TIMEOUT + \
- ((n) - HWRM_SHORT_TIMEOUT_COUNTER) * HWRM_MIN_TIMEOUT))
-
-#define HWRM_VALID_BIT_DELAY_USEC 150
-
-#define BNXT_HWRM_CHNL_CHIMP 0
-#define BNXT_HWRM_CHNL_KONG 1
#define BNXT_RX_EVENT 1
#define BNXT_AGG_EVENT 2
@@ -786,6 +769,7 @@ struct bnxt_tx_ring_info {
u16 tx_prod;
u16 tx_cons;
u16 txq_index;
+ u8 kick_pending;
struct bnxt_db_info tx_db;
struct tx_bd *tx_desc_ring[MAX_TX_PAGES];
@@ -925,6 +909,8 @@ struct bnxt_rx_sw_stats {
u64 rx_l4_csum_errors;
u64 rx_resets;
u64 rx_buf_errors;
+ u64 rx_oom_discards;
+ u64 rx_netpoll_discards;
};
struct bnxt_cmn_sw_stats {
@@ -962,11 +948,11 @@ struct bnxt_cp_ring_info {
struct dim dim;
union {
- struct tx_cmp *cp_desc_ring[MAX_CP_PAGES];
- struct nqe_cn *nq_desc_ring[MAX_CP_PAGES];
+ struct tx_cmp **cp_desc_ring;
+ struct nqe_cn **nq_desc_ring;
};
- dma_addr_t cp_desc_mapping[MAX_CP_PAGES];
+ dma_addr_t *cp_desc_mapping;
struct bnxt_stats_mem stats;
u32 hw_stats_ctx_id;
@@ -1887,19 +1873,15 @@ struct bnxt {
#define BNXT_FW_CAP_VLAN_RX_STRIP 0x01000000
#define BNXT_FW_CAP_VLAN_TX_INSERT 0x02000000
#define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED 0x04000000
+ #define BNXT_FW_CAP_PTP_PPS 0x10000000
#define BNXT_FW_CAP_RING_MONITOR 0x40000000
#define BNXT_NEW_RM(bp) ((bp)->fw_cap & BNXT_FW_CAP_NEW_RM)
u32 hwrm_spec_code;
u16 hwrm_cmd_seq;
u16 hwrm_cmd_kong_seq;
- u16 hwrm_intr_seq_id;
- void *hwrm_short_cmd_req_addr;
- dma_addr_t hwrm_short_cmd_req_dma_addr;
- void *hwrm_cmd_resp_addr;
- dma_addr_t hwrm_cmd_resp_dma_addr;
- void *hwrm_cmd_kong_resp_addr;
- dma_addr_t hwrm_cmd_kong_resp_dma_addr;
+ struct dma_pool *hwrm_dma_pool;
+ struct hlist_head hwrm_pending_list;
struct rtnl_link_stats64 net_stats_prev;
struct bnxt_stats_mem port_stats;
@@ -1999,7 +1981,7 @@ struct bnxt {
struct mutex sriov_lock;
#endif
-#if BITS_PER_LONG == 32
+#ifndef writeq
/* ensure atomic 64-bit doorbell writes on 32-bit systems. */
spinlock_t db_lock;
#endif
@@ -2128,7 +2110,7 @@ static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr)
((txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask);
}
-#if BITS_PER_LONG == 32
+#ifndef writeq
#define writeq(val64, db) \
do { \
spin_lock(&bp->db_lock); \
@@ -2170,63 +2152,6 @@ static inline void bnxt_db_write(struct bnxt *bp, struct bnxt_db_info *db,
}
}
-static inline bool bnxt_cfa_hwrm_message(u16 req_type)
-{
- switch (req_type) {
- case HWRM_CFA_ENCAP_RECORD_ALLOC:
- case HWRM_CFA_ENCAP_RECORD_FREE:
- case HWRM_CFA_DECAP_FILTER_ALLOC:
- case HWRM_CFA_DECAP_FILTER_FREE:
- case HWRM_CFA_EM_FLOW_ALLOC:
- case HWRM_CFA_EM_FLOW_FREE:
- case HWRM_CFA_EM_FLOW_CFG:
- case HWRM_CFA_FLOW_ALLOC:
- case HWRM_CFA_FLOW_FREE:
- case HWRM_CFA_FLOW_INFO:
- case HWRM_CFA_FLOW_FLUSH:
- case HWRM_CFA_FLOW_STATS:
- case HWRM_CFA_METER_PROFILE_ALLOC:
- case HWRM_CFA_METER_PROFILE_FREE:
- case HWRM_CFA_METER_PROFILE_CFG:
- case HWRM_CFA_METER_INSTANCE_ALLOC:
- case HWRM_CFA_METER_INSTANCE_FREE:
- return true;
- default:
- return false;
- }
-}
-
-static inline bool bnxt_kong_hwrm_message(struct bnxt *bp, struct input *req)
-{
- return (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL &&
- bnxt_cfa_hwrm_message(le16_to_cpu(req->req_type)));
-}
-
-static inline bool bnxt_hwrm_kong_chnl(struct bnxt *bp, struct input *req)
-{
- return (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL &&
- req->resp_addr == cpu_to_le64(bp->hwrm_cmd_kong_resp_dma_addr));
-}
-
-static inline void *bnxt_get_hwrm_resp_addr(struct bnxt *bp, void *req)
-{
- if (bnxt_hwrm_kong_chnl(bp, (struct input *)req))
- return bp->hwrm_cmd_kong_resp_addr;
- else
- return bp->hwrm_cmd_resp_addr;
-}
-
-static inline u16 bnxt_get_hwrm_seq_id(struct bnxt *bp, u16 dst)
-{
- u16 seq_id;
-
- if (dst == BNXT_HWRM_CHNL_CHIMP)
- seq_id = bp->hwrm_cmd_seq++;
- else
- seq_id = bp->hwrm_cmd_kong_seq++;
- return seq_id;
-}
-
extern const u16 bnxt_lhint_arr[];
int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
@@ -2236,11 +2161,6 @@ u32 bnxt_fw_health_readl(struct bnxt *bp, int reg_idx);
void bnxt_set_tpa_flags(struct bnxt *bp);
void bnxt_set_ring_params(struct bnxt *);
int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode);
-void bnxt_hwrm_cmd_hdr_init(struct bnxt *, void *, u16, u16, u16);
-int _hwrm_send_message(struct bnxt *, void *, u32, int);
-int _hwrm_send_message_silent(struct bnxt *bp, void *msg, u32 len, int timeout);
-int hwrm_send_message(struct bnxt *, void *, u32, int);
-int hwrm_send_message_silent(struct bnxt *, void *, u32, int);
int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap,
int bmap_size, bool async_only);
int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index 8a68df4d9e59..228a5db7e143 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -18,6 +18,7 @@
#include <rdma/ib_verbs.h>
#include "bnxt_hsi.h"
#include "bnxt.h"
+#include "bnxt_hwrm.h"
#include "bnxt_dcb.h"
#ifdef CONFIG_BNXT_DCB
@@ -38,38 +39,43 @@ static int bnxt_queue_to_tc(struct bnxt *bp, u8 queue_id)
static int bnxt_hwrm_queue_pri2cos_cfg(struct bnxt *bp, struct ieee_ets *ets)
{
- struct hwrm_queue_pri2cos_cfg_input req = {0};
+ struct hwrm_queue_pri2cos_cfg_input *req;
u8 *pri2cos;
- int i;
+ int rc, i;
+
+ rc = hwrm_req_init(bp, req, HWRM_QUEUE_PRI2COS_CFG);
+ if (rc)
+ return rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PRI2COS_CFG, -1, -1);
- req.flags = cpu_to_le32(QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR |
- QUEUE_PRI2COS_CFG_REQ_FLAGS_IVLAN);
+ req->flags = cpu_to_le32(QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR |
+ QUEUE_PRI2COS_CFG_REQ_FLAGS_IVLAN);
- pri2cos = &req.pri0_cos_queue_id;
+ pri2cos = &req->pri0_cos_queue_id;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
u8 qidx;
- req.enables |= cpu_to_le32(
+ req->enables |= cpu_to_le32(
QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI0_COS_QUEUE_ID << i);
qidx = bp->tc_to_qidx[ets->prio_tc[i]];
pri2cos[i] = bp->q_info[qidx].queue_id;
}
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ return hwrm_req_send(bp, req);
}
static int bnxt_hwrm_queue_pri2cos_qcfg(struct bnxt *bp, struct ieee_ets *ets)
{
- struct hwrm_queue_pri2cos_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_queue_pri2cos_qcfg_input req = {0};
- int rc = 0;
+ struct hwrm_queue_pri2cos_qcfg_output *resp;
+ struct hwrm_queue_pri2cos_qcfg_input *req;
+ int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PRI2COS_QCFG, -1, -1);
- req.flags = cpu_to_le32(QUEUE_PRI2COS_QCFG_REQ_FLAGS_IVLAN);
+ rc = hwrm_req_init(bp, req, HWRM_QUEUE_PRI2COS_QCFG);
+ if (rc)
+ return rc;
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->flags = cpu_to_le32(QUEUE_PRI2COS_QCFG_REQ_FLAGS_IVLAN);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc) {
u8 *pri2cos = &resp->pri0_cos_queue_id;
int i;
@@ -83,23 +89,26 @@ static int bnxt_hwrm_queue_pri2cos_qcfg(struct bnxt *bp, struct ieee_ets *ets)
ets->prio_tc[i] = tc;
}
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets,
u8 max_tc)
{
- struct hwrm_queue_cos2bw_cfg_input req = {0};
+ struct hwrm_queue_cos2bw_cfg_input *req;
struct bnxt_cos2bw_cfg cos2bw;
void *data;
- int i;
+ int rc, i;
+
+ rc = hwrm_req_init(bp, req, HWRM_QUEUE_COS2BW_CFG);
+ if (rc)
+ return rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_CFG, -1, -1);
for (i = 0; i < max_tc; i++) {
u8 qidx = bp->tc_to_qidx[i];
- req.enables |= cpu_to_le32(
+ req->enables |= cpu_to_le32(
QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID <<
qidx);
@@ -120,30 +129,32 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets,
cpu_to_le32((ets->tc_tx_bw[i] * 100) |
BW_VALUE_UNIT_PERCENT1_100);
}
- data = &req.unused_0 + qidx * (sizeof(cos2bw) - 4);
+ data = &req->unused_0 + qidx * (sizeof(cos2bw) - 4);
memcpy(data, &cos2bw.queue_id, sizeof(cos2bw) - 4);
if (qidx == 0) {
- req.queue_id0 = cos2bw.queue_id;
- req.unused_0 = 0;
+ req->queue_id0 = cos2bw.queue_id;
+ req->unused_0 = 0;
}
}
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ return hwrm_req_send(bp, req);
}
static int bnxt_hwrm_queue_cos2bw_qcfg(struct bnxt *bp, struct ieee_ets *ets)
{
- struct hwrm_queue_cos2bw_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_queue_cos2bw_qcfg_input req = {0};
+ struct hwrm_queue_cos2bw_qcfg_output *resp;
+ struct hwrm_queue_cos2bw_qcfg_input *req;
struct bnxt_cos2bw_cfg cos2bw;
void *data;
int rc, i;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_QCFG, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_QUEUE_COS2BW_QCFG);
+ if (rc)
+ return rc;
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (rc) {
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -167,7 +178,7 @@ static int bnxt_hwrm_queue_cos2bw_qcfg(struct bnxt *bp, struct ieee_ets *ets)
ets->tc_tx_bw[tc] = cos2bw.bw_weight;
}
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return 0;
}
@@ -229,11 +240,12 @@ static int bnxt_queue_remap(struct bnxt *bp, unsigned int lltc_mask)
static int bnxt_hwrm_queue_pfc_cfg(struct bnxt *bp, struct ieee_pfc *pfc)
{
- struct hwrm_queue_pfcenable_cfg_input req = {0};
+ struct hwrm_queue_pfcenable_cfg_input *req;
struct ieee_ets *my_ets = bp->ieee_ets;
unsigned int tc_mask = 0, pri_mask = 0;
u8 i, pri, lltc_count = 0;
bool need_q_remap = false;
+ int rc;
if (!my_ets)
return -EINVAL;
@@ -266,38 +278,43 @@ static int bnxt_hwrm_queue_pfc_cfg(struct bnxt *bp, struct ieee_pfc *pfc)
if (need_q_remap)
bnxt_queue_remap(bp, tc_mask);
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PFCENABLE_CFG, -1, -1);
- req.flags = cpu_to_le32(pri_mask);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_QUEUE_PFCENABLE_CFG);
+ if (rc)
+ return rc;
+
+ req->flags = cpu_to_le32(pri_mask);
+ return hwrm_req_send(bp, req);
}
static int bnxt_hwrm_queue_pfc_qcfg(struct bnxt *bp, struct ieee_pfc *pfc)
{
- struct hwrm_queue_pfcenable_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_queue_pfcenable_qcfg_input req = {0};
+ struct hwrm_queue_pfcenable_qcfg_output *resp;
+ struct hwrm_queue_pfcenable_qcfg_input *req;
u8 pri_mask;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PFCENABLE_QCFG, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_QUEUE_PFCENABLE_QCFG);
+ if (rc)
+ return rc;
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (rc) {
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
pri_mask = le32_to_cpu(resp->flags);
pfc->pfc_en = pri_mask;
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return 0;
}
static int bnxt_hwrm_set_dcbx_app(struct bnxt *bp, struct dcb_app *app,
bool add)
{
- struct hwrm_fw_set_structured_data_input set = {0};
- struct hwrm_fw_get_structured_data_input get = {0};
+ struct hwrm_fw_set_structured_data_input *set;
+ struct hwrm_fw_get_structured_data_input *get;
struct hwrm_struct_data_dcbx_app *fw_app;
struct hwrm_struct_hdr *data;
dma_addr_t mapping;
@@ -307,19 +324,26 @@ static int bnxt_hwrm_set_dcbx_app(struct bnxt *bp, struct dcb_app *app,
if (bp->hwrm_spec_code < 0x10601)
return 0;
+ rc = hwrm_req_init(bp, get, HWRM_FW_GET_STRUCTURED_DATA);
+ if (rc)
+ return rc;
+
+ hwrm_req_hold(bp, get);
+ hwrm_req_alloc_flags(bp, get, GFP_KERNEL | __GFP_ZERO);
+
n = IEEE_8021QAZ_MAX_TCS;
data_len = sizeof(*data) + sizeof(*fw_app) * n;
- data = dma_alloc_coherent(&bp->pdev->dev, data_len, &mapping,
- GFP_KERNEL);
- if (!data)
- return -ENOMEM;
+ data = hwrm_req_dma_slice(bp, get, data_len, &mapping);
+ if (!data) {
+ rc = -ENOMEM;
+ goto set_app_exit;
+ }
- bnxt_hwrm_cmd_hdr_init(bp, &get, HWRM_FW_GET_STRUCTURED_DATA, -1, -1);
- get.dest_data_addr = cpu_to_le64(mapping);
- get.structure_id = cpu_to_le16(STRUCT_HDR_STRUCT_ID_DCBX_APP);
- get.subtype = cpu_to_le16(HWRM_STRUCT_DATA_SUBTYPE_HOST_OPERATIONAL);
- get.count = 0;
- rc = hwrm_send_message(bp, &get, sizeof(get), HWRM_CMD_TIMEOUT);
+ get->dest_data_addr = cpu_to_le64(mapping);
+ get->structure_id = cpu_to_le16(STRUCT_HDR_STRUCT_ID_DCBX_APP);
+ get->subtype = cpu_to_le16(HWRM_STRUCT_DATA_SUBTYPE_HOST_OPERATIONAL);
+ get->count = 0;
+ rc = hwrm_req_send(bp, get);
if (rc)
goto set_app_exit;
@@ -365,44 +389,49 @@ static int bnxt_hwrm_set_dcbx_app(struct bnxt *bp, struct dcb_app *app,
data->len = cpu_to_le16(sizeof(*fw_app) * n);
data->subtype = cpu_to_le16(HWRM_STRUCT_DATA_SUBTYPE_HOST_OPERATIONAL);
- bnxt_hwrm_cmd_hdr_init(bp, &set, HWRM_FW_SET_STRUCTURED_DATA, -1, -1);
- set.src_data_addr = cpu_to_le64(mapping);
- set.data_len = cpu_to_le16(sizeof(*data) + sizeof(*fw_app) * n);
- set.hdr_cnt = 1;
- rc = hwrm_send_message(bp, &set, sizeof(set), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, set, HWRM_FW_SET_STRUCTURED_DATA);
+ if (rc)
+ goto set_app_exit;
+
+ set->src_data_addr = cpu_to_le64(mapping);
+ set->data_len = cpu_to_le16(sizeof(*data) + sizeof(*fw_app) * n);
+ set->hdr_cnt = 1;
+ rc = hwrm_req_send(bp, set);
set_app_exit:
- dma_free_coherent(&bp->pdev->dev, data_len, data, mapping);
+ hwrm_req_drop(bp, get); /* dropping get request and associated slice */
return rc;
}
static int bnxt_hwrm_queue_dscp_qcaps(struct bnxt *bp)
{
- struct hwrm_queue_dscp_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_queue_dscp_qcaps_input req = {0};
+ struct hwrm_queue_dscp_qcaps_output *resp;
+ struct hwrm_queue_dscp_qcaps_input *req;
int rc;
bp->max_dscp_value = 0;
if (bp->hwrm_spec_code < 0x10800 || BNXT_VF(bp))
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_DSCP_QCAPS, -1, -1);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_QUEUE_DSCP_QCAPS);
+ if (rc)
+ return rc;
+
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send_silent(bp, req);
if (!rc) {
bp->max_dscp_value = (1 << resp->num_dscp_bits) - 1;
if (bp->max_dscp_value < 0x3f)
bp->max_dscp_value = 0;
}
-
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
static int bnxt_hwrm_queue_dscp2pri_cfg(struct bnxt *bp, struct dcb_app *app,
bool add)
{
- struct hwrm_queue_dscp2pri_cfg_input req = {0};
+ struct hwrm_queue_dscp2pri_cfg_input *req;
struct bnxt_dscp2pri_entry *dscp2pri;
dma_addr_t mapping;
int rc;
@@ -410,23 +439,25 @@ static int bnxt_hwrm_queue_dscp2pri_cfg(struct bnxt *bp, struct dcb_app *app,
if (bp->hwrm_spec_code < 0x10800)
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_DSCP2PRI_CFG, -1, -1);
- dscp2pri = dma_alloc_coherent(&bp->pdev->dev, sizeof(*dscp2pri),
- &mapping, GFP_KERNEL);
- if (!dscp2pri)
+ rc = hwrm_req_init(bp, req, HWRM_QUEUE_DSCP2PRI_CFG);
+ if (rc)
+ return rc;
+
+ dscp2pri = hwrm_req_dma_slice(bp, req, sizeof(*dscp2pri), &mapping);
+ if (!dscp2pri) {
+ hwrm_req_drop(bp, req);
return -ENOMEM;
+ }
- req.src_data_addr = cpu_to_le64(mapping);
+ req->src_data_addr = cpu_to_le64(mapping);
dscp2pri->dscp = app->protocol;
if (add)
dscp2pri->mask = 0x3f;
else
dscp2pri->mask = 0;
dscp2pri->pri = app->priority;
- req.entry_cnt = cpu_to_le16(1);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
- dma_free_coherent(&bp->pdev->dev, sizeof(*dscp2pri), dscp2pri,
- mapping);
+ req->entry_cnt = cpu_to_le16(1);
+ rc = hwrm_req_send(bp, req);
return rc;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 64381be935a8..1423cc617d93 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -12,6 +12,7 @@
#include <net/devlink.h>
#include "bnxt_hsi.h"
#include "bnxt.h"
+#include "bnxt_hwrm.h"
#include "bnxt_vfr.h"
#include "bnxt_devlink.h"
#include "bnxt_ethtool.h"
@@ -354,28 +355,34 @@ static void bnxt_copy_from_nvm_data(union devlink_param_value *dst,
static int bnxt_hwrm_get_nvm_cfg_ver(struct bnxt *bp,
union devlink_param_value *nvm_cfg_ver)
{
- struct hwrm_nvm_get_variable_input req = {0};
+ struct hwrm_nvm_get_variable_input *req;
union bnxt_nvm_data *data;
dma_addr_t data_dma_addr;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_VARIABLE, -1, -1);
- data = dma_alloc_coherent(&bp->pdev->dev, sizeof(*data),
- &data_dma_addr, GFP_KERNEL);
- if (!data)
- return -ENOMEM;
+ rc = hwrm_req_init(bp, req, HWRM_NVM_GET_VARIABLE);
+ if (rc)
+ return rc;
+
+ data = hwrm_req_dma_slice(bp, req, sizeof(*data), &data_dma_addr);
+ if (!data) {
+ rc = -ENOMEM;
+ goto exit;
+ }
- req.dest_data_addr = cpu_to_le64(data_dma_addr);
- req.data_len = cpu_to_le16(BNXT_NVM_CFG_VER_BITS);
- req.option_num = cpu_to_le16(NVM_OFF_NVM_CFG_VER);
+ hwrm_req_hold(bp, req);
+ req->dest_data_addr = cpu_to_le64(data_dma_addr);
+ req->data_len = cpu_to_le16(BNXT_NVM_CFG_VER_BITS);
+ req->option_num = cpu_to_le16(NVM_OFF_NVM_CFG_VER);
- rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send_silent(bp, req);
if (!rc)
bnxt_copy_from_nvm_data(nvm_cfg_ver, data,
BNXT_NVM_CFG_VER_BITS,
BNXT_NVM_CFG_VER_BYTES);
- dma_free_coherent(&bp->pdev->dev, sizeof(*data), data, data_dma_addr);
+exit:
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -562,17 +569,20 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
}
static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
- int msg_len, union devlink_param_value *val)
+ union devlink_param_value *val)
{
struct hwrm_nvm_get_variable_input *req = msg;
struct bnxt_dl_nvm_param nvm_param;
+ struct hwrm_err_output *resp;
union bnxt_nvm_data *data;
dma_addr_t data_dma_addr;
int idx = 0, rc, i;
/* Get/Set NVM CFG parameter is supported only on PFs */
- if (BNXT_VF(bp))
+ if (BNXT_VF(bp)) {
+ hwrm_req_drop(bp, req);
return -EPERM;
+ }
for (i = 0; i < ARRAY_SIZE(nvm_params); i++) {
if (nvm_params[i].id == param_id) {
@@ -581,18 +591,22 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
}
}
- if (i == ARRAY_SIZE(nvm_params))
+ if (i == ARRAY_SIZE(nvm_params)) {
+ hwrm_req_drop(bp, req);
return -EOPNOTSUPP;
+ }
if (nvm_param.dir_type == BNXT_NVM_PORT_CFG)
idx = bp->pf.port_id;
else if (nvm_param.dir_type == BNXT_NVM_FUNC_CFG)
idx = bp->pf.fw_fid - BNXT_FIRST_PF_FID;
- data = dma_alloc_coherent(&bp->pdev->dev, sizeof(*data),
- &data_dma_addr, GFP_KERNEL);
- if (!data)
+ data = hwrm_req_dma_slice(bp, req, sizeof(*data), &data_dma_addr);
+
+ if (!data) {
+ hwrm_req_drop(bp, req);
return -ENOMEM;
+ }
req->dest_data_addr = cpu_to_le64(data_dma_addr);
req->data_len = cpu_to_le16(nvm_param.nvm_num_bits);
@@ -601,26 +615,24 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
if (idx)
req->dimensions = cpu_to_le16(1);
+ resp = hwrm_req_hold(bp, req);
if (req->req_type == cpu_to_le16(HWRM_NVM_SET_VARIABLE)) {
bnxt_copy_to_nvm_data(data, val, nvm_param.nvm_num_bits,
nvm_param.dl_num_bytes);
- rc = hwrm_send_message(bp, msg, msg_len, HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, msg);
} else {
- rc = hwrm_send_message_silent(bp, msg, msg_len,
- HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send_silent(bp, msg);
if (!rc) {
bnxt_copy_from_nvm_data(val, data,
nvm_param.nvm_num_bits,
nvm_param.dl_num_bytes);
} else {
- struct hwrm_err_output *resp = bp->hwrm_cmd_resp_addr;
-
if (resp->cmd_err ==
NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST)
rc = -EOPNOTSUPP;
}
}
- dma_free_coherent(&bp->pdev->dev, sizeof(*data), data, data_dma_addr);
+ hwrm_req_drop(bp, req);
if (rc == -EACCES)
netdev_err(bp->dev, "PF does not have admin privileges to modify NVM config\n");
return rc;
@@ -629,15 +641,17 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
static int bnxt_dl_nvm_param_get(struct devlink *dl, u32 id,
struct devlink_param_gset_ctx *ctx)
{
- struct hwrm_nvm_get_variable_input req = {0};
struct bnxt *bp = bnxt_get_bp_from_dl(dl);
+ struct hwrm_nvm_get_variable_input *req;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_VARIABLE, -1, -1);
- rc = bnxt_hwrm_nvm_req(bp, id, &req, sizeof(req), &ctx->val);
- if (!rc)
- if (id == BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK)
- ctx->val.vbool = !ctx->val.vbool;
+ rc = hwrm_req_init(bp, req, HWRM_NVM_GET_VARIABLE);
+ if (rc)
+ return rc;
+
+ rc = bnxt_hwrm_nvm_req(bp, id, req, &ctx->val);
+ if (!rc && id == BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK)
+ ctx->val.vbool = !ctx->val.vbool;
return rc;
}
@@ -645,15 +659,18 @@ static int bnxt_dl_nvm_param_get(struct devlink *dl, u32 id,
static int bnxt_dl_nvm_param_set(struct devlink *dl, u32 id,
struct devlink_param_gset_ctx *ctx)
{
- struct hwrm_nvm_set_variable_input req = {0};
struct bnxt *bp = bnxt_get_bp_from_dl(dl);
+ struct hwrm_nvm_set_variable_input *req;
+ int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_SET_VARIABLE, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_NVM_SET_VARIABLE);
+ if (rc)
+ return rc;
if (id == BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK)
ctx->val.vbool = !ctx->val.vbool;
- return bnxt_hwrm_nvm_req(bp, id, &req, sizeof(req), &ctx->val);
+ return bnxt_hwrm_nvm_req(bp, id, req, &ctx->val);
}
static int bnxt_dl_msix_validate(struct devlink *dl, u32 id,
@@ -743,14 +760,17 @@ static void bnxt_dl_params_unregister(struct bnxt *bp)
int bnxt_dl_register(struct bnxt *bp)
{
+ const struct devlink_ops *devlink_ops;
struct devlink_port_attrs attrs = {};
struct devlink *dl;
int rc;
if (BNXT_PF(bp))
- dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
+ devlink_ops = &bnxt_dl_ops;
else
- dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl));
+ devlink_ops = &bnxt_vf_dl_ops;
+
+ dl = devlink_alloc(devlink_ops, sizeof(struct bnxt_dl), &bp->pdev->dev);
if (!dl) {
netdev_warn(bp->dev, "devlink_alloc failed\n");
return -ENOMEM;
@@ -763,7 +783,7 @@ int bnxt_dl_register(struct bnxt *bp)
bp->hwrm_spec_code > 0x10803)
bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
- rc = devlink_register(dl, &bp->pdev->dev);
+ rc = devlink_register(dl);
if (rc) {
netdev_warn(bp->dev, "devlink_register failed. rc=%d\n", rc);
goto err_dl_free;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 786ca51e669b..b056e3c29bbd 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -24,6 +24,7 @@
#include <linux/timecounter.h>
#include "bnxt_hsi.h"
#include "bnxt.h"
+#include "bnxt_hwrm.h"
#include "bnxt_xdp.h"
#include "bnxt_ptp.h"
#include "bnxt_ethtool.h"
@@ -49,7 +50,9 @@ static void bnxt_set_msglevel(struct net_device *dev, u32 value)
}
static int bnxt_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct bnxt *bp = netdev_priv(dev);
struct bnxt_coal *hw_coal;
@@ -79,7 +82,9 @@ static int bnxt_get_coalesce(struct net_device *dev,
}
static int bnxt_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct bnxt *bp = netdev_priv(dev);
bool update_stats = false;
@@ -303,6 +308,7 @@ static const char * const bnxt_cmn_sw_stats_str[] = {
enum {
RX_TOTAL_DISCARDS,
TX_TOTAL_DISCARDS,
+ RX_NETPOLL_DISCARDS,
};
static struct {
@@ -311,6 +317,7 @@ static struct {
} bnxt_sw_func_stats[] = {
{0, "rx_total_discard_pkts"},
{0, "tx_total_discard_pkts"},
+ {0, "rx_total_netpoll_discards"},
};
#define NUM_RING_RX_SW_STATS ARRAY_SIZE(bnxt_rx_sw_stats_str)
@@ -599,6 +606,8 @@ skip_tpa_ring_stats:
BNXT_GET_RING_STATS64(sw_stats, rx_discard_pkts);
bnxt_sw_func_stats[TX_TOTAL_DISCARDS].counter +=
BNXT_GET_RING_STATS64(sw_stats, tx_discard_pkts);
+ bnxt_sw_func_stats[RX_NETPOLL_DISCARDS].counter +=
+ cpr->sw_stats.rx.rx_netpoll_discards;
}
for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++, j++)
@@ -768,8 +777,13 @@ static void bnxt_get_ringparam(struct net_device *dev,
{
struct bnxt *bp = netdev_priv(dev);
- ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT;
- ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT;
+ if (bp->flags & BNXT_FLAG_AGG_RINGS) {
+ ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT_JUM_ENA;
+ ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT;
+ } else {
+ ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT;
+ ering->rx_jumbo_max_pending = 0;
+ }
ering->tx_max_pending = BNXT_MAX_TX_DESC_CNT;
ering->rx_pending = bp->rx_ring_size;
@@ -1352,7 +1366,7 @@ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs,
void *_p)
{
struct pcie_ctx_hw_stats *hw_pcie_stats;
- struct hwrm_pcie_qstats_input req = {0};
+ struct hwrm_pcie_qstats_input *req;
struct bnxt *bp = netdev_priv(dev);
dma_addr_t hw_pcie_stats_addr;
int rc;
@@ -1363,18 +1377,21 @@ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs,
if (!(bp->fw_cap & BNXT_FW_CAP_PCIE_STATS_SUPPORTED))
return;
- hw_pcie_stats = dma_alloc_coherent(&bp->pdev->dev,
- sizeof(*hw_pcie_stats),
- &hw_pcie_stats_addr, GFP_KERNEL);
- if (!hw_pcie_stats)
+ if (hwrm_req_init(bp, req, HWRM_PCIE_QSTATS))
return;
+ hw_pcie_stats = hwrm_req_dma_slice(bp, req, sizeof(*hw_pcie_stats),
+ &hw_pcie_stats_addr);
+ if (!hw_pcie_stats) {
+ hwrm_req_drop(bp, req);
+ return;
+ }
+
regs->version = 1;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PCIE_QSTATS, -1, -1);
- req.pcie_stat_size = cpu_to_le16(sizeof(*hw_pcie_stats));
- req.pcie_stat_host_addr = cpu_to_le64(hw_pcie_stats_addr);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ hwrm_req_hold(bp, req); /* hold on to slice */
+ req->pcie_stat_size = cpu_to_le16(sizeof(*hw_pcie_stats));
+ req->pcie_stat_host_addr = cpu_to_le64(hw_pcie_stats_addr);
+ rc = hwrm_req_send(bp, req);
if (!rc) {
__le64 *src = (__le64 *)hw_pcie_stats;
u64 *dst = (u64 *)(_p + BNXT_PXP_REG_LEN);
@@ -1383,9 +1400,7 @@ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs,
for (i = 0; i < sizeof(*hw_pcie_stats) / sizeof(__le64); i++)
dst[i] = le64_to_cpu(src[i]);
}
- mutex_unlock(&bp->hwrm_cmd_lock);
- dma_free_coherent(&bp->pdev->dev, sizeof(*hw_pcie_stats), hw_pcie_stats,
- hw_pcie_stats_addr);
+ hwrm_req_drop(bp, req);
}
static void bnxt_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
@@ -1965,7 +1980,7 @@ static u32 bnxt_ethtool_forced_fec_to_fw(struct bnxt_link_info *link_info,
static int bnxt_set_fecparam(struct net_device *dev,
struct ethtool_fecparam *fecparam)
{
- struct hwrm_port_phy_cfg_input req = {0};
+ struct hwrm_port_phy_cfg_input *req;
struct bnxt *bp = netdev_priv(dev);
struct bnxt_link_info *link_info;
u32 new_cfg, fec = fecparam->fec;
@@ -1997,9 +2012,11 @@ static int bnxt_set_fecparam(struct net_device *dev,
}
apply_fec:
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1);
- req.flags = cpu_to_le32(new_cfg | PORT_PHY_CFG_REQ_FLAGS_RESET_PHY);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_CFG);
+ if (rc)
+ return rc;
+ req->flags = cpu_to_le32(new_cfg | PORT_PHY_CFG_REQ_FLAGS_RESET_PHY);
+ rc = hwrm_req_send(bp, req);
/* update current settings */
if (!rc) {
mutex_lock(&bp->link_lock);
@@ -2093,19 +2110,22 @@ static u32 bnxt_get_link(struct net_device *dev)
int bnxt_hwrm_nvm_get_dev_info(struct bnxt *bp,
struct hwrm_nvm_get_dev_info_output *nvm_dev_info)
{
- struct hwrm_nvm_get_dev_info_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_nvm_get_dev_info_input req = {0};
+ struct hwrm_nvm_get_dev_info_output *resp;
+ struct hwrm_nvm_get_dev_info_input *req;
int rc;
if (BNXT_VF(bp))
return -EOPNOTSUPP;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_DEV_INFO, -1, -1);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_NVM_GET_DEV_INFO);
+ if (rc)
+ return rc;
+
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc)
memcpy(nvm_dev_info, resp, sizeof(*resp));
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -2118,77 +2138,67 @@ static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
u16 ext, u16 *index, u32 *item_length,
u32 *data_length);
-static int __bnxt_flash_nvram(struct net_device *dev, u16 dir_type,
- u16 dir_ordinal, u16 dir_ext, u16 dir_attr,
- u32 dir_item_len, const u8 *data,
- size_t data_len)
+static int bnxt_flash_nvram(struct net_device *dev, u16 dir_type,
+ u16 dir_ordinal, u16 dir_ext, u16 dir_attr,
+ u32 dir_item_len, const u8 *data,
+ size_t data_len)
{
struct bnxt *bp = netdev_priv(dev);
+ struct hwrm_nvm_write_input *req;
int rc;
- struct hwrm_nvm_write_input req = {0};
- dma_addr_t dma_handle;
- u8 *kmem = NULL;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_WRITE, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_NVM_WRITE);
+ if (rc)
+ return rc;
- req.dir_type = cpu_to_le16(dir_type);
- req.dir_ordinal = cpu_to_le16(dir_ordinal);
- req.dir_ext = cpu_to_le16(dir_ext);
- req.dir_attr = cpu_to_le16(dir_attr);
- req.dir_item_length = cpu_to_le32(dir_item_len);
if (data_len && data) {
- req.dir_data_length = cpu_to_le32(data_len);
+ dma_addr_t dma_handle;
+ u8 *kmem;
- kmem = dma_alloc_coherent(&bp->pdev->dev, data_len, &dma_handle,
- GFP_KERNEL);
- if (!kmem)
+ kmem = hwrm_req_dma_slice(bp, req, data_len, &dma_handle);
+ if (!kmem) {
+ hwrm_req_drop(bp, req);
return -ENOMEM;
+ }
+
+ req->dir_data_length = cpu_to_le32(data_len);
memcpy(kmem, data, data_len);
- req.host_src_addr = cpu_to_le64(dma_handle);
+ req->host_src_addr = cpu_to_le64(dma_handle);
}
- rc = _hwrm_send_message(bp, &req, sizeof(req), FLASH_NVRAM_TIMEOUT);
- if (kmem)
- dma_free_coherent(&bp->pdev->dev, data_len, kmem, dma_handle);
+ hwrm_req_timeout(bp, req, FLASH_NVRAM_TIMEOUT);
+ req->dir_type = cpu_to_le16(dir_type);
+ req->dir_ordinal = cpu_to_le16(dir_ordinal);
+ req->dir_ext = cpu_to_le16(dir_ext);
+ req->dir_attr = cpu_to_le16(dir_attr);
+ req->dir_item_length = cpu_to_le32(dir_item_len);
+ rc = hwrm_req_send(bp, req);
if (rc == -EACCES)
bnxt_print_admin_err(bp);
return rc;
}
-static int bnxt_flash_nvram(struct net_device *dev, u16 dir_type,
- u16 dir_ordinal, u16 dir_ext, u16 dir_attr,
- const u8 *data, size_t data_len)
-{
- struct bnxt *bp = netdev_priv(dev);
- int rc;
-
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = __bnxt_flash_nvram(dev, dir_type, dir_ordinal, dir_ext, dir_attr,
- 0, data, data_len);
- mutex_unlock(&bp->hwrm_cmd_lock);
- return rc;
-}
-
static int bnxt_hwrm_firmware_reset(struct net_device *dev, u8 proc_type,
u8 self_reset, u8 flags)
{
- struct hwrm_fw_reset_input req = {0};
struct bnxt *bp = netdev_priv(dev);
+ struct hwrm_fw_reset_input *req;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_FW_RESET);
+ if (rc)
+ return rc;
- req.embedded_proc_type = proc_type;
- req.selfrst_status = self_reset;
- req.flags = flags;
+ req->embedded_proc_type = proc_type;
+ req->selfrst_status = self_reset;
+ req->flags = flags;
if (proc_type == FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP) {
- rc = hwrm_send_message_silent(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send_silent(bp, req);
} else {
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, req);
if (rc == -EACCES)
bnxt_print_admin_err(bp);
}
@@ -2326,7 +2336,7 @@ static int bnxt_flash_firmware(struct net_device *dev,
return -EINVAL;
}
rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST,
- 0, 0, fw_data, fw_size);
+ 0, 0, 0, fw_data, fw_size);
if (rc == 0) /* Firmware update successful */
rc = bnxt_firmware_reset(dev, dir_type);
@@ -2379,7 +2389,7 @@ static int bnxt_flash_microcode(struct net_device *dev,
return -EINVAL;
}
rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST,
- 0, 0, fw_data, fw_size);
+ 0, 0, 0, fw_data, fw_size);
return rc;
}
@@ -2445,7 +2455,7 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev,
rc = bnxt_flash_microcode(dev, dir_type, fw->data, fw->size);
else
rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST,
- 0, 0, fw->data, fw->size);
+ 0, 0, 0, fw->data, fw->size);
release_firmware(fw);
return rc;
}
@@ -2457,21 +2467,23 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev,
int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware *fw,
u32 install_type)
{
- struct hwrm_nvm_install_update_input install = {0};
- struct hwrm_nvm_install_update_output resp = {0};
- struct hwrm_nvm_modify_input modify = {0};
+ struct hwrm_nvm_install_update_input *install;
+ struct hwrm_nvm_install_update_output *resp;
+ struct hwrm_nvm_modify_input *modify;
struct bnxt *bp = netdev_priv(dev);
bool defrag_attempted = false;
dma_addr_t dma_handle;
u8 *kmem = NULL;
u32 modify_len;
u32 item_len;
- int rc = 0;
u16 index;
+ int rc;
bnxt_hwrm_fw_set_time(bp);
- bnxt_hwrm_cmd_hdr_init(bp, &modify, HWRM_NVM_MODIFY, -1, -1);
+ rc = hwrm_req_init(bp, modify, HWRM_NVM_MODIFY);
+ if (rc)
+ return rc;
/* Try allocating a large DMA buffer first. Older fw will
* cause excessive NVRAM erases when using small blocks.
@@ -2479,22 +2491,33 @@ int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware
modify_len = roundup_pow_of_two(fw->size);
modify_len = min_t(u32, modify_len, BNXT_PKG_DMA_SIZE);
while (1) {
- kmem = dma_alloc_coherent(&bp->pdev->dev, modify_len,
- &dma_handle, GFP_KERNEL);
+ kmem = hwrm_req_dma_slice(bp, modify, modify_len, &dma_handle);
if (!kmem && modify_len > PAGE_SIZE)
modify_len /= 2;
else
break;
}
- if (!kmem)
+ if (!kmem) {
+ hwrm_req_drop(bp, modify);
return -ENOMEM;
+ }
- modify.host_src_addr = cpu_to_le64(dma_handle);
+ rc = hwrm_req_init(bp, install, HWRM_NVM_INSTALL_UPDATE);
+ if (rc) {
+ hwrm_req_drop(bp, modify);
+ return rc;
+ }
- bnxt_hwrm_cmd_hdr_init(bp, &install, HWRM_NVM_INSTALL_UPDATE, -1, -1);
+ hwrm_req_timeout(bp, modify, FLASH_PACKAGE_TIMEOUT);
+ hwrm_req_timeout(bp, install, INSTALL_PACKAGE_TIMEOUT);
+
+ hwrm_req_hold(bp, modify);
+ modify->host_src_addr = cpu_to_le64(dma_handle);
+
+ resp = hwrm_req_hold(bp, install);
if ((install_type & 0xffff) == 0)
install_type >>= 16;
- install.install_type = cpu_to_le32(install_type);
+ install->install_type = cpu_to_le32(install_type);
do {
u32 copied = 0, len = modify_len;
@@ -2514,76 +2537,69 @@ int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware
break;
}
- modify.dir_idx = cpu_to_le16(index);
+ modify->dir_idx = cpu_to_le16(index);
if (fw->size > modify_len)
- modify.flags = BNXT_NVM_MORE_FLAG;
+ modify->flags = BNXT_NVM_MORE_FLAG;
while (copied < fw->size) {
u32 balance = fw->size - copied;
if (balance <= modify_len) {
len = balance;
if (copied)
- modify.flags |= BNXT_NVM_LAST_FLAG;
+ modify->flags |= BNXT_NVM_LAST_FLAG;
}
memcpy(kmem, fw->data + copied, len);
- modify.len = cpu_to_le32(len);
- modify.offset = cpu_to_le32(copied);
- rc = hwrm_send_message(bp, &modify, sizeof(modify),
- FLASH_PACKAGE_TIMEOUT);
+ modify->len = cpu_to_le32(len);
+ modify->offset = cpu_to_le32(copied);
+ rc = hwrm_req_send(bp, modify);
if (rc)
goto pkg_abort;
copied += len;
}
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message_silent(bp, &install, sizeof(install),
- INSTALL_PACKAGE_TIMEOUT);
- memcpy(&resp, bp->hwrm_cmd_resp_addr, sizeof(resp));
+
+ rc = hwrm_req_send_silent(bp, install);
if (defrag_attempted) {
/* We have tried to defragment already in the previous
* iteration. Return with the result for INSTALL_UPDATE
*/
- mutex_unlock(&bp->hwrm_cmd_lock);
break;
}
- if (rc && ((struct hwrm_err_output *)&resp)->cmd_err ==
+ if (rc && ((struct hwrm_err_output *)resp)->cmd_err ==
NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) {
- install.flags =
+ install->flags =
cpu_to_le16(NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG);
- rc = _hwrm_send_message_silent(bp, &install,
- sizeof(install),
- INSTALL_PACKAGE_TIMEOUT);
- memcpy(&resp, bp->hwrm_cmd_resp_addr, sizeof(resp));
+ rc = hwrm_req_send_silent(bp, install);
- if (rc && ((struct hwrm_err_output *)&resp)->cmd_err ==
+ if (rc && ((struct hwrm_err_output *)resp)->cmd_err ==
NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_SPACE) {
/* FW has cleared NVM area, driver will create
* UPDATE directory and try the flash again
*/
defrag_attempted = true;
- install.flags = 0;
- rc = __bnxt_flash_nvram(bp->dev,
- BNX_DIR_TYPE_UPDATE,
- BNX_DIR_ORDINAL_FIRST,
- 0, 0, item_len, NULL,
- 0);
+ install->flags = 0;
+ rc = bnxt_flash_nvram(bp->dev,
+ BNX_DIR_TYPE_UPDATE,
+ BNX_DIR_ORDINAL_FIRST,
+ 0, 0, item_len, NULL, 0);
} else if (rc) {
netdev_err(dev, "HWRM_NVM_INSTALL_UPDATE failure rc :%x\n", rc);
}
} else if (rc) {
netdev_err(dev, "HWRM_NVM_INSTALL_UPDATE failure rc :%x\n", rc);
}
- mutex_unlock(&bp->hwrm_cmd_lock);
} while (defrag_attempted && !rc);
pkg_abort:
- dma_free_coherent(&bp->pdev->dev, modify_len, kmem, dma_handle);
- if (resp.result) {
+ hwrm_req_drop(bp, modify);
+ hwrm_req_drop(bp, install);
+
+ if (resp->result) {
netdev_err(dev, "PKG install error = %d, problem_item = %d\n",
- (s8)resp.result, (int)resp.problem_item);
+ (s8)resp->result, (int)resp->problem_item);
rc = -ENOPKG;
}
if (rc == -EACCES)
@@ -2629,20 +2645,22 @@ static int bnxt_flash_device(struct net_device *dev,
static int nvm_get_dir_info(struct net_device *dev, u32 *entries, u32 *length)
{
+ struct hwrm_nvm_get_dir_info_output *output;
+ struct hwrm_nvm_get_dir_info_input *req;
struct bnxt *bp = netdev_priv(dev);
int rc;
- struct hwrm_nvm_get_dir_info_input req = {0};
- struct hwrm_nvm_get_dir_info_output *output = bp->hwrm_cmd_resp_addr;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_DIR_INFO, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_NVM_GET_DIR_INFO);
+ if (rc)
+ return rc;
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ output = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc) {
*entries = le32_to_cpu(output->entries);
*length = le32_to_cpu(output->entry_length);
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -2668,7 +2686,7 @@ static int bnxt_get_nvram_directory(struct net_device *dev, u32 len, u8 *data)
u8 *buf;
size_t buflen;
dma_addr_t dma_handle;
- struct hwrm_nvm_get_dir_entries_input req = {0};
+ struct hwrm_nvm_get_dir_entries_input *req;
rc = nvm_get_dir_info(dev, &dir_entries, &entry_length);
if (rc != 0)
@@ -2686,20 +2704,23 @@ static int bnxt_get_nvram_directory(struct net_device *dev, u32 len, u8 *data)
len -= 2;
memset(data, 0xff, len);
+ rc = hwrm_req_init(bp, req, HWRM_NVM_GET_DIR_ENTRIES);
+ if (rc)
+ return rc;
+
buflen = dir_entries * entry_length;
- buf = dma_alloc_coherent(&bp->pdev->dev, buflen, &dma_handle,
- GFP_KERNEL);
+ buf = hwrm_req_dma_slice(bp, req, buflen, &dma_handle);
if (!buf) {
- netdev_err(dev, "dma_alloc_coherent failure, length = %u\n",
- (unsigned)buflen);
+ hwrm_req_drop(bp, req);
return -ENOMEM;
}
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_DIR_ENTRIES, -1, -1);
- req.host_dest_addr = cpu_to_le64(dma_handle);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->host_dest_addr = cpu_to_le64(dma_handle);
+
+ hwrm_req_hold(bp, req); /* hold the slice */
+ rc = hwrm_req_send(bp, req);
if (rc == 0)
memcpy(data, buf, len > buflen ? buflen : len);
- dma_free_coherent(&bp->pdev->dev, buflen, buf, dma_handle);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -2710,28 +2731,31 @@ static int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset,
int rc;
u8 *buf;
dma_addr_t dma_handle;
- struct hwrm_nvm_read_input req = {0};
+ struct hwrm_nvm_read_input *req;
if (!length)
return -EINVAL;
- buf = dma_alloc_coherent(&bp->pdev->dev, length, &dma_handle,
- GFP_KERNEL);
+ rc = hwrm_req_init(bp, req, HWRM_NVM_READ);
+ if (rc)
+ return rc;
+
+ buf = hwrm_req_dma_slice(bp, req, length, &dma_handle);
if (!buf) {
- netdev_err(dev, "dma_alloc_coherent failure, length = %u\n",
- (unsigned)length);
+ hwrm_req_drop(bp, req);
return -ENOMEM;
}
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_READ, -1, -1);
- req.host_dest_addr = cpu_to_le64(dma_handle);
- req.dir_idx = cpu_to_le16(index);
- req.offset = cpu_to_le32(offset);
- req.len = cpu_to_le32(length);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->host_dest_addr = cpu_to_le64(dma_handle);
+ req->dir_idx = cpu_to_le16(index);
+ req->offset = cpu_to_le32(offset);
+ req->len = cpu_to_le32(length);
+
+ hwrm_req_hold(bp, req); /* hold the slice */
+ rc = hwrm_req_send(bp, req);
if (rc == 0)
memcpy(data, buf, length);
- dma_free_coherent(&bp->pdev->dev, length, buf, dma_handle);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -2739,20 +2763,23 @@ static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
u16 ext, u16 *index, u32 *item_length,
u32 *data_length)
{
+ struct hwrm_nvm_find_dir_entry_output *output;
+ struct hwrm_nvm_find_dir_entry_input *req;
struct bnxt *bp = netdev_priv(dev);
int rc;
- struct hwrm_nvm_find_dir_entry_input req = {0};
- struct hwrm_nvm_find_dir_entry_output *output = bp->hwrm_cmd_resp_addr;
-
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_FIND_DIR_ENTRY, -1, -1);
- req.enables = 0;
- req.dir_idx = 0;
- req.dir_type = cpu_to_le16(type);
- req.dir_ordinal = cpu_to_le16(ordinal);
- req.dir_ext = cpu_to_le16(ext);
- req.opt_ordinal = NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ;
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+
+ rc = hwrm_req_init(bp, req, HWRM_NVM_FIND_DIR_ENTRY);
+ if (rc)
+ return rc;
+
+ req->enables = 0;
+ req->dir_idx = 0;
+ req->dir_type = cpu_to_le16(type);
+ req->dir_ordinal = cpu_to_le16(ordinal);
+ req->dir_ext = cpu_to_le16(ext);
+ req->opt_ordinal = NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ;
+ output = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send_silent(bp, req);
if (rc == 0) {
if (index)
*index = le16_to_cpu(output->dir_idx);
@@ -2761,7 +2788,7 @@ static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
if (data_length)
*data_length = le32_to_cpu(output->dir_data_length);
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -2856,12 +2883,16 @@ static int bnxt_get_eeprom(struct net_device *dev,
static int bnxt_erase_nvram_directory(struct net_device *dev, u8 index)
{
+ struct hwrm_nvm_erase_dir_entry_input *req;
struct bnxt *bp = netdev_priv(dev);
- struct hwrm_nvm_erase_dir_entry_input req = {0};
+ int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_ERASE_DIR_ENTRY, -1, -1);
- req.dir_idx = cpu_to_le16(index);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_NVM_ERASE_DIR_ENTRY);
+ if (rc)
+ return rc;
+
+ req->dir_idx = cpu_to_le16(index);
+ return hwrm_req_send(bp, req);
}
static int bnxt_set_eeprom(struct net_device *dev,
@@ -2901,7 +2932,7 @@ static int bnxt_set_eeprom(struct net_device *dev,
ordinal = eeprom->offset >> 16;
attr = eeprom->offset & 0xffff;
- return bnxt_flash_nvram(dev, type, ordinal, ext, attr, data,
+ return bnxt_flash_nvram(dev, type, ordinal, ext, attr, 0, data,
eeprom->len);
}
@@ -2989,31 +3020,33 @@ static int bnxt_read_sfp_module_eeprom_info(struct bnxt *bp, u16 i2c_addr,
u16 page_number, u16 start_addr,
u16 data_length, u8 *buf)
{
- struct hwrm_port_phy_i2c_read_input req = {0};
- struct hwrm_port_phy_i2c_read_output *output = bp->hwrm_cmd_resp_addr;
+ struct hwrm_port_phy_i2c_read_output *output;
+ struct hwrm_port_phy_i2c_read_input *req;
int rc, byte_offset = 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_I2C_READ, -1, -1);
- req.i2c_slave_addr = i2c_addr;
- req.page_number = cpu_to_le16(page_number);
- req.port_id = cpu_to_le16(bp->pf.port_id);
+ rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_I2C_READ);
+ if (rc)
+ return rc;
+
+ output = hwrm_req_hold(bp, req);
+ req->i2c_slave_addr = i2c_addr;
+ req->page_number = cpu_to_le16(page_number);
+ req->port_id = cpu_to_le16(bp->pf.port_id);
do {
u16 xfer_size;
xfer_size = min_t(u16, data_length, BNXT_MAX_PHY_I2C_RESP_SIZE);
data_length -= xfer_size;
- req.page_offset = cpu_to_le16(start_addr + byte_offset);
- req.data_length = xfer_size;
- req.enables = cpu_to_le32(start_addr + byte_offset ?
+ req->page_offset = cpu_to_le16(start_addr + byte_offset);
+ req->data_length = xfer_size;
+ req->enables = cpu_to_le32(start_addr + byte_offset ?
PORT_PHY_I2C_READ_REQ_ENABLES_PAGE_OFFSET : 0);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, req);
if (!rc)
memcpy(buf + byte_offset, output->data, xfer_size);
- mutex_unlock(&bp->hwrm_cmd_lock);
byte_offset += xfer_size;
} while (!rc && data_length > 0);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -3122,13 +3155,13 @@ static int bnxt_nway_reset(struct net_device *dev)
static int bnxt_set_phys_id(struct net_device *dev,
enum ethtool_phys_id_state state)
{
- struct hwrm_port_led_cfg_input req = {0};
+ struct hwrm_port_led_cfg_input *req;
struct bnxt *bp = netdev_priv(dev);
struct bnxt_pf_info *pf = &bp->pf;
struct bnxt_led_cfg *led_cfg;
u8 led_state;
__le16 duration;
- int i;
+ int rc, i;
if (!bp->num_leds || BNXT_VF(bp))
return -EOPNOTSUPP;
@@ -3142,27 +3175,35 @@ static int bnxt_set_phys_id(struct net_device *dev,
} else {
return -EINVAL;
}
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_LED_CFG, -1, -1);
- req.port_id = cpu_to_le16(pf->port_id);
- req.num_leds = bp->num_leds;
- led_cfg = (struct bnxt_led_cfg *)&req.led0_id;
+ rc = hwrm_req_init(bp, req, HWRM_PORT_LED_CFG);
+ if (rc)
+ return rc;
+
+ req->port_id = cpu_to_le16(pf->port_id);
+ req->num_leds = bp->num_leds;
+ led_cfg = (struct bnxt_led_cfg *)&req->led0_id;
for (i = 0; i < bp->num_leds; i++, led_cfg++) {
- req.enables |= BNXT_LED_DFLT_ENABLES(i);
+ req->enables |= BNXT_LED_DFLT_ENABLES(i);
led_cfg->led_id = bp->leds[i].led_id;
led_cfg->led_state = led_state;
led_cfg->led_blink_on = duration;
led_cfg->led_blink_off = duration;
led_cfg->led_group_id = bp->leds[i].led_group_id;
}
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ return hwrm_req_send(bp, req);
}
static int bnxt_hwrm_selftest_irq(struct bnxt *bp, u16 cmpl_ring)
{
- struct hwrm_selftest_irq_input req = {0};
+ struct hwrm_selftest_irq_input *req;
+ int rc;
+
+ rc = hwrm_req_init(bp, req, HWRM_SELFTEST_IRQ);
+ if (rc)
+ return rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_SELFTEST_IRQ, cmpl_ring, -1);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->cmpl_ring = cpu_to_le16(cmpl_ring);
+ return hwrm_req_send(bp, req);
}
static int bnxt_test_irq(struct bnxt *bp)
@@ -3182,31 +3223,37 @@ static int bnxt_test_irq(struct bnxt *bp)
static int bnxt_hwrm_mac_loopback(struct bnxt *bp, bool enable)
{
- struct hwrm_port_mac_cfg_input req = {0};
+ struct hwrm_port_mac_cfg_input *req;
+ int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_MAC_CFG, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_CFG);
+ if (rc)
+ return rc;
- req.enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_LPBK);
+ req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_LPBK);
if (enable)
- req.lpbk = PORT_MAC_CFG_REQ_LPBK_LOCAL;
+ req->lpbk = PORT_MAC_CFG_REQ_LPBK_LOCAL;
else
- req.lpbk = PORT_MAC_CFG_REQ_LPBK_NONE;
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->lpbk = PORT_MAC_CFG_REQ_LPBK_NONE;
+ return hwrm_req_send(bp, req);
}
static int bnxt_query_force_speeds(struct bnxt *bp, u16 *force_speeds)
{
- struct hwrm_port_phy_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_port_phy_qcaps_input req = {0};
+ struct hwrm_port_phy_qcaps_output *resp;
+ struct hwrm_port_phy_qcaps_input *req;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_QCAPS, -1, -1);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_QCAPS);
+ if (rc)
+ return rc;
+
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc)
*force_speeds = le16_to_cpu(resp->supported_speeds_force_mode);
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -3241,7 +3288,7 @@ static int bnxt_disable_an_for_lpbk(struct bnxt *bp,
req->force_link_speed = cpu_to_le16(fw_speed);
req->flags |= cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_FORCE |
PORT_PHY_CFG_REQ_FLAGS_RESET_PHY);
- rc = hwrm_send_message(bp, req, sizeof(*req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, req);
req->flags = 0;
req->force_link_speed = cpu_to_le16(0);
return rc;
@@ -3249,21 +3296,29 @@ static int bnxt_disable_an_for_lpbk(struct bnxt *bp,
static int bnxt_hwrm_phy_loopback(struct bnxt *bp, bool enable, bool ext)
{
- struct hwrm_port_phy_cfg_input req = {0};
+ struct hwrm_port_phy_cfg_input *req;
+ int rc;
+
+ rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_CFG);
+ if (rc)
+ return rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1);
+ /* prevent bnxt_disable_an_for_lpbk() from consuming the request */
+ hwrm_req_hold(bp, req);
if (enable) {
- bnxt_disable_an_for_lpbk(bp, &req);
+ bnxt_disable_an_for_lpbk(bp, req);
if (ext)
- req.lpbk = PORT_PHY_CFG_REQ_LPBK_EXTERNAL;
+ req->lpbk = PORT_PHY_CFG_REQ_LPBK_EXTERNAL;
else
- req.lpbk = PORT_PHY_CFG_REQ_LPBK_LOCAL;
+ req->lpbk = PORT_PHY_CFG_REQ_LPBK_LOCAL;
} else {
- req.lpbk = PORT_PHY_CFG_REQ_LPBK_NONE;
+ req->lpbk = PORT_PHY_CFG_REQ_LPBK_NONE;
}
- req.enables = cpu_to_le32(PORT_PHY_CFG_REQ_ENABLES_LPBK);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->enables = cpu_to_le32(PORT_PHY_CFG_REQ_ENABLES_LPBK);
+ rc = hwrm_req_send(bp, req);
+ hwrm_req_drop(bp, req);
+ return rc;
}
static int bnxt_rx_loopback(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
@@ -3361,7 +3416,7 @@ static int bnxt_run_loopback(struct bnxt *bp)
data[i] = (u8)(i & 0xff);
map = dma_map_single(&bp->pdev->dev, skb->data, pkt_size,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
if (dma_mapping_error(&bp->pdev->dev, map)) {
dev_kfree_skb(skb);
return -EIO;
@@ -3374,24 +3429,28 @@ static int bnxt_run_loopback(struct bnxt *bp)
bnxt_db_write(bp, &txr->tx_db, txr->tx_prod);
rc = bnxt_poll_loopback(bp, cpr, pkt_size);
- dma_unmap_single(&bp->pdev->dev, map, pkt_size, PCI_DMA_TODEVICE);
+ dma_unmap_single(&bp->pdev->dev, map, pkt_size, DMA_TO_DEVICE);
dev_kfree_skb(skb);
return rc;
}
static int bnxt_run_fw_tests(struct bnxt *bp, u8 test_mask, u8 *test_results)
{
- struct hwrm_selftest_exec_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_selftest_exec_input req = {0};
+ struct hwrm_selftest_exec_output *resp;
+ struct hwrm_selftest_exec_input *req;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_SELFTEST_EXEC, -1, -1);
- mutex_lock(&bp->hwrm_cmd_lock);
- resp->test_success = 0;
- req.flags = test_mask;
- rc = _hwrm_send_message(bp, &req, sizeof(req), bp->test_info->timeout);
+ rc = hwrm_req_init(bp, req, HWRM_SELFTEST_EXEC);
+ if (rc)
+ return rc;
+
+ hwrm_req_timeout(bp, req, bp->test_info->timeout);
+ req->flags = test_mask;
+
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
*test_results = resp->test_success;
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -3550,32 +3609,34 @@ static int bnxt_reset(struct net_device *dev, u32 *flags)
return 0;
}
-static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, int msg_len,
+static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg,
struct bnxt_hwrm_dbg_dma_info *info)
{
- struct hwrm_dbg_cmn_output *cmn_resp = bp->hwrm_cmd_resp_addr;
struct hwrm_dbg_cmn_input *cmn_req = msg;
__le16 *seq_ptr = msg + info->seq_off;
+ struct hwrm_dbg_cmn_output *cmn_resp;
u16 seq = 0, len, segs_off;
- void *resp = cmn_resp;
dma_addr_t dma_handle;
+ void *dma_buf, *resp;
int rc, off = 0;
- void *dma_buf;
- dma_buf = dma_alloc_coherent(&bp->pdev->dev, info->dma_len, &dma_handle,
- GFP_KERNEL);
- if (!dma_buf)
+ dma_buf = hwrm_req_dma_slice(bp, msg, info->dma_len, &dma_handle);
+ if (!dma_buf) {
+ hwrm_req_drop(bp, msg);
return -ENOMEM;
+ }
+
+ hwrm_req_timeout(bp, msg, HWRM_COREDUMP_TIMEOUT);
+ cmn_resp = hwrm_req_hold(bp, msg);
+ resp = cmn_resp;
segs_off = offsetof(struct hwrm_dbg_coredump_list_output,
total_segments);
cmn_req->host_dest_addr = cpu_to_le64(dma_handle);
cmn_req->host_buf_len = cpu_to_le32(info->dma_len);
- mutex_lock(&bp->hwrm_cmd_lock);
while (1) {
*seq_ptr = cpu_to_le16(seq);
- rc = _hwrm_send_message(bp, msg, msg_len,
- HWRM_COREDUMP_TIMEOUT);
+ rc = hwrm_req_send(bp, msg);
if (rc)
break;
@@ -3619,26 +3680,27 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, int msg_len,
seq++;
off += len;
}
- mutex_unlock(&bp->hwrm_cmd_lock);
- dma_free_coherent(&bp->pdev->dev, info->dma_len, dma_buf, dma_handle);
+ hwrm_req_drop(bp, msg);
return rc;
}
static int bnxt_hwrm_dbg_coredump_list(struct bnxt *bp,
struct bnxt_coredump *coredump)
{
- struct hwrm_dbg_coredump_list_input req = {0};
struct bnxt_hwrm_dbg_dma_info info = {NULL};
+ struct hwrm_dbg_coredump_list_input *req;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_DBG_COREDUMP_LIST, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_LIST);
+ if (rc)
+ return rc;
info.dma_len = COREDUMP_LIST_BUF_LEN;
info.seq_off = offsetof(struct hwrm_dbg_coredump_list_input, seq_no);
info.data_len_off = offsetof(struct hwrm_dbg_coredump_list_output,
data_len);
- rc = bnxt_hwrm_dbg_dma_data(bp, &req, sizeof(req), &info);
+ rc = bnxt_hwrm_dbg_dma_data(bp, req, &info);
if (!rc) {
coredump->data = info.dest_buf;
coredump->data_size = info.dest_buf_size;
@@ -3650,26 +3712,34 @@ static int bnxt_hwrm_dbg_coredump_list(struct bnxt *bp,
static int bnxt_hwrm_dbg_coredump_initiate(struct bnxt *bp, u16 component_id,
u16 segment_id)
{
- struct hwrm_dbg_coredump_initiate_input req = {0};
+ struct hwrm_dbg_coredump_initiate_input *req;
+ int rc;
+
+ rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_INITIATE);
+ if (rc)
+ return rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_DBG_COREDUMP_INITIATE, -1, -1);
- req.component_id = cpu_to_le16(component_id);
- req.segment_id = cpu_to_le16(segment_id);
+ hwrm_req_timeout(bp, req, HWRM_COREDUMP_TIMEOUT);
+ req->component_id = cpu_to_le16(component_id);
+ req->segment_id = cpu_to_le16(segment_id);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_COREDUMP_TIMEOUT);
+ return hwrm_req_send(bp, req);
}
static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
u16 segment_id, u32 *seg_len,
void *buf, u32 buf_len, u32 offset)
{
- struct hwrm_dbg_coredump_retrieve_input req = {0};
+ struct hwrm_dbg_coredump_retrieve_input *req;
struct bnxt_hwrm_dbg_dma_info info = {NULL};
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_DBG_COREDUMP_RETRIEVE, -1, -1);
- req.component_id = cpu_to_le16(component_id);
- req.segment_id = cpu_to_le16(segment_id);
+ rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_RETRIEVE);
+ if (rc)
+ return rc;
+
+ req->component_id = cpu_to_le16(component_id);
+ req->segment_id = cpu_to_le16(segment_id);
info.dma_len = COREDUMP_RETRIEVE_BUF_LEN;
info.seq_off = offsetof(struct hwrm_dbg_coredump_retrieve_input,
@@ -3682,7 +3752,7 @@ static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
info.seg_start = offset;
}
- rc = bnxt_hwrm_dbg_dma_data(bp, &req, sizeof(req), &info);
+ rc = bnxt_hwrm_dbg_dma_data(bp, req, &info);
if (!rc)
*seg_len = info.dest_buf_size;
@@ -3961,8 +4031,8 @@ static int bnxt_get_ts_info(struct net_device *dev,
void bnxt_ethtool_init(struct bnxt *bp)
{
- struct hwrm_selftest_qlist_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_selftest_qlist_input req = {0};
+ struct hwrm_selftest_qlist_output *resp;
+ struct hwrm_selftest_qlist_input *req;
struct bnxt_test_info *test_info;
struct net_device *dev = bp->dev;
int i, rc;
@@ -3974,19 +4044,22 @@ void bnxt_ethtool_init(struct bnxt *bp)
if (bp->hwrm_spec_code < 0x10704 || !BNXT_PF(bp))
return;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_SELFTEST_QLIST, -1, -1);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
- if (rc)
- goto ethtool_init_exit;
-
test_info = bp->test_info;
- if (!test_info)
+ if (!test_info) {
test_info = kzalloc(sizeof(*bp->test_info), GFP_KERNEL);
- if (!test_info)
+ if (!test_info)
+ return;
+ bp->test_info = test_info;
+ }
+
+ if (hwrm_req_init(bp, req, HWRM_SELFTEST_QLIST))
+ return;
+
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send_silent(bp, req);
+ if (rc)
goto ethtool_init_exit;
- bp->test_info = test_info;
bp->num_tests = resp->num_tests + BNXT_DRV_TESTS;
if (bp->num_tests > BNXT_MAX_TEST)
bp->num_tests = BNXT_MAX_TEST;
@@ -4020,7 +4093,7 @@ void bnxt_ethtool_init(struct bnxt *bp)
}
ethtool_init_exit:
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
}
static void bnxt_get_eth_phy_stats(struct net_device *dev,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index 3fc6781c5b98..94d07a9f7034 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
@@ -368,6 +368,7 @@ struct cmd_nums {
#define HWRM_FUNC_PTP_TS_QUERY 0x19fUL
#define HWRM_FUNC_PTP_EXT_CFG 0x1a0UL
#define HWRM_FUNC_PTP_EXT_QCFG 0x1a1UL
+ #define HWRM_FUNC_KEY_CTX_ALLOC 0x1a2UL
#define HWRM_SELFTEST_QLIST 0x200UL
#define HWRM_SELFTEST_EXEC 0x201UL
#define HWRM_SELFTEST_IRQ 0x202UL
@@ -531,8 +532,8 @@ struct hwrm_err_output {
#define HWRM_VERSION_MAJOR 1
#define HWRM_VERSION_MINOR 10
#define HWRM_VERSION_UPDATE 2
-#define HWRM_VERSION_RSVD 47
-#define HWRM_VERSION_STR "1.10.2.47"
+#define HWRM_VERSION_RSVD 52
+#define HWRM_VERSION_STR "1.10.2.52"
/* hwrm_ver_get_input (size:192b/24B) */
struct hwrm_ver_get_input {
@@ -585,6 +586,7 @@ struct hwrm_ver_get_output {
#define VER_GET_RESP_DEV_CAPS_CFG_CFA_ADV_FLOW_MGNT_SUPPORTED 0x1000UL
#define VER_GET_RESP_DEV_CAPS_CFG_CFA_TFLIB_SUPPORTED 0x2000UL
#define VER_GET_RESP_DEV_CAPS_CFG_CFA_TRUFLOW_SUPPORTED 0x4000UL
+ #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_BOOT_CAPABLE 0x8000UL
u8 roce_fw_maj_8b;
u8 roce_fw_min_8b;
u8 roce_fw_bld_8b;
@@ -886,7 +888,8 @@ struct hwrm_async_event_cmpl_reset_notify {
#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL (0x2UL << 8)
#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_NON_FATAL (0x3UL << 8)
#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FAST_RESET (0x4UL << 8)
- #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FAST_RESET
+ #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_ACTIVATION (0x5UL << 8)
+ #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_ACTIVATION
#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_MASK 0xffff0000UL
#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_SFT 16
};
@@ -1236,13 +1239,14 @@ struct hwrm_async_event_cmpl_error_report_base {
u8 timestamp_lo;
__le16 timestamp_hi;
__le32 event_data1;
- #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL
- #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT 0
- #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_RESERVED 0x0UL
- #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM 0x1UL
- #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL 0x2UL
- #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM 0x3UL
- #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT 0
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_RESERVED 0x0UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM 0x1UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL 0x2UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM 0x3UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD
};
/* hwrm_async_event_cmpl_error_report_pause_storm (size:128b/16B) */
@@ -1446,6 +1450,8 @@ struct hwrm_func_vf_cfg_input {
#define FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS 0x200UL
#define FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS 0x400UL
#define FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS 0x800UL
+ #define FUNC_VF_CFG_REQ_ENABLES_NUM_TX_KEY_CTXS 0x1000UL
+ #define FUNC_VF_CFG_REQ_ENABLES_NUM_RX_KEY_CTXS 0x2000UL
__le16 mtu;
__le16 guest_vlan;
__le16 async_event_cr;
@@ -1469,7 +1475,8 @@ struct hwrm_func_vf_cfg_input {
__le16 num_vnics;
__le16 num_stat_ctxs;
__le16 num_hw_ring_grps;
- u8 unused_0[4];
+ __le16 num_tx_key_ctxs;
+ __le16 num_rx_key_ctxs;
};
/* hwrm_func_vf_cfg_output (size:128b/16B) */
@@ -1493,7 +1500,7 @@ struct hwrm_func_qcaps_input {
u8 unused_0[6];
};
-/* hwrm_func_qcaps_output (size:704b/88B) */
+/* hwrm_func_qcaps_output (size:768b/96B) */
struct hwrm_func_qcaps_output {
__le16 error_code;
__le16 req_type;
@@ -1587,7 +1594,8 @@ struct hwrm_func_qcaps_output {
#define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TE_CFA 0x4UL
#define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_RE_CFA 0x8UL
#define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_PRIMATE 0x10UL
- u8 unused_1;
+ __le16 max_key_ctxs_alloc;
+ u8 unused_1[7];
u8 valid;
};
@@ -1602,7 +1610,7 @@ struct hwrm_func_qcfg_input {
u8 unused_0[6];
};
-/* hwrm_func_qcfg_output (size:832b/104B) */
+/* hwrm_func_qcfg_output (size:896b/112B) */
struct hwrm_func_qcfg_output {
__le16 error_code;
__le16 req_type;
@@ -1749,11 +1757,13 @@ struct hwrm_func_qcfg_output {
#define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
#define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_LAST FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100
__le16 host_mtu;
- u8 unused_3;
+ __le16 alloc_tx_key_ctxs;
+ __le16 alloc_rx_key_ctxs;
+ u8 unused_3[5];
u8 valid;
};
-/* hwrm_func_cfg_input (size:832b/104B) */
+/* hwrm_func_cfg_input (size:896b/112B) */
struct hwrm_func_cfg_input {
__le16 req_type;
__le16 cmpl_ring;
@@ -1820,6 +1830,8 @@ struct hwrm_func_cfg_input {
#define FUNC_CFG_REQ_ENABLES_PARTITION_MAX_BW 0x8000000UL
#define FUNC_CFG_REQ_ENABLES_TPID 0x10000000UL
#define FUNC_CFG_REQ_ENABLES_HOST_MTU 0x20000000UL
+ #define FUNC_CFG_REQ_ENABLES_TX_KEY_CTXS 0x40000000UL
+ #define FUNC_CFG_REQ_ENABLES_RX_KEY_CTXS 0x80000000UL
__le16 admin_mtu;
__le16 mru;
__le16 num_rsscos_ctxs;
@@ -1929,6 +1941,9 @@ struct hwrm_func_cfg_input {
#define FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_UNIT_LAST FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100
__be16 tpid;
__le16 host_mtu;
+ __le16 num_tx_key_ctxs;
+ __le16 num_rx_key_ctxs;
+ u8 unused_0[4];
};
/* hwrm_func_cfg_output (size:128b/16B) */
@@ -2099,6 +2114,7 @@ struct hwrm_func_drv_rgtr_input {
#define FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT 0x40UL
#define FUNC_DRV_RGTR_REQ_FLAGS_FAST_RESET_SUPPORT 0x80UL
#define FUNC_DRV_RGTR_REQ_FLAGS_RSS_STRICT_HASH_TYPE_SUPPORT 0x100UL
+ #define FUNC_DRV_RGTR_REQ_FLAGS_NPAR_1_2_SUPPORT 0x200UL
__le32 enables;
#define FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE 0x1UL
#define FUNC_DRV_RGTR_REQ_ENABLES_VER 0x2UL
@@ -2268,7 +2284,7 @@ struct hwrm_func_resource_qcaps_input {
u8 unused_0[6];
};
-/* hwrm_func_resource_qcaps_output (size:448b/56B) */
+/* hwrm_func_resource_qcaps_output (size:512b/64B) */
struct hwrm_func_resource_qcaps_output {
__le16 error_code;
__le16 req_type;
@@ -2300,11 +2316,15 @@ struct hwrm_func_resource_qcaps_output {
__le16 max_tx_scheduler_inputs;
__le16 flags;
#define FUNC_RESOURCE_QCAPS_RESP_FLAGS_MIN_GUARANTEED 0x1UL
+ __le16 min_tx_key_ctxs;
+ __le16 max_tx_key_ctxs;
+ __le16 min_rx_key_ctxs;
+ __le16 max_rx_key_ctxs;
u8 unused_0[5];
u8 valid;
};
-/* hwrm_func_vf_resource_cfg_input (size:448b/56B) */
+/* hwrm_func_vf_resource_cfg_input (size:512b/64B) */
struct hwrm_func_vf_resource_cfg_input {
__le16 req_type;
__le16 cmpl_ring;
@@ -2331,6 +2351,10 @@ struct hwrm_func_vf_resource_cfg_input {
__le16 max_hw_ring_grps;
__le16 flags;
#define FUNC_VF_RESOURCE_CFG_REQ_FLAGS_MIN_GUARANTEED 0x1UL
+ __le16 min_tx_key_ctxs;
+ __le16 max_tx_key_ctxs;
+ __le16 min_rx_key_ctxs;
+ __le16 max_rx_key_ctxs;
u8 unused_0[2];
};
@@ -2348,7 +2372,9 @@ struct hwrm_func_vf_resource_cfg_output {
__le16 reserved_vnics;
__le16 reserved_stat_ctx;
__le16 reserved_hw_ring_grps;
- u8 unused_0[7];
+ __le16 reserved_tx_key_ctxs;
+ __le16 reserved_rx_key_ctxs;
+ u8 unused_0[3];
u8 valid;
};
@@ -4220,7 +4246,7 @@ struct hwrm_port_lpbk_clr_stats_output {
u8 valid;
};
-/* hwrm_port_ts_query_input (size:256b/32B) */
+/* hwrm_port_ts_query_input (size:320b/40B) */
struct hwrm_port_ts_query_input {
__le16 req_type;
__le16 cmpl_ring;
@@ -4238,8 +4264,11 @@ struct hwrm_port_ts_query_input {
__le16 enables;
#define PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT 0x1UL
#define PORT_TS_QUERY_REQ_ENABLES_PTP_SEQ_ID 0x2UL
+ #define PORT_TS_QUERY_REQ_ENABLES_PTP_HDR_OFFSET 0x4UL
__le16 ts_req_timeout;
__le32 ptp_seq_id;
+ __le16 ptp_hdr_offset;
+ u8 unused_1[6];
};
/* hwrm_port_ts_query_output (size:192b/24B) */
@@ -8172,6 +8201,7 @@ struct hwrm_fw_reset_input {
u8 host_idx;
u8 flags;
#define FW_RESET_REQ_FLAGS_RESET_GRACEFUL 0x1UL
+ #define FW_RESET_REQ_FLAGS_FW_ACTIVATION 0x2UL
u8 unused_0[4];
};
@@ -8952,7 +8982,7 @@ struct hwrm_nvm_get_dir_info_output {
u8 valid;
};
-/* hwrm_nvm_write_input (size:384b/48B) */
+/* hwrm_nvm_write_input (size:448b/56B) */
struct hwrm_nvm_write_input {
__le16 req_type;
__le16 cmpl_ring;
@@ -8968,7 +8998,11 @@ struct hwrm_nvm_write_input {
__le16 option;
__le16 flags;
#define NVM_WRITE_REQ_FLAGS_KEEP_ORIG_ACTIVE_IMG 0x1UL
+ #define NVM_WRITE_REQ_FLAGS_BATCH_MODE 0x2UL
+ #define NVM_WRITE_REQ_FLAGS_BATCH_LAST 0x4UL
__le32 dir_item_length;
+ __le32 offset;
+ __le32 len;
__le32 unused_0;
};
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c
new file mode 100644
index 000000000000..acef61abe35d
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c
@@ -0,0 +1,763 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2020 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#include <asm/byteorder.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/errno.h>
+#include <linux/ethtool.h>
+#include <linux/if_ether.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/skbuff.h>
+
+#include "bnxt_hsi.h"
+#include "bnxt.h"
+#include "bnxt_hwrm.h"
+
+static u64 hwrm_calc_sentinel(struct bnxt_hwrm_ctx *ctx, u16 req_type)
+{
+ return (((uintptr_t)ctx) + req_type) ^ BNXT_HWRM_SENTINEL;
+}
+
+/**
+ * __hwrm_req_init() - Initialize an HWRM request.
+ * @bp: The driver context.
+ * @req: A pointer to the request pointer to initialize.
+ * @req_type: The request type. This will be converted to little endian
+ * before being written to the req_type field of the returned request.
+ * @req_len: The length of the request to be allocated.
+ *
+ * Allocate DMA resources and initialize a new HWRM request object of the
+ * given type. The response address field in the request is configured with
+ * the DMA bus address that has been mapped for the response and the passed
+ * request is pointed to kernel virtual memory mapped for the request (such
+ * that short_input indirection can be accomplished without copying). The
+ * request’s target and completion ring are initialized to default values and
+ * can be overridden by writing to the returned request object directly.
+ *
+ * The initialized request can be further customized by writing to its fields
+ * directly, taking care to convert such fields to little endian. The request
+ * object will be consumed (and all its associated resources released) upon
+ * passing it to hwrm_req_send() unless ownership of the request has been
+ * claimed by the caller via a call to hwrm_req_hold(). If the request is not
+ * consumed, either because it is never sent or because ownership has been
+ * claimed, then it must be released by a call to hwrm_req_drop().
+ *
+ * Return: zero on success, negative error code otherwise:
+ * E2BIG: the type of request pointer is too large to fit.
+ * ENOMEM: an allocation failure occurred.
+ */
+int __hwrm_req_init(struct bnxt *bp, void **req, u16 req_type, u32 req_len)
+{
+ struct bnxt_hwrm_ctx *ctx;
+ dma_addr_t dma_handle;
+ u8 *req_addr;
+
+ if (req_len > BNXT_HWRM_CTX_OFFSET)
+ return -E2BIG;
+
+ req_addr = dma_pool_alloc(bp->hwrm_dma_pool, GFP_KERNEL | __GFP_ZERO,
+ &dma_handle);
+ if (!req_addr)
+ return -ENOMEM;
+
+ ctx = (struct bnxt_hwrm_ctx *)(req_addr + BNXT_HWRM_CTX_OFFSET);
+ /* safety first, sentinel used to check for invalid requests */
+ ctx->sentinel = hwrm_calc_sentinel(ctx, req_type);
+ ctx->req_len = req_len;
+ ctx->req = (struct input *)req_addr;
+ ctx->resp = (struct output *)(req_addr + BNXT_HWRM_RESP_OFFSET);
+ ctx->dma_handle = dma_handle;
+ ctx->flags = 0; /* __GFP_ZERO, but be explicit regarding ownership */
+ ctx->timeout = bp->hwrm_cmd_timeout ?: DFLT_HWRM_CMD_TIMEOUT;
+ ctx->allocated = BNXT_HWRM_DMA_SIZE - BNXT_HWRM_CTX_OFFSET;
+ ctx->gfp = GFP_KERNEL;
+ ctx->slice_addr = NULL;
+
+ /* initialize common request fields */
+ ctx->req->req_type = cpu_to_le16(req_type);
+ ctx->req->resp_addr = cpu_to_le64(dma_handle + BNXT_HWRM_RESP_OFFSET);
+ ctx->req->cmpl_ring = cpu_to_le16(BNXT_HWRM_NO_CMPL_RING);
+ ctx->req->target_id = cpu_to_le16(BNXT_HWRM_TARGET);
+ *req = ctx->req;
+
+ return 0;
+}
+
+static struct bnxt_hwrm_ctx *__hwrm_ctx(struct bnxt *bp, u8 *req_addr)
+{
+ void *ctx_addr = req_addr + BNXT_HWRM_CTX_OFFSET;
+ struct input *req = (struct input *)req_addr;
+ struct bnxt_hwrm_ctx *ctx = ctx_addr;
+ u64 sentinel;
+
+ if (!req) {
+ /* can only be due to software bug, be loud */
+ netdev_err(bp->dev, "null HWRM request");
+ dump_stack();
+ return NULL;
+ }
+
+ /* HWRM API has no type safety, verify sentinel to validate address */
+ sentinel = hwrm_calc_sentinel(ctx, le16_to_cpu(req->req_type));
+ if (ctx->sentinel != sentinel) {
+ /* can only be due to software bug, be loud */
+ netdev_err(bp->dev, "HWRM sentinel mismatch, req_type = %u\n",
+ (u32)le16_to_cpu(req->req_type));
+ dump_stack();
+ return NULL;
+ }
+
+ return ctx;
+}
+
+/**
+ * hwrm_req_timeout() - Set the completion timeout for the request.
+ * @bp: The driver context.
+ * @req: The request to set the timeout.
+ * @timeout: The timeout in milliseconds.
+ *
+ * Set the timeout associated with the request for subsequent calls to
+ * hwrm_req_send(). Some requests are long running and require a different
+ * timeout than the default.
+ */
+void hwrm_req_timeout(struct bnxt *bp, void *req, unsigned int timeout)
+{
+ struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+
+ if (ctx)
+ ctx->timeout = timeout;
+}
+
+/**
+ * hwrm_req_alloc_flags() - Sets GFP allocation flags for slices.
+ * @bp: The driver context.
+ * @req: The request for which calls to hwrm_req_dma_slice() will have altered
+ * allocation flags.
+ * @gfp: A bitmask of GFP flags. These flags are passed to
+ * dma_alloc_coherent() whenever it is used to allocate backing memory
+ * for slices. Note that calls to hwrm_req_dma_slice() will not always
+ * result in new allocations, however, memory suballocated from the
+ * request buffer is already __GFP_ZERO.
+ *
+ * Sets the GFP allocation flags associated with the request for subsequent
+ * calls to hwrm_req_dma_slice(). This can be useful for specifying __GFP_ZERO
+ * for slice allocations.
+ */
+void hwrm_req_alloc_flags(struct bnxt *bp, void *req, gfp_t gfp)
+{
+ struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+
+ if (ctx)
+ ctx->gfp = gfp;
+}
+
+/**
+ * hwrm_req_replace() - Replace request data.
+ * @bp: The driver context.
+ * @req: The request to modify. A call to hwrm_req_replace() is conceptually
+ * an assignment of new_req to req. Subsequent calls to HWRM API functions,
+ * such as hwrm_req_send(), should thus use req and not new_req (in fact,
+ * calls to HWRM API functions will fail if non-managed request objects
+ * are passed).
+ * @len: The length of new_req.
+ * @new_req: The pre-built request to copy or reference.
+ *
+ * Replaces the request data in req with that of new_req. This is useful in
+ * scenarios where a request object has already been constructed by a third
+ * party prior to creating a resource managed request using hwrm_req_init().
+ * Depending on the length, hwrm_req_replace() will either copy the new
+ * request data into the DMA memory allocated for req, or it will simply
+ * reference the new request and use it in lieu of req during subsequent
+ * calls to hwrm_req_send(). The resource management is associated with
+ * req and is independent of and does not apply to new_req. The caller must
+ * ensure that the lifetime of new_req is at least as long as req. Any slices
+ * that may have been associated with the original request are released.
+ *
+ * Return: zero on success, negative error code otherwise:
+ * E2BIG: Request is too large.
+ * EINVAL: Invalid request to modify.
+ */
+int hwrm_req_replace(struct bnxt *bp, void *req, void *new_req, u32 len)
+{
+ struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+ struct input *internal_req = req;
+ u16 req_type;
+
+ if (!ctx)
+ return -EINVAL;
+
+ if (len > BNXT_HWRM_CTX_OFFSET)
+ return -E2BIG;
+
+ /* free any existing slices */
+ ctx->allocated = BNXT_HWRM_DMA_SIZE - BNXT_HWRM_CTX_OFFSET;
+ if (ctx->slice_addr) {
+ dma_free_coherent(&bp->pdev->dev, ctx->slice_size,
+ ctx->slice_addr, ctx->slice_handle);
+ ctx->slice_addr = NULL;
+ }
+ ctx->gfp = GFP_KERNEL;
+
+ if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) || len > BNXT_HWRM_MAX_REQ_LEN) {
+ memcpy(internal_req, new_req, len);
+ } else {
+ internal_req->req_type = ((struct input *)new_req)->req_type;
+ ctx->req = new_req;
+ }
+
+ ctx->req_len = len;
+ ctx->req->resp_addr = cpu_to_le64(ctx->dma_handle +
+ BNXT_HWRM_RESP_OFFSET);
+
+ /* update sentinel for potentially new request type */
+ req_type = le16_to_cpu(internal_req->req_type);
+ ctx->sentinel = hwrm_calc_sentinel(ctx, req_type);
+
+ return 0;
+}
+
+/**
+ * hwrm_req_flags() - Set non internal flags of the ctx
+ * @bp: The driver context.
+ * @req: The request containing the HWRM command
+ * @flags: ctx flags that don't have BNXT_HWRM_INTERNAL_FLAG set
+ *
+ * ctx flags can be used by the callers to instruct how the subsequent
+ * hwrm_req_send() should behave. Example: callers can use hwrm_req_flags
+ * with BNXT_HWRM_CTX_SILENT to omit kernel prints of errors of hwrm_req_send()
+ * or with BNXT_HWRM_FULL_WAIT to force hwrm_req_send() to wait for full timeout
+ * even if FW is not responding.
+ * This generic function can be used to set any flag that is not an internal flag
+ * of the HWRM module.
+ */
+void hwrm_req_flags(struct bnxt *bp, void *req, enum bnxt_hwrm_ctx_flags flags)
+{
+ struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+
+ if (ctx)
+ ctx->flags |= (flags & HWRM_API_FLAGS);
+}
+
+/**
+ * hwrm_req_hold() - Claim ownership of the request's resources.
+ * @bp: The driver context.
+ * @req: A pointer to the request to own. The request will no longer be
+ * consumed by calls to hwrm_req_send().
+ *
+ * Take ownership of the request. Ownership places responsibility on the
+ * caller to free the resources associated with the request via a call to
+ * hwrm_req_drop(). The caller taking ownership implies that a subsequent
+ * call to hwrm_req_send() will not consume the request (ie. sending will
+ * not free the associated resources if the request is owned by the caller).
+ * Taking ownership returns a reference to the response. Retaining and
+ * accessing the response data is the most common reason to take ownership
+ * of the request. Ownership can also be acquired in order to reuse the same
+ * request object across multiple invocations of hwrm_req_send().
+ *
+ * Return: A pointer to the response object.
+ *
+ * The resources associated with the response will remain available to the
+ * caller until ownership of the request is relinquished via a call to
+ * hwrm_req_drop(). It is not possible for hwrm_req_hold() to return NULL if
+ * a valid request is provided. A returned NULL value would imply a driver
+ * bug and the implementation will complain loudly in the logs to aid in
+ * detection. It should not be necessary to check the result for NULL.
+ */
+void *hwrm_req_hold(struct bnxt *bp, void *req)
+{
+ struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+ struct input *input = (struct input *)req;
+
+ if (!ctx)
+ return NULL;
+
+ if (ctx->flags & BNXT_HWRM_INTERNAL_CTX_OWNED) {
+ /* can only be due to software bug, be loud */
+ netdev_err(bp->dev, "HWRM context already owned, req_type = %u\n",
+ (u32)le16_to_cpu(input->req_type));
+ dump_stack();
+ return NULL;
+ }
+
+ ctx->flags |= BNXT_HWRM_INTERNAL_CTX_OWNED;
+ return ((u8 *)req) + BNXT_HWRM_RESP_OFFSET;
+}
+
+static void __hwrm_ctx_drop(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx)
+{
+ void *addr = ((u8 *)ctx) - BNXT_HWRM_CTX_OFFSET;
+ dma_addr_t dma_handle = ctx->dma_handle; /* save before invalidate */
+
+ /* unmap any auxiliary DMA slice */
+ if (ctx->slice_addr)
+ dma_free_coherent(&bp->pdev->dev, ctx->slice_size,
+ ctx->slice_addr, ctx->slice_handle);
+
+ /* invalidate, ensure ownership, sentinel and dma_handle are cleared */
+ memset(ctx, 0, sizeof(struct bnxt_hwrm_ctx));
+
+ /* return the buffer to the DMA pool */
+ if (dma_handle)
+ dma_pool_free(bp->hwrm_dma_pool, addr, dma_handle);
+}
+
+/**
+ * hwrm_req_drop() - Release all resources associated with the request.
+ * @bp: The driver context.
+ * @req: The request to consume, releasing the associated resources. The
+ * request object, any slices, and its associated response are no
+ * longer valid.
+ *
+ * It is legal to call hwrm_req_drop() on an unowned request, provided it
+ * has not already been consumed by hwrm_req_send() (for example, to release
+ * an aborted request). A given request should not be dropped more than once,
+ * nor should it be dropped after having been consumed by hwrm_req_send(). To
+ * do so is an error (the context will not be found and a stack trace will be
+ * rendered in the kernel log).
+ */
+void hwrm_req_drop(struct bnxt *bp, void *req)
+{
+ struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+
+ if (ctx)
+ __hwrm_ctx_drop(bp, ctx);
+}
+
+static int __hwrm_to_stderr(u32 hwrm_err)
+{
+ switch (hwrm_err) {
+ case HWRM_ERR_CODE_SUCCESS:
+ return 0;
+ case HWRM_ERR_CODE_RESOURCE_LOCKED:
+ return -EROFS;
+ case HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED:
+ return -EACCES;
+ case HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR:
+ return -ENOSPC;
+ case HWRM_ERR_CODE_INVALID_PARAMS:
+ case HWRM_ERR_CODE_INVALID_FLAGS:
+ case HWRM_ERR_CODE_INVALID_ENABLES:
+ case HWRM_ERR_CODE_UNSUPPORTED_TLV:
+ case HWRM_ERR_CODE_UNSUPPORTED_OPTION_ERR:
+ return -EINVAL;
+ case HWRM_ERR_CODE_NO_BUFFER:
+ return -ENOMEM;
+ case HWRM_ERR_CODE_HOT_RESET_PROGRESS:
+ case HWRM_ERR_CODE_BUSY:
+ return -EAGAIN;
+ case HWRM_ERR_CODE_CMD_NOT_SUPPORTED:
+ return -EOPNOTSUPP;
+ default:
+ return -EIO;
+ }
+}
+
+static struct bnxt_hwrm_wait_token *
+__hwrm_acquire_token(struct bnxt *bp, enum bnxt_hwrm_chnl dst)
+{
+ struct bnxt_hwrm_wait_token *token;
+
+ token = kzalloc(sizeof(*token), GFP_KERNEL);
+ if (!token)
+ return NULL;
+
+ mutex_lock(&bp->hwrm_cmd_lock);
+
+ token->dst = dst;
+ token->state = BNXT_HWRM_PENDING;
+ if (dst == BNXT_HWRM_CHNL_CHIMP) {
+ token->seq_id = bp->hwrm_cmd_seq++;
+ hlist_add_head_rcu(&token->node, &bp->hwrm_pending_list);
+ } else {
+ token->seq_id = bp->hwrm_cmd_kong_seq++;
+ }
+
+ return token;
+}
+
+static void
+__hwrm_release_token(struct bnxt *bp, struct bnxt_hwrm_wait_token *token)
+{
+ if (token->dst == BNXT_HWRM_CHNL_CHIMP) {
+ hlist_del_rcu(&token->node);
+ kfree_rcu(token, rcu);
+ } else {
+ kfree(token);
+ }
+ mutex_unlock(&bp->hwrm_cmd_lock);
+}
+
+void
+hwrm_update_token(struct bnxt *bp, u16 seq_id, enum bnxt_hwrm_wait_state state)
+{
+ struct bnxt_hwrm_wait_token *token;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(token, &bp->hwrm_pending_list, node) {
+ if (token->seq_id == seq_id) {
+ WRITE_ONCE(token->state, state);
+ rcu_read_unlock();
+ return;
+ }
+ }
+ rcu_read_unlock();
+ netdev_err(bp->dev, "Invalid hwrm seq id %d\n", seq_id);
+}
+
+static int __hwrm_send(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx)
+{
+ u32 doorbell_offset = BNXT_GRCPF_REG_CHIMP_COMM_TRIGGER;
+ enum bnxt_hwrm_chnl dst = BNXT_HWRM_CHNL_CHIMP;
+ u32 bar_offset = BNXT_GRCPF_REG_CHIMP_COMM;
+ struct bnxt_hwrm_wait_token *token = NULL;
+ struct hwrm_short_input short_input = {0};
+ u16 max_req_len = BNXT_HWRM_MAX_REQ_LEN;
+ unsigned int i, timeout, tmo_count;
+ u32 *data = (u32 *)ctx->req;
+ u32 msg_len = ctx->req_len;
+ int rc = -EBUSY;
+ u32 req_type;
+ u16 len = 0;
+ u8 *valid;
+
+ if (ctx->flags & BNXT_HWRM_INTERNAL_RESP_DIRTY)
+ memset(ctx->resp, 0, PAGE_SIZE);
+
+ req_type = le16_to_cpu(ctx->req->req_type);
+ if (BNXT_NO_FW_ACCESS(bp) && req_type != HWRM_FUNC_RESET)
+ goto exit;
+
+ if (msg_len > BNXT_HWRM_MAX_REQ_LEN &&
+ msg_len > bp->hwrm_max_ext_req_len) {
+ rc = -E2BIG;
+ goto exit;
+ }
+
+ if (bnxt_kong_hwrm_message(bp, ctx->req)) {
+ dst = BNXT_HWRM_CHNL_KONG;
+ bar_offset = BNXT_GRCPF_REG_KONG_COMM;
+ doorbell_offset = BNXT_GRCPF_REG_KONG_COMM_TRIGGER;
+ if (le16_to_cpu(ctx->req->cmpl_ring) != INVALID_HW_RING_ID) {
+ netdev_err(bp->dev, "Ring completions not supported for KONG commands, req_type = %d\n",
+ req_type);
+ rc = -EINVAL;
+ goto exit;
+ }
+ }
+
+ token = __hwrm_acquire_token(bp, dst);
+ if (!token) {
+ rc = -ENOMEM;
+ goto exit;
+ }
+ ctx->req->seq_id = cpu_to_le16(token->seq_id);
+
+ if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
+ msg_len > BNXT_HWRM_MAX_REQ_LEN) {
+ short_input.req_type = ctx->req->req_type;
+ short_input.signature =
+ cpu_to_le16(SHORT_REQ_SIGNATURE_SHORT_CMD);
+ short_input.size = cpu_to_le16(msg_len);
+ short_input.req_addr = cpu_to_le64(ctx->dma_handle);
+
+ data = (u32 *)&short_input;
+ msg_len = sizeof(short_input);
+
+ max_req_len = BNXT_HWRM_SHORT_REQ_LEN;
+ }
+
+ /* Ensure any associated DMA buffers are written before doorbell */
+ wmb();
+
+ /* Write request msg to hwrm channel */
+ __iowrite32_copy(bp->bar0 + bar_offset, data, msg_len / 4);
+
+ for (i = msg_len; i < max_req_len; i += 4)
+ writel(0, bp->bar0 + bar_offset + i);
+
+ /* Ring channel doorbell */
+ writel(1, bp->bar0 + doorbell_offset);
+
+ if (!pci_is_enabled(bp->pdev)) {
+ rc = -ENODEV;
+ goto exit;
+ }
+
+ /* Limit timeout to an upper limit */
+ timeout = min_t(uint, ctx->timeout, HWRM_CMD_MAX_TIMEOUT);
+ /* convert timeout to usec */
+ timeout *= 1000;
+
+ i = 0;
+ /* Short timeout for the first few iterations:
+ * number of loops = number of loops for short timeout +
+ * number of loops for standard timeout.
+ */
+ tmo_count = HWRM_SHORT_TIMEOUT_COUNTER;
+ timeout = timeout - HWRM_SHORT_MIN_TIMEOUT * HWRM_SHORT_TIMEOUT_COUNTER;
+ tmo_count += DIV_ROUND_UP(timeout, HWRM_MIN_TIMEOUT);
+
+ if (le16_to_cpu(ctx->req->cmpl_ring) != INVALID_HW_RING_ID) {
+ /* Wait until hwrm response cmpl interrupt is processed */
+ while (READ_ONCE(token->state) < BNXT_HWRM_COMPLETE &&
+ i++ < tmo_count) {
+ /* Abort the wait for completion if the FW health
+ * check has failed.
+ */
+ if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
+ goto exit;
+ /* on first few passes, just barely sleep */
+ if (i < HWRM_SHORT_TIMEOUT_COUNTER) {
+ usleep_range(HWRM_SHORT_MIN_TIMEOUT,
+ HWRM_SHORT_MAX_TIMEOUT);
+ } else {
+ if (HWRM_WAIT_MUST_ABORT(bp, ctx))
+ break;
+ usleep_range(HWRM_MIN_TIMEOUT,
+ HWRM_MAX_TIMEOUT);
+ }
+ }
+
+ if (READ_ONCE(token->state) != BNXT_HWRM_COMPLETE) {
+ if (!(ctx->flags & BNXT_HWRM_CTX_SILENT))
+ netdev_err(bp->dev, "Resp cmpl intr err msg: 0x%x\n",
+ le16_to_cpu(ctx->req->req_type));
+ goto exit;
+ }
+ len = le16_to_cpu(READ_ONCE(ctx->resp->resp_len));
+ valid = ((u8 *)ctx->resp) + len - 1;
+ } else {
+ __le16 seen_out_of_seq = ctx->req->seq_id; /* will never see */
+ int j;
+
+ /* Check if response len is updated */
+ for (i = 0; i < tmo_count; i++) {
+ /* Abort the wait for completion if the FW health
+ * check has failed.
+ */
+ if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
+ goto exit;
+
+ if (token &&
+ READ_ONCE(token->state) == BNXT_HWRM_DEFERRED) {
+ __hwrm_release_token(bp, token);
+ token = NULL;
+ }
+
+ len = le16_to_cpu(READ_ONCE(ctx->resp->resp_len));
+ if (len) {
+ __le16 resp_seq = READ_ONCE(ctx->resp->seq_id);
+
+ if (resp_seq == ctx->req->seq_id)
+ break;
+ if (resp_seq != seen_out_of_seq) {
+ netdev_warn(bp->dev, "Discarding out of seq response: 0x%x for msg {0x%x 0x%x}\n",
+ le16_to_cpu(resp_seq),
+ le16_to_cpu(ctx->req->req_type),
+ le16_to_cpu(ctx->req->seq_id));
+ seen_out_of_seq = resp_seq;
+ }
+ }
+
+ /* on first few passes, just barely sleep */
+ if (i < HWRM_SHORT_TIMEOUT_COUNTER) {
+ usleep_range(HWRM_SHORT_MIN_TIMEOUT,
+ HWRM_SHORT_MAX_TIMEOUT);
+ } else {
+ if (HWRM_WAIT_MUST_ABORT(bp, ctx))
+ goto timeout_abort;
+ usleep_range(HWRM_MIN_TIMEOUT,
+ HWRM_MAX_TIMEOUT);
+ }
+ }
+
+ if (i >= tmo_count) {
+timeout_abort:
+ if (!(ctx->flags & BNXT_HWRM_CTX_SILENT))
+ netdev_err(bp->dev, "Error (timeout: %u) msg {0x%x 0x%x} len:%d\n",
+ hwrm_total_timeout(i),
+ le16_to_cpu(ctx->req->req_type),
+ le16_to_cpu(ctx->req->seq_id), len);
+ goto exit;
+ }
+
+ /* Last byte of resp contains valid bit */
+ valid = ((u8 *)ctx->resp) + len - 1;
+ for (j = 0; j < HWRM_VALID_BIT_DELAY_USEC; j++) {
+ /* make sure we read from updated DMA memory */
+ dma_rmb();
+ if (*valid)
+ break;
+ usleep_range(1, 5);
+ }
+
+ if (j >= HWRM_VALID_BIT_DELAY_USEC) {
+ if (!(ctx->flags & BNXT_HWRM_CTX_SILENT))
+ netdev_err(bp->dev, "Error (timeout: %u) msg {0x%x 0x%x} len:%d v:%d\n",
+ hwrm_total_timeout(i),
+ le16_to_cpu(ctx->req->req_type),
+ le16_to_cpu(ctx->req->seq_id), len,
+ *valid);
+ goto exit;
+ }
+ }
+
+ /* Zero valid bit for compatibility. Valid bit in an older spec
+ * may become a new field in a newer spec. We must make sure that
+ * a new field not implemented by old spec will read zero.
+ */
+ *valid = 0;
+ rc = le16_to_cpu(ctx->resp->error_code);
+ if (rc && !(ctx->flags & BNXT_HWRM_CTX_SILENT)) {
+ netdev_err(bp->dev, "hwrm req_type 0x%x seq id 0x%x error 0x%x\n",
+ le16_to_cpu(ctx->resp->req_type),
+ le16_to_cpu(ctx->resp->seq_id), rc);
+ }
+ rc = __hwrm_to_stderr(rc);
+exit:
+ if (token)
+ __hwrm_release_token(bp, token);
+ if (ctx->flags & BNXT_HWRM_INTERNAL_CTX_OWNED)
+ ctx->flags |= BNXT_HWRM_INTERNAL_RESP_DIRTY;
+ else
+ __hwrm_ctx_drop(bp, ctx);
+ return rc;
+}
+
+/**
+ * hwrm_req_send() - Execute an HWRM command.
+ * @bp: The driver context.
+ * @req: A pointer to the request to send. The DMA resources associated with
+ * the request will be released (ie. the request will be consumed) unless
+ * ownership of the request has been assumed by the caller via a call to
+ * hwrm_req_hold().
+ *
+ * Send an HWRM request to the device and wait for a response. The request is
+ * consumed if it is not owned by the caller. This function will block until
+ * the request has either completed or times out due to an error.
+ *
+ * Return: A result code.
+ *
+ * The result is zero on success, otherwise the negative error code indicates
+ * one of the following errors:
+ * E2BIG: The request was too large.
+ * EBUSY: The firmware is in a fatal state or the request timed out.
+ * EACCES: HWRM access denied.
+ * ENOSPC: HWRM resource allocation error.
+ * EINVAL: Request parameters are invalid.
+ * ENOMEM: HWRM has no buffers.
+ * EAGAIN: HWRM busy or reset in progress.
+ * EOPNOTSUPP: Invalid request type.
+ * EIO: Any other error.
+ * Error handling is orthogonal to request ownership. An unowned request will
+ * still be consumed on error. If the caller owns the request, then the caller
+ * is responsible for releasing the resources. Otherwise, hwrm_req_send() will
+ * always consume the request.
+ */
+int hwrm_req_send(struct bnxt *bp, void *req)
+{
+ struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+
+ if (!ctx)
+ return -EINVAL;
+
+ return __hwrm_send(bp, ctx);
+}
+
+/**
+ * hwrm_req_send_silent() - A silent version of hwrm_req_send().
+ * @bp: The driver context.
+ * @req: The request to send without logging.
+ *
+ * The same as hwrm_req_send(), except that the request is first silenced via
+ * hwrm_req_flags() with BNXT_HWRM_CTX_SILENT. This version of the function is
+ * provided solely to preserve the legacy API’s flavor for this functionality.
+ *
+ * Return: A result code, see hwrm_req_send().
+ */
+int hwrm_req_send_silent(struct bnxt *bp, void *req)
+{
+ hwrm_req_flags(bp, req, BNXT_HWRM_CTX_SILENT);
+ return hwrm_req_send(bp, req);
+}
+
+/**
+ * hwrm_req_dma_slice() - Allocate a slice of DMA mapped memory.
+ * @bp: The driver context.
+ * @req: The request for which indirect data will be associated.
+ * @size: The size of the allocation.
+ * @dma_handle: The bus address associated with the allocation. The HWRM API
+ * has no knowledge about the type of the request and so cannot infer how
+ * the caller intends to use the indirect data. Thus, the caller is
+ * responsible for configuring the request object appropriately to
+ * point to the associated indirect memory. Note, DMA handle has the
+ * same definition as it does in dma_alloc_coherent(); the caller is
+ * responsible for endian conversions via cpu_to_le64() before assigning
+ * this address.
+ *
+ * Allocates DMA mapped memory for indirect data related to a request. The
+ * lifetime of the DMA resources will be bound to that of the request (ie.
+ * they will be automatically released when the request is either consumed by
+ * hwrm_req_send() or dropped by hwrm_req_drop()). Small allocations are
+ * efficiently suballocated out of the request buffer space, hence the name
+ * slice, while larger requests are satisfied via an underlying call to
+ * dma_alloc_coherent(). Multiple suballocations are supported, however, only
+ * one externally mapped region is.
+ *
+ * Return: The kernel virtual address of the DMA mapping.
+ */
+void *
+hwrm_req_dma_slice(struct bnxt *bp, void *req, u32 size, dma_addr_t *dma_handle)
+{
+ struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+ u8 *end = ((u8 *)req) + BNXT_HWRM_DMA_SIZE;
+ struct input *input = req;
+ u8 *addr, *req_addr = req;
+ u32 max_offset, offset;
+
+ if (!ctx)
+ return NULL;
+
+ max_offset = BNXT_HWRM_DMA_SIZE - ctx->allocated;
+ offset = max_offset - size;
+ offset = ALIGN_DOWN(offset, BNXT_HWRM_DMA_ALIGN);
+ addr = req_addr + offset;
+
+ if (addr < req_addr + max_offset && req_addr + ctx->req_len <= addr) {
+ ctx->allocated = end - addr;
+ *dma_handle = ctx->dma_handle + offset;
+ return addr;
+ }
+
+ /* could not suballocate from ctx buffer, try create a new mapping */
+ if (ctx->slice_addr) {
+ /* if one exists, can only be due to software bug, be loud */
+ netdev_err(bp->dev, "HWRM refusing to reallocate DMA slice, req_type = %u\n",
+ (u32)le16_to_cpu(input->req_type));
+ dump_stack();
+ return NULL;
+ }
+
+ addr = dma_alloc_coherent(&bp->pdev->dev, size, dma_handle, ctx->gfp);
+
+ if (!addr)
+ return NULL;
+
+ ctx->slice_addr = addr;
+ ctx->slice_size = size;
+ ctx->slice_handle = *dma_handle;
+
+ return addr;
+}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h
new file mode 100644
index 000000000000..4d17f0d5363b
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h
@@ -0,0 +1,145 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2020 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef BNXT_HWRM_H
+#define BNXT_HWRM_H
+
+#include "bnxt_hsi.h"
+
+/* Bit flags stored in struct bnxt_hwrm_ctx::flags and accepted by
+ * hwrm_req_flags(). The BNXT_HWRM_INTERNAL_* bits are left out of
+ * HWRM_API_FLAGS; the remaining bits make up that mask.
+ */
+enum bnxt_hwrm_ctx_flags {
+ /* Update the HWRM_API_FLAGS right below for any new non-internal bit added here */
+ BNXT_HWRM_INTERNAL_CTX_OWNED = BIT(0), /* caller owns the context */
+ BNXT_HWRM_INTERNAL_RESP_DIRTY = BIT(1), /* response contains data */
+ BNXT_HWRM_CTX_SILENT = BIT(2), /* squelch firmware errors */
+ BNXT_HWRM_FULL_WAIT = BIT(3), /* wait for full timeout of HWRM command */
+};
+
+#define HWRM_API_FLAGS (BNXT_HWRM_CTX_SILENT | BNXT_HWRM_FULL_WAIT)
+
+/* Bookkeeping attached to one HWRM request buffer (looked up from the request
+ * pointer via __hwrm_ctx()).
+ */
+struct bnxt_hwrm_ctx {
+ /* NOTE(review): presumably holds BNXT_HWRM_SENTINEL to validate the ctx - confirm */
+ u64 sentinel;
+ /* DMA address of the start of the request buffer; slice handles are offsets from it */
+ dma_addr_t dma_handle;
+ struct output *resp;
+ struct input *req;
+ /* DMA handle, CPU address and size of the single dma_alloc_coherent()
+  * region obtained by hwrm_req_dma_slice() when suballocation fails
+  */
+ dma_addr_t slice_handle;
+ void *slice_addr;
+ u32 slice_size;
+ /* bytes at the start of the buffer occupied by the request; slices never start below this */
+ u32 req_len;
+ enum bnxt_hwrm_ctx_flags flags;
+ unsigned int timeout;
+ /* bytes in use counted down from the end of the BNXT_HWRM_DMA_SIZE buffer */
+ u32 allocated;
+ /* allocation flags passed to dma_alloc_coherent() for external slices */
+ gfp_t gfp;
+};
+
+/* State recorded in a struct bnxt_hwrm_wait_token and set through
+ * hwrm_update_token().
+ */
+enum bnxt_hwrm_wait_state {
+ BNXT_HWRM_PENDING,
+ BNXT_HWRM_DEFERRED,
+ BNXT_HWRM_COMPLETE,
+ BNXT_HWRM_CANCELLED,
+};
+
+/* Destination channel of a request; stored in bnxt_hwrm_wait_token::dst. */
+enum bnxt_hwrm_chnl { BNXT_HWRM_CHNL_CHIMP, BNXT_HWRM_CHNL_KONG };
+
+/* Tracks one outstanding request: its sequence id, destination channel and
+ * current wait state. The rcu_head and hlist_node members suggest tokens live
+ * on an RCU-managed hash list - NOTE(review): confirm against bnxt_hwrm.c.
+ */
+struct bnxt_hwrm_wait_token {
+ struct rcu_head rcu;
+ struct hlist_node node;
+ enum bnxt_hwrm_wait_state state;
+ enum bnxt_hwrm_chnl dst;
+ u16 seq_id;
+};
+
+void hwrm_update_token(struct bnxt *bp, u16 seq, enum bnxt_hwrm_wait_state s);
+
+#define BNXT_HWRM_MAX_REQ_LEN (bp->hwrm_max_req_len)
+#define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input)
+#define HWRM_CMD_MAX_TIMEOUT 40000
+#define SHORT_HWRM_CMD_TIMEOUT 20
+#define HWRM_CMD_TIMEOUT (bp->hwrm_cmd_timeout)
+#define HWRM_RESET_TIMEOUT ((HWRM_CMD_TIMEOUT) * 4)
+#define HWRM_COREDUMP_TIMEOUT ((HWRM_CMD_TIMEOUT) * 12)
+#define BNXT_HWRM_TARGET 0xffff
+#define BNXT_HWRM_NO_CMPL_RING -1
+#define BNXT_HWRM_REQ_MAX_SIZE 128
+#define BNXT_HWRM_DMA_SIZE (2 * PAGE_SIZE) /* space for req+resp */
+#define BNXT_HWRM_RESP_RESERVED PAGE_SIZE
+#define BNXT_HWRM_RESP_OFFSET (BNXT_HWRM_DMA_SIZE - \
+ BNXT_HWRM_RESP_RESERVED)
+#define BNXT_HWRM_CTX_OFFSET (BNXT_HWRM_RESP_OFFSET - \
+ sizeof(struct bnxt_hwrm_ctx))
+#define BNXT_HWRM_DMA_ALIGN 16
+#define BNXT_HWRM_SENTINEL 0xb6e1f68a12e9a7eb /* arbitrary value */
+#define BNXT_HWRM_REQS_PER_PAGE (BNXT_PAGE_SIZE / \
+ BNXT_HWRM_REQ_MAX_SIZE)
+#define HWRM_SHORT_MIN_TIMEOUT 3
+#define HWRM_SHORT_MAX_TIMEOUT 10
+#define HWRM_SHORT_TIMEOUT_COUNTER 5
+
+#define HWRM_MIN_TIMEOUT 25
+#define HWRM_MAX_TIMEOUT 40
+
+#define HWRM_WAIT_MUST_ABORT(bp, ctx) \
+ (le16_to_cpu((ctx)->req->req_type) != HWRM_VER_GET && \
+ !bnxt_is_fw_healthy(bp))
+
+/* Cumulative wait after @n polling iterations: each of the first
+ * HWRM_SHORT_TIMEOUT_COUNTER iterations counts HWRM_SHORT_MIN_TIMEOUT, and
+ * every later iteration counts HWRM_MIN_TIMEOUT.
+ */
+static inline unsigned int hwrm_total_timeout(unsigned int n)
+{
+	if (n <= HWRM_SHORT_TIMEOUT_COUNTER)
+		return n * HWRM_SHORT_MIN_TIMEOUT;
+
+	return HWRM_SHORT_TIMEOUT_COUNTER * HWRM_SHORT_MIN_TIMEOUT +
+	       (n - HWRM_SHORT_TIMEOUT_COUNTER) * HWRM_MIN_TIMEOUT;
+}
+
+
+#define HWRM_VALID_BIT_DELAY_USEC 150
+
+/* Report whether @req_type (CPU byte order) is one of the CFA encap/decap,
+ * flow or meter commands listed below.
+ */
+static inline bool bnxt_cfa_hwrm_message(u16 req_type)
+{
+	bool is_cfa;
+
+	switch (req_type) {
+	case HWRM_CFA_ENCAP_RECORD_ALLOC:
+	case HWRM_CFA_ENCAP_RECORD_FREE:
+	case HWRM_CFA_DECAP_FILTER_ALLOC:
+	case HWRM_CFA_DECAP_FILTER_FREE:
+	case HWRM_CFA_EM_FLOW_ALLOC:
+	case HWRM_CFA_EM_FLOW_FREE:
+	case HWRM_CFA_EM_FLOW_CFG:
+	case HWRM_CFA_FLOW_ALLOC:
+	case HWRM_CFA_FLOW_FREE:
+	case HWRM_CFA_FLOW_INFO:
+	case HWRM_CFA_FLOW_FLUSH:
+	case HWRM_CFA_FLOW_STATS:
+	case HWRM_CFA_METER_PROFILE_ALLOC:
+	case HWRM_CFA_METER_PROFILE_FREE:
+	case HWRM_CFA_METER_PROFILE_CFG:
+	case HWRM_CFA_METER_INSTANCE_ALLOC:
+	case HWRM_CFA_METER_INSTANCE_FREE:
+		is_cfa = true;
+		break;
+	default:
+		is_cfa = false;
+		break;
+	}
+
+	return is_cfa;
+}
+
+/* Decide whether @req matches the KONG criteria: the firmware must advertise
+ * BNXT_FW_CAP_KONG_MB_CHNL, and the request must either be a CFA command or
+ * carry HWRM_TARGET_ID_KONG as its target id.
+ */
+static inline bool bnxt_kong_hwrm_message(struct bnxt *bp, struct input *req)
+{
+	if (!(bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL))
+		return false;
+
+	if (bnxt_cfa_hwrm_message(le16_to_cpu(req->req_type)))
+		return true;
+
+	return le16_to_cpu(req->target_id) == HWRM_TARGET_ID_KONG;
+}
+
+int __hwrm_req_init(struct bnxt *bp, void **req, u16 req_type, u32 req_len);
+#define hwrm_req_init(bp, req, req_type) \
+ __hwrm_req_init((bp), (void **)&(req), (req_type), sizeof(*(req)))
+void *hwrm_req_hold(struct bnxt *bp, void *req);
+void hwrm_req_drop(struct bnxt *bp, void *req);
+void hwrm_req_flags(struct bnxt *bp, void *req, enum bnxt_hwrm_ctx_flags flags);
+void hwrm_req_timeout(struct bnxt *bp, void *req, unsigned int timeout);
+int hwrm_req_send(struct bnxt *bp, void *req);
+int hwrm_req_send_silent(struct bnxt *bp, void *req);
+int hwrm_req_replace(struct bnxt *bp, void *req, void *new_req, u32 len);
+void hwrm_req_alloc_flags(struct bnxt *bp, void *req, gfp_t flags);
+void *hwrm_req_dma_slice(struct bnxt *bp, void *req, u32 size, dma_addr_t *dma);
+#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index ec381c2423b8..f0aa480799ca 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -18,9 +18,10 @@
#include <linux/ptp_classify.h>
#include "bnxt_hsi.h"
#include "bnxt.h"
+#include "bnxt_hwrm.h"
#include "bnxt_ptp.h"
-int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id)
+int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off)
{
unsigned int ptp_class;
struct ptp_header *hdr;
@@ -34,6 +35,7 @@ int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id)
if (!hdr)
return -EINVAL;
+ *hdr_off = (u8 *)hdr - skb->data;
*seq_id = ntohs(hdr->sequence_id);
return 0;
default:
@@ -55,16 +57,19 @@ static int bnxt_ptp_settime(struct ptp_clock_info *ptp_info,
}
/* Caller holds ptp_lock */
-static u64 bnxt_refclk_read(struct bnxt *bp, struct ptp_system_timestamp *sts)
+static int bnxt_refclk_read(struct bnxt *bp, struct ptp_system_timestamp *sts,
+ u64 *ns)
{
struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
- u64 ns;
+
+ if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+ return -EIO;
ptp_read_system_prets(sts);
- ns = readl(bp->bar0 + ptp->refclk_mapped_regs[0]);
+ *ns = readl(bp->bar0 + ptp->refclk_mapped_regs[0]);
ptp_read_system_postts(sts);
- ns |= (u64)readl(bp->bar0 + ptp->refclk_mapped_regs[1]) << 32;
- return ns;
+ *ns |= (u64)readl(bp->bar0 + ptp->refclk_mapped_regs[1]) << 32;
+ return 0;
}
static void bnxt_ptp_get_current_time(struct bnxt *bp)
@@ -75,29 +80,34 @@ static void bnxt_ptp_get_current_time(struct bnxt *bp)
return;
spin_lock_bh(&ptp->ptp_lock);
WRITE_ONCE(ptp->old_time, ptp->current_time);
- ptp->current_time = bnxt_refclk_read(bp, NULL);
+ bnxt_refclk_read(bp, NULL, &ptp->current_time);
spin_unlock_bh(&ptp->ptp_lock);
}
static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts)
{
- struct hwrm_port_ts_query_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_port_ts_query_input req = {0};
+ struct hwrm_port_ts_query_output *resp;
+ struct hwrm_port_ts_query_input *req;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_TS_QUERY, -1, -1);
- req.flags = cpu_to_le32(flags);
+ rc = hwrm_req_init(bp, req, HWRM_PORT_TS_QUERY);
+ if (rc)
+ return rc;
+
+ req->flags = cpu_to_le32(flags);
if ((flags & PORT_TS_QUERY_REQ_FLAGS_PATH) ==
PORT_TS_QUERY_REQ_FLAGS_PATH_TX) {
- req.enables = cpu_to_le16(BNXT_PTP_QTS_TX_ENABLES);
- req.ptp_seq_id = cpu_to_le32(bp->ptp_cfg->tx_seqid);
- req.ts_req_timeout = cpu_to_le16(BNXT_PTP_QTS_TIMEOUT);
+ req->enables = cpu_to_le16(BNXT_PTP_QTS_TX_ENABLES);
+ req->ptp_seq_id = cpu_to_le32(bp->ptp_cfg->tx_seqid);
+ req->ptp_hdr_offset = cpu_to_le16(bp->ptp_cfg->tx_hdr_off);
+ req->ts_req_timeout = cpu_to_le16(BNXT_PTP_QTS_TIMEOUT);
}
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ resp = hwrm_req_hold(bp, req);
+
+ rc = hwrm_req_send(bp, req);
if (!rc)
*ts = le64_to_cpu(resp->ptp_msg_ts);
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -108,9 +118,14 @@ static int bnxt_ptp_gettimex(struct ptp_clock_info *ptp_info,
struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
ptp_info);
u64 ns, cycles;
+ int rc;
spin_lock_bh(&ptp->ptp_lock);
- cycles = bnxt_refclk_read(ptp->bp, sts);
+ rc = bnxt_refclk_read(ptp->bp, sts, &cycles);
+ if (rc) {
+ spin_unlock_bh(&ptp->ptp_lock);
+ return rc;
+ }
ns = timecounter_cyc2time(&ptp->tc, cycles);
spin_unlock_bh(&ptp->ptp_lock);
*ts = ns_to_timespec64(ns);
@@ -133,33 +148,246 @@ static int bnxt_ptp_adjfreq(struct ptp_clock_info *ptp_info, s32 ppb)
{
struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
ptp_info);
- struct hwrm_port_mac_cfg_input req = {0};
+ struct hwrm_port_mac_cfg_input *req;
struct bnxt *bp = ptp->bp;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_MAC_CFG, -1, -1);
- req.ptp_freq_adj_ppb = cpu_to_le32(ppb);
- req.enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_PTP_FREQ_ADJ_PPB);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_CFG);
+ if (rc)
+ return rc;
+
+ req->ptp_freq_adj_ppb = cpu_to_le32(ppb);
+ req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_PTP_FREQ_ADJ_PPB);
+ rc = hwrm_req_send(ptp->bp, req);
if (rc)
netdev_err(ptp->bp->dev,
"ptp adjfreq failed. rc = %d\n", rc);
return rc;
}
-static int bnxt_ptp_enable(struct ptp_clock_info *ptp,
+/* Translate a PPS timestamp async event (@data1/@data2 as delivered by
+ * firmware) into a ptp_clock_event and hand it to the PTP core.
+ */
+void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2)
+{
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+	struct ptp_clock_event event;
+	u64 ns, pps_ts;
+
+	/* same guard as bnxt_ptp_reapply_pps(): nothing to do without PTP state */
+	if (!ptp)
+		return;
+
+	/* convert the raw counter value to nanoseconds under the tc lock */
+	pps_ts = EVENT_PPS_TS(data2, data1);
+	spin_lock_bh(&ptp->ptp_lock);
+	ns = timecounter_cyc2time(&ptp->tc, pps_ts);
+	spin_unlock_bh(&ptp->ptp_lock);
+
+	switch (EVENT_DATA2_PPS_EVENT_TYPE(data2)) {
+	case ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE_INTERNAL:
+		event.pps_times.ts_real = ns_to_timespec64(ns);
+		event.type = PTP_CLOCK_PPSUSR;
+		event.index = EVENT_DATA2_PPS_PIN_NUM(data2);
+		break;
+	case ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE_EXTERNAL:
+		event.timestamp = ns;
+		event.type = PTP_CLOCK_EXTTS;
+		event.index = EVENT_DATA2_PPS_PIN_NUM(data2);
+		break;
+	default:
+		/* never forward an uninitialised event to the PTP core */
+		return;
+	}
+
+	ptp_clock_event(ptp->ptp_clock, &event);
+}
+
+/* Program @usage (one of BNXT_PPS_PIN_*) on TSIO pin @pin through
+ * HWRM_FUNC_PTP_PIN_CFG and, on success, mirror the result in ptp->pps_info.
+ * Returns -EOPNOTSUPP for an out-of-range pin, else the HWRM result.
+ */
+static int bnxt_ptp_cfg_pin(struct bnxt *bp, u8 pin, u8 usage)
+{
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+	struct hwrm_func_ptp_pin_cfg_input *req;
+	u8 state = (usage == BNXT_PPS_PIN_NONE) ? 0 : 1;
+	u8 *state_base, *usage_base;
+	int rc;
+
+	if (!TSIO_PIN_VALID(pin)) {
+		netdev_err(ptp->bp->dev, "1PPS: Invalid pin. Check pin-function configuration\n");
+		return -EOPNOTSUPP;
+	}
+
+	rc = hwrm_req_init(ptp->bp, req, HWRM_FUNC_PTP_PIN_CFG);
+	if (rc)
+		return rc;
+
+	/* the per-pin enable bits are the pin0 pair shifted left by 2 * pin */
+	req->enables = cpu_to_le32((FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN0_STATE |
+				    FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN0_USAGE) <<
+				   (pin * 2));
+
+	/* pinN_state/pinN_usage are addressed as byte offsets of 2 * pin from
+	 * the pin0 fields
+	 */
+	state_base = &req->pin0_state;
+	usage_base = &req->pin0_usage;
+	state_base[pin * 2] = state;
+	usage_base[pin * 2] = usage;
+
+	rc = hwrm_req_send(ptp->bp, req);
+	if (rc)
+		return rc;
+
+	ptp->pps_info.pins[pin].usage = usage;
+	ptp->pps_info.pins[pin].state = state;
+
+	return 0;
+}
+
+/* Send HWRM_FUNC_PTP_CFG with only the PPS event field enabled and set to
+ * @event. Returns the hwrm_req_init()/hwrm_req_send() result.
+ */
+static int bnxt_ptp_cfg_event(struct bnxt *bp, u8 event)
+{
+	struct hwrm_func_ptp_cfg_input *req;
+	int rc;
+
+	rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_CFG);
+	if (!rc) {
+		req->enables =
+			cpu_to_le16(FUNC_PTP_CFG_REQ_ENABLES_PTP_PPS_EVENT);
+		req->ptp_pps_event = event;
+		rc = hwrm_req_send(bp, req);
+	}
+
+	return rc;
+}
+
+/* Re-send the cached pin usage (and PPS event, if any) for every pin whose
+ * cached state is non-zero. Does nothing unless PTP state exists, firmware
+ * reports BNXT_FW_CAP_PTP_PPS and a pin_config table was allocated.
+ */
+void bnxt_ptp_reapply_pps(struct bnxt *bp)
+{
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+	struct pps_pin *cur;
+	u32 pin;
+	int rc;
+
+	if (!ptp)
+		return;
+	if (!(bp->fw_cap & BNXT_FW_CAP_PTP_PPS))
+		return;
+	if (!ptp->ptp_info.pin_config)
+		return;
+
+	for (pin = 0; pin < BNXT_MAX_TSIO_PINS; pin++) {
+		cur = &ptp->pps_info.pins[pin];
+		if (!cur->state)
+			continue;
+
+		rc = bnxt_ptp_cfg_pin(bp, pin, cur->usage);
+		if (!rc && cur->event)
+			rc = bnxt_ptp_cfg_event(bp, cur->event);
+		if (rc)
+			netdev_err(bp->dev, "1PPS: Failed to configure pin%d\n",
+				   pin);
+	}
+}
+
+/* Compute in *@cycles_delta how many counter cycles separate the current PHC
+ * time from @target_ns, using the cyclecounter's shift/mult.
+ * Returns 0, or the error from bnxt_refclk_read().
+ * NOTE(review): the subtraction is unsigned, so a @target_ns earlier than the
+ * current time wraps - confirm callers never pass a start time in the past.
+ */
+static int bnxt_get_target_cycles(struct bnxt_ptp_cfg *ptp, u64 target_ns,
+				  u64 *cycles_delta)
+{
+	u64 raw_cycles;
+	u64 now_ns = 0;
+	int rc;
+
+	/* read the counter and convert it while holding the timecounter lock */
+	spin_lock_bh(&ptp->ptp_lock);
+	rc = bnxt_refclk_read(ptp->bp, NULL, &raw_cycles);
+	if (!rc)
+		now_ns = timecounter_cyc2time(&ptp->tc, raw_cycles);
+	spin_unlock_bh(&ptp->ptp_lock);
+
+	if (rc)
+		return rc;
+
+	*cycles_delta = div64_u64((target_ns - now_ns) << ptp->cc.shift,
+				  ptp->cc.mult);
+	return 0;
+}
+
+/* Configure the periodic output via HWRM_FUNC_PTP_CFG: the period is
+ * NSEC_PER_SEC and the phase is the cycle distance from now to the start time
+ * requested in @rq (only its low 32 bits are sent).
+ * The phase is computed before the request is initialised, so a refclk read
+ * failure returns without a request having been allocated.
+ */
+static int bnxt_ptp_perout_cfg(struct bnxt_ptp_cfg *ptp,
+			       struct ptp_clock_request *rq)
+{
+	struct timespec64 start = {
+		.tv_sec = rq->perout.start.sec,
+		.tv_nsec = rq->perout.start.nsec,
+	};
+	struct hwrm_func_ptp_cfg_input *req;
+	struct bnxt *bp = ptp->bp;
+	u64 start_ns, phase;
+	int rc;
+
+	start_ns = timespec64_to_ns(&start);
+	rc = bnxt_get_target_cycles(ptp, start_ns, &phase);
+	if (rc)
+		return rc;
+
+	rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_CFG);
+	if (rc)
+		return rc;
+
+	req->enables =
+		cpu_to_le16(FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PERIOD |
+			    FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_UP |
+			    FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PHASE);
+
+	/* fields explicitly cleared */
+	req->ptp_pps_event = 0;
+	req->ptp_freq_adj_dll_source = 0;
+	req->ptp_freq_adj_dll_phase = 0;
+	req->ptp_freq_adj_ext_up = 0;
+
+	req->ptp_freq_adj_ext_period = cpu_to_le32(NSEC_PER_SEC);
+	req->ptp_freq_adj_ext_phase_lower = cpu_to_le32(phase);
+
+	return hwrm_req_send(bp, req);
+}
+
+static int bnxt_ptp_enable(struct ptp_clock_info *ptp_info,
struct ptp_clock_request *rq, int on)
{
- return -EOPNOTSUPP;
+ struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
+ ptp_info);
+ struct bnxt *bp = ptp->bp;
+ u8 pin_id;
+ int rc;
+
+ switch (rq->type) {
+ case PTP_CLK_REQ_EXTTS:
+ /* Configure an External PPS IN */
+ pin_id = ptp_find_pin(ptp->ptp_clock, PTP_PF_EXTTS,
+ rq->extts.index);
+ if (!on)
+ break;
+ rc = bnxt_ptp_cfg_pin(bp, pin_id, BNXT_PPS_PIN_PPS_IN);
+ if (rc)
+ return rc;
+ rc = bnxt_ptp_cfg_event(bp, BNXT_PPS_EVENT_EXTERNAL);
+ if (!rc)
+ ptp->pps_info.pins[pin_id].event = BNXT_PPS_EVENT_EXTERNAL;
+ return rc;
+ case PTP_CLK_REQ_PEROUT:
+ /* Configure a Periodic PPS OUT */
+ pin_id = ptp_find_pin(ptp->ptp_clock, PTP_PF_PEROUT,
+ rq->perout.index);
+ if (!on)
+ break;
+
+ rc = bnxt_ptp_cfg_pin(bp, pin_id, BNXT_PPS_PIN_PPS_OUT);
+ if (!rc)
+ rc = bnxt_ptp_perout_cfg(ptp, rq);
+
+ return rc;
+ case PTP_CLK_REQ_PPS:
+ /* Configure PHC PPS IN */
+ rc = bnxt_ptp_cfg_pin(bp, 0, BNXT_PPS_PIN_PPS_IN);
+ if (rc)
+ return rc;
+ rc = bnxt_ptp_cfg_event(bp, BNXT_PPS_EVENT_INTERNAL);
+ if (!rc)
+ ptp->pps_info.pins[0].event = BNXT_PPS_EVENT_INTERNAL;
+ return rc;
+ default:
+ netdev_err(ptp->bp->dev, "Unrecognized PIN function\n");
+ return -EOPNOTSUPP;
+ }
+
+ return bnxt_ptp_cfg_pin(bp, pin_id, BNXT_PPS_PIN_NONE);
}
static int bnxt_hwrm_ptp_cfg(struct bnxt *bp)
{
- struct hwrm_port_mac_cfg_input req = {0};
struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+ struct hwrm_port_mac_cfg_input *req;
u32 flags = 0;
+ int rc;
+
+ rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_CFG);
+ if (rc)
+ return rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_MAC_CFG, -1, -1);
if (ptp->rx_filter)
flags |= PORT_MAC_CFG_REQ_FLAGS_PTP_RX_TS_CAPTURE_ENABLE;
else
@@ -168,11 +396,11 @@ static int bnxt_hwrm_ptp_cfg(struct bnxt *bp)
flags |= PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_ENABLE;
else
flags |= PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_DISABLE;
- req.flags = cpu_to_le32(flags);
- req.enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_RX_TS_CAPTURE_PTP_MSG_TYPE);
- req.rx_ts_capture_ptp_msg_type = cpu_to_le16(ptp->rxctl);
+ req->flags = cpu_to_le32(flags);
+ req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_RX_TS_CAPTURE_PTP_MSG_TYPE);
+ req->rx_ts_capture_ptp_msg_type = cpu_to_le16(ptp->rxctl);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ return hwrm_req_send(bp, req);
}
int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
@@ -309,8 +537,10 @@ static void bnxt_unmap_ptp_regs(struct bnxt *bp)
static u64 bnxt_cc_read(const struct cyclecounter *cc)
{
struct bnxt_ptp_cfg *ptp = container_of(cc, struct bnxt_ptp_cfg, cc);
+ u64 ns = 0;
- return bnxt_refclk_read(ptp->bp, NULL);
+ bnxt_refclk_read(ptp->bp, NULL, &ns);
+ return ns;
}
static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb)
@@ -408,6 +638,87 @@ static const struct ptp_clock_info bnxt_ptp_caps = {
.enable = bnxt_ptp_enable,
};
+/* ptp_clock_info.verify callback: accept a pin function change only when the
+ * pin's cached usage is at most BNXT_PPS_PIN_PPS_OUT and the requested
+ * function is not PTP_PF_PHYSYNC; everything else is -EOPNOTSUPP.
+ */
+static int bnxt_ptp_verify(struct ptp_clock_info *ptp_info, unsigned int pin,
+			   enum ptp_pin_function func, unsigned int chan)
+{
+	struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
+						ptp_info);
+
+	/* Allow only PPS pin function configuration */
+	if (func == PTP_PF_PHYSYNC)
+		return -EOPNOTSUPP;
+	if (ptp->pps_info.pins[pin].usage > BNXT_PPS_PIN_PPS_OUT)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/* Query the firmware pin configuration (HWRM_FUNC_PTP_PIN_QCFG) and build the
+ * ptp_clock_info pin table from it.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when the query fails or reports no pins,
+ * -ENOMEM when the pin table cannot be allocated, or the hwrm_req_init()
+ * error. On success ptp_info->pin_config is a kcalloc() allocation.
+ */
+static int bnxt_ptp_pps_init(struct bnxt *bp)
+{
+	struct hwrm_func_ptp_pin_qcfg_output *resp;
+	struct hwrm_func_ptp_pin_qcfg_input *req;
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+	struct ptp_clock_info *ptp_info;
+	struct bnxt_pps *pps_info;
+	u8 *pin_usg;
+	u32 i;
+	int rc;
+
+	/* Query current/default PIN CFG */
+	rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_PIN_QCFG);
+	if (rc)
+		return rc;
+
+	/* hold the request so resp stays readable until hwrm_req_drop() */
+	resp = hwrm_req_hold(bp, req);
+	rc = hwrm_req_send(bp, req);
+	if (rc || !resp->num_pins) {
+		hwrm_req_drop(bp, req);
+		return -EOPNOTSUPP;
+	}
+
+	ptp_info = &ptp->ptp_info;
+	pps_info = &ptp->pps_info;
+
+	/* pps_info->pins[] has BNXT_MAX_TSIO_PINS entries; never trust a
+	 * larger firmware-reported count when indexing it below
+	 */
+	pps_info->num_pins = resp->num_pins;
+	if (pps_info->num_pins > BNXT_MAX_TSIO_PINS)
+		pps_info->num_pins = BNXT_MAX_TSIO_PINS;
+
+	ptp_info->n_pins = pps_info->num_pins;
+	ptp_info->pin_config = kcalloc(ptp_info->n_pins,
+				       sizeof(*ptp_info->pin_config),
+				       GFP_KERNEL);
+	if (!ptp_info->pin_config) {
+		hwrm_req_drop(bp, req);
+		return -ENOMEM;
+	}
+
+	/* Report the TSIO capability to kernel */
+	pin_usg = &resp->pin0_usage;
+	for (i = 0; i < pps_info->num_pins; i++, pin_usg++) {
+		snprintf(ptp_info->pin_config[i].name,
+			 sizeof(ptp_info->pin_config[i].name), "bnxt_pps%u", i);
+		ptp_info->pin_config[i].index = i;
+		ptp_info->pin_config[i].chan = i;
+		if (*pin_usg == BNXT_PPS_PIN_PPS_IN)
+			ptp_info->pin_config[i].func = PTP_PF_EXTTS;
+		else if (*pin_usg == BNXT_PPS_PIN_PPS_OUT)
+			ptp_info->pin_config[i].func = PTP_PF_PEROUT;
+		else
+			ptp_info->pin_config[i].func = PTP_PF_NONE;
+
+		pps_info->pins[i].usage = *pin_usg;
+	}
+	hwrm_req_drop(bp, req);
+
+	/* Only 1 each of ext_ts and per_out pins is available in HW */
+	ptp_info->n_ext_ts = 1;
+	ptp_info->n_per_out = 1;
+	ptp_info->pps = 1;
+	ptp_info->verify = bnxt_ptp_verify;
+
+	return 0;
+}
+
+/* True when the PPS firmware capability and the presence of a pin_config
+ * table agree (both present or both absent).
+ */
+static bool bnxt_pps_config_ok(struct bnxt *bp)
+{
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+	bool fw_has_pps = !!(bp->fw_cap & BNXT_FW_CAP_PTP_PPS);
+	bool have_pin_table = !!ptp->ptp_info.pin_config;
+
+	return fw_has_pps == have_pin_table;
+}
+
int bnxt_ptp_init(struct bnxt *bp)
{
struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
@@ -420,6 +731,15 @@ int bnxt_ptp_init(struct bnxt *bp)
if (rc)
return rc;
+ if (ptp->ptp_clock && bnxt_pps_config_ok(bp))
+ return 0;
+
+ if (ptp->ptp_clock) {
+ ptp_clock_unregister(ptp->ptp_clock);
+ ptp->ptp_clock = NULL;
+ kfree(ptp->ptp_info.pin_config);
+ ptp->ptp_info.pin_config = NULL;
+ }
atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS);
spin_lock_init(&ptp->ptp_lock);
@@ -433,6 +753,10 @@ int bnxt_ptp_init(struct bnxt *bp)
timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
ptp->ptp_info = bnxt_ptp_caps;
+ if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) {
+ if (bnxt_ptp_pps_init(bp))
+ netdev_err(bp->dev, "1pps not initialized, continuing without 1pps support\n");
+ }
ptp->ptp_clock = ptp_clock_register(&ptp->ptp_info, &bp->pdev->dev);
if (IS_ERR(ptp->ptp_clock)) {
int err = PTR_ERR(ptp->ptp_clock);
@@ -443,7 +767,7 @@ int bnxt_ptp_init(struct bnxt *bp)
}
if (bp->flags & BNXT_FLAG_CHIP_P5) {
spin_lock_bh(&ptp->ptp_lock);
- ptp->current_time = bnxt_refclk_read(bp, NULL);
+ bnxt_refclk_read(bp, NULL, &ptp->current_time);
WRITE_ONCE(ptp->old_time, ptp->current_time);
spin_unlock_bh(&ptp->ptp_lock);
ptp_schedule_worker(ptp->ptp_clock, 0);
@@ -462,6 +786,9 @@ void bnxt_ptp_clear(struct bnxt *bp)
ptp_clock_unregister(ptp->ptp_clock);
ptp->ptp_clock = NULL;
+ kfree(ptp->ptp_info.pin_config);
+ ptp->ptp_info.pin_config = NULL;
+
if (ptp->tx_skb) {
dev_kfree_skb_any(ptp->tx_skb);
ptp->tx_skb = NULL;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
index 254ba7bc0f99..fa5f05708e6d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
@@ -10,8 +10,8 @@
#ifndef BNXT_PTP_H
#define BNXT_PTP_H
-#define BNXT_PTP_GRC_WIN 5
-#define BNXT_PTP_GRC_WIN_BASE 0x5000
+#define BNXT_PTP_GRC_WIN 6
+#define BNXT_PTP_GRC_WIN_BASE 0x6000
#define BNXT_MAX_PHC_DRIFT 31000000
#define BNXT_LO_TIMER_MASK 0x0000ffffffffUL
@@ -19,13 +19,65 @@
#define BNXT_PTP_QTS_TIMEOUT 1000
#define BNXT_PTP_QTS_TX_ENABLES (PORT_TS_QUERY_REQ_ENABLES_PTP_SEQ_ID | \
- PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT)
+ PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT | \
+ PORT_TS_QUERY_REQ_ENABLES_PTP_HDR_OFFSET)
+
+/* Driver-side cache of one TSIO pin's configuration. */
+struct pps_pin {
+ /* BNXT_PPS_EVENT_* configured for the pin, 0 if none was recorded */
+ u8 event;
+ /* BNXT_PPS_PIN_* usage last reported by or programmed into firmware */
+ u8 usage;
+ /* non-zero when usage is anything other than BNXT_PPS_PIN_NONE */
+ u8 state;
+};
+
+#define TSIO_PIN_VALID(pin) ((pin) < (BNXT_MAX_TSIO_PINS))
+
+#define EVENT_DATA2_PPS_EVENT_TYPE(data2) \
+ ((data2) & ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE)
+
+#define EVENT_DATA2_PPS_PIN_NUM(data2) \
+ (((data2) & \
+ ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PIN_NUMBER_MASK) >>\
+ ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PIN_NUMBER_SFT)
+
+#define BNXT_DATA2_UPPER_MSK \
+ ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PPS_TIMESTAMP_UPPER_MASK
+
+#define BNXT_DATA2_UPPER_SFT \
+ (32 - \
+ ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PPS_TIMESTAMP_UPPER_SFT)
+
+#define BNXT_DATA1_LOWER_MSK \
+ ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA1_PPS_TIMESTAMP_LOWER_MASK
+
+#define BNXT_DATA1_LOWER_SFT \
+ ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA1_PPS_TIMESTAMP_LOWER_SFT
+
+#define EVENT_PPS_TS(data2, data1) \
+ (((u64)((data2) & BNXT_DATA2_UPPER_MSK) << BNXT_DATA2_UPPER_SFT) |\
+ (((data1) & BNXT_DATA1_LOWER_MSK) >> BNXT_DATA1_LOWER_SFT))
+
+#define BNXT_PPS_PIN_DISABLE 0
+#define BNXT_PPS_PIN_ENABLE 1
+#define BNXT_PPS_PIN_NONE 0
+#define BNXT_PPS_PIN_PPS_IN 1
+#define BNXT_PPS_PIN_PPS_OUT 2
+#define BNXT_PPS_PIN_SYNC_IN 3
+#define BNXT_PPS_PIN_SYNC_OUT 4
+
+#define BNXT_PPS_EVENT_INTERNAL 1
+#define BNXT_PPS_EVENT_EXTERNAL 2
+
+/* PPS pin state kept in struct bnxt_ptp_cfg. */
+struct bnxt_pps {
+ /* pin count taken from the HWRM_FUNC_PTP_PIN_QCFG response */
+ u8 num_pins;
+#define BNXT_MAX_TSIO_PINS 4
+ /* indexed by pin number; TSIO_PIN_VALID() bounds the index */
+ struct pps_pin pins[BNXT_MAX_TSIO_PINS];
+};
struct bnxt_ptp_cfg {
struct ptp_clock_info ptp_info;
struct ptp_clock *ptp_clock;
struct cyclecounter cc;
struct timecounter tc;
+ struct bnxt_pps pps_info;
/* serialize timecounter access */
spinlock_t ptp_lock;
struct sk_buff *tx_skb;
@@ -37,6 +89,7 @@ struct bnxt_ptp_cfg {
#define BNXT_PHC_OVERFLOW_PERIOD (19 * 3600 * HZ)
u16 tx_seqid;
+ u16 tx_hdr_off;
struct bnxt *bp;
atomic_t tx_avail;
#define BNXT_MAX_TX_TS 1
@@ -74,7 +127,9 @@ do { \
((dst) = READ_ONCE(src))
#endif
-int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id);
+int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off);
+void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2);
+void bnxt_ptp_reapply_pps(struct bnxt *bp);
int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr);
int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr);
int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 7fa881e1cd80..70d8ca3039dc 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -17,6 +17,7 @@
#include <linux/etherdevice.h>
#include "bnxt_hsi.h"
#include "bnxt.h"
+#include "bnxt_hwrm.h"
#include "bnxt_ulp.h"
#include "bnxt_sriov.h"
#include "bnxt_vfr.h"
@@ -26,21 +27,26 @@
static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp,
struct bnxt_vf_info *vf, u16 event_id)
{
- struct hwrm_fwd_async_event_cmpl_input req = {0};
+ struct hwrm_fwd_async_event_cmpl_input *req;
struct hwrm_async_event_cmpl *async_cmpl;
int rc = 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_ASYNC_EVENT_CMPL, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_FWD_ASYNC_EVENT_CMPL);
+ if (rc)
+ goto exit;
+
if (vf)
- req.encap_async_event_target_id = cpu_to_le16(vf->fw_fid);
+ req->encap_async_event_target_id = cpu_to_le16(vf->fw_fid);
else
/* broadcast this async event to all VFs */
- req.encap_async_event_target_id = cpu_to_le16(0xffff);
- async_cmpl = (struct hwrm_async_event_cmpl *)req.encap_async_event_cmpl;
+ req->encap_async_event_target_id = cpu_to_le16(0xffff);
+ async_cmpl =
+ (struct hwrm_async_event_cmpl *)req->encap_async_event_cmpl;
async_cmpl->type = cpu_to_le16(ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT);
async_cmpl->event_id = cpu_to_le16(event_id);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, req);
+exit:
if (rc)
netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl failed. rc:%d\n",
rc);
@@ -62,10 +68,10 @@ static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id)
int bnxt_set_vf_spoofchk(struct net_device *dev, int vf_id, bool setting)
{
- struct hwrm_func_cfg_input req = {0};
struct bnxt *bp = netdev_priv(dev);
- struct bnxt_vf_info *vf;
+ struct hwrm_func_cfg_input *req;
bool old_setting = false;
+ struct bnxt_vf_info *vf;
u32 func_flags;
int rc;
@@ -89,36 +95,38 @@ int bnxt_set_vf_spoofchk(struct net_device *dev, int vf_id, bool setting)
/*TODO: if the driver supports VLAN filter on guest VLAN,
* the spoof check should also include vlan anti-spoofing
*/
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
- req.fid = cpu_to_le16(vf->fw_fid);
- req.flags = cpu_to_le32(func_flags);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
if (!rc) {
- if (setting)
- vf->flags |= BNXT_VF_SPOOFCHK;
- else
- vf->flags &= ~BNXT_VF_SPOOFCHK;
+ req->fid = cpu_to_le16(vf->fw_fid);
+ req->flags = cpu_to_le32(func_flags);
+ rc = hwrm_req_send(bp, req);
+ if (!rc) {
+ if (setting)
+ vf->flags |= BNXT_VF_SPOOFCHK;
+ else
+ vf->flags &= ~BNXT_VF_SPOOFCHK;
+ }
}
return rc;
}
static int bnxt_hwrm_func_qcfg_flags(struct bnxt *bp, struct bnxt_vf_info *vf)
{
- struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_func_qcfg_input req = {0};
+ struct hwrm_func_qcfg_output *resp;
+ struct hwrm_func_qcfg_input *req;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
- req.fid = cpu_to_le16(BNXT_PF(bp) ? vf->fw_fid : 0xffff);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
- if (rc) {
- mutex_unlock(&bp->hwrm_cmd_lock);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_QCFG);
+ if (rc)
return rc;
- }
- vf->func_qcfg_flags = le16_to_cpu(resp->flags);
- mutex_unlock(&bp->hwrm_cmd_lock);
- return 0;
+
+ req->fid = cpu_to_le16(BNXT_PF(bp) ? vf->fw_fid : 0xffff);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
+ if (!rc)
+ vf->func_qcfg_flags = le16_to_cpu(resp->flags);
+ hwrm_req_drop(bp, req);
+ return rc;
}
bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf)
@@ -132,18 +140,22 @@ bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf)
static int bnxt_hwrm_set_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf)
{
- struct hwrm_func_cfg_input req = {0};
+ struct hwrm_func_cfg_input *req;
+ int rc;
if (!(bp->fw_cap & BNXT_FW_CAP_TRUSTED_VF))
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
- req.fid = cpu_to_le16(vf->fw_fid);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+ if (rc)
+ return rc;
+
+ req->fid = cpu_to_le16(vf->fw_fid);
if (vf->flags & BNXT_VF_TRUST)
- req.flags = cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_ENABLE);
+ req->flags = cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_ENABLE);
else
- req.flags = cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_DISABLE);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->flags = cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_DISABLE);
+ return hwrm_req_send(bp, req);
}
int bnxt_set_vf_trust(struct net_device *dev, int vf_id, bool trusted)
@@ -203,8 +215,8 @@ int bnxt_get_vf_config(struct net_device *dev, int vf_id,
int bnxt_set_vf_mac(struct net_device *dev, int vf_id, u8 *mac)
{
- struct hwrm_func_cfg_input req = {0};
struct bnxt *bp = netdev_priv(dev);
+ struct hwrm_func_cfg_input *req;
struct bnxt_vf_info *vf;
int rc;
@@ -220,19 +232,23 @@ int bnxt_set_vf_mac(struct net_device *dev, int vf_id, u8 *mac)
}
vf = &bp->pf.vf[vf_id];
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+ if (rc)
+ return rc;
+
memcpy(vf->mac_addr, mac, ETH_ALEN);
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
- req.fid = cpu_to_le16(vf->fw_fid);
- req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
- memcpy(req.dflt_mac_addr, mac, ETH_ALEN);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+
+ req->fid = cpu_to_le16(vf->fw_fid);
+ req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
+ memcpy(req->dflt_mac_addr, mac, ETH_ALEN);
+ return hwrm_req_send(bp, req);
}
int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos,
__be16 vlan_proto)
{
- struct hwrm_func_cfg_input req = {0};
struct bnxt *bp = netdev_priv(dev);
+ struct hwrm_func_cfg_input *req;
struct bnxt_vf_info *vf;
u16 vlan_tag;
int rc;
@@ -258,21 +274,23 @@ int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos,
if (vlan_tag == vf->vlan)
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
- req.fid = cpu_to_le16(vf->fw_fid);
- req.dflt_vlan = cpu_to_le16(vlan_tag);
- req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_VLAN);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
- if (!rc)
- vf->vlan = vlan_tag;
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+ if (!rc) {
+ req->fid = cpu_to_le16(vf->fw_fid);
+ req->dflt_vlan = cpu_to_le16(vlan_tag);
+ req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_VLAN);
+ rc = hwrm_req_send(bp, req);
+ if (!rc)
+ vf->vlan = vlan_tag;
+ }
return rc;
}
int bnxt_set_vf_bw(struct net_device *dev, int vf_id, int min_tx_rate,
int max_tx_rate)
{
- struct hwrm_func_cfg_input req = {0};
struct bnxt *bp = netdev_priv(dev);
+ struct hwrm_func_cfg_input *req;
struct bnxt_vf_info *vf;
u32 pf_link_speed;
int rc;
@@ -296,16 +314,18 @@ int bnxt_set_vf_bw(struct net_device *dev, int vf_id, int min_tx_rate,
}
if (min_tx_rate == vf->min_tx_rate && max_tx_rate == vf->max_tx_rate)
return 0;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
- req.fid = cpu_to_le16(vf->fw_fid);
- req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_MAX_BW);
- req.max_bw = cpu_to_le32(max_tx_rate);
- req.enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_MIN_BW);
- req.min_bw = cpu_to_le32(min_tx_rate);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
if (!rc) {
- vf->min_tx_rate = min_tx_rate;
- vf->max_tx_rate = max_tx_rate;
+ req->fid = cpu_to_le16(vf->fw_fid);
+ req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_MAX_BW |
+ FUNC_CFG_REQ_ENABLES_MIN_BW);
+ req->max_bw = cpu_to_le32(max_tx_rate);
+ req->min_bw = cpu_to_le32(min_tx_rate);
+ rc = hwrm_req_send(bp, req);
+ if (!rc) {
+ vf->min_tx_rate = min_tx_rate;
+ vf->max_tx_rate = max_tx_rate;
+ }
}
return rc;
}
@@ -358,21 +378,22 @@ static int bnxt_set_vf_attr(struct bnxt *bp, int num_vfs)
static int bnxt_hwrm_func_vf_resource_free(struct bnxt *bp, int num_vfs)
{
- int i, rc = 0;
+ struct hwrm_func_vf_resc_free_input *req;
struct bnxt_pf_info *pf = &bp->pf;
- struct hwrm_func_vf_resc_free_input req = {0};
+ int i, rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_RESC_FREE, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_VF_RESC_FREE);
+ if (rc)
+ return rc;
- mutex_lock(&bp->hwrm_cmd_lock);
+ hwrm_req_hold(bp, req);
for (i = pf->first_vf_id; i < pf->first_vf_id + num_vfs; i++) {
- req.vf_id = cpu_to_le16(i);
- rc = _hwrm_send_message(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
+ req->vf_id = cpu_to_le16(i);
+ rc = hwrm_req_send(bp, req);
if (rc)
break;
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -446,51 +467,55 @@ static int bnxt_alloc_vf_resources(struct bnxt *bp, int num_vfs)
static int bnxt_hwrm_func_buf_rgtr(struct bnxt *bp)
{
- struct hwrm_func_buf_rgtr_input req = {0};
+ struct hwrm_func_buf_rgtr_input *req;
+ int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_BUF_RGTR, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_BUF_RGTR);
+ if (rc)
+ return rc;
- req.req_buf_num_pages = cpu_to_le16(bp->pf.hwrm_cmd_req_pages);
- req.req_buf_page_size = cpu_to_le16(BNXT_PAGE_SHIFT);
- req.req_buf_len = cpu_to_le16(BNXT_HWRM_REQ_MAX_SIZE);
- req.req_buf_page_addr0 = cpu_to_le64(bp->pf.hwrm_cmd_req_dma_addr[0]);
- req.req_buf_page_addr1 = cpu_to_le64(bp->pf.hwrm_cmd_req_dma_addr[1]);
- req.req_buf_page_addr2 = cpu_to_le64(bp->pf.hwrm_cmd_req_dma_addr[2]);
- req.req_buf_page_addr3 = cpu_to_le64(bp->pf.hwrm_cmd_req_dma_addr[3]);
+ req->req_buf_num_pages = cpu_to_le16(bp->pf.hwrm_cmd_req_pages);
+ req->req_buf_page_size = cpu_to_le16(BNXT_PAGE_SHIFT);
+ req->req_buf_len = cpu_to_le16(BNXT_HWRM_REQ_MAX_SIZE);
+ req->req_buf_page_addr0 = cpu_to_le64(bp->pf.hwrm_cmd_req_dma_addr[0]);
+ req->req_buf_page_addr1 = cpu_to_le64(bp->pf.hwrm_cmd_req_dma_addr[1]);
+ req->req_buf_page_addr2 = cpu_to_le64(bp->pf.hwrm_cmd_req_dma_addr[2]);
+ req->req_buf_page_addr3 = cpu_to_le64(bp->pf.hwrm_cmd_req_dma_addr[3]);
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ return hwrm_req_send(bp, req);
}
-/* Caller holds bp->hwrm_cmd_lock mutex lock */
-static void __bnxt_set_vf_params(struct bnxt *bp, int vf_id)
+static int __bnxt_set_vf_params(struct bnxt *bp, int vf_id)
{
- struct hwrm_func_cfg_input req = {0};
+ struct hwrm_func_cfg_input *req;
struct bnxt_vf_info *vf;
+ int rc;
+
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+ if (rc)
+ return rc;
vf = &bp->pf.vf[vf_id];
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
- req.fid = cpu_to_le16(vf->fw_fid);
+ req->fid = cpu_to_le16(vf->fw_fid);
if (is_valid_ether_addr(vf->mac_addr)) {
- req.enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
- memcpy(req.dflt_mac_addr, vf->mac_addr, ETH_ALEN);
+ req->enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
+ memcpy(req->dflt_mac_addr, vf->mac_addr, ETH_ALEN);
}
if (vf->vlan) {
- req.enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_VLAN);
- req.dflt_vlan = cpu_to_le16(vf->vlan);
+ req->enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_VLAN);
+ req->dflt_vlan = cpu_to_le16(vf->vlan);
}
if (vf->max_tx_rate) {
- req.enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_MAX_BW);
- req.max_bw = cpu_to_le32(vf->max_tx_rate);
-#ifdef HAVE_IFLA_TX_RATE
- req.enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_MIN_BW);
- req.min_bw = cpu_to_le32(vf->min_tx_rate);
-#endif
+ req->enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_MAX_BW |
+ FUNC_CFG_REQ_ENABLES_MIN_BW);
+ req->max_bw = cpu_to_le32(vf->max_tx_rate);
+ req->min_bw = cpu_to_le32(vf->min_tx_rate);
}
if (vf->flags & BNXT_VF_TRUST)
- req.flags |= cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_ENABLE);
+ req->flags |= cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_ENABLE);
- _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ return hwrm_req_send(bp, req);
}
/* Only called by PF to reserve resources for VFs, returns actual number of
@@ -498,7 +523,7 @@ static void __bnxt_set_vf_params(struct bnxt *bp, int vf_id)
*/
static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
{
- struct hwrm_func_vf_resource_cfg_input req = {0};
+ struct hwrm_func_vf_resource_cfg_input *req;
struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
u16 vf_tx_rings, vf_rx_rings, vf_cp_rings;
u16 vf_stat_ctx, vf_vnics, vf_ring_grps;
@@ -507,7 +532,9 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
u16 vf_msix = 0;
u16 vf_rss;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_RESOURCE_CFG, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_VF_RESOURCE_CFG);
+ if (rc)
+ return rc;
if (bp->flags & BNXT_FLAG_CHIP_P5) {
vf_msix = hw_resc->max_nqs - bnxt_nq_rings_in_use(bp);
@@ -526,21 +553,21 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
vf_vnics = min_t(u16, vf_vnics, vf_rx_rings);
vf_rss = hw_resc->max_rsscos_ctxs - bp->rsscos_nr_ctxs;
- req.min_rsscos_ctx = cpu_to_le16(BNXT_VF_MIN_RSS_CTX);
+ req->min_rsscos_ctx = cpu_to_le16(BNXT_VF_MIN_RSS_CTX);
if (pf->vf_resv_strategy == BNXT_VF_RESV_STRATEGY_MINIMAL_STATIC) {
min = 0;
- req.min_rsscos_ctx = cpu_to_le16(min);
+ req->min_rsscos_ctx = cpu_to_le16(min);
}
if (pf->vf_resv_strategy == BNXT_VF_RESV_STRATEGY_MINIMAL ||
pf->vf_resv_strategy == BNXT_VF_RESV_STRATEGY_MINIMAL_STATIC) {
- req.min_cmpl_rings = cpu_to_le16(min);
- req.min_tx_rings = cpu_to_le16(min);
- req.min_rx_rings = cpu_to_le16(min);
- req.min_l2_ctxs = cpu_to_le16(min);
- req.min_vnics = cpu_to_le16(min);
- req.min_stat_ctx = cpu_to_le16(min);
+ req->min_cmpl_rings = cpu_to_le16(min);
+ req->min_tx_rings = cpu_to_le16(min);
+ req->min_rx_rings = cpu_to_le16(min);
+ req->min_l2_ctxs = cpu_to_le16(min);
+ req->min_vnics = cpu_to_le16(min);
+ req->min_stat_ctx = cpu_to_le16(min);
if (!(bp->flags & BNXT_FLAG_CHIP_P5))
- req.min_hw_ring_grps = cpu_to_le16(min);
+ req->min_hw_ring_grps = cpu_to_le16(min);
} else {
vf_cp_rings /= num_vfs;
vf_tx_rings /= num_vfs;
@@ -550,56 +577,57 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
vf_ring_grps /= num_vfs;
vf_rss /= num_vfs;
- req.min_cmpl_rings = cpu_to_le16(vf_cp_rings);
- req.min_tx_rings = cpu_to_le16(vf_tx_rings);
- req.min_rx_rings = cpu_to_le16(vf_rx_rings);
- req.min_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
- req.min_vnics = cpu_to_le16(vf_vnics);
- req.min_stat_ctx = cpu_to_le16(vf_stat_ctx);
- req.min_hw_ring_grps = cpu_to_le16(vf_ring_grps);
- req.min_rsscos_ctx = cpu_to_le16(vf_rss);
+ req->min_cmpl_rings = cpu_to_le16(vf_cp_rings);
+ req->min_tx_rings = cpu_to_le16(vf_tx_rings);
+ req->min_rx_rings = cpu_to_le16(vf_rx_rings);
+ req->min_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
+ req->min_vnics = cpu_to_le16(vf_vnics);
+ req->min_stat_ctx = cpu_to_le16(vf_stat_ctx);
+ req->min_hw_ring_grps = cpu_to_le16(vf_ring_grps);
+ req->min_rsscos_ctx = cpu_to_le16(vf_rss);
}
- req.max_cmpl_rings = cpu_to_le16(vf_cp_rings);
- req.max_tx_rings = cpu_to_le16(vf_tx_rings);
- req.max_rx_rings = cpu_to_le16(vf_rx_rings);
- req.max_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
- req.max_vnics = cpu_to_le16(vf_vnics);
- req.max_stat_ctx = cpu_to_le16(vf_stat_ctx);
- req.max_hw_ring_grps = cpu_to_le16(vf_ring_grps);
- req.max_rsscos_ctx = cpu_to_le16(vf_rss);
+ req->max_cmpl_rings = cpu_to_le16(vf_cp_rings);
+ req->max_tx_rings = cpu_to_le16(vf_tx_rings);
+ req->max_rx_rings = cpu_to_le16(vf_rx_rings);
+ req->max_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
+ req->max_vnics = cpu_to_le16(vf_vnics);
+ req->max_stat_ctx = cpu_to_le16(vf_stat_ctx);
+ req->max_hw_ring_grps = cpu_to_le16(vf_ring_grps);
+ req->max_rsscos_ctx = cpu_to_le16(vf_rss);
if (bp->flags & BNXT_FLAG_CHIP_P5)
- req.max_msix = cpu_to_le16(vf_msix / num_vfs);
+ req->max_msix = cpu_to_le16(vf_msix / num_vfs);
- mutex_lock(&bp->hwrm_cmd_lock);
+ hwrm_req_hold(bp, req);
for (i = 0; i < num_vfs; i++) {
if (reset)
__bnxt_set_vf_params(bp, i);
- req.vf_id = cpu_to_le16(pf->first_vf_id + i);
- rc = _hwrm_send_message(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
+ req->vf_id = cpu_to_le16(pf->first_vf_id + i);
+ rc = hwrm_req_send(bp, req);
if (rc)
break;
pf->active_vfs = i + 1;
pf->vf[i].fw_fid = pf->first_vf_id + i;
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+
if (pf->active_vfs) {
u16 n = pf->active_vfs;
- hw_resc->max_tx_rings -= le16_to_cpu(req.min_tx_rings) * n;
- hw_resc->max_rx_rings -= le16_to_cpu(req.min_rx_rings) * n;
- hw_resc->max_hw_ring_grps -= le16_to_cpu(req.min_hw_ring_grps) *
- n;
- hw_resc->max_cp_rings -= le16_to_cpu(req.min_cmpl_rings) * n;
- hw_resc->max_rsscos_ctxs -= le16_to_cpu(req.min_rsscos_ctx) * n;
- hw_resc->max_stat_ctxs -= le16_to_cpu(req.min_stat_ctx) * n;
- hw_resc->max_vnics -= le16_to_cpu(req.min_vnics) * n;
+ hw_resc->max_tx_rings -= le16_to_cpu(req->min_tx_rings) * n;
+ hw_resc->max_rx_rings -= le16_to_cpu(req->min_rx_rings) * n;
+ hw_resc->max_hw_ring_grps -=
+ le16_to_cpu(req->min_hw_ring_grps) * n;
+ hw_resc->max_cp_rings -= le16_to_cpu(req->min_cmpl_rings) * n;
+ hw_resc->max_rsscos_ctxs -=
+ le16_to_cpu(req->min_rsscos_ctx) * n;
+ hw_resc->max_stat_ctxs -= le16_to_cpu(req->min_stat_ctx) * n;
+ hw_resc->max_vnics -= le16_to_cpu(req->min_vnics) * n;
if (bp->flags & BNXT_FLAG_CHIP_P5)
hw_resc->max_irqs -= vf_msix * n;
rc = pf->active_vfs;
}
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -608,15 +636,18 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
*/
static int bnxt_hwrm_func_cfg(struct bnxt *bp, int num_vfs)
{
- u32 rc = 0, mtu, i;
u16 vf_tx_rings, vf_rx_rings, vf_cp_rings, vf_stat_ctx, vf_vnics;
struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
- struct hwrm_func_cfg_input req = {0};
struct bnxt_pf_info *pf = &bp->pf;
+ struct hwrm_func_cfg_input *req;
int total_vf_tx_rings = 0;
u16 vf_ring_grps;
+ u32 mtu, i;
+ int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+ if (rc)
+ return rc;
/* Remaining rings are distributed equally amongs VF's for now */
vf_cp_rings = bnxt_get_avail_cp_rings_for_en(bp) / num_vfs;
@@ -632,50 +663,49 @@ static int bnxt_hwrm_func_cfg(struct bnxt *bp, int num_vfs)
vf_vnics = (hw_resc->max_vnics - bp->nr_vnics) / num_vfs;
vf_vnics = min_t(u16, vf_vnics, vf_rx_rings);
- req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_ADMIN_MTU |
- FUNC_CFG_REQ_ENABLES_MRU |
- FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS |
- FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS |
- FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
- FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS |
- FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS |
- FUNC_CFG_REQ_ENABLES_NUM_L2_CTXS |
- FUNC_CFG_REQ_ENABLES_NUM_VNICS |
- FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS);
+ req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_ADMIN_MTU |
+ FUNC_CFG_REQ_ENABLES_MRU |
+ FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS |
+ FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS |
+ FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
+ FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS |
+ FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS |
+ FUNC_CFG_REQ_ENABLES_NUM_L2_CTXS |
+ FUNC_CFG_REQ_ENABLES_NUM_VNICS |
+ FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS);
mtu = bp->dev->mtu + ETH_HLEN + VLAN_HLEN;
- req.mru = cpu_to_le16(mtu);
- req.admin_mtu = cpu_to_le16(mtu);
+ req->mru = cpu_to_le16(mtu);
+ req->admin_mtu = cpu_to_le16(mtu);
- req.num_rsscos_ctxs = cpu_to_le16(1);
- req.num_cmpl_rings = cpu_to_le16(vf_cp_rings);
- req.num_tx_rings = cpu_to_le16(vf_tx_rings);
- req.num_rx_rings = cpu_to_le16(vf_rx_rings);
- req.num_hw_ring_grps = cpu_to_le16(vf_ring_grps);
- req.num_l2_ctxs = cpu_to_le16(4);
+ req->num_rsscos_ctxs = cpu_to_le16(1);
+ req->num_cmpl_rings = cpu_to_le16(vf_cp_rings);
+ req->num_tx_rings = cpu_to_le16(vf_tx_rings);
+ req->num_rx_rings = cpu_to_le16(vf_rx_rings);
+ req->num_hw_ring_grps = cpu_to_le16(vf_ring_grps);
+ req->num_l2_ctxs = cpu_to_le16(4);
- req.num_vnics = cpu_to_le16(vf_vnics);
+ req->num_vnics = cpu_to_le16(vf_vnics);
/* FIXME spec currently uses 1 bit for stats ctx */
- req.num_stat_ctxs = cpu_to_le16(vf_stat_ctx);
+ req->num_stat_ctxs = cpu_to_le16(vf_stat_ctx);
- mutex_lock(&bp->hwrm_cmd_lock);
+ hwrm_req_hold(bp, req);
for (i = 0; i < num_vfs; i++) {
int vf_tx_rsvd = vf_tx_rings;
- req.fid = cpu_to_le16(pf->first_vf_id + i);
- rc = _hwrm_send_message(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
+ req->fid = cpu_to_le16(pf->first_vf_id + i);
+ rc = hwrm_req_send(bp, req);
if (rc)
break;
pf->active_vfs = i + 1;
- pf->vf[i].fw_fid = le16_to_cpu(req.fid);
+ pf->vf[i].fw_fid = le16_to_cpu(req->fid);
rc = __bnxt_hwrm_get_tx_rings(bp, pf->vf[i].fw_fid,
&vf_tx_rsvd);
if (rc)
break;
total_vf_tx_rings += vf_tx_rsvd;
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
if (pf->active_vfs) {
hw_resc->max_tx_rings -= total_vf_tx_rings;
hw_resc->max_rx_rings -= vf_rx_rings * num_vfs;
@@ -893,23 +923,24 @@ static int bnxt_hwrm_fwd_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
void *encap_resp, __le64 encap_resp_addr,
__le16 encap_resp_cpr, u32 msg_size)
{
- int rc = 0;
- struct hwrm_fwd_resp_input req = {0};
+ struct hwrm_fwd_resp_input *req;
+ int rc;
if (BNXT_FWD_RESP_SIZE_ERR(msg_size))
return -EINVAL;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_RESP, -1, -1);
-
- /* Set the new target id */
- req.target_id = cpu_to_le16(vf->fw_fid);
- req.encap_resp_target_id = cpu_to_le16(vf->fw_fid);
- req.encap_resp_len = cpu_to_le16(msg_size);
- req.encap_resp_addr = encap_resp_addr;
- req.encap_resp_cmpl_ring = encap_resp_cpr;
- memcpy(req.encap_resp, encap_resp, msg_size);
-
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_FWD_RESP);
+ if (!rc) {
+ /* Set the new target id */
+ req->target_id = cpu_to_le16(vf->fw_fid);
+ req->encap_resp_target_id = cpu_to_le16(vf->fw_fid);
+ req->encap_resp_len = cpu_to_le16(msg_size);
+ req->encap_resp_addr = encap_resp_addr;
+ req->encap_resp_cmpl_ring = encap_resp_cpr;
+ memcpy(req->encap_resp, encap_resp, msg_size);
+
+ rc = hwrm_req_send(bp, req);
+ }
if (rc)
netdev_err(bp->dev, "hwrm_fwd_resp failed. rc:%d\n", rc);
return rc;
@@ -918,19 +949,21 @@ static int bnxt_hwrm_fwd_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
static int bnxt_hwrm_fwd_err_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
u32 msg_size)
{
- int rc = 0;
- struct hwrm_reject_fwd_resp_input req = {0};
+ struct hwrm_reject_fwd_resp_input *req;
+ int rc;
if (BNXT_REJ_FWD_RESP_SIZE_ERR(msg_size))
return -EINVAL;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_REJECT_FWD_RESP, -1, -1);
- /* Set the new target id */
- req.target_id = cpu_to_le16(vf->fw_fid);
- req.encap_resp_target_id = cpu_to_le16(vf->fw_fid);
- memcpy(req.encap_request, vf->hwrm_cmd_req_addr, msg_size);
+ rc = hwrm_req_init(bp, req, HWRM_REJECT_FWD_RESP);
+ if (!rc) {
+ /* Set the new target id */
+ req->target_id = cpu_to_le16(vf->fw_fid);
+ req->encap_resp_target_id = cpu_to_le16(vf->fw_fid);
+ memcpy(req->encap_request, vf->hwrm_cmd_req_addr, msg_size);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, req);
+ }
if (rc)
netdev_err(bp->dev, "hwrm_fwd_err_resp failed. rc:%d\n", rc);
return rc;
@@ -939,19 +972,21 @@ static int bnxt_hwrm_fwd_err_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
static int bnxt_hwrm_exec_fwd_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
u32 msg_size)
{
- int rc = 0;
- struct hwrm_exec_fwd_resp_input req = {0};
+ struct hwrm_exec_fwd_resp_input *req;
+ int rc;
if (BNXT_EXEC_FWD_RESP_SIZE_ERR(msg_size))
return -EINVAL;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_EXEC_FWD_RESP, -1, -1);
- /* Set the new target id */
- req.target_id = cpu_to_le16(vf->fw_fid);
- req.encap_resp_target_id = cpu_to_le16(vf->fw_fid);
- memcpy(req.encap_request, vf->hwrm_cmd_req_addr, msg_size);
+ rc = hwrm_req_init(bp, req, HWRM_EXEC_FWD_RESP);
+ if (!rc) {
+ /* Set the new target id */
+ req->target_id = cpu_to_le16(vf->fw_fid);
+ req->encap_resp_target_id = cpu_to_le16(vf->fw_fid);
+ memcpy(req->encap_request, vf->hwrm_cmd_req_addr, msg_size);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, req);
+ }
if (rc)
netdev_err(bp->dev, "hwrm_exec_fw_resp failed. rc:%d\n", rc);
return rc;
@@ -1031,10 +1066,10 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf)
phy_qcfg_req =
(struct hwrm_port_phy_qcfg_input *)vf->hwrm_cmd_req_addr;
- mutex_lock(&bp->hwrm_cmd_lock);
+ mutex_lock(&bp->link_lock);
memcpy(&phy_qcfg_resp, &bp->link_info.phy_qcfg_resp,
sizeof(phy_qcfg_resp));
- mutex_unlock(&bp->hwrm_cmd_lock);
+ mutex_unlock(&bp->link_lock);
phy_qcfg_resp.resp_len = cpu_to_le16(sizeof(phy_qcfg_resp));
phy_qcfg_resp.seq_id = phy_qcfg_req->seq_id;
phy_qcfg_resp.valid = 1;
@@ -1118,7 +1153,7 @@ void bnxt_hwrm_exec_fwd_req(struct bnxt *bp)
int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict)
{
- struct hwrm_func_vf_cfg_input req = {0};
+ struct hwrm_func_vf_cfg_input *req;
int rc = 0;
if (!BNXT_VF(bp))
@@ -1129,10 +1164,16 @@ int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict)
rc = -EADDRNOTAVAIL;
goto mac_done;
}
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1);
- req.enables = cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
- memcpy(req.dflt_mac_addr, mac, ETH_ALEN);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_VF_CFG);
+ if (rc)
+ goto mac_done;
+
+ req->enables = cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
+ memcpy(req->dflt_mac_addr, mac, ETH_ALEN);
+ if (!strict)
+ hwrm_req_flags(bp, req, BNXT_HWRM_CTX_SILENT);
+ rc = hwrm_req_send(bp, req);
mac_done:
if (rc && strict) {
rc = -EADDRNOTAVAIL;
@@ -1145,15 +1186,17 @@ mac_done:
void bnxt_update_vf_mac(struct bnxt *bp)
{
- struct hwrm_func_qcaps_input req = {0};
- struct hwrm_func_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
+ struct hwrm_func_qcaps_output *resp;
+ struct hwrm_func_qcaps_input *req;
bool inform_pf = false;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCAPS, -1, -1);
- req.fid = cpu_to_le16(0xffff);
+ if (hwrm_req_init(bp, req, HWRM_FUNC_QCAPS))
+ return;
+
+ req->fid = cpu_to_le16(0xffff);
- mutex_lock(&bp->hwrm_cmd_lock);
- if (_hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT))
+ resp = hwrm_req_hold(bp, req);
+ if (hwrm_req_send(bp, req))
goto update_vf_mac_exit;
/* Store MAC address from the firmware. There are 2 cases:
@@ -1176,7 +1219,7 @@ void bnxt_update_vf_mac(struct bnxt *bp)
if (is_valid_ether_addr(bp->vf.mac_addr))
memcpy(bp->dev->dev_addr, bp->vf.mac_addr, ETH_ALEN);
update_vf_mac_exit:
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
if (inform_pf)
bnxt_approve_mac(bp, bp->dev->dev_addr, false);
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 5e4429b14b8c..46fae1acbeed 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -22,6 +22,7 @@
#include "bnxt_hsi.h"
#include "bnxt.h"
+#include "bnxt_hwrm.h"
#include "bnxt_sriov.h"
#include "bnxt_tc.h"
#include "bnxt_vfr.h"
@@ -502,16 +503,18 @@ static int bnxt_tc_parse_flow(struct bnxt *bp,
static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp,
struct bnxt_tc_flow_node *flow_node)
{
- struct hwrm_cfa_flow_free_input req = { 0 };
+ struct hwrm_cfa_flow_free_input *req;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_FREE, -1, -1);
- if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
- req.ext_flow_handle = flow_node->ext_flow_handle;
- else
- req.flow_handle = flow_node->flow_handle;
+ rc = hwrm_req_init(bp, req, HWRM_CFA_FLOW_FREE);
+ if (!rc) {
+ if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
+ req->ext_flow_handle = flow_node->ext_flow_handle;
+ else
+ req->flow_handle = flow_node->flow_handle;
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_send(bp, req);
+ }
if (rc)
netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
@@ -587,20 +590,22 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
struct bnxt_tc_actions *actions = &flow->actions;
struct bnxt_tc_l3_key *l3_mask = &flow->l3_mask;
struct bnxt_tc_l3_key *l3_key = &flow->l3_key;
- struct hwrm_cfa_flow_alloc_input req = { 0 };
struct hwrm_cfa_flow_alloc_output *resp;
+ struct hwrm_cfa_flow_alloc_input *req;
u16 flow_flags = 0, action_flags = 0;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_ALLOC, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_CFA_FLOW_ALLOC);
+ if (rc)
+ return rc;
- req.src_fid = cpu_to_le16(flow->src_fid);
- req.ref_flow_handle = ref_flow_handle;
+ req->src_fid = cpu_to_le16(flow->src_fid);
+ req->ref_flow_handle = ref_flow_handle;
if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
- memcpy(req.l2_rewrite_dmac, actions->l2_rewrite_dmac,
+ memcpy(req->l2_rewrite_dmac, actions->l2_rewrite_dmac,
ETH_ALEN);
- memcpy(req.l2_rewrite_smac, actions->l2_rewrite_smac,
+ memcpy(req->l2_rewrite_smac, actions->l2_rewrite_smac,
ETH_ALEN);
action_flags |=
CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
@@ -615,71 +620,71 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
action_flags |=
CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
/* L3 source rewrite */
- req.nat_ip_address[0] =
+ req->nat_ip_address[0] =
actions->nat.l3.ipv4.saddr.s_addr;
/* L4 source port */
if (actions->nat.l4.ports.sport)
- req.nat_port =
+ req->nat_port =
actions->nat.l4.ports.sport;
} else {
action_flags |=
CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
/* L3 destination rewrite */
- req.nat_ip_address[0] =
+ req->nat_ip_address[0] =
actions->nat.l3.ipv4.daddr.s_addr;
/* L4 destination port */
if (actions->nat.l4.ports.dport)
- req.nat_port =
+ req->nat_port =
actions->nat.l4.ports.dport;
}
netdev_dbg(bp->dev,
- "req.nat_ip_address: %pI4 src_xlate: %d req.nat_port: %x\n",
- req.nat_ip_address, actions->nat.src_xlate,
- req.nat_port);
+ "req->nat_ip_address: %pI4 src_xlate: %d req->nat_port: %x\n",
+ req->nat_ip_address, actions->nat.src_xlate,
+ req->nat_port);
} else {
if (actions->nat.src_xlate) {
action_flags |=
CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
/* L3 source rewrite */
- memcpy(req.nat_ip_address,
+ memcpy(req->nat_ip_address,
actions->nat.l3.ipv6.saddr.s6_addr32,
- sizeof(req.nat_ip_address));
+ sizeof(req->nat_ip_address));
/* L4 source port */
if (actions->nat.l4.ports.sport)
- req.nat_port =
+ req->nat_port =
actions->nat.l4.ports.sport;
} else {
action_flags |=
CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
/* L3 destination rewrite */
- memcpy(req.nat_ip_address,
+ memcpy(req->nat_ip_address,
actions->nat.l3.ipv6.daddr.s6_addr32,
- sizeof(req.nat_ip_address));
+ sizeof(req->nat_ip_address));
/* L4 destination port */
if (actions->nat.l4.ports.dport)
- req.nat_port =
+ req->nat_port =
actions->nat.l4.ports.dport;
}
netdev_dbg(bp->dev,
- "req.nat_ip_address: %pI6 src_xlate: %d req.nat_port: %x\n",
- req.nat_ip_address, actions->nat.src_xlate,
- req.nat_port);
+ "req->nat_ip_address: %pI6 src_xlate: %d req->nat_port: %x\n",
+ req->nat_ip_address, actions->nat.src_xlate,
+ req->nat_port);
}
}
if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP ||
actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
- req.tunnel_handle = tunnel_handle;
+ req->tunnel_handle = tunnel_handle;
flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL;
action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL;
}
- req.ethertype = flow->l2_key.ether_type;
- req.ip_proto = flow->l4_key.ip_proto;
+ req->ethertype = flow->l2_key.ether_type;
+ req->ip_proto = flow->l4_key.ip_proto;
if (flow->flags & BNXT_TC_FLOW_FLAGS_ETH_ADDRS) {
- memcpy(req.dmac, flow->l2_key.dmac, ETH_ALEN);
- memcpy(req.smac, flow->l2_key.smac, ETH_ALEN);
+ memcpy(req->dmac, flow->l2_key.dmac, ETH_ALEN);
+ memcpy(req->smac, flow->l2_key.smac, ETH_ALEN);
}
if (flow->l2_key.num_vlans > 0) {
@@ -688,7 +693,7 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
* in outer_vlan_tci when num_vlans is 1 (which is
* always the case in TC.)
*/
- req.outer_vlan_tci = flow->l2_key.inner_vlan_tci;
+ req->outer_vlan_tci = flow->l2_key.inner_vlan_tci;
}
/* If all IP and L4 fields are wildcarded then this is an L2 flow */
@@ -701,68 +706,67 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6;
if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV4_ADDRS) {
- req.ip_dst[0] = l3_key->ipv4.daddr.s_addr;
- req.ip_dst_mask_len =
+ req->ip_dst[0] = l3_key->ipv4.daddr.s_addr;
+ req->ip_dst_mask_len =
inet_mask_len(l3_mask->ipv4.daddr.s_addr);
- req.ip_src[0] = l3_key->ipv4.saddr.s_addr;
- req.ip_src_mask_len =
+ req->ip_src[0] = l3_key->ipv4.saddr.s_addr;
+ req->ip_src_mask_len =
inet_mask_len(l3_mask->ipv4.saddr.s_addr);
} else if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV6_ADDRS) {
- memcpy(req.ip_dst, l3_key->ipv6.daddr.s6_addr32,
- sizeof(req.ip_dst));
- req.ip_dst_mask_len =
+ memcpy(req->ip_dst, l3_key->ipv6.daddr.s6_addr32,
+ sizeof(req->ip_dst));
+ req->ip_dst_mask_len =
ipv6_mask_len(&l3_mask->ipv6.daddr);
- memcpy(req.ip_src, l3_key->ipv6.saddr.s6_addr32,
- sizeof(req.ip_src));
- req.ip_src_mask_len =
+ memcpy(req->ip_src, l3_key->ipv6.saddr.s6_addr32,
+ sizeof(req->ip_src));
+ req->ip_src_mask_len =
ipv6_mask_len(&l3_mask->ipv6.saddr);
}
}
if (flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) {
- req.l4_src_port = flow->l4_key.ports.sport;
- req.l4_src_port_mask = flow->l4_mask.ports.sport;
- req.l4_dst_port = flow->l4_key.ports.dport;
- req.l4_dst_port_mask = flow->l4_mask.ports.dport;
+ req->l4_src_port = flow->l4_key.ports.sport;
+ req->l4_src_port_mask = flow->l4_mask.ports.sport;
+ req->l4_dst_port = flow->l4_key.ports.dport;
+ req->l4_dst_port_mask = flow->l4_mask.ports.dport;
} else if (flow->flags & BNXT_TC_FLOW_FLAGS_ICMP) {
/* l4 ports serve as type/code when ip_proto is ICMP */
- req.l4_src_port = htons(flow->l4_key.icmp.type);
- req.l4_src_port_mask = htons(flow->l4_mask.icmp.type);
- req.l4_dst_port = htons(flow->l4_key.icmp.code);
- req.l4_dst_port_mask = htons(flow->l4_mask.icmp.code);
+ req->l4_src_port = htons(flow->l4_key.icmp.type);
+ req->l4_src_port_mask = htons(flow->l4_mask.icmp.type);
+ req->l4_dst_port = htons(flow->l4_key.icmp.code);
+ req->l4_dst_port_mask = htons(flow->l4_mask.icmp.code);
}
- req.flags = cpu_to_le16(flow_flags);
+ req->flags = cpu_to_le16(flow_flags);
if (actions->flags & BNXT_TC_ACTION_FLAG_DROP) {
action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_DROP;
} else {
if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FWD;
- req.dst_fid = cpu_to_le16(actions->dst_fid);
+ req->dst_fid = cpu_to_le16(actions->dst_fid);
}
if (actions->flags & BNXT_TC_ACTION_FLAG_PUSH_VLAN) {
action_flags |=
CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
- req.l2_rewrite_vlan_tpid = actions->push_vlan_tpid;
- req.l2_rewrite_vlan_tci = actions->push_vlan_tci;
- memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
- memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
+ req->l2_rewrite_vlan_tpid = actions->push_vlan_tpid;
+ req->l2_rewrite_vlan_tci = actions->push_vlan_tci;
+ memcpy(&req->l2_rewrite_dmac, &req->dmac, ETH_ALEN);
+ memcpy(&req->l2_rewrite_smac, &req->smac, ETH_ALEN);
}
if (actions->flags & BNXT_TC_ACTION_FLAG_POP_VLAN) {
action_flags |=
CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
/* Rewrite config with tpid = 0 implies vlan pop */
- req.l2_rewrite_vlan_tpid = 0;
- memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
- memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
+ req->l2_rewrite_vlan_tpid = 0;
+ memcpy(&req->l2_rewrite_dmac, &req->dmac, ETH_ALEN);
+ memcpy(&req->l2_rewrite_smac, &req->smac, ETH_ALEN);
}
}
- req.action_flags = cpu_to_le16(action_flags);
+ req->action_flags = cpu_to_le16(action_flags);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send_silent(bp, req);
if (!rc) {
- resp = bnxt_get_hwrm_resp_addr(bp, &req);
/* CFA_FLOW_ALLOC response interpretation:
* fw with fw with
* 16-bit 64-bit
@@ -778,7 +782,7 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
flow_node->flow_id = resp->flow_id;
}
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
@@ -788,67 +792,69 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
__le32 ref_decap_handle,
__le32 *decap_filter_handle)
{
- struct hwrm_cfa_decap_filter_alloc_input req = { 0 };
struct hwrm_cfa_decap_filter_alloc_output *resp;
struct ip_tunnel_key *tun_key = &flow->tun_key;
+ struct hwrm_cfa_decap_filter_alloc_input *req;
u32 enables = 0;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_ALLOC, -1, -1);
+ rc = hwrm_req_init(bp, req, HWRM_CFA_DECAP_FILTER_ALLOC);
+ if (rc)
+ goto exit;
- req.flags = cpu_to_le32(CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL);
+ req->flags = cpu_to_le32(CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL);
enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE |
CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL;
- req.tunnel_type = CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN;
- req.ip_protocol = CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP;
+ req->tunnel_type = CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN;
+ req->ip_protocol = CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP;
if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ID) {
enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID;
/* tunnel_id is wrongly defined in hsi defn. as __le32 */
- req.tunnel_id = tunnel_id_to_key32(tun_key->tun_id);
+ req->tunnel_id = tunnel_id_to_key32(tun_key->tun_id);
}
if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) {
enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR;
- ether_addr_copy(req.dst_macaddr, l2_info->dmac);
+ ether_addr_copy(req->dst_macaddr, l2_info->dmac);
}
if (l2_info->num_vlans) {
enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID;
- req.t_ivlan_vid = l2_info->inner_vlan_tci;
+ req->t_ivlan_vid = l2_info->inner_vlan_tci;
}
enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE;
- req.ethertype = htons(ETH_P_IP);
+ req->ethertype = htons(ETH_P_IP);
if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS) {
enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR |
CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR |
CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE;
- req.ip_addr_type = CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
- req.dst_ipaddr[0] = tun_key->u.ipv4.dst;
- req.src_ipaddr[0] = tun_key->u.ipv4.src;
+ req->ip_addr_type =
+ CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
+ req->dst_ipaddr[0] = tun_key->u.ipv4.dst;
+ req->src_ipaddr[0] = tun_key->u.ipv4.src;
}
if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_PORTS) {
enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT;
- req.dst_port = tun_key->tp_dst;
+ req->dst_port = tun_key->tp_dst;
}
/* Eventhough the decap_handle returned by hwrm_cfa_decap_filter_alloc
* is defined as __le32, l2_ctxt_ref_id is defined in HSI as __le16.
*/
- req.l2_ctxt_ref_id = (__force __le16)ref_decap_handle;
- req.enables = cpu_to_le32(enables);
+ req->l2_ctxt_ref_id = (__force __le16)ref_decap_handle;
+ req->enables = cpu_to_le32(enables);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
- if (!rc) {
- resp = bnxt_get_hwrm_resp_addr(bp, &req);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send_silent(bp, req);
+ if (!rc)
*decap_filter_handle = resp->decap_filter_id;
- } else {
+ hwrm_req_drop(bp, req);
+exit:
+ if (rc)
netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
- }
- mutex_unlock(&bp->hwrm_cmd_lock);
return rc;
}
@@ -856,13 +862,14 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
static int hwrm_cfa_decap_filter_free(struct bnxt *bp,
__le32 decap_filter_handle)
{
- struct hwrm_cfa_decap_filter_free_input req = { 0 };
+ struct hwrm_cfa_decap_filter_free_input *req;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_FREE, -1, -1);
- req.decap_filter_id = decap_filter_handle;
-
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_CFA_DECAP_FILTER_FREE);
+ if (!rc) {
+ req->decap_filter_id = decap_filter_handle;
+ rc = hwrm_req_send(bp, req);
+ }
if (rc)
netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
@@ -874,18 +881,18 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
struct bnxt_tc_l2_key *l2_info,
__le32 *encap_record_handle)
{
- struct hwrm_cfa_encap_record_alloc_input req = { 0 };
struct hwrm_cfa_encap_record_alloc_output *resp;
- struct hwrm_cfa_encap_data_vxlan *encap =
- (struct hwrm_cfa_encap_data_vxlan *)&req.encap_data;
- struct hwrm_vxlan_ipv4_hdr *encap_ipv4 =
- (struct hwrm_vxlan_ipv4_hdr *)encap->l3;
+ struct hwrm_cfa_encap_record_alloc_input *req;
+ struct hwrm_cfa_encap_data_vxlan *encap;
+ struct hwrm_vxlan_ipv4_hdr *encap_ipv4;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_ALLOC, -1, -1);
-
- req.encap_type = CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN;
+ rc = hwrm_req_init(bp, req, HWRM_CFA_ENCAP_RECORD_ALLOC);
+ if (rc)
+ goto exit;
+ encap = (struct hwrm_cfa_encap_data_vxlan *)&req->encap_data;
+ req->encap_type = CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN;
ether_addr_copy(encap->dst_mac_addr, l2_info->dmac);
ether_addr_copy(encap->src_mac_addr, l2_info->smac);
if (l2_info->num_vlans) {
@@ -894,6 +901,7 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
encap->ovlan_tpid = l2_info->inner_vlan_tpid;
}
+ encap_ipv4 = (struct hwrm_vxlan_ipv4_hdr *)encap->l3;
encap_ipv4->ver_hlen = 4 << VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT;
encap_ipv4->ver_hlen |= 5 << VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT;
encap_ipv4->ttl = encap_key->ttl;
@@ -905,15 +913,14 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
encap->dst_port = encap_key->tp_dst;
encap->vni = tunnel_id_to_key32(encap_key->tun_id);
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
- if (!rc) {
- resp = bnxt_get_hwrm_resp_addr(bp, &req);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send_silent(bp, req);
+ if (!rc)
*encap_record_handle = resp->encap_record_id;
- } else {
+ hwrm_req_drop(bp, req);
+exit:
+ if (rc)
netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
- }
- mutex_unlock(&bp->hwrm_cmd_lock);
return rc;
}
@@ -921,13 +928,14 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
static int hwrm_cfa_encap_record_free(struct bnxt *bp,
__le32 encap_record_handle)
{
- struct hwrm_cfa_encap_record_free_input req = { 0 };
+ struct hwrm_cfa_encap_record_free_input *req;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_FREE, -1, -1);
- req.encap_record_id = encap_record_handle;
-
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_CFA_ENCAP_RECORD_FREE);
+ if (!rc) {
+ req->encap_record_id = encap_record_handle;
+ rc = hwrm_req_send(bp, req);
+ }
if (rc)
netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
@@ -1673,14 +1681,20 @@ static int
bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
struct bnxt_tc_stats_batch stats_batch[])
{
- struct hwrm_cfa_flow_stats_input req = { 0 };
struct hwrm_cfa_flow_stats_output *resp;
- __le16 *req_flow_handles = &req.flow_handle_0;
- __le32 *req_flow_ids = &req.flow_id_0;
+ struct hwrm_cfa_flow_stats_input *req;
+ __le16 *req_flow_handles;
+ __le32 *req_flow_ids;
int rc, i;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1);
- req.num_flows = cpu_to_le16(num_flows);
+ rc = hwrm_req_init(bp, req, HWRM_CFA_FLOW_STATS);
+ if (rc)
+ goto exit;
+
+ req_flow_handles = &req->flow_handle_0;
+ req_flow_ids = &req->flow_id_0;
+
+ req->num_flows = cpu_to_le16(num_flows);
for (i = 0; i < num_flows; i++) {
struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
@@ -1688,13 +1702,12 @@ bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
&req_flow_handles[i], &req_flow_ids[i]);
}
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc) {
__le64 *resp_packets;
__le64 *resp_bytes;
- resp = bnxt_get_hwrm_resp_addr(bp, &req);
resp_packets = &resp->packet_0;
resp_bytes = &resp->byte_0;
@@ -1704,10 +1717,11 @@ bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
stats_batch[i].hw_stats.bytes =
le64_to_cpu(resp_bytes[i]);
}
- } else {
- netdev_info(bp->dev, "error rc=%d\n", rc);
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
+exit:
+ if (rc)
+ netdev_info(bp->dev, "error rc=%d\n", rc);
return rc;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 187ff643ad2a..fde0c3e8ac57 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -22,6 +22,7 @@
#include "bnxt_hsi.h"
#include "bnxt.h"
+#include "bnxt_hwrm.h"
#include "bnxt_ulp.h"
static int bnxt_register_dev(struct bnxt_en_dev *edev, int ulp_id,
@@ -237,27 +238,33 @@ static int bnxt_send_msg(struct bnxt_en_dev *edev, int ulp_id,
{
struct net_device *dev = edev->net;
struct bnxt *bp = netdev_priv(dev);
+ struct output *resp;
struct input *req;
+ u32 resp_len;
int rc;
if (ulp_id != BNXT_ROCE_ULP && bp->fw_reset_state)
return -EBUSY;
- mutex_lock(&bp->hwrm_cmd_lock);
- req = fw_msg->msg;
- req->resp_addr = cpu_to_le64(bp->hwrm_cmd_resp_dma_addr);
- rc = _hwrm_send_message(bp, fw_msg->msg, fw_msg->msg_len,
- fw_msg->timeout);
- if (!rc) {
- struct output *resp = bp->hwrm_cmd_resp_addr;
- u32 len = le16_to_cpu(resp->resp_len);
+ rc = hwrm_req_init(bp, req, 0 /* don't care */);
+ if (rc)
+ return rc;
- if (fw_msg->resp_max_len < len)
- len = fw_msg->resp_max_len;
+ rc = hwrm_req_replace(bp, req, fw_msg->msg, fw_msg->msg_len);
+ if (rc)
+ return rc;
- memcpy(fw_msg->resp, resp, len);
+ hwrm_req_timeout(bp, req, fw_msg->timeout);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
+ resp_len = le16_to_cpu(resp->resp_len);
+ if (resp_len) {
+ if (fw_msg->resp_max_len < resp_len)
+ resp_len = fw_msg->resp_max_len;
+
+ memcpy(fw_msg->resp, resp, resp_len);
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index dd66302343a2..9401936b74fa 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -15,6 +15,7 @@
#include "bnxt_hsi.h"
#include "bnxt.h"
+#include "bnxt_hwrm.h"
#include "bnxt_vfr.h"
#include "bnxt_devlink.h"
#include "bnxt_tc.h"
@@ -27,38 +28,40 @@
static int hwrm_cfa_vfr_alloc(struct bnxt *bp, u16 vf_idx,
u16 *tx_cfa_action, u16 *rx_cfa_code)
{
- struct hwrm_cfa_vfr_alloc_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_cfa_vfr_alloc_input req = { 0 };
+ struct hwrm_cfa_vfr_alloc_output *resp;
+ struct hwrm_cfa_vfr_alloc_input *req;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_VFR_ALLOC, -1, -1);
- req.vf_id = cpu_to_le16(vf_idx);
- sprintf(req.vfr_name, "vfr%d", vf_idx);
-
- mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_CFA_VFR_ALLOC);
if (!rc) {
- *tx_cfa_action = le16_to_cpu(resp->tx_cfa_action);
- *rx_cfa_code = le16_to_cpu(resp->rx_cfa_code);
- netdev_dbg(bp->dev, "tx_cfa_action=0x%x, rx_cfa_code=0x%x",
- *tx_cfa_action, *rx_cfa_code);
- } else {
- netdev_info(bp->dev, "%s error rc=%d\n", __func__, rc);
+ req->vf_id = cpu_to_le16(vf_idx);
+ sprintf(req->vfr_name, "vfr%d", vf_idx);
+
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
+ if (!rc) {
+ *tx_cfa_action = le16_to_cpu(resp->tx_cfa_action);
+ *rx_cfa_code = le16_to_cpu(resp->rx_cfa_code);
+ netdev_dbg(bp->dev, "tx_cfa_action=0x%x, rx_cfa_code=0x%x",
+ *tx_cfa_action, *rx_cfa_code);
+ }
+ hwrm_req_drop(bp, req);
}
-
- mutex_unlock(&bp->hwrm_cmd_lock);
+ if (rc)
+ netdev_info(bp->dev, "%s error rc=%d\n", __func__, rc);
return rc;
}
static int hwrm_cfa_vfr_free(struct bnxt *bp, u16 vf_idx)
{
- struct hwrm_cfa_vfr_free_input req = { 0 };
+ struct hwrm_cfa_vfr_free_input *req;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_VFR_FREE, -1, -1);
- sprintf(req.vfr_name, "vfr%d", vf_idx);
-
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_req_init(bp, req, HWRM_CFA_VFR_FREE);
+ if (!rc) {
+ sprintf(req->vfr_name, "vfr%d", vf_idx);
+ rc = hwrm_req_send(bp, req);
+ }
if (rc)
netdev_info(bp->dev, "%s error rc=%d\n", __func__, rc);
return rc;
@@ -67,17 +70,18 @@ static int hwrm_cfa_vfr_free(struct bnxt *bp, u16 vf_idx)
static int bnxt_hwrm_vfr_qcfg(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
u16 *max_mtu)
{
- struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
- struct hwrm_func_qcfg_input req = {0};
+ struct hwrm_func_qcfg_output *resp;
+ struct hwrm_func_qcfg_input *req;
u16 mtu;
int rc;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
- req.fid = cpu_to_le16(bp->pf.vf[vf_rep->vf_idx].fw_fid);
-
- mutex_lock(&bp->hwrm_cmd_lock);
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_QCFG);
+ if (rc)
+ return rc;
- rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ req->fid = cpu_to_le16(bp->pf.vf[vf_rep->vf_idx].fw_fid);
+ resp = hwrm_req_hold(bp, req);
+ rc = hwrm_req_send(bp, req);
if (!rc) {
mtu = le16_to_cpu(resp->max_mtu_configured);
if (!mtu)
@@ -85,7 +89,7 @@ static int bnxt_hwrm_vfr_qcfg(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
else
*max_mtu = mtu;
}
- mutex_unlock(&bp->hwrm_cmd_lock);
+ hwrm_req_drop(bp, req);
return rc;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index bee6e091a997..c8083df5e0ab 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -87,7 +87,7 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
dma_unmap_single(&pdev->dev,
dma_unmap_addr(tx_buf, mapping),
dma_unmap_len(tx_buf, len),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
xdp_return_frame(tx_buf->xdpf);
tx_buf->action = 0;
tx_buf->xdpf = NULL;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index db74241935ab..23c7595d2a1d 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -828,7 +828,9 @@ static void bcmgenet_set_msglevel(struct net_device *dev, u32 level)
}
static int bcmgenet_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
struct bcmgenet_rx_ring *ring;
@@ -890,7 +892,9 @@ static void bcmgenet_set_ring_rx_coalesce(struct bcmgenet_rx_ring *ring,
}
static int bcmgenet_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
unsigned int i;
@@ -3659,7 +3663,7 @@ static const struct net_device_ops bcmgenet_netdev_ops = {
.ndo_tx_timeout = bcmgenet_timeout,
.ndo_set_rx_mode = bcmgenet_set_rx_mode,
.ndo_set_mac_address = bcmgenet_set_mac_addr,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_set_features = bcmgenet_set_features,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = bcmgenet_poll_controller,
@@ -3972,8 +3976,6 @@ static int bcmgenet_probe(struct platform_device *pdev)
*/
dev->needed_headroom += 64;
- netdev_boot_setup_check(dev);
-
priv->dev = dev;
priv->pdev = pdev;
diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index 5b4568c2ad1c..f38f40eb966e 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -2136,7 +2136,7 @@ static const struct net_device_ops sbmac_netdev_ops = {
.ndo_start_xmit = sbmac_start_tx,
.ndo_set_rx_mode = sbmac_set_rx_mode,
.ndo_tx_timeout = sbmac_tx_timeout,
- .ndo_do_ioctl = sbmac_mii_ioctl,
+ .ndo_eth_ioctl = sbmac_mii_ioctl,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index b0e49643f483..8a238e349e02 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6564,10 +6564,8 @@ static void tg3_tx(struct tg3_napi *tnapi)
skb_tstamp_tx(skb, &timestamp);
}
- pci_unmap_single(tp->pdev,
- dma_unmap_addr(ri, mapping),
- skb_headlen(skb),
- PCI_DMA_TODEVICE);
+ dma_unmap_single(&tp->pdev->dev, dma_unmap_addr(ri, mapping),
+ skb_headlen(skb), DMA_TO_DEVICE);
ri->skb = NULL;
@@ -6584,10 +6582,10 @@ static void tg3_tx(struct tg3_napi *tnapi)
if (unlikely(ri->skb != NULL || sw_idx == hw_idx))
tx_bug = 1;
- pci_unmap_page(tp->pdev,
+ dma_unmap_page(&tp->pdev->dev,
dma_unmap_addr(ri, mapping),
skb_frag_size(&skb_shinfo(skb)->frags[i]),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
while (ri->fragmented) {
ri->fragmented = false;
@@ -6646,8 +6644,8 @@ static void tg3_rx_data_free(struct tg3 *tp, struct ring_info *ri, u32 map_sz)
if (!ri->data)
return;
- pci_unmap_single(tp->pdev, dma_unmap_addr(ri, mapping),
- map_sz, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&tp->pdev->dev, dma_unmap_addr(ri, mapping), map_sz,
+ DMA_FROM_DEVICE);
tg3_frag_free(skb_size <= PAGE_SIZE, ri->data);
ri->data = NULL;
}
@@ -6711,11 +6709,9 @@ static int tg3_alloc_rx_data(struct tg3 *tp, struct tg3_rx_prodring_set *tpr,
if (!data)
return -ENOMEM;
- mapping = pci_map_single(tp->pdev,
- data + TG3_RX_OFFSET(tp),
- data_size,
- PCI_DMA_FROMDEVICE);
- if (unlikely(pci_dma_mapping_error(tp->pdev, mapping))) {
+ mapping = dma_map_single(&tp->pdev->dev, data + TG3_RX_OFFSET(tp),
+ data_size, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&tp->pdev->dev, mapping))) {
tg3_frag_free(skb_size <= PAGE_SIZE, data);
return -EIO;
}
@@ -6882,8 +6878,8 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
if (skb_size < 0)
goto drop_it;
- pci_unmap_single(tp->pdev, dma_addr, skb_size,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&tp->pdev->dev, dma_addr, skb_size,
+ DMA_FROM_DEVICE);
/* Ensure that the update to the data happens
* after the usage of the old DMA mapping.
@@ -6908,11 +6904,13 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
goto drop_it_no_recycle;
skb_reserve(skb, TG3_RAW_IP_ALIGN);
- pci_dma_sync_single_for_cpu(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&tp->pdev->dev, dma_addr, len,
+ DMA_FROM_DEVICE);
memcpy(skb->data,
data + TG3_RX_OFFSET(tp),
len);
- pci_dma_sync_single_for_device(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_device(&tp->pdev->dev, dma_addr,
+ len, DMA_FROM_DEVICE);
}
skb_put(skb, len);
@@ -7762,10 +7760,8 @@ static void tg3_tx_skb_unmap(struct tg3_napi *tnapi, u32 entry, int last)
skb = txb->skb;
txb->skb = NULL;
- pci_unmap_single(tnapi->tp->pdev,
- dma_unmap_addr(txb, mapping),
- skb_headlen(skb),
- PCI_DMA_TODEVICE);
+ dma_unmap_single(&tnapi->tp->pdev->dev, dma_unmap_addr(txb, mapping),
+ skb_headlen(skb), DMA_TO_DEVICE);
while (txb->fragmented) {
txb->fragmented = false;
@@ -7779,9 +7775,9 @@ static void tg3_tx_skb_unmap(struct tg3_napi *tnapi, u32 entry, int last)
entry = NEXT_TX(entry);
txb = &tnapi->tx_buffers[entry];
- pci_unmap_page(tnapi->tp->pdev,
+ dma_unmap_page(&tnapi->tp->pdev->dev,
dma_unmap_addr(txb, mapping),
- skb_frag_size(frag), PCI_DMA_TODEVICE);
+ skb_frag_size(frag), DMA_TO_DEVICE);
while (txb->fragmented) {
txb->fragmented = false;
@@ -7816,10 +7812,10 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi,
ret = -1;
} else {
/* New SKB is guaranteed to be linear. */
- new_addr = pci_map_single(tp->pdev, new_skb->data, new_skb->len,
- PCI_DMA_TODEVICE);
+ new_addr = dma_map_single(&tp->pdev->dev, new_skb->data,
+ new_skb->len, DMA_TO_DEVICE);
/* Make sure the mapping succeeded */
- if (pci_dma_mapping_error(tp->pdev, new_addr)) {
+ if (dma_mapping_error(&tp->pdev->dev, new_addr)) {
dev_kfree_skb_any(new_skb);
ret = -1;
} else {
@@ -8043,8 +8039,9 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
len = skb_headlen(skb);
- mapping = pci_map_single(tp->pdev, skb->data, len, PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(tp->pdev, mapping))
+ mapping = dma_map_single(&tp->pdev->dev, skb->data, len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&tp->pdev->dev, mapping))
goto drop;
@@ -13499,8 +13496,8 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, bool tso_loopback)
for (i = data_off; i < tx_len; i++)
tx_data[i] = (u8) (i & 0xff);
- map = pci_map_single(tp->pdev, skb->data, tx_len, PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(tp->pdev, map)) {
+ map = dma_map_single(&tp->pdev->dev, skb->data, tx_len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&tp->pdev->dev, map)) {
dev_kfree_skb(skb);
return -EIO;
}
@@ -13598,8 +13595,8 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, bool tso_loopback)
} else
goto out;
- pci_dma_sync_single_for_cpu(tp->pdev, map, rx_len,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&tp->pdev->dev, map, rx_len,
+ DMA_FROM_DEVICE);
rx_data += TG3_RX_OFFSET(tp);
for (i = data_off; i < rx_len; i++, val++) {
@@ -14040,7 +14037,10 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EOPNOTSUPP;
}
-static int tg3_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+static int tg3_get_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct tg3 *tp = netdev_priv(dev);
@@ -14048,7 +14048,10 @@ static int tg3_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
return 0;
}
-static int tg3_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+static int tg3_set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct tg3 *tp = netdev_priv(dev);
u32 max_rxcoal_tick_int = 0, max_txcoal_tick_int = 0;
@@ -14290,7 +14293,7 @@ static const struct net_device_ops tg3_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = tg3_set_rx_mode,
.ndo_set_mac_address = tg3_set_mac_addr,
- .ndo_do_ioctl = tg3_ioctl,
+ .ndo_eth_ioctl = tg3_ioctl,
.ndo_tx_timeout = tg3_tx_timeout,
.ndo_change_mtu = tg3_change_mtu,
.ndo_fix_features = tg3_fix_features,
@@ -17755,11 +17758,11 @@ static int tg3_init_one(struct pci_dev *pdev,
/* Configure DMA attributes. */
if (dma_mask > DMA_BIT_MASK(32)) {
- err = pci_set_dma_mask(pdev, dma_mask);
+ err = dma_set_mask(&pdev->dev, dma_mask);
if (!err) {
features |= NETIF_F_HIGHDMA;
- err = pci_set_consistent_dma_mask(pdev,
- persist_dma_mask);
+ err = dma_set_coherent_mask(&pdev->dev,
+ persist_dma_mask);
if (err < 0) {
dev_err(&pdev->dev, "Unable to obtain 64 bit "
"DMA for consistent allocations\n");
@@ -17768,7 +17771,7 @@ static int tg3_init_one(struct pci_dev *pdev,
}
}
if (err || dma_mask == DMA_BIT_MASK(32)) {
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev,
"No usable DMA configuration, aborting\n");
diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
index 265c2fa6bbe0..391b85f25141 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
@@ -307,8 +307,10 @@ bnad_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wolinfo)
wolinfo->wolopts = 0;
}
-static int
-bnad_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coalesce)
+static int bnad_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct bnad *bnad = netdev_priv(netdev);
unsigned long flags;
@@ -328,8 +330,10 @@ bnad_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coalesce)
return 0;
}
-static int
-bnad_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *coalesce)
+static int bnad_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct bnad *bnad = netdev_priv(netdev);
unsigned long flags;
diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig
index e432a68ac520..5b2a461dfd28 100644
--- a/drivers/net/ethernet/cadence/Kconfig
+++ b/drivers/net/ethernet/cadence/Kconfig
@@ -22,6 +22,7 @@ if NET_VENDOR_CADENCE
config MACB
tristate "Cadence MACB/GEM support"
depends on HAS_DMA && COMMON_CLK
+ depends on PTP_1588_CLOCK_OPTIONAL
select PHYLINK
select CRC32
help
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 7d2fe13a52f8..d13fb1d31821 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -3664,7 +3664,7 @@ static const struct net_device_ops macb_netdev_ops = {
.ndo_start_xmit = macb_start_xmit,
.ndo_set_rx_mode = macb_set_rx_mode,
.ndo_get_stats = macb_get_stats,
- .ndo_do_ioctl = macb_ioctl,
+ .ndo_eth_ioctl = macb_ioctl,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = macb_change_mtu,
.ndo_set_mac_address = eth_mac_addr,
@@ -4323,7 +4323,7 @@ static const struct net_device_ops at91ether_netdev_ops = {
.ndo_get_stats = macb_get_stats,
.ndo_set_rx_mode = macb_set_rx_mode,
.ndo_set_mac_address = eth_mac_addr,
- .ndo_do_ioctl = macb_ioctl,
+ .ndo_eth_ioctl = macb_ioctl,
.ndo_validate_addr = eth_validate_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = at91ether_poll_controller,
@@ -4533,6 +4533,14 @@ static const struct macb_config sama5d2_config = {
.usrio = &macb_default_usrio,
};
+static const struct macb_config sama5d29_config = {
+ .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_GEM_HAS_PTP,
+ .dma_burst_length = 16,
+ .clk_init = macb_clk_init,
+ .init = macb_init,
+ .usrio = &macb_default_usrio,
+};
+
static const struct macb_config sama5d3_config = {
.caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE
| MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_JUMBO,
@@ -4610,6 +4618,7 @@ static const struct of_device_id macb_dt_ids[] = {
{ .compatible = "cdns,gem", .data = &pc302gem_config },
{ .compatible = "cdns,sam9x60-macb", .data = &at91sam9260_config },
{ .compatible = "atmel,sama5d2-gem", .data = &sama5d2_config },
+ { .compatible = "atmel,sama5d29-gem", .data = &sama5d29_config },
{ .compatible = "atmel,sama5d3-gem", .data = &sama5d3_config },
{ .compatible = "atmel,sama5d3-macb", .data = &sama5d3macb_config },
{ .compatible = "atmel,sama5d4-gem", .data = &sama5d4_config },
diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c
index 5c368a9cbbbc..c2e1f163bb14 100644
--- a/drivers/net/ethernet/cadence/macb_ptp.c
+++ b/drivers/net/ethernet/cadence/macb_ptp.c
@@ -275,6 +275,12 @@ void gem_ptp_rxstamp(struct macb *bp, struct sk_buff *skb,
if (GEM_BFEXT(DMA_RXVALID, desc->addr)) {
desc_ptp = macb_ptp_desc(bp, desc);
+ /* Unlikely but check */
+ if (!desc_ptp) {
+ dev_warn_ratelimited(&bp->pdev->dev,
+ "Timestamp not supported in BD\n");
+ return;
+ }
gem_hw_timestamp(bp, desc_ptp->ts_1, desc_ptp->ts_2, &ts);
memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
shhwtstamps->hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
@@ -307,8 +313,11 @@ int gem_ptp_txstamp(struct macb_queue *queue, struct sk_buff *skb,
if (CIRC_SPACE(head, tail, PTP_TS_BUFFER_SIZE) == 0)
return -ENOMEM;
- skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
desc_ptp = macb_ptp_desc(queue->bp, desc);
+ /* Unlikely but check */
+ if (!desc_ptp)
+ return -EINVAL;
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
tx_timestamp = &queue->tx_timestamps[head];
tx_timestamp->skb = skb;
/* ensure ts_1/ts_2 is loaded after ctrl (TX_USED check) */
diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig
index 4875cdae622e..1c76c95b0b27 100644
--- a/drivers/net/ethernet/cavium/Kconfig
+++ b/drivers/net/ethernet/cavium/Kconfig
@@ -66,7 +66,7 @@ config LIQUIDIO
tristate "Cavium LiquidIO support"
depends on 64BIT && PCI
depends on PCI
- imply PTP_1588_CLOCK
+ depends on PTP_1588_CLOCK_OPTIONAL
select FW_LOADER
select LIBCRC32C
select NET_DEVLINK
@@ -91,7 +91,7 @@ config OCTEON_MGMT_ETHERNET
config LIQUIDIO_VF
tristate "Cavium LiquidIO VF support"
depends on 64BIT && PCI_MSI
- imply PTP_1588_CLOCK
+ depends on PTP_1588_CLOCK_OPTIONAL
help
This driver supports Cavium LiquidIO Intelligent Server Adapter
based on CN23XX chips.
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index 66f2c553370c..2b9747867d4c 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -2108,7 +2108,9 @@ static int octnet_set_intrmod_cfg(struct lio *lio,
}
static int lio_get_intr_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *intr_coal)
+ struct ethtool_coalesce *intr_coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct lio *lio = GET_LIO(netdev);
struct octeon_device *oct = lio->oct_dev;
@@ -2412,7 +2414,9 @@ oct_cfg_tx_intrcnt(struct lio *lio,
}
static int lio_set_intr_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *intr_coal)
+ struct ethtool_coalesce *intr_coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct lio *lio = GET_LIO(netdev);
int ret;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 591229b96257..2907e13b9df6 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -1457,7 +1457,7 @@ static void free_netsgbuf(void *buf)
while (frags--) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
- pci_unmap_page((lio->oct_dev)->pci_dev,
+ dma_unmap_page(&lio->oct_dev->pci_dev->dev,
g->sg[(i >> 2)].ptr[(i & 3)],
skb_frag_size(frag), DMA_TO_DEVICE);
i++;
@@ -1500,7 +1500,7 @@ static void free_netsgbuf_with_resp(void *buf)
while (frags--) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
- pci_unmap_page((lio->oct_dev)->pci_dev,
+ dma_unmap_page(&lio->oct_dev->pci_dev->dev,
g->sg[(i >> 2)].ptr[(i & 3)],
skb_frag_size(frag), DMA_TO_DEVICE);
i++;
@@ -3223,7 +3223,7 @@ static const struct net_device_ops lionetdevops = {
.ndo_vlan_rx_add_vid = liquidio_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = liquidio_vlan_rx_kill_vid,
.ndo_change_mtu = liquidio_change_mtu,
- .ndo_do_ioctl = liquidio_ioctl,
+ .ndo_eth_ioctl = liquidio_ioctl,
.ndo_fix_features = liquidio_fix_features,
.ndo_set_features = liquidio_set_features,
.ndo_set_vf_mac = liquidio_set_vf_mac,
@@ -3750,7 +3750,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
}
devlink = devlink_alloc(&liquidio_devlink_ops,
- sizeof(struct lio_devlink_priv));
+ sizeof(struct lio_devlink_priv),
+ &octeon_dev->pci_dev->dev);
if (!devlink) {
dev_err(&octeon_dev->pci_dev->dev, "devlink alloc failed\n");
goto setup_nic_dev_free;
@@ -3759,7 +3760,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
lio_devlink = devlink_priv(devlink);
lio_devlink->oct = octeon_dev;
- if (devlink_register(devlink, &octeon_dev->pci_dev->dev)) {
+ if (devlink_register(devlink)) {
devlink_free(devlink);
dev_err(&octeon_dev->pci_dev->dev,
"devlink registration failed\n");
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index ffddb3126a32..c6fe0f2a4d0e 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -843,7 +843,7 @@ static void free_netsgbuf(void *buf)
while (frags--) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
- pci_unmap_page((lio->oct_dev)->pci_dev,
+ dma_unmap_page(&lio->oct_dev->pci_dev->dev,
g->sg[(i >> 2)].ptr[(i & 3)],
skb_frag_size(frag), DMA_TO_DEVICE);
i++;
@@ -887,7 +887,7 @@ static void free_netsgbuf_with_resp(void *buf)
while (frags--) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
- pci_unmap_page((lio->oct_dev)->pci_dev,
+ dma_unmap_page(&lio->oct_dev->pci_dev->dev,
g->sg[(i >> 2)].ptr[(i & 3)],
skb_frag_size(frag), DMA_TO_DEVICE);
i++;
@@ -1889,7 +1889,7 @@ static const struct net_device_ops lionetdevops = {
.ndo_vlan_rx_add_vid = liquidio_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = liquidio_vlan_rx_kill_vid,
.ndo_change_mtu = liquidio_change_mtu,
- .ndo_do_ioctl = liquidio_ioctl,
+ .ndo_eth_ioctl = liquidio_ioctl,
.ndo_fix_features = liquidio_fix_features,
.ndo_set_features = liquidio_set_features,
};
diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index 48ff6fb0eed9..30463a6d1f8c 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -1373,7 +1373,7 @@ static const struct net_device_ops octeon_mgmt_ops = {
.ndo_start_xmit = octeon_mgmt_xmit,
.ndo_set_rx_mode = octeon_mgmt_set_rx_filtering,
.ndo_set_mac_address = octeon_mgmt_set_mac_address,
- .ndo_do_ioctl = octeon_mgmt_ioctl,
+ .ndo_eth_ioctl = octeon_mgmt_ioctl,
.ndo_change_mtu = octeon_mgmt_change_mtu,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = octeon_mgmt_poll_controller,
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 9361f964bb9b..691e1475d55e 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -1322,18 +1322,12 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_disable_device;
}
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
if (err) {
dev_err(dev, "Unable to get usable DMA configuration\n");
goto err_release_regions;
}
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
- if (err) {
- dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n");
- goto err_release_regions;
- }
-
/* MAP PF's configuration registers */
nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
if (!nic->reg_base) {
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index 2f218fbfed06..7f2882109b16 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -456,7 +456,9 @@ static void nicvf_get_regs(struct net_device *dev,
}
static int nicvf_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *cmd)
+ struct ethtool_coalesce *cmd,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct nicvf *nic = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index e2b290135fd9..d1667b759522 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -2096,7 +2096,7 @@ static const struct net_device_ops nicvf_netdev_ops = {
.ndo_fix_features = nicvf_fix_features,
.ndo_set_features = nicvf_set_features,
.ndo_bpf = nicvf_xdp,
- .ndo_do_ioctl = nicvf_ioctl,
+ .ndo_eth_ioctl = nicvf_ioctl,
.ndo_set_rx_mode = nicvf_set_rx_mode,
};
@@ -2130,18 +2130,12 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_disable_device;
}
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
if (err) {
dev_err(dev, "Unable to get usable DMA configuration\n");
goto err_release_regions;
}
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
- if (err) {
- dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n");
- goto err_release_regions;
- }
-
qcount = netif_get_num_default_rss_queues();
/* Restrict multiqset support only for host bound VFs */
diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig
index 8ba0e08e5e64..c931ec8cac40 100644
--- a/drivers/net/ethernet/chelsio/Kconfig
+++ b/drivers/net/ethernet/chelsio/Kconfig
@@ -69,6 +69,7 @@ config CHELSIO_T3
config CHELSIO_T4
tristate "Chelsio Communications T4/T5/T6 Ethernet support"
depends on PCI && (IPV6 || IPV6=n) && (TLS || TLS=n)
+ depends on PTP_1588_CLOCK_OPTIONAL
select FW_LOADER
select MDIO
select ZLIB_DEFLATE
diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index 512da98019c6..73c016166f06 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -748,7 +748,9 @@ static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
return 0;
}
-static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
+static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct adapter *adapter = dev->ml_priv;
@@ -759,7 +761,9 @@ static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
return 0;
}
-static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
+static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct adapter *adapter = dev->ml_priv;
@@ -924,7 +928,7 @@ static const struct net_device_ops cxgb_netdev_ops = {
.ndo_get_stats = t1_get_stats,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = t1_set_rxmode,
- .ndo_do_ioctl = t1_ioctl,
+ .ndo_eth_ioctl = t1_ioctl,
.ndo_change_mtu = t1_change_mtu,
.ndo_set_mac_address = t1_set_mac_addr,
.ndo_fix_features = t1_fix_features,
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 57f210c53afc..38e47703f9ab 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -1996,7 +1996,9 @@ static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
return 0;
}
-static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
+static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct port_info *pi = netdev_priv(dev);
struct adapter *adapter = pi->adapter;
@@ -2017,7 +2019,9 @@ static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
return 0;
}
-static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
+static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct port_info *pi = netdev_priv(dev);
struct adapter *adapter = pi->adapter;
@@ -2135,13 +2139,18 @@ static int in_range(int val, int lo, int hi)
return val < 0 || (val <= hi && val >= lo);
}
-static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
+static int cxgb_siocdevprivate(struct net_device *dev,
+ struct ifreq *ifreq,
+ void __user *useraddr,
+ int cmd)
{
struct port_info *pi = netdev_priv(dev);
struct adapter *adapter = pi->adapter;
- u32 cmd;
int ret;
+ if (cmd != SIOCCHIOCTL)
+ return -EOPNOTSUPP;
+
if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
return -EFAULT;
@@ -2546,8 +2555,6 @@ static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
fallthrough;
case SIOCGMIIPHY:
return mdio_mii_ioctl(&pi->phy.mdio, data, cmd);
- case SIOCCHIOCTL:
- return cxgb_extension_ioctl(dev, req->ifr_data);
default:
return -EOPNOTSUPP;
}
@@ -3181,7 +3188,8 @@ static const struct net_device_ops cxgb_netdev_ops = {
.ndo_get_stats = cxgb_get_stats,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = cxgb_set_rxmode,
- .ndo_do_ioctl = cxgb_ioctl,
+ .ndo_eth_ioctl = cxgb_ioctl,
+ .ndo_siocdevprivate = cxgb_siocdevprivate,
.ndo_change_mtu = cxgb_change_mtu,
.ndo_set_mac_address = cxgb_set_mac_addr,
.ndo_fix_features = cxgb_fix_features,
@@ -3231,15 +3239,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
goto out_disable_device;
}
- if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
+ if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
pci_using_dac = 1;
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
- if (err) {
- dev_err(&pdev->dev, "unable to obtain 64-bit DMA for "
- "coherent allocations\n");
- goto out_release_regions;
- }
- } else if ((err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) != 0) {
+ } else if ((err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) != 0) {
dev_err(&pdev->dev, "no usable DMA configuration\n");
goto out_release_regions;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index cb5c79c43bc9..e21a2e691382 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -244,8 +244,8 @@ static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
frag_idx = d->fragidx;
if (frag_idx == 0 && skb_headlen(skb)) {
- pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
- skb_headlen(skb), PCI_DMA_TODEVICE);
+ dma_unmap_single(&pdev->dev, be64_to_cpu(sgp->addr[0]),
+ skb_headlen(skb), DMA_TO_DEVICE);
j = 1;
}
@@ -253,9 +253,9 @@ static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
nfrags = skb_shinfo(skb)->nr_frags;
while (frag_idx < nfrags && curflit < WR_FLITS) {
- pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
+ dma_unmap_page(&pdev->dev, be64_to_cpu(sgp->addr[j]),
skb_frag_size(&skb_shinfo(skb)->frags[frag_idx]),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
j ^= 1;
if (j == 0) {
sgp++;
@@ -355,15 +355,14 @@ static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q,
if (q->use_pages && d->pg_chunk.page) {
(*d->pg_chunk.p_cnt)--;
if (!*d->pg_chunk.p_cnt)
- pci_unmap_page(pdev,
- d->pg_chunk.mapping,
- q->alloc_size, PCI_DMA_FROMDEVICE);
+ dma_unmap_page(&pdev->dev, d->pg_chunk.mapping,
+ q->alloc_size, DMA_FROM_DEVICE);
put_page(d->pg_chunk.page);
d->pg_chunk.page = NULL;
} else {
- pci_unmap_single(pdev, dma_unmap_addr(d, dma_addr),
- q->buf_size, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&pdev->dev, dma_unmap_addr(d, dma_addr),
+ q->buf_size, DMA_FROM_DEVICE);
kfree_skb(d->skb);
d->skb = NULL;
}
@@ -414,8 +413,8 @@ static inline int add_one_rx_buf(void *va, unsigned int len,
{
dma_addr_t mapping;
- mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
- if (unlikely(pci_dma_mapping_error(pdev, mapping)))
+ mapping = dma_map_single(&pdev->dev, va, len, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&pdev->dev, mapping)))
return -ENOMEM;
dma_unmap_addr_set(sd, dma_addr, mapping);
@@ -453,9 +452,9 @@ static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q,
q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) -
SGE_PG_RSVD;
q->pg_chunk.offset = 0;
- mapping = pci_map_page(adapter->pdev, q->pg_chunk.page,
- 0, q->alloc_size, PCI_DMA_FROMDEVICE);
- if (unlikely(pci_dma_mapping_error(adapter->pdev, mapping))) {
+ mapping = dma_map_page(&adapter->pdev->dev, q->pg_chunk.page,
+ 0, q->alloc_size, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&adapter->pdev->dev, mapping))) {
__free_pages(q->pg_chunk.page, order);
q->pg_chunk.page = NULL;
return -EIO;
@@ -522,9 +521,9 @@ nomem: q->alloc_failed++;
dma_unmap_addr_set(sd, dma_addr, mapping);
add_one_rx_chunk(mapping, d, q->gen);
- pci_dma_sync_single_for_device(adap->pdev, mapping,
- q->buf_size - SGE_PG_RSVD,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_device(&adap->pdev->dev, mapping,
+ q->buf_size - SGE_PG_RSVD,
+ DMA_FROM_DEVICE);
} else {
void *buf_start;
@@ -793,13 +792,13 @@ static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
skb = alloc_skb(len, GFP_ATOMIC);
if (likely(skb != NULL)) {
__skb_put(skb, len);
- pci_dma_sync_single_for_cpu(adap->pdev,
- dma_unmap_addr(sd, dma_addr), len,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&adap->pdev->dev,
+ dma_unmap_addr(sd, dma_addr),
+ len, DMA_FROM_DEVICE);
memcpy(skb->data, sd->skb->data, len);
- pci_dma_sync_single_for_device(adap->pdev,
- dma_unmap_addr(sd, dma_addr), len,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_device(&adap->pdev->dev,
+ dma_unmap_addr(sd, dma_addr),
+ len, DMA_FROM_DEVICE);
} else if (!drop_thres)
goto use_orig_buf;
recycle:
@@ -813,8 +812,8 @@ recycle:
goto recycle;
use_orig_buf:
- pci_unmap_single(adap->pdev, dma_unmap_addr(sd, dma_addr),
- fl->buf_size, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&adap->pdev->dev, dma_unmap_addr(sd, dma_addr),
+ fl->buf_size, DMA_FROM_DEVICE);
skb = sd->skb;
skb_put(skb, len);
__refill_fl(adap, fl);
@@ -854,12 +853,11 @@ static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
newskb = alloc_skb(len, GFP_ATOMIC);
if (likely(newskb != NULL)) {
__skb_put(newskb, len);
- pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&adap->pdev->dev, dma_addr,
+ len, DMA_FROM_DEVICE);
memcpy(newskb->data, sd->pg_chunk.va, len);
- pci_dma_sync_single_for_device(adap->pdev, dma_addr,
- len,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_device(&adap->pdev->dev, dma_addr,
+ len, DMA_FROM_DEVICE);
} else if (!drop_thres)
return NULL;
recycle:
@@ -883,14 +881,12 @@ recycle:
goto recycle;
}
- pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&adap->pdev->dev, dma_addr, len,
+ DMA_FROM_DEVICE);
(*sd->pg_chunk.p_cnt)--;
if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page)
- pci_unmap_page(adap->pdev,
- sd->pg_chunk.mapping,
- fl->alloc_size,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_page(&adap->pdev->dev, sd->pg_chunk.mapping,
+ fl->alloc_size, DMA_FROM_DEVICE);
if (!skb) {
__skb_put(newskb, SGE_RX_PULL_LEN);
memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
@@ -968,9 +964,9 @@ static int map_skb(struct pci_dev *pdev, const struct sk_buff *skb,
const struct skb_shared_info *si;
if (skb_headlen(skb)) {
- *addr = pci_map_single(pdev, skb->data, skb_headlen(skb),
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(pdev, *addr))
+ *addr = dma_map_single(&pdev->dev, skb->data,
+ skb_headlen(skb), DMA_TO_DEVICE);
+ if (dma_mapping_error(&pdev->dev, *addr))
goto out_err;
addr++;
}
@@ -981,7 +977,7 @@ static int map_skb(struct pci_dev *pdev, const struct sk_buff *skb,
for (fp = si->frags; fp < end; fp++) {
*addr = skb_frag_dma_map(&pdev->dev, fp, 0, skb_frag_size(fp),
DMA_TO_DEVICE);
- if (pci_dma_mapping_error(pdev, *addr))
+ if (dma_mapping_error(&pdev->dev, *addr))
goto unwind;
addr++;
}
@@ -992,7 +988,8 @@ unwind:
dma_unmap_page(&pdev->dev, *--addr, skb_frag_size(fp),
DMA_TO_DEVICE);
- pci_unmap_single(pdev, addr[-1], skb_headlen(skb), PCI_DMA_TODEVICE);
+ dma_unmap_single(&pdev->dev, addr[-1], skb_headlen(skb),
+ DMA_TO_DEVICE);
out_err:
return -ENOMEM;
}
@@ -1592,13 +1589,14 @@ static void deferred_unmap_destructor(struct sk_buff *skb)
p = dui->addr;
if (skb_tail_pointer(skb) - skb_transport_header(skb))
- pci_unmap_single(dui->pdev, *p++, skb_tail_pointer(skb) -
- skb_transport_header(skb), PCI_DMA_TODEVICE);
+ dma_unmap_single(&dui->pdev->dev, *p++,
+ skb_tail_pointer(skb) - skb_transport_header(skb),
+ DMA_TO_DEVICE);
si = skb_shinfo(skb);
for (i = 0; i < si->nr_frags; i++)
- pci_unmap_page(dui->pdev, *p++, skb_frag_size(&si->frags[i]),
- PCI_DMA_TODEVICE);
+ dma_unmap_page(&dui->pdev->dev, *p++,
+ skb_frag_size(&si->frags[i]), DMA_TO_DEVICE);
}
static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
@@ -2153,17 +2151,14 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
fl->credits--;
- pci_dma_sync_single_for_cpu(adap->pdev,
- dma_unmap_addr(sd, dma_addr),
- fl->buf_size - SGE_PG_RSVD,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&adap->pdev->dev,
+ dma_unmap_addr(sd, dma_addr),
+ fl->buf_size - SGE_PG_RSVD, DMA_FROM_DEVICE);
(*sd->pg_chunk.p_cnt)--;
if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page)
- pci_unmap_page(adap->pdev,
- sd->pg_chunk.mapping,
- fl->alloc_size,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_page(&adap->pdev->dev, sd->pg_chunk.mapping,
+ fl->alloc_size, DMA_FROM_DEVICE);
if (!skb) {
put_page(sd->pg_chunk.page);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 83ed10ac8660..5903bdb78916 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -1147,7 +1147,9 @@ static int set_dbqtimer_tickval(struct net_device *dev,
}
static int set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *coalesce)
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
int ret;
@@ -1163,7 +1165,9 @@ static int set_coalesce(struct net_device *dev,
coalesce->tx_coalesce_usecs);
}
-static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
+static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
const struct port_info *pi = netdev_priv(dev);
const struct adapter *adap = pi->adapter;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 6260b3bebd2b..786ceae34488 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -1441,7 +1441,7 @@ static int cxgb4_set_hash_filter(struct net_device *dev,
} else if (iconf & USE_ENC_IDX_F) {
if (f->fs.val.encap_vld) {
struct port_info *pi = netdev_priv(f->dev);
- u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
+ static const u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
/* allocate MPS TCAM entry */
ret = t4_alloc_encap_mac_filt(adapter, pi->viid,
@@ -1688,7 +1688,7 @@ int __cxgb4_set_filter(struct net_device *dev, int ftid,
} else if (iconf & USE_ENC_IDX_F) {
if (f->fs.val.encap_vld) {
struct port_info *pi = netdev_priv(f->dev);
- u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
+ static const u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
/* allocate MPS TCAM entry */
ret = t4_alloc_encap_mac_filt(adapter, pi->viid,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index dbf9a0e6601d..0d9cda4ab303 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3872,7 +3872,7 @@ static const struct net_device_ops cxgb4_netdev_ops = {
.ndo_set_mac_address = cxgb_set_mac_addr,
.ndo_set_features = cxgb_set_features,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = cxgb_ioctl,
+ .ndo_eth_ioctl = cxgb_ioctl,
.ndo_change_mtu = cxgb_change_mtu,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = cxgb_netpoll,
@@ -4008,7 +4008,7 @@ static void adap_free_hma_mem(struct adapter *adapter)
if (adapter->hma.flags & HMA_DMA_MAPPED_FLAG) {
dma_unmap_sg(adapter->pdev_dev, adapter->hma.sgt->sgl,
- adapter->hma.sgt->nents, PCI_DMA_BIDIRECTIONAL);
+ adapter->hma.sgt->nents, DMA_BIDIRECTIONAL);
adapter->hma.flags &= ~HMA_DMA_MAPPED_FLAG;
}
@@ -5068,6 +5068,7 @@ static int adap_init0(struct adapter *adap, int vpd_skip)
ret = -ENOMEM;
goto bye;
}
+ bitmap_zero(adap->sge.blocked_fl, adap->sge.egr_sz);
#endif
params[0] = FW_PARAM_PFVF(CLIP_START);
@@ -6162,8 +6163,7 @@ static void print_port_info(const struct net_device *dev)
--bufp;
sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type));
- netdev_info(dev, "%s: Chelsio %s (%s) %s\n",
- dev->name, adap->params.vpd.id, adap->name, buf);
+ netdev_info(dev, "Chelsio %s %s\n", adap->params.vpd.id, buf);
}
/*
@@ -6687,16 +6687,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
return 0;
}
- if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
+ if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
highdma = true;
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
- if (err) {
- dev_err(&pdev->dev, "unable to obtain 64-bit DMA for "
- "coherent allocations\n");
- goto out_free_adapter;
- }
} else {
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "no usable DMA configuration\n");
goto out_free_adapter;
@@ -6788,13 +6782,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
setup_memwin(adapter);
err = adap_init0(adapter, 0);
-#ifdef CONFIG_DEBUG_FS
- bitmap_zero(adapter->sge.blocked_fl, adapter->sge.egr_sz);
-#endif
- setup_memwin_rdma(adapter);
if (err)
goto out_unmap_bar;
+ setup_memwin_rdma(adapter);
+
/* configure SGE_STAT_CFG_A to read WC stats */
if (!is_t4(adapter->params.chip))
t4_write_reg(adapter, SGE_STAT_CFG_A, STATSOURCE_T5_V(7) |
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 6a099cb34b12..fa5b596ff23a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -443,7 +443,7 @@ static void free_rx_bufs(struct adapter *adap, struct sge_fl *q, int n)
if (is_buf_mapped(d))
dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
get_buf_size(adap, d),
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
put_page(d->page);
d->page = NULL;
if (++q->cidx == q->size)
@@ -469,7 +469,7 @@ static void unmap_rx_buf(struct adapter *adap, struct sge_fl *q)
if (is_buf_mapped(d))
dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
- get_buf_size(adap, d), PCI_DMA_FROMDEVICE);
+ get_buf_size(adap, d), DMA_FROM_DEVICE);
d->page = NULL;
if (++q->cidx == q->size)
q->cidx = 0;
@@ -566,7 +566,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
mapping = dma_map_page(adap->pdev_dev, pg, 0,
PAGE_SIZE << s->fl_pg_order,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
__free_pages(pg, s->fl_pg_order);
q->mapping_err++;
@@ -596,7 +596,7 @@ alloc_small_pages:
}
mapping = dma_map_page(adap->pdev_dev, pg, 0, PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
put_page(pg);
q->mapping_err++;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 2820a0bb971b..49b76fd47daa 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -1647,7 +1647,9 @@ static int cxgb4vf_set_ringparam(struct net_device *dev,
* interrupt holdoff timer to be read on all of the device's Queue Sets.
*/
static int cxgb4vf_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *coalesce)
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
const struct port_info *pi = netdev_priv(dev);
const struct adapter *adapter = pi->adapter;
@@ -1667,7 +1669,9 @@ static int cxgb4vf_get_coalesce(struct net_device *dev,
* the interrupt holdoff timer on any of the device's Queue Sets.
*/
static int cxgb4vf_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *coalesce)
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
const struct port_info *pi = netdev_priv(dev);
struct adapter *adapter = pi->adapter;
@@ -2837,7 +2841,7 @@ static const struct net_device_ops cxgb4vf_netdev_ops = {
.ndo_set_rx_mode = cxgb4vf_set_rxmode,
.ndo_set_mac_address = cxgb4vf_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = cxgb4vf_do_ioctl,
+ .ndo_eth_ioctl = cxgb4vf_do_ioctl,
.ndo_change_mtu = cxgb4vf_change_mtu,
.ndo_fix_features = cxgb4vf_fix_features,
.ndo_set_features = cxgb4vf_set_features,
@@ -2917,17 +2921,11 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
* Set up our DMA mask: try for 64-bit address masking first and
* fall back to 32-bit if we can't get 64 bits ...
*/
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (err == 0) {
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
- if (err) {
- dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
- " coherent allocations\n");
- goto err_release_regions;
- }
pci_using_dac = 1;
} else {
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if (err != 0) {
dev_err(&pdev->dev, "no usable DMA configuration\n");
goto err_release_regions;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index 7bc80eeb2c21..0295b2406646 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -478,7 +478,7 @@ static void free_rx_bufs(struct adapter *adapter, struct sge_fl *fl, int n)
if (is_buf_mapped(sdesc))
dma_unmap_page(adapter->pdev_dev, get_buf_addr(sdesc),
get_buf_size(adapter, sdesc),
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
put_page(sdesc->page);
sdesc->page = NULL;
if (++fl->cidx == fl->size)
@@ -507,7 +507,7 @@ static void unmap_rx_buf(struct adapter *adapter, struct sge_fl *fl)
if (is_buf_mapped(sdesc))
dma_unmap_page(adapter->pdev_dev, get_buf_addr(sdesc),
get_buf_size(adapter, sdesc),
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
sdesc->page = NULL;
if (++fl->cidx == fl->size)
fl->cidx = 0;
@@ -644,7 +644,7 @@ static unsigned int refill_fl(struct adapter *adapter, struct sge_fl *fl,
dma_addr = dma_map_page(adapter->pdev_dev, page, 0,
PAGE_SIZE << s->fl_pg_order,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(adapter->pdev_dev, dma_addr))) {
/*
* We've run out of DMA mapping space. Free up the
@@ -682,7 +682,7 @@ alloc_small_pages:
poison_buf(page, PAGE_SIZE);
dma_addr = dma_map_page(adapter->pdev_dev, page, 0, PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(adapter->pdev_dev, dma_addr))) {
put_page(page);
break;
diff --git a/drivers/net/ethernet/cirrus/Kconfig b/drivers/net/ethernet/cirrus/Kconfig
index d8af9e64dd1e..dac1764ba740 100644
--- a/drivers/net/ethernet/cirrus/Kconfig
+++ b/drivers/net/ethernet/cirrus/Kconfig
@@ -6,7 +6,7 @@
config NET_VENDOR_CIRRUS
bool "Cirrus devices"
default y
- depends on ISA || EISA || ARM || MAC
+ depends on ISA || EISA || ARM || MAC || COMPILE_TEST
help
If you have a network (Ethernet) card belonging to this class, say Y.
@@ -18,9 +18,16 @@ config NET_VENDOR_CIRRUS
if NET_VENDOR_CIRRUS
config CS89x0
- tristate "CS89x0 support"
- depends on ISA || EISA || ARM
+ tristate
+
+config CS89x0_ISA
+ tristate "CS89x0 ISA driver support"
+ depends on HAS_IOPORT_MAP
+ depends on ISA
depends on !PPC32
+ depends on CS89x0_PLATFORM=n
+ select NETDEV_LEGACY_INIT
+ select CS89x0
help
Support for CS89x0 chipset based Ethernet cards. If you have a
network (Ethernet) card of this type, say Y and read the file
@@ -30,15 +37,15 @@ config CS89x0
will be called cs89x0.
config CS89x0_PLATFORM
- bool "CS89x0 platform driver support" if HAS_IOPORT_MAP
- default !HAS_IOPORT_MAP
- depends on CS89x0
+ tristate "CS89x0 platform driver support"
+ depends on ARM || COMPILE_TEST
+ select CS89x0
help
- Say Y to compile the cs89x0 driver as a platform driver. This
- makes this driver suitable for use on certain evaluation boards
- such as the iMX21ADS.
+ Say Y to compile the cs89x0 platform driver. This makes this driver
+ suitable for use on certain evaluation boards such as the iMX21ADS.
- If you are unsure, say N.
+ To compile this driver as a module, choose M here. The module
+ will be called cs89x0.
config EP93XX_ETH
tristate "EP93xx Ethernet support"
diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index 33ace3307059..d0c4c8b7a15a 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -104,7 +104,7 @@ static char version[] __initdata =
* them to system IRQ numbers. This mapping is card specific and is set to
* the configuration of the Cirrus Eval board for this chip.
*/
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
static unsigned int netcard_portlist[] __used __initdata = {
0x300, 0x320, 0x340, 0x360, 0x200, 0x220, 0x240,
0x260, 0x280, 0x2a0, 0x2c0, 0x2e0, 0
@@ -292,7 +292,7 @@ write_irq(struct net_device *dev, int chip_type, int irq)
int i;
if (chip_type == CS8900) {
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
/* Search the mapping table for the corresponding IRQ pin. */
for (i = 0; i != ARRAY_SIZE(cs8900_irq_map); i++)
if (cs8900_irq_map[i] == irq)
@@ -859,7 +859,7 @@ net_open(struct net_device *dev)
goto bad_out;
}
} else {
-#if !defined(CONFIG_CS89x0_PLATFORM)
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
if (((1 << dev->irq) & lp->irq_map) == 0) {
pr_err("%s: IRQ %d is not in our map of allowable IRQs, which is %x\n",
dev->name, dev->irq, lp->irq_map);
@@ -1523,7 +1523,7 @@ cs89x0_probe1(struct net_device *dev, void __iomem *ioaddr, int modular)
dev->irq = i;
} else {
i = lp->isa_config & INT_NO_MASK;
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
if (lp->chip_type == CS8900) {
/* Translate the IRQ using the IRQ mapping table. */
if (i >= ARRAY_SIZE(cs8900_irq_map))
@@ -1576,7 +1576,7 @@ out1:
return retval;
}
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
/*
* This function converts the I/O port address used by the cs89x0_probe() and
* init_module() functions to the I/O memory address used by the
@@ -1682,11 +1682,7 @@ out:
pr_warn("no cs8900 or cs8920 detected. Be sure to disable PnP with SETUP\n");
return ERR_PTR(err);
}
-#endif
-#endif
-
-#if defined(MODULE) && !defined(CONFIG_CS89x0_PLATFORM)
-
+#else
static struct net_device *dev_cs89x0;
/* Support the 'debug' module parm even if we're compiled for non-debug to
@@ -1757,9 +1753,9 @@ MODULE_LICENSE("GPL");
* (hw or software util)
*/
-int __init init_module(void)
+static int __init cs89x0_isa_init_module(void)
{
- struct net_device *dev = alloc_etherdev(sizeof(struct net_local));
+ struct net_device *dev;
struct net_local *lp;
int ret = 0;
@@ -1768,6 +1764,7 @@ int __init init_module(void)
#else
debug = 0;
#endif
+ dev = alloc_etherdev(sizeof(struct net_local));
if (!dev)
return -ENOMEM;
@@ -1826,9 +1823,9 @@ out:
free_netdev(dev);
return ret;
}
+module_init(cs89x0_isa_init_module);
-void __exit
-cleanup_module(void)
+static void __exit cs89x0_isa_cleanup_module(void)
{
struct net_local *lp = netdev_priv(dev_cs89x0);
@@ -1838,9 +1835,11 @@ cleanup_module(void)
release_region(dev_cs89x0->base_addr, NETCARD_IO_EXTENT);
free_netdev(dev_cs89x0);
}
-#endif /* MODULE && !CONFIG_CS89x0_PLATFORM */
+module_exit(cs89x0_isa_cleanup_module);
+#endif /* MODULE */
+#endif /* CONFIG_CS89x0_ISA */
-#ifdef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_PLATFORM)
static int __init cs89x0_platform_probe(struct platform_device *pdev)
{
struct net_device *dev = alloc_etherdev(sizeof(struct net_local));
diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c
index 9f5e5ec69991..072fac5f5d24 100644
--- a/drivers/net/ethernet/cirrus/ep93xx_eth.c
+++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c
@@ -733,7 +733,7 @@ static const struct net_device_ops ep93xx_netdev_ops = {
.ndo_open = ep93xx_open,
.ndo_stop = ep93xx_close,
.ndo_start_xmit = ep93xx_xmit,
- .ndo_do_ioctl = ep93xx_ioctl,
+ .ndo_eth_ioctl = ep93xx_ioctl,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
};
diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index 1a9803f2073e..12ffc14fbecd 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -298,7 +298,9 @@ static void enic_set_msglevel(struct net_device *netdev, u32 value)
}
static int enic_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ecmd)
+ struct ethtool_coalesce *ecmd,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct enic *enic = netdev_priv(netdev);
struct enic_rx_coal *rxcoal = &enic->rx_coalesce_setting;
@@ -343,7 +345,9 @@ static int enic_coalesce_valid(struct enic *enic,
}
static int enic_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ecmd)
+ struct ethtool_coalesce *ecmd,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct enic *enic = netdev_priv(netdev);
u32 tx_coalesce_usecs;
diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index c2ebb3388789..6e745ca4c433 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -2144,7 +2144,9 @@ static int gmac_set_ringparam(struct net_device *netdev,
}
static int gmac_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ecmd)
+ struct ethtool_coalesce *ecmd,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct gemini_ethernet_port *port = netdev_priv(netdev);
@@ -2156,7 +2158,9 @@ static int gmac_get_coalesce(struct net_device *netdev,
}
static int gmac_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ecmd)
+ struct ethtool_coalesce *ecmd,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct gemini_ethernet_port *port = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 2a8bf53c2f75..e842de6f6635 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1372,7 +1372,7 @@ static const struct net_device_ops dm9000_netdev_ops = {
.ndo_start_xmit = dm9000_start_xmit,
.ndo_tx_timeout = dm9000_timeout,
.ndo_set_rx_mode = dm9000_hash_table,
- .ndo_do_ioctl = dm9000_ioctl,
+ .ndo_eth_ioctl = dm9000_ioctl,
.ndo_set_features = dm9000_set_features,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index b125d7faefdf..36ab4cbf2ad0 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -443,6 +443,7 @@
=========================================================================
*/
+#include <linux/compat.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
@@ -902,7 +903,8 @@ static int de4x5_close(struct net_device *dev);
static struct net_device_stats *de4x5_get_stats(struct net_device *dev);
static void de4x5_local_stats(struct net_device *dev, char *buf, int pkt_len);
static void set_multicast_list(struct net_device *dev);
-static int de4x5_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static int de4x5_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+ void __user *data, int cmd);
/*
** Private functions
@@ -1084,7 +1086,7 @@ static const struct net_device_ops de4x5_netdev_ops = {
.ndo_start_xmit = de4x5_queue_pkt,
.ndo_get_stats = de4x5_get_stats,
.ndo_set_rx_mode = set_multicast_list,
- .ndo_do_ioctl = de4x5_ioctl,
+ .ndo_siocdevprivate = de4x5_siocdevprivate,
.ndo_set_mac_address= eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
};
@@ -5357,7 +5359,7 @@ de4x5_dbg_rx(struct sk_buff *skb, int len)
** this function is only used for my testing.
*/
static int
-de4x5_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+de4x5_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd)
{
struct de4x5_private *lp = netdev_priv(dev);
struct de4x5_ioctl *ioc = (struct de4x5_ioctl *) &rq->ifr_ifru;
@@ -5371,6 +5373,9 @@ de4x5_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
} tmp;
u_long flags = 0;
+ if (cmd != SIOCDEVPRIVATE || in_compat_syscall())
+ return -EOPNOTSUPP;
+
switch(ioc->cmd) {
case DE4X5_GET_HWADDR: /* Get the hardware address */
ioc->len = ETH_ALEN;
diff --git a/drivers/net/ethernet/dec/tulip/media.c b/drivers/net/ethernet/dec/tulip/media.c
index 011604787b8e..55d6fc99f40b 100644
--- a/drivers/net/ethernet/dec/tulip/media.c
+++ b/drivers/net/ethernet/dec/tulip/media.c
@@ -362,7 +362,7 @@ void tulip_select_media(struct net_device *dev, int startup)
iowrite32(0x33, ioaddr + CSR12);
new_csr6 = 0x01860000;
/* Trigger autonegotiation. */
- iowrite32(startup ? 0x0201F868 : 0x0001F868, ioaddr + 0xB8);
+ iowrite32(0x0001F868, ioaddr + 0xB8);
} else {
iowrite32(0x32, ioaddr + CSR12);
new_csr6 = 0x00420000;
diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
index c1dcd6ca1457..fcedd733bacb 100644
--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
@@ -1271,7 +1271,7 @@ static const struct net_device_ops tulip_netdev_ops = {
.ndo_tx_timeout = tulip_tx_timeout,
.ndo_stop = tulip_close,
.ndo_get_stats = tulip_get_stats,
- .ndo_do_ioctl = private_ioctl,
+ .ndo_eth_ioctl = private_ioctl,
.ndo_set_rx_mode = set_rx_mode,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c
index 1876f15dd827..85b99099c6b9 100644
--- a/drivers/net/ethernet/dec/tulip/winbond-840.c
+++ b/drivers/net/ethernet/dec/tulip/winbond-840.c
@@ -341,7 +341,7 @@ static const struct net_device_ops netdev_ops = {
.ndo_start_xmit = start_tx,
.ndo_get_stats = get_stats,
.ndo_set_rx_mode = set_rx_mode,
- .ndo_do_ioctl = netdev_ioctl,
+ .ndo_eth_ioctl = netdev_ioctl,
.ndo_tx_timeout = tx_timeout,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c
index 734acb834c98..202ecb132053 100644
--- a/drivers/net/ethernet/dlink/dl2k.c
+++ b/drivers/net/ethernet/dlink/dl2k.c
@@ -95,7 +95,7 @@ static const struct net_device_ops netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
.ndo_set_rx_mode = set_multicast,
- .ndo_do_ioctl = rio_ioctl,
+ .ndo_eth_ioctl = rio_ioctl,
.ndo_tx_timeout = rio_tx_timeout,
};
diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index ee0ca712dd1c..c36d186dffed 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -479,7 +479,7 @@ static const struct net_device_ops netdev_ops = {
.ndo_start_xmit = start_tx,
.ndo_get_stats = get_stats,
.ndo_set_rx_mode = set_rx_mode,
- .ndo_do_ioctl = netdev_ioctl,
+ .ndo_eth_ioctl = netdev_ioctl,
.ndo_tx_timeout = tx_timeout,
.ndo_change_mtu = change_mtu,
.ndo_set_mac_address = sundance_set_mac_addr,
diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c
index 48c6eb142dcc..6c51cf991dad 100644
--- a/drivers/net/ethernet/dnet.c
+++ b/drivers/net/ethernet/dnet.c
@@ -742,7 +742,7 @@ static const struct net_device_ops dnet_netdev_ops = {
.ndo_stop = dnet_close,
.ndo_get_stats = dnet_get_stats,
.ndo_start_xmit = dnet_start_xmit,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
};
diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c
index 7c992172933b..b2d4fb3feb74 100644
--- a/drivers/net/ethernet/ec_bhf.c
+++ b/drivers/net/ethernet/ec_bhf.c
@@ -488,15 +488,7 @@ static int ec_bhf_probe(struct pci_dev *dev, const struct pci_device_id *id)
pci_set_master(dev);
- err = pci_set_dma_mask(dev, DMA_BIT_MASK(32));
- if (err) {
- dev_err(&dev->dev,
- "Required dma mask not supported, failed to initialize device\n");
- err = -EIO;
- goto err_disable_dev;
- }
-
- err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(32));
+ err = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32));
if (err) {
dev_err(&dev->dev,
"Required dma mask not supported, failed to initialize device\n");
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index 99cc1c46fb30..f9955308b93d 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -315,7 +315,9 @@ static int be_read_dump_data(struct be_adapter *adapter, u32 dump_len,
}
static int be_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *et)
+ struct ethtool_coalesce *et,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct be_adapter *adapter = netdev_priv(netdev);
struct be_aic_obj *aic = &adapter->aic_obj[0];
@@ -338,7 +340,9 @@ static int be_get_coalesce(struct net_device *netdev,
* eqd cmd is issued in the worker thread.
*/
static int be_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *et)
+ struct ethtool_coalesce *et,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct be_adapter *adapter = netdev_priv(netdev);
struct be_aic_obj *aic = &adapter->aic_obj[0];
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index e1b43b07755b..ed1ed48e7483 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -1009,7 +1009,7 @@ static const struct ethtool_ops ethoc_ethtool_ops = {
static const struct net_device_ops ethoc_netdev_ops = {
.ndo_open = ethoc_open,
.ndo_stop = ethoc_stop,
- .ndo_do_ioctl = ethoc_ioctl,
+ .ndo_eth_ioctl = ethoc_ioctl,
.ndo_set_mac_address = ethoc_set_mac_address,
.ndo_set_rx_mode = ethoc_set_multicast_list,
.ndo_change_mtu = ethoc_change_mtu,
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 11dbbfd38770..ff76e401a014 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1616,7 +1616,7 @@ static const struct net_device_ops ftgmac100_netdev_ops = {
.ndo_start_xmit = ftgmac100_hard_start_xmit,
.ndo_set_mac_address = ftgmac100_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = phy_do_ioctl,
+ .ndo_eth_ioctl = phy_do_ioctl,
.ndo_tx_timeout = ftgmac100_tx_timeout,
.ndo_set_rx_mode = ftgmac100_set_rx_mode,
.ndo_set_features = ftgmac100_set_features,
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 5a1a8f2ea63c..8a341e2d5833 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -1043,7 +1043,7 @@ static const struct net_device_ops ftmac100_netdev_ops = {
.ndo_start_xmit = ftmac100_hard_start_xmit,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = ftmac100_do_ioctl,
+ .ndo_eth_ioctl = ftmac100_do_ioctl,
};
/******************************************************************************
diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
index 0f141c14d72d..25c91b3c5fd3 100644
--- a/drivers/net/ethernet/fealnx.c
+++ b/drivers/net/ethernet/fealnx.c
@@ -463,7 +463,7 @@ static const struct net_device_ops netdev_ops = {
.ndo_start_xmit = start_tx,
.ndo_get_stats = get_stats,
.ndo_set_rx_mode = set_rx_mode,
- .ndo_do_ioctl = mii_ioctl,
+ .ndo_eth_ioctl = mii_ioctl,
.ndo_tx_timeout = fealnx_tx_timeout,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index 2d1abdd58fab..e04e1c5cb013 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -25,10 +25,10 @@ config FEC
depends on (M523x || M527x || M5272 || M528x || M520x || M532x || \
ARCH_MXC || SOC_IMX28 || COMPILE_TEST)
default ARCH_MXC || SOC_IMX28 if ARM
+ depends on PTP_1588_CLOCK_OPTIONAL
select CRC32
select PHYLIB
imply NET_SELFTESTS
- imply PTP_1588_CLOCK
help
Say Y here if you want to use the built-in 10/100 Fast ethernet
controller on some Motorola ColdFire and Freescale i.MX processors.
diff --git a/drivers/net/ethernet/freescale/dpaa/Kconfig b/drivers/net/ethernet/freescale/dpaa/Kconfig
index 626ec58a0afc..0e1439fd00bd 100644
--- a/drivers/net/ethernet/freescale/dpaa/Kconfig
+++ b/drivers/net/ethernet/freescale/dpaa/Kconfig
@@ -4,7 +4,6 @@ menuconfig FSL_DPAA_ETH
depends on FSL_DPAA && FSL_FMAN
select PHYLIB
select FIXED_PHY
- select FSL_FMAN_MAC
help
Data Path Acceleration Architecture Ethernet driver,
supporting the Freescale QorIQ chips.
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index e6826561cf11..685d2d8a3b36 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -3157,7 +3157,7 @@ static const struct net_device_ops dpaa_ops = {
.ndo_set_mac_address = dpaa_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = dpaa_set_rx_mode,
- .ndo_do_ioctl = dpaa_ioctl,
+ .ndo_eth_ioctl = dpaa_ioctl,
.ndo_setup_tc = dpaa_setup_tc,
.ndo_change_mtu = dpaa_change_mtu,
.ndo_bpf = dpaa_xdp,
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
index 1268996b7030..763d2c7b5fb1 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
@@ -513,7 +513,9 @@ static int dpaa_get_ts_info(struct net_device *net_dev,
}
static int dpaa_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *c)
+ struct ethtool_coalesce *c,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct qman_portal *portal;
u32 period;
@@ -530,7 +532,9 @@ static int dpaa_get_coalesce(struct net_device *dev,
}
static int dpaa_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *c)
+ struct ethtool_coalesce *c,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
const cpumask_t *cpus = qman_affine_cpus();
bool needs_revert[NR_CPUS] = {false};
diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile
index c2ef74052ef8..3d9842af7f10 100644
--- a/drivers/net/ethernet/freescale/dpaa2/Makefile
+++ b/drivers/net/ethernet/freescale/dpaa2/Makefile
@@ -11,7 +11,7 @@ fsl-dpaa2-eth-objs := dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o dpa
fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o
fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o
fsl-dpaa2-ptp-objs := dpaa2-ptp.o dprtc.o
-fsl-dpaa2-switch-objs := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o
+fsl-dpaa2-switch-objs := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o dpaa2-mac.o dpmac.o
# Needed by the tracing framework
CFLAGS_dpaa2-eth.o := -I$(src)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c
index 833696245565..605a39f892b9 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c
@@ -68,7 +68,7 @@ dpaa2_eth_dl_trap_item_lookup(struct dpaa2_eth_priv *priv, u16 trap_id)
struct dpaa2_eth_trap_item *dpaa2_eth_dl_get_trap(struct dpaa2_eth_priv *priv,
struct dpaa2_fapr *fapr)
{
- struct dpaa2_faf_error_bit {
+ static const struct dpaa2_faf_error_bit {
int position;
enum devlink_trap_generic_id trap_id;
} faf_bits[] = {
@@ -196,7 +196,8 @@ int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv)
struct dpaa2_eth_devlink_priv *dl_priv;
int err;
- priv->devlink = devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv));
+ priv->devlink =
+ devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv), dev);
if (!priv->devlink) {
dev_err(dev, "devlink_alloc failed\n");
return -ENOMEM;
@@ -204,7 +205,7 @@ int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv)
dl_priv = devlink_priv(priv->devlink);
dl_priv->dpaa2_priv = priv;
- err = devlink_register(priv->devlink, dev);
+ err = devlink_register(priv->devlink);
if (err) {
dev_err(dev, "devlink_register() = %d\n", err);
goto devlink_free;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 973352393bd4..7065c71ed7b8 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -2594,7 +2594,7 @@ static const struct net_device_ops dpaa2_eth_ops = {
.ndo_get_stats64 = dpaa2_eth_get_stats,
.ndo_set_rx_mode = dpaa2_eth_set_rx_mode,
.ndo_set_features = dpaa2_eth_set_features,
- .ndo_do_ioctl = dpaa2_eth_ioctl,
+ .ndo_eth_ioctl = dpaa2_eth_ioctl,
.ndo_change_mtu = dpaa2_eth_change_mtu,
.ndo_bpf = dpaa2_eth_xdp,
.ndo_xdp_xmit = dpaa2_eth_xdp_xmit,
@@ -4138,7 +4138,7 @@ static int dpaa2_eth_connect_mac(struct dpaa2_eth_priv *priv)
int err;
dpni_dev = to_fsl_mc_device(priv->net_dev->dev.parent);
- dpmac_dev = fsl_mc_get_endpoint(dpni_dev);
+ dpmac_dev = fsl_mc_get_endpoint(dpni_dev, 0);
if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER)
return PTR_ERR(dpmac_dev);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
index ad5e374eeccf..2da5f881f630 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -72,12 +72,12 @@ static void dpaa2_eth_get_drvinfo(struct net_device *net_dev,
{
struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
- strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
+ strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
"%u.%u", priv->dpni_ver_major, priv->dpni_ver_minor);
- strlcpy(drvinfo->bus_info, dev_name(net_dev->dev.parent->parent),
+ strscpy(drvinfo->bus_info, dev_name(net_dev->dev.parent->parent),
sizeof(drvinfo->bus_info));
}
@@ -191,11 +191,11 @@ static void dpaa2_eth_get_strings(struct net_device *netdev, u32 stringset,
switch (stringset) {
case ETH_SS_STATS:
for (i = 0; i < DPAA2_ETH_NUM_STATS; i++) {
- strlcpy(p, dpaa2_ethtool_stats[i], ETH_GSTRING_LEN);
+ strscpy(p, dpaa2_ethtool_stats[i], ETH_GSTRING_LEN);
p += ETH_GSTRING_LEN;
}
for (i = 0; i < DPAA2_ETH_NUM_EXTRA_STATS; i++) {
- strlcpy(p, dpaa2_ethtool_extras[i], ETH_GSTRING_LEN);
+ strscpy(p, dpaa2_ethtool_extras[i], ETH_GSTRING_LEN);
p += ETH_GSTRING_LEN;
}
if (dpaa2_eth_has_mac(priv))
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c
index 70e04321c420..720c9230cab5 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c
@@ -15,18 +15,18 @@ static struct {
enum dpsw_counter id;
char name[ETH_GSTRING_LEN];
} dpaa2_switch_ethtool_counters[] = {
- {DPSW_CNT_ING_FRAME, "rx frames"},
- {DPSW_CNT_ING_BYTE, "rx bytes"},
- {DPSW_CNT_ING_FLTR_FRAME, "rx filtered frames"},
- {DPSW_CNT_ING_FRAME_DISCARD, "rx discarded frames"},
- {DPSW_CNT_ING_BCAST_FRAME, "rx b-cast frames"},
- {DPSW_CNT_ING_BCAST_BYTES, "rx b-cast bytes"},
- {DPSW_CNT_ING_MCAST_FRAME, "rx m-cast frames"},
- {DPSW_CNT_ING_MCAST_BYTE, "rx m-cast bytes"},
- {DPSW_CNT_EGR_FRAME, "tx frames"},
- {DPSW_CNT_EGR_BYTE, "tx bytes"},
- {DPSW_CNT_EGR_FRAME_DISCARD, "tx discarded frames"},
- {DPSW_CNT_ING_NO_BUFF_DISCARD, "rx discarded no buffer frames"},
+ {DPSW_CNT_ING_FRAME, "[hw] rx frames"},
+ {DPSW_CNT_ING_BYTE, "[hw] rx bytes"},
+ {DPSW_CNT_ING_FLTR_FRAME, "[hw] rx filtered frames"},
+ {DPSW_CNT_ING_FRAME_DISCARD, "[hw] rx discarded frames"},
+ {DPSW_CNT_ING_BCAST_FRAME, "[hw] rx bcast frames"},
+ {DPSW_CNT_ING_BCAST_BYTES, "[hw] rx bcast bytes"},
+ {DPSW_CNT_ING_MCAST_FRAME, "[hw] rx mcast frames"},
+ {DPSW_CNT_ING_MCAST_BYTE, "[hw] rx mcast bytes"},
+ {DPSW_CNT_EGR_FRAME, "[hw] tx frames"},
+ {DPSW_CNT_EGR_BYTE, "[hw] tx bytes"},
+ {DPSW_CNT_EGR_FRAME_DISCARD, "[hw] tx discarded frames"},
+ {DPSW_CNT_ING_NO_BUFF_DISCARD, "[hw] rx nobuffer discards"},
};
#define DPAA2_SWITCH_NUM_COUNTERS ARRAY_SIZE(dpaa2_switch_ethtool_counters)
@@ -62,6 +62,10 @@ dpaa2_switch_get_link_ksettings(struct net_device *netdev,
struct dpsw_link_state state = {0};
int err = 0;
+ if (dpaa2_switch_port_is_type_phy(port_priv))
+ return phylink_ethtool_ksettings_get(port_priv->mac->phylink,
+ link_ksettings);
+
err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
port_priv->ethsw_data->dpsw_handle,
port_priv->idx,
@@ -95,6 +99,10 @@ dpaa2_switch_set_link_ksettings(struct net_device *netdev,
bool if_running;
int err = 0, ret;
+ if (dpaa2_switch_port_is_type_phy(port_priv))
+ return phylink_ethtool_ksettings_set(port_priv->mac->phylink,
+ link_ksettings);
+
/* Interface needs to be down to change link settings */
if_running = netif_running(netdev);
if (if_running) {
@@ -134,11 +142,17 @@ dpaa2_switch_set_link_ksettings(struct net_device *netdev,
return err;
}
-static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset)
+static int
+dpaa2_switch_ethtool_get_sset_count(struct net_device *netdev, int sset)
{
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ int num_ss_stats = DPAA2_SWITCH_NUM_COUNTERS;
+
switch (sset) {
case ETH_SS_STATS:
- return DPAA2_SWITCH_NUM_COUNTERS;
+ if (port_priv->mac)
+ num_ss_stats += dpaa2_mac_get_sset_count();
+ return num_ss_stats;
default:
return -EOPNOTSUPP;
}
@@ -147,14 +161,19 @@ static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset)
static void dpaa2_switch_ethtool_get_strings(struct net_device *netdev,
u32 stringset, u8 *data)
{
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ u8 *p = data;
int i;
switch (stringset) {
case ETH_SS_STATS:
- for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++)
- memcpy(data + i * ETH_GSTRING_LEN,
- dpaa2_switch_ethtool_counters[i].name,
+ for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++) {
+ memcpy(p, dpaa2_switch_ethtool_counters[i].name,
ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+ if (port_priv->mac)
+ dpaa2_mac_get_strings(p);
break;
}
}
@@ -176,6 +195,9 @@ static void dpaa2_switch_ethtool_get_stats(struct net_device *netdev,
netdev_err(netdev, "dpsw_if_get_counter[%s] err %d\n",
dpaa2_switch_ethtool_counters[i].name, err);
}
+
+ if (port_priv->mac)
+ dpaa2_mac_get_ethtool_stats(port_priv->mac, data + i);
}
const struct ethtool_ops dpaa2_switch_port_ethtool_ops = {
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
index f9451ec5f2cb..d6eefbbf163f 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
@@ -111,11 +111,11 @@ static int dpaa2_switch_flower_parse_key(struct flow_cls_offload *cls,
return 0;
}
-int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl,
+int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block,
struct dpaa2_switch_acl_entry *entry)
{
struct dpsw_acl_entry_cfg *acl_entry_cfg = &entry->cfg;
- struct ethsw_core *ethsw = acl_tbl->ethsw;
+ struct ethsw_core *ethsw = filter_block->ethsw;
struct dpsw_acl_key *acl_key = &entry->key;
struct device *dev = ethsw->dev;
u8 *cmd_buff;
@@ -136,7 +136,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl,
}
err = dpsw_acl_add_entry(ethsw->mc_io, 0, ethsw->dpsw_handle,
- acl_tbl->id, acl_entry_cfg);
+ filter_block->acl_id, acl_entry_cfg);
dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff),
DMA_TO_DEVICE);
@@ -150,12 +150,13 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl,
return 0;
}
-static int dpaa2_switch_acl_entry_remove(struct dpaa2_switch_acl_tbl *acl_tbl,
- struct dpaa2_switch_acl_entry *entry)
+static int
+dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block,
+ struct dpaa2_switch_acl_entry *entry)
{
struct dpsw_acl_entry_cfg *acl_entry_cfg = &entry->cfg;
struct dpsw_acl_key *acl_key = &entry->key;
- struct ethsw_core *ethsw = acl_tbl->ethsw;
+ struct ethsw_core *ethsw = block->ethsw;
struct device *dev = ethsw->dev;
u8 *cmd_buff;
int err;
@@ -175,7 +176,7 @@ static int dpaa2_switch_acl_entry_remove(struct dpaa2_switch_acl_tbl *acl_tbl,
}
err = dpsw_acl_remove_entry(ethsw->mc_io, 0, ethsw->dpsw_handle,
- acl_tbl->id, acl_entry_cfg);
+ block->acl_id, acl_entry_cfg);
dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff),
DMA_TO_DEVICE);
@@ -190,19 +191,19 @@ static int dpaa2_switch_acl_entry_remove(struct dpaa2_switch_acl_tbl *acl_tbl,
}
static int
-dpaa2_switch_acl_entry_add_to_list(struct dpaa2_switch_acl_tbl *acl_tbl,
+dpaa2_switch_acl_entry_add_to_list(struct dpaa2_switch_filter_block *block,
struct dpaa2_switch_acl_entry *entry)
{
struct dpaa2_switch_acl_entry *tmp;
struct list_head *pos, *n;
int index = 0;
- if (list_empty(&acl_tbl->entries)) {
- list_add(&entry->list, &acl_tbl->entries);
+ if (list_empty(&block->acl_entries)) {
+ list_add(&entry->list, &block->acl_entries);
return index;
}
- list_for_each_safe(pos, n, &acl_tbl->entries) {
+ list_for_each_safe(pos, n, &block->acl_entries) {
tmp = list_entry(pos, struct dpaa2_switch_acl_entry, list);
if (entry->prio < tmp->prio)
break;
@@ -213,13 +214,13 @@ dpaa2_switch_acl_entry_add_to_list(struct dpaa2_switch_acl_tbl *acl_tbl,
}
static struct dpaa2_switch_acl_entry*
-dpaa2_switch_acl_entry_get_by_index(struct dpaa2_switch_acl_tbl *acl_tbl,
+dpaa2_switch_acl_entry_get_by_index(struct dpaa2_switch_filter_block *block,
int index)
{
struct dpaa2_switch_acl_entry *tmp;
int i = 0;
- list_for_each_entry(tmp, &acl_tbl->entries, list) {
+ list_for_each_entry(tmp, &block->acl_entries, list) {
if (i == index)
return tmp;
++i;
@@ -229,37 +230,38 @@ dpaa2_switch_acl_entry_get_by_index(struct dpaa2_switch_acl_tbl *acl_tbl,
}
static int
-dpaa2_switch_acl_entry_set_precedence(struct dpaa2_switch_acl_tbl *acl_tbl,
+dpaa2_switch_acl_entry_set_precedence(struct dpaa2_switch_filter_block *block,
struct dpaa2_switch_acl_entry *entry,
int precedence)
{
int err;
- err = dpaa2_switch_acl_entry_remove(acl_tbl, entry);
+ err = dpaa2_switch_acl_entry_remove(block, entry);
if (err)
return err;
entry->cfg.precedence = precedence;
- return dpaa2_switch_acl_entry_add(acl_tbl, entry);
+ return dpaa2_switch_acl_entry_add(block, entry);
}
-static int dpaa2_switch_acl_tbl_add_entry(struct dpaa2_switch_acl_tbl *acl_tbl,
- struct dpaa2_switch_acl_entry *entry)
+static int
+dpaa2_switch_acl_tbl_add_entry(struct dpaa2_switch_filter_block *block,
+ struct dpaa2_switch_acl_entry *entry)
{
struct dpaa2_switch_acl_entry *tmp;
int index, i, precedence, err;
/* Add the new ACL entry to the linked list and get its index */
- index = dpaa2_switch_acl_entry_add_to_list(acl_tbl, entry);
+ index = dpaa2_switch_acl_entry_add_to_list(block, entry);
/* Move up in priority the ACL entries to make space
* for the new filter.
*/
- precedence = DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES - acl_tbl->num_rules - 1;
+ precedence = DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES - block->num_acl_rules - 1;
for (i = 0; i < index; i++) {
- tmp = dpaa2_switch_acl_entry_get_by_index(acl_tbl, i);
+ tmp = dpaa2_switch_acl_entry_get_by_index(block, i);
- err = dpaa2_switch_acl_entry_set_precedence(acl_tbl, tmp,
+ err = dpaa2_switch_acl_entry_set_precedence(block, tmp,
precedence);
if (err)
return err;
@@ -269,19 +271,19 @@ static int dpaa2_switch_acl_tbl_add_entry(struct dpaa2_switch_acl_tbl *acl_tbl,
/* Add the new entry to hardware */
entry->cfg.precedence = precedence;
- err = dpaa2_switch_acl_entry_add(acl_tbl, entry);
- acl_tbl->num_rules++;
+ err = dpaa2_switch_acl_entry_add(block, entry);
+ block->num_acl_rules++;
return err;
}
static struct dpaa2_switch_acl_entry *
-dpaa2_switch_acl_tbl_find_entry_by_cookie(struct dpaa2_switch_acl_tbl *acl_tbl,
+dpaa2_switch_acl_tbl_find_entry_by_cookie(struct dpaa2_switch_filter_block *block,
unsigned long cookie)
{
struct dpaa2_switch_acl_entry *tmp, *n;
- list_for_each_entry_safe(tmp, n, &acl_tbl->entries, list) {
+ list_for_each_entry_safe(tmp, n, &block->acl_entries, list) {
if (tmp->cookie == cookie)
return tmp;
}
@@ -289,13 +291,13 @@ dpaa2_switch_acl_tbl_find_entry_by_cookie(struct dpaa2_switch_acl_tbl *acl_tbl,
}
static int
-dpaa2_switch_acl_entry_get_index(struct dpaa2_switch_acl_tbl *acl_tbl,
+dpaa2_switch_acl_entry_get_index(struct dpaa2_switch_filter_block *block,
struct dpaa2_switch_acl_entry *entry)
{
struct dpaa2_switch_acl_entry *tmp, *n;
int index = 0;
- list_for_each_entry_safe(tmp, n, &acl_tbl->entries, list) {
+ list_for_each_entry_safe(tmp, n, &block->acl_entries, list) {
if (tmp->cookie == entry->cookie)
return index;
index++;
@@ -303,21 +305,34 @@ dpaa2_switch_acl_entry_get_index(struct dpaa2_switch_acl_tbl *acl_tbl,
return -ENOENT;
}
+static struct dpaa2_switch_mirror_entry *
+dpaa2_switch_mirror_find_entry_by_cookie(struct dpaa2_switch_filter_block *block,
+ unsigned long cookie)
+{
+ struct dpaa2_switch_mirror_entry *tmp, *n;
+
+ list_for_each_entry_safe(tmp, n, &block->mirror_entries, list) {
+ if (tmp->cookie == cookie)
+ return tmp;
+ }
+ return NULL;
+}
+
static int
-dpaa2_switch_acl_tbl_remove_entry(struct dpaa2_switch_acl_tbl *acl_tbl,
+dpaa2_switch_acl_tbl_remove_entry(struct dpaa2_switch_filter_block *block,
struct dpaa2_switch_acl_entry *entry)
{
struct dpaa2_switch_acl_entry *tmp;
int index, i, precedence, err;
- index = dpaa2_switch_acl_entry_get_index(acl_tbl, entry);
+ index = dpaa2_switch_acl_entry_get_index(block, entry);
/* Remove from hardware the ACL entry */
- err = dpaa2_switch_acl_entry_remove(acl_tbl, entry);
+ err = dpaa2_switch_acl_entry_remove(block, entry);
if (err)
return err;
- acl_tbl->num_rules--;
+ block->num_acl_rules--;
/* Remove it from the list also */
list_del(&entry->list);
@@ -325,8 +340,8 @@ dpaa2_switch_acl_tbl_remove_entry(struct dpaa2_switch_acl_tbl *acl_tbl,
/* Move down in priority the entries over the deleted one */
precedence = entry->cfg.precedence;
for (i = index - 1; i >= 0; i--) {
- tmp = dpaa2_switch_acl_entry_get_by_index(acl_tbl, i);
- err = dpaa2_switch_acl_entry_set_precedence(acl_tbl, tmp,
+ tmp = dpaa2_switch_acl_entry_get_by_index(block, i);
+ err = dpaa2_switch_acl_entry_set_precedence(block, tmp,
precedence);
if (err)
return err;
@@ -339,10 +354,10 @@ dpaa2_switch_acl_tbl_remove_entry(struct dpaa2_switch_acl_tbl *acl_tbl,
return 0;
}
-static int dpaa2_switch_tc_parse_action(struct ethsw_core *ethsw,
- struct flow_action_entry *cls_act,
- struct dpsw_acl_result *dpsw_act,
- struct netlink_ext_ack *extack)
+static int dpaa2_switch_tc_parse_action_acl(struct ethsw_core *ethsw,
+ struct flow_action_entry *cls_act,
+ struct dpsw_acl_result *dpsw_act,
+ struct netlink_ext_ack *extack)
{
int err = 0;
@@ -374,22 +389,110 @@ out:
return err;
}
-int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
+static int
+dpaa2_switch_block_add_mirror(struct dpaa2_switch_filter_block *block,
+ struct dpaa2_switch_mirror_entry *entry,
+ u16 to, struct netlink_ext_ack *extack)
+{
+ unsigned long block_ports = block->ports;
+ struct ethsw_core *ethsw = block->ethsw;
+ struct ethsw_port_priv *port_priv;
+ unsigned long ports_added = 0;
+ u16 vlan = entry->cfg.vlan_id;
+ bool mirror_port_enabled;
+ int err, port;
+
+ /* Setup the mirroring port */
+ mirror_port_enabled = (ethsw->mirror_port != ethsw->sw_attr.num_ifs);
+ if (!mirror_port_enabled) {
+ err = dpsw_set_reflection_if(ethsw->mc_io, 0,
+ ethsw->dpsw_handle, to);
+ if (err)
+ return err;
+ ethsw->mirror_port = to;
+ }
+
+ /* Setup the same egress mirroring configuration on all the switch
+ * ports that share the same filter block.
+ */
+ for_each_set_bit(port, &block_ports, ethsw->sw_attr.num_ifs) {
+ port_priv = ethsw->ports[port];
+
+ /* We cannot add a per VLAN mirroring rule if the VLAN in
+ * question is not installed on the switch port.
+ */
+ if (entry->cfg.filter == DPSW_REFLECTION_FILTER_INGRESS_VLAN &&
+ !(port_priv->vlans[vlan] & ETHSW_VLAN_MEMBER)) {
+ NL_SET_ERR_MSG(extack,
+ "VLAN must be installed on the switch port");
+ err = -EINVAL;
+ goto err_remove_filters;
+ }
+
+ err = dpsw_if_add_reflection(ethsw->mc_io, 0,
+ ethsw->dpsw_handle,
+ port, &entry->cfg);
+ if (err)
+ goto err_remove_filters;
+
+ ports_added |= BIT(port);
+ }
+
+ list_add(&entry->list, &block->mirror_entries);
+
+ return 0;
+
+err_remove_filters:
+ for_each_set_bit(port, &ports_added, ethsw->sw_attr.num_ifs) {
+ dpsw_if_remove_reflection(ethsw->mc_io, 0, ethsw->dpsw_handle,
+ port, &entry->cfg);
+ }
+
+ if (!mirror_port_enabled)
+ ethsw->mirror_port = ethsw->sw_attr.num_ifs;
+
+ return err;
+}
+
+static int
+dpaa2_switch_block_remove_mirror(struct dpaa2_switch_filter_block *block,
+ struct dpaa2_switch_mirror_entry *entry)
+{
+ struct dpsw_reflection_cfg *cfg = &entry->cfg;
+ unsigned long block_ports = block->ports;
+ struct ethsw_core *ethsw = block->ethsw;
+ int port;
+
+ /* Remove this mirroring configuration from all the ports belonging to
+ * the filter block.
+ */
+ for_each_set_bit(port, &block_ports, ethsw->sw_attr.num_ifs)
+ dpsw_if_remove_reflection(ethsw->mc_io, 0, ethsw->dpsw_handle,
+ port, cfg);
+
+ /* Also remove it from the list of mirror filters */
+ list_del(&entry->list);
+ kfree(entry);
+
+ /* If this was the last mirror filter, then unset the mirror port */
+ if (list_empty(&block->mirror_entries))
+ ethsw->mirror_port = ethsw->sw_attr.num_ifs;
+
+ return 0;
+}
+
+static int
+dpaa2_switch_cls_flower_replace_acl(struct dpaa2_switch_filter_block *block,
struct flow_cls_offload *cls)
{
struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
struct netlink_ext_ack *extack = cls->common.extack;
- struct ethsw_core *ethsw = acl_tbl->ethsw;
struct dpaa2_switch_acl_entry *acl_entry;
+ struct ethsw_core *ethsw = block->ethsw;
struct flow_action_entry *act;
int err;
- if (!flow_offload_has_one_action(&rule->action)) {
- NL_SET_ERR_MSG(extack, "Only singular actions are supported");
- return -EOPNOTSUPP;
- }
-
- if (dpaa2_switch_acl_tbl_is_full(acl_tbl)) {
+ if (dpaa2_switch_acl_tbl_is_full(block)) {
NL_SET_ERR_MSG(extack, "Maximum filter capacity reached");
return -ENOMEM;
}
@@ -403,15 +506,15 @@ int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
goto free_acl_entry;
act = &rule->action.entries[0];
- err = dpaa2_switch_tc_parse_action(ethsw, act,
- &acl_entry->cfg.result, extack);
+ err = dpaa2_switch_tc_parse_action_acl(ethsw, act,
+ &acl_entry->cfg.result, extack);
if (err)
goto free_acl_entry;
acl_entry->prio = cls->common.prio;
acl_entry->cookie = cls->cookie;
- err = dpaa2_switch_acl_tbl_add_entry(acl_tbl, acl_entry);
+ err = dpaa2_switch_acl_tbl_add_entry(block, acl_entry);
if (err)
goto free_acl_entry;
@@ -423,33 +526,171 @@ free_acl_entry:
return err;
}
-int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_acl_tbl *acl_tbl,
- struct flow_cls_offload *cls)
+static int dpaa2_switch_flower_parse_mirror_key(struct flow_cls_offload *cls,
+ u16 *vlan)
{
- struct dpaa2_switch_acl_entry *entry;
+ struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+ struct flow_dissector *dissector = rule->match.dissector;
+ struct netlink_ext_ack *extack = cls->common.extack;
+
+ if (dissector->used_keys &
+ ~(BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_VLAN))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Mirroring is supported only per VLAN");
+ return -EOPNOTSUPP;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+ struct flow_match_vlan match;
+
+ flow_rule_match_vlan(rule, &match);
- entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(acl_tbl, cls->cookie);
- if (!entry)
- return 0;
+ if (match.mask->vlan_priority != 0 ||
+ match.mask->vlan_dei != 0) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Only matching on VLAN ID supported");
+ return -EOPNOTSUPP;
+ }
- return dpaa2_switch_acl_tbl_remove_entry(acl_tbl, entry);
+ if (match.mask->vlan_id != 0xFFF) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Masked matching not supported");
+ return -EOPNOTSUPP;
+ }
+
+ *vlan = (u16)match.key->vlan_id;
+ }
+
+ return 0;
}
-int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
- struct tc_cls_matchall_offload *cls)
+static int
+dpaa2_switch_cls_flower_replace_mirror(struct dpaa2_switch_filter_block *block,
+ struct flow_cls_offload *cls)
{
struct netlink_ext_ack *extack = cls->common.extack;
- struct ethsw_core *ethsw = acl_tbl->ethsw;
- struct dpaa2_switch_acl_entry *acl_entry;
- struct flow_action_entry *act;
+ struct dpaa2_switch_mirror_entry *mirror_entry;
+ struct ethsw_core *ethsw = block->ethsw;
+ struct dpaa2_switch_mirror_entry *tmp;
+ struct flow_action_entry *cls_act;
+ struct list_head *pos, *n;
+ bool mirror_port_enabled;
+ u16 if_id, vlan;
int err;
- if (!flow_offload_has_one_action(&cls->rule->action)) {
+ mirror_port_enabled = (ethsw->mirror_port != ethsw->sw_attr.num_ifs);
+ cls_act = &cls->rule->action.entries[0];
+
+ /* Offload rules only when the destination is a DPAA2 switch port */
+ if (!dpaa2_switch_port_dev_check(cls_act->dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Destination not a DPAA2 switch port");
+ return -EOPNOTSUPP;
+ }
+ if_id = dpaa2_switch_get_index(ethsw, cls_act->dev);
+
+ /* We have a single mirror port but can configure egress mirroring on
+ * all the other switch ports. We need to allow mirroring rules only
+ * when the destination port is the same.
+ */
+ if (mirror_port_enabled && ethsw->mirror_port != if_id) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Multiple mirror ports not supported");
+ return -EBUSY;
+ }
+
+ /* Parse the key */
+ err = dpaa2_switch_flower_parse_mirror_key(cls, &vlan);
+ if (err)
+ return err;
+
+ /* Make sure that we don't already have a mirror rule with the same
+ * configuration.
+ */
+ list_for_each_safe(pos, n, &block->mirror_entries) {
+ tmp = list_entry(pos, struct dpaa2_switch_mirror_entry, list);
+
+ if (tmp->cfg.filter == DPSW_REFLECTION_FILTER_INGRESS_VLAN &&
+ tmp->cfg.vlan_id == vlan) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "VLAN mirror filter already installed");
+ return -EBUSY;
+ }
+ }
+
+ mirror_entry = kzalloc(sizeof(*mirror_entry), GFP_KERNEL);
+ if (!mirror_entry)
+ return -ENOMEM;
+
+ mirror_entry->cfg.filter = DPSW_REFLECTION_FILTER_INGRESS_VLAN;
+ mirror_entry->cfg.vlan_id = vlan;
+ mirror_entry->cookie = cls->cookie;
+
+ return dpaa2_switch_block_add_mirror(block, mirror_entry, if_id,
+ extack);
+}
+
+int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_filter_block *block,
+ struct flow_cls_offload *cls)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+ struct netlink_ext_ack *extack = cls->common.extack;
+ struct flow_action_entry *act;
+
+ if (!flow_offload_has_one_action(&rule->action)) {
NL_SET_ERR_MSG(extack, "Only singular actions are supported");
return -EOPNOTSUPP;
}
- if (dpaa2_switch_acl_tbl_is_full(acl_tbl)) {
+ act = &rule->action.entries[0];
+ switch (act->id) {
+ case FLOW_ACTION_REDIRECT:
+ case FLOW_ACTION_TRAP:
+ case FLOW_ACTION_DROP:
+ return dpaa2_switch_cls_flower_replace_acl(block, cls);
+ case FLOW_ACTION_MIRRED:
+ return dpaa2_switch_cls_flower_replace_mirror(block, cls);
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Action not supported");
+ return -EOPNOTSUPP;
+ }
+}
+
+int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_filter_block *block,
+ struct flow_cls_offload *cls)
+{
+ struct dpaa2_switch_mirror_entry *mirror_entry;
+ struct dpaa2_switch_acl_entry *acl_entry;
+
+ /* If this filter is a an ACL one, remove it */
+ acl_entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(block,
+ cls->cookie);
+ if (acl_entry)
+ return dpaa2_switch_acl_tbl_remove_entry(block, acl_entry);
+
+ /* If not, then it has to be a mirror */
+ mirror_entry = dpaa2_switch_mirror_find_entry_by_cookie(block,
+ cls->cookie);
+ if (mirror_entry)
+ return dpaa2_switch_block_remove_mirror(block,
+ mirror_entry);
+
+ return 0;
+}
+
+static int
+dpaa2_switch_cls_matchall_replace_acl(struct dpaa2_switch_filter_block *block,
+ struct tc_cls_matchall_offload *cls)
+{
+ struct netlink_ext_ack *extack = cls->common.extack;
+ struct ethsw_core *ethsw = block->ethsw;
+ struct dpaa2_switch_acl_entry *acl_entry;
+ struct flow_action_entry *act;
+ int err;
+
+ if (dpaa2_switch_acl_tbl_is_full(block)) {
NL_SET_ERR_MSG(extack, "Maximum filter capacity reached");
return -ENOMEM;
}
@@ -459,15 +700,15 @@ int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
return -ENOMEM;
act = &cls->rule->action.entries[0];
- err = dpaa2_switch_tc_parse_action(ethsw, act,
- &acl_entry->cfg.result, extack);
+ err = dpaa2_switch_tc_parse_action_acl(ethsw, act,
+ &acl_entry->cfg.result, extack);
if (err)
goto free_acl_entry;
acl_entry->prio = cls->common.prio;
acl_entry->cookie = cls->cookie;
- err = dpaa2_switch_acl_tbl_add_entry(acl_tbl, acl_entry);
+ err = dpaa2_switch_acl_tbl_add_entry(block, acl_entry);
if (err)
goto free_acl_entry;
@@ -479,14 +720,159 @@ free_acl_entry:
return err;
}
-int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_acl_tbl *acl_tbl,
+static int
+dpaa2_switch_cls_matchall_replace_mirror(struct dpaa2_switch_filter_block *block,
+ struct tc_cls_matchall_offload *cls)
+{
+ struct netlink_ext_ack *extack = cls->common.extack;
+ struct dpaa2_switch_mirror_entry *mirror_entry;
+ struct ethsw_core *ethsw = block->ethsw;
+ struct dpaa2_switch_mirror_entry *tmp;
+ struct flow_action_entry *cls_act;
+ struct list_head *pos, *n;
+ bool mirror_port_enabled;
+ u16 if_id;
+
+ mirror_port_enabled = (ethsw->mirror_port != ethsw->sw_attr.num_ifs);
+ cls_act = &cls->rule->action.entries[0];
+
+ /* Offload rules only when the destination is a DPAA2 switch port */
+ if (!dpaa2_switch_port_dev_check(cls_act->dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Destination not a DPAA2 switch port");
+ return -EOPNOTSUPP;
+ }
+ if_id = dpaa2_switch_get_index(ethsw, cls_act->dev);
+
+ /* We have a single mirror port but can configure egress mirroring on
+ * all the other switch ports. We need to allow mirroring rules only
+ * when the destination port is the same.
+ */
+ if (mirror_port_enabled && ethsw->mirror_port != if_id) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Multiple mirror ports not supported");
+ return -EBUSY;
+ }
+
+ /* Make sure that we don't already have a mirror rule with the same
+ * configuration. One matchall rule per block is the maximum.
+ */
+ list_for_each_safe(pos, n, &block->mirror_entries) {
+ tmp = list_entry(pos, struct dpaa2_switch_mirror_entry, list);
+
+ if (tmp->cfg.filter == DPSW_REFLECTION_FILTER_INGRESS_ALL) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matchall mirror filter already installed");
+ return -EBUSY;
+ }
+ }
+
+ mirror_entry = kzalloc(sizeof(*mirror_entry), GFP_KERNEL);
+ if (!mirror_entry)
+ return -ENOMEM;
+
+ mirror_entry->cfg.filter = DPSW_REFLECTION_FILTER_INGRESS_ALL;
+ mirror_entry->cookie = cls->cookie;
+
+ return dpaa2_switch_block_add_mirror(block, mirror_entry, if_id,
+ extack);
+}
+
+int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_filter_block *block,
+ struct tc_cls_matchall_offload *cls)
+{
+ struct netlink_ext_ack *extack = cls->common.extack;
+ struct flow_action_entry *act;
+
+ if (!flow_offload_has_one_action(&cls->rule->action)) {
+ NL_SET_ERR_MSG(extack, "Only singular actions are supported");
+ return -EOPNOTSUPP;
+ }
+
+ act = &cls->rule->action.entries[0];
+ switch (act->id) {
+ case FLOW_ACTION_REDIRECT:
+ case FLOW_ACTION_TRAP:
+ case FLOW_ACTION_DROP:
+ return dpaa2_switch_cls_matchall_replace_acl(block, cls);
+ case FLOW_ACTION_MIRRED:
+ return dpaa2_switch_cls_matchall_replace_mirror(block, cls);
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Action not supported");
+ return -EOPNOTSUPP;
+ }
+}
+
+int dpaa2_switch_block_offload_mirror(struct dpaa2_switch_filter_block *block,
+ struct ethsw_port_priv *port_priv)
+{
+ struct ethsw_core *ethsw = port_priv->ethsw_data;
+ struct dpaa2_switch_mirror_entry *tmp;
+ int err;
+
+ list_for_each_entry(tmp, &block->mirror_entries, list) {
+ err = dpsw_if_add_reflection(ethsw->mc_io, 0,
+ ethsw->dpsw_handle,
+ port_priv->idx, &tmp->cfg);
+ if (err)
+ goto unwind_add;
+ }
+
+ return 0;
+
+unwind_add:
+ list_for_each_entry(tmp, &block->mirror_entries, list)
+ dpsw_if_remove_reflection(ethsw->mc_io, 0,
+ ethsw->dpsw_handle,
+ port_priv->idx, &tmp->cfg);
+
+ return err;
+}
+
+int dpaa2_switch_block_unoffload_mirror(struct dpaa2_switch_filter_block *block,
+ struct ethsw_port_priv *port_priv)
+{
+ struct ethsw_core *ethsw = port_priv->ethsw_data;
+ struct dpaa2_switch_mirror_entry *tmp;
+ int err;
+
+ list_for_each_entry(tmp, &block->mirror_entries, list) {
+ err = dpsw_if_remove_reflection(ethsw->mc_io, 0,
+ ethsw->dpsw_handle,
+ port_priv->idx, &tmp->cfg);
+ if (err)
+ goto unwind_remove;
+ }
+
+ return 0;
+
+unwind_remove:
+ list_for_each_entry(tmp, &block->mirror_entries, list)
+ dpsw_if_add_reflection(ethsw->mc_io, 0, ethsw->dpsw_handle,
+ port_priv->idx, &tmp->cfg);
+
+ return err;
+}
+
+int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_filter_block *block,
struct tc_cls_matchall_offload *cls)
{
- struct dpaa2_switch_acl_entry *entry;
+ struct dpaa2_switch_mirror_entry *mirror_entry;
+ struct dpaa2_switch_acl_entry *acl_entry;
+
+ /* If this filter is a an ACL one, remove it */
+ acl_entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(block,
+ cls->cookie);
+ if (acl_entry)
+ return dpaa2_switch_acl_tbl_remove_entry(block,
+ acl_entry);
- entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(acl_tbl, cls->cookie);
- if (!entry)
- return 0;
+ /* If not, then it has to be a mirror */
+ mirror_entry = dpaa2_switch_mirror_find_entry_by_cookie(block,
+ cls->cookie);
+ if (mirror_entry)
+ return dpaa2_switch_block_remove_mirror(block,
+ mirror_entry);
- return dpaa2_switch_acl_tbl_remove_entry(acl_tbl, entry);
+ return 0;
}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 68b78642c045..175f15c46842 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -41,14 +41,14 @@ static struct dpaa2_switch_fdb *dpaa2_switch_fdb_get_unused(struct ethsw_core *e
return NULL;
}
-static struct dpaa2_switch_acl_tbl *
-dpaa2_switch_acl_tbl_get_unused(struct ethsw_core *ethsw)
+static struct dpaa2_switch_filter_block *
+dpaa2_switch_filter_block_get_unused(struct ethsw_core *ethsw)
{
int i;
for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
- if (!ethsw->acls[i].in_use)
- return &ethsw->acls[i];
+ if (!ethsw->filter_blocks[i].in_use)
+ return &ethsw->filter_blocks[i];
return NULL;
}
@@ -594,12 +594,18 @@ static int dpaa2_switch_port_change_mtu(struct net_device *netdev, int mtu)
return 0;
}
-static int dpaa2_switch_port_carrier_state_sync(struct net_device *netdev)
+static int dpaa2_switch_port_link_state_update(struct net_device *netdev)
{
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
struct dpsw_link_state state;
int err;
+ /* When we manage the MAC/PHY using phylink there is no need
+ * to manually update the netif_carrier.
+ */
+ if (dpaa2_switch_port_is_type_phy(port_priv))
+ return 0;
+
/* Interrupts are received even though no one issued an 'ifconfig up'
* on the switch interface. Ignore these link state update interrupts
*/
@@ -677,12 +683,14 @@ static int dpaa2_switch_port_open(struct net_device *netdev)
struct ethsw_core *ethsw = port_priv->ethsw_data;
int err;
- /* Explicitly set carrier off, otherwise
- * netif_carrier_ok() will return true and cause 'ip link show'
- * to report the LOWER_UP flag, even though the link
- * notification wasn't even received.
- */
- netif_carrier_off(netdev);
+ if (!dpaa2_switch_port_is_type_phy(port_priv)) {
+ /* Explicitly set carrier off, otherwise
+ * netif_carrier_ok() will return true and cause 'ip link show'
+ * to report the LOWER_UP flag, even though the link
+ * notification wasn't even received.
+ */
+ netif_carrier_off(netdev);
+ }
err = dpsw_if_enable(port_priv->ethsw_data->mc_io, 0,
port_priv->ethsw_data->dpsw_handle,
@@ -692,23 +700,12 @@ static int dpaa2_switch_port_open(struct net_device *netdev)
return err;
}
- /* sync carrier state */
- err = dpaa2_switch_port_carrier_state_sync(netdev);
- if (err) {
- netdev_err(netdev,
- "dpaa2_switch_port_carrier_state_sync err %d\n", err);
- goto err_carrier_sync;
- }
-
dpaa2_switch_enable_ctrl_if_napi(ethsw);
- return 0;
+ if (dpaa2_switch_port_is_type_phy(port_priv))
+ phylink_start(port_priv->mac->phylink);
-err_carrier_sync:
- dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
- port_priv->ethsw_data->dpsw_handle,
- port_priv->idx);
- return err;
+ return 0;
}
static int dpaa2_switch_port_stop(struct net_device *netdev)
@@ -717,6 +714,13 @@ static int dpaa2_switch_port_stop(struct net_device *netdev)
struct ethsw_core *ethsw = port_priv->ethsw_data;
int err;
+ if (dpaa2_switch_port_is_type_phy(port_priv)) {
+ phylink_stop(port_priv->mac->phylink);
+ } else {
+ netif_tx_stop_all_queues(netdev);
+ netif_carrier_off(netdev);
+ }
+
err = dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
port_priv->ethsw_data->dpsw_handle,
port_priv->idx);
@@ -1127,28 +1131,28 @@ err_exit:
}
static int
-dpaa2_switch_setup_tc_cls_flower(struct dpaa2_switch_acl_tbl *acl_tbl,
+dpaa2_switch_setup_tc_cls_flower(struct dpaa2_switch_filter_block *filter_block,
struct flow_cls_offload *f)
{
switch (f->command) {
case FLOW_CLS_REPLACE:
- return dpaa2_switch_cls_flower_replace(acl_tbl, f);
+ return dpaa2_switch_cls_flower_replace(filter_block, f);
case FLOW_CLS_DESTROY:
- return dpaa2_switch_cls_flower_destroy(acl_tbl, f);
+ return dpaa2_switch_cls_flower_destroy(filter_block, f);
default:
return -EOPNOTSUPP;
}
}
static int
-dpaa2_switch_setup_tc_cls_matchall(struct dpaa2_switch_acl_tbl *acl_tbl,
+dpaa2_switch_setup_tc_cls_matchall(struct dpaa2_switch_filter_block *block,
struct tc_cls_matchall_offload *f)
{
switch (f->command) {
case TC_CLSMATCHALL_REPLACE:
- return dpaa2_switch_cls_matchall_replace(acl_tbl, f);
+ return dpaa2_switch_cls_matchall_replace(block, f);
case TC_CLSMATCHALL_DESTROY:
- return dpaa2_switch_cls_matchall_destroy(acl_tbl, f);
+ return dpaa2_switch_cls_matchall_destroy(block, f);
default:
return -EOPNOTSUPP;
}
@@ -1170,106 +1174,122 @@ static int dpaa2_switch_port_setup_tc_block_cb_ig(enum tc_setup_type type,
static LIST_HEAD(dpaa2_switch_block_cb_list);
-static int dpaa2_switch_port_acl_tbl_bind(struct ethsw_port_priv *port_priv,
- struct dpaa2_switch_acl_tbl *acl_tbl)
+static int
+dpaa2_switch_port_acl_tbl_bind(struct ethsw_port_priv *port_priv,
+ struct dpaa2_switch_filter_block *block)
{
struct ethsw_core *ethsw = port_priv->ethsw_data;
struct net_device *netdev = port_priv->netdev;
struct dpsw_acl_if_cfg acl_if_cfg;
int err;
- if (port_priv->acl_tbl)
+ if (port_priv->filter_block)
return -EINVAL;
acl_if_cfg.if_id[0] = port_priv->idx;
acl_if_cfg.num_ifs = 1;
err = dpsw_acl_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
- acl_tbl->id, &acl_if_cfg);
+ block->acl_id, &acl_if_cfg);
if (err) {
netdev_err(netdev, "dpsw_acl_add_if err %d\n", err);
return err;
}
- acl_tbl->ports |= BIT(port_priv->idx);
- port_priv->acl_tbl = acl_tbl;
+ block->ports |= BIT(port_priv->idx);
+ port_priv->filter_block = block;
return 0;
}
static int
dpaa2_switch_port_acl_tbl_unbind(struct ethsw_port_priv *port_priv,
- struct dpaa2_switch_acl_tbl *acl_tbl)
+ struct dpaa2_switch_filter_block *block)
{
struct ethsw_core *ethsw = port_priv->ethsw_data;
struct net_device *netdev = port_priv->netdev;
struct dpsw_acl_if_cfg acl_if_cfg;
int err;
- if (port_priv->acl_tbl != acl_tbl)
+ if (port_priv->filter_block != block)
return -EINVAL;
acl_if_cfg.if_id[0] = port_priv->idx;
acl_if_cfg.num_ifs = 1;
err = dpsw_acl_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
- acl_tbl->id, &acl_if_cfg);
+ block->acl_id, &acl_if_cfg);
if (err) {
netdev_err(netdev, "dpsw_acl_add_if err %d\n", err);
return err;
}
- acl_tbl->ports &= ~BIT(port_priv->idx);
- port_priv->acl_tbl = NULL;
+ block->ports &= ~BIT(port_priv->idx);
+ port_priv->filter_block = NULL;
return 0;
}
static int dpaa2_switch_port_block_bind(struct ethsw_port_priv *port_priv,
- struct dpaa2_switch_acl_tbl *acl_tbl)
+ struct dpaa2_switch_filter_block *block)
{
- struct dpaa2_switch_acl_tbl *old_acl_tbl = port_priv->acl_tbl;
+ struct dpaa2_switch_filter_block *old_block = port_priv->filter_block;
int err;
+ /* Offload all the mirror entries found in the block on this new port
+ * joining it.
+ */
+ err = dpaa2_switch_block_offload_mirror(block, port_priv);
+ if (err)
+ return err;
+
/* If the port is already bound to this ACL table then do nothing. This
* can happen when this port is the first one to join a tc block
*/
- if (port_priv->acl_tbl == acl_tbl)
+ if (port_priv->filter_block == block)
return 0;
- err = dpaa2_switch_port_acl_tbl_unbind(port_priv, old_acl_tbl);
+ err = dpaa2_switch_port_acl_tbl_unbind(port_priv, old_block);
if (err)
return err;
/* Mark the previous ACL table as being unused if this was the last
* port that was using it.
*/
- if (old_acl_tbl->ports == 0)
- old_acl_tbl->in_use = false;
+ if (old_block->ports == 0)
+ old_block->in_use = false;
- return dpaa2_switch_port_acl_tbl_bind(port_priv, acl_tbl);
+ return dpaa2_switch_port_acl_tbl_bind(port_priv, block);
}
-static int dpaa2_switch_port_block_unbind(struct ethsw_port_priv *port_priv,
- struct dpaa2_switch_acl_tbl *acl_tbl)
+static int
+dpaa2_switch_port_block_unbind(struct ethsw_port_priv *port_priv,
+ struct dpaa2_switch_filter_block *block)
{
struct ethsw_core *ethsw = port_priv->ethsw_data;
- struct dpaa2_switch_acl_tbl *new_acl_tbl;
+ struct dpaa2_switch_filter_block *new_block;
int err;
+ /* Unoffload all the mirror entries found in the block from the
+ * port leaving it.
+ */
+ err = dpaa2_switch_block_unoffload_mirror(block, port_priv);
+ if (err)
+ return err;
+
/* We are the last port that leaves a block (an ACL table).
* We'll continue to use this table.
*/
- if (acl_tbl->ports == BIT(port_priv->idx))
+ if (block->ports == BIT(port_priv->idx))
return 0;
- err = dpaa2_switch_port_acl_tbl_unbind(port_priv, acl_tbl);
+ err = dpaa2_switch_port_acl_tbl_unbind(port_priv, block);
if (err)
return err;
- if (acl_tbl->ports == 0)
- acl_tbl->in_use = false;
+ if (block->ports == 0)
+ block->in_use = false;
- new_acl_tbl = dpaa2_switch_acl_tbl_get_unused(ethsw);
- new_acl_tbl->in_use = true;
- return dpaa2_switch_port_acl_tbl_bind(port_priv, new_acl_tbl);
+ new_block = dpaa2_switch_filter_block_get_unused(ethsw);
+ new_block->in_use = true;
+ return dpaa2_switch_port_acl_tbl_bind(port_priv, new_block);
}
static int dpaa2_switch_setup_tc_block_bind(struct net_device *netdev,
@@ -1277,7 +1297,7 @@ static int dpaa2_switch_setup_tc_block_bind(struct net_device *netdev,
{
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
struct ethsw_core *ethsw = port_priv->ethsw_data;
- struct dpaa2_switch_acl_tbl *acl_tbl;
+ struct dpaa2_switch_filter_block *filter_block;
struct flow_block_cb *block_cb;
bool register_block = false;
int err;
@@ -1287,24 +1307,24 @@ static int dpaa2_switch_setup_tc_block_bind(struct net_device *netdev,
ethsw);
if (!block_cb) {
- /* If the ACL table is not already known, then this port must
- * be the first to join it. In this case, we can just continue
- * to use our private table
+ /* If the filter block is not already known, then this port
+ * must be the first to join it. In this case, we can just
+ * continue to use our private table
*/
- acl_tbl = port_priv->acl_tbl;
+ filter_block = port_priv->filter_block;
block_cb = flow_block_cb_alloc(dpaa2_switch_port_setup_tc_block_cb_ig,
- ethsw, acl_tbl, NULL);
+ ethsw, filter_block, NULL);
if (IS_ERR(block_cb))
return PTR_ERR(block_cb);
register_block = true;
} else {
- acl_tbl = flow_block_cb_priv(block_cb);
+ filter_block = flow_block_cb_priv(block_cb);
}
flow_block_cb_incref(block_cb);
- err = dpaa2_switch_port_block_bind(port_priv, acl_tbl);
+ err = dpaa2_switch_port_block_bind(port_priv, filter_block);
if (err)
goto err_block_bind;
@@ -1327,7 +1347,7 @@ static void dpaa2_switch_setup_tc_block_unbind(struct net_device *netdev,
{
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
struct ethsw_core *ethsw = port_priv->ethsw_data;
- struct dpaa2_switch_acl_tbl *acl_tbl;
+ struct dpaa2_switch_filter_block *filter_block;
struct flow_block_cb *block_cb;
int err;
@@ -1337,8 +1357,8 @@ static void dpaa2_switch_setup_tc_block_unbind(struct net_device *netdev,
if (!block_cb)
return;
- acl_tbl = flow_block_cb_priv(block_cb);
- err = dpaa2_switch_port_block_unbind(port_priv, acl_tbl);
+ filter_block = flow_block_cb_priv(block_cb);
+ err = dpaa2_switch_port_block_unbind(port_priv, filter_block);
if (!err && !flow_block_cb_decref(block_cb)) {
flow_block_cb_remove(block_cb, f);
list_del(&block_cb->driver_list);
@@ -1403,41 +1423,105 @@ bool dpaa2_switch_port_dev_check(const struct net_device *netdev)
return netdev->netdev_ops == &dpaa2_switch_port_ops;
}
-static void dpaa2_switch_links_state_update(struct ethsw_core *ethsw)
+static int dpaa2_switch_port_connect_mac(struct ethsw_port_priv *port_priv)
{
- int i;
+ struct fsl_mc_device *dpsw_port_dev, *dpmac_dev;
+ struct dpaa2_mac *mac;
+ int err;
- for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
- dpaa2_switch_port_carrier_state_sync(ethsw->ports[i]->netdev);
- dpaa2_switch_port_set_mac_addr(ethsw->ports[i]);
+ dpsw_port_dev = to_fsl_mc_device(port_priv->netdev->dev.parent);
+ dpmac_dev = fsl_mc_get_endpoint(dpsw_port_dev, port_priv->idx);
+
+ if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER)
+ return PTR_ERR(dpmac_dev);
+
+ if (IS_ERR(dpmac_dev) || dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type)
+ return 0;
+
+ mac = kzalloc(sizeof(*mac), GFP_KERNEL);
+ if (!mac)
+ return -ENOMEM;
+
+ mac->mc_dev = dpmac_dev;
+ mac->mc_io = port_priv->ethsw_data->mc_io;
+ mac->net_dev = port_priv->netdev;
+
+ err = dpaa2_mac_open(mac);
+ if (err)
+ goto err_free_mac;
+ port_priv->mac = mac;
+
+ if (dpaa2_switch_port_is_type_phy(port_priv)) {
+ err = dpaa2_mac_connect(mac);
+ if (err) {
+ netdev_err(port_priv->netdev,
+ "Error connecting to the MAC endpoint %pe\n",
+ ERR_PTR(err));
+ goto err_close_mac;
+ }
}
+
+ return 0;
+
+err_close_mac:
+ dpaa2_mac_close(mac);
+ port_priv->mac = NULL;
+err_free_mac:
+ kfree(mac);
+ return err;
+}
+
+static void dpaa2_switch_port_disconnect_mac(struct ethsw_port_priv *port_priv)
+{
+ if (dpaa2_switch_port_is_type_phy(port_priv))
+ dpaa2_mac_disconnect(port_priv->mac);
+
+ if (!dpaa2_switch_port_has_mac(port_priv))
+ return;
+
+ dpaa2_mac_close(port_priv->mac);
+ kfree(port_priv->mac);
+ port_priv->mac = NULL;
}
static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg)
{
struct device *dev = (struct device *)arg;
struct ethsw_core *ethsw = dev_get_drvdata(dev);
-
- /* Mask the events and the if_id reserved bits to be cleared on read */
- u32 status = DPSW_IRQ_EVENT_LINK_CHANGED | 0xFFFF0000;
- int err;
+ struct ethsw_port_priv *port_priv;
+ u32 status = ~0;
+ int err, if_id;
err = dpsw_get_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
DPSW_IRQ_INDEX_IF, &status);
if (err) {
dev_err(dev, "Can't get irq status (err %d)\n", err);
-
- err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
- DPSW_IRQ_INDEX_IF, 0xFFFFFFFF);
- if (err)
- dev_err(dev, "Can't clear irq status (err %d)\n", err);
goto out;
}
- if (status & DPSW_IRQ_EVENT_LINK_CHANGED)
- dpaa2_switch_links_state_update(ethsw);
+ if_id = (status & 0xFFFF0000) >> 16;
+ port_priv = ethsw->ports[if_id];
+
+ if (status & DPSW_IRQ_EVENT_LINK_CHANGED) {
+ dpaa2_switch_port_link_state_update(port_priv->netdev);
+ dpaa2_switch_port_set_mac_addr(port_priv);
+ }
+
+ if (status & DPSW_IRQ_EVENT_ENDPOINT_CHANGED) {
+ rtnl_lock();
+ if (dpaa2_switch_port_has_mac(port_priv))
+ dpaa2_switch_port_disconnect_mac(port_priv);
+ else
+ dpaa2_switch_port_connect_mac(port_priv);
+ rtnl_unlock();
+ }
out:
+ err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
+ DPSW_IRQ_INDEX_IF, status);
+ if (err)
+ dev_err(dev, "Can't clear irq status (err %d)\n", err);
+
return IRQ_HANDLED;
}
@@ -1889,8 +1973,12 @@ static int dpaa2_switch_port_attr_set_event(struct net_device *netdev,
return notifier_from_errno(err);
}
+static struct notifier_block dpaa2_switch_port_switchdev_nb;
+static struct notifier_block dpaa2_switch_port_switchdev_blocking_nb;
+
static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
- struct net_device *upper_dev)
+ struct net_device *upper_dev,
+ struct netlink_ext_ack *extack)
{
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
struct ethsw_core *ethsw = port_priv->ethsw_data;
@@ -1906,8 +1994,8 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
other_port_priv = netdev_priv(other_dev);
if (other_port_priv->ethsw_data != port_priv->ethsw_data) {
- netdev_err(netdev,
- "Interface from a different DPSW is in the bridge already!\n");
+ NL_SET_ERR_MSG_MOD(extack,
+ "Interface from a different DPSW is in the bridge already");
return -EINVAL;
}
}
@@ -1929,8 +2017,16 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
if (err)
goto err_egress_flood;
+ err = switchdev_bridge_port_offload(netdev, netdev, NULL,
+ &dpaa2_switch_port_switchdev_nb,
+ &dpaa2_switch_port_switchdev_blocking_nb,
+ false, extack);
+ if (err)
+ goto err_switchdev_offload;
+
return 0;
+err_switchdev_offload:
err_egress_flood:
dpaa2_switch_port_set_fdb(port_priv, NULL);
return err;
@@ -1956,6 +2052,13 @@ static int dpaa2_switch_port_restore_rxvlan(struct net_device *vdev, int vid, vo
return dpaa2_switch_port_vlan_add(arg, vlan_proto, vid);
}
+static void dpaa2_switch_port_pre_bridge_leave(struct net_device *netdev)
+{
+ switchdev_bridge_port_unoffload(netdev, NULL,
+ &dpaa2_switch_port_switchdev_nb,
+ &dpaa2_switch_port_switchdev_blocking_nb);
+}
+
static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
{
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
@@ -2029,6 +2132,28 @@ static int dpaa2_switch_prevent_bridging_with_8021q_upper(struct net_device *net
return 0;
}
+static int
+dpaa2_switch_prechangeupper_sanity_checks(struct net_device *netdev,
+ struct net_device *upper_dev,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (!br_vlan_enabled(upper_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot join a VLAN-unaware bridge");
+ return -EOPNOTSUPP;
+ }
+
+ err = dpaa2_switch_prevent_bridging_with_8021q_upper(netdev);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot join a bridge while VLAN uppers are present");
+ return 0;
+ }
+
+ return 0;
+}
+
static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -2049,25 +2174,23 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
if (!netif_is_bridge_master(upper_dev))
break;
- if (!br_vlan_enabled(upper_dev)) {
- NL_SET_ERR_MSG_MOD(extack, "Cannot join a VLAN-unaware bridge");
- err = -EOPNOTSUPP;
+ err = dpaa2_switch_prechangeupper_sanity_checks(netdev,
+ upper_dev,
+ extack);
+ if (err)
goto out;
- }
- err = dpaa2_switch_prevent_bridging_with_8021q_upper(netdev);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack,
- "Cannot join a bridge while VLAN uppers are present");
- goto out;
- }
+ if (!info->linking)
+ dpaa2_switch_port_pre_bridge_leave(netdev);
break;
case NETDEV_CHANGEUPPER:
upper_dev = info->upper_dev;
if (netif_is_bridge_master(upper_dev)) {
if (info->linking)
- err = dpaa2_switch_port_bridge_join(netdev, upper_dev);
+ err = dpaa2_switch_port_bridge_join(netdev,
+ upper_dev,
+ extack);
else
err = dpaa2_switch_port_bridge_leave(netdev);
}
@@ -2802,6 +2925,18 @@ err_free_dpbp:
return err;
}
+static void dpaa2_switch_remove_port(struct ethsw_core *ethsw,
+ u16 port_idx)
+{
+ struct ethsw_port_priv *port_priv = ethsw->ports[port_idx];
+
+ rtnl_lock();
+ dpaa2_switch_port_disconnect_mac(port_priv);
+ rtnl_unlock();
+ free_netdev(port_priv->netdev);
+ ethsw->ports[port_idx] = NULL;
+}
+
static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
{
struct device *dev = &sw_dev->dev;
@@ -2952,7 +3087,7 @@ static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv,
acl_entry.cfg.precedence = 0;
acl_entry.cfg.result.action = DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF;
- return dpaa2_switch_acl_entry_add(port_priv->acl_tbl, &acl_entry);
+ return dpaa2_switch_acl_entry_add(port_priv->filter_block, &acl_entry);
}
static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
@@ -2965,7 +3100,7 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
};
struct net_device *netdev = port_priv->netdev;
struct ethsw_core *ethsw = port_priv->ethsw_data;
- struct dpaa2_switch_acl_tbl *acl_tbl;
+ struct dpaa2_switch_filter_block *filter_block;
struct dpsw_fdb_cfg fdb_cfg = {0};
struct dpsw_if_attr dpsw_if_attr;
struct dpaa2_switch_fdb *fdb;
@@ -3020,14 +3155,15 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
return err;
}
- acl_tbl = dpaa2_switch_acl_tbl_get_unused(ethsw);
- acl_tbl->ethsw = ethsw;
- acl_tbl->id = acl_tbl_id;
- acl_tbl->in_use = true;
- acl_tbl->num_rules = 0;
- INIT_LIST_HEAD(&acl_tbl->entries);
+ filter_block = dpaa2_switch_filter_block_get_unused(ethsw);
+ filter_block->ethsw = ethsw;
+ filter_block->acl_id = acl_tbl_id;
+ filter_block->in_use = true;
+ filter_block->num_acl_rules = 0;
+ INIT_LIST_HEAD(&filter_block->acl_entries);
+ INIT_LIST_HEAD(&filter_block->mirror_entries);
- err = dpaa2_switch_port_acl_tbl_bind(port_priv, acl_tbl);
+ err = dpaa2_switch_port_acl_tbl_bind(port_priv, filter_block);
if (err)
return err;
@@ -3038,26 +3174,30 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
return err;
}
-static void dpaa2_switch_takedown(struct fsl_mc_device *sw_dev)
+static void dpaa2_switch_ctrl_if_teardown(struct ethsw_core *ethsw)
+{
+ dpsw_ctrl_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
+ dpaa2_switch_free_dpio(ethsw);
+ dpaa2_switch_destroy_rings(ethsw);
+ dpaa2_switch_drain_bp(ethsw);
+ dpaa2_switch_free_dpbp(ethsw);
+}
+
+static void dpaa2_switch_teardown(struct fsl_mc_device *sw_dev)
{
struct device *dev = &sw_dev->dev;
struct ethsw_core *ethsw = dev_get_drvdata(dev);
int err;
+ dpaa2_switch_ctrl_if_teardown(ethsw);
+
+ destroy_workqueue(ethsw->workqueue);
+
err = dpsw_close(ethsw->mc_io, 0, ethsw->dpsw_handle);
if (err)
dev_warn(dev, "dpsw_close err %d\n", err);
}
-static void dpaa2_switch_ctrl_if_teardown(struct ethsw_core *ethsw)
-{
- dpsw_ctrl_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
- dpaa2_switch_free_dpio(ethsw);
- dpaa2_switch_destroy_rings(ethsw);
- dpaa2_switch_drain_bp(ethsw);
- dpaa2_switch_free_dpbp(ethsw);
-}
-
static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
{
struct ethsw_port_priv *port_priv;
@@ -3068,8 +3208,6 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
dev = &sw_dev->dev;
ethsw = dev_get_drvdata(dev);
- dpaa2_switch_ctrl_if_teardown(ethsw);
-
dpaa2_switch_teardown_irqs(sw_dev);
dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
@@ -3077,16 +3215,14 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
port_priv = ethsw->ports[i];
unregister_netdev(port_priv->netdev);
- free_netdev(port_priv->netdev);
+ dpaa2_switch_remove_port(ethsw, i);
}
kfree(ethsw->fdbs);
- kfree(ethsw->acls);
+ kfree(ethsw->filter_blocks);
kfree(ethsw->ports);
- dpaa2_switch_takedown(sw_dev);
-
- destroy_workqueue(ethsw->workqueue);
+ dpaa2_switch_teardown(sw_dev);
fsl_mc_portal_free(ethsw->mc_io);
@@ -3156,6 +3292,10 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
goto err_port_probe;
port_priv->learn_ena = false;
+ err = dpaa2_switch_port_connect_mac(port_priv);
+ if (err)
+ goto err_port_probe;
+
return 0;
err_port_probe:
@@ -3199,7 +3339,7 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
GFP_KERNEL);
if (!(ethsw->ports)) {
err = -ENOMEM;
- goto err_takedown;
+ goto err_teardown;
}
ethsw->fdbs = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->fdbs),
@@ -3209,9 +3349,10 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
goto err_free_ports;
}
- ethsw->acls = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->acls),
- GFP_KERNEL);
- if (!ethsw->acls) {
+ ethsw->filter_blocks = kcalloc(ethsw->sw_attr.num_ifs,
+ sizeof(*ethsw->filter_blocks),
+ GFP_KERNEL);
+ if (!ethsw->filter_blocks) {
err = -ENOMEM;
goto err_free_fdbs;
}
@@ -3231,17 +3372,16 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
&ethsw->fq[i].napi, dpaa2_switch_poll,
NAPI_POLL_WEIGHT);
- err = dpsw_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
- if (err) {
- dev_err(ethsw->dev, "dpsw_enable err %d\n", err);
- goto err_free_netdev;
- }
-
/* Setup IRQs */
err = dpaa2_switch_setup_irqs(sw_dev);
if (err)
goto err_stop;
+ /* By convention, if the mirror port is equal to the number of switch
+ * interfaces, then mirroring of any kind is disabled.
+ */
+ ethsw->mirror_port = ethsw->sw_attr.num_ifs;
+
/* Register the netdev only when the entire setup is done and the
* switch port interfaces are ready to receive traffic
*/
@@ -3263,15 +3403,15 @@ err_stop:
dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
err_free_netdev:
for (i--; i >= 0; i--)
- free_netdev(ethsw->ports[i]->netdev);
- kfree(ethsw->acls);
+ dpaa2_switch_remove_port(ethsw, i);
+ kfree(ethsw->filter_blocks);
err_free_fdbs:
kfree(ethsw->fdbs);
err_free_ports:
kfree(ethsw->ports);
-err_takedown:
- dpaa2_switch_takedown(sw_dev);
+err_teardown:
+ dpaa2_switch_teardown(sw_dev);
err_free_cmdport:
fsl_mc_portal_free(ethsw->mc_io);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
index bdef71f234cb..0002dca4d417 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -21,6 +21,7 @@
#include <net/pkt_cls.h>
#include <soc/fsl/dpaa2-io.h>
+#include "dpaa2-mac.h"
#include "dpsw.h"
/* Number of IRQs supported */
@@ -113,20 +114,29 @@ struct dpaa2_switch_acl_entry {
struct dpsw_acl_key key;
};
-struct dpaa2_switch_acl_tbl {
- struct list_head entries;
+struct dpaa2_switch_mirror_entry {
+ struct list_head list;
+ struct dpsw_reflection_cfg cfg;
+ unsigned long cookie;
+ u16 if_id;
+};
+
+struct dpaa2_switch_filter_block {
struct ethsw_core *ethsw;
u64 ports;
-
- u16 id;
- u8 num_rules;
bool in_use;
+
+ struct list_head acl_entries;
+ u16 acl_id;
+ u8 num_acl_rules;
+
+ struct list_head mirror_entries;
};
static inline bool
-dpaa2_switch_acl_tbl_is_full(struct dpaa2_switch_acl_tbl *acl_tbl)
+dpaa2_switch_acl_tbl_is_full(struct dpaa2_switch_filter_block *filter_block)
{
- if ((acl_tbl->num_rules + DPAA2_ETHSW_PORT_DEFAULT_TRAPS) >=
+ if ((filter_block->num_acl_rules + DPAA2_ETHSW_PORT_DEFAULT_TRAPS) >=
DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES)
return true;
return false;
@@ -149,7 +159,8 @@ struct ethsw_port_priv {
bool ucast_flood;
bool learn_ena;
- struct dpaa2_switch_acl_tbl *acl_tbl;
+ struct dpaa2_switch_filter_block *filter_block;
+ struct dpaa2_mac *mac;
};
/* Switch data */
@@ -175,7 +186,8 @@ struct ethsw_core {
int napi_users;
struct dpaa2_switch_fdb *fdbs;
- struct dpaa2_switch_acl_tbl *acls;
+ struct dpaa2_switch_filter_block *filter_blocks;
+ u16 mirror_port;
};
static inline int dpaa2_switch_get_index(struct ethsw_core *ethsw,
@@ -215,6 +227,22 @@ static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw)
return true;
}
+static inline bool
+dpaa2_switch_port_is_type_phy(struct ethsw_port_priv *port_priv)
+{
+ if (port_priv->mac &&
+ (port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_PHY ||
+ port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_BACKPLANE))
+ return true;
+
+ return false;
+}
+
+static inline bool dpaa2_switch_port_has_mac(struct ethsw_port_priv *port_priv)
+{
+ return port_priv->mac ? true : false;
+}
+
bool dpaa2_switch_port_dev_check(const struct net_device *netdev);
int dpaa2_switch_port_vlans_add(struct net_device *netdev,
@@ -229,18 +257,24 @@ typedef int dpaa2_switch_fdb_cb_t(struct ethsw_port_priv *port_priv,
/* TC offload */
-int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
+int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_filter_block *block,
struct flow_cls_offload *cls);
-int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_acl_tbl *acl_tbl,
+int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_filter_block *block,
struct flow_cls_offload *cls);
-int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
+int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_filter_block *block,
struct tc_cls_matchall_offload *cls);
-int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_acl_tbl *acl_tbl,
+int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_filter_block *block,
struct tc_cls_matchall_offload *cls);
-int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl,
+int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *block,
struct dpaa2_switch_acl_entry *entry);
+
+int dpaa2_switch_block_offload_mirror(struct dpaa2_switch_filter_block *block,
+ struct ethsw_port_priv *port_priv);
+
+int dpaa2_switch_block_unoffload_mirror(struct dpaa2_switch_filter_block *block,
+ struct ethsw_port_priv *port_priv);
#endif /* __ETHSW_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
index cb13e740f72b..397d55f2bd99 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
@@ -39,11 +39,16 @@
#define DPSW_CMDID_GET_IRQ_STATUS DPSW_CMD_ID(0x016)
#define DPSW_CMDID_CLEAR_IRQ_STATUS DPSW_CMD_ID(0x017)
+#define DPSW_CMDID_SET_REFLECTION_IF DPSW_CMD_ID(0x022)
+
#define DPSW_CMDID_IF_SET_TCI DPSW_CMD_ID(0x030)
#define DPSW_CMDID_IF_SET_STP DPSW_CMD_ID(0x031)
#define DPSW_CMDID_IF_GET_COUNTER DPSW_CMD_V2(0x034)
+#define DPSW_CMDID_IF_ADD_REFLECTION DPSW_CMD_ID(0x037)
+#define DPSW_CMDID_IF_REMOVE_REFLECTION DPSW_CMD_ID(0x038)
+
#define DPSW_CMDID_IF_ENABLE DPSW_CMD_ID(0x03D)
#define DPSW_CMDID_IF_DISABLE DPSW_CMD_ID(0x03E)
@@ -533,5 +538,19 @@ struct dpsw_cmd_acl_entry {
__le64 pad2[4];
__le64 key_iova;
};
+
+struct dpsw_cmd_set_reflection_if {
+ __le16 if_id;
+};
+
+#define DPSW_FILTER_SHIFT 0
+#define DPSW_FILTER_SIZE 2
+
+struct dpsw_cmd_if_reflection {
+ __le16 if_id;
+ __le16 vlan_id;
+ /* only 2 bits from the LSB */
+ u8 filter;
+};
#pragma pack(pop)
#endif /* __FSL_DPSW_CMD_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
index 6352d6d1ecba..ab921d75deb2 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
@@ -1579,3 +1579,83 @@ int dpsw_acl_remove_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
+
+/**
+ * dpsw_set_reflection_if() - Set target interface for mirrored traffic
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPSW object
+ * @if_id: Interface Id
+ *
+ * Only one mirroring destination is allowed per switch
+ *
+ * Return: Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_set_reflection_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ u16 if_id)
+{
+ struct dpsw_cmd_set_reflection_if *cmd_params;
+ struct fsl_mc_command cmd = { 0 };
+
+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_REFLECTION_IF,
+ cmd_flags,
+ token);
+ cmd_params = (struct dpsw_cmd_set_reflection_if *)cmd.params;
+ cmd_params->if_id = cpu_to_le16(if_id);
+
+ return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_if_add_reflection() - Setup mirroring rule
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPSW object
+ * @if_id: Interface Identifier
+ * @cfg: Reflection configuration
+ *
+ * Return: Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_add_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ u16 if_id, const struct dpsw_reflection_cfg *cfg)
+{
+ struct dpsw_cmd_if_reflection *cmd_params;
+ struct fsl_mc_command cmd = { 0 };
+
+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_ADD_REFLECTION,
+ cmd_flags,
+ token);
+ cmd_params = (struct dpsw_cmd_if_reflection *)cmd.params;
+ cmd_params->if_id = cpu_to_le16(if_id);
+ cmd_params->vlan_id = cpu_to_le16(cfg->vlan_id);
+ dpsw_set_field(cmd_params->filter, FILTER, cfg->filter);
+
+ return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_if_remove_reflection() - Remove mirroring rule
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPSW object
+ * @if_id: Interface Identifier
+ * @cfg: Reflection configuration
+ *
+ * Return: Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_remove_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ u16 if_id, const struct dpsw_reflection_cfg *cfg)
+{
+ struct dpsw_cmd_if_reflection *cmd_params;
+ struct fsl_mc_command cmd = { 0 };
+
+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_REMOVE_REFLECTION,
+ cmd_flags,
+ token);
+ cmd_params = (struct dpsw_cmd_if_reflection *)cmd.params;
+ cmd_params->if_id = cpu_to_le16(if_id);
+ cmd_params->vlan_id = cpu_to_le16(cfg->vlan_id);
+ dpsw_set_field(cmd_params->filter, FILTER, cfg->filter);
+
+ return mc_send_command(mc_io, &cmd);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
index 5ef221a25b02..b90bd363f47a 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
@@ -99,6 +99,11 @@ int dpsw_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
#define DPSW_IRQ_EVENT_LINK_CHANGED 0x0001
/**
+ * DPSW_IRQ_EVENT_ENDPOINT_CHANGED - Indicates a change in endpoint
+ */
+#define DPSW_IRQ_EVENT_ENDPOINT_CHANGED 0x0002
+
+/**
* struct dpsw_irq_cfg - IRQ configuration
* @addr: Address that must be written to signal a message-based interrupt
* @val: Value to write into irq_addr address
@@ -752,4 +757,35 @@ int dpsw_acl_add_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
int dpsw_acl_remove_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
u16 acl_id, const struct dpsw_acl_entry_cfg *cfg);
+
+/**
+ * enum dpsw_reflection_filter - Filter type for frames to be reflected
+ * @DPSW_REFLECTION_FILTER_INGRESS_ALL: Reflect all frames
+ * @DPSW_REFLECTION_FILTER_INGRESS_VLAN: Reflect only frames that belong to
+ * the particular VLAN defined by the vlan_id parameter
+ *
+ */
+enum dpsw_reflection_filter {
+ DPSW_REFLECTION_FILTER_INGRESS_ALL = 0,
+ DPSW_REFLECTION_FILTER_INGRESS_VLAN = 1
+};
+
+/**
+ * struct dpsw_reflection_cfg - Structure representing the mirroring config
+ * @filter: Filter type for frames to be mirrored
+ * @vlan_id: VLAN ID to mirror; only for DPSW_REFLECTION_FILTER_INGRESS_VLAN
+ */
+struct dpsw_reflection_cfg {
+ enum dpsw_reflection_filter filter;
+ u16 vlan_id;
+};
+
+int dpsw_set_reflection_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ u16 if_id);
+
+int dpsw_if_add_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ u16 if_id, const struct dpsw_reflection_cfg *cfg);
+
+int dpsw_if_remove_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ u16 if_id, const struct dpsw_reflection_cfg *cfg);
#endif /* __FSL_DPSW_H */
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
index ebccaf02411c..9690e36e9e85 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -585,7 +585,9 @@ static void enetc_get_ringparam(struct net_device *ndev,
}
static int enetc_get_coalesce(struct net_device *ndev,
- struct ethtool_coalesce *ic)
+ struct ethtool_coalesce *ic,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct enetc_ndev_priv *priv = netdev_priv(ndev);
struct enetc_int_vector *v = priv->int_vector[0];
@@ -602,7 +604,9 @@ static int enetc_get_coalesce(struct net_device *ndev,
}
static int enetc_set_coalesce(struct net_device *ndev,
- struct ethtool_coalesce *ic)
+ struct ethtool_coalesce *ic,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct enetc_ndev_priv *priv = netdev_priv(ndev);
u32 rx_ictt, tx_ictt;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index c84f6c226743..60d94e0a07d6 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -735,7 +735,7 @@ static const struct net_device_ops enetc_ndev_ops = {
.ndo_set_vf_vlan = enetc_pf_set_vf_vlan,
.ndo_set_vf_spoofchk = enetc_pf_set_vf_spoofchk,
.ndo_set_features = enetc_pf_set_features,
- .ndo_do_ioctl = enetc_ioctl,
+ .ndo_eth_ioctl = enetc_ioctl,
.ndo_setup_tc = enetc_setup_tc,
.ndo_bpf = enetc_setup_bpf,
.ndo_xdp_xmit = enetc_xdp_xmit,
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
index 03090ba7e226..1a9d1e8b772c 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
@@ -99,7 +99,7 @@ static const struct net_device_ops enetc_ndev_ops = {
.ndo_get_stats = enetc_get_stats,
.ndo_set_mac_address = enetc_vf_set_mac_addr,
.ndo_set_features = enetc_vf_set_features,
- .ndo_do_ioctl = enetc_ioctl,
+ .ndo_eth_ioctl = enetc_ioctl,
.ndo_setup_tc = enetc_setup_tc,
};
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 2e002e4b4b4a..7b4961daa254 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -77,6 +77,8 @@
#define FEC_R_DES_ACTIVE_2 0x1e8 /* Rx descriptor active for ring 2 */
#define FEC_X_DES_ACTIVE_2 0x1ec /* Tx descriptor active for ring 2 */
#define FEC_QOS_SCHEME 0x1f0 /* Set multi queues Qos scheme */
+#define FEC_LPI_SLEEP 0x1f4 /* Set IEEE802.3az LPI Sleep Ts time */
+#define FEC_LPI_WAKE 0x1f8 /* Set IEEE802.3az LPI Wake Tw time */
#define FEC_MIIGSK_CFGR 0x300 /* MIIGSK Configuration reg */
#define FEC_MIIGSK_ENR 0x308 /* MIIGSK Enable reg */
@@ -187,6 +189,8 @@
#define FEC_RXIC0 0xfff
#define FEC_RXIC1 0xfff
#define FEC_RXIC2 0xfff
+#define FEC_LPI_SLEEP 0xfff
+#define FEC_LPI_WAKE 0xfff
#endif /* CONFIG_M5272 */
@@ -379,6 +383,9 @@ struct bufdesc_ex {
#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF)
#define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF))
+#define FEC_ENET_TXC_DLY ((uint)0x00010000)
+#define FEC_ENET_RXC_DLY ((uint)0x00020000)
+
/* ENET interrupt coalescing macro define */
#define FEC_ITR_CLK_SEL (0x1 << 30)
#define FEC_ITR_EN (0x1 << 31)
@@ -472,6 +479,22 @@ struct bufdesc_ex {
*/
#define FEC_QUIRK_HAS_MULTI_QUEUES (1 << 19)
+/* The i.MX8MQ ENET IP version adds support for the IEEE 802.3az EEE
+ * standard. For transmission, the MAC provides two user registers to set
+ * the Sleep (TS) and Wake (TW) times.
+ */
+#define FEC_QUIRK_HAS_EEE (1 << 20)
+
+/* The i.MX8QM ENET IP version adds a new feature to generate delayed TXC/RXC
+ * as an alternative option to make sure it works well with various PHYs.
+ * To implement the delayed clock, ENET takes synchronized 250MHz
+ * clocks to generate a 2ns delay.
+ */
+#define FEC_QUIRK_DELAYED_CLKS_SUPPORT (1 << 21)
+
+/* The i.MX8MQ SoC integration mixes the wakeup interrupt signal into the "int2" interrupt line. */
+#define FEC_QUIRK_WAKEUP_FROM_INT2 (1 << 22)
+
struct bufdesc_prop {
int qid;
/* Address of Rx and Tx buffers */
@@ -528,6 +551,7 @@ struct fec_enet_private {
struct clk *clk_ref;
struct clk *clk_enet_out;
struct clk *clk_ptp;
+ struct clk *clk_2x_txclk;
bool ptp_clk_on;
struct mutex ptp_clk_mutex;
@@ -550,6 +574,8 @@ struct fec_enet_private {
uint phy_speed;
phy_interface_t phy_interface;
struct device_node *phy_node;
+ bool rgmii_txc_dly;
+ bool rgmii_rxc_dly;
int link;
int full_duplex;
int speed;
@@ -557,6 +583,7 @@ struct fec_enet_private {
bool bufdesc_ex;
int pause_flag;
int wol_flag;
+ int wake_irq;
u32 quirks;
struct napi_struct napi;
@@ -589,6 +616,10 @@ struct fec_enet_private {
unsigned int tx_time_itr;
unsigned int itr_clk_rate;
+ /* tx lpi eee mode */
+ struct ethtool_eee eee;
+ unsigned int clk_ref_rate;
+
u32 rx_copybreak;
/* ptp clock period in ns*/
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 7e4c4980ced7..80bd5c629fa0 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -135,6 +135,26 @@ static const struct fec_devinfo fec_imx6ul_info = {
FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII,
};
+static const struct fec_devinfo fec_imx8mq_info = {
+ .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
+ FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
+ FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB |
+ FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE |
+ FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE |
+ FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES |
+ FEC_QUIRK_HAS_EEE | FEC_QUIRK_WAKEUP_FROM_INT2,
+};
+
+static const struct fec_devinfo fec_imx8qm_info = {
+ .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
+ FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
+ FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB |
+ FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE |
+ FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE |
+ FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES |
+ FEC_QUIRK_DELAYED_CLKS_SUPPORT,
+};
+
static struct platform_device_id fec_devtype[] = {
{
/* keep it for coldfire */
@@ -162,6 +182,12 @@ static struct platform_device_id fec_devtype[] = {
.name = "imx6ul-fec",
.driver_data = (kernel_ulong_t)&fec_imx6ul_info,
}, {
+ .name = "imx8mq-fec",
+ .driver_data = (kernel_ulong_t)&fec_imx8mq_info,
+ }, {
+ .name = "imx8qm-fec",
+ .driver_data = (kernel_ulong_t)&fec_imx8qm_info,
+ }, {
/* sentinel */
}
};
@@ -175,6 +201,8 @@ enum imx_fec_type {
MVF600_FEC,
IMX6SX_FEC,
IMX6UL_FEC,
+ IMX8MQ_FEC,
+ IMX8QM_FEC,
};
static const struct of_device_id fec_dt_ids[] = {
@@ -185,6 +213,8 @@ static const struct of_device_id fec_dt_ids[] = {
{ .compatible = "fsl,mvf600-fec", .data = &fec_devtype[MVF600_FEC], },
{ .compatible = "fsl,imx6sx-fec", .data = &fec_devtype[IMX6SX_FEC], },
{ .compatible = "fsl,imx6ul-fec", .data = &fec_devtype[IMX6UL_FEC], },
+ { .compatible = "fsl,imx8mq-fec", .data = &fec_devtype[IMX8MQ_FEC], },
+ { .compatible = "fsl,imx8qm-fec", .data = &fec_devtype[IMX8QM_FEC], },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, fec_dt_ids);
@@ -1107,6 +1137,13 @@ fec_restart(struct net_device *ndev)
if (fep->bufdesc_ex)
ecntl |= (1 << 4);
+ if (fep->quirks & FEC_QUIRK_DELAYED_CLKS_SUPPORT &&
+ fep->rgmii_txc_dly)
+ ecntl |= FEC_ENET_TXC_DLY;
+ if (fep->quirks & FEC_QUIRK_DELAYED_CLKS_SUPPORT &&
+ fep->rgmii_rxc_dly)
+ ecntl |= FEC_ENET_RXC_DLY;
+
#ifndef CONFIG_M5272
/* Enable the MIB statistic event counters */
writel(0 << 31, fep->hwp + FEC_MIB_CTRLSTAT);
@@ -1970,6 +2007,10 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
if (ret)
goto failed_clk_ref;
+ ret = clk_prepare_enable(fep->clk_2x_txclk);
+ if (ret)
+ goto failed_clk_2x_txclk;
+
fec_enet_phy_reset_after_clk_enable(ndev);
} else {
clk_disable_unprepare(fep->clk_enet_out);
@@ -1980,10 +2021,14 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
mutex_unlock(&fep->ptp_clk_mutex);
}
clk_disable_unprepare(fep->clk_ref);
+ clk_disable_unprepare(fep->clk_2x_txclk);
}
return 0;
+failed_clk_2x_txclk:
+ if (fep->clk_ref)
+ clk_disable_unprepare(fep->clk_ref);
failed_clk_ref:
if (fep->clk_ptp) {
mutex_lock(&fep->ptp_clk_mutex);
@@ -1997,6 +2042,34 @@ failed_clk_ptp:
return ret;
}
+static int fec_enet_parse_rgmii_delay(struct fec_enet_private *fep,
+ struct device_node *np)
+{
+ u32 rgmii_tx_delay, rgmii_rx_delay;
+
+ /* For rgmii tx internal delay, valid values are 0ps and 2000ps */
+ if (!of_property_read_u32(np, "tx-internal-delay-ps", &rgmii_tx_delay)) {
+ if (rgmii_tx_delay != 0 && rgmii_tx_delay != 2000) {
+ dev_err(&fep->pdev->dev, "The only allowed RGMII TX delay values are: 0ps, 2000ps");
+ return -EINVAL;
+ } else if (rgmii_tx_delay == 2000) {
+ fep->rgmii_txc_dly = true;
+ }
+ }
+
+ /* For rgmii rx internal delay, valid values are 0ps and 2000ps */
+ if (!of_property_read_u32(np, "rx-internal-delay-ps", &rgmii_rx_delay)) {
+ if (rgmii_rx_delay != 0 && rgmii_rx_delay != 2000) {
+ dev_err(&fep->pdev->dev, "The only allowed RGMII RX delay values are: 0ps, 2000ps");
+ return -EINVAL;
+ } else if (rgmii_rx_delay == 2000) {
+ fep->rgmii_rxc_dly = true;
+ }
+ }
+
+ return 0;
+}
+
static int fec_enet_mii_probe(struct net_device *ndev)
{
struct fec_enet_private *fep = netdev_priv(ndev);
@@ -2581,8 +2654,10 @@ static void fec_enet_itr_coal_set(struct net_device *ndev)
}
}
-static int
-fec_enet_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec)
+static int fec_enet_get_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct fec_enet_private *fep = netdev_priv(ndev);
@@ -2598,8 +2673,10 @@ fec_enet_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec)
return 0;
}
-static int
-fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec)
+static int fec_enet_set_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct fec_enet_private *fep = netdev_priv(ndev);
struct device *dev = &fep->pdev->dev;
@@ -2651,7 +2728,7 @@ static void fec_enet_itr_coal_init(struct net_device *ndev)
ec.tx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT;
ec.tx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT;
- fec_enet_set_coalesce(ndev, &ec);
+ fec_enet_set_coalesce(ndev, &ec, NULL, NULL);
}
static int fec_enet_get_tunable(struct net_device *netdev,
@@ -2692,6 +2769,92 @@ static int fec_enet_set_tunable(struct net_device *netdev,
return ret;
}
+/* The LPI Sleep Ts count is based on the tx clk (clk_ref).
+ * The lpi sleep cnt value = X us / (cycle_ns).
+ */
+static int fec_enet_us_to_tx_cycle(struct net_device *ndev, int us)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+
+ return us * (fep->clk_ref_rate / 1000) / 1000;
+}
+
+static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ struct ethtool_eee *p = &fep->eee;
+ unsigned int sleep_cycle, wake_cycle;
+ int ret = 0;
+
+ if (enable) {
+ ret = phy_init_eee(ndev->phydev, 0);
+ if (ret)
+ return ret;
+
+ sleep_cycle = fec_enet_us_to_tx_cycle(ndev, p->tx_lpi_timer);
+ wake_cycle = sleep_cycle;
+ } else {
+ sleep_cycle = 0;
+ wake_cycle = 0;
+ }
+
+ p->tx_lpi_enabled = enable;
+ p->eee_enabled = enable;
+ p->eee_active = enable;
+
+ writel(sleep_cycle, fep->hwp + FEC_LPI_SLEEP);
+ writel(wake_cycle, fep->hwp + FEC_LPI_WAKE);
+
+ return 0;
+}
+
+static int
+fec_enet_get_eee(struct net_device *ndev, struct ethtool_eee *edata)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ struct ethtool_eee *p = &fep->eee;
+
+ if (!(fep->quirks & FEC_QUIRK_HAS_EEE))
+ return -EOPNOTSUPP;
+
+ if (!netif_running(ndev))
+ return -ENETDOWN;
+
+ edata->eee_enabled = p->eee_enabled;
+ edata->eee_active = p->eee_active;
+ edata->tx_lpi_timer = p->tx_lpi_timer;
+ edata->tx_lpi_enabled = p->tx_lpi_enabled;
+
+ return phy_ethtool_get_eee(ndev->phydev, edata);
+}
+
+static int
+fec_enet_set_eee(struct net_device *ndev, struct ethtool_eee *edata)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ struct ethtool_eee *p = &fep->eee;
+ int ret = 0;
+
+ if (!(fep->quirks & FEC_QUIRK_HAS_EEE))
+ return -EOPNOTSUPP;
+
+ if (!netif_running(ndev))
+ return -ENETDOWN;
+
+ p->tx_lpi_timer = edata->tx_lpi_timer;
+
+ if (!edata->eee_enabled || !edata->tx_lpi_enabled ||
+ !edata->tx_lpi_timer)
+ ret = fec_enet_eee_mode_set(ndev, false);
+ else
+ ret = fec_enet_eee_mode_set(ndev, true);
+
+ if (ret)
+ return ret;
+
+ return phy_ethtool_set_eee(ndev->phydev, edata);
+}
+
static void
fec_enet_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
{
@@ -2719,12 +2882,12 @@ fec_enet_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
device_set_wakeup_enable(&ndev->dev, wol->wolopts & WAKE_MAGIC);
if (device_may_wakeup(&ndev->dev)) {
fep->wol_flag |= FEC_WOL_FLAG_ENABLE;
- if (fep->irq[0] > 0)
- enable_irq_wake(fep->irq[0]);
+ if (fep->wake_irq > 0)
+ enable_irq_wake(fep->wake_irq);
} else {
fep->wol_flag &= (~FEC_WOL_FLAG_ENABLE);
- if (fep->irq[0] > 0)
- disable_irq_wake(fep->irq[0]);
+ if (fep->wake_irq > 0)
+ disable_irq_wake(fep->wake_irq);
}
return 0;
@@ -2752,6 +2915,8 @@ static const struct ethtool_ops fec_enet_ethtool_ops = {
.set_tunable = fec_enet_set_tunable,
.get_wol = fec_enet_get_wol,
.set_wol = fec_enet_set_wol,
+ .get_eee = fec_enet_get_eee,
+ .set_eee = fec_enet_set_eee,
.get_link_ksettings = phy_ethtool_get_link_ksettings,
.set_link_ksettings = phy_ethtool_set_link_ksettings,
.self_test = net_selftest,
@@ -3280,7 +3445,7 @@ static const struct net_device_ops fec_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_tx_timeout = fec_timeout,
.ndo_set_mac_address = fec_set_mac_address,
- .ndo_do_ioctl = fec_enet_ioctl,
+ .ndo_eth_ioctl = fec_enet_ioctl,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = fec_poll_controller,
#endif
@@ -3535,6 +3700,17 @@ static int fec_enet_get_irq_cnt(struct platform_device *pdev)
return irq_cnt;
}
+static void fec_enet_get_wakeup_irq(struct platform_device *pdev)
+{
+ struct net_device *ndev = platform_get_drvdata(pdev);
+ struct fec_enet_private *fep = netdev_priv(ndev);
+
+ if (fep->quirks & FEC_QUIRK_WAKEUP_FROM_INT2)
+ fep->wake_irq = fep->irq[2];
+ else
+ fep->wake_irq = fep->irq[0];
+}
+
static int fec_enet_init_stop_mode(struct fec_enet_private *fep,
struct device_node *np)
{
@@ -3666,6 +3842,10 @@ fec_probe(struct platform_device *pdev)
fep->phy_interface = interface;
}
+ ret = fec_enet_parse_rgmii_delay(fep, np);
+ if (ret)
+ goto failed_rgmii_delay;
+
fep->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
if (IS_ERR(fep->clk_ipg)) {
ret = PTR_ERR(fep->clk_ipg);
@@ -3692,6 +3872,14 @@ fec_probe(struct platform_device *pdev)
fep->clk_ref = devm_clk_get(&pdev->dev, "enet_clk_ref");
if (IS_ERR(fep->clk_ref))
fep->clk_ref = NULL;
+ fep->clk_ref_rate = clk_get_rate(fep->clk_ref);
+
+ /* clk_2x_txclk is optional, depends on board */
+ if (fep->rgmii_txc_dly || fep->rgmii_rxc_dly) {
+ fep->clk_2x_txclk = devm_clk_get(&pdev->dev, "enet_2x_txclk");
+ if (IS_ERR(fep->clk_2x_txclk))
+ fep->clk_2x_txclk = NULL;
+ }
fep->bufdesc_ex = fep->quirks & FEC_QUIRK_HAS_BUFDESC_EX;
fep->clk_ptp = devm_clk_get(&pdev->dev, "ptp");
@@ -3762,6 +3950,9 @@ fec_probe(struct platform_device *pdev)
fep->irq[i] = irq;
}
+ /* Decide which interrupt line is wakeup capable */
+ fec_enet_get_wakeup_irq(pdev);
+
ret = fec_enet_mii_init(pdev);
if (ret)
goto failed_mii_init;
@@ -3809,6 +4000,7 @@ failed_clk_ahb:
failed_clk_ipg:
fec_enet_clk_enable(ndev, false);
failed_clk:
+failed_rgmii_delay:
if (of_phy_is_fixed_link(np))
of_phy_deregister_fixed_link(np);
of_node_put(phy_node);
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
index 02c47658a215..73ff359a15f1 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
@@ -792,7 +792,7 @@ static const struct net_device_ops mpc52xx_fec_netdev_ops = {
.ndo_set_rx_mode = mpc52xx_fec_set_multicast_list,
.ndo_set_mac_address = mpc52xx_fec_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = phy_do_ioctl,
+ .ndo_eth_ioctl = phy_do_ioctl,
.ndo_tx_timeout = mpc52xx_fec_tx_timeout,
.ndo_get_stats = mpc52xx_fec_get_stats,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index 6ee325ad35c5..2db6e38a772e 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -900,7 +900,7 @@ static const struct net_device_ops fs_enet_netdev_ops = {
.ndo_start_xmit = fs_enet_start_xmit,
.ndo_tx_timeout = fs_timeout,
.ndo_set_rx_mode = fs_set_multicast_list,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 9646483137c4..af6ad94bf24a 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -3184,7 +3184,7 @@ static const struct net_device_ops gfar_netdev_ops = {
.ndo_set_features = gfar_set_features,
.ndo_set_rx_mode = gfar_set_multi,
.ndo_tx_timeout = gfar_timeout,
- .ndo_do_ioctl = gfar_ioctl,
+ .ndo_eth_ioctl = gfar_ioctl,
.ndo_get_stats64 = gfar_get_stats64,
.ndo_change_carrier = fixed_phy_change_carrier,
.ndo_set_mac_address = gfar_set_mac_addr,
diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
index cc7d4f93da54..7b32ed29bf4c 100644
--- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
+++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
@@ -243,7 +243,9 @@ static unsigned int gfar_ticks2usecs(struct gfar_private *priv,
/* Get the coalescing parameters, and put them in the cvals
* structure. */
static int gfar_gcoalesce(struct net_device *dev,
- struct ethtool_coalesce *cvals)
+ struct ethtool_coalesce *cvals,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct gfar_private *priv = netdev_priv(dev);
struct gfar_priv_rx_q *rx_queue = NULL;
@@ -280,7 +282,9 @@ static int gfar_gcoalesce(struct net_device *dev,
* in order for coalescing to be active
*/
static int gfar_scoalesce(struct net_device *dev,
- struct ethtool_coalesce *cvals)
+ struct ethtool_coalesce *cvals,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct gfar_private *priv = netdev_priv(dev);
int i, err = 0;
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 0acfafb73db1..3eb288d10b0c 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3516,7 +3516,7 @@ static const struct net_device_ops ucc_geth_netdev_ops = {
.ndo_set_mac_address = ucc_geth_set_mac_addr,
.ndo_set_rx_mode = ucc_geth_set_multi,
.ndo_tx_timeout = ucc_geth_timeout,
- .ndo_do_ioctl = ucc_geth_ioctl,
+ .ndo_eth_ioctl = ucc_geth_ioctl,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = ucc_netpoll,
#endif
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 5bb56b454541..f089d33dd48e 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -322,7 +322,8 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv,
tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
// Check if next command will overflow the buffer.
- if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) == tail) {
+ if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) ==
+ (tail & priv->adminq_mask)) {
int err;
// Flush existing commands to make room.
@@ -332,7 +333,8 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv,
// Retry.
tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
- if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) == tail) {
+ if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) ==
+ (tail & priv->adminq_mask)) {
// This should never happen. We just flushed the
// command queue so there should be enough space.
return -ENOMEM;
diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index bb062b02fb85..3312e1d93c3b 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -90,6 +90,8 @@ config HNS_ENET
config HNS3
tristate "Hisilicon Network Subsystem Support HNS3 (Framework)"
depends on PCI
+ select NET_DEVLINK
+ select PAGE_POOL
help
This selects the framework support for Hisilicon Network Subsystem 3.
This layer facilitates clients like ENET, RoCE and user-space ethernet
@@ -102,7 +104,7 @@ config HNS3_HCLGE
tristate "Hisilicon HNS3 HCLGE Acceleration Engine & Compatibility Layer Support"
default m
depends on PCI_MSI
- imply PTP_1588_CLOCK
+ depends on PTP_1588_CLOCK_OPTIONAL
help
This selects the HNS3_HCLGE network acceleration engine & its hardware
compatibility layer. The engine would be used in Hisilicon hip08 family of
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
index e53512f6878a..37b605fed32c 100644
--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -796,7 +796,9 @@ static void hip04_tx_timeout_task(struct work_struct *work)
}
static int hip04_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct hip04_priv *priv = netdev_priv(netdev);
@@ -807,7 +809,9 @@ static int hip04_get_coalesce(struct net_device *netdev,
}
static int hip04_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct hip04_priv *priv = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c
index 3c4db4a6b431..22bf914f2dbd 100644
--- a/drivers/net/ethernet/hisilicon/hisi_femac.c
+++ b/drivers/net/ethernet/hisilicon/hisi_femac.c
@@ -685,7 +685,7 @@ static const struct net_device_ops hisi_femac_netdev_ops = {
.ndo_open = hisi_femac_net_open,
.ndo_stop = hisi_femac_net_close,
.ndo_start_xmit = hisi_femac_net_xmit,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_set_mac_address = hisi_femac_set_mac_address,
.ndo_set_rx_mode = hisi_femac_net_set_rx_mode,
};
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index ad534f9e41ab..343c605c4be8 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1945,7 +1945,7 @@ static const struct net_device_ops hns_nic_netdev_ops = {
.ndo_tx_timeout = hns_nic_net_timeout,
.ndo_set_mac_address = hns_nic_net_set_mac_address,
.ndo_change_mtu = hns_nic_change_mtu,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_set_features = hns_nic_set_features,
.ndo_fix_features = hns_nic_fix_features,
.ndo_get_stats64 = hns_nic_get_stats64,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 7e62dcff2426..ab7390225942 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -730,11 +730,15 @@ static int hns_set_pauseparam(struct net_device *net_dev,
* hns_get_coalesce - get coalesce info.
* @net_dev: net device
* @ec: coalesce info.
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
*
* Return 0 on success, negative on failure.
*/
static int hns_get_coalesce(struct net_device *net_dev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct hns_nic_priv *priv = netdev_priv(net_dev);
struct hnae_ae_ops *ops;
@@ -774,11 +778,15 @@ static int hns_get_coalesce(struct net_device *net_dev,
* hns_set_coalesce - set coalesce info.
* @net_dev: net device
* @ec: coalesce info.
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
*
* Return 0 on success, negative on failure.
*/
static int hns_set_coalesce(struct net_device *net_dev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct hns_nic_priv *priv = netdev_priv(net_dev);
struct hnae_ae_ops *ops;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index aa86a81c8f4a..c2bd2584201f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -9,7 +9,7 @@
enum HCLGE_MBX_OPCODE {
HCLGE_MBX_RESET = 0x01, /* (VF -> PF) assert reset */
- HCLGE_MBX_ASSERTING_RESET, /* (PF -> VF) PF is asserting reset*/
+ HCLGE_MBX_ASSERTING_RESET, /* (PF -> VF) PF is asserting reset */
HCLGE_MBX_SET_UNICAST, /* (VF -> PF) set UC addr */
HCLGE_MBX_SET_MULTICAST, /* (VF -> PF) set MC addr */
HCLGE_MBX_SET_VLAN, /* (VF -> PF) set VLAN */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index e0b7c3c44e7b..546a60530384 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -65,7 +65,7 @@
#define HNAE3_UNIC_CLIENT_INITED_B 0x4
#define HNAE3_ROCE_CLIENT_INITED_B 0x5
-#define HNAE3_DEV_SUPPORT_ROCE_DCB_BITS (BIT(HNAE3_DEV_SUPPORT_DCB_B) |\
+#define HNAE3_DEV_SUPPORT_ROCE_DCB_BITS (BIT(HNAE3_DEV_SUPPORT_DCB_B) | \
BIT(HNAE3_DEV_SUPPORT_ROCE_B))
#define hnae3_dev_roce_supported(hdev) \
@@ -718,6 +718,8 @@ struct hnae3_ae_ops {
u32 nsec, u32 sec);
int (*get_ts_info)(struct hnae3_handle *handle,
struct ethtool_ts_info *info);
+ int (*get_link_diagnosis_info)(struct hnae3_handle *handle,
+ u32 *status_code);
};
struct hnae3_dcb_ops {
@@ -772,6 +774,7 @@ struct hnae3_knic_private_info {
u16 int_rl_setting;
enum pkt_hash_types rss_type;
+ void __iomem *io_base;
};
struct hnae3_roce_private_info {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 532523069d74..2b66c59f5eaf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -38,9 +38,8 @@ static struct hns3_dbg_dentry_info hns3_dbg_dentry[] = {
},
};
-static int hns3_dbg_bd_file_init(struct hnae3_handle *handle, unsigned int cmd);
-static int hns3_dbg_common_file_init(struct hnae3_handle *handle,
- unsigned int cmd);
+static int hns3_dbg_bd_file_init(struct hnae3_handle *handle, u32 cmd);
+static int hns3_dbg_common_file_init(struct hnae3_handle *handle, u32 cmd);
static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
{
@@ -696,7 +695,7 @@ static int hns3_dbg_queue_map(struct hnae3_handle *h, char *buf, int len)
sprintf(result[j++], "%u", i);
sprintf(result[j++], "%u",
h->ae_algo->ops->get_global_queue_id(h, i));
- sprintf(result[j++], "%u",
+ sprintf(result[j++], "%d",
priv->ring[i].tqp_vector->vector_irq);
hns3_dbg_fill_content(content, sizeof(content), queue_map_items,
(const char **)result,
@@ -798,10 +797,10 @@ static const struct hns3_dbg_item tx_bd_info_items[] = {
{ "T_CS_VLAN_TSO", 2 },
{ "OT_VLAN_TAG", 3 },
{ "TV", 2 },
- { "OLT_VLAN_LEN", 2},
- { "PAYLEN_OL4CS", 2},
- { "BD_FE_SC_VLD", 2},
- { "MSS_HW_CSUM", 0},
+ { "OLT_VLAN_LEN", 2 },
+ { "PAYLEN_OL4CS", 2 },
+ { "BD_FE_SC_VLD", 2 },
+ { "MSS_HW_CSUM", 0 },
};
static void hns3_dump_tx_bd_info(struct hns3_nic_priv *priv,
@@ -868,7 +867,7 @@ static void
hns3_dbg_dev_caps(struct hnae3_handle *h, char *buf, int len, int *pos)
{
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
- static const char * const str[] = {"no", "yes"};
+ const char * const str[] = {"no", "yes"};
unsigned long *caps = ae_dev->caps;
u32 i, state;
@@ -938,20 +937,19 @@ static int hns3_dbg_dev_info(struct hnae3_handle *h, char *buf, int len)
return 0;
}
-static int hns3_dbg_get_cmd_index(struct hnae3_handle *handle,
- const unsigned char *name, u32 *index)
+static int hns3_dbg_get_cmd_index(struct hns3_dbg_data *dbg_data, u32 *index)
{
u32 i;
for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++) {
- if (!strncmp(name, hns3_dbg_cmd[i].name,
- strlen(hns3_dbg_cmd[i].name))) {
+ if (hns3_dbg_cmd[i].cmd == dbg_data->cmd) {
*index = i;
return 0;
}
}
- dev_err(&handle->pdev->dev, "unknown command(%s)\n", name);
+ dev_err(&dbg_data->handle->pdev->dev, "unknown command(%d)\n",
+ dbg_data->cmd);
return -EINVAL;
}
@@ -1019,8 +1017,7 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer,
u32 index;
int ret;
- ret = hns3_dbg_get_cmd_index(handle, filp->f_path.dentry->d_iname,
- &index);
+ ret = hns3_dbg_get_cmd_index(dbg_data, &index);
if (ret)
return ret;
@@ -1090,6 +1087,7 @@ static int hns3_dbg_bd_file_init(struct hnae3_handle *handle, u32 cmd)
char name[HNS3_DBG_FILE_NAME_LEN];
data[i].handle = handle;
+ data[i].cmd = hns3_dbg_cmd[cmd].cmd;
data[i].qid = i;
sprintf(name, "%s%u", hns3_dbg_cmd[cmd].name, i);
debugfs_create_file(name, 0400, entry_dir, &data[i],
@@ -1110,6 +1108,7 @@ hns3_dbg_common_file_init(struct hnae3_handle *handle, u32 cmd)
return -ENOMEM;
data->handle = handle;
+ data->cmd = hns3_dbg_cmd[cmd].cmd;
entry_dir = hns3_dbg_dentry[hns3_dbg_cmd[cmd].dentry].dentry;
debugfs_create_file(hns3_dbg_cmd[cmd].name, 0400, entry_dir,
data, &hns3_dbg_fops);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
index f3766ff38bb7..bd8801065e02 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
@@ -22,6 +22,7 @@ struct hns3_dbg_item {
struct hns3_dbg_data {
struct hnae3_handle *handle;
+ enum hnae3_dbg_cmd cmd;
u16 qid;
};
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index cdb5f14fb6bc..22af3d6ce178 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -63,7 +63,7 @@ MODULE_PARM_DESC(tx_sgl, "Minimum number of frags when using dma_map_sg() to opt
#define HNS3_SGL_SIZE(nfrag) (sizeof(struct scatterlist) * (nfrag) + \
sizeof(struct sg_table))
-#define HNS3_MAX_SGL_SIZE ALIGN(HNS3_SGL_SIZE(HNS3_MAX_TSO_BD_NUM),\
+#define HNS3_MAX_SGL_SIZE ALIGN(HNS3_SGL_SIZE(HNS3_MAX_TSO_BD_NUM), \
dma_get_cache_alignment())
#define DEFAULT_MSG_LEVEL (NETIF_MSG_PROBE | NETIF_MSG_LINK | \
@@ -100,7 +100,7 @@ static const struct pci_device_id hns3_pci_tbl[] = {
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_RDMA_DCB_PFC_VF),
HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
/* required last entry */
- {0, }
+ {0,}
};
MODULE_DEVICE_TABLE(pci, hns3_pci_tbl);
@@ -971,8 +971,7 @@ static u32 hns3_tx_spare_space(struct hns3_enet_ring *ring)
/* The free tx buffer is divided into two parts, so pick the
* larger one.
*/
- return (ntc > (tx_spare->len - ntu) ? ntc :
- (tx_spare->len - ntu)) - 1;
+ return max(ntc, tx_spare->len - ntu) - 1;
}
static void hns3_tx_spare_update(struct hns3_enet_ring *ring)
@@ -2852,7 +2851,7 @@ static const struct net_device_ops hns3_nic_netdev_ops = {
.ndo_start_xmit = hns3_nic_net_xmit,
.ndo_tx_timeout = hns3_nic_net_timeout,
.ndo_set_mac_address = hns3_nic_net_set_mac_address,
- .ndo_do_ioctl = hns3_nic_do_ioctl,
+ .ndo_eth_ioctl = hns3_nic_do_ioctl,
.ndo_change_mtu = hns3_nic_change_mtu,
.ndo_set_features = hns3_nic_set_features,
.ndo_features_check = hns3_features_check,
@@ -3127,11 +3126,6 @@ static void hns3_set_default_feature(struct net_device *netdev)
netdev->priv_flags |= IFF_UNICAST_FLT;
- netdev->hw_enc_features |= NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
- NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
- NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
- NETIF_F_SCTP_CRC | NETIF_F_TSO_MANGLEID | NETIF_F_FRAGLIST;
-
netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
@@ -3141,62 +3135,37 @@ static void hns3_set_default_feature(struct net_device *netdev)
NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
NETIF_F_SCTP_CRC | NETIF_F_FRAGLIST;
- netdev->vlan_features |= NETIF_F_RXCSUM |
- NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO |
- NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
- NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
- NETIF_F_SCTP_CRC | NETIF_F_FRAGLIST;
-
- netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_CTAG_RX |
- NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
- NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
- NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
- NETIF_F_SCTP_CRC | NETIF_F_FRAGLIST;
-
if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {
- netdev->hw_features |= NETIF_F_GRO_HW;
netdev->features |= NETIF_F_GRO_HW;
- if (!(h->flags & HNAE3_SUPPORT_VF)) {
- netdev->hw_features |= NETIF_F_NTUPLE;
+ if (!(h->flags & HNAE3_SUPPORT_VF))
netdev->features |= NETIF_F_NTUPLE;
- }
}
- if (test_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps)) {
- netdev->hw_features |= NETIF_F_GSO_UDP_L4;
+ if (test_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps))
netdev->features |= NETIF_F_GSO_UDP_L4;
- netdev->vlan_features |= NETIF_F_GSO_UDP_L4;
- netdev->hw_enc_features |= NETIF_F_GSO_UDP_L4;
- }
- if (test_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps)) {
- netdev->hw_features |= NETIF_F_HW_CSUM;
+ if (test_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps))
netdev->features |= NETIF_F_HW_CSUM;
- netdev->vlan_features |= NETIF_F_HW_CSUM;
- netdev->hw_enc_features |= NETIF_F_HW_CSUM;
- } else {
- netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+ else
netdev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
- netdev->vlan_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
- netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
- }
- if (test_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, ae_dev->caps)) {
- netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ if (test_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, ae_dev->caps))
netdev->features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
- netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
- netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
- }
- if (test_bit(HNAE3_DEV_SUPPORT_FD_FORWARD_TC_B, ae_dev->caps)) {
- netdev->hw_features |= NETIF_F_HW_TC;
+ if (test_bit(HNAE3_DEV_SUPPORT_FD_FORWARD_TC_B, ae_dev->caps))
netdev->features |= NETIF_F_HW_TC;
- }
- if (test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps))
- netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ netdev->hw_features |= netdev->features;
+ if (!test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps))
+ netdev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+
+ netdev->vlan_features |= netdev->features &
+ ~(NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_GRO_HW | NETIF_F_NTUPLE |
+ NETIF_F_HW_TC);
+
+ netdev->hw_enc_features |= netdev->vlan_features | NETIF_F_TSO_MANGLEID;
}
static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
@@ -3205,6 +3174,21 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
unsigned int order = hns3_page_order(ring);
struct page *p;
+ if (ring->page_pool) {
+ p = page_pool_dev_alloc_frag(ring->page_pool,
+ &cb->page_offset,
+ hns3_buf_size(ring));
+ if (unlikely(!p))
+ return -ENOMEM;
+
+ cb->priv = p;
+ cb->buf = page_address(p);
+ cb->dma = page_pool_get_dma_addr(p);
+ cb->type = DESC_TYPE_PP_FRAG;
+ cb->reuse_flag = 0;
+ return 0;
+ }
+
p = dev_alloc_pages(order);
if (!p)
return -ENOMEM;
@@ -3227,8 +3211,13 @@ static void hns3_free_buffer(struct hns3_enet_ring *ring,
if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_HEAD |
DESC_TYPE_BOUNCE_ALL | DESC_TYPE_SGL_SKB))
napi_consume_skb(cb->priv, budget);
- else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias)
- __page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
+ else if (!HNAE3_IS_TX_RING(ring)) {
+ if (cb->type & DESC_TYPE_PAGE && cb->pagecnt_bias)
+ __page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
+ else if (cb->type & DESC_TYPE_PP_FRAG)
+ page_pool_put_full_page(ring->page_pool, cb->priv,
+ false);
+ }
memset(cb, 0, sizeof(*cb));
}
@@ -3315,7 +3304,7 @@ static int hns3_alloc_and_map_buffer(struct hns3_enet_ring *ring,
int ret;
ret = hns3_alloc_buffer(ring, cb);
- if (ret)
+ if (ret || ring->page_pool)
goto out;
ret = hns3_map_buffer(ring, cb);
@@ -3337,7 +3326,8 @@ static int hns3_alloc_and_attach_buffer(struct hns3_enet_ring *ring, int i)
if (ret)
return ret;
- ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
+ ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma +
+ ring->desc_cb[i].page_offset);
return 0;
}
@@ -3367,7 +3357,8 @@ static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i,
{
hns3_unmap_buffer(ring, &ring->desc_cb[i]);
ring->desc_cb[i] = *res_cb;
- ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
+ ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma +
+ ring->desc_cb[i].page_offset);
ring->desc[i].rx.bd_base_info = 0;
}
@@ -3539,6 +3530,12 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
u32 frag_size = size - pull_len;
bool reused;
+ if (ring->page_pool) {
+ skb_add_rx_frag(skb, i, desc_cb->priv, frag_offset,
+ frag_size, truesize);
+ return;
+ }
+
/* Avoid re-using remote or pfmem page */
if (unlikely(!dev_page_is_reusable(desc_cb->priv)))
goto out;
@@ -3856,6 +3853,9 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
/* We can reuse buffer as-is, just make sure it is reusable */
if (dev_page_is_reusable(desc_cb->priv))
desc_cb->reuse_flag = 1;
+ else if (desc_cb->type & DESC_TYPE_PP_FRAG)
+ page_pool_put_full_page(ring->page_pool, desc_cb->priv,
+ false);
else /* This page cannot be reused so discard it */
__page_frag_cache_drain(desc_cb->priv,
desc_cb->pagecnt_bias);
@@ -3863,6 +3863,10 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
hns3_rx_ring_move_fw(ring);
return 0;
}
+
+ if (ring->page_pool)
+ skb_mark_for_recycle(skb);
+
u64_stats_update_begin(&ring->syncp);
ring->stats.seg_pkt_cnt++;
u64_stats_update_end(&ring->syncp);
@@ -3901,6 +3905,10 @@ static int hns3_add_frag(struct hns3_enet_ring *ring)
"alloc rx fraglist skb fail\n");
return -ENXIO;
}
+
+ if (ring->page_pool)
+ skb_mark_for_recycle(new_skb);
+
ring->frag_num = 0;
if (ring->tail_skb) {
@@ -4434,9 +4442,7 @@ static void hns3_tx_dim_work(struct work_struct *work)
static void hns3_nic_init_dim(struct hns3_enet_tqp_vector *tqp_vector)
{
INIT_WORK(&tqp_vector->rx_group.dim.work, hns3_rx_dim_work);
- tqp_vector->rx_group.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
INIT_WORK(&tqp_vector->tx_group.dim.work, hns3_tx_dim_work);
- tqp_vector->tx_group.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}
static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
@@ -4705,6 +4711,29 @@ static void hns3_put_ring_config(struct hns3_nic_priv *priv)
priv->ring = NULL;
}
+static void hns3_alloc_page_pool(struct hns3_enet_ring *ring)
+{
+ struct page_pool_params pp_params = {
+ .flags = PP_FLAG_DMA_MAP | PP_FLAG_PAGE_FRAG |
+ PP_FLAG_DMA_SYNC_DEV,
+ .order = hns3_page_order(ring),
+ .pool_size = ring->desc_num * hns3_buf_size(ring) /
+ (PAGE_SIZE << hns3_page_order(ring)),
+ .nid = dev_to_node(ring_to_dev(ring)),
+ .dev = ring_to_dev(ring),
+ .dma_dir = DMA_FROM_DEVICE,
+ .offset = 0,
+ .max_len = PAGE_SIZE << hns3_page_order(ring),
+ };
+
+ ring->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(ring->page_pool)) {
+ dev_warn(ring_to_dev(ring), "page pool creation failed: %ld\n",
+ PTR_ERR(ring->page_pool));
+ ring->page_pool = NULL;
+ }
+}
+
static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
{
int ret;
@@ -4724,6 +4753,8 @@ static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
goto out_with_desc_cb;
if (!HNAE3_IS_TX_RING(ring)) {
+ hns3_alloc_page_pool(ring);
+
ret = hns3_alloc_ring_buffers(ring);
if (ret)
goto out_with_desc;
@@ -4764,6 +4795,11 @@ void hns3_fini_ring(struct hns3_enet_ring *ring)
devm_kfree(ring_to_dev(ring), tx_spare);
ring->tx_spare = NULL;
}
+
+ if (!HNAE3_IS_TX_RING(ring) && ring->page_pool) {
+ page_pool_destroy(ring->page_pool);
+ ring->page_pool = NULL;
+ }
}
static int hns3_buf_size2type(u32 buf_size)
@@ -4954,6 +4990,66 @@ static void hns3_info_show(struct hns3_nic_priv *priv)
dev_info(priv->dev, "Max mtu size: %u\n", priv->netdev->max_mtu);
}
+static void hns3_set_cq_period_mode(struct hns3_nic_priv *priv,
+ enum dim_cq_period_mode mode, bool is_tx)
+{
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(priv->ae_handle->pdev);
+ struct hnae3_handle *handle = priv->ae_handle;
+ int i;
+
+ if (is_tx) {
+ priv->tx_cqe_mode = mode;
+
+ for (i = 0; i < priv->vector_num; i++)
+ priv->tqp_vector[i].tx_group.dim.mode = mode;
+ } else {
+ priv->rx_cqe_mode = mode;
+
+ for (i = 0; i < priv->vector_num; i++)
+ priv->tqp_vector[i].rx_group.dim.mode = mode;
+ }
+
+ /* Only device versions V3 and later can switch the GL between CQ
+ * and EQ period mode.
+ */
+ if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) {
+ u32 new_mode;
+ u64 reg;
+
+ new_mode = (mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE) ?
+ HNS3_CQ_MODE_CQE : HNS3_CQ_MODE_EQE;
+ reg = is_tx ? HNS3_GL1_CQ_MODE_REG : HNS3_GL0_CQ_MODE_REG;
+
+ writel(new_mode, handle->kinfo.io_base + reg);
+ }
+}
+
+void hns3_cq_period_mode_init(struct hns3_nic_priv *priv,
+ enum dim_cq_period_mode tx_mode,
+ enum dim_cq_period_mode rx_mode)
+{
+ hns3_set_cq_period_mode(priv, tx_mode, true);
+ hns3_set_cq_period_mode(priv, rx_mode, false);
+}
+
+static void hns3_state_init(struct hnae3_handle *handle)
+{
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev);
+ struct net_device *netdev = handle->kinfo.netdev;
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+
+ set_bit(HNS3_NIC_STATE_INITED, &priv->state);
+
+ if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3)
+ set_bit(HNAE3_PFLAG_LIMIT_PROMISC, &handle->supported_pflags);
+
+ if (test_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps))
+ set_bit(HNS3_NIC_STATE_HW_TX_CSUM_ENABLE, &priv->state);
+
+ if (hnae3_ae_dev_rxd_adv_layout_supported(ae_dev))
+ set_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state);
+}
+
static int hns3_client_init(struct hnae3_handle *handle)
{
struct pci_dev *pdev = handle->pdev;
@@ -5021,6 +5117,9 @@ static int hns3_client_init(struct hnae3_handle *handle)
goto out_init_ring;
}
+ hns3_cq_period_mode_init(priv, DIM_CQ_PERIOD_MODE_START_FROM_EQE,
+ DIM_CQ_PERIOD_MODE_START_FROM_EQE);
+
ret = hns3_init_phy(netdev);
if (ret)
goto out_init_phy;
@@ -5054,16 +5153,7 @@ static int hns3_client_init(struct hnae3_handle *handle)
netdev->max_mtu = HNS3_MAX_MTU(ae_dev->dev_specs.max_frm_size);
- if (test_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps))
- set_bit(HNS3_NIC_STATE_HW_TX_CSUM_ENABLE, &priv->state);
-
- if (hnae3_ae_dev_rxd_adv_layout_supported(ae_dev))
- set_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state);
-
- set_bit(HNS3_NIC_STATE_INITED, &priv->state);
-
- if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3)
- set_bit(HNAE3_PFLAG_LIMIT_PROMISC, &handle->supported_pflags);
+ hns3_state_init(handle);
ret = register_netdev(netdev);
if (ret) {
@@ -5353,6 +5443,8 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
if (ret)
goto err_uninit_vector;
+ hns3_cq_period_mode_init(priv, priv->tx_cqe_mode, priv->rx_cqe_mode);
+
/* the device can work without cpu rmap, only aRFS needs it */
ret = hns3_set_rx_cpu_rmap(netdev);
if (ret)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 15af3d93857b..6162d9f88e37 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -6,6 +6,7 @@
#include <linux/dim.h>
#include <linux/if_vlan.h>
+#include <net/page_pool.h>
#include "hnae3.h"
@@ -201,6 +202,12 @@ enum hns3_nic_state {
#define HNS3_RING_EN_B 0
+#define HNS3_GL0_CQ_MODE_REG 0x20d00
+#define HNS3_GL1_CQ_MODE_REG 0x20d04
+#define HNS3_GL2_CQ_MODE_REG 0x20d08
+#define HNS3_CQ_MODE_EQE 1U
+#define HNS3_CQ_MODE_CQE 0U
+
enum hns3_pkt_l2t_type {
HNS3_L2_TYPE_UNICAST,
HNS3_L2_TYPE_MULTICAST,
@@ -307,6 +314,7 @@ enum hns3_desc_type {
DESC_TYPE_BOUNCE_ALL = 1 << 3,
DESC_TYPE_BOUNCE_HEAD = 1 << 4,
DESC_TYPE_SGL_SKB = 1 << 5,
+ DESC_TYPE_PP_FRAG = 1 << 6,
};
struct hns3_desc_cb {
@@ -340,7 +348,7 @@ enum hns3_pkt_l3type {
HNS3_L3_TYPE_LLDP,
HNS3_L3_TYPE_BPDU,
HNS3_L3_TYPE_MAC_PAUSE,
- HNS3_L3_TYPE_PFC_PAUSE,/* 0x9*/
+ HNS3_L3_TYPE_PFC_PAUSE, /* 0x9 */
/* reserved for 0xA~0xB */
@@ -384,11 +392,11 @@ enum hns3_pkt_ol4type {
};
struct hns3_rx_ptype {
- u32 ptype:8;
- u32 csum_level:2;
- u32 ip_summed:2;
- u32 l3_type:4;
- u32 valid:1;
+ u32 ptype : 8;
+ u32 csum_level : 2;
+ u32 ip_summed : 2;
+ u32 l3_type : 4;
+ u32 valid : 1;
};
struct ring_stats {
@@ -451,6 +459,7 @@ struct hns3_enet_ring {
struct hnae3_queue *tqp;
int queue_index;
struct device *dev; /* will be used for DMA mapping of descriptors */
+ struct page_pool *page_pool;
/* statistic */
struct ring_stats stats;
@@ -513,9 +522,9 @@ struct hns3_enet_coalesce {
u16 int_gl;
u16 int_ql;
u16 int_ql_max;
- u8 adapt_enable:1;
- u8 ql_enable:1;
- u8 unit_1us:1;
+ u8 adapt_enable : 1;
+ u8 ql_enable : 1;
+ u8 unit_1us : 1;
enum hns3_flow_level_range flow_level;
};
@@ -569,6 +578,8 @@ struct hns3_nic_priv {
unsigned long state;
+ enum dim_cq_period_mode tx_cqe_mode;
+ enum dim_cq_period_mode rx_cqe_mode;
struct hns3_enet_coalesce tx_coal;
struct hns3_enet_coalesce rx_coal;
u32 tx_copybreak;
@@ -593,6 +604,11 @@ struct hns3_hw_error_info {
const char *msg;
};
+struct hns3_reset_type_map {
+ enum ethtool_reset_flags rst_flags;
+ enum hnae3_reset_type rst_type;
+};
+
static inline int ring_space(struct hns3_enet_ring *ring)
{
/* This smp_load_acquire() pairs with smp_store_release() in
@@ -702,4 +718,7 @@ void hns3_dbg_register_debugfs(const char *debugfs_dir_name);
void hns3_dbg_unregister_debugfs(void);
void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size);
u16 hns3_get_max_available_channels(struct hnae3_handle *h);
+void hns3_cq_period_mode_init(struct hns3_nic_priv *priv,
+ enum dim_cq_period_mode tx_mode,
+ enum dim_cq_period_mode rx_mode);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 82061ab6930f..7ea511d59e91 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -7,21 +7,7 @@
#include <linux/sfp.h>
#include "hns3_enet.h"
-
-struct hns3_stats {
- char stats_string[ETH_GSTRING_LEN];
- int stats_offset;
-};
-
-struct hns3_sfp_type {
- u8 type;
- u8 ext_type;
-};
-
-struct hns3_pflag_desc {
- char name[ETH_GSTRING_LEN];
- void (*handler)(struct net_device *netdev, bool enable);
-};
+#include "hns3_ethtool.h"
/* tqp related stats */
#define HNS3_TQP_STAT(_string, _member) { \
@@ -312,33 +298,8 @@ out:
return ret_val;
}
-/**
- * hns3_self_test - self test
- * @ndev: net device
- * @eth_test: test cmd
- * @data: test result
- */
-static void hns3_self_test(struct net_device *ndev,
- struct ethtool_test *eth_test, u64 *data)
+static void hns3_set_selftest_param(struct hnae3_handle *h, int (*st_param)[2])
{
- struct hns3_nic_priv *priv = netdev_priv(ndev);
- struct hnae3_handle *h = priv->ae_handle;
- int st_param[HNS3_SELF_TEST_TYPE_NUM][2];
- bool if_running = netif_running(ndev);
- int test_index = 0;
- u32 i;
-
- if (hns3_nic_resetting(ndev)) {
- netdev_err(ndev, "dev resetting!");
- return;
- }
-
- /* Only do offline selftest, or pass by default */
- if (eth_test->flags != ETH_TEST_FL_OFFLINE)
- return;
-
- netif_dbg(h, drv, ndev, "self test start");
-
st_param[HNAE3_LOOP_APP][0] = HNAE3_LOOP_APP;
st_param[HNAE3_LOOP_APP][1] =
h->flags & HNAE3_SUPPORT_APP_LOOPBACK;
@@ -355,6 +316,18 @@ static void hns3_self_test(struct net_device *ndev,
st_param[HNAE3_LOOP_PHY][0] = HNAE3_LOOP_PHY;
st_param[HNAE3_LOOP_PHY][1] =
h->flags & HNAE3_SUPPORT_PHY_LOOPBACK;
+}
+
+static void hns3_selftest_prepare(struct net_device *ndev,
+ bool if_running, int (*st_param)[2])
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (netif_msg_ifdown(h))
+ netdev_info(ndev, "self test start\n");
+
+ hns3_set_selftest_param(h, st_param);
if (if_running)
ndev->netdev_ops->ndo_stop(ndev);
@@ -373,6 +346,35 @@ static void hns3_self_test(struct net_device *ndev,
h->ae_algo->ops->halt_autoneg(h, true);
set_bit(HNS3_NIC_STATE_TESTING, &priv->state);
+}
+
+static void hns3_selftest_restore(struct net_device *ndev, bool if_running)
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ clear_bit(HNS3_NIC_STATE_TESTING, &priv->state);
+
+ if (h->ae_algo->ops->halt_autoneg)
+ h->ae_algo->ops->halt_autoneg(h, false);
+
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+ if (h->ae_algo->ops->enable_vlan_filter)
+ h->ae_algo->ops->enable_vlan_filter(h, true);
+#endif
+
+ if (if_running)
+ ndev->netdev_ops->ndo_open(ndev);
+
+ if (netif_msg_ifdown(h))
+ netdev_info(ndev, "self test end\n");
+}
+
+static void hns3_do_selftest(struct net_device *ndev, int (*st_param)[2],
+ struct ethtool_test *eth_test, u64 *data)
+{
+ int test_index = 0;
+ u32 i;
for (i = 0; i < HNS3_SELF_TEST_TYPE_NUM; i++) {
enum hnae3_loop loop_type = (enum hnae3_loop)st_param[i][0];
@@ -391,21 +393,32 @@ static void hns3_self_test(struct net_device *ndev,
test_index++;
}
+}
- clear_bit(HNS3_NIC_STATE_TESTING, &priv->state);
-
- if (h->ae_algo->ops->halt_autoneg)
- h->ae_algo->ops->halt_autoneg(h, false);
+/**
+ * hns3_self_test - self test
+ * @ndev: net device
+ * @eth_test: test cmd
+ * @data: test result
+ */
+static void hns3_self_test(struct net_device *ndev,
+ struct ethtool_test *eth_test, u64 *data)
+{
+ int st_param[HNS3_SELF_TEST_TYPE_NUM][2];
+ bool if_running = netif_running(ndev);
-#if IS_ENABLED(CONFIG_VLAN_8021Q)
- if (h->ae_algo->ops->enable_vlan_filter)
- h->ae_algo->ops->enable_vlan_filter(h, true);
-#endif
+ if (hns3_nic_resetting(ndev)) {
+ netdev_err(ndev, "dev resetting!");
+ return;
+ }
- if (if_running)
- ndev->netdev_ops->ndo_open(ndev);
+ /* Only do offline selftest, or pass by default */
+ if (eth_test->flags != ETH_TEST_FL_OFFLINE)
+ return;
- netif_dbg(h, drv, ndev, "self test end\n");
+ hns3_selftest_prepare(ndev, if_running, st_param);
+ hns3_do_selftest(ndev, st_param, eth_test, data);
+ hns3_selftest_restore(ndev, if_running);
}
static void hns3_update_limit_promisc_mode(struct net_device *netdev,
@@ -953,6 +966,60 @@ static int hns3_get_rxnfc(struct net_device *netdev,
}
}
+static const struct hns3_reset_type_map hns3_reset_type[] = {
+ {ETH_RESET_MGMT, HNAE3_IMP_RESET},
+ {ETH_RESET_ALL, HNAE3_GLOBAL_RESET},
+ {ETH_RESET_DEDICATED, HNAE3_FUNC_RESET},
+};
+
+static const struct hns3_reset_type_map hns3vf_reset_type[] = {
+ {ETH_RESET_DEDICATED, HNAE3_VF_FUNC_RESET},
+};
+
+static int hns3_set_reset(struct net_device *netdev, u32 *flags)
+{
+ enum hnae3_reset_type rst_type = HNAE3_NONE_RESET;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
+ const struct hnae3_ae_ops *ops = h->ae_algo->ops;
+ const struct hns3_reset_type_map *rst_type_map;
+ u32 i, size;
+
+ if (ops->ae_dev_resetting && ops->ae_dev_resetting(h))
+ return -EBUSY;
+
+ if (!ops->set_default_reset_request || !ops->reset_event)
+ return -EOPNOTSUPP;
+
+ if (h->flags & HNAE3_SUPPORT_VF) {
+ rst_type_map = hns3vf_reset_type;
+ size = ARRAY_SIZE(hns3vf_reset_type);
+ } else {
+ rst_type_map = hns3_reset_type;
+ size = ARRAY_SIZE(hns3_reset_type);
+ }
+
+ for (i = 0; i < size; i++) {
+ if (rst_type_map[i].rst_flags == *flags) {
+ rst_type = rst_type_map[i].rst_type;
+ break;
+ }
+ }
+
+ if (rst_type == HNAE3_NONE_RESET ||
+ (rst_type == HNAE3_IMP_RESET &&
+ ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2))
+ return -EOPNOTSUPP;
+
+ netdev_info(netdev, "Setting reset type %d\n", rst_type);
+
+ ops->set_default_reset_request(ae_dev, rst_type);
+
+ ops->reset_event(h->pdev, h);
+
+ return 0;
+}
+
static void hns3_change_all_ring_bd_num(struct hns3_nic_priv *priv,
u32 tx_desc_num, u32 rx_desc_num)
{
@@ -1139,7 +1206,9 @@ static void hns3_get_channels(struct net_device *netdev,
}
static int hns3_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *cmd)
+ struct ethtool_coalesce *cmd,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
struct hns3_enet_coalesce *tx_coal = &priv->tx_coal;
@@ -1161,6 +1230,11 @@ static int hns3_get_coalesce(struct net_device *netdev,
cmd->tx_max_coalesced_frames = tx_coal->int_ql;
cmd->rx_max_coalesced_frames = rx_coal->int_ql;
+ kernel_coal->use_cqe_mode_tx = (priv->tx_cqe_mode ==
+ DIM_CQ_PERIOD_MODE_START_FROM_CQE);
+ kernel_coal->use_cqe_mode_rx = (priv->rx_cqe_mode ==
+ DIM_CQ_PERIOD_MODE_START_FROM_CQE);
+
return 0;
}
@@ -1321,13 +1395,17 @@ static void hns3_set_coalesce_per_queue(struct net_device *netdev,
}
static int hns3_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *cmd)
+ struct ethtool_coalesce *cmd,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct hnae3_handle *h = hns3_get_handle(netdev);
struct hns3_nic_priv *priv = netdev_priv(netdev);
struct hns3_enet_coalesce *tx_coal = &priv->tx_coal;
struct hns3_enet_coalesce *rx_coal = &priv->rx_coal;
u16 queue_num = h->kinfo.num_tqps;
+ enum dim_cq_period_mode tx_mode;
+ enum dim_cq_period_mode rx_mode;
int ret;
int i;
@@ -1353,6 +1431,14 @@ static int hns3_set_coalesce(struct net_device *netdev,
for (i = 0; i < queue_num; i++)
hns3_set_coalesce_per_queue(netdev, cmd, i);
+ tx_mode = kernel_coal->use_cqe_mode_tx ?
+ DIM_CQ_PERIOD_MODE_START_FROM_CQE :
+ DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ rx_mode = kernel_coal->use_cqe_mode_rx ?
+ DIM_CQ_PERIOD_MODE_START_FROM_CQE :
+ DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ hns3_cq_period_mode_init(priv, tx_mode, rx_mode);
+
return 0;
}
@@ -1658,7 +1744,8 @@ static int hns3_set_tunable(struct net_device *netdev,
ETHTOOL_COALESCE_USE_ADAPTIVE | \
ETHTOOL_COALESCE_RX_USECS_HIGH | \
ETHTOOL_COALESCE_TX_USECS_HIGH | \
- ETHTOOL_COALESCE_MAX_FRAMES)
+ ETHTOOL_COALESCE_MAX_FRAMES | \
+ ETHTOOL_COALESCE_USE_CQE)
static int hns3_get_ts_info(struct net_device *netdev,
struct ethtool_ts_info *info)
@@ -1671,6 +1758,71 @@ static int hns3_get_ts_info(struct net_device *netdev,
return ethtool_op_get_ts_info(netdev, info);
}
+static const struct hns3_ethtool_link_ext_state_mapping
+hns3_link_ext_state_map[] = {
+ {1, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD},
+ {2, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED},
+
+ {256, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT},
+ {257, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY},
+ {512, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT},
+
+ {513, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK},
+ {514, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED},
+ {515, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED},
+
+ {768, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY,
+ ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS},
+ {769, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY,
+ ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST},
+ {770, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY,
+ ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS},
+
+ {1024, ETHTOOL_LINK_EXT_STATE_NO_CABLE, 0},
+ {1025, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+ ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+
+ {1026, ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE, 0},
+};
+
+static int hns3_get_link_ext_state(struct net_device *netdev,
+ struct ethtool_link_ext_state_info *info)
+{
+ const struct hns3_ethtool_link_ext_state_mapping *map;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
+ u32 status_code, i;
+ int ret;
+
+ if (netif_carrier_ok(netdev))
+ return -ENODATA;
+
+ if (!h->ae_algo->ops->get_link_diagnosis_info)
+ return -EOPNOTSUPP;
+
+ ret = h->ae_algo->ops->get_link_diagnosis_info(h, &status_code);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < ARRAY_SIZE(hns3_link_ext_state_map); i++) {
+ map = &hns3_link_ext_state_map[i];
+ if (map->status_code == status_code) {
+ info->link_ext_state = map->link_ext_state;
+ info->__link_ext_substate = map->link_ext_substate;
+ return 0;
+ }
+ }
+
+ return -ENODATA;
+}
+
static const struct ethtool_ops hns3vf_ethtool_ops = {
.supported_coalesce_params = HNS3_ETHTOOL_COALESCE,
.get_drvinfo = hns3_get_drvinfo,
@@ -1699,6 +1851,7 @@ static const struct ethtool_ops hns3vf_ethtool_ops = {
.set_priv_flags = hns3_set_priv_flags,
.get_tunable = hns3_get_tunable,
.set_tunable = hns3_set_tunable,
+ .reset = hns3_set_reset,
};
static const struct ethtool_ops hns3_ethtool_ops = {
@@ -1740,6 +1893,8 @@ static const struct ethtool_ops hns3_ethtool_ops = {
.get_ts_info = hns3_get_ts_info,
.get_tunable = hns3_get_tunable,
.set_tunable = hns3_set_tunable,
+ .reset = hns3_set_reset,
+ .get_link_ext_state = hns3_get_link_ext_state,
};
void hns3_ethtool_set_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.h b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.h
new file mode 100644
index 000000000000..822d6fcbc73b
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+// Copyright (c) 2021 Hisilicon Limited.
+
+#ifndef __HNS3_ETHTOOL_H
+#define __HNS3_ETHTOOL_H
+
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+
+struct hns3_stats {
+ char stats_string[ETH_GSTRING_LEN];
+ int stats_offset;
+};
+
+struct hns3_sfp_type {
+ u8 type;
+ u8 ext_type;
+};
+
+struct hns3_pflag_desc {
+ char name[ETH_GSTRING_LEN];
+ void (*handler)(struct net_device *netdev, bool enable);
+};
+
+struct hns3_ethtool_link_ext_state_mapping {
+ u32 status_code;
+ enum ethtool_link_ext_state link_ext_state;
+ u8 link_ext_substate;
+};
+
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
index a685392dbfe9..d1bf5c4c0abb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
@@ -7,6 +7,6 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
ccflags-y += -I $(srctree)/$(src)
obj-$(CONFIG_HNS3_HCLGE) += hclge.o
-hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o hclge_debugfs.o hclge_ptp.o
+hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o hclge_debugfs.o hclge_ptp.o hclge_devlink.o
hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 887297e37cf3..474c6d1664e7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -169,17 +169,19 @@ static bool hclge_is_special_opcode(u16 opcode)
/* these commands have several descriptors,
* and use the first one to save opcode and return value
*/
- u16 spec_opcode[] = {HCLGE_OPC_STATS_64_BIT,
- HCLGE_OPC_STATS_32_BIT,
- HCLGE_OPC_STATS_MAC,
- HCLGE_OPC_STATS_MAC_ALL,
- HCLGE_OPC_QUERY_32_BIT_REG,
- HCLGE_OPC_QUERY_64_BIT_REG,
- HCLGE_QUERY_CLEAR_MPF_RAS_INT,
- HCLGE_QUERY_CLEAR_PF_RAS_INT,
- HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
- HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT,
- HCLGE_QUERY_ALL_ERR_INFO};
+ static const u16 spec_opcode[] = {
+ HCLGE_OPC_STATS_64_BIT,
+ HCLGE_OPC_STATS_32_BIT,
+ HCLGE_OPC_STATS_MAC,
+ HCLGE_OPC_STATS_MAC_ALL,
+ HCLGE_OPC_QUERY_32_BIT_REG,
+ HCLGE_OPC_QUERY_64_BIT_REG,
+ HCLGE_QUERY_CLEAR_MPF_RAS_INT,
+ HCLGE_QUERY_CLEAR_PF_RAS_INT,
+ HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
+ HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT,
+ HCLGE_QUERY_ALL_ERR_INFO
+ };
int i;
for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) {
@@ -360,41 +362,34 @@ static void hclge_set_default_capability(struct hclge_dev *hdev)
}
}
+const struct hclge_caps_bit_map hclge_cmd_caps_bit_map0[] = {
+ {HCLGE_CAP_UDP_GSO_B, HNAE3_DEV_SUPPORT_UDP_GSO_B},
+ {HCLGE_CAP_PTP_B, HNAE3_DEV_SUPPORT_PTP_B},
+ {HCLGE_CAP_INT_QL_B, HNAE3_DEV_SUPPORT_INT_QL_B},
+ {HCLGE_CAP_TQP_TXRX_INDEP_B, HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B},
+ {HCLGE_CAP_HW_TX_CSUM_B, HNAE3_DEV_SUPPORT_HW_TX_CSUM_B},
+ {HCLGE_CAP_UDP_TUNNEL_CSUM_B, HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B},
+ {HCLGE_CAP_FD_FORWARD_TC_B, HNAE3_DEV_SUPPORT_FD_FORWARD_TC_B},
+ {HCLGE_CAP_FEC_B, HNAE3_DEV_SUPPORT_FEC_B},
+ {HCLGE_CAP_PAUSE_B, HNAE3_DEV_SUPPORT_PAUSE_B},
+ {HCLGE_CAP_PHY_IMP_B, HNAE3_DEV_SUPPORT_PHY_IMP_B},
+ {HCLGE_CAP_RAS_IMP_B, HNAE3_DEV_SUPPORT_RAS_IMP_B},
+ {HCLGE_CAP_RXD_ADV_LAYOUT_B, HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B},
+ {HCLGE_CAP_PORT_VLAN_BYPASS_B, HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B},
+ {HCLGE_CAP_PORT_VLAN_BYPASS_B, HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B},
+};
+
static void hclge_parse_capability(struct hclge_dev *hdev,
struct hclge_query_version_cmd *cmd)
{
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
- u32 caps;
+ u32 caps, i;
caps = __le32_to_cpu(cmd->caps[0]);
- if (hnae3_get_bit(caps, HCLGE_CAP_UDP_GSO_B))
- set_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGE_CAP_PTP_B))
- set_bit(HNAE3_DEV_SUPPORT_PTP_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGE_CAP_INT_QL_B))
- set_bit(HNAE3_DEV_SUPPORT_INT_QL_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGE_CAP_TQP_TXRX_INDEP_B))
- set_bit(HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGE_CAP_HW_TX_CSUM_B))
- set_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGE_CAP_UDP_TUNNEL_CSUM_B))
- set_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGE_CAP_FD_FORWARD_TC_B))
- set_bit(HNAE3_DEV_SUPPORT_FD_FORWARD_TC_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGE_CAP_FEC_B))
- set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGE_CAP_PAUSE_B))
- set_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGE_CAP_PHY_IMP_B))
- set_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGE_CAP_RAS_IMP_B))
- set_bit(HNAE3_DEV_SUPPORT_RAS_IMP_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGE_CAP_RXD_ADV_LAYOUT_B))
- set_bit(HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGE_CAP_PORT_VLAN_BYPASS_B)) {
- set_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps);
- set_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps);
- }
+ for (i = 0; i < ARRAY_SIZE(hclge_cmd_caps_bit_map0); i++)
+ if (hnae3_get_bit(caps, hclge_cmd_caps_bit_map0[i].imp_bit))
+ set_bit(hclge_cmd_caps_bit_map0[i].local_bit,
+ ae_dev->caps);
}
static __le32 hclge_build_api_caps(void)
@@ -573,9 +568,13 @@ static void hclge_cmd_uninit_regs(struct hclge_hw *hw)
void hclge_cmd_uninit(struct hclge_dev *hdev)
{
+ set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
+ /* wait to ensure that the firmware completes the possible left
+ * over commands.
+ */
+ msleep(HCLGE_CMDQ_CLEAR_WAIT_TIME);
spin_lock_bh(&hdev->hw.cmq.csq.lock);
spin_lock(&hdev->hw.cmq.crq.lock);
- set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
hclge_cmd_uninit_regs(&hdev->hw);
spin_unlock(&hdev->hw.cmq.crq.lock);
spin_unlock_bh(&hdev->hw.cmq.csq.lock);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 18bde77ef944..33244472e0d0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -9,6 +9,7 @@
#include "hnae3.h"
#define HCLGE_CMDQ_TX_TIMEOUT 30000
+#define HCLGE_CMDQ_CLEAR_WAIT_TIME 200
#define HCLGE_DESC_DATA_LEN 6
struct hclge_dev;
@@ -270,6 +271,9 @@ enum hclge_opcode_type {
/* Led command */
HCLGE_OPC_LED_STATUS_CFG = 0xB000,
+ /* clear hardware resource command */
+ HCLGE_OPC_CLEAR_HW_RESOURCE = 0x700B,
+
/* NCL config command */
HCLGE_OPC_QUERY_NCL_CONFIG = 0x7011,
@@ -316,6 +320,9 @@ enum hclge_opcode_type {
/* PHY command */
HCLGE_OPC_PHY_LINK_KSETTING = 0x7025,
HCLGE_OPC_PHY_REG = 0x7026,
+
+ /* Query link diagnosis info command */
+ HCLGE_OPC_QUERY_LINK_DIAGNOSIS = 0x702A,
};
#define HCLGE_TQP_REG_OFFSET 0x80000
@@ -446,7 +453,7 @@ struct hclge_tc_thrd {
};
struct hclge_priv_buf {
- struct hclge_waterline wl; /* Waterline for low and high*/
+ struct hclge_waterline wl; /* Waterline for low and high */
u32 buf_size; /* TC private buffer size */
u32 tx_buf_size;
u32 enable; /* Enable TC private buffer or not */
@@ -1010,16 +1017,6 @@ struct hclge_common_lb_cmd {
#define HCLGE_TYPE_CRQ 0
#define HCLGE_TYPE_CSQ 1
-#define HCLGE_NIC_CSQ_BASEADDR_L_REG 0x27000
-#define HCLGE_NIC_CSQ_BASEADDR_H_REG 0x27004
-#define HCLGE_NIC_CSQ_DEPTH_REG 0x27008
-#define HCLGE_NIC_CSQ_TAIL_REG 0x27010
-#define HCLGE_NIC_CSQ_HEAD_REG 0x27014
-#define HCLGE_NIC_CRQ_BASEADDR_L_REG 0x27018
-#define HCLGE_NIC_CRQ_BASEADDR_H_REG 0x2701c
-#define HCLGE_NIC_CRQ_DEPTH_REG 0x27020
-#define HCLGE_NIC_CRQ_TAIL_REG 0x27024
-#define HCLGE_NIC_CRQ_HEAD_REG 0x27028
/* this bit indicates that the driver is ready for hardware reset */
#define HCLGE_NIC_SW_RST_RDY_B 16
@@ -1194,6 +1191,19 @@ struct hclge_dev_specs_1_cmd {
u8 rsv1[18];
};
+/* mac speed type defined in firmware command */
+enum HCLGE_FIRMWARE_MAC_SPEED {
+ HCLGE_FW_MAC_SPEED_1G,
+ HCLGE_FW_MAC_SPEED_10G,
+ HCLGE_FW_MAC_SPEED_25G,
+ HCLGE_FW_MAC_SPEED_40G,
+ HCLGE_FW_MAC_SPEED_50G,
+ HCLGE_FW_MAC_SPEED_100G,
+ HCLGE_FW_MAC_SPEED_10M,
+ HCLGE_FW_MAC_SPEED_100M,
+ HCLGE_FW_MAC_SPEED_200G,
+};
+
#define HCLGE_PHY_LINK_SETTING_BD_NUM 2
struct hclge_phy_link_ksetting_0_cmd {
@@ -1224,6 +1234,12 @@ struct hclge_phy_reg_cmd {
u8 rsv1[18];
};
+/* capabilities bits map between imp firmware and local driver */
+struct hclge_caps_bit_map {
+ u16 imp_bit;
+ u16 local_bit;
+};
+
int hclge_cmd_init(struct hclge_dev *hdev);
static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value)
{
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
index 5bf5db91d16c..4a619e5d3f35 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -104,26 +104,30 @@ static int hclge_dcb_common_validate(struct hclge_dev *hdev, u8 num_tc,
return 0;
}
-static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
- u8 *tc, bool *changed)
+static u8 hclge_ets_tc_changed(struct hclge_dev *hdev, struct ieee_ets *ets,
+ bool *changed)
{
- bool has_ets_tc = false;
- u32 total_ets_bw = 0;
- u8 max_tc = 0;
- int ret;
+ u8 max_tc_id = 0;
u8 i;
for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
if (ets->prio_tc[i] != hdev->tm_info.prio_tc[i])
*changed = true;
- if (ets->prio_tc[i] > max_tc)
- max_tc = ets->prio_tc[i];
+ if (ets->prio_tc[i] > max_tc_id)
+ max_tc_id = ets->prio_tc[i];
}
- ret = hclge_dcb_common_validate(hdev, max_tc + 1, ets->prio_tc);
- if (ret)
- return ret;
+ /* return max tc number, max tc id need to plus 1 */
+ return max_tc_id + 1;
+}
+
+static int hclge_ets_sch_mode_validate(struct hclge_dev *hdev,
+ struct ieee_ets *ets, bool *changed)
+{
+ bool has_ets_tc = false;
+ u32 total_ets_bw = 0;
+ u8 i;
for (i = 0; i < hdev->tc_max; i++) {
switch (ets->tc_tsa[i]) {
@@ -148,7 +152,26 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
if (has_ets_tc && total_ets_bw != BW_PERCENT)
return -EINVAL;
- *tc = max_tc + 1;
+ return 0;
+}
+
+static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
+ u8 *tc, bool *changed)
+{
+ u8 tc_num;
+ int ret;
+
+ tc_num = hclge_ets_tc_changed(hdev, ets, changed);
+
+ ret = hclge_dcb_common_validate(hdev, tc_num, ets->prio_tc);
+ if (ret)
+ return ret;
+
+ ret = hclge_ets_sch_mode_validate(hdev, ets, changed);
+ if (ret)
+ return ret;
+
+ *tc = tc_num;
if (*tc != hdev->tm_info.num_tc)
*changed = true;
@@ -234,9 +257,7 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
if (ret)
goto err_out;
- ret = hclge_notify_init_up(hdev);
- if (ret)
- return ret;
+ return hclge_notify_init_up(hdev);
}
return hclge_tm_dwrr_cfg(hdev);
@@ -255,21 +276,12 @@ static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
u64 requests[HNAE3_MAX_TC], indications[HNAE3_MAX_TC];
struct hclge_vport *vport = hclge_get_vport(h);
struct hclge_dev *hdev = vport->back;
- u8 i, j, pfc_map, *prio_tc;
int ret;
+ u8 i;
memset(pfc, 0, sizeof(*pfc));
pfc->pfc_cap = hdev->pfc_max;
- prio_tc = hdev->tm_info.prio_tc;
- pfc_map = hdev->tm_info.hw_pfc_map;
-
- /* Pfc setting is based on TC */
- for (i = 0; i < hdev->tm_info.num_tc; i++) {
- for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) {
- if ((prio_tc[j] == i) && (pfc_map & BIT(i)))
- pfc->pfc_en |= BIT(j);
- }
- }
+ pfc->pfc_en = hdev->tm_info.pfc_en;
ret = hclge_pfc_tx_stats_get(hdev, requests);
if (ret)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 288788186ecc..68ed1715ac52 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -926,26 +926,45 @@ static int hclge_dbg_dump_tm_nodes(struct hclge_dev *hdev, char *buf, int len)
return 0;
}
+static const struct hclge_dbg_item tm_pri_items[] = {
+ { "ID", 4 },
+ { "MODE", 2 },
+ { "DWRR", 2 },
+ { "C_IR_B", 2 },
+ { "C_IR_U", 2 },
+ { "C_IR_S", 2 },
+ { "C_BS_B", 2 },
+ { "C_BS_S", 2 },
+ { "C_FLAG", 2 },
+ { "C_RATE(Mbps)", 2 },
+ { "P_IR_B", 2 },
+ { "P_IR_U", 2 },
+ { "P_IR_S", 2 },
+ { "P_BS_B", 2 },
+ { "P_BS_S", 2 },
+ { "P_FLAG", 2 },
+ { "P_RATE(Mbps)", 0 }
+};
+
static int hclge_dbg_dump_tm_pri(struct hclge_dev *hdev, char *buf, int len)
{
- struct hclge_tm_shaper_para c_shaper_para;
- struct hclge_tm_shaper_para p_shaper_para;
- u8 pri_num, sch_mode, weight;
- char *sch_mode_str;
- int pos = 0;
- int ret;
- u8 i;
+ char data_str[ARRAY_SIZE(tm_pri_items)][HCLGE_DBG_DATA_STR_LEN];
+ struct hclge_tm_shaper_para c_shaper_para, p_shaper_para;
+ char *result[ARRAY_SIZE(tm_pri_items)], *sch_mode_str;
+ char content[HCLGE_DBG_TM_INFO_LEN];
+ u8 pri_num, sch_mode, weight, i, j;
+ int pos, ret;
ret = hclge_tm_get_pri_num(hdev, &pri_num);
if (ret)
return ret;
- pos += scnprintf(buf + pos, len - pos,
- "ID MODE DWRR C_IR_B C_IR_U C_IR_S C_BS_B ");
- pos += scnprintf(buf + pos, len - pos,
- "C_BS_S C_FLAG C_RATE(Mbps) P_IR_B P_IR_U ");
- pos += scnprintf(buf + pos, len - pos,
- "P_IR_S P_BS_B P_BS_S P_FLAG P_RATE(Mbps)\n");
+ for (i = 0; i < ARRAY_SIZE(tm_pri_items); i++)
+ result[i] = &data_str[i][0];
+
+ hclge_dbg_fill_content(content, sizeof(content), tm_pri_items,
+ NULL, ARRAY_SIZE(tm_pri_items));
+ pos = scnprintf(buf, len, "%s", content);
for (i = 0; i < pri_num; i++) {
ret = hclge_tm_get_pri_sch_mode(hdev, i, &sch_mode);
@@ -971,21 +990,16 @@ static int hclge_dbg_dump_tm_pri(struct hclge_dev *hdev, char *buf, int len)
sch_mode_str = sch_mode & HCLGE_TM_TX_SCHD_DWRR_MSK ? "dwrr" :
"sp";
- pos += scnprintf(buf + pos, len - pos,
- "%04u %4s %3u %3u %3u %3u ",
- i, sch_mode_str, weight, c_shaper_para.ir_b,
- c_shaper_para.ir_u, c_shaper_para.ir_s);
- pos += scnprintf(buf + pos, len - pos,
- "%3u %3u %1u %6u ",
- c_shaper_para.bs_b, c_shaper_para.bs_s,
- c_shaper_para.flag, c_shaper_para.rate);
- pos += scnprintf(buf + pos, len - pos,
- "%3u %3u %3u %3u %3u ",
- p_shaper_para.ir_b, p_shaper_para.ir_u,
- p_shaper_para.ir_s, p_shaper_para.bs_b,
- p_shaper_para.bs_s);
- pos += scnprintf(buf + pos, len - pos, "%1u %6u\n",
- p_shaper_para.flag, p_shaper_para.rate);
+ j = 0;
+ sprintf(result[j++], "%04u", i);
+ sprintf(result[j++], "%4s", sch_mode_str);
+ sprintf(result[j++], "%3u", weight);
+ hclge_dbg_fill_shaper_content(&c_shaper_para, result, &j);
+ hclge_dbg_fill_shaper_content(&p_shaper_para, result, &j);
+ hclge_dbg_fill_content(content, sizeof(content), tm_pri_items,
+ (const char **)result,
+ ARRAY_SIZE(tm_pri_items));
+ pos += scnprintf(buf + pos, len - pos, "%s", content);
}
return 0;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c
new file mode 100644
index 000000000000..e4aad695abcc
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (c) 2021 Hisilicon Limited. */
+
+#include <net/devlink.h>
+
+#include "hclge_devlink.h"
+
+static int hclge_devlink_info_get(struct devlink *devlink,
+ struct devlink_info_req *req,
+ struct netlink_ext_ack *extack)
+{
+#define HCLGE_DEVLINK_FW_STRING_LEN 32
+ struct hclge_devlink_priv *priv = devlink_priv(devlink);
+ char version_str[HCLGE_DEVLINK_FW_STRING_LEN];
+ struct hclge_dev *hdev = priv->hdev;
+ int ret;
+
+ ret = devlink_info_driver_name_put(req, KBUILD_MODNAME);
+ if (ret)
+ return ret;
+
+ snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu",
+ hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK,
+ HNAE3_FW_VERSION_BYTE3_SHIFT),
+ hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE2_MASK,
+ HNAE3_FW_VERSION_BYTE2_SHIFT),
+ hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE1_MASK,
+ HNAE3_FW_VERSION_BYTE1_SHIFT),
+ hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE0_MASK,
+ HNAE3_FW_VERSION_BYTE0_SHIFT));
+
+ return devlink_info_version_running_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_FW,
+ version_str);
+}
+
+static int hclge_devlink_reload_down(struct devlink *devlink, bool netns_change,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
+ struct netlink_ext_ack *extack)
+{
+ struct hclge_devlink_priv *priv = devlink_priv(devlink);
+ struct hclge_dev *hdev = priv->hdev;
+ struct hnae3_handle *h = &hdev->vport->nic;
+ struct pci_dev *pdev = hdev->pdev;
+ int ret;
+
+ if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) {
+ dev_err(&pdev->dev, "reset is handling\n");
+ return -EBUSY;
+ }
+
+ switch (action) {
+ case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
+ rtnl_lock();
+ ret = hdev->nic_client->ops->reset_notify(h, HNAE3_DOWN_CLIENT);
+ if (ret) {
+ rtnl_unlock();
+ return ret;
+ }
+
+ ret = hdev->nic_client->ops->reset_notify(h,
+ HNAE3_UNINIT_CLIENT);
+ rtnl_unlock();
+ return ret;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int hclge_devlink_reload_up(struct devlink *devlink,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
+ u32 *actions_performed,
+ struct netlink_ext_ack *extack)
+{
+ struct hclge_devlink_priv *priv = devlink_priv(devlink);
+ struct hclge_dev *hdev = priv->hdev;
+ struct hnae3_handle *h = &hdev->vport->nic;
+ int ret;
+
+ *actions_performed = BIT(action);
+ switch (action) {
+ case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
+ rtnl_lock();
+ ret = hdev->nic_client->ops->reset_notify(h, HNAE3_INIT_CLIENT);
+ if (ret) {
+ rtnl_unlock();
+ return ret;
+ }
+
+ ret = hdev->nic_client->ops->reset_notify(h, HNAE3_UP_CLIENT);
+ rtnl_unlock();
+ return ret;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static const struct devlink_ops hclge_devlink_ops = {
+ .info_get = hclge_devlink_info_get,
+ .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT),
+ .reload_down = hclge_devlink_reload_down,
+ .reload_up = hclge_devlink_reload_up,
+};
+
+int hclge_devlink_init(struct hclge_dev *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ struct hclge_devlink_priv *priv;
+ struct devlink *devlink;
+ int ret;
+
+ devlink = devlink_alloc(&hclge_devlink_ops,
+ sizeof(struct hclge_devlink_priv), &pdev->dev);
+ if (!devlink)
+ return -ENOMEM;
+
+ priv = devlink_priv(devlink);
+ priv->hdev = hdev;
+ hdev->devlink = devlink;
+
+ ret = devlink_register(devlink);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to register devlink, ret = %d\n",
+ ret);
+ goto out_reg_fail;
+ }
+
+ devlink_reload_enable(devlink);
+
+ return 0;
+
+out_reg_fail:
+ devlink_free(devlink);
+ return ret;
+}
+
+void hclge_devlink_uninit(struct hclge_dev *hdev)
+{
+ struct devlink *devlink = hdev->devlink;
+
+ devlink_reload_disable(devlink);
+
+ devlink_unregister(devlink);
+
+ devlink_free(devlink);
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h
new file mode 100644
index 000000000000..918be04507a5
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (c) 2021 Hisilicon Limited. */
+
+#ifndef __HCLGE_DEVLINK_H
+#define __HCLGE_DEVLINK_H
+
+#include "hclge_main.h"
+
+struct hclge_devlink_priv {
+ struct hclge_dev *hdev;
+};
+
+int hclge_devlink_init(struct hclge_dev *hdev);
+void hclge_devlink_uninit(struct hclge_dev *hdev);
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index ec9a7f8bc3fe..718c16d686fa 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -4,468 +4,895 @@
#include "hclge_err.h"
static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = {
- { .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(17), .msg = "imp_itcm4_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(1),
+ .msg = "imp_itcm0_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(3),
+ .msg = "imp_itcm1_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(5),
+ .msg = "imp_itcm2_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(7),
+ .msg = "imp_itcm3_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(9),
+ .msg = "imp_dtcm0_mem0_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(11),
+ .msg = "imp_dtcm0_mem1_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(13),
+ .msg = "imp_dtcm1_mem0_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(15),
+ .msg = "imp_dtcm1_mem1_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(17),
+ .msg = "imp_itcm4_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = {
- { .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(17), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(19), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(21), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(23), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(25), .msg = "cmdq_rocee_rx_head_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(27), .msg = "cmdq_rocee_tx_head_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(29), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(31), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(1),
+ .msg = "cmdq_nic_rx_depth_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(3),
+ .msg = "cmdq_nic_tx_depth_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(5),
+ .msg = "cmdq_nic_rx_tail_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(7),
+ .msg = "cmdq_nic_tx_tail_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(9),
+ .msg = "cmdq_nic_rx_head_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(11),
+ .msg = "cmdq_nic_tx_head_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(13),
+ .msg = "cmdq_nic_rx_addr_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(15),
+ .msg = "cmdq_nic_tx_addr_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(17),
+ .msg = "cmdq_rocee_rx_depth_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(19),
+ .msg = "cmdq_rocee_tx_depth_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(21),
+ .msg = "cmdq_rocee_rx_tail_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(23),
+ .msg = "cmdq_rocee_tx_tail_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(25),
+ .msg = "cmdq_rocee_rx_head_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(27),
+ .msg = "cmdq_rocee_tx_head_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(29),
+ .msg = "cmdq_rocee_rx_addr_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(31),
+ .msg = "cmdq_rocee_tx_addr_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
- { .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(9), .msg = "tqp_int_ctrl_odd_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(10), .msg = "tx_que_scan_int_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(11), .msg = "rx_que_scan_int_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(6),
+ .msg = "tqp_int_cfg_even_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(7),
+ .msg = "tqp_int_cfg_odd_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(8),
+ .msg = "tqp_int_ctrl_even_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(9),
+ .msg = "tqp_int_ctrl_odd_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(10),
+ .msg = "tx_que_scan_int_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(11),
+ .msg = "rx_que_scan_int_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = {
- { .int_msk = BIT(1), .msg = "msix_nic_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(3), .msg = "msix_rocee_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(1),
+ .msg = "msix_nic_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(3),
+ .msg = "msix_rocee_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_igu_int[] = {
- { .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(0),
+ .msg = "igu_rx_buf0_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(2),
+ .msg = "igu_rx_buf1_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = {
- { .int_msk = BIT(0), .msg = "rx_buf_overflow",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(2), .msg = "rx_stp_fifo_underflow",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(3), .msg = "tx_buf_overflow",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(4), .msg = "tx_buf_underrun",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(5), .msg = "rx_stp_buf_overflow",
- .reset_level = HNAE3_GLOBAL_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(0),
+ .msg = "rx_buf_overflow",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(1),
+ .msg = "rx_stp_fifo_overflow",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(2),
+ .msg = "rx_stp_fifo_underflow",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(3),
+ .msg = "tx_buf_overflow",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(4),
+ .msg = "tx_buf_underrun",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(5),
+ .msg = "rx_stp_buf_overflow",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_ncsi_err_int[] = {
- { .int_msk = BIT(1), .msg = "ncsi_tx_ecc_mbit_err",
- .reset_level = HNAE3_NONE_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(1),
+ .msg = "ncsi_tx_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = {
- { .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(7), .msg = "rss_tc_mode_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(10), .msg = "rss_idt_mem2_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(11), .msg = "rss_idt_mem3_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(12), .msg = "rss_idt_mem4_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(13), .msg = "rss_idt_mem5_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(14), .msg = "rss_idt_mem6_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(15), .msg = "rss_idt_mem7_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(16), .msg = "rss_idt_mem8_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(17), .msg = "rss_idt_mem9_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(18), .msg = "rss_idt_mem10_ecc_m1bit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(19), .msg = "rss_idt_mem11_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(20), .msg = "rss_idt_mem12_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(21), .msg = "rss_idt_mem13_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(22), .msg = "rss_idt_mem14_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(23), .msg = "rss_idt_mem15_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(24), .msg = "port_vlan_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(25), .msg = "mcast_linear_table_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(26), .msg = "mcast_result_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(27), .msg = "flow_director_ad_mem0_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(28), .msg = "flow_director_ad_mem1_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(29), .msg = "rx_vlan_tag_memory_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(30), .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(0),
+ .msg = "vf_vlan_ad_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(1),
+ .msg = "umv_mcast_group_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(2),
+ .msg = "umv_key_mem0_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(3),
+ .msg = "umv_key_mem1_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(4),
+ .msg = "umv_key_mem2_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(5),
+ .msg = "umv_key_mem3_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(6),
+ .msg = "umv_ad_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(7),
+ .msg = "rss_tc_mode_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(8),
+ .msg = "rss_idt_mem0_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(9),
+ .msg = "rss_idt_mem1_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(10),
+ .msg = "rss_idt_mem2_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(11),
+ .msg = "rss_idt_mem3_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(12),
+ .msg = "rss_idt_mem4_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(13),
+ .msg = "rss_idt_mem5_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(14),
+ .msg = "rss_idt_mem6_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(15),
+ .msg = "rss_idt_mem7_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(16),
+ .msg = "rss_idt_mem8_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(17),
+ .msg = "rss_idt_mem9_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(18),
+ .msg = "rss_idt_mem10_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(19),
+ .msg = "rss_idt_mem11_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(20),
+ .msg = "rss_idt_mem12_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(21),
+ .msg = "rss_idt_mem13_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(22),
+ .msg = "rss_idt_mem14_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(23),
+ .msg = "rss_idt_mem15_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(24),
+ .msg = "port_vlan_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(25),
+ .msg = "mcast_linear_table_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(26),
+ .msg = "mcast_result_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(27),
+ .msg = "flow_director_ad_mem0_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(28),
+ .msg = "flow_director_ad_mem1_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(29),
+ .msg = "rx_vlan_tag_memory_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(30),
+ .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_ppp_pf_abnormal_int[] = {
- { .int_msk = BIT(0), .msg = "tx_vlan_tag_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(1), .msg = "rss_list_tc_unassigned_queue_err",
- .reset_level = HNAE3_NONE_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(0),
+ .msg = "tx_vlan_tag_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(1),
+ .msg = "rss_list_tc_unassigned_queue_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = {
- { .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(3), .msg = "FD_CN0_memory_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(4), .msg = "FD_CN1_memory_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(5), .msg = "GRO_AD_memory_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(0),
+ .msg = "hfs_fifo_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(1),
+ .msg = "rslt_descr_fifo_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(2),
+ .msg = "tx_vlan_tag_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(3),
+ .msg = "FD_CN0_memory_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(4),
+ .msg = "FD_CN1_memory_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(5),
+ .msg = "GRO_AD_memory_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_tm_sch_rint[] = {
- { .int_msk = BIT(1), .msg = "tm_sch_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(12), .msg = "tm_sch_port_shap_offset_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(13), .msg = "tm_sch_port_shap_offset_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(14), .msg = "tm_sch_pg_pshap_offset_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(15), .msg = "tm_sch_pg_pshap_offset_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(16), .msg = "tm_sch_pg_cshap_offset_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(17), .msg = "tm_sch_pg_cshap_offset_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(18), .msg = "tm_sch_pri_pshap_offset_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(19), .msg = "tm_sch_pri_pshap_offset_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(20), .msg = "tm_sch_pri_cshap_offset_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(21), .msg = "tm_sch_pri_cshap_offset_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(1),
+ .msg = "tm_sch_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(2),
+ .msg = "tm_sch_port_shap_sub_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(3),
+ .msg = "tm_sch_port_shap_sub_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(4),
+ .msg = "tm_sch_pg_pshap_sub_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(5),
+ .msg = "tm_sch_pg_pshap_sub_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(6),
+ .msg = "tm_sch_pg_cshap_sub_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(7),
+ .msg = "tm_sch_pg_cshap_sub_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(8),
+ .msg = "tm_sch_pri_pshap_sub_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(9),
+ .msg = "tm_sch_pri_pshap_sub_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(10),
+ .msg = "tm_sch_pri_cshap_sub_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(11),
+ .msg = "tm_sch_pri_cshap_sub_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(12),
+ .msg = "tm_sch_port_shap_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(13),
+ .msg = "tm_sch_port_shap_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(14),
+ .msg = "tm_sch_pg_pshap_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(15),
+ .msg = "tm_sch_pg_pshap_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(16),
+ .msg = "tm_sch_pg_cshap_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(17),
+ .msg = "tm_sch_pg_cshap_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(18),
+ .msg = "tm_sch_pri_pshap_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(19),
+ .msg = "tm_sch_pri_pshap_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(20),
+ .msg = "tm_sch_pri_cshap_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(21),
+ .msg = "tm_sch_pri_cshap_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(22),
+ .msg = "tm_sch_rq_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(23),
+ .msg = "tm_sch_rq_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(24),
+ .msg = "tm_sch_nq_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(25),
+ .msg = "tm_sch_nq_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(26),
+ .msg = "tm_sch_roce_up_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(27),
+ .msg = "tm_sch_roce_up_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(28),
+ .msg = "tm_sch_rcb_byte_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(29),
+ .msg = "tm_sch_rcb_byte_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(30),
+ .msg = "tm_sch_ssu_byte_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(31),
+ .msg = "tm_sch_ssu_byte_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_qcn_fifo_rint[] = {
- { .int_msk = BIT(0), .msg = "qcn_shap_gp0_sch_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(1), .msg = "qcn_shap_gp0_sch_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(2), .msg = "qcn_shap_gp1_sch_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(3), .msg = "qcn_shap_gp1_sch_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(4), .msg = "qcn_shap_gp2_sch_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(5), .msg = "qcn_shap_gp2_sch_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(6), .msg = "qcn_shap_gp3_sch_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(7), .msg = "qcn_shap_gp3_sch_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(8), .msg = "qcn_shap_gp0_offset_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(9), .msg = "qcn_shap_gp0_offset_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(10), .msg = "qcn_shap_gp1_offset_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(11), .msg = "qcn_shap_gp1_offset_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(12), .msg = "qcn_shap_gp2_offset_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(13), .msg = "qcn_shap_gp2_offset_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(14), .msg = "qcn_shap_gp3_offset_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(15), .msg = "qcn_shap_gp3_offset_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(16), .msg = "qcn_byte_info_fifo_rd_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(17), .msg = "qcn_byte_info_fifo_wr_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(0),
+ .msg = "qcn_shap_gp0_sch_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(1),
+ .msg = "qcn_shap_gp0_sch_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(2),
+ .msg = "qcn_shap_gp1_sch_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(3),
+ .msg = "qcn_shap_gp1_sch_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(4),
+ .msg = "qcn_shap_gp2_sch_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(5),
+ .msg = "qcn_shap_gp2_sch_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(6),
+ .msg = "qcn_shap_gp3_sch_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(7),
+ .msg = "qcn_shap_gp3_sch_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(8),
+ .msg = "qcn_shap_gp0_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(9),
+ .msg = "qcn_shap_gp0_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(10),
+ .msg = "qcn_shap_gp1_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(11),
+ .msg = "qcn_shap_gp1_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(12),
+ .msg = "qcn_shap_gp2_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(13),
+ .msg = "qcn_shap_gp2_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(14),
+ .msg = "qcn_shap_gp3_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(15),
+ .msg = "qcn_shap_gp3_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(16),
+ .msg = "qcn_byte_info_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(17),
+ .msg = "qcn_byte_info_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_qcn_ecc_rint[] = {
- { .int_msk = BIT(1), .msg = "qcn_byte_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(3), .msg = "qcn_time_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(5), .msg = "qcn_fb_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(7), .msg = "qcn_link_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(9), .msg = "qcn_rate_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(11), .msg = "qcn_tmplt_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(13), .msg = "qcn_shap_cfg_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(15), .msg = "qcn_gp0_barrel_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(17), .msg = "qcn_gp1_barrel_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(19), .msg = "qcn_gp2_barrel_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(21), .msg = "qcn_gp3_barral_mem_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(1),
+ .msg = "qcn_byte_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(3),
+ .msg = "qcn_time_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(5),
+ .msg = "qcn_fb_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(7),
+ .msg = "qcn_link_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(9),
+ .msg = "qcn_rate_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(11),
+ .msg = "qcn_tmplt_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(13),
+ .msg = "qcn_shap_cfg_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(15),
+ .msg = "qcn_gp0_barrel_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(17),
+ .msg = "qcn_gp1_barrel_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(19),
+ .msg = "qcn_gp2_barrel_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(21),
+ .msg = "qcn_gp3_barral_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = {
- { .int_msk = BIT(0), .msg = "egu_cge_afifo_ecc_1bit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(1), .msg = "egu_cge_afifo_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(2), .msg = "egu_lge_afifo_ecc_1bit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(3), .msg = "egu_lge_afifo_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(4), .msg = "cge_igu_afifo_ecc_1bit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(5), .msg = "cge_igu_afifo_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(6), .msg = "lge_igu_afifo_ecc_1bit_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(7), .msg = "lge_igu_afifo_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(8), .msg = "cge_igu_afifo_overflow_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(9), .msg = "lge_igu_afifo_overflow_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(10), .msg = "egu_cge_afifo_underrun_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(11), .msg = "egu_lge_afifo_underrun_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(12), .msg = "egu_ge_afifo_underrun_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(13), .msg = "ge_igu_afifo_overflow_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(0),
+ .msg = "egu_cge_afifo_ecc_1bit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(1),
+ .msg = "egu_cge_afifo_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(2),
+ .msg = "egu_lge_afifo_ecc_1bit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(3),
+ .msg = "egu_lge_afifo_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(4),
+ .msg = "cge_igu_afifo_ecc_1bit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(5),
+ .msg = "cge_igu_afifo_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(6),
+ .msg = "lge_igu_afifo_ecc_1bit_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(7),
+ .msg = "lge_igu_afifo_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(8),
+ .msg = "cge_igu_afifo_overflow_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(9),
+ .msg = "lge_igu_afifo_overflow_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(10),
+ .msg = "egu_cge_afifo_underrun_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(11),
+ .msg = "egu_lge_afifo_underrun_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(12),
+ .msg = "egu_ge_afifo_underrun_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(13),
+ .msg = "ge_igu_afifo_overflow_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = {
- { .int_msk = BIT(13), .msg = "rpu_rx_pkt_bit32_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(14), .msg = "rpu_rx_pkt_bit33_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(15), .msg = "rpu_rx_pkt_bit34_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(16), .msg = "rpu_rx_pkt_bit35_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(17), .msg = "rcb_tx_ring_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(18), .msg = "rcb_rx_ring_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(19), .msg = "rcb_tx_fbd_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(20), .msg = "rcb_rx_ebd_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(21), .msg = "rcb_tso_info_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(22), .msg = "rcb_tx_int_info_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(23), .msg = "rcb_rx_int_info_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(24), .msg = "tpu_tx_pkt_0_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(25), .msg = "tpu_tx_pkt_1_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(26), .msg = "rd_bus_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(27), .msg = "wr_bus_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(28), .msg = "reg_search_miss",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(29), .msg = "rx_q_search_miss",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(30), .msg = "ooo_ecc_err_detect",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(31), .msg = "ooo_ecc_err_multpl",
- .reset_level = HNAE3_GLOBAL_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(13),
+ .msg = "rpu_rx_pkt_bit32_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(14),
+ .msg = "rpu_rx_pkt_bit33_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(15),
+ .msg = "rpu_rx_pkt_bit34_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(16),
+ .msg = "rpu_rx_pkt_bit35_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(17),
+ .msg = "rcb_tx_ring_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(18),
+ .msg = "rcb_rx_ring_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(19),
+ .msg = "rcb_tx_fbd_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(20),
+ .msg = "rcb_rx_ebd_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(21),
+ .msg = "rcb_tso_info_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(22),
+ .msg = "rcb_tx_int_info_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(23),
+ .msg = "rcb_rx_int_info_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(24),
+ .msg = "tpu_tx_pkt_0_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(25),
+ .msg = "tpu_tx_pkt_1_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(26),
+ .msg = "rd_bus_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(27),
+ .msg = "wr_bus_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(28),
+ .msg = "reg_search_miss",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(29),
+ .msg = "rx_q_search_miss",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(30),
+ .msg = "ooo_ecc_err_detect",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(31),
+ .msg = "ooo_ecc_err_multpl",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = {
- { .int_msk = BIT(4), .msg = "gro_bd_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(5), .msg = "gro_context_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(6), .msg = "rx_stash_cfg_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(7), .msg = "axi_rd_fbd_ecc_mbit_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(4),
+ .msg = "gro_bd_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(5),
+ .msg = "gro_context_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(6),
+ .msg = "rx_stash_cfg_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(7),
+ .msg = "axi_rd_fbd_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_ppu_pf_abnormal_int[] = {
- { .int_msk = BIT(0), .msg = "over_8bd_no_fe",
- .reset_level = HNAE3_FUNC_RESET },
- { .int_msk = BIT(1), .msg = "tso_mss_cmp_min_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(2), .msg = "tso_mss_cmp_max_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(3), .msg = "tx_rd_fbd_poison",
- .reset_level = HNAE3_FUNC_RESET },
- { .int_msk = BIT(4), .msg = "rx_rd_ebd_poison",
- .reset_level = HNAE3_FUNC_RESET },
- { .int_msk = BIT(5), .msg = "buf_wait_timeout",
- .reset_level = HNAE3_NONE_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(0),
+ .msg = "over_8bd_no_fe",
+ .reset_level = HNAE3_FUNC_RESET
+ }, {
+ .int_msk = BIT(1),
+ .msg = "tso_mss_cmp_min_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(2),
+ .msg = "tso_mss_cmp_max_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(3),
+ .msg = "tx_rd_fbd_poison",
+ .reset_level = HNAE3_FUNC_RESET
+ }, {
+ .int_msk = BIT(4),
+ .msg = "rx_rd_ebd_poison",
+ .reset_level = HNAE3_FUNC_RESET
+ }, {
+ .int_msk = BIT(5),
+ .msg = "buf_wait_timeout",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_ssu_com_err_int[] = {
- { .int_msk = BIT(0), .msg = "buf_sum_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(1), .msg = "ppp_mb_num_err",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(2), .msg = "ppp_mbid_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(3), .msg = "ppp_rlt_mac_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(4), .msg = "ppp_rlt_host_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(5), .msg = "cks_edit_position_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(6), .msg = "cks_edit_condition_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(7), .msg = "vlan_edit_condition_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(8), .msg = "vlan_num_ot_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(9), .msg = "vlan_num_in_err",
- .reset_level = HNAE3_GLOBAL_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(0),
+ .msg = "buf_sum_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(1),
+ .msg = "ppp_mb_num_err",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(2),
+ .msg = "ppp_mbid_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(3),
+ .msg = "ppp_rlt_mac_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(4),
+ .msg = "ppp_rlt_host_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(5),
+ .msg = "cks_edit_position_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(6),
+ .msg = "cks_edit_condition_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(7),
+ .msg = "vlan_edit_condition_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(8),
+ .msg = "vlan_num_ot_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(9),
+ .msg = "vlan_num_in_err",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ /* sentinel */
+ }
};
#define HCLGE_SSU_MEM_ECC_ERR(x) \
- { .int_msk = BIT(x), .msg = "ssu_mem" #x "_ecc_mbit_err", \
- .reset_level = HNAE3_GLOBAL_RESET }
+{ \
+ .int_msk = BIT(x), \
+ .msg = "ssu_mem" #x "_ecc_mbit_err", \
+ .reset_level = HNAE3_GLOBAL_RESET \
+}
static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = {
HCLGE_SSU_MEM_ECC_ERR(0),
@@ -504,131 +931,269 @@ static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = {
};
static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = {
- { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port",
- .reset_level = HNAE3_FUNC_RESET },
- { .int_msk = BIT(1), .msg = "tpu_pkt_without_key_port",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(2), .msg = "igu_pkt_without_key_port",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(3), .msg = "roc_eof_mis_match_port",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(4), .msg = "tpu_eof_mis_match_port",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(5), .msg = "igu_eof_mis_match_port",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(6), .msg = "roc_sof_mis_match_port",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(7), .msg = "tpu_sof_mis_match_port",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(8), .msg = "igu_sof_mis_match_port",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(11), .msg = "ets_rd_int_rx_port",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(12), .msg = "ets_wr_int_rx_port",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(13), .msg = "ets_rd_int_tx_port",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(14), .msg = "ets_wr_int_tx_port",
- .reset_level = HNAE3_GLOBAL_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(0),
+ .msg = "roc_pkt_without_key_port",
+ .reset_level = HNAE3_FUNC_RESET
+ }, {
+ .int_msk = BIT(1),
+ .msg = "tpu_pkt_without_key_port",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(2),
+ .msg = "igu_pkt_without_key_port",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(3),
+ .msg = "roc_eof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(4),
+ .msg = "tpu_eof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(5),
+ .msg = "igu_eof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(6),
+ .msg = "roc_sof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(7),
+ .msg = "tpu_sof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(8),
+ .msg = "igu_sof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(11),
+ .msg = "ets_rd_int_rx_port",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(12),
+ .msg = "ets_wr_int_rx_port",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(13),
+ .msg = "ets_rd_int_tx_port",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(14),
+ .msg = "ets_wr_int_tx_port",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_ssu_fifo_overflow_int[] = {
- { .int_msk = BIT(0), .msg = "ig_mac_inf_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(1), .msg = "ig_host_inf_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(2), .msg = "ig_roc_buf_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(3), .msg = "ig_host_data_fifo_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(4), .msg = "ig_host_key_fifo_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(5), .msg = "tx_qcn_fifo_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(6), .msg = "rx_qcn_fifo_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(7), .msg = "tx_pf_rd_fifo_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(8), .msg = "rx_pf_rd_fifo_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(9), .msg = "qm_eof_fifo_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(10), .msg = "mb_rlt_fifo_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(11), .msg = "dup_uncopy_fifo_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(12), .msg = "dup_cnt_rd_fifo_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(13), .msg = "dup_cnt_drop_fifo_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(14), .msg = "dup_cnt_wrb_fifo_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(15), .msg = "host_cmd_fifo_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(16), .msg = "mac_cmd_fifo_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(17), .msg = "host_cmd_bitmap_empty_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(18), .msg = "mac_cmd_bitmap_empty_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(19), .msg = "dup_bitmap_empty_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(20), .msg = "out_queue_bitmap_empty_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(21), .msg = "bank2_bitmap_empty_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(22), .msg = "bank1_bitmap_empty_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(23), .msg = "bank0_bitmap_empty_int",
- .reset_level = HNAE3_GLOBAL_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(0),
+ .msg = "ig_mac_inf_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(1),
+ .msg = "ig_host_inf_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(2),
+ .msg = "ig_roc_buf_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(3),
+ .msg = "ig_host_data_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(4),
+ .msg = "ig_host_key_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(5),
+ .msg = "tx_qcn_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(6),
+ .msg = "rx_qcn_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(7),
+ .msg = "tx_pf_rd_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(8),
+ .msg = "rx_pf_rd_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(9),
+ .msg = "qm_eof_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(10),
+ .msg = "mb_rlt_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(11),
+ .msg = "dup_uncopy_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(12),
+ .msg = "dup_cnt_rd_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(13),
+ .msg = "dup_cnt_drop_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(14),
+ .msg = "dup_cnt_wrb_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(15),
+ .msg = "host_cmd_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(16),
+ .msg = "mac_cmd_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(17),
+ .msg = "host_cmd_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(18),
+ .msg = "mac_cmd_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(19),
+ .msg = "dup_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(20),
+ .msg = "out_queue_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(21),
+ .msg = "bank2_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(22),
+ .msg = "bank1_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(23),
+ .msg = "bank0_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = {
- { .int_msk = BIT(0), .msg = "ets_rd_int_rx_tcg",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(1), .msg = "ets_wr_int_rx_tcg",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(2), .msg = "ets_rd_int_tx_tcg",
- .reset_level = HNAE3_GLOBAL_RESET },
- { .int_msk = BIT(3), .msg = "ets_wr_int_tx_tcg",
- .reset_level = HNAE3_GLOBAL_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(0),
+ .msg = "ets_rd_int_rx_tcg",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(1),
+ .msg = "ets_wr_int_rx_tcg",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(2),
+ .msg = "ets_rd_int_tx_tcg",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ .int_msk = BIT(3),
+ .msg = "ets_wr_int_tx_tcg",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = {
- { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port",
- .reset_level = HNAE3_FUNC_RESET },
- { .int_msk = BIT(9), .msg = "low_water_line_err_port",
- .reset_level = HNAE3_NONE_RESET },
- { .int_msk = BIT(10), .msg = "hi_water_line_err_port",
- .reset_level = HNAE3_GLOBAL_RESET },
- { /* sentinel */ }
+ {
+ .int_msk = BIT(0),
+ .msg = "roc_pkt_without_key_port",
+ .reset_level = HNAE3_FUNC_RESET
+ }, {
+ .int_msk = BIT(9),
+ .msg = "low_water_line_err_port",
+ .reset_level = HNAE3_NONE_RESET
+ }, {
+ .int_msk = BIT(10),
+ .msg = "hi_water_line_err_port",
+ .reset_level = HNAE3_GLOBAL_RESET
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = {
- { .int_msk = 0, .msg = "rocee qmm ovf: sgid invalid err" },
- { .int_msk = 0x4, .msg = "rocee qmm ovf: sgid ovf err" },
- { .int_msk = 0x8, .msg = "rocee qmm ovf: smac invalid err" },
- { .int_msk = 0xC, .msg = "rocee qmm ovf: smac ovf err" },
- { .int_msk = 0x10, .msg = "rocee qmm ovf: cqc invalid err" },
- { .int_msk = 0x11, .msg = "rocee qmm ovf: cqc ovf err" },
- { .int_msk = 0x12, .msg = "rocee qmm ovf: cqc hopnum err" },
- { .int_msk = 0x13, .msg = "rocee qmm ovf: cqc ba0 err" },
- { .int_msk = 0x14, .msg = "rocee qmm ovf: srqc invalid err" },
- { .int_msk = 0x15, .msg = "rocee qmm ovf: srqc ovf err" },
- { .int_msk = 0x16, .msg = "rocee qmm ovf: srqc hopnum err" },
- { .int_msk = 0x17, .msg = "rocee qmm ovf: srqc ba0 err" },
- { .int_msk = 0x18, .msg = "rocee qmm ovf: mpt invalid err" },
- { .int_msk = 0x19, .msg = "rocee qmm ovf: mpt ovf err" },
- { .int_msk = 0x1A, .msg = "rocee qmm ovf: mpt hopnum err" },
- { .int_msk = 0x1B, .msg = "rocee qmm ovf: mpt ba0 err" },
- { .int_msk = 0x1C, .msg = "rocee qmm ovf: qpc invalid err" },
- { .int_msk = 0x1D, .msg = "rocee qmm ovf: qpc ovf err" },
- { .int_msk = 0x1E, .msg = "rocee qmm ovf: qpc hopnum err" },
- { .int_msk = 0x1F, .msg = "rocee qmm ovf: qpc ba0 err" },
- { /* sentinel */ }
+ {
+ .int_msk = 0,
+ .msg = "rocee qmm ovf: sgid invalid err"
+ }, {
+ .int_msk = 0x4,
+ .msg = "rocee qmm ovf: sgid ovf err"
+ }, {
+ .int_msk = 0x8,
+ .msg = "rocee qmm ovf: smac invalid err"
+ }, {
+ .int_msk = 0xC,
+ .msg = "rocee qmm ovf: smac ovf err"
+ }, {
+ .int_msk = 0x10,
+ .msg = "rocee qmm ovf: cqc invalid err"
+ }, {
+ .int_msk = 0x11,
+ .msg = "rocee qmm ovf: cqc ovf err"
+ }, {
+ .int_msk = 0x12,
+ .msg = "rocee qmm ovf: cqc hopnum err"
+ }, {
+ .int_msk = 0x13,
+ .msg = "rocee qmm ovf: cqc ba0 err"
+ }, {
+ .int_msk = 0x14,
+ .msg = "rocee qmm ovf: srqc invalid err"
+ }, {
+ .int_msk = 0x15,
+ .msg = "rocee qmm ovf: srqc ovf err"
+ }, {
+ .int_msk = 0x16,
+ .msg = "rocee qmm ovf: srqc hopnum err"
+ }, {
+ .int_msk = 0x17,
+ .msg = "rocee qmm ovf: srqc ba0 err"
+ }, {
+ .int_msk = 0x18,
+ .msg = "rocee qmm ovf: mpt invalid err"
+ }, {
+ .int_msk = 0x19,
+ .msg = "rocee qmm ovf: mpt ovf err"
+ }, {
+ .int_msk = 0x1A,
+ .msg = "rocee qmm ovf: mpt hopnum err"
+ }, {
+ .int_msk = 0x1B,
+ .msg = "rocee qmm ovf: mpt ba0 err"
+ }, {
+ .int_msk = 0x1C,
+ .msg = "rocee qmm ovf: qpc invalid err"
+ }, {
+ .int_msk = 0x1D,
+ .msg = "rocee qmm ovf: qpc ovf err"
+ }, {
+ .int_msk = 0x1E,
+ .msg = "rocee qmm ovf: qpc hopnum err"
+ }, {
+ .int_msk = 0x1F,
+ .msg = "rocee qmm ovf: qpc ba0 err"
+ }, {
+ /* sentinel */
+ }
};
static const struct hclge_hw_module_id hclge_hw_module_id_st[] = {
@@ -1709,34 +2274,36 @@ static void hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev)
static const struct hclge_hw_blk hw_blk[] = {
{
- .msk = BIT(0), .name = "IGU_EGU",
- .config_err_int = hclge_config_igu_egu_hw_err_int,
- },
- {
- .msk = BIT(1), .name = "PPP",
- .config_err_int = hclge_config_ppp_hw_err_int,
- },
- {
- .msk = BIT(2), .name = "SSU",
- .config_err_int = hclge_config_ssu_hw_err_int,
- },
- {
- .msk = BIT(3), .name = "PPU",
- .config_err_int = hclge_config_ppu_hw_err_int,
- },
- {
- .msk = BIT(4), .name = "TM",
- .config_err_int = hclge_config_tm_hw_err_int,
- },
- {
- .msk = BIT(5), .name = "COMMON",
- .config_err_int = hclge_config_common_hw_err_int,
- },
- {
- .msk = BIT(8), .name = "MAC",
- .config_err_int = hclge_config_mac_err_int,
- },
- { /* sentinel */ }
+ .msk = BIT(0),
+ .name = "IGU_EGU",
+ .config_err_int = hclge_config_igu_egu_hw_err_int,
+ }, {
+ .msk = BIT(1),
+ .name = "PPP",
+ .config_err_int = hclge_config_ppp_hw_err_int,
+ }, {
+ .msk = BIT(2),
+ .name = "SSU",
+ .config_err_int = hclge_config_ssu_hw_err_int,
+ }, {
+ .msk = BIT(3),
+ .name = "PPU",
+ .config_err_int = hclge_config_ppu_hw_err_int,
+ }, {
+ .msk = BIT(4),
+ .name = "TM",
+ .config_err_int = hclge_config_tm_hw_err_int,
+ }, {
+ .msk = BIT(5),
+ .name = "COMMON",
+ .config_err_int = hclge_config_common_hw_err_int,
+ }, {
+ .msk = BIT(8),
+ .name = "MAC",
+ .config_err_int = hclge_config_mac_err_int,
+ }, {
+ /* sentinel */
+ }
};
static void hclge_config_all_msix_error(struct hclge_dev *hdev, bool enable)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index ebeaf12e409b..e55ba2e511b1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -23,6 +23,7 @@
#include "hclge_tm.h"
#include "hclge_err.h"
#include "hnae3.h"
+#include "hclge_devlink.h"
#define HCLGE_NAME "hclge"
#define HCLGE_STATS_READ(p, offset) (*(u64 *)((u8 *)(p) + (offset)))
@@ -91,23 +92,23 @@ static const struct pci_device_id ae_algo_pci_tbl[] = {
MODULE_DEVICE_TABLE(pci, ae_algo_pci_tbl);
-static const u32 cmdq_reg_addr_list[] = {HCLGE_CMDQ_TX_ADDR_L_REG,
- HCLGE_CMDQ_TX_ADDR_H_REG,
- HCLGE_CMDQ_TX_DEPTH_REG,
- HCLGE_CMDQ_TX_TAIL_REG,
- HCLGE_CMDQ_TX_HEAD_REG,
- HCLGE_CMDQ_RX_ADDR_L_REG,
- HCLGE_CMDQ_RX_ADDR_H_REG,
- HCLGE_CMDQ_RX_DEPTH_REG,
- HCLGE_CMDQ_RX_TAIL_REG,
- HCLGE_CMDQ_RX_HEAD_REG,
+static const u32 cmdq_reg_addr_list[] = {HCLGE_NIC_CSQ_BASEADDR_L_REG,
+ HCLGE_NIC_CSQ_BASEADDR_H_REG,
+ HCLGE_NIC_CSQ_DEPTH_REG,
+ HCLGE_NIC_CSQ_TAIL_REG,
+ HCLGE_NIC_CSQ_HEAD_REG,
+ HCLGE_NIC_CRQ_BASEADDR_L_REG,
+ HCLGE_NIC_CRQ_BASEADDR_H_REG,
+ HCLGE_NIC_CRQ_DEPTH_REG,
+ HCLGE_NIC_CRQ_TAIL_REG,
+ HCLGE_NIC_CRQ_HEAD_REG,
HCLGE_VECTOR0_CMDQ_SRC_REG,
HCLGE_CMDQ_INTR_STS_REG,
HCLGE_CMDQ_INTR_EN_REG,
HCLGE_CMDQ_INTR_GEN_REG};
static const u32 common_reg_addr_list[] = {HCLGE_MISC_VECTOR_REG_BASE,
- HCLGE_VECTOR0_OTER_EN_REG,
+ HCLGE_PF_OTHER_INT_REG,
HCLGE_MISC_RESET_STS_REG,
HCLGE_MISC_VECTOR_INT_STS,
HCLGE_GLOBAL_RESET_REG,
@@ -374,14 +375,14 @@ static const enum hclge_opcode_type hclge_dfx_reg_opcode_list[] = {
};
static const struct key_info meta_data_key_info[] = {
- { PACKET_TYPE_ID, 6},
- { IP_FRAGEMENT, 1},
- { ROCE_TYPE, 1},
- { NEXT_KEY, 5},
- { VLAN_NUMBER, 2},
- { SRC_VPORT, 12},
- { DST_VPORT, 12},
- { TUNNEL_PACKET, 1},
+ { PACKET_TYPE_ID, 6 },
+ { IP_FRAGEMENT, 1 },
+ { ROCE_TYPE, 1 },
+ { NEXT_KEY, 5 },
+ { VLAN_NUMBER, 2 },
+ { SRC_VPORT, 12 },
+ { DST_VPORT, 12 },
+ { TUNNEL_PACKET, 1 },
};
static const struct key_info tuple_key_info[] = {
@@ -748,9 +749,9 @@ static void hclge_update_stats(struct hnae3_handle *handle,
static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset)
{
-#define HCLGE_LOOPBACK_TEST_FLAGS (HNAE3_SUPPORT_APP_LOOPBACK |\
- HNAE3_SUPPORT_PHY_LOOPBACK |\
- HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK |\
+#define HCLGE_LOOPBACK_TEST_FLAGS (HNAE3_SUPPORT_APP_LOOPBACK | \
+ HNAE3_SUPPORT_PHY_LOOPBACK | \
+ HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK | \
HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK)
struct hclge_vport *vport = hclge_get_vport(handle);
@@ -958,31 +959,31 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev)
static int hclge_parse_speed(u8 speed_cmd, u32 *speed)
{
switch (speed_cmd) {
- case 6:
+ case HCLGE_FW_MAC_SPEED_10M:
*speed = HCLGE_MAC_SPEED_10M;
break;
- case 7:
+ case HCLGE_FW_MAC_SPEED_100M:
*speed = HCLGE_MAC_SPEED_100M;
break;
- case 0:
+ case HCLGE_FW_MAC_SPEED_1G:
*speed = HCLGE_MAC_SPEED_1G;
break;
- case 1:
+ case HCLGE_FW_MAC_SPEED_10G:
*speed = HCLGE_MAC_SPEED_10G;
break;
- case 2:
+ case HCLGE_FW_MAC_SPEED_25G:
*speed = HCLGE_MAC_SPEED_25G;
break;
- case 3:
+ case HCLGE_FW_MAC_SPEED_40G:
*speed = HCLGE_MAC_SPEED_40G;
break;
- case 4:
+ case HCLGE_FW_MAC_SPEED_50G:
*speed = HCLGE_MAC_SPEED_50G;
break;
- case 5:
+ case HCLGE_FW_MAC_SPEED_100G:
*speed = HCLGE_MAC_SPEED_100G;
break;
- case 8:
+ case HCLGE_FW_MAC_SPEED_200G:
*speed = HCLGE_MAC_SPEED_200G;
break;
default:
@@ -992,44 +993,43 @@ static int hclge_parse_speed(u8 speed_cmd, u32 *speed)
return 0;
}
+static const struct hclge_speed_bit_map speed_bit_map[] = {
+ {HCLGE_MAC_SPEED_10M, HCLGE_SUPPORT_10M_BIT},
+ {HCLGE_MAC_SPEED_100M, HCLGE_SUPPORT_100M_BIT},
+ {HCLGE_MAC_SPEED_1G, HCLGE_SUPPORT_1G_BIT},
+ {HCLGE_MAC_SPEED_10G, HCLGE_SUPPORT_10G_BIT},
+ {HCLGE_MAC_SPEED_25G, HCLGE_SUPPORT_25G_BIT},
+ {HCLGE_MAC_SPEED_40G, HCLGE_SUPPORT_40G_BIT},
+ {HCLGE_MAC_SPEED_50G, HCLGE_SUPPORT_50G_BIT},
+ {HCLGE_MAC_SPEED_100G, HCLGE_SUPPORT_100G_BIT},
+ {HCLGE_MAC_SPEED_200G, HCLGE_SUPPORT_200G_BIT},
+};
+
+static int hclge_get_speed_bit(u32 speed, u32 *speed_bit)
+{
+ u16 i;
+
+ for (i = 0; i < ARRAY_SIZE(speed_bit_map); i++) {
+ if (speed == speed_bit_map[i].speed) {
+ *speed_bit = speed_bit_map[i].speed_bit;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
static int hclge_check_port_speed(struct hnae3_handle *handle, u32 speed)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
u32 speed_ability = hdev->hw.mac.speed_ability;
u32 speed_bit = 0;
+ int ret;
- switch (speed) {
- case HCLGE_MAC_SPEED_10M:
- speed_bit = HCLGE_SUPPORT_10M_BIT;
- break;
- case HCLGE_MAC_SPEED_100M:
- speed_bit = HCLGE_SUPPORT_100M_BIT;
- break;
- case HCLGE_MAC_SPEED_1G:
- speed_bit = HCLGE_SUPPORT_1G_BIT;
- break;
- case HCLGE_MAC_SPEED_10G:
- speed_bit = HCLGE_SUPPORT_10G_BIT;
- break;
- case HCLGE_MAC_SPEED_25G:
- speed_bit = HCLGE_SUPPORT_25G_BIT;
- break;
- case HCLGE_MAC_SPEED_40G:
- speed_bit = HCLGE_SUPPORT_40G_BIT;
- break;
- case HCLGE_MAC_SPEED_50G:
- speed_bit = HCLGE_SUPPORT_50G_BIT;
- break;
- case HCLGE_MAC_SPEED_100G:
- speed_bit = HCLGE_SUPPORT_100G_BIT;
- break;
- case HCLGE_MAC_SPEED_200G:
- speed_bit = HCLGE_SUPPORT_200G_BIT;
- break;
- default:
- return -EINVAL;
- }
+ ret = hclge_get_speed_bit(speed, &speed_bit);
+ if (ret)
+ return ret;
if (speed_bit & speed_ability)
return 0;
@@ -1550,6 +1550,7 @@ static int hclge_configure(struct hclge_dev *hdev)
hdev->tm_info.hw_pfc_map = 0;
hdev->wanted_umv_size = cfg.umv_space;
hdev->tx_spare_buf_size = cfg.tx_spare_buf_size;
+ hdev->gro_en = true;
if (cfg.vlan_fliter_cap == HCLGE_VLAN_FLTR_CAN_MDF)
set_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps);
@@ -1618,7 +1619,7 @@ static int hclge_config_tso(struct hclge_dev *hdev, u16 tso_mss_min,
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
-static int hclge_config_gro(struct hclge_dev *hdev, bool en)
+static int hclge_config_gro(struct hclge_dev *hdev)
{
struct hclge_cfg_gro_status_cmd *req;
struct hclge_desc desc;
@@ -1630,7 +1631,7 @@ static int hclge_config_gro(struct hclge_dev *hdev, bool en)
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_GRO_GENERIC_CONFIG, false);
req = (struct hclge_cfg_gro_status_cmd *)desc.data;
- req->gro_en = en ? 1 : 0;
+ req->gro_en = hdev->gro_en ? 1 : 0;
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
if (ret)
@@ -1813,6 +1814,7 @@ static int hclge_vport_setup(struct hclge_vport *vport, u16 num_tqps)
nic->pdev = hdev->pdev;
nic->ae_algo = &ae_algo;
nic->numa_node_mask = hdev->numa_node_mask;
+ nic->kinfo.io_base = hdev->hw.io_base;
ret = hclge_knic_setup(vport, num_tqps,
hdev->num_tx_desc, hdev->num_rx_desc);
@@ -2579,39 +2581,39 @@ static int hclge_cfg_mac_speed_dup_hw(struct hclge_dev *hdev, int speed,
switch (speed) {
case HCLGE_MAC_SPEED_10M:
hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
- HCLGE_CFG_SPEED_S, 6);
+ HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_10M);
break;
case HCLGE_MAC_SPEED_100M:
hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
- HCLGE_CFG_SPEED_S, 7);
+ HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_100M);
break;
case HCLGE_MAC_SPEED_1G:
hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
- HCLGE_CFG_SPEED_S, 0);
+ HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_1G);
break;
case HCLGE_MAC_SPEED_10G:
hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
- HCLGE_CFG_SPEED_S, 1);
+ HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_10G);
break;
case HCLGE_MAC_SPEED_25G:
hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
- HCLGE_CFG_SPEED_S, 2);
+ HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_25G);
break;
case HCLGE_MAC_SPEED_40G:
hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
- HCLGE_CFG_SPEED_S, 3);
+ HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_40G);
break;
case HCLGE_MAC_SPEED_50G:
hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
- HCLGE_CFG_SPEED_S, 4);
+ HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_50G);
break;
case HCLGE_MAC_SPEED_100G:
hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
- HCLGE_CFG_SPEED_S, 5);
+ HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_100G);
break;
case HCLGE_MAC_SPEED_200G:
hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
- HCLGE_CFG_SPEED_S, 8);
+ HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_200G);
break;
default:
dev_err(&hdev->pdev->dev, "invalid speed (%d)\n", speed);
@@ -2952,12 +2954,12 @@ static void hclge_update_link_status(struct hclge_dev *hdev)
}
if (state != hdev->hw.mac.link) {
+ hdev->hw.mac.link = state;
client->ops->link_status_change(handle, state);
hclge_config_mac_tnl_int(hdev, state);
if (rclient && rclient->ops->link_status_change)
rclient->ops->link_status_change(rhandle, state);
- hdev->hw.mac.link = state;
hclge_push_link_status(hdev);
}
@@ -3419,7 +3421,7 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
hclge_enable_vector(&hdev->misc_vector, false);
event_cause = hclge_check_event_cause(hdev, &clearval);
- /* vector 0 interrupt is shared with reset and mailbox source events.*/
+ /* vector 0 interrupt is shared with reset and mailbox source events. */
switch (event_cause) {
case HCLGE_VECTOR0_EVENT_ERR:
hclge_errhand_task_schedule(hdev);
@@ -3788,6 +3790,12 @@ static void hclge_do_reset(struct hclge_dev *hdev)
}
switch (hdev->reset_type) {
+ case HNAE3_IMP_RESET:
+ dev_info(&pdev->dev, "IMP reset requested\n");
+ val = hclge_read_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG);
+ hnae3_set_bit(val, HCLGE_TRIGGER_IMP_RESET_B, 1);
+ hclge_write_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG, val);
+ break;
case HNAE3_GLOBAL_RESET:
dev_info(&pdev->dev, "global reset requested\n");
val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG);
@@ -5936,7 +5944,7 @@ static int hclge_config_key(struct hclge_dev *hdev, u8 stage,
cur_key_x = key_x;
cur_key_y = key_y;
- for (i = 0 ; i < MAX_TUPLE; i++) {
+ for (i = 0; i < MAX_TUPLE; i++) {
bool tuple_valid;
tuple_size = tuple_key_info[i].key_length / 8;
@@ -10073,7 +10081,11 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev)
static void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
bool writen_to_tbl)
{
- struct hclge_vport_vlan_cfg *vlan;
+ struct hclge_vport_vlan_cfg *vlan, *tmp;
+
+ list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node)
+ if (vlan->vlan_id == vlan_id)
+ return;
vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
if (!vlan)
@@ -11443,6 +11455,28 @@ static void hclge_clear_resetting_state(struct hclge_dev *hdev)
}
}
+static int hclge_clear_hw_resource(struct hclge_dev *hdev)
+{
+ struct hclge_desc desc;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CLEAR_HW_RESOURCE, false);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ /* This new command is only supported by new firmware, it will
+ * fail with older firmware. Error value -EOPNOTSUPP can only be
+ * returned by older firmware running this command, to keep code
+ * backward compatible we will override this value and return
+ * success.
+ */
+ if (ret && ret != -EOPNOTSUPP) {
+ dev_err(&hdev->pdev->dev,
+ "failed to clear hw resource, ret = %d\n", ret);
+ return ret;
+ }
+ return 0;
+}
+
static void hclge_init_rxd_adv_layout(struct hclge_dev *hdev)
{
if (hnae3_ae_dev_rxd_adv_layout_supported(hdev->ae_dev))
@@ -11482,16 +11516,24 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
if (ret)
goto out;
+ ret = hclge_devlink_init(hdev);
+ if (ret)
+ goto err_pci_uninit;
+
/* Firmware command queue initialize */
ret = hclge_cmd_queue_init(hdev);
if (ret)
- goto err_pci_uninit;
+ goto err_devlink_uninit;
/* Firmware command initialize */
ret = hclge_cmd_init(hdev);
if (ret)
goto err_cmd_uninit;
+ ret = hclge_clear_hw_resource(hdev);
+ if (ret)
+ goto err_cmd_uninit;
+
ret = hclge_get_cap(hdev);
if (ret)
goto err_cmd_uninit;
@@ -11556,7 +11598,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
goto err_mdiobus_unreg;
}
- ret = hclge_config_gro(hdev, true);
+ ret = hclge_config_gro(hdev);
if (ret)
goto err_mdiobus_unreg;
@@ -11658,6 +11700,8 @@ err_msi_uninit:
pci_free_irq_vectors(pdev);
err_cmd_uninit:
hclge_cmd_uninit(hdev);
+err_devlink_uninit:
+ hclge_devlink_uninit(hdev);
err_pci_uninit:
pcim_iounmap(pdev, hdev->hw.io_base);
pci_clear_master(pdev);
@@ -11937,7 +11981,7 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
return ret;
}
- ret = hclge_config_gro(hdev, true);
+ ret = hclge_config_gro(hdev);
if (ret)
return ret;
@@ -12048,6 +12092,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
hclge_cmd_uninit(hdev);
hclge_misc_irq_uninit(hdev);
+ hclge_devlink_uninit(hdev);
hclge_pci_uninit(hdev);
mutex_destroy(&hdev->vport_lock);
hclge_uninit_vport_vlan_table(hdev);
@@ -12671,8 +12716,15 @@ static int hclge_gro_en(struct hnae3_handle *handle, bool enable)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
+ bool gro_en_old = hdev->gro_en;
+ int ret;
+
+ hdev->gro_en = enable;
+ ret = hclge_config_gro(hdev);
+ if (ret)
+ hdev->gro_en = gro_en_old;
- return hclge_config_gro(hdev, enable);
+ return ret;
}
static void hclge_sync_promisc_mode(struct hclge_dev *hdev)
@@ -12829,6 +12881,29 @@ static int hclge_get_module_eeprom(struct hnae3_handle *handle, u32 offset,
return 0;
}
+static int hclge_get_link_diagnosis_info(struct hnae3_handle *handle,
+ u32 *status_code)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_desc desc;
+ int ret;
+
+ if (hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2)
+ return -EOPNOTSUPP;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_LINK_DIAGNOSIS, true);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "failed to query link diagnosis info, ret = %d\n", ret);
+ return ret;
+ }
+
+ *status_code = le32_to_cpu(desc.data[0]);
+ return 0;
+}
+
static const struct hnae3_ae_ops hclge_ops = {
.init_ae_dev = hclge_init_ae_dev,
.uninit_ae_dev = hclge_uninit_ae_dev,
@@ -12929,6 +13004,7 @@ static const struct hnae3_ae_ops hclge_ops = {
.set_tx_hwts_info = hclge_ptp_set_tx_info,
.get_rx_hwts = hclge_ptp_get_rx_hwts,
.get_ts_info = hclge_ptp_get_ts_info,
+ .get_link_diagnosis_info = hclge_get_link_diagnosis_info,
};
static struct hnae3_ae_algo ae_algo = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 3d3352491dba..de6afbcbfbac 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -8,6 +8,7 @@
#include <linux/phy.h>
#include <linux/if_vlan.h>
#include <linux/kfifo.h>
+#include <net/devlink.h>
#include "hclge_cmd.h"
#include "hclge_ptp.h"
@@ -37,22 +38,22 @@
#define HCLGE_VECTOR_REG_OFFSET_H 0x1000
#define HCLGE_VECTOR_VF_OFFSET 0x100000
-#define HCLGE_CMDQ_TX_ADDR_L_REG 0x27000
-#define HCLGE_CMDQ_TX_ADDR_H_REG 0x27004
-#define HCLGE_CMDQ_TX_DEPTH_REG 0x27008
-#define HCLGE_CMDQ_TX_TAIL_REG 0x27010
-#define HCLGE_CMDQ_TX_HEAD_REG 0x27014
-#define HCLGE_CMDQ_RX_ADDR_L_REG 0x27018
-#define HCLGE_CMDQ_RX_ADDR_H_REG 0x2701C
-#define HCLGE_CMDQ_RX_DEPTH_REG 0x27020
-#define HCLGE_CMDQ_RX_TAIL_REG 0x27024
-#define HCLGE_CMDQ_RX_HEAD_REG 0x27028
+#define HCLGE_NIC_CSQ_BASEADDR_L_REG 0x27000
+#define HCLGE_NIC_CSQ_BASEADDR_H_REG 0x27004
+#define HCLGE_NIC_CSQ_DEPTH_REG 0x27008
+#define HCLGE_NIC_CSQ_TAIL_REG 0x27010
+#define HCLGE_NIC_CSQ_HEAD_REG 0x27014
+#define HCLGE_NIC_CRQ_BASEADDR_L_REG 0x27018
+#define HCLGE_NIC_CRQ_BASEADDR_H_REG 0x2701C
+#define HCLGE_NIC_CRQ_DEPTH_REG 0x27020
+#define HCLGE_NIC_CRQ_TAIL_REG 0x27024
+#define HCLGE_NIC_CRQ_HEAD_REG 0x27028
+
#define HCLGE_CMDQ_INTR_STS_REG 0x27104
#define HCLGE_CMDQ_INTR_EN_REG 0x27108
#define HCLGE_CMDQ_INTR_GEN_REG 0x2710C
/* bar registers for common func */
-#define HCLGE_VECTOR0_OTER_EN_REG 0x20600
#define HCLGE_GRO_EN_REG 0x28000
#define HCLGE_RXD_ADV_LAYOUT_EN_REG 0x28008
@@ -193,6 +194,7 @@ enum HLCGE_PORT_TYPE {
#define HCLGE_VECTOR0_IMP_CMDQ_ERR_B 4U
#define HCLGE_VECTOR0_IMP_RD_POISON_B 5U
#define HCLGE_VECTOR0_ALL_MSIX_ERR_B 6U
+#define HCLGE_TRIGGER_IMP_RESET_B 7U
#define HCLGE_MAC_DEFAULT_FRAME \
(ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN + ETH_DATA_LEN)
@@ -927,6 +929,7 @@ struct hclge_dev {
unsigned long fd_bmap[BITS_TO_LONGS(MAX_FD_FILTER_NUM)];
enum HCLGE_FD_ACTIVE_RULE_TYPE fd_active_type;
u8 fd_en;
+ bool gro_en;
u16 wanted_umv_size;
/* max available unicast mac vlan space */
@@ -943,6 +946,7 @@ struct hclge_dev {
cpumask_t affinity_mask;
struct irq_affinity_notify affinity_notify;
struct hclge_ptp *ptp;
+ struct devlink *devlink;
};
/* VPort level vlan tag configuration for TX direction */
@@ -1054,6 +1058,11 @@ struct hclge_vport {
struct list_head vlan_list; /* Store VF vlan table */
};
+struct hclge_speed_bit_map {
+ u32 speed;
+ u32 speed_bit;
+};
+
int hclge_set_vport_promisc_mode(struct hclge_vport *vport, bool en_uc_pmc,
bool en_mc_pmc, bool en_bc_pmc);
int hclge_add_uc_addr_common(struct hclge_vport *vport,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index c0a478ae9583..2ce5302c5956 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -10,7 +10,14 @@
static u16 hclge_errno_to_resp(int errno)
{
- return abs(errno);
+ int resp = abs(errno);
+
+ /* The status for pf to vf msg cmd is u16, constrained by HW.
+ * We need to keep the same type as it.
+ * The input errno is a standard error code, so it is safe to
+ * use a u16 to store the abs(errno).
+ */
+ return (u16)resp;
}
/* hclge_gen_resp_to_vf: used to generate a synchronous response to VF when PF
@@ -66,6 +73,8 @@ static int hclge_gen_resp_to_vf(struct hclge_vport *vport,
memcpy(resp_pf_to_vf->msg.resp_data, resp_msg->data,
resp_msg->len);
+ trace_hclge_pf_mbx_send(hdev, resp_pf_to_vf);
+
status = hclge_cmd_send(&hdev->hw, &desc, 1);
if (status)
dev_err(&hdev->pdev->dev,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h
index dbf5f4c08019..7a9b77de632a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h
@@ -127,7 +127,7 @@ static inline struct hclge_dev *hclge_ptp_get_hdev(struct ptp_clock_info *info)
}
bool hclge_ptp_set_tx_info(struct hnae3_handle *handle, struct sk_buff *skb);
-void hclge_ptp_clean_tx_hwts(struct hclge_dev *dev);
+void hclge_ptp_clean_tx_hwts(struct hclge_dev *hdev);
void hclge_ptp_get_rx_hwts(struct hnae3_handle *handle, struct sk_buff *skb,
u32 nsec, u32 sec);
int hclge_ptp_get_cfg(struct hclge_dev *hdev, struct ifreq *ifr);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile
index 2c26ea607a53..51ff7d86ee90 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile
@@ -7,4 +7,4 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
ccflags-y += -I $(srctree)/$(src)
obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o
-hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o
+hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o hclgevf_devlink.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index bd19a2d89f6c..59772b0e9531 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -71,7 +71,7 @@ static bool hclgevf_cmd_csq_done(struct hclgevf_hw *hw)
static bool hclgevf_is_special_opcode(u16 opcode)
{
- static const u16 spec_opcode[] = {0x30, 0x31, 0x32};
+ const u16 spec_opcode[] = {0x30, 0x31, 0x32};
int i;
for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) {
@@ -342,25 +342,26 @@ static void hclgevf_set_default_capability(struct hclgevf_dev *hdev)
set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
}
+const struct hclgevf_caps_bit_map hclgevf_cmd_caps_bit_map0[] = {
+ {HCLGEVF_CAP_UDP_GSO_B, HNAE3_DEV_SUPPORT_UDP_GSO_B},
+ {HCLGEVF_CAP_INT_QL_B, HNAE3_DEV_SUPPORT_INT_QL_B},
+ {HCLGEVF_CAP_TQP_TXRX_INDEP_B, HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B},
+ {HCLGEVF_CAP_HW_TX_CSUM_B, HNAE3_DEV_SUPPORT_HW_TX_CSUM_B},
+ {HCLGEVF_CAP_UDP_TUNNEL_CSUM_B, HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B},
+ {HCLGEVF_CAP_RXD_ADV_LAYOUT_B, HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B},
+};
+
static void hclgevf_parse_capability(struct hclgevf_dev *hdev,
struct hclgevf_query_version_cmd *cmd)
{
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
- u32 caps;
+ u32 caps, i;
caps = __le32_to_cpu(cmd->caps[0]);
- if (hnae3_get_bit(caps, HCLGEVF_CAP_UDP_GSO_B))
- set_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGEVF_CAP_INT_QL_B))
- set_bit(HNAE3_DEV_SUPPORT_INT_QL_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGEVF_CAP_TQP_TXRX_INDEP_B))
- set_bit(HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGEVF_CAP_HW_TX_CSUM_B))
- set_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGEVF_CAP_UDP_TUNNEL_CSUM_B))
- set_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, ae_dev->caps);
- if (hnae3_get_bit(caps, HCLGEVF_CAP_RXD_ADV_LAYOUT_B))
- set_bit(HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B, ae_dev->caps);
+ for (i = 0; i < ARRAY_SIZE(hclgevf_cmd_caps_bit_map0); i++)
+ if (hnae3_get_bit(caps, hclgevf_cmd_caps_bit_map0[i].imp_bit))
+ set_bit(hclgevf_cmd_caps_bit_map0[i].local_bit,
+ ae_dev->caps);
}
static __le32 hclgevf_build_api_caps(void)
@@ -507,12 +508,17 @@ static void hclgevf_cmd_uninit_regs(struct hclgevf_hw *hw)
void hclgevf_cmd_uninit(struct hclgevf_dev *hdev)
{
+ set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
+ /* wait to ensure that the firmware completes the possible left
+ * over commands.
+ */
+ msleep(HCLGEVF_CMDQ_CLEAR_WAIT_TIME);
spin_lock_bh(&hdev->hw.cmq.csq.lock);
spin_lock(&hdev->hw.cmq.crq.lock);
- set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
hclgevf_cmd_uninit_regs(&hdev->hw);
spin_unlock(&hdev->hw.cmq.crq.lock);
spin_unlock_bh(&hdev->hw.cmq.csq.lock);
+
hclgevf_free_cmd_desc(&hdev->hw.cmq.csq);
hclgevf_free_cmd_desc(&hdev->hw.cmq.crq);
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
index 202feb70dba5..39d0b589c720 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
@@ -8,6 +8,7 @@
#include "hnae3.h"
#define HCLGEVF_CMDQ_TX_TIMEOUT 30000
+#define HCLGEVF_CMDQ_CLEAR_WAIT_TIME 200
#define HCLGEVF_CMDQ_RX_INVLD_B 0
#define HCLGEVF_CMDQ_RX_OUTVLD_B 1
@@ -265,16 +266,6 @@ struct hclgevf_cfg_tx_queue_pointer_cmd {
#define HCLGEVF_TYPE_CRQ 0
#define HCLGEVF_TYPE_CSQ 1
-#define HCLGEVF_NIC_CSQ_BASEADDR_L_REG 0x27000
-#define HCLGEVF_NIC_CSQ_BASEADDR_H_REG 0x27004
-#define HCLGEVF_NIC_CSQ_DEPTH_REG 0x27008
-#define HCLGEVF_NIC_CSQ_TAIL_REG 0x27010
-#define HCLGEVF_NIC_CSQ_HEAD_REG 0x27014
-#define HCLGEVF_NIC_CRQ_BASEADDR_L_REG 0x27018
-#define HCLGEVF_NIC_CRQ_BASEADDR_H_REG 0x2701c
-#define HCLGEVF_NIC_CRQ_DEPTH_REG 0x27020
-#define HCLGEVF_NIC_CRQ_TAIL_REG 0x27024
-#define HCLGEVF_NIC_CRQ_HEAD_REG 0x27028
/* this bit indicates that the driver is ready for hardware reset */
#define HCLGEVF_NIC_SW_RST_RDY_B 16
@@ -305,6 +296,12 @@ struct hclgevf_dev_specs_1_cmd {
u8 rsv1[18];
};
+/* capabilities bits map between imp firmware and local driver */
+struct hclgevf_caps_bit_map {
+ u16 imp_bit;
+ u16 local_bit;
+};
+
static inline void hclgevf_write_reg(void __iomem *base, u32 reg, u32 value)
{
writel(value, base + reg);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c
new file mode 100644
index 000000000000..f478770299c6
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (c) 2021 Hisilicon Limited. */
+
+#include <net/devlink.h>
+
+#include "hclgevf_devlink.h"
+
+static int hclgevf_devlink_info_get(struct devlink *devlink,
+ struct devlink_info_req *req,
+ struct netlink_ext_ack *extack)
+{
+#define HCLGEVF_DEVLINK_FW_STRING_LEN 32
+ struct hclgevf_devlink_priv *priv = devlink_priv(devlink);
+ char version_str[HCLGEVF_DEVLINK_FW_STRING_LEN];
+ struct hclgevf_dev *hdev = priv->hdev;
+ int ret;
+
+ ret = devlink_info_driver_name_put(req, KBUILD_MODNAME);
+ if (ret)
+ return ret;
+
+ snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu",
+ hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK,
+ HNAE3_FW_VERSION_BYTE3_SHIFT),
+ hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE2_MASK,
+ HNAE3_FW_VERSION_BYTE2_SHIFT),
+ hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE1_MASK,
+ HNAE3_FW_VERSION_BYTE1_SHIFT),
+ hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE0_MASK,
+ HNAE3_FW_VERSION_BYTE0_SHIFT));
+
+ return devlink_info_version_running_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_FW,
+ version_str);
+}
+
+static int hclgevf_devlink_reload_down(struct devlink *devlink,
+ bool netns_change,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
+ struct netlink_ext_ack *extack)
+{
+ struct hclgevf_devlink_priv *priv = devlink_priv(devlink);
+ struct hclgevf_dev *hdev = priv->hdev;
+ struct hnae3_handle *h = &hdev->nic;
+ struct pci_dev *pdev = hdev->pdev;
+ int ret;
+
+ if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) {
+ dev_err(&pdev->dev, "reset is handling\n");
+ return -EBUSY;
+ }
+
+ switch (action) {
+ case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
+ rtnl_lock();
+ ret = hdev->nic_client->ops->reset_notify(h, HNAE3_DOWN_CLIENT);
+ if (ret) {
+ rtnl_unlock();
+ return ret;
+ }
+
+ ret = hdev->nic_client->ops->reset_notify(h,
+ HNAE3_UNINIT_CLIENT);
+ rtnl_unlock();
+ return ret;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int hclgevf_devlink_reload_up(struct devlink *devlink,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
+ u32 *actions_performed,
+ struct netlink_ext_ack *extack)
+{
+ struct hclgevf_devlink_priv *priv = devlink_priv(devlink);
+ struct hclgevf_dev *hdev = priv->hdev;
+ struct hnae3_handle *h = &hdev->nic;
+ int ret;
+
+ *actions_performed = BIT(action);
+ switch (action) {
+ case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
+ rtnl_lock();
+ ret = hdev->nic_client->ops->reset_notify(h, HNAE3_INIT_CLIENT);
+ if (ret) {
+ rtnl_unlock();
+ return ret;
+ }
+
+ ret = hdev->nic_client->ops->reset_notify(h, HNAE3_UP_CLIENT);
+ rtnl_unlock();
+ return ret;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static const struct devlink_ops hclgevf_devlink_ops = {
+ .info_get = hclgevf_devlink_info_get,
+ .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT),
+ .reload_down = hclgevf_devlink_reload_down,
+ .reload_up = hclgevf_devlink_reload_up,
+};
+
+int hclgevf_devlink_init(struct hclgevf_dev *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ struct hclgevf_devlink_priv *priv;
+ struct devlink *devlink;
+ int ret;
+
+ devlink =
+ devlink_alloc(&hclgevf_devlink_ops,
+ sizeof(struct hclgevf_devlink_priv), &pdev->dev);
+ if (!devlink)
+ return -ENOMEM;
+
+ priv = devlink_priv(devlink);
+ priv->hdev = hdev;
+ hdev->devlink = devlink;
+
+ ret = devlink_register(devlink);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to register devlink, ret = %d\n",
+ ret);
+ goto out_reg_fail;
+ }
+
+ devlink_reload_enable(devlink);
+
+ return 0;
+
+out_reg_fail:
+ devlink_free(devlink);
+ return ret;
+}
+
+void hclgevf_devlink_uninit(struct hclgevf_dev *hdev)
+{
+ struct devlink *devlink = hdev->devlink;
+
+ devlink_reload_disable(devlink);
+
+ devlink_unregister(devlink);
+
+ devlink_free(devlink);
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.h
new file mode 100644
index 000000000000..e09ea3d8a963
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (c) 2021 Hisilicon Limited. */
+
+#ifndef __HCLGEVF_DEVLINK_H
+#define __HCLGEVF_DEVLINK_H
+
+#include "hclgevf_main.h"
+
+struct hclgevf_devlink_priv {
+ struct hclgevf_dev *hdev;
+};
+
+int hclgevf_devlink_init(struct hclgevf_dev *hdev);
+void hclgevf_devlink_uninit(struct hclgevf_dev *hdev);
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 8784d61e833f..82e727020120 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -8,6 +8,7 @@
#include "hclgevf_main.h"
#include "hclge_mbx.h"
#include "hnae3.h"
+#include "hclgevf_devlink.h"
#define HCLGEVF_NAME "hclgevf"
@@ -39,16 +40,16 @@ static const u8 hclgevf_hash_key[] = {
MODULE_DEVICE_TABLE(pci, ae_algovf_pci_tbl);
-static const u32 cmdq_reg_addr_list[] = {HCLGEVF_CMDQ_TX_ADDR_L_REG,
- HCLGEVF_CMDQ_TX_ADDR_H_REG,
- HCLGEVF_CMDQ_TX_DEPTH_REG,
- HCLGEVF_CMDQ_TX_TAIL_REG,
- HCLGEVF_CMDQ_TX_HEAD_REG,
- HCLGEVF_CMDQ_RX_ADDR_L_REG,
- HCLGEVF_CMDQ_RX_ADDR_H_REG,
- HCLGEVF_CMDQ_RX_DEPTH_REG,
- HCLGEVF_CMDQ_RX_TAIL_REG,
- HCLGEVF_CMDQ_RX_HEAD_REG,
+static const u32 cmdq_reg_addr_list[] = {HCLGEVF_NIC_CSQ_BASEADDR_L_REG,
+ HCLGEVF_NIC_CSQ_BASEADDR_H_REG,
+ HCLGEVF_NIC_CSQ_DEPTH_REG,
+ HCLGEVF_NIC_CSQ_TAIL_REG,
+ HCLGEVF_NIC_CSQ_HEAD_REG,
+ HCLGEVF_NIC_CRQ_BASEADDR_L_REG,
+ HCLGEVF_NIC_CRQ_BASEADDR_H_REG,
+ HCLGEVF_NIC_CRQ_DEPTH_REG,
+ HCLGEVF_NIC_CRQ_TAIL_REG,
+ HCLGEVF_NIC_CRQ_HEAD_REG,
HCLGEVF_VECTOR0_CMDQ_SRC_REG,
HCLGEVF_VECTOR0_CMDQ_STATE_REG,
HCLGEVF_CMDQ_INTR_EN_REG,
@@ -506,10 +507,10 @@ void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state)
link_state =
test_bit(HCLGEVF_STATE_DOWN, &hdev->state) ? 0 : link_state;
if (link_state != hdev->hw.mac.link) {
+ hdev->hw.mac.link = link_state;
client->ops->link_status_change(handle, !!link_state);
if (rclient && rclient->ops->link_status_change)
rclient->ops->link_status_change(rhandle, !!link_state);
- hdev->hw.mac.link = link_state;
}
clear_bit(HCLGEVF_STATE_LINK_UPDATING, &hdev->state);
@@ -538,6 +539,7 @@ static int hclgevf_set_handle_info(struct hclgevf_dev *hdev)
nic->pdev = hdev->pdev;
nic->numa_node_mask = hdev->numa_node_mask;
nic->flags |= HNAE3_SUPPORT_VF;
+ nic->kinfo.io_base = hdev->hw.io_base;
ret = hclgevf_knic_setup(hdev);
if (ret)
@@ -1961,7 +1963,7 @@ static void hclgevf_dump_rst_info(struct hclgevf_dev *hdev)
dev_info(&hdev->pdev->dev, "vector0 interrupt status: 0x%x\n",
hclgevf_read_dev(&hdev->hw, HCLGEVF_VECTOR0_CMDQ_STATE_REG));
dev_info(&hdev->pdev->dev, "handshake status: 0x%x\n",
- hclgevf_read_dev(&hdev->hw, HCLGEVF_CMDQ_TX_DEPTH_REG));
+ hclgevf_read_dev(&hdev->hw, HCLGEVF_NIC_CSQ_DEPTH_REG));
dev_info(&hdev->pdev->dev, "function reset status: 0x%x\n",
hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING));
dev_info(&hdev->pdev->dev, "hdev state: 0x%lx\n", hdev->state);
@@ -2487,6 +2489,8 @@ static int hclgevf_configure(struct hclgevf_dev *hdev)
{
int ret;
+ hdev->gro_en = true;
+
ret = hclgevf_get_basic_info(hdev);
if (ret)
return ret;
@@ -2549,7 +2553,7 @@ static int hclgevf_init_roce_base_info(struct hclgevf_dev *hdev)
return 0;
}
-static int hclgevf_config_gro(struct hclgevf_dev *hdev, bool en)
+static int hclgevf_config_gro(struct hclgevf_dev *hdev)
{
struct hclgevf_cfg_gro_status_cmd *req;
struct hclgevf_desc desc;
@@ -2562,7 +2566,7 @@ static int hclgevf_config_gro(struct hclgevf_dev *hdev, bool en)
false);
req = (struct hclgevf_cfg_gro_status_cmd *)desc.data;
- req->gro_en = en ? 1 : 0;
+ req->gro_en = hdev->gro_en ? 1 : 0;
ret = hclgevf_cmd_send(&hdev->hw, &desc, 1);
if (ret)
@@ -3308,7 +3312,7 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev)
return ret;
}
- ret = hclgevf_config_gro(hdev, true);
+ ret = hclgevf_config_gro(hdev);
if (ret)
return ret;
@@ -3337,6 +3341,10 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
if (ret)
return ret;
+ ret = hclgevf_devlink_init(hdev);
+ if (ret)
+ goto err_devlink_init;
+
ret = hclgevf_cmd_queue_init(hdev);
if (ret)
goto err_cmd_queue_init;
@@ -3389,7 +3397,7 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
if (ret)
goto err_config;
- ret = hclgevf_config_gro(hdev, true);
+ ret = hclgevf_config_gro(hdev);
if (ret)
goto err_config;
@@ -3441,6 +3449,8 @@ err_misc_irq_init:
err_cmd_init:
hclgevf_cmd_uninit(hdev);
err_cmd_queue_init:
+ hclgevf_devlink_uninit(hdev);
+err_devlink_init:
hclgevf_pci_uninit(hdev);
clear_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state);
return ret;
@@ -3462,6 +3472,7 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
}
hclgevf_cmd_uninit(hdev);
+ hclgevf_devlink_uninit(hdev);
hclgevf_pci_uninit(hdev);
hclgevf_uninit_mac_list(hdev);
}
@@ -3638,8 +3649,15 @@ void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed,
static int hclgevf_gro_en(struct hnae3_handle *handle, bool enable)
{
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ bool gro_en_old = hdev->gro_en;
+ int ret;
+
+ hdev->gro_en = enable;
+ ret = hclgevf_config_gro(hdev);
+ if (ret)
+ hdev->gro_en = gro_en_old;
- return hclgevf_config_gro(hdev, enable);
+ return ret;
}
static void hclgevf_get_media_type(struct hnae3_handle *handle, u8 *media_type,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index d7d02848d674..883130a9b48f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -6,6 +6,7 @@
#include <linux/fs.h>
#include <linux/if_vlan.h>
#include <linux/types.h>
+#include <net/devlink.h>
#include "hclge_mbx.h"
#include "hclgevf_cmd.h"
#include "hnae3.h"
@@ -32,16 +33,17 @@
#define HCLGEVF_VECTOR_VF_OFFSET 0x100000
/* bar registers for cmdq */
-#define HCLGEVF_CMDQ_TX_ADDR_L_REG 0x27000
-#define HCLGEVF_CMDQ_TX_ADDR_H_REG 0x27004
-#define HCLGEVF_CMDQ_TX_DEPTH_REG 0x27008
-#define HCLGEVF_CMDQ_TX_TAIL_REG 0x27010
-#define HCLGEVF_CMDQ_TX_HEAD_REG 0x27014
-#define HCLGEVF_CMDQ_RX_ADDR_L_REG 0x27018
-#define HCLGEVF_CMDQ_RX_ADDR_H_REG 0x2701C
-#define HCLGEVF_CMDQ_RX_DEPTH_REG 0x27020
-#define HCLGEVF_CMDQ_RX_TAIL_REG 0x27024
-#define HCLGEVF_CMDQ_RX_HEAD_REG 0x27028
+#define HCLGEVF_NIC_CSQ_BASEADDR_L_REG 0x27000
+#define HCLGEVF_NIC_CSQ_BASEADDR_H_REG 0x27004
+#define HCLGEVF_NIC_CSQ_DEPTH_REG 0x27008
+#define HCLGEVF_NIC_CSQ_TAIL_REG 0x27010
+#define HCLGEVF_NIC_CSQ_HEAD_REG 0x27014
+#define HCLGEVF_NIC_CRQ_BASEADDR_L_REG 0x27018
+#define HCLGEVF_NIC_CRQ_BASEADDR_H_REG 0x2701C
+#define HCLGEVF_NIC_CRQ_DEPTH_REG 0x27020
+#define HCLGEVF_NIC_CRQ_TAIL_REG 0x27024
+#define HCLGEVF_NIC_CRQ_HEAD_REG 0x27028
+
#define HCLGEVF_CMDQ_INTR_EN_REG 0x27108
#define HCLGEVF_CMDQ_INTR_GEN_REG 0x2710C
@@ -310,11 +312,12 @@ struct hclgevf_dev {
u16 *vector_status;
int *vector_irq;
+ bool gro_en;
+
unsigned long vlan_del_fail_bmap[BITS_TO_LONGS(VLAN_N_VID)];
struct hclgevf_mac_table_cfg mac_table;
- bool mbx_event_pending;
struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */
struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */
@@ -330,6 +333,8 @@ struct hclgevf_dev {
u32 flag;
unsigned long serv_processed_cnt;
unsigned long last_serv_processed;
+
+ struct devlink *devlink;
};
static inline bool hclgevf_is_reset_pending(struct hclgevf_dev *hdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index 772b2f8acd2e..fdc66fae0960 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -155,18 +155,66 @@ static bool hclgevf_cmd_crq_empty(struct hclgevf_hw *hw)
return tail == hw->cmq.crq.next_to_use;
}
+static void hclgevf_handle_mbx_response(struct hclgevf_dev *hdev,
+ struct hclge_mbx_pf_to_vf_cmd *req)
+{
+ struct hclgevf_mbx_resp_status *resp = &hdev->mbx_resp;
+
+ if (resp->received_resp)
+ dev_warn(&hdev->pdev->dev,
+ "VF mbx resp flag not clear(%u)\n",
+ req->msg.vf_mbx_msg_code);
+
+ resp->origin_mbx_msg =
+ (req->msg.vf_mbx_msg_code << 16);
+ resp->origin_mbx_msg |= req->msg.vf_mbx_msg_subcode;
+ resp->resp_status =
+ hclgevf_resp_to_errno(req->msg.resp_status);
+ memcpy(resp->additional_info, req->msg.resp_data,
+ HCLGE_MBX_MAX_RESP_DATA_SIZE * sizeof(u8));
+ if (req->match_id) {
+ /* If match_id is not zero, it means the PF supports match_id.
+ * If the match_id is right, the VF gets the right response;
+ * otherwise the response is ignored. The driver will clear
+ * hdev->mbx_resp when sending the next message needing a response.
+ */
+ if (req->match_id == resp->match_id)
+ resp->received_resp = true;
+ } else {
+ resp->received_resp = true;
+ }
+}
+
+static void hclgevf_handle_mbx_msg(struct hclgevf_dev *hdev,
+ struct hclge_mbx_pf_to_vf_cmd *req)
+{
+ /* we will drop the async msg if we find ARQ as full
+ * and continue with next message
+ */
+ if (atomic_read(&hdev->arq.count) >=
+ HCLGE_MBX_MAX_ARQ_MSG_NUM) {
+ dev_warn(&hdev->pdev->dev,
+ "Async Q full, dropping msg(%u)\n",
+ req->msg.code);
+ return;
+ }
+
+ /* tail the async message in arq */
+ memcpy(hdev->arq.msg_q[hdev->arq.tail], &req->msg,
+ HCLGE_MBX_MAX_ARQ_MSG_SIZE * sizeof(u16));
+ hclge_mbx_tail_ptr_move_arq(hdev->arq);
+ atomic_inc(&hdev->arq.count);
+
+ hclgevf_mbx_task_schedule(hdev);
+}
+
void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
{
- struct hclgevf_mbx_resp_status *resp;
struct hclge_mbx_pf_to_vf_cmd *req;
struct hclgevf_cmq_ring *crq;
struct hclgevf_desc *desc;
- u16 *msg_q;
u16 flag;
- u8 *temp;
- int i;
- resp = &hdev->mbx_resp;
crq = &hdev->hw.cmq.crq;
while (!hclgevf_cmd_crq_empty(&hdev->hw)) {
@@ -200,69 +248,14 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
*/
switch (req->msg.code) {
case HCLGE_MBX_PF_VF_RESP:
- if (resp->received_resp)
- dev_warn(&hdev->pdev->dev,
- "VF mbx resp flag not clear(%u)\n",
- req->msg.vf_mbx_msg_code);
- resp->received_resp = true;
-
- resp->origin_mbx_msg =
- (req->msg.vf_mbx_msg_code << 16);
- resp->origin_mbx_msg |= req->msg.vf_mbx_msg_subcode;
- resp->resp_status =
- hclgevf_resp_to_errno(req->msg.resp_status);
-
- temp = (u8 *)req->msg.resp_data;
- for (i = 0; i < HCLGE_MBX_MAX_RESP_DATA_SIZE; i++) {
- resp->additional_info[i] = *temp;
- temp++;
- }
-
- /* If match_id is not zero, it means PF support
- * match_id. If the match_id is right, VF get the
- * right response, otherwise ignore the response.
- * Driver will clear hdev->mbx_resp when send
- * next message which need response.
- */
- if (req->match_id) {
- if (req->match_id == resp->match_id)
- resp->received_resp = true;
- } else {
- resp->received_resp = true;
- }
+ hclgevf_handle_mbx_response(hdev, req);
break;
case HCLGE_MBX_LINK_STAT_CHANGE:
case HCLGE_MBX_ASSERTING_RESET:
case HCLGE_MBX_LINK_STAT_MODE:
case HCLGE_MBX_PUSH_VLAN_INFO:
case HCLGE_MBX_PUSH_PROMISC_INFO:
- /* set this mbx event as pending. This is required as we
- * might loose interrupt event when mbx task is busy
- * handling. This shall be cleared when mbx task just
- * enters handling state.
- */
- hdev->mbx_event_pending = true;
-
- /* we will drop the async msg if we find ARQ as full
- * and continue with next message
- */
- if (atomic_read(&hdev->arq.count) >=
- HCLGE_MBX_MAX_ARQ_MSG_NUM) {
- dev_warn(&hdev->pdev->dev,
- "Async Q full, dropping msg(%u)\n",
- req->msg.code);
- break;
- }
-
- /* tail the async message in arq */
- msg_q = hdev->arq.msg_q[hdev->arq.tail];
- memcpy(&msg_q[0], &req->msg,
- HCLGE_MBX_MAX_ARQ_MSG_SIZE * sizeof(u16));
- hclge_mbx_tail_ptr_move_arq(hdev->arq);
- atomic_inc(&hdev->arq.count);
-
- hclgevf_mbx_task_schedule(hdev);
-
+ hclgevf_handle_mbx_msg(hdev, req);
break;
default:
dev_err(&hdev->pdev->dev,
@@ -298,11 +291,6 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
u8 flag;
u8 idx;
- /* we can safely clear it now as we are at start of the async message
- * processing
- */
- hdev->mbx_event_pending = false;
-
tail = hdev->arq.tail;
/* process all the async queue messages */
@@ -323,8 +311,8 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
flag = (u8)msg_q[5];
/* update upper layer with new link link status */
- hclgevf_update_link_status(hdev, link_status);
hclgevf_update_speed_duplex(hdev, speed, duplex);
+ hclgevf_update_link_status(hdev, link_status);
if (flag & HCLGE_MBX_PUSH_LINK_STATUS_EN)
set_bit(HCLGEVF_STATE_PF_PUSH_LINK_STATUS,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c
index 58d5646444b0..6e11ee339f12 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c
@@ -293,9 +293,9 @@ static const struct devlink_ops hinic_devlink_ops = {
.flash_update = hinic_devlink_flash_update,
};
-struct devlink *hinic_devlink_alloc(void)
+struct devlink *hinic_devlink_alloc(struct device *dev)
{
- return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev));
+ return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev), dev);
}
void hinic_devlink_free(struct devlink *devlink)
@@ -303,11 +303,11 @@ void hinic_devlink_free(struct devlink *devlink)
devlink_free(devlink);
}
-int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev)
+int hinic_devlink_register(struct hinic_devlink_priv *priv)
{
struct devlink *devlink = priv_to_devlink(priv);
- return devlink_register(devlink, dev);
+ return devlink_register(devlink);
}
void hinic_devlink_unregister(struct hinic_devlink_priv *priv)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.h b/drivers/net/ethernet/huawei/hinic/hinic_devlink.h
index a090ebcfaabb..9e315011015c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.h
@@ -108,9 +108,9 @@ struct host_image_st {
u32 device_id;
};
-struct devlink *hinic_devlink_alloc(void);
+struct devlink *hinic_devlink_alloc(struct device *dev);
void hinic_devlink_free(struct devlink *devlink);
-int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev);
+int hinic_devlink_register(struct hinic_devlink_priv *priv);
void hinic_devlink_unregister(struct hinic_devlink_priv *priv);
int hinic_health_reporters_create(struct hinic_devlink_priv *priv);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
index 162d3c330dec..b431c300ef1b 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
@@ -795,13 +795,17 @@ static int __hinic_set_coalesce(struct net_device *netdev,
}
static int hinic_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
return __hinic_get_coalesce(netdev, coal, COALESCE_ALL_QUEUE);
}
static int hinic_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
return __hinic_set_coalesce(netdev, coal, COALESCE_ALL_QUEUE);
}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index 428108eb10d2..56b6b04e209b 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -754,7 +754,7 @@ static int init_pfhwdev(struct hinic_pfhwdev *pfhwdev)
return err;
}
- err = hinic_devlink_register(hwdev->devlink_dev, &pdev->dev);
+ err = hinic_devlink_register(hwdev->devlink_dev);
if (err) {
dev_err(&hwif->pdev->dev, "Failed to register devlink\n");
hinic_pf_to_mgmt_free(&pfhwdev->pf_to_mgmt);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 405ee4d2d2b1..ae707e305684 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -1183,7 +1183,7 @@ static int nic_dev_init(struct pci_dev *pdev)
struct devlink *devlink;
int err, num_qps;
- devlink = hinic_devlink_alloc();
+ devlink = hinic_devlink_alloc(&pdev->dev);
if (!devlink) {
dev_err(&pdev->dev, "Hinic devlink alloc failed\n");
return -ENOMEM;
@@ -1392,28 +1392,16 @@ static int hinic_probe(struct pci_dev *pdev,
pci_set_master(pdev);
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (err) {
dev_warn(&pdev->dev, "Couldn't set 64-bit DMA mask\n");
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Failed to set DMA mask\n");
goto err_dma_mask;
}
}
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
- if (err) {
- dev_warn(&pdev->dev,
- "Couldn't set 64-bit consistent DMA mask\n");
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- if (err) {
- dev_err(&pdev->dev,
- "Failed to set consistent DMA mask\n");
- goto err_dma_consistent_mask;
- }
- }
-
err = nic_dev_init(pdev);
if (err) {
dev_err(&pdev->dev, "Failed to initialize NIC device\n");
@@ -1424,7 +1412,6 @@ static int hinic_probe(struct pci_dev *pdev,
return 0;
err_nic_dev_init:
-err_dma_consistent_mask:
err_dma_mask:
pci_release_regions(pdev);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
index f8a26459ff65..a78c398bf5b2 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
@@ -836,8 +836,10 @@ int hinic_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting)
int hinic_ndo_set_vf_bw(struct net_device *netdev,
int vf, int min_tx_rate, int max_tx_rate)
{
- u32 speeds[] = {SPEED_10, SPEED_100, SPEED_1000, SPEED_10000,
- SPEED_25000, SPEED_40000, SPEED_100000};
+ static const u32 speeds[] = {
+ SPEED_10, SPEED_100, SPEED_1000, SPEED_10000,
+ SPEED_25000, SPEED_40000, SPEED_100000
+ };
struct hinic_dev *nic_dev = netdev_priv(netdev);
struct hinic_port_cap port_cap = { 0 };
enum hinic_port_link_state link_state;
diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c
index fc8c7cd67471..b8a40146b895 100644
--- a/drivers/net/ethernet/i825xx/82596.c
+++ b/drivers/net/ethernet/i825xx/82596.c
@@ -1110,9 +1110,6 @@ static void print_eth(unsigned char *add, char *str)
add, add + 6, add, add[12], add[13], str);
}
-static int io = 0x300;
-static int irq = 10;
-
static const struct net_device_ops i596_netdev_ops = {
.ndo_open = i596_open,
.ndo_stop = i596_close,
@@ -1123,7 +1120,7 @@ static const struct net_device_ops i596_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
};
-struct net_device * __init i82596_probe(int unit)
+static struct net_device * __init i82596_probe(void)
{
struct net_device *dev;
int i;
@@ -1140,14 +1137,6 @@ struct net_device * __init i82596_probe(int unit)
if (!dev)
return ERR_PTR(-ENOMEM);
- if (unit >= 0) {
- sprintf(dev->name, "eth%d", unit);
- netdev_boot_setup_check(dev);
- } else {
- dev->base_addr = io;
- dev->irq = irq;
- }
-
#ifdef ENABLE_MVME16x_NET
if (MACH_IS_MVME16x) {
if (mvme16x_config & MVME16x_CONFIG_NO_ETHERNET) {
@@ -1515,22 +1504,22 @@ static void set_multicast_list(struct net_device *dev)
}
}
-#ifdef MODULE
static struct net_device *dev_82596;
static int debug = -1;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "i82596 debug mask");
-int __init init_module(void)
+static int __init i82596_init(void)
{
if (debug >= 0)
i596_debug = debug;
- dev_82596 = i82596_probe(-1);
+ dev_82596 = i82596_probe();
return PTR_ERR_OR_ZERO(dev_82596);
}
+module_init(i82596_init);
-void __exit cleanup_module(void)
+static void __exit i82596_cleanup(void)
{
unregister_netdev(dev_82596);
#ifdef __mc68000__
@@ -1544,5 +1533,4 @@ void __exit cleanup_module(void)
free_page ((u32)(dev_82596->mem_start));
free_netdev(dev_82596);
}
-
-#endif /* MODULE */
+module_exit(i82596_cleanup);
diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c
index 4564ee02c95f..893e0ddcb611 100644
--- a/drivers/net/ethernet/i825xx/sun3_82586.c
+++ b/drivers/net/ethernet/i825xx/sun3_82586.c
@@ -29,6 +29,7 @@ static int rfdadd = 0; /* rfdadd=1 may be better for 8K MEM cards */
static int fifo=0x8; /* don't change */
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/ioport.h>
@@ -276,7 +277,7 @@ static void alloc586(struct net_device *dev)
memset((char *)p->scb,0,sizeof(struct scb_struct));
}
-struct net_device * __init sun3_82586_probe(int unit)
+static int __init sun3_82586_probe(void)
{
struct net_device *dev;
unsigned long ioaddr;
@@ -291,25 +292,20 @@ struct net_device * __init sun3_82586_probe(int unit)
break;
default:
- return ERR_PTR(-ENODEV);
+ return -ENODEV;
}
if (found)
- return ERR_PTR(-ENODEV);
+ return -ENODEV;
ioaddr = (unsigned long)ioremap(IE_OBIO, SUN3_82586_TOTAL_SIZE);
if (!ioaddr)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
found = 1;
dev = alloc_etherdev(sizeof(struct priv));
if (!dev)
goto out;
- if (unit >= 0) {
- sprintf(dev->name, "eth%d", unit);
- netdev_boot_setup_check(dev);
- }
-
dev->irq = IE_IRQ;
dev->base_addr = ioaddr;
err = sun3_82586_probe1(dev, ioaddr);
@@ -326,8 +322,9 @@ out1:
free_netdev(dev);
out:
iounmap((void __iomem *)ioaddr);
- return ERR_PTR(err);
+ return err;
}
+module_init(sun3_82586_probe);
static const struct net_device_ops sun3_82586_netdev_ops = {
.ndo_open = sun3_82586_open,
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 471be6ec7e8a..664a91af662d 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -3011,7 +3011,7 @@ static const struct net_device_ops emac_netdev_ops = {
.ndo_stop = emac_close,
.ndo_get_stats = emac_stats,
.ndo_set_rx_mode = emac_set_multicast_list,
- .ndo_do_ioctl = emac_ioctl,
+ .ndo_eth_ioctl = emac_ioctl,
.ndo_tx_timeout = emac_tx_timeout,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = emac_set_mac_address,
@@ -3023,7 +3023,7 @@ static const struct net_device_ops emac_gige_netdev_ops = {
.ndo_stop = emac_close,
.ndo_get_stats = emac_stats,
.ndo_set_rx_mode = emac_set_multicast_list,
- .ndo_do_ioctl = emac_ioctl,
+ .ndo_eth_ioctl = emac_ioctl,
.ndo_tx_timeout = emac_tx_timeout,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = emac_set_mac_address,
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 737ba85e409f..3d9b4f99d357 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1630,7 +1630,7 @@ static const struct net_device_ops ibmveth_netdev_ops = {
.ndo_stop = ibmveth_close,
.ndo_start_xmit = ibmveth_start_xmit,
.ndo_set_rx_mode = ibmveth_set_multicast_list,
- .ndo_do_ioctl = ibmveth_ioctl,
+ .ndo_eth_ioctl = ibmveth_ioctl,
.ndo_change_mtu = ibmveth_change_mtu,
.ndo_fix_features = ibmveth_fix_features,
.ndo_set_features = ibmveth_set_features,
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index 82744a7501c7..b0b6f90deb7d 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -58,8 +58,8 @@ config E1000
config E1000E
tristate "Intel(R) PRO/1000 PCI-Express Gigabit Ethernet support"
depends on PCI && (!SPARC32 || BROKEN)
+ depends on PTP_1588_CLOCK_OPTIONAL
select CRC32
- imply PTP_1588_CLOCK
help
This driver supports the PCI-Express Intel(R) PRO/1000 gigabit
ethernet family of adapters. For PCI or PCI-X e1000 adapters,
@@ -87,7 +87,7 @@ config E1000E_HWTS
config IGB
tristate "Intel(R) 82575/82576 PCI-Express Gigabit Ethernet support"
depends on PCI
- imply PTP_1588_CLOCK
+ depends on PTP_1588_CLOCK_OPTIONAL
select I2C
select I2C_ALGOBIT
help
@@ -159,9 +159,9 @@ config IXGB
config IXGBE
tristate "Intel(R) 10GbE PCI Express adapters support"
depends on PCI
+ depends on PTP_1588_CLOCK_OPTIONAL
select MDIO
select PHYLIB
- imply PTP_1588_CLOCK
help
This driver supports Intel(R) 10GbE PCI Express family of
adapters. For more information on how to identify your adapter, go
@@ -239,7 +239,7 @@ config IXGBEVF_IPSEC
config I40E
tristate "Intel(R) Ethernet Controller XL710 Family support"
- imply PTP_1588_CLOCK
+ depends on PTP_1588_CLOCK_OPTIONAL
depends on PCI
select AUXILIARY_BUS
help
@@ -295,11 +295,11 @@ config ICE
tristate "Intel(R) Ethernet Connection E800 Series Support"
default n
depends on PCI_MSI
+ depends on PTP_1588_CLOCK_OPTIONAL
select AUXILIARY_BUS
select DIMLIB
select NET_DEVLINK
select PLDMFW
- imply PTP_1588_CLOCK
help
This driver supports Intel(R) Ethernet Connection E800 Series of
devices. For more information on how to identify your adapter, go
@@ -317,7 +317,7 @@ config FM10K
tristate "Intel(R) FM10000 Ethernet Switch Host Interface Support"
default n
depends on PCI_MSI
- imply PTP_1588_CLOCK
+ depends on PTP_1588_CLOCK_OPTIONAL
help
This driver supports Intel(R) FM10000 Ethernet Switch Host
Interface. For more information on how to identify your adapter,
diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index 1b0958bd24f6..373eb027b925 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -2715,10 +2715,10 @@ static void e100_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
{
switch (stringset) {
case ETH_SS_TEST:
- memcpy(data, *e100_gstrings_test, sizeof(e100_gstrings_test));
+ memcpy(data, e100_gstrings_test, sizeof(e100_gstrings_test));
break;
case ETH_SS_STATS:
- memcpy(data, *e100_gstrings_stats, sizeof(e100_gstrings_stats));
+ memcpy(data, e100_gstrings_stats, sizeof(e100_gstrings_stats));
break;
}
}
@@ -2809,7 +2809,7 @@ static const struct net_device_ops e100_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = e100_set_multicast_list,
.ndo_set_mac_address = e100_set_mac_address,
- .ndo_do_ioctl = e100_do_ioctl,
+ .ndo_eth_ioctl = e100_do_ioctl,
.ndo_tx_timeout = e100_tx_timeout,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = e100_netpoll,
diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
index 3c51ee94fa00..0a57172dfcbc 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
@@ -1739,7 +1739,9 @@ static int e1000_set_phys_id(struct net_device *netdev,
}
static int e1000_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
@@ -1755,7 +1757,9 @@ static int e1000_get_coalesce(struct net_device *netdev,
}
static int e1000_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index c2a109126c27..bed4f040face 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -832,7 +832,7 @@ static const struct net_device_ops e1000_netdev_ops = {
.ndo_set_mac_address = e1000_set_mac,
.ndo_tx_timeout = e1000_tx_timeout,
.ndo_change_mtu = e1000_change_mtu,
- .ndo_do_ioctl = e1000_ioctl,
+ .ndo_eth_ioctl = e1000_ioctl,
.ndo_validate_addr = eth_validate_addr,
.ndo_vlan_rx_add_vid = e1000_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = e1000_vlan_rx_kill_vid,
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 06442e6bef73..8515e00d1b40 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -903,6 +903,7 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data)
case e1000_pch_tgp:
case e1000_pch_adp:
case e1000_pch_mtp:
+ case e1000_pch_lnp:
mask |= BIT(18);
break;
default:
@@ -1569,6 +1570,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter)
case e1000_pch_tgp:
case e1000_pch_adp:
case e1000_pch_mtp:
+ case e1000_pch_lnp:
fext_nvm11 = er32(FEXTNVM11);
fext_nvm11 &= ~E1000_FEXTNVM11_DISABLE_MULR_FIX;
ew32(FEXTNVM11, fext_nvm11);
@@ -1991,7 +1993,9 @@ static int e1000_set_phys_id(struct net_device *netdev,
}
static int e1000_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
@@ -2004,7 +2008,9 @@ static int e1000_get_coalesce(struct net_device *netdev,
}
static int e1000_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index db79c4e6413e..bcf680e83811 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -98,14 +98,22 @@ struct e1000_hw;
#define E1000_DEV_ID_PCH_TGP_I219_V14 0x15FA
#define E1000_DEV_ID_PCH_TGP_I219_LM15 0x15F4
#define E1000_DEV_ID_PCH_TGP_I219_V15 0x15F5
+#define E1000_DEV_ID_PCH_RPL_I219_LM23 0x0DC5
+#define E1000_DEV_ID_PCH_RPL_I219_V23 0x0DC6
#define E1000_DEV_ID_PCH_ADP_I219_LM16 0x1A1E
#define E1000_DEV_ID_PCH_ADP_I219_V16 0x1A1F
#define E1000_DEV_ID_PCH_ADP_I219_LM17 0x1A1C
#define E1000_DEV_ID_PCH_ADP_I219_V17 0x1A1D
+#define E1000_DEV_ID_PCH_RPL_I219_LM22 0x0DC7
+#define E1000_DEV_ID_PCH_RPL_I219_V22 0x0DC8
#define E1000_DEV_ID_PCH_MTP_I219_LM18 0x550A
#define E1000_DEV_ID_PCH_MTP_I219_V18 0x550B
#define E1000_DEV_ID_PCH_MTP_I219_LM19 0x550C
#define E1000_DEV_ID_PCH_MTP_I219_V19 0x550D
+#define E1000_DEV_ID_PCH_LNP_I219_LM20 0x550E
+#define E1000_DEV_ID_PCH_LNP_I219_V20 0x550F
+#define E1000_DEV_ID_PCH_LNP_I219_LM21 0x5510
+#define E1000_DEV_ID_PCH_LNP_I219_V21 0x5511
#define E1000_REVISION_4 4
@@ -132,6 +140,7 @@ enum e1000_mac_type {
e1000_pch_tgp,
e1000_pch_adp,
e1000_pch_mtp,
+ e1000_pch_lnp,
};
enum e1000_media_type {
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index cf7b3887da1d..60c582a16821 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -321,6 +321,7 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
case e1000_pch_tgp:
case e1000_pch_adp:
case e1000_pch_mtp:
+ case e1000_pch_lnp:
if (e1000_phy_is_accessible_pchlan(hw))
break;
@@ -466,6 +467,7 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
case e1000_pch_tgp:
case e1000_pch_adp:
case e1000_pch_mtp:
+ case e1000_pch_lnp:
/* In case the PHY needs to be in mdio slow mode,
* set slow mode and try to get the PHY id again.
*/
@@ -711,6 +713,7 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw)
case e1000_pch_tgp:
case e1000_pch_adp:
case e1000_pch_mtp:
+ case e1000_pch_lnp:
case e1000_pchlan:
/* check management mode */
mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan;
@@ -1006,6 +1009,8 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link)
{
u32 reg = link << (E1000_LTRV_REQ_SHIFT + E1000_LTRV_NOSNOOP_SHIFT) |
link << E1000_LTRV_REQ_SHIFT | E1000_LTRV_SEND;
+ u16 max_ltr_enc_d = 0; /* maximum LTR decoded by platform */
+ u16 lat_enc_d = 0; /* latency decoded */
u16 lat_enc = 0; /* latency encoded */
if (link) {
@@ -1059,7 +1064,17 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link)
E1000_PCI_LTR_CAP_LPT + 2, &max_nosnoop);
max_ltr_enc = max_t(u16, max_snoop, max_nosnoop);
- if (lat_enc > max_ltr_enc)
+ lat_enc_d = (lat_enc & E1000_LTRV_VALUE_MASK) *
+ (1U << (E1000_LTRV_SCALE_FACTOR *
+ ((lat_enc & E1000_LTRV_SCALE_MASK)
+ >> E1000_LTRV_SCALE_SHIFT)));
+
+ max_ltr_enc_d = (max_ltr_enc & E1000_LTRV_VALUE_MASK) *
+ (1U << (E1000_LTRV_SCALE_FACTOR *
+ ((max_ltr_enc & E1000_LTRV_SCALE_MASK)
+ >> E1000_LTRV_SCALE_SHIFT)));
+
+ if (lat_enc_d > max_ltr_enc_d)
lat_enc = max_ltr_enc;
}
@@ -1266,9 +1281,11 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
usleep_range(10000, 11000);
}
if (firmware_bug)
- e_warn("ULP_CONFIG_DONE took %dmsec. This is a firmware bug\n", i * 10);
+ e_warn("ULP_CONFIG_DONE took %d msec. This is a firmware bug\n",
+ i * 10);
else
- e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10);
+ e_dbg("ULP_CONFIG_DONE cleared after %d msec\n",
+ i * 10);
if (force) {
mac_reg = er32(H2ME);
@@ -1663,6 +1680,7 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
case e1000_pch_tgp:
case e1000_pch_adp:
case e1000_pch_mtp:
+ case e1000_pch_lnp:
rc = e1000_init_phy_params_pchlan(hw);
break;
default:
@@ -2118,6 +2136,7 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw)
case e1000_pch_tgp:
case e1000_pch_adp:
case e1000_pch_mtp:
+ case e1000_pch_lnp:
sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M;
break;
default:
@@ -3162,6 +3181,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank)
case e1000_pch_tgp:
case e1000_pch_adp:
case e1000_pch_mtp:
+ case e1000_pch_lnp:
bank1_offset = nvm->flash_bank_size;
act_offset = E1000_ICH_NVM_SIG_WORD;
@@ -4101,6 +4121,7 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw)
case e1000_pch_tgp:
case e1000_pch_adp:
case e1000_pch_mtp:
+ case e1000_pch_lnp:
word = NVM_COMPAT;
valid_csum_mask = NVM_COMPAT_VALID_CSUM;
break;
@@ -4115,13 +4136,17 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw)
return ret_val;
if (!(data & valid_csum_mask)) {
- data |= valid_csum_mask;
- ret_val = e1000_write_nvm(hw, word, 1, &data);
- if (ret_val)
- return ret_val;
- ret_val = e1000e_update_nvm_checksum(hw);
- if (ret_val)
- return ret_val;
+ e_dbg("NVM Checksum Invalid\n");
+
+ if (hw->mac.type < e1000_pch_cnp) {
+ data |= valid_csum_mask;
+ ret_val = e1000_write_nvm(hw, word, 1, &data);
+ if (ret_val)
+ return ret_val;
+ ret_val = e1000e_update_nvm_checksum(hw);
+ if (ret_val)
+ return ret_val;
+ }
}
return e1000e_validate_nvm_checksum_generic(hw);
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h
index 1502895eb45d..d6a092e5ee74 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.h
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h
@@ -41,12 +41,15 @@
#define E1000_FWSM_WLOCK_MAC_MASK 0x0380
#define E1000_FWSM_WLOCK_MAC_SHIFT 7
#define E1000_FWSM_ULP_CFG_DONE 0x00000400 /* Low power cfg done */
+#define E1000_EXFWSM_DPG_EXIT_DONE 0x00000001
/* Shared Receive Address Registers */
#define E1000_SHRAL_PCH_LPT(_i) (0x05408 + ((_i) * 8))
#define E1000_SHRAH_PCH_LPT(_i) (0x0540C + ((_i) * 8))
#define E1000_H2ME 0x05B50 /* Host to ME */
+#define E1000_H2ME_START_DPG 0x00000001 /* indicate the ME of DPG */
+#define E1000_H2ME_EXIT_DPG 0x00000002 /* indicate the ME exit DPG */
#define E1000_H2ME_ULP 0x00000800 /* ULP Indication Bit */
#define E1000_H2ME_ENFORCE_SETTINGS 0x00001000 /* Enforce Settings */
@@ -274,8 +277,11 @@
/* Latency Tolerance Reporting */
#define E1000_LTRV 0x000F8
+#define E1000_LTRV_VALUE_MASK 0x000003FF
#define E1000_LTRV_SCALE_MAX 5
#define E1000_LTRV_SCALE_FACTOR 5
+#define E1000_LTRV_SCALE_SHIFT 10
+#define E1000_LTRV_SCALE_MASK 0x00001C00
#define E1000_LTRV_REQ_SHIFT 15
#define E1000_LTRV_NOSNOOP_SHIFT 16
#define E1000_LTRV_SEND (1 << 30)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 757a54c39eef..900b3ab998bd 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -3550,6 +3550,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca)
case e1000_pch_tgp:
case e1000_pch_adp:
case e1000_pch_mtp:
+ case e1000_pch_lnp:
if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) {
/* Stable 24MHz frequency */
incperiod = INCPERIOD_24MHZ;
@@ -4068,6 +4069,7 @@ void e1000e_reset(struct e1000_adapter *adapter)
case e1000_pch_tgp:
case e1000_pch_adp:
case e1000_pch_mtp:
+ case e1000_pch_lnp:
fc->refresh_time = 0xFFFF;
fc->pause_time = 0xFFFF;
@@ -6343,42 +6345,110 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter)
u32 mac_data;
u16 phy_data;
- /* Disable the periodic inband message,
- * don't request PCIe clock in K1 page770_17[10:9] = 10b
- */
- e1e_rphy(hw, HV_PM_CTRL, &phy_data);
- phy_data &= ~HV_PM_CTRL_K1_CLK_REQ;
- phy_data |= BIT(10);
- e1e_wphy(hw, HV_PM_CTRL, phy_data);
+ if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
+ /* Request ME configure the device for S0ix */
+ mac_data = er32(H2ME);
+ mac_data |= E1000_H2ME_START_DPG;
+ mac_data &= ~E1000_H2ME_EXIT_DPG;
+ ew32(H2ME, mac_data);
+ } else {
+ /* Request driver configure the device to S0ix */
+ /* Disable the periodic inband message,
+ * don't request PCIe clock in K1 page770_17[10:9] = 10b
+ */
+ e1e_rphy(hw, HV_PM_CTRL, &phy_data);
+ phy_data &= ~HV_PM_CTRL_K1_CLK_REQ;
+ phy_data |= BIT(10);
+ e1e_wphy(hw, HV_PM_CTRL, phy_data);
- /* Make sure we don't exit K1 every time a new packet arrives
- * 772_29[5] = 1 CS_Mode_Stay_In_K1
- */
- e1e_rphy(hw, I217_CGFREG, &phy_data);
- phy_data |= BIT(5);
- e1e_wphy(hw, I217_CGFREG, phy_data);
+ /* Make sure we don't exit K1 every time a new packet arrives
+ * 772_29[5] = 1 CS_Mode_Stay_In_K1
+ */
+ e1e_rphy(hw, I217_CGFREG, &phy_data);
+ phy_data |= BIT(5);
+ e1e_wphy(hw, I217_CGFREG, phy_data);
- /* Change the MAC/PHY interface to SMBus
- * Force the SMBus in PHY page769_23[0] = 1
- * Force the SMBus in MAC CTRL_EXT[11] = 1
- */
- e1e_rphy(hw, CV_SMB_CTRL, &phy_data);
- phy_data |= CV_SMB_CTRL_FORCE_SMBUS;
- e1e_wphy(hw, CV_SMB_CTRL, phy_data);
- mac_data = er32(CTRL_EXT);
- mac_data |= E1000_CTRL_EXT_FORCE_SMBUS;
- ew32(CTRL_EXT, mac_data);
+ /* Change the MAC/PHY interface to SMBus
+ * Force the SMBus in PHY page769_23[0] = 1
+ * Force the SMBus in MAC CTRL_EXT[11] = 1
+ */
+ e1e_rphy(hw, CV_SMB_CTRL, &phy_data);
+ phy_data |= CV_SMB_CTRL_FORCE_SMBUS;
+ e1e_wphy(hw, CV_SMB_CTRL, phy_data);
+ mac_data = er32(CTRL_EXT);
+ mac_data |= E1000_CTRL_EXT_FORCE_SMBUS;
+ ew32(CTRL_EXT, mac_data);
+
+ /* DFT control: PHY bit: page769_20[0] = 1
+ * page769_20[7] - PHY PLL stop
+ * page769_20[8] - PHY go to the electrical idle
+ * page769_20[9] - PHY serdes disable
+ * Gate PPW via EXTCNF_CTRL - set 0x0F00[7] = 1
+ */
+ e1e_rphy(hw, I82579_DFT_CTRL, &phy_data);
+ phy_data |= BIT(0);
+ phy_data |= BIT(7);
+ phy_data |= BIT(8);
+ phy_data |= BIT(9);
+ e1e_wphy(hw, I82579_DFT_CTRL, phy_data);
+
+ mac_data = er32(EXTCNF_CTRL);
+ mac_data |= E1000_EXTCNF_CTRL_GATE_PHY_CFG;
+ ew32(EXTCNF_CTRL, mac_data);
+
+ /* Enable the Dynamic Power Gating in the MAC */
+ mac_data = er32(FEXTNVM7);
+ mac_data |= BIT(22);
+ ew32(FEXTNVM7, mac_data);
+
+ /* Disable disconnected cable conditioning for Power Gating */
+ mac_data = er32(DPGFR);
+ mac_data |= BIT(2);
+ ew32(DPGFR, mac_data);
+
+ /* Don't wake from dynamic Power Gating with clock request */
+ mac_data = er32(FEXTNVM12);
+ mac_data |= BIT(12);
+ ew32(FEXTNVM12, mac_data);
+
+ /* Ungate PGCB clock */
+ mac_data = er32(FEXTNVM9);
+ mac_data &= ~BIT(28);
+ ew32(FEXTNVM9, mac_data);
+
+ /* Enable K1 off to enable mPHY Power Gating */
+ mac_data = er32(FEXTNVM6);
+ mac_data |= BIT(31);
+ ew32(FEXTNVM6, mac_data);
+
+ /* Enable mPHY power gating for any link and speed */
+ mac_data = er32(FEXTNVM8);
+ mac_data |= BIT(9);
+ ew32(FEXTNVM8, mac_data);
+
+ /* Enable the Dynamic Clock Gating in the DMA and MAC */
+ mac_data = er32(CTRL_EXT);
+ mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN;
+ ew32(CTRL_EXT, mac_data);
+
+ /* No MAC DPG gating SLP_S0 in modern standby
+ * Switch the logic of the lanphypc to use PMC counter
+ */
+ mac_data = er32(FEXTNVM5);
+ mac_data |= BIT(7);
+ ew32(FEXTNVM5, mac_data);
+ }
- /* DFT control: PHY bit: page769_20[0] = 1
- * Gate PPW via EXTCNF_CTRL - set 0x0F00[7] = 1
- */
- e1e_rphy(hw, I82579_DFT_CTRL, &phy_data);
- phy_data |= BIT(0);
- e1e_wphy(hw, I82579_DFT_CTRL, phy_data);
+ /* Disable the time synchronization clock */
+ mac_data = er32(FEXTNVM7);
+ mac_data |= BIT(31);
+ mac_data &= ~BIT(0);
+ ew32(FEXTNVM7, mac_data);
- mac_data = er32(EXTCNF_CTRL);
- mac_data |= E1000_EXTCNF_CTRL_GATE_PHY_CFG;
- ew32(EXTCNF_CTRL, mac_data);
+ /* Dynamic Power Gating Enable */
+ mac_data = er32(CTRL_EXT);
+ mac_data |= BIT(3);
+ ew32(CTRL_EXT, mac_data);
/* Check MAC Tx/Rx packet buffer pointers.
* Reset MAC Tx/Rx packet buffer pointers to suppress any
@@ -6414,148 +6484,130 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter)
mac_data = er32(RDFPC);
if (mac_data)
ew32(RDFPC, 0);
-
- /* Enable the Dynamic Power Gating in the MAC */
- mac_data = er32(FEXTNVM7);
- mac_data |= BIT(22);
- ew32(FEXTNVM7, mac_data);
-
- /* Disable the time synchronization clock */
- mac_data = er32(FEXTNVM7);
- mac_data |= BIT(31);
- mac_data &= ~BIT(0);
- ew32(FEXTNVM7, mac_data);
-
- /* Dynamic Power Gating Enable */
- mac_data = er32(CTRL_EXT);
- mac_data |= BIT(3);
- ew32(CTRL_EXT, mac_data);
-
- /* Disable disconnected cable conditioning for Power Gating */
- mac_data = er32(DPGFR);
- mac_data |= BIT(2);
- ew32(DPGFR, mac_data);
-
- /* Don't wake from dynamic Power Gating with clock request */
- mac_data = er32(FEXTNVM12);
- mac_data |= BIT(12);
- ew32(FEXTNVM12, mac_data);
-
- /* Ungate PGCB clock */
- mac_data = er32(FEXTNVM9);
- mac_data &= ~BIT(28);
- ew32(FEXTNVM9, mac_data);
-
- /* Enable K1 off to enable mPHY Power Gating */
- mac_data = er32(FEXTNVM6);
- mac_data |= BIT(31);
- ew32(FEXTNVM6, mac_data);
-
- /* Enable mPHY power gating for any link and speed */
- mac_data = er32(FEXTNVM8);
- mac_data |= BIT(9);
- ew32(FEXTNVM8, mac_data);
-
- /* Enable the Dynamic Clock Gating in the DMA and MAC */
- mac_data = er32(CTRL_EXT);
- mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN;
- ew32(CTRL_EXT, mac_data);
-
- /* No MAC DPG gating SLP_S0 in modern standby
- * Switch the logic of the lanphypc to use PMC counter
- */
- mac_data = er32(FEXTNVM5);
- mac_data |= BIT(7);
- ew32(FEXTNVM5, mac_data);
}
static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
+ bool firmware_bug = false;
u32 mac_data;
u16 phy_data;
+ u32 i = 0;
+
+ if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
+ /* Request ME unconfigure the device from S0ix */
+ mac_data = er32(H2ME);
+ mac_data &= ~E1000_H2ME_START_DPG;
+ mac_data |= E1000_H2ME_EXIT_DPG;
+ ew32(H2ME, mac_data);
+
+ /* Poll up to 2.5 seconds for ME to unconfigure DPG.
+ * If this takes more than 1 second, show a warning indicating a
+ * firmware bug
+ */
+ while (!(er32(EXFWSM) & E1000_EXFWSM_DPG_EXIT_DONE)) {
+ if (i > 100 && !firmware_bug)
+ firmware_bug = true;
- /* Disable the Dynamic Power Gating in the MAC */
- mac_data = er32(FEXTNVM7);
- mac_data &= 0xFFBFFFFF;
- ew32(FEXTNVM7, mac_data);
+ if (i++ == 250) {
+ e_dbg("Timeout (firmware bug): %d msec\n",
+ i * 10);
+ break;
+ }
- /* Enable the time synchronization clock */
- mac_data = er32(FEXTNVM7);
- mac_data |= BIT(0);
- ew32(FEXTNVM7, mac_data);
+ usleep_range(10000, 11000);
+ }
+ if (firmware_bug)
+ e_warn("DPG_EXIT_DONE took %d msec. This is a firmware bug\n",
+ i * 10);
+ else
+ e_dbg("DPG_EXIT_DONE cleared after %d msec\n", i * 10);
+ } else {
+ /* Request driver unconfigure the device from S0ix */
+
+ /* Disable the Dynamic Power Gating in the MAC */
+ mac_data = er32(FEXTNVM7);
+ mac_data &= 0xFFBFFFFF;
+ ew32(FEXTNVM7, mac_data);
+
+ /* Disable mPHY power gating for any link and speed */
+ mac_data = er32(FEXTNVM8);
+ mac_data &= ~BIT(9);
+ ew32(FEXTNVM8, mac_data);
+
+ /* Disable K1 off */
+ mac_data = er32(FEXTNVM6);
+ mac_data &= ~BIT(31);
+ ew32(FEXTNVM6, mac_data);
+
+ /* Disable Ungate PGCB clock */
+ mac_data = er32(FEXTNVM9);
+ mac_data |= BIT(28);
+ ew32(FEXTNVM9, mac_data);
+
+ /* Cancel not waking from dynamic
+ * Power Gating with clock request
+ */
+ mac_data = er32(FEXTNVM12);
+ mac_data &= ~BIT(12);
+ ew32(FEXTNVM12, mac_data);
- /* Disable mPHY power gating for any link and speed */
- mac_data = er32(FEXTNVM8);
- mac_data &= ~BIT(9);
- ew32(FEXTNVM8, mac_data);
+ /* Cancel disable disconnected cable conditioning
+ * for Power Gating
+ */
+ mac_data = er32(DPGFR);
+ mac_data &= ~BIT(2);
+ ew32(DPGFR, mac_data);
- /* Disable K1 off */
- mac_data = er32(FEXTNVM6);
- mac_data &= ~BIT(31);
- ew32(FEXTNVM6, mac_data);
+ /* Disable the Dynamic Clock Gating in the DMA and MAC */
+ mac_data = er32(CTRL_EXT);
+ mac_data &= 0xFFF7FFFF;
+ ew32(CTRL_EXT, mac_data);
- /* Disable Ungate PGCB clock */
- mac_data = er32(FEXTNVM9);
- mac_data |= BIT(28);
- ew32(FEXTNVM9, mac_data);
+ /* Revert the lanphypc logic to use the internal Gbe counter
+ * and not the PMC counter
+ */
+ mac_data = er32(FEXTNVM5);
+ mac_data &= 0xFFFFFF7F;
+ ew32(FEXTNVM5, mac_data);
- /* Cancel not waking from dynamic
- * Power Gating with clock request
- */
- mac_data = er32(FEXTNVM12);
- mac_data &= ~BIT(12);
- ew32(FEXTNVM12, mac_data);
+ /* Enable the periodic inband message,
+ * Request PCIe clock in K1 page770_17[10:9] =01b
+ */
+ e1e_rphy(hw, HV_PM_CTRL, &phy_data);
+ phy_data &= 0xFBFF;
+ phy_data |= HV_PM_CTRL_K1_CLK_REQ;
+ e1e_wphy(hw, HV_PM_CTRL, phy_data);
- /* Cancel disable disconnected cable conditioning
- * for Power Gating
- */
- mac_data = er32(DPGFR);
- mac_data &= ~BIT(2);
- ew32(DPGFR, mac_data);
+ /* Return back configuration
+ * 772_29[5] = 0 CS_Mode_Stay_In_K1
+ */
+ e1e_rphy(hw, I217_CGFREG, &phy_data);
+ phy_data &= 0xFFDF;
+ e1e_wphy(hw, I217_CGFREG, phy_data);
+
+ /* Change the MAC/PHY interface to Kumeran
+ * Unforce the SMBus in PHY page769_23[0] = 0
+ * Unforce the SMBus in MAC CTRL_EXT[11] = 0
+ */
+ e1e_rphy(hw, CV_SMB_CTRL, &phy_data);
+ phy_data &= ~CV_SMB_CTRL_FORCE_SMBUS;
+ e1e_wphy(hw, CV_SMB_CTRL, phy_data);
+ mac_data = er32(CTRL_EXT);
+ mac_data &= ~E1000_CTRL_EXT_FORCE_SMBUS;
+ ew32(CTRL_EXT, mac_data);
+ }
/* Disable Dynamic Power Gating */
mac_data = er32(CTRL_EXT);
mac_data &= 0xFFFFFFF7;
ew32(CTRL_EXT, mac_data);
- /* Disable the Dynamic Clock Gating in the DMA and MAC */
- mac_data = er32(CTRL_EXT);
- mac_data &= 0xFFF7FFFF;
- ew32(CTRL_EXT, mac_data);
-
- /* Revert the lanphypc logic to use the internal Gbe counter
- * and not the PMC counter
- */
- mac_data = er32(FEXTNVM5);
- mac_data &= 0xFFFFFF7F;
- ew32(FEXTNVM5, mac_data);
-
- /* Enable the periodic inband message,
- * Request PCIe clock in K1 page770_17[10:9] =01b
- */
- e1e_rphy(hw, HV_PM_CTRL, &phy_data);
- phy_data &= 0xFBFF;
- phy_data |= HV_PM_CTRL_K1_CLK_REQ;
- e1e_wphy(hw, HV_PM_CTRL, phy_data);
-
- /* Return back configuration
- * 772_29[5] = 0 CS_Mode_Stay_In_K1
- */
- e1e_rphy(hw, I217_CGFREG, &phy_data);
- phy_data &= 0xFFDF;
- e1e_wphy(hw, I217_CGFREG, phy_data);
-
- /* Change the MAC/PHY interface to Kumeran
- * Unforce the SMBus in PHY page769_23[0] = 0
- * Unforce the SMBus in MAC CTRL_EXT[11] = 0
- */
- e1e_rphy(hw, CV_SMB_CTRL, &phy_data);
- phy_data &= ~CV_SMB_CTRL_FORCE_SMBUS;
- e1e_wphy(hw, CV_SMB_CTRL, phy_data);
- mac_data = er32(CTRL_EXT);
- mac_data &= ~E1000_CTRL_EXT_FORCE_SMBUS;
- ew32(CTRL_EXT, mac_data);
+ /* Enable the time synchronization clock */
+ mac_data = er32(FEXTNVM7);
+ mac_data &= ~BIT(31);
+ mac_data |= BIT(0);
+ ew32(FEXTNVM7, mac_data);
}
static int e1000e_pm_freeze(struct device *dev)
@@ -7302,7 +7354,7 @@ static const struct net_device_ops e1000e_netdev_ops = {
.ndo_set_rx_mode = e1000e_set_rx_mode,
.ndo_set_mac_address = e1000_set_mac,
.ndo_change_mtu = e1000_change_mtu,
- .ndo_do_ioctl = e1000_ioctl,
+ .ndo_eth_ioctl = e1000_ioctl,
.ndo_tx_timeout = e1000_tx_timeout,
.ndo_validate_addr = eth_validate_addr,
@@ -7677,7 +7729,7 @@ err_dma:
* @pdev: PCI device information struct
*
* e1000_remove is called by the PCI subsystem to alert the driver
- * that it should release a PCI device. The could be caused by a
+ * that it should release a PCI device. This could be caused by a
* Hot-Plug event, or because the driver is going to be removed from
* memory.
**/
@@ -7850,14 +7902,22 @@ static const struct pci_device_id e1000_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_cnp },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_cnp },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V15), board_pch_cnp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM23), board_pch_cnp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V23), board_pch_cnp },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_cnp },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_cnp },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_cnp },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_cnp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM22), board_pch_cnp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V22), board_pch_cnp },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_cnp },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_cnp },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_cnp },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_cnp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_cnp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_cnp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_cnp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_cnp },
{ 0, 0, 0, 0, 0, 0, 0 } /* terminate list */
};
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c
index 9e79d672f4f1..eb5c014c02fb 100644
--- a/drivers/net/ethernet/intel/e1000e/ptp.c
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c
@@ -298,6 +298,7 @@ void e1000e_ptp_init(struct e1000_adapter *adapter)
case e1000_pch_tgp:
case e1000_pch_adp:
case e1000_pch_mtp:
+ case e1000_pch_lnp:
if ((hw->mac.type < e1000_pch_lpt) ||
(er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) {
adapter->ptp_clock_info.max_adj = 24000000 - 1;
diff --git a/drivers/net/ethernet/intel/e1000e/regs.h b/drivers/net/ethernet/intel/e1000e/regs.h
index 8165ba2619a4..6c0cd8cab3ef 100644
--- a/drivers/net/ethernet/intel/e1000e/regs.h
+++ b/drivers/net/ethernet/intel/e1000e/regs.h
@@ -213,6 +213,7 @@
#define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */
#define E1000_SWSM 0x05B50 /* SW Semaphore */
#define E1000_FWSM 0x05B54 /* FW Semaphore */
+#define E1000_EXFWSM 0x05B58 /* Extended FW Semaphore */
/* Driver-only SW semaphore (not used by BOOT agents) */
#define E1000_SWSM2 0x05B58
#define E1000_FFLT_DBG 0x05F04 /* Debug Register */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
index 66776ba7bfb6..0d37f011d0ce 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
@@ -632,7 +632,9 @@ clear_reset:
}
static int fm10k_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct fm10k_intfc *interface = netdev_priv(dev);
@@ -646,7 +648,9 @@ static int fm10k_get_coalesce(struct net_device *dev,
}
static int fm10k_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct fm10k_intfc *interface = netdev_priv(dev);
u16 tx_itr, rx_itr;
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index b9417dc0007c..39fb3d57c057 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -428,6 +428,8 @@ struct i40e_channel {
struct i40e_vsi *parent_vsi;
};
+struct i40e_ptp_pins_settings;
+
static inline bool i40e_is_channel_macvlan(struct i40e_channel *ch)
{
return !!ch->fwd;
@@ -644,12 +646,83 @@ struct i40e_pf {
struct i40e_rx_pb_config pb_cfg; /* Current Rx packet buffer config */
struct i40e_dcbx_config tmp_cfg;
+/* GPIO defines used by PTP */
+#define I40E_SDP3_2 18
+#define I40E_SDP3_3 19
+#define I40E_GPIO_4 20
+#define I40E_LED2_0 26
+#define I40E_LED2_1 27
+#define I40E_LED3_0 28
+#define I40E_LED3_1 29
+#define I40E_GLGEN_GPIO_SET_SDP_DATA_HI \
+ (1 << I40E_GLGEN_GPIO_SET_SDP_DATA_SHIFT)
+#define I40E_GLGEN_GPIO_SET_DRV_SDP_DATA \
+ (1 << I40E_GLGEN_GPIO_SET_DRIVE_SDP_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_0 \
+ (0 << I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_1 \
+ (1 << I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_RESERVED BIT(2)
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_Z \
+ (1 << I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_DIR_OUT \
+ (1 << I40E_GLGEN_GPIO_CTL_PIN_DIR_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_TRI_DRV_HI \
+ (1 << I40E_GLGEN_GPIO_CTL_TRI_CTL_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_OUT_HI_RST \
+ (1 << I40E_GLGEN_GPIO_CTL_OUT_CTL_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_TIMESYNC_0 \
+ (3 << I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_TIMESYNC_1 \
+ (4 << I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN \
+ (0x3F << I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT \
+ (1 << I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PORT_0_IN_TIMESYNC_0 \
+ (I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \
+ I40E_GLGEN_GPIO_CTL_TIMESYNC_0 | \
+ I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_0)
+#define I40E_GLGEN_GPIO_CTL_PORT_1_IN_TIMESYNC_0 \
+ (I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \
+ I40E_GLGEN_GPIO_CTL_TIMESYNC_0 | \
+ I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_1)
+#define I40E_GLGEN_GPIO_CTL_PORT_0_OUT_TIMESYNC_1 \
+ (I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \
+ I40E_GLGEN_GPIO_CTL_TIMESYNC_1 | I40E_GLGEN_GPIO_CTL_OUT_HI_RST | \
+ I40E_GLGEN_GPIO_CTL_TRI_DRV_HI | I40E_GLGEN_GPIO_CTL_DIR_OUT | \
+ I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_0)
+#define I40E_GLGEN_GPIO_CTL_PORT_1_OUT_TIMESYNC_1 \
+ (I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \
+ I40E_GLGEN_GPIO_CTL_TIMESYNC_1 | I40E_GLGEN_GPIO_CTL_OUT_HI_RST | \
+ I40E_GLGEN_GPIO_CTL_TRI_DRV_HI | I40E_GLGEN_GPIO_CTL_DIR_OUT | \
+ I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_1)
+#define I40E_GLGEN_GPIO_CTL_LED_INIT \
+ (I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_Z | \
+ I40E_GLGEN_GPIO_CTL_DIR_OUT | \
+ I40E_GLGEN_GPIO_CTL_TRI_DRV_HI | \
+ I40E_GLGEN_GPIO_CTL_OUT_HI_RST | \
+ I40E_GLGEN_GPIO_CTL_OUT_DEFAULT | \
+ I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN)
+#define I40E_PRTTSYN_AUX_1_INSTNT \
+ (1 << I40E_PRTTSYN_AUX_1_INSTNT_SHIFT)
+#define I40E_PRTTSYN_AUX_0_OUT_ENABLE \
+ (1 << I40E_PRTTSYN_AUX_0_OUT_ENA_SHIFT)
+#define I40E_PRTTSYN_AUX_0_OUT_CLK_MOD (3 << I40E_PRTTSYN_AUX_0_OUTMOD_SHIFT)
+#define I40E_PRTTSYN_AUX_0_OUT_ENABLE_CLK_MOD \
+ (I40E_PRTTSYN_AUX_0_OUT_ENABLE | I40E_PRTTSYN_AUX_0_OUT_CLK_MOD)
+#define I40E_PTP_HALF_SECOND 500000000LL /* nano seconds */
+#define I40E_PTP_2_SEC_DELAY 2
+
struct ptp_clock *ptp_clock;
struct ptp_clock_info ptp_caps;
struct sk_buff *ptp_tx_skb;
unsigned long ptp_tx_start;
struct hwtstamp_config tstamp_config;
struct timespec64 ptp_prev_hw_time;
+ struct work_struct ptp_pps_work;
+ struct work_struct ptp_extts0_work;
+ struct work_struct ptp_extts1_work;
ktime_t ptp_reset_start;
struct mutex tmreg_lock; /* Used to protect the SYSTIME registers. */
u32 ptp_adj_mult;
@@ -657,10 +730,14 @@ struct i40e_pf {
u32 tx_hwtstamp_skipped;
u32 rx_hwtstamp_cleared;
u32 latch_event_flags;
+ u64 ptp_pps_start;
+ u32 pps_delay;
spinlock_t ptp_rx_lock; /* Used to protect Rx timestamp registers. */
+ struct ptp_pin_desc ptp_pin[3];
unsigned long latch_events[4];
bool ptp_tx;
bool ptp_rx;
+ struct i40e_ptp_pins_settings *ptp_pins;
u16 rss_table_size; /* HW RSS table size */
u32 max_bw;
u32 min_bw;
@@ -1169,6 +1246,7 @@ void i40e_ptp_save_hw_time(struct i40e_pf *pf);
void i40e_ptp_restore_hw_time(struct i40e_pf *pf);
void i40e_ptp_init(struct i40e_pf *pf);
void i40e_ptp_stop(struct i40e_pf *pf);
+int i40e_ptp_alloc_pins(struct i40e_pf *pf);
int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi);
i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf);
i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 2c9e4eeb7270..513ba6974355 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2812,13 +2812,17 @@ static int __i40e_get_coalesce(struct net_device *netdev,
* i40e_get_coalesce - get a netdev's coalesce settings
* @netdev: the netdev to check
* @ec: ethtool coalesce data structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
*
* Gets the coalesce settings for a particular netdev. Note that if user has
* modified per-queue settings, this only guarantees to represent queue 0. See
* __i40e_get_coalesce for more details.
**/
static int i40e_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
return __i40e_get_coalesce(netdev, ec, -1);
}
@@ -2986,11 +2990,15 @@ static int __i40e_set_coalesce(struct net_device *netdev,
* i40e_set_coalesce - set coalesce settings for every queue on the netdev
* @netdev: the netdev to change
* @ec: ethtool coalesce settings
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
*
* This will set each queue to the same coalesce settings.
**/
static int i40e_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
return __i40e_set_coalesce(netdev, ec, -1);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 1d1f52756a93..2f20980dd9a5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4079,10 +4079,13 @@ static irqreturn_t i40e_intr(int irq, void *data)
if (icr0 & I40E_PFINT_ICR0_TIMESYNC_MASK) {
u32 prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_0);
- if (prttsyn_stat & I40E_PRTTSYN_STAT_0_TXTIME_MASK) {
- icr0 &= ~I40E_PFINT_ICR0_ENA_TIMESYNC_MASK;
+ if (prttsyn_stat & I40E_PRTTSYN_STAT_0_EVENT0_MASK)
+ schedule_work(&pf->ptp_extts0_work);
+
+ if (prttsyn_stat & I40E_PRTTSYN_STAT_0_TXTIME_MASK)
i40e_ptp_tx_hwtstamp(pf);
- }
+
+ icr0 &= ~I40E_PFINT_ICR0_ENA_TIMESYNC_MASK;
}
/* If a critical error is pending we have no choice but to reset the
@@ -4635,7 +4638,7 @@ void i40e_vsi_stop_rings(struct i40e_vsi *vsi)
err = i40e_control_wait_rx_q(pf, pf_q, false);
if (err)
dev_info(&pf->pdev->dev,
- "VSI seid %d Rx ring %d dissable timeout\n",
+ "VSI seid %d Rx ring %d disable timeout\n",
vsi->seid, pf_q);
}
@@ -13265,7 +13268,7 @@ static const struct net_device_ops i40e_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = i40e_set_mac,
.ndo_change_mtu = i40e_change_mtu,
- .ndo_do_ioctl = i40e_ioctl,
+ .ndo_eth_ioctl = i40e_ioctl,
.ndo_tx_timeout = i40e_tx_timeout,
.ndo_vlan_rx_add_vid = i40e_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = i40e_vlan_rx_kill_vid,
@@ -15181,6 +15184,22 @@ err_switch_setup:
}
/**
+ * i40e_set_subsystem_device_id - fill in hw->subsystem_device_id
+ * @hw: pointer to the hardware info
+ *
+ * Take the subsystem device id from the pci_dev structure when it is
+ * non-zero; otherwise use the low 16 bits read from the
+ * I40E_PFPCI_SUBSYSID register.
+ **/
+static inline void i40e_set_subsystem_device_id(struct i40e_hw *hw)
+{
+	struct i40e_pf *pf = (struct i40e_pf *)hw->back;
+	u16 subsys_id = pf->pdev->subsystem_device;
+
+	/* A zero id in pci_dev means we have to ask the device itself */
+	if (!subsys_id)
+		subsys_id = (ushort)(rd32(hw, I40E_PFPCI_SUBSYSID) & USHRT_MAX);
+
+	hw->subsystem_device_id = subsys_id;
+}
+
+/**
* i40e_probe - Device initialization routine
* @pdev: PCI device information struct
* @ent: entry in i40e_pci_tbl
@@ -15275,7 +15294,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
hw->device_id = pdev->device;
pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
hw->subsystem_vendor_id = pdev->subsystem_vendor;
- hw->subsystem_device_id = pdev->subsystem_device;
+ i40e_set_subsystem_device_id(hw);
hw->bus.device = PCI_SLOT(pdev->devfn);
hw->bus.func = PCI_FUNC(pdev->devfn);
hw->bus.bus_id = pdev->bus->number;
@@ -15455,6 +15474,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (is_valid_ether_addr(hw->mac.port_addr))
pf->hw_features |= I40E_HW_PORT_ID_VALID;
+ i40e_ptp_alloc_pins(pf);
pci_set_drvdata(pdev, pf);
pci_save_state(pdev);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 7b971b205d36..09b1d5aed1c9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -3,6 +3,7 @@
#include "i40e.h"
#include <linux/ptp_classify.h>
+#include <linux/posix-clock.h>
/* The XL710 timesync is very much like Intel's 82599 design when it comes to
* the fundamental clock design. However, the clock operations are much simpler
@@ -20,10 +21,252 @@
#define I40E_PTP_10GB_INCVAL_MULT 2
#define I40E_PTP_5GB_INCVAL_MULT 2
#define I40E_PTP_1GB_INCVAL_MULT 20
+#define I40E_ISGN 0x80000000
#define I40E_PRTTSYN_CTL1_TSYNTYPE_V1 BIT(I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT)
#define I40E_PRTTSYN_CTL1_TSYNTYPE_V2 (2 << \
I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT)
+#define I40E_SUBDEV_ID_25G_PTP_PIN 0xB
+#define to_dev(obj) container_of(obj, struct device, kobj)
+
+/* Indices of the PTP pins; used as the idx column of sdp_desc[] and as the
+ * case labels in i40e_ptp_enable_pin().
+ */
+enum i40e_ptp_pin {
+ SDP3_2 = 0,
+ SDP3_3,
+ GPIO_4
+};
+
+/* Result of i40e_can_set_pins() */
+enum i40e_can_set_pins_t {
+ CANT_DO_PINS = -1, /* not a PTP pin device, or no pins structure */
+ CAN_SET_PINS, /* pins may be programmed from this PF (pf_id == 0) */
+ CAN_DO_PINS /* device has pins, but this PF is not PF0 */
+};
+
+/* Template for the pin descriptors copied into ptp_caps.pin_config by
+ * i40e_init_pin_config(). The table is only ever read, so keep it const.
+ */
+static const struct ptp_pin_desc sdp_desc[] = {
+	/* name     idx      func         chan */
+	{"SDP3_2", SDP3_2, PTP_PF_NONE, 0},
+	{"SDP3_3", SDP3_3, PTP_PF_NONE, 1},
+	{"GPIO_4", GPIO_4, PTP_PF_NONE, 1},
+};
+
+/* Function assigned to a PTP GPIO pin. Values from 'off' upwards are used
+ * as indices into i40e_ptp_gpio_pin_state2str[].
+ */
+enum i40e_ptp_gpio_pin_state {
+ end = -2, /* marks the last row of i40e_ptp_pin_led_allowed_states[] */
+ invalid, /* -1: i40e_ptp_set_pins() substitutes the current setting */
+ off,
+ in_A, /* written as PORT_0_IN_TIMESYNC_0 by i40e_ptp_set_pin_hw() */
+ in_B, /* written as PORT_1_IN_TIMESYNC_0 by i40e_ptp_set_pin_hw() */
+ out_A, /* written as PORT_0_OUT_TIMESYNC_1 by i40e_ptp_set_pin_hw() */
+ out_B, /* written as PORT_1_OUT_TIMESYNC_1 by i40e_ptp_set_pin_hw() */
+};
+
+/* Printable names for enum i40e_ptp_gpio_pin_state, indexed by the enum
+ * value for off..out_B. The negative values (end, invalid) have no entry.
+ */
+static const char * const i40e_ptp_gpio_pin_state2str[] = {
+ "off", "in_A", "in_B", "out_A", "out_B"
+};
+
+/* Level driven on a PTP LED GPIO by i40e_ptp_set_led_hw() */
+enum i40e_ptp_led_pin_state {
+ led_end = -2, /* used in the last row of i40e_ptp_pin_led_allowed_states[] */
+ low = 0,
+ high, /* additionally sets I40E_GLGEN_GPIO_SET_SDP_DATA_HI */
+};
+
+/* One complete PTP pin configuration: the function of each of the three
+ * GPIO pins plus the level of each of the four LED GPIOs that goes with it.
+ */
+struct i40e_ptp_pins_settings {
+ enum i40e_ptp_gpio_pin_state sdp3_2;
+ enum i40e_ptp_gpio_pin_state sdp3_3;
+ enum i40e_ptp_gpio_pin_state gpio_4;
+ enum i40e_ptp_led_pin_state led2_0;
+ enum i40e_ptp_led_pin_state led2_1;
+ enum i40e_ptp_led_pin_state led3_0;
+ enum i40e_ptp_led_pin_state led3_1;
+};
+
+/* Accepted pin configurations and the LED levels applied with each one.
+ * Columns: sdp3_2, sdp3_3, gpio_4, led2_0, led2_1, led3_0, led3_1.
+ * i40e_ptp_set_pins() scans the rows in order and stops at the final row,
+ * whose sdp3_2 member is 'end'.
+ */
+static const struct i40e_ptp_pins_settings
+ i40e_ptp_pin_led_allowed_states[] = {
+ {off, off, off, high, high, high, high},
+ {off, in_A, off, high, high, high, low},
+ {off, out_A, off, high, low, high, high},
+ {off, in_B, off, high, high, high, low},
+ {off, out_B, off, high, low, high, high},
+ {in_A, off, off, high, high, high, low},
+ {in_A, in_B, off, high, high, high, low},
+ {in_A, out_B, off, high, low, high, high},
+ {out_A, off, off, high, low, high, high},
+ {out_A, in_B, off, high, low, high, high},
+ {in_B, off, off, high, high, high, low},
+ {in_B, in_A, off, high, high, high, low},
+ {in_B, out_A, off, high, low, high, high},
+ {out_B, off, off, high, low, high, high},
+ {out_B, in_A, off, high, low, high, high},
+ {off, off, in_A, high, high, low, high},
+ {off, out_A, in_A, high, low, low, high},
+ {off, in_B, in_A, high, high, low, low},
+ {off, out_B, in_A, high, low, low, high},
+ {out_A, off, in_A, high, low, low, high},
+ {out_A, in_B, in_A, high, low, low, high},
+ {in_B, off, in_A, high, high, low, low},
+ {in_B, out_A, in_A, high, low, low, high},
+ {out_B, off, in_A, high, low, low, high},
+ {off, off, out_A, low, high, high, high},
+ {off, in_A, out_A, low, high, high, low},
+ {off, in_B, out_A, low, high, high, low},
+ {off, out_B, out_A, low, low, high, high},
+ {in_A, off, out_A, low, high, high, low},
+ {in_A, in_B, out_A, low, high, high, low},
+ {in_A, out_B, out_A, low, low, high, high},
+ {in_B, off, out_A, low, high, high, low},
+ {in_B, in_A, out_A, low, high, high, low},
+ {out_B, off, out_A, low, low, high, high},
+ {out_B, in_A, out_A, low, low, high, high},
+ {off, off, in_B, high, high, low, high},
+ {off, in_A, in_B, high, high, low, low},
+ {off, out_A, in_B, high, low, low, high},
+ {off, out_B, in_B, high, low, low, high},
+ {in_A, off, in_B, high, high, low, low},
+ {in_A, out_B, in_B, high, low, low, high},
+ {out_A, off, in_B, high, low, low, high},
+ {out_B, off, in_B, high, low, low, high},
+ {out_B, in_A, in_B, high, low, low, high},
+ {off, off, out_B, low, high, high, high},
+ {off, in_A, out_B, low, high, high, low},
+ {off, out_A, out_B, low, low, high, high},
+ {off, in_B, out_B, low, high, high, low},
+ {in_A, off, out_B, low, high, high, low},
+ {in_A, in_B, out_B, low, high, high, low},
+ {out_A, off, out_B, low, low, high, high},
+ {out_A, in_B, out_B, low, low, high, high},
+ {in_B, off, out_B, low, high, high, low},
+ {in_B, in_A, out_B, low, high, high, low},
+ {in_B, out_A, out_B, low, low, high, high},
+ {end, end, end, led_end, led_end, led_end, led_end}
+};
+
+static int i40e_ptp_set_pins(struct i40e_pf *pf,
+ struct i40e_ptp_pins_settings *pins);
+
+/**
+ * i40e_ptp_extts0_work - deliver an external timestamp event
+ * @work: work item embedded in struct i40e_pf as ptp_extts0_work
+ *
+ * Read the sampled event time from the PRTTSYN_EVNT_L(0)/PRTTSYN_EVNT_H(0)
+ * register pair and hand it to the PTP core as a PTP_CLOCK_EXTTS event
+ * whose index is the PF id.
+ **/
+static void i40e_ptp_extts0_work(struct work_struct *work)
+{
+	struct i40e_pf *pf = container_of(work, struct i40e_pf,
+					  ptp_extts0_work);
+	struct i40e_hw *hw = &pf->hw;
+	struct ptp_clock_event extts_event;
+	u32 evnt_lo, evnt_hi;
+
+	/* The event time is split over two 32-bit registers: the low word
+	 * is read first, then the high word.
+	 */
+	evnt_lo = rd32(hw, I40E_PRTTSYN_EVNT_L(0));
+	evnt_hi = rd32(hw, I40E_PRTTSYN_EVNT_H(0));
+
+	extts_event.type = PTP_CLOCK_EXTTS;
+	extts_event.index = hw->pf_id;
+	extts_event.timestamp = ((u64)evnt_hi << 32) | evnt_lo;
+
+	ptp_clock_event(pf->ptp_clock, &extts_event);
+}
+
+/**
+ * i40e_is_ptp_pin_dev - check if device supports PTP pins
+ * @hw: pointer to the hardware structure
+ *
+ * A device supports PTP pins when its device id is I40E_DEV_ID_25G_SFP28
+ * and its subsystem device id is I40E_SUBDEV_ID_25G_PTP_PIN.
+ *
+ * Return true if device supports PTP pins, false otherwise.
+ **/
+static bool i40e_is_ptp_pin_dev(struct i40e_hw *hw)
+{
+	if (hw->device_id != I40E_DEV_ID_25G_SFP28)
+		return false;
+
+	return hw->subsystem_device_id == I40E_SUBDEV_ID_25G_PTP_PIN;
+}
+
+/**
+ * i40e_can_set_pins - check possibility of manipulating the pins
+ * @pf: board private structure
+ *
+ * The checks run in this order, each failure logging a warning:
+ * the device must be a PTP pin device, the pins structure must exist,
+ * and the PF must be PF0.
+ *
+ * Return CAN_SET_PINS if pins can be set on this PF, CAN_DO_PINS if the
+ * device has pins but this is not PF0, CANT_DO_PINS otherwise.
+ **/
+static enum i40e_can_set_pins_t i40e_can_set_pins(struct i40e_pf *pf)
+{
+	struct device *dev = &pf->pdev->dev;
+
+	if (!i40e_is_ptp_pin_dev(&pf->hw)) {
+		dev_warn(dev, "PTP external clock not supported.\n");
+		return CANT_DO_PINS;
+	}
+
+	if (!pf->ptp_pins) {
+		dev_warn(dev, "PTP PIN manipulation not allowed.\n");
+		return CANT_DO_PINS;
+	}
+
+	if (pf->hw.pf_id == 0)
+		return CAN_SET_PINS;
+
+	dev_warn(dev, "PTP PINs should be accessed via PF0.\n");
+	return CAN_DO_PINS;
+}
+
+/**
+ * i40_ptp_reset_timing_events - Reset PTP timing events
+ * @pf: Board private structure
+ *
+ * This function resets timing events for pf. While holding ptp_rx_lock it
+ * reads every Rx timestamp register pair and the Tx timestamp register
+ * pair, clears the per-index latch_events entries and zeroes the hwtstamp
+ * counters and latch_event_flags.
+ **/
+static void i40_ptp_reset_timing_events(struct i40e_pf *pf)
+{
+ u32 i;
+
+ spin_lock_bh(&pf->ptp_rx_lock);
+ for (i = 0; i <= I40E_PRTTSYN_RXTIME_L_MAX_INDEX; i++) {
+ /* reading and automatically clearing timing events registers */
+ rd32(&pf->hw, I40E_PRTTSYN_RXTIME_L(i));
+ rd32(&pf->hw, I40E_PRTTSYN_RXTIME_H(i));
+ pf->latch_events[i] = 0;
+ }
+ /* reading and automatically clearing timing events registers */
+ rd32(&pf->hw, I40E_PRTTSYN_TXTIME_L);
+ rd32(&pf->hw, I40E_PRTTSYN_TXTIME_H);
+
+ /* Start the software bookkeeping from scratch as well */
+ pf->tx_hwtstamp_timeouts = 0;
+ pf->tx_hwtstamp_skipped = 0;
+ pf->rx_hwtstamp_cleared = 0;
+ pf->latch_event_flags = 0;
+ spin_unlock_bh(&pf->ptp_rx_lock);
+}
+
+/**
+ * i40e_ptp_verify - check a requested pin function
+ * @ptp: ptp clock
+ * @pin: pin index
+ * @func: assigned function
+ * @chan: channel
+ *
+ * Only @func is examined: PTP_PF_PHYSYNC is refused, everything else is
+ * accepted.
+ * Return 0 on success or error on failure.
+ **/
+static int i40e_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+			   enum ptp_pin_function func, unsigned int chan)
+{
+	if (func == PTP_PF_PHYSYNC)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
/**
* i40e_ptp_read - Read the PHC time from the device
@@ -137,6 +380,37 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
}
/**
+ * i40e_ptp_set_1pps_signal_hw - configure 1PPS PTP signal for pins
+ * @pf: the PF private data structure
+ *
+ * Configure 1PPS signal used for PTP pins. The target time written to
+ * PRTTSYN_TGT_L/H(1) is the current PHC time plus I40E_PTP_2_SEC_DELAY
+ * seconds with the nanoseconds part cleared, and I40E_PTP_HALF_SECOND is
+ * written to PRTTSYN_CLKO(1).
+ **/
+static void i40e_ptp_set_1pps_signal_hw(struct i40e_pf *pf)
+{
+ struct i40e_hw *hw = &pf->hw;
+ struct timespec64 now;
+ u64 ns;
+
+ /* Zero AUX_0, set INSTNT in AUX_1, then set only the output enable bit */
+ wr32(hw, I40E_PRTTSYN_AUX_0(1), 0);
+ wr32(hw, I40E_PRTTSYN_AUX_1(1), I40E_PRTTSYN_AUX_1_INSTNT);
+ wr32(hw, I40E_PRTTSYN_AUX_0(1), I40E_PRTTSYN_AUX_0_OUT_ENABLE);
+
+ /* Target: a whole second, I40E_PTP_2_SEC_DELAY seconds past "now" */
+ i40e_ptp_read(pf, &now, NULL);
+ now.tv_sec += I40E_PTP_2_SEC_DELAY;
+ now.tv_nsec = 0;
+ ns = timespec64_to_ns(&now);
+
+ /* I40E_PRTTSYN_TGT_L(1) */
+ wr32(hw, I40E_PRTTSYN_TGT_L(1), ns & 0xFFFFFFFF);
+ /* I40E_PRTTSYN_TGT_H(1) */
+ wr32(hw, I40E_PRTTSYN_TGT_H(1), ns >> 32);
+ wr32(hw, I40E_PRTTSYN_CLKO(1), I40E_PTP_HALF_SECOND);
+ wr32(hw, I40E_PRTTSYN_AUX_1(1), I40E_PRTTSYN_AUX_1_INSTNT);
+ /* Finally enable the output together with the clock output mode bits */
+ wr32(hw, I40E_PRTTSYN_AUX_0(1),
+ I40E_PRTTSYN_AUX_0_OUT_ENABLE_CLK_MOD);
+}
+
+/**
* i40e_ptp_adjtime - Adjust the PHC time
* @ptp: The PTP clock structure
* @delta: Offset in nanoseconds to adjust the PHC time by
@@ -146,14 +420,35 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
{
struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
- struct timespec64 now, then;
+ struct i40e_hw *hw = &pf->hw;
- then = ns_to_timespec64(delta);
mutex_lock(&pf->tmreg_lock);
- i40e_ptp_read(pf, &now, NULL);
- now = timespec64_add(now, then);
- i40e_ptp_write(pf, (const struct timespec64 *)&now);
+ if (delta > -999999900LL && delta < 999999900LL) {
+ int neg_adj = 0;
+ u32 timadj;
+ u64 tohw;
+
+ if (delta < 0) {
+ neg_adj = 1;
+ tohw = -delta;
+ } else {
+ tohw = delta;
+ }
+
+ timadj = tohw & 0x3FFFFFFF;
+ if (neg_adj)
+ timadj |= I40E_ISGN;
+ wr32(hw, I40E_PRTTSYN_ADJ, timadj);
+ } else {
+ struct timespec64 then, now;
+
+ then = ns_to_timespec64(delta);
+ i40e_ptp_read(pf, &now, NULL);
+ now = timespec64_add(now, then);
+ i40e_ptp_write(pf, (const struct timespec64 *)&now);
+ i40e_ptp_set_1pps_signal_hw(pf);
+ }
mutex_unlock(&pf->tmreg_lock);
@@ -184,7 +479,7 @@ static int i40e_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
/**
* i40e_ptp_settime - Set the time of the PHC
* @ptp: The PTP clock structure
- * @ts: timespec structure that holds the new time value
+ * @ts: timespec64 structure that holds the new time value
*
* Set the device clock to the user input value. The conversion from timespec
* to ns happens in the write function.
@@ -202,18 +497,145 @@ static int i40e_ptp_settime(struct ptp_clock_info *ptp,
}
/**
- * i40e_ptp_feature_enable - Enable/disable ancillary features of the PHC subsystem
+ * i40e_pps_configure - configure PPS events
+ * @ptp: ptp clock
+ * @rq: clock request
+ * @on: status
+ *
+ * Configure PPS events for external clock source.
+ * Return 0 on success or error on failure.
+ **/
+static int i40e_pps_configure(struct ptp_clock_info *ptp,
+			      struct ptp_clock_request *rq,
+			      int on)
+{
+	/* Nothing is done when the feature is switched off; @rq is unused */
+	if (on) {
+		struct i40e_pf *pf = container_of(ptp, struct i40e_pf,
+						  ptp_caps);
+
+		i40e_ptp_set_1pps_signal_hw(pf);
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_pin_state - determine PIN state
+ * @index: PIN index
+ * @func: function assigned to PIN
+ *
+ * Index 0 selects the "A" variant and index 1 the "B" variant of the
+ * input (PTP_PF_EXTTS) or output (PTP_PF_PEROUT) state. Any other index
+ * or function yields off.
+ * Return PIN state.
+ **/
+static enum i40e_ptp_gpio_pin_state i40e_pin_state(int index, int func)
+{
+	if (index != 0 && index != 1)
+		return off;
+
+	switch (func) {
+	case PTP_PF_EXTTS:
+		return index ? in_B : in_A;
+	case PTP_PF_PEROUT:
+		return index ? out_B : out_A;
+	default:
+		return off;
+	}
+}
+
+/**
+ * i40e_ptp_enable_pin - enable PINs.
+ * @pf: private board structure
+ * @chan: channel
+ * @func: PIN function
+ * @on: state
+ *
+ * Enable PTP pins for external clock source. Only PF0 programs the pins;
+ * on any other PF the request is accepted and ignored. When @on is zero
+ * all three pins are switched off.
+ *
+ * Return 0 on success or error code on failure.
+ **/
+static int i40e_ptp_enable_pin(struct i40e_pf *pf, unsigned int chan,
+			       enum ptp_pin_function func, int on)
+{
+	struct i40e_ptp_pins_settings pins;
+	int pin_index;
+
+	/* Use PF0 to set pins. Return success for user space tools */
+	if (pf->hw.pf_id)
+		return 0;
+
+	/* The pins structure may be missing (its allocation can fail and
+	 * i40e_ptp_free_pins() resets it). Bail out before dereferencing
+	 * it, with the same code a failing i40e_ptp_set_pins() is mapped
+	 * to below.
+	 */
+	if (!pf->ptp_pins)
+		return -EINVAL;
+
+	/* Preserve previous state of pins that we don't touch. Copying the
+	 * whole structure also keeps the LED members initialised.
+	 */
+	pins = *pf->ptp_pins;
+
+	if (on) {
+		/* To turn the pin on, look up which pin currently has
+		 * @func/@chan assigned.
+		 * NOTE(review): an earlier comment here said ptp_find_pin
+		 * must not be used because of pincfg_mux locking, yet the
+		 * code calls it - confirm the PTP core locking contract.
+		 */
+		pin_index = ptp_find_pin(pf->ptp_clock, func, chan);
+		if (pin_index < 0)
+			return -EBUSY;
+
+		switch (pin_index) {
+		case SDP3_2:
+			pins.sdp3_2 = i40e_pin_state(chan, func);
+			break;
+		case SDP3_3:
+			pins.sdp3_3 = i40e_pin_state(chan, func);
+			break;
+		case GPIO_4:
+			pins.gpio_4 = i40e_pin_state(chan, func);
+			break;
+		default:
+			return -EINVAL;
+		}
+	} else {
+		/* Turning a function off switches every pin off */
+		pins.sdp3_2 = off;
+		pins.sdp3_3 = off;
+		pins.gpio_4 = off;
+	}
+
+	return i40e_ptp_set_pins(pf, &pins) ? -EINVAL : 0;
+}
+
+/**
+ * i40e_ptp_feature_enable - Enable external clock pins
* @ptp: The PTP clock structure
- * @rq: The requested feature to change
- * @on: Enable/disable flag
+ * @rq: The PTP clock request structure
+ * @on: To turn feature on/off
*
- * The XL710 does not support any of the ancillary features of the PHC
- * subsystem, so this function may just return.
+ * Setting on/off PTP PPS feature for pin.
**/
static int i40e_ptp_feature_enable(struct ptp_clock_info *ptp,
- struct ptp_clock_request *rq, int on)
+ struct ptp_clock_request *rq,
+ int on)
{
- return -EOPNOTSUPP;
+ struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
+
+ enum ptp_pin_function func;
+ unsigned int chan;
+
+ /* TODO: Implement flags handling for EXTTS and PEROUT */
+ switch (rq->type) {
+ case PTP_CLK_REQ_EXTTS:
+ func = PTP_PF_EXTTS;
+ chan = rq->extts.index;
+ break;
+ case PTP_CLK_REQ_PEROUT:
+ func = PTP_PF_PEROUT;
+ chan = rq->perout.index;
+ break;
+ case PTP_CLK_REQ_PPS:
+ return i40e_pps_configure(ptp, rq, on);
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return i40e_ptp_enable_pin(pf, chan, func, on);
}
/**
@@ -528,6 +950,229 @@ int i40e_ptp_get_ts_config(struct i40e_pf *pf, struct ifreq *ifr)
}
/**
+ * i40e_ptp_free_pins - free memory used by PTP pins
+ * @pf: Board private structure
+ *
+ * Release the pins structure and the pin_config array on devices that
+ * support PTP pins. Both pointers are reset to NULL afterwards so that no
+ * stale pointer is left behind for a later free or dereference.
+ **/
+static void i40e_ptp_free_pins(struct i40e_pf *pf)
+{
+	if (!i40e_is_ptp_pin_dev(&pf->hw))
+		return;
+
+	kfree(pf->ptp_pins);
+	pf->ptp_pins = NULL;
+
+	kfree(pf->ptp_caps.pin_config);
+	pf->ptp_caps.pin_config = NULL;
+}
+
+/**
+ * i40e_ptp_set_pin_hw - Set HW GPIO pin
+ * @hw: pointer to the hardware structure
+ * @pin: pin index
+ * @state: pin state
+ *
+ * Write the GLGEN_GPIO_CTL value that corresponds to @state for @pin.
+ * States without a mapping leave the register untouched.
+ **/
+static void i40e_ptp_set_pin_hw(struct i40e_hw *hw,
+				unsigned int pin,
+				enum i40e_ptp_gpio_pin_state state)
+{
+	u32 gpio_ctl;
+
+	switch (state) {
+	case off:
+		gpio_ctl = 0;
+		break;
+	case in_A:
+		gpio_ctl = I40E_GLGEN_GPIO_CTL_PORT_0_IN_TIMESYNC_0;
+		break;
+	case in_B:
+		gpio_ctl = I40E_GLGEN_GPIO_CTL_PORT_1_IN_TIMESYNC_0;
+		break;
+	case out_A:
+		gpio_ctl = I40E_GLGEN_GPIO_CTL_PORT_0_OUT_TIMESYNC_1;
+		break;
+	case out_B:
+		gpio_ctl = I40E_GLGEN_GPIO_CTL_PORT_1_OUT_TIMESYNC_1;
+		break;
+	default:
+		/* no register write for any other state */
+		return;
+	}
+
+	wr32(hw, I40E_GLGEN_GPIO_CTL(pin), gpio_ctl);
+}
+
+/**
+ * i40e_ptp_set_led_hw - Set HW GPIO led
+ * @hw: pointer to the hardware structure
+ * @led: led index
+ * @state: led state
+ *
+ * Write GLGEN_GPIO_SET for @led with the drive bit set, adding the
+ * data-high bit for the high state. Other states write nothing.
+ **/
+static void i40e_ptp_set_led_hw(struct i40e_hw *hw,
+				unsigned int led,
+				enum i40e_ptp_led_pin_state state)
+{
+	u32 gpio_set = I40E_GLGEN_GPIO_SET_DRV_SDP_DATA | led;
+
+	switch (state) {
+	case low:
+		break;
+	case high:
+		gpio_set |= I40E_GLGEN_GPIO_SET_SDP_DATA_HI;
+		break;
+	default:
+		/* no register write for any other state */
+		return;
+	}
+
+	wr32(hw, I40E_GLGEN_GPIO_SET, gpio_set);
+}
+
+/**
+ * i40e_ptp_init_leds_hw - init LEDs
+ * @hw: pointer to a hardware structure
+ *
+ * Write I40E_GLGEN_GPIO_CTL_LED_INIT to the GPIO control register of each
+ * of the four LED GPIOs, in the order LED2_0, LED2_1, LED3_0, LED3_1.
+ **/
+static void i40e_ptp_init_leds_hw(struct i40e_hw *hw)
+{
+	static const unsigned int led_gpios[] = {
+		I40E_LED2_0, I40E_LED2_1, I40E_LED3_0, I40E_LED3_1,
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(led_gpios); idx++)
+		wr32(hw, I40E_GLGEN_GPIO_CTL(led_gpios[idx]),
+		     I40E_GLGEN_GPIO_CTL_LED_INIT);
+}
+
+/**
+ * i40e_ptp_set_pins_hw - Set HW GPIO pins
+ * @pf: Board private structure
+ *
+ * This function sets GPIO pins for PTP from pf->ptp_pins: all three pins
+ * are first written as off, then written with their requested state, then
+ * the four LEDs are set, and finally the applied configuration is logged.
+ **/
+static void i40e_ptp_set_pins_hw(struct i40e_pf *pf)
+{
+ const struct i40e_ptp_pins_settings *pins = pf->ptp_pins;
+ struct i40e_hw *hw = &pf->hw;
+
+ /* pin must be disabled before it may be used */
+ i40e_ptp_set_pin_hw(hw, I40E_SDP3_2, off);
+ i40e_ptp_set_pin_hw(hw, I40E_SDP3_3, off);
+ i40e_ptp_set_pin_hw(hw, I40E_GPIO_4, off);
+
+ i40e_ptp_set_pin_hw(hw, I40E_SDP3_2, pins->sdp3_2);
+ i40e_ptp_set_pin_hw(hw, I40E_SDP3_3, pins->sdp3_3);
+ i40e_ptp_set_pin_hw(hw, I40E_GPIO_4, pins->gpio_4);
+
+ i40e_ptp_set_led_hw(hw, I40E_LED2_0, pins->led2_0);
+ i40e_ptp_set_led_hw(hw, I40E_LED2_1, pins->led2_1);
+ i40e_ptp_set_led_hw(hw, I40E_LED3_0, pins->led3_0);
+ i40e_ptp_set_led_hw(hw, I40E_LED3_1, pins->led3_1);
+
+ /* NOTE(review): the pin states index the name table directly, with no
+ * range check here - callers are expected to pass off..out_B only.
+ */
+ dev_info(&pf->pdev->dev,
+ "PTP configuration set to: SDP3_2: %s, SDP3_3: %s, GPIO_4: %s.\n",
+ i40e_ptp_gpio_pin_state2str[pins->sdp3_2],
+ i40e_ptp_gpio_pin_state2str[pins->sdp3_3],
+ i40e_ptp_gpio_pin_state2str[pins->gpio_4]);
+}
+
+/**
+ * i40e_ptp_set_pins - set PTP pins in HW
+ * @pf: Board private structure
+ * @pins: PTP pins to be applied
+ *
+ * Validate @pins against the table of allowed configurations, fill in the
+ * matching LED levels, store the result in pf->ptp_pins, program the
+ * hardware and reset the timing events. Pins given as 'invalid' keep
+ * their current setting.
+ * Return 0 on success or negative value on error.
+ **/
+static int i40e_ptp_set_pins(struct i40e_pf *pf,
+			     struct i40e_ptp_pins_settings *pins)
+{
+	const struct i40e_ptp_pins_settings *allowed;
+
+	switch (i40e_can_set_pins(pf)) {
+	case CANT_DO_PINS:
+		return -EOPNOTSUPP;
+	case CAN_DO_PINS:
+		/* not PF0: report success without touching anything */
+		return 0;
+	default:
+		break;
+	}
+
+	if (pins->sdp3_2 == invalid)
+		pins->sdp3_2 = pf->ptp_pins->sdp3_2;
+	if (pins->sdp3_3 == invalid)
+		pins->sdp3_3 = pf->ptp_pins->sdp3_3;
+	if (pins->gpio_4 == invalid)
+		pins->gpio_4 = pf->ptp_pins->gpio_4;
+
+	/* Walk the table until a row matches or the 'end' row is reached */
+	for (allowed = i40e_ptp_pin_led_allowed_states;
+	     allowed->sdp3_2 != end; allowed++) {
+		if (allowed->sdp3_2 == pins->sdp3_2 &&
+		    allowed->sdp3_3 == pins->sdp3_3 &&
+		    allowed->gpio_4 == pins->gpio_4)
+			break;
+	}
+
+	if (allowed->sdp3_2 == end) {
+		dev_warn(&pf->pdev->dev,
+			 "Unsupported PTP pin configuration: SDP3_2: %s, SDP3_3: %s, GPIO_4: %s.\n",
+			 i40e_ptp_gpio_pin_state2str[pins->sdp3_2],
+			 i40e_ptp_gpio_pin_state2str[pins->sdp3_3],
+			 i40e_ptp_gpio_pin_state2str[pins->gpio_4]);
+
+		return -EPERM;
+	}
+
+	/* The LED levels always come from the matching table row */
+	pins->led2_0 = allowed->led2_0;
+	pins->led2_1 = allowed->led2_1;
+	pins->led3_0 = allowed->led3_0;
+	pins->led3_1 = allowed->led3_1;
+
+	memcpy(pf->ptp_pins, pins, sizeof(*pins));
+	i40e_ptp_set_pins_hw(pf);
+	i40_ptp_reset_timing_events(pf);
+
+	return 0;
+}
+
+/**
+ * i40e_ptp_alloc_pins - allocate PTP pins structure
+ * @pf: Board private structure
+ *
+ * On devices that support PTP pins, allocate the pins structure and
+ * initialise it to "all pins off, all LEDs high". On PF0 the LEDs are
+ * initialised and this state is also written to the hardware.
+ *
+ * Return: 0 on success (including devices without PTP pins), -ENOMEM if
+ * the allocation fails.
+ **/
+int i40e_ptp_alloc_pins(struct i40e_pf *pf)
+{
+	if (!i40e_is_ptp_pin_dev(&pf->hw))
+		return 0;
+
+	pf->ptp_pins = kzalloc(sizeof(*pf->ptp_pins), GFP_KERNEL);
+	if (!pf->ptp_pins) {
+		dev_warn(&pf->pdev->dev, "Cannot allocate memory for PTP pins structure.\n");
+		/* This function returns int, so report a plain negative
+		 * errno rather than a negated driver status code.
+		 */
+		return -ENOMEM;
+	}
+
+	pf->ptp_pins->sdp3_2 = off;
+	pf->ptp_pins->sdp3_3 = off;
+	pf->ptp_pins->gpio_4 = off;
+	pf->ptp_pins->led2_0 = high;
+	pf->ptp_pins->led2_1 = high;
+	pf->ptp_pins->led3_0 = high;
+	pf->ptp_pins->led3_1 = high;
+
+	/* Use PF0 to set pins in HW. Return success for user space tools */
+	if (pf->hw.pf_id)
+		return 0;
+
+	i40e_ptp_init_leds_hw(&pf->hw);
+	i40e_ptp_set_pins_hw(pf);
+
+	return 0;
+}
+
+/**
* i40e_ptp_set_timestamp_mode - setup hardware for requested timestamp mode
* @pf: Board private structure
* @config: hwtstamp settings requested or saved
@@ -545,6 +1190,21 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf,
struct i40e_hw *hw = &pf->hw;
u32 tsyntype, regval;
+ /* Selects external trigger to cause event */
+ regval = rd32(hw, I40E_PRTTSYN_AUX_0(0));
+ /* Bit 17:16 is EVNTLVL, 01B rising edge */
+ regval &= 0;
+ regval |= (1 << I40E_PRTTSYN_AUX_0_EVNTLVL_SHIFT);
+ /* regval: 0001 0000 0000 0000 0000 */
+ wr32(hw, I40E_PRTTSYN_AUX_0(0), regval);
+
+ /* Enable interrupts */
+ regval = rd32(hw, I40E_PRTTSYN_CTL0);
+ regval |= 1 << I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT;
+ wr32(hw, I40E_PRTTSYN_CTL0, regval);
+
+ INIT_WORK(&pf->ptp_extts0_work, i40e_ptp_extts0_work);
+
/* Reserved for future extensions. */
if (config->flags)
return -EINVAL;
@@ -688,6 +1348,45 @@ int i40e_ptp_set_ts_config(struct i40e_pf *pf, struct ifreq *ifr)
}
/**
+ * i40e_init_pin_config - initialize pins.
+ * @pf: private board structure
+ *
+ * Advertise the pin, external timestamp, periodic output and PPS
+ * capabilities in ptp_caps, build the pin_config array from sdp_desc and
+ * install the verify and enable callbacks.
+ * Return 0 on success or error code on failure.
+ **/
+static int i40e_init_pin_config(struct i40e_pf *pf)
+{
+	struct ptp_clock_info *caps = &pf->ptp_caps;
+	int idx;
+
+	caps->n_pins = 3;
+	caps->n_ext_ts = 2;
+	caps->pps = 1;
+	caps->n_per_out = 2;
+
+	caps->pin_config = kcalloc(caps->n_pins, sizeof(*caps->pin_config),
+				   GFP_KERNEL);
+	if (!caps->pin_config)
+		return -ENOMEM;
+
+	/* Every pin starts without a function assigned */
+	for (idx = 0; idx < caps->n_pins; idx++) {
+		struct ptp_pin_desc *desc = &caps->pin_config[idx];
+
+		snprintf(desc->name, sizeof(desc->name), "%s",
+			 sdp_desc[idx].name);
+		desc->index = sdp_desc[idx].index;
+		desc->func = PTP_PF_NONE;
+		desc->chan = sdp_desc[idx].chan;
+	}
+
+	caps->verify = i40e_ptp_verify;
+	caps->enable = i40e_ptp_feature_enable;
+
+	return 0;
+}
+
+/**
* i40e_ptp_create_clock - Create PTP clock device for userspace
* @pf: Board private structure
*
@@ -707,13 +1406,16 @@ static long i40e_ptp_create_clock(struct i40e_pf *pf)
sizeof(pf->ptp_caps.name) - 1);
pf->ptp_caps.owner = THIS_MODULE;
pf->ptp_caps.max_adj = 999999999;
- pf->ptp_caps.n_ext_ts = 0;
- pf->ptp_caps.pps = 0;
pf->ptp_caps.adjfreq = i40e_ptp_adjfreq;
pf->ptp_caps.adjtime = i40e_ptp_adjtime;
pf->ptp_caps.gettimex64 = i40e_ptp_gettimex;
pf->ptp_caps.settime64 = i40e_ptp_settime;
- pf->ptp_caps.enable = i40e_ptp_feature_enable;
+ if (i40e_is_ptp_pin_dev(&pf->hw)) {
+ int err = i40e_init_pin_config(pf);
+
+ if (err)
+ return err;
+ }
/* Attempt to register the clock before enabling the hardware. */
pf->ptp_clock = ptp_clock_register(&pf->ptp_caps, &pf->pdev->dev);
@@ -843,6 +1545,8 @@ void i40e_ptp_init(struct i40e_pf *pf)
/* Restore the clock time based on last known value */
i40e_ptp_restore_hw_time(pf);
}
+
+ i40e_ptp_set_1pps_signal_hw(pf);
}
/**
@@ -854,6 +1558,9 @@ void i40e_ptp_init(struct i40e_pf *pf)
**/
void i40e_ptp_stop(struct i40e_pf *pf)
{
+ struct i40e_hw *hw = &pf->hw;
+ u32 regval;
+
pf->flags &= ~I40E_FLAG_PTP;
pf->ptp_tx = false;
pf->ptp_rx = false;
@@ -872,4 +1579,21 @@ void i40e_ptp_stop(struct i40e_pf *pf)
dev_info(&pf->pdev->dev, "%s: removed PHC on %s\n", __func__,
pf->vsi[pf->lan_vsi]->netdev->name);
}
+
+ if (i40e_is_ptp_pin_dev(&pf->hw)) {
+ i40e_ptp_set_pin_hw(hw, I40E_SDP3_2, off);
+ i40e_ptp_set_pin_hw(hw, I40E_SDP3_3, off);
+ i40e_ptp_set_pin_hw(hw, I40E_GPIO_4, off);
+ }
+
+ regval = rd32(hw, I40E_PRTTSYN_AUX_0(0));
+ regval &= ~I40E_PRTTSYN_AUX_0_PTPFLAG_MASK;
+ wr32(hw, I40E_PRTTSYN_AUX_0(0), regval);
+
+ /* Disable interrupts */
+ regval = rd32(hw, I40E_PRTTSYN_CTL0);
+ regval &= ~I40E_PRTTSYN_CTL0_EVENT_INT_ENA_MASK;
+ wr32(hw, I40E_PRTTSYN_CTL0, regval);
+
+ i40e_ptp_free_pins(pf);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index 36f7b27a04ae..8d0588a27a05 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -182,11 +182,20 @@
#define I40E_GLGEN_GPIO_CTL_PRT_NUM_MASK I40E_MASK(0x3, I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT)
#define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT 3
#define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PIN_DIR_SHIFT 4
+#define I40E_GLGEN_GPIO_CTL_TRI_CTL_SHIFT 5
+#define I40E_GLGEN_GPIO_CTL_OUT_CTL_SHIFT 6
#define I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT 7
#define I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK I40E_MASK(0x7, I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT)
#define I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT 11
#define I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT 12
#define I40E_GLGEN_GPIO_CTL_LED_MODE_MASK I40E_MASK(0x1F, I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT 19
+#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_SHIFT 20
+#define I40E_GLGEN_GPIO_SET 0x00088184 /* Reset: POR */
+#define I40E_GLGEN_GPIO_SET_SDP_DATA_SHIFT 5
+#define I40E_GLGEN_GPIO_SET_DRIVE_SDP_SHIFT 6
#define I40E_GLGEN_MDIO_I2C_SEL(_i) (0x000881C0 + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */
#define I40E_GLGEN_MSCA(_i) (0x0008818C + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */
#define I40E_GLGEN_MSCA_MDIADD_SHIFT 0
@@ -540,6 +549,7 @@
#define I40E_PF_PCI_CIAA_VF_NUM_SHIFT 12
#define I40E_PF_PCI_CIAD 0x0009C100 /* Reset: FLR */
#define I40E_PRTPM_EEE_STAT 0x001E4320 /* Reset: GLOBR */
+#define I40E_PFPCI_SUBSYSID 0x000BE100 /* Reset: PCIR */
#define I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT 30
#define I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK I40E_MASK(0x1, I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT)
#define I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT 31
@@ -742,6 +752,8 @@
#define I40E_PRTTSYN_CTL0 0x001E4200 /* Reset: GLOBR */
#define I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_SHIFT 1
#define I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_SHIFT)
+#define I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT 2
+#define I40E_PRTTSYN_CTL0_EVENT_INT_ENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT)
#define I40E_PRTTSYN_CTL0_PF_ID_SHIFT 8
#define I40E_PRTTSYN_CTL0_PF_ID_MASK I40E_MASK(0xF, I40E_PRTTSYN_CTL0_PF_ID_SHIFT)
#define I40E_PRTTSYN_CTL0_TSYNENA_SHIFT 31
@@ -760,7 +772,10 @@
#define I40E_PRTTSYN_INC_L 0x001E4040 /* Reset: GLOBR */
#define I40E_PRTTSYN_RXTIME_H(_i) (0x00085040 + ((_i) * 32)) /* _i=0...3 */ /* Reset: CORER */
#define I40E_PRTTSYN_RXTIME_L(_i) (0x000850C0 + ((_i) * 32)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_PRTTSYN_RXTIME_L_MAX_INDEX 3
#define I40E_PRTTSYN_STAT_0 0x001E4220 /* Reset: GLOBR */
+#define I40E_PRTTSYN_STAT_0_EVENT0_SHIFT 0
+#define I40E_PRTTSYN_STAT_0_EVENT0_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_0_EVENT0_SHIFT)
#define I40E_PRTTSYN_STAT_0_TXTIME_SHIFT 4
#define I40E_PRTTSYN_STAT_0_TXTIME_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_0_TXTIME_SHIFT)
#define I40E_PRTTSYN_STAT_1 0x00085140 /* Reset: CORER */
@@ -768,6 +783,20 @@
#define I40E_PRTTSYN_TIME_L 0x001E4100 /* Reset: GLOBR */
#define I40E_PRTTSYN_TXTIME_H 0x001E41E0 /* Reset: GLOBR */
#define I40E_PRTTSYN_TXTIME_L 0x001E41C0 /* Reset: GLOBR */
+#define I40E_PRTTSYN_EVNT_H(_i) (0x001E40C0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_EVNT_L(_i) (0x001E4080 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_AUX_0(_i) (0x001E42A0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_AUX_0_OUT_ENA_SHIFT 0
+#define I40E_PRTTSYN_AUX_0_OUTMOD_SHIFT 1
+#define I40E_PRTTSYN_AUX_0_EVNTLVL_SHIFT 16
+#define I40E_PRTTSYN_AUX_0_PTPFLAG_SHIFT 17
+#define I40E_PRTTSYN_AUX_0_PTPFLAG_MASK I40E_MASK(0x1, I40E_PRTTSYN_AUX_0_PTPFLAG_SHIFT)
+#define I40E_PRTTSYN_AUX_1(_i) (0x001E42E0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_AUX_1_INSTNT_SHIFT 0
+#define I40E_PRTTSYN_TGT_H(_i) (0x001E4180 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_TGT_L(_i) (0x001E4140 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_CLKO(_i) (0x001E4240 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_ADJ 0x001E4280 /* Reset: GLOBR */
#define I40E_GL_MDET_RX 0x0012A510 /* Reset: CORER */
#define I40E_GL_MDET_RX_FUNCTION_SHIFT 0
#define I40E_GL_MDET_RX_FUNCTION_MASK I40E_MASK(0xFF, I40E_GL_MDET_RX_FUNCTION_SHIFT)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 3f25bd8c4924..10a83e5385c7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -3663,8 +3663,7 @@ u16 i40e_lan_select_queue(struct net_device *netdev,
/* is DCB enabled at all? */
if (vsi->tc_config.numtc == 1)
- return i40e_swdcb_skb_tx_hash(netdev, skb,
- netdev->real_num_tx_queues);
+ return netdev_pick_tx(netdev, skb, sb_dev);
prio = skb->priority;
hw = &vsi->back->hw;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index eff0a30790dd..472f56b360b8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1160,12 +1160,12 @@ static int i40e_quiesce_vf_pci(struct i40e_vf *vf)
}
/**
- * i40e_getnum_vf_vsi_vlan_filters
+ * __i40e_getnum_vf_vsi_vlan_filters
* @vsi: pointer to the vsi
*
* called to get the number of VLANs offloaded on this VF
**/
-static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi)
+static int __i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi)
{
struct i40e_mac_filter *f;
u16 num_vlans = 0, bkt;
@@ -1179,6 +1179,23 @@ static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi)
}
/**
+ * i40e_getnum_vf_vsi_vlan_filters
+ * @vsi: pointer to the vsi
+ *
+ * Locked variant of __i40e_getnum_vf_vsi_vlan_filters(): takes
+ * vsi->mac_filter_hash_lock around the call and hands back whatever
+ * the unlocked helper returned.
+ **/
+static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi)
+{
+	int count;
+
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	count = __i40e_getnum_vf_vsi_vlan_filters(vsi);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	return count;
+}
+
+/**
* i40e_get_vlan_list_sync
* @vsi: pointer to the VSI
* @num_vlans: number of VLANs in mac_filter_hash, returned to caller
@@ -1195,7 +1212,7 @@ static void i40e_get_vlan_list_sync(struct i40e_vsi *vsi, u16 *num_vlans,
int bkt;
spin_lock_bh(&vsi->mac_filter_hash_lock);
- *num_vlans = i40e_getnum_vf_vsi_vlan_filters(vsi);
+ *num_vlans = __i40e_getnum_vf_vsi_vlan_filters(vsi);
*vlan_list = kcalloc(*num_vlans, sizeof(**vlan_list), GFP_ATOMIC);
if (!(*vlan_list))
goto err;
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index e8bd04100ecd..68c80f04113c 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -136,6 +136,7 @@ struct iavf_q_vector {
struct iavf_mac_filter {
struct list_head list;
u8 macaddr[ETH_ALEN];
+ bool is_new_mac; /* filter is new, wait for PF decision */
bool remove; /* filter needs to be removed */
bool add; /* filter needs to be added */
};
@@ -185,12 +186,6 @@ enum iavf_state_t {
__IAVF_RUNNING, /* opened, working */
};
-enum iavf_critical_section_t {
- __IAVF_IN_CRITICAL_TASK, /* cannot be interrupted */
- __IAVF_IN_CLIENT_TASK,
- __IAVF_IN_REMOVE_TASK, /* device being removed */
-};
-
#define IAVF_CLOUD_FIELD_OMAC 0x01
#define IAVF_CLOUD_FIELD_IMAC 0x02
#define IAVF_CLOUD_FIELD_IVLAN 0x04
@@ -235,6 +230,9 @@ struct iavf_adapter {
struct iavf_q_vector *q_vectors;
struct list_head vlan_filter_list;
struct list_head mac_filter_list;
+ struct mutex crit_lock;
+ struct mutex client_lock;
+ struct mutex remove_lock;
/* Lock to protect accesses to MAC and VLAN lists */
spinlock_t mac_vlan_list_lock;
char misc_vector_name[IFNAMSIZ + 9];
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index af43fbd8cb75..5a359a0a20ec 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -685,6 +685,8 @@ static int __iavf_get_coalesce(struct net_device *netdev,
* iavf_get_coalesce - Get interrupt coalescing settings
* @netdev: network interface device structure
* @ec: ethtool coalesce structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
*
* Returns current coalescing settings. This is referred to elsewhere in the
* driver as Interrupt Throttle Rate, as this is how the hardware describes
@@ -692,7 +694,9 @@ static int __iavf_get_coalesce(struct net_device *netdev,
* only represents the settings of queue 0.
**/
static int iavf_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
return __iavf_get_coalesce(netdev, ec, -1);
}
@@ -804,11 +808,15 @@ static int __iavf_set_coalesce(struct net_device *netdev,
* iavf_set_coalesce - Set interrupt coalescing settings
* @netdev: network interface device structure
* @ec: ethtool coalesce structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
*
* Change current coalescing settings for every queue.
**/
static int iavf_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
return __iavf_set_coalesce(netdev, ec, -1);
}
@@ -1352,8 +1360,7 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
if (!fltr)
return -ENOMEM;
- while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
- &adapter->crit_section)) {
+ while (!mutex_trylock(&adapter->crit_lock)) {
if (--count == 0) {
kfree(fltr);
return -EINVAL;
@@ -1378,7 +1385,7 @@ ret:
if (err && fltr)
kfree(fltr);
- clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ mutex_unlock(&adapter->crit_lock);
return err;
}
@@ -1563,8 +1570,7 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter,
return -EINVAL;
}
- while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
- &adapter->crit_section)) {
+ while (!mutex_trylock(&adapter->crit_lock)) {
if (--count == 0) {
kfree(rss_new);
return -EINVAL;
@@ -1600,7 +1606,7 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter,
if (!err)
mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
- clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ mutex_unlock(&adapter->crit_lock);
if (!rss_new_add)
kfree(rss_new);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 44bafedd09f2..23762a7ef740 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -132,6 +132,27 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw,
}
/**
+ * iavf_lock_timeout - try to lock mutex but give up after timeout
+ * @lock: mutex that should be locked
+ * @msecs: timeout in msecs
+ *
+ * Polls mutex_trylock() and sleeps 10 msecs after each failed attempt
+ * until the accumulated sleep time reaches @msecs. With @msecs == 0 no
+ * attempt is made at all.
+ *
+ * Returns 0 on success (the mutex is then held), negative on failure
+ **/
+static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
+{
+	unsigned int step = 10;		/* polling interval in msecs */
+	unsigned int elapsed = 0;
+
+	while (elapsed < msecs) {
+		if (mutex_trylock(lock))
+			return 0;
+
+		msleep(step);
+		elapsed += step;
+	}
+
+	return -1;
+}
+
+/**
* iavf_schedule_reset - Set the flags and schedule a reset event
* @adapter: board private structure
**/
@@ -751,6 +772,7 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
list_add_tail(&f->list, &adapter->mac_filter_list);
f->add = true;
+ f->is_new_mac = true;
adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER;
} else {
f->remove = false;
@@ -1506,11 +1528,6 @@ static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter)
set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
iavf_map_rings_to_vectors(adapter);
-
- if (RSS_AQ(adapter))
- adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS;
- else
- err = iavf_init_rss(adapter);
err:
return err;
}
@@ -1920,7 +1937,7 @@ static void iavf_watchdog_task(struct work_struct *work)
struct iavf_hw *hw = &adapter->hw;
u32 reg_val;
- if (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section))
+ if (!mutex_trylock(&adapter->crit_lock))
goto restart_watchdog;
if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
@@ -1938,8 +1955,7 @@ static void iavf_watchdog_task(struct work_struct *work)
adapter->state = __IAVF_STARTUP;
adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
queue_delayed_work(iavf_wq, &adapter->init_task, 10);
- clear_bit(__IAVF_IN_CRITICAL_TASK,
- &adapter->crit_section);
+ mutex_unlock(&adapter->crit_lock);
/* Don't reschedule the watchdog, since we've restarted
* the init task. When init_task contacts the PF and
* gets everything set up again, it'll restart the
@@ -1949,14 +1965,13 @@ static void iavf_watchdog_task(struct work_struct *work)
}
adapter->aq_required = 0;
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
- clear_bit(__IAVF_IN_CRITICAL_TASK,
- &adapter->crit_section);
+ mutex_unlock(&adapter->crit_lock);
queue_delayed_work(iavf_wq,
&adapter->watchdog_task,
msecs_to_jiffies(10));
goto watchdog_done;
case __IAVF_RESETTING:
- clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ mutex_unlock(&adapter->crit_lock);
queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2);
return;
case __IAVF_DOWN:
@@ -1979,7 +1994,7 @@ static void iavf_watchdog_task(struct work_struct *work)
}
break;
case __IAVF_REMOVE:
- clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ mutex_unlock(&adapter->crit_lock);
return;
default:
goto restart_watchdog;
@@ -1988,7 +2003,6 @@ static void iavf_watchdog_task(struct work_struct *work)
/* check for hw reset */
reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK;
if (!reg_val) {
- adapter->state = __IAVF_RESETTING;
adapter->flags |= IAVF_FLAG_RESET_PENDING;
adapter->aq_required = 0;
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
@@ -2002,7 +2016,7 @@ watchdog_done:
if (adapter->state == __IAVF_RUNNING ||
adapter->state == __IAVF_COMM_FAILED)
iavf_detect_recover_hung(&adapter->vsi);
- clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ mutex_unlock(&adapter->crit_lock);
restart_watchdog:
if (adapter->aq_required)
queue_delayed_work(iavf_wq, &adapter->watchdog_task,
@@ -2066,7 +2080,7 @@ static void iavf_disable_vf(struct iavf_adapter *adapter)
memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE);
iavf_shutdown_adminq(&adapter->hw);
adapter->netdev->flags &= ~IFF_UP;
- clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ mutex_unlock(&adapter->crit_lock);
adapter->flags &= ~IAVF_FLAG_RESET_PENDING;
adapter->state = __IAVF_DOWN;
wake_up(&adapter->down_waitqueue);
@@ -2099,11 +2113,14 @@ static void iavf_reset_task(struct work_struct *work)
/* When device is being removed it doesn't make sense to run the reset
* task, just return in such a case.
*/
- if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
+ if (mutex_is_locked(&adapter->remove_lock))
return;
- while (test_and_set_bit(__IAVF_IN_CLIENT_TASK,
- &adapter->crit_section))
+ if (iavf_lock_timeout(&adapter->crit_lock, 200)) {
+ schedule_work(&adapter->reset_task);
+ return;
+ }
+ while (!mutex_trylock(&adapter->client_lock))
usleep_range(500, 1000);
if (CLIENT_ENABLED(adapter)) {
adapter->flags &= ~(IAVF_FLAG_CLIENT_NEEDS_OPEN |
@@ -2155,7 +2172,7 @@ static void iavf_reset_task(struct work_struct *work)
dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n",
reg_val);
iavf_disable_vf(adapter);
- clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section);
+ mutex_unlock(&adapter->client_lock);
return; /* Do not attempt to reinit. It's dead, Jim. */
}
@@ -2200,6 +2217,14 @@ continue_reset:
goto reset_err;
}
+ if (RSS_AQ(adapter)) {
+ adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS;
+ } else {
+ err = iavf_init_rss(adapter);
+ if (err)
+ goto reset_err;
+ }
+
adapter->aq_required |= IAVF_FLAG_AQ_GET_CONFIG;
adapter->aq_required |= IAVF_FLAG_AQ_MAP_VECTORS;
@@ -2274,13 +2299,13 @@ continue_reset:
adapter->state = __IAVF_DOWN;
wake_up(&adapter->down_waitqueue);
}
- clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section);
- clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ mutex_unlock(&adapter->client_lock);
+ mutex_unlock(&adapter->crit_lock);
return;
reset_err:
- clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section);
- clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ mutex_unlock(&adapter->client_lock);
+ mutex_unlock(&adapter->crit_lock);
dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
iavf_close(netdev);
}
@@ -2308,6 +2333,8 @@ static void iavf_adminq_task(struct work_struct *work)
if (!event.msg_buf)
goto out;
+ if (iavf_lock_timeout(&adapter->crit_lock, 200))
+ goto freedom;
do {
ret = iavf_clean_arq_element(hw, &event, &pending);
v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
@@ -2321,6 +2348,7 @@ static void iavf_adminq_task(struct work_struct *work)
if (pending != 0)
memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE);
} while (pending);
+ mutex_unlock(&adapter->crit_lock);
if ((adapter->flags &
(IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
@@ -2387,7 +2415,7 @@ static void iavf_client_task(struct work_struct *work)
* later.
*/
- if (test_and_set_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section))
+ if (!mutex_trylock(&adapter->client_lock))
return;
if (adapter->flags & IAVF_FLAG_SERVICE_CLIENT_REQUESTED) {
@@ -2410,7 +2438,7 @@ static void iavf_client_task(struct work_struct *work)
adapter->flags &= ~IAVF_FLAG_CLIENT_NEEDS_OPEN;
}
out:
- clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section);
+ mutex_unlock(&adapter->client_lock);
}
/**
@@ -3013,8 +3041,7 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter,
if (!filter)
return -ENOMEM;
- while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
- &adapter->crit_section)) {
+ while (!mutex_trylock(&adapter->crit_lock)) {
if (--count == 0)
goto err;
udelay(1);
@@ -3045,7 +3072,7 @@ err:
if (err)
kfree(filter);
- clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ mutex_unlock(&adapter->crit_lock);
return err;
}
@@ -3192,8 +3219,7 @@ static int iavf_open(struct net_device *netdev)
return -EIO;
}
- while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
- &adapter->crit_section))
+ while (!mutex_trylock(&adapter->crit_lock))
usleep_range(500, 1000);
if (adapter->state != __IAVF_DOWN) {
@@ -3228,7 +3254,7 @@ static int iavf_open(struct net_device *netdev)
iavf_irq_enable(adapter, true);
- clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ mutex_unlock(&adapter->crit_lock);
return 0;
@@ -3240,7 +3266,7 @@ err_setup_rx:
err_setup_tx:
iavf_free_all_tx_resources(adapter);
err_unlock:
- clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ mutex_unlock(&adapter->crit_lock);
return err;
}
@@ -3264,8 +3290,7 @@ static int iavf_close(struct net_device *netdev)
if (adapter->state <= __IAVF_DOWN_PENDING)
return 0;
- while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
- &adapter->crit_section))
+ while (!mutex_trylock(&adapter->crit_lock))
usleep_range(500, 1000);
set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
@@ -3276,7 +3301,7 @@ static int iavf_close(struct net_device *netdev)
adapter->state = __IAVF_DOWN_PENDING;
iavf_free_traffic_irqs(adapter);
- clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ mutex_unlock(&adapter->crit_lock);
/* We explicitly don't free resources here because the hardware is
* still active and can DMA into memory. Resources are cleared in
@@ -3625,6 +3650,10 @@ static void iavf_init_task(struct work_struct *work)
init_task.work);
struct iavf_hw *hw = &adapter->hw;
+ if (iavf_lock_timeout(&adapter->crit_lock, 5000)) {
+ dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__);
+ return;
+ }
switch (adapter->state) {
case __IAVF_STARTUP:
if (iavf_startup(adapter) < 0)
@@ -3637,14 +3666,14 @@ static void iavf_init_task(struct work_struct *work)
case __IAVF_INIT_GET_RESOURCES:
if (iavf_init_get_resources(adapter) < 0)
goto init_failed;
- return;
+ goto out;
default:
goto init_failed;
}
queue_delayed_work(iavf_wq, &adapter->init_task,
msecs_to_jiffies(30));
- return;
+ goto out;
init_failed:
if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
dev_err(&adapter->pdev->dev,
@@ -3653,9 +3682,11 @@ init_failed:
iavf_shutdown_adminq(hw);
adapter->state = __IAVF_STARTUP;
queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5);
- return;
+ goto out;
}
queue_delayed_work(iavf_wq, &adapter->init_task, HZ);
+out:
+ mutex_unlock(&adapter->crit_lock);
}
/**
@@ -3672,9 +3703,12 @@ static void iavf_shutdown(struct pci_dev *pdev)
if (netif_running(netdev))
iavf_close(netdev);
+ if (iavf_lock_timeout(&adapter->crit_lock, 5000))
+ dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__);
/* Prevent the watchdog from running. */
adapter->state = __IAVF_REMOVE;
adapter->aq_required = 0;
+ mutex_unlock(&adapter->crit_lock);
#ifdef CONFIG_PM
pci_save_state(pdev);
@@ -3768,6 +3802,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* set up the locks for the AQ, do this only once in probe
* and destroy them only once in remove
*/
+ mutex_init(&adapter->crit_lock);
+ mutex_init(&adapter->client_lock);
+ mutex_init(&adapter->remove_lock);
mutex_init(&hw->aq.asq_mutex);
mutex_init(&hw->aq.arq_mutex);
@@ -3819,8 +3856,7 @@ static int __maybe_unused iavf_suspend(struct device *dev_d)
netif_device_detach(netdev);
- while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
- &adapter->crit_section))
+ while (!mutex_trylock(&adapter->crit_lock))
usleep_range(500, 1000);
if (netif_running(netdev)) {
@@ -3831,7 +3867,7 @@ static int __maybe_unused iavf_suspend(struct device *dev_d)
iavf_free_misc_irq(adapter);
iavf_reset_interrupt_capability(adapter);
- clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ mutex_unlock(&adapter->crit_lock);
return 0;
}
@@ -3893,7 +3929,7 @@ static void iavf_remove(struct pci_dev *pdev)
struct iavf_hw *hw = &adapter->hw;
int err;
/* Indicate we are in remove and not to run reset_task */
- set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section);
+ mutex_lock(&adapter->remove_lock);
cancel_delayed_work_sync(&adapter->init_task);
cancel_work_sync(&adapter->reset_task);
cancel_delayed_work_sync(&adapter->client_task);
@@ -3908,10 +3944,6 @@ static void iavf_remove(struct pci_dev *pdev)
err);
}
- /* Shut down all the garbage mashers on the detention level */
- adapter->state = __IAVF_REMOVE;
- adapter->aq_required = 0;
- adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
iavf_request_reset(adapter);
msleep(50);
/* If the FW isn't responding, kick it once, but only once. */
@@ -3919,6 +3951,13 @@ static void iavf_remove(struct pci_dev *pdev)
iavf_request_reset(adapter);
msleep(50);
}
+ if (iavf_lock_timeout(&adapter->crit_lock, 5000))
+ dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__);
+
+ /* Shut down all the garbage mashers on the detention level */
+ adapter->state = __IAVF_REMOVE;
+ adapter->aq_required = 0;
+ adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
iavf_free_all_tx_resources(adapter);
iavf_free_all_rx_resources(adapter);
iavf_misc_irq_disable(adapter);
@@ -3938,6 +3977,11 @@ static void iavf_remove(struct pci_dev *pdev)
/* destroy the locks only once, here */
mutex_destroy(&hw->aq.arq_mutex);
mutex_destroy(&hw->aq.asq_mutex);
+ mutex_destroy(&adapter->client_lock);
+ mutex_unlock(&adapter->crit_lock);
+ mutex_destroy(&adapter->crit_lock);
+ mutex_unlock(&adapter->remove_lock);
+ mutex_destroy(&adapter->remove_lock);
iounmap(hw->hw_addr);
pci_release_regions(pdev);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 0eab3c43bdc5..3c735968e1b8 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -541,6 +541,47 @@ void iavf_del_ether_addrs(struct iavf_adapter *adapter)
}
/**
+ * iavf_mac_add_ok
+ * @adapter: adapter structure
+ *
+ * Submit list of filters based on PF response: clear the is_new_mac
+ * flag on every entry of the adapter's MAC filter list while holding
+ * mac_vlan_list_lock.
+ **/
+static void iavf_mac_add_ok(struct iavf_adapter *adapter)
+{
+	struct iavf_mac_filter *f;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+	/* nothing is unlinked here, so the plain iterator is sufficient */
+	list_for_each_entry(f, &adapter->mac_filter_list, list)
+		f->is_new_mac = false;
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+}
+
+/**
+ * iavf_mac_add_reject
+ * @adapter: adapter structure
+ *
+ * Remove filters from list based on PF response. While holding
+ * mac_vlan_list_lock, a pending removal of the filter matching the
+ * netdev's current address is cancelled, and every filter still marked
+ * is_new_mac is unlinked and freed.
+ **/
+static void iavf_mac_add_reject(struct iavf_adapter *adapter)
+{
+	struct iavf_mac_filter *cur, *next;
+	struct net_device *dev = adapter->netdev;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+	list_for_each_entry_safe(cur, next, &adapter->mac_filter_list, list) {
+		/* do not drop the filter for the address still in use */
+		if (cur->remove && ether_addr_equal(cur->macaddr, dev->dev_addr))
+			cur->remove = false;
+
+		if (!cur->is_new_mac)
+			continue;
+
+		/* entry is unlinked and freed; iteration continues via next */
+		list_del(&cur->list);
+		kfree(cur);
+	}
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+}
+
+/**
* iavf_add_vlans
* @adapter: adapter structure
*
@@ -1492,6 +1533,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
case VIRTCHNL_OP_ADD_ETH_ADDR:
dev_err(&adapter->pdev->dev, "Failed to add MAC filter, error %s\n",
iavf_stat_str(&adapter->hw, v_retval));
+ iavf_mac_add_reject(adapter);
/* restore administratively set MAC address */
ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
break;
@@ -1639,10 +1681,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
}
}
switch (v_opcode) {
- case VIRTCHNL_OP_ADD_ETH_ADDR: {
+ case VIRTCHNL_OP_ADD_ETH_ADDR:
+ if (!v_retval)
+ iavf_mac_add_ok(adapter);
if (!ether_addr_equal(netdev->dev_addr, adapter->hw.mac.addr))
ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
- }
break;
case VIRTCHNL_OP_GET_STATS: {
struct iavf_eth_stats *stats =
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index a450343fbb92..eadcb9958346 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -234,6 +234,7 @@ enum ice_pf_state {
ICE_VFLR_EVENT_PENDING,
ICE_FLTR_OVERFLOW_PROMISC,
ICE_VF_DIS,
+ ICE_VF_DEINIT_IN_PROGRESS,
ICE_CFG_BUSY,
ICE_SERVICE_SCHED,
ICE_SERVICE_DIS,
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c
index 91b545ab8b8f..14afce82ef63 100644
--- a/drivers/net/ethernet/intel/ice/ice_devlink.c
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
@@ -42,7 +42,9 @@ static int ice_info_pba(struct ice_pf *pf, struct ice_info_ctx *ctx)
status = ice_read_pba_string(hw, (u8 *)ctx->buf, sizeof(ctx->buf));
if (status)
- return -EIO;
+ /* We failed to locate the PBA, so just skip this entry */
+ dev_dbg(ice_pf_to_dev(pf), "Failed to read Product Board Assembly string, status %s\n",
+ ice_stat_str(status));
return 0;
}
@@ -475,7 +477,7 @@ struct ice_pf *ice_allocate_pf(struct device *dev)
{
struct devlink *devlink;
- devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf));
+ devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf), dev);
if (!devlink)
return NULL;
@@ -502,7 +504,7 @@ int ice_devlink_register(struct ice_pf *pf)
struct device *dev = ice_pf_to_dev(pf);
int err;
- err = devlink_register(devlink, dev);
+ err = devlink_register(devlink);
if (err) {
dev_err(dev, "devlink registration failed: %d\n", err);
return err;
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index d95a5daca114..c451cf401e63 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3568,8 +3568,10 @@ __ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
return 0;
}
-static int
-ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
+static int ice_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
return __ice_get_coalesce(netdev, ec, -1);
}
@@ -3787,8 +3789,10 @@ set_complete:
return 0;
}
-static int
-ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
+static int ice_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
return __ice_set_coalesce(netdev, ec, -1);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index ef8d1815af56..0d6c143f6653 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -191,6 +191,14 @@ static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
+ /* Under some circumstances, we might receive a request to delete our
+ * own device address from our uc list. Because we store the device
+ * address in the VSI's MAC filter list, we need to ignore such
+ * requests and not delete our device address from this list.
+ */
+ if (ether_addr_equal(addr, netdev->dev_addr))
+ return 0;
+
if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr,
ICE_FWD_TO_VSI))
return -EINVAL;
@@ -4194,6 +4202,11 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
struct ice_hw *hw;
int i, err;
+ if (pdev->is_virtfn) {
+ dev_err(dev, "can't probe a virtual function\n");
+ return -EINVAL;
+ }
+
/* this driver uses devres, see
* Documentation/driver-api/driver-model/devres.rst
*/
@@ -5109,6 +5122,7 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi)
struct ice_hw *hw = &pf->hw;
struct sockaddr *addr = pi;
enum ice_status status;
+ u8 old_mac[ETH_ALEN];
u8 flags = 0;
int err = 0;
u8 *mac;
@@ -5119,7 +5133,7 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi)
return -EADDRNOTAVAIL;
if (ether_addr_equal(netdev->dev_addr, mac)) {
- netdev_warn(netdev, "already using mac %pM\n", mac);
+ netdev_dbg(netdev, "already using mac %pM\n", mac);
return 0;
}
@@ -5130,8 +5144,14 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi)
return -EBUSY;
}
+ netif_addr_lock_bh(netdev);
+ ether_addr_copy(old_mac, netdev->dev_addr);
+ /* change the netdev's MAC address */
+ memcpy(netdev->dev_addr, mac, netdev->addr_len);
+ netif_addr_unlock_bh(netdev);
+
/* Clean up old MAC filter. Not an error if old filter doesn't exist */
- status = ice_fltr_remove_mac(vsi, netdev->dev_addr, ICE_FWD_TO_VSI);
+ status = ice_fltr_remove_mac(vsi, old_mac, ICE_FWD_TO_VSI);
if (status && status != ICE_ERR_DOES_NOT_EXIST) {
err = -EADDRNOTAVAIL;
goto err_update_filters;
@@ -5139,30 +5159,27 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi)
/* Add filter for new MAC. If filter exists, return success */
status = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI);
- if (status == ICE_ERR_ALREADY_EXISTS) {
+ if (status == ICE_ERR_ALREADY_EXISTS)
/* Although this MAC filter is already present in hardware it's
* possible in some cases (e.g. bonding) that dev_addr was
* modified outside of the driver and needs to be restored back
* to this value.
*/
- memcpy(netdev->dev_addr, mac, netdev->addr_len);
netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac);
- return 0;
- }
-
- /* error if the new filter addition failed */
- if (status)
+ else if (status)
+ /* error if the new filter addition failed */
err = -EADDRNOTAVAIL;
err_update_filters:
if (err) {
netdev_err(netdev, "can't set MAC %pM. filter update failed\n",
mac);
+ netif_addr_lock_bh(netdev);
+ ether_addr_copy(netdev->dev_addr, old_mac);
+ netif_addr_unlock_bh(netdev);
return err;
}
- /* change the netdev's MAC address */
- memcpy(netdev->dev_addr, mac, netdev->addr_len);
netdev_dbg(vsi->netdev, "updated MAC address to %pM\n",
netdev->dev_addr);
@@ -6558,12 +6575,12 @@ event_after:
}
/**
- * ice_do_ioctl - Access the hwtstamp interface
+ * ice_eth_ioctl - Access the hwtstamp interface
* @netdev: network interface device structure
* @ifr: interface request data
* @cmd: ioctl command
*/
-static int ice_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+static int ice_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_pf *pf = np->vsi->back;
@@ -7229,7 +7246,7 @@ static const struct net_device_ops ice_netdev_ops = {
.ndo_change_mtu = ice_change_mtu,
.ndo_get_stats64 = ice_get_stats64,
.ndo_set_tx_maxrate = ice_set_tx_maxrate,
- .ndo_do_ioctl = ice_do_ioctl,
+ .ndo_eth_ioctl = ice_eth_ioctl,
.ndo_set_vf_spoofchk = ice_set_vf_spoofchk,
.ndo_set_vf_mac = ice_set_vf_mac,
.ndo_get_vf_config = ice_get_vf_cfg,
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 5d5207b56ca9..05cc5870e4ef 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -22,7 +22,7 @@ static void ice_set_tx_tstamp(struct ice_pf *pf, bool on)
return;
/* Set the timestamp enable flag for all the Tx rings */
- ice_for_each_rxq(vsi, i) {
+ ice_for_each_txq(vsi, i) {
if (!vsi->tx_rings[i])
continue;
vsi->tx_rings[i]->ptp_tx = on;
@@ -656,7 +656,7 @@ static int ice_ptp_cfg_clkout(struct ice_pf *pf, unsigned int chan,
* maintaining phase
*/
if (start_time < current_time)
- start_time = div64_u64(current_time + NSEC_PER_MSEC - 1,
+ start_time = div64_u64(current_time + NSEC_PER_SEC - 1,
NSEC_PER_SEC) * NSEC_PER_SEC + phase;
start_time -= E810_OUT_PROP_DELAY_NS;
@@ -689,6 +689,41 @@ err:
}
/**
+ * ice_ptp_disable_all_clkout - Disable all currently configured outputs
+ * @pf: pointer to the PF structure
+ *
+ * Disable all currently configured clock outputs. This is necessary before
+ * certain changes to the PTP hardware clock. Use ice_ptp_enable_all_clkout to
+ * re-enable the clocks again.
+ */
+static void ice_ptp_disable_all_clkout(struct ice_pf *pf)
+{
+	unsigned int chan;
+
+	/* Walk every periodic output and switch off the ones flagged enabled */
+	for (chan = 0; chan < pf->ptp.info.n_per_out; chan++) {
+		if (!pf->ptp.perout_channels[chan].ena)
+			continue;
+
+		ice_ptp_cfg_clkout(pf, chan, NULL, false);
+	}
+}
+
+/**
+ * ice_ptp_enable_all_clkout - Enable all configured periodic clock outputs
+ * @pf: pointer to the PF structure
+ *
+ * Enable all currently configured clock outputs. Use this after
+ * ice_ptp_disable_all_clkout to reconfigure the output signals according to
+ * their configuration.
+ */
+static void ice_ptp_enable_all_clkout(struct ice_pf *pf)
+{
+	unsigned int chan;
+
+	/* Re-apply the stored configuration of every output flagged enabled */
+	for (chan = 0; chan < pf->ptp.info.n_per_out; chan++) {
+		if (!pf->ptp.perout_channels[chan].ena)
+			continue;
+
+		ice_ptp_cfg_clkout(pf, chan, &pf->ptp.perout_channels[chan],
+				   false);
+	}
+}
+
+/**
* ice_ptp_gpio_enable_e810 - Enable/disable ancillary features of PHC
* @info: the driver's PTP info structure
* @rq: The requested feature to change
@@ -783,12 +818,17 @@ ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts)
goto exit;
}
+ /* Disable periodic outputs */
+ ice_ptp_disable_all_clkout(pf);
+
err = ice_ptp_write_init(pf, &ts64);
ice_ptp_unlock(hw);
if (!err)
ice_ptp_update_cached_phctime(pf);
+ /* Reenable periodic outputs */
+ ice_ptp_enable_all_clkout(pf);
exit:
if (err) {
dev_err(ice_pf_to_dev(pf), "PTP failed to set time %d\n", err);
@@ -842,8 +882,14 @@ static int ice_ptp_adjtime(struct ptp_clock_info *info, s64 delta)
return -EBUSY;
}
+ /* Disable periodic outputs */
+ ice_ptp_disable_all_clkout(pf);
+
err = ice_ptp_write_adj(pf, delta);
+ /* Reenable periodic outputs */
+ ice_ptp_enable_all_clkout(pf);
+
ice_ptp_unlock(hw);
if (err) {
@@ -1064,17 +1110,6 @@ static long ice_ptp_create_clock(struct ice_pf *pf)
info = &pf->ptp.info;
dev = ice_pf_to_dev(pf);
- /* Allocate memory for kernel pins interface */
- if (info->n_pins) {
- info->pin_config = devm_kcalloc(dev, info->n_pins,
- sizeof(*info->pin_config),
- GFP_KERNEL);
- if (!info->pin_config) {
- info->n_pins = 0;
- return -ENOMEM;
- }
- }
-
/* Attempt to register the clock before enabling the hardware. */
clock = ptp_clock_register(info, dev);
if (IS_ERR(clock))
@@ -1278,6 +1313,8 @@ ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
{
u8 idx;
+ spin_lock(&tx->lock);
+
for (idx = 0; idx < tx->len; idx++) {
u8 phy_idx = idx + tx->quad_offset;
@@ -1290,6 +1327,8 @@ ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
tx->tstamps[idx].skb = NULL;
}
}
+
+ spin_unlock(&tx->lock);
}
/**
@@ -1550,6 +1589,9 @@ void ice_ptp_release(struct ice_pf *pf)
if (!pf->ptp.clock)
return;
+ /* Disable periodic outputs */
+ ice_ptp_disable_all_clkout(pf);
+
ice_clear_ptp_clock_index(pf);
ptp_clock_unregister(pf->ptp.clock);
pf->ptp.clock = NULL;
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 2826570dab51..e93430ab37f1 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -615,6 +615,8 @@ void ice_free_vfs(struct ice_pf *pf)
struct ice_hw *hw = &pf->hw;
unsigned int tmp, i;
+ set_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state);
+
if (!pf->vf)
return;
@@ -680,6 +682,7 @@ void ice_free_vfs(struct ice_pf *pf)
i);
clear_bit(ICE_VF_DIS, pf->state);
+ clear_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state);
clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
}
@@ -4415,6 +4418,10 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
struct device *dev;
int err = 0;
+ /* if de-init is underway, don't process messages from VF */
+ if (test_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state))
+ return;
+
dev = ice_pf_to_dev(pf);
if (ice_validate_vf_id(pf, vf_id)) {
err = -EINVAL;
diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c
index e63ee3cca5ea..1277c5c7d099 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mac.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.c
@@ -492,6 +492,7 @@ static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
**/
static void igb_i21x_hw_doublecheck(struct e1000_hw *hw)
{
+ int failed_cnt = 3;
bool is_failed;
int i;
@@ -502,9 +503,12 @@ static void igb_i21x_hw_doublecheck(struct e1000_hw *hw)
is_failed = true;
array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]);
wrfl();
- break;
}
}
+ if (is_failed && --failed_cnt <= 0) {
+ hw_dbg("Failed to update MTA_REGISTER, too many retries");
+ break;
+ }
} while (is_failed);
}
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 636a1b1fb7e1..fb1029352c3e 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -2182,7 +2182,9 @@ static int igb_set_phys_id(struct net_device *netdev,
}
static int igb_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct igb_adapter *adapter = netdev_priv(netdev);
int i;
@@ -2238,7 +2240,9 @@ static int igb_set_coalesce(struct net_device *netdev,
}
static int igb_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct igb_adapter *adapter = netdev_priv(netdev);
@@ -2343,8 +2347,7 @@ static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
switch (stringset) {
case ETH_SS_TEST:
- memcpy(data, *igb_gstrings_test,
- IGB_TEST_LEN*ETH_GSTRING_LEN);
+ memcpy(data, igb_gstrings_test, sizeof(igb_gstrings_test));
break;
case ETH_SS_STATS:
for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++)
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 171a7a629b20..751de06019a0 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2991,7 +2991,7 @@ static const struct net_device_ops igb_netdev_ops = {
.ndo_set_rx_mode = igb_set_rx_mode,
.ndo_set_mac_address = igb_set_mac,
.ndo_change_mtu = igb_change_mtu,
- .ndo_do_ioctl = igb_ioctl,
+ .ndo_eth_ioctl = igb_ioctl,
.ndo_tx_timeout = igb_tx_timeout,
.ndo_validate_addr = eth_validate_addr,
.ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c
index f4835eb62fee..06e5bd646a0e 100644
--- a/drivers/net/ethernet/intel/igbvf/ethtool.c
+++ b/drivers/net/ethernet/intel/igbvf/ethtool.c
@@ -314,7 +314,9 @@ static int igbvf_set_wol(struct net_device *netdev,
}
static int igbvf_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct igbvf_adapter *adapter = netdev_priv(netdev);
@@ -327,7 +329,9 @@ static int igbvf_get_coalesce(struct net_device *netdev,
}
static int igbvf_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct igbvf_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index 1bbe9862a758..d32e72d953c8 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -2657,7 +2657,7 @@ static const struct net_device_ops igbvf_netdev_ops = {
.ndo_set_rx_mode = igbvf_set_rx_mode,
.ndo_set_mac_address = igbvf_set_mac,
.ndo_change_mtu = igbvf_change_mtu,
- .ndo_do_ioctl = igbvf_ioctl,
+ .ndo_eth_ioctl = igbvf_ioctl,
.ndo_tx_timeout = igbvf_tx_timeout,
.ndo_vlan_rx_add_vid = igbvf_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = igbvf_vlan_rx_kill_vid,
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 5901ed9fb545..3e386c38d016 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -33,6 +33,8 @@ void igc_ethtool_set_ops(struct net_device *);
#define IGC_N_PEROUT 2
#define IGC_N_SDP 4
+#define MAX_FLEX_FILTER 32
+
enum igc_mac_filter_type {
IGC_MAC_FILTER_TYPE_DST = 0,
IGC_MAC_FILTER_TYPE_SRC
@@ -96,6 +98,13 @@ struct igc_ring {
u32 start_time;
u32 end_time;
+ /* CBS parameters */
+ bool cbs_enable; /* indicates if CBS is enabled */
+ s32 idleslope; /* idleSlope in kbps */
+ s32 sendslope; /* sendSlope in kbps */
+ s32 hicredit; /* hiCredit in bytes */
+ s32 locredit; /* loCredit in bytes */
+
/* everything past this point are written often */
u16 next_to_clean;
u16 next_to_use;
@@ -225,6 +234,7 @@ struct igc_adapter {
struct timecounter tc;
struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */
ktime_t ptp_reset_start; /* Reset time in clock mono */
+ struct system_time_snapshot snapshot;
char fw_version[32];
@@ -287,6 +297,10 @@ extern char igc_driver_name[];
#define IGC_FLAG_VLAN_PROMISC BIT(15)
#define IGC_FLAG_RX_LEGACY BIT(16)
#define IGC_FLAG_TSN_QBV_ENABLED BIT(17)
+#define IGC_FLAG_TSN_QAV_ENABLED BIT(18)
+
+#define IGC_FLAG_TSN_ANY_ENABLED \
+ (IGC_FLAG_TSN_QBV_ENABLED | IGC_FLAG_TSN_QAV_ENABLED)
#define IGC_FLAG_RSS_FIELD_IPV4_UDP BIT(6)
#define IGC_FLAG_RSS_FIELD_IPV6_UDP BIT(7)
@@ -476,18 +490,28 @@ struct igc_q_vector {
};
enum igc_filter_match_flags {
- IGC_FILTER_FLAG_ETHER_TYPE = 0x1,
- IGC_FILTER_FLAG_VLAN_TCI = 0x2,
- IGC_FILTER_FLAG_SRC_MAC_ADDR = 0x4,
- IGC_FILTER_FLAG_DST_MAC_ADDR = 0x8,
+ IGC_FILTER_FLAG_ETHER_TYPE = BIT(0),
+ IGC_FILTER_FLAG_VLAN_TCI = BIT(1),
+ IGC_FILTER_FLAG_SRC_MAC_ADDR = BIT(2),
+ IGC_FILTER_FLAG_DST_MAC_ADDR = BIT(3),
+ IGC_FILTER_FLAG_USER_DATA = BIT(4),
+ IGC_FILTER_FLAG_VLAN_ETYPE = BIT(5),
};
struct igc_nfc_filter {
u8 match_flags;
u16 etype;
+ __be16 vlan_etype;
u16 vlan_tci;
u8 src_addr[ETH_ALEN];
u8 dst_addr[ETH_ALEN];
+ u8 user_data[8];
+ u8 user_mask[8];
+ u8 flex_index;
+ u8 rx_queue;
+ u8 prio;
+ u8 immediate_irq;
+ u8 drop;
};
struct igc_nfc_rule {
@@ -495,12 +519,24 @@ struct igc_nfc_rule {
struct igc_nfc_filter filter;
u32 location;
u16 action;
+ bool flex;
};
-/* IGC supports a total of 32 NFC rules: 16 MAC address based,, 8 VLAN priority
- * based, and 8 ethertype based.
+/* IGC supports a total of 64 NFC rules: 16 MAC address based, 8 VLAN priority
+ * based, 8 ethertype based and 32 Flex filter based rules.
*/
-#define IGC_MAX_RXNFC_RULES 32
+#define IGC_MAX_RXNFC_RULES 64
+
+struct igc_flex_filter { /* software description of one hardware flex filter */
+ u8 index; /* filter slot; rejected if >= MAX_FLEX_FILTER */
+ u8 data[128]; /* pattern bytes, written to the host table as 16 rows of 8 bytes */
+ u8 mask[16]; /* 128-bit mask, one bit per byte of data[] */
+ u8 length; /* filter length; must be a multiple of 8 */
+ u8 rx_queue; /* encoded into the queue field of the queuing word */
+ u8 prio; /* encoded into the priority field of the queuing word */
+ u8 immediate_irq; /* non-zero sets IGC_FHFT_IMM_INT */
+ u8 drop; /* non-zero sets IGC_FHFT_DROP */
+};
/* igc_desc_unused - calculate if we have unused descriptors */
static inline u16 igc_desc_unused(const struct igc_ring *ring)
diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c
index d0700d48ecf9..84f142f5e472 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.c
+++ b/drivers/net/ethernet/intel/igc/igc_base.c
@@ -187,15 +187,7 @@ static s32 igc_init_phy_params_base(struct igc_hw *hw)
igc_check_for_copper_link(hw);
- /* Verify phy id and set remaining function pointers */
- switch (phy->id) {
- case I225_I_PHY_ID:
- phy->type = igc_phy_i225;
- break;
- default:
- ret_val = -IGC_ERR_PHY;
- goto out;
- }
+ phy->type = igc_phy_i225;
out:
return ret_val;
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index c3a5a5518790..a4bbee748798 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -17,11 +17,22 @@
#define IGC_WUC_PME_EN 0x00000002 /* PME Enable */
/* Wake Up Filter Control */
-#define IGC_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */
-#define IGC_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */
-#define IGC_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */
-#define IGC_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */
-#define IGC_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */
+#define IGC_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */
+#define IGC_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */
+#define IGC_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */
+#define IGC_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */
+#define IGC_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */
+#define IGC_WUFC_FLEX_HQ BIT(14) /* Flex Filters Host Queuing */
+#define IGC_WUFC_FLX0 BIT(16) /* Flexible Filter 0 Enable */
+#define IGC_WUFC_FLX1 BIT(17) /* Flexible Filter 1 Enable */
+#define IGC_WUFC_FLX2 BIT(18) /* Flexible Filter 2 Enable */
+#define IGC_WUFC_FLX3 BIT(19) /* Flexible Filter 3 Enable */
+#define IGC_WUFC_FLX4 BIT(20) /* Flexible Filter 4 Enable */
+#define IGC_WUFC_FLX5 BIT(21) /* Flexible Filter 5 Enable */
+#define IGC_WUFC_FLX6 BIT(22) /* Flexible Filter 6 Enable */
+#define IGC_WUFC_FLX7 BIT(23) /* Flexible Filter 7 Enable */
+
+#define IGC_WUFC_FILTER_MASK GENMASK(23, 14)
#define IGC_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */
@@ -46,6 +57,37 @@
/* Wake Up Packet Memory stores the first 128 bytes of the wake up packet */
#define IGC_WUPM_BYTES 128
+/* Wakeup Filter Control Extended */
+#define IGC_WUFC_EXT_FLX8 BIT(8) /* Flexible Filter 8 Enable */
+#define IGC_WUFC_EXT_FLX9 BIT(9) /* Flexible Filter 9 Enable */
+#define IGC_WUFC_EXT_FLX10 BIT(10) /* Flexible Filter 10 Enable */
+#define IGC_WUFC_EXT_FLX11 BIT(11) /* Flexible Filter 11 Enable */
+#define IGC_WUFC_EXT_FLX12 BIT(12) /* Flexible Filter 12 Enable */
+#define IGC_WUFC_EXT_FLX13 BIT(13) /* Flexible Filter 13 Enable */
+#define IGC_WUFC_EXT_FLX14 BIT(14) /* Flexible Filter 14 Enable */
+#define IGC_WUFC_EXT_FLX15 BIT(15) /* Flexible Filter 15 Enable */
+#define IGC_WUFC_EXT_FLX16 BIT(16) /* Flexible Filter 16 Enable */
+#define IGC_WUFC_EXT_FLX17 BIT(17) /* Flexible Filter 17 Enable */
+#define IGC_WUFC_EXT_FLX18 BIT(18) /* Flexible Filter 18 Enable */
+#define IGC_WUFC_EXT_FLX19 BIT(19) /* Flexible Filter 19 Enable */
+#define IGC_WUFC_EXT_FLX20 BIT(20) /* Flexible Filter 20 Enable */
+#define IGC_WUFC_EXT_FLX21 BIT(21) /* Flexible Filter 21 Enable */
+#define IGC_WUFC_EXT_FLX22 BIT(22) /* Flexible Filter 22 Enable */
+#define IGC_WUFC_EXT_FLX23 BIT(23) /* Flexible Filter 23 Enable */
+#define IGC_WUFC_EXT_FLX24 BIT(24) /* Flexible Filter 24 Enable */
+#define IGC_WUFC_EXT_FLX25 BIT(25) /* Flexible Filter 25 Enable */
+#define IGC_WUFC_EXT_FLX26 BIT(26) /* Flexible Filter 26 Enable */
+#define IGC_WUFC_EXT_FLX27 BIT(27) /* Flexible Filter 27 Enable */
+#define IGC_WUFC_EXT_FLX28 BIT(28) /* Flexible Filter 28 Enable */
+#define IGC_WUFC_EXT_FLX29 BIT(29) /* Flexible Filter 29 Enable */
+#define IGC_WUFC_EXT_FLX30 BIT(30) /* Flexible Filter 30 Enable */
+#define IGC_WUFC_EXT_FLX31 BIT(31) /* Flexible Filter 31 Enable */
+
+#define IGC_WUFC_EXT_FILTER_MASK GENMASK(31, 8)
+
+/* Physical Func Reset Done Indication */
+#define IGC_CTRL_EXT_LINK_MODE_MASK 0x00C00000
+
/* Loop limit on how long we wait for auto-negotiation to complete */
#define COPPER_LINK_UP_LIMIT 10
#define PHY_AUTO_NEG_LIMIT 45
@@ -476,11 +518,50 @@
#define IGC_TXQCTL_QUEUE_MODE_LAUNCHT 0x00000001
#define IGC_TXQCTL_STRICT_CYCLE 0x00000002
#define IGC_TXQCTL_STRICT_END 0x00000004
+#define IGC_TXQCTL_QAV_SEL_MASK 0x000000C0
+#define IGC_TXQCTL_QAV_SEL_CBS0 0x00000080
+#define IGC_TXQCTL_QAV_SEL_CBS1 0x000000C0
+
+#define IGC_TQAVCC_IDLESLOPE_MASK 0xFFFF
+#define IGC_TQAVCC_KEEP_CREDITS BIT(30)
+
+#define IGC_MAX_SR_QUEUES 2
/* Receive Checksum Control */
#define IGC_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */
#define IGC_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */
+/* PCIe PTM Control */
+#define IGC_PTM_CTRL_START_NOW BIT(29) /* Start PTM Now */
+#define IGC_PTM_CTRL_EN BIT(30) /* Enable PTM */
+#define IGC_PTM_CTRL_TRIG BIT(31) /* PTM Cycle trigger */
+/* Short cycle interval field: sits directly below PTM_TO (bit 8 upward), so
+ * it is six bits wide and needs the contiguous mask 0x3f.
+ */
+#define IGC_PTM_CTRL_SHRT_CYC(usec)	(((usec) & 0x3f) << 2)
+#define IGC_PTM_CTRL_PTM_TO(usec) (((usec) & 0xff) << 8)
+
+#define IGC_PTM_SHORT_CYC_DEFAULT 10 /* Default Short/interrupted cycle interval */
+#define IGC_PTM_CYC_TIME_DEFAULT 5 /* Default PTM cycle time */
+#define IGC_PTM_TIMEOUT_DEFAULT 255 /* Default timeout for PTM errors */
+
+/* PCIe Digital Delay */
+#define IGC_PCIE_DIG_DELAY_DEFAULT 0x01440000
+
+/* PCIe PHY Delay */
+#define IGC_PCIE_PHY_DELAY_DEFAULT 0x40900000
+
+#define IGC_TIMADJ_ADJUST_METH 0x40000000
+
+/* PCIe PTM Status */
+#define IGC_PTM_STAT_VALID BIT(0) /* PTM Status */
+#define IGC_PTM_STAT_RET_ERR BIT(1) /* Root port timeout */
+#define IGC_PTM_STAT_BAD_PTM_RES BIT(2) /* PTM Response msg instead of PTM Response Data */
+#define IGC_PTM_STAT_T4M1_OVFL BIT(3) /* T4 minus T1 overflow */
+#define IGC_PTM_STAT_ADJUST_1ST BIT(4) /* 1588 timer adjusted during 1st PTM cycle */
+#define IGC_PTM_STAT_ADJUST_CYC BIT(5) /* 1588 timer adjusted during non-1st PTM cycle */
+
+/* PCIe PTM Cycle Control */
+#define IGC_PTM_CYCLE_CTRL_CYC_TIME(msec) ((msec) & 0x3ff) /* PTM Cycle Time (msec) */
+#define IGC_PTM_CYCLE_CTRL_AUTO_CYC_EN BIT(31) /* PTM Cycle Control */
+
/* GPY211 - I225 defines */
#define GPY_MMD_MASK 0xFFFF0000
#define GPY_MMD_SHIFT 16
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index fa4171860623..e0a76ac1bbbc 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -862,7 +862,9 @@ static void igc_ethtool_get_stats(struct net_device *netdev,
}
static int igc_ethtool_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct igc_adapter *adapter = netdev_priv(netdev);
@@ -882,7 +884,9 @@ static int igc_ethtool_get_coalesce(struct net_device *netdev,
}
static int igc_ethtool_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct igc_adapter *adapter = netdev_priv(netdev);
int i;
@@ -979,6 +983,12 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter,
eth_broadcast_addr(fsp->m_u.ether_spec.h_source);
}
+ if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) {
+ fsp->flow_type |= FLOW_EXT;
+ memcpy(fsp->h_ext.data, rule->filter.user_data, sizeof(fsp->h_ext.data));
+ memcpy(fsp->m_ext.data, rule->filter.user_mask, sizeof(fsp->m_ext.data));
+ }
+
mutex_unlock(&adapter->nfc_rule_lock);
return 0;
@@ -1215,6 +1225,30 @@ static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule,
ether_addr_copy(rule->filter.dst_addr,
fsp->h_u.ether_spec.h_dest);
}
+
+ /* VLAN etype matching */
+ if ((fsp->flow_type & FLOW_EXT) && fsp->h_ext.vlan_etype) {
+ rule->filter.vlan_etype = fsp->h_ext.vlan_etype;
+ rule->filter.match_flags |= IGC_FILTER_FLAG_VLAN_ETYPE;
+ }
+
+ /* Check for user defined data */
+ if ((fsp->flow_type & FLOW_EXT) &&
+ (fsp->h_ext.data[0] || fsp->h_ext.data[1])) {
+ rule->filter.match_flags |= IGC_FILTER_FLAG_USER_DATA;
+ memcpy(rule->filter.user_data, fsp->h_ext.data, sizeof(fsp->h_ext.data));
+ memcpy(rule->filter.user_mask, fsp->m_ext.data, sizeof(fsp->m_ext.data));
+ }
+
+ /* When multiple filter options or user data or vlan etype is set, use a
+ * flex filter.
+ */
+ if ((rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) ||
+ (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) ||
+ (rule->filter.match_flags & (rule->filter.match_flags - 1)))
+ rule->flex = true;
+ else
+ rule->flex = false;
}
/**
@@ -1244,11 +1278,6 @@ static int igc_ethtool_check_nfc_rule(struct igc_adapter *adapter,
return -EINVAL;
}
- if (flags & (flags - 1)) {
- netdev_dbg(dev, "Rule with multiple matches not supported\n");
- return -EOPNOTSUPP;
- }
-
list_for_each_entry(tmp, &adapter->nfc_rule_list, list) {
if (!memcmp(&rule->filter, &tmp->filter,
sizeof(rule->filter)) &&
@@ -1280,12 +1309,6 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter,
return -EOPNOTSUPP;
}
- if ((fsp->flow_type & FLOW_EXT) &&
- fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) {
- netdev_dbg(netdev, "VLAN mask not supported\n");
- return -EOPNOTSUPP;
- }
-
if (fsp->ring_cookie >= adapter->num_rx_queues) {
netdev_dbg(netdev, "Invalid action\n");
return -EINVAL;
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index e29aadbc6744..b877efae61df 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -12,6 +12,8 @@
#include <net/pkt_sched.h>
#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
+#include <linux/pci.h>
+
#include <net/ipv6.h>
#include "igc.h"
@@ -118,7 +120,7 @@ void igc_reset(struct igc_adapter *adapter)
igc_ptp_reset(adapter);
/* Re-enable TSN offloading, where applicable. */
- igc_tsn_offload_apply(adapter);
+ igc_tsn_reset(adapter);
igc_get_phy_info(hw);
}
@@ -149,6 +151,9 @@ static void igc_release_hw_control(struct igc_adapter *adapter)
struct igc_hw *hw = &adapter->hw;
u32 ctrl_ext;
+ if (!pci_device_is_present(adapter->pdev))
+ return;
+
/* Let firmware take over control of h/w */
ctrl_ext = rd32(IGC_CTRL_EXT);
wr32(IGC_CTRL_EXT,
@@ -3075,11 +3080,320 @@ static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype)
etype);
}
+/* Point the indirect table select register at the bank holding the requested
+ * flex filter and hand back the base offset of its host table in @fhft.
+ *
+ * Returns 0 on success or -EINVAL when the filter index is out of range.
+ */
+static int igc_flex_filter_select(struct igc_adapter *adapter,
+				  struct igc_flex_filter *input,
+				  u32 *fhft)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u8 bank, slot;
+	u32 select;
+
+	if (input->index >= MAX_FLEX_FILTER) {
+		dev_err(&adapter->pdev->dev, "Wrong Flex Filter index selected!\n");
+		return -EINVAL;
+	}
+
+	/* Filters are grouped in banks of eight: 0-7 -> 0, 8-15 -> 1,
+	 * 16-23 -> 2, 24-31 -> 3.
+	 */
+	bank = input->index / 8;
+	slot = input->index % 8;
+
+	/* Indirect table select register */
+	select = rd32(IGC_FHFTSL);
+	select &= ~IGC_FHFTSL_FTSL_MASK;
+	select |= bank;
+	wr32(IGC_FHFTSL, select);
+
+	/* The first four slots of a bank use FHFT, the remaining four FHFT_EXT */
+	if (slot < 4)
+		*fhft = IGC_FHFT(slot);
+	else
+		*fhft = IGC_FHFT_EXT(slot - 4);
+
+	return 0;
+}
+
+/* Program one flex filter into the hardware tables and enable it.
+ *
+ * @adapter: board private structure
+ * @input: filter description (index, data, mask, length, queuing options)
+ *
+ * Returns 0 on success, or a negative errno if the length is not a multiple
+ * of 8 or the filter bank could not be selected.
+ */
+static int igc_write_flex_filter_ll(struct igc_adapter *adapter,
+				    struct igc_flex_filter *input)
+{
+	struct device *dev = &adapter->pdev->dev;
+	struct igc_hw *hw = &adapter->hw;
+	u8 *data = input->data;
+	u8 *mask = input->mask;
+	u32 queuing;
+	u32 fhft;
+	u32 wufc;
+	int ret;
+	int i;
+
+	/* Length has to be aligned to 8. Otherwise the filter will fail. Bail
+	 * out early to avoid surprises later.
+	 */
+	if (input->length % 8 != 0) {
+		dev_err(dev, "The length of a flex filter has to be 8 byte aligned!\n");
+		return -EINVAL;
+	}
+
+	/* Select corresponding flex filter register and get base for host table. */
+	ret = igc_flex_filter_select(adapter, input, &fhft);
+	if (ret)
+		return ret;
+
+	/* When adding a filter globally disable flex filter feature. That is
+	 * recommended within the datasheet.
+	 */
+	wufc = rd32(IGC_WUFC);
+	wufc &= ~IGC_WUFC_FLEX_HQ;
+	wr32(IGC_WUFC, wufc);
+
+	/* Configure filter */
+	queuing = input->length & IGC_FHFT_LENGTH_MASK;
+	queuing |= (input->rx_queue << IGC_FHFT_QUEUE_SHIFT) & IGC_FHFT_QUEUE_MASK;
+	queuing |= (input->prio << IGC_FHFT_PRIO_SHIFT) & IGC_FHFT_PRIO_MASK;
+
+	if (input->immediate_irq)
+		queuing |= IGC_FHFT_IMM_INT;
+
+	if (input->drop)
+		queuing |= IGC_FHFT_DROP;
+
+	wr32(fhft + 0xFC, queuing);
+
+	/* Write data (128 byte) and mask (128 bit) */
+	for (i = 0; i < 16; ++i) {
+		const size_t data_idx = i * 8;
+		const size_t row_idx = i * 16;
+		/* Cast before shifting so a byte with its top bit set is never
+		 * shifted into the sign bit of a promoted int.
+		 */
+		u32 dw0 =
+			((u32)data[data_idx + 0] << 0) |
+			((u32)data[data_idx + 1] << 8) |
+			((u32)data[data_idx + 2] << 16) |
+			((u32)data[data_idx + 3] << 24);
+		u32 dw1 =
+			((u32)data[data_idx + 4] << 0) |
+			((u32)data[data_idx + 5] << 8) |
+			((u32)data[data_idx + 6] << 16) |
+			((u32)data[data_idx + 7] << 24);
+		u32 tmp;
+
+		/* Write row: dw0, dw1 and mask */
+		wr32(fhft + row_idx, dw0);
+		wr32(fhft + row_idx + 4, dw1);
+
+		/* mask is only valid for MASK(7, 0) */
+		tmp = rd32(fhft + row_idx + 8);
+		tmp &= ~GENMASK(7, 0);
+		tmp |= mask[i];
+		wr32(fhft + row_idx + 8, tmp);
+	}
+
+	/* Enable filter. Filters 0-7 have their enable bits in WUFC, the other
+	 * 24 filters (8-31) in WUFC_EXT starting at FLX8, so index 8 must
+	 * already take the WUFC_EXT path.
+	 */
+	wufc |= IGC_WUFC_FLEX_HQ;
+	if (input->index >= 8) {
+		u32 wufc_ext = rd32(IGC_WUFC_EXT);
+
+		wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8));
+
+		wr32(IGC_WUFC_EXT, wufc_ext);
+	} else {
+		wufc |= (IGC_WUFC_FLX0 << input->index);
+	}
+	wr32(IGC_WUFC, wufc);
+
+	dev_dbg(dev, "Added flex filter %u to HW.\n",
+		input->index);
+
+	return 0;
+}
+
+/* Copy @len bytes from @src into the filter data at @offset and flag the
+ * matching bytes in the filter mask. Without a @mask every copied byte is
+ * flagged; with one, only bytes whose mask byte is non-zero are flagged.
+ */
+static void igc_flex_filter_add_field(struct igc_flex_filter *flex,
+				      const void *src, unsigned int offset,
+				      size_t len, const void *mask)
+{
+	const u8 *mask_bytes = mask;
+	unsigned int pos;
+	int n;
+
+	/* data */
+	memcpy(&flex->data[offset], src, len);
+
+	/* mask: one bit per data byte */
+	for (n = 0; n < len; ++n) {
+		if (mask_bytes && !(mask_bytes[n] & 0xff))
+			continue;
+
+		pos = n + offset;
+		flex->mask[pos / 8] |= BIT(pos % 8);
+	}
+}
+
+/* Return the lowest flex filter slot whose enable bit is clear, or -ENOSPC
+ * when all MAX_FLEX_FILTER slots are enabled.
+ */
+static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 wufc = rd32(IGC_WUFC);
+	u32 wufc_ext = rd32(IGC_WUFC_EXT);
+	int slot;
+
+	for (slot = 0; slot < MAX_FLEX_FILTER; slot++) {
+		bool busy;
+
+		/* Slots 0-7 are tracked in WUFC, slots 8-31 in WUFC_EXT */
+		if (slot < 8)
+			busy = wufc & (IGC_WUFC_FLX0 << slot);
+		else
+			busy = wufc_ext & (IGC_WUFC_EXT_FLX8 << (slot - 8));
+
+		if (!busy)
+			return slot;
+	}
+
+	return -ENOSPC;
+}
+
+/* Report whether at least one flex filter enable bit is set in hardware. */
+static bool igc_flex_filter_in_use(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 wufc, wufc_ext;
+
+	wufc = rd32(IGC_WUFC);
+	wufc_ext = rd32(IGC_WUFC_EXT);
+
+	/* IGC_WUFC_FILTER_MASK also spans IGC_WUFC_FLEX_HQ, which is the
+	 * global host queuing enable and not a per-filter bit. Ignore it here:
+	 * it is set whenever a filter has been added, so counting it would
+	 * make this function report "in use" even after the last filter was
+	 * removed.
+	 */
+	if (wufc & IGC_WUFC_FILTER_MASK & ~IGC_WUFC_FLEX_HQ)
+		return true;
+
+	if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK)
+		return true;
+
+	return false;
+}
+
+/* Translate an NFC rule into a flex filter, write it to a free hardware slot
+ * and remember that slot in the rule's filter.
+ *
+ * Returns 0 on success, -ENOSPC when no slot is free, or the error reported
+ * while writing the filter.
+ */
+static int igc_add_flex_filter(struct igc_adapter *adapter,
+			       struct igc_nfc_rule *rule)
+{
+	struct igc_nfc_filter *filter = &rule->filter;
+	unsigned int eth_offset, user_offset;
+	struct igc_flex_filter flex = { };
+	int slot, err;
+
+	slot = igc_find_avail_flex_filter_slot(adapter);
+	if (slot < 0)
+		return -ENOSPC;
+
+	/* Construct the flex filter:
+	 *  -> dest_mac [6]
+	 *  -> src_mac [6]
+	 *  -> tpid [2]
+	 *  -> vlan tci [2]
+	 *  -> ether type [2]
+	 *  -> user data [8]
+	 *  -> = 26 bytes => 32 length
+	 */
+	flex.index = slot;
+	flex.length = 32;
+	flex.rx_queue = rule->action;
+
+	/* A VLAN tag shifts the ether type and the user data by four bytes */
+	if (filter->vlan_tci || filter->vlan_etype) {
+		eth_offset = 16;
+		user_offset = 18;
+	} else {
+		eth_offset = 12;
+		user_offset = 14;
+	}
+
+	/* Destination MAC */
+	if (filter->match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
+		igc_flex_filter_add_field(&flex, &filter->dst_addr, 0,
+					  ETH_ALEN, NULL);
+
+	/* Source MAC */
+	if (filter->match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
+		igc_flex_filter_add_field(&flex, &filter->src_addr, 6,
+					  ETH_ALEN, NULL);
+
+	/* VLAN etype */
+	if (filter->match_flags & IGC_FILTER_FLAG_VLAN_ETYPE)
+		igc_flex_filter_add_field(&flex, &filter->vlan_etype, 12,
+					  sizeof(filter->vlan_etype),
+					  NULL);
+
+	/* VLAN TCI */
+	if (filter->match_flags & IGC_FILTER_FLAG_VLAN_TCI)
+		igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14,
+					  sizeof(filter->vlan_tci), NULL);
+
+	/* Ether type, converted to big endian before it is stored */
+	if (filter->match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
+		__be16 etype = cpu_to_be16(filter->etype);
+
+		igc_flex_filter_add_field(&flex, &etype, eth_offset,
+					  sizeof(etype), NULL);
+	}
+
+	/* User data, the only field carrying a caller supplied mask */
+	if (filter->match_flags & IGC_FILTER_FLAG_USER_DATA)
+		igc_flex_filter_add_field(&flex, &filter->user_data,
+					  user_offset,
+					  sizeof(filter->user_data),
+					  filter->user_mask);
+
+	/* Add it down to the hardware and enable it. */
+	err = igc_write_flex_filter_ll(adapter, &flex);
+	if (err)
+		return err;
+
+	filter->flex_index = slot;
+
+	return 0;
+}
+
+/* Disable the flex filter in slot @reg_index and, once no filter is left
+ * enabled, switch off flex filter host queuing as well.
+ */
+static void igc_del_flex_filter(struct igc_adapter *adapter,
+				u16 reg_index)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 wufc;
+
+	/* Just disable the filter. The filter table itself is kept
+	 * intact. Another flex_filter_add() should override the "old" data
+	 * then.
+	 *
+	 * Filters 0-7 are controlled through WUFC, filters 8-31 through
+	 * WUFC_EXT starting at FLX8, so slot 8 belongs to the WUFC_EXT path.
+	 */
+	if (reg_index >= 8) {
+		u32 wufc_ext = rd32(IGC_WUFC_EXT);
+
+		wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8));
+		wr32(IGC_WUFC_EXT, wufc_ext);
+	} else {
+		wufc = rd32(IGC_WUFC);
+
+		wufc &= ~(IGC_WUFC_FLX0 << reg_index);
+		wr32(IGC_WUFC, wufc);
+	}
+
+	if (igc_flex_filter_in_use(adapter))
+		return;
+
+	/* No filters are in use, we may disable flex filters */
+	wufc = rd32(IGC_WUFC);
+	wufc &= ~IGC_WUFC_FLEX_HQ;
+	wr32(IGC_WUFC, wufc);
+}
+
static int igc_enable_nfc_rule(struct igc_adapter *adapter,
- const struct igc_nfc_rule *rule)
+ struct igc_nfc_rule *rule)
{
int err;
+ if (rule->flex) {
+ return igc_add_flex_filter(adapter, rule);
+ }
+
if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
err = igc_add_etype_filter(adapter, rule->filter.etype,
rule->action);
@@ -3116,6 +3430,11 @@ static int igc_enable_nfc_rule(struct igc_adapter *adapter,
static void igc_disable_nfc_rule(struct igc_adapter *adapter,
const struct igc_nfc_rule *rule)
{
+ if (rule->flex) {
+ igc_del_flex_filter(adapter, rule->filter.flex_index);
+ return;
+ }
+
if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
igc_del_etype_filter(adapter, rule->filter.etype);
@@ -4449,26 +4768,29 @@ void igc_down(struct igc_adapter *adapter)
igc_ptp_suspend(adapter);
- /* disable receives in the hardware */
- rctl = rd32(IGC_RCTL);
- wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN);
- /* flush and sleep below */
-
+ if (pci_device_is_present(adapter->pdev)) {
+ /* disable receives in the hardware */
+ rctl = rd32(IGC_RCTL);
+ wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN);
+ /* flush and sleep below */
+ }
/* set trans_start so we don't get spurious watchdogs during reset */
netif_trans_update(netdev);
netif_carrier_off(netdev);
netif_tx_stop_all_queues(netdev);
- /* disable transmits in the hardware */
- tctl = rd32(IGC_TCTL);
- tctl &= ~IGC_TCTL_EN;
- wr32(IGC_TCTL, tctl);
- /* flush both disables and wait for them to finish */
- wrfl();
- usleep_range(10000, 20000);
+ if (pci_device_is_present(adapter->pdev)) {
+ /* disable transmits in the hardware */
+ tctl = rd32(IGC_TCTL);
+ tctl &= ~IGC_TCTL_EN;
+ wr32(IGC_TCTL, tctl);
+ /* flush both disables and wait for them to finish */
+ wrfl();
+ usleep_range(10000, 20000);
- igc_irq_disable(adapter);
+ igc_irq_disable(adapter);
+ }
adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
@@ -4811,6 +5133,7 @@ static irqreturn_t igc_msix_ring(int irq, void *data)
*/
static int igc_request_msix(struct igc_adapter *adapter)
{
+ unsigned int num_q_vectors = adapter->num_q_vectors;
int i = 0, err = 0, vector = 0, free_vector = 0;
struct net_device *netdev = adapter->netdev;
@@ -4819,7 +5142,13 @@ static int igc_request_msix(struct igc_adapter *adapter)
if (err)
goto err_out;
- for (i = 0; i < adapter->num_q_vectors; i++) {
+ if (num_q_vectors > MAX_Q_VECTORS) {
+ num_q_vectors = MAX_Q_VECTORS;
+ dev_warn(&adapter->pdev->dev,
+ "The number of queue vectors (%d) is higher than max allowed (%d)\n",
+ adapter->num_q_vectors, MAX_Q_VECTORS);
+ }
+ for (i = 0; i < num_q_vectors; i++) {
struct igc_q_vector *q_vector = adapter->q_vector[i];
vector++;
@@ -4898,20 +5227,12 @@ bool igc_has_link(struct igc_adapter *adapter)
* false until the igc_check_for_link establishes link
* for copper adapters ONLY
*/
- switch (hw->phy.media_type) {
- case igc_media_type_copper:
- if (!hw->mac.get_link_status)
- return true;
- hw->mac.ops.check_for_link(hw);
- link_active = !hw->mac.get_link_status;
- break;
- default:
- case igc_media_type_unknown:
- break;
- }
+ if (!hw->mac.get_link_status)
+ return true;
+ hw->mac.ops.check_for_link(hw);
+ link_active = !hw->mac.get_link_status;
- if (hw->mac.type == igc_i225 &&
- hw->phy.id == I225_I_PHY_ID) {
+ if (hw->mac.type == igc_i225) {
if (!netif_carrier_ok(adapter->netdev)) {
adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
} else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) {
@@ -4999,7 +5320,9 @@ static void igc_watchdog_task(struct work_struct *work)
adapter->tx_timeout_factor = 14;
break;
case SPEED_100:
- /* maybe add some timeout factor ? */
+ case SPEED_1000:
+ case SPEED_2500:
+ adapter->tx_timeout_factor = 7;
break;
}
@@ -5426,7 +5749,6 @@ static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue,
bool enable)
{
struct igc_ring *ring;
- int i;
if (queue < 0 || queue >= adapter->num_tx_queues)
return -EINVAL;
@@ -5434,17 +5756,6 @@ static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue,
ring = adapter->tx_ring[queue];
ring->launchtime_enable = enable;
- if (adapter->base_time)
- return 0;
-
- adapter->cycle_time = NSEC_PER_SEC;
-
- for (i = 0; i < adapter->num_tx_queues; i++) {
- ring = adapter->tx_ring[i];
- ring->start_time = 0;
- ring->end_time = NSEC_PER_SEC;
- }
-
return 0;
}
@@ -5489,7 +5800,7 @@ static bool validate_schedule(struct igc_adapter *adapter,
if (e->command != TC_TAPRIO_CMD_SET_GATES)
return false;
- for (i = 0; i < IGC_MAX_TX_QUEUES; i++) {
+ for (i = 0; i < adapter->num_tx_queues; i++) {
if (e->gate_mask & BIT(i))
queue_uses[i]++;
@@ -5517,16 +5828,31 @@ static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
return igc_tsn_offload_apply(adapter);
}
+static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+{
+ int i;
+
+ adapter->base_time = 0;
+ adapter->cycle_time = NSEC_PER_SEC;
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct igc_ring *ring = adapter->tx_ring[i];
+
+ ring->start_time = 0;
+ ring->end_time = NSEC_PER_SEC;
+ }
+
+ return 0;
+}
+
static int igc_save_qbv_schedule(struct igc_adapter *adapter,
struct tc_taprio_qopt_offload *qopt)
{
u32 start_time = 0, end_time = 0;
size_t n;
- if (!qopt->enable) {
- adapter->base_time = 0;
- return 0;
- }
+ if (!qopt->enable)
+ return igc_tsn_clear_schedule(adapter);
if (adapter->base_time)
return -EALREADY;
@@ -5546,7 +5872,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
end_time += e->interval;
- for (i = 0; i < IGC_MAX_TX_QUEUES; i++) {
+ for (i = 0; i < adapter->num_tx_queues; i++) {
struct igc_ring *ring = adapter->tx_ring[i];
if (!(e->gate_mask & BIT(i)))
@@ -5578,6 +5904,74 @@ static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter,
return igc_tsn_offload_apply(adapter);
}
+static int igc_save_cbs_params(struct igc_adapter *adapter, int queue,
+ bool enable, int idleslope, int sendslope,
+ int hicredit, int locredit)
+{
+ bool cbs_status[IGC_MAX_SR_QUEUES] = { false };
+ struct net_device *netdev = adapter->netdev;
+ struct igc_ring *ring;
+ int i;
+
+ /* i225 has two sets of credit-based shaper logic.
+ * Supporting it only on the top two priority queues
+ */
+ if (queue < 0 || queue > 1)
+ return -EINVAL;
+
+ ring = adapter->tx_ring[queue];
+
+ for (i = 0; i < IGC_MAX_SR_QUEUES; i++)
+ if (adapter->tx_ring[i])
+ cbs_status[i] = adapter->tx_ring[i]->cbs_enable;
+
+ /* CBS should be enabled on the highest priority queue first in order
+ * for the CBS algorithm to operate as intended.
+ */
+ if (enable) {
+ if (queue == 1 && !cbs_status[0]) {
+ netdev_err(netdev,
+ "Enabling CBS on queue1 before queue0\n");
+ return -EINVAL;
+ }
+ } else {
+ if (queue == 0 && cbs_status[1]) {
+ netdev_err(netdev,
+ "Disabling CBS on queue0 before queue1\n");
+ return -EINVAL;
+ }
+ }
+
+ ring->cbs_enable = enable;
+ ring->idleslope = idleslope;
+ ring->sendslope = sendslope;
+ ring->hicredit = hicredit;
+ ring->locredit = locredit;
+
+ return 0;
+}
+
+static int igc_tsn_enable_cbs(struct igc_adapter *adapter,
+ struct tc_cbs_qopt_offload *qopt)
+{
+ struct igc_hw *hw = &adapter->hw;
+ int err;
+
+ if (hw->mac.type != igc_i225)
+ return -EOPNOTSUPP;
+
+ if (qopt->queue < 0 || qopt->queue > 1)
+ return -EINVAL;
+
+ err = igc_save_cbs_params(adapter, qopt->queue, qopt->enable,
+ qopt->idleslope, qopt->sendslope,
+ qopt->hicredit, qopt->locredit);
+ if (err)
+ return err;
+
+ return igc_tsn_offload_apply(adapter);
+}
+
static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
@@ -5590,6 +5984,9 @@ static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
case TC_SETUP_QDISC_ETF:
return igc_tsn_enable_launchtime(adapter, type_data);
+ case TC_SETUP_QDISC_CBS:
+ return igc_tsn_enable_cbs(adapter, type_data);
+
default:
return -EOPNOTSUPP;
}
@@ -5698,7 +6095,7 @@ static const struct net_device_ops igc_netdev_ops = {
.ndo_fix_features = igc_fix_features,
.ndo_set_features = igc_set_features,
.ndo_features_check = igc_features_check,
- .ndo_do_ioctl = igc_ioctl,
+ .ndo_eth_ioctl = igc_ioctl,
.ndo_setup_tc = igc_setup_tc,
.ndo_bpf = igc_bpf,
.ndo_xdp_xmit = igc_xdp_xmit,
@@ -5859,6 +6256,10 @@ static int igc_probe(struct pci_dev *pdev,
pci_enable_pcie_error_reporting(pdev);
+ err = pci_enable_ptm(pdev, NULL);
+ if (err < 0)
+ dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n");
+
pci_set_master(pdev);
err = -ENOMEM;
@@ -6012,6 +6413,8 @@ static int igc_probe(struct pci_dev *pdev,
igc_ptp_init(adapter);
+ igc_tsn_clear_schedule(adapter);
+
/* reset the hardware with the new settings */
igc_reset(adapter);
diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c
index 83aeb5e7076f..5cad31c3c7b0 100644
--- a/drivers/net/ethernet/intel/igc/igc_phy.c
+++ b/drivers/net/ethernet/intel/igc/igc_phy.c
@@ -249,8 +249,7 @@ static s32 igc_phy_setup_autoneg(struct igc_hw *hw)
return ret_val;
}
- if ((phy->autoneg_mask & ADVERTISE_2500_FULL) &&
- hw->phy.id == I225_I_PHY_ID) {
+ if (phy->autoneg_mask & ADVERTISE_2500_FULL) {
/* Read the MULTI GBT AN Control Register - reg 7.32 */
ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK <<
MMD_DEVADDR_SHIFT) |
@@ -390,8 +389,7 @@ static s32 igc_phy_setup_autoneg(struct igc_hw *hw)
ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL,
mii_1000t_ctrl_reg);
- if ((phy->autoneg_mask & ADVERTISE_2500_FULL) &&
- hw->phy.id == I225_I_PHY_ID)
+ if (phy->autoneg_mask & ADVERTISE_2500_FULL)
ret_val = phy->ops.write_reg(hw,
(STANDARD_AN_REG_MASK <<
MMD_DEVADDR_SHIFT) |
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index 69617d2c1be2..0f021909b430 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -9,6 +9,8 @@
#include <linux/ptp_classify.h>
#include <linux/clocksource.h>
#include <linux/ktime.h>
+#include <linux/delay.h>
+#include <linux/iopoll.h>
#define INCVALUE_MASK 0x7fffffff
#define ISGN 0x80000000
@@ -16,6 +18,9 @@
#define IGC_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9)
#define IGC_PTP_TX_TIMEOUT (HZ * 15)
+#define IGC_PTM_STAT_SLEEP 2
+#define IGC_PTM_STAT_TIMEOUT 100
+
/* SYSTIM read access for I225 */
void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts)
{
@@ -752,6 +757,147 @@ int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr)
-EFAULT : 0;
}
+/* The two conditions below must be met for cross timestamping via
+ * PCIe PTM:
+ *
+ * 1. We have a way to convert the timestamps in the PTM messages
+ * to something related to the system clocks (right now, only
+ * X86 systems with support for the Always Running Timer allow that);
+ *
+ * 2. We have PTM enabled in the path from the device to the PCIe root port.
+ */
+static bool igc_is_crosststamp_supported(struct igc_adapter *adapter)
+{
+ return IS_ENABLED(CONFIG_X86_TSC) ? pcie_ptm_enabled(adapter->pdev) : false;
+}
+
+static struct system_counterval_t igc_device_tstamp_to_system(u64 tstamp)
+{
+#if IS_ENABLED(CONFIG_X86_TSC)
+ return convert_art_ns_to_tsc(tstamp);
+#else
+ return (struct system_counterval_t) { };
+#endif
+}
+
+static void igc_ptm_log_error(struct igc_adapter *adapter, u32 ptm_stat)
+{
+ struct net_device *netdev = adapter->netdev;
+
+ switch (ptm_stat) {
+ case IGC_PTM_STAT_RET_ERR:
+ netdev_err(netdev, "PTM Error: Root port timeout\n");
+ break;
+ case IGC_PTM_STAT_BAD_PTM_RES:
+ netdev_err(netdev, "PTM Error: Bad response, PTM Response Data expected\n");
+ break;
+ case IGC_PTM_STAT_T4M1_OVFL:
+ netdev_err(netdev, "PTM Error: T4 minus T1 overflow\n");
+ break;
+ case IGC_PTM_STAT_ADJUST_1ST:
+ netdev_err(netdev, "PTM Error: 1588 timer adjusted during first PTM cycle\n");
+ break;
+ case IGC_PTM_STAT_ADJUST_CYC:
+ netdev_err(netdev, "PTM Error: 1588 timer adjusted during non-first PTM cycle\n");
+ break;
+ default:
+ netdev_err(netdev, "PTM Error: Unknown error (%#x)\n", ptm_stat);
+ break;
+ }
+}
+
+static int igc_phc_get_syncdevicetime(ktime_t *device,
+ struct system_counterval_t *system,
+ void *ctx)
+{
+ u32 stat, t2_curr_h, t2_curr_l, ctrl;
+ struct igc_adapter *adapter = ctx;
+ struct igc_hw *hw = &adapter->hw;
+ int err, count = 100;
+ ktime_t t1, t2_curr;
+
+ /* Get a snapshot of system clocks to use as historic value. */
+ ktime_get_snapshot(&adapter->snapshot);
+
+ do {
+ /* Doing this in a loop because in the event of a
+ * badly timed (ha!) system clock adjustment, we may
+ * get PTM errors from the PCI root, but these errors
+ * are transitory. Repeating the process returns valid
+ * data eventually.
+ */
+
+ /* To "manually" start the PTM cycle we need to clear and
+ * then set again the TRIG bit.
+ */
+ ctrl = rd32(IGC_PTM_CTRL);
+ ctrl &= ~IGC_PTM_CTRL_TRIG;
+ wr32(IGC_PTM_CTRL, ctrl);
+ ctrl |= IGC_PTM_CTRL_TRIG;
+ wr32(IGC_PTM_CTRL, ctrl);
+
+ /* The cycle only starts "for real" when software notifies
+ * that it has read the registers; this is done by setting
+ * the VALID bit.
+ */
+ wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID);
+
+ err = readx_poll_timeout(rd32, IGC_PTM_STAT, stat,
+ stat, IGC_PTM_STAT_SLEEP,
+ IGC_PTM_STAT_TIMEOUT);
+ if (err < 0) {
+ netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n");
+ return err;
+ }
+
+ if ((stat & IGC_PTM_STAT_VALID) == IGC_PTM_STAT_VALID)
+ break;
+
+ if (stat & ~IGC_PTM_STAT_VALID) {
+ /* An error occurred, log it. */
+ igc_ptm_log_error(adapter, stat);
+ /* The STAT register is write-1-to-clear (W1C),
+ * so write the previous error status to clear it.
+ */
+ wr32(IGC_PTM_STAT, stat);
+ continue;
+ }
+ } while (--count);
+
+ if (!count) {
+ netdev_err(adapter->netdev, "Exceeded number of tries for PTM cycle\n");
+ return -ETIMEDOUT;
+ }
+
+ t1 = ktime_set(rd32(IGC_PTM_T1_TIM0_H), rd32(IGC_PTM_T1_TIM0_L));
+
+ t2_curr_l = rd32(IGC_PTM_CURR_T2_L);
+ t2_curr_h = rd32(IGC_PTM_CURR_T2_H);
+
+ /* FIXME: When the register that tells the endianness of the
+ * PTM registers is implemented, check it here and add the
+ * appropriate conversion.
+ */
+ t2_curr_h = swab32(t2_curr_h);
+
+ t2_curr = ((s64)t2_curr_h << 32 | t2_curr_l);
+
+ *device = t1;
+ *system = igc_device_tstamp_to_system(t2_curr);
+
+ return 0;
+}
+
+static int igc_ptp_getcrosststamp(struct ptp_clock_info *ptp,
+ struct system_device_crosststamp *cts)
+{
+ struct igc_adapter *adapter = container_of(ptp, struct igc_adapter,
+ ptp_caps);
+
+ return get_device_system_crosststamp(igc_phc_get_syncdevicetime,
+ adapter, &adapter->snapshot, cts);
+}
+
/**
* igc_ptp_init - Initialize PTP functionality
* @adapter: Board private structure
@@ -788,6 +934,11 @@ void igc_ptp_init(struct igc_adapter *adapter)
adapter->ptp_caps.n_per_out = IGC_N_PEROUT;
adapter->ptp_caps.n_pins = IGC_N_SDP;
adapter->ptp_caps.verify = igc_ptp_verify_pin;
+
+ if (!igc_is_crosststamp_supported(adapter))
+ break;
+
+ adapter->ptp_caps.getcrosststamp = igc_ptp_getcrosststamp;
break;
default:
adapter->ptp_clock = NULL;
@@ -849,7 +1000,8 @@ void igc_ptp_suspend(struct igc_adapter *adapter)
adapter->ptp_tx_skb = NULL;
clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
- igc_ptp_time_save(adapter);
+ if (pci_device_is_present(adapter->pdev))
+ igc_ptp_time_save(adapter);
}
/**
@@ -878,7 +1030,9 @@ void igc_ptp_stop(struct igc_adapter *adapter)
void igc_ptp_reset(struct igc_adapter *adapter)
{
struct igc_hw *hw = &adapter->hw;
+ u32 cycle_ctrl, ctrl;
unsigned long flags;
+ u32 timadj;
/* reset the tstamp_config */
igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config);
@@ -887,12 +1041,38 @@ void igc_ptp_reset(struct igc_adapter *adapter)
switch (adapter->hw.mac.type) {
case igc_i225:
+ timadj = rd32(IGC_TIMADJ);
+ timadj |= IGC_TIMADJ_ADJUST_METH;
+ wr32(IGC_TIMADJ, timadj);
+
wr32(IGC_TSAUXC, 0x0);
wr32(IGC_TSSDP, 0x0);
wr32(IGC_TSIM,
IGC_TSICR_INTERRUPTS |
(adapter->pps_sys_wrap_on ? IGC_TSICR_SYS_WRAP : 0));
wr32(IGC_IMS, IGC_IMS_TS);
+
+ if (!igc_is_crosststamp_supported(adapter))
+ break;
+
+ wr32(IGC_PCIE_DIG_DELAY, IGC_PCIE_DIG_DELAY_DEFAULT);
+ wr32(IGC_PCIE_PHY_DELAY, IGC_PCIE_PHY_DELAY_DEFAULT);
+
+ cycle_ctrl = IGC_PTM_CYCLE_CTRL_CYC_TIME(IGC_PTM_CYC_TIME_DEFAULT);
+
+ wr32(IGC_PTM_CYCLE_CTRL, cycle_ctrl);
+
+ ctrl = IGC_PTM_CTRL_EN |
+ IGC_PTM_CTRL_START_NOW |
+ IGC_PTM_CTRL_SHRT_CYC(IGC_PTM_SHORT_CYC_DEFAULT) |
+ IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT) |
+ IGC_PTM_CTRL_TRIG;
+
+ wr32(IGC_PTM_CTRL, ctrl);
+
+ /* Force the first cycle to run. */
+ wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID);
+
break;
default:
/* No work to do. */
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index 0f82990567d9..e197a33d93a0 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -67,6 +67,9 @@
/* Filtering Registers */
#define IGC_ETQF(_n) (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */
+#define IGC_FHFT(_n) (0x09000 + (256 * (_n))) /* Flexible Host Filter */
+#define IGC_FHFT_EXT(_n) (0x09A00 + (256 * (_n))) /* Flexible Host Filter Extended */
+#define IGC_FHFTSL 0x05804 /* Flex Filter indirect table select */
/* ETQF register bit definitions */
#define IGC_ETQF_FILTER_ENABLE BIT(26)
@@ -75,6 +78,19 @@
#define IGC_ETQF_QUEUE_MASK 0x00070000
#define IGC_ETQF_ETYPE_MASK 0x0000FFFF
+/* FHFT register bit definitions */
+#define IGC_FHFT_LENGTH_MASK GENMASK(7, 0)
+#define IGC_FHFT_QUEUE_SHIFT 8
+#define IGC_FHFT_QUEUE_MASK GENMASK(10, 8)
+#define IGC_FHFT_PRIO_SHIFT 16
+#define IGC_FHFT_PRIO_MASK GENMASK(18, 16)
+#define IGC_FHFT_IMM_INT BIT(24)
+#define IGC_FHFT_DROP BIT(25)
+
+/* FHFTSL register bit definitions */
+#define IGC_FHFTSL_FTSL_SHIFT 0
+#define IGC_FHFTSL_FTSL_MASK GENMASK(1, 0)
+
/* Redirection Table - RW Array */
#define IGC_RETA(_i) (0x05C00 + ((_i) * 4))
/* RSS Random Key - RW Array */
@@ -220,6 +236,9 @@
#define IGC_ENDQT(_n) (0x3334 + 0x4 * (_n))
#define IGC_DTXMXPKTSZ 0x355C
+#define IGC_TQAVCC(_n) (0x3004 + ((_n) * 0x40))
+#define IGC_TQAVHC(_n) (0x300C + ((_n) * 0x40))
+
/* System Time Registers */
#define IGC_SYSTIML 0x0B600 /* System time register Low - RO */
#define IGC_SYSTIMH 0x0B604 /* System time register High - RO */
@@ -229,6 +248,29 @@
#define IGC_TXSTMPL 0x0B618 /* Tx timestamp value Low - RO */
#define IGC_TXSTMPH 0x0B61C /* Tx timestamp value High - RO */
+#define IGC_TIMADJ 0x0B60C /* Time Adjustment Offset Register */
+
+/* PCIe Registers */
+#define IGC_PTM_CTRL 0x12540 /* PTM Control */
+#define IGC_PTM_STAT 0x12544 /* PTM Status */
+#define IGC_PTM_CYCLE_CTRL 0x1254C /* PTM Cycle Control */
+
+/* PTM Time registers */
+#define IGC_PTM_T1_TIM0_L 0x12558 /* T1 on Timer 0 Low */
+#define IGC_PTM_T1_TIM0_H 0x1255C /* T1 on Timer 0 High */
+
+#define IGC_PTM_CURR_T2_L 0x1258C /* Current T2 Low */
+#define IGC_PTM_CURR_T2_H 0x12590 /* Current T2 High */
+#define IGC_PTM_PREV_T2_L 0x12584 /* Previous T2 Low */
+#define IGC_PTM_PREV_T2_H 0x12588 /* Previous T2 High */
+#define IGC_PTM_PREV_T4M1 0x12578 /* T4 Minus T1 on previous PTM Cycle */
+#define IGC_PTM_CURR_T4M1 0x1257C /* T4 Minus T1 on this PTM Cycle */
+#define IGC_PTM_PREV_T3M2 0x12580 /* T3 Minus T2 on previous PTM Cycle */
+#define IGC_PTM_TDELAY 0x12594 /* PTM PCIe Link Delay */
+
+#define IGC_PCIE_DIG_DELAY 0x12550 /* PCIe Digital Delay */
+#define IGC_PCIE_PHY_DELAY 0x12554 /* PCIe PHY Delay */
+
/* Management registers */
#define IGC_MANC 0x05820 /* Management Control - RW */
@@ -240,6 +282,7 @@
#define IGC_WUFC 0x05808 /* Wakeup Filter Control - RW */
#define IGC_WUS 0x05810 /* Wakeup Status - R/W1C */
#define IGC_WUPL 0x05900 /* Wakeup Packet Length - RW */
+#define IGC_WUFC_EXT 0x0580C /* Wakeup Filter Control Register Extended - RW */
/* Wake Up packet memory */
#define IGC_WUPM_REG(_i) (0x05A00 + ((_i) * 4))
diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
index 174103c4bea6..0fce22de2ab8 100644
--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
@@ -18,8 +18,38 @@ static bool is_any_launchtime(struct igc_adapter *adapter)
return false;
}
+static bool is_cbs_enabled(struct igc_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct igc_ring *ring = adapter->tx_ring[i];
+
+ if (ring->cbs_enable)
+ return true;
+ }
+
+ return false;
+}
+
+static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter)
+{
+ unsigned int new_flags = adapter->flags & ~IGC_FLAG_TSN_ANY_ENABLED;
+
+ if (adapter->base_time)
+ new_flags |= IGC_FLAG_TSN_QBV_ENABLED;
+
+ if (is_any_launchtime(adapter))
+ new_flags |= IGC_FLAG_TSN_QBV_ENABLED;
+
+ if (is_cbs_enabled(adapter))
+ new_flags |= IGC_FLAG_TSN_QAV_ENABLED;
+
+ return new_flags;
+}
+
/* Returns the TSN specific registers to their default values after
- * TSN offloading is disabled.
+ * the adapter is reset.
*/
static int igc_tsn_disable_offload(struct igc_adapter *adapter)
{
@@ -27,11 +57,6 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter)
u32 tqavctrl;
int i;
- if (!(adapter->flags & IGC_FLAG_TSN_QBV_ENABLED))
- return 0;
-
- adapter->cycle_time = 0;
-
wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT);
@@ -41,18 +66,12 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter)
wr32(IGC_TQAVCTRL, tqavctrl);
for (i = 0; i < adapter->num_tx_queues; i++) {
- struct igc_ring *ring = adapter->tx_ring[i];
-
- ring->start_time = 0;
- ring->end_time = 0;
- ring->launchtime_enable = false;
-
wr32(IGC_TXQCTL(i), 0);
wr32(IGC_STQT(i), 0);
wr32(IGC_ENDQT(i), NSEC_PER_SEC);
}
- wr32(IGC_QBVCYCLET_S, NSEC_PER_SEC);
+ wr32(IGC_QBVCYCLET_S, 0);
wr32(IGC_QBVCYCLET, NSEC_PER_SEC);
adapter->flags &= ~IGC_FLAG_TSN_QBV_ENABLED;
@@ -68,9 +87,6 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
ktime_t base_time, systim;
int i;
- if (adapter->flags & IGC_FLAG_TSN_QBV_ENABLED)
- return 0;
-
cycle = adapter->cycle_time;
base_time = adapter->base_time;
@@ -88,6 +104,8 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
for (i = 0; i < adapter->num_tx_queues; i++) {
struct igc_ring *ring = adapter->tx_ring[i];
u32 txqctl = 0;
+ u16 cbs_value;
+ u32 tqavcc;
wr32(IGC_STQT(i), ring->start_time);
wr32(IGC_ENDQT(i), ring->end_time);
@@ -105,6 +123,90 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
if (ring->launchtime_enable)
txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT;
+ /* Skip configuring CBS for Q2 and Q3 */
+ if (i > 1)
+ goto skip_cbs;
+
+ if (ring->cbs_enable) {
+ if (i == 0)
+ txqctl |= IGC_TXQCTL_QAV_SEL_CBS0;
+ else
+ txqctl |= IGC_TXQCTL_QAV_SEL_CBS1;
+
+ /* According to i225 datasheet section 7.5.2.7, we
+ * should set the 'idleSlope' field from TQAVCC
+ * register following the equation:
+ *
+ * value = (link-speed / 100Mbps) *
+ *         ((0x7736 * BW * 0.2) / 2.5)              (E1)
+ *
+ * Note that 'link-speed' is in Mbps.
+ *
+ * 'BW' is the percentage bandwidth out of full
+ * link speed which can be found with the
+ * following equation. Note that idleSlope here
+ * is the parameter from this function
+ * which is in kbps.
+ *
+ * BW = idleSlope / (link-speed * 1000)             (E2)
+ *
+ * That said, we can come up with a generic
+ * equation to calculate the value we should set
+ * in the TQAVCC register by replacing 'BW' in E1 by E2.
+ * The resulting equation is:
+ *
+ * value = (link-speed * 0x7736 * idleSlope * 0.2) /
+ *         (100 * 2.5 * link-speed * 1000)          (E3)
+ *
+ * 'link-speed' is present in both sides of the
+ * fraction so it is canceled out. The final
+ * equation is the following:
+ *
+ * value = (idleSlope * 61036) / 2500000            (E4)
+ *
+ * NOTE: For i225, given the above, we can see
+ * that idleslope is represented in
+ * 40.959433 kbps units by the value at
+ * the TQAVCC register (2.5Gbps / 61036),
+ * which reduces the granularity for
+ * idleslope increments.
+ *
+ * In i225 controller, the sendSlope and loCredit
+ * parameters from CBS are not configurable
+ * by software so we don't do any
+ * 'controller configuration' in respect to
+ * these parameters.
+ */
+ cbs_value = DIV_ROUND_UP_ULL(ring->idleslope
+ * 61036ULL, 2500000);
+
+ tqavcc = rd32(IGC_TQAVCC(i));
+ tqavcc &= ~IGC_TQAVCC_IDLESLOPE_MASK;
+ tqavcc |= cbs_value | IGC_TQAVCC_KEEP_CREDITS;
+ wr32(IGC_TQAVCC(i), tqavcc);
+
+ wr32(IGC_TQAVHC(i),
+ 0x80000000 + ring->hicredit * 0x7735);
+ } else {
+ /* Disable any CBS for the queue */
+ txqctl &= ~(IGC_TXQCTL_QAV_SEL_MASK);
+
+ /* Set idleSlope to zero. */
+ tqavcc = rd32(IGC_TQAVCC(i));
+ tqavcc &= ~(IGC_TQAVCC_IDLESLOPE_MASK |
+ IGC_TQAVCC_KEEP_CREDITS);
+ wr32(IGC_TQAVCC(i), tqavcc);
+
+ /* Set hiCredit to zero. */
+ wr32(IGC_TQAVHC(i), 0);
+ }
+skip_cbs:
wr32(IGC_TXQCTL(i), txqctl);
}
@@ -125,33 +227,41 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
wr32(IGC_BASET_H, baset_h);
wr32(IGC_BASET_L, baset_l);
- adapter->flags |= IGC_FLAG_TSN_QBV_ENABLED;
-
return 0;
}
-int igc_tsn_offload_apply(struct igc_adapter *adapter)
+int igc_tsn_reset(struct igc_adapter *adapter)
{
- bool is_any_enabled = adapter->base_time || is_any_launchtime(adapter);
+ unsigned int new_flags;
+ int err = 0;
- if (!(adapter->flags & IGC_FLAG_TSN_QBV_ENABLED) && !is_any_enabled)
- return 0;
+ new_flags = igc_tsn_new_flags(adapter);
- if (!is_any_enabled) {
- int err = igc_tsn_disable_offload(adapter);
+ if (!(new_flags & IGC_FLAG_TSN_ANY_ENABLED))
+ return igc_tsn_disable_offload(adapter);
- if (err < 0)
- return err;
+ err = igc_tsn_enable_offload(adapter);
+ if (err < 0)
+ return err;
- /* The BASET registers aren't cleared when writing
- * into them, force a reset if the interface is
- * running.
- */
- if (netif_running(adapter->netdev))
- schedule_work(&adapter->reset_task);
+ adapter->flags = new_flags;
+ return err;
+}
+
+int igc_tsn_offload_apply(struct igc_adapter *adapter)
+{
+ int err;
+
+ if (netif_running(adapter->netdev)) {
+ schedule_work(&adapter->reset_task);
return 0;
}
- return igc_tsn_enable_offload(adapter);
+ err = igc_tsn_enable_offload(adapter);
+ if (err < 0)
+ return err;
+
+ adapter->flags = igc_tsn_new_flags(adapter);
+ return 0;
}
diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h b/drivers/net/ethernet/intel/igc/igc_tsn.h
index f76bc86ddccd..1512307f5a52 100644
--- a/drivers/net/ethernet/intel/igc/igc_tsn.h
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.h
@@ -5,5 +5,6 @@
#define _IGC_TSN_H_
int igc_tsn_offload_apply(struct igc_adapter *adapter);
+int igc_tsn_reset(struct igc_adapter *adapter);
#endif /* _IGC_BASE_H */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 4ceaca0f6ce3..fc26e4ddeb0d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -2358,7 +2358,9 @@ static int ixgbe_set_phys_id(struct net_device *netdev,
}
static int ixgbe_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct ixgbe_adapter *adapter = netdev_priv(netdev);
@@ -2412,7 +2414,9 @@ static bool ixgbe_update_rsc(struct ixgbe_adapter *adapter)
}
static int ixgbe_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct ixgbe_adapter *adapter = netdev_priv(netdev);
struct ixgbe_q_vector *q_vector;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 14aea40da50f..24e06ba6f5e9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -10247,7 +10247,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
.ndo_set_tx_maxrate = ixgbe_tx_maxrate,
.ndo_vlan_rx_add_vid = ixgbe_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = ixgbe_vlan_rx_kill_vid,
- .ndo_do_ioctl = ixgbe_ioctl,
+ .ndo_eth_ioctl = ixgbe_ioctl,
.ndo_set_vf_mac = ixgbe_ndo_set_vf_mac,
.ndo_set_vf_vlan = ixgbe_ndo_set_vf_vlan,
.ndo_set_vf_rate = ixgbe_ndo_set_vf_bw,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 96dd1a4f956a..b1d22e4d5ec9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -52,8 +52,11 @@ static int ixgbe_xsk_pool_enable(struct ixgbe_adapter *adapter,
/* Kick start the NAPI context so that receiving will start */
err = ixgbe_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
- if (err)
+ if (err) {
+ clear_bit(qid, adapter->af_xdp_zc_qps);
+ xsk_pool_dma_unmap(pool, IXGBE_RX_DMA_ATTR);
return err;
+ }
}
return 0;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index e49fb1cd9a99..8380f905e708 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -787,7 +787,9 @@ static int ixgbevf_nway_reset(struct net_device *netdev)
}
static int ixgbevf_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct ixgbevf_adapter *adapter = netdev_priv(netdev);
@@ -811,7 +813,9 @@ static int ixgbevf_get_coalesce(struct net_device *netdev,
}
static int ixgbevf_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct ixgbevf_adapter *adapter = netdev_priv(netdev);
struct ixgbevf_q_vector *q_vector;
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index f1b9284e0bea..1bdc4f23e1e5 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -734,17 +734,17 @@ jme_make_new_rx_buf(struct jme_adapter *jme, int i)
if (unlikely(!skb))
return -ENOMEM;
- mapping = pci_map_page(jme->pdev, virt_to_page(skb->data),
+ mapping = dma_map_page(&jme->pdev->dev, virt_to_page(skb->data),
offset_in_page(skb->data), skb_tailroom(skb),
- PCI_DMA_FROMDEVICE);
- if (unlikely(pci_dma_mapping_error(jme->pdev, mapping))) {
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&jme->pdev->dev, mapping))) {
dev_kfree_skb(skb);
return -ENOMEM;
}
if (likely(rxbi->mapping))
- pci_unmap_page(jme->pdev, rxbi->mapping,
- rxbi->len, PCI_DMA_FROMDEVICE);
+ dma_unmap_page(&jme->pdev->dev, rxbi->mapping, rxbi->len,
+ DMA_FROM_DEVICE);
rxbi->skb = skb;
rxbi->len = skb_tailroom(skb);
@@ -760,10 +760,8 @@ jme_free_rx_buf(struct jme_adapter *jme, int i)
rxbi += i;
if (rxbi->skb) {
- pci_unmap_page(jme->pdev,
- rxbi->mapping,
- rxbi->len,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_page(&jme->pdev->dev, rxbi->mapping, rxbi->len,
+ DMA_FROM_DEVICE);
dev_kfree_skb(rxbi->skb);
rxbi->skb = NULL;
rxbi->mapping = 0;
@@ -1005,16 +1003,12 @@ jme_alloc_and_feed_skb(struct jme_adapter *jme, int idx)
rxbi += idx;
skb = rxbi->skb;
- pci_dma_sync_single_for_cpu(jme->pdev,
- rxbi->mapping,
- rxbi->len,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&jme->pdev->dev, rxbi->mapping, rxbi->len,
+ DMA_FROM_DEVICE);
if (unlikely(jme_make_new_rx_buf(jme, idx))) {
- pci_dma_sync_single_for_device(jme->pdev,
- rxbi->mapping,
- rxbi->len,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_device(&jme->pdev->dev, rxbi->mapping,
+ rxbi->len, DMA_FROM_DEVICE);
++(NET_STAT(jme).rx_dropped);
} else {
@@ -1453,10 +1447,9 @@ static void jme_tx_clean_tasklet(struct tasklet_struct *t)
ttxbi = txbi + ((i + j) & (mask));
txdesc[(i + j) & (mask)].dw[0] = 0;
- pci_unmap_page(jme->pdev,
- ttxbi->mapping,
- ttxbi->len,
- PCI_DMA_TODEVICE);
+ dma_unmap_page(&jme->pdev->dev,
+ ttxbi->mapping, ttxbi->len,
+ DMA_TO_DEVICE);
ttxbi->mapping = 0;
ttxbi->len = 0;
@@ -1966,19 +1959,13 @@ jme_fill_tx_map(struct pci_dev *pdev,
{
dma_addr_t dmaaddr;
- dmaaddr = pci_map_page(pdev,
- page,
- page_offset,
- len,
- PCI_DMA_TODEVICE);
+ dmaaddr = dma_map_page(&pdev->dev, page, page_offset, len,
+ DMA_TO_DEVICE);
- if (unlikely(pci_dma_mapping_error(pdev, dmaaddr)))
+ if (unlikely(dma_mapping_error(&pdev->dev, dmaaddr)))
return -EINVAL;
- pci_dma_sync_single_for_device(pdev,
- dmaaddr,
- len,
- PCI_DMA_TODEVICE);
+ dma_sync_single_for_device(&pdev->dev, dmaaddr, len, DMA_TO_DEVICE);
txdesc->dw[0] = 0;
txdesc->dw[1] = 0;
@@ -2003,10 +1990,8 @@ static void jme_drop_tx_map(struct jme_adapter *jme, int startidx, int count)
for (j = 0 ; j < count ; j++) {
ctxbi = txbi + ((startidx + j + 2) & (mask));
- pci_unmap_page(jme->pdev,
- ctxbi->mapping,
- ctxbi->len,
- PCI_DMA_TODEVICE);
+ dma_unmap_page(&jme->pdev->dev, ctxbi->mapping, ctxbi->len,
+ DMA_TO_DEVICE);
ctxbi->mapping = 0;
ctxbi->len = 0;
@@ -2400,8 +2385,10 @@ jme_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p)
mdio_memcpy(jme, p32, JME_PHY_REG_NR);
}
-static int
-jme_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ecmd)
+static int jme_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ecmd,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct jme_adapter *jme = netdev_priv(netdev);
@@ -2437,8 +2424,10 @@ jme_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ecmd)
return 0;
}
-static int
-jme_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ecmd)
+static int jme_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ecmd,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct jme_adapter *jme = netdev_priv(netdev);
struct dynpcc_info *dpi = &(jme->dpi);
@@ -2859,18 +2848,15 @@ static int
jme_pci_dma64(struct pci_dev *pdev)
{
if (pdev->device == PCI_DEVICE_ID_JMICRON_JMC250 &&
- !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)))
- if (!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)))
- return 1;
+ !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)))
+ return 1;
if (pdev->device == PCI_DEVICE_ID_JMICRON_JMC250 &&
- !pci_set_dma_mask(pdev, DMA_BIT_MASK(40)))
- if (!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)))
- return 1;
+ !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)))
+ return 1;
- if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))
- if (!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)))
- return 0;
+ if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
+ return 0;
return -1;
}
@@ -2901,7 +2887,7 @@ static const struct net_device_ops jme_netdev_ops = {
.ndo_open = jme_open,
.ndo_stop = jme_close,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = jme_ioctl,
+ .ndo_eth_ioctl = jme_ioctl,
.ndo_start_xmit = jme_start_xmit,
.ndo_set_mac_address = jme_set_macaddr,
.ndo_set_rx_mode = jme_set_multi,
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index b30a45725374..3e9f324f1061 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -1272,7 +1272,7 @@ static const struct net_device_ops korina_netdev_ops = {
.ndo_start_xmit = korina_send_packet,
.ndo_set_rx_mode = korina_multicast_list,
.ndo_tx_timeout = korina_tx_timeout,
- .ndo_do_ioctl = korina_ioctl,
+ .ndo_eth_ioctl = korina_ioctl,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index 2d0c52f7106b..62f8c5212182 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -609,7 +609,7 @@ static const struct net_device_ops ltq_eth_netdev_ops = {
.ndo_stop = ltq_etop_stop,
.ndo_start_xmit = ltq_etop_tx,
.ndo_change_mtu = ltq_etop_change_mtu,
- .ndo_do_ioctl = phy_do_ioctl,
+ .ndo_eth_ioctl = phy_do_ioctl,
.ndo_set_mac_address = ltq_etop_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = ltq_etop_set_multicast_list,
diff --git a/drivers/net/ethernet/litex/Kconfig b/drivers/net/ethernet/litex/Kconfig
new file mode 100644
index 000000000000..63bf01d28f0c
--- /dev/null
+++ b/drivers/net/ethernet/litex/Kconfig
@@ -0,0 +1,28 @@
+#
+# LiteX device configuration
+#
+
+config NET_VENDOR_LITEX
+ bool "LiteX devices"
+ default y
+ help
+ If you have a network (Ethernet) card belonging to this class, say Y.
+
+ Note that the answer to this question doesn't directly affect the
+ kernel: saying N will just cause the configurator to skip all
+ the questions about LiteX devices. If you say Y, you will be asked
+ for your specific card in the following questions.
+
+if NET_VENDOR_LITEX
+
+config LITEX_LITEETH
+ tristate "LiteX Ethernet support"
+ depends on OF_NET
+ help
+ If you wish to compile a kernel for hardware with a LiteX LiteEth
+ device then you should answer Y to this.
+
+ LiteX is a soft system-on-chip that targets FPGAs. LiteETH is a basic
+ network device that is commonly used in LiteX designs.
+
+endif # NET_VENDOR_LITEX
diff --git a/drivers/net/ethernet/litex/Makefile b/drivers/net/ethernet/litex/Makefile
new file mode 100644
index 000000000000..9343b73b8e49
--- /dev/null
+++ b/drivers/net/ethernet/litex/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the LiteX network device drivers.
+#
+
+obj-$(CONFIG_LITEX_LITEETH) += litex_liteeth.o
diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c
new file mode 100644
index 000000000000..a9bdbf0dcfe1
--- /dev/null
+++ b/drivers/net/ethernet/litex/litex_liteeth.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LiteX Liteeth Ethernet
+ *
+ * Copyright 2017 Joel Stanley <joel@jms.id.au>
+ *
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/litex.h>
+#include <linux/module.h>
+#include <linux/of_net.h>
+#include <linux/platform_device.h>
+
+#define LITEETH_WRITER_SLOT 0x00
+#define LITEETH_WRITER_LENGTH 0x04
+#define LITEETH_WRITER_ERRORS 0x08
+#define LITEETH_WRITER_EV_STATUS 0x0C
+#define LITEETH_WRITER_EV_PENDING 0x10
+#define LITEETH_WRITER_EV_ENABLE 0x14
+#define LITEETH_READER_START 0x18
+#define LITEETH_READER_READY 0x1C
+#define LITEETH_READER_LEVEL 0x20
+#define LITEETH_READER_SLOT 0x24
+#define LITEETH_READER_LENGTH 0x28
+#define LITEETH_READER_EV_STATUS 0x2C
+#define LITEETH_READER_EV_PENDING 0x30
+#define LITEETH_READER_EV_ENABLE 0x34
+#define LITEETH_PREAMBLE_CRC 0x38
+#define LITEETH_PREAMBLE_ERRORS 0x3C
+#define LITEETH_CRC_ERRORS 0x40
+
+#define LITEETH_PHY_CRG_RESET 0x00
+#define LITEETH_MDIO_W 0x04
+#define LITEETH_MDIO_R 0x0C
+
+#define DRV_NAME "liteeth"
+
+struct liteeth {
+ void __iomem *base;
+ struct net_device *netdev;
+ struct device *dev;
+ u32 slot_size;
+
+ /* Tx */
+ u32 tx_slot;
+ u32 num_tx_slots;
+ void __iomem *tx_base;
+
+ /* Rx */
+ u32 rx_slot;
+ u32 num_rx_slots;
+ void __iomem *rx_base;
+};
+
+static int liteeth_rx(struct net_device *netdev)
+{
+ struct liteeth *priv = netdev_priv(netdev);
+ struct sk_buff *skb;
+ unsigned char *data;
+ u8 rx_slot;
+ int len;
+
+ rx_slot = litex_read8(priv->base + LITEETH_WRITER_SLOT);
+ len = litex_read32(priv->base + LITEETH_WRITER_LENGTH);
+
+ if (len == 0 || len > 2048)
+ goto rx_drop;
+
+ skb = netdev_alloc_skb_ip_align(netdev, len);
+ if (!skb) {
+ netdev_err(netdev, "couldn't get memory\n");
+ goto rx_drop;
+ }
+
+ data = skb_put(skb, len);
+ memcpy_fromio(data, priv->rx_base + rx_slot * priv->slot_size, len);
+ skb->protocol = eth_type_trans(skb, netdev);
+
+ netdev->stats.rx_packets++;
+ netdev->stats.rx_bytes += len;
+
+ return netif_rx(skb);
+
+rx_drop:
+ netdev->stats.rx_dropped++;
+ netdev->stats.rx_errors++;
+
+ return NET_RX_DROP;
+}
+
+static irqreturn_t liteeth_interrupt(int irq, void *dev_id)
+{
+ struct net_device *netdev = dev_id;
+ struct liteeth *priv = netdev_priv(netdev);
+ u8 reg;
+
+ reg = litex_read8(priv->base + LITEETH_READER_EV_PENDING);
+ if (reg) {
+ if (netif_queue_stopped(netdev))
+ netif_wake_queue(netdev);
+ litex_write8(priv->base + LITEETH_READER_EV_PENDING, reg);
+ }
+
+ reg = litex_read8(priv->base + LITEETH_WRITER_EV_PENDING);
+ if (reg) {
+ liteeth_rx(netdev);
+ litex_write8(priv->base + LITEETH_WRITER_EV_PENDING, reg);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int liteeth_open(struct net_device *netdev)
+{
+ struct liteeth *priv = netdev_priv(netdev);
+ int err;
+
+ /* Clear pending events */
+ litex_write8(priv->base + LITEETH_WRITER_EV_PENDING, 1);
+ litex_write8(priv->base + LITEETH_READER_EV_PENDING, 1);
+
+ err = request_irq(netdev->irq, liteeth_interrupt, 0, netdev->name, netdev);
+ if (err) {
+ netdev_err(netdev, "failed to request irq %d\n", netdev->irq);
+ return err;
+ }
+
+ /* Enable IRQs */
+ litex_write8(priv->base + LITEETH_WRITER_EV_ENABLE, 1);
+ litex_write8(priv->base + LITEETH_READER_EV_ENABLE, 1);
+
+ netif_carrier_on(netdev);
+ netif_start_queue(netdev);
+
+ return 0;
+}
+
+static int liteeth_stop(struct net_device *netdev)
+{
+ struct liteeth *priv = netdev_priv(netdev);
+
+ netif_stop_queue(netdev);
+ netif_carrier_off(netdev);
+
+ litex_write8(priv->base + LITEETH_WRITER_EV_ENABLE, 0);
+ litex_write8(priv->base + LITEETH_READER_EV_ENABLE, 0);
+
+ free_irq(netdev->irq, netdev);
+
+ return 0;
+}
+
+static int liteeth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct liteeth *priv = netdev_priv(netdev);
+ void __iomem *txbuffer;
+
+ if (!litex_read8(priv->base + LITEETH_READER_READY)) {
+ if (net_ratelimit())
+ netdev_err(netdev, "LITEETH_READER_READY not ready\n");
+
+ netif_stop_queue(netdev);
+
+ return NETDEV_TX_BUSY;
+ }
+
+ /* Reject oversize packets */
+ if (unlikely(skb->len > priv->slot_size)) {
+ if (net_ratelimit())
+ netdev_err(netdev, "tx packet too big\n");
+
+ dev_kfree_skb_any(skb);
+ netdev->stats.tx_dropped++;
+ netdev->stats.tx_errors++;
+
+ return NETDEV_TX_OK;
+ }
+
+ txbuffer = priv->tx_base + priv->tx_slot * priv->slot_size;
+ memcpy_toio(txbuffer, skb->data, skb->len);
+ litex_write8(priv->base + LITEETH_READER_SLOT, priv->tx_slot);
+ litex_write16(priv->base + LITEETH_READER_LENGTH, skb->len);
+ litex_write8(priv->base + LITEETH_READER_START, 1);
+
+ netdev->stats.tx_bytes += skb->len;
+ netdev->stats.tx_packets++;
+
+ priv->tx_slot = (priv->tx_slot + 1) % priv->num_tx_slots;
+ dev_kfree_skb_any(skb);
+
+ return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops liteeth_netdev_ops = {
+ .ndo_open = liteeth_open,
+ .ndo_stop = liteeth_stop,
+ .ndo_start_xmit = liteeth_start_xmit,
+};
+
+static void liteeth_setup_slots(struct liteeth *priv)
+{
+ struct device_node *np = priv->dev->of_node;
+ int err;
+
+ err = of_property_read_u32(np, "litex,rx-slots", &priv->num_rx_slots);
+ if (err) {
+ dev_dbg(priv->dev, "unable to get litex,rx-slots, using 2\n");
+ priv->num_rx_slots = 2;
+ }
+
+ err = of_property_read_u32(np, "litex,tx-slots", &priv->num_tx_slots);
+ if (err) {
+ dev_dbg(priv->dev, "unable to get litex,tx-slots, using 2\n");
+ priv->num_tx_slots = 2;
+ }
+
+ err = of_property_read_u32(np, "litex,slot-size", &priv->slot_size);
+ if (err) {
+ dev_dbg(priv->dev, "unable to get litex,slot-size, using 0x800\n");
+ priv->slot_size = 0x800;
+ }
+}
+
+static int liteeth_probe(struct platform_device *pdev)
+{
+ struct net_device *netdev;
+ void __iomem *buf_base;
+ struct liteeth *priv;
+ int irq, err;
+
+ netdev = devm_alloc_etherdev(&pdev->dev, sizeof(*priv));
+ if (!netdev)
+ return -ENOMEM;
+
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+ platform_set_drvdata(pdev, netdev);
+
+ priv = netdev_priv(netdev);
+ priv->netdev = netdev;
+ priv->dev = &pdev->dev;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(&pdev->dev, "Failed to get IRQ %d\n", irq);
+ return irq;
+ }
+ netdev->irq = irq;
+
+ priv->base = devm_platform_ioremap_resource_byname(pdev, "mac");
+ if (IS_ERR(priv->base))
+ return PTR_ERR(priv->base);
+
+ buf_base = devm_platform_ioremap_resource_byname(pdev, "buffer");
+ if (IS_ERR(buf_base))
+ return PTR_ERR(buf_base);
+
+ liteeth_setup_slots(priv);
+
+ /* Rx slots */
+ priv->rx_base = buf_base;
+ priv->rx_slot = 0;
+
+ /* Tx slots come after Rx slots */
+ priv->tx_base = buf_base + priv->num_rx_slots * priv->slot_size;
+ priv->tx_slot = 0;
+
+ err = of_get_mac_address(pdev->dev.of_node, netdev->dev_addr);
+ if (err)
+ eth_hw_addr_random(netdev);
+
+ netdev->netdev_ops = &liteeth_netdev_ops;
+
+ err = register_netdev(netdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to register netdev %d\n", err);
+ return err;
+ }
+
+ netdev_info(netdev, "irq %d slots: tx %d rx %d size %d\n",
+ netdev->irq, priv->num_tx_slots, priv->num_rx_slots, priv->slot_size);
+
+ return 0;
+}
+
+static int liteeth_remove(struct platform_device *pdev)
+{
+ struct net_device *netdev = platform_get_drvdata(pdev);
+
+ unregister_netdev(netdev);
+ free_netdev(netdev);
+
+ return 0;
+}
+
+static const struct of_device_id liteeth_of_match[] = {
+ { .compatible = "litex,liteeth" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, liteeth_of_match);
+
+static struct platform_driver liteeth_driver = {
+ .probe = liteeth_probe,
+ .remove = liteeth_remove,
+ .driver = {
+ .name = DRV_NAME,
+ .of_match_table = liteeth_of_match,
+ },
+};
+module_platform_driver(liteeth_driver);
+
+MODULE_AUTHOR("Joel Stanley <joel@jms.id.au>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index d207bfcaf31d..28d5ad296646 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -1611,8 +1611,10 @@ static void mv643xx_eth_get_drvinfo(struct net_device *dev,
strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
}
-static int
-mv643xx_eth_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+static int mv643xx_eth_get_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mv643xx_eth_private *mp = netdev_priv(dev);
@@ -1622,8 +1624,10 @@ mv643xx_eth_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
return 0;
}
-static int
-mv643xx_eth_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+static int mv643xx_eth_set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mv643xx_eth_private *mp = netdev_priv(dev);
@@ -3060,7 +3064,7 @@ static const struct net_device_ops mv643xx_eth_netdev_ops = {
.ndo_set_rx_mode = mv643xx_eth_set_rx_mode,
.ndo_set_mac_address = mv643xx_eth_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = mv643xx_eth_ioctl,
+ .ndo_eth_ioctl = mv643xx_eth_ioctl,
.ndo_change_mtu = mv643xx_eth_change_mtu,
.ndo_set_features = mv643xx_eth_set_features,
.ndo_tx_timeout = mv643xx_eth_tx_timeout,
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 76a7777c746d..9d460a270601 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -105,7 +105,7 @@
#define MVNETA_VLAN_PRIO_TO_RXQ 0x2440
#define MVNETA_VLAN_PRIO_RXQ_MAP(prio, rxq) ((rxq) << ((prio) * 3))
#define MVNETA_PORT_STATUS 0x2444
-#define MVNETA_TX_IN_PRGRS BIT(1)
+#define MVNETA_TX_IN_PRGRS BIT(0)
#define MVNETA_TX_FIFO_EMPTY BIT(8)
#define MVNETA_RX_MIN_FRAME_SIZE 0x247c
/* Only exists on Armada XP and Armada 370 */
@@ -2327,7 +2327,7 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
if (!skb)
return ERR_PTR(-ENOMEM);
- skb_mark_for_recycle(skb, virt_to_page(xdp->data), pool);
+ skb_mark_for_recycle(skb);
skb_reserve(skb, xdp->data - xdp->data_hard_start);
skb_put(skb, xdp->data_end - xdp->data);
@@ -2339,10 +2339,6 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
skb_frag_page(frag), skb_frag_off(frag),
skb_frag_size(frag), PAGE_SIZE);
- /* We don't need to reset pp_recycle here. It's already set, so
- * just mark fragments for recycling.
- */
- page_pool_store_mem_info(skb_frag_page(frag), pool);
}
return skb;
@@ -2666,7 +2662,7 @@ static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev,
return 0;
if (skb_headlen(skb) < (skb_transport_offset(skb) + tcp_hdrlen(skb))) {
- pr_info("*** Is this even possible???!?!?\n");
+ pr_info("*** Is this even possible?\n");
return 0;
}
@@ -3832,12 +3828,20 @@ static void mvneta_validate(struct phylink_config *config,
struct mvneta_port *pp = netdev_priv(ndev);
__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
- /* We only support QSGMII, SGMII, 802.3z and RGMII modes */
- if (state->interface != PHY_INTERFACE_MODE_NA &&
- state->interface != PHY_INTERFACE_MODE_QSGMII &&
- state->interface != PHY_INTERFACE_MODE_SGMII &&
- !phy_interface_mode_is_8023z(state->interface) &&
- !phy_interface_mode_is_rgmii(state->interface)) {
+ /* We only support QSGMII, SGMII, 802.3z and RGMII modes.
+ * When in 802.3z mode, we must have AN enabled:
+ * "Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ...
+ * When <PortType> = 1 (1000BASE-X) this field must be set to 1."
+ */
+ if (phy_interface_mode_is_8023z(state->interface)) {
+ if (!phylink_test(state->advertising, Autoneg)) {
+ bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ return;
+ }
+ } else if (state->interface != PHY_INTERFACE_MODE_NA &&
+ state->interface != PHY_INTERFACE_MODE_QSGMII &&
+ state->interface != PHY_INTERFACE_MODE_SGMII &&
+ !phy_interface_mode_is_rgmii(state->interface)) {
bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
return;
}
@@ -4496,8 +4500,11 @@ static int mvneta_ethtool_nway_reset(struct net_device *dev)
}
/* Set interrupt coalescing for ethtools */
-static int mvneta_ethtool_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *c)
+static int
+mvneta_ethtool_set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *c,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mvneta_port *pp = netdev_priv(dev);
int queue;
@@ -4520,8 +4527,11 @@ static int mvneta_ethtool_set_coalesce(struct net_device *dev,
}
/* get coalescing for ethtools */
-static int mvneta_ethtool_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *c)
+static int
+mvneta_ethtool_get_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *c,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mvneta_port *pp = netdev_priv(dev);
@@ -4986,7 +4996,7 @@ static const struct net_device_ops mvneta_netdev_ops = {
.ndo_change_mtu = mvneta_change_mtu,
.ndo_fix_features = mvneta_fix_features,
.ndo_get_stats64 = mvneta_get_stats64,
- .ndo_do_ioctl = mvneta_ioctl,
+ .ndo_eth_ioctl = mvneta_ioctl,
.ndo_bpf = mvneta_xdp,
.ndo_xdp_xmit = mvneta_xdp_xmit,
.ndo_setup_tc = mvneta_setup_tc,
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index b9fbc9f000f2..cf8acabb90ac 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -938,7 +938,7 @@ enum mvpp22_ptp_packet_format {
#define MVPP2_BM_COOKIE_POOL_OFFS 8
#define MVPP2_BM_COOKIE_CPU_OFFS 24
-#define MVPP2_BM_SHORT_FRAME_SIZE 704 /* frame size 128 */
+#define MVPP2_BM_SHORT_FRAME_SIZE 736 /* frame size 128 */
#define MVPP2_BM_LONG_FRAME_SIZE 2240 /* frame size 1664 */
#define MVPP2_BM_JUMBO_FRAME_SIZE 10432 /* frame size 9856 */
/* BM short pool packet size
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 3229bafa2a2c..d5c92e43f89e 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -3995,7 +3995,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
}
if (pp)
- skb_mark_for_recycle(skb, page, pp);
+ skb_mark_for_recycle(skb);
else
dma_unmap_single_attrs(dev->dev.parent, dma_addr,
bm_pool->buf_size, DMA_FROM_DEVICE,
@@ -5367,8 +5367,11 @@ static int mvpp2_ethtool_nway_reset(struct net_device *dev)
}
/* Set interrupt coalescing for ethtools */
-static int mvpp2_ethtool_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *c)
+static int
+mvpp2_ethtool_set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *c,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mvpp2_port *port = netdev_priv(dev);
int queue;
@@ -5400,8 +5403,11 @@ static int mvpp2_ethtool_set_coalesce(struct net_device *dev,
}
/* get coalescing for ethtools */
-static int mvpp2_ethtool_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *c)
+static int
+mvpp2_ethtool_get_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *c,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mvpp2_port *port = netdev_priv(dev);
@@ -5702,7 +5708,7 @@ static const struct net_device_ops mvpp2_netdev_ops = {
.ndo_set_mac_address = mvpp2_set_mac_address,
.ndo_change_mtu = mvpp2_change_mtu,
.ndo_get_stats64 = mvpp2_get_stats64,
- .ndo_do_ioctl = mvpp2_ioctl,
+ .ndo_eth_ioctl = mvpp2_ioctl,
.ndo_vlan_rx_add_vid = mvpp2_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = mvpp2_vlan_rx_kill_vid,
.ndo_set_features = mvpp2_set_features,
@@ -6269,6 +6275,15 @@ static void mvpp2_phylink_validate(struct phylink_config *config,
if (!mvpp2_port_supports_rgmii(port))
goto empty_set;
break;
+ case PHY_INTERFACE_MODE_1000BASEX:
+ case PHY_INTERFACE_MODE_2500BASEX:
+ /* When in 802.3z mode, we must have AN enabled:
+ * Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ...
+ * When <PortType> = 1 (1000BASE-X) this field must be set to 1.
+ */
+ if (!phylink_test(state->advertising, Autoneg))
+ goto empty_set;
+ break;
default:
break;
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig
index 16caa02095fe..3f982ccf2c85 100644
--- a/drivers/net/ethernet/marvell/octeontx2/Kconfig
+++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
#
-# Marvell OcteonTX2 drivers configuration
+# Marvell RVU Network drivers configuration
#
config OCTEONTX2_MBOX
@@ -12,6 +12,7 @@ config OCTEONTX2_AF
select NET_DEVLINK
depends on (64BIT && COMPILE_TEST) || ARM64
depends on PCI
+ depends on PTP_1588_CLOCK_OPTIONAL
help
This driver supports Marvell's OcteonTX2 Resource Virtualization
Unit's admin function manager which manages all RVU HW resources
@@ -32,6 +33,7 @@ config OCTEONTX2_PF
select OCTEONTX2_MBOX
depends on (64BIT && COMPILE_TEST) || ARM64
depends on PCI
+ depends on PTP_1588_CLOCK_OPTIONAL
help
This driver supports Marvell's OcteonTX2 NIC physical function.
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile
index cc8ac36cf687..7f4a4ca9af78 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile
+++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
#
-# Makefile for Marvell's OcteonTX2 RVU Admin Function driver
+# Makefile for Marvell's RVU Admin Function driver
#
ccflags-y += -I$(src)
@@ -10,4 +10,5 @@ obj-$(CONFIG_OCTEONTX2_AF) += rvu_af.o
rvu_mbox-y := mbox.o rvu_trace.o
rvu_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \
rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o \
- rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o rvu_switch.o
+ rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o rvu_switch.o \
+ rvu_sdp.o
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 544c96c8fe1d..7f3d01059e19 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -1,11 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Marvell OcteonTx2 CGX driver
*
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/acpi.h>
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
index 237ba2b56210..ab1e4abdea38 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
@@ -1,11 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 CGX driver
+/* Marvell OcteonTx2 CGX driver
*
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef CGX_H
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h
index aa4e42f78f13..f72ec0e2506f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h
@@ -1,11 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 CGX driver
+/* Marvell OcteonTx2 CGX driver
*
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef __CGX_FW_INTF_H__
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h
index 47f5ed006a93..d9bea13f15b8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h
@@ -1,11 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
- * Copyright (C) 2018 Marvell International Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2018 Marvell.
*/
#ifndef COMMON_H
@@ -64,8 +60,8 @@ static inline int qmem_alloc(struct device *dev, struct qmem **q,
qmem->entry_sz = entry_sz;
qmem->alloc_sz = (qsize * entry_sz) + OTX2_ALIGN;
- qmem->base = dma_alloc_coherent(dev, qmem->alloc_sz,
- &qmem->iova, GFP_KERNEL);
+ qmem->base = dma_alloc_attrs(dev, qmem->alloc_sz, &qmem->iova,
+ GFP_KERNEL, DMA_ATTR_FORCE_CONTIGUOUS);
if (!qmem->base)
return -ENOMEM;
@@ -84,9 +80,10 @@ static inline void qmem_free(struct device *dev, struct qmem *qmem)
return;
if (qmem->base)
- dma_free_coherent(dev, qmem->alloc_sz,
- qmem->base - qmem->align,
- qmem->iova - qmem->align);
+ dma_free_attrs(dev, qmem->alloc_sz,
+ qmem->base - qmem->align,
+ qmem->iova - qmem->align,
+ DMA_ATTR_FORCE_CONTIGUOUS);
devm_kfree(dev, qmem);
}
@@ -146,10 +143,7 @@ enum nix_scheduler {
#define TXSCH_RR_QTM_MAX ((1 << 24) - 1)
#define TXSCH_TL1_DFLT_RR_QTM TXSCH_RR_QTM_MAX
#define TXSCH_TL1_DFLT_RR_PRIO (0x1ull)
-#define MAX_SCHED_WEIGHT 0xFF
-#define DFLT_RR_WEIGHT 71
-#define DFLT_RR_QTM ((DFLT_RR_WEIGHT * TXSCH_RR_QTM_MAX) \
- / MAX_SCHED_WEIGHT)
+#define CN10K_MAX_DWRR_WEIGHT 16384 /* Weight is 14bit on CN10K */
/* Min/Max packet sizes, excluding FCS */
#define NIC_HW_MIN_FRS 40
@@ -187,15 +181,16 @@ enum nix_scheduler {
#define NIX_INTF_TYPE_CGX 0
#define NIX_INTF_TYPE_LBK 1
+#define NIX_INTF_TYPE_SDP 2
#define MAX_LMAC_PKIND 12
#define NIX_LINK_CGX_LMAC(a, b) (0 + 4 * (a) + (b))
#define NIX_LINK_LBK(a) (12 + (a))
#define NIX_CHAN_CGX_LMAC_CHX(a, b, c) (0x800 + 0x100 * (a) + 0x10 * (b) + (c))
#define NIX_CHAN_LBK_CHX(a, b) (0 + 0x100 * (a) + (b))
-#define NIX_CHAN_SDP_CH_START (0x700ull)
-
-#define SDP_CHANNELS 256
+#define NIX_CHAN_SDP_CH_START (0x700ull)
+#define NIX_CHAN_SDP_CHX(a) (NIX_CHAN_SDP_CH_START + (a))
+#define NIX_CHAN_SDP_NUM_CHANS 256
/* The mask is to extract lower 10-bits of channel number
* which CPT will pass to X2P.
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
index a8b7b1c7a1d5..c38306b3384a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
@@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RPM driver
+/* Marvell CN10K RPM driver
*
* Copyright (C) 2020 Marvell.
+ *
*/
#ifndef LMAC_COMMON_H
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
index 0a37ca96aab8..2898931d5260 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
@@ -1,11 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/module.h>
@@ -412,5 +409,5 @@ const char *otx2_mbox_id2name(u16 id)
}
EXPORT_SYMBOL(otx2_mbox_id2name);
-MODULE_AUTHOR("Marvell International Ltd.");
+MODULE_AUTHOR("Marvell.");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index f5ec39de026a..154877706a0e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -1,11 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef MBOX_H
@@ -87,7 +84,7 @@ struct mbox_msghdr {
#define OTX2_MBOX_REQ_SIG (0xdead)
#define OTX2_MBOX_RSP_SIG (0xbeef)
u16 sig; /* Signature, for validating corrupted msgs */
-#define OTX2_MBOX_VERSION (0x0007)
+#define OTX2_MBOX_VERSION (0x0009)
u16 ver; /* Version of msg's structure for this ID */
u16 next_msgoff; /* Offset of next msg within mailbox region */
int rc; /* Msg process'ed response code */
@@ -130,6 +127,7 @@ static inline struct mbox_msghdr *otx2_mbox_alloc_msg(struct otx2_mbox *mbox,
M(READY, 0x001, ready, msg_req, ready_msg_rsp) \
M(ATTACH_RESOURCES, 0x002, attach_resources, rsrc_attach, msg_rsp) \
M(DETACH_RESOURCES, 0x003, detach_resources, rsrc_detach, msg_rsp) \
+M(FREE_RSRC_CNT, 0x004, free_rsrc_cnt, msg_req, free_rsrcs_rsp) \
M(MSIX_OFFSET, 0x005, msix_offset, msg_req, msix_offset_rsp) \
M(VF_FLR, 0x006, vf_flr, msg_req, msg_rsp) \
M(PTP_OP, 0x007, ptp_op, ptp_req, ptp_rsp) \
@@ -191,6 +189,9 @@ M(CPT_RD_WR_REGISTER, 0xA02, cpt_rd_wr_register, cpt_rd_wr_reg_msg, \
M(CPT_STATS, 0xA05, cpt_sts, cpt_sts_req, cpt_sts_rsp) \
M(CPT_RXC_TIME_CFG, 0xA06, cpt_rxc_time_cfg, cpt_rxc_time_cfg_req, \
msg_rsp) \
+/* SDP mbox IDs (range 0x1000 - 0x11FF) */ \
+M(SET_SDP_CHAN_INFO, 0x1000, set_sdp_chan_info, sdp_chan_info_msg, msg_rsp) \
+M(GET_SDP_CHAN_INFO, 0x1001, get_sdp_chan_info, msg_req, sdp_get_chan_info_msg) \
/* NPC mbox IDs (range 0x6000 - 0x7FFF) */ \
M(NPC_MCAM_ALLOC_ENTRY, 0x6000, npc_mcam_alloc_entry, npc_mcam_alloc_entry_req,\
npc_mcam_alloc_entry_rsp) \
@@ -243,7 +244,8 @@ M(NIX_HWCTX_DISABLE, 0x8003, nix_hwctx_disable, \
M(NIX_TXSCH_ALLOC, 0x8004, nix_txsch_alloc, \
nix_txsch_alloc_req, nix_txsch_alloc_rsp) \
M(NIX_TXSCH_FREE, 0x8005, nix_txsch_free, nix_txsch_free_req, msg_rsp) \
-M(NIX_TXSCHQ_CFG, 0x8006, nix_txschq_cfg, nix_txschq_config, msg_rsp) \
+M(NIX_TXSCHQ_CFG, 0x8006, nix_txschq_cfg, nix_txschq_config, \
+ nix_txschq_config) \
M(NIX_STATS_RST, 0x8007, nix_stats_rst, msg_req, msg_rsp) \
M(NIX_VTAG_CFG, 0x8008, nix_vtag_cfg, nix_vtag_config, \
nix_vtag_config_rsp) \
@@ -268,13 +270,15 @@ M(NIX_BP_ENABLE, 0x8016, nix_bp_enable, nix_bp_cfg_req, \
nix_bp_cfg_rsp) \
M(NIX_BP_DISABLE, 0x8017, nix_bp_disable, nix_bp_cfg_req, msg_rsp) \
M(NIX_GET_MAC_ADDR, 0x8018, nix_get_mac_addr, msg_req, nix_get_mac_addr_rsp) \
-M(NIX_CN10K_AQ_ENQ, 0x8019, nix_cn10k_aq_enq, nix_cn10k_aq_enq_req, \
+M(NIX_CN10K_AQ_ENQ, 0x801b, nix_cn10k_aq_enq, nix_cn10k_aq_enq_req, \
nix_cn10k_aq_enq_rsp) \
M(NIX_GET_HW_INFO, 0x801c, nix_get_hw_info, msg_req, nix_hw_info) \
M(NIX_BANDPROF_ALLOC, 0x801d, nix_bandprof_alloc, nix_bandprof_alloc_req, \
nix_bandprof_alloc_rsp) \
M(NIX_BANDPROF_FREE, 0x801e, nix_bandprof_free, nix_bandprof_free_req, \
- msg_rsp)
+ msg_rsp) \
+M(NIX_BANDPROF_GET_HWINFO, 0x801f, nix_bandprof_get_hwinfo, msg_req, \
+ nix_bandprof_get_hwinfo_rsp)
/* Messages initiated by AF (range 0xC00 - 0xDFF) */
#define MBOX_UP_CGX_MESSAGES \
@@ -363,6 +367,25 @@ struct rsrc_detach {
u8 cptlfs:1;
};
+/* Number of resources available to the caller.
+ * In reply to MBOX_MSG_FREE_RSRC_CNT.
+ */
+struct free_rsrcs_rsp {
+ struct mbox_msghdr hdr;
+ u16 schq[NIX_TXSCH_LVL_CNT];
+ u16 sso;
+ u16 tim;
+ u16 ssow;
+ u16 cpt;
+ u8 npa;
+ u8 nix;
+ u16 schq_nix1[NIX_TXSCH_LVL_CNT];
+ u8 nix1;
+ u8 cpt1;
+ u8 ree0;
+ u8 ree1;
+};
+
#define MSIX_VECTOR_INVALID 0xFFFF
#define MAX_RVU_BLKLF_CNT 256
@@ -370,16 +393,20 @@ struct msix_offset_rsp {
struct mbox_msghdr hdr;
u16 npa_msixoff;
u16 nix_msixoff;
- u8 sso;
- u8 ssow;
- u8 timlfs;
- u8 cptlfs;
+ u16 sso;
+ u16 ssow;
+ u16 timlfs;
+ u16 cptlfs;
u16 sso_msixoff[MAX_RVU_BLKLF_CNT];
u16 ssow_msixoff[MAX_RVU_BLKLF_CNT];
u16 timlf_msixoff[MAX_RVU_BLKLF_CNT];
u16 cptlf_msixoff[MAX_RVU_BLKLF_CNT];
- u8 cpt1_lfs;
+ u16 cpt1_lfs;
+ u16 ree0_lfs;
+ u16 ree1_lfs;
u16 cpt1_lf_msixoff[MAX_RVU_BLKLF_CNT];
+ u16 ree0_lf_msixoff[MAX_RVU_BLKLF_CNT];
+ u16 ree1_lf_msixoff[MAX_RVU_BLKLF_CNT];
};
struct get_hw_cap_rsp {
@@ -594,6 +621,7 @@ struct npa_lf_alloc_rsp {
u32 stack_pg_ptrs; /* No of ptrs per stack page */
u32 stack_pg_bytes; /* Size of stack page */
u16 qints; /* NPA_AF_CONST::QINTS */
+ u8 cache_lines; /*BATCH ALLOC DMA */
};
/* NPA AQ enqueue msg */
@@ -698,6 +726,9 @@ struct nix_lf_alloc_req {
u16 sso_func;
u64 rx_cfg; /* See NIX_AF_LF(0..127)_RX_CFG */
u64 way_mask;
+#define NIX_LF_RSS_TAG_LSB_AS_ADDER BIT_ULL(0)
+#define NIX_LF_LBK_BLK_SEL BIT_ULL(1)
+ u64 flags;
};
struct nix_lf_alloc_rsp {
@@ -717,6 +748,7 @@ struct nix_lf_alloc_rsp {
u8 cgx_links; /* No. of CGX links present in HW */
u8 lbk_links; /* No. of LBK links present in HW */
u8 sdp_links; /* No. of SDP links present in HW */
+ u8 tx_link; /* Transmit channel link number */
};
struct nix_lf_free_req {
@@ -835,6 +867,7 @@ struct nix_txsch_free_req {
struct nix_txschq_config {
struct mbox_msghdr hdr;
u8 lvl; /* SMQ/MDQ/TL4/TL3/TL2/TL1 */
+ u8 read;
#define TXSCHQ_IDX_SHIFT 16
#define TXSCHQ_IDX_MASK (BIT_ULL(10) - 1)
#define TXSCHQ_IDX(reg, shift) (((reg) >> (shift)) & TXSCHQ_IDX_MASK)
@@ -842,6 +875,8 @@ struct nix_txschq_config {
#define MAX_REGS_PER_MBOX_MSG 20
u64 reg[MAX_REGS_PER_MBOX_MSG];
u64 regval[MAX_REGS_PER_MBOX_MSG];
+ /* All 0's => overwrite with new value */
+ u64 regval_mask[MAX_REGS_PER_MBOX_MSG];
};
struct nix_vtag_config {
@@ -1032,8 +1067,12 @@ struct nix_bp_cfg_rsp {
struct nix_hw_info {
struct mbox_msghdr hdr;
+ u16 rsvs16;
u16 max_mtu;
u16 min_mtu;
+ u32 rpm_dwrr_mtu;
+ u32 sdp_dwrr_mtu;
+ u64 rsvd[16]; /* Add reserved fields for future expansion */
};
struct nix_bandprof_alloc_req {
@@ -1061,6 +1100,12 @@ struct nix_bandprof_free_req {
u16 prof_idx[BAND_PROF_NUM_LAYERS][MAX_BANDPROF_PER_PFFUNC];
};
+struct nix_bandprof_get_hwinfo_rsp {
+ struct mbox_msghdr hdr;
+ u16 prof_count[BAND_PROF_NUM_LAYERS];
+ u32 policer_timeunit;
+};
+
/* NPC mbox message structs */
#define NPC_MCAM_ENTRY_INVALID 0xFFFF
@@ -1074,6 +1119,13 @@ enum npc_af_status {
NPC_MCAM_ALLOC_DENIED = -702,
NPC_MCAM_ALLOC_FAILED = -703,
NPC_MCAM_PERM_DENIED = -704,
+ NPC_FLOW_INTF_INVALID = -707,
+ NPC_FLOW_CHAN_INVALID = -708,
+ NPC_FLOW_NO_NIXLF = -709,
+ NPC_FLOW_NOT_SUPPORTED = -710,
+ NPC_FLOW_VF_PERM_DENIED = -711,
+ NPC_FLOW_VF_NOT_INIT = -712,
+ NPC_FLOW_VF_OVERLAP = -713,
};
struct npc_mcam_alloc_entry_req {
@@ -1328,6 +1380,10 @@ struct set_vf_perm {
struct lmtst_tbl_setup_req {
struct mbox_msghdr hdr;
+ u64 dis_sched_early_comp :1;
+ u64 sch_ena :1;
+ u64 dis_line_pref :1;
+ u64 ssow_pf_func :13;
u16 base_pcifunc;
u8 use_local_lmt_region;
u64 lmt_iova;
@@ -1422,4 +1478,34 @@ struct cpt_rxc_time_cfg_req {
u16 active_limit;
};
+struct sdp_node_info {
+ /* Node to which this PF belongs */
+ u8 node_id;
+ u8 max_vfs;
+ u8 num_pf_rings;
+ u8 pf_srn;
+#define SDP_MAX_VFS 128
+ u8 vf_rings[SDP_MAX_VFS];
+};
+
+struct sdp_chan_info_msg {
+ struct mbox_msghdr hdr;
+ struct sdp_node_info info;
+};
+
+struct sdp_get_chan_info_msg {
+ struct mbox_msghdr hdr;
+ u16 chan_base;
+ u16 num_chan;
+};
+
+/* CGX mailbox error codes
+ * Range 1101 - 1200.
+ */
+enum cgx_af_status {
+ LMAC_AF_ERR_INVALID_PARAM = -1101,
+ LMAC_AF_ERR_PF_NOT_MAPPED = -1102,
+ LMAC_AF_ERR_PERM_DENIED = -1103,
+};
+
#endif /* MBOX_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index 243cf8070e77..3a819b24accc 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -1,11 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef NPC_H
@@ -172,6 +169,8 @@ enum key_fields {
NPC_DMAC,
NPC_SMAC,
NPC_ETYPE,
+ NPC_VLAN_ETYPE_CTAG, /* 0x8100 */
+ NPC_VLAN_ETYPE_STAG, /* 0x88A8 */
NPC_OUTER_VID,
NPC_TOS,
NPC_SIP_IPV4,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
index fee655cc7523..588822a0cf21 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
@@ -1,11 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef NPC_PROFILE_H
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
index 1ee37853f338..9b8e59f4c206 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Marvell PTP driver
*
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
+ *
*/
#include <linux/bitfield.h>
@@ -19,12 +20,11 @@
#define PCI_SUBSYS_DEVID_OCTX2_98xx_PTP 0xB100
#define PCI_SUBSYS_DEVID_OCTX2_96XX_PTP 0xB200
#define PCI_SUBSYS_DEVID_OCTX2_95XX_PTP 0xB300
-#define PCI_SUBSYS_DEVID_OCTX2_LOKI_PTP 0xB400
+#define PCI_SUBSYS_DEVID_OCTX2_95XXN_PTP 0xB400
#define PCI_SUBSYS_DEVID_OCTX2_95MM_PTP 0xB500
-#define PCI_SUBSYS_DEVID_CN10K_A_PTP 0xB900
-#define PCI_SUBSYS_DEVID_CNF10K_A_PTP 0xBA00
-#define PCI_SUBSYS_DEVID_CNF10K_B_PTP 0xBC00
+#define PCI_SUBSYS_DEVID_OCTX2_95XXO_PTP 0xB600
#define PCI_DEVID_OCTEONTX2_RST 0xA085
+#define PCI_DEVID_CN10K_PTP 0xA09E
#define PCI_PTP_BAR_NO 0
#define PCI_RST_BAR_NO 0
@@ -39,6 +39,9 @@
#define RST_MUL_BITS GENMASK_ULL(38, 33)
#define CLOCK_BASE_RATE 50000000ULL
+static struct ptp *first_ptp_block;
+static const struct pci_device_id ptp_id_table[];
+
static u64 get_clock_rate(void)
{
u64 cfg, ret = CLOCK_BASE_RATE * 16;
@@ -74,23 +77,14 @@ error:
struct ptp *ptp_get(void)
{
- struct pci_dev *pdev;
- struct ptp *ptp;
+ struct ptp *ptp = first_ptp_block;
- /* If the PTP pci device is found on the system and ptp
- * driver is bound to it then the PTP pci device is returned
- * to the caller(rvu driver).
- */
- pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
- PCI_DEVID_OCTEONTX2_PTP, NULL);
- if (!pdev)
+ /* Check PTP block is present in hardware */
+ if (!pci_dev_present(ptp_id_table))
return ERR_PTR(-ENODEV);
-
- ptp = pci_get_drvdata(pdev);
+ /* Check driver is bound to PTP block */
if (!ptp)
ptp = ERR_PTR(-EPROBE_DEFER);
- if (IS_ERR(ptp))
- pci_dev_put(pdev);
return ptp;
}
@@ -190,6 +184,8 @@ static int ptp_probe(struct pci_dev *pdev,
writeq(clock_comp, ptp->reg_base + PTP_CLOCK_COMP);
pci_set_drvdata(pdev, ptp);
+ if (!first_ptp_block)
+ first_ptp_block = ptp;
return 0;
@@ -204,6 +200,9 @@ error:
* `dev->driver_data`.
*/
pci_set_drvdata(pdev, ERR_PTR(err));
+ if (!first_ptp_block)
+ first_ptp_block = ERR_PTR(err);
+
return 0;
}
@@ -233,19 +232,14 @@ static const struct pci_device_id ptp_id_table[] = {
PCI_SUBSYS_DEVID_OCTX2_95XX_PTP) },
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
PCI_VENDOR_ID_CAVIUM,
- PCI_SUBSYS_DEVID_OCTX2_LOKI_PTP) },
+ PCI_SUBSYS_DEVID_OCTX2_95XXN_PTP) },
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
PCI_VENDOR_ID_CAVIUM,
PCI_SUBSYS_DEVID_OCTX2_95MM_PTP) },
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
PCI_VENDOR_ID_CAVIUM,
- PCI_SUBSYS_DEVID_CN10K_A_PTP) },
- { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
- PCI_VENDOR_ID_CAVIUM,
- PCI_SUBSYS_DEVID_CNF10K_A_PTP) },
- { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
- PCI_VENDOR_ID_CAVIUM,
- PCI_SUBSYS_DEVID_CNF10K_B_PTP) },
+ PCI_SUBSYS_DEVID_OCTX2_95XXO_PTP) },
+ { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10K_PTP) },
{ 0, }
};
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.h b/drivers/net/ethernet/marvell/octeontx2/af/ptp.h
index 878bc395d28f..76d404b24552 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.h
@@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Marvell PTP driver
*
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
+ *
*/
#ifndef PTP_H
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
index a91ccdc59403..07b0eafccad8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RPM driver
+/* Marvell CN10K RPM driver
*
* Copyright (C) 2020 Marvell.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
index d32e74bd5964..f0b069442dcc 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RPM driver
+/* Marvell CN10K RPM driver
*
* Copyright (C) 2020 Marvell.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 5fe277e354f7..ce647e037f4d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -1,11 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/module.h>
@@ -70,18 +67,21 @@ static void rvu_setup_hw_capabilities(struct rvu *rvu)
hw->cap.nix_shaping = true;
hw->cap.nix_tx_link_bp = true;
hw->cap.nix_rx_multicast = true;
+ hw->cap.nix_shaper_toggle_wait = false;
hw->rvu = rvu;
- if (is_rvu_96xx_B0(rvu)) {
+ if (is_rvu_pre_96xx_C0(rvu)) {
hw->cap.nix_fixed_txschq_mapping = true;
hw->cap.nix_txsch_per_cgx_lmac = 4;
hw->cap.nix_txsch_per_lbk_lmac = 132;
hw->cap.nix_txsch_per_sdp_lmac = 76;
hw->cap.nix_shaping = false;
hw->cap.nix_tx_link_bp = false;
- if (is_rvu_96xx_A0(rvu))
+ if (is_rvu_96xx_A0(rvu) || is_rvu_95xx_A0(rvu))
hw->cap.nix_rx_multicast = false;
}
+ if (!is_rvu_pre_96xx_C0(rvu))
+ hw->cap.nix_shaper_toggle_wait = true;
if (!is_rvu_otx2(rvu))
hw->cap.per_pf_mbox_regs = true;
@@ -498,12 +498,15 @@ int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf)
static void rvu_block_reset(struct rvu *rvu, int blkaddr, u64 rst_reg)
{
struct rvu_block *block = &rvu->hw->block[blkaddr];
+ int err;
if (!block->implemented)
return;
rvu_write64(rvu, blkaddr, rst_reg, BIT_ULL(0));
- rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true);
+ err = rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true);
+ if (err)
+ dev_err(rvu->dev, "HW block:%d reset failed\n", blkaddr);
}
static void rvu_reset_all_blocks(struct rvu *rvu)
@@ -924,16 +927,26 @@ static int rvu_setup_hw_resources(struct rvu *rvu)
block->lfreset_reg = NPA_AF_LF_RST;
sprintf(block->name, "NPA");
err = rvu_alloc_bitmap(&block->lf);
- if (err)
+ if (err) {
+ dev_err(rvu->dev,
+ "%s: Failed to allocate NPA LF bitmap\n", __func__);
return err;
+ }
nix:
err = rvu_setup_nix_hw_resource(rvu, BLKADDR_NIX0);
- if (err)
+ if (err) {
+ dev_err(rvu->dev,
+ "%s: Failed to allocate NIX0 LFs bitmap\n", __func__);
return err;
+ }
+
err = rvu_setup_nix_hw_resource(rvu, BLKADDR_NIX1);
- if (err)
+ if (err) {
+ dev_err(rvu->dev,
+ "%s: Failed to allocate NIX1 LFs bitmap\n", __func__);
return err;
+ }
/* Init SSO group's bitmap */
block = &hw->block[BLKADDR_SSO];
@@ -953,8 +966,11 @@ nix:
block->lfreset_reg = SSO_AF_LF_HWGRP_RST;
sprintf(block->name, "SSO GROUP");
err = rvu_alloc_bitmap(&block->lf);
- if (err)
+ if (err) {
+ dev_err(rvu->dev,
+ "%s: Failed to allocate SSO LF bitmap\n", __func__);
return err;
+ }
ssow:
/* Init SSO workslot's bitmap */
@@ -974,8 +990,11 @@ ssow:
block->lfreset_reg = SSOW_AF_LF_HWS_RST;
sprintf(block->name, "SSOWS");
err = rvu_alloc_bitmap(&block->lf);
- if (err)
+ if (err) {
+ dev_err(rvu->dev,
+ "%s: Failed to allocate SSOW LF bitmap\n", __func__);
return err;
+ }
tim:
/* Init TIM LF's bitmap */
@@ -996,35 +1015,53 @@ tim:
block->lfreset_reg = TIM_AF_LF_RST;
sprintf(block->name, "TIM");
err = rvu_alloc_bitmap(&block->lf);
- if (err)
+ if (err) {
+ dev_err(rvu->dev,
+ "%s: Failed to allocate TIM LF bitmap\n", __func__);
return err;
+ }
cpt:
err = rvu_setup_cpt_hw_resource(rvu, BLKADDR_CPT0);
- if (err)
+ if (err) {
+ dev_err(rvu->dev,
+ "%s: Failed to allocate CPT0 LF bitmap\n", __func__);
return err;
+ }
err = rvu_setup_cpt_hw_resource(rvu, BLKADDR_CPT1);
- if (err)
+ if (err) {
+ dev_err(rvu->dev,
+ "%s: Failed to allocate CPT1 LF bitmap\n", __func__);
return err;
+ }
/* Allocate memory for PFVF data */
rvu->pf = devm_kcalloc(rvu->dev, hw->total_pfs,
sizeof(struct rvu_pfvf), GFP_KERNEL);
- if (!rvu->pf)
+ if (!rvu->pf) {
+ dev_err(rvu->dev,
+ "%s: Failed to allocate memory for PF's rvu_pfvf struct\n", __func__);
return -ENOMEM;
+ }
rvu->hwvf = devm_kcalloc(rvu->dev, hw->total_vfs,
sizeof(struct rvu_pfvf), GFP_KERNEL);
- if (!rvu->hwvf)
+ if (!rvu->hwvf) {
+ dev_err(rvu->dev,
+ "%s: Failed to allocate memory for VF's rvu_pfvf struct\n", __func__);
return -ENOMEM;
+ }
mutex_init(&rvu->rsrc_lock);
rvu_fwdata_init(rvu);
err = rvu_setup_msix_resources(rvu);
- if (err)
+ if (err) {
+ dev_err(rvu->dev,
+ "%s: Failed to setup MSIX resources\n", __func__);
return err;
+ }
for (blkid = 0; blkid < BLK_COUNT; blkid++) {
block = &hw->block[blkid];
@@ -1050,25 +1087,39 @@ cpt:
goto msix_err;
err = rvu_npc_init(rvu);
- if (err)
+ if (err) {
+ dev_err(rvu->dev, "%s: Failed to initialize npc\n", __func__);
goto npc_err;
+ }
err = rvu_cgx_init(rvu);
- if (err)
+ if (err) {
+ dev_err(rvu->dev, "%s: Failed to initialize cgx\n", __func__);
goto cgx_err;
+ }
/* Assign MACs for CGX mapped functions */
rvu_setup_pfvf_macaddress(rvu);
err = rvu_npa_init(rvu);
- if (err)
+ if (err) {
+ dev_err(rvu->dev, "%s: Failed to initialize npa\n", __func__);
goto npa_err;
+ }
rvu_get_lbk_bufsize(rvu);
err = rvu_nix_init(rvu);
- if (err)
+ if (err) {
+ dev_err(rvu->dev, "%s: Failed to initialize nix\n", __func__);
goto nix_err;
+ }
+
+ err = rvu_sdp_init(rvu);
+ if (err) {
+ dev_err(rvu->dev, "%s: Failed to initialize sdp\n", __func__);
+ goto nix_err;
+ }
rvu_program_channels(rvu);
@@ -1322,9 +1373,10 @@ int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc)
int blkaddr = BLKADDR_NIX0, vf;
struct rvu_pfvf *pf;
+ pf = rvu_get_pfvf(rvu, pcifunc & ~RVU_PFVF_FUNC_MASK);
+
/* All CGX mapped PFs are set with assigned NIX block during init */
if (is_pf_cgxmapped(rvu, rvu_get_pf(pcifunc))) {
- pf = rvu_get_pfvf(rvu, pcifunc & ~RVU_PFVF_FUNC_MASK);
blkaddr = pf->nix_blkaddr;
} else if (is_afvf(pcifunc)) {
vf = pcifunc - 1;
@@ -1337,6 +1389,10 @@ int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc)
blkaddr = BLKADDR_NIX0;
}
+ /* if SDP1 then the blkaddr is NIX1 */
+ if (is_sdp_pfvf(pcifunc) && pf->sdp_info->node_id == 1)
+ blkaddr = BLKADDR_NIX1;
+
switch (blkaddr) {
case BLKADDR_NIX1:
pfvf->nix_blkaddr = BLKADDR_NIX1;
@@ -1737,6 +1793,99 @@ int rvu_mbox_handler_msix_offset(struct rvu *rvu, struct msg_req *req,
return 0;
}
+int rvu_mbox_handler_free_rsrc_cnt(struct rvu *rvu, struct msg_req *req,
+ struct free_rsrcs_rsp *rsp)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+ struct rvu_block *block;
+ struct nix_txsch *txsch;
+ struct nix_hw *nix_hw;
+
+ mutex_lock(&rvu->rsrc_lock);
+
+ block = &hw->block[BLKADDR_NPA];
+ rsp->npa = rvu_rsrc_free_count(&block->lf);
+
+ block = &hw->block[BLKADDR_NIX0];
+ rsp->nix = rvu_rsrc_free_count(&block->lf);
+
+ block = &hw->block[BLKADDR_NIX1];
+ rsp->nix1 = rvu_rsrc_free_count(&block->lf);
+
+ block = &hw->block[BLKADDR_SSO];
+ rsp->sso = rvu_rsrc_free_count(&block->lf);
+
+ block = &hw->block[BLKADDR_SSOW];
+ rsp->ssow = rvu_rsrc_free_count(&block->lf);
+
+ block = &hw->block[BLKADDR_TIM];
+ rsp->tim = rvu_rsrc_free_count(&block->lf);
+
+ block = &hw->block[BLKADDR_CPT0];
+ rsp->cpt = rvu_rsrc_free_count(&block->lf);
+
+ block = &hw->block[BLKADDR_CPT1];
+ rsp->cpt1 = rvu_rsrc_free_count(&block->lf);
+
+ if (rvu->hw->cap.nix_fixed_txschq_mapping) {
+ rsp->schq[NIX_TXSCH_LVL_SMQ] = 1;
+ rsp->schq[NIX_TXSCH_LVL_TL4] = 1;
+ rsp->schq[NIX_TXSCH_LVL_TL3] = 1;
+ rsp->schq[NIX_TXSCH_LVL_TL2] = 1;
+ /* NIX1 */
+ if (!is_block_implemented(rvu->hw, BLKADDR_NIX1))
+ goto out;
+ rsp->schq_nix1[NIX_TXSCH_LVL_SMQ] = 1;
+ rsp->schq_nix1[NIX_TXSCH_LVL_TL4] = 1;
+ rsp->schq_nix1[NIX_TXSCH_LVL_TL3] = 1;
+ rsp->schq_nix1[NIX_TXSCH_LVL_TL2] = 1;
+ } else {
+ nix_hw = get_nix_hw(hw, BLKADDR_NIX0);
+ txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ];
+ rsp->schq[NIX_TXSCH_LVL_SMQ] =
+ rvu_rsrc_free_count(&txsch->schq);
+
+ txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL4];
+ rsp->schq[NIX_TXSCH_LVL_TL4] =
+ rvu_rsrc_free_count(&txsch->schq);
+
+ txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL3];
+ rsp->schq[NIX_TXSCH_LVL_TL3] =
+ rvu_rsrc_free_count(&txsch->schq);
+
+ txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL2];
+ rsp->schq[NIX_TXSCH_LVL_TL2] =
+ rvu_rsrc_free_count(&txsch->schq);
+
+ if (!is_block_implemented(rvu->hw, BLKADDR_NIX1))
+ goto out;
+
+ nix_hw = get_nix_hw(hw, BLKADDR_NIX1);
+ txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ];
+ rsp->schq_nix1[NIX_TXSCH_LVL_SMQ] =
+ rvu_rsrc_free_count(&txsch->schq);
+
+ txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL4];
+ rsp->schq_nix1[NIX_TXSCH_LVL_TL4] =
+ rvu_rsrc_free_count(&txsch->schq);
+
+ txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL3];
+ rsp->schq_nix1[NIX_TXSCH_LVL_TL3] =
+ rvu_rsrc_free_count(&txsch->schq);
+
+ txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL2];
+ rsp->schq_nix1[NIX_TXSCH_LVL_TL2] =
+ rvu_rsrc_free_count(&txsch->schq);
+ }
+
+ rsp->schq_nix1[NIX_TXSCH_LVL_TL1] = 1;
+out:
+ rsp->schq[NIX_TXSCH_LVL_TL1] = 1;
+ mutex_unlock(&rvu->rsrc_lock);
+
+ return 0;
+}
+
int rvu_mbox_handler_vf_flr(struct rvu *rvu, struct msg_req *req,
struct msg_rsp *rsp)
{
@@ -2402,11 +2551,12 @@ static void rvu_afvf_queue_flr_work(struct rvu *rvu, int start_vf, int numvfs)
for (vf = 0; vf < numvfs; vf++) {
if (!(intr & BIT_ULL(vf)))
continue;
- dev = vf + start_vf + rvu->hw->total_pfs;
- queue_work(rvu->flr_wq, &rvu->flr_wrk[dev].work);
/* Clear and disable the interrupt */
rvupf_write64(rvu, RVU_PF_VFFLR_INTX(reg), BIT_ULL(vf));
rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1CX(reg), BIT_ULL(vf));
+
+ dev = vf + start_vf + rvu->hw->total_pfs;
+ queue_work(rvu->flr_wq, &rvu->flr_wrk[dev].work);
}
}
@@ -2422,14 +2572,14 @@ static irqreturn_t rvu_flr_intr_handler(int irq, void *rvu_irq)
for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
if (intr & (1ULL << pf)) {
- /* PF is already dead do only AF related operations */
- queue_work(rvu->flr_wq, &rvu->flr_wrk[pf].work);
/* clear interrupt */
rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT,
BIT_ULL(pf));
/* Disable the interrupt */
rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT_ENA_W1C,
BIT_ULL(pf));
+ /* PF is already dead do only AF related operations */
+ queue_work(rvu->flr_wq, &rvu->flr_wrk[pf].work);
}
}
@@ -2984,27 +3134,37 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
err = rvu_mbox_init(rvu, &rvu->afpf_wq_info, TYPE_AFPF,
rvu->hw->total_pfs, rvu_afpf_mbox_handler,
rvu_afpf_mbox_up_handler);
- if (err)
+ if (err) {
+ dev_err(dev, "%s: Failed to initialize mbox\n", __func__);
goto err_hwsetup;
+ }
err = rvu_flr_init(rvu);
- if (err)
+ if (err) {
+ dev_err(dev, "%s: Failed to initialize flr\n", __func__);
goto err_mbox;
+ }
err = rvu_register_interrupts(rvu);
- if (err)
+ if (err) {
+ dev_err(dev, "%s: Failed to register interrupts\n", __func__);
goto err_flr;
+ }
err = rvu_register_dl(rvu);
- if (err)
+ if (err) {
+ dev_err(dev, "%s: Failed to register devlink\n", __func__);
goto err_irq;
+ }
rvu_setup_rvum_blk_revid(rvu);
/* Enable AF's VFs (if any) */
err = rvu_enable_sriov(rvu);
- if (err)
+ if (err) {
+ dev_err(dev, "%s: Failed to enable sriov\n", __func__);
goto err_dl;
+ }
/* Initialize debugfs */
rvu_dbg_init(rvu);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 91503fb2762c..d38e5c980c30 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -1,11 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef RVU_H
@@ -243,8 +240,11 @@ struct rvu_pfvf {
u8 nix_blkaddr; /* BLKADDR_NIX0/1 assigned to this PF */
u8 nix_rx_intf; /* NIX0_RX/NIX1_RX interface to NPC */
u8 nix_tx_intf; /* NIX0_TX/NIX1_TX interface to NPC */
+ u8 lbkid; /* NIX0/1 lbk link ID */
u64 lmt_base_addr; /* Preseving the pcifunc's lmtst base addr*/
+ u64 lmt_map_ent_w1; /* Preserving the word1 of lmtst map table entry */
unsigned long flags;
+ struct sdp_node_info *sdp_info;
};
enum rvu_pfvf_flags {
@@ -314,6 +314,7 @@ struct nix_hw {
struct nix_lso lso;
struct nix_txvlan txvlan;
struct nix_ipolicer *ipolicer;
+ u64 *tx_credits;
};
/* RVU block's capabilities or functionality,
@@ -327,8 +328,10 @@ struct hw_cap {
u16 nix_txsch_per_sdp_lmac; /* Max Q's transmitting to SDP LMAC */
bool nix_fixed_txschq_mapping; /* Schq mapping fixed or flexible */
bool nix_shaping; /* Is shaping and coloring supported */
+ bool nix_shaper_toggle_wait; /* Shaping toggle needs poll/wait */
bool nix_tx_link_bp; /* Can link backpressure TL queues ? */
bool nix_rx_multicast; /* Rx packet replication support */
+ bool nix_common_dwrr_mtu; /* Common DWRR MTU for quantum config */
bool per_pf_mbox_regs; /* PF mbox specified in per PF registers ? */
bool programmable_chans; /* Channels programmable ? */
bool ipolicer;
@@ -355,6 +358,7 @@ struct rvu_hwinfo {
u16 npc_counters; /* No of match stats counters */
u32 lbk_bufsize; /* FIFO size supported by LBK */
bool npc_ext_set; /* Extended register set */
+ u64 npc_stat_ena; /* Match stats enable bit */
struct hw_cap cap;
struct rvu_block block[BLK_COUNT]; /* Block info */
@@ -514,20 +518,34 @@ static inline u64 rvupf_read64(struct rvu *rvu, u64 offset)
}
/* Silicon revisions */
+static inline bool is_rvu_pre_96xx_C0(struct rvu *rvu)
+{
+ struct pci_dev *pdev = rvu->pdev;
+ /* 96XX A0/B0, 95XX A0/A1/B0 chips */
+ return ((pdev->revision == 0x00) || (pdev->revision == 0x01) ||
+ (pdev->revision == 0x10) || (pdev->revision == 0x11) ||
+ (pdev->revision == 0x14));
+}
+
static inline bool is_rvu_96xx_A0(struct rvu *rvu)
{
struct pci_dev *pdev = rvu->pdev;
- return (pdev->revision == 0x00) &&
- (pdev->subsystem_device == PCI_SUBSYS_DEVID_96XX);
+ return (pdev->revision == 0x00);
}
static inline bool is_rvu_96xx_B0(struct rvu *rvu)
{
struct pci_dev *pdev = rvu->pdev;
- return ((pdev->revision == 0x00) || (pdev->revision == 0x01)) &&
- (pdev->subsystem_device == PCI_SUBSYS_DEVID_96XX);
+ return (pdev->revision == 0x00) || (pdev->revision == 0x01);
+}
+
+static inline bool is_rvu_95xx_A0(struct rvu *rvu)
+{
+ struct pci_dev *pdev = rvu->pdev;
+
+ return (pdev->revision == 0x10) || (pdev->revision == 0x11);
}
/* REVID for PCIe devices.
@@ -536,9 +554,10 @@ static inline bool is_rvu_96xx_B0(struct rvu *rvu)
*/
#define PCI_REVISION_ID_96XX 0x00
#define PCI_REVISION_ID_95XX 0x10
-#define PCI_REVISION_ID_LOKI 0x20
+#define PCI_REVISION_ID_95XXN 0x20
#define PCI_REVISION_ID_98XX 0x30
#define PCI_REVISION_ID_95XXMM 0x40
+#define PCI_REVISION_ID_95XXO 0xE0
static inline bool is_rvu_otx2(struct rvu *rvu)
{
@@ -547,8 +566,8 @@ static inline bool is_rvu_otx2(struct rvu *rvu)
u8 midr = pdev->revision & 0xF0;
return (midr == PCI_REVISION_ID_96XX || midr == PCI_REVISION_ID_95XX ||
- midr == PCI_REVISION_ID_LOKI || midr == PCI_REVISION_ID_98XX ||
- midr == PCI_REVISION_ID_95XXMM);
+ midr == PCI_REVISION_ID_95XXN || midr == PCI_REVISION_ID_98XX ||
+ midr == PCI_REVISION_ID_95XXMM || midr == PCI_REVISION_ID_95XXO);
}
static inline u16 rvu_nix_chan_cgx(struct rvu *rvu, u8 cgxid,
@@ -578,6 +597,16 @@ static inline u16 rvu_nix_chan_lbk(struct rvu *rvu, u8 lbkid,
return rvu->hw->lbk_chan_base + lbkid * lbk_chans + chan;
}
+static inline u16 rvu_nix_chan_sdp(struct rvu *rvu, u8 chan)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+
+ if (!hw->cap.programmable_chans)
+ return NIX_CHAN_SDP_CHX(chan);
+
+ return hw->sdp_chan_base + chan;
+}
+
static inline u16 rvu_nix_chan_cpt(struct rvu *rvu, u8 chan)
{
return rvu->hw->cpt_chan_base + chan;
@@ -640,10 +669,17 @@ int rvu_aq_alloc(struct rvu *rvu, struct admin_queue **ad_queue,
int qsize, int inst_size, int res_size);
void rvu_aq_free(struct rvu *rvu, struct admin_queue *aq);
+/* SDP APIs */
+int rvu_sdp_init(struct rvu *rvu);
+bool is_sdp_pfvf(u16 pcifunc);
+bool is_sdp_pf(u16 pcifunc);
+bool is_sdp_vf(u16 pcifunc);
+
/* CGX APIs */
static inline bool is_pf_cgxmapped(struct rvu *rvu, u8 pf)
{
- return (pf >= PF_CGXMAP_BASE && pf <= rvu->cgx_mapped_pfs);
+ return (pf >= PF_CGXMAP_BASE && pf <= rvu->cgx_mapped_pfs) &&
+ !is_sdp_pf(pf << RVU_PFVF_PF_SHIFT);
}
static inline void rvu_get_cgx_lmac_id(u8 map, u8 *cgx_id, u8 *lmac_id)
@@ -706,6 +742,8 @@ int nix_aq_context_read(struct rvu *rvu, struct nix_hw *nix_hw,
struct nix_cn10k_aq_enq_rsp *aq_rsp,
u16 pcifunc, u8 ctype, u32 qidx);
int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc);
+u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu);
+u32 convert_bytes_to_dwrr_mtu(u32 bytes);
/* NPC APIs */
int rvu_npc_init(struct rvu *rvu);
@@ -745,7 +783,6 @@ bool is_npc_intf_tx(u8 intf);
bool is_npc_intf_rx(u8 intf);
bool is_npc_interface_valid(struct rvu *rvu, u8 intf);
int rvu_npc_get_tx_nibble_cfg(struct rvu *rvu, u64 nibble_ena);
-int npc_mcam_verify_channel(struct rvu *rvu, u16 pcifunc, u8 intf, u16 channel);
int npc_flow_steering_init(struct rvu *rvu, int blkaddr);
const char *npc_get_field_name(u8 hdr);
int npc_get_bank(struct npc_mcam *mcam, int index);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index fe99ac4a4dd8..81e8ea9ee30e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -1,11 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/types.h>
@@ -448,7 +445,7 @@ int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start)
u8 cgx_id, lmac_id;
if (!is_cgx_config_permitted(rvu, pcifunc))
- return -EPERM;
+ return LMAC_AF_ERR_PERM_DENIED;
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
@@ -507,7 +504,7 @@ static int rvu_lmac_get_stats(struct rvu *rvu, struct msg_req *req,
void *cgxd;
if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc))
- return -ENODEV;
+ return LMAC_AF_ERR_PERM_DENIED;
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac);
cgxd = rvu_cgx_pdata(cgx_idx, rvu);
@@ -561,7 +558,7 @@ int rvu_mbox_handler_cgx_fec_stats(struct rvu *rvu,
void *cgxd;
if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc))
- return -EPERM;
+ return LMAC_AF_ERR_PERM_DENIED;
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac);
cgxd = rvu_cgx_pdata(cgx_idx, rvu);
@@ -888,7 +885,7 @@ int rvu_mbox_handler_cgx_get_phy_fec_stats(struct rvu *rvu, struct msg_req *req,
u8 cgx_id, lmac_id;
if (!is_pf_cgxmapped(rvu, pf))
- return -EPERM;
+ return LMAC_AF_ERR_PF_NOT_MAPPED;
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
return cgx_get_phy_fec_stats(rvu_cgx_pdata(cgx_id, rvu), lmac_id);
@@ -1046,7 +1043,7 @@ int rvu_mbox_handler_cgx_mac_addr_reset(struct rvu *rvu, struct msg_req *req,
u8 cgx_id, lmac_id;
if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc))
- return -EPERM;
+ return LMAC_AF_ERR_PERM_DENIED;
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
return cgx_lmac_addr_reset(cgx_id, lmac_id);
@@ -1060,7 +1057,7 @@ int rvu_mbox_handler_cgx_mac_addr_update(struct rvu *rvu,
u8 cgx_id, lmac_id;
if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc))
- return -EPERM;
+ return LMAC_AF_ERR_PERM_DENIED;
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
return cgx_lmac_addr_update(cgx_id, lmac_id, req->mac_addr, req->index);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
index 8d48b64485c6..46a41cfff575 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell RPM CN10K driver
+/* Marvell RPM CN10K driver
*
* Copyright (C) 2020 Marvell.
*/
@@ -49,6 +49,7 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val,
return 0;
}
+#define LMT_MAP_TBL_W1_OFF 8
static u32 rvu_get_lmtst_tbl_index(struct rvu *rvu, u16 pcifunc)
{
return ((rvu_get_pf(pcifunc) * rvu->hw->total_vfs) +
@@ -82,10 +83,10 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc,
dev_err(rvu->dev, "%s LMTLINE iova transulation failed err:%llx\n", __func__, val);
return -EIO;
}
- /* PA[51:12] = RVU_AF_SMMU_TLN_FLIT1[60:21]
+ /* PA[51:12] = RVU_AF_SMMU_TLN_FLIT0[57:18]
* PA[11:0] = IOVA[11:0]
*/
- pa = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TLN_FLIT1) >> 21;
+ pa = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TLN_FLIT0) >> 18;
pa &= GENMASK_ULL(39, 0);
*lmt_addr = (pa << 12) | (iova & 0xFFF);
@@ -131,9 +132,11 @@ int rvu_mbox_handler_lmtst_tbl_setup(struct rvu *rvu,
struct lmtst_tbl_setup_req *req,
struct msg_rsp *rsp)
{
- u64 lmt_addr, val;
- u32 pri_tbl_idx;
+ struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
+ u32 pri_tbl_idx, tbl_idx;
+ u64 lmt_addr;
int err = 0;
+ u64 val;
/* Check if PF_FUNC wants to use it's own local memory as LMTLINE
* region, if so, convert that IOVA to physical address and
@@ -170,7 +173,7 @@ int rvu_mbox_handler_lmtst_tbl_setup(struct rvu *rvu,
dev_err(rvu->dev,
"Failed to read LMT map table: index 0x%x err %d\n",
pri_tbl_idx, err);
- return err;
+ goto error;
}
/* Update the base lmt addr of secondary with primary's base
@@ -181,7 +184,53 @@ int rvu_mbox_handler_lmtst_tbl_setup(struct rvu *rvu,
return err;
}
- return 0;
+ /* This mailbox can also be used to update word1 of APR_LMT_MAP_ENTRY_S
+ * like enabling scheduled LMTST, disable LMTLINE prefetch, disable
+ * early completion for ordered LMTST.
+ */
+ if (req->sch_ena || req->dis_sched_early_comp || req->dis_line_pref) {
+ tbl_idx = rvu_get_lmtst_tbl_index(rvu, req->hdr.pcifunc);
+ err = lmtst_map_table_ops(rvu, tbl_idx + LMT_MAP_TBL_W1_OFF,
+ &val, LMT_TBL_OP_READ);
+ if (err) {
+ dev_err(rvu->dev,
+ "Failed to read LMT map table: index 0x%x err %d\n",
+ tbl_idx + LMT_MAP_TBL_W1_OFF, err);
+ goto error;
+ }
+
+ /* Storing lmt map table entry word1 default value as this needs
+ * to be reverted in FLR. Also making sure this default value
+ * doesn't get overwritten on multiple calls to this mailbox.
+ */
+ if (!pfvf->lmt_map_ent_w1)
+ pfvf->lmt_map_ent_w1 = val;
+
+ /* Disable early completion for Ordered LMTSTs. */
+ if (req->dis_sched_early_comp)
+ val |= (req->dis_sched_early_comp <<
+ APR_LMT_MAP_ENT_DIS_SCH_CMP_SHIFT);
+ /* Enable scheduled LMTST */
+ if (req->sch_ena)
+ val |= (req->sch_ena << APR_LMT_MAP_ENT_SCH_ENA_SHIFT) |
+ req->ssow_pf_func;
+ /* Disables LMTLINE prefetch before receiving store data. */
+ if (req->dis_line_pref)
+ val |= (req->dis_line_pref <<
+ APR_LMT_MAP_ENT_DIS_LINE_PREF_SHIFT);
+
+ err = lmtst_map_table_ops(rvu, tbl_idx + LMT_MAP_TBL_W1_OFF,
+ &val, LMT_TBL_OP_WRITE);
+ if (err) {
+ dev_err(rvu->dev,
+ "Failed to update LMT map table: index 0x%x err %d\n",
+ tbl_idx + LMT_MAP_TBL_W1_OFF, err);
+ goto error;
+ }
+ }
+
+error:
+ return err;
}
/* Resetting the lmtst map table to original base addresses */
@@ -194,27 +243,45 @@ void rvu_reset_lmt_map_tbl(struct rvu *rvu, u16 pcifunc)
if (is_rvu_otx2(rvu))
return;
- if (pfvf->lmt_base_addr) {
+ if (pfvf->lmt_base_addr || pfvf->lmt_map_ent_w1) {
/* This corresponds to lmt map table index */
tbl_idx = rvu_get_lmtst_tbl_index(rvu, pcifunc);
/* Reverting back original lmt base addr for respective
* pcifunc.
*/
- err = lmtst_map_table_ops(rvu, tbl_idx, &pfvf->lmt_base_addr,
- LMT_TBL_OP_WRITE);
- if (err)
- dev_err(rvu->dev,
- "Failed to update LMT map table: index 0x%x err %d\n",
- tbl_idx, err);
- pfvf->lmt_base_addr = 0;
+ if (pfvf->lmt_base_addr) {
+ err = lmtst_map_table_ops(rvu, tbl_idx,
+ &pfvf->lmt_base_addr,
+ LMT_TBL_OP_WRITE);
+ if (err)
+ dev_err(rvu->dev,
+ "Failed to update LMT map table: index 0x%x err %d\n",
+ tbl_idx, err);
+ pfvf->lmt_base_addr = 0;
+ }
+ /* Reverting back to original word1 val of lmtst map table entry
+ * which underwent changes.
+ */
+ if (pfvf->lmt_map_ent_w1) {
+ err = lmtst_map_table_ops(rvu,
+ tbl_idx + LMT_MAP_TBL_W1_OFF,
+ &pfvf->lmt_map_ent_w1,
+ LMT_TBL_OP_WRITE);
+ if (err)
+ dev_err(rvu->dev,
+ "Failed to update LMT map table: index 0x%x err %d\n",
+ tbl_idx + LMT_MAP_TBL_W1_OFF, err);
+ pfvf->lmt_map_ent_w1 = 0;
+ }
}
}
int rvu_set_channels_base(struct rvu *rvu)
{
+ u16 nr_lbk_chans, nr_sdp_chans, nr_cgx_chans, nr_cpt_chans;
+ u16 sdp_chan_base, cgx_chan_base, cpt_chan_base;
struct rvu_hwinfo *hw = rvu->hw;
- u16 cpt_chan_base;
- u64 nix_const;
+ u64 nix_const, nix_const1;
int blkaddr;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
@@ -222,6 +289,7 @@ int rvu_set_channels_base(struct rvu *rvu)
return blkaddr;
nix_const = rvu_read64(rvu, blkaddr, NIX_AF_CONST);
+ nix_const1 = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
hw->cgx = (nix_const >> 12) & 0xFULL;
hw->lmac_per_cgx = (nix_const >> 8) & 0xFULL;
@@ -244,14 +312,24 @@ int rvu_set_channels_base(struct rvu *rvu)
* channels such that all channel numbers are contiguous
* leaving no holes. This way the new CPT channels can be
* accomodated. The order of channel numbers assigned is
- * LBK, SDP, CGX and CPT.
+ * LBK, SDP, CGX and CPT. Also the base channel number
+ * of a block must be multiple of number of channels
+ * of the block.
*/
- hw->sdp_chan_base = hw->lbk_chan_base + hw->lbk_links *
- ((nix_const >> 16) & 0xFFULL);
- hw->cgx_chan_base = hw->sdp_chan_base + hw->sdp_links * SDP_CHANNELS;
+ nr_lbk_chans = (nix_const >> 16) & 0xFFULL;
+ nr_sdp_chans = nix_const1 & 0xFFFULL;
+ nr_cgx_chans = nix_const & 0xFFULL;
+ nr_cpt_chans = (nix_const >> 32) & 0xFFFULL;
+
+ sdp_chan_base = hw->lbk_chan_base + hw->lbk_links * nr_lbk_chans;
+ /* Round up base channel to multiple of number of channels */
+ hw->sdp_chan_base = ALIGN(sdp_chan_base, nr_sdp_chans);
+
+ cgx_chan_base = hw->sdp_chan_base + hw->sdp_links * nr_sdp_chans;
+ hw->cgx_chan_base = ALIGN(cgx_chan_base, nr_cgx_chans);
- cpt_chan_base = hw->cgx_chan_base + hw->cgx_links *
- (nix_const & 0xFFULL);
+ cpt_chan_base = hw->cgx_chan_base + hw->cgx_links * nr_cgx_chans;
+ hw->cpt_chan_base = ALIGN(cpt_chan_base, nr_cpt_chans);
/* Out of 4096 channels start CPT from 2048 so
* that MSB for CPT channels is always set
@@ -355,6 +433,7 @@ err_put:
static void __rvu_nix_set_channels(struct rvu *rvu, int blkaddr)
{
+ u64 nix_const1 = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
u64 nix_const = rvu_read64(rvu, blkaddr, NIX_AF_CONST);
u16 cgx_chans, lbk_chans, sdp_chans, cpt_chans;
struct rvu_hwinfo *hw = rvu->hw;
@@ -364,7 +443,7 @@ static void __rvu_nix_set_channels(struct rvu *rvu, int blkaddr)
cgx_chans = nix_const & 0xFFULL;
lbk_chans = (nix_const >> 16) & 0xFFULL;
- sdp_chans = SDP_CHANNELS;
+ sdp_chans = nix_const1 & 0xFFFULL;
cpt_chans = (nix_const >> 32) & 0xFFFULL;
start = hw->cgx_chan_base;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
index 89253f7bdadb..1f90a7403392 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
@@ -1,5 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2020 Marvell. */
+/* Marvell RVU Admin Function driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
#include <linux/bitfield.h>
#include <linux/pci.h>
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index 9b2dfbf90e51..9338765da048 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -1,11 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
- * Copyright (C) 2019 Marvell International Ltd.
+ * Copyright (C) 2019 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifdef CONFIG_DEBUG_FS
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
index 2688186066d9..274d3abe30eb 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Devlink
+/* Marvell RVU Admin Function Devlink
*
* Copyright (C) 2020 Marvell.
*
@@ -1364,6 +1364,89 @@ static void rvu_health_reporters_destroy(struct rvu *rvu)
rvu_nix_health_reporters_destroy(rvu_dl);
}
+/* Devlink Params APIs */
+static int rvu_af_dl_dwrr_mtu_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+ struct rvu *rvu = rvu_dl->rvu;
+ int dwrr_mtu = val.vu32;
+ struct nix_txsch *txsch;
+ struct nix_hw *nix_hw;
+
+ if (!rvu->hw->cap.nix_common_dwrr_mtu) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Setting DWRR_MTU is not supported on this silicon");
+ return -EOPNOTSUPP;
+ }
+
+ if ((dwrr_mtu > 65536 || !is_power_of_2(dwrr_mtu)) &&
+ (dwrr_mtu != 9728 && dwrr_mtu != 10240)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Invalid, supported MTUs are 0,2,4,8.16,32,64....4K,8K,32K,64K and 9728, 10240");
+ return -EINVAL;
+ }
+
+ nix_hw = get_nix_hw(rvu->hw, BLKADDR_NIX0);
+ if (!nix_hw)
+ return -ENODEV;
+
+ txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ];
+ if (rvu_rsrc_free_count(&txsch->schq) != txsch->schq.max) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Changing DWRR MTU is not supported when there are active NIXLFs");
+ NL_SET_ERR_MSG_MOD(extack,
+ "Make sure none of the PF/VF interfaces are initialized and retry");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int rvu_af_dl_dwrr_mtu_set(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+ struct rvu *rvu = rvu_dl->rvu;
+ u64 dwrr_mtu;
+
+ dwrr_mtu = convert_bytes_to_dwrr_mtu(ctx->val.vu32);
+ rvu_write64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU, dwrr_mtu);
+
+ return 0;
+}
+
+static int rvu_af_dl_dwrr_mtu_get(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+ struct rvu *rvu = rvu_dl->rvu;
+ u64 dwrr_mtu;
+
+ if (!rvu->hw->cap.nix_common_dwrr_mtu)
+ return -EOPNOTSUPP;
+
+ dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU);
+ ctx->val.vu32 = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
+ return 0;
+}
+
+enum rvu_af_dl_param_id {
+ RVU_AF_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+ RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU,
+};
+
+static const struct devlink_param rvu_af_dl_params[] = {
+ DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU,
+ "dwrr_mtu", DEVLINK_PARAM_TYPE_U32,
+ BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+ rvu_af_dl_dwrr_mtu_get, rvu_af_dl_dwrr_mtu_set,
+ rvu_af_dl_dwrr_mtu_validate),
+};
+
+/* Devlink switch mode */
static int rvu_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
{
struct rvu_devlink *rvu_dl = devlink_priv(devlink);
@@ -1420,13 +1503,14 @@ int rvu_register_dl(struct rvu *rvu)
struct devlink *dl;
int err;
- dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink));
+ dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink),
+ rvu->dev);
if (!dl) {
dev_warn(rvu->dev, "devlink_alloc failed\n");
return -ENOMEM;
}
- err = devlink_register(dl, rvu->dev);
+ err = devlink_register(dl);
if (err) {
dev_err(rvu->dev, "devlink register failed with error %d\n", err);
devlink_free(dl);
@@ -1438,7 +1522,30 @@ int rvu_register_dl(struct rvu *rvu)
rvu_dl->rvu = rvu;
rvu->rvu_dl = rvu_dl;
- return rvu_health_reporters_create(rvu);
+ err = rvu_health_reporters_create(rvu);
+ if (err) {
+ dev_err(rvu->dev,
+ "devlink health reporter creation failed with error %d\n", err);
+ goto err_dl_health;
+ }
+
+ err = devlink_params_register(dl, rvu_af_dl_params,
+ ARRAY_SIZE(rvu_af_dl_params));
+ if (err) {
+ dev_err(rvu->dev,
+ "devlink params register failed with error %d", err);
+ goto err_dl_health;
+ }
+
+ devlink_params_publish(dl);
+
+ return 0;
+
+err_dl_health:
+ rvu_health_reporters_destroy(rvu);
+ devlink_unregister(dl);
+ devlink_free(dl);
+ return err;
}
void rvu_unregister_dl(struct rvu *rvu)
@@ -1449,6 +1556,8 @@ void rvu_unregister_dl(struct rvu *rvu)
if (!dl)
return;
+ devlink_params_unregister(dl, rvu_af_dl_params,
+ ARRAY_SIZE(rvu_af_dl_params));
rvu_health_reporters_destroy(rvu);
devlink_unregister(dl);
devlink_free(dl);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h
index 471e57dedb20..51efe88dce11 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Devlink
+/* Marvell RVU Admin Function Devlink
*
* Copyright (C) 2020 Marvell.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 4bfbbdf38770..9ef4e942e31e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -1,11 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/module.h>
@@ -25,7 +22,7 @@ static int nix_update_mce_rule(struct rvu *rvu, u16 pcifunc,
int type, bool add);
static int nix_setup_ipolicers(struct rvu *rvu,
struct nix_hw *nix_hw, int blkaddr);
-static void nix_ipolicer_freemem(struct nix_hw *nix_hw);
+static void nix_ipolicer_freemem(struct rvu *rvu, struct nix_hw *nix_hw);
static int nix_verify_bandprof(struct nix_cn10k_aq_enq_req *req,
struct nix_hw *nix_hw, u16 pcifunc);
static int nix_free_all_bandprof(struct rvu *rvu, u16 pcifunc);
@@ -192,6 +189,47 @@ struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr)
return NULL;
}
+u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu)
+{
+ dwrr_mtu &= 0x1FULL;
+
+ /* MTU used for DWRR calculation is in power of 2 up until 64K bytes.
+ * Value of 4 is reserved for MTU value of 9728 bytes.
+ * Value of 5 is reserved for MTU value of 10240 bytes.
+ */
+ switch (dwrr_mtu) {
+ case 4:
+ return 9728;
+ case 5:
+ return 10240;
+ default:
+ return BIT_ULL(dwrr_mtu);
+ }
+
+ return 0;
+}
+
+u32 convert_bytes_to_dwrr_mtu(u32 bytes)
+{
+ /* MTU used for DWRR calculation is in power of 2 up until 64K bytes.
+ * Value of 4 is reserved for MTU value of 9728 bytes.
+ * Value of 5 is reserved for MTU value of 10240 bytes.
+ */
+ if (bytes > BIT_ULL(16))
+ return 0;
+
+ switch (bytes) {
+ case 9728:
+ return 4;
+ case 10240:
+ return 5;
+ default:
+ return ilog2(bytes);
+ }
+
+ return 0;
+}
+
static void nix_rx_sync(struct rvu *rvu, int blkaddr)
{
int err;
@@ -249,16 +287,22 @@ static bool is_valid_txschq(struct rvu *rvu, int blkaddr,
return true;
}
-static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
+static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf,
+ struct nix_lf_alloc_rsp *rsp, bool loop)
{
- struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+ struct rvu_pfvf *parent_pf, *pfvf = rvu_get_pfvf(rvu, pcifunc);
+ u16 req_chan_base, req_chan_end, req_chan_cnt;
+ struct rvu_hwinfo *hw = rvu->hw;
+ struct sdp_node_info *sdp_info;
+ int pkind, pf, vf, lbkid, vfid;
struct mac_ops *mac_ops;
- int pkind, pf, vf, lbkid;
u8 cgx_id, lmac_id;
+ bool from_vf;
int err;
pf = rvu_get_pf(pcifunc);
- if (!is_pf_cgxmapped(rvu, pf) && type != NIX_INTF_TYPE_LBK)
+ if (!is_pf_cgxmapped(rvu, pf) && type != NIX_INTF_TYPE_LBK &&
+ type != NIX_INTF_TYPE_SDP)
return 0;
switch (type) {
@@ -276,10 +320,13 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
pfvf->tx_chan_base = pfvf->rx_chan_base;
pfvf->rx_chan_cnt = 1;
pfvf->tx_chan_cnt = 1;
+ rsp->tx_link = cgx_id * hw->lmac_per_cgx + lmac_id;
+
cgx_set_pkind(rvu_cgx_pdata(cgx_id, rvu), lmac_id, pkind);
rvu_npc_set_pkind(rvu, pkind, pfvf);
mac_ops = get_mac_ops(rvu_cgx_pdata(cgx_id, rvu));
+
/* By default we enable pause frames */
if ((pcifunc & RVU_PFVF_FUNC_MASK) == 0)
mac_ops->mac_enadis_pause_frm(rvu_cgx_pdata(cgx_id,
@@ -299,6 +346,25 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
if (rvu->hw->lbk_links > 1)
lbkid = vf & 0x1 ? 0 : 1;
+ /* By default NIX0 is configured to send packet on lbk link 1
+ * (which corresponds to LBK1), same packet will receive on
+ * NIX1 over lbk link 0. If NIX1 sends packet on lbk link 0
+ * (which corresponds to LBK2) packet will receive on NIX0 lbk
+ * link 1.
+ * But if lbk links for NIX0 and NIX1 are negated, i.e NIX0
+ * transmits and receives on lbk link 0, which corresponds
+ * to LBK1 block, back to back connectivity between NIX and
+ * LBK can be achieved (which is similar to 96xx)
+ *
+ * RX TX
+ * NIX0 lbk link 1 (LBK2) 1 (LBK1)
+ * NIX0 lbk link 0 (LBK0) 0 (LBK0)
+ * NIX1 lbk link 0 (LBK1) 0 (LBK2)
+ * NIX1 lbk link 1 (LBK3) 1 (LBK3)
+ */
+ if (loop)
+ lbkid = !lbkid;
+
/* Note that AF's VFs work in pairs and talk over consecutive
* loopback channels.Therefore if odd number of AF VFs are
* enabled then the last VF remains with no pair.
@@ -309,10 +375,51 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
rvu_nix_chan_lbk(rvu, lbkid, vf + 1);
pfvf->rx_chan_cnt = 1;
pfvf->tx_chan_cnt = 1;
+ rsp->tx_link = hw->cgx_links + lbkid;
+ pfvf->lbkid = lbkid;
rvu_npc_set_pkind(rvu, NPC_RX_LBK_PKIND, pfvf);
rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
pfvf->rx_chan_base,
pfvf->rx_chan_cnt);
+
+ break;
+ case NIX_INTF_TYPE_SDP:
+ from_vf = !!(pcifunc & RVU_PFVF_FUNC_MASK);
+ parent_pf = &rvu->pf[rvu_get_pf(pcifunc)];
+ sdp_info = parent_pf->sdp_info;
+ if (!sdp_info) {
+ dev_err(rvu->dev, "Invalid sdp_info pointer\n");
+ return -EINVAL;
+ }
+ if (from_vf) {
+ req_chan_base = rvu_nix_chan_sdp(rvu, 0) + sdp_info->pf_srn +
+ sdp_info->num_pf_rings;
+ vf = (pcifunc & RVU_PFVF_FUNC_MASK) - 1;
+ for (vfid = 0; vfid < vf; vfid++)
+ req_chan_base += sdp_info->vf_rings[vfid];
+ req_chan_cnt = sdp_info->vf_rings[vf];
+ req_chan_end = req_chan_base + req_chan_cnt - 1;
+ if (req_chan_base < rvu_nix_chan_sdp(rvu, 0) ||
+ req_chan_end > rvu_nix_chan_sdp(rvu, 255)) {
+ dev_err(rvu->dev,
+ "PF_Func 0x%x: Invalid channel base and count\n",
+ pcifunc);
+ return -EINVAL;
+ }
+ } else {
+ req_chan_base = rvu_nix_chan_sdp(rvu, 0) + sdp_info->pf_srn;
+ req_chan_cnt = sdp_info->num_pf_rings;
+ }
+
+ pfvf->rx_chan_base = req_chan_base;
+ pfvf->rx_chan_cnt = req_chan_cnt;
+ pfvf->tx_chan_base = pfvf->rx_chan_base;
+ pfvf->tx_chan_cnt = pfvf->rx_chan_cnt;
+
+ rsp->tx_link = hw->cgx_links + hw->lbk_links;
+ rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
+ pfvf->rx_chan_base,
+ pfvf->rx_chan_cnt);
break;
}
@@ -393,9 +500,9 @@ int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu,
static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
int type, int chan_id)
{
- int bpid, blkaddr, lmac_chan_cnt;
+ int bpid, blkaddr, lmac_chan_cnt, sdp_chan_cnt;
+ u16 cgx_bpid_cnt, lbk_bpid_cnt, sdp_bpid_cnt;
struct rvu_hwinfo *hw = rvu->hw;
- u16 cgx_bpid_cnt, lbk_bpid_cnt;
struct rvu_pfvf *pfvf;
u8 cgx_id, lmac_id;
u64 cfg;
@@ -404,8 +511,12 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST);
lmac_chan_cnt = cfg & 0xFF;
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
+ sdp_chan_cnt = cfg & 0xFFF;
+
cgx_bpid_cnt = hw->cgx_links * lmac_chan_cnt;
lbk_bpid_cnt = hw->lbk_links * ((cfg >> 16) & 0xFF);
+ sdp_bpid_cnt = hw->sdp_links * sdp_chan_cnt;
pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
@@ -443,6 +554,17 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
if (bpid > (cgx_bpid_cnt + lbk_bpid_cnt))
return -EINVAL;
break;
+ case NIX_INTF_TYPE_SDP:
+ if ((req->chan_base + req->chan_cnt) > 255)
+ return -EINVAL;
+
+ bpid = sdp_bpid_cnt + req->chan_base;
+ if (req->bpid_per_chan)
+ bpid += chan_id;
+
+ if (bpid > (cgx_bpid_cnt + lbk_bpid_cnt + sdp_bpid_cnt))
+ return -EINVAL;
+ break;
default:
return -EINVAL;
}
@@ -462,9 +584,12 @@ int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu,
pf = rvu_get_pf(pcifunc);
type = is_afvf(pcifunc) ? NIX_INTF_TYPE_LBK : NIX_INTF_TYPE_CGX;
+ if (is_sdp_pfvf(pcifunc))
+ type = NIX_INTF_TYPE_SDP;
- /* Enable backpressure only for CGX mapped PFs and LBK interface */
- if (!is_pf_cgxmapped(rvu, pf) && type != NIX_INTF_TYPE_LBK)
+ /* Enable backpressure only for CGX mapped PFs and LBK/SDP interface */
+ if (!is_pf_cgxmapped(rvu, pf) && type != NIX_INTF_TYPE_LBK &&
+ type != NIX_INTF_TYPE_SDP)
return 0;
pfvf = rvu_get_pfvf(rvu, pcifunc);
@@ -481,8 +606,9 @@ int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu,
}
cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan));
+ cfg &= ~GENMASK_ULL(8, 0);
rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan),
- cfg | (bpid & 0xFF) | BIT_ULL(16));
+ cfg | (bpid & GENMASK_ULL(8, 0)) | BIT_ULL(16));
chan_id++;
bpid = rvu_nix_get_bpid(rvu, req, type, chan_id);
}
@@ -630,9 +756,10 @@ static void nix_ctx_free(struct rvu *rvu, struct rvu_pfvf *pfvf)
static int nixlf_rss_ctx_init(struct rvu *rvu, int blkaddr,
struct rvu_pfvf *pfvf, int nixlf,
int rss_sz, int rss_grps, int hwctx_size,
- u64 way_mask)
+ u64 way_mask, bool tag_lsb_as_adder)
{
int err, grp, num_indices;
+ u64 val;
/* RSS is not requested for this NIXLF */
if (!rss_sz)
@@ -648,10 +775,13 @@ static int nixlf_rss_ctx_init(struct rvu *rvu, int blkaddr,
(u64)pfvf->rss_ctx->iova);
/* Config full RSS table size, enable RSS and caching */
- rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_CFG(nixlf),
- BIT_ULL(36) | BIT_ULL(4) |
- ilog2(num_indices / MAX_RSS_INDIR_TBL_SIZE) |
- way_mask << 20);
+ val = BIT_ULL(36) | BIT_ULL(4) | way_mask << 20 |
+ ilog2(num_indices / MAX_RSS_INDIR_TBL_SIZE);
+
+ if (tag_lsb_as_adder)
+ val |= BIT_ULL(5);
+
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_CFG(nixlf), val);
/* Config RSS group offset and sizes */
for (grp = 0; grp < rss_grps; grp++)
rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_GRPX(nixlf, grp),
@@ -943,7 +1073,7 @@ static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req,
nix_hw = get_nix_hw(rvu->hw, blkaddr);
if (!nix_hw)
- return -EINVAL;
+ return NIX_AF_ERR_INVALID_NIXBLK;
return rvu_nix_blk_aq_enq_inst(rvu, nix_hw, req, rsp);
}
@@ -1200,7 +1330,8 @@ int rvu_mbox_handler_nix_lf_alloc(struct rvu *rvu,
/* Initialize receive side scaling (RSS) */
hwctx_size = 1UL << ((ctx_cfg >> 12) & 0xF);
err = nixlf_rss_ctx_init(rvu, blkaddr, pfvf, nixlf, req->rss_sz,
- req->rss_grps, hwctx_size, req->way_mask);
+ req->rss_grps, hwctx_size, req->way_mask,
+ !!(req->flags & NIX_LF_RSS_TAG_LSB_AS_ADDER));
if (err)
goto free_mem;
@@ -1258,7 +1389,11 @@ int rvu_mbox_handler_nix_lf_alloc(struct rvu *rvu,
rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_PARSE_CFG(nixlf), cfg);
intf = is_afvf(pcifunc) ? NIX_INTF_TYPE_LBK : NIX_INTF_TYPE_CGX;
- err = nix_interface_init(rvu, pcifunc, intf, nixlf);
+ if (is_sdp_pfvf(pcifunc))
+ intf = NIX_INTF_TYPE_SDP;
+
+ err = nix_interface_init(rvu, pcifunc, intf, nixlf, rsp,
+ !!(req->flags & NIX_LF_LBK_BLK_SEL));
if (err)
goto free_mem;
@@ -1364,7 +1499,7 @@ int rvu_mbox_handler_nix_mark_format_cfg(struct rvu *rvu,
nix_hw = get_nix_hw(rvu->hw, blkaddr);
if (!nix_hw)
- return -EINVAL;
+ return NIX_AF_ERR_INVALID_NIXBLK;
cfg = (((u32)req->offset & 0x7) << 16) |
(((u32)req->y_mask & 0xF) << 12) |
@@ -1382,12 +1517,104 @@ int rvu_mbox_handler_nix_mark_format_cfg(struct rvu *rvu,
return 0;
}
+/* Handle shaper update specially for few revisions */
+static bool
+handle_txschq_shaper_update(struct rvu *rvu, int blkaddr, int nixlf,
+ int lvl, u64 reg, u64 regval)
+{
+ u64 regbase, oldval, sw_xoff = 0;
+ u64 dbgval, md_debug0 = 0;
+ unsigned long poll_tmo;
+ bool rate_reg = 0;
+ u32 schq;
+
+ regbase = reg & 0xFFFF;
+ schq = TXSCHQ_IDX(reg, TXSCHQ_IDX_SHIFT);
+
+ /* Check for rate register */
+ switch (lvl) {
+ case NIX_TXSCH_LVL_TL1:
+ md_debug0 = NIX_AF_TL1X_MD_DEBUG0(schq);
+ sw_xoff = NIX_AF_TL1X_SW_XOFF(schq);
+
+ rate_reg = !!(regbase == NIX_AF_TL1X_CIR(0));
+ break;
+ case NIX_TXSCH_LVL_TL2:
+ md_debug0 = NIX_AF_TL2X_MD_DEBUG0(schq);
+ sw_xoff = NIX_AF_TL2X_SW_XOFF(schq);
+
+ rate_reg = (regbase == NIX_AF_TL2X_CIR(0) ||
+ regbase == NIX_AF_TL2X_PIR(0));
+ break;
+ case NIX_TXSCH_LVL_TL3:
+ md_debug0 = NIX_AF_TL3X_MD_DEBUG0(schq);
+ sw_xoff = NIX_AF_TL3X_SW_XOFF(schq);
+
+ rate_reg = (regbase == NIX_AF_TL3X_CIR(0) ||
+ regbase == NIX_AF_TL3X_PIR(0));
+ break;
+ case NIX_TXSCH_LVL_TL4:
+ md_debug0 = NIX_AF_TL4X_MD_DEBUG0(schq);
+ sw_xoff = NIX_AF_TL4X_SW_XOFF(schq);
+
+ rate_reg = (regbase == NIX_AF_TL4X_CIR(0) ||
+ regbase == NIX_AF_TL4X_PIR(0));
+ break;
+ case NIX_TXSCH_LVL_MDQ:
+ sw_xoff = NIX_AF_MDQX_SW_XOFF(schq);
+ rate_reg = (regbase == NIX_AF_MDQX_CIR(0) ||
+ regbase == NIX_AF_MDQX_PIR(0));
+ break;
+ }
+
+ if (!rate_reg)
+ return false;
+
+ /* Nothing special to do when state is not toggled */
+ oldval = rvu_read64(rvu, blkaddr, reg);
+ if ((oldval & 0x1) == (regval & 0x1)) {
+ rvu_write64(rvu, blkaddr, reg, regval);
+ return true;
+ }
+
+ /* PIR/CIR disable */
+ if (!(regval & 0x1)) {
+ rvu_write64(rvu, blkaddr, sw_xoff, 1);
+ rvu_write64(rvu, blkaddr, reg, 0);
+ udelay(4);
+ rvu_write64(rvu, blkaddr, sw_xoff, 0);
+ return true;
+ }
+
+ /* PIR/CIR enable */
+ rvu_write64(rvu, blkaddr, sw_xoff, 1);
+ if (md_debug0) {
+ poll_tmo = jiffies + usecs_to_jiffies(10000);
+ /* Wait until VLD(bit32) == 1 or C_CON(bit48) == 0 */
+ do {
+ if (time_after(jiffies, poll_tmo)) {
+ dev_err(rvu->dev,
+ "NIXLF%d: TLX%u(lvl %u) CIR/PIR enable failed\n",
+ nixlf, schq, lvl);
+ goto exit;
+ }
+ usleep_range(1, 5);
+ dbgval = rvu_read64(rvu, blkaddr, md_debug0);
+ } while (!(dbgval & BIT_ULL(32)) && (dbgval & BIT_ULL(48)));
+ }
+ rvu_write64(rvu, blkaddr, reg, regval);
+exit:
+ rvu_write64(rvu, blkaddr, sw_xoff, 0);
+ return true;
+}
+
/* Disable shaping of pkts by a scheduler queue
* at a given scheduler level.
*/
static void nix_reset_tx_shaping(struct rvu *rvu, int blkaddr,
- int lvl, int schq)
+ int nixlf, int lvl, int schq)
{
+ struct rvu_hwinfo *hw = rvu->hw;
u64 cir_reg = 0, pir_reg = 0;
u64 cfg;
@@ -1408,6 +1635,21 @@ static void nix_reset_tx_shaping(struct rvu *rvu, int blkaddr,
cir_reg = NIX_AF_TL4X_CIR(schq);
pir_reg = NIX_AF_TL4X_PIR(schq);
break;
+ case NIX_TXSCH_LVL_MDQ:
+ cir_reg = NIX_AF_MDQX_CIR(schq);
+ pir_reg = NIX_AF_MDQX_PIR(schq);
+ break;
+ }
+
+ /* Shaper state toggle needs wait/poll */
+ if (hw->cap.nix_shaper_toggle_wait) {
+ if (cir_reg)
+ handle_txschq_shaper_update(rvu, blkaddr, nixlf,
+ lvl, cir_reg, 0);
+ if (pir_reg)
+ handle_txschq_shaper_update(rvu, blkaddr, nixlf,
+ lvl, pir_reg, 0);
+ return;
}
if (!cir_reg)
@@ -1425,6 +1667,7 @@ static void nix_reset_tx_linkcfg(struct rvu *rvu, int blkaddr,
int lvl, int schq)
{
struct rvu_hwinfo *hw = rvu->hw;
+ int link_level;
int link;
if (lvl >= hw->cap.nix_tx_aggr_lvl)
@@ -1434,7 +1677,9 @@ static void nix_reset_tx_linkcfg(struct rvu *rvu, int blkaddr,
if (lvl == NIX_TXSCH_LVL_TL4)
rvu_write64(rvu, blkaddr, NIX_AF_TL4X_SDP_LINK_CFG(schq), 0x00);
- if (lvl != NIX_TXSCH_LVL_TL2)
+ link_level = rvu_read64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL) & 0x01 ?
+ NIX_TXSCH_LVL_TL3 : NIX_TXSCH_LVL_TL2;
+ if (lvl != link_level)
return;
/* Reset TL2's CGX or LBK link config */
@@ -1443,6 +1688,40 @@ static void nix_reset_tx_linkcfg(struct rvu *rvu, int blkaddr,
NIX_AF_TL3_TL2X_LINKX_CFG(schq, link), 0x00);
}
+static void nix_clear_tx_xoff(struct rvu *rvu, int blkaddr,
+ int lvl, int schq)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+ u64 reg;
+
+ /* Skip this if shaping is not supported */
+ if (!hw->cap.nix_shaping)
+ return;
+
+ /* Clear level specific SW_XOFF */
+ switch (lvl) {
+ case NIX_TXSCH_LVL_TL1:
+ reg = NIX_AF_TL1X_SW_XOFF(schq);
+ break;
+ case NIX_TXSCH_LVL_TL2:
+ reg = NIX_AF_TL2X_SW_XOFF(schq);
+ break;
+ case NIX_TXSCH_LVL_TL3:
+ reg = NIX_AF_TL3X_SW_XOFF(schq);
+ break;
+ case NIX_TXSCH_LVL_TL4:
+ reg = NIX_AF_TL4X_SW_XOFF(schq);
+ break;
+ case NIX_TXSCH_LVL_MDQ:
+ reg = NIX_AF_MDQX_SW_XOFF(schq);
+ break;
+ default:
+ return;
+ }
+
+ rvu_write64(rvu, blkaddr, reg, 0x0);
+}
+
static int nix_get_tx_link(struct rvu *rvu, u16 pcifunc)
{
struct rvu_hwinfo *hw = rvu->hw;
@@ -1620,19 +1899,18 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu,
int link, blkaddr, rc = 0;
int lvl, idx, start, end;
struct nix_txsch *txsch;
- struct rvu_pfvf *pfvf;
struct nix_hw *nix_hw;
u32 *pfvf_map;
+ int nixlf;
u16 schq;
- pfvf = rvu_get_pfvf(rvu, pcifunc);
- blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
- if (!pfvf->nixlf || blkaddr < 0)
- return NIX_AF_ERR_AF_LF_INVALID;
+ rc = nix_get_nixlf(rvu, pcifunc, &nixlf, &blkaddr);
+ if (rc)
+ return rc;
nix_hw = get_nix_hw(rvu->hw, blkaddr);
if (!nix_hw)
- return -EINVAL;
+ return NIX_AF_ERR_INVALID_NIXBLK;
mutex_lock(&rvu->rsrc_lock);
@@ -1677,7 +1955,7 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu,
NIX_TXSCHQ_CFG_DONE))
pfvf_map[schq] = TXSCH_MAP(pcifunc, 0);
nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
- nix_reset_tx_shaping(rvu, blkaddr, lvl, schq);
+ nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq);
}
for (idx = 0; idx < req->schq[lvl]; idx++) {
@@ -1686,7 +1964,7 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu,
NIX_TXSCHQ_CFG_DONE))
pfvf_map[schq] = TXSCH_MAP(pcifunc, 0);
nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
- nix_reset_tx_shaping(rvu, blkaddr, lvl, schq);
+ nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq);
}
}
@@ -1703,8 +1981,8 @@ exit:
return rc;
}
-static void nix_smq_flush(struct rvu *rvu, int blkaddr,
- int smq, u16 pcifunc, int nixlf)
+static int nix_smq_flush(struct rvu *rvu, int blkaddr,
+ int smq, u16 pcifunc, int nixlf)
{
int pf = rvu_get_pf(pcifunc);
u8 cgx_id = 0, lmac_id = 0;
@@ -1739,6 +2017,7 @@ static void nix_smq_flush(struct rvu *rvu, int blkaddr,
/* restore cgx tx state */
if (restore_tx_en)
cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
+ return err;
}
static int nix_txschq_free(struct rvu *rvu, u16 pcifunc)
@@ -1747,6 +2026,7 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc)
struct rvu_hwinfo *hw = rvu->hw;
struct nix_txsch *txsch;
struct nix_hw *nix_hw;
+ u16 map_func;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
if (blkaddr < 0)
@@ -1754,25 +2034,42 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc)
nix_hw = get_nix_hw(rvu->hw, blkaddr);
if (!nix_hw)
- return -EINVAL;
+ return NIX_AF_ERR_INVALID_NIXBLK;
nixlf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0);
if (nixlf < 0)
return NIX_AF_ERR_AF_LF_INVALID;
- /* Disable TL2/3 queue links before SMQ flush*/
+ /* Disable TL2/3 queue links and all XOFF's before SMQ flush*/
mutex_lock(&rvu->rsrc_lock);
- for (lvl = NIX_TXSCH_LVL_TL4; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
- if (lvl != NIX_TXSCH_LVL_TL2 && lvl != NIX_TXSCH_LVL_TL4)
+ for (lvl = NIX_TXSCH_LVL_MDQ; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+ txsch = &nix_hw->txsch[lvl];
+
+ if (lvl >= hw->cap.nix_tx_aggr_lvl)
continue;
- txsch = &nix_hw->txsch[lvl];
for (schq = 0; schq < txsch->schq.max; schq++) {
if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc)
continue;
nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
+ nix_clear_tx_xoff(rvu, blkaddr, lvl, schq);
}
}
+ nix_clear_tx_xoff(rvu, blkaddr, NIX_TXSCH_LVL_TL1,
+ nix_get_tx_link(rvu, pcifunc));
+
+ /* On PF cleanup, clear cfg done flag as
+ * PF would have changed default config.
+ */
+ if (!(pcifunc & RVU_PFVF_FUNC_MASK)) {
+ txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL1];
+ schq = nix_get_tx_link(rvu, pcifunc);
+ /* Do not clear pcifunc in txsch->pfvf_map[schq] because
+ * VF might be using this TL1 queue
+ */
+ map_func = TXSCH_MAP_FUNC(txsch->pfvf_map[schq]);
+ txsch->pfvf_map[schq] = TXSCH_SET_FLAG(map_func, 0x0);
+ }
/* Flush SMQs */
txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ];
@@ -1818,6 +2115,7 @@ static int nix_txschq_free_one(struct rvu *rvu,
struct nix_txsch *txsch;
struct nix_hw *nix_hw;
u32 *pfvf_map;
+ int rc;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
if (blkaddr < 0)
@@ -1825,7 +2123,7 @@ static int nix_txschq_free_one(struct rvu *rvu,
nix_hw = get_nix_hw(rvu->hw, blkaddr);
if (!nix_hw)
- return -EINVAL;
+ return NIX_AF_ERR_INVALID_NIXBLK;
nixlf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0);
if (nixlf < 0)
@@ -1842,15 +2140,24 @@ static int nix_txschq_free_one(struct rvu *rvu,
mutex_lock(&rvu->rsrc_lock);
if (TXSCH_MAP_FUNC(pfvf_map[schq]) != pcifunc) {
- mutex_unlock(&rvu->rsrc_lock);
+ rc = NIX_AF_ERR_TLX_INVALID;
goto err;
}
+ /* Clear SW_XOFF of this resource only.
+ * For SMQ level, all path XOFFs
+ * need to be cleared by the user.
+ */
+ nix_clear_tx_xoff(rvu, blkaddr, lvl, schq);
+
/* Flush if it is a SMQ. Onus of disabling
* TL2/3 queue links before SMQ flush is on user
*/
- if (lvl == NIX_TXSCH_LVL_SMQ)
- nix_smq_flush(rvu, blkaddr, schq, pcifunc, nixlf);
+ if (lvl == NIX_TXSCH_LVL_SMQ &&
+ nix_smq_flush(rvu, blkaddr, schq, pcifunc, nixlf)) {
+ rc = NIX_AF_SMQ_FLUSH_FAILED;
+ goto err;
+ }
/* Free the resource */
rvu_free_rsrc(&txsch->schq, schq);
@@ -1858,7 +2165,8 @@ static int nix_txschq_free_one(struct rvu *rvu,
mutex_unlock(&rvu->rsrc_lock);
return 0;
err:
- return NIX_AF_ERR_TLX_INVALID;
+ mutex_unlock(&rvu->rsrc_lock);
+ return rc;
}
int rvu_mbox_handler_nix_txsch_free(struct rvu *rvu,
@@ -1941,6 +2249,11 @@ static bool is_txschq_shaping_valid(struct rvu_hwinfo *hw, int lvl, u64 reg)
regbase == NIX_AF_TL4X_PIR(0))
return false;
break;
+ case NIX_TXSCH_LVL_MDQ:
+ if (regbase == NIX_AF_MDQX_CIR(0) ||
+ regbase == NIX_AF_MDQX_PIR(0))
+ return false;
+ break;
}
return true;
}
@@ -1958,12 +2271,48 @@ static void nix_tl1_default_cfg(struct rvu *rvu, struct nix_hw *nix_hw,
return;
rvu_write64(rvu, blkaddr, NIX_AF_TL1X_TOPOLOGY(schq),
(TXSCH_TL1_DFLT_RR_PRIO << 1));
- rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq),
- TXSCH_TL1_DFLT_RR_QTM);
+
+ /* On OcteonTx2 the config was in bytes; on newer silicons
+ * it's changed to weight.
+ */
+ if (!rvu->hw->cap.nix_common_dwrr_mtu)
+ rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq),
+ TXSCH_TL1_DFLT_RR_QTM);
+ else
+ rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq),
+ CN10K_MAX_DWRR_WEIGHT);
+
rvu_write64(rvu, blkaddr, NIX_AF_TL1X_CIR(schq), 0x00);
pfvf_map[schq] = TXSCH_SET_FLAG(pfvf_map[schq], NIX_TXSCHQ_CFG_DONE);
}
+/* Register offset - [15:0]
+ * Scheduler Queue number - [25:16]
+ */
+#define NIX_TX_SCHQ_MASK GENMASK_ULL(25, 0)
+
+static int nix_txschq_cfg_read(struct rvu *rvu, struct nix_hw *nix_hw,
+ int blkaddr, struct nix_txschq_config *req,
+ struct nix_txschq_config *rsp)
+{
+ u16 pcifunc = req->hdr.pcifunc;
+ int idx, schq;
+ u64 reg;
+
+ for (idx = 0; idx < req->num_regs; idx++) {
+ reg = req->reg[idx];
+ reg &= NIX_TX_SCHQ_MASK;
+ schq = TXSCHQ_IDX(reg, TXSCHQ_IDX_SHIFT);
+ if (!rvu_check_valid_reg(TXSCHQ_HWREGMAP, req->lvl, reg) ||
+ !is_valid_txschq(rvu, blkaddr, req->lvl, pcifunc, schq))
+ return NIX_AF_INVAL_TXSCHQ_CFG;
+ rsp->regval[idx] = rvu_read64(rvu, blkaddr, reg);
+ }
+ rsp->lvl = req->lvl;
+ rsp->num_regs = req->num_regs;
+ return 0;
+}
+
static void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr,
u16 pcifunc, struct nix_txsch *txsch)
{
@@ -1995,11 +2344,11 @@ static void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr,
int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu,
struct nix_txschq_config *req,
- struct msg_rsp *rsp)
+ struct nix_txschq_config *rsp)
{
+ u64 reg, val, regval, schq_regbase, val_mask;
struct rvu_hwinfo *hw = rvu->hw;
u16 pcifunc = req->hdr.pcifunc;
- u64 reg, regval, schq_regbase;
struct nix_txsch *txsch;
struct nix_hw *nix_hw;
int blkaddr, idx, err;
@@ -2016,7 +2365,10 @@ int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu,
nix_hw = get_nix_hw(rvu->hw, blkaddr);
if (!nix_hw)
- return -EINVAL;
+ return NIX_AF_ERR_INVALID_NIXBLK;
+
+ if (req->read)
+ return nix_txschq_cfg_read(rvu, nix_hw, blkaddr, req, rsp);
txsch = &nix_hw->txsch[req->lvl];
pfvf_map = txsch->pfvf_map;
@@ -2032,8 +2384,10 @@ int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu,
for (idx = 0; idx < req->num_regs; idx++) {
reg = req->reg[idx];
+ reg &= NIX_TX_SCHQ_MASK;
regval = req->regval[idx];
schq_regbase = reg & 0xFFFF;
+ val_mask = req->regval_mask[idx];
if (!is_txschq_hierarchy_valid(rvu, pcifunc, blkaddr,
txsch->lvl, reg, regval))
@@ -2043,6 +2397,15 @@ int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu,
if (!is_txschq_shaping_valid(hw, req->lvl, reg))
continue;
+ val = rvu_read64(rvu, blkaddr, reg);
+ regval = (val & val_mask) | (regval & ~val_mask);
+
+ /* Handle shaping state toggle specially */
+ if (hw->cap.nix_shaper_toggle_wait &&
+ handle_txschq_shaper_update(rvu, blkaddr, nixlf,
+ req->lvl, reg, regval))
+ continue;
+
/* Replace PF/VF visible NIXLF slot with HW NIXLF id */
if (schq_regbase == NIX_AF_SMQX_CFG(0)) {
nixlf = rvu_get_lf(rvu, &hw->block[blkaddr],
@@ -2083,7 +2446,6 @@ int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu,
rvu_nix_tx_tl2_cfg(rvu, blkaddr, pcifunc,
&nix_hw->txsch[NIX_TXSCH_LVL_TL2]);
-
return 0;
}
@@ -2114,8 +2476,12 @@ static int nix_tx_vtag_free(struct rvu *rvu, int blkaddr,
u16 pcifunc, int index)
{
struct nix_hw *nix_hw = get_nix_hw(rvu->hw, blkaddr);
- struct nix_txvlan *vlan = &nix_hw->txvlan;
+ struct nix_txvlan *vlan;
+
+ if (!nix_hw)
+ return NIX_AF_ERR_INVALID_NIXBLK;
+ vlan = &nix_hw->txvlan;
if (vlan->entry2pfvf_map[index] != pcifunc)
return NIX_AF_ERR_PARAM;
@@ -2156,10 +2522,15 @@ static int nix_tx_vtag_alloc(struct rvu *rvu, int blkaddr,
u64 vtag, u8 size)
{
struct nix_hw *nix_hw = get_nix_hw(rvu->hw, blkaddr);
- struct nix_txvlan *vlan = &nix_hw->txvlan;
+ struct nix_txvlan *vlan;
u64 regval;
int index;
+ if (!nix_hw)
+ return NIX_AF_ERR_INVALID_NIXBLK;
+
+ vlan = &nix_hw->txvlan;
+
mutex_lock(&vlan->rsrc_lock);
index = rvu_alloc_rsrc(&vlan->rsrc);
@@ -2184,12 +2555,16 @@ static int nix_tx_vtag_decfg(struct rvu *rvu, int blkaddr,
struct nix_vtag_config *req)
{
struct nix_hw *nix_hw = get_nix_hw(rvu->hw, blkaddr);
- struct nix_txvlan *vlan = &nix_hw->txvlan;
u16 pcifunc = req->hdr.pcifunc;
int idx0 = req->tx.vtag0_idx;
int idx1 = req->tx.vtag1_idx;
+ struct nix_txvlan *vlan;
int err = 0;
+ if (!nix_hw)
+ return NIX_AF_ERR_INVALID_NIXBLK;
+
+ vlan = &nix_hw->txvlan;
if (req->tx.free_vtag0 && req->tx.free_vtag1)
if (vlan->entry2pfvf_map[idx0] != pcifunc ||
vlan->entry2pfvf_map[idx1] != pcifunc)
@@ -2216,9 +2591,13 @@ static int nix_tx_vtag_cfg(struct rvu *rvu, int blkaddr,
struct nix_vtag_config_rsp *rsp)
{
struct nix_hw *nix_hw = get_nix_hw(rvu->hw, blkaddr);
- struct nix_txvlan *vlan = &nix_hw->txvlan;
+ struct nix_txvlan *vlan;
u16 pcifunc = req->hdr.pcifunc;
+ if (!nix_hw)
+ return NIX_AF_ERR_INVALID_NIXBLK;
+
+ vlan = &nix_hw->txvlan;
if (req->tx.cfg_vtag0) {
rsp->vtag0_idx =
nix_tx_vtag_alloc(rvu, blkaddr,
@@ -2456,14 +2835,19 @@ static int nix_update_mce_rule(struct rvu *rvu, u16 pcifunc,
struct npc_mcam *mcam = &rvu->hw->mcam;
struct rvu_hwinfo *hw = rvu->hw;
struct nix_mce_list *mce_list;
+ int pf;
- /* skip multicast pkt replication for AF's VFs */
- if (is_afvf(pcifunc))
+ /* skip multicast pkt replication for AF's VFs & SDP links */
+ if (is_afvf(pcifunc) || is_sdp_pfvf(pcifunc))
return 0;
if (!hw->cap.nix_rx_multicast)
return 0;
+ pf = rvu_get_pf(pcifunc);
+ if (!is_pf_cgxmapped(rvu, pf))
+ return 0;
+
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
if (blkaddr < 0)
return -EINVAL;
@@ -2667,6 +3051,15 @@ static int nix_setup_txschq(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr)
for (schq = 0; schq < txsch->schq.max; schq++)
txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE);
}
+
+ /* Setup a default value of 8192 as DWRR MTU */
+ if (rvu->hw->cap.nix_common_dwrr_mtu) {
+ rvu_write64(rvu, blkaddr, NIX_AF_DWRR_RPM_MTU,
+ convert_bytes_to_dwrr_mtu(8192));
+ rvu_write64(rvu, blkaddr, NIX_AF_DWRR_SDP_MTU,
+ convert_bytes_to_dwrr_mtu(8192));
+ }
+
return 0;
}
@@ -2743,6 +3136,7 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req,
struct nix_hw_info *rsp)
{
u16 pcifunc = req->hdr.pcifunc;
+ u64 dwrr_mtu;
int blkaddr;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
@@ -2755,6 +3149,20 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req,
rvu_get_lmac_link_max_frs(rvu, &rsp->max_mtu);
rsp->min_mtu = NIC_HW_MIN_FRS;
+
+ if (!rvu->hw->cap.nix_common_dwrr_mtu) {
+ /* Return '1' on OTx2 */
+ rsp->rpm_dwrr_mtu = 1;
+ rsp->sdp_dwrr_mtu = 1;
+ return 0;
+ }
+
+ dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU);
+ rsp->rpm_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
+ dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_SDP_MTU);
+ rsp->sdp_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
return 0;
}
@@ -3068,7 +3476,7 @@ static int reserve_flowkey_alg_idx(struct rvu *rvu, int blkaddr, u32 flow_cfg)
hw = get_nix_hw(rvu->hw, blkaddr);
if (!hw)
- return -EINVAL;
+ return NIX_AF_ERR_INVALID_NIXBLK;
/* No room to add new flow hash algoritham */
if (hw->flowkey.in_use >= NIX_FLOW_KEY_ALG_MAX)
@@ -3108,7 +3516,7 @@ int rvu_mbox_handler_nix_rss_flowkey_cfg(struct rvu *rvu,
nix_hw = get_nix_hw(rvu->hw, blkaddr);
if (!nix_hw)
- return -EINVAL;
+ return NIX_AF_ERR_INVALID_NIXBLK;
alg_idx = get_flowkey_alg_idx(nix_hw, req->flowkey_cfg);
/* Failed to get algo index from the exiting list, reserve new */
@@ -3366,6 +3774,77 @@ static void nix_find_link_frs(struct rvu *rvu,
req->minlen = minlen;
}
+static int
+nix_config_link_credits(struct rvu *rvu, int blkaddr, int link,
+ u16 pcifunc, u64 tx_credits)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+ int pf = rvu_get_pf(pcifunc);
+ u8 cgx_id = 0, lmac_id = 0;
+ unsigned long poll_tmo;
+ bool restore_tx_en = 0;
+ struct nix_hw *nix_hw;
+ u64 cfg, sw_xoff = 0;
+ u32 schq = 0;
+ u32 credits;
+ int rc;
+
+ nix_hw = get_nix_hw(rvu->hw, blkaddr);
+ if (!nix_hw)
+ return NIX_AF_ERR_INVALID_NIXBLK;
+
+ if (tx_credits == nix_hw->tx_credits[link])
+ return 0;
+
+ /* Enable cgx tx if disabled for credits to be back */
+ if (is_pf_cgxmapped(rvu, pf)) {
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+ restore_tx_en = !cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu),
+ lmac_id, true);
+ }
+
+ mutex_lock(&rvu->rsrc_lock);
+ /* Disable new traffic to link */
+ if (hw->cap.nix_shaping) {
+ schq = nix_get_tx_link(rvu, pcifunc);
+ sw_xoff = rvu_read64(rvu, blkaddr, NIX_AF_TL1X_SW_XOFF(schq));
+ rvu_write64(rvu, blkaddr,
+ NIX_AF_TL1X_SW_XOFF(schq), BIT_ULL(0));
+ }
+
+ rc = -EBUSY;
+ poll_tmo = jiffies + usecs_to_jiffies(10000);
+ /* Wait for credits to return */
+ do {
+ if (time_after(jiffies, poll_tmo))
+ goto exit;
+ usleep_range(100, 200);
+
+ cfg = rvu_read64(rvu, blkaddr,
+ NIX_AF_TX_LINKX_NORM_CREDIT(link));
+ credits = (cfg >> 12) & 0xFFFFFULL;
+ } while (credits != nix_hw->tx_credits[link]);
+
+ cfg &= ~(0xFFFFFULL << 12);
+ cfg |= (tx_credits << 12);
+ rvu_write64(rvu, blkaddr, NIX_AF_TX_LINKX_NORM_CREDIT(link), cfg);
+ rc = 0;
+
+ nix_hw->tx_credits[link] = tx_credits;
+
+exit:
+ /* Enable traffic back */
+ if (hw->cap.nix_shaping && !sw_xoff)
+ rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SW_XOFF(schq), 0);
+
+ /* Restore state of cgx tx */
+ if (restore_tx_en)
+ cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
+
+ mutex_unlock(&rvu->rsrc_lock);
+ return rc;
+}
+
int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req,
struct msg_rsp *rsp)
{
@@ -3376,6 +3855,7 @@ int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req,
struct nix_txsch *txsch;
u64 cfg, lmac_fifo_len;
struct nix_hw *nix_hw;
+ struct rvu_pfvf *pfvf;
u8 cgx = 0, lmac = 0;
u16 max_mtu;
@@ -3385,7 +3865,7 @@ int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req,
nix_hw = get_nix_hw(rvu->hw, blkaddr);
if (!nix_hw)
- return -EINVAL;
+ return NIX_AF_ERR_INVALID_NIXBLK;
if (is_afvf(pcifunc))
rvu_get_lbk_link_max_frs(rvu, &max_mtu);
@@ -3432,7 +3912,8 @@ rx_frscfg:
link = (cgx * hw->lmac_per_cgx) + lmac;
} else if (pf == 0) {
/* For VFs of PF0 ingress is LBK port, so config LBK link */
- link = hw->cgx_links;
+ pfvf = rvu_get_pfvf(rvu, pcifunc);
+ link = hw->cgx_links + pfvf->lbkid;
}
if (link < 0)
@@ -3454,11 +3935,8 @@ linkcfg:
lmac_fifo_len =
rvu_cgx_get_fifolen(rvu) /
cgx_get_lmac_cnt(rvu_cgx_pdata(cgx, rvu));
- cfg = rvu_read64(rvu, blkaddr, NIX_AF_TX_LINKX_NORM_CREDIT(link));
- cfg &= ~(0xFFFFFULL << 12);
- cfg |= ((lmac_fifo_len - req->maxlen) / 16) << 12;
- rvu_write64(rvu, blkaddr, NIX_AF_TX_LINKX_NORM_CREDIT(link), cfg);
- return 0;
+ return nix_config_link_credits(rvu, blkaddr, link, pcifunc,
+ (lmac_fifo_len - req->maxlen) / 16);
}
int rvu_mbox_handler_nix_set_rx_cfg(struct rvu *rvu, struct nix_rx_cfg *req,
@@ -3502,12 +3980,13 @@ static u64 rvu_get_lbk_link_credits(struct rvu *rvu, u16 lbk_max_frs)
return 1600; /* 16 * max LBK datarate = 16 * 100Gbps */
}
-static void nix_link_config(struct rvu *rvu, int blkaddr)
+static void nix_link_config(struct rvu *rvu, int blkaddr,
+ struct nix_hw *nix_hw)
{
struct rvu_hwinfo *hw = rvu->hw;
int cgx, lmac_cnt, slink, link;
u16 lbk_max_frs, lmac_max_frs;
- u64 tx_credits;
+ u64 tx_credits, cfg;
rvu_get_lbk_link_max_frs(rvu, &lbk_max_frs);
rvu_get_lmac_link_max_frs(rvu, &lmac_max_frs);
@@ -3538,15 +4017,18 @@ static void nix_link_config(struct rvu *rvu, int blkaddr)
*/
for (cgx = 0; cgx < hw->cgx; cgx++) {
lmac_cnt = cgx_get_lmac_cnt(rvu_cgx_pdata(cgx, rvu));
+ /* Skip when cgx is not available or lmac cnt is zero */
+ if (lmac_cnt <= 0)
+ continue;
tx_credits = ((rvu_cgx_get_fifolen(rvu) / lmac_cnt) -
lmac_max_frs) / 16;
/* Enable credits and set credit pkt count to max allowed */
- tx_credits = (tx_credits << 12) | (0x1FF << 2) | BIT_ULL(1);
+ cfg = (tx_credits << 12) | (0x1FF << 2) | BIT_ULL(1);
slink = cgx * hw->lmac_per_cgx;
for (link = slink; link < (slink + lmac_cnt); link++) {
+ nix_hw->tx_credits[link] = tx_credits;
rvu_write64(rvu, blkaddr,
- NIX_AF_TX_LINKX_NORM_CREDIT(link),
- tx_credits);
+ NIX_AF_TX_LINKX_NORM_CREDIT(link), cfg);
}
}
@@ -3554,6 +4036,7 @@ static void nix_link_config(struct rvu *rvu, int blkaddr)
slink = hw->cgx_links;
for (link = slink; link < (slink + hw->lbk_links); link++) {
tx_credits = rvu_get_lbk_link_credits(rvu, lbk_max_frs);
+ nix_hw->tx_credits[link] = tx_credits;
/* Enable credits and set credit pkt count to max allowed */
tx_credits = (tx_credits << 12) | (0x1FF << 2) | BIT_ULL(1);
rvu_write64(rvu, blkaddr,
@@ -3647,6 +4130,28 @@ static int nix_aq_init(struct rvu *rvu, struct rvu_block *block)
return 0;
}
+static void rvu_nix_setup_capabilities(struct rvu *rvu, int blkaddr)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+ u64 hw_const;
+
+ hw_const = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
+
+ /* On OcteonTx2 DWRR quantum is directly configured into each of
+ * the transmit scheduler queues. And PF/VF drivers were free to
+ * config any value up to 2^24.
+ * On CN10K, HW is modified, the quantum configuration at scheduler
+ * queues is in terms of weight. And SW needs to setup a base DWRR MTU
+ * at NIX_AF_DWRR_RPM_MTU / NIX_AF_DWRR_SDP_MTU. HW will do
+ * 'DWRR MTU * weight' to get the quantum.
+ *
+ * Check if HW uses a common MTU for all DWRR quantum configs.
+ * On OcteonTx2 this register field is '0'.
+ */
+ if (((hw_const >> 56) & 0x10) == 0x10)
+ hw->cap.nix_common_dwrr_mtu = true;
+}
+
static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
{
const struct npc_lt_def_cfg *ltdefs;
@@ -3684,6 +4189,9 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
if (err)
return err;
+ /* Setup capabilities of the NIX block */
+ rvu_nix_setup_capabilities(rvu, blkaddr);
+
/* Initialize admin queue */
err = nix_aq_init(rvu, block);
if (err)
@@ -3692,6 +4200,9 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
/* Restore CINT timer delay to HW reset values */
rvu_write64(rvu, blkaddr, NIX_AF_CINT_DELAY, 0x0ULL);
+ /* For better performance use NDC TX instead of NDC RX for SQ's SQEs */
+ rvu_write64(rvu, blkaddr, NIX_AF_SEB_CFG, 0x1ULL);
+
if (is_block_implemented(hw, blkaddr)) {
err = nix_setup_txschq(rvu, nix_hw, blkaddr);
if (err)
@@ -3792,8 +4303,13 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
if (err)
return err;
+ nix_hw->tx_credits = kcalloc(hw->cgx_links + hw->lbk_links,
+ sizeof(u64), GFP_KERNEL);
+ if (!nix_hw->tx_credits)
+ return -ENOMEM;
+
/* Initialize CGX/LBK/SDP link credits, min/max pkt lengths */
- nix_link_config(rvu, blkaddr);
+ nix_link_config(rvu, blkaddr, nix_hw);
/* Enable Channel backpressure */
rvu_write64(rvu, blkaddr, NIX_AF_RX_CFG, BIT_ULL(0));
@@ -3849,7 +4365,9 @@ static void rvu_nix_block_freemem(struct rvu *rvu, int blkaddr,
kfree(txsch->schq.bmap);
}
- nix_ipolicer_freemem(nix_hw);
+ kfree(nix_hw->tx_credits);
+
+ nix_ipolicer_freemem(rvu, nix_hw);
vlan = &nix_hw->txvlan;
kfree(vlan->rsrc.bmap);
@@ -4027,7 +4545,7 @@ int rvu_mbox_handler_nix_lso_format_cfg(struct rvu *rvu,
nix_hw = get_nix_hw(rvu->hw, blkaddr);
if (!nix_hw)
- return -EINVAL;
+ return NIX_AF_ERR_INVALID_NIXBLK;
/* Find existing matching LSO format, if any */
for (idx = 0; idx < nix_hw->lso.in_use; idx++) {
@@ -4225,11 +4743,14 @@ static int nix_setup_ipolicers(struct rvu *rvu,
return 0;
}
-static void nix_ipolicer_freemem(struct nix_hw *nix_hw)
+static void nix_ipolicer_freemem(struct rvu *rvu, struct nix_hw *nix_hw)
{
struct nix_ipolicer *ipolicer;
int layer;
+ if (!rvu->hw->cap.ipolicer)
+ return;
+
for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) {
ipolicer = &nix_hw->ipolicer[layer];
@@ -4652,3 +5173,36 @@ static void nix_clear_ratelimit_aggr(struct rvu *rvu, struct nix_hw *nix_hw,
rvu_free_rsrc(&ipolicer->band_prof, mid_prof);
}
}
+
+int rvu_mbox_handler_nix_bandprof_get_hwinfo(struct rvu *rvu, struct msg_req *req,
+ struct nix_bandprof_get_hwinfo_rsp *rsp)
+{
+ struct nix_ipolicer *ipolicer;
+ int blkaddr, layer, err;
+ struct nix_hw *nix_hw;
+ u64 tu;
+
+ if (!rvu->hw->cap.ipolicer)
+ return NIX_AF_ERR_IPOLICER_NOTSUPP;
+
+ err = nix_get_struct_ptrs(rvu, req->hdr.pcifunc, &nix_hw, &blkaddr);
+ if (err)
+ return err;
+
+ /* Return number of bandwidth profiles free at each layer */
+ mutex_lock(&rvu->rsrc_lock);
+ for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) {
+ if (layer == BAND_PROF_INVAL_LAYER)
+ continue;
+
+ ipolicer = &nix_hw->ipolicer[layer];
+ rsp->prof_count[layer] = rvu_rsrc_free_count(&ipolicer->band_prof);
+ }
+ mutex_unlock(&rvu->rsrc_lock);
+
+ /* Set the policer timeunit in nanosec */
+ tu = rvu_read64(rvu, blkaddr, NIX_AF_PL_TS) & GENMASK_ULL(9, 0);
+ rsp->policer_timeunit = (tu + 1) * 100;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
index 24c2bfdfec4e..70bd036ed76e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
@@ -1,11 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/module.h>
@@ -419,6 +416,10 @@ exit:
rsp->stack_pg_ptrs = (cfg >> 8) & 0xFF;
rsp->stack_pg_bytes = cfg & 0xFF;
rsp->qints = (cfg >> 28) & 0xFFF;
+ if (!is_rvu_otx2(rvu)) {
+ cfg = rvu_read64(rvu, block->addr, NPA_AF_BATCH_CTL);
+ rsp->cache_lines = (cfg >> 1) & 0x3F;
+ }
return rc;
}
@@ -478,6 +479,13 @@ static int npa_aq_init(struct rvu *rvu, struct rvu_block *block)
#endif
rvu_write64(rvu, block->addr, NPA_AF_NDC_CFG, cfg);
+ /* For CN10K NPA BATCH DMA set 35 cache lines */
+ if (!is_rvu_otx2(rvu)) {
+ cfg = rvu_read64(rvu, block->addr, NPA_AF_BATCH_CTL);
+ cfg &= ~0x7EULL;
+ cfg |= BIT_ULL(6) | BIT_ULL(2) | BIT_ULL(1);
+ rvu_write64(rvu, block->addr, NPA_AF_BATCH_CTL, cfg);
+ }
/* Result structure can be followed by Aura/Pool context at
* RES + 128bytes and a write mask at RES + 256 bytes, depending on
* operation type. Alloc sufficient result memory for all operations.
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 52b255426c22..5efb4174e82d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -1,11 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/bitfield.h>
@@ -23,7 +20,7 @@
#define RSVD_MCAM_ENTRIES_PER_NIXLF 1 /* Ucast for LFs */
#define NPC_PARSE_RESULT_DMAC_OFFSET 8
-#define NPC_HW_TSTAMP_OFFSET 8
+#define NPC_HW_TSTAMP_OFFSET 8ULL
#define NPC_KEX_CHAN_MASK 0xFFFULL
#define NPC_KEX_PF_FUNC_MASK 0xFFFFULL
@@ -85,36 +82,6 @@ static int npc_mcam_verify_pf_func(struct rvu *rvu,
return 0;
}
-int npc_mcam_verify_channel(struct rvu *rvu, u16 pcifunc, u8 intf, u16 channel)
-{
- int pf = rvu_get_pf(pcifunc);
- u8 cgx_id, lmac_id;
- int base = 0, end;
-
- if (is_npc_intf_tx(intf))
- return 0;
-
- /* return in case of AF installed rules */
- if (is_pffunc_af(pcifunc))
- return 0;
-
- if (is_afvf(pcifunc)) {
- end = rvu_get_num_lbk_chans();
- if (end < 0)
- return -EINVAL;
- } else {
- rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
- base = rvu_nix_chan_cgx(rvu, cgx_id, lmac_id, 0x0);
- /* CGX mapped functions has maximum of 16 channels */
- end = rvu_nix_chan_cgx(rvu, cgx_id, lmac_id, 0xF);
- }
-
- if (channel < base || channel > end)
- return -EINVAL;
-
- return 0;
-}
-
void rvu_npc_set_pkind(struct rvu *rvu, int pkind, struct rvu_pfvf *pfvf)
{
int blkaddr;
@@ -634,8 +601,8 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
struct nix_rx_action action;
int blkaddr, index;
- /* AF's VFs work in promiscuous mode */
- if (is_afvf(pcifunc))
+ /* AF's and SDP VFs work in promiscuous mode */
+ if (is_afvf(pcifunc) || is_sdp_vf(pcifunc))
return;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
@@ -724,7 +691,17 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
action.index = pfvf->promisc_mce_idx;
}
- req.chan_mask = 0xFFFU;
+ /* For cn10k the upper two bits of the channel number are
+ * the cpt channel number. By masking out these bits in the
+ * mcam entry, the same entry used for NIX will allow packets
+ * received from cpt to be parsed.
+ */
+ if (!is_rvu_otx2(rvu)) {
+ req.chan_mask = NIX_CHAN_CPT_X2P_MASK;
+ } else {
+ req.chan_mask = 0xFFFU;
+ }
+
if (chan_cnt > 1) {
if (!is_power_of_2(chan_cnt)) {
dev_err(rvu->dev,
@@ -853,7 +830,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
u16 vf_func;
/* Only CGX PF/VF can add allmulticast entry */
- if (is_afvf(pcifunc))
+ if (is_afvf(pcifunc) && is_sdp_vf(pcifunc))
return;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
@@ -938,7 +915,7 @@ void rvu_npc_enable_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
int blkaddr, u16 pcifunc, u64 rx_action)
{
- int actindex, index, bank;
+ int actindex, index, bank, entry;
bool enable;
if (!(pcifunc & RVU_PFVF_FUNC_MASK))
@@ -949,7 +926,7 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
if (mcam->entry2target_pffunc[index] == pcifunc) {
bank = npc_get_bank(mcam, index);
actindex = index;
- index &= (mcam->banksize - 1);
+ entry = index & (mcam->banksize - 1);
/* read vf flow entry enable status */
enable = is_mcam_entry_enabled(rvu, mcam, blkaddr,
@@ -959,7 +936,7 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
false);
/* update 'action' */
rvu_write64(rvu, blkaddr,
- NPC_AF_MCAMEX_BANKX_ACTION(index, bank),
+ NPC_AF_MCAMEX_BANKX_ACTION(entry, bank),
rx_action);
if (enable)
npc_enable_mcam_entry(rvu, mcam, blkaddr,
@@ -1898,9 +1875,22 @@ static void rvu_npc_hw_init(struct rvu *rvu, int blkaddr)
mcam->banks = (npc_const >> 44) & 0xFULL;
mcam->banksize = (npc_const >> 28) & 0xFFFFULL;
+ hw->npc_stat_ena = BIT_ULL(9);
/* Extended set */
if (npc_const2) {
hw->npc_ext_set = true;
+ /* 96xx supports only match_stats and npc_counters
+ * reflected in NPC_AF_CONST reg.
+ * STAT_SEL and ENA are at [0:8] and 9 bit positions.
+ * 98xx has both match_stat and ext and npc_counter
+ * reflected in NPC_AF_CONST2
+ * STAT_SEL_EXT added at [12:14] bit position.
+ * cn10k supports only ext and hence npc_counters in
+ * NPC_AF_CONST is 0 and npc_counters reflected in NPC_AF_CONST2.
+ * STAT_SEL bitpos incremented from [0:8] to [0:11] and ENA bit moved to 63
+ */
+ if (!hw->npc_counters)
+ hw->npc_stat_ena = BIT_ULL(63);
hw->npc_counters = (npc_const2 >> 16) & 0xFFFFULL;
mcam->banksize = npc_const2 & 0xFFFFULL;
}
@@ -1955,7 +1945,7 @@ static void rvu_npc_setup_interfaces(struct rvu *rvu, int blkaddr)
rvu_write64(rvu, blkaddr,
NPC_AF_INTFX_MISS_STAT_ACT(intf),
((mcam->rx_miss_act_cntr >> 9) << 12) |
- BIT_ULL(9) | mcam->rx_miss_act_cntr);
+ hw->npc_stat_ena | mcam->rx_miss_act_cntr);
}
/* Configure TX interfaces */
@@ -2030,14 +2020,15 @@ int rvu_npc_init(struct rvu *rvu)
/* Enable below for Rx pkts.
* - Outer IPv4 header checksum validation.
- * - Detect outer L2 broadcast address and set NPC_RESULT_S[L2M].
+ * - Detect outer L2 broadcast address and set NPC_RESULT_S[L2B].
+ * - Detect outer L2 multicast address and set NPC_RESULT_S[L2M].
* - Inner IPv4 header checksum validation.
* - Set non zero checksum error code value
*/
rvu_write64(rvu, blkaddr, NPC_AF_PCK_CFG,
rvu_read64(rvu, blkaddr, NPC_AF_PCK_CFG) |
- BIT_ULL(32) | BIT_ULL(24) | BIT_ULL(6) |
- BIT_ULL(2) | BIT_ULL(1));
+ ((u64)NPC_EC_OIP4_CSUM << 32) | (NPC_EC_IIP4_CSUM << 24) |
+ BIT_ULL(7) | BIT_ULL(6) | BIT_ULL(2) | BIT_ULL(1));
rvu_npc_setup_interfaces(rvu, blkaddr);
@@ -2147,18 +2138,16 @@ static void npc_map_mcam_entry_and_cntr(struct rvu *rvu, struct npc_mcam *mcam,
int blkaddr, u16 entry, u16 cntr)
{
u16 index = entry & (mcam->banksize - 1);
- u16 bank = npc_get_bank(mcam, entry);
+ u32 bank = npc_get_bank(mcam, entry);
+ struct rvu_hwinfo *hw = rvu->hw;
/* Set mapping and increment counter's refcnt */
mcam->entry2cntr_map[entry] = cntr;
mcam->cntr_refcnt[cntr]++;
- /* Enable stats
- * NPC_AF_MCAMEX_BANKX_STAT_ACT[14:12] - counter[11:9]
- * NPC_AF_MCAMEX_BANKX_STAT_ACT[8:0] - counter[8:0]
- */
+ /* Enable stats */
rvu_write64(rvu, blkaddr,
NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank),
- ((cntr >> 9) << 12) | BIT_ULL(9) | cntr);
+ ((cntr >> 9) << 12) | hw->npc_stat_ena | cntr);
}
static void npc_unmap_mcam_entry_and_cntr(struct rvu *rvu,
@@ -2166,7 +2155,7 @@ static void npc_unmap_mcam_entry_and_cntr(struct rvu *rvu,
int blkaddr, u16 entry, u16 cntr)
{
u16 index = entry & (mcam->banksize - 1);
- u16 bank = npc_get_bank(mcam, entry);
+ u32 bank = npc_get_bank(mcam, entry);
/* Remove mapping and reduce counter's refcnt */
mcam->entry2cntr_map[entry] = NPC_MCAM_INVALID_MAP;
@@ -2414,6 +2403,17 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc,
goto alloc;
}
+ /* For a VF base MCAM match rule is set by its PF. And all the
+ * further MCAM rules installed by VF on its own are
+ * concatenated with the base rule set by its PF. Hence PF entries
+ * should be at lower priority compared to VF entries. Otherwise
+ * base rule is hit always and rules installed by VF will be of
+ * no use. Hence if the request is from PF and NOT a priority
+ * allocation request then allocate low priority entries.
+ */
+ if (!(pcifunc & RVU_PFVF_FUNC_MASK))
+ goto lprio_alloc;
+
/* Find out the search range for non-priority allocation request
*
* Get MCAM free entry count in middle zone.
@@ -2439,6 +2439,7 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc,
/* Not enough free entries, search all entries in reverse,
* so that low priority ones will get used up.
*/
+lprio_alloc:
reverse = true;
start = 0;
end = mcam->bmap_entries;
@@ -2673,7 +2674,6 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu,
struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
struct npc_mcam *mcam = &rvu->hw->mcam;
u16 pcifunc = req->hdr.pcifunc;
- u16 channel, chan_mask;
int blkaddr, rc;
u8 nix_intf;
@@ -2681,10 +2681,6 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu,
if (blkaddr < 0)
return NPC_MCAM_INVALID_REQ;
- chan_mask = req->entry_data.kw_mask[0] & NPC_KEX_CHAN_MASK;
- channel = req->entry_data.kw[0] & NPC_KEX_CHAN_MASK;
- channel &= chan_mask;
-
mutex_lock(&mcam->lock);
rc = npc_mcam_verify_entry(mcam, pcifunc, req->entry);
if (rc)
@@ -2707,12 +2703,6 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu,
nix_intf = pfvf->nix_rx_intf;
if (!is_pffunc_af(pcifunc) &&
- npc_mcam_verify_channel(rvu, pcifunc, req->intf, channel)) {
- rc = NPC_MCAM_INVALID_REQ;
- goto exit;
- }
-
- if (!is_pffunc_af(pcifunc) &&
npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, pcifunc)) {
rc = NPC_MCAM_INVALID_REQ;
goto exit;
@@ -2788,8 +2778,8 @@ int rvu_mbox_handler_npc_mcam_shift_entry(struct rvu *rvu,
struct npc_mcam *mcam = &rvu->hw->mcam;
u16 pcifunc = req->hdr.pcifunc;
u16 old_entry, new_entry;
+ int blkaddr, rc = 0;
u16 index, cntr;
- int blkaddr, rc;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
if (blkaddr < 0)
@@ -2990,10 +2980,11 @@ int rvu_mbox_handler_npc_mcam_unmap_counter(struct rvu *rvu,
index = find_next_bit(mcam->bmap, mcam->bmap_entries, entry);
if (index >= mcam->bmap_entries)
break;
+ entry = index + 1;
+
if (mcam->entry2cntr_map[index] != req->cntr)
continue;
- entry = index + 1;
npc_unmap_mcam_entry_and_cntr(rvu, mcam, blkaddr,
index, req->cntr);
}
@@ -3058,7 +3049,6 @@ int rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu,
struct npc_mcam *mcam = &rvu->hw->mcam;
u16 entry = NPC_MCAM_ENTRY_INVALID;
u16 cntr = NPC_MCAM_ENTRY_INVALID;
- u16 channel, chan_mask;
int blkaddr, rc;
u8 nix_intf;
@@ -3069,13 +3059,6 @@ int rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu,
if (!is_npc_interface_valid(rvu, req->intf))
return NPC_MCAM_INVALID_REQ;
- chan_mask = req->entry_data.kw_mask[0] & NPC_KEX_CHAN_MASK;
- channel = req->entry_data.kw[0] & NPC_KEX_CHAN_MASK;
- channel &= chan_mask;
-
- if (npc_mcam_verify_channel(rvu, req->hdr.pcifunc, req->intf, channel))
- return NPC_MCAM_INVALID_REQ;
-
if (npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf,
req->hdr.pcifunc))
return NPC_MCAM_INVALID_REQ;
@@ -3252,7 +3235,7 @@ int rvu_mbox_handler_npc_mcam_entry_stats(struct rvu *rvu,
/* read MCAM entry STAT_ACT register */
regval = rvu_read64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank));
- if (!(regval & BIT_ULL(9))) {
+ if (!(regval & rvu->hw->npc_stat_ena)) {
rsp->stat_ena = 0;
mutex_unlock(&mcam->lock);
return 0;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index 5c01cf4a9c5b..51ddc7b81d0b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
* Copyright (C) 2020 Marvell.
*/
@@ -20,6 +20,8 @@ static const char * const npc_flow_names[] = {
[NPC_DMAC] = "dmac",
[NPC_SMAC] = "smac",
[NPC_ETYPE] = "ether type",
+ [NPC_VLAN_ETYPE_CTAG] = "vlan ether type ctag",
+ [NPC_VLAN_ETYPE_STAG] = "vlan ether type stag",
[NPC_OUTER_VID] = "outer vlan id",
[NPC_TOS] = "tos",
[NPC_SIP_IPV4] = "ipv4 source ip",
@@ -492,6 +494,11 @@ static void npc_set_features(struct rvu *rvu, int blkaddr, u8 intf)
if (*features & BIT_ULL(NPC_OUTER_VID))
if (!npc_check_field(rvu, blkaddr, NPC_LB, intf))
*features &= ~BIT_ULL(NPC_OUTER_VID);
+
+ /* for vlan ethertypes corresponding layer type should be in the key */
+ if (npc_check_field(rvu, blkaddr, NPC_LB, intf))
+ *features |= BIT_ULL(NPC_VLAN_ETYPE_CTAG) |
+ BIT_ULL(NPC_VLAN_ETYPE_STAG);
}
/* Scan key extraction profile and record how fields of our interest
@@ -600,7 +607,7 @@ static int npc_check_unsupported_flows(struct rvu *rvu, u64 features, u8 intf)
dev_info(rvu->dev, "Unsupported flow(s):\n");
for_each_set_bit(bit, (unsigned long *)&unsupported, 64)
dev_info(rvu->dev, "%s ", npc_get_field_name(bit));
- return NIX_AF_ERR_NPC_KEY_NOT_SUPP;
+ return -EOPNOTSUPP;
}
return 0;
@@ -747,6 +754,28 @@ static void npc_update_ipv6_flow(struct rvu *rvu, struct mcam_entry *entry,
}
}
+/* Program the NPC_LB layer-type match for the VLAN related features.
+ *
+ * When neither VLAN ethertype feature is requested but NPC_OUTER_VID is,
+ * one ltype/mask pair built from both the STAG_QINQ and CTAG layer types
+ * is written. Otherwise each requested ethertype feature gets its own
+ * layer type written with a full (~0ULL) mask.
+ */
+static void npc_update_vlan_features(struct rvu *rvu, struct mcam_entry *entry,
+				     u64 features, u8 intf)
+{
+	bool want_ctag = (features & BIT_ULL(NPC_VLAN_ETYPE_CTAG)) != 0;
+	bool want_stag = (features & BIT_ULL(NPC_VLAN_ETYPE_STAG)) != 0;
+
+	if (!want_ctag && !want_stag) {
+		/* If only VLAN id is given then always match outer VLAN id */
+		if (features & BIT_ULL(NPC_OUTER_VID))
+			npc_update_entry(rvu, NPC_LB, entry,
+					 NPC_LT_LB_STAG_QINQ | NPC_LT_LB_CTAG, 0,
+					 NPC_LT_LB_STAG_QINQ & NPC_LT_LB_CTAG, 0,
+					 intf);
+		return;
+	}
+
+	if (want_ctag)
+		npc_update_entry(rvu, NPC_LB, entry, NPC_LT_LB_CTAG, 0,
+				 ~0ULL, 0, intf);
+	if (want_stag)
+		npc_update_entry(rvu, NPC_LB, entry, NPC_LT_LB_STAG_QINQ, 0,
+				 ~0ULL, 0, intf);
+}
+
static void npc_update_flow(struct rvu *rvu, struct mcam_entry *entry,
u64 features, struct flow_msg *pkt,
struct flow_msg *mask,
@@ -779,11 +808,6 @@ static void npc_update_flow(struct rvu *rvu, struct mcam_entry *entry,
npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_ICMP6,
0, ~0ULL, 0, intf);
- if (features & BIT_ULL(NPC_OUTER_VID))
- npc_update_entry(rvu, NPC_LB, entry,
- NPC_LT_LB_STAG_QINQ | NPC_LT_LB_CTAG, 0,
- NPC_LT_LB_STAG_QINQ & NPC_LT_LB_CTAG, 0, intf);
-
/* For AH, LTYPE should be present in entry */
if (features & BIT_ULL(NPC_IPPROTO_AH))
npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_AH,
@@ -829,6 +853,7 @@ do { \
ntohs(mask->vlan_tci), 0);
npc_update_ipv6_flow(rvu, entry, features, pkt, mask, output, intf);
+ npc_update_vlan_features(rvu, entry, features, intf);
}
static struct rvu_npc_mcam_rule *rvu_mcam_find_rule(struct npc_mcam *mcam,
@@ -995,13 +1020,11 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target,
struct npc_mcam *mcam = &rvu->hw->mcam;
struct rvu_npc_mcam_rule dummy = { 0 };
struct rvu_npc_mcam_rule *rule;
- bool new = false, msg_from_vf;
u16 owner = req->hdr.pcifunc;
struct msg_rsp write_rsp;
struct mcam_entry *entry;
int entry_index, err;
-
- msg_from_vf = !!(owner & RVU_PFVF_FUNC_MASK);
+ bool new = false;
installed_features = req->features;
features = req->features;
@@ -1027,7 +1050,7 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target,
}
/* update mcam entry with default unicast rule attributes */
- if (def_ucast_rule && (msg_from_vf || (req->default_rule && req->append))) {
+ if (def_ucast_rule && (req->default_rule && req->append)) {
missing_features = (def_ucast_rule->features ^ features) &
def_ucast_rule->features;
if (missing_features)
@@ -1130,6 +1153,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
struct npc_install_flow_rsp *rsp)
{
bool from_vf = !!(req->hdr.pcifunc & RVU_PFVF_FUNC_MASK);
+ struct rvu_switch *rswitch = &rvu->rswitch;
int blkaddr, nixlf, err;
struct rvu_pfvf *pfvf;
bool pf_set_vfs_mac = false;
@@ -1139,14 +1163,14 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
if (blkaddr < 0) {
dev_err(rvu->dev, "%s: NPC block not implemented\n", __func__);
- return -ENODEV;
+ return NPC_MCAM_INVALID_REQ;
}
if (!is_npc_interface_valid(rvu, req->intf))
- return -EINVAL;
+ return NPC_FLOW_INTF_INVALID;
if (from_vf && req->default_rule)
- return NPC_MCAM_PERM_DENIED;
+ return NPC_FLOW_VF_PERM_DENIED;
/* Each PF/VF info is maintained in struct rvu_pfvf.
* rvu_pfvf for the target PF/VF needs to be retrieved
@@ -1172,12 +1196,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
err = npc_check_unsupported_flows(rvu, req->features, req->intf);
if (err)
- return err;
-
- /* Skip channel validation if AF is installing */
- if (!is_pffunc_af(req->hdr.pcifunc) &&
- npc_mcam_verify_channel(rvu, target, req->intf, req->channel))
- return -EINVAL;
+ return NPC_FLOW_NOT_SUPPORTED;
pfvf = rvu_get_pfvf(rvu, target);
@@ -1195,7 +1214,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
/* Proceed if NIXLF is attached or not for TX rules */
err = nix_get_nixlf(rvu, target, &nixlf, NULL);
if (err && is_npc_intf_rx(req->intf) && !pf_set_vfs_mac)
- return -EINVAL;
+ return NPC_FLOW_NO_NIXLF;
/* don't enable rule when nixlf not attached or initialized */
if (!(is_nixlf_attached(rvu, target) &&
@@ -1211,7 +1230,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
/* Do not allow requests from uninitialized VFs */
if (from_vf && !enable)
- return -EINVAL;
+ return NPC_FLOW_VF_NOT_INIT;
/* PF sets VF mac & VF NIXLF is not attached, update the mac addr */
if (pf_set_vfs_mac && !enable) {
@@ -1221,15 +1240,12 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
return 0;
}
- /* If message is from VF then its flow should not overlap with
- * reserved unicast flow.
- */
- if (from_vf && pfvf->def_ucast_rule && is_npc_intf_rx(req->intf) &&
- pfvf->def_ucast_rule->features & req->features)
- return -EINVAL;
+ mutex_lock(&rswitch->switch_lock);
+ err = npc_install_flow(rvu, blkaddr, target, nixlf, pfvf,
+ req, rsp, enable, pf_set_vfs_mac);
+ mutex_unlock(&rswitch->switch_lock);
- return npc_install_flow(rvu, blkaddr, target, nixlf, pfvf, req, rsp,
- enable, pf_set_vfs_mac);
+ return err;
}
static int npc_delete_flow(struct rvu *rvu, struct rvu_npc_mcam_rule *rule,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c
index e266f0c49559..b3150f053291 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c
@@ -1,11 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/module.h>
@@ -33,8 +30,8 @@ static struct hw_reg_map txsch_reg_map[NIX_TXSCH_LVL_CNT] = {
{NIX_TXSCH_LVL_SMQ, 2, 0xFFFF, {{0x0700, 0x0708}, {0x1400, 0x14C8} } },
{NIX_TXSCH_LVL_TL4, 3, 0xFFFF, {{0x0B00, 0x0B08}, {0x0B10, 0x0B18},
{0x1200, 0x12E0} } },
- {NIX_TXSCH_LVL_TL3, 3, 0xFFFF, {{0x1000, 0x10E0}, {0x1600, 0x1608},
- {0x1610, 0x1618} } },
+ {NIX_TXSCH_LVL_TL3, 4, 0xFFFF, {{0x1000, 0x10E0}, {0x1600, 0x1608},
+ {0x1610, 0x1618}, {0x1700, 0x17B0} } },
{NIX_TXSCH_LVL_TL2, 2, 0xFFFF, {{0x0E00, 0x0EE0}, {0x1700, 0x17B0} } },
{NIX_TXSCH_LVL_TL1, 1, 0xFFFF, {{0x0C00, 0x0D98} } },
};
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
index 8b01ef6e2c99..21f1ed4e222f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
@@ -1,11 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef RVU_REG_H
@@ -53,7 +50,7 @@
#define RVU_AF_SMMU_TXN_REQ (0x6008)
#define RVU_AF_SMMU_ADDR_RSP_STS (0x6010)
#define RVU_AF_SMMU_ADDR_TLN (0x6018)
-#define RVU_AF_SMMU_TLN_FLIT1 (0x6030)
+#define RVU_AF_SMMU_TLN_FLIT0 (0x6020)
/* Admin function's privileged PF/VF registers */
#define RVU_PRIV_CONST (0x8000000)
@@ -156,6 +153,7 @@
#define NPA_AF_AQ_DONE_INT_W1S (0x0688)
#define NPA_AF_AQ_DONE_ENA_W1S (0x0690)
#define NPA_AF_AQ_DONE_ENA_W1C (0x0698)
+#define NPA_AF_BATCH_CTL (0x06a0)
#define NPA_AF_LFX_AURAS_CFG(a) (0x4000 | (a) << 18)
#define NPA_AF_LFX_LOC_AURAS_BASE(a) (0x4010 | (a) << 18)
#define NPA_AF_LFX_QINTS_CFG(a) (0x4100 | (a) << 18)
@@ -265,10 +263,13 @@
#define NIX_AF_SDP_TX_FIFO_STATUS (0x0640)
#define NIX_AF_TX_NPC_CAPTURE_CONFIG (0x0660)
#define NIX_AF_TX_NPC_CAPTURE_INFO (0x0670)
+#define NIX_AF_SEB_CFG (0x05F0)
#define NIX_AF_DEBUG_NPC_RESP_DATAX(a) (0x680 | (a) << 3)
#define NIX_AF_SMQX_CFG(a) (0x700 | (a) << 16)
#define NIX_AF_SQM_DBG_CTL_STATUS (0x750)
+#define NIX_AF_DWRR_SDP_MTU (0x790)
+#define NIX_AF_DWRR_RPM_MTU (0x7A0)
#define NIX_AF_PSE_CHANNEL_LEVEL (0x800)
#define NIX_AF_PSE_SHAPER_CFG (0x810)
#define NIX_AF_TX_EXPR_CREDIT (0x830)
@@ -701,5 +702,8 @@
#define APR_AF_LMT_CFG (0x000ull)
#define APR_AF_LMT_MAP_BASE (0x008ull)
#define APR_AF_LMT_CTL (0x010ull)
+#define APR_LMT_MAP_ENT_DIS_SCH_CMP_SHIFT 23
+#define APR_LMT_MAP_ENT_SCH_ENA_SHIFT 22
+#define APR_LMT_MAP_ENT_DIS_LINE_PREF_SHIFT 21
#endif /* RVU_REG_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c
new file mode 100644
index 000000000000..b04fb226f708
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell RVU Admin Function driver
+ *
+ * Copyright (C) 2021 Marvell.
+ *
+ */
+
+#include <linux/pci.h>
+#include "rvu.h"
+
+/* SDP PF device id */
+#define PCI_DEVID_OTX2_SDP_PF 0xA0F6
+
+/* Maximum SDP blocks in a chip */
+#define MAX_SDP 2
+
+/* SDP PF number */
+static int sdp_pf_num[MAX_SDP] = {-1, -1};
+
+/* Return true when the PF part of @pcifunc is one of the recorded SDP PFs.
+ * Unused sdp_pf_num[] slots hold -1.
+ */
+bool is_sdp_pfvf(u16 pcifunc)
+{
+	u16 pf = rvu_get_pf(pcifunc);
+	u32 idx;
+
+	for (idx = 0; idx < MAX_SDP; idx++) {
+		if (sdp_pf_num[idx] == pf)
+			return true;
+	}
+
+	return false;
+}
+
+/* True when @pcifunc belongs to an SDP PF and has no FUNC bits set. */
+bool is_sdp_pf(u16 pcifunc)
+{
+	if (pcifunc & RVU_PFVF_FUNC_MASK)
+		return false;
+
+	return is_sdp_pfvf(pcifunc);
+}
+
+/* True when @pcifunc belongs to an SDP PF and has any FUNC bit set. */
+bool is_sdp_vf(u16 pcifunc)
+{
+	if (!(pcifunc & RVU_PFVF_FUNC_MASK))
+		return false;
+
+	return is_sdp_pfvf(pcifunc);
+}
+
+/* Discover up to MAX_SDP SDP PF PCI devices, record their RVU PF numbers in
+ * sdp_pf_num[] and allocate a zeroed sdp_node_info for each of them.
+ *
+ * Returns 0 on success or -ENOMEM if an allocation fails.
+ */
+int rvu_sdp_init(struct rvu *rvu)
+{
+	struct pci_dev *pdev = NULL;
+	struct rvu_pfvf *pfvf;
+	u32 i = 0;
+
+	/* pci_get_device() drops the reference on the device passed in as
+	 * 'from' and returns the next match with its reference taken, so the
+	 * loop body must not put the device itself.
+	 */
+	while ((i < MAX_SDP) && (pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
+						       PCI_DEVID_OTX2_SDP_PF,
+						       pdev)) != NULL) {
+		/* The RVU PF number is one less than bus number */
+		sdp_pf_num[i] = pdev->bus->number - 1;
+		pfvf = &rvu->pf[sdp_pf_num[i]];
+
+		pfvf->sdp_info = devm_kzalloc(rvu->dev,
+					      sizeof(struct sdp_node_info),
+					      GFP_KERNEL);
+		if (!pfvf->sdp_info) {
+			/* Leaving the iteration early: release the device */
+			pci_dev_put(pdev);
+			return -ENOMEM;
+		}
+
+		dev_info(rvu->dev, "SDP PF number:%d\n", sdp_pf_num[i]);
+
+		i++;
+	}
+
+	/* If the loop stopped at i == MAX_SDP the last device is still
+	 * referenced; pdev is NULL (a no-op here) when the search ran dry.
+	 */
+	pci_dev_put(pdev);
+
+	return 0;
+}
+
+/* Mailbox handler: store the SDP node info sent by the requesting function.
+ *
+ * Copies req->info into the requester's pfvf->sdp_info and logs its fields.
+ * sdp_info is allocated by rvu_sdp_init() only for the SDP PFs it discovers,
+ * so a request from a function without that buffer is rejected rather than
+ * copied through a NULL pointer.
+ * NOTE(review): assumes sdp_info is NULL for every other pfvf - confirm.
+ *
+ * Returns 0 on success, -EINVAL when the requester has no sdp_info buffer.
+ */
+int
+rvu_mbox_handler_set_sdp_chan_info(struct rvu *rvu,
+				   struct sdp_chan_info_msg *req,
+				   struct msg_rsp *rsp)
+{
+	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
+
+	if (!pfvf->sdp_info)
+		return -EINVAL;
+
+	memcpy(pfvf->sdp_info, &req->info, sizeof(struct sdp_node_info));
+	dev_info(rvu->dev, "AF: SDP%d max_vfs %d num_pf_rings %d pf_srn %d\n",
+		 req->info.node_id, req->info.max_vfs, req->info.num_pf_rings,
+		 req->info.pf_srn);
+	return 0;
+}
+
+/* Mailbox handler: report the SDP channel base and number of channels.
+ *
+ * With programmable channels the base is hw->sdp_chan_base and the count is
+ * the low 12 bits of NIX_AF_CONST1; otherwise the fixed NIX_CHAN_SDP_*
+ * constants are returned. Always returns 0.
+ */
+int
+rvu_mbox_handler_get_sdp_chan_info(struct rvu *rvu, struct msg_req *req,
+				   struct sdp_get_chan_info_msg *rsp)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	int blkaddr;
+
+	if (hw->cap.programmable_chans) {
+		blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+		rsp->chan_base = hw->sdp_chan_base;
+		rsp->num_chan = rvu_read64(rvu, blkaddr, NIX_AF_CONST1) & 0xFFFUL;
+		return 0;
+	}
+
+	rsp->chan_base = NIX_CHAN_SDP_CH_START;
+	rsp->num_chan = NIX_CHAN_SDP_NUM_CHANS;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
index 5bbe6727d11d..77ac96693f04 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
@@ -1,11 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef RVU_STRUCT_H
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c
index 820adf390b8e..3392487f6b47 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
*
* Copyright (C) 2021 Marvell.
+ *
*/
#include <linux/bitfield.h>
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c
index 56f90cf9c4c0..775fd4c35794 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver tracepoints
+/* Marvell RVU Admin Function driver
+ *
+ * Copyright (C) 2020 Marvell.
*
- * Copyright (C) 2020 Marvell International Ltd.
*/
#define CREATE_TRACE_POINTS
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h
index 64aa7d350df1..28984d0e848a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h
@@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Admin Function driver tracepoints
+/* Marvell RVU Admin Function driver
+ *
+ * Copyright (C) 2020 Marvell.
*
- * Copyright (C) 2020 Marvell International Ltd.
*/
#undef TRACE_SYSTEM
@@ -14,6 +15,8 @@
#include <linux/tracepoint.h>
#include <linux/pci.h>
+#include "mbox.h"
+
TRACE_EVENT(otx2_msg_alloc,
TP_PROTO(const struct pci_dev *pdev, u16 id, u64 size),
TP_ARGS(pdev, id, size),
@@ -25,8 +28,8 @@ TRACE_EVENT(otx2_msg_alloc,
__entry->id = id;
__entry->size = size;
),
- TP_printk("[%s] msg:(0x%x) size:%lld\n", __get_str(dev),
- __entry->id, __entry->size)
+ TP_printk("[%s] msg:(%s) size:%lld\n", __get_str(dev),
+ otx2_mbox_id2name(__entry->id), __entry->size)
);
TRACE_EVENT(otx2_msg_send,
@@ -88,8 +91,8 @@ TRACE_EVENT(otx2_msg_process,
__entry->id = id;
__entry->err = err;
),
- TP_printk("[%s] msg:(0x%x) error:%d\n", __get_str(dev),
- __entry->id, __entry->err)
+ TP_printk("[%s] msg:(%s) error:%d\n", __get_str(dev),
+ otx2_mbox_id2name(__entry->id), __entry->err)
);
#endif /* __RVU_TRACE_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
index 3254b02205ca..b92c267628b8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
@@ -1,13 +1,14 @@
# SPDX-License-Identifier: GPL-2.0
#
-# Makefile for Marvell's OcteonTX2 ethernet device drivers
+# Makefile for Marvell's RVU Ethernet device drivers
#
obj-$(CONFIG_OCTEONTX2_PF) += rvu_nicpf.o
obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o
rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \
- otx2_ptp.o otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o
-rvu_nicvf-y := otx2_vf.o
+ otx2_ptp.o otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \
+ otx2_devlink.o
+rvu_nicvf-y := otx2_vf.o otx2_devlink.o
ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
index 184de9466286..3cc76f14d2fd 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Physcial Function ethernet driver
+/* Marvell RVU Ethernet driver
+ *
+ * Copyright (C) 2021 Marvell.
*
- * Copyright (C) 2020 Marvell.
*/
#include "cn10k.h"
@@ -92,8 +93,7 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
aq->sq.ena = 1;
/* Only one SMQ is allocated, map all SQ's to that SMQ */
aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
- /* FIXME: set based on NIX_AF_DWRR_RPM_MTU*/
- aq->sq.smq_rr_weight = pfvf->netdev->mtu;
+ aq->sq.smq_rr_weight = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
aq->sq.default_chan = pfvf->hw.tx_chan_base;
aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
aq->sq.sqb_aura = sqb_aura;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h
index 1a1ae334477d..8ae96815865e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h
@@ -1,7 +1,8 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Ethernet driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell RVU Ethernet driver
+ *
+ * Copyright (C) 2021 Marvell.
*
- * Copyright (C) 2020 Marvell.
*/
#ifndef CN10K_H
@@ -9,6 +10,20 @@
#include "otx2_common.h"
+/* Convert @mtu into a DWRR weight: @mtu divided by hw.dwrr_mtu, rounded up. */
+static inline int mtu_to_dwrr_weight(struct otx2_nic *pfvf, int mtu)
+{
+	u32 quantum = pfvf->hw.dwrr_mtu;
+	u32 weight = mtu / quantum;
+
+	/* On OTx2, since AF returns DWRR_MTU as '1', this logic
+	 * will work on those silicons as well.
+	 */
+	if (mtu % quantum)
+		weight++;
+
+	return weight;
+}
+
void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq);
void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx);
int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 70fcc1fd962f..ce25c2744435 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -1,11 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Ethernet driver
+/* Marvell RVU Ethernet driver
*
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/interrupt.h>
@@ -208,7 +205,8 @@ int otx2_set_mac_address(struct net_device *netdev, void *p)
if (!otx2_hw_set_mac_addr(pfvf, addr->sa_data)) {
memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
/* update dmac field in vlan offload rule */
- if (pfvf->flags & OTX2_FLAG_RX_VLAN_SUPPORT)
+ if (netif_running(netdev) &&
+ pfvf->flags & OTX2_FLAG_RX_VLAN_SUPPORT)
otx2_install_rxvlan_offload_flow(pfvf);
/* update dmac address in ntuple and DMAC filter list */
if (pfvf->flags & OTX2_FLAG_DMACFLTR_SUPPORT)
@@ -268,6 +266,7 @@ unlock:
int otx2_set_flowkey_cfg(struct otx2_nic *pfvf)
{
struct otx2_rss_info *rss = &pfvf->hw.rss_info;
+ struct nix_rss_flowkey_cfg_rsp *rsp;
struct nix_rss_flowkey_cfg *req;
int err;
@@ -282,6 +281,18 @@ int otx2_set_flowkey_cfg(struct otx2_nic *pfvf)
req->group = DEFAULT_RSS_CONTEXT_GROUP;
err = otx2_sync_mbox_msg(&pfvf->mbox);
+ if (err)
+ goto fail;
+
+ rsp = (struct nix_rss_flowkey_cfg_rsp *)
+ otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr);
+ if (IS_ERR(rsp)) {
+ err = PTR_ERR(rsp);
+ goto fail;
+ }
+
+ pfvf->hw.flowkey_alg_idx = rsp->alg_idx;
+fail:
mutex_unlock(&pfvf->mbox.lock);
return err;
}
@@ -572,30 +583,14 @@ void otx2_get_mac_from_af(struct net_device *netdev)
}
EXPORT_SYMBOL(otx2_get_mac_from_af);
-static int otx2_get_link(struct otx2_nic *pfvf)
-{
- int link = 0;
- u16 map;
-
- /* cgx lmac link */
- if (pfvf->hw.tx_chan_base >= CGX_CHAN_BASE) {
- map = pfvf->hw.tx_chan_base & 0x7FF;
- link = 4 * ((map >> 8) & 0xF) + ((map >> 4) & 0xF);
- }
- /* LBK channel */
- if (pfvf->hw.tx_chan_base < SDP_CHAN_BASE) {
- map = pfvf->hw.tx_chan_base & 0x7FF;
- link = pfvf->hw.cgx_links | ((map >> 8) & 0xF);
- }
-
- return link;
-}
-
int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
{
struct otx2_hw *hw = &pfvf->hw;
struct nix_txschq_config *req;
u64 schq, parent;
+ u64 dwrr_val;
+
+ dwrr_val = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
req = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox);
if (!req)
@@ -621,21 +616,21 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
req->num_regs++;
/* Set DWRR quantum */
req->reg[2] = NIX_AF_MDQX_SCHEDULE(schq);
- req->regval[2] = DFLT_RR_QTM;
+ req->regval[2] = dwrr_val;
} else if (lvl == NIX_TXSCH_LVL_TL4) {
parent = hw->txschq_list[NIX_TXSCH_LVL_TL3][0];
req->reg[0] = NIX_AF_TL4X_PARENT(schq);
req->regval[0] = parent << 16;
req->num_regs++;
req->reg[1] = NIX_AF_TL4X_SCHEDULE(schq);
- req->regval[1] = DFLT_RR_QTM;
+ req->regval[1] = dwrr_val;
} else if (lvl == NIX_TXSCH_LVL_TL3) {
parent = hw->txschq_list[NIX_TXSCH_LVL_TL2][0];
req->reg[0] = NIX_AF_TL3X_PARENT(schq);
req->regval[0] = parent << 16;
req->num_regs++;
req->reg[1] = NIX_AF_TL3X_SCHEDULE(schq);
- req->regval[1] = DFLT_RR_QTM;
+ req->regval[1] = dwrr_val;
} else if (lvl == NIX_TXSCH_LVL_TL2) {
parent = hw->txschq_list[NIX_TXSCH_LVL_TL1][0];
req->reg[0] = NIX_AF_TL2X_PARENT(schq);
@@ -643,11 +638,10 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
req->num_regs++;
req->reg[1] = NIX_AF_TL2X_SCHEDULE(schq);
- req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | DFLT_RR_QTM;
+ req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | dwrr_val;
req->num_regs++;
- req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq,
- otx2_get_link(pfvf));
+ req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq, hw->tx_link);
/* Enable this queue and backpressure */
req->regval[2] = BIT_ULL(13) | BIT_ULL(12);
@@ -656,7 +650,10 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
* For VF this is always ignored.
*/
- /* Set DWRR quantum */
+ /* On CN10K, if RR_WEIGHT is greater than 16384, HW will
+ * clip it to 16384, so configuring a 24bit max value
+ * will work on both OTx2 and CN10K.
+ */
req->reg[0] = NIX_AF_TL1X_SCHEDULE(schq);
req->regval[0] = TXSCH_TL1_DFLT_RR_QTM;
@@ -803,7 +800,7 @@ int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
aq->sq.ena = 1;
/* Only one SMQ is allocated, map all SQ's to that SMQ */
aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
- aq->sq.smq_rr_quantum = DFLT_RR_QTM;
+ aq->sq.smq_rr_quantum = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
aq->sq.default_chan = pfvf->hw.tx_chan_base;
aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
aq->sq.sqb_aura = sqb_aura;
@@ -1190,7 +1187,22 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id,
/* Enable backpressure for RQ aura */
if (aura_id < pfvf->hw.rqpool_cnt && !is_otx2_lbkvf(pfvf->pdev)) {
aq->aura.bp_ena = 0;
+ /* If NIX1 LF is attached then specify NIX1_RX.
+ *
+ * Below NPA_AURA_S[BP_ENA] is set according to the
+ * NPA_BPINTF_E enumeration given as:
+ * 0x0 + a*0x1 where 'a' is 0 for NIX0_RX and 1 for NIX1_RX so
+ * NIX0_RX is 0x0 + 0*0x1 = 0
+ * NIX1_RX is 0x0 + 1*0x1 = 1
+ * But in HRM it is given that
+ * "NPA_AURA_S[BP_ENA](w1[33:32]) - Enable aura backpressure to
+ * NIX-RX based on [BP] level. One bit per NIX-RX; index
+ * enumerated by NPA_BPINTF_E."
+ */
+ if (pfvf->nix_blkaddr == BLKADDR_NIX1)
+ aq->aura.bp_ena = 1;
aq->aura.nix0_bpid = pfvf->bpid[0];
+
/* Set backpressure level for RQ's Aura */
aq->aura.bp = RQ_BP_LVL_AURA;
}
@@ -1577,6 +1589,7 @@ void mbox_handler_nix_lf_alloc(struct otx2_nic *pfvf,
pfvf->hw.lso_tsov6_idx = rsp->lso_tsov6_idx;
pfvf->hw.cgx_links = rsp->cgx_links;
pfvf->hw.lbk_links = rsp->lbk_links;
+ pfvf->hw.tx_link = rsp->tx_link;
}
EXPORT_SYMBOL(mbox_handler_nix_lf_alloc);
@@ -1668,6 +1681,11 @@ u16 otx2_get_max_mtu(struct otx2_nic *pfvf)
* SMQ errors
*/
max_mtu = rsp->max_mtu - 8 - OTX2_ETH_HLEN;
+
+ /* Also save DWRR MTU, needed for DWRR weight calculation */
+ pfvf->hw.dwrr_mtu = rsp->rpm_dwrr_mtu;
+ if (!pfvf->hw.dwrr_mtu)
+ pfvf->hw.dwrr_mtu = 1;
}
out:
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 8fd58cd07f50..48227cec06ee 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -1,11 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Ethernet driver
+/* Marvell RVU Ethernet driver
*
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef OTX2_COMMON_H
@@ -19,11 +16,13 @@
#include <linux/timecounter.h>
#include <linux/soc/marvell/octeontx2/asm.h>
#include <net/pkt_cls.h>
+#include <net/devlink.h>
#include <mbox.h>
#include <npc.h>
#include "otx2_reg.h"
#include "otx2_txrx.h"
+#include "otx2_devlink.h"
#include <rvu_trace.h>
/* PCI device IDs */
@@ -181,6 +180,7 @@ struct otx2_hw {
/* NIX */
u16 txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
u16 matchall_ipolicer;
+ u32 dwrr_mtu;
/* HW settings, coalescing etc */
u16 rx_chan_base;
@@ -196,6 +196,9 @@ struct otx2_hw {
u8 lso_udpv4_idx;
u8 lso_udpv6_idx;
+ /* RSS */
+ u8 flowkey_alg_idx;
+
/* MSI-X */
u8 cint_cnt; /* CQ interrupt count */
u16 npa_msixoff; /* Offset of NPA vectors */
@@ -212,6 +215,7 @@ struct otx2_hw {
u64 cgx_fec_uncorr_blks;
u8 cgx_links; /* No. of CGX links present in HW */
u8 lbk_links; /* No. of LBK links present in HW */
+ u8 tx_link; /* Transmit channel link number */
#define HW_TSO 0
#define CN10K_MBOX 1
#define CN10K_LMTST 2
@@ -267,7 +271,6 @@ struct otx2_mac_table {
};
struct otx2_flow_config {
- u16 entry[NPC_MAX_NONCONTIG_ENTRIES];
u16 *flow_ent;
u16 *def_ent;
u16 nr_flows;
@@ -278,16 +281,13 @@ struct otx2_flow_config {
#define OTX2_MCAM_COUNT (OTX2_DEFAULT_FLOWCOUNT + \
OTX2_MAX_UNICAST_FLOWS + \
OTX2_MAX_VLAN_FLOWS)
- u16 ntuple_offset;
u16 unicast_offset;
u16 rx_vlan_offset;
u16 vf_vlan_offset;
#define OTX2_PER_VF_VLAN_FLOWS 2 /* Rx + Tx per VF */
#define OTX2_VF_VLAN_RX_INDEX 0
#define OTX2_VF_VLAN_TX_INDEX 1
- u16 tc_flower_offset;
- u16 ntuple_max_flows;
- u16 tc_max_flows;
+ u16 max_flows;
u8 dmacflt_max_flows;
u8 *bmap_to_dmacindex;
unsigned long dmacflt_bmap;
@@ -298,8 +298,7 @@ struct otx2_tc_info {
/* hash table to store TC offloaded flows */
struct rhashtable flow_table;
struct rhashtable_params flow_ht_params;
- DECLARE_BITMAP(tc_entries_bitmap, OTX2_MAX_TC_FLOWS);
- unsigned long num_entries;
+ unsigned long *tc_entries_bitmap;
};
struct dev_hw_ops {
@@ -352,6 +351,11 @@ struct otx2_nic {
struct otx2_vf_config *vf_configs;
struct cgx_link_user_info linfo;
+ /* NPC MCAM */
+ struct otx2_flow_config *flow_cfg;
+ struct otx2_mac_table *mac_table;
+ struct otx2_tc_info tc_info;
+
u64 reset_count;
struct work_struct reset_task;
struct workqueue_struct *flr_wq;
@@ -359,7 +363,6 @@ struct otx2_nic {
struct refill_work *refill_wrk;
struct workqueue_struct *otx2_wq;
struct work_struct rx_mode_work;
- struct otx2_mac_table *mac_table;
/* Ethtool stuff */
u32 msg_enable;
@@ -375,9 +378,10 @@ struct otx2_nic {
struct otx2_ptp *ptp;
struct hwtstamp_config tstamp;
- struct otx2_flow_config *flow_cfg;
- struct otx2_tc_info tc_info;
unsigned long rq_bmap;
+
+ /* Devlink */
+ struct otx2_devlink *dl;
};
static inline bool is_otx2_lbkvf(struct pci_dev *pdev)
@@ -709,6 +713,11 @@ MBOX_UP_CGX_MESSAGES
#define RVU_PFVF_FUNC_SHIFT 0
#define RVU_PFVF_FUNC_MASK 0x3FF
+/* A pcifunc with any FUNC bit set identifies a VF. */
+static inline bool is_otx2_vf(u16 pcifunc)
+{
+	return (pcifunc & RVU_PFVF_FUNC_MASK) != 0;
+}
+
static inline int rvu_get_pf(u16 pcifunc)
{
return (pcifunc >> RVU_PFVF_PF_SHIFT) & RVU_PFVF_PF_MASK;
@@ -814,7 +823,8 @@ int otx2_set_real_num_queues(struct net_device *netdev,
int tx_queues, int rx_queues);
/* MCAM filter related APIs */
int otx2_mcam_flow_init(struct otx2_nic *pf);
-int otx2_alloc_mcam_entries(struct otx2_nic *pfvf);
+int otx2vf_mcam_flow_init(struct otx2_nic *pfvf);
+int otx2_alloc_mcam_entries(struct otx2_nic *pfvf, u16 count);
void otx2_mcam_flow_del(struct otx2_nic *pf);
int otx2_destroy_ntuple_flows(struct otx2_nic *pf);
int otx2_destroy_mcam_flows(struct otx2_nic *pfvf);
@@ -825,8 +835,7 @@ int otx2_get_all_flows(struct otx2_nic *pfvf,
int otx2_add_flow(struct otx2_nic *pfvf,
struct ethtool_rxnfc *nfc);
int otx2_remove_flow(struct otx2_nic *pfvf, u32 location);
-int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp,
- struct npc_install_flow_req *req);
+int otx2_get_maxflows(struct otx2_flow_config *flow_cfg);
void otx2_rss_ctx_flow_del(struct otx2_nic *pfvf, int ctx_id);
int otx2_del_macfilter(struct net_device *netdev, const u8 *mac);
int otx2_add_macfilter(struct net_device *netdev, const u8 *mac);
@@ -838,6 +847,7 @@ int otx2_init_tc(struct otx2_nic *nic);
void otx2_shutdown_tc(struct otx2_nic *nic);
int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type,
void *type_data);
+int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic);
/* CGX/RPM DMAC filters support */
int otx2_dmacflt_get_max_cnt(struct otx2_nic *pf);
int otx2_dmacflt_add(struct otx2_nic *pf, const u8 *mac, u8 bit_pos);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c
new file mode 100644
index 000000000000..7ac3ef2fa06a
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell RVU PF/VF Netdev Devlink
+ *
+ * Copyright (C) 2021 Marvell.
+ */
+
+#include "otx2_common.h"
+
+/* Devlink Params APIs */
+/* Validate a write to the "mcam_count" devlink parameter.
+ *
+ * Fails with -EINVAL (and an extack message) when the flow configuration is
+ * not set up or when flow rules are currently installed; returns 0 otherwise.
+ */
+static int otx2_dl_mcam_count_validate(struct devlink *devlink, u32 id,
+				       union devlink_param_value val,
+				       struct netlink_ext_ack *extack)
+{
+	struct otx2_devlink *otx2_dl = devlink_priv(devlink);
+	struct otx2_flow_config *flow_cfg = otx2_dl->pfvf->flow_cfg;
+
+	if (!flow_cfg) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "pfvf->flow_cfg not initialized");
+		return -EINVAL;
+	}
+
+	if (flow_cfg->nr_flows) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Cannot modify count when there are active rules");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Apply a new "mcam_count" value: allocate that many MCAM entries and
+ * (re)allocate the TC entries bitmap. Does nothing when the flow
+ * configuration is absent. Always returns 0.
+ */
+static int otx2_dl_mcam_count_set(struct devlink *devlink, u32 id,
+				  struct devlink_param_gset_ctx *ctx)
+{
+	struct otx2_devlink *otx2_dl = devlink_priv(devlink);
+	struct otx2_nic *pfvf = otx2_dl->pfvf;
+
+	if (pfvf->flow_cfg) {
+		otx2_alloc_mcam_entries(pfvf, ctx->val.vu16);
+		otx2_tc_alloc_ent_bitmap(pfvf);
+	}
+
+	return 0;
+}
+
+/* Report the "mcam_count" parameter: flow_cfg->max_flows, or 0 when the
+ * flow configuration is absent. Always returns 0.
+ */
+static int otx2_dl_mcam_count_get(struct devlink *devlink, u32 id,
+				  struct devlink_param_gset_ctx *ctx)
+{
+	struct otx2_devlink *otx2_dl = devlink_priv(devlink);
+	struct otx2_flow_config *flow_cfg = otx2_dl->pfvf->flow_cfg;
+
+	ctx->val.vu16 = flow_cfg ? flow_cfg->max_flows : 0;
+
+	return 0;
+}
+
+enum otx2_dl_param_id { /* IDs of the driver-specific devlink parameters */
+ OTX2_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, /* base value; driver IDs follow it */
+ OTX2_DEVLINK_PARAM_ID_MCAM_COUNT, /* ID used for the "mcam_count" parameter */
+};
+
+static const struct devlink_param otx2_dl_params[] = { /* table passed to devlink_params_register()/unregister() */
+ DEVLINK_PARAM_DRIVER(OTX2_DEVLINK_PARAM_ID_MCAM_COUNT,
+ "mcam_count", DEVLINK_PARAM_TYPE_U16,
+ BIT(DEVLINK_PARAM_CMODE_RUNTIME), /* only the runtime configuration mode is supported */
+ otx2_dl_mcam_count_get, otx2_dl_mcam_count_set,
+ otx2_dl_mcam_count_validate), /* get, set and validate callbacks */
+};
+
+/* Devlink OPs */
+/* devlink .info_get callback: report the driver name, "rvu_nicvf" for a VF
+ * and "rvu_nicpf" otherwise.
+ */
+static int otx2_devlink_info_get(struct devlink *devlink,
+				 struct devlink_info_req *req,
+				 struct netlink_ext_ack *extack)
+{
+	struct otx2_devlink *otx2_dl = devlink_priv(devlink);
+	const char *drv_name;
+
+	drv_name = is_otx2_vf(otx2_dl->pfvf->pcifunc) ? "rvu_nicvf" : "rvu_nicpf";
+
+	return devlink_info_driver_name_put(req, drv_name);
+}
+
+static const struct devlink_ops otx2_devlink_ops = { /* ops handed to devlink_alloc() */
+ .info_get = otx2_devlink_info_get, /* the only callback provided */
+};
+
+/* Allocate and register a devlink instance for @pfvf, then register and
+ * publish the driver's devlink parameters.
+ *
+ * On success pfvf->dl points at the instance's private otx2_devlink area.
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error
+ * returned by devlink_register()/devlink_params_register().
+ */
+int otx2_register_dl(struct otx2_nic *pfvf)
+{
+	struct otx2_devlink *otx2_dl;
+	struct devlink *dl;
+	int err;
+
+	dl = devlink_alloc(&otx2_devlink_ops,
+			   sizeof(struct otx2_devlink), pfvf->dev);
+	if (!dl) {
+		dev_warn(pfvf->dev, "devlink_alloc failed\n");
+		return -ENOMEM;
+	}
+
+	err = devlink_register(dl);
+	if (err) {
+		dev_err(pfvf->dev, "devlink register failed with error %d\n", err);
+		devlink_free(dl);
+		return err;
+	}
+
+	otx2_dl = devlink_priv(dl);
+	otx2_dl->dl = dl;
+	otx2_dl->pfvf = pfvf;
+	pfvf->dl = otx2_dl;
+
+	err = devlink_params_register(dl, otx2_dl_params,
+				      ARRAY_SIZE(otx2_dl_params));
+	if (err) {
+		dev_err(pfvf->dev,
+			"devlink params register failed with error %d\n", err);
+		goto err_dl;
+	}
+
+	devlink_params_publish(dl);
+
+	return 0;
+
+err_dl:
+	/* otx2_dl is the private area of dl and is freed with it, so do not
+	 * leave pfvf->dl pointing at it.
+	 */
+	pfvf->dl = NULL;
+	devlink_unregister(dl);
+	devlink_free(dl);
+	return err;
+}
+
+/* Undo otx2_register_dl(): unregister the parameters, then unregister and
+ * free the devlink instance. A no-op when no instance is attached to @pfvf.
+ */
+void otx2_unregister_dl(struct otx2_nic *pfvf)
+{
+	struct otx2_devlink *otx2_dl = pfvf->dl;
+	struct devlink *dl = otx2_dl ? otx2_dl->dl : NULL;
+
+	if (!dl)
+		return;
+
+	devlink_params_unregister(dl, otx2_dl_params,
+				  ARRAY_SIZE(otx2_dl_params));
+
+	devlink_unregister(dl);
+	devlink_free(dl);
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.h
new file mode 100644
index 000000000000..c7bd4f3c6c6b
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell RVU PF/VF Netdev Devlink
+ *
+ * Copyright (C) 2021 Marvell.
+ *
+ */
+
+#ifndef OTX2_DEVLINK_H
+#define OTX2_DEVLINK_H
+
+struct otx2_devlink {	/* driver-private data stored in a devlink instance (see devlink_priv()) */
+	struct devlink *dl;	/* the devlink instance this private data belongs to */
+	struct otx2_nic *pfvf;	/* PF/VF driver state the instance was registered for */
+};
+
+/* Devlink APIs */
+int otx2_register_dl(struct otx2_nic *pfvf);
+void otx2_unregister_dl(struct otx2_nic *pfvf);
+
+#endif /* OTX2_DEVLINK_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dmac_flt.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dmac_flt.c
index 383a6b5cb698..2ec800f741d8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dmac_flt.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dmac_flt.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Physcial Function ethernet driver
+/* Marvell RVU Ethernet driver
*
* Copyright (C) 2021 Marvell.
+ *
*/
#include "otx2_common.h"
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index b906a0eb6e0d..799486c72177 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -1,11 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Ethernet driver
+/* Marvell RVU Ethernet driver
*
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/pci.h>
@@ -33,9 +30,6 @@ struct otx2_stat {
.index = offsetof(struct otx2_dev_stats, stat) / sizeof(u64), \
}
-/* Physical link config */
-#define OTX2_ETHTOOL_SUPPORTED_MODES 0x638CCBF //110001110001100110010111111
-
enum link_mode {
OTX2_MODE_SUPPORTED,
OTX2_MODE_ADVERTISED
@@ -415,7 +409,9 @@ static int otx2_set_ringparam(struct net_device *netdev,
}
static int otx2_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *cmd)
+ struct ethtool_coalesce *cmd,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct otx2_nic *pfvf = netdev_priv(netdev);
struct otx2_hw *hw = &pfvf->hw;
@@ -429,7 +425,9 @@ static int otx2_get_coalesce(struct net_device *netdev,
}
static int otx2_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct otx2_nic *pfvf = netdev_priv(netdev);
struct otx2_hw *hw = &pfvf->hw;
@@ -645,6 +643,7 @@ static int otx2_set_rss_hash_opts(struct otx2_nic *pfvf,
static int otx2_get_rxnfc(struct net_device *dev,
struct ethtool_rxnfc *nfc, u32 *rules)
{
+ bool ntuple = !!(dev->features & NETIF_F_NTUPLE);
struct otx2_nic *pfvf = netdev_priv(dev);
int ret = -EOPNOTSUPP;
@@ -654,14 +653,18 @@ static int otx2_get_rxnfc(struct net_device *dev,
ret = 0;
break;
case ETHTOOL_GRXCLSRLCNT:
- nfc->rule_cnt = pfvf->flow_cfg->nr_flows;
- ret = 0;
+ if (netif_running(dev) && ntuple) {
+ nfc->rule_cnt = pfvf->flow_cfg->nr_flows;
+ ret = 0;
+ }
break;
case ETHTOOL_GRXCLSRULE:
- ret = otx2_get_flow(pfvf, nfc, nfc->fs.location);
+ if (netif_running(dev) && ntuple)
+ ret = otx2_get_flow(pfvf, nfc, nfc->fs.location);
break;
case ETHTOOL_GRXCLSRLALL:
- ret = otx2_get_all_flows(pfvf, nfc, rules);
+ if (netif_running(dev) && ntuple)
+ ret = otx2_get_all_flows(pfvf, nfc, rules);
break;
case ETHTOOL_GRXFH:
return otx2_get_rss_hash_opts(pfvf, nfc);
@@ -696,41 +699,6 @@ static int otx2_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc)
return ret;
}
-static int otx2vf_get_rxnfc(struct net_device *dev,
- struct ethtool_rxnfc *nfc, u32 *rules)
-{
- struct otx2_nic *pfvf = netdev_priv(dev);
- int ret = -EOPNOTSUPP;
-
- switch (nfc->cmd) {
- case ETHTOOL_GRXRINGS:
- nfc->data = pfvf->hw.rx_queues;
- ret = 0;
- break;
- case ETHTOOL_GRXFH:
- return otx2_get_rss_hash_opts(pfvf, nfc);
- default:
- break;
- }
- return ret;
-}
-
-static int otx2vf_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc)
-{
- struct otx2_nic *pfvf = netdev_priv(dev);
- int ret = -EOPNOTSUPP;
-
- switch (nfc->cmd) {
- case ETHTOOL_SRXFH:
- ret = otx2_set_rss_hash_opts(pfvf, nfc);
- break;
- default:
- break;
- }
-
- return ret;
-}
-
static u32 otx2_get_rxfh_key_size(struct net_device *netdev)
{
struct otx2_nic *pfvf = netdev_priv(netdev);
@@ -1116,8 +1084,6 @@ static void otx2_get_link_mode_info(u64 link_mode_bmap,
};
u8 bit;
- link_mode_bmap = link_mode_bmap & OTX2_ETHTOOL_SUPPORTED_MODES;
-
for_each_set_bit(bit, (unsigned long *)&link_mode_bmap, 27) {
/* SGMII mode is set */
if (bit == 0)
@@ -1357,8 +1323,8 @@ static const struct ethtool_ops otx2vf_ethtool_ops = {
.get_sset_count = otx2vf_get_sset_count,
.set_channels = otx2_set_channels,
.get_channels = otx2_get_channels,
- .get_rxnfc = otx2vf_get_rxnfc,
- .set_rxnfc = otx2vf_set_rxnfc,
+ .get_rxnfc = otx2_get_rxnfc,
+ .set_rxnfc = otx2_set_rxnfc,
.get_rxfh_key_size = otx2_get_rxfh_key_size,
.get_rxfh_indir_size = otx2_get_rxfh_indir_size,
.get_rxfh = otx2_get_rxfh,
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
index 4d9de525802d..77a13fb555fb 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
@@ -1,15 +1,19 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Physical Function ethernet driver
+/* Marvell RVU Ethernet driver
*
* Copyright (C) 2020 Marvell.
+ *
*/
#include <net/ipv6.h>
+#include <linux/sort.h>
#include "otx2_common.h"
#define OTX2_DEFAULT_ACTION 0x1
+static int otx2_mcam_entry_init(struct otx2_nic *pfvf);
+
struct otx2_flow {
struct ethtool_rx_flow_spec flow_spec;
struct list_head list;
@@ -30,8 +34,7 @@ static void otx2_clear_ntuple_flow_info(struct otx2_nic *pfvf, struct otx2_flow_
{
devm_kfree(pfvf->dev, flow_cfg->flow_ent);
flow_cfg->flow_ent = NULL;
- flow_cfg->ntuple_max_flows = 0;
- flow_cfg->tc_max_flows = 0;
+ flow_cfg->max_flows = 0;
}
static int otx2_free_ntuple_mcam_entries(struct otx2_nic *pfvf)
@@ -40,11 +43,11 @@ static int otx2_free_ntuple_mcam_entries(struct otx2_nic *pfvf)
struct npc_mcam_free_entry_req *req;
int ent, err;
- if (!flow_cfg->ntuple_max_flows)
+ if (!flow_cfg->max_flows)
return 0;
mutex_lock(&pfvf->mbox.lock);
- for (ent = 0; ent < flow_cfg->ntuple_max_flows; ent++) {
+ for (ent = 0; ent < flow_cfg->max_flows; ent++) {
req = otx2_mbox_alloc_msg_npc_mcam_free_entry(&pfvf->mbox);
if (!req)
break;
@@ -61,7 +64,12 @@ static int otx2_free_ntuple_mcam_entries(struct otx2_nic *pfvf)
return 0;
}
-static int otx2_alloc_ntuple_mcam_entries(struct otx2_nic *pfvf, u16 count)
+/* Comparator for sort(): orders two u16 values in ascending order.
+ * Returns <0, 0 or >0 when *a is below, equal to or above *b.
+ */
+static int mcam_entry_cmp(const void *a, const void *b)
+{
+	const u16 *lhs = a;
+	const u16 *rhs = b;
+
+	return *lhs - *rhs;
+}
+
+int otx2_alloc_mcam_entries(struct otx2_nic *pfvf, u16 count)
{
struct otx2_flow_config *flow_cfg = pfvf->flow_cfg;
struct npc_mcam_alloc_entry_req *req;
@@ -76,8 +84,12 @@ static int otx2_alloc_ntuple_mcam_entries(struct otx2_nic *pfvf, u16 count)
flow_cfg->flow_ent = devm_kmalloc_array(pfvf->dev, count,
sizeof(u16), GFP_KERNEL);
- if (!flow_cfg->flow_ent)
+ if (!flow_cfg->flow_ent) {
+ netdev_err(pfvf->netdev,
+ "%s: Unable to allocate memory for flow entries\n",
+ __func__);
return -ENOMEM;
+ }
mutex_lock(&pfvf->mbox.lock);
@@ -92,8 +104,14 @@ static int otx2_alloc_ntuple_mcam_entries(struct otx2_nic *pfvf, u16 count)
req->contig = false;
req->count = (count - allocated) > NPC_MAX_NONCONTIG_ENTRIES ?
NPC_MAX_NONCONTIG_ENTRIES : count - allocated;
- req->priority = NPC_MCAM_HIGHER_PRIO;
- req->ref_entry = flow_cfg->def_ent[0];
+
+ /* Allocate higher priority entries for PFs, so that VF's entries
+ * will be on top of PF.
+ */
+ if (!is_otx2_vf(pfvf->pcifunc)) {
+ req->priority = NPC_MCAM_HIGHER_PRIO;
+ req->ref_entry = flow_cfg->def_ent[0];
+ }
/* Send message to AF */
if (otx2_sync_mbox_msg(&pfvf->mbox))
@@ -114,22 +132,34 @@ static int otx2_alloc_ntuple_mcam_entries(struct otx2_nic *pfvf, u16 count)
break;
}
+ /* Multiple MCAM entry alloc requests could result in non-sequential
+ * MCAM entries in the flow_ent[] array. Sort them in an ascending order,
+ * otherwise user installed ntuple filter index and MCAM entry index will
+ * not be in sync.
+ */
+ if (allocated)
+ sort(&flow_cfg->flow_ent[0], allocated,
+ sizeof(flow_cfg->flow_ent[0]), mcam_entry_cmp, NULL);
+
exit:
mutex_unlock(&pfvf->mbox.lock);
- flow_cfg->ntuple_offset = 0;
- flow_cfg->ntuple_max_flows = allocated;
- flow_cfg->tc_max_flows = allocated;
+ flow_cfg->max_flows = allocated;
+
+ if (allocated) {
+ pfvf->flags |= OTX2_FLAG_MCAM_ENTRIES_ALLOC;
+ pfvf->flags |= OTX2_FLAG_NTUPLE_SUPPORT;
+ }
if (allocated != count)
netdev_info(pfvf->netdev,
- "Unable to allocate %d MCAM entries for ntuple, got %d\n",
+ "Unable to allocate %d MCAM entries, got only %d\n",
count, allocated);
-
return allocated;
}
+EXPORT_SYMBOL(otx2_alloc_mcam_entries);
-int otx2_alloc_mcam_entries(struct otx2_nic *pfvf)
+static int otx2_mcam_entry_init(struct otx2_nic *pfvf)
{
struct otx2_flow_config *flow_cfg = pfvf->flow_cfg;
struct npc_mcam_alloc_entry_req *req;
@@ -189,18 +219,35 @@ int otx2_alloc_mcam_entries(struct otx2_nic *pfvf)
mutex_unlock(&pfvf->mbox.lock);
/* Allocate entries for Ntuple filters */
- count = otx2_alloc_ntuple_mcam_entries(pfvf, OTX2_DEFAULT_FLOWCOUNT);
+ count = otx2_alloc_mcam_entries(pfvf, OTX2_DEFAULT_FLOWCOUNT);
if (count <= 0) {
otx2_clear_ntuple_flow_info(pfvf, flow_cfg);
return 0;
}
- pfvf->flags |= OTX2_FLAG_NTUPLE_SUPPORT;
pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT;
return 0;
}
+/* Set up an empty flow configuration for a VF: allocate the zeroed,
+ * device-managed otx2_flow_config, initialise its flow list and start
+ * with max_flows at 0.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+int otx2vf_mcam_flow_init(struct otx2_nic *pfvf)
+{
+	struct otx2_flow_config *cfg;
+
+	cfg = devm_kzalloc(pfvf->dev, sizeof(struct otx2_flow_config),
+			   GFP_KERNEL);
+	pfvf->flow_cfg = cfg;
+	if (!cfg)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&cfg->flow_list);
+	cfg->max_flows = 0;
+
+	return 0;
+}
+EXPORT_SYMBOL(otx2vf_mcam_flow_init);
+
int otx2_mcam_flow_init(struct otx2_nic *pf)
{
int err;
@@ -212,7 +259,10 @@ int otx2_mcam_flow_init(struct otx2_nic *pf)
INIT_LIST_HEAD(&pf->flow_cfg->flow_list);
- err = otx2_alloc_mcam_entries(pf);
+ /* Allocate bare minimum number of MCAM entries needed for
+ * unicast and ntuple filters.
+ */
+ err = otx2_mcam_entry_init(pf);
if (err)
return err;
@@ -248,6 +298,7 @@ void otx2_mcam_flow_del(struct otx2_nic *pf)
{
otx2_destroy_mcam_flows(pf);
}
+EXPORT_SYMBOL(otx2_mcam_flow_del);
/* On success adds mcam entry
* On failure enable promisous mode
@@ -379,15 +430,19 @@ static void otx2_add_flow_to_list(struct otx2_nic *pfvf, struct otx2_flow *flow)
list_add(&flow->list, head);
}
-static int otx2_get_maxflows(struct otx2_flow_config *flow_cfg)
+int otx2_get_maxflows(struct otx2_flow_config *flow_cfg)
{
- if (flow_cfg->nr_flows == flow_cfg->ntuple_max_flows ||
+ if (!flow_cfg)
+ return 0;
+
+ if (flow_cfg->nr_flows == flow_cfg->max_flows ||
bitmap_weight(&flow_cfg->dmacflt_bmap,
flow_cfg->dmacflt_max_flows))
- return flow_cfg->ntuple_max_flows + flow_cfg->dmacflt_max_flows;
+ return flow_cfg->max_flows + flow_cfg->dmacflt_max_flows;
else
- return flow_cfg->ntuple_max_flows;
+ return flow_cfg->max_flows;
}
+EXPORT_SYMBOL(otx2_get_maxflows);
int otx2_get_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc,
u32 location)
@@ -708,7 +763,7 @@ static int otx2_prepare_ipv6_flow(struct ethtool_rx_flow_spec *fsp,
return 0;
}
-int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp,
+static int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp,
struct npc_install_flow_req *req)
{
struct ethhdr *eth_mask = &fsp->m_u.ether_spec;
@@ -732,7 +787,7 @@ int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp,
ether_addr_copy(pmask->dmac, eth_mask->h_dest);
req->features |= BIT_ULL(NPC_DMAC);
}
- if (eth_mask->h_proto) {
+ if (eth_hdr->h_proto) {
memcpy(&pkt->etype, &eth_hdr->h_proto,
sizeof(pkt->etype));
memcpy(&pmask->etype, &eth_mask->h_proto,
@@ -764,14 +819,31 @@ int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp,
return -EOPNOTSUPP;
}
if (fsp->flow_type & FLOW_EXT) {
- if (fsp->m_ext.vlan_etype)
- return -EINVAL;
- if (fsp->m_ext.vlan_tci) {
- if (fsp->m_ext.vlan_tci != cpu_to_be16(VLAN_VID_MASK))
+ u16 vlan_etype;
+
+ if (fsp->m_ext.vlan_etype) {
+ /* Partial masks not supported */
+ if (be16_to_cpu(fsp->m_ext.vlan_etype) != 0xFFFF)
return -EINVAL;
- if (be16_to_cpu(fsp->h_ext.vlan_tci) >= VLAN_N_VID)
+
+ vlan_etype = be16_to_cpu(fsp->h_ext.vlan_etype);
+ /* Only ETH_P_8021Q and ETH_P_8021AD types supported */
+ if (vlan_etype != ETH_P_8021Q &&
+ vlan_etype != ETH_P_8021AD)
return -EINVAL;
+ memcpy(&pkt->vlan_etype, &fsp->h_ext.vlan_etype,
+ sizeof(pkt->vlan_etype));
+ memcpy(&pmask->vlan_etype, &fsp->m_ext.vlan_etype,
+ sizeof(pmask->vlan_etype));
+
+ if (vlan_etype == ETH_P_8021Q)
+ req->features |= BIT_ULL(NPC_VLAN_ETYPE_CTAG);
+ else
+ req->features |= BIT_ULL(NPC_VLAN_ETYPE_STAG);
+ }
+
+ if (fsp->m_ext.vlan_tci) {
memcpy(&pkt->vlan_tci, &fsp->h_ext.vlan_tci,
sizeof(pkt->vlan_tci));
memcpy(&pmask->vlan_tci, &fsp->m_ext.vlan_tci,
@@ -858,6 +930,7 @@ static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow)
if (flow->flow_spec.flow_type & FLOW_RSS) {
req->op = NIX_RX_ACTIONOP_RSS;
req->index = flow->rss_ctx_id;
+ req->flow_key_alg = pfvf->hw.flowkey_alg_idx;
} else {
req->op = NIX_RX_ACTIONOP_UCAST;
req->index = ethtool_get_flow_spec_ring(ring_cookie);
@@ -894,7 +967,7 @@ static int otx2_add_flow_with_pfmac(struct otx2_nic *pfvf,
pf_mac->entry = 0;
pf_mac->dmac_filter = true;
- pf_mac->location = pfvf->flow_cfg->ntuple_max_flows;
+ pf_mac->location = pfvf->flow_cfg->max_flows;
memcpy(&pf_mac->flow_spec, &flow->flow_spec,
sizeof(struct ethtool_rx_flow_spec));
pf_mac->flow_spec.location = pf_mac->location;
@@ -923,6 +996,12 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
int err = 0;
u32 ring;
+ if (!flow_cfg->max_flows) {
+ netdev_err(pfvf->netdev,
+ "Ntuple rule count is 0, allocate and retry\n");
+ return -EINVAL;
+ }
+
ring = ethtool_get_flow_spec_ring(fsp->ring_cookie);
if (!(pfvf->flags & OTX2_FLAG_NTUPLE_SUPPORT))
return -ENOMEM;
@@ -939,6 +1018,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
if (!flow)
return -ENOMEM;
flow->location = fsp->location;
+ flow->entry = flow_cfg->flow_ent[flow->location];
new = true;
}
/* struct copy */
@@ -975,7 +1055,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
flow->dmac_filter = true;
flow->entry = find_first_zero_bit(&flow_cfg->dmacflt_bmap,
flow_cfg->dmacflt_max_flows);
- fsp->location = flow_cfg->ntuple_max_flows + flow->entry;
+ fsp->location = flow_cfg->max_flows + flow->entry;
flow->flow_spec.location = fsp->location;
flow->location = fsp->location;
@@ -983,19 +1063,20 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
otx2_dmacflt_add(pfvf, eth_hdr->h_dest, flow->entry);
} else {
- if (flow->location >= pfvf->flow_cfg->ntuple_max_flows) {
+ if (flow->location >= pfvf->flow_cfg->max_flows) {
netdev_warn(pfvf->netdev,
"Can't insert non dmac ntuple rule at %d, allowed range %d-0\n",
flow->location,
- flow_cfg->ntuple_max_flows - 1);
+ flow_cfg->max_flows - 1);
err = -EINVAL;
} else {
- flow->entry = flow_cfg->flow_ent[flow->location];
err = otx2_add_flow_msg(pfvf, flow);
}
}
if (err) {
+ if (err == MBOX_MSG_INVALID)
+ err = -EINVAL;
if (new)
kfree(flow);
return err;
@@ -1132,6 +1213,9 @@ int otx2_destroy_ntuple_flows(struct otx2_nic *pfvf)
if (!(pfvf->flags & OTX2_FLAG_NTUPLE_SUPPORT))
return 0;
+ if (!flow_cfg->max_flows)
+ return 0;
+
mutex_lock(&pfvf->mbox.lock);
req = otx2_mbox_alloc_msg_npc_delete_flow(&pfvf->mbox);
if (!req) {
@@ -1140,7 +1224,7 @@ int otx2_destroy_ntuple_flows(struct otx2_nic *pfvf)
}
req->start = flow_cfg->flow_ent[0];
- req->end = flow_cfg->flow_ent[flow_cfg->ntuple_max_flows - 1];
+ req->end = flow_cfg->flow_ent[flow_cfg->max_flows - 1];
err = otx2_sync_mbox_msg(&pfvf->mbox);
mutex_unlock(&pfvf->mbox.lock);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 2c24944a4dba..2f2e8a3d7924 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1,11 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Physical Function ethernet driver
+/* Marvell RVU Physical Function ethernet driver
*
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/module.h>
@@ -1787,17 +1784,10 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev)
static netdev_features_t otx2_fix_features(struct net_device *dev,
netdev_features_t features)
{
- /* check if n-tuple filters are ON */
- if ((features & NETIF_F_HW_TC) && (dev->features & NETIF_F_NTUPLE)) {
- netdev_info(dev, "Disabling n-tuple filters\n");
- features &= ~NETIF_F_NTUPLE;
- }
-
- /* check if tc hw offload is ON */
- if ((features & NETIF_F_NTUPLE) && (dev->features & NETIF_F_HW_TC)) {
- netdev_info(dev, "Disabling TC hardware offload\n");
- features &= ~NETIF_F_HW_TC;
- }
+ if (features & NETIF_F_HW_VLAN_CTAG_RX)
+ features |= NETIF_F_HW_VLAN_STAG_RX;
+ else
+ features &= ~NETIF_F_HW_VLAN_STAG_RX;
return features;
}
@@ -1854,6 +1844,7 @@ static int otx2_set_features(struct net_device *netdev,
netdev_features_t changed = features ^ netdev->features;
bool ntuple = !!(features & NETIF_F_NTUPLE);
struct otx2_nic *pf = netdev_priv(netdev);
+ bool tc = !!(features & NETIF_F_HW_TC);
if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev))
return otx2_cgx_config_loopback(pf,
@@ -1866,12 +1857,42 @@ static int otx2_set_features(struct net_device *netdev,
if ((changed & NETIF_F_NTUPLE) && !ntuple)
otx2_destroy_ntuple_flows(pf);
- if ((netdev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC) &&
- pf->tc_info.num_entries) {
+ if ((changed & NETIF_F_NTUPLE) && ntuple) {
+ if (!pf->flow_cfg->max_flows) {
+ netdev_err(netdev,
+ "Can't enable NTUPLE, MCAM entries not allocated\n");
+ return -EINVAL;
+ }
+ }
+
+ if ((changed & NETIF_F_HW_TC) && tc) {
+ if (!pf->flow_cfg->max_flows) {
+ netdev_err(netdev,
+ "Can't enable TC, MCAM entries not allocated\n");
+ return -EINVAL;
+ }
+ }
+
+ if ((changed & NETIF_F_HW_TC) && !tc &&
+ pf->flow_cfg && pf->flow_cfg->nr_flows) {
netdev_err(netdev, "Can't disable TC hardware offload while flows are active\n");
return -EBUSY;
}
+ if ((changed & NETIF_F_NTUPLE) && ntuple &&
+ (netdev->features & NETIF_F_HW_TC) && !(changed & NETIF_F_HW_TC)) {
+ netdev_err(netdev,
+ "Can't enable NTUPLE when TC is active, disable TC and retry\n");
+ return -EINVAL;
+ }
+
+ if ((changed & NETIF_F_HW_TC) && tc &&
+ (netdev->features & NETIF_F_NTUPLE) && !(changed & NETIF_F_NTUPLE)) {
+ netdev_err(netdev,
+ "Can't enable TC when NTUPLE is active, disable NTUPLE and retry\n");
+ return -EINVAL;
+ }
+
return 0;
}
@@ -2331,7 +2352,7 @@ static const struct net_device_ops otx2_netdev_ops = {
.ndo_set_features = otx2_set_features,
.ndo_tx_timeout = otx2_tx_timeout,
.ndo_get_stats64 = otx2_get_stats64,
- .ndo_do_ioctl = otx2_ioctl,
+ .ndo_eth_ioctl = otx2_ioctl,
.ndo_set_vf_mac = otx2_set_vf_mac,
.ndo_set_vf_vlan = otx2_set_vf_vlan,
.ndo_get_vf_config = otx2_get_vf_config,
@@ -2569,8 +2590,6 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
NETIF_F_GSO_UDP_L4);
netdev->features |= netdev->hw_features;
- netdev->hw_features |= NETIF_F_LOOPBACK | NETIF_F_RXALL;
-
err = otx2_mcam_flow_init(pf);
if (err)
goto err_ptp_destroy;
@@ -2594,12 +2613,13 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (pf->flags & OTX2_FLAG_TC_FLOWER_SUPPORT)
netdev->hw_features |= NETIF_F_HW_TC;
+ netdev->hw_features |= NETIF_F_LOOPBACK | NETIF_F_RXALL;
+
netdev->gso_max_segs = OTX2_MAX_GSO_SEGS;
netdev->watchdog_timeo = OTX2_TX_TIMEOUT;
netdev->netdev_ops = &otx2_netdev_ops;
- /* MTU range: 64 - 9190 */
netdev->min_mtu = OTX2_MIN_MTU;
netdev->max_mtu = otx2_get_max_mtu(pf);
@@ -2619,6 +2639,10 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (err)
goto err_mcam_flow_del;
+ err = otx2_register_dl(pf);
+ if (err)
+ goto err_mcam_flow_del;
+
/* Initialize SR-IOV resources */
err = otx2_sriov_vfcfg_init(pf);
if (err)
@@ -2776,6 +2800,7 @@ static void otx2_remove(struct pci_dev *pdev)
/* Disable link notifications */
otx2_cgx_config_linkevents(pf, false);
+ otx2_unregister_dl(pf);
unregister_netdev(netdev);
otx2_sriov_disable(pf->pdev);
otx2_sriov_vfcfg_cleanup(pf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
index 56390a664517..ec9e49985c2c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 PTP support for ethernet driver
+/* Marvell RVU Ethernet driver
+ *
+ * Copyright (C) 2020 Marvell.
*
- * Copyright (C) 2020 Marvell International Ltd.
*/
#include "otx2_common.h"
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h
index 706d63a43ae1..6ff284211d7b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h
@@ -1,5 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 PTP support for ethernet driver */
+/* Marvell RVU Ethernet driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
#ifndef OTX2_PTP_H
#define OTX2_PTP_H
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h
index f4fd72ee9a25..1b967eaf948b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h
@@ -1,11 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Ethernet driver
+/* Marvell RVU Ethernet driver
*
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef OTX2_REG_H
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h
index 1f49b3caf5d4..4bbd12ff26e6 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h
@@ -1,11 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Ethernet driver
+/* Marvell RVU Ethernet driver
*
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef OTX2_STRUCT_H
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index 972b202b9884..626961a41089 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Physcial Function ethernet driver
+/* Marvell RVU Ethernet driver
*
* Copyright (C) 2021 Marvell.
+ *
*/
+
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/inetdevice.h>
@@ -52,6 +54,29 @@ struct otx2_tc_flow {
bool is_act_police;
};
+/* (Re)allocate the TC entries bitmap so that it holds
+ * flow_cfg->max_flows bits. Any previously allocated bitmap is freed
+ * first. Nothing is done when max_flows is 0 or the device is a VF.
+ *
+ * Returns 0 on success (or when there is nothing to do), -ENOMEM when the
+ * bitmap cannot be allocated.
+ */
+int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic)
+{
+	struct otx2_tc_info *tc = &nic->tc_info;
+
+	if (!nic->flow_cfg->max_flows)
+		return 0;
+	if (is_otx2_vf(nic->pcifunc))
+		return 0;
+
+	/* Max flows changed, free the existing bitmap */
+	kfree(tc->tc_entries_bitmap);
+
+	tc->tc_entries_bitmap = kcalloc(BITS_TO_LONGS(nic->flow_cfg->max_flows),
+					sizeof(long), GFP_KERNEL);
+	if (tc->tc_entries_bitmap)
+		return 0;
+
+	netdev_err(nic->netdev, "Unable to alloc TC flow entries bitmap\n");
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(otx2_tc_alloc_ent_bitmap);
+
static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp,
u32 *burst_mantissa)
{
@@ -485,8 +510,8 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
match.key->vlan_priority << 13;
vlan_tci_mask = match.mask->vlan_id |
- match.key->vlan_dei << 12 |
- match.key->vlan_priority << 13;
+ match.mask->vlan_dei << 12 |
+ match.mask->vlan_priority << 13;
flow_spec->vlan_tci = htons(vlan_tci);
flow_mask->vlan_tci = htons(vlan_tci_mask);
@@ -596,6 +621,7 @@ static int otx2_del_mcam_flow_entry(struct otx2_nic *nic, u16 entry)
static int otx2_tc_del_flow(struct otx2_nic *nic,
struct flow_cls_offload *tc_flow_cmd)
{
+ struct otx2_flow_config *flow_cfg = nic->flow_cfg;
struct otx2_tc_info *tc_info = &nic->tc_info;
struct otx2_tc_flow *flow_node;
int err;
@@ -638,7 +664,7 @@ static int otx2_tc_del_flow(struct otx2_nic *nic,
kfree_rcu(flow_node, rcu);
clear_bit(flow_node->bitpos, tc_info->tc_entries_bitmap);
- tc_info->num_entries--;
+ flow_cfg->nr_flows--;
return 0;
}
@@ -647,6 +673,7 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
struct flow_cls_offload *tc_flow_cmd)
{
struct netlink_ext_ack *extack = tc_flow_cmd->common.extack;
+ struct otx2_flow_config *flow_cfg = nic->flow_cfg;
struct otx2_tc_info *tc_info = &nic->tc_info;
struct otx2_tc_flow *new_node, *old_node;
struct npc_install_flow_req *req, dummy;
@@ -655,9 +682,9 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
if (!(nic->flags & OTX2_FLAG_TC_FLOWER_SUPPORT))
return -ENOMEM;
- if (bitmap_full(tc_info->tc_entries_bitmap, nic->flow_cfg->tc_max_flows)) {
+ if (bitmap_full(tc_info->tc_entries_bitmap, flow_cfg->max_flows)) {
NL_SET_ERR_MSG_MOD(extack,
- "Not enough MCAM space to add the flow");
+ "Free MCAM entry not available to add the flow");
return -ENOMEM;
}
@@ -695,10 +722,9 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
memcpy(req, &dummy, sizeof(struct npc_install_flow_req));
new_node->bitpos = find_first_zero_bit(tc_info->tc_entries_bitmap,
- nic->flow_cfg->tc_max_flows);
+ flow_cfg->max_flows);
req->channel = nic->hw.rx_chan_base;
- req->entry = nic->flow_cfg->flow_ent[nic->flow_cfg->tc_flower_offset +
- nic->flow_cfg->tc_max_flows - new_node->bitpos];
+ req->entry = flow_cfg->flow_ent[flow_cfg->max_flows - new_node->bitpos - 1];
req->intf = NIX_INTF_RX;
req->set_cntr = 1;
new_node->entry = req->entry;
@@ -723,7 +749,7 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
}
set_bit(new_node->bitpos, tc_info->tc_entries_bitmap);
- tc_info->num_entries++;
+ flow_cfg->nr_flows++;
return 0;
@@ -1008,10 +1034,21 @@ static const struct rhashtable_params tc_flow_ht_params = {
int otx2_init_tc(struct otx2_nic *nic)
{
struct otx2_tc_info *tc = &nic->tc_info;
+ int err;
/* Exclude receive queue 0 being used for police action */
set_bit(0, &nic->rq_bmap);
+ if (!nic->flow_cfg) {
+ netdev_err(nic->netdev,
+ "Can't init TC, nic->flow_cfg is not setup\n");
+ return -EINVAL;
+ }
+
+ err = otx2_tc_alloc_ent_bitmap(nic);
+ if (err)
+ return err;
+
tc->flow_ht_params = tc_flow_ht_params;
return rhashtable_init(&tc->flow_table, &tc->flow_ht_params);
}
@@ -1020,5 +1057,6 @@ void otx2_shutdown_tc(struct otx2_nic *nic)
{
struct otx2_tc_info *tc = &nic->tc_info;
+ kfree(tc->tc_entries_bitmap);
rhashtable_destroy(&tc->flow_table);
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index 22ec03a618b1..f42b1d4e0c67 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -1,11 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Ethernet driver
+/* Marvell RVU Ethernet driver
*
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/etherdevice.h>
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
index 2f144e2cf436..869de5f59e73 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
@@ -1,11 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Ethernet driver
+/* Marvell RVU Ethernet driver
*
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef OTX2_TXRX_H
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
index a8bee5aefec1..03b4ec630432 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
@@ -1,5 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Virtual Function ethernet driver */
+/* Marvell RVU Virtual Function ethernet driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
#include <linux/etherdevice.h>
#include <linux/module.h>
@@ -464,6 +468,28 @@ static void otx2vf_reset_task(struct work_struct *work)
rtnl_unlock();
}
+/* Feature-change handler installed as .ndo_set_features for the VF
+ * netdev. Only a toggle of NETIF_F_NTUPLE is acted upon:
+ *  - switching it off calls otx2_mcam_flow_del();
+ *  - switching it on is refused with -EINVAL when otx2_get_maxflows()
+ *    returns 0.
+ * Every other case returns 0.
+ */
+static int otx2vf_set_features(struct net_device *netdev,
+			       netdev_features_t features)
+{
+	struct otx2_nic *vf = netdev_priv(netdev);
+
+	/* Nothing to do unless the NTUPLE bit is being toggled */
+	if (!((features ^ netdev->features) & NETIF_F_NTUPLE))
+		return 0;
+
+	if (!(features & NETIF_F_NTUPLE)) {
+		otx2_mcam_flow_del(vf);
+		return 0;
+	}
+
+	if (otx2_get_maxflows(vf->flow_cfg))
+		return 0;
+
+	netdev_err(netdev,
+		   "Can't enable NTUPLE, MCAM entries not allocated\n");
+	return -EINVAL;
+}
+
static const struct net_device_ops otx2vf_netdev_ops = {
.ndo_open = otx2vf_open,
.ndo_stop = otx2vf_stop,
@@ -471,6 +497,7 @@ static const struct net_device_ops otx2vf_netdev_ops = {
.ndo_set_rx_mode = otx2vf_set_rx_mode,
.ndo_set_mac_address = otx2_set_mac_address,
.ndo_change_mtu = otx2vf_change_mtu,
+ .ndo_set_features = otx2vf_set_features,
.ndo_get_stats64 = otx2_get_stats64,
.ndo_tx_timeout = otx2_tx_timeout,
};
@@ -627,12 +654,14 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
NETIF_F_HW_VLAN_STAG_TX;
netdev->features |= netdev->hw_features;
+ netdev->hw_features |= NETIF_F_NTUPLE;
+ netdev->hw_features |= NETIF_F_RXALL;
+
netdev->gso_max_segs = OTX2_MAX_GSO_SEGS;
netdev->watchdog_timeo = OTX2_TX_TIMEOUT;
netdev->netdev_ops = &otx2vf_netdev_ops;
- /* MTU range: 68 - 9190 */
netdev->min_mtu = OTX2_MIN_MTU;
netdev->max_mtu = otx2_get_max_mtu(vf);
@@ -658,6 +687,14 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
otx2vf_set_ethtool_ops(netdev);
+ err = otx2vf_mcam_flow_init(vf);
+ if (err)
+ goto err_unreg_netdev;
+
+ err = otx2_register_dl(vf);
+ if (err)
+ goto err_unreg_netdev;
+
/* Enable pause frames by default */
vf->flags |= OTX2_FLAG_RX_PAUSE_ENABLED;
vf->flags |= OTX2_FLAG_TX_PAUSE_ENABLED;
@@ -695,6 +732,7 @@ static void otx2vf_remove(struct pci_dev *pdev)
vf = netdev_priv(netdev);
cancel_work_sync(&vf->reset_task);
+ otx2_unregister_dl(vf);
unregister_netdev(netdev);
if (vf->otx2_wq)
destroy_workqueue(vf->otx2_wq);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
index fa7a0682ad1e..68b442eb6d69 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
@@ -390,11 +390,12 @@ static const struct devlink_ops prestera_dl_ops = {
.trap_drop_counter_get = prestera_drop_counter_get,
};
-struct prestera_switch *prestera_devlink_alloc(void)
+/* Allocate a devlink instance whose private area is the prestera_switch.
+ * Returns the switch, or NULL when devlink_alloc() fails.
+ */
+struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev)
{
struct devlink *dl;
- dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch));
+	dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch),
+			   dev->dev);
+	/* Propagate allocation failure so the caller's NULL check can fire
+	 * instead of handing a NULL devlink to devlink_priv().
+	 */
+	if (!dl)
+		return NULL;
+
return devlink_priv(dl);
}
@@ -411,7 +412,7 @@ int prestera_devlink_register(struct prestera_switch *sw)
struct devlink *dl = priv_to_devlink(sw);
int err;
- err = devlink_register(dl, sw->dev->dev);
+ err = devlink_register(dl);
if (err) {
dev_err(prestera_dev(sw), "devlink_register failed: %d\n", err);
return err;
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.h b/drivers/net/ethernet/marvell/prestera/prestera_devlink.h
index 5d73aa9db897..cc34c3db13a2 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.h
@@ -6,7 +6,7 @@
#include "prestera.h"
-struct prestera_switch *prestera_devlink_alloc(void);
+struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev);
void prestera_devlink_free(struct prestera_switch *sw);
int prestera_devlink_register(struct prestera_switch *sw);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index 226f4ff29f6e..44c670807fb3 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -746,7 +746,8 @@ static int prestera_netdev_port_event(struct net_device *lower,
case NETDEV_CHANGEUPPER:
if (netif_is_bridge_master(upper)) {
if (info->linking)
- return prestera_bridge_port_join(upper, port);
+ return prestera_bridge_port_join(upper, port,
+ extack);
else
prestera_bridge_port_leave(upper, port);
} else if (netif_is_lag_master(upper)) {
@@ -904,7 +905,7 @@ int prestera_device_register(struct prestera_device *dev)
struct prestera_switch *sw;
int err;
- sw = prestera_devlink_alloc();
+ sw = prestera_devlink_alloc(dev);
if (!sw)
return -ENOMEM;
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
index 0b3e8f2db294..3ce6ccd0f539 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -480,7 +480,8 @@ err_port_flood_set:
}
int prestera_bridge_port_join(struct net_device *br_dev,
- struct prestera_port *port)
+ struct prestera_port *port,
+ struct netlink_ext_ack *extack)
{
struct prestera_switchdev *swdev = port->sw->swdev;
struct prestera_bridge_port *br_port;
@@ -500,6 +501,11 @@ int prestera_bridge_port_join(struct net_device *br_dev,
goto err_brport_create;
}
+ err = switchdev_bridge_port_offload(br_port->dev, port->dev, NULL,
+ NULL, NULL, false, extack);
+ if (err)
+ goto err_switchdev_offload;
+
if (bridge->vlan_enabled)
return 0;
@@ -510,6 +516,8 @@ int prestera_bridge_port_join(struct net_device *br_dev,
return 0;
err_port_join:
+ switchdev_bridge_port_unoffload(br_port->dev, NULL, NULL, NULL);
+err_switchdev_offload:
prestera_bridge_port_put(br_port);
err_brport_create:
prestera_bridge_put(bridge);
@@ -584,6 +592,8 @@ void prestera_bridge_port_leave(struct net_device *br_dev,
else
prestera_bridge_1d_port_leave(br_port);
+ switchdev_bridge_port_unoffload(br_port->dev, NULL, NULL, NULL);
+
prestera_hw_port_learning_set(port, false);
prestera_hw_port_flood_set(port, BR_FLOOD | BR_MCAST_FLOOD, 0);
prestera_port_vid_stp_set(port, PRESTERA_VID_ALL, BR_STATE_FORWARDING);
@@ -748,7 +758,7 @@ static void
prestera_fdb_offload_notify(struct prestera_port *port,
struct switchdev_notifier_fdb_info *info)
{
- struct switchdev_notifier_fdb_info send_info;
+ struct switchdev_notifier_fdb_info send_info = {};
send_info.addr = info->addr;
send_info.vid = info->vid;
@@ -1123,7 +1133,7 @@ static int prestera_switchdev_blk_event(struct notifier_block *unused,
static void prestera_fdb_event(struct prestera_switch *sw,
struct prestera_event *evt, void *arg)
{
- struct switchdev_notifier_fdb_info info;
+ struct switchdev_notifier_fdb_info info = {};
struct net_device *dev = NULL;
struct prestera_port *port;
struct prestera_lag *lag;
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h
index a91bc35d235f..0e93fda3d9a5 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h
@@ -8,7 +8,8 @@ int prestera_switchdev_init(struct prestera_switch *sw);
void prestera_switchdev_fini(struct prestera_switch *sw);
int prestera_bridge_port_join(struct net_device *br_dev,
- struct prestera_port *port);
+ struct prestera_port *port,
+ struct netlink_ext_ack *extack);
void prestera_bridge_port_leave(struct net_device *br_dev,
struct prestera_port *port);
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index 9b48ae4bac39..fab53c9b8380 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1377,7 +1377,7 @@ static const struct net_device_ops pxa168_eth_netdev_ops = {
.ndo_set_rx_mode = pxa168_eth_set_rx_mode,
.ndo_set_mac_address = pxa168_eth_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = phy_do_ioctl,
+ .ndo_eth_ioctl = phy_do_ioctl,
.ndo_change_mtu = pxa168_eth_change_mtu,
.ndo_tx_timeout = pxa168_eth_tx_timeout,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index d4bb27ba1419..051dd3fb5b03 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -615,7 +615,9 @@ static inline u32 skge_usecs2clk(const struct skge_hw *hw, u32 usec)
}
static int skge_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ecmd)
+ struct ethtool_coalesce *ecmd,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct skge_port *skge = netdev_priv(dev);
struct skge_hw *hw = skge->hw;
@@ -639,7 +641,9 @@ static int skge_get_coalesce(struct net_device *dev,
/* Note: interrupt timer is per board, but can turn on/off per port */
static int skge_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ecmd)
+ struct ethtool_coalesce *ecmd,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct skge_port *skge = netdev_priv(dev);
struct skge_hw *hw = skge->hw;
@@ -3787,7 +3791,7 @@ static const struct net_device_ops skge_netdev_ops = {
.ndo_open = skge_up,
.ndo_stop = skge_down,
.ndo_start_xmit = skge_xmit_frame,
- .ndo_do_ioctl = skge_ioctl,
+ .ndo_eth_ioctl = skge_ioctl,
.ndo_get_stats = skge_get_stats,
.ndo_tx_timeout = skge_tx_timeout,
.ndo_change_mtu = skge_change_mtu,
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 8b8bff59c8fe..e9fc74e54b22 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -4052,7 +4052,9 @@ static int sky2_set_pauseparam(struct net_device *dev,
}
static int sky2_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ecmd)
+ struct ethtool_coalesce *ecmd,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct sky2_port *sky2 = netdev_priv(dev);
struct sky2_hw *hw = sky2->hw;
@@ -4087,7 +4089,9 @@ static int sky2_get_coalesce(struct net_device *dev,
/* Note: this affect both ports */
static int sky2_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ecmd)
+ struct ethtool_coalesce *ecmd,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct sky2_port *sky2 = netdev_priv(dev);
struct sky2_hw *hw = sky2->hw;
@@ -4693,7 +4697,7 @@ static const struct net_device_ops sky2_netdev_ops[2] = {
.ndo_open = sky2_open,
.ndo_stop = sky2_close,
.ndo_start_xmit = sky2_xmit_frame,
- .ndo_do_ioctl = sky2_ioctl,
+ .ndo_eth_ioctl = sky2_ioctl,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = sky2_set_mac_address,
.ndo_set_rx_mode = sky2_set_multicast,
@@ -4710,7 +4714,7 @@ static const struct net_device_ops sky2_netdev_ops[2] = {
.ndo_open = sky2_open,
.ndo_stop = sky2_close,
.ndo_start_xmit = sky2_xmit_frame,
- .ndo_do_ioctl = sky2_ioctl,
+ .ndo_eth_ioctl = sky2_ioctl,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = sky2_set_mac_address,
.ndo_set_rx_mode = sky2_set_multicast,
@@ -4884,7 +4888,7 @@ static int sky2_test_msi(struct sky2_hw *hw)
/* This driver supports yukon2 chipset only */
static const char *sky2_name(u8 chipid, char *buf, int sz)
{
- const char *name[] = {
+ static const char *const name[] = {
"XL", /* 0xb3 */
"EC Ultra", /* 0xb4 */
"Extreme", /* 0xb5 */
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 64adfd24e134..398c23cec815 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2933,7 +2933,7 @@ static const struct net_device_ops mtk_netdev_ops = {
.ndo_start_xmit = mtk_start_xmit,
.ndo_set_mac_address = mtk_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = mtk_do_ioctl,
+ .ndo_eth_ioctl = mtk_do_ioctl,
.ndo_change_mtu = mtk_change_mtu,
.ndo_tx_timeout = mtk_tx_timeout,
.ndo_get_stats64 = mtk_get_stats64,
diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index 96d2891f1675..1d5dd2015453 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -1162,7 +1162,7 @@ static const struct net_device_ops mtk_star_netdev_ops = {
.ndo_start_xmit = mtk_star_netdev_start_xmit,
.ndo_get_stats64 = mtk_star_netdev_get_stats64,
.ndo_set_rx_mode = mtk_star_set_rx_mode,
- .ndo_do_ioctl = mtk_star_netdev_ioctl,
+ .ndo_eth_ioctl = mtk_star_netdev_ioctl,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
};
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig
index 400e611ba041..1b4b1f642317 100644
--- a/drivers/net/ethernet/mellanox/mlx4/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig
@@ -6,8 +6,8 @@
config MLX4_EN
tristate "Mellanox Technologies 1/10/40Gbit Ethernet support"
depends on PCI && NETDEVICES && ETHERNET && INET
+ depends on PTP_1588_CLOCK_OPTIONAL
select MLX4_CORE
- imply PTP_1588_CLOCK
help
This driver supports Mellanox Technologies ConnectX Ethernet
devices.
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 3616b77caa0a..ef518b1040f7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -998,7 +998,9 @@ mlx4_en_set_link_ksettings(struct net_device *dev,
}
static int mlx4_en_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -1020,7 +1022,9 @@ static int mlx4_en_get_coalesce(struct net_device *dev,
}
static int mlx4_en_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 5d0c9c62382d..a2f61a87cef8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2828,7 +2828,7 @@ static const struct net_device_ops mlx4_netdev_ops = {
.ndo_set_mac_address = mlx4_en_set_mac,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = mlx4_en_change_mtu,
- .ndo_do_ioctl = mlx4_en_ioctl,
+ .ndo_eth_ioctl = mlx4_en_ioctl,
.ndo_tx_timeout = mlx4_en_tx_timeout,
.ndo_vlan_rx_add_vid = mlx4_en_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = mlx4_en_vlan_rx_kill_vid,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 442991d91c15..7f6d3b82c29b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -991,7 +991,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
* expense of more costly truesize accounting
*/
priv->frag_info[0].frag_stride = PAGE_SIZE;
- priv->dma_dir = PCI_DMA_BIDIRECTIONAL;
+ priv->dma_dir = DMA_BIDIRECTIONAL;
priv->rx_headroom = XDP_PACKET_HEADROOM;
i = 1;
} else {
@@ -1021,7 +1021,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
buf_size += frag_size;
i++;
}
- priv->dma_dir = PCI_DMA_FROMDEVICE;
+ priv->dma_dir = DMA_FROM_DEVICE;
priv->rx_headroom = 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 31b74bddb7cd..c56b9dba4c71 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -297,12 +297,12 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
dma_unmap_single(priv->ddev,
tx_info->map0_dma,
tx_info->map0_byte_count,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
else
dma_unmap_page(priv->ddev,
tx_info->map0_dma,
tx_info->map0_byte_count,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
/* Optimize the common case when there are no wraparounds */
if (likely((void *)tx_desc +
(tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) {
@@ -311,7 +311,7 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
dma_unmap_page(priv->ddev,
(dma_addr_t)be64_to_cpu(data->addr),
be32_to_cpu(data->byte_count),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
}
} else {
if ((void *)data >= end)
@@ -325,7 +325,7 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
dma_unmap_page(priv->ddev,
(dma_addr_t)be64_to_cpu(data->addr),
be32_to_cpu(data->byte_count),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
}
}
}
@@ -831,7 +831,7 @@ static bool mlx4_en_build_dma_wqe(struct mlx4_en_priv *priv,
dma = dma_map_single(ddev, skb->data +
lso_header_size, byte_count,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
if (dma_mapping_error(ddev, dma))
goto tx_drop_unmap;
@@ -853,7 +853,7 @@ tx_drop_unmap:
++data;
dma_unmap_page(ddev, (dma_addr_t)be64_to_cpu(data->addr),
be32_to_cpu(data->byte_count),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
}
return false;
@@ -1170,7 +1170,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
- length, PCI_DMA_TODEVICE);
+ length, DMA_TO_DEVICE);
data->addr = cpu_to_be64(dma + frame->page_offset);
dma_wmb();
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 28ac4693da3c..5a6b0fcaf7f8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3806,24 +3806,15 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
pci_set_master(pdev);
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (err) {
dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
goto err_release_regions;
}
}
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
- if (err) {
- dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- if (err) {
- dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
- goto err_release_regions;
- }
- }
/* Allow large DMA segments, up to the firmware limit of 1 GB */
dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
@@ -4005,7 +3996,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
printk_once(KERN_INFO "%s", mlx4_version);
- devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv));
+ devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv), &pdev->dev);
if (!devlink)
return -ENOMEM;
priv = devlink_priv(devlink);
@@ -4024,7 +4015,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
mutex_init(&dev->persist->interface_state_mutex);
mutex_init(&dev->persist->pci_status_mutex);
- ret = devlink_register(devlink, &pdev->dev);
+ ret = devlink_register(devlink);
if (ret)
goto err_persist_free;
ret = devlink_params_register(devlink, mlx4_devlink_params,
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 427e7a31862c..b149e601f673 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -739,7 +739,7 @@ static void mlx4_cleanup_qp_zones(struct mlx4_dev *dev)
int i;
for (i = 0;
- i < sizeof(qp_table->zones_uids)/sizeof(qp_table->zones_uids[0]);
+ i < ARRAY_SIZE(qp_table->zones_uids);
i++) {
struct mlx4_bitmap *bitmap =
mlx4_zone_get_bitmap(qp_table->zones,
@@ -917,7 +917,7 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
{
int err;
int i;
- enum mlx4_qp_state states[] = {
+ static const enum mlx4_qp_state states[] = {
MLX4_QP_STATE_RST,
MLX4_QP_STATE_INIT,
MLX4_QP_STATE_RTR,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index e1a5a79e27c7..92056452a9e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -10,7 +10,7 @@ config MLX5_CORE
select NET_DEVLINK
depends on VXLAN || !VXLAN
depends on MLXFW || !MLXFW
- depends on PTP_1588_CLOCK || !PTP_1588_CLOCK
+ depends on PTP_1588_CLOCK_OPTIONAL
depends on PCI_HYPERV_INTERFACE || !PCI_HYPERV_INTERFACE
help
Core driver for low level functionality of the ConnectX-4 and
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index b5072a3a2585..63032cd6efb1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -15,14 +15,15 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
fs_counters.o fs_ft_pool.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
- lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
+ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
fw_reset.o qos.o
#
# Netdev basic
#
-mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
+mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \
+ en/channels.o en_main.o en_common.o en_fs.o en_ethtool.o \
en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
en_selftest.o en/port.o en/monitor_stats.o en/health.o \
en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
@@ -43,19 +44,22 @@ mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \
lib/fs_chains.o en/tc_tun.o \
esw/indir_table.o en/tc_tun_encap.o \
en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
- en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o
+ en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o \
+ en/tc/post_act.o
mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o
+mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o
#
# Core extra
#
mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
- ecpf.o rdma.o esw/legacy.o
+ ecpf.o rdma.o esw/legacy.o \
+ esw/devlink_port.o esw/vporttbl.o esw/qos.o
+
mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \
esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
- esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o \
- esw/devlink_port.o esw/vporttbl.o
-mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += esw/sample.o
+ esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o
+
mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o en/rep/bridge.o
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 9d79c5ec31e9..db5dfff585c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -877,7 +877,7 @@ static void cb_timeout_handler(struct work_struct *work)
ent->ret = -ETIMEDOUT;
mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n",
ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
- mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+ mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true);
out:
cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed work */
@@ -994,7 +994,7 @@ static void cmd_work_handler(struct work_struct *work)
MLX5_SET(mbox_out, ent->out, status, status);
MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);
- mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+ mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true);
return;
}
@@ -1008,7 +1008,7 @@ static void cmd_work_handler(struct work_struct *work)
poll_timeout(ent);
/* make sure we read the descriptor after ownership is SW */
rmb();
- mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT));
+ mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, (ent->ret == -ETIMEDOUT));
}
}
@@ -1068,7 +1068,7 @@ static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev,
mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
ent->ret = -ETIMEDOUT;
- mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+ mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true);
}
static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index df3e4938ecdd..cf97985628ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -89,7 +89,8 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 *in, int inlen, u32 *out, int outlen)
{
- int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
+ int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
+ c_eqn_or_apu_element);
u32 din[MLX5_ST_SZ_DW(destroy_cq_in)] = {};
struct mlx5_eq_comp *eq;
int err;
@@ -134,6 +135,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
cq->cqn);
cq->uar = dev->priv.uar;
+ cq->irqn = eq->core.irqn;
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index def2156e50ee..e8093c4e09d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -53,7 +53,7 @@ static bool is_eth_rep_supported(struct mlx5_core_dev *dev)
return true;
}
-static bool is_eth_supported(struct mlx5_core_dev *dev)
+bool mlx5_eth_supported(struct mlx5_core_dev *dev)
{
if (!IS_ENABLED(CONFIG_MLX5_CORE_EN))
return false;
@@ -105,7 +105,18 @@ static bool is_eth_supported(struct mlx5_core_dev *dev)
return true;
}
-static bool is_vnet_supported(struct mlx5_core_dev *dev)
+/* Read the ENABLE_ETH driverinit devlink param for this device.
+ * A failed lookup is reported as "not enabled".
+ */
+static bool is_eth_enabled(struct mlx5_core_dev *dev)
+{
+	struct devlink *devlink = priv_to_devlink(dev);
+	union devlink_param_value param;
+
+	if (devlink_param_driverinit_value_get(devlink,
+					       DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+					       &param))
+		return false;
+
+	return param.vbool;
+}
+
+bool mlx5_vnet_supported(struct mlx5_core_dev *dev)
{
if (!IS_ENABLED(CONFIG_MLX5_VDPA_NET))
return false;
@@ -127,6 +138,17 @@ static bool is_vnet_supported(struct mlx5_core_dev *dev)
return true;
}
+/* Read the ENABLE_VNET driverinit devlink param for this device.
+ * A failed lookup is reported as "not enabled".
+ */
+static bool is_vnet_enabled(struct mlx5_core_dev *dev)
+{
+	struct devlink *devlink = priv_to_devlink(dev);
+	union devlink_param_value param;
+
+	if (devlink_param_driverinit_value_get(devlink,
+					       DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
+					       &param))
+		return false;
+
+	return param.vbool;
+}
+
static bool is_ib_rep_supported(struct mlx5_core_dev *dev)
{
if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
@@ -170,7 +192,7 @@ static bool is_mp_supported(struct mlx5_core_dev *dev)
return true;
}
-static bool is_ib_supported(struct mlx5_core_dev *dev)
+bool mlx5_rdma_supported(struct mlx5_core_dev *dev)
{
if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
return false;
@@ -187,6 +209,17 @@ static bool is_ib_supported(struct mlx5_core_dev *dev)
return true;
}
+/* Read the ENABLE_RDMA driverinit devlink param for this device.
+ * A failed lookup is reported as "not enabled".
+ */
+static bool is_ib_enabled(struct mlx5_core_dev *dev)
+{
+	struct devlink *devlink = priv_to_devlink(dev);
+	union devlink_param_value param;
+
+	if (devlink_param_driverinit_value_get(devlink,
+					       DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+					       &param))
+		return false;
+
+	return param.vbool;
+}
+
enum {
MLX5_INTERFACE_PROTOCOL_ETH,
MLX5_INTERFACE_PROTOCOL_ETH_REP,
@@ -201,13 +234,17 @@ enum {
static const struct mlx5_adev_device {
const char *suffix;
bool (*is_supported)(struct mlx5_core_dev *dev);
+ bool (*is_enabled)(struct mlx5_core_dev *dev);
} mlx5_adev_devices[] = {
[MLX5_INTERFACE_PROTOCOL_VNET] = { .suffix = "vnet",
- .is_supported = &is_vnet_supported },
+ .is_supported = &mlx5_vnet_supported,
+ .is_enabled = &is_vnet_enabled },
[MLX5_INTERFACE_PROTOCOL_IB] = { .suffix = "rdma",
- .is_supported = &is_ib_supported },
+ .is_supported = &mlx5_rdma_supported,
+ .is_enabled = &is_ib_enabled },
[MLX5_INTERFACE_PROTOCOL_ETH] = { .suffix = "eth",
- .is_supported = &is_eth_supported },
+ .is_supported = &mlx5_eth_supported,
+ .is_enabled = &is_eth_enabled },
[MLX5_INTERFACE_PROTOCOL_ETH_REP] = { .suffix = "eth-rep",
.is_supported = &is_eth_rep_supported },
[MLX5_INTERFACE_PROTOCOL_IB_REP] = { .suffix = "rdma-rep",
@@ -308,6 +345,14 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
if (!priv->adev[i]) {
bool is_supported = false;
+ if (mlx5_adev_devices[i].is_enabled) {
+ bool enabled;
+
+ enabled = mlx5_adev_devices[i].is_enabled(dev);
+ if (!enabled)
+ continue;
+ }
+
if (mlx5_adev_devices[i].is_supported)
is_supported = mlx5_adev_devices[i].is_supported(dev);
@@ -360,6 +405,14 @@ void mlx5_detach_device(struct mlx5_core_dev *dev)
if (!priv->adev[i])
continue;
+ if (mlx5_adev_devices[i].is_enabled) {
+ bool enabled;
+
+ enabled = mlx5_adev_devices[i].is_enabled(dev);
+ if (!enabled)
+ goto skip_suspend;
+ }
+
adev = &priv->adev[i]->adev;
/* Auxiliary driver was unbind manually through sysfs */
if (!adev->dev.driver)
@@ -397,7 +450,7 @@ int mlx5_register_device(struct mlx5_core_dev *dev)
void mlx5_unregister_device(struct mlx5_core_dev *dev)
{
mutex_lock(&mlx5_intf_mutex);
- dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
+ dev->priv.flags = MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
mlx5_rescan_drivers_locked(dev);
mutex_unlock(&mlx5_intf_mutex);
}
@@ -447,12 +500,21 @@ static void delete_drivers(struct mlx5_core_dev *dev)
if (!priv->adev[i])
continue;
+ if (mlx5_adev_devices[i].is_enabled) {
+ bool enabled;
+
+ enabled = mlx5_adev_devices[i].is_enabled(dev);
+ if (!enabled)
+ goto del_adev;
+ }
+
if (mlx5_adev_devices[i].is_supported && !delete_all)
is_supported = mlx5_adev_devices[i].is_supported(dev);
if (is_supported)
continue;
+del_adev:
del_adev(&priv->adev[i]->adev);
priv->adev[i] = NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index d791d351b489..e84287ffc7ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -7,6 +7,7 @@
#include "fw_reset.h"
#include "fs_core.h"
#include "eswitch.h"
+#include "esw/qos.h"
#include "sf/dev/dev.h"
#include "sf/sf.h"
@@ -292,6 +293,13 @@ static const struct devlink_ops mlx5_devlink_ops = {
.eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get,
.port_function_hw_addr_get = mlx5_devlink_port_function_hw_addr_get,
.port_function_hw_addr_set = mlx5_devlink_port_function_hw_addr_set,
+ .rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set,
+ .rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set,
+ .rate_node_tx_share_set = mlx5_esw_devlink_rate_node_tx_share_set,
+ .rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set,
+ .rate_node_new = mlx5_esw_devlink_rate_node_new,
+ .rate_node_del = mlx5_esw_devlink_rate_node_del,
+ .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set,
#endif
#ifdef CONFIG_MLX5_SF_MANAGER
.port_new = mlx5_devlink_sf_port_new,
@@ -359,9 +367,10 @@ int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id,
return 0;
}
-struct devlink *mlx5_devlink_alloc(void)
+/* Allocate the devlink instance; its private area is sized for one
+ * struct mlx5_core_dev and @dev is handed straight to devlink_alloc().
+ */
+struct devlink *mlx5_devlink_alloc(struct device *dev)
{
- return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev));
+	const size_t priv_size = sizeof(struct mlx5_core_dev);
+
+	return devlink_alloc(&mlx5_devlink_ops, priv_size, dev);
}
void mlx5_devlink_free(struct devlink *devlink)
@@ -595,6 +604,157 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
#endif
}
+static const struct devlink_param enable_eth_param =
+ DEVLINK_PARAM_GENERIC(ENABLE_ETH, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, NULL);
+
+/* Register the ENABLE_ETH param, default it to true and publish it.
+ * Returns 0 without registering anything when mlx5_eth_supported() is
+ * false; otherwise 0 or the devlink_param_register() error.
+ */
+static int mlx5_devlink_eth_param_register(struct devlink *devlink)
+{
+	struct mlx5_core_dev *mdev = devlink_priv(devlink);
+	union devlink_param_value on;
+	int rc;
+
+	if (!mlx5_eth_supported(mdev))
+		return 0;
+
+	rc = devlink_param_register(devlink, &enable_eth_param);
+	if (rc)
+		return rc;
+
+	/* Default the driverinit value to "enabled" before publishing. */
+	on.vbool = true;
+	devlink_param_driverinit_value_set(devlink,
+					   DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+					   on);
+	devlink_param_publish(devlink, &enable_eth_param);
+	return 0;
+}
+
+/* Undo mlx5_devlink_eth_param_register(): unpublish, then unregister.
+ * Uses the same mlx5_eth_supported() guard as the register path.
+ */
+static void mlx5_devlink_eth_param_unregister(struct devlink *devlink)
+{
+	struct mlx5_core_dev *mdev = devlink_priv(devlink);
+
+	if (mlx5_eth_supported(mdev)) {
+		devlink_param_unpublish(devlink, &enable_eth_param);
+		devlink_param_unregister(devlink, &enable_eth_param);
+	}
+}
+
+/* Validate a new ENABLE_RDMA value: switching it on is rejected with
+ * -EOPNOTSUPP when mlx5_rdma_supported() is false; switching it off is
+ * always accepted.
+ */
+static int mlx5_devlink_enable_rdma_validate(struct devlink *devlink, u32 id,
+					     union devlink_param_value val,
+					     struct netlink_ext_ack *extack)
+{
+	struct mlx5_core_dev *mdev = devlink_priv(devlink);
+
+	if (!val.vbool)
+		return 0;
+
+	return mlx5_rdma_supported(mdev) ? 0 : -EOPNOTSUPP;
+}
+
+static const struct devlink_param enable_rdma_param =
+ DEVLINK_PARAM_GENERIC(ENABLE_RDMA, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, mlx5_devlink_enable_rdma_validate);
+
+/* Register the ENABLE_RDMA param, default it to true and publish it.
+ * Skipped (returns 0) when CONFIG_MLX5_INFINIBAND is off or the device is
+ * an eswitch manager; otherwise 0 or the devlink_param_register() error.
+ */
+static int mlx5_devlink_rdma_param_register(struct devlink *devlink)
+{
+	struct mlx5_core_dev *mdev = devlink_priv(devlink);
+	union devlink_param_value on;
+	int rc;
+
+	if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND) || MLX5_ESWITCH_MANAGER(mdev))
+		return 0;
+
+	rc = devlink_param_register(devlink, &enable_rdma_param);
+	if (rc)
+		return rc;
+
+	/* Default the driverinit value to "enabled" before publishing. */
+	on.vbool = true;
+	devlink_param_driverinit_value_set(devlink,
+					   DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+					   on);
+	devlink_param_publish(devlink, &enable_rdma_param);
+	return 0;
+}
+
+/* Undo mlx5_devlink_rdma_param_register(): unpublish, then unregister,
+ * under the same guard the register path uses.
+ */
+static void mlx5_devlink_rdma_param_unregister(struct devlink *devlink)
+{
+	struct mlx5_core_dev *mdev = devlink_priv(devlink);
+
+	if (IS_ENABLED(CONFIG_MLX5_INFINIBAND) && !MLX5_ESWITCH_MANAGER(mdev)) {
+		devlink_param_unpublish(devlink, &enable_rdma_param);
+		devlink_param_unregister(devlink, &enable_rdma_param);
+	}
+}
+
+static const struct devlink_param enable_vnet_param =
+ DEVLINK_PARAM_GENERIC(ENABLE_VNET, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, NULL);
+
+/* Register the ENABLE_VNET param, default it to true and publish it.
+ * Returns 0 without registering anything when mlx5_vnet_supported() is
+ * false; otherwise 0 or the devlink_param_register() error.
+ */
+static int mlx5_devlink_vnet_param_register(struct devlink *devlink)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	union devlink_param_value value;
+	int err;
+
+	if (!mlx5_vnet_supported(dev))
+		return 0;
+
+	err = devlink_param_register(devlink, &enable_vnet_param);
+	if (err)
+		return err;
+
+	value.vbool = true;
+	devlink_param_driverinit_value_set(devlink,
+					   DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
+					   value);
+	/* Publish the vnet param itself (not enable_rdma_param), matching
+	 * the unpublish in mlx5_devlink_vnet_param_unregister().
+	 */
+	devlink_param_publish(devlink, &enable_vnet_param);
+	return 0;
+}
+
+/* Unpublish and unregister the ENABLE_VNET param; a no-op when
+ * mlx5_vnet_supported() is false.
+ */
+static void mlx5_devlink_vnet_param_unregister(struct devlink *devlink)
+{
+	struct mlx5_core_dev *mdev = devlink_priv(devlink);
+
+	if (mlx5_vnet_supported(mdev)) {
+		devlink_param_unpublish(devlink, &enable_vnet_param);
+		devlink_param_unregister(devlink, &enable_vnet_param);
+	}
+}
+
+/* Register the eth, rdma and vnet enable params in that order.
+ * On failure the params registered so far are unregistered in reverse
+ * order and the error is returned.
+ */
+static int mlx5_devlink_auxdev_params_register(struct devlink *devlink)
+{
+	int rc;
+
+	rc = mlx5_devlink_eth_param_register(devlink);
+	if (rc)
+		return rc;
+
+	rc = mlx5_devlink_rdma_param_register(devlink);
+	if (rc)
+		goto undo_eth;
+
+	rc = mlx5_devlink_vnet_param_register(devlink);
+	if (rc)
+		goto undo_rdma;
+
+	return 0;
+
+undo_rdma:
+	mlx5_devlink_rdma_param_unregister(devlink);
+undo_eth:
+	mlx5_devlink_eth_param_unregister(devlink);
+	return rc;
+}
+
+/* Unregister the vnet, rdma and eth enable params, i.e. in the reverse of
+ * the order mlx5_devlink_auxdev_params_register() registers them.
+ */
+static void mlx5_devlink_auxdev_params_unregister(struct devlink *devlink)
+{
+ mlx5_devlink_vnet_param_unregister(devlink);
+ mlx5_devlink_rdma_param_unregister(devlink);
+ mlx5_devlink_eth_param_unregister(devlink);
+}
+
#define MLX5_TRAP_DROP(_id, _group_id) \
DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \
DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \
@@ -638,11 +798,11 @@ static void mlx5_devlink_traps_unregister(struct devlink *devlink)
ARRAY_SIZE(mlx5_trap_groups_arr));
}
-int mlx5_devlink_register(struct devlink *devlink, struct device *dev)
+int mlx5_devlink_register(struct devlink *devlink)
{
int err;
- err = devlink_register(devlink, dev);
+ err = devlink_register(devlink);
if (err)
return err;
@@ -653,6 +813,10 @@ int mlx5_devlink_register(struct devlink *devlink, struct device *dev)
mlx5_devlink_set_params_init_values(devlink);
devlink_params_publish(devlink);
+ err = mlx5_devlink_auxdev_params_register(devlink);
+ if (err)
+ goto auxdev_reg_err;
+
err = mlx5_devlink_traps_register(devlink);
if (err)
goto traps_reg_err;
@@ -660,6 +824,8 @@ int mlx5_devlink_register(struct devlink *devlink, struct device *dev)
return 0;
traps_reg_err:
+ mlx5_devlink_auxdev_params_unregister(devlink);
+auxdev_reg_err:
devlink_params_unregister(devlink, mlx5_devlink_params,
ARRAY_SIZE(mlx5_devlink_params));
params_reg_err:
@@ -670,6 +836,8 @@ params_reg_err:
void mlx5_devlink_unregister(struct devlink *devlink)
{
mlx5_devlink_traps_unregister(devlink);
+ mlx5_devlink_auxdev_params_unregister(devlink);
+ devlink_params_unpublish(devlink);
devlink_params_unregister(devlink, mlx5_devlink_params,
ARRAY_SIZE(mlx5_devlink_params));
devlink_unregister(devlink);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
index 7318d44b774b..30bf4882779b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
@@ -31,9 +31,9 @@ int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev);
int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id,
enum devlink_trap_action *action);
-struct devlink *mlx5_devlink_alloc(void);
+struct devlink *mlx5_devlink_alloc(struct device *dev);
void mlx5_devlink_free(struct devlink *devlink);
-int mlx5_devlink_register(struct devlink *devlink, struct device *dev);
+int mlx5_devlink_register(struct devlink *devlink);
void mlx5_devlink_unregister(struct devlink *devlink);
#endif /* __MLX5_DEVLINK_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index 01a1d02dcf15..3f8a98093f8c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -1019,12 +1019,19 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER);
mlx5_eq_notifier_register(dev, &tracer->nb);
- mlx5_fw_tracer_start(tracer);
-
+ err = mlx5_fw_tracer_start(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Failed to start tracer %d\n", err);
+ goto err_notifier_unregister;
+ }
return 0;
+err_notifier_unregister:
+ mlx5_eq_notifier_unregister(dev, &tracer->nb);
+ mlx5_core_destroy_mkey(dev, &tracer->buff.mkey);
err_dealloc_pd:
mlx5_core_dealloc_pd(dev, tracer->buff.pdn);
+ cancel_work_sync(&tracer->read_fw_strings_work);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index b1b51bbba054..669a75f3537a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -58,6 +58,7 @@
#include "en/qos.h"
#include "lib/hv_vhca.h"
#include "lib/clock.h"
+#include "en/rx_res.h"
extern const struct net_device_ops mlx5e_netdev_ops;
struct page_pool;
@@ -65,14 +66,13 @@ struct page_pool;
#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
#define MLX5E_METADATA_ETHER_LEN 8
-#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
-
#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
#define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
#define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu))
#define MLX5E_MAX_NUM_TC 8
+#define MLX5E_MAX_NUM_MQPRIO_CH_TC TC_QOPT_MAX_QUEUE
#define MLX5_RX_HEADROOM NET_SKB_PAD
#define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \
@@ -126,7 +126,6 @@ struct page_pool;
#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2
-#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024)
#define MLX5E_DEFAULT_LRO_TIMEOUT 32
#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4
@@ -139,8 +138,6 @@ struct page_pool;
#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80
#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2
-#define MLX5E_LOG_INDIR_RQT_SIZE 0x8
-#define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE)
#define MLX5E_MIN_NUM_CHANNELS 0x1
#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2)
#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
@@ -252,7 +249,10 @@ struct mlx5e_params {
u8 rq_wq_type;
u8 log_rq_mtu_frames;
u16 num_channels;
- u8 num_tc;
+ struct {
+ u16 mode;
+ u8 num_tc;
+ } mqprio;
bool rx_cqe_compress_def;
bool tunneled_offload_en;
struct dim_cq_moder rx_cq_moderation;
@@ -272,6 +272,12 @@ struct mlx5e_params {
bool ptp_rx;
};
+static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params)
+{
+	/* Only DCB mqprio mode reports its stored TC count; any other mode yields 1. */
+	return params->mqprio.mode != TC_MQPRIO_MODE_DCB ? 1 : params->mqprio.num_tc;
+}
+
enum {
MLX5E_RQ_STATE_ENABLED,
MLX5E_RQ_STATE_RECOVERING,
@@ -745,29 +751,11 @@ enum {
MLX5E_STATE_XDP_ACTIVE,
};
-struct mlx5e_rqt {
- u32 rqtn;
- bool enabled;
-};
-
-struct mlx5e_tir {
- u32 tirn;
- struct mlx5e_rqt rqt;
- struct list_head list;
-};
-
enum {
MLX5E_TC_PRIO = 0,
MLX5E_NIC_PRIO
};
-struct mlx5e_rss_params {
- u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
- u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
- u8 toeplitz_hash_key[40];
- u8 hfunc;
-};
-
struct mlx5e_modify_sq_param {
int curr_state;
int next_state;
@@ -837,13 +825,7 @@ struct mlx5e_priv {
struct mlx5e_channels channels;
u32 tisn[MLX5_MAX_PORTS][MLX5E_MAX_NUM_TC];
- struct mlx5e_rqt indir_rqt;
- struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
- struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
- struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS];
- struct mlx5e_tir xsk_tir[MLX5E_MAX_NUM_CHANNELS];
- struct mlx5e_tir ptp_tir;
- struct mlx5e_rss_params rss_params;
+ struct mlx5e_rx_res *rx_res;
u32 tx_rates[MLX5E_MAX_NUM_SQS];
struct mlx5e_flow_steering fs;
@@ -948,25 +930,6 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
u16 vid);
void mlx5e_timestamp_init(struct mlx5e_priv *priv);
-struct mlx5e_redirect_rqt_param {
- bool is_rss;
- union {
- u32 rqn; /* Direct RQN (Non-RSS) */
- struct {
- u8 hfunc;
- struct mlx5e_channels *channels;
- } rss; /* RSS data */
- };
-};
-
-int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
- struct mlx5e_redirect_rqt_param rrp);
-void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
- const struct mlx5e_tirc_config *ttconfig,
- void *tirc, bool inner);
-void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in);
-struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt);
-
struct mlx5e_xsk_param;
struct mlx5e_rq_param;
@@ -1028,9 +991,6 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx);
-void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
- int num_channels);
-
int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
void mlx5e_activate_rq(struct mlx5e_rq *rq);
void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
@@ -1065,10 +1025,6 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
extern const struct ethtool_ops mlx5e_ethtool_ops;
-int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir,
- u32 *in);
-void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
- struct mlx5e_tir *tir);
int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
@@ -1084,17 +1040,6 @@ void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq);
int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node);
void mlx5e_free_di_list(struct mlx5e_rq *rq);
-int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv);
-
-int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv);
-
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n);
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n);
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n);
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n);
-void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
-
int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn);
void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
@@ -1106,7 +1051,6 @@ int mlx5e_close(struct net_device *netdev);
int mlx5e_open(struct net_device *netdev);
void mlx5e_queue_update_stats(struct mlx5e_priv *priv);
-int mlx5e_bits_invert(unsigned long a, int size);
int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv);
int mlx5e_set_dev_port_mtu_ctx(struct mlx5e_priv *priv, void *context);
@@ -1183,8 +1127,6 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv);
void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu);
-void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
- u16 num_channels);
void mlx5e_rx_dim_work(struct work_struct *work);
void mlx5e_tx_dim_work(struct work_struct *work);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
new file mode 100644
index 000000000000..e7c14c0de0a7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "channels.h"
+#include "en.h"
+#include "en/ptp.h"
+
+unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs)
+{
+ return chs->num; /* the num field; the rqn getters below use it as the bound on channel indices */
+}
+
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+	struct mlx5e_channel *chan;
+
+	/* An out-of-range @ix only triggers a warning; the lookup below still runs. */
+	WARN_ON(ix >= mlx5e_channels_get_num(chs));
+	chan = chs->c[ix];
+	*rqn = chan->rq.rqn;
+}
+
+bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+	struct mlx5e_channel *chan;
+	bool has_xsk;
+
+	WARN_ON(ix >= mlx5e_channels_get_num(chs));
+	chan = chs->c[ix];
+	/* @rqn is written, and true returned, only if the channel's XSK state bit is set. */
+	has_xsk = test_bit(MLX5E_CHANNEL_STATE_XSK, chan->state);
+	if (has_xsk)
+		*rqn = chan->xskrq.rqn;
+	return has_xsk;
+}
+
+bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn)
+{
+	struct mlx5e_ptp *ptp = chs->ptp;
+	bool rx_on = ptp && test_bit(MLX5E_PTP_STATE_RX, ptp->state);
+
+	/* @rqn is written only when a PTP channel exists and its RX state bit is set. */
+	if (rx_on)
+		*rqn = ptp->rq.rqn;
+	return rx_on;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
new file mode 100644
index 000000000000..ca00cbc827cb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_CHANNELS_H__
+#define __MLX5_EN_CHANNELS_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_channels;
+
+unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs);
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn);
+
+#endif /* __MLX5_EN_CHANNELS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
index bc33eaada3b9..86e079310ac3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
@@ -55,19 +55,15 @@ void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv)
{
struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
- if (dl_port->registered)
- devlink_port_unregister(dl_port);
+ devlink_port_unregister(dl_port);
}
struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct devlink_port *port;
if (!netif_device_present(dev))
return NULL;
- port = mlx5e_devlink_get_dl_port(priv);
- if (port->registered)
- return port;
- return NULL;
+
+ return mlx5e_devlink_get_dl_port(priv);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 1d5ce07b83f4..41684a6c44e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -5,6 +5,9 @@
#define __MLX5E_FLOW_STEER_H__
#include "mod_hdr.h"
+#include "lib/fs_ttc.h"
+
+struct mlx5e_post_act;
enum {
MLX5E_TC_FT_LEVEL = 0,
@@ -18,6 +21,7 @@ struct mlx5e_tc_table {
struct mutex t_lock;
struct mlx5_flow_table *t;
struct mlx5_fs_chains *chains;
+ struct mlx5e_post_act *post_act;
struct rhashtable ht;
@@ -67,27 +71,7 @@ struct mlx5e_l2_table {
bool promisc_enabled;
};
-enum mlx5e_traffic_types {
- MLX5E_TT_IPV4_TCP,
- MLX5E_TT_IPV6_TCP,
- MLX5E_TT_IPV4_UDP,
- MLX5E_TT_IPV6_UDP,
- MLX5E_TT_IPV4_IPSEC_AH,
- MLX5E_TT_IPV6_IPSEC_AH,
- MLX5E_TT_IPV4_IPSEC_ESP,
- MLX5E_TT_IPV6_IPSEC_ESP,
- MLX5E_TT_IPV4,
- MLX5E_TT_IPV6,
- MLX5E_TT_ANY,
- MLX5E_NUM_TT,
- MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY,
-};
-
-struct mlx5e_tirc_config {
- u8 l3_prot_type;
- u8 l4_prot_type;
- u32 rx_hash_fields;
-};
+#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_TT - 1)
#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
MLX5_HASH_FIELD_SEL_DST_IP)
@@ -99,30 +83,6 @@ struct mlx5e_tirc_config {
MLX5_HASH_FIELD_SEL_DST_IP |\
MLX5_HASH_FIELD_SEL_IPSEC_SPI)
-enum mlx5e_tunnel_types {
- MLX5E_TT_IPV4_GRE,
- MLX5E_TT_IPV6_GRE,
- MLX5E_TT_IPV4_IPIP,
- MLX5E_TT_IPV6_IPIP,
- MLX5E_TT_IPV4_IPV6,
- MLX5E_TT_IPV6_IPV6,
- MLX5E_NUM_TUNNEL_TT,
-};
-
-bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev);
-
-struct mlx5e_ttc_rule {
- struct mlx5_flow_handle *rule;
- struct mlx5_flow_destination default_dest;
-};
-
-/* L3/L4 traffic type classifier */
-struct mlx5e_ttc_table {
- struct mlx5e_flow_table ft;
- struct mlx5e_ttc_rule rules[MLX5E_NUM_TT];
- struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT];
-};
-
/* NIC prio FTS */
enum {
MLX5E_PROMISC_FT_LEVEL,
@@ -144,21 +104,7 @@ enum {
#endif
};
-#define MLX5E_TTC_NUM_GROUPS 3
-#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT)
-#define MLX5E_TTC_GROUP2_SIZE BIT(1)
-#define MLX5E_TTC_GROUP3_SIZE BIT(0)
-#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\
- MLX5E_TTC_GROUP2_SIZE +\
- MLX5E_TTC_GROUP3_SIZE)
-
-#define MLX5E_INNER_TTC_NUM_GROUPS 3
-#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3)
-#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1)
-#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0)
-#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\
- MLX5E_INNER_TTC_GROUP2_SIZE +\
- MLX5E_INNER_TTC_GROUP3_SIZE)
+struct mlx5e_priv;
#ifdef CONFIG_MLX5_EN_RXNFC
@@ -226,8 +172,8 @@ struct mlx5e_flow_steering {
struct mlx5e_promisc_table promisc;
struct mlx5e_vlan_table *vlan;
struct mlx5e_l2_table l2;
- struct mlx5e_ttc_table ttc;
- struct mlx5e_ttc_table inner_ttc;
+ struct mlx5_ttc_table *ttc;
+ struct mlx5_ttc_table *inner_ttc;
#ifdef CONFIG_MLX5_EN_ARFS
struct mlx5e_arfs_tables *arfs;
#endif
@@ -239,33 +185,13 @@ struct mlx5e_flow_steering {
struct mlx5e_ptp_fs *ptp_fs;
};
-struct ttc_params {
- struct mlx5_flow_table_attr ft_attr;
- u32 any_tt_tirn;
- u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
- struct mlx5e_ttc_table *inner_ttc;
-};
-
-void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params);
-void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params);
-void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params);
-
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
- struct mlx5e_ttc_table *ttc);
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv,
- struct mlx5e_ttc_table *ttc);
+void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
+ struct ttc_params *ttc_params, bool tunnel);
-int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
- struct mlx5e_ttc_table *ttc);
-void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv,
- struct mlx5e_ttc_table *ttc);
+void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv);
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv);
void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
-int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type,
- struct mlx5_flow_destination *new_dest);
-struct mlx5_flow_destination
-mlx5e_ttc_get_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type);
-int mlx5e_ttc_fwd_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type);
void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv);
void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
@@ -273,7 +199,6 @@ void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
-u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt);
int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num);
void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv);
int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int trap_id, int tir_num);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
index 909faa6c89d7..7aa25a5e29d7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
@@ -33,22 +33,22 @@ static char *fs_udp_type2str(enum fs_udp_type i)
}
}
-static enum mlx5e_traffic_types fs_udp2tt(enum fs_udp_type i)
+static enum mlx5_traffic_types fs_udp2tt(enum fs_udp_type i)
{
switch (i) {
case FS_IPV4_UDP:
- return MLX5E_TT_IPV4_UDP;
+ return MLX5_TT_IPV4_UDP;
default: /* FS_IPV6_UDP */
- return MLX5E_TT_IPV6_UDP;
+ return MLX5_TT_IPV6_UDP;
}
}
-static enum fs_udp_type tt2fs_udp(enum mlx5e_traffic_types i)
+static enum fs_udp_type tt2fs_udp(enum mlx5_traffic_types i)
{
switch (i) {
- case MLX5E_TT_IPV4_UDP:
+ case MLX5_TT_IPV4_UDP:
return FS_IPV4_UDP;
- case MLX5E_TT_IPV6_UDP:
+ case MLX5_TT_IPV6_UDP:
return FS_IPV6_UDP;
default:
return FS_UDP_NUM_TYPES;
@@ -75,7 +75,7 @@ static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type
struct mlx5_flow_handle *
mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
- enum mlx5e_traffic_types ttc_type,
+ enum mlx5_traffic_types ttc_type,
u32 tir_num, u16 d_port)
{
enum fs_udp_type type = tt2fs_udp(ttc_type);
@@ -124,7 +124,7 @@ static int fs_udp_add_default_rule(struct mlx5e_priv *priv, enum fs_udp_type typ
fs_udp = priv->fs.udp;
fs_udp_t = &fs_udp->tables[type];
- dest = mlx5e_ttc_get_default_dest(priv, fs_udp2tt(type));
+ dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_udp2tt(type));
rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -259,7 +259,7 @@ static int fs_udp_disable(struct mlx5e_priv *priv)
for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
/* Modify ttc rules destination to point back to the indir TIRs */
- err = mlx5e_ttc_fwd_default_dest(priv, fs_udp2tt(i));
+ err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_udp2tt(i));
if (err) {
netdev_err(priv->netdev,
"%s: modify ttc[%d] default destination failed, err(%d)\n",
@@ -281,7 +281,7 @@ static int fs_udp_enable(struct mlx5e_priv *priv)
dest.ft = priv->fs.udp->tables[i].t;
/* Modify ttc rules destination to point on the accel_fs FTs */
- err = mlx5e_ttc_fwd_dest(priv, fs_udp2tt(i), &dest);
+ err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_udp2tt(i), &dest);
if (err) {
netdev_err(priv->netdev,
"%s: modify ttc[%d] destination to accel failed, err(%d)\n",
@@ -401,7 +401,7 @@ static int fs_any_add_default_rule(struct mlx5e_priv *priv)
fs_any = priv->fs.any;
fs_any_t = &fs_any->table;
- dest = mlx5e_ttc_get_default_dest(priv, MLX5E_TT_ANY);
+ dest = mlx5_ttc_get_default_dest(priv->fs.ttc, MLX5_TT_ANY);
rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -514,11 +514,11 @@ static int fs_any_disable(struct mlx5e_priv *priv)
int err;
/* Modify ttc rules destination to point back to the indir TIRs */
- err = mlx5e_ttc_fwd_default_dest(priv, MLX5E_TT_ANY);
+ err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, MLX5_TT_ANY);
if (err) {
netdev_err(priv->netdev,
"%s: modify ttc[%d] default destination failed, err(%d)\n",
- __func__, MLX5E_TT_ANY, err);
+ __func__, MLX5_TT_ANY, err);
return err;
}
return 0;
@@ -533,11 +533,11 @@ static int fs_any_enable(struct mlx5e_priv *priv)
dest.ft = priv->fs.any->table.t;
/* Modify ttc rules destination to point on the accel_fs FTs */
- err = mlx5e_ttc_fwd_dest(priv, MLX5E_TT_ANY, &dest);
+ err = mlx5_ttc_fwd_dest(priv->fs.ttc, MLX5_TT_ANY, &dest);
if (err) {
netdev_err(priv->netdev,
"%s: modify ttc[%d] destination to accel failed, err(%d)\n",
- __func__, MLX5E_TT_ANY, err);
+ __func__, MLX5_TT_ANY, err);
return err;
}
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
index 8385df24eb99..7a70c4f38fda 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
@@ -12,7 +12,7 @@ void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule);
/* UDP traffic type redirect */
struct mlx5_flow_handle *
mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
- enum mlx5e_traffic_types ttc_type,
+ enum mlx5_traffic_types ttc_type,
u32 tir_num, u16 d_port);
void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv);
int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c
index ea321e528749..4e72ca8070e2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c
@@ -5,11 +5,15 @@
#include <linux/slab.h>
#include <linux/xarray.h>
#include <linux/hashtable.h>
+#include <linux/refcount.h>
#include "mapping.h"
#define MAPPING_GRACE_PERIOD 2000
+static LIST_HEAD(shared_ctx_list);
+static DEFINE_MUTEX(shared_ctx_lock);
+
struct mapping_ctx {
struct xarray xarray;
DECLARE_HASHTABLE(ht, 8);
@@ -20,6 +24,10 @@ struct mapping_ctx {
struct delayed_work dwork;
struct list_head pending_list;
spinlock_t pending_list_lock; /* Guards pending list */
+ u64 id;
+ u8 type;
+ struct list_head list;
+ refcount_t refcount;
};
struct mapping_item {
@@ -205,11 +213,48 @@ mapping_create(size_t data_size, u32 max_id, bool delayed_removal)
mutex_init(&ctx->lock);
xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1);
+ refcount_set(&ctx->refcount, 1);
+ INIT_LIST_HEAD(&ctx->list);
+
+ return ctx;
+}
+
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal)
+{
+	struct mapping_ctx *entry, *ctx = NULL;
+
+	mutex_lock(&shared_ctx_lock);
+	list_for_each_entry(entry, &shared_ctx_list, list) {
+		if (entry->id != id || entry->type != type)
+			continue;
+		/* Only the first match is considered; reuse it unless its refcount already hit zero. */
+		if (refcount_inc_not_zero(&entry->refcount))
+			ctx = entry;
+		break;
+	}
+
+	if (!ctx) {
+		ctx = mapping_create(data_size, max_id, delayed_removal);
+		if (!IS_ERR(ctx)) {
+			ctx->id = id;
+			ctx->type = type;
+			list_add(&ctx->list, &shared_ctx_list);
+		}
+	}
+	mutex_unlock(&shared_ctx_lock);
+	return ctx;
+}
void mapping_destroy(struct mapping_ctx *ctx)
{
+ if (!refcount_dec_and_test(&ctx->refcount))
+ return;
+
+ mutex_lock(&shared_ctx_lock);
+ list_del(&ctx->list);
+ mutex_unlock(&shared_ctx_lock);
+
mapping_flush_work(ctx);
xa_destroy(&ctx->xarray);
mutex_destroy(&ctx->lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h
index 285525cc5470..4e2119f0f4c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h
@@ -24,4 +24,9 @@ struct mapping_ctx *mapping_create(size_t data_size, u32 max_id,
bool delayed_removal);
void mapping_destroy(struct mapping_ctx *ctx);
+/* Adds a mapping for (id, type), or gets the existing mapping with the same id and type.
+ */
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal);
+
#endif /* __MLX5_MAPPING_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 2cbf18c967f7..3cbb596821e8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -167,6 +167,18 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0;
}
+/* Return the LRO settings of @params as a standalone struct mlx5e_lro_param. */
+struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params)
+{
+	/* Just the enable flag and the timeout are copied. */
+	struct mlx5e_lro_param lro = {
+		.enabled = params->lro_en,
+		.timeout = params->lro_timeout,
+	};
+
+	return lro;
+}
+
u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
{
bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index e9593f5f0661..879ad46d754e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -11,6 +11,11 @@ struct mlx5e_xsk_param {
u16 chunk_size;
};
+struct mlx5e_lro_param {
+ bool enabled; /* mlx5e_get_lro_param() fills this from mlx5e_params.lro_en */
+ u32 timeout; /* mlx5e_get_lro_param() fills this from mlx5e_params.lro_timeout */
+};
+
struct mlx5e_cq_param {
u32 cqc[MLX5_ST_SZ_DW(cqc)];
struct mlx5_wq_param wq;
@@ -120,6 +125,7 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
+struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params);
/* Build queue parameters */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index efef4adce086..ee688dec67a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -326,13 +326,14 @@ static int mlx5e_ptp_open_txqsqs(struct mlx5e_ptp *c,
struct mlx5e_ptp_params *cparams)
{
struct mlx5e_params *params = &cparams->params;
+ u8 num_tc = mlx5e_get_dcb_num_tc(params);
int ix_base;
int err;
int tc;
- ix_base = params->num_tc * params->num_channels;
+ ix_base = num_tc * params->num_channels;
- for (tc = 0; tc < params->num_tc; tc++) {
+ for (tc = 0; tc < num_tc; tc++) {
int txq_ix = ix_base + tc;
err = mlx5e_ptp_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
@@ -365,9 +366,12 @@ static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c,
struct mlx5e_create_cq_param ccp = {};
struct dim_cq_moder ptp_moder = {};
struct mlx5e_cq_param *cq_param;
+ u8 num_tc;
int err;
int tc;
+ num_tc = mlx5e_get_dcb_num_tc(params);
+
ccp.node = dev_to_node(mlx5_core_dma_dev(c->mdev));
ccp.ch_stats = c->stats;
ccp.napi = &c->napi;
@@ -375,7 +379,7 @@ static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c,
cq_param = &cparams->txq_sq_param.cqp;
- for (tc = 0; tc < params->num_tc; tc++) {
+ for (tc = 0; tc < num_tc; tc++) {
struct mlx5e_cq *cq = &c->ptpsq[tc].txqsq.cq;
err = mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq);
@@ -383,7 +387,7 @@ static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c,
goto out_err_txqsq_cq;
}
- for (tc = 0; tc < params->num_tc; tc++) {
+ for (tc = 0; tc < num_tc; tc++) {
struct mlx5e_cq *cq = &c->ptpsq[tc].ts_cq;
struct mlx5e_ptpsq *ptpsq = &c->ptpsq[tc];
@@ -399,7 +403,7 @@ static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c,
out_err_ts_cq:
for (--tc; tc >= 0; tc--)
mlx5e_close_cq(&c->ptpsq[tc].ts_cq);
- tc = params->num_tc;
+ tc = num_tc;
out_err_txqsq_cq:
for (--tc; tc >= 0; tc--)
mlx5e_close_cq(&c->ptpsq[tc].txqsq.cq);
@@ -475,7 +479,7 @@ static void mlx5e_ptp_build_params(struct mlx5e_ptp *c,
params->num_channels = orig->num_channels;
params->hard_mtu = orig->hard_mtu;
params->sw_mtu = orig->sw_mtu;
- params->num_tc = orig->num_tc;
+ params->mqprio = orig->mqprio;
/* SQ */
if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
@@ -605,9 +609,9 @@ static void mlx5e_ptp_rx_unset_fs(struct mlx5e_priv *priv)
static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
{
+ u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res);
struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs;
struct mlx5_flow_handle *rule;
- u32 tirn = priv->ptp_tir.tirn;
int err;
if (ptp_fs->valid)
@@ -617,7 +621,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
if (err)
goto out_free;
- rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV4_UDP,
+ rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV4_UDP,
tirn, PTP_EV_PORT);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -625,7 +629,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
}
ptp_fs->udp_v4_rule = rule;
- rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV6_UDP,
+ rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV6_UDP,
tirn, PTP_EV_PORT);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -680,7 +684,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
c->pdev = mlx5_core_dma_dev(priv->mdev);
c->netdev = priv->netdev;
c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
- c->num_tc = params->num_tc;
+ c->num_tc = mlx5e_get_dcb_num_tc(params);
c->stats = &priv->ptp_stats.ch;
c->lag_port = lag_port;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
index 5efe3278b0f6..e8a8d78e3e4d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
@@ -132,7 +132,7 @@ static u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid)
*/
bool is_ptp = MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS);
- return (chs->params.num_channels + is_ptp) * chs->params.num_tc + qid;
+ return (chs->params.num_channels + is_ptp) * mlx5e_get_dcb_num_tc(&chs->params) + qid;
}
int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid)
@@ -733,8 +733,8 @@ static void mlx5e_reset_qdisc(struct net_device *dev, u16 qid)
spin_unlock_bh(qdisc_lock(qdisc));
}
-int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid,
- u16 *new_qid, struct netlink_ext_ack *extack)
+int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 *classid,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_qos_node *node;
struct netdev_queue *txq;
@@ -742,11 +742,9 @@ int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid,
bool opened;
int err;
- qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL classid %04x\n", classid);
-
- *old_qid = *new_qid = 0;
+ qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid);
- node = mlx5e_sw_node_find(priv, classid);
+ node = mlx5e_sw_node_find(priv, *classid);
if (!node)
return -ENOENT;
@@ -764,7 +762,7 @@ int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid,
err = mlx5_qos_destroy_node(priv->mdev, node->hw_id);
if (err) /* Not fatal. */
qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
- node->hw_id, classid, err);
+ node->hw_id, *classid, err);
mlx5e_sw_node_delete(priv, node);
@@ -826,8 +824,7 @@ int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid,
if (opened)
mlx5e_reactivate_qos_sq(priv, moved_qid, txq);
- *old_qid = mlx5e_qid_from_qos(&priv->channels, moved_qid);
- *new_qid = mlx5e_qid_from_qos(&priv->channels, qid);
+ *classid = node->classid;
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
index 5af7991fcd19..757682b7c0e0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
@@ -34,8 +34,8 @@ int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid,
struct netlink_ext_ack *extack);
int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid,
u64 rate, u64 ceil, struct netlink_ext_ack *extack);
-int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid,
- u16 *new_qid, struct netlink_ext_ack *extack);
+int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 *classid,
+ struct netlink_ext_ack *extack);
int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force,
struct netlink_ext_ack *extack);
int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
index 3c0032c9647c..0c38c2e319be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -15,9 +15,116 @@ struct mlx5_bridge_switchdev_fdb_work {
struct work_struct work;
struct switchdev_notifier_fdb_info fdb_info;
struct net_device *dev;
+ struct mlx5_esw_bridge_offloads *br_offloads;
bool add;
};
+static bool mlx5_esw_bridge_dev_same_esw(struct net_device *dev, struct mlx5_eswitch *esw)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return esw == priv->mdev->priv.eswitch;
+}
+
+static bool mlx5_esw_bridge_dev_same_hw(struct net_device *dev, struct mlx5_eswitch *esw)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev, *esw_mdev;
+ u64 system_guid, esw_system_guid;
+
+ mdev = priv->mdev;
+ esw_mdev = esw->dev;
+
+ system_guid = mlx5_query_nic_system_image_guid(mdev);
+ esw_system_guid = mlx5_query_nic_system_image_guid(esw_mdev);
+
+ return system_guid == esw_system_guid;
+}
+
+static struct net_device *
+mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw)
+{
+ struct net_device *lower;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_eswitch_rep(lower))
+ continue;
+
+ priv = netdev_priv(lower);
+ mdev = priv->mdev;
+ if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw))
+ return lower;
+ }
+
+ return NULL;
+}
+
+static struct net_device *
+mlx5_esw_bridge_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_eswitch *esw,
+ u16 *vport_num, u16 *esw_owner_vhca_id)
+{
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *priv;
+
+ if (netif_is_lag_master(dev))
+ dev = mlx5_esw_bridge_lag_rep_get(dev, esw);
+
+ if (!dev || !mlx5e_eswitch_rep(dev) || !mlx5_esw_bridge_dev_same_hw(dev, esw))
+ return NULL;
+
+ priv = netdev_priv(dev);
+ rpriv = priv->ppriv;
+ *vport_num = rpriv->rep->vport;
+ *esw_owner_vhca_id = MLX5_CAP_GEN(priv->mdev, vhca_id);
+ return dev;
+}
+
+static struct net_device *
+mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_eswitch *esw,
+ u16 *vport_num, u16 *esw_owner_vhca_id)
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
+ if (netif_is_lag_master(dev) || mlx5e_eswitch_rep(dev))
+ return mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, vport_num,
+ esw_owner_vhca_id);
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ struct net_device *rep;
+
+ if (netif_is_bridge_master(lower_dev))
+ continue;
+
+ rep = mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(lower_dev, esw, vport_num,
+ esw_owner_vhca_id);
+ if (rep)
+ return rep;
+ }
+
+ return NULL;
+}
+
+static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device *rep,
+ struct mlx5_eswitch *esw)
+{
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5_esw_bridge_dev_same_esw(rep, esw))
+ return false;
+
+ priv = netdev_priv(rep);
+ mdev = priv->mdev;
+ if (netif_is_lag_master(dev))
+ return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev);
+ return true;
+}
+
static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr)
{
struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb,
@@ -25,37 +132,36 @@ static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr
netdev_nb);
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct netdev_notifier_changeupper_info *info = ptr;
+ struct net_device *upper = info->upper_dev, *rep;
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ u16 vport_num, esw_owner_vhca_id;
struct netlink_ext_ack *extack;
- struct mlx5e_rep_priv *rpriv;
- struct mlx5_eswitch *esw;
- struct mlx5_vport *vport;
- struct net_device *upper;
- struct mlx5e_priv *priv;
- u16 vport_num;
-
- if (!mlx5e_eswitch_rep(dev))
- return 0;
+ int ifindex = upper->ifindex;
+ int err;
- upper = info->upper_dev;
if (!netif_is_bridge_master(upper))
return 0;
- esw = br_offloads->esw;
- priv = netdev_priv(dev);
- if (esw != priv->mdev->priv.eswitch)
+ rep = mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, &vport_num, &esw_owner_vhca_id);
+ if (!rep)
return 0;
- rpriv = priv->ppriv;
- vport_num = rpriv->rep->vport;
- vport = mlx5_eswitch_get_vport(esw, vport_num);
- if (IS_ERR(vport))
- return PTR_ERR(vport);
-
extack = netdev_notifier_info_to_extack(&info->info);
- return info->linking ?
- mlx5_esw_bridge_vport_link(upper->ifindex, br_offloads, vport, extack) :
- mlx5_esw_bridge_vport_unlink(upper->ifindex, br_offloads, vport, extack);
+ if (mlx5_esw_bridge_is_local(dev, rep, esw))
+ err = info->linking ?
+ mlx5_esw_bridge_vport_link(ifindex, vport_num, esw_owner_vhca_id,
+ br_offloads, extack) :
+ mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id,
+ br_offloads, extack);
+ else if (mlx5_esw_bridge_dev_same_hw(rep, esw))
+ err = info->linking ?
+ mlx5_esw_bridge_vport_peer_link(ifindex, vport_num, esw_owner_vhca_id,
+ br_offloads, extack) :
+ mlx5_esw_bridge_vport_peer_unlink(ifindex, vport_num, esw_owner_vhca_id,
+ br_offloads, extack);
+
+ return err;
}
static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
@@ -75,31 +181,28 @@ static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
return notifier_from_errno(err);
}
-static int mlx5_esw_bridge_port_obj_add(struct net_device *dev,
- const void *ctx,
- const struct switchdev_obj *obj,
- struct netlink_ext_ack *extack)
+static int
+mlx5_esw_bridge_port_obj_add(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ struct mlx5_esw_bridge_offloads *br_offloads)
{
+ struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
+ const struct switchdev_obj *obj = port_obj_info->obj;
const struct switchdev_obj_port_vlan *vlan;
- struct mlx5e_rep_priv *rpriv;
- struct mlx5_eswitch *esw;
- struct mlx5_vport *vport;
- struct mlx5e_priv *priv;
- u16 vport_num;
- int err = 0;
+ u16 vport_num, esw_owner_vhca_id;
+ int err;
- priv = netdev_priv(dev);
- rpriv = priv->ppriv;
- vport_num = rpriv->rep->vport;
- esw = priv->mdev->priv.eswitch;
- vport = mlx5_eswitch_get_vport(esw, vport_num);
- if (IS_ERR(vport))
- return PTR_ERR(vport);
+ if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+ &esw_owner_vhca_id))
+ return 0;
+
+ port_obj_info->handled = true;
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_VLAN:
vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
- err = mlx5_esw_bridge_port_vlan_add(vlan->vid, vlan->flags, esw, vport, extack);
+ err = mlx5_esw_bridge_port_vlan_add(vport_num, esw_owner_vhca_id, vlan->vid,
+ vlan->flags, br_offloads, extack);
break;
default:
return -EOPNOTSUPP;
@@ -107,29 +210,25 @@ static int mlx5_esw_bridge_port_obj_add(struct net_device *dev,
return err;
}
-static int mlx5_esw_bridge_port_obj_del(struct net_device *dev,
- const void *ctx,
- const struct switchdev_obj *obj)
+static int
+mlx5_esw_bridge_port_obj_del(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ struct mlx5_esw_bridge_offloads *br_offloads)
{
+ const struct switchdev_obj *obj = port_obj_info->obj;
const struct switchdev_obj_port_vlan *vlan;
- struct mlx5e_rep_priv *rpriv;
- struct mlx5_eswitch *esw;
- struct mlx5_vport *vport;
- struct mlx5e_priv *priv;
- u16 vport_num;
+ u16 vport_num, esw_owner_vhca_id;
- priv = netdev_priv(dev);
- rpriv = priv->ppriv;
- vport_num = rpriv->rep->vport;
- esw = priv->mdev->priv.eswitch;
- vport = mlx5_eswitch_get_vport(esw, vport_num);
- if (IS_ERR(vport))
- return PTR_ERR(vport);
+ if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+ &esw_owner_vhca_id))
+ return 0;
+
+ port_obj_info->handled = true;
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_VLAN:
vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
- mlx5_esw_bridge_port_vlan_del(vlan->vid, esw, vport);
+ mlx5_esw_bridge_port_vlan_del(vport_num, esw_owner_vhca_id, vlan->vid, br_offloads);
break;
default:
return -EOPNOTSUPP;
@@ -137,25 +236,21 @@ static int mlx5_esw_bridge_port_obj_del(struct net_device *dev,
return 0;
}
-static int mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
- const void *ctx,
- const struct switchdev_attr *attr,
- struct netlink_ext_ack *extack)
+static int
+mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
+ struct switchdev_notifier_port_attr_info *port_attr_info,
+ struct mlx5_esw_bridge_offloads *br_offloads)
{
- struct mlx5e_rep_priv *rpriv;
- struct mlx5_eswitch *esw;
- struct mlx5_vport *vport;
- struct mlx5e_priv *priv;
- u16 vport_num;
- int err = 0;
+ struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_attr_info->info);
+ const struct switchdev_attr *attr = port_attr_info->attr;
+ u16 vport_num, esw_owner_vhca_id;
+ int err;
- priv = netdev_priv(dev);
- rpriv = priv->ppriv;
- vport_num = rpriv->rep->vport;
- esw = priv->mdev->priv.eswitch;
- vport = mlx5_eswitch_get_vport(esw, vport_num);
- if (IS_ERR(vport))
- return PTR_ERR(vport);
+ if (!mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+ &esw_owner_vhca_id))
+ return 0;
+
+ port_attr_info->handled = true;
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
@@ -167,10 +262,12 @@ static int mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
break;
case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
- err = mlx5_esw_bridge_ageing_time_set(attr->u.ageing_time, esw, vport);
+ err = mlx5_esw_bridge_ageing_time_set(vport_num, esw_owner_vhca_id,
+ attr->u.ageing_time, br_offloads);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
- err = mlx5_esw_bridge_vlan_filtering_set(attr->u.vlan_filtering, esw, vport);
+ err = mlx5_esw_bridge_vlan_filtering_set(vport_num, esw_owner_vhca_id,
+ attr->u.vlan_filtering, br_offloads);
break;
default:
err = -EOPNOTSUPP;
@@ -179,27 +276,24 @@ static int mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
return err;
}
-static int mlx5_esw_bridge_event_blocking(struct notifier_block *unused,
+static int mlx5_esw_bridge_event_blocking(struct notifier_block *nb,
unsigned long event, void *ptr)
{
+ struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb,
+ struct mlx5_esw_bridge_offloads,
+ nb_blk);
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
int err;
switch (event) {
case SWITCHDEV_PORT_OBJ_ADD:
- err = switchdev_handle_port_obj_add(dev, ptr,
- mlx5e_eswitch_rep,
- mlx5_esw_bridge_port_obj_add);
+ err = mlx5_esw_bridge_port_obj_add(dev, ptr, br_offloads);
break;
case SWITCHDEV_PORT_OBJ_DEL:
- err = switchdev_handle_port_obj_del(dev, ptr,
- mlx5e_eswitch_rep,
- mlx5_esw_bridge_port_obj_del);
+ err = mlx5_esw_bridge_port_obj_del(dev, ptr, br_offloads);
break;
case SWITCHDEV_PORT_ATTR_SET:
- err = switchdev_handle_port_attr_set(dev, ptr,
- mlx5e_eswitch_rep,
- mlx5_esw_bridge_port_obj_attr_set);
+ err = mlx5_esw_bridge_port_obj_attr_set(dev, ptr, br_offloads);
break;
default:
err = 0;
@@ -222,27 +316,23 @@ static void mlx5_esw_bridge_switchdev_fdb_event_work(struct work_struct *work)
container_of(work, struct mlx5_bridge_switchdev_fdb_work, work);
struct switchdev_notifier_fdb_info *fdb_info =
&fdb_work->fdb_info;
+ struct mlx5_esw_bridge_offloads *br_offloads =
+ fdb_work->br_offloads;
struct net_device *dev = fdb_work->dev;
- struct mlx5e_rep_priv *rpriv;
- struct mlx5_eswitch *esw;
- struct mlx5_vport *vport;
- struct mlx5e_priv *priv;
- u16 vport_num;
+ u16 vport_num, esw_owner_vhca_id;
rtnl_lock();
- priv = netdev_priv(dev);
- rpriv = priv->ppriv;
- vport_num = rpriv->rep->vport;
- esw = priv->mdev->priv.eswitch;
- vport = mlx5_eswitch_get_vport(esw, vport_num);
- if (IS_ERR(vport))
+ if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+ &esw_owner_vhca_id))
goto out;
if (fdb_work->add)
- mlx5_esw_bridge_fdb_create(dev, esw, vport, fdb_info);
+ mlx5_esw_bridge_fdb_create(dev, vport_num, esw_owner_vhca_id, br_offloads,
+ fdb_info);
else
- mlx5_esw_bridge_fdb_remove(dev, esw, vport, fdb_info);
+ mlx5_esw_bridge_fdb_remove(dev, vport_num, esw_owner_vhca_id, br_offloads,
+ fdb_info);
out:
rtnl_unlock();
@@ -251,7 +341,8 @@ out:
static struct mlx5_bridge_switchdev_fdb_work *
mlx5_esw_bridge_init_switchdev_fdb_work(struct net_device *dev, bool add,
- struct switchdev_notifier_fdb_info *fdb_info)
+ struct switchdev_notifier_fdb_info *fdb_info,
+ struct mlx5_esw_bridge_offloads *br_offloads)
{
struct mlx5_bridge_switchdev_fdb_work *work;
u8 *addr;
@@ -273,6 +364,7 @@ mlx5_esw_bridge_init_switchdev_fdb_work(struct net_device *dev, bool add,
dev_hold(dev);
work->dev = dev;
+ work->br_offloads = br_offloads;
work->add = add;
return work;
}
@@ -286,20 +378,14 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
struct switchdev_notifier_fdb_info *fdb_info;
struct mlx5_bridge_switchdev_fdb_work *work;
+ struct mlx5_eswitch *esw = br_offloads->esw;
struct switchdev_notifier_info *info = ptr;
- struct net_device *upper;
- struct mlx5e_priv *priv;
-
- if (!mlx5e_eswitch_rep(dev))
- return NOTIFY_DONE;
- priv = netdev_priv(dev);
- if (priv->mdev->priv.eswitch != br_offloads->esw)
- return NOTIFY_DONE;
+ u16 vport_num, esw_owner_vhca_id;
+ struct net_device *upper, *rep;
if (event == SWITCHDEV_PORT_ATTR_SET) {
- int err = switchdev_handle_port_attr_set(dev, ptr,
- mlx5e_eswitch_rep,
- mlx5_esw_bridge_port_obj_attr_set);
+ int err = mlx5_esw_bridge_port_obj_attr_set(dev, ptr, br_offloads);
+
return notifier_from_errno(err);
}
@@ -309,7 +395,27 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
if (!netif_is_bridge_master(upper))
return NOTIFY_DONE;
+ rep = mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, &vport_num, &esw_owner_vhca_id);
+ if (!rep)
+ return NOTIFY_DONE;
+
switch (event) {
+ case SWITCHDEV_FDB_ADD_TO_BRIDGE:
+ /* only handle the event on native eswitch of representor */
+ if (!mlx5_esw_bridge_is_local(dev, rep, esw))
+ break;
+
+ fdb_info = container_of(info,
+ struct switchdev_notifier_fdb_info,
+ info);
+ mlx5_esw_bridge_fdb_update_used(dev, vport_num, esw_owner_vhca_id, br_offloads,
+ fdb_info);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_BRIDGE:
+ /* only handle the event on peers */
+ if (mlx5_esw_bridge_is_local(dev, rep, esw))
+ break;
+ fallthrough;
case SWITCHDEV_FDB_ADD_TO_DEVICE:
case SWITCHDEV_FDB_DEL_TO_DEVICE:
fdb_info = container_of(info,
@@ -318,7 +424,8 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
work = mlx5_esw_bridge_init_switchdev_fdb_work(dev,
event == SWITCHDEV_FDB_ADD_TO_DEVICE,
- fdb_info);
+ fdb_info,
+ br_offloads);
if (IS_ERR(work)) {
WARN_ONCE(1, "Failed to init switchdev work, err=%ld",
PTR_ERR(work));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 059799e4f483..51a4d80f7fa3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -17,7 +17,7 @@
#include "en/mapping.h"
#include "en/tc_tun.h"
#include "lib/port_tun.h"
-#include "esw/sample.h"
+#include "en/tc/sample.h"
struct mlx5e_rep_indr_block_priv {
struct net_device *netdev;
@@ -516,7 +516,6 @@ void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv)
mlx5e_rep_indr_block_unbind);
}
-#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
struct mlx5e_tc_update_priv *tc_priv,
u32 tunnel_id)
@@ -609,12 +608,13 @@ static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
return true;
}
-static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1,
- struct mlx5e_tc_update_priv *tc_priv)
+static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1,
+ struct mlx5e_tc_update_priv *tc_priv)
{
struct mlx5e_priv *priv = netdev_priv(skb->dev);
u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
if (chain) {
struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *uplink_rpriv;
@@ -636,9 +636,25 @@ static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1,
zone_restore_id))
return false;
}
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
}
-#endif /* CONFIG_NET_TC_SKB_EXT */
+
+static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5_mapped_obj *mapped_obj,
+ struct mlx5e_tc_update_priv *tc_priv)
+{
+ if (!mlx5e_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) {
+ netdev_dbg(priv->netdev,
+ "Failed to restore tunnel info for sampled packet\n");
+ return;
+ }
+#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+ mlx5e_tc_sample_skb(skb, mapped_obj);
+#endif /* CONFIG_MLX5_TC_SAMPLE */
+ mlx5_rep_tc_post_napi_receive(tc_priv);
+}
bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
struct sk_buff *skb,
@@ -647,7 +663,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
struct mlx5_mapped_obj mapped_obj;
struct mlx5_eswitch *esw;
struct mlx5e_priv *priv;
- u32 reg_c0, reg_c1;
+ u32 reg_c0;
int err;
reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
@@ -659,8 +675,6 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
*/
skb->mark = 0;
- reg_c1 = be32_to_cpu(cqe->ft_metadata);
-
priv = netdev_priv(skb->dev);
esw = priv->mdev->priv.eswitch;
err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj);
@@ -671,18 +685,14 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
return false;
}
-#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN)
- return mlx5e_restore_skb(skb, mapped_obj.chain, reg_c1, tc_priv);
-#endif /* CONFIG_NET_TC_SKB_EXT */
-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
- if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) {
- mlx5_esw_sample_skb(skb, &mapped_obj);
+ if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
+ u32 reg_c1 = be32_to_cpu(cqe->ft_metadata);
+
+ return mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, tc_priv);
+ } else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) {
+ mlx5e_restore_skb_sample(priv, skb, &mapped_obj, tc_priv);
return false;
- }
-#endif /* CONFIG_MLX5_TC_SAMPLE */
- if (mapped_obj.type != MLX5_MAPPED_OBJ_SAMPLE &&
- mapped_obj.type != MLX5_MAPPED_OBJ_CHAIN) {
+ } else {
netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
return false;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 9d361efd5ff7..bb682fd751c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -372,7 +372,7 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
for (i = 0; i < priv->channels.num; i++) {
struct mlx5e_channel *c = priv->channels.c[i];
- for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
struct mlx5e_txqsq *sq = &c->sq[tc];
err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc);
@@ -384,7 +384,7 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
goto close_sqs_nest;
- for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg,
&ptp_ch->ptpsq[tc],
tc);
@@ -494,7 +494,7 @@ static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
for (i = 0; i < priv->channels.num; i++) {
struct mlx5e_channel *c = priv->channels.c[i];
- for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
struct mlx5e_txqsq *sq = &c->sq[tc];
err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
@@ -504,7 +504,7 @@ static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
}
if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) {
- for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq;
err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
new file mode 100644
index 000000000000..b915fb29dd2c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "rqt.h"
+#include <linux/mlx5/transobj.h>
+
+void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
+ unsigned int num_channels)
+{
+ unsigned int i;
+
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+ indir->table[i] = i % num_channels;
+}
+
+static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ u16 max_size, u32 *init_rqns, u16 init_size)
+{
+ void *rqtc;
+ int inlen;
+ int err;
+ u32 *in;
+ int i;
+
+ rqt->mdev = mdev;
+ rqt->size = max_size;
+
+ inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * init_size;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
+
+ MLX5_SET(rqtc, rqtc, rqt_max_size, rqt->size);
+
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, init_size);
+ for (i = 0; i < init_size; i++)
+ MLX5_SET(rqtc, rqtc, rq_num[i], init_rqns[i]);
+
+ err = mlx5_core_create_rqt(rqt->mdev, in, inlen, &rqt->rqtn);
+
+ kvfree(in);
+ return err;
+}
+
+int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ bool indir_enabled, u32 init_rqn)
+{
+ u16 max_size = indir_enabled ? MLX5E_INDIR_RQT_SIZE : 1;
+
+ return mlx5e_rqt_init(rqt, mdev, max_size, &init_rqn, 1);
+}
+
+static int mlx5e_bits_invert(unsigned long a, int size)
+{
+ int inv = 0;
+ int i;
+
+ for (i = 0; i < size; i++)
+ inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i;
+
+ return inv;
+}
+
+static int mlx5e_calc_indir_rqns(u32 *rss_rqns, u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir)
+{
+ unsigned int i;
+
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) {
+ unsigned int ix = i;
+
+ if (hfunc == ETH_RSS_HASH_XOR)
+ ix = mlx5e_bits_invert(ix, ilog2(MLX5E_INDIR_RQT_SIZE));
+
+ ix = indir->table[ix];
+
+ if (WARN_ON(ix >= num_rqns))
+ /* Could be a bug in the driver or in the kernel part of
+ * ethtool: indir table refers to non-existent RQs.
+ */
+ return -EINVAL;
+ rss_rqns[i] = rqns[ix];
+ }
+
+ return 0;
+}
+
+int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir)
+{
+ u32 *rss_rqns;
+ int err;
+
+ rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL);
+ if (!rss_rqns)
+ return -ENOMEM;
+
+ err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir);
+ if (err)
+ goto out;
+
+ err = mlx5e_rqt_init(rqt, mdev, MLX5E_INDIR_RQT_SIZE, rss_rqns, MLX5E_INDIR_RQT_SIZE);
+
+out:
+ kvfree(rss_rqns);
+ return err;
+}
+
+void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt)
+{
+ mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn);
+}
+
+static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int size)
+{
+ unsigned int i;
+ void *rqtc;
+ int inlen;
+ u32 *in;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * size;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
+
+ MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1);
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, size);
+ for (i = 0; i < size; i++)
+ MLX5_SET(rqtc, rqtc, rq_num[i], rqns[i]);
+
+ err = mlx5_core_modify_rqt(rqt->mdev, rqt->rqtn, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn)
+{
+ return mlx5e_rqt_redirect(rqt, &rqn, 1);
+}
+
+int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir)
+{
+ u32 *rss_rqns;
+ int err;
+
+ if (WARN_ON(rqt->size != MLX5E_INDIR_RQT_SIZE))
+ return -EINVAL;
+
+ rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL);
+ if (!rss_rqns)
+ return -ENOMEM;
+
+ err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir);
+ if (err)
+ goto out;
+
+ err = mlx5e_rqt_redirect(rqt, rss_rqns, MLX5E_INDIR_RQT_SIZE);
+
+out:
+ kvfree(rss_rqns);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
new file mode 100644
index 000000000000..60c985a12f24
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_RQT_H__
+#define __MLX5_EN_RQT_H__
+
+#include <linux/kernel.h>
+
+#define MLX5E_INDIR_RQT_SIZE (1 << 8)
+
+struct mlx5_core_dev;
+
+struct mlx5e_rss_params_indir {
+ u32 table[MLX5E_INDIR_RQT_SIZE];
+};
+
+void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
+ unsigned int num_channels);
+
+struct mlx5e_rqt {
+ struct mlx5_core_dev *mdev;
+ u32 rqtn;
+ u16 size;
+};
+
+int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ bool indir_enabled, u32 init_rqn);
+int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir);
+void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt);
+
+static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt)
+{
+ return rqt->rqtn;
+}
+
+int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn);
+int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir);
+
+#endif /* __MLX5_EN_RQT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
new file mode 100644
index 000000000000..625cd49ef96c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
@@ -0,0 +1,588 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES.
+
+#include "rss.h"
+
+/* dev_warn() wrapper that prefixes every message with the calling function,
+ * the source line and the current PID. @__dev must expose a ->device member.
+ */
+#define mlx5e_rss_warn(__dev, format, ...) \
+ dev_warn((__dev)->device, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+/* Built-in RSS configuration per indirect traffic type, indexed by
+ * enum mlx5_traffic_types:
+ *  - TCP and UDP types hash on MLX5_HASH_IP_L4PORTS;
+ *  - IPsec AH/ESP types hash on MLX5_HASH_IP_IPSEC_SPI;
+ *  - plain IPv4/IPv6 hash on MLX5_HASH_IP.
+ * Types other than TCP/UDP leave l4_prot_type at 0.
+ */
+static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_INDIR_TIRS] = {
+ [MLX5_TT_IPV4_TCP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5_TT_IPV6_TCP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5_TT_IPV4_UDP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5_TT_IPV6_UDP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5_TT_IPV4_IPSEC_AH] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5_TT_IPV6_IPSEC_AH] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5_TT_IPV4_IPSEC_ESP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5_TT_IPV6_IPSEC_ESP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5_TT_IPV4] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP,
+ },
+ [MLX5_TT_IPV6] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP,
+ },
+};
+
+/* Return, by value, the built-in RSS configuration of traffic type @tt.
+ * @tt indexes rss_default_config directly and is not range-checked here.
+ */
+struct mlx5e_rss_params_traffic_type
+mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt)
+{
+	const struct mlx5e_rss_params_traffic_type *def = &rss_default_config[tt];
+
+	return *def;
+}
+
+/* One RSS context: hash parameters, indirection table shadow, the RQT that
+ * spreads traffic and the per-traffic-type TIRs that point at that RQT.
+ */
+struct mlx5e_rss {
+ struct mlx5e_rss_params_hash hash; /* hash function and key */
+ struct mlx5e_rss_params_indir indir; /* software copy of the indirection table */
+ u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS]; /* hash field selection per traffic type */
+ struct mlx5e_tir *tir[MLX5E_NUM_INDIR_TIRS]; /* outer TIRs; NULL until created */
+ struct mlx5e_tir *inner_tir[MLX5E_NUM_INDIR_TIRS]; /* inner TIRs; NULL until created */
+ struct mlx5e_rqt rqt; /* RQT referenced by every TIR of this context */
+ struct mlx5_core_dev *mdev;
+ u32 drop_rqn; /* RQ the RQT points at while the context is disabled */
+ bool inner_ft_support; /* inner TIRs may only be created when this is set */
+ bool enabled; /* toggled by mlx5e_rss_enable()/mlx5e_rss_disable() */
+ refcount_t refcnt; /* starts at 1; mlx5e_rss_cleanup() succeeds only at 1 */
+};
+
+/* Allocate a zeroed RSS context; returns NULL when the allocation fails.
+ * The result is released with mlx5e_rss_free().
+ */
+struct mlx5e_rss *mlx5e_rss_alloc(void)
+{
+	struct mlx5e_rss *rss;
+
+	rss = kvzalloc(sizeof(*rss), GFP_KERNEL);
+	return rss;
+}
+
+/* Release the memory of an RSS context. Only frees @rss itself; hardware
+ * objects are torn down separately by mlx5e_rss_cleanup().
+ */
+void mlx5e_rss_free(struct mlx5e_rss *rss)
+{
+	kvfree(rss);
+}
+
+/* Load the default hash parameters into @rss: ETH_RSS_HASH_TOP as the hash
+ * function, a key filled in by netdev_rss_key_fill(), and the default
+ * rx_hash_fields of every indirect traffic type.
+ */
+static void mlx5e_rss_params_init(struct mlx5e_rss *rss)
+{
+	struct mlx5e_rss_params_hash *hash = &rss->hash;
+	enum mlx5_traffic_types tt;
+
+	hash->hfunc = ETH_RSS_HASH_TOP;
+	netdev_rss_key_fill(hash->toeplitz_hash_key,
+			    sizeof(hash->toeplitz_hash_key));
+
+	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+		struct mlx5e_rss_params_traffic_type def;
+
+		def = mlx5e_rss_get_default_tt_config(tt);
+		rss->rx_hash_fields[tt] = def.rx_hash_fields;
+	}
+}
+
+/* Return the address of the TIR slot for (@tt, @inner) so that callers can
+ * both read and replace the stored pointer.
+ */
+static struct mlx5e_tir **rss_get_tirp(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+				       bool inner)
+{
+	if (inner)
+		return &rss->inner_tir[tt];
+
+	return &rss->tir[tt];
+}
+
+/* Return the TIR stored for (@tt, @inner), or NULL if none was created. */
+static struct mlx5e_tir *rss_get_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+				     bool inner)
+{
+	struct mlx5e_tir **slot = rss_get_tirp(rss, tt, inner);
+
+	return *slot;
+}
+
+/* Build the effective configuration of traffic type @tt: the built-in
+ * defaults with rx_hash_fields replaced by the value stored in @rss.
+ */
+static struct mlx5e_rss_params_traffic_type
+mlx5e_rss_get_tt_config(struct mlx5e_rss *rss, enum mlx5_traffic_types tt)
+{
+	struct mlx5e_rss_params_traffic_type cfg = mlx5e_rss_get_default_tt_config(tt);
+
+	cfg.rx_hash_fields = rss->rx_hash_fields[tt];
+
+	return cfg;
+}
+
+/* Allocate and create the indirect TIR for (@tt, @inner) and store it in
+ * its slot inside @rss.
+ *
+ * Returns 0 on success, -EINVAL if an inner TIR is requested without inner
+ * FT support or if the slot is already populated, -ENOMEM on allocation
+ * failure, or the error reported by mlx5e_tir_init().
+ */
+static int mlx5e_rss_create_tir(struct mlx5e_rss *rss,
+				enum mlx5_traffic_types tt,
+				const struct mlx5e_lro_param *init_lro_param,
+				bool inner)
+{
+	struct mlx5e_rss_params_traffic_type tt_cfg;
+	struct mlx5e_tir_builder *bld;
+	struct mlx5e_tir *new_tir;
+	struct mlx5e_tir **slot;
+	int err;
+
+	if (inner && !rss->inner_ft_support) {
+		mlx5e_rss_warn(rss->mdev,
+			       "Cannot create inner indirect TIR[%d], RSS inner FT is not supported.\n",
+			       tt);
+		return -EINVAL;
+	}
+
+	slot = rss_get_tirp(rss, tt, inner);
+	if (*slot)
+		return -EINVAL;
+
+	new_tir = kvzalloc(sizeof(*new_tir), GFP_KERNEL);
+	if (!new_tir)
+		return -ENOMEM;
+
+	bld = mlx5e_tir_builder_alloc(false);
+	if (!bld) {
+		kvfree(new_tir);
+		return -ENOMEM;
+	}
+
+	mlx5e_tir_builder_build_rqt(bld, rss->mdev->mlx5e_res.hw_objs.td.tdn,
+				    mlx5e_rqt_get_rqtn(&rss->rqt),
+				    rss->inner_ft_support);
+	mlx5e_tir_builder_build_lro(bld, init_lro_param);
+	tt_cfg = mlx5e_rss_get_tt_config(rss, tt);
+	mlx5e_tir_builder_build_rss(bld, &rss->hash, &tt_cfg, inner);
+
+	err = mlx5e_tir_init(new_tir, bld, rss->mdev, true);
+	/* The builder is only needed for the init call, whatever its outcome. */
+	mlx5e_tir_builder_free(bld);
+	if (err) {
+		mlx5e_rss_warn(rss->mdev, "Failed to create %sindirect TIR: err = %d, tt = %d\n",
+			       inner ? "inner " : "", err, tt);
+		kvfree(new_tir);
+		return err;
+	}
+
+	*slot = new_tir;
+	return 0;
+}
+
+/* Destroy and free the TIR stored for (@tt, @inner), then clear its slot.
+ * A slot that is already empty is left untouched.
+ */
+static void mlx5e_rss_destroy_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+				  bool inner)
+{
+	struct mlx5e_tir **slot = rss_get_tirp(rss, tt, inner);
+	struct mlx5e_tir *victim = *slot;
+
+	if (!victim)
+		return;
+
+	mlx5e_tir_destroy(victim);
+	kvfree(victim);
+	*slot = NULL;
+}
+
+/* Create the TIRs of all indirect traffic types (outer or inner, per
+ * @inner). On failure, the TIRs created before the failing traffic type
+ * are destroyed again and the error is returned.
+ */
+static int mlx5e_rss_create_tirs(struct mlx5e_rss *rss,
+				 const struct mlx5e_lro_param *init_lro_param,
+				 bool inner)
+{
+	enum mlx5_traffic_types tt, failed_tt;
+	int err = 0;
+
+	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+		err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner);
+		if (err)
+			break;
+	}
+
+	if (!err)
+		return 0;
+
+	/* Roll back, in creation order, everything before the failure. */
+	failed_tt = tt;
+	for (tt = 0; tt < failed_tt; tt++)
+		mlx5e_rss_destroy_tir(rss, tt, inner);
+
+	return err;
+}
+
+/* Destroy the outer or inner (per @inner) TIR of every indirect traffic
+ * type; traffic types without a TIR are skipped by the callee.
+ */
+static void mlx5e_rss_destroy_tirs(struct mlx5e_rss *rss, bool inner)
+{
+	enum mlx5_traffic_types tt = 0;
+
+	while (tt < MLX5E_NUM_INDIR_TIRS) {
+		mlx5e_rss_destroy_tir(rss, tt, inner);
+		tt++;
+	}
+}
+
+/* Re-apply the current hash parameters of @rss to the TIR of (@tt, @inner).
+ * Returns 0 when the TIR does not exist, -ENOMEM if no builder could be
+ * allocated, otherwise the result of mlx5e_tir_modify().
+ */
+static int mlx5e_rss_update_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+				bool inner)
+{
+	struct mlx5e_tir *tir = rss_get_tir(rss, tt, inner);
+	struct mlx5e_rss_params_traffic_type tt_cfg;
+	struct mlx5e_tir_builder *bld;
+	int err;
+
+	/* A TIR that was never created has nothing to modify. */
+	if (!tir)
+		return 0;
+
+	bld = mlx5e_tir_builder_alloc(true);
+	if (!bld)
+		return -ENOMEM;
+
+	tt_cfg = mlx5e_rss_get_tt_config(rss, tt);
+	mlx5e_tir_builder_build_rss(bld, &rss->hash, &tt_cfg, inner);
+
+	err = mlx5e_tir_modify(tir, bld);
+	mlx5e_tir_builder_free(bld);
+
+	return err;
+}
+
+/* Re-apply the hash parameters to every outer TIR and, when inner FT is
+ * supported, every inner TIR. All traffic types are attempted even after a
+ * failure; the first error seen (or 0) is returned.
+ */
+static int mlx5e_rss_update_tirs(struct mlx5e_rss *rss)
+{
+	enum mlx5_traffic_types tt;
+	int first_err = 0;
+	int err;
+
+	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+		err = mlx5e_rss_update_tir(rss, tt, false);
+		if (err) {
+			if (!first_err)
+				first_err = err;
+			mlx5e_rss_warn(rss->mdev,
+				       "Failed to update RSS hash of indirect TIR for traffic type %d: err = %d\n",
+				       tt, err);
+		}
+
+		if (rss->inner_ft_support) {
+			err = mlx5e_rss_update_tir(rss, tt, true);
+			if (err) {
+				if (!first_err)
+					first_err = err;
+				mlx5e_rss_warn(rss->mdev,
+					       "Failed to update RSS hash of inner indirect TIR for traffic type %d: err = %d\n",
+					       tt, err);
+			}
+		}
+	}
+
+	return first_err;
+}
+
+/* Initialize @rss without creating any TIR: record the device and settings,
+ * load default hash parameters, set the refcount to 1 and create the RQT
+ * pointing at @drop_rqn. Returns the result of mlx5e_rqt_init_direct().
+ */
+int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+			   bool inner_ft_support, u32 drop_rqn)
+{
+	rss->mdev = mdev;
+	rss->drop_rqn = drop_rqn;
+	rss->inner_ft_support = inner_ft_support;
+
+	refcount_set(&rss->refcnt, 1);
+	mlx5e_rss_params_init(rss);
+
+	return mlx5e_rqt_init_direct(&rss->rqt, mdev, true, drop_rqn);
+}
+
+/* Fully initialize @rss: everything mlx5e_rss_init_no_tirs() does, plus the
+ * outer TIRs of all traffic types and, if @inner_ft_support, the inner TIRs.
+ * On failure everything created so far is destroyed and the error returned.
+ */
+int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+		   bool inner_ft_support, u32 drop_rqn,
+		   const struct mlx5e_lro_param *init_lro_param)
+{
+	int err;
+
+	err = mlx5e_rss_init_no_tirs(rss, mdev, inner_ft_support, drop_rqn);
+	if (err)
+		return err;
+
+	err = mlx5e_rss_create_tirs(rss, init_lro_param, false);
+	if (err)
+		goto destroy_rqt;
+
+	if (!inner_ft_support)
+		return 0;
+
+	err = mlx5e_rss_create_tirs(rss, init_lro_param, true);
+	if (!err)
+		return 0;
+
+	/* Inner TIR creation cleaned up after itself; drop the outer ones. */
+	mlx5e_rss_destroy_tirs(rss, false);
+destroy_rqt:
+	mlx5e_rqt_destroy(&rss->rqt);
+	return err;
+}
+
+/* Destroy the TIRs and the RQT of @rss, provided the refcount is exactly 1
+ * (it is dropped to 0 in that case). Returns -EBUSY, leaving everything
+ * intact, when other references are still held. Does not free @rss.
+ */
+int mlx5e_rss_cleanup(struct mlx5e_rss *rss)
+{
+	bool was_last = refcount_dec_if_one(&rss->refcnt);
+
+	if (!was_last)
+		return -EBUSY;
+
+	mlx5e_rss_destroy_tirs(rss, false);
+	if (rss->inner_ft_support)
+		mlx5e_rss_destroy_tirs(rss, true);
+
+	mlx5e_rqt_destroy(&rss->rqt);
+
+	return 0;
+}
+
+/* Take an additional reference on @rss. */
+void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss)
+{
+	refcount_t *cnt = &rss->refcnt;
+
+	refcount_inc(cnt);
+}
+
+/* Drop a reference on @rss. Nothing is released here; teardown happens in
+ * mlx5e_rss_cleanup().
+ */
+void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss)
+{
+	refcount_t *cnt = &rss->refcnt;
+
+	refcount_dec(cnt);
+}
+
+/* Return the current reference count of @rss. */
+unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss)
+{
+	unsigned int cnt = refcount_read(&rss->refcnt);
+
+	return cnt;
+}
+
+/* Return the TIR number for (@tt, @inner). The TIR is expected to exist:
+ * a warning is raised if inner is requested without inner FT support or if
+ * the slot is empty, but the lookup result is still passed on to
+ * mlx5e_tir_get_tirn().
+ */
+u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+		       bool inner)
+{
+	struct mlx5e_tir *tir;
+	u32 tirn;
+
+	WARN_ON(inner && !rss->inner_ft_support);
+
+	tir = rss_get_tir(rss, tt, inner);
+	WARN_ON(!tir);
+
+	tirn = mlx5e_tir_get_tirn(tir);
+	return tirn;
+}
+
+/* Store the TIR number for (@tt, @inner) in *@tirn.
+ * If the TIR does not exist yet, it is created first using @init_lro_param.
+ * Returns 0 on success or the error from mlx5e_rss_create_tir(), in which
+ * case *@tirn is not written.
+ */
+int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
+			  enum mlx5_traffic_types tt,
+			  const struct mlx5e_lro_param *init_lro_param,
+			  bool inner, u32 *tirn)
+{
+	struct mlx5e_tir **slot = rss_get_tirp(rss, tt, inner);
+
+	if (!*slot) {
+		/* First user of this traffic type: create the TIR on demand. */
+		int err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner);
+
+		if (err)
+			return err;
+	}
+
+	*tirn = mlx5e_tir_get_tirn(*slot);
+	return 0;
+}
+
+/* Program the RQT of @rss from the indirection table, the hash function and
+ * the given RQ numbers. A failure is logged and not propagated.
+ */
+static void mlx5e_rss_apply(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns)
+{
+	int err = mlx5e_rqt_redirect_indir(&rss->rqt, rqns, num_rqns,
+					   rss->hash.hfunc, &rss->indir);
+
+	if (!err)
+		return;
+
+	mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to channels: err = %d\n",
+		       mlx5e_rqt_get_rqtn(&rss->rqt), err);
+}
+
+/* Mark @rss as enabled and point its RQT at the given RQs. */
+void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns)
+{
+	rss->enabled = true;
+
+	mlx5e_rss_apply(rss, rqns, num_rqns);
+}
+
+/* Mark @rss as disabled and redirect its RQT to the drop RQ. A redirect
+ * failure is logged and not propagated.
+ */
+void mlx5e_rss_disable(struct mlx5e_rss *rss)
+{
+	int err;
+
+	rss->enabled = false;
+
+	err = mlx5e_rqt_redirect_direct(&rss->rqt, rss->drop_rqn);
+	if (!err)
+		return;
+
+	mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to drop RQ %#x: err = %d\n",
+		       mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err);
+}
+
+/* Apply @lro_param to every existing TIR of @rss (outer, and inner when
+ * inner FT is supported). All TIRs are attempted even after a failure.
+ * Returns -ENOMEM if no builder could be allocated, otherwise the first
+ * mlx5e_tir_modify() error seen, or 0.
+ */
+int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param)
+{
+	struct mlx5e_tir_builder *builder;
+	enum mlx5_traffic_types tt;
+	int first_err = 0;
+	int err;
+
+	builder = mlx5e_tir_builder_alloc(true);
+	if (!builder)
+		return -ENOMEM;
+
+	mlx5e_tir_builder_build_lro(builder, lro_param);
+
+	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+		struct mlx5e_tir *tir;
+
+		tir = rss_get_tir(rss, tt, false);
+		if (tir) {
+			err = mlx5e_tir_modify(tir, builder);
+			if (err) {
+				mlx5e_rss_warn(rss->mdev, "Failed to update LRO state of indirect TIR %#x for traffic type %d: err = %d\n",
+					       mlx5e_tir_get_tirn(tir), tt, err);
+				if (!first_err)
+					first_err = err;
+			}
+		}
+
+		if (!rss->inner_ft_support)
+			continue;
+
+		tir = rss_get_tir(rss, tt, true);
+		if (!tir)
+			continue;
+
+		err = mlx5e_tir_modify(tir, builder);
+		if (err) {
+			mlx5e_rss_warn(rss->mdev, "Failed to update LRO state of inner indirect TIR %#x for traffic type %d: err = %d\n",
+				       mlx5e_tir_get_tirn(tir), tt, err);
+			if (!first_err)
+				first_err = err;
+		}
+	}
+
+	mlx5e_tir_builder_free(builder);
+	return first_err;
+}
+
+/* Copy the current RSS settings of @rss to the caller. Each of @indir
+ * (MLX5E_INDIR_RQT_SIZE entries), @key and @hfunc is optional and skipped
+ * when NULL. Always returns 0.
+ */
+int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc)
+{
+	if (indir)
+		memcpy(indir, rss->indir.table, sizeof(rss->indir.table));
+
+	if (key)
+		memcpy(key, rss->hash.toeplitz_hash_key,
+		       sizeof(rss->hash.toeplitz_hash_key));
+
+	if (hfunc)
+		*hfunc = rss->hash.hfunc;
+
+	return 0;
+}
+
+/* Update the hash function, key and/or indirection table of @rss; each of
+ * @hfunc, @key and @indir is optional and skipped when NULL.
+ *
+ * Returns -EINVAL, before changing anything, if a new hash function is
+ * neither ETH_RSS_HASH_XOR nor ETH_RSS_HASH_TOP; otherwise 0. The RQT is
+ * reprogrammed only while the context is enabled. Failures while applying
+ * the new settings to hardware are logged by the callees, not returned.
+ */
+int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
+		       const u8 *key, const u8 *hfunc,
+		       u32 *rqns, unsigned int num_rqns)
+{
+	bool reapply_indir = false;
+	bool reapply_hash = false;
+
+	if (hfunc && *hfunc != rss->hash.hfunc) {
+		if (*hfunc != ETH_RSS_HASH_XOR && *hfunc != ETH_RSS_HASH_TOP)
+			return -EINVAL;
+
+		/* A new hash function affects both the TIRs and the RQT. */
+		reapply_hash = true;
+		reapply_indir = true;
+		rss->hash.hfunc = *hfunc;
+	}
+
+	if (key) {
+		/* The key is always stored, but only matters to HW for TOP. */
+		if (rss->hash.hfunc == ETH_RSS_HASH_TOP)
+			reapply_hash = true;
+		memcpy(rss->hash.toeplitz_hash_key, key,
+		       sizeof(rss->hash.toeplitz_hash_key));
+	}
+
+	if (indir) {
+		reapply_indir = true;
+		memcpy(rss->indir.table, indir, sizeof(rss->indir.table));
+	}
+
+	if (reapply_indir && rss->enabled)
+		mlx5e_rss_apply(rss, rqns, num_rqns);
+
+	if (reapply_hash)
+		mlx5e_rss_update_tirs(rss);
+
+	return 0;
+}
+
+/* Return a copy of the hash parameters currently stored in @rss. */
+struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss)
+{
+	struct mlx5e_rss_params_hash snapshot = rss->hash;
+
+	return snapshot;
+}
+
+/* Return the hash field selection stored for traffic type @tt. The stored
+ * u32 is narrowed to the u8 return type.
+ */
+u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt)
+{
+	u8 fields = rss->rx_hash_fields[tt];
+
+	return fields;
+}
+
+/* Change the hash field selection of traffic type @tt and push it to the
+ * outer TIR and, with inner FT support, the inner TIR.
+ *
+ * Returns 0 when nothing changed or on success. On failure the stored value
+ * is restored and the error returned; if only the inner update failed, a
+ * best-effort attempt is made to revert the outer TIR as well.
+ */
+int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+			      u8 rx_hash_fields)
+{
+	u8 prev_fields = rss->rx_hash_fields[tt];
+	int err;
+
+	if (prev_fields == rx_hash_fields)
+		return 0;
+
+	rss->rx_hash_fields[tt] = rx_hash_fields;
+
+	err = mlx5e_rss_update_tir(rss, tt, false);
+	if (err) {
+		rss->rx_hash_fields[tt] = prev_fields;
+		mlx5e_rss_warn(rss->mdev,
+			       "Failed to update RSS hash fields of indirect TIR for traffic type %d: err = %d\n",
+			       tt, err);
+		return err;
+	}
+
+	if (!rss->inner_ft_support)
+		return 0;
+
+	err = mlx5e_rss_update_tir(rss, tt, true);
+	if (!err)
+		return 0;
+
+	/* The outer TIR already carries the new value. Restore the old one
+	 * and try to push it back; that may fail too, and then there is
+	 * nothing more to do than report it.
+	 */
+	rss->rx_hash_fields[tt] = prev_fields;
+	mlx5e_rss_warn(rss->mdev,
+		       "Failed to update RSS hash fields of inner indirect TIR for traffic type %d: err = %d\n",
+		       tt, err);
+	if (mlx5e_rss_update_tir(rss, tt, false))
+		mlx5e_rss_warn(rss->mdev,
+			       "Partial update of RSS hash fields happened: failed to revert indirect TIR for traffic type %d to the old values\n",
+			       tt);
+
+	return err;
+}
+
+/* Reset the software indirection table of @rss via
+ * mlx5e_rss_params_indir_init_uniform() for @nch channels. Only the stored
+ * table is written here; no hardware call is made.
+ */
+void mlx5e_rss_set_indir_uniform(struct mlx5e_rss *rss, unsigned int nch)
+{
+	struct mlx5e_rss_params_indir *indir = &rss->indir;
+
+	mlx5e_rss_params_indir_init_uniform(indir, nch);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
new file mode 100644
index 000000000000..d522a10dadf3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_EN_RSS_H__
+#define __MLX5_EN_RSS_H__
+
+#include "rqt.h"
+#include "tir.h"
+#include "fs.h"
+
+struct mlx5e_rss_params_traffic_type
+mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt);
+
+struct mlx5e_rss;
+
+struct mlx5e_rss *mlx5e_rss_alloc(void);
+void mlx5e_rss_free(struct mlx5e_rss *rss);
+int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn,
+ const struct mlx5e_lro_param *init_lro_param);
+int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn);
+int mlx5e_rss_cleanup(struct mlx5e_rss *rss);
+
+void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss);
+void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss);
+unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss);
+
+u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner);
+int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
+ enum mlx5_traffic_types tt,
+ const struct mlx5e_lro_param *init_lro_param,
+ bool inner, u32 *tirn);
+
+void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns);
+void mlx5e_rss_disable(struct mlx5e_rss *rss);
+
+int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param);
+int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc);
+int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
+ const u8 *key, const u8 *hfunc,
+ u32 *rqns, unsigned int num_rqns);
+struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss);
+u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt);
+int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields);
+void mlx5e_rss_set_indir_uniform(struct mlx5e_rss *rss, unsigned int nch);
+#endif /* __MLX5_EN_RSS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
new file mode 100644
index 000000000000..bf0313e2682b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
@@ -0,0 +1,690 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "rx_res.h"
+#include "channels.h"
+#include "params.h"
+
+#define MLX5E_MAX_NUM_RSS 16
+
+/* All RX steering resources of one netdev: the RSS contexts plus the
+ * per-channel and PTP RQT/TIR pairs.
+ */
+struct mlx5e_rx_res {
+ struct mlx5_core_dev *mdev;
+ enum mlx5e_rx_res_features features; /* bitmask of MLX5E_RX_RES_FEATURE_* */
+ unsigned int max_nch; /* number of entries in @channels */
+ u32 drop_rqn; /* RQ that inactive RQTs are pointed at */
+
+ struct mlx5e_rss *rss[MLX5E_MAX_NUM_RSS]; /* slot 0 is the default context; NULL = unused */
+ bool rss_active; /* set between channels_activate and channels_deactivate */
+ u32 rss_rqns[MLX5E_INDIR_RQT_SIZE]; /* regular RQ numbers of the active channels */
+ unsigned int rss_nch; /* number of valid entries in @rss_rqns */
+
+ struct {
+ struct mlx5e_rqt direct_rqt;
+ struct mlx5e_tir direct_tir;
+ struct mlx5e_rqt xsk_rqt; /* only initialized with MLX5E_RX_RES_FEATURE_XSK */
+ struct mlx5e_tir xsk_tir; /* only initialized with MLX5E_RX_RES_FEATURE_XSK */
+ } *channels;
+
+ struct {
+ struct mlx5e_rqt rqt;
+ struct mlx5e_tir tir;
+ } ptp;
+};
+
+/* API for rx_res_rss_* */
+
+/* Create the default RSS context (slot 0) with all of its TIRs and a
+ * uniform indirection table for @init_nch channels.
+ * Returns -EINVAL (with a warning) if slot 0 is already populated, -ENOMEM
+ * on allocation failure, or the error from mlx5e_rss_init().
+ */
+static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
+				     const struct mlx5e_lro_param *init_lro_param,
+				     unsigned int init_nch)
+{
+	bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+	struct mlx5e_rss *def_rss;
+	int err;
+
+	if (WARN_ON(res->rss[0]))
+		return -EINVAL;
+
+	def_rss = mlx5e_rss_alloc();
+	if (!def_rss)
+		return -ENOMEM;
+
+	err = mlx5e_rss_init(def_rss, res->mdev, inner_ft_support, res->drop_rqn,
+			     init_lro_param);
+	if (err) {
+		mlx5e_rss_free(def_rss);
+		return err;
+	}
+
+	mlx5e_rss_set_indir_uniform(def_rss, init_nch);
+	res->rss[0] = def_rss;
+
+	return 0;
+}
+
+/* Create an additional RSS context in the first free slot (slot 0 is left
+ * to the default context) and report the slot in *@rss_idx. No TIRs are
+ * created up front. If RSS is currently active, the new context is enabled
+ * immediately.
+ * Returns -ENOSPC when all slots are taken, -ENOMEM on allocation failure,
+ * or the error from mlx5e_rss_init_no_tirs().
+ */
+int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch)
+{
+	bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+	struct mlx5e_rss *rss;
+	int slot = 1;
+	int err;
+
+	while (slot < MLX5E_MAX_NUM_RSS && res->rss[slot])
+		slot++;
+
+	if (slot == MLX5E_MAX_NUM_RSS)
+		return -ENOSPC;
+
+	rss = mlx5e_rss_alloc();
+	if (!rss)
+		return -ENOMEM;
+
+	err = mlx5e_rss_init_no_tirs(rss, res->mdev, inner_ft_support, res->drop_rqn);
+	if (err) {
+		mlx5e_rss_free(rss);
+		return err;
+	}
+
+	mlx5e_rss_set_indir_uniform(rss, init_nch);
+	if (res->rss_active)
+		mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch);
+
+	res->rss[slot] = rss;
+	*rss_idx = slot;
+
+	return 0;
+}
+
+/* Clean up and free the RSS context in slot @rss_idx, then clear the slot.
+ * If mlx5e_rss_cleanup() fails, its error is returned and the context is
+ * left in place. The caller guarantees the slot is valid and populated.
+ */
+static int __mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx)
+{
+	struct mlx5e_rss *rss = res->rss[rss_idx];
+	int err = mlx5e_rss_cleanup(rss);
+
+	if (!err) {
+		mlx5e_rss_free(rss);
+		res->rss[rss_idx] = NULL;
+	}
+
+	return err;
+}
+
+/* Destroy the RSS context in slot @rss_idx.
+ * Returns -EINVAL for an out-of-range or empty slot, otherwise the result
+ * of __mlx5e_rx_res_rss_destroy().
+ */
+int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx)
+{
+	if (rss_idx >= MLX5E_MAX_NUM_RSS || !res->rss[rss_idx])
+		return -EINVAL;
+
+	return __mlx5e_rx_res_rss_destroy(res, rss_idx);
+}
+
+/* Destroy every populated RSS slot. A context that cannot be destroyed is
+ * reported with its refcount and left in its slot.
+ */
+static void mlx5e_rx_res_rss_destroy_all(struct mlx5e_rx_res *res)
+{
+	int i;
+
+	for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
+		struct mlx5e_rss *rss = res->rss[i];
+		int err;
+
+		if (!rss)
+			continue;
+
+		err = __mlx5e_rx_res_rss_destroy(res, i);
+		if (!err)
+			continue;
+
+		mlx5_core_warn(res->mdev,
+			       "Failed to destroy RSS context %d, refcount = %u, err = %d\n",
+			       i, mlx5e_rss_refcnt_read(rss), err);
+	}
+}
+
+/* Mark RSS as active and enable every existing RSS context on the RQ
+ * numbers currently recorded in @res.
+ */
+static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res)
+{
+	int i;
+
+	res->rss_active = true;
+
+	for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
+		struct mlx5e_rss *rss = res->rss[i];
+
+		if (rss)
+			mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch);
+	}
+}
+
+/* Mark RSS as inactive and disable every existing RSS context. */
+static void mlx5e_rx_res_rss_disable(struct mlx5e_rx_res *res)
+{
+	int i;
+
+	res->rss_active = false;
+
+	for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
+		struct mlx5e_rss *rss = res->rss[i];
+
+		if (rss)
+			mlx5e_rss_disable(rss);
+	}
+}
+
+/* Reset the indirection table of the default RSS context (slot 0) to a
+ * uniform spread over @nch channels. Only the software shadow is updated,
+ * not the hardware resources; warns once if called while RSS is active.
+ */
+void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch)
+{
+	struct mlx5e_rss *def_rss;
+
+	WARN_ON_ONCE(res->rss_active);
+
+	def_rss = res->rss[0];
+	mlx5e_rss_set_indir_uniform(def_rss, nch);
+}
+
+/* Read the RSS settings of the context in slot @rss_idx.
+ * Returns -EINVAL for an out-of-range index, -ENOENT for an empty slot,
+ * otherwise the result of mlx5e_rss_get_rxfh().
+ */
+int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+			      u32 *indir, u8 *key, u8 *hfunc)
+{
+	if (rss_idx >= MLX5E_MAX_NUM_RSS)
+		return -EINVAL;
+
+	if (!res->rss[rss_idx])
+		return -ENOENT;
+
+	return mlx5e_rss_get_rxfh(res->rss[rss_idx], indir, key, hfunc);
+}
+
+/* Update the RSS settings of the context in slot @rss_idx, passing along
+ * the RQ numbers currently recorded in @res.
+ * Returns -EINVAL for an out-of-range index, -ENOENT for an empty slot,
+ * otherwise the result of mlx5e_rss_set_rxfh().
+ */
+int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+			      const u32 *indir, const u8 *key, const u8 *hfunc)
+{
+	if (rss_idx >= MLX5E_MAX_NUM_RSS)
+		return -EINVAL;
+
+	if (!res->rss[rss_idx])
+		return -ENOENT;
+
+	return mlx5e_rss_set_rxfh(res->rss[rss_idx], indir, key, hfunc,
+				  res->rss_rqns, res->rss_nch);
+}
+
+/* Return the hash field selection of traffic type @tt in the default RSS
+ * context (slot 0).
+ */
+u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+	return mlx5e_rss_get_hash_fields(res->rss[0], tt);
+}
+
+/* Change the hash field selection of traffic type @tt in the default RSS
+ * context (slot 0); returns the result of mlx5e_rss_set_hash_fields().
+ */
+int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+				     u8 rx_hash_fields)
+{
+	return mlx5e_rss_set_hash_fields(res->rss[0], tt, rx_hash_fields);
+}
+
+/* Count the populated RSS slots, the default context included. */
+int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res)
+{
+	int used = 0;
+	int i;
+
+	for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
+		if (!res->rss[i])
+			continue;
+		used++;
+	}
+
+	return used;
+}
+
+/* Find the slot that holds @rss.
+ * Returns the slot index, -EINVAL if @rss is NULL, or -ENOENT if @rss is
+ * not stored in @res.
+ */
+int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss)
+{
+	int i = 0;
+
+	if (!rss)
+		return -EINVAL;
+
+	while (i < MLX5E_MAX_NUM_RSS) {
+		if (res->rss[i] == rss)
+			return i;
+		i++;
+	}
+
+	return -ENOENT;
+}
+
+/* Return the RSS context in slot @rss_idx; NULL if the index is out of
+ * range or the slot is empty.
+ */
+struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx)
+{
+	return rss_idx < MLX5E_MAX_NUM_RSS ? res->rss[rss_idx] : NULL;
+}
+
+/* End of API rx_res_rss_* */
+
+/* Allocate a zeroed mlx5e_rx_res; returns NULL when the allocation fails.
+ * The result is released with mlx5e_rx_res_free().
+ */
+struct mlx5e_rx_res *mlx5e_rx_res_alloc(void)
+{
+	struct mlx5e_rx_res *res;
+
+	res = kvzalloc(sizeof(*res), GFP_KERNEL);
+	return res;
+}
+
+/* Allocate res->channels (max_nch entries) and create, for every channel, a
+ * direct RQT pointing at the drop RQ and a direct TIR on top of it. With
+ * MLX5E_RX_RES_FEATURE_XSK the same is done for the XSK RQT/TIR pair.
+ *
+ * Creation runs in four stages (direct RQTs, direct TIRs, XSK RQTs, XSK
+ * TIRs). On failure the unwind labels below destroy the partially built
+ * stage up to the failing index and every earlier stage completely, then
+ * free res->channels. Returns 0 on success or a negative errno.
+ */
+static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
+ const struct mlx5e_lro_param *init_lro_param)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_tir_builder *builder;
+ int err = 0;
+ int ix;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ res->channels = kvcalloc(res->max_nch, sizeof(*res->channels), GFP_KERNEL);
+ if (!res->channels) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ err = mlx5e_rqt_init_direct(&res->channels[ix].direct_rqt,
+ res->mdev, false, res->drop_rqn);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to create a direct RQT: err = %d, ix = %u\n",
+ err, ix);
+ goto err_destroy_direct_rqts;
+ }
+ }
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ inner_ft_support);
+ mlx5e_tir_builder_build_lro(builder, init_lro_param);
+ mlx5e_tir_builder_build_direct(builder);
+
+ err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to create a direct TIR: err = %d, ix = %u\n",
+ err, ix);
+ goto err_destroy_direct_tirs;
+ }
+
+ /* The same builder is reused for the next channel. */
+ mlx5e_tir_builder_clear(builder);
+ }
+
+ /* Without XSK support we are done; err is 0 at this point. */
+ if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+ goto out;
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ err = mlx5e_rqt_init_direct(&res->channels[ix].xsk_rqt,
+ res->mdev, false, res->drop_rqn);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to create an XSK RQT: err = %d, ix = %u\n",
+ err, ix);
+ goto err_destroy_xsk_rqts;
+ }
+ }
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+ mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+ inner_ft_support);
+ mlx5e_tir_builder_build_lro(builder, init_lro_param);
+ mlx5e_tir_builder_build_direct(builder);
+
+ err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to create an XSK TIR: err = %d, ix = %u\n",
+ err, ix);
+ goto err_destroy_xsk_tirs;
+ }
+
+ mlx5e_tir_builder_clear(builder);
+ }
+
+ goto out;
+
+ /* Unwind: each label is entered with ix = index of the first object of
+ * that stage which was NOT created; ix is then reset to max_nch so the
+ * preceding (fully created) stage is torn down completely.
+ */
+err_destroy_xsk_tirs:
+ while (--ix >= 0)
+ mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
+
+ ix = res->max_nch;
+err_destroy_xsk_rqts:
+ while (--ix >= 0)
+ mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
+
+ ix = res->max_nch;
+err_destroy_direct_tirs:
+ while (--ix >= 0)
+ mlx5e_tir_destroy(&res->channels[ix].direct_tir);
+
+ ix = res->max_nch;
+err_destroy_direct_rqts:
+ while (--ix >= 0)
+ mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
+
+ /* NOTE(review): res->channels is left pointing at freed memory here. */
+ kvfree(res->channels);
+
+out:
+ mlx5e_tir_builder_free(builder);
+
+ return err;
+}
+
+/* Create the PTP RQT (pointing at the drop RQ) and the direct TIR on top of
+ * it. Returns 0 on success; on failure nothing is left allocated and a
+ * negative errno is returned.
+ */
+static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res)
+{
+	bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+	struct mlx5e_tir_builder *builder;
+	int err;
+
+	builder = mlx5e_tir_builder_alloc(false);
+	if (!builder)
+		return -ENOMEM;
+
+	err = mlx5e_rqt_init_direct(&res->ptp.rqt, res->mdev, false, res->drop_rqn);
+	if (err)
+		goto free_builder;
+
+	mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+				    mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+				    inner_ft_support);
+	mlx5e_tir_builder_build_direct(builder);
+
+	err = mlx5e_tir_init(&res->ptp.tir, builder, res->mdev, true);
+	if (err)
+		mlx5e_rqt_destroy(&res->ptp.rqt);
+
+free_builder:
+	mlx5e_tir_builder_free(builder);
+	return err;
+}
+
+/* Destroy the direct (and, with the XSK feature, XSK) TIR and RQT of every
+ * channel, then free the res->channels array.
+ */
+static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res)
+{
+	bool has_xsk = res->features & MLX5E_RX_RES_FEATURE_XSK;
+	unsigned int ix;
+
+	for (ix = 0; ix < res->max_nch; ix++) {
+		mlx5e_tir_destroy(&res->channels[ix].direct_tir);
+		mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
+
+		if (has_xsk) {
+			mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
+			mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
+		}
+	}
+
+	kvfree(res->channels);
+}
+
+/* Destroy the PTP TIR first, then the RQT it points at. */
+static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res)
+{
+	mlx5e_tir_destroy(&res->ptp.tir);
+
+	mlx5e_rqt_destroy(&res->ptp.rqt);
+}
+
+/* Initialize @res: record the settings, then create the default RSS
+ * context, the per-channel resources and the PTP resources, in that order.
+ * On failure the stages already completed are undone and the error is
+ * returned. @res itself is never freed here.
+ */
+int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+		      enum mlx5e_rx_res_features features, unsigned int max_nch,
+		      u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
+		      unsigned int init_nch)
+{
+	int err;
+
+	res->mdev = mdev;
+	res->features = features;
+	res->max_nch = max_nch;
+	res->drop_rqn = drop_rqn;
+
+	err = mlx5e_rx_res_rss_init_def(res, init_lro_param, init_nch);
+	if (err)
+		return err;
+
+	err = mlx5e_rx_res_channels_init(res, init_lro_param);
+	if (err)
+		goto destroy_rss;
+
+	err = mlx5e_rx_res_ptp_init(res);
+	if (!err)
+		return 0;
+
+	mlx5e_rx_res_channels_destroy(res);
+destroy_rss:
+	__mlx5e_rx_res_rss_destroy(res, 0);
+	return err;
+}
+
+/* Tear down everything mlx5e_rx_res_init() created, in reverse order:
+ * PTP resources, per-channel resources, then all RSS contexts.
+ * Does not free @res.
+ */
+void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res)
+{
+	mlx5e_rx_res_ptp_destroy(res);
+
+	mlx5e_rx_res_channels_destroy(res);
+
+	mlx5e_rx_res_rss_destroy_all(res);
+}
+
+/* Release the memory of @res. Only frees @res itself; hardware objects are
+ * torn down separately by mlx5e_rx_res_destroy().
+ */
+void mlx5e_rx_res_free(struct mlx5e_rx_res *res)
+{
+	kvfree(res);
+}
+
+/* Return the number of the direct TIR of channel @ix; @ix is not
+ * range-checked.
+ */
+u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+{
+	struct mlx5e_tir *tir = &res->channels[ix].direct_tir;
+
+	return mlx5e_tir_get_tirn(tir);
+}
+
+/* Return the number of the XSK TIR of channel @ix; warns if @res was set up
+ * without MLX5E_RX_RES_FEATURE_XSK. @ix is not range-checked.
+ */
+u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix)
+{
+	struct mlx5e_tir *tir;
+
+	WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_XSK));
+
+	tir = &res->channels[ix].xsk_tir;
+	return mlx5e_tir_get_tirn(tir);
+}
+
+/* Return the number of the outer TIR of traffic type @tt in the default
+ * RSS context (slot 0).
+ */
+u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+	return mlx5e_rss_get_tirn(res->rss[0], tt, false);
+}
+
+/* Return the number of the inner TIR of traffic type @tt in the default
+ * RSS context (slot 0).
+ */
+u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+	return mlx5e_rss_get_tirn(res->rss[0], tt, true);
+}
+
+/* Return the number of the PTP TIR; warns if @res was set up without
+ * MLX5E_RX_RES_FEATURE_PTP.
+ */
+u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res)
+{
+	struct mlx5e_tir *tir = &res->ptp.tir;
+
+	WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_PTP));
+
+	return mlx5e_tir_get_tirn(tir);
+}
+
+/* Return the number of the direct RQT of channel @ix; @ix is not
+ * range-checked.
+ */
+u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+{
+	struct mlx5e_rqt *rqt = &res->channels[ix].direct_rqt;
+
+	return mlx5e_rqt_get_rqtn(rqt);
+}
+
+/* Point all RX resources at the RQs of the channel set @chs:
+ *  - record the regular RQ numbers and enable every RSS context on them;
+ *  - redirect the direct RQT of each active channel to its regular RQ and,
+ *    with the XSK feature, the XSK RQT to the XSK RQ (or to the drop RQ
+ *    when the channel has no XSK RQ);
+ *  - redirect the RQTs of the remaining channels up to max_nch to the
+ *    drop RQ;
+ *  - with the PTP feature, redirect the PTP RQT to the PTP RQ, or to the
+ *    drop RQ when there is none.
+ * Redirect failures are logged and do not stop the activation.
+ */
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs)
+{
+	unsigned int nch, ix;
+	int err;
+
+	nch = mlx5e_channels_get_num(chs);
+
+	for (ix = 0; ix < chs->num; ix++)
+		mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
+	res->rss_nch = chs->num;
+
+	mlx5e_rx_res_rss_enable(res);
+
+	for (ix = 0; ix < nch; ix++) {
+		u32 rqn;
+
+		mlx5e_channels_get_regular_rqn(chs, ix, &rqn);
+		err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
+		if (err)
+			mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n",
+				       mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+				       rqn, ix, err);
+
+		if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+			continue;
+
+		/* No XSK RQ on this channel: keep its XSK RQT on the drop RQ. */
+		if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
+			rqn = res->drop_rqn;
+		err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
+		if (err)
+			mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to RQ %#x (channel %u): err = %d\n",
+				       mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+				       rqn, ix, err);
+	}
+	for (ix = nch; ix < res->max_nch; ix++) {
+		err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
+		if (err)
+			mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+				       mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+				       res->drop_rqn, ix, err);
+
+		if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+			continue;
+
+		err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
+		if (err)
+			mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+				       mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+				       res->drop_rqn, ix, err);
+	}
+
+	if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
+		u32 rqn;
+
+		/* The helper is treated like mlx5e_channels_get_xsk_rqn()
+		 * above: it reports whether @rqn was filled in. Fall back to
+		 * the drop RQ only when no PTP RQ is available. Testing the
+		 * opposite polarity would drop all PTP traffic when a PTP RQ
+		 * exists and program an uninitialized RQ number when not.
+		 */
+		if (!mlx5e_channels_get_ptp_rqn(chs, &rqn))
+			rqn = res->drop_rqn;
+
+		err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn);
+		if (err)
+			mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (PTP): err = %d\n",
+				       mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+				       rqn, err);
+	}
+}
+
+/* Detach all RX resources from the channels: disable every RSS context and
+ * redirect every direct, XSK (if enabled) and PTP (if enabled) RQT to the
+ * drop RQ. Redirect failures are logged and do not stop the deactivation.
+ */
+void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res)
+{
+	bool has_xsk = res->features & MLX5E_RX_RES_FEATURE_XSK;
+	unsigned int ix;
+	int err;
+
+	mlx5e_rx_res_rss_disable(res);
+
+	for (ix = 0; ix < res->max_nch; ix++) {
+		err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
+		if (err)
+			mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+				       mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+				       res->drop_rqn, ix, err);
+
+		if (!has_xsk)
+			continue;
+
+		err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
+		if (err)
+			mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+				       mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+				       res->drop_rqn, ix, err);
+	}
+
+	if (!(res->features & MLX5E_RX_RES_FEATURE_PTP))
+		return;
+
+	err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn);
+	if (err)
+		mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (PTP): err = %d\n",
+			       mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+			       res->drop_rqn, err);
+}
+
+/* Redirect the XSK RQT of channel @ix to that channel's XSK RQ.
+ * Returns -EINVAL if @chs reports no XSK RQ for the channel, otherwise the
+ * result of the redirect (failures are also logged).
+ */
+int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+			      unsigned int ix)
+{
+	struct mlx5e_rqt *xsk_rqt = &res->channels[ix].xsk_rqt;
+	u32 rqn;
+	int err;
+
+	if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
+		return -EINVAL;
+
+	err = mlx5e_rqt_redirect_direct(xsk_rqt, rqn);
+	if (err)
+		mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to XSK RQ %#x (channel %u): err = %d\n",
+			       mlx5e_rqt_get_rqtn(xsk_rqt),
+			       rqn, ix, err);
+
+	return err;
+}
+
+/* Redirect the XSK RQT of channel @ix back to the drop RQ.
+ * Returns the result of the redirect (failures are also logged).
+ */
+int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix)
+{
+	struct mlx5e_rqt *xsk_rqt = &res->channels[ix].xsk_rqt;
+	int err;
+
+	err = mlx5e_rqt_redirect_direct(xsk_rqt, res->drop_rqn);
+	if (err)
+		mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+			       mlx5e_rqt_get_rqtn(xsk_rqt),
+			       res->drop_rqn, ix, err);
+
+	return err;
+}
+
+/* Apply @lro_param to the TIRs of every RSS context and to the direct TIR
+ * of every channel. All objects are attempted even after a failure.
+ * Returns -ENOMEM if no builder could be allocated, otherwise the first
+ * error seen, or 0.
+ */
+int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param)
+{
+	struct mlx5e_tir_builder *builder;
+	int first_err = 0;
+	unsigned int ix;
+	int err;
+
+	builder = mlx5e_tir_builder_alloc(true);
+	if (!builder)
+		return -ENOMEM;
+
+	mlx5e_tir_builder_build_lro(builder, lro_param);
+
+	for (ix = 0; ix < MLX5E_MAX_NUM_RSS; ix++) {
+		struct mlx5e_rss *rss = res->rss[ix];
+
+		if (!rss)
+			continue;
+
+		err = mlx5e_rss_lro_set_param(rss, lro_param);
+		if (err && !first_err)
+			first_err = err;
+	}
+
+	for (ix = 0; ix < res->max_nch; ix++) {
+		struct mlx5e_tir *tir = &res->channels[ix].direct_tir;
+
+		err = mlx5e_tir_modify(tir, builder);
+		if (!err)
+			continue;
+
+		mlx5_core_warn(res->mdev, "Failed to update LRO state of direct TIR %#x for channel %u: err = %d\n",
+			       mlx5e_tir_get_tirn(tir), ix, err);
+		if (!first_err)
+			first_err = err;
+	}
+
+	mlx5e_tir_builder_free(builder);
+	return first_err;
+}
+
+/* Return a copy of the hash parameters of the default RSS context (slot 0). */
+struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res)
+{
+	struct mlx5e_rss *def_rss = res->rss[0];
+
+	return mlx5e_rss_get_hash(def_rss);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
new file mode 100644
index 000000000000..4a15942d79f7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_RX_RES_H__
+#define __MLX5_EN_RX_RES_H__
+
+#include <linux/kernel.h>
+#include "rqt.h"
+#include "tir.h"
+#include "fs.h"
+#include "rss.h"
+
+struct mlx5e_rx_res;
+
+struct mlx5e_channels;
+struct mlx5e_rss_params_hash;
+
+/* Feature flags for the @features argument of mlx5e_rx_res_init().
+ * Each value occupies its own bit.
+ * NOTE(review): the per-flag notes are inferred from the names only - confirm
+ * against rx_res.c.
+ */
+enum mlx5e_rx_res_features {
+ MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0), /* presumably: inner flow table resources */
+ MLX5E_RX_RES_FEATURE_XSK = BIT(1), /* presumably: per-channel XSK resources */
+ MLX5E_RX_RES_FEATURE_PTP = BIT(2), /* presumably: PTP resources */
+};
+
+/* Setup */
+struct mlx5e_rx_res *mlx5e_rx_res_alloc(void);
+int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+ enum mlx5e_rx_res_features features, unsigned int max_nch,
+ u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
+ unsigned int init_nch);
+void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res);
+void mlx5e_rx_res_free(struct mlx5e_rx_res *res);
+
+/* TIRN getters for flow steering */
+u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix);
+u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix);
+u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
+
+/* RQTN getters for modules that create their own TIRs */
+u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix);
+
+/* Activate/deactivate API */
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs);
+void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res);
+int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+ unsigned int ix);
+int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix);
+
+/* Configuration API */
+void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch);
+int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+ u32 *indir, u8 *key, u8 *hfunc);
+int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+ const u32 *indir, const u8 *key, const u8 *hfunc);
+
+u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields);
+int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param);
+
+int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch);
+int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx);
+int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res);
+int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss);
+struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx);
+
+/* Workaround for hairpin */
+struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res);
+
+#endif /* __MLX5_EN_RX_RES_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
new file mode 100644
index 000000000000..a3e43e898a56
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "en_tc.h"
+#include "post_act.h"
+#include "mlx5_core.h"
+
+/* Post action context, created by mlx5e_tc_post_act_init(). */
+struct mlx5e_post_act {
+ enum mlx5_flow_namespace_type ns_type; /* namespace given at init */
+ struct mlx5_fs_chains *chains; /* chains object the global table was created from */
+ struct mlx5_flow_table *ft; /* global table that holds the post action rules */
+ struct mlx5e_priv *priv; /* used for rule insert/delete and for logging */
+ struct xarray ids; /* allocator for handle ids; ids start at 1 */
+};
+
+/* One post action rule, as returned by mlx5e_tc_post_act_add(). */
+struct mlx5e_post_act_handle {
+ enum mlx5_flow_namespace_type ns_type; /* copied from the owning mlx5e_post_act */
+ struct mlx5_flow_attr *attr; /* private copy of the caller's attr; freed in mlx5e_tc_post_act_del() */
+ struct mlx5_flow_handle *rule; /* rule inserted with the copied attr */
+ u32 id; /* id allocated from mlx5e_post_act.ids; matched/written via FTEID_TO_REG */
+};
+
+/* Width in bits of the FTEID_TO_REG register mapping. */
+#define MLX5_POST_ACTION_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen)
+/* All-ones value of that width; used as the upper limit for allocated ids. */
+#define MLX5_POST_ACTION_MAX GENMASK(MLX5_POST_ACTION_BITS - 1, 0)
+/* Mask used when matching an id in the register (same value as the max). */
+#define MLX5_POST_ACTION_MASK MLX5_POST_ACTION_MAX
+
+/* Create a post action context for @ns_type on top of @chains.
+ *
+ * Requires the ignore_flow_level capability of the flow table type that
+ * matches @ns_type: the eswitch FDB capability for MLX5_FLOW_NAMESPACE_FDB,
+ * the NIC RX capability for any other namespace.
+ *
+ * Returns the new context, or an ERR_PTR(): -EOPNOTSUPP when the capability
+ * is missing, -ENOMEM when the context cannot be allocated, or the error
+ * reported while creating the global table. Release the context with
+ * mlx5e_tc_post_act_destroy().
+ */
+struct mlx5e_post_act *
+mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+		       enum mlx5_flow_namespace_type ns_type)
+{
+	struct mlx5e_post_act *post_act;
+	bool supported;
+	int err;
+
+	/* Check only the capability of the namespace in use. With the NIC RX
+	 * check chained as an "else if", an FDB namespace whose FDB capability
+	 * was present still had to pass the NIC RX check as well.
+	 */
+	if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+		supported = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ignore_flow_level);
+	else
+		supported = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level);
+
+	if (!supported) {
+		mlx5_core_warn(priv->mdev, "firmware level support is missing\n");
+		err = -EOPNOTSUPP;
+		goto err_check;
+	}
+
+	post_act = kzalloc(sizeof(*post_act), GFP_KERNEL);
+	if (!post_act) {
+		err = -ENOMEM;
+		goto err_check;
+	}
+	post_act->ft = mlx5_chains_create_global_table(chains);
+	if (IS_ERR(post_act->ft)) {
+		err = PTR_ERR(post_act->ft);
+		mlx5_core_warn(priv->mdev, "failed to create post action table, err: %d\n", err);
+		goto err_ft;
+	}
+	post_act->chains = chains;
+	post_act->ns_type = ns_type;
+	post_act->priv = priv;
+	/* Handle ids are allocated starting at 1. */
+	xa_init_flags(&post_act->ids, XA_FLAGS_ALLOC1);
+	return post_act;
+
+err_ft:
+	kfree(post_act);
+err_check:
+	return ERR_PTR(err);
+}
+
+/* Tear down a context created by mlx5e_tc_post_act_init().
+ * NULL and ERR_PTR() values are accepted and ignored.
+ */
+void
+mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act)
+{
+	if (!IS_ERR_OR_NULL(post_act)) {
+		xa_destroy(&post_act->ids);
+		mlx5_chains_destroy_global_table(post_act->chains, post_act->ft);
+		kfree(post_act);
+	}
+}
+
+/* Add a rule to the post action table that carries @attr's actions.
+ *
+ * @attr is copied. The copy is retargeted at the post action table with
+ * chain/prio 0, both match levels set to MLX5_MATCH_NONE and the DECAP action
+ * cleared. A fresh id is allocated and the inserted rule matches only on that
+ * id in FTEID_TO_REG.
+ *
+ * Returns the new handle, or an ERR_PTR(): -ENOMEM on allocation failure,
+ * otherwise the error from the id allocation or the rule insertion. Release
+ * the handle with mlx5e_tc_post_act_del().
+ */
+struct mlx5e_post_act_handle *
+mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr)
+{
+	u32 attr_sz = ns_to_attr_sz(post_act->ns_type);
+	struct mlx5e_post_act_handle *handle = NULL;
+	struct mlx5_flow_attr *post_attr = NULL;
+	struct mlx5_flow_spec *spec = NULL;
+	int err;
+
+	/* Allocate everything up front; freeing NULL is a no-op, so a single
+	 * check covers all three allocations.
+	 */
+	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	post_attr = mlx5_alloc_flow_attr(post_act->ns_type);
+	if (!handle || !spec || !post_attr) {
+		kfree(post_attr);
+		kvfree(spec);
+		kfree(handle);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memcpy(post_attr, attr, attr_sz);
+	post_attr->chain = 0;
+	post_attr->prio = 0;
+	post_attr->ft = post_act->ft;
+	post_attr->inner_match_level = MLX5_MATCH_NONE;
+	post_attr->outer_match_level = MLX5_MATCH_NONE;
+	post_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
+
+	handle->ns_type = post_act->ns_type;
+	/* Splits were handled before post action */
+	if (handle->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+		post_attr->esw_attr->split_count = 0;
+
+	err = xa_alloc(&post_act->ids, &handle->id, post_attr,
+		       XA_LIMIT(1, MLX5_POST_ACTION_MAX), GFP_KERNEL);
+	if (err)
+		goto err_xarray;
+
+	/* Post action rule matches on fte_id and executes original rule's
+	 * tc rule action
+	 */
+	mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG,
+				    handle->id, MLX5_POST_ACTION_MASK);
+
+	handle->rule = mlx5_tc_rule_insert(post_act->priv, spec, post_attr);
+	if (IS_ERR(handle->rule)) {
+		err = PTR_ERR(handle->rule);
+		/* Terminate the message so it is emitted as a complete log line. */
+		netdev_warn(post_act->priv->netdev, "Failed to add post action rule\n");
+		goto err_rule;
+	}
+	handle->attr = post_attr;
+
+	/* The spec is only needed while inserting the rule. */
+	kvfree(spec);
+	return handle;
+
+err_rule:
+	xa_erase(&post_act->ids, handle->id);
+err_xarray:
+	kfree(post_attr);
+	kvfree(spec);
+	kfree(handle);
+	return ERR_PTR(err);
+}
+
+/* Undo mlx5e_tc_post_act_add(): delete the rule, release the handle's id,
+ * then free the attr copy and the handle itself. @handle must not be used
+ * afterwards.
+ */
+void
+mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle)
+{
+ mlx5_tc_rule_delete(post_act->priv, handle->rule, handle->attr);
+ xa_erase(&post_act->ids, handle->id);
+ kfree(handle->attr);
+ kfree(handle);
+}
+
+/* Return the flow table that holds the post action rules.
+ * @post_act is dereferenced without a NULL/ERR_PTR check.
+ */
+struct mlx5_flow_table *
+mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act)
+{
+ return post_act->ft;
+}
+
+/* Allocate a header modify action to write the post action handle fte id to a register.
+ *
+ * The action is added to @acts for the handle's namespace and targets
+ * FTEID_TO_REG. Returns whatever mlx5e_tc_match_to_reg_set() returns.
+ */
+int
+mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev,
+ struct mlx5e_post_act_handle *handle,
+ struct mlx5e_tc_mod_hdr_acts *acts)
+{
+ return mlx5e_tc_match_to_reg_set(dev, acts, handle->ns_type, FTEID_TO_REG, handle->id);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
new file mode 100644
index 000000000000..b530ec1981a5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_POST_ACTION_H__
+#define __MLX5_POST_ACTION_H__
+
+#include "en.h"
+#include "lib/fs_chains.h"
+
+struct mlx5_flow_attr;
+struct mlx5e_priv;
+struct mlx5e_tc_mod_hdr_acts;
+
+struct mlx5e_post_act *
+mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ enum mlx5_flow_namespace_type ns_type);
+
+void
+mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act);
+
+struct mlx5e_post_act_handle *
+mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr);
+
+void
+mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle);
+
+struct mlx5_flow_table *
+mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act);
+
+int
+mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev,
+ struct mlx5e_post_act_handle *handle,
+ struct mlx5e_tc_mod_hdr_acts *acts);
+
+#endif /* __MLX5_POST_ACTION_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
index 794012c5c476..6552ecee3f9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
@@ -4,7 +4,8 @@
#include <linux/skbuff.h>
#include <net/psample.h>
#include "en/mapping.h"
-#include "esw/sample.h"
+#include "en/tc/post_act.h"
+#include "sample.h"
#include "eswitch.h"
#include "en_tc.h"
#include "fs_core.h"
@@ -17,17 +18,18 @@ static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = {
.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP,
};
-struct mlx5_esw_psample {
- struct mlx5e_priv *priv;
+struct mlx5e_tc_psample {
+ struct mlx5_eswitch *esw;
struct mlx5_flow_table *termtbl;
struct mlx5_flow_handle *termtbl_rule;
DECLARE_HASHTABLE(hashtbl, 8);
struct mutex ht_lock; /* protect hashtbl */
DECLARE_HASHTABLE(restore_hashtbl, 8);
struct mutex restore_lock; /* protect restore_hashtbl */
+ struct mlx5e_post_act *post_act;
};
-struct mlx5_sampler {
+struct mlx5e_sampler {
struct hlist_node hlist;
u32 sampler_id;
u32 sample_ratio;
@@ -36,29 +38,32 @@ struct mlx5_sampler {
int count;
};
-struct mlx5_sample_flow {
- struct mlx5_sampler *sampler;
- struct mlx5_sample_restore *restore;
+struct mlx5e_sample_flow {
+ struct mlx5e_sampler *sampler;
+ struct mlx5e_sample_restore *restore;
struct mlx5_flow_attr *pre_attr;
struct mlx5_flow_handle *pre_rule;
- struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *post_attr;
+ struct mlx5_flow_handle *post_rule;
+ struct mlx5e_post_act_handle *post_act_handle;
};
-struct mlx5_sample_restore {
+struct mlx5e_sample_restore {
struct hlist_node hlist;
struct mlx5_modify_hdr *modify_hdr;
struct mlx5_flow_handle *rule;
+ struct mlx5e_post_act_handle *post_act_handle;
u32 obj_id;
int count;
};
static int
-sampler_termtbl_create(struct mlx5_esw_psample *esw_psample)
+sampler_termtbl_create(struct mlx5e_tc_psample *tc_psample)
{
- struct mlx5_core_dev *dev = esw_psample->priv->mdev;
- struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_eswitch *esw = tc_psample->esw;
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_destination dest = {};
+ struct mlx5_core_dev *dev = esw->dev;
struct mlx5_flow_namespace *root_ns;
struct mlx5_flow_act act = {};
int err;
@@ -79,20 +84,20 @@ sampler_termtbl_create(struct mlx5_esw_psample *esw_psample)
ft_attr.prio = FDB_SLOW_PATH;
ft_attr.max_fte = 1;
ft_attr.level = 1;
- esw_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
- if (IS_ERR(esw_psample->termtbl)) {
- err = PTR_ERR(esw_psample->termtbl);
+ tc_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(tc_psample->termtbl)) {
+ err = PTR_ERR(tc_psample->termtbl);
mlx5_core_warn(dev, "failed to create termtbl, err: %d\n", err);
return err;
}
act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
dest.vport.num = esw->manager_vport;
- esw_psample->termtbl_rule = mlx5_add_flow_rules(esw_psample->termtbl, NULL, &act, &dest, 1);
- if (IS_ERR(esw_psample->termtbl_rule)) {
- err = PTR_ERR(esw_psample->termtbl_rule);
+ tc_psample->termtbl_rule = mlx5_add_flow_rules(tc_psample->termtbl, NULL, &act, &dest, 1);
+ if (IS_ERR(tc_psample->termtbl_rule)) {
+ err = PTR_ERR(tc_psample->termtbl_rule);
mlx5_core_warn(dev, "failed to create termtbl rule, err: %d\n", err);
- mlx5_destroy_flow_table(esw_psample->termtbl);
+ mlx5_destroy_flow_table(tc_psample->termtbl);
return err;
}
@@ -100,14 +105,14 @@ sampler_termtbl_create(struct mlx5_esw_psample *esw_psample)
}
static void
-sampler_termtbl_destroy(struct mlx5_esw_psample *esw_psample)
+sampler_termtbl_destroy(struct mlx5e_tc_psample *tc_psample)
{
- mlx5_del_flow_rules(esw_psample->termtbl_rule);
- mlx5_destroy_flow_table(esw_psample->termtbl);
+ mlx5_del_flow_rules(tc_psample->termtbl_rule);
+ mlx5_destroy_flow_table(tc_psample->termtbl);
}
static int
-sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5_sampler *sampler)
+sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5e_sampler *sampler)
{
u32 in[MLX5_ST_SZ_DW(create_sampler_obj_in)] = {};
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
@@ -163,16 +168,16 @@ sampler_cmp(u32 sample_ratio1, u32 default_table_id1, u32 sample_ratio2, u32 def
return sample_ratio1 != sample_ratio2 || default_table_id1 != default_table_id2;
}
-static struct mlx5_sampler *
-sampler_get(struct mlx5_esw_psample *esw_psample, u32 sample_ratio, u32 default_table_id)
+static struct mlx5e_sampler *
+sampler_get(struct mlx5e_tc_psample *tc_psample, u32 sample_ratio, u32 default_table_id)
{
- struct mlx5_sampler *sampler;
+ struct mlx5e_sampler *sampler;
u32 hash_key;
int err;
- mutex_lock(&esw_psample->ht_lock);
+ mutex_lock(&tc_psample->ht_lock);
hash_key = sampler_hash(sample_ratio, default_table_id);
- hash_for_each_possible(esw_psample->hashtbl, sampler, hlist, hash_key)
+ hash_for_each_possible(tc_psample->hashtbl, sampler, hlist, hash_key)
if (!sampler_cmp(sampler->sample_ratio, sampler->default_table_id,
sample_ratio, default_table_id))
goto add_ref;
@@ -183,42 +188,49 @@ sampler_get(struct mlx5_esw_psample *esw_psample, u32 sample_ratio, u32 default_
goto err_alloc;
}
- sampler->sample_table_id = esw_psample->termtbl->id;
+ sampler->sample_table_id = tc_psample->termtbl->id;
sampler->default_table_id = default_table_id;
sampler->sample_ratio = sample_ratio;
- err = sampler_obj_create(esw_psample->priv->mdev, sampler);
+ err = sampler_obj_create(tc_psample->esw->dev, sampler);
if (err)
goto err_create;
- hash_add(esw_psample->hashtbl, &sampler->hlist, hash_key);
+ hash_add(tc_psample->hashtbl, &sampler->hlist, hash_key);
add_ref:
sampler->count++;
- mutex_unlock(&esw_psample->ht_lock);
+ mutex_unlock(&tc_psample->ht_lock);
return sampler;
err_create:
kfree(sampler);
err_alloc:
- mutex_unlock(&esw_psample->ht_lock);
+ mutex_unlock(&tc_psample->ht_lock);
return ERR_PTR(err);
}
static void
-sampler_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sampler *sampler)
+sampler_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sampler *sampler)
{
- mutex_lock(&esw_psample->ht_lock);
+ mutex_lock(&tc_psample->ht_lock);
if (--sampler->count == 0) {
hash_del(&sampler->hlist);
- sampler_obj_destroy(esw_psample->priv->mdev, sampler->sampler_id);
+ sampler_obj_destroy(tc_psample->esw->dev, sampler->sampler_id);
kfree(sampler);
}
- mutex_unlock(&esw_psample->ht_lock);
+ mutex_unlock(&tc_psample->ht_lock);
}
+/* obj_id is used to restore the sample parameters.
+ * Set fte_id in original flow table, then match it in the default table.
+ * Only set it for NICs that can preserve reg_c, or when there is a decap action. For other cases,
+ * use the same match in the default table.
+ * Use one header rewrite for both obj_id and fte_id.
+ */
static struct mlx5_modify_hdr *
-sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id)
+sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id,
+ struct mlx5e_post_act_handle *handle)
{
struct mlx5e_tc_mod_hdr_acts mod_acts = {};
struct mlx5_modify_hdr *modify_hdr;
@@ -229,6 +241,12 @@ sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id)
if (err)
goto err_set_regc0;
+ if (handle) {
+ err = mlx5e_tc_post_act_set_handle(mdev, handle, &mod_acts);
+ if (err)
+ goto err_post_act;
+ }
+
modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB,
mod_acts.num_actions,
mod_acts.actions);
@@ -241,23 +259,40 @@ sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id)
return modify_hdr;
err_modify_hdr:
+err_post_act:
dealloc_mod_hdr_actions(&mod_acts);
err_set_regc0:
return ERR_PTR(err);
}
-static struct mlx5_sample_restore *
-sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id)
+static u32
+restore_hash(u32 obj_id, struct mlx5e_post_act_handle *post_act_handle)
{
- struct mlx5_core_dev *mdev = esw_psample->priv->mdev;
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
- struct mlx5_sample_restore *restore;
+ return jhash_2words(obj_id, hash32_ptr(post_act_handle), 0);
+}
+
+static bool
+restore_equal(struct mlx5e_sample_restore *restore, u32 obj_id,
+ struct mlx5e_post_act_handle *post_act_handle)
+{
+ return restore->obj_id == obj_id && restore->post_act_handle == post_act_handle;
+}
+
+static struct mlx5e_sample_restore *
+sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id,
+ struct mlx5e_post_act_handle *post_act_handle)
+{
+ struct mlx5_eswitch *esw = tc_psample->esw;
+ struct mlx5_core_dev *mdev = esw->dev;
+ struct mlx5e_sample_restore *restore;
struct mlx5_modify_hdr *modify_hdr;
+ u32 hash_key;
int err;
- mutex_lock(&esw_psample->restore_lock);
- hash_for_each_possible(esw_psample->restore_hashtbl, restore, hlist, obj_id)
- if (restore->obj_id == obj_id)
+ mutex_lock(&tc_psample->restore_lock);
+ hash_key = restore_hash(obj_id, post_act_handle);
+ hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, hash_key)
+ if (restore_equal(restore, obj_id, post_act_handle))
goto add_ref;
restore = kzalloc(sizeof(*restore), GFP_KERNEL);
@@ -266,8 +301,9 @@ sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id)
goto err_alloc;
}
restore->obj_id = obj_id;
+ restore->post_act_handle = post_act_handle;
- modify_hdr = sample_metadata_rule_get(mdev, obj_id);
+ modify_hdr = sample_modify_hdr_get(mdev, obj_id, post_act_handle);
if (IS_ERR(modify_hdr)) {
err = PTR_ERR(modify_hdr);
goto err_modify_hdr;
@@ -280,10 +316,10 @@ sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id)
goto err_restore;
}
- hash_add(esw_psample->restore_hashtbl, &restore->hlist, obj_id);
+ hash_add(tc_psample->restore_hashtbl, &restore->hlist, hash_key);
add_ref:
restore->count++;
- mutex_unlock(&esw_psample->restore_lock);
+ mutex_unlock(&tc_psample->restore_lock);
return restore;
err_restore:
@@ -291,26 +327,26 @@ err_restore:
err_modify_hdr:
kfree(restore);
err_alloc:
- mutex_unlock(&esw_psample->restore_lock);
+ mutex_unlock(&tc_psample->restore_lock);
return ERR_PTR(err);
}
static void
-sample_restore_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sample_restore *restore)
+sample_restore_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sample_restore *restore)
{
- mutex_lock(&esw_psample->restore_lock);
+ mutex_lock(&tc_psample->restore_lock);
if (--restore->count == 0)
hash_del(&restore->hlist);
- mutex_unlock(&esw_psample->restore_lock);
+ mutex_unlock(&tc_psample->restore_lock);
if (!restore->count) {
mlx5_del_flow_rules(restore->rule);
- mlx5_modify_header_dealloc(esw_psample->priv->mdev, restore->modify_hdr);
+ mlx5_modify_header_dealloc(tc_psample->esw->dev, restore->modify_hdr);
kfree(restore);
}
}
-void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj)
+void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj)
{
u32 trunc_size = mapped_obj->sample.trunc_size;
struct psample_group psample_group = {};
@@ -325,6 +361,87 @@ void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj
psample_sample_packet(&psample_group, skb, mapped_obj->sample.rate, &md);
}
+/* Offload the original match and actions into a per <vport, chain, prio>
+ * table.
+ *
+ * Gets the vport table and stores its id in @default_tbl_id, copies @attr into
+ * a newly allocated sample_flow->post_attr that targets that table, and adds
+ * the rule as sample_flow->post_rule.
+ *
+ * Returns 0 on success or a negative errno. On failure the attr copy is freed
+ * and the vport table reference is put; undo a success with del_post_rule().
+ */
+static int
+add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
+ struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr,
+ u32 *default_tbl_id)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ u32 attr_sz = ns_to_attr_sz(MLX5_FLOW_NAMESPACE_FDB);
+ struct mlx5_vport_tbl_attr per_vport_tbl_attr;
+ struct mlx5_flow_table *default_tbl;
+ struct mlx5_flow_attr *post_attr;
+ int err;
+
+ /* Allocate default table per vport, chain and prio. Otherwise, there is
+ * only one default table for the same sampler object. Rules with different
+ * prio and chain may overlap. For CT sample action, per vport default
+ * table is needed to restore the metadata.
+ */
+ per_vport_tbl_attr.chain = attr->chain;
+ per_vport_tbl_attr.prio = attr->prio;
+ per_vport_tbl_attr.vport = esw_attr->in_rep->vport;
+ per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+ default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr);
+ if (IS_ERR(default_tbl)) {
+ err = PTR_ERR(default_tbl);
+ goto err_default_tbl;
+ }
+ /* Written before the remaining steps, so it is set even if they fail. */
+ *default_tbl_id = default_tbl->id;
+
+ post_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+ if (!post_attr) {
+ err = -ENOMEM;
+ goto err_attr;
+ }
+ /* Stored before the rule is added; on the err_rule path below this
+ * pointer is left referring to the freed copy.
+ */
+ sample_flow->post_attr = post_attr;
+ memcpy(post_attr, attr, attr_sz);
+ /* Perform the original matches on the default table.
+ * Offload all actions except the sample action.
+ */
+ post_attr->chain = 0;
+ post_attr->prio = 0;
+ post_attr->ft = default_tbl;
+ /* Replaces (does not OR into) the flags copied from @attr. */
+ post_attr->flags = MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
+
+ /* When offloading sample and encap action, if there is no valid
+ * neigh data struct, a slow path rule is offloaded first. Source
+ * port metadata match is set at that time. A per vport table is
+ * already allocated. No need to match it again. So clear the source
+ * port metadata match.
+ */
+ mlx5_eswitch_clear_rule_source_port(esw, spec);
+ sample_flow->post_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, post_attr);
+ if (IS_ERR(sample_flow->post_rule)) {
+ err = PTR_ERR(sample_flow->post_rule);
+ goto err_rule;
+ }
+ return 0;
+
+err_rule:
+ kfree(post_attr);
+err_attr:
+ mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr);
+err_default_tbl:
+ return err;
+}
+
+/* Undo add_post_rule(): delete sample_flow->post_rule, free the attr copy and
+ * put the vport table.
+ *
+ * The table key is rebuilt from @attr (chain, prio, in_rep vport), so @attr
+ * must describe the same <vport, chain, prio> that was used when the rule was
+ * added.
+ */
+static void
+del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_vport_tbl_attr tbl_attr;
+
+ mlx5_eswitch_del_offloaded_rule(esw, sample_flow->post_rule, sample_flow->post_attr);
+ kfree(sample_flow->post_attr);
+ tbl_attr.chain = attr->chain;
+ tbl_attr.prio = attr->prio;
+ tbl_attr.vport = esw_attr->in_rep->vport;
+ tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+ mlx5_esw_vporttbl_put(esw, &tbl_attr);
+}
+
/* For the following typical flow table:
*
* +-------------------------------+
@@ -342,8 +459,9 @@ void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj
* +---------------------+
* + original match +
* +---------------------+
- * |
- * v
+ * | set fte_id (if reg_c preserve cap)
+ * | do decap (if required)
+ * v
* +------------------------------------------------+
* + Flow Sampler Object +
* +------------------------------------------------+
@@ -353,80 +471,82 @@ void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj
* +------------------------------------------------+
* | |
* v v
- * +-----------------------------+ +----------------------------------------+
- * + sample table + + default table per <vport, chain, prio> +
- * +-----------------------------+ +----------------------------------------+
- * + forward to management vport + + original match +
- * +-----------------------------+ +----------------------------------------+
- * + other actions +
- * +----------------------------------------+
+ * +-----------------------------+ +-------------------+
+ * + sample table + + default table +
+ * +-----------------------------+ +-------------------+
+ * + forward to management vport + |
+ * +-----------------------------+ |
+ * +-------+------+
+ * | |reg_c preserve cap
+ * | |or decap action
+ * v v
+ * +-----------------+ +-------------+
+ * + per vport table + + post action +
+ * +-----------------+ +-------------+
+ * + original match +
+ * +-----------------+
+ * + other actions +
+ * +-----------------+
*/
struct mlx5_flow_handle *
-mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample,
+mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr)
+ struct mlx5_flow_attr *attr,
+ u32 tunnel_id)
{
+ struct mlx5e_post_act_handle *post_act_handle = NULL;
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
- struct mlx5_vport_tbl_attr per_vport_tbl_attr;
struct mlx5_esw_flow_attr *pre_esw_attr;
struct mlx5_mapped_obj restore_obj = {};
- struct mlx5_sample_flow *sample_flow;
- struct mlx5_sample_attr *sample_attr;
- struct mlx5_flow_table *default_tbl;
+ struct mlx5e_sample_flow *sample_flow;
+ struct mlx5e_sample_attr *sample_attr;
struct mlx5_flow_attr *pre_attr;
struct mlx5_eswitch *esw;
+ u32 default_tbl_id;
u32 obj_id;
int err;
- if (IS_ERR_OR_NULL(esw_psample))
+ if (IS_ERR_OR_NULL(tc_psample))
return ERR_PTR(-EOPNOTSUPP);
/* If slow path flag is set, eg. when the neigh is invalid for encap,
* don't offload sample action.
*/
- esw = esw_psample->priv->mdev->priv.eswitch;
+ esw = tc_psample->esw;
if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL);
if (!sample_flow)
return ERR_PTR(-ENOMEM);
- esw_attr->sample->sample_flow = sample_flow;
+ sample_attr = attr->sample_attr;
+ sample_attr->sample_flow = sample_flow;
- /* Allocate default table per vport, chain and prio. Otherwise, there is
- * only one default table for the same sampler object. Rules with different
- * prio and chain may overlap. For CT sample action, per vport default
- * table is needed to resotre the metadata.
+ /* For NICs with reg_c_preserve support or decap action, use
+ * post action instead of the per vport, chain and prio table.
+ * Only match the fte id instead of the same match in the
+ * original flow table.
*/
- per_vport_tbl_attr.chain = attr->chain;
- per_vport_tbl_attr.prio = attr->prio;
- per_vport_tbl_attr.vport = esw_attr->in_rep->vport;
- per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
- default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr);
- if (IS_ERR(default_tbl)) {
- err = PTR_ERR(default_tbl);
- goto err_default_tbl;
- }
-
- /* Perform the original matches on the default table.
- * Offload all actions except the sample action.
- */
- esw_attr->sample->sample_default_tbl = default_tbl;
- /* When offloading sample and encap action, if there is no valid
- * neigh data struct, a slow path rule is offloaded first. Source
- * port metadata match is set at that time. A per vport table is
- * already allocated. No need to match it again. So clear the source
- * port metadata match.
- */
- mlx5_eswitch_clear_rule_source_port(esw, spec);
- sample_flow->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
- if (IS_ERR(sample_flow->rule)) {
- err = PTR_ERR(sample_flow->rule);
- goto err_offload_rule;
+ if (MLX5_CAP_GEN(esw->dev, reg_c_preserve) ||
+ attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
+ struct mlx5_flow_table *ft;
+
+ ft = mlx5e_tc_post_act_get_ft(tc_psample->post_act);
+ default_tbl_id = ft->id;
+ post_act_handle = mlx5e_tc_post_act_add(tc_psample->post_act, attr);
+ if (IS_ERR(post_act_handle)) {
+ err = PTR_ERR(post_act_handle);
+ goto err_post_act;
+ }
+ sample_flow->post_act_handle = post_act_handle;
+ } else {
+ err = add_post_rule(esw, sample_flow, spec, attr, &default_tbl_id);
+ if (err)
+ goto err_post_rule;
}
/* Create sampler object. */
- sample_flow->sampler = sampler_get(esw_psample, esw_attr->sample->rate, default_tbl->id);
+ sample_flow->sampler = sampler_get(tc_psample, sample_attr->rate, default_tbl_id);
if (IS_ERR(sample_flow->sampler)) {
err = PTR_ERR(sample_flow->sampler);
goto err_sampler;
@@ -434,16 +554,17 @@ mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample,
/* Create an id mapping reg_c0 value to sample object. */
restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE;
- restore_obj.sample.group_id = esw_attr->sample->group_num;
- restore_obj.sample.rate = esw_attr->sample->rate;
- restore_obj.sample.trunc_size = esw_attr->sample->trunc_size;
+ restore_obj.sample.group_id = sample_attr->group_num;
+ restore_obj.sample.rate = sample_attr->rate;
+ restore_obj.sample.trunc_size = sample_attr->trunc_size;
+ restore_obj.sample.tunnel_id = tunnel_id;
err = mapping_add(esw->offloads.reg_c0_obj_pool, &restore_obj, &obj_id);
if (err)
goto err_obj_id;
- esw_attr->sample->restore_obj_id = obj_id;
+ sample_attr->restore_obj_id = obj_id;
/* Create sample restore context. */
- sample_flow->restore = sample_restore_get(esw_psample, obj_id);
+ sample_flow->restore = sample_restore_get(tc_psample, obj_id, post_act_handle);
if (IS_ERR(sample_flow->restore)) {
err = PTR_ERR(sample_flow->restore);
goto err_sample_restore;
@@ -455,21 +576,23 @@ mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample,
pre_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
if (!pre_attr) {
err = -ENOMEM;
- goto err_alloc_flow_attr;
- }
- sample_attr = kzalloc(sizeof(*sample_attr), GFP_KERNEL);
- if (!sample_attr) {
- err = -ENOMEM;
- goto err_alloc_sample_attr;
+ goto err_alloc_pre_flow_attr;
}
- pre_esw_attr = pre_attr->esw_attr;
pre_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ /* For decap action, do decap in the original flow table instead of the
+ * default flow table.
+ */
+ if (tunnel_id)
+ pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
pre_attr->modify_hdr = sample_flow->restore->modify_hdr;
pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE;
+ pre_attr->inner_match_level = attr->inner_match_level;
+ pre_attr->outer_match_level = attr->outer_match_level;
pre_attr->chain = attr->chain;
pre_attr->prio = attr->prio;
- pre_esw_attr->sample = sample_attr;
- pre_esw_attr->sample->sampler_id = sample_flow->sampler->sampler_id;
+ pre_attr->sample_attr = attr->sample_attr;
+ sample_attr->sampler_id = sample_flow->sampler->sampler_id;
+ pre_esw_attr = pre_attr->esw_attr;
pre_esw_attr->in_mdev = esw_attr->in_mdev;
pre_esw_attr->in_rep = esw_attr->in_rep;
sample_flow->pre_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, pre_attr);
@@ -479,107 +602,113 @@ mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample,
}
sample_flow->pre_attr = pre_attr;
- return sample_flow->rule;
+ return sample_flow->post_rule;
err_pre_offload_rule:
- kfree(sample_attr);
-err_alloc_sample_attr:
kfree(pre_attr);
-err_alloc_flow_attr:
- sample_restore_put(esw_psample, sample_flow->restore);
+err_alloc_pre_flow_attr:
+ sample_restore_put(tc_psample, sample_flow->restore);
err_sample_restore:
mapping_remove(esw->offloads.reg_c0_obj_pool, obj_id);
err_obj_id:
- sampler_put(esw_psample, sample_flow->sampler);
+ sampler_put(tc_psample, sample_flow->sampler);
err_sampler:
- /* For sample offload, rule is added in default_tbl. No need to call
- * mlx5_esw_chains_put_table()
- */
- attr->prio = 0;
- attr->chain = 0;
- mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr);
-err_offload_rule:
- mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr);
-err_default_tbl:
+ if (!post_act_handle)
+ del_post_rule(esw, sample_flow, attr);
+err_post_rule:
+ if (post_act_handle)
+ mlx5e_tc_post_act_del(tc_psample->post_act, post_act_handle);
+err_post_act:
+ kfree(sample_flow);
return ERR_PTR(err);
}
void
-mlx5_esw_sample_unoffload(struct mlx5_esw_psample *esw_psample,
+mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample,
struct mlx5_flow_handle *rule,
struct mlx5_flow_attr *attr)
{
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
- struct mlx5_sample_flow *sample_flow;
+ struct mlx5e_sample_flow *sample_flow;
struct mlx5_vport_tbl_attr tbl_attr;
- struct mlx5_flow_attr *pre_attr;
struct mlx5_eswitch *esw;
- if (IS_ERR_OR_NULL(esw_psample))
+ if (IS_ERR_OR_NULL(tc_psample))
return;
/* If slow path flag is set, sample action is not offloaded.
* No need to delete sample rule.
*/
- esw = esw_psample->priv->mdev->priv.eswitch;
+ esw = tc_psample->esw;
if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) {
mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
return;
}
- sample_flow = esw_attr->sample->sample_flow;
- pre_attr = sample_flow->pre_attr;
- memset(pre_attr, 0, sizeof(*pre_attr));
- esw = esw_psample->priv->mdev->priv.eswitch;
- mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, pre_attr);
- mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr);
-
- sample_restore_put(esw_psample, sample_flow->restore);
- mapping_remove(esw->offloads.reg_c0_obj_pool, esw_attr->sample->restore_obj_id);
- sampler_put(esw_psample, sample_flow->sampler);
- tbl_attr.chain = attr->chain;
- tbl_attr.prio = attr->prio;
- tbl_attr.vport = esw_attr->in_rep->vport;
- tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
- mlx5_esw_vporttbl_put(esw, &tbl_attr);
+ /* The delete order below must not be changed; otherwise
+ * firmware syndromes will be hit.
+ */
+ sample_flow = attr->sample_attr->sample_flow;
+ mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr);
+ if (!sample_flow->post_act_handle)
+ mlx5_eswitch_del_offloaded_rule(esw, sample_flow->post_rule,
+ sample_flow->post_attr);
+
+ sample_restore_put(tc_psample, sample_flow->restore);
+ mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr->restore_obj_id);
+ sampler_put(tc_psample, sample_flow->sampler);
+ if (sample_flow->post_act_handle) {
+ mlx5e_tc_post_act_del(tc_psample->post_act, sample_flow->post_act_handle);
+ } else {
+ tbl_attr.chain = attr->chain;
+ tbl_attr.prio = attr->prio;
+ tbl_attr.vport = esw_attr->in_rep->vport;
+ tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+ mlx5_esw_vporttbl_put(esw, &tbl_attr);
+ kfree(sample_flow->post_attr);
+ }
- kfree(pre_attr->esw_attr->sample);
- kfree(pre_attr);
+ kfree(sample_flow->pre_attr);
kfree(sample_flow);
}
-struct mlx5_esw_psample *
-mlx5_esw_sample_init(struct mlx5e_priv *priv)
+struct mlx5e_tc_psample *
+mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act)
{
- struct mlx5_esw_psample *esw_psample;
+ struct mlx5e_tc_psample *tc_psample;
int err;
- esw_psample = kzalloc(sizeof(*esw_psample), GFP_KERNEL);
- if (!esw_psample)
+ tc_psample = kzalloc(sizeof(*tc_psample), GFP_KERNEL);
+ if (!tc_psample)
return ERR_PTR(-ENOMEM);
- esw_psample->priv = priv;
- err = sampler_termtbl_create(esw_psample);
+ if (IS_ERR_OR_NULL(post_act)) {
+ err = PTR_ERR(post_act);
+ goto err_post_act;
+ }
+ tc_psample->post_act = post_act;
+ tc_psample->esw = esw;
+ err = sampler_termtbl_create(tc_psample);
if (err)
- goto err_termtbl;
+ goto err_post_act;
- mutex_init(&esw_psample->ht_lock);
- mutex_init(&esw_psample->restore_lock);
+ mutex_init(&tc_psample->ht_lock);
+ mutex_init(&tc_psample->restore_lock);
- return esw_psample;
+ return tc_psample;
-err_termtbl:
- kfree(esw_psample);
+err_post_act:
+ kfree(tc_psample);
return ERR_PTR(err);
}
void
-mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample)
+mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample)
{
- if (IS_ERR_OR_NULL(esw_psample))
+ if (IS_ERR_OR_NULL(tc_psample))
return;
- mutex_destroy(&esw_psample->restore_lock);
- mutex_destroy(&esw_psample->ht_lock);
- sampler_termtbl_destroy(esw_psample);
- kfree(esw_psample);
+ mutex_destroy(&tc_psample->restore_lock);
+ mutex_destroy(&tc_psample->ht_lock);
+ sampler_termtbl_destroy(tc_psample);
+ kfree(tc_psample);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
new file mode 100644
index 000000000000..db0146df9b30
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_SAMPLE_H__
+#define __MLX5_EN_TC_SAMPLE_H__
+
+#include "eswitch.h"
+
+struct mlx5_flow_attr;
+struct mlx5e_tc_psample;
+struct mlx5e_post_act;
+
+/* Per-flow sample attributes shared between the TC flow and the sample
+ * offload code.
+ */
+struct mlx5e_sample_attr {
+ /* NOTE(review): group_num, rate and trunc_size look like the psample
+ * group, sampling rate and truncation size - confirm against the
+ * code that parses the sample action.
+ */
+ u32 group_num;
+ u32 rate;
+ u32 trunc_size;
+ /* Id removed from esw->offloads.reg_c0_obj_pool on unoffload. */
+ u32 restore_obj_id;
+ /* Copied from sample_flow->sampler->sampler_id during offload. */
+ u32 sampler_id;
+ /* Offload state; read back by mlx5e_tc_sample_unoffload(). */
+ struct mlx5e_sample_flow *sample_flow;
+};
+
+void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj);
+
+struct mlx5_flow_handle *
+mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ u32 tunnel_id);
+
+void
+mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5e_tc_psample *
+mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act);
+
+void
+mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample);
+
+#endif /* __MLX5_EN_TC_SAMPLE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 91e7a01e32be..6c949abcd2e1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -19,6 +19,7 @@
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
+#include "en/tc/post_act.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"
@@ -32,10 +33,6 @@
#define MLX5_CT_STATE_RELATED_BIT BIT(5)
#define MLX5_CT_STATE_INVALID_BIT BIT(6)
-#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen)
-#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
-#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX
-
#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen)
#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
@@ -46,14 +43,13 @@ struct mlx5_tc_ct_priv {
struct mlx5_core_dev *dev;
const struct net_device *netdev;
struct mod_hdr_tbl *mod_hdr_tbl;
- struct idr fte_ids;
struct xarray tuple_ids;
struct rhashtable zone_ht;
struct rhashtable ct_tuples_ht;
struct rhashtable ct_tuples_nat_ht;
struct mlx5_flow_table *ct;
struct mlx5_flow_table *ct_nat;
- struct mlx5_flow_table *post_ct;
+ struct mlx5e_post_act *post_act;
struct mutex control_lock; /* guards parallel adds/dels */
struct mapping_ctx *zone_mapping;
struct mapping_ctx *labels_mapping;
@@ -64,11 +60,9 @@ struct mlx5_tc_ct_priv {
struct mlx5_ct_flow {
struct mlx5_flow_attr *pre_ct_attr;
- struct mlx5_flow_attr *post_ct_attr;
struct mlx5_flow_handle *pre_ct_rule;
- struct mlx5_flow_handle *post_ct_rule;
+ struct mlx5e_post_act_handle *post_act_handle;
struct mlx5_ct_ft *ft;
- u32 fte_id;
u32 chain_mapping;
};
@@ -768,7 +762,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
attr->dest_chain = 0;
- attr->dest_ft = ct_priv->post_ct;
+ attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
attr->outer_match_level = MLX5_MATCH_L4;
attr->counter = entry->counter->counter;
@@ -1432,7 +1426,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
ctstate |= MLX5_CT_STATE_NAT_BIT;
mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
- dest.ft = ct_priv->post_ct;
+ dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -1716,9 +1710,9 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
* | do decap
* v
* +---------------------+
- * + pre_ct/pre_ct_nat + if matches +---------------------+
- * + zone+nat match +---------------->+ post_ct (see below) +
- * +---------------------+ set zone +---------------------+
+ * + pre_ct/pre_ct_nat + if matches +-------------------------+
+ * + zone+nat match +---------------->+ post_act (see below) +
+ * +---------------------+ set zone +-------------------------+
* | set zone
* v
* +--------------------+
@@ -1732,7 +1726,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
* | do nat (if needed)
* v
* +--------------+
- * + post_ct + original filter actions
+ * + post_act + original filter actions
* + fte_id match +------------------------>
* +--------------+
*/
@@ -1746,19 +1740,15 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
- struct mlx5_flow_spec *post_ct_spec = NULL;
+ struct mlx5e_post_act_handle *handle;
struct mlx5_flow_attr *pre_ct_attr;
struct mlx5_modify_hdr *mod_hdr;
- struct mlx5_flow_handle *rule;
struct mlx5_ct_flow *ct_flow;
int chain_mapping = 0, err;
struct mlx5_ct_ft *ft;
- u32 fte_id = 1;
- post_ct_spec = kvzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
- if (!post_ct_spec || !ct_flow) {
- kvfree(post_ct_spec);
+ if (!ct_flow) {
kfree(ct_flow);
return ERR_PTR(-ENOMEM);
}
@@ -1773,14 +1763,13 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
}
ct_flow->ft = ft;
- err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
- MLX5_FTE_ID_MAX, GFP_KERNEL);
- if (err) {
- netdev_warn(priv->netdev,
- "Failed to allocate fte id, err: %d\n", err);
- goto err_idr;
+ handle = mlx5e_tc_post_act_add(ct_priv->post_act, attr);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ ct_dbg("Failed to allocate post action handle");
+ goto err_post_act_handle;
}
- ct_flow->fte_id = fte_id;
+ ct_flow->post_act_handle = handle;
/* Base flow attributes of both rules on original rule attribute */
ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
@@ -1789,15 +1778,8 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
goto err_alloc_pre;
}
- ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
- if (!ct_flow->post_ct_attr) {
- err = -ENOMEM;
- goto err_alloc_post;
- }
-
pre_ct_attr = ct_flow->pre_ct_attr;
memcpy(pre_ct_attr, attr, attr_sz);
- memcpy(ct_flow->post_ct_attr, attr, attr_sz);
/* Modify the original rule's action to fwd and modify, leave decap */
pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
@@ -1823,10 +1805,9 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
goto err_mapping;
}
- err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
- FTEID_TO_REG, fte_id);
+ err = mlx5e_tc_post_act_set_handle(priv->mdev, handle, &pre_mod_acts);
if (err) {
- ct_dbg("Failed to set fte_id register mapping");
+ ct_dbg("Failed to set post action handle");
goto err_mapping;
}
@@ -1857,33 +1838,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
}
pre_ct_attr->modify_hdr = mod_hdr;
- /* Post ct rule matches on fte_id and executes original rule's
- * tc rule action
- */
- mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
- fte_id, MLX5_FTE_ID_MASK);
-
- /* Put post_ct rule on post_ct flow table */
- ct_flow->post_ct_attr->chain = 0;
- ct_flow->post_ct_attr->prio = 0;
- ct_flow->post_ct_attr->ft = ct_priv->post_ct;
-
- /* Splits were handled before CT */
- if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
- ct_flow->post_ct_attr->esw_attr->split_count = 0;
-
- ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
- ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
- ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
- rule = mlx5_tc_rule_insert(priv, post_ct_spec,
- ct_flow->post_ct_attr);
- ct_flow->post_ct_rule = rule;
- if (IS_ERR(ct_flow->post_ct_rule)) {
- err = PTR_ERR(ct_flow->post_ct_rule);
- ct_dbg("Failed to add post ct rule");
- goto err_insert_post_ct;
- }
-
/* Change original rule point to ct table */
pre_ct_attr->dest_chain = 0;
pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
@@ -1897,28 +1851,21 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
attr->ct_attr.ct_flow = ct_flow;
dealloc_mod_hdr_actions(&pre_mod_acts);
- kvfree(post_ct_spec);
- return rule;
+ return ct_flow->pre_ct_rule;
err_insert_orig:
- mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
- ct_flow->post_ct_attr);
-err_insert_post_ct:
mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
dealloc_mod_hdr_actions(&pre_mod_acts);
mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
err_get_chain:
- kfree(ct_flow->post_ct_attr);
-err_alloc_post:
kfree(ct_flow->pre_ct_attr);
err_alloc_pre:
- idr_remove(&ct_priv->fte_ids, fte_id);
-err_idr:
+ mlx5e_tc_post_act_del(ct_priv->post_act, handle);
+err_post_act_handle:
mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
- kvfree(post_ct_spec);
kfree(ct_flow);
netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
return ERR_PTR(err);
@@ -2029,16 +1976,13 @@ __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
pre_ct_attr);
mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
- if (ct_flow->post_ct_rule) {
- mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
- ct_flow->post_ct_attr);
+ if (ct_flow->post_act_handle) {
mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
- idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
+ mlx5e_tc_post_act_del(ct_priv->post_act, ct_flow->post_act_handle);
mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
}
kfree(ct_flow->pre_ct_attr);
- kfree(ct_flow->post_ct_attr);
kfree(ct_flow);
}
@@ -2064,11 +2008,6 @@ static int
mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
const char **err_msg)
{
- if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
- *err_msg = "firmware level support is missing";
- return -EOPNOTSUPP;
- }
-
if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
/* vlan workaround should be avoided for multi chain rules.
* This is just a sanity check as pop vlan action should
@@ -2098,20 +2037,9 @@ mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
}
static int
-mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv,
- const char **err_msg)
-{
- if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
- *err_msg = "firmware level support is missing";
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
-static int
mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act,
const char **err_msg)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -2122,10 +2050,14 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
*err_msg = "tc skb extension missing";
return -EOPNOTSUPP;
#endif
+ if (IS_ERR_OR_NULL(post_act)) {
+ *err_msg = "tc ct offload not supported, post action is missing";
+ return -EOPNOTSUPP;
+ }
+
if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
- else
- return mlx5_tc_ct_init_check_nic_support(priv, err_msg);
+ return 0;
}
#define INIT_ERR_PREFIX "tc ct offload init failed"
@@ -2133,19 +2065,19 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
struct mlx5_tc_ct_priv *
mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
struct mod_hdr_tbl *mod_hdr,
- enum mlx5_flow_namespace_type ns_type)
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
{
struct mlx5_tc_ct_priv *ct_priv;
struct mlx5_core_dev *dev;
const char *msg;
+ u64 mapping_id;
int err;
dev = priv->mdev;
- err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg);
+ err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act, &msg);
if (err) {
- mlx5_core_warn(dev,
- "tc ct offload not supported, %s\n",
- msg);
+ mlx5_core_warn(dev, "tc ct offload not supported, %s\n", msg);
goto err_support;
}
@@ -2153,13 +2085,17 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
if (!ct_priv)
goto err_alloc;
- ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
+ mapping_id = mlx5_query_nic_system_image_guid(dev);
+
+ ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
+ sizeof(u16), 0, true);
if (IS_ERR(ct_priv->zone_mapping)) {
err = PTR_ERR(ct_priv->zone_mapping);
goto err_mapping_zone;
}
- ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
+ ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
+ sizeof(u32) * 4, 0, true);
if (IS_ERR(ct_priv->labels_mapping)) {
err = PTR_ERR(ct_priv->labels_mapping);
goto err_mapping_labels;
@@ -2189,16 +2125,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
goto err_ct_nat_tbl;
}
- ct_priv->post_ct = mlx5_chains_create_global_table(chains);
- if (IS_ERR(ct_priv->post_ct)) {
- err = PTR_ERR(ct_priv->post_ct);
- mlx5_core_warn(dev,
- "%s, failed to create post ct table err: %d\n",
- INIT_ERR_PREFIX, err);
- goto err_post_ct_tbl;
- }
-
- idr_init(&ct_priv->fte_ids);
+ ct_priv->post_act = post_act;
mutex_init(&ct_priv->control_lock);
rhashtable_init(&ct_priv->zone_ht, &zone_params);
rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
@@ -2206,8 +2133,6 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
return ct_priv;
-err_post_ct_tbl:
- mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
err_ct_nat_tbl:
mlx5_chains_destroy_global_table(chains, ct_priv->ct);
err_ct_tbl:
@@ -2232,7 +2157,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
chains = ct_priv->chains;
- mlx5_chains_destroy_global_table(chains, ct_priv->post_ct);
mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
mlx5_chains_destroy_global_table(chains, ct_priv->ct);
mapping_destroy(ct_priv->zone_mapping);
@@ -2242,7 +2166,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
rhashtable_destroy(&ct_priv->zone_ht);
mutex_destroy(&ct_priv->control_lock);
- idr_destroy(&ct_priv->fte_ids);
kfree(ct_priv);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
index 644cf1641cde..363329f4aac6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -92,7 +92,8 @@ struct mlx5_ct_attr {
struct mlx5_tc_ct_priv *
mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
struct mod_hdr_tbl *mod_hdr,
- enum mlx5_flow_namespace_type ns_type);
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act);
void
mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv);
@@ -132,7 +133,8 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
static inline struct mlx5_tc_ct_priv *
mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
struct mod_hdr_tbl *mod_hdr,
- enum mlx5_flow_namespace_type ns_type)
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
{
return NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index 8f79f04eccd6..b4e986818794 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -124,6 +124,11 @@ static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
if (IS_ERR(rt))
return PTR_ERR(rt);
+ if (rt->rt_type != RTN_UNICAST) {
+ ret = -ENETUNREACH;
+ goto err_rt_release;
+ }
+
if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
ret = -ENETUNREACH;
goto err_rt_release;
@@ -520,7 +525,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
e->out_dev = attr.out_dev;
e->route_dev_ifindex = attr.route_dev->ifindex;
- /* It's importent to add the neigh to the hash table before checking
+ /* It's important to add the neigh to the hash table before checking
* the neigh validity state. So if we'll get a notification, in case the
* neigh changes it's validity state, we would find the relevant neigh
* in the hash.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index 2e846b741280..1c44c6c345f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -147,7 +147,7 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
mlx5e_rep_queue_neigh_stats_work(priv);
list_for_each_entry(flow, flow_list, tmp_list) {
- if (!mlx5e_is_offloaded_flow(flow))
+ if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
continue;
attr = flow->attr;
esw_attr = attr->esw_attr;
@@ -188,7 +188,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
int err;
list_for_each_entry(flow, flow_list, tmp_list) {
- if (!mlx5e_is_offloaded_flow(flow))
+ if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW))
continue;
attr = flow->attr;
esw_attr = attr->esw_attr;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
new file mode 100644
index 000000000000..de936dc4bc48
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "tir.h"
+#include "params.h"
+#include <linux/mlx5/transobj.h>
+
+#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024)
+
+/* max() doesn't work inside square brackets. */
+#define MLX5E_TIR_CMD_IN_SZ_DW ( \
+ MLX5_ST_SZ_DW(create_tir_in) > MLX5_ST_SZ_DW(modify_tir_in) ? \
+ MLX5_ST_SZ_DW(create_tir_in) : MLX5_ST_SZ_DW(modify_tir_in) \
+)
+
+/* Command buffer used to compose a TIR create or modify command. */
+struct mlx5e_tir_builder {
+ /* Sized for the larger of create_tir_in and modify_tir_in. */
+ u32 in[MLX5E_TIR_CMD_IN_SZ_DW];
+ /* True: 'in' uses the modify_tir_in layout; false: create_tir_in. */
+ bool modify;
+};
+
+/* Allocate a zero-initialized TIR command builder.
+ *
+ * @modify: true to compose a modify_tir_in command, false for create_tir_in.
+ *
+ * Returns the new builder, or NULL when the allocation fails. The builder
+ * is released with mlx5e_tir_builder_free().
+ */
+struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify)
+{
+	struct mlx5e_tir_builder *builder;
+
+	builder = kvzalloc(sizeof(*builder), GFP_KERNEL);
+	/* kvzalloc() can fail; callers test the result for NULL, so the
+	 * pointer must not be dereferenced before handing it back.
+	 */
+	if (!builder)
+		return NULL;
+
+	builder->modify = modify;
+
+	return builder;
+}
+
+/* Free a builder with kvfree(), matching the kvzalloc() in
+ * mlx5e_tir_builder_alloc().
+ */
+void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder)
+{
+ kvfree(builder);
+}
+
+/* Zero the command buffer; the builder's modify flag is left unchanged. */
+void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder)
+{
+ memset(builder->in, 0, sizeof(builder->in));
+}
+
+/* Return the address of the TIR context ('ctx' field) inside the command
+ * buffer, using the layout that matches the builder's command type.
+ */
+static void *mlx5e_tir_builder_get_tirc(struct mlx5e_tir_builder *builder)
+{
+	void *tirc;
+
+	if (!builder->modify)
+		tirc = MLX5_ADDR_OF(create_tir_in, builder->in, ctx);
+	else
+		tirc = MLX5_ADDR_OF(modify_tir_in, builder->in, ctx);
+
+	return tirc;
+}
+
+/* Fill the TIR context for a direct TIR bound to a single RQ.
+ *
+ * @tdn: transport domain number.
+ * @rqn: number of the RQ the TIR dispatches to (inline_rqn).
+ *
+ * Warns when called on a builder that was allocated for a modify command.
+ */
+void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn)
+{
+	void *tir_ctx;
+
+	WARN_ON(builder->modify);
+
+	tir_ctx = mlx5e_tir_builder_get_tirc(builder);
+	MLX5_SET(tirc, tir_ctx, transport_domain, tdn);
+	MLX5_SET(tirc, tir_ctx, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
+	MLX5_SET(tirc, tir_ctx, rx_hash_fn, MLX5_RX_HASH_FN_NONE);
+	MLX5_SET(tirc, tir_ctx, inline_rqn, rqn);
+}
+
+/* Fill the TIR context for an indirect TIR that dispatches through an RQT.
+ *
+ * @tdn: transport domain number.
+ * @rqtn: number of the RQ table written to indirect_table.
+ * @inner_ft_support: value written to tunneled_offload_en.
+ *
+ * Warns when called on a builder that was allocated for a modify command.
+ */
+void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
+				 u32 rqtn, bool inner_ft_support)
+{
+	void *tir_ctx;
+
+	WARN_ON(builder->modify);
+
+	tir_ctx = mlx5e_tir_builder_get_tirc(builder);
+	MLX5_SET(tirc, tir_ctx, transport_domain, tdn);
+	MLX5_SET(tirc, tir_ctx, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
+	MLX5_SET(tirc, tir_ctx, indirect_table, rqtn);
+	MLX5_SET(tirc, tir_ctx, tunneled_offload_en, inner_ft_support);
+}
+
+/* Add the LRO settings to the TIR command.
+ *
+ * For a modify command the LRO bit of the bitmask is always set, even when
+ * LRO is disabled. The LRO fields of the TIR context are written only when
+ * lro_param->enabled is true.
+ */
+void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
+				 const struct mlx5e_lro_param *lro_param)
+{
+	const unsigned int rough_max_l2_l3_hdr_sz = 256;
+	void *tir_ctx = mlx5e_tir_builder_get_tirc(builder);
+
+	if (builder->modify)
+		MLX5_SET(modify_tir_in, builder->in, bitmask.lro, 1);
+
+	if (lro_param->enabled) {
+		MLX5_SET(tirc, tir_ctx, lro_enable_mask,
+			 MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
+			 MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
+		/* The field is written as the byte budget shifted right by 8. */
+		MLX5_SET(tirc, tir_ctx, lro_max_ip_payload_size,
+			 (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
+		MLX5_SET(tirc, tir_ctx, lro_timeout_period_usecs, lro_param->timeout);
+	}
+}
+
+/* Map an ETH_RSS_HASH_* function id to the MLX5_RX_HASH_FN_* value written
+ * to the TIR context; any other id maps to MLX5_RX_HASH_FN_NONE.
+ */
+static int mlx5e_hfunc_to_hw(u8 hfunc)
+{
+	if (hfunc == ETH_RSS_HASH_TOP)
+		return MLX5_RX_HASH_FN_TOEPLITZ;
+	if (hfunc == ETH_RSS_HASH_XOR)
+		return MLX5_RX_HASH_FN_INVERTED_XOR8;
+
+	return MLX5_RX_HASH_FN_NONE;
+}
+
+/* Add the RSS hash settings to the TIR command.
+ *
+ * @rss_hash: hash function and, for Toeplitz, the key to copy.
+ * @rss_tt: L3/L4 protocol types and hash field selection.
+ * @inner: true to fill the inner field selector, false for the outer one.
+ *
+ * For a modify command the hash bit of the bitmask is set as well.
+ */
+void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder,
+				 const struct mlx5e_rss_params_hash *rss_hash,
+				 const struct mlx5e_rss_params_traffic_type *rss_tt,
+				 bool inner)
+{
+	void *tir_ctx = mlx5e_tir_builder_get_tirc(builder);
+	void *selector;
+
+	if (builder->modify)
+		MLX5_SET(modify_tir_in, builder->in, bitmask.hash, 1);
+
+	MLX5_SET(tirc, tir_ctx, rx_hash_fn, mlx5e_hfunc_to_hw(rss_hash->hfunc));
+
+	/* Toeplitz additionally needs the symmetric flag and the key. */
+	if (rss_hash->hfunc == ETH_RSS_HASH_TOP) {
+		void *key = MLX5_ADDR_OF(tirc, tir_ctx, rx_hash_toeplitz_key);
+		const size_t key_len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);
+
+		MLX5_SET(tirc, tir_ctx, rx_hash_symmetric, 1);
+		memcpy(key, rss_hash->toeplitz_hash_key, key_len);
+	}
+
+	selector = inner ?
+		MLX5_ADDR_OF(tirc, tir_ctx, rx_hash_field_selector_inner) :
+		MLX5_ADDR_OF(tirc, tir_ctx, rx_hash_field_selector_outer);
+
+	MLX5_SET(rx_hash_field_select, selector, l3_prot_type, rss_tt->l3_prot_type);
+	MLX5_SET(rx_hash_field_select, selector, l4_prot_type, rss_tt->l4_prot_type);
+	MLX5_SET(rx_hash_field_select, selector, selected_fields, rss_tt->rx_hash_fields);
+}
+
+/* Set rx_hash_fn to MLX5_RX_HASH_FN_INVERTED_XOR8 in the TIR context.
+ *
+ * Warns when called on a builder that was allocated for a modify command.
+ */
+void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder)
+{
+	void *tir_ctx;
+
+	WARN_ON(builder->modify);
+
+	tir_ctx = mlx5e_tir_builder_get_tirc(builder);
+	MLX5_SET(tirc, tir_ctx, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
+}
+
+/* Enable TLS on the TIR (tls_en) and set both the unicast and multicast
+ * bits of self_lb_block.
+ *
+ * Warns when called on a builder that was allocated for a modify command.
+ */
+void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder)
+{
+	void *tir_ctx;
+
+	WARN_ON(builder->modify);
+
+	tir_ctx = mlx5e_tir_builder_get_tirc(builder);
+	MLX5_SET(tirc, tir_ctx, tls_en, 1);
+	MLX5_SET(tirc, tir_ctx, self_lb_block,
+		 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST |
+		 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST);
+}
+
+/* Create a TIR from the command held in @builder.
+ *
+ * @tir: object to initialize; receives @mdev and the new TIR number.
+ * @builder: builder whose command buffer is passed to mlx5_core_create_tir().
+ * @mdev: core device the TIR is created on.
+ * @reg: when true, the TIR is added to the device's td.tirs_list under
+ *       td.list_lock; otherwise its list head is only initialized.
+ *
+ * Returns 0 on success or the error from mlx5_core_create_tir().
+ */
+int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder,
+		   struct mlx5_core_dev *mdev, bool reg)
+{
+	struct mlx5e_hw_objs *hw_objs;
+	int ret;
+
+	tir->mdev = mdev;
+
+	ret = mlx5_core_create_tir(tir->mdev, builder->in, &tir->tirn);
+	if (ret)
+		return ret;
+
+	if (!reg) {
+		/* Leave an empty list head so list_empty() holds on destroy. */
+		INIT_LIST_HEAD(&tir->list);
+		return 0;
+	}
+
+	hw_objs = &tir->mdev->mlx5e_res.hw_objs;
+
+	mutex_lock(&hw_objs->td.list_lock);
+	list_add(&tir->list, &hw_objs->td.tirs_list);
+	mutex_unlock(&hw_objs->td.list_lock);
+
+	return 0;
+}
+
+/* Unlink the TIR from the device's tirs_list (if it was registered there)
+ * and destroy it with mlx5_core_destroy_tir().
+ */
+void mlx5e_tir_destroy(struct mlx5e_tir *tir)
+{
+ struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs;
+
+ /* Skip mutex if list_del is no-op (the TIR wasn't registered in the
+ * list). list_empty will never return true for an item of tirs_list,
+ * and READ_ONCE/WRITE_ONCE in list_empty/list_del guarantee consistency
+ * of the list->next value.
+ */
+ if (!list_empty(&tir->list)) {
+ mutex_lock(&res->td.list_lock);
+ list_del(&tir->list);
+ mutex_unlock(&res->td.list_lock);
+ }
+
+ mlx5_core_destroy_tir(tir->mdev, tir->tirn);
+}
+
+/* Apply the command held in @builder to an existing TIR; returns the result
+ * of mlx5_core_modify_tir().
+ */
+int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder)
+{
+ return mlx5_core_modify_tir(tir->mdev, tir->tirn, builder->in);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
new file mode 100644
index 000000000000..e45149a78ed9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_TIR_H__
+#define __MLX5_EN_TIR_H__
+
+#include <linux/kernel.h>
+
+/* RSS hash function parameters consumed by mlx5e_tir_builder_build_rss(). */
+struct mlx5e_rss_params_hash {
+ /* ETH_RSS_HASH_* function id. */
+ u8 hfunc;
+ /* Key copied into the TIR context when hfunc is ETH_RSS_HASH_TOP. */
+ u8 toeplitz_hash_key[40];
+};
+
+/* Per-traffic-type values written to the TIR's rx_hash_field_select block
+ * by mlx5e_tir_builder_build_rss().
+ */
+struct mlx5e_rss_params_traffic_type {
+ u8 l3_prot_type;
+ u8 l4_prot_type;
+ /* Written to the selected_fields field. */
+ u32 rx_hash_fields;
+};
+
+struct mlx5e_tir_builder;
+struct mlx5e_lro_param;
+
+struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify);
+void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder);
+void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder);
+
+void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn);
+void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
+ u32 rqtn, bool inner_ft_support);
+void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_lro_param *lro_param);
+void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_rss_params_hash *rss_hash,
+ const struct mlx5e_rss_params_traffic_type *rss_tt,
+ bool inner);
+void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder);
+void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder);
+
+struct mlx5_core_dev;
+
+/* A created TIR object; set up by mlx5e_tir_init(). */
+struct mlx5e_tir {
+ /* Device the TIR was created on. */
+ struct mlx5_core_dev *mdev;
+ /* TIR number filled in by mlx5_core_create_tir(). */
+ u32 tirn;
+ /* Entry in the device's td.tirs_list, or an empty list head when the
+ * TIR was not registered.
+ */
+ struct list_head list;
+};
+
+int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder,
+ struct mlx5_core_dev *mdev, bool reg);
+void mlx5e_tir_destroy(struct mlx5e_tir *tir);
+
+/* Return the TIR number stored in @tir. */
+static inline u32 mlx5e_tir_get_tirn(struct mlx5e_tir *tir)
+{
+ return tir->tirn;
+}
+
+int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder);
+
+#endif /* __MLX5_EN_TIR_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index 7f94508594fb..d54607a42740 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -92,30 +92,19 @@ static void mlx5e_close_trap_rq(struct mlx5e_rq *rq)
static int mlx5e_create_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir,
u32 rqn)
{
- void *tirc;
- int inlen;
- u32 *in;
+ struct mlx5e_tir_builder *builder;
int err;
- inlen = MLX5_ST_SZ_BYTES(create_tir_in);
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
return -ENOMEM;
- tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
- MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
- MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE);
- MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
- MLX5_SET(tirc, tirc, inline_rqn, rqn);
- err = mlx5e_create_tir(mdev, tir, in);
- kvfree(in);
+ mlx5e_tir_builder_build_inline(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqn);
+ err = mlx5e_tir_init(tir, builder, mdev, true);
- return err;
-}
+ mlx5e_tir_builder_free(builder);
-static void mlx5e_destroy_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir)
-{
- mlx5e_destroy_tir(mdev, tir);
+ return err;
}
static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev,
@@ -173,7 +162,7 @@ err_napi_del:
void mlx5e_close_trap(struct mlx5e_trap *trap)
{
- mlx5e_destroy_trap_direct_rq_tir(trap->mdev, &trap->tir);
+ mlx5e_tir_destroy(&trap->tir);
mlx5e_close_trap_rq(&trap->rq);
netif_napi_del(&trap->napi);
kvfree(trap);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
index 71e8d66fa150..7b562d2c8a19 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
@@ -122,7 +122,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
* any Fill Ring entries at the setup stage.
*/
- err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]);
+ err = mlx5e_rx_res_xsk_activate(priv->rx_res, &priv->channels, ix);
if (unlikely(err))
goto err_deactivate;
@@ -169,7 +169,7 @@ static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
goto remove_pool;
c = priv->channels.c[ix];
- mlx5e_xsk_redirect_rqt_to_drop(priv, ix);
+ mlx5e_rx_res_xsk_deactivate(priv->rx_res, ix);
mlx5e_deactivate_xsk(c);
mlx5e_close_xsk(c);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index a8315f166696..538bc2419bd8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -126,7 +126,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
/* Create a separate SQ, so that when the buff pool is disabled, we could
* close this SQ safely and stop receiving CQEs. In other case, e.g., if
* the XDPSQ was used instead, we might run into trouble when the buff pool
- * is disabled and then reenabled, but the SQ continues receiving CQEs
+ * is disabled and then re-enabled, but the SQ continues receiving CQEs
* from the old buff pool.
*/
err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, pool, &c->xsksq, true);
@@ -183,73 +183,3 @@ void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
mlx5e_deactivate_rq(&c->xskrq);
/* TX queue is disabled on close. */
}
-
-static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn)
-{
- struct mlx5e_redirect_rqt_param direct_rrp = {
- .is_rss = false,
- {
- .rqn = rqn,
- },
- };
-
- u32 rqtn = priv->xsk_tir[ix].rqt.rqtn;
-
- return mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
-}
-
-int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c)
-{
- return mlx5e_redirect_xsk_rqt(priv, c->ix, c->xskrq.rqn);
-}
-
-int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix)
-{
- return mlx5e_redirect_xsk_rqt(priv, ix, priv->drop_rq.rqn);
-}
-
-int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
-{
- int err, i;
-
- if (!priv->xsk.refcnt)
- return 0;
-
- for (i = 0; i < chs->num; i++) {
- struct mlx5e_channel *c = chs->c[i];
-
- if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
- continue;
-
- err = mlx5e_xsk_redirect_rqt_to_channel(priv, c);
- if (unlikely(err))
- goto err_stop;
- }
-
- return 0;
-
-err_stop:
- for (i--; i >= 0; i--) {
- if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
- continue;
-
- mlx5e_xsk_redirect_rqt_to_drop(priv, i);
- }
-
- return err;
-}
-
-void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
-{
- int i;
-
- if (!priv->xsk.refcnt)
- return;
-
- for (i = 0; i < chs->num; i++) {
- if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
- continue;
-
- mlx5e_xsk_redirect_rqt_to_drop(priv, i);
- }
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
index ca20f1ff5e39..50e111b85efd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
@@ -17,9 +17,5 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
void mlx5e_close_xsk(struct mlx5e_channel *c);
void mlx5e_activate_xsk(struct mlx5e_channel *c);
void mlx5e_deactivate_xsk(struct mlx5e_channel *c);
-int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c);
-int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix);
-int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
-void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
#endif /* __MLX5_EN_XSK_SETUP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
index e51f60b55daa..4c4ee524176c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
@@ -16,13 +16,13 @@ struct mlx5e_accel_fs_tcp {
struct mlx5_flow_handle *default_rules[ACCEL_FS_TCP_NUM_TYPES];
};
-static enum mlx5e_traffic_types fs_accel2tt(enum accel_fs_tcp_type i)
+static enum mlx5_traffic_types fs_accel2tt(enum accel_fs_tcp_type i)
{
switch (i) {
case ACCEL_FS_IPV4_TCP:
- return MLX5E_TT_IPV4_TCP;
+ return MLX5_TT_IPV4_TCP;
default: /* ACCEL_FS_IPV6_TCP */
- return MLX5E_TT_IPV6_TCP;
+ return MLX5_TT_IPV6_TCP;
}
}
@@ -161,7 +161,7 @@ static int accel_fs_tcp_add_default_rule(struct mlx5e_priv *priv,
fs_tcp = priv->fs.accel_tcp;
accel_fs_t = &fs_tcp->tables[type];
- dest = mlx5e_ttc_get_default_dest(priv, fs_accel2tt(type));
+ dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_accel2tt(type));
rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -307,7 +307,7 @@ static int accel_fs_tcp_disable(struct mlx5e_priv *priv)
for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
/* Modify ttc rules destination to point back to the indir TIRs */
- err = mlx5e_ttc_fwd_default_dest(priv, fs_accel2tt(i));
+ err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_accel2tt(i));
if (err) {
netdev_err(priv->netdev,
"%s: modify ttc[%d] default destination failed, err(%d)\n",
@@ -329,7 +329,7 @@ static int accel_fs_tcp_enable(struct mlx5e_priv *priv)
dest.ft = priv->fs.accel_tcp->tables[i].t;
/* Modify ttc rules destination to point on the accel_fs FTs */
- err = mlx5e_ttc_fwd_dest(priv, fs_accel2tt(i), &dest);
+ err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_accel2tt(i), &dest);
if (err) {
netdev_err(priv->netdev,
"%s: modify ttc[%d] destination to accel failed, err(%d)\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 34119ce92031..17da23dff0ed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -41,11 +41,11 @@ struct mlx5e_ipsec_tx {
};
/* IPsec RX flow steering */
-static enum mlx5e_traffic_types fs_esp2tt(enum accel_fs_esp_type i)
+static enum mlx5_traffic_types fs_esp2tt(enum accel_fs_esp_type i)
{
if (i == ACCEL_FS_ESP4)
- return MLX5E_TT_IPV4_IPSEC_ESP;
- return MLX5E_TT_IPV6_IPSEC_ESP;
+ return MLX5_TT_IPV4_IPSEC_ESP;
+ return MLX5_TT_IPV6_IPSEC_ESP;
}
static int rx_err_add_rule(struct mlx5e_priv *priv,
@@ -265,7 +265,8 @@ static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
accel_esp = priv->ipsec->rx_fs;
fs_prot = &accel_esp->fs_prot[type];
- fs_prot->default_dest = mlx5e_ttc_get_default_dest(priv, fs_esp2tt(type));
+ fs_prot->default_dest =
+ mlx5_ttc_get_default_dest(priv->fs.ttc, fs_esp2tt(type));
err = rx_err_create_ft(priv, fs_prot, &fs_prot->rx_err);
if (err)
@@ -301,7 +302,7 @@ static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
/* connect */
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest.ft = fs_prot->ft;
- mlx5e_ttc_fwd_dest(priv, fs_esp2tt(type), &dest);
+ mlx5_ttc_fwd_dest(priv->fs.ttc, fs_esp2tt(type), &dest);
out:
mutex_unlock(&fs_prot->prot_mutex);
@@ -320,7 +321,7 @@ static void rx_ft_put(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
goto out;
/* disconnect */
- mlx5e_ttc_fwd_default_dest(priv, fs_esp2tt(type));
+ mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_esp2tt(type));
/* remove FT */
rx_destroy(priv, type);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index 4e58fade7a60..62abce008c7b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -49,7 +49,7 @@ struct mlx5e_ktls_offload_context_rx {
struct mlx5e_rq_stats *rq_stats;
struct mlx5e_tls_sw_stats *sw_stats;
struct completion add_ctx;
- u32 tirn;
+ struct mlx5e_tir tir;
u32 key_id;
u32 rxq;
DECLARE_BITMAP(flags, MLX5E_NUM_PRIV_RX_FLAGS);
@@ -99,31 +99,22 @@ mlx5e_ktls_rx_resync_create_resp_list(void)
return resp_list;
}
-static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, u32 *tirn, u32 rqtn)
+static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 rqtn)
{
- int err, inlen;
- void *tirc;
- u32 *in;
+ struct mlx5e_tir_builder *builder;
+ int err;
- inlen = MLX5_ST_SZ_BYTES(create_tir_in);
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
return -ENOMEM;
- tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
-
- MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
- MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
- MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
- MLX5_SET(tirc, tirc, indirect_table, rqtn);
- MLX5_SET(tirc, tirc, tls_en, 1);
- MLX5_SET(tirc, tirc, self_lb_block,
- MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST |
- MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST);
+ mlx5e_tir_builder_build_rqt(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqtn, false);
+ mlx5e_tir_builder_build_direct(builder);
+ mlx5e_tir_builder_build_tls(builder);
+ err = mlx5e_tir_init(tir, builder, mdev, false);
- err = mlx5_core_create_tir(mdev, in, tirn);
+ mlx5e_tir_builder_free(builder);
- kvfree(in);
return err;
}
@@ -139,7 +130,8 @@ static void accel_rule_handle_work(struct work_struct *work)
goto out;
rule = mlx5e_accel_fs_add_sk(accel_rule->priv, priv_rx->sk,
- priv_rx->tirn, MLX5_FS_DEFAULT_FLOW_TAG);
+ mlx5e_tir_get_tirn(&priv_rx->tir),
+ MLX5_FS_DEFAULT_FLOW_TAG);
if (!IS_ERR_OR_NULL(rule))
accel_rule->rule = rule;
out:
@@ -173,8 +165,8 @@ post_static_params(struct mlx5e_icosq *sq,
pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs);
wqe = MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi);
mlx5e_ktls_build_static_params(wqe, sq->pc, sq->sqn, &priv_rx->crypto_info,
- priv_rx->tirn, priv_rx->key_id,
- priv_rx->resync.seq, false,
+ mlx5e_tir_get_tirn(&priv_rx->tir),
+ priv_rx->key_id, priv_rx->resync.seq, false,
TLS_OFFLOAD_CTX_DIR_RX);
wi = (struct mlx5e_icosq_wqe_info) {
.wqe_type = MLX5E_ICOSQ_WQE_UMR_TLS,
@@ -202,8 +194,9 @@ post_progress_params(struct mlx5e_icosq *sq,
pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs);
wqe = MLX5E_TLS_FETCH_SET_PROGRESS_PARAMS_WQE(sq, pi);
- mlx5e_ktls_build_progress_params(wqe, sq->pc, sq->sqn, priv_rx->tirn, false,
- next_record_tcp_sn,
+ mlx5e_ktls_build_progress_params(wqe, sq->pc, sq->sqn,
+ mlx5e_tir_get_tirn(&priv_rx->tir),
+ false, next_record_tcp_sn,
TLS_OFFLOAD_CTX_DIR_RX);
wi = (struct mlx5e_icosq_wqe_info) {
.wqe_type = MLX5E_ICOSQ_WQE_SET_PSV_TLS,
@@ -325,7 +318,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq,
psv = &wqe->psv;
psv->num_psv = 1 << 4;
psv->l_key = sq->channel->mkey_be;
- psv->psv_index[0] = cpu_to_be32(priv_rx->tirn);
+ psv->psv_index[0] = cpu_to_be32(mlx5e_tir_get_tirn(&priv_rx->tir));
psv->va = cpu_to_be64(buf->dma_addr);
wi = (struct mlx5e_icosq_wqe_info) {
@@ -635,9 +628,9 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
priv_rx->sw_stats = &priv->tls->sw_stats;
mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx);
- rqtn = priv->direct_tir[rxq].rqt.rqtn;
+ rqtn = mlx5e_rx_res_get_rqtn_direct(priv->rx_res, rxq);
- err = mlx5e_ktls_create_tir(mdev, &priv_rx->tirn, rqtn);
+ err = mlx5e_ktls_create_tir(mdev, &priv_rx->tir, rqtn);
if (err)
goto err_create_tir;
@@ -658,7 +651,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
return 0;
err_post_wqes:
- mlx5_core_destroy_tir(mdev, priv_rx->tirn);
+ mlx5e_tir_destroy(&priv_rx->tir);
err_create_tir:
mlx5_ktls_destroy_key(mdev, priv_rx->key_id);
err_create_key:
@@ -693,7 +686,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
if (priv_rx->rule.rule)
mlx5e_accel_fs_del_sk(priv_rx->rule.rule);
- mlx5_core_destroy_tir(mdev, priv_rx->tirn);
+ mlx5e_tir_destroy(&priv_rx->tir);
mlx5_ktls_destroy_key(mdev, priv_rx->key_id);
/* priv_rx should normally be freed here, but if there is an outstanding
* GET_PSV, deallocation will be delayed until the CQE for GET_PSV is
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 25403af32859..fe5d82fa6e92 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -98,17 +98,17 @@ struct arfs_rule {
for (j = 0; j < ARFS_HASH_SIZE; j++) \
hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist)
-static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type)
+static enum mlx5_traffic_types arfs_get_tt(enum arfs_type type)
{
switch (type) {
case ARFS_IPV4_TCP:
- return MLX5E_TT_IPV4_TCP;
+ return MLX5_TT_IPV4_TCP;
case ARFS_IPV4_UDP:
- return MLX5E_TT_IPV4_UDP;
+ return MLX5_TT_IPV4_UDP;
case ARFS_IPV6_TCP:
- return MLX5E_TT_IPV6_TCP;
+ return MLX5_TT_IPV6_TCP;
case ARFS_IPV6_UDP:
- return MLX5E_TT_IPV6_UDP;
+ return MLX5_TT_IPV6_UDP;
default:
return -EINVAL;
}
@@ -120,7 +120,7 @@ static int arfs_disable(struct mlx5e_priv *priv)
for (i = 0; i < ARFS_NUM_TYPES; i++) {
/* Modify ttc rules destination back to their default */
- err = mlx5e_ttc_fwd_default_dest(priv, arfs_get_tt(i));
+ err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, arfs_get_tt(i));
if (err) {
netdev_err(priv->netdev,
"%s: modify ttc[%d] default destination failed, err(%d)\n",
@@ -149,7 +149,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv)
for (i = 0; i < ARFS_NUM_TYPES; i++) {
dest.ft = priv->fs.arfs->arfs_tables[i].ft.t;
/* Modify ttc rules destination to point on the aRFS FTs */
- err = mlx5e_ttc_fwd_dest(priv, arfs_get_tt(i), &dest);
+ err = mlx5_ttc_fwd_dest(priv->fs.ttc, arfs_get_tt(i), &dest);
if (err) {
netdev_err(priv->netdev,
"%s: modify ttc[%d] dest to arfs, failed err(%d)\n",
@@ -192,10 +192,9 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
enum arfs_type type)
{
struct arfs_table *arfs_t = &priv->fs.arfs->arfs_tables[type];
- struct mlx5e_tir *tir = priv->indir_tir;
struct mlx5_flow_destination dest = {};
MLX5_DECLARE_FLOW_ACT(flow_act);
- enum mlx5e_traffic_types tt;
+ enum mlx5_traffic_types tt;
int err = 0;
dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
@@ -206,10 +205,10 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
return -EINVAL;
}
- /* FIXME: Must use mlx5e_ttc_get_default_dest(),
+ /* FIXME: Must use mlx5_ttc_get_default_dest(),
* but can't since TTC default is not setup yet !
*/
- dest.tir_num = tir[tt].tirn;
+ dest.tir_num = mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt);
arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL,
&flow_act,
&dest, 1);
@@ -553,7 +552,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
16);
}
dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn;
+ dest.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, arfs_rule->rxq);
rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -576,7 +575,7 @@ static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
int err = 0;
dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dst.tir_num = priv->direct_tir[rxq].tirn;
+ dst.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, rxq);
err = mlx5_modify_rule_destination(rule, &dst, NULL);
if (err)
netdev_warn(priv->netdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index 8c166ee56d8b..84eb7201c142 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -33,36 +33,9 @@
#include "en.h"
/* mlx5e global resources should be placed in this file.
- * Global resources are common to all the netdevices crated on the same nic.
+ * Global resources are common to all the netdevices created on the same nic.
*/
-int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 *in)
-{
- struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
- int err;
-
- err = mlx5_core_create_tir(mdev, in, &tir->tirn);
- if (err)
- return err;
-
- mutex_lock(&res->td.list_lock);
- list_add(&tir->list, &res->td.tirs_list);
- mutex_unlock(&res->td.list_lock);
-
- return 0;
-}
-
-void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
- struct mlx5e_tir *tir)
-{
- struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
-
- mutex_lock(&res->td.list_lock);
- mlx5_core_destroy_tir(mdev, tir->tirn);
- list_del(&tir->list);
- mutex_unlock(&res->td.list_lock);
-}
-
void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc)
{
bool ro_pci_enable = pcie_relaxed_ordering_enabled(mdev->pdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index bd72572e03d1..2cfd12953909 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -420,6 +420,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
unsigned int count = ch->combined_count;
struct mlx5e_params new_params;
bool arfs_enabled;
+ int rss_cnt;
bool opened;
int err = 0;
@@ -455,6 +456,27 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
goto out;
}
+ /* Don't allow changing the number of channels if non-default RSS contexts exist,
+ * the kernel doesn't protect against set_channels operations that break them.
+ */
+ rss_cnt = mlx5e_rx_res_rss_cnt(priv->rx_res) - 1;
+ if (rss_cnt) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: Non-default RSS contexts exist (%d), cannot change the number of channels\n",
+ __func__, rss_cnt);
+ goto out;
+ }
+
+ /* Don't allow changing the number of channels if MQPRIO mode channel offload is active,
+ * because it defines a partition over the channels queues.
+ */
+ if (cur_params->mqprio.mode == TC_MQPRIO_MODE_CHANNEL) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: MQPRIO mode channel offload is active, cannot change the number of channels\n",
+ __func__);
+ goto out;
+ }
+
new_params = *cur_params;
new_params.num_channels = count;
@@ -512,7 +534,9 @@ int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
}
static int mlx5e_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -630,7 +654,9 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
}
static int mlx5e_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -1172,7 +1198,7 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev,
u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv)
{
- return sizeof(priv->rss_params.toeplitz_hash_key);
+ return sizeof_field(struct mlx5e_rss_params_hash, toeplitz_hash_key);
}
static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev)
@@ -1194,88 +1220,64 @@ static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev)
return mlx5e_ethtool_get_rxfh_indir_size(priv);
}
-int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
- u8 *hfunc)
+static int mlx5e_get_rxfh_context(struct net_device *dev, u32 *indir,
+ u8 *key, u8 *hfunc, u32 rss_context)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_rss_params *rss = &priv->rss_params;
-
- if (indir)
- memcpy(indir, rss->indirection_rqt,
- sizeof(rss->indirection_rqt));
-
- if (key)
- memcpy(key, rss->toeplitz_hash_key,
- sizeof(rss->toeplitz_hash_key));
-
- if (hfunc)
- *hfunc = rss->hfunc;
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int err;
- return 0;
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rss_context, indir, key, hfunc);
+ mutex_unlock(&priv->state_lock);
+ return err;
}
-int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
- const u8 *key, const u8 hfunc)
+static int mlx5e_set_rxfh_context(struct net_device *dev, const u32 *indir,
+ const u8 *key, const u8 hfunc,
+ u32 *rss_context, bool delete)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_rss_params *rss = &priv->rss_params;
- int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
- bool refresh_tirs = false;
- bool refresh_rqt = false;
- void *in;
-
- if ((hfunc != ETH_RSS_HASH_NO_CHANGE) &&
- (hfunc != ETH_RSS_HASH_XOR) &&
- (hfunc != ETH_RSS_HASH_TOP))
- return -EINVAL;
-
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
+ int err;
mutex_lock(&priv->state_lock);
-
- if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hfunc) {
- rss->hfunc = hfunc;
- refresh_rqt = true;
- refresh_tirs = true;
- }
-
- if (indir) {
- memcpy(rss->indirection_rqt, indir,
- sizeof(rss->indirection_rqt));
- refresh_rqt = true;
+ if (delete) {
+ err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context);
+ goto unlock;
}
- if (key) {
- memcpy(rss->toeplitz_hash_key, key,
- sizeof(rss->toeplitz_hash_key));
- refresh_tirs = refresh_tirs || rss->hfunc == ETH_RSS_HASH_TOP;
- }
+ if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
+ unsigned int count = priv->channels.params.num_channels;
- if (refresh_rqt && test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- struct mlx5e_redirect_rqt_param rrp = {
- .is_rss = true,
- {
- .rss = {
- .hfunc = rss->hfunc,
- .channels = &priv->channels,
- },
- },
- };
- u32 rqtn = priv->indir_rqt.rqtn;
-
- mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp);
+ err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count);
+ if (err)
+ goto unlock;
}
- if (refresh_tirs)
- mlx5e_modify_tirs_hash(priv, in);
+ err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, *rss_context, indir, key,
+ hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc);
+unlock:
mutex_unlock(&priv->state_lock);
+ return err;
+}
- kvfree(in);
+int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+ u8 *hfunc)
+{
+ return mlx5e_get_rxfh_context(netdev, indir, key, hfunc, 0);
+}
- return 0;
+int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
+ const u8 *key, const u8 hfunc)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, 0, indir, key,
+ hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc);
+ mutex_unlock(&priv->state_lock);
+ return err;
}
#define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100
@@ -2358,6 +2360,8 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
.get_rxfh_indir_size = mlx5e_get_rxfh_indir_size,
.get_rxfh = mlx5e_get_rxfh,
.set_rxfh = mlx5e_set_rxfh,
+ .get_rxfh_context = mlx5e_get_rxfh_context,
+ .set_rxfh_context = mlx5e_set_rxfh_context,
.get_rxnfc = mlx5e_get_rxnfc,
.set_rxnfc = mlx5e_set_rxnfc,
.get_tunable = mlx5e_get_tunable,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 0b75fab41ae8..c06b4b938ae7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -718,7 +718,7 @@ static int mlx5e_add_promisc_rule(struct mlx5e_priv *priv)
if (!spec)
return -ENOMEM;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = priv->fs.ttc.ft.t;
+ dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
rule_p = &priv->fs.promisc.rule;
*rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
@@ -854,593 +854,59 @@ void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft)
ft->t = NULL;
}
-static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc)
-{
- int i;
-
- for (i = 0; i < MLX5E_NUM_TT; i++) {
- if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) {
- mlx5_del_flow_rules(ttc->rules[i].rule);
- ttc->rules[i].rule = NULL;
- }
- }
-
- for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) {
- if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
- mlx5_del_flow_rules(ttc->tunnel_rules[i]);
- ttc->tunnel_rules[i] = NULL;
- }
- }
-}
-
-struct mlx5e_etype_proto {
- u16 etype;
- u8 proto;
-};
-
-static struct mlx5e_etype_proto ttc_rules[] = {
- [MLX5E_TT_IPV4_TCP] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_TCP,
- },
- [MLX5E_TT_IPV6_TCP] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_TCP,
- },
- [MLX5E_TT_IPV4_UDP] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_UDP,
- },
- [MLX5E_TT_IPV6_UDP] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_UDP,
- },
- [MLX5E_TT_IPV4_IPSEC_AH] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_AH,
- },
- [MLX5E_TT_IPV6_IPSEC_AH] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_AH,
- },
- [MLX5E_TT_IPV4_IPSEC_ESP] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_ESP,
- },
- [MLX5E_TT_IPV6_IPSEC_ESP] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_ESP,
- },
- [MLX5E_TT_IPV4] = {
- .etype = ETH_P_IP,
- .proto = 0,
- },
- [MLX5E_TT_IPV6] = {
- .etype = ETH_P_IPV6,
- .proto = 0,
- },
- [MLX5E_TT_ANY] = {
- .etype = 0,
- .proto = 0,
- },
-};
-
-static struct mlx5e_etype_proto ttc_tunnel_rules[] = {
- [MLX5E_TT_IPV4_GRE] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_GRE,
- },
- [MLX5E_TT_IPV6_GRE] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_GRE,
- },
- [MLX5E_TT_IPV4_IPIP] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_IPIP,
- },
- [MLX5E_TT_IPV6_IPIP] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_IPIP,
- },
- [MLX5E_TT_IPV4_IPV6] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_IPV6,
- },
- [MLX5E_TT_IPV6_IPV6] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_IPV6,
- },
-
-};
-
-u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt)
-{
- return ttc_tunnel_rules[tt].proto;
-}
-
-static bool mlx5e_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev, u8 proto_type)
-{
- switch (proto_type) {
- case IPPROTO_GRE:
- return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
- case IPPROTO_IPIP:
- case IPPROTO_IPV6:
- return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
- MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx));
- default:
- return false;
- }
-}
-
-static bool mlx5e_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev)
-{
- int tt;
-
- for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
- if (mlx5e_tunnel_proto_supported_rx(mdev, ttc_tunnel_rules[tt].proto))
- return true;
- }
- return false;
-}
-
-bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
-{
- return (mlx5e_tunnel_any_rx_proto_supported(mdev) &&
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version));
-}
-
-static u8 mlx5e_etype_to_ipv(u16 ethertype)
-{
- if (ethertype == ETH_P_IP)
- return 4;
-
- if (ethertype == ETH_P_IPV6)
- return 6;
-
- return 0;
-}
-
-static struct mlx5_flow_handle *
-mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_destination *dest,
- u16 etype,
- u8 proto)
-{
- int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
- MLX5_DECLARE_FLOW_ACT(flow_act);
- struct mlx5_flow_handle *rule;
- struct mlx5_flow_spec *spec;
- int err = 0;
- u8 ipv;
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec)
- return ERR_PTR(-ENOMEM);
-
- if (proto) {
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
- MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
- }
-
- ipv = mlx5e_etype_to_ipv(etype);
- if (match_ipv_outer && ipv) {
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
- MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
- } else if (etype) {
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
- MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
- }
-
- rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
- netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
- }
-
- kvfree(spec);
- return err ? ERR_PTR(err) : rule;
-}
-
-static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv,
- struct ttc_params *params,
- struct mlx5e_ttc_table *ttc)
-{
- struct mlx5_flow_destination dest = {};
- struct mlx5_flow_handle **trules;
- struct mlx5e_ttc_rule *rules;
- struct mlx5_flow_table *ft;
- int tt;
- int err;
-
- ft = ttc->ft.t;
- rules = ttc->rules;
-
- dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
- struct mlx5e_ttc_rule *rule = &rules[tt];
-
- if (tt == MLX5E_TT_ANY)
- dest.tir_num = params->any_tt_tirn;
- else
- dest.tir_num = params->indir_tirn[tt];
-
- rule->rule = mlx5e_generate_ttc_rule(priv, ft, &dest,
- ttc_rules[tt].etype,
- ttc_rules[tt].proto);
- if (IS_ERR(rule->rule)) {
- err = PTR_ERR(rule->rule);
- rule->rule = NULL;
- goto del_rules;
- }
- rule->default_dest = dest;
- }
-
- if (!params->inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
- return 0;
-
- trules = ttc->tunnel_rules;
- dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = params->inner_ttc->ft.t;
- for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
- if (!mlx5e_tunnel_proto_supported_rx(priv->mdev,
- ttc_tunnel_rules[tt].proto))
- continue;
- trules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest,
- ttc_tunnel_rules[tt].etype,
- ttc_tunnel_rules[tt].proto);
- if (IS_ERR(trules[tt])) {
- err = PTR_ERR(trules[tt]);
- trules[tt] = NULL;
- goto del_rules;
- }
- }
-
- return 0;
-
-del_rules:
- mlx5e_cleanup_ttc_rules(ttc);
- return err;
-}
-
-static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc,
- bool use_ipv)
-{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5e_flow_table *ft = &ttc->ft;
- int ix = 0;
- u32 *in;
- int err;
- u8 *mc;
-
- ft->g = kcalloc(MLX5E_TTC_NUM_GROUPS,
- sizeof(*ft->g), GFP_KERNEL);
- if (!ft->g)
- return -ENOMEM;
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in) {
- kfree(ft->g);
- ft->g = NULL;
- return -ENOMEM;
- }
-
- /* L4 Group */
- mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
- MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
- if (use_ipv)
- MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
- else
- MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
- MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_TTC_GROUP1_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- /* L3 Group */
- MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_TTC_GROUP2_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- /* Any Group */
- memset(in, 0, inlen);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_TTC_GROUP3_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- kvfree(in);
- return 0;
-
-err:
- err = PTR_ERR(ft->g[ft->num_groups]);
- ft->g[ft->num_groups] = NULL;
- kvfree(in);
-
- return err;
-}
-
-static struct mlx5_flow_handle *
-mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_destination *dest,
- u16 etype, u8 proto)
-{
- MLX5_DECLARE_FLOW_ACT(flow_act);
- struct mlx5_flow_handle *rule;
- struct mlx5_flow_spec *spec;
- int err = 0;
- u8 ipv;
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec)
- return ERR_PTR(-ENOMEM);
-
- ipv = mlx5e_etype_to_ipv(etype);
- if (etype && ipv) {
- spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
- MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
- }
-
- if (proto) {
- spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
- MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
- }
-
- rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
- netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
- }
-
- kvfree(spec);
- return err ? ERR_PTR(err) : rule;
-}
-
-static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv,
- struct ttc_params *params,
- struct mlx5e_ttc_table *ttc)
-{
- struct mlx5_flow_destination dest = {};
- struct mlx5e_ttc_rule *rules;
- struct mlx5_flow_table *ft;
- int err;
- int tt;
-
- ft = ttc->ft.t;
- rules = ttc->rules;
- dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-
- for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
- struct mlx5e_ttc_rule *rule = &rules[tt];
-
- if (tt == MLX5E_TT_ANY)
- dest.tir_num = params->any_tt_tirn;
- else
- dest.tir_num = params->indir_tirn[tt];
-
- rule->rule = mlx5e_generate_inner_ttc_rule(priv, ft, &dest,
- ttc_rules[tt].etype,
- ttc_rules[tt].proto);
- if (IS_ERR(rule->rule)) {
- err = PTR_ERR(rule->rule);
- rule->rule = NULL;
- goto del_rules;
- }
- rule->default_dest = dest;
- }
-
- return 0;
-
-del_rules:
-
- mlx5e_cleanup_ttc_rules(ttc);
- return err;
-}
-
-static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc)
-{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5e_flow_table *ft = &ttc->ft;
- int ix = 0;
- u32 *in;
- int err;
- u8 *mc;
-
- ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
- if (!ft->g)
- return -ENOMEM;
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in) {
- kfree(ft->g);
- ft->g = NULL;
- return -ENOMEM;
- }
-
- /* L4 Group */
- mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
- MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
- MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
- MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_INNER_TTC_GROUP1_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- /* L3 Group */
- MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_INNER_TTC_GROUP2_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- /* Any Group */
- memset(in, 0, inlen);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_INNER_TTC_GROUP3_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- kvfree(in);
- return 0;
-
-err:
- err = PTR_ERR(ft->g[ft->num_groups]);
- ft->g[ft->num_groups] = NULL;
- kvfree(in);
-
- return err;
-}
-
-void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv,
- struct ttc_params *ttc_params)
-{
- ttc_params->any_tt_tirn = priv->direct_tir[0].tirn;
- ttc_params->inner_ttc = &priv->fs.inner_ttc;
-}
-
-void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params)
+static void mlx5e_set_inner_ttc_params(struct mlx5e_priv *priv,
+ struct ttc_params *ttc_params)
{
struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+ int tt;
- ft_attr->max_fte = MLX5E_INNER_TTC_TABLE_SIZE;
+ memset(ttc_params, 0, sizeof(*ttc_params));
+ ttc_params->ns = mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL;
ft_attr->prio = MLX5E_NIC_PRIO;
+
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ ttc_params->dests[tt].tir_num =
+ tt == MLX5_TT_ANY ?
+ mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) :
+ mlx5e_rx_res_get_tirn_rss_inner(priv->rx_res,
+ tt);
+ }
}
-void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params)
+void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
+ struct ttc_params *ttc_params, bool tunnel)
{
struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+ int tt;
- ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
+ memset(ttc_params, 0, sizeof(*ttc_params));
+ ttc_params->ns = mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
ft_attr->level = MLX5E_TTC_FT_LEVEL;
ft_attr->prio = MLX5E_NIC_PRIO;
-}
-
-int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
- struct mlx5e_ttc_table *ttc)
-{
- struct mlx5e_flow_table *ft = &ttc->ft;
- int err;
- if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
- return 0;
-
- ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr);
- if (IS_ERR(ft->t)) {
- err = PTR_ERR(ft->t);
- ft->t = NULL;
- return err;
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ ttc_params->dests[tt].tir_num =
+ tt == MLX5_TT_ANY ?
+ mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) :
+ mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt);
}
- err = mlx5e_create_inner_ttc_table_groups(ttc);
- if (err)
- goto err;
-
- err = mlx5e_generate_inner_ttc_table_rules(priv, params, ttc);
- if (err)
- goto err;
-
- return 0;
-
-err:
- mlx5e_destroy_flow_table(ft);
- return err;
-}
-
-void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv,
- struct mlx5e_ttc_table *ttc)
-{
- if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ ttc_params->inner_ttc = tunnel;
+ if (!tunnel || !mlx5_tunnel_inner_ft_supported(priv->mdev))
return;
- mlx5e_cleanup_ttc_rules(ttc);
- mlx5e_destroy_flow_table(&ttc->ft);
-}
-
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv,
- struct mlx5e_ttc_table *ttc)
-{
- mlx5e_cleanup_ttc_rules(ttc);
- mlx5e_destroy_flow_table(&ttc->ft);
-}
-
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
- struct mlx5e_ttc_table *ttc)
-{
- bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
- struct mlx5e_flow_table *ft = &ttc->ft;
- int err;
-
- ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr);
- if (IS_ERR(ft->t)) {
- err = PTR_ERR(ft->t);
- ft->t = NULL;
- return err;
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ ttc_params->tunnel_dests[tt].type =
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ ttc_params->tunnel_dests[tt].ft =
+ mlx5_get_ttc_flow_table(priv->fs.inner_ttc);
}
-
- err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer);
- if (err)
- goto err;
-
- err = mlx5e_generate_ttc_table_rules(priv, params, ttc);
- if (err)
- goto err;
-
- return 0;
-err:
- mlx5e_destroy_flow_table(ft);
- return err;
-}
-
-int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type,
- struct mlx5_flow_destination *new_dest)
-{
- return mlx5_modify_rule_destination(priv->fs.ttc.rules[type].rule, new_dest, NULL);
-}
-
-struct mlx5_flow_destination
-mlx5e_ttc_get_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type)
-{
- struct mlx5_flow_destination *dest = &priv->fs.ttc.rules[type].default_dest;
-
- WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR,
- "TTC[%d] default dest is not setup yet", type);
-
- return *dest;
-}
-
-int mlx5e_ttc_fwd_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type)
-{
- struct mlx5_flow_destination dest = mlx5e_ttc_get_default_dest(priv, type);
-
- return mlx5e_ttc_fwd_dest(priv, type, &dest);
}
static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
@@ -1473,7 +939,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
outer_headers.dmac_47_16);
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = priv->fs.ttc.ft.t;
+ dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
switch (type) {
case MLX5E_FULLMATCH:
@@ -1769,10 +1235,47 @@ static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv)
kvfree(priv->fs.vlan);
}
-int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
+static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv)
+{
+ if (!mlx5_tunnel_inner_ft_supported(priv->mdev))
+ return;
+ mlx5_destroy_ttc_table(priv->fs.inner_ttc);
+}
+
+void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv)
+{
+ mlx5_destroy_ttc_table(priv->fs.ttc);
+}
+
+static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv)
{
struct ttc_params ttc_params = {};
- int tt, err;
+
+ if (!mlx5_tunnel_inner_ft_supported(priv->mdev))
+ return 0;
+
+ mlx5e_set_inner_ttc_params(priv, &ttc_params);
+ priv->fs.inner_ttc = mlx5_create_inner_ttc_table(priv->mdev,
+ &ttc_params);
+ if (IS_ERR(priv->fs.inner_ttc))
+ return PTR_ERR(priv->fs.inner_ttc);
+ return 0;
+}
+
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv)
+{
+ struct ttc_params ttc_params = {};
+
+ mlx5e_set_ttc_params(priv, &ttc_params, true);
+ priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+ if (IS_ERR(priv->fs.ttc))
+ return PTR_ERR(priv->fs.ttc);
+ return 0;
+}
+
+int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
+{
+ int err;
priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
MLX5_FLOW_NAMESPACE_KERNEL);
@@ -1787,23 +1290,15 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
}
- mlx5e_set_ttc_basic_params(priv, &ttc_params);
- mlx5e_set_inner_ttc_ft_params(&ttc_params);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn;
-
- err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc);
+ err = mlx5e_create_inner_ttc_table(priv);
if (err) {
- netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n",
+ netdev_err(priv->netdev,
+ "Failed to create inner ttc table, err=%d\n",
err);
goto err_destroy_arfs_tables;
}
- mlx5e_set_ttc_ft_params(&ttc_params);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;
-
- err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+ err = mlx5e_create_ttc_table(priv);
if (err) {
netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
err);
@@ -1837,9 +1332,9 @@ err_destory_vlan_table:
err_destroy_l2_table:
mlx5e_destroy_l2_table(priv);
err_destroy_ttc_table:
- mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
+ mlx5e_destroy_ttc_table(priv);
err_destroy_inner_ttc_table:
- mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+ mlx5e_destroy_inner_ttc_table(priv);
err_destroy_arfs_tables:
mlx5e_arfs_destroy_tables(priv);
@@ -1851,8 +1346,8 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
mlx5e_ptp_free_rx_fs(priv);
mlx5e_destroy_vlan_table(priv);
mlx5e_destroy_l2_table(priv);
- mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
- mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+ mlx5e_destroy_ttc_table(priv);
+ mlx5e_destroy_inner_ttc_table(priv);
mlx5e_arfs_destroy_tables(priv);
mlx5e_ethtool_cleanup_steering(priv);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index b416a8ee2eed..03693fa74a70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -35,11 +35,19 @@
#include "en/params.h"
#include "en/xsk/pool.h"
+static int flow_type_to_traffic_type(u32 flow_type);
+
+static u32 flow_type_mask(u32 flow_type)
+{
+ return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS);
+}
+
struct mlx5e_ethtool_rule {
struct list_head list;
struct ethtool_rx_flow_spec flow_spec;
struct mlx5_flow_handle *rule;
struct mlx5e_ethtool_table *eth_ft;
+ struct mlx5e_rss *rss;
};
static void put_flow_table(struct mlx5e_ethtool_table *eth_ft)
@@ -66,7 +74,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
int table_size;
int prio;
- switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+ switch (flow_type_mask(fs->flow_type)) {
case TCP_V4_FLOW:
case UDP_V4_FLOW:
case TCP_V6_FLOW:
@@ -329,7 +337,7 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v,
outer_headers);
void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
outer_headers);
- u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
+ u32 flow_type = flow_type_mask(fs->flow_type);
switch (flow_type) {
case TCP_V4_FLOW:
@@ -397,10 +405,53 @@ static bool outer_header_zero(u32 *match_criteria)
size - 1);
}
+static int flow_get_tirn(struct mlx5e_priv *priv,
+ struct mlx5e_ethtool_rule *eth_rule,
+ struct ethtool_rx_flow_spec *fs,
+ u32 rss_context, u32 *tirn)
+{
+ if (fs->flow_type & FLOW_RSS) {
+ struct mlx5e_lro_param lro_param;
+ struct mlx5e_rss *rss;
+ u32 flow_type;
+ int err;
+ int tt;
+
+ rss = mlx5e_rx_res_rss_get(priv->rx_res, rss_context);
+ if (!rss)
+ return -ENOENT;
+
+ flow_type = flow_type_mask(fs->flow_type);
+ tt = flow_type_to_traffic_type(flow_type);
+ if (tt < 0)
+ return -EINVAL;
+
+ lro_param = mlx5e_get_lro_param(&priv->channels.params);
+ err = mlx5e_rss_obtain_tirn(rss, tt, &lro_param, false, tirn);
+ if (err)
+ return err;
+ eth_rule->rss = rss;
+ mlx5e_rss_refcnt_inc(eth_rule->rss);
+ } else {
+ struct mlx5e_params *params = &priv->channels.params;
+ enum mlx5e_rq_group group;
+ u16 ix;
+
+ mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group);
+
+ *tirn = group == MLX5E_RQ_GROUP_XSK ?
+ mlx5e_rx_res_get_tirn_xsk(priv->rx_res, ix) :
+ mlx5e_rx_res_get_tirn_direct(priv->rx_res, ix);
+ }
+
+ return 0;
+}
+
static struct mlx5_flow_handle *
add_ethtool_flow_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ethtool_rule *eth_rule,
struct mlx5_flow_table *ft,
- struct ethtool_rx_flow_spec *fs)
+ struct ethtool_rx_flow_spec *fs, u32 rss_context)
{
struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND };
struct mlx5_flow_destination *dst = NULL;
@@ -419,22 +470,17 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
} else {
- struct mlx5e_params *params = &priv->channels.params;
- enum mlx5e_rq_group group;
- struct mlx5e_tir *tir;
- u16 ix;
-
- mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group);
- tir = group == MLX5E_RQ_GROUP_XSK ? priv->xsk_tir : priv->direct_tir;
-
dst = kzalloc(sizeof(*dst), GFP_KERNEL);
if (!dst) {
err = -ENOMEM;
goto free;
}
+ err = flow_get_tirn(priv, eth_rule, fs, rss_context, &dst->tir_num);
+ if (err)
+ goto free;
+
dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dst->tir_num = tir[ix].tirn;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
}
@@ -458,6 +504,8 @@ static void del_ethtool_rule(struct mlx5e_priv *priv,
{
if (eth_rule->rule)
mlx5_del_flow_rules(eth_rule->rule);
+ if (eth_rule->rss)
+ mlx5e_rss_refcnt_dec(eth_rule->rss);
list_del(&eth_rule->list);
priv->fs.ethtool.tot_num_rules--;
put_flow_table(eth_rule->eth_ft);
@@ -618,7 +666,7 @@ static int validate_flow(struct mlx5e_priv *priv,
fs->ring_cookie))
return -EINVAL;
- switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+ switch (flow_type_mask(fs->flow_type)) {
case ETHER_FLOW:
num_tuples += validate_ethter(fs);
break;
@@ -667,7 +715,7 @@ static int validate_flow(struct mlx5e_priv *priv,
static int
mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
- struct ethtool_rx_flow_spec *fs)
+ struct ethtool_rx_flow_spec *fs, u32 rss_context)
{
struct mlx5e_ethtool_table *eth_ft;
struct mlx5e_ethtool_rule *eth_rule;
@@ -698,7 +746,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
err = -EINVAL;
goto del_ethtool_rule;
}
- rule = add_ethtool_flow_rule(priv, eth_ft->ft, fs);
+ rule = add_ethtool_flow_rule(priv, eth_rule, eth_ft->ft, fs, rss_context);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
goto del_ethtool_rule;
@@ -744,10 +792,20 @@ mlx5e_ethtool_get_flow(struct mlx5e_priv *priv,
return -EINVAL;
list_for_each_entry(eth_rule, &priv->fs.ethtool.rules, list) {
- if (eth_rule->flow_spec.location == location) {
- info->fs = eth_rule->flow_spec;
+ int index;
+
+ if (eth_rule->flow_spec.location != location)
+ continue;
+ if (!info)
return 0;
- }
+ info->fs = eth_rule->flow_spec;
+ if (!eth_rule->rss)
+ return 0;
+ index = mlx5e_rx_res_rss_index(priv->rx_res, eth_rule->rss);
+ if (index < 0)
+ return index;
+ info->rss_context = index;
+ return 0;
}
return -ENOENT;
@@ -763,7 +821,7 @@ mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv,
info->data = MAX_NUM_OF_ETHTOOL_RULES;
while ((!err || err == -ENOENT) && idx < info->rule_cnt) {
- err = mlx5e_ethtool_get_flow(priv, info, location);
+ err = mlx5e_ethtool_get_flow(priv, NULL, location);
if (!err)
rule_locs[idx++] = location;
location++;
@@ -785,45 +843,44 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv)
INIT_LIST_HEAD(&priv->fs.ethtool.rules);
}
-static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type)
+static int flow_type_to_traffic_type(u32 flow_type)
{
switch (flow_type) {
case TCP_V4_FLOW:
- return MLX5E_TT_IPV4_TCP;
+ return MLX5_TT_IPV4_TCP;
case TCP_V6_FLOW:
- return MLX5E_TT_IPV6_TCP;
+ return MLX5_TT_IPV6_TCP;
case UDP_V4_FLOW:
- return MLX5E_TT_IPV4_UDP;
+ return MLX5_TT_IPV4_UDP;
case UDP_V6_FLOW:
- return MLX5E_TT_IPV6_UDP;
+ return MLX5_TT_IPV6_UDP;
case AH_V4_FLOW:
- return MLX5E_TT_IPV4_IPSEC_AH;
+ return MLX5_TT_IPV4_IPSEC_AH;
case AH_V6_FLOW:
- return MLX5E_TT_IPV6_IPSEC_AH;
+ return MLX5_TT_IPV6_IPSEC_AH;
case ESP_V4_FLOW:
- return MLX5E_TT_IPV4_IPSEC_ESP;
+ return MLX5_TT_IPV4_IPSEC_ESP;
case ESP_V6_FLOW:
- return MLX5E_TT_IPV6_IPSEC_ESP;
+ return MLX5_TT_IPV6_IPSEC_ESP;
case IPV4_FLOW:
- return MLX5E_TT_IPV4;
+ return MLX5_TT_IPV4;
case IPV6_FLOW:
- return MLX5E_TT_IPV6;
+ return MLX5_TT_IPV6;
default:
- return MLX5E_NUM_INDIR_TIRS;
+ return -EINVAL;
}
}
static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
struct ethtool_rxnfc *nfc)
{
- int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
- enum mlx5e_traffic_types tt;
u8 rx_hash_field = 0;
- void *in;
+ int err;
+ int tt;
tt = flow_type_to_traffic_type(nfc->flow_type);
- if (tt == MLX5E_NUM_INDIR_TIRS)
- return -EINVAL;
+ if (tt < 0)
+ return tt;
/* RSS does not support anything other than hashing to queues
* on src IP, dest IP, TCP/UDP src port and TCP/UDP dest
@@ -848,35 +905,24 @@ static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
if (nfc->data & RXH_L4_B_2_3)
rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT;
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
mutex_lock(&priv->state_lock);
-
- if (rx_hash_field == priv->rss_params.rx_hash_fields[tt])
- goto out;
-
- priv->rss_params.rx_hash_fields[tt] = rx_hash_field;
- mlx5e_modify_tirs_hash(priv, in);
-
-out:
+ err = mlx5e_rx_res_rss_set_hash_fields(priv->rx_res, tt, rx_hash_field);
mutex_unlock(&priv->state_lock);
- kvfree(in);
- return 0;
+
+ return err;
}
static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv,
struct ethtool_rxnfc *nfc)
{
- enum mlx5e_traffic_types tt;
u32 hash_field = 0;
+ int tt;
tt = flow_type_to_traffic_type(nfc->flow_type);
- if (tt == MLX5E_NUM_INDIR_TIRS)
- return -EINVAL;
+ if (tt < 0)
+ return tt;
- hash_field = priv->rss_params.rx_hash_fields[tt];
+ hash_field = mlx5e_rx_res_rss_get_hash_fields(priv->rx_res, tt);
nfc->data = 0;
if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP)
@@ -898,7 +944,7 @@ int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
switch (cmd->cmd) {
case ETHTOOL_SRXCLSRLINS:
- err = mlx5e_ethtool_flow_replace(priv, &cmd->fs);
+ err = mlx5e_ethtool_flow_replace(priv, &cmd->fs, cmd->rss_context);
break;
case ETHTOOL_SRXCLSRLDEL:
err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 37c440837945..47efd858964d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1535,15 +1535,9 @@ static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv,
{
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5_core_cq *mcq = &cq->mcq;
- int eqn_not_used;
- unsigned int irqn;
int err;
u32 i;
- err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);
- if (err)
- return err;
-
err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
&cq->wq_ctrl);
if (err)
@@ -1557,7 +1551,6 @@ static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv,
mcq->vector = param->eq_ix;
mcq->comp = mlx5e_completion_event;
mcq->event = mlx5e_cq_error_event;
- mcq->irqn = irqn;
for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
@@ -1605,11 +1598,10 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
void *in;
void *cqc;
int inlen;
- unsigned int irqn_not_used;
int eqn;
int err;
- err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
+ err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn);
if (err)
return err;
@@ -1627,7 +1619,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
(__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode);
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
@@ -1719,7 +1711,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c,
{
int err, tc;
- for (tc = 0; tc < params->num_tc; tc++) {
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(params); tc++) {
int txq_ix = c->ix + tc * params->num_channels;
err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
@@ -1891,30 +1883,30 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
if (err)
goto err_close_icosq;
+ err = mlx5e_open_rxq_rq(c, params, &cparam->rq);
+ if (err)
+ goto err_close_sqs;
+
if (c->xdp) {
err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL,
&c->rq_xdpsq, false);
if (err)
- goto err_close_sqs;
+ goto err_close_rq;
}
- err = mlx5e_open_rxq_rq(c, params, &cparam->rq);
- if (err)
- goto err_close_xdp_sq;
-
err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true);
if (err)
- goto err_close_rq;
+ goto err_close_xdp_sq;
return 0;
-err_close_rq:
- mlx5e_close_rq(&c->rq);
-
err_close_xdp_sq:
if (c->xdp)
mlx5e_close_xdpsq(&c->rq_xdpsq);
+err_close_rq:
+ mlx5e_close_rq(&c->rq);
+
err_close_sqs:
mlx5e_close_sqs(c);
@@ -1949,9 +1941,9 @@ err_close_async_icosq_cq:
static void mlx5e_close_queues(struct mlx5e_channel *c)
{
mlx5e_close_xdpsq(&c->xdpsq);
- mlx5e_close_rq(&c->rq);
if (c->xdp)
mlx5e_close_xdpsq(&c->rq_xdpsq);
+ mlx5e_close_rq(&c->rq);
mlx5e_close_sqs(c);
mlx5e_close_icosq(&c->icosq);
mlx5e_close_icosq(&c->async_icosq);
@@ -1983,9 +1975,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct mlx5e_channel *c;
unsigned int irq;
int err;
- int eqn;
- err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq);
+ err = mlx5_vector2irqn(priv->mdev, ix, &irq);
if (err)
return err;
@@ -2001,7 +1992,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
c->pdev = mlx5_core_dma_dev(priv->mdev);
c->netdev = priv->netdev;
c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
- c->num_tc = params->num_tc;
+ c->num_tc = mlx5e_get_dcb_num_tc(params);
c->xdp = !!params->xdp_prog;
c->stats = &priv->channel_stats[ix].ch;
c->aff_mask = irq_get_effective_affinity_mask(irq);
@@ -2194,400 +2185,14 @@ void mlx5e_close_channels(struct mlx5e_channels *chs)
chs->num = 0;
}
-static int
-mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, struct mlx5e_rqt *rqt)
-{
- struct mlx5_core_dev *mdev = priv->mdev;
- void *rqtc;
- int inlen;
- int err;
- u32 *in;
- int i;
-
- inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
-
- MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
- MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
-
- for (i = 0; i < sz; i++)
- MLX5_SET(rqtc, rqtc, rq_num[i], priv->drop_rq.rqn);
-
- err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn);
- if (!err)
- rqt->enabled = true;
-
- kvfree(in);
- return err;
-}
-
-void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt)
-{
- rqt->enabled = false;
- mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn);
-}
-
-int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv)
-{
- struct mlx5e_rqt *rqt = &priv->indir_rqt;
- int err;
-
- err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, rqt);
- if (err)
- mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err);
- return err;
-}
-
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n)
-{
- int err;
- int ix;
-
- for (ix = 0; ix < n; ix++) {
- err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt);
- if (unlikely(err))
- goto err_destroy_rqts;
- }
-
- return 0;
-
-err_destroy_rqts:
- mlx5_core_warn(priv->mdev, "create rqts failed, %d\n", err);
- for (ix--; ix >= 0; ix--)
- mlx5e_destroy_rqt(priv, &tirs[ix].rqt);
-
- return err;
-}
-
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n)
-{
- int i;
-
- for (i = 0; i < n; i++)
- mlx5e_destroy_rqt(priv, &tirs[i].rqt);
-}
-
-static int mlx5e_rx_hash_fn(int hfunc)
-{
- return (hfunc == ETH_RSS_HASH_TOP) ?
- MLX5_RX_HASH_FN_TOEPLITZ :
- MLX5_RX_HASH_FN_INVERTED_XOR8;
-}
-
-int mlx5e_bits_invert(unsigned long a, int size)
-{
- int inv = 0;
- int i;
-
- for (i = 0; i < size; i++)
- inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i;
-
- return inv;
-}
-
-static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz,
- struct mlx5e_redirect_rqt_param rrp, void *rqtc)
-{
- int i;
-
- for (i = 0; i < sz; i++) {
- u32 rqn;
-
- if (rrp.is_rss) {
- int ix = i;
-
- if (rrp.rss.hfunc == ETH_RSS_HASH_XOR)
- ix = mlx5e_bits_invert(i, ilog2(sz));
-
- ix = priv->rss_params.indirection_rqt[ix];
- rqn = rrp.rss.channels->c[ix]->rq.rqn;
- } else {
- rqn = rrp.rqn;
- }
- MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
- }
-}
-
-int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
- struct mlx5e_redirect_rqt_param rrp)
-{
- struct mlx5_core_dev *mdev = priv->mdev;
- void *rqtc;
- int inlen;
- u32 *in;
- int err;
-
- inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz;
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
-
- MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
- MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1);
- mlx5e_fill_rqt_rqns(priv, sz, rrp, rqtc);
- err = mlx5_core_modify_rqt(mdev, rqtn, in, inlen);
-
- kvfree(in);
- return err;
-}
-
-static u32 mlx5e_get_direct_rqn(struct mlx5e_priv *priv, int ix,
- struct mlx5e_redirect_rqt_param rrp)
-{
- if (!rrp.is_rss)
- return rrp.rqn;
-
- if (ix >= rrp.rss.channels->num)
- return priv->drop_rq.rqn;
-
- return rrp.rss.channels->c[ix]->rq.rqn;
-}
-
-static void mlx5e_redirect_rqts(struct mlx5e_priv *priv,
- struct mlx5e_redirect_rqt_param rrp,
- struct mlx5e_redirect_rqt_param *ptp_rrp)
-{
- u32 rqtn;
- int ix;
-
- if (priv->indir_rqt.enabled) {
- /* RSS RQ table */
- rqtn = priv->indir_rqt.rqtn;
- mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp);
- }
-
- for (ix = 0; ix < priv->max_nch; ix++) {
- struct mlx5e_redirect_rqt_param direct_rrp = {
- .is_rss = false,
- {
- .rqn = mlx5e_get_direct_rqn(priv, ix, rrp)
- },
- };
-
- /* Direct RQ Tables */
- if (!priv->direct_tir[ix].rqt.enabled)
- continue;
-
- rqtn = priv->direct_tir[ix].rqt.rqtn;
- mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
- }
- if (ptp_rrp) {
- rqtn = priv->ptp_tir.rqt.rqtn;
- mlx5e_redirect_rqt(priv, rqtn, 1, *ptp_rrp);
- }
-}
-
-static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv,
- struct mlx5e_channels *chs)
-{
- bool rx_ptp_support = priv->profile->rx_ptp_support;
- struct mlx5e_redirect_rqt_param *ptp_rrp_p = NULL;
- struct mlx5e_redirect_rqt_param rrp = {
- .is_rss = true,
- {
- .rss = {
- .channels = chs,
- .hfunc = priv->rss_params.hfunc,
- }
- },
- };
- struct mlx5e_redirect_rqt_param ptp_rrp;
-
- if (rx_ptp_support) {
- u32 ptp_rqn;
-
- ptp_rrp.is_rss = false;
- ptp_rrp.rqn = mlx5e_ptp_get_rqn(priv->channels.ptp, &ptp_rqn) ?
- priv->drop_rq.rqn : ptp_rqn;
- ptp_rrp_p = &ptp_rrp;
- }
- mlx5e_redirect_rqts(priv, rrp, ptp_rrp_p);
-}
-
-static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv)
-{
- bool rx_ptp_support = priv->profile->rx_ptp_support;
- struct mlx5e_redirect_rqt_param drop_rrp = {
- .is_rss = false,
- {
- .rqn = priv->drop_rq.rqn,
- },
- };
-
- mlx5e_redirect_rqts(priv, drop_rrp, rx_ptp_support ? &drop_rrp : NULL);
-}
-
-static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = {
- [MLX5E_TT_IPV4_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
- .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
- .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
- },
- [MLX5E_TT_IPV6_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
- .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
- .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
- },
- [MLX5E_TT_IPV4_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
- .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
- .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
- },
- [MLX5E_TT_IPV6_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
- .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
- .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
- },
- [MLX5E_TT_IPV4_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
- .l4_prot_type = 0,
- .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
- },
- [MLX5E_TT_IPV6_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
- .l4_prot_type = 0,
- .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
- },
- [MLX5E_TT_IPV4_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
- .l4_prot_type = 0,
- .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
- },
- [MLX5E_TT_IPV6_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
- .l4_prot_type = 0,
- .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
- },
- [MLX5E_TT_IPV4] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
- .l4_prot_type = 0,
- .rx_hash_fields = MLX5_HASH_IP,
- },
- [MLX5E_TT_IPV6] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
- .l4_prot_type = 0,
- .rx_hash_fields = MLX5_HASH_IP,
- },
-};
-
-struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt)
-{
- return tirc_default_config[tt];
-}
-
-static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc)
-{
- if (!params->lro_en)
- return;
-
-#define ROUGH_MAX_L2_L3_HDR_SZ 256
-
- MLX5_SET(tirc, tirc, lro_enable_mask,
- MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
- MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
- MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
- (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
- MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout);
-}
-
-void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
- const struct mlx5e_tirc_config *ttconfig,
- void *tirc, bool inner)
-{
- void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) :
- MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
-
- MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(rss_params->hfunc));
- if (rss_params->hfunc == ETH_RSS_HASH_TOP) {
- void *rss_key = MLX5_ADDR_OF(tirc, tirc,
- rx_hash_toeplitz_key);
- size_t len = MLX5_FLD_SZ_BYTES(tirc,
- rx_hash_toeplitz_key);
-
- MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
- memcpy(rss_key, rss_params->toeplitz_hash_key, len);
- }
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- ttconfig->l3_prot_type);
- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
- ttconfig->l4_prot_type);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- ttconfig->rx_hash_fields);
-}
-
-static void mlx5e_update_rx_hash_fields(struct mlx5e_tirc_config *ttconfig,
- enum mlx5e_traffic_types tt,
- u32 rx_hash_fields)
-{
- *ttconfig = tirc_default_config[tt];
- ttconfig->rx_hash_fields = rx_hash_fields;
-}
-
-void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in)
-{
- void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
- struct mlx5e_rss_params *rss = &priv->rss_params;
- struct mlx5_core_dev *mdev = priv->mdev;
- int ctxlen = MLX5_ST_SZ_BYTES(tirc);
- struct mlx5e_tirc_config ttconfig;
- int tt;
-
- MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- memset(tirc, 0, ctxlen);
- mlx5e_update_rx_hash_fields(&ttconfig, tt,
- rss->rx_hash_fields[tt]);
- mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, false);
- mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in);
- }
-
- /* Verify inner tirs resources allocated */
- if (!priv->inner_indir_tir[0].tirn)
- return;
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- memset(tirc, 0, ctxlen);
- mlx5e_update_rx_hash_fields(&ttconfig, tt,
- rss->rx_hash_fields[tt]);
- mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, true);
- mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in);
- }
-}
-
static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
{
- struct mlx5_core_dev *mdev = priv->mdev;
-
- void *in;
- void *tirc;
- int inlen;
- int err;
- int tt;
- int ix;
-
- inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
+ struct mlx5e_rx_res *res = priv->rx_res;
+ struct mlx5e_lro_param lro_param;
- MLX5_SET(modify_tir_in, in, bitmask.lro, 1);
- tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
+ lro_param = mlx5e_get_lro_param(&priv->channels.params);
- mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in);
- if (err)
- goto free_in;
- }
-
- for (ix = 0; ix < priv->max_nch; ix++) {
- err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn, in);
- if (err)
- goto free_in;
- }
-
-free_in:
- kvfree(in);
-
- return err;
+ return mlx5e_rx_res_lro_set_param(res, &lro_param);
}
static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro);
@@ -2658,22 +2263,34 @@ void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv)
ETH_MAX_MTU);
}
-static void mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc)
+static int mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc,
+ struct tc_mqprio_qopt_offload *mqprio)
{
- int tc;
+ int tc, err;
netdev_reset_tc(netdev);
if (ntc == 1)
- return;
+ return 0;
- netdev_set_num_tc(netdev, ntc);
+ err = netdev_set_num_tc(netdev, ntc);
+ if (err) {
+ netdev_WARN(netdev, "netdev_set_num_tc failed (%d), ntc = %d\n", err, ntc);
+ return err;
+ }
- /* Map netdev TCs to offset 0
- * We have our own UP to TXQ mapping for QoS
- */
- for (tc = 0; tc < ntc; tc++)
- netdev_set_tc_queue(netdev, tc, nch, 0);
+ for (tc = 0; tc < ntc; tc++) {
+ u16 count, offset;
+
+ /* For DCB mode, map netdev TCs to offset 0
+ * We have our own UP to TXQ mapping for QoS
+ */
+ count = mqprio ? mqprio->qopt.count[tc] : nch;
+ offset = mqprio ? mqprio->qopt.offset[tc] : 0;
+ netdev_set_tc_queue(netdev, tc, count, offset);
+ }
+
+ return 0;
}
int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv)
@@ -2683,7 +2300,7 @@ int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv)
qos_queues = mlx5e_qos_cur_leaf_nodes(priv);
nch = priv->channels.params.num_channels;
- ntc = priv->channels.params.num_tc;
+ ntc = mlx5e_get_dcb_num_tc(&priv->channels.params);
num_txqs = nch * ntc + qos_queues;
if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS))
num_txqs += ntc;
@@ -2707,11 +2324,12 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
old_ntc = netdev->num_tc ? : 1;
nch = priv->channels.params.num_channels;
- ntc = priv->channels.params.num_tc;
+ ntc = mlx5e_get_dcb_num_tc(&priv->channels.params);
num_rxqs = nch * priv->profile->rq_groups;
- mlx5e_netdev_set_tcs(netdev, nch, ntc);
-
+ err = mlx5e_netdev_set_tcs(netdev, nch, ntc, NULL);
+ if (err)
+ goto err_out;
err = mlx5e_update_tx_netdev_queues(priv);
if (err)
goto err_tcs;
@@ -2732,7 +2350,8 @@ err_txqs:
WARN_ON_ONCE(netif_set_real_num_tx_queues(netdev, old_num_txqs));
err_tcs:
- mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc);
+ mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc, NULL);
+err_out:
return err;
}
@@ -2768,9 +2387,9 @@ int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params);
- if (!netif_is_rxfh_configured(priv->netdev))
- mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
- MLX5E_INDIR_RQT_SIZE, count);
+ /* This function may be called on attach, before priv->rx_res is created. */
+ if (!netif_is_rxfh_configured(priv->netdev) && priv->rx_res)
+ mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count);
return 0;
}
@@ -2782,7 +2401,7 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
int i, ch, tc, num_tc;
ch = priv->channels.num;
- num_tc = priv->channels.params.num_tc;
+ num_tc = mlx5e_get_dcb_num_tc(&priv->channels.params);
for (i = 0; i < ch; i++) {
for (tc = 0; tc < num_tc; tc++) {
@@ -2813,7 +2432,7 @@ static void mlx5e_update_num_tc_x_num_ch(struct mlx5e_priv *priv)
{
/* Sync with mlx5e_select_queue. */
WRITE_ONCE(priv->num_tc_x_num_ch,
- priv->channels.params.num_tc * priv->channels.num);
+ mlx5e_get_dcb_num_tc(&priv->channels.params) * priv->channels.num);
}
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
@@ -2829,16 +2448,15 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
mlx5e_add_sqs_fwd_rules(priv);
mlx5e_wait_channels_min_rx_wqes(&priv->channels);
- mlx5e_redirect_rqts_to_channels(priv, &priv->channels);
- mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels);
+ if (priv->rx_res)
+ mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels);
}
void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
{
- mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels);
-
- mlx5e_redirect_rqts_to_drop(priv);
+ if (priv->rx_res)
+ mlx5e_rx_res_channels_deactivate(priv->rx_res);
if (mlx5e_is_vport_rep(priv))
mlx5e_remove_sqs_fwd_rules(priv);
@@ -3213,224 +2831,152 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
mlx5e_destroy_tises(priv);
}
-static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv,
- u32 rqtn, u32 *tirc)
-{
- MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.hw_objs.td.tdn);
- MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
- MLX5_SET(tirc, tirc, indirect_table, rqtn);
- MLX5_SET(tirc, tirc, tunneled_offload_en,
- priv->channels.params.tunneled_offload_en);
-
- mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
-}
-
-static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv,
- enum mlx5e_traffic_types tt,
- u32 *tirc)
+static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
{
- mlx5e_build_indir_tir_ctx_common(priv, priv->indir_rqt.rqtn, tirc);
- mlx5e_build_indir_tir_ctx_hash(&priv->rss_params,
- &tirc_default_config[tt], tirc, false);
-}
+ int err = 0;
+ int i;
-static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc)
-{
- mlx5e_build_indir_tir_ctx_common(priv, rqtn, tirc);
- MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
-}
+ for (i = 0; i < chs->num; i++) {
+ err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable);
+ if (err)
+ return err;
+ }
-static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv,
- enum mlx5e_traffic_types tt,
- u32 *tirc)
-{
- mlx5e_build_indir_tir_ctx_common(priv, priv->indir_rqt.rqtn, tirc);
- mlx5e_build_indir_tir_ctx_hash(&priv->rss_params,
- &tirc_default_config[tt], tirc, true);
+ return 0;
}
-int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
+static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
{
- struct mlx5e_tir *tir;
- void *tirc;
- int inlen;
- int i = 0;
int err;
- u32 *in;
- int tt;
-
- inlen = MLX5_ST_SZ_BYTES(create_tir_in);
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- memset(in, 0, inlen);
- tir = &priv->indir_tir[tt];
- tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
- mlx5e_build_indir_tir_ctx(priv, tt, tirc);
- err = mlx5e_create_tir(priv->mdev, tir, in);
- if (err) {
- mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err);
- goto err_destroy_inner_tirs;
- }
- }
-
- if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
- goto out;
+ int i;
- for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) {
- memset(in, 0, inlen);
- tir = &priv->inner_indir_tir[i];
- tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
- mlx5e_build_inner_indir_tir_ctx(priv, i, tirc);
- err = mlx5e_create_tir(priv->mdev, tir, in);
- if (err) {
- mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err);
- goto err_destroy_inner_tirs;
- }
+ for (i = 0; i < chs->num; i++) {
+ err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd);
+ if (err)
+ return err;
}
-
-out:
- kvfree(in);
+ if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state))
+ return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd);
return 0;
-
-err_destroy_inner_tirs:
- for (i--; i >= 0; i--)
- mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
-
- for (tt--; tt >= 0; tt--)
- mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]);
-
- kvfree(in);
-
- return err;
}
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n)
+static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
+ struct tc_mqprio_qopt *mqprio)
{
- struct mlx5e_tir *tir;
- void *tirc;
- int inlen;
- int err = 0;
- u32 *in;
- int ix;
-
- inlen = MLX5_ST_SZ_BYTES(create_tir_in);
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
+ struct mlx5e_params new_params;
+ u8 tc = mqprio->num_tc;
+ int err;
- for (ix = 0; ix < n; ix++) {
- memset(in, 0, inlen);
- tir = &tirs[ix];
- tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
- mlx5e_build_direct_tir_ctx(priv, tir->rqt.rqtn, tirc);
- err = mlx5e_create_tir(priv->mdev, tir, in);
- if (unlikely(err))
- goto err_destroy_ch_tirs;
- }
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
- goto out;
+ if (tc && tc != MLX5E_MAX_NUM_TC)
+ return -EINVAL;
-err_destroy_ch_tirs:
- mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err);
- for (ix--; ix >= 0; ix--)
- mlx5e_destroy_tir(priv->mdev, &tirs[ix]);
+ new_params = priv->channels.params;
+ new_params.mqprio.mode = TC_MQPRIO_MODE_DCB;
+ new_params.mqprio.num_tc = tc ? tc : 1;
-out:
- kvfree(in);
+ err = mlx5e_safe_switch_params(priv, &new_params,
+ mlx5e_num_channels_changed_ctx, NULL, true);
+ priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
+ mlx5e_get_dcb_num_tc(&priv->channels.params));
return err;
}
-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv)
+static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv,
+ struct tc_mqprio_qopt_offload *mqprio)
{
+ struct net_device *netdev = priv->netdev;
+ int agg_count = 0;
int i;
- for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
- mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]);
-
- /* Verify inner tirs resources allocated */
- if (!priv->inner_indir_tir[0].tirn)
- return;
-
- for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
- mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
-}
-
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n)
-{
- int i;
+ if (mqprio->qopt.offset[0] != 0 || mqprio->qopt.num_tc < 1 ||
+ mqprio->qopt.num_tc > MLX5E_MAX_NUM_MQPRIO_CH_TC)
+ return -EINVAL;
- for (i = 0; i < n; i++)
- mlx5e_destroy_tir(priv->mdev, &tirs[i]);
-}
+ for (i = 0; i < mqprio->qopt.num_tc; i++) {
+ if (!mqprio->qopt.count[i]) {
+ netdev_err(netdev, "Zero size for queue-group (%d) is not supported\n", i);
+ return -EINVAL;
+ }
+ if (mqprio->min_rate[i]) {
+ netdev_err(netdev, "Min tx rate is not supported\n");
+ return -EINVAL;
+ }
+ if (mqprio->max_rate[i]) {
+ netdev_err(netdev, "Max tx rate is not supported\n");
+ return -EINVAL;
+ }
-static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
-{
- int err = 0;
- int i;
+ if (mqprio->qopt.offset[i] != agg_count) {
+ netdev_err(netdev, "Discontinuous queues config is not supported\n");
+ return -EINVAL;
+ }
+ agg_count += mqprio->qopt.count[i];
+ }
- for (i = 0; i < chs->num; i++) {
- err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable);
- if (err)
- return err;
+ if (priv->channels.params.num_channels < agg_count) {
+ netdev_err(netdev, "Num of queues (%d) exceeds available (%d)\n",
+ agg_count, priv->channels.params.num_channels);
+ return -EINVAL;
}
return 0;
}
-static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
+static int mlx5e_mqprio_channel_set_tcs_ctx(struct mlx5e_priv *priv, void *ctx)
{
- int err;
- int i;
+ struct tc_mqprio_qopt_offload *mqprio = (struct tc_mqprio_qopt_offload *)ctx;
+ struct net_device *netdev = priv->netdev;
+ u8 num_tc;
- for (i = 0; i < chs->num; i++) {
- err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd);
- if (err)
- return err;
- }
- if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state))
- return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd);
+ if (priv->channels.params.mqprio.mode != TC_MQPRIO_MODE_CHANNEL)
+ return -EINVAL;
+
+ num_tc = priv->channels.params.mqprio.num_tc;
+ mlx5e_netdev_set_tcs(netdev, 0, num_tc, mqprio);
return 0;
}
-static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
- struct tc_mqprio_qopt *mqprio)
+static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv,
+ struct tc_mqprio_qopt_offload *mqprio)
{
struct mlx5e_params new_params;
- u8 tc = mqprio->num_tc;
- int err = 0;
+ int err;
- mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+ err = mlx5e_mqprio_channel_validate(priv, mqprio);
+ if (err)
+ return err;
- if (tc && tc != MLX5E_MAX_NUM_TC)
- return -EINVAL;
+ new_params = priv->channels.params;
+ new_params.mqprio.mode = TC_MQPRIO_MODE_CHANNEL;
+ new_params.mqprio.num_tc = mqprio->qopt.num_tc;
+ err = mlx5e_safe_switch_params(priv, &new_params,
+ mlx5e_mqprio_channel_set_tcs_ctx, mqprio, true);
- mutex_lock(&priv->state_lock);
+ return err;
+}
+static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
+ struct tc_mqprio_qopt_offload *mqprio)
+{
/* MQPRIO is another toplevel qdisc that can't be attached
* simultaneously with the offloaded HTB.
*/
- if (WARN_ON(priv->htb.maj_id)) {
- err = -EINVAL;
- goto out;
- }
-
- new_params = priv->channels.params;
- new_params.num_tc = tc ? tc : 1;
-
- err = mlx5e_safe_switch_params(priv, &new_params,
- mlx5e_num_channels_changed_ctx, NULL, true);
+ if (WARN_ON(priv->htb.maj_id))
+ return -EINVAL;
-out:
- priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
- priv->channels.params.num_tc);
- mutex_unlock(&priv->state_lock);
- return err;
+ switch (mqprio->mode) {
+ case TC_MQPRIO_MODE_DCB:
+ return mlx5e_setup_tc_mqprio_dcb(priv, &mqprio->qopt);
+ case TC_MQPRIO_MODE_CHANNEL:
+ return mlx5e_setup_tc_mqprio_channel(priv, mqprio);
+ default:
+ return -EOPNOTSUPP;
+ }
}
static int mlx5e_setup_tc_htb(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb)
@@ -3454,8 +3000,7 @@ static int mlx5e_setup_tc_htb(struct mlx5e_priv *priv, struct tc_htb_qopt_offloa
return mlx5e_htb_leaf_to_inner(priv, htb->parent_classid, htb->classid,
htb->rate, htb->ceil, htb->extack);
case TC_HTB_LEAF_DEL:
- return mlx5e_htb_leaf_del(priv, htb->classid, &htb->moved_qid, &htb->qid,
- htb->extack);
+ return mlx5e_htb_leaf_del(priv, &htb->classid, htb->extack);
case TC_HTB_LEAF_DEL_LAST:
case TC_HTB_LEAF_DEL_LAST_FORCE:
return mlx5e_htb_leaf_del_last(priv, htb->classid,
@@ -3502,7 +3047,10 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
priv, priv, true);
}
case TC_SETUP_QDISC_MQPRIO:
- return mlx5e_setup_tc_mqprio(priv, type_data);
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_setup_tc_mqprio(priv, type_data);
+ mutex_unlock(&priv->state_lock);
+ return err;
case TC_SETUP_QDISC_HTB:
mutex_lock(&priv->state_lock);
err = mlx5e_setup_tc_htb(priv, type_data);
@@ -4591,7 +4139,7 @@ const struct net_device_ops mlx5e_netdev_ops = {
.ndo_set_features = mlx5e_set_features,
.ndo_fix_features = mlx5e_fix_features,
.ndo_change_mtu = mlx5e_change_nic_mtu,
- .ndo_do_ioctl = mlx5e_ioctl,
+ .ndo_eth_ioctl = mlx5e_ioctl,
.ndo_set_tx_maxrate = mlx5e_set_tx_maxrate,
.ndo_features_check = mlx5e_features_check,
.ndo_tx_timeout = mlx5e_tx_timeout,
@@ -4620,15 +4168,6 @@ const struct net_device_ops mlx5e_netdev_ops = {
.ndo_get_devlink_port = mlx5e_get_devlink_port,
};
-void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
- int num_channels)
-{
- int i;
-
- for (i = 0; i < len; i++)
- indirection_rqt[i] = i % num_channels;
-}
-
static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
{
int i;
@@ -4641,24 +4180,8 @@ static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeo
return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
}
-void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
- u16 num_channels)
-{
- enum mlx5e_traffic_types tt;
-
- rss_params->hfunc = ETH_RSS_HASH_TOP;
- netdev_rss_key_fill(rss_params->toeplitz_hash_key,
- sizeof(rss_params->toeplitz_hash_key));
- mlx5e_build_default_indir_rqt(rss_params->indirection_rqt,
- MLX5E_INDIR_RQT_SIZE, num_channels);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- rss_params->rx_hash_fields[tt] =
- tirc_default_config[tt].rx_hash_fields;
-}
-
void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu)
{
- struct mlx5e_rss_params *rss_params = &priv->rss_params;
struct mlx5e_params *params = &priv->channels.params;
struct mlx5_core_dev *mdev = priv->mdev;
u8 rx_cq_period_mode;
@@ -4669,12 +4192,12 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
params->hard_mtu = MLX5E_ETH_HARD_MTU;
params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2,
priv->max_nch);
- params->num_tc = 1;
+ params->mqprio.num_tc = 1;
/* Set an initial non-zero value, so that mlx5e_select_queue won't
* divide by zero if called before first activating channels.
*/
- priv->num_tc_x_num_ch = params->num_channels * params->num_tc;
+ priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc;
/* SQ */
params->log_sq_size = is_kdump_kernel() ?
@@ -4718,10 +4241,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
/* TX inline */
mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
- /* RSS */
- mlx5e_build_rss_params(rss_params, params->num_channels);
- params->tunneled_offload_en =
- mlx5e_tunnel_inner_ft_supported(mdev);
+ params->tunneled_offload_en = mlx5_tunnel_inner_ft_supported(mdev);
/* AF_XDP */
params->xsk = xsk;
@@ -4781,8 +4301,8 @@ static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev)
{
int tt;
- for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
- if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5e_get_proto_by_tunnel_type(tt)))
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt)))
return true;
}
return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev));
@@ -4821,7 +4341,14 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX;
netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX;
+ /* Tunneled LRO is not supported in the driver, and the same RQs are
+ * shared between inner and outer TIRs, so the driver can't disable LRO
+ * for inner TIRs while having it enabled for outer TIRs. Due to this,
+ * block LRO altogether if the firmware declares tunneled LRO support.
+ */
if (!!MLX5_CAP_ETH(mdev, lro_cap) &&
+ !MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) &&
+ !MLX5_CAP_ETH(mdev, tunnel_lro_gre) &&
mlx5e_check_fragmented_striding_rq_cap(mdev))
netdev->vlan_features |= NETIF_F_LRO;
@@ -4948,7 +4475,6 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- struct devlink_port *dl_port;
int err;
mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu);
@@ -4964,19 +4490,13 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
if (err)
mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
- dl_port = mlx5e_devlink_get_dl_port(priv);
- if (dl_port->registered)
- mlx5e_health_create_reporters(priv);
-
+ mlx5e_health_create_reporters(priv);
return 0;
}
static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
{
- struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
-
- if (dl_port->registered)
- mlx5e_health_destroy_reporters(priv);
+ mlx5e_health_destroy_reporters(priv);
mlx5e_tls_cleanup(priv);
mlx5e_ipsec_cleanup(priv);
}
@@ -4984,9 +4504,14 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
- u16 max_nch = priv->max_nch;
+ enum mlx5e_rx_res_features features;
+ struct mlx5e_lro_param lro_param;
int err;
+ priv->rx_res = mlx5e_rx_res_alloc();
+ if (!priv->rx_res)
+ return -ENOMEM;
+
mlx5e_create_q_counters(priv);
err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
@@ -4995,42 +4520,20 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
goto err_destroy_q_counters;
}
- err = mlx5e_create_indirect_rqt(priv);
+ features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP;
+ if (priv->channels.params.tunneled_offload_en)
+ features |= MLX5E_RX_RES_FEATURE_INNER_FT;
+ lro_param = mlx5e_get_lro_param(&priv->channels.params);
+ err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
+ priv->max_nch, priv->drop_rq.rqn, &lro_param,
+ priv->channels.params.num_channels);
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch);
- if (err)
- goto err_destroy_indirect_rqts;
-
- err = mlx5e_create_indirect_tirs(priv, true);
- if (err)
- goto err_destroy_direct_rqts;
-
- err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch);
- if (err)
- goto err_destroy_indirect_tirs;
-
- err = mlx5e_create_direct_rqts(priv, priv->xsk_tir, max_nch);
- if (unlikely(err))
- goto err_destroy_direct_tirs;
-
- err = mlx5e_create_direct_tirs(priv, priv->xsk_tir, max_nch);
- if (unlikely(err))
- goto err_destroy_xsk_rqts;
-
- err = mlx5e_create_direct_rqts(priv, &priv->ptp_tir, 1);
- if (err)
- goto err_destroy_xsk_tirs;
-
- err = mlx5e_create_direct_tirs(priv, &priv->ptp_tir, 1);
- if (err)
- goto err_destroy_ptp_rqt;
-
err = mlx5e_create_flow_steering(priv);
if (err) {
mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
- goto err_destroy_ptp_direct_tir;
+ goto err_destroy_rx_res;
}
err = mlx5e_tc_nic_init(priv);
@@ -5051,46 +4554,27 @@ err_tc_nic_cleanup:
mlx5e_tc_nic_cleanup(priv);
err_destroy_flow_steering:
mlx5e_destroy_flow_steering(priv);
-err_destroy_ptp_direct_tir:
- mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1);
-err_destroy_ptp_rqt:
- mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1);
-err_destroy_xsk_tirs:
- mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch);
-err_destroy_xsk_rqts:
- mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch);
-err_destroy_direct_tirs:
- mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
-err_destroy_indirect_tirs:
- mlx5e_destroy_indirect_tirs(priv);
-err_destroy_direct_rqts:
- mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
-err_destroy_indirect_rqts:
- mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+err_destroy_rx_res:
+ mlx5e_rx_res_destroy(priv->rx_res);
err_close_drop_rq:
mlx5e_close_drop_rq(&priv->drop_rq);
err_destroy_q_counters:
mlx5e_destroy_q_counters(priv);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
return err;
}
static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
{
- u16 max_nch = priv->max_nch;
-
mlx5e_accel_cleanup_rx(priv);
mlx5e_tc_nic_cleanup(priv);
mlx5e_destroy_flow_steering(priv);
- mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1);
- mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1);
- mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch);
- mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch);
- mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
- mlx5e_destroy_indirect_tirs(priv);
- mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
- mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ mlx5e_rx_res_destroy(priv->rx_res);
mlx5e_close_drop_rq(&priv->drop_rq);
mlx5e_destroy_q_counters(priv);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
}
static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index bf94bcb6fa5d..ae71a17fdb27 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -49,6 +49,7 @@
#include "en/devlink.h"
#include "fs_core.h"
#include "lib/mlx5.h"
+#include "lib/devcom.h"
#define CREATE_TRACE_POINTS
#include "diag/en_rep_tracepoint.h"
#include "en_accel/ipsec.h"
@@ -250,7 +251,9 @@ static int mlx5e_rep_set_channels(struct net_device *dev,
}
static int mlx5e_rep_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -258,7 +261,9 @@ static int mlx5e_rep_get_coalesce(struct net_device *netdev,
}
static int mlx5e_rep_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -310,6 +315,8 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
rpriv = mlx5e_rep_to_rep_priv(rep);
list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) {
mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+ if (rep_sq->send_to_vport_rule_peer)
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
list_del(&rep_sq->list);
kfree(rep_sq);
}
@@ -319,6 +326,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
struct mlx5_eswitch_rep *rep,
u32 *sqns_array, int sqns_num)
{
+ struct mlx5_eswitch *peer_esw = NULL;
struct mlx5_flow_handle *flow_rule;
struct mlx5e_rep_priv *rpriv;
struct mlx5e_rep_sq *rep_sq;
@@ -329,6 +337,10 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
return 0;
rpriv = mlx5e_rep_to_rep_priv(rep);
+ if (mlx5_devcom_is_paired(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+ peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS);
+
for (i = 0; i < sqns_num; i++) {
rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL);
if (!rep_sq) {
@@ -337,7 +349,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
}
/* Add re-inject rule to the PF/representor sqs */
- flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, rep,
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep,
sqns_array[i]);
if (IS_ERR(flow_rule)) {
err = PTR_ERR(flow_rule);
@@ -345,12 +357,34 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
goto out_err;
}
rep_sq->send_to_vport_rule = flow_rule;
+ rep_sq->sqn = sqns_array[i];
+
+ if (peer_esw) {
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw,
+ rep, sqns_array[i]);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+ kfree(rep_sq);
+ goto out_err;
+ }
+ rep_sq->send_to_vport_rule_peer = flow_rule;
+ }
+
list_add(&rep_sq->list, &rpriv->vport_sqs_list);
}
+
+ if (peer_esw)
+ mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
return 0;
out_err:
mlx5e_sqs2vport_stop(esw, rep);
+
+ if (peer_esw)
+ mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
return err;
}
@@ -364,7 +398,8 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
int err = -ENOMEM;
u32 *sqs;
- sqs = kcalloc(priv->channels.num * priv->channels.params.num_tc, sizeof(*sqs), GFP_KERNEL);
+ sqs = kcalloc(priv->channels.num * mlx5e_get_dcb_num_tc(&priv->channels.params),
+ sizeof(*sqs), GFP_KERNEL);
if (!sqs)
goto out;
@@ -581,13 +616,10 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
- params->num_tc = 1;
+ params->mqprio.num_tc = 1;
params->tunneled_offload_en = false;
mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
-
- /* RSS */
- mlx5e_build_rss_params(&priv->rss_params, params->num_channels);
}
static void mlx5e_build_rep_netdev(struct net_device *netdev,
@@ -651,25 +683,23 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv)
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep = rpriv->rep;
struct ttc_params ttc_params = {};
- int tt, err;
+ int err;
priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
MLX5_FLOW_NAMESPACE_KERNEL);
/* The inner_ttc in the ttc params is intentionally not set */
- ttc_params.any_tt_tirn = priv->direct_tir[0].tirn;
- mlx5e_set_ttc_ft_params(&ttc_params);
+ mlx5e_set_ttc_params(priv, &ttc_params, false);
if (rep->vport != MLX5_VPORT_UPLINK)
/* To give uplik rep TTC a lower level for chaining from root ft */
ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1;
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;
-
- err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
- if (err) {
- netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", err);
+ priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+ if (IS_ERR(priv->fs.ttc)) {
+ err = PTR_ERR(priv->fs.ttc);
+ netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n",
+ err);
return err;
}
return 0;
@@ -687,7 +717,7 @@ static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv)
/* non uplik reps will skip any bypass tables and go directly to
* their own ttc
*/
- rpriv->root_ft = priv->fs.ttc.ft.t;
+ rpriv->root_ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
return 0;
}
@@ -760,9 +790,13 @@ int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup)
static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
- u16 max_nch = priv->max_nch;
+ struct mlx5e_lro_param lro_param;
int err;
+ priv->rx_res = mlx5e_rx_res_alloc();
+ if (!priv->rx_res)
+ return -ENOMEM;
+
mlx5e_init_l2_addr(priv);
err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
@@ -771,25 +805,16 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
return err;
}
- err = mlx5e_create_indirect_rqt(priv);
+ lro_param = mlx5e_get_lro_param(&priv->channels.params);
+ err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+ priv->max_nch, priv->drop_rq.rqn, &lro_param,
+ priv->channels.params.num_channels);
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch);
- if (err)
- goto err_destroy_indirect_rqts;
-
- err = mlx5e_create_indirect_tirs(priv, false);
- if (err)
- goto err_destroy_direct_rqts;
-
- err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch);
- if (err)
- goto err_destroy_indirect_tirs;
-
err = mlx5e_create_rep_ttc_table(priv);
if (err)
- goto err_destroy_direct_tirs;
+ goto err_destroy_rx_res;
err = mlx5e_create_rep_root_ft(priv);
if (err)
@@ -806,33 +831,26 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
err_destroy_root_ft:
mlx5e_destroy_rep_root_ft(priv);
err_destroy_ttc_table:
- mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
-err_destroy_direct_tirs:
- mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
-err_destroy_indirect_tirs:
- mlx5e_destroy_indirect_tirs(priv);
-err_destroy_direct_rqts:
- mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
-err_destroy_indirect_rqts:
- mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ mlx5_destroy_ttc_table(priv->fs.ttc);
+err_destroy_rx_res:
+ mlx5e_rx_res_destroy(priv->rx_res);
err_close_drop_rq:
mlx5e_close_drop_rq(&priv->drop_rq);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
return err;
}
static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
{
- u16 max_nch = priv->max_nch;
-
mlx5e_ethtool_cleanup_steering(priv);
rep_vport_rx_rule_destroy(priv);
mlx5e_destroy_rep_root_ft(priv);
- mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
- mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
- mlx5e_destroy_indirect_tirs(priv);
- mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
- mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ mlx5_destroy_ttc_table(priv->fs.ttc);
+ mlx5e_rx_res_destroy(priv->rx_res);
mlx5e_close_drop_rq(&priv->drop_rq);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
}
static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv)
@@ -1264,10 +1282,64 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
return rpriv->netdev;
}
+static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_sq *rep_sq;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+ list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
+ if (!rep_sq->send_to_vport_rule_peer)
+ continue;
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
+ rep_sq->send_to_vport_rule_peer = NULL;
+ }
+}
+
+static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ struct mlx5_eswitch *peer_esw)
+{
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_sq *rep_sq;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+ list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
+ if (rep_sq->send_to_vport_rule_peer)
+ continue;
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, rep_sq->sqn);
+ if (IS_ERR(flow_rule))
+ goto err_out;
+ rep_sq->send_to_vport_rule_peer = flow_rule;
+ }
+
+ return 0;
+err_out:
+ mlx5e_vport_rep_event_unpair(rep);
+ return PTR_ERR(flow_rule);
+}
+
+static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ enum mlx5_switchdev_event event,
+ void *data)
+{
+ int err = 0;
+
+ if (event == MLX5_SWITCHDEV_EVENT_PAIR)
+ err = mlx5e_vport_rep_event_pair(esw, rep, data);
+ else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR)
+ mlx5e_vport_rep_event_unpair(rep);
+
+ return err;
+}
+
static const struct mlx5_eswitch_rep_ops rep_ops = {
.load = mlx5e_vport_rep_load,
.unload = mlx5e_vport_rep_unload,
- .get_proto_dev = mlx5e_vport_rep_get_proto_dev
+ .get_proto_dev = mlx5e_vport_rep_get_proto_dev,
+ .event = mlx5e_vport_rep_event,
};
static int mlx5e_rep_probe(struct auxiliary_device *adev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 47a2dfb7792a..48a203a9e7d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -60,6 +60,7 @@ struct mlx5e_neigh_update_table {
struct mlx5_tc_ct_priv;
struct mlx5e_rep_bond;
struct mlx5e_tc_tun_encap;
+struct mlx5e_post_act;
struct mlx5_rep_uplink_priv {
/* Filters DB - instantiated by the uplink representor and shared by
@@ -88,8 +89,9 @@ struct mlx5_rep_uplink_priv {
/* maps tun_enc_opts to a unique id*/
struct mapping_ctx *tunnel_enc_opts_mapping;
+ struct mlx5e_post_act *post_act;
struct mlx5_tc_ct_priv *ct_priv;
- struct mlx5_esw_psample *esw_psample;
+ struct mlx5e_tc_psample *tc_psample;
/* support eswitch vports bonding */
struct mlx5e_rep_bond *bond;
@@ -146,7 +148,7 @@ struct mlx5e_neigh_hash_entry {
*/
refcount_t refcnt;
- /* Save the last reported time offloaded trafic pass over one of the
+ /* Save the last reported time offloaded traffic pass over one of the
* neigh hash entry flows. Use it to periodically update the neigh
* 'used' value and avoid neigh deleting by the kernel.
*/
@@ -207,6 +209,8 @@ struct mlx5e_encap_entry {
struct mlx5e_rep_sq {
struct mlx5_flow_handle *send_to_vport_rule;
+ struct mlx5_flow_handle *send_to_vport_rule_peer;
+ u32 sqn;
struct list_head list;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index d273758255c3..ba8164792016 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -34,25 +34,20 @@
#include <net/flow_offload.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
-#include <net/tc_act/tc_gact.h>
-#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
-#include <net/tc_act/tc_mirred.h>
-#include <net/tc_act/tc_vlan.h>
-#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
-#include <net/tc_act/tc_mpls.h>
#include <net/psample.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
#include <net/bareudp.h>
#include <net/bonding.h>
#include "en.h"
+#include "en/tc/post_act.h"
#include "en_rep.h"
#include "en/rep/tc.h"
#include "en/rep/neigh.h"
@@ -66,7 +61,7 @@
#include "en/mod_hdr.h"
#include "en/tc_priv.h"
#include "en/tc_tun_encap.h"
-#include "esw/sample.h"
+#include "en/tc/sample.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "lib/fs_chains.h"
@@ -103,7 +98,7 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
[MARK_TO_REG] = mark_to_reg_ct,
[LABELS_TO_REG] = labels_to_reg_ct,
[FTEID_TO_REG] = fteid_to_reg_ct,
- /* For NIC rules we store the retore metadata directly
+ /* For NIC rules we store the restore metadata directly
* into reg_b that is passed to SW since we don't
* jump between steering domains.
*/
@@ -252,7 +247,7 @@ get_ct_priv(struct mlx5e_priv *priv)
}
#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
-static struct mlx5_esw_psample *
+static struct mlx5e_tc_psample *
get_sample_priv(struct mlx5e_priv *priv)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -263,7 +258,7 @@ get_sample_priv(struct mlx5e_priv *priv)
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
uplink_priv = &uplink_rpriv->uplink_priv;
- return uplink_priv->esw_psample;
+ return uplink_priv->tc_psample;
}
return NULL;
@@ -340,12 +335,12 @@ struct mlx5e_hairpin {
struct mlx5_core_dev *func_mdev;
struct mlx5e_priv *func_priv;
u32 tdn;
- u32 tirn;
+ struct mlx5e_tir direct_tir;
int num_channels;
struct mlx5e_rqt indir_rqt;
- u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
- struct mlx5e_ttc_table ttc;
+ struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5_ttc_table *ttc;
};
struct mlx5e_hairpin_entry {
@@ -482,126 +477,101 @@ struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
{
- u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {};
- void *tirc;
+ struct mlx5e_tir_builder *builder;
int err;
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
if (err)
- goto alloc_tdn_err;
-
- tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
-
- MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
- MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
- MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
+ goto out;
- err = mlx5_core_create_tir(hp->func_mdev, in, &hp->tirn);
+ mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
+ err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
if (err)
goto create_tir_err;
- return 0;
+out:
+ mlx5e_tir_builder_free(builder);
+ return err;
create_tir_err:
mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
-alloc_tdn_err:
- return err;
+
+ goto out;
}
static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
{
- mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
+ mlx5e_tir_destroy(&hp->direct_tir);
mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
}
-static int mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
-{
- struct mlx5e_priv *priv = hp->func_priv;
- int i, ix, sz = MLX5E_INDIR_RQT_SIZE;
- u32 *indirection_rqt, rqn;
-
- indirection_rqt = kcalloc(sz, sizeof(*indirection_rqt), GFP_KERNEL);
- if (!indirection_rqt)
- return -ENOMEM;
-
- mlx5e_build_default_indir_rqt(indirection_rqt, sz,
- hp->num_channels);
-
- for (i = 0; i < sz; i++) {
- ix = i;
- if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
- ix = mlx5e_bits_invert(i, ilog2(sz));
- ix = indirection_rqt[ix];
- rqn = hp->pair->rqn[ix];
- MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
- }
-
- kfree(indirection_rqt);
- return 0;
-}
-
static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
{
- int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
struct mlx5e_priv *priv = hp->func_priv;
struct mlx5_core_dev *mdev = priv->mdev;
- void *rqtc;
- u32 *in;
+ struct mlx5e_rss_params_indir *indir;
+ int err;
- inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
+ indir = kvmalloc(sizeof(*indir), GFP_KERNEL);
+ if (!indir)
return -ENOMEM;
- rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
-
- MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
- MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
-
- err = mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);
- if (err)
- goto out;
-
- err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
- if (!err)
- hp->indir_rqt.enabled = true;
+ mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
+ err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
+ mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
+ indir);
-out:
- kvfree(in);
+ kvfree(indir);
return err;
}
static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
struct mlx5e_priv *priv = hp->func_priv;
- u32 in[MLX5_ST_SZ_DW(create_tir_in)];
- int tt, i, err;
- void *tirc;
+ struct mlx5e_rss_params_hash rss_hash;
+ enum mlx5_traffic_types tt, max_tt;
+ struct mlx5e_tir_builder *builder;
+ int err = 0;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);
for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);
+ struct mlx5e_rss_params_traffic_type rss_tt;
- memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
- tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ rss_tt = mlx5e_rss_get_default_tt_config(tt);
- MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
- MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
- MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
- mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);
+ mlx5e_tir_builder_build_rqt(builder, hp->tdn,
+ mlx5e_rqt_get_rqtn(&hp->indir_rqt),
+ false);
+ mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);
- err = mlx5_core_create_tir(hp->func_mdev, in,
- &hp->indir_tirn[tt]);
+ err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
if (err) {
mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
goto err_destroy_tirs;
}
+
+ mlx5e_tir_builder_clear(builder);
}
- return 0;
-err_destroy_tirs:
- for (i = 0; i < tt; i++)
- mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
+out:
+ mlx5e_tir_builder_free(builder);
return err;
+
+err_destroy_tirs:
+ max_tt = tt;
+ for (tt = 0; tt < max_tt; tt++)
+ mlx5e_tir_destroy(&hp->indir_tir[tt]);
+
+ goto out;
}
static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
@@ -609,7 +579,7 @@ static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
int tt;
for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
+ mlx5e_tir_destroy(&hp->indir_tir[tt]);
}
static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
@@ -620,12 +590,16 @@ static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
memset(ttc_params, 0, sizeof(*ttc_params));
- ttc_params->any_tt_tirn = hp->tirn;
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];
+ ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ ttc_params->dests[tt].tir_num =
+ tt == MLX5_TT_ANY ?
+ mlx5e_tir_get_tirn(&hp->direct_tir) :
+ mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
+ }
- ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
ft_attr->prio = MLX5E_TC_PRIO;
}
@@ -645,30 +619,31 @@ static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
goto err_create_indirect_tirs;
mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
- err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
- if (err)
+ hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+ if (IS_ERR(hp->ttc)) {
+ err = PTR_ERR(hp->ttc);
goto err_create_ttc_table;
+ }
netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
- hp->num_channels, hp->ttc.ft.t->id);
+ hp->num_channels,
+ mlx5_get_ttc_flow_table(priv->fs.ttc)->id);
return 0;
err_create_ttc_table:
mlx5e_hairpin_destroy_indirect_tirs(hp);
err_create_indirect_tirs:
- mlx5e_destroy_rqt(priv, &hp->indir_rqt);
+ mlx5e_rqt_destroy(&hp->indir_rqt);
return err;
}
static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
- struct mlx5e_priv *priv = hp->func_priv;
-
- mlx5e_destroy_ttc_table(priv, &hp->ttc);
+ mlx5_destroy_ttc_table(hp->ttc);
mlx5e_hairpin_destroy_indirect_tirs(hp);
- mlx5e_destroy_rqt(priv, &hp->indir_rqt);
+ mlx5e_rqt_destroy(&hp->indir_rqt);
}
static struct mlx5e_hairpin *
@@ -903,16 +878,17 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
}
netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
- hp->tirn, hp->pair->rqn[0],
+ mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
dev_name(hp->pair->peer_mdev->device),
hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
attach_flow:
if (hpe->hp->num_channels > 1) {
flow_flag_set(flow, HAIRPIN_RSS);
- flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
+ flow->attr->nic_attr->hairpin_ft =
+ mlx5_get_ttc_flow_table(hpe->hp->ttc);
} else {
- flow->attr->nic_attr->hairpin_tirn = hpe->hp->tirn;
+ flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
}
flow->hpe = hpe;
@@ -1056,15 +1032,17 @@ err_ft_get:
static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_core_dev *dev = priv->mdev;
- struct mlx5_fc *counter = NULL;
+ struct mlx5_fc *counter;
int err;
+ parse_attr = attr->parse_attr;
+
if (flow_flag_test(flow, HAIRPIN)) {
err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
if (err)
@@ -1170,7 +1148,8 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
mod_hdr_acts);
#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
} else if (flow_flag_test(flow, SAMPLE)) {
- rule = mlx5_esw_sample_offload(get_sample_priv(flow->priv), spec, attr);
+ rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr,
+ mlx5e_tc_get_flow_tun_id(flow));
#endif
} else {
rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
@@ -1209,7 +1188,7 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
if (flow_flag_test(flow, SAMPLE)) {
- mlx5_esw_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
+ mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
return;
}
#endif
@@ -1338,6 +1317,7 @@ bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_
int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
{
struct mlx5e_priv *out_priv, *route_priv;
+ struct mlx5_devcom *devcom = NULL;
struct mlx5_core_dev *route_mdev;
struct mlx5_eswitch *esw;
u16 vhca_id;
@@ -1349,7 +1329,24 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
route_mdev = route_priv->mdev;
vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
+ if (mlx5_lag_is_active(out_priv->mdev)) {
+ /* In lag case we may get devices from different eswitch instances.
+ * If we failed to get vport num, it means, mostly, that we on the wrong
+ * eswitch.
+ */
+ err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+ if (err != -ENOENT)
+ return err;
+
+ devcom = out_priv->mdev->priv.devcom;
+ esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!esw)
+ return -ENODEV;
+ }
+
err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+ if (devcom)
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
return err;
}
@@ -1384,9 +1381,9 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
bool vf_tun = false, encap_valid = true;
struct net_device *encap_dev = NULL;
struct mlx5_esw_flow_attr *esw_attr;
- struct mlx5_fc *counter = NULL;
struct mlx5e_rep_priv *rpriv;
struct mlx5e_priv *out_priv;
+ struct mlx5_fc *counter;
u32 max_prio, max_chain;
int err = 0;
int out_index;
@@ -1573,6 +1570,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
else
mlx5e_detach_mod_hdr(priv, flow);
}
+ kfree(attr->sample_attr);
kvfree(attr->parse_attr);
kvfree(attr->esw_attr->rx_tun_attr);
@@ -1582,7 +1580,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
if (flow_flag_test(flow, L3_TO_L2_DECAP))
mlx5e_detach_decap(priv, flow);
- kfree(flow->attr->esw_attr->sample);
kfree(flow->attr);
}
@@ -1647,17 +1644,22 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
}
}
-static int flow_has_tc_fwd_action(struct flow_cls_offload *f)
+static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f)
{
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct flow_action *flow_action = &rule->action;
const struct flow_action_entry *act;
int i;
+ if (chain)
+ return false;
+
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
case FLOW_ACTION_GOTO:
return true;
+ case FLOW_ACTION_SAMPLE:
+ return true;
default:
continue;
}
@@ -1898,7 +1900,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
needs_mapping = !!flow->attr->chain;
- sets_mapping = !flow->attr->chain && flow_has_tc_fwd_action(f);
+ sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
*match_inner = !needs_mapping;
if ((needs_mapping || sets_mapping) &&
@@ -2471,7 +2473,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
}
}
- /* Currenlty supported only for MPLS over UDP */
+ /* Currently supported only for MPLS over UDP */
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
!netif_is_bareudp(filter_dev)) {
NL_SET_ERR_MSG_MOD(extack,
@@ -2725,7 +2727,9 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
if (s_mask && a_mask) {
NL_SET_ERR_MSG_MOD(extack,
"can't set and add to the same HW field");
- printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
+ netdev_warn(priv->netdev,
+ "mlx5: can't set and add to the same HW field (%x)\n",
+ f->field);
return -EOPNOTSUPP;
}
@@ -2764,8 +2768,9 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
if (first < next_z && next_z < last) {
NL_SET_ERR_MSG_MOD(extack,
"rewrite of few sub-fields isn't supported");
- printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
- mask);
+ netdev_warn(priv->netdev,
+ "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
+ mask);
return -EOPNOTSUPP;
}
@@ -3352,10 +3357,10 @@ static int validate_goto_chain(struct mlx5e_priv *priv,
static int parse_tc_nic_actions(struct mlx5e_priv *priv,
struct flow_action *flow_action,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
struct pedit_headers_action hdrs[2] = {};
const struct flow_action_entry *act;
@@ -3371,8 +3376,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
nic_attr = attr->nic_attr;
-
nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ parse_attr = attr->parse_attr;
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
@@ -3381,10 +3386,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
MLX5_FLOW_CONTEXT_ACTION_COUNT;
break;
case FLOW_ACTION_DROP:
- action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
- if (MLX5_CAP_FLOWTABLE(priv->mdev,
- flow_table_properties_nic_receive.flow_counter))
- action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
break;
case FLOW_ACTION_MANGLE:
case FLOW_ACTION_ADD:
@@ -3425,7 +3428,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
"device is not on same HW, can't offload");
netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
peer_dev->name);
- return -EINVAL;
+ return -EOPNOTSUPP;
}
}
break;
@@ -3435,7 +3438,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
NL_SET_ERR_MSG_MOD(extack,
"Bad flow mark - only 16 bit is supported");
- return -EINVAL;
+ return -EOPNOTSUPP;
}
nic_attr->flow_tag = mark;
@@ -3732,20 +3735,19 @@ static int verify_uplink_forwarding(struct mlx5e_priv *priv,
static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
struct flow_action *flow_action,
struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack,
- struct net_device *filter_dev)
+ struct netlink_ext_ack *extack)
{
struct pedit_headers_action hdrs[2] = {};
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_sample_attr sample_attr = {};
const struct ip_tunnel_info *info = NULL;
struct mlx5_flow_attr *attr = flow->attr;
int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
bool ft_flow = mlx5e_is_ft_flow(flow);
const struct flow_action_entry *act;
struct mlx5_esw_flow_attr *esw_attr;
- struct mlx5_sample_attr sample = {};
bool encap = false, decap = false;
u32 action = attr->action;
int err, i, if_count = 0;
@@ -3798,7 +3800,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
"mpls pop supported only as first action");
return -EOPNOTSUPP;
}
- if (!netif_is_bareudp(filter_dev)) {
+ if (!netif_is_bareudp(parse_attr->filter_dev)) {
NL_SET_ERR_MSG_MOD(extack,
"mpls pop supported only on bareudp devices");
return -EOPNOTSUPP;
@@ -3947,7 +3949,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
"devices %s %s not on same switch HW, can't offload forwarding\n",
priv->netdev->name,
out_dev->name);
- return -EINVAL;
+ return -EOPNOTSUPP;
}
}
break;
@@ -4016,10 +4018,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported");
return -EOPNOTSUPP;
}
- sample.rate = act->sample.rate;
- sample.group_num = act->sample.psample_group->group_num;
+ sample_attr.rate = act->sample.rate;
+ sample_attr.group_num = act->sample.psample_group->group_num;
if (act->sample.truncate)
- sample.trunc_size = act->sample.trunc_size;
+ sample_attr.trunc_size = act->sample.trunc_size;
flow_flag_set(flow, SAMPLE);
break;
default:
@@ -4104,10 +4106,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
* no errors after parsing.
*/
if (flow_flag_test(flow, SAMPLE)) {
- esw_attr->sample = kzalloc(sizeof(*esw_attr->sample), GFP_KERNEL);
- if (!esw_attr->sample)
+ attr->sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL);
+ if (!attr->sample_attr)
return -ENOMEM;
- *esw_attr->sample = sample;
+ *attr->sample_attr = sample_attr;
}
return 0;
@@ -4300,7 +4302,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
if (err)
goto err_free;
- err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev);
+ err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
if (err)
goto err_free;
@@ -4446,11 +4448,11 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
if (err)
goto err_free;
- err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
+ err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
if (err)
goto err_free;
- err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
+ err = mlx5e_tc_add_nic_flow(priv, flow, extack);
if (err)
goto err_free;
@@ -4705,7 +4707,7 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
rate_mbps = max_t(u32, rate, 1);
}
- err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
+ err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
if (err)
NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
@@ -4877,6 +4879,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
struct mlx5_core_dev *dev = priv->mdev;
struct mapping_ctx *chains_mapping;
struct mlx5_chains_attr attr = {};
+ u64 mapping_id;
int err;
mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
@@ -4890,8 +4893,12 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
- chains_mapping = mapping_create(sizeof(struct mlx5_mapped_obj),
- MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
+ mapping_id = mlx5_query_nic_system_image_guid(dev);
+
+ chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
+ sizeof(struct mlx5_mapped_obj),
+ MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
+
if (IS_ERR(chains_mapping)) {
err = PTR_ERR(chains_mapping);
goto err_mapping;
@@ -4913,8 +4920,9 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
goto err_chains;
}
+ tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
- MLX5_FLOW_NAMESPACE_KERNEL);
+ MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
err = register_netdevice_notifier_dev_net(priv->netdev,
@@ -4930,6 +4938,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
err_reg:
mlx5_tc_ct_clean(tc->ct);
+ mlx5e_tc_post_act_destroy(tc->post_act);
mlx5_chains_destroy(tc->chains);
err_chains:
mapping_destroy(chains_mapping);
@@ -4968,6 +4977,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
mutex_destroy(&tc->t_lock);
mlx5_tc_ct_clean(tc->ct);
+ mlx5e_tc_post_act_destroy(tc->post_act);
mapping_destroy(tc->mapping);
mlx5_chains_destroy(tc->chains);
}
@@ -4980,6 +4990,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
struct mapping_ctx *mapping;
struct mlx5_eswitch *esw;
struct mlx5e_priv *priv;
+ u64 mapping_id;
int err = 0;
uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
@@ -4987,17 +4998,24 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
priv = netdev_priv(rpriv->netdev);
esw = priv->mdev->priv.eswitch;
+ uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
+ MLX5_FLOW_NAMESPACE_FDB);
uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
esw_chains(esw),
&esw->offloads.mod_hdr,
- MLX5_FLOW_NAMESPACE_FDB);
+ MLX5_FLOW_NAMESPACE_FDB,
+ uplink_priv->post_act);
#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
- uplink_priv->esw_psample = mlx5_esw_sample_init(netdev_priv(priv->netdev));
+ uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
#endif
- mapping = mapping_create(sizeof(struct tunnel_match_key),
- TUNNEL_INFO_BITS_MASK, true);
+ mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+ mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
+ sizeof(struct tunnel_match_key),
+ TUNNEL_INFO_BITS_MASK, true);
+
if (IS_ERR(mapping)) {
err = PTR_ERR(mapping);
goto err_tun_mapping;
@@ -5005,7 +5023,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
uplink_priv->tunnel_mapping = mapping;
/* 0xFFF is reserved for stack devices slow path table mark */
- mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
+ mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
+ sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
if (IS_ERR(mapping)) {
err = PTR_ERR(mapping);
goto err_enc_opts_mapping;
@@ -5034,11 +5053,12 @@ err_enc_opts_mapping:
mapping_destroy(uplink_priv->tunnel_mapping);
err_tun_mapping:
#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
- mlx5_esw_sample_cleanup(uplink_priv->esw_psample);
+ mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
#endif
mlx5_tc_ct_clean(uplink_priv->ct_priv);
netdev_warn(priv->netdev,
"Failed to initialize tc (eswitch), err: %d", err);
+ mlx5e_tc_post_act_destroy(uplink_priv->post_act);
return err;
}
@@ -5055,9 +5075,10 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
mapping_destroy(uplink_priv->tunnel_mapping);
#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
- mlx5_esw_sample_cleanup(uplink_priv->esw_psample);
+ mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
#endif
mlx5_tc_ct_clean(uplink_priv->ct_priv);
+ mlx5e_tc_post_act_destroy(uplink_priv->post_act);
}
int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index f7cbeb0b66d2..1a4cd882f0fb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -70,6 +70,7 @@ struct mlx5_flow_attr {
struct mlx5_fc *counter;
struct mlx5_modify_hdr *modify_hdr;
struct mlx5_ct_attr ct_attr;
+ struct mlx5e_sample_attr *sample_attr;
struct mlx5e_tc_flow_parse_attr *parse_attr;
u32 chain;
u16 prio;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 6e074cc457de..605c8ecc3610 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -855,8 +855,8 @@ clean:
return err;
}
-int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
- unsigned int *irqn)
+static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn,
+ unsigned int *irqn)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
struct mlx5_eq_comp *eq, *n;
@@ -865,8 +865,10 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
if (i++ == vector) {
- *eqn = eq->core.eqn;
- *irqn = eq->core.irqn;
+ if (irqn)
+ *irqn = eq->core.irqn;
+ if (eqn)
+ *eqn = eq->core.eqn;
err = 0;
break;
}
@@ -874,8 +876,18 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
return err;
}
+
+/* Look up the EQ number for completion vector @vector and store it in @eqn.
+ * The IRQ number is not requested (NULL is passed for it). Returns whatever
+ * vector2eqnirqn() returns.
+ */
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn)
+{
+ return vector2eqnirqn(dev, vector, eqn, NULL);
+}
EXPORT_SYMBOL(mlx5_vector2eqn);
+/* Look up the IRQ number for completion vector @vector and store it in @irqn.
+ * The EQ number is not requested (NULL is passed for it). Returns whatever
+ * vector2eqnirqn() returns.
+ */
+int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn)
+{
+ return vector2eqnirqn(dev, vector, NULL, irqn);
+}
+
unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
{
return dev->priv.eq_table->num_comp_eqs;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
index 505bf811984a..2e504c7461c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
@@ -15,6 +15,15 @@ static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport)
vport->egress.offloads.fwd_rule = NULL;
}
+/* Delete the vport's egress offloads bounce rule, if one is set, and clear
+ * the stored pointer so a repeated call is a no-op.
+ */
+static void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport)
+{
+ if (!vport->egress.offloads.bounce_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->egress.offloads.bounce_rule);
+ vport->egress.offloads.bounce_rule = NULL;
+}
+
static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw,
struct mlx5_vport *vport,
struct mlx5_flow_destination *fwd_dest)
@@ -87,6 +96,7 @@ static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport)
{
esw_acl_egress_vlan_destroy(vport);
esw_acl_egress_ofld_fwd2vport_destroy(vport);
+ esw_acl_egress_ofld_bounce_rule_destroy(vport);
}
static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw,
@@ -145,6 +155,12 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport)
mlx5_destroy_flow_group(vport->egress.offloads.fwd_grp);
vport->egress.offloads.fwd_grp = NULL;
}
+
+ if (!IS_ERR_OR_NULL(vport->egress.offloads.bounce_grp)) {
+ mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp);
+ vport->egress.offloads.bounce_grp = NULL;
+ }
+
esw_acl_egress_vlan_grp_destroy(vport);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index a6e1d4f78268..7e221038df8d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -5,6 +5,7 @@
#include <linux/notifier.h>
#include <net/netevent.h>
#include <net/switchdev.h>
+#include "lib/devcom.h"
#include "bridge.h"
#include "eswitch.h"
#include "bridge_priv.h"
@@ -56,7 +57,6 @@ struct mlx5_esw_bridge {
struct list_head fdb_list;
struct rhashtable fdb_ht;
- struct xarray vports;
struct mlx5_flow_table *egress_ft;
struct mlx5_flow_group *egress_vlan_fg;
@@ -69,7 +69,7 @@ static void
mlx5_esw_bridge_fdb_offload_notify(struct net_device *dev, const unsigned char *addr, u16 vid,
unsigned long val)
{
- struct switchdev_notifier_fdb_info send_info;
+ struct switchdev_notifier_fdb_info send_info = {};
send_info.addr = addr;
send_info.vid = vid;
@@ -77,6 +77,15 @@ mlx5_esw_bridge_fdb_offload_notify(struct net_device *dev, const unsigned char *
call_switchdev_notifiers(val, dev, &send_info.info, NULL);
}
+/* Send a SWITCHDEV_FDB_DEL_TO_BRIDGE notification for @entry's address and
+ * VLAN id, unless the entry is flagged as added by user or as a peer entry,
+ * in which case nothing is sent.
+ */
+static void
+mlx5_esw_bridge_fdb_del_notify(struct mlx5_esw_bridge_fdb_entry *entry)
+{
+ if (!(entry->flags & (MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER | MLX5_ESW_BRIDGE_FLAG_PEER)))
+ mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
+ entry->key.vid,
+ SWITCHDEV_FDB_DEL_TO_BRIDGE);
+}
+
static struct mlx5_flow_table *
mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw)
{
@@ -400,9 +409,10 @@ mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge)
}
static struct mlx5_flow_handle *
-mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr,
- struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
- struct mlx5_esw_bridge *bridge)
+mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char *addr,
+ struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
+ struct mlx5_esw_bridge *bridge,
+ struct mlx5_eswitch *esw)
{
struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
struct mlx5_flow_act flow_act = {
@@ -430,7 +440,7 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr,
MLX5_SET(fte_match_param, rule_spec->match_criteria,
misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
- mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num));
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
if (vlan && vlan->pkt_reformat_push) {
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
@@ -459,6 +469,35 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr,
}
+/* Create the ingress flow for a port of the local eswitch: forwards to
+ * mlx5_esw_bridge_ingress_flow_with_esw_create() with the eswitch taken from
+ * the bridge's br_offloads.
+ */
static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr,
+				    struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
+				    struct mlx5_esw_bridge *bridge)
+{
+	/* The eswitch is what the vport metadata match value is derived from. */
+	struct mlx5_eswitch *local_esw = bridge->br_offloads->esw;
+
+	return mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id,
+							    bridge, local_esw);
+}
+
+/* Create the ingress flow for an FDB entry that belongs to a peer port.
+ *
+ * Works like mlx5_esw_bridge_ingress_flow_create(), except that the eswitch
+ * handed to mlx5_esw_bridge_ingress_flow_with_esw_create() is the peer eswitch
+ * obtained from devcom (MLX5_DEVCOM_ESW_OFFLOADS). The peer data is released
+ * again before returning.
+ *
+ * Return: whatever mlx5_esw_bridge_ingress_flow_with_esw_create() returned, or
+ * ERR_PTR(-ENODEV) when devcom has no peer eswitch data.
+ */
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, const unsigned char *addr,
+					 struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
+					 struct mlx5_esw_bridge *bridge)
+{
+	struct mlx5_devcom *devcom = bridge->br_offloads->esw->dev->priv.devcom;
+	/* Must be automatic storage: a 'static' local would be one slot shared
+	 * by every caller and could be overwritten between assignment and return.
+	 */
+	struct mlx5_flow_handle *handle;
+	struct mlx5_eswitch *peer_esw;
+
+	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+	if (!peer_esw)
+		return ERR_PTR(-ENODEV);
+
+	handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id,
+							      bridge, peer_esw);
+
+	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+	return handle;
+}
+
+static struct mlx5_flow_handle *
mlx5_esw_bridge_ingress_filter_flow_create(u16 vport_num, const unsigned char *addr,
struct mlx5_esw_bridge *bridge)
{
@@ -505,7 +544,7 @@ mlx5_esw_bridge_ingress_filter_flow_create(u16 vport_num, const unsigned char *a
}
static struct mlx5_flow_handle *
-mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr,
+mlx5_esw_bridge_egress_flow_create(u16 vport_num, u16 esw_owner_vhca_id, const unsigned char *addr,
struct mlx5_esw_bridge_vlan *vlan,
struct mlx5_esw_bridge *bridge)
{
@@ -550,6 +589,10 @@ mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr,
vlan->vid);
}
+ if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) {
+ dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ dest.vport.vhca_id = esw_owner_vhca_id;
+ }
handle = mlx5_add_flow_rules(bridge->egress_ft, rule_spec, &flow_act, &dest, 1);
kvfree(rule_spec);
@@ -576,10 +619,9 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex,
goto err_fdb_ht;
INIT_LIST_HEAD(&bridge->fdb_list);
- xa_init(&bridge->vports);
bridge->ifindex = ifindex;
bridge->refcnt = 1;
- bridge->ageing_time = BR_DEFAULT_AGEING_TIME;
+ bridge->ageing_time = clock_t_to_jiffies(BR_DEFAULT_AGEING_TIME);
list_add(&bridge->list, &br_offloads->bridges);
return bridge;
@@ -603,7 +645,6 @@ static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads,
return;
mlx5_esw_bridge_egress_table_cleanup(bridge);
- WARN_ON(!xa_empty(&bridge->vports));
list_del(&bridge->list);
rhashtable_destroy(&bridge->fdb_ht);
kvfree(bridge);
@@ -639,30 +680,40 @@ mlx5_esw_bridge_lookup(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads
return bridge;
}
+/* Build the lookup key used for the br_offloads->ports xarray:
+ * @esw_owner_vhca_id shifted left by the bit width of @vport_num, OR-ed with
+ * @vport_num itself.
+ */
+static unsigned long mlx5_esw_bridge_port_key_from_data(u16 vport_num, u16 esw_owner_vhca_id)
+{
+	unsigned long key = esw_owner_vhca_id;
+
+	key <<= sizeof(vport_num) * BITS_PER_BYTE;
+	key |= vport_num;
+
+	return key;
+}
+
+/* Key of an existing @port, computed from its vport_num and esw_owner_vhca_id. */
+static unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port)
+{
+	u16 owner_vhca_id = port->esw_owner_vhca_id;
+	u16 vport = port->vport_num;
+
+	return mlx5_esw_bridge_port_key_from_data(vport, owner_vhca_id);
+}
+
static int mlx5_esw_bridge_port_insert(struct mlx5_esw_bridge_port *port,
- struct mlx5_esw_bridge *bridge)
+ struct mlx5_esw_bridge_offloads *br_offloads)
{
- return xa_insert(&bridge->vports, port->vport_num, port, GFP_KERNEL);
+ return xa_insert(&br_offloads->ports, mlx5_esw_bridge_port_key(port), port, GFP_KERNEL);
}
static struct mlx5_esw_bridge_port *
-mlx5_esw_bridge_port_lookup(u16 vport_num, struct mlx5_esw_bridge *bridge)
+mlx5_esw_bridge_port_lookup(u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads)
{
- return xa_load(&bridge->vports, vport_num);
+ return xa_load(&br_offloads->ports, mlx5_esw_bridge_port_key_from_data(vport_num,
+ esw_owner_vhca_id));
}
static void mlx5_esw_bridge_port_erase(struct mlx5_esw_bridge_port *port,
- struct mlx5_esw_bridge *bridge)
+ struct mlx5_esw_bridge_offloads *br_offloads)
{
- xa_erase(&bridge->vports, port->vport_num);
+ xa_erase(&br_offloads->ports, mlx5_esw_bridge_port_key(port));
}
-static void mlx5_esw_bridge_fdb_entry_refresh(unsigned long lastuse,
- struct mlx5_esw_bridge_fdb_entry *entry)
+static void mlx5_esw_bridge_fdb_entry_refresh(struct mlx5_esw_bridge_fdb_entry *entry)
{
trace_mlx5_esw_bridge_fdb_entry_refresh(entry);
- entry->lastuse = lastuse;
mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
entry->key.vid,
SWITCHDEV_FDB_ADD_TO_BRIDGE);
@@ -690,10 +741,7 @@ static void mlx5_esw_bridge_fdb_flush(struct mlx5_esw_bridge *bridge)
struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) {
- if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER))
- mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
- entry->key.vid,
- SWITCHDEV_FDB_DEL_TO_BRIDGE);
+ mlx5_esw_bridge_fdb_del_notify(entry);
mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
}
}
@@ -841,10 +889,7 @@ static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan,
struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
list_for_each_entry_safe(entry, tmp, &vlan->fdb_list, vlan_list) {
- if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER))
- mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
- entry->key.vid,
- SWITCHDEV_FDB_DEL_TO_BRIDGE);
+ mlx5_esw_bridge_fdb_del_notify(entry);
mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
}
@@ -875,13 +920,13 @@ static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port,
}
static struct mlx5_esw_bridge_vlan *
-mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, struct mlx5_esw_bridge *bridge,
- struct mlx5_eswitch *esw)
+mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge *bridge, struct mlx5_eswitch *esw)
{
struct mlx5_esw_bridge_port *port;
struct mlx5_esw_bridge_vlan *vlan;
- port = mlx5_esw_bridge_port_lookup(vport_num, bridge);
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, bridge->br_offloads);
if (!port) {
/* FDB is added asynchronously on wq while port might have been deleted
* concurrently. Report on 'info' logging level and skip the FDB offload.
@@ -904,24 +949,23 @@ mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, struct mlx5_esw_bridge
}
static struct mlx5_esw_bridge_fdb_entry *
-mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsigned char *addr,
- u16 vid, bool added_by_user, struct mlx5_eswitch *esw,
- struct mlx5_esw_bridge *bridge)
+mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ const unsigned char *addr, u16 vid, bool added_by_user, bool peer,
+ struct mlx5_eswitch *esw, struct mlx5_esw_bridge *bridge)
{
struct mlx5_esw_bridge_vlan *vlan = NULL;
struct mlx5_esw_bridge_fdb_entry *entry;
struct mlx5_flow_handle *handle;
struct mlx5_fc *counter;
- struct mlx5e_priv *priv;
int err;
if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG && vid) {
- vlan = mlx5_esw_bridge_port_vlan_lookup(vid, vport_num, bridge, esw);
+ vlan = mlx5_esw_bridge_port_vlan_lookup(vid, vport_num, esw_owner_vhca_id, bridge,
+ esw);
if (IS_ERR(vlan))
return ERR_CAST(vlan);
}
- priv = netdev_priv(dev);
entry = kvzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return ERR_PTR(-ENOMEM);
@@ -930,19 +974,25 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi
entry->key.vid = vid;
entry->dev = dev;
entry->vport_num = vport_num;
+ entry->esw_owner_vhca_id = esw_owner_vhca_id;
entry->lastuse = jiffies;
if (added_by_user)
entry->flags |= MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER;
+ if (peer)
+ entry->flags |= MLX5_ESW_BRIDGE_FLAG_PEER;
- counter = mlx5_fc_create(priv->mdev, true);
+ counter = mlx5_fc_create(esw->dev, true);
if (IS_ERR(counter)) {
err = PTR_ERR(counter);
goto err_ingress_fc_create;
}
entry->ingress_counter = counter;
- handle = mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan, mlx5_fc_id(counter),
- bridge);
+ handle = peer ?
+ mlx5_esw_bridge_ingress_flow_peer_create(vport_num, addr, vlan,
+ mlx5_fc_id(counter), bridge) :
+ mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan,
+ mlx5_fc_id(counter), bridge);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
esw_warn(esw->dev, "Failed to create ingress flow(vport=%u,err=%d)\n",
@@ -962,7 +1012,8 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi
entry->filter_handle = handle;
}
- handle = mlx5_esw_bridge_egress_flow_create(vport_num, addr, vlan, bridge);
+ handle = mlx5_esw_bridge_egress_flow_create(vport_num, esw_owner_vhca_id, addr, vlan,
+ bridge);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
esw_warn(esw->dev, "Failed to create egress flow(vport=%u,err=%d)\n",
@@ -994,32 +1045,37 @@ err_egress_flow_create:
err_ingress_filter_flow_create:
mlx5_del_flow_rules(entry->ingress_handle);
err_ingress_flow_create:
- mlx5_fc_destroy(priv->mdev, entry->ingress_counter);
+ mlx5_fc_destroy(esw->dev, entry->ingress_counter);
err_ingress_fc_create:
kvfree(entry);
return ERR_PTR(err);
}
-int mlx5_esw_bridge_ageing_time_set(unsigned long ageing_time, struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
+int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time,
+ struct mlx5_esw_bridge_offloads *br_offloads)
{
- if (!vport->bridge)
+ struct mlx5_esw_bridge_port *port;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
return -EINVAL;
- vport->bridge->ageing_time = ageing_time;
+ port->bridge->ageing_time = clock_t_to_jiffies(ageing_time);
return 0;
}
-int mlx5_esw_bridge_vlan_filtering_set(bool enable, struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
+int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable,
+ struct mlx5_esw_bridge_offloads *br_offloads)
{
+ struct mlx5_esw_bridge_port *port;
struct mlx5_esw_bridge *bridge;
bool filtering;
- if (!vport->bridge)
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
return -EINVAL;
- bridge = vport->bridge;
+ bridge = port->bridge;
filtering = bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG;
if (filtering == enable)
return 0;
@@ -1033,114 +1089,143 @@ int mlx5_esw_bridge_vlan_filtering_set(bool enable, struct mlx5_eswitch *esw,
return 0;
}
-static int mlx5_esw_bridge_vport_init(struct mlx5_esw_bridge_offloads *br_offloads,
- struct mlx5_esw_bridge *bridge,
- struct mlx5_vport *vport)
+static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16 flags,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct mlx5_esw_bridge *bridge)
{
struct mlx5_eswitch *esw = br_offloads->esw;
struct mlx5_esw_bridge_port *port;
int err;
port = kvzalloc(sizeof(*port), GFP_KERNEL);
- if (!port) {
- err = -ENOMEM;
- goto err_port_alloc;
- }
+ if (!port)
+ return -ENOMEM;
- port->vport_num = vport->vport;
+ port->vport_num = vport_num;
+ port->esw_owner_vhca_id = esw_owner_vhca_id;
+ port->bridge = bridge;
+ port->flags |= flags;
xa_init(&port->vlans);
- err = mlx5_esw_bridge_port_insert(port, bridge);
+ err = mlx5_esw_bridge_port_insert(port, br_offloads);
if (err) {
- esw_warn(esw->dev, "Failed to insert port metadata (vport=%u,err=%d)\n",
- vport->vport, err);
+ esw_warn(esw->dev,
+ "Failed to insert port metadata (vport=%u,esw_owner_vhca_id=%u,err=%d)\n",
+ port->vport_num, port->esw_owner_vhca_id, err);
goto err_port_insert;
}
trace_mlx5_esw_bridge_vport_init(port);
- vport->bridge = bridge;
return 0;
err_port_insert:
kvfree(port);
-err_port_alloc:
- mlx5_esw_bridge_put(br_offloads, bridge);
return err;
}
static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_offloads,
- struct mlx5_vport *vport)
+ struct mlx5_esw_bridge_port *port)
{
- struct mlx5_esw_bridge *bridge = vport->bridge;
+ u16 vport_num = port->vport_num, esw_owner_vhca_id = port->esw_owner_vhca_id;
+ struct mlx5_esw_bridge *bridge = port->bridge;
struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
- struct mlx5_esw_bridge_port *port;
list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list)
- if (entry->vport_num == vport->vport)
+ if (entry->vport_num == vport_num && entry->esw_owner_vhca_id == esw_owner_vhca_id)
mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
- port = mlx5_esw_bridge_port_lookup(vport->vport, bridge);
- if (!port) {
- WARN(1, "Vport %u metadata not found on bridge", vport->vport);
- return -EINVAL;
- }
-
trace_mlx5_esw_bridge_vport_cleanup(port);
mlx5_esw_bridge_port_vlans_flush(port, bridge);
- mlx5_esw_bridge_port_erase(port, bridge);
+ mlx5_esw_bridge_port_erase(port, br_offloads);
kvfree(port);
mlx5_esw_bridge_put(br_offloads, bridge);
- vport->bridge = NULL;
return 0;
}
-int mlx5_esw_bridge_vport_link(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads,
- struct mlx5_vport *vport, struct netlink_ext_ack *extack)
+static int mlx5_esw_bridge_vport_link_with_flags(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ u16 flags,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
{
struct mlx5_esw_bridge *bridge;
int err;
- WARN_ON(vport->bridge);
-
bridge = mlx5_esw_bridge_lookup(ifindex, br_offloads);
if (IS_ERR(bridge)) {
NL_SET_ERR_MSG_MOD(extack, "Error checking for existing bridge with same ifindex");
return PTR_ERR(bridge);
}
- err = mlx5_esw_bridge_vport_init(br_offloads, bridge, vport);
- if (err)
+ err = mlx5_esw_bridge_vport_init(vport_num, esw_owner_vhca_id, flags, br_offloads, bridge);
+ if (err) {
NL_SET_ERR_MSG_MOD(extack, "Error initializing port");
+ goto err_vport;
+ }
+ return 0;
+
+err_vport:
+ mlx5_esw_bridge_put(br_offloads, bridge);
return err;
}
-int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads,
- struct mlx5_vport *vport, struct netlink_ext_ack *extack)
+int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
{
- struct mlx5_esw_bridge *bridge = vport->bridge;
+ return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id, 0,
+ br_offloads, extack);
+}
+
+int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_bridge_port *port;
int err;
- if (!bridge) {
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port) {
NL_SET_ERR_MSG_MOD(extack, "Port is not attached to any bridge");
return -EINVAL;
}
- if (bridge->ifindex != ifindex) {
+ if (port->bridge->ifindex != ifindex) {
NL_SET_ERR_MSG_MOD(extack, "Port is attached to another bridge");
return -EINVAL;
}
- err = mlx5_esw_bridge_vport_cleanup(br_offloads, vport);
+ err = mlx5_esw_bridge_vport_cleanup(br_offloads, port);
if (err)
NL_SET_ERR_MSG_MOD(extack, "Port cleanup failed");
return err;
}
-int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw,
- struct mlx5_vport *vport, struct netlink_ext_ack *extack)
+/* Link a peer port to the bridge identified by @ifindex.
+ *
+ * The port is linked with MLX5_ESW_BRIDGE_PORT_FLAG_PEER set. When the device
+ * does not report the merged_eswitch capability nothing is linked and 0 is
+ * returned.
+ *
+ * Return: 0 when skipped, otherwise the result of
+ * mlx5_esw_bridge_vport_link_with_flags().
+ */
+int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+				    struct mlx5_esw_bridge_offloads *br_offloads,
+				    struct netlink_ext_ack *extack)
+{
+	if (MLX5_CAP_ESW(br_offloads->esw->dev, merged_eswitch))
+		return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num,
+							     esw_owner_vhca_id,
+							     MLX5_ESW_BRIDGE_PORT_FLAG_PEER,
+							     br_offloads, extack);
+
+	return 0;
+}
+
+/* Unlink a peer port from the bridge identified by @ifindex.
+ *
+ * Peer ports are removed exactly like local ones: this simply forwards all
+ * arguments to mlx5_esw_bridge_vport_unlink() and returns its result.
+ */
+int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
+{
+ return mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id, br_offloads,
+ extack);
+}
+
+int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
{
struct mlx5_esw_bridge_port *port;
struct mlx5_esw_bridge_vlan *vlan;
- port = mlx5_esw_bridge_port_lookup(vport->vport, vport->bridge);
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
if (!port)
return -EINVAL;
@@ -1148,10 +1233,10 @@ int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw,
if (vlan) {
if (vlan->flags == flags)
return 0;
- mlx5_esw_bridge_vlan_cleanup(port, vlan, vport->bridge);
+ mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge);
}
- vlan = mlx5_esw_bridge_vlan_create(vid, flags, port, esw);
+ vlan = mlx5_esw_bridge_vlan_create(vid, flags, port, br_offloads->esw);
if (IS_ERR(vlan)) {
NL_SET_ERR_MSG_MOD(extack, "Failed to create VLAN entry");
return PTR_ERR(vlan);
@@ -1159,62 +1244,93 @@ int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw,
return 0;
}
-void mlx5_esw_bridge_port_vlan_del(u16 vid, struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid,
+ struct mlx5_esw_bridge_offloads *br_offloads)
{
struct mlx5_esw_bridge_port *port;
struct mlx5_esw_bridge_vlan *vlan;
- port = mlx5_esw_bridge_port_lookup(vport->vport, vport->bridge);
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
if (!port)
return;
vlan = mlx5_esw_bridge_vlan_lookup(vid, port);
if (!vlan)
return;
- mlx5_esw_bridge_vlan_cleanup(port, vlan, vport->bridge);
+ mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge);
}
-void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw,
- struct mlx5_vport *vport,
- struct switchdev_notifier_fdb_info *fdb_info)
+void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct switchdev_notifier_fdb_info *fdb_info)
{
- struct mlx5_esw_bridge *bridge = vport->bridge;
struct mlx5_esw_bridge_fdb_entry *entry;
- u16 vport_num = vport->vport;
+ struct mlx5_esw_bridge_fdb_key key;
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
- if (!bridge) {
- esw_info(esw->dev, "Vport is not assigned to bridge (vport=%u)\n", vport_num);
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port || port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER)
+ return;
+
+ bridge = port->bridge;
+ ether_addr_copy(key.addr, fdb_info->addr);
+ key.vid = fdb_info->vid;
+ entry = rhashtable_lookup_fast(&bridge->fdb_ht, &key, fdb_ht_params);
+ if (!entry) {
+ esw_debug(br_offloads->esw->dev,
+ "FDB entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n",
+ key.addr, key.vid, vport_num);
return;
}
- entry = mlx5_esw_bridge_fdb_entry_init(dev, vport_num, fdb_info->addr, fdb_info->vid,
- fdb_info->added_by_user, esw, bridge);
+ entry->lastuse = jiffies;
+}
+
+void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct switchdev_notifier_fdb_info *fdb_info)
+{
+ struct mlx5_esw_bridge_fdb_entry *entry;
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return;
+
+ bridge = port->bridge;
+ entry = mlx5_esw_bridge_fdb_entry_init(dev, vport_num, esw_owner_vhca_id, fdb_info->addr,
+ fdb_info->vid, fdb_info->added_by_user,
+ port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER,
+ br_offloads->esw, bridge);
if (IS_ERR(entry))
return;
if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)
mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid,
SWITCHDEV_FDB_OFFLOADED);
- else
+ else if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_PEER))
/* Take over dynamic entries to prevent kernel bridge from aging them out. */
mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid,
SWITCHDEV_FDB_ADD_TO_BRIDGE);
}
-void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw,
- struct mlx5_vport *vport,
+void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
struct switchdev_notifier_fdb_info *fdb_info)
{
- struct mlx5_esw_bridge *bridge = vport->bridge;
+ struct mlx5_eswitch *esw = br_offloads->esw;
struct mlx5_esw_bridge_fdb_entry *entry;
struct mlx5_esw_bridge_fdb_key key;
- u16 vport_num = vport->vport;
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
- if (!bridge) {
- esw_warn(esw->dev, "Vport is not assigned to bridge (vport=%u)\n", vport_num);
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
return;
- }
+ bridge = port->bridge;
ether_addr_copy(key.addr, fdb_info->addr);
key.vid = fdb_info->vid;
entry = rhashtable_lookup_fast(&bridge->fdb_ht, &key, fdb_ht_params);
@@ -1225,9 +1341,7 @@ void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw
return;
}
- if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER))
- mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid,
- SWITCHDEV_FDB_DEL_TO_BRIDGE);
+ mlx5_esw_bridge_fdb_del_notify(entry);
mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
}
@@ -1245,11 +1359,10 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads)
continue;
if (time_after(lastuse, entry->lastuse)) {
- mlx5_esw_bridge_fdb_entry_refresh(lastuse, entry);
- } else if (time_is_before_jiffies(entry->lastuse + bridge->ageing_time)) {
- mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
- entry->key.vid,
- SWITCHDEV_FDB_DEL_TO_BRIDGE);
+ mlx5_esw_bridge_fdb_entry_refresh(entry);
+ } else if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_PEER) &&
+ time_is_before_jiffies(entry->lastuse + bridge->ageing_time)) {
+ mlx5_esw_bridge_fdb_del_notify(entry);
mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
}
}
@@ -1258,13 +1371,11 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads)
static void mlx5_esw_bridge_flush(struct mlx5_esw_bridge_offloads *br_offloads)
{
- struct mlx5_eswitch *esw = br_offloads->esw;
- struct mlx5_vport *vport;
+ struct mlx5_esw_bridge_port *port;
unsigned long i;
- mlx5_esw_for_each_vport(esw, i, vport)
- if (vport->bridge)
- mlx5_esw_bridge_vport_cleanup(br_offloads, vport);
+ xa_for_each(&br_offloads->ports, i, port)
+ mlx5_esw_bridge_vport_cleanup(br_offloads, port);
WARN_ONCE(!list_empty(&br_offloads->bridges),
"Cleaning up bridge offloads while still having bridges attached\n");
@@ -1279,6 +1390,7 @@ struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&br_offloads->bridges);
+ xa_init(&br_offloads->ports);
br_offloads->esw = esw;
esw->br_offloads = br_offloads;
@@ -1293,6 +1405,7 @@ void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw)
return;
mlx5_esw_bridge_flush(br_offloads);
+ WARN_ON(!xa_empty(&br_offloads->ports));
esw->br_offloads = NULL;
kvfree(br_offloads);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
index d826942b27fc..efc39975226e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
@@ -7,6 +7,7 @@
#include <linux/notifier.h>
#include <linux/list.h>
#include <linux/workqueue.h>
+#include <linux/xarray.h>
#include "eswitch.h"
struct mlx5_flow_table;
@@ -15,6 +16,8 @@ struct mlx5_flow_group;
struct mlx5_esw_bridge_offloads {
struct mlx5_eswitch *esw;
struct list_head bridges;
+ struct xarray ports;
+
struct notifier_block netdev_nb;
struct notifier_block nb_blk;
struct notifier_block nb;
@@ -31,23 +34,36 @@ struct mlx5_esw_bridge_offloads {
struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw);
void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw);
-int mlx5_esw_bridge_vport_link(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads,
- struct mlx5_vport *vport, struct netlink_ext_ack *extack);
-int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads,
- struct mlx5_vport *vport, struct netlink_ext_ack *extack);
-void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw,
- struct mlx5_vport *vport,
+int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct switchdev_notifier_fdb_info *fdb_info);
+void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
struct switchdev_notifier_fdb_info *fdb_info);
-void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw,
- struct mlx5_vport *vport,
+void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
struct switchdev_notifier_fdb_info *fdb_info);
void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads);
-int mlx5_esw_bridge_ageing_time_set(unsigned long ageing_time, struct mlx5_eswitch *esw,
- struct mlx5_vport *vport);
-int mlx5_esw_bridge_vlan_filtering_set(bool enable, struct mlx5_eswitch *esw,
- struct mlx5_vport *vport);
-int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw,
- struct mlx5_vport *vport, struct netlink_ext_ack *extack);
-void mlx5_esw_bridge_port_vlan_del(u16 vid, struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time,
+ struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable,
+ struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid,
+ struct mlx5_esw_bridge_offloads *br_offloads);
#endif /* __MLX5_ESW_BRIDGE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
index d9ab2e8bc2cb..52964a82d6a6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
@@ -19,6 +19,11 @@ struct mlx5_esw_bridge_fdb_key {
enum {
MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0),
+ MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1),
+};
+
+enum {
+ MLX5_ESW_BRIDGE_PORT_FLAG_PEER = BIT(0),
};
struct mlx5_esw_bridge_fdb_entry {
@@ -28,6 +33,7 @@ struct mlx5_esw_bridge_fdb_entry {
struct list_head list;
struct list_head vlan_list;
u16 vport_num;
+ u16 esw_owner_vhca_id;
u16 flags;
struct mlx5_flow_handle *ingress_handle;
@@ -47,6 +53,9 @@ struct mlx5_esw_bridge_vlan {
struct mlx5_esw_bridge_port {
u16 vport_num;
+ u16 esw_owner_vhca_id;
+ u16 flags;
+ struct mlx5_esw_bridge *bridge;
struct xarray vlans;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index 1703384eca95..20af557ae30c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -91,9 +91,15 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_
if (err)
goto reg_err;
+ err = devlink_rate_leaf_create(dl_port, vport);
+ if (err)
+ goto rate_err;
+
vport->dl_port = dl_port;
return 0;
+rate_err:
+ devlink_port_unregister(dl_port);
reg_err:
mlx5_esw_dl_port_free(dl_port);
return err;
@@ -109,6 +115,12 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo
vport = mlx5_eswitch_get_vport(esw, vport_num);
if (IS_ERR(vport))
return;
+
+ if (vport->dl_port->devlink_rate) {
+ mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
+ devlink_rate_leaf_destroy(vport->dl_port);
+ }
+
devlink_port_unregister(vport->dl_port);
mlx5_esw_dl_port_free(vport->dl_port);
vport->dl_port = NULL;
@@ -148,8 +160,16 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p
if (err)
return err;
+ err = devlink_rate_leaf_create(dl_port, vport);
+ if (err)
+ goto rate_err;
+
vport->dl_port = dl_port;
return 0;
+
+rate_err:
+ devlink_port_unregister(dl_port);
+ return err;
}
void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num)
@@ -159,6 +179,12 @@ void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num
vport = mlx5_eswitch_get_vport(esw, vport_num);
if (IS_ERR(vport))
return;
+
+ if (vport->dl_port->devlink_rate) {
+ mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
+ devlink_rate_leaf_destroy(vport->dl_port);
+ }
+
devlink_port_unregister(vport->dl_port);
vport->dl_port = NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
index 227964b7d3b9..3401188e0a60 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
@@ -85,11 +85,18 @@ DECLARE_EVENT_CLASS(mlx5_esw_bridge_port_template,
TP_ARGS(port),
TP_STRUCT__entry(
__field(u16, vport_num)
+ __field(u16, esw_owner_vhca_id)
+ __field(u16, flags)
),
TP_fast_assign(
__entry->vport_num = port->vport_num;
+ __entry->esw_owner_vhca_id = port->esw_owner_vhca_id;
+ __entry->flags = port->flags;
),
- TP_printk("vport_num=%hu", __entry->vport_num)
+ TP_printk("vport_num=%hu esw_owner_vhca_id=%hu flags=%hx",
+ __entry->vport_num,
+ __entry->esw_owner_vhca_id,
+ __entry->flags)
);
DEFINE_EVENT(mlx5_esw_bridge_port_template,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h
new file mode 100644
index 000000000000..458baf0c6415
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_ESW_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_ESW_TP_
+
+#include <linux/tracepoint.h>
+#include "eswitch.h"
+
+/* Trace event emitted via trace_mlx5_esw_vport_qos_destroy(vport).
+ * Records the device name, the vport number and the vport's E-Switch
+ * scheduling element index (vport->qos.esw_tsar_ix).
+ */
+TRACE_EVENT(mlx5_esw_vport_qos_destroy,
+ TP_PROTO(const struct mlx5_vport *vport),
+ TP_ARGS(vport),
+ TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device))
+ __field(unsigned short, vport_id)
+ __field(unsigned int, tsar_ix)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device));
+ __entry->vport_id = vport->vport;
+ __entry->tsar_ix = vport->qos.esw_tsar_ix;
+ ),
+ TP_printk("(%s) vport=%hu tsar_ix=%u\n",
+ __get_str(devname), __entry->vport_id, __entry->tsar_ix
+ )
+);
+
+/* Event class shared by the vport QoS create and config events.
+ * Besides the fields of the destroy event it records the bw_share and
+ * max_rate passed by the caller and the vport's current rate group pointer
+ * (vport->qos.group, printed with %p).
+ */
+DECLARE_EVENT_CLASS(mlx5_esw_vport_qos_template,
+ TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
+ TP_ARGS(vport, bw_share, max_rate),
+ TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device))
+ __field(unsigned short, vport_id)
+ __field(unsigned int, tsar_ix)
+ __field(unsigned int, bw_share)
+ __field(unsigned int, max_rate)
+ __field(void *, group)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device));
+ __entry->vport_id = vport->vport;
+ __entry->tsar_ix = vport->qos.esw_tsar_ix;
+ __entry->bw_share = bw_share;
+ __entry->max_rate = max_rate;
+ __entry->group = vport->qos.group;
+ ),
+ TP_printk("(%s) vport=%hu tsar_ix=%u bw_share=%u, max_rate=%u group=%p\n",
+ __get_str(devname), __entry->vport_id, __entry->tsar_ix,
+ __entry->bw_share, __entry->max_rate, __entry->group
+ )
+);
+
+/* Emitted via trace_mlx5_esw_vport_qos_create(). */
+DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_create,
+ TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
+ TP_ARGS(vport, bw_share, max_rate)
+ );
+
+/* Emitted via trace_mlx5_esw_vport_qos_config(). */
+DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_config,
+ TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
+ TP_ARGS(vport, bw_share, max_rate)
+ );
+
+/* Event class shared by the rate group create and destroy events.
+ * Records the device name, the group pointer (printed with %p) and the
+ * TSAR index supplied by the caller.
+ */
+DECLARE_EVENT_CLASS(mlx5_esw_group_qos_template,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_esw_rate_group *group,
+ unsigned int tsar_ix),
+ TP_ARGS(dev, group, tsar_ix),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(const void *, group)
+ __field(unsigned int, tsar_ix)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->group = group;
+ __entry->tsar_ix = tsar_ix;
+ ),
+ TP_printk("(%s) group=%p tsar_ix=%u\n",
+ __get_str(devname), __entry->group, __entry->tsar_ix
+ )
+);
+
+/* Emitted via trace_mlx5_esw_group_qos_create(). */
+DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_create,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_esw_rate_group *group,
+ unsigned int tsar_ix),
+ TP_ARGS(dev, group, tsar_ix)
+ );
+
+/* Emitted via trace_mlx5_esw_group_qos_destroy(). */
+DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_destroy,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_esw_rate_group *group,
+ unsigned int tsar_ix),
+ TP_ARGS(dev, group, tsar_ix)
+ );
+
+/* Trace event emitted via trace_mlx5_esw_group_qos_config().
+ * Records the device name, the group pointer, its TSAR index and the
+ * bw_share / max_rate values the caller passed for the group.
+ */
+TRACE_EVENT(mlx5_esw_group_qos_config,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_esw_rate_group *group,
+ unsigned int tsar_ix, u32 bw_share, u32 max_rate),
+ TP_ARGS(dev, group, tsar_ix, bw_share, max_rate),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(const void *, group)
+ __field(unsigned int, tsar_ix)
+ __field(unsigned int, bw_share)
+ __field(unsigned int, max_rate)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->group = group;
+ __entry->tsar_ix = tsar_ix;
+ __entry->bw_share = bw_share;
+ __entry->max_rate = max_rate;
+ ),
+ TP_printk("(%s) group=%p tsar_ix=%u bw_share=%u max_rate=%u\n",
+ __get_str(devname), __entry->group, __entry->tsar_ix,
+ __entry->bw_share, __entry->max_rate
+ )
+);
+#endif /* _MLX5_ESW_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH esw/diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE qos_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
index 3da7becc1069..425c91814b34 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
@@ -364,6 +364,7 @@ static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw,
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest.vport.num = e->vport;
dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
+ dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1);
if (IS_ERR(e->fwd_rule)) {
mlx5_destroy_flow_group(e->fwd_grp);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
index d9041b16611d..df277a6cddc0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -11,6 +11,7 @@
#include "mlx5_core.h"
#include "eswitch.h"
#include "fs_core.h"
+#include "esw/qos.h"
enum {
LEGACY_VEPA_PRIO = 0,
@@ -508,3 +509,22 @@ unlock:
mutex_unlock(&esw->state_lock);
return err;
}
+
+/* Set the minimum and maximum rate of a vport.
+ *
+ * Returns -EPERM when mlx5_esw_allowed() rejects the E-Switch, the
+ * PTR_ERR() of the vport lookup when @vport is not found, otherwise the
+ * result of the QoS setters. The minimum rate is applied first; the maximum
+ * rate is only applied when the minimum rate succeeded. Both setters run
+ * under esw->state_lock.
+ */
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
+ u32 max_rate, u32 min_rate)
+{
+ /* NOTE(review): the lookup runs before the mlx5_esw_allowed() check. */
+ struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+ int err;
+
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+ if (IS_ERR(evport))
+ return PTR_ERR(evport);
+
+ mutex_lock(&esw->state_lock);
+ err = mlx5_esw_qos_set_vport_min_rate(esw, evport, min_rate, NULL);
+ if (!err)
+ err = mlx5_esw_qos_set_vport_max_rate(esw, evport, max_rate, NULL);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
new file mode 100644
index 000000000000..985e305179d1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -0,0 +1,869 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "eswitch.h"
+#include "esw/qos.h"
+#include "en/port.h"
+#define CREATE_TRACE_POINTS
+#include "diag/qos_tracepoint.h"
+
+/* Minimum supported BW share value by the HW is 1 Mbit/sec */
+#define MLX5_MIN_BW_SHARE 1
+
+/* Convert a rate into a bw_share value: divide @rate by @divider rounding
+ * up, raise the result to at least MLX5_MIN_BW_SHARE, then cap it at @limit.
+ * All comparisons are done as u32. @divider must be non-zero.
+ */
+#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
+ min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit)
+
+/* A rate group: one scheduling element under the E-Switch root TSAR that
+ * vports can be attached to.
+ */
+struct mlx5_esw_rate_group {
+ /* Scheduling element index returned by the create command */
+ u32 tsar_ix;
+ /* Last max rate successfully configured for the group (0 when unset) */
+ u32 max_rate;
+ /* Requested min rate; input to the bw_share normalization */
+ u32 min_rate;
+ /* bw_share currently programmed for the group */
+ u32 bw_share;
+ /* Entry in esw->qos.groups */
+ struct list_head list;
+};
+
+/* Modify the max rate and bw_share of an existing scheduling element.
+ *
+ * Fills parent_element_id, max_average_bw and bw_share in the caller's
+ * @sched_ctx and issues a modify command for @tsar_ix whose bitmask selects
+ * the max-average-BW and BW-share fields.
+ *
+ * Returns -EOPNOTSUPP when the device lacks the qos or esw_scheduling
+ * capability, otherwise the result of the modify command.
+ */
+static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
+ u32 parent_ix, u32 tsar_ix,
+ u32 max_rate, u32 bw_share)
+{
+ u32 bitmask = 0;
+
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
+
+ return mlx5_modify_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ sched_ctx,
+ tsar_ix,
+ bitmask);
+}
+
+/* Apply @max_rate and @bw_share to a rate group's TSAR, whose parent is the
+ * E-Switch root TSAR. On failure an extack message is set and the error is
+ * returned. The group's cached values are not updated here.
+ */
+static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
+ u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ int err;
+
+ err = esw_qos_tsar_config(dev, sched_ctx,
+ esw->qos.root_tsar_ix, group->tsar_ix,
+ max_rate, bw_share);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");
+
+ /* The trace event is emitted whether or not the command succeeded. */
+ trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate);
+
+ return err;
+}
+
+/* Apply @max_rate and @bw_share to a vport's scheduling element.
+ *
+ * The element's parent is the TSAR of the vport's rate group when it has
+ * one, otherwise the E-Switch root TSAR.
+ *
+ * Returns -EIO when QoS is not enabled on the vport, or the error of the
+ * modify command (also logged and reported through @extack). The vport's
+ * cached values are not updated here. The trace event is emitted only on
+ * success.
+ */
+static int esw_qos_vport_config(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share,
+ struct netlink_ext_ack *extack)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_esw_rate_group *group = vport->qos.group;
+ struct mlx5_core_dev *dev = esw->dev;
+ u32 parent_tsar_ix;
+ void *vport_elem;
+ int err;
+
+ if (!vport->qos.enabled)
+ return -EIO;
+
+ parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+ vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
+ element_attributes);
+ MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
+
+ err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix,
+ max_rate, bw_share);
+ if (err) {
+ esw_warn(esw->dev,
+ "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
+ vport->vport, err);
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");
+ return err;
+ }
+
+ trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);
+
+ return 0;
+}
+
+/* Compute the divider that scales min_rate values into bw_share values.
+ *
+ * With @group_level set, the largest min_rate among all rate groups on
+ * esw->qos.groups is used and @group is ignored (it may be NULL). Otherwise
+ * the largest min_rate among the enabled, QoS-enabled vports attached to
+ * @group is used; @group may be NULL for vports that have no rate group.
+ *
+ * Returns max(largest_min_rate / fw_max_bw_share, 1) when any min_rate is
+ * set. Returns 1 when no vport min_rate is set but the vports' group has a
+ * bw_share configured, so that the vports still get the minimal bw_share.
+ * Returns 0 otherwise, meaning bw_share should be cleared.
+ */
+static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
+					      struct mlx5_esw_rate_group *group,
+					      bool group_level)
+{
+	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+	struct mlx5_esw_rate_group *iter;
+	struct mlx5_vport *evport;
+	u32 max_guarantee = 0;
+	unsigned long i;
+
+	if (group_level) {
+		/* Use a dedicated iterator rather than shadowing @group. */
+		list_for_each_entry(iter, &esw->qos.groups, list) {
+			if (iter->min_rate < max_guarantee)
+				continue;
+			max_guarantee = iter->min_rate;
+		}
+	} else {
+		mlx5_esw_for_each_vport(esw, i, evport) {
+			if (!evport->enabled || !evport->qos.enabled ||
+			    evport->qos.group != group || evport->qos.min_rate < max_guarantee)
+				continue;
+			max_guarantee = evport->qos.min_rate;
+		}
+	}
+
+	if (max_guarantee)
+		return max_t(u32, max_guarantee / fw_max_bw_share, 1);
+
+	/* If vports min rate divider is 0 but their group has bw_share configured, then
+	 * need to set bw_share for vports to minimal value. Vports without a
+	 * rate group pass @group == NULL, so check it before dereferencing.
+	 */
+	if (!group_level && group && group->bw_share)
+		return 1;
+	return 0;
+}
+
+/* Translate @min_rate into a bw_share using @divider, capped at @fw_max.
+ * A zero @divider means no bw_share is to be configured, so 0 is returned.
+ */
+static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
+{
+	return divider ? MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max) : 0;
+}
+
+/* Recompute and program bw_share for every enabled, QoS-enabled vport that
+ * belongs to @group (NULL selects vports without a group).
+ *
+ * Vports whose bw_share is unchanged are skipped. The walk stops at the
+ * first failing vport and returns its error; vports already updated keep
+ * their new bw_share.
+ */
+static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
+ struct mlx5_vport *evport;
+ unsigned long i;
+ u32 bw_share;
+ int err;
+
+ mlx5_esw_for_each_vport(esw, i, evport) {
+ if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
+ continue;
+ bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
+
+ if (bw_share == evport->qos.bw_share)
+ continue;
+
+ err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
+ if (err)
+ return err;
+
+ /* Cache the value only after the device accepted it. */
+ evport->qos.bw_share = bw_share;
+ }
+
+ return 0;
+}
+
+/* Recompute and program bw_share for every rate group using @divider.
+ *
+ * Groups whose bw_share is unchanged are skipped. After a group is
+ * updated, its vports are re-normalized as well. The walk stops at the
+ * first failure and returns that error.
+ */
+static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
+ struct netlink_ext_ack *extack)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ struct mlx5_esw_rate_group *group;
+ u32 bw_share;
+ int err;
+
+ list_for_each_entry(group, &esw->qos.groups, list) {
+ bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);
+
+ if (bw_share == group->bw_share)
+ continue;
+
+ err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack);
+ if (err)
+ return err;
+
+ group->bw_share = bw_share;
+
+ /* All the group's vports need to be set with default bw_share
+ * to enable them with QOS
+ */
+ err = esw_qos_normalize_vports_min_rate(esw, group, extack);
+
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/* Set the minimum rate of a vport and re-normalize the bw_share of all
+ * vports in the same group. Caller must hold esw->state_lock.
+ *
+ * Returns -EOPNOTSUPP when a non-zero @min_rate is requested but the device
+ * lacks esw_bw_share or reports max_tsar_bw_share below MLX5_MIN_BW_SHARE.
+ * Returns 0 without touching the device when the value is unchanged.
+ * On normalization failure the cached min_rate is restored and the error
+ * returned; vports already reprogrammed are not rolled back here.
+ */
+int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
+ struct mlx5_vport *evport,
+ u32 min_rate,
+ struct netlink_ext_ack *extack)
+{
+ u32 fw_max_bw_share, previous_min_rate;
+ bool min_rate_supported;
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
+ fw_max_bw_share >= MLX5_MIN_BW_SHARE;
+ if (min_rate && !min_rate_supported)
+ return -EOPNOTSUPP;
+ if (min_rate == evport->qos.min_rate)
+ return 0;
+
+ /* The normalization reads evport->qos.min_rate, so store it first. */
+ previous_min_rate = evport->qos.min_rate;
+ evport->qos.min_rate = min_rate;
+ err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
+ if (err)
+ evport->qos.min_rate = previous_min_rate;
+
+ return err;
+}
+
+/* Set the maximum rate of a vport. Caller must hold esw->state_lock.
+ *
+ * Returns -EOPNOTSUPP when a non-zero @max_rate is requested but the device
+ * lacks esw_rate_limit, and 0 without touching the device when the value is
+ * unchanged. The cached max_rate is updated only when the device accepted
+ * the change.
+ */
+int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
+ struct mlx5_vport *evport,
+ u32 max_rate,
+ struct netlink_ext_ack *extack)
+{
+ u32 act_max_rate = max_rate;
+ bool max_rate_supported;
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
+
+ if (max_rate && !max_rate_supported)
+ return -EOPNOTSUPP;
+ if (max_rate == evport->qos.max_rate)
+ return 0;
+
+ /* If parent group has rate limit need to set to group
+ * value when new max rate is 0.
+ */
+ if (evport->qos.group && !max_rate)
+ act_max_rate = evport->qos.group->max_rate;
+
+ err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);
+
+ /* Cache the requested value (possibly 0), not the inherited group rate. */
+ if (!err)
+ evport->qos.max_rate = max_rate;
+
+ return err;
+}
+
+/* Set the minimum rate of a rate group and re-normalize the bw_share of all
+ * groups (and, through them, of their vports).
+ *
+ * Returns -EOPNOTSUPP when the device lacks esw_bw_share or reports
+ * max_tsar_bw_share below MLX5_MIN_BW_SHARE, and 0 when the value is
+ * unchanged. On failure the previous min_rate is restored and a second
+ * normalization pass tries to bring the device back to the previous state;
+ * the original error is returned either way.
+ */
+static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
+ u32 min_rate, struct netlink_ext_ack *extack)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ struct mlx5_core_dev *dev = esw->dev;
+ u32 previous_min_rate, divider;
+ int err;
+
+ if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE))
+ return -EOPNOTSUPP;
+
+ if (min_rate == group->min_rate)
+ return 0;
+
+ previous_min_rate = group->min_rate;
+ group->min_rate = min_rate;
+ divider = esw_qos_calculate_min_rate_divider(esw, group, true);
+ err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
+ if (err) {
+ group->min_rate = previous_min_rate;
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");
+
+ /* Attempt restoring previous configuration */
+ divider = esw_qos_calculate_min_rate_divider(esw, group, true);
+ if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");
+ }
+
+ return err;
+}
+
+/* Set the maximum rate of a rate group and propagate it to the group's
+ * vports that have no max rate of their own.
+ *
+ * Returns 0 when the value is unchanged, or the error of the group
+ * configuration. Once the group itself is configured, the return value is
+ * the result of the last vport update attempted in the loop.
+ */
+static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ u32 max_rate, struct netlink_ext_ack *extack)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+ int err;
+
+ if (group->max_rate == max_rate)
+ return 0;
+
+ err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
+ if (err)
+ return err;
+
+ group->max_rate = max_rate;
+
+ /* Any unlimited vports in the group should be set
+ * with the value of the group.
+ */
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ if (!vport->enabled || !vport->qos.enabled ||
+ vport->qos.group != group || vport->qos.max_rate)
+ continue;
+
+ /* NOTE(review): a failure does not stop the loop, and a later
+ * successful vport overwrites err with 0 - confirm this
+ * best-effort behaviour is intended.
+ */
+ err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack,
+ "E-Switch vport implicit rate limit setting failed");
+ }
+
+ return err;
+}
+
+/* Create the vport's scheduling element with the given @max_rate and
+ * @bw_share. The parent is the TSAR of vport->qos.group when set, otherwise
+ * the E-Switch root TSAR. The new element index is stored in
+ * vport->qos.esw_tsar_ix.
+ *
+ * Returns 0 on success or the error of the create command (also logged).
+ */
+static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_esw_rate_group *group = vport->qos.group;
+ struct mlx5_core_dev *dev = esw->dev;
+ u32 parent_tsar_ix;
+ void *vport_elem;
+ int err;
+
+ parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+ vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
+ MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+
+ err = mlx5_create_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ sched_ctx,
+ &vport->qos.esw_tsar_ix);
+ if (err) {
+ esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
+ vport->vport, err);
+ return err;
+ }
+
+ return 0;
+}
+
+/* Move a vport's scheduling element from @curr_group to @new_group.
+ *
+ * The existing element is destroyed and a new one is created under
+ * @new_group. If the creation fails, the element is re-created under
+ * @curr_group as a best-effort restore and the creation error is returned.
+ * If the destroy fails, nothing else is changed.
+ *
+ * Both group pointers are dereferenced when the vport has no max rate of
+ * its own, so the caller is expected to pass valid groups.
+ */
+static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *curr_group,
+ struct mlx5_esw_rate_group *new_group,
+ struct netlink_ext_ack *extack)
+{
+ u32 max_rate;
+ int err;
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ vport->qos.esw_tsar_ix);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
+ return err;
+ }
+
+ /* The create helper reads vport->qos.group to pick the parent TSAR. */
+ vport->qos.group = new_group;
+ max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;
+
+ /* If vport is unlimited, we set the group's value.
+ * Therefore, if the group is limited it will apply to
+ * the vport as well and if not, vport will remain unlimited.
+ */
+ err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
+ goto err_sched;
+ }
+
+ return 0;
+
+err_sched:
+ vport->qos.group = curr_group;
+ max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
+ if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
+ esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
+ vport->vport);
+
+ return err;
+}
+
+/* Attach a vport to @group, or to esw->qos.group0 when @group is NULL.
+ *
+ * Returns -EINVAL when the vport is not enabled, 0 when it is already in
+ * the requested group, or the error of moving its scheduling element.
+ * After a successful move, a vport with a non-zero bw_share triggers
+ * re-normalization of both the old and the new group; the results of those
+ * two normalizations are not checked.
+ */
+static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *new_group, *curr_group;
+ int err;
+
+ if (!vport->enabled)
+ return -EINVAL;
+
+ curr_group = vport->qos.group;
+ new_group = group ?: esw->qos.group0;
+ if (curr_group == new_group)
+ return 0;
+
+ err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
+ if (err)
+ return err;
+
+ /* Recalculate bw share weights of old and new groups */
+ if (vport->qos.bw_share) {
+ esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
+ esw_qos_normalize_vports_min_rate(esw, new_group, extack);
+ }
+
+ return 0;
+}
+
+/* Allocate a rate group, create its TSAR under the E-Switch root TSAR and
+ * link it on esw->qos.groups.
+ *
+ * If any existing group has a min_rate configured, the bw_share of all
+ * groups is re-normalized so the new group is included.
+ *
+ * Returns the new group, or an ERR_PTR():
+ *   -EOPNOTSUPP  device reports log_esw_max_sched_depth == 0
+ *   -ENOMEM      allocation failure
+ *   other        error of the create command or of the normalization
+ * The return value is never NULL.
+ */
+static struct mlx5_esw_rate_group *
+esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+{
+	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+	struct mlx5_esw_rate_group *group;
+	u32 divider;
+	int err;
+
+	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	group = kzalloc(sizeof(*group), GFP_KERNEL);
+	if (!group)
+		return ERR_PTR(-ENOMEM);
+
+	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
+		 esw->qos.root_tsar_ix);
+	err = mlx5_create_scheduling_element_cmd(esw->dev,
+						 SCHEDULING_HIERARCHY_E_SWITCH,
+						 tsar_ctx,
+						 &group->tsar_ix);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
+		goto err_sched_elem;
+	}
+
+	list_add_tail(&group->list, &esw->qos.groups);
+
+	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
+	if (divider) {
+		err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
+			goto err_min_rate;
+		}
+	}
+	trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);
+
+	return group;
+
+err_min_rate:
+	list_del(&group->list);
+	/* Do not store the cleanup result in err: a successful destroy would
+	 * turn the return value into ERR_PTR(0), i.e. NULL, which callers
+	 * checking IS_ERR() would take for a valid group.
+	 */
+	if (mlx5_destroy_scheduling_element_cmd(esw->dev,
+						SCHEDULING_HIERARCHY_E_SWITCH,
+						group->tsar_ix))
+		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
+err_sched_elem:
+	kfree(group);
+	return ERR_PTR(err);
+}
+
+/* Unlink a rate group, re-normalize the remaining groups, destroy the
+ * group's TSAR and free the group.
+ *
+ * A normalization failure is only reported through @extack; the return
+ * value is the result of the TSAR destroy command. The group is freed even
+ * when that command fails.
+ */
+static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ u32 divider;
+ int err;
+
+ /* Unlink first so the divider is computed without this group. */
+ list_del(&group->list);
+
+ divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
+ err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ group->tsar_ix);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");
+
+ trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
+ kfree(group);
+ return err;
+}
+
+/* Report whether the device advertises support for the given E-Switch
+ * scheduling element @type in its esw_element_type capability mask.
+ * Unknown types are reported as unsupported without reading the capability.
+ */
+static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
+{
+	u32 cap_mask;
+
+	switch (type) {
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
+		cap_mask = ELEMENT_TYPE_CAP_MASK_TASR;
+		break;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
+		cap_mask = ELEMENT_TYPE_CAP_MASK_VPORT;
+		break;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
+		cap_mask = ELEMENT_TYPE_CAP_MASK_VPORT_TC;
+		break;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
+		cap_mask = ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
+		break;
+	default:
+		return false;
+	}
+
+	return MLX5_CAP_QOS(dev, esw_element_type) & cap_mask;
+}
+
+/* Enable E-Switch QoS: create the root TSAR and, when the device supports
+ * a scheduling hierarchy (log_esw_max_sched_depth != 0), the default rate
+ * group esw->qos.group0.
+ *
+ * Does nothing when the device lacks the qos / esw_scheduling capabilities
+ * or TSAR element support, or when QoS is already enabled. Takes
+ * esw->state_lock. Failures are logged and leave QoS disabled with
+ * esw->qos.group0 set to NULL.
+ */
+void mlx5_esw_qos_create(struct mlx5_eswitch *esw)
+{
+	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+	struct mlx5_core_dev *dev = esw->dev;
+	__be32 *attr;
+	int err;
+
+	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+		return;
+
+	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
+		return;
+
+	mutex_lock(&esw->state_lock);
+	if (esw->qos.enabled)
+		goto unlock;
+
+	MLX5_SET(scheduling_context, tsar_ctx, element_type,
+		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+
+	/* Write the TSAR type (DWRR) into the first dword of element_attributes. */
+	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
+	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
+
+	err = mlx5_create_scheduling_element_cmd(dev,
+						 SCHEDULING_HIERARCHY_E_SWITCH,
+						 tsar_ctx,
+						 &esw->qos.root_tsar_ix);
+	if (err) {
+		esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
+		goto unlock;
+	}
+
+	INIT_LIST_HEAD(&esw->qos.groups);
+	if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
+		esw->qos.group0 = esw_qos_create_rate_group(esw, NULL);
+		if (IS_ERR(esw->qos.group0)) {
+			esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
+				 PTR_ERR(esw->qos.group0));
+			goto err_group0;
+		}
+	}
+	esw->qos.enabled = true;
+unlock:
+	mutex_unlock(&esw->state_lock);
+	return;
+
+err_group0:
+	/* Do not leave an ERR_PTR behind: other code treats group0 as either
+	 * NULL or a valid group (e.g. "group ?: esw->qos.group0").
+	 */
+	esw->qos.group0 = NULL;
+	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+						  SCHEDULING_HIERARCHY_E_SWITCH,
+						  esw->qos.root_tsar_ix);
+	if (err)
+		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
+	mutex_unlock(&esw->state_lock);
+}
+
+/* Disable E-Switch QoS: destroy the devlink rate nodes, then, under
+ * esw->state_lock, destroy group0 (if any) and the root TSAR.
+ *
+ * The QoS teardown is skipped when QoS is not enabled. A failure to destroy
+ * the root TSAR is logged and QoS is marked disabled regardless.
+ */
+void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw)
+{
+ struct devlink *devlink = priv_to_devlink(esw->dev);
+ int err;
+
+ /* Called before state_lock is taken. */
+ devlink_rate_nodes_destroy(devlink);
+ mutex_lock(&esw->state_lock);
+ if (!esw->qos.enabled)
+ goto unlock;
+
+ if (esw->qos.group0)
+ esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ esw->qos.root_tsar_ix);
+ if (err)
+ esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
+
+ esw->qos.enabled = false;
+unlock:
+ mutex_unlock(&esw->state_lock);
+}
+
+/* Enable QoS on a vport: attach it to group0 and create its scheduling
+ * element with the given @max_rate and @bw_share. Caller must hold
+ * esw->state_lock.
+ *
+ * Returns 0 without doing anything when E-Switch QoS is disabled, -EEXIST
+ * when QoS is already enabled on the vport, otherwise the result of the
+ * element creation.
+ */
+int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share)
+{
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ if (!esw->qos.enabled)
+ return 0;
+
+ if (vport->qos.enabled)
+ return -EEXIST;
+
+ /* group0 may be NULL; the element is then parented to the root TSAR. */
+ vport->qos.group = esw->qos.group0;
+
+ err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
+ if (!err) {
+ vport->qos.enabled = true;
+ trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);
+ }
+
+ return err;
+}
+
+/* Disable QoS on a vport by destroying its scheduling element. Caller must
+ * hold esw->state_lock.
+ *
+ * Does nothing when QoS is disabled on the E-Switch or on the vport. Warns
+ * when the vport is still attached to a group other than group0. A failed
+ * destroy command is logged and the vport is marked disabled regardless.
+ */
+void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ if (!esw->qos.enabled || !vport->qos.enabled)
+ return;
+ WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
+ "Disabling QoS on port before detaching it from group");
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ vport->qos.esw_tsar_ix);
+ if (err)
+ esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
+ vport->vport, err);
+
+ vport->qos.enabled = false;
+ trace_mlx5_esw_vport_qos_destroy(vport);
+}
+
+/* Modify only the max average bandwidth of a vport's scheduling element.
+ *
+ * Returns the PTR_ERR() of the vport lookup when @vport_num is not found,
+ * -EOPNOTSUPP when QoS is not enabled on the vport, otherwise the result of
+ * the modify command. The cached vport->qos.max_rate is not updated, and
+ * this function does not take esw->state_lock itself.
+ */
+int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
+{
+ u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_vport *vport;
+ u32 bitmask;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ if (!vport->qos.enabled)
+ return -EOPNOTSUPP;
+
+ MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
+ bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+
+ return mlx5_modify_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ ctx,
+ vport->qos.esw_tsar_ix,
+ bitmask);
+}
+
+#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
+
+/* Convert the bytes-per-second value pointed to by @rate into megabits per
+ * second, overwriting *rate on success. @name is only used in log messages.
+ *
+ * Returns the error of the link speed query when the maximum link speed
+ * cannot be read, and -EINVAL when the value is not a whole multiple of
+ * 1 Mbps or when the converted rate exceeds the maximum link speed. *rate
+ * is left untouched on error.
+ */
+static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
+ u64 *rate, struct netlink_ext_ack *extack)
+{
+ u32 link_speed_max, reminder;
+ u64 value;
+ int err;
+
+ err = mlx5e_port_max_linkspeed(mdev, &link_speed_max);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
+ return err;
+ }
+
+ /* value = *rate / 125000; reminder receives the remainder of the division. */
+ value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder);
+ if (reminder) {
+ pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
+ name, *rate);
+ NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
+ return -EINVAL;
+ }
+
+ if (value > link_speed_max) {
+ pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
+ name, value, link_speed_max);
+ NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");
+ return -EINVAL;
+ }
+
+ *rate = value;
+ return 0;
+}
+
+/* Eswitch devlink rate API */
+
+/* devlink rate leaf callback: set the tx_share (minimum rate) of a vport.
+ *
+ * @priv is the vport the leaf was created for. @tx_share is in bytes per
+ * second and is converted to Mbps before being applied under
+ * esw->state_lock. Returns -EPERM when mlx5_esw_allowed() rejects the
+ * E-Switch, or the conversion / setter error.
+ */
+int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack)
+{
+ struct mlx5_vport *vport = priv;
+ struct mlx5_eswitch *esw;
+ int err;
+
+ esw = vport->dev->priv.eswitch;
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+
+ err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ err = mlx5_esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+/* devlink rate leaf callback: set the tx_max (maximum rate) of a vport.
+ *
+ * @priv is the vport the leaf was created for. @tx_max is in bytes per
+ * second and is converted to Mbps before being applied under
+ * esw->state_lock. Returns -EPERM when mlx5_esw_allowed() rejects the
+ * E-Switch, or the conversion / setter error.
+ */
+int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack)
+{
+ struct mlx5_vport *vport = priv;
+ struct mlx5_eswitch *esw;
+ int err;
+
+ esw = vport->dev->priv.eswitch;
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+
+ err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ err = mlx5_esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+/* devlink rate node callback: set the tx_share (minimum rate) of a rate
+ * group.
+ *
+ * @priv is the rate group stored by the node_new callback. @tx_share is in
+ * bytes per second and is converted to Mbps before being applied under
+ * esw->state_lock. Returns the conversion or setter error.
+ */
+int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_esw_rate_group *group = priv;
+ int err;
+
+ err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_set_group_min_rate(esw, group, tx_share, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+/* devlink rate node callback: set the tx_max (maximum rate) of a rate
+ * group.
+ *
+ * @priv is the rate group stored by the node_new callback. @tx_max is in
+ * bytes per second and is converted to Mbps before being applied under
+ * esw->state_lock. Returns the conversion or setter error.
+ */
+int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_esw_rate_group *group = priv;
+ int err;
+
+ err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+/* devlink rate node callback: create a rate group for a new rate node.
+ *
+ * On success the new group is stored in *@priv. Returns the PTR_ERR() of
+ * the E-Switch lookup, -EOPNOTSUPP when the E-Switch is not in offloads
+ * (switchdev) mode, or the error of the group creation. *@priv is left
+ * untouched on error.
+ */
+int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *group;
+ struct mlx5_eswitch *esw;
+ int err = 0;
+
+ esw = mlx5_devlink_eswitch_get(rate_node->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ mutex_lock(&esw->state_lock);
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Rate node creation supported only in switchdev mode");
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
+ group = esw_qos_create_rate_group(esw, extack);
+ if (IS_ERR(group)) {
+ err = PTR_ERR(group);
+ goto unlock;
+ }
+
+ *priv = group;
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+/* devlink rate node callback: destroy the rate group behind a rate node.
+ *
+ * @priv is the rate group stored by the node_new callback; it is freed by
+ * the destroy helper. Returns the PTR_ERR() of the E-Switch lookup or the
+ * result of the group destruction, which runs under esw->state_lock.
+ */
+int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *group = priv;
+ struct mlx5_eswitch *esw;
+ int err;
+
+ esw = mlx5_devlink_eswitch_get(rate_node->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_destroy_rate_group(esw, group, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+/* Locked wrapper around esw_qos_vport_update_group(): attach @vport to
+ * @group (NULL selects group0) while holding esw->state_lock. The caller
+ * must not already hold that lock.
+ */
+int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_vport_update_group(esw, vport, group, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+/* devlink rate callback: change the parent rate node of a leaf.
+ *
+ * @priv is the vport behind the leaf and @parent_priv the rate group behind
+ * the new parent node. When @parent is NULL the vport is detached from its
+ * group by passing a NULL group to the update helper.
+ */
+int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
+				     struct devlink_rate *parent,
+				     void *priv, void *parent_priv,
+				     struct netlink_ext_ack *extack)
+{
+	struct mlx5_vport *vport = priv;
+	struct mlx5_esw_rate_group *group = parent ? parent_priv : NULL;
+
+	return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
+					       vport, group, extack);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
new file mode 100644
index 000000000000..28451abe2d2f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_ESW_QOS_H__
+#define __MLX5_ESW_QOS_H__
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+/* Per-vport rate setters. */
+int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
+ struct mlx5_vport *evport,
+ u32 min_rate,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
+ struct mlx5_vport *evport,
+ u32 max_rate,
+ struct netlink_ext_ack *extack);
+/* E-Switch level and per-vport QoS enable / disable. */
+void mlx5_esw_qos_create(struct mlx5_eswitch *esw);
+void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw);
+int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share);
+void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
+/* devlink rate callbacks for leaf (vport) and node (rate group) objects. */
+int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
+ struct devlink_rate *parent,
+ void *priv, void *parent_priv,
+ struct netlink_ext_ack *extack);
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
deleted file mode 100644
index 2a3f4be10030..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2021 Mellanox Technologies. */
-
-#ifndef __MLX5_EN_TC_SAMPLE_H__
-#define __MLX5_EN_TC_SAMPLE_H__
-
-#include "en.h"
-#include "eswitch.h"
-
-struct mlx5e_priv;
-struct mlx5_flow_attr;
-struct mlx5_esw_psample;
-
-struct mlx5_sample_attr {
- u32 group_num;
- u32 rate;
- u32 trunc_size;
- u32 restore_obj_id;
- u32 sampler_id;
- struct mlx5_flow_table *sample_default_tbl;
- struct mlx5_sample_flow *sample_flow;
-};
-
-void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj);
-
-struct mlx5_flow_handle *
-mlx5_esw_sample_offload(struct mlx5_esw_psample *sample_priv,
- struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr);
-
-void
-mlx5_esw_sample_unoffload(struct mlx5_esw_psample *sample_priv,
- struct mlx5_flow_handle *rule,
- struct mlx5_flow_attr *attr);
-
-struct mlx5_esw_psample *
-mlx5_esw_sample_init(struct mlx5e_priv *priv);
-
-void
-mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample);
-
-#endif /* __MLX5_EN_TC_SAMPLE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 97e6cb6f13c1..ec136b499204 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -38,6 +38,7 @@
#include <linux/mlx5/mpfs.h>
#include "esw/acl/lgcy.h"
#include "esw/legacy.h"
+#include "esw/qos.h"
#include "mlx5_core.h"
#include "lib/eq.h"
#include "eswitch.h"
@@ -740,201 +741,6 @@ static void esw_vport_change_handler(struct work_struct *work)
mutex_unlock(&esw->state_lock);
}
-static bool element_type_supported(struct mlx5_eswitch *esw, int type)
-{
- const struct mlx5_core_dev *dev = esw->dev;
-
- switch (type) {
- case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
- return MLX5_CAP_QOS(dev, esw_element_type) &
- ELEMENT_TYPE_CAP_MASK_TASR;
- case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
- return MLX5_CAP_QOS(dev, esw_element_type) &
- ELEMENT_TYPE_CAP_MASK_VPORT;
- case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
- return MLX5_CAP_QOS(dev, esw_element_type) &
- ELEMENT_TYPE_CAP_MASK_VPORT_TC;
- case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
- return MLX5_CAP_QOS(dev, esw_element_type) &
- ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
- }
- return false;
-}
-
-/* Vport QoS management */
-static void esw_create_tsar(struct mlx5_eswitch *esw)
-{
- u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
- struct mlx5_core_dev *dev = esw->dev;
- __be32 *attr;
- int err;
-
- if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
- return;
-
- if (!element_type_supported(esw, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
- return;
-
- if (esw->qos.enabled)
- return;
-
- MLX5_SET(scheduling_context, tsar_ctx, element_type,
- SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
-
- attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
- *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
-
- err = mlx5_create_scheduling_element_cmd(dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- tsar_ctx,
- &esw->qos.root_tsar_id);
- if (err) {
- esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", err);
- return;
- }
-
- esw->qos.enabled = true;
-}
-
-static void esw_destroy_tsar(struct mlx5_eswitch *esw)
-{
- int err;
-
- if (!esw->qos.enabled)
- return;
-
- err = mlx5_destroy_scheduling_element_cmd(esw->dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- esw->qos.root_tsar_id);
- if (err)
- esw_warn(esw->dev, "E-Switch destroy TSAR failed (%d)\n", err);
-
- esw->qos.enabled = false;
-}
-
-static int esw_vport_enable_qos(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport,
- u32 initial_max_rate, u32 initial_bw_share)
-{
- u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
- struct mlx5_core_dev *dev = esw->dev;
- void *vport_elem;
- int err = 0;
-
- if (!esw->qos.enabled)
- return 0;
-
- if (vport->qos.enabled)
- return -EEXIST;
-
- MLX5_SET(scheduling_context, sched_ctx, element_type,
- SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
- vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
- element_attributes);
- MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
- MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
- esw->qos.root_tsar_id);
- MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
- initial_max_rate);
- MLX5_SET(scheduling_context, sched_ctx, bw_share, initial_bw_share);
-
- err = mlx5_create_scheduling_element_cmd(dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- sched_ctx,
- &vport->qos.esw_tsar_ix);
- if (err) {
- esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
- vport->vport, err);
- return err;
- }
-
- vport->qos.enabled = true;
- return 0;
-}
-
-static void esw_vport_disable_qos(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- int err;
-
- if (!vport->qos.enabled)
- return;
-
- err = mlx5_destroy_scheduling_element_cmd(esw->dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- vport->qos.esw_tsar_ix);
- if (err)
- esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
- vport->vport, err);
-
- vport->qos.enabled = false;
-}
-
-static int esw_vport_qos_config(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport,
- u32 max_rate, u32 bw_share)
-{
- u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
- struct mlx5_core_dev *dev = esw->dev;
- void *vport_elem;
- u32 bitmask = 0;
- int err = 0;
-
- if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
- return -EOPNOTSUPP;
-
- if (!vport->qos.enabled)
- return -EIO;
-
- MLX5_SET(scheduling_context, sched_ctx, element_type,
- SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
- vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
- element_attributes);
- MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
- MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
- esw->qos.root_tsar_id);
- MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
- max_rate);
- MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
- bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
- bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
-
- err = mlx5_modify_scheduling_element_cmd(dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- sched_ctx,
- vport->qos.esw_tsar_ix,
- bitmask);
- if (err) {
- esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
- vport->vport, err);
- return err;
- }
-
- return 0;
-}
-
-int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
- u32 rate_mbps)
-{
- u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
- struct mlx5_vport *vport;
-
- vport = mlx5_eswitch_get_vport(esw, vport_num);
- if (IS_ERR(vport))
- return PTR_ERR(vport);
-
- if (!vport->qos.enabled)
- return -EOPNOTSUPP;
-
- MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
-
- return mlx5_modify_scheduling_element_cmd(esw->dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- ctx,
- vport->qos.esw_tsar_ix,
- MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW);
-}
-
static void node_guid_gen_from_mac(u64 *node_guid, const u8 *mac)
{
((u8 *)node_guid)[7] = mac[0];
@@ -976,7 +782,7 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
return err;
/* Attach vport to the eswitch rate limiter */
- esw_vport_enable_qos(esw, vport, vport->qos.max_rate, vport->qos.bw_share);
+ mlx5_esw_qos_vport_enable(esw, vport, vport->qos.max_rate, vport->qos.bw_share);
if (mlx5_esw_is_manager_vport(esw, vport_num))
return 0;
@@ -1013,7 +819,7 @@ static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport
vport_num, 1,
MLX5_VPORT_ADMIN_STATE_DOWN);
- esw_vport_disable_qos(esw, vport);
+ mlx5_esw_qos_vport_disable(esw, vport);
esw_vport_cleanup_acl(esw, vport);
}
@@ -1454,12 +1260,10 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs)
mlx5_eswitch_update_num_of_vfs(esw, num_vfs);
- esw_create_tsar(esw);
+ mlx5_esw_qos_create(esw);
esw->mode = mode;
- mlx5_lag_update(esw->dev);
-
if (mode == MLX5_ESWITCH_LEGACY) {
err = esw_legacy_enable(esw);
} else {
@@ -1486,7 +1290,7 @@ abort:
if (mode == MLX5_ESWITCH_OFFLOADS)
mlx5_rescan_drivers(esw->dev);
- esw_destroy_tsar(esw);
+ mlx5_esw_qos_destroy(esw);
mlx5_esw_acls_ns_cleanup(esw);
return err;
}
@@ -1494,7 +1298,7 @@ abort:
/**
* mlx5_eswitch_enable - Enable eswitch
* @esw: Pointer to eswitch
- * @num_vfs: Enable eswitch swich for given number of VFs.
+ * @num_vfs: Enable eswitch switch for given number of VFs.
* Caller must pass num_vfs > 0 when enabling eswitch for
* vf vports.
* mlx5_eswitch_enable() returns 0 on success or error code on failure.
@@ -1506,6 +1310,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
if (!mlx5_esw_allowed(esw))
return 0;
+ mlx5_lag_disable_change(esw->dev);
down_write(&esw->mode_lock);
if (esw->mode == MLX5_ESWITCH_NONE) {
ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs);
@@ -1519,6 +1324,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
esw->esw_funcs.num_vfs = num_vfs;
}
up_write(&esw->mode_lock);
+ mlx5_lag_enable_change(esw->dev);
return ret;
}
@@ -1550,12 +1356,10 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf)
old_mode = esw->mode;
esw->mode = MLX5_ESWITCH_NONE;
- mlx5_lag_update(esw->dev);
-
if (old_mode == MLX5_ESWITCH_OFFLOADS)
mlx5_rescan_drivers(esw->dev);
- esw_destroy_tsar(esw);
+ mlx5_esw_qos_destroy(esw);
mlx5_esw_acls_ns_cleanup(esw);
if (clear_vf)
@@ -1567,10 +1371,12 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
if (!mlx5_esw_allowed(esw))
return;
+ mlx5_lag_disable_change(esw->dev);
down_write(&esw->mode_lock);
mlx5_eswitch_disable_locked(esw, clear_vf);
esw->esw_funcs.num_vfs = 0;
up_write(&esw->mode_lock);
+ mlx5_lag_enable_change(esw->dev);
}
static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out)
@@ -1759,7 +1565,9 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
ida_init(&esw->offloads.vport_metadata_ida);
xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
mutex_init(&esw->state_lock);
+ lockdep_register_key(&esw->mode_lock_key);
init_rwsem(&esw->mode_lock);
+ lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key);
esw->enabled_vports = 0;
esw->mode = MLX5_ESWITCH_NONE;
@@ -1793,6 +1601,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
+ lockdep_unregister_key(&esw->mode_lock_key);
mutex_destroy(&esw->state_lock);
WARN_ON(!xa_empty(&esw->offloads.vhca_map));
xa_destroy(&esw->offloads.vhca_map);
@@ -1889,8 +1698,7 @@ is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num)
mlx5_esw_is_sf_vport(esw, vport_num);
}
-int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
- struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
u8 *hw_addr, int *hw_addr_len,
struct netlink_ext_ack *extack)
{
@@ -1899,7 +1707,7 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
int err = -EOPNOTSUPP;
u16 vport_num;
- esw = mlx5_devlink_eswitch_get(devlink);
+ esw = mlx5_devlink_eswitch_get(port->devlink);
if (IS_ERR(esw))
return PTR_ERR(esw);
@@ -1923,8 +1731,7 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
return err;
}
-int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink,
- struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
const u8 *hw_addr, int hw_addr_len,
struct netlink_ext_ack *extack)
{
@@ -1933,7 +1740,7 @@ int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink,
int err = -EOPNOTSUPP;
u16 vport_num;
- esw = mlx5_devlink_eswitch_get(devlink);
+ esw = mlx5_devlink_eswitch_get(port->devlink);
if (IS_ERR(esw)) {
NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr");
return PTR_ERR(esw);
@@ -2049,110 +1856,6 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
return err;
}
-static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
-{
- u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
- struct mlx5_vport *evport;
- u32 max_guarantee = 0;
- unsigned long i;
-
- mlx5_esw_for_each_vport(esw, i, evport) {
- if (!evport->enabled || evport->qos.min_rate < max_guarantee)
- continue;
- max_guarantee = evport->qos.min_rate;
- }
-
- if (max_guarantee)
- return max_t(u32, max_guarantee / fw_max_bw_share, 1);
- return 0;
-}
-
-static int normalize_vports_min_rate(struct mlx5_eswitch *esw)
-{
- u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
- u32 divider = calculate_vports_min_rate_divider(esw);
- struct mlx5_vport *evport;
- u32 vport_max_rate;
- u32 vport_min_rate;
- unsigned long i;
- u32 bw_share;
- int err;
-
- mlx5_esw_for_each_vport(esw, i, evport) {
- if (!evport->enabled)
- continue;
- vport_min_rate = evport->qos.min_rate;
- vport_max_rate = evport->qos.max_rate;
- bw_share = 0;
-
- if (divider)
- bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate,
- divider,
- fw_max_bw_share);
-
- if (bw_share == evport->qos.bw_share)
- continue;
-
- err = esw_vport_qos_config(esw, evport, vport_max_rate,
- bw_share);
- if (!err)
- evport->qos.bw_share = bw_share;
- else
- return err;
- }
-
- return 0;
-}
-
-int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
- u32 max_rate, u32 min_rate)
-{
- struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
- u32 fw_max_bw_share;
- u32 previous_min_rate;
- bool min_rate_supported;
- bool max_rate_supported;
- int err = 0;
-
- if (!mlx5_esw_allowed(esw))
- return -EPERM;
- if (IS_ERR(evport))
- return PTR_ERR(evport);
-
- fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
- min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
- fw_max_bw_share >= MLX5_MIN_BW_SHARE;
- max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
-
- if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported))
- return -EOPNOTSUPP;
-
- mutex_lock(&esw->state_lock);
-
- if (min_rate == evport->qos.min_rate)
- goto set_max_rate;
-
- previous_min_rate = evport->qos.min_rate;
- evport->qos.min_rate = min_rate;
- err = normalize_vports_min_rate(esw);
- if (err) {
- evport->qos.min_rate = previous_min_rate;
- goto unlock;
- }
-
-set_max_rate:
- if (max_rate == evport->qos.max_rate)
- goto unlock;
-
- err = esw_vport_qos_config(esw, evport, max_rate, evport->qos.bw_share);
- if (!err)
- evport->qos.max_rate = max_rate;
-
-unlock:
- mutex_unlock(&esw->state_lock);
- return err;
-}
-
int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
u16 vport_num,
struct ifla_vf_stats *vf_stats)
@@ -2366,10 +2069,23 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw)
*/
void mlx5_esw_unlock(struct mlx5_eswitch *esw)
{
+ if (!mlx5_esw_allowed(esw))
+ return;
up_write(&esw->mode_lock);
}
/**
+ * mlx5_esw_lock() - Take write lock on esw mode lock
+ * @esw: eswitch device.
+ */
+void mlx5_esw_lock(struct mlx5_eswitch *esw)
+{
+ if (!mlx5_esw_allowed(esw))
+ return;
+ down_write(&esw->mode_lock);
+}
+
+/**
* mlx5_eswitch_get_total_vports - Get total vports of the eswitch
*
* @dev: Pointer to core device
@@ -2384,3 +2100,15 @@ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
return mlx5_esw_allowed(esw) ? esw->total_vports : 0;
}
EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
+
+/**
+ * mlx5_eswitch_get_core_dev - Get the mdev device
+ * @esw: eswitch device.
+ *
+ * Return the mellanox core device which manages the eswitch.
+ */
+struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw)
+{
+ return mlx5_esw_allowed(esw) ? esw->dev : NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_core_dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index d562edf5b0bc..2c7444101bb9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -46,7 +46,7 @@
#include "lib/fs_chains.h"
#include "sf/sf.h"
#include "en/tc_ct.h"
-#include "esw/sample.h"
+#include "en/tc/sample.h"
enum mlx5_mapped_obj_type {
MLX5_MAPPED_OBJ_CHAIN,
@@ -61,6 +61,7 @@ struct mlx5_mapped_obj {
u32 group_id;
u32 rate;
u32 trunc_size;
+ u32 tunnel_id;
} sample;
};
};
@@ -75,17 +76,20 @@ struct mlx5_mapped_obj {
#define MLX5_MAX_MC_PER_VPORT(dev) \
(1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))
-#define MLX5_MIN_BW_SHARE 1
-
-#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
- min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit)
-
#define mlx5_esw_has_fwd_fdb(dev) \
MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table)
#define esw_chains(esw) \
((esw)->fdb_table.offloads.esw_chains_priv)
+enum {
+ MAPPING_TYPE_CHAIN,
+ MAPPING_TYPE_TUNNEL,
+ MAPPING_TYPE_TUNNEL_ENC_OPTS,
+ MAPPING_TYPE_LABELS,
+ MAPPING_TYPE_ZONE,
+};
+
struct vport_ingress {
struct mlx5_flow_table *acl;
struct mlx5_flow_handle *allow_rule;
@@ -124,6 +128,8 @@ struct vport_egress {
struct {
struct mlx5_flow_group *fwd_grp;
struct mlx5_flow_handle *fwd_rule;
+ struct mlx5_flow_handle *bounce_rule;
+ struct mlx5_flow_group *bounce_grp;
} offloads;
};
};
@@ -150,8 +156,6 @@ enum mlx5_eswitch_vport_event {
MLX5_VPORT_PROMISC_CHANGE = BIT(3),
};
-struct mlx5_esw_bridge;
-
struct mlx5_vport {
struct mlx5_core_dev *dev;
struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE];
@@ -173,6 +177,7 @@ struct mlx5_vport {
u32 bw_share;
u32 min_rate;
u32 max_rate;
+ struct mlx5_esw_rate_group *group;
} qos;
u16 vport;
@@ -180,7 +185,6 @@ struct mlx5_vport {
enum mlx5_eswitch_vport_event enabled_events;
int index;
struct devlink_port *dl_port;
- struct mlx5_esw_bridge *bridge;
};
struct mlx5_esw_indir_table;
@@ -302,7 +306,9 @@ struct mlx5_eswitch {
struct {
bool enabled;
- u32 root_tsar_id;
+ u32 root_tsar_ix;
+ struct mlx5_esw_rate_group *group0;
+ struct list_head groups; /* Protected by esw->state_lock */
} qos;
struct mlx5_esw_bridge_offloads *br_offloads;
@@ -315,6 +321,7 @@ struct mlx5_eswitch {
u32 large_group_num;
} params;
struct blocking_notifier_head n_head;
+ struct lock_class_key mode_lock_key;
};
void esw_offloads_disable(struct mlx5_eswitch *esw);
@@ -327,8 +334,7 @@ int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable);
u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw);
void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata);
-int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
- u32 rate_mbps);
+int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
@@ -351,6 +357,10 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
u16 vport_num, bool setting);
int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
u32 max_rate, u32 min_rate);
+int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack);
int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting);
int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting);
int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
@@ -461,7 +471,6 @@ struct mlx5_esw_flow_attr {
} dests[MLX5_MAX_FLOW_FWD_VPORTS];
struct mlx5_rx_tun_attr *rx_tun_attr;
struct mlx5_pkt_reformat *decap_pkt_reformat;
- struct mlx5_sample_attr *sample;
};
int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
@@ -475,12 +484,10 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
struct netlink_ext_ack *extack);
int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
enum devlink_eswitch_encap_mode *encap);
-int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
- struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
u8 *hw_addr, int *hw_addr_len,
struct netlink_ext_ack *extack);
-int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink,
- struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
const u8 *hw_addr, int hw_addr_len,
struct netlink_ext_ack *extack);
@@ -699,11 +706,18 @@ void mlx5_esw_get(struct mlx5_core_dev *dev);
void mlx5_esw_put(struct mlx5_core_dev *dev);
int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
void mlx5_esw_unlock(struct mlx5_eswitch *esw);
+void mlx5_esw_lock(struct mlx5_eswitch *esw);
void esw_vport_change_handle_locked(struct mlx5_vport *vport);
bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller);
+int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw);
+void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw);
+int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
+
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -719,6 +733,9 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
return ERR_PTR(-EOPNOTSUPP);
}
+static inline void mlx5_esw_unlock(struct mlx5_eswitch *esw) { return; }
+static inline void mlx5_esw_lock(struct mlx5_eswitch *esw) { return; }
+
static inline struct mlx5_flow_handle *
esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
{
@@ -731,6 +748,23 @@ mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev,
{
return vport_num;
}
+
+static inline int
+mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw)
+{
+ return 0;
+}
+
+static inline void
+mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw) {}
+
+static inline int
+mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+{
+ return 0;
+}
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 011e766e4f67..0d461e38add3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -48,6 +48,7 @@
#include "lib/fs_chains.h"
#include "en_tc.h"
#include "en/mapping.h"
+#include "devlink.h"
#define mlx5_esw_for_each_rep(esw, i, rep) \
xa_for_each(&((esw)->offloads.vport_reps), i, rep)
@@ -186,12 +187,12 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw,
static int
esw_setup_sampler_dest(struct mlx5_flow_destination *dest,
struct mlx5_flow_act *flow_act,
- struct mlx5_esw_flow_attr *esw_attr,
+ struct mlx5_flow_attr *attr,
int i)
{
flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
- dest[i].sampler_id = esw_attr->sample->sampler_id;
+ dest[i].sampler_id = attr->sample_attr->sampler_id;
return 0;
}
@@ -434,7 +435,7 @@ esw_setup_dests(struct mlx5_flow_destination *dest,
attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) {
- esw_setup_sampler_dest(dest, flow_act, esw_attr, *i);
+ esw_setup_sampler_dest(dest, flow_act, attr, *i);
(*i)++;
} else if (attr->dest_ft) {
esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i);
@@ -539,10 +540,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
flow_act.modify_hdr = attr->modify_hdr;
- /* esw_attr->sample is allocated only when there is a sample action */
- if (esw_attr->sample && esw_attr->sample->sample_default_tbl) {
- fdb = esw_attr->sample->sample_default_tbl;
- } else if (split) {
+ if (split) {
fwd_attr.chain = attr->chain;
fwd_attr.prio = attr->prio;
fwd_attr.vport = esw_attr->in_rep->vport;
@@ -926,6 +924,7 @@ out:
struct mlx5_flow_handle *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+ struct mlx5_eswitch *from_esw,
struct mlx5_eswitch_rep *rep,
u32 sqn)
{
@@ -944,10 +943,10 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
/* source vport is the esw manager */
- MLX5_SET(fte_match_set_misc, misc, source_port, rep->esw->manager_vport);
+ MLX5_SET(fte_match_set_misc, misc, source_port, from_esw->manager_vport);
if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(rep->esw->dev, vhca_id));
+ MLX5_CAP_GEN(from_esw->dev, vhca_id));
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
@@ -963,6 +962,9 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+
flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow_rule))
@@ -1613,7 +1615,18 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
goto ns_err;
}
- table_size = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+ /* To be strictly correct:
+ * MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ)
+ * should be:
+ * esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+ * peer_esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ
+ * but as the peer device might not be in switchdev mode it's not
+ * possible. We use the fact that by default FW sets max vfs and max sfs
+ * to the same value on both devices. If it needs to be changed in the future note
+ * the peer miss group should also be created based on the number of
+ * total vports of the peer (currently it also uses esw->total_vports).
+ */
+ table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) +
MLX5_ESW_MISS_FLOWS + esw->total_vports + esw->esw_funcs.num_vfs;
/* create the slow path fdb with encap set, so further table instances
@@ -1670,7 +1683,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
source_eswitch_owner_vhca_id_valid, 1);
}
- ix = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ;
+ /* See comment above table_size calculation */
+ ix = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ);
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
@@ -2310,14 +2324,293 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
}
+static int esw_set_uplink_slave_ingress_root(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+ struct mlx5_eswitch *esw;
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_vport *vport;
+ int err;
+
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type, FS_FT_ESW_INGRESS_ACL);
+ MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
+ MLX5_SET(set_flow_table_root_in, in, vport_number, MLX5_VPORT_UPLINK);
+
+ if (master) {
+ esw = master->priv.eswitch;
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+ MLX5_SET(set_flow_table_root_in, in, table_of_other_vport, 1);
+ MLX5_SET(set_flow_table_root_in, in, table_vport_number,
+ MLX5_VPORT_UPLINK);
+
+ ns = mlx5_get_flow_vport_acl_namespace(master,
+ MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ vport->index);
+ root = find_root(&ns->node);
+ mutex_lock(&root->chain_lock);
+
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(master, vhca_id));
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
+ } else {
+ esw = slave->priv.eswitch;
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+ ns = mlx5_get_flow_vport_acl_namespace(slave,
+ MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ vport->index);
+ root = find_root(&ns->node);
+ mutex_lock(&root->chain_lock);
+ MLX5_SET(set_flow_table_root_in, in, table_id, root->root_ft->id);
+ }
+
+ err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+ mutex_unlock(&root->chain_lock);
+
+ return err;
+}
+
+static int esw_set_slave_root_fdb(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_namespace *ns;
+ int err;
+
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type,
+ FS_FT_FDB);
+
+ if (master) {
+ ns = mlx5_get_flow_namespace(master,
+ MLX5_FLOW_NAMESPACE_FDB);
+ root = find_root(&ns->node);
+ mutex_lock(&root->chain_lock);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(master, vhca_id));
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
+ } else {
+ ns = mlx5_get_flow_namespace(slave,
+ MLX5_FLOW_NAMESPACE_FDB);
+ root = find_root(&ns->node);
+ mutex_lock(&root->chain_lock);
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
+ }
+
+ err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+ mutex_unlock(&root->chain_lock);
+
+ return err;
+}
+
+static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave,
+ struct mlx5_vport *vport,
+ struct mlx5_flow_table *acl)
+{
+ struct mlx5_flow_handle *flow_rule = NULL;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ void *misc;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
+ MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(slave, vhca_id));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = slave->priv.eswitch->manager_vport;
+ dest.vport.vhca_id = MLX5_CAP_GEN(slave, vhca_id);
+ dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+
+ flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act,
+ &dest, 1);
+ if (IS_ERR(flow_rule))
+ err = PTR_ERR(flow_rule);
+ else
+ vport->egress.offloads.bounce_rule = flow_rule;
+
+ kvfree(spec);
+ return err;
+}
+
+static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_eswitch *esw = master->priv.eswitch;
+ struct mlx5_flow_table_attr ft_attr = {
+ .max_fte = 1, .prio = 0, .level = 0,
+ };
+ struct mlx5_flow_namespace *egress_ns;
+ struct mlx5_flow_table *acl;
+ struct mlx5_flow_group *g;
+ struct mlx5_vport *vport;
+ void *match_criteria;
+ u32 *flow_group_in;
+ int err;
+
+ vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ egress_ns = mlx5_get_flow_vport_acl_namespace(master,
+ MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+ vport->index);
+ if (!egress_ns)
+ return -EINVAL;
+
+ if (vport->egress.acl)
+ return -EINVAL;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ acl = mlx5_create_vport_flow_table(egress_ns, &ft_attr, vport->vport);
+ if (IS_ERR(acl)) {
+ err = PTR_ERR(acl);
+ goto out;
+ }
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_port);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+ g = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ goto err_group;
+ }
+
+ err = __esw_set_master_egress_rule(master, slave, vport, acl);
+ if (err)
+ goto err_rule;
+
+ vport->egress.acl = acl;
+ vport->egress.offloads.bounce_grp = g;
+
+ kvfree(flow_group_in);
+
+ return 0;
+
+err_rule:
+ mlx5_destroy_flow_group(g);
+err_group:
+ mlx5_destroy_flow_table(acl);
+out:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(dev->priv.eswitch,
+ dev->priv.eswitch->manager_vport);
+
+ esw_acl_egress_ofld_cleanup(vport);
+}
+
+int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw)
+{
+ int err;
+
+ err = esw_set_uplink_slave_ingress_root(master_esw->dev,
+ slave_esw->dev);
+ if (err)
+ return -EINVAL;
+
+ err = esw_set_slave_root_fdb(master_esw->dev,
+ slave_esw->dev);
+ if (err)
+ goto err_fdb;
+
+ err = esw_set_master_egress_rule(master_esw->dev,
+ slave_esw->dev);
+ if (err)
+ goto err_acl;
+
+ return err;
+
+err_acl:
+ esw_set_slave_root_fdb(NULL, slave_esw->dev);
+
+err_fdb:
+ esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev);
+
+ return err;
+}
+
+void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw)
+{
+ esw_unset_master_egress_rule(master_esw->dev);
+ esw_set_slave_root_fdb(NULL, slave_esw->dev);
+ esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev);
+}
+
#define ESW_OFFLOADS_DEVCOM_PAIR (0)
#define ESW_OFFLOADS_DEVCOM_UNPAIR (1)
-static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
- struct mlx5_eswitch *peer_esw)
+/*
+ * Deliver MLX5_SWITCHDEV_EVENT_UNPAIR to every loaded representor of @esw.
+ *
+ * For each rep, the rep types are visited from NUM_REP_TYPES - 1 down to 0
+ * (the reverse of the order used when pairing). The event callback is
+ * invoked only when the rep is in REP_LOADED state for that type and the
+ * type's ops provide an ->event handler. The callback's return value is
+ * ignored.
+ */
+static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw)
 {
+ const struct mlx5_eswitch_rep_ops *ops;
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+ u8 rep_type;
- return esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
+ mlx5_esw_for_each_rep(esw, i, rep) {
+ rep_type = NUM_REP_TYPES;
+ while (rep_type--) {
+ ops = esw->offloads.rep_ops[rep_type];
+ if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+ ops->event)
+ ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, NULL);
+ }
+ }
 }
static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
@@ -2325,9 +2618,42 @@ static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
mlx5e_tc_clean_fdb_peer_flows(esw);
#endif
+ mlx5_esw_offloads_rep_event_unpair(esw);
esw_del_fdb_peer_miss_rules(esw);
}
+/*
+ * Pair @esw with @peer_esw.
+ *
+ * First adds the FDB peer miss rules towards the peer device. Then, for
+ * every representor and every rep type (ascending order), delivers
+ * MLX5_SWITCHDEV_EVENT_PAIR with @peer_esw as event data to reps that are in
+ * REP_LOADED state and whose ops provide an ->event handler.
+ *
+ * Returns 0 on success. If adding the miss rules fails, that error is
+ * returned directly. If any event handler fails, the whole pairing is rolled
+ * back through mlx5_esw_offloads_unpair() and the handler's error is
+ * returned.
+ */
+static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch *peer_esw)
+{
+ const struct mlx5_eswitch_rep_ops *ops;
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+ u8 rep_type;
+ int err;
+
+ err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
+ if (err)
+ return err;
+
+ mlx5_esw_for_each_rep(esw, i, rep) {
+ for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
+ ops = esw->offloads.rep_ops[rep_type];
+ if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+ ops->event) {
+ err = ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_PAIR, peer_esw);
+ if (err)
+ goto err_out;
+ }
+ }
+ }
+
+ return 0;
+
+err_out:
+ /* Full unpair: also notifies reps that were already paired above. */
+ mlx5_esw_offloads_unpair(esw);
+ return err;
+}
+
static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
struct mlx5_eswitch *peer_esw,
bool pair)
@@ -2618,6 +2944,31 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
esw_vport_destroy_offloads_acl_tables(esw, vport);
}
+/*
+ * Re-run the representor load for @esw.
+ *
+ * Nothing is done (0 is returned) when @esw is NULL, when the eswitch is not
+ * in MLX5_ESWITCH_OFFLOADS mode, or when the uplink rep's REP_ETH state is
+ * not REP_LOADED.
+ *
+ * Otherwise the uplink rep is loaded first; a failure there is returned to
+ * the caller. After that, mlx5_esw_offloads_rep_load() is called for every
+ * rep whose REP_ETH state is REP_LOADED.
+ *
+ * Returns 0 on success or the error from loading the uplink rep.
+ */
+int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+{
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+ int ret;
+
+ if (!esw || esw->mode != MLX5_ESWITCH_OFFLOADS)
+ return 0;
+
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+ if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
+ return 0;
+
+ ret = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK);
+ if (ret)
+ return ret;
+
+ /* NOTE(review): load errors inside this loop are ignored; 0 is returned regardless. */
+ mlx5_esw_for_each_rep(esw, i, rep) {
+ if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED)
+ mlx5_esw_offloads_rep_load(esw, rep->vport);
+ }
+
+ return 0;
+}
+
static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
{
struct mlx5_esw_indir_table *indir;
@@ -2787,6 +3138,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
struct mapping_ctx *reg_c0_obj_pool;
struct mlx5_vport *vport;
unsigned long i;
+ u64 mapping_id;
int err;
if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
@@ -2810,9 +3162,13 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
if (err)
goto err_vport_metadata;
- reg_c0_obj_pool = mapping_create(sizeof(struct mlx5_mapped_obj),
- ESW_REG_C0_USER_DATA_METADATA_MASK,
- true);
+ mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+ reg_c0_obj_pool = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
+ sizeof(struct mlx5_mapped_obj),
+ ESW_REG_C0_USER_DATA_METADATA_MASK,
+ true);
+
if (IS_ERR(reg_c0_obj_pool)) {
err = PTR_ERR(reg_c0_obj_pool);
goto err_pool;
@@ -2990,10 +3346,11 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
if (esw_mode_from_devlink(mode, &mlx5_mode))
return -EINVAL;
+ mlx5_lag_disable_change(esw->dev);
err = mlx5_esw_try_lock(esw);
if (err < 0) {
NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy");
- return err;
+ goto enable_lag;
}
cur_mlx5_mode = err;
err = 0;
@@ -3001,15 +3358,24 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
if (cur_mlx5_mode == mlx5_mode)
goto unlock;
- if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
+ if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) {
+ if (mlx5_devlink_trap_get_num_active(esw->dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't change mode while devlink traps are active");
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
err = esw_offloads_start(esw, extack);
- else if (mode == DEVLINK_ESWITCH_MODE_LEGACY)
+ } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) {
err = esw_offloads_stop(esw, extack);
- else
+ } else {
err = -EINVAL;
+ }
unlock:
mlx5_esw_unlock(esw);
+enable_lag:
+ mlx5_lag_enable_change(esw->dev);
return err;
}
@@ -3083,8 +3449,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
- if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE)
+ if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) {
+ err = 0;
goto out;
+ }
+
fallthrough;
case MLX5_CAP_INLINE_MODE_L2:
NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c
index d713ae24d6b6..a1ac3a654962 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/events.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c
@@ -27,7 +27,7 @@ static int pcie_core(struct notifier_block *, unsigned long, void *);
static int forward_event(struct notifier_block *, unsigned long, void *);
static struct mlx5_nb events_nbs_ref[] = {
- /* Events to be proccessed by mlx5_core */
+ /* Events to be processed by mlx5_core */
{.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
{.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
{.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index bd66ab2af5b5..306279b7f9e7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -417,7 +417,6 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
struct mlx5_wq_param wqp;
struct mlx5_cqe64 *cqe;
int inlen, err, eqn;
- unsigned int irqn;
void *cqc, *in;
__be64 *pas;
u32 i;
@@ -446,7 +445,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
goto err_cqwq;
}
- err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn);
+ err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn);
if (err) {
kvfree(in);
goto err_cqwq;
@@ -454,7 +453,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index);
MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
@@ -476,7 +475,6 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
*conn->cq.mcq.arm_db = 0;
conn->cq.mcq.vector = 0;
conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete;
- conn->cq.mcq.irqn = irqn;
conn->cq.mcq.uar = fdev->conn_res.uar;
tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 0bba92cf5dc0..8ec148010d62 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -1516,7 +1516,7 @@ static int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
mutex_lock(&fpga_xfrm->lock);
if (!fpga_xfrm->sa_ctx)
- /* Unbounded xfrm, chane only sw attrs */
+ /* Unbounded xfrm, change only sw attrs */
goto change_sw_xfrm_attrs;
/* copy original hw sa */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 896a6c3dbdb7..7db8df64a60e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -152,17 +152,56 @@ static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns)
return 0;
}
+/*
+ * Issue a SET_FLOW_TABLE_ROOT command for the FDB table type on @slave.
+ *
+ * @master:      device whose vhca_id is written as the table's eswitch owner;
+ *               only read when @ft_id_valid is true.
+ * @slave:       device the command is executed on.
+ * @ft_id_valid: true to point the slave's root at @ft_id owned by @master;
+ *               false to point it back at the root flow table of the slave's
+ *               own FDB namespace.
+ * @ft_id:       flow table id to use when @ft_id_valid is true.
+ *
+ * Returns the result of mlx5_cmd_exec().
+ */
+static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave,
+ bool ft_id_valid,
+ u32 ft_id)
+{
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_namespace *ns;
+
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type,
+ FS_FT_FDB);
+ if (ft_id_valid) {
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(master, vhca_id));
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ ft_id);
+ } else {
+ /*
+ * NOTE(review): ns is dereferenced without a NULL check; confirm
+ * mlx5_get_flow_namespace() cannot fail for the FDB namespace here.
+ */
+ ns = mlx5_get_flow_namespace(slave,
+ MLX5_FLOW_NAMESPACE_FDB);
+ root = find_root(&ns->node);
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
+ }
+
+ return mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+}
+
static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft, u32 underlay_qpn,
bool disconnect)
{
u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
struct mlx5_core_dev *dev = ns->dev;
+ int err;
if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
underlay_qpn == 0)
return 0;
+ if (ft->type == FS_FT_FDB &&
+ mlx5_lag_is_shared_fdb(dev) &&
+ !mlx5_lag_is_master(dev))
+ return 0;
+
MLX5_SET(set_flow_table_root_in, in, opcode,
MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
@@ -177,7 +216,24 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
MLX5_SET(set_flow_table_root_in, in, other_vport,
!!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
- return mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+ err = mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+ if (!err &&
+ ft->type == FS_FT_FDB &&
+ mlx5_lag_is_shared_fdb(dev) &&
+ mlx5_lag_is_master(dev)) {
+ err = mlx5_cmd_set_slave_root_fdb(dev,
+ mlx5_lag_get_peer_mdev(dev),
+ !disconnect, (!disconnect) ?
+ ft->id : 0);
+ if (err && !disconnect) {
+ MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ ns->root_ft->id);
+ mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+ }
+ }
+
+ return err;
}
static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index c0697e1b7118..9fe8e3c204d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -413,7 +413,7 @@ static bool check_valid_spec(const struct mlx5_flow_spec *spec)
return true;
}
-static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
+struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
{
struct fs_node *root;
struct mlx5_flow_namespace *ns;
@@ -2343,7 +2343,7 @@ static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio,
#define FLOW_TABLE_BIT_SZ 1
#define GET_FLOW_TABLE_CAP(dev, offset) \
- ((be32_to_cpu(*((__be32 *)(dev->caps.hca_cur[MLX5_CAP_FLOW_TABLE]) + \
+ ((be32_to_cpu(*((__be32 *)(dev->caps.hca[MLX5_CAP_FLOW_TABLE]->cur) + \
offset / 32)) >> \
(32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
@@ -2493,7 +2493,7 @@ static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level)
acc_level_ns = set_prio_attrs_in_ns(ns, acc_level);
/* If this a prio with chains, and we can jump from one chain
- * (namepsace) to another, so we accumulate the levels
+ * (namespace) to another, so we accumulate the levels
*/
if (prio->node.type == FS_TYPE_PRIO_CHAINS)
acc_level = acc_level_ns;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 7317cdeab661..98240badc342 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -294,6 +294,8 @@ void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports);
void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev);
+struct mlx5_flow_root_namespace *find_root(struct fs_node *node);
+
#define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); }
#define fs_list_for_each_entry(pos, root) \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 9abeb80ffa31..037e18dd4be0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -170,7 +170,7 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
/* The reset only needs to be issued by one PF. The health buffer is
* shared between all functions, and will be cleared during a reset.
- * Check again to avoid a redundant 2nd reset. If the fatal erros was
+ * Check again to avoid a redundant 2nd reset. If the fatal error was
* PCI related a reset won't help.
*/
fatal_error = mlx5_health_check_fatal_sensors(dev);
@@ -213,10 +213,6 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
mutex_lock(&dev->intf_state_mutex);
if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
goto unlock;/* a previous error is still being handled */
- if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) {
- dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
- goto unlock;
- }
enter_error_state(dev, force);
unlock:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
index 0e487ec57d5c..0c8594c7df21 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -99,7 +99,9 @@ static void mlx5i_get_channels(struct net_device *dev,
}
static int mlx5i_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = mlx5i_epriv(netdev);
@@ -107,7 +109,9 @@ static int mlx5i_set_coalesce(struct net_device *netdev,
}
static int mlx5i_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = mlx5i_epriv(netdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 7d7ed025db0d..67571e5040d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -50,7 +50,7 @@ static const struct net_device_ops mlx5i_netdev_ops = {
.ndo_init = mlx5i_dev_init,
.ndo_uninit = mlx5i_dev_cleanup,
.ndo_change_mtu = mlx5i_change_mtu,
- .ndo_do_ioctl = mlx5i_ioctl,
+ .ndo_eth_ioctl = mlx5i_ioctl,
};
/* IPoIB mlx5 netdev profile */
@@ -314,8 +314,7 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
{
- struct ttc_params ttc_params = {};
- int tt, err;
+ int err;
priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
MLX5_FLOW_NAMESPACE_KERNEL);
@@ -330,33 +329,15 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
}
- mlx5e_set_ttc_basic_params(priv, &ttc_params);
- mlx5e_set_inner_ttc_ft_params(&ttc_params);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn;
-
- err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc);
- if (err) {
- netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n",
- err);
- goto err_destroy_arfs_tables;
- }
-
- mlx5e_set_ttc_ft_params(&ttc_params);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;
-
- err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+ err = mlx5e_create_ttc_table(priv);
if (err) {
netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
err);
- goto err_destroy_inner_ttc_table;
+ goto err_destroy_arfs_tables;
}
return 0;
-err_destroy_inner_ttc_table:
- mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
err_destroy_arfs_tables:
mlx5e_arfs_destroy_tables(priv);
@@ -365,17 +346,20 @@ err_destroy_arfs_tables:
static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
{
- mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
- mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+ mlx5e_destroy_ttc_table(priv);
mlx5e_arfs_destroy_tables(priv);
}
static int mlx5i_init_rx(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
- u16 max_nch = priv->max_nch;
+ struct mlx5e_lro_param lro_param;
int err;
+ priv->rx_res = mlx5e_rx_res_alloc();
+ if (!priv->rx_res)
+ return -ENOMEM;
+
mlx5e_create_q_counters(priv);
err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
@@ -384,54 +368,38 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
goto err_destroy_q_counters;
}
- err = mlx5e_create_indirect_rqt(priv);
+ lro_param = mlx5e_get_lro_param(&priv->channels.params);
+ err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+ priv->max_nch, priv->drop_rq.rqn, &lro_param,
+ priv->channels.params.num_channels);
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch);
- if (err)
- goto err_destroy_indirect_rqts;
-
- err = mlx5e_create_indirect_tirs(priv, true);
- if (err)
- goto err_destroy_direct_rqts;
-
- err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch);
- if (err)
- goto err_destroy_indirect_tirs;
-
err = mlx5i_create_flow_steering(priv);
if (err)
- goto err_destroy_direct_tirs;
+ goto err_destroy_rx_res;
return 0;
-err_destroy_direct_tirs:
- mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
-err_destroy_indirect_tirs:
- mlx5e_destroy_indirect_tirs(priv);
-err_destroy_direct_rqts:
- mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
-err_destroy_indirect_rqts:
- mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+err_destroy_rx_res:
+ mlx5e_rx_res_destroy(priv->rx_res);
err_close_drop_rq:
mlx5e_close_drop_rq(&priv->drop_rq);
err_destroy_q_counters:
mlx5e_destroy_q_counters(priv);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
return err;
}
static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
{
- u16 max_nch = priv->max_nch;
-
mlx5i_destroy_flow_steering(priv);
- mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
- mlx5e_destroy_indirect_tirs(priv);
- mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
- mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ mlx5e_rx_res_destroy(priv->rx_res);
mlx5e_close_drop_rq(&priv->drop_rq);
mlx5e_destroy_q_counters(priv);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
}
/* The stats groups order is opposite to the update_stats() order calls */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
index 18ee21b06a00..5308f23702bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -149,7 +149,7 @@ static const struct net_device_ops mlx5i_pkey_netdev_ops = {
.ndo_get_stats64 = mlx5i_get_stats,
.ndo_uninit = mlx5i_pkey_dev_cleanup,
.ndo_change_mtu = mlx5i_pkey_change_mtu,
- .ndo_do_ioctl = mlx5i_pkey_ioctl,
+ .ndo_eth_ioctl = mlx5i_pkey_ioctl,
};
/* Child NDOs */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 5c043c5cc403..49ca57c6d31d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -32,7 +32,9 @@
#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
+#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lag.h"
@@ -45,7 +47,7 @@
static DEFINE_SPINLOCK(lag_lock);
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
- u8 remap_port2)
+ u8 remap_port2, bool shared_fdb)
{
u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
@@ -54,6 +56,7 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+ MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
return mlx5_cmd_exec_in(dev, create_lag, in);
}
@@ -224,35 +227,59 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
}
static int mlx5_create_lag(struct mlx5_lag *ldev,
- struct lag_tracker *tracker)
+ struct lag_tracker *tracker,
+ bool shared_fdb)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
int err;
mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
&ldev->v2p_map[MLX5_LAG_P2]);
- mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
- ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);
+ mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
+ ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
+ shared_fdb);
err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
- ldev->v2p_map[MLX5_LAG_P2]);
- if (err)
+ ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
+ if (err) {
mlx5_core_err(dev0,
"Failed to create LAG (%d)\n",
err);
+ return err;
+ }
+
+ if (shared_fdb) {
+ err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
+ dev1->priv.eswitch);
+ if (err)
+ mlx5_core_err(dev0, "Can't enable single FDB mode\n");
+ else
+ mlx5_core_info(dev0, "Operation mode is single FDB\n");
+ }
+
+ if (err) {
+ MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+ if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
+ mlx5_core_err(dev0,
+ "Failed to deactivate RoCE LAG; driver restart required\n");
+ }
+
return err;
}
int mlx5_activate_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker,
- u8 flags)
+ u8 flags,
+ bool shared_fdb)
{
bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
int err;
- err = mlx5_create_lag(ldev, tracker);
+ err = mlx5_create_lag(ldev, tracker, shared_fdb);
if (err) {
if (roce_lag) {
mlx5_core_err(dev0,
@@ -266,6 +293,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
}
ldev->flags |= flags;
+ ldev->shared_fdb = shared_fdb;
return 0;
}
@@ -277,6 +305,13 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
int err;
ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
+ mlx5_lag_mp_reset(ldev);
+
+ if (ldev->shared_fdb) {
+ mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
+ ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
+ ldev->shared_fdb = false;
+ }
MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
@@ -333,6 +368,10 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
if (!ldev->pf[i].dev)
continue;
+ if (ldev->pf[i].dev->priv.flags &
+ MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+ continue;
+
ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(ldev->pf[i].dev);
}
@@ -342,12 +381,15 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ bool shared_fdb = ldev->shared_fdb;
bool roce_lag;
int err;
roce_lag = __mlx5_lag_is_roce(ldev);
- if (roce_lag) {
+ if (shared_fdb) {
+ mlx5_lag_remove_devices(ldev);
+ } else if (roce_lag) {
if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
@@ -359,8 +401,34 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
if (err)
return;
- if (roce_lag)
+ if (shared_fdb || roce_lag)
mlx5_lag_add_devices(ldev);
+
+ if (shared_fdb) {
+ if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+ mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+ mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+ }
+}
+
+/*
+ * Decide whether the two LAG ports can be bonded in shared FDB mode.
+ *
+ * Returns true only when all of the following hold:
+ *  - both devices are in switchdev mode;
+ *  - vport match metadata is enabled on both eswitches;
+ *  - the MLX5_DEVCOM_ESW_OFFLOADS devcom component of port 1 is paired;
+ *  - port 2 reports the lag_native_fdb_selection and root_ft_on_other_esw
+ *    capabilities;
+ *  - port 1 reports the esw_shared_ingress_acl capability.
+ */
+static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+
+ if (is_mdev_switchdev_mode(dev0) &&
+ is_mdev_switchdev_mode(dev1) &&
+ mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
+ mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
+ mlx5_devcom_is_paired(dev0->priv.devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS) &&
+ MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
+ MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
+ MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
+ return true;
+
+ return false;
 }
static void mlx5_do_bond(struct mlx5_lag *ldev)
@@ -371,14 +439,17 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
bool do_bond, roce_lag;
int err;
- if (!mlx5_lag_is_ready(ldev))
- return;
-
- tracker = ldev->tracker;
+ if (!mlx5_lag_is_ready(ldev)) {
+ do_bond = false;
+ } else {
+ tracker = ldev->tracker;
- do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
+ do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
+ }
if (do_bond && !__mlx5_lag_is_active(ldev)) {
+ bool shared_fdb = mlx5_shared_fdb_supported(ldev);
+
roce_lag = !mlx5_sriov_is_enabled(dev0) &&
!mlx5_sriov_is_enabled(dev1);
@@ -388,23 +459,40 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif
- if (roce_lag)
+ if (shared_fdb || roce_lag)
mlx5_lag_remove_devices(ldev);
err = mlx5_activate_lag(ldev, &tracker,
roce_lag ? MLX5_LAG_FLAG_ROCE :
- MLX5_LAG_FLAG_SRIOV);
+ MLX5_LAG_FLAG_SRIOV,
+ shared_fdb);
if (err) {
- if (roce_lag)
+ if (shared_fdb || roce_lag)
mlx5_lag_add_devices(ldev);
return;
- }
-
- if (roce_lag) {
+ } else if (roce_lag) {
dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
mlx5_nic_vport_enable_roce(dev1);
+ } else if (shared_fdb) {
+ dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+
+ err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ if (!err)
+ err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+
+ if (err) {
+ dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+ mlx5_deactivate_lag(ldev);
+ mlx5_lag_add_devices(ldev);
+ mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+ mlx5_core_err(dev0, "Failed to enable lag\n");
+ return;
+ }
}
} else if (do_bond && __mlx5_lag_is_active(ldev)) {
mlx5_modify_lag(ldev, &tracker);
@@ -418,21 +506,48 @@ static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}
+/*
+ * Take the eswitch lock of @dev0 and then of @dev1. A NULL device is
+ * skipped. Release with mlx5_lag_unlock_eswitches().
+ */
+static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
+ struct mlx5_core_dev *dev1)
+{
+ if (dev0)
+ mlx5_esw_lock(dev0->priv.eswitch);
+ if (dev1)
+ mlx5_esw_lock(dev1->priv.eswitch);
+}
+
+/*
+ * Release the eswitch locks taken by mlx5_lag_lock_eswitches(), in the
+ * opposite order: @dev1 first, then @dev0. A NULL device is skipped.
+ */
+static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
+ struct mlx5_core_dev *dev1)
+{
+ if (dev1)
+ mlx5_esw_unlock(dev1->priv.eswitch);
+ if (dev0)
+ mlx5_esw_unlock(dev0->priv.eswitch);
+}
+
static void mlx5_do_bond_work(struct work_struct *work)
{
struct delayed_work *delayed_work = to_delayed_work(work);
struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
bond_work);
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
int status;
status = mlx5_dev_list_trylock();
if (!status) {
- /* 1 sec delay. */
mlx5_queue_bond_work(ldev, HZ);
return;
}
+ if (ldev->mode_changes_in_progress) {
+ mlx5_dev_list_unlock();
+ mlx5_queue_bond_work(ldev, HZ);
+ return;
+ }
+
+ mlx5_lag_lock_eswitches(dev0, dev1);
mlx5_do_bond(ldev);
+ mlx5_lag_unlock_eswitches(dev0, dev1);
mlx5_dev_list_unlock();
}
@@ -630,7 +745,7 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
}
/* Must be called with intf_mutex held */
-static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
+static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev = NULL;
struct mlx5_core_dev *tmp_dev;
@@ -638,7 +753,7 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
!MLX5_CAP_GEN(dev, lag_master) ||
MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
- return;
+ return 0;
tmp_dev = mlx5_get_next_phys_dev(dev);
if (tmp_dev)
@@ -648,15 +763,17 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
ldev = mlx5_lag_dev_alloc(dev);
if (!ldev) {
mlx5_core_err(dev, "Failed to alloc lag dev\n");
- return;
+ return 0;
}
} else {
+ if (ldev->mode_changes_in_progress)
+ return -EAGAIN;
mlx5_ldev_get(ldev);
}
mlx5_ldev_add_mdev(ldev, dev);
- return;
+ return 0;
}
void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
@@ -667,7 +784,13 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
if (!ldev)
return;
+recheck:
mlx5_dev_list_lock();
+ if (ldev->mode_changes_in_progress) {
+ mlx5_dev_list_unlock();
+ msleep(100);
+ goto recheck;
+ }
mlx5_ldev_remove_mdev(ldev, dev);
mlx5_dev_list_unlock();
mlx5_ldev_put(ldev);
@@ -675,8 +798,16 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
+ int err;
+
+recheck:
mlx5_dev_list_lock();
- __mlx5_lag_dev_add_mdev(dev);
+ err = __mlx5_lag_dev_add_mdev(dev);
+ if (err) {
+ mlx5_dev_list_unlock();
+ msleep(100);
+ goto recheck;
+ }
mlx5_dev_list_unlock();
}
@@ -690,11 +821,11 @@ void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
if (!ldev)
return;
- if (__mlx5_lag_is_active(ldev))
- mlx5_disable_lag(ldev);
-
mlx5_ldev_remove_netdev(ldev, netdev);
ldev->flags &= ~MLX5_LAG_FLAG_READY;
+
+ if (__mlx5_lag_is_active(ldev))
+ mlx5_queue_bond_work(ldev, 0);
}
/* Must be called with intf_mutex held */
@@ -716,6 +847,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
if (i >= MLX5_MAX_PORTS)
ldev->flags |= MLX5_LAG_FLAG_READY;
+ mlx5_queue_bond_work(ldev, 0);
}
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
@@ -746,6 +878,21 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
}
EXPORT_SYMBOL(mlx5_lag_is_active);
+/*
+ * Report whether @dev is the master of an active LAG.
+ *
+ * Returns true when @dev has a LAG device, LAG is active on it, and @dev is
+ * the device registered in the MLX5_LAG_P1 slot. The state is sampled under
+ * lag_lock.
+ */
+bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ bool res;
+
+ spin_lock(&lag_lock);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_active(ldev) &&
+ dev == ldev->pf[MLX5_LAG_P1].dev;
+ spin_unlock(&lag_lock);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_master);
+
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
@@ -760,19 +907,50 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);
-void mlx5_lag_update(struct mlx5_core_dev *dev)
+/*
+ * Report whether @dev is part of a LAG running in shared FDB mode.
+ *
+ * Returns true when @dev has a LAG device, that LAG is in SRIOV mode, and
+ * its shared_fdb flag is set. The state is sampled under lag_lock.
+ */
+bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ bool res;
+
+ spin_lock(&lag_lock);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
+ spin_unlock(&lag_lock);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
+
+/*
+ * Block LAG state changes for the LAG device that @dev belongs to.
+ *
+ * Under the device-list lock, increments mode_changes_in_progress and, if
+ * LAG is currently active, disables it while holding both eswitch locks.
+ * While the counter is non-zero the bond work requeues itself and mdev
+ * add/remove wait. Pair every call with mlx5_lag_enable_change().
+ *
+ * Does nothing when @dev has no LAG device, i.e. mlx5_lag_dev() returns
+ * NULL (for example when the device lacks the LAG capabilities and no LAG
+ * device was ever allocated for it).
+ */
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
 {
+ struct mlx5_core_dev *dev0;
+ struct mlx5_core_dev *dev1;
 struct mlx5_lag *ldev;
 mlx5_dev_list_lock();
+
 ldev = mlx5_lag_dev(dev);
- if (!ldev)
- goto unlock;
+ /* No LAG device: nothing to disable, and ldev must not be dereferenced. */
+ if (!ldev) {
+ mlx5_dev_list_unlock();
+ return;
+ }
+
+ dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ dev1 = ldev->pf[MLX5_LAG_P2].dev;
- mlx5_do_bond(ldev);
+ ldev->mode_changes_in_progress++;
+ if (__mlx5_lag_is_active(ldev)) {
+ mlx5_lag_lock_eswitches(dev0, dev1);
+ mlx5_disable_lag(ldev);
+ mlx5_lag_unlock_eswitches(dev0, dev1);
+ }
+ mlx5_dev_list_unlock();
+}
-unlock:
+/*
+ * Re-allow LAG state changes after mlx5_lag_disable_change().
+ *
+ * Decrements mode_changes_in_progress under the device-list lock and then
+ * queues the bond work immediately so the LAG state is re-evaluated.
+ *
+ * Does nothing when @dev has no LAG device, i.e. mlx5_lag_dev() returns
+ * NULL.
+ */
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+
+ mlx5_dev_list_lock();
+ ldev = mlx5_lag_dev(dev);
+ /* No LAG device: there is no counter to drop and no work to queue. */
+ if (!ldev) {
+ mlx5_dev_list_unlock();
+ return;
+ }
+ ldev->mode_changes_in_progress--;
 mlx5_dev_list_unlock();
+ mlx5_queue_bond_work(ldev, 0);
 }
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
@@ -827,6 +1005,26 @@ unlock:
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);
+struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_dev *peer_dev = NULL;
+ struct mlx5_lag *ldev;
+
+ spin_lock(&lag_lock);
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ goto unlock;
+
+ peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
+ ldev->pf[MLX5_LAG_P2].dev :
+ ldev->pf[MLX5_LAG_P1].dev;
+
+unlock:
+ spin_unlock(&lag_lock);
+ return peer_dev;
+}
+EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
+
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
u64 *values,
int num_counters,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
index 191392c37558..d4bae528954e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
@@ -39,6 +39,8 @@ struct lag_tracker {
*/
struct mlx5_lag {
u8 flags;
+ int mode_changes_in_progress;
+ bool shared_fdb;
u8 v2p_map[MLX5_MAX_PORTS];
struct kref ref;
struct lag_func pf[MLX5_MAX_PORTS];
@@ -71,7 +73,8 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker);
int mlx5_activate_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker,
- u8 flags);
+ u8 flags,
+ bool shared_fdb);
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
struct net_device *ndev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index c4bf8b679541..f239b352a58a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -161,7 +161,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
struct lag_tracker tracker;
tracker = ldev->tracker;
- mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
+ mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
}
mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
@@ -302,6 +302,14 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
return NOTIFY_DONE;
}
+void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
+{
+ /* Clear mfi, as it might become stale when a route delete event
+ * has been missed, see mlx5_lag_fib_route_event().
+ */
+ ldev->lag_mp.mfi = NULL;
+}
+
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
struct lag_mp *mp = &ldev->lag_mp;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
index 258ac7b2964e..729c839397a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
@@ -21,11 +21,13 @@ struct lag_mp {
#ifdef CONFIG_MLX5_ESWITCH
+void mlx5_lag_mp_reset(struct mlx5_lag *ldev);
int mlx5_lag_mp_init(struct mlx5_lag *ldev);
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev);
#else /* CONFIG_MLX5_ESWITCH */
+static inline void mlx5_lag_mp_reset(struct mlx5_lag *ldev) {}
static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; }
static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index ce696d523493..ffac8a0e7a23 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -749,7 +749,7 @@ static int mlx5_pps_event(struct notifier_block *nb,
} else {
ptp_event.type = PTP_CLOCK_EXTTS;
}
- /* TODOL clock->ptp can be NULL if ptp_clock_register failes */
+ /* TODO: clock->ptp can be NULL if ptp_clock_register fails */
ptp_clock_event(clock->ptp, &ptp_event);
break;
case PTP_PF_PEROUT:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
index 624cedebb510..d3d628b862f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -104,4 +104,6 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev);
#endif
+int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn);
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
new file mode 100644
index 000000000000..749d17c0057d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
@@ -0,0 +1,602 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES.
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "lib/fs_ttc.h"
+
+#define MLX5_TTC_NUM_GROUPS 3
+#define MLX5_TTC_GROUP1_SIZE (BIT(3) + MLX5_NUM_TUNNEL_TT)
+#define MLX5_TTC_GROUP2_SIZE BIT(1)
+#define MLX5_TTC_GROUP3_SIZE BIT(0)
+#define MLX5_TTC_TABLE_SIZE (MLX5_TTC_GROUP1_SIZE +\
+ MLX5_TTC_GROUP2_SIZE +\
+ MLX5_TTC_GROUP3_SIZE)
+
+#define MLX5_INNER_TTC_NUM_GROUPS 3
+#define MLX5_INNER_TTC_GROUP1_SIZE BIT(3)
+#define MLX5_INNER_TTC_GROUP2_SIZE BIT(1)
+#define MLX5_INNER_TTC_GROUP3_SIZE BIT(0)
+#define MLX5_INNER_TTC_TABLE_SIZE (MLX5_INNER_TTC_GROUP1_SIZE +\
+ MLX5_INNER_TTC_GROUP2_SIZE +\
+ MLX5_INNER_TTC_GROUP3_SIZE)
+
+/* L3/L4 traffic type classifier */
+struct mlx5_ttc_table {
+ int num_groups;
+ struct mlx5_flow_table *t;
+ struct mlx5_flow_group **g;
+ struct mlx5_ttc_rule rules[MLX5_NUM_TT];
+ struct mlx5_flow_handle *tunnel_rules[MLX5_NUM_TUNNEL_TT];
+};
+
+struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc)
+{
+ return ttc->t;
+}
+
+static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc)
+{
+ int i;
+
+ for (i = 0; i < MLX5_NUM_TT; i++) {
+ if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) {
+ mlx5_del_flow_rules(ttc->rules[i].rule);
+ ttc->rules[i].rule = NULL;
+ }
+ }
+
+ for (i = 0; i < MLX5_NUM_TUNNEL_TT; i++) {
+ if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
+ mlx5_del_flow_rules(ttc->tunnel_rules[i]);
+ ttc->tunnel_rules[i] = NULL;
+ }
+ }
+}
+
+struct mlx5_etype_proto {
+ u16 etype;
+ u8 proto;
+};
+
+static struct mlx5_etype_proto ttc_rules[] = {
+ [MLX5_TT_IPV4_TCP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_TCP,
+ },
+ [MLX5_TT_IPV6_TCP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_TCP,
+ },
+ [MLX5_TT_IPV4_UDP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_UDP,
+ },
+ [MLX5_TT_IPV6_UDP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_UDP,
+ },
+ [MLX5_TT_IPV4_IPSEC_AH] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_AH,
+ },
+ [MLX5_TT_IPV6_IPSEC_AH] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_AH,
+ },
+ [MLX5_TT_IPV4_IPSEC_ESP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_ESP,
+ },
+ [MLX5_TT_IPV6_IPSEC_ESP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_ESP,
+ },
+ [MLX5_TT_IPV4] = {
+ .etype = ETH_P_IP,
+ .proto = 0,
+ },
+ [MLX5_TT_IPV6] = {
+ .etype = ETH_P_IPV6,
+ .proto = 0,
+ },
+ [MLX5_TT_ANY] = {
+ .etype = 0,
+ .proto = 0,
+ },
+};
+
+static struct mlx5_etype_proto ttc_tunnel_rules[] = {
+ [MLX5_TT_IPV4_GRE] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_GRE,
+ },
+ [MLX5_TT_IPV6_GRE] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_GRE,
+ },
+ [MLX5_TT_IPV4_IPIP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_IPIP,
+ },
+ [MLX5_TT_IPV6_IPIP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_IPIP,
+ },
+ [MLX5_TT_IPV4_IPV6] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_IPV6,
+ },
+ [MLX5_TT_IPV6_IPV6] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_IPV6,
+ },
+
+};
+
+u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt)
+{
+ return ttc_tunnel_rules[tt].proto;
+}
+
+static bool mlx5_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev,
+ u8 proto_type)
+{
+ switch (proto_type) {
+ case IPPROTO_GRE:
+ return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
+ case IPPROTO_IPIP:
+ case IPPROTO_IPV6:
+ return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
+ MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx));
+ default:
+ return false;
+ }
+}
+
+static bool mlx5_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev)
+{
+ int tt;
+
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ if (mlx5_tunnel_proto_supported_rx(mdev,
+ ttc_tunnel_rules[tt].proto))
+ return true;
+ }
+ return false;
+}
+
+bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
+{
+ return (mlx5_tunnel_any_rx_proto_supported(mdev) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version));
+}
+
+static u8 mlx5_etype_to_ipv(u16 ethertype)
+{
+ if (ethertype == ETH_P_IP)
+ return 4;
+
+ if (ethertype == ETH_P_IPV6)
+ return 6;
+
+ return 0;
+}
+
+static struct mlx5_flow_handle *
+mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
+ struct mlx5_flow_destination *dest, u16 etype, u8 proto)
+{
+ int match_ipv_outer =
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev,
+ ft_field_support.outer_ip_version);
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ u8 ipv;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ if (proto) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
+ }
+
+ ipv = mlx5_etype_to_ipv(etype);
+ if (match_ipv_outer && ipv) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
+ } else if (etype) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
+ }
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_err(dev, "%s: add rule failed\n", __func__);
+ }
+
+ kvfree(spec);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev,
+ struct ttc_params *params,
+ struct mlx5_ttc_table *ttc)
+{
+ struct mlx5_flow_handle **trules;
+ struct mlx5_ttc_rule *rules;
+ struct mlx5_flow_table *ft;
+ int tt;
+ int err;
+
+ ft = ttc->t;
+ rules = ttc->rules;
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ struct mlx5_ttc_rule *rule = &rules[tt];
+
+ rule->rule = mlx5_generate_ttc_rule(dev, ft, &params->dests[tt],
+ ttc_rules[tt].etype,
+ ttc_rules[tt].proto);
+ if (IS_ERR(rule->rule)) {
+ err = PTR_ERR(rule->rule);
+ rule->rule = NULL;
+ goto del_rules;
+ }
+ rule->default_dest = params->dests[tt];
+ }
+
+ if (!params->inner_ttc || !mlx5_tunnel_inner_ft_supported(dev))
+ return 0;
+
+ trules = ttc->tunnel_rules;
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ if (!mlx5_tunnel_proto_supported_rx(dev,
+ ttc_tunnel_rules[tt].proto))
+ continue;
+ trules[tt] = mlx5_generate_ttc_rule(dev, ft,
+ &params->tunnel_dests[tt],
+ ttc_tunnel_rules[tt].etype,
+ ttc_tunnel_rules[tt].proto);
+ if (IS_ERR(trules[tt])) {
+ err = PTR_ERR(trules[tt]);
+ trules[tt] = NULL;
+ goto del_rules;
+ }
+ }
+
+ return 0;
+
+del_rules:
+ mlx5_cleanup_ttc_rules(ttc);
+ return err;
+}
+
+static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc,
+ bool use_ipv)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ttc->g = kcalloc(MLX5_TTC_NUM_GROUPS, sizeof(*ttc->g), GFP_KERNEL);
+ if (!ttc->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ttc->g);
+ ttc->g = NULL;
+ return -ENOMEM;
+ }
+
+ /* L4 Group */
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+ if (use_ipv)
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
+ else
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_TTC_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ /* L3 Group */
+ MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_TTC_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ /* Any Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_TTC_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ttc->g[ttc->num_groups]);
+ ttc->g[ttc->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+static struct mlx5_flow_handle *
+mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_destination *dest,
+ u16 etype, u8 proto)
+{
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ u8 ipv;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ ipv = mlx5_etype_to_ipv(etype);
+ if (etype && ipv) {
+ spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
+ }
+
+ if (proto) {
+ spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
+ }
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_err(dev, "%s: add inner TTC rule failed\n", __func__);
+ }
+
+ kvfree(spec);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev,
+ struct ttc_params *params,
+ struct mlx5_ttc_table *ttc)
+{
+ struct mlx5_ttc_rule *rules;
+ struct mlx5_flow_table *ft;
+ int err;
+ int tt;
+
+ ft = ttc->t;
+ rules = ttc->rules;
+
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ struct mlx5_ttc_rule *rule = &rules[tt];
+
+ rule->rule = mlx5_generate_inner_ttc_rule(dev, ft,
+ &params->dests[tt],
+ ttc_rules[tt].etype,
+ ttc_rules[tt].proto);
+ if (IS_ERR(rule->rule)) {
+ err = PTR_ERR(rule->rule);
+ rule->rule = NULL;
+ goto del_rules;
+ }
+ rule->default_dest = params->dests[tt];
+ }
+
+ return 0;
+
+del_rules:
+
+ mlx5_cleanup_ttc_rules(ttc);
+ return err;
+}
+
+static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ttc->g = kcalloc(MLX5_INNER_TTC_NUM_GROUPS, sizeof(*ttc->g),
+ GFP_KERNEL);
+ if (!ttc->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ttc->g);
+ ttc->g = NULL;
+ return -ENOMEM;
+ }
+
+ /* L4 Group */
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_INNER_TTC_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ /* L3 Group */
+ MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_INNER_TTC_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ /* Any Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_INNER_TTC_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ttc->g[ttc->num_groups]);
+ ttc->g[ttc->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+ struct ttc_params *params)
+{
+ struct mlx5_ttc_table *ttc;
+ int err;
+
+ ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+ if (!ttc)
+ return ERR_PTR(-ENOMEM);
+
+ WARN_ON_ONCE(params->ft_attr.max_fte);
+ params->ft_attr.max_fte = MLX5_INNER_TTC_TABLE_SIZE;
+ ttc->t = mlx5_create_flow_table(params->ns, &params->ft_attr);
+ if (IS_ERR(ttc->t)) {
+ err = PTR_ERR(ttc->t);
+ kvfree(ttc);
+ return ERR_PTR(err);
+ }
+
+ err = mlx5_create_inner_ttc_table_groups(ttc);
+ if (err)
+ goto destroy_ft;
+
+ err = mlx5_generate_inner_ttc_table_rules(dev, params, ttc);
+ if (err)
+ goto destroy_ft;
+
+ return ttc;
+
+destroy_ft:
+ mlx5_destroy_ttc_table(ttc);
+ return ERR_PTR(err);
+}
+
+void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc)
+{
+ int i;
+
+ mlx5_cleanup_ttc_rules(ttc);
+ for (i = ttc->num_groups - 1; i >= 0; i--) {
+ if (!IS_ERR_OR_NULL(ttc->g[i]))
+ mlx5_destroy_flow_group(ttc->g[i]);
+ ttc->g[i] = NULL;
+ }
+
+ kfree(ttc->g);
+ mlx5_destroy_flow_table(ttc->t);
+ kvfree(ttc);
+}
+
+struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+ struct ttc_params *params)
+{
+ bool match_ipv_outer =
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev,
+ ft_field_support.outer_ip_version);
+ struct mlx5_ttc_table *ttc;
+ int err;
+
+ ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+ if (!ttc)
+ return ERR_PTR(-ENOMEM);
+
+ WARN_ON_ONCE(params->ft_attr.max_fte);
+ params->ft_attr.max_fte = MLX5_TTC_TABLE_SIZE;
+ ttc->t = mlx5_create_flow_table(params->ns, &params->ft_attr);
+ if (IS_ERR(ttc->t)) {
+ err = PTR_ERR(ttc->t);
+ kvfree(ttc);
+ return ERR_PTR(err);
+ }
+
+ err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer);
+ if (err)
+ goto destroy_ft;
+
+ err = mlx5_generate_ttc_table_rules(dev, params, ttc);
+ if (err)
+ goto destroy_ft;
+
+ return ttc;
+
+destroy_ft:
+ mlx5_destroy_ttc_table(ttc);
+ return ERR_PTR(err);
+}
+
+int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type,
+ struct mlx5_flow_destination *new_dest)
+{
+ return mlx5_modify_rule_destination(ttc->rules[type].rule, new_dest,
+ NULL);
+}
+
+struct mlx5_flow_destination
+mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc,
+ enum mlx5_traffic_types type)
+{
+ struct mlx5_flow_destination *dest = &ttc->rules[type].default_dest;
+
+ WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR,
+ "TTC[%d] default dest is not setup yet", type);
+
+ return *dest;
+}
+
+int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc,
+ enum mlx5_traffic_types type)
+{
+ struct mlx5_flow_destination dest = mlx5_ttc_get_default_dest(ttc, type);
+
+ return mlx5_ttc_fwd_dest(ttc, type, &dest);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h
new file mode 100644
index 000000000000..ce95be8f8382
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_FS_TTC_H__
+#define __MLX5_FS_TTC_H__
+
+#include <linux/mlx5/fs.h>
+
+enum mlx5_traffic_types {
+ MLX5_TT_IPV4_TCP,
+ MLX5_TT_IPV6_TCP,
+ MLX5_TT_IPV4_UDP,
+ MLX5_TT_IPV6_UDP,
+ MLX5_TT_IPV4_IPSEC_AH,
+ MLX5_TT_IPV6_IPSEC_AH,
+ MLX5_TT_IPV4_IPSEC_ESP,
+ MLX5_TT_IPV6_IPSEC_ESP,
+ MLX5_TT_IPV4,
+ MLX5_TT_IPV6,
+ MLX5_TT_ANY,
+ MLX5_NUM_TT,
+ MLX5_NUM_INDIR_TIRS = MLX5_TT_ANY,
+};
+
+enum mlx5_tunnel_types {
+ MLX5_TT_IPV4_GRE,
+ MLX5_TT_IPV6_GRE,
+ MLX5_TT_IPV4_IPIP,
+ MLX5_TT_IPV6_IPIP,
+ MLX5_TT_IPV4_IPV6,
+ MLX5_TT_IPV6_IPV6,
+ MLX5_NUM_TUNNEL_TT,
+};
+
+struct mlx5_ttc_rule {
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_destination default_dest;
+};
+
+struct mlx5_ttc_table;
+
+struct ttc_params {
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table_attr ft_attr;
+ struct mlx5_flow_destination dests[MLX5_NUM_TT];
+ bool inner_ttc;
+ struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT];
+};
+
+struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc);
+
+struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+ struct ttc_params *params);
+void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc);
+
+struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+ struct ttc_params *params);
+
+int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type,
+ struct mlx5_flow_destination *new_dest);
+struct mlx5_flow_destination
+mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc,
+ enum mlx5_traffic_types type);
+int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc,
+ enum mlx5_traffic_types type);
+
+bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev);
+u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt);
+
+#endif /* __MLX5_FS_TTC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
index 38084400ee8f..e3b0a131c3e1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
@@ -40,7 +40,7 @@
struct mlx5_vxlan {
struct mlx5_core_dev *mdev;
- /* max_num_ports is usuallly 4, 16 buckets is more than enough */
+ /* max_num_ports is usually 4, 16 buckets is more than enough */
DECLARE_HASHTABLE(htable, 4);
struct mutex sync_lock; /* sync add/del port HW operations */
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index eb1b316560a8..79482824c64f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -252,28 +252,16 @@ static int set_dma_caps(struct pci_dev *pdev)
{
int err;
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (err) {
dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
return err;
}
}
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
- if (err) {
- dev_warn(&pdev->dev,
- "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- if (err) {
- dev_err(&pdev->dev,
- "Can't set consistent PCI DMA mask, aborting\n");
- return err;
- }
- }
-
dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
return err;
}
@@ -389,11 +377,11 @@ static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
switch (cap_mode) {
case HCA_CAP_OPMOD_GET_MAX:
- memcpy(dev->caps.hca_max[cap_type], hca_caps,
+ memcpy(dev->caps.hca[cap_type]->max, hca_caps,
MLX5_UN_SZ_BYTES(hca_cap_union));
break;
case HCA_CAP_OPMOD_GET_CUR:
- memcpy(dev->caps.hca_cur[cap_type], hca_caps,
+ memcpy(dev->caps.hca[cap_type]->cur, hca_caps,
MLX5_UN_SZ_BYTES(hca_cap_union));
break;
default:
@@ -469,7 +457,7 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
return err;
set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
- memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP],
+ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ODP]->cur,
MLX5_ST_SZ_BYTES(odp_cap));
#define ODP_CAP_SET_MAX(dev, field) \
@@ -514,7 +502,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
capability);
- memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_GENERAL],
+ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL]->cur,
MLX5_ST_SZ_BYTES(cmd_hca_cap));
mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
@@ -596,7 +584,7 @@ static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx)
return 0;
set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
- memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ROCE],
+ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ROCE]->cur,
MLX5_ST_SZ_BYTES(roce_cap));
MLX5_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1);
@@ -748,14 +736,12 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
const struct pci_device_id *id)
{
- struct mlx5_priv *priv = &dev->priv;
int err = 0;
mutex_init(&dev->pci_status_mutex);
pci_set_drvdata(dev->pdev, dev);
dev->bar_addr = pci_resource_start(pdev, 0);
- priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev));
err = mlx5_pci_enable_device(dev);
if (err) {
@@ -1179,6 +1165,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
goto err_ec;
}
+ mlx5_lag_add_mdev(dev);
err = mlx5_sriov_attach(dev);
if (err) {
mlx5_core_err(dev, "sriov init failed %d\n", err);
@@ -1186,11 +1173,11 @@ static int mlx5_load(struct mlx5_core_dev *dev)
}
mlx5_sf_dev_table_create(dev);
- mlx5_lag_add_mdev(dev);
return 0;
err_sriov:
+ mlx5_lag_remove_mdev(dev);
mlx5_ec_cleanup(dev);
err_ec:
mlx5_sf_hw_table_destroy(dev);
@@ -1222,9 +1209,9 @@ err_irq_table:
static void mlx5_unload(struct mlx5_core_dev *dev)
{
- mlx5_lag_remove_mdev(dev);
mlx5_sf_dev_table_destroy(dev);
mlx5_sriov_detach(dev);
+ mlx5_lag_remove_mdev(dev);
mlx5_ec_cleanup(dev);
mlx5_sf_hw_table_destroy(dev);
mlx5_vhca_event_stop(dev);
@@ -1248,11 +1235,6 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
int err = 0;
mutex_lock(&dev->intf_state_mutex);
- if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
- mlx5_core_warn(dev, "interface is up, NOP\n");
- goto out;
- }
- /* remove any previous indication of internal error */
dev->state = MLX5_DEVICE_STATE_UP;
err = mlx5_function_setup(dev, true);
@@ -1271,7 +1253,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
- err = mlx5_devlink_register(priv_to_devlink(dev), dev->device);
+ err = mlx5_devlink_register(priv_to_devlink(dev));
if (err)
goto err_devlink_reg;
@@ -1293,7 +1275,6 @@ function_teardown:
mlx5_function_teardown(dev, true);
err_function:
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
-out:
mutex_unlock(&dev->intf_state_mutex);
return err;
}
@@ -1380,6 +1361,60 @@ out:
mutex_unlock(&dev->intf_state_mutex);
}
+static const int types[] = {
+ MLX5_CAP_GENERAL,
+ MLX5_CAP_GENERAL_2,
+ MLX5_CAP_ETHERNET_OFFLOADS,
+ MLX5_CAP_IPOIB_ENHANCED_OFFLOADS,
+ MLX5_CAP_ODP,
+ MLX5_CAP_ATOMIC,
+ MLX5_CAP_ROCE,
+ MLX5_CAP_IPOIB_OFFLOADS,
+ MLX5_CAP_FLOW_TABLE,
+ MLX5_CAP_ESWITCH_FLOW_TABLE,
+ MLX5_CAP_ESWITCH,
+ MLX5_CAP_VECTOR_CALC,
+ MLX5_CAP_QOS,
+ MLX5_CAP_DEBUG,
+ MLX5_CAP_DEV_MEM,
+ MLX5_CAP_DEV_EVENT,
+ MLX5_CAP_TLS,
+ MLX5_CAP_VDPA_EMULATION,
+ MLX5_CAP_IPSEC,
+};
+
+static void mlx5_hca_caps_free(struct mlx5_core_dev *dev)
+{
+ int type;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(types); i++) {
+ type = types[i];
+ kfree(dev->caps.hca[type]);
+ }
+}
+
+static int mlx5_hca_caps_alloc(struct mlx5_core_dev *dev)
+{
+ struct mlx5_hca_cap *cap;
+ int type;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(types); i++) {
+ cap = kzalloc(sizeof(*cap), GFP_KERNEL);
+ if (!cap)
+ goto err;
+ type = types[i];
+ dev->caps.hca[type] = cap;
+ }
+
+ return 0;
+
+err:
+ mlx5_hca_caps_free(dev);
+ return -ENOMEM;
+}
+
int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
{
struct mlx5_priv *priv = &dev->priv;
@@ -1399,6 +1434,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
mutex_init(&priv->pgdir_mutex);
INIT_LIST_HEAD(&priv->pgdir_list);
+ priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev));
priv->dbg_root = debugfs_create_dir(dev_name(dev->device),
mlx5_debugfs_root);
INIT_LIST_HEAD(&priv->traps);
@@ -1415,8 +1451,14 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
if (err)
goto err_adev_init;
+ err = mlx5_hca_caps_alloc(dev);
+ if (err)
+ goto err_hca_caps;
+
return 0;
+err_hca_caps:
+ mlx5_adev_cleanup(dev);
err_adev_init:
mlx5_pagealloc_cleanup(dev);
err_pagealloc_init:
@@ -1435,6 +1477,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
+ mlx5_hca_caps_free(dev);
mlx5_adev_cleanup(dev);
mlx5_pagealloc_cleanup(dev);
mlx5_health_cleanup(dev);
@@ -1452,7 +1495,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
struct devlink *devlink;
int err;
- devlink = mlx5_devlink_alloc();
+ devlink = mlx5_devlink_alloc(&pdev->dev);
if (!devlink) {
dev_err(&pdev->dev, "devlink alloc failed\n");
return -ENOMEM;
@@ -1784,16 +1827,14 @@ static int __init init(void)
if (err)
goto err_sf;
-#ifdef CONFIG_MLX5_CORE_EN
err = mlx5e_init();
- if (err) {
- pci_unregister_driver(&mlx5_core_driver);
- goto err_debug;
- }
-#endif
+ if (err)
+ goto err_en;
return 0;
+err_en:
+ mlx5_sf_driver_unregister();
err_sf:
pci_unregister_driver(&mlx5_core_driver);
err_debug:
@@ -1803,9 +1844,7 @@ err_debug:
static void __exit cleanup(void)
{
-#ifdef CONFIG_MLX5_CORE_EN
mlx5e_cleanup();
-#endif
mlx5_sf_driver_unregister();
pci_unregister_driver(&mlx5_core_driver);
mlx5_unregister_debugfs();
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 343807ac2036..230eab7e3bc9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -168,6 +168,8 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
void mlx5_lag_add_mdev(struct mlx5_core_dev *dev);
void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev);
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev);
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev);
int mlx5_events_init(struct mlx5_core_dev *dev);
void mlx5_events_cleanup(struct mlx5_core_dev *dev);
@@ -206,8 +208,13 @@ int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw,
int mlx5_fw_version_query(struct mlx5_core_dev *dev,
u32 *running_ver, u32 *stored_ver);
+#ifdef CONFIG_MLX5_CORE_EN
int mlx5e_init(void);
void mlx5e_cleanup(void);
+#else
+static inline int mlx5e_init(void){ return 0; }
+static inline void mlx5e_cleanup(void){}
+#endif
static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev)
{
@@ -270,4 +277,9 @@ static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev)
return MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
}
+
+bool mlx5_eth_supported(struct mlx5_core_dev *dev);
+bool mlx5_rdma_supported(struct mlx5_core_dev *dev);
+bool mlx5_vnet_supported(struct mlx5_core_dev *dev);
+
#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index b25f764daa08..c79a10b3454d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -18,7 +18,7 @@
#define MLX5_SFS_PER_CTRL_IRQ 64
#define MLX5_IRQ_CTRL_SF_MAX 8
-/* min num of vectores for SFs to be enabled */
+/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2
#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
@@ -28,13 +28,13 @@
#define MLX5_EQ_REFS_PER_IRQ (2)
struct mlx5_irq {
- u32 index;
struct atomic_notifier_head nh;
cpumask_var_t mask;
char name[MLX5_MAX_IRQ_NAME];
- struct kref kref;
- int irqn;
struct mlx5_irq_pool *pool;
+ int refcount;
+ u32 index;
+ int irqn;
};
struct mlx5_irq_pool {
@@ -138,9 +138,8 @@ out:
return ret;
}
-static void irq_release(struct kref *kref)
+static void irq_release(struct mlx5_irq *irq)
{
- struct mlx5_irq *irq = container_of(kref, struct mlx5_irq, kref);
struct mlx5_irq_pool *pool = irq->pool;
xa_erase(&pool->irqs, irq->index);
@@ -159,10 +158,31 @@ static void irq_put(struct mlx5_irq *irq)
struct mlx5_irq_pool *pool = irq->pool;
mutex_lock(&pool->lock);
- kref_put(&irq->kref, irq_release);
+ irq->refcount--;
+ if (!irq->refcount)
+ irq_release(irq);
mutex_unlock(&pool->lock);
}
+static int irq_get_locked(struct mlx5_irq *irq)
+{
+ lockdep_assert_held(&irq->pool->lock);
+ if (WARN_ON_ONCE(!irq->refcount))
+ return 0;
+ irq->refcount++;
+ return 1;
+}
+
+static int irq_get(struct mlx5_irq *irq)
+{
+ int err;
+
+ mutex_lock(&irq->pool->lock);
+ err = irq_get_locked(irq);
+ mutex_unlock(&irq->pool->lock);
+ return err;
+}
+
static irqreturn_t irq_int_handler(int irq, void *nh)
{
atomic_notifier_call_chain(nh, 0, NULL);
@@ -214,7 +234,8 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
err = -ENOMEM;
goto err_cpumask;
}
- kref_init(&irq->kref);
+ irq->pool = pool;
+ irq->refcount = 1;
irq->index = i;
err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL));
if (err) {
@@ -222,7 +243,6 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
irq->index, err);
goto err_xa;
}
- irq->pool = pool;
return irq;
err_xa:
free_cpumask_var(irq->mask);
@@ -235,24 +255,27 @@ err_req_irq:
int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
- int err;
+ int ret;
- err = kref_get_unless_zero(&irq->kref);
- if (WARN_ON_ONCE(!err))
+ ret = irq_get(irq);
+ if (!ret)
/* Something very bad happens here, we are enabling EQ
* on non-existing IRQ.
*/
return -ENOENT;
- err = atomic_notifier_chain_register(&irq->nh, nb);
- if (err)
+ ret = atomic_notifier_chain_register(&irq->nh, nb);
+ if (ret)
irq_put(irq);
- return err;
+ return ret;
}
int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
+ int err = 0;
+
+ err = atomic_notifier_chain_unregister(&irq->nh, nb);
irq_put(irq);
- return atomic_notifier_chain_unregister(&irq->nh, nb);
+ return err;
}
struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
@@ -301,10 +324,9 @@ static struct mlx5_irq *irq_pool_find_least_loaded(struct mlx5_irq_pool *pool,
xa_for_each_range(&pool->irqs, index, iter, start, end) {
if (!cpumask_equal(iter->mask, affinity))
continue;
- if (kref_read(&iter->kref) < pool->min_threshold)
+ if (iter->refcount < pool->min_threshold)
return iter;
- if (!irq || kref_read(&iter->kref) <
- kref_read(&irq->kref))
+ if (!irq || iter->refcount < irq->refcount)
irq = iter;
}
return irq;
@@ -319,7 +341,7 @@ static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool,
mutex_lock(&pool->lock);
least_loaded_irq = irq_pool_find_least_loaded(pool, affinity);
if (least_loaded_irq &&
- kref_read(&least_loaded_irq->kref) < pool->min_threshold)
+ least_loaded_irq->refcount < pool->min_threshold)
goto out;
new_irq = irq_pool_create_irq(pool, affinity);
if (IS_ERR(new_irq)) {
@@ -337,11 +359,11 @@ static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool,
least_loaded_irq = new_irq;
goto unlock;
out:
- kref_get(&least_loaded_irq->kref);
- if (kref_read(&least_loaded_irq->kref) > pool->max_threshold)
+ irq_get_locked(least_loaded_irq);
+ if (least_loaded_irq->refcount > pool->max_threshold)
mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n",
least_loaded_irq->irqn, pool->name,
- kref_read(&least_loaded_irq->kref) / MLX5_EQ_REFS_PER_IRQ);
+ least_loaded_irq->refcount / MLX5_EQ_REFS_PER_IRQ);
unlock:
mutex_unlock(&pool->lock);
return least_loaded_irq;
@@ -357,7 +379,7 @@ irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
mutex_lock(&pool->lock);
irq = xa_load(&pool->irqs, vecidx);
if (irq) {
- kref_get(&irq->kref);
+ irq_get_locked(irq);
goto unlock;
}
irq = irq_request(pool, vecidx);
@@ -424,7 +446,7 @@ out:
return irq;
mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
irq->irqn, cpumask_pr_args(affinity),
- kref_read(&irq->kref) / MLX5_EQ_REFS_PER_IRQ);
+ irq->refcount / MLX5_EQ_REFS_PER_IRQ);
return irq;
}
@@ -437,6 +459,7 @@ irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
if (!pool)
return ERR_PTR(-ENOMEM);
pool->dev = dev;
+ mutex_init(&pool->lock);
xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
pool->xa_num_irqs.min = start;
pool->xa_num_irqs.max = start + size - 1;
@@ -445,7 +468,6 @@ irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
name);
pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
- mutex_init(&pool->lock);
mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
name, size, start);
return pool;
@@ -456,9 +478,14 @@ static void irq_pool_free(struct mlx5_irq_pool *pool)
struct mlx5_irq *irq;
unsigned long index;
+ /* There are cases in which we are destroying the irq_table before
+ * freeing all the IRQs, fast teardown for example. Hence, free the irqs
+ * which might not have been freed.
+ */
xa_for_each(&pool->irqs, index, irq)
- irq_release(&irq->kref);
+ irq_release(irq);
xa_destroy(&pool->irqs);
+ mutex_destroy(&pool->lock);
kvfree(pool);
}
@@ -479,7 +506,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
if (!mlx5_sf_max_functions(dev))
return 0;
if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
- mlx5_core_err(dev, "Not enough IRQs for SFs. SF may run at lower performance\n");
+ mlx5_core_dbg(dev, "Not enught IRQs for SFs. SF may run at lower performance\n");
return 0;
}
@@ -597,7 +624,7 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
return;
/* There are cases where IRQs still will be in used when we reaching
- * to here. Hence, making sure all the irqs are realeased.
+ * to here. Hence, making sure all the irqs are released.
*/
irq_pools_destroy(table);
pci_free_irq_vectors(dev->pdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
index fa0288afc0dd..871c2fbe18d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
@@ -39,7 +39,7 @@ static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, cha
struct auxiliary_device *adev = container_of(dev, struct auxiliary_device, dev);
struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
- return scnprintf(buf, PAGE_SIZE, "%u\n", sf_dev->sfnum);
+ return sysfs_emit(buf, "%u\n", sf_dev->sfnum);
}
static DEVICE_ATTR_RO(sfnum);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
index 42c8ee03fe3e..052f48068dc1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
@@ -14,7 +14,7 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
struct devlink *devlink;
int err;
- devlink = mlx5_devlink_alloc();
+ devlink = mlx5_devlink_alloc(&adev->dev);
if (!devlink)
return -ENOMEM;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
index 1be048769309..13891fdc607e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -164,12 +164,12 @@ static bool mlx5_sf_is_active(const struct mlx5_sf *sf)
return sf->hw_state == MLX5_VHCA_STATE_ACTIVE || sf->hw_state == MLX5_VHCA_STATE_IN_USE;
}
-int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port,
enum devlink_port_fn_state *state,
enum devlink_port_fn_opstate *opstate,
struct netlink_ext_ack *extack)
{
- struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink);
struct mlx5_sf_table *table;
struct mlx5_sf *sf;
int err = 0;
@@ -248,11 +248,11 @@ out:
return err;
}
-int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port,
enum devlink_port_fn_state state,
struct netlink_ext_ack *extack)
{
- struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink);
struct mlx5_sf_table *table;
struct mlx5_sf *sf;
int err;
@@ -476,7 +476,7 @@ static void mlx5_sf_table_disable(struct mlx5_sf_table *table)
return;
/* Balances with refcount_set; drop the reference so that new user cmd cannot start
- * and new vhca event handler cannnot run.
+ * and new vhca event handler cannot run.
*/
mlx5_sf_table_put(table);
wait_for_completion(&table->disable_complete);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h
index 81ce13b19ee8..3a480e06ecc0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h
@@ -24,11 +24,11 @@ int mlx5_devlink_sf_port_new(struct devlink *devlink,
unsigned int *new_port_index);
int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index,
struct netlink_ext_ack *extack);
-int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port,
enum devlink_port_fn_state *state,
enum devlink_port_fn_opstate *opstate,
struct netlink_ext_ack *extack);
-int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port,
enum devlink_port_fn_state state,
struct netlink_ext_ack *extack);
#else
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 6475ba35cf6b..a5b9f65db23c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -18,12 +18,39 @@ enum dr_action_valid_state {
DR_ACTION_STATE_ENCAP,
DR_ACTION_STATE_DECAP,
DR_ACTION_STATE_MODIFY_HDR,
- DR_ACTION_STATE_MODIFY_VLAN,
+ DR_ACTION_STATE_POP_VLAN,
+ DR_ACTION_STATE_PUSH_VLAN,
DR_ACTION_STATE_NON_TERM,
DR_ACTION_STATE_TERM,
DR_ACTION_STATE_MAX,
};
+static const char * const action_type_to_str[] = {
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = "DR_ACTION_TYP_TNL_L2_TO_L2",
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = "DR_ACTION_TYP_L2_TO_TNL_L2",
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = "DR_ACTION_TYP_TNL_L3_TO_L2",
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = "DR_ACTION_TYP_L2_TO_TNL_L3",
+ [DR_ACTION_TYP_DROP] = "DR_ACTION_TYP_DROP",
+ [DR_ACTION_TYP_QP] = "DR_ACTION_TYP_QP",
+ [DR_ACTION_TYP_FT] = "DR_ACTION_TYP_FT",
+ [DR_ACTION_TYP_CTR] = "DR_ACTION_TYP_CTR",
+ [DR_ACTION_TYP_TAG] = "DR_ACTION_TYP_TAG",
+ [DR_ACTION_TYP_MODIFY_HDR] = "DR_ACTION_TYP_MODIFY_HDR",
+ [DR_ACTION_TYP_VPORT] = "DR_ACTION_TYP_VPORT",
+ [DR_ACTION_TYP_POP_VLAN] = "DR_ACTION_TYP_POP_VLAN",
+ [DR_ACTION_TYP_PUSH_VLAN] = "DR_ACTION_TYP_PUSH_VLAN",
+ [DR_ACTION_TYP_INSERT_HDR] = "DR_ACTION_TYP_INSERT_HDR",
+ [DR_ACTION_TYP_REMOVE_HDR] = "DR_ACTION_TYP_REMOVE_HDR",
+ [DR_ACTION_TYP_MAX] = "DR_ACTION_UNKNOWN",
+};
+
+static const char *dr_action_id_to_str(enum mlx5dr_action_type action_id)
+{
+ if (action_id > DR_ACTION_TYP_MAX)
+ action_id = DR_ACTION_TYP_MAX;
+ return action_type_to_str[action_id];
+}
+
static const enum dr_action_valid_state
next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] = {
[DR_ACTION_DOMAIN_NIC_INGRESS] = {
@@ -39,8 +66,10 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
},
[DR_ACTION_STATE_DECAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -53,7 +82,8 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
},
[DR_ACTION_STATE_ENCAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -73,20 +103,31 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
},
- [DR_ACTION_STATE_MODIFY_VLAN] = {
+ [DR_ACTION_STATE_POP_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
- [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_MODIFY_VLAN,
- [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN,
- [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
},
+ [DR_ACTION_STATE_PUSH_VLAN] = {
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ },
[DR_ACTION_STATE_NON_TERM] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
@@ -99,8 +140,10 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
},
[DR_ACTION_STATE_TERM] = {
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
@@ -115,8 +158,16 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ },
+ [DR_ACTION_STATE_DECAP] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
},
[DR_ACTION_STATE_ENCAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -132,14 +183,25 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
- [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ },
+ [DR_ACTION_STATE_POP_VLAN] = {
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
},
- [DR_ACTION_STATE_MODIFY_VLAN] = {
+ [DR_ACTION_STATE_PUSH_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
- [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN,
- [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
@@ -152,8 +214,10 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
},
[DR_ACTION_STATE_TERM] = {
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
@@ -170,8 +234,10 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
},
[DR_ACTION_STATE_DECAP] = {
@@ -180,11 +246,12 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
},
[DR_ACTION_STATE_ENCAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -203,13 +270,26 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
},
- [DR_ACTION_STATE_MODIFY_VLAN] = {
+ [DR_ACTION_STATE_POP_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
- [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
- [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ },
+ [DR_ACTION_STATE_PUSH_VLAN] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
@@ -226,8 +306,10 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
},
[DR_ACTION_STATE_TERM] = {
@@ -244,8 +326,17 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
- [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ },
+ [DR_ACTION_STATE_DECAP] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
},
[DR_ACTION_STATE_ENCAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -262,15 +353,27 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
- [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ },
+ [DR_ACTION_STATE_POP_VLAN] = {
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
},
- [DR_ACTION_STATE_MODIFY_VLAN] = {
+ [DR_ACTION_STATE_PUSH_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
- [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
- [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
@@ -285,7 +388,9 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
- [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
},
[DR_ACTION_STATE_TERM] = {
@@ -314,6 +419,9 @@ dr_action_reformat_to_action_type(enum mlx5dr_action_reformat_type reformat_type
case DR_ACTION_REFORMAT_TYP_INSERT_HDR:
*action_type = DR_ACTION_TYP_INSERT_HDR;
break;
+ case DR_ACTION_REFORMAT_TYP_REMOVE_HDR:
+ *action_type = DR_ACTION_TYP_REMOVE_HDR;
+ break;
default:
return -EINVAL;
}
@@ -326,7 +434,7 @@ dr_action_reformat_to_action_type(enum mlx5dr_action_reformat_type reformat_type
* the new size of the STEs array, rule with actions.
*/
static void dr_actions_apply(struct mlx5dr_domain *dmn,
- enum mlx5dr_ste_entry_type ste_type,
+ enum mlx5dr_domain_nic_type nic_type,
u8 *action_type_set,
u8 *last_ste,
struct mlx5dr_ste_actions_attr *attr,
@@ -335,7 +443,7 @@ static void dr_actions_apply(struct mlx5dr_domain *dmn,
struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
u32 added_stes = 0;
- if (ste_type == MLX5DR_STE_TYPE_RX)
+ if (nic_type == DR_DOMAIN_NIC_TYPE_RX)
mlx5dr_ste_set_actions_rx(ste_ctx, dmn, action_type_set,
last_ste, attr, &added_stes);
else
@@ -347,7 +455,7 @@ static void dr_actions_apply(struct mlx5dr_domain *dmn,
static enum dr_action_domain
dr_action_get_action_domain(enum mlx5dr_domain_type domain,
- enum mlx5dr_ste_entry_type ste_type)
+ enum mlx5dr_domain_nic_type nic_type)
{
switch (domain) {
case MLX5DR_DOMAIN_TYPE_NIC_RX:
@@ -355,7 +463,7 @@ dr_action_get_action_domain(enum mlx5dr_domain_type domain,
case MLX5DR_DOMAIN_TYPE_NIC_TX:
return DR_ACTION_DOMAIN_NIC_EGRESS;
case MLX5DR_DOMAIN_TYPE_FDB:
- if (ste_type == MLX5DR_STE_TYPE_RX)
+ if (nic_type == DR_DOMAIN_NIC_TYPE_RX)
return DR_ACTION_DOMAIN_FDB_INGRESS;
return DR_ACTION_DOMAIN_FDB_EGRESS;
default:
@@ -421,6 +529,18 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn,
return 0;
}
+static void dr_action_print_sequence(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action *actions[],
+ int last_idx)
+{
+ int i;
+
+ for (i = 0; i <= last_idx; i++)
+ mlx5dr_err(dmn, "< %s (%d) > ",
+ dr_action_id_to_str(actions[i]->action_type),
+ actions[i]->action_type);
+}
+
#define WITH_VLAN_NUM_HW_ACTIONS 6
int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
@@ -431,7 +551,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
u32 *new_hw_ste_arr_sz)
{
struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
- bool rx_rule = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX;
+ bool rx_rule = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX;
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
u8 action_type_set[DR_ACTION_TYP_MAX] = {};
struct mlx5dr_ste_actions_attr attr = {};
@@ -445,7 +565,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
attr.gvmi = dmn->info.caps.gvmi;
attr.hit_gvmi = dmn->info.caps.gvmi;
attr.final_icm_addr = nic_dmn->default_icm_addr;
- action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->ste_type);
+ action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->type);
for (i = 0; i < num_actions; i++) {
struct mlx5dr_action_dest_tbl *dest_tbl;
@@ -467,11 +587,11 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
if (dest_tbl->tbl->dmn != dmn) {
mlx5dr_err(dmn,
"Destination table belongs to a different domain\n");
- goto out_invalid_arg;
+ return -EINVAL;
}
if (dest_tbl->tbl->level <= matcher->tbl->level) {
- mlx5_core_warn_once(dmn->mdev,
- "Connecting table to a lower/same level destination table\n");
+ mlx5_core_dbg_once(dmn->mdev,
+ "Connecting table to a lower/same level destination table\n");
mlx5dr_dbg(dmn,
"Connecting table at level %d to a destination table at level %d\n",
matcher->tbl->level,
@@ -509,7 +629,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
break;
case DR_ACTION_TYP_QP:
mlx5dr_info(dmn, "Domain doesn't support QP\n");
- goto out_invalid_arg;
+ return -EOPNOTSUPP;
case DR_ACTION_TYP_CTR:
attr.ctr_id = action->ctr->ctr_id +
action->ctr->offeset;
@@ -536,7 +656,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
if (rx_rule &&
!(dmn->ste_ctx->actions_caps & DR_STE_CTX_ACTION_CAP_RX_ENCAP)) {
mlx5dr_info(dmn, "Device doesn't support Encap on RX\n");
- goto out_invalid_arg;
+ return -EOPNOTSUPP;
}
attr.reformat.size = action->reformat->size;
attr.reformat.id = action->reformat->id;
@@ -549,48 +669,66 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
attr.hit_gvmi = action->vport->caps->vhca_gvmi;
dest_action = action;
if (rx_rule) {
- /* Loopback on WIRE vport is not supported */
- if (action->vport->caps->num == WIRE_PORT)
- goto out_invalid_arg;
-
+ if (action->vport->caps->num == WIRE_PORT) {
+ mlx5dr_dbg(dmn, "Device doesn't support Loopback on WIRE vport\n");
+ return -EOPNOTSUPP;
+ }
attr.final_icm_addr = action->vport->caps->icm_address_rx;
} else {
attr.final_icm_addr = action->vport->caps->icm_address_tx;
}
break;
case DR_ACTION_TYP_POP_VLAN:
+ if (!rx_rule && !(dmn->ste_ctx->actions_caps &
+ DR_STE_CTX_ACTION_CAP_TX_POP)) {
+ mlx5dr_dbg(dmn, "Device doesn't support POP VLAN action on TX\n");
+ return -EOPNOTSUPP;
+ }
+
max_actions_type = MLX5DR_MAX_VLANS;
attr.vlans.count++;
break;
case DR_ACTION_TYP_PUSH_VLAN:
+ if (rx_rule && !(dmn->ste_ctx->actions_caps &
+ DR_STE_CTX_ACTION_CAP_RX_PUSH)) {
+ mlx5dr_dbg(dmn, "Device doesn't support PUSH VLAN action on RX\n");
+ return -EOPNOTSUPP;
+ }
+
max_actions_type = MLX5DR_MAX_VLANS;
- if (attr.vlans.count == MLX5DR_MAX_VLANS)
+ if (attr.vlans.count == MLX5DR_MAX_VLANS) {
+ mlx5dr_dbg(dmn, "Max VLAN push/pop count exceeded\n");
return -EINVAL;
+ }
attr.vlans.headers[attr.vlans.count++] = action->push_vlan->vlan_hdr;
break;
case DR_ACTION_TYP_INSERT_HDR:
+ case DR_ACTION_TYP_REMOVE_HDR:
attr.reformat.size = action->reformat->size;
attr.reformat.id = action->reformat->id;
attr.reformat.param_0 = action->reformat->param_0;
attr.reformat.param_1 = action->reformat->param_1;
break;
default:
- goto out_invalid_arg;
+ mlx5dr_err(dmn, "Unsupported action type %d\n", action_type);
+ return -EINVAL;
}
/* Check action duplication */
if (++action_type_set[action_type] > max_actions_type) {
mlx5dr_err(dmn, "Action type %d supports only max %d time(s)\n",
action_type, max_actions_type);
- goto out_invalid_arg;
+ return -EINVAL;
}
/* Check action state machine is valid */
if (dr_action_validate_and_get_next_state(action_domain,
action_type,
&state)) {
- mlx5dr_err(dmn, "Invalid action sequence provided\n");
+ mlx5dr_err(dmn, "Invalid action (gvmi: %d, is_rx: %d) sequence provided:",
+ attr.gvmi, rx_rule);
+ dr_action_print_sequence(dmn, actions, i);
return -EOPNOTSUPP;
}
}
@@ -614,16 +752,13 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
}
dr_actions_apply(dmn,
- nic_dmn->ste_type,
+ nic_dmn->type,
action_type_set,
last_ste,
&attr,
new_hw_ste_arr_sz);
return 0;
-
-out_invalid_arg:
- return -EINVAL;
}
static unsigned int action_size[DR_ACTION_TYP_MAX] = {
@@ -638,6 +773,7 @@ static unsigned int action_size[DR_ACTION_TYP_MAX] = {
[DR_ACTION_TYP_VPORT] = sizeof(struct mlx5dr_action_vport),
[DR_ACTION_TYP_PUSH_VLAN] = sizeof(struct mlx5dr_action_push_vlan),
[DR_ACTION_TYP_INSERT_HDR] = sizeof(struct mlx5dr_action_reformat),
+ [DR_ACTION_TYP_REMOVE_HDR] = sizeof(struct mlx5dr_action_reformat),
[DR_ACTION_TYP_SAMPLER] = sizeof(struct mlx5dr_action_sampler),
};
@@ -709,7 +845,8 @@ dec_ref:
struct mlx5dr_action *
mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
struct mlx5dr_action_dest *dests,
- u32 num_of_dests)
+ u32 num_of_dests,
+ bool ignore_flow_level)
{
struct mlx5dr_cmd_flow_destination_hw_info *hw_dests;
struct mlx5dr_action **ref_actions;
@@ -776,7 +913,8 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
num_of_dests,
reformat_req,
&action->dest_tbl->fw_tbl.id,
- &action->dest_tbl->fw_tbl.group_id);
+ &action->dest_tbl->fw_tbl.group_id,
+ ignore_flow_level);
if (ret)
goto free_action;
@@ -884,11 +1022,23 @@ dr_action_verify_reformat_params(enum mlx5dr_action_type reformat_type,
size_t data_sz,
void *data)
{
- if ((!data && data_sz) || (data && !data_sz) ||
- ((reformat_param_0 || reformat_param_1) &&
- reformat_type != DR_ACTION_TYP_INSERT_HDR) ||
- reformat_type > DR_ACTION_TYP_INSERT_HDR) {
- mlx5dr_dbg(dmn, "Invalid reformat parameter!\n");
+ if (reformat_type == DR_ACTION_TYP_INSERT_HDR) {
+ if ((!data && data_sz) || (data && !data_sz) ||
+ MLX5_CAP_GEN_2(dmn->mdev, max_reformat_insert_size) < data_sz ||
+ MLX5_CAP_GEN_2(dmn->mdev, max_reformat_insert_offset) < reformat_param_1) {
+ mlx5dr_dbg(dmn, "Invalid reformat parameters for INSERT_HDR\n");
+ goto out_err;
+ }
+ } else if (reformat_type == DR_ACTION_TYP_REMOVE_HDR) {
+ if (data ||
+ MLX5_CAP_GEN_2(dmn->mdev, max_reformat_remove_size) < data_sz ||
+ MLX5_CAP_GEN_2(dmn->mdev, max_reformat_remove_offset) < reformat_param_1) {
+ mlx5dr_dbg(dmn, "Invalid reformat parameters for REMOVE_HDR\n");
+ goto out_err;
+ }
+ } else if (reformat_param_0 || reformat_param_1 ||
+ reformat_type > DR_ACTION_TYP_REMOVE_HDR) {
+ mlx5dr_dbg(dmn, "Invalid reformat parameters\n");
goto out_err;
}
@@ -987,7 +1137,6 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
return 0;
}
case DR_ACTION_TYP_INSERT_HDR:
- {
ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev,
MLX5_REFORMAT_TYPE_INSERT_HDR,
reformat_param_0,
@@ -1002,7 +1151,12 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
action->reformat->param_0 = reformat_param_0;
action->reformat->param_1 = reformat_param_1;
return 0;
- }
+ case DR_ACTION_TYP_REMOVE_HDR:
+ action->reformat->id = 0;
+ action->reformat->size = data_sz;
+ action->reformat->param_0 = reformat_param_0;
+ action->reformat->param_1 = reformat_param_1;
+ return 0;
default:
mlx5dr_info(dmn, "Reformat type is not supported %d\n", action->action_type);
return -EINVAL;
@@ -1658,6 +1812,7 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action)
}
break;
case DR_ACTION_TYP_TNL_L2_TO_L2:
+ case DR_ACTION_TYP_REMOVE_HDR:
refcount_dec(&action->reformat->dmn->refcount);
break;
case DR_ACTION_TYP_TNL_L3_TO_L2:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 54e1f5438bbe..56307283bf9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -655,6 +655,7 @@ int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(set_fte_in, in, table_type, ft->type);
MLX5_SET(set_fte_in, in, table_id, ft->id);
MLX5_SET(set_fte_in, in, flow_index, fte->index);
+ MLX5_SET(set_fte_in, in, ignore_flow_level, fte->ignore_flow_level);
if (ft->vport) {
MLX5_SET(set_fte_in, in, vport_number, ft->vport);
MLX5_SET(set_fte_in, in, other_vport, 1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
index 7091b1be84ef..0fe159809ba1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -245,7 +245,7 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev,
return -ENOTSUPP;
dmn->info.supp_sw_steering = true;
- dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX;
+ dmn->info.rx.type = DR_DOMAIN_NIC_TYPE_RX;
dmn->info.rx.default_icm_addr = dmn->info.caps.nic_rx_drop_address;
dmn->info.rx.drop_icm_addr = dmn->info.caps.nic_rx_drop_address;
break;
@@ -254,7 +254,7 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev,
return -ENOTSUPP;
dmn->info.supp_sw_steering = true;
- dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX;
+ dmn->info.tx.type = DR_DOMAIN_NIC_TYPE_TX;
dmn->info.tx.default_icm_addr = dmn->info.caps.nic_tx_allow_address;
dmn->info.tx.drop_icm_addr = dmn->info.caps.nic_tx_drop_address;
break;
@@ -265,8 +265,8 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev,
if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, fdb))
return -ENOTSUPP;
- dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX;
- dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX;
+ dmn->info.rx.type = DR_DOMAIN_NIC_TYPE_RX;
+ dmn->info.tx.type = DR_DOMAIN_NIC_TYPE_TX;
vport_cap = mlx5dr_get_vport_cap(&dmn->info.caps, 0);
if (!vport_cap) {
mlx5dr_err(dmn, "Failed to get esw manager vport\n");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
index 7ccfd40586ce..0d6f86eb248b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
@@ -103,7 +103,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
int num_dest,
bool reformat_req,
u32 *tbl_id,
- u32 *group_id)
+ u32 *group_id,
+ bool ignore_flow_level)
{
struct mlx5dr_cmd_create_flow_table_attr ft_attr = {};
struct mlx5dr_cmd_fte_info fte_info = {};
@@ -137,6 +138,7 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
fte_info.dests_size = num_dest;
fte_info.val = val;
fte_info.dest_arr = dest;
+ fte_info.ignore_flow_level = ignore_flow_level;
ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info);
if (ret) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
index 6f6191d1d5a6..b5409cc021d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -396,13 +396,14 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
struct mlx5dr_match_param mask = {};
+ bool allow_empty_match = false;
struct mlx5dr_ste_build *sb;
bool inner, rx;
int idx = 0;
int ret, i;
sb = nic_matcher->ste_builder_arr[outer_ipv][inner_ipv];
- rx = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX;
+ rx = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX;
/* Create a temporary mask to track and clear used mask fields */
if (matcher->match_criteria & DR_MATCHER_CRITERIA_OUTER)
@@ -428,6 +429,16 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
if (ret)
return ret;
+ /* Optimize RX pipe by reducing source port match, since
+ * the FDB RX part is connected only to the wire.
+ */
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB &&
+ rx && mask.misc.source_port) {
+ mask.misc.source_port = 0;
+ mask.misc.source_eswitch_owner_vhca_id = 0;
+ allow_empty_match = true;
+ }
+
/* Outer */
if (matcher->match_criteria & (DR_MATCHER_CRITERIA_OUTER |
DR_MATCHER_CRITERIA_MISC |
@@ -619,7 +630,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
}
/* Empty matcher, takes all */
- if (matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY)
+ if ((!idx && allow_empty_match) ||
+ matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY)
mlx5dr_ste_build_empty_always_hit(&sb[idx++], rx);
if (idx == 0) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
index 43356fad53de..aca80efc28fa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
@@ -81,6 +81,7 @@ dr_rule_create_collision_entry(struct mlx5dr_matcher *matcher,
}
ste->ste_chain_location = orig_ste->ste_chain_location;
+ ste->htbl->pointing_ste = orig_ste->htbl->pointing_ste;
/* In collision entry, all members share the same miss_list_head */
ste->htbl->miss_list = mlx5dr_ste_get_miss_list(orig_ste);
@@ -185,6 +186,9 @@ dr_rule_rehash_handle_collision(struct mlx5dr_matcher *matcher,
if (!new_ste)
return NULL;
+ /* Update collision pointing STE */
+ new_ste->htbl->pointing_ste = col_ste->htbl->pointing_ste;
+
/* In collision entry, all members share the same miss_list_head */
new_ste->htbl->miss_list = mlx5dr_ste_get_miss_list(col_ste);
@@ -212,7 +216,7 @@ static void dr_rule_rehash_copy_ste_ctrl(struct mlx5dr_matcher *matcher,
new_ste->next_htbl = cur_ste->next_htbl;
new_ste->ste_chain_location = cur_ste->ste_chain_location;
- if (!mlx5dr_ste_is_last_in_rule(nic_matcher, new_ste->ste_chain_location))
+ if (new_ste->next_htbl)
new_ste->next_htbl->pointing_ste = new_ste;
/* We need to copy the refcount since this ste
@@ -220,10 +224,8 @@ static void dr_rule_rehash_copy_ste_ctrl(struct mlx5dr_matcher *matcher,
*/
new_ste->refcount = cur_ste->refcount;
- /* Link old STEs rule_mem list to the new ste */
- mlx5dr_rule_update_rule_member(cur_ste, new_ste);
- INIT_LIST_HEAD(&new_ste->rule_list);
- list_splice_tail_init(&cur_ste->rule_list, &new_ste->rule_list);
+ /* Link old STEs rule to the new ste */
+ mlx5dr_rule_set_last_member(cur_ste->rule_rx_tx, new_ste, false);
}
static struct mlx5dr_ste *
@@ -404,7 +406,7 @@ dr_rule_rehash_htbl(struct mlx5dr_rule *rule,
info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr;
mlx5dr_ste_set_formatted_ste(dmn->ste_ctx,
dmn->info.caps.gvmi,
- nic_dmn,
+ nic_dmn->type,
new_htbl,
formatted_ste,
&info);
@@ -581,34 +583,66 @@ free_action_members:
return -ENOMEM;
}
-/* While the pointer of ste is no longer valid, like while moving ste to be
- * the first in the miss_list, and to be in the origin table,
- * all rule-members that are attached to this ste should update their ste member
- * to the new pointer
- */
-void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *ste,
- struct mlx5dr_ste *new_ste)
+void mlx5dr_rule_set_last_member(struct mlx5dr_rule_rx_tx *nic_rule,
+ struct mlx5dr_ste *ste,
+ bool force)
{
- struct mlx5dr_rule_member *rule_mem;
+ /* Update rule member is usually done for the last STE or during rule
+ * creation to recover from mid-creation failure (for this purpose the
+ * force flag is used)
+ */
+ if (ste->next_htbl && !force)
+ return;
- list_for_each_entry(rule_mem, &ste->rule_list, use_ste_list)
- rule_mem->ste = new_ste;
+ /* Update is required since each rule keeps track of its last STE */
+ ste->rule_rx_tx = nic_rule;
+ nic_rule->last_rule_ste = ste;
+}
+
+static struct mlx5dr_ste *dr_rule_get_pointed_ste(struct mlx5dr_ste *curr_ste)
+{
+ struct mlx5dr_ste *first_ste;
+
+ first_ste = list_first_entry(mlx5dr_ste_get_miss_list(curr_ste),
+ struct mlx5dr_ste, miss_list_node);
+
+ return first_ste->htbl->pointing_ste;
+}
+
+int mlx5dr_rule_get_reverse_rule_members(struct mlx5dr_ste **ste_arr,
+ struct mlx5dr_ste *curr_ste,
+ int *num_of_stes)
+{
+ bool first = false;
+
+ *num_of_stes = 0;
+
+ if (!curr_ste)
+ return -ENOENT;
+
+ /* Iterate from last to first */
+ while (!first) {
+ first = curr_ste->ste_chain_location == 1;
+ ste_arr[*num_of_stes] = curr_ste;
+ *num_of_stes += 1;
+ curr_ste = dr_rule_get_pointed_ste(curr_ste);
+ }
+
+ return 0;
}
static void dr_rule_clean_rule_members(struct mlx5dr_rule *rule,
struct mlx5dr_rule_rx_tx *nic_rule)
{
- struct mlx5dr_rule_member *rule_mem;
- struct mlx5dr_rule_member *tmp_mem;
+ struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES];
+ struct mlx5dr_ste *curr_ste = nic_rule->last_rule_ste;
+ int i;
- if (list_empty(&nic_rule->rule_members_list))
+ if (mlx5dr_rule_get_reverse_rule_members(ste_arr, curr_ste, &i))
return;
- list_for_each_entry_safe(rule_mem, tmp_mem, &nic_rule->rule_members_list, list) {
- list_del(&rule_mem->list);
- list_del(&rule_mem->use_ste_list);
- mlx5dr_ste_put(rule_mem->ste, rule->matcher, nic_rule->nic_matcher);
- kvfree(rule_mem);
- }
+
+ while (i--)
+ mlx5dr_ste_put(ste_arr[i], rule->matcher, nic_rule->nic_matcher);
}
static u16 dr_get_bits_per_mask(u16 byte_mask)
@@ -628,43 +662,25 @@ static bool dr_rule_need_enlarge_hash(struct mlx5dr_ste_htbl *htbl,
struct mlx5dr_domain_rx_tx *nic_dmn)
{
struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl;
+ int threshold;
if (dmn->info.max_log_sw_icm_sz <= htbl->chunk_size)
return false;
- if (!ctrl->may_grow)
+ if (!mlx5dr_ste_htbl_may_grow(htbl))
return false;
if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk_size)
return false;
- if (ctrl->num_of_collisions >= ctrl->increase_threshold &&
- (ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= ctrl->increase_threshold)
+ threshold = mlx5dr_ste_htbl_increase_threshold(htbl);
+ if (ctrl->num_of_collisions >= threshold &&
+ (ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= threshold)
return true;
return false;
}
-static int dr_rule_add_member(struct mlx5dr_rule_rx_tx *nic_rule,
- struct mlx5dr_ste *ste)
-{
- struct mlx5dr_rule_member *rule_mem;
-
- rule_mem = kvzalloc(sizeof(*rule_mem), GFP_KERNEL);
- if (!rule_mem)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&rule_mem->list);
- INIT_LIST_HEAD(&rule_mem->use_ste_list);
-
- rule_mem->ste = ste;
- list_add_tail(&rule_mem->list, &nic_rule->rule_members_list);
-
- list_add_tail(&rule_mem->use_ste_list, &ste->rule_list);
-
- return 0;
-}
-
static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule,
struct mlx5dr_rule_rx_tx *nic_rule,
struct list_head *send_ste_list,
@@ -679,15 +695,13 @@ static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule,
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
u8 *curr_hw_ste, *prev_hw_ste;
struct mlx5dr_ste *action_ste;
- int i, k, ret;
+ int i, k;
/* Two cases:
* 1. num_of_builders is equal to new_hw_ste_arr_sz, the action in the ste
* 2. num_of_builders is less then new_hw_ste_arr_sz, new ste was added
* to support the action.
*/
- if (num_of_builders == new_hw_ste_arr_sz)
- return 0;
for (i = num_of_builders, k = 0; i < new_hw_ste_arr_sz; i++, k++) {
curr_hw_ste = hw_ste_arr + i * DR_STE_SIZE;
@@ -700,6 +714,10 @@ static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule,
mlx5dr_ste_get(action_ste);
+ action_ste->htbl->pointing_ste = last_ste;
+ last_ste->next_htbl = action_ste->htbl;
+ last_ste = action_ste;
+
/* While free ste we go over the miss list, so add this ste to the list */
list_add_tail(&action_ste->miss_list_node,
mlx5dr_ste_get_miss_list(action_ste));
@@ -713,21 +731,19 @@ static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule,
mlx5dr_ste_set_hit_addr_by_next_htbl(dmn->ste_ctx,
prev_hw_ste,
action_ste->htbl);
- ret = dr_rule_add_member(nic_rule, action_ste);
- if (ret) {
- mlx5dr_dbg(dmn, "Failed adding rule member\n");
- goto free_ste_info;
- }
+
+ mlx5dr_rule_set_last_member(nic_rule, action_ste, true);
+
mlx5dr_send_fill_and_append_ste_send_info(action_ste, DR_STE_SIZE, 0,
curr_hw_ste,
ste_info_arr[k],
send_ste_list, false);
}
+ last_ste->next_htbl = NULL;
+
return 0;
-free_ste_info:
- kfree(ste_info_arr[k]);
err_exit:
mlx5dr_ste_put(action_ste, matcher, nic_matcher);
return -ENOMEM;
@@ -846,9 +862,9 @@ again:
new_htbl = dr_rule_rehash(rule, nic_rule, cur_htbl,
ste_location, send_ste_list);
if (!new_htbl) {
- mlx5dr_htbl_put(cur_htbl);
mlx5dr_err(dmn, "Failed creating rehash table, htbl-log_size: %d\n",
cur_htbl->chunk_size);
+ mlx5dr_htbl_put(cur_htbl);
} else {
cur_htbl = new_htbl;
}
@@ -1015,12 +1031,12 @@ static enum mlx5dr_ipv dr_rule_get_ipv(struct mlx5dr_match_spec *spec)
}
static bool dr_rule_skip(enum mlx5dr_domain_type domain,
- enum mlx5dr_ste_entry_type ste_type,
+ enum mlx5dr_domain_nic_type nic_type,
struct mlx5dr_match_param *mask,
struct mlx5dr_match_param *value,
u32 flow_source)
{
- bool rx = ste_type == MLX5DR_STE_TYPE_RX;
+ bool rx = nic_type == DR_DOMAIN_NIC_TYPE_RX;
if (domain != MLX5DR_DOMAIN_TYPE_FDB)
return false;
@@ -1065,9 +1081,7 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
nic_matcher = nic_rule->nic_matcher;
nic_dmn = nic_matcher->nic_tbl->nic_dmn;
- INIT_LIST_HEAD(&nic_rule->rule_members_list);
-
- if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param,
+ if (dr_rule_skip(dmn->type, nic_dmn->type, &matcher->mask, param,
rule->flow_source))
return 0;
@@ -1121,14 +1135,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
cur_htbl = ste->next_htbl;
- /* Keep all STEs in the rule struct */
- ret = dr_rule_add_member(nic_rule, ste);
- if (ret) {
- mlx5dr_dbg(dmn, "Failed adding rule member index %d\n", i);
- goto free_ste;
- }
-
mlx5dr_ste_get(ste);
+ mlx5dr_rule_set_last_member(nic_rule, ste, true);
}
/* Connect actions */
@@ -1153,8 +1161,6 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
return 0;
-free_ste:
- mlx5dr_ste_put(ste, matcher, nic_matcher);
free_rule:
dr_rule_clean_rule_members(rule, nic_rule);
/* Clean all ste_info's */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index 12cf323a5943..bfb14b4b1906 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -325,10 +325,14 @@ static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
do {
ne = dr_poll_cq(send_ring->cq, 1);
- if (ne < 0)
+ if (unlikely(ne < 0)) {
+ mlx5_core_warn_once(dmn->mdev, "SMFS QPN 0x%x is disabled/limited",
+ send_ring->qp->qpn);
+ send_ring->err_state = true;
return ne;
- else if (ne == 1)
+ } else if (ne == 1) {
send_ring->pending_wqe -= send_ring->signal_th;
+ }
} while (is_drain && send_ring->pending_wqe);
return 0;
@@ -361,6 +365,14 @@ static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
u32 buff_offset;
int ret;
+ if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
+ send_ring->err_state)) {
+ mlx5_core_dbg_once(dmn->mdev,
+ "Skipping post send: QP err state: %d, device state: %d\n",
+ send_ring->err_state, dmn->mdev->state);
+ return 0;
+ }
+
spin_lock(&send_ring->lock);
ret = dr_handle_pending_wc(dmn, send_ring);
@@ -620,6 +632,7 @@ static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
+ MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
@@ -749,7 +762,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
struct mlx5_cqe64 *cqe;
struct mlx5dr_cq *cq;
int inlen, err, eqn;
- unsigned int irqn;
void *cqc, *in;
__be64 *pas;
int vector;
@@ -782,7 +794,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
goto err_cqwq;
vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
- err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn);
+ err = mlx5_vector2eqn(mdev, vector, &eqn);
if (err) {
kvfree(in);
goto err_cqwq;
@@ -790,7 +802,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET(cqc, cqc, uar_page, uar->index);
MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
@@ -818,7 +830,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
*cq->mcq.arm_db = cpu_to_be32(2 << 28);
cq->mcq.vector = 0;
- cq->mcq.irqn = irqn;
cq->mcq.uar = uar;
return cq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 9b1529137cba..1cdfe4fccc7a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -172,9 +172,6 @@ static void dr_ste_replace(struct mlx5dr_ste *dst, struct mlx5dr_ste *src)
dst->next_htbl->pointing_ste = dst;
dst->refcount = src->refcount;
-
- INIT_LIST_HEAD(&dst->rule_list);
- list_splice_tail_init(&src->rule_list, &dst->rule_list);
}
/* Free ste which is the head and the only one in miss_list */
@@ -233,12 +230,12 @@ dr_ste_replace_head_ste(struct mlx5dr_matcher_rx_tx *nic_matcher,
/* Remove from the miss_list the next_ste before copy */
list_del_init(&next_ste->miss_list_node);
- /* All rule-members that use next_ste should know about that */
- mlx5dr_rule_update_rule_member(next_ste, ste);
-
/* Move data from next into ste */
dr_ste_replace(ste, next_ste);
+ /* Update the rule on STE change */
+ mlx5dr_rule_set_last_member(next_ste->rule_rx_tx, ste, false);
+
/* Copy all 64 hw_ste bytes */
memcpy(hw_ste, ste->hw_ste, DR_STE_SIZE_REDUCED);
sb_idx = ste->ste_chain_location - 1;
@@ -382,14 +379,15 @@ void mlx5dr_ste_prepare_for_postsend(struct mlx5dr_ste_ctx *ste_ctx,
/* Init one ste as a pattern for ste data array */
void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx,
u16 gvmi,
- struct mlx5dr_domain_rx_tx *nic_dmn,
+ enum mlx5dr_domain_nic_type nic_type,
struct mlx5dr_ste_htbl *htbl,
u8 *formatted_ste,
struct mlx5dr_htbl_connect_info *connect_info)
{
+ bool is_rx = nic_type == DR_DOMAIN_NIC_TYPE_RX;
struct mlx5dr_ste ste = {};
- ste_ctx->ste_init(formatted_ste, htbl->lu_type, nic_dmn->ste_type, gvmi);
+ ste_ctx->ste_init(formatted_ste, htbl->lu_type, is_rx, gvmi);
ste.hw_ste = formatted_ste;
if (connect_info->type == CONNECT_HIT)
@@ -408,7 +406,7 @@ int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn,
mlx5dr_ste_set_formatted_ste(dmn->ste_ctx,
dmn->info.caps.gvmi,
- nic_dmn,
+ nic_dmn->type,
htbl,
formatted_ste,
connect_info);
@@ -466,21 +464,6 @@ free_table:
return -ENOENT;
}
-static void dr_ste_set_ctrl(struct mlx5dr_ste_htbl *htbl)
-{
- struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl;
- int num_of_entries;
-
- htbl->ctrl.may_grow = true;
-
- if (htbl->chunk_size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask)
- htbl->ctrl.may_grow = false;
-
- /* Threshold is 50%, one is added to table of size 1 */
- num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk_size);
- ctrl->increase_threshold = (num_of_entries + 1) / 2;
-}
-
struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool,
enum mlx5dr_icm_chunk_size chunk_size,
u16 lu_type, u16 byte_mask)
@@ -513,11 +496,9 @@ struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool,
ste->refcount = 0;
INIT_LIST_HEAD(&ste->miss_list_node);
INIT_LIST_HEAD(&htbl->miss_list[i]);
- INIT_LIST_HEAD(&ste->rule_list);
}
htbl->chunk_size = chunk_size;
- dr_ste_set_ctrl(htbl);
return htbl;
out_free_htbl:
@@ -649,6 +630,7 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
u8 *ste_arr)
{
struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+ bool is_rx = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX;
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
struct mlx5dr_ste_build *sb;
@@ -663,7 +645,7 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
for (i = 0; i < nic_matcher->num_of_builders; i++) {
ste_ctx->ste_init(ste_arr,
sb->lu_type,
- nic_dmn->ste_type,
+ is_rx,
dmn->info.caps.gvmi);
mlx5dr_ste_set_bit_mask(ste_arr, sb->bit_mask);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
index 12a8bbbf944b..2d52d065dc8b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
@@ -146,7 +146,7 @@ struct mlx5dr_ste_ctx {
/* Getters and Setters */
void (*ste_init)(u8 *hw_ste_p, u16 lu_type,
- u8 entry_type, u16 gvmi);
+ bool is_rx, u16 gvmi);
void (*set_next_lu_type)(u8 *hw_ste_p, u16 lu_type);
u16 (*get_next_lu_type)(u8 *hw_ste_p);
void (*set_miss_addr)(u8 *hw_ste_p, u64 miss_addr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
index f1950e4968da..9c704bce3c12 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
@@ -8,6 +8,12 @@
#define SVLAN_ETHERTYPE 0x88a8
#define DR_STE_ENABLE_FLOW_TAG BIT(31)
+enum dr_ste_v0_entry_type {
+ DR_STE_TYPE_TX = 1,
+ DR_STE_TYPE_RX = 2,
+ DR_STE_TYPE_MODIFY_PKT = 6,
+};
+
enum dr_ste_v0_action_tunl {
DR_STE_TUNL_ACTION_NONE = 0,
DR_STE_TUNL_ACTION_ENABLE = 1,
@@ -292,8 +298,8 @@ static void dr_ste_v0_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size)
MLX5_SET(ste_general, hw_ste_p, next_table_base_31_5_size, index);
}
-static void dr_ste_v0_init(u8 *hw_ste_p, u16 lu_type,
- u8 entry_type, u16 gvmi)
+static void dr_ste_v0_init_full(u8 *hw_ste_p, u16 lu_type,
+ enum dr_ste_v0_entry_type entry_type, u16 gvmi)
{
dr_ste_v0_set_entry_type(hw_ste_p, entry_type);
dr_ste_v0_set_lu_type(hw_ste_p, lu_type);
@@ -307,6 +313,15 @@ static void dr_ste_v0_init(u8 *hw_ste_p, u16 lu_type,
MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_63_48, gvmi);
}
+static void dr_ste_v0_init(u8 *hw_ste_p, u16 lu_type,
+ bool is_rx, u16 gvmi)
+{
+ enum dr_ste_v0_entry_type entry_type;
+
+ entry_type = is_rx ? DR_STE_TYPE_RX : DR_STE_TYPE_TX;
+ dr_ste_v0_init_full(hw_ste_p, lu_type, entry_type, gvmi);
+}
+
static void dr_ste_v0_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag)
{
MLX5_SET(ste_rx_steering_mult, hw_ste_p, qp_list_pointer,
@@ -352,6 +367,7 @@ static void dr_ste_v0_set_rx_decap(u8 *hw_ste_p)
{
MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
DR_STE_TUNL_ACTION_DECAP);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, fail_on_error, 1);
}
static void dr_ste_v0_set_rx_pop_vlan(u8 *hw_ste_p)
@@ -365,6 +381,7 @@ static void dr_ste_v0_set_rx_decap_l3(u8 *hw_ste_p, bool vlan)
MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
DR_STE_TUNL_ACTION_L3_DECAP);
MLX5_SET(ste_modify_packet, hw_ste_p, action_description, vlan ? 1 : 0);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, fail_on_error, 1);
}
static void dr_ste_v0_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions,
@@ -378,13 +395,13 @@ static void dr_ste_v0_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions,
static void dr_ste_v0_arr_init_next(u8 **last_ste,
u32 *added_stes,
- enum mlx5dr_ste_entry_type entry_type,
+ enum dr_ste_v0_entry_type entry_type,
u16 gvmi)
{
(*added_stes)++;
*last_ste += DR_STE_SIZE;
- dr_ste_v0_init(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE,
- entry_type, gvmi);
+ dr_ste_v0_init_full(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE,
+ entry_type, gvmi);
}
static void
@@ -402,7 +419,7 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn,
* modify headers for outer headers only
*/
if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
- dr_ste_v0_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
+ dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT);
dr_ste_v0_set_rewrite_actions(last_ste,
attr->modify_actions,
attr->modify_index);
@@ -415,7 +432,7 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn,
if (i || action_type_set[DR_ACTION_TYP_MODIFY_HDR])
dr_ste_v0_arr_init_next(&last_ste,
added_stes,
- MLX5DR_STE_TYPE_TX,
+ DR_STE_TYPE_TX,
attr->gvmi);
dr_ste_v0_set_tx_push_vlan(last_ste,
@@ -433,7 +450,7 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn,
action_type_set[DR_ACTION_TYP_PUSH_VLAN])
dr_ste_v0_arr_init_next(&last_ste,
added_stes,
- MLX5DR_STE_TYPE_TX,
+ DR_STE_TYPE_TX,
attr->gvmi);
dr_ste_v0_set_tx_encap(last_ste,
@@ -467,7 +484,7 @@ dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn,
dr_ste_v0_set_counter_id(last_ste, attr->ctr_id);
if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) {
- dr_ste_v0_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
+ dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT);
dr_ste_v0_set_rx_decap_l3(last_ste, attr->decap_with_vlan);
dr_ste_v0_set_rewrite_actions(last_ste,
attr->decap_actions,
@@ -486,7 +503,7 @@ dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn,
action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2])
dr_ste_v0_arr_init_next(&last_ste,
added_stes,
- MLX5DR_STE_TYPE_RX,
+ DR_STE_TYPE_RX,
attr->gvmi);
dr_ste_v0_set_rx_pop_vlan(last_ste);
@@ -494,13 +511,13 @@ dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn,
}
if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
- if (dr_ste_v0_get_entry_type(last_ste) == MLX5DR_STE_TYPE_MODIFY_PKT)
+ if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT)
dr_ste_v0_arr_init_next(&last_ste,
added_stes,
- MLX5DR_STE_TYPE_MODIFY_PKT,
+ DR_STE_TYPE_MODIFY_PKT,
attr->gvmi);
else
- dr_ste_v0_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
+ dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT);
dr_ste_v0_set_rewrite_actions(last_ste,
attr->modify_actions,
@@ -508,10 +525,10 @@ dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn,
}
if (action_type_set[DR_ACTION_TYP_TAG]) {
- if (dr_ste_v0_get_entry_type(last_ste) == MLX5DR_STE_TYPE_MODIFY_PKT)
+ if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT)
dr_ste_v0_arr_init_next(&last_ste,
added_stes,
- MLX5DR_STE_TYPE_RX,
+ DR_STE_TYPE_RX,
attr->gvmi);
dr_ste_v0_rx_set_flow_tag(last_ste, attr->flow_tag);
@@ -1155,6 +1172,7 @@ dr_ste_v0_build_eth_ipv6_l3_l4_tag(struct mlx5dr_match_param *value,
u8 *tag)
{
struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc = &value->misc;
DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, tcp_dport);
DR_STE_SET_TAG(eth_l4, tag, src_port, spec, tcp_sport);
@@ -1166,6 +1184,11 @@ dr_ste_v0_build_eth_ipv6_l3_l4_tag(struct mlx5dr_match_param *value,
DR_STE_SET_TAG(eth_l4, tag, ecn, spec, ip_ecn);
DR_STE_SET_TAG(eth_l4, tag, ipv6_hop_limit, spec, ttl_hoplimit);
+ if (sb->inner)
+ DR_STE_SET_TAG(eth_l4, tag, flow_label, misc, inner_ipv6_flow_label);
+ else
+ DR_STE_SET_TAG(eth_l4, tag, flow_label, misc, outer_ipv6_flow_label);
+
if (spec->tcp_flags) {
DR_STE_SET_TCP_FLAGS(eth_l4, tag, spec);
spec->tcp_flags = 0;
@@ -1770,7 +1793,7 @@ dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
static int dr_ste_v0_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- uint8_t *tag)
+ u8 *tag)
{
struct mlx5dr_match_misc3 *misc3 = &value->misc3;
@@ -1800,7 +1823,7 @@ static void dr_ste_v0_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *s
static int
dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- uint8_t *tag)
+ u8 *tag)
{
if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0))
DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
@@ -1827,7 +1850,7 @@ dr_ste_v0_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb,
static int
dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- uint8_t *tag)
+ u8 *tag)
{
if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0))
DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index 4aaca8eb7597..b2481c99da79 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -322,7 +322,7 @@ static void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size)
}
static void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type,
- u8 entry_type, u16 gvmi)
+ bool is_rx, u16 gvmi)
{
dr_ste_v1_set_lu_type(hw_ste_p, lu_type);
dr_ste_v1_set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE);
@@ -402,8 +402,23 @@ static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action,
dr_ste_v1_set_reparse(hw_ste_p);
}
-static void dr_ste_v1_set_tx_push_vlan(u8 *hw_ste_p, u8 *d_action,
- u32 vlan_hdr)
+static void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action,
+ u8 anchor, u8 offset,
+ int size)
+{
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action,
+ action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action, start_anchor, anchor);
+
+ /* The hardware expects here size and offset in words (2 byte) */
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action, remove_size, size / 2);
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action, start_offset, offset / 2);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_push_vlan(u8 *hw_ste_p, u8 *d_action,
+ u32 vlan_hdr)
{
MLX5_SET(ste_double_action_insert_with_inline_v1, d_action,
action_id, DR_STE_V1_ACTION_ID_INSERT_INLINE);
@@ -416,7 +431,7 @@ static void dr_ste_v1_set_tx_push_vlan(u8 *hw_ste_p, u8 *d_action,
dr_ste_v1_set_reparse(hw_ste_p);
}
-static void dr_ste_v1_set_rx_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num)
+static void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num)
{
MLX5_SET(ste_single_action_remove_header_size_v1, s_action,
action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
@@ -503,13 +518,28 @@ static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
{
u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action);
u8 action_sz = DR_STE_ACTION_DOUBLE_SZ;
+ bool allow_modify_hdr = true;
bool allow_encap = true;
+ if (action_type_set[DR_ACTION_TYP_POP_VLAN]) {
+ if (action_sz < DR_STE_ACTION_SINGLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes,
+ attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1,
+ last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count);
+ action_sz -= DR_STE_ACTION_SINGLE_SZ;
+ action += DR_STE_ACTION_SINGLE_SZ;
+ allow_modify_hdr = false;
+ }
+
if (action_type_set[DR_ACTION_TYP_CTR])
dr_ste_v1_set_counter_id(last_ste, attr->ctr_id);
if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
- if (action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) {
dr_ste_v1_arr_init_next_match(&last_ste, added_stes,
attr->gvmi);
action = MLX5_ADDR_OF(ste_mask_and_match_v1,
@@ -534,7 +564,8 @@ static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
action_sz = DR_STE_ACTION_TRIPLE_SZ;
allow_encap = true;
}
- dr_ste_v1_set_tx_push_vlan(last_ste, action, attr->vlans.headers[i]);
+ dr_ste_v1_set_push_vlan(last_ste, action,
+ attr->vlans.headers[i]);
action_sz -= DR_STE_ACTION_DOUBLE_SZ;
action += DR_STE_ACTION_DOUBLE_SZ;
}
@@ -579,6 +610,18 @@ static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
attr->reformat.size);
action_sz -= DR_STE_ACTION_DOUBLE_SZ;
action += DR_STE_ACTION_DOUBLE_SZ;
+ } else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) {
+ if (action_sz < DR_STE_ACTION_SINGLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_remove_hdr(last_ste, action,
+ attr->reformat.param_0,
+ attr->reformat.param_1,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_SINGLE_SZ;
+ action += DR_STE_ACTION_SINGLE_SZ;
}
dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
@@ -635,7 +678,7 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
allow_ctr = false;
}
- dr_ste_v1_set_rx_pop_vlan(last_ste, action, attr->vlans.count);
+ dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count);
action_sz -= DR_STE_ACTION_SINGLE_SZ;
action += DR_STE_ACTION_SINGLE_SZ;
}
@@ -656,6 +699,26 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
action += DR_STE_ACTION_DOUBLE_SZ;
}
+ if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) {
+ int i;
+
+ for (i = 0; i < attr->vlans.count; i++) {
+ if (action_sz < DR_STE_ACTION_DOUBLE_SZ ||
+ !allow_modify_hdr) {
+ dr_ste_v1_arr_init_next_match(&last_ste,
+ added_stes,
+ attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1,
+ last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_push_vlan(last_ste, action,
+ attr->vlans.headers[i]);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ }
+ }
+
if (action_type_set[DR_ACTION_TYP_CTR]) {
/* Counter action set after decap and before insert_hdr
* to exclude decaped / encaped header respectively.
@@ -714,6 +777,20 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
action_sz -= DR_STE_ACTION_DOUBLE_SZ;
action += DR_STE_ACTION_DOUBLE_SZ;
allow_modify_hdr = false;
+ } else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) {
+ if (action_sz < DR_STE_ACTION_SINGLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ allow_modify_hdr = true;
+ allow_ctr = true;
+ }
+ dr_ste_v1_set_remove_hdr(last_ste, action,
+ attr->reformat.param_0,
+ attr->reformat.param_1,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_SINGLE_SZ;
+ action += DR_STE_ACTION_SINGLE_SZ;
}
dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
@@ -1844,7 +1921,7 @@ dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
static int dr_ste_v1_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- uint8_t *tag)
+ u8 *tag)
{
struct mlx5dr_match_misc3 *misc3 = &value->misc3;
@@ -1868,7 +1945,7 @@ static void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *s
static int
dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- uint8_t *tag)
+ u8 *tag)
{
if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0))
DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
@@ -1895,7 +1972,7 @@ dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb,
static int
dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- uint8_t *tag)
+ u8 *tag)
{
if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0))
DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
@@ -1960,7 +2037,9 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = {
.set_byte_mask = &dr_ste_v1_set_byte_mask,
.get_byte_mask = &dr_ste_v1_get_byte_mask,
/* Actions */
- .actions_caps = DR_STE_CTX_ACTION_CAP_RX_ENCAP,
+ .actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP |
+ DR_STE_CTX_ACTION_CAP_RX_PUSH |
+ DR_STE_CTX_ACTION_CAP_RX_ENCAP,
.set_actions_rx = &dr_ste_v1_set_actions_rx,
.set_actions_tx = &dr_ste_v1_set_actions_tx,
.modify_field_arr_sz = ARRAY_SIZE(dr_ste_v1_action_modify_field_arr),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index f5e93fa87aff..b20e8aabb861 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -83,15 +83,14 @@ enum {
DR_STE_SIZE_CTRL = 32,
DR_STE_SIZE_TAG = 16,
DR_STE_SIZE_MASK = 16,
-};
-
-enum {
DR_STE_SIZE_REDUCED = DR_STE_SIZE - DR_STE_SIZE_MASK,
};
enum mlx5dr_ste_ctx_action_cap {
DR_STE_CTX_ACTION_CAP_NONE = 0,
- DR_STE_CTX_ACTION_CAP_RX_ENCAP = 1 << 0,
+ DR_STE_CTX_ACTION_CAP_TX_POP = 1 << 0,
+ DR_STE_CTX_ACTION_CAP_RX_PUSH = 1 << 1,
+ DR_STE_CTX_ACTION_CAP_RX_ENCAP = 1 << 2,
};
enum {
@@ -124,6 +123,7 @@ enum mlx5dr_action_type {
DR_ACTION_TYP_POP_VLAN,
DR_ACTION_TYP_PUSH_VLAN,
DR_ACTION_TYP_INSERT_HDR,
+ DR_ACTION_TYP_REMOVE_HDR,
DR_ACTION_TYP_SAMPLER,
DR_ACTION_TYP_MAX,
};
@@ -140,6 +140,7 @@ struct mlx5dr_icm_buddy_mem;
struct mlx5dr_ste_htbl;
struct mlx5dr_match_param;
struct mlx5dr_cmd_caps;
+struct mlx5dr_rule_rx_tx;
struct mlx5dr_matcher_rx_tx;
struct mlx5dr_ste_ctx;
@@ -151,14 +152,14 @@ struct mlx5dr_ste {
/* attached to the miss_list head at each htbl entry */
struct list_head miss_list_node;
- /* each rule member that uses this ste attached here */
- struct list_head rule_list;
-
/* this ste is member of htbl */
struct mlx5dr_ste_htbl *htbl;
struct mlx5dr_ste_htbl *next_htbl;
+ /* The rule this STE belongs to */
+ struct mlx5dr_rule_rx_tx *rule_rx_tx;
+
/* this ste is part of a rule, located in ste's chain */
u8 ste_chain_location;
};
@@ -171,8 +172,6 @@ struct mlx5dr_ste_htbl_ctrl {
/* total number of collisions entries attached to this table */
unsigned int num_of_collisions;
- unsigned int increase_threshold;
- u8 may_grow:1;
};
struct mlx5dr_ste_htbl {
@@ -804,10 +803,15 @@ struct mlx5dr_cmd_caps {
u8 isolate_vl_tc:1;
};
+enum mlx5dr_domain_nic_type {
+ DR_DOMAIN_NIC_TYPE_RX,
+ DR_DOMAIN_NIC_TYPE_TX,
+};
+
struct mlx5dr_domain_rx_tx {
u64 drop_icm_addr;
u64 default_icm_addr;
- enum mlx5dr_ste_entry_type ste_type;
+ enum mlx5dr_domain_nic_type type;
struct mutex mutex; /* protect rx/tx domain */
};
@@ -885,14 +889,6 @@ struct mlx5dr_matcher {
struct mlx5dv_flow_matcher *dv_matcher;
};
-struct mlx5dr_rule_member {
- struct mlx5dr_ste *ste;
- /* attached to mlx5dr_rule via this */
- struct list_head list;
- /* attached to mlx5dr_ste via this */
- struct list_head use_ste_list;
-};
-
struct mlx5dr_ste_action_modify_field {
u16 hw_field;
u8 start;
@@ -993,8 +989,8 @@ struct mlx5dr_htbl_connect_info {
};
struct mlx5dr_rule_rx_tx {
- struct list_head rule_members_list;
struct mlx5dr_matcher_rx_tx *nic_matcher;
+ struct mlx5dr_ste *last_rule_ste;
};
struct mlx5dr_rule {
@@ -1005,8 +1001,12 @@ struct mlx5dr_rule {
u32 flow_source;
};
-void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *new_ste,
- struct mlx5dr_ste *ste);
+void mlx5dr_rule_set_last_member(struct mlx5dr_rule_rx_tx *nic_rule,
+ struct mlx5dr_ste *ste,
+ bool force);
+int mlx5dr_rule_get_reverse_rule_members(struct mlx5dr_ste **ste_arr,
+ struct mlx5dr_ste *curr_ste,
+ int *num_of_stes);
struct mlx5dr_icm_chunk {
struct mlx5dr_icm_buddy_mem *buddy_mem;
@@ -1083,6 +1083,25 @@ mlx5dr_icm_pool_chunk_size_to_byte(enum mlx5dr_icm_chunk_size chunk_size,
return entry_size * num_of_entries;
}
+static inline int
+mlx5dr_ste_htbl_increase_threshold(struct mlx5dr_ste_htbl *htbl)
+{
+ int num_of_entries =
+ mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk_size);
+
+ /* Threshold is 50%, one is added to table of size 1 */
+ return (num_of_entries + 1) / 2;
+}
+
+static inline bool
+mlx5dr_ste_htbl_may_grow(struct mlx5dr_ste_htbl *htbl)
+{
+ if (htbl->chunk_size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask)
+ return false;
+
+ return true;
+}
+
static inline struct mlx5dr_cmd_vport_cap *
mlx5dr_get_vport_cap(struct mlx5dr_cmd_caps *caps, u32 vport)
{
@@ -1216,7 +1235,7 @@ int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn,
bool update_hw_ste);
void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx,
u16 gvmi,
- struct mlx5dr_domain_rx_tx *nic_dmn,
+ enum mlx5dr_domain_nic_type nic_type,
struct mlx5dr_ste_htbl *htbl,
u8 *formatted_ste,
struct mlx5dr_htbl_connect_info *connect_info);
@@ -1282,6 +1301,7 @@ struct mlx5dr_send_ring {
u8 sync_buff[MIN_READ_SYNC];
struct mlx5dr_mr *sync_mr;
spinlock_t lock; /* Protect the data path of the send ring */
+ bool err_state; /* send_ring is not usable in err state */
};
int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn);
@@ -1333,6 +1353,7 @@ struct mlx5dr_cmd_fte_info {
u32 *val;
struct mlx5_flow_act action;
struct mlx5dr_cmd_flow_destination_hw_info *dest_arr;
+ bool ignore_flow_level;
};
int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev,
@@ -1362,7 +1383,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
int num_dest,
bool reformat_req,
u32 *tbl_id,
- u32 *group_id);
+ u32 *group_id,
+ bool ignore_flow_level);
void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id,
u32 group_id);
#endif /* _DR_TYPES_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index d5926dd7e972..7e58f4e594b7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -133,6 +133,9 @@ static int mlx5_cmd_dr_modify_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
struct mlx5_flow_table *next_ft)
{
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->modify_flow_table(ns, ft, next_ft);
+
return set_miss_action(ns, ft, next_ft);
}
@@ -487,9 +490,13 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
actions[num_actions++] = term_actions->dest;
} else if (num_term_actions > 1) {
+ bool ignore_flow_level =
+ !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL);
+
tmp_action = mlx5dr_action_create_mult_dest_tbl(domain,
term_actions,
- num_term_actions);
+ num_term_actions,
+ ignore_flow_level);
if (!tmp_action) {
err = -EOPNOTSUPP;
goto free_actions;
@@ -557,6 +564,9 @@ static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns
case MLX5_REFORMAT_TYPE_INSERT_HDR:
dr_reformat = DR_ACTION_REFORMAT_TYP_INSERT_HDR;
break;
+ case MLX5_REFORMAT_TYPE_REMOVE_HDR:
+ dr_reformat = DR_ACTION_REFORMAT_TYP_REMOVE_HDR;
+ break;
default:
mlx5_core_err(ns->dev, "Packet-reformat not supported(%d)\n",
params->type);
@@ -615,15 +625,6 @@ static void mlx5_cmd_dr_modify_header_dealloc(struct mlx5_flow_root_namespace *n
mlx5dr_action_destroy(modify_hdr->action.dr_action);
}
-static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *group,
- int modify_mask,
- struct fs_fte *fte)
-{
- return -EOPNOTSUPP;
-}
-
static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
struct fs_fte *fte)
@@ -648,6 +649,36 @@ static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns,
return 0;
}
+static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ struct fs_fte fte_tmp = {};
+ int ret;
+
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->update_fte(ns, ft, group, modify_mask, fte);
+
+ /* Backup current dr rule details */
+ fte_tmp.fs_dr_rule = fte->fs_dr_rule;
+ memset(&fte->fs_dr_rule, 0, sizeof(struct mlx5_fs_dr_rule));
+
+ /* First add the new updated rule, then delete the old rule */
+ ret = mlx5_cmd_dr_create_fte(ns, ft, group, fte);
+ if (ret)
+ goto restore_fte;
+
+ ret = mlx5_cmd_dr_delete_fte(ns, ft, &fte_tmp);
+ WARN_ONCE(ret, "dr update fte duplicate rule deletion failed\n");
+ return ret;
+
+restore_fte:
+ fte->fs_dr_rule = fte_tmp.fs_dr_rule;
+ return ret;
+}
+
static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_root_namespace *peer_ns)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
index 9643ee647f57..d2a937f69784 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
@@ -8,12 +8,6 @@ enum {
MLX5DR_STE_LU_TYPE_DONT_CARE = 0x0f,
};
-enum mlx5dr_ste_entry_type {
- MLX5DR_STE_TYPE_TX = 1,
- MLX5DR_STE_TYPE_RX = 2,
- MLX5DR_STE_TYPE_MODIFY_PKT = 6,
-};
-
struct mlx5_ifc_ste_general_bits {
u8 entry_type[0x4];
u8 reserved_at_4[0x4];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index bbfe101d4e57..c5a8b1601999 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -27,6 +27,7 @@ enum mlx5dr_action_reformat_type {
DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2,
DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3,
DR_ACTION_REFORMAT_TYP_INSERT_HDR,
+ DR_ACTION_REFORMAT_TYP_REMOVE_HDR,
};
struct mlx5dr_match_parameters {
@@ -94,7 +95,8 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain,
struct mlx5dr_action *
mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
struct mlx5dr_action_dest *dests,
- u32 num_of_dests);
+ u32 num_of_dests,
+ bool ignore_flow_level);
struct mlx5dr_action *mlx5dr_action_create_drop(void);
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
index a0a059e0154f..3e85b17f5857 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
@@ -199,7 +199,7 @@ static int mlxbf_gige_stop(struct net_device *netdev)
return 0;
}
-static int mlxbf_gige_do_ioctl(struct net_device *netdev,
+static int mlxbf_gige_eth_ioctl(struct net_device *netdev,
struct ifreq *ifr, int cmd)
{
if (!(netif_running(netdev)))
@@ -253,7 +253,7 @@ static const struct net_device_ops mlxbf_gige_netdev_ops = {
.ndo_start_xmit = mlxbf_gige_start_xmit,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = mlxbf_gige_do_ioctl,
+ .ndo_eth_ioctl = mlxbf_gige_eth_ioctl,
.ndo_set_rx_mode = mlxbf_gige_set_rx_mode,
.ndo_get_stats64 = mlxbf_gige_get_stats64,
};
@@ -269,9 +269,6 @@ static int mlxbf_gige_probe(struct platform_device *pdev)
{
struct phy_device *phydev;
struct net_device *netdev;
- struct resource *mac_res;
- struct resource *llu_res;
- struct resource *plu_res;
struct mlxbf_gige *priv;
void __iomem *llu_base;
void __iomem *plu_base;
@@ -280,27 +277,15 @@ static int mlxbf_gige_probe(struct platform_device *pdev)
int addr;
int err;
- mac_res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_MAC);
- if (!mac_res)
- return -ENXIO;
-
- base = devm_ioremap_resource(&pdev->dev, mac_res);
+ base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MAC);
if (IS_ERR(base))
return PTR_ERR(base);
- llu_res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_LLU);
- if (!llu_res)
- return -ENXIO;
-
- llu_base = devm_ioremap_resource(&pdev->dev, llu_res);
+ llu_base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_LLU);
if (IS_ERR(llu_base))
return PTR_ERR(llu_base);
- plu_res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_PLU);
- if (!plu_res)
- return -ENXIO;
-
- plu_base = devm_ioremap_resource(&pdev->dev, plu_res);
+ plu_base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_PLU);
if (IS_ERR(plu_base))
return PTR_ERR(plu_base);
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c
index e32dd34fdcc0..7905179a9575 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c
@@ -145,14 +145,9 @@ static int mlxbf_gige_mdio_write(struct mii_bus *bus, int phy_add,
int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv)
{
struct device *dev = &pdev->dev;
- struct resource *res;
int ret;
- res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_MDIO9);
- if (!res)
- return -ENODEV;
-
- priv->mdio_io = devm_ioremap_resource(dev, res);
+ priv->mdio_io = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MDIO9);
if (IS_ERR(priv->mdio_io))
return PTR_ERR(priv->mdio_io);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index 12871c8dc7c1..d1ae248e125c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -58,10 +58,10 @@ config MLXSW_SPECTRUM
depends on NET_IPGRE || NET_IPGRE=n
depends on IPV6_GRE || IPV6_GRE=n
depends on VXLAN || VXLAN=n
+ depends on PTP_1588_CLOCK_OPTIONAL
select GENERIC_ALLOCATOR
select PARMAN
select OBJAGG
- imply PTP_1588_CLOCK
select NET_PTP_CLASSIFY if PTP_1588_CLOCK
default m
help
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index e775f08fb464..f080fab3de2b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1927,7 +1927,8 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
if (!reload) {
alloc_size = sizeof(*mlxsw_core) + mlxsw_driver->priv_size;
- devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size);
+ devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size,
+ mlxsw_bus_info->dev);
if (!devlink) {
err = -ENOMEM;
goto err_devlink_alloc;
@@ -1974,7 +1975,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
goto err_emad_init;
if (!reload) {
- err = devlink_register(devlink, mlxsw_bus_info->dev);
+ err = devlink_register(devlink);
if (err)
goto err_devlink_register;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 88699e678544..250c5a24264d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1207,7 +1207,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
.ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid,
.ndo_set_features = mlxsw_sp_set_features,
.ndo_get_devlink_port = mlxsw_sp_port_get_devlink_port,
- .ndo_do_ioctl = mlxsw_sp_port_ioctl,
+ .ndo_eth_ioctl = mlxsw_sp_port_ioctl,
};
static int
@@ -2717,6 +2717,22 @@ mlxsw_sp_sample_trigger_params_unset(struct mlxsw_sp *mlxsw_sp,
static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
unsigned long event, void *ptr);
+#define MLXSW_SP_DEFAULT_PARSING_DEPTH 96
+#define MLXSW_SP_INCREASED_PARSING_DEPTH 128
+#define MLXSW_SP_DEFAULT_VXLAN_UDP_DPORT 4789
+
+static void mlxsw_sp_parsing_init(struct mlxsw_sp *mlxsw_sp)
+{
+ mlxsw_sp->parsing.parsing_depth = MLXSW_SP_DEFAULT_PARSING_DEPTH;
+ mlxsw_sp->parsing.vxlan_udp_dport = MLXSW_SP_DEFAULT_VXLAN_UDP_DPORT;
+ mutex_init(&mlxsw_sp->parsing.lock);
+}
+
+static void mlxsw_sp_parsing_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ mutex_destroy(&mlxsw_sp->parsing.lock);
+}
+
static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
const struct mlxsw_bus_info *mlxsw_bus_info,
struct netlink_ext_ack *extack)
@@ -2727,6 +2743,7 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->core = mlxsw_core;
mlxsw_sp->bus_info = mlxsw_bus_info;
+ mlxsw_sp_parsing_init(mlxsw_sp);
mlxsw_core_emad_string_tlv_enable(mlxsw_core);
err = mlxsw_sp_base_mac_get(mlxsw_sp);
@@ -2926,6 +2943,7 @@ err_policers_init:
mlxsw_sp_fids_fini(mlxsw_sp);
err_fids_init:
mlxsw_sp_kvdl_fini(mlxsw_sp);
+ mlxsw_sp_parsing_fini(mlxsw_sp);
return err;
}
@@ -3046,6 +3064,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_policers_fini(mlxsw_sp);
mlxsw_sp_fids_fini(mlxsw_sp);
mlxsw_sp_kvdl_fini(mlxsw_sp);
+ mlxsw_sp_parsing_fini(mlxsw_sp);
}
/* Per-FID flood tables are used for both "true" 802.1D FIDs and emulated
@@ -3611,6 +3630,69 @@ void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port)
dev_put(mlxsw_sp_port->dev);
}
+int mlxsw_sp_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp)
+{
+ char mprs_pl[MLXSW_REG_MPRS_LEN];
+ int err = 0;
+
+ mutex_lock(&mlxsw_sp->parsing.lock);
+
+ if (refcount_inc_not_zero(&mlxsw_sp->parsing.parsing_depth_ref))
+ goto out_unlock;
+
+ mlxsw_reg_mprs_pack(mprs_pl, MLXSW_SP_INCREASED_PARSING_DEPTH,
+ mlxsw_sp->parsing.vxlan_udp_dport);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl);
+ if (err)
+ goto out_unlock;
+
+ mlxsw_sp->parsing.parsing_depth = MLXSW_SP_INCREASED_PARSING_DEPTH;
+ refcount_set(&mlxsw_sp->parsing.parsing_depth_ref, 1);
+
+out_unlock:
+ mutex_unlock(&mlxsw_sp->parsing.lock);
+ return err;
+}
+
+void mlxsw_sp_parsing_depth_dec(struct mlxsw_sp *mlxsw_sp)
+{
+ char mprs_pl[MLXSW_REG_MPRS_LEN];
+
+ mutex_lock(&mlxsw_sp->parsing.lock);
+
+ if (!refcount_dec_and_test(&mlxsw_sp->parsing.parsing_depth_ref))
+ goto out_unlock;
+
+ mlxsw_reg_mprs_pack(mprs_pl, MLXSW_SP_DEFAULT_PARSING_DEPTH,
+ mlxsw_sp->parsing.vxlan_udp_dport);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl);
+ mlxsw_sp->parsing.parsing_depth = MLXSW_SP_DEFAULT_PARSING_DEPTH;
+
+out_unlock:
+ mutex_unlock(&mlxsw_sp->parsing.lock);
+}
+
+int mlxsw_sp_parsing_vxlan_udp_dport_set(struct mlxsw_sp *mlxsw_sp,
+ __be16 udp_dport)
+{
+ char mprs_pl[MLXSW_REG_MPRS_LEN];
+ int err;
+
+ mutex_lock(&mlxsw_sp->parsing.lock);
+
+ mlxsw_reg_mprs_pack(mprs_pl, mlxsw_sp->parsing.parsing_depth,
+ be16_to_cpu(udp_dport));
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl);
+ if (err)
+ goto out_unlock;
+
+ mlxsw_sp->parsing.vxlan_udp_dport = be16_to_cpu(udp_dport);
+
+out_unlock:
+ mutex_unlock(&mlxsw_sp->parsing.lock);
+ return err;
+}
+
static void
mlxsw_sp_port_lag_uppers_cleanup(struct mlxsw_sp_port *mlxsw_sp_port,
struct net_device *lag_dev)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index f99db88ee884..3a43cba6d23c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -148,6 +148,13 @@ struct mlxsw_sp_port_mapping {
u8 lane;
};
+struct mlxsw_sp_parsing {
+ refcount_t parsing_depth_ref;
+ u16 parsing_depth;
+ u16 vxlan_udp_dport;
+ struct mutex lock; /* Protects parsing configuration */
+};
+
struct mlxsw_sp {
struct mlxsw_sp_port **ports;
struct mlxsw_core *core;
@@ -173,6 +180,7 @@ struct mlxsw_sp {
struct mlxsw_sp_counter_pool *counter_pool;
struct mlxsw_sp_span *span;
struct mlxsw_sp_trap *trap;
+ struct mlxsw_sp_parsing parsing;
const struct mlxsw_sp_switchdev_ops *switchdev_ops;
const struct mlxsw_sp_kvdl_ops *kvdl_ops;
const struct mlxsw_afa_ops *afa_ops;
@@ -652,6 +660,10 @@ struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev);
struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev);
void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port);
struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev);
+int mlxsw_sp_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_parsing_depth_dec(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_parsing_vxlan_udp_dport_set(struct mlxsw_sp *mlxsw_sp,
+ __be16 udp_dport);
/* spectrum_dcb.c */
#ifdef CONFIG_MLXSW_SPECTRUM_DCB
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
index d8104fc6c900..98d1fdc25eac 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
@@ -29,7 +29,6 @@ struct mlxsw_sp_nve {
unsigned int num_max_mc_entries[MLXSW_SP_L3_PROTO_MAX];
u32 tunnel_index;
u16 ul_rif_index; /* Reserved for Spectrum */
- unsigned int inc_parsing_depth_refs;
};
struct mlxsw_sp_nve_ops {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
index b84bb4b65098..d018d2da5949 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
@@ -10,14 +10,6 @@
#include "spectrum.h"
#include "spectrum_nve.h"
-/* Eth (18B) | IPv6 (40B) | UDP (8B) | VxLAN (8B) | Eth (14B) | IPv6 (40B)
- *
- * In the worst case - where we have a VLAN tag on the outer Ethernet
- * header and IPv6 in overlay and underlay - we need to parse 128 bytes
- */
-#define MLXSW_SP_NVE_VXLAN_PARSING_DEPTH 128
-#define MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH 96
-
#define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS (VXLAN_F_UDP_ZERO_CSUM_TX | \
VXLAN_F_LEARN)
@@ -115,66 +107,6 @@ static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve,
config->udp_dport = cfg->dst_port;
}
-static int __mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
- unsigned int parsing_depth,
- __be16 udp_dport)
-{
- char mprs_pl[MLXSW_REG_MPRS_LEN];
-
- mlxsw_reg_mprs_pack(mprs_pl, parsing_depth, be16_to_cpu(udp_dport));
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl);
-}
-
-static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
- __be16 udp_dport)
-{
- int parsing_depth = mlxsw_sp->nve->inc_parsing_depth_refs ?
- MLXSW_SP_NVE_VXLAN_PARSING_DEPTH :
- MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH;
-
- return __mlxsw_sp_nve_parsing_set(mlxsw_sp, parsing_depth, udp_dport);
-}
-
-static int
-__mlxsw_sp_nve_inc_parsing_depth_get(struct mlxsw_sp *mlxsw_sp,
- __be16 udp_dport)
-{
- int err;
-
- mlxsw_sp->nve->inc_parsing_depth_refs++;
-
- err = mlxsw_sp_nve_parsing_set(mlxsw_sp, udp_dport);
- if (err)
- goto err_nve_parsing_set;
- return 0;
-
-err_nve_parsing_set:
- mlxsw_sp->nve->inc_parsing_depth_refs--;
- return err;
-}
-
-static void
-__mlxsw_sp_nve_inc_parsing_depth_put(struct mlxsw_sp *mlxsw_sp,
- __be16 udp_dport)
-{
- mlxsw_sp->nve->inc_parsing_depth_refs--;
- mlxsw_sp_nve_parsing_set(mlxsw_sp, udp_dport);
-}
-
-int mlxsw_sp_nve_inc_parsing_depth_get(struct mlxsw_sp *mlxsw_sp)
-{
- __be16 udp_dport = mlxsw_sp->nve->config.udp_dport;
-
- return __mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp, udp_dport);
-}
-
-void mlxsw_sp_nve_inc_parsing_depth_put(struct mlxsw_sp *mlxsw_sp)
-{
- __be16 udp_dport = mlxsw_sp->nve->config.udp_dport;
-
- __mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, udp_dport);
-}
-
static void
mlxsw_sp_nve_vxlan_config_prepare(char *tngcr_pl,
const struct mlxsw_sp_nve_config *config)
@@ -238,10 +170,14 @@ static int mlxsw_sp1_nve_vxlan_init(struct mlxsw_sp_nve *nve,
struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
int err;
- err = __mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp, config->udp_dport);
+ err = mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, config->udp_dport);
if (err)
return err;
+ err = mlxsw_sp_parsing_depth_inc(mlxsw_sp);
+ if (err)
+ goto err_parsing_depth_inc;
+
err = mlxsw_sp1_nve_vxlan_config_set(mlxsw_sp, config);
if (err)
goto err_config_set;
@@ -263,7 +199,9 @@ err_promote_decap:
err_rtdp_set:
mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
err_config_set:
- __mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
+ mlxsw_sp_parsing_depth_dec(mlxsw_sp);
+err_parsing_depth_inc:
+ mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, 0);
return err;
}
@@ -275,7 +213,8 @@ static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id,
config->ul_proto, &config->ul_sip);
mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
- __mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
+ mlxsw_sp_parsing_depth_dec(mlxsw_sp);
+ mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, 0);
}
static int
@@ -412,10 +351,14 @@ static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve,
struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
int err;
- err = __mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp, config->udp_dport);
+ err = mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, config->udp_dport);
if (err)
return err;
+ err = mlxsw_sp_parsing_depth_inc(mlxsw_sp);
+ if (err)
+ goto err_parsing_depth_inc;
+
err = mlxsw_sp2_nve_vxlan_config_set(mlxsw_sp, config);
if (err)
goto err_config_set;
@@ -438,7 +381,9 @@ err_promote_decap:
err_rtdp_set:
mlxsw_sp2_nve_vxlan_config_clear(mlxsw_sp);
err_config_set:
- __mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
+ mlxsw_sp_parsing_depth_dec(mlxsw_sp);
+err_parsing_depth_inc:
+ mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, 0);
return err;
}
@@ -450,7 +395,8 @@ static void mlxsw_sp2_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id,
config->ul_proto, &config->ul_sip);
mlxsw_sp2_nve_vxlan_config_clear(mlxsw_sp);
- __mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
+ mlxsw_sp_parsing_depth_dec(mlxsw_sp);
+ mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, 0);
}
const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
index bfef65d1587c..1a180384e7e8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
@@ -975,14 +975,14 @@ static int mlxsw_sp1_ptp_mtpppc_update(struct mlxsw_sp_port *mlxsw_sp_port,
}
if ((ing_types || egr_types) && !(orig_ing_types || orig_egr_types)) {
- err = mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp);
+ err = mlxsw_sp_parsing_depth_inc(mlxsw_sp);
if (err) {
netdev_err(mlxsw_sp_port->dev, "Failed to increase parsing depth");
return err;
}
}
if (!(ing_types || egr_types) && (orig_ing_types || orig_egr_types))
- mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp);
+ mlxsw_sp_parsing_depth_dec(mlxsw_sp);
return mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp_port->mlxsw_sp,
ing_types, egr_types);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 7e221ef01437..19bb3ca0515e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -9079,7 +9079,7 @@ mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
{
- struct switchdev_notifier_fdb_info info;
+ struct switchdev_notifier_fdb_info info = {};
struct net_device *dev;
dev = br_fdb_find_port(rif->dev, mac, 0);
@@ -9127,8 +9127,8 @@ mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
{
+ struct switchdev_notifier_fdb_info info = {};
u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
- struct switchdev_notifier_fdb_info info;
struct net_device *br_dev;
struct net_device *dev;
@@ -9484,6 +9484,7 @@ struct mlxsw_sp_mp_hash_config {
DECLARE_BITMAP(fields, __MLXSW_REG_RECR2_FIELD_CNT);
DECLARE_BITMAP(inner_headers, __MLXSW_REG_RECR2_HEADER_CNT);
DECLARE_BITMAP(inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT);
+ bool inc_parsing_depth;
};
#define MLXSW_SP_MP_HASH_HEADER_SET(_headers, _header) \
@@ -9654,6 +9655,7 @@ static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
/* Inner */
mlxsw_sp_mp_hash_inner_l3(config);
+ config->inc_parsing_depth = true;
break;
case 3:
/* Outer */
@@ -9678,22 +9680,53 @@ static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
/* Inner */
mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)
+ config->inc_parsing_depth = true;
break;
}
}
+/* Bring the router's parsing-depth reference in line with the new multipath
+ * hash configuration.
+ *
+ * Takes a parsing-depth reference when the requirement goes from "not
+ * needed" to "needed" and drops it on the opposite transition; the router's
+ * inc_parsing_depth flag is updated to match. Nothing happens when the
+ * requirement is unchanged.
+ *
+ * Returns 0 on success or the error from mlxsw_sp_parsing_depth_inc().
+ */
+static int mlxsw_sp_mp_hash_parsing_depth_adjust(struct mlxsw_sp *mlxsw_sp,
+						 bool old_inc_parsing_depth,
+						 bool new_inc_parsing_depth)
+{
+	/* No transition, no work */
+	if (old_inc_parsing_depth == new_inc_parsing_depth)
+		return 0;
+
+	if (new_inc_parsing_depth) {
+		int rc = mlxsw_sp_parsing_depth_inc(mlxsw_sp);
+
+		/* On failure the recorded state must stay untouched */
+		if (rc)
+			return rc;
+	} else {
+		mlxsw_sp_parsing_depth_dec(mlxsw_sp);
+	}
+
+	mlxsw_sp->router->inc_parsing_depth = new_inc_parsing_depth;
+	return 0;
+}
+
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
+ bool old_inc_parsing_depth, new_inc_parsing_depth;
struct mlxsw_sp_mp_hash_config config = {};
char recr2_pl[MLXSW_REG_RECR2_LEN];
unsigned long bit;
u32 seed;
+ int err;
seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
mlxsw_reg_recr2_pack(recr2_pl, seed);
mlxsw_sp_mp4_hash_init(mlxsw_sp, &config);
mlxsw_sp_mp6_hash_init(mlxsw_sp, &config);
+ old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth;
+ new_inc_parsing_depth = config.inc_parsing_depth;
+ err = mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp,
+ old_inc_parsing_depth,
+ new_inc_parsing_depth);
+ if (err)
+ return err;
+
for_each_set_bit(bit, config.headers, __MLXSW_REG_RECR2_HEADER_CNT)
mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, bit, 1);
for_each_set_bit(bit, config.fields, __MLXSW_REG_RECR2_FIELD_CNT)
@@ -9703,7 +9736,16 @@ static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
for_each_set_bit(bit, config.inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT)
mlxsw_reg_recr2_inner_header_fields_enable_set(recr2_pl, bit, 1);
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
+ if (err)
+ goto err_reg_write;
+
+ return 0;
+
+err_reg_write:
+ mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, new_inc_parsing_depth,
+ old_inc_parsing_depth);
+ return err;
}
#else
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index c5d7007f9173..25d3eae63501 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -81,6 +81,7 @@ struct mlxsw_sp_router {
size_t adj_grp_size_ranges_count;
struct delayed_work nh_grp_activity_dw;
struct list_head nh_res_grp_list;
+ bool inc_parsing_depth;
};
struct mlxsw_sp_fib_entry_priv {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index c5ef9aa64efe..22fede5cb32c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -335,14 +335,16 @@ mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge,
static struct mlxsw_sp_bridge_port *
mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device,
- struct net_device *brport_dev)
+ struct net_device *brport_dev,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_sp_bridge_port *bridge_port;
struct mlxsw_sp_port *mlxsw_sp_port;
+ int err;
bridge_port = kzalloc(sizeof(*bridge_port), GFP_KERNEL);
if (!bridge_port)
- return NULL;
+ return ERR_PTR(-ENOMEM);
mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(brport_dev);
bridge_port->lagged = mlxsw_sp_port->lagged;
@@ -359,12 +361,23 @@ mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device,
list_add(&bridge_port->list, &bridge_device->ports_list);
bridge_port->ref_count = 1;
+ err = switchdev_bridge_port_offload(brport_dev, mlxsw_sp_port->dev,
+ NULL, NULL, NULL, false, extack);
+ if (err)
+ goto err_switchdev_offload;
+
return bridge_port;
+
+err_switchdev_offload:
+ list_del(&bridge_port->list);
+ kfree(bridge_port);
+ return ERR_PTR(err);
}
static void
mlxsw_sp_bridge_port_destroy(struct mlxsw_sp_bridge_port *bridge_port)
{
+ switchdev_bridge_port_unoffload(bridge_port->dev, NULL, NULL, NULL);
list_del(&bridge_port->list);
WARN_ON(!list_empty(&bridge_port->vlans_list));
kfree(bridge_port);
@@ -390,9 +403,10 @@ mlxsw_sp_bridge_port_get(struct mlxsw_sp_bridge *bridge,
if (IS_ERR(bridge_device))
return ERR_CAST(bridge_device);
- bridge_port = mlxsw_sp_bridge_port_create(bridge_device, brport_dev);
- if (!bridge_port) {
- err = -ENOMEM;
+ bridge_port = mlxsw_sp_bridge_port_create(bridge_device, brport_dev,
+ extack);
+ if (IS_ERR(bridge_port)) {
+ err = PTR_ERR(bridge_port);
goto err_bridge_port_create;
}
@@ -1569,7 +1583,6 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp,
{
long *flood_bitmap;
int num_of_ports;
- int alloc_size;
u16 mid_idx;
int err;
@@ -1579,18 +1592,17 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp,
return false;
num_of_ports = mlxsw_core_max_ports(mlxsw_sp->core);
- alloc_size = sizeof(long) * BITS_TO_LONGS(num_of_ports);
- flood_bitmap = kzalloc(alloc_size, GFP_KERNEL);
+ flood_bitmap = bitmap_alloc(num_of_ports, GFP_KERNEL);
if (!flood_bitmap)
return false;
- bitmap_copy(flood_bitmap, mid->ports_in_mid, num_of_ports);
+ bitmap_copy(flood_bitmap, mid->ports_in_mid, num_of_ports);
mlxsw_sp_mc_get_mrouters_bitmap(flood_bitmap, bridge_device, mlxsw_sp);
mid->mid = mid_idx;
err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap,
bridge_device->mrouter);
- kfree(flood_bitmap);
+ bitmap_free(flood_bitmap);
if (err)
return false;
@@ -2508,7 +2520,7 @@ mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type,
const char *mac, u16 vid,
struct net_device *dev, bool offloaded)
{
- struct switchdev_notifier_fdb_info info;
+ struct switchdev_notifier_fdb_info info = {};
info.addr = mac;
info.vid = vid;
diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c
index 831518466de2..3f69bb59ba49 100644
--- a/drivers/net/ethernet/micrel/ks8851_common.c
+++ b/drivers/net/ethernet/micrel/ks8851_common.c
@@ -689,7 +689,7 @@ static int ks8851_net_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
static const struct net_device_ops ks8851_netdev_ops = {
.ndo_open = ks8851_net_open,
.ndo_stop = ks8851_net_stop,
- .ndo_do_ioctl = ks8851_net_ioctl,
+ .ndo_eth_ioctl = ks8851_net_ioctl,
.ndo_start_xmit = ks8851_start_xmit,
.ndo_set_mac_address = ks8851_set_mac_address,
.ndo_set_rx_mode = ks8851_set_rx_mode,
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index 7945eb5e2fe8..a0ee155f9f51 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -6738,7 +6738,7 @@ static const struct net_device_ops netdev_ops = {
.ndo_set_features = netdev_set_features,
.ndo_set_mac_address = netdev_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = netdev_ioctl,
+ .ndo_eth_ioctl = netdev_ioctl,
.ndo_set_rx_mode = netdev_set_rx_mode,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = netdev_netpoll,
diff --git a/drivers/net/ethernet/microchip/Kconfig b/drivers/net/ethernet/microchip/Kconfig
index d54aa164c4e9..735eea1dacf1 100644
--- a/drivers/net/ethernet/microchip/Kconfig
+++ b/drivers/net/ethernet/microchip/Kconfig
@@ -45,6 +45,7 @@ config ENCX24J600
config LAN743X
tristate "LAN743x support"
depends on PCI
+ depends on PTP_1588_CLOCK_OPTIONAL
select PHYLIB
select CRC16
select CRC32
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index dae10328c6cf..9e8561cdc32a 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -2655,7 +2655,7 @@ static const struct net_device_ops lan743x_netdev_ops = {
.ndo_open = lan743x_netdev_open,
.ndo_stop = lan743x_netdev_close,
.ndo_start_xmit = lan743x_netdev_xmit_frame,
- .ndo_do_ioctl = lan743x_netdev_ioctl,
+ .ndo_eth_ioctl = lan743x_netdev_ioctl,
.ndo_set_rx_mode = lan743x_netdev_set_multicast,
.ndo_change_mtu = lan743x_netdev_change_mtu,
.ndo_get_stats64 = lan743x_netdev_get_stats64,
diff --git a/drivers/net/ethernet/microchip/sparx5/Makefile b/drivers/net/ethernet/microchip/sparx5/Makefile
index faa8f07a6b75..c271e86ee292 100644
--- a/drivers/net/ethernet/microchip/sparx5/Makefile
+++ b/drivers/net/ethernet/microchip/sparx5/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_SPARX5_SWITCH) += sparx5-switch.o
sparx5-switch-objs := sparx5_main.o sparx5_packet.o \
sparx5_netdev.o sparx5_phylink.o sparx5_port.o sparx5_mactable.o sparx5_vlan.o \
- sparx5_switchdev.o sparx5_calendar.o sparx5_ethtool.o
+ sparx5_switchdev.o sparx5_calendar.o sparx5_ethtool.o sparx5_fdma.o
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c
new file mode 100644
index 000000000000..7436f62fa152
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c
@@ -0,0 +1,593 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2021 Microchip Technology Inc. and its subsidiaries.
+ *
+ * The Sparx5 Chip Register Model can be browsed at this location:
+ * https://github.com/microchip-ung/sparx-5_reginfo
+ */
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/interrupt.h>
+#include <linux/ip.h>
+#include <linux/dma-mapping.h>
+
+#include "sparx5_main_regs.h"
+#include "sparx5_main.h"
+#include "sparx5_port.h"
+
+#define FDMA_XTR_CHANNEL 6
+#define FDMA_INJ_CHANNEL 0
+
+#define FDMA_DCB_INFO_DATAL(x) ((x) & GENMASK(15, 0))
+#define FDMA_DCB_INFO_TOKEN BIT(17)
+#define FDMA_DCB_INFO_INTR BIT(18)
+#define FDMA_DCB_INFO_SW(x) (((x) << 24) & GENMASK(31, 24))
+
+#define FDMA_DCB_STATUS_BLOCKL(x) ((x) & GENMASK(15, 0))
+#define FDMA_DCB_STATUS_SOF BIT(16)
+#define FDMA_DCB_STATUS_EOF BIT(17)
+#define FDMA_DCB_STATUS_INTR BIT(18)
+#define FDMA_DCB_STATUS_DONE BIT(19)
+#define FDMA_DCB_STATUS_BLOCKO(x) (((x) << 20) & GENMASK(31, 20))
+#define FDMA_DCB_INVALID_DATA 0x1
+
+#define FDMA_XTR_BUFFER_SIZE 2048
+#define FDMA_WEIGHT 4
+
+/* Frame DMA DCB format
+ *
+ * +---------------------------+
+ * | Next Ptr |
+ * +---------------------------+
+ * | Reserved | Info |
+ * +---------------------------+
+ * | Data0 Ptr |
+ * +---------------------------+
+ * | Reserved | Status0 |
+ * +---------------------------+
+ * | Data1 Ptr |
+ * +---------------------------+
+ * | Reserved | Status1 |
+ * +---------------------------+
+ * | Data2 Ptr |
+ * +---------------------------+
+ * | Reserved | Status2 |
+ * |-------------|-------------|
+ * | |
+ * | |
+ * | |
+ * | |
+ * | |
+ * |---------------------------|
+ * | Data14 Ptr |
+ * +-------------|-------------+
+ * | Reserved | Status14 |
+ * +-------------|-------------+
+ */
+
+/* For each hardware DB there is an entry in this list and when the HW DB
+ * entry is used, this SW DB entry is moved to the back of the list
+ */
+struct sparx5_db {
+ struct list_head list; /* Link in the TX db_list */
+ void *cpu_addr; /* CPU address of the FDMA_XTR_BUFFER_SIZE data buffer */
+};
+
+/* Re-arm an RX DCB and append it to the end of the RX chain.
+ *
+ * @rx:      RX state; its last_entry is linked to @dcb and then updated
+ * @dcb:     DCB to (re)initialise
+ * @nextptr: address of @dcb as stored in the previous DCB's nextptr
+ */
+static void sparx5_fdma_rx_add_dcb(struct sparx5_rx *rx,
+				   struct sparx5_rx_dcb_hw *dcb,
+				   u64 nextptr)
+{
+	int i;
+
+	/* Every DB starts over with only the interrupt flag set */
+	for (i = 0; i < FDMA_RX_DCB_MAX_DBS; i++)
+		dcb->db[i].status = FDMA_DCB_STATUS_INTR;
+
+	/* This DCB becomes the new end of the chain */
+	dcb->nextptr = FDMA_DCB_INVALID_DATA;
+	dcb->info = FDMA_DCB_INFO_DATAL(FDMA_XTR_BUFFER_SIZE);
+
+	/* Hook it behind the previous tail only after it is fully set up */
+	rx->last_entry->nextptr = nextptr;
+	rx->last_entry = dcb;
+}
+
+/* Initialise a TX DCB as an unlinked, completed entry.
+ *
+ * @tx:      TX state (not used here)
+ * @dcb:     DCB to initialise
+ * @nextptr: address of @dcb (not used here; the chain is linked at
+ *           transmit time)
+ */
+static void sparx5_fdma_tx_add_dcb(struct sparx5_tx *tx,
+				   struct sparx5_tx_dcb_hw *dcb,
+				   u64 nextptr)
+{
+	int i;
+
+	/* Mark every DB as done so the transmit path sees it as free */
+	for (i = 0; i < FDMA_TX_DCB_MAX_DBS; i++)
+		dcb->db[i].status = FDMA_DCB_STATUS_DONE;
+
+	dcb->nextptr = FDMA_DCB_INVALID_DATA;
+	dcb->info = FDMA_DCB_INFO_DATAL(FDMA_XTR_BUFFER_SIZE);
+}
+
+/* Program and start the RX channel: point it at the DCB list, configure
+ * the channel, set the extraction watermark, clear the extraction stop bit,
+ * enable the channel's DB interrupt and finally activate the channel.
+ */
+static void sparx5_fdma_rx_activate(struct sparx5 *sparx5, struct sparx5_rx *rx)
+{
+ /* Write the buffer address in the LLP and LLP1 regs */
+ spx5_wr(((u64)rx->dma) & GENMASK(31, 0), sparx5,
+ FDMA_DCB_LLP(rx->channel_id));
+ spx5_wr(((u64)rx->dma) >> 32, sparx5, FDMA_DCB_LLP1(rx->channel_id));
+
+ /* Set the number of RX DBs to be used, and DB end-of-frame interrupt */
+ spx5_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(FDMA_RX_DCB_MAX_DBS) |
+ FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(1) |
+ FDMA_CH_CFG_CH_INJ_PORT_SET(XTR_QUEUE),
+ sparx5, FDMA_CH_CFG(rx->channel_id));
+
+ /* Set the RX Watermark to max */
+ spx5_rmw(FDMA_XTR_CFG_XTR_FIFO_WM_SET(31), FDMA_XTR_CFG_XTR_FIFO_WM,
+ sparx5,
+ FDMA_XTR_CFG);
+
+ /* Start RX fdma */
+ spx5_rmw(FDMA_PORT_CTRL_XTR_STOP_SET(0), FDMA_PORT_CTRL_XTR_STOP,
+ sparx5, FDMA_PORT_CTRL(0));
+
+ /* Enable RX channel DB interrupt */
+ spx5_rmw(BIT(rx->channel_id),
+ BIT(rx->channel_id) & FDMA_INTR_DB_ENA_INTR_DB_ENA,
+ sparx5, FDMA_INTR_DB_ENA);
+
+ /* Activate the RX channel */
+ spx5_wr(BIT(rx->channel_id), sparx5, FDMA_CH_ACTIVATE);
+}
+
+/* Stop the RX channel: clear its activate bit, disable its DB interrupt
+ * and set the extraction stop bit.
+ */
+static void sparx5_fdma_rx_deactivate(struct sparx5 *sparx5, struct sparx5_rx *rx)
+{
+ /* Deactivate the RX channel */
+ spx5_rmw(0, BIT(rx->channel_id) & FDMA_CH_ACTIVATE_CH_ACTIVATE,
+ sparx5, FDMA_CH_ACTIVATE);
+
+ /* Disable RX channel DB interrupt */
+ spx5_rmw(0, BIT(rx->channel_id) & FDMA_INTR_DB_ENA_INTR_DB_ENA,
+ sparx5, FDMA_INTR_DB_ENA);
+
+ /* Stop RX fdma */
+ spx5_rmw(FDMA_PORT_CTRL_XTR_STOP_SET(1), FDMA_PORT_CTRL_XTR_STOP,
+ sparx5, FDMA_PORT_CTRL(0));
+}
+
+/* Program and start the TX channel: point it at the DCB list, configure
+ * the channel, clear the injection stop bit and activate the channel.
+ * Unlike the RX side, no DB interrupt is enabled here.
+ */
+static void sparx5_fdma_tx_activate(struct sparx5 *sparx5, struct sparx5_tx *tx)
+{
+ /* Write the buffer address in the LLP and LLP1 regs */
+ spx5_wr(((u64)tx->dma) & GENMASK(31, 0), sparx5,
+ FDMA_DCB_LLP(tx->channel_id));
+ spx5_wr(((u64)tx->dma) >> 32, sparx5, FDMA_DCB_LLP1(tx->channel_id));
+
+ /* Set the number of TX DBs to be used, and DB end-of-frame interrupt */
+ spx5_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(FDMA_TX_DCB_MAX_DBS) |
+ FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(1) |
+ FDMA_CH_CFG_CH_INJ_PORT_SET(INJ_QUEUE),
+ sparx5, FDMA_CH_CFG(tx->channel_id));
+
+ /* Start TX fdma */
+ spx5_rmw(FDMA_PORT_CTRL_INJ_STOP_SET(0), FDMA_PORT_CTRL_INJ_STOP,
+ sparx5, FDMA_PORT_CTRL(0));
+
+ /* Activate the channel */
+ spx5_wr(BIT(tx->channel_id), sparx5, FDMA_CH_ACTIVATE);
+}
+
+/* Stop the TX channel by clearing its bit in the channel activate register */
+static void sparx5_fdma_tx_deactivate(struct sparx5 *sparx5, struct sparx5_tx *tx)
+{
+ /* Disable the channel */
+ spx5_rmw(0, BIT(tx->channel_id) & FDMA_CH_ACTIVATE_CH_ACTIVATE,
+ sparx5, FDMA_CH_ACTIVATE);
+}
+
+/* Write the RX channel's bit to the channel reload register; called from
+ * the NAPI poll after at least one frame was processed.
+ */
+static void sparx5_fdma_rx_reload(struct sparx5 *sparx5, struct sparx5_rx *rx)
+{
+ /* Reload the RX channel */
+ spx5_wr(BIT(rx->channel_id), sparx5, FDMA_CH_RELOAD);
+}
+
+/* Write the TX channel's bit to the channel reload register; called from
+ * the transmit path after a new DCB was linked into the chain.
+ */
+static void sparx5_fdma_tx_reload(struct sparx5 *sparx5, struct sparx5_tx *tx)
+{
+ /* Reload the TX channel */
+ spx5_wr(BIT(tx->channel_id), sparx5, FDMA_CH_RELOAD);
+}
+
+/* Allocate one RX buffer of FDMA_XTR_BUFFER_SIZE bytes on rx->ndev.
+ * GFP_ATOMIC is used because this is also called from the NAPI poll.
+ * Returns the new skb, or NULL on allocation failure.
+ */
+static struct sk_buff *sparx5_fdma_rx_alloc_skb(struct sparx5_rx *rx)
+{
+ return __netdev_alloc_skb(rx->ndev, FDMA_XTR_BUFFER_SIZE,
+ GFP_ATOMIC);
+}
+
+/* Try to receive one frame from the DB at (rx->dcb_index, rx->db_index).
+ *
+ * If the DB is marked done, its skb is swapped for a freshly allocated one,
+ * the IFH is parsed to find the source port, and the frame is handed to the
+ * stack on that port's netdev.
+ *
+ * Returns true when a frame was delivered (the caller then advances the DB
+ * index), false when the DB is not done, no replacement skb could be
+ * allocated, or the frame came from an inactive port.
+ */
+static bool sparx5_fdma_rx_get_frame(struct sparx5 *sparx5, struct sparx5_rx *rx)
+{
+	struct sparx5_db_hw *db_hw;
+	unsigned int packet_size;
+	struct sparx5_port *port;
+	struct sk_buff *new_skb;
+	struct frame_info fi;
+	struct sk_buff *skb;
+	dma_addr_t dma_addr;
+
+	/* Check if the DCB is done */
+	db_hw = &rx->dcb_entries[rx->dcb_index].db[rx->db_index];
+	if (unlikely(!(db_hw->status & FDMA_DCB_STATUS_DONE)))
+		return false;
+	skb = rx->skb[rx->dcb_index][rx->db_index];
+	/* Replace the DB entry with a new SKB. Nothing has been modified yet,
+	 * so on allocation failure the frame stays in place for a later poll.
+	 */
+	new_skb = sparx5_fdma_rx_alloc_skb(rx);
+	if (unlikely(!new_skb))
+		return false;
+	/* Map the new skb data and set the new skb */
+	dma_addr = virt_to_phys(new_skb->data);
+	rx->skb[rx->dcb_index][rx->db_index] = new_skb;
+	db_hw->dataptr = dma_addr;
+	packet_size = FDMA_DCB_STATUS_BLOCKL(db_hw->status);
+	skb_put(skb, packet_size);
+	/* Now do the normal processing of the skb */
+	sparx5_ifh_parse((u32 *)skb->data, &fi);
+	/* Map to port netdev */
+	port = fi.src_port < SPX5_PORTS ? sparx5->ports[fi.src_port] : NULL;
+	if (!port || !port->ndev) {
+		dev_err(sparx5->dev, "Data on inactive port %d\n", fi.src_port);
+		sparx5_xtr_flush(sparx5, XTR_QUEUE);
+		/* The slot already holds new_skb, so this function holds the
+		 * only reference to the received skb: free it, otherwise it
+		 * leaks.
+		 * NOTE(review): returning false leaves db_index pointing at
+		 * this DB while its status still reads done - confirm the
+		 * intended recovery with the hardware documentation.
+		 */
+		kfree_skb(skb);
+		return false;
+	}
+	skb->dev = port->ndev;
+	/* Strip the IFH that precedes the Ethernet frame */
+	skb_pull(skb, IFH_LEN * sizeof(u32));
+	/* Drop the trailing FCS unless the netdev asked to keep it */
+	if (likely(!(skb->dev->features & NETIF_F_RXFCS)))
+		skb_trim(skb, skb->len - ETH_FCS_LEN);
+	skb->protocol = eth_type_trans(skb, skb->dev);
+	/* Everything we see on an interface that is in the HW bridge
+	 * has already been forwarded
+	 */
+	if (test_bit(port->portno, sparx5->bridge_mask))
+		skb->offload_fwd_mark = 1;
+	skb->dev->stats.rx_bytes += skb->len;
+	skb->dev->stats.rx_packets++;
+	rx->packets++;
+	netif_receive_skb(skb);
+	return true;
+}
+
+/* NAPI poll handler for the RX channel.
+ *
+ * Receives up to @weight frames. Each time all DBs of a DCB have been
+ * consumed, that DCB is re-armed and appended to the end of the chain.
+ * When fewer than @weight frames were received, polling is completed and
+ * the channel's DB interrupt (disabled by the interrupt handler) is enabled
+ * again. The channel is reloaded whenever at least one frame was received.
+ *
+ * Returns the number of frames received.
+ */
+static int sparx5_fdma_napi_callback(struct napi_struct *napi, int weight)
+{
+ struct sparx5_rx *rx = container_of(napi, struct sparx5_rx, napi);
+ struct sparx5 *sparx5 = container_of(rx, struct sparx5, rx);
+ int counter = 0;
+
+ while (counter < weight && sparx5_fdma_rx_get_frame(sparx5, rx)) {
+ struct sparx5_rx_dcb_hw *old_dcb;
+
+ rx->db_index++;
+ counter++;
+ /* Check if the DCB can be reused */
+ if (rx->db_index != FDMA_RX_DCB_MAX_DBS)
+ continue;
+ /* As the DCB can be reused, just advance the dcb_index
+ * pointer and set the nextptr in the DCB
+ */
+ rx->db_index = 0;
+ old_dcb = &rx->dcb_entries[rx->dcb_index];
+ rx->dcb_index++;
+ /* Wrap around; relies on FDMA_DCB_MAX being a power of two */
+ rx->dcb_index &= FDMA_DCB_MAX - 1;
+ /* The address is the list base plus old_dcb's byte offset */
+ sparx5_fdma_rx_add_dcb(rx, old_dcb,
+ rx->dma +
+ ((unsigned long)old_dcb -
+ (unsigned long)rx->dcb_entries));
+ }
+ if (counter < weight) {
+ napi_complete_done(&rx->napi, counter);
+ /* Re-enable the RX channel DB interrupt */
+ spx5_rmw(BIT(rx->channel_id),
+ BIT(rx->channel_id) & FDMA_INTR_DB_ENA_INTR_DB_ENA,
+ sparx5, FDMA_INTR_DB_ENA);
+ }
+ if (counter)
+ sparx5_fdma_rx_reload(sparx5, rx);
+ return counter;
+}
+
+/* Return the DCB that follows @dcb in the TX ring, wrapping from the last
+ * of the FDMA_DCB_MAX entries back to tx->first_entry.
+ */
+static struct sparx5_tx_dcb_hw *sparx5_fdma_next_dcb(struct sparx5_tx *tx,
+						     struct sparx5_tx_dcb_hw *dcb)
+{
+	struct sparx5_tx_dcb_hw *ring_end = tx->first_entry + FDMA_DCB_MAX;
+	struct sparx5_tx_dcb_hw *candidate = dcb + 1;
+
+	/* Stepping past the last entry wraps to the start of the ring */
+	return candidate >= ring_end ? tx->first_entry : candidate;
+}
+
+/* Queue one frame for transmission through the TX channel.
+ *
+ * @sparx5: switch instance
+ * @ifh:    IFH words (IFH_LEN * 4 bytes) placed in front of the frame
+ * @skb:    frame to send; its data is copied, the skb is not consumed here
+ *
+ * The next DCB in the ring is linked behind the current one, the IFH and
+ * frame are copied into the first buffer of db_list (which is then rotated
+ * to the back of the list), and the channel is activated on the first call
+ * or reloaded on later calls.
+ *
+ * Always returns NETDEV_TX_OK.
+ */
+int sparx5_fdma_xmit(struct sparx5 *sparx5, u32 *ifh, struct sk_buff *skb)
+{
+ struct sparx5_tx_dcb_hw *next_dcb_hw;
+ struct sparx5_tx *tx = &sparx5->tx;
+ /* NOTE(review): function-static, so shared by every sparx5 instance */
+ static bool first_time = true;
+ struct sparx5_db_hw *db_hw;
+ struct sparx5_db *db;
+
+ next_dcb_hw = sparx5_fdma_next_dcb(tx, tx->curr_entry);
+ db_hw = &next_dcb_hw->db[0];
+ /* NOTE(review): a DCB that is not done is only counted as dropped;
+ * the code below still overwrites it - confirm this is intended.
+ */
+ if (!(db_hw->status & FDMA_DCB_STATUS_DONE))
+ tx->dropped++;
+ db = list_first_entry(&tx->db_list, struct sparx5_db, list);
+ list_move_tail(&db->list, &tx->db_list);
+ next_dcb_hw->nextptr = FDMA_DCB_INVALID_DATA;
+ tx->curr_entry->nextptr = tx->dma +
+ ((unsigned long)next_dcb_hw -
+ (unsigned long)tx->first_entry);
+ tx->curr_entry = next_dcb_hw;
+ memset(db->cpu_addr, 0, FDMA_XTR_BUFFER_SIZE);
+ memcpy(db->cpu_addr, ifh, IFH_LEN * 4);
+ /* NOTE(review): no check that IFH + skb->len fits in the
+ * FDMA_XTR_BUFFER_SIZE buffer - verify callers bound the frame size.
+ */
+ memcpy(db->cpu_addr + IFH_LEN * 4, skb->data, skb->len);
+ /* Block length is the IFH plus the frame plus 4 extra bytes */
+ db_hw->status = FDMA_DCB_STATUS_SOF |
+ FDMA_DCB_STATUS_EOF |
+ FDMA_DCB_STATUS_BLOCKO(0) |
+ FDMA_DCB_STATUS_BLOCKL(skb->len + IFH_LEN * 4 + 4);
+ if (first_time) {
+ sparx5_fdma_tx_activate(sparx5, tx);
+ first_time = false;
+ } else {
+ sparx5_fdma_tx_reload(sparx5, tx);
+ }
+ return NETDEV_TX_OK;
+}
+
+/* Allocate and start the RX side of the frame DMA.
+ *
+ * Allocates the device-managed DCB array, gives every DB of every DCB its
+ * own skb, chains the DCBs, registers and enables NAPI and activates the RX
+ * channel.
+ *
+ * Returns 0 on success or -ENOMEM. On failure every skb allocated so far is
+ * freed again; the DCB array is released by devres.
+ */
+static int sparx5_fdma_rx_alloc(struct sparx5 *sparx5)
+{
+	struct sparx5_rx *rx = &sparx5->rx;
+	struct sparx5_rx_dcb_hw *dcb;
+	int idx, jdx;
+	int size;
+
+	size = sizeof(struct sparx5_rx_dcb_hw) * FDMA_DCB_MAX;
+	size = ALIGN(size, PAGE_SIZE);
+	rx->dcb_entries = devm_kzalloc(sparx5->dev, size, GFP_KERNEL);
+	if (!rx->dcb_entries)
+		return -ENOMEM;
+	rx->dma = virt_to_phys(rx->dcb_entries);
+	rx->last_entry = rx->dcb_entries;
+	rx->db_index = 0;
+	rx->dcb_index = 0;
+	/* Now for each dcb allocate the db */
+	for (idx = 0; idx < FDMA_DCB_MAX; ++idx) {
+		dcb = &rx->dcb_entries[idx];
+		dcb->info = 0;
+		/* For each db allocate an skb and map skb data pointer to the DB
+		 * dataptr. In this way when the frame is received the skb->data
+		 * will contain the frame, so no memcpy is needed
+		 */
+		for (jdx = 0; jdx < FDMA_RX_DCB_MAX_DBS; ++jdx) {
+			struct sparx5_db_hw *db_hw = &dcb->db[jdx];
+			dma_addr_t dma_addr;
+			struct sk_buff *skb;
+
+			skb = sparx5_fdma_rx_alloc_skb(rx);
+			if (!skb)
+				goto err_free_skbs;
+
+			dma_addr = virt_to_phys(skb->data);
+			db_hw->dataptr = dma_addr;
+			db_hw->status = 0;
+			rx->skb[idx][jdx] = skb;
+		}
+		sparx5_fdma_rx_add_dcb(rx, dcb, rx->dma + sizeof(*dcb) * idx);
+	}
+	netif_napi_add(rx->ndev, &rx->napi, sparx5_fdma_napi_callback, FDMA_WEIGHT);
+	napi_enable(&rx->napi);
+	sparx5_fdma_rx_activate(sparx5, rx);
+	return 0;
+
+err_free_skbs:
+	/* Slot [idx][jdx] is the one that failed: free [idx][0..jdx-1] and
+	 * every slot of the DCBs before idx. The channel has not been
+	 * activated yet, so the buffers are not in use by the hardware.
+	 */
+	while (idx >= 0) {
+		while (--jdx >= 0) {
+			kfree_skb(rx->skb[idx][jdx]);
+			rx->skb[idx][jdx] = NULL;
+		}
+		jdx = FDMA_RX_DCB_MAX_DBS;
+		idx--;
+	}
+	return -ENOMEM;
+}
+
+/* Allocate the TX side of the frame DMA.
+ *
+ * Allocates the device-managed DCB array and, for every DB, a data buffer
+ * of FDMA_XTR_BUFFER_SIZE bytes plus the software descriptor that tracks it
+ * in tx->db_list. On return tx->curr_entry points at the last DCB so that
+ * the first transmit wraps around to the first one.
+ *
+ * Returns 0 on success or -ENOMEM; all allocations are device-managed, so
+ * nothing has to be unwound by hand.
+ */
+static int sparx5_fdma_tx_alloc(struct sparx5 *sparx5)
+{
+	struct sparx5_tx *tx = &sparx5->tx;
+	struct sparx5_tx_dcb_hw *dcb;
+	int idx, jdx;
+	int size;
+
+	size = sizeof(struct sparx5_tx_dcb_hw) * FDMA_DCB_MAX;
+	size = ALIGN(size, PAGE_SIZE);
+	tx->curr_entry = devm_kzalloc(sparx5->dev, size, GFP_KERNEL);
+	if (!tx->curr_entry)
+		return -ENOMEM;
+	tx->dma = virt_to_phys(tx->curr_entry);
+	tx->first_entry = tx->curr_entry;
+	INIT_LIST_HEAD(&tx->db_list);
+	/* Now for each dcb allocate the db */
+	for (idx = 0; idx < FDMA_DCB_MAX; ++idx) {
+		dcb = &tx->curr_entry[idx];
+		dcb->info = 0;
+		/* TX databuffers must be 16byte aligned */
+		for (jdx = 0; jdx < FDMA_TX_DCB_MAX_DBS; ++jdx) {
+			struct sparx5_db_hw *db_hw = &dcb->db[jdx];
+			struct sparx5_db *db;
+			dma_addr_t phys;
+			void *cpu_addr;
+
+			cpu_addr = devm_kzalloc(sparx5->dev,
+						FDMA_XTR_BUFFER_SIZE,
+						GFP_KERNEL);
+			if (!cpu_addr)
+				return -ENOMEM;
+			phys = virt_to_phys(cpu_addr);
+			db_hw->dataptr = phys;
+			db_hw->status = 0;
+			db = devm_kzalloc(sparx5->dev, sizeof(*db), GFP_KERNEL);
+			/* The descriptor is dereferenced right below, so an
+			 * allocation failure must be caught here
+			 */
+			if (!db)
+				return -ENOMEM;
+			db->cpu_addr = cpu_addr;
+			list_add_tail(&db->list, &tx->db_list);
+		}
+		sparx5_fdma_tx_add_dcb(tx, dcb, tx->dma + sizeof(*dcb) * idx);
+		/* Let the curr_entry to point to the last allocated entry */
+		if (idx == FDMA_DCB_MAX - 1)
+			tx->curr_entry = dcb;
+	}
+	return 0;
+}
+
+/* Record the RX channel number and pick the netdev used for skb allocation
+ * and NAPI: the first port that has one. rx->ndev is left untouched when no
+ * port has a netdev.
+ */
+static void sparx5_fdma_rx_init(struct sparx5 *sparx5,
+				struct sparx5_rx *rx, int channel)
+{
+	int portno;
+
+	rx->channel_id = channel;
+
+	/* Fetch a netdev for SKB and NAPI use, any will do */
+	for (portno = 0; portno < SPX5_PORTS; portno++) {
+		struct sparx5_port *candidate = sparx5->ports[portno];
+
+		if (!candidate || !candidate->ndev)
+			continue;
+
+		rx->ndev = candidate->ndev;
+		return;
+	}
+}
+
+/* Record the TX channel number; @sparx5 is not used here */
+static void sparx5_fdma_tx_init(struct sparx5 *sparx5,
+ struct sparx5_tx *tx, int channel)
+{
+ tx->channel_id = channel;
+}
+
+/* Frame DMA interrupt handler.
+ *
+ * @irq:  interrupt number (unused)
+ * @args: the struct sparx5 instance passed at request time
+ *
+ * A pending DB interrupt disables all DB interrupts, is acknowledged and
+ * hands the work to NAPI. A pending error interrupt is logged (rate
+ * limited) and acknowledged together with the error type register.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+irqreturn_t sparx5_fdma_handler(int irq, void *args)
+{
+	struct sparx5 *sparx5 = args;
+	u32 db_pending = spx5_rd(sparx5, FDMA_INTR_DB);
+	u32 err_pending = spx5_rd(sparx5, FDMA_INTR_ERR);
+
+	if (db_pending) {
+		/* Mask first, then clear; the NAPI poll unmasks again */
+		spx5_wr(0, sparx5, FDMA_INTR_DB_ENA);
+		spx5_wr(db_pending, sparx5, FDMA_INTR_DB);
+		napi_schedule(&sparx5->rx.napi);
+	}
+
+	if (err_pending) {
+		u32 err_type = spx5_rd(sparx5, FDMA_ERRORS);
+
+		dev_err_ratelimited(sparx5->dev,
+				    "ERR: int: %#x, type: %#x\n",
+				    err_pending, err_type);
+		/* Write the values read back to both registers to clear them */
+		spx5_wr(err_pending, sparx5, FDMA_INTR_ERR);
+		spx5_wr(err_type, sparx5, FDMA_ERRORS);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* Switch the extraction and injection queue groups to mode 2 (frame DMA)
+ * and configure both CPU ports (SPX5_PORT_CPU_0..SPX5_PORT_CPU_1) for it.
+ */
+static void sparx5_fdma_injection_mode(struct sparx5 *sparx5)
+{
+ const int byte_swap = 1;
+ int portno;
+ int urgency;
+
+ /* Change mode to fdma extraction and injection */
+ spx5_wr(QS_XTR_GRP_CFG_MODE_SET(2) |
+ QS_XTR_GRP_CFG_STATUS_WORD_POS_SET(1) |
+ QS_XTR_GRP_CFG_BYTE_SWAP_SET(byte_swap),
+ sparx5, QS_XTR_GRP_CFG(XTR_QUEUE));
+ spx5_wr(QS_INJ_GRP_CFG_MODE_SET(2) |
+ QS_INJ_GRP_CFG_BYTE_SWAP_SET(byte_swap),
+ sparx5, QS_INJ_GRP_CFG(INJ_QUEUE));
+
+ /* CPU ports capture setup */
+ for (portno = SPX5_PORT_CPU_0; portno <= SPX5_PORT_CPU_1; portno++) {
+ /* ASM CPU port: No preamble, IFH, enable padding */
+ spx5_wr(ASM_PORT_CFG_PAD_ENA_SET(1) |
+ ASM_PORT_CFG_NO_PREAMBLE_ENA_SET(1) |
+ ASM_PORT_CFG_INJ_FORMAT_CFG_SET(1), /* 1 = IFH */
+ sparx5, ASM_PORT_CFG(portno));
+
+ /* Reset WM cnt to unclog queued frames */
+ spx5_rmw(DSM_DEV_TX_STOP_WM_CFG_DEV_TX_CNT_CLR_SET(1),
+ DSM_DEV_TX_STOP_WM_CFG_DEV_TX_CNT_CLR,
+ sparx5,
+ DSM_DEV_TX_STOP_WM_CFG(portno));
+
+ /* Set Disassembler Stop Watermark level */
+ spx5_rmw(DSM_DEV_TX_STOP_WM_CFG_DEV_TX_STOP_WM_SET(100),
+ DSM_DEV_TX_STOP_WM_CFG_DEV_TX_STOP_WM,
+ sparx5,
+ DSM_DEV_TX_STOP_WM_CFG(portno));
+
+ /* Enable port in queue system */
+ urgency = sparx5_port_fwd_urg(sparx5, SPEED_2500);
+ spx5_rmw(QFWD_SWITCH_PORT_MODE_PORT_ENA_SET(1) |
+ QFWD_SWITCH_PORT_MODE_FWD_URGENCY_SET(urgency),
+ QFWD_SWITCH_PORT_MODE_PORT_ENA |
+ QFWD_SWITCH_PORT_MODE_FWD_URGENCY,
+ sparx5,
+ QFWD_SWITCH_PORT_MODE(portno));
+
+ /* Disable Disassembler buffer underrun watchdog
+ * to avoid truncated packets in XTR
+ */
+ spx5_rmw(DSM_BUF_CFG_UNDERFLOW_WATCHDOG_DIS_SET(1),
+ DSM_BUF_CFG_UNDERFLOW_WATCHDOG_DIS,
+ sparx5,
+ DSM_BUF_CFG(portno));
+
+ /* Disabling frame aging */
+ spx5_rmw(HSCH_PORT_MODE_AGE_DIS_SET(1),
+ HSCH_PORT_MODE_AGE_DIS,
+ sparx5,
+ HSCH_PORT_MODE(portno));
+ }
+}
+
+/* Bring up the frame DMA: reset the FDMA block, configure ACP caching,
+ * switch the CPU queues to FDMA mode and allocate the RX and TX rings.
+ *
+ * Returns 0 on success or the error from the RX/TX ring allocation.
+ */
+int sparx5_fdma_start(struct sparx5 *sparx5)
+{
+	int ret;
+
+	/* Reset FDMA state */
+	spx5_wr(FDMA_CTRL_NRESET_SET(0), sparx5, FDMA_CTRL);
+	spx5_wr(FDMA_CTRL_NRESET_SET(1), sparx5, FDMA_CTRL);
+
+	/* Force ACP caching but disable read/write allocation */
+	spx5_rmw(CPU_PROC_CTRL_ACP_CACHE_FORCE_ENA_SET(1) |
+		 CPU_PROC_CTRL_ACP_AWCACHE_SET(0) |
+		 CPU_PROC_CTRL_ACP_ARCACHE_SET(0),
+		 CPU_PROC_CTRL_ACP_CACHE_FORCE_ENA |
+		 CPU_PROC_CTRL_ACP_AWCACHE |
+		 CPU_PROC_CTRL_ACP_ARCACHE,
+		 sparx5, CPU_PROC_CTRL);
+
+	sparx5_fdma_injection_mode(sparx5);
+	sparx5_fdma_rx_init(sparx5, &sparx5->rx, FDMA_XTR_CHANNEL);
+	sparx5_fdma_tx_init(sparx5, &sparx5->tx, FDMA_INJ_CHANNEL);
+
+	ret = sparx5_fdma_rx_alloc(sparx5);
+	if (ret) {
+		dev_err(sparx5->dev, "Could not allocate RX buffers: %d\n", ret);
+		return ret;
+	}
+
+	ret = sparx5_fdma_tx_alloc(sparx5);
+	if (ret)
+		dev_err(sparx5->dev, "Could not allocate TX buffers: %d\n", ret);
+
+	return ret;
+}
+
+/* Read FDMA_PORT_CTRL(0); used as the read operation of the poll loop in
+ * sparx5_fdma_stop().
+ */
+static u32 sparx5_fdma_port_ctrl(struct sparx5 *sparx5)
+{
+ return spx5_rd(sparx5, FDMA_PORT_CTRL(0));
+}
+
+/* Shut down the frame DMA: disable NAPI, deactivate the RX and TX channels
+ * and poll the port control register until the RX side reports stopped.
+ *
+ * Always returns 0.
+ */
+int sparx5_fdma_stop(struct sparx5 *sparx5)
+{
+ u32 val;
+
+ napi_disable(&sparx5->rx.napi);
+ /* Stop the fdma and channel interrupts */
+ sparx5_fdma_rx_deactivate(sparx5, &sparx5->rx);
+ sparx5_fdma_tx_deactivate(sparx5, &sparx5->tx);
+ /* Wait for the RX channel to stop: poll every 500 us, for at most
+ * 10000 us, until the XTR_BUF_IS_EMPTY field reads 0.
+ * NOTE(review): the poll result is ignored, so a timeout is silent.
+ */
+ read_poll_timeout(sparx5_fdma_port_ctrl, val,
+ FDMA_PORT_CTRL_XTR_BUF_IS_EMPTY_GET(val) == 0,
+ 500, 10000, 0, sparx5);
+ return 0;
+}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c
index 0443f66b5550..9a8e4f201eb1 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c
@@ -277,7 +277,7 @@ static void sparx5_fdb_call_notifiers(enum switchdev_notifier_type type,
const char *mac, u16 vid,
struct net_device *dev, bool offloaded)
{
- struct switchdev_notifier_fdb_info info;
+ struct switchdev_notifier_fdb_info info = {};
info.addr = mac;
info.vid = vid;
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index f666133a15de..cbece6e9bff2 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -640,8 +640,23 @@ static int sparx5_start(struct sparx5 *sparx5)
sparx5_board_init(sparx5);
err = sparx5_register_notifier_blocks(sparx5);
- /* Start register based INJ/XTR */
+ /* Start Frame DMA with fallback to register based INJ/XTR */
err = -ENXIO;
+ if (sparx5->fdma_irq >= 0) {
+ if (GCB_CHIP_ID_REV_ID_GET(sparx5->chip_id) > 0)
+ err = devm_request_threaded_irq(sparx5->dev,
+ sparx5->fdma_irq,
+ NULL,
+ sparx5_fdma_handler,
+ IRQF_ONESHOT,
+ "sparx5-fdma", sparx5);
+ if (!err)
+ err = sparx5_fdma_start(sparx5);
+ if (err)
+ sparx5->fdma_irq = -ENXIO;
+ } else {
+ sparx5->fdma_irq = -ENXIO;
+ }
if (err && sparx5->xtr_irq >= 0) {
err = devm_request_irq(sparx5->dev, sparx5->xtr_irq,
sparx5_xtr_handler, IRQF_SHARED,
@@ -766,6 +781,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev)
sparx5->base_mac[5] = 0;
}
+ sparx5->fdma_irq = platform_get_irq_byname(sparx5->pdev, "fdma");
sparx5->xtr_irq = platform_get_irq_byname(sparx5->pdev, "xtr");
/* Read chip ID to check CPU interface */
@@ -824,6 +840,11 @@ static int mchp_sparx5_remove(struct platform_device *pdev)
disable_irq(sparx5->xtr_irq);
sparx5->xtr_irq = -ENXIO;
}
+ if (sparx5->fdma_irq) {
+ disable_irq(sparx5->fdma_irq);
+ sparx5->fdma_irq = -ENXIO;
+ }
+ sparx5_fdma_stop(sparx5);
sparx5_cleanup_ports(sparx5);
/* Unregister netdevs */
sparx5_unregister_notifier_blocks(sparx5);
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
index 4d5f44c3a421..a1acc9b461f2 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
@@ -73,8 +73,61 @@ enum sparx5_vlan_port_type {
#define XTR_QUEUE 0
#define INJ_QUEUE 0
+#define FDMA_DCB_MAX 64
+#define FDMA_RX_DCB_MAX_DBS 15
+#define FDMA_TX_DCB_MAX_DBS 1
+
struct sparx5;
+/* One data block (DB) slot of a DCB, as laid out in DMA memory */
+struct sparx5_db_hw {
+ u64 dataptr; /* Address of the data buffer */
+ u64 status; /* FDMA_DCB_STATUS_* flags and block length */
+};
+
+/* RX DCB as laid out in DMA memory: FDMA_RX_DCB_MAX_DBS data blocks each */
+struct sparx5_rx_dcb_hw {
+ u64 nextptr; /* Address of the next DCB, or FDMA_DCB_INVALID_DATA */
+ u64 info; /* FDMA_DCB_INFO_* word (data length) */
+ struct sparx5_db_hw db[FDMA_RX_DCB_MAX_DBS];
+};
+
+/* TX DCB as laid out in DMA memory: FDMA_TX_DCB_MAX_DBS data blocks each */
+struct sparx5_tx_dcb_hw {
+ u64 nextptr; /* Address of the next DCB, or FDMA_DCB_INVALID_DATA */
+ u64 info; /* FDMA_DCB_INFO_* word (data length) */
+ struct sparx5_db_hw db[FDMA_TX_DCB_MAX_DBS];
+};
+
+/* Frame DMA receive state:
+ * For each DB, there is a SKB, and the skb data pointer is mapped in
+ * the DB. Once a frame is received the skb is given to the upper layers
+ * and a new skb is added to the dcb.
+ * When the db_index reaches FDMA_RX_DCB_MAX_DBS the DCB is reused.
+ */
+struct sparx5_rx {
+ struct sparx5_rx_dcb_hw *dcb_entries; /* Array of FDMA_DCB_MAX DCBs */
+ struct sparx5_rx_dcb_hw *last_entry; /* Current end of the DCB chain */
+ struct sk_buff *skb[FDMA_DCB_MAX][FDMA_RX_DCB_MAX_DBS]; /* skb per DB */
+ int db_index; /* Next DB to check within the current DCB */
+ int dcb_index; /* DCB currently being consumed */
+ dma_addr_t dma; /* Address of dcb_entries as given to the hardware */
+ struct napi_struct napi;
+ u32 channel_id; /* FDMA channel used for extraction */
+ struct net_device *ndev; /* netdev used for skb allocation and NAPI */
+ u64 packets; /* Frames delivered to the stack */
+};
+
+/* Frame DMA transmit state:
+ * DCBs are chained using the DCBs nextptr field.
+ */
+struct sparx5_tx {
+ struct sparx5_tx_dcb_hw *curr_entry; /* DCB most recently queued */
+ struct sparx5_tx_dcb_hw *first_entry; /* Start of the DCB array */
+ struct list_head db_list; /* Data buffers, rotated on every transmit */
+ dma_addr_t dma; /* Address of first_entry as given to the hardware */
+ u32 channel_id; /* FDMA channel used for injection */
+ u64 packets;
+ u64 dropped; /* Transmits that found the next DCB not yet done */
+};
+
struct sparx5_port_config {
phy_interface_t portmode;
u32 bandwidth;
@@ -167,6 +220,10 @@ struct sparx5 {
bool sd_sgpio_remapping;
/* Register based inj/xtr */
int xtr_irq;
+ /* Frame DMA */
+ int fdma_irq;
+ struct sparx5_rx rx;
+ struct sparx5_tx tx;
};
/* sparx5_switchdev.c */
@@ -174,11 +231,23 @@ int sparx5_register_notifier_blocks(struct sparx5 *sparx5);
void sparx5_unregister_notifier_blocks(struct sparx5 *sparx5);
/* sparx5_packet.c */
+struct frame_info {
+ int src_port;
+};
+
+void sparx5_xtr_flush(struct sparx5 *sparx5, u8 grp);
+void sparx5_ifh_parse(u32 *ifh, struct frame_info *info);
irqreturn_t sparx5_xtr_handler(int irq, void *_priv);
int sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev);
int sparx5_manual_injection_mode(struct sparx5 *sparx5);
void sparx5_port_inj_timer_setup(struct sparx5_port *port);
+/* sparx5_fdma.c */
+int sparx5_fdma_start(struct sparx5 *sparx5);
+int sparx5_fdma_stop(struct sparx5 *sparx5);
+int sparx5_fdma_xmit(struct sparx5 *sparx5, u32 *ifh, struct sk_buff *skb);
+irqreturn_t sparx5_fdma_handler(int irq, void *args);
+
/* sparx5_mactable.c */
void sparx5_mact_pull_work(struct work_struct *work);
int sparx5_mact_learn(struct sparx5 *sparx5, int port,
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
index 09ca7a3bafdc..dc7e5ea6ec15 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
@@ -20,11 +20,7 @@
#define INJ_TIMEOUT_NS 50000
-struct frame_info {
- int src_port;
-};
-
-static void sparx5_xtr_flush(struct sparx5 *sparx5, u8 grp)
+void sparx5_xtr_flush(struct sparx5 *sparx5, u8 grp)
{
/* Start flush */
spx5_wr(QS_XTR_FLUSH_FLUSH_SET(BIT(grp)), sparx5, QS_XTR_FLUSH);
@@ -36,7 +32,7 @@ static void sparx5_xtr_flush(struct sparx5 *sparx5, u8 grp)
spx5_wr(0, sparx5, QS_XTR_FLUSH);
}
-static void sparx5_ifh_parse(u32 *ifh, struct frame_info *info)
+void sparx5_ifh_parse(u32 *ifh, struct frame_info *info)
{
u8 *xtr_hdr = (u8 *)ifh;
@@ -224,7 +220,10 @@ int sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev)
struct sparx5 *sparx5 = port->sparx5;
int ret;
- ret = sparx5_inject(sparx5, port->ifh, skb, dev);
+ if (sparx5->fdma_irq > 0)
+ ret = sparx5_fdma_xmit(sparx5, port->ifh, skb);
+ else
+ ret = sparx5_inject(sparx5, port->ifh, skb, dev);
if (ret == NETDEV_TX_OK) {
stats->tx_bytes += skb->len;
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
index d2e3250928bf..189a6a0a2e08 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
@@ -596,7 +596,7 @@ static int sparx5_port_max_tags_set(struct sparx5 *sparx5,
return 0;
}
-static int sparx5_port_fwd_urg(struct sparx5 *sparx5, u32 speed)
+int sparx5_port_fwd_urg(struct sparx5 *sparx5, u32 speed)
{
u32 clk_period_ps = 1600; /* 625Mhz for now */
u32 urg = 672000;
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.h b/drivers/net/ethernet/microchip/sparx5/sparx5_port.h
index fd05ab6436d1..2f8043eac71b 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.h
@@ -89,5 +89,6 @@ int sparx5_get_port_status(struct sparx5 *sparx5,
struct sparx5_port_status *status);
void sparx5_port_enable(struct sparx5_port *port, bool enable);
+int sparx5_port_fwd_urg(struct sparx5 *sparx5, u32 speed);
#endif /* __SPARX5_PORT_H__ */
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
index a72e3b3b596e..649ca609884a 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
@@ -93,9 +93,12 @@ static int sparx5_port_attr_set(struct net_device *dev, const void *ctx,
}
static int sparx5_port_bridge_join(struct sparx5_port *port,
- struct net_device *bridge)
+ struct net_device *bridge,
+ struct netlink_ext_ack *extack)
{
struct sparx5 *sparx5 = port->sparx5;
+ struct net_device *ndev = port->ndev;
+ int err;
if (bitmap_empty(sparx5->bridge_mask, SPX5_PORTS))
/* First bridged port */
@@ -109,12 +112,21 @@ static int sparx5_port_bridge_join(struct sparx5_port *port,
set_bit(port->portno, sparx5->bridge_mask);
+ err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL,
+ false, extack);
+ if (err)
+ goto err_switchdev_offload;
+
/* Port enters in bridge mode therefor don't need to copy to CPU
* frames for multicast in case the bridge is not requesting them
*/
- __dev_mc_unsync(port->ndev, sparx5_mc_unsync);
+ __dev_mc_unsync(ndev, sparx5_mc_unsync);
return 0;
+
+err_switchdev_offload:
+ clear_bit(port->portno, sparx5->bridge_mask);
+ return err;
}
static void sparx5_port_bridge_leave(struct sparx5_port *port,
@@ -122,6 +134,8 @@ static void sparx5_port_bridge_leave(struct sparx5_port *port,
{
struct sparx5 *sparx5 = port->sparx5;
+ switchdev_bridge_port_unoffload(port->ndev, NULL, NULL, NULL);
+
clear_bit(port->portno, sparx5->bridge_mask);
if (bitmap_empty(sparx5->bridge_mask, SPX5_PORTS))
sparx5->hw_bridge_dev = NULL;
@@ -139,11 +153,15 @@ static int sparx5_port_changeupper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
{
struct sparx5_port *port = netdev_priv(dev);
+ struct netlink_ext_ack *extack;
int err = 0;
+ extack = netdev_notifier_info_to_extack(&info->info);
+
if (netif_is_bridge_master(info->upper_dev)) {
if (info->linking)
- err = sparx5_port_bridge_join(port, info->upper_dev);
+ err = sparx5_port_bridge_join(port, info->upper_dev,
+ extack);
else
sparx5_port_bridge_leave(port, info->upper_dev);
diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h
index 33e53d32e891..41ecd156e95f 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma.h
+++ b/drivers/net/ethernet/microsoft/mana/gdma.h
@@ -239,10 +239,8 @@ struct gdma_event {
struct gdma_queue;
-#define CQE_POLLING_BUFFER 512
struct mana_eq {
struct gdma_queue *eq;
- struct gdma_comp cqe_poll[CQE_POLLING_BUFFER];
};
typedef void gdma_eq_callback(void *context, struct gdma_queue *q,
@@ -291,11 +289,6 @@ struct gdma_queue {
unsigned int msix_index;
u32 log2_throttle_limit;
-
- /* NAPI data */
- struct napi_struct napi;
- int work_done;
- int budget;
} eq;
struct {
@@ -319,9 +312,6 @@ struct gdma_queue_spec {
void *context;
unsigned long log2_throttle_limit;
-
- /* Only used by the MANA device. */
- struct net_device *ndev;
} eq;
struct {
@@ -406,7 +396,7 @@ void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue);
int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe);
-void mana_gd_arm_cq(struct gdma_queue *cq);
+void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit);
struct gdma_wqe {
u32 reserved :24;
@@ -496,16 +486,28 @@ enum {
GDMA_PROTOCOL_LAST = GDMA_PROTOCOL_V1,
};
+#define GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT BIT(0)
+
+#define GDMA_DRV_CAP_FLAGS1 GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT
+
+#define GDMA_DRV_CAP_FLAGS2 0
+
+#define GDMA_DRV_CAP_FLAGS3 0
+
+#define GDMA_DRV_CAP_FLAGS4 0
+
struct gdma_verify_ver_req {
struct gdma_req_hdr hdr;
/* Mandatory fields required for protocol establishment */
u64 protocol_ver_min;
u64 protocol_ver_max;
- u64 drv_cap_flags1;
- u64 drv_cap_flags2;
- u64 drv_cap_flags3;
- u64 drv_cap_flags4;
+
+ /* Gdma Driver Capability Flags */
+ u64 gd_drv_cap_flags1;
+ u64 gd_drv_cap_flags2;
+ u64 gd_drv_cap_flags3;
+ u64 gd_drv_cap_flags4;
/* Advisory fields */
u64 drv_ver;
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 2f87bf90f8ec..cee75b561f59 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -67,6 +67,10 @@ static int mana_gd_query_max_resources(struct pci_dev *pdev)
if (gc->max_num_queues > resp.max_rq)
gc->max_num_queues = resp.max_rq;
+ /* The Hardware Channel (HWC) uses 1 MSI-X */
+ if (gc->max_num_queues > gc->num_msix_usable - 1)
+ gc->max_num_queues = gc->num_msix_usable - 1;
+
return 0;
}
@@ -267,7 +271,7 @@ void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue)
queue->id, queue->head * GDMA_WQE_BU_SIZE, 1);
}
-void mana_gd_arm_cq(struct gdma_queue *cq)
+void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
{
struct gdma_context *gc = cq->gdma_dev->gdma_context;
@@ -276,7 +280,7 @@ void mana_gd_arm_cq(struct gdma_queue *cq)
u32 head = cq->head % (num_cqe << GDMA_CQE_OWNER_BITS);
mana_gd_ring_doorbell(gc, cq->gdma_dev->doorbell, cq->type, cq->id,
- head, SET_ARM_BIT);
+ head, arm_bit);
}
static void mana_gd_process_eqe(struct gdma_queue *eq)
@@ -339,7 +343,6 @@ static void mana_gd_process_eq_events(void *arg)
struct gdma_queue *eq = arg;
struct gdma_context *gc;
struct gdma_eqe *eqe;
- unsigned int arm_bit;
u32 head, num_eqe;
int i;
@@ -370,92 +373,54 @@ static void mana_gd_process_eq_events(void *arg)
eq->head++;
}
- /* Always rearm the EQ for HWC. For MANA, rearm it when NAPI is done. */
- if (mana_gd_is_hwc(eq->gdma_dev)) {
- arm_bit = SET_ARM_BIT;
- } else if (eq->eq.work_done < eq->eq.budget &&
- napi_complete_done(&eq->eq.napi, eq->eq.work_done)) {
- arm_bit = SET_ARM_BIT;
- } else {
- arm_bit = 0;
- }
-
head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS);
mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id,
- head, arm_bit);
-}
-
-static int mana_poll(struct napi_struct *napi, int budget)
-{
- struct gdma_queue *eq = container_of(napi, struct gdma_queue, eq.napi);
-
- eq->eq.work_done = 0;
- eq->eq.budget = budget;
-
- mana_gd_process_eq_events(eq);
-
- return min(eq->eq.work_done, budget);
-}
-
-static void mana_gd_schedule_napi(void *arg)
-{
- struct gdma_queue *eq = arg;
- struct napi_struct *napi;
-
- napi = &eq->eq.napi;
- napi_schedule_irqoff(napi);
+ head, SET_ARM_BIT);
}
static int mana_gd_register_irq(struct gdma_queue *queue,
const struct gdma_queue_spec *spec)
{
struct gdma_dev *gd = queue->gdma_dev;
- bool is_mana = mana_gd_is_mana(gd);
struct gdma_irq_context *gic;
struct gdma_context *gc;
struct gdma_resource *r;
unsigned int msi_index;
unsigned long flags;
- int err;
+ struct device *dev;
+ int err = 0;
gc = gd->gdma_context;
r = &gc->msix_resource;
+ dev = gc->dev;
spin_lock_irqsave(&r->lock, flags);
msi_index = find_first_zero_bit(r->map, r->size);
- if (msi_index >= r->size) {
+ if (msi_index >= r->size || msi_index >= gc->num_msix_usable) {
err = -ENOSPC;
} else {
bitmap_set(r->map, msi_index, 1);
queue->eq.msix_index = msi_index;
- err = 0;
}
spin_unlock_irqrestore(&r->lock, flags);
- if (err)
- return err;
+ if (err) {
+ dev_err(dev, "Register IRQ err:%d, msi:%u rsize:%u, nMSI:%u\n",
+ err, msi_index, r->size, gc->num_msix_usable);
- WARN_ON(msi_index >= gc->num_msix_usable);
+ return err;
+ }
gic = &gc->irq_contexts[msi_index];
- if (is_mana) {
- netif_napi_add(spec->eq.ndev, &queue->eq.napi, mana_poll,
- NAPI_POLL_WEIGHT);
- napi_enable(&queue->eq.napi);
- }
-
WARN_ON(gic->handler || gic->arg);
gic->arg = queue;
- if (is_mana)
- gic->handler = mana_gd_schedule_napi;
- else
- gic->handler = mana_gd_process_eq_events;
+ gic->handler = mana_gd_process_eq_events;
return 0;
}
@@ -549,11 +514,6 @@ static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets,
mana_gd_deregiser_irq(queue);
- if (mana_gd_is_mana(queue->gdma_dev)) {
- napi_disable(&queue->eq.napi);
- netif_napi_del(&queue->eq.napi);
- }
-
if (queue->eq.disable_needed)
mana_gd_disable_queue(queue);
}
@@ -883,6 +843,11 @@ int mana_gd_verify_vf_version(struct pci_dev *pdev)
req.protocol_ver_min = GDMA_PROTOCOL_FIRST;
req.protocol_ver_max = GDMA_PROTOCOL_LAST;
+ req.gd_drv_cap_flags1 = GDMA_DRV_CAP_FLAGS1;
+ req.gd_drv_cap_flags2 = GDMA_DRV_CAP_FLAGS2;
+ req.gd_drv_cap_flags3 = GDMA_DRV_CAP_FLAGS3;
+ req.gd_drv_cap_flags4 = GDMA_DRV_CAP_FLAGS4;
+
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
if (err || resp.hdr.status) {
dev_err(gc->dev, "VfVerifyVersionOutput: %d, status=0x%x\n",
@@ -1128,7 +1093,7 @@ static int mana_gd_read_cqe(struct gdma_queue *cq, struct gdma_comp *comp)
new_bits = (cq->head / num_cqe) & GDMA_CQE_OWNER_MASK;
/* Return -1 if overflow detected. */
- if (owner_bits != new_bits)
+ if (WARN_ON_ONCE(owner_bits != new_bits))
return -1;
comp->wq_num = cqe->cqe_info.wq_num;
@@ -1201,10 +1166,8 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev)
if (max_queues_per_port > MANA_MAX_NUM_QUEUES)
max_queues_per_port = MANA_MAX_NUM_QUEUES;
- max_irqs = max_queues_per_port * MAX_PORTS_IN_MANA_DEV;
-
/* Need 1 interrupt for the Hardware communication Channel (HWC) */
- max_irqs++;
+ max_irqs = max_queues_per_port + 1;
nvec = pci_alloc_irq_vectors(pdev, 2, max_irqs, PCI_IRQ_MSIX);
if (nvec < 0)
@@ -1291,6 +1254,9 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
int bar = 0;
int err;
+ /* Each port has 2 CQs, each CQ has at most 1 EQE at a time */
+ BUILD_BUG_ON(2 * MAX_PORTS_IN_MANA_DEV * GDMA_EQE_SIZE > EQ_SIZE);
+
err = pci_enable_device(pdev);
if (err)
return -ENXIO;
diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
index 1a923fd99990..c1310ea1c216 100644
--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
@@ -304,7 +304,7 @@ static void mana_hwc_comp_event(void *ctx, struct gdma_queue *q_self)
&comp_data);
}
- mana_gd_arm_cq(q_self);
+ mana_gd_ring_cq(q_self, SET_ARM_BIT);
}
static void mana_hwc_destroy_cq(struct gdma_context *gc, struct hwc_cq *hwc_cq)
diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h
index a2c3f826f022..fc98a5ba5ed0 100644
--- a/drivers/net/ethernet/microsoft/mana/mana.h
+++ b/drivers/net/ethernet/microsoft/mana/mana.h
@@ -46,7 +46,7 @@ enum TRI_STATE {
#define EQ_SIZE (8 * PAGE_SIZE)
#define LOG2_EQ_THROTTLE 3
-#define MAX_PORTS_IN_MANA_DEV 16
+#define MAX_PORTS_IN_MANA_DEV 256
struct mana_stats {
u64 packets;
@@ -225,6 +225,8 @@ struct mana_tx_comp_oob {
struct mana_rxq;
+#define CQE_POLLING_BUFFER 512
+
struct mana_cq {
struct gdma_queue *gdma_cq;
@@ -244,8 +246,13 @@ struct mana_cq {
*/
struct mana_txq *txq;
- /* Pointer to a buffer which the CQ handler can copy the CQE's into. */
- struct gdma_comp *gdma_comp_buf;
+ /* Buffer which the CQ handler can copy the CQE's into. */
+ struct gdma_comp gdma_comp_buf[CQE_POLLING_BUFFER];
+
+ /* NAPI data */
+ struct napi_struct napi;
+ int work_done;
+ int budget;
};
#define GDMA_MAX_RQE_SGES 15
@@ -315,6 +322,8 @@ struct mana_context {
u16 num_ports;
+ struct mana_eq *eqs;
+
struct net_device *ports[MAX_PORTS_IN_MANA_DEV];
};
@@ -324,8 +333,6 @@ struct mana_port_context {
u8 mac_addr[ETH_ALEN];
- struct mana_eq *eqs;
-
enum TRI_STATE rss_state;
mana_handle_t default_rxobj;
@@ -395,11 +402,11 @@ enum mana_command_code {
struct mana_query_device_cfg_req {
struct gdma_req_hdr hdr;
- /* Driver Capability flags */
- u64 drv_cap_flags1;
- u64 drv_cap_flags2;
- u64 drv_cap_flags3;
- u64 drv_cap_flags4;
+ /* MANA Nic Driver Capability flags */
+ u64 mn_drv_cap_flags1;
+ u64 mn_drv_cap_flags2;
+ u64 mn_drv_cap_flags3;
+ u64 mn_drv_cap_flags4;
u32 proto_major_ver;
u32 proto_minor_ver;
@@ -516,7 +523,7 @@ struct mana_cfg_rx_steer_resp {
struct gdma_resp_hdr hdr;
}; /* HW DATA */
-#define MANA_MAX_NUM_QUEUES 16
+#define MANA_MAX_NUM_QUEUES 64
#define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index fff78900fc8a..1b21030308e5 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -696,66 +696,56 @@ static void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
resp.hdr.status);
}
-static void mana_init_cqe_poll_buf(struct gdma_comp *cqe_poll_buf)
-{
- int i;
-
- for (i = 0; i < CQE_POLLING_BUFFER; i++)
- memset(&cqe_poll_buf[i], 0, sizeof(struct gdma_comp));
-}
-
-static void mana_destroy_eq(struct gdma_context *gc,
- struct mana_port_context *apc)
+static void mana_destroy_eq(struct mana_context *ac)
{
+ struct gdma_context *gc = ac->gdma_dev->gdma_context;
struct gdma_queue *eq;
int i;
- if (!apc->eqs)
+ if (!ac->eqs)
return;
- for (i = 0; i < apc->num_queues; i++) {
- eq = apc->eqs[i].eq;
+ for (i = 0; i < gc->max_num_queues; i++) {
+ eq = ac->eqs[i].eq;
if (!eq)
continue;
mana_gd_destroy_queue(gc, eq);
}
- kfree(apc->eqs);
- apc->eqs = NULL;
+ kfree(ac->eqs);
+ ac->eqs = NULL;
}
-static int mana_create_eq(struct mana_port_context *apc)
+static int mana_create_eq(struct mana_context *ac)
{
- struct gdma_dev *gd = apc->ac->gdma_dev;
+ struct gdma_dev *gd = ac->gdma_dev;
+ struct gdma_context *gc = gd->gdma_context;
struct gdma_queue_spec spec = {};
int err;
int i;
- apc->eqs = kcalloc(apc->num_queues, sizeof(struct mana_eq),
- GFP_KERNEL);
- if (!apc->eqs)
+ ac->eqs = kcalloc(gc->max_num_queues, sizeof(struct mana_eq),
+ GFP_KERNEL);
+ if (!ac->eqs)
return -ENOMEM;
spec.type = GDMA_EQ;
spec.monitor_avl_buf = false;
spec.queue_size = EQ_SIZE;
spec.eq.callback = NULL;
- spec.eq.context = apc->eqs;
+ spec.eq.context = ac->eqs;
spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
- spec.eq.ndev = apc->ndev;
-
- for (i = 0; i < apc->num_queues; i++) {
- mana_init_cqe_poll_buf(apc->eqs[i].cqe_poll);
- err = mana_gd_create_mana_eq(gd, &spec, &apc->eqs[i].eq);
+ for (i = 0; i < gc->max_num_queues; i++) {
+ err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq);
if (err)
goto out;
}
return 0;
out:
- mana_destroy_eq(gd->gdma_context, apc);
+ mana_destroy_eq(ac);
return err;
}
@@ -790,7 +780,6 @@ static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
static void mana_poll_tx_cq(struct mana_cq *cq)
{
- struct gdma_queue *gdma_eq = cq->gdma_cq->cq.parent;
struct gdma_comp *completions = cq->gdma_comp_buf;
struct gdma_posted_wqe_info *wqe_info;
unsigned int pkt_transmitted = 0;
@@ -812,6 +801,9 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
comp_read = mana_gd_poll_cq(cq->gdma_cq, completions,
CQE_POLLING_BUFFER);
+ if (comp_read < 1)
+ return;
+
for (i = 0; i < comp_read; i++) {
struct mana_tx_comp_oob *cqe_oob;
@@ -861,7 +853,7 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
mana_unmap_skb(skb, apc);
- napi_consume_skb(skb, gdma_eq->eq.budget);
+ napi_consume_skb(skb, cq->budget);
pkt_transmitted++;
}
@@ -890,6 +882,8 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0)
WARN_ON_ONCE(1);
+
+ cq->work_done = pkt_transmitted;
}
static void mana_post_pkt_rxq(struct mana_rxq *rxq)
@@ -918,17 +912,13 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
struct mana_stats *rx_stats = &rxq->stats;
struct net_device *ndev = rxq->ndev;
uint pkt_len = cqe->ppi[0].pkt_len;
- struct mana_port_context *apc;
u16 rxq_idx = rxq->rxq_idx;
struct napi_struct *napi;
- struct gdma_queue *eq;
struct sk_buff *skb;
u32 hash_value;
- apc = netdev_priv(ndev);
- eq = apc->eqs[rxq_idx].eq;
- eq->eq.work_done++;
- napi = &eq->eq.napi;
+ rxq->rx_cq.work_done++;
+ napi = &rxq->rx_cq.napi;
if (!buf_va) {
++ndev->stats.rx_dropped;
@@ -1081,6 +1071,7 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
{
struct mana_cq *cq = context;
+ u8 arm_bit;
WARN_ON_ONCE(cq->gdma_cq != gdma_queue);
@@ -1089,7 +1080,33 @@ static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
else
mana_poll_tx_cq(cq);
- mana_gd_arm_cq(gdma_queue);
+ if (cq->work_done < cq->budget &&
+ napi_complete_done(&cq->napi, cq->work_done)) {
+ arm_bit = SET_ARM_BIT;
+ } else {
+ arm_bit = 0;
+ }
+
+ mana_gd_ring_cq(gdma_queue, arm_bit);
+}
+
+static int mana_poll(struct napi_struct *napi, int budget)
+{
+ struct mana_cq *cq = container_of(napi, struct mana_cq, napi);
+
+ cq->work_done = 0;
+ cq->budget = budget;
+
+ mana_cq_handler(cq, cq->gdma_cq);
+
+ return min(cq->work_done, budget);
+}
+
+static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue)
+{
+ struct mana_cq *cq = context;
+
+ napi_schedule_irqoff(&cq->napi);
}
static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq)
@@ -1114,12 +1131,18 @@ static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq)
static void mana_destroy_txq(struct mana_port_context *apc)
{
+ struct napi_struct *napi;
int i;
if (!apc->tx_qp)
return;
for (i = 0; i < apc->num_queues; i++) {
+ napi = &apc->tx_qp[i].tx_cq.napi;
+ napi_synchronize(napi);
+ napi_disable(napi);
+ netif_napi_del(napi);
+
mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
@@ -1134,7 +1157,8 @@ static void mana_destroy_txq(struct mana_port_context *apc)
static int mana_create_txq(struct mana_port_context *apc,
struct net_device *net)
{
- struct gdma_dev *gd = apc->ac->gdma_dev;
+ struct mana_context *ac = apc->ac;
+ struct gdma_dev *gd = ac->gdma_dev;
struct mana_obj_spec wq_spec;
struct mana_obj_spec cq_spec;
struct gdma_queue_spec spec;
@@ -1186,7 +1210,6 @@ static int mana_create_txq(struct mana_port_context *apc,
/* Create SQ's CQ */
cq = &apc->tx_qp[i].tx_cq;
- cq->gdma_comp_buf = apc->eqs[i].cqe_poll;
cq->type = MANA_CQ_TYPE_TX;
cq->txq = txq;
@@ -1195,8 +1218,8 @@ static int mana_create_txq(struct mana_port_context *apc,
spec.type = GDMA_CQ;
spec.monitor_avl_buf = false;
spec.queue_size = cq_size;
- spec.cq.callback = mana_cq_handler;
- spec.cq.parent_eq = apc->eqs[i].eq;
+ spec.cq.callback = mana_schedule_napi;
+ spec.cq.parent_eq = ac->eqs[i].eq;
spec.cq.context = cq;
err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
if (err)
@@ -1237,7 +1260,10 @@ static int mana_create_txq(struct mana_port_context *apc,
gc->cq_table[cq->gdma_id] = cq->gdma_cq;
- mana_gd_arm_cq(cq->gdma_cq);
+ netif_tx_napi_add(net, &cq->napi, mana_poll, NAPI_POLL_WEIGHT);
+ napi_enable(&cq->napi);
+
+ mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
}
return 0;
@@ -1246,21 +1272,6 @@ out:
return err;
}
-static void mana_napi_sync_for_rx(struct mana_rxq *rxq)
-{
- struct net_device *ndev = rxq->ndev;
- struct mana_port_context *apc;
- u16 rxq_idx = rxq->rxq_idx;
- struct napi_struct *napi;
- struct gdma_queue *eq;
-
- apc = netdev_priv(ndev);
- eq = apc->eqs[rxq_idx].eq;
- napi = &eq->eq.napi;
-
- napi_synchronize(napi);
-}
-
static void mana_destroy_rxq(struct mana_port_context *apc,
struct mana_rxq *rxq, bool validate_state)
@@ -1268,13 +1279,19 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
struct mana_recv_buf_oob *rx_oob;
struct device *dev = gc->dev;
+ struct napi_struct *napi;
int i;
if (!rxq)
return;
+ napi = &rxq->rx_cq.napi;
+
if (validate_state)
- mana_napi_sync_for_rx(rxq);
+ napi_synchronize(napi);
+
+ napi_disable(napi);
+ netif_napi_del(napi);
mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
@@ -1418,7 +1435,6 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
/* Create RQ's CQ */
cq = &rxq->rx_cq;
- cq->gdma_comp_buf = eq->cqe_poll;
cq->type = MANA_CQ_TYPE_RX;
cq->rxq = rxq;
@@ -1426,7 +1442,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
spec.type = GDMA_CQ;
spec.monitor_avl_buf = false;
spec.queue_size = cq_size;
- spec.cq.callback = mana_cq_handler;
+ spec.cq.callback = mana_schedule_napi;
spec.cq.parent_eq = eq->eq;
spec.cq.context = cq;
err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
@@ -1466,7 +1482,10 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
gc->cq_table[cq->gdma_id] = cq->gdma_cq;
- mana_gd_arm_cq(cq->gdma_cq);
+ netif_napi_add(ndev, &cq->napi, mana_poll, 1);
+ napi_enable(&cq->napi);
+
+ mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
out:
if (!err)
return rxq;
@@ -1484,12 +1503,13 @@ out:
static int mana_add_rx_queues(struct mana_port_context *apc,
struct net_device *ndev)
{
+ struct mana_context *ac = apc->ac;
struct mana_rxq *rxq;
int err = 0;
int i;
for (i = 0; i < apc->num_queues; i++) {
- rxq = mana_create_rxq(apc, i, &apc->eqs[i], ndev);
+ rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev);
if (!rxq) {
err = -ENOMEM;
goto out;
@@ -1601,16 +1621,11 @@ reset_apc:
int mana_alloc_queues(struct net_device *ndev)
{
struct mana_port_context *apc = netdev_priv(ndev);
- struct gdma_dev *gd = apc->ac->gdma_dev;
int err;
- err = mana_create_eq(apc);
- if (err)
- return err;
-
err = mana_create_vport(apc, ndev);
if (err)
- goto destroy_eq;
+ return err;
err = netif_set_real_num_tx_queues(ndev, apc->num_queues);
if (err)
@@ -1636,8 +1651,6 @@ int mana_alloc_queues(struct net_device *ndev)
destroy_vport:
mana_destroy_vport(apc);
-destroy_eq:
- mana_destroy_eq(gd->gdma_context, apc);
return err;
}
@@ -1714,8 +1727,6 @@ static int mana_dealloc_queues(struct net_device *ndev)
mana_destroy_vport(apc);
- mana_destroy_eq(apc->ac->gdma_dev->gdma_context, apc);
-
return 0;
}
@@ -1768,7 +1779,7 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
apc->ac = ac;
apc->ndev = ndev;
apc->max_queues = gc->max_num_queues;
- apc->num_queues = min_t(uint, gc->max_num_queues, MANA_MAX_NUM_QUEUES);
+ apc->num_queues = gc->max_num_queues;
apc->port_handle = INVALID_MANA_HANDLE;
apc->port_idx = port_idx;
@@ -1839,6 +1850,10 @@ int mana_probe(struct gdma_dev *gd)
ac->num_ports = 1;
gd->driver_data = ac;
+ err = mana_create_eq(ac);
+ if (err)
+ goto out;
+
err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
MANA_MICRO_VERSION, &ac->num_ports);
if (err)
@@ -1888,6 +1903,9 @@ void mana_remove(struct gdma_dev *gd)
free_netdev(ndev);
}
+
+ mana_destroy_eq(ac);
+
out:
mana_gd_deregister_device(gd);
gd->driver_data = NULL;
diff --git a/drivers/net/ethernet/mscc/Kconfig b/drivers/net/ethernet/mscc/Kconfig
index 2d3157e4d081..b6a73d151dec 100644
--- a/drivers/net/ethernet/mscc/Kconfig
+++ b/drivers/net/ethernet/mscc/Kconfig
@@ -16,7 +16,7 @@ config MSCC_OCELOT_SWITCH_LIB
select NET_DEVLINK
select REGMAP_MMIO
select PACKING
- select PHYLIB
+ select PHYLINK
tristate
help
This is a hardware support library for Ocelot network switches. It is
@@ -24,6 +24,7 @@ config MSCC_OCELOT_SWITCH_LIB
config MSCC_OCELOT_SWITCH
tristate "Ocelot switch driver"
+ depends on PTP_1588_CLOCK_OPTIONAL
depends on BRIDGE || BRIDGE=n
depends on NET_SWITCHDEV
depends on HAS_IOMEM
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index adfb9781799e..c581b955efb3 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -222,8 +222,35 @@ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port,
ANA_PORT_DROP_CFG, port);
}
+static int ocelot_vlan_member_set(struct ocelot *ocelot, u32 vlan_mask, u16 vid)
+{
+ int err;
+
+ err = ocelot_vlant_set_mask(ocelot, vid, vlan_mask);
+ if (err)
+ return err;
+
+ ocelot->vlan_mask[vid] = vlan_mask;
+
+ return 0;
+}
+
+static int ocelot_vlan_member_add(struct ocelot *ocelot, int port, u16 vid)
+{
+ return ocelot_vlan_member_set(ocelot,
+ ocelot->vlan_mask[vid] | BIT(port),
+ vid);
+}
+
+static int ocelot_vlan_member_del(struct ocelot *ocelot, int port, u16 vid)
+{
+ return ocelot_vlan_member_set(ocelot,
+ ocelot->vlan_mask[vid] & ~BIT(port),
+ vid);
+}
+
int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port,
- bool vlan_aware)
+ bool vlan_aware, struct netlink_ext_ack *extack)
{
struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1];
struct ocelot_port *ocelot_port = ocelot->ports[port];
@@ -233,8 +260,8 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port,
list_for_each_entry(filter, &block->rules, list) {
if (filter->ingress_port_mask & BIT(port) &&
filter->action.vid_replace_ena) {
- dev_err(ocelot->dev,
- "Cannot change VLAN state with vlan modify rules active\n");
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot change VLAN state with vlan modify rules active");
return -EBUSY;
}
}
@@ -259,16 +286,15 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port,
EXPORT_SYMBOL(ocelot_port_vlan_filtering);
int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid,
- bool untagged)
+ bool untagged, struct netlink_ext_ack *extack)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
/* Deny changing the native VLAN, but always permit deleting it */
if (untagged && ocelot_port->native_vlan.vid != vid &&
ocelot_port->native_vlan.valid) {
- dev_err(ocelot->dev,
- "Port already has a native VLAN: %d\n",
- ocelot_port->native_vlan.vid);
+ NL_SET_ERR_MSG_MOD(extack,
+ "Port already has a native VLAN");
return -EBUSY;
}
@@ -279,13 +305,11 @@ EXPORT_SYMBOL(ocelot_vlan_prepare);
int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid,
bool untagged)
{
- int ret;
+ int err;
- /* Make the port a member of the VLAN */
- ocelot->vlan_mask[vid] |= BIT(port);
- ret = ocelot_vlant_set_mask(ocelot, vid, ocelot->vlan_mask[vid]);
- if (ret)
- return ret;
+ err = ocelot_vlan_member_add(ocelot, port, vid);
+ if (err)
+ return err;
/* Default ingress vlan classification */
if (pvid) {
@@ -312,13 +336,11 @@ EXPORT_SYMBOL(ocelot_vlan_add);
int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
- int ret;
+ int err;
- /* Stop the port from being a member of the vlan */
- ocelot->vlan_mask[vid] &= ~BIT(port);
- ret = ocelot_vlant_set_mask(ocelot, vid, ocelot->vlan_mask[vid]);
- if (ret)
- return ret;
+ err = ocelot_vlan_member_del(ocelot, port, vid);
+ if (err)
+ return err;
/* Ingress */
if (ocelot_port->pvid_vlan.vid == vid) {
@@ -340,6 +362,7 @@ EXPORT_SYMBOL(ocelot_vlan_del);
static void ocelot_vlan_init(struct ocelot *ocelot)
{
+ unsigned long all_ports = GENMASK(ocelot->num_phys_ports - 1, 0);
u16 port, vid;
/* Clear VLAN table, by default all ports are members of all VLANs */
@@ -348,23 +371,19 @@ static void ocelot_vlan_init(struct ocelot *ocelot)
ocelot_vlant_wait_for_completion(ocelot);
/* Configure the port VLAN memberships */
- for (vid = 1; vid < VLAN_N_VID; vid++) {
- ocelot->vlan_mask[vid] = 0;
- ocelot_vlant_set_mask(ocelot, vid, ocelot->vlan_mask[vid]);
- }
+ for (vid = 1; vid < VLAN_N_VID; vid++)
+ ocelot_vlan_member_set(ocelot, 0, vid);
/* Because VLAN filtering is enabled, we need VID 0 to get untagged
* traffic. It is added automatically if 8021q module is loaded, but
* we can't rely on it since module may be not loaded.
*/
- ocelot->vlan_mask[0] = GENMASK(ocelot->num_phys_ports - 1, 0);
- ocelot_vlant_set_mask(ocelot, 0, ocelot->vlan_mask[0]);
+ ocelot_vlan_member_set(ocelot, all_ports, 0);
/* Set vlan ingress filter mask to all ports but the CPU port by
* default.
*/
- ocelot_write(ocelot, GENMASK(ocelot->num_phys_ports - 1, 0),
- ANA_VLANMASK);
+ ocelot_write(ocelot, all_ports, ANA_VLANMASK);
for (port = 0; port < ocelot->num_phys_ports; port++) {
ocelot_write_gix(ocelot, 0, REW_PORT_VLAN_CFG, port);
@@ -377,7 +396,7 @@ static u32 ocelot_read_eq_avail(struct ocelot *ocelot, int port)
return ocelot_read_rix(ocelot, QSYS_SW_STATUS, port);
}
-int ocelot_port_flush(struct ocelot *ocelot, int port)
+static int ocelot_port_flush(struct ocelot *ocelot, int port)
{
unsigned int pause_ena;
int err, val;
@@ -429,63 +448,118 @@ int ocelot_port_flush(struct ocelot *ocelot, int port)
return err;
}
-EXPORT_SYMBOL(ocelot_port_flush);
-void ocelot_adjust_link(struct ocelot *ocelot, int port,
- struct phy_device *phydev)
+void ocelot_phylink_mac_link_down(struct ocelot *ocelot, int port,
+ unsigned int link_an_mode,
+ phy_interface_t interface,
+ unsigned long quirks)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
- int speed, mode = 0;
+ int err;
+
+ ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA,
+ DEV_MAC_ENA_CFG);
+
+ ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 0);
+
+ err = ocelot_port_flush(ocelot, port);
+ if (err)
+ dev_err(ocelot->dev, "failed to flush port %d: %d\n",
+ port, err);
+
+ /* Put the port in reset: assert both the MAC TX and MAC RX resets. */
+ if (interface != PHY_INTERFACE_MODE_QSGMII ||
+ !(quirks & OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP))
+ ocelot_port_rmwl(ocelot_port,
+ DEV_CLOCK_CFG_MAC_TX_RST |
+ DEV_CLOCK_CFG_MAC_RX_RST,
+ DEV_CLOCK_CFG_MAC_TX_RST |
+ DEV_CLOCK_CFG_MAC_RX_RST,
+ DEV_CLOCK_CFG);
+}
+EXPORT_SYMBOL_GPL(ocelot_phylink_mac_link_down);
+
+void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port,
+ struct phy_device *phydev,
+ unsigned int link_an_mode,
+ phy_interface_t interface,
+ int speed, int duplex,
+ bool tx_pause, bool rx_pause,
+ unsigned long quirks)
+{
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
+ int mac_speed, mode = 0;
+ u32 mac_fc_cfg;
+
+ /* The MAC might be integrated in systems where the MAC speed is fixed
+ * and it's the PCS who is performing the rate adaptation, so we have
+ * to write "1000Mbps" into the LINK_SPEED field of DEV_CLOCK_CFG
+ * (which is also its default value).
+ */
+ if ((quirks & OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION) ||
+ speed == SPEED_1000) {
+ mac_speed = OCELOT_SPEED_1000;
+ mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA;
+ } else if (speed == SPEED_2500) {
+ mac_speed = OCELOT_SPEED_2500;
+ mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA;
+ } else if (speed == SPEED_100) {
+ mac_speed = OCELOT_SPEED_100;
+ } else {
+ mac_speed = OCELOT_SPEED_10;
+ }
- switch (phydev->speed) {
+ if (duplex == DUPLEX_FULL)
+ mode |= DEV_MAC_MODE_CFG_FDX_ENA;
+
+ ocelot_port_writel(ocelot_port, mode, DEV_MAC_MODE_CFG);
+
+ /* Take port out of reset by clearing the MAC_TX_RST, MAC_RX_RST and
+ * PORT_RST bits in DEV_CLOCK_CFG.
+ */
+ ocelot_port_writel(ocelot_port, DEV_CLOCK_CFG_LINK_SPEED(mac_speed),
+ DEV_CLOCK_CFG);
+
+ switch (speed) {
case SPEED_10:
- speed = OCELOT_SPEED_10;
+ mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(OCELOT_SPEED_10);
break;
case SPEED_100:
- speed = OCELOT_SPEED_100;
+ mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(OCELOT_SPEED_100);
break;
case SPEED_1000:
- speed = OCELOT_SPEED_1000;
- mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA;
- break;
case SPEED_2500:
- speed = OCELOT_SPEED_2500;
- mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA;
+ mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(OCELOT_SPEED_1000);
break;
default:
- dev_err(ocelot->dev, "Unsupported PHY speed on port %d: %d\n",
- port, phydev->speed);
+ dev_err(ocelot->dev, "Unsupported speed on port %d: %d\n",
+ port, speed);
return;
}
- phy_print_status(phydev);
-
- if (!phydev->link)
- return;
-
- /* Only full duplex supported for now */
- ocelot_port_writel(ocelot_port, DEV_MAC_MODE_CFG_FDX_ENA |
- mode, DEV_MAC_MODE_CFG);
-
- /* Disable HDX fast control */
- ocelot_port_writel(ocelot_port, DEV_PORT_MISC_HDX_FAST_DIS,
- DEV_PORT_MISC);
+ /* Handle RX pause in all cases, with 2500base-X this is used for rate
+ * adaptation.
+ */
+ mac_fc_cfg |= SYS_MAC_FC_CFG_RX_FC_ENA;
- /* SGMII only for now */
- ocelot_port_writel(ocelot_port, PCS1G_MODE_CFG_SGMII_MODE_ENA,
- PCS1G_MODE_CFG);
- ocelot_port_writel(ocelot_port, PCS1G_SD_CFG_SD_SEL, PCS1G_SD_CFG);
+ if (tx_pause)
+ mac_fc_cfg |= SYS_MAC_FC_CFG_TX_FC_ENA |
+ SYS_MAC_FC_CFG_PAUSE_VAL_CFG(0xffff) |
+ SYS_MAC_FC_CFG_FC_LATENCY_CFG(0x7) |
+ SYS_MAC_FC_CFG_ZERO_PAUSE_ENA;
- /* Enable PCS */
- ocelot_port_writel(ocelot_port, PCS1G_CFG_PCS_ENA, PCS1G_CFG);
+ /* Flow control. Link speed is only used here to evaluate the time
+ * specification in incoming pause frames.
+ */
+ ocelot_write_rix(ocelot, mac_fc_cfg, SYS_MAC_FC_CFG, port);
- /* No aneg on SGMII */
- ocelot_port_writel(ocelot_port, 0, PCS1G_ANEG_CFG);
+ ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port);
- /* No loopback */
- ocelot_port_writel(ocelot_port, 0, PCS1G_LB_CFG);
+ ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, tx_pause);
- /* Enable MAC module */
+ /* Undo the effects of ocelot_phylink_mac_link_down:
+ * enable MAC module
+ */
ocelot_port_writel(ocelot_port, DEV_MAC_ENA_CFG_RX_ENA |
DEV_MAC_ENA_CFG_TX_ENA, DEV_MAC_ENA_CFG);
@@ -502,39 +576,8 @@ void ocelot_adjust_link(struct ocelot *ocelot, int port,
/* Core: Enable port for frame transfer */
ocelot_fields_write(ocelot, port,
QSYS_SWITCH_PORT_MODE_PORT_ENA, 1);
-
- /* Flow control */
- ocelot_write_rix(ocelot, SYS_MAC_FC_CFG_PAUSE_VAL_CFG(0xffff) |
- SYS_MAC_FC_CFG_RX_FC_ENA | SYS_MAC_FC_CFG_TX_FC_ENA |
- SYS_MAC_FC_CFG_ZERO_PAUSE_ENA |
- SYS_MAC_FC_CFG_FC_LATENCY_CFG(0x7) |
- SYS_MAC_FC_CFG_FC_LINK_SPEED(speed),
- SYS_MAC_FC_CFG, port);
- ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port);
-}
-EXPORT_SYMBOL(ocelot_adjust_link);
-
-void ocelot_port_enable(struct ocelot *ocelot, int port,
- struct phy_device *phy)
-{
- /* Enable receiving frames on the port, and activate auto-learning of
- * MAC addresses.
- */
- ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_LEARNAUTO |
- ANA_PORT_PORT_CFG_RECV_ENA |
- ANA_PORT_PORT_CFG_PORTID_VAL(port),
- ANA_PORT_PORT_CFG, port);
}
-EXPORT_SYMBOL(ocelot_port_enable);
-
-void ocelot_port_disable(struct ocelot *ocelot, int port)
-{
- struct ocelot_port *ocelot_port = ocelot->ports[port];
-
- ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG);
- ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 0);
-}
-EXPORT_SYMBOL(ocelot_port_disable);
+EXPORT_SYMBOL_GPL(ocelot_phylink_mac_link_up);
static void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port,
struct sk_buff *clone)
@@ -1334,6 +1377,7 @@ void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot)
struct net_device *bond = ocelot_port->bond;
mask = ocelot_get_bridge_fwd_mask(ocelot, bridge);
+ mask |= cpu_fwd_mask;
mask &= ~BIT(port);
if (bond) {
mask &= ~ocelot_get_bond_mask(ocelot, bond,
@@ -1956,6 +2000,15 @@ void ocelot_init_port(struct ocelot *ocelot, int port)
/* Disable source address learning for standalone mode */
ocelot_port_set_learning(ocelot, port, false);
+ /* Set the port's initial logical port ID value, enable receiving
+ * frames on it, and configure the MAC address learning type to
+ * automatic.
+ */
+ ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_LEARNAUTO |
+ ANA_PORT_PORT_CFG_RECV_ENA |
+ ANA_PORT_PORT_CFG_PORTID_VAL(port),
+ ANA_PORT_PORT_CFG, port);
+
/* Enable vcap lookups */
ocelot_vcap_enable(ocelot, port);
}
diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
index db6b1a4c3926..1952d6a1b98a 100644
--- a/drivers/net/ethernet/mscc/ocelot.h
+++ b/drivers/net/ethernet/mscc/ocelot.h
@@ -12,8 +12,7 @@
#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
#include <linux/net_tstamp.h>
-#include <linux/phy.h>
-#include <linux/phy/phy.h>
+#include <linux/phylink.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
@@ -42,11 +41,9 @@ struct ocelot_port_tc {
struct ocelot_port_private {
struct ocelot_port port;
struct net_device *dev;
- struct phy_device *phy;
+ struct phylink *phylink;
+ struct phylink_config phylink_config;
u8 chip_port;
-
- struct phy *serdes;
-
struct ocelot_port_tc tc;
};
@@ -107,7 +104,7 @@ u32 ocelot_port_readl(struct ocelot_port *port, u32 reg);
void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg);
int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target,
- struct phy_device *phy);
+ struct device_node *portnp);
void ocelot_release_port(struct ocelot_port *ocelot_port);
int ocelot_devlink_init(struct ocelot *ocelot);
void ocelot_devlink_teardown(struct ocelot *ocelot);
diff --git a/drivers/net/ethernet/mscc/ocelot_io.c b/drivers/net/ethernet/mscc/ocelot_io.c
index ea4e83410fe4..7390fa3980ec 100644
--- a/drivers/net/ethernet/mscc/ocelot_io.c
+++ b/drivers/net/ethernet/mscc/ocelot_io.c
@@ -21,7 +21,7 @@ u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset)
ocelot->map[target][reg & REG_MASK] + offset, &val);
return val;
}
-EXPORT_SYMBOL(__ocelot_read_ix);
+EXPORT_SYMBOL_GPL(__ocelot_read_ix);
void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset)
{
@@ -32,7 +32,7 @@ void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset)
regmap_write(ocelot->targets[target],
ocelot->map[target][reg & REG_MASK] + offset, val);
}
-EXPORT_SYMBOL(__ocelot_write_ix);
+EXPORT_SYMBOL_GPL(__ocelot_write_ix);
void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg,
u32 offset)
@@ -45,7 +45,7 @@ void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg,
ocelot->map[target][reg & REG_MASK] + offset,
mask, val);
}
-EXPORT_SYMBOL(__ocelot_rmw_ix);
+EXPORT_SYMBOL_GPL(__ocelot_rmw_ix);
u32 ocelot_port_readl(struct ocelot_port *port, u32 reg)
{
@@ -58,7 +58,7 @@ u32 ocelot_port_readl(struct ocelot_port *port, u32 reg)
regmap_read(port->target, ocelot->map[target][reg & REG_MASK], &val);
return val;
}
-EXPORT_SYMBOL(ocelot_port_readl);
+EXPORT_SYMBOL_GPL(ocelot_port_readl);
void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg)
{
@@ -69,7 +69,7 @@ void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg)
regmap_write(port->target, ocelot->map[target][reg & REG_MASK], val);
}
-EXPORT_SYMBOL(ocelot_port_writel);
+EXPORT_SYMBOL_GPL(ocelot_port_writel);
void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg)
{
@@ -77,7 +77,7 @@ void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg)
ocelot_port_writel(port, (cur & (~mask)) | val, reg);
}
-EXPORT_SYMBOL(ocelot_port_rmwl);
+EXPORT_SYMBOL_GPL(ocelot_port_rmwl);
u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target,
u32 reg, u32 offset)
@@ -128,7 +128,7 @@ int ocelot_regfields_init(struct ocelot *ocelot,
return 0;
}
-EXPORT_SYMBOL(ocelot_regfields_init);
+EXPORT_SYMBOL_GPL(ocelot_regfields_init);
static struct regmap_config ocelot_regmap_config = {
.reg_bits = 32,
@@ -148,4 +148,4 @@ struct regmap *ocelot_regmap_init(struct ocelot *ocelot, struct resource *res)
return devm_regmap_init_mmio(ocelot->dev, regs, &ocelot_regmap_config);
}
-EXPORT_SYMBOL(ocelot_regmap_init);
+EXPORT_SYMBOL_GPL(ocelot_regmap_init);
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index e9d260d84bf3..c0c465a4a981 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -9,10 +9,14 @@
*/
#include <linux/if_bridge.h>
+#include <linux/of_net.h>
+#include <linux/phy/phy.h>
#include <net/pkt_cls.h>
#include "ocelot.h"
#include "ocelot_vcap.h"
+#define OCELOT_MAC_QUIRKS OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP
+
static struct ocelot *devlink_port_to_ocelot(struct devlink_port *dlp)
{
return devlink_priv(dlp->devlink);
@@ -160,6 +164,7 @@ int ocelot_port_devlink_init(struct ocelot *ocelot, int port,
struct devlink *dl = ocelot->devlink;
struct devlink_port_attrs attrs = {};
+ memset(dlp, 0, sizeof(*dlp));
memcpy(attrs.switch_id.id, &ocelot->base_mac, id_len);
attrs.switch_id.id_len = id_len;
attrs.phys.port_number = port;
@@ -381,26 +386,6 @@ static int ocelot_setup_tc(struct net_device *dev, enum tc_setup_type type,
return 0;
}
-static void ocelot_port_adjust_link(struct net_device *dev)
-{
- struct ocelot_port_private *priv = netdev_priv(dev);
- struct ocelot *ocelot = priv->port.ocelot;
- int port = priv->chip_port;
-
- ocelot_adjust_link(ocelot, port, dev->phydev);
-}
-
-static int ocelot_vlan_vid_prepare(struct net_device *dev, u16 vid, bool pvid,
- bool untagged)
-{
- struct ocelot_port_private *priv = netdev_priv(dev);
- struct ocelot_port *ocelot_port = &priv->port;
- struct ocelot *ocelot = ocelot_port->ocelot;
- int port = priv->chip_port;
-
- return ocelot_vlan_prepare(ocelot, port, vid, pvid, untagged);
-}
-
static int ocelot_vlan_vid_add(struct net_device *dev, u16 vid, bool pvid,
bool untagged)
{
@@ -448,33 +433,8 @@ static int ocelot_vlan_vid_del(struct net_device *dev, u16 vid)
static int ocelot_port_open(struct net_device *dev)
{
struct ocelot_port_private *priv = netdev_priv(dev);
- struct ocelot_port *ocelot_port = &priv->port;
- struct ocelot *ocelot = ocelot_port->ocelot;
- int port = priv->chip_port;
- int err;
- if (priv->serdes) {
- err = phy_set_mode_ext(priv->serdes, PHY_MODE_ETHERNET,
- ocelot_port->phy_mode);
- if (err) {
- netdev_err(dev, "Could not set mode of SerDes\n");
- return err;
- }
- }
-
- err = phy_connect_direct(dev, priv->phy, &ocelot_port_adjust_link,
- ocelot_port->phy_mode);
- if (err) {
- netdev_err(dev, "Could not attach to PHY\n");
- return err;
- }
-
- dev->phydev = priv->phy;
-
- phy_attached_info(priv->phy);
- phy_start(priv->phy);
-
- ocelot_port_enable(ocelot, port, priv->phy);
+ phylink_start(priv->phylink);
return 0;
}
@@ -482,14 +442,8 @@ static int ocelot_port_open(struct net_device *dev)
static int ocelot_port_stop(struct net_device *dev)
{
struct ocelot_port_private *priv = netdev_priv(dev);
- struct ocelot *ocelot = priv->port.ocelot;
- int port = priv->chip_port;
-
- phy_disconnect(priv->phy);
-
- dev->phydev = NULL;
- ocelot_port_disable(ocelot, port);
+ phylink_stop(priv->phylink);
return 0;
}
@@ -823,7 +777,7 @@ static const struct net_device_ops ocelot_port_netdev_ops = {
.ndo_vlan_rx_kill_vid = ocelot_vlan_rx_kill_vid,
.ndo_set_features = ocelot_set_features,
.ndo_setup_tc = ocelot_setup_tc,
- .ndo_do_ioctl = ocelot_ioctl,
+ .ndo_eth_ioctl = ocelot_ioctl,
.ndo_get_devlink_port = ocelot_get_devlink_port,
};
@@ -959,7 +913,8 @@ static int ocelot_port_attr_set(struct net_device *dev, const void *ctx,
ocelot_port_attr_ageing_set(ocelot, port, attr->u.ageing_time);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
- ocelot_port_vlan_filtering(ocelot, port, attr->u.vlan_filtering);
+ ocelot_port_vlan_filtering(ocelot, port, attr->u.vlan_filtering,
+ extack);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED:
ocelot_port_attr_mc_set(ocelot, port, !attr->u.mc_disabled);
@@ -979,14 +934,26 @@ static int ocelot_port_attr_set(struct net_device *dev, const void *ctx,
return err;
}
+static int ocelot_vlan_vid_prepare(struct net_device *dev, u16 vid, bool pvid,
+ bool untagged, struct netlink_ext_ack *extack)
+{
+ struct ocelot_port_private *priv = netdev_priv(dev);
+ struct ocelot_port *ocelot_port = &priv->port;
+ struct ocelot *ocelot = ocelot_port->ocelot;
+ int port = priv->chip_port;
+
+ return ocelot_vlan_prepare(ocelot, port, vid, pvid, untagged, extack);
+}
+
static int ocelot_port_obj_add_vlan(struct net_device *dev,
- const struct switchdev_obj_port_vlan *vlan)
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack)
{
bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
int ret;
- ret = ocelot_vlan_vid_prepare(dev, vlan->vid, pvid, untagged);
+ ret = ocelot_vlan_vid_prepare(dev, vlan->vid, pvid, untagged, extack);
if (ret)
return ret;
@@ -1074,7 +1041,8 @@ static int ocelot_port_obj_add(struct net_device *dev, const void *ctx,
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_VLAN:
ret = ocelot_port_obj_add_vlan(dev,
- SWITCHDEV_OBJ_PORT_VLAN(obj));
+ SWITCHDEV_OBJ_PORT_VLAN(obj),
+ extack);
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
ret = ocelot_port_obj_add_mdb(dev, SWITCHDEV_OBJ_PORT_MDB(obj));
@@ -1154,45 +1122,27 @@ static int ocelot_switchdev_sync(struct ocelot *ocelot, int port,
struct net_device *bridge_dev,
struct netlink_ext_ack *extack)
{
- struct ocelot_port *ocelot_port = ocelot->ports[port];
- struct ocelot_port_private *priv;
clock_t ageing_time;
u8 stp_state;
- int err;
-
- priv = container_of(ocelot_port, struct ocelot_port_private, port);
ocelot_inherit_brport_flags(ocelot, port, brport_dev);
stp_state = br_port_get_stp_state(brport_dev);
ocelot_bridge_stp_state_set(ocelot, port, stp_state);
- err = ocelot_port_vlan_filtering(ocelot, port,
- br_vlan_enabled(bridge_dev));
- if (err)
- return err;
-
ageing_time = br_get_ageing_time(bridge_dev);
ocelot_port_attr_ageing_set(ocelot, port, ageing_time);
- err = br_mdb_replay(bridge_dev, brport_dev, priv, true,
- &ocelot_switchdev_blocking_nb, extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- err = br_vlan_replay(bridge_dev, brport_dev, priv, true,
- &ocelot_switchdev_blocking_nb, extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- return 0;
+ return ocelot_port_vlan_filtering(ocelot, port,
+ br_vlan_enabled(bridge_dev),
+ extack);
}
static int ocelot_switchdev_unsync(struct ocelot *ocelot, int port)
{
int err;
- err = ocelot_port_vlan_filtering(ocelot, port, false);
+ err = ocelot_port_vlan_filtering(ocelot, port, false, NULL);
if (err)
return err;
@@ -1216,6 +1166,13 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev,
ocelot_port_bridge_join(ocelot, port, bridge);
+ err = switchdev_bridge_port_offload(brport_dev, dev, priv,
+ &ocelot_netdevice_nb,
+ &ocelot_switchdev_blocking_nb,
+ false, extack);
+ if (err)
+ goto err_switchdev_offload;
+
err = ocelot_switchdev_sync(ocelot, port, brport_dev, bridge, extack);
if (err)
goto err_switchdev_sync;
@@ -1223,10 +1180,24 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev,
return 0;
err_switchdev_sync:
+ switchdev_bridge_port_unoffload(brport_dev, priv,
+ &ocelot_netdevice_nb,
+ &ocelot_switchdev_blocking_nb);
+err_switchdev_offload:
ocelot_port_bridge_leave(ocelot, port, bridge);
return err;
}
+static void ocelot_netdevice_pre_bridge_leave(struct net_device *dev,
+ struct net_device *brport_dev)
+{
+ struct ocelot_port_private *priv = netdev_priv(dev);
+
+ switchdev_bridge_port_unoffload(brport_dev, priv,
+ &ocelot_netdevice_nb,
+ &ocelot_switchdev_blocking_nb);
+}
+
static int ocelot_netdevice_bridge_leave(struct net_device *dev,
struct net_device *brport_dev,
struct net_device *bridge)
@@ -1279,6 +1250,18 @@ err_bridge_join:
return err;
}
+static void ocelot_netdevice_pre_lag_leave(struct net_device *dev,
+ struct net_device *bond)
+{
+ struct net_device *bridge_dev;
+
+ bridge_dev = netdev_master_upper_dev_get(bond);
+ if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
+ return;
+
+ ocelot_netdevice_pre_bridge_leave(dev, bond);
+}
+
static int ocelot_netdevice_lag_leave(struct net_device *dev,
struct net_device *bond)
{
@@ -1356,6 +1339,43 @@ ocelot_netdevice_lag_changeupper(struct net_device *dev,
}
static int
+ocelot_netdevice_prechangeupper(struct net_device *dev,
+ struct net_device *brport_dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ if (netif_is_bridge_master(info->upper_dev) && !info->linking)
+ ocelot_netdevice_pre_bridge_leave(dev, brport_dev);
+
+ if (netif_is_lag_master(info->upper_dev) && !info->linking)
+ ocelot_netdevice_pre_lag_leave(dev, info->upper_dev);
+
+ return NOTIFY_DONE;
+}
+
+static int
+ocelot_netdevice_lag_prechangeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *lower;
+ struct list_head *iter;
+ int err = NOTIFY_DONE;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ struct ocelot_port_private *priv = netdev_priv(lower);
+ struct ocelot_port *ocelot_port = &priv->port;
+
+ if (ocelot_port->bond != dev)
+ return NOTIFY_OK;
+ /* The lower device is the switch port; the bond (dev) is its bridge port. */
+ err = ocelot_netdevice_prechangeupper(lower, dev, info);
+ if (err)
+ return err;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int
ocelot_netdevice_changelowerstate(struct net_device *dev,
struct netdev_lag_lower_state_info *info)
{
@@ -1382,6 +1402,17 @@ static int ocelot_netdevice_event(struct notifier_block *unused,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
switch (event) {
+ case NETDEV_PRECHANGEUPPER: {
+ struct netdev_notifier_changeupper_info *info = ptr;
+
+ if (ocelot_netdevice_dev_check(dev))
+ return ocelot_netdevice_prechangeupper(dev, dev, info);
+
+ if (netif_is_lag_master(dev))
+ return ocelot_netdevice_lag_prechangeupper(dev, info);
+
+ break;
+ }
case NETDEV_CHANGEUPPER: {
struct netdev_notifier_changeupper_info *info = ptr;
@@ -1466,8 +1497,188 @@ struct notifier_block ocelot_switchdev_blocking_nb __read_mostly = {
.notifier_call = ocelot_switchdev_blocking_event,
};
+static void vsc7514_phylink_validate(struct phylink_config *config,
+ unsigned long *supported,
+ struct phylink_link_state *state)
+{
+ struct net_device *ndev = to_net_dev(config->dev);
+ struct ocelot_port_private *priv = netdev_priv(ndev);
+ struct ocelot_port *ocelot_port = &priv->port;
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = {};
+
+ if (state->interface != PHY_INTERFACE_MODE_NA &&
+ state->interface != ocelot_port->phy_mode) {
+ bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ return;
+ }
+
+ phylink_set_port_modes(mask);
+
+ phylink_set(mask, Pause);
+ phylink_set(mask, Autoneg);
+ phylink_set(mask, Asym_Pause);
+ phylink_set(mask, 10baseT_Half);
+ phylink_set(mask, 10baseT_Full);
+ phylink_set(mask, 100baseT_Half);
+ phylink_set(mask, 100baseT_Full);
+ phylink_set(mask, 1000baseT_Half);
+ phylink_set(mask, 1000baseT_Full);
+ phylink_set(mask, 1000baseX_Full);
+ phylink_set(mask, 2500baseT_Full);
+ phylink_set(mask, 2500baseX_Full);
+
+ bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ bitmap_and(state->advertising, state->advertising, mask,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static void vsc7514_phylink_mac_config(struct phylink_config *config,
+ unsigned int link_an_mode,
+ const struct phylink_link_state *state)
+{
+ struct net_device *ndev = to_net_dev(config->dev);
+ struct ocelot_port_private *priv = netdev_priv(ndev);
+ struct ocelot_port *ocelot_port = &priv->port;
+
+ /* Disable HDX fast control */
+ ocelot_port_writel(ocelot_port, DEV_PORT_MISC_HDX_FAST_DIS,
+ DEV_PORT_MISC);
+
+ /* SGMII only for now */
+ ocelot_port_writel(ocelot_port, PCS1G_MODE_CFG_SGMII_MODE_ENA,
+ PCS1G_MODE_CFG);
+ ocelot_port_writel(ocelot_port, PCS1G_SD_CFG_SD_SEL, PCS1G_SD_CFG);
+
+ /* Enable PCS */
+ ocelot_port_writel(ocelot_port, PCS1G_CFG_PCS_ENA, PCS1G_CFG);
+
+ /* No aneg on SGMII */
+ ocelot_port_writel(ocelot_port, 0, PCS1G_ANEG_CFG);
+
+ /* No loopback */
+ ocelot_port_writel(ocelot_port, 0, PCS1G_LB_CFG);
+}
+
+static void vsc7514_phylink_mac_link_down(struct phylink_config *config,
+ unsigned int link_an_mode,
+ phy_interface_t interface)
+{
+ struct net_device *ndev = to_net_dev(config->dev);
+ struct ocelot_port_private *priv = netdev_priv(ndev);
+ struct ocelot *ocelot = priv->port.ocelot;
+ int port = priv->chip_port;
+
+ ocelot_phylink_mac_link_down(ocelot, port, link_an_mode, interface,
+ OCELOT_MAC_QUIRKS);
+}
+
+static void vsc7514_phylink_mac_link_up(struct phylink_config *config,
+ struct phy_device *phydev,
+ unsigned int link_an_mode,
+ phy_interface_t interface,
+ int speed, int duplex,
+ bool tx_pause, bool rx_pause)
+{
+ struct net_device *ndev = to_net_dev(config->dev);
+ struct ocelot_port_private *priv = netdev_priv(ndev);
+ struct ocelot *ocelot = priv->port.ocelot;
+ int port = priv->chip_port;
+
+ ocelot_phylink_mac_link_up(ocelot, port, phydev, link_an_mode,
+ interface, speed, duplex,
+ tx_pause, rx_pause, OCELOT_MAC_QUIRKS);
+}
+
+static const struct phylink_mac_ops ocelot_phylink_ops = {
+ .validate = vsc7514_phylink_validate,
+ .mac_config = vsc7514_phylink_mac_config,
+ .mac_link_down = vsc7514_phylink_mac_link_down,
+ .mac_link_up = vsc7514_phylink_mac_link_up,
+};
+
+static int ocelot_port_phylink_create(struct ocelot *ocelot, int port,
+ struct device_node *portnp)
+{
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
+ struct ocelot_port_private *priv;
+ struct device *dev = ocelot->dev;
+ phy_interface_t phy_mode;
+ struct phylink *phylink;
+ int err;
+
+ of_get_phy_mode(portnp, &phy_mode);
+ /* DT bindings of internal PHY ports are broken and don't
+ * specify a phy-mode
+ */
+ if (phy_mode == PHY_INTERFACE_MODE_NA)
+ phy_mode = PHY_INTERFACE_MODE_INTERNAL;
+
+ if (phy_mode != PHY_INTERFACE_MODE_SGMII &&
+ phy_mode != PHY_INTERFACE_MODE_QSGMII &&
+ phy_mode != PHY_INTERFACE_MODE_INTERNAL) {
+ dev_err(dev, "unsupported phy mode %s for port %d\n",
+ phy_modes(phy_mode), port);
+ return -EINVAL;
+ }
+
+ /* Ensure clock signals are set on all QSGMII links: clear both MAC TX and RX resets */
+ if (phy_mode == PHY_INTERFACE_MODE_QSGMII)
+ ocelot_port_rmwl(ocelot_port, 0,
+ DEV_CLOCK_CFG_MAC_TX_RST |
+ DEV_CLOCK_CFG_MAC_RX_RST,
+ DEV_CLOCK_CFG);
+
+ ocelot_port->phy_mode = phy_mode;
+
+ if (phy_mode != PHY_INTERFACE_MODE_INTERNAL) {
+ struct phy *serdes = of_phy_get(portnp, NULL);
+
+ if (IS_ERR(serdes)) {
+ err = PTR_ERR(serdes);
+ dev_err_probe(dev, err,
+ "missing SerDes phys for port %d\n",
+ port);
+ return err;
+ }
+
+ err = phy_set_mode_ext(serdes, PHY_MODE_ETHERNET, phy_mode);
+ of_phy_put(serdes);
+ if (err) {
+ dev_err(dev, "Could not set SerDes mode on port %d: %pe\n",
+ port, ERR_PTR(err));
+ return err;
+ }
+ }
+
+ priv = container_of(ocelot_port, struct ocelot_port_private, port);
+
+ priv->phylink_config.dev = &priv->dev->dev;
+ priv->phylink_config.type = PHYLINK_NETDEV;
+
+ phylink = phylink_create(&priv->phylink_config,
+ of_fwnode_handle(portnp),
+ phy_mode, &ocelot_phylink_ops);
+ if (IS_ERR(phylink)) {
+ err = PTR_ERR(phylink);
+ dev_err(dev, "Could not create phylink (%pe)\n", phylink);
+ return err;
+ }
+
+ priv->phylink = phylink;
+
+ err = phylink_of_phy_connect(phylink, portnp, 0);
+ if (err) {
+ dev_err(dev, "Could not connect to PHY: %pe\n", ERR_PTR(err));
+ phylink_destroy(phylink);
+ priv->phylink = NULL;
+ return err;
+ }
+
+ return 0;
+}
+
int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target,
- struct phy_device *phy)
+ struct device_node *portnp)
{
struct ocelot_port_private *priv;
struct ocelot_port *ocelot_port;
@@ -1480,7 +1691,6 @@ int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target,
SET_NETDEV_DEV(dev, ocelot->dev);
priv = netdev_priv(dev);
priv->dev = dev;
- priv->phy = phy;
priv->chip_port = port;
ocelot_port = &priv->port;
ocelot_port->ocelot = ocelot;
@@ -1501,15 +1711,23 @@ int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target,
ocelot_init_port(ocelot, port);
+ err = ocelot_port_phylink_create(ocelot, port, portnp);
+ if (err)
+ goto out;
+
err = register_netdev(dev);
if (err) {
dev_err(ocelot->dev, "register_netdev failed\n");
- free_netdev(dev);
- ocelot->ports[port] = NULL;
- return err;
+ goto out;
}
return 0;
+
+out:
+ ocelot->ports[port] = NULL;
+ free_netdev(dev);
+
+ return err;
}
void ocelot_release_port(struct ocelot_port *ocelot_port)
@@ -1519,5 +1737,14 @@ void ocelot_release_port(struct ocelot_port *ocelot_port)
port);
unregister_netdev(priv->dev);
+
+ if (priv->phylink) {
+ rtnl_lock();
+ phylink_disconnect_phy(priv->phylink);
+ rtnl_unlock();
+
+ phylink_destroy(priv->phylink);
+ }
+
free_netdev(priv->dev);
}
diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c
index 4bd7e9d9ec61..291ae6817c26 100644
--- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c
+++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/of_net.h>
#include <linux/netdevice.h>
+#include <linux/phylink.h>
#include <linux/of_mdio.h>
#include <linux/of_platform.h>
#include <linux/mfd/syscon.h>
@@ -945,13 +946,9 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev,
for_each_available_child_of_node(ports, portnp) {
struct ocelot_port_private *priv;
struct ocelot_port *ocelot_port;
- struct device_node *phy_node;
struct devlink_port *dlp;
- phy_interface_t phy_mode;
- struct phy_device *phy;
struct regmap *target;
struct resource *res;
- struct phy *serdes;
char res_name[8];
if (of_property_read_u32(portnp, "reg", &reg))
@@ -975,77 +972,26 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev,
goto out_teardown;
}
- phy_node = of_parse_phandle(portnp, "phy-handle", 0);
- if (!phy_node)
- continue;
-
- phy = of_phy_find_device(phy_node);
- of_node_put(phy_node);
- if (!phy)
- continue;
-
err = ocelot_port_devlink_init(ocelot, port,
DEVLINK_PORT_FLAVOUR_PHYSICAL);
if (err) {
of_node_put(portnp);
goto out_teardown;
}
- devlink_ports_registered |= BIT(port);
- err = ocelot_probe_port(ocelot, port, target, phy);
+ err = ocelot_probe_port(ocelot, port, target, portnp);
if (err) {
- of_node_put(portnp);
- goto out_teardown;
+ ocelot_port_devlink_teardown(ocelot, port);
+ continue;
}
+ devlink_ports_registered |= BIT(port);
+
ocelot_port = ocelot->ports[port];
priv = container_of(ocelot_port, struct ocelot_port_private,
port);
dlp = &ocelot->devlink_ports[port];
devlink_port_type_eth_set(dlp, priv->dev);
-
- of_get_phy_mode(portnp, &phy_mode);
-
- ocelot_port->phy_mode = phy_mode;
-
- switch (ocelot_port->phy_mode) {
- case PHY_INTERFACE_MODE_NA:
- continue;
- case PHY_INTERFACE_MODE_SGMII:
- break;
- case PHY_INTERFACE_MODE_QSGMII:
- /* Ensure clock signals and speed is set on all
- * QSGMII links
- */
- ocelot_port_writel(ocelot_port,
- DEV_CLOCK_CFG_LINK_SPEED
- (OCELOT_SPEED_1000),
- DEV_CLOCK_CFG);
- break;
- default:
- dev_err(ocelot->dev,
- "invalid phy mode for port%d, (Q)SGMII only\n",
- port);
- of_node_put(portnp);
- err = -EINVAL;
- goto out_teardown;
- }
-
- serdes = devm_of_phy_get(ocelot->dev, portnp, NULL);
- if (IS_ERR(serdes)) {
- err = PTR_ERR(serdes);
- if (err == -EPROBE_DEFER)
- dev_dbg(ocelot->dev, "deferring probe\n");
- else
- dev_err(ocelot->dev,
- "missing SerDes phys for port%d\n",
- port);
-
- of_node_put(portnp);
- goto out_teardown;
- }
-
- priv->serdes = serdes;
}
/* Initialize unused devlink ports at the end */
@@ -1103,7 +1049,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
if (!np && !pdev->dev.platform_data)
return -ENODEV;
- devlink = devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot));
+ devlink =
+ devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot), &pdev->dev);
if (!devlink)
return -ENOMEM;
@@ -1187,7 +1134,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
if (err)
goto out_put_ports;
- err = devlink_register(devlink, ocelot->dev);
+ err = devlink_register(devlink);
if (err)
goto out_ocelot_deinit;
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index fc99ad8e4a38..c1a75b08ced7 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -850,9 +850,9 @@ static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
dmatest_page = alloc_page(GFP_KERNEL);
if (!dmatest_page)
return -ENOMEM;
- dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
- if (unlikely(pci_dma_mapping_error(mgp->pdev, dmatest_bus))) {
+ dmatest_bus = dma_map_page(&mgp->pdev->dev, dmatest_page, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ if (unlikely(dma_mapping_error(&mgp->pdev->dev, dmatest_bus))) {
__free_page(dmatest_page);
return -ENOMEM;
}
@@ -899,7 +899,8 @@ static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
(cmd.data0 & 0xffff);
abort:
- pci_unmap_page(mgp->pdev, dmatest_bus, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_unmap_page(&mgp->pdev->dev, dmatest_bus, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
put_page(dmatest_page);
if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
@@ -1205,10 +1206,10 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
return;
}
- bus = pci_map_page(mgp->pdev, page, 0,
+ bus = dma_map_page(&mgp->pdev->dev, page, 0,
MYRI10GE_ALLOC_SIZE,
- PCI_DMA_FROMDEVICE);
- if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&mgp->pdev->dev, bus))) {
__free_pages(page, MYRI10GE_ALLOC_ORDER);
if (rx->fill_cnt - rx->cnt < 16)
rx->watchdog_needed = 1;
@@ -1256,9 +1257,9 @@ myri10ge_unmap_rx_page(struct pci_dev *pdev,
/* unmap the recvd page if we're the only or last user of it */
if (bytes >= MYRI10GE_ALLOC_SIZE / 2 ||
(info->page_offset + 2 * bytes) > MYRI10GE_ALLOC_SIZE) {
- pci_unmap_page(pdev, (dma_unmap_addr(info, bus)
- & ~(MYRI10GE_ALLOC_SIZE - 1)),
- MYRI10GE_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
+ dma_unmap_page(&pdev->dev, (dma_unmap_addr(info, bus)
+ & ~(MYRI10GE_ALLOC_SIZE - 1)),
+ MYRI10GE_ALLOC_SIZE, DMA_FROM_DEVICE);
}
}
@@ -1398,16 +1399,16 @@ myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index)
ss->stats.tx_packets++;
dev_consume_skb_irq(skb);
if (len)
- pci_unmap_single(pdev,
+ dma_unmap_single(&pdev->dev,
dma_unmap_addr(&tx->info[idx],
bus), len,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
} else {
if (len)
- pci_unmap_page(pdev,
+ dma_unmap_page(&pdev->dev,
dma_unmap_addr(&tx->info[idx],
bus), len,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
}
}
@@ -1651,8 +1652,10 @@ myri10ge_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info)
strlcpy(info->bus_info, pci_name(mgp->pdev), sizeof(info->bus_info));
}
-static int
-myri10ge_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal)
+static int myri10ge_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct myri10ge_priv *mgp = netdev_priv(netdev);
@@ -1660,8 +1663,10 @@ myri10ge_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal)
return 0;
}
-static int
-myri10ge_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal)
+static int myri10ge_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct myri10ge_priv *mgp = netdev_priv(netdev);
@@ -2110,16 +2115,16 @@ static void myri10ge_free_rings(struct myri10ge_slice_state *ss)
ss->stats.tx_dropped++;
dev_kfree_skb_any(skb);
if (len)
- pci_unmap_single(mgp->pdev,
+ dma_unmap_single(&mgp->pdev->dev,
dma_unmap_addr(&tx->info[idx],
bus), len,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
} else {
if (len)
- pci_unmap_page(mgp->pdev,
+ dma_unmap_page(&mgp->pdev->dev,
dma_unmap_addr(&tx->info[idx],
bus), len,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
}
}
kfree(ss->rx_big.info);
@@ -2584,15 +2589,15 @@ static void myri10ge_unmap_tx_dma(struct myri10ge_priv *mgp,
len = dma_unmap_len(&tx->info[idx], len);
if (len) {
if (tx->info[idx].skb != NULL)
- pci_unmap_single(mgp->pdev,
+ dma_unmap_single(&mgp->pdev->dev,
dma_unmap_addr(&tx->info[idx],
bus), len,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
else
- pci_unmap_page(mgp->pdev,
+ dma_unmap_page(&mgp->pdev->dev,
dma_unmap_addr(&tx->info[idx],
bus), len,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
dma_unmap_len_set(&tx->info[idx], len, 0);
tx->info[idx].skb = NULL;
}
@@ -2715,8 +2720,8 @@ again:
/* map the skb for DMA */
len = skb_headlen(skb);
- bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
- if (unlikely(pci_dma_mapping_error(mgp->pdev, bus)))
+ bus = dma_map_single(&mgp->pdev->dev, skb->data, len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(&mgp->pdev->dev, bus)))
goto drop;
idx = tx->req & tx->mask;
@@ -2824,7 +2829,7 @@ again:
len = skb_frag_size(frag);
bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len,
DMA_TO_DEVICE);
- if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
+ if (unlikely(dma_mapping_error(&mgp->pdev->dev, bus))) {
myri10ge_unmap_tx_dma(mgp, tx, idx);
goto drop;
}
@@ -3776,19 +3781,17 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
myri10ge_mask_surprise_down(pdev);
pci_set_master(pdev);
dac_enabled = 1;
- status = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (status != 0) {
dac_enabled = 0;
dev_err(&pdev->dev,
- "64-bit pci address mask was refused, "
- "trying 32-bit\n");
- status = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ "64-bit pci address mask was refused, trying 32-bit\n");
+ status = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
}
if (status != 0) {
dev_err(&pdev->dev, "Error %d setting DMA mask\n", status);
goto abort_with_enabled;
}
- (void)pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
mgp->cmd = dma_alloc_coherent(&pdev->dev, sizeof(*mgp->cmd),
&mgp->cmd_bus, GFP_KERNEL);
if (!mgp->cmd) {
diff --git a/drivers/net/ethernet/natsemi/jazzsonic.c b/drivers/net/ethernet/natsemi/jazzsonic.c
index ce3eca5d152b..d74a80f010c5 100644
--- a/drivers/net/ethernet/natsemi/jazzsonic.c
+++ b/drivers/net/ethernet/natsemi/jazzsonic.c
@@ -193,8 +193,6 @@ static int jazz_sonic_probe(struct platform_device *pdev)
SET_NETDEV_DEV(dev, &pdev->dev);
platform_set_drvdata(pdev, dev);
- netdev_boot_setup_check(dev);
-
dev->base_addr = res->start;
dev->irq = platform_get_irq(pdev, 0);
err = sonic_probe1(dev);
diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c
index 84f7dbe9edff..3f982033944b 100644
--- a/drivers/net/ethernet/natsemi/natsemi.c
+++ b/drivers/net/ethernet/natsemi/natsemi.c
@@ -790,7 +790,7 @@ static const struct net_device_ops natsemi_netdev_ops = {
.ndo_get_stats = get_stats,
.ndo_set_rx_mode = set_rx_mode,
.ndo_change_mtu = natsemi_change_mtu,
- .ndo_do_ioctl = netdev_ioctl,
+ .ndo_eth_ioctl = netdev_ioctl,
.ndo_tx_timeout = ns_tx_timeout,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c
index 28d9e98db81a..ca4686094701 100644
--- a/drivers/net/ethernet/natsemi/xtsonic.c
+++ b/drivers/net/ethernet/natsemi/xtsonic.c
@@ -215,7 +215,6 @@ int xtsonic_probe(struct platform_device *pdev)
lp->device = &pdev->dev;
platform_set_drvdata(pdev, dev);
SET_NETDEV_DEV(dev, &pdev->dev);
- netdev_boot_setup_check(dev);
dev->base_addr = resmem->start;
dev->irq = resirq->start;
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index 0b017d4f5c08..09c0e839cca5 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -7625,7 +7625,7 @@ static const struct net_device_ops s2io_netdev_ops = {
.ndo_start_xmit = s2io_xmit,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = s2io_ndo_set_multicast,
- .ndo_do_ioctl = s2io_ioctl,
+ .ndo_eth_ioctl = s2io_ioctl,
.ndo_set_mac_address = s2io_set_mac_addr,
.ndo_change_mtu = s2io_change_mtu,
.ndo_set_features = s2io_set_features,
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index 7abd13e69471..df4a3f3da83a 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -3339,7 +3339,7 @@ static const struct net_device_ops vxge_netdev_ops = {
.ndo_start_xmit = vxge_xmit,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = vxge_set_multicast,
- .ndo_do_ioctl = vxge_ioctl,
+ .ndo_eth_ioctl = vxge_ioctl,
.ndo_set_mac_address = vxge_set_mac_addr,
.ndo_change_mtu = vxge_change_mtu,
.ndo_fix_features = vxge_fix_features,
diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig
index b82758d5beed..8844d1ac053a 100644
--- a/drivers/net/ethernet/netronome/Kconfig
+++ b/drivers/net/ethernet/netronome/Kconfig
@@ -23,6 +23,7 @@ config NFP
depends on TLS && TLS_DEVICE || TLS_DEVICE=n
select NET_DEVLINK
select CRC32
+ select DIMLIB
help
This driver supports the Netronome(R) NFP4000/NFP6000 based
cards working as a advanced Ethernet NIC. It works with both
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 1cbe2c9f3959..2a432de11858 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -262,10 +262,10 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output,
}
static bool
-nfp_flower_tun_is_gre(struct flow_cls_offload *flow, int start_idx)
+nfp_flower_tun_is_gre(struct flow_rule *rule, int start_idx)
{
- struct flow_action_entry *act = flow->rule->action.entries;
- int num_act = flow->rule->action.num_entries;
+ struct flow_action_entry *act = rule->action.entries;
+ int num_act = rule->action.num_entries;
int act_idx;
/* Preparse action list for next mirred or redirect action */
@@ -279,7 +279,7 @@ nfp_flower_tun_is_gre(struct flow_cls_offload *flow, int start_idx)
static enum nfp_flower_tun_type
nfp_fl_get_tun_from_act(struct nfp_app *app,
- struct flow_cls_offload *flow,
+ struct flow_rule *rule,
const struct flow_action_entry *act, int act_idx)
{
const struct ip_tunnel_info *tun = act->tunnel;
@@ -288,7 +288,7 @@ nfp_fl_get_tun_from_act(struct nfp_app *app,
/* Determine the tunnel type based on the egress netdev
* in the mirred action for tunnels without l4.
*/
- if (nfp_flower_tun_is_gre(flow, act_idx))
+ if (nfp_flower_tun_is_gre(rule, act_idx))
return NFP_FL_TUNNEL_GRE;
switch (tun->key.tp_dst) {
@@ -788,11 +788,10 @@ struct nfp_flower_pedit_acts {
};
static int
-nfp_fl_commit_mangle(struct flow_cls_offload *flow, char *nfp_action,
+nfp_fl_commit_mangle(struct flow_rule *rule, char *nfp_action,
int *a_len, struct nfp_flower_pedit_acts *set_act,
u32 *csum_updated)
{
- struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
size_t act_size = 0;
u8 ip_proto = 0;
@@ -890,7 +889,7 @@ nfp_fl_commit_mangle(struct flow_cls_offload *flow, char *nfp_action,
static int
nfp_fl_pedit(const struct flow_action_entry *act,
- struct flow_cls_offload *flow, char *nfp_action, int *a_len,
+ char *nfp_action, int *a_len,
u32 *csum_updated, struct nfp_flower_pedit_acts *set_act,
struct netlink_ext_ack *extack)
{
@@ -977,7 +976,7 @@ nfp_flower_output_action(struct nfp_app *app,
static int
nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act,
- struct flow_cls_offload *flow,
+ struct flow_rule *rule,
struct nfp_fl_payload *nfp_fl, int *a_len,
struct net_device *netdev,
enum nfp_flower_tun_type *tun_type, int *tun_out_cnt,
@@ -1045,7 +1044,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act,
case FLOW_ACTION_TUNNEL_ENCAP: {
const struct ip_tunnel_info *ip_tun = act->tunnel;
- *tun_type = nfp_fl_get_tun_from_act(app, flow, act, act_idx);
+ *tun_type = nfp_fl_get_tun_from_act(app, rule, act, act_idx);
if (*tun_type == NFP_FL_TUNNEL_NONE) {
NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported tunnel type in action list");
return -EOPNOTSUPP;
@@ -1086,7 +1085,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act,
/* Tunnel decap is handled by default so accept action. */
return 0;
case FLOW_ACTION_MANGLE:
- if (nfp_fl_pedit(act, flow, &nfp_fl->action_data[*a_len],
+ if (nfp_fl_pedit(act, &nfp_fl->action_data[*a_len],
a_len, csum_updated, set_act, extack))
return -EOPNOTSUPP;
break;
@@ -1195,7 +1194,7 @@ static bool nfp_fl_check_mangle_end(struct flow_action *flow_act,
}
int nfp_flower_compile_action(struct nfp_app *app,
- struct flow_cls_offload *flow,
+ struct flow_rule *rule,
struct net_device *netdev,
struct nfp_fl_payload *nfp_flow,
struct netlink_ext_ack *extack)
@@ -1207,7 +1206,7 @@ int nfp_flower_compile_action(struct nfp_app *app,
bool pkt_host = false;
u32 csum_updated = 0;
- if (!flow_action_hw_stats_check(&flow->rule->action, extack,
+ if (!flow_action_hw_stats_check(&rule->action, extack,
FLOW_ACTION_HW_STATS_DELAYED_BIT))
return -EOPNOTSUPP;
@@ -1219,18 +1218,18 @@ int nfp_flower_compile_action(struct nfp_app *app,
tun_out_cnt = 0;
out_cnt = 0;
- flow_action_for_each(i, act, &flow->rule->action) {
- if (nfp_fl_check_mangle_start(&flow->rule->action, i))
+ flow_action_for_each(i, act, &rule->action) {
+ if (nfp_fl_check_mangle_start(&rule->action, i))
memset(&set_act, 0, sizeof(set_act));
- err = nfp_flower_loop_action(app, act, flow, nfp_flow, &act_len,
+ err = nfp_flower_loop_action(app, act, rule, nfp_flow, &act_len,
netdev, &tun_type, &tun_out_cnt,
&out_cnt, &csum_updated,
&set_act, &pkt_host, extack, i);
if (err)
return err;
act_cnt++;
- if (nfp_fl_check_mangle_end(&flow->rule->action, i))
- nfp_fl_commit_mangle(flow,
+ if (nfp_fl_check_mangle_end(&rule->action, i))
+ nfp_fl_commit_mangle(rule,
&nfp_flow->action_data[act_len],
&act_len, &set_act, &csum_updated);
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index 062bb2db68bf..bfd7d1c35076 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -2,6 +2,7 @@
/* Copyright (C) 2021 Corigine, Inc. */
#include "conntrack.h"
+#include "../nfp_port.h"
const struct rhashtable_params nfp_tc_ct_merge_params = {
.head_offset = offsetof(struct nfp_fl_ct_tc_merge,
@@ -407,15 +408,491 @@ static int nfp_ct_check_meta(struct nfp_fl_ct_flow_entry *post_ct_entry,
return -EINVAL;
}
+static int
+nfp_fl_calc_key_layers_sz(struct nfp_fl_key_ls in_key_ls, uint16_t *map)
+{
+ int key_size;
+
+ /* This field must always be present */
+ key_size = sizeof(struct nfp_flower_meta_tci);
+ map[FLOW_PAY_META_TCI] = 0;
+
+ if (in_key_ls.key_layer & NFP_FLOWER_LAYER_EXT_META) {
+ map[FLOW_PAY_EXT_META] = key_size;
+ key_size += sizeof(struct nfp_flower_ext_meta);
+ }
+ if (in_key_ls.key_layer & NFP_FLOWER_LAYER_PORT) {
+ map[FLOW_PAY_INPORT] = key_size;
+ key_size += sizeof(struct nfp_flower_in_port);
+ }
+ if (in_key_ls.key_layer & NFP_FLOWER_LAYER_MAC) {
+ map[FLOW_PAY_MAC_MPLS] = key_size;
+ key_size += sizeof(struct nfp_flower_mac_mpls);
+ }
+ if (in_key_ls.key_layer & NFP_FLOWER_LAYER_TP) {
+ map[FLOW_PAY_L4] = key_size;
+ key_size += sizeof(struct nfp_flower_tp_ports);
+ }
+ if (in_key_ls.key_layer & NFP_FLOWER_LAYER_IPV4) {
+ map[FLOW_PAY_IPV4] = key_size;
+ key_size += sizeof(struct nfp_flower_ipv4);
+ }
+ if (in_key_ls.key_layer & NFP_FLOWER_LAYER_IPV6) {
+ map[FLOW_PAY_IPV6] = key_size;
+ key_size += sizeof(struct nfp_flower_ipv6);
+ }
+
+ if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GRE) {
+ map[FLOW_PAY_GRE] = key_size;
+ if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6)
+ key_size += sizeof(struct nfp_flower_ipv6_gre_tun);
+ else
+ key_size += sizeof(struct nfp_flower_ipv4_gre_tun);
+ }
+
+ if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_QINQ) {
+ map[FLOW_PAY_QINQ] = key_size;
+ key_size += sizeof(struct nfp_flower_vlan);
+ }
+
+ if ((in_key_ls.key_layer & NFP_FLOWER_LAYER_VXLAN) ||
+ (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE)) {
+ map[FLOW_PAY_UDP_TUN] = key_size;
+ if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6)
+ key_size += sizeof(struct nfp_flower_ipv6_udp_tun);
+ else
+ key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+ }
+
+ if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) {
+ map[FLOW_PAY_GENEVE_OPT] = key_size;
+ key_size += sizeof(struct nfp_flower_geneve_options);
+ }
+
+ return key_size;
+}
+
+static int nfp_fl_merge_actions_offload(struct flow_rule **rules,
+ struct nfp_flower_priv *priv,
+ struct net_device *netdev,
+ struct nfp_fl_payload *flow_pay)
+{
+ struct flow_action_entry *a_in;
+ int i, j, num_actions, id;
+ struct flow_rule *a_rule;
+ int err = 0, offset = 0;
+
+ num_actions = rules[CT_TYPE_PRE_CT]->action.num_entries +
+ rules[CT_TYPE_NFT]->action.num_entries +
+ rules[CT_TYPE_POST_CT]->action.num_entries;
+
+ a_rule = flow_rule_alloc(num_actions);
+ if (!a_rule)
+ return -ENOMEM;
+
+ /* Actions need a BASIC dissector. */
+ a_rule->match = rules[CT_TYPE_PRE_CT]->match;
+
+ /* Copy actions */
+ for (j = 0; j < _CT_TYPE_MAX; j++) {
+ if (flow_rule_match_key(rules[j], FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_match_basic match;
+
+ /* ip_proto is the only field that is needed later in compile_action,
+ * where it is needed to set the correct checksum flags. It doesn't really matter
+ * which input rule's ip_proto field we take as the earlier merge checks
+ * would have made sure that they don't conflict. We do not know which
+ * of the subflows would have the ip_proto filled in, so we need to iterate
+ * through the subflows and assign the proper subflow to a_rule
+ */
+ flow_rule_match_basic(rules[j], &match);
+ if (match.mask->ip_proto)
+ a_rule->match = rules[j]->match;
+ }
+
+ for (i = 0; i < rules[j]->action.num_entries; i++) {
+ a_in = &rules[j]->action.entries[i];
+ id = a_in->id;
+
+ /* Ignore CT related actions as these would already have
+ * been taken care of by previous checks, and we do not send
+ * any CT actions to the firmware.
+ */
+ switch (id) {
+ case FLOW_ACTION_CT:
+ case FLOW_ACTION_GOTO:
+ case FLOW_ACTION_CT_METADATA:
+ continue;
+ default:
+ memcpy(&a_rule->action.entries[offset++],
+ a_in, sizeof(struct flow_action_entry));
+ break;
+ }
+ }
+ }
+
+ /* Some actions would have been ignored, so update the num_entries field */
+ a_rule->action.num_entries = offset;
+ err = nfp_flower_compile_action(priv->app, a_rule, netdev, flow_pay, NULL);
+ kfree(a_rule);
+
+ return err;
+}
+
static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
{
- return 0;
+ enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE;
+ struct nfp_fl_ct_zone_entry *zt = m_entry->zt;
+ struct nfp_fl_key_ls key_layer, tmp_layer;
+ struct nfp_flower_priv *priv = zt->priv;
+ u16 key_map[_FLOW_PAY_LAYERS_MAX];
+ struct nfp_fl_payload *flow_pay;
+
+ struct flow_rule *rules[_CT_TYPE_MAX];
+ u8 *key, *msk, *kdata, *mdata;
+ struct nfp_port *port = NULL;
+ struct net_device *netdev;
+ bool qinq_sup;
+ u32 port_id;
+ u16 offset;
+ int i, err;
+
+ netdev = m_entry->netdev;
+ qinq_sup = !!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ);
+
+ rules[CT_TYPE_PRE_CT] = m_entry->tc_m_parent->pre_ct_parent->rule;
+ rules[CT_TYPE_NFT] = m_entry->nft_parent->rule;
+ rules[CT_TYPE_POST_CT] = m_entry->tc_m_parent->post_ct_parent->rule;
+
+ memset(&key_layer, 0, sizeof(struct nfp_fl_key_ls));
+ memset(&key_map, 0, sizeof(key_map));
+
+ /* Calculate the resultant key layer and size for offload */
+ for (i = 0; i < _CT_TYPE_MAX; i++) {
+ err = nfp_flower_calculate_key_layers(priv->app,
+ m_entry->netdev,
+ &tmp_layer, rules[i],
+ &tun_type, NULL);
+ if (err)
+ return err;
+
+ key_layer.key_layer |= tmp_layer.key_layer;
+ key_layer.key_layer_two |= tmp_layer.key_layer_two;
+ }
+ key_layer.key_size = nfp_fl_calc_key_layers_sz(key_layer, key_map);
+
+ flow_pay = nfp_flower_allocate_new(&key_layer);
+ if (!flow_pay)
+ return -ENOMEM;
+
+ memset(flow_pay->unmasked_data, 0, key_layer.key_size);
+ memset(flow_pay->mask_data, 0, key_layer.key_size);
+
+ kdata = flow_pay->unmasked_data;
+ mdata = flow_pay->mask_data;
+
+ offset = key_map[FLOW_PAY_META_TCI];
+ key = kdata + offset;
+ msk = mdata + offset;
+ nfp_flower_compile_meta((struct nfp_flower_meta_tci *)key,
+ (struct nfp_flower_meta_tci *)msk,
+ key_layer.key_layer);
+
+ if (NFP_FLOWER_LAYER_EXT_META & key_layer.key_layer) {
+ offset = key_map[FLOW_PAY_EXT_META];
+ key = kdata + offset;
+ msk = mdata + offset;
+ nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)key,
+ key_layer.key_layer_two);
+ nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)msk,
+ key_layer.key_layer_two);
+ }
+
+ /* Using in_port from the -trk rule. The tc merge checks should already
+ * be checking that the ingress netdevs are the same
+ */
+ port_id = nfp_flower_get_port_id_from_netdev(priv->app, netdev);
+ offset = key_map[FLOW_PAY_INPORT];
+ key = kdata + offset;
+ msk = mdata + offset;
+ err = nfp_flower_compile_port((struct nfp_flower_in_port *)key,
+ port_id, false, tun_type, NULL);
+ if (err)
+ goto ct_offload_err;
+ err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk,
+ port_id, true, tun_type, NULL);
+ if (err)
+ goto ct_offload_err;
+
+ /* The following part works on the assumption that previous checks have
+ * already filtered out flows that have different values for the different
+ * layers. Here we iterate through all three rules and merge their respective
+ * masked values (the bits that are cared about); the basic method is:
+ * final_key = (r1_key & r1_mask) | (r2_key & r2_mask) | (r3_key & r3_mask)
+ * final_mask = r1_mask | r2_mask | r3_mask
+ * If none of the rules contains a match that is also fine, that simply means
+ * that the layer is not present.
+ */
+ if (!qinq_sup) {
+ for (i = 0; i < _CT_TYPE_MAX; i++) {
+ offset = key_map[FLOW_PAY_META_TCI];
+ key = kdata + offset;
+ msk = mdata + offset;
+ nfp_flower_compile_tci((struct nfp_flower_meta_tci *)key,
+ (struct nfp_flower_meta_tci *)msk,
+ rules[i]);
+ }
+ }
+
+ if (NFP_FLOWER_LAYER_MAC & key_layer.key_layer) {
+ offset = key_map[FLOW_PAY_MAC_MPLS];
+ key = kdata + offset;
+ msk = mdata + offset;
+ for (i = 0; i < _CT_TYPE_MAX; i++) {
+ nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)key,
+ (struct nfp_flower_mac_mpls *)msk,
+ rules[i]);
+ err = nfp_flower_compile_mpls((struct nfp_flower_mac_mpls *)key,
+ (struct nfp_flower_mac_mpls *)msk,
+ rules[i], NULL);
+ if (err)
+ goto ct_offload_err;
+ }
+ }
+
+ if (NFP_FLOWER_LAYER_IPV4 & key_layer.key_layer) {
+ offset = key_map[FLOW_PAY_IPV4];
+ key = kdata + offset;
+ msk = mdata + offset;
+ for (i = 0; i < _CT_TYPE_MAX; i++) {
+ nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)key,
+ (struct nfp_flower_ipv4 *)msk,
+ rules[i]);
+ }
+ }
+
+ if (NFP_FLOWER_LAYER_IPV6 & key_layer.key_layer) {
+ offset = key_map[FLOW_PAY_IPV6];
+ key = kdata + offset;
+ msk = mdata + offset;
+ for (i = 0; i < _CT_TYPE_MAX; i++) {
+ nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)key,
+ (struct nfp_flower_ipv6 *)msk,
+ rules[i]);
+ }
+ }
+
+ if (NFP_FLOWER_LAYER_TP & key_layer.key_layer) {
+ offset = key_map[FLOW_PAY_L4];
+ key = kdata + offset;
+ msk = mdata + offset;
+ for (i = 0; i < _CT_TYPE_MAX; i++) {
+ nfp_flower_compile_tport((struct nfp_flower_tp_ports *)key,
+ (struct nfp_flower_tp_ports *)msk,
+ rules[i]);
+ }
+ }
+
+ if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GRE) {
+ offset = key_map[FLOW_PAY_GRE];
+ key = kdata + offset;
+ msk = mdata + offset;
+ if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) {
+ struct nfp_flower_ipv6_gre_tun *gre_match;
+ struct nfp_ipv6_addr_entry *entry;
+ struct in6_addr *dst;
+
+ for (i = 0; i < _CT_TYPE_MAX; i++) {
+ nfp_flower_compile_ipv6_gre_tun((void *)key,
+ (void *)msk, rules[i]);
+ }
+ gre_match = (struct nfp_flower_ipv6_gre_tun *)key;
+ dst = &gre_match->ipv6.dst;
+
+ entry = nfp_tunnel_add_ipv6_off(priv->app, dst);
+ if (!entry) {
+ err = -ENOMEM;
+ goto ct_offload_err;
+ }
+
+ flow_pay->nfp_tun_ipv6 = entry;
+ } else {
+ __be32 dst;
+
+ for (i = 0; i < _CT_TYPE_MAX; i++) {
+ nfp_flower_compile_ipv4_gre_tun((void *)key,
+ (void *)msk, rules[i]);
+ }
+ dst = ((struct nfp_flower_ipv4_gre_tun *)key)->ipv4.dst;
+
+ /* Store the tunnel destination in the rule data.
+ * This must be present and be an exact match.
+ */
+ flow_pay->nfp_tun_ipv4_addr = dst;
+ nfp_tunnel_add_ipv4_off(priv->app, dst);
+ }
+ }
+
+ if (NFP_FLOWER_LAYER2_QINQ & key_layer.key_layer_two) {
+ offset = key_map[FLOW_PAY_QINQ];
+ key = kdata + offset;
+ msk = mdata + offset;
+ for (i = 0; i < _CT_TYPE_MAX; i++) {
+ nfp_flower_compile_vlan((struct nfp_flower_vlan *)key,
+ (struct nfp_flower_vlan *)msk,
+ rules[i]);
+ }
+ }
+
+ if (key_layer.key_layer & NFP_FLOWER_LAYER_VXLAN ||
+ key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE) {
+ offset = key_map[FLOW_PAY_UDP_TUN];
+ key = kdata + offset;
+ msk = mdata + offset;
+ if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) {
+ struct nfp_flower_ipv6_udp_tun *udp_match;
+ struct nfp_ipv6_addr_entry *entry;
+ struct in6_addr *dst;
+
+ for (i = 0; i < _CT_TYPE_MAX; i++) {
+ nfp_flower_compile_ipv6_udp_tun((void *)key,
+ (void *)msk, rules[i]);
+ }
+ udp_match = (struct nfp_flower_ipv6_udp_tun *)key;
+ dst = &udp_match->ipv6.dst;
+
+ entry = nfp_tunnel_add_ipv6_off(priv->app, dst);
+ if (!entry) {
+ err = -ENOMEM;
+ goto ct_offload_err;
+ }
+
+ flow_pay->nfp_tun_ipv6 = entry;
+ } else {
+ __be32 dst;
+
+ for (i = 0; i < _CT_TYPE_MAX; i++) {
+ nfp_flower_compile_ipv4_udp_tun((void *)key,
+ (void *)msk, rules[i]);
+ }
+ dst = ((struct nfp_flower_ipv4_udp_tun *)key)->ipv4.dst;
+
+ /* Store the tunnel destination in the rule data.
+ * This must be present and be an exact match.
+ */
+ flow_pay->nfp_tun_ipv4_addr = dst;
+ nfp_tunnel_add_ipv4_off(priv->app, dst);
+ }
+
+ if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) {
+ offset = key_map[FLOW_PAY_GENEVE_OPT];
+ key = kdata + offset;
+ msk = mdata + offset;
+ for (i = 0; i < _CT_TYPE_MAX; i++)
+ nfp_flower_compile_geneve_opt(key, msk, rules[i]);
+ }
+ }
+
+ /* Merge actions into flow_pay */
+ err = nfp_fl_merge_actions_offload(rules, priv, netdev, flow_pay);
+ if (err)
+ goto ct_offload_err;
+
+ /* Use the pointer address as the cookie, but set the last bit to 1.
+ * This is to prevent the 'is_merge_flow' check from detecting this as
+ * an already merged flow. This works since address alignment means
+ * that the last bit for pointer addresses will be 0.
+ */
+ flow_pay->tc_flower_cookie = ((unsigned long)flow_pay) | 0x1;
+ err = nfp_compile_flow_metadata(priv->app, flow_pay->tc_flower_cookie,
+ flow_pay, netdev, NULL);
+ if (err)
+ goto ct_offload_err;
+
+ if (nfp_netdev_is_nfp_repr(netdev))
+ port = nfp_port_from_netdev(netdev);
+
+ err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node,
+ nfp_flower_table_params);
+ if (err)
+ goto ct_release_offload_meta_err;
+
+ err = nfp_flower_xmit_flow(priv->app, flow_pay,
+ NFP_FLOWER_CMSG_TYPE_FLOW_ADD);
+ if (err)
+ goto ct_remove_rhash_err;
+
+ m_entry->tc_flower_cookie = flow_pay->tc_flower_cookie;
+ m_entry->flow_pay = flow_pay;
+
+ if (port)
+ port->tc_offload_cnt++;
+
+ return err;
+
+ct_remove_rhash_err:
+ WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table,
+ &flow_pay->fl_node,
+ nfp_flower_table_params));
+ct_release_offload_meta_err:
+ nfp_modify_flow_metadata(priv->app, flow_pay);
+ct_offload_err:
+ if (flow_pay->nfp_tun_ipv4_addr)
+ nfp_tunnel_del_ipv4_off(priv->app, flow_pay->nfp_tun_ipv4_addr);
+ if (flow_pay->nfp_tun_ipv6)
+ nfp_tunnel_put_ipv6_off(priv->app, flow_pay->nfp_tun_ipv6);
+ kfree(flow_pay->action_data);
+ kfree(flow_pay->mask_data);
+ kfree(flow_pay->unmasked_data);
+ kfree(flow_pay);
+ return err;
}
static int nfp_fl_ct_del_offload(struct nfp_app *app, unsigned long cookie,
struct net_device *netdev)
{
- return 0;
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_fl_payload *flow_pay;
+ struct nfp_port *port = NULL;
+ int err = 0;
+
+ if (nfp_netdev_is_nfp_repr(netdev))
+ port = nfp_port_from_netdev(netdev);
+
+ flow_pay = nfp_flower_search_fl_table(app, cookie, netdev);
+ if (!flow_pay)
+ return -ENOENT;
+
+ err = nfp_modify_flow_metadata(app, flow_pay);
+ if (err)
+ goto err_free_merge_flow;
+
+ if (flow_pay->nfp_tun_ipv4_addr)
+ nfp_tunnel_del_ipv4_off(app, flow_pay->nfp_tun_ipv4_addr);
+
+ if (flow_pay->nfp_tun_ipv6)
+ nfp_tunnel_put_ipv6_off(app, flow_pay->nfp_tun_ipv6);
+
+ if (!flow_pay->in_hw) {
+ err = 0;
+ goto err_free_merge_flow;
+ }
+
+ err = nfp_flower_xmit_flow(app, flow_pay,
+ NFP_FLOWER_CMSG_TYPE_FLOW_DEL);
+
+err_free_merge_flow:
+ nfp_flower_del_linked_merge_flows(app, flow_pay);
+ if (port)
+ port->tc_offload_cnt--;
+ kfree(flow_pay->action_data);
+ kfree(flow_pay->mask_data);
+ kfree(flow_pay->unmasked_data);
+ WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table,
+ &flow_pay->fl_node,
+ nfp_flower_table_params));
+ kfree_rcu(flow_pay, rcu);
+ return err;
}
static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt,
@@ -1048,6 +1525,139 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
return 0;
}
+static void
+nfp_fl_ct_sub_stats(struct nfp_fl_nft_tc_merge *nft_merge,
+ enum ct_entry_type type, u64 *m_pkts,
+ u64 *m_bytes, u64 *m_used)
+{
+ struct nfp_flower_priv *priv = nft_merge->zt->priv;
+ struct nfp_fl_payload *nfp_flow;
+ u32 ctx_id;
+
+ nfp_flow = nft_merge->flow_pay;
+ if (!nfp_flow)
+ return;
+
+ ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id);
+ *m_pkts += priv->stats[ctx_id].pkts;
+ *m_bytes += priv->stats[ctx_id].bytes;
+ *m_used = max_t(u64, *m_used, priv->stats[ctx_id].used);
+
+ /* If request is for a sub_flow which is part of a tunnel merged
+ * flow then update stats from tunnel merged flows first.
+ */
+ if (!list_empty(&nfp_flow->linked_flows))
+ nfp_flower_update_merge_stats(priv->app, nfp_flow);
+
+ if (type != CT_TYPE_NFT) {
+ /* Update nft cached stats */
+ flow_stats_update(&nft_merge->nft_parent->stats,
+ priv->stats[ctx_id].bytes,
+ priv->stats[ctx_id].pkts,
+ 0, priv->stats[ctx_id].used,
+ FLOW_ACTION_HW_STATS_DELAYED);
+ } else {
+ /* Update pre_ct cached stats */
+ flow_stats_update(&nft_merge->tc_m_parent->pre_ct_parent->stats,
+ priv->stats[ctx_id].bytes,
+ priv->stats[ctx_id].pkts,
+ 0, priv->stats[ctx_id].used,
+ FLOW_ACTION_HW_STATS_DELAYED);
+ /* Update post_ct cached stats */
+ flow_stats_update(&nft_merge->tc_m_parent->post_ct_parent->stats,
+ priv->stats[ctx_id].bytes,
+ priv->stats[ctx_id].pkts,
+ 0, priv->stats[ctx_id].used,
+ FLOW_ACTION_HW_STATS_DELAYED);
+ }
+ /* Reset stats from the nfp */
+ priv->stats[ctx_id].pkts = 0;
+ priv->stats[ctx_id].bytes = 0;
+}
+
+int nfp_fl_ct_stats(struct flow_cls_offload *flow,
+ struct nfp_fl_ct_map_entry *ct_map_ent)
+{
+ struct nfp_fl_ct_flow_entry *ct_entry = ct_map_ent->ct_entry;
+ struct nfp_fl_nft_tc_merge *nft_merge, *nft_m_tmp;
+ struct nfp_fl_ct_tc_merge *tc_merge, *tc_m_tmp;
+
+ u64 pkts = 0, bytes = 0, used = 0;
+ u64 m_pkts, m_bytes, m_used;
+
+ spin_lock_bh(&ct_entry->zt->priv->stats_lock);
+
+ if (ct_entry->type == CT_TYPE_PRE_CT) {
+ /* Iterate tc_merge entries associated with this flow */
+ list_for_each_entry_safe(tc_merge, tc_m_tmp, &ct_entry->children,
+ pre_ct_list) {
+ m_pkts = 0;
+ m_bytes = 0;
+ m_used = 0;
+ /* Iterate nft_merge entries associated with this tc_merge flow */
+ list_for_each_entry_safe(nft_merge, nft_m_tmp, &tc_merge->children,
+ tc_merge_list) {
+ nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_PRE_CT,
+ &m_pkts, &m_bytes, &m_used);
+ }
+ pkts += m_pkts;
+ bytes += m_bytes;
+ used = max_t(u64, used, m_used);
+ /* Update post_ct partner */
+ flow_stats_update(&tc_merge->post_ct_parent->stats,
+ m_bytes, m_pkts, 0, m_used,
+ FLOW_ACTION_HW_STATS_DELAYED);
+ }
+ } else if (ct_entry->type == CT_TYPE_POST_CT) {
+ /* Iterate tc_merge entries associated with this flow */
+ list_for_each_entry_safe(tc_merge, tc_m_tmp, &ct_entry->children,
+ post_ct_list) {
+ m_pkts = 0;
+ m_bytes = 0;
+ m_used = 0;
+ /* Iterate nft_merge entries associated with this tc_merge flow */
+ list_for_each_entry_safe(nft_merge, nft_m_tmp, &tc_merge->children,
+ tc_merge_list) {
+ nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_POST_CT,
+ &m_pkts, &m_bytes, &m_used);
+ }
+ pkts += m_pkts;
+ bytes += m_bytes;
+ used = max_t(u64, used, m_used);
+ /* Update pre_ct partner */
+ flow_stats_update(&tc_merge->pre_ct_parent->stats,
+ m_bytes, m_pkts, 0, m_used,
+ FLOW_ACTION_HW_STATS_DELAYED);
+ }
+ } else {
+ /* Iterate nft_merge entries associated with this nft flow */
+ list_for_each_entry_safe(nft_merge, nft_m_tmp, &ct_entry->children,
+ nft_flow_list) {
+ nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_NFT,
+ &pkts, &bytes, &used);
+ }
+ }
+
+ /* Add stats from this request to stats potentially cached by
+ * previous requests.
+ */
+ flow_stats_update(&ct_entry->stats, bytes, pkts, 0, used,
+ FLOW_ACTION_HW_STATS_DELAYED);
+ /* Finally update the flow stats from the original stats request */
+ flow_stats_update(&flow->stats, ct_entry->stats.bytes,
+ ct_entry->stats.pkts, 0,
+ ct_entry->stats.lastused,
+ FLOW_ACTION_HW_STATS_DELAYED);
+ /* Stats have been synced to the original flow, so we can now clear
+ * the cache.
+ */
+ ct_entry->stats.pkts = 0;
+ ct_entry->stats.bytes = 0;
+ spin_unlock_bh(&ct_entry->zt->priv->stats_lock);
+
+ return 0;
+}
+
static int
nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offload *flow)
{
@@ -1080,7 +1690,11 @@ nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offl
nfp_ct_map_params);
return nfp_fl_ct_del_flow(ct_map_ent);
case FLOW_CLS_STATS:
- return 0;
+ ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie,
+ nfp_ct_map_params);
+ if (ct_map_ent)
+ return nfp_fl_ct_stats(flow, ct_map_ent);
+ break;
default:
break;
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
index 170b6cdb8cd0..beb6cceff9d8 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
@@ -83,6 +83,24 @@ enum ct_entry_type {
CT_TYPE_PRE_CT,
CT_TYPE_NFT,
CT_TYPE_POST_CT,
+ _CT_TYPE_MAX,
+};
+
+enum nfp_nfp_layer_name {
+ FLOW_PAY_META_TCI = 0,
+ FLOW_PAY_INPORT,
+ FLOW_PAY_EXT_META,
+ FLOW_PAY_MAC_MPLS,
+ FLOW_PAY_L4,
+ FLOW_PAY_IPV4,
+ FLOW_PAY_IPV6,
+ FLOW_PAY_CT,
+ FLOW_PAY_GRE,
+ FLOW_PAY_QINQ,
+ FLOW_PAY_UDP_TUN,
+ FLOW_PAY_GENEVE_OPT,
+
+ _FLOW_PAY_LAYERS_MAX
};
/**
@@ -228,4 +246,12 @@ int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent);
*/
int nfp_fl_ct_handle_nft_flow(enum tc_setup_type type, void *type_data,
void *cb_priv);
+
+/**
+ * nfp_fl_ct_stats() - Handle flower stats callbacks for ct flows
+ * @flow: TC flower classifier offload structure.
+ * @ct_map_ent: ct map entry for the flow whose stats are requested
+ */
+int nfp_fl_ct_stats(struct flow_cls_offload *flow,
+ struct nfp_fl_ct_map_entry *ct_map_ent);
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 0fbd682ccf72..917c450a7aad 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -413,20 +413,73 @@ int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
int nfp_flower_merge_offloaded_flows(struct nfp_app *app,
struct nfp_fl_payload *sub_flow1,
struct nfp_fl_payload *sub_flow2);
+void
+nfp_flower_compile_meta(struct nfp_flower_meta_tci *ext,
+ struct nfp_flower_meta_tci *msk, u8 key_type);
+void
+nfp_flower_compile_tci(struct nfp_flower_meta_tci *ext,
+ struct nfp_flower_meta_tci *msk,
+ struct flow_rule *rule);
+void
+nfp_flower_compile_ext_meta(struct nfp_flower_ext_meta *frame, u32 key_ext);
+int
+nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
+ bool mask_version, enum nfp_flower_tun_type tun_type,
+ struct netlink_ext_ack *extack);
+void
+nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext,
+ struct nfp_flower_mac_mpls *msk,
+ struct flow_rule *rule);
+int
+nfp_flower_compile_mpls(struct nfp_flower_mac_mpls *ext,
+ struct nfp_flower_mac_mpls *msk,
+ struct flow_rule *rule,
+ struct netlink_ext_ack *extack);
+void
+nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext,
+ struct nfp_flower_tp_ports *msk,
+ struct flow_rule *rule);
+void
+nfp_flower_compile_vlan(struct nfp_flower_vlan *ext,
+ struct nfp_flower_vlan *msk,
+ struct flow_rule *rule);
+void
+nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext,
+ struct nfp_flower_ipv4 *msk, struct flow_rule *rule);
+void
+nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext,
+ struct nfp_flower_ipv6 *msk, struct flow_rule *rule);
+void
+nfp_flower_compile_geneve_opt(u8 *ext, u8 *msk, struct flow_rule *rule);
+void
+nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext,
+ struct nfp_flower_ipv4_gre_tun *msk,
+ struct flow_rule *rule);
+void
+nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext,
+ struct nfp_flower_ipv4_udp_tun *msk,
+ struct flow_rule *rule);
+void
+nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext,
+ struct nfp_flower_ipv6_udp_tun *msk,
+ struct flow_rule *rule);
+void
+nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext,
+ struct nfp_flower_ipv6_gre_tun *msk,
+ struct flow_rule *rule);
int nfp_flower_compile_flow_match(struct nfp_app *app,
- struct flow_cls_offload *flow,
+ struct flow_rule *rule,
struct nfp_fl_key_ls *key_ls,
struct net_device *netdev,
struct nfp_fl_payload *nfp_flow,
enum nfp_flower_tun_type tun_type,
struct netlink_ext_ack *extack);
int nfp_flower_compile_action(struct nfp_app *app,
- struct flow_cls_offload *flow,
+ struct flow_rule *rule,
struct net_device *netdev,
struct nfp_fl_payload *nfp_flow,
struct netlink_ext_ack *extack);
-int nfp_compile_flow_metadata(struct nfp_app *app,
- struct flow_cls_offload *flow,
+int nfp_compile_flow_metadata(struct nfp_app *app, u32 cookie,
struct nfp_fl_payload *nfp_flow,
struct net_device *netdev,
struct netlink_ext_ack *extack);
@@ -498,4 +551,22 @@ int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app,
struct nfp_fl_payload *flow);
int nfp_flower_xmit_pre_tun_del_flow(struct nfp_app *app,
struct nfp_fl_payload *flow);
+
+struct nfp_fl_payload *
+nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer);
+int nfp_flower_calculate_key_layers(struct nfp_app *app,
+ struct net_device *netdev,
+ struct nfp_fl_key_ls *ret_key_ls,
+ struct flow_rule *flow,
+ enum nfp_flower_tun_type *tun_type,
+ struct netlink_ext_ack *extack);
+void
+nfp_flower_del_linked_merge_flows(struct nfp_app *app,
+ struct nfp_fl_payload *sub_flow);
+int
+nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow,
+ u8 mtype);
+void
+nfp_flower_update_merge_stats(struct nfp_app *app,
+ struct nfp_fl_payload *sub_flow);
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index 255a4dff6288..9d86eea4dc16 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -7,51 +7,68 @@
#include "cmsg.h"
#include "main.h"
-static void
-nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext,
- struct nfp_flower_meta_tci *msk,
- struct flow_rule *rule, u8 key_type, bool qinq_sup)
+void
+nfp_flower_compile_meta(struct nfp_flower_meta_tci *ext,
+ struct nfp_flower_meta_tci *msk, u8 key_type)
{
- u16 tmp_tci;
-
- memset(ext, 0, sizeof(struct nfp_flower_meta_tci));
- memset(msk, 0, sizeof(struct nfp_flower_meta_tci));
-
/* Populate the metadata frame. */
ext->nfp_flow_key_layer = key_type;
ext->mask_id = ~0;
msk->nfp_flow_key_layer = key_type;
msk->mask_id = ~0;
+}
- if (!qinq_sup && flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+void
+nfp_flower_compile_tci(struct nfp_flower_meta_tci *ext,
+ struct nfp_flower_meta_tci *msk,
+ struct flow_rule *rule)
+{
+ u16 msk_tci, key_tci;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
struct flow_match_vlan match;
flow_rule_match_vlan(rule, &match);
/* Populate the tci field. */
- tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
- tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+ key_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
+ key_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
match.key->vlan_priority) |
FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
match.key->vlan_id);
- ext->tci = cpu_to_be16(tmp_tci);
- tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
- tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+ msk_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
+ msk_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
match.mask->vlan_priority) |
FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
match.mask->vlan_id);
- msk->tci = cpu_to_be16(tmp_tci);
+
+ ext->tci |= cpu_to_be16((key_tci & msk_tci));
+ msk->tci |= cpu_to_be16(msk_tci);
}
}
static void
+nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext,
+ struct nfp_flower_meta_tci *msk,
+ struct flow_rule *rule, u8 key_type, bool qinq_sup)
+{
+ memset(ext, 0, sizeof(struct nfp_flower_meta_tci));
+ memset(msk, 0, sizeof(struct nfp_flower_meta_tci));
+
+ nfp_flower_compile_meta(ext, msk, key_type);
+
+ if (!qinq_sup)
+ nfp_flower_compile_tci(ext, msk, rule);
+}
+
+void
nfp_flower_compile_ext_meta(struct nfp_flower_ext_meta *frame, u32 key_ext)
{
frame->nfp_flow_key_layer2 = cpu_to_be32(key_ext);
}
-static int
+int
nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
bool mask_version, enum nfp_flower_tun_type tun_type,
struct netlink_ext_ack *extack)
@@ -74,28 +91,37 @@ nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
return 0;
}
-static int
+void
nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext,
- struct nfp_flower_mac_mpls *msk, struct flow_rule *rule,
- struct netlink_ext_ack *extack)
+ struct nfp_flower_mac_mpls *msk,
+ struct flow_rule *rule)
{
- memset(ext, 0, sizeof(struct nfp_flower_mac_mpls));
- memset(msk, 0, sizeof(struct nfp_flower_mac_mpls));
-
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
struct flow_match_eth_addrs match;
+ int i;
flow_rule_match_eth_addrs(rule, &match);
/* Populate mac frame. */
- ether_addr_copy(ext->mac_dst, &match.key->dst[0]);
- ether_addr_copy(ext->mac_src, &match.key->src[0]);
- ether_addr_copy(msk->mac_dst, &match.mask->dst[0]);
- ether_addr_copy(msk->mac_src, &match.mask->src[0]);
+ for (i = 0; i < ETH_ALEN; i++) {
+ ext->mac_dst[i] |= match.key->dst[i] &
+ match.mask->dst[i];
+ msk->mac_dst[i] |= match.mask->dst[i];
+ ext->mac_src[i] |= match.key->src[i] &
+ match.mask->src[i];
+ msk->mac_src[i] |= match.mask->src[i];
+ }
}
+}
+int
+nfp_flower_compile_mpls(struct nfp_flower_mac_mpls *ext,
+ struct nfp_flower_mac_mpls *msk,
+ struct flow_rule *rule,
+ struct netlink_ext_ack *extack)
+{
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) {
struct flow_match_mpls match;
- u32 t_mpls;
+ u32 key_mpls, msk_mpls;
flow_rule_match_mpls(rule, &match);
@@ -106,22 +132,24 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext,
return -EOPNOTSUPP;
}
- t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB,
- match.key->ls[0].mpls_label) |
- FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC,
- match.key->ls[0].mpls_tc) |
- FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS,
- match.key->ls[0].mpls_bos) |
- NFP_FLOWER_MASK_MPLS_Q;
- ext->mpls_lse = cpu_to_be32(t_mpls);
- t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB,
- match.mask->ls[0].mpls_label) |
- FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC,
- match.mask->ls[0].mpls_tc) |
- FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS,
- match.mask->ls[0].mpls_bos) |
- NFP_FLOWER_MASK_MPLS_Q;
- msk->mpls_lse = cpu_to_be32(t_mpls);
+ key_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB,
+ match.key->ls[0].mpls_label) |
+ FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC,
+ match.key->ls[0].mpls_tc) |
+ FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS,
+ match.key->ls[0].mpls_bos) |
+ NFP_FLOWER_MASK_MPLS_Q;
+
+ msk_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB,
+ match.mask->ls[0].mpls_label) |
+ FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC,
+ match.mask->ls[0].mpls_tc) |
+ FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS,
+ match.mask->ls[0].mpls_bos) |
+ NFP_FLOWER_MASK_MPLS_Q;
+
+ ext->mpls_lse |= cpu_to_be32((key_mpls & msk_mpls));
+ msk->mpls_lse |= cpu_to_be32(msk_mpls);
} else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
/* Check for mpls ether type and set NFP_FLOWER_MASK_MPLS_Q
* bit, which indicates an mpls ether type but without any
@@ -132,30 +160,41 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext,
flow_rule_match_basic(rule, &match);
if (match.key->n_proto == cpu_to_be16(ETH_P_MPLS_UC) ||
match.key->n_proto == cpu_to_be16(ETH_P_MPLS_MC)) {
- ext->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
- msk->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
+ ext->mpls_lse |= cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
+ msk->mpls_lse |= cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
}
}
return 0;
}
-static void
+static int
+nfp_flower_compile_mac_mpls(struct nfp_flower_mac_mpls *ext,
+ struct nfp_flower_mac_mpls *msk,
+ struct flow_rule *rule,
+ struct netlink_ext_ack *extack)
+{
+ memset(ext, 0, sizeof(struct nfp_flower_mac_mpls));
+ memset(msk, 0, sizeof(struct nfp_flower_mac_mpls));
+
+ nfp_flower_compile_mac(ext, msk, rule);
+
+ return nfp_flower_compile_mpls(ext, msk, rule, extack);
+}
+
+void
nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext,
struct nfp_flower_tp_ports *msk,
struct flow_rule *rule)
{
- memset(ext, 0, sizeof(struct nfp_flower_tp_ports));
- memset(msk, 0, sizeof(struct nfp_flower_tp_ports));
-
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
struct flow_match_ports match;
flow_rule_match_ports(rule, &match);
- ext->port_src = match.key->src;
- ext->port_dst = match.key->dst;
- msk->port_src = match.mask->src;
- msk->port_dst = match.mask->dst;
+ ext->port_src |= match.key->src & match.mask->src;
+ ext->port_dst |= match.key->dst & match.mask->dst;
+ msk->port_src |= match.mask->src;
+ msk->port_dst |= match.mask->dst;
}
}
@@ -167,18 +206,18 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext,
struct flow_match_basic match;
flow_rule_match_basic(rule, &match);
- ext->proto = match.key->ip_proto;
- msk->proto = match.mask->ip_proto;
+ ext->proto |= match.key->ip_proto & match.mask->ip_proto;
+ msk->proto |= match.mask->ip_proto;
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
struct flow_match_ip match;
flow_rule_match_ip(rule, &match);
- ext->tos = match.key->tos;
- ext->ttl = match.key->ttl;
- msk->tos = match.mask->tos;
- msk->ttl = match.mask->ttl;
+ ext->tos |= match.key->tos & match.mask->tos;
+ ext->ttl |= match.key->ttl & match.mask->ttl;
+ msk->tos |= match.mask->tos;
+ msk->ttl |= match.mask->ttl;
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
@@ -231,99 +270,108 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext,
}
static void
-nfp_flower_fill_vlan(struct flow_dissector_key_vlan *key,
- struct nfp_flower_vlan *frame,
- bool outer_vlan)
+nfp_flower_fill_vlan(struct flow_match_vlan *match,
+ struct nfp_flower_vlan *ext,
+ struct nfp_flower_vlan *msk, bool outer_vlan)
{
- u16 tci;
-
- tci = NFP_FLOWER_MASK_VLAN_PRESENT;
- tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
- key->vlan_priority) |
- FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
- key->vlan_id);
+ struct flow_dissector_key_vlan *mask = match->mask;
+ struct flow_dissector_key_vlan *key = match->key;
+ u16 msk_tci, key_tci;
+
+ key_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
+ key_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+ key->vlan_priority) |
+ FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
+ key->vlan_id);
+ msk_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
+ msk_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+ mask->vlan_priority) |
+ FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
+ mask->vlan_id);
if (outer_vlan) {
- frame->outer_tci = cpu_to_be16(tci);
- frame->outer_tpid = key->vlan_tpid;
+ ext->outer_tci |= cpu_to_be16((key_tci & msk_tci));
+ ext->outer_tpid |= key->vlan_tpid & mask->vlan_tpid;
+ msk->outer_tci |= cpu_to_be16(msk_tci);
+ msk->outer_tpid |= mask->vlan_tpid;
} else {
- frame->inner_tci = cpu_to_be16(tci);
- frame->inner_tpid = key->vlan_tpid;
+ ext->inner_tci |= cpu_to_be16((key_tci & msk_tci));
+ ext->inner_tpid |= key->vlan_tpid & mask->vlan_tpid;
+ msk->inner_tci |= cpu_to_be16(msk_tci);
+ msk->inner_tpid |= mask->vlan_tpid;
}
}
-static void
+void
nfp_flower_compile_vlan(struct nfp_flower_vlan *ext,
struct nfp_flower_vlan *msk,
struct flow_rule *rule)
{
struct flow_match_vlan match;
- memset(ext, 0, sizeof(struct nfp_flower_vlan));
- memset(msk, 0, sizeof(struct nfp_flower_vlan));
-
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
flow_rule_match_vlan(rule, &match);
- nfp_flower_fill_vlan(match.key, ext, true);
- nfp_flower_fill_vlan(match.mask, msk, true);
+ nfp_flower_fill_vlan(&match, ext, msk, true);
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
flow_rule_match_cvlan(rule, &match);
- nfp_flower_fill_vlan(match.key, ext, false);
- nfp_flower_fill_vlan(match.mask, msk, false);
+ nfp_flower_fill_vlan(&match, ext, msk, false);
}
}
-static void
+void
nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext,
struct nfp_flower_ipv4 *msk, struct flow_rule *rule)
{
- struct flow_match_ipv4_addrs match;
-
- memset(ext, 0, sizeof(struct nfp_flower_ipv4));
- memset(msk, 0, sizeof(struct nfp_flower_ipv4));
-
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+ struct flow_match_ipv4_addrs match;
+
flow_rule_match_ipv4_addrs(rule, &match);
- ext->ipv4_src = match.key->src;
- ext->ipv4_dst = match.key->dst;
- msk->ipv4_src = match.mask->src;
- msk->ipv4_dst = match.mask->dst;
+ ext->ipv4_src |= match.key->src & match.mask->src;
+ ext->ipv4_dst |= match.key->dst & match.mask->dst;
+ msk->ipv4_src |= match.mask->src;
+ msk->ipv4_dst |= match.mask->dst;
}
nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule);
}
-static void
+void
nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext,
struct nfp_flower_ipv6 *msk, struct flow_rule *rule)
{
- memset(ext, 0, sizeof(struct nfp_flower_ipv6));
- memset(msk, 0, sizeof(struct nfp_flower_ipv6));
-
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
struct flow_match_ipv6_addrs match;
+ int i;
flow_rule_match_ipv6_addrs(rule, &match);
- ext->ipv6_src = match.key->src;
- ext->ipv6_dst = match.key->dst;
- msk->ipv6_src = match.mask->src;
- msk->ipv6_dst = match.mask->dst;
+ for (i = 0; i < sizeof(ext->ipv6_src); i++) {
+ ext->ipv6_src.s6_addr[i] |= match.key->src.s6_addr[i] &
+ match.mask->src.s6_addr[i];
+ ext->ipv6_dst.s6_addr[i] |= match.key->dst.s6_addr[i] &
+ match.mask->dst.s6_addr[i];
+ msk->ipv6_src.s6_addr[i] |= match.mask->src.s6_addr[i];
+ msk->ipv6_dst.s6_addr[i] |= match.mask->dst.s6_addr[i];
+ }
}
nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule);
}
-static int
-nfp_flower_compile_geneve_opt(void *ext, void *msk, struct flow_rule *rule)
+void
+nfp_flower_compile_geneve_opt(u8 *ext, u8 *msk, struct flow_rule *rule)
{
struct flow_match_enc_opts match;
+ int i;
- flow_rule_match_enc_opts(rule, &match);
- memcpy(ext, match.key->data, match.key->len);
- memcpy(msk, match.mask->data, match.mask->len);
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) {
+ flow_rule_match_enc_opts(rule, &match);
- return 0;
+ for (i = 0; i < match.mask->len; i++) {
+ ext[i] |= match.key->data[i] & match.mask->data[i];
+ msk[i] |= match.mask->data[i];
+ }
+ }
}
static void
@@ -335,10 +383,10 @@ nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext,
struct flow_match_ipv4_addrs match;
flow_rule_match_enc_ipv4_addrs(rule, &match);
- ext->src = match.key->src;
- ext->dst = match.key->dst;
- msk->src = match.mask->src;
- msk->dst = match.mask->dst;
+ ext->src |= match.key->src & match.mask->src;
+ ext->dst |= match.key->dst & match.mask->dst;
+ msk->src |= match.mask->src;
+ msk->dst |= match.mask->dst;
}
}
@@ -349,12 +397,17 @@ nfp_flower_compile_tun_ipv6_addrs(struct nfp_flower_tun_ipv6 *ext,
{
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
struct flow_match_ipv6_addrs match;
+ int i;
flow_rule_match_enc_ipv6_addrs(rule, &match);
- ext->src = match.key->src;
- ext->dst = match.key->dst;
- msk->src = match.mask->src;
- msk->dst = match.mask->dst;
+ for (i = 0; i < sizeof(ext->src); i++) {
+ ext->src.s6_addr[i] |= match.key->src.s6_addr[i] &
+ match.mask->src.s6_addr[i];
+ ext->dst.s6_addr[i] |= match.key->dst.s6_addr[i] &
+ match.mask->dst.s6_addr[i];
+ msk->src.s6_addr[i] |= match.mask->src.s6_addr[i];
+ msk->dst.s6_addr[i] |= match.mask->dst.s6_addr[i];
+ }
}
}
@@ -367,10 +420,10 @@ nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext,
struct flow_match_ip match;
flow_rule_match_enc_ip(rule, &match);
- ext->tos = match.key->tos;
- ext->ttl = match.key->ttl;
- msk->tos = match.mask->tos;
- msk->ttl = match.mask->ttl;
+ ext->tos |= match.key->tos & match.mask->tos;
+ ext->ttl |= match.key->ttl & match.mask->ttl;
+ msk->tos |= match.mask->tos;
+ msk->ttl |= match.mask->ttl;
}
}
@@ -383,10 +436,11 @@ nfp_flower_compile_tun_udp_key(__be32 *key, __be32 *key_msk,
u32 vni;
flow_rule_match_enc_keyid(rule, &match);
- vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET;
- *key = cpu_to_be32(vni);
+ vni = be32_to_cpu((match.key->keyid & match.mask->keyid)) <<
+ NFP_FL_TUN_VNI_OFFSET;
+ *key |= cpu_to_be32(vni);
vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET;
- *key_msk = cpu_to_be32(vni);
+ *key_msk |= cpu_to_be32(vni);
}
}
@@ -398,22 +452,19 @@ nfp_flower_compile_tun_gre_key(__be32 *key, __be32 *key_msk, __be16 *flags,
struct flow_match_enc_keyid match;
flow_rule_match_enc_keyid(rule, &match);
- *key = match.key->keyid;
- *key_msk = match.mask->keyid;
+ *key |= match.key->keyid & match.mask->keyid;
+ *key_msk |= match.mask->keyid;
*flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY);
*flags_msk = cpu_to_be16(NFP_FL_GRE_FLAG_KEY);
}
}
-static void
+void
nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext,
struct nfp_flower_ipv4_gre_tun *msk,
struct flow_rule *rule)
{
- memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun));
- memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun));
-
/* NVGRE is the only supported GRE tunnel type */
ext->ethertype = cpu_to_be16(ETH_P_TEB);
msk->ethertype = cpu_to_be16(~0);
@@ -424,40 +475,31 @@ nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext,
&ext->tun_flags, &msk->tun_flags, rule);
}
-static void
+void
nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext,
struct nfp_flower_ipv4_udp_tun *msk,
struct flow_rule *rule)
{
- memset(ext, 0, sizeof(struct nfp_flower_ipv4_udp_tun));
- memset(msk, 0, sizeof(struct nfp_flower_ipv4_udp_tun));
-
nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule);
nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule);
nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule);
}
-static void
+void
nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext,
struct nfp_flower_ipv6_udp_tun *msk,
struct flow_rule *rule)
{
- memset(ext, 0, sizeof(struct nfp_flower_ipv6_udp_tun));
- memset(msk, 0, sizeof(struct nfp_flower_ipv6_udp_tun));
-
nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule);
nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule);
nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule);
}
-static void
+void
nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext,
struct nfp_flower_ipv6_gre_tun *msk,
struct flow_rule *rule)
{
- memset(ext, 0, sizeof(struct nfp_flower_ipv6_gre_tun));
- memset(msk, 0, sizeof(struct nfp_flower_ipv6_gre_tun));
-
/* NVGRE is the only supported GRE tunnel type */
ext->ethertype = cpu_to_be16(ETH_P_TEB);
msk->ethertype = cpu_to_be16(~0);
@@ -469,14 +511,13 @@ nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext,
}
int nfp_flower_compile_flow_match(struct nfp_app *app,
- struct flow_cls_offload *flow,
+ struct flow_rule *rule,
struct nfp_fl_key_ls *key_ls,
struct net_device *netdev,
struct nfp_fl_payload *nfp_flow,
enum nfp_flower_tun_type tun_type,
struct netlink_ext_ack *extack)
{
- struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
struct nfp_flower_priv *priv = app->priv;
bool qinq_sup;
u32 port_id;
@@ -527,9 +568,9 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
msk += sizeof(struct nfp_flower_in_port);
if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) {
- err = nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext,
- (struct nfp_flower_mac_mpls *)msk,
- rule, extack);
+ err = nfp_flower_compile_mac_mpls((struct nfp_flower_mac_mpls *)ext,
+ (struct nfp_flower_mac_mpls *)msk,
+ rule, extack);
if (err)
return err;
@@ -640,9 +681,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
}
if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) {
- err = nfp_flower_compile_geneve_opt(ext, msk, rule);
- if (err)
- return err;
+ nfp_flower_compile_geneve_opt(ext, msk, rule);
}
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index 621113650a9b..2af9faee96c5 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -290,8 +290,7 @@ nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len,
return true;
}
-int nfp_compile_flow_metadata(struct nfp_app *app,
- struct flow_cls_offload *flow,
+int nfp_compile_flow_metadata(struct nfp_app *app, u32 cookie,
struct nfp_fl_payload *nfp_flow,
struct net_device *netdev,
struct netlink_ext_ack *extack)
@@ -310,7 +309,7 @@ int nfp_compile_flow_metadata(struct nfp_app *app,
}
nfp_flow->meta.host_ctx_id = cpu_to_be32(stats_cxt);
- nfp_flow->meta.host_cookie = cpu_to_be64(flow->cookie);
+ nfp_flow->meta.host_cookie = cpu_to_be64(cookie);
nfp_flow->ingress_dev = netdev;
ctx_entry = kzalloc(sizeof(*ctx_entry), GFP_KERNEL);
@@ -357,7 +356,7 @@ int nfp_compile_flow_metadata(struct nfp_app *app,
priv->stats[stats_cxt].bytes = 0;
priv->stats[stats_cxt].used = jiffies;
- check_entry = nfp_flower_search_fl_table(app, flow->cookie, netdev);
+ check_entry = nfp_flower_search_fl_table(app, cookie, netdev);
if (check_entry) {
NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot offload duplicate flow entry");
if (nfp_release_stats_entry(app, stats_cxt)) {
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 2406d33356ad..556c3495211d 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -41,6 +41,8 @@
BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \
BIT(FLOW_DISSECTOR_KEY_ENC_IP) | \
BIT(FLOW_DISSECTOR_KEY_MPLS) | \
+ BIT(FLOW_DISSECTOR_KEY_CT) | \
+ BIT(FLOW_DISSECTOR_KEY_META) | \
BIT(FLOW_DISSECTOR_KEY_IP))
#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR \
@@ -89,7 +91,7 @@ struct nfp_flower_merge_check {
};
};
-static int
+int
nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow,
u8 mtype)
{
@@ -134,20 +136,16 @@ nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow,
return 0;
}
-static bool nfp_flower_check_higher_than_mac(struct flow_cls_offload *f)
+static bool nfp_flower_check_higher_than_mac(struct flow_rule *rule)
{
- struct flow_rule *rule = flow_cls_offload_flow_rule(f);
-
return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS) ||
flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS) ||
flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) ||
flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP);
}
-static bool nfp_flower_check_higher_than_l3(struct flow_cls_offload *f)
+static bool nfp_flower_check_higher_than_l3(struct flow_rule *rule)
{
- struct flow_rule *rule = flow_cls_offload_flow_rule(f);
-
return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) ||
flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP);
}
@@ -236,15 +234,14 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports,
return 0;
}
-static int
+int
nfp_flower_calculate_key_layers(struct nfp_app *app,
struct net_device *netdev,
struct nfp_fl_key_ls *ret_key_ls,
- struct flow_cls_offload *flow,
+ struct flow_rule *rule,
enum nfp_flower_tun_type *tun_type,
struct netlink_ext_ack *extack)
{
- struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
struct flow_dissector *dissector = rule->match.dissector;
struct flow_match_basic basic = { NULL, NULL};
struct nfp_flower_priv *priv = app->priv;
@@ -452,7 +449,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on given EtherType is not supported");
return -EOPNOTSUPP;
}
- } else if (nfp_flower_check_higher_than_mac(flow)) {
+ } else if (nfp_flower_check_higher_than_mac(rule)) {
NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot match above L2 without specified EtherType");
return -EOPNOTSUPP;
}
@@ -471,7 +468,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
}
if (!(key_layer & NFP_FLOWER_LAYER_TP) &&
- nfp_flower_check_higher_than_l3(flow)) {
+ nfp_flower_check_higher_than_l3(rule)) {
NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot match on L4 information without specified IP protocol type");
return -EOPNOTSUPP;
}
@@ -543,7 +540,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
return 0;
}
-static struct nfp_fl_payload *
+struct nfp_fl_payload *
nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
{
struct nfp_fl_payload *flow_pay;
@@ -1005,9 +1002,7 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app,
struct nfp_fl_payload *sub_flow1,
struct nfp_fl_payload *sub_flow2)
{
- struct flow_cls_offload merge_tc_off;
struct nfp_flower_priv *priv = app->priv;
- struct netlink_ext_ack *extack = NULL;
struct nfp_fl_payload *merge_flow;
struct nfp_fl_key_ls merge_key_ls;
struct nfp_merge_info *merge_info;
@@ -1016,7 +1011,6 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app,
ASSERT_RTNL();
- extack = merge_tc_off.common.extack;
if (sub_flow1 == sub_flow2 ||
nfp_flower_is_merge_flow(sub_flow1) ||
nfp_flower_is_merge_flow(sub_flow2))
@@ -1061,9 +1055,8 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app,
if (err)
goto err_unlink_sub_flow1;
- merge_tc_off.cookie = merge_flow->tc_flower_cookie;
- err = nfp_compile_flow_metadata(app, &merge_tc_off, merge_flow,
- merge_flow->ingress_dev, extack);
+ err = nfp_compile_flow_metadata(app, merge_flow->tc_flower_cookie, merge_flow,
+ merge_flow->ingress_dev, NULL);
if (err)
goto err_unlink_sub_flow2;
@@ -1305,6 +1298,7 @@ static int
nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
struct flow_cls_offload *flow)
{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE;
struct nfp_flower_priv *priv = app->priv;
struct netlink_ext_ack *extack = NULL;
@@ -1330,7 +1324,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
if (!key_layer)
return -ENOMEM;
- err = nfp_flower_calculate_key_layers(app, netdev, key_layer, flow,
+ err = nfp_flower_calculate_key_layers(app, netdev, key_layer, rule,
&tun_type, extack);
if (err)
goto err_free_key_ls;
@@ -1341,12 +1335,12 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
goto err_free_key_ls;
}
- err = nfp_flower_compile_flow_match(app, flow, key_layer, netdev,
+ err = nfp_flower_compile_flow_match(app, rule, key_layer, netdev,
flow_pay, tun_type, extack);
if (err)
goto err_destroy_flow;
- err = nfp_flower_compile_action(app, flow, netdev, flow_pay, extack);
+ err = nfp_flower_compile_action(app, rule, netdev, flow_pay, extack);
if (err)
goto err_destroy_flow;
@@ -1356,7 +1350,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
goto err_destroy_flow;
}
- err = nfp_compile_flow_metadata(app, flow, flow_pay, netdev, extack);
+ err = nfp_compile_flow_metadata(app, flow->cookie, flow_pay, netdev, extack);
if (err)
goto err_destroy_flow;
@@ -1476,7 +1470,7 @@ err_free_links:
kfree_rcu(merge_flow, rcu);
}
-static void
+void
nfp_flower_del_linked_merge_flows(struct nfp_app *app,
struct nfp_fl_payload *sub_flow)
{
@@ -1601,7 +1595,7 @@ __nfp_flower_update_merge_stats(struct nfp_app *app,
}
}
-static void
+void
nfp_flower_update_merge_stats(struct nfp_app *app,
struct nfp_fl_payload *sub_flow)
{
@@ -1628,10 +1622,17 @@ nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev,
struct flow_cls_offload *flow)
{
struct nfp_flower_priv *priv = app->priv;
+ struct nfp_fl_ct_map_entry *ct_map_ent;
struct netlink_ext_ack *extack = NULL;
struct nfp_fl_payload *nfp_flow;
u32 ctx_id;
+ /* Check ct_map table first */
+ ct_map_ent = rhashtable_lookup_fast(&priv->ct_map_table, &flow->cookie,
+ nfp_ct_map_params);
+ if (ct_map_ent)
+ return nfp_fl_ct_stats(flow, ct_map_ent);
+
extack = flow->common.extack;
nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, netdev);
if (!nfp_flow) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index 742a420152b3..bb3b8a7f6c5d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -692,7 +692,7 @@ static int nfp_pci_probe(struct pci_dev *pdev,
goto err_pci_disable;
}
- devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf));
+ devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf), &pdev->dev);
if (!devlink) {
err = -ENOMEM;
goto err_rel_regions;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index df5b748be068..df203738511b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -17,6 +17,7 @@
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
+#include <linux/dim.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/semaphore.h>
#include <linux/workqueue.h>
@@ -360,6 +361,9 @@ struct nfp_net_rx_ring {
* @rx_ring: Pointer to RX ring
* @xdp_ring: Pointer to an extra TX ring for XDP
* @irq_entry: MSI-X table entry (use for talking to the device)
+ * @event_ctr: Number of interrupts
+ * @rx_dim: Dynamic interrupt moderation structure for RX
+ * @tx_dim: Dynamic interrupt moderation structure for TX
* @rx_sync: Seqlock for atomic updates of RX stats
* @rx_pkts: Number of received packets
* @rx_bytes: Number of received bytes
@@ -410,6 +414,10 @@ struct nfp_net_r_vector {
u16 irq_entry;
+ u16 event_ctr;
+ struct dim rx_dim;
+ struct dim tx_dim;
+
struct u64_stats_sync rx_sync;
u64 rx_pkts;
u64 rx_bytes;
@@ -571,6 +579,8 @@ struct nfp_net_dp {
* mailbox area, crypto TLV
* @link_up: Is the link up?
* @link_status_lock: Protects @link_* and ensures atomicity with BAR reading
+ * @rx_coalesce_adapt_on: Is RX interrupt moderation adaptive?
+ * @tx_coalesce_adapt_on: Is TX interrupt moderation adaptive?
* @rx_coalesce_usecs: RX interrupt moderation usecs delay parameter
* @rx_coalesce_max_frames: RX interrupt moderation frame count parameter
* @tx_coalesce_usecs: TX interrupt moderation usecs delay parameter
@@ -654,6 +664,8 @@ struct nfp_net {
struct semaphore bar_lock;
+ bool rx_coalesce_adapt_on;
+ bool tx_coalesce_adapt_on;
u32 rx_coalesce_usecs;
u32 rx_coalesce_max_frames;
u32 tx_coalesce_usecs;
@@ -919,6 +931,14 @@ static inline bool nfp_netdev_is_nfp_net(struct net_device *netdev)
return netdev->netdev_ops == &nfp_net_netdev_ops;
}
+static inline int nfp_net_coalesce_para_check(u32 usecs, u32 pkts)
+{
+ if ((usecs >= ((1 << 16) - 1)) || (pkts >= ((1 << 16) - 1)))
+ return -EINVAL;
+
+ return 0;
+}
+
/* Prototypes */
void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
void __iomem *ctrl_bar);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 5dfa4799c34f..5bfa22accf2c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -474,6 +474,12 @@ static irqreturn_t nfp_net_irq_rxtx(int irq, void *data)
{
struct nfp_net_r_vector *r_vec = data;
+ /* Currently we cannot tell whether this is an rx or a tx interrupt.
+ * Since dim does not need an accurate event_ctr for its calculation,
+ * we just use this one counter for both rx and tx dim.
+ */
+ r_vec->event_ctr++;
+
napi_schedule_irqoff(&r_vec->napi);
/* The FW auto-masks any interrupt, either via the MASK bit in
@@ -1697,7 +1703,7 @@ nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
case NFP_NET_META_RESYNC_INFO:
if (nfp_net_tls_rx_resync_req(netdev, data, pkt,
pkt_len))
- return NULL;
+ return false;
data += sizeof(struct nfp_net_tls_resync_req);
break;
default:
@@ -2061,6 +2067,36 @@ static int nfp_net_poll(struct napi_struct *napi, int budget)
if (napi_complete_done(napi, pkts_polled))
nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
+ if (r_vec->nfp_net->rx_coalesce_adapt_on) {
+ struct dim_sample dim_sample = {};
+ unsigned int start;
+ u64 pkts, bytes;
+
+ do {
+ start = u64_stats_fetch_begin(&r_vec->rx_sync);
+ pkts = r_vec->rx_pkts;
+ bytes = r_vec->rx_bytes;
+ } while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
+
+ dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
+ net_dim(&r_vec->rx_dim, dim_sample);
+ }
+
+ if (r_vec->nfp_net->tx_coalesce_adapt_on) {
+ struct dim_sample dim_sample = {};
+ unsigned int start;
+ u64 pkts, bytes;
+
+ do {
+ start = u64_stats_fetch_begin(&r_vec->tx_sync);
+ pkts = r_vec->tx_pkts;
+ bytes = r_vec->tx_bytes;
+ } while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
+
+ dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
+ net_dim(&r_vec->tx_dim, dim_sample);
+ }
+
return pkts_polled;
}
@@ -2873,6 +2909,7 @@ static int nfp_net_set_config_and_enable(struct nfp_net *nn)
*/
static void nfp_net_close_stack(struct nfp_net *nn)
{
+ struct nfp_net_r_vector *r_vec;
unsigned int r;
disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
@@ -2880,8 +2917,16 @@ static void nfp_net_close_stack(struct nfp_net *nn)
nn->link_up = false;
for (r = 0; r < nn->dp.num_r_vecs; r++) {
- disable_irq(nn->r_vecs[r].irq_vector);
- napi_disable(&nn->r_vecs[r].napi);
+ r_vec = &nn->r_vecs[r];
+
+ disable_irq(r_vec->irq_vector);
+ napi_disable(&r_vec->napi);
+
+ if (r_vec->rx_ring)
+ cancel_work_sync(&r_vec->rx_dim.work);
+
+ if (r_vec->tx_ring)
+ cancel_work_sync(&r_vec->tx_dim.work);
}
netif_tx_disable(nn->dp.netdev);
@@ -2948,17 +2993,92 @@ void nfp_ctrl_close(struct nfp_net *nn)
rtnl_unlock();
}
+static void nfp_net_rx_dim_work(struct work_struct *work)
+{
+ struct nfp_net_r_vector *r_vec;
+ unsigned int factor, value;
+ struct dim_cq_moder moder;
+ struct nfp_net *nn;
+ struct dim *dim;
+
+ dim = container_of(work, struct dim, work);
+ moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
+ r_vec = container_of(dim, struct nfp_net_r_vector, rx_dim);
+ nn = r_vec->nfp_net;
+
+ /* Compute factor used to convert coalesce '_usecs' parameters to
+ * ME timestamp ticks. There are 16 ME clock cycles for each timestamp
+ * count.
+ */
+ factor = nn->tlv_caps.me_freq_mhz / 16;
+ if (nfp_net_coalesce_para_check(factor * moder.usec, moder.pkts))
+ return;
+
+ /* copy RX interrupt coalesce parameters */
+ value = (moder.pkts << 16) | (factor * moder.usec);
+ rtnl_lock();
+ nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(r_vec->rx_ring->idx), value);
+ (void)nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD);
+ rtnl_unlock();
+
+ dim->state = DIM_START_MEASURE;
+}
+
+static void nfp_net_tx_dim_work(struct work_struct *work)
+{
+ struct nfp_net_r_vector *r_vec;
+ unsigned int factor, value;
+ struct dim_cq_moder moder;
+ struct nfp_net *nn;
+ struct dim *dim;
+
+ dim = container_of(work, struct dim, work);
+ moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
+ r_vec = container_of(dim, struct nfp_net_r_vector, tx_dim);
+ nn = r_vec->nfp_net;
+
+ /* Compute factor used to convert coalesce '_usecs' parameters to
+ * ME timestamp ticks. There are 16 ME clock cycles for each timestamp
+ * count.
+ */
+ factor = nn->tlv_caps.me_freq_mhz / 16;
+ if (nfp_net_coalesce_para_check(factor * moder.usec, moder.pkts))
+ return;
+
+ /* copy TX interrupt coalesce parameters */
+ value = (moder.pkts << 16) | (factor * moder.usec);
+ rtnl_lock();
+ nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(r_vec->tx_ring->idx), value);
+ (void)nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD);
+ rtnl_unlock();
+
+ dim->state = DIM_START_MEASURE;
+}
+
/**
* nfp_net_open_stack() - Start the device from stack's perspective
* @nn: NFP Net device to reconfigure
*/
static void nfp_net_open_stack(struct nfp_net *nn)
{
+ struct nfp_net_r_vector *r_vec;
unsigned int r;
for (r = 0; r < nn->dp.num_r_vecs; r++) {
- napi_enable(&nn->r_vecs[r].napi);
- enable_irq(nn->r_vecs[r].irq_vector);
+ r_vec = &nn->r_vecs[r];
+
+ if (r_vec->rx_ring) {
+ INIT_WORK(&r_vec->rx_dim.work, nfp_net_rx_dim_work);
+ r_vec->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ }
+
+ if (r_vec->tx_ring) {
+ INIT_WORK(&r_vec->tx_dim.work, nfp_net_tx_dim_work);
+ r_vec->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ }
+
+ napi_enable(&r_vec->napi);
+ enable_irq(r_vec->irq_vector);
}
netif_tx_wake_all_queues(nn->dp.netdev);
@@ -3161,17 +3281,12 @@ static int nfp_net_dp_swap_enable(struct nfp_net *nn, struct nfp_net_dp *dp)
for (r = 0; r < nn->max_r_vecs; r++)
nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
- err = netif_set_real_num_rx_queues(nn->dp.netdev, nn->dp.num_rx_rings);
+ err = netif_set_real_num_queues(nn->dp.netdev,
+ nn->dp.num_stack_tx_rings,
+ nn->dp.num_rx_rings);
if (err)
return err;
- if (nn->dp.netdev->real_num_tx_queues != nn->dp.num_stack_tx_rings) {
- err = netif_set_real_num_tx_queues(nn->dp.netdev,
- nn->dp.num_stack_tx_rings);
- if (err)
- return err;
- }
-
return nfp_net_set_config_and_enable(nn);
}
@@ -3893,6 +4008,9 @@ static void nfp_net_irqmod_init(struct nfp_net *nn)
nn->rx_coalesce_max_frames = 64;
nn->tx_coalesce_usecs = 50;
nn->tx_coalesce_max_frames = 64;
+
+ nn->rx_coalesce_adapt_on = true;
+ nn->tx_coalesce_adapt_on = true;
}
static void nfp_net_netdev_init(struct nfp_net *nn)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 8803faadd302..0685ece1f155 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -1078,13 +1078,18 @@ static void nfp_net_get_regs(struct net_device *netdev,
}
static int nfp_net_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct nfp_net *nn = netdev_priv(netdev);
if (!(nn->cap & NFP_NET_CFG_CTRL_IRQMOD))
return -EINVAL;
+ ec->use_adaptive_rx_coalesce = nn->rx_coalesce_adapt_on;
+ ec->use_adaptive_tx_coalesce = nn->tx_coalesce_adapt_on;
+
ec->rx_coalesce_usecs = nn->rx_coalesce_usecs;
ec->rx_max_coalesced_frames = nn->rx_coalesce_max_frames;
ec->tx_coalesce_usecs = nn->tx_coalesce_usecs;
@@ -1327,7 +1332,9 @@ exit_close_nsp:
}
static int nfp_net_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct nfp_net *nn = netdev_priv(netdev);
unsigned int factor;
@@ -1361,19 +1368,18 @@ static int nfp_net_set_coalesce(struct net_device *netdev,
if (!ec->tx_coalesce_usecs && !ec->tx_max_coalesced_frames)
return -EINVAL;
- if (ec->rx_coalesce_usecs * factor >= ((1 << 16) - 1))
- return -EINVAL;
-
- if (ec->tx_coalesce_usecs * factor >= ((1 << 16) - 1))
+ if (nfp_net_coalesce_para_check(ec->rx_coalesce_usecs * factor,
+ ec->rx_max_coalesced_frames))
return -EINVAL;
- if (ec->rx_max_coalesced_frames >= ((1 << 16) - 1))
- return -EINVAL;
-
- if (ec->tx_max_coalesced_frames >= ((1 << 16) - 1))
+ if (nfp_net_coalesce_para_check(ec->tx_coalesce_usecs * factor,
+ ec->tx_max_coalesced_frames))
return -EINVAL;
/* configuration is valid */
+ nn->rx_coalesce_adapt_on = !!ec->use_adaptive_rx_coalesce;
+ nn->tx_coalesce_adapt_on = !!ec->use_adaptive_tx_coalesce;
+
nn->rx_coalesce_usecs = ec->rx_coalesce_usecs;
nn->rx_coalesce_max_frames = ec->rx_max_coalesced_frames;
nn->tx_coalesce_usecs = ec->tx_coalesce_usecs;
@@ -1445,7 +1451,8 @@ static int nfp_net_set_channels(struct net_device *netdev,
static const struct ethtool_ops nfp_net_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
- ETHTOOL_COALESCE_MAX_FRAMES,
+ ETHTOOL_COALESCE_MAX_FRAMES |
+ ETHTOOL_COALESCE_USE_ADAPTIVE,
.get_drvinfo = nfp_net_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_ringparam = nfp_net_get_ringparam,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 921db40047d7..d10a93801344 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -701,7 +701,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
if (err)
goto err_unmap;
- err = devlink_register(devlink, &pf->pdev->dev);
+ err = devlink_register(devlink);
if (err)
goto err_app_clean;
diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
index 2d097dcb7bda..346145d3180e 100644
--- a/drivers/net/ethernet/ni/nixge.c
+++ b/drivers/net/ethernet/ni/nixge.c
@@ -993,8 +993,11 @@ static void nixge_ethtools_get_drvinfo(struct net_device *ndev,
strlcpy(ed->bus_info, "platform", sizeof(ed->bus_info));
}
-static int nixge_ethtools_get_coalesce(struct net_device *ndev,
- struct ethtool_coalesce *ecoalesce)
+static int
+nixge_ethtools_get_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *ecoalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct nixge_priv *priv = netdev_priv(ndev);
u32 regval = 0;
@@ -1008,8 +1011,11 @@ static int nixge_ethtools_get_coalesce(struct net_device *ndev,
return 0;
}
-static int nixge_ethtools_set_coalesce(struct net_device *ndev,
- struct ethtool_coalesce *ecoalesce)
+static int
+nixge_ethtools_set_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *ecoalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct nixge_priv *priv = netdev_priv(ndev);
@@ -1223,7 +1229,6 @@ static int nixge_of_get_resources(struct platform_device *pdev)
{
const struct of_device_id *of_id;
enum nixge_version version;
- struct resource *ctrlres;
struct net_device *ndev;
struct nixge_priv *priv;
@@ -1242,13 +1247,10 @@ static int nixge_of_get_resources(struct platform_device *pdev)
netdev_err(ndev, "failed to map dma regs\n");
return PTR_ERR(priv->dma_regs);
}
- if (version <= NIXGE_V2) {
+ if (version <= NIXGE_V2)
priv->ctrl_regs = priv->dma_regs + NIXGE_REG_CTRL_OFFSET;
- } else {
- ctrlres = platform_get_resource_byname(pdev, IORESOURCE_MEM,
- "ctrl");
- priv->ctrl_regs = devm_ioremap_resource(&pdev->dev, ctrlres);
- }
+ else
+ priv->ctrl_regs = devm_platform_ioremap_resource_byname(pdev, "ctrl");
if (IS_ERR(priv->ctrl_regs)) {
netdev_err(ndev, "failed to map ctrl regs\n");
return PTR_ERR(priv->ctrl_regs);
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 8724d6a9ed02..ef3fb4cc90af 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -5782,15 +5782,11 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
np->desc_ver = DESC_VER_3;
np->txrxctl_bits = NVREG_TXRXCTL_DESC_3;
if (dma_64bit) {
- if (pci_set_dma_mask(pci_dev, DMA_BIT_MASK(39)))
+ if (dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(39)))
dev_info(&pci_dev->dev,
"64-bit DMA failed, using 32-bit addressing\n");
else
dev->features |= NETIF_F_HIGHDMA;
- if (pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(39))) {
- dev_info(&pci_dev->dev,
- "64-bit DMA (consistent) failed, using 32-bit ring buffers\n");
- }
}
} else if (id->driver_data & DEV_HAS_LARGEDESC) {
/* packet format 2: supports jumbo frames */
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index 64c6842bd452..d29fe562b3de 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -1219,7 +1219,7 @@ static const struct net_device_ops lpc_netdev_ops = {
.ndo_stop = lpc_eth_close,
.ndo_start_xmit = lpc_eth_hard_start_xmit,
.ndo_set_rx_mode = lpc_eth_set_multicast_list,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_set_mac_address = lpc_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
};
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
index af84f72bf08e..4e18b64dceb9 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
@@ -6,6 +6,7 @@
config PCH_GBE
tristate "OKI SEMICONDUCTOR IOH(ML7223/ML7831) GbE"
depends on PCI && (X86_32 || COMPILE_TEST)
+ depends on PTP_1588_CLOCK
select MII
select PTP_1588_CLOCK_PCH
select NET_PTP_CLASSIFY
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index e351f3d1608f..ec3e558f890e 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -1031,13 +1031,7 @@ static void pch_gbe_watchdog(struct timer_list *t)
struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
netdev->tx_queue_len = adapter->tx_queue_len;
/* mii library handles link maintenance tasks */
- if (mii_ethtool_gset(&adapter->mii, &cmd)) {
- netdev_err(netdev, "ethtool get setting Error\n");
- mod_timer(&adapter->watchdog_timer,
- round_jiffies(jiffies +
- PCH_GBE_WATCHDOG_PERIOD));
- return;
- }
+ mii_ethtool_gset(&adapter->mii, &cmd);
hw->mac.link_speed = ethtool_cmd_speed(&cmd);
hw->mac.link_duplex = cmd.duplex;
/* Set the RGMII control. */
@@ -2333,7 +2327,7 @@ static const struct net_device_ops pch_gbe_netdev_ops = {
.ndo_tx_timeout = pch_gbe_tx_timeout,
.ndo_change_mtu = pch_gbe_change_mtu,
.ndo_set_features = pch_gbe_set_features,
- .ndo_do_ioctl = pch_gbe_ioctl,
+ .ndo_eth_ioctl = pch_gbe_ioctl,
.ndo_set_rx_mode = pch_gbe_set_multi,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = pch_gbe_netpoll,
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.c
index ed832046216a..3426f6fa2b57 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.c
@@ -301,9 +301,7 @@ void pch_gbe_phy_init_setting(struct pch_gbe_hw *hw)
int ret;
u16 mii_reg;
- ret = mii_ethtool_gset(&adapter->mii, &cmd);
- if (ret)
- netdev_err(adapter->netdev, "Error: mii_ethtool_gset\n");
+ mii_ethtool_gset(&adapter->mii, &cmd);
ethtool_cmd_speed_set(&cmd, hw->mac.link_speed);
cmd.duplex = hw->mac.link_duplex;
diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c
index d058a63602a9..1a6336a56d3d 100644
--- a/drivers/net/ethernet/packetengines/hamachi.c
+++ b/drivers/net/ethernet/packetengines/hamachi.c
@@ -546,7 +546,9 @@ static int read_eeprom(void __iomem *ioaddr, int location);
static int mdio_read(struct net_device *dev, int phy_id, int location);
static void mdio_write(struct net_device *dev, int phy_id, int location, int value);
static int hamachi_open(struct net_device *dev);
-static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static int hamachi_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static int hamachi_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+ void __user *data, int cmd);
static void hamachi_timer(struct timer_list *t);
static void hamachi_tx_timeout(struct net_device *dev, unsigned int txqueue);
static void hamachi_init_ring(struct net_device *dev);
@@ -571,7 +573,8 @@ static const struct net_device_ops hamachi_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
.ndo_tx_timeout = hamachi_tx_timeout,
- .ndo_do_ioctl = netdev_ioctl,
+ .ndo_eth_ioctl = hamachi_ioctl,
+ .ndo_siocdevprivate = hamachi_siocdevprivate,
};
@@ -1867,7 +1870,36 @@ static const struct ethtool_ops ethtool_ops_no_mii = {
.get_drvinfo = hamachi_get_drvinfo,
};
-static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+/* private ioctl: set rx,tx intr params */
+static int hamachi_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+ void __user *data, int cmd)
+{
+ struct hamachi_private *np = netdev_priv(dev);
+ u32 *d = (u32 *)&rq->ifr_ifru;
+
+ if (!netif_running(dev))
+ return -EINVAL;
+
+ if (cmd != SIOCDEVPRIVATE + 3)
+ return -EOPNOTSUPP;
+
+ /* Should add this check here or an ordinary user can do nasty
+ * things. -KDU
+ *
+ * TODO: Shut down the Rx and Tx engines while doing this.
+ */
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ writel(d[0], np->base + TxIntrCtrl);
+ writel(d[1], np->base + RxIntrCtrl);
+ printk(KERN_NOTICE "%s: tx %08x, rx %08x intr\n", dev->name,
+ (u32)readl(np->base + TxIntrCtrl),
+ (u32)readl(np->base + RxIntrCtrl));
+
+ return 0;
+}
+
+static int hamachi_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
struct hamachi_private *np = netdev_priv(dev);
struct mii_ioctl_data *data = if_mii(rq);
@@ -1876,28 +1908,9 @@ static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
if (!netif_running(dev))
return -EINVAL;
- if (cmd == (SIOCDEVPRIVATE+3)) { /* set rx,tx intr params */
- u32 *d = (u32 *)&rq->ifr_ifru;
- /* Should add this check here or an ordinary user can do nasty
- * things. -KDU
- *
- * TODO: Shut down the Rx and Tx engines while doing this.
- */
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
- writel(d[0], np->base + TxIntrCtrl);
- writel(d[1], np->base + RxIntrCtrl);
- printk(KERN_NOTICE "%s: tx %08x, rx %08x intr\n", dev->name,
- (u32) readl(np->base + TxIntrCtrl),
- (u32) readl(np->base + RxIntrCtrl));
- rc = 0;
- }
-
- else {
- spin_lock_irq(&np->lock);
- rc = generic_mii_ioctl(&np->mii_if, data, cmd, NULL);
- spin_unlock_irq(&np->lock);
- }
+ spin_lock_irq(&np->lock);
+ rc = generic_mii_ioctl(&np->mii_if, data, cmd, NULL);
+ spin_unlock_irq(&np->lock);
return rc;
}
diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c
index d1dd9bc1bc7f..f5cd8f51be7c 100644
--- a/drivers/net/ethernet/packetengines/yellowfin.c
+++ b/drivers/net/ethernet/packetengines/yellowfin.c
@@ -362,7 +362,7 @@ static const struct net_device_ops netdev_ops = {
.ndo_set_rx_mode = set_rx_mode,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
- .ndo_do_ioctl = netdev_ioctl,
+ .ndo_eth_ioctl = netdev_ioctl,
.ndo_tx_timeout = yellowfin_tx_timeout,
};
diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c
index 040a15a828b4..7e096b2888b9 100644
--- a/drivers/net/ethernet/pasemi/pasemi_mac.c
+++ b/drivers/net/ethernet/pasemi/pasemi_mac.c
@@ -247,12 +247,13 @@ static int pasemi_mac_unmap_tx_skb(struct pasemi_mac *mac,
int f;
struct pci_dev *pdev = mac->dma_pdev;
- pci_unmap_single(pdev, dmas[0], skb_headlen(skb), PCI_DMA_TODEVICE);
+ dma_unmap_single(&pdev->dev, dmas[0], skb_headlen(skb), DMA_TO_DEVICE);
for (f = 0; f < nfrags; f++) {
const skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
- pci_unmap_page(pdev, dmas[f+1], skb_frag_size(frag), PCI_DMA_TODEVICE);
+ dma_unmap_page(&pdev->dev, dmas[f + 1], skb_frag_size(frag),
+ DMA_TO_DEVICE);
}
dev_kfree_skb_irq(skb);
@@ -548,10 +549,8 @@ static void pasemi_mac_free_rx_buffers(struct pasemi_mac *mac)
for (i = 0; i < RX_RING_SIZE; i++) {
info = &RX_DESC_INFO(rx, i);
if (info->skb && info->dma) {
- pci_unmap_single(mac->dma_pdev,
- info->dma,
- info->skb->len,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&mac->dma_pdev->dev, info->dma,
+ info->skb->len, DMA_FROM_DEVICE);
dev_kfree_skb_any(info->skb);
}
info->dma = 0;
@@ -600,11 +599,11 @@ static void pasemi_mac_replenish_rx_ring(struct net_device *dev,
if (unlikely(!skb))
break;
- dma = pci_map_single(mac->dma_pdev, skb->data,
+ dma = dma_map_single(&mac->dma_pdev->dev, skb->data,
mac->bufsz - LOCAL_SKB_ALIGN,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
- if (unlikely(pci_dma_mapping_error(mac->dma_pdev, dma))) {
+ if (dma_mapping_error(&mac->dma_pdev->dev, dma)) {
dev_kfree_skb_irq(info->skb);
break;
}
@@ -741,8 +740,9 @@ static int pasemi_mac_clean_rx(struct pasemi_mac_rxring *rx,
len = (macrx & XCT_MACRX_LLEN_M) >> XCT_MACRX_LLEN_S;
- pci_unmap_single(pdev, dma, mac->bufsz - LOCAL_SKB_ALIGN,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&pdev->dev, dma,
+ mac->bufsz - LOCAL_SKB_ALIGN,
+ DMA_FROM_DEVICE);
if (macrx & XCT_MACRX_CRC) {
/* CRC error flagged */
@@ -1444,10 +1444,10 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
nfrags = skb_shinfo(skb)->nr_frags;
- map[0] = pci_map_single(mac->dma_pdev, skb->data, skb_headlen(skb),
- PCI_DMA_TODEVICE);
+ map[0] = dma_map_single(&mac->dma_pdev->dev, skb->data,
+ skb_headlen(skb), DMA_TO_DEVICE);
map_size[0] = skb_headlen(skb);
- if (pci_dma_mapping_error(mac->dma_pdev, map[0]))
+ if (dma_mapping_error(&mac->dma_pdev->dev, map[0]))
goto out_err_nolock;
for (i = 0; i < nfrags; i++) {
@@ -1534,8 +1534,8 @@ out_err:
spin_unlock_irqrestore(&txring->lock, flags);
out_err_nolock:
while (nfrags--)
- pci_unmap_single(mac->dma_pdev, map[nfrags], map_size[nfrags],
- PCI_DMA_TODEVICE);
+ dma_unmap_single(&mac->dma_pdev->dev, map[nfrags],
+ map_size[nfrags], DMA_TO_DEVICE);
return NETDEV_TX_BUSY;
}
diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig
index 202973a82712..3f7519e435b8 100644
--- a/drivers/net/ethernet/pensando/Kconfig
+++ b/drivers/net/ethernet/pensando/Kconfig
@@ -20,7 +20,7 @@ if NET_VENDOR_PENSANDO
config IONIC
tristate "Pensando Ethernet IONIC Support"
depends on 64BIT && PCI
- depends on PTP_1588_CLOCK || !PTP_1588_CLOCK
+ depends on PTP_1588_CLOCK_OPTIONAL
select NET_DEVLINK
select DIMLIB
help
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
index e4a5416adc80..7e296fa71b36 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
@@ -165,10 +165,10 @@ static int ionic_vf_alloc(struct ionic *ionic, int num_vfs)
goto out;
}
+ ionic->num_vfs++;
/* ignore failures from older FW, we just won't get stats */
(void)ionic_set_vf_config(ionic, i, IONIC_VF_ATTR_STATSADDR,
(u8 *)&v->stats_pa);
- ionic->num_vfs++;
}
out:
@@ -373,9 +373,6 @@ static void ionic_remove(struct pci_dev *pdev)
{
struct ionic *ionic = pci_get_drvdata(pdev);
- if (!ionic)
- return;
-
del_timer_sync(&ionic->watchdog_timer);
if (ionic->lif) {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index 1dfe962e22e0..0d6858ab511c 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -15,6 +15,7 @@ static void ionic_watchdog_cb(struct timer_list *t)
{
struct ionic *ionic = from_timer(ionic, t, watchdog_timer);
struct ionic_lif *lif = ionic->lif;
+ struct ionic_deferred_work *work;
int hb;
mod_timer(&ionic->watchdog_timer,
@@ -31,6 +32,18 @@ static void ionic_watchdog_cb(struct timer_list *t)
if (hb >= 0 &&
!test_bit(IONIC_LIF_F_FW_RESET, lif->state))
ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
+
+ if (test_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state)) {
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ netdev_err(lif->netdev, "rxmode change dropped\n");
+ return;
+ }
+
+ work->type = IONIC_DW_TYPE_RX_MODE;
+ netdev_dbg(lif->netdev, "deferred: rx_mode\n");
+ ionic_lif_deferred_enqueue(&lif->deferred, work);
+ }
}
void ionic_init_devinfo(struct ionic *ionic)
@@ -106,6 +119,8 @@ int ionic_dev_setup(struct ionic *ionic)
idev->last_fw_hb = 0;
idev->fw_hb_ready = true;
idev->fw_status_ready = true;
+ idev->fw_generation = IONIC_FW_STS_F_GENERATION &
+ ioread8(&idev->dev_info_regs->fw_status);
mod_timer(&ionic->watchdog_timer,
round_jiffies(jiffies + ionic->watchdog_period));
@@ -121,7 +136,9 @@ int ionic_heartbeat_check(struct ionic *ionic)
{
struct ionic_dev *idev = &ionic->idev;
unsigned long check_time, last_check_time;
- bool fw_status_ready, fw_hb_ready;
+ bool fw_status_ready = true;
+ bool fw_hb_ready;
+ u8 fw_generation;
u8 fw_status;
u32 fw_hb;
@@ -140,9 +157,29 @@ do_check_time:
/* firmware is useful only if the running bit is set and
* fw_status != 0xff (bad PCI read)
+ * If fw_status is not ready don't bother with the generation.
*/
fw_status = ioread8(&idev->dev_info_regs->fw_status);
- fw_status_ready = (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING);
+
+ if (fw_status == 0xff || !(fw_status & IONIC_FW_STS_F_RUNNING)) {
+ fw_status_ready = false;
+ } else {
+ fw_generation = fw_status & IONIC_FW_STS_F_GENERATION;
+ if (idev->fw_generation != fw_generation) {
+ dev_info(ionic->dev, "FW generation 0x%02x -> 0x%02x\n",
+ idev->fw_generation, fw_generation);
+
+ idev->fw_generation = fw_generation;
+
+ /* If the generation changed, the fw status is not
+ * ready so we need to trigger a fw-down cycle. After
+ * the down, the next watchdog will see the fw is up
+ * and the generation value stable, so will trigger
+ * the fw-up activity.
+ */
+ fw_status_ready = false;
+ }
+ }
/* is this a transition? */
if (fw_status_ready != idev->fw_status_ready) {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index c25cf9b744c5..8311086fb1f4 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -143,6 +143,7 @@ struct ionic_dev {
u32 last_fw_hb;
bool fw_hb_ready;
bool fw_status_ready;
+ u8 fw_generation;
u64 __iomem *db_pages;
dma_addr_t phy_db_pages;
@@ -160,8 +161,6 @@ struct ionic_dev {
struct ionic_cq_info {
union {
void *cq_desc;
- struct ionic_txq_comp *txcq;
- struct ionic_rxq_comp *rxcq;
struct ionic_admin_comp *admincq;
struct ionic_notifyq_event *notifyq;
};
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
index b41301a5b0df..c7d0e195d176 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
@@ -64,7 +64,7 @@ struct ionic *ionic_devlink_alloc(struct device *dev)
{
struct devlink *dl;
- dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic));
+ dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic), dev);
return devlink_priv(dl);
}
@@ -82,7 +82,7 @@ int ionic_devlink_register(struct ionic *ionic)
struct devlink_port_attrs attrs = {};
int err;
- err = devlink_register(dl, ionic->dev);
+ err = devlink_register(dl);
if (err) {
dev_warn(ionic->dev, "devlink_register failed: %d\n", err);
return err;
@@ -91,20 +91,20 @@ int ionic_devlink_register(struct ionic *ionic)
attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
devlink_port_attrs_set(&ionic->dl_port, &attrs);
err = devlink_port_register(dl, &ionic->dl_port, 0);
- if (err)
+ if (err) {
dev_err(ionic->dev, "devlink_port_register failed: %d\n", err);
- else
- devlink_port_type_eth_set(&ionic->dl_port,
- ionic->lif->netdev);
+ devlink_unregister(dl);
+ return err;
+ }
- return err;
+ devlink_port_type_eth_set(&ionic->dl_port, ionic->lif->netdev);
+ return 0;
}
void ionic_devlink_unregister(struct ionic *ionic)
{
struct devlink *dl = priv_to_devlink(ionic);
- if (ionic->dl_port.registered)
- devlink_port_unregister(&ionic->dl_port);
+ devlink_port_unregister(&ionic->dl_port);
devlink_unregister(dl);
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
index 6583be570e45..e91b4874a57f 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -32,6 +32,9 @@ static void ionic_get_stats(struct net_device *netdev,
struct ionic_lif *lif = netdev_priv(netdev);
u32 i;
+ if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+ return;
+
memset(buf, 0, stats->n_stats * sizeof(*buf));
for (i = 0; i < ionic_num_stats_grps; i++)
ionic_stats_groups[i].get_values(lif, &buf);
@@ -274,6 +277,9 @@ static int ionic_set_link_ksettings(struct net_device *netdev,
struct ionic *ionic = lif->ionic;
int err = 0;
+ if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+ return -EBUSY;
+
/* set autoneg */
if (ks->base.autoneg != idev->port_info->config.an_enable) {
mutex_lock(&ionic->dev_cmd_lock);
@@ -320,6 +326,9 @@ static int ionic_set_pauseparam(struct net_device *netdev,
u32 requested_pause;
int err;
+ if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+ return -EBUSY;
+
if (pause->autoneg)
return -EOPNOTSUPP;
@@ -372,6 +381,9 @@ static int ionic_set_fecparam(struct net_device *netdev,
u8 fec_type;
int ret = 0;
+ if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+ return -EBUSY;
+
if (lif->ionic->idev.port_info->config.an_enable) {
netdev_err(netdev, "FEC request not allowed while autoneg is enabled\n");
return -EINVAL;
@@ -408,7 +420,9 @@ static int ionic_set_fecparam(struct net_device *netdev,
}
static int ionic_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coalesce)
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct ionic_lif *lif = netdev_priv(netdev);
@@ -426,7 +440,9 @@ static int ionic_get_coalesce(struct net_device *netdev,
}
static int ionic_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coalesce)
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct ionic_lif *lif = netdev_priv(netdev);
struct ionic_identity *ident;
@@ -528,6 +544,9 @@ static int ionic_set_ringparam(struct net_device *netdev,
struct ionic_queue_params qparam;
int err;
+ if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+ return -EBUSY;
+
ionic_init_queue_params(lif, &qparam);
if (ring->rx_mini_pending || ring->rx_jumbo_pending) {
@@ -597,6 +616,9 @@ static int ionic_set_channels(struct net_device *netdev,
int max_cnt;
int err;
+ if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+ return -EBUSY;
+
ionic_init_queue_params(lif, &qparam);
if (ch->rx_count != ch->tx_count) {
@@ -947,6 +969,9 @@ static int ionic_nway_reset(struct net_device *netdev)
struct ionic *ionic = lif->ionic;
int err = 0;
+ if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+ return -EBUSY;
+
/* flap the link to force auto-negotiation */
mutex_lock(&ionic->dev_cmd_lock);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index 0478b48d9895..278610ed7227 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -2936,6 +2936,8 @@ struct ionic_hwstamp_regs {
* @asic_type: Asic type
* @asic_rev: Asic revision
* @fw_status: Firmware status
+ * bit 0 - 1 = fw running
+ * bit 4-7 - 4 bit generation number, changes on fw restart
* @fw_heartbeat: Firmware heartbeat counter
* @serial_num: Serial number
* @fw_version: Firmware version
@@ -2949,7 +2951,8 @@ union ionic_dev_info_regs {
u8 version;
u8 asic_type;
u8 asic_rev;
-#define IONIC_FW_STS_F_RUNNING 0x1
+#define IONIC_FW_STS_F_RUNNING 0x01
+#define IONIC_FW_STS_F_GENERATION 0xF0
u8 fw_status;
u32 fw_heartbeat;
char fw_version[IONIC_DEVINFO_FWVERS_BUFLEN];
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index e795fa63ca12..23c9e196a784 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -11,6 +11,7 @@
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/cpumask.h>
+#include <linux/crash_dump.h>
#include "ionic.h"
#include "ionic_bus.h"
@@ -29,9 +30,6 @@ static const u8 ionic_qtype_versions[IONIC_QTYPE_MAX] = {
*/
};
-static void ionic_lif_rx_mode(struct ionic_lif *lif);
-static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr);
-static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr);
static void ionic_link_status_check(struct ionic_lif *lif);
static void ionic_lif_handle_fw_down(struct ionic_lif *lif);
static void ionic_lif_handle_fw_up(struct ionic_lif *lif);
@@ -91,20 +89,21 @@ static void ionic_lif_deferred_work(struct work_struct *work)
case IONIC_DW_TYPE_RX_MODE:
ionic_lif_rx_mode(lif);
break;
- case IONIC_DW_TYPE_RX_ADDR_ADD:
- ionic_lif_addr_add(lif, w->addr);
- break;
- case IONIC_DW_TYPE_RX_ADDR_DEL:
- ionic_lif_addr_del(lif, w->addr);
- break;
case IONIC_DW_TYPE_LINK_STATUS:
ionic_link_status_check(lif);
break;
case IONIC_DW_TYPE_LIF_RESET:
- if (w->fw_status)
+ if (w->fw_status) {
ionic_lif_handle_fw_up(lif);
- else
+ } else {
ionic_lif_handle_fw_down(lif);
+
+ /* Fire off another watchdog to see
+ * if the FW is already back rather than
+ * waiting another whole cycle
+ */
+ mod_timer(&lif->ionic->watchdog_timer, jiffies + 1);
+ }
break;
default:
break;
@@ -850,10 +849,8 @@ int ionic_lif_create_hwstamp_txq(struct ionic_lif *lif)
u64 features;
int err;
- mutex_lock(&lif->queue_lock);
-
if (lif->hwstamp_txq)
- goto out;
+ return 0;
features = IONIC_Q_F_2X_CQ_DESC | IONIC_TXQ_F_HWSTAMP;
@@ -895,9 +892,6 @@ int ionic_lif_create_hwstamp_txq(struct ionic_lif *lif)
}
}
-out:
- mutex_unlock(&lif->queue_lock);
-
return 0;
err_qcq_enable:
@@ -908,7 +902,6 @@ err_qcq_init:
ionic_qcq_free(lif, txq);
devm_kfree(lif->ionic->dev, txq);
err_qcq_alloc:
- mutex_unlock(&lif->queue_lock);
return err;
}
@@ -920,10 +913,8 @@ int ionic_lif_create_hwstamp_rxq(struct ionic_lif *lif)
u64 features;
int err;
- mutex_lock(&lif->queue_lock);
-
if (lif->hwstamp_rxq)
- goto out;
+ return 0;
features = IONIC_Q_F_2X_CQ_DESC | IONIC_RXQ_F_HWSTAMP;
@@ -961,9 +952,6 @@ int ionic_lif_create_hwstamp_rxq(struct ionic_lif *lif)
}
}
-out:
- mutex_unlock(&lif->queue_lock);
-
return 0;
err_qcq_enable:
@@ -974,7 +962,6 @@ err_qcq_init:
ionic_qcq_free(lif, rxq);
devm_kfree(lif->ionic->dev, rxq);
err_qcq_alloc:
- mutex_unlock(&lif->queue_lock);
return err;
}
@@ -1077,7 +1064,11 @@ static int ionic_lif_add_hwstamp_rxfilt(struct ionic_lif *lif, u64 pkt_class)
if (err && err != -EEXIST)
return err;
- return ionic_rx_filter_save(lif, 0, qid, 0, &ctx);
+ spin_lock_bh(&lif->rx_filters.lock);
+ err = ionic_rx_filter_save(lif, 0, qid, 0, &ctx, IONIC_FILTER_STATE_SYNCED);
+ spin_unlock_bh(&lif->rx_filters.lock);
+
+ return err;
}
int ionic_lif_set_hwstamp_rxfilt(struct ionic_lif *lif, u64 pkt_class)
@@ -1250,7 +1241,7 @@ void ionic_get_stats64(struct net_device *netdev,
ns->tx_errors = ns->tx_aborted_errors;
}
-static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr)
+int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr)
{
struct ionic_admin_ctx ctx = {
.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
@@ -1260,27 +1251,83 @@ static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr)
.match = cpu_to_le16(IONIC_RX_FILTER_MATCH_MAC),
},
};
+ int nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters);
+ bool mc = is_multicast_ether_addr(addr);
struct ionic_rx_filter *f;
- int err;
+ int err = 0;
+
+ memcpy(ctx.cmd.rx_filter_add.mac.addr, addr, ETH_ALEN);
- /* don't bother if we already have it */
spin_lock_bh(&lif->rx_filters.lock);
f = ionic_rx_filter_by_addr(lif, addr);
+ if (f) {
+ /* don't bother if we already have it and it is sync'd */
+ if (f->state == IONIC_FILTER_STATE_SYNCED) {
+ spin_unlock_bh(&lif->rx_filters.lock);
+ return 0;
+ }
+
+ /* mark preemptively as sync'd to block any parallel attempts */
+ f->state = IONIC_FILTER_STATE_SYNCED;
+ } else {
+ /* save as SYNCED to catch any DEL requests while processing */
+ err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx,
+ IONIC_FILTER_STATE_SYNCED);
+ }
spin_unlock_bh(&lif->rx_filters.lock);
- if (f)
- return 0;
+ if (err)
+ return err;
netdev_dbg(lif->netdev, "rx_filter add ADDR %pM\n", addr);
- memcpy(ctx.cmd.rx_filter_add.mac.addr, addr, ETH_ALEN);
- err = ionic_adminq_post_wait(lif, &ctx);
- if (err && err != -EEXIST)
- return err;
+ /* Don't bother with the write to FW if we know there's no room,
+ * we can try again on the next sync attempt.
+ */
+ if ((lif->nucast + lif->nmcast) >= nfilters)
+ err = -ENOSPC;
+ else
+ err = ionic_adminq_post_wait(lif, &ctx);
+
+ spin_lock_bh(&lif->rx_filters.lock);
+ if (err && err != -EEXIST) {
+ /* set the state back to NEW so we can try again later */
+ f = ionic_rx_filter_by_addr(lif, addr);
+ if (f && f->state == IONIC_FILTER_STATE_SYNCED)
+ f->state = IONIC_FILTER_STATE_NEW;
+
+ spin_unlock_bh(&lif->rx_filters.lock);
+
+ if (err == -ENOSPC)
+ return 0;
+ else
+ return err;
+ }
- return ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx);
+ if (mc)
+ lif->nmcast++;
+ else
+ lif->nucast++;
+
+ f = ionic_rx_filter_by_addr(lif, addr);
+ if (f && f->state == IONIC_FILTER_STATE_OLD) {
+ /* Someone requested a delete while we were adding
+ * so update the filter info with the results from the add
+ * and the data will be there for the delete on the next
+ * sync cycle.
+ */
+ err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx,
+ IONIC_FILTER_STATE_OLD);
+ } else {
+ err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx,
+ IONIC_FILTER_STATE_SYNCED);
+ }
+
+ spin_unlock_bh(&lif->rx_filters.lock);
+
+ return err;
}
-static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr)
+int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr)
{
struct ionic_admin_ctx ctx = {
.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
@@ -1290,6 +1337,7 @@ static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr)
},
};
struct ionic_rx_filter *f;
+ int state;
int err;
spin_lock_bh(&lif->rx_filters.lock);
@@ -1302,65 +1350,37 @@ static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr)
netdev_dbg(lif->netdev, "rx_filter del ADDR %pM (id %d)\n",
addr, f->filter_id);
+ state = f->state;
ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(f->filter_id);
ionic_rx_filter_free(lif, f);
- spin_unlock_bh(&lif->rx_filters.lock);
-
- err = ionic_adminq_post_wait(lif, &ctx);
- if (err && err != -EEXIST)
- return err;
- return 0;
-}
+ if (is_multicast_ether_addr(addr) && lif->nmcast)
+ lif->nmcast--;
+ else if (!is_multicast_ether_addr(addr) && lif->nucast)
+ lif->nucast--;
-static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add)
-{
- unsigned int nmfilters;
- unsigned int nufilters;
+ spin_unlock_bh(&lif->rx_filters.lock);
- if (add) {
- /* Do we have space for this filter? We test the counters
- * here before checking the need for deferral so that we
- * can return an overflow error to the stack.
- */
- nmfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters);
- nufilters = le32_to_cpu(lif->identity->eth.max_ucast_filters);
-
- if ((is_multicast_ether_addr(addr) && lif->nmcast < nmfilters))
- lif->nmcast++;
- else if (!is_multicast_ether_addr(addr) &&
- lif->nucast < nufilters)
- lif->nucast++;
- else
- return -ENOSPC;
- } else {
- if (is_multicast_ether_addr(addr) && lif->nmcast)
- lif->nmcast--;
- else if (!is_multicast_ether_addr(addr) && lif->nucast)
- lif->nucast--;
+ if (state != IONIC_FILTER_STATE_NEW) {
+ err = ionic_adminq_post_wait(lif, &ctx);
+ if (err && err != -EEXIST)
+ return err;
}
- netdev_dbg(lif->netdev, "rx_filter %s %pM\n",
- add ? "add" : "del", addr);
- if (add)
- return ionic_lif_addr_add(lif, addr);
- else
- return ionic_lif_addr_del(lif, addr);
-
return 0;
}
static int ionic_addr_add(struct net_device *netdev, const u8 *addr)
{
- return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR);
+ return ionic_lif_list_addr(netdev_priv(netdev), addr, ADD_ADDR);
}
static int ionic_addr_del(struct net_device *netdev, const u8 *addr)
{
- return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR);
+ return ionic_lif_list_addr(netdev_priv(netdev), addr, DEL_ADDR);
}
-static void ionic_lif_rx_mode(struct ionic_lif *lif)
+void ionic_lif_rx_mode(struct ionic_lif *lif)
{
struct net_device *netdev = lif->netdev;
unsigned int nfilters;
@@ -1381,32 +1401,26 @@ static void ionic_lif_rx_mode(struct ionic_lif *lif)
rx_mode |= (nd_flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0;
rx_mode |= (nd_flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0;
- /* sync unicast addresses
- * next check to see if we're in an overflow state
+ /* sync the mac filters */
+ ionic_rx_filter_sync(lif);
+
+ /* check for overflow state
* if so, we track that we overflowed and enable NIC PROMISC
* else if the overflow is set and not needed
* we remove our overflow flag and check the netdev flags
* to see if we can disable NIC PROMISC
*/
- __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del);
nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters);
- if (netdev_uc_count(netdev) + 1 > nfilters) {
+ if ((lif->nucast + lif->nmcast) >= nfilters) {
rx_mode |= IONIC_RX_MODE_F_PROMISC;
+ rx_mode |= IONIC_RX_MODE_F_ALLMULTI;
lif->uc_overflow = true;
+ lif->mc_overflow = true;
} else if (lif->uc_overflow) {
lif->uc_overflow = false;
+ lif->mc_overflow = false;
if (!(nd_flags & IFF_PROMISC))
rx_mode &= ~IONIC_RX_MODE_F_PROMISC;
- }
-
- /* same for multicast */
- __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del);
- nfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters);
- if (netdev_mc_count(netdev) > nfilters) {
- rx_mode |= IONIC_RX_MODE_F_ALLMULTI;
- lif->mc_overflow = true;
- } else if (lif->mc_overflow) {
- lif->mc_overflow = false;
if (!(nd_flags & IFF_ALLMULTI))
rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI;
}
@@ -1449,28 +1463,26 @@ static void ionic_lif_rx_mode(struct ionic_lif *lif)
mutex_unlock(&lif->config_lock);
}
-static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep)
+static void ionic_ndo_set_rx_mode(struct net_device *netdev)
{
struct ionic_lif *lif = netdev_priv(netdev);
struct ionic_deferred_work *work;
- if (!can_sleep) {
- work = kzalloc(sizeof(*work), GFP_ATOMIC);
- if (!work) {
- netdev_err(lif->netdev, "rxmode change dropped\n");
- return;
- }
- work->type = IONIC_DW_TYPE_RX_MODE;
- netdev_dbg(lif->netdev, "deferred: rx_mode\n");
- ionic_lif_deferred_enqueue(&lif->deferred, work);
- } else {
- ionic_lif_rx_mode(lif);
- }
-}
+ /* Sync the kernel filter list with the driver filter list */
+ __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del);
+ __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del);
-static void ionic_ndo_set_rx_mode(struct net_device *netdev)
-{
- ionic_set_rx_mode(netdev, CAN_NOT_SLEEP);
+ /* Shove off the rest of the rxmode work to the work task
+ * which will include syncing the filters to the firmware.
+ */
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ netdev_err(lif->netdev, "rxmode change dropped\n");
+ return;
+ }
+ work->type = IONIC_DW_TYPE_RX_MODE;
+ netdev_dbg(lif->netdev, "deferred: rx_mode\n");
+ ionic_lif_deferred_enqueue(&lif->deferred, work);
}
static __le64 ionic_netdev_features_to_nic(netdev_features_t features)
@@ -1599,7 +1611,6 @@ static int ionic_init_nic_features(struct ionic_lif *lif)
features = NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_FILTER |
- NETIF_F_RXHASH |
NETIF_F_SG |
NETIF_F_HW_CSUM |
NETIF_F_RXCSUM |
@@ -1607,6 +1618,9 @@ static int ionic_init_nic_features(struct ionic_lif *lif)
NETIF_F_TSO6 |
NETIF_F_TSO_ECN;
+ if (lif->nxqs > 1)
+ features |= NETIF_F_RXHASH;
+
err = ionic_set_nic_features(lif, features);
if (err)
return err;
@@ -1689,13 +1703,13 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa)
if (!is_zero_ether_addr(netdev->dev_addr)) {
netdev_info(netdev, "deleting mac addr %pM\n",
netdev->dev_addr);
- ionic_addr_del(netdev, netdev->dev_addr);
+ ionic_lif_addr_del(netdev_priv(netdev), netdev->dev_addr);
}
eth_commit_mac_addr_change(netdev, addr);
netdev_info(netdev, "updating mac addr %pM\n", mac);
- return ionic_addr_add(netdev, mac);
+ return ionic_lif_addr_add(netdev_priv(netdev), mac);
}
static void ionic_stop_queues_reconfig(struct ionic_lif *lif)
@@ -1801,7 +1815,12 @@ static int ionic_vlan_rx_add_vid(struct net_device *netdev, __be16 proto,
if (err)
return err;
- return ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx);
+ spin_lock_bh(&lif->rx_filters.lock);
+ err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx,
+ IONIC_FILTER_STATE_SYNCED);
+ spin_unlock_bh(&lif->rx_filters.lock);
+
+ return err;
}
static int ionic_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto,
@@ -2104,7 +2123,7 @@ static int ionic_txrx_init(struct ionic_lif *lif)
if (lif->netdev->features & NETIF_F_RXHASH)
ionic_lif_rss_init(lif);
- ionic_set_rx_mode(lif->netdev, CAN_SLEEP);
+ ionic_lif_rx_mode(lif);
return 0;
@@ -2202,9 +2221,11 @@ static int ionic_open(struct net_device *netdev)
if (test_and_clear_bit(IONIC_LIF_F_BROKEN, lif->state))
netdev_info(netdev, "clearing broken state\n");
+ mutex_lock(&lif->queue_lock);
+
err = ionic_txrx_alloc(lif);
if (err)
- return err;
+ goto err_unlock;
err = ionic_txrx_init(lif);
if (err)
@@ -2225,12 +2246,21 @@ static int ionic_open(struct net_device *netdev)
goto err_txrx_deinit;
}
+ /* If hardware timestamping is enabled, but the queues were freed by
+ * ionic_stop, those need to be reallocated and initialized, too.
+ */
+ ionic_lif_hwstamp_recreate_queues(lif);
+
+ mutex_unlock(&lif->queue_lock);
+
return 0;
err_txrx_deinit:
ionic_txrx_deinit(lif);
err_txrx_free:
ionic_txrx_free(lif);
+err_unlock:
+ mutex_unlock(&lif->queue_lock);
return err;
}
@@ -2250,14 +2280,16 @@ static int ionic_stop(struct net_device *netdev)
if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
return 0;
+ mutex_lock(&lif->queue_lock);
ionic_stop_queues(lif);
ionic_txrx_deinit(lif);
ionic_txrx_free(lif);
+ mutex_unlock(&lif->queue_lock);
return 0;
}
-static int ionic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+static int ionic_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
struct ionic_lif *lif = netdev_priv(netdev);
@@ -2519,7 +2551,7 @@ static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set)
static const struct net_device_ops ionic_netdev_ops = {
.ndo_open = ionic_open,
.ndo_stop = ionic_stop,
- .ndo_do_ioctl = ionic_do_ioctl,
+ .ndo_eth_ioctl = ionic_eth_ioctl,
.ndo_start_xmit = ionic_start_xmit,
.ndo_get_stats64 = ionic_get_stats64,
.ndo_set_rx_mode = ionic_ndo_set_rx_mode,
@@ -2580,22 +2612,26 @@ int ionic_reconfigure_queues(struct ionic_lif *lif,
struct ionic_qcq **tx_qcqs = NULL;
struct ionic_qcq **rx_qcqs = NULL;
unsigned int flags, i;
- int err = -ENOMEM;
+ int err = 0;
/* allocate temporary qcq arrays to hold new queue structs */
if (qparam->nxqs != lif->nxqs || qparam->ntxq_descs != lif->ntxq_descs) {
tx_qcqs = devm_kcalloc(lif->ionic->dev, lif->ionic->ntxqs_per_lif,
sizeof(struct ionic_qcq *), GFP_KERNEL);
- if (!tx_qcqs)
+ if (!tx_qcqs) {
+ err = -ENOMEM;
goto err_out;
+ }
}
if (qparam->nxqs != lif->nxqs ||
qparam->nrxq_descs != lif->nrxq_descs ||
qparam->rxq_features != lif->rxq_features) {
rx_qcqs = devm_kcalloc(lif->ionic->dev, lif->ionic->nrxqs_per_lif,
sizeof(struct ionic_qcq *), GFP_KERNEL);
- if (!rx_qcqs)
+ if (!rx_qcqs) {
+ err = -ENOMEM;
goto err_out;
+ }
}
/* allocate new desc_info and rings, but leave the interrupt setup
@@ -2774,6 +2810,9 @@ err_out:
ionic_qcq_free(lif, lif->rxqcqs[i]);
}
+ if (err)
+ netdev_info(lif->netdev, "%s: failed %d\n", __func__, err);
+
return err;
}
@@ -2827,8 +2866,14 @@ int ionic_lif_alloc(struct ionic *ionic)
lif->ionic = ionic;
lif->index = 0;
- lif->ntxq_descs = IONIC_DEF_TXRX_DESC;
- lif->nrxq_descs = IONIC_DEF_TXRX_DESC;
+
+ if (is_kdump_kernel()) {
+ lif->ntxq_descs = IONIC_MIN_TXRX_DESC;
+ lif->nrxq_descs = IONIC_MIN_TXRX_DESC;
+ } else {
+ lif->ntxq_descs = IONIC_DEF_TXRX_DESC;
+ lif->nrxq_descs = IONIC_DEF_TXRX_DESC;
+ }
/* Convert the default coalesce value to actual hw resolution */
lif->rx_coalesce_usecs = IONIC_ITR_COAL_USEC_DEFAULT;
@@ -3179,7 +3224,7 @@ static int ionic_station_set(struct ionic_lif *lif)
*/
if (!ether_addr_equal(ctx.comp.lif_getattr.mac,
netdev->dev_addr))
- ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR);
+ ionic_lif_addr_add(lif, netdev->dev_addr);
} else {
/* Update the netdev mac with the device's mac */
memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len);
@@ -3196,7 +3241,7 @@ static int ionic_station_set(struct ionic_lif *lif)
netdev_dbg(lif->netdev, "adding station MAC addr %pM\n",
netdev->dev_addr);
- ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR);
+ ionic_lif_addr_add(lif, netdev->dev_addr);
return 0;
}
@@ -3514,6 +3559,7 @@ int ionic_lif_size(struct ionic *ionic)
unsigned int min_intrs;
int err;
+ /* retrieve basic values from FW */
lc = &ident->lif.eth.config;
dev_nintrs = le32_to_cpu(ident->dev.nintrs);
neqs_per_lif = le32_to_cpu(ident->lif.rdma.eq_qtype.qid_count);
@@ -3521,6 +3567,15 @@ int ionic_lif_size(struct ionic *ionic)
ntxqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_TXQ]);
nrxqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_RXQ]);
+ /* limit values to play nice with kdump */
+ if (is_kdump_kernel()) {
+ dev_nintrs = 2;
+ neqs_per_lif = 0;
+ nnqs_per_lif = 0;
+ ntxqs_per_lif = 1;
+ nrxqs_per_lif = 1;
+ }
+
/* reserve last queue id for hardware timestamping */
if (lc->features & cpu_to_le64(IONIC_ETH_HW_TIMESTAMP)) {
if (ntxqs_per_lif <= 1 || nrxqs_per_lif <= 1) {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index 69ab59fedb6c..4915184f3efb 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -98,8 +98,6 @@ struct ionic_qcq {
enum ionic_deferred_work_type {
IONIC_DW_TYPE_RX_MODE,
- IONIC_DW_TYPE_RX_ADDR_ADD,
- IONIC_DW_TYPE_RX_ADDR_DEL,
IONIC_DW_TYPE_LINK_STATUS,
IONIC_DW_TYPE_LIF_RESET,
};
@@ -147,6 +145,7 @@ enum ionic_lif_state_flags {
IONIC_LIF_F_SW_DEBUG_STATS,
IONIC_LIF_F_UP,
IONIC_LIF_F_LINK_CHECK_REQUESTED,
+ IONIC_LIF_F_FILTER_SYNC_NEEDED,
IONIC_LIF_F_FW_RESET,
IONIC_LIF_F_SPLIT_INTR,
IONIC_LIF_F_BROKEN,
@@ -295,6 +294,10 @@ int ionic_lif_alloc(struct ionic *ionic);
int ionic_lif_init(struct ionic_lif *lif);
void ionic_lif_free(struct ionic_lif *lif);
void ionic_lif_deinit(struct ionic_lif *lif);
+
+int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr);
+int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr);
+
int ionic_lif_register(struct ionic_lif *lif);
void ionic_lif_unregister(struct ionic_lif *lif);
int ionic_lif_identify(struct ionic *ionic, u8 lif_type,
@@ -303,6 +306,7 @@ int ionic_lif_size(struct ionic *ionic);
#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
void ionic_lif_hwstamp_replay(struct ionic_lif *lif);
+void ionic_lif_hwstamp_recreate_queues(struct ionic_lif *lif);
int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr);
int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr);
ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter);
@@ -312,6 +316,7 @@ void ionic_lif_alloc_phc(struct ionic_lif *lif);
void ionic_lif_free_phc(struct ionic_lif *lif);
#else
static inline void ionic_lif_hwstamp_replay(struct ionic_lif *lif) {}
+static inline void ionic_lif_hwstamp_recreate_queues(struct ionic_lif *lif) {}
static inline int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
{
@@ -342,6 +347,7 @@ int ionic_lif_set_hwstamp_rxfilt(struct ionic_lif *lif, u64 pkt_class);
int ionic_lif_rss_config(struct ionic_lif *lif, u16 types,
const u8 *key, const u32 *indir);
+void ionic_lif_rx_mode(struct ionic_lif *lif);
int ionic_reconfigure_queues(struct ionic_lif *lif,
struct ionic_queue_params *qparam);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index 61cfe2120817..6f07bf509efe 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -375,8 +375,8 @@ try_again:
* heartbeat check but is still alive and will process this
* request, so don't clean the dev_cmd in this case.
*/
- dev_warn(ionic->dev, "DEVCMD %s (%d) failed - FW halted\n",
- ionic_opcode_to_str(opcode), opcode);
+ dev_dbg(ionic->dev, "DEVCMD %s (%d) failed - FW halted\n",
+ ionic_opcode_to_str(opcode), opcode);
return -ENXIO;
}
@@ -450,6 +450,8 @@ int ionic_identify(struct ionic *ionic)
}
mutex_unlock(&ionic->dev_cmd_lock);
+ dev_info(ionic->dev, "FW: %s\n", idev->dev_info.fw_version);
+
if (err) {
dev_err(ionic->dev, "Cannot identify ionic: %dn", err);
goto err_out;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c b/drivers/net/ethernet/pensando/ionic/ionic_phc.c
index 6e2403c71608..eed2db69d708 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_phc.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c
@@ -119,8 +119,8 @@ static int ionic_lif_hwstamp_set_ts_config(struct ionic_lif *lif,
config->rx_filter = HWTSTAMP_FILTER_ALL;
}
- dev_dbg(ionic->dev, "config_rx_filter %d rx_filt %#llx rx_all %d\n",
- config->rx_filter, rx_filt, rx_all);
+ dev_dbg(ionic->dev, "%s: config_rx_filter %d rx_filt %#llx rx_all %d\n",
+ __func__, config->rx_filter, rx_filt, rx_all);
if (tx_mode) {
err = ionic_lif_create_hwstamp_txq(lif);
@@ -194,7 +194,9 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
return -EFAULT;
+ mutex_lock(&lif->queue_lock);
err = ionic_lif_hwstamp_set_ts_config(lif, &config);
+ mutex_unlock(&lif->queue_lock);
if (err) {
netdev_info(lif->netdev, "hwstamp set failed: %d\n", err);
return err;
@@ -213,11 +215,37 @@ void ionic_lif_hwstamp_replay(struct ionic_lif *lif)
if (!lif->phc || !lif->phc->ptp)
return;
+ mutex_lock(&lif->queue_lock);
err = ionic_lif_hwstamp_set_ts_config(lif, NULL);
+ mutex_unlock(&lif->queue_lock);
if (err)
netdev_info(lif->netdev, "hwstamp replay failed: %d\n", err);
}
+/* Re-create the hardware timestamping queues that the saved PHC config asks for.
+ *
+ * Does nothing when lif->phc or lif->phc->ptp is not set.  Otherwise, with
+ * phc->config_lock held, the timestamping txq is created when
+ * ts_config_tx_mode is non-zero and the timestamping rxq when
+ * ts_config_rx_filt is non-zero.  A failure is only logged; nothing is
+ * returned to the caller.
+ *
+ * ionic_open() calls this with lif->queue_lock already held; the create
+ * helpers invoked here do not take queue_lock themselves.
+ */
+void ionic_lif_hwstamp_recreate_queues(struct ionic_lif *lif)
+{
+ int err;
+
+ /* no PHC/PTP set up on this lif, nothing to recreate */
+ if (!lif->phc || !lif->phc->ptp)
+ return;
+
+ mutex_lock(&lif->phc->config_lock);
+
+ if (lif->phc->ts_config_tx_mode) {
+ err = ionic_lif_create_hwstamp_txq(lif);
+ if (err)
+ netdev_info(lif->netdev, "hwstamp recreate txq failed: %d\n", err);
+ }
+
+ if (lif->phc->ts_config_rx_filt) {
+ err = ionic_lif_create_hwstamp_rxq(lif);
+ if (err)
+ netdev_info(lif->netdev, "hwstamp recreate rxq failed: %d\n", err);
+ }
+
+ mutex_unlock(&lif->phc->config_lock);
+}
+
int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr)
{
struct hwtstamp_config config;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
index d71316d9ded2..7e3a5634c161 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
@@ -4,6 +4,7 @@
#include <linux/netdevice.h>
#include <linux/dynamic_debug.h>
#include <linux/etherdevice.h>
+#include <linux/list.h>
#include "ionic.h"
#include "ionic_lif.h"
@@ -120,11 +121,12 @@ void ionic_rx_filters_deinit(struct ionic_lif *lif)
}
int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
- u32 hash, struct ionic_admin_ctx *ctx)
+ u32 hash, struct ionic_admin_ctx *ctx,
+ enum ionic_filter_state state)
{
struct device *dev = lif->ionic->dev;
struct ionic_rx_filter_add_cmd *ac;
- struct ionic_rx_filter *f;
+ struct ionic_rx_filter *f = NULL;
struct hlist_head *head;
unsigned int key;
@@ -133,9 +135,11 @@ int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
switch (le16_to_cpu(ac->match)) {
case IONIC_RX_FILTER_MATCH_VLAN:
key = le16_to_cpu(ac->vlan.vlan);
+ f = ionic_rx_filter_by_vlan(lif, le16_to_cpu(ac->vlan.vlan));
break;
case IONIC_RX_FILTER_MATCH_MAC:
key = *(u32 *)ac->mac.addr;
+ f = ionic_rx_filter_by_addr(lif, ac->mac.addr);
break;
case IONIC_RX_FILTER_MATCH_MAC_VLAN:
key = le16_to_cpu(ac->mac_vlan.vlan);
@@ -147,12 +151,19 @@ int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
return -EINVAL;
}
- f = devm_kzalloc(dev, sizeof(*f), GFP_KERNEL);
- if (!f)
- return -ENOMEM;
+ if (f) {
+ /* remove from current linking so we can refresh it */
+ hlist_del(&f->by_id);
+ hlist_del(&f->by_hash);
+ } else {
+ f = devm_kzalloc(dev, sizeof(*f), GFP_ATOMIC);
+ if (!f)
+ return -ENOMEM;
+ }
f->flow_id = flow_id;
f->filter_id = le32_to_cpu(ctx->comp.rx_filter_add.filter_id);
+ f->state = state;
f->rxq_index = rxq_index;
memcpy(&f->cmd, ac, sizeof(f->cmd));
netdev_dbg(lif->netdev, "rx_filter add filter_id %d\n", f->filter_id);
@@ -160,8 +171,6 @@ int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
INIT_HLIST_NODE(&f->by_hash);
INIT_HLIST_NODE(&f->by_id);
- spin_lock_bh(&lif->rx_filters.lock);
-
key = hash_32(key, IONIC_RX_FILTER_HASH_BITS);
head = &lif->rx_filters.by_hash[key];
hlist_add_head(&f->by_hash, head);
@@ -170,8 +179,6 @@ int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
head = &lif->rx_filters.by_id[key];
hlist_add_head(&f->by_id, head);
- spin_unlock_bh(&lif->rx_filters.lock);
-
return 0;
}
@@ -231,3 +238,121 @@ struct ionic_rx_filter *ionic_rx_filter_rxsteer(struct ionic_lif *lif)
return NULL;
}
+
+/* Record a MAC filter add or delete request in the driver's filter list.
+ *
+ * Only the list entry and its state are touched here, under
+ * rx_filters.lock; no admin command is posted from this function.
+ * IONIC_LIF_F_FILTER_SYNC_NEEDED is set on the way out so the change can be
+ * pushed to the device by ionic_rx_filter_sync().
+ *
+ * @lif:  lif whose filter list is updated
+ * @addr: MAC address of the filter
+ * @mode: ADD_ADDR or DEL_ADDR
+ *
+ * Returns 0 on success, the error from ionic_rx_filter_save() if a new
+ * entry could not be saved, or -ENOENT when asked to delete an address
+ * that is not in the list.
+ */
+int ionic_lif_list_addr(struct ionic_lif *lif, const u8 *addr, bool mode)
+{
+ struct ionic_rx_filter *f;
+ int err;
+
+ spin_lock_bh(&lif->rx_filters.lock);
+
+ f = ionic_rx_filter_by_addr(lif, addr);
+ if (mode == ADD_ADDR && !f) {
+ /* unknown address: build the add command on the stack just so
+ * ionic_rx_filter_save() can store it with the new entry
+ */
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.rx_filter_add = {
+ .opcode = IONIC_CMD_RX_FILTER_ADD,
+ .lif_index = cpu_to_le16(lif->index),
+ .match = cpu_to_le16(IONIC_RX_FILTER_MATCH_MAC),
+ },
+ };
+
+ memcpy(ctx.cmd.rx_filter_add.mac.addr, addr, ETH_ALEN);
+ err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx,
+ IONIC_FILTER_STATE_NEW);
+ if (err) {
+ spin_unlock_bh(&lif->rx_filters.lock);
+ return err;
+ }
+
+ } else if (mode == ADD_ADDR && f) {
+ /* re-adding an entry that was marked for delete: keep it */
+ if (f->state == IONIC_FILTER_STATE_OLD)
+ f->state = IONIC_FILTER_STATE_SYNCED;
+
+ } else if (mode == DEL_ADDR && f) {
+ /* a NEW entry is simply dropped from the list; a SYNCED one is
+ * marked OLD so the sync pass issues the delete
+ */
+ if (f->state == IONIC_FILTER_STATE_NEW)
+ ionic_rx_filter_free(lif, f);
+ else if (f->state == IONIC_FILTER_STATE_SYNCED)
+ f->state = IONIC_FILTER_STATE_OLD;
+ } else if (mode == DEL_ADDR && !f) {
+ spin_unlock_bh(&lif->rx_filters.lock);
+ return -ENOENT;
+ }
+
+ spin_unlock_bh(&lif->rx_filters.lock);
+
+ set_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state);
+
+ return 0;
+}
+
+/* One pending filter change, queued on a local list by ionic_rx_filter_sync() */
+struct sync_item {
+ struct list_head list; /* link on the local add or delete list */
+ struct ionic_rx_filter f; /* by-value copy of the filter entry */
+};
+
+/* Push pending MAC filter changes from the driver's filter list to the device.
+ *
+ * IONIC_LIF_F_FILTER_SYNC_NEEDED is cleared on entry.  Filters in state NEW
+ * or OLD are copied, under rx_filters.lock, onto local add and delete lists;
+ * the lock is then dropped and the deletes, followed by the adds, are issued
+ * through ionic_lif_addr_del() and ionic_lif_addr_add().  If an added filter
+ * is still not SYNCED afterwards the flag is set again so that a later sync
+ * pass can retry it.
+ */
+void ionic_rx_filter_sync(struct ionic_lif *lif)
+{
+ struct device *dev = lif->ionic->dev;
+ struct list_head sync_add_list;
+ struct list_head sync_del_list;
+ struct sync_item *sync_item;
+ struct ionic_rx_filter *f;
+ struct hlist_head *head;
+ struct hlist_node *tmp;
+ struct sync_item *spos;
+ unsigned int i;
+
+ INIT_LIST_HEAD(&sync_add_list);
+ INIT_LIST_HEAD(&sync_del_list);
+
+ clear_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state);
+
+ /* Copy the filters to be added and deleted
+ * into a separate local list that needs no locking.
+ */
+ spin_lock_bh(&lif->rx_filters.lock);
+ for (i = 0; i < IONIC_RX_FILTER_HLISTS; i++) {
+ head = &lif->rx_filters.by_id[i];
+ hlist_for_each_entry_safe(f, tmp, head, by_id) {
+ if (f->state == IONIC_FILTER_STATE_NEW ||
+ f->state == IONIC_FILTER_STATE_OLD) {
+ /* the filter spinlock is held with BHs disabled,
+ * so this allocation must not sleep
+ */
+ sync_item = devm_kzalloc(dev, sizeof(*sync_item),
+ GFP_ATOMIC);
+ if (!sync_item)
+ goto loop_out;
+
+ sync_item->f = *f;
+
+ if (f->state == IONIC_FILTER_STATE_NEW)
+ list_add(&sync_item->list, &sync_add_list);
+ else
+ list_add(&sync_item->list, &sync_del_list);
+ }
+ }
+ }
+loop_out:
+ spin_unlock_bh(&lif->rx_filters.lock);
+
+ /* If the add or delete fails, it won't get marked as sync'd
+ * and will be tried again in the next sync action.
+ * Do the deletes first in case we're in an overflow state and
+ * they can clear room for some new filters
+ */
+ list_for_each_entry_safe(sync_item, spos, &sync_del_list, list) {
+ (void)ionic_lif_addr_del(lif, sync_item->f.cmd.mac.addr);
+
+ list_del(&sync_item->list);
+ devm_kfree(dev, sync_item);
+ }
+
+ list_for_each_entry_safe(sync_item, spos, &sync_add_list, list) {
+ (void)ionic_lif_addr_add(lif, sync_item->f.cmd.mac.addr);
+
+ /* sync_item->f is a snapshot taken while the filter was NEW,
+ * so check the live list entry to see whether the add stuck
+ */
+ spin_lock_bh(&lif->rx_filters.lock);
+ f = ionic_rx_filter_by_addr(lif, sync_item->f.cmd.mac.addr);
+ if (f && f->state != IONIC_FILTER_STATE_SYNCED)
+ set_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state);
+ spin_unlock_bh(&lif->rx_filters.lock);
+
+ list_del(&sync_item->list);
+ devm_kfree(dev, sync_item);
+ }
+}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h
index 1ead48be3c83..a66e35f0833b 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h
@@ -5,10 +5,18 @@
#define _IONIC_RX_FILTER_H_
#define IONIC_RXQ_INDEX_ANY (0xFFFF)
+
+/* Sync state of an rx filter list entry, as used by the filter list code */
+enum ionic_filter_state {
+ IONIC_FILTER_STATE_SYNCED, /* not picked up by ionic_rx_filter_sync() */
+ IONIC_FILTER_STATE_NEW, /* add requested; queued for add by the sync pass */
+ IONIC_FILTER_STATE_OLD, /* delete requested; queued for delete by the sync pass */
+};
+
struct ionic_rx_filter {
u32 flow_id;
u32 filter_id;
u16 rxq_index;
+ enum ionic_filter_state state;
struct ionic_rx_filter_add_cmd cmd;
struct hlist_node by_hash;
struct hlist_node by_id;
@@ -28,9 +36,13 @@ void ionic_rx_filter_replay(struct ionic_lif *lif);
int ionic_rx_filters_init(struct ionic_lif *lif);
void ionic_rx_filters_deinit(struct ionic_lif *lif);
int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
- u32 hash, struct ionic_admin_ctx *ctx);
+ u32 hash, struct ionic_admin_ctx *ctx,
+ enum ionic_filter_state state);
struct ionic_rx_filter *ionic_rx_filter_by_vlan(struct ionic_lif *lif, u16 vid);
struct ionic_rx_filter *ionic_rx_filter_by_addr(struct ionic_lif *lif, const u8 *addr);
struct ionic_rx_filter *ionic_rx_filter_rxsteer(struct ionic_lif *lif);
+void ionic_rx_filter_sync(struct ionic_lif *lif);
+int ionic_lif_list_addr(struct ionic_lif *lif, const u8 *addr, bool mode);
+int ionic_rx_filters_need_sync(struct ionic_lif *lif);
#endif /* _IONIC_RX_FILTER_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 08870190e4d2..37c39581b659 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -32,19 +32,13 @@ static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q)
return netdev_get_tx_queue(q->lif->netdev, q->index);
}
-static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info)
-{
- buf_info->page = NULL;
- buf_info->page_offset = 0;
- buf_info->dma_addr = 0;
-}
-
static int ionic_rx_page_alloc(struct ionic_queue *q,
struct ionic_buf_info *buf_info)
{
struct net_device *netdev = q->lif->netdev;
struct ionic_rx_stats *stats;
struct device *dev;
+ struct page *page;
dev = q->dev;
stats = q_to_rx_stats(q);
@@ -55,26 +49,27 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
return -EINVAL;
}
- buf_info->page = alloc_pages(IONIC_PAGE_GFP_MASK, 0);
- if (unlikely(!buf_info->page)) {
+ page = alloc_pages(IONIC_PAGE_GFP_MASK, 0);
+ if (unlikely(!page)) {
net_err_ratelimited("%s: %s page alloc failed\n",
netdev->name, q->name);
stats->alloc_err++;
return -ENOMEM;
}
- buf_info->page_offset = 0;
- buf_info->dma_addr = dma_map_page(dev, buf_info->page, buf_info->page_offset,
+ buf_info->dma_addr = dma_map_page(dev, page, 0,
IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) {
- __free_pages(buf_info->page, 0);
- ionic_rx_buf_reset(buf_info);
+ __free_pages(page, 0);
net_err_ratelimited("%s: %s dma map failed\n",
netdev->name, q->name);
stats->dma_map_err++;
return -EIO;
}
+ buf_info->page = page;
+ buf_info->page_offset = 0;
+
return 0;
}
@@ -95,7 +90,7 @@ static void ionic_rx_page_free(struct ionic_queue *q,
dma_unmap_page(dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
__free_pages(buf_info->page, 0);
- ionic_rx_buf_reset(buf_info);
+ buf_info->page = NULL;
}
static bool ionic_rx_buf_recycle(struct ionic_queue *q,
@@ -139,7 +134,7 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
buf_info = &desc_info->bufs[0];
len = le16_to_cpu(comp->len);
- prefetch(buf_info->page);
+ prefetchw(buf_info->page);
skb = napi_get_frags(&q_to_qcq(q)->napi);
if (unlikely(!skb)) {
@@ -170,7 +165,7 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
if (!ionic_rx_buf_recycle(q, buf_info, frag_len)) {
dma_unmap_page(dev, buf_info->dma_addr,
IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
- ionic_rx_buf_reset(buf_info);
+ buf_info->page = NULL;
}
buf_info++;
diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig
index 98f430905ffa..1203353238e5 100644
--- a/drivers/net/ethernet/qlogic/Kconfig
+++ b/drivers/net/ethernet/qlogic/Kconfig
@@ -99,7 +99,7 @@ config QED_SRIOV
config QEDE
tristate "QLogic QED 25/40/100Gb Ethernet NIC"
depends on QED
- imply PTP_1588_CLOCK
+ depends on PTP_1588_CLOCK_OPTIONAL
help
This enables the support for Marvell FastLinQ adapters family,
ethernet driver.
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
index e5c51256243a..f13fa7396aef 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
@@ -1863,7 +1863,6 @@ static inline u32 netxen_tx_avail(struct nx_host_tx_ring *tx_ring)
int netxen_get_flash_mac_addr(struct netxen_adapter *adapter, u64 *mac);
int netxen_p3_get_mac_addr(struct netxen_adapter *adapter, u64 *mac);
void netxen_change_ringparam(struct netxen_adapter *adapter);
-int netxen_rom_fast_read(struct netxen_adapter *adapter, int addr, int *valp);
extern const struct ethtool_ops netxen_nic_ethtool_ops;
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c
index dd22cb056d03..a075643f5826 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c
@@ -731,7 +731,9 @@ netxen_nic_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
* firmware coalescing to default.
*/
static int netxen_set_intr_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ethcoal)
+ struct ethtool_coalesce *ethcoal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct netxen_adapter *adapter = netdev_priv(netdev);
@@ -775,7 +777,9 @@ static int netxen_set_intr_coalesce(struct net_device *netdev,
}
static int netxen_get_intr_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ethcoal)
+ struct ethtool_coalesce *ethcoal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct netxen_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index b590c70539b5..d58e021614cd 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -26,15 +26,6 @@
extern const struct qed_common_ops qed_common_ops_pass;
-#define QED_MAJOR_VERSION 8
-#define QED_MINOR_VERSION 37
-#define QED_REVISION_VERSION 0
-#define QED_ENGINEERING_VERSION 20
-
-#define QED_VERSION \
- ((QED_MAJOR_VERSION << 24) | (QED_MINOR_VERSION << 16) | \
- (QED_REVISION_VERSION << 8) | QED_ENGINEERING_VERSION)
-
#define STORM_FW_VERSION \
((FW_MAJOR_VERSION << 24) | (FW_MINOR_VERSION << 16) | \
(FW_REVISION_VERSION << 8) | FW_ENGINEERING_VERSION)
@@ -517,12 +508,6 @@ enum qed_hsi_def_type {
QED_NUM_HSI_DEFS
};
-#define DRV_MODULE_VERSION \
- __stringify(QED_MAJOR_VERSION) "." \
- __stringify(QED_MINOR_VERSION) "." \
- __stringify(QED_REVISION_VERSION) "." \
- __stringify(QED_ENGINEERING_VERSION)
-
struct qed_simd_fp_handler {
void *token;
void (*func)(void *);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index e81dd34a3cac..dc93ddea8906 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -741,7 +741,6 @@ static int
qed_dcbx_read_local_lldp_mib(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
struct qed_dcbx_mib_meta_data data;
- int rc = 0;
memset(&data, 0, sizeof(data));
data.addr = p_hwfn->mcp_info->port_addr + offsetof(struct public_port,
@@ -750,7 +749,7 @@ qed_dcbx_read_local_lldp_mib(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
data.size = sizeof(struct lldp_config_params_s);
qed_memcpy_from(p_hwfn, p_ptt, data.lldp_local, data.addr, data.size);
- return rc;
+ return 0;
}
static int
@@ -810,7 +809,6 @@ static int
qed_dcbx_read_local_mib(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
struct qed_dcbx_mib_meta_data data;
- int rc = 0;
memset(&data, 0, sizeof(data));
data.addr = p_hwfn->mcp_info->port_addr +
@@ -819,7 +817,7 @@ qed_dcbx_read_local_mib(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
data.size = sizeof(struct dcbx_local_params);
qed_memcpy_from(p_hwfn, p_ptt, data.local_admin, data.addr, data.size);
- return rc;
+ return 0;
}
static int qed_dcbx_read_mib(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.c b/drivers/net/ethernet/qlogic/qed/qed_devlink.c
index cf7f4da68e69..78070682f2df 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_devlink.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.c
@@ -93,7 +93,7 @@ static const struct devlink_health_reporter_ops qed_fw_fatal_reporter_ops = {
.dump = qed_fw_fatal_reporter_dump,
};
-#define QED_REPORTER_FW_GRACEFUL_PERIOD 1200000
+#define QED_REPORTER_FW_GRACEFUL_PERIOD 0
void qed_fw_reporters_create(struct devlink *devlink)
{
@@ -207,14 +207,15 @@ struct devlink *qed_devlink_register(struct qed_dev *cdev)
struct devlink *dl;
int rc;
- dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink));
+ dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink),
+ &cdev->pdev->dev);
if (!dl)
return ERR_PTR(-ENOMEM);
qdevlink = devlink_priv(dl);
qdevlink->cdev = cdev;
- rc = devlink_register(dl, &cdev->pdev->dev);
+ rc = devlink_register(dl);
if (rc)
goto err_free;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index 578935f643b8..f78e6055f654 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -351,6 +351,9 @@ static int qed_fw_assertion(struct qed_hwfn *p_hwfn)
qed_hw_err_notify(p_hwfn, p_hwfn->p_dpc_ptt, QED_HW_ERR_FW_ASSERT,
"FW assertion!\n");
+ /* Clear assert indications */
+ qed_wr(p_hwfn, p_hwfn->p_dpc_ptt, MISC_REG_AEU_GENERAL_ATTN_32, 0);
+
return -EINVAL;
}
@@ -464,12 +467,19 @@ static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn)
u32 int_sts, first_drop_reason, details, address, all_drops_reason;
struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt;
+ int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
+ if (int_sts == 0xdeadbeaf) {
+ DP_NOTICE(p_hwfn->cdev,
+ "DORQ is being reset, skipping int_sts handler\n");
+
+ return 0;
+ }
+
/* int_sts may be zero since all PFs were interrupted for doorbell
* overflow but another one already handled it. Can abort here. If
* This PF also requires overflow recovery we will be interrupted again.
* The masked almost full indication may also be set. Ignoring.
*/
- int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL))
return 0;
@@ -528,6 +538,9 @@ static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn)
static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
{
+ if (p_hwfn->cdev->recov_in_prog)
+ return 0;
+
p_hwfn->db_recovery_info.dorq_attn = true;
qed_dorq_attn_overflow(p_hwfn);
@@ -943,6 +956,13 @@ qed_int_deassertion_aeu_bit(struct qed_hwfn *p_hwfn,
DP_INFO(p_hwfn, "`%s' - Disabled future attentions\n",
p_bit_name);
+ /* Re-enable FW assertion (Gen 32) interrupts */
+ val = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+ MISC_REG_AEU_ENABLE4_IGU_OUT_0);
+ val |= MISC_REG_AEU_ENABLE4_IGU_OUT_0_GENERAL_ATTN32;
+ qed_wr(p_hwfn, p_hwfn->p_dpc_ptt,
+ MISC_REG_AEU_ENABLE4_IGU_OUT_0, val);
+
out:
return rc;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index a99861124630..fc8b3e64f153 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -1624,8 +1624,6 @@ qed_iwarp_get_listener(struct qed_hwfn *p_hwfn,
static const u32 ip_zero[4] = { 0, 0, 0, 0 };
bool found = false;
- qed_iwarp_print_cm_info(p_hwfn, cm_info);
-
list_for_each_entry(listener,
&p_hwfn->p_rdma_info->iwarp.listen_list,
list_entry) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 02a4610d9330..c46a7f756ed5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -327,6 +327,9 @@ static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
unsigned long flags;
int rc = -EINVAL;
+ if (!p_ll2_conn)
+ return rc;
+
spin_lock_irqsave(&p_tx->lock, flags);
if (p_tx->b_completing_packet) {
rc = -EBUSY;
@@ -500,7 +503,16 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie)
unsigned long flags = 0;
int rc = 0;
+ if (!p_ll2_conn)
+ return rc;
+
spin_lock_irqsave(&p_rx->lock, flags);
+
+ if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) {
+ spin_unlock_irqrestore(&p_rx->lock, flags);
+ return 0;
+ }
+
cq_new_idx = le16_to_cpu(*p_rx->p_fw_cons);
cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain);
@@ -821,6 +833,9 @@ static int qed_ll2_lb_rxq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)p_cookie;
int rc;
+ if (!p_ll2_conn)
+ return 0;
+
if (!QED_LL2_RX_REGISTERED(p_ll2_conn))
return 0;
@@ -844,6 +859,9 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
u16 new_idx = 0, num_bds = 0;
int rc;
+ if (!p_ll2_conn)
+ return 0;
+
if (!QED_LL2_TX_REGISTERED(p_ll2_conn))
return 0;
@@ -1728,6 +1746,8 @@ int qed_ll2_post_rx_buffer(void *cxt,
if (!p_ll2_conn)
return -EINVAL;
p_rx = &p_ll2_conn->rx_queue;
+ if (!p_rx->set_prod_addr)
+ return -EIO;
spin_lock_irqsave(&p_rx->lock, flags);
if (!list_empty(&p_rx->free_descq))
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 5bd58c65e163..15ef59aa34ff 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -49,11 +49,10 @@
#define QED_NVM_CFG_MAX_ATTRS 50
static char version[] =
- "QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n";
+ "QLogic FastLinQ 4xxxx Core Module qed\n";
MODULE_DESCRIPTION("QLogic FastLinQ 4xxxx Core Module");
MODULE_LICENSE("GPL");
-MODULE_VERSION(DRV_MODULE_VERSION);
#define FW_FILE_VERSION \
__stringify(FW_MAJOR_VERSION) "." \
@@ -616,7 +615,12 @@ static int qed_enable_msix(struct qed_dev *cdev,
rc = cnt;
}
- if (rc > 0) {
+ /* For VFs, we should return with an error in case we didn't get the
+ * exact number of msix vectors as we requested.
+ * Not doing that will lead to a crash when starting queues for
+ * this VF.
+ */
+ if ((IS_PF(cdev) && rc > 0) || (IS_VF(cdev) && rc == cnt)) {
/* MSI-x configuration was achieved */
int_params->out.int_mode = QED_INT_MODE_MSIX;
int_params->out.num_vectors = rc;
@@ -1216,6 +1220,10 @@ static void qed_slowpath_task(struct work_struct *work)
if (test_and_clear_bit(QED_SLOWPATH_PERIODIC_DB_REC,
&hwfn->slowpath_task_flags)) {
+ /* skip qed_db_rec_handler during recovery/unload */
+ if (hwfn->cdev->recov_in_prog || !hwfn->slowpath_wq_active)
+ goto out;
+
qed_db_rec_handler(hwfn, ptt);
if (hwfn->periodic_db_rec_count--)
qed_slowpath_delayed_work(hwfn,
@@ -1223,6 +1231,7 @@ static void qed_slowpath_task(struct work_struct *work)
QED_PERIODIC_DB_REC_INTERVAL);
}
+out:
qed_ptt_release(hwfn, ptt);
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 4387292c37e2..6e5a6cc97d0e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -944,7 +944,6 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
memset(&in_params, 0, sizeof(in_params));
in_params.hsi_ver = QED_LOAD_REQ_HSI_VER_DEFAULT;
- in_params.drv_ver_0 = QED_VERSION;
in_params.drv_ver_1 = qed_get_config_bitmap();
in_params.fw_ver = STORM_FW_VERSION;
rc = eocre_get_mfw_drv_role(p_hwfn, p_params->drv_role, &mfw_drv_role);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c
index c1dd71d19f3f..3b84d00cf987 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c
@@ -4,7 +4,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
-#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/types.h>
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index da864d12916b..4f4b79250a2b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -1285,8 +1285,7 @@ qed_rdma_create_qp(void *rdma_cxt,
if (!rdma_cxt || !in_params || !out_params ||
!p_hwfn->p_rdma_info->active) {
- DP_ERR(p_hwfn->cdev,
- "qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n",
+ pr_err("qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n",
rdma_cxt, in_params, out_params);
return NULL;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index 9db22be42476..da1b7fdcbda7 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -504,6 +504,8 @@
0x180824UL
#define MISC_REG_AEU_GENERAL_ATTN_0 \
0x008400UL
+#define MISC_REG_AEU_GENERAL_ATTN_32 \
+ 0x008480UL
#define MISC_REG_AEU_GENERAL_ATTN_35 \
0x00848cUL
#define CAU_REG_SB_ADDR_MEMORY \
@@ -518,6 +520,12 @@
0x180804UL
#define MISC_REG_AEU_ENABLE1_IGU_OUT_0 \
0x00849cUL
+#define MISC_REG_AEU_ENABLE4_IGU_OUT_0 \
+ 0x0084a8UL
+#define MISC_REG_AEU_ENABLE4_IGU_OUT_0_GENERAL_ATTN32 \
+ (0x1UL << 0)
+#define MISC_REG_AEU_ENABLE4_IGU_OUT_0_GENERAL_ATTN32_SHIFT \
+ 0
#define MISC_REG_AEU_AFTER_INVERT_1_IGU \
0x0087b4UL
#define MISC_REG_AEU_MASK_ATTN_IGU \
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 5630008f38b7..f90dcfe9ee68 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -30,15 +30,6 @@
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
-#define QEDE_MAJOR_VERSION 8
-#define QEDE_MINOR_VERSION 37
-#define QEDE_REVISION_VERSION 0
-#define QEDE_ENGINEERING_VERSION 20
-#define DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "." \
- __stringify(QEDE_MINOR_VERSION) "." \
- __stringify(QEDE_REVISION_VERSION) "." \
- __stringify(QEDE_ENGINEERING_VERSION)
-
#define DRV_MODULE_SYM qede
struct qede_stats_common {
@@ -589,7 +580,9 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto,
struct flow_cls_offload *f);
void qede_forced_speed_maps_init(void);
-int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal);
+int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack);
int qede_set_per_coalesce(struct net_device *dev, u32 queue,
struct ethtool_coalesce *coal);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 1560ad3d9290..8284c4c1528f 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -625,13 +625,13 @@ static void qede_get_drvinfo(struct net_device *ndev,
(edev->dev_info.common.mfw_rev >> 8) & 0xFF,
edev->dev_info.common.mfw_rev & 0xFF);
- if ((strlen(storm) + strlen(DRV_MODULE_VERSION) + strlen("[storm] ")) <
+ if ((strlen(storm) + strlen("[storm]")) <
sizeof(info->version))
snprintf(info->version, sizeof(info->version),
- "%s [storm %s]", DRV_MODULE_VERSION, storm);
+ "[storm %s]", storm);
else
snprintf(info->version, sizeof(info->version),
- "%s %s", DRV_MODULE_VERSION, storm);
+ "%s", storm);
if (edev->dev_info.common.mbi_version) {
snprintf(mbi, ETHTOOL_FWVERS_LEN, "%d.%d.%d",
@@ -760,7 +760,9 @@ static int qede_flash_device(struct net_device *dev,
}
static int qede_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
void *rx_handle = NULL, *tx_handle = NULL;
struct qede_dev *edev = netdev_priv(dev);
@@ -819,7 +821,9 @@ out:
return rc;
}
-int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
+int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct qede_dev *edev = netdev_priv(dev);
struct qede_fastpath *fp;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 7c6064baeba2..9837bdb89cd4 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -39,12 +39,8 @@
#include "qede.h"
#include "qede_ptp.h"
-static char version[] =
- "QLogic FastLinQ 4xxxx Ethernet Driver qede " DRV_MODULE_VERSION "\n";
-
MODULE_DESCRIPTION("QLogic FastLinQ 4xxxx Ethernet Driver");
MODULE_LICENSE("GPL");
-MODULE_VERSION(DRV_MODULE_VERSION);
static uint debug;
module_param(debug, uint, 0);
@@ -258,7 +254,7 @@ int __init qede_init(void)
{
int ret;
- pr_info("qede_init: %s\n", version);
+ pr_info("qede init: QLogic FastLinQ 4xxxx Ethernet Driver qede\n");
qede_forced_speed_maps_init();
@@ -644,7 +640,7 @@ static const struct net_device_ops qede_netdev_ops = {
.ndo_set_mac_address = qede_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = qede_change_mtu,
- .ndo_do_ioctl = qede_ioctl,
+ .ndo_eth_ioctl = qede_ioctl,
.ndo_tx_timeout = qede_tx_timeout,
#ifdef CONFIG_QED_SRIOV
.ndo_set_vf_mac = qede_set_vf_mac,
@@ -1157,10 +1153,6 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
/* Start the Slowpath-process */
memset(&sp_params, 0, sizeof(sp_params));
sp_params.int_mode = QED_INT_MODE_MSIX;
- sp_params.drv_major = QEDE_MAJOR_VERSION;
- sp_params.drv_minor = QEDE_MINOR_VERSION;
- sp_params.drv_rev = QEDE_REVISION_VERSION;
- sp_params.drv_eng = QEDE_ENGINEERING_VERSION;
strlcpy(sp_params.name, "qede LAN", QED_DRV_VER_STR_SIZE);
rc = qed_ops->common->slowpath_start(cdev, &sp_params);
if (rc) {
@@ -1874,6 +1866,7 @@ static void qede_sync_free_irqs(struct qede_dev *edev)
}
edev->int_info.used_cnt = 0;
+ edev->int_info.msix_cnt = 0;
}
static int qede_req_msix_irqs(struct qede_dev *edev)
@@ -1906,6 +1899,12 @@ static int qede_req_msix_irqs(struct qede_dev *edev)
&edev->fp_array[i]);
if (rc) {
DP_ERR(edev, "Request fp %d irq failed\n", i);
+#ifdef CONFIG_RFS_ACCEL
+ if (edev->ndev->rx_cpu_rmap)
+ free_irq_cpu_rmap(edev->ndev->rx_cpu_rmap);
+
+ edev->ndev->rx_cpu_rmap = NULL;
+#endif
qede_sync_free_irqs(edev);
return rc;
}
@@ -2298,6 +2297,15 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
rc = qede_stop_queues(edev);
if (rc) {
+#ifdef CONFIG_RFS_ACCEL
+ if (edev->dev_info.common.b_arfs_capable) {
+ qede_poll_for_freeing_arfs_filters(edev);
+ if (edev->ndev->rx_cpu_rmap)
+ free_irq_cpu_rmap(edev->ndev->rx_cpu_rmap);
+
+ edev->ndev->rx_cpu_rmap = NULL;
+ }
+#endif
qede_sync_free_irqs(edev);
goto out;
}
@@ -2427,7 +2435,6 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
goto out;
err4:
qede_sync_free_irqs(edev);
- memset(&edev->int_info.msix_cnt, 0, sizeof(struct qed_int_info));
err3:
qede_napi_disable_remove(edev);
err2:
@@ -2628,8 +2635,10 @@ static void qede_generic_hw_err_handler(struct qede_dev *edev)
"Generic sleepable HW error handling started - err_flags 0x%lx\n",
edev->err_flags);
- if (edev->devlink)
+ if (edev->devlink) {
+ DP_NOTICE(edev, "Reporting fatal error to devlink\n");
edev->ops->common->report_fatal_error(edev->devlink, edev->last_err_type);
+ }
clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
@@ -2651,6 +2660,8 @@ static void qede_set_hw_err_flags(struct qede_dev *edev,
case QED_HW_ERR_FW_ASSERT:
set_bit(QEDE_ERR_ATTN_CLR_EN, &err_flags);
set_bit(QEDE_ERR_GET_DBG_INFO, &err_flags);
+ /* mark this error as recoverable and start recovery */
+ set_bit(QEDE_ERR_IS_RECOVERABLE, &err_flags);
break;
default:
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index d8882d0b6b49..d51bac7ba5af 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -3156,8 +3156,10 @@ int qlcnic_83xx_flash_read32(struct qlcnic_adapter *adapter, u32 flash_addr,
indirect_addr = QLC_83XX_FLASH_DIRECT_DATA(addr);
ret = QLCRD32(adapter, indirect_addr, &err);
- if (err == -EIO)
+ if (err == -EIO) {
+ qlcnic_83xx_unlock_flash(adapter);
return err;
+ }
word = ret;
*(u32 *)p_data = word;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
index d8f0863b3934..fc364b4ab6eb 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
@@ -1021,7 +1021,7 @@ clear_diag_irq:
static void qlcnic_create_loopback_buff(unsigned char *data, u8 mac[])
{
- unsigned char random_data[] = {0xa8, 0x06, 0x45, 0x00};
+ static const unsigned char random_data[] = {0xa8, 0x06, 0x45, 0x00};
memset(data, 0x4e, QLCNIC_ILB_PKT_SIZE);
@@ -1527,7 +1527,9 @@ qlcnic_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
* firmware coalescing to default.
*/
static int qlcnic_set_intr_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ethcoal)
+ struct ethtool_coalesce *ethcoal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct qlcnic_adapter *adapter = netdev_priv(netdev);
int err;
@@ -1551,7 +1553,9 @@ static int qlcnic_set_intr_coalesce(struct net_device *netdev,
}
static int qlcnic_get_intr_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ethcoal)
+ struct ethtool_coalesce *ethcoal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct qlcnic_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
index e6784023bce4..3d61a767a8a3 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
@@ -94,10 +94,8 @@ void qlcnic_release_rx_buffers(struct qlcnic_adapter *adapter)
if (rx_buf->skb == NULL)
continue;
- pci_unmap_single(adapter->pdev,
- rx_buf->dma,
- rds_ring->dma_size,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&adapter->pdev->dev, rx_buf->dma,
+ rds_ring->dma_size, DMA_FROM_DEVICE);
dev_kfree_skb_any(rx_buf->skb);
}
@@ -139,16 +137,16 @@ void qlcnic_release_tx_buffers(struct qlcnic_adapter *adapter,
for (i = 0; i < tx_ring->num_desc; i++) {
buffrag = cmd_buf->frag_array;
if (buffrag->dma) {
- pci_unmap_single(adapter->pdev, buffrag->dma,
- buffrag->length, PCI_DMA_TODEVICE);
+ dma_unmap_single(&adapter->pdev->dev, buffrag->dma,
+ buffrag->length, DMA_TO_DEVICE);
buffrag->dma = 0ULL;
}
for (j = 1; j < cmd_buf->frag_count; j++) {
buffrag++;
if (buffrag->dma) {
- pci_unmap_page(adapter->pdev, buffrag->dma,
- buffrag->length,
- PCI_DMA_TODEVICE);
+ dma_unmap_page(&adapter->pdev->dev,
+ buffrag->dma, buffrag->length,
+ DMA_TO_DEVICE);
buffrag->dma = 0ULL;
}
}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index af4c516a9e7c..29cdcb2285b1 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -587,9 +587,9 @@ static int qlcnic_map_tx_skb(struct pci_dev *pdev, struct sk_buff *skb,
nr_frags = skb_shinfo(skb)->nr_frags;
nf = &pbuf->frag_array[0];
- map = pci_map_single(pdev, skb->data, skb_headlen(skb),
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(pdev, map))
+ map = dma_map_single(&pdev->dev, skb->data, skb_headlen(skb),
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&pdev->dev, map))
goto out_err;
nf->dma = map;
@@ -612,11 +612,11 @@ static int qlcnic_map_tx_skb(struct pci_dev *pdev, struct sk_buff *skb,
unwind:
while (--i >= 0) {
nf = &pbuf->frag_array[i+1];
- pci_unmap_page(pdev, nf->dma, nf->length, PCI_DMA_TODEVICE);
+ dma_unmap_page(&pdev->dev, nf->dma, nf->length, DMA_TO_DEVICE);
}
nf = &pbuf->frag_array[0];
- pci_unmap_single(pdev, nf->dma, skb_headlen(skb), PCI_DMA_TODEVICE);
+ dma_unmap_single(&pdev->dev, nf->dma, skb_headlen(skb), DMA_TO_DEVICE);
out_err:
return -ENOMEM;
@@ -630,11 +630,11 @@ static void qlcnic_unmap_buffers(struct pci_dev *pdev, struct sk_buff *skb,
for (i = 0; i < nr_frags; i++) {
nf = &pbuf->frag_array[i+1];
- pci_unmap_page(pdev, nf->dma, nf->length, PCI_DMA_TODEVICE);
+ dma_unmap_page(&pdev->dev, nf->dma, nf->length, DMA_TO_DEVICE);
}
nf = &pbuf->frag_array[0];
- pci_unmap_single(pdev, nf->dma, skb_headlen(skb), PCI_DMA_TODEVICE);
+ dma_unmap_single(&pdev->dev, nf->dma, skb_headlen(skb), DMA_TO_DEVICE);
pbuf->skb = NULL;
}
@@ -825,10 +825,10 @@ static int qlcnic_alloc_rx_skb(struct qlcnic_adapter *adapter,
}
skb_reserve(skb, NET_IP_ALIGN);
- dma = pci_map_single(pdev, skb->data,
- rds_ring->dma_size, PCI_DMA_FROMDEVICE);
+ dma = dma_map_single(&pdev->dev, skb->data, rds_ring->dma_size,
+ DMA_FROM_DEVICE);
- if (pci_dma_mapping_error(pdev, dma)) {
+ if (dma_mapping_error(&pdev->dev, dma)) {
adapter->stats.rx_dma_map_error++;
dev_kfree_skb_any(skb);
return -ENOMEM;
@@ -903,13 +903,13 @@ static int qlcnic_process_cmd_ring(struct qlcnic_adapter *adapter,
buffer = &tx_ring->cmd_buf_arr[sw_consumer];
if (buffer->skb) {
frag = &buffer->frag_array[0];
- pci_unmap_single(pdev, frag->dma, frag->length,
- PCI_DMA_TODEVICE);
+ dma_unmap_single(&pdev->dev, frag->dma, frag->length,
+ DMA_TO_DEVICE);
frag->dma = 0ULL;
for (i = 1; i < buffer->frag_count; i++) {
frag++;
- pci_unmap_page(pdev, frag->dma, frag->length,
- PCI_DMA_TODEVICE);
+ dma_unmap_page(&pdev->dev, frag->dma,
+ frag->length, DMA_TO_DEVICE);
frag->dma = 0ULL;
}
tx_ring->tx_stats.xmit_finished++;
@@ -1147,8 +1147,8 @@ static struct sk_buff *qlcnic_process_rxbuf(struct qlcnic_adapter *adapter,
return NULL;
}
- pci_unmap_single(adapter->pdev, buffer->dma, ring->dma_size,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&adapter->pdev->dev, buffer->dma, ring->dma_size,
+ DMA_FROM_DEVICE);
skb = buffer->skb;
if (likely((adapter->netdev->features & NETIF_F_RXCSUM) &&
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index a4fa507903ee..75960a29f80e 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -2343,11 +2343,9 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev,
static int qlcnic_set_dma_mask(struct pci_dev *pdev, int *pci_using_dac)
{
- if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) &&
- !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)))
+ if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)))
*pci_using_dac = 1;
- else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) &&
- !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)))
+ else if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
*pci_using_dac = 0;
else {
dev_err(&pdev->dev, "Unable to set DMA mask, aborting\n");
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index ad655f0a4965..9015a38eaced 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -377,7 +377,7 @@ static const struct net_device_ops emac_netdev_ops = {
.ndo_start_xmit = emac_start_xmit,
.ndo_set_mac_address = eth_mac_addr,
.ndo_change_mtu = emac_change_mtu,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_tx_timeout = emac_tx_timeout,
.ndo_get_stats64 = emac_get_stats64,
.ndo_set_features = emac_set_features,
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index b64c254e00ba..8427fe1b8fd1 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -434,7 +434,7 @@ qcaspi_receive(struct qcaspi *qca)
skb_put(qca->rx_skb, retcode);
qca->rx_skb->protocol = eth_type_trans(
qca->rx_skb, qca->rx_skb->dev);
- qca->rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb_checksum_none_assert(qca->rx_skb);
netif_rx_ni(qca->rx_skb);
qca->rx_skb = netdev_alloc_skb_ip_align(net_dev,
net_dev->mtu + VLAN_ETH_HLEN);
diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c
index bcdeca7b3366..ce3f7ce31adc 100644
--- a/drivers/net/ethernet/qualcomm/qca_uart.c
+++ b/drivers/net/ethernet/qualcomm/qca_uart.c
@@ -107,7 +107,7 @@ qca_tty_receive(struct serdev_device *serdev, const unsigned char *data,
skb_put(qca->rx_skb, retcode);
qca->rx_skb->protocol = eth_type_trans(
qca->rx_skb, qca->rx_skb->dev);
- qca->rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb_checksum_none_assert(qca->rx_skb);
netif_rx_ni(qca->rx_skb);
qca->rx_skb = netdev_alloc_skb_ip_align(netdev,
netdev->mtu +
diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c
index 47e9998b62f0..4b2eca5e08e2 100644
--- a/drivers/net/ethernet/rdc/r6040.c
+++ b/drivers/net/ethernet/rdc/r6040.c
@@ -954,7 +954,7 @@ static const struct net_device_ops r6040_netdev_ops = {
.ndo_set_rx_mode = r6040_multicast_list,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
- .ndo_do_ioctl = phy_do_ioctl,
+ .ndo_eth_ioctl = phy_do_ioctl,
.ndo_tx_timeout = r6040_tx_timeout,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = r6040_poll_controller,
diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index 9677e257e9a1..2b84b4565e64 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -514,7 +514,7 @@ static int cp_rx_poll(struct napi_struct *napi, int budget)
}
new_mapping = dma_map_single(&cp->pdev->dev, new_skb->data, buflen,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
if (dma_mapping_error(&cp->pdev->dev, new_mapping)) {
dev->stats.rx_dropped++;
kfree_skb(new_skb);
@@ -522,7 +522,7 @@ static int cp_rx_poll(struct napi_struct *napi, int budget)
}
dma_unmap_single(&cp->pdev->dev, mapping,
- buflen, PCI_DMA_FROMDEVICE);
+ buflen, DMA_FROM_DEVICE);
/* Handle checksum offloading for incoming packets. */
if (cp_rx_csum_ok(status))
@@ -666,7 +666,7 @@ static void cp_tx (struct cp_private *cp)
dma_unmap_single(&cp->pdev->dev, le64_to_cpu(txd->addr),
cp->tx_opts[tx_tail] & 0xffff,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
if (status & LastFrag) {
if (status & (TxError | TxFIFOUnder)) {
@@ -724,7 +724,7 @@ static void unwind_tx_frag_mapping(struct cp_private *cp, struct sk_buff *skb,
txd = &cp->tx_ring[index];
this_frag = &skb_shinfo(skb)->frags[frag];
dma_unmap_single(&cp->pdev->dev, le64_to_cpu(txd->addr),
- skb_frag_size(this_frag), PCI_DMA_TODEVICE);
+ skb_frag_size(this_frag), DMA_TO_DEVICE);
}
}
@@ -781,7 +781,7 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb,
dma_addr_t mapping;
len = skb->len;
- mapping = dma_map_single(&cp->pdev->dev, skb->data, len, PCI_DMA_TODEVICE);
+ mapping = dma_map_single(&cp->pdev->dev, skb->data, len, DMA_TO_DEVICE);
if (dma_mapping_error(&cp->pdev->dev, mapping))
goto out_dma_error;
@@ -810,7 +810,7 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb,
first_eor = eor;
first_len = skb_headlen(skb);
first_mapping = dma_map_single(&cp->pdev->dev, skb->data,
- first_len, PCI_DMA_TODEVICE);
+ first_len, DMA_TO_DEVICE);
if (dma_mapping_error(&cp->pdev->dev, first_mapping))
goto out_dma_error;
@@ -826,7 +826,7 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb,
len = skb_frag_size(this_frag);
mapping = dma_map_single(&cp->pdev->dev,
skb_frag_address(this_frag),
- len, PCI_DMA_TODEVICE);
+ len, DMA_TO_DEVICE);
if (dma_mapping_error(&cp->pdev->dev, mapping)) {
unwind_tx_frag_mapping(cp, skb, first_entry, entry);
goto out_dma_error;
@@ -1069,7 +1069,7 @@ static int cp_refill_rx(struct cp_private *cp)
goto err_out;
mapping = dma_map_single(&cp->pdev->dev, skb->data,
- cp->rx_buf_sz, PCI_DMA_FROMDEVICE);
+ cp->rx_buf_sz, DMA_FROM_DEVICE);
if (dma_mapping_error(&cp->pdev->dev, mapping)) {
kfree_skb(skb);
goto err_out;
@@ -1139,7 +1139,7 @@ static void cp_clean_rings (struct cp_private *cp)
if (cp->rx_skb[i]) {
desc = cp->rx_ring + i;
dma_unmap_single(&cp->pdev->dev,le64_to_cpu(desc->addr),
- cp->rx_buf_sz, PCI_DMA_FROMDEVICE);
+ cp->rx_buf_sz, DMA_FROM_DEVICE);
dev_kfree_skb_any(cp->rx_skb[i]);
}
}
@@ -1151,7 +1151,7 @@ static void cp_clean_rings (struct cp_private *cp)
desc = cp->tx_ring + i;
dma_unmap_single(&cp->pdev->dev,le64_to_cpu(desc->addr),
le32_to_cpu(desc->opts1) & 0xffff,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
if (le32_to_cpu(desc->opts1) & LastFrag)
dev_kfree_skb_any(skb);
cp->dev->stats.tx_dropped++;
@@ -1869,7 +1869,7 @@ static const struct net_device_ops cp_netdev_ops = {
.ndo_set_mac_address = cp_set_mac_address,
.ndo_set_rx_mode = cp_set_rx_mode,
.ndo_get_stats = cp_get_stats,
- .ndo_do_ioctl = cp_ioctl,
+ .ndo_eth_ioctl = cp_ioctl,
.ndo_start_xmit = cp_start_xmit,
.ndo_tx_timeout = cp_tx_timeout,
.ndo_set_features = cp_set_features,
@@ -1945,24 +1945,17 @@ static int cp_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
/* Configure DMA attributes. */
if ((sizeof(dma_addr_t) > 4) &&
- !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) &&
- !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
+ !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
pci_using_dac = 1;
} else {
pci_using_dac = 0;
- rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (rc) {
dev_err(&pdev->dev,
"No usable DMA configuration, aborting\n");
goto err_out_res;
}
- rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- if (rc) {
- dev_err(&pdev->dev,
- "No usable consistent DMA configuration, aborting\n");
- goto err_out_res;
- }
}
cp->cpcmd = (pci_using_dac ? PCIDAC : 0) |
diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index f0608f050050..2e6923cc653e 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -932,7 +932,7 @@ static const struct net_device_ops rtl8139_netdev_ops = {
.ndo_set_mac_address = rtl8139_set_mac_address,
.ndo_start_xmit = rtl8139_start_xmit,
.ndo_set_rx_mode = rtl8139_set_rx_mode,
- .ndo_do_ioctl = netdev_ioctl,
+ .ndo_eth_ioctl = netdev_ioctl,
.ndo_tx_timeout = rtl8139_tx_timeout,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = rtl8139_poll_controller,
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index c7af5bc3b8af..46a6ff9a782d 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1749,7 +1749,10 @@ rtl_coalesce_info(struct rtl8169_private *tp)
return ERR_PTR(-ELNRNG);
}
-static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+static int rtl_get_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct rtl8169_private *tp = netdev_priv(dev);
const struct rtl_coalesce_info *ci;
@@ -1807,7 +1810,10 @@ static int rtl_coalesce_choose_scale(struct rtl8169_private *tp, u32 usec,
return -ERANGE;
}
-static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+static int rtl_set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct rtl8169_private *tp = netdev_priv(dev);
u32 tx_fr = ec->tx_max_coalesced_frames;
@@ -2598,7 +2604,7 @@ static u32 rtl_csi_read(struct rtl8169_private *tp, int addr)
RTL_R32(tp, CSIDR) : ~0;
}
-static void rtl_csi_access_enable(struct rtl8169_private *tp, u8 val)
+static void rtl_set_aspm_entry_latency(struct rtl8169_private *tp, u8 val)
{
struct pci_dev *pdev = tp->pci_dev;
u32 csi;
@@ -2606,6 +2612,8 @@ static void rtl_csi_access_enable(struct rtl8169_private *tp, u8 val)
/* According to Realtek the value at config space address 0x070f
* controls the L0s/L1 entrance latency. We try standard ECAM access
* first and if it fails fall back to CSI.
+ * bit 0..2: L0: 0 = 1us, 1 = 2us .. 6 = 7us, 7 = 7us (no typo)
+ * bit 3..5: L1: 0 = 1us, 1 = 2us .. 6 = 64us, 7 = 64us
*/
if (pdev->cfg_size > 0x070f &&
pci_write_config_byte(pdev, 0x070f, val) == PCIBIOS_SUCCESSFUL)
@@ -2619,7 +2627,8 @@ static void rtl_csi_access_enable(struct rtl8169_private *tp, u8 val)
static void rtl_set_def_aspm_entry_latency(struct rtl8169_private *tp)
{
- rtl_csi_access_enable(tp, 0x27);
+ /* L0 7us, L1 16us */
+ rtl_set_aspm_entry_latency(tp, 0x27);
}
struct ephy_info {
@@ -2660,6 +2669,34 @@ static void rtl_pcie_state_l2l3_disable(struct rtl8169_private *tp)
RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Rdy_to_L23);
}
+static void rtl_enable_exit_l1(struct rtl8169_private *tp)
+{
+ /* Bits control which events trigger ASPM L1 exit:
+ * Bit 12: rxdv
+ * Bit 11: ltr_msg
+ * Bit 10: txdma_poll
+ * Bit 9: xadm
+ * Bit 8: pktavi
+ * Bit 7: txpla
+ */
+ switch (tp->mac_version) {
+ case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_36:
+ rtl_eri_set_bits(tp, 0xd4, 0x1f00);
+ break;
+ case RTL_GIGA_MAC_VER_37 ... RTL_GIGA_MAC_VER_38:
+ rtl_eri_set_bits(tp, 0xd4, 0x0c00);
+ break;
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53:
+ rtl_eri_set_bits(tp, 0xd4, 0x1f80);
+ break;
+ case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+ r8168_mac_ocp_modify(tp, 0xc0ac, 0, 0x1f80);
+ break;
+ default:
+ break;
+ }
+}
+
static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
{
/* Don't enable ASPM in the chip if OS can't control ASPM */
@@ -2848,7 +2885,6 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_1111, 0x0000);
rtl_set_fifo_size(tp, 0x10, 0x10, 0x02, 0x06);
- rtl_eri_set_bits(tp, 0x0d4, 0x1f00);
rtl_eri_set_bits(tp, 0x1d0, BIT(1));
rtl_reset_packet_filter(tp);
rtl_eri_set_bits(tp, 0x1b0, BIT(4));
@@ -2905,8 +2941,6 @@ static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
rtl_hw_start_8168f(tp);
rtl_ephy_init(tp, e_info_8168f_1);
-
- rtl_eri_set_bits(tp, 0x0d4, 0x1f00);
}
static void rtl_hw_start_8411(struct rtl8169_private *tp)
@@ -2923,8 +2957,6 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp)
rtl_pcie_state_l2l3_disable(tp);
rtl_ephy_init(tp, e_info_8168f_1);
-
- rtl_eri_set_bits(tp, 0x0d4, 0x0c00);
}
static void rtl_hw_start_8168g(struct rtl8169_private *tp)
@@ -2941,7 +2973,6 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
- rtl_eri_set_bits(tp, 0x0d4, 0x1f80);
rtl8168_config_eee_mac(tp);
@@ -3172,7 +3203,6 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
rtl_reset_packet_filter(tp);
- rtl_eri_set_bits(tp, 0xd4, 0x1f00);
rtl_eri_set_bits(tp, 0xdc, 0x001c);
rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87);
@@ -3226,8 +3256,6 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
rtl_reset_packet_filter(tp);
- rtl_eri_set_bits(tp, 0xd4, 0x1f80);
-
rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87);
RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
@@ -3329,7 +3357,7 @@ static void rtl_hw_start_8117(struct rtl8169_private *tp)
rtl_reset_packet_filter(tp);
- rtl_eri_set_bits(tp, 0xd4, 0x1f90);
+ rtl_eri_set_bits(tp, 0xd4, 0x0010);
rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87);
@@ -3502,12 +3530,16 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+ /* L0 7us, L1 32us - needed to avoid issues with link-up detection */
+ rtl_set_aspm_entry_latency(tp, 0x2f);
+
rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000);
/* disable EEE */
rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000);
rtl_pcie_state_l2l3_disable(tp);
+ rtl_hw_aspm_clkreq_enable(tp, true);
}
DECLARE_RTL_COND(rtl_mac_ocp_e00e_cond)
@@ -3556,7 +3588,6 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
r8168_mac_ocp_modify(tp, 0xea1c, 0x0003, 0x0001);
r8168_mac_ocp_modify(tp, 0xe0c0, 0x4f0f, 0x4403);
r8168_mac_ocp_modify(tp, 0xe052, 0x0080, 0x0068);
- r8168_mac_ocp_modify(tp, 0xc0ac, 0x0080, 0x1f00);
r8168_mac_ocp_modify(tp, 0xd430, 0x0fff, 0x047f);
r8168_mac_ocp_modify(tp, 0xea1c, 0x0004, 0x0000);
@@ -3779,6 +3810,7 @@ static void rtl_hw_start(struct rtl8169_private *tp)
else
rtl_hw_start_8168(tp);
+ rtl_enable_exit_l1(tp);
rtl_set_rx_max_size(tp);
rtl_set_rx_tx_desc_registers(tp);
rtl_lock_config_regs(tp);
@@ -4979,7 +5011,7 @@ static const struct net_device_ops rtl_netdev_ops = {
.ndo_fix_features = rtl8169_fix_features,
.ndo_set_features = rtl8169_set_features,
.ndo_set_mac_address = rtl_set_mac_address,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_set_rx_mode = rtl_set_rx_mode,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = rtl8169_netpoll,
@@ -5274,11 +5306,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
return rc;
- /* Disable ASPM completely as that cause random device stop working
+ /* Disable ASPM L1 as that cause random device stop working
* problems as well as full system hangs for some PCIe devices users.
*/
- rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S |
- PCIE_LINK_STATE_L1);
+ rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
tp->aspm_manageable = !rc;
/* enable device (incl. PCI PM wakeup and hotplug setup) */
diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig
index 5a2a4af31812..8008b2f45934 100644
--- a/drivers/net/ethernet/renesas/Kconfig
+++ b/drivers/net/ethernet/renesas/Kconfig
@@ -32,11 +32,11 @@ config SH_ETH
config RAVB
tristate "Renesas Ethernet AVB support"
depends on ARCH_RENESAS || COMPILE_TEST
+ depends on PTP_1588_CLOCK_OPTIONAL
select CRC32
select MII
select MDIO_BITBANG
select PHYLIB
- imply PTP_1588_CLOCK
help
Renesas Ethernet AVB device driver.
This driver supports the following SoCs:
diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index 80e62ca2e3d3..47c5377e4f42 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -956,10 +956,6 @@ enum RAVB_QUEUE {
#define RX_BUF_SZ (2048 - ETH_FCS_LEN + sizeof(__sum16))
-/* TX descriptors per packet */
-#define NUM_TX_DESC_GEN2 2
-#define NUM_TX_DESC_GEN3 1
-
struct ravb_tstamp_skb {
struct list_head list;
struct sk_buff *skb;
@@ -983,9 +979,29 @@ struct ravb_ptp {
struct ravb_ptp_perout perout[N_PER_OUT];
};
-enum ravb_chip_id {
- RCAR_GEN2,
- RCAR_GEN3,
+struct ravb_hw_info {
+ void (*rx_ring_free)(struct net_device *ndev, int q);
+ void (*rx_ring_format)(struct net_device *ndev, int q);
+ void *(*alloc_rx_desc)(struct net_device *ndev, int q);
+ bool (*receive)(struct net_device *ndev, int *quota, int q);
+ void (*set_rate)(struct net_device *ndev);
+ int (*set_rx_csum_feature)(struct net_device *ndev, netdev_features_t features);
+ void (*dmac_init)(struct net_device *ndev);
+ void (*emac_init)(struct net_device *ndev);
+ const char (*gstrings_stats)[ETH_GSTRING_LEN];
+ size_t gstrings_size;
+ netdev_features_t net_hw_features;
+ netdev_features_t net_features;
+ int stats_len;
+ size_t max_rx_len;
+ unsigned aligned_tx: 1;
+
+ /* hardware features */
+ unsigned internal_delay:1; /* AVB-DMAC has internal delays */
+ unsigned tx_counters:1; /* E-MAC has TX counters */
+ unsigned multi_irqs:1; /* AVB-DMAC and E-MAC has multiple irqs */
+ unsigned no_ptp_cfg_active:1; /* AVB-DMAC does not support gPTP active in config mode */
+ unsigned ptp_cfg_active:1; /* AVB-DMAC has gPTP support active in config mode */
};
struct ravb_private {
@@ -1029,7 +1045,6 @@ struct ravb_private {
int msg_enable;
int speed;
int emac_irq;
- enum ravb_chip_id chip_id;
int rx_irqs[NUM_RX_QUEUE];
int tx_irqs[NUM_TX_QUEUE];
@@ -1039,7 +1054,10 @@ struct ravb_private {
unsigned rxcidm:1; /* RX Clock Internal Delay Mode */
unsigned txcidm:1; /* TX Clock Internal Delay Mode */
unsigned rgmii_override:1; /* Deprecated rgmii-*id behavior */
- int num_tx_desc; /* TX descriptors per packet */
+ unsigned int num_tx_desc; /* TX descriptors per packet */
+
+ const struct ravb_hw_info *info;
+ struct reset_control *rstc;
};
static inline u32 ravb_read(struct net_device *ndev, enum ravb_reg reg)
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 805397088850..0f85f2d97b18 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -29,6 +29,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/sys_soc.h>
+#include <linux/reset.h>
#include <asm/div64.h>
@@ -177,10 +178,10 @@ static int ravb_tx_free(struct net_device *ndev, int q, bool free_txed_only)
{
struct ravb_private *priv = netdev_priv(ndev);
struct net_device_stats *stats = &priv->stats[q];
- int num_tx_desc = priv->num_tx_desc;
+ unsigned int num_tx_desc = priv->num_tx_desc;
struct ravb_tx_desc *desc;
+ unsigned int entry;
int free_num = 0;
- int entry;
u32 size;
for (; priv->cur_tx[q] - priv->dirty_tx[q] > 0; priv->dirty_tx[q]++) {
@@ -216,31 +217,42 @@ static int ravb_tx_free(struct net_device *ndev, int q, bool free_txed_only)
return free_num;
}
-/* Free skb's and DMA buffers for Ethernet AVB */
-static void ravb_ring_free(struct net_device *ndev, int q)
+static void ravb_rx_ring_free(struct net_device *ndev, int q)
{
struct ravb_private *priv = netdev_priv(ndev);
- int num_tx_desc = priv->num_tx_desc;
- int ring_size;
- int i;
+ unsigned int ring_size;
+ unsigned int i;
+
+ if (!priv->rx_ring[q])
+ return;
- if (priv->rx_ring[q]) {
- for (i = 0; i < priv->num_rx_ring[q]; i++) {
- struct ravb_ex_rx_desc *desc = &priv->rx_ring[q][i];
+ for (i = 0; i < priv->num_rx_ring[q]; i++) {
+ struct ravb_ex_rx_desc *desc = &priv->rx_ring[q][i];
- if (!dma_mapping_error(ndev->dev.parent,
- le32_to_cpu(desc->dptr)))
- dma_unmap_single(ndev->dev.parent,
- le32_to_cpu(desc->dptr),
- RX_BUF_SZ,
- DMA_FROM_DEVICE);
- }
- ring_size = sizeof(struct ravb_ex_rx_desc) *
- (priv->num_rx_ring[q] + 1);
- dma_free_coherent(ndev->dev.parent, ring_size, priv->rx_ring[q],
- priv->rx_desc_dma[q]);
- priv->rx_ring[q] = NULL;
+ if (!dma_mapping_error(ndev->dev.parent,
+ le32_to_cpu(desc->dptr)))
+ dma_unmap_single(ndev->dev.parent,
+ le32_to_cpu(desc->dptr),
+ RX_BUF_SZ,
+ DMA_FROM_DEVICE);
}
+ ring_size = sizeof(struct ravb_ex_rx_desc) *
+ (priv->num_rx_ring[q] + 1);
+ dma_free_coherent(ndev->dev.parent, ring_size, priv->rx_ring[q],
+ priv->rx_desc_dma[q]);
+ priv->rx_ring[q] = NULL;
+}
+
+/* Free skb's and DMA buffers for Ethernet AVB */
+static void ravb_ring_free(struct net_device *ndev, int q)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
+ unsigned int num_tx_desc = priv->num_tx_desc;
+ unsigned int ring_size;
+ unsigned int i;
+
+ info->rx_ring_free(ndev, q);
if (priv->tx_ring[q]) {
ravb_tx_free(ndev, q, false);
@@ -271,24 +283,13 @@ static void ravb_ring_free(struct net_device *ndev, int q)
priv->tx_skb[q] = NULL;
}
-/* Format skb and descriptor buffer for Ethernet AVB */
-static void ravb_ring_format(struct net_device *ndev, int q)
+static void ravb_rx_ring_format(struct net_device *ndev, int q)
{
struct ravb_private *priv = netdev_priv(ndev);
- int num_tx_desc = priv->num_tx_desc;
struct ravb_ex_rx_desc *rx_desc;
- struct ravb_tx_desc *tx_desc;
- struct ravb_desc *desc;
- int rx_ring_size = sizeof(*rx_desc) * priv->num_rx_ring[q];
- int tx_ring_size = sizeof(*tx_desc) * priv->num_tx_ring[q] *
- num_tx_desc;
+ unsigned int rx_ring_size = sizeof(*rx_desc) * priv->num_rx_ring[q];
dma_addr_t dma_addr;
- int i;
-
- priv->cur_rx[q] = 0;
- priv->cur_tx[q] = 0;
- priv->dirty_rx[q] = 0;
- priv->dirty_tx[q] = 0;
+ unsigned int i;
memset(priv->rx_ring[q], 0, rx_ring_size);
/* Build RX ring buffer */
@@ -310,6 +311,26 @@ static void ravb_ring_format(struct net_device *ndev, int q)
rx_desc = &priv->rx_ring[q][i];
rx_desc->dptr = cpu_to_le32((u32)priv->rx_desc_dma[q]);
rx_desc->die_dt = DT_LINKFIX; /* type */
+}
+
+/* Format skb and descriptor buffer for Ethernet AVB */
+static void ravb_ring_format(struct net_device *ndev, int q)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
+ unsigned int num_tx_desc = priv->num_tx_desc;
+ struct ravb_tx_desc *tx_desc;
+ struct ravb_desc *desc;
+ unsigned int tx_ring_size = sizeof(*tx_desc) * priv->num_tx_ring[q] *
+ num_tx_desc;
+ unsigned int i;
+
+ priv->cur_rx[q] = 0;
+ priv->cur_tx[q] = 0;
+ priv->dirty_rx[q] = 0;
+ priv->dirty_tx[q] = 0;
+
+ info->rx_ring_format(ndev, q);
memset(priv->tx_ring[q], 0, tx_ring_size);
/* Build TX ring buffer */
@@ -335,14 +356,28 @@ static void ravb_ring_format(struct net_device *ndev, int q)
desc->dptr = cpu_to_le32((u32)priv->tx_desc_dma[q]);
}
+static void *ravb_alloc_rx_desc(struct net_device *ndev, int q)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ unsigned int ring_size;
+
+ ring_size = sizeof(struct ravb_ex_rx_desc) * (priv->num_rx_ring[q] + 1);
+
+ priv->rx_ring[q] = dma_alloc_coherent(ndev->dev.parent, ring_size,
+ &priv->rx_desc_dma[q],
+ GFP_KERNEL);
+ return priv->rx_ring[q];
+}
+
/* Init skb and descriptor buffer for Ethernet AVB */
static int ravb_ring_init(struct net_device *ndev, int q)
{
struct ravb_private *priv = netdev_priv(ndev);
- int num_tx_desc = priv->num_tx_desc;
+ const struct ravb_hw_info *info = priv->info;
+ unsigned int num_tx_desc = priv->num_tx_desc;
+ unsigned int ring_size;
struct sk_buff *skb;
- int ring_size;
- int i;
+ unsigned int i;
/* Allocate RX and TX skb rings */
priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q],
@@ -353,7 +388,7 @@ static int ravb_ring_init(struct net_device *ndev, int q)
goto error;
for (i = 0; i < priv->num_rx_ring[q]; i++) {
- skb = netdev_alloc_skb(ndev, RX_BUF_SZ + RAVB_ALIGN - 1);
+ skb = netdev_alloc_skb(ndev, info->max_rx_len);
if (!skb)
goto error;
ravb_set_buffer_align(skb);
@@ -369,11 +404,7 @@ static int ravb_ring_init(struct net_device *ndev, int q)
}
/* Allocate all RX descriptors. */
- ring_size = sizeof(struct ravb_ex_rx_desc) * (priv->num_rx_ring[q] + 1);
- priv->rx_ring[q] = dma_alloc_coherent(ndev->dev.parent, ring_size,
- &priv->rx_desc_dma[q],
- GFP_KERNEL);
- if (!priv->rx_ring[q])
+ if (!info->alloc_rx_desc(ndev, q))
goto error;
priv->dirty_rx[q] = 0;
@@ -395,8 +426,7 @@ error:
return -ENOMEM;
}
-/* E-MAC init function */
-static void ravb_emac_init(struct net_device *ndev)
+static void ravb_rcar_emac_init(struct net_device *ndev)
{
/* Receive frame limit set register */
ravb_write(ndev, ndev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, RFLR);
@@ -422,29 +452,19 @@ static void ravb_emac_init(struct net_device *ndev)
ravb_write(ndev, ECSIPR_ICDIP | ECSIPR_MPDIP | ECSIPR_LCHNGIP, ECSIPR);
}
-/* Device init function for Ethernet AVB */
-static int ravb_dmac_init(struct net_device *ndev)
+/* E-MAC init function */
+static void ravb_emac_init(struct net_device *ndev)
{
struct ravb_private *priv = netdev_priv(ndev);
- int error;
+ const struct ravb_hw_info *info = priv->info;
- /* Set CONFIG mode */
- error = ravb_config(ndev);
- if (error)
- return error;
-
- error = ravb_ring_init(ndev, RAVB_BE);
- if (error)
- return error;
- error = ravb_ring_init(ndev, RAVB_NC);
- if (error) {
- ravb_ring_free(ndev, RAVB_BE);
- return error;
- }
+ info->emac_init(ndev);
+}
- /* Descriptor format */
- ravb_ring_format(ndev, RAVB_BE);
- ravb_ring_format(ndev, RAVB_NC);
+static void ravb_rcar_dmac_init(struct net_device *ndev)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
/* Set AVB RX */
ravb_write(ndev,
@@ -457,7 +477,7 @@ static int ravb_dmac_init(struct net_device *ndev)
ravb_write(ndev, TCCR_TFEN, TCCR);
/* Interrupt init: */
- if (priv->chip_id == RCAR_GEN3) {
+ if (info->multi_irqs) {
/* Clear DIL.DPLx */
ravb_write(ndev, 0, DIL);
/* Set queue specific interrupt */
@@ -471,6 +491,34 @@ static int ravb_dmac_init(struct net_device *ndev)
ravb_write(ndev, RIC2_QFE0 | RIC2_QFE1 | RIC2_RFFE, RIC2);
/* Frame transmitted, timestamp FIFO updated */
ravb_write(ndev, TIC_FTE0 | TIC_FTE1 | TIC_TFUE, TIC);
+}
+
+/* Device init function for Ethernet AVB */
+static int ravb_dmac_init(struct net_device *ndev)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
+ int error;
+
+ /* Set CONFIG mode */
+ error = ravb_config(ndev);
+ if (error)
+ return error;
+
+ error = ravb_ring_init(ndev, RAVB_BE);
+ if (error)
+ return error;
+ error = ravb_ring_init(ndev, RAVB_NC);
+ if (error) {
+ ravb_ring_free(ndev, RAVB_BE);
+ return error;
+ }
+
+ /* Descriptor format */
+ ravb_ring_format(ndev, RAVB_BE);
+ ravb_ring_format(ndev, RAVB_NC);
+
+ info->dmac_init(ndev);
/* Setting the control will start the AVB-DMAC process. */
ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_OPERATION);
@@ -531,10 +579,10 @@ static void ravb_rx_csum(struct sk_buff *skb)
skb_trim(skb, skb->len - sizeof(__sum16));
}
-/* Packet receive function for Ethernet AVB */
-static bool ravb_rx(struct net_device *ndev, int *quota, int q)
+static bool ravb_rcar_rx(struct net_device *ndev, int *quota, int q)
{
struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
int entry = priv->cur_rx[q] % priv->num_rx_ring[q];
int boguscnt = (priv->dirty_rx[q] + priv->num_rx_ring[q]) -
priv->cur_rx[q];
@@ -619,9 +667,7 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
desc->ds_cc = cpu_to_le16(RX_BUF_SZ);
if (!priv->rx_skb[q][entry]) {
- skb = netdev_alloc_skb(ndev,
- RX_BUF_SZ +
- RAVB_ALIGN - 1);
+ skb = netdev_alloc_skb(ndev, info->max_rx_len);
if (!skb)
break; /* Better luck next round. */
ravb_set_buffer_align(skb);
@@ -647,6 +693,15 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
return boguscnt <= 0;
}
+/* Packet receive function for Ethernet AVB */
+static bool ravb_rx(struct net_device *ndev, int *quota, int q)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
+
+ return info->receive(ndev, quota, q);
+}
+
static void ravb_rcv_snd_disable(struct net_device *ndev)
{
/* Disable TX and RX */
@@ -758,6 +813,7 @@ static void ravb_error_interrupt(struct net_device *ndev)
static bool ravb_queue_interrupt(struct net_device *ndev, int q)
{
struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
u32 ris0 = ravb_read(ndev, RIS0);
u32 ric0 = ravb_read(ndev, RIC0);
u32 tis = ravb_read(ndev, TIS);
@@ -766,7 +822,7 @@ static bool ravb_queue_interrupt(struct net_device *ndev, int q)
if (((ris0 & ric0) & BIT(q)) || ((tis & tic) & BIT(q))) {
if (napi_schedule_prep(&priv->napi[q])) {
/* Mask RX and TX interrupts */
- if (priv->chip_id == RCAR_GEN2) {
+ if (!info->multi_irqs) {
ravb_write(ndev, ric0 & ~BIT(q), RIC0);
ravb_write(ndev, tic & ~BIT(q), TIC);
} else {
@@ -909,6 +965,7 @@ static int ravb_poll(struct napi_struct *napi, int budget)
{
struct net_device *ndev = napi->dev;
struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
unsigned long flags;
int q = napi - priv->napi;
int mask = BIT(q);
@@ -932,7 +989,7 @@ static int ravb_poll(struct napi_struct *napi, int budget)
/* Re-enable RX/TX interrupts */
spin_lock_irqsave(&priv->lock, flags);
- if (priv->chip_id == RCAR_GEN2) {
+ if (!info->multi_irqs) {
ravb_modify(ndev, RIC0, mask, mask);
ravb_modify(ndev, TIC, mask, mask);
} else {
@@ -956,6 +1013,7 @@ out:
static void ravb_adjust_link(struct net_device *ndev)
{
struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
struct phy_device *phydev = ndev->phydev;
bool new_state = false;
unsigned long flags;
@@ -970,7 +1028,7 @@ static void ravb_adjust_link(struct net_device *ndev)
if (phydev->speed != priv->speed) {
new_state = true;
priv->speed = phydev->speed;
- ravb_set_rate(ndev);
+ info->set_rate(ndev);
}
if (!priv->link) {
ravb_modify(ndev, ECMR, ECMR_TXF, 0);
@@ -1133,13 +1191,14 @@ static const char ravb_gstrings_stats[][ETH_GSTRING_LEN] = {
"rx_queue_1_over_errors",
};
-#define RAVB_STATS_LEN ARRAY_SIZE(ravb_gstrings_stats)
-
static int ravb_get_sset_count(struct net_device *netdev, int sset)
{
+ struct ravb_private *priv = netdev_priv(netdev);
+ const struct ravb_hw_info *info = priv->info;
+
switch (sset) {
case ETH_SS_STATS:
- return RAVB_STATS_LEN;
+ return info->stats_len;
default:
return -EOPNOTSUPP;
}
@@ -1176,9 +1235,12 @@ static void ravb_get_ethtool_stats(struct net_device *ndev,
static void ravb_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
{
+ struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
+
switch (stringset) {
case ETH_SS_STATS:
- memcpy(data, ravb_gstrings_stats, sizeof(ravb_gstrings_stats));
+ memcpy(data, info->gstrings_stats, info->gstrings_size);
break;
}
}
@@ -1198,6 +1260,7 @@ static int ravb_set_ringparam(struct net_device *ndev,
struct ethtool_ringparam *ring)
{
struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
int error;
if (ring->tx_pending > BE_TX_RING_MAX ||
@@ -1211,7 +1274,7 @@ static int ravb_set_ringparam(struct net_device *ndev,
if (netif_running(ndev)) {
netif_device_detach(ndev);
/* Stop PTP Clock driver */
- if (priv->chip_id == RCAR_GEN2)
+ if (info->no_ptp_cfg_active)
ravb_ptp_stop(ndev);
/* Wait for DMA stopping */
error = ravb_stop_dma(ndev);
@@ -1243,7 +1306,7 @@ static int ravb_set_ringparam(struct net_device *ndev,
ravb_emac_init(ndev);
/* Initialise PTP Clock driver */
- if (priv->chip_id == RCAR_GEN2)
+ if (info->no_ptp_cfg_active)
ravb_ptp_init(ndev, priv->pdev);
netif_device_attach(ndev);
@@ -1334,6 +1397,7 @@ static inline int ravb_hook_irq(unsigned int irq, irq_handler_t handler,
static int ravb_open(struct net_device *ndev)
{
struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
struct platform_device *pdev = priv->pdev;
struct device *dev = &pdev->dev;
int error;
@@ -1341,7 +1405,7 @@ static int ravb_open(struct net_device *ndev)
napi_enable(&priv->napi[RAVB_BE]);
napi_enable(&priv->napi[RAVB_NC]);
- if (priv->chip_id == RCAR_GEN2) {
+ if (!info->multi_irqs) {
error = request_irq(ndev->irq, ravb_interrupt, IRQF_SHARED,
ndev->name, ndev);
if (error) {
@@ -1382,7 +1446,7 @@ static int ravb_open(struct net_device *ndev)
ravb_emac_init(ndev);
/* Initialise PTP Clock driver */
- if (priv->chip_id == RCAR_GEN2)
+ if (info->no_ptp_cfg_active)
ravb_ptp_init(ndev, priv->pdev);
netif_tx_start_all_queues(ndev);
@@ -1396,10 +1460,10 @@ static int ravb_open(struct net_device *ndev)
out_ptp_stop:
/* Stop PTP Clock driver */
- if (priv->chip_id == RCAR_GEN2)
+ if (info->no_ptp_cfg_active)
ravb_ptp_stop(ndev);
out_free_irq_nc_tx:
- if (priv->chip_id == RCAR_GEN2)
+ if (!info->multi_irqs)
goto out_free_irq;
free_irq(priv->tx_irqs[RAVB_NC], ndev);
out_free_irq_nc_rx:
@@ -1437,13 +1501,14 @@ static void ravb_tx_timeout_work(struct work_struct *work)
{
struct ravb_private *priv = container_of(work, struct ravb_private,
work);
+ const struct ravb_hw_info *info = priv->info;
struct net_device *ndev = priv->ndev;
int error;
netif_tx_stop_all_queues(ndev);
/* Stop PTP Clock driver */
- if (priv->chip_id == RCAR_GEN2)
+ if (info->no_ptp_cfg_active)
ravb_ptp_stop(ndev);
/* Wait for DMA stopping */
@@ -1478,7 +1543,7 @@ static void ravb_tx_timeout_work(struct work_struct *work)
out:
/* Initialise PTP Clock driver */
- if (priv->chip_id == RCAR_GEN2)
+ if (info->no_ptp_cfg_active)
ravb_ptp_init(ndev, priv->pdev);
netif_tx_start_all_queues(ndev);
@@ -1488,7 +1553,7 @@ out:
static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
struct ravb_private *priv = netdev_priv(ndev);
- int num_tx_desc = priv->num_tx_desc;
+ unsigned int num_tx_desc = priv->num_tx_desc;
u16 q = skb_get_queue_mapping(skb);
struct ravb_tstamp_skb *ts_skb;
struct ravb_tx_desc *desc;
@@ -1628,13 +1693,14 @@ static u16 ravb_select_queue(struct net_device *ndev, struct sk_buff *skb,
static struct net_device_stats *ravb_get_stats(struct net_device *ndev)
{
struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
struct net_device_stats *nstats, *stats0, *stats1;
nstats = &ndev->stats;
stats0 = &priv->stats[RAVB_BE];
stats1 = &priv->stats[RAVB_NC];
- if (priv->chip_id == RCAR_GEN3) {
+ if (info->tx_counters) {
nstats->tx_dropped += ravb_read(ndev, TROCR);
ravb_write(ndev, 0, TROCR); /* (write clear) */
}
@@ -1675,6 +1741,7 @@ static int ravb_close(struct net_device *ndev)
{
struct device_node *np = ndev->dev.parent->of_node;
struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
struct ravb_tstamp_skb *ts_skb, *ts_skb2;
netif_tx_stop_all_queues(ndev);
@@ -1685,7 +1752,7 @@ static int ravb_close(struct net_device *ndev)
ravb_write(ndev, 0, TIC);
/* Stop PTP Clock driver */
- if (priv->chip_id == RCAR_GEN2)
+ if (info->no_ptp_cfg_active)
ravb_ptp_stop(ndev);
/* Set the config mode to stop the AVB-DMAC's processes */
@@ -1708,7 +1775,7 @@ static int ravb_close(struct net_device *ndev)
of_phy_deregister_fixed_link(np);
}
- if (priv->chip_id != RCAR_GEN2) {
+ if (info->multi_irqs) {
free_irq(priv->tx_irqs[RAVB_NC], ndev);
free_irq(priv->rx_irqs[RAVB_NC], ndev);
free_irq(priv->tx_irqs[RAVB_BE], ndev);
@@ -1851,8 +1918,8 @@ static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
spin_unlock_irqrestore(&priv->lock, flags);
}
-static int ravb_set_features(struct net_device *ndev,
- netdev_features_t features)
+static int ravb_set_features_rx_csum(struct net_device *ndev,
+ netdev_features_t features)
{
netdev_features_t changed = ndev->features ^ features;
@@ -1864,6 +1931,15 @@ static int ravb_set_features(struct net_device *ndev,
return 0;
}
+static int ravb_set_features(struct net_device *ndev,
+ netdev_features_t features)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
+
+ return info->set_rx_csum_feature(ndev, features);
+}
+
static const struct net_device_ops ravb_netdev_ops = {
.ndo_open = ravb_open,
.ndo_stop = ravb_close,
@@ -1872,7 +1948,7 @@ static const struct net_device_ops ravb_netdev_ops = {
.ndo_get_stats = ravb_get_stats,
.ndo_set_rx_mode = ravb_set_rx_mode,
.ndo_tx_timeout = ravb_tx_timeout,
- .ndo_do_ioctl = ravb_do_ioctl,
+ .ndo_eth_ioctl = ravb_do_ioctl,
.ndo_change_mtu = ravb_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
@@ -1924,12 +2000,52 @@ static int ravb_mdio_release(struct ravb_private *priv)
return 0;
}
+static const struct ravb_hw_info ravb_gen3_hw_info = {
+ .rx_ring_free = ravb_rx_ring_free,
+ .rx_ring_format = ravb_rx_ring_format,
+ .alloc_rx_desc = ravb_alloc_rx_desc,
+ .receive = ravb_rcar_rx,
+ .set_rate = ravb_set_rate,
+ .set_rx_csum_feature = ravb_set_features_rx_csum,
+ .dmac_init = ravb_rcar_dmac_init,
+ .emac_init = ravb_rcar_emac_init,
+ .gstrings_stats = ravb_gstrings_stats,
+ .gstrings_size = sizeof(ravb_gstrings_stats),
+ .net_hw_features = NETIF_F_RXCSUM,
+ .net_features = NETIF_F_RXCSUM,
+ .stats_len = ARRAY_SIZE(ravb_gstrings_stats),
+ .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1,
+ .internal_delay = 1,
+ .tx_counters = 1,
+ .multi_irqs = 1,
+ .ptp_cfg_active = 1,
+};
+
+static const struct ravb_hw_info ravb_gen2_hw_info = {
+ .rx_ring_free = ravb_rx_ring_free,
+ .rx_ring_format = ravb_rx_ring_format,
+ .alloc_rx_desc = ravb_alloc_rx_desc,
+ .receive = ravb_rcar_rx,
+ .set_rate = ravb_set_rate,
+ .set_rx_csum_feature = ravb_set_features_rx_csum,
+ .dmac_init = ravb_rcar_dmac_init,
+ .emac_init = ravb_rcar_emac_init,
+ .gstrings_stats = ravb_gstrings_stats,
+ .gstrings_size = sizeof(ravb_gstrings_stats),
+ .net_hw_features = NETIF_F_RXCSUM,
+ .net_features = NETIF_F_RXCSUM,
+ .stats_len = ARRAY_SIZE(ravb_gstrings_stats),
+ .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1,
+ .aligned_tx = 1,
+ .no_ptp_cfg_active = 1,
+};
+
static const struct of_device_id ravb_match_table[] = {
- { .compatible = "renesas,etheravb-r8a7790", .data = (void *)RCAR_GEN2 },
- { .compatible = "renesas,etheravb-r8a7794", .data = (void *)RCAR_GEN2 },
- { .compatible = "renesas,etheravb-rcar-gen2", .data = (void *)RCAR_GEN2 },
- { .compatible = "renesas,etheravb-r8a7795", .data = (void *)RCAR_GEN3 },
- { .compatible = "renesas,etheravb-rcar-gen3", .data = (void *)RCAR_GEN3 },
+ { .compatible = "renesas,etheravb-r8a7790", .data = &ravb_gen2_hw_info },
+ { .compatible = "renesas,etheravb-r8a7794", .data = &ravb_gen2_hw_info },
+ { .compatible = "renesas,etheravb-rcar-gen2", .data = &ravb_gen2_hw_info },
+ { .compatible = "renesas,etheravb-r8a7795", .data = &ravb_gen3_hw_info },
+ { .compatible = "renesas,etheravb-rcar-gen3", .data = &ravb_gen3_hw_info },
{ }
};
MODULE_DEVICE_TABLE(of, ravb_match_table);
@@ -1962,8 +2078,9 @@ static int ravb_set_gti(struct net_device *ndev)
static void ravb_set_config_mode(struct net_device *ndev)
{
struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
- if (priv->chip_id == RCAR_GEN2) {
+ if (info->no_ptp_cfg_active) {
ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG);
/* Set CSEL value */
ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB);
@@ -1973,13 +2090,6 @@ static void ravb_set_config_mode(struct net_device *ndev)
}
}
-static const struct soc_device_attribute ravb_delay_mode_quirk_match[] = {
- { .soc_id = "r8a774c0" },
- { .soc_id = "r8a77990" },
- { .soc_id = "r8a77995" },
- { /* sentinel */ }
-};
-
/* Set tx and rx clock internal delay modes */
static void ravb_parse_delay_mode(struct device_node *np, struct net_device *ndev)
{
@@ -2010,12 +2120,8 @@ static void ravb_parse_delay_mode(struct device_node *np, struct net_device *nde
if (priv->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
priv->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) {
- if (!WARN(soc_device_match(ravb_delay_mode_quirk_match),
- "phy-mode %s requires TX clock internal delay mode which is not supported by this hardware revision. Please update device tree",
- phy_modes(priv->phy_interface))) {
- priv->txcidm = 1;
- priv->rgmii_override = 1;
- }
+ priv->txcidm = 1;
+ priv->rgmii_override = 1;
}
}
@@ -2034,8 +2140,9 @@ static void ravb_set_delay_mode(struct net_device *ndev)
static int ravb_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
+ const struct ravb_hw_info *info;
+ struct reset_control *rstc;
struct ravb_private *priv;
- enum ravb_chip_id chip_id;
struct net_device *ndev;
int error, irq, q;
struct resource *res;
@@ -2047,20 +2154,26 @@ static int ravb_probe(struct platform_device *pdev)
return -EINVAL;
}
+ rstc = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL);
+ if (IS_ERR(rstc))
+ return dev_err_probe(&pdev->dev, PTR_ERR(rstc),
+ "failed to get cpg reset\n");
+
ndev = alloc_etherdev_mqs(sizeof(struct ravb_private),
NUM_TX_QUEUE, NUM_RX_QUEUE);
if (!ndev)
return -ENOMEM;
- ndev->features = NETIF_F_RXCSUM;
- ndev->hw_features = NETIF_F_RXCSUM;
+ info = of_device_get_match_data(&pdev->dev);
+ ndev->features = info->net_features;
+ ndev->hw_features = info->net_hw_features;
+
+ reset_control_deassert(rstc);
pm_runtime_enable(&pdev->dev);
pm_runtime_get_sync(&pdev->dev);
- chip_id = (enum ravb_chip_id)of_device_get_match_data(&pdev->dev);
-
- if (chip_id == RCAR_GEN3)
+ if (info->multi_irqs)
irq = platform_get_irq_byname(pdev, "ch22");
else
irq = platform_get_irq(pdev, 0);
@@ -2073,6 +2186,8 @@ static int ravb_probe(struct platform_device *pdev)
SET_NETDEV_DEV(ndev, &pdev->dev);
priv = netdev_priv(ndev);
+ priv->info = info;
+ priv->rstc = rstc;
priv->ndev = ndev;
priv->pdev = pdev;
priv->num_tx_ring[RAVB_BE] = BE_TX_RING_SIZE;
@@ -2099,7 +2214,7 @@ static int ravb_probe(struct platform_device *pdev)
priv->avb_link_active_low =
of_property_read_bool(np, "renesas,ether-link-active-low");
- if (chip_id == RCAR_GEN3) {
+ if (info->multi_irqs) {
irq = platform_get_irq_byname(pdev, "ch24");
if (irq < 0) {
error = irq;
@@ -2124,8 +2239,6 @@ static int ravb_probe(struct platform_device *pdev)
}
}
- priv->chip_id = chip_id;
-
priv->clk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(priv->clk)) {
error = PTR_ERR(priv->clk);
@@ -2142,8 +2255,12 @@ static int ravb_probe(struct platform_device *pdev)
ndev->max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
ndev->min_mtu = ETH_MIN_MTU;
- priv->num_tx_desc = chip_id == RCAR_GEN2 ?
- NUM_TX_DESC_GEN2 : NUM_TX_DESC_GEN3;
+ /* FIXME: R-Car Gen2 has 4byte alignment restriction for tx buffer
+ * Use two descriptor to handle such situation. First descriptor to
+ * handle aligned data buffer and second descriptor to handle the
+ * overflow data because of alignment.
+ */
+ priv->num_tx_desc = info->aligned_tx ? 2 : 1;
/* Set function */
ndev->netdev_ops = &ravb_netdev_ops;
@@ -2160,7 +2277,7 @@ static int ravb_probe(struct platform_device *pdev)
/* Request GTI loading */
ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI);
- if (priv->chip_id != RCAR_GEN2) {
+ if (info->internal_delay) {
ravb_parse_delay_mode(np, ndev);
ravb_set_delay_mode(ndev);
}
@@ -2184,7 +2301,7 @@ static int ravb_probe(struct platform_device *pdev)
INIT_LIST_HEAD(&priv->ts_skb_list);
/* Initialise PTP Clock driver */
- if (chip_id != RCAR_GEN2)
+ if (info->ptp_cfg_active)
ravb_ptp_init(ndev, pdev);
/* Debug message level */
@@ -2232,7 +2349,7 @@ out_dma_free:
priv->desc_bat_dma);
/* Stop PTP Clock driver */
- if (chip_id != RCAR_GEN2)
+ if (info->ptp_cfg_active)
ravb_ptp_stop(ndev);
out_disable_refclk:
clk_disable_unprepare(priv->refclk);
@@ -2241,6 +2358,7 @@ out_release:
pm_runtime_put(&pdev->dev);
pm_runtime_disable(&pdev->dev);
+ reset_control_assert(rstc);
return error;
}
@@ -2248,9 +2366,10 @@ static int ravb_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
/* Stop PTP Clock driver */
- if (priv->chip_id != RCAR_GEN2)
+ if (info->ptp_cfg_active)
ravb_ptp_stop(ndev);
clk_disable_unprepare(priv->refclk);
@@ -2265,6 +2384,7 @@ static int ravb_remove(struct platform_device *pdev)
netif_napi_del(&priv->napi[RAVB_BE]);
ravb_mdio_release(priv);
pm_runtime_disable(&pdev->dev);
+ reset_control_assert(priv->rstc);
free_netdev(ndev);
platform_set_drvdata(pdev, NULL);
@@ -2333,6 +2453,7 @@ static int __maybe_unused ravb_resume(struct device *dev)
{
struct net_device *ndev = dev_get_drvdata(dev);
struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
int ret = 0;
/* If WoL is enabled set reset mode to rearm the WoL logic */
@@ -2355,7 +2476,7 @@ static int __maybe_unused ravb_resume(struct device *dev)
/* Request GTI loading */
ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI);
- if (priv->chip_id != RCAR_GEN2)
+ if (info->internal_delay)
ravb_set_delay_mode(ndev);
/* Restore descriptor base address table */
diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c
index 6984bd5b7da9..c099656dd75b 100644
--- a/drivers/net/ethernet/renesas/ravb_ptp.c
+++ b/drivers/net/ethernet/renesas/ravb_ptp.c
@@ -179,6 +179,7 @@ static int ravb_ptp_extts(struct ptp_clock_info *ptp,
{
struct ravb_private *priv = container_of(ptp, struct ravb_private,
ptp.info);
+ const struct ravb_hw_info *info = priv->info;
struct net_device *ndev = priv->ndev;
unsigned long flags;
@@ -197,7 +198,7 @@ static int ravb_ptp_extts(struct ptp_clock_info *ptp,
priv->ptp.extts[req->index] = on;
spin_lock_irqsave(&priv->lock, flags);
- if (priv->chip_id == RCAR_GEN2)
+ if (!info->multi_irqs)
ravb_modify(ndev, GIC, GIC_PTCE, on ? GIC_PTCE : 0);
else if (on)
ravb_write(ndev, GIE_PTCS, GIE);
@@ -213,6 +214,7 @@ static int ravb_ptp_perout(struct ptp_clock_info *ptp,
{
struct ravb_private *priv = container_of(ptp, struct ravb_private,
ptp.info);
+ const struct ravb_hw_info *info = priv->info;
struct net_device *ndev = priv->ndev;
struct ravb_ptp_perout *perout;
unsigned long flags;
@@ -252,7 +254,7 @@ static int ravb_ptp_perout(struct ptp_clock_info *ptp,
error = ravb_ptp_update_compare(priv, (u32)start_ns);
if (!error) {
/* Unmask interrupt */
- if (priv->chip_id == RCAR_GEN2)
+ if (!info->multi_irqs)
ravb_modify(ndev, GIC, GIC_PTME, GIC_PTME);
else
ravb_write(ndev, GIE_PTMS0, GIE);
@@ -264,7 +266,7 @@ static int ravb_ptp_perout(struct ptp_clock_info *ptp,
perout->period = 0;
/* Mask interrupt */
- if (priv->chip_id == RCAR_GEN2)
+ if (!info->multi_irqs)
ravb_modify(ndev, GIC, GIC_PTME, 0);
else
ravb_write(ndev, GID_PTMD0, GID);
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 840478692a37..6c8ba916d1a6 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -3141,7 +3141,7 @@ static const struct net_device_ops sh_eth_netdev_ops = {
.ndo_get_stats = sh_eth_get_stats,
.ndo_set_rx_mode = sh_eth_set_rx_mode,
.ndo_tx_timeout = sh_eth_tx_timeout,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_change_mtu = sh_eth_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
@@ -3157,7 +3157,7 @@ static const struct net_device_ops sh_eth_netdev_ops_tsu = {
.ndo_vlan_rx_add_vid = sh_eth_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = sh_eth_vlan_rx_kill_vid,
.ndo_tx_timeout = sh_eth_tx_timeout,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_change_mtu = sh_eth_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h
index 315a6e5c0f59..e75814a4654f 100644
--- a/drivers/net/ethernet/rocker/rocker.h
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -119,7 +119,8 @@ struct rocker_world_ops {
int (*port_obj_fdb_del)(struct rocker_port *rocker_port,
u16 vid, const unsigned char *addr);
int (*port_master_linked)(struct rocker_port *rocker_port,
- struct net_device *master);
+ struct net_device *master,
+ struct netlink_ext_ack *extack);
int (*port_master_unlinked)(struct rocker_port *rocker_port,
struct net_device *master);
int (*port_neigh_update)(struct rocker_port *rocker_port,
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index a46633606cae..3364b6a56bd1 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -1670,13 +1670,14 @@ rocker_world_port_fdb_del(struct rocker_port *rocker_port,
}
static int rocker_world_port_master_linked(struct rocker_port *rocker_port,
- struct net_device *master)
+ struct net_device *master,
+ struct netlink_ext_ack *extack)
{
struct rocker_world_ops *wops = rocker_port->rocker->wops;
if (!wops->port_master_linked)
return -EOPNOTSUPP;
- return wops->port_master_linked(rocker_port, master);
+ return wops->port_master_linked(rocker_port, master, extack);
}
static int rocker_world_port_master_unlinked(struct rocker_port *rocker_port,
@@ -2715,7 +2716,7 @@ static void
rocker_fdb_offload_notify(struct rocker_port *rocker_port,
struct switchdev_notifier_fdb_info *recv_info)
{
- struct switchdev_notifier_fdb_info info;
+ struct switchdev_notifier_fdb_info info = {};
info.addr = recv_info->addr;
info.vid = recv_info->vid;
@@ -3107,6 +3108,7 @@ struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev,
static int rocker_netdevice_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
+ struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct netdev_notifier_changeupper_info *info;
struct rocker_port *rocker_port;
@@ -3123,7 +3125,8 @@ static int rocker_netdevice_event(struct notifier_block *unused,
rocker_port = netdev_priv(dev);
if (info->linking) {
err = rocker_world_port_master_linked(rocker_port,
- info->upper_dev);
+ info->upper_dev,
+ extack);
if (err)
netdev_warn(dev, "failed to reflect master linked (err %d)\n",
err);
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 967a634ee9ac..3e1ca7a8d029 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -1822,7 +1822,7 @@ static void ofdpa_port_fdb_learn_work(struct work_struct *work)
container_of(work, struct ofdpa_fdb_learn_work, work);
bool removing = (lw->flags & OFDPA_OP_FLAG_REMOVE);
bool learned = (lw->flags & OFDPA_OP_FLAG_LEARNED);
- struct switchdev_notifier_fdb_info info;
+ struct switchdev_notifier_fdb_info info = {};
info.addr = lw->addr;
info.vid = lw->vid;
@@ -2571,8 +2571,10 @@ static int ofdpa_port_obj_fdb_del(struct rocker_port *rocker_port,
}
static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port,
- struct net_device *bridge)
+ struct net_device *bridge,
+ struct netlink_ext_ack *extack)
{
+ struct net_device *dev = ofdpa_port->dev;
int err;
/* Port is joining bridge, so the internal VLAN for the
@@ -2592,13 +2594,21 @@ static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port,
ofdpa_port->bridge_dev = bridge;
- return ofdpa_port_vlan_add(ofdpa_port, OFDPA_UNTAGGED_VID, 0);
+ err = ofdpa_port_vlan_add(ofdpa_port, OFDPA_UNTAGGED_VID, 0);
+ if (err)
+ return err;
+
+ return switchdev_bridge_port_offload(dev, dev, NULL, NULL, NULL,
+ false, extack);
}
static int ofdpa_port_bridge_leave(struct ofdpa_port *ofdpa_port)
{
+ struct net_device *dev = ofdpa_port->dev;
int err;
+ switchdev_bridge_port_unoffload(dev, NULL, NULL, NULL);
+
err = ofdpa_port_vlan_del(ofdpa_port, OFDPA_UNTAGGED_VID, 0);
if (err)
return err;
@@ -2637,13 +2647,14 @@ static int ofdpa_port_ovs_changed(struct ofdpa_port *ofdpa_port,
}
static int ofdpa_port_master_linked(struct rocker_port *rocker_port,
- struct net_device *master)
+ struct net_device *master,
+ struct netlink_ext_ack *extack)
{
struct ofdpa_port *ofdpa_port = rocker_port->wpriv;
int err = 0;
if (netif_is_bridge_master(master))
- err = ofdpa_port_bridge_join(ofdpa_port, master);
+ err = ofdpa_port_bridge_join(ofdpa_port, master, extack);
else if (netif_is_ovs_master(master))
err = ofdpa_port_ovs_changed(ofdpa_port, master);
return err;
diff --git a/drivers/net/ethernet/samsung/Kconfig b/drivers/net/ethernet/samsung/Kconfig
index 0582e110b1c0..2a6c2658d284 100644
--- a/drivers/net/ethernet/samsung/Kconfig
+++ b/drivers/net/ethernet/samsung/Kconfig
@@ -20,9 +20,9 @@ if NET_VENDOR_SAMSUNG
config SXGBE_ETH
tristate "Samsung 10G/2.5G/1G SXGBE Ethernet driver"
depends on HAS_IOMEM && HAS_DMA
+ depends on PTP_1588_CLOCK_OPTIONAL
select PHYLIB
select CRC32
- imply PTP_1588_CLOCK
help
This is the driver for the SXGBE 10G Ethernet IP block found on
Samsung platforms.
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
index 7f8b10c49660..98edb01024f0 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
@@ -274,7 +274,9 @@ static u32 sxgbe_usec2riwt(u32 usec, struct sxgbe_priv_data *priv)
}
static int sxgbe_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct sxgbe_priv_data *priv = netdev_priv(dev);
@@ -285,7 +287,9 @@ static int sxgbe_get_coalesce(struct net_device *dev,
}
static int sxgbe_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct sxgbe_priv_data *priv = netdev_priv(dev);
unsigned int rx_riwt;
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index 090bcd2fb758..6781aa636d58 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -1964,7 +1964,7 @@ static const struct net_device_ops sxgbe_netdev_ops = {
.ndo_set_features = sxgbe_set_features,
.ndo_set_rx_mode = sxgbe_set_rx_mode,
.ndo_tx_timeout = sxgbe_tx_timeout,
- .ndo_do_ioctl = sxgbe_ioctl,
+ .ndo_eth_ioctl = sxgbe_ioctl,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = sxgbe_poll_controller,
#endif
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index 5e37c8313725..97ce64079855 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -19,9 +19,9 @@ if NET_VENDOR_SOLARFLARE
config SFC
tristate "Solarflare SFC9000/SFC9100/EF100-family support"
depends on PCI
+ depends on PTP_1588_CLOCK_OPTIONAL
select MDIO
select CRC32
- imply PTP_1588_CLOCK
help
This driver supports 10/40-gigabit Ethernet cards based on
the Solarflare SFC9000-family and SFC9100-family controllers.
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 37fcf2eb0741..a295e2621cf3 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -591,7 +591,7 @@ static const struct net_device_ops efx_netdev_ops = {
.ndo_tx_timeout = efx_watchdog,
.ndo_start_xmit = efx_hard_start_xmit,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = efx_ioctl,
+ .ndo_eth_ioctl = efx_ioctl,
.ndo_change_mtu = efx_change_mtu,
.ndo_set_mac_address = efx_set_mac_address,
.ndo_set_rx_mode = efx_set_rx_mode,
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 058d9fe41d99..e002ce21788d 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -97,7 +97,9 @@ static void efx_ethtool_get_regs(struct net_device *net_dev,
*/
static int efx_ethtool_get_coalesce(struct net_device *net_dev,
- struct ethtool_coalesce *coalesce)
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct efx_nic *efx = netdev_priv(net_dev);
unsigned int tx_usecs, rx_usecs;
@@ -115,7 +117,9 @@ static int efx_ethtool_get_coalesce(struct net_device *net_dev,
}
static int efx_ethtool_set_coalesce(struct net_device *net_dev,
- struct ethtool_coalesce *coalesce)
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct efx_nic *efx = netdev_priv(net_dev);
struct efx_channel *channel;
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
index 9ec752a43c75..c177ea0f301e 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -2219,7 +2219,7 @@ static const struct net_device_ops ef4_netdev_ops = {
.ndo_tx_timeout = ef4_watchdog,
.ndo_start_xmit = ef4_hard_start_xmit,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = ef4_ioctl,
+ .ndo_eth_ioctl = ef4_ioctl,
.ndo_change_mtu = ef4_change_mtu,
.ndo_set_mac_address = ef4_set_mac_address,
.ndo_set_rx_mode = ef4_set_rx_mode,
diff --git a/drivers/net/ethernet/sfc/falcon/ethtool.c b/drivers/net/ethernet/sfc/falcon/ethtool.c
index a6bae6a234ba..137e8a7aeaa1 100644
--- a/drivers/net/ethernet/sfc/falcon/ethtool.c
+++ b/drivers/net/ethernet/sfc/falcon/ethtool.c
@@ -577,7 +577,9 @@ static int ef4_ethtool_nway_reset(struct net_device *net_dev)
*/
static int ef4_ethtool_get_coalesce(struct net_device *net_dev,
- struct ethtool_coalesce *coalesce)
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct ef4_nic *efx = netdev_priv(net_dev);
unsigned int tx_usecs, rx_usecs;
@@ -595,7 +597,9 @@ static int ef4_ethtool_get_coalesce(struct net_device *net_dev,
}
static int ef4_ethtool_set_coalesce(struct net_device *net_dev,
- struct ethtool_coalesce *coalesce)
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct ef4_nic *efx = netdev_priv(net_dev);
struct ef4_channel *channel;
diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index 2b29fd4cbdf4..062f7844c496 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -820,7 +820,7 @@ static const struct net_device_ops ioc3_netdev_ops = {
.ndo_tx_timeout = ioc3_timeout,
.ndo_get_stats = ioc3_get_stats,
.ndo_set_rx_mode = ioc3_set_multicast_list,
- .ndo_do_ioctl = ioc3_ioctl,
+ .ndo_eth_ioctl = ioc3_ioctl,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = ioc3_set_mac_address,
};
diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c
index 0c396ecd3389..efce834d8ee6 100644
--- a/drivers/net/ethernet/sgi/meth.c
+++ b/drivers/net/ethernet/sgi/meth.c
@@ -812,7 +812,7 @@ static const struct net_device_ops meth_netdev_ops = {
.ndo_open = meth_open,
.ndo_stop = meth_release,
.ndo_start_xmit = meth_tx,
- .ndo_do_ioctl = meth_ioctl,
+ .ndo_eth_ioctl = meth_ioctl,
.ndo_tx_timeout = meth_tx_timeout,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c
index 676b193833c0..3d1a18a01ce5 100644
--- a/drivers/net/ethernet/sis/sis190.c
+++ b/drivers/net/ethernet/sis/sis190.c
@@ -1841,7 +1841,7 @@ static int sis190_mac_addr(struct net_device *dev, void *p)
static const struct net_device_ops sis190_netdev_ops = {
.ndo_open = sis190_open,
.ndo_stop = sis190_close,
- .ndo_do_ioctl = sis190_ioctl,
+ .ndo_eth_ioctl = sis190_ioctl,
.ndo_start_xmit = sis190_start_xmit,
.ndo_tx_timeout = sis190_tx_timeout,
.ndo_set_rx_mode = sis190_set_rx_mode,
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index cff87de9178a..60a0c0e9ded2 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -404,7 +404,7 @@ static const struct net_device_ops sis900_netdev_ops = {
.ndo_set_rx_mode = set_rx_mode,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
- .ndo_do_ioctl = mii_ioctl,
+ .ndo_eth_ioctl = mii_ioctl,
.ndo_tx_timeout = sis900_tx_timeout,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = sis900_poll,
diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig
index c52a38df0e0d..72e42a868346 100644
--- a/drivers/net/ethernet/smsc/Kconfig
+++ b/drivers/net/ethernet/smsc/Kconfig
@@ -23,6 +23,7 @@ config SMC9194
tristate "SMC 9194 support"
depends on ISA
select CRC32
+ select NETDEV_LEGACY_INIT
help
This is support for the SMC9xxx based Ethernet cards. Choose this
option if you have a DELL laptop with the docking station, or
diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c
index 51cd7dca91cd..44daf79a8f97 100644
--- a/drivers/net/ethernet/smsc/epic100.c
+++ b/drivers/net/ethernet/smsc/epic100.c
@@ -312,7 +312,7 @@ static const struct net_device_ops epic_netdev_ops = {
.ndo_tx_timeout = epic_tx_timeout,
.ndo_get_stats = epic_get_stats,
.ndo_set_rx_mode = set_rx_mode,
- .ndo_do_ioctl = netdev_ioctl,
+ .ndo_eth_ioctl = netdev_ioctl,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
};
diff --git a/drivers/net/ethernet/smsc/smc9194.c b/drivers/net/ethernet/smsc/smc9194.c
index bf7c8c8b1350..0ce403fa5f1a 100644
--- a/drivers/net/ethernet/smsc/smc9194.c
+++ b/drivers/net/ethernet/smsc/smc9194.c
@@ -1508,7 +1508,7 @@ MODULE_PARM_DESC(io, "SMC 99194 I/O base address");
MODULE_PARM_DESC(irq, "SMC 99194 IRQ number");
MODULE_PARM_DESC(ifport, "SMC 99194 interface port (0-default, 1-TP, 2-AUI)");
-int __init init_module(void)
+static int __init smc_init_module(void)
{
if (io == 0)
printk(KERN_WARNING
@@ -1518,13 +1518,15 @@ int __init init_module(void)
devSMC9194 = smc_init(-1);
return PTR_ERR_OR_ZERO(devSMC9194);
}
+module_init(smc_init_module);
-void __exit cleanup_module(void)
+static void __exit smc_cleanup_module(void)
{
unregister_netdev(devSMC9194);
free_irq(devSMC9194->irq, devSMC9194);
release_region(devSMC9194->base_addr, SMC_IO_EXTENT);
free_netdev(devSMC9194);
}
+module_exit(smc_cleanup_module);
#endif /* MODULE */
diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c
index f2a50eb3c1e0..42fc37c7887a 100644
--- a/drivers/net/ethernet/smsc/smc91c92_cs.c
+++ b/drivers/net/ethernet/smsc/smc91c92_cs.c
@@ -294,7 +294,7 @@ static const struct net_device_ops smc_netdev_ops = {
.ndo_tx_timeout = smc_tx_timeout,
.ndo_set_config = s9k_config,
.ndo_set_rx_mode = set_rx_mode,
- .ndo_do_ioctl = smc_ioctl,
+ .ndo_eth_ioctl = smc_ioctl,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
};
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 556a9790cdcf..199a97339280 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -2148,7 +2148,7 @@ static const struct net_device_ops smsc911x_netdev_ops = {
.ndo_start_xmit = smsc911x_hard_start_xmit,
.ndo_get_stats = smsc911x_get_stats,
.ndo_set_rx_mode = smsc911x_set_multicast_list,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = smsc911x_set_mac_address,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c
index c1dab009415d..fdbd2a43e267 100644
--- a/drivers/net/ethernet/smsc/smsc9420.c
+++ b/drivers/net/ethernet/smsc/smsc9420.c
@@ -1482,7 +1482,7 @@ static const struct net_device_ops smsc9420_netdev_ops = {
.ndo_start_xmit = smsc9420_hard_start_xmit,
.ndo_get_stats = smsc9420_get_stats,
.ndo_set_rx_mode = smsc9420_set_multicast_list,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index 20d148c019d8..1f46af136aa8 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -532,7 +532,9 @@ static void netsec_et_get_drvinfo(struct net_device *net_device,
}
static int netsec_et_get_coalesce(struct net_device *net_device,
- struct ethtool_coalesce *et_coalesce)
+ struct ethtool_coalesce *et_coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct netsec_priv *priv = netdev_priv(net_device);
@@ -542,7 +544,9 @@ static int netsec_et_get_coalesce(struct net_device *net_device,
}
static int netsec_et_set_coalesce(struct net_device *net_device,
- struct ethtool_coalesce *et_coalesce)
+ struct ethtool_coalesce *et_coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct netsec_priv *priv = netdev_priv(net_device);
@@ -1544,7 +1548,7 @@ static int netsec_start_gmac(struct netsec_priv *priv)
netsec_write(priv, NETSEC_REG_NRM_RX_INTEN_CLR, ~0);
netsec_write(priv, NETSEC_REG_NRM_TX_INTEN_CLR, ~0);
- netsec_et_set_coalesce(priv->ndev, &priv->et_coalesce);
+ netsec_et_set_coalesce(priv->ndev, &priv->et_coalesce, NULL, NULL);
if (netsec_mac_write(priv, GMAC_REG_OMR, value))
return -ETIMEDOUT;
@@ -1831,7 +1835,7 @@ static const struct net_device_ops netsec_netdev_ops = {
.ndo_set_features = netsec_netdev_set_features,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = phy_do_ioctl,
+ .ndo_eth_ioctl = phy_do_ioctl,
.ndo_xdp_xmit = netsec_xdp_xmit,
.ndo_bpf = netsec_xdp,
};
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index 5eb6bb4f7b6c..ae31ed93aaf0 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -1543,7 +1543,7 @@ static const struct net_device_ops ave_netdev_ops = {
.ndo_open = ave_open,
.ndo_stop = ave_stop,
.ndo_start_xmit = ave_start_xmit,
- .ndo_do_ioctl = ave_ioctl,
+ .ndo_eth_ioctl = ave_ioctl,
.ndo_set_rx_mode = ave_set_rx_mode,
.ndo_get_stats64 = ave_get_stats64,
.ndo_set_mac_address = ave_set_mac_address,
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index ac3c248d4f9b..929cfc22cd0c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -2,12 +2,12 @@
config STMMAC_ETH
tristate "STMicroelectronics Multi-Gigabit Ethernet driver"
depends on HAS_IOMEM && HAS_DMA
+ depends on PTP_1588_CLOCK_OPTIONAL
select MII
select PCS_XPCS
select PAGE_POOL
select PHYLINK
select CRC32
- imply PTP_1588_CLOCK
select RESET_CONTROLLER
help
This is the driver for the Ethernet IPs built around a
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 5fecc83f175b..b6d945ea903d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -58,6 +58,16 @@
#undef FRAME_FILTER_DEBUG
/* #define FRAME_FILTER_DEBUG */
+struct stmmac_txq_stats {
+ unsigned long tx_pkt_n;
+ unsigned long tx_normal_irq_n;
+};
+
+struct stmmac_rxq_stats {
+ unsigned long rx_pkt_n;
+ unsigned long rx_normal_irq_n;
+};
+
/* Extra statistic and debug information exposed by ethtool */
struct stmmac_extra_stats {
/* Transmit errors */
@@ -189,6 +199,9 @@ struct stmmac_extra_stats {
unsigned long mtl_est_hlbf;
unsigned long mtl_est_btre;
unsigned long mtl_est_btrlm;
+ /* per queue statistics */
+ struct stmmac_txq_stats txq_stats[MTL_MAX_TX_QUEUES];
+ struct stmmac_rxq_stats rxq_stats[MTL_MAX_RX_QUEUES];
};
/* Safety Feature statistics exposed by ethtool */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index 28dd0ed85a82..f7dc8458cde8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -289,10 +289,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
val &= ~NSS_COMMON_GMAC_CTL_PHY_IFACE_SEL;
break;
default:
- dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
- phy_modes(gmac->phy_mode));
- err = -EINVAL;
- goto err_remove_config_dt;
+ goto err_unsupported_phy;
}
regmap_write(gmac->nss_common, NSS_COMMON_GMAC_CTL(gmac->id), val);
@@ -309,10 +306,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
NSS_COMMON_CLK_SRC_CTRL_OFFSET(gmac->id);
break;
default:
- dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
- phy_modes(gmac->phy_mode));
- err = -EINVAL;
- goto err_remove_config_dt;
+ goto err_unsupported_phy;
}
regmap_write(gmac->nss_common, NSS_COMMON_CLK_SRC_CTRL, val);
@@ -329,8 +323,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
NSS_COMMON_CLK_GATE_GMII_TX_EN(gmac->id);
break;
default:
- /* We don't get here; the switch above will have errored out */
- unreachable();
+ goto err_unsupported_phy;
}
regmap_write(gmac->nss_common, NSS_COMMON_CLK_GATE, val);
@@ -361,6 +354,11 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
return 0;
+err_unsupported_phy:
+ dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
+ phy_modes(gmac->phy_mode));
+ err = -EINVAL;
+
err_remove_config_dt:
stmmac_remove_config_dt(pdev, plat_dat);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 280ac0129572..ed817011a94a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -21,7 +21,6 @@
#include <linux/delay.h>
#include <linux/mfd/syscon.h>
#include <linux/regmap.h>
-#include <linux/pm_runtime.h>
#include "stmmac_platform.h"
@@ -1529,9 +1528,6 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv)
return ret;
}
- pm_runtime_enable(dev);
- pm_runtime_get_sync(dev);
-
if (bsp_priv->integrated_phy)
rk_gmac_integrated_phy_powerup(bsp_priv);
@@ -1540,14 +1536,9 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv)
static void rk_gmac_powerdown(struct rk_priv_data *gmac)
{
- struct device *dev = &gmac->pdev->dev;
-
if (gmac->integrated_phy)
rk_gmac_integrated_phy_powerdown(gmac);
- pm_runtime_put_sync(dev);
- pm_runtime_disable(dev);
-
phy_power_on(gmac, false);
gmac_clk_enable(gmac, false);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
index e63270267578..9292a1fab7d3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -170,13 +170,16 @@ int dwmac4_dma_interrupt(void __iomem *ioaddr,
x->normal_irq_n++;
if (likely(intr_status & DMA_CHAN_STATUS_RI)) {
x->rx_normal_irq_n++;
+ x->rxq_stats[chan].rx_normal_irq_n++;
ret |= handle_rx;
}
- if (likely(intr_status & (DMA_CHAN_STATUS_TI |
- DMA_CHAN_STATUS_TBU))) {
+ if (likely(intr_status & DMA_CHAN_STATUS_TI)) {
x->tx_normal_irq_n++;
+ x->txq_stats[chan].tx_normal_irq_n++;
ret |= handle_tx;
}
+ if (unlikely(intr_status & DMA_CHAN_STATUS_TBU))
+ ret |= handle_tx;
if (unlikely(intr_status & DMA_CHAN_STATUS_ERI))
x->rx_early_irq++;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index fcdb1d20389b..43eead726886 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -339,9 +339,9 @@ static inline bool stmmac_xdp_is_enabled(struct stmmac_priv *priv)
static inline unsigned int stmmac_rx_offset(struct stmmac_priv *priv)
{
if (stmmac_xdp_is_enabled(priv))
- return XDP_PACKET_HEADROOM + NET_IP_ALIGN;
+ return XDP_PACKET_HEADROOM;
- return NET_SKB_PAD + NET_IP_ALIGN;
+ return 0;
}
void stmmac_disable_rx_queue(struct stmmac_priv *priv, u32 queue);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index d0ce608b81c3..d89455803bed 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -261,6 +261,18 @@ static const struct stmmac_stats stmmac_mmc[] = {
};
#define STMMAC_MMC_STATS_LEN ARRAY_SIZE(stmmac_mmc)
+static const char stmmac_qstats_tx_string[][ETH_GSTRING_LEN] = {
+ "tx_pkt_n",
+ "tx_irq_n",
+#define STMMAC_TXQ_STATS ARRAY_SIZE(stmmac_qstats_tx_string)
+};
+
+static const char stmmac_qstats_rx_string[][ETH_GSTRING_LEN] = {
+ "rx_pkt_n",
+ "rx_irq_n",
+#define STMMAC_RXQ_STATS ARRAY_SIZE(stmmac_qstats_rx_string)
+};
+
static void stmmac_ethtool_getdrvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
{
@@ -510,6 +522,31 @@ stmmac_set_pauseparam(struct net_device *netdev,
}
}
+static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data)
+{
+ u32 tx_cnt = priv->plat->tx_queues_to_use;
+ u32 rx_cnt = priv->plat->rx_queues_to_use;
+ int q, stat;
+ char *p;
+
+ for (q = 0; q < tx_cnt; q++) {
+ p = (char *)priv + offsetof(struct stmmac_priv,
+ xstats.txq_stats[q].tx_pkt_n);
+ for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) {
+ *data++ = (*(u64 *)p);
+ p += sizeof(u64 *);
+ }
+ }
+ for (q = 0; q < rx_cnt; q++) {
+ p = (char *)priv + offsetof(struct stmmac_priv,
+ xstats.rxq_stats[q].rx_pkt_n);
+ for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) {
+ *data++ = (*(u64 *)p);
+ p += sizeof(u64 *);
+ }
+ }
+}
+
static void stmmac_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *dummy, u64 *data)
{
@@ -560,16 +597,21 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
data[j++] = (stmmac_gstrings_stats[i].sizeof_stat ==
sizeof(u64)) ? (*(u64 *)p) : (*(u32 *)p);
}
+ stmmac_get_per_qstats(priv, &data[j]);
}
static int stmmac_get_sset_count(struct net_device *netdev, int sset)
{
struct stmmac_priv *priv = netdev_priv(netdev);
+ u32 tx_cnt = priv->plat->tx_queues_to_use;
+ u32 rx_cnt = priv->plat->rx_queues_to_use;
int i, len, safety_len = 0;
switch (sset) {
case ETH_SS_STATS:
- len = STMMAC_STATS_LEN;
+ len = STMMAC_STATS_LEN +
+ STMMAC_TXQ_STATS * tx_cnt +
+ STMMAC_RXQ_STATS * rx_cnt;
if (priv->dma_cap.rmon)
len += STMMAC_MMC_STATS_LEN;
@@ -592,6 +634,28 @@ static int stmmac_get_sset_count(struct net_device *netdev, int sset)
}
}
+static void stmmac_get_qstats_string(struct stmmac_priv *priv, u8 *data)
+{
+ u32 tx_cnt = priv->plat->tx_queues_to_use;
+ u32 rx_cnt = priv->plat->rx_queues_to_use;
+ int q, stat;
+
+ for (q = 0; q < tx_cnt; q++) {
+ for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) {
+ snprintf(data, ETH_GSTRING_LEN, "q%d_%s", q,
+ stmmac_qstats_tx_string[stat]);
+ data += ETH_GSTRING_LEN;
+ }
+ }
+ for (q = 0; q < rx_cnt; q++) {
+ for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) {
+ snprintf(data, ETH_GSTRING_LEN, "q%d_%s", q,
+ stmmac_qstats_rx_string[stat]);
+ data += ETH_GSTRING_LEN;
+ }
+ }
+}
+
static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
int i;
@@ -622,6 +686,7 @@ static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data)
ETH_GSTRING_LEN);
p += ETH_GSTRING_LEN;
}
+ stmmac_get_qstats_string(priv, p);
break;
case ETH_SS_TEST:
stmmac_selftest_get_strings(priv, p);
@@ -809,7 +874,9 @@ static int __stmmac_get_coalesce(struct net_device *dev,
}
static int stmmac_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
return __stmmac_get_coalesce(dev, ec, -1);
}
@@ -893,7 +960,9 @@ static int __stmmac_set_coalesce(struct net_device *dev,
}
static int stmmac_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
return __stmmac_set_coalesce(dev, ec, -1);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 7b8404a21544..ed0cd3920171 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2500,6 +2500,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
} else {
priv->dev->stats.tx_packets++;
priv->xstats.tx_pkt_n++;
+ priv->xstats.txq_stats[queue].tx_pkt_n++;
}
if (skb)
stmmac_get_tx_hwtstamp(priv, p, skb);
@@ -4914,6 +4915,10 @@ read_again:
prefetch(np);
+ /* Ensure a valid XSK buffer before proceed */
+ if (!buf->xdp)
+ break;
+
if (priv->extend_desc)
stmmac_rx_extended_status(priv, &priv->dev->stats,
&priv->xstats,
@@ -4934,10 +4939,6 @@ read_again:
continue;
}
- /* Ensure a valid XSK buffer before proceed */
- if (!buf->xdp)
- break;
-
/* XSK pool expects RX frame 1:1 mapped to XSK buffer */
if (likely(status & rx_not_ls)) {
xsk_buff_free(buf->xdp);
@@ -5000,6 +5001,9 @@ read_again:
stmmac_finalize_xdp_rx(priv, xdp_status);
+ priv->xstats.rx_pkt_n += count;
+ priv->xstats.rxq_stats[queue].rx_pkt_n += count;
+
if (xsk_uses_need_wakeup(rx_q->xsk_pool)) {
if (failure || stmmac_rx_dirty(priv, queue) > 0)
xsk_set_rx_need_wakeup(rx_q->xsk_pool);
@@ -5287,6 +5291,7 @@ drain_data:
stmmac_rx_refill(priv, queue);
priv->xstats.rx_pkt_n += count;
+ priv->xstats.rxq_stats[queue].rx_pkt_n += count;
return count;
}
@@ -6451,7 +6456,7 @@ static const struct net_device_ops stmmac_netdev_ops = {
.ndo_set_features = stmmac_set_features,
.ndo_set_rx_mode = stmmac_set_rx_mode,
.ndo_tx_timeout = stmmac_tx_timeout,
- .ndo_do_ioctl = stmmac_ioctl,
+ .ndo_eth_ioctl = stmmac_ioctl,
.ndo_setup_tc = stmmac_setup_tc,
.ndo_select_queue = stmmac_select_queue,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 4f3b6437b114..8160087ee92f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -884,11 +884,13 @@ static int tc_setup_taprio(struct stmmac_priv *priv,
return 0;
disable:
- mutex_lock(&priv->plat->est->lock);
- priv->plat->est->enable = false;
- stmmac_est_configure(priv, priv->ioaddr, priv->plat->est,
- priv->plat->clk_ptp_rate);
- mutex_unlock(&priv->plat->est->lock);
+ if (priv->plat->est) {
+ mutex_lock(&priv->plat->est->lock);
+ priv->plat->est->enable = false;
+ stmmac_est_configure(priv, priv->ioaddr, priv->plat->est,
+ priv->plat->clk_ptp_rate);
+ mutex_unlock(&priv->plat->est->lock);
+ }
priv->plat->fpe_cfg->enable = false;
stmmac_fpe_configure(priv, priv->ioaddr,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
index 105821b53020..2a616c6f7cd0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
@@ -34,18 +34,18 @@ static int stmmac_xdp_enable_pool(struct stmmac_priv *priv,
need_update = netif_running(priv->dev) && stmmac_xdp_is_enabled(priv);
if (need_update) {
- stmmac_disable_rx_queue(priv, queue);
- stmmac_disable_tx_queue(priv, queue);
napi_disable(&ch->rx_napi);
napi_disable(&ch->tx_napi);
+ stmmac_disable_rx_queue(priv, queue);
+ stmmac_disable_tx_queue(priv, queue);
}
set_bit(queue, priv->af_xdp_zc_qps);
if (need_update) {
- napi_enable(&ch->rxtx_napi);
stmmac_enable_rx_queue(priv, queue);
stmmac_enable_tx_queue(priv, queue);
+ napi_enable(&ch->rxtx_napi);
err = stmmac_xsk_wakeup(priv->dev, queue, XDP_WAKEUP_RX);
if (err)
@@ -72,10 +72,10 @@ static int stmmac_xdp_disable_pool(struct stmmac_priv *priv, u16 queue)
need_update = netif_running(priv->dev) && stmmac_xdp_is_enabled(priv);
if (need_update) {
+ napi_disable(&ch->rxtx_napi);
stmmac_disable_rx_queue(priv, queue);
stmmac_disable_tx_queue(priv, queue);
synchronize_rcu();
- napi_disable(&ch->rxtx_napi);
}
xsk_pool_dma_unmap(pool, STMMAC_RX_DMA_ATTR);
@@ -83,10 +83,10 @@ static int stmmac_xdp_disable_pool(struct stmmac_priv *priv, u16 queue)
clear_bit(queue, priv->af_xdp_zc_qps);
if (need_update) {
- napi_enable(&ch->rx_napi);
- napi_enable(&ch->tx_napi);
stmmac_enable_rx_queue(priv, queue);
stmmac_enable_tx_queue(priv, queue);
+ napi_enable(&ch->rx_napi);
+ napi_enable(&ch->tx_napi);
}
return 0;
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index 981685c88308..287ae4c538aa 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -4876,7 +4876,7 @@ static const struct net_device_ops cas_netdev_ops = {
.ndo_start_xmit = cas_start_xmit,
.ndo_get_stats = cas_get_stats,
.ndo_set_rx_mode = cas_set_multicast,
- .ndo_do_ioctl = cas_ioctl,
+ .ndo_eth_ioctl = cas_ioctl,
.ndo_tx_timeout = cas_tx_timeout,
.ndo_change_mtu = cas_change_mtu,
.ndo_set_mac_address = eth_mac_addr,
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 860644d182ab..a68a01d1b2b1 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -9208,7 +9208,7 @@ static int niu_get_of_props(struct niu *np)
else
dp = pci_device_to_OF_node(np->pdev);
- phy_type = of_get_property(dp, "phy-type", &prop_len);
+ phy_type = of_get_property(dp, "phy-type", NULL);
if (!phy_type) {
netdev_err(dev, "%pOF: OF node lacks phy-type property\n", dp);
return -EINVAL;
@@ -9242,12 +9242,12 @@ static int niu_get_of_props(struct niu *np)
return -EINVAL;
}
- model = of_get_property(dp, "model", &prop_len);
+ model = of_get_property(dp, "model", NULL);
if (model)
strcpy(np->vpd.model, model);
- if (of_find_property(dp, "hot-swappable-phy", &prop_len)) {
+ if (of_find_property(dp, "hot-swappable-phy", NULL)) {
np->flags |= (NIU_FLAGS_10G | NIU_FLAGS_FIBER |
NIU_FLAGS_HOTPLUG_PHY);
}
@@ -9668,7 +9668,7 @@ static const struct net_device_ops niu_netdev_ops = {
.ndo_set_rx_mode = niu_set_rx_mode,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = niu_set_mac_addr,
- .ndo_do_ioctl = niu_ioctl,
+ .ndo_eth_ioctl = niu_ioctl,
.ndo_tx_timeout = niu_tx_timeout,
.ndo_change_mtu = niu_change_mtu,
};
@@ -9722,7 +9722,6 @@ static int niu_pci_init_one(struct pci_dev *pdev,
struct net_device *dev;
struct niu *np;
int err;
- u64 dma_mask;
niu_driver_version();
@@ -9777,18 +9776,11 @@ static int niu_pci_init_one(struct pci_dev *pdev,
PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE |
PCI_EXP_DEVCTL_RELAX_EN);
- dma_mask = DMA_BIT_MASK(44);
- err = pci_set_dma_mask(pdev, dma_mask);
- if (!err) {
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
+ if (!err)
dev->features |= NETIF_F_HIGHDMA;
- err = pci_set_consistent_dma_mask(pdev, dma_mask);
- if (err) {
- dev_err(&pdev->dev, "Unable to obtain 44 bit DMA for consistent allocations, aborting\n");
- goto err_out_release_parent;
- }
- }
if (err) {
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "No usable DMA configuration, aborting\n");
goto err_out_release_parent;
diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c
index cfb9e21b18b7..d72018a60c0f 100644
--- a/drivers/net/ethernet/sun/sungem.c
+++ b/drivers/net/ethernet/sun/sungem.c
@@ -2831,7 +2831,7 @@ static const struct net_device_ops gem_netdev_ops = {
.ndo_start_xmit = gem_start_xmit,
.ndo_get_stats = gem_get_stats,
.ndo_set_rx_mode = gem_set_multicast,
- .ndo_do_ioctl = gem_ioctl,
+ .ndo_eth_ioctl = gem_ioctl,
.ndo_tx_timeout = gem_tx_timeout,
.ndo_change_mtu = gem_change_mtu,
.ndo_validate_addr = eth_validate_addr,
diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index a2c1a404c52d..62f81b0d14ed 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -251,14 +251,6 @@ static u32 pci_hme_read_desc32(hme32 *p)
((__hp)->write_txd((__txd), (__flags), (__addr)))
#define hme_read_desc32(__hp, __p) \
((__hp)->read_desc32(__p))
-#define hme_dma_map(__hp, __ptr, __size, __dir) \
- ((__hp)->dma_map((__hp)->dma_dev, (__ptr), (__size), (__dir)))
-#define hme_dma_unmap(__hp, __addr, __size, __dir) \
- ((__hp)->dma_unmap((__hp)->dma_dev, (__addr), (__size), (__dir)))
-#define hme_dma_sync_for_cpu(__hp, __addr, __size, __dir) \
- ((__hp)->dma_sync_for_cpu((__hp)->dma_dev, (__addr), (__size), (__dir)))
-#define hme_dma_sync_for_device(__hp, __addr, __size, __dir) \
- ((__hp)->dma_sync_for_device((__hp)->dma_dev, (__addr), (__size), (__dir)))
#else
#ifdef CONFIG_SBUS
/* SBUS only compilation */
@@ -277,14 +269,6 @@ do { (__txd)->tx_addr = (__force hme32)(u32)(__addr); \
(__txd)->tx_flags = (__force hme32)(u32)(__flags); \
} while(0)
#define hme_read_desc32(__hp, __p) ((__force u32)(hme32)*(__p))
-#define hme_dma_map(__hp, __ptr, __size, __dir) \
- dma_map_single((__hp)->dma_dev, (__ptr), (__size), (__dir))
-#define hme_dma_unmap(__hp, __addr, __size, __dir) \
- dma_unmap_single((__hp)->dma_dev, (__addr), (__size), (__dir))
-#define hme_dma_sync_for_cpu(__hp, __addr, __size, __dir) \
- dma_dma_sync_single_for_cpu((__hp)->dma_dev, (__addr), (__size), (__dir))
-#define hme_dma_sync_for_device(__hp, __addr, __size, __dir) \
- dma_dma_sync_single_for_device((__hp)->dma_dev, (__addr), (__size), (__dir))
#else
/* PCI only compilation */
#define hme_write32(__hp, __reg, __val) \
@@ -305,14 +289,6 @@ static inline u32 hme_read_desc32(struct happy_meal *hp, hme32 *p)
{
return le32_to_cpup((__le32 *)p);
}
-#define hme_dma_map(__hp, __ptr, __size, __dir) \
- pci_map_single((__hp)->dma_dev, (__ptr), (__size), (__dir))
-#define hme_dma_unmap(__hp, __addr, __size, __dir) \
- pci_unmap_single((__hp)->dma_dev, (__addr), (__size), (__dir))
-#define hme_dma_sync_for_cpu(__hp, __addr, __size, __dir) \
- pci_dma_sync_single_for_cpu((__hp)->dma_dev, (__addr), (__size), (__dir))
-#define hme_dma_sync_for_device(__hp, __addr, __size, __dir) \
- pci_dma_sync_single_for_device((__hp)->dma_dev, (__addr), (__size), (__dir))
#endif
#endif
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-ethtool.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-ethtool.c
index bc198eadfcab..49f8c6be9459 100644
--- a/drivers/net/ethernet/synopsys/dwc-xlgmac-ethtool.c
+++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-ethtool.c
@@ -146,8 +146,11 @@ static void xlgmac_ethtool_get_channels(struct net_device *netdev,
channel->tx_count = pdata->tx_q_count;
}
-static int xlgmac_ethtool_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+static int
+xlgmac_ethtool_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct xlgmac_pdata *pdata = netdev_priv(netdev);
@@ -158,8 +161,11 @@ static int xlgmac_ethtool_get_coalesce(struct net_device *netdev,
return 0;
}
-static int xlgmac_ethtool_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
+static int
+xlgmac_ethtool_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct xlgmac_pdata *pdata = netdev_priv(netdev);
struct xlgmac_hw_ops *hw_ops = &pdata->hw_ops;
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
index 26d178f8616b..1db7104fef3a 100644
--- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
+++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
@@ -933,7 +933,7 @@ static const struct net_device_ops xlgmac_netdev_ops = {
.ndo_change_mtu = xlgmac_change_mtu,
.ndo_set_mac_address = xlgmac_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = xlgmac_ioctl,
+ .ndo_eth_ioctl = xlgmac_ioctl,
.ndo_vlan_rx_add_vid = xlgmac_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = xlgmac_vlan_rx_kill_vid,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c
index d054c6e83b1c..6b409f9c5863 100644
--- a/drivers/net/ethernet/tehuti/tehuti.c
+++ b/drivers/net/ethernet/tehuti/tehuti.c
@@ -637,7 +637,8 @@ static int bdx_range_check(struct bdx_priv *priv, u32 offset)
-EINVAL : 0;
}
-static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd)
+static int bdx_siocdevprivate(struct net_device *ndev, struct ifreq *ifr,
+ void __user *udata, int cmd)
{
struct bdx_priv *priv = netdev_priv(ndev);
u32 data[3];
@@ -647,7 +648,7 @@ static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd)
DBG("jiffies=%ld cmd=%d\n", jiffies, cmd);
if (cmd != SIOCDEVPRIVATE) {
- error = copy_from_user(data, ifr->ifr_data, sizeof(data));
+ error = copy_from_user(data, udata, sizeof(data));
if (error) {
pr_err("can't copy from user\n");
RET(-EFAULT);
@@ -669,7 +670,7 @@ static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd)
data[2] = READ_REG(priv, data[1]);
DBG("read_reg(0x%x)=0x%x (dec %d)\n", data[1], data[2],
data[2]);
- error = copy_to_user(ifr->ifr_data, data, sizeof(data));
+ error = copy_to_user(udata, data, sizeof(data));
if (error)
RET(-EFAULT);
break;
@@ -688,15 +689,6 @@ static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd)
return 0;
}
-static int bdx_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd)
-{
- ENTER;
- if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
- RET(bdx_ioctl_priv(ndev, ifr, cmd));
- else
- RET(-EOPNOTSUPP);
-}
-
/**
* __bdx_vlan_rx_vid - private helper for adding/killing VLAN vid
* @ndev: network device
@@ -1860,7 +1852,7 @@ static const struct net_device_ops bdx_netdev_ops = {
.ndo_stop = bdx_close,
.ndo_start_xmit = bdx_tx_transmit,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = bdx_ioctl,
+ .ndo_siocdevprivate = bdx_siocdevprivate,
.ndo_set_rx_mode = bdx_setmulti,
.ndo_change_mtu = bdx_change_mtu,
.ndo_set_mac_address = bdx_set_mac,
@@ -2159,8 +2151,10 @@ bdx_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
* @netdev
* @ecoal
*/
-static int
-bdx_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ecoal)
+static int bdx_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ecoal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
u32 rdintcm;
u32 tdintcm;
@@ -2188,8 +2182,10 @@ bdx_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ecoal)
* @netdev
* @ecoal
*/
-static int
-bdx_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ecoal)
+static int bdx_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ecoal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
u32 rdintcm;
u32 tdintcm;
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 67a08cbba859..130346f74ee8 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -27,6 +27,7 @@
#include <linux/sys_soc.h>
#include <linux/dma/ti-cppi5.h>
#include <linux/dma/k3-udma-glue.h>
+#include <net/switchdev.h>
#include "cpsw_ale.h"
#include "cpsw_sl.h"
@@ -518,6 +519,10 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common,
}
napi_enable(&common->napi_rx);
+ if (common->rx_irq_disabled) {
+ common->rx_irq_disabled = false;
+ enable_irq(common->rx_chns.irq);
+ }
dev_dbg(common->dev, "cpsw_nuss started\n");
return 0;
@@ -871,8 +876,12 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget)
dev_dbg(common->dev, "%s num_rx:%d %d\n", __func__, num_rx, budget);
- if (num_rx < budget && napi_complete_done(napi_rx, num_rx))
- enable_irq(common->rx_chns.irq);
+ if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) {
+ if (common->rx_irq_disabled) {
+ common->rx_irq_disabled = false;
+ enable_irq(common->rx_chns.irq);
+ }
+ }
return num_rx;
}
@@ -1077,19 +1086,20 @@ static int am65_cpsw_nuss_tx_poll(struct napi_struct *napi_tx, int budget)
else
num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, tx_chn->id, budget);
- num_tx = min(num_tx, budget);
- if (num_tx < budget) {
- napi_complete(napi_tx);
+ if (num_tx >= budget)
+ return budget;
+
+ if (napi_complete_done(napi_tx, num_tx))
enable_irq(tx_chn->irq);
- }
- return num_tx;
+ return 0;
}
static irqreturn_t am65_cpsw_nuss_rx_irq(int irq, void *dev_id)
{
struct am65_cpsw_common *common = dev_id;
+ common->rx_irq_disabled = true;
disable_irq_nosync(irq);
napi_schedule(&common->napi_rx);
@@ -1479,7 +1489,7 @@ static const struct net_device_ops am65_cpsw_nuss_netdev_ops = {
.ndo_tx_timeout = am65_cpsw_nuss_ndo_host_tx_timeout,
.ndo_vlan_rx_add_vid = am65_cpsw_nuss_ndo_slave_add_vid,
.ndo_vlan_rx_kill_vid = am65_cpsw_nuss_ndo_slave_kill_vid,
- .ndo_do_ioctl = am65_cpsw_nuss_ndo_slave_ioctl,
+ .ndo_eth_ioctl = am65_cpsw_nuss_ndo_slave_ioctl,
.ndo_setup_tc = am65_cpsw_qos_ndo_setup_tc,
.ndo_get_devlink_port = am65_cpsw_ndo_get_devlink_port,
};
@@ -2081,10 +2091,13 @@ bool am65_cpsw_port_dev_check(const struct net_device *ndev)
return false;
}
-static int am65_cpsw_netdevice_port_link(struct net_device *ndev, struct net_device *br_ndev)
+static int am65_cpsw_netdevice_port_link(struct net_device *ndev,
+ struct net_device *br_ndev,
+ struct netlink_ext_ack *extack)
{
struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(ndev);
+ int err;
if (!common->br_members) {
common->hw_bridge_dev = br_ndev;
@@ -2096,6 +2109,11 @@ static int am65_cpsw_netdevice_port_link(struct net_device *ndev, struct net_dev
return -EOPNOTSUPP;
}
+ err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL,
+ false, extack);
+ if (err)
+ return err;
+
common->br_members |= BIT(priv->port->port_id);
am65_cpsw_port_offload_fwd_mark_update(common);
@@ -2108,6 +2126,8 @@ static void am65_cpsw_netdevice_port_unlink(struct net_device *ndev)
struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(ndev);
+ switchdev_bridge_port_unoffload(ndev, NULL, NULL, NULL);
+
common->br_members &= ~BIT(priv->port->port_id);
am65_cpsw_port_offload_fwd_mark_update(common);
@@ -2120,6 +2140,7 @@ static void am65_cpsw_netdevice_port_unlink(struct net_device *ndev)
static int am65_cpsw_netdevice_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
+ struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
struct netdev_notifier_changeupper_info *info;
int ret = NOTIFY_DONE;
@@ -2133,7 +2154,9 @@ static int am65_cpsw_netdevice_event(struct notifier_block *unused,
if (netif_is_bridge_master(info->upper_dev)) {
if (info->linking)
- ret = am65_cpsw_netdevice_port_link(ndev, info->upper_dev);
+ ret = am65_cpsw_netdevice_port_link(ndev,
+ info->upper_dev,
+ extack);
else
am65_cpsw_netdevice_port_unlink(ndev);
}
@@ -2388,21 +2411,6 @@ static const struct devlink_param am65_cpsw_devlink_params[] = {
am65_cpsw_dl_switch_mode_set, NULL),
};
-static void am65_cpsw_unregister_devlink_ports(struct am65_cpsw_common *common)
-{
- struct devlink_port *dl_port;
- struct am65_cpsw_port *port;
- int i;
-
- for (i = 1; i <= common->port_num; i++) {
- port = am65_common_get_port(common, i);
- dl_port = &port->devlink_port;
-
- if (dl_port->registered)
- devlink_port_unregister(dl_port);
- }
-}
-
static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common)
{
struct devlink_port_attrs attrs = {};
@@ -2414,14 +2422,14 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common)
int i;
common->devlink =
- devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv));
+ devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv), dev);
if (!common->devlink)
return -ENOMEM;
dl_priv = devlink_priv(common->devlink);
dl_priv->common = common;
- ret = devlink_register(common->devlink, dev);
+ ret = devlink_register(common->devlink);
if (ret) {
dev_err(dev, "devlink reg fail ret:%d\n", ret);
goto dl_free;
@@ -2464,7 +2472,12 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common)
return ret;
dl_port_unreg:
- am65_cpsw_unregister_devlink_ports(common);
+ for (i = i - 1; i >= 1; i--) {
+ port = am65_common_get_port(common, i);
+ dl_port = &port->devlink_port;
+
+ devlink_port_unregister(dl_port);
+ }
dl_unreg:
devlink_unregister(common->devlink);
dl_free:
@@ -2475,6 +2488,17 @@ dl_free:
static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common)
{
+ struct devlink_port *dl_port;
+ struct am65_cpsw_port *port;
+ int i;
+
+ for (i = 1; i <= common->port_num; i++) {
+ port = am65_common_get_port(common, i);
+ dl_port = &port->devlink_port;
+
+ devlink_port_unregister(dl_port);
+ }
+
if (!AM65_CPSW_IS_CPSW2G(common) &&
IS_ENABLED(CONFIG_TI_K3_AM65_CPSW_SWITCHDEV)) {
devlink_params_unpublish(common->devlink);
@@ -2482,7 +2506,6 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common)
ARRAY_SIZE(am65_cpsw_devlink_params));
}
- am65_cpsw_unregister_devlink_ports(common);
devlink_unregister(common->devlink);
devlink_free(common->devlink);
}
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
index 5d93e346f05e..048ed10143c1 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
@@ -126,6 +126,8 @@ struct am65_cpsw_common {
struct am65_cpsw_rx_chn rx_chns;
struct napi_struct napi_rx;
+ bool rx_irq_disabled;
+
u32 nuss_ver;
u32 cpsw_ver;
unsigned long bus_freq;
diff --git a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c
index 9c29b363e9ae..599708a3e81d 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c
@@ -358,7 +358,7 @@ static int am65_cpsw_port_obj_del(struct net_device *ndev, const void *ctx,
static void am65_cpsw_fdb_offload_notify(struct net_device *ndev,
struct switchdev_notifier_fdb_info *rcv)
{
- struct switchdev_notifier_fdb_info info;
+ struct switchdev_notifier_fdb_info info = {};
info.addr = rcv->addr;
info.vid = rcv->vid;
diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index c20715107075..02d4e51f7306 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c
@@ -1044,7 +1044,7 @@ static const struct net_device_ops cpmac_netdev_ops = {
.ndo_start_xmit = cpmac_start_xmit,
.ndo_tx_timeout = cpmac_tx_timeout,
.ndo_set_rx_mode = cpmac_set_multicast_list,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
};
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index cbbd0f665796..66f7ddd9b1f9 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -431,7 +431,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
skb->protocol = eth_type_trans(skb, ndev);
/* mark skb for recycling */
- skb_mark_for_recycle(skb, page, pool);
+ skb_mark_for_recycle(skb);
netif_receive_skb(skb);
ndev->stats.rx_bytes += len;
@@ -845,7 +845,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
struct ethtool_coalesce coal;
coal.rx_coalesce_usecs = cpsw->coal_intvl;
- cpsw_set_coalesce(ndev, &coal);
+ cpsw_set_coalesce(ndev, &coal, NULL, NULL);
}
cpdma_ctlr_start(cpsw->dma);
@@ -905,7 +905,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
struct cpdma_chan *txch;
int ret, q_idx;
- if (skb_padto(skb, CPSW_MIN_PACKET_SIZE)) {
+ if (skb_put_padto(skb, CPSW_MIN_PACKET_SIZE)) {
cpsw_err(priv, tx_err, "packet pad failed\n");
ndev->stats.tx_dropped++;
return NET_XMIT_DROP;
@@ -1159,7 +1159,7 @@ static const struct net_device_ops cpsw_netdev_ops = {
.ndo_stop = cpsw_ndo_stop,
.ndo_start_xmit = cpsw_ndo_start_xmit,
.ndo_set_mac_address = cpsw_ndo_set_mac_address,
- .ndo_do_ioctl = cpsw_ndo_ioctl,
+ .ndo_eth_ioctl = cpsw_ndo_ioctl,
.ndo_validate_addr = eth_validate_addr,
.ndo_tx_timeout = cpsw_ndo_tx_timeout,
.ndo_set_rx_mode = cpsw_ndo_set_rx_mode,
diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c
index 4619c3a950b0..158c8d3793f4 100644
--- a/drivers/net/ethernet/ti/cpsw_ethtool.c
+++ b/drivers/net/ethernet/ti/cpsw_ethtool.c
@@ -152,7 +152,9 @@ void cpsw_set_msglevel(struct net_device *ndev, u32 value)
priv->msg_enable = value;
}
-int cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal)
+int cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
@@ -160,7 +162,9 @@ int cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal)
return 0;
}
-int cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal)
+int cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct cpsw_priv *priv = netdev_priv(ndev);
u32 int_ctrl;
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index 57d279fdcc9f..7968f24d99c8 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -28,6 +28,7 @@
#include <linux/kmemleak.h>
#include <linux/sys_soc.h>
+#include <net/switchdev.h>
#include <net/page_pool.h>
#include <net/pkt_cls.h>
#include <net/devlink.h>
@@ -374,7 +375,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
skb->protocol = eth_type_trans(skb, ndev);
/* mark skb for recycling */
- skb_mark_for_recycle(skb, page, pool);
+ skb_mark_for_recycle(skb);
netif_receive_skb(skb);
ndev->stats.rx_bytes += len;
@@ -501,7 +502,7 @@ static void cpsw_restore(struct cpsw_priv *priv)
static void cpsw_init_stp_ale_entry(struct cpsw_common *cpsw)
{
- char stpa[] = {0x01, 0x80, 0xc2, 0x0, 0x0, 0x0};
+ static const char stpa[] = {0x01, 0x80, 0xc2, 0x0, 0x0, 0x0};
cpsw_ale_add_mcast(cpsw->ale, stpa,
ALE_PORT_HOST, ALE_SUPER, 0,
@@ -893,7 +894,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
struct ethtool_coalesce coal;
coal.rx_coalesce_usecs = cpsw->coal_intvl;
- cpsw_set_coalesce(ndev, &coal);
+ cpsw_set_coalesce(ndev, &coal, NULL, NULL);
}
cpdma_ctlr_start(cpsw->dma);
@@ -920,7 +921,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
struct cpdma_chan *txch;
int ret, q_idx;
- if (skb_padto(skb, CPSW_MIN_PACKET_SIZE)) {
+ if (skb_put_padto(skb, READ_ONCE(priv->tx_packet_min))) {
cpsw_err(priv, tx_err, "packet pad failed\n");
ndev->stats.tx_dropped++;
return NET_XMIT_DROP;
@@ -1100,7 +1101,7 @@ static int cpsw_ndo_xdp_xmit(struct net_device *ndev, int n,
for (i = 0; i < n; i++) {
xdpf = frames[i];
- if (xdpf->len < CPSW_MIN_PACKET_SIZE)
+ if (xdpf->len < READ_ONCE(priv->tx_packet_min))
break;
if (cpsw_xdp_tx_frame(priv, xdpf, NULL, priv->emac_port))
@@ -1127,7 +1128,7 @@ static const struct net_device_ops cpsw_netdev_ops = {
.ndo_stop = cpsw_ndo_stop,
.ndo_start_xmit = cpsw_ndo_start_xmit,
.ndo_set_mac_address = cpsw_ndo_set_mac_address,
- .ndo_do_ioctl = cpsw_ndo_ioctl,
+ .ndo_eth_ioctl = cpsw_ndo_ioctl,
.ndo_validate_addr = eth_validate_addr,
.ndo_tx_timeout = cpsw_ndo_tx_timeout,
.ndo_set_rx_mode = cpsw_ndo_set_rx_mode,
@@ -1389,6 +1390,7 @@ static int cpsw_create_ports(struct cpsw_common *cpsw)
priv->dev = dev;
priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
priv->emac_port = i + 1;
+ priv->tx_packet_min = CPSW_MIN_PACKET_SIZE;
if (is_valid_ether_addr(slave_data->mac_addr)) {
ether_addr_copy(priv->mac_addr, slave_data->mac_addr);
@@ -1499,10 +1501,12 @@ static void cpsw_port_offload_fwd_mark_update(struct cpsw_common *cpsw)
}
static int cpsw_netdevice_port_link(struct net_device *ndev,
- struct net_device *br_ndev)
+ struct net_device *br_ndev,
+ struct netlink_ext_ack *extack)
{
struct cpsw_priv *priv = netdev_priv(ndev);
struct cpsw_common *cpsw = priv->cpsw;
+ int err;
if (!cpsw->br_members) {
cpsw->hw_bridge_dev = br_ndev;
@@ -1514,6 +1518,11 @@ static int cpsw_netdevice_port_link(struct net_device *ndev,
return -EOPNOTSUPP;
}
+ err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL,
+ false, extack);
+ if (err)
+ return err;
+
cpsw->br_members |= BIT(priv->emac_port);
cpsw_port_offload_fwd_mark_update(cpsw);
@@ -1526,6 +1535,8 @@ static void cpsw_netdevice_port_unlink(struct net_device *ndev)
struct cpsw_priv *priv = netdev_priv(ndev);
struct cpsw_common *cpsw = priv->cpsw;
+ switchdev_bridge_port_unoffload(ndev, NULL, NULL, NULL);
+
cpsw->br_members &= ~BIT(priv->emac_port);
cpsw_port_offload_fwd_mark_update(cpsw);
@@ -1538,6 +1549,7 @@ static void cpsw_netdevice_port_unlink(struct net_device *ndev)
static int cpsw_netdevice_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
+ struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
struct netdev_notifier_changeupper_info *info;
int ret = NOTIFY_DONE;
@@ -1552,7 +1564,8 @@ static int cpsw_netdevice_event(struct notifier_block *unused,
if (netif_is_bridge_master(info->upper_dev)) {
if (info->linking)
ret = cpsw_netdevice_port_link(ndev,
- info->upper_dev);
+ info->upper_dev,
+ extack);
else
cpsw_netdevice_port_unlink(ndev);
}
@@ -1686,6 +1699,7 @@ static int cpsw_dl_switch_mode_set(struct devlink *dl, u32 id,
priv = netdev_priv(sl_ndev);
slave->port_vlan = vlan;
+ WRITE_ONCE(priv->tx_packet_min, CPSW_MIN_PACKET_SIZE_VLAN);
if (netif_running(sl_ndev))
cpsw_port_add_switch_def_ale_entries(priv,
slave);
@@ -1714,6 +1728,7 @@ static int cpsw_dl_switch_mode_set(struct devlink *dl, u32 id,
priv = netdev_priv(slave->ndev);
slave->port_vlan = slave->data->dual_emac_res_vlan;
+ WRITE_ONCE(priv->tx_packet_min, CPSW_MIN_PACKET_SIZE);
cpsw_port_add_dual_emac_def_ale_entries(priv, slave);
}
@@ -1788,14 +1803,14 @@ static int cpsw_register_devlink(struct cpsw_common *cpsw)
struct cpsw_devlink *dl_priv;
int ret = 0;
- cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv));
+ cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv), dev);
if (!cpsw->devlink)
return -ENOMEM;
dl_priv = devlink_priv(cpsw->devlink);
dl_priv->cpsw = cpsw;
- ret = devlink_register(cpsw->devlink, dev);
+ ret = devlink_register(cpsw->devlink);
if (ret) {
dev_err(dev, "DL reg fail ret:%d\n", ret);
goto dl_free;
diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h
index a323bea54faa..435668ee542d 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.h
+++ b/drivers/net/ethernet/ti/cpsw_priv.h
@@ -89,7 +89,8 @@ do { \
#define CPSW_POLL_WEIGHT 64
#define CPSW_RX_VLAN_ENCAP_HDR_SIZE 4
-#define CPSW_MIN_PACKET_SIZE (VLAN_ETH_ZLEN)
+#define CPSW_MIN_PACKET_SIZE_VLAN (VLAN_ETH_ZLEN)
+#define CPSW_MIN_PACKET_SIZE (ETH_ZLEN)
#define CPSW_MAX_PACKET_SIZE (VLAN_ETH_FRAME_LEN +\
ETH_FCS_LEN +\
CPSW_RX_VLAN_ENCAP_HDR_SIZE)
@@ -380,6 +381,7 @@ struct cpsw_priv {
u32 emac_port;
struct cpsw_common *cpsw;
int offload_fwd_mark;
+ u32 tx_packet_min;
};
#define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw)
@@ -462,8 +464,12 @@ void cpsw_mqprio_resume(struct cpsw_slave *slave, struct cpsw_priv *priv);
/* ethtool */
u32 cpsw_get_msglevel(struct net_device *ndev);
void cpsw_set_msglevel(struct net_device *ndev, u32 value);
-int cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal);
-int cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal);
+int cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack);
+int cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack);
int cpsw_get_sset_count(struct net_device *ndev, int sset);
void cpsw_get_strings(struct net_device *ndev, u32 stringset, u8 *data);
void cpsw_get_ethtool_stats(struct net_device *ndev,
diff --git a/drivers/net/ethernet/ti/cpsw_switchdev.c b/drivers/net/ethernet/ti/cpsw_switchdev.c
index f7fb6e17dadd..a7d97d429e06 100644
--- a/drivers/net/ethernet/ti/cpsw_switchdev.c
+++ b/drivers/net/ethernet/ti/cpsw_switchdev.c
@@ -368,7 +368,7 @@ static int cpsw_port_obj_del(struct net_device *ndev, const void *ctx,
static void cpsw_fdb_offload_notify(struct net_device *ndev,
struct switchdev_notifier_fdb_info *rcv)
{
- struct switchdev_notifier_fdb_info info;
+ struct switchdev_notifier_fdb_info info = {};
info.addr = rcv->addr;
info.vid = rcv->vid;
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index c674e34b6839..e8291d848839 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -383,12 +383,16 @@ static void emac_get_drvinfo(struct net_device *ndev,
* emac_get_coalesce - Get interrupt coalesce settings for this device
* @ndev : The DaVinci EMAC network adapter
* @coal : ethtool coalesce settings structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
*
* Fetch the current interrupt coalesce settings
*
*/
static int emac_get_coalesce(struct net_device *ndev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct emac_priv *priv = netdev_priv(ndev);
@@ -401,12 +405,16 @@ static int emac_get_coalesce(struct net_device *ndev,
* emac_set_coalesce - Set interrupt coalesce settings for this device
* @ndev : The DaVinci EMAC network adapter
* @coal : ethtool coalesce settings structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
*
* Set interrupt coalesce parameters
*
*/
static int emac_set_coalesce(struct net_device *ndev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct emac_priv *priv = netdev_priv(ndev);
u32 int_ctrl, num_interrupts = 0;
@@ -943,7 +951,7 @@ static int emac_dev_xmit(struct sk_buff *skb, struct net_device *ndev)
goto fail_tx;
}
- ret_code = skb_padto(skb, EMAC_DEF_MIN_ETHPKTSIZE);
+ ret_code = skb_put_padto(skb, EMAC_DEF_MIN_ETHPKTSIZE);
if (unlikely(ret_code < 0)) {
if (netif_msg_tx_err(priv) && net_ratelimit())
dev_err(emac_dev, "DaVinci EMAC: packet pad failed");
@@ -1462,7 +1470,7 @@ static int emac_dev_open(struct net_device *ndev)
struct ethtool_coalesce coal;
coal.rx_coalesce_usecs = (priv->coal_intvl << 4);
- emac_set_coalesce(ndev, &coal);
+ emac_set_coalesce(ndev, &coal, NULL, NULL);
}
cpdma_ctlr_start(priv->dma);
@@ -1670,7 +1678,7 @@ static const struct net_device_ops emac_netdev_ops = {
.ndo_start_xmit = emac_dev_xmit,
.ndo_set_rx_mode = emac_dev_mcast_set,
.ndo_set_mac_address = emac_dev_setmac_addr,
- .ndo_do_ioctl = emac_devioctl,
+ .ndo_eth_ioctl = emac_devioctl,
.ndo_tx_timeout = emac_dev_tx_timeout,
.ndo_get_stats = emac_dev_getnetstats,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 97942b0e3897..eda2961c0fe2 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1944,7 +1944,7 @@ static const struct net_device_ops netcp_netdev_ops = {
.ndo_stop = netcp_ndo_stop,
.ndo_start_xmit = netcp_ndo_start_xmit,
.ndo_set_rx_mode = netcp_set_rx_mode,
- .ndo_do_ioctl = netcp_ndo_ioctl,
+ .ndo_eth_ioctl = netcp_ndo_ioctl,
.ndo_get_stats64 = netcp_get_stats,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c
index e0cb713193ea..77c448ad67ce 100644
--- a/drivers/net/ethernet/ti/tlan.c
+++ b/drivers/net/ethernet/ti/tlan.c
@@ -749,7 +749,7 @@ static const struct net_device_ops tlan_netdev_ops = {
.ndo_tx_timeout = tlan_tx_timeout,
.ndo_get_stats = tlan_get_stats,
.ndo_set_rx_mode = tlan_set_multicast_list,
- .ndo_do_ioctl = tlan_ioctl,
+ .ndo_eth_ioctl = tlan_ioctl,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c
index 226a76633e65..66d4e024d11e 100644
--- a/drivers/net/ethernet/toshiba/spider_net.c
+++ b/drivers/net/ethernet/toshiba/spider_net.c
@@ -354,9 +354,10 @@ spider_net_free_rx_chain_contents(struct spider_net_card *card)
descr = card->rx_chain.head;
do {
if (descr->skb) {
- pci_unmap_single(card->pdev, descr->hwdescr->buf_addr,
+ dma_unmap_single(&card->pdev->dev,
+ descr->hwdescr->buf_addr,
SPIDER_NET_MAX_FRAME,
- PCI_DMA_BIDIRECTIONAL);
+ DMA_BIDIRECTIONAL);
dev_kfree_skb(descr->skb);
descr->skb = NULL;
}
@@ -411,9 +412,9 @@ spider_net_prepare_rx_descr(struct spider_net_card *card,
if (offset)
skb_reserve(descr->skb, SPIDER_NET_RXBUF_ALIGN - offset);
/* iommu-map the skb */
- buf = pci_map_single(card->pdev, descr->skb->data,
- SPIDER_NET_MAX_FRAME, PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(card->pdev, buf)) {
+ buf = dma_map_single(&card->pdev->dev, descr->skb->data,
+ SPIDER_NET_MAX_FRAME, DMA_FROM_DEVICE);
+ if (dma_mapping_error(&card->pdev->dev, buf)) {
dev_kfree_skb_any(descr->skb);
descr->skb = NULL;
if (netif_msg_rx_err(card) && net_ratelimit())
@@ -653,8 +654,9 @@ spider_net_prepare_tx_descr(struct spider_net_card *card,
dma_addr_t buf;
unsigned long flags;
- buf = pci_map_single(card->pdev, skb->data, skb->len, PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(card->pdev, buf)) {
+ buf = dma_map_single(&card->pdev->dev, skb->data, skb->len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&card->pdev->dev, buf)) {
if (netif_msg_tx_err(card) && net_ratelimit())
dev_err(&card->netdev->dev, "could not iommu-map packet (%p, %i). "
"Dropping packet\n", skb->data, skb->len);
@@ -666,7 +668,8 @@ spider_net_prepare_tx_descr(struct spider_net_card *card,
descr = card->tx_chain.head;
if (descr->next == chain->tail->prev) {
spin_unlock_irqrestore(&chain->lock, flags);
- pci_unmap_single(card->pdev, buf, skb->len, PCI_DMA_TODEVICE);
+ dma_unmap_single(&card->pdev->dev, buf, skb->len,
+ DMA_TO_DEVICE);
return -ENOMEM;
}
hwdescr = descr->hwdescr;
@@ -822,8 +825,8 @@ spider_net_release_tx_chain(struct spider_net_card *card, int brutal)
/* unmap the skb */
if (skb) {
- pci_unmap_single(card->pdev, buf_addr, skb->len,
- PCI_DMA_TODEVICE);
+ dma_unmap_single(&card->pdev->dev, buf_addr, skb->len,
+ DMA_TO_DEVICE);
dev_consume_skb_any(skb);
}
}
@@ -1165,8 +1168,8 @@ spider_net_decode_one_descr(struct spider_net_card *card)
/* unmap descriptor */
hw_buf_addr = hwdescr->buf_addr;
hwdescr->buf_addr = 0xffffffff;
- pci_unmap_single(card->pdev, hw_buf_addr,
- SPIDER_NET_MAX_FRAME, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&card->pdev->dev, hw_buf_addr, SPIDER_NET_MAX_FRAME,
+ DMA_FROM_DEVICE);
if ( (status == SPIDER_NET_DESCR_RESPONSE_ERROR) ||
(status == SPIDER_NET_DESCR_PROTECTION_ERROR) ||
@@ -2214,7 +2217,7 @@ static const struct net_device_ops spider_net_ops = {
.ndo_start_xmit = spider_net_xmit,
.ndo_set_rx_mode = spider_net_set_multi,
.ndo_set_mac_address = spider_net_set_mac,
- .ndo_do_ioctl = spider_net_do_ioctl,
+ .ndo_eth_ioctl = spider_net_do_ioctl,
.ndo_tx_timeout = spider_net_tx_timeout,
.ndo_validate_addr = eth_validate_addr,
/* HW VLAN */
diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c
index fedb2bf69261..52245ac60fc7 100644
--- a/drivers/net/ethernet/toshiba/tc35815.c
+++ b/drivers/net/ethernet/toshiba/tc35815.c
@@ -750,7 +750,7 @@ static const struct net_device_ops tc35815_netdev_ops = {
.ndo_get_stats = tc35815_get_stats,
.ndo_set_rx_mode = tc35815_set_multicast_list,
.ndo_tx_timeout = tc35815_tx_timeout,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c
index c62f474b6d08..cf0917b29e30 100644
--- a/drivers/net/ethernet/tundra/tsi108_eth.c
+++ b/drivers/net/ethernet/tundra/tsi108_eth.c
@@ -1538,7 +1538,7 @@ static const struct net_device_ops tsi108_netdev_ops = {
.ndo_start_xmit = tsi108_send_packet,
.ndo_set_rx_mode = tsi108_set_rx_mode,
.ndo_get_stats = tsi108_get_stats,
- .ndo_do_ioctl = tsi108_do_ioctl,
+ .ndo_eth_ioctl = tsi108_do_ioctl,
.ndo_set_mac_address = tsi108_set_mac,
.ndo_validate_addr = eth_validate_addr,
};
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 73ca597ebd1b..3b73a9c55a5a 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -884,7 +884,7 @@ static const struct net_device_ops rhine_netdev_ops = {
.ndo_set_rx_mode = rhine_set_rx_mode,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
- .ndo_do_ioctl = netdev_ioctl,
+ .ndo_eth_ioctl = netdev_ioctl,
.ndo_tx_timeout = rhine_tx_timeout,
.ndo_vlan_rx_add_vid = rhine_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = rhine_vlan_rx_kill_vid,
@@ -1113,13 +1113,12 @@ err_out:
static int rhine_init_one_platform(struct platform_device *pdev)
{
- const struct of_device_id *match;
const u32 *quirks;
int irq;
void __iomem *ioaddr;
- match = of_match_device(rhine_of_tbl, &pdev->dev);
- if (!match)
+ quirks = of_device_get_match_data(&pdev->dev);
+ if (!quirks)
return -EINVAL;
ioaddr = devm_platform_ioremap_resource(pdev, 0);
@@ -1130,10 +1129,6 @@ static int rhine_init_one_platform(struct platform_device *pdev)
if (!irq)
return -EINVAL;
- quirks = match->data;
- if (!quirks)
- return -EINVAL;
-
return rhine_init_one_common(&pdev->dev, *quirks,
(long)ioaddr, ioaddr, irq);
}
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index 88426b5e410b..4b9c30f735b5 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -2637,7 +2637,7 @@ static const struct net_device_ops velocity_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_set_rx_mode = velocity_set_multi,
.ndo_change_mtu = velocity_change_mtu,
- .ndo_do_ioctl = velocity_ioctl,
+ .ndo_eth_ioctl = velocity_ioctl,
.ndo_vlan_rx_add_vid = velocity_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = velocity_vlan_rx_kill_vid,
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2943,14 +2943,12 @@ static void velocity_pci_remove(struct pci_dev *pdev)
static int velocity_platform_probe(struct platform_device *pdev)
{
- const struct of_device_id *of_id;
const struct velocity_info_tbl *info;
int irq;
- of_id = of_match_device(velocity_of_ids, &pdev->dev);
- if (!of_id)
+ info = of_device_get_match_data(&pdev->dev);
+ if (!info)
return -EINVAL;
- info = of_id->data;
irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
if (!irq)
@@ -3520,7 +3518,9 @@ static void set_pending_timer_val(int *val, u32 us)
static int velocity_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ecmd)
+ struct ethtool_coalesce *ecmd,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct velocity_info *vptr = netdev_priv(dev);
@@ -3534,7 +3534,9 @@ static int velocity_get_coalesce(struct net_device *dev,
}
static int velocity_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ecmd)
+ struct ethtool_coalesce *ecmd,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct velocity_info *vptr = netdev_priv(dev);
int max_us = 0x3f * 64;
diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c
index 811815f8cd3b..f974e70a82e8 100644
--- a/drivers/net/ethernet/wiznet/w5100.c
+++ b/drivers/net/ethernet/wiznet/w5100.c
@@ -1047,6 +1047,8 @@ static int w5100_mmio_probe(struct platform_device *pdev)
mac_addr = data->mac_addr;
mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!mem)
+ return -EINVAL;
if (resource_size(mem) < W5100_BUS_DIRECT_SIZE)
ops = &w5100_mmio_indirect_ops;
else
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 60a4f79b8fa1..463094ced104 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1237,7 +1237,7 @@ static const struct net_device_ops temac_netdev_ops = {
.ndo_set_rx_mode = temac_set_multicast_list,
.ndo_set_mac_address = temac_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = temac_poll_controller,
#endif
@@ -1310,8 +1310,11 @@ static int ll_temac_ethtools_set_ringparam(struct net_device *ndev,
return 0;
}
-static int ll_temac_ethtools_get_coalesce(struct net_device *ndev,
- struct ethtool_coalesce *ec)
+static int
+ll_temac_ethtools_get_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct temac_local *lp = netdev_priv(ndev);
@@ -1322,8 +1325,11 @@ static int ll_temac_ethtools_get_coalesce(struct net_device *ndev,
return 0;
}
-static int ll_temac_ethtools_set_coalesce(struct net_device *ndev,
- struct ethtool_coalesce *ec)
+static int
+ll_temac_ethtools_set_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct temac_local *lp = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 13cd799541aa..871b5ec3183d 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1227,7 +1227,7 @@ static const struct net_device_ops axienet_netdev_ops = {
.ndo_change_mtu = axienet_change_mtu,
.ndo_set_mac_address = netdev_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = axienet_ioctl,
+ .ndo_eth_ioctl = axienet_ioctl,
.ndo_set_rx_mode = axienet_set_multicast_list,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = axienet_poll_controller,
@@ -1400,6 +1400,8 @@ axienet_ethtools_set_pauseparam(struct net_device *ndev,
* axienet_ethtools_get_coalesce - Get DMA interrupt coalescing count.
* @ndev: Pointer to net_device structure
* @ecoalesce: Pointer to ethtool_coalesce structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
*
* This implements ethtool command for getting the DMA interrupt coalescing
* count on Tx and Rx paths. Issue "ethtool -c ethX" under linux prompt to
@@ -1407,8 +1409,11 @@ axienet_ethtools_set_pauseparam(struct net_device *ndev,
*
* Return: 0 always
*/
-static int axienet_ethtools_get_coalesce(struct net_device *ndev,
- struct ethtool_coalesce *ecoalesce)
+static int
+axienet_ethtools_get_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *ecoalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
u32 regval = 0;
struct axienet_local *lp = netdev_priv(ndev);
@@ -1425,6 +1430,8 @@ static int axienet_ethtools_get_coalesce(struct net_device *ndev,
* axienet_ethtools_set_coalesce - Set DMA interrupt coalescing count.
* @ndev: Pointer to net_device structure
* @ecoalesce: Pointer to ethtool_coalesce structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
*
* This implements ethtool command for setting the DMA interrupt coalescing
* count on Tx and Rx paths. Issue "ethtool -C ethX rx-frames 5" under linux
@@ -1432,8 +1439,11 @@ static int axienet_ethtools_get_coalesce(struct net_device *ndev,
*
* Return: 0, on success, Non-zero error value on failure.
*/
-static int axienet_ethtools_set_coalesce(struct net_device *ndev,
- struct ethtool_coalesce *ecoalesce)
+static int
+axienet_ethtools_set_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *ecoalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct axienet_local *lp = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index b06377fe7293..b780aad3550a 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1263,7 +1263,7 @@ static const struct net_device_ops xemaclite_netdev_ops = {
.ndo_start_xmit = xemaclite_send,
.ndo_set_mac_address = xemaclite_set_mac_address,
.ndo_tx_timeout = xemaclite_tx_timeout,
- .ndo_do_ioctl = xemaclite_ioctl,
+ .ndo_eth_ioctl = xemaclite_ioctl,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = xemaclite_poll_controller,
#endif
diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c
index 4f6db6f5c272..ae611e46da6a 100644
--- a/drivers/net/ethernet/xircom/xirc2ps_cs.c
+++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c
@@ -464,7 +464,7 @@ static const struct net_device_ops netdev_ops = {
.ndo_start_xmit = do_start_xmit,
.ndo_tx_timeout = xirc_tx_timeout,
.ndo_set_config = do_config,
- .ndo_do_ioctl = do_ioctl,
+ .ndo_eth_ioctl = do_ioctl,
.ndo_set_rx_mode = set_multicast_list,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
diff --git a/drivers/net/ethernet/xscale/Kconfig b/drivers/net/ethernet/xscale/Kconfig
index 468ffe3d1707..0e878fa6e322 100644
--- a/drivers/net/ethernet/xscale/Kconfig
+++ b/drivers/net/ethernet/xscale/Kconfig
@@ -29,9 +29,9 @@ config IXP4XX_ETH
on IXP4xx processor.
config PTP_1588_CLOCK_IXP46X
- tristate "Intel IXP46x as PTP clock"
+ bool "Intel IXP46x as PTP clock"
depends on IXP4XX_ETH
- depends on PTP_1588_CLOCK
+ depends on PTP_1588_CLOCK=y || PTP_1588_CLOCK=IXP4XX_ETH
default y
help
This driver adds support for using the IXP46X as a PTP
diff --git a/drivers/net/ethernet/xscale/Makefile b/drivers/net/ethernet/xscale/Makefile
index 607f91b1e878..e935f2a2979f 100644
--- a/drivers/net/ethernet/xscale/Makefile
+++ b/drivers/net/ethernet/xscale/Makefile
@@ -3,5 +3,9 @@
# Makefile for the Intel XScale IXP device drivers.
#
+# Keep this link order to avoid deferred probing
+ifdef CONFIG_PTP_1588_CLOCK_IXP46X
+obj-$(CONFIG_IXP4XX_ETH) += ptp_ixp46x.o
+endif
+
obj-$(CONFIG_IXP4XX_ETH) += ixp4xx_eth.o
-obj-$(CONFIG_PTP_1588_CLOCK_IXP46X) += ptp_ixp46x.o
diff --git a/drivers/net/ethernet/xscale/ixp46x_ts.h b/drivers/net/ethernet/xscale/ixp46x_ts.h
index d792130e27b0..ee9b93ded20a 100644
--- a/drivers/net/ethernet/xscale/ixp46x_ts.h
+++ b/drivers/net/ethernet/xscale/ixp46x_ts.h
@@ -62,7 +62,16 @@ struct ixp46x_ts_regs {
#define TX_SNAPSHOT_LOCKED (1<<0)
#define RX_SNAPSHOT_LOCKED (1<<1)
-/* The ptp_ixp46x module will set this variable */
-extern int ixp46x_phc_index;
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK_IXP46X)
+int ixp46x_ptp_find(struct ixp46x_ts_regs *__iomem *regs, int *phc_index);
+#else
+static inline int ixp46x_ptp_find(struct ixp46x_ts_regs *__iomem *regs, int *phc_index)
+{
+ *regs = NULL;
+ *phc_index = -1;
+
+ return -ENODEV;
+}
+#endif
#endif
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index 7ae754eadf22..931494cc1c39 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -37,7 +37,6 @@
#include <linux/module.h>
#include <linux/soc/ixp4xx/npe.h>
#include <linux/soc/ixp4xx/qmgr.h>
-#include <mach/hardware.h>
#include <linux/soc/ixp4xx/cpu.h>
#include "ixp46x_ts.h"
@@ -169,13 +168,15 @@ struct eth_regs {
struct port {
struct eth_regs __iomem *regs;
+ struct ixp46x_ts_regs __iomem *timesync_regs;
+ int phc_index;
struct npe *npe;
struct net_device *netdev;
struct napi_struct napi;
struct eth_plat_info *plat;
buffer_t *rx_buff_tab[RX_DESCS], *tx_buff_tab[TX_DESCS];
struct desc *desc_tab; /* coherent */
- u32 desc_tab_phys;
+ dma_addr_t desc_tab_phys;
int id; /* logical port ID */
int speed, duplex;
u8 firmware[4];
@@ -295,7 +296,7 @@ static void ixp_rx_timestamp(struct port *port, struct sk_buff *skb)
ch = PORT2CHANNEL(port);
- regs = (struct ixp46x_ts_regs __iomem *) IXP4XX_TIMESYNC_BASE_VIRT;
+ regs = port->timesync_regs;
val = __raw_readl(&regs->channel[ch].ch_event);
@@ -340,7 +341,7 @@ static void ixp_tx_timestamp(struct port *port, struct sk_buff *skb)
ch = PORT2CHANNEL(port);
- regs = (struct ixp46x_ts_regs __iomem *) IXP4XX_TIMESYNC_BASE_VIRT;
+ regs = port->timesync_regs;
/*
* This really stinks, but we have to poll for the Tx time stamp.
@@ -375,6 +376,7 @@ static int hwtstamp_set(struct net_device *netdev, struct ifreq *ifr)
struct hwtstamp_config cfg;
struct ixp46x_ts_regs *regs;
struct port *port = netdev_priv(netdev);
+ int ret;
int ch;
if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
@@ -383,8 +385,12 @@ static int hwtstamp_set(struct net_device *netdev, struct ifreq *ifr)
if (cfg.flags) /* reserved for future extensions */
return -EINVAL;
+ ret = ixp46x_ptp_find(&port->timesync_regs, &port->phc_index);
+ if (ret)
+ return ret;
+
ch = PORT2CHANNEL(port);
- regs = (struct ixp46x_ts_regs __iomem *) IXP4XX_TIMESYNC_BASE_VIRT;
+ regs = port->timesync_regs;
if (cfg.tx_type != HWTSTAMP_TX_OFF && cfg.tx_type != HWTSTAMP_TX_ON)
return -ERANGE;
@@ -850,14 +856,14 @@ static int eth_xmit(struct sk_buff *skb, struct net_device *dev)
bytes = len;
mem = skb->data;
#else
- offset = (int)skb->data & 3; /* keep 32-bit alignment */
+ offset = (uintptr_t)skb->data & 3; /* keep 32-bit alignment */
bytes = ALIGN(offset + len, 4);
if (!(mem = kmalloc(bytes, GFP_ATOMIC))) {
dev_kfree_skb(skb);
dev->stats.tx_dropped++;
return NETDEV_TX_OK;
}
- memcpy_swab32(mem, (u32 *)((int)skb->data & ~3), bytes / 4);
+ memcpy_swab32(mem, (u32 *)((uintptr_t)skb->data & ~3), bytes / 4);
#endif
phys = dma_map_single(&dev->dev, mem, bytes, DMA_TO_DEVICE);
@@ -988,25 +994,27 @@ static void ixp4xx_get_drvinfo(struct net_device *dev,
strlcpy(info->bus_info, "internal", sizeof(info->bus_info));
}
-int ixp46x_phc_index = -1;
-EXPORT_SYMBOL_GPL(ixp46x_phc_index);
-
static int ixp4xx_get_ts_info(struct net_device *dev,
struct ethtool_ts_info *info)
{
- if (!cpu_is_ixp46x()) {
+ struct port *port = netdev_priv(dev);
+
+ if (port->phc_index < 0)
+ ixp46x_ptp_find(&port->timesync_regs, &port->phc_index);
+
+ info->phc_index = port->phc_index;
+
+ if (info->phc_index < 0) {
info->so_timestamping =
SOF_TIMESTAMPING_TX_SOFTWARE |
SOF_TIMESTAMPING_RX_SOFTWARE |
SOF_TIMESTAMPING_SOFTWARE;
- info->phc_index = -1;
return 0;
}
info->so_timestamping =
SOF_TIMESTAMPING_TX_HARDWARE |
SOF_TIMESTAMPING_RX_HARDWARE |
SOF_TIMESTAMPING_RAW_HARDWARE;
- info->phc_index = ixp46x_phc_index;
info->tx_types =
(1 << HWTSTAMP_TX_OFF) |
(1 << HWTSTAMP_TX_ON);
@@ -1357,7 +1365,7 @@ static const struct net_device_ops ixp4xx_netdev_ops = {
.ndo_stop = eth_close,
.ndo_start_xmit = eth_xmit,
.ndo_set_rx_mode = eth_set_mcast_list,
- .ndo_do_ioctl = eth_ioctl,
+ .ndo_eth_ioctl = eth_ioctl,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
};
@@ -1481,6 +1489,7 @@ static int ixp4xx_eth_probe(struct platform_device *pdev)
port = netdev_priv(ndev);
port->netdev = ndev;
port->id = plat->npe;
+ port->phc_index = -1;
/* Get the port resource and remap */
port->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
diff --git a/drivers/net/ethernet/xscale/ptp_ixp46x.c b/drivers/net/ethernet/xscale/ptp_ixp46x.c
index a6fb88fd42f7..ecece21315c3 100644
--- a/drivers/net/ethernet/xscale/ptp_ixp46x.c
+++ b/drivers/net/ethernet/xscale/ptp_ixp46x.c
@@ -5,14 +5,16 @@
* Copyright (C) 2010 OMICRON electronics GmbH
*/
#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
#include <linux/err.h>
-#include <linux/gpio.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/ptp_clock_kernel.h>
+#include <linux/platform_device.h>
#include <linux/soc/ixp4xx/cpu.h>
#include <linux/module.h>
#include <mach/ixp4xx-regs.h>
@@ -21,10 +23,6 @@
#define DRIVER "ptp_ixp46x"
#define N_EXT_TS 2
-#define MASTER_GPIO 8
-#define MASTER_IRQ 25
-#define SLAVE_GPIO 7
-#define SLAVE_IRQ 24
struct ixp_clock {
struct ixp46x_ts_regs *regs;
@@ -32,9 +30,11 @@ struct ixp_clock {
struct ptp_clock_info caps;
int exts0_enabled;
int exts1_enabled;
+ int slave_irq;
+ int master_irq;
};
-DEFINE_SPINLOCK(register_lock);
+static DEFINE_SPINLOCK(register_lock);
/*
* Register access functions
@@ -243,53 +243,38 @@ static const struct ptp_clock_info ptp_ixp_caps = {
static struct ixp_clock ixp_clock;
-static int setup_interrupt(int gpio)
+int ixp46x_ptp_find(struct ixp46x_ts_regs *__iomem *regs, int *phc_index)
{
- int irq;
- int err;
-
- err = gpio_request(gpio, "ixp4-ptp");
- if (err)
- return err;
-
- err = gpio_direction_input(gpio);
- if (err)
- return err;
-
- irq = gpio_to_irq(gpio);
- if (irq < 0)
- return irq;
+ *regs = ixp_clock.regs;
+ *phc_index = ptp_clock_index(ixp_clock.ptp_clock);
- err = irq_set_irq_type(irq, IRQF_TRIGGER_FALLING);
- if (err) {
- pr_err("cannot set trigger type for irq %d\n", irq);
- return err;
- }
-
- err = request_irq(irq, isr, 0, DRIVER, &ixp_clock);
- if (err) {
- pr_err("request_irq failed for irq %d\n", irq);
- return err;
- }
+ if (!ixp_clock.ptp_clock)
+ return -EPROBE_DEFER;
- return irq;
+ return 0;
}
+EXPORT_SYMBOL_GPL(ixp46x_ptp_find);
-static void __exit ptp_ixp_exit(void)
+/* Called from the registered devm action */
+static void ptp_ixp_unregister_action(void *d)
{
- free_irq(MASTER_IRQ, &ixp_clock);
- free_irq(SLAVE_IRQ, &ixp_clock);
- ixp46x_phc_index = -1;
- ptp_clock_unregister(ixp_clock.ptp_clock);
+ struct ptp_clock *ptp_clock = d;
+
+ ptp_clock_unregister(ptp_clock);
+ ixp_clock.ptp_clock = NULL;
}
-static int __init ptp_ixp_init(void)
+static int ptp_ixp_probe(struct platform_device *pdev)
{
- if (!cpu_is_ixp46x())
- return -ENODEV;
+ struct device *dev = &pdev->dev;
+ int ret;
- ixp_clock.regs =
- (struct ixp46x_ts_regs __iomem *) IXP4XX_TIMESYNC_BASE_VIRT;
+ ixp_clock.regs = devm_platform_ioremap_resource(pdev, 0);
+ ixp_clock.master_irq = platform_get_irq(pdev, 0);
+ ixp_clock.slave_irq = platform_get_irq(pdev, 1);
+ if (IS_ERR(ixp_clock.regs) ||
+ !ixp_clock.master_irq || !ixp_clock.slave_irq)
+ return -ENXIO;
ixp_clock.caps = ptp_ixp_caps;
@@ -298,32 +283,51 @@ static int __init ptp_ixp_init(void)
if (IS_ERR(ixp_clock.ptp_clock))
return PTR_ERR(ixp_clock.ptp_clock);
- ixp46x_phc_index = ptp_clock_index(ixp_clock.ptp_clock);
+ ret = devm_add_action_or_reset(dev, ptp_ixp_unregister_action,
+ ixp_clock.ptp_clock);
+ if (ret) {
+ dev_err(dev, "failed to install clock removal handler\n");
+ return ret;
+ }
__raw_writel(DEFAULT_ADDEND, &ixp_clock.regs->addend);
__raw_writel(1, &ixp_clock.regs->trgt_lo);
__raw_writel(0, &ixp_clock.regs->trgt_hi);
__raw_writel(TTIPEND, &ixp_clock.regs->event);
- if (MASTER_IRQ != setup_interrupt(MASTER_GPIO)) {
- pr_err("failed to setup gpio %d as irq\n", MASTER_GPIO);
- goto no_master;
- }
- if (SLAVE_IRQ != setup_interrupt(SLAVE_GPIO)) {
- pr_err("failed to setup gpio %d as irq\n", SLAVE_GPIO);
- goto no_slave;
- }
+ ret = devm_request_irq(dev, ixp_clock.master_irq, isr,
+ 0, DRIVER, &ixp_clock);
+ if (ret)
+ return dev_err_probe(dev, ret,
+ "request_irq failed for irq %d\n",
+ ixp_clock.master_irq);
+
+ ret = devm_request_irq(dev, ixp_clock.slave_irq, isr,
+ 0, DRIVER, &ixp_clock);
+ if (ret)
+ return dev_err_probe(dev, ret,
+ "request_irq failed for irq %d\n",
+ ixp_clock.slave_irq);
return 0;
-no_slave:
- free_irq(MASTER_IRQ, &ixp_clock);
-no_master:
- ptp_clock_unregister(ixp_clock.ptp_clock);
- return -ENODEV;
}
-module_init(ptp_ixp_init);
-module_exit(ptp_ixp_exit);
+static const struct of_device_id ptp_ixp_match[] = {
+ {
+ .compatible = "intel,ixp46x-ptp-timer",
+ },
+ { },
+};
+
+static struct platform_driver ptp_ixp_driver = {
+ .driver = {
+ .name = "ptp-ixp46x",
+ .of_match_table = ptp_ixp_match,
+ .suppress_bind_attrs = true,
+ },
+ .probe = ptp_ixp_probe,
+};
+module_platform_driver(ptp_ixp_driver);
MODULE_AUTHOR("Richard Cochran <richardcochran@gmail.com>");
MODULE_DESCRIPTION("PTP clock using the IXP46X timer");
diff --git a/drivers/net/fddi/skfp/skfddi.c b/drivers/net/fddi/skfp/skfddi.c
index 69c29a2ef95d..c5cb421f9890 100644
--- a/drivers/net/fddi/skfp/skfddi.c
+++ b/drivers/net/fddi/skfp/skfddi.c
@@ -70,6 +70,7 @@ static const char * const boot_msg =
/* Include files */
#include <linux/capability.h>
+#include <linux/compat.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
@@ -103,7 +104,8 @@ static struct net_device_stats *skfp_ctl_get_stats(struct net_device *dev);
static void skfp_ctl_set_multicast_list(struct net_device *dev);
static void skfp_ctl_set_multicast_list_wo_lock(struct net_device *dev);
static int skfp_ctl_set_mac_address(struct net_device *dev, void *addr);
-static int skfp_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static int skfp_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+ void __user *data, int cmd);
static netdev_tx_t skfp_send_pkt(struct sk_buff *skb,
struct net_device *dev);
static void send_queued_packets(struct s_smc *smc);
@@ -164,7 +166,7 @@ static const struct net_device_ops skfp_netdev_ops = {
.ndo_get_stats = skfp_ctl_get_stats,
.ndo_set_rx_mode = skfp_ctl_set_multicast_list,
.ndo_set_mac_address = skfp_ctl_set_mac_address,
- .ndo_do_ioctl = skfp_ioctl,
+ .ndo_siocdevprivate = skfp_siocdevprivate,
};
/*
@@ -932,9 +934,9 @@ static int skfp_ctl_set_mac_address(struct net_device *dev, void *addr)
/*
- * ==============
- * = skfp_ioctl =
- * ==============
+ * =======================
+ * = skfp_siocdevprivate =
+ * =======================
*
* Overview:
*
@@ -954,16 +956,19 @@ static int skfp_ctl_set_mac_address(struct net_device *dev, void *addr)
*/
-static int skfp_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int skfp_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd)
{
struct s_smc *smc = netdev_priv(dev);
skfddi_priv *lp = &smc->os;
struct s_skfp_ioctl ioc;
int status = 0;
- if (copy_from_user(&ioc, rq->ifr_data, sizeof(struct s_skfp_ioctl)))
+ if (copy_from_user(&ioc, data, sizeof(struct s_skfp_ioctl)))
return -EFAULT;
+ if (in_compat_syscall())
+ return -EOPNOTSUPP;
+
switch (ioc.cmd) {
case SKFP_GET_STATS: /* Get the driver statistics */
ioc.len = sizeof(lp->MacStat);
@@ -1169,8 +1174,8 @@ static void send_queued_packets(struct s_smc *smc)
txd = (struct s_smt_fp_txd *) HWM_GET_CURR_TXD(smc, queue);
- dma_address = pci_map_single(&bp->pdev, skb->data,
- skb->len, PCI_DMA_TODEVICE);
+ dma_address = dma_map_single(&(&bp->pdev)->dev, skb->data,
+ skb->len, DMA_TO_DEVICE);
if (frame_status & LAN_TX) {
txd->txd_os.skb = skb; // save skb
txd->txd_os.dma_addr = dma_address; // save dma mapping
@@ -1179,8 +1184,8 @@ static void send_queued_packets(struct s_smc *smc)
frame_status | FIRST_FRAG | LAST_FRAG | EN_IRQ_EOF);
if (!(frame_status & LAN_TX)) { // local only frame
- pci_unmap_single(&bp->pdev, dma_address,
- skb->len, PCI_DMA_TODEVICE);
+ dma_unmap_single(&(&bp->pdev)->dev, dma_address,
+ skb->len, DMA_TO_DEVICE);
dev_kfree_skb_irq(skb);
}
spin_unlock_irqrestore(&bp->DriverLock, Flags);
@@ -1462,8 +1467,9 @@ void dma_complete(struct s_smc *smc, volatile union s_fp_descr *descr, int flag)
if (r->rxd_os.skb && r->rxd_os.dma_addr) {
int MaxFrameSize = bp->MaxFrameSize;
- pci_unmap_single(&bp->pdev, r->rxd_os.dma_addr,
- MaxFrameSize, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&(&bp->pdev)->dev,
+ r->rxd_os.dma_addr, MaxFrameSize,
+ DMA_FROM_DEVICE);
r->rxd_os.dma_addr = 0;
}
}
@@ -1498,8 +1504,8 @@ void mac_drv_tx_complete(struct s_smc *smc, volatile struct s_smt_fp_txd *txd)
txd->txd_os.skb = NULL;
// release the DMA mapping
- pci_unmap_single(&smc->os.pdev, txd->txd_os.dma_addr,
- skb->len, PCI_DMA_TODEVICE);
+ dma_unmap_single(&(&smc->os.pdev)->dev, txd->txd_os.dma_addr,
+ skb->len, DMA_TO_DEVICE);
txd->txd_os.dma_addr = 0;
smc->os.MacStat.gen.tx_packets++; // Count transmitted packets.
@@ -1702,10 +1708,9 @@ void mac_drv_requeue_rxd(struct s_smc *smc, volatile struct s_smt_fp_rxd *rxd,
skb_reserve(skb, 3);
skb_put(skb, MaxFrameSize);
v_addr = skb->data;
- b_addr = pci_map_single(&smc->os.pdev,
- v_addr,
- MaxFrameSize,
- PCI_DMA_FROMDEVICE);
+ b_addr = dma_map_single(&(&smc->os.pdev)->dev,
+ v_addr, MaxFrameSize,
+ DMA_FROM_DEVICE);
rxd->rxd_os.dma_addr = b_addr;
} else {
// no skb available, use local buffer
@@ -1718,10 +1723,8 @@ void mac_drv_requeue_rxd(struct s_smc *smc, volatile struct s_smt_fp_rxd *rxd,
// we use skb from old rxd
rxd->rxd_os.skb = skb;
v_addr = skb->data;
- b_addr = pci_map_single(&smc->os.pdev,
- v_addr,
- MaxFrameSize,
- PCI_DMA_FROMDEVICE);
+ b_addr = dma_map_single(&(&smc->os.pdev)->dev, v_addr,
+ MaxFrameSize, DMA_FROM_DEVICE);
rxd->rxd_os.dma_addr = b_addr;
}
hwm_rx_frag(smc, v_addr, b_addr, MaxFrameSize,
@@ -1773,10 +1776,8 @@ void mac_drv_fill_rxd(struct s_smc *smc)
skb_reserve(skb, 3);
skb_put(skb, MaxFrameSize);
v_addr = skb->data;
- b_addr = pci_map_single(&smc->os.pdev,
- v_addr,
- MaxFrameSize,
- PCI_DMA_FROMDEVICE);
+ b_addr = dma_map_single(&(&smc->os.pdev)->dev, v_addr,
+ MaxFrameSize, DMA_FROM_DEVICE);
rxd->rxd_os.dma_addr = b_addr;
} else {
// no skb available, use local buffer
@@ -1833,8 +1834,9 @@ void mac_drv_clear_rxd(struct s_smc *smc, volatile struct s_smt_fp_rxd *rxd,
skfddi_priv *bp = &smc->os;
int MaxFrameSize = bp->MaxFrameSize;
- pci_unmap_single(&bp->pdev, rxd->rxd_os.dma_addr,
- MaxFrameSize, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&(&bp->pdev)->dev,
+ rxd->rxd_os.dma_addr, MaxFrameSize,
+ DMA_FROM_DEVICE);
dev_kfree_skb(skb);
rxd->rxd_os.skb = NULL;
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index fcf3af76b6d7..8fe8887d506a 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -827,6 +827,12 @@ static void decode_data(struct sixpack *sp, unsigned char inbyte)
return;
}
+ if (sp->rx_count_cooked + 2 >= sizeof(sp->cooked_buf)) {
+ pr_err("6pack: cooked buffer overrun, data loss\n");
+ sp->rx_count = 0;
+ return;
+ }
+
buf = sp->raw_buf;
sp->cooked_buf[sp->rx_count_cooked++] =
buf[0] | ((buf[1] << 2) & 0xc0);
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index 4435a1195194..775dcf4ebde5 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -1005,7 +1005,8 @@ static int baycom_setmode(struct baycom_state *bc, const char *modestr)
/* --------------------------------------------------------------------- */
-static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int baycom_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
struct baycom_state *bc = netdev_priv(dev);
struct hdlcdrv_ioctl hi;
@@ -1013,7 +1014,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (cmd != SIOCDEVPRIVATE)
return -ENOIOCTLCMD;
- if (copy_from_user(&hi, ifr->ifr_data, sizeof(hi)))
+ if (copy_from_user(&hi, data, sizeof(hi)))
return -EFAULT;
switch (hi.cmd) {
default:
@@ -1104,7 +1105,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return HDLCDRV_PARMASK_IOBASE;
}
- if (copy_to_user(ifr->ifr_data, &hi, sizeof(hi)))
+ if (copy_to_user(data, &hi, sizeof(hi)))
return -EFAULT;
return 0;
}
@@ -1114,7 +1115,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
static const struct net_device_ops baycom_netdev_ops = {
.ndo_open = epp_open,
.ndo_stop = epp_close,
- .ndo_do_ioctl = baycom_ioctl,
+ .ndo_siocdevprivate = baycom_siocdevprivate,
.ndo_start_xmit = baycom_send_packet,
.ndo_set_mac_address = baycom_set_mac_address,
};
diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c
index 6a3dc7b3f28a..fd7da5bb1fa5 100644
--- a/drivers/net/hamradio/baycom_par.c
+++ b/drivers/net/hamradio/baycom_par.c
@@ -380,7 +380,7 @@ static int par96_close(struct net_device *dev)
* ===================== hdlcdrv driver interface =========================
*/
-static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
+static int baycom_ioctl(struct net_device *dev, void __user *data,
struct hdlcdrv_ioctl *hi, int cmd);
/* --------------------------------------------------------------------- */
@@ -408,7 +408,7 @@ static int baycom_setmode(struct baycom_state *bc, const char *modestr)
/* --------------------------------------------------------------------- */
-static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
+static int baycom_ioctl(struct net_device *dev, void __user *data,
struct hdlcdrv_ioctl *hi, int cmd)
{
struct baycom_state *bc;
@@ -428,7 +428,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
case HDLCDRVCTL_GETMODE:
strcpy(hi->data.modename, bc->options ? "par96" : "picpar");
- if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl)))
+ if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
return -EFAULT;
return 0;
@@ -440,7 +440,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
case HDLCDRVCTL_MODELIST:
strcpy(hi->data.modename, "par96,picpar");
- if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl)))
+ if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
return -EFAULT;
return 0;
@@ -449,7 +449,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
}
- if (copy_from_user(&bi, ifr->ifr_data, sizeof(bi)))
+ if (copy_from_user(&bi, data, sizeof(bi)))
return -EFAULT;
switch (bi.cmd) {
default:
@@ -464,7 +464,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
#endif /* BAYCOM_DEBUG */
}
- if (copy_to_user(ifr->ifr_data, &bi, sizeof(bi)))
+ if (copy_to_user(data, &bi, sizeof(bi)))
return -EFAULT;
return 0;
diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c
index 04bb409707fc..646f605e358f 100644
--- a/drivers/net/hamradio/baycom_ser_fdx.c
+++ b/drivers/net/hamradio/baycom_ser_fdx.c
@@ -462,7 +462,7 @@ static int ser12_close(struct net_device *dev)
/* --------------------------------------------------------------------- */
-static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
+static int baycom_ioctl(struct net_device *dev, void __user *data,
struct hdlcdrv_ioctl *hi, int cmd);
/* --------------------------------------------------------------------- */
@@ -497,7 +497,7 @@ static int baycom_setmode(struct baycom_state *bc, const char *modestr)
/* --------------------------------------------------------------------- */
-static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
+static int baycom_ioctl(struct net_device *dev, void __user *data,
struct hdlcdrv_ioctl *hi, int cmd)
{
struct baycom_state *bc;
@@ -519,7 +519,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
sprintf(hi->data.modename, "ser%u", bc->baud / 100);
if (bc->opt_dcd <= 0)
strcat(hi->data.modename, (!bc->opt_dcd) ? "*" : "+");
- if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl)))
+ if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
return -EFAULT;
return 0;
@@ -531,7 +531,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
case HDLCDRVCTL_MODELIST:
strcpy(hi->data.modename, "ser12,ser3,ser24");
- if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl)))
+ if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
return -EFAULT;
return 0;
@@ -540,7 +540,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
}
- if (copy_from_user(&bi, ifr->ifr_data, sizeof(bi)))
+ if (copy_from_user(&bi, data, sizeof(bi)))
return -EFAULT;
switch (bi.cmd) {
default:
@@ -555,7 +555,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
#endif /* BAYCOM_DEBUG */
}
- if (copy_to_user(ifr->ifr_data, &bi, sizeof(bi)))
+ if (copy_to_user(data, &bi, sizeof(bi)))
return -EFAULT;
return 0;
diff --git a/drivers/net/hamradio/baycom_ser_hdx.c b/drivers/net/hamradio/baycom_ser_hdx.c
index a1acb3a47bdb..5d1ab4840753 100644
--- a/drivers/net/hamradio/baycom_ser_hdx.c
+++ b/drivers/net/hamradio/baycom_ser_hdx.c
@@ -521,7 +521,7 @@ static int ser12_close(struct net_device *dev)
/* --------------------------------------------------------------------- */
-static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
+static int baycom_ioctl(struct net_device *dev, void __user *data,
struct hdlcdrv_ioctl *hi, int cmd);
/* --------------------------------------------------------------------- */
@@ -551,7 +551,7 @@ static int baycom_setmode(struct baycom_state *bc, const char *modestr)
/* --------------------------------------------------------------------- */
-static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
+static int baycom_ioctl(struct net_device *dev, void __user *data,
struct hdlcdrv_ioctl *hi, int cmd)
{
struct baycom_state *bc;
@@ -573,7 +573,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
strcpy(hi->data.modename, "ser12");
if (bc->opt_dcd <= 0)
strcat(hi->data.modename, (!bc->opt_dcd) ? "*" : (bc->opt_dcd == -2) ? "@" : "+");
- if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl)))
+ if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
return -EFAULT;
return 0;
@@ -585,7 +585,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
case HDLCDRVCTL_MODELIST:
strcpy(hi->data.modename, "ser12");
- if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl)))
+ if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
return -EFAULT;
return 0;
@@ -594,7 +594,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
}
- if (copy_from_user(&bi, ifr->ifr_data, sizeof(bi)))
+ if (copy_from_user(&bi, data, sizeof(bi)))
return -EFAULT;
switch (bi.cmd) {
default:
@@ -609,7 +609,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
#endif /* BAYCOM_DEBUG */
}
- if (copy_to_user(ifr->ifr_data, &bi, sizeof(bi)))
+ if (copy_to_user(data, &bi, sizeof(bi)))
return -EFAULT;
return 0;
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index 0e623c2e8b2d..d967b0748773 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -314,9 +314,10 @@ static int bpq_set_mac_address(struct net_device *dev, void *addr)
* source ethernet address (broadcast
* or multicast: accept all)
*/
-static int bpq_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int bpq_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
- struct bpq_ethaddr __user *ethaddr = ifr->ifr_data;
+ struct bpq_ethaddr __user *ethaddr = data;
struct bpqdev *bpq = netdev_priv(dev);
struct bpq_req req;
@@ -325,7 +326,7 @@ static int bpq_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
switch (cmd) {
case SIOCSBPQETHOPT:
- if (copy_from_user(&req, ifr->ifr_data, sizeof(struct bpq_req)))
+ if (copy_from_user(&req, data, sizeof(struct bpq_req)))
return -EFAULT;
switch (req.cmd) {
case SIOCGBPQETHPARAM:
@@ -448,7 +449,7 @@ static const struct net_device_ops bpq_netdev_ops = {
.ndo_stop = bpq_close,
.ndo_start_xmit = bpq_xmit,
.ndo_set_mac_address = bpq_set_mac_address,
- .ndo_do_ioctl = bpq_ioctl,
+ .ndo_siocdevprivate = bpq_siocdevprivate,
};
static void bpq_setup(struct net_device *dev)
diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c
index c25c8c99c5c7..b50b7fafd8d6 100644
--- a/drivers/net/hamradio/dmascc.c
+++ b/drivers/net/hamradio/dmascc.c
@@ -225,7 +225,8 @@ static int read_scc_data(struct scc_priv *priv);
static int scc_open(struct net_device *dev);
static int scc_close(struct net_device *dev);
-static int scc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+static int scc_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd);
static int scc_send_packet(struct sk_buff *skb, struct net_device *dev);
static int scc_set_mac_address(struct net_device *dev, void *sa);
@@ -432,7 +433,7 @@ static const struct net_device_ops scc_netdev_ops = {
.ndo_open = scc_open,
.ndo_stop = scc_close,
.ndo_start_xmit = scc_send_packet,
- .ndo_do_ioctl = scc_ioctl,
+ .ndo_siocdevprivate = scc_siocdevprivate,
.ndo_set_mac_address = scc_set_mac_address,
};
@@ -881,15 +882,13 @@ static int scc_close(struct net_device *dev)
}
-static int scc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int scc_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd)
{
struct scc_priv *priv = dev->ml_priv;
switch (cmd) {
case SIOCGSCCPARAM:
- if (copy_to_user
- (ifr->ifr_data, &priv->param,
- sizeof(struct scc_param)))
+ if (copy_to_user(data, &priv->param, sizeof(struct scc_param)))
return -EFAULT;
return 0;
case SIOCSSCCPARAM:
@@ -897,13 +896,12 @@ static int scc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EPERM;
if (netif_running(dev))
return -EAGAIN;
- if (copy_from_user
- (&priv->param, ifr->ifr_data,
- sizeof(struct scc_param)))
+ if (copy_from_user(&priv->param, data,
+ sizeof(struct scc_param)))
return -EFAULT;
return 0;
default:
- return -EINVAL;
+ return -EOPNOTSUPP;
}
}
diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c
index cbaf1cdde7cb..5805cfc83854 100644
--- a/drivers/net/hamradio/hdlcdrv.c
+++ b/drivers/net/hamradio/hdlcdrv.c
@@ -483,23 +483,25 @@ static int hdlcdrv_close(struct net_device *dev)
/* --------------------------------------------------------------------- */
-static int hdlcdrv_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int hdlcdrv_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
struct hdlcdrv_state *s = netdev_priv(dev);
struct hdlcdrv_ioctl bi;
- if (cmd != SIOCDEVPRIVATE) {
- if (s->ops && s->ops->ioctl)
- return s->ops->ioctl(dev, ifr, &bi, cmd);
+ if (cmd != SIOCDEVPRIVATE)
return -ENOIOCTLCMD;
- }
- if (copy_from_user(&bi, ifr->ifr_data, sizeof(bi)))
+
+ if (in_compat_syscall()) /* to be implemented */
+ return -ENOIOCTLCMD;
+
+ if (copy_from_user(&bi, data, sizeof(bi)))
return -EFAULT;
switch (bi.cmd) {
default:
if (s->ops && s->ops->ioctl)
- return s->ops->ioctl(dev, ifr, &bi, cmd);
+ return s->ops->ioctl(dev, data, &bi, cmd);
return -ENOIOCTLCMD;
case HDLCDRVCTL_GETCHANNELPAR:
@@ -605,7 +607,7 @@ static int hdlcdrv_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
- if (copy_to_user(ifr->ifr_data, &bi, sizeof(bi)))
+ if (copy_to_user(data, &bi, sizeof(bi)))
return -EFAULT;
return 0;
@@ -617,7 +619,7 @@ static const struct net_device_ops hdlcdrv_netdev = {
.ndo_open = hdlcdrv_open,
.ndo_stop = hdlcdrv_close,
.ndo_start_xmit = hdlcdrv_send_packet,
- .ndo_do_ioctl = hdlcdrv_ioctl,
+ .ndo_siocdevprivate = hdlcdrv_siocdevprivate,
.ndo_set_mac_address = hdlcdrv_set_mac_address,
};
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index 3f1edd0526a4..e0bb131a33d7 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -210,7 +210,8 @@ static int scc_net_close(struct net_device *dev);
static void scc_net_rx(struct scc_channel *scc, struct sk_buff *skb);
static netdev_tx_t scc_net_tx(struct sk_buff *skb,
struct net_device *dev);
-static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+static int scc_net_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd);
static int scc_net_set_mac_address(struct net_device *dev, void *addr);
static struct net_device_stats * scc_net_get_stats(struct net_device *dev);
@@ -1550,7 +1551,7 @@ static const struct net_device_ops scc_netdev_ops = {
.ndo_start_xmit = scc_net_tx,
.ndo_set_mac_address = scc_net_set_mac_address,
.ndo_get_stats = scc_net_get_stats,
- .ndo_do_ioctl = scc_net_ioctl,
+ .ndo_siocdevprivate = scc_net_siocdevprivate,
};
/* ----> Initialize device <----- */
@@ -1703,7 +1704,8 @@ static netdev_tx_t scc_net_tx(struct sk_buff *skb, struct net_device *dev)
* SIOCSCCCAL - send calib. pattern arg: (struct scc_calibrate *) arg
*/
-static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int scc_net_siocdevprivate(struct net_device *dev,
+ struct ifreq *ifr, void __user *arg, int cmd)
{
struct scc_kiss_cmd kiss_cmd;
struct scc_mem_config memcfg;
@@ -1712,8 +1714,6 @@ static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
struct scc_channel *scc = (struct scc_channel *) dev->ml_priv;
int chan;
unsigned char device_name[IFNAMSIZ];
- void __user *arg = ifr->ifr_data;
-
if (!Driver_Initialized)
{
@@ -1722,6 +1722,9 @@ static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
int found = 1;
if (!capable(CAP_SYS_RAWIO)) return -EPERM;
+ if (in_compat_syscall())
+ return -EOPNOTSUPP;
+
if (!arg) return -EFAULT;
if (Nchips >= SCC_MAXCHIPS)
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index d4911041596c..6ddacbdb224b 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -920,15 +920,15 @@ static int yam_close(struct net_device *dev)
/* --------------------------------------------------------------------- */
-static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int yam_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd)
{
struct yam_port *yp = netdev_priv(dev);
struct yamdrv_ioctl_cfg yi;
struct yamdrv_ioctl_mcs *ym;
int ioctl_cmd;
- if (copy_from_user(&ioctl_cmd, ifr->ifr_data, sizeof(int)))
- return -EFAULT;
+ if (copy_from_user(&ioctl_cmd, data, sizeof(int)))
+ return -EFAULT;
if (yp->magic != YAM_MAGIC)
return -EINVAL;
@@ -947,8 +947,7 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCYAMSMCS:
if (netif_running(dev))
return -EINVAL; /* Cannot change this parameter when up */
- ym = memdup_user(ifr->ifr_data,
- sizeof(struct yamdrv_ioctl_mcs));
+ ym = memdup_user(data, sizeof(struct yamdrv_ioctl_mcs));
if (IS_ERR(ym))
return PTR_ERR(ym);
if (ym->cmd != SIOCYAMSMCS)
@@ -965,8 +964,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCYAMSCFG:
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
- if (copy_from_user(&yi, ifr->ifr_data, sizeof(struct yamdrv_ioctl_cfg)))
- return -EFAULT;
+ if (copy_from_user(&yi, data, sizeof(struct yamdrv_ioctl_cfg)))
+ return -EFAULT;
if (yi.cmd != SIOCYAMSCFG)
return -EINVAL;
@@ -1045,8 +1044,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
yi.cfg.txtail = yp->txtail;
yi.cfg.persist = yp->pers;
yi.cfg.slottime = yp->slot;
- if (copy_to_user(ifr->ifr_data, &yi, sizeof(struct yamdrv_ioctl_cfg)))
- return -EFAULT;
+ if (copy_to_user(data, &yi, sizeof(struct yamdrv_ioctl_cfg)))
+ return -EFAULT;
break;
default:
@@ -1074,7 +1073,7 @@ static const struct net_device_ops yam_netdev_ops = {
.ndo_open = yam_open,
.ndo_stop = yam_close,
.ndo_start_xmit = yam_send_packet,
- .ndo_do_ioctl = yam_ioctl,
+ .ndo_siocdevprivate = yam_siocdevprivate,
.ndo_set_mac_address = yam_set_mac_address,
};
diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c
index 22010384c4a3..7661dbb31162 100644
--- a/drivers/net/hippi/rrunner.c
+++ b/drivers/net/hippi/rrunner.c
@@ -63,7 +63,7 @@ static const char version[] =
static const struct net_device_ops rr_netdev_ops = {
.ndo_open = rr_open,
.ndo_stop = rr_close,
- .ndo_do_ioctl = rr_ioctl,
+ .ndo_siocdevprivate = rr_siocdevprivate,
.ndo_start_xmit = rr_start_xmit,
.ndo_set_mac_address = hippi_mac_addr,
};
@@ -1568,7 +1568,8 @@ out:
}
-static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int rr_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+ void __user *data, int cmd)
{
struct rr_private *rrpriv;
unsigned char *image, *oldimage;
@@ -1603,7 +1604,7 @@ static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
error = -EFAULT;
goto gf_out;
}
- error = copy_to_user(rq->ifr_data, image, EEPROM_BYTES);
+ error = copy_to_user(data, image, EEPROM_BYTES);
if (error)
error = -EFAULT;
gf_out:
@@ -1615,7 +1616,7 @@ static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
return -EPERM;
}
- image = memdup_user(rq->ifr_data, EEPROM_BYTES);
+ image = memdup_user(data, EEPROM_BYTES);
if (IS_ERR(image))
return PTR_ERR(image);
@@ -1658,7 +1659,7 @@ static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
return error;
case SIOCRRID:
- return put_user(0x52523032, (int __user *)rq->ifr_data);
+ return put_user(0x52523032, (int __user *)data);
default:
return error;
}
diff --git a/drivers/net/hippi/rrunner.h b/drivers/net/hippi/rrunner.h
index 87533784604f..55377614e752 100644
--- a/drivers/net/hippi/rrunner.h
+++ b/drivers/net/hippi/rrunner.h
@@ -835,7 +835,8 @@ static int rr_open(struct net_device *dev);
static netdev_tx_t rr_start_xmit(struct sk_buff *skb,
struct net_device *dev);
static int rr_close(struct net_device *dev);
-static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static int rr_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+ void __user *data, int cmd);
static unsigned int rr_read_eeprom(struct rr_private *rrpriv,
unsigned long offset,
unsigned char *buf,
diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c
index ebc976b7fcc2..8caa61ec718f 100644
--- a/drivers/net/ieee802154/mac802154_hwsim.c
+++ b/drivers/net/ieee802154/mac802154_hwsim.c
@@ -418,7 +418,7 @@ static int hwsim_new_edge_nl(struct sk_buff *msg, struct genl_info *info)
struct hwsim_edge *e;
u32 v0, v1;
- if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] &&
+ if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] ||
!info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE])
return -EINVAL;
@@ -528,14 +528,14 @@ static int hwsim_set_edge_lqi(struct sk_buff *msg, struct genl_info *info)
u32 v0, v1;
u8 lqi;
- if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] &&
+ if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] ||
!info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE])
return -EINVAL;
if (nla_parse_nested_deprecated(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL))
return -EINVAL;
- if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID] &&
+ if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID] ||
!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_LQI])
return -EINVAL;
diff --git a/drivers/net/ipa/Makefile b/drivers/net/ipa/Makefile
index 506f8d5cd4ee..bdfb2430ab2c 100644
--- a/drivers/net/ipa/Makefile
+++ b/drivers/net/ipa/Makefile
@@ -1,9 +1,6 @@
-# Un-comment the next line if you want to validate configuration data
-#ccflags-y += -DIPA_VALIDATE
-
obj-$(CONFIG_QCOM_IPA) += ipa.o
-ipa-y := ipa_main.o ipa_clock.o ipa_reg.o ipa_mem.o \
+ipa-y := ipa_main.o ipa_power.o ipa_reg.o ipa_mem.o \
ipa_table.o ipa_interrupt.o gsi.o gsi_trans.o \
ipa_gsi.o ipa_smp2p.o ipa_uc.o \
ipa_endpoint.o ipa_cmd.o ipa_modem.o \
diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 427c68b2ad8f..a2fcdb1abdb9 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -198,77 +198,6 @@ static void gsi_irq_type_disable(struct gsi *gsi, enum gsi_irq_type_id type_id)
gsi_irq_type_update(gsi, gsi->type_enabled_bitmap & ~BIT(type_id));
}
-/* Turn off all GSI interrupts initially; there is no gsi_irq_teardown() */
-static void gsi_irq_setup(struct gsi *gsi)
-{
- /* Disable all interrupt types */
- gsi_irq_type_update(gsi, 0);
-
- /* Clear all type-specific interrupt masks */
- iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
- iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
- iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET);
- iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
-
- /* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */
- if (gsi->version > IPA_VERSION_3_1) {
- u32 offset;
-
- /* These registers are in the non-adjusted address range */
- offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET;
- iowrite32(0, gsi->virt_raw + offset);
- offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET;
- iowrite32(0, gsi->virt_raw + offset);
- }
-
- iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET);
-}
-
-/* Get # supported channel and event rings; there is no gsi_ring_teardown() */
-static int gsi_ring_setup(struct gsi *gsi)
-{
- struct device *dev = gsi->dev;
- u32 count;
- u32 val;
-
- if (gsi->version < IPA_VERSION_3_5_1) {
- /* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max */
- gsi->channel_count = GSI_CHANNEL_COUNT_MAX;
- gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX;
-
- return 0;
- }
-
- val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET);
-
- count = u32_get_bits(val, NUM_CH_PER_EE_FMASK);
- if (!count) {
- dev_err(dev, "GSI reports zero channels supported\n");
- return -EINVAL;
- }
- if (count > GSI_CHANNEL_COUNT_MAX) {
- dev_warn(dev, "limiting to %u channels; hardware supports %u\n",
- GSI_CHANNEL_COUNT_MAX, count);
- count = GSI_CHANNEL_COUNT_MAX;
- }
- gsi->channel_count = count;
-
- count = u32_get_bits(val, NUM_EV_PER_EE_FMASK);
- if (!count) {
- dev_err(dev, "GSI reports zero event rings supported\n");
- return -EINVAL;
- }
- if (count > GSI_EVT_RING_COUNT_MAX) {
- dev_warn(dev,
- "limiting to %u event rings; hardware supports %u\n",
- GSI_EVT_RING_COUNT_MAX, count);
- count = GSI_EVT_RING_COUNT_MAX;
- }
- gsi->evt_ring_count = count;
-
- return 0;
-}
-
/* Event ring commands are performed one at a time. Their completion
* is signaled by the event ring control GSI interrupt type, which is
* only enabled when we issue an event ring command. Only the event
@@ -920,12 +849,13 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
/* All done! */
}
-static int __gsi_channel_start(struct gsi_channel *channel, bool start)
+static int __gsi_channel_start(struct gsi_channel *channel, bool resume)
{
struct gsi *gsi = channel->gsi;
int ret;
- if (!start)
+ /* Prior to IPA v4.0 suspend/resume is not implemented by GSI */
+ if (resume && gsi->version < IPA_VERSION_4_0)
return 0;
mutex_lock(&gsi->mutex);
@@ -947,7 +877,7 @@ int gsi_channel_start(struct gsi *gsi, u32 channel_id)
napi_enable(&channel->napi);
gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id);
- ret = __gsi_channel_start(channel, true);
+ ret = __gsi_channel_start(channel, false);
if (ret) {
gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id);
napi_disable(&channel->napi);
@@ -971,7 +901,7 @@ static int gsi_channel_stop_retry(struct gsi_channel *channel)
return ret;
}
-static int __gsi_channel_stop(struct gsi_channel *channel, bool stop)
+static int __gsi_channel_stop(struct gsi_channel *channel, bool suspend)
{
struct gsi *gsi = channel->gsi;
int ret;
@@ -979,7 +909,8 @@ static int __gsi_channel_stop(struct gsi_channel *channel, bool stop)
/* Wait for any underway transactions to complete before stopping. */
gsi_channel_trans_quiesce(channel);
- if (!stop)
+ /* Prior to IPA v4.0 suspend/resume is not implemented by GSI */
+ if (suspend && gsi->version < IPA_VERSION_4_0)
return 0;
mutex_lock(&gsi->mutex);
@@ -997,7 +928,7 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id)
struct gsi_channel *channel = &gsi->channel[channel_id];
int ret;
- ret = __gsi_channel_stop(channel, true);
+ ret = __gsi_channel_stop(channel, false);
if (ret)
return ret;
@@ -1026,13 +957,13 @@ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell)
mutex_unlock(&gsi->mutex);
}
-/* Stop a STARTED channel for suspend (using stop if requested) */
-int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop)
+/* Stop a started channel for suspend */
+int gsi_channel_suspend(struct gsi *gsi, u32 channel_id)
{
struct gsi_channel *channel = &gsi->channel[channel_id];
int ret;
- ret = __gsi_channel_stop(channel, stop);
+ ret = __gsi_channel_stop(channel, true);
if (ret)
return ret;
@@ -1042,12 +973,24 @@ int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop)
return 0;
}
-/* Resume a suspended channel (starting will be requested if STOPPED) */
-int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start)
+/* Resume a suspended channel (starting if stopped) */
+int gsi_channel_resume(struct gsi *gsi, u32 channel_id)
{
struct gsi_channel *channel = &gsi->channel[channel_id];
- return __gsi_channel_start(channel, start);
+ return __gsi_channel_start(channel, true);
+}
+
+/* Prevent all GSI interrupts while suspended */
+void gsi_suspend(struct gsi *gsi)
+{
+ disable_irq(gsi->irq);
+}
+
+/* Allow all GSI interrupts again when resuming */
+void gsi_resume(struct gsi *gsi)
+{
+ enable_irq(gsi->irq);
}
/**
@@ -1372,33 +1315,20 @@ static irqreturn_t gsi_isr(int irq, void *dev_id)
return IRQ_HANDLED;
}
+/* Init function for GSI IRQ lookup; there is no gsi_irq_exit() */
static int gsi_irq_init(struct gsi *gsi, struct platform_device *pdev)
{
- struct device *dev = &pdev->dev;
- unsigned int irq;
int ret;
ret = platform_get_irq_byname(pdev, "gsi");
if (ret <= 0)
return ret ? : -EINVAL;
- irq = ret;
-
- ret = request_irq(irq, gsi_isr, 0, "gsi", gsi);
- if (ret) {
- dev_err(dev, "error %d requesting \"gsi\" IRQ\n", ret);
- return ret;
- }
- gsi->irq = irq;
+ gsi->irq = ret;
return 0;
}
-static void gsi_irq_exit(struct gsi *gsi)
-{
- free_irq(gsi->irq, gsi);
-}
-
/* Return the transaction associated with a transfer completion event */
static struct gsi_trans *gsi_event_trans(struct gsi_channel *channel,
struct gsi_event *event)
@@ -1876,6 +1806,93 @@ static void gsi_channel_teardown(struct gsi *gsi)
gsi_irq_disable(gsi);
}
+/* Turn off all GSI interrupts initially */
+static int gsi_irq_setup(struct gsi *gsi)
+{
+ int ret;
+
+ /* Writing 1 indicates IRQ interrupts; 0 would be MSI */
+ iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET);
+
+ /* Disable all interrupt types */
+ gsi_irq_type_update(gsi, 0);
+
+ /* Clear all type-specific interrupt masks */
+ iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
+ iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
+ iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET);
+ iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
+
+ /* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */
+ if (gsi->version > IPA_VERSION_3_1) {
+ u32 offset;
+
+ /* These registers are in the non-adjusted address range */
+ offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET;
+ iowrite32(0, gsi->virt_raw + offset);
+ offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET;
+ iowrite32(0, gsi->virt_raw + offset);
+ }
+
+ iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET);
+
+ ret = request_irq(gsi->irq, gsi_isr, 0, "gsi", gsi);
+ if (ret)
+ dev_err(gsi->dev, "error %d requesting \"gsi\" IRQ\n", ret);
+
+ return ret;
+}
+
+static void gsi_irq_teardown(struct gsi *gsi)
+{
+ free_irq(gsi->irq, gsi);
+}
+
+/* Get # supported channel and event rings; there is no gsi_ring_teardown() */
+static int gsi_ring_setup(struct gsi *gsi)
+{
+ struct device *dev = gsi->dev;
+ u32 count;
+ u32 val;
+
+ if (gsi->version < IPA_VERSION_3_5_1) {
+ /* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max */
+ gsi->channel_count = GSI_CHANNEL_COUNT_MAX;
+ gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX;
+
+ return 0;
+ }
+
+ val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET);
+
+ count = u32_get_bits(val, NUM_CH_PER_EE_FMASK);
+ if (!count) {
+ dev_err(dev, "GSI reports zero channels supported\n");
+ return -EINVAL;
+ }
+ if (count > GSI_CHANNEL_COUNT_MAX) {
+ dev_warn(dev, "limiting to %u channels; hardware supports %u\n",
+ GSI_CHANNEL_COUNT_MAX, count);
+ count = GSI_CHANNEL_COUNT_MAX;
+ }
+ gsi->channel_count = count;
+
+ count = u32_get_bits(val, NUM_EV_PER_EE_FMASK);
+ if (!count) {
+ dev_err(dev, "GSI reports zero event rings supported\n");
+ return -EINVAL;
+ }
+ if (count > GSI_EVT_RING_COUNT_MAX) {
+ dev_warn(dev,
+ "limiting to %u event rings; hardware supports %u\n",
+ GSI_EVT_RING_COUNT_MAX, count);
+ count = GSI_EVT_RING_COUNT_MAX;
+ }
+ gsi->evt_ring_count = count;
+
+ return 0;
+}
+
/* Setup function for GSI. GSI firmware must be loaded and initialized */
int gsi_setup(struct gsi *gsi)
{
@@ -1889,25 +1906,34 @@ int gsi_setup(struct gsi *gsi)
return -EIO;
}
- gsi_irq_setup(gsi); /* No matching teardown required */
+ ret = gsi_irq_setup(gsi);
+ if (ret)
+ return ret;
ret = gsi_ring_setup(gsi); /* No matching teardown required */
if (ret)
- return ret;
+ goto err_irq_teardown;
/* Initialize the error log */
iowrite32(0, gsi->virt + GSI_ERROR_LOG_OFFSET);
- /* Writing 1 indicates IRQ interrupts; 0 would be MSI */
- iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET);
+ ret = gsi_channel_setup(gsi);
+ if (ret)
+ goto err_irq_teardown;
- return gsi_channel_setup(gsi);
+ return 0;
+
+err_irq_teardown:
+ gsi_irq_teardown(gsi);
+
+ return ret;
}
/* Inverse of gsi_setup() */
void gsi_teardown(struct gsi *gsi)
{
gsi_channel_teardown(gsi);
+ gsi_irq_teardown(gsi);
}
/* Initialize a channel's event ring */
@@ -1964,7 +1990,6 @@ static void gsi_evt_ring_init(struct gsi *gsi)
static bool gsi_channel_data_valid(struct gsi *gsi,
const struct ipa_gsi_endpoint_data *data)
{
-#ifdef IPA_VALIDATION
u32 channel_id = data->channel_id;
struct device *dev = gsi->dev;
@@ -2010,7 +2035,6 @@ static bool gsi_channel_data_valid(struct gsi *gsi,
channel_id, data->channel.event_count);
return false;
}
-#endif /* IPA_VALIDATION */
return true;
}
@@ -2206,20 +2230,18 @@ int gsi_init(struct gsi *gsi, struct platform_device *pdev,
init_completion(&gsi->completion);
- ret = gsi_irq_init(gsi, pdev);
+ ret = gsi_irq_init(gsi, pdev); /* No matching exit required */
if (ret)
goto err_iounmap;
ret = gsi_channel_init(gsi, count, data);
if (ret)
- goto err_irq_exit;
+ goto err_iounmap;
mutex_init(&gsi->mutex);
return 0;
-err_irq_exit:
- gsi_irq_exit(gsi);
err_iounmap:
iounmap(gsi->virt_raw);
@@ -2231,7 +2253,6 @@ void gsi_exit(struct gsi *gsi)
{
mutex_destroy(&gsi->mutex);
gsi_channel_exit(gsi);
- gsi_irq_exit(gsi);
iounmap(gsi->virt_raw);
}
diff --git a/drivers/net/ipa/gsi.h b/drivers/net/ipa/gsi.h
index 81cd7b07f6e1..88b80dc3db79 100644
--- a/drivers/net/ipa/gsi.h
+++ b/drivers/net/ipa/gsi.h
@@ -232,8 +232,35 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id);
*/
void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell);
-int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop);
-int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start);
+/**
+ * gsi_suspend() - Prepare the GSI subsystem for suspend
+ * @gsi: GSI pointer
+ */
+void gsi_suspend(struct gsi *gsi);
+
+/**
+ * gsi_resume() - Resume the GSI subsystem following suspend
+ * @gsi: GSI pointer
+ */
+void gsi_resume(struct gsi *gsi);
+
+/**
+ * gsi_channel_suspend() - Suspend a GSI channel
+ * @gsi: GSI pointer
+ * @channel_id: Channel to suspend
+ *
+ * For IPA v4.0+, suspend is implemented by stopping the channel.
+ */
+int gsi_channel_suspend(struct gsi *gsi, u32 channel_id);
+
+/**
+ * gsi_channel_resume() - Resume a suspended GSI channel
+ * @gsi: GSI pointer
+ * @channel_id: Channel to resume
+ *
+ * For IPA v4.0+, the stopped channel is started again.
+ */
+int gsi_channel_resume(struct gsi *gsi, u32 channel_id);
/**
* gsi_init() - Initialize the GSI subsystem
diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c
index 8c795a6a8598..1544564bc283 100644
--- a/drivers/net/ipa/gsi_trans.c
+++ b/drivers/net/ipa/gsi_trans.c
@@ -90,14 +90,12 @@ int gsi_trans_pool_init(struct gsi_trans_pool *pool, size_t size, u32 count,
{
void *virt;
-#ifdef IPA_VALIDATE
if (!size)
return -EINVAL;
if (count < max_alloc)
return -EINVAL;
if (!max_alloc)
return -EINVAL;
-#endif /* IPA_VALIDATE */
/* By allocating a few extra entries in our pool (one less
* than the maximum number that will be requested in a
@@ -140,14 +138,12 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool,
dma_addr_t addr;
void *virt;
-#ifdef IPA_VALIDATE
if (!size)
return -EINVAL;
if (count < max_alloc)
return -EINVAL;
if (!max_alloc)
return -EINVAL;
-#endif /* IPA_VALIDATE */
/* Don't let allocations cross a power-of-two boundary */
size = __roundup_pow_of_two(size);
@@ -188,8 +184,8 @@ static u32 gsi_trans_pool_alloc_common(struct gsi_trans_pool *pool, u32 count)
{
u32 offset;
- /* assert(count > 0); */
- /* assert(count <= pool->max_alloc); */
+ WARN_ON(!count);
+ WARN_ON(count > pool->max_alloc);
/* Allocate from beginning if wrap would occur */
if (count > pool->count - pool->free)
@@ -225,9 +221,10 @@ void *gsi_trans_pool_next(struct gsi_trans_pool *pool, void *element)
{
void *end = pool->base + pool->count * pool->size;
- /* assert(element >= pool->base); */
- /* assert(element < end); */
- /* assert(pool->max_alloc == 1); */
+ WARN_ON(element < pool->base);
+ WARN_ON(element >= end);
+ WARN_ON(pool->max_alloc != 1);
+
element += pool->size;
return element < end ? element : pool->base;
@@ -332,7 +329,8 @@ struct gsi_trans *gsi_channel_trans_alloc(struct gsi *gsi, u32 channel_id,
struct gsi_trans_info *trans_info;
struct gsi_trans *trans;
- /* assert(tre_count <= gsi_channel_trans_tre_max(gsi, channel_id)); */
+ if (WARN_ON(tre_count > gsi_channel_trans_tre_max(gsi, channel_id)))
+ return NULL;
trans_info = &channel->trans_info;
@@ -408,7 +406,7 @@ void gsi_trans_cmd_add(struct gsi_trans *trans, void *buf, u32 size,
u32 which = trans->used++;
struct scatterlist *sg;
- /* assert(which < trans->tre_count); */
+ WARN_ON(which >= trans->tre_count);
/* Commands are quite different from data transfer requests.
* Their payloads come from a pool whose memory is allocated
@@ -441,8 +439,10 @@ int gsi_trans_page_add(struct gsi_trans *trans, struct page *page, u32 size,
struct scatterlist *sg = &trans->sgl[0];
int ret;
- /* assert(trans->tre_count == 1); */
- /* assert(!trans->used); */
+ if (WARN_ON(trans->tre_count != 1))
+ return -EINVAL;
+ if (WARN_ON(trans->used))
+ return -EINVAL;
sg_set_page(sg, page, size, offset);
ret = dma_map_sg(trans->gsi->dev, sg, 1, trans->direction);
@@ -461,8 +461,10 @@ int gsi_trans_skb_add(struct gsi_trans *trans, struct sk_buff *skb)
u32 used;
int ret;
- /* assert(trans->tre_count == 1); */
- /* assert(!trans->used); */
+ if (WARN_ON(trans->tre_count != 1))
+ return -EINVAL;
+ if (WARN_ON(trans->used))
+ return -EINVAL;
/* skb->len will not be 0 (checked early) */
ret = skb_to_sgvec(skb, sg, 0, skb->len);
@@ -550,7 +552,7 @@ static void __gsi_trans_commit(struct gsi_trans *trans, bool ring_db)
u32 avail;
u32 i;
- /* assert(trans->used > 0); */
+ WARN_ON(!trans->used);
/* Consume the entries. If we cross the end of the ring while
* filling them we'll switch to the beginning to finish.
diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h
index 744406832a77..9fc880eb7e3a 100644
--- a/drivers/net/ipa/ipa.h
+++ b/drivers/net/ipa/ipa.h
@@ -23,34 +23,24 @@ struct icc_path;
struct net_device;
struct platform_device;
-struct ipa_clock;
+struct ipa_power;
struct ipa_smp2p;
struct ipa_interrupt;
/**
- * enum ipa_flag - IPA state flags
- * @IPA_FLAG_RESUMED: Whether resume from suspend has been signaled
- * @IPA_FLAG_COUNT: Number of defined IPA flags
- */
-enum ipa_flag {
- IPA_FLAG_RESUMED,
- IPA_FLAG_COUNT, /* Last; not a flag */
-};
-
-/**
* struct ipa - IPA information
* @gsi: Embedded GSI structure
- * @flags: Boolean state flags
* @version: IPA hardware version
* @pdev: Platform device
* @completion: Used to signal pipeline clear transfer complete
* @nb: Notifier block used for remoteproc SSR
* @notifier: Remoteproc SSR notifier
* @smp2p: SMP2P information
- * @clock: IPA clocking information
+ * @power: IPA power information
* @table_addr: DMA address of filter/route table content
* @table_virt: Virtual address of filter/route table content
* @interrupt: IPA Interrupt information
+ * @uc_powered: true if power is active by proxy for microcontroller
* @uc_loaded: true after microcontroller has reported it's ready
* @reg_addr: DMA address used for IPA register access
* @reg_virt: Virtual address used for IPA register access
@@ -82,19 +72,19 @@ enum ipa_flag {
*/
struct ipa {
struct gsi gsi;
- DECLARE_BITMAP(flags, IPA_FLAG_COUNT);
enum ipa_version version;
struct platform_device *pdev;
struct completion completion;
struct notifier_block nb;
void *notifier;
struct ipa_smp2p *smp2p;
- struct ipa_clock *clock;
+ struct ipa_power *power;
dma_addr_t table_addr;
__le64 *table_virt;
struct ipa_interrupt *interrupt;
+ bool uc_powered;
bool uc_loaded;
dma_addr_t reg_addr;
@@ -144,11 +134,11 @@ struct ipa {
*
* Activities performed at the init stage can be done without requiring
* any access to IPA hardware. Activities performed at the config stage
- * require the IPA clock to be running, because they involve access
- * to IPA registers. The setup stage is performed only after the GSI
- * hardware is ready (more on this below). The setup stage allows
- * the AP to perform more complex initialization by issuing "immediate
- * commands" using a special interface to the IPA.
+ * require IPA power, because they involve access to IPA registers.
+ * The setup stage is performed only after the GSI hardware is ready
+ * (more on this below). The setup stage allows the AP to perform
+ * more complex initialization by issuing "immediate commands" using
+ * a special interface to the IPA.
*
* This function, @ipa_setup(), starts the setup stage.
*
diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c
deleted file mode 100644
index 69ef6ea41e61..000000000000
--- a/drivers/net/ipa/ipa_clock.c
+++ /dev/null
@@ -1,331 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
- * Copyright (C) 2018-2021 Linaro Ltd.
- */
-
-#include <linux/refcount.h>
-#include <linux/mutex.h>
-#include <linux/clk.h>
-#include <linux/device.h>
-#include <linux/interconnect.h>
-
-#include "ipa.h"
-#include "ipa_clock.h"
-#include "ipa_modem.h"
-#include "ipa_data.h"
-
-/**
- * DOC: IPA Clocking
- *
- * The "IPA Clock" manages both the IPA core clock and the interconnects
- * (buses) the IPA depends on as a single logical entity. A reference count
- * is incremented by "get" operations and decremented by "put" operations.
- * Transitions of that count from 0 to 1 result in the clock and interconnects
- * being enabled, and transitions of the count from 1 to 0 cause them to be
- * disabled. We currently operate the core clock at a fixed clock rate, and
- * all buses at a fixed average and peak bandwidth. As more advanced IPA
- * features are enabled, we can make better use of clock and bus scaling.
- *
- * An IPA clock reference must be held for any access to IPA hardware.
- */
-
-/**
- * struct ipa_interconnect - IPA interconnect information
- * @path: Interconnect path
- * @average_bandwidth: Average interconnect bandwidth (KB/second)
- * @peak_bandwidth: Peak interconnect bandwidth (KB/second)
- */
-struct ipa_interconnect {
- struct icc_path *path;
- u32 average_bandwidth;
- u32 peak_bandwidth;
-};
-
-/**
- * struct ipa_clock - IPA clocking information
- * @count: Clocking reference count
- * @mutex: Protects clock enable/disable
- * @core: IPA core clock
- * @interconnect_count: Number of elements in interconnect[]
- * @interconnect: Interconnect array
- */
-struct ipa_clock {
- refcount_t count;
- struct mutex mutex; /* protects clock enable/disable */
- struct clk *core;
- u32 interconnect_count;
- struct ipa_interconnect *interconnect;
-};
-
-static int ipa_interconnect_init_one(struct device *dev,
- struct ipa_interconnect *interconnect,
- const struct ipa_interconnect_data *data)
-{
- struct icc_path *path;
-
- path = of_icc_get(dev, data->name);
- if (IS_ERR(path)) {
- int ret = PTR_ERR(path);
-
- dev_err_probe(dev, ret, "error getting %s interconnect\n",
- data->name);
-
- return ret;
- }
-
- interconnect->path = path;
- interconnect->average_bandwidth = data->average_bandwidth;
- interconnect->peak_bandwidth = data->peak_bandwidth;
-
- return 0;
-}
-
-static void ipa_interconnect_exit_one(struct ipa_interconnect *interconnect)
-{
- icc_put(interconnect->path);
- memset(interconnect, 0, sizeof(*interconnect));
-}
-
-/* Initialize interconnects required for IPA operation */
-static int ipa_interconnect_init(struct ipa_clock *clock, struct device *dev,
- const struct ipa_interconnect_data *data)
-{
- struct ipa_interconnect *interconnect;
- u32 count;
- int ret;
-
- count = clock->interconnect_count;
- interconnect = kcalloc(count, sizeof(*interconnect), GFP_KERNEL);
- if (!interconnect)
- return -ENOMEM;
- clock->interconnect = interconnect;
-
- while (count--) {
- ret = ipa_interconnect_init_one(dev, interconnect, data++);
- if (ret)
- goto out_unwind;
- interconnect++;
- }
-
- return 0;
-
-out_unwind:
- while (interconnect-- > clock->interconnect)
- ipa_interconnect_exit_one(interconnect);
- kfree(clock->interconnect);
- clock->interconnect = NULL;
-
- return ret;
-}
-
-/* Inverse of ipa_interconnect_init() */
-static void ipa_interconnect_exit(struct ipa_clock *clock)
-{
- struct ipa_interconnect *interconnect;
-
- interconnect = clock->interconnect + clock->interconnect_count;
- while (interconnect-- > clock->interconnect)
- ipa_interconnect_exit_one(interconnect);
- kfree(clock->interconnect);
- clock->interconnect = NULL;
-}
-
-/* Currently we only use one bandwidth level, so just "enable" interconnects */
-static int ipa_interconnect_enable(struct ipa *ipa)
-{
- struct ipa_interconnect *interconnect;
- struct ipa_clock *clock = ipa->clock;
- int ret;
- u32 i;
-
- interconnect = clock->interconnect;
- for (i = 0; i < clock->interconnect_count; i++) {
- ret = icc_set_bw(interconnect->path,
- interconnect->average_bandwidth,
- interconnect->peak_bandwidth);
- if (ret)
- goto out_unwind;
- interconnect++;
- }
-
- return 0;
-
-out_unwind:
- while (interconnect-- > clock->interconnect)
- (void)icc_set_bw(interconnect->path, 0, 0);
-
- return ret;
-}
-
-/* To disable an interconnect, we just its bandwidth to 0 */
-static void ipa_interconnect_disable(struct ipa *ipa)
-{
- struct ipa_interconnect *interconnect;
- struct ipa_clock *clock = ipa->clock;
- int result = 0;
- u32 count;
- int ret;
-
- count = clock->interconnect_count;
- interconnect = clock->interconnect + count;
- while (count--) {
- interconnect--;
- ret = icc_set_bw(interconnect->path, 0, 0);
- if (ret && !result)
- result = ret;
- }
-
- if (result)
- dev_err(&ipa->pdev->dev,
- "error %d disabling IPA interconnects\n", ret);
-}
-
-/* Turn on IPA clocks, including interconnects */
-static int ipa_clock_enable(struct ipa *ipa)
-{
- int ret;
-
- ret = ipa_interconnect_enable(ipa);
- if (ret)
- return ret;
-
- ret = clk_prepare_enable(ipa->clock->core);
- if (ret)
- ipa_interconnect_disable(ipa);
-
- return ret;
-}
-
-/* Inverse of ipa_clock_enable() */
-static void ipa_clock_disable(struct ipa *ipa)
-{
- clk_disable_unprepare(ipa->clock->core);
- ipa_interconnect_disable(ipa);
-}
-
-/* Get an IPA clock reference, but only if the reference count is
- * already non-zero. Returns true if the additional reference was
- * added successfully, or false otherwise.
- */
-bool ipa_clock_get_additional(struct ipa *ipa)
-{
- return refcount_inc_not_zero(&ipa->clock->count);
-}
-
-/* Get an IPA clock reference. If the reference count is non-zero, it is
- * incremented and return is immediate. Otherwise it is checked again
- * under protection of the mutex, and if appropriate the IPA clock
- * is enabled.
- *
- * Incrementing the reference count is intentionally deferred until
- * after the clock is running and endpoints are resumed.
- */
-void ipa_clock_get(struct ipa *ipa)
-{
- struct ipa_clock *clock = ipa->clock;
- int ret;
-
- /* If the clock is running, just bump the reference count */
- if (ipa_clock_get_additional(ipa))
- return;
-
- /* Otherwise get the mutex and check again */
- mutex_lock(&clock->mutex);
-
- /* A reference might have been added before we got the mutex. */
- if (ipa_clock_get_additional(ipa))
- goto out_mutex_unlock;
-
- ret = ipa_clock_enable(ipa);
- if (ret) {
- dev_err(&ipa->pdev->dev, "error %d enabling IPA clock\n", ret);
- goto out_mutex_unlock;
- }
-
- refcount_set(&clock->count, 1);
-
-out_mutex_unlock:
- mutex_unlock(&clock->mutex);
-}
-
-/* Attempt to remove an IPA clock reference. If this represents the
- * last reference, disable the IPA clock under protection of the mutex.
- */
-void ipa_clock_put(struct ipa *ipa)
-{
- struct ipa_clock *clock = ipa->clock;
-
- /* If this is not the last reference there's nothing more to do */
- if (!refcount_dec_and_mutex_lock(&clock->count, &clock->mutex))
- return;
-
- ipa_clock_disable(ipa);
-
- mutex_unlock(&clock->mutex);
-}
-
-/* Return the current IPA core clock rate */
-u32 ipa_clock_rate(struct ipa *ipa)
-{
- return ipa->clock ? (u32)clk_get_rate(ipa->clock->core) : 0;
-}
-
-/* Initialize IPA clocking */
-struct ipa_clock *
-ipa_clock_init(struct device *dev, const struct ipa_clock_data *data)
-{
- struct ipa_clock *clock;
- struct clk *clk;
- int ret;
-
- clk = clk_get(dev, "core");
- if (IS_ERR(clk)) {
- dev_err_probe(dev, PTR_ERR(clk), "error getting core clock\n");
-
- return ERR_CAST(clk);
- }
-
- ret = clk_set_rate(clk, data->core_clock_rate);
- if (ret) {
- dev_err(dev, "error %d setting core clock rate to %u\n",
- ret, data->core_clock_rate);
- goto err_clk_put;
- }
-
- clock = kzalloc(sizeof(*clock), GFP_KERNEL);
- if (!clock) {
- ret = -ENOMEM;
- goto err_clk_put;
- }
- clock->core = clk;
- clock->interconnect_count = data->interconnect_count;
-
- ret = ipa_interconnect_init(clock, dev, data->interconnect_data);
- if (ret)
- goto err_kfree;
-
- mutex_init(&clock->mutex);
- refcount_set(&clock->count, 0);
-
- return clock;
-
-err_kfree:
- kfree(clock);
-err_clk_put:
- clk_put(clk);
-
- return ERR_PTR(ret);
-}
-
-/* Inverse of ipa_clock_init() */
-void ipa_clock_exit(struct ipa_clock *clock)
-{
- struct clk *clk = clock->core;
-
- WARN_ON(refcount_read(&clock->count) != 0);
- mutex_destroy(&clock->mutex);
- ipa_interconnect_exit(clock);
- kfree(clock);
- clk_put(clk);
-}
diff --git a/drivers/net/ipa/ipa_clock.h b/drivers/net/ipa/ipa_clock.h
deleted file mode 100644
index 1fe634760e59..000000000000
--- a/drivers/net/ipa/ipa_clock.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
- * Copyright (C) 2018-2020 Linaro Ltd.
- */
-#ifndef _IPA_CLOCK_H_
-#define _IPA_CLOCK_H_
-
-struct device;
-
-struct ipa;
-struct ipa_clock_data;
-
-/**
- * ipa_clock_rate() - Return the current IPA core clock rate
- * @ipa: IPA structure
- *
- * Return: The current clock rate (in Hz), or 0.
- */
-u32 ipa_clock_rate(struct ipa *ipa);
-
-/**
- * ipa_clock_init() - Initialize IPA clocking
- * @dev: IPA device
- * @data: Clock configuration data
- *
- * Return: A pointer to an ipa_clock structure, or a pointer-coded error
- */
-struct ipa_clock *ipa_clock_init(struct device *dev,
- const struct ipa_clock_data *data);
-
-/**
- * ipa_clock_exit() - Inverse of ipa_clock_init()
- * @clock: IPA clock pointer
- */
-void ipa_clock_exit(struct ipa_clock *clock);
-
-/**
- * ipa_clock_get() - Get an IPA clock reference
- * @ipa: IPA pointer
- *
- * This call blocks if this is the first reference.
- */
-void ipa_clock_get(struct ipa *ipa);
-
-/**
- * ipa_clock_get_additional() - Get an IPA clock reference if not first
- * @ipa: IPA pointer
- *
- * This returns immediately, and only takes a reference if not the first
- */
-bool ipa_clock_get_additional(struct ipa *ipa);
-
-/**
- * ipa_clock_put() - Drop an IPA clock reference
- * @ipa: IPA pointer
- *
- * This drops a clock reference. If the last reference is being dropped,
- * the clock is stopped and RX endpoints are suspended. This call will
- * not block unless the last reference is dropped.
- */
-void ipa_clock_put(struct ipa *ipa);
-
-#endif /* _IPA_CLOCK_H_ */
diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c
index af44ca41189e..cff51731195a 100644
--- a/drivers/net/ipa/ipa_cmd.c
+++ b/drivers/net/ipa/ipa_cmd.c
@@ -159,35 +159,49 @@ static void ipa_cmd_validate_build(void)
BUILD_BUG_ON(TABLE_SIZE > field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK));
#undef TABLE_COUNT_MAX
#undef TABLE_SIZE
-}
-#ifdef IPA_VALIDATE
+ /* Hashed and non-hashed fields are assumed to be the same size */
+ BUILD_BUG_ON(field_max(IP_FLTRT_FLAGS_HASH_SIZE_FMASK) !=
+ field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK));
+ BUILD_BUG_ON(field_max(IP_FLTRT_FLAGS_HASH_ADDR_FMASK) !=
+ field_max(IP_FLTRT_FLAGS_NHASH_ADDR_FMASK));
+
+ /* Valid endpoint numbers must fit in the IP packet init command */
+ BUILD_BUG_ON(field_max(IPA_PACKET_INIT_DEST_ENDPOINT_FMASK) <
+ IPA_ENDPOINT_MAX - 1);
+}
/* Validate a memory region holding a table */
-bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem,
- bool route, bool ipv6, bool hashed)
+bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem, bool route)
{
+ u32 offset_max = field_max(IP_FLTRT_FLAGS_NHASH_ADDR_FMASK);
+ u32 size_max = field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK);
+ const char *table = route ? "route" : "filter";
struct device *dev = &ipa->pdev->dev;
- u32 offset_max;
- offset_max = hashed ? field_max(IP_FLTRT_FLAGS_HASH_ADDR_FMASK)
- : field_max(IP_FLTRT_FLAGS_NHASH_ADDR_FMASK);
+ /* Size must fit in the immediate command field that holds it */
+ if (mem->size > size_max) {
+ dev_err(dev, "%s table region size too large\n", table);
+ dev_err(dev, " (0x%04x > 0x%04x)\n",
+ mem->size, size_max);
+
+ return false;
+ }
+
+ /* Offset must fit in the immediate command field that holds it */
if (mem->offset > offset_max ||
ipa->mem_offset > offset_max - mem->offset) {
- dev_err(dev, "IPv%c %s%s table region offset too large\n",
- ipv6 ? '6' : '4', hashed ? "hashed " : "",
- route ? "route" : "filter");
+ dev_err(dev, "%s table region offset too large\n", table);
dev_err(dev, " (0x%04x + 0x%04x > 0x%04x)\n",
ipa->mem_offset, mem->offset, offset_max);
return false;
}
+ /* Entire memory range must fit within IPA-local memory */
if (mem->offset > ipa->mem_size ||
mem->size > ipa->mem_size - mem->offset) {
- dev_err(dev, "IPv%c %s%s table region out of range\n",
- ipv6 ? '6' : '4', hashed ? "hashed " : "",
- route ? "route" : "filter");
+ dev_err(dev, "%s table region out of range\n", table);
dev_err(dev, " (0x%04x + 0x%04x > 0x%04x)\n",
mem->offset, mem->size, ipa->mem_size);
@@ -331,7 +345,6 @@ bool ipa_cmd_data_valid(struct ipa *ipa)
return true;
}
-#endif /* IPA_VALIDATE */
int ipa_cmd_pool_init(struct gsi_channel *channel, u32 tre_max)
{
@@ -522,9 +535,6 @@ static void ipa_cmd_ip_packet_init_add(struct gsi_trans *trans, u8 endpoint_id)
union ipa_cmd_payload *cmd_payload;
dma_addr_t payload_addr;
- /* assert(endpoint_id <
- field_max(IPA_PACKET_INIT_DEST_ENDPOINT_FMASK)); */
-
cmd_payload = ipa_cmd_payload_alloc(ipa, &payload_addr);
payload = &cmd_payload->ip_packet_init;
@@ -548,8 +558,9 @@ void ipa_cmd_dma_shared_mem_add(struct gsi_trans *trans, u32 offset, u16 size,
u16 flags;
/* size and offset must fit in 16 bit fields */
- /* assert(size > 0 && size <= U16_MAX); */
- /* assert(offset <= U16_MAX && ipa->mem_offset <= U16_MAX - offset); */
+ WARN_ON(!size);
+ WARN_ON(size > U16_MAX);
+ WARN_ON(offset > U16_MAX || ipa->mem_offset > U16_MAX - offset);
offset += ipa->mem_offset;
@@ -588,8 +599,6 @@ static void ipa_cmd_ip_tag_status_add(struct gsi_trans *trans)
union ipa_cmd_payload *cmd_payload;
dma_addr_t payload_addr;
- /* assert(tag <= field_max(IP_PACKET_TAG_STATUS_TAG_FMASK)); */
-
cmd_payload = ipa_cmd_payload_alloc(ipa, &payload_addr);
payload = &cmd_payload->ip_packet_tag_status;
diff --git a/drivers/net/ipa/ipa_cmd.h b/drivers/net/ipa/ipa_cmd.h
index b99262281f41..69cd085d427d 100644
--- a/drivers/net/ipa/ipa_cmd.h
+++ b/drivers/net/ipa/ipa_cmd.h
@@ -57,20 +57,16 @@ struct ipa_cmd_info {
enum dma_data_direction direction;
};
-#ifdef IPA_VALIDATE
-
/**
* ipa_cmd_table_valid() - Validate a memory region holding a table
* @ipa: - IPA pointer
* @mem: - IPA memory region descriptor
* @route: - Whether the region holds a route or filter table
- * @ipv6: - Whether the table is for IPv6 or IPv4
- * @hashed: - Whether the table is hashed or non-hashed
*
* Return: true if region is valid, false otherwise
*/
bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem,
- bool route, bool ipv6, bool hashed);
+ bool route);
/**
* ipa_cmd_data_valid() - Validate command-realted configuration is valid
@@ -80,22 +76,6 @@ bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem,
*/
bool ipa_cmd_data_valid(struct ipa *ipa);
-#else /* !IPA_VALIDATE */
-
-static inline bool ipa_cmd_table_valid(struct ipa *ipa,
- const struct ipa_mem *mem, bool route,
- bool ipv6, bool hashed)
-{
- return true;
-}
-
-static inline bool ipa_cmd_data_valid(struct ipa *ipa)
-{
- return true;
-}
-
-#endif /* !IPA_VALIDATE */
-
/**
* ipa_cmd_pool_init() - initialize command channel pools
* @channel: AP->IPA command TX GSI channel pointer
diff --git a/drivers/net/ipa/ipa_data-v3.1.c b/drivers/net/ipa/ipa_data-v3.1.c
index 4c28189462a7..06ddb85f39b2 100644
--- a/drivers/net/ipa/ipa_data-v3.1.c
+++ b/drivers/net/ipa/ipa_data-v3.1.c
@@ -513,7 +513,7 @@ static const struct ipa_interconnect_data ipa_interconnect_data[] = {
};
/* Clock and interconnect configuration data for an SoC having IPA v3.1 */
-static const struct ipa_clock_data ipa_clock_data = {
+static const struct ipa_power_data ipa_power_data = {
.core_clock_rate = 16 * 1000 * 1000, /* Hz */
.interconnect_count = ARRAY_SIZE(ipa_interconnect_data),
.interconnect_data = ipa_interconnect_data,
@@ -529,5 +529,5 @@ const struct ipa_data ipa_data_v3_1 = {
.endpoint_data = ipa_gsi_endpoint_data,
.resource_data = &ipa_resource_data,
.mem_data = &ipa_mem_data,
- .clock_data = &ipa_clock_data,
+ .power_data = &ipa_power_data,
};
diff --git a/drivers/net/ipa/ipa_data-v3.5.1.c b/drivers/net/ipa/ipa_data-v3.5.1.c
index af536ef8c120..760c22bbdf70 100644
--- a/drivers/net/ipa/ipa_data-v3.5.1.c
+++ b/drivers/net/ipa/ipa_data-v3.5.1.c
@@ -394,7 +394,7 @@ static const struct ipa_interconnect_data ipa_interconnect_data[] = {
};
/* Clock and interconnect configuration data for an SoC having IPA v3.5.1 */
-static const struct ipa_clock_data ipa_clock_data = {
+static const struct ipa_power_data ipa_power_data = {
.core_clock_rate = 75 * 1000 * 1000, /* Hz */
.interconnect_count = ARRAY_SIZE(ipa_interconnect_data),
.interconnect_data = ipa_interconnect_data,
@@ -414,5 +414,5 @@ const struct ipa_data ipa_data_v3_5_1 = {
.endpoint_data = ipa_gsi_endpoint_data,
.resource_data = &ipa_resource_data,
.mem_data = &ipa_mem_data,
- .clock_data = &ipa_clock_data,
+ .power_data = &ipa_power_data,
};
diff --git a/drivers/net/ipa/ipa_data-v4.11.c b/drivers/net/ipa/ipa_data-v4.11.c
index 9353efbd504f..fea91451a0c3 100644
--- a/drivers/net/ipa/ipa_data-v4.11.c
+++ b/drivers/net/ipa/ipa_data-v4.11.c
@@ -105,6 +105,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.filter_support = true,
.config = {
.resource_group = IPA_RSRC_GROUP_SRC_UL_DL,
+ .checksum = true,
.qmap = true,
.status_enable = true,
.tx = {
@@ -128,6 +129,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.endpoint = {
.config = {
.resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL,
+ .checksum = true,
.qmap = true,
.aggregation = true,
.rx = {
@@ -368,24 +370,19 @@ static const struct ipa_mem_data ipa_mem_data = {
static const struct ipa_interconnect_data ipa_interconnect_data[] = {
{
.name = "memory",
- .peak_bandwidth = 465000, /* 465 MBps */
- .average_bandwidth = 80000, /* 80 MBps */
- },
- /* Average rate is unused for the next two interconnects */
- {
- .name = "imem",
- .peak_bandwidth = 68570, /* 68.57 MBps */
- .average_bandwidth = 80000, /* 80 MBps (unused?) */
+ .peak_bandwidth = 600000, /* 600 MBps */
+ .average_bandwidth = 150000, /* 150 MBps */
},
+ /* Average rate is unused for the next interconnect */
{
.name = "config",
- .peak_bandwidth = 30000, /* 30 MBps */
+ .peak_bandwidth = 74000, /* 74 MBps */
.average_bandwidth = 0, /* unused */
},
};
/* Clock and interconnect configuration data for an SoC having IPA v4.11 */
-static const struct ipa_clock_data ipa_clock_data = {
+static const struct ipa_power_data ipa_power_data = {
.core_clock_rate = 60 * 1000 * 1000, /* Hz */
.interconnect_count = ARRAY_SIZE(ipa_interconnect_data),
.interconnect_data = ipa_interconnect_data,
@@ -400,5 +397,5 @@ const struct ipa_data ipa_data_v4_11 = {
.endpoint_data = ipa_gsi_endpoint_data,
.resource_data = &ipa_resource_data,
.mem_data = &ipa_mem_data,
- .clock_data = &ipa_clock_data,
+ .power_data = &ipa_power_data,
};
diff --git a/drivers/net/ipa/ipa_data-v4.2.c b/drivers/net/ipa/ipa_data-v4.2.c
index 3b09b7baa95f..2a231e79d5e1 100644
--- a/drivers/net/ipa/ipa_data-v4.2.c
+++ b/drivers/net/ipa/ipa_data-v4.2.c
@@ -360,7 +360,7 @@ static const struct ipa_interconnect_data ipa_interconnect_data[] = {
};
/* Clock and interconnect configuration data for an SoC having IPA v4.2 */
-static const struct ipa_clock_data ipa_clock_data = {
+static const struct ipa_power_data ipa_power_data = {
.core_clock_rate = 100 * 1000 * 1000, /* Hz */
.interconnect_count = ARRAY_SIZE(ipa_interconnect_data),
.interconnect_data = ipa_interconnect_data,
@@ -376,5 +376,5 @@ const struct ipa_data ipa_data_v4_2 = {
.endpoint_data = ipa_gsi_endpoint_data,
.resource_data = &ipa_resource_data,
.mem_data = &ipa_mem_data,
- .clock_data = &ipa_clock_data,
+ .power_data = &ipa_power_data,
};
diff --git a/drivers/net/ipa/ipa_data-v4.5.c b/drivers/net/ipa/ipa_data-v4.5.c
index a99b6478fa3a..e62ab9c3ac67 100644
--- a/drivers/net/ipa/ipa_data-v4.5.c
+++ b/drivers/net/ipa/ipa_data-v4.5.c
@@ -114,6 +114,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.filter_support = true,
.config = {
.resource_group = IPA_RSRC_GROUP_SRC_UL_DL,
+ .checksum = true,
.qmap = true,
.status_enable = true,
.tx = {
@@ -137,6 +138,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.endpoint = {
.config = {
.resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL,
+ .checksum = true,
.qmap = true,
.aggregation = true,
.rx = {
@@ -441,7 +443,7 @@ static const struct ipa_interconnect_data ipa_interconnect_data[] = {
};
/* Clock and interconnect configuration data for an SoC having IPA v4.5 */
-static const struct ipa_clock_data ipa_clock_data = {
+static const struct ipa_power_data ipa_power_data = {
.core_clock_rate = 150 * 1000 * 1000, /* Hz (150? 60?) */
.interconnect_count = ARRAY_SIZE(ipa_interconnect_data),
.interconnect_data = ipa_interconnect_data,
@@ -456,5 +458,5 @@ const struct ipa_data ipa_data_v4_5 = {
.endpoint_data = ipa_gsi_endpoint_data,
.resource_data = &ipa_resource_data,
.mem_data = &ipa_mem_data,
- .clock_data = &ipa_clock_data,
+ .power_data = &ipa_power_data,
};
diff --git a/drivers/net/ipa/ipa_data-v4.9.c b/drivers/net/ipa/ipa_data-v4.9.c
index 798d43e1eb13..2421b5abb5d4 100644
--- a/drivers/net/ipa/ipa_data-v4.9.c
+++ b/drivers/net/ipa/ipa_data-v4.9.c
@@ -106,6 +106,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.filter_support = true,
.config = {
.resource_group = IPA_RSRC_GROUP_SRC_UL_DL,
+ .checksum = true,
.qmap = true,
.status_enable = true,
.tx = {
@@ -129,6 +130,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.endpoint = {
.config = {
.resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL,
+ .checksum = true,
.qmap = true,
.aggregation = true,
.rx = {
@@ -416,18 +418,13 @@ static const struct ipa_mem_data ipa_mem_data = {
/* Interconnect rates are in 1000 byte/second units */
static const struct ipa_interconnect_data ipa_interconnect_data[] = {
{
- .name = "ipa_to_llcc",
+ .name = "memory",
.peak_bandwidth = 600000, /* 600 MBps */
.average_bandwidth = 150000, /* 150 MBps */
},
- {
- .name = "llcc_to_ebi1",
- .peak_bandwidth = 1804000, /* 1.804 GBps */
- .average_bandwidth = 150000, /* 150 MBps */
- },
/* Average rate is unused for the next interconnect */
{
- .name = "appss_to_ipa",
+ .name = "config",
.peak_bandwidth = 74000, /* 74 MBps */
.average_bandwidth = 0, /* unused */
},
@@ -435,7 +432,7 @@ static const struct ipa_interconnect_data ipa_interconnect_data[] = {
};
/* Clock and interconnect configuration data for an SoC having IPA v4.9 */
-static const struct ipa_clock_data ipa_clock_data = {
+static const struct ipa_power_data ipa_power_data = {
.core_clock_rate = 60 * 1000 * 1000, /* Hz */
.interconnect_count = ARRAY_SIZE(ipa_interconnect_data),
.interconnect_data = ipa_interconnect_data,
@@ -450,5 +447,5 @@ const struct ipa_data ipa_data_v4_9 = {
.endpoint_data = ipa_gsi_endpoint_data,
.resource_data = &ipa_resource_data,
.mem_data = &ipa_mem_data,
- .clock_data = &ipa_clock_data,
+ .power_data = &ipa_power_data,
};
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index 5bc244c8f94e..6d329e9ce5d2 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -19,7 +19,7 @@
* IPA and GSI resources to use for a given platform. This data is supplied
* via the Device Tree match table, associated with a particular compatible
* string. The data defines information about how resources, endpoints and
- * channels, memory, clocking and so on are allocated and used for the
+ * channels, memory, power and so on are allocated and used for the
* platform.
*
* Resources are data structures used internally by the IPA hardware. The
@@ -265,12 +265,12 @@ struct ipa_interconnect_data {
};
/**
- * struct ipa_clock_data - description of IPA clock and interconnect rates
+ * struct ipa_power_data - description of IPA power configuration data
* @core_clock_rate: Core clock rate (Hz)
* @interconnect_count: Number of entries in the interconnect_data array
* @interconnect_data: IPA interconnect configuration data
*/
-struct ipa_clock_data {
+struct ipa_power_data {
u32 core_clock_rate;
u32 interconnect_count; /* # entries in interconnect_data[] */
const struct ipa_interconnect_data *interconnect_data;
@@ -286,7 +286,7 @@ struct ipa_clock_data {
* @endpoint_data: IPA endpoint/GSI channel data
* @resource_data: IPA resource configuration data
* @mem_data: IPA memory region data
- * @clock_data: IPA clock and interconnect data
+ * @power_data: IPA power data
*/
struct ipa_data {
enum ipa_version version;
@@ -297,7 +297,7 @@ struct ipa_data {
const struct ipa_gsi_endpoint_data *endpoint_data;
const struct ipa_resource_data *resource_data;
const struct ipa_mem_data *mem_data;
- const struct ipa_clock_data *clock_data;
+ const struct ipa_power_data *power_data;
};
extern const struct ipa_data ipa_data_v3_1;
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index ab02669bae4e..5528d97110d5 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -21,7 +21,7 @@
#include "ipa_modem.h"
#include "ipa_table.h"
#include "ipa_gsi.h"
-#include "ipa_clock.h"
+#include "ipa_power.h"
#define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0)
@@ -250,17 +250,18 @@ ipa_endpoint_init_ctrl(struct ipa_endpoint *endpoint, bool suspend_delay)
/* Suspend is not supported for IPA v4.0+. Delay doesn't work
* correctly on IPA v4.2.
- *
- * if (endpoint->toward_ipa)
- * assert(ipa->version != IPA_VERSION_4.2);
- * else
- * assert(ipa->version < IPA_VERSION_4_0);
*/
+ if (endpoint->toward_ipa)
+ WARN_ON(ipa->version == IPA_VERSION_4_2);
+ else
+ WARN_ON(ipa->version >= IPA_VERSION_4_0);
+
mask = endpoint->toward_ipa ? ENDP_DELAY_FMASK : ENDP_SUSPEND_FMASK;
val = ioread32(ipa->reg_virt + offset);
- /* Don't bother if it's already in the requested state */
state = !!(val & mask);
+
+ /* Don't bother if it's already in the requested state */
if (suspend_delay != state) {
val ^= mask;
iowrite32(val, ipa->reg_virt + offset);
@@ -273,7 +274,7 @@ ipa_endpoint_init_ctrl(struct ipa_endpoint *endpoint, bool suspend_delay)
static void
ipa_endpoint_program_delay(struct ipa_endpoint *endpoint, bool enable)
{
- /* assert(endpoint->toward_ipa); */
+ WARN_ON(!endpoint->toward_ipa);
/* Delay mode doesn't work properly for IPA v4.2 */
if (endpoint->ipa->version != IPA_VERSION_4_2)
@@ -287,7 +288,8 @@ static bool ipa_endpoint_aggr_active(struct ipa_endpoint *endpoint)
u32 offset;
u32 val;
- /* assert(mask & ipa->available); */
+ WARN_ON(!(mask & ipa->available));
+
offset = ipa_reg_state_aggr_active_offset(ipa->version);
val = ioread32(ipa->reg_virt + offset);
@@ -299,7 +301,8 @@ static void ipa_endpoint_force_close(struct ipa_endpoint *endpoint)
u32 mask = BIT(endpoint->endpoint_id);
struct ipa *ipa = endpoint->ipa;
- /* assert(mask & ipa->available); */
+ WARN_ON(!(mask & ipa->available));
+
iowrite32(mask, ipa->reg_virt + IPA_REG_AGGR_FORCE_CLOSE_OFFSET);
}
@@ -338,7 +341,7 @@ ipa_endpoint_program_suspend(struct ipa_endpoint *endpoint, bool enable)
if (endpoint->ipa->version >= IPA_VERSION_4_0)
return enable; /* For IPA v4.0+, no change made */
- /* assert(!endpoint->toward_ipa); */
+ WARN_ON(endpoint->toward_ipa);
suspended = ipa_endpoint_init_ctrl(endpoint, enable);
@@ -807,7 +810,7 @@ static u32 hol_block_timer_val(struct ipa *ipa, u32 microseconds)
return hol_block_timer_qtime_val(ipa, microseconds);
/* Use 64 bit arithmetic to avoid overflow... */
- rate = ipa_clock_rate(ipa);
+ rate = ipa_core_clock_rate(ipa);
ticks = DIV_ROUND_CLOSEST(microseconds * rate, 128 * USEC_PER_SEC);
/* ...but we still need to fit into a 32-bit register */
WARN_ON(ticks > U32_MAX);
@@ -1156,7 +1159,8 @@ static bool ipa_endpoint_skb_build(struct ipa_endpoint *endpoint,
if (!endpoint->netdev)
return false;
- /* assert(len <= SKB_WITH_OVERHEAD(IPA_RX_BUFFER_SIZE-NET_SKB_PAD)); */
+ WARN_ON(len > SKB_WITH_OVERHEAD(IPA_RX_BUFFER_SIZE - NET_SKB_PAD));
+
skb = build_skb(page_address(page), IPA_RX_BUFFER_SIZE);
if (skb) {
/* Reserve the headroom and account for the data */
@@ -1583,7 +1587,6 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint)
{
struct device *dev = &endpoint->ipa->pdev->dev;
struct gsi *gsi = &endpoint->ipa->gsi;
- bool stop_channel;
int ret;
if (!(endpoint->ipa->enabled & BIT(endpoint->endpoint_id)))
@@ -1594,11 +1597,7 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint)
(void)ipa_endpoint_program_suspend(endpoint, true);
}
- /* Starting with IPA v4.0, endpoints are suspended by stopping the
- * underlying GSI channel rather than using endpoint suspend mode.
- */
- stop_channel = endpoint->ipa->version >= IPA_VERSION_4_0;
- ret = gsi_channel_suspend(gsi, endpoint->channel_id, stop_channel);
+ ret = gsi_channel_suspend(gsi, endpoint->channel_id);
if (ret)
dev_err(dev, "error %d suspending channel %u\n", ret,
endpoint->channel_id);
@@ -1608,7 +1607,6 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint)
{
struct device *dev = &endpoint->ipa->pdev->dev;
struct gsi *gsi = &endpoint->ipa->gsi;
- bool start_channel;
int ret;
if (!(endpoint->ipa->enabled & BIT(endpoint->endpoint_id)))
@@ -1617,11 +1615,7 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint)
if (!endpoint->toward_ipa)
(void)ipa_endpoint_program_suspend(endpoint, false);
- /* Starting with IPA v4.0, the underlying GSI channel must be
- * restarted for resume.
- */
- start_channel = endpoint->ipa->version >= IPA_VERSION_4_0;
- ret = gsi_channel_resume(gsi, endpoint->channel_id, start_channel);
+ ret = gsi_channel_resume(gsi, endpoint->channel_id);
if (ret)
dev_err(dev, "error %d resuming channel %u\n", ret,
endpoint->channel_id);
diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c
index c46df0b7c4e5..b35170a93b0f 100644
--- a/drivers/net/ipa/ipa_interrupt.c
+++ b/drivers/net/ipa/ipa_interrupt.c
@@ -21,9 +21,9 @@
#include <linux/types.h>
#include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
#include "ipa.h"
-#include "ipa_clock.h"
#include "ipa_reg.h"
#include "ipa_endpoint.h"
#include "ipa_interrupt.h"
@@ -74,21 +74,30 @@ static void ipa_interrupt_process(struct ipa_interrupt *interrupt, u32 irq_id)
iowrite32(mask, ipa->reg_virt + offset);
}
-/* Process all IPA interrupt types that have been signaled */
-static void ipa_interrupt_process_all(struct ipa_interrupt *interrupt)
+/* IPA IRQ handler is threaded */
+static irqreturn_t ipa_isr_thread(int irq, void *dev_id)
{
+ struct ipa_interrupt *interrupt = dev_id;
struct ipa *ipa = interrupt->ipa;
u32 enabled = interrupt->enabled;
+ struct device *dev;
+ u32 pending;
u32 offset;
u32 mask;
+ int ret;
+
+ dev = &ipa->pdev->dev;
+ ret = pm_runtime_get_sync(dev);
+ if (WARN_ON(ret < 0))
+ goto out_power_put;
/* The status register indicates which conditions are present,
* including conditions whose interrupt is not enabled. Handle
* only the enabled ones.
*/
offset = ipa_reg_irq_stts_offset(ipa->version);
- mask = ioread32(ipa->reg_virt + offset);
- while ((mask &= enabled)) {
+ pending = ioread32(ipa->reg_virt + offset);
+ while ((mask = pending & enabled)) {
do {
u32 irq_id = __ffs(mask);
@@ -96,43 +105,19 @@ static void ipa_interrupt_process_all(struct ipa_interrupt *interrupt)
ipa_interrupt_process(interrupt, irq_id);
} while (mask);
- mask = ioread32(ipa->reg_virt + offset);
+ pending = ioread32(ipa->reg_virt + offset);
}
-}
-
-/* Threaded part of the IPA IRQ handler */
-static irqreturn_t ipa_isr_thread(int irq, void *dev_id)
-{
- struct ipa_interrupt *interrupt = dev_id;
-
- ipa_clock_get(interrupt->ipa);
-
- ipa_interrupt_process_all(interrupt);
-
- ipa_clock_put(interrupt->ipa);
-
- return IRQ_HANDLED;
-}
-
-/* Hard part (i.e., "real" IRQ handler) of the IRQ handler */
-static irqreturn_t ipa_isr(int irq, void *dev_id)
-{
- struct ipa_interrupt *interrupt = dev_id;
- struct ipa *ipa = interrupt->ipa;
- u32 offset;
- u32 mask;
- offset = ipa_reg_irq_stts_offset(ipa->version);
- mask = ioread32(ipa->reg_virt + offset);
- if (mask & interrupt->enabled)
- return IRQ_WAKE_THREAD;
-
- /* Nothing in the mask was supposed to cause an interrupt */
- offset = ipa_reg_irq_clr_offset(ipa->version);
- iowrite32(mask, ipa->reg_virt + offset);
-
- dev_err(&ipa->pdev->dev, "%s: unexpected interrupt, mask 0x%08x\n",
- __func__, mask);
+ /* If any disabled interrupts are pending, clear them */
+ if (pending) {
+ dev_dbg(dev, "clearing disabled IPA interrupts 0x%08x\n",
+ pending);
+ offset = ipa_reg_irq_clr_offset(ipa->version);
+ iowrite32(pending, ipa->reg_virt + offset);
+ }
+out_power_put:
+ pm_runtime_mark_last_busy(dev);
+ (void)pm_runtime_put_autosuspend(dev);
return IRQ_HANDLED;
}
@@ -146,7 +131,7 @@ static void ipa_interrupt_suspend_control(struct ipa_interrupt *interrupt,
u32 offset;
u32 val;
- /* assert(mask & ipa->available); */
+ WARN_ON(!(mask & ipa->available));
/* IPA version 3.0 does not support TX_SUSPEND interrupt control */
if (ipa->version == IPA_VERSION_3_0)
@@ -206,7 +191,8 @@ void ipa_interrupt_add(struct ipa_interrupt *interrupt,
struct ipa *ipa = interrupt->ipa;
u32 offset;
- /* assert(ipa_irq < IPA_IRQ_COUNT); */
+ WARN_ON(ipa_irq >= IPA_IRQ_COUNT);
+
interrupt->handler[ipa_irq] = handler;
/* Update the IPA interrupt mask to enable it */
@@ -222,7 +208,8 @@ ipa_interrupt_remove(struct ipa_interrupt *interrupt, enum ipa_irq_id ipa_irq)
struct ipa *ipa = interrupt->ipa;
u32 offset;
- /* assert(ipa_irq < IPA_IRQ_COUNT); */
+ WARN_ON(ipa_irq >= IPA_IRQ_COUNT);
+
/* Update the IPA interrupt mask to disable it */
interrupt->enabled &= ~BIT(ipa_irq);
offset = ipa_reg_irq_en_offset(ipa->version);
@@ -231,8 +218,8 @@ ipa_interrupt_remove(struct ipa_interrupt *interrupt, enum ipa_irq_id ipa_irq)
interrupt->handler[ipa_irq] = NULL;
}
-/* Set up the IPA interrupt framework */
-struct ipa_interrupt *ipa_interrupt_setup(struct ipa *ipa)
+/* Configure the IPA interrupt framework */
+struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa)
{
struct device *dev = &ipa->pdev->dev;
struct ipa_interrupt *interrupt;
@@ -258,7 +245,7 @@ struct ipa_interrupt *ipa_interrupt_setup(struct ipa *ipa)
offset = ipa_reg_irq_en_offset(ipa->version);
iowrite32(0, ipa->reg_virt + offset);
- ret = request_threaded_irq(irq, ipa_isr, ipa_isr_thread, IRQF_ONESHOT,
+ ret = request_threaded_irq(irq, NULL, ipa_isr_thread, IRQF_ONESHOT,
"ipa", interrupt);
if (ret) {
dev_err(dev, "error %d requesting \"ipa\" IRQ\n", ret);
@@ -281,8 +268,8 @@ err_kfree:
return ERR_PTR(ret);
}
-/* Tear down the IPA interrupt framework */
-void ipa_interrupt_teardown(struct ipa_interrupt *interrupt)
+/* Inverse of ipa_interrupt_config() */
+void ipa_interrupt_deconfig(struct ipa_interrupt *interrupt)
{
struct device *dev = &interrupt->ipa->pdev->dev;
int ret;
diff --git a/drivers/net/ipa/ipa_interrupt.h b/drivers/net/ipa/ipa_interrupt.h
index d5c486a6800d..231390cea52a 100644
--- a/drivers/net/ipa/ipa_interrupt.h
+++ b/drivers/net/ipa/ipa_interrupt.h
@@ -86,17 +86,17 @@ void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt);
void ipa_interrupt_simulate_suspend(struct ipa_interrupt *interrupt);
/**
- * ipa_interrupt_setup() - Set up the IPA interrupt framework
+ * ipa_interrupt_config() - Configure the IPA interrupt framework
* @ipa: IPA pointer
*
* Return: Pointer to IPA SMP2P info, or a pointer-coded error
*/
-struct ipa_interrupt *ipa_interrupt_setup(struct ipa *ipa);
+struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa);
/**
- * ipa_interrupt_teardown() - Tear down the IPA interrupt framework
+ * ipa_interrupt_deconfig() - Inverse of ipa_interrupt_config()
* @interrupt: IPA interrupt structure
*/
-void ipa_interrupt_teardown(struct ipa_interrupt *interrupt);
+void ipa_interrupt_deconfig(struct ipa_interrupt *interrupt);
#endif /* _IPA_INTERRUPT_H_ */
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index 9810c61a0320..cdfa98a76e1f 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -15,11 +15,12 @@
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/of_address.h>
+#include <linux/pm_runtime.h>
#include <linux/qcom_scm.h>
#include <linux/soc/qcom/mdt_loader.h>
#include "ipa.h"
-#include "ipa_clock.h"
+#include "ipa_power.h"
#include "ipa_data.h"
#include "ipa_endpoint.h"
#include "ipa_resource.h"
@@ -80,29 +81,6 @@
#define IPA_XO_CLOCK_DIVIDER 192 /* 1 is subtracted where used */
/**
- * ipa_suspend_handler() - Handle the suspend IPA interrupt
- * @ipa: IPA pointer
- * @irq_id: IPA interrupt type (unused)
- *
- * If an RX endpoint is in suspend state, and the IPA has a packet
- * destined for that endpoint, the IPA generates a SUSPEND interrupt
- * to inform the AP that it should resume the endpoint. If we get
- * one of these interrupts we just resume everything.
- */
-static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id)
-{
- /* Just report the event, and let system resume handle the rest.
- * More than one endpoint could signal this; if so, ignore
- * all but the first.
- */
- if (!test_and_set_bit(IPA_FLAG_RESUMED, ipa->flags))
- pm_wakeup_dev_event(&ipa->pdev->dev, 0, true);
-
- /* Acknowledge/clear the suspend interrupt on all endpoints */
- ipa_interrupt_suspend_clear_all(ipa->interrupt);
-}
-
-/**
* ipa_setup() - Set up IPA hardware
* @ipa: IPA pointer
*
@@ -124,19 +102,9 @@ int ipa_setup(struct ipa *ipa)
if (ret)
return ret;
- ipa->interrupt = ipa_interrupt_setup(ipa);
- if (IS_ERR(ipa->interrupt)) {
- ret = PTR_ERR(ipa->interrupt);
- goto err_gsi_teardown;
- }
- ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND,
- ipa_suspend_handler);
-
- ipa_uc_setup(ipa);
-
- ret = device_init_wakeup(dev, true);
+ ret = ipa_power_setup(ipa);
if (ret)
- goto err_uc_teardown;
+ goto err_gsi_teardown;
ipa_endpoint_setup(ipa);
@@ -167,7 +135,7 @@ int ipa_setup(struct ipa *ipa)
ipa_endpoint_default_route_set(ipa, exception_endpoint->endpoint_id);
/* We're all set. Now prepare for communication with the modem */
- ret = ipa_modem_setup(ipa);
+ ret = ipa_qmi_setup(ipa);
if (ret)
goto err_default_route_clear;
@@ -184,11 +152,7 @@ err_command_disable:
ipa_endpoint_disable_one(command_endpoint);
err_endpoint_teardown:
ipa_endpoint_teardown(ipa);
- (void)device_init_wakeup(dev, false);
-err_uc_teardown:
- ipa_uc_teardown(ipa);
- ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
- ipa_interrupt_teardown(ipa->interrupt);
+ ipa_power_teardown(ipa);
err_gsi_teardown:
gsi_teardown(&ipa->gsi);
@@ -204,17 +168,17 @@ static void ipa_teardown(struct ipa *ipa)
struct ipa_endpoint *exception_endpoint;
struct ipa_endpoint *command_endpoint;
- ipa_modem_teardown(ipa);
+ /* We're going to tear everything down, as if setup never completed */
+ ipa->setup_complete = false;
+
+ ipa_qmi_teardown(ipa);
ipa_endpoint_default_route_clear(ipa);
exception_endpoint = ipa->name_map[IPA_ENDPOINT_AP_LAN_RX];
ipa_endpoint_disable_one(exception_endpoint);
command_endpoint = ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX];
ipa_endpoint_disable_one(command_endpoint);
ipa_endpoint_teardown(ipa);
- (void)device_init_wakeup(&ipa->pdev->dev, false);
- ipa_uc_teardown(ipa);
- ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
- ipa_interrupt_teardown(ipa->interrupt);
+ ipa_power_teardown(ipa);
gsi_teardown(&ipa->gsi);
}
@@ -253,9 +217,6 @@ ipa_hardware_config_qsb(struct ipa *ipa, const struct ipa_data *data)
const struct ipa_qsb_data *data1;
u32 val;
- /* assert(data->qsb_count > 0); */
- /* assert(data->qsb_count < 3); */
-
/* QMB 0 represents DDR; QMB 1 (if present) represents PCIe */
data0 = &data->qsb_data[IPA_QSB_MASTER_DDR];
if (data->qsb_count > 1)
@@ -289,12 +250,11 @@ ipa_hardware_config_qsb(struct ipa *ipa, const struct ipa_data *data)
/* Compute the value to use in the COUNTER_CFG register AGGR_GRANULARITY
* field to represent the given number of microseconds. The value is one
* less than the number of timer ticks in the requested period. 0 is not
- * a valid granularity value.
+ * a valid granularity value (so for example @usec must be at least 16 for
+ * a TIMER_FREQUENCY of 32000).
*/
-static u32 ipa_aggr_granularity_val(u32 usec)
+static __always_inline u32 ipa_aggr_granularity_val(u32 usec)
{
- /* assert(usec != 0); */
-
return DIV_ROUND_CLOSEST(usec * TIMER_FREQUENCY, USEC_PER_SEC) - 1;
}
@@ -366,8 +326,8 @@ static void ipa_idle_indication_cfg(struct ipa *ipa,
* @ipa: IPA pointer
*
* Configures when the IPA signals it is idle to the global clock
- * controller, which can respond by scalling down the clock to
- * save power.
+ * controller, which can respond by scaling down the clock to save
+ * power.
*/
static void ipa_hardware_dcd_config(struct ipa *ipa)
{
@@ -457,48 +417,54 @@ static void ipa_hardware_deconfig(struct ipa *ipa)
* @ipa: IPA pointer
* @data: IPA configuration data
*
- * Perform initialization requiring IPA clock to be enabled.
+ * Perform initialization requiring IPA power to be enabled.
*/
static int ipa_config(struct ipa *ipa, const struct ipa_data *data)
{
int ret;
- /* Get a clock reference to allow initialization. This reference
- * is held after initialization completes, and won't get dropped
- * unless/until a system suspend request arrives.
- */
- ipa_clock_get(ipa);
-
ipa_hardware_config(ipa, data);
- ret = ipa_endpoint_config(ipa);
+ ret = ipa_mem_config(ipa);
if (ret)
goto err_hardware_deconfig;
- ret = ipa_mem_config(ipa);
+ ipa->interrupt = ipa_interrupt_config(ipa);
+ if (IS_ERR(ipa->interrupt)) {
+ ret = PTR_ERR(ipa->interrupt);
+ ipa->interrupt = NULL;
+ goto err_mem_deconfig;
+ }
+
+ ipa_uc_config(ipa);
+
+ ret = ipa_endpoint_config(ipa);
if (ret)
- goto err_endpoint_deconfig;
+ goto err_uc_deconfig;
ipa_table_config(ipa); /* No deconfig required */
/* Assign resource limitation to each group; no deconfig required */
ret = ipa_resource_config(ipa, data->resource_data);
if (ret)
- goto err_mem_deconfig;
+ goto err_endpoint_deconfig;
ret = ipa_modem_config(ipa);
if (ret)
- goto err_mem_deconfig;
+ goto err_endpoint_deconfig;
return 0;
-err_mem_deconfig:
- ipa_mem_deconfig(ipa);
err_endpoint_deconfig:
ipa_endpoint_deconfig(ipa);
+err_uc_deconfig:
+ ipa_uc_deconfig(ipa);
+ ipa_interrupt_deconfig(ipa->interrupt);
+ ipa->interrupt = NULL;
+err_mem_deconfig:
+ ipa_mem_deconfig(ipa);
err_hardware_deconfig:
ipa_hardware_deconfig(ipa);
- ipa_clock_put(ipa);
return ret;
}
@@ -510,10 +476,12 @@ err_hardware_deconfig:
static void ipa_deconfig(struct ipa *ipa)
{
ipa_modem_deconfig(ipa);
- ipa_mem_deconfig(ipa);
ipa_endpoint_deconfig(ipa);
+ ipa_uc_deconfig(ipa);
+ ipa_interrupt_deconfig(ipa->interrupt);
+ ipa->interrupt = NULL;
+ ipa_mem_deconfig(ipa);
ipa_hardware_deconfig(ipa);
- ipa_clock_put(ipa);
}
static int ipa_firmware_load(struct device *dev)
@@ -612,7 +580,6 @@ MODULE_DEVICE_TABLE(of, ipa_match);
* */
static void ipa_validate_build(void)
{
-#ifdef IPA_VALIDATE
/* At one time we assumed a 64-bit build, allowing some do_div()
* calls to be replaced by simple division or modulo operations.
* We currently only perform divide and modulo operations on u32,
@@ -646,7 +613,6 @@ static void ipa_validate_build(void)
BUILD_BUG_ON(!ipa_aggr_granularity_val(IPA_AGGR_GRANULARITY));
BUILD_BUG_ON(ipa_aggr_granularity_val(IPA_AGGR_GRANULARITY) >
field_max(AGGR_GRANULARITY_FMASK));
-#endif /* IPA_VALIDATE */
}
static bool ipa_version_valid(enum ipa_version version)
@@ -681,7 +647,7 @@ static bool ipa_version_valid(enum ipa_version version)
* in several stages:
* - The "init" stage involves activities that can be initialized without
* access to the IPA hardware.
- * - The "config" stage requires the IPA clock to be active so IPA registers
+ * - The "config" stage requires IPA power to be active so IPA registers
* can be accessed, but does not require the use of IPA immediate commands.
* - The "setup" stage uses IPA immediate commands, and so requires the GSI
* layer to be initialized.
@@ -697,14 +663,14 @@ static int ipa_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
const struct ipa_data *data;
- struct ipa_clock *clock;
+ struct ipa_power *power;
bool modem_init;
struct ipa *ipa;
int ret;
ipa_validate_build();
- /* Get configuration data early; needed for clock initialization */
+ /* Get configuration data early; needed for power initialization */
data = of_device_get_match_data(dev);
if (!data) {
dev_err(dev, "matched hardware not supported\n");
@@ -725,20 +691,20 @@ static int ipa_probe(struct platform_device *pdev)
/* The clock and interconnects might not be ready when we're
* probed, so might return -EPROBE_DEFER.
*/
- clock = ipa_clock_init(dev, data->clock_data);
- if (IS_ERR(clock))
- return PTR_ERR(clock);
+ power = ipa_power_init(dev, data->power_data);
+ if (IS_ERR(power))
+ return PTR_ERR(power);
/* No more EPROBE_DEFER. Allocate and initialize the IPA structure */
ipa = kzalloc(sizeof(*ipa), GFP_KERNEL);
if (!ipa) {
ret = -ENOMEM;
- goto err_clock_exit;
+ goto err_power_exit;
}
ipa->pdev = pdev;
dev_set_drvdata(dev, ipa);
- ipa->clock = clock;
+ ipa->power = power;
ipa->version = data->version;
init_completion(&ipa->completion);
@@ -771,18 +737,23 @@ static int ipa_probe(struct platform_device *pdev)
if (ret)
goto err_table_exit;
+ /* Power needs to be active for config and setup */
+ ret = pm_runtime_get_sync(dev);
+ if (WARN_ON(ret < 0))
+ goto err_power_put;
+
ret = ipa_config(ipa, data);
if (ret)
- goto err_modem_exit;
+ goto err_power_put;
dev_info(dev, "IPA driver initialized");
/* If the modem is doing early initialization, it will trigger a
- * call to ipa_setup() call when it has finished. In that case
- * we're done here.
+ * call to ipa_setup() when it has finished. In that case we're
+ * done here.
*/
if (modem_init)
- return 0;
+ goto done;
/* Otherwise we need to load the firmware and have Trust Zone validate
* and install it. If that succeeds we can proceed with setup.
@@ -794,12 +765,16 @@ static int ipa_probe(struct platform_device *pdev)
ret = ipa_setup(ipa);
if (ret)
goto err_deconfig;
+done:
+ pm_runtime_mark_last_busy(dev);
+ (void)pm_runtime_put_autosuspend(dev);
return 0;
err_deconfig:
ipa_deconfig(ipa);
-err_modem_exit:
+err_power_put:
+ pm_runtime_put_noidle(dev);
ipa_modem_exit(ipa);
err_table_exit:
ipa_table_exit(ipa);
@@ -813,8 +788,8 @@ err_reg_exit:
ipa_reg_exit(ipa);
err_kfree_ipa:
kfree(ipa);
-err_clock_exit:
- ipa_clock_exit(clock);
+err_power_exit:
+ ipa_power_exit(power);
return ret;
}
@@ -822,9 +797,14 @@ err_clock_exit:
static int ipa_remove(struct platform_device *pdev)
{
struct ipa *ipa = dev_get_drvdata(&pdev->dev);
- struct ipa_clock *clock = ipa->clock;
+ struct ipa_power *power = ipa->power;
+ struct device *dev = &pdev->dev;
int ret;
+ ret = pm_runtime_get_sync(dev);
+ if (WARN_ON(ret < 0))
+ goto out_power_put;
+
if (ipa->setup_complete) {
ret = ipa_modem_stop(ipa);
/* If starting or stopping is in progress, try once more */
@@ -839,6 +819,8 @@ static int ipa_remove(struct platform_device *pdev)
}
ipa_deconfig(ipa);
+out_power_put:
+ pm_runtime_put_noidle(dev);
ipa_modem_exit(ipa);
ipa_table_exit(ipa);
ipa_endpoint_exit(ipa);
@@ -846,7 +828,7 @@ static int ipa_remove(struct platform_device *pdev)
ipa_mem_exit(ipa);
ipa_reg_exit(ipa);
kfree(ipa);
- ipa_clock_exit(clock);
+ ipa_power_exit(power);
return 0;
}
@@ -860,62 +842,6 @@ static void ipa_shutdown(struct platform_device *pdev)
dev_err(&pdev->dev, "shutdown: remove returned %d\n", ret);
}
-/**
- * ipa_suspend() - Power management system suspend callback
- * @dev: IPA device structure
- *
- * Return: Always returns zero
- *
- * Called by the PM framework when a system suspend operation is invoked.
- * Suspends endpoints and releases the clock reference held to keep
- * the IPA clock running until this point.
- */
-static int ipa_suspend(struct device *dev)
-{
- struct ipa *ipa = dev_get_drvdata(dev);
-
- /* When a suspended RX endpoint has a packet ready to receive, we
- * get an IPA SUSPEND interrupt. We trigger a system resume in
- * that case, but only on the first such interrupt since suspend.
- */
- __clear_bit(IPA_FLAG_RESUMED, ipa->flags);
-
- ipa_endpoint_suspend(ipa);
-
- ipa_clock_put(ipa);
-
- return 0;
-}
-
-/**
- * ipa_resume() - Power management system resume callback
- * @dev: IPA device structure
- *
- * Return: Always returns 0
- *
- * Called by the PM framework when a system resume operation is invoked.
- * Takes an IPA clock reference to keep the clock running until suspend,
- * and resumes endpoints.
- */
-static int ipa_resume(struct device *dev)
-{
- struct ipa *ipa = dev_get_drvdata(dev);
-
- /* This clock reference will keep the IPA out of suspend
- * until we get a power management suspend request.
- */
- ipa_clock_get(ipa);
-
- ipa_endpoint_resume(ipa);
-
- return 0;
-}
-
-static const struct dev_pm_ops ipa_pm_ops = {
- .suspend = ipa_suspend,
- .resume = ipa_resume,
-};
-
static const struct attribute_group *ipa_attribute_groups[] = {
&ipa_attribute_group,
&ipa_feature_attribute_group,
diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c
index af9aedbde717..ad116bcc0580 100644
--- a/drivers/net/ipa/ipa_modem.c
+++ b/drivers/net/ipa/ipa_modem.c
@@ -9,6 +9,7 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_rmnet.h>
+#include <linux/pm_runtime.h>
#include <linux/remoteproc/qcom_rproc.h>
#include "ipa.h"
@@ -19,6 +20,8 @@
#include "ipa_modem.h"
#include "ipa_smp2p.h"
#include "ipa_qmi.h"
+#include "ipa_uc.h"
+#include "ipa_power.h"
#define IPA_NETDEV_NAME "rmnet_ipa%d"
#define IPA_NETDEV_TAILROOM 0 /* for padding by mux layer */
@@ -31,9 +34,14 @@ enum ipa_modem_state {
IPA_MODEM_STATE_STOPPING,
};
-/** struct ipa_priv - IPA network device private data */
+/**
+ * struct ipa_priv - IPA network device private data
+ * @ipa: IPA pointer
+ * @work: Work structure used to wake the modem netdev TX queue
+ */
struct ipa_priv {
struct ipa *ipa;
+ struct work_struct work;
};
/** ipa_open() - Opens the modem network interface */
@@ -41,21 +49,33 @@ static int ipa_open(struct net_device *netdev)
{
struct ipa_priv *priv = netdev_priv(netdev);
struct ipa *ipa = priv->ipa;
+ struct device *dev;
int ret;
+ dev = &ipa->pdev->dev;
+ ret = pm_runtime_get_sync(dev);
+ if (ret < 0)
+ goto err_power_put;
+
ret = ipa_endpoint_enable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
if (ret)
- return ret;
+ goto err_power_put;
+
ret = ipa_endpoint_enable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
if (ret)
goto err_disable_tx;
netif_start_queue(netdev);
+ pm_runtime_mark_last_busy(dev);
+ (void)pm_runtime_put_autosuspend(dev);
+
return 0;
err_disable_tx:
ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
+err_power_put:
+ pm_runtime_put_noidle(dev);
return ret;
}
@@ -65,11 +85,21 @@ static int ipa_stop(struct net_device *netdev)
{
struct ipa_priv *priv = netdev_priv(netdev);
struct ipa *ipa = priv->ipa;
+ struct device *dev;
+ int ret;
+
+ dev = &ipa->pdev->dev;
+ ret = pm_runtime_get_sync(dev);
+ if (ret < 0)
+ goto out_power_put;
netif_stop_queue(netdev);
ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
+out_power_put:
+ pm_runtime_mark_last_busy(dev);
+ (void)pm_runtime_put_autosuspend(dev);
return 0;
}
@@ -82,13 +112,15 @@ static int ipa_stop(struct net_device *netdev)
* NETDEV_TX_OK: Success
* NETDEV_TX_BUSY: Error while transmitting the skb. Try again later
*/
-static int ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t
+ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev)
{
struct net_device_stats *stats = &netdev->stats;
struct ipa_priv *priv = netdev_priv(netdev);
struct ipa_endpoint *endpoint;
struct ipa *ipa = priv->ipa;
u32 skb_len = skb->len;
+ struct device *dev;
int ret;
if (!skb_len)
@@ -98,7 +130,35 @@ static int ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev)
if (endpoint->data->qmap && skb->protocol != htons(ETH_P_MAP))
goto err_drop_skb;
+ /* The hardware must be powered for us to transmit */
+ dev = &ipa->pdev->dev;
+ ret = pm_runtime_get(dev);
+ if (ret < 1) {
+ /* If a resume won't happen, just drop the packet */
+ if (ret < 0 && ret != -EINPROGRESS) {
+ ipa_power_modem_queue_active(ipa);
+ pm_runtime_put_noidle(dev);
+ goto err_drop_skb;
+ }
+
+ /* No power (yet). Stop the network stack from transmitting
+ * until we're resumed; ipa_modem_resume() arranges for the
+ * TX queue to be started again.
+ */
+ ipa_power_modem_queue_stop(ipa);
+
+ pm_runtime_put_noidle(dev);
+
+ return NETDEV_TX_BUSY;
+ }
+
+ ipa_power_modem_queue_active(ipa);
+
ret = ipa_endpoint_skb_tx(endpoint, skb);
+
+ pm_runtime_mark_last_busy(dev);
+ (void)pm_runtime_put_autosuspend(dev);
+
if (ret) {
if (ret != -E2BIG)
return NETDEV_TX_BUSY;
@@ -169,12 +229,31 @@ void ipa_modem_suspend(struct net_device *netdev)
struct ipa_priv *priv = netdev_priv(netdev);
struct ipa *ipa = priv->ipa;
- netif_stop_queue(netdev);
+ if (!(netdev->flags & IFF_UP))
+ return;
ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
}
+/**
+ * ipa_modem_wake_queue_work() - enable modem netdev queue
+ * @work: Work structure
+ *
+ * Re-enable transmit on the modem network device. This is called
+ * in (power management) work queue context, scheduled when resuming
+ * the modem. We can't enable the queue directly in ipa_modem_resume()
+ * because transmits restart the instant the queue is awakened; but the
+ * device power state won't be ACTIVE until *after* ipa_modem_resume()
+ * returns.
+ */
+static void ipa_modem_wake_queue_work(struct work_struct *work)
+{
+ struct ipa_priv *priv = container_of(work, struct ipa_priv, work);
+
+ ipa_power_modem_queue_wake(priv->ipa);
+}
+
/** ipa_modem_resume() - resume callback for runtime_pm
* @dev: pointer to device
*
@@ -185,10 +264,14 @@ void ipa_modem_resume(struct net_device *netdev)
struct ipa_priv *priv = netdev_priv(netdev);
struct ipa *ipa = priv->ipa;
+ if (!(netdev->flags & IFF_UP))
+ return;
+
ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
- netif_wake_queue(netdev);
+ /* Arrange for the TX queue to be restarted */
+ (void)queue_pm_work(&priv->work);
}
int ipa_modem_start(struct ipa *ipa)
@@ -216,13 +299,16 @@ int ipa_modem_start(struct ipa *ipa)
SET_NETDEV_DEV(netdev, &ipa->pdev->dev);
priv = netdev_priv(netdev);
priv->ipa = ipa;
+ INIT_WORK(&priv->work, ipa_modem_wake_queue_work);
+ ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev;
+ ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev;
+ ipa->modem_netdev = netdev;
ret = register_netdev(netdev);
- if (!ret) {
- ipa->modem_netdev = netdev;
- ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev;
- ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev;
- } else {
+ if (ret) {
+ ipa->modem_netdev = NULL;
+ ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL;
+ ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL;
free_netdev(netdev);
}
@@ -256,13 +342,18 @@ int ipa_modem_stop(struct ipa *ipa)
/* Prevent the modem from triggering a call to ipa_setup() */
ipa_smp2p_disable(ipa);
- /* Stop the queue and disable the endpoints if it's open */
+ /* Clean up the netdev and endpoints if it was started */
if (netdev) {
- (void)ipa_stop(netdev);
+ struct ipa_priv *priv = netdev_priv(netdev);
+
+ cancel_work_sync(&priv->work);
+ /* If it was opened, stop it first */
+ if (netdev->flags & IFF_UP)
+ (void)ipa_stop(netdev);
+ unregister_netdev(netdev);
+ ipa->modem_netdev = NULL;
ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL;
ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL;
- ipa->modem_netdev = NULL;
- unregister_netdev(netdev);
free_netdev(netdev);
}
@@ -278,6 +369,12 @@ static void ipa_modem_crashed(struct ipa *ipa)
struct device *dev = &ipa->pdev->dev;
int ret;
+ ret = pm_runtime_get_sync(dev);
+ if (ret < 0) {
+ dev_err(dev, "error %d getting power to handle crash\n", ret);
+ goto out_power_put;
+ }
+
ipa_endpoint_modem_pause_all(ipa, true);
ipa_endpoint_modem_hol_block_clear_all(ipa);
@@ -302,6 +399,10 @@ static void ipa_modem_crashed(struct ipa *ipa)
ret = ipa_mem_zero_modem(ipa);
if (ret)
dev_err(dev, "error %d zeroing modem memory regions\n", ret);
+
+out_power_put:
+ pm_runtime_mark_last_busy(dev);
+ (void)pm_runtime_put_autosuspend(dev);
}
static int ipa_modem_notify(struct notifier_block *nb, unsigned long action,
@@ -314,6 +415,7 @@ static int ipa_modem_notify(struct notifier_block *nb, unsigned long action,
switch (action) {
case QCOM_SSR_BEFORE_POWERUP:
dev_info(dev, "received modem starting event\n");
+ ipa_uc_power(ipa);
ipa_smp2p_notify_reset(ipa);
break;
@@ -377,13 +479,3 @@ void ipa_modem_deconfig(struct ipa *ipa)
ipa->notifier = NULL;
memset(&ipa->nb, 0, sizeof(ipa->nb));
}
-
-int ipa_modem_setup(struct ipa *ipa)
-{
- return ipa_qmi_setup(ipa);
-}
-
-void ipa_modem_teardown(struct ipa *ipa)
-{
- ipa_qmi_teardown(ipa);
-}
diff --git a/drivers/net/ipa/ipa_modem.h b/drivers/net/ipa/ipa_modem.h
index 2de3e216d1d4..5e6e3d234454 100644
--- a/drivers/net/ipa/ipa_modem.h
+++ b/drivers/net/ipa/ipa_modem.h
@@ -7,7 +7,6 @@
#define _IPA_MODEM_H_
struct ipa;
-struct ipa_endpoint;
struct net_device;
struct sk_buff;
@@ -25,7 +24,4 @@ void ipa_modem_exit(struct ipa *ipa);
int ipa_modem_config(struct ipa *ipa);
void ipa_modem_deconfig(struct ipa *ipa);
-int ipa_modem_setup(struct ipa *ipa);
-void ipa_modem_teardown(struct ipa *ipa);
-
#endif /* _IPA_MODEM_H_ */
diff --git a/drivers/net/ipa/ipa_power.c b/drivers/net/ipa/ipa_power.c
new file mode 100644
index 000000000000..b1c6c0fcb654
--- /dev/null
+++ b/drivers/net/ipa/ipa_power.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2018-2021 Linaro Ltd.
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/interconnect.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/bitops.h>
+
+#include "ipa.h"
+#include "ipa_power.h"
+#include "ipa_endpoint.h"
+#include "ipa_modem.h"
+#include "ipa_data.h"
+
+/**
+ * DOC: IPA Power Management
+ *
+ * The IPA hardware is enabled when the IPA core clock and all the
+ * interconnects (buses) it depends on are enabled. Runtime power
+ * management is used to determine whether the core clock and
+ * interconnects are enabled, and if not in use to be suspended
+ * automatically.
+ *
+ * The core clock currently runs at a fixed clock rate when enabled,
+ * and all interconnects use a fixed average and peak bandwidth.
+ */
+
+#define IPA_AUTOSUSPEND_DELAY 500 /* milliseconds */
+
+/**
+ * struct ipa_interconnect - IPA interconnect information
+ * @path: Interconnect path
+ * @average_bandwidth: Average interconnect bandwidth (KB/second)
+ * @peak_bandwidth: Peak interconnect bandwidth (KB/second)
+ */
+struct ipa_interconnect {
+ struct icc_path *path;
+ u32 average_bandwidth;
+ u32 peak_bandwidth;
+};
+
+/**
+ * enum ipa_power_flag - IPA power flags
+ * @IPA_POWER_FLAG_RESUMED: Whether resume from suspend has been signaled
+ * @IPA_POWER_FLAG_SYSTEM: Hardware is system (not runtime) suspended
+ * @IPA_POWER_FLAG_STOPPED: Modem TX is disabled by ipa_start_xmit()
+ * @IPA_POWER_FLAG_STARTED: Modem TX was enabled by ipa_runtime_resume()
+ * @IPA_POWER_FLAG_COUNT: Number of defined power flags
+ */
+enum ipa_power_flag {
+ IPA_POWER_FLAG_RESUMED,
+ IPA_POWER_FLAG_SYSTEM,
+ IPA_POWER_FLAG_STOPPED,
+ IPA_POWER_FLAG_STARTED,
+ IPA_POWER_FLAG_COUNT, /* Last; not a flag */
+};
+
+/**
+ * struct ipa_power - IPA power management information
+ * @dev: IPA device pointer
+ * @core: IPA core clock
+ * @spinlock: Protects modem TX queue enable/disable
+ * @flags: Boolean state flags
+ * @interconnect_count: Number of elements in interconnect[]
+ * @interconnect: Interconnect array
+ */
+struct ipa_power {
+ struct device *dev;
+ struct clk *core;
+ spinlock_t spinlock; /* used with STOPPED/STARTED power flags */
+ DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT);
+ u32 interconnect_count;
+ struct ipa_interconnect *interconnect;
+};
+
+static int ipa_interconnect_init_one(struct device *dev,
+ struct ipa_interconnect *interconnect,
+ const struct ipa_interconnect_data *data)
+{
+ struct icc_path *path;
+
+ path = of_icc_get(dev, data->name);
+ if (IS_ERR(path)) {
+ int ret = PTR_ERR(path);
+
+ dev_err_probe(dev, ret, "error getting %s interconnect\n",
+ data->name);
+
+ return ret;
+ }
+
+ interconnect->path = path;
+ interconnect->average_bandwidth = data->average_bandwidth;
+ interconnect->peak_bandwidth = data->peak_bandwidth;
+
+ return 0;
+}
+
+static void ipa_interconnect_exit_one(struct ipa_interconnect *interconnect)
+{
+ icc_put(interconnect->path);
+ memset(interconnect, 0, sizeof(*interconnect));
+}
+
+/* Initialize interconnects required for IPA operation */
+static int ipa_interconnect_init(struct ipa_power *power, struct device *dev,
+ const struct ipa_interconnect_data *data)
+{
+ struct ipa_interconnect *interconnect;
+ u32 count;
+ int ret;
+
+ count = power->interconnect_count;
+ interconnect = kcalloc(count, sizeof(*interconnect), GFP_KERNEL);
+ if (!interconnect)
+ return -ENOMEM;
+ power->interconnect = interconnect;
+
+ while (count--) {
+ ret = ipa_interconnect_init_one(dev, interconnect, data++);
+ if (ret)
+ goto out_unwind;
+ interconnect++;
+ }
+
+ return 0;
+
+out_unwind:
+ while (interconnect-- > power->interconnect)
+ ipa_interconnect_exit_one(interconnect);
+ kfree(power->interconnect);
+ power->interconnect = NULL;
+
+ return ret;
+}
+
+/* Inverse of ipa_interconnect_init() */
+static void ipa_interconnect_exit(struct ipa_power *power)
+{
+ struct ipa_interconnect *interconnect;
+
+ interconnect = power->interconnect + power->interconnect_count;
+ while (interconnect-- > power->interconnect)
+ ipa_interconnect_exit_one(interconnect);
+ kfree(power->interconnect);
+ power->interconnect = NULL;
+}
+
+/* Currently we only use one bandwidth level, so just "enable" interconnects */
+static int ipa_interconnect_enable(struct ipa *ipa)
+{
+ struct ipa_interconnect *interconnect;
+ struct ipa_power *power = ipa->power;
+ int ret;
+ u32 i;
+
+ interconnect = power->interconnect;
+ for (i = 0; i < power->interconnect_count; i++) {
+ ret = icc_set_bw(interconnect->path,
+ interconnect->average_bandwidth,
+ interconnect->peak_bandwidth);
+ if (ret) {
+ dev_err(&ipa->pdev->dev,
+ "error %d enabling %s interconnect\n",
+ ret, icc_get_name(interconnect->path));
+ goto out_unwind;
+ }
+ interconnect++;
+ }
+
+ return 0;
+
+out_unwind:
+ while (interconnect-- > power->interconnect)
+ (void)icc_set_bw(interconnect->path, 0, 0);
+
+ return ret;
+}
+
+/* To disable an interconnect, we just set its bandwidth to 0 */
+static int ipa_interconnect_disable(struct ipa *ipa)
+{
+ struct ipa_interconnect *interconnect;
+ struct ipa_power *power = ipa->power;
+ struct device *dev = &ipa->pdev->dev;
+ int result = 0;
+ u32 count;
+ int ret;
+
+ count = power->interconnect_count;
+ interconnect = power->interconnect + count;
+ while (count--) {
+ interconnect--;
+ ret = icc_set_bw(interconnect->path, 0, 0);
+ if (ret) {
+ dev_err(dev, "error %d disabling %s interconnect\n",
+ ret, icc_get_name(interconnect->path));
+ /* Try to disable all; record only the first error */
+ if (!result)
+ result = ret;
+ }
+ }
+
+ return result;
+}
+
+/* Enable IPA power, enabling interconnects and the core clock */
+static int ipa_power_enable(struct ipa *ipa)
+{
+ int ret;
+
+ ret = ipa_interconnect_enable(ipa);
+ if (ret)
+ return ret;
+
+ ret = clk_prepare_enable(ipa->power->core);
+ if (ret) {
+ dev_err(&ipa->pdev->dev, "error %d enabling core clock\n", ret);
+ (void)ipa_interconnect_disable(ipa);
+ }
+
+ return ret;
+}
+
+/* Inverse of ipa_power_enable() */
+static int ipa_power_disable(struct ipa *ipa)
+{
+ clk_disable_unprepare(ipa->power->core);
+
+ return ipa_interconnect_disable(ipa);
+}
+
+static int ipa_runtime_suspend(struct device *dev)
+{
+ struct ipa *ipa = dev_get_drvdata(dev);
+
+ /* Endpoints aren't usable until setup is complete */
+ if (ipa->setup_complete) {
+ __clear_bit(IPA_POWER_FLAG_RESUMED, ipa->power->flags);
+ ipa_endpoint_suspend(ipa);
+ gsi_suspend(&ipa->gsi);
+ }
+
+ return ipa_power_disable(ipa);
+}
+
+static int ipa_runtime_resume(struct device *dev)
+{
+ struct ipa *ipa = dev_get_drvdata(dev);
+ int ret;
+
+ ret = ipa_power_enable(ipa);
+ if (WARN_ON(ret < 0))
+ return ret;
+
+ /* Endpoints aren't usable until setup is complete */
+ if (ipa->setup_complete) {
+ gsi_resume(&ipa->gsi);
+ ipa_endpoint_resume(ipa);
+ }
+
+ return 0;
+}
+
+static int ipa_suspend(struct device *dev)
+{
+ struct ipa *ipa = dev_get_drvdata(dev);
+
+ __set_bit(IPA_POWER_FLAG_SYSTEM, ipa->power->flags);
+
+ return pm_runtime_force_suspend(dev);
+}
+
+static int ipa_resume(struct device *dev)
+{
+ struct ipa *ipa = dev_get_drvdata(dev);
+ int ret;
+
+ ret = pm_runtime_force_resume(dev);
+
+ __clear_bit(IPA_POWER_FLAG_SYSTEM, ipa->power->flags);
+
+ return ret;
+}
+
+/* Return the current IPA core clock rate */
+u32 ipa_core_clock_rate(struct ipa *ipa)
+{
+ return ipa->power ? (u32)clk_get_rate(ipa->power->core) : 0;
+}
+
+/**
+ * ipa_suspend_handler() - Handle the suspend IPA interrupt
+ * @ipa: IPA pointer
+ * @irq_id: IPA interrupt type (unused)
+ *
+ * If an RX endpoint is suspended, and the IPA has a packet destined for
+ * that endpoint, the IPA generates a SUSPEND interrupt to inform the AP
+ * that it should resume the endpoint. If we get one of these interrupts
+ * we just wake up the system.
+ */
+static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id)
+{
+ /* To handle an IPA interrupt we will have resumed the hardware
+ * just to handle the interrupt, so we're done. If we are in a
+ * system suspend, trigger a system resume.
+ */
+ if (!__test_and_set_bit(IPA_POWER_FLAG_RESUMED, ipa->power->flags))
+ if (test_bit(IPA_POWER_FLAG_SYSTEM, ipa->power->flags))
+ pm_wakeup_dev_event(&ipa->pdev->dev, 0, true);
+
+ /* Acknowledge/clear the suspend interrupt on all endpoints */
+ ipa_interrupt_suspend_clear_all(ipa->interrupt);
+}
+
+/* The next few functions coordinate stopping and starting the modem
+ * network device transmit queue.
+ *
+ * Transmit can be running concurrent with power resume, and there's a
+ * chance the resume completes before the transmit path stops the queue,
+ * leaving the queue in a stopped state. The next two functions are used
+ * to avoid this: ipa_power_modem_queue_stop() is used by ipa_start_xmit()
+ * to conditionally stop the TX queue; and ipa_power_modem_queue_wake()
+ * is used by ipa_runtime_resume() to conditionally restart it.
+ *
+ * Two flags and a spinlock are used. If the queue is stopped, the STOPPED
+ * power flag is set. And if the queue is started, the STARTED flag is set.
+ * The queue is only started on resume if the STOPPED flag is set. And the
+ * queue is only stopped in ipa_start_xmit() if the STARTED flag is *not*
+ * set. As a result, the queue remains operational if the two activities
+ * happen concurrently regardless of the order they complete. The spinlock
+ * ensures the flag and TX queue operations are done atomically.
+ *
+ * The first function stops the modem netdev transmit queue, but only if
+ * the STARTED flag is *not* set. That flag is cleared if it was set.
+ * If the queue is stopped, the STOPPED flag is set. This is called only
+ * from ipa_start_xmit().
+ */
+void ipa_power_modem_queue_stop(struct ipa *ipa)
+{
+ struct ipa_power *power = ipa->power;
+ unsigned long flags;
+
+ spin_lock_irqsave(&power->spinlock, flags);
+
+ if (!__test_and_clear_bit(IPA_POWER_FLAG_STARTED, power->flags)) {
+ netif_stop_queue(ipa->modem_netdev);
+ __set_bit(IPA_POWER_FLAG_STOPPED, power->flags);
+ }
+
+ spin_unlock_irqrestore(&power->spinlock, flags);
+}
+
+/* This function starts the modem netdev transmit queue, but only if the
+ * STOPPED flag is set. That flag is cleared if it was set. If the queue
+ * was restarted, the STARTED flag is set; this allows ipa_start_xmit()
+ * to skip stopping the queue in the event of a race.
+ */
+void ipa_power_modem_queue_wake(struct ipa *ipa)
+{
+ struct ipa_power *power = ipa->power;
+ unsigned long flags;
+
+ spin_lock_irqsave(&power->spinlock, flags);
+
+ if (__test_and_clear_bit(IPA_POWER_FLAG_STOPPED, power->flags)) {
+ __set_bit(IPA_POWER_FLAG_STARTED, power->flags);
+ netif_wake_queue(ipa->modem_netdev);
+ }
+
+ spin_unlock_irqrestore(&power->spinlock, flags);
+}
+
+/* This function clears the STARTED flag once the TX queue is operating */
+void ipa_power_modem_queue_active(struct ipa *ipa)
+{
+ clear_bit(IPA_POWER_FLAG_STARTED, ipa->power->flags);
+}
+
+int ipa_power_setup(struct ipa *ipa)
+{
+ int ret;
+
+ ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND,
+ ipa_suspend_handler);
+
+ ret = device_init_wakeup(&ipa->pdev->dev, true);
+ if (ret)
+ ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
+
+ return ret;
+}
+
+void ipa_power_teardown(struct ipa *ipa)
+{
+ (void)device_init_wakeup(&ipa->pdev->dev, false);
+ ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
+}
+
+/* Initialize IPA power management */
+struct ipa_power *
+ipa_power_init(struct device *dev, const struct ipa_power_data *data)
+{
+ struct ipa_power *power;
+ struct clk *clk;
+ int ret;
+
+ clk = clk_get(dev, "core");
+ if (IS_ERR(clk)) {
+ dev_err_probe(dev, PTR_ERR(clk), "error getting core clock\n");
+
+ return ERR_CAST(clk);
+ }
+
+ ret = clk_set_rate(clk, data->core_clock_rate);
+ if (ret) {
+ dev_err(dev, "error %d setting core clock rate to %u\n",
+ ret, data->core_clock_rate);
+ goto err_clk_put;
+ }
+
+ power = kzalloc(sizeof(*power), GFP_KERNEL);
+ if (!power) {
+ ret = -ENOMEM;
+ goto err_clk_put;
+ }
+ power->dev = dev;
+ power->core = clk;
+ spin_lock_init(&power->spinlock);
+ power->interconnect_count = data->interconnect_count;
+
+ ret = ipa_interconnect_init(power, dev, data->interconnect_data);
+ if (ret)
+ goto err_kfree;
+
+ pm_runtime_set_autosuspend_delay(dev, IPA_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_enable(dev);
+
+ return power;
+
+err_kfree:
+ kfree(power);
+err_clk_put:
+ clk_put(clk);
+
+ return ERR_PTR(ret);
+}
+
+/* Inverse of ipa_power_init() */
+void ipa_power_exit(struct ipa_power *power)
+{
+ struct device *dev = power->dev;
+ struct clk *clk = power->core;
+
+ pm_runtime_disable(dev);
+ pm_runtime_dont_use_autosuspend(dev);
+ ipa_interconnect_exit(power);
+ kfree(power);
+ clk_put(clk);
+}
+
+const struct dev_pm_ops ipa_pm_ops = {
+ .suspend = ipa_suspend,
+ .resume = ipa_resume,
+ .runtime_suspend = ipa_runtime_suspend,
+ .runtime_resume = ipa_runtime_resume,
+};
diff --git a/drivers/net/ipa/ipa_power.h b/drivers/net/ipa/ipa_power.h
new file mode 100644
index 000000000000..2151805d7fbb
--- /dev/null
+++ b/drivers/net/ipa/ipa_power.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2018-2020 Linaro Ltd.
+ */
+#ifndef _IPA_POWER_H_
+#define _IPA_POWER_H_
+
+struct device;
+
+struct ipa;
+struct ipa_power_data;
+
+/* IPA device power management function block */
+extern const struct dev_pm_ops ipa_pm_ops;
+
+/**
+ * ipa_core_clock_rate() - Return the current IPA core clock rate
+ * @ipa: IPA structure
+ *
+ * Return: The current clock rate (in Hz), or 0.
+ */
+u32 ipa_core_clock_rate(struct ipa *ipa);
+
+/**
+ * ipa_power_modem_queue_stop() - Possibly stop the modem netdev TX queue
+ * @ipa: IPA pointer
+ */
+void ipa_power_modem_queue_stop(struct ipa *ipa);
+
+/**
+ * ipa_power_modem_queue_wake() - Possibly wake the modem netdev TX queue
+ * @ipa: IPA pointer
+ */
+void ipa_power_modem_queue_wake(struct ipa *ipa);
+
+/**
+ * ipa_power_modem_queue_active() - Report modem netdev TX queue active
+ * @ipa: IPA pointer
+ */
+void ipa_power_modem_queue_active(struct ipa *ipa);
+
+/**
+ * ipa_power_setup() - Set up IPA power management
+ * @ipa: IPA pointer
+ *
+ * Return: 0 if successful, or a negative error code
+ */
+int ipa_power_setup(struct ipa *ipa);
+
+/**
+ * ipa_power_teardown() - Inverse of ipa_power_setup()
+ * @ipa: IPA pointer
+ */
+void ipa_power_teardown(struct ipa *ipa);
+
+/**
+ * ipa_power_init() - Initialize IPA power management
+ * @dev: IPA device
+ * @data: Power configuration data
+ *
+ * Return: A pointer to an ipa_power structure, or a pointer-coded error
+ */
+struct ipa_power *ipa_power_init(struct device *dev,
+ const struct ipa_power_data *data);
+
+/**
+ * ipa_power_exit() - Inverse of ipa_power_init()
+ * @power: IPA power pointer
+ */
+void ipa_power_exit(struct ipa_power *power);
+
+#endif /* _IPA_POWER_H_ */
diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c
index 4661105ce7ab..90f3aec55b36 100644
--- a/drivers/net/ipa/ipa_qmi.c
+++ b/drivers/net/ipa/ipa_qmi.c
@@ -467,10 +467,7 @@ static const struct qmi_ops ipa_client_ops = {
.new_server = ipa_client_new_server,
};
-/* This is called by ipa_setup(). We can be informed via remoteproc that
- * the modem has shut down, in which case this function will be called
- * again to prepare for it coming back up again.
- */
+/* Set up for QMI message exchange */
int ipa_qmi_setup(struct ipa *ipa)
{
struct ipa_qmi *ipa_qmi = &ipa->qmi;
@@ -526,6 +523,7 @@ err_server_handle_release:
return ret;
}
+/* Tear down IPA QMI handles */
void ipa_qmi_teardown(struct ipa *ipa)
{
cancel_work_sync(&ipa->qmi.init_driver_work);
diff --git a/drivers/net/ipa/ipa_qmi.h b/drivers/net/ipa/ipa_qmi.h
index b6f2055d35a6..856ef629ccc8 100644
--- a/drivers/net/ipa/ipa_qmi.h
+++ b/drivers/net/ipa/ipa_qmi.h
@@ -39,7 +39,26 @@ struct ipa_qmi {
bool indication_sent;
};
+/**
+ * ipa_qmi_setup() - Set up for QMI message exchange
+ * @ipa: IPA pointer
+ *
+ * This is called at the end of ipa_setup(), to prepare for the exchange
+ * of QMI messages that perform a "handshake" between the AP and modem.
+ * When the modem QMI server announces its presence, an AP request message
+ * supplies operating parameters to be used to the modem, and the modem
+ * acknowledges receipt of those parameters. The modem will not touch the
+ * IPA hardware until this handshake is complete.
+ *
+ * If the modem crashes (or shuts down) a new handshake begins when the
+ * modem's QMI server is started again.
+ */
int ipa_qmi_setup(struct ipa *ipa);
+
+/**
+ * ipa_qmi_teardown() - Tear down IPA QMI handles
+ * @ipa: IPA pointer
+ */
void ipa_qmi_teardown(struct ipa *ipa);
#endif /* !_IPA_QMI_H_ */
diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index b89dec5865a5..a5b355384d4a 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -99,7 +99,7 @@ struct ipa;
static inline u32 arbitration_lock_disable_encoded(enum ipa_version version,
u32 mask)
{
- /* assert(version >= IPA_VERSION_4_0); */
+ WARN_ON(version < IPA_VERSION_4_0);
if (version < IPA_VERSION_4_9)
return u32_encode_bits(mask, GENMASK(20, 17));
@@ -116,7 +116,7 @@ static inline u32 full_flush_rsc_closure_en_encoded(enum ipa_version version,
{
u32 val = enable ? 1 : 0;
- /* assert(version >= IPA_VERSION_4_5); */
+ WARN_ON(version < IPA_VERSION_4_5);
if (version == IPA_VERSION_4_5 || version == IPA_VERSION_4_7)
return u32_encode_bits(val, GENMASK(21, 21));
@@ -409,7 +409,7 @@ static inline u32 ipa_header_size_encoded(enum ipa_version version,
val = u32_encode_bits(size, HDR_LEN_FMASK);
if (version < IPA_VERSION_4_5) {
- /* ipa_assert(header_size == size); */
+ WARN_ON(header_size != size);
return val;
}
@@ -429,7 +429,7 @@ static inline u32 ipa_metadata_offset_encoded(enum ipa_version version,
val = u32_encode_bits(off, HDR_OFST_METADATA_FMASK);
if (version < IPA_VERSION_4_5) {
- /* ipa_assert(offset == off); */
+ WARN_ON(offset != off);
return val;
}
@@ -812,7 +812,7 @@ ipa_reg_irq_suspend_info_offset(enum ipa_version version)
static inline u32
ipa_reg_irq_suspend_en_ee_n_offset(enum ipa_version version, u32 ee)
{
- /* assert(version != IPA_VERSION_3_0); */
+ WARN_ON(version == IPA_VERSION_3_0);
if (version < IPA_VERSION_4_9)
return 0x00003034 + 0x1000 * ee;
@@ -830,7 +830,7 @@ ipa_reg_irq_suspend_en_offset(enum ipa_version version)
static inline u32
ipa_reg_irq_suspend_clr_ee_n_offset(enum ipa_version version, u32 ee)
{
- /* assert(version != IPA_VERSION_3_0); */
+ WARN_ON(version == IPA_VERSION_3_0);
if (version < IPA_VERSION_4_9)
return 0x00003038 + 0x1000 * ee;
diff --git a/drivers/net/ipa/ipa_resource.c b/drivers/net/ipa/ipa_resource.c
index 3b2dc216d3a6..e3da95d69409 100644
--- a/drivers/net/ipa/ipa_resource.c
+++ b/drivers/net/ipa/ipa_resource.c
@@ -29,7 +29,6 @@
static bool ipa_resource_limits_valid(struct ipa *ipa,
const struct ipa_resource_data *data)
{
-#ifdef IPA_VALIDATION
u32 group_count;
u32 i;
u32 j;
@@ -65,7 +64,7 @@ static bool ipa_resource_limits_valid(struct ipa *ipa,
if (resource->limits[j].min || resource->limits[j].max)
return false;
}
-#endif /* !IPA_VALIDATION */
+
return true;
}
diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c
index 93270e50b6b3..df7639c39d71 100644
--- a/drivers/net/ipa/ipa_smp2p.c
+++ b/drivers/net/ipa/ipa_smp2p.c
@@ -9,13 +9,13 @@
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/panic_notifier.h>
+#include <linux/pm_runtime.h>
#include <linux/soc/qcom/smem.h>
#include <linux/soc/qcom/smem_state.h>
#include "ipa_smp2p.h"
#include "ipa.h"
#include "ipa_uc.h"
-#include "ipa_clock.h"
/**
* DOC: IPA SMP2P communication with the modem
@@ -23,19 +23,19 @@
* SMP2P is a primitive communication mechanism available between the AP and
* the modem. The IPA driver uses this for two purposes: to enable the modem
* to state that the GSI hardware is ready to use; and to communicate the
- * state of the IPA clock in the event of a crash.
+ * state of IPA power in the event of a crash.
*
* GSI needs to have early initialization completed before it can be used.
* This initialization is done either by Trust Zone or by the modem. In the
* latter case, the modem uses an SMP2P interrupt to tell the AP IPA driver
* when the GSI is ready to use.
*
- * The modem is also able to inquire about the current state of the IPA
- * clock by trigging another SMP2P interrupt to the AP. We communicate
- * whether the clock is enabled using two SMP2P state bits--one to
- * indicate the clock state (on or off), and a second to indicate the
- * clock state bit is valid. The modem will poll the valid bit until it
- * is set, and at that time records whether the AP has the IPA clock enabled.
+ * The modem is also able to inquire about the current state of IPA
+ * power by triggering another SMP2P interrupt to the AP. We communicate
+ * whether power is enabled using two SMP2P state bits--one to indicate
+ * the power state (on or off), and a second to indicate the power state
+ * bit is valid. The modem will poll the valid bit until it is set, and
+ * at that time records whether the AP has IPA power enabled.
*
* Finally, if the AP kernel panics, we update the SMP2P state bits even if
* we never receive an interrupt from the modem requesting this.
@@ -45,14 +45,14 @@
* struct ipa_smp2p - IPA SMP2P information
* @ipa: IPA pointer
* @valid_state: SMEM state indicating enabled state is valid
- * @enabled_state: SMEM state to indicate clock is enabled
+ * @enabled_state: SMEM state to indicate power is enabled
* @valid_bit: Valid bit in 32-bit SMEM state mask
* @enabled_bit: Enabled bit in 32-bit SMEM state mask
* @enabled_bit: Enabled bit in 32-bit SMEM state mask
- * @clock_query_irq: IPA interrupt triggered by modem for clock query
+ * @clock_query_irq: IPA interrupt triggered by modem for power query
* @setup_ready_irq: IPA interrupt triggered by modem to signal GSI ready
- * @clock_on: Whether IPA clock is on
- * @notified: Whether modem has been notified of clock state
+ * @power_on: Whether IPA power is on
+ * @notified: Whether modem has been notified of power state
* @disabled: Whether setup ready interrupt handling is disabled
* @mutex: Mutex protecting ready-interrupt/shutdown interlock
* @panic_notifier: Panic notifier structure
@@ -65,7 +65,7 @@ struct ipa_smp2p {
u32 enabled_bit;
u32 clock_query_irq;
u32 setup_ready_irq;
- bool clock_on;
+ bool power_on;
bool notified;
bool disabled;
struct mutex mutex;
@@ -73,28 +73,30 @@ struct ipa_smp2p {
};
/**
- * ipa_smp2p_notify() - use SMP2P to tell modem about IPA clock state
+ * ipa_smp2p_notify() - use SMP2P to tell modem about IPA power state
* @smp2p: SMP2P information
*
* This is called either when the modem has requested it (by triggering
- * the modem clock query IPA interrupt) or whenever the AP is shutting down
+ * the modem power query IPA interrupt) or whenever the AP is shutting down
* (via a panic notifier). It sets the two SMP2P state bits--one saying
- * whether the IPA clock is running, and the other indicating the first bit
+ * whether the IPA power is on, and the other indicating the first bit
* is valid.
*/
static void ipa_smp2p_notify(struct ipa_smp2p *smp2p)
{
+ struct device *dev;
u32 value;
u32 mask;
if (smp2p->notified)
return;
- smp2p->clock_on = ipa_clock_get_additional(smp2p->ipa);
+ dev = &smp2p->ipa->pdev->dev;
+ smp2p->power_on = pm_runtime_get_if_active(dev, true) > 0;
- /* Signal whether the clock is enabled */
+ /* Signal whether the IPA power is enabled */
mask = BIT(smp2p->enabled_bit);
- value = smp2p->clock_on ? mask : 0;
+ value = smp2p->power_on ? mask : 0;
qcom_smem_state_update_bits(smp2p->enabled_state, mask, value);
/* Now indicate that the enabled flag is valid */
@@ -124,7 +126,7 @@ static int ipa_smp2p_panic_notifier(struct notifier_block *nb,
ipa_smp2p_notify(smp2p);
- if (smp2p->clock_on)
+ if (smp2p->power_on)
ipa_uc_panic_notifier(smp2p->ipa);
return NOTIFY_DONE;
@@ -150,19 +152,31 @@ static void ipa_smp2p_panic_notifier_unregister(struct ipa_smp2p *smp2p)
static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id)
{
struct ipa_smp2p *smp2p = dev_id;
+ struct device *dev;
+ int ret;
mutex_lock(&smp2p->mutex);
- if (!smp2p->disabled) {
- int ret;
+ if (smp2p->disabled)
+ goto out_mutex_unlock;
+ smp2p->disabled = true; /* If any others arrive, ignore them */
- ret = ipa_setup(smp2p->ipa);
- if (ret)
- dev_err(&smp2p->ipa->pdev->dev,
- "error %d from ipa_setup()\n", ret);
- smp2p->disabled = true;
+ /* Power needs to be active for setup */
+ dev = &smp2p->ipa->pdev->dev;
+ ret = pm_runtime_get_sync(dev);
+ if (ret < 0) {
+ dev_err(dev, "error %d getting power for setup\n", ret);
+ goto out_power_put;
}
+ /* An error here won't cause driver shutdown, so warn if one occurs */
+ ret = ipa_setup(smp2p->ipa);
+ WARN(ret != 0, "error %d from ipa_setup()\n", ret);
+
+out_power_put:
+ pm_runtime_mark_last_busy(dev);
+ (void)pm_runtime_put_autosuspend(dev);
+out_mutex_unlock:
mutex_unlock(&smp2p->mutex);
return IRQ_HANDLED;
@@ -195,14 +209,17 @@ static void ipa_smp2p_irq_exit(struct ipa_smp2p *smp2p, u32 irq)
free_irq(irq, smp2p);
}
-/* Drop the clock reference if it was taken in ipa_smp2p_notify() */
-static void ipa_smp2p_clock_release(struct ipa *ipa)
+/* Drop the power reference if it was taken in ipa_smp2p_notify() */
+static void ipa_smp2p_power_release(struct ipa *ipa)
{
- if (!ipa->smp2p->clock_on)
+ struct device *dev = &ipa->pdev->dev;
+
+ if (!ipa->smp2p->power_on)
return;
- ipa_clock_put(ipa);
- ipa->smp2p->clock_on = false;
+ pm_runtime_mark_last_busy(dev);
+ (void)pm_runtime_put_autosuspend(dev);
+ ipa->smp2p->power_on = false;
}
/* Initialize the IPA SMP2P subsystem */
@@ -236,7 +253,7 @@ int ipa_smp2p_init(struct ipa *ipa, bool modem_init)
smp2p->ipa = ipa;
- /* These fields are needed by the clock query interrupt
+ /* These fields are needed by the power query interrupt
* handler, so initialize them now.
*/
mutex_init(&smp2p->mutex);
@@ -289,8 +306,8 @@ void ipa_smp2p_exit(struct ipa *ipa)
ipa_smp2p_irq_exit(smp2p, smp2p->setup_ready_irq);
ipa_smp2p_panic_notifier_unregister(smp2p);
ipa_smp2p_irq_exit(smp2p, smp2p->clock_query_irq);
- /* We won't get notified any more; drop clock reference (if any) */
- ipa_smp2p_clock_release(ipa);
+ /* We won't get notified any more; drop power reference (if any) */
+ ipa_smp2p_power_release(ipa);
ipa->smp2p = NULL;
mutex_destroy(&smp2p->mutex);
kfree(smp2p);
@@ -319,13 +336,13 @@ void ipa_smp2p_notify_reset(struct ipa *ipa)
if (!smp2p->notified)
return;
- ipa_smp2p_clock_release(ipa);
+ ipa_smp2p_power_release(ipa);
- /* Reset the clock enabled valid flag */
+ /* Reset the power enabled valid flag */
mask = BIT(smp2p->valid_bit);
qcom_smem_state_update_bits(smp2p->valid_state, mask, 0);
- /* Mark the clock disabled for good measure... */
+ /* Mark the power disabled for good measure... */
mask = BIT(smp2p->enabled_bit);
qcom_smem_state_update_bits(smp2p->enabled_state, mask, 0);
diff --git a/drivers/net/ipa/ipa_smp2p.h b/drivers/net/ipa/ipa_smp2p.h
index 20319438a841..99a956789638 100644
--- a/drivers/net/ipa/ipa_smp2p.h
+++ b/drivers/net/ipa/ipa_smp2p.h
@@ -39,7 +39,7 @@ void ipa_smp2p_disable(struct ipa *ipa);
* ipa_smp2p_notify_reset() - Reset modem notification state
* @ipa: IPA pointer
*
- * If the modem crashes it queries the IPA clock state. In cleaning
+ * If the modem crashes it queries the IPA power state. In cleaning
* up after such a crash this is used to reset some state maintained
* for managing this notification.
*/
diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index c617a9156f26..2324e1b93e37 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -120,8 +120,6 @@
*/
#define IPA_ZERO_RULE_SIZE (2 * sizeof(__le32))
-#ifdef IPA_VALIDATE
-
/* Check things that can be validated at build time. */
static void ipa_table_validate_build(void)
{
@@ -161,7 +159,7 @@ ipa_table_valid_one(struct ipa *ipa, enum ipa_mem_id mem_id, bool route)
else
size = (1 + IPA_FILTER_COUNT_MAX) * sizeof(__le64);
- if (!ipa_cmd_table_valid(ipa, mem, route, ipv6, hashed))
+ if (!ipa_cmd_table_valid(ipa, mem, route))
return false;
/* mem->size >= size is sufficient, but we'll demand more */
@@ -169,7 +167,7 @@ ipa_table_valid_one(struct ipa *ipa, enum ipa_mem_id mem_id, bool route)
return true;
/* Hashed table regions can be zero size if hashing is not supported */
- if (hashed && !mem->size)
+ if (ipa_table_hash_support(ipa) && !mem->size)
return true;
dev_err(dev, "%s table region %u size 0x%02x, expected 0x%02x\n",
@@ -183,14 +181,22 @@ bool ipa_table_valid(struct ipa *ipa)
{
bool valid;
- valid = ipa_table_valid_one(IPA_MEM_V4_FILTER, false);
- valid = valid && ipa_table_valid_one(IPA_MEM_V4_FILTER_HASHED, false);
- valid = valid && ipa_table_valid_one(IPA_MEM_V6_FILTER, false);
- valid = valid && ipa_table_valid_one(IPA_MEM_V6_FILTER_HASHED, false);
- valid = valid && ipa_table_valid_one(IPA_MEM_V4_ROUTE, true);
- valid = valid && ipa_table_valid_one(IPA_MEM_V4_ROUTE_HASHED, true);
- valid = valid && ipa_table_valid_one(IPA_MEM_V6_ROUTE, true);
- valid = valid && ipa_table_valid_one(IPA_MEM_V6_ROUTE_HASHED, true);
+ valid = ipa_table_valid_one(ipa, IPA_MEM_V4_FILTER, false);
+ valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V6_FILTER, false);
+ valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V4_ROUTE, true);
+ valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V6_ROUTE, true);
+
+ if (!ipa_table_hash_support(ipa))
+ return valid;
+
+ valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V4_FILTER_HASHED,
+ false);
+ valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V6_FILTER_HASHED,
+ false);
+ valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V4_ROUTE_HASHED,
+ true);
+ valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V6_ROUTE_HASHED,
+ true);
return valid;
}
@@ -217,14 +223,6 @@ bool ipa_filter_map_valid(struct ipa *ipa, u32 filter_map)
return true;
}
-#else /* !IPA_VALIDATE */
-static void ipa_table_validate_build(void)
-
-{
-}
-
-#endif /* !IPA_VALIDATE */
-
/* Zero entry count means no table, so just return a 0 address */
static dma_addr_t ipa_table_addr(struct ipa *ipa, bool filter_mask, u16 count)
{
@@ -233,7 +231,7 @@ static dma_addr_t ipa_table_addr(struct ipa *ipa, bool filter_mask, u16 count)
if (!count)
return 0;
-/* assert(count <= max_t(u32, IPA_FILTER_COUNT_MAX, IPA_ROUTE_COUNT_MAX)); */
+ WARN_ON(count > max_t(u32, IPA_FILTER_COUNT_MAX, IPA_ROUTE_COUNT_MAX));
/* Skip over the zero rule and possibly the filter mask */
skip = filter_mask ? 1 : 2;
diff --git a/drivers/net/ipa/ipa_table.h b/drivers/net/ipa/ipa_table.h
index 1e2be9fce2f8..b6a9a0d79d68 100644
--- a/drivers/net/ipa/ipa_table.h
+++ b/drivers/net/ipa/ipa_table.h
@@ -16,8 +16,6 @@ struct ipa;
/* The maximum number of route table entries (IPv4, IPv6; hashed or not) */
#define IPA_ROUTE_COUNT_MAX 15
-#ifdef IPA_VALIDATE
-
/**
* ipa_table_valid() - Validate route and filter table memory regions
* @ipa: IPA pointer
@@ -35,20 +33,6 @@ bool ipa_table_valid(struct ipa *ipa);
*/
bool ipa_filter_map_valid(struct ipa *ipa, u32 filter_mask);
-#else /* !IPA_VALIDATE */
-
-static inline bool ipa_table_valid(struct ipa *ipa)
-{
- return true;
-}
-
-static inline bool ipa_filter_map_valid(struct ipa *ipa, u32 filter_mask)
-{
- return true;
-}
-
-#endif /* !IPA_VALIDATE */
-
/**
* ipa_table_hash_support() - Return true if hashed tables are supported
* @ipa: IPA pointer
diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c
index fd9219863234..856e55a080a7 100644
--- a/drivers/net/ipa/ipa_uc.c
+++ b/drivers/net/ipa/ipa_uc.c
@@ -7,9 +7,9 @@
#include <linux/types.h>
#include <linux/io.h>
#include <linux/delay.h>
+#include <linux/pm_runtime.h>
#include "ipa.h"
-#include "ipa_clock.h"
#include "ipa_uc.h"
/**
@@ -131,7 +131,7 @@ static void ipa_uc_event_handler(struct ipa *ipa, enum ipa_irq_id irq_id)
if (shared->event == IPA_UC_EVENT_ERROR)
dev_err(dev, "microcontroller error event\n");
else if (shared->event != IPA_UC_EVENT_LOG_INFO)
- dev_err(dev, "unsupported microcontroller event %hhu\n",
+ dev_err(dev, "unsupported microcontroller event %u\n",
shared->event);
/* The LOG_INFO event can be safely ignored */
}
@@ -140,53 +140,77 @@ static void ipa_uc_event_handler(struct ipa *ipa, enum ipa_irq_id irq_id)
static void ipa_uc_response_hdlr(struct ipa *ipa, enum ipa_irq_id irq_id)
{
struct ipa_uc_mem_area *shared = ipa_uc_shared(ipa);
+ struct device *dev = &ipa->pdev->dev;
/* An INIT_COMPLETED response message is sent to the AP by the
* microcontroller when it is operational. Other than this, the AP
* should only receive responses from the microcontroller when it has
* sent it a request message.
*
- * We can drop the clock reference taken in ipa_uc_setup() once we
+ * We can drop the power reference taken in ipa_uc_power() once we
* know the microcontroller has finished its initialization.
*/
switch (shared->response) {
case IPA_UC_RESPONSE_INIT_COMPLETED:
- ipa->uc_loaded = true;
- ipa_clock_put(ipa);
+ if (ipa->uc_powered) {
+ ipa->uc_loaded = true;
+ pm_runtime_mark_last_busy(dev);
+ (void)pm_runtime_put_autosuspend(dev);
+ ipa->uc_powered = false;
+ } else {
+ dev_warn(dev, "unexpected init_completed response\n");
+ }
break;
default:
- dev_warn(&ipa->pdev->dev,
- "unsupported microcontroller response %hhu\n",
+ dev_warn(dev, "unsupported microcontroller response %u\n",
shared->response);
break;
}
}
-/* ipa_uc_setup() - Set up the microcontroller */
-void ipa_uc_setup(struct ipa *ipa)
+/* Configure the IPA microcontroller subsystem */
+void ipa_uc_config(struct ipa *ipa)
{
- /* The microcontroller needs the IPA clock running until it has
- * completed its initialization. It signals this by sending an
- * INIT_COMPLETED response message to the AP. This could occur after
- * we have finished doing the rest of the IPA initialization, so we
- * need to take an extra "proxy" reference, and hold it until we've
- * received that signal. (This reference is dropped in
- * ipa_uc_response_hdlr(), above.)
- */
- ipa_clock_get(ipa);
-
+ ipa->uc_powered = false;
ipa->uc_loaded = false;
ipa_interrupt_add(ipa->interrupt, IPA_IRQ_UC_0, ipa_uc_event_handler);
ipa_interrupt_add(ipa->interrupt, IPA_IRQ_UC_1, ipa_uc_response_hdlr);
}
-/* Inverse of ipa_uc_setup() */
-void ipa_uc_teardown(struct ipa *ipa)
+/* Inverse of ipa_uc_config() */
+void ipa_uc_deconfig(struct ipa *ipa)
{
+ struct device *dev = &ipa->pdev->dev;
+
ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_1);
ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_0);
- if (!ipa->uc_loaded)
- ipa_clock_put(ipa);
+ if (!ipa->uc_powered)
+ return;
+
+ pm_runtime_mark_last_busy(dev);
+ (void)pm_runtime_put_autosuspend(dev);
+}
+
+/* Take a proxy power reference for the microcontroller */
+void ipa_uc_power(struct ipa *ipa)
+{
+ static bool already;
+ struct device *dev;
+ int ret;
+
+ if (already)
+ return;
+ already = true; /* Only do this on first boot */
+
+ /* This power reference is dropped in ipa_uc_response_hdlr() above */
+ dev = &ipa->pdev->dev;
+ ret = pm_runtime_get_sync(dev);
+ if (ret < 0) {
+ pm_runtime_put_noidle(dev);
+ dev_err(dev, "error %d getting proxy power\n", ret);
+ } else {
+ ipa->uc_powered = true;
+ }
}
/* Send a command to the microcontroller */
diff --git a/drivers/net/ipa/ipa_uc.h b/drivers/net/ipa/ipa_uc.h
index e8510899a3f0..23847f934d64 100644
--- a/drivers/net/ipa/ipa_uc.h
+++ b/drivers/net/ipa/ipa_uc.h
@@ -9,16 +9,30 @@
struct ipa;
/**
- * ipa_uc_setup() - set up the IPA microcontroller subsystem
+ * ipa_uc_config() - Configure the IPA microcontroller subsystem
* @ipa: IPA pointer
*/
-void ipa_uc_setup(struct ipa *ipa);
+void ipa_uc_config(struct ipa *ipa);
/**
- * ipa_uc_teardown() - inverse of ipa_uc_setup()
+ * ipa_uc_deconfig() - Inverse of ipa_uc_config()
* @ipa: IPA pointer
*/
-void ipa_uc_teardown(struct ipa *ipa);
+void ipa_uc_deconfig(struct ipa *ipa);
+
+/**
+ * ipa_uc_power() - Take a proxy power reference for the microcontroller
+ * @ipa: IPA pointer
+ *
+ * The first time the modem boots, it loads firmware for and starts the
+ * IPA-resident microcontroller. The microcontroller signals that it
+ * has completed its initialization by sending an INIT_COMPLETED response
+ * message to the AP. The AP must ensure the IPA is powered until
+ * it receives this message, and to do so we take a "proxy" power
+ * reference on its behalf here. Once we receive the INIT_COMPLETED
+ * message (in ipa_uc_response_hdlr()) we drop this power reference.
+ */
+void ipa_uc_power(struct ipa *ipa);
/**
* ipa_uc_panic_notifier()
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index a707502a0c0f..c0b21a5580d5 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -732,6 +732,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
port = ipvlan_port_get_rtnl(dev);
switch (event) {
+ case NETDEV_UP:
case NETDEV_CHANGE:
list_for_each_entry(ipvlan, &port->ipvlans, pnode)
netif_stacked_transfer_operstate(ipvlan->phy_dev,
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 80de9768ecd4..35f46ad040b0 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -829,7 +829,7 @@ static int macvlan_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
-static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int macvlan_eth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
struct net_device *real_dev = macvlan_dev_real_dev(dev);
const struct net_device_ops *ops = real_dev->netdev_ops;
@@ -845,8 +845,8 @@ static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
break;
fallthrough;
case SIOCGHWTSTAMP:
- if (netif_device_present(real_dev) && ops->ndo_do_ioctl)
- err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd);
+ if (netif_device_present(real_dev) && ops->ndo_eth_ioctl)
+ err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd);
break;
}
@@ -1151,7 +1151,7 @@ static const struct net_device_ops macvlan_netdev_ops = {
.ndo_stop = macvlan_stop,
.ndo_start_xmit = macvlan_start_xmit,
.ndo_change_mtu = macvlan_change_mtu,
- .ndo_do_ioctl = macvlan_do_ioctl,
+ .ndo_eth_ioctl = macvlan_eth_ioctl,
.ndo_fix_features = macvlan_fix_features,
.ndo_change_rx_flags = macvlan_change_rx_flags,
.ndo_set_mac_address = macvlan_set_mac_address,
diff --git a/drivers/net/mctp/Kconfig b/drivers/net/mctp/Kconfig
new file mode 100644
index 000000000000..d8f966cedc89
--- /dev/null
+++ b/drivers/net/mctp/Kconfig
@@ -0,0 +1,8 @@
+
+if MCTP
+
+menu "MCTP Device Drivers"
+
+endmenu
+
+endif
diff --git a/drivers/net/mctp/Makefile b/drivers/net/mctp/Makefile
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/drivers/net/mctp/Makefile
diff --git a/drivers/net/mdio/Kconfig b/drivers/net/mdio/Kconfig
index 99a6c13a11af..6da1fcb25847 100644
--- a/drivers/net/mdio/Kconfig
+++ b/drivers/net/mdio/Kconfig
@@ -169,9 +169,10 @@ config MDIO_OCTEON
config MDIO_IPQ4019
tristate "Qualcomm IPQ4019 MDIO interface support"
depends on HAS_IOMEM && OF_MDIO
+ depends on COMMON_CLK
help
This driver supports the MDIO interface found in Qualcomm
- IPQ40xx series Soc-s.
+ IPQ40xx, IPQ60xx, IPQ807x and IPQ50xx series SoCs.
config MDIO_IPQ8064
tristate "Qualcomm IPQ8064 MDIO interface support"
diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c
index 9cd71d896963..0d7d3e15d2f0 100644
--- a/drivers/net/mdio/mdio-ipq4019.c
+++ b/drivers/net/mdio/mdio-ipq4019.c
@@ -11,6 +11,7 @@
#include <linux/of_mdio.h>
#include <linux/phy.h>
#include <linux/platform_device.h>
+#include <linux/clk.h>
#define MDIO_MODE_REG 0x40
#define MDIO_ADDR_REG 0x44
@@ -31,8 +32,15 @@
#define IPQ4019_MDIO_TIMEOUT 10000
#define IPQ4019_MDIO_SLEEP 10
+/* MDIO clock source frequency is fixed to 100M */
+#define IPQ_MDIO_CLK_RATE 100000000
+
+#define IPQ_PHY_SET_DELAY_US 100000
+
struct ipq4019_mdio_data {
void __iomem *membase;
+ void __iomem *eth_ldo_rdy;
+ struct clk *mdio_clk;
};
static int ipq4019_mdio_wait_busy(struct mii_bus *bus)
@@ -171,6 +179,30 @@ static int ipq4019_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
return 0;
}
+static int ipq_mdio_reset(struct mii_bus *bus)
+{
+ struct ipq4019_mdio_data *priv = bus->priv;
+ u32 val;
+ int ret;
+
+ /* Signal to CMN_PLL that ethernet_ldo is ready, if platform resource 1
+ * is specified in the device tree.
+ */
+ if (priv->eth_ldo_rdy) {
+ val = readl(priv->eth_ldo_rdy);
+ val |= BIT(0);
+ writel(val, priv->eth_ldo_rdy);
+ fsleep(IPQ_PHY_SET_DELAY_US);
+ }
+
+ /* Configure MDIO clock source frequency if clock is specified in the device tree */
+ ret = clk_set_rate(priv->mdio_clk, IPQ_MDIO_CLK_RATE);
+ if (ret)
+ return ret;
+
+ return clk_prepare_enable(priv->mdio_clk);
+}
+
static int ipq4019_mdio_probe(struct platform_device *pdev)
{
struct ipq4019_mdio_data *priv;
@@ -187,9 +219,17 @@ static int ipq4019_mdio_probe(struct platform_device *pdev)
if (IS_ERR(priv->membase))
return PTR_ERR(priv->membase);
+ priv->mdio_clk = devm_clk_get_optional(&pdev->dev, "gcc_mdio_ahb_clk");
+ if (IS_ERR(priv->mdio_clk))
+ return PTR_ERR(priv->mdio_clk);
+
+ /* The platform resource is provided on the chipset IPQ5018 */
+ priv->eth_ldo_rdy = devm_platform_ioremap_resource(pdev, 1);
+
bus->name = "ipq4019_mdio";
bus->read = ipq4019_mdio_read;
bus->write = ipq4019_mdio_write;
+ bus->reset = ipq_mdio_reset;
bus->parent = &pdev->dev;
snprintf(bus->id, MII_BUS_ID_SIZE, "%s%d", pdev->name, pdev->id);
@@ -215,6 +255,7 @@ static int ipq4019_mdio_remove(struct platform_device *pdev)
static const struct of_device_id ipq4019_mdio_dt_ids[] = {
{ .compatible = "qcom,ipq4019-mdio" },
+ { .compatible = "qcom,ipq5018-mdio" },
{ }
};
MODULE_DEVICE_TABLE(of, ipq4019_mdio_dt_ids);
diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c
index 2d67e12c8262..1ee592d3eae4 100644
--- a/drivers/net/mdio/mdio-mscc-miim.c
+++ b/drivers/net/mdio/mdio-mscc-miim.c
@@ -134,7 +134,6 @@ static int mscc_miim_reset(struct mii_bus *bus)
static int mscc_miim_probe(struct platform_device *pdev)
{
- struct resource *res;
struct mii_bus *bus;
struct mscc_miim_dev *dev;
int ret;
@@ -157,13 +156,10 @@ static int mscc_miim_probe(struct platform_device *pdev)
return PTR_ERR(dev->regs);
}
- res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- if (res) {
- dev->phy_regs = devm_ioremap_resource(&pdev->dev, res);
- if (IS_ERR(dev->phy_regs)) {
- dev_err(&pdev->dev, "Unable to map internal phy registers\n");
- return PTR_ERR(dev->phy_regs);
- }
+ dev->phy_regs = devm_platform_ioremap_resource(pdev, 1);
+ if (IS_ERR(dev->phy_regs)) {
+ dev_err(&pdev->dev, "Unable to map internal phy registers\n");
+ return PTR_ERR(dev->phy_regs);
}
ret = of_mdiobus_register(bus, pdev->dev.of_node);
diff --git a/drivers/net/mdio/mdio-mux.c b/drivers/net/mdio/mdio-mux.c
index 110e4ee85785..ebd001f0eece 100644
--- a/drivers/net/mdio/mdio-mux.c
+++ b/drivers/net/mdio/mdio-mux.c
@@ -82,6 +82,17 @@ out:
static int parent_count;
+static void mdio_mux_uninit_children(struct mdio_mux_parent_bus *pb)
+{
+ struct mdio_mux_child_bus *cb = pb->children;
+
+ while (cb) {
+ mdiobus_unregister(cb->mii_bus);
+ mdiobus_free(cb->mii_bus);
+ cb = cb->next;
+ }
+}
+
int mdio_mux_init(struct device *dev,
struct device_node *mux_node,
int (*switch_fn)(int cur, int desired, void *data),
@@ -144,7 +155,7 @@ int mdio_mux_init(struct device *dev,
cb = devm_kzalloc(dev, sizeof(*cb), GFP_KERNEL);
if (!cb) {
ret_val = -ENOMEM;
- continue;
+ goto err_loop;
}
cb->bus_number = v;
cb->parent = pb;
@@ -152,8 +163,7 @@ int mdio_mux_init(struct device *dev,
cb->mii_bus = mdiobus_alloc();
if (!cb->mii_bus) {
ret_val = -ENOMEM;
- devm_kfree(dev, cb);
- continue;
+ goto err_loop;
}
cb->mii_bus->priv = cb;
@@ -165,11 +175,15 @@ int mdio_mux_init(struct device *dev,
cb->mii_bus->write = mdio_mux_write;
r = of_mdiobus_register(cb->mii_bus, child_bus_node);
if (r) {
+ mdiobus_free(cb->mii_bus);
+ if (r == -EPROBE_DEFER) {
+ ret_val = r;
+ goto err_loop;
+ }
+ devm_kfree(dev, cb);
dev_err(dev,
"Error: Failed to register MDIO bus for child %pOF\n",
child_bus_node);
- mdiobus_free(cb->mii_bus);
- devm_kfree(dev, cb);
} else {
cb->next = pb->children;
pb->children = cb;
@@ -181,7 +195,10 @@ int mdio_mux_init(struct device *dev,
}
dev_err(dev, "Error: No acceptable child buses found\n");
- devm_kfree(dev, pb);
+
+err_loop:
+ mdio_mux_uninit_children(pb);
+ of_node_put(child_bus_node);
err_pb_kz:
put_device(&parent_bus->dev);
err_parent_bus:
@@ -193,14 +210,8 @@ EXPORT_SYMBOL_GPL(mdio_mux_init);
void mdio_mux_uninit(void *mux_handle)
{
struct mdio_mux_parent_bus *pb = mux_handle;
- struct mdio_mux_child_bus *cb = pb->children;
-
- while (cb) {
- mdiobus_unregister(cb->mii_bus);
- mdiobus_free(cb->mii_bus);
- cb = cb->next;
- }
+ mdio_mux_uninit_children(pb);
put_device(&pb->mii_bus->dev);
}
EXPORT_SYMBOL_GPL(mdio_mux_uninit);
diff --git a/drivers/net/mhi/Makefile b/drivers/net/mhi/Makefile
deleted file mode 100644
index f71b9f8f3c4f..000000000000
--- a/drivers/net/mhi/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_MHI_NET) += mhi_net.o
-
-mhi_net-y := net.o proto_mbim.o
diff --git a/drivers/net/mhi/mhi.h b/drivers/net/mhi/mhi.h
deleted file mode 100644
index 1d0c499d27a3..000000000000
--- a/drivers/net/mhi/mhi.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* MHI Network driver - Network over MHI bus
- *
- * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org>
- */
-
-struct mhi_net_stats {
- u64_stats_t rx_packets;
- u64_stats_t rx_bytes;
- u64_stats_t rx_errors;
- u64_stats_t rx_dropped;
- u64_stats_t rx_length_errors;
- u64_stats_t tx_packets;
- u64_stats_t tx_bytes;
- u64_stats_t tx_errors;
- u64_stats_t tx_dropped;
- struct u64_stats_sync tx_syncp;
- struct u64_stats_sync rx_syncp;
-};
-
-struct mhi_net_dev {
- struct mhi_device *mdev;
- struct net_device *ndev;
- struct sk_buff *skbagg_head;
- struct sk_buff *skbagg_tail;
- const struct mhi_net_proto *proto;
- void *proto_data;
- struct delayed_work rx_refill;
- struct mhi_net_stats stats;
- u32 rx_queue_sz;
- int msg_enable;
- unsigned int mru;
-};
-
-struct mhi_net_proto {
- int (*init)(struct mhi_net_dev *mhi_netdev);
- struct sk_buff * (*tx_fixup)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb);
- void (*rx)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb);
-};
-
-extern const struct mhi_net_proto proto_mbim;
diff --git a/drivers/net/mhi/proto_mbim.c b/drivers/net/mhi/proto_mbim.c
deleted file mode 100644
index bf1ad863237d..000000000000
--- a/drivers/net/mhi/proto_mbim.c
+++ /dev/null
@@ -1,304 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* MHI Network driver - Network over MHI bus
- *
- * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org>
- *
- * This driver copy some code from cdc_ncm, which is:
- * Copyright (C) ST-Ericsson 2010-2012
- * and cdc_mbim, which is:
- * Copyright (c) 2012 Smith Micro Software, Inc.
- * Copyright (c) 2012 Bjørn Mork <bjorn@mork.no>
- *
- */
-
-#include <linux/ethtool.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/mii.h>
-#include <linux/netdevice.h>
-#include <linux/wwan.h>
-#include <linux/skbuff.h>
-#include <linux/usb.h>
-#include <linux/usb/cdc.h>
-#include <linux/usb/usbnet.h>
-#include <linux/usb/cdc_ncm.h>
-
-#include "mhi.h"
-
-#define MBIM_NDP16_SIGN_MASK 0x00ffffff
-
-/* Usual WWAN MTU */
-#define MHI_MBIM_DEFAULT_MTU 1500
-
-/* 3500 allows to optimize skb allocation, the skbs will basically fit in
- * one 4K page. Large MBIM packets will simply be split over several MHI
- * transfers and chained by the MHI net layer (zerocopy).
- */
-#define MHI_MBIM_DEFAULT_MRU 3500
-
-struct mbim_context {
- u16 rx_seq;
- u16 tx_seq;
-};
-
-static void __mbim_length_errors_inc(struct mhi_net_dev *dev)
-{
- u64_stats_update_begin(&dev->stats.rx_syncp);
- u64_stats_inc(&dev->stats.rx_length_errors);
- u64_stats_update_end(&dev->stats.rx_syncp);
-}
-
-static void __mbim_errors_inc(struct mhi_net_dev *dev)
-{
- u64_stats_update_begin(&dev->stats.rx_syncp);
- u64_stats_inc(&dev->stats.rx_errors);
- u64_stats_update_end(&dev->stats.rx_syncp);
-}
-
-static int mbim_rx_verify_nth16(struct sk_buff *skb)
-{
- struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev);
- struct mbim_context *ctx = dev->proto_data;
- struct usb_cdc_ncm_nth16 *nth16;
- int len;
-
- if (skb->len < sizeof(struct usb_cdc_ncm_nth16) +
- sizeof(struct usb_cdc_ncm_ndp16)) {
- netif_dbg(dev, rx_err, dev->ndev, "frame too short\n");
- __mbim_length_errors_inc(dev);
- return -EINVAL;
- }
-
- nth16 = (struct usb_cdc_ncm_nth16 *)skb->data;
-
- if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) {
- netif_dbg(dev, rx_err, dev->ndev,
- "invalid NTH16 signature <%#010x>\n",
- le32_to_cpu(nth16->dwSignature));
- __mbim_errors_inc(dev);
- return -EINVAL;
- }
-
- /* No limit on the block length, except the size of the data pkt */
- len = le16_to_cpu(nth16->wBlockLength);
- if (len > skb->len) {
- netif_dbg(dev, rx_err, dev->ndev,
- "NTB does not fit into the skb %u/%u\n", len,
- skb->len);
- __mbim_length_errors_inc(dev);
- return -EINVAL;
- }
-
- if (ctx->rx_seq + 1 != le16_to_cpu(nth16->wSequence) &&
- (ctx->rx_seq || le16_to_cpu(nth16->wSequence)) &&
- !(ctx->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) {
- netif_dbg(dev, rx_err, dev->ndev,
- "sequence number glitch prev=%d curr=%d\n",
- ctx->rx_seq, le16_to_cpu(nth16->wSequence));
- }
- ctx->rx_seq = le16_to_cpu(nth16->wSequence);
-
- return le16_to_cpu(nth16->wNdpIndex);
-}
-
-static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16)
-{
- struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev);
- int ret;
-
- if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) {
- netif_dbg(dev, rx_err, dev->ndev, "invalid DPT16 length <%u>\n",
- le16_to_cpu(ndp16->wLength));
- return -EINVAL;
- }
-
- ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16))
- / sizeof(struct usb_cdc_ncm_dpe16));
- ret--; /* Last entry is always a NULL terminator */
-
- if (sizeof(struct usb_cdc_ncm_ndp16) +
- ret * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) {
- netif_dbg(dev, rx_err, dev->ndev,
- "Invalid nframes = %d\n", ret);
- return -EINVAL;
- }
-
- return ret;
-}
-
-static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb)
-{
- struct net_device *ndev = mhi_netdev->ndev;
- int ndpoffset;
-
- /* Check NTB header and retrieve first NDP offset */
- ndpoffset = mbim_rx_verify_nth16(skb);
- if (ndpoffset < 0) {
- net_err_ratelimited("%s: Incorrect NTB header\n", ndev->name);
- goto error;
- }
-
- /* Process each NDP */
- while (1) {
- struct usb_cdc_ncm_ndp16 ndp16;
- struct usb_cdc_ncm_dpe16 dpe16;
- int nframes, n, dpeoffset;
-
- if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) {
- net_err_ratelimited("%s: Incorrect NDP offset (%u)\n",
- ndev->name, ndpoffset);
- __mbim_length_errors_inc(mhi_netdev);
- goto error;
- }
-
- /* Check NDP header and retrieve number of datagrams */
- nframes = mbim_rx_verify_ndp16(skb, &ndp16);
- if (nframes < 0) {
- net_err_ratelimited("%s: Incorrect NDP16\n", ndev->name);
- __mbim_length_errors_inc(mhi_netdev);
- goto error;
- }
-
- /* Only IP data type supported, no DSS in MHI context */
- if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK))
- != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) {
- net_err_ratelimited("%s: Unsupported NDP type\n", ndev->name);
- __mbim_errors_inc(mhi_netdev);
- goto next_ndp;
- }
-
- /* Only primary IP session 0 (0x00) supported for now */
- if (ndp16.dwSignature & ~cpu_to_le32(MBIM_NDP16_SIGN_MASK)) {
- net_err_ratelimited("%s: bad packet session\n", ndev->name);
- __mbim_errors_inc(mhi_netdev);
- goto next_ndp;
- }
-
- /* de-aggregate and deliver IP packets */
- dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16);
- for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) {
- u16 dgram_offset, dgram_len;
- struct sk_buff *skbn;
-
- if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16)))
- break;
-
- dgram_offset = le16_to_cpu(dpe16.wDatagramIndex);
- dgram_len = le16_to_cpu(dpe16.wDatagramLength);
-
- if (!dgram_offset || !dgram_len)
- break; /* null terminator */
-
- skbn = netdev_alloc_skb(ndev, dgram_len);
- if (!skbn)
- continue;
-
- skb_put(skbn, dgram_len);
- skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len);
-
- switch (skbn->data[0] & 0xf0) {
- case 0x40:
- skbn->protocol = htons(ETH_P_IP);
- break;
- case 0x60:
- skbn->protocol = htons(ETH_P_IPV6);
- break;
- default:
- net_err_ratelimited("%s: unknown protocol\n",
- ndev->name);
- __mbim_errors_inc(mhi_netdev);
- dev_kfree_skb_any(skbn);
- continue;
- }
-
- netif_rx(skbn);
- }
-next_ndp:
- /* Other NDP to process? */
- ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex);
- if (!ndpoffset)
- break;
- }
-
- /* free skb */
- dev_consume_skb_any(skb);
- return;
-error:
- dev_kfree_skb_any(skb);
-}
-
-struct mbim_tx_hdr {
- struct usb_cdc_ncm_nth16 nth16;
- struct usb_cdc_ncm_ndp16 ndp16;
- struct usb_cdc_ncm_dpe16 dpe16[2];
-} __packed;
-
-static struct sk_buff *mbim_tx_fixup(struct mhi_net_dev *mhi_netdev,
- struct sk_buff *skb)
-{
- struct mbim_context *ctx = mhi_netdev->proto_data;
- unsigned int dgram_size = skb->len;
- struct usb_cdc_ncm_nth16 *nth16;
- struct usb_cdc_ncm_ndp16 *ndp16;
- struct mbim_tx_hdr *mbim_hdr;
-
- /* For now, this is a partial implementation of CDC MBIM, only one NDP
- * is sent, containing the IP packet (no aggregation).
- */
-
- /* Ensure we have enough headroom for crafting MBIM header */
- if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) {
- dev_kfree_skb_any(skb);
- return NULL;
- }
-
- mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr));
-
- /* Fill NTB header */
- nth16 = &mbim_hdr->nth16;
- nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN);
- nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
- nth16->wSequence = cpu_to_le16(ctx->tx_seq++);
- nth16->wBlockLength = cpu_to_le16(skb->len);
- nth16->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
-
- /* Fill the unique NDP */
- ndp16 = &mbim_hdr->ndp16;
- ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN);
- ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16)
- + sizeof(struct usb_cdc_ncm_dpe16) * 2);
- ndp16->wNextNdpIndex = 0;
-
- /* Datagram follows the mbim header */
- ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr));
- ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size);
-
- /* null termination */
- ndp16->dpe16[1].wDatagramIndex = 0;
- ndp16->dpe16[1].wDatagramLength = 0;
-
- return skb;
-}
-
-static int mbim_init(struct mhi_net_dev *mhi_netdev)
-{
- struct net_device *ndev = mhi_netdev->ndev;
-
- mhi_netdev->proto_data = devm_kzalloc(&ndev->dev,
- sizeof(struct mbim_context),
- GFP_KERNEL);
- if (!mhi_netdev->proto_data)
- return -ENOMEM;
-
- ndev->needed_headroom = sizeof(struct mbim_tx_hdr);
- ndev->mtu = MHI_MBIM_DEFAULT_MTU;
- mhi_netdev->mru = MHI_MBIM_DEFAULT_MRU;
-
- return 0;
-}
-
-const struct mhi_net_proto proto_mbim = {
- .init = mbim_init,
- .rx = mbim_rx,
- .tx_fixup = mbim_tx_fixup,
-};
diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi_net.c
index 11be6bcdd551..d127eb6e9257 100644
--- a/drivers/net/mhi/net.c
+++ b/drivers/net/mhi_net.c
@@ -11,28 +11,42 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/u64_stats_sync.h>
-#include <linux/wwan.h>
-
-#include "mhi.h"
#define MHI_NET_MIN_MTU ETH_MIN_MTU
#define MHI_NET_MAX_MTU 0xffff
#define MHI_NET_DEFAULT_MTU 0x4000
-/* When set to false, the default netdev (link 0) is not created, and it's up
- * to user to create the link (via wwan rtnetlink).
- */
-static bool create_default_iface = true;
-module_param(create_default_iface, bool, 0);
+struct mhi_net_stats {
+ u64_stats_t rx_packets;
+ u64_stats_t rx_bytes;
+ u64_stats_t rx_errors;
+ u64_stats_t tx_packets;
+ u64_stats_t tx_bytes;
+ u64_stats_t tx_errors;
+ u64_stats_t tx_dropped;
+ struct u64_stats_sync tx_syncp;
+ struct u64_stats_sync rx_syncp;
+};
+
+struct mhi_net_dev {
+ struct mhi_device *mdev;
+ struct net_device *ndev;
+ struct sk_buff *skbagg_head;
+ struct sk_buff *skbagg_tail;
+ struct delayed_work rx_refill;
+ struct mhi_net_stats stats;
+ u32 rx_queue_sz;
+ int msg_enable;
+ unsigned int mru;
+};
struct mhi_device_info {
const char *netname;
- const struct mhi_net_proto *proto;
};
static int mhi_ndo_open(struct net_device *ndev)
{
- struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
+ struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
/* Feed the rx buffer pool */
schedule_delayed_work(&mhi_netdev->rx_refill, 0);
@@ -47,7 +61,7 @@ static int mhi_ndo_open(struct net_device *ndev)
static int mhi_ndo_stop(struct net_device *ndev)
{
- struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
+ struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
netif_stop_queue(ndev);
netif_carrier_off(ndev);
@@ -58,17 +72,10 @@ static int mhi_ndo_stop(struct net_device *ndev)
static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
{
- struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
- const struct mhi_net_proto *proto = mhi_netdev->proto;
+ struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
struct mhi_device *mdev = mhi_netdev->mdev;
int err;
- if (proto && proto->tx_fixup) {
- skb = proto->tx_fixup(mhi_netdev, skb);
- if (unlikely(!skb))
- goto exit_drop;
- }
-
err = mhi_queue_skb(mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT);
if (unlikely(err)) {
net_err_ratelimited("%s: Failed to queue TX buf (%d)\n",
@@ -93,7 +100,7 @@ exit_drop:
static void mhi_ndo_get_stats64(struct net_device *ndev,
struct rtnl_link_stats64 *stats)
{
- struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
+ struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
unsigned int start;
do {
@@ -101,8 +108,6 @@ static void mhi_ndo_get_stats64(struct net_device *ndev,
stats->rx_packets = u64_stats_read(&mhi_netdev->stats.rx_packets);
stats->rx_bytes = u64_stats_read(&mhi_netdev->stats.rx_bytes);
stats->rx_errors = u64_stats_read(&mhi_netdev->stats.rx_errors);
- stats->rx_dropped = u64_stats_read(&mhi_netdev->stats.rx_dropped);
- stats->rx_length_errors = u64_stats_read(&mhi_netdev->stats.rx_length_errors);
} while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.rx_syncp, start));
do {
@@ -165,7 +170,6 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev,
struct mhi_result *mhi_res)
{
struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
- const struct mhi_net_proto *proto = mhi_netdev->proto;
struct sk_buff *skb = mhi_res->buf_addr;
int free_desc_count;
@@ -205,11 +209,6 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev,
mhi_netdev->skbagg_head = NULL;
}
- u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
- u64_stats_inc(&mhi_netdev->stats.rx_packets);
- u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len);
- u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
-
switch (skb->data[0] & 0xf0) {
case 0x40:
skb->protocol = htons(ETH_P_IP);
@@ -222,10 +221,11 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev,
break;
}
- if (proto && proto->rx)
- proto->rx(mhi_netdev, skb);
- else
- netif_rx(skb);
+ u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
+ u64_stats_inc(&mhi_netdev->stats.rx_packets);
+ u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len);
+ u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
+ netif_rx(skb);
}
/* Refill if RX buffers queue becomes low */
@@ -248,7 +248,6 @@ static void mhi_net_ul_callback(struct mhi_device *mhi_dev,
u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
if (unlikely(mhi_res->transaction_status)) {
-
/* MHI layer stopping/resetting the UL channel */
if (mhi_res->transaction_status == -ENOTCONN) {
u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
@@ -302,78 +301,47 @@ static void mhi_net_rx_refill_work(struct work_struct *work)
schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2);
}
-static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id,
- struct netlink_ext_ack *extack)
+static int mhi_net_newlink(struct mhi_device *mhi_dev, struct net_device *ndev)
{
- const struct mhi_device_info *info;
- struct mhi_device *mhi_dev = ctxt;
struct mhi_net_dev *mhi_netdev;
int err;
- info = (struct mhi_device_info *)mhi_dev->id->driver_data;
-
- /* For now we only support one link (link context 0), driver must be
- * reworked to break 1:1 relationship for net MBIM and to forward setup
- * call to rmnet(QMAP) otherwise.
- */
- if (if_id != 0)
- return -EINVAL;
-
- if (dev_get_drvdata(&mhi_dev->dev))
- return -EBUSY;
-
- mhi_netdev = wwan_netdev_drvpriv(ndev);
+ mhi_netdev = netdev_priv(ndev);
dev_set_drvdata(&mhi_dev->dev, mhi_netdev);
mhi_netdev->ndev = ndev;
mhi_netdev->mdev = mhi_dev;
mhi_netdev->skbagg_head = NULL;
- mhi_netdev->proto = info->proto;
+ mhi_netdev->mru = mhi_dev->mhi_cntrl->mru;
INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work);
u64_stats_init(&mhi_netdev->stats.rx_syncp);
u64_stats_init(&mhi_netdev->stats.tx_syncp);
/* Start MHI channels */
- err = mhi_prepare_for_transfer(mhi_dev, 0);
+ err = mhi_prepare_for_transfer(mhi_dev);
if (err)
goto out_err;
/* Number of transfer descriptors determines size of the queue */
mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
- if (extack)
- err = register_netdevice(ndev);
- else
- err = register_netdev(ndev);
+ err = register_netdev(ndev);
if (err)
- goto out_err;
-
- if (mhi_netdev->proto) {
- err = mhi_netdev->proto->init(mhi_netdev);
- if (err)
- goto out_err_proto;
- }
+ return err;
return 0;
-out_err_proto:
- unregister_netdevice(ndev);
out_err:
free_netdev(ndev);
return err;
}
-static void mhi_net_dellink(void *ctxt, struct net_device *ndev,
- struct list_head *head)
+static void mhi_net_dellink(struct mhi_device *mhi_dev, struct net_device *ndev)
{
- struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
- struct mhi_device *mhi_dev = ctxt;
+ struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
- if (head)
- unregister_netdevice_queue(ndev, head);
- else
- unregister_netdev(ndev);
+ unregister_netdev(ndev);
mhi_unprepare_from_transfer(mhi_dev);
@@ -382,65 +350,34 @@ static void mhi_net_dellink(void *ctxt, struct net_device *ndev,
dev_set_drvdata(&mhi_dev->dev, NULL);
}
-static const struct wwan_ops mhi_wwan_ops = {
- .priv_size = sizeof(struct mhi_net_dev),
- .setup = mhi_net_setup,
- .newlink = mhi_net_newlink,
- .dellink = mhi_net_dellink,
-};
-
static int mhi_net_probe(struct mhi_device *mhi_dev,
const struct mhi_device_id *id)
{
const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data;
- struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
struct net_device *ndev;
int err;
- err = wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_wwan_ops, mhi_dev,
- WWAN_NO_DEFAULT_LINK);
- if (err)
- return err;
-
- if (!create_default_iface)
- return 0;
-
- /* Create a default interface which is used as either RMNET real-dev,
- * MBIM link 0 or ip link 0)
- */
ndev = alloc_netdev(sizeof(struct mhi_net_dev), info->netname,
NET_NAME_PREDICTABLE, mhi_net_setup);
- if (!ndev) {
- err = -ENOMEM;
- goto err_unregister;
- }
+ if (!ndev)
+ return -ENOMEM;
SET_NETDEV_DEV(ndev, &mhi_dev->dev);
- err = mhi_net_newlink(mhi_dev, ndev, 0, NULL);
- if (err)
- goto err_release;
+ err = mhi_net_newlink(mhi_dev, ndev);
+ if (err) {
+ free_netdev(ndev);
+ return err;
+ }
return 0;
-
-err_release:
- free_netdev(ndev);
-err_unregister:
- wwan_unregister_ops(&cntrl->mhi_dev->dev);
-
- return err;
}
static void mhi_net_remove(struct mhi_device *mhi_dev)
{
struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
- struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
-
- /* WWAN core takes care of removing remaining links */
- wwan_unregister_ops(&cntrl->mhi_dev->dev);
- if (create_default_iface)
- mhi_net_dellink(mhi_dev, mhi_netdev->ndev, NULL);
+ mhi_net_dellink(mhi_dev, mhi_netdev->ndev);
}
static const struct mhi_device_info mhi_hwip0 = {
@@ -451,18 +388,11 @@ static const struct mhi_device_info mhi_swip0 = {
.netname = "mhi_swip%d",
};
-static const struct mhi_device_info mhi_hwip0_mbim = {
- .netname = "mhi_mbim%d",
- .proto = &proto_mbim,
-};
-
static const struct mhi_device_id mhi_net_id_table[] = {
/* Hardware accelerated data PATH (to modem IPA), protocol agnostic */
{ .chan = "IP_HW0", .driver_data = (kernel_ulong_t)&mhi_hwip0 },
/* Software data PATH (to modem CPU) */
{ .chan = "IP_SW0", .driver_data = (kernel_ulong_t)&mhi_swip0 },
- /* Hardware accelerated data PATH (to modem IPA), MBIM protocol */
- { .chan = "IP_HW0_MBIM", .driver_data = (kernel_ulong_t)&mhi_hwip0_mbim },
{}
};
MODULE_DEVICE_TABLE(mhi, mhi_net_id_table);
diff --git a/drivers/net/mii.c b/drivers/net/mii.c
index 779c3a96dba7..22680f47385d 100644
--- a/drivers/net/mii.c
+++ b/drivers/net/mii.c
@@ -49,10 +49,8 @@ static u32 mii_get_an(struct mii_if_info *mii, u16 addr)
*
* The @ecmd parameter is expected to have been cleared before calling
* mii_ethtool_gset().
- *
- * Returns 0 for success, negative on error.
*/
-int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
+void mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
{
struct net_device *dev = mii->dev;
u16 bmcr, bmsr, ctrl1000 = 0, stat1000 = 0;
@@ -131,8 +129,6 @@ int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
mii->full_duplex = ecmd->duplex;
/* ignore maxtxpkt, maxrxpkt for now */
-
- return 0;
}
/**
diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
index 14b154929533..29f5627d11e6 100644
--- a/drivers/net/netdevsim/bus.c
+++ b/drivers/net/netdevsim/bus.c
@@ -183,8 +183,6 @@ new_port_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
- struct nsim_dev *nsim_dev = dev_get_drvdata(dev);
- struct devlink *devlink;
unsigned int port_index;
int ret;
@@ -195,12 +193,15 @@ new_port_store(struct device *dev, struct device_attribute *attr,
if (ret)
return ret;
- devlink = priv_to_devlink(nsim_dev);
+ if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock))
+ return -EBUSY;
+
+ if (nsim_bus_dev->in_reload) {
+ mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
+ return -EBUSY;
+ }
- mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
- devlink_reload_disable(devlink);
ret = nsim_dev_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index);
- devlink_reload_enable(devlink);
mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
return ret ? ret : count;
}
@@ -212,8 +213,6 @@ del_port_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
- struct nsim_dev *nsim_dev = dev_get_drvdata(dev);
- struct devlink *devlink;
unsigned int port_index;
int ret;
@@ -224,12 +223,15 @@ del_port_store(struct device *dev, struct device_attribute *attr,
if (ret)
return ret;
- devlink = priv_to_devlink(nsim_dev);
+ if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock))
+ return -EBUSY;
+
+ if (nsim_bus_dev->in_reload) {
+ mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
+ return -EBUSY;
+ }
- mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
- devlink_reload_disable(devlink);
ret = nsim_dev_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index);
- devlink_reload_enable(devlink);
mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
return ret ? ret : count;
}
@@ -262,29 +264,31 @@ static struct device_type nsim_bus_dev_type = {
};
static struct nsim_bus_dev *
-nsim_bus_dev_new(unsigned int id, unsigned int port_count);
+nsim_bus_dev_new(unsigned int id, unsigned int port_count, unsigned int num_queues);
static ssize_t
new_device_store(struct bus_type *bus, const char *buf, size_t count)
{
+ unsigned int id, port_count, num_queues;
struct nsim_bus_dev *nsim_bus_dev;
- unsigned int port_count;
- unsigned int id;
int err;
- err = sscanf(buf, "%u %u", &id, &port_count);
+ err = sscanf(buf, "%u %u %u", &id, &port_count, &num_queues);
switch (err) {
case 1:
port_count = 1;
fallthrough;
case 2:
+ num_queues = 1;
+ fallthrough;
+ case 3:
if (id > INT_MAX) {
pr_err("Value of \"id\" is too big.\n");
return -EINVAL;
}
break;
default:
- pr_err("Format for adding new device is \"id port_count\" (uint uint).\n");
+ pr_err("Format for adding new device is \"id port_count num_queues\" (uint uint unit).\n");
return -EINVAL;
}
@@ -295,7 +299,7 @@ new_device_store(struct bus_type *bus, const char *buf, size_t count)
goto err;
}
- nsim_bus_dev = nsim_bus_dev_new(id, port_count);
+ nsim_bus_dev = nsim_bus_dev_new(id, port_count, num_queues);
if (IS_ERR(nsim_bus_dev)) {
err = PTR_ERR(nsim_bus_dev);
goto err;
@@ -396,7 +400,7 @@ static struct bus_type nsim_bus = {
#define NSIM_BUS_DEV_MAX_VFS 4
static struct nsim_bus_dev *
-nsim_bus_dev_new(unsigned int id, unsigned int port_count)
+nsim_bus_dev_new(unsigned int id, unsigned int port_count, unsigned int num_queues)
{
struct nsim_bus_dev *nsim_bus_dev;
int err;
@@ -412,6 +416,7 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count)
nsim_bus_dev->dev.bus = &nsim_bus;
nsim_bus_dev->dev.type = &nsim_bus_dev_type;
nsim_bus_dev->port_count = port_count;
+ nsim_bus_dev->num_queues = num_queues;
nsim_bus_dev->initial_net = current->nsproxy->net_ns;
nsim_bus_dev->max_vfs = NSIM_BUS_DEV_MAX_VFS;
mutex_init(&nsim_bus_dev->nsim_bus_reload_lock);
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 6348307bfa84..54313bd57797 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -864,16 +864,24 @@ static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change,
struct netlink_ext_ack *extack)
{
struct nsim_dev *nsim_dev = devlink_priv(devlink);
+ struct nsim_bus_dev *nsim_bus_dev;
+
+ nsim_bus_dev = nsim_dev->nsim_bus_dev;
+ if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock))
+ return -EOPNOTSUPP;
if (nsim_dev->dont_allow_reload) {
/* For testing purposes, user set debugfs dont_allow_reload
* value to true. So forbid it.
*/
NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes");
+ mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
return -EOPNOTSUPP;
}
+ nsim_bus_dev->in_reload = true;
nsim_dev_reload_destroy(nsim_dev);
+ mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
return 0;
}
@@ -882,17 +890,26 @@ static int nsim_dev_reload_up(struct devlink *devlink, enum devlink_reload_actio
struct netlink_ext_ack *extack)
{
struct nsim_dev *nsim_dev = devlink_priv(devlink);
+ struct nsim_bus_dev *nsim_bus_dev;
+ int ret;
+
+ nsim_bus_dev = nsim_dev->nsim_bus_dev;
+ mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
+ nsim_bus_dev->in_reload = false;
if (nsim_dev->fail_reload) {
/* For testing purposes, user set debugfs fail_reload
* value to true. Fail right away.
*/
NL_SET_ERR_MSG_MOD(extack, "User setup the reload to fail for testing purposes");
+ mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
return -EINVAL;
}
*actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
- return nsim_dev_reload_create(nsim_dev, extack);
+ ret = nsim_dev_reload_create(nsim_dev, extack);
+ mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
+ return ret;
}
static int nsim_dev_info_get(struct devlink *devlink,
@@ -1431,10 +1448,10 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
struct devlink *devlink;
int err;
- devlink = devlink_alloc(&nsim_dev_devlink_ops, sizeof(*nsim_dev));
+ devlink = devlink_alloc_ns(&nsim_dev_devlink_ops, sizeof(*nsim_dev),
+ nsim_bus_dev->initial_net, &nsim_bus_dev->dev);
if (!devlink)
return -ENOMEM;
- devlink_net_set(devlink, nsim_bus_dev->initial_net);
nsim_dev = devlink_priv(devlink);
nsim_dev->nsim_bus_dev = nsim_bus_dev;
nsim_dev->switch_id.id_len = sizeof(nsim_dev->switch_id.id);
@@ -1453,7 +1470,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
if (err)
goto err_devlink_free;
- err = devlink_register(devlink, &nsim_bus_dev->dev);
+ err = devlink_register(devlink);
if (err)
goto err_resources_unregister;
diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c
index c9ae52595a8f..b03a0513eb7e 100644
--- a/drivers/net/netdevsim/ethtool.c
+++ b/drivers/net/netdevsim/ethtool.c
@@ -43,7 +43,9 @@ nsim_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause)
}
static int nsim_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct netdevsim *ns = netdev_priv(dev);
@@ -52,7 +54,9 @@ static int nsim_get_coalesce(struct net_device *dev,
}
static int nsim_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct netdevsim *ns = netdev_priv(dev);
diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index 213d3e5056c8..4300261e2f9e 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -1441,7 +1441,7 @@ static u64 nsim_fib_nexthops_res_occ_get(void *priv)
static void nsim_fib_set_max_all(struct nsim_fib_data *data,
struct devlink *devlink)
{
- enum nsim_resource_id res_ids[] = {
+ static const enum nsim_resource_id res_ids[] = {
NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES,
NSIM_RESOURCE_NEXTHOPS,
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index c3aeb15843e2..50572e0f1f52 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -347,7 +347,8 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
struct netdevsim *ns;
int err;
- dev = alloc_netdev(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup);
+ dev = alloc_netdev_mq(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup,
+ nsim_dev->nsim_bus_dev->num_queues);
if (!dev)
return ERR_PTR(-ENOMEM);
@@ -392,7 +393,8 @@ void nsim_destroy(struct netdevsim *ns)
static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- NL_SET_ERR_MSG_MOD(extack, "Please use: echo \"[ID] [PORT_COUNT]\" > /sys/bus/netdevsim/new_device");
+ NL_SET_ERR_MSG_MOD(extack,
+ "Please use: echo \"[ID] [PORT_COUNT] [NUM_QUEUES]\" > /sys/bus/netdevsim/new_device");
return -EOPNOTSUPP;
}
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index ae462957dcee..793c86dc5a9c 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -352,6 +352,7 @@ struct nsim_bus_dev {
struct device dev;
struct list_head list;
unsigned int port_count;
+ unsigned int num_queues; /* Number of queues for each port on this bus */
struct net *initial_net; /* Purpose of this is to carry net pointer
* during the probe time only.
*/
@@ -361,6 +362,7 @@ struct nsim_bus_dev {
struct nsim_vf_config *vfconfigs;
/* Lock for devlink->reload_enabled in netdevsim module */
struct mutex nsim_bus_reload_lock;
+ bool in_reload;
bool init;
};
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 63fda3fc40aa..fb0a83dc09ac 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -65,6 +65,9 @@ static const int xpcs_xlgmii_features[] = {
};
static const int xpcs_sgmii_features[] = {
+ ETHTOOL_LINK_MODE_Pause_BIT,
+ ETHTOOL_LINK_MODE_Asym_Pause_BIT,
+ ETHTOOL_LINK_MODE_Autoneg_BIT,
ETHTOOL_LINK_MODE_10baseT_Half_BIT,
ETHTOOL_LINK_MODE_10baseT_Full_BIT,
ETHTOOL_LINK_MODE_100baseT_Half_BIT,
@@ -75,6 +78,7 @@ static const int xpcs_sgmii_features[] = {
};
static const int xpcs_2500basex_features[] = {
+ ETHTOOL_LINK_MODE_Pause_BIT,
ETHTOOL_LINK_MODE_Asym_Pause_BIT,
ETHTOOL_LINK_MODE_Autoneg_BIT,
ETHTOOL_LINK_MODE_2500baseX_Full_BIT,
@@ -1089,7 +1093,7 @@ struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
xpcs = kzalloc(sizeof(*xpcs), GFP_KERNEL);
if (!xpcs)
- return NULL;
+ return ERR_PTR(-ENOMEM);
xpcs->mdiodev = mdiodev;
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index c56f703ae998..902495afcb38 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -207,6 +207,12 @@ config MARVELL_88X2222_PHY
Support for the Marvell 88X2222 Dual-port Multi-speed Ethernet
Transceiver.
+config MAXLINEAR_GPHY
+ tristate "Maxlinear Ethernet PHYs"
+ help
+ Support for the Maxlinear GPY115, GPY211, GPY212, GPY215,
+ GPY241, GPY245 PHYs.
+
config MEDIATEK_GE_PHY
tristate "MediaTek Gigabit Ethernet PHYs"
help
@@ -230,6 +236,7 @@ config MICROCHIP_T1_PHY
config MICROSEMI_PHY
tristate "Microsemi PHYs"
depends on MACSEC || MACSEC=n
+ depends on PTP_1588_CLOCK_OPTIONAL || !NETWORK_PHY_TIMESTAMPING
select CRYPTO_LIB_AES if MACSEC
help
Currently supports VSC8514, VSC8530, VSC8531, VSC8540 and VSC8541 PHYs
@@ -247,6 +254,7 @@ config NATIONAL_PHY
config NXP_C45_TJA11XX_PHY
tristate "NXP C45 TJA11XX PHYs"
+ depends on PTP_1588_CLOCK_OPTIONAL
help
Enable support for NXP C45 TJA11XX PHYs.
Currently supports only the TJA1103 PHY.
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 172bb193ae6a..b2728d00fc9a 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -64,6 +64,7 @@ obj-$(CONFIG_LXT_PHY) += lxt.o
obj-$(CONFIG_MARVELL_10G_PHY) += marvell10g.o
obj-$(CONFIG_MARVELL_PHY) += marvell.o
obj-$(CONFIG_MARVELL_88X2222_PHY) += marvell-88x2222.o
+obj-$(CONFIG_MAXLINEAR_GPHY) += mxl-gpy.o
obj-$(CONFIG_MEDIATEK_GE_PHY) += mediatek-ge.o
obj-$(CONFIG_MESON_GXL_PHY) += meson-gxl.o
obj-$(CONFIG_MICREL_KS8995MA) += spi_ks8995.o
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 5d62b85a4024..bdac087058b2 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -532,12 +532,6 @@ static int at8031_register_regulators(struct phy_device *phydev)
return 0;
}
-static bool at803x_match_phy_id(struct phy_device *phydev, u32 phy_id)
-{
- return (phydev->phy_id & phydev->drv->phy_id_mask)
- == (phy_id & phydev->drv->phy_id_mask);
-}
-
static int at803x_parse_dt(struct phy_device *phydev)
{
struct device_node *node = phydev->mdio.dev.of_node;
@@ -602,8 +596,8 @@ static int at803x_parse_dt(struct phy_device *phydev)
* to the AR8030 so there might be a good chance it works on
* the AR8030 too.
*/
- if (at803x_match_phy_id(phydev, ATH8030_PHY_ID) ||
- at803x_match_phy_id(phydev, ATH8035_PHY_ID)) {
+ if (phydev->drv->phy_id == ATH8030_PHY_ID ||
+ phydev->drv->phy_id == ATH8035_PHY_ID) {
priv->clk_25m_reg &= AT8035_CLK_OUT_MASK;
priv->clk_25m_mask &= AT8035_CLK_OUT_MASK;
}
@@ -631,7 +625,7 @@ static int at803x_parse_dt(struct phy_device *phydev)
/* Only supported on AR8031/AR8033, the AR8030/AR8035 use strapping
* options.
*/
- if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) {
+ if (phydev->drv->phy_id == ATH8031_PHY_ID) {
if (of_property_read_bool(node, "qca,keep-pll-enabled"))
priv->flags |= AT803X_KEEP_PLL_ENABLED;
@@ -676,7 +670,7 @@ static int at803x_probe(struct phy_device *phydev)
* Switch to the copper page, as otherwise we read
* the PHY capabilities from the fiber side.
*/
- if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) {
+ if (phydev->drv->phy_id == ATH8031_PHY_ID) {
phy_lock_mdio_bus(phydev);
ret = at803x_write_page(phydev, AT803X_PAGE_COPPER);
phy_unlock_mdio_bus(phydev);
@@ -709,7 +703,7 @@ static int at803x_get_features(struct phy_device *phydev)
if (err)
return err;
- if (!at803x_match_phy_id(phydev, ATH8031_PHY_ID))
+ if (phydev->drv->phy_id != ATH8031_PHY_ID)
return 0;
/* AR8031/AR8033 have different status registers
@@ -820,7 +814,7 @@ static int at803x_config_init(struct phy_device *phydev)
if (ret < 0)
return ret;
- if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) {
+ if (phydev->drv->phy_id == ATH8031_PHY_ID) {
ret = at8031_pll_config(phydev);
if (ret < 0)
return ret;
diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
index f7a2ec150e54..211b5476a6f5 100644
--- a/drivers/net/phy/dp83822.c
+++ b/drivers/net/phy/dp83822.c
@@ -326,11 +326,9 @@ static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev)
static int dp8382x_disable_wol(struct phy_device *phydev)
{
- int value = DP83822_WOL_EN | DP83822_WOL_MAGIC_EN |
- DP83822_WOL_SECURE_ON;
-
- return phy_clear_bits_mmd(phydev, DP83822_DEVADDR,
- MII_DP83822_WOL_CFG, value);
+ return phy_clear_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG,
+ DP83822_WOL_EN | DP83822_WOL_MAGIC_EN |
+ DP83822_WOL_SECURE_ON);
}
static int dp83822_read_status(struct phy_device *phydev)
diff --git a/drivers/net/phy/intel-xway.c b/drivers/net/phy/intel-xway.c
index d453ec016168..3c032868ef04 100644
--- a/drivers/net/phy/intel-xway.c
+++ b/drivers/net/phy/intel-xway.c
@@ -8,11 +8,16 @@
#include <linux/module.h>
#include <linux/phy.h>
#include <linux/of.h>
+#include <linux/bitfield.h>
+#define XWAY_MDIO_MIICTRL 0x17 /* mii control */
#define XWAY_MDIO_IMASK 0x19 /* interrupt mask */
#define XWAY_MDIO_ISTAT 0x1A /* interrupt status */
#define XWAY_MDIO_LED 0x1B /* led control */
+#define XWAY_MDIO_MIICTRL_RXSKEW_MASK GENMASK(14, 12)
+#define XWAY_MDIO_MIICTRL_TXSKEW_MASK GENMASK(10, 8)
+
/* bit 15:12 are reserved */
#define XWAY_MDIO_LED_LED3_EN BIT(11) /* Enable the integrated function of LED3 */
#define XWAY_MDIO_LED_LED2_EN BIT(10) /* Enable the integrated function of LED2 */
@@ -157,6 +162,73 @@
#define PHY_ID_PHY11G_VR9_1_2 0xD565A409
#define PHY_ID_PHY22F_VR9_1_2 0xD565A419
+static const int xway_internal_delay[] = {0, 500, 1000, 1500, 2000, 2500,
+ 3000, 3500};
+
+static int xway_gphy_rgmii_init(struct phy_device *phydev)
+{
+ struct device *dev = &phydev->mdio.dev;
+ unsigned int delay_size = ARRAY_SIZE(xway_internal_delay);
+ s32 int_delay;
+ int val = 0;
+
+ if (!phy_interface_is_rgmii(phydev))
+ return 0;
+
+ /* Existing behavior was to use default pin strapping delay in rgmii
+ * mode, but rgmii should have meant no delay. Warn existing users,
+ * but do not change anything at the moment.
+ */
+ if (phydev->interface == PHY_INTERFACE_MODE_RGMII) {
+ u16 txskew, rxskew;
+
+ val = phy_read(phydev, XWAY_MDIO_MIICTRL);
+ if (val < 0)
+ return val;
+
+ txskew = FIELD_GET(XWAY_MDIO_MIICTRL_TXSKEW_MASK, val);
+ rxskew = FIELD_GET(XWAY_MDIO_MIICTRL_RXSKEW_MASK, val);
+
+ if (txskew > 0 || rxskew > 0)
+ phydev_warn(phydev,
+ "PHY has delays (e.g. via pin strapping), but phy-mode = 'rgmii'\n"
+ "Should be 'rgmii-id' to use internal delays txskew:%d ps rxskew:%d ps\n",
+ xway_internal_delay[txskew],
+ xway_internal_delay[rxskew]);
+ return 0;
+ }
+
+ if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+ phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) {
+ int_delay = phy_get_internal_delay(phydev, dev,
+ xway_internal_delay,
+ delay_size, true);
+
+ /* if rx-internal-delay-ps is missing, use default of 2.0 ns */
+ if (int_delay < 0)
+ int_delay = 4; /* 2000 ps */
+
+ val |= FIELD_PREP(XWAY_MDIO_MIICTRL_RXSKEW_MASK, int_delay);
+ }
+
+ if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+ phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) {
+ int_delay = phy_get_internal_delay(phydev, dev,
+ xway_internal_delay,
+ delay_size, false);
+
+ /* if tx-internal-delay-ps is missing, use default of 2.0 ns */
+ if (int_delay < 0)
+ int_delay = 4; /* 2000 ps */
+
+ val |= FIELD_PREP(XWAY_MDIO_MIICTRL_TXSKEW_MASK, int_delay);
+ }
+
+ return phy_modify(phydev, XWAY_MDIO_MIICTRL,
+ XWAY_MDIO_MIICTRL_RXSKEW_MASK |
+ XWAY_MDIO_MIICTRL_TXSKEW_MASK, val);
+}
+
static int xway_gphy_config_init(struct phy_device *phydev)
{
int err;
@@ -204,6 +276,10 @@ static int xway_gphy_config_init(struct phy_device *phydev)
phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LED2H, ledxh);
phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LED2L, ledxl);
+ err = xway_gphy_rgmii_init(phydev);
+ if (err)
+ return err;
+
return 0;
}
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 3de93c9f2744..4fcfca4e1702 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -32,6 +32,7 @@
#include <linux/marvell_phy.h>
#include <linux/bitfield.h>
#include <linux/of.h>
+#include <linux/sfp.h>
#include <linux/io.h>
#include <asm/irq.h>
@@ -46,6 +47,7 @@
#define MII_MARVELL_MISC_TEST_PAGE 0x06
#define MII_MARVELL_VCT7_PAGE 0x07
#define MII_MARVELL_WOL_PAGE 0x11
+#define MII_MARVELL_MODE_PAGE 0x12
#define MII_M1011_IEVENT 0x13
#define MII_M1011_IEVENT_CLEAR 0x0000
@@ -155,6 +157,7 @@
#define MII_88E1318S_PHY_WOL_CTRL 0x10
#define MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS BIT(12)
+#define MII_88E1318S_PHY_WOL_CTRL_LINK_UP_ENABLE BIT(13)
#define MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE BIT(14)
#define MII_PHY_LED_CTRL 16
@@ -176,7 +179,14 @@
#define MII_88E1510_GEN_CTRL_REG_1 0x14
#define MII_88E1510_GEN_CTRL_REG_1_MODE_MASK 0x7
+#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII 0x0 /* RGMII to copper */
#define MII_88E1510_GEN_CTRL_REG_1_MODE_SGMII 0x1 /* SGMII to copper */
+/* RGMII to 1000BASE-X */
+#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_1000X 0x2
+/* RGMII to 100BASE-FX */
+#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_100FX 0x3
+/* RGMII to SGMII */
+#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_SGMII 0x4
#define MII_88E1510_GEN_CTRL_REG_1_RESET 0x8000 /* Soft reset */
#define MII_VCT5_TX_RX_MDI0_COUPLING 0x10
@@ -1746,13 +1756,19 @@ static void m88e1318_get_wol(struct phy_device *phydev,
{
int ret;
- wol->supported = WAKE_MAGIC;
+ wol->supported = WAKE_MAGIC | WAKE_PHY;
wol->wolopts = 0;
ret = phy_read_paged(phydev, MII_MARVELL_WOL_PAGE,
MII_88E1318S_PHY_WOL_CTRL);
- if (ret >= 0 && ret & MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE)
+ if (ret < 0)
+ return;
+
+ if (ret & MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE)
wol->wolopts |= WAKE_MAGIC;
+
+ if (ret & MII_88E1318S_PHY_WOL_CTRL_LINK_UP_ENABLE)
+ wol->wolopts |= WAKE_PHY;
}
static int m88e1318_set_wol(struct phy_device *phydev,
@@ -1764,7 +1780,7 @@ static int m88e1318_set_wol(struct phy_device *phydev,
if (oldpage < 0)
goto error;
- if (wol->wolopts & WAKE_MAGIC) {
+ if (wol->wolopts & (WAKE_MAGIC | WAKE_PHY)) {
/* Explicitly switch to page 0x00, just to be sure */
err = marvell_write_page(phydev, MII_MARVELL_COPPER_PAGE);
if (err < 0)
@@ -1796,7 +1812,9 @@ static int m88e1318_set_wol(struct phy_device *phydev,
MII_88E1318S_PHY_LED_TCR_INT_ACTIVE_LOW);
if (err < 0)
goto error;
+ }
+ if (wol->wolopts & WAKE_MAGIC) {
err = marvell_write_page(phydev, MII_MARVELL_WOL_PAGE);
if (err < 0)
goto error;
@@ -1837,6 +1855,30 @@ static int m88e1318_set_wol(struct phy_device *phydev,
goto error;
}
+ if (wol->wolopts & WAKE_PHY) {
+ err = marvell_write_page(phydev, MII_MARVELL_WOL_PAGE);
+ if (err < 0)
+ goto error;
+
+ /* Clear WOL status and enable link up event */
+ err = __phy_modify(phydev, MII_88E1318S_PHY_WOL_CTRL, 0,
+ MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS |
+ MII_88E1318S_PHY_WOL_CTRL_LINK_UP_ENABLE);
+ if (err < 0)
+ goto error;
+ } else {
+ err = marvell_write_page(phydev, MII_MARVELL_WOL_PAGE);
+ if (err < 0)
+ goto error;
+
+ /* Clear WOL status and disable link up event */
+ err = __phy_modify(phydev, MII_88E1318S_PHY_WOL_CTRL,
+ MII_88E1318S_PHY_WOL_CTRL_LINK_UP_ENABLE,
+ MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS);
+ if (err < 0)
+ goto error;
+ }
+
error:
return phy_restore_page(phydev, oldpage, err);
}
@@ -2701,6 +2743,100 @@ static int marvell_probe(struct phy_device *phydev)
return marvell_hwmon_probe(phydev);
}
+/* SFP upstream callback invoked when a module is inserted.
+ *
+ * Derives the interface from the module EEPROM, programs the matching value
+ * into the mode field of general control register 1 on the mode page and then
+ * sets that register's RESET bit. Unsupported interfaces yield -EINVAL;
+ * otherwise the result of phy_restore_page() is returned.
+ */
+static int m88e1510_sfp_insert(void *upstream, const struct sfp_eeprom_id *id)
+{
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
+	struct phy_device *phydev = upstream;
+	struct device *dev = &phydev->mdio.dev;
+	phy_interface_t interface;
+	int oldpage, ret = 0;
+	u16 mode;
+
+	sfp_parse_support(phydev->sfp_bus, id, supported);
+	interface = sfp_select_interface(phydev->sfp_bus, supported);
+
+	dev_info(dev, "%s SFP module inserted\n", phy_modes(interface));
+
+	if (interface == PHY_INTERFACE_MODE_1000BASEX) {
+		mode = MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_1000X;
+	} else if (interface == PHY_INTERFACE_MODE_100BASEX) {
+		mode = MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_100FX;
+	} else if (interface == PHY_INTERFACE_MODE_SGMII) {
+		mode = MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_SGMII;
+	} else {
+		dev_err(dev, "Incompatible SFP module inserted\n");
+		return -EINVAL;
+	}
+
+	/* phy_restore_page() must run even when selecting the page failed */
+	oldpage = phy_select_page(phydev, MII_MARVELL_MODE_PAGE);
+	if (oldpage >= 0) {
+		ret = __phy_modify(phydev, MII_88E1510_GEN_CTRL_REG_1,
+				   MII_88E1510_GEN_CTRL_REG_1_MODE_MASK, mode);
+		if (ret >= 0)
+			ret = __phy_set_bits(phydev, MII_88E1510_GEN_CTRL_REG_1,
+					     MII_88E1510_GEN_CTRL_REG_1_RESET);
+	}
+
+	return phy_restore_page(phydev, oldpage, ret);
+}
+
+/* SFP upstream callback invoked when a module is removed.
+ *
+ * Puts the mode field of general control register 1 back to the RGMII value
+ * and sets the RESET bit. Errors cannot be reported to the caller, so the
+ * result of phy_restore_page() is dropped.
+ */
+static void m88e1510_sfp_remove(void *upstream)
+{
+	struct phy_device *phydev = upstream;
+	int oldpage, ret = 0;
+
+	/* phy_restore_page() must run even when selecting the page failed */
+	oldpage = phy_select_page(phydev, MII_MARVELL_MODE_PAGE);
+	if (oldpage >= 0) {
+		ret = __phy_modify(phydev, MII_88E1510_GEN_CTRL_REG_1,
+				   MII_88E1510_GEN_CTRL_REG_1_MODE_MASK,
+				   MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII);
+		if (ret >= 0)
+			ret = __phy_set_bits(phydev, MII_88E1510_GEN_CTRL_REG_1,
+					     MII_88E1510_GEN_CTRL_REG_1_RESET);
+	}
+
+	phy_restore_page(phydev, oldpage, ret);
+}
+
+/* SFP upstream operations registered by m88e1510_probe(): module insert and
+ * remove are handled by this driver, attach and detach by the phylib helpers.
+ */
+static const struct sfp_upstream_ops m88e1510_sfp_ops = {
+ .module_insert = m88e1510_sfp_insert,
+ .module_remove = m88e1510_sfp_remove,
+ .attach = phy_sfp_attach,
+ .detach = phy_sfp_detach,
+};
+
+/* Probe for the 88E1510: run the common Marvell probe and, if that succeeds,
+ * register the SFP upstream operations. Returns the first non-zero result.
+ */
+static int m88e1510_probe(struct phy_device *phydev)
+{
+	int err = marvell_probe(phydev);
+
+	return err ? err : phy_sfp_probe(phydev, &m88e1510_sfp_ops);
+}
+
static struct phy_driver marvell_drivers[] = {
{
.phy_id = MARVELL_PHY_ID_88E1101,
@@ -2927,7 +3063,7 @@ static struct phy_driver marvell_drivers[] = {
.driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
.features = PHY_GBIT_FIBRE_FEATURES,
.flags = PHY_POLL_CABLE_TEST,
- .probe = marvell_probe,
+ .probe = m88e1510_probe,
.config_init = m88e1510_config_init,
.config_aneg = m88e1510_config_aneg,
.read_status = marvell_read_status,
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 53a433442803..bd310e8d5e43 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -28,6 +28,7 @@
#include <linux/marvell_phy.h>
#include <linux/phy.h>
#include <linux/sfp.h>
+#include <linux/netdevice.h>
#define MV_PHY_ALASKA_NBT_QUIRK_MASK 0xfffffffe
#define MV_PHY_ALASKA_NBT_QUIRK_REV (MARVELL_PHY_ID_88X3310 | 0xa)
@@ -104,6 +105,16 @@ enum {
MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_NO_SGMII_AN = 0x5,
MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH = 0x6,
MV_V2_33X0_PORT_CTRL_MACTYPE_USXGMII = 0x7,
+ MV_V2_PORT_INTR_STS = 0xf040,
+ MV_V2_PORT_INTR_MASK = 0xf043,
+ MV_V2_PORT_INTR_STS_WOL_EN = BIT(8),
+ MV_V2_MAGIC_PKT_WORD0 = 0xf06b,
+ MV_V2_MAGIC_PKT_WORD1 = 0xf06c,
+ MV_V2_MAGIC_PKT_WORD2 = 0xf06d,
+ /* Wake on LAN registers */
+ MV_V2_WOL_CTRL = 0xf06e,
+ MV_V2_WOL_CTRL_CLEAR_STS = BIT(15),
+ MV_V2_WOL_CTRL_MAGIC_PKT_EN = BIT(0),
/* Temperature control/read registers (88X3310 only) */
MV_V2_TEMP_CTRL = 0xf08a,
MV_V2_TEMP_CTRL_MASK = 0xc000,
@@ -987,11 +998,19 @@ static int mv3310_get_number_of_ports(struct phy_device *phydev)
static int mv3310_match_phy_device(struct phy_device *phydev)
{
+ /* Only consider devices whose PMA/PMD ID matches the 88X3310 */
+ if ((phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] &
+ MARVELL_PHY_ID_MASK) != MARVELL_PHY_ID_88X3310)
+ return 0;
+
return mv3310_get_number_of_ports(phydev) == 1;
}
static int mv3340_match_phy_device(struct phy_device *phydev)
{
+ /* Only consider devices whose PMA/PMD ID matches the 88X3310 */
+ if ((phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] &
+ MARVELL_PHY_ID_MASK) != MARVELL_PHY_ID_88X3310)
+ return 0;
+
return mv3310_get_number_of_ports(phydev) == 4;
}
@@ -1020,6 +1039,80 @@ static int mv2111_match_phy_device(struct phy_device *phydev)
return mv211x_match_phy_device(phydev, false);
}
+/* Report Wake-on-LAN capabilities and state: only magic packet wake-up is
+ * supported; it is reported as active when the magic packet enable bit of
+ * the WoL control register is set. A failed read leaves wolopts at 0.
+ */
+static void mv3110_get_wol(struct phy_device *phydev,
+			   struct ethtool_wolinfo *wol)
+{
+	int ctrl;
+
+	wol->supported = WAKE_MAGIC;
+	wol->wolopts = 0;
+
+	ctrl = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_V2_WOL_CTRL);
+	if (ctrl >= 0 && (ctrl & MV_V2_WOL_CTRL_MAGIC_PKT_EN))
+		wol->wolopts |= WAKE_MAGIC;
+}
+
+/* Enable or disable magic packet Wake-on-LAN.
+ *
+ * When WAKE_MAGIC is requested, the WoL interrupt is unmasked, the attached
+ * net device's MAC address is stored in the three magic packet registers and
+ * matching is enabled. Otherwise matching is disabled. In both cases the
+ * clear-status bit is pulsed: set first, then cleared again at the end.
+ * Negative return values are register access errors.
+ */
+static int mv3110_set_wol(struct phy_device *phydev,
+			  struct ethtool_wolinfo *wol)
+{
+	int err;
+
+	if (!(wol->wolopts & WAKE_MAGIC)) {
+		/* Disable magic packet matching & reset WOL status bit */
+		err = phy_modify_mmd(phydev, MDIO_MMD_VEND2,
+				     MV_V2_WOL_CTRL,
+				     MV_V2_WOL_CTRL_MAGIC_PKT_EN,
+				     MV_V2_WOL_CTRL_CLEAR_STS);
+		if (err < 0)
+			return err;
+	} else {
+		const u8 *mac = phydev->attached_dev->dev_addr;
+
+		/* Enable the WOL interrupt */
+		err = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2,
+				       MV_V2_PORT_INTR_MASK,
+				       MV_V2_PORT_INTR_STS_WOL_EN);
+		if (err < 0)
+			return err;
+
+		/* Store the device address for the magic packet, two bytes
+		 * per register with the higher-indexed byte in the upper half
+		 */
+		err = phy_write_mmd(phydev, MDIO_MMD_VEND2,
+				    MV_V2_MAGIC_PKT_WORD2,
+				    (mac[5] << 8) | mac[4]);
+		if (err < 0)
+			return err;
+
+		err = phy_write_mmd(phydev, MDIO_MMD_VEND2,
+				    MV_V2_MAGIC_PKT_WORD1,
+				    (mac[3] << 8) | mac[2]);
+		if (err < 0)
+			return err;
+
+		err = phy_write_mmd(phydev, MDIO_MMD_VEND2,
+				    MV_V2_MAGIC_PKT_WORD0,
+				    (mac[1] << 8) | mac[0]);
+		if (err < 0)
+			return err;
+
+		/* Clear WOL status and enable magic packet matching */
+		err = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2,
+				       MV_V2_WOL_CTRL,
+				       MV_V2_WOL_CTRL_MAGIC_PKT_EN |
+				       MV_V2_WOL_CTRL_CLEAR_STS);
+		if (err < 0)
+			return err;
+	}
+
+	/* Reset the clear WOL status bit as it does not self-clear */
+	return phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2,
+				  MV_V2_WOL_CTRL,
+				  MV_V2_WOL_CTRL_CLEAR_STS);
+}
+
static struct phy_driver mv3310_drivers[] = {
{
.phy_id = MARVELL_PHY_ID_88X3310,
@@ -1039,6 +1132,8 @@ static struct phy_driver mv3310_drivers[] = {
.set_tunable = mv3310_set_tunable,
.remove = mv3310_remove,
.set_loopback = genphy_c45_loopback,
+ .get_wol = mv3110_get_wol,
+ .set_wol = mv3110_set_wol,
},
{
.phy_id = MARVELL_PHY_ID_88X3310,
@@ -1076,6 +1171,8 @@ static struct phy_driver mv3310_drivers[] = {
.set_tunable = mv3310_set_tunable,
.remove = mv3310_remove,
.set_loopback = genphy_c45_loopback,
+ .get_wol = mv3110_get_wol,
+ .set_wol = mv3110_set_wol,
},
{
.phy_id = MARVELL_PHY_ID_88E2110,
diff --git a/drivers/net/phy/mediatek-ge.c b/drivers/net/phy/mediatek-ge.c
index 11ff335d6228..b7a5ae20edd5 100644
--- a/drivers/net/phy/mediatek-ge.c
+++ b/drivers/net/phy/mediatek-ge.c
@@ -81,6 +81,8 @@ static struct phy_driver mtk_gephy_driver[] = {
*/
.config_intr = genphy_no_config_intr,
.handle_interrupt = genphy_handle_interrupt_no_ack,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
.read_page = mtk_gephy_read_page,
.write_page = mtk_gephy_write_page,
},
@@ -93,6 +95,8 @@ static struct phy_driver mtk_gephy_driver[] = {
*/
.config_intr = genphy_no_config_intr,
.handle_interrupt = genphy_handle_interrupt_no_ack,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
.read_page = mtk_gephy_read_page,
.write_page = mtk_gephy_write_page,
},
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 53bdd673ae56..5c928f827173 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -1760,8 +1760,6 @@ static struct phy_driver ksphy_driver[] = {
.name = "Micrel KSZ87XX Switch",
/* PHY_BASIC_FEATURES */
.config_init = kszphy_config_init,
- .config_aneg = ksz8873mll_config_aneg,
- .read_status = ksz8873mll_read_status,
.match_phy_device = ksz8795_match_phy_device,
.suspend = genphy_suspend,
.resume = genphy_resume,
diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c
index 924ed5b034a4..edb951695b13 100644
--- a/drivers/net/phy/mscc/mscc_ptp.c
+++ b/drivers/net/phy/mscc/mscc_ptp.c
@@ -506,7 +506,7 @@ static int vsc85xx_ptp_cmp_init(struct phy_device *phydev, enum ts_blk blk)
{
struct vsc8531_private *vsc8531 = phydev->priv;
bool base = phydev->mdio.addr == vsc8531->ts_base_addr;
- u8 msgs[] = {
+ static const u8 msgs[] = {
PTP_MSGTYPE_SYNC,
PTP_MSGTYPE_DELAY_REQ
};
@@ -847,7 +847,7 @@ static int vsc85xx_ts_ptp_action_flow(struct phy_device *phydev, enum ts_blk blk
static int vsc85xx_ptp_conf(struct phy_device *phydev, enum ts_blk blk,
bool one_step, bool enable)
{
- u8 msgs[] = {
+ static const u8 msgs[] = {
PTP_MSGTYPE_SYNC,
PTP_MSGTYPE_DELAY_REQ
};
@@ -1268,8 +1268,8 @@ static void vsc8584_set_input_clk_configured(struct phy_device *phydev)
static int __vsc8584_init_ptp(struct phy_device *phydev)
{
struct vsc8531_private *vsc8531 = phydev->priv;
- u32 ltc_seq_e[] = { 0, 400000, 0, 0, 0 };
- u8 ltc_seq_a[] = { 8, 6, 5, 4, 2 };
+ static const u32 ltc_seq_e[] = { 0, 400000, 0, 0, 0 };
+ static const u8 ltc_seq_a[] = { 8, 6, 5, 4, 2 };
u32 val;
if (!vsc8584_is_1588_input_clk_configured(phydev)) {
diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c
new file mode 100644
index 000000000000..2d5d5081c3b6
--- /dev/null
+++ b/drivers/net/phy/mxl-gpy.c
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2021 Maxlinear Corporation
+ * Copyright (C) 2020 Intel Corporation
+ *
+ * Drivers for Maxlinear Ethernet GPY
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/bitfield.h>
+#include <linux/phy.h>
+#include <linux/netdevice.h>
+
+/* PHY ID */
+#define PHY_ID_GPYx15B_MASK 0xFFFFFFFC
+#define PHY_ID_GPY21xB_MASK 0xFFFFFFF9
+#define PHY_ID_GPY2xx 0x67C9DC00
+#define PHY_ID_GPY115B 0x67C9DF00
+#define PHY_ID_GPY115C 0x67C9DF10
+#define PHY_ID_GPY211B 0x67C9DE08
+#define PHY_ID_GPY211C 0x67C9DE10
+#define PHY_ID_GPY212B 0x67C9DE09
+#define PHY_ID_GPY212C 0x67C9DE20
+#define PHY_ID_GPY215B 0x67C9DF04
+#define PHY_ID_GPY215C 0x67C9DF20
+#define PHY_ID_GPY241B 0x67C9DE40
+#define PHY_ID_GPY241BM 0x67C9DE80
+#define PHY_ID_GPY245B 0x67C9DEC0
+
+#define PHY_MIISTAT 0x18 /* MII state */
+#define PHY_IMASK 0x19 /* interrupt mask */
+#define PHY_ISTAT 0x1A /* interrupt status */
+#define PHY_FWV 0x1E /* firmware version */
+
+#define PHY_MIISTAT_SPD_MASK GENMASK(2, 0)
+#define PHY_MIISTAT_DPX BIT(3)
+#define PHY_MIISTAT_LS BIT(10)
+
+#define PHY_MIISTAT_SPD_10 0
+#define PHY_MIISTAT_SPD_100 1
+#define PHY_MIISTAT_SPD_1000 2
+#define PHY_MIISTAT_SPD_2500 4
+
+#define PHY_IMASK_WOL BIT(15) /* Wake-on-LAN */
+#define PHY_IMASK_ANC BIT(10) /* Auto-Neg complete */
+#define PHY_IMASK_ADSC BIT(5) /* Link auto-downspeed detect */
+#define PHY_IMASK_DXMC BIT(2) /* Duplex mode change */
+#define PHY_IMASK_LSPC BIT(1) /* Link speed change */
+#define PHY_IMASK_LSTC BIT(0) /* Link state change */
+#define PHY_IMASK_MASK (PHY_IMASK_LSTC | \
+ PHY_IMASK_LSPC | \
+ PHY_IMASK_DXMC | \
+ PHY_IMASK_ADSC | \
+ PHY_IMASK_ANC)
+
+#define PHY_FWV_REL_MASK BIT(15)
+#define PHY_FWV_TYPE_MASK GENMASK(11, 8)
+#define PHY_FWV_MINOR_MASK GENMASK(7, 0)
+
+/* SGMII */
+#define VSPEC1_SGMII_CTRL 0x08
+#define VSPEC1_SGMII_CTRL_ANEN BIT(12) /* Aneg enable */
+#define VSPEC1_SGMII_CTRL_ANRS BIT(9) /* Restart Aneg */
+#define VSPEC1_SGMII_ANEN_ANRS (VSPEC1_SGMII_CTRL_ANEN | \
+ VSPEC1_SGMII_CTRL_ANRS)
+
+/* WoL */
+#define VPSPEC2_WOL_CTL 0x0E06
+#define VPSPEC2_WOL_AD01 0x0E08
+#define VPSPEC2_WOL_AD23 0x0E09
+#define VPSPEC2_WOL_AD45 0x0E0A
+#define WOL_EN BIT(0)
+
+/* Firmware type / minor version pairs consulted by gpy_sgmii_need_reaneg():
+ * for a matching type, a firmware minor version below the listed value means
+ * the SGMII re-ANEG workaround is needed.
+ */
+static const struct {
+ int type;
+ int minor;
+} ver_need_sgmii_reaneg[] = {
+ {7, 0x6D},
+ {8, 0x6D},
+ {9, 0x73},
+};
+
+/* Bring the interrupt logic into a known state: mask every interrupt source
+ * and read the status register to clear anything pending. Returns 0 on
+ * success or the error from the failing register access.
+ */
+static int gpy_config_init(struct phy_device *phydev)
+{
+	int err;
+
+	/* Mask all interrupts */
+	err = phy_write(phydev, PHY_IMASK, 0);
+	if (err)
+		return err;
+
+	/* Clear all pending interrupts */
+	err = phy_read(phydev, PHY_ISTAT);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+/* Probe: fetch the clause 45 IDs when the device was not detected as C45,
+ * then read the firmware version register and print it.
+ * Returns 0 on success or a negative error from the register accesses.
+ */
+static int gpy_probe(struct phy_device *phydev)
+{
+	const char *fw_kind;
+	int fw_ver;
+
+	if (!phydev->is_c45) {
+		int err = phy_get_c45_ids(phydev);
+
+		if (err < 0)
+			return err;
+	}
+
+	/* Show GPY PHY FW version in dmesg */
+	fw_ver = phy_read(phydev, PHY_FWV);
+	if (fw_ver < 0)
+		return fw_ver;
+
+	fw_kind = (fw_ver & PHY_FWV_REL_MASK) ? "release" : "test";
+	phydev_info(phydev, "Firmware Version: 0x%04X (%s)\n", fw_ver, fw_kind);
+
+	return 0;
+}
+
+/* Decide whether the running firmware needs the SGMII re-ANEG workaround.
+ *
+ * The firmware type is looked up in ver_need_sgmii_reaneg[]; the first entry
+ * of that type decides: the workaround is needed when the firmware minor
+ * version is below the listed one. Unlisted types do not need it. If the
+ * version register cannot be read, the workaround is assumed to be needed.
+ */
+static bool gpy_sgmii_need_reaneg(struct phy_device *phydev)
+{
+	int version, type, minor;
+	size_t idx;
+
+	version = phy_read(phydev, PHY_FWV);
+	if (version < 0)
+		return true;
+
+	type = FIELD_GET(PHY_FWV_TYPE_MASK, version);
+	minor = FIELD_GET(PHY_FWV_MINOR_MASK, version);
+
+	for (idx = 0; idx < ARRAY_SIZE(ver_need_sgmii_reaneg); idx++) {
+		if (type == ver_need_sgmii_reaneg[idx].type)
+			return minor < ver_need_sgmii_reaneg[idx].minor;
+	}
+
+	return false;
+}
+
+/* Check whether the link is currently up at 2.5G.
+ *
+ * If so, record SPEED_2500 and the 2500BASE-X interface in @phydev, disable
+ * SGMII ANEG and return true. Returns false when the link is down, runs at
+ * another speed, or the status register cannot be read.
+ */
+static bool gpy_2500basex_chk(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = phy_read(phydev, PHY_MIISTAT);
+	if (ret < 0) {
+		phydev_err(phydev, "Error: MDIO register access failed: %d\n",
+			   ret);
+		return false;
+	}
+
+	if (!(ret & PHY_MIISTAT_LS) ||
+	    FIELD_GET(PHY_MIISTAT_SPD_MASK, ret) != PHY_MIISTAT_SPD_2500)
+		return false;
+
+	phydev->speed = SPEED_2500;
+	phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
+
+	/* Best effort: the link check result stands even if the write fails,
+	 * but report the failure instead of dropping it silently.
+	 */
+	ret = phy_modify_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL,
+			     VSPEC1_SGMII_CTRL_ANEN, 0);
+	if (ret < 0)
+		phydev_err(phydev,
+			   "Error: Disable of SGMII ANEG failed: %d\n",
+			   ret);
+
+	return true;
+}
+
+/* Report whether SGMII ANEG is enabled in the SGMII control register.
+ * A failed register read is logged and treated as "enabled".
+ */
+static bool gpy_sgmii_aneg_en(struct phy_device *phydev)
+{
+	int ctrl = phy_read_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL);
+
+	if (ctrl < 0) {
+		phydev_err(phydev, "Error: MMD register access failed: %d\n",
+			   ctrl);
+		return true;
+	}
+
+	return !!(ctrl & VSPEC1_SGMII_CTRL_ANEN);
+}
+
+/* Configure forced mode or autonegotiation, then retrigger SGMII ANEG when
+ * the firmware workaround described below applies.
+ * Negative return values propagate errors from the helpers called.
+ */
+static int gpy_config_aneg(struct phy_device *phydev)
+{
+ bool changed = false;
+ u32 adv;
+ int ret;
+
+ if (phydev->autoneg == AUTONEG_DISABLE) {
+ /* Configure half duplex with genphy_setup_forced,
+ * because genphy_c45_pma_setup_forced does not support it.
+ */
+ return phydev->duplex != DUPLEX_FULL
+ ? genphy_setup_forced(phydev)
+ : genphy_c45_pma_setup_forced(phydev);
+ }
+
+ ret = genphy_c45_an_config_aneg(phydev);
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ changed = true;
+
+ /* The 1000BASE-T advertisement is written separately to MII_CTRL1000 */
+ adv = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising);
+ ret = phy_modify_changed(phydev, MII_CTRL1000,
+ ADVERTISE_1000FULL | ADVERTISE_1000HALF,
+ adv);
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ changed = true;
+
+ ret = genphy_c45_check_and_restart_aneg(phydev, changed);
+ if (ret < 0)
+ return ret;
+
+ /* The SGMII workaround below does not apply to these interface modes */
+ if (phydev->interface == PHY_INTERFACE_MODE_USXGMII ||
+ phydev->interface == PHY_INTERFACE_MODE_INTERNAL)
+ return 0;
+
+ /* No need to trigger re-ANEG if link speed is 2.5G or SGMII ANEG is
+ * disabled.
+ */
+ if (!gpy_sgmii_need_reaneg(phydev) || gpy_2500basex_chk(phydev) ||
+ !gpy_sgmii_aneg_en(phydev))
+ return 0;
+
+ /* There is a design constraint in GPY2xx device where SGMII AN is
+ * only triggered when there is a change of speed. If the PHY link
+ * partner's speed is still the same even after PHY TPI is down and up
+ * again, SGMII AN is not triggered and hence no new in-band message
+ * from GPY to MAC side SGMII.
+ * This could cause an issue during power up, when PHY is up prior to
+ * MAC. At this condition, once MAC side SGMII is up, MAC side SGMII
+ * wouldn't receive a new in-band message from GPY with correct link
+ * status, speed and duplex info.
+ *
+ * 1) If PHY is already up and TPI link status is still down (such as
+ * hard reboot), TPI link status is polled for 4 seconds before
+ * retriggering SGMII AN.
+ * 2) If PHY is already up and TPI link status is also up (such as soft
+ * reboot), polling of TPI link status is not needed and SGMII AN is
+ * immediately retriggered.
+ * 3) Other conditions such as PHY is down, speed change etc, skip
+ * retriggering SGMII AN. Note: in case of speed change, GPY FW will
+ * initiate SGMII AN.
+ */
+
+ if (phydev->state != PHY_UP)
+ return 0;
+
+ /* ret doubles as the variable receiving each polled BMSR value */
+ ret = phy_read_poll_timeout(phydev, MII_BMSR, ret, ret & BMSR_LSTATUS,
+ 20000, 4000000, false);
+ if (ret == -ETIMEDOUT)
+ return 0;
+ else if (ret < 0)
+ return ret;
+
+ /* Trigger SGMII AN. */
+ return phy_modify_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL,
+ VSPEC1_SGMII_CTRL_ANRS, VSPEC1_SGMII_CTRL_ANRS);
+}
+
+/* Switch the SERDES interface between SGMII and 2500BASE-X according to the
+ * current link speed. SGMII ANEG is disabled for 2.5G and, if it was
+ * disabled, re-enabled and restarted for 10/100/1000. Register write
+ * failures are only logged. Other speeds leave everything untouched.
+ */
+static void gpy_update_interface(struct phy_device *phydev)
+{
+	int err;
+
+	/* Interface mode is fixed for USXGMII and integrated PHY */
+	if (phydev->interface == PHY_INTERFACE_MODE_USXGMII ||
+	    phydev->interface == PHY_INTERFACE_MODE_INTERNAL)
+		return;
+
+	if (phydev->speed == SPEED_2500) {
+		/* ANEG is not used in 2500-BaseX mode */
+		phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
+		err = phy_modify_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL,
+				     VSPEC1_SGMII_CTRL_ANEN, 0);
+		if (err < 0)
+			phydev_err(phydev,
+				   "Error: Disable of SGMII ANEG failed: %d\n",
+				   err);
+		return;
+	}
+
+	if (phydev->speed != SPEED_1000 && phydev->speed != SPEED_100 &&
+	    phydev->speed != SPEED_10)
+		return;
+
+	phydev->interface = PHY_INTERFACE_MODE_SGMII;
+	if (gpy_sgmii_aneg_en(phydev))
+		return;
+
+	/* Enable and restart SGMII ANEG for 10/100/1000Mbps link speed
+	 * if ANEG is disabled (in 2500-BaseX mode).
+	 */
+	err = phy_modify_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL,
+			     VSPEC1_SGMII_ANEN_ANRS,
+			     VSPEC1_SGMII_ANEN_ANRS);
+	if (err < 0)
+		phydev_err(phydev,
+			   "Error: Enable of SGMII ANEG failed: %d\n",
+			   err);
+}
+
+/* Refresh the link status in @phydev.
+ *
+ * Calls genphy_update_link(), reads the link partner abilities once
+ * autonegotiation has completed, then takes link, duplex and speed from the
+ * PHY_MIISTAT register and updates the interface mode while the link is up.
+ * Returns 0 on success or the error from a failing helper/register read.
+ */
+static int gpy_read_status(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = genphy_update_link(phydev);
+ if (ret)
+ return ret;
+
+ /* Start from unknown values; they are filled in from PHY_MIISTAT below */
+ phydev->speed = SPEED_UNKNOWN;
+ phydev->duplex = DUPLEX_UNKNOWN;
+ phydev->pause = 0;
+ phydev->asym_pause = 0;
+
+ if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
+ ret = genphy_c45_read_lpa(phydev);
+ if (ret < 0)
+ return ret;
+
+ /* Read the link partner's 1G advertisement */
+ ret = phy_read(phydev, MII_STAT1000);
+ if (ret < 0)
+ return ret;
+ mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, ret);
+ } else if (phydev->autoneg == AUTONEG_DISABLE) {
+ linkmode_zero(phydev->lp_advertising);
+ }
+
+ ret = phy_read(phydev, PHY_MIISTAT);
+ if (ret < 0)
+ return ret;
+
+ /* Link, duplex and speed all come from the same PHY_MIISTAT value */
+ phydev->link = (ret & PHY_MIISTAT_LS) ? 1 : 0;
+ phydev->duplex = (ret & PHY_MIISTAT_DPX) ? DUPLEX_FULL : DUPLEX_HALF;
+ /* A speed code not listed below leaves speed at SPEED_UNKNOWN */
+ switch (FIELD_GET(PHY_MIISTAT_SPD_MASK, ret)) {
+ case PHY_MIISTAT_SPD_10:
+ phydev->speed = SPEED_10;
+ break;
+ case PHY_MIISTAT_SPD_100:
+ phydev->speed = SPEED_100;
+ break;
+ case PHY_MIISTAT_SPD_1000:
+ phydev->speed = SPEED_1000;
+ break;
+ case PHY_MIISTAT_SPD_2500:
+ phydev->speed = SPEED_2500;
+ break;
+ }
+
+ if (phydev->link)
+ gpy_update_interface(phydev);
+
+ return 0;
+}
+
+/* Program the interrupt mask register: the PHY_IMASK_MASK sources when
+ * interrupts are enabled for this PHY, nothing otherwise.
+ * Returns the result of the register write.
+ */
+static int gpy_config_intr(struct phy_device *phydev)
+{
+	u16 mask = phydev->interrupts == PHY_INTERRUPT_ENABLED ?
+		   PHY_IMASK_MASK : 0;
+
+	return phy_write(phydev, PHY_IMASK, mask);
+}
+
+/* Interrupt handler: read the interrupt status register and kick the PHY
+ * state machine if any of the PHY_IMASK_MASK sources fired. A failed read is
+ * reported through phy_error(). Returns IRQ_HANDLED only when the state
+ * machine was triggered.
+ */
+static irqreturn_t gpy_handle_interrupt(struct phy_device *phydev)
+{
+	int status = phy_read(phydev, PHY_ISTAT);
+
+	if (status < 0) {
+		phy_error(phydev);
+		return IRQ_NONE;
+	}
+
+	if (status & PHY_IMASK_MASK) {
+		phy_trigger_machine(phydev);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+/* Configure Wake-on-LAN.
+ *
+ * WAKE_MAGIC: store the attached net device's MAC address in the WoL address
+ * registers, enable the WoL interrupt and magic packet matching; when not
+ * requested, magic packet matching is disabled.
+ * WAKE_PHY: enable the link state change interrupt; when not requested, that
+ * interrupt is disabled.
+ * Negative return values are register access errors.
+ */
+static int gpy_set_wol(struct phy_device *phydev,
+		       struct ethtool_wolinfo *wol)
+{
+	struct net_device *attach_dev = phydev->attached_dev;
+	int ret;
+
+	if (wol->wolopts & WAKE_MAGIC) {
+		/* MAC address - Byte0:Byte1:Byte2:Byte3:Byte4:Byte5
+		 * VPSPEC2_WOL_AD45 = Byte0:Byte1
+		 * VPSPEC2_WOL_AD23 = Byte2:Byte3
+		 * VPSPEC2_WOL_AD01 = Byte4:Byte5
+		 *
+		 * The registers are written as a whole: OR-ing the address
+		 * into the previous contents would leave stale bits behind
+		 * whenever the MAC address has changed since the last call.
+		 */
+		ret = phy_write_mmd(phydev, MDIO_MMD_VEND2,
+				    VPSPEC2_WOL_AD45,
+				    ((attach_dev->dev_addr[0] << 8) |
+				    attach_dev->dev_addr[1]));
+		if (ret < 0)
+			return ret;
+
+		ret = phy_write_mmd(phydev, MDIO_MMD_VEND2,
+				    VPSPEC2_WOL_AD23,
+				    ((attach_dev->dev_addr[2] << 8) |
+				    attach_dev->dev_addr[3]));
+		if (ret < 0)
+			return ret;
+
+		ret = phy_write_mmd(phydev, MDIO_MMD_VEND2,
+				    VPSPEC2_WOL_AD01,
+				    ((attach_dev->dev_addr[4] << 8) |
+				    attach_dev->dev_addr[5]));
+		if (ret < 0)
+			return ret;
+
+		/* Enable the WOL interrupt.
+		 * NOTE(review): this overwrites the whole interrupt mask, so
+		 * sources enabled by gpy_config_intr() are masked again here
+		 * - confirm this is intended.
+		 */
+		ret = phy_write(phydev, PHY_IMASK, PHY_IMASK_WOL);
+		if (ret < 0)
+			return ret;
+
+		/* Enable magic packet matching */
+		ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2,
+				       VPSPEC2_WOL_CTL,
+				       WOL_EN);
+		if (ret < 0)
+			return ret;
+
+		/* Clear the interrupt status register.
+		 * Only WoL is enabled so clear all.
+		 */
+		ret = phy_read(phydev, PHY_ISTAT);
+		if (ret < 0)
+			return ret;
+	} else {
+		/* Disable magic packet matching */
+		ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2,
+					 VPSPEC2_WOL_CTL,
+					 WOL_EN);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (wol->wolopts & WAKE_PHY) {
+		/* Enable the link state change interrupt */
+		ret = phy_set_bits(phydev, PHY_IMASK, PHY_IMASK_LSTC);
+		if (ret < 0)
+			return ret;
+
+		/* Clear the interrupt status register */
+		ret = phy_read(phydev, PHY_ISTAT);
+		if (ret < 0)
+			return ret;
+
+		/* The read above also cleared any other pending event; let
+		 * the state machine pick those up instead of losing them.
+		 */
+		if (ret & (PHY_IMASK_MASK & ~PHY_IMASK_LSTC))
+			phy_trigger_machine(phydev);
+
+		return 0;
+	}
+
+	/* Disable the link state change interrupt */
+	return phy_clear_bits(phydev, PHY_IMASK, PHY_IMASK_LSTC);
+}
+
+/* Report Wake-on-LAN capabilities and current settings.
+ *
+ * Magic packet and link change wake-up are supported. WAKE_MAGIC is reported
+ * when WOL_EN is set in the WoL control register, WAKE_PHY when the link
+ * state change interrupt is enabled in the interrupt mask register.
+ */
+static void gpy_get_wol(struct phy_device *phydev,
+			struct ethtool_wolinfo *wol)
+{
+	int ret;
+
+	wol->supported = WAKE_MAGIC | WAKE_PHY;
+	wol->wolopts = 0;
+
+	/* A failed read returns a negative errno, which has many bits set;
+	 * testing it against a flag would report options that are not
+	 * enabled. Only look at the bits of a successful read.
+	 */
+	ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, VPSPEC2_WOL_CTL);
+	if (ret >= 0 && (ret & WOL_EN))
+		wol->wolopts |= WAKE_MAGIC;
+
+	ret = phy_read(phydev, PHY_IMASK);
+	if (ret >= 0 && (ret & PHY_IMASK_LSTC))
+		wol->wolopts |= WAKE_PHY;
+}
+
+/* Enable or disable loopback through the BMCR loopback bit, then wait for
+ * the PHY to settle. Returns 0 on success, otherwise the non-zero result of
+ * the register update (no wait in that case).
+ */
+static int gpy_loopback(struct phy_device *phydev, bool enable)
+{
+	u16 set = enable ? BMCR_LOOPBACK : 0;
+	int err;
+
+	err = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, set);
+	if (err)
+		return err;
+
+	/* It takes some time for PHY device to switch
+	 * into/out-of loopback mode.
+	 */
+	msleep(100);
+
+	return 0;
+}
+
+/* Driver table: one entry per supported GPY variant. All entries use the
+ * same callbacks and differ only in the PHY ID match and the name. The "B"
+ * variants GPY115B/211B/212B/215B use an explicit ID mask, the remaining
+ * entries use PHY_ID_MATCH_MODEL().
+ */
+static struct phy_driver gpy_drivers[] = {
+ {
+ PHY_ID_MATCH_MODEL(PHY_ID_GPY2xx),
+ .name = "Maxlinear Ethernet GPY2xx",
+ .get_features = genphy_c45_pma_read_abilities,
+ .config_init = gpy_config_init,
+ .probe = gpy_probe,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .config_aneg = gpy_config_aneg,
+ .aneg_done = genphy_c45_aneg_done,
+ .read_status = gpy_read_status,
+ .config_intr = gpy_config_intr,
+ .handle_interrupt = gpy_handle_interrupt,
+ .set_wol = gpy_set_wol,
+ .get_wol = gpy_get_wol,
+ .set_loopback = gpy_loopback,
+ },
+ {
+ .phy_id = PHY_ID_GPY115B,
+ .phy_id_mask = PHY_ID_GPYx15B_MASK,
+ .name = "Maxlinear Ethernet GPY115B",
+ .get_features = genphy_c45_pma_read_abilities,
+ .config_init = gpy_config_init,
+ .probe = gpy_probe,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .config_aneg = gpy_config_aneg,
+ .aneg_done = genphy_c45_aneg_done,
+ .read_status = gpy_read_status,
+ .config_intr = gpy_config_intr,
+ .handle_interrupt = gpy_handle_interrupt,
+ .set_wol = gpy_set_wol,
+ .get_wol = gpy_get_wol,
+ .set_loopback = gpy_loopback,
+ },
+ {
+ PHY_ID_MATCH_MODEL(PHY_ID_GPY115C),
+ .name = "Maxlinear Ethernet GPY115C",
+ .get_features = genphy_c45_pma_read_abilities,
+ .config_init = gpy_config_init,
+ .probe = gpy_probe,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .config_aneg = gpy_config_aneg,
+ .aneg_done = genphy_c45_aneg_done,
+ .read_status = gpy_read_status,
+ .config_intr = gpy_config_intr,
+ .handle_interrupt = gpy_handle_interrupt,
+ .set_wol = gpy_set_wol,
+ .get_wol = gpy_get_wol,
+ .set_loopback = gpy_loopback,
+ },
+ {
+ .phy_id = PHY_ID_GPY211B,
+ .phy_id_mask = PHY_ID_GPY21xB_MASK,
+ .name = "Maxlinear Ethernet GPY211B",
+ .get_features = genphy_c45_pma_read_abilities,
+ .config_init = gpy_config_init,
+ .probe = gpy_probe,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .config_aneg = gpy_config_aneg,
+ .aneg_done = genphy_c45_aneg_done,
+ .read_status = gpy_read_status,
+ .config_intr = gpy_config_intr,
+ .handle_interrupt = gpy_handle_interrupt,
+ .set_wol = gpy_set_wol,
+ .get_wol = gpy_get_wol,
+ .set_loopback = gpy_loopback,
+ },
+ {
+ PHY_ID_MATCH_MODEL(PHY_ID_GPY211C),
+ .name = "Maxlinear Ethernet GPY211C",
+ .get_features = genphy_c45_pma_read_abilities,
+ .config_init = gpy_config_init,
+ .probe = gpy_probe,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .config_aneg = gpy_config_aneg,
+ .aneg_done = genphy_c45_aneg_done,
+ .read_status = gpy_read_status,
+ .config_intr = gpy_config_intr,
+ .handle_interrupt = gpy_handle_interrupt,
+ .set_wol = gpy_set_wol,
+ .get_wol = gpy_get_wol,
+ .set_loopback = gpy_loopback,
+ },
+ {
+ .phy_id = PHY_ID_GPY212B,
+ .phy_id_mask = PHY_ID_GPY21xB_MASK,
+ .name = "Maxlinear Ethernet GPY212B",
+ .get_features = genphy_c45_pma_read_abilities,
+ .config_init = gpy_config_init,
+ .probe = gpy_probe,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .config_aneg = gpy_config_aneg,
+ .aneg_done = genphy_c45_aneg_done,
+ .read_status = gpy_read_status,
+ .config_intr = gpy_config_intr,
+ .handle_interrupt = gpy_handle_interrupt,
+ .set_wol = gpy_set_wol,
+ .get_wol = gpy_get_wol,
+ .set_loopback = gpy_loopback,
+ },
+ {
+ PHY_ID_MATCH_MODEL(PHY_ID_GPY212C),
+ .name = "Maxlinear Ethernet GPY212C",
+ .get_features = genphy_c45_pma_read_abilities,
+ .config_init = gpy_config_init,
+ .probe = gpy_probe,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .config_aneg = gpy_config_aneg,
+ .aneg_done = genphy_c45_aneg_done,
+ .read_status = gpy_read_status,
+ .config_intr = gpy_config_intr,
+ .handle_interrupt = gpy_handle_interrupt,
+ .set_wol = gpy_set_wol,
+ .get_wol = gpy_get_wol,
+ .set_loopback = gpy_loopback,
+ },
+ {
+ .phy_id = PHY_ID_GPY215B,
+ .phy_id_mask = PHY_ID_GPYx15B_MASK,
+ .name = "Maxlinear Ethernet GPY215B",
+ .get_features = genphy_c45_pma_read_abilities,
+ .config_init = gpy_config_init,
+ .probe = gpy_probe,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .config_aneg = gpy_config_aneg,
+ .aneg_done = genphy_c45_aneg_done,
+ .read_status = gpy_read_status,
+ .config_intr = gpy_config_intr,
+ .handle_interrupt = gpy_handle_interrupt,
+ .set_wol = gpy_set_wol,
+ .get_wol = gpy_get_wol,
+ .set_loopback = gpy_loopback,
+ },
+ {
+ PHY_ID_MATCH_MODEL(PHY_ID_GPY215C),
+ .name = "Maxlinear Ethernet GPY215C",
+ .get_features = genphy_c45_pma_read_abilities,
+ .config_init = gpy_config_init,
+ .probe = gpy_probe,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .config_aneg = gpy_config_aneg,
+ .aneg_done = genphy_c45_aneg_done,
+ .read_status = gpy_read_status,
+ .config_intr = gpy_config_intr,
+ .handle_interrupt = gpy_handle_interrupt,
+ .set_wol = gpy_set_wol,
+ .get_wol = gpy_get_wol,
+ .set_loopback = gpy_loopback,
+ },
+ {
+ PHY_ID_MATCH_MODEL(PHY_ID_GPY241B),
+ .name = "Maxlinear Ethernet GPY241B",
+ .get_features = genphy_c45_pma_read_abilities,
+ .config_init = gpy_config_init,
+ .probe = gpy_probe,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .config_aneg = gpy_config_aneg,
+ .aneg_done = genphy_c45_aneg_done,
+ .read_status = gpy_read_status,
+ .config_intr = gpy_config_intr,
+ .handle_interrupt = gpy_handle_interrupt,
+ .set_wol = gpy_set_wol,
+ .get_wol = gpy_get_wol,
+ .set_loopback = gpy_loopback,
+ },
+ {
+ PHY_ID_MATCH_MODEL(PHY_ID_GPY241BM),
+ .name = "Maxlinear Ethernet GPY241BM",
+ .get_features = genphy_c45_pma_read_abilities,
+ .config_init = gpy_config_init,
+ .probe = gpy_probe,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .config_aneg = gpy_config_aneg,
+ .aneg_done = genphy_c45_aneg_done,
+ .read_status = gpy_read_status,
+ .config_intr = gpy_config_intr,
+ .handle_interrupt = gpy_handle_interrupt,
+ .set_wol = gpy_set_wol,
+ .get_wol = gpy_get_wol,
+ .set_loopback = gpy_loopback,
+ },
+ {
+ PHY_ID_MATCH_MODEL(PHY_ID_GPY245B),
+ .name = "Maxlinear Ethernet GPY245B",
+ .get_features = genphy_c45_pma_read_abilities,
+ .config_init = gpy_config_init,
+ .probe = gpy_probe,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .config_aneg = gpy_config_aneg,
+ .aneg_done = genphy_c45_aneg_done,
+ .read_status = gpy_read_status,
+ .config_intr = gpy_config_intr,
+ .handle_interrupt = gpy_handle_interrupt,
+ .set_wol = gpy_set_wol,
+ .get_wol = gpy_get_wol,
+ .set_loopback = gpy_loopback,
+ },
+};
+module_phy_driver(gpy_drivers);
+
+/* MDIO device ID table exported through MODULE_DEVICE_TABLE(); it lists the
+ * same ID/mask pairs, in the same order, as gpy_drivers[].
+ */
+static struct mdio_device_id __maybe_unused gpy_tbl[] = {
+ {PHY_ID_MATCH_MODEL(PHY_ID_GPY2xx)},
+ {PHY_ID_GPY115B, PHY_ID_GPYx15B_MASK},
+ {PHY_ID_MATCH_MODEL(PHY_ID_GPY115C)},
+ {PHY_ID_GPY211B, PHY_ID_GPY21xB_MASK},
+ {PHY_ID_MATCH_MODEL(PHY_ID_GPY211C)},
+ {PHY_ID_GPY212B, PHY_ID_GPY21xB_MASK},
+ {PHY_ID_MATCH_MODEL(PHY_ID_GPY212C)},
+ {PHY_ID_GPY215B, PHY_ID_GPYx15B_MASK},
+ {PHY_ID_MATCH_MODEL(PHY_ID_GPY215C)},
+ {PHY_ID_MATCH_MODEL(PHY_ID_GPY241B)},
+ {PHY_ID_MATCH_MODEL(PHY_ID_GPY241BM)},
+ {PHY_ID_MATCH_MODEL(PHY_ID_GPY245B)},
+ { }
+};
+MODULE_DEVICE_TABLE(mdio, gpy_tbl);
+
+MODULE_DESCRIPTION("Maxlinear Ethernet GPY Driver");
+MODULE_AUTHOR("Xu Liang");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c
index afd7afa1f498..9944cc501806 100644
--- a/drivers/net/phy/nxp-tja11xx.c
+++ b/drivers/net/phy/nxp-tja11xx.c
@@ -47,12 +47,14 @@
#define MII_INTSRC_LINK_FAIL BIT(10)
#define MII_INTSRC_LINK_UP BIT(9)
#define MII_INTSRC_MASK (MII_INTSRC_LINK_FAIL | MII_INTSRC_LINK_UP)
-#define MII_INTSRC_TEMP_ERR BIT(1)
#define MII_INTSRC_UV_ERR BIT(3)
+#define MII_INTSRC_TEMP_ERR BIT(1)
#define MII_INTEN 22
#define MII_INTEN_LINK_FAIL BIT(10)
#define MII_INTEN_LINK_UP BIT(9)
+#define MII_INTEN_UV_ERR BIT(3)
+#define MII_INTEN_TEMP_ERR BIT(1)
#define MII_COMMSTAT 23
#define MII_COMMSTAT_LINK_UP BIT(15)
@@ -607,7 +609,8 @@ static int tja11xx_config_intr(struct phy_device *phydev)
if (err)
return err;
- value = MII_INTEN_LINK_FAIL | MII_INTEN_LINK_UP;
+ value = MII_INTEN_LINK_FAIL | MII_INTEN_LINK_UP |
+ MII_INTEN_UV_ERR | MII_INTEN_TEMP_ERR;
err = phy_write(phydev, MII_INTEN, value);
} else {
err = phy_write(phydev, MII_INTEN, value);
@@ -622,6 +625,7 @@ static int tja11xx_config_intr(struct phy_device *phydev)
static irqreturn_t tja11xx_handle_interrupt(struct phy_device *phydev)
{
+ struct device *dev = &phydev->mdio.dev;
int irq_status;
irq_status = phy_read(phydev, MII_INTSRC);
@@ -630,6 +634,11 @@ static irqreturn_t tja11xx_handle_interrupt(struct phy_device *phydev)
return IRQ_NONE;
}
+ if (irq_status & MII_INTSRC_TEMP_ERR)
+ dev_warn(dev, "Overtemperature error detected (temp > 155C°).\n");
+ if (irq_status & MII_INTSRC_UV_ERR)
+ dev_warn(dev, "Undervoltage error detected.\n");
+
if (!(irq_status & MII_INTSRC_MASK))
return IRQ_NONE;
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 8eeb26d8aeb7..f124a8a58bd4 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -426,7 +426,7 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd)
EXPORT_SYMBOL(phy_mii_ioctl);
/**
- * phy_do_ioctl - generic ndo_do_ioctl implementation
+ * phy_do_ioctl - generic ndo_eth_ioctl implementation
* @dev: the net_device struct
* @ifr: &struct ifreq for socket ioctl's
* @cmd: ioctl cmd to execute
@@ -441,7 +441,7 @@ int phy_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
EXPORT_SYMBOL(phy_do_ioctl);
/**
- * phy_do_ioctl_running - generic ndo_do_ioctl implementation but test first
+ * phy_do_ioctl_running - generic ndo_eth_ioctl implementation but test first
*
* @dev: the net_device struct
* @ifr: &struct ifreq for socket ioctl's
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 5d5f9a9ee768..9e2891d8e8dd 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -233,11 +233,9 @@ static DEFINE_MUTEX(phy_fixup_lock);
static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
{
- struct device_driver *drv = phydev->mdio.dev.driver;
- struct phy_driver *phydrv = to_phy_driver(drv);
struct net_device *netdev = phydev->attached_dev;
- if (!drv || !phydrv->suspend)
+ if (!phydev->drv->suspend)
return false;
/* PHY not attached? May suspend if the PHY has not already been
@@ -969,6 +967,20 @@ void phy_device_remove(struct phy_device *phydev)
EXPORT_SYMBOL(phy_device_remove);
/**
+ * phy_get_c45_ids - Read 802.3-c45 IDs for phy device.
+ * @phydev: phy_device structure to read 802.3-c45 IDs
+ *
+ * Returns zero on success, %-EIO on bus access error, or %-ENODEV if
+ * the "devices in package" is invalid.
+ */
+int phy_get_c45_ids(struct phy_device *phydev)
+{
+ return get_phy_c45_ids(phydev->mdio.bus, phydev->mdio.addr,
+ &phydev->c45_ids);
+}
+EXPORT_SYMBOL(phy_get_c45_ids);
+
+/**
* phy_find_first - finds the first PHY device on the bus
* @bus: the target MII bus
*/
@@ -1807,11 +1819,10 @@ EXPORT_SYMBOL(phy_resume);
int phy_loopback(struct phy_device *phydev, bool enable)
{
- struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver);
int ret = 0;
- if (!phydrv)
- return -ENODEV;
+ if (!phydev->drv)
+ return -EIO;
mutex_lock(&phydev->lock);
@@ -1825,8 +1836,8 @@ int phy_loopback(struct phy_device *phydev, bool enable)
goto out;
}
- if (phydrv->set_loopback)
- ret = phydrv->set_loopback(phydev, enable);
+ if (phydev->drv->set_loopback)
+ ret = phydev->drv->set_loopback(phydev, enable);
else
ret = genphy_loopback(phydev, enable);
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index eb29ef53d971..2cdf9f989dec 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -942,10 +942,11 @@ static void phylink_phy_change(struct phy_device *phydev, bool up)
phylink_run_resolve(pl);
- phylink_dbg(pl, "phy link %s %s/%s/%s\n", up ? "up" : "down",
+ phylink_dbg(pl, "phy link %s %s/%s/%s/%s\n", up ? "up" : "down",
phy_modes(phydev->interface),
phy_speed_to_str(phydev->speed),
- phy_duplex_to_str(phydev->duplex));
+ phy_duplex_to_str(phydev->duplex),
+ phylink_pause_to_str(pl->phy_state.pause));
}
static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy,
@@ -1457,15 +1458,11 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
return phy_ethtool_ksettings_set(pl->phydev, kset);
}
- linkmode_copy(support, pl->supported);
config = pl->link_config;
- config.an_enabled = kset->base.autoneg == AUTONEG_ENABLE;
- /* Mask out unsupported advertisements, and force the autoneg bit */
+ /* Mask out unsupported advertisements */
linkmode_and(config.advertising, kset->link_modes.advertising,
- support);
- linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising,
- config.an_enabled);
+ pl->supported);
/* FIXME: should we reject autoneg if phy/mac does not support it? */
switch (kset->base.autoneg) {
@@ -1474,7 +1471,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
* duplex.
*/
s = phy_lookup_setting(kset->base.speed, kset->base.duplex,
- support, false);
+ pl->supported, false);
if (!s)
return -EINVAL;
@@ -1515,6 +1512,12 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
/* We have ruled out the case with a PHY attached, and the
* fixed-link cases. All that is left are in-band links.
*/
+ config.an_enabled = kset->base.autoneg == AUTONEG_ENABLE;
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising,
+ config.an_enabled);
+
+ /* Validate without changing the current supported mask. */
+ linkmode_copy(support, pl->supported);
if (phylink_validate(pl, support, &config))
return -EINVAL;
diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c
index 151c2a3f0b3a..8dcb49ed1f3d 100644
--- a/drivers/net/phy/xilinx_gmii2rgmii.c
+++ b/drivers/net/phy/xilinx_gmii2rgmii.c
@@ -27,12 +27,28 @@ struct gmii2rgmii {
struct mdio_device *mdio;
};
-static int xgmiitorgmii_read_status(struct phy_device *phydev)
+static void xgmiitorgmii_configure(struct gmii2rgmii *priv, int speed)
{
- struct gmii2rgmii *priv = mdiodev_get_drvdata(&phydev->mdio);
struct mii_bus *bus = priv->mdio->bus;
int addr = priv->mdio->addr;
- u16 val = 0;
+ u16 val;
+
+ val = mdiobus_read(bus, addr, XILINX_GMII2RGMII_REG);
+ val &= ~XILINX_GMII2RGMII_SPEED_MASK;
+
+ if (speed == SPEED_1000)
+ val |= BMCR_SPEED1000;
+ else if (speed == SPEED_100)
+ val |= BMCR_SPEED100;
+ else
+ val |= BMCR_SPEED10;
+
+ mdiobus_write(bus, addr, XILINX_GMII2RGMII_REG, val);
+}
+
+static int xgmiitorgmii_read_status(struct phy_device *phydev)
+{
+ struct gmii2rgmii *priv = mdiodev_get_drvdata(&phydev->mdio);
int err;
if (priv->phy_drv->read_status)
@@ -42,17 +58,24 @@ static int xgmiitorgmii_read_status(struct phy_device *phydev)
if (err < 0)
return err;
- val = mdiobus_read(bus, addr, XILINX_GMII2RGMII_REG);
- val &= ~XILINX_GMII2RGMII_SPEED_MASK;
+ xgmiitorgmii_configure(priv, phydev->speed);
- if (phydev->speed == SPEED_1000)
- val |= BMCR_SPEED1000;
- else if (phydev->speed == SPEED_100)
- val |= BMCR_SPEED100;
+ return 0;
+}
+
+static int xgmiitorgmii_set_loopback(struct phy_device *phydev, bool enable)
+{
+ struct gmii2rgmii *priv = mdiodev_get_drvdata(&phydev->mdio);
+ int err;
+
+ if (priv->phy_drv->set_loopback)
+ err = priv->phy_drv->set_loopback(phydev, enable);
else
- val |= BMCR_SPEED10;
+ err = genphy_loopback(phydev, enable);
+ if (err < 0)
+ return err;
- mdiobus_write(bus, addr, XILINX_GMII2RGMII_REG, val);
+ xgmiitorgmii_configure(priv, phydev->speed);
return 0;
}
@@ -90,6 +113,7 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev)
memcpy(&priv->conv_phy_drv, priv->phy_dev->drv,
sizeof(struct phy_driver));
priv->conv_phy_drv.read_status = xgmiitorgmii_read_status;
+ priv->conv_phy_drv.set_loopback = xgmiitorgmii_set_loopback;
mdiodev_set_drvdata(&priv->phy_dev->mdio, priv);
priv->phy_dev->drv = &priv->conv_phy_drv;
diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c
index e26cf91bdec2..82d609401711 100644
--- a/drivers/net/plip/plip.c
+++ b/drivers/net/plip/plip.c
@@ -84,6 +84,7 @@ static const char version[] = "NET3 PLIP version 2.4-parport gniibe@mri.co.jp\n"
extra grounds are 18,19,20,21,22,23,24
*/
+#include <linux/compat.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
@@ -150,7 +151,8 @@ static int plip_hard_header_cache(const struct neighbour *neigh,
struct hh_cache *hh, __be16 type);
static int plip_open(struct net_device *dev);
static int plip_close(struct net_device *dev);
-static int plip_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+static int plip_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd);
static int plip_preempt(void *handle);
static void plip_wakeup(void *handle);
@@ -265,7 +267,7 @@ static const struct net_device_ops plip_netdev_ops = {
.ndo_open = plip_open,
.ndo_stop = plip_close,
.ndo_start_xmit = plip_tx_packet,
- .ndo_do_ioctl = plip_ioctl,
+ .ndo_siocdevprivate = plip_siocdevprivate,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
};
@@ -1207,7 +1209,8 @@ plip_wakeup(void *handle)
}
static int
-plip_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+plip_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+ void __user *data, int cmd)
{
struct net_local *nl = netdev_priv(dev);
struct plipconf *pc = (struct plipconf *) &rq->ifr_ifru;
@@ -1215,6 +1218,9 @@ plip_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
if (cmd != SIOCDEVPLIP)
return -EOPNOTSUPP;
+ if (in_compat_syscall())
+ return -EOPNOTSUPP;
+
switch(pc->pcmd) {
case PLIP_GET_TIMEOUT:
pc->trigger = nl->trigger;
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 930e49ef15f6..fb52cd175b45 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -284,7 +284,7 @@ static struct channel *ppp_find_channel(struct ppp_net *pn, int unit);
static int ppp_connect_channel(struct channel *pch, int unit);
static int ppp_disconnect_channel(struct channel *pch);
static void ppp_destroy_channel(struct channel *pch);
-static int unit_get(struct idr *p, void *ptr);
+static int unit_get(struct idr *p, void *ptr, int min);
static int unit_set(struct idr *p, void *ptr, int n);
static void unit_put(struct idr *p, int n);
static void *unit_find(struct idr *p, int n);
@@ -1155,9 +1155,20 @@ static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set)
mutex_lock(&pn->all_ppp_mutex);
if (unit < 0) {
- ret = unit_get(&pn->units_idr, ppp);
+ ret = unit_get(&pn->units_idr, ppp, 0);
if (ret < 0)
goto err;
+ if (!ifname_is_set) {
+ while (1) {
+ snprintf(ppp->dev->name, IFNAMSIZ, "ppp%i", ret);
+ if (!__dev_get_by_name(ppp->ppp_net, ppp->dev->name))
+ break;
+ unit_put(&pn->units_idr, ret);
+ ret = unit_get(&pn->units_idr, ppp, ret + 1);
+ if (ret < 0)
+ goto err;
+ }
+ }
} else {
/* Caller asked for a specific unit number. Fail with -EEXIST
* if unavailable. For backward compatibility, return -EEXIST
@@ -1306,7 +1317,7 @@ static int ppp_nl_newlink(struct net *src_net, struct net_device *dev,
* the PPP unit identifer as suffix (i.e. ppp<unit_id>). This allows
* userspace to infer the device name using to the PPPIOCGUNIT ioctl.
*/
- if (!tb[IFLA_IFNAME])
+ if (!tb[IFLA_IFNAME] || !nla_len(tb[IFLA_IFNAME]) || !*(char *)nla_data(tb[IFLA_IFNAME]))
conf.ifname_is_set = false;
err = ppp_dev_configure(src_net, dev, &conf);
@@ -1452,11 +1463,11 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev)
}
static int
-ppp_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ppp_net_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *addr, int cmd)
{
struct ppp *ppp = netdev_priv(dev);
int err = -EFAULT;
- void __user *addr = (void __user *) ifr->ifr_ifru.ifru_data;
struct ppp_stats stats;
struct ppp_comp_stats cstats;
char *vers;
@@ -1585,7 +1596,7 @@ static const struct net_device_ops ppp_netdev_ops = {
.ndo_init = ppp_dev_init,
.ndo_uninit = ppp_dev_uninit,
.ndo_start_xmit = ppp_start_xmit,
- .ndo_do_ioctl = ppp_net_ioctl,
+ .ndo_siocdevprivate = ppp_net_siocdevprivate,
.ndo_get_stats64 = ppp_get_stats64,
.ndo_fill_forward_path = ppp_fill_forward_path,
};
@@ -1733,7 +1744,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
a four-byte PPP header on each packet */
*(u8 *)skb_push(skb, 2) = 1;
if (ppp->pass_filter &&
- BPF_PROG_RUN(ppp->pass_filter, skb) == 0) {
+ bpf_prog_run(ppp->pass_filter, skb) == 0) {
if (ppp->debug & 1)
netdev_printk(KERN_DEBUG, ppp->dev,
"PPP: outbound frame "
@@ -1743,7 +1754,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
}
/* if this packet passes the active filter, record the time */
if (!(ppp->active_filter &&
- BPF_PROG_RUN(ppp->active_filter, skb) == 0))
+ bpf_prog_run(ppp->active_filter, skb) == 0))
ppp->last_xmit = jiffies;
skb_pull(skb, 2);
#else
@@ -2457,7 +2468,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
*(u8 *)skb_push(skb, 2) = 0;
if (ppp->pass_filter &&
- BPF_PROG_RUN(ppp->pass_filter, skb) == 0) {
+ bpf_prog_run(ppp->pass_filter, skb) == 0) {
if (ppp->debug & 1)
netdev_printk(KERN_DEBUG, ppp->dev,
"PPP: inbound frame "
@@ -2466,7 +2477,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
return;
}
if (!(ppp->active_filter &&
- BPF_PROG_RUN(ppp->active_filter, skb) == 0))
+ bpf_prog_run(ppp->active_filter, skb) == 0))
ppp->last_recv = jiffies;
__skb_pull(skb, 2);
} else
@@ -3552,9 +3563,9 @@ static int unit_set(struct idr *p, void *ptr, int n)
}
/* get new free unit number and associate pointer with it */
-static int unit_get(struct idr *p, void *ptr)
+static int unit_get(struct idr *p, void *ptr, int min)
{
- return idr_alloc(p, ptr, 0, 0, GFP_KERNEL);
+ return idr_alloc(p, ptr, min, 0, GFP_KERNEL);
}
/* put unit number back to a pool */
diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c
index e88af978f63c..f01c9db01b16 100644
--- a/drivers/net/sb1000.c
+++ b/drivers/net/sb1000.c
@@ -78,7 +78,8 @@ struct sb1000_private {
/* prototypes for Linux interface */
extern int sb1000_probe(struct net_device *dev);
static int sb1000_open(struct net_device *dev);
-static int sb1000_dev_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd);
+static int sb1000_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd);
static netdev_tx_t sb1000_start_xmit(struct sk_buff *skb,
struct net_device *dev);
static irqreturn_t sb1000_interrupt(int irq, void *dev_id);
@@ -135,7 +136,7 @@ MODULE_DEVICE_TABLE(pnp, sb1000_pnp_ids);
static const struct net_device_ops sb1000_netdev_ops = {
.ndo_open = sb1000_open,
.ndo_start_xmit = sb1000_start_xmit,
- .ndo_do_ioctl = sb1000_dev_ioctl,
+ .ndo_siocdevprivate = sb1000_siocdevprivate,
.ndo_stop = sb1000_close,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
@@ -987,7 +988,8 @@ sb1000_open(struct net_device *dev)
return 0; /* Always succeed */
}
-static int sb1000_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int sb1000_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
char* name;
unsigned char version[2];
@@ -1011,7 +1013,7 @@ static int sb1000_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
stats[2] = dev->stats.rx_packets;
stats[3] = dev->stats.rx_errors;
stats[4] = dev->stats.rx_dropped;
- if(copy_to_user(ifr->ifr_data, stats, sizeof(stats)))
+ if (copy_to_user(data, stats, sizeof(stats)))
return -EFAULT;
status = 0;
break;
@@ -1019,21 +1021,21 @@ static int sb1000_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCGCMFIRMWARE: /* get firmware version */
if ((status = sb1000_get_firmware_version(ioaddr, name, version, 1)))
return status;
- if(copy_to_user(ifr->ifr_data, version, sizeof(version)))
+ if (copy_to_user(data, version, sizeof(version)))
return -EFAULT;
break;
case SIOCGCMFREQUENCY: /* get frequency */
if ((status = sb1000_get_frequency(ioaddr, name, &frequency)))
return status;
- if(put_user(frequency, (int __user *) ifr->ifr_data))
+ if (put_user(frequency, (int __user *)data))
return -EFAULT;
break;
case SIOCSCMFREQUENCY: /* set frequency */
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- if(get_user(frequency, (int __user *) ifr->ifr_data))
+ if (get_user(frequency, (int __user *)data))
return -EFAULT;
if ((status = sb1000_set_frequency(ioaddr, name, frequency)))
return status;
@@ -1042,14 +1044,14 @@ static int sb1000_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCGCMPIDS: /* get PIDs */
if ((status = sb1000_get_PIDs(ioaddr, name, PID)))
return status;
- if(copy_to_user(ifr->ifr_data, PID, sizeof(PID)))
+ if (copy_to_user(data, PID, sizeof(PID)))
return -EFAULT;
break;
case SIOCSCMPIDS: /* set PIDs */
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- if(copy_from_user(PID, ifr->ifr_data, sizeof(PID)))
+ if (copy_from_user(PID, data, sizeof(PID)))
return -EFAULT;
if ((status = sb1000_set_PIDs(ioaddr, name, PID)))
return status;
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index dc84cb844319..5435b5689ce6 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -62,6 +62,7 @@
*/
#define SL_CHECK_TRANSMIT
+#include <linux/compat.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -108,7 +109,7 @@ static void slip_unesc6(struct slip *sl, unsigned char c);
#ifdef CONFIG_SLIP_SMART
static void sl_keepalive(struct timer_list *t);
static void sl_outfill(struct timer_list *t);
-static int sl_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static int sl_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd);
#endif
/********************************
@@ -647,7 +648,7 @@ static const struct net_device_ops sl_netdev_ops = {
.ndo_change_mtu = sl_change_mtu,
.ndo_tx_timeout = sl_tx_timeout,
#ifdef CONFIG_SLIP_SMART
- .ndo_do_ioctl = sl_ioctl,
+ .ndo_siocdevprivate = sl_siocdevprivate,
#endif
};
@@ -1179,11 +1180,12 @@ static int slip_ioctl(struct tty_struct *tty, struct file *file,
/* VSV changes start here */
#ifdef CONFIG_SLIP_SMART
-/* function do_ioctl called from net/core/dev.c
+/* function sl_siocdevprivate called from net/core/dev.c
to allow get/set outfill/keepalive parameter
by ifconfig */
-static int sl_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int sl_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+ void __user *data, int cmd)
{
struct slip *sl = netdev_priv(dev);
unsigned long *p = (unsigned long *)&rq->ifr_ifru;
@@ -1191,6 +1193,9 @@ static int sl_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
if (sl == NULL) /* Allocation failed ?? */
return -ENODEV;
+ if (in_compat_syscall())
+ return -EOPNOTSUPP;
+
spin_lock_bh(&sl->lock);
if (!sl->tty) {
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index 32aef8ac4a14..b095a4b4957b 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -197,7 +197,7 @@ static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv,
fp = rcu_dereference_bh(lb_priv->fp);
if (unlikely(!fp))
return 0;
- lhash = BPF_PROG_RUN(fp, skb);
+ lhash = bpf_prog_run(fp, skb);
c = (char *) &lhash;
return c[0] ^ c[1] ^ c[2] ^ c[3];
}
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 2ced021f4faf..fecc9a1d293a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -3510,7 +3510,9 @@ static void tun_set_msglevel(struct net_device *dev, u32 value)
}
static int tun_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct tun_struct *tun = netdev_priv(dev);
@@ -3520,7 +3522,9 @@ static int tun_get_coalesce(struct net_device *dev,
}
static int tun_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct tun_struct *tun = netdev_priv(dev);
diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h
index e1994a246122..2a1e31defe71 100644
--- a/drivers/net/usb/asix.h
+++ b/drivers/net/usb/asix.h
@@ -184,6 +184,7 @@ struct asix_common_private {
struct phy_device *phydev;
u16 phy_addr;
char phy_name[20];
+ bool embd_phy;
};
extern const struct driver_info ax88172a_info;
diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
index ac92bc52a85e..38cda590895c 100644
--- a/drivers/net/usb/asix_common.c
+++ b/drivers/net/usb/asix_common.c
@@ -63,6 +63,29 @@ void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index,
value, index, data, size);
}
+static int asix_check_host_enable(struct usbnet *dev, int in_pm)
+{
+ int i, ret;
+ u8 smsr;
+
+ for (i = 0; i < 30; ++i) {
+ ret = asix_set_sw_mii(dev, in_pm);
+ if (ret == -ENODEV || ret == -ETIMEDOUT)
+ break;
+ usleep_range(1000, 1100);
+ ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
+ 0, 0, 1, &smsr, in_pm);
+ if (ret == -ENODEV)
+ break;
+ else if (ret < 0)
+ continue;
+ else if (smsr & AX_HOST_EN)
+ break;
+ }
+
+ return ret;
+}
+
static void reset_asix_rx_fixup_info(struct asix_rx_fixup_info *rx)
{
/* Reset the variables that have a lifetime outside of
@@ -467,19 +490,11 @@ int asix_mdio_read(struct net_device *netdev, int phy_id, int loc)
{
struct usbnet *dev = netdev_priv(netdev);
__le16 res;
- u8 smsr;
- int i = 0;
int ret;
mutex_lock(&dev->phy_mutex);
- do {
- ret = asix_set_sw_mii(dev, 0);
- if (ret == -ENODEV || ret == -ETIMEDOUT)
- break;
- usleep_range(1000, 1100);
- ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
- 0, 0, 1, &smsr, 0);
- } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV));
+
+ ret = asix_check_host_enable(dev, 0);
if (ret == -ENODEV || ret == -ETIMEDOUT) {
mutex_unlock(&dev->phy_mutex);
return ret;
@@ -505,23 +520,14 @@ static int __asix_mdio_write(struct net_device *netdev, int phy_id, int loc,
{
struct usbnet *dev = netdev_priv(netdev);
__le16 res = cpu_to_le16(val);
- u8 smsr;
- int i = 0;
int ret;
netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n",
phy_id, loc, val);
mutex_lock(&dev->phy_mutex);
- do {
- ret = asix_set_sw_mii(dev, 0);
- if (ret == -ENODEV)
- break;
- usleep_range(1000, 1100);
- ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
- 0, 0, 1, &smsr, 0);
- } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV));
+ ret = asix_check_host_enable(dev, 0);
if (ret == -ENODEV)
goto out;
@@ -561,19 +567,11 @@ int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc)
{
struct usbnet *dev = netdev_priv(netdev);
__le16 res;
- u8 smsr;
- int i = 0;
int ret;
mutex_lock(&dev->phy_mutex);
- do {
- ret = asix_set_sw_mii(dev, 1);
- if (ret == -ENODEV || ret == -ETIMEDOUT)
- break;
- usleep_range(1000, 1100);
- ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
- 0, 0, 1, &smsr, 1);
- } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV));
+
+ ret = asix_check_host_enable(dev, 1);
if (ret == -ENODEV || ret == -ETIMEDOUT) {
mutex_unlock(&dev->phy_mutex);
return ret;
@@ -595,22 +593,14 @@ asix_mdio_write_nopm(struct net_device *netdev, int phy_id, int loc, int val)
{
struct usbnet *dev = netdev_priv(netdev);
__le16 res = cpu_to_le16(val);
- u8 smsr;
- int i = 0;
int ret;
netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n",
phy_id, loc, val);
mutex_lock(&dev->phy_mutex);
- do {
- ret = asix_set_sw_mii(dev, 1);
- if (ret == -ENODEV)
- break;
- usleep_range(1000, 1100);
- ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
- 0, 0, 1, &smsr, 1);
- } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV));
+
+ ret = asix_check_host_enable(dev, 1);
if (ret == -ENODEV) {
mutex_unlock(&dev->phy_mutex);
return;
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 2c115216420a..30821f6a6d7a 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -197,7 +197,7 @@ static const struct net_device_ops ax88172_netdev_ops = {
.ndo_get_stats64 = dev_get_tstats64,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = asix_ioctl,
+ .ndo_eth_ioctl = asix_ioctl,
.ndo_set_rx_mode = ax88172_set_multicast,
};
@@ -354,24 +354,23 @@ out:
static int ax88772_hw_reset(struct usbnet *dev, int in_pm)
{
struct asix_data *data = (struct asix_data *)&dev->data;
- int ret, embd_phy;
+ struct asix_common_private *priv = dev->driver_priv;
u16 rx_ctl;
+ int ret;
ret = asix_write_gpio(dev, AX_GPIO_RSE | AX_GPIO_GPO_2 |
AX_GPIO_GPO2EN, 5, in_pm);
if (ret < 0)
goto out;
- embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 1 : 0);
-
- ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy,
+ ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, priv->embd_phy,
0, 0, NULL, in_pm);
if (ret < 0) {
netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret);
goto out;
}
- if (embd_phy) {
+ if (priv->embd_phy) {
ret = asix_sw_reset(dev, AX_SWRESET_IPPD, in_pm);
if (ret < 0)
goto out;
@@ -449,17 +448,16 @@ out:
static int ax88772a_hw_reset(struct usbnet *dev, int in_pm)
{
struct asix_data *data = (struct asix_data *)&dev->data;
- int ret, embd_phy;
+ struct asix_common_private *priv = dev->driver_priv;
u16 rx_ctl, phy14h, phy15h, phy16h;
u8 chipcode = 0;
+ int ret;
ret = asix_write_gpio(dev, AX_GPIO_RSE, 5, in_pm);
if (ret < 0)
goto out;
- embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 1 : 0);
-
- ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy |
+ ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, priv->embd_phy |
AX_PHYSEL_SSEN, 0, 0, NULL, in_pm);
if (ret < 0) {
netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret);
@@ -589,7 +587,7 @@ static const struct net_device_ops ax88772_netdev_ops = {
.ndo_get_stats64 = dev_get_tstats64,
.ndo_set_mac_address = asix_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_set_rx_mode = asix_set_multicast,
};
@@ -683,12 +681,6 @@ static int ax88772_init_phy(struct usbnet *dev)
struct asix_common_private *priv = dev->driver_priv;
int ret;
- ret = asix_read_phy_addr(dev, true);
- if (ret < 0)
- return ret;
-
- priv->phy_addr = ret;
-
snprintf(priv->phy_name, sizeof(priv->phy_name), PHY_ID_FMT,
priv->mdio->id, priv->phy_addr);
@@ -714,7 +706,12 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
u8 buf[ETH_ALEN] = {0}, chipcode = 0;
struct asix_common_private *priv;
int ret, i;
- u32 phyid;
+
+ priv = devm_kzalloc(&dev->udev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ dev->driver_priv = priv;
usbnet_get_endpoints(dev, intf);
@@ -751,6 +748,13 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
dev->net->needed_headroom = 4; /* cf asix_tx_fixup() */
dev->net->needed_tailroom = 4; /* cf asix_tx_fixup() */
+ ret = asix_read_phy_addr(dev, true);
+ if (ret < 0)
+ return ret;
+
+ priv->phy_addr = ret;
+ priv->embd_phy = ((priv->phy_addr & 0x1f) == 0x10);
+
asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0);
chipcode &= AX_CHIPCODE_MASK;
@@ -762,10 +766,6 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
return ret;
}
- /* Read PHYID register *AFTER* the PHY was reset properly */
- phyid = asix_get_phyid(dev);
- netdev_dbg(dev->net, "PHYID=0x%08x\n", phyid);
-
/* Asix framing packs multiple eth frames into a 2K usb bulk transfer */
if (dev->driver_info->flags & FLAG_FRAMING_AX) {
/* hard_mtu is still the default - the device does not support
@@ -773,12 +773,6 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
dev->rx_urb_size = 2048;
}
- priv = devm_kzalloc(&dev->udev->dev, sizeof(*priv), GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
-
- dev->driver_priv = priv;
-
priv->presvd_phy_bmcr = 0;
priv->presvd_phy_advertise = 0;
if (chipcode == AX_AX88772_CHIPCODE) {
@@ -817,6 +811,12 @@ static void ax88772_unbind(struct usbnet *dev, struct usb_interface *intf)
asix_rx_fixup_common_free(dev->driver_priv);
}
+static void ax88178_unbind(struct usbnet *dev, struct usb_interface *intf)
+{
+ asix_rx_fixup_common_free(dev->driver_priv);
+ kfree(dev->driver_priv);
+}
+
static const struct ethtool_ops ax88178_ethtool_ops = {
.get_drvinfo = asix_get_drvinfo,
.get_link = asix_get_link,
@@ -1100,7 +1100,7 @@ static const struct net_device_ops ax88178_netdev_ops = {
.ndo_set_mac_address = asix_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = asix_set_multicast,
- .ndo_do_ioctl = asix_ioctl,
+ .ndo_eth_ioctl = asix_ioctl,
.ndo_change_mtu = ax88178_change_mtu,
};
@@ -1215,6 +1215,7 @@ static const struct driver_info ax88772b_info = {
.unbind = ax88772_unbind,
.status = asix_status,
.reset = ax88772_reset,
+ .stop = ax88772_stop,
.flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR |
FLAG_MULTI_PACKET,
.rx_fixup = asix_rx_fixup_common,
@@ -1225,7 +1226,7 @@ static const struct driver_info ax88772b_info = {
static const struct driver_info ax88178_info = {
.description = "ASIX AX88178 USB 2.0 Ethernet",
.bind = ax88178_bind,
- .unbind = ax88772_unbind,
+ .unbind = ax88178_unbind,
.status = asix_status,
.link_reset = ax88178_link_reset,
.reset = ax88178_reset,
diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c
index 530947d7477b..d9777d9a7c5d 100644
--- a/drivers/net/usb/ax88172a.c
+++ b/drivers/net/usb/ax88172a.c
@@ -109,7 +109,7 @@ static const struct net_device_ops ax88172a_netdev_ops = {
.ndo_get_stats64 = dev_get_tstats64,
.ndo_set_mac_address = asix_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_set_rx_mode = asix_set_multicast,
};
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index c1316718304d..f25448a08870 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1035,7 +1035,7 @@ static const struct net_device_ops ax88179_netdev_ops = {
.ndo_change_mtu = ax88179_change_mtu,
.ndo_set_mac_address = ax88179_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = ax88179_ioctl,
+ .ndo_eth_ioctl = ax88179_ioctl,
.ndo_set_rx_mode = ax88179_set_multicast,
.ndo_set_features = ax88179_set_features,
};
diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c
index 8d1f69dad603..e1da9102a540 100644
--- a/drivers/net/usb/cdc-phonet.c
+++ b/drivers/net/usb/cdc-phonet.c
@@ -253,7 +253,8 @@ static int usbpn_close(struct net_device *dev)
return usb_set_interface(pnd->usb, num, !pnd->active_setting);
}
-static int usbpn_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int usbpn_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
struct if_phonet_req *req = (struct if_phonet_req *)ifr;
@@ -269,7 +270,7 @@ static const struct net_device_ops usbpn_ops = {
.ndo_open = usbpn_open,
.ndo_stop = usbpn_close,
.ndo_start_xmit = usbpn_xmit,
- .ndo_do_ioctl = usbpn_ioctl,
+ .ndo_siocdevprivate = usbpn_siocdevprivate,
};
static void usbpn_setup(struct net_device *dev)
diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c
index 89cc61d7a675..907f98b1eefe 100644
--- a/drivers/net/usb/dm9601.c
+++ b/drivers/net/usb/dm9601.c
@@ -345,7 +345,7 @@ static const struct net_device_ops dm9601_netdev_ops = {
.ndo_change_mtu = usbnet_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = dm9601_ioctl,
+ .ndo_eth_ioctl = dm9601_ioctl,
.ndo_set_rx_mode = dm9601_set_multicast,
.ndo_set_mac_address = dm9601_set_mac_address,
};
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index dec96e8ab567..24bc1e678b7b 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -1079,8 +1079,7 @@ static void hso_init_termios(struct ktermios *termios)
tty_termios_encode_baud_rate(termios, 115200, 115200);
}
-static void _hso_serial_set_termios(struct tty_struct *tty,
- struct ktermios *old)
+static void _hso_serial_set_termios(struct tty_struct *tty)
{
struct hso_serial *serial = tty->driver_data;
@@ -1262,7 +1261,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp)
if (serial->port.count == 1) {
serial->rx_state = RX_IDLE;
/* Force default termio settings */
- _hso_serial_set_termios(tty, NULL);
+ _hso_serial_set_termios(tty);
tasklet_setup(&serial->unthrottle_tasklet,
hso_unthrottle_tasklet);
result = hso_start_serial_device(serial->parent, GFP_KERNEL);
@@ -1394,7 +1393,7 @@ static void hso_serial_set_termios(struct tty_struct *tty, struct ktermios *old)
/* the actual setup */
spin_lock_irqsave(&serial->serial_lock, flags);
if (serial->port.count)
- _hso_serial_set_termios(tty, old);
+ _hso_serial_set_termios(tty);
else
tty->termios = *old;
spin_unlock_irqrestore(&serial->serial_lock, flags);
@@ -2353,7 +2352,7 @@ static int remove_net_device(struct hso_device *hso_dev)
}
/* Frees our network device */
-static void hso_free_net_device(struct hso_device *hso_dev, bool bailout)
+static void hso_free_net_device(struct hso_device *hso_dev)
{
int i;
struct hso_net *hso_net = dev2net(hso_dev);
@@ -2376,7 +2375,7 @@ static void hso_free_net_device(struct hso_device *hso_dev, bool bailout)
kfree(hso_net->mux_bulk_tx_buf);
hso_net->mux_bulk_tx_buf = NULL;
- if (hso_net->net && !bailout)
+ if (hso_net->net)
free_netdev(hso_net->net);
kfree(hso_dev);
@@ -3133,7 +3132,7 @@ static void hso_free_interface(struct usb_interface *interface)
rfkill_unregister(rfk);
rfkill_destroy(rfk);
}
- hso_free_net_device(network_table[i], false);
+ hso_free_net_device(network_table[i]);
}
}
}
diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
index 207e59e74935..06e2181e5810 100644
--- a/drivers/net/usb/ipheth.c
+++ b/drivers/net/usb/ipheth.c
@@ -443,7 +443,7 @@ static int ipheth_probe(struct usb_interface *intf,
netdev->netdev_ops = &ipheth_netdev_ops;
netdev->watchdog_timeo = IPHETH_TX_TIMEOUT;
- strcpy(netdev->name, "eth%d");
+ strscpy(netdev->name, "eth%d", sizeof(netdev->name));
dev = netdev_priv(netdev);
dev->udev = udev;
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 6d092d78e0cb..793f8fbe0069 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -46,6 +46,19 @@
#define MAX_RX_FIFO_SIZE (12 * 1024)
#define MAX_TX_FIFO_SIZE (12 * 1024)
+
+#define FLOW_THRESHOLD(n) ((((n) + 511) / 512) & 0x7F)
+#define FLOW_CTRL_THRESHOLD(on, off) ((FLOW_THRESHOLD(on) << 0) | \
+ (FLOW_THRESHOLD(off) << 8))
+
+/* Flow control turned on when Rx FIFO level rises above this level (bytes) */
+#define FLOW_ON_SS 9216
+#define FLOW_ON_HS 8704
+
+/* Flow control turned off when Rx FIFO level falls below this level (bytes) */
+#define FLOW_OFF_SS 4096
+#define FLOW_OFF_HS 1024
+
#define DEFAULT_BURST_CAP_SIZE (MAX_TX_FIFO_SIZE)
#define DEFAULT_BULK_IN_DELAY (0x0800)
#define MAX_SINGLE_PACKET_SIZE (9000)
@@ -87,6 +100,12 @@
/* statistic update interval (mSec) */
#define STAT_UPDATE_TIMER (1 * 1000)
+/* time to wait for MAC or FCT to stop (jiffies) */
+#define HW_DISABLE_TIMEOUT (HZ / 10)
+
+/* time to wait between polling MAC or FCT state (ms) */
+#define HW_DISABLE_DELAY_MS 1
+
/* defines interrupts from interrupt EP */
#define MAX_INT_EP (32)
#define INT_EP_INTEP (31)
@@ -341,6 +360,7 @@ struct usb_context {
#define EVENT_DEV_ASLEEP 7
#define EVENT_DEV_OPEN 8
#define EVENT_STAT_UPDATE 9
+#define EVENT_DEV_DISCONNECT 10
struct statstage {
struct mutex access_lock; /* for stats access */
@@ -370,7 +390,6 @@ struct lan78xx_net {
struct sk_buff_head rxq;
struct sk_buff_head txq;
struct sk_buff_head done;
- struct sk_buff_head rxq_pause;
struct sk_buff_head txq_pend;
struct tasklet_struct bh;
@@ -381,8 +400,9 @@ struct lan78xx_net {
struct urb *urb_intr;
struct usb_anchor deferred;
+ struct mutex dev_mutex; /* serialise open/stop wrt suspend/resume */
struct mutex phy_mutex; /* for phy access */
- unsigned pipe_in, pipe_out, pipe_intr;
+ unsigned int pipe_in, pipe_out, pipe_intr;
u32 hard_mtu; /* count any extra framing */
size_t rx_urb_size; /* size for rx urbs */
@@ -392,8 +412,7 @@ struct lan78xx_net {
wait_queue_head_t *wait;
unsigned char suspend_count;
- unsigned maxpacket;
- struct timer_list delay;
+ unsigned int maxpacket;
struct timer_list stat_monitor;
unsigned long data[5];
@@ -426,9 +445,13 @@ MODULE_PARM_DESC(msg_level, "Override default message level");
static int lan78xx_read_reg(struct lan78xx_net *dev, u32 index, u32 *data)
{
- u32 *buf = kmalloc(sizeof(u32), GFP_KERNEL);
+ u32 *buf;
int ret;
+ if (test_bit(EVENT_DEV_DISCONNECT, &dev->flags))
+ return -ENODEV;
+
+ buf = kmalloc(sizeof(u32), GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -439,7 +462,7 @@ static int lan78xx_read_reg(struct lan78xx_net *dev, u32 index, u32 *data)
if (likely(ret >= 0)) {
le32_to_cpus(buf);
*data = *buf;
- } else {
+ } else if (net_ratelimit()) {
netdev_warn(dev->net,
"Failed to read register index 0x%08x. ret = %d",
index, ret);
@@ -452,9 +475,13 @@ static int lan78xx_read_reg(struct lan78xx_net *dev, u32 index, u32 *data)
static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data)
{
- u32 *buf = kmalloc(sizeof(u32), GFP_KERNEL);
+ u32 *buf;
int ret;
+ if (test_bit(EVENT_DEV_DISCONNECT, &dev->flags))
+ return -ENODEV;
+
+ buf = kmalloc(sizeof(u32), GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -465,7 +492,8 @@ static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data)
USB_VENDOR_REQUEST_WRITE_REGISTER,
USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
0, index, buf, 4, USB_CTRL_SET_TIMEOUT);
- if (unlikely(ret < 0)) {
+ if (unlikely(ret < 0) &&
+ net_ratelimit()) {
netdev_warn(dev->net,
"Failed to write register index 0x%08x. ret = %d",
index, ret);
@@ -476,6 +504,26 @@ static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data)
return ret;
}
+static int lan78xx_update_reg(struct lan78xx_net *dev, u32 reg, u32 mask,
+ u32 data)
+{
+ int ret;
+ u32 buf;
+
+ ret = lan78xx_read_reg(dev, reg, &buf);
+ if (ret < 0)
+ return ret;
+
+ buf &= ~mask;
+ buf |= (mask & data);
+
+ ret = lan78xx_write_reg(dev, reg, buf);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
static int lan78xx_read_stats(struct lan78xx_net *dev,
struct lan78xx_statstage *data)
{
@@ -501,7 +549,7 @@ static int lan78xx_read_stats(struct lan78xx_net *dev,
if (likely(ret >= 0)) {
src = (u32 *)stats;
dst = (u32 *)data;
- for (i = 0; i < sizeof(*stats)/sizeof(u32); i++) {
+ for (i = 0; i < sizeof(*stats) / sizeof(u32); i++) {
le32_to_cpus(&src[i]);
dst[i] = src[i];
}
@@ -515,10 +563,11 @@ static int lan78xx_read_stats(struct lan78xx_net *dev,
return ret;
}
-#define check_counter_rollover(struct1, dev_stats, member) { \
- if (struct1->member < dev_stats.saved.member) \
- dev_stats.rollover_count.member++; \
- }
+#define check_counter_rollover(struct1, dev_stats, member) \
+ do { \
+ if ((struct1)->member < (dev_stats).saved.member) \
+ (dev_stats).rollover_count.member++; \
+ } while (0)
static void lan78xx_check_stat_rollover(struct lan78xx_net *dev,
struct lan78xx_statstage *stats)
@@ -844,9 +893,9 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset,
for (i = 0; i < length; i++) {
lan78xx_write_reg(dev, OTP_ADDR1,
- ((offset + i) >> 8) & OTP_ADDR1_15_11);
+ ((offset + i) >> 8) & OTP_ADDR1_15_11);
lan78xx_write_reg(dev, OTP_ADDR2,
- ((offset + i) & OTP_ADDR2_10_3));
+ ((offset + i) & OTP_ADDR2_10_3));
lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_);
lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
@@ -900,9 +949,9 @@ static int lan78xx_write_raw_otp(struct lan78xx_net *dev, u32 offset,
for (i = 0; i < length; i++) {
lan78xx_write_reg(dev, OTP_ADDR1,
- ((offset + i) >> 8) & OTP_ADDR1_15_11);
+ ((offset + i) >> 8) & OTP_ADDR1_15_11);
lan78xx_write_reg(dev, OTP_ADDR2,
- ((offset + i) & OTP_ADDR2_10_3));
+ ((offset + i) & OTP_ADDR2_10_3));
lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]);
lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_);
lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
@@ -959,7 +1008,7 @@ static int lan78xx_dataport_wait_not_busy(struct lan78xx_net *dev)
usleep_range(40, 100);
}
- netdev_warn(dev->net, "lan78xx_dataport_wait_not_busy timed out");
+ netdev_warn(dev->net, "%s timed out", __func__);
return -EIO;
}
@@ -972,7 +1021,7 @@ static int lan78xx_dataport_write(struct lan78xx_net *dev, u32 ram_select,
int i, ret;
if (usb_autopm_get_interface(dev->intf) < 0)
- return 0;
+ return 0;
mutex_lock(&pdata->dataport_mutex);
@@ -1045,9 +1094,9 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param)
for (i = 1; i < NUM_OF_MAF; i++) {
lan78xx_write_reg(dev, MAF_HI(i), 0);
lan78xx_write_reg(dev, MAF_LO(i),
- pdata->pfilter_table[i][1]);
+ pdata->pfilter_table[i][1]);
lan78xx_write_reg(dev, MAF_HI(i),
- pdata->pfilter_table[i][0]);
+ pdata->pfilter_table[i][0]);
}
lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
@@ -1066,11 +1115,12 @@ static void lan78xx_set_multicast(struct net_device *netdev)
RFE_CTL_DA_PERFECT_ | RFE_CTL_MCAST_HASH_);
for (i = 0; i < DP_SEL_VHF_HASH_LEN; i++)
- pdata->mchash_table[i] = 0;
+ pdata->mchash_table[i] = 0;
+
/* pfilter_table[0] has own HW address */
for (i = 1; i < NUM_OF_MAF; i++) {
- pdata->pfilter_table[i][0] =
- pdata->pfilter_table[i][1] = 0;
+ pdata->pfilter_table[i][0] = 0;
+ pdata->pfilter_table[i][1] = 0;
}
pdata->rfe_ctl |= RFE_CTL_BCAST_EN_;
@@ -1134,9 +1184,9 @@ static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex,
flow |= FLOW_CR_RX_FCEN_;
if (dev->udev->speed == USB_SPEED_SUPER)
- fct_flow = 0x817;
+ fct_flow = FLOW_CTRL_THRESHOLD(FLOW_ON_SS, FLOW_OFF_SS);
else if (dev->udev->speed == USB_SPEED_HIGH)
- fct_flow = 0x211;
+ fct_flow = FLOW_CTRL_THRESHOLD(FLOW_ON_HS, FLOW_OFF_HS);
netif_dbg(dev, link, dev->net, "rx pause %s, tx pause %s",
(cap & FLOW_CTRL_RX ? "enabled" : "disabled"),
@@ -1150,6 +1200,52 @@ static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex,
return 0;
}
+static int lan78xx_mac_reset(struct lan78xx_net *dev)
+{
+ unsigned long start_time = jiffies;
+ u32 val;
+ int ret;
+
+ mutex_lock(&dev->phy_mutex);
+
+ /* Resetting the device while there is activity on the MDIO
+ * bus can result in the MAC interface locking up and not
+ * completing register access transactions.
+ */
+ ret = lan78xx_phy_wait_not_busy(dev);
+ if (ret < 0)
+ goto done;
+
+ ret = lan78xx_read_reg(dev, MAC_CR, &val);
+ if (ret < 0)
+ goto done;
+
+ val |= MAC_CR_RST_;
+ ret = lan78xx_write_reg(dev, MAC_CR, val);
+ if (ret < 0)
+ goto done;
+
+ /* Wait for the reset to complete before allowing any further
+ * MAC register accesses otherwise the MAC may lock up.
+ */
+ do {
+ ret = lan78xx_read_reg(dev, MAC_CR, &val);
+ if (ret < 0)
+ goto done;
+
+ if (!(val & MAC_CR_RST_)) {
+ ret = 0;
+ goto done;
+ }
+ } while (!time_after(jiffies, start_time + HZ));
+
+ ret = -ETIMEDOUT;
+done:
+ mutex_unlock(&dev->phy_mutex);
+
+ return ret;
+}
+
static int lan78xx_link_reset(struct lan78xx_net *dev)
{
struct phy_device *phydev = dev->net->phydev;
@@ -1160,7 +1256,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
/* clear LAN78xx interrupt status */
ret = lan78xx_write_reg(dev, INT_STS, INT_STS_PHY_INT_);
if (unlikely(ret < 0))
- return -EIO;
+ return ret;
mutex_lock(&phydev->lock);
phy_read_status(phydev);
@@ -1171,13 +1267,9 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
dev->link_on = false;
/* reset MAC */
- ret = lan78xx_read_reg(dev, MAC_CR, &buf);
- if (unlikely(ret < 0))
- return -EIO;
- buf |= MAC_CR_RST_;
- ret = lan78xx_write_reg(dev, MAC_CR, buf);
- if (unlikely(ret < 0))
- return -EIO;
+ ret = lan78xx_mac_reset(dev);
+ if (ret < 0)
+ return ret;
del_timer(&dev->stat_monitor);
} else if (link && !dev->link_on) {
@@ -1189,18 +1281,30 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
if (ecmd.base.speed == 1000) {
/* disable U2 */
ret = lan78xx_read_reg(dev, USB_CFG1, &buf);
+ if (ret < 0)
+ return ret;
buf &= ~USB_CFG1_DEV_U2_INIT_EN_;
ret = lan78xx_write_reg(dev, USB_CFG1, buf);
+ if (ret < 0)
+ return ret;
/* enable U1 */
ret = lan78xx_read_reg(dev, USB_CFG1, &buf);
+ if (ret < 0)
+ return ret;
buf |= USB_CFG1_DEV_U1_INIT_EN_;
ret = lan78xx_write_reg(dev, USB_CFG1, buf);
+ if (ret < 0)
+ return ret;
} else {
/* enable U1 & U2 */
ret = lan78xx_read_reg(dev, USB_CFG1, &buf);
+ if (ret < 0)
+ return ret;
buf |= USB_CFG1_DEV_U2_INIT_EN_;
buf |= USB_CFG1_DEV_U1_INIT_EN_;
ret = lan78xx_write_reg(dev, USB_CFG1, buf);
+ if (ret < 0)
+ return ret;
}
}
@@ -1218,6 +1322,8 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
ret = lan78xx_update_flowcontrol(dev, ecmd.base.duplex, ladv,
radv);
+ if (ret < 0)
+ return ret;
if (!timer_pending(&dev->stat_monitor)) {
dev->delta = 1;
@@ -1228,7 +1334,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
tasklet_schedule(&dev->bh);
}
- return ret;
+ return 0;
}
/* some work can't be done in tasklets, so we use keventd
@@ -1264,9 +1370,10 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb)
generic_handle_irq(dev->domain_data.phyirq);
local_irq_enable();
}
- } else
+ } else {
netdev_warn(dev->net,
"unexpected interrupt: 0x%08x\n", intdata);
+ }
}
static int lan78xx_ethtool_get_eeprom_len(struct net_device *netdev)
@@ -1355,7 +1462,7 @@ static void lan78xx_get_wol(struct net_device *netdev,
struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
if (usb_autopm_get_interface(dev->intf) < 0)
- return;
+ return;
ret = lan78xx_read_reg(dev, USB_CFG0, &buf);
if (unlikely(ret < 0)) {
@@ -2003,7 +2110,7 @@ static int lan8835_fixup(struct phy_device *phydev)
/* RGMII MAC TXC Delay Enable */
lan78xx_write_reg(dev, MAC_RGMII_ID,
- MAC_RGMII_ID_TXC_DELAY_EN_);
+ MAC_RGMII_ID_TXC_DELAY_EN_);
/* RGMII TX DLL Tune Adjust */
lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00);
@@ -2267,11 +2374,16 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu)
int ll_mtu = new_mtu + netdev->hard_header_len;
int old_hard_mtu = dev->hard_mtu;
int old_rx_urb_size = dev->rx_urb_size;
+ int ret;
/* no second zero-length packet read wanted after mtu-sized packets */
if ((ll_mtu % dev->maxpacket) == 0)
return -EDOM;
+ ret = usb_autopm_get_interface(dev->intf);
+ if (ret < 0)
+ return ret;
+
lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN);
netdev->mtu = new_mtu;
@@ -2287,6 +2399,8 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu)
}
}
+ usb_autopm_put_interface(dev->intf);
+
return 0;
}
@@ -2443,26 +2557,186 @@ static void lan78xx_init_ltm(struct lan78xx_net *dev)
lan78xx_write_reg(dev, LTM_INACTIVE1, regs[5]);
}
+static int lan78xx_start_hw(struct lan78xx_net *dev, u32 reg, u32 hw_enable)
+{
+ return lan78xx_update_reg(dev, reg, hw_enable, hw_enable);
+}
+
+static int lan78xx_stop_hw(struct lan78xx_net *dev, u32 reg, u32 hw_enabled,
+ u32 hw_disabled)
+{
+ unsigned long timeout;
+ bool stopped = true;
+ int ret;
+ u32 buf;
+
+ /* Stop the h/w block (if not already stopped) */
+
+ ret = lan78xx_read_reg(dev, reg, &buf);
+ if (ret < 0)
+ return ret;
+
+ if (buf & hw_enabled) {
+ buf &= ~hw_enabled;
+
+ ret = lan78xx_write_reg(dev, reg, buf);
+ if (ret < 0)
+ return ret;
+
+ stopped = false;
+ timeout = jiffies + HW_DISABLE_TIMEOUT;
+ do {
+ ret = lan78xx_read_reg(dev, reg, &buf);
+ if (ret < 0)
+ return ret;
+
+ if (buf & hw_disabled)
+ stopped = true;
+ else
+ msleep(HW_DISABLE_DELAY_MS);
+ } while (!stopped && !time_after(jiffies, timeout));
+ }
+
+ ret = stopped ? 0 : -ETIME;
+
+ return ret;
+}
+
+static int lan78xx_flush_fifo(struct lan78xx_net *dev, u32 reg, u32 fifo_flush)
+{
+ return lan78xx_update_reg(dev, reg, fifo_flush, fifo_flush);
+}
+
+static int lan78xx_start_tx_path(struct lan78xx_net *dev)
+{
+ int ret;
+
+ netif_dbg(dev, drv, dev->net, "start tx path");
+
+ /* Start the MAC transmitter */
+
+ ret = lan78xx_start_hw(dev, MAC_TX, MAC_TX_TXEN_);
+ if (ret < 0)
+ return ret;
+
+ /* Start the Tx FIFO */
+
+ ret = lan78xx_start_hw(dev, FCT_TX_CTL, FCT_TX_CTL_EN_);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int lan78xx_stop_tx_path(struct lan78xx_net *dev)
+{
+ int ret;
+
+ netif_dbg(dev, drv, dev->net, "stop tx path");
+
+ /* Stop the Tx FIFO */
+
+ ret = lan78xx_stop_hw(dev, FCT_TX_CTL, FCT_TX_CTL_EN_, FCT_TX_CTL_DIS_);
+ if (ret < 0)
+ return ret;
+
+ /* Stop the MAC transmitter */
+
+ ret = lan78xx_stop_hw(dev, MAC_TX, MAC_TX_TXEN_, MAC_TX_TXD_);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+/* The caller must ensure the Tx path is stopped before calling
+ * lan78xx_flush_tx_fifo().
+ */
+static int lan78xx_flush_tx_fifo(struct lan78xx_net *dev)
+{
+ return lan78xx_flush_fifo(dev, FCT_TX_CTL, FCT_TX_CTL_RST_);
+}
+
+static int lan78xx_start_rx_path(struct lan78xx_net *dev)
+{
+ int ret;
+
+ netif_dbg(dev, drv, dev->net, "start rx path");
+
+ /* Start the Rx FIFO */
+
+ ret = lan78xx_start_hw(dev, FCT_RX_CTL, FCT_RX_CTL_EN_);
+ if (ret < 0)
+ return ret;
+
+ /* Start the MAC receiver*/
+
+ ret = lan78xx_start_hw(dev, MAC_RX, MAC_RX_RXEN_);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int lan78xx_stop_rx_path(struct lan78xx_net *dev)
+{
+ int ret;
+
+ netif_dbg(dev, drv, dev->net, "stop rx path");
+
+ /* Stop the MAC receiver */
+
+ ret = lan78xx_stop_hw(dev, MAC_RX, MAC_RX_RXEN_, MAC_RX_RXD_);
+ if (ret < 0)
+ return ret;
+
+ /* Stop the Rx FIFO */
+
+ ret = lan78xx_stop_hw(dev, FCT_RX_CTL, FCT_RX_CTL_EN_, FCT_RX_CTL_DIS_);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+/* The caller must ensure the Rx path is stopped before calling
+ * lan78xx_flush_rx_fifo().
+ */
+static int lan78xx_flush_rx_fifo(struct lan78xx_net *dev)
+{
+ return lan78xx_flush_fifo(dev, FCT_RX_CTL, FCT_RX_CTL_RST_);
+}
+
static int lan78xx_reset(struct lan78xx_net *dev)
{
struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
- u32 buf;
- int ret = 0;
unsigned long timeout;
+ int ret;
+ u32 buf;
u8 sig;
ret = lan78xx_read_reg(dev, HW_CFG, &buf);
+ if (ret < 0)
+ return ret;
+
buf |= HW_CFG_LRST_;
+
ret = lan78xx_write_reg(dev, HW_CFG, buf);
+ if (ret < 0)
+ return ret;
timeout = jiffies + HZ;
do {
mdelay(1);
ret = lan78xx_read_reg(dev, HW_CFG, &buf);
+ if (ret < 0)
+ return ret;
+
if (time_after(jiffies, timeout)) {
netdev_warn(dev->net,
"timeout on completion of LiteReset");
- return -EIO;
+ ret = -ETIMEDOUT;
+ return ret;
}
} while (buf & HW_CFG_LRST_);
@@ -2470,13 +2744,22 @@ static int lan78xx_reset(struct lan78xx_net *dev)
/* save DEVID for later usage */
ret = lan78xx_read_reg(dev, ID_REV, &buf);
+ if (ret < 0)
+ return ret;
+
dev->chipid = (buf & ID_REV_CHIP_ID_MASK_) >> 16;
dev->chiprev = buf & ID_REV_CHIP_REV_MASK_;
/* Respond to the IN token with a NAK */
ret = lan78xx_read_reg(dev, USB_CFG0, &buf);
+ if (ret < 0)
+ return ret;
+
buf |= USB_CFG_BIR_;
+
ret = lan78xx_write_reg(dev, USB_CFG0, buf);
+ if (ret < 0)
+ return ret;
/* Init LTM */
lan78xx_init_ltm(dev);
@@ -2499,53 +2782,105 @@ static int lan78xx_reset(struct lan78xx_net *dev)
}
ret = lan78xx_write_reg(dev, BURST_CAP, buf);
+ if (ret < 0)
+ return ret;
+
ret = lan78xx_write_reg(dev, BULK_IN_DLY, DEFAULT_BULK_IN_DELAY);
+ if (ret < 0)
+ return ret;
ret = lan78xx_read_reg(dev, HW_CFG, &buf);
+ if (ret < 0)
+ return ret;
+
buf |= HW_CFG_MEF_;
+
ret = lan78xx_write_reg(dev, HW_CFG, buf);
+ if (ret < 0)
+ return ret;
ret = lan78xx_read_reg(dev, USB_CFG0, &buf);
+ if (ret < 0)
+ return ret;
+
buf |= USB_CFG_BCE_;
+
ret = lan78xx_write_reg(dev, USB_CFG0, buf);
+ if (ret < 0)
+ return ret;
/* set FIFO sizes */
buf = (MAX_RX_FIFO_SIZE - 512) / 512;
+
ret = lan78xx_write_reg(dev, FCT_RX_FIFO_END, buf);
+ if (ret < 0)
+ return ret;
buf = (MAX_TX_FIFO_SIZE - 512) / 512;
+
ret = lan78xx_write_reg(dev, FCT_TX_FIFO_END, buf);
+ if (ret < 0)
+ return ret;
ret = lan78xx_write_reg(dev, INT_STS, INT_STS_CLEAR_ALL_);
+ if (ret < 0)
+ return ret;
+
ret = lan78xx_write_reg(dev, FLOW, 0);
+ if (ret < 0)
+ return ret;
+
ret = lan78xx_write_reg(dev, FCT_FLOW, 0);
+ if (ret < 0)
+ return ret;
/* Don't need rfe_ctl_lock during initialisation */
ret = lan78xx_read_reg(dev, RFE_CTL, &pdata->rfe_ctl);
+ if (ret < 0)
+ return ret;
+
pdata->rfe_ctl |= RFE_CTL_BCAST_EN_ | RFE_CTL_DA_PERFECT_;
+
ret = lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
+ if (ret < 0)
+ return ret;
/* Enable or disable checksum offload engines */
- lan78xx_set_features(dev->net, dev->net->features);
+ ret = lan78xx_set_features(dev->net, dev->net->features);
+ if (ret < 0)
+ return ret;
lan78xx_set_multicast(dev->net);
/* reset PHY */
ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+ if (ret < 0)
+ return ret;
+
buf |= PMT_CTL_PHY_RST_;
+
ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+ if (ret < 0)
+ return ret;
timeout = jiffies + HZ;
do {
mdelay(1);
ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+ if (ret < 0)
+ return ret;
+
if (time_after(jiffies, timeout)) {
netdev_warn(dev->net, "timeout waiting for PHY Reset");
- return -EIO;
+ ret = -ETIMEDOUT;
+ return ret;
}
} while ((buf & PMT_CTL_PHY_RST_) || !(buf & PMT_CTL_READY_));
ret = lan78xx_read_reg(dev, MAC_CR, &buf);
+ if (ret < 0)
+ return ret;
+
/* LAN7801 only has RGMII mode */
if (dev->chipid == ID_REV_CHIP_ID_7801_)
buf &= ~MAC_CR_GMII_EN_;
@@ -2559,27 +2894,13 @@ static int lan78xx_reset(struct lan78xx_net *dev)
}
}
ret = lan78xx_write_reg(dev, MAC_CR, buf);
-
- ret = lan78xx_read_reg(dev, MAC_TX, &buf);
- buf |= MAC_TX_TXEN_;
- ret = lan78xx_write_reg(dev, MAC_TX, buf);
-
- ret = lan78xx_read_reg(dev, FCT_TX_CTL, &buf);
- buf |= FCT_TX_CTL_EN_;
- ret = lan78xx_write_reg(dev, FCT_TX_CTL, buf);
+ if (ret < 0)
+ return ret;
ret = lan78xx_set_rx_max_frame_length(dev,
dev->net->mtu + VLAN_ETH_HLEN);
- ret = lan78xx_read_reg(dev, MAC_RX, &buf);
- buf |= MAC_RX_RXEN_;
- ret = lan78xx_write_reg(dev, MAC_RX, buf);
-
- ret = lan78xx_read_reg(dev, FCT_RX_CTL, &buf);
- buf |= FCT_RX_CTL_EN_;
- ret = lan78xx_write_reg(dev, FCT_RX_CTL, buf);
-
- return 0;
+ return ret;
}
static void lan78xx_init_stats(struct lan78xx_net *dev)
@@ -2613,9 +2934,13 @@ static int lan78xx_open(struct net_device *net)
struct lan78xx_net *dev = netdev_priv(net);
int ret;
+ netif_dbg(dev, ifup, dev->net, "open device");
+
ret = usb_autopm_get_interface(dev->intf);
if (ret < 0)
- goto out;
+ return ret;
+
+ mutex_lock(&dev->dev_mutex);
phy_start(net->phydev);
@@ -2631,6 +2956,20 @@ static int lan78xx_open(struct net_device *net)
}
}
+ ret = lan78xx_flush_rx_fifo(dev);
+ if (ret < 0)
+ goto done;
+ ret = lan78xx_flush_tx_fifo(dev);
+ if (ret < 0)
+ goto done;
+
+ ret = lan78xx_start_tx_path(dev);
+ if (ret < 0)
+ goto done;
+ ret = lan78xx_start_rx_path(dev);
+ if (ret < 0)
+ goto done;
+
lan78xx_init_stats(dev);
set_bit(EVENT_DEV_OPEN, &dev->flags);
@@ -2641,9 +2980,10 @@ static int lan78xx_open(struct net_device *net)
lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
done:
+ mutex_unlock(&dev->dev_mutex);
+
usb_autopm_put_interface(dev->intf);
-out:
return ret;
}
@@ -2660,53 +3000,74 @@ static void lan78xx_terminate_urbs(struct lan78xx_net *dev)
temp = unlink_urbs(dev, &dev->txq) + unlink_urbs(dev, &dev->rxq);
/* maybe wait for deletions to finish. */
- while (!skb_queue_empty(&dev->rxq) &&
- !skb_queue_empty(&dev->txq) &&
- !skb_queue_empty(&dev->done)) {
+ while (!skb_queue_empty(&dev->rxq) ||
+ !skb_queue_empty(&dev->txq)) {
schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS));
set_current_state(TASK_UNINTERRUPTIBLE);
netif_dbg(dev, ifdown, dev->net,
- "waited for %d urb completions\n", temp);
+ "waited for %d urb completions", temp);
}
set_current_state(TASK_RUNNING);
dev->wait = NULL;
remove_wait_queue(&unlink_wakeup, &wait);
+
+ while (!skb_queue_empty(&dev->done)) {
+ struct skb_data *entry;
+ struct sk_buff *skb;
+
+ skb = skb_dequeue(&dev->done);
+ entry = (struct skb_data *)(skb->cb);
+ usb_free_urb(entry->urb);
+ dev_kfree_skb(skb);
+ }
}
static int lan78xx_stop(struct net_device *net)
{
struct lan78xx_net *dev = netdev_priv(net);
+ netif_dbg(dev, ifup, dev->net, "stop device");
+
+ mutex_lock(&dev->dev_mutex);
+
if (timer_pending(&dev->stat_monitor))
del_timer_sync(&dev->stat_monitor);
- if (net->phydev)
- phy_stop(net->phydev);
-
clear_bit(EVENT_DEV_OPEN, &dev->flags);
netif_stop_queue(net);
+ tasklet_kill(&dev->bh);
+
+ lan78xx_terminate_urbs(dev);
netif_info(dev, ifdown, dev->net,
"stop stats: rx/tx %lu/%lu, errs %lu/%lu\n",
net->stats.rx_packets, net->stats.tx_packets,
net->stats.rx_errors, net->stats.tx_errors);
- lan78xx_terminate_urbs(dev);
+ /* ignore errors that occur stopping the Tx and Rx data paths */
+ lan78xx_stop_tx_path(dev);
+ lan78xx_stop_rx_path(dev);
- usb_kill_urb(dev->urb_intr);
+ if (net->phydev)
+ phy_stop(net->phydev);
- skb_queue_purge(&dev->rxq_pause);
+ usb_kill_urb(dev->urb_intr);
/* deferred work (task, timer, softirq) must also stop.
* can't flush_scheduled_work() until we drop rtnl (later),
* else workers could deadlock; so make workers a NOP.
*/
- dev->flags = 0;
+ clear_bit(EVENT_TX_HALT, &dev->flags);
+ clear_bit(EVENT_RX_HALT, &dev->flags);
+ clear_bit(EVENT_LINK_RESET, &dev->flags);
+ clear_bit(EVENT_STAT_UPDATE, &dev->flags);
+
cancel_delayed_work_sync(&dev->wq);
- tasklet_kill(&dev->bh);
usb_autopm_put_interface(dev->intf);
+ mutex_unlock(&dev->dev_mutex);
+
return 0;
}
@@ -2795,16 +3156,23 @@ static void tx_complete(struct urb *urb)
/* software-driven interface shutdown */
case -ECONNRESET:
case -ESHUTDOWN:
+ netif_dbg(dev, tx_err, dev->net,
+ "tx err interface gone %d\n",
+ entry->urb->status);
break;
case -EPROTO:
case -ETIME:
case -EILSEQ:
netif_stop_queue(dev->net);
+ netif_dbg(dev, tx_err, dev->net,
+ "tx err queue stopped %d\n",
+ entry->urb->status);
break;
default:
netif_dbg(dev, tx_err, dev->net,
- "tx err %d\n", entry->urb->status);
+ "unknown tx err %d\n",
+ entry->urb->status);
break;
}
}
@@ -2829,6 +3197,9 @@ lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net)
struct lan78xx_net *dev = netdev_priv(net);
struct sk_buff *skb2 = NULL;
+ if (test_bit(EVENT_DEV_ASLEEP, &dev->flags))
+ schedule_delayed_work(&dev->wq, 0);
+
if (skb) {
skb_tx_timestamp(skb);
skb2 = lan78xx_tx_prep(dev, skb, GFP_ATOMIC);
@@ -2988,11 +3359,6 @@ static void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb)
{
int status;
- if (test_bit(EVENT_RX_PAUSED, &dev->flags)) {
- skb_queue_tail(&dev->rxq_pause, skb);
- return;
- }
-
dev->net->stats.rx_packets++;
dev->net->stats.rx_bytes += skb->len;
@@ -3140,6 +3506,7 @@ static int rx_submit(struct lan78xx_net *dev, struct urb *urb, gfp_t flags)
lan78xx_defer_kevent(dev, EVENT_RX_HALT);
break;
case -ENODEV:
+ case -ENOENT:
netif_dbg(dev, ifdown, dev->net, "device gone\n");
netif_device_detach(dev->net);
break;
@@ -3340,6 +3707,12 @@ gso_skb:
lan78xx_defer_kevent(dev, EVENT_TX_HALT);
usb_autopm_put_interface_async(dev->intf);
break;
+ case -ENODEV:
+ case -ENOENT:
+ netif_dbg(dev, tx_err, dev->net,
+ "tx: submit urb err %d (disconnected?)", ret);
+ netif_device_detach(dev->net);
+ break;
default:
usb_autopm_put_interface_async(dev->intf);
netif_dbg(dev, tx_err, dev->net,
@@ -3356,9 +3729,10 @@ drop:
if (skb)
dev_kfree_skb_any(skb);
usb_free_urb(urb);
- } else
+ } else {
netif_dbg(dev, tx_queued, dev->net,
"> tx, len %d, type 0x%x\n", length, skb->protocol);
+ }
}
static void lan78xx_rx_bh(struct lan78xx_net *dev)
@@ -3421,8 +3795,7 @@ static void lan78xx_bh(struct tasklet_struct *t)
if (!skb_queue_empty(&dev->txq_pend))
lan78xx_tx_bh(dev);
- if (!timer_pending(&dev->delay) &&
- !test_bit(EVENT_RX_HALT, &dev->flags))
+ if (!test_bit(EVENT_RX_HALT, &dev->flags))
lan78xx_rx_bh(dev);
}
}
@@ -3434,18 +3807,20 @@ static void lan78xx_delayedwork(struct work_struct *work)
dev = container_of(work, struct lan78xx_net, wq.work);
+ if (test_bit(EVENT_DEV_DISCONNECT, &dev->flags))
+ return;
+
+ if (usb_autopm_get_interface(dev->intf) < 0)
+ return;
+
if (test_bit(EVENT_TX_HALT, &dev->flags)) {
unlink_urbs(dev, &dev->txq);
- status = usb_autopm_get_interface(dev->intf);
- if (status < 0)
- goto fail_pipe;
+
status = usb_clear_halt(dev->udev, dev->pipe_out);
- usb_autopm_put_interface(dev->intf);
if (status < 0 &&
status != -EPIPE &&
status != -ESHUTDOWN) {
if (netif_msg_tx_err(dev))
-fail_pipe:
netdev_err(dev->net,
"can't clear tx halt, status %d\n",
status);
@@ -3455,18 +3830,14 @@ fail_pipe:
netif_wake_queue(dev->net);
}
}
+
if (test_bit(EVENT_RX_HALT, &dev->flags)) {
unlink_urbs(dev, &dev->rxq);
- status = usb_autopm_get_interface(dev->intf);
- if (status < 0)
- goto fail_halt;
status = usb_clear_halt(dev->udev, dev->pipe_in);
- usb_autopm_put_interface(dev->intf);
if (status < 0 &&
status != -EPIPE &&
status != -ESHUTDOWN) {
if (netif_msg_rx_err(dev))
-fail_halt:
netdev_err(dev->net,
"can't clear rx halt, status %d\n",
status);
@@ -3480,16 +3851,9 @@ fail_halt:
int ret = 0;
clear_bit(EVENT_LINK_RESET, &dev->flags);
- status = usb_autopm_get_interface(dev->intf);
- if (status < 0)
- goto skip_reset;
if (lan78xx_link_reset(dev) < 0) {
- usb_autopm_put_interface(dev->intf);
-skip_reset:
netdev_info(dev->net, "link reset failed (%d)\n",
ret);
- } else {
- usb_autopm_put_interface(dev->intf);
}
}
@@ -3503,6 +3867,8 @@ skip_reset:
dev->delta = min((dev->delta * 2), 50);
}
+
+ usb_autopm_put_interface(dev->intf);
}
static void intr_complete(struct urb *urb)
@@ -3518,6 +3884,7 @@ static void intr_complete(struct urb *urb)
/* software-driven interface shutdown */
case -ENOENT: /* urb killed */
+ case -ENODEV: /* hardware gone */
case -ESHUTDOWN: /* hardware gone */
netif_dbg(dev, ifdown, dev->net,
"intr shutdown, code %d\n", status);
@@ -3531,14 +3898,29 @@ static void intr_complete(struct urb *urb)
break;
}
- if (!netif_running(dev->net))
+ if (!netif_device_present(dev->net) ||
+ !netif_running(dev->net)) {
+ netdev_warn(dev->net, "not submitting new status URB");
return;
+ }
memset(urb->transfer_buffer, 0, urb->transfer_buffer_length);
status = usb_submit_urb(urb, GFP_ATOMIC);
- if (status != 0)
+
+ switch (status) {
+ case 0:
+ break;
+ case -ENODEV:
+ case -ENOENT:
+ netif_dbg(dev, timer, dev->net,
+ "intr resubmit %d (disconnect?)", status);
+ netif_device_detach(dev->net);
+ break;
+ default:
netif_err(dev, timer, dev->net,
"intr resubmit --> %d\n", status);
+ break;
+ }
}
static void lan78xx_disconnect(struct usb_interface *intf)
@@ -3553,8 +3935,15 @@ static void lan78xx_disconnect(struct usb_interface *intf)
if (!dev)
return;
+ set_bit(EVENT_DEV_DISCONNECT, &dev->flags);
+
udev = interface_to_usbdev(intf);
net = dev->net;
+
+ unregister_netdev(net);
+
+ cancel_delayed_work_sync(&dev->wq);
+
phydev = net->phydev;
phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0);
@@ -3565,12 +3954,11 @@ static void lan78xx_disconnect(struct usb_interface *intf)
if (phy_is_pseudo_fixed_link(phydev))
fixed_phy_unregister(phydev);
- unregister_netdev(net);
-
- cancel_delayed_work_sync(&dev->wq);
-
usb_scuttle_anchored_urbs(&dev->deferred);
+ if (timer_pending(&dev->stat_monitor))
+ del_timer_sync(&dev->stat_monitor);
+
lan78xx_unbind(dev, intf);
usb_kill_urb(dev->urb_intr);
@@ -3609,7 +3997,7 @@ static const struct net_device_ops lan78xx_netdev_ops = {
.ndo_change_mtu = lan78xx_change_mtu,
.ndo_set_mac_address = lan78xx_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = phy_do_ioctl_running,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_set_rx_mode = lan78xx_set_multicast,
.ndo_set_features = lan78xx_set_features,
.ndo_vlan_rx_add_vid = lan78xx_vlan_rx_add_vid,
@@ -3632,8 +4020,8 @@ static int lan78xx_probe(struct usb_interface *intf,
struct net_device *netdev;
struct usb_device *udev;
int ret;
- unsigned maxp;
- unsigned period;
+ unsigned int maxp;
+ unsigned int period;
u8 *buf = NULL;
udev = interface_to_usbdev(intf);
@@ -3659,9 +4047,9 @@ static int lan78xx_probe(struct usb_interface *intf,
skb_queue_head_init(&dev->rxq);
skb_queue_head_init(&dev->txq);
skb_queue_head_init(&dev->done);
- skb_queue_head_init(&dev->rxq_pause);
skb_queue_head_init(&dev->txq_pend);
mutex_init(&dev->phy_mutex);
+ mutex_init(&dev->dev_mutex);
tasklet_setup(&dev->bh, lan78xx_bh);
INIT_DELAYED_WORK(&dev->wq, lan78xx_delayedwork);
@@ -3798,37 +4186,119 @@ static u16 lan78xx_wakeframe_crc16(const u8 *buf, int len)
return crc;
}
-static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
+static int lan78xx_set_auto_suspend(struct lan78xx_net *dev)
{
u32 buf;
- int mask_index;
- u16 crc;
- u32 temp_wucsr;
- u32 temp_pmt_ctl;
+ int ret;
+
+ ret = lan78xx_stop_tx_path(dev);
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_stop_rx_path(dev);
+ if (ret < 0)
+ return ret;
+
+ /* auto suspend (selective suspend) */
+
+ ret = lan78xx_write_reg(dev, WUCSR, 0);
+ if (ret < 0)
+ return ret;
+ ret = lan78xx_write_reg(dev, WUCSR2, 0);
+ if (ret < 0)
+ return ret;
+ ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL);
+ if (ret < 0)
+ return ret;
+
+ /* set goodframe wakeup */
+
+ ret = lan78xx_read_reg(dev, WUCSR, &buf);
+ if (ret < 0)
+ return ret;
+
+ buf |= WUCSR_RFE_WAKE_EN_;
+ buf |= WUCSR_STORE_WAKE_;
+
+ ret = lan78xx_write_reg(dev, WUCSR, buf);
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+ if (ret < 0)
+ return ret;
+
+ buf &= ~PMT_CTL_RES_CLR_WKP_EN_;
+ buf |= PMT_CTL_RES_CLR_WKP_STS_;
+ buf |= PMT_CTL_PHY_WAKE_EN_;
+ buf |= PMT_CTL_WOL_EN_;
+ buf &= ~PMT_CTL_SUS_MODE_MASK_;
+ buf |= PMT_CTL_SUS_MODE_3_;
+
+ ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+ if (ret < 0)
+ return ret;
+
+ buf |= PMT_CTL_WUPS_MASK_;
+
+ ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_start_rx_path(dev);
+
+ return ret;
+}
+
+static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
+{
const u8 ipv4_multicast[3] = { 0x01, 0x00, 0x5E };
const u8 ipv6_multicast[3] = { 0x33, 0x33 };
const u8 arp_type[2] = { 0x08, 0x06 };
+ u32 temp_pmt_ctl;
+ int mask_index;
+ u32 temp_wucsr;
+ u32 buf;
+ u16 crc;
+ int ret;
- lan78xx_read_reg(dev, MAC_TX, &buf);
- buf &= ~MAC_TX_TXEN_;
- lan78xx_write_reg(dev, MAC_TX, buf);
- lan78xx_read_reg(dev, MAC_RX, &buf);
- buf &= ~MAC_RX_RXEN_;
- lan78xx_write_reg(dev, MAC_RX, buf);
+ ret = lan78xx_stop_tx_path(dev);
+ if (ret < 0)
+ return ret;
+ ret = lan78xx_stop_rx_path(dev);
+ if (ret < 0)
+ return ret;
- lan78xx_write_reg(dev, WUCSR, 0);
- lan78xx_write_reg(dev, WUCSR2, 0);
- lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL);
+ ret = lan78xx_write_reg(dev, WUCSR, 0);
+ if (ret < 0)
+ return ret;
+ ret = lan78xx_write_reg(dev, WUCSR2, 0);
+ if (ret < 0)
+ return ret;
+ ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL);
+ if (ret < 0)
+ return ret;
temp_wucsr = 0;
temp_pmt_ctl = 0;
- lan78xx_read_reg(dev, PMT_CTL, &temp_pmt_ctl);
+
+ ret = lan78xx_read_reg(dev, PMT_CTL, &temp_pmt_ctl);
+ if (ret < 0)
+ return ret;
+
temp_pmt_ctl &= ~PMT_CTL_RES_CLR_WKP_EN_;
temp_pmt_ctl |= PMT_CTL_RES_CLR_WKP_STS_;
- for (mask_index = 0; mask_index < NUM_OF_WUF_CFG; mask_index++)
- lan78xx_write_reg(dev, WUF_CFG(mask_index), 0);
+ for (mask_index = 0; mask_index < NUM_OF_WUF_CFG; mask_index++) {
+ ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), 0);
+ if (ret < 0)
+ return ret;
+ }
mask_index = 0;
if (wol & WAKE_PHY) {
@@ -3857,30 +4327,52 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
/* set WUF_CFG & WUF_MASK for IPv4 Multicast */
crc = lan78xx_wakeframe_crc16(ipv4_multicast, 3);
- lan78xx_write_reg(dev, WUF_CFG(mask_index),
+ ret = lan78xx_write_reg(dev, WUF_CFG(mask_index),
WUF_CFGX_EN_ |
WUF_CFGX_TYPE_MCAST_ |
(0 << WUF_CFGX_OFFSET_SHIFT_) |
(crc & WUF_CFGX_CRC16_MASK_));
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7);
+ if (ret < 0)
+ return ret;
+ ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
+ if (ret < 0)
+ return ret;
+ ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
+ if (ret < 0)
+ return ret;
+ ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
+ if (ret < 0)
+ return ret;
- lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7);
- lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
- lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
- lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
mask_index++;
/* for IPv6 Multicast */
crc = lan78xx_wakeframe_crc16(ipv6_multicast, 2);
- lan78xx_write_reg(dev, WUF_CFG(mask_index),
+ ret = lan78xx_write_reg(dev, WUF_CFG(mask_index),
WUF_CFGX_EN_ |
WUF_CFGX_TYPE_MCAST_ |
(0 << WUF_CFGX_OFFSET_SHIFT_) |
(crc & WUF_CFGX_CRC16_MASK_));
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3);
+ if (ret < 0)
+ return ret;
+ ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
+ if (ret < 0)
+ return ret;
+ ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
+ if (ret < 0)
+ return ret;
+ ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
+ if (ret < 0)
+ return ret;
- lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3);
- lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
- lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
- lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
mask_index++;
temp_pmt_ctl |= PMT_CTL_WOL_EN_;
@@ -3901,16 +4393,27 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
* for packettype (offset 12,13) = ARP (0x0806)
*/
crc = lan78xx_wakeframe_crc16(arp_type, 2);
- lan78xx_write_reg(dev, WUF_CFG(mask_index),
+ ret = lan78xx_write_reg(dev, WUF_CFG(mask_index),
WUF_CFGX_EN_ |
WUF_CFGX_TYPE_ALL_ |
(0 << WUF_CFGX_OFFSET_SHIFT_) |
(crc & WUF_CFGX_CRC16_MASK_));
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000);
+ if (ret < 0)
+ return ret;
+ ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
+ if (ret < 0)
+ return ret;
+ ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
+ if (ret < 0)
+ return ret;
+ ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
+ if (ret < 0)
+ return ret;
- lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000);
- lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
- lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
- lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
mask_index++;
temp_pmt_ctl |= PMT_CTL_WOL_EN_;
@@ -3918,7 +4421,9 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
temp_pmt_ctl |= PMT_CTL_SUS_MODE_0_;
}
- lan78xx_write_reg(dev, WUCSR, temp_wucsr);
+ ret = lan78xx_write_reg(dev, WUCSR, temp_wucsr);
+ if (ret < 0)
+ return ret;
/* when multiple WOL bits are set */
if (hweight_long((unsigned long)wol) > 1) {
@@ -3926,33 +4431,45 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
temp_pmt_ctl &= ~PMT_CTL_SUS_MODE_MASK_;
temp_pmt_ctl |= PMT_CTL_SUS_MODE_0_;
}
- lan78xx_write_reg(dev, PMT_CTL, temp_pmt_ctl);
+ ret = lan78xx_write_reg(dev, PMT_CTL, temp_pmt_ctl);
+ if (ret < 0)
+ return ret;
/* clear WUPS */
- lan78xx_read_reg(dev, PMT_CTL, &buf);
+ ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+ if (ret < 0)
+ return ret;
+
buf |= PMT_CTL_WUPS_MASK_;
- lan78xx_write_reg(dev, PMT_CTL, buf);
- lan78xx_read_reg(dev, MAC_RX, &buf);
- buf |= MAC_RX_RXEN_;
- lan78xx_write_reg(dev, MAC_RX, buf);
+ ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+ if (ret < 0)
+ return ret;
- return 0;
+ ret = lan78xx_start_rx_path(dev);
+
+ return ret;
}
static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message)
{
struct lan78xx_net *dev = usb_get_intfdata(intf);
- struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
- u32 buf;
+ bool dev_open;
int ret;
- if (!dev->suspend_count++) {
+ mutex_lock(&dev->dev_mutex);
+
+ netif_dbg(dev, ifdown, dev->net,
+ "suspending: pm event %#x", message.event);
+
+ dev_open = test_bit(EVENT_DEV_OPEN, &dev->flags);
+
+ if (dev_open) {
spin_lock_irq(&dev->txq.lock);
/* don't autosuspend while transmitting */
if ((skb_queue_len(&dev->txq) ||
skb_queue_len(&dev->txq_pend)) &&
- PMSG_IS_AUTO(message)) {
+ PMSG_IS_AUTO(message)) {
spin_unlock_irq(&dev->txq.lock);
ret = -EBUSY;
goto out;
@@ -3961,129 +4478,207 @@ static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message)
spin_unlock_irq(&dev->txq.lock);
}
- /* stop TX & RX */
- ret = lan78xx_read_reg(dev, MAC_TX, &buf);
- buf &= ~MAC_TX_TXEN_;
- ret = lan78xx_write_reg(dev, MAC_TX, buf);
- ret = lan78xx_read_reg(dev, MAC_RX, &buf);
- buf &= ~MAC_RX_RXEN_;
- ret = lan78xx_write_reg(dev, MAC_RX, buf);
+ /* stop RX */
+ ret = lan78xx_stop_rx_path(dev);
+ if (ret < 0)
+ goto out;
+
+ ret = lan78xx_flush_rx_fifo(dev);
+ if (ret < 0)
+ goto out;
+
+ /* stop Tx */
+ ret = lan78xx_stop_tx_path(dev);
+ if (ret < 0)
+ goto out;
- /* empty out the rx and queues */
+ /* empty out the Rx and Tx queues */
netif_device_detach(dev->net);
lan78xx_terminate_urbs(dev);
usb_kill_urb(dev->urb_intr);
/* reattach */
netif_device_attach(dev->net);
- }
- if (test_bit(EVENT_DEV_ASLEEP, &dev->flags)) {
del_timer(&dev->stat_monitor);
if (PMSG_IS_AUTO(message)) {
- /* auto suspend (selective suspend) */
- ret = lan78xx_read_reg(dev, MAC_TX, &buf);
- buf &= ~MAC_TX_TXEN_;
- ret = lan78xx_write_reg(dev, MAC_TX, buf);
- ret = lan78xx_read_reg(dev, MAC_RX, &buf);
- buf &= ~MAC_RX_RXEN_;
- ret = lan78xx_write_reg(dev, MAC_RX, buf);
+ ret = lan78xx_set_auto_suspend(dev);
+ if (ret < 0)
+ goto out;
+ } else {
+ struct lan78xx_priv *pdata;
- ret = lan78xx_write_reg(dev, WUCSR, 0);
- ret = lan78xx_write_reg(dev, WUCSR2, 0);
- ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL);
+ pdata = (struct lan78xx_priv *)(dev->data[0]);
+ netif_carrier_off(dev->net);
+ ret = lan78xx_set_suspend(dev, pdata->wol);
+ if (ret < 0)
+ goto out;
+ }
+ } else {
+ /* Interface is down; don't allow WOL and PHY
+ * events to wake up the host
+ */
+ u32 buf;
- /* set goodframe wakeup */
- ret = lan78xx_read_reg(dev, WUCSR, &buf);
+ set_bit(EVENT_DEV_ASLEEP, &dev->flags);
- buf |= WUCSR_RFE_WAKE_EN_;
- buf |= WUCSR_STORE_WAKE_;
+ ret = lan78xx_write_reg(dev, WUCSR, 0);
+ if (ret < 0)
+ goto out;
+ ret = lan78xx_write_reg(dev, WUCSR2, 0);
+ if (ret < 0)
+ goto out;
- ret = lan78xx_write_reg(dev, WUCSR, buf);
+ ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+ if (ret < 0)
+ goto out;
+
+ buf &= ~PMT_CTL_RES_CLR_WKP_EN_;
+ buf |= PMT_CTL_RES_CLR_WKP_STS_;
+ buf &= ~PMT_CTL_SUS_MODE_MASK_;
+ buf |= PMT_CTL_SUS_MODE_3_;
+
+ ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+ if (ret < 0)
+ goto out;
- ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+ ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+ if (ret < 0)
+ goto out;
+
+ buf |= PMT_CTL_WUPS_MASK_;
+
+ ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+ if (ret < 0)
+ goto out;
+ }
- buf &= ~PMT_CTL_RES_CLR_WKP_EN_;
- buf |= PMT_CTL_RES_CLR_WKP_STS_;
+ ret = 0;
+out:
+ mutex_unlock(&dev->dev_mutex);
- buf |= PMT_CTL_PHY_WAKE_EN_;
- buf |= PMT_CTL_WOL_EN_;
- buf &= ~PMT_CTL_SUS_MODE_MASK_;
- buf |= PMT_CTL_SUS_MODE_3_;
+ return ret;
+}
- ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+static bool lan78xx_submit_deferred_urbs(struct lan78xx_net *dev)
+{
+ bool pipe_halted = false;
+ struct urb *urb;
- ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+ while ((urb = usb_get_from_anchor(&dev->deferred))) {
+ struct sk_buff *skb = urb->context;
+ int ret;
- buf |= PMT_CTL_WUPS_MASK_;
+ if (!netif_device_present(dev->net) ||
+ !netif_carrier_ok(dev->net) ||
+ pipe_halted) {
+ usb_free_urb(urb);
+ dev_kfree_skb(skb);
+ continue;
+ }
- ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+ ret = usb_submit_urb(urb, GFP_ATOMIC);
- ret = lan78xx_read_reg(dev, MAC_RX, &buf);
- buf |= MAC_RX_RXEN_;
- ret = lan78xx_write_reg(dev, MAC_RX, buf);
+ if (ret == 0) {
+ netif_trans_update(dev->net);
+ lan78xx_queue_skb(&dev->txq, skb, tx_start);
} else {
- lan78xx_set_suspend(dev, pdata->wol);
+ usb_free_urb(urb);
+ dev_kfree_skb(skb);
+
+ if (ret == -EPIPE) {
+ netif_stop_queue(dev->net);
+ pipe_halted = true;
+ } else if (ret == -ENODEV) {
+ netif_device_detach(dev->net);
+ }
}
}
- ret = 0;
-out:
- return ret;
+ return pipe_halted;
}
static int lan78xx_resume(struct usb_interface *intf)
{
struct lan78xx_net *dev = usb_get_intfdata(intf);
- struct sk_buff *skb;
- struct urb *res;
+ bool dev_open;
int ret;
- u32 buf;
- if (!timer_pending(&dev->stat_monitor)) {
- dev->delta = 1;
- mod_timer(&dev->stat_monitor,
- jiffies + STAT_UPDATE_TIMER);
- }
+ mutex_lock(&dev->dev_mutex);
- if (!--dev->suspend_count) {
- /* resume interrupt URBs */
- if (dev->urb_intr && test_bit(EVENT_DEV_OPEN, &dev->flags))
- usb_submit_urb(dev->urb_intr, GFP_NOIO);
+ netif_dbg(dev, ifup, dev->net, "resuming device");
+
+ dev_open = test_bit(EVENT_DEV_OPEN, &dev->flags);
+
+ if (dev_open) {
+ bool pipe_halted = false;
+
+ ret = lan78xx_flush_tx_fifo(dev);
+ if (ret < 0)
+ goto out;
+
+ if (dev->urb_intr) {
+ int ret = usb_submit_urb(dev->urb_intr, GFP_KERNEL);
- spin_lock_irq(&dev->txq.lock);
- while ((res = usb_get_from_anchor(&dev->deferred))) {
- skb = (struct sk_buff *)res->context;
- ret = usb_submit_urb(res, GFP_ATOMIC);
if (ret < 0) {
- dev_kfree_skb_any(skb);
- usb_free_urb(res);
- usb_autopm_put_interface_async(dev->intf);
- } else {
- netif_trans_update(dev->net);
- lan78xx_queue_skb(&dev->txq, skb, tx_start);
+ if (ret == -ENODEV)
+ netif_device_detach(dev->net);
+
+ netdev_warn(dev->net, "Failed to submit intr URB");
}
}
+ spin_lock_irq(&dev->txq.lock);
+
+ if (netif_device_present(dev->net)) {
+ pipe_halted = lan78xx_submit_deferred_urbs(dev);
+
+ if (pipe_halted)
+ lan78xx_defer_kevent(dev, EVENT_TX_HALT);
+ }
+
clear_bit(EVENT_DEV_ASLEEP, &dev->flags);
+
spin_unlock_irq(&dev->txq.lock);
- if (test_bit(EVENT_DEV_OPEN, &dev->flags)) {
- if (!(skb_queue_len(&dev->txq) >= dev->tx_qlen))
- netif_start_queue(dev->net);
- tasklet_schedule(&dev->bh);
+ if (!pipe_halted &&
+ netif_device_present(dev->net) &&
+ (skb_queue_len(&dev->txq) < dev->tx_qlen))
+ netif_start_queue(dev->net);
+
+ ret = lan78xx_start_tx_path(dev);
+ if (ret < 0)
+ goto out;
+
+ tasklet_schedule(&dev->bh);
+
+ if (!timer_pending(&dev->stat_monitor)) {
+ dev->delta = 1;
+ mod_timer(&dev->stat_monitor,
+ jiffies + STAT_UPDATE_TIMER);
}
+
+ } else {
+ clear_bit(EVENT_DEV_ASLEEP, &dev->flags);
}
ret = lan78xx_write_reg(dev, WUCSR2, 0);
+ if (ret < 0)
+ goto out;
ret = lan78xx_write_reg(dev, WUCSR, 0);
+ if (ret < 0)
+ goto out;
ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL);
+ if (ret < 0)
+ goto out;
ret = lan78xx_write_reg(dev, WUCSR2, WUCSR2_NS_RCD_ |
WUCSR2_ARP_RCD_ |
WUCSR2_IPV6_TCPSYN_RCD_ |
WUCSR2_IPV4_TCPSYN_RCD_);
+ if (ret < 0)
+ goto out;
ret = lan78xx_write_reg(dev, WUCSR, WUCSR_EEE_TX_WAKE_ |
WUCSR_EEE_RX_WAKE_ |
@@ -4092,23 +4687,32 @@ static int lan78xx_resume(struct usb_interface *intf)
WUCSR_WUFR_ |
WUCSR_MPR_ |
WUCSR_BCST_FR_);
+ if (ret < 0)
+ goto out;
- ret = lan78xx_read_reg(dev, MAC_TX, &buf);
- buf |= MAC_TX_TXEN_;
- ret = lan78xx_write_reg(dev, MAC_TX, buf);
+ ret = 0;
+out:
+ mutex_unlock(&dev->dev_mutex);
- return 0;
+ return ret;
}
static int lan78xx_reset_resume(struct usb_interface *intf)
{
struct lan78xx_net *dev = usb_get_intfdata(intf);
+ int ret;
- lan78xx_reset(dev);
+ netif_dbg(dev, ifup, dev->net, "(reset) resuming device");
+
+ ret = lan78xx_reset(dev);
+ if (ret < 0)
+ return ret;
phy_start(dev->net->phydev);
- return lan78xx_resume(intf);
+ ret = lan78xx_resume(intf);
+
+ return ret;
}
static const struct usb_device_id products[] = {
diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c
index 2469bdcb1a04..66866bef25df 100644
--- a/drivers/net/usb/mcs7830.c
+++ b/drivers/net/usb/mcs7830.c
@@ -464,7 +464,7 @@ static const struct net_device_ops mcs7830_netdev_ops = {
.ndo_change_mtu = usbnet_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = mcs7830_ioctl,
+ .ndo_eth_ioctl = mcs7830_ioctl,
.ndo_set_rx_mode = mcs7830_set_multicast,
.ndo_set_mac_address = mcs7830_set_mac_address,
};
diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 652e9fcf0b77..6a92a3fef75e 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -446,7 +446,7 @@ static int enable_net_traffic(struct net_device *dev, struct usb_device *usb)
write_mii_word(pegasus, 0, 0x1b, &auxmode);
}
- return 0;
+ return ret;
fail:
netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
return ret;
@@ -835,7 +835,7 @@ static int pegasus_open(struct net_device *net)
if (!pegasus->rx_skb)
goto exit;
- res = set_registers(pegasus, EthID, 6, net->dev_addr);
+ set_registers(pegasus, EthID, 6, net->dev_addr);
usb_fill_bulk_urb(pegasus->rx_urb, pegasus->usb,
usb_rcvbulkpipe(pegasus->usb, 1),
@@ -1001,7 +1001,8 @@ static const struct ethtool_ops ops = {
.set_link_ksettings = pegasus_set_link_ksettings,
};
-static int pegasus_ioctl(struct net_device *net, struct ifreq *rq, int cmd)
+static int pegasus_siocdevprivate(struct net_device *net, struct ifreq *rq,
+ void __user *udata, int cmd)
{
__u16 *data = (__u16 *) &rq->ifr_ifru;
pegasus_t *pegasus = netdev_priv(net);
@@ -1269,7 +1270,7 @@ static int pegasus_resume(struct usb_interface *intf)
static const struct net_device_ops pegasus_netdev_ops = {
.ndo_open = pegasus_open,
.ndo_stop = pegasus_close,
- .ndo_do_ioctl = pegasus_ioctl,
+ .ndo_siocdevprivate = pegasus_siocdevprivate,
.ndo_start_xmit = pegasus_start_xmit,
.ndo_set_rx_mode = pegasus_set_multicast,
.ndo_tx_timeout = pegasus_tx_timeout,
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index e09b107b5c99..60ba9b734055 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3955,17 +3955,28 @@ static void rtl_clear_bp(struct r8152 *tp, u16 type)
case RTL_VER_06:
ocp_write_byte(tp, type, PLA_BP_EN, 0);
break;
+ case RTL_VER_14:
+ ocp_write_word(tp, type, USB_BP2_EN, 0);
+
+ ocp_write_word(tp, type, USB_BP_8, 0);
+ ocp_write_word(tp, type, USB_BP_9, 0);
+ ocp_write_word(tp, type, USB_BP_10, 0);
+ ocp_write_word(tp, type, USB_BP_11, 0);
+ ocp_write_word(tp, type, USB_BP_12, 0);
+ ocp_write_word(tp, type, USB_BP_13, 0);
+ ocp_write_word(tp, type, USB_BP_14, 0);
+ ocp_write_word(tp, type, USB_BP_15, 0);
+ break;
case RTL_VER_08:
case RTL_VER_09:
case RTL_VER_10:
case RTL_VER_11:
case RTL_VER_12:
case RTL_VER_13:
- case RTL_VER_14:
case RTL_VER_15:
default:
if (type == MCU_TYPE_USB) {
- ocp_write_byte(tp, MCU_TYPE_USB, USB_BP2_EN, 0);
+ ocp_write_word(tp, MCU_TYPE_USB, USB_BP2_EN, 0);
ocp_write_word(tp, MCU_TYPE_USB, USB_BP_8, 0);
ocp_write_word(tp, MCU_TYPE_USB, USB_BP_9, 0);
@@ -4331,7 +4342,6 @@ static bool rtl8152_is_fw_mac_ok(struct r8152 *tp, struct fw_mac *mac)
case RTL_VER_11:
case RTL_VER_12:
case RTL_VER_13:
- case RTL_VER_14:
case RTL_VER_15:
fw_reg = 0xf800;
bp_ba_addr = PLA_BP_BA;
@@ -4339,6 +4349,13 @@ static bool rtl8152_is_fw_mac_ok(struct r8152 *tp, struct fw_mac *mac)
bp_start = PLA_BP_0;
max_bp = 8;
break;
+ case RTL_VER_14:
+ fw_reg = 0xf800;
+ bp_ba_addr = PLA_BP_BA;
+ bp_en_addr = USB_BP2_EN;
+ bp_start = PLA_BP_0;
+ max_bp = 16;
+ break;
default:
goto out;
}
@@ -8831,7 +8848,9 @@ out:
}
static int rtl8152_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coalesce)
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct r8152 *tp = netdev_priv(netdev);
@@ -8850,7 +8869,9 @@ static int rtl8152_get_coalesce(struct net_device *netdev,
}
static int rtl8152_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coalesce)
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct r8152 *tp = netdev_priv(netdev);
int ret;
@@ -9173,7 +9194,7 @@ static int rtl8152_change_mtu(struct net_device *dev, int new_mtu)
static const struct net_device_ops rtl8152_netdev_ops = {
.ndo_open = rtl8152_open,
.ndo_stop = rtl8152_close,
- .ndo_do_ioctl = rtl8152_ioctl,
+ .ndo_eth_ioctl = rtl8152_ioctl,
.ndo_start_xmit = rtl8152_start_xmit,
.ndo_tx_timeout = rtl8152_tx_timeout,
.ndo_set_features = rtl8152_set_features,
diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c
index 7656f2a3afd9..4a1b0e0fc3a3 100644
--- a/drivers/net/usb/rtl8150.c
+++ b/drivers/net/usb/rtl8150.c
@@ -822,7 +822,8 @@ static const struct ethtool_ops ops = {
.get_link_ksettings = rtl8150_get_link_ksettings,
};
-static int rtl8150_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
+static int rtl8150_siocdevprivate(struct net_device *netdev, struct ifreq *rq,
+ void __user *udata, int cmd)
{
rtl8150_t *dev = netdev_priv(netdev);
u16 *data = (u16 *) & rq->ifr_ifru;
@@ -850,7 +851,7 @@ static int rtl8150_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
static const struct net_device_ops rtl8150_netdev_ops = {
.ndo_open = rtl8150_open,
.ndo_stop = rtl8150_close,
- .ndo_do_ioctl = rtl8150_ioctl,
+ .ndo_siocdevprivate = rtl8150_siocdevprivate,
.ndo_start_xmit = rtl8150_start_xmit,
.ndo_tx_timeout = rtl8150_tx_timeout,
.ndo_set_rx_mode = rtl8150_set_multicast,
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 13141dbfa3a8..76f7af161313 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -1439,7 +1439,7 @@ static const struct net_device_ops smsc75xx_netdev_ops = {
.ndo_change_mtu = smsc75xx_change_mtu,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = smsc75xx_ioctl,
+ .ndo_eth_ioctl = smsc75xx_ioctl,
.ndo_set_rx_mode = smsc75xx_set_multicast,
.ndo_set_features = smsc75xx_set_features,
};
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 4c8ee1cff4d4..7d953974eb9b 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -1044,7 +1044,7 @@ static const struct net_device_ops smsc95xx_netdev_ops = {
.ndo_get_stats64 = dev_get_tstats64,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = smsc95xx_ioctl,
+ .ndo_eth_ioctl = smsc95xx_ioctl,
.ndo_set_rx_mode = smsc95xx_set_multicast,
.ndo_set_features = smsc95xx_set_features,
};
diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c
index ce29261263cd..6516a37893e2 100644
--- a/drivers/net/usb/sr9700.c
+++ b/drivers/net/usb/sr9700.c
@@ -310,7 +310,7 @@ static const struct net_device_ops sr9700_netdev_ops = {
.ndo_change_mtu = usbnet_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = sr9700_ioctl,
+ .ndo_eth_ioctl = sr9700_ioctl,
.ndo_set_rx_mode = sr9700_set_multicast,
.ndo_set_mac_address = sr9700_set_mac_address,
};
diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c
index a822d81310d5..576401c8b1be 100644
--- a/drivers/net/usb/sr9800.c
+++ b/drivers/net/usb/sr9800.c
@@ -684,7 +684,7 @@ static const struct net_device_ops sr9800_netdev_ops = {
.ndo_get_stats64 = dev_get_tstats64,
.ndo_set_mac_address = sr_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = sr_ioctl,
+ .ndo_eth_ioctl = sr_ioctl,
.ndo_set_rx_mode = sr_set_multicast,
};
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 470e1c1e6353..840c1c2ab16a 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1725,7 +1725,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
dev->interrupt_count = 0;
dev->net = net;
- strcpy (net->name, "usb%d");
+ strscpy(net->name, "usb%d", sizeof(net->name));
memcpy (net->dev_addr, node_id, sizeof node_id);
/* rx and tx sides can use different message sizes;
@@ -1752,13 +1752,13 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
if ((dev->driver_info->flags & FLAG_ETHER) != 0 &&
((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 ||
(net->dev_addr [0] & 0x02) == 0))
- strcpy (net->name, "eth%d");
+ strscpy(net->name, "eth%d", sizeof(net->name));
/* WLAN devices should always be named "wlan%d" */
if ((dev->driver_info->flags & FLAG_WLAN) != 0)
- strcpy(net->name, "wlan%d");
+ strscpy(net->name, "wlan%d", sizeof(net->name));
/* WWAN devices should always be named "wwan%d" */
if ((dev->driver_info->flags & FLAG_WWAN) != 0)
- strcpy(net->name, "wwan%d");
+ strscpy(net->name, "wwan%d", sizeof(net->name));
/* devices that cannot do ARP */
if ((dev->driver_info->flags & FLAG_NOARP) != 0)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index bdb7ce3cb054..50eb43e5bf45 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -224,12 +224,13 @@ static void veth_get_channels(struct net_device *dev,
{
channels->tx_count = dev->real_num_tx_queues;
channels->rx_count = dev->real_num_rx_queues;
- channels->max_tx = dev->real_num_tx_queues;
- channels->max_rx = dev->real_num_rx_queues;
- channels->combined_count = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
- channels->max_combined = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
+ channels->max_tx = dev->num_tx_queues;
+ channels->max_rx = dev->num_rx_queues;
}
+static int veth_set_channels(struct net_device *dev,
+ struct ethtool_channels *ch);
+
static const struct ethtool_ops veth_ethtool_ops = {
.get_drvinfo = veth_get_drvinfo,
.get_link = ethtool_op_get_link,
@@ -239,6 +240,7 @@ static const struct ethtool_ops veth_ethtool_ops = {
.get_link_ksettings = veth_get_link_ksettings,
.get_ts_info = ethtool_op_get_ts_info,
.get_channels = veth_get_channels,
+ .set_channels = veth_set_channels,
};
/* general routines */
@@ -711,7 +713,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
int mac_len, delta, off;
struct xdp_buff xdp;
- skb_orphan_partial(skb);
+ skb_prepare_for_gro(skb);
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
@@ -928,12 +930,12 @@ static int veth_poll(struct napi_struct *napi, int budget)
return done;
}
-static int __veth_napi_enable(struct net_device *dev)
+static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
{
struct veth_priv *priv = netdev_priv(dev);
int err, i;
- for (i = 0; i < dev->real_num_rx_queues; i++) {
+ for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL);
@@ -941,7 +943,7 @@ static int __veth_napi_enable(struct net_device *dev)
goto err_xdp_ring;
}
- for (i = 0; i < dev->real_num_rx_queues; i++) {
+ for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
napi_enable(&rq->xdp_napi);
@@ -949,19 +951,25 @@ static int __veth_napi_enable(struct net_device *dev)
}
return 0;
+
err_xdp_ring:
- for (i--; i >= 0; i--)
+ for (i--; i >= start; i--)
ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
return err;
}
-static void veth_napi_del(struct net_device *dev)
+static int __veth_napi_enable(struct net_device *dev)
+{
+ return __veth_napi_enable_range(dev, 0, dev->real_num_rx_queues);
+}
+
+static void veth_napi_del_range(struct net_device *dev, int start, int end)
{
struct veth_priv *priv = netdev_priv(dev);
int i;
- for (i = 0; i < dev->real_num_rx_queues; i++) {
+ for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
rcu_assign_pointer(priv->rq[i].napi, NULL);
@@ -970,7 +978,7 @@ static void veth_napi_del(struct net_device *dev)
}
synchronize_net();
- for (i = 0; i < dev->real_num_rx_queues; i++) {
+ for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
rq->rx_notify_masked = false;
@@ -978,41 +986,90 @@ static void veth_napi_del(struct net_device *dev)
}
}
+static void veth_napi_del(struct net_device *dev)
+{
+ veth_napi_del_range(dev, 0, dev->real_num_rx_queues);
+}
+
static bool veth_gro_requested(const struct net_device *dev)
{
return !!(dev->wanted_features & NETIF_F_GRO);
}
-static int veth_enable_xdp(struct net_device *dev)
+static int veth_enable_xdp_range(struct net_device *dev, int start, int end,
+ bool napi_already_on)
{
- bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP);
struct veth_priv *priv = netdev_priv(dev);
int err, i;
- if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
- for (i = 0; i < dev->real_num_rx_queues; i++) {
- struct veth_rq *rq = &priv->rq[i];
+ for (i = start; i < end; i++) {
+ struct veth_rq *rq = &priv->rq[i];
- if (!napi_already_on)
- netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
- err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id);
- if (err < 0)
- goto err_rxq_reg;
+ if (!napi_already_on)
+ netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
+ err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id);
+ if (err < 0)
+ goto err_rxq_reg;
- err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
- MEM_TYPE_PAGE_SHARED,
- NULL);
- if (err < 0)
- goto err_reg_mem;
+ err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED,
+ NULL);
+ if (err < 0)
+ goto err_reg_mem;
- /* Save original mem info as it can be overwritten */
- rq->xdp_mem = rq->xdp_rxq.mem;
- }
+ /* Save original mem info as it can be overwritten */
+ rq->xdp_mem = rq->xdp_rxq.mem;
+ }
+ return 0;
+
+err_reg_mem:
+ xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
+err_rxq_reg:
+ for (i--; i >= start; i--) {
+ struct veth_rq *rq = &priv->rq[i];
+
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
+ if (!napi_already_on)
+ netif_napi_del(&rq->xdp_napi);
+ }
+
+ return err;
+}
+
+static void veth_disable_xdp_range(struct net_device *dev, int start, int end,
+ bool delete_napi)
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ int i;
+
+ for (i = start; i < end; i++) {
+ struct veth_rq *rq = &priv->rq[i];
+
+ rq->xdp_rxq.mem = rq->xdp_mem;
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
+
+ if (delete_napi)
+ netif_napi_del(&rq->xdp_napi);
+ }
+}
+
+static int veth_enable_xdp(struct net_device *dev)
+{
+ bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP);
+ struct veth_priv *priv = netdev_priv(dev);
+ int err, i;
+
+ if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
+ err = veth_enable_xdp_range(dev, 0, dev->real_num_rx_queues, napi_already_on);
+ if (err)
+ return err;
if (!napi_already_on) {
err = __veth_napi_enable(dev);
- if (err)
- goto err_rxq_reg;
+ if (err) {
+ veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true);
+ return err;
+ }
if (!veth_gro_requested(dev)) {
/* user-space did not require GRO, but adding XDP
@@ -1030,18 +1087,6 @@ static int veth_enable_xdp(struct net_device *dev)
}
return 0;
-err_reg_mem:
- xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
-err_rxq_reg:
- for (i--; i >= 0; i--) {
- struct veth_rq *rq = &priv->rq[i];
-
- xdp_rxq_info_unreg(&rq->xdp_rxq);
- if (!napi_already_on)
- netif_napi_del(&rq->xdp_napi);
- }
-
- return err;
}
static void veth_disable_xdp(struct net_device *dev)
@@ -1064,28 +1109,23 @@ static void veth_disable_xdp(struct net_device *dev)
}
}
- for (i = 0; i < dev->real_num_rx_queues; i++) {
- struct veth_rq *rq = &priv->rq[i];
-
- rq->xdp_rxq.mem = rq->xdp_mem;
- xdp_rxq_info_unreg(&rq->xdp_rxq);
- }
+ veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false);
}
-static int veth_napi_enable(struct net_device *dev)
+static int veth_napi_enable_range(struct net_device *dev, int start, int end)
{
struct veth_priv *priv = netdev_priv(dev);
int err, i;
- for (i = 0; i < dev->real_num_rx_queues; i++) {
+ for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
}
- err = __veth_napi_enable(dev);
+ err = __veth_napi_enable_range(dev, start, end);
if (err) {
- for (i = 0; i < dev->real_num_rx_queues; i++) {
+ for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
netif_napi_del(&rq->xdp_napi);
@@ -1095,6 +1135,128 @@ static int veth_napi_enable(struct net_device *dev)
return err;
}
+static int veth_napi_enable(struct net_device *dev)
+{
+ return veth_napi_enable_range(dev, 0, dev->real_num_rx_queues);
+}
+
+static void veth_disable_range_safe(struct net_device *dev, int start, int end)
+{
+ struct veth_priv *priv = netdev_priv(dev);
+
+ if (start >= end)
+ return;
+
+ if (priv->_xdp_prog) {
+ veth_napi_del_range(dev, start, end);
+ veth_disable_xdp_range(dev, start, end, false);
+ } else if (veth_gro_requested(dev)) {
+ veth_napi_del_range(dev, start, end);
+ }
+}
+
+static int veth_enable_range_safe(struct net_device *dev, int start, int end)
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ int err;
+
+ if (start >= end)
+ return 0;
+
+ if (priv->_xdp_prog) {
+ /* these channels are freshly initialized, napi is not on there even
+ * when GRO is requested
+ */
+ err = veth_enable_xdp_range(dev, start, end, false);
+ if (err)
+ return err;
+
+ err = __veth_napi_enable_range(dev, start, end);
+ if (err) {
+ /* on error always delete the newly added napis */
+ veth_disable_xdp_range(dev, start, end, true);
+ return err;
+ }
+ } else if (veth_gro_requested(dev)) {
+ return veth_napi_enable_range(dev, start, end);
+ }
+ return 0;
+}
+
+static int veth_set_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ unsigned int old_rx_count, new_rx_count;
+ struct veth_priv *peer_priv;
+ struct net_device *peer;
+ int err;
+
+ /* sanity check. Upper bounds are already enforced by the caller */
+ if (!ch->rx_count || !ch->tx_count)
+ return -EINVAL;
+
+ /* avoid breaking XDP, if that is enabled */
+ peer = rtnl_dereference(priv->peer);
+ peer_priv = peer ? netdev_priv(peer) : NULL;
+ if (priv->_xdp_prog && peer && ch->rx_count < peer->real_num_tx_queues)
+ return -EINVAL;
+
+ if (peer && peer_priv && peer_priv->_xdp_prog && ch->tx_count > peer->real_num_rx_queues)
+ return -EINVAL;
+
+ old_rx_count = dev->real_num_rx_queues;
+ new_rx_count = ch->rx_count;
+ if (netif_running(dev)) {
+ /* turn device off */
+ netif_carrier_off(dev);
+ if (peer)
+ netif_carrier_off(peer);
+
+ /* try to allocate new resources, as needed */
+ err = veth_enable_range_safe(dev, old_rx_count, new_rx_count);
+ if (err)
+ goto out;
+ }
+
+ err = netif_set_real_num_rx_queues(dev, ch->rx_count);
+ if (err)
+ goto revert;
+
+ err = netif_set_real_num_tx_queues(dev, ch->tx_count);
+ if (err) {
+ int err2 = netif_set_real_num_rx_queues(dev, old_rx_count);
+
+ /* this error condition could happen only if rx and tx change
+ * in opposite directions (e.g. tx nr raises, rx nr decreases)
+ * and we can't do anything to fully restore the original
+ * status
+ */
+ if (err2)
+ pr_warn("Can't restore rx queues config %d -> %d %d",
+ new_rx_count, old_rx_count, err2);
+ else
+ goto revert;
+ }
+
+out:
+ if (netif_running(dev)) {
+ /* note that we need to swap the arguments WRT the enable part
+ * to identify the range we have to disable
+ */
+ veth_disable_range_safe(dev, new_rx_count, old_rx_count);
+ netif_carrier_on(dev);
+ if (peer)
+ netif_carrier_on(peer);
+ }
+ return err;
+
+revert:
+ new_rx_count = old_rx_count;
+ old_rx_count = ch->rx_count;
+ goto out;
+}
+
static int veth_open(struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);
@@ -1447,6 +1609,23 @@ static void veth_disable_gro(struct net_device *dev)
netdev_update_features(dev);
}
+/* Limit a newly linked veth device to a single active tx queue and a
+ * single active rx queue, unless the corresponding IFLA_NUM_TX_QUEUES /
+ * IFLA_NUM_RX_QUEUES attribute is present in @tb.
+ *
+ * Returns 0, or the error reported by netif_set_real_num_{tx,rx}_queues().
+ */
+static int veth_init_queues(struct net_device *dev, struct nlattr *tb[])
+{
+	int ret = 0;
+
+	if (dev->num_tx_queues > 1 && !tb[IFLA_NUM_TX_QUEUES])
+		ret = netif_set_real_num_tx_queues(dev, 1);
+	if (ret)
+		return ret;
+
+	if (dev->num_rx_queues > 1 && !tb[IFLA_NUM_RX_QUEUES])
+		ret = netif_set_real_num_rx_queues(dev, 1);
+
+	return ret;
+}
+
static int veth_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
@@ -1556,13 +1735,21 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
priv = netdev_priv(dev);
rcu_assign_pointer(priv->peer, peer);
+ err = veth_init_queues(dev, tb);
+ if (err)
+ goto err_queues;
priv = netdev_priv(peer);
rcu_assign_pointer(priv->peer, dev);
+ err = veth_init_queues(peer, tb);
+ if (err)
+ goto err_queues;
veth_disable_gro(dev);
return 0;
+err_queues:
+ unregister_netdevice(dev);
err_register_dev:
/* nothing to do */
err_configure_peer:
@@ -1608,6 +1795,16 @@ static struct net *veth_get_link_net(const struct net_device *dev)
return peer ? dev_net(peer) : dev_net(dev);
}
+/* Queue count reported through veth_link_ops for both tx and rx: one
+ * queue per possible CPU, capped at 4096.
+ */
+static unsigned int veth_get_num_queues(void)
+{
+	/* enforce the same queue limit as rtnl_create_link */
+	int nr_queues = num_possible_cpus();
+
+	return nr_queues > 4096 ? 4096 : nr_queues;
+}
+
static struct rtnl_link_ops veth_link_ops = {
.kind = DRV_NAME,
.priv_size = sizeof(struct veth_priv),
@@ -1618,6 +1815,8 @@ static struct rtnl_link_ops veth_link_ops = {
.policy = veth_policy,
.maxtype = VETH_INFO_MAX,
.get_link_net = veth_get_link_net,
+ .get_num_tx_queues = veth_get_num_queues,
+ .get_num_rx_queues = veth_get_num_queues,
};
/*
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 56c3f8519093..271d38c1d9f8 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -63,7 +63,7 @@ static const unsigned long guest_offloads[] = {
VIRTIO_NET_F_GUEST_CSUM
};
-#define GUEST_OFFLOAD_LRO_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
+#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
(1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
(1ULL << VIRTIO_NET_F_GUEST_ECN) | \
(1ULL << VIRTIO_NET_F_GUEST_UFO))
@@ -380,7 +380,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
struct page *page, unsigned int offset,
unsigned int len, unsigned int truesize,
bool hdr_valid, unsigned int metasize,
- bool whole_page)
+ unsigned int headroom)
{
struct sk_buff *skb;
struct virtio_net_hdr_mrg_rxbuf *hdr;
@@ -398,28 +398,16 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
else
hdr_padded_len = sizeof(struct padded_vnet_hdr);
- /* If whole_page, there is an offset between the beginning of the
+ /* If headroom is not 0, there is an offset between the beginning of the
* data and the allocated space, otherwise the data and the allocated
* space are aligned.
*
* Buffers with headroom use PAGE_SIZE as alloc size, see
* add_recvbuf_mergeable() + get_mergeable_buf_len()
*/
- if (whole_page) {
- /* Buffers with whole_page use PAGE_SIZE as alloc size,
- * see add_recvbuf_mergeable() + get_mergeable_buf_len()
- */
- truesize = PAGE_SIZE;
-
- /* page maybe head page, so we should get the buf by p, not the
- * page
- */
- tailroom = truesize - len - offset_in_page(p);
- buf = (char *)((unsigned long)p & PAGE_MASK);
- } else {
- tailroom = truesize - len;
- buf = p;
- }
+ truesize = headroom ? PAGE_SIZE : truesize;
+ tailroom = truesize - len - headroom;
+ buf = p - headroom;
len -= hdr_len;
offset += hdr_padded_len;
@@ -540,19 +528,20 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
* functions to perfectly solve these three problems at the same time.
*/
#define virtnet_xdp_get_sq(vi) ({ \
+ int cpu = smp_processor_id(); \
struct netdev_queue *txq; \
typeof(vi) v = (vi); \
unsigned int qp; \
\
if (v->curr_queue_pairs > nr_cpu_ids) { \
qp = v->curr_queue_pairs - v->xdp_queue_pairs; \
- qp += smp_processor_id(); \
+ qp += cpu; \
txq = netdev_get_tx_queue(v->dev, qp); \
__netif_tx_acquire(txq); \
} else { \
- qp = smp_processor_id() % v->curr_queue_pairs; \
+ qp = cpu % v->curr_queue_pairs; \
txq = netdev_get_tx_queue(v->dev, qp); \
- __netif_tx_lock(txq, raw_smp_processor_id()); \
+ __netif_tx_lock(txq, cpu); \
} \
v->sq + qp; \
})
@@ -978,7 +967,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
put_page(page);
head_skb = page_to_skb(vi, rq, xdp_page, offset,
len, PAGE_SIZE, false,
- metasize, true);
+ metasize,
+ VIRTIO_XDP_HEADROOM);
return head_skb;
}
break;
@@ -1029,7 +1019,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
rcu_read_unlock();
head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
- metasize, !!headroom);
+ metasize, headroom);
curr_skb = head_skb;
if (unlikely(!curr_skb))
@@ -2208,14 +2198,14 @@ static int virtnet_set_channels(struct net_device *dev,
if (vi->rq[0].xdp_prog)
return -EINVAL;
- get_online_cpus();
+ cpus_read_lock();
err = _virtnet_set_queues(vi, queue_pairs);
if (err) {
- put_online_cpus();
+ cpus_read_unlock();
goto err;
}
virtnet_set_affinity(vi);
- put_online_cpus();
+ cpus_read_unlock();
netif_set_real_num_tx_queues(dev, queue_pairs);
netif_set_real_num_rx_queues(dev, queue_pairs);
@@ -2331,7 +2321,9 @@ static int virtnet_get_link_ksettings(struct net_device *dev,
}
static int virtnet_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct virtnet_info *vi = netdev_priv(dev);
int i, napi_weight;
@@ -2352,7 +2344,9 @@ static int virtnet_set_coalesce(struct net_device *dev,
}
static int virtnet_get_coalesce(struct net_device *dev,
- struct ethtool_coalesce *ec)
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct ethtool_coalesce ec_default = {
.cmd = ETHTOOL_GCOALESCE,
@@ -2515,7 +2509,7 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
- NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first");
+ NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
return -EOPNOTSUPP;
}
@@ -2646,15 +2640,15 @@ static int virtnet_set_features(struct net_device *dev,
u64 offloads;
int err;
- if ((dev->features ^ features) & NETIF_F_LRO) {
+ if ((dev->features ^ features) & NETIF_F_GRO_HW) {
if (vi->xdp_enabled)
return -EBUSY;
- if (features & NETIF_F_LRO)
+ if (features & NETIF_F_GRO_HW)
offloads = vi->guest_offloads_capable;
else
offloads = vi->guest_offloads_capable &
- ~GUEST_OFFLOAD_LRO_MASK;
+ ~GUEST_OFFLOAD_GRO_HW_MASK;
err = virtnet_set_guest_offloads(vi, offloads);
if (err)
@@ -2970,9 +2964,9 @@ static int init_vqs(struct virtnet_info *vi)
if (ret)
goto err_free;
- get_online_cpus();
+ cpus_read_lock();
virtnet_set_affinity(vi);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
@@ -3134,9 +3128,9 @@ static int virtnet_probe(struct virtio_device *vdev)
dev->features |= NETIF_F_RXCSUM;
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
- dev->features |= NETIF_F_LRO;
+ dev->features |= NETIF_F_GRO_HW;
if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
- dev->hw_features |= NETIF_F_LRO;
+ dev->hw_features |= NETIF_F_GRO_HW;
dev->vlan_features = dev->features;
diff --git a/drivers/net/vmxnet3/Makefile b/drivers/net/vmxnet3/Makefile
index c5a167a1c85c..7a38925f4165 100644
--- a/drivers/net/vmxnet3/Makefile
+++ b/drivers/net/vmxnet3/Makefile
@@ -2,7 +2,7 @@
#
# Linux driver for VMware's vmxnet3 ethernet NIC.
#
-# Copyright (C) 2007-2020, VMware, Inc. All Rights Reserved.
+# Copyright (C) 2007-2021, VMware, Inc. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
diff --git a/drivers/net/vmxnet3/upt1_defs.h b/drivers/net/vmxnet3/upt1_defs.h
index 8c014c98471c..f9f3a23d1698 100644
--- a/drivers/net/vmxnet3/upt1_defs.h
+++ b/drivers/net/vmxnet3/upt1_defs.h
@@ -1,7 +1,7 @@
/*
* Linux driver for VMware's vmxnet3 ethernet NIC.
*
- * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved.
+ * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h
index a8d5ebd47c71..74d4e8bc4abc 100644
--- a/drivers/net/vmxnet3/vmxnet3_defs.h
+++ b/drivers/net/vmxnet3/vmxnet3_defs.h
@@ -1,7 +1,7 @@
/*
* Linux driver for VMware's vmxnet3 ethernet NIC.
*
- * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved.
+ * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -98,6 +98,9 @@ enum {
VMXNET3_CMD_GET_TXDATA_DESC_SIZE,
VMXNET3_CMD_GET_COALESCE,
VMXNET3_CMD_GET_RSS_FIELDS,
+ VMXNET3_CMD_GET_RESERVED2,
+ VMXNET3_CMD_GET_RESERVED3,
+ VMXNET3_CMD_GET_MAX_QUEUES_CONF,
};
/*
@@ -341,13 +344,15 @@ struct Vmxnet3_RxCompDescExt {
#define VMXNET3_TXD_EOP_SIZE 1
/* value of RxCompDesc.rssType */
-enum {
- VMXNET3_RCD_RSS_TYPE_NONE = 0,
- VMXNET3_RCD_RSS_TYPE_IPV4 = 1,
- VMXNET3_RCD_RSS_TYPE_TCPIPV4 = 2,
- VMXNET3_RCD_RSS_TYPE_IPV6 = 3,
- VMXNET3_RCD_RSS_TYPE_TCPIPV6 = 4,
-};
+#define VMXNET3_RCD_RSS_TYPE_NONE 0
+#define VMXNET3_RCD_RSS_TYPE_IPV4 1
+#define VMXNET3_RCD_RSS_TYPE_TCPIPV4 2
+#define VMXNET3_RCD_RSS_TYPE_IPV6 3
+#define VMXNET3_RCD_RSS_TYPE_TCPIPV6 4
+#define VMXNET3_RCD_RSS_TYPE_UDPIPV4 5
+#define VMXNET3_RCD_RSS_TYPE_UDPIPV6 6
+#define VMXNET3_RCD_RSS_TYPE_ESPIPV4 7
+#define VMXNET3_RCD_RSS_TYPE_ESPIPV6 8
/* a union for accessing all cmd/completion descriptors */
@@ -533,6 +538,13 @@ enum vmxnet3_intr_type {
/* addition 1 for events */
#define VMXNET3_MAX_INTRS 25
+/* Version 6 and later will use below macros */
+#define VMXNET3_EXT_MAX_TX_QUEUES 32
+#define VMXNET3_EXT_MAX_RX_QUEUES 32
+/* addition 1 for events */
+#define VMXNET3_EXT_MAX_INTRS 65
+#define VMXNET3_FIRST_SET_INTRS 64
+
/* value of intrCtrl */
#define VMXNET3_IC_DISABLE_ALL 0x1 /* bit 0 */
@@ -547,6 +559,19 @@ struct Vmxnet3_IntrConf {
__le32 reserved[2];
};
+/* Extended interrupt configuration with room for VMXNET3_EXT_MAX_INTRS
+ * moderation levels. vmxnet3_setup_driver_shared() fills this instead of
+ * Vmxnet3_IntrConf when the adapter is version 6 or later and
+ * queuesExtEnabled is set.
+ */
+struct Vmxnet3_IntrConfExt {
+ u8 autoMask; /* set when intr.mask_mode == VMXNET3_IMM_AUTO */
+ u8 numIntrs; /* # of interrupts */
+ u8 eventIntrIdx; /* copied from adapter->intr.event_intr_idx */
+ u8 reserved;
+ __le32 intrCtrl; /* VMXNET3_IC_* bits, little endian */
+ __le32 reserved1;
+ u8 modLevels[VMXNET3_EXT_MAX_INTRS]; /* moderation level for
+ * each intr
+ */
+ u8 reserved2[3]; /* rounds the 65-byte modLevels[] up to 68 bytes */
+};
+
/* one bit per VLAN ID, the size is in the units of u32 */
#define VMXNET3_VFT_SIZE (4096 / (sizeof(u32) * 8))
@@ -719,11 +744,16 @@ struct Vmxnet3_DSDevRead {
struct Vmxnet3_VariableLenConfDesc pluginConfDesc;
};
+/* Extension of the device-read area, embedded in Vmxnet3_DriverShared as
+ * devReadExt; currently it only carries the extended interrupt
+ * configuration.
+ */
+struct Vmxnet3_DSDevReadExt {
+ /* read-only region for device, read by dev in response to a SET cmd */
+ struct Vmxnet3_IntrConfExt intrConfExt;
+};
+
/* All structures in DriverShared are padded to multiples of 8 bytes */
struct Vmxnet3_DriverShared {
__le32 magic;
/* make devRead start at 64bit boundaries */
- __le32 pad;
+ __le32 size; /* size of DriverShared */
struct Vmxnet3_DSDevRead devRead;
__le32 ecr;
__le32 reserved;
@@ -734,6 +764,7 @@ struct Vmxnet3_DriverShared {
* command
*/
} cu;
+ struct Vmxnet3_DSDevReadExt devReadExt;
};
@@ -764,6 +795,7 @@ struct Vmxnet3_DriverShared {
((vfTable[vid >> 5] & (1 << (vid & 31))) != 0)
#define VMXNET3_MAX_MTU 9000
+#define VMXNET3_V6_MAX_MTU 9190
#define VMXNET3_MIN_MTU 60
#define VMXNET3_LINK_UP (10000 << 16 | 1) /* 10 Gbps, up */
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 6e87f1fc4874..142f70670f5c 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1,7 +1,7 @@
/*
* Linux driver for VMware's vmxnet3 ethernet NIC.
*
- * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved.
+ * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -314,10 +314,10 @@ vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
{
if (tbi->map_type == VMXNET3_MAP_SINGLE)
dma_unmap_single(&pdev->dev, tbi->dma_addr, tbi->len,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
else if (tbi->map_type == VMXNET3_MAP_PAGE)
dma_unmap_page(&pdev->dev, tbi->dma_addr, tbi->len,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
else
BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
@@ -585,7 +585,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
rbi->dma_addr = dma_map_single(
&adapter->pdev->dev,
rbi->skb->data, rbi->len,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
if (dma_mapping_error(&adapter->pdev->dev,
rbi->dma_addr)) {
dev_kfree_skb_any(rbi->skb);
@@ -609,7 +609,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
rbi->dma_addr = dma_map_page(
&adapter->pdev->dev,
rbi->page, 0, PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
if (dma_mapping_error(&adapter->pdev->dev,
rbi->dma_addr)) {
put_page(rbi->page);
@@ -723,7 +723,7 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
tbi->map_type = VMXNET3_MAP_SINGLE;
tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
skb->data + buf_offset, buf_size,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
return -EFAULT;
@@ -1449,7 +1449,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
new_dma_addr =
dma_map_single(&adapter->pdev->dev,
new_skb->data, rbi->len,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
if (dma_mapping_error(&adapter->pdev->dev,
new_dma_addr)) {
dev_kfree_skb(new_skb);
@@ -1467,7 +1467,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
dma_unmap_single(&adapter->pdev->dev,
rbi->dma_addr,
rbi->len,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
/* Immediate refill */
rbi->skb = new_skb;
@@ -1478,10 +1478,28 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
#ifdef VMXNET3_RSS
if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
- (adapter->netdev->features & NETIF_F_RXHASH))
+ (adapter->netdev->features & NETIF_F_RXHASH)) {
+ enum pkt_hash_types hash_type;
+
+ switch (rcd->rssType) {
+ case VMXNET3_RCD_RSS_TYPE_IPV4:
+ case VMXNET3_RCD_RSS_TYPE_IPV6:
+ hash_type = PKT_HASH_TYPE_L3;
+ break;
+ case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
+ case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
+ case VMXNET3_RCD_RSS_TYPE_UDPIPV4:
+ case VMXNET3_RCD_RSS_TYPE_UDPIPV6:
+ hash_type = PKT_HASH_TYPE_L4;
+ break;
+ default:
+ hash_type = PKT_HASH_TYPE_L3;
+ break;
+ }
skb_set_hash(ctx->skb,
le32_to_cpu(rcd->rssHash),
- PKT_HASH_TYPE_L3);
+ hash_type);
+ }
#endif
skb_put(ctx->skb, rcd->len);
@@ -1528,7 +1546,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
new_dma_addr = dma_map_page(&adapter->pdev->dev,
new_page,
0, PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
if (dma_mapping_error(&adapter->pdev->dev,
new_dma_addr)) {
put_page(new_page);
@@ -1541,7 +1559,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
dma_unmap_page(&adapter->pdev->dev,
rbi->dma_addr, rbi->len,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
vmxnet3_append_frag(ctx->skb, rcd, rbi);
@@ -1659,13 +1677,13 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
rq->buf_info[ring_idx][i].skb) {
dma_unmap_single(&adapter->pdev->dev, rxd->addr,
- rxd->len, PCI_DMA_FROMDEVICE);
+ rxd->len, DMA_FROM_DEVICE);
dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
rq->buf_info[ring_idx][i].skb = NULL;
} else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
rq->buf_info[ring_idx][i].page) {
dma_unmap_page(&adapter->pdev->dev, rxd->addr,
- rxd->len, PCI_DMA_FROMDEVICE);
+ rxd->len, DMA_FROM_DEVICE);
put_page(rq->buf_info[ring_idx][i].page);
rq->buf_info[ring_idx][i].page = NULL;
}
@@ -2401,7 +2419,7 @@ vmxnet3_set_mc(struct net_device *netdev)
&adapter->pdev->dev,
new_table,
sz,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
if (!dma_mapping_error(&adapter->pdev->dev,
new_table_pa)) {
new_mode |= VMXNET3_RXM_MCAST;
@@ -2437,7 +2455,7 @@ vmxnet3_set_mc(struct net_device *netdev)
if (new_table_pa_valid)
dma_unmap_single(&adapter->pdev->dev, new_table_pa,
- rxConf->mfTableLen, PCI_DMA_TODEVICE);
+ rxConf->mfTableLen, DMA_TO_DEVICE);
kfree(new_table);
}
@@ -2460,6 +2478,7 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
{
struct Vmxnet3_DriverShared *shared = adapter->shared;
struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
+ struct Vmxnet3_DSDevReadExt *devReadExt = &shared->devReadExt;
struct Vmxnet3_TxQueueConf *tqc;
struct Vmxnet3_RxQueueConf *rqc;
int i;
@@ -2572,14 +2591,26 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
#endif /* VMXNET3_RSS */
/* intr settings */
- devRead->intrConf.autoMask = adapter->intr.mask_mode ==
- VMXNET3_IMM_AUTO;
- devRead->intrConf.numIntrs = adapter->intr.num_intrs;
- for (i = 0; i < adapter->intr.num_intrs; i++)
- devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
+ if (!VMXNET3_VERSION_GE_6(adapter) ||
+ !adapter->queuesExtEnabled) {
+ devRead->intrConf.autoMask = adapter->intr.mask_mode ==
+ VMXNET3_IMM_AUTO;
+ devRead->intrConf.numIntrs = adapter->intr.num_intrs;
+ for (i = 0; i < adapter->intr.num_intrs; i++)
+ devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
+
+ devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
+ devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
+ } else {
+ devReadExt->intrConfExt.autoMask = adapter->intr.mask_mode ==
+ VMXNET3_IMM_AUTO;
+ devReadExt->intrConfExt.numIntrs = adapter->intr.num_intrs;
+ for (i = 0; i < adapter->intr.num_intrs; i++)
+ devReadExt->intrConfExt.modLevels[i] = adapter->intr.mod_levels[i];
- devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
- devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
+ devReadExt->intrConfExt.eventIntrIdx = adapter->intr.event_intr_idx;
+ devReadExt->intrConfExt.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
+ }
/* rx filter settings */
devRead->rxFilterConf.rxMode = 0;
@@ -2717,6 +2748,7 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
* tx queue if the link is up.
*/
vmxnet3_check_link(adapter, true);
+ netif_tx_wake_all_queues(adapter->netdev);
for (i = 0; i < adapter->num_rx_queues; i++)
napi_enable(&adapter->rx_queue[i].napi);
vmxnet3_enable_all_intrs(adapter);
@@ -3372,6 +3404,8 @@ vmxnet3_probe_device(struct pci_dev *pdev,
int size;
int num_tx_queues;
int num_rx_queues;
+ int queues;
+ unsigned long flags;
if (!pci_msi_enabled())
enable_mq = 0;
@@ -3383,7 +3417,6 @@ vmxnet3_probe_device(struct pci_dev *pdev,
else
#endif
num_rx_queues = 1;
- num_rx_queues = rounddown_pow_of_two(num_rx_queues);
if (enable_mq)
num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
@@ -3391,13 +3424,8 @@ vmxnet3_probe_device(struct pci_dev *pdev,
else
num_tx_queues = 1;
- num_tx_queues = rounddown_pow_of_two(num_tx_queues);
netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
max(num_tx_queues, num_rx_queues));
- dev_info(&pdev->dev,
- "# of Tx queues : %d, # of Rx queues : %d\n",
- num_tx_queues, num_rx_queues);
-
if (!netdev)
return -ENOMEM;
@@ -3410,19 +3438,12 @@ vmxnet3_probe_device(struct pci_dev *pdev,
adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE;
adapter->rx_ring2_size = VMXNET3_DEF_RX_RING2_SIZE;
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
- if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
- dev_err(&pdev->dev,
- "pci_set_consistent_dma_mask failed\n");
- err = -EIO;
- goto err_set_mask;
- }
+ if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) == 0) {
dma64 = true;
} else {
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
- dev_err(&pdev->dev,
- "pci_set_dma_mask failed\n");
- err = -EIO;
+ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev, "dma_set_mask failed\n");
goto err_set_mask;
}
dma64 = false;
@@ -3431,7 +3452,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
spin_lock_init(&adapter->cmd_lock);
adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
sizeof(struct vmxnet3_adapter),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) {
dev_err(&pdev->dev, "Failed to map dma\n");
err = -EFAULT;
@@ -3447,51 +3468,22 @@ vmxnet3_probe_device(struct pci_dev *pdev,
goto err_alloc_shared;
}
- adapter->num_rx_queues = num_rx_queues;
- adapter->num_tx_queues = num_tx_queues;
- adapter->rx_buf_per_pkt = 1;
-
- size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
- size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
- adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size,
- &adapter->queue_desc_pa,
- GFP_KERNEL);
-
- if (!adapter->tqd_start) {
- dev_err(&pdev->dev, "Failed to allocate memory\n");
- err = -ENOMEM;
- goto err_alloc_queue_desc;
- }
- adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
- adapter->num_tx_queues);
-
- adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev,
- sizeof(struct Vmxnet3_PMConf),
- &adapter->pm_conf_pa,
- GFP_KERNEL);
- if (adapter->pm_conf == NULL) {
- err = -ENOMEM;
- goto err_alloc_pm;
- }
-
-#ifdef VMXNET3_RSS
-
- adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev,
- sizeof(struct UPT1_RSSConf),
- &adapter->rss_conf_pa,
- GFP_KERNEL);
- if (adapter->rss_conf == NULL) {
- err = -ENOMEM;
- goto err_alloc_rss;
- }
-#endif /* VMXNET3_RSS */
-
err = vmxnet3_alloc_pci_resources(adapter);
if (err < 0)
goto err_alloc_pci;
ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
- if (ver & (1 << VMXNET3_REV_4)) {
+ if (ver & (1 << VMXNET3_REV_6)) {
+ VMXNET3_WRITE_BAR1_REG(adapter,
+ VMXNET3_REG_VRRS,
+ 1 << VMXNET3_REV_6);
+ adapter->version = VMXNET3_REV_6 + 1;
+ } else if (ver & (1 << VMXNET3_REV_5)) {
+ VMXNET3_WRITE_BAR1_REG(adapter,
+ VMXNET3_REG_VRRS,
+ 1 << VMXNET3_REV_5);
+ adapter->version = VMXNET3_REV_5 + 1;
+ } else if (ver & (1 << VMXNET3_REV_4)) {
VMXNET3_WRITE_BAR1_REG(adapter,
VMXNET3_REG_VRRS,
1 << VMXNET3_REV_4);
@@ -3529,6 +3521,77 @@ vmxnet3_probe_device(struct pci_dev *pdev,
goto err_ver;
}
+ if (VMXNET3_VERSION_GE_6(adapter)) {
+ spin_lock_irqsave(&adapter->cmd_lock, flags);
+ VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+ VMXNET3_CMD_GET_MAX_QUEUES_CONF);
+ queues = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
+ spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+ if (queues > 0) {
+ adapter->num_rx_queues = min(num_rx_queues, ((queues >> 8) & 0xff));
+ adapter->num_tx_queues = min(num_tx_queues, (queues & 0xff));
+ } else {
+ adapter->num_rx_queues = min(num_rx_queues,
+ VMXNET3_DEVICE_DEFAULT_RX_QUEUES);
+ adapter->num_tx_queues = min(num_tx_queues,
+ VMXNET3_DEVICE_DEFAULT_TX_QUEUES);
+ }
+ if (adapter->num_rx_queues > VMXNET3_MAX_RX_QUEUES ||
+ adapter->num_tx_queues > VMXNET3_MAX_TX_QUEUES) {
+ adapter->queuesExtEnabled = true;
+ } else {
+ adapter->queuesExtEnabled = false;
+ }
+ } else {
+ adapter->queuesExtEnabled = false;
+ num_rx_queues = rounddown_pow_of_two(num_rx_queues);
+ num_tx_queues = rounddown_pow_of_two(num_tx_queues);
+ adapter->num_rx_queues = min(num_rx_queues,
+ VMXNET3_DEVICE_DEFAULT_RX_QUEUES);
+ adapter->num_tx_queues = min(num_tx_queues,
+ VMXNET3_DEVICE_DEFAULT_TX_QUEUES);
+ }
+ dev_info(&pdev->dev,
+ "# of Tx queues : %d, # of Rx queues : %d\n",
+ adapter->num_tx_queues, adapter->num_rx_queues);
+
+ adapter->rx_buf_per_pkt = 1;
+
+ size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
+ size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
+ adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size,
+ &adapter->queue_desc_pa,
+ GFP_KERNEL);
+
+ if (!adapter->tqd_start) {
+ dev_err(&pdev->dev, "Failed to allocate memory\n");
+ err = -ENOMEM;
+ goto err_ver;
+ }
+ adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
+ adapter->num_tx_queues);
+
+ adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev,
+ sizeof(struct Vmxnet3_PMConf),
+ &adapter->pm_conf_pa,
+ GFP_KERNEL);
+ if (adapter->pm_conf == NULL) {
+ err = -ENOMEM;
+ goto err_alloc_pm;
+ }
+
+#ifdef VMXNET3_RSS
+
+ adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev,
+ sizeof(struct UPT1_RSSConf),
+ &adapter->rss_conf_pa,
+ GFP_KERNEL);
+ if (adapter->rss_conf == NULL) {
+ err = -ENOMEM;
+ goto err_alloc_rss;
+ }
+#endif /* VMXNET3_RSS */
+
if (VMXNET3_VERSION_GE_3(adapter)) {
adapter->coal_conf =
dma_alloc_coherent(&adapter->pdev->dev,
@@ -3538,7 +3601,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
GFP_KERNEL);
if (!adapter->coal_conf) {
err = -ENOMEM;
- goto err_ver;
+ goto err_coal_conf;
}
adapter->coal_conf->coalMode = VMXNET3_COALESCE_DISABLED;
adapter->default_coal_mode = true;
@@ -3581,9 +3644,12 @@ vmxnet3_probe_device(struct pci_dev *pdev,
vmxnet3_set_ethtool_ops(netdev);
netdev->watchdog_timeo = 5 * HZ;
- /* MTU range: 60 - 9000 */
+ /* MTU range: 60 - 9190 */
netdev->min_mtu = VMXNET3_MIN_MTU;
- netdev->max_mtu = VMXNET3_MAX_MTU;
+ if (VMXNET3_VERSION_GE_6(adapter))
+ netdev->max_mtu = VMXNET3_V6_MAX_MTU;
+ else
+ netdev->max_mtu = VMXNET3_MAX_MTU;
INIT_WORK(&adapter->work, vmxnet3_reset_work);
set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
@@ -3621,9 +3687,7 @@ err_register:
adapter->coal_conf, adapter->coal_conf_pa);
}
vmxnet3_free_intr_resources(adapter);
-err_ver:
- vmxnet3_free_pci_resources(adapter);
-err_alloc_pci:
+err_coal_conf:
#ifdef VMXNET3_RSS
dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
adapter->rss_conf, adapter->rss_conf_pa);
@@ -3634,13 +3698,15 @@ err_alloc_rss:
err_alloc_pm:
dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
adapter->queue_desc_pa);
-err_alloc_queue_desc:
+err_ver:
+ vmxnet3_free_pci_resources(adapter);
+err_alloc_pci:
dma_free_coherent(&adapter->pdev->dev,
sizeof(struct Vmxnet3_DriverShared),
adapter->shared, adapter->shared_pa);
err_alloc_shared:
dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
- sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
+ sizeof(struct vmxnet3_adapter), DMA_TO_DEVICE);
err_set_mask:
free_netdev(netdev);
return err;
@@ -3653,7 +3719,8 @@ vmxnet3_remove_device(struct pci_dev *pdev)
struct net_device *netdev = pci_get_drvdata(pdev);
struct vmxnet3_adapter *adapter = netdev_priv(netdev);
int size = 0;
- int num_rx_queues;
+ int num_rx_queues, rx_queues;
+ unsigned long flags;
#ifdef VMXNET3_RSS
if (enable_mq)
@@ -3662,7 +3729,24 @@ vmxnet3_remove_device(struct pci_dev *pdev)
else
#endif
num_rx_queues = 1;
- num_rx_queues = rounddown_pow_of_two(num_rx_queues);
+ if (!VMXNET3_VERSION_GE_6(adapter)) {
+ num_rx_queues = rounddown_pow_of_two(num_rx_queues);
+ }
+ if (VMXNET3_VERSION_GE_6(adapter)) {
+ spin_lock_irqsave(&adapter->cmd_lock, flags);
+ VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+ VMXNET3_CMD_GET_MAX_QUEUES_CONF);
+ rx_queues = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
+ spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+ if (rx_queues > 0)
+ rx_queues = (rx_queues >> 8) & 0xff;
+ else
+ rx_queues = min(num_rx_queues, VMXNET3_DEVICE_DEFAULT_RX_QUEUES);
+ num_rx_queues = min(num_rx_queues, rx_queues);
+ } else {
+ num_rx_queues = min(num_rx_queues,
+ VMXNET3_DEVICE_DEFAULT_RX_QUEUES);
+ }
cancel_work_sync(&adapter->work);
@@ -3690,7 +3774,7 @@ vmxnet3_remove_device(struct pci_dev *pdev)
sizeof(struct Vmxnet3_DriverShared),
adapter->shared, adapter->shared_pa);
dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
- sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
+ sizeof(struct vmxnet3_adapter), DMA_TO_DEVICE);
free_netdev(netdev);
}
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 1b483cf2b1ca..5dd8360b21a0 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -787,6 +787,10 @@ vmxnet3_get_rss_hash_opts(struct vmxnet3_adapter *adapter,
case AH_ESP_V6_FLOW:
case AH_V6_FLOW:
case ESP_V6_FLOW:
+ if (VMXNET3_VERSION_GE_6(adapter) &&
+ (rss_fields & VMXNET3_RSS_FIELDS_ESPIP6))
+ info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ fallthrough;
case SCTP_V6_FLOW:
case IPV6_FLOW:
info->data |= RXH_IP_SRC | RXH_IP_DST;
@@ -871,6 +875,22 @@ vmxnet3_set_rss_hash_opt(struct net_device *netdev,
case ESP_V6_FLOW:
case AH_V6_FLOW:
case AH_ESP_V6_FLOW:
+ if (!VMXNET3_VERSION_GE_6(adapter))
+ return -EOPNOTSUPP;
+ if (!(nfc->data & RXH_IP_SRC) ||
+ !(nfc->data & RXH_IP_DST))
+ return -EINVAL;
+ switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+ case 0:
+ rss_fields &= ~VMXNET3_RSS_FIELDS_ESPIP6;
+ break;
+ case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+ rss_fields |= VMXNET3_RSS_FIELDS_ESPIP6;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
case SCTP_V4_FLOW:
case SCTP_V6_FLOW:
if (!(nfc->data & RXH_IP_SRC) ||
@@ -1033,8 +1053,10 @@ vmxnet3_set_rss(struct net_device *netdev, const u32 *p, const u8 *key,
}
#endif
-static int
-vmxnet3_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
+static int vmxnet3_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct vmxnet3_adapter *adapter = netdev_priv(netdev);
@@ -1068,8 +1090,10 @@ vmxnet3_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
return 0;
}
-static int
-vmxnet3_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
+static int vmxnet3_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct vmxnet3_adapter *adapter = netdev_priv(netdev);
struct Vmxnet3_DriverShared *shared = adapter->shared;
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index e910596b79cf..7027ff483fa5 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -1,7 +1,7 @@
/*
* Linux driver for VMware's vmxnet3 ethernet NIC.
*
- * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved.
+ * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -69,18 +69,20 @@
/*
* Version numbers
*/
-#define VMXNET3_DRIVER_VERSION_STRING "1.5.0.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING "1.6.0.0-k"
/* Each byte of this 32-bit integer encodes a version number in
* VMXNET3_DRIVER_VERSION_STRING.
*/
-#define VMXNET3_DRIVER_VERSION_NUM 0x01050000
+#define VMXNET3_DRIVER_VERSION_NUM 0x01060000
#if defined(CONFIG_PCI_MSI)
/* RSS only makes sense if MSI-X is supported. */
#define VMXNET3_RSS
#endif
+#define VMXNET3_REV_6 5 /* Vmxnet3 Rev. 6 */
+#define VMXNET3_REV_5 4 /* Vmxnet3 Rev. 5 */
#define VMXNET3_REV_4 3 /* Vmxnet3 Rev. 4 */
#define VMXNET3_REV_3 2 /* Vmxnet3 Rev. 3 */
#define VMXNET3_REV_2 1 /* Vmxnet3 Rev. 2 */
@@ -301,15 +303,18 @@ struct vmxnet3_rx_queue {
struct vmxnet3_rq_driver_stats stats;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
-#define VMXNET3_DEVICE_MAX_TX_QUEUES 8
-#define VMXNET3_DEVICE_MAX_RX_QUEUES 8 /* Keep this value as a power of 2 */
+#define VMXNET3_DEVICE_MAX_TX_QUEUES 32
+#define VMXNET3_DEVICE_MAX_RX_QUEUES 32 /* Keep this value as a power of 2 */
+
+#define VMXNET3_DEVICE_DEFAULT_TX_QUEUES 8
+#define VMXNET3_DEVICE_DEFAULT_RX_QUEUES 8 /* Keep this value as a power of 2 */
/* Should be less than UPT1_RSS_MAX_IND_TABLE_SIZE */
#define VMXNET3_RSS_IND_TABLE_SIZE (VMXNET3_DEVICE_MAX_RX_QUEUES * 4)
#define VMXNET3_LINUX_MAX_MSIX_VECT (VMXNET3_DEVICE_MAX_TX_QUEUES + \
VMXNET3_DEVICE_MAX_RX_QUEUES + 1)
-#define VMXNET3_LINUX_MIN_MSIX_VECT 2 /* 1 for tx-rx pair and 1 for event */
+#define VMXNET3_LINUX_MIN_MSIX_VECT 3 /* 1 for tx, 1 for rx pair and 1 for event */
struct vmxnet3_intr {
@@ -396,6 +401,7 @@ struct vmxnet3_adapter {
dma_addr_t adapter_pa;
dma_addr_t pm_conf_pa;
dma_addr_t rss_conf_pa;
+ bool queuesExtEnabled;
};
#define VMXNET3_WRITE_BAR0_REG(adapter, reg, val) \
@@ -421,6 +427,10 @@ struct vmxnet3_adapter {
(adapter->version >= VMXNET3_REV_3 + 1)
#define VMXNET3_VERSION_GE_4(adapter) \
(adapter->version >= VMXNET3_REV_4 + 1)
+#define VMXNET3_VERSION_GE_5(adapter) \
+ (adapter->version >= VMXNET3_REV_5 + 1)
+#define VMXNET3_VERSION_GE_6(adapter) \
+ (adapter->version >= VMXNET3_REV_6 + 1)
/* must be a multiple of VMXNET3_RING_SIZE_ALIGN */
#define VMXNET3_DEF_TX_RING_SIZE 512
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 2b1b944d4b28..bf2fac913942 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -857,30 +857,24 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
bool is_v6gw = false;
- int ret = -EINVAL;
nf_reset_ct(skb);
/* Be paranoid, rather than too clever. */
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
- struct sk_buff *skb2;
-
- skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
- if (!skb2) {
- ret = -ENOMEM;
- goto err;
+ skb = skb_expand_head(skb, hh_len);
+ if (!skb) {
+ dev->stats.tx_errors++;
+ return -ENOMEM;
}
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
-
- consume_skb(skb);
- skb = skb2;
}
rcu_read_lock_bh();
neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
if (!IS_ERR(neigh)) {
+ int ret;
+
sock_confirm_neigh(skb, neigh);
/* if crossing protocols, can not use the cached header */
ret = neigh_output(neigh, skb, is_v6gw);
@@ -889,9 +883,8 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
}
rcu_read_unlock_bh();
-err:
vrf_tx_error(skb->dev, skb);
- return ret;
+ return -EINVAL;
}
static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -1367,6 +1360,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
bool is_ndisc = ipv6_ndisc_frame(skb);
+ nf_reset_ct(skb);
+
/* loopback, multicast & non-ND link-local traffic; do not push through
* packet taps again. Reset pkt_type for upper layers to process skb.
* For strict packets with a source LLA, determine the dst using the
@@ -1429,6 +1424,8 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
skb->skb_iif = vrf_dev->ifindex;
IPCB(skb)->flags |= IPSKB_L3SLAVE;
+ nf_reset_ct(skb);
+
if (ipv4_is_multicast(ip_hdr(skb)->daddr))
goto out;
diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig
index 473df2505c8e..592a8389fc5a 100644
--- a/drivers/net/wan/Kconfig
+++ b/drivers/net/wan/Kconfig
@@ -290,30 +290,6 @@ config SLIC_DS26522
To compile this driver as a module, choose M here: the
module will be called slic_ds26522.
-config DSCC4_PCISYNC
- bool "Etinc PCISYNC features"
- depends on DSCC4
- help
- Due to Etinc's design choice for its PCISYNC cards, some operations
- are only allowed on specific ports of the DSCC4. This option is the
- only way for the driver to know that it shouldn't return a success
- code for these operations.
-
- Please say Y if your card is an Etinc's PCISYNC.
-
-config DSCC4_PCI_RST
- bool "Hard reset support"
- depends on DSCC4
- help
- Various DSCC4 bugs forbid any reliable software reset of the ASIC.
- As a replacement, some vendors provide a way to assert the PCI #RST
- pin of DSCC4 through the GPIO port of the card. If you choose Y,
- the driver will make use of this feature before module removal
- (i.e. rmmod). The feature is known to be available on Commtech's
- cards. Contact your manufacturer for details.
-
- Say Y if your card supports this feature.
-
config IXP4XX_HSS
tristate "Intel IXP4xx HSS (synchronous serial port) support"
depends on HDLC && IXP4XX_NPE && IXP4XX_QMGR
@@ -337,33 +313,6 @@ config LAPBETHER
To compile this driver as a module, choose M here: the
module will be called lapbether.
- If unsure, say N.
-
-config SBNI
- tristate "Granch SBNI12 Leased Line adapter support"
- depends on X86
- help
- Driver for ISA SBNI12-xx cards which are low cost alternatives to
- leased line modems.
-
- You can find more information and last versions of drivers and
- utilities at <http://www.granch.ru/>. If you have any question you
- can send email to <sbni@granch.ru>.
-
- To compile this driver as a module, choose M here: the
- module will be called sbni.
-
- If unsure, say N.
-
-config SBNI_MULTILINE
- bool "Multiple line feature support"
- depends on SBNI
- help
- Schedule traffic for some parallel lines, via SBNI12 adapters.
-
- If you have two computers connected with two parallel lines it's
- possible to increase transfer rate nearly twice. You should have
- a program named 'sbniconfig' to configure adapters.
If unsure, say N.
diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile
index 081666c36ca2..f6b92efffc94 100644
--- a/drivers/net/wan/Makefile
+++ b/drivers/net/wan/Makefile
@@ -22,7 +22,6 @@ obj-$(CONFIG_FARSYNC) += farsync.o
obj-$(CONFIG_LANMEDIA) += lmc/
obj-$(CONFIG_LAPBETHER) += lapbether.o
-obj-$(CONFIG_SBNI) += sbni.o
obj-$(CONFIG_N2) += n2.o
obj-$(CONFIG_C101) += c101.o
obj-$(CONFIG_WANXL) += wanxl.o
diff --git a/drivers/net/wan/c101.c b/drivers/net/wan/c101.c
index 059c2f7133be..8dd14d916c3a 100644
--- a/drivers/net/wan/c101.c
+++ b/drivers/net/wan/c101.c
@@ -208,14 +208,12 @@ static int c101_close(struct net_device *dev)
return 0;
}
-static int c101_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int c101_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
- const size_t size = sizeof(sync_serial_settings);
- sync_serial_settings new_line;
- sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync;
+#ifdef DEBUG_RINGS
port_t *port = dev_to_port(dev);
-#ifdef DEBUG_RINGS
if (cmd == SIOCDEVPRIVATE) {
sca_dump_rings(dev);
printk(KERN_DEBUG "MSCI1: ST: %02x %02x %02x %02x\n",
@@ -226,14 +224,22 @@ static int c101_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return 0;
}
#endif
- if (cmd != SIOCWANDEV)
- return hdlc_ioctl(dev, ifr, cmd);
- switch (ifr->ifr_settings.type) {
+ return -EOPNOTSUPP;
+}
+
+static int c101_ioctl(struct net_device *dev, struct if_settings *ifs)
+{
+ const size_t size = sizeof(sync_serial_settings);
+ sync_serial_settings new_line;
+ sync_serial_settings __user *line = ifs->ifs_ifsu.sync;
+ port_t *port = dev_to_port(dev);
+
+ switch (ifs->type) {
case IF_GET_IFACE:
- ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL;
- if (ifr->ifr_settings.size < size) {
- ifr->ifr_settings.size = size; /* data size wanted */
+ ifs->type = IF_IFACE_SYNC_SERIAL;
+ if (ifs->size < size) {
+ ifs->size = size; /* data size wanted */
return -ENOBUFS;
}
if (copy_to_user(line, &port->settings, size))
@@ -261,7 +267,7 @@ static int c101_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return 0;
default:
- return hdlc_ioctl(dev, ifr, cmd);
+ return hdlc_ioctl(dev, ifs);
}
}
@@ -286,7 +292,8 @@ static const struct net_device_ops c101_ops = {
.ndo_open = c101_open,
.ndo_stop = c101_close,
.ndo_start_xmit = hdlc_start_xmit,
- .ndo_do_ioctl = c101_ioctl,
+ .ndo_siocwandev = c101_ioctl,
+ .ndo_siocdevprivate = c101_siocdevprivate,
};
static int __init c101_run(unsigned long irq, unsigned long winbase)
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 43caab0b7dee..23d2954d9747 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -267,7 +267,6 @@ static netdev_tx_t cosa_net_tx(struct sk_buff *skb, struct net_device *d);
static char *cosa_net_setup_rx(struct channel_data *channel, int size);
static int cosa_net_rx_done(struct channel_data *channel);
static int cosa_net_tx_done(struct channel_data *channel, int size);
-static int cosa_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
/* Character device */
static char *chrdev_setup_rx(struct channel_data *channel, int size);
@@ -415,7 +414,7 @@ static const struct net_device_ops cosa_ops = {
.ndo_open = cosa_net_open,
.ndo_stop = cosa_net_close,
.ndo_start_xmit = hdlc_start_xmit,
- .ndo_do_ioctl = cosa_net_ioctl,
+ .ndo_siocwandev = hdlc_ioctl,
.ndo_tx_timeout = cosa_net_timeout,
};
@@ -1169,18 +1168,6 @@ static int cosa_ioctl_common(struct cosa_data *cosa,
return -ENOIOCTLCMD;
}
-static int cosa_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
-{
- int rv;
- struct channel_data *chan = dev_to_chan(dev);
-
- rv = cosa_ioctl_common(chan->cosa, chan, cmd,
- (unsigned long)ifr->ifr_data);
- if (rv != -ENOIOCTLCMD)
- return rv;
- return hdlc_ioctl(dev, ifr, cmd);
-}
-
static long cosa_chardev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index b3466e084e84..6a212c085435 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -1784,16 +1784,15 @@ gather_conf_info(struct fst_card_info *card, struct fst_port_info *port,
static int
fst_set_iface(struct fst_card_info *card, struct fst_port_info *port,
- struct ifreq *ifr)
+ struct if_settings *ifs)
{
sync_serial_settings sync;
int i;
- if (ifr->ifr_settings.size != sizeof(sync))
+ if (ifs->size != sizeof(sync))
return -ENOMEM;
- if (copy_from_user
- (&sync, ifr->ifr_settings.ifs_ifsu.sync, sizeof(sync)))
+ if (copy_from_user(&sync, ifs->ifs_ifsu.sync, sizeof(sync)))
return -EFAULT;
if (sync.loopback)
@@ -1801,7 +1800,7 @@ fst_set_iface(struct fst_card_info *card, struct fst_port_info *port,
i = port->index;
- switch (ifr->ifr_settings.type) {
+ switch (ifs->type) {
case IF_IFACE_V35:
FST_WRW(card, portConfig[i].lineInterface, V35);
port->hwif = V35;
@@ -1857,7 +1856,7 @@ fst_set_iface(struct fst_card_info *card, struct fst_port_info *port,
static int
fst_get_iface(struct fst_card_info *card, struct fst_port_info *port,
- struct ifreq *ifr)
+ struct if_settings *ifs)
{
sync_serial_settings sync;
int i;
@@ -1868,29 +1867,29 @@ fst_get_iface(struct fst_card_info *card, struct fst_port_info *port,
*/
switch (port->hwif) {
case E1:
- ifr->ifr_settings.type = IF_IFACE_E1;
+ ifs->type = IF_IFACE_E1;
break;
case T1:
- ifr->ifr_settings.type = IF_IFACE_T1;
+ ifs->type = IF_IFACE_T1;
break;
case V35:
- ifr->ifr_settings.type = IF_IFACE_V35;
+ ifs->type = IF_IFACE_V35;
break;
case V24:
- ifr->ifr_settings.type = IF_IFACE_V24;
+ ifs->type = IF_IFACE_V24;
break;
case X21D:
- ifr->ifr_settings.type = IF_IFACE_X21D;
+ ifs->type = IF_IFACE_X21D;
break;
case X21:
default:
- ifr->ifr_settings.type = IF_IFACE_X21;
+ ifs->type = IF_IFACE_X21;
break;
}
- if (ifr->ifr_settings.size == 0)
+ if (!ifs->size)
return 0; /* only type requested */
- if (ifr->ifr_settings.size < sizeof(sync))
+ if (ifs->size < sizeof(sync))
return -ENOMEM;
i = port->index;
@@ -1901,15 +1900,15 @@ fst_get_iface(struct fst_card_info *card, struct fst_port_info *port,
INTCLK ? CLOCK_INT : CLOCK_EXT;
sync.loopback = 0;
- if (copy_to_user(ifr->ifr_settings.ifs_ifsu.sync, &sync, sizeof(sync)))
+ if (copy_to_user(ifs->ifs_ifsu.sync, &sync, sizeof(sync)))
return -EFAULT;
- ifr->ifr_settings.size = sizeof(sync);
+ ifs->size = sizeof(sync);
return 0;
}
static int
-fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+fst_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd)
{
struct fst_card_info *card;
struct fst_port_info *port;
@@ -1918,7 +1917,7 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
unsigned long flags;
void *buf;
- dbg(DBG_IOCTL, "ioctl: %x, %p\n", cmd, ifr->ifr_data);
+ dbg(DBG_IOCTL, "ioctl: %x, %p\n", cmd, data);
port = dev_to_port(dev);
card = port->card;
@@ -1942,11 +1941,10 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
/* First copy in the header with the length and offset of data
* to write
*/
- if (!ifr->ifr_data)
+ if (!data)
return -EINVAL;
- if (copy_from_user(&wrthdr, ifr->ifr_data,
- sizeof(struct fstioc_write)))
+ if (copy_from_user(&wrthdr, data, sizeof(struct fstioc_write)))
return -EFAULT;
/* Sanity check the parameters. We don't support partial writes
@@ -1958,7 +1956,7 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
/* Now copy the data to the card. */
- buf = memdup_user(ifr->ifr_data + sizeof(struct fstioc_write),
+ buf = memdup_user(data + sizeof(struct fstioc_write),
wrthdr.size);
if (IS_ERR(buf))
return PTR_ERR(buf);
@@ -1991,12 +1989,12 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
}
- if (!ifr->ifr_data)
+ if (!data)
return -EINVAL;
gather_conf_info(card, port, &info);
- if (copy_to_user(ifr->ifr_data, &info, sizeof(info)))
+ if (copy_to_user(data, &info, sizeof(info)))
return -EFAULT;
return 0;
@@ -2011,46 +2009,58 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
card->card_no, card->state);
return -EIO;
}
- if (copy_from_user(&info, ifr->ifr_data, sizeof(info)))
+ if (copy_from_user(&info, data, sizeof(info)))
return -EFAULT;
return set_conf_from_info(card, port, &info);
+ default:
+ return -EINVAL;
+ }
+}
- case SIOCWANDEV:
- switch (ifr->ifr_settings.type) {
- case IF_GET_IFACE:
- return fst_get_iface(card, port, ifr);
-
- case IF_IFACE_SYNC_SERIAL:
- case IF_IFACE_V35:
- case IF_IFACE_V24:
- case IF_IFACE_X21:
- case IF_IFACE_X21D:
- case IF_IFACE_T1:
- case IF_IFACE_E1:
- return fst_set_iface(card, port, ifr);
-
- case IF_PROTO_RAW:
- port->mode = FST_RAW;
- return 0;
+static int
+fst_ioctl(struct net_device *dev, struct if_settings *ifs)
+{
+ struct fst_card_info *card;
+ struct fst_port_info *port;
- case IF_GET_PROTO:
- if (port->mode == FST_RAW) {
- ifr->ifr_settings.type = IF_PROTO_RAW;
- return 0;
- }
- return hdlc_ioctl(dev, ifr, cmd);
+ dbg(DBG_IOCTL, "SIOCDEVPRIVATE, %x\n", ifs->type);
- default:
- port->mode = FST_GEN_HDLC;
- dbg(DBG_IOCTL, "Passing this type to hdlc %x\n",
- ifr->ifr_settings.type);
- return hdlc_ioctl(dev, ifr, cmd);
+ port = dev_to_port(dev);
+ card = port->card;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (ifs->type) {
+ case IF_GET_IFACE:
+ return fst_get_iface(card, port, ifs);
+
+ case IF_IFACE_SYNC_SERIAL:
+ case IF_IFACE_V35:
+ case IF_IFACE_V24:
+ case IF_IFACE_X21:
+ case IF_IFACE_X21D:
+ case IF_IFACE_T1:
+ case IF_IFACE_E1:
+ return fst_set_iface(card, port, ifs);
+
+ case IF_PROTO_RAW:
+ port->mode = FST_RAW;
+ return 0;
+
+ case IF_GET_PROTO:
+ if (port->mode == FST_RAW) {
+ ifs->type = IF_PROTO_RAW;
+ return 0;
}
+ return hdlc_ioctl(dev, ifs);
default:
- /* Not one of ours. Pass through to HDLC package */
- return hdlc_ioctl(dev, ifr, cmd);
+ port->mode = FST_GEN_HDLC;
+ dbg(DBG_IOCTL, "Passing this type to hdlc %x\n",
+ ifs->type);
+ return hdlc_ioctl(dev, ifs);
}
}
@@ -2310,7 +2320,8 @@ static const struct net_device_ops fst_ops = {
.ndo_open = fst_open,
.ndo_stop = fst_close,
.ndo_start_xmit = hdlc_start_xmit,
- .ndo_do_ioctl = fst_ioctl,
+ .ndo_siocwandev = fst_ioctl,
+ .ndo_siocdevprivate = fst_siocdevprivate,
.ndo_tx_timeout = fst_tx_timeout,
};
diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index 39f05fabbfa4..cda1b4ce6b21 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -674,31 +674,28 @@ static irqreturn_t ucc_hdlc_irq_handler(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static int uhdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int uhdlc_ioctl(struct net_device *dev, struct if_settings *ifs)
{
const size_t size = sizeof(te1_settings);
te1_settings line;
struct ucc_hdlc_private *priv = netdev_priv(dev);
- if (cmd != SIOCWANDEV)
- return hdlc_ioctl(dev, ifr, cmd);
-
- switch (ifr->ifr_settings.type) {
+ switch (ifs->type) {
case IF_GET_IFACE:
- ifr->ifr_settings.type = IF_IFACE_E1;
- if (ifr->ifr_settings.size < size) {
- ifr->ifr_settings.size = size; /* data size wanted */
+ ifs->type = IF_IFACE_E1;
+ if (ifs->size < size) {
+ ifs->size = size; /* data size wanted */
return -ENOBUFS;
}
memset(&line, 0, sizeof(line));
line.clock_type = priv->clocking;
- if (copy_to_user(ifr->ifr_settings.ifs_ifsu.sync, &line, size))
+ if (copy_to_user(ifs->ifs_ifsu.sync, &line, size))
return -EFAULT;
return 0;
default:
- return hdlc_ioctl(dev, ifr, cmd);
+ return hdlc_ioctl(dev, ifs);
}
}
@@ -1053,7 +1050,7 @@ static const struct net_device_ops uhdlc_ops = {
.ndo_open = uhdlc_open,
.ndo_stop = uhdlc_close,
.ndo_start_xmit = hdlc_start_xmit,
- .ndo_do_ioctl = uhdlc_ioctl,
+ .ndo_siocwandev = uhdlc_ioctl,
.ndo_tx_timeout = uhdlc_tx_timeout,
};
diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c
index dd6312b69861..cbed10b1d862 100644
--- a/drivers/net/wan/hdlc.c
+++ b/drivers/net/wan/hdlc.c
@@ -196,16 +196,13 @@ void hdlc_close(struct net_device *dev)
}
EXPORT_SYMBOL(hdlc_close);
-int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+int hdlc_ioctl(struct net_device *dev, struct if_settings *ifs)
{
struct hdlc_proto *proto = first_proto;
int result;
- if (cmd != SIOCWANDEV)
- return -EINVAL;
-
if (dev_to_hdlc(dev)->proto) {
- result = dev_to_hdlc(dev)->proto->ioctl(dev, ifr);
+ result = dev_to_hdlc(dev)->proto->ioctl(dev, ifs);
if (result != -EINVAL)
return result;
}
@@ -213,7 +210,7 @@ int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
/* Not handled by currently attached protocol (if any) */
while (proto) {
- result = proto->ioctl(dev, ifr);
+ result = proto->ioctl(dev, ifs);
if (result != -EINVAL)
return result;
proto = proto->next;
diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index c54fdae950fb..cdebe65a7e2d 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -56,7 +56,7 @@ struct cisco_state {
u32 rxseq; /* RX sequence number */
};
-static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr);
+static int cisco_ioctl(struct net_device *dev, struct if_settings *ifs);
static inline struct cisco_state *state(hdlc_device *hdlc)
{
@@ -306,21 +306,21 @@ static const struct header_ops cisco_header_ops = {
.create = cisco_hard_header,
};
-static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int cisco_ioctl(struct net_device *dev, struct if_settings *ifs)
{
- cisco_proto __user *cisco_s = ifr->ifr_settings.ifs_ifsu.cisco;
+ cisco_proto __user *cisco_s = ifs->ifs_ifsu.cisco;
const size_t size = sizeof(cisco_proto);
cisco_proto new_settings;
hdlc_device *hdlc = dev_to_hdlc(dev);
int result;
- switch (ifr->ifr_settings.type) {
+ switch (ifs->type) {
case IF_GET_PROTO:
if (dev_to_hdlc(dev)->proto != &proto)
return -EINVAL;
- ifr->ifr_settings.type = IF_PROTO_CISCO;
- if (ifr->ifr_settings.size < size) {
- ifr->ifr_settings.size = size; /* data size wanted */
+ ifs->type = IF_PROTO_CISCO;
+ if (ifs->size < size) {
+ ifs->size = size; /* data size wanted */
return -ENOBUFS;
}
if (copy_to_user(cisco_s, &state(hdlc)->settings, size))
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 25e3564ce118..7637edce443e 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -146,7 +146,7 @@ struct frad_state {
u8 rxseq; /* RX sequence number */
};
-static int fr_ioctl(struct net_device *dev, struct ifreq *ifr);
+static int fr_ioctl(struct net_device *dev, struct if_settings *ifs);
static inline u16 q922_to_dlci(u8 *hdr)
{
@@ -357,26 +357,26 @@ static int pvc_close(struct net_device *dev)
return 0;
}
-static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int pvc_ioctl(struct net_device *dev, struct if_settings *ifs)
{
struct pvc_device *pvc = dev->ml_priv;
fr_proto_pvc_info info;
- if (ifr->ifr_settings.type == IF_GET_PROTO) {
+ if (ifs->type == IF_GET_PROTO) {
if (dev->type == ARPHRD_ETHER)
- ifr->ifr_settings.type = IF_PROTO_FR_ETH_PVC;
+ ifs->type = IF_PROTO_FR_ETH_PVC;
else
- ifr->ifr_settings.type = IF_PROTO_FR_PVC;
+ ifs->type = IF_PROTO_FR_PVC;
- if (ifr->ifr_settings.size < sizeof(info)) {
+ if (ifs->size < sizeof(info)) {
/* data size wanted */
- ifr->ifr_settings.size = sizeof(info);
+ ifs->size = sizeof(info);
return -ENOBUFS;
}
info.dlci = pvc->dlci;
memcpy(info.master, pvc->frad->name, IFNAMSIZ);
- if (copy_to_user(ifr->ifr_settings.ifs_ifsu.fr_pvc_info,
+ if (copy_to_user(ifs->ifs_ifsu.fr_pvc_info,
&info, sizeof(info)))
return -EFAULT;
return 0;
@@ -1056,7 +1056,7 @@ static const struct net_device_ops pvc_ops = {
.ndo_open = pvc_open,
.ndo_stop = pvc_close,
.ndo_start_xmit = pvc_xmit,
- .ndo_do_ioctl = pvc_ioctl,
+ .ndo_siocwandev = pvc_ioctl,
};
static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
@@ -1179,22 +1179,22 @@ static struct hdlc_proto proto = {
.module = THIS_MODULE,
};
-static int fr_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int fr_ioctl(struct net_device *dev, struct if_settings *ifs)
{
- fr_proto __user *fr_s = ifr->ifr_settings.ifs_ifsu.fr;
+ fr_proto __user *fr_s = ifs->ifs_ifsu.fr;
const size_t size = sizeof(fr_proto);
fr_proto new_settings;
hdlc_device *hdlc = dev_to_hdlc(dev);
fr_proto_pvc pvc;
int result;
- switch (ifr->ifr_settings.type) {
+ switch (ifs->type) {
case IF_GET_PROTO:
if (dev_to_hdlc(dev)->proto != &proto) /* Different proto */
return -EINVAL;
- ifr->ifr_settings.type = IF_PROTO_FR;
- if (ifr->ifr_settings.size < size) {
- ifr->ifr_settings.size = size; /* data size wanted */
+ ifs->type = IF_PROTO_FR;
+ if (ifs->size < size) {
+ ifs->size = size; /* data size wanted */
return -ENOBUFS;
}
if (copy_to_user(fr_s, &state(hdlc)->settings, size))
@@ -1256,21 +1256,21 @@ static int fr_ioctl(struct net_device *dev, struct ifreq *ifr)
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- if (copy_from_user(&pvc, ifr->ifr_settings.ifs_ifsu.fr_pvc,
+ if (copy_from_user(&pvc, ifs->ifs_ifsu.fr_pvc,
sizeof(fr_proto_pvc)))
return -EFAULT;
if (pvc.dlci <= 0 || pvc.dlci >= 1024)
return -EINVAL; /* Only 10 bits, DLCI 0 reserved */
- if (ifr->ifr_settings.type == IF_PROTO_FR_ADD_ETH_PVC ||
- ifr->ifr_settings.type == IF_PROTO_FR_DEL_ETH_PVC)
+ if (ifs->type == IF_PROTO_FR_ADD_ETH_PVC ||
+ ifs->type == IF_PROTO_FR_DEL_ETH_PVC)
result = ARPHRD_ETHER; /* bridged Ethernet device */
else
result = ARPHRD_DLCI;
- if (ifr->ifr_settings.type == IF_PROTO_FR_ADD_PVC ||
- ifr->ifr_settings.type == IF_PROTO_FR_ADD_ETH_PVC)
+ if (ifs->type == IF_PROTO_FR_ADD_PVC ||
+ ifs->type == IF_PROTO_FR_ADD_ETH_PVC)
return fr_add_pvc(dev, pvc.dlci, result);
else
return fr_del_pvc(hdlc, pvc.dlci, result);
diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index b81ecf432a0c..37a3c989cba1 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -100,7 +100,7 @@ static const char *const event_names[EVENTS] = {
static struct sk_buff_head tx_queue; /* used when holding the spin lock */
-static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr);
+static int ppp_ioctl(struct net_device *dev, struct if_settings *ifs);
static inline struct ppp *get_ppp(struct net_device *dev)
{
@@ -655,17 +655,17 @@ static const struct header_ops ppp_header_ops = {
.create = ppp_hard_header,
};
-static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int ppp_ioctl(struct net_device *dev, struct if_settings *ifs)
{
hdlc_device *hdlc = dev_to_hdlc(dev);
struct ppp *ppp;
int result;
- switch (ifr->ifr_settings.type) {
+ switch (ifs->type) {
case IF_GET_PROTO:
if (dev_to_hdlc(dev)->proto != &proto)
return -EINVAL;
- ifr->ifr_settings.type = IF_PROTO_PPP;
+ ifs->type = IF_PROTO_PPP;
return 0; /* return protocol only, no settable parameters */
case IF_PROTO_PPP:
diff --git a/drivers/net/wan/hdlc_raw.c b/drivers/net/wan/hdlc_raw.c
index 54d28496fefd..4a2f068721bc 100644
--- a/drivers/net/wan/hdlc_raw.c
+++ b/drivers/net/wan/hdlc_raw.c
@@ -19,7 +19,7 @@
#include <linux/skbuff.h>
-static int raw_ioctl(struct net_device *dev, struct ifreq *ifr);
+static int raw_ioctl(struct net_device *dev, struct if_settings *ifs);
static __be16 raw_type_trans(struct sk_buff *skb, struct net_device *dev)
{
@@ -33,21 +33,21 @@ static struct hdlc_proto proto = {
};
-static int raw_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int raw_ioctl(struct net_device *dev, struct if_settings *ifs)
{
- raw_hdlc_proto __user *raw_s = ifr->ifr_settings.ifs_ifsu.raw_hdlc;
+ raw_hdlc_proto __user *raw_s = ifs->ifs_ifsu.raw_hdlc;
const size_t size = sizeof(raw_hdlc_proto);
raw_hdlc_proto new_settings;
hdlc_device *hdlc = dev_to_hdlc(dev);
int result;
- switch (ifr->ifr_settings.type) {
+ switch (ifs->type) {
case IF_GET_PROTO:
if (dev_to_hdlc(dev)->proto != &proto)
return -EINVAL;
- ifr->ifr_settings.type = IF_PROTO_HDLC;
- if (ifr->ifr_settings.size < size) {
- ifr->ifr_settings.size = size; /* data size wanted */
+ ifs->type = IF_PROTO_HDLC;
+ if (ifs->size < size) {
+ ifs->size = size; /* data size wanted */
return -ENOBUFS;
}
if (copy_to_user(raw_s, hdlc->state, size))
diff --git a/drivers/net/wan/hdlc_raw_eth.c b/drivers/net/wan/hdlc_raw_eth.c
index 927596276a07..0a66b7356405 100644
--- a/drivers/net/wan/hdlc_raw_eth.c
+++ b/drivers/net/wan/hdlc_raw_eth.c
@@ -20,7 +20,7 @@
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
-static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr);
+static int raw_eth_ioctl(struct net_device *dev, struct if_settings *ifs);
static netdev_tx_t eth_tx(struct sk_buff *skb, struct net_device *dev)
{
@@ -48,22 +48,22 @@ static struct hdlc_proto proto = {
};
-static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int raw_eth_ioctl(struct net_device *dev, struct if_settings *ifs)
{
- raw_hdlc_proto __user *raw_s = ifr->ifr_settings.ifs_ifsu.raw_hdlc;
+ raw_hdlc_proto __user *raw_s = ifs->ifs_ifsu.raw_hdlc;
const size_t size = sizeof(raw_hdlc_proto);
raw_hdlc_proto new_settings;
hdlc_device *hdlc = dev_to_hdlc(dev);
unsigned int old_qlen;
int result;
- switch (ifr->ifr_settings.type) {
+ switch (ifs->type) {
case IF_GET_PROTO:
if (dev_to_hdlc(dev)->proto != &proto)
return -EINVAL;
- ifr->ifr_settings.type = IF_PROTO_HDLC_ETH;
- if (ifr->ifr_settings.size < size) {
- ifr->ifr_settings.size = size; /* data size wanted */
+ ifs->type = IF_PROTO_HDLC_ETH;
+ if (ifs->size < size) {
+ ifs->size = size; /* data size wanted */
return -ENOBUFS;
}
if (copy_to_user(raw_s, hdlc->state, size))
diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c
index 9b7ebf8bd85c..f72c92c24003 100644
--- a/drivers/net/wan/hdlc_x25.c
+++ b/drivers/net/wan/hdlc_x25.c
@@ -29,7 +29,7 @@ struct x25_state {
struct tasklet_struct rx_tasklet;
};
-static int x25_ioctl(struct net_device *dev, struct ifreq *ifr);
+static int x25_ioctl(struct net_device *dev, struct if_settings *ifs);
static struct x25_state *state(hdlc_device *hdlc)
{
@@ -274,21 +274,21 @@ static struct hdlc_proto proto = {
.module = THIS_MODULE,
};
-static int x25_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int x25_ioctl(struct net_device *dev, struct if_settings *ifs)
{
- x25_hdlc_proto __user *x25_s = ifr->ifr_settings.ifs_ifsu.x25;
+ x25_hdlc_proto __user *x25_s = ifs->ifs_ifsu.x25;
const size_t size = sizeof(x25_hdlc_proto);
hdlc_device *hdlc = dev_to_hdlc(dev);
x25_hdlc_proto new_settings;
int result;
- switch (ifr->ifr_settings.type) {
+ switch (ifs->type) {
case IF_GET_PROTO:
if (dev_to_hdlc(dev)->proto != &proto)
return -EINVAL;
- ifr->ifr_settings.type = IF_PROTO_X25;
- if (ifr->ifr_settings.size < size) {
- ifr->ifr_settings.size = size; /* data size wanted */
+ ifs->type = IF_PROTO_X25;
+ if (ifs->size < size) {
+ ifs->size = size; /* data size wanted */
return -ENOBUFS;
}
if (copy_to_user(x25_s, &state(hdlc)->settings, size))
@@ -303,7 +303,7 @@ static int x25_ioctl(struct net_device *dev, struct ifreq *ifr)
return -EBUSY;
/* backward compatibility */
- if (ifr->ifr_settings.size == 0) {
+ if (ifs->size == 0) {
new_settings.dce = 0;
new_settings.modulo = 8;
new_settings.window = 7;
diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index fd61a7cc4fdf..e985e54ba75d 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -142,11 +142,6 @@ static int hostess_close(struct net_device *d)
return 0;
}
-static int hostess_ioctl(struct net_device *d, struct ifreq *ifr, int cmd)
-{
- return hdlc_ioctl(d, ifr, cmd);
-}
-
/* Passed network frames, fire them downwind.
*/
@@ -171,7 +166,7 @@ static const struct net_device_ops hostess_ops = {
.ndo_open = hostess_open,
.ndo_stop = hostess_close,
.ndo_start_xmit = hdlc_start_xmit,
- .ndo_do_ioctl = hostess_ioctl,
+ .ndo_siocwandev = hdlc_ioctl,
};
static struct z8530_dev *sv11_init(int iobase, int irq)
@@ -324,16 +319,18 @@ MODULE_DESCRIPTION("Modular driver for the Comtrol Hostess SV11");
static struct z8530_dev *sv11_unit;
-int init_module(void)
+static int sv11_module_init(void)
{
sv11_unit = sv11_init(io, irq);
if (!sv11_unit)
return -ENODEV;
return 0;
}
+module_init(sv11_module_init);
-void cleanup_module(void)
+static void sv11_module_cleanup(void)
{
if (sv11_unit)
sv11_shutdown(sv11_unit);
}
+module_exit(sv11_module_cleanup);
diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
index 3c51ab239fb2..88a36a069311 100644
--- a/drivers/net/wan/ixp4xx_hss.c
+++ b/drivers/net/wan/ixp4xx_hss.c
@@ -975,11 +975,10 @@ static int init_hdlc_queues(struct port *port)
return -ENOMEM;
}
- port->desc_tab = dma_pool_alloc(dma_pool, GFP_KERNEL,
+ port->desc_tab = dma_pool_zalloc(dma_pool, GFP_KERNEL,
&port->desc_tab_phys);
if (!port->desc_tab)
return -ENOMEM;
- memset(port->desc_tab, 0, POOL_ALLOC_SIZE);
memset(port->rx_buff_tab, 0, sizeof(port->rx_buff_tab)); /* tables */
memset(port->tx_buff_tab, 0, sizeof(port->tx_buff_tab));
@@ -1255,23 +1254,20 @@ static void find_best_clock(u32 timer_freq, u32 rate, u32 *best, u32 *reg)
}
}
-static int hss_hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int hss_hdlc_ioctl(struct net_device *dev, struct if_settings *ifs)
{
const size_t size = sizeof(sync_serial_settings);
sync_serial_settings new_line;
- sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync;
+ sync_serial_settings __user *line = ifs->ifs_ifsu.sync;
struct port *port = dev_to_port(dev);
unsigned long flags;
int clk;
- if (cmd != SIOCWANDEV)
- return hdlc_ioctl(dev, ifr, cmd);
-
- switch (ifr->ifr_settings.type) {
+ switch (ifs->type) {
case IF_GET_IFACE:
- ifr->ifr_settings.type = IF_IFACE_V35;
- if (ifr->ifr_settings.size < size) {
- ifr->ifr_settings.size = size; /* data size wanted */
+ ifs->type = IF_IFACE_V35;
+ if (ifs->size < size) {
+ ifs->size = size; /* data size wanted */
return -ENOBUFS;
}
memset(&new_line, 0, sizeof(new_line));
@@ -1324,7 +1320,7 @@ static int hss_hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return 0;
default:
- return hdlc_ioctl(dev, ifr, cmd);
+ return hdlc_ioctl(dev, ifs);
}
}
@@ -1336,7 +1332,7 @@ static const struct net_device_ops hss_hdlc_ops = {
.ndo_open = hss_hdlc_open,
.ndo_stop = hss_hdlc_close,
.ndo_start_xmit = hdlc_start_xmit,
- .ndo_do_ioctl = hss_hdlc_ioctl,
+ .ndo_siocwandev = hss_hdlc_ioctl,
};
static int hss_init_one(struct platform_device *pdev)
diff --git a/drivers/net/wan/lmc/lmc.h b/drivers/net/wan/lmc/lmc.h
index 3bd541c868d5..d7d59b4595f9 100644
--- a/drivers/net/wan/lmc/lmc.h
+++ b/drivers/net/wan/lmc/lmc.h
@@ -19,7 +19,7 @@ void lmc_mii_writereg(lmc_softc_t * const, unsigned, unsigned, unsigned);
void lmc_gpio_mkinput(lmc_softc_t * const sc, u32 bits);
void lmc_gpio_mkoutput(lmc_softc_t * const sc, u32 bits);
-int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+int lmc_ioctl(struct net_device *dev, struct if_settings *ifs);
extern lmc_media_t lmc_ds3_media;
extern lmc_media_t lmc_ssi_media;
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 6c163db52835..ed687bf6ec47 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -105,7 +105,8 @@ static void lmc_driver_timeout(struct net_device *dev, unsigned int txqueue);
* linux reserves 16 device specific IOCTLs. We call them
* LMCIOC* to control various bits of our world.
*/
-int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
+static int lmc_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd) /*fold00*/
{
lmc_softc_t *sc = dev_to_sc(dev);
lmc_ctl_t ctl;
@@ -124,7 +125,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
* To date internally, just copy this out to the user.
*/
case LMCIOCGINFO: /*fold01*/
- if (copy_to_user(ifr->ifr_data, &sc->ictl, sizeof(lmc_ctl_t)))
+ if (copy_to_user(data, &sc->ictl, sizeof(lmc_ctl_t)))
ret = -EFAULT;
else
ret = 0;
@@ -141,7 +142,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
break;
}
- if (copy_from_user(&ctl, ifr->ifr_data, sizeof(lmc_ctl_t))) {
+ if (copy_from_user(&ctl, data, sizeof(lmc_ctl_t))) {
ret = -EFAULT;
break;
}
@@ -171,7 +172,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
break;
}
- if (copy_from_user(&new_type, ifr->ifr_data, sizeof(u16))) {
+ if (copy_from_user(&new_type, data, sizeof(u16))) {
ret = -EFAULT;
break;
}
@@ -211,8 +212,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
sc->lmc_xinfo.Magic1 = 0xDEADBEEF;
- if (copy_to_user(ifr->ifr_data, &sc->lmc_xinfo,
- sizeof(struct lmc_xinfo)))
+ if (copy_to_user(data, &sc->lmc_xinfo, sizeof(struct lmc_xinfo)))
ret = -EFAULT;
else
ret = 0;
@@ -245,9 +245,9 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
regVal & T1FRAMER_SEF_MASK;
}
spin_unlock_irqrestore(&sc->lmc_lock, flags);
- if (copy_to_user(ifr->ifr_data, &sc->lmc_device->stats,
+ if (copy_to_user(data, &sc->lmc_device->stats,
sizeof(sc->lmc_device->stats)) ||
- copy_to_user(ifr->ifr_data + sizeof(sc->lmc_device->stats),
+ copy_to_user(data + sizeof(sc->lmc_device->stats),
&sc->extra_stats, sizeof(sc->extra_stats)))
ret = -EFAULT;
else
@@ -282,7 +282,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
break;
}
- if (copy_from_user(&ctl, ifr->ifr_data, sizeof(lmc_ctl_t))) {
+ if (copy_from_user(&ctl, data, sizeof(lmc_ctl_t))) {
ret = -EFAULT;
break;
}
@@ -314,11 +314,11 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
#ifdef DEBUG
case LMCIOCDUMPEVENTLOG:
- if (copy_to_user(ifr->ifr_data, &lmcEventLogIndex, sizeof(u32))) {
+ if (copy_to_user(data, &lmcEventLogIndex, sizeof(u32))) {
ret = -EFAULT;
break;
}
- if (copy_to_user(ifr->ifr_data + sizeof(u32), lmcEventLogBuf,
+ if (copy_to_user(data + sizeof(u32), lmcEventLogBuf,
sizeof(lmcEventLogBuf)))
ret = -EFAULT;
else
@@ -346,7 +346,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
*/
netif_stop_queue(dev);
- if (copy_from_user(&xc, ifr->ifr_data, sizeof(struct lmc_xilinx_control))) {
+ if (copy_from_user(&xc, data, sizeof(struct lmc_xilinx_control))) {
ret = -EFAULT;
break;
}
@@ -609,10 +609,8 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
}
break;
- default: /*fold01*/
- /* If we don't know what to do, give the protocol a shot. */
- ret = lmc_proto_ioctl (sc, ifr, cmd);
- break;
+ default:
+ break;
}
return ret;
@@ -788,7 +786,8 @@ static const struct net_device_ops lmc_ops = {
.ndo_open = lmc_open,
.ndo_stop = lmc_close,
.ndo_start_xmit = hdlc_start_xmit,
- .ndo_do_ioctl = lmc_ioctl,
+ .ndo_siocwandev = hdlc_ioctl,
+ .ndo_siocdevprivate = lmc_siocdevprivate,
.ndo_tx_timeout = lmc_driver_timeout,
.ndo_get_stats = lmc_get_stats,
};
diff --git a/drivers/net/wan/lmc/lmc_proto.c b/drivers/net/wan/lmc/lmc_proto.c
index 4e9cc83b615a..e5487616a816 100644
--- a/drivers/net/wan/lmc/lmc_proto.c
+++ b/drivers/net/wan/lmc/lmc_proto.c
@@ -58,13 +58,6 @@ void lmc_proto_attach(lmc_softc_t *sc) /*FOLD00*/
}
}
-int lmc_proto_ioctl(lmc_softc_t *sc, struct ifreq *ifr, int cmd)
-{
- if (sc->if_type == LMC_PPP)
- return hdlc_ioctl(sc->lmc_device, ifr, cmd);
- return -EOPNOTSUPP;
-}
-
int lmc_proto_open(lmc_softc_t *sc)
{
int ret = 0;
diff --git a/drivers/net/wan/lmc/lmc_proto.h b/drivers/net/wan/lmc/lmc_proto.h
index bb098e443776..e56e7072de44 100644
--- a/drivers/net/wan/lmc/lmc_proto.h
+++ b/drivers/net/wan/lmc/lmc_proto.h
@@ -5,7 +5,6 @@
#include <linux/hdlc.h>
void lmc_proto_attach(lmc_softc_t *sc);
-int lmc_proto_ioctl(lmc_softc_t *sc, struct ifreq *ifr, int cmd);
int lmc_proto_open(lmc_softc_t *sc);
void lmc_proto_close(lmc_softc_t *sc);
__be16 lmc_proto_type(lmc_softc_t *sc, struct sk_buff *skb);
diff --git a/drivers/net/wan/n2.c b/drivers/net/wan/n2.c
index bdb6dc2409bc..f3e80722ba1d 100644
--- a/drivers/net/wan/n2.c
+++ b/drivers/net/wan/n2.c
@@ -227,27 +227,30 @@ static int n2_close(struct net_device *dev)
return 0;
}
-static int n2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int n2_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
- const size_t size = sizeof(sync_serial_settings);
- sync_serial_settings new_line;
- sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync;
- port_t *port = dev_to_port(dev);
-
#ifdef DEBUG_RINGS
if (cmd == SIOCDEVPRIVATE) {
sca_dump_rings(dev);
return 0;
}
#endif
- if (cmd != SIOCWANDEV)
- return hdlc_ioctl(dev, ifr, cmd);
+ return -EOPNOTSUPP;
+}
+
+static int n2_ioctl(struct net_device *dev, struct if_settings *ifs)
+{
+ const size_t size = sizeof(sync_serial_settings);
+ sync_serial_settings new_line;
+ sync_serial_settings __user *line = ifs->ifs_ifsu.sync;
+ port_t *port = dev_to_port(dev);
- switch (ifr->ifr_settings.type) {
+ switch (ifs->type) {
case IF_GET_IFACE:
- ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL;
- if (ifr->ifr_settings.size < size) {
- ifr->ifr_settings.size = size; /* data size wanted */
+ ifs->type = IF_IFACE_SYNC_SERIAL;
+ if (ifs->size < size) {
+ ifs->size = size; /* data size wanted */
return -ENOBUFS;
}
if (copy_to_user(line, &port->settings, size))
@@ -275,7 +278,7 @@ static int n2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return 0;
default:
- return hdlc_ioctl(dev, ifr, cmd);
+ return hdlc_ioctl(dev, ifs);
}
}
@@ -311,7 +314,8 @@ static const struct net_device_ops n2_ops = {
.ndo_open = n2_open,
.ndo_stop = n2_close,
.ndo_start_xmit = hdlc_start_xmit,
- .ndo_do_ioctl = n2_ioctl,
+ .ndo_siocwandev = n2_ioctl,
+ .ndo_siocdevprivate = n2_siocdevprivate,
};
static int __init n2_run(unsigned long io, unsigned long irq,
diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c
index 7b123a771aa6..4766446f0fa0 100644
--- a/drivers/net/wan/pc300too.c
+++ b/drivers/net/wan/pc300too.c
@@ -174,27 +174,30 @@ static int pc300_close(struct net_device *dev)
return 0;
}
-static int pc300_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int pc300_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
- const size_t size = sizeof(sync_serial_settings);
- sync_serial_settings new_line;
- sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync;
- int new_type;
- port_t *port = dev_to_port(dev);
-
#ifdef DEBUG_RINGS
if (cmd == SIOCDEVPRIVATE) {
sca_dump_rings(dev);
return 0;
}
#endif
- if (cmd != SIOCWANDEV)
- return hdlc_ioctl(dev, ifr, cmd);
+ return -EOPNOTSUPP;
+}
+
+static int pc300_ioctl(struct net_device *dev, struct if_settings *ifs)
+{
+ const size_t size = sizeof(sync_serial_settings);
+ sync_serial_settings new_line;
+ sync_serial_settings __user *line = ifs->ifs_ifsu.sync;
+ int new_type;
+ port_t *port = dev_to_port(dev);
- if (ifr->ifr_settings.type == IF_GET_IFACE) {
- ifr->ifr_settings.type = port->iface;
- if (ifr->ifr_settings.size < size) {
- ifr->ifr_settings.size = size; /* data size wanted */
+ if (ifs->type == IF_GET_IFACE) {
+ ifs->type = port->iface;
+ if (ifs->size < size) {
+ ifs->size = size; /* data size wanted */
return -ENOBUFS;
}
if (copy_to_user(line, &port->settings, size))
@@ -203,21 +206,21 @@ static int pc300_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
if (port->card->type == PC300_X21 &&
- (ifr->ifr_settings.type == IF_IFACE_SYNC_SERIAL ||
- ifr->ifr_settings.type == IF_IFACE_X21))
+ (ifs->type == IF_IFACE_SYNC_SERIAL ||
+ ifs->type == IF_IFACE_X21))
new_type = IF_IFACE_X21;
else if (port->card->type == PC300_RSV &&
- (ifr->ifr_settings.type == IF_IFACE_SYNC_SERIAL ||
- ifr->ifr_settings.type == IF_IFACE_V35))
+ (ifs->type == IF_IFACE_SYNC_SERIAL ||
+ ifs->type == IF_IFACE_V35))
new_type = IF_IFACE_V35;
else if (port->card->type == PC300_RSV &&
- ifr->ifr_settings.type == IF_IFACE_V24)
+ ifs->type == IF_IFACE_V24)
new_type = IF_IFACE_V24;
else
- return hdlc_ioctl(dev, ifr, cmd);
+ return hdlc_ioctl(dev, ifs);
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -272,7 +275,8 @@ static const struct net_device_ops pc300_ops = {
.ndo_open = pc300_open,
.ndo_stop = pc300_close,
.ndo_start_xmit = hdlc_start_xmit,
- .ndo_do_ioctl = pc300_ioctl,
+ .ndo_siocwandev = pc300_ioctl,
+ .ndo_siocdevprivate = pc300_siocdevprivate,
};
static int pc300_pci_init_one(struct pci_dev *pdev,
diff --git a/drivers/net/wan/pci200syn.c b/drivers/net/wan/pci200syn.c
index dee9c4e15eca..ea86c7035653 100644
--- a/drivers/net/wan/pci200syn.c
+++ b/drivers/net/wan/pci200syn.c
@@ -167,27 +167,30 @@ static int pci200_close(struct net_device *dev)
return 0;
}
-static int pci200_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int pci200_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
- const size_t size = sizeof(sync_serial_settings);
- sync_serial_settings new_line;
- sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync;
- port_t *port = dev_to_port(dev);
-
#ifdef DEBUG_RINGS
if (cmd == SIOCDEVPRIVATE) {
sca_dump_rings(dev);
return 0;
}
#endif
- if (cmd != SIOCWANDEV)
- return hdlc_ioctl(dev, ifr, cmd);
+ return -EOPNOTSUPP;
+}
+
+static int pci200_ioctl(struct net_device *dev, struct if_settings *ifs)
+{
+ const size_t size = sizeof(sync_serial_settings);
+ sync_serial_settings new_line;
+ sync_serial_settings __user *line = ifs->ifs_ifsu.sync;
+ port_t *port = dev_to_port(dev);
- switch (ifr->ifr_settings.type) {
+ switch (ifs->type) {
case IF_GET_IFACE:
- ifr->ifr_settings.type = IF_IFACE_V35;
- if (ifr->ifr_settings.size < size) {
- ifr->ifr_settings.size = size; /* data size wanted */
+ ifs->type = IF_IFACE_V35;
+ if (ifs->size < size) {
+ ifs->size = size; /* data size wanted */
return -ENOBUFS;
}
if (copy_to_user(line, &port->settings, size))
@@ -217,7 +220,7 @@ static int pci200_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return 0;
default:
- return hdlc_ioctl(dev, ifr, cmd);
+ return hdlc_ioctl(dev, ifs);
}
}
@@ -253,7 +256,8 @@ static const struct net_device_ops pci200_ops = {
.ndo_open = pci200_open,
.ndo_stop = pci200_close,
.ndo_start_xmit = hdlc_start_xmit,
- .ndo_do_ioctl = pci200_ioctl,
+ .ndo_siocwandev = pci200_ioctl,
+ .ndo_siocdevprivate = pci200_siocdevprivate,
};
static int pci200_pci_init_one(struct pci_dev *pdev,
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
deleted file mode 100644
index 3092a09d3eaa..000000000000
--- a/drivers/net/wan/sbni.c
+++ /dev/null
@@ -1,1638 +0,0 @@
-/* sbni.c: Granch SBNI12 leased line adapters driver for linux
- *
- * Written 2001 by Denis I.Timofeev (timofeev@granch.ru)
- *
- * Previous versions were written by Yaroslav Polyakov,
- * Alexey Zverev and Max Khon.
- *
- * Driver supports SBNI12-02,-04,-05,-10,-11 cards, single and
- * double-channel, PCI and ISA modifications.
- * More info and useful utilities to work with SBNI12 cards you can find
- * at http://www.granch.com (English) or http://www.granch.ru (Russian)
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License.
- *
- *
- * 5.0.1 Jun 22 2001
- * - Fixed bug in probe
- * 5.0.0 Jun 06 2001
- * - Driver was completely redesigned by Denis I.Timofeev,
- * - now PCI/Dual, ISA/Dual (with single interrupt line) models are
- * - supported
- * 3.3.0 Thu Feb 24 21:30:28 NOVT 2000
- * - PCI cards support
- * 3.2.0 Mon Dec 13 22:26:53 NOVT 1999
- * - Completely rebuilt all the packet storage system
- * - to work in Ethernet-like style.
- * 3.1.1 just fixed some bugs (5 aug 1999)
- * 3.1.0 added balancing feature (26 apr 1999)
- * 3.0.1 just fixed some bugs (14 apr 1999).
- * 3.0.0 Initial Revision, Yaroslav Polyakov (24 Feb 1999)
- * - added pre-calculation for CRC, fixed bug with "len-2" frames,
- * - removed outbound fragmentation (MTU=1000), written CRC-calculation
- * - on asm, added work with hard_headers and now we have our own cache
- * - for them, optionally supported word-interchange on some chipsets,
- *
- * Known problem: this driver wasn't tested on multiprocessor machine.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/ptrace.h>
-#include <linux/fcntl.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/pci.h>
-#include <linux/skbuff.h>
-#include <linux/timer.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-
-#include <net/net_namespace.h>
-#include <net/arp.h>
-#include <net/Space.h>
-
-#include <asm/io.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
-#include <asm/irq.h>
-#include <linux/uaccess.h>
-
-#include "sbni.h"
-
-/* device private data */
-
-struct net_local {
- struct timer_list watchdog;
- struct net_device *watchdog_dev;
-
- spinlock_t lock;
- struct sk_buff *rx_buf_p; /* receive buffer ptr */
- struct sk_buff *tx_buf_p; /* transmit buffer ptr */
-
- unsigned int framelen; /* current frame length */
- unsigned int maxframe; /* maximum valid frame length */
- unsigned int state;
- unsigned int inppos, outpos; /* positions in rx/tx buffers */
-
- /* transmitting frame number - from frames qty to 1 */
- unsigned int tx_frameno;
-
- /* expected number of next receiving frame */
- unsigned int wait_frameno;
-
- /* count of failed attempts to frame send - 32 attempts do before
- error - while receiver tunes on opposite side of wire */
- unsigned int trans_errors;
-
- /* idle time; send pong when limit exceeded */
- unsigned int timer_ticks;
-
- /* fields used for receive level autoselection */
- int delta_rxl;
- unsigned int cur_rxl_index, timeout_rxl;
- unsigned long cur_rxl_rcvd, prev_rxl_rcvd;
-
- struct sbni_csr1 csr1; /* current value of CSR1 */
- struct sbni_in_stats in_stats; /* internal statistics */
-
- struct net_device *second; /* for ISA/dual cards */
-
-#ifdef CONFIG_SBNI_MULTILINE
- struct net_device *master;
- struct net_device *link;
-#endif
-};
-
-
-static int sbni_card_probe( unsigned long );
-static int sbni_pci_probe( struct net_device * );
-static struct net_device *sbni_probe1(struct net_device *, unsigned long, int);
-static int sbni_open( struct net_device * );
-static int sbni_close( struct net_device * );
-static netdev_tx_t sbni_start_xmit(struct sk_buff *,
- struct net_device * );
-static int sbni_ioctl( struct net_device *, struct ifreq *, int );
-static void set_multicast_list( struct net_device * );
-
-static irqreturn_t sbni_interrupt( int, void * );
-static void handle_channel( struct net_device * );
-static int recv_frame( struct net_device * );
-static void send_frame( struct net_device * );
-static int upload_data( struct net_device *,
- unsigned, unsigned, unsigned, u32 );
-static void download_data( struct net_device *, u32 * );
-static void sbni_watchdog(struct timer_list *);
-static void interpret_ack( struct net_device *, unsigned );
-static int append_frame_to_pkt( struct net_device *, unsigned, u32 );
-static void indicate_pkt( struct net_device * );
-static void card_start( struct net_device * );
-static void prepare_to_send( struct sk_buff *, struct net_device * );
-static void drop_xmit_queue( struct net_device * );
-static void send_frame_header( struct net_device *, u32 * );
-static int skip_tail( unsigned int, unsigned int, u32 );
-static int check_fhdr( u32, u32 *, u32 *, u32 *, u32 *, u32 * );
-static void change_level( struct net_device * );
-static void timeout_change_level( struct net_device * );
-static u32 calc_crc32( u32, u8 *, u32 );
-static struct sk_buff * get_rx_buf( struct net_device * );
-static int sbni_init( struct net_device * );
-
-#ifdef CONFIG_SBNI_MULTILINE
-static int enslave( struct net_device *, struct net_device * );
-static int emancipate( struct net_device * );
-#endif
-
-static const char version[] =
- "Granch SBNI12 driver ver 5.0.1 Jun 22 2001 Denis I.Timofeev.\n";
-
-static bool skip_pci_probe __initdata = false;
-static int scandone __initdata = 0;
-static int num __initdata = 0;
-
-static unsigned char rxl_tab[];
-static u32 crc32tab[];
-
-/* A list of all installed devices, for removing the driver module. */
-static struct net_device *sbni_cards[ SBNI_MAX_NUM_CARDS ];
-
-/* Lists of device's parameters */
-static u32 io[ SBNI_MAX_NUM_CARDS ] __initdata =
- { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 };
-static u32 irq[ SBNI_MAX_NUM_CARDS ] __initdata;
-static u32 baud[ SBNI_MAX_NUM_CARDS ] __initdata;
-static u32 rxl[ SBNI_MAX_NUM_CARDS ] __initdata =
- { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 };
-static u32 mac[ SBNI_MAX_NUM_CARDS ] __initdata;
-
-#ifndef MODULE
-typedef u32 iarr[];
-static iarr *dest[5] __initdata = { &io, &irq, &baud, &rxl, &mac };
-#endif
-
-/* A zero-terminated list of I/O addresses to be probed on ISA bus */
-static unsigned int netcard_portlist[ ] __initdata = {
- 0x210, 0x214, 0x220, 0x224, 0x230, 0x234, 0x240, 0x244, 0x250, 0x254,
- 0x260, 0x264, 0x270, 0x274, 0x280, 0x284, 0x290, 0x294, 0x2a0, 0x2a4,
- 0x2b0, 0x2b4, 0x2c0, 0x2c4, 0x2d0, 0x2d4, 0x2e0, 0x2e4, 0x2f0, 0x2f4,
- 0 };
-
-#define NET_LOCAL_LOCK(dev) (((struct net_local *)netdev_priv(dev))->lock)
-
-/*
- * Look for SBNI card which addr stored in dev->base_addr, if nonzero.
- * Otherwise, look through PCI bus. If none PCI-card was found, scan ISA.
- */
-
-static inline int __init
-sbni_isa_probe( struct net_device *dev )
-{
- if( dev->base_addr > 0x1ff &&
- request_region( dev->base_addr, SBNI_IO_EXTENT, dev->name ) &&
- sbni_probe1( dev, dev->base_addr, dev->irq ) )
-
- return 0;
- else {
- pr_err("base address 0x%lx is busy, or adapter is malfunctional!\n",
- dev->base_addr);
- return -ENODEV;
- }
-}
-
-static const struct net_device_ops sbni_netdev_ops = {
- .ndo_open = sbni_open,
- .ndo_stop = sbni_close,
- .ndo_start_xmit = sbni_start_xmit,
- .ndo_set_rx_mode = set_multicast_list,
- .ndo_do_ioctl = sbni_ioctl,
- .ndo_set_mac_address = eth_mac_addr,
- .ndo_validate_addr = eth_validate_addr,
-};
-
-static void __init sbni_devsetup(struct net_device *dev)
-{
- ether_setup( dev );
- dev->netdev_ops = &sbni_netdev_ops;
-}
-
-int __init sbni_probe(int unit)
-{
- struct net_device *dev;
- int err;
-
- dev = alloc_netdev(sizeof(struct net_local), "sbni",
- NET_NAME_UNKNOWN, sbni_devsetup);
- if (!dev)
- return -ENOMEM;
-
- dev->netdev_ops = &sbni_netdev_ops;
-
- sprintf(dev->name, "sbni%d", unit);
- netdev_boot_setup_check(dev);
-
- err = sbni_init(dev);
- if (err) {
- free_netdev(dev);
- return err;
- }
-
- err = register_netdev(dev);
- if (err) {
- release_region( dev->base_addr, SBNI_IO_EXTENT );
- free_netdev(dev);
- return err;
- }
- pr_info_once("%s", version);
- return 0;
-}
-
-static int __init sbni_init(struct net_device *dev)
-{
- int i;
- if( dev->base_addr )
- return sbni_isa_probe( dev );
- /* otherwise we have to perform search our adapter */
-
- if( io[ num ] != -1 ) {
- dev->base_addr = io[ num ];
- dev->irq = irq[ num ];
- } else if( scandone || io[ 0 ] != -1 ) {
- return -ENODEV;
- }
-
- /* if io[ num ] contains non-zero address, then that is on ISA bus */
- if( dev->base_addr )
- return sbni_isa_probe( dev );
-
- /* ...otherwise - scan PCI first */
- if( !skip_pci_probe && !sbni_pci_probe( dev ) )
- return 0;
-
- if( io[ num ] == -1 ) {
- /* Auto-scan will be stopped when first ISA card were found */
- scandone = 1;
- if( num > 0 )
- return -ENODEV;
- }
-
- for( i = 0; netcard_portlist[ i ]; ++i ) {
- int ioaddr = netcard_portlist[ i ];
- if( request_region( ioaddr, SBNI_IO_EXTENT, dev->name ) &&
- sbni_probe1( dev, ioaddr, 0 ))
- return 0;
- }
-
- return -ENODEV;
-}
-
-
-static int __init
-sbni_pci_probe( struct net_device *dev )
-{
- struct pci_dev *pdev = NULL;
-
- while( (pdev = pci_get_class( PCI_CLASS_NETWORK_OTHER << 8, pdev ))
- != NULL ) {
- int pci_irq_line;
- unsigned long pci_ioaddr;
-
- if( pdev->vendor != SBNI_PCI_VENDOR &&
- pdev->device != SBNI_PCI_DEVICE )
- continue;
-
- pci_ioaddr = pci_resource_start( pdev, 0 );
- pci_irq_line = pdev->irq;
-
- /* Avoid already found cards from previous calls */
- if( !request_region( pci_ioaddr, SBNI_IO_EXTENT, dev->name ) ) {
- if (pdev->subsystem_device != 2)
- continue;
-
- /* Dual adapter is present */
- if (!request_region(pci_ioaddr += 4, SBNI_IO_EXTENT,
- dev->name ) )
- continue;
- }
-
- if (pci_irq_line <= 0 || pci_irq_line >= nr_irqs)
- pr_warn(
-"WARNING: The PCI BIOS assigned this PCI card to IRQ %d, which is unlikely to work!.\n"
-"You should use the PCI BIOS setup to assign a valid IRQ line.\n",
- pci_irq_line );
-
- /* avoiding re-enable dual adapters */
- if( (pci_ioaddr & 7) == 0 && pci_enable_device( pdev ) ) {
- release_region( pci_ioaddr, SBNI_IO_EXTENT );
- pci_dev_put( pdev );
- return -EIO;
- }
- if( sbni_probe1( dev, pci_ioaddr, pci_irq_line ) ) {
- SET_NETDEV_DEV(dev, &pdev->dev);
- /* not the best thing to do, but this is all messed up
- for hotplug systems anyway... */
- pci_dev_put( pdev );
- return 0;
- }
- }
- return -ENODEV;
-}
-
-
-static struct net_device * __init
-sbni_probe1( struct net_device *dev, unsigned long ioaddr, int irq )
-{
- struct net_local *nl;
-
- if( sbni_card_probe( ioaddr ) ) {
- release_region( ioaddr, SBNI_IO_EXTENT );
- return NULL;
- }
-
- outb( 0, ioaddr + CSR0 );
-
- if( irq < 2 ) {
- unsigned long irq_mask;
-
- irq_mask = probe_irq_on();
- outb( EN_INT | TR_REQ, ioaddr + CSR0 );
- outb( PR_RES, ioaddr + CSR1 );
- mdelay(50);
- irq = probe_irq_off(irq_mask);
- outb( 0, ioaddr + CSR0 );
-
- if( !irq ) {
- pr_err("%s: can't detect device irq!\n", dev->name);
- release_region( ioaddr, SBNI_IO_EXTENT );
- return NULL;
- }
- } else if( irq == 2 )
- irq = 9;
-
- dev->irq = irq;
- dev->base_addr = ioaddr;
-
- /* Fill in sbni-specific dev fields. */
- nl = netdev_priv(dev);
- if( !nl ) {
- pr_err("%s: unable to get memory!\n", dev->name);
- release_region( ioaddr, SBNI_IO_EXTENT );
- return NULL;
- }
-
- memset( nl, 0, sizeof(struct net_local) );
- spin_lock_init( &nl->lock );
-
- /* store MAC address (generate if that isn't known) */
- *(__be16 *)dev->dev_addr = htons( 0x00ff );
- *(__be32 *)(dev->dev_addr + 2) = htonl( 0x01000000 |
- ((mac[num] ?
- mac[num] :
- (u32)((long)netdev_priv(dev))) & 0x00ffffff));
-
- /* store link settings (speed, receive level ) */
- nl->maxframe = DEFAULT_FRAME_LEN;
- nl->csr1.rate = baud[ num ];
-
- if( (nl->cur_rxl_index = rxl[ num ]) == -1 ) {
- /* autotune rxl */
- nl->cur_rxl_index = DEF_RXL;
- nl->delta_rxl = DEF_RXL_DELTA;
- } else {
- nl->delta_rxl = 0;
- }
- nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ];
- if( inb( ioaddr + CSR0 ) & 0x01 )
- nl->state |= FL_SLOW_MODE;
-
- pr_notice("%s: ioaddr %#lx, irq %d, MAC: 00:ff:01:%02x:%02x:%02x\n",
- dev->name, dev->base_addr, dev->irq,
- ((u8 *)dev->dev_addr)[3],
- ((u8 *)dev->dev_addr)[4],
- ((u8 *)dev->dev_addr)[5]);
-
- pr_notice("%s: speed %d",
- dev->name,
- ((nl->state & FL_SLOW_MODE) ? 500000 : 2000000)
- / (1 << nl->csr1.rate));
-
- if( nl->delta_rxl == 0 )
- pr_cont(", receive level 0x%x (fixed)\n", nl->cur_rxl_index);
- else
- pr_cont(", receive level (auto)\n");
-
-#ifdef CONFIG_SBNI_MULTILINE
- nl->master = dev;
- nl->link = NULL;
-#endif
-
- sbni_cards[ num++ ] = dev;
- return dev;
-}
-
-/* -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_SBNI_MULTILINE
-
-static netdev_tx_t
-sbni_start_xmit( struct sk_buff *skb, struct net_device *dev )
-{
- struct net_device *p;
-
- netif_stop_queue( dev );
-
- /* Looking for idle device in the list */
- for( p = dev; p; ) {
- struct net_local *nl = netdev_priv(p);
- spin_lock( &nl->lock );
- if( nl->tx_buf_p || (nl->state & FL_LINE_DOWN) ) {
- p = nl->link;
- spin_unlock( &nl->lock );
- } else {
- /* Idle dev is found */
- prepare_to_send( skb, p );
- spin_unlock( &nl->lock );
- netif_start_queue( dev );
- return NETDEV_TX_OK;
- }
- }
-
- return NETDEV_TX_BUSY;
-}
-
-#else /* CONFIG_SBNI_MULTILINE */
-
-static netdev_tx_t
-sbni_start_xmit( struct sk_buff *skb, struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
- netif_stop_queue( dev );
- spin_lock( &nl->lock );
-
- prepare_to_send( skb, dev );
-
- spin_unlock( &nl->lock );
- return NETDEV_TX_OK;
-}
-
-#endif /* CONFIG_SBNI_MULTILINE */
-
-/* -------------------------------------------------------------------------- */
-
-/* interrupt handler */
-
-/*
- * SBNI12D-10, -11/ISA boards within "common interrupt" mode could not
- * be looked as two independent single-channel devices. Every channel seems
- * as Ethernet interface but interrupt handler must be common. Really, first
- * channel ("master") driver only registers the handler. In its struct net_local
- * it has got pointer to "slave" channel's struct net_local and handles that's
- * interrupts too.
- * dev of successfully attached ISA SBNI boards is linked to list.
- * While next board driver is initialized, it scans this list. If one
- * has found dev with same irq and ioaddr different by 4 then it assumes
- * this board to be "master".
- */
-
-static irqreturn_t
-sbni_interrupt( int irq, void *dev_id )
-{
- struct net_device *dev = dev_id;
- struct net_local *nl = netdev_priv(dev);
- int repeat;
-
- spin_lock( &nl->lock );
- if( nl->second )
- spin_lock(&NET_LOCAL_LOCK(nl->second));
-
- do {
- repeat = 0;
- if( inb( dev->base_addr + CSR0 ) & (RC_RDY | TR_RDY) ) {
- handle_channel( dev );
- repeat = 1;
- }
- if( nl->second && /* second channel present */
- (inb( nl->second->base_addr+CSR0 ) & (RC_RDY | TR_RDY)) ) {
- handle_channel( nl->second );
- repeat = 1;
- }
- } while( repeat );
-
- if( nl->second )
- spin_unlock(&NET_LOCAL_LOCK(nl->second));
- spin_unlock( &nl->lock );
- return IRQ_HANDLED;
-}
-
-
-static void
-handle_channel( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
- unsigned long ioaddr = dev->base_addr;
-
- int req_ans;
- unsigned char csr0;
-
-#ifdef CONFIG_SBNI_MULTILINE
- /* Lock the master device because we going to change its local data */
- if( nl->state & FL_SLAVE )
- spin_lock(&NET_LOCAL_LOCK(nl->master));
-#endif
-
- outb( (inb( ioaddr + CSR0 ) & ~EN_INT) | TR_REQ, ioaddr + CSR0 );
-
- nl->timer_ticks = CHANGE_LEVEL_START_TICKS;
- for(;;) {
- csr0 = inb( ioaddr + CSR0 );
- if( ( csr0 & (RC_RDY | TR_RDY) ) == 0 )
- break;
-
- req_ans = !(nl->state & FL_PREV_OK);
-
- if( csr0 & RC_RDY )
- req_ans = recv_frame( dev );
-
- /*
- * TR_RDY always equals 1 here because we have owned the marker,
- * and we set TR_REQ when disabled interrupts
- */
- csr0 = inb( ioaddr + CSR0 );
- if( !(csr0 & TR_RDY) || (csr0 & RC_RDY) )
- netdev_err(dev, "internal error!\n");
-
- /* if state & FL_NEED_RESEND != 0 then tx_frameno != 0 */
- if( req_ans || nl->tx_frameno != 0 )
- send_frame( dev );
- else
- /* send marker without any data */
- outb( inb( ioaddr + CSR0 ) & ~TR_REQ, ioaddr + CSR0 );
- }
-
- outb( inb( ioaddr + CSR0 ) | EN_INT, ioaddr + CSR0 );
-
-#ifdef CONFIG_SBNI_MULTILINE
- if( nl->state & FL_SLAVE )
- spin_unlock(&NET_LOCAL_LOCK(nl->master));
-#endif
-}
-
-
-/*
- * Routine returns 1 if it needs to acknowledge received frame.
- * Empty frame received without errors won't be acknowledged.
- */
-
-static int
-recv_frame( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
- unsigned long ioaddr = dev->base_addr;
-
- u32 crc = CRC32_INITIAL;
-
- unsigned framelen = 0, frameno, ack;
- unsigned is_first, frame_ok = 0;
-
- if( check_fhdr( ioaddr, &framelen, &frameno, &ack, &is_first, &crc ) ) {
- frame_ok = framelen > 4
- ? upload_data( dev, framelen, frameno, is_first, crc )
- : skip_tail( ioaddr, framelen, crc );
- if( frame_ok )
- interpret_ack( dev, ack );
- }
-
- outb( inb( ioaddr + CSR0 ) ^ CT_ZER, ioaddr + CSR0 );
- if( frame_ok ) {
- nl->state |= FL_PREV_OK;
- if( framelen > 4 )
- nl->in_stats.all_rx_number++;
- } else {
- nl->state &= ~FL_PREV_OK;
- change_level( dev );
- nl->in_stats.all_rx_number++;
- nl->in_stats.bad_rx_number++;
- }
-
- return !frame_ok || framelen > 4;
-}
-
-
-static void
-send_frame( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
- u32 crc = CRC32_INITIAL;
-
- if( nl->state & FL_NEED_RESEND ) {
-
- /* if frame was sended but not ACK'ed - resend it */
- if( nl->trans_errors ) {
- --nl->trans_errors;
- if( nl->framelen != 0 )
- nl->in_stats.resend_tx_number++;
- } else {
- /* cannot xmit with many attempts */
-#ifdef CONFIG_SBNI_MULTILINE
- if( (nl->state & FL_SLAVE) || nl->link )
-#endif
- nl->state |= FL_LINE_DOWN;
- drop_xmit_queue( dev );
- goto do_send;
- }
- } else
- nl->trans_errors = TR_ERROR_COUNT;
-
- send_frame_header( dev, &crc );
- nl->state |= FL_NEED_RESEND;
- /*
- * FL_NEED_RESEND will be cleared after ACK, but if empty
- * frame sended then in prepare_to_send next frame
- */
-
-
- if( nl->framelen ) {
- download_data( dev, &crc );
- nl->in_stats.all_tx_number++;
- nl->state |= FL_WAIT_ACK;
- }
-
- outsb( dev->base_addr + DAT, (u8 *)&crc, sizeof crc );
-
-do_send:
- outb( inb( dev->base_addr + CSR0 ) & ~TR_REQ, dev->base_addr + CSR0 );
-
- if( nl->tx_frameno )
- /* next frame exists - we request card to send it */
- outb( inb( dev->base_addr + CSR0 ) | TR_REQ,
- dev->base_addr + CSR0 );
-}
-
-
-/*
- * Write the frame data into adapter's buffer memory, and calculate CRC.
- * Do padding if necessary.
- */
-
-static void
-download_data( struct net_device *dev, u32 *crc_p )
-{
- struct net_local *nl = netdev_priv(dev);
- struct sk_buff *skb = nl->tx_buf_p;
-
- unsigned len = min_t(unsigned int, skb->len - nl->outpos, nl->framelen);
-
- outsb( dev->base_addr + DAT, skb->data + nl->outpos, len );
- *crc_p = calc_crc32( *crc_p, skb->data + nl->outpos, len );
-
- /* if packet too short we should write some more bytes to pad */
- for( len = nl->framelen - len; len--; ) {
- outb( 0, dev->base_addr + DAT );
- *crc_p = CRC32( 0, *crc_p );
- }
-}
-
-
-static int
-upload_data( struct net_device *dev, unsigned framelen, unsigned frameno,
- unsigned is_first, u32 crc )
-{
- struct net_local *nl = netdev_priv(dev);
-
- int frame_ok;
-
- if( is_first ) {
- nl->wait_frameno = frameno;
- nl->inppos = 0;
- }
-
- if( nl->wait_frameno == frameno ) {
-
- if( nl->inppos + framelen <= ETHER_MAX_LEN )
- frame_ok = append_frame_to_pkt( dev, framelen, crc );
-
- /*
- * if CRC is right but framelen incorrect then transmitter
- * error was occurred... drop entire packet
- */
- else if( (frame_ok = skip_tail( dev->base_addr, framelen, crc ))
- != 0 ) {
- nl->wait_frameno = 0;
- nl->inppos = 0;
-#ifdef CONFIG_SBNI_MULTILINE
- nl->master->stats.rx_errors++;
- nl->master->stats.rx_missed_errors++;
-#else
- dev->stats.rx_errors++;
- dev->stats.rx_missed_errors++;
-#endif
- }
- /* now skip all frames until is_first != 0 */
- } else
- frame_ok = skip_tail( dev->base_addr, framelen, crc );
-
- if( is_first && !frame_ok ) {
- /*
- * Frame has been broken, but we had already stored
- * is_first... Drop entire packet.
- */
- nl->wait_frameno = 0;
-#ifdef CONFIG_SBNI_MULTILINE
- nl->master->stats.rx_errors++;
- nl->master->stats.rx_crc_errors++;
-#else
- dev->stats.rx_errors++;
- dev->stats.rx_crc_errors++;
-#endif
- }
-
- return frame_ok;
-}
-
-
-static inline void
-send_complete( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
-#ifdef CONFIG_SBNI_MULTILINE
- nl->master->stats.tx_packets++;
- nl->master->stats.tx_bytes += nl->tx_buf_p->len;
-#else
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += nl->tx_buf_p->len;
-#endif
- dev_consume_skb_irq(nl->tx_buf_p);
-
- nl->tx_buf_p = NULL;
-
- nl->outpos = 0;
- nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
- nl->framelen = 0;
-}
-
-
-static void
-interpret_ack( struct net_device *dev, unsigned ack )
-{
- struct net_local *nl = netdev_priv(dev);
-
- if( ack == FRAME_SENT_OK ) {
- nl->state &= ~FL_NEED_RESEND;
-
- if( nl->state & FL_WAIT_ACK ) {
- nl->outpos += nl->framelen;
-
- if( --nl->tx_frameno ) {
- nl->framelen = min_t(unsigned int,
- nl->maxframe,
- nl->tx_buf_p->len - nl->outpos);
- } else {
- send_complete( dev );
-#ifdef CONFIG_SBNI_MULTILINE
- netif_wake_queue( nl->master );
-#else
- netif_wake_queue( dev );
-#endif
- }
- }
- }
-
- nl->state &= ~FL_WAIT_ACK;
-}
-
-
-/*
- * Glue received frame with previous fragments of packet.
- * Indicate packet when last frame would be accepted.
- */
-
-static int
-append_frame_to_pkt( struct net_device *dev, unsigned framelen, u32 crc )
-{
- struct net_local *nl = netdev_priv(dev);
-
- u8 *p;
-
- if( nl->inppos + framelen > ETHER_MAX_LEN )
- return 0;
-
- if( !nl->rx_buf_p && !(nl->rx_buf_p = get_rx_buf( dev )) )
- return 0;
-
- p = nl->rx_buf_p->data + nl->inppos;
- insb( dev->base_addr + DAT, p, framelen );
- if( calc_crc32( crc, p, framelen ) != CRC32_REMAINDER )
- return 0;
-
- nl->inppos += framelen - 4;
- if( --nl->wait_frameno == 0 ) /* last frame received */
- indicate_pkt( dev );
-
- return 1;
-}
-
-
-/*
- * Prepare to start output on adapter.
- * Transmitter will be actually activated when marker is accepted.
- */
-
-static void
-prepare_to_send( struct sk_buff *skb, struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
- unsigned int len;
-
- /* nl->tx_buf_p == NULL here! */
- if( nl->tx_buf_p )
- netdev_err(dev, "memory leak!\n");
-
- nl->outpos = 0;
- nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
-
- len = skb->len;
- if( len < SBNI_MIN_LEN )
- len = SBNI_MIN_LEN;
-
- nl->tx_buf_p = skb;
- nl->tx_frameno = DIV_ROUND_UP(len, nl->maxframe);
- nl->framelen = len < nl->maxframe ? len : nl->maxframe;
-
- outb( inb( dev->base_addr + CSR0 ) | TR_REQ, dev->base_addr + CSR0 );
-#ifdef CONFIG_SBNI_MULTILINE
- netif_trans_update(nl->master);
-#else
- netif_trans_update(dev);
-#endif
-}
-
-
-static void
-drop_xmit_queue( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
- if( nl->tx_buf_p ) {
- dev_kfree_skb_any( nl->tx_buf_p );
- nl->tx_buf_p = NULL;
-#ifdef CONFIG_SBNI_MULTILINE
- nl->master->stats.tx_errors++;
- nl->master->stats.tx_carrier_errors++;
-#else
- dev->stats.tx_errors++;
- dev->stats.tx_carrier_errors++;
-#endif
- }
-
- nl->tx_frameno = 0;
- nl->framelen = 0;
- nl->outpos = 0;
- nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
-#ifdef CONFIG_SBNI_MULTILINE
- netif_start_queue( nl->master );
- netif_trans_update(nl->master);
-#else
- netif_start_queue( dev );
- netif_trans_update(dev);
-#endif
-}
-
-
-static void
-send_frame_header( struct net_device *dev, u32 *crc_p )
-{
- struct net_local *nl = netdev_priv(dev);
-
- u32 crc = *crc_p;
- u32 len_field = nl->framelen + 6; /* CRC + frameno + reserved */
- u8 value;
-
- if( nl->state & FL_NEED_RESEND )
- len_field |= FRAME_RETRY; /* non-first attempt... */
-
- if( nl->outpos == 0 )
- len_field |= FRAME_FIRST;
-
- len_field |= (nl->state & FL_PREV_OK) ? FRAME_SENT_OK : FRAME_SENT_BAD;
- outb( SBNI_SIG, dev->base_addr + DAT );
-
- value = (u8) len_field;
- outb( value, dev->base_addr + DAT );
- crc = CRC32( value, crc );
- value = (u8) (len_field >> 8);
- outb( value, dev->base_addr + DAT );
- crc = CRC32( value, crc );
-
- outb( nl->tx_frameno, dev->base_addr + DAT );
- crc = CRC32( nl->tx_frameno, crc );
- outb( 0, dev->base_addr + DAT );
- crc = CRC32( 0, crc );
- *crc_p = crc;
-}
-
-
-/*
- * if frame tail not needed (incorrect number or received twice),
- * it won't store, but CRC will be calculated
- */
-
-static int
-skip_tail( unsigned int ioaddr, unsigned int tail_len, u32 crc )
-{
- while( tail_len-- )
- crc = CRC32( inb( ioaddr + DAT ), crc );
-
- return crc == CRC32_REMAINDER;
-}
-
-
-/*
- * Preliminary checks if frame header is correct, calculates its CRC
- * and split it to simple fields
- */
-
-static int
-check_fhdr( u32 ioaddr, u32 *framelen, u32 *frameno, u32 *ack,
- u32 *is_first, u32 *crc_p )
-{
- u32 crc = *crc_p;
- u8 value;
-
- if( inb( ioaddr + DAT ) != SBNI_SIG )
- return 0;
-
- value = inb( ioaddr + DAT );
- *framelen = (u32)value;
- crc = CRC32( value, crc );
- value = inb( ioaddr + DAT );
- *framelen |= ((u32)value) << 8;
- crc = CRC32( value, crc );
-
- *ack = *framelen & FRAME_ACK_MASK;
- *is_first = (*framelen & FRAME_FIRST) != 0;
-
- if( (*framelen &= FRAME_LEN_MASK) < 6 ||
- *framelen > SBNI_MAX_FRAME - 3 )
- return 0;
-
- value = inb( ioaddr + DAT );
- *frameno = (u32)value;
- crc = CRC32( value, crc );
-
- crc = CRC32( inb( ioaddr + DAT ), crc ); /* reserved byte */
- *framelen -= 2;
-
- *crc_p = crc;
- return 1;
-}
-
-
-static struct sk_buff *
-get_rx_buf( struct net_device *dev )
-{
- /* +2 is to compensate for the alignment fixup below */
- struct sk_buff *skb = dev_alloc_skb( ETHER_MAX_LEN + 2 );
- if( !skb )
- return NULL;
-
- skb_reserve( skb, 2 ); /* Align IP on longword boundaries */
- return skb;
-}
-
-
-static void
-indicate_pkt( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
- struct sk_buff *skb = nl->rx_buf_p;
-
- skb_put( skb, nl->inppos );
-
-#ifdef CONFIG_SBNI_MULTILINE
- skb->protocol = eth_type_trans( skb, nl->master );
- netif_rx( skb );
- ++nl->master->stats.rx_packets;
- nl->master->stats.rx_bytes += nl->inppos;
-#else
- skb->protocol = eth_type_trans( skb, dev );
- netif_rx( skb );
- ++dev->stats.rx_packets;
- dev->stats.rx_bytes += nl->inppos;
-#endif
- nl->rx_buf_p = NULL; /* protocol driver will clear this sk_buff */
-}
-
-
-/* -------------------------------------------------------------------------- */
-
-/*
- * Routine checks periodically wire activity and regenerates marker if
- * connect was inactive for a long time.
- */
-
-static void
-sbni_watchdog(struct timer_list *t)
-{
- struct net_local *nl = from_timer(nl, t, watchdog);
- struct net_device *dev = nl->watchdog_dev;
- unsigned long flags;
- unsigned char csr0;
-
- spin_lock_irqsave( &nl->lock, flags );
-
- csr0 = inb( dev->base_addr + CSR0 );
- if( csr0 & RC_CHK ) {
-
- if( nl->timer_ticks ) {
- if( csr0 & (RC_RDY | BU_EMP) )
- /* receiving not active */
- nl->timer_ticks--;
- } else {
- nl->in_stats.timeout_number++;
- if( nl->delta_rxl )
- timeout_change_level( dev );
-
- outb( *(u_char *)&nl->csr1 | PR_RES,
- dev->base_addr + CSR1 );
- csr0 = inb( dev->base_addr + CSR0 );
- }
- } else
- nl->state &= ~FL_LINE_DOWN;
-
- outb( csr0 | RC_CHK, dev->base_addr + CSR0 );
-
- mod_timer(t, jiffies + SBNI_TIMEOUT);
-
- spin_unlock_irqrestore( &nl->lock, flags );
-}
-
-
-static unsigned char rxl_tab[] = {
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08,
- 0x0a, 0x0c, 0x0f, 0x16, 0x18, 0x1a, 0x1c, 0x1f
-};
-
-#define SIZE_OF_TIMEOUT_RXL_TAB 4
-static unsigned char timeout_rxl_tab[] = {
- 0x03, 0x05, 0x08, 0x0b
-};
-
-/* -------------------------------------------------------------------------- */
-
-static void
-card_start( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
- nl->timer_ticks = CHANGE_LEVEL_START_TICKS;
- nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
- nl->state |= FL_PREV_OK;
-
- nl->inppos = nl->outpos = 0;
- nl->wait_frameno = 0;
- nl->tx_frameno = 0;
- nl->framelen = 0;
-
- outb( *(u_char *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 );
- outb( EN_INT, dev->base_addr + CSR0 );
-}
-
-/* -------------------------------------------------------------------------- */
-
-/* Receive level auto-selection */
-
-static void
-change_level( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
- if( nl->delta_rxl == 0 ) /* do not auto-negotiate RxL */
- return;
-
- if( nl->cur_rxl_index == 0 )
- nl->delta_rxl = 1;
- else if( nl->cur_rxl_index == 15 )
- nl->delta_rxl = -1;
- else if( nl->cur_rxl_rcvd < nl->prev_rxl_rcvd )
- nl->delta_rxl = -nl->delta_rxl;
-
- nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index += nl->delta_rxl ];
- inb( dev->base_addr + CSR0 ); /* needs for PCI cards */
- outb( *(u8 *)&nl->csr1, dev->base_addr + CSR1 );
-
- nl->prev_rxl_rcvd = nl->cur_rxl_rcvd;
- nl->cur_rxl_rcvd = 0;
-}
-
-
-static void
-timeout_change_level( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
- nl->cur_rxl_index = timeout_rxl_tab[ nl->timeout_rxl ];
- if( ++nl->timeout_rxl >= 4 )
- nl->timeout_rxl = 0;
-
- nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ];
- inb( dev->base_addr + CSR0 );
- outb( *(unsigned char *)&nl->csr1, dev->base_addr + CSR1 );
-
- nl->prev_rxl_rcvd = nl->cur_rxl_rcvd;
- nl->cur_rxl_rcvd = 0;
-}
-
-/* -------------------------------------------------------------------------- */
-
-/*
- * Open/initialize the board.
- */
-
-static int
-sbni_open( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
- struct timer_list *w = &nl->watchdog;
-
- /*
- * For double ISA adapters within "common irq" mode, we have to
- * determine whether primary or secondary channel is initialized,
- * and set the irq handler only in first case.
- */
- if( dev->base_addr < 0x400 ) { /* ISA only */
- struct net_device **p = sbni_cards;
- for( ; *p && p < sbni_cards + SBNI_MAX_NUM_CARDS; ++p )
- if( (*p)->irq == dev->irq &&
- ((*p)->base_addr == dev->base_addr + 4 ||
- (*p)->base_addr == dev->base_addr - 4) &&
- (*p)->flags & IFF_UP ) {
-
- ((struct net_local *) (netdev_priv(*p)))
- ->second = dev;
- netdev_notice(dev, "using shared irq with %s\n",
- (*p)->name);
- nl->state |= FL_SECONDARY;
- goto handler_attached;
- }
- }
-
- if( request_irq(dev->irq, sbni_interrupt, IRQF_SHARED, dev->name, dev) ) {
- netdev_err(dev, "unable to get IRQ %d\n", dev->irq);
- return -EAGAIN;
- }
-
-handler_attached:
-
- spin_lock( &nl->lock );
- memset( &dev->stats, 0, sizeof(struct net_device_stats) );
- memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) );
-
- card_start( dev );
-
- netif_start_queue( dev );
-
- /* set timer watchdog */
- nl->watchdog_dev = dev;
- timer_setup(w, sbni_watchdog, 0);
- w->expires = jiffies + SBNI_TIMEOUT;
- add_timer( w );
-
- spin_unlock( &nl->lock );
- return 0;
-}
-
-
-static int
-sbni_close( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
- if( nl->second && nl->second->flags & IFF_UP ) {
- netdev_notice(dev, "Secondary channel (%s) is active!\n",
- nl->second->name);
- return -EBUSY;
- }
-
-#ifdef CONFIG_SBNI_MULTILINE
- if( nl->state & FL_SLAVE )
- emancipate( dev );
- else
- while( nl->link ) /* it's master device! */
- emancipate( nl->link );
-#endif
-
- spin_lock( &nl->lock );
-
- nl->second = NULL;
- drop_xmit_queue( dev );
- netif_stop_queue( dev );
-
- del_timer( &nl->watchdog );
-
- outb( 0, dev->base_addr + CSR0 );
-
- if( !(nl->state & FL_SECONDARY) )
- free_irq( dev->irq, dev );
- nl->state &= FL_SECONDARY;
-
- spin_unlock( &nl->lock );
- return 0;
-}
-
-
-/*
- Valid combinations in CSR0 (for probing):
-
- VALID_DECODER 0000,0011,1011,1010
-
- ; 0 ; -
- TR_REQ ; 1 ; +
- TR_RDY ; 2 ; -
- TR_RDY TR_REQ ; 3 ; +
- BU_EMP ; 4 ; +
- BU_EMP TR_REQ ; 5 ; +
- BU_EMP TR_RDY ; 6 ; -
- BU_EMP TR_RDY TR_REQ ; 7 ; +
- RC_RDY ; 8 ; +
- RC_RDY TR_REQ ; 9 ; +
- RC_RDY TR_RDY ; 10 ; -
- RC_RDY TR_RDY TR_REQ ; 11 ; -
- RC_RDY BU_EMP ; 12 ; -
- RC_RDY BU_EMP TR_REQ ; 13 ; -
- RC_RDY BU_EMP TR_RDY ; 14 ; -
- RC_RDY BU_EMP TR_RDY TR_REQ ; 15 ; -
-*/
-
-#define VALID_DECODER (2 + 8 + 0x10 + 0x20 + 0x80 + 0x100 + 0x200)
-
-
-static int
-sbni_card_probe( unsigned long ioaddr )
-{
- unsigned char csr0;
-
- csr0 = inb( ioaddr + CSR0 );
- if( csr0 != 0xff && csr0 != 0x00 ) {
- csr0 &= ~EN_INT;
- if( csr0 & BU_EMP )
- csr0 |= EN_INT;
-
- if( VALID_DECODER & (1 << (csr0 >> 4)) )
- return 0;
- }
-
- return -ENODEV;
-}
-
-/* -------------------------------------------------------------------------- */
-
-static int
-sbni_ioctl( struct net_device *dev, struct ifreq *ifr, int cmd )
-{
- struct net_local *nl = netdev_priv(dev);
- struct sbni_flags flags;
- int error = 0;
-
-#ifdef CONFIG_SBNI_MULTILINE
- struct net_device *slave_dev;
- char slave_name[ 8 ];
-#endif
-
- switch( cmd ) {
- case SIOCDEVGETINSTATS :
- if (copy_to_user( ifr->ifr_data, &nl->in_stats,
- sizeof(struct sbni_in_stats) ))
- error = -EFAULT;
- break;
-
- case SIOCDEVRESINSTATS :
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
- memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) );
- break;
-
- case SIOCDEVGHWSTATE :
- flags.mac_addr = *(u32 *)(dev->dev_addr + 3);
- flags.rate = nl->csr1.rate;
- flags.slow_mode = (nl->state & FL_SLOW_MODE) != 0;
- flags.rxl = nl->cur_rxl_index;
- flags.fixed_rxl = nl->delta_rxl == 0;
-
- if (copy_to_user( ifr->ifr_data, &flags, sizeof flags ))
- error = -EFAULT;
- break;
-
- case SIOCDEVSHWSTATE :
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
- spin_lock( &nl->lock );
- flags = *(struct sbni_flags*) &ifr->ifr_ifru;
- if( flags.fixed_rxl ) {
- nl->delta_rxl = 0;
- nl->cur_rxl_index = flags.rxl;
- } else {
- nl->delta_rxl = DEF_RXL_DELTA;
- nl->cur_rxl_index = DEF_RXL;
- }
-
- nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ];
- nl->csr1.rate = flags.rate;
- outb( *(u8 *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 );
- spin_unlock( &nl->lock );
- break;
-
-#ifdef CONFIG_SBNI_MULTILINE
-
- case SIOCDEVENSLAVE :
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
- if (copy_from_user( slave_name, ifr->ifr_data, sizeof slave_name ))
- return -EFAULT;
- slave_dev = dev_get_by_name(&init_net, slave_name );
- if( !slave_dev || !(slave_dev->flags & IFF_UP) ) {
- netdev_err(dev, "trying to enslave non-active device %s\n",
- slave_name);
- if (slave_dev)
- dev_put(slave_dev);
- return -EPERM;
- }
-
- return enslave( dev, slave_dev );
-
- case SIOCDEVEMANSIPATE :
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
- return emancipate( dev );
-
-#endif /* CONFIG_SBNI_MULTILINE */
-
- default :
- return -EOPNOTSUPP;
- }
-
- return error;
-}
-
-
-#ifdef CONFIG_SBNI_MULTILINE
-
-static int
-enslave( struct net_device *dev, struct net_device *slave_dev )
-{
- struct net_local *nl = netdev_priv(dev);
- struct net_local *snl = netdev_priv(slave_dev);
-
- if( nl->state & FL_SLAVE ) /* This isn't master or free device */
- return -EBUSY;
-
- if( snl->state & FL_SLAVE ) /* That was already enslaved */
- return -EBUSY;
-
- spin_lock( &nl->lock );
- spin_lock( &snl->lock );
-
- /* append to list */
- snl->link = nl->link;
- nl->link = slave_dev;
- snl->master = dev;
- snl->state |= FL_SLAVE;
-
- /* Summary statistics of MultiLine operation will be stored
- in master's counters */
- memset( &slave_dev->stats, 0, sizeof(struct net_device_stats) );
- netif_stop_queue( slave_dev );
- netif_wake_queue( dev ); /* Now we are able to transmit */
-
- spin_unlock( &snl->lock );
- spin_unlock( &nl->lock );
- netdev_notice(dev, "slave device (%s) attached\n", slave_dev->name);
- return 0;
-}
-
-
-static int
-emancipate( struct net_device *dev )
-{
- struct net_local *snl = netdev_priv(dev);
- struct net_device *p = snl->master;
- struct net_local *nl = netdev_priv(p);
-
- if( !(snl->state & FL_SLAVE) )
- return -EINVAL;
-
- spin_lock( &nl->lock );
- spin_lock( &snl->lock );
- drop_xmit_queue( dev );
-
- /* exclude from list */
- for(;;) { /* must be in list */
- struct net_local *t = netdev_priv(p);
- if( t->link == dev ) {
- t->link = snl->link;
- break;
- }
- p = t->link;
- }
-
- snl->link = NULL;
- snl->master = dev;
- snl->state &= ~FL_SLAVE;
-
- netif_start_queue( dev );
-
- spin_unlock( &snl->lock );
- spin_unlock( &nl->lock );
-
- dev_put( dev );
- return 0;
-}
-
-#endif
-
-static void
-set_multicast_list( struct net_device *dev )
-{
- return; /* sbni always operate in promiscuos mode */
-}
-
-
-#ifdef MODULE
-module_param_hw_array(io, int, ioport, NULL, 0);
-module_param_hw_array(irq, int, irq, NULL, 0);
-module_param_array(baud, int, NULL, 0);
-module_param_array(rxl, int, NULL, 0);
-module_param_array(mac, int, NULL, 0);
-module_param(skip_pci_probe, bool, 0);
-
-MODULE_LICENSE("GPL");
-
-
-int __init init_module( void )
-{
- struct net_device *dev;
- int err;
-
- while( num < SBNI_MAX_NUM_CARDS ) {
- dev = alloc_netdev(sizeof(struct net_local), "sbni%d",
- NET_NAME_UNKNOWN, sbni_devsetup);
- if( !dev)
- break;
-
- sprintf( dev->name, "sbni%d", num );
-
- err = sbni_init(dev);
- if (err) {
- free_netdev(dev);
- break;
- }
-
- if( register_netdev( dev ) ) {
- release_region( dev->base_addr, SBNI_IO_EXTENT );
- free_netdev( dev );
- break;
- }
- }
-
- return *sbni_cards ? 0 : -ENODEV;
-}
-
-void
-cleanup_module(void)
-{
- int i;
-
- for (i = 0; i < SBNI_MAX_NUM_CARDS; ++i) {
- struct net_device *dev = sbni_cards[i];
- if (dev != NULL) {
- unregister_netdev(dev);
- release_region(dev->base_addr, SBNI_IO_EXTENT);
- free_netdev(dev);
- }
- }
-}
-
-#else /* MODULE */
-
-static int __init
-sbni_setup( char *p )
-{
- int n, parm;
-
- if( *p++ != '(' )
- goto bad_param;
-
- for( n = 0, parm = 0; *p && n < 8; ) {
- (*dest[ parm ])[ n ] = simple_strtoul( p, &p, 0 );
- if( !*p || *p == ')' )
- return 1;
- if( *p == ';' ) {
- ++p;
- ++n;
- parm = 0;
- } else if( *p++ != ',' ) {
- break;
- } else {
- if( ++parm >= 5 )
- break;
- }
- }
-bad_param:
- pr_err("Error in sbni kernel parameter!\n");
- return 0;
-}
-
-__setup( "sbni=", sbni_setup );
-
-#endif /* MODULE */
-
-/* -------------------------------------------------------------------------- */
-
-static u32
-calc_crc32( u32 crc, u8 *p, u32 len )
-{
- while( len-- )
- crc = CRC32( *p++, crc );
-
- return crc;
-}
-
-static u32 crc32tab[] __attribute__ ((aligned(8))) = {
- 0xD202EF8D, 0xA505DF1B, 0x3C0C8EA1, 0x4B0BBE37,
- 0xD56F2B94, 0xA2681B02, 0x3B614AB8, 0x4C667A2E,
- 0xDCD967BF, 0xABDE5729, 0x32D70693, 0x45D03605,
- 0xDBB4A3A6, 0xACB39330, 0x35BAC28A, 0x42BDF21C,
- 0xCFB5FFE9, 0xB8B2CF7F, 0x21BB9EC5, 0x56BCAE53,
- 0xC8D83BF0, 0xBFDF0B66, 0x26D65ADC, 0x51D16A4A,
- 0xC16E77DB, 0xB669474D, 0x2F6016F7, 0x58672661,
- 0xC603B3C2, 0xB1048354, 0x280DD2EE, 0x5F0AE278,
- 0xE96CCF45, 0x9E6BFFD3, 0x0762AE69, 0x70659EFF,
- 0xEE010B5C, 0x99063BCA, 0x000F6A70, 0x77085AE6,
- 0xE7B74777, 0x90B077E1, 0x09B9265B, 0x7EBE16CD,
- 0xE0DA836E, 0x97DDB3F8, 0x0ED4E242, 0x79D3D2D4,
- 0xF4DBDF21, 0x83DCEFB7, 0x1AD5BE0D, 0x6DD28E9B,
- 0xF3B61B38, 0x84B12BAE, 0x1DB87A14, 0x6ABF4A82,
- 0xFA005713, 0x8D076785, 0x140E363F, 0x630906A9,
- 0xFD6D930A, 0x8A6AA39C, 0x1363F226, 0x6464C2B0,
- 0xA4DEAE1D, 0xD3D99E8B, 0x4AD0CF31, 0x3DD7FFA7,
- 0xA3B36A04, 0xD4B45A92, 0x4DBD0B28, 0x3ABA3BBE,
- 0xAA05262F, 0xDD0216B9, 0x440B4703, 0x330C7795,
- 0xAD68E236, 0xDA6FD2A0, 0x4366831A, 0x3461B38C,
- 0xB969BE79, 0xCE6E8EEF, 0x5767DF55, 0x2060EFC3,
- 0xBE047A60, 0xC9034AF6, 0x500A1B4C, 0x270D2BDA,
- 0xB7B2364B, 0xC0B506DD, 0x59BC5767, 0x2EBB67F1,
- 0xB0DFF252, 0xC7D8C2C4, 0x5ED1937E, 0x29D6A3E8,
- 0x9FB08ED5, 0xE8B7BE43, 0x71BEEFF9, 0x06B9DF6F,
- 0x98DD4ACC, 0xEFDA7A5A, 0x76D32BE0, 0x01D41B76,
- 0x916B06E7, 0xE66C3671, 0x7F6567CB, 0x0862575D,
- 0x9606C2FE, 0xE101F268, 0x7808A3D2, 0x0F0F9344,
- 0x82079EB1, 0xF500AE27, 0x6C09FF9D, 0x1B0ECF0B,
- 0x856A5AA8, 0xF26D6A3E, 0x6B643B84, 0x1C630B12,
- 0x8CDC1683, 0xFBDB2615, 0x62D277AF, 0x15D54739,
- 0x8BB1D29A, 0xFCB6E20C, 0x65BFB3B6, 0x12B88320,
- 0x3FBA6CAD, 0x48BD5C3B, 0xD1B40D81, 0xA6B33D17,
- 0x38D7A8B4, 0x4FD09822, 0xD6D9C998, 0xA1DEF90E,
- 0x3161E49F, 0x4666D409, 0xDF6F85B3, 0xA868B525,
- 0x360C2086, 0x410B1010, 0xD80241AA, 0xAF05713C,
- 0x220D7CC9, 0x550A4C5F, 0xCC031DE5, 0xBB042D73,
- 0x2560B8D0, 0x52678846, 0xCB6ED9FC, 0xBC69E96A,
- 0x2CD6F4FB, 0x5BD1C46D, 0xC2D895D7, 0xB5DFA541,
- 0x2BBB30E2, 0x5CBC0074, 0xC5B551CE, 0xB2B26158,
- 0x04D44C65, 0x73D37CF3, 0xEADA2D49, 0x9DDD1DDF,
- 0x03B9887C, 0x74BEB8EA, 0xEDB7E950, 0x9AB0D9C6,
- 0x0A0FC457, 0x7D08F4C1, 0xE401A57B, 0x930695ED,
- 0x0D62004E, 0x7A6530D8, 0xE36C6162, 0x946B51F4,
- 0x19635C01, 0x6E646C97, 0xF76D3D2D, 0x806A0DBB,
- 0x1E0E9818, 0x6909A88E, 0xF000F934, 0x8707C9A2,
- 0x17B8D433, 0x60BFE4A5, 0xF9B6B51F, 0x8EB18589,
- 0x10D5102A, 0x67D220BC, 0xFEDB7106, 0x89DC4190,
- 0x49662D3D, 0x3E611DAB, 0xA7684C11, 0xD06F7C87,
- 0x4E0BE924, 0x390CD9B2, 0xA0058808, 0xD702B89E,
- 0x47BDA50F, 0x30BA9599, 0xA9B3C423, 0xDEB4F4B5,
- 0x40D06116, 0x37D75180, 0xAEDE003A, 0xD9D930AC,
- 0x54D13D59, 0x23D60DCF, 0xBADF5C75, 0xCDD86CE3,
- 0x53BCF940, 0x24BBC9D6, 0xBDB2986C, 0xCAB5A8FA,
- 0x5A0AB56B, 0x2D0D85FD, 0xB404D447, 0xC303E4D1,
- 0x5D677172, 0x2A6041E4, 0xB369105E, 0xC46E20C8,
- 0x72080DF5, 0x050F3D63, 0x9C066CD9, 0xEB015C4F,
- 0x7565C9EC, 0x0262F97A, 0x9B6BA8C0, 0xEC6C9856,
- 0x7CD385C7, 0x0BD4B551, 0x92DDE4EB, 0xE5DAD47D,
- 0x7BBE41DE, 0x0CB97148, 0x95B020F2, 0xE2B71064,
- 0x6FBF1D91, 0x18B82D07, 0x81B17CBD, 0xF6B64C2B,
- 0x68D2D988, 0x1FD5E91E, 0x86DCB8A4, 0xF1DB8832,
- 0x616495A3, 0x1663A535, 0x8F6AF48F, 0xF86DC419,
- 0x660951BA, 0x110E612C, 0x88073096, 0xFF000000
-};
-
diff --git a/drivers/net/wan/sbni.h b/drivers/net/wan/sbni.h
deleted file mode 100644
index 84264510a8ed..000000000000
--- a/drivers/net/wan/sbni.h
+++ /dev/null
@@ -1,147 +0,0 @@
-/* sbni.h: definitions for a Granch SBNI12 driver, version 5.0.0
- * Written 2001 Denis I.Timofeev (timofeev@granch.ru)
- * This file is distributed under the GNU GPL
- */
-
-#ifndef SBNI_H
-#define SBNI_H
-
-#ifdef SBNI_DEBUG
-#define DP( A ) A
-#else
-#define DP( A )
-#endif
-
-
-/* We don't have official vendor id yet... */
-#define SBNI_PCI_VENDOR 0x55
-#define SBNI_PCI_DEVICE 0x9f
-
-#define ISA_MODE 0x00
-#define PCI_MODE 0x01
-
-#define SBNI_IO_EXTENT 4
-
-enum sbni_reg {
- CSR0 = 0,
- CSR1 = 1,
- DAT = 2
-};
-
-/* CSR0 mapping */
-enum {
- BU_EMP = 0x02,
- RC_CHK = 0x04,
- CT_ZER = 0x08,
- TR_REQ = 0x10,
- TR_RDY = 0x20,
- EN_INT = 0x40,
- RC_RDY = 0x80
-};
-
-
-/* CSR1 mapping */
-#define PR_RES 0x80
-
-struct sbni_csr1 {
-#ifdef __LITTLE_ENDIAN_BITFIELD
- u8 rxl : 5;
- u8 rate : 2;
- u8 : 1;
-#else
- u8 : 1;
- u8 rate : 2;
- u8 rxl : 5;
-#endif
-};
-
-/* fields in frame header */
-#define FRAME_ACK_MASK (unsigned short)0x7000
-#define FRAME_LEN_MASK (unsigned short)0x03FF
-#define FRAME_FIRST (unsigned short)0x8000
-#define FRAME_RETRY (unsigned short)0x0800
-
-#define FRAME_SENT_BAD (unsigned short)0x4000
-#define FRAME_SENT_OK (unsigned short)0x3000
-
-
-/* state flags */
-enum {
- FL_WAIT_ACK = 0x01,
- FL_NEED_RESEND = 0x02,
- FL_PREV_OK = 0x04,
- FL_SLOW_MODE = 0x08,
- FL_SECONDARY = 0x10,
-#ifdef CONFIG_SBNI_MULTILINE
- FL_SLAVE = 0x20,
-#endif
- FL_LINE_DOWN = 0x40
-};
-
-
-enum {
- DEFAULT_IOBASEADDR = 0x210,
- DEFAULT_INTERRUPTNUMBER = 5,
- DEFAULT_RATE = 0,
- DEFAULT_FRAME_LEN = 1012
-};
-
-#define DEF_RXL_DELTA -1
-#define DEF_RXL 0xf
-
-#define SBNI_SIG 0x5a
-
-#define SBNI_MIN_LEN 60 /* Shortest Ethernet frame without FCS */
-#define SBNI_MAX_FRAME 1023
-#define ETHER_MAX_LEN 1518
-
-#define SBNI_TIMEOUT (HZ/10)
-
-#define TR_ERROR_COUNT 32
-#define CHANGE_LEVEL_START_TICKS 4
-
-#define SBNI_MAX_NUM_CARDS 16
-
-/* internal SBNI-specific statistics */
-struct sbni_in_stats {
- u32 all_rx_number;
- u32 bad_rx_number;
- u32 timeout_number;
- u32 all_tx_number;
- u32 resend_tx_number;
-};
-
-/* SBNI ioctl params */
-#define SIOCDEVGETINSTATS SIOCDEVPRIVATE
-#define SIOCDEVRESINSTATS SIOCDEVPRIVATE+1
-#define SIOCDEVGHWSTATE SIOCDEVPRIVATE+2
-#define SIOCDEVSHWSTATE SIOCDEVPRIVATE+3
-#define SIOCDEVENSLAVE SIOCDEVPRIVATE+4
-#define SIOCDEVEMANSIPATE SIOCDEVPRIVATE+5
-
-
-/* data packet for SIOCDEVGHWSTATE/SIOCDEVSHWSTATE ioctl requests */
-struct sbni_flags {
- u32 rxl : 4;
- u32 rate : 2;
- u32 fixed_rxl : 1;
- u32 slow_mode : 1;
- u32 mac_addr : 24;
-};
-
-/*
- * CRC-32 stuff
- */
-#define CRC32(c,crc) (crc32tab[((size_t)(crc) ^ (c)) & 0xff] ^ (((crc) >> 8) & 0x00FFFFFF))
- /* CRC generator 0xEDB88320 */
- /* CRC remainder 0x2144DF1C */
- /* CRC initial value 0x00000000 */
-#define CRC32_REMAINDER 0x2144DF1C
-#define CRC32_INITIAL 0x00000000
-
-#ifndef __initdata
-#define __initdata
-#endif
-
-#endif
-
diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index 4403e219ca03..eddd20aab691 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -124,14 +124,6 @@ static int sealevel_close(struct net_device *d)
return 0;
}
-static int sealevel_ioctl(struct net_device *d, struct ifreq *ifr, int cmd)
-{
- /* struct slvl_device *slvl=dev_to_chan(d);
- * z8530_ioctl(d,&slvl->sync.chanA,ifr,cmd)
- */
- return hdlc_ioctl(d, ifr, cmd);
-}
-
/* Passed network frames, fire them downwind. */
static netdev_tx_t sealevel_queue_xmit(struct sk_buff *skb,
@@ -152,7 +144,7 @@ static const struct net_device_ops sealevel_ops = {
.ndo_open = sealevel_open,
.ndo_stop = sealevel_close,
.ndo_start_xmit = hdlc_start_xmit,
- .ndo_do_ioctl = sealevel_ioctl,
+ .ndo_siocwandev = hdlc_ioctl,
};
static int slvl_setup(struct slvl_device *sv, int iobase, int irq)
diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c
index f22e48415e6f..5a9e262188ef 100644
--- a/drivers/net/wan/wanxl.c
+++ b/drivers/net/wan/wanxl.c
@@ -343,20 +343,17 @@ static int wanxl_attach(struct net_device *dev, unsigned short encoding,
return 0;
}
-static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int wanxl_ioctl(struct net_device *dev, struct if_settings *ifs)
{
const size_t size = sizeof(sync_serial_settings);
sync_serial_settings line;
struct port *port = dev_to_port(dev);
- if (cmd != SIOCWANDEV)
- return hdlc_ioctl(dev, ifr, cmd);
-
- switch (ifr->ifr_settings.type) {
+ switch (ifs->type) {
case IF_GET_IFACE:
- ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL;
- if (ifr->ifr_settings.size < size) {
- ifr->ifr_settings.size = size; /* data size wanted */
+ ifs->type = IF_IFACE_SYNC_SERIAL;
+ if (ifs->size < size) {
+ ifs->size = size; /* data size wanted */
return -ENOBUFS;
}
memset(&line, 0, sizeof(line));
@@ -364,7 +361,7 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
line.clock_rate = 0;
line.loopback = 0;
- if (copy_to_user(ifr->ifr_settings.ifs_ifsu.sync, &line, size))
+ if (copy_to_user(ifs->ifs_ifsu.sync, &line, size))
return -EFAULT;
return 0;
@@ -374,7 +371,7 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (dev->flags & IFF_UP)
return -EBUSY;
- if (copy_from_user(&line, ifr->ifr_settings.ifs_ifsu.sync,
+ if (copy_from_user(&line, ifs->ifs_ifsu.sync,
size))
return -EFAULT;
@@ -389,7 +386,7 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return 0;
default:
- return hdlc_ioctl(dev, ifr, cmd);
+ return hdlc_ioctl(dev, ifs);
}
}
@@ -545,7 +542,7 @@ static const struct net_device_ops wanxl_ops = {
.ndo_open = wanxl_open,
.ndo_stop = wanxl_close,
.ndo_start_xmit = hdlc_start_xmit,
- .ndo_do_ioctl = wanxl_ioctl,
+ .ndo_siocwandev = wanxl_ioctl,
.ndo_get_stats = wanxl_get_stats,
};
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 71878ab35b93..4d4e2f91e15c 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -3393,19 +3393,12 @@ static int ath10k_pci_claim(struct ath10k *ar)
}
/* Target expects 32 bit DMA. Enforce it. */
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (ret) {
ath10k_err(ar, "failed to set dma mask to 32-bit: %d\n", ret);
goto err_region;
}
- ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- if (ret) {
- ath10k_err(ar, "failed to set consistent dma mask to 32-bit: %d\n",
- ret);
- goto err_region;
- }
-
pci_set_master(pdev);
/* Arrange for access to Target SoC registers. */
diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index 603d2f93ac18..9a224817630a 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -1406,11 +1406,6 @@ ath11k_update_per_peer_tx_stats(struct ath11k *ar,
* Firmware rate's control to be skipped for this?
*/
- if (flags == WMI_RATE_PREAMBLE_HE && mcs > 11) {
- ath11k_warn(ab, "Invalid HE mcs %d peer stats", mcs);
- return;
- }
-
if (flags == WMI_RATE_PREAMBLE_HE && mcs > ATH11K_HE_MCS_MAX) {
ath11k_warn(ab, "Invalid HE mcs %d peer stats", mcs);
return;
diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c
index 75cc2d80fde8..26c7ae242db6 100644
--- a/drivers/net/wireless/ath/ath11k/mhi.c
+++ b/drivers/net/wireless/ath/ath11k/mhi.c
@@ -330,6 +330,7 @@ int ath11k_mhi_register(struct ath11k_pci *ab_pci)
mhi_ctrl->cntrl_dev = ab->dev;
mhi_ctrl->fw_image = ab_pci->amss_path;
mhi_ctrl->regs = ab->mem;
+ mhi_ctrl->reg_len = ab->mem_len;
ret = ath11k_mhi_get_msi(ab_pci);
if (ret) {
diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index 646ad79f309c..5abb38cc3b55 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -933,20 +933,14 @@ static int ath11k_pci_claim(struct ath11k_pci *ab_pci, struct pci_dev *pdev)
goto disable_device;
}
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(ATH11K_PCI_DMA_MASK));
+ ret = dma_set_mask_and_coherent(&pdev->dev,
+ DMA_BIT_MASK(ATH11K_PCI_DMA_MASK));
if (ret) {
ath11k_err(ab, "failed to set pci dma mask to %d: %d\n",
ATH11K_PCI_DMA_MASK, ret);
goto release_region;
}
- ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(ATH11K_PCI_DMA_MASK));
- if (ret) {
- ath11k_err(ab, "failed to set pci consistent dma mask to %d: %d\n",
- ATH11K_PCI_DMA_MASK, ret);
- goto release_region;
- }
-
pci_set_master(pdev);
ab->mem_len = pci_resource_len(pdev, ATH11K_PCI_BAR_NUM);
diff --git a/drivers/net/wireless/ath/ath5k/pci.c b/drivers/net/wireless/ath/ath5k/pci.c
index 43b4ae86e5fb..86b8cb975b1a 100644
--- a/drivers/net/wireless/ath/ath5k/pci.c
+++ b/drivers/net/wireless/ath/ath5k/pci.c
@@ -191,7 +191,7 @@ ath5k_pci_probe(struct pci_dev *pdev,
}
/* XXX 32-bit addressing only */
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if (ret) {
dev_err(&pdev->dev, "32-bit DMA not available\n");
goto err_dis;
diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c
index b137e7f34397..bd1ef6334997 100644
--- a/drivers/net/wireless/ath/ath6kl/wmi.c
+++ b/drivers/net/wireless/ath/ath6kl/wmi.c
@@ -2504,8 +2504,10 @@ static int ath6kl_wmi_sync_point(struct wmi *wmi, u8 if_idx)
goto free_data_skb;
for (index = 0; index < num_pri_streams; index++) {
- if (WARN_ON(!data_sync_bufs[index].skb))
+ if (WARN_ON(!data_sync_bufs[index].skb)) {
+ ret = -ENOMEM;
goto free_data_skb;
+ }
ep_id = ath6kl_ac2_endpoint_id(wmi->parent_dev,
data_sync_bufs[index].
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
index b4885a700296..b0a4ca3559fd 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
@@ -3351,7 +3351,8 @@ found:
"Found block at %x: code=%d ref=%d length=%d major=%d minor=%d\n",
cptr, code, reference, length, major, minor);
if ((!AR_SREV_9485(ah) && length >= 1024) ||
- (AR_SREV_9485(ah) && length > EEPROM_DATA_LEN_9485)) {
+ (AR_SREV_9485(ah) && length > EEPROM_DATA_LEN_9485) ||
+ (length > cptr)) {
ath_dbg(common, EEPROM, "Skipping bad header\n");
cptr -= COMP_HDR_LEN;
continue;
diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c
index 2ca3b86714a9..172081ffe477 100644
--- a/drivers/net/wireless/ath/ath9k/hw.c
+++ b/drivers/net/wireless/ath/ath9k/hw.c
@@ -1621,7 +1621,6 @@ static void ath9k_hw_apply_gpio_override(struct ath_hw *ah)
ath9k_hw_gpio_request_out(ah, i, NULL,
AR_GPIO_OUTPUT_MUX_AS_OUTPUT);
ath9k_hw_set_gpio(ah, i, !!(ah->gpio_val & BIT(i)));
- ath9k_hw_gpio_free(ah, i);
}
}
@@ -2728,14 +2727,17 @@ static void ath9k_hw_gpio_cfg_output_mux(struct ath_hw *ah, u32 gpio, u32 type)
static void ath9k_hw_gpio_cfg_soc(struct ath_hw *ah, u32 gpio, bool out,
const char *label)
{
+ int err;
+
if (ah->caps.gpio_requested & BIT(gpio))
return;
- /* may be requested by BSP, free anyway */
- gpio_free(gpio);
-
- if (gpio_request_one(gpio, out ? GPIOF_OUT_INIT_LOW : GPIOF_IN, label))
+ err = gpio_request_one(gpio, out ? GPIOF_OUT_INIT_LOW : GPIOF_IN, label);
+ if (err) {
+ ath_err(ath9k_hw_common(ah), "request GPIO%d failed:%d\n",
+ gpio, err);
return;
+ }
ah->caps.gpio_requested |= BIT(gpio);
}
diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c
index cff9af3af38d..a074e23013c5 100644
--- a/drivers/net/wireless/ath/ath9k/pci.c
+++ b/drivers/net/wireless/ath/ath9k/pci.c
@@ -896,18 +896,12 @@ static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (pcim_enable_device(pdev))
return -EIO;
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (ret) {
pr_err("32-bit DMA not available\n");
return ret;
}
- ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- if (ret) {
- pr_err("32-bit DMA consistent DMA enable failed\n");
- return ret;
- }
-
/*
* Cache line size is used to size and align various
* structures used to communicate with the hardware.
diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index d202f2128df2..ec913ec991f3 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -408,13 +408,14 @@ static int wcn36xx_config(struct ieee80211_hw *hw, u32 changed)
wcn36xx_dbg(WCN36XX_DBG_MAC, "wcn36xx_config channel switch=%d\n",
ch);
- if (wcn->sw_scan_opchannel == ch) {
+ if (wcn->sw_scan_opchannel == ch && wcn->sw_scan_channel) {
/* If channel is the initial operating channel, we may
* want to receive/transmit regular data packets, then
* simply stop the scan session and exit PS mode.
*/
wcn36xx_smd_finish_scan(wcn, HAL_SYS_MODE_SCAN,
wcn->sw_scan_vif);
+ wcn->sw_scan_channel = 0;
} else if (wcn->sw_scan) {
/* A scan is ongoing, do not change the operating
* channel, but start a scan session on the channel.
@@ -422,6 +423,7 @@ static int wcn36xx_config(struct ieee80211_hw *hw, u32 changed)
wcn36xx_smd_init_scan(wcn, HAL_SYS_MODE_SCAN,
wcn->sw_scan_vif);
wcn36xx_smd_start_scan(wcn, ch);
+ wcn->sw_scan_channel = ch;
} else {
wcn36xx_change_opchannel(wcn, ch);
}
@@ -702,6 +704,7 @@ static void wcn36xx_sw_scan_start(struct ieee80211_hw *hw,
wcn->sw_scan = true;
wcn->sw_scan_vif = vif;
+ wcn->sw_scan_channel = 0;
if (vif_priv->sta_assoc)
wcn->sw_scan_opchannel = WCN36XX_HW_CHANNEL(wcn);
else
@@ -1500,6 +1503,13 @@ static int wcn36xx_probe(struct platform_device *pdev)
goto out_wq;
}
+ wcn->nv_file = WLAN_NV_FILE;
+ ret = of_property_read_string(wcn->dev->parent->of_node, "firmware-name", &wcn->nv_file);
+ if (ret < 0 && ret != -EINVAL) {
+ wcn36xx_err("failed to read \"firmware-name\" property: %d\n", ret);
+ goto out_wq;
+ }
+
wcn->smd_channel = qcom_wcnss_open_channel(wcnss, "WLAN_CTRL", wcn36xx_smd_rsp_process, hw);
if (IS_ERR(wcn->smd_channel)) {
wcn36xx_err("failed to open WLAN_CTRL channel\n");
diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
index 0e3be17d8cea..57fa857b290b 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.c
+++ b/drivers/net/wireless/ath/wcn36xx/smd.c
@@ -504,10 +504,10 @@ int wcn36xx_smd_load_nv(struct wcn36xx *wcn)
u16 fm_offset = 0;
if (!wcn->nv) {
- ret = request_firmware(&wcn->nv, WLAN_NV_FILE, wcn->dev);
+ ret = request_firmware(&wcn->nv, wcn->nv_file, wcn->dev);
if (ret) {
wcn36xx_err("Failed to load nv file %s: %d\n",
- WLAN_NV_FILE, ret);
+ wcn->nv_file, ret);
goto out;
}
}
diff --git a/drivers/net/wireless/ath/wcn36xx/txrx.c b/drivers/net/wireless/ath/wcn36xx/txrx.c
index 1b831157ede1..cab196bb38cd 100644
--- a/drivers/net/wireless/ath/wcn36xx/txrx.c
+++ b/drivers/net/wireless/ath/wcn36xx/txrx.c
@@ -287,6 +287,10 @@ int wcn36xx_rx_skb(struct wcn36xx *wcn, struct sk_buff *skb)
status.rate_idx = 0;
}
+ if (ieee80211_is_beacon(hdr->frame_control) ||
+ ieee80211_is_probe_resp(hdr->frame_control))
+ status.boottime_ns = ktime_get_boottime_ns();
+
memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status));
if (ieee80211_is_beacon(hdr->frame_control)) {
diff --git a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
index 6121d8a5641a..add6e527e833 100644
--- a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
+++ b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
@@ -199,6 +199,7 @@ struct wcn36xx {
struct device *dev;
struct list_head vif_list;
+ const char *nv_file;
const struct firmware *nv;
u8 fw_revision;
@@ -246,6 +247,7 @@ struct wcn36xx {
struct cfg80211_scan_request *scan_req;
bool sw_scan;
u8 sw_scan_opchannel;
+ u8 sw_scan_channel;
struct ieee80211_vif *sw_scan_vif;
struct mutex scan_lock;
bool scan_aborted;
diff --git a/drivers/net/wireless/ath/wil6210/ethtool.c b/drivers/net/wireless/ath/wil6210/ethtool.c
index e481674485c2..29a9f17c2df0 100644
--- a/drivers/net/wireless/ath/wil6210/ethtool.c
+++ b/drivers/net/wireless/ath/wil6210/ethtool.c
@@ -11,8 +11,11 @@
#include "wil6210.h"
-static int wil_ethtoolops_get_coalesce(struct net_device *ndev,
- struct ethtool_coalesce *cp)
+static int
+wil_ethtoolops_get_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *cp,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct wil6210_priv *wil = ndev_to_wil(ndev);
u32 tx_itr_en, tx_itr_val = 0;
@@ -45,8 +48,11 @@ out:
return ret;
}
-static int wil_ethtoolops_set_coalesce(struct net_device *ndev,
- struct ethtool_coalesce *cp)
+static int
+wil_ethtoolops_set_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *cp,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct wil6210_priv *wil = ndev_to_wil(ndev);
struct wireless_dev *wdev = ndev->ieee80211_ptr;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile
index 9b15bc3f6054..13c13504a6e8 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile
@@ -23,7 +23,8 @@ brcmfmac-objs += \
feature.o \
btcoex.o \
vendor.o \
- pno.o
+ pno.o \
+ xtlv.o
brcmfmac-$(CONFIG_BRCMFMAC_PROTO_BCDC) += \
bcdc.o \
fwsignal.o
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
index 633d0ab19031..ac02244a6fdf 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
@@ -128,7 +128,8 @@ int brcmf_sdiod_intr_register(struct brcmf_sdio_dev *sdiodev)
if (sdiodev->bus_if->chip == BRCM_CC_43362_CHIP_ID) {
/* assign GPIO to SDIO core */
- addr = CORE_CC_REG(SI_ENUM_BASE, gpiocontrol);
+ addr = brcmf_chip_enum_base(sdiodev->func1->device);
+ addr = CORE_CC_REG(addr, gpiocontrol);
gpiocontrol = brcmf_sdiod_readl(sdiodev, addr, &ret);
gpiocontrol |= 0x2;
brcmf_sdiod_writel(sdiodev, addr, gpiocontrol, &ret);
@@ -990,6 +991,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = {
BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4359),
BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_CYPRESS_4373),
BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_CYPRESS_43012),
+ BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_CYPRESS_43752),
BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_CYPRESS_89359),
{ /* end: all zeroes */ }
};
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index cedba56fc448..f7b96cd69242 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -1829,6 +1829,14 @@ brcmf_set_key_mgmt(struct net_device *ndev, struct cfg80211_connect_params *sme)
profile->use_fwsup = BRCMF_PROFILE_FWSUP_SAE;
}
break;
+ case WLAN_AKM_SUITE_FT_OVER_SAE:
+ val = WPA3_AUTH_SAE_PSK | WPA2_AUTH_FT;
+ profile->is_ft = true;
+ if (sme->crypto.sae_pwd) {
+ brcmf_dbg(INFO, "using SAE offload\n");
+ profile->use_fwsup = BRCMF_PROFILE_FWSUP_SAE;
+ }
+ break;
default:
bphy_err(drvr, "invalid cipher group (%d)\n",
sme->crypto.cipher_group);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c
index 45037decba40..1ee49f9e325d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c
@@ -139,6 +139,8 @@ struct sbconfig {
u32 sbidhigh; /* identification */
};
+#define INVALID_RAMBASE ((u32)(~0))
+
/* bankidx and bankinfo reg defines corerev >= 8 */
#define SOCRAM_BANKINFO_RETNTRAM_MASK 0x00010000
#define SOCRAM_BANKINFO_SZMASK 0x0000007f
@@ -527,7 +529,7 @@ static int brcmf_chip_cores_check(struct brcmf_chip_priv *ci)
int idx = 1;
list_for_each_entry(core, &ci->cores, list) {
- brcmf_dbg(INFO, " [%-2d] core 0x%x:%-2d base 0x%08x wrap 0x%08x\n",
+ brcmf_dbg(INFO, " [%-2d] core 0x%x:%-3d base 0x%08x wrap 0x%08x\n",
idx++, core->pub.id, core->pub.rev, core->pub.base,
core->wrapbase);
@@ -727,11 +729,13 @@ static u32 brcmf_chip_tcm_rambase(struct brcmf_chip_priv *ci)
case BRCM_CC_4364_CHIP_ID:
case CY_CC_4373_CHIP_ID:
return 0x160000;
+ case CY_CC_43752_CHIP_ID:
+ return 0x170000;
default:
brcmf_err("unknown chip: %s\n", ci->pub.name);
break;
}
- return 0;
+ return INVALID_RAMBASE;
}
int brcmf_chip_get_raminfo(struct brcmf_chip *pub)
@@ -746,7 +750,7 @@ int brcmf_chip_get_raminfo(struct brcmf_chip *pub)
mem_core = container_of(mem, struct brcmf_core_priv, pub);
ci->pub.ramsize = brcmf_chip_tcm_ramsize(mem_core);
ci->pub.rambase = brcmf_chip_tcm_rambase(ci);
- if (!ci->pub.rambase) {
+ if (ci->pub.rambase == INVALID_RAMBASE) {
brcmf_err("RAM base not provided with ARM CR4 core\n");
return -EINVAL;
}
@@ -757,7 +761,7 @@ int brcmf_chip_get_raminfo(struct brcmf_chip *pub)
pub);
ci->pub.ramsize = brcmf_chip_sysmem_ramsize(mem_core);
ci->pub.rambase = brcmf_chip_tcm_rambase(ci);
- if (!ci->pub.rambase) {
+ if (ci->pub.rambase == INVALID_RAMBASE) {
brcmf_err("RAM base not provided with ARM CA7 core\n");
return -EINVAL;
}
@@ -894,7 +898,8 @@ int brcmf_chip_dmp_erom_scan(struct brcmf_chip_priv *ci)
u32 base, wrap;
int err;
- eromaddr = ci->ops->read32(ci->ctx, CORE_CC_REG(SI_ENUM_BASE, eromptr));
+ eromaddr = ci->ops->read32(ci->ctx,
+ CORE_CC_REG(ci->pub.enum_base, eromptr));
while (desc_type != DMP_DESC_EOT) {
val = brcmf_chip_dmp_get_desc(ci, &eromaddr, &desc_type);
@@ -942,6 +947,11 @@ int brcmf_chip_dmp_erom_scan(struct brcmf_chip_priv *ci)
return 0;
}
+u32 brcmf_chip_enum_base(u16 devid)
+{
+ return SI_ENUM_BASE_DEFAULT;
+}
+
static int brcmf_chip_recognition(struct brcmf_chip_priv *ci)
{
struct brcmf_core *core;
@@ -954,7 +964,8 @@ static int brcmf_chip_recognition(struct brcmf_chip_priv *ci)
* For different chiptypes or old sdio hosts w/o chipcommon,
* other ways of recognition should be added here.
*/
- regdata = ci->ops->read32(ci->ctx, CORE_CC_REG(SI_ENUM_BASE, chipid));
+ regdata = ci->ops->read32(ci->ctx,
+ CORE_CC_REG(ci->pub.enum_base, chipid));
ci->pub.chip = regdata & CID_ID_MASK;
ci->pub.chiprev = (regdata & CID_REV_MASK) >> CID_REV_SHIFT;
socitype = (regdata & CID_TYPE_MASK) >> CID_TYPE_SHIFT;
@@ -974,7 +985,7 @@ static int brcmf_chip_recognition(struct brcmf_chip_priv *ci)
ci->resetcore = brcmf_chip_sb_resetcore;
core = brcmf_chip_add_core(ci, BCMA_CORE_CHIPCOMMON,
- SI_ENUM_BASE, 0);
+ SI_ENUM_BASE_DEFAULT, 0);
brcmf_chip_sb_corerev(ci, core);
core = brcmf_chip_add_core(ci, BCMA_CORE_SDIO_DEV,
BCM4329_CORE_BUS_BASE, 0);
@@ -1088,7 +1099,7 @@ static int brcmf_chip_setup(struct brcmf_chip_priv *chip)
return ret;
}
-struct brcmf_chip *brcmf_chip_attach(void *ctx,
+struct brcmf_chip *brcmf_chip_attach(void *ctx, u16 devid,
const struct brcmf_buscore_ops *ops)
{
struct brcmf_chip_priv *chip;
@@ -1113,6 +1124,7 @@ struct brcmf_chip *brcmf_chip_attach(void *ctx,
chip->num_cores = 0;
chip->ops = ops;
chip->ctx = ctx;
+ chip->pub.enum_base = brcmf_chip_enum_base(devid);
err = ops->prepare(ctx);
if (err < 0)
@@ -1411,6 +1423,7 @@ bool brcmf_chip_sr_capable(struct brcmf_chip *pub)
reg = chip->ops->read32(chip->ctx, addr);
return (reg & CC_SR_CTL0_ENABLE_MASK) != 0;
case BRCM_CC_4359_CHIP_ID:
+ case CY_CC_43752_CHIP_ID:
case CY_CC_43012_CHIP_ID:
addr = CORE_CC_REG(pmu->base, retention_ctl);
reg = chip->ops->read32(chip->ctx, addr);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.h
index 8fa38658e727..d69f101f5834 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.h
@@ -15,6 +15,7 @@
*
* @chip: chip identifier.
* @chiprev: chip revision.
+ * @enum_base: base address of core enumeration space.
* @cc_caps: chipcommon core capabilities.
* @cc_caps_ext: chipcommon core extended capabilities.
* @pmucaps: PMU capabilities.
@@ -27,6 +28,7 @@
struct brcmf_chip {
u32 chip;
u32 chiprev;
+ u32 enum_base;
u32 cc_caps;
u32 cc_caps_ext;
u32 pmucaps;
@@ -70,7 +72,7 @@ struct brcmf_buscore_ops {
};
int brcmf_chip_get_raminfo(struct brcmf_chip *pub);
-struct brcmf_chip *brcmf_chip_attach(void *ctx,
+struct brcmf_chip *brcmf_chip_attach(void *ctx, u16 devid,
const struct brcmf_buscore_ops *ops);
void brcmf_chip_detach(struct brcmf_chip *chip);
struct brcmf_core *brcmf_chip_get_core(struct brcmf_chip *chip, u16 coreid);
@@ -85,5 +87,6 @@ void brcmf_chip_set_passive(struct brcmf_chip *ci);
bool brcmf_chip_set_active(struct brcmf_chip *ci, u32 rstvec);
bool brcmf_chip_sr_capable(struct brcmf_chip *pub);
char *brcmf_chip_name(u32 chipid, u32 chiprev, char *buf, uint len);
+u32 brcmf_chip_enum_base(u16 devid);
#endif /* BRCMF_AXIDMP_H */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
index d40104b8df55..0eb13e5df517 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
@@ -431,8 +431,6 @@ struct brcmf_fw {
void (*done)(struct device *dev, int err, struct brcmf_fw_request *req);
};
-static void brcmf_fw_request_done(const struct firmware *fw, void *ctx);
-
#ifdef CONFIG_EFI
/* In some cases the EFI-var stored nvram contains "ccode=ALL" or "ccode=XV"
* to specify "worldwide" compatible settings, but these 2 ccode-s do not work
@@ -594,28 +592,47 @@ static int brcmf_fw_complete_request(const struct firmware *fw,
return (cur->flags & BRCMF_FW_REQF_OPTIONAL) ? 0 : ret;
}
+static char *brcm_alt_fw_path(const char *path, const char *board_type)
+{
+ char alt_path[BRCMF_FW_NAME_LEN];
+ char suffix[5];
+
+ strscpy(alt_path, path, BRCMF_FW_NAME_LEN);
+ /* At least one character + suffix */
+ if (strlen(alt_path) < 5)
+ return NULL;
+
+ /* strip .txt or .bin at the end */
+ strscpy(suffix, alt_path + strlen(alt_path) - 4, 5);
+ alt_path[strlen(alt_path) - 4] = 0;
+ strlcat(alt_path, ".", BRCMF_FW_NAME_LEN);
+ strlcat(alt_path, board_type, BRCMF_FW_NAME_LEN);
+ strlcat(alt_path, suffix, BRCMF_FW_NAME_LEN);
+
+ return kstrdup(alt_path, GFP_KERNEL);
+}
+
static int brcmf_fw_request_firmware(const struct firmware **fw,
struct brcmf_fw *fwctx)
{
struct brcmf_fw_item *cur = &fwctx->req->items[fwctx->curpos];
int ret;
- /* nvram files are board-specific, first try a board-specific path */
+ /* Files can be board-specific, first try a board-specific path */
if (cur->type == BRCMF_FW_TYPE_NVRAM && fwctx->req->board_type) {
- char alt_path[BRCMF_FW_NAME_LEN];
+ char *alt_path;
- strlcpy(alt_path, cur->path, BRCMF_FW_NAME_LEN);
- /* strip .txt at the end */
- alt_path[strlen(alt_path) - 4] = 0;
- strlcat(alt_path, ".", BRCMF_FW_NAME_LEN);
- strlcat(alt_path, fwctx->req->board_type, BRCMF_FW_NAME_LEN);
- strlcat(alt_path, ".txt", BRCMF_FW_NAME_LEN);
+ alt_path = brcm_alt_fw_path(cur->path, fwctx->req->board_type);
+ if (!alt_path)
+ goto fallback;
ret = request_firmware(fw, alt_path, fwctx->dev);
+ kfree(alt_path);
if (ret == 0)
return ret;
}
+fallback:
return request_firmware(fw, cur->path, fwctx->dev);
}
@@ -639,6 +656,22 @@ static void brcmf_fw_request_done(const struct firmware *fw, void *ctx)
kfree(fwctx);
}
+static void brcmf_fw_request_done_alt_path(const struct firmware *fw, void *ctx)
+{
+ struct brcmf_fw *fwctx = ctx;
+ struct brcmf_fw_item *first = &fwctx->req->items[0];
+ int ret = 0;
+
+ /* Fall back to canonical path if board firmware not found */
+ if (!fw)
+ ret = request_firmware_nowait(THIS_MODULE, true, first->path,
+ fwctx->dev, GFP_KERNEL, fwctx,
+ brcmf_fw_request_done);
+
+ if (fw || ret < 0)
+ brcmf_fw_request_done(fw, ctx);
+}
+
static bool brcmf_fw_request_is_valid(struct brcmf_fw_request *req)
{
struct brcmf_fw_item *item;
@@ -660,6 +693,7 @@ int brcmf_fw_get_firmwares(struct device *dev, struct brcmf_fw_request *req,
{
struct brcmf_fw_item *first = &req->items[0];
struct brcmf_fw *fwctx;
+ char *alt_path;
int ret;
brcmf_dbg(TRACE, "enter: dev=%s\n", dev_name(dev));
@@ -677,9 +711,18 @@ int brcmf_fw_get_firmwares(struct device *dev, struct brcmf_fw_request *req,
fwctx->req = req;
fwctx->done = fw_cb;
- ret = request_firmware_nowait(THIS_MODULE, true, first->path,
- fwctx->dev, GFP_KERNEL, fwctx,
- brcmf_fw_request_done);
+ /* First try alternative board-specific path if any */
+ alt_path = brcm_alt_fw_path(first->path, fwctx->req->board_type);
+ if (alt_path) {
+ ret = request_firmware_nowait(THIS_MODULE, true, alt_path,
+ fwctx->dev, GFP_KERNEL, fwctx,
+ brcmf_fw_request_done_alt_path);
+ kfree(alt_path);
+ } else {
+ ret = request_firmware_nowait(THIS_MODULE, true, first->path,
+ fwctx->dev, GFP_KERNEL, fwctx,
+ brcmf_fw_request_done);
+ }
if (ret < 0)
brcmf_fw_request_done(NULL, fwctx);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c
index 9ed85420f3ca..d5578ca681bb 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c
@@ -15,6 +15,7 @@
#include "bus.h"
#include "debug.h"
#include "tracepoint.h"
+#include "xtlv.h"
#include "fwil.h"
#include "proto.h"
@@ -150,7 +151,8 @@ brcmf_fil_cmd_data_get(struct brcmf_if *ifp, u32 cmd, void *data, u32 len)
mutex_lock(&ifp->drvr->proto_block);
err = brcmf_fil_cmd_data(ifp, cmd, data, len, false);
- brcmf_dbg(FIL, "ifidx=%d, cmd=%d, len=%d\n", ifp->ifidx, cmd, len);
+ brcmf_dbg(FIL, "ifidx=%d, cmd=%d, len=%d, err=%d\n", ifp->ifidx, cmd,
+ len, err);
brcmf_dbg_hex_dump(BRCMF_FIL_ON(), data,
min_t(uint, len, MAX_HEX_DUMP_LEN), "data\n");
@@ -260,7 +262,8 @@ brcmf_fil_iovar_data_get(struct brcmf_if *ifp, char *name, void *data,
bphy_err(drvr, "Creating iovar failed\n");
}
- brcmf_dbg(FIL, "ifidx=%d, name=%s, len=%d\n", ifp->ifidx, name, len);
+ brcmf_dbg(FIL, "ifidx=%d, name=%s, len=%d, err=%d\n", ifp->ifidx, name,
+ len, err);
brcmf_dbg_hex_dump(BRCMF_FIL_ON(), data,
min_t(uint, len, MAX_HEX_DUMP_LEN), "data\n");
@@ -383,14 +386,13 @@ brcmf_fil_bsscfg_data_get(struct brcmf_if *ifp, char *name,
err = -EPERM;
bphy_err(drvr, "Creating bsscfg failed\n");
}
- brcmf_dbg(FIL, "ifidx=%d, bsscfgidx=%d, name=%s, len=%d\n", ifp->ifidx,
- ifp->bsscfgidx, name, len);
+ brcmf_dbg(FIL, "ifidx=%d, bsscfgidx=%d, name=%s, len=%d, err=%d\n",
+ ifp->ifidx, ifp->bsscfgidx, name, len, err);
brcmf_dbg_hex_dump(BRCMF_FIL_ON(), data,
min_t(uint, len, MAX_HEX_DUMP_LEN), "data\n");
mutex_unlock(&drvr->proto_block);
return err;
-
}
s32
@@ -414,3 +416,117 @@ brcmf_fil_bsscfg_int_get(struct brcmf_if *ifp, char *name, u32 *data)
*data = le32_to_cpu(data_le);
return err;
}
+
+static u32 brcmf_create_xtlv(char *name, u16 id, char *data, u32 len,
+ char *buf, u32 buflen)
+{
+ u32 iolen;
+ u32 nmlen;
+
+ nmlen = strlen(name) + 1;
+ iolen = nmlen + brcmf_xtlv_data_size(len, BRCMF_XTLV_OPTION_ALIGN32);
+
+ if (iolen > buflen) {
+ brcmf_err("buffer is too short\n");
+ return 0;
+ }
+
+ memcpy(buf, name, nmlen);
+ brcmf_xtlv_pack_header((void *)(buf + nmlen), id, len, data,
+ BRCMF_XTLV_OPTION_ALIGN32);
+
+ return iolen;
+}
+
+s32 brcmf_fil_xtlv_data_set(struct brcmf_if *ifp, char *name, u16 id,
+ void *data, u32 len)
+{
+ struct brcmf_pub *drvr = ifp->drvr;
+ s32 err;
+ u32 buflen;
+
+ mutex_lock(&drvr->proto_block);
+
+ brcmf_dbg(FIL, "ifidx=%d, name=%s, id=%u, len=%u\n", ifp->ifidx, name,
+ id, len);
+ brcmf_dbg_hex_dump(BRCMF_FIL_ON(), data,
+ min_t(uint, len, MAX_HEX_DUMP_LEN), "data\n");
+
+ buflen = brcmf_create_xtlv(name, id, data, len,
+ drvr->proto_buf, sizeof(drvr->proto_buf));
+ if (buflen) {
+ err = brcmf_fil_cmd_data(ifp, BRCMF_C_SET_VAR, drvr->proto_buf,
+ buflen, true);
+ } else {
+ err = -EPERM;
+ bphy_err(drvr, "Creating xtlv failed\n");
+ }
+
+ mutex_unlock(&drvr->proto_block);
+ return err;
+}
+
+s32 brcmf_fil_xtlv_data_get(struct brcmf_if *ifp, char *name, u16 id,
+ void *data, u32 len)
+{
+ struct brcmf_pub *drvr = ifp->drvr;
+ s32 err;
+ u32 buflen;
+
+ mutex_lock(&drvr->proto_block);
+
+ buflen = brcmf_create_xtlv(name, id, data, len,
+ drvr->proto_buf, sizeof(drvr->proto_buf));
+ if (buflen) {
+ err = brcmf_fil_cmd_data(ifp, BRCMF_C_GET_VAR, drvr->proto_buf,
+ buflen, false);
+ if (err == 0)
+ memcpy(data, drvr->proto_buf, len);
+ } else {
+ err = -EPERM;
+ bphy_err(drvr, "Creating bsscfg failed\n");
+ }
+ brcmf_dbg(FIL, "ifidx=%d, name=%s, id=%u, len=%u, err=%d\n",
+ ifp->ifidx, name, id, len, err);
+ brcmf_dbg_hex_dump(BRCMF_FIL_ON(), data,
+ min_t(uint, len, MAX_HEX_DUMP_LEN), "data\n");
+
+ mutex_unlock(&drvr->proto_block);
+ return err;
+}
+
+s32 brcmf_fil_xtlv_int_set(struct brcmf_if *ifp, char *name, u16 id, u32 data)
+{
+ __le32 data_le = cpu_to_le32(data);
+
+ return brcmf_fil_xtlv_data_set(ifp, name, id, &data_le,
+ sizeof(data_le));
+}
+
+s32 brcmf_fil_xtlv_int_get(struct brcmf_if *ifp, char *name, u16 id, u32 *data)
+{
+ __le32 data_le = cpu_to_le32(*data);
+ s32 err;
+
+ err = brcmf_fil_xtlv_data_get(ifp, name, id, &data_le, sizeof(data_le));
+ if (err == 0)
+ *data = le32_to_cpu(data_le);
+ return err;
+}
+
+s32 brcmf_fil_xtlv_int8_get(struct brcmf_if *ifp, char *name, u16 id, u8 *data)
+{
+ return brcmf_fil_xtlv_data_get(ifp, name, id, data, sizeof(*data));
+}
+
+s32 brcmf_fil_xtlv_int16_get(struct brcmf_if *ifp, char *name, u16 id, u16 *data)
+{
+ __le16 data_le = cpu_to_le16(*data);
+ s32 err;
+
+ err = brcmf_fil_xtlv_data_get(ifp, name, id, &data_le, sizeof(data_le));
+ if (err == 0)
+ *data = le16_to_cpu(data_le);
+ return err;
+}
+
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h
index ae4cf4372908..cb26f8c59c21 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h
@@ -97,5 +97,13 @@ s32 brcmf_fil_bsscfg_data_get(struct brcmf_if *ifp, char *name, void *data,
u32 len);
s32 brcmf_fil_bsscfg_int_set(struct brcmf_if *ifp, char *name, u32 data);
s32 brcmf_fil_bsscfg_int_get(struct brcmf_if *ifp, char *name, u32 *data);
+s32 brcmf_fil_xtlv_data_set(struct brcmf_if *ifp, char *name, u16 id,
+ void *data, u32 len);
+s32 brcmf_fil_xtlv_data_get(struct brcmf_if *ifp, char *name, u16 id,
+ void *data, u32 len);
+s32 brcmf_fil_xtlv_int_set(struct brcmf_if *ifp, char *name, u16 id, u32 data);
+s32 brcmf_fil_xtlv_int_get(struct brcmf_if *ifp, char *name, u16 id, u32 *data);
+s32 brcmf_fil_xtlv_int8_get(struct brcmf_if *ifp, char *name, u16 id, u8 *data);
+s32 brcmf_fil_xtlv_int16_get(struct brcmf_if *ifp, char *name, u16 id, u16 *data);
#endif /* _fwil_h_ */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index c49dd0c36ae4..8b149996fc00 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -1886,7 +1886,8 @@ brcmf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
devinfo->pdev = pdev;
pcie_bus_dev = NULL;
- devinfo->ci = brcmf_chip_attach(devinfo, &brcmf_pcie_buscore_ops);
+ devinfo->ci = brcmf_chip_attach(devinfo, pdev->device,
+ &brcmf_pcie_buscore_ops);
if (IS_ERR(devinfo->ci)) {
ret = PTR_ERR(devinfo->ci);
devinfo->ci = NULL;
@@ -2075,7 +2076,7 @@ cleanup:
err = brcmf_pcie_probe(pdev, NULL);
if (err)
- brcmf_err(bus, "probe after resume failed, err=%d\n", err);
+ __brcmf_err(NULL, __func__, "probe after resume failed, err=%d\n", err);
return err;
}
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 97ee9e2e2e35..8effeb7a7269 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -617,6 +617,7 @@ BRCMF_FW_DEF(4339, "brcmfmac4339-sdio");
BRCMF_FW_DEF(43430A0, "brcmfmac43430a0-sdio");
/* Note the names are not postfixed with a1 for backward compatibility */
BRCMF_FW_CLM_DEF(43430A1, "brcmfmac43430-sdio");
+BRCMF_FW_DEF(43430B0, "brcmfmac43430b0-sdio");
BRCMF_FW_CLM_DEF(43455, "brcmfmac43455-sdio");
BRCMF_FW_DEF(43456, "brcmfmac43456-sdio");
BRCMF_FW_CLM_DEF(4354, "brcmfmac4354-sdio");
@@ -624,11 +625,15 @@ BRCMF_FW_CLM_DEF(4356, "brcmfmac4356-sdio");
BRCMF_FW_DEF(4359, "brcmfmac4359-sdio");
BRCMF_FW_CLM_DEF(4373, "brcmfmac4373-sdio");
BRCMF_FW_CLM_DEF(43012, "brcmfmac43012-sdio");
+BRCMF_FW_CLM_DEF(43752, "brcmfmac43752-sdio");
/* firmware config files */
MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-sdio.*.txt");
MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.txt");
+/* per-board firmware binaries */
+MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-sdio.*.bin");
+
static const struct brcmf_firmware_mapping brcmf_sdio_fwnames[] = {
BRCMF_FW_ENTRY(BRCM_CC_43143_CHIP_ID, 0xFFFFFFFF, 43143),
BRCMF_FW_ENTRY(BRCM_CC_43241_CHIP_ID, 0x0000001F, 43241B0),
@@ -643,14 +648,16 @@ static const struct brcmf_firmware_mapping brcmf_sdio_fwnames[] = {
BRCMF_FW_ENTRY(BRCM_CC_43362_CHIP_ID, 0xFFFFFFFE, 43362),
BRCMF_FW_ENTRY(BRCM_CC_4339_CHIP_ID, 0xFFFFFFFF, 4339),
BRCMF_FW_ENTRY(BRCM_CC_43430_CHIP_ID, 0x00000001, 43430A0),
- BRCMF_FW_ENTRY(BRCM_CC_43430_CHIP_ID, 0xFFFFFFFE, 43430A1),
+ BRCMF_FW_ENTRY(BRCM_CC_43430_CHIP_ID, 0x00000002, 43430A1),
+ BRCMF_FW_ENTRY(BRCM_CC_43430_CHIP_ID, 0xFFFFFFFC, 43430B0),
BRCMF_FW_ENTRY(BRCM_CC_4345_CHIP_ID, 0x00000200, 43456),
BRCMF_FW_ENTRY(BRCM_CC_4345_CHIP_ID, 0xFFFFFDC0, 43455),
BRCMF_FW_ENTRY(BRCM_CC_4354_CHIP_ID, 0xFFFFFFFF, 4354),
BRCMF_FW_ENTRY(BRCM_CC_4356_CHIP_ID, 0xFFFFFFFF, 4356),
BRCMF_FW_ENTRY(BRCM_CC_4359_CHIP_ID, 0xFFFFFFFF, 4359),
BRCMF_FW_ENTRY(CY_CC_4373_CHIP_ID, 0xFFFFFFFF, 4373),
- BRCMF_FW_ENTRY(CY_CC_43012_CHIP_ID, 0xFFFFFFFF, 43012)
+ BRCMF_FW_ENTRY(CY_CC_43012_CHIP_ID, 0xFFFFFFFF, 43012),
+ BRCMF_FW_ENTRY(CY_CC_43752_CHIP_ID, 0xFFFFFFFF, 43752)
};
#define TXCTL_CREDITS 2
@@ -3416,7 +3423,8 @@ err:
static bool brcmf_sdio_aos_no_decode(struct brcmf_sdio *bus)
{
- if (bus->ci->chip == CY_CC_43012_CHIP_ID)
+ if (bus->ci->chip == CY_CC_43012_CHIP_ID ||
+ bus->ci->chip == CY_CC_43752_CHIP_ID)
return true;
else
return false;
@@ -3907,7 +3915,7 @@ static u32 brcmf_sdio_buscore_read32(void *ctx, u32 addr)
* It can be identified as 4339 by looking at the chip revision. It
* is corrected here so the chip.c module has the right info.
*/
- if (addr == CORE_CC_REG(SI_ENUM_BASE, chipid) &&
+ if (addr == CORE_CC_REG(SI_ENUM_BASE_DEFAULT, chipid) &&
(sdiodev->func1->device == SDIO_DEVICE_ID_BROADCOM_4339 ||
sdiodev->func1->device == SDIO_DEVICE_ID_BROADCOM_4335_4339)) {
rev = (val & CID_REV_MASK) >> CID_REV_SHIFT;
@@ -3943,12 +3951,15 @@ brcmf_sdio_probe_attach(struct brcmf_sdio *bus)
int reg_addr;
u32 reg_val;
u32 drivestrength;
+ u32 enum_base;
sdiodev = bus->sdiodev;
sdio_claim_host(sdiodev->func1);
- pr_debug("F1 signature read @0x18000000=0x%4x\n",
- brcmf_sdiod_readl(sdiodev, SI_ENUM_BASE, NULL));
+ enum_base = brcmf_chip_enum_base(sdiodev->func1->device);
+
+ pr_debug("F1 signature read @0x%08x=0x%4x\n", enum_base,
+ brcmf_sdiod_readl(sdiodev, enum_base, NULL));
/*
* Force PLL off until brcmf_chip_attach()
@@ -3967,7 +3978,8 @@ brcmf_sdio_probe_attach(struct brcmf_sdio *bus)
goto fail;
}
- bus->ci = brcmf_chip_attach(sdiodev, &brcmf_sdio_buscore_ops);
+ bus->ci = brcmf_chip_attach(sdiodev, sdiodev->func1->device,
+ &brcmf_sdio_buscore_ops);
if (IS_ERR(bus->ci)) {
brcmf_err("brcmf_chip_attach failed!\n");
bus->ci = NULL;
@@ -4257,6 +4269,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
switch (sdiod->func1->device) {
case SDIO_DEVICE_ID_BROADCOM_CYPRESS_4373:
+ case SDIO_DEVICE_ID_BROADCOM_CYPRESS_43752:
brcmf_dbg(INFO, "set F2 watermark to 0x%x*4 bytes\n",
CY_4373_F2_WATERMARK);
brcmf_sdiod_writeb(sdiod, SBSDIO_WATERMARK,
@@ -4442,7 +4455,7 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
bus->tx_seq = SDPCM_SEQ_WRAP - 1;
/* single-threaded workqueue */
- wq = alloc_ordered_workqueue("brcmf_wq/%s", WQ_MEM_RECLAIM,
+ wq = alloc_ordered_workqueue("brcmf_wq/%s", WQ_MEM_RECLAIM | WQ_HIGHPRI,
dev_name(&sdiodev->func1->dev));
if (!wq) {
brcmf_err("insufficient memory to create txworkqueue\n");
@@ -4616,4 +4629,3 @@ int brcmf_sdio_sleep(struct brcmf_sdio *bus, bool sleep)
return ret;
}
-
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c
new file mode 100644
index 000000000000..2f3c451148db
--- /dev/null
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: ISC
+/*
+ * Copyright (c) 2019 Broadcom
+ */
+
+#include <asm/unaligned.h>
+#include <linux/string.h>
+#include <linux/bug.h>
+
+#include "xtlv.h"
+
+static int brcmf_xtlv_header_size(u16 opts)
+{
+ int len = (int)offsetof(struct brcmf_xtlv, data);
+
+ if (opts & BRCMF_XTLV_OPTION_IDU8)
+ --len;
+ if (opts & BRCMF_XTLV_OPTION_LENU8)
+ --len;
+
+ return len;
+}
+
+int brcmf_xtlv_data_size(int dlen, u16 opts)
+{
+ int hsz;
+
+ hsz = brcmf_xtlv_header_size(opts);
+ if (opts & BRCMF_XTLV_OPTION_ALIGN32)
+ return roundup(dlen + hsz, 4);
+
+ return dlen + hsz;
+}
+
+void brcmf_xtlv_pack_header(struct brcmf_xtlv *xtlv, u16 id, u16 len,
+ const u8 *data, u16 opts)
+{
+ u8 *data_buf;
+ u16 mask = BRCMF_XTLV_OPTION_IDU8 | BRCMF_XTLV_OPTION_LENU8;
+
+ if (!(opts & mask)) {
+ u8 *idp = (u8 *)xtlv;
+ u8 *lenp = idp + sizeof(xtlv->id);
+
+ put_unaligned_le16(id, idp);
+ put_unaligned_le16(len, lenp);
+ data_buf = lenp + sizeof(u16);
+ } else if ((opts & mask) == mask) { /* u8 id and u8 len */
+ u8 *idp = (u8 *)xtlv;
+ u8 *lenp = idp + 1;
+
+ *idp = (u8)id;
+ *lenp = (u8)len;
+ data_buf = lenp + sizeof(u8);
+ } else if (opts & BRCMF_XTLV_OPTION_IDU8) { /* u8 id, u16 len */
+ u8 *idp = (u8 *)xtlv;
+ u8 *lenp = idp + 1;
+
+ *idp = (u8)id;
+ put_unaligned_le16(len, lenp);
+ data_buf = lenp + sizeof(u16);
+ } else if (opts & BRCMF_XTLV_OPTION_LENU8) { /* u16 id, u8 len */
+ u8 *idp = (u8 *)xtlv;
+ u8 *lenp = idp + sizeof(u16);
+
+ put_unaligned_le16(id, idp);
+ *lenp = (u8)len;
+ data_buf = lenp + sizeof(u8);
+ } else {
+ WARN(true, "Unexpected xtlv option");
+ return;
+ }
+
+ if (opts & BRCMF_XTLV_OPTION_LENU8) {
+ WARN_ON(len > 0x00ff);
+ len &= 0xff;
+ }
+
+ if (data)
+ memcpy(data_buf, data, len);
+}
+
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.h
new file mode 100644
index 000000000000..e1930ce1b642
--- /dev/null
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: ISC */
+/*
+ * Copyright (c) 2019 Broadcom
+ */
+#ifndef __BRCMF_XTLV_H
+#define __BRCMF_XTLV_H
+
+#include <linux/types.h>
+#include <linux/bits.h>
+
+/* bcm type(id), length, value with 16 bit id/len. The structure below
+ * is nominal, and is used to support variable width id and len fields.
+ * See xtlv options below.
+ */
+struct brcmf_xtlv {
+ u16 id;
+ u16 len;
+ u8 data[];
+};
+
+enum brcmf_xtlv_option {
+ BRCMF_XTLV_OPTION_ALIGN32 = BIT(0), /* round packed size up to 4 bytes */
+ BRCMF_XTLV_OPTION_IDU8 = BIT(1), /* id is packed as u8, not u16 */
+ BRCMF_XTLV_OPTION_LENU8 = BIT(2), /* len is packed as u8, not u16 */
+};
+
+int brcmf_xtlv_data_size(int dlen, u16 opts);
+void brcmf_xtlv_pack_header(struct brcmf_xtlv *xtlv, u16 id, u16 len,
+ const u8 *data, u16 opts);
+
+#endif /* __BRCMF_XTLV_H */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c
index 26de1bd7fee9..8ddfc3d06687 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c
@@ -704,7 +704,7 @@ static void brcms_c_write_inits(struct brcms_hardware *wlc_hw,
static void brcms_c_write_mhf(struct brcms_hardware *wlc_hw, u16 *mhfs)
{
u8 idx;
- u16 addr[] = {
+ static const u16 addr[] = {
M_HOST_FLAGS1, M_HOST_FLAGS2, M_HOST_FLAGS3, M_HOST_FLAGS4,
M_HOST_FLAGS5
};
diff --git a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h
index 00309b272a0e..9d81320164ce 100644
--- a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h
+++ b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h
@@ -52,6 +52,7 @@
#define BRCM_CC_4371_CHIP_ID 0x4371
#define CY_CC_4373_CHIP_ID 0x4373
#define CY_CC_43012_CHIP_ID 43012
+#define CY_CC_43752_CHIP_ID 43752
/* USB Device IDs */
#define BRCM_USB_43143_DEVICE_ID 0xbd1e
diff --git a/drivers/net/wireless/broadcom/brcm80211/include/soc.h b/drivers/net/wireless/broadcom/brcm80211/include/soc.h
index 92d942b44f2c..824921191366 100644
--- a/drivers/net/wireless/broadcom/brcm80211/include/soc.h
+++ b/drivers/net/wireless/broadcom/brcm80211/include/soc.h
@@ -6,7 +6,7 @@
#ifndef _BRCM_SOC_H
#define _BRCM_SOC_H
-#define SI_ENUM_BASE 0x18000000 /* Enumeration space base */
+#define SI_ENUM_BASE_DEFAULT 0x18000000
/* Common core control flags */
#define SICF_BIST_EN 0x8000
diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
index fd37d4d2983b..65dd8cff1b01 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -1144,7 +1144,7 @@ static int waitbusy(struct airo_info *ai);
static irqreturn_t airo_interrupt(int irq, void* dev_id);
static int airo_thread(void *data);
static void timer_func(struct net_device *dev);
-static int airo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static int airo_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *, int cmd);
static struct iw_statistics *airo_get_wireless_stats(struct net_device *dev);
#ifdef CISCO_EXT
static int readrids(struct net_device *dev, aironet_ioctl *comp);
@@ -2664,7 +2664,7 @@ static const struct net_device_ops airo11_netdev_ops = {
.ndo_start_xmit = airo_start_xmit11,
.ndo_get_stats = airo_get_stats,
.ndo_set_mac_address = airo_set_mac_address,
- .ndo_do_ioctl = airo_ioctl,
+ .ndo_siocdevprivate = airo_siocdevprivate,
};
static void wifi_setup(struct net_device *dev)
@@ -2764,7 +2764,7 @@ static const struct net_device_ops airo_netdev_ops = {
.ndo_get_stats = airo_get_stats,
.ndo_set_rx_mode = airo_set_multicast_list,
.ndo_set_mac_address = airo_set_mac_address,
- .ndo_do_ioctl = airo_ioctl,
+ .ndo_siocdevprivate = airo_siocdevprivate,
.ndo_validate_addr = eth_validate_addr,
};
@@ -2775,7 +2775,7 @@ static const struct net_device_ops mpi_netdev_ops = {
.ndo_get_stats = airo_get_stats,
.ndo_set_rx_mode = airo_set_multicast_list,
.ndo_set_mac_address = airo_set_mac_address,
- .ndo_do_ioctl = airo_ioctl,
+ .ndo_siocdevprivate = airo_siocdevprivate,
.ndo_validate_addr = eth_validate_addr,
};
@@ -7661,7 +7661,8 @@ static const struct iw_handler_def airo_handler_def =
* Javier Achirica did a great job of merging code from the unnamed CISCO
* developer that added support for flashing the card.
*/
-static int airo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int airo_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+ void __user *data, int cmd)
{
int rc = 0;
struct airo_info *ai = dev->ml_priv;
@@ -7678,7 +7679,7 @@ static int airo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
int val = AIROMAGIC;
aironet_ioctl com;
- if (copy_from_user(&com, rq->ifr_data, sizeof(com)))
+ if (copy_from_user(&com, data, sizeof(com)))
rc = -EFAULT;
else if (copy_to_user(com.data, (char *)&val, sizeof(val)))
rc = -EFAULT;
@@ -7694,7 +7695,7 @@ static int airo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
*/
{
aironet_ioctl com;
- if (copy_from_user(&com, rq->ifr_data, sizeof(com))) {
+ if (copy_from_user(&com, data, sizeof(com))) {
rc = -EFAULT;
break;
}
diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_rx.c b/drivers/net/wireless/intel/ipw2x00/libipw_rx.c
index 5a2a723e480b..7a684b76f39b 100644
--- a/drivers/net/wireless/intel/ipw2x00/libipw_rx.c
+++ b/drivers/net/wireless/intel/ipw2x00/libipw_rx.c
@@ -927,7 +927,8 @@ static u8 qos_oui[QOS_OUI_LEN] = { 0x00, 0x50, 0xF2 };
static int libipw_verify_qos_info(struct libipw_qos_information_element
*info_element, int sub_type)
{
-
+ if (info_element->elementID != QOS_ELEMENT_ID)
+ return -1;
if (info_element->qui_subtype != sub_type)
return -1;
if (memcmp(info_element->qui, qos_oui, QOS_OUI_LEN))
@@ -943,57 +944,34 @@ static int libipw_verify_qos_info(struct libipw_qos_information_element
/*
* Parse a QoS parameter element
*/
-static int libipw_read_qos_param_element(struct libipw_qos_parameter_info
- *element_param, struct libipw_info_element
- *info_element)
+static int libipw_read_qos_param_element(
+ struct libipw_qos_parameter_info *element_param,
+ struct libipw_info_element *info_element)
{
- int ret = 0;
- u16 size = sizeof(struct libipw_qos_parameter_info) - 2;
+ size_t size = sizeof(*element_param); /* whole element; len excludes 2 header bytes */
- if ((info_element == NULL) || (element_param == NULL))
+ if (!element_param || !info_element || info_element->len != size - 2)
return -1;
- if (info_element->id == QOS_ELEMENT_ID && info_element->len == size) {
- memcpy(element_param->info_element.qui, info_element->data,
- info_element->len);
- element_param->info_element.elementID = info_element->id;
- element_param->info_element.length = info_element->len;
- } else
- ret = -1;
- if (ret == 0)
- ret = libipw_verify_qos_info(&element_param->info_element,
- QOS_OUI_PARAM_SUB_TYPE);
- return ret;
+ memcpy(element_param, info_element, size); /* id/len header and payload in one copy */
+ return libipw_verify_qos_info(&element_param->info_element,
+ QOS_OUI_PARAM_SUB_TYPE);
}
/*
* Parse a QoS information element
*/
-static int libipw_read_qos_info_element(struct
- libipw_qos_information_element
- *element_info, struct libipw_info_element
- *info_element)
+static int libipw_read_qos_info_element(
+ struct libipw_qos_information_element *element_info,
+ struct libipw_info_element *info_element)
{
- int ret = 0;
- u16 size = sizeof(struct libipw_qos_information_element) - 2;
+ size_t size = sizeof(*element_info); /* whole element; len excludes 2 header bytes */
- if (element_info == NULL)
+ if (!element_info || !info_element || info_element->len != size - 2)
return -1;
- if (info_element == NULL)
- return -1;
-
- if ((info_element->id == QOS_ELEMENT_ID) && (info_element->len == size)) {
- memcpy(element_info->qui, info_element->data,
- info_element->len);
- element_info->elementID = info_element->id;
- element_info->length = info_element->len;
- } else
- ret = -1;
- if (ret == 0)
- ret = libipw_verify_qos_info(element_info,
- QOS_OUI_INFO_SUB_TYPE);
- return ret;
+ memcpy(element_info, info_element, size);
+ return libipw_verify_qos_info(element_info, QOS_OUI_INFO_SUB_TYPE);
}
/*
diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c
index d9baa2fa603b..36d1e6b2568d 100644
--- a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c
+++ b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c
@@ -179,8 +179,8 @@ static struct libipw_txb *libipw_alloc_txb(int nr_frags, int txb_size,
{
struct libipw_txb *txb;
int i;
- txb = kmalloc(sizeof(struct libipw_txb) + (sizeof(u8 *) * nr_frags),
- gfp_mask);
+
+ txb = kmalloc(struct_size(txb, fragments, nr_frags), gfp_mask);
if (!txb)
return NULL;
diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
index 6ff2674f8466..45abb25b65a9 100644
--- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c
+++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
@@ -571,20 +571,18 @@ il3945_tx_skb(struct il_priv *il,
/* Physical address of this Tx command's header (not MAC header!),
* within command buffer array. */
- txcmd_phys =
- pci_map_single(il->pci_dev, &out_cmd->hdr, firstlen,
- PCI_DMA_TODEVICE);
- if (unlikely(pci_dma_mapping_error(il->pci_dev, txcmd_phys)))
+ txcmd_phys = dma_map_single(&il->pci_dev->dev, &out_cmd->hdr, firstlen,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(&il->pci_dev->dev, txcmd_phys)))
goto drop_unlock;
/* Set up TFD's 2nd entry to point directly to remainder of skb,
* if any (802.11 null frames have no payload). */
secondlen = skb->len - hdr_len;
if (secondlen > 0) {
- phys_addr =
- pci_map_single(il->pci_dev, skb->data + hdr_len, secondlen,
- PCI_DMA_TODEVICE);
- if (unlikely(pci_dma_mapping_error(il->pci_dev, phys_addr)))
+ phys_addr = dma_map_single(&il->pci_dev->dev, skb->data + hdr_len,
+ secondlen, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(&il->pci_dev->dev, phys_addr)))
goto drop_unlock;
}
@@ -1015,11 +1013,11 @@ il3945_rx_allocate(struct il_priv *il, gfp_t priority)
/* Get physical address of RB/SKB */
page_dma =
- pci_map_page(il->pci_dev, page, 0,
+ dma_map_page(&il->pci_dev->dev, page, 0,
PAGE_SIZE << il->hw_params.rx_page_order,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
- if (unlikely(pci_dma_mapping_error(il->pci_dev, page_dma))) {
+ if (unlikely(dma_mapping_error(&il->pci_dev->dev, page_dma))) {
__free_pages(page, il->hw_params.rx_page_order);
break;
}
@@ -1028,9 +1026,9 @@ il3945_rx_allocate(struct il_priv *il, gfp_t priority)
if (list_empty(&rxq->rx_used)) {
spin_unlock_irqrestore(&rxq->lock, flags);
- pci_unmap_page(il->pci_dev, page_dma,
+ dma_unmap_page(&il->pci_dev->dev, page_dma,
PAGE_SIZE << il->hw_params.rx_page_order,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
__free_pages(page, il->hw_params.rx_page_order);
return;
}
@@ -1062,9 +1060,10 @@ il3945_rx_queue_reset(struct il_priv *il, struct il_rx_queue *rxq)
/* In the reset function, these buffers may have been allocated
* to an SKB, so we need to unmap and free potential storage */
if (rxq->pool[i].page != NULL) {
- pci_unmap_page(il->pci_dev, rxq->pool[i].page_dma,
+ dma_unmap_page(&il->pci_dev->dev,
+ rxq->pool[i].page_dma,
PAGE_SIZE << il->hw_params.rx_page_order,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
__il_free_pages(il, rxq->pool[i].page);
rxq->pool[i].page = NULL;
}
@@ -1111,9 +1110,10 @@ il3945_rx_queue_free(struct il_priv *il, struct il_rx_queue *rxq)
int i;
for (i = 0; i < RX_QUEUE_SIZE + RX_FREE_BUFFERS; i++) {
if (rxq->pool[i].page != NULL) {
- pci_unmap_page(il->pci_dev, rxq->pool[i].page_dma,
+ dma_unmap_page(&il->pci_dev->dev,
+ rxq->pool[i].page_dma,
PAGE_SIZE << il->hw_params.rx_page_order,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
__il_free_pages(il, rxq->pool[i].page);
rxq->pool[i].page = NULL;
}
@@ -1213,9 +1213,9 @@ il3945_rx_handle(struct il_priv *il)
rxq->queue[i] = NULL;
- pci_unmap_page(il->pci_dev, rxb->page_dma,
+ dma_unmap_page(&il->pci_dev->dev, rxb->page_dma,
PAGE_SIZE << il->hw_params.rx_page_order,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
pkt = rxb_addr(rxb);
len = le32_to_cpu(pkt->len_n_flags) & IL_RX_FRAME_SIZE_MSK;
@@ -1260,11 +1260,11 @@ il3945_rx_handle(struct il_priv *il)
spin_lock_irqsave(&rxq->lock, flags);
if (rxb->page != NULL) {
rxb->page_dma =
- pci_map_page(il->pci_dev, rxb->page, 0,
- PAGE_SIZE << il->hw_params.
- rx_page_order, PCI_DMA_FROMDEVICE);
- if (unlikely(pci_dma_mapping_error(il->pci_dev,
- rxb->page_dma))) {
+ dma_map_page(&il->pci_dev->dev, rxb->page, 0,
+ PAGE_SIZE << il->hw_params.rx_page_order,
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&il->pci_dev->dev,
+ rxb->page_dma))) {
__il_free_pages(il, rxb->page);
rxb->page = NULL;
list_add_tail(&rxb->list, &rxq->rx_used);
@@ -3616,9 +3616,7 @@ il3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_master(pdev);
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
- if (!err)
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (err) {
IL_WARN("No suitable DMA available.\n");
goto out_pci_disable_device;
diff --git a/drivers/net/wireless/intel/iwlegacy/3945.c b/drivers/net/wireless/intel/iwlegacy/3945.c
index 0597d828bee1..a773939b8c2a 100644
--- a/drivers/net/wireless/intel/iwlegacy/3945.c
+++ b/drivers/net/wireless/intel/iwlegacy/3945.c
@@ -652,16 +652,16 @@ il3945_hw_txq_free_tfd(struct il_priv *il, struct il_tx_queue *txq)
/* Unmap tx_cmd */
if (counter)
- pci_unmap_single(dev, dma_unmap_addr(&txq->meta[idx], mapping),
+ dma_unmap_single(&dev->dev,
+ dma_unmap_addr(&txq->meta[idx], mapping),
dma_unmap_len(&txq->meta[idx], len),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
/* unmap chunks if any */
for (i = 1; i < counter; i++)
- pci_unmap_single(dev, le32_to_cpu(tfd->tbs[i].addr),
- le32_to_cpu(tfd->tbs[i].len),
- PCI_DMA_TODEVICE);
+ dma_unmap_single(&dev->dev, le32_to_cpu(tfd->tbs[i].addr),
+ le32_to_cpu(tfd->tbs[i].len), DMA_TO_DEVICE);
/* free SKB */
if (txq->skbs) {
diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
index 341d6a2bc690..0223532fd56a 100644
--- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c
+++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
@@ -94,9 +94,10 @@ il4965_rx_queue_reset(struct il_priv *il, struct il_rx_queue *rxq)
/* In the reset function, these buffers may have been allocated
* to an SKB, so we need to unmap and free potential storage */
if (rxq->pool[i].page != NULL) {
- pci_unmap_page(il->pci_dev, rxq->pool[i].page_dma,
+ dma_unmap_page(&il->pci_dev->dev,
+ rxq->pool[i].page_dma,
PAGE_SIZE << il->hw_params.rx_page_order,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
__il_free_pages(il, rxq->pool[i].page);
rxq->pool[i].page = NULL;
}
@@ -342,11 +343,10 @@ il4965_rx_allocate(struct il_priv *il, gfp_t priority)
}
/* Get physical address of the RB */
- page_dma =
- pci_map_page(il->pci_dev, page, 0,
- PAGE_SIZE << il->hw_params.rx_page_order,
- PCI_DMA_FROMDEVICE);
- if (unlikely(pci_dma_mapping_error(il->pci_dev, page_dma))) {
+ page_dma = dma_map_page(&il->pci_dev->dev, page, 0,
+ PAGE_SIZE << il->hw_params.rx_page_order,
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&il->pci_dev->dev, page_dma))) {
__free_pages(page, il->hw_params.rx_page_order);
break;
}
@@ -355,9 +355,9 @@ il4965_rx_allocate(struct il_priv *il, gfp_t priority)
if (list_empty(&rxq->rx_used)) {
spin_unlock_irqrestore(&rxq->lock, flags);
- pci_unmap_page(il->pci_dev, page_dma,
+ dma_unmap_page(&il->pci_dev->dev, page_dma,
PAGE_SIZE << il->hw_params.rx_page_order,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
__free_pages(page, il->hw_params.rx_page_order);
return;
}
@@ -409,9 +409,10 @@ il4965_rx_queue_free(struct il_priv *il, struct il_rx_queue *rxq)
int i;
for (i = 0; i < RX_QUEUE_SIZE + RX_FREE_BUFFERS; i++) {
if (rxq->pool[i].page != NULL) {
- pci_unmap_page(il->pci_dev, rxq->pool[i].page_dma,
+ dma_unmap_page(&il->pci_dev->dev,
+ rxq->pool[i].page_dma,
PAGE_SIZE << il->hw_params.rx_page_order,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
__il_free_pages(il, rxq->pool[i].page);
rxq->pool[i].page = NULL;
}
@@ -1815,20 +1816,18 @@ il4965_tx_skb(struct il_priv *il,
/* Physical address of this Tx command's header (not MAC header!),
* within command buffer array. */
- txcmd_phys =
- pci_map_single(il->pci_dev, &out_cmd->hdr, firstlen,
- PCI_DMA_BIDIRECTIONAL);
- if (unlikely(pci_dma_mapping_error(il->pci_dev, txcmd_phys)))
+ txcmd_phys = dma_map_single(&il->pci_dev->dev, &out_cmd->hdr, firstlen,
+ DMA_BIDIRECTIONAL);
+ if (unlikely(dma_mapping_error(&il->pci_dev->dev, txcmd_phys)))
goto drop_unlock;
/* Set up TFD's 2nd entry to point directly to remainder of skb,
* if any (802.11 null frames have no payload). */
secondlen = skb->len - hdr_len;
if (secondlen > 0) {
- phys_addr =
- pci_map_single(il->pci_dev, skb->data + hdr_len, secondlen,
- PCI_DMA_TODEVICE);
- if (unlikely(pci_dma_mapping_error(il->pci_dev, phys_addr)))
+ phys_addr = dma_map_single(&il->pci_dev->dev, skb->data + hdr_len,
+ secondlen, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(&il->pci_dev->dev, phys_addr)))
goto drop_unlock;
}
@@ -1853,8 +1852,8 @@ il4965_tx_skb(struct il_priv *il,
offsetof(struct il_tx_cmd, scratch);
/* take back ownership of DMA buffer to enable update */
- pci_dma_sync_single_for_cpu(il->pci_dev, txcmd_phys, firstlen,
- PCI_DMA_BIDIRECTIONAL);
+ dma_sync_single_for_cpu(&il->pci_dev->dev, txcmd_phys, firstlen,
+ DMA_BIDIRECTIONAL);
tx_cmd->dram_lsb_ptr = cpu_to_le32(scratch_phys);
tx_cmd->dram_msb_ptr = il_get_dma_hi_addr(scratch_phys);
@@ -1869,8 +1868,8 @@ il4965_tx_skb(struct il_priv *il,
if (info->flags & IEEE80211_TX_CTL_AMPDU)
il->ops->txq_update_byte_cnt_tbl(il, txq, le16_to_cpu(tx_cmd->len));
- pci_dma_sync_single_for_device(il->pci_dev, txcmd_phys, firstlen,
- PCI_DMA_BIDIRECTIONAL);
+ dma_sync_single_for_device(&il->pci_dev->dev, txcmd_phys, firstlen,
+ DMA_BIDIRECTIONAL);
/* Tell device the write idx *just past* this latest filled TFD */
q->write_ptr = il_queue_inc_wrap(q->write_ptr, q->n_bd);
@@ -3929,15 +3928,15 @@ il4965_hw_txq_free_tfd(struct il_priv *il, struct il_tx_queue *txq)
/* Unmap tx_cmd */
if (num_tbs)
- pci_unmap_single(dev, dma_unmap_addr(&txq->meta[idx], mapping),
+ dma_unmap_single(&dev->dev,
+ dma_unmap_addr(&txq->meta[idx], mapping),
dma_unmap_len(&txq->meta[idx], len),
- PCI_DMA_BIDIRECTIONAL);
+ DMA_BIDIRECTIONAL);
/* Unmap chunks, if any. */
for (i = 1; i < num_tbs; i++)
- pci_unmap_single(dev, il4965_tfd_tb_get_addr(tfd, i),
- il4965_tfd_tb_get_len(tfd, i),
- PCI_DMA_TODEVICE);
+ dma_unmap_single(&dev->dev, il4965_tfd_tb_get_addr(tfd, i),
+ il4965_tfd_tb_get_len(tfd, i), DMA_TO_DEVICE);
/* free SKB */
if (txq->skbs) {
@@ -4243,9 +4242,9 @@ il4965_rx_handle(struct il_priv *il)
rxq->queue[i] = NULL;
- pci_unmap_page(il->pci_dev, rxb->page_dma,
+ dma_unmap_page(&il->pci_dev->dev, rxb->page_dma,
PAGE_SIZE << il->hw_params.rx_page_order,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
pkt = rxb_addr(rxb);
len = le32_to_cpu(pkt->len_n_flags) & IL_RX_FRAME_SIZE_MSK;
@@ -4290,12 +4289,12 @@ il4965_rx_handle(struct il_priv *il)
spin_lock_irqsave(&rxq->lock, flags);
if (rxb->page != NULL) {
rxb->page_dma =
- pci_map_page(il->pci_dev, rxb->page, 0,
- PAGE_SIZE << il->hw_params.
- rx_page_order, PCI_DMA_FROMDEVICE);
+ dma_map_page(&il->pci_dev->dev, rxb->page, 0,
+ PAGE_SIZE << il->hw_params.rx_page_order,
+ DMA_FROM_DEVICE);
- if (unlikely(pci_dma_mapping_error(il->pci_dev,
- rxb->page_dma))) {
+ if (unlikely(dma_mapping_error(&il->pci_dev->dev,
+ rxb->page_dma))) {
__il_free_pages(il, rxb->page);
rxb->page = NULL;
list_add_tail(&rxb->list, &rxq->rx_used);
@@ -6514,14 +6513,9 @@ il4965_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_master(pdev);
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(36));
- if (!err)
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(36));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(36));
if (err) {
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
- if (!err)
- err =
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
/* both attempts failed: */
if (err) {
IL_WARN("No suitable DMA available.\n");
diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c
index 219fed91cac5..683b632981ed 100644
--- a/drivers/net/wireless/intel/iwlegacy/common.c
+++ b/drivers/net/wireless/intel/iwlegacy/common.c
@@ -2819,10 +2819,10 @@ il_cmd_queue_unmap(struct il_priv *il)
i = il_get_cmd_idx(q, q->read_ptr, 0);
if (txq->meta[i].flags & CMD_MAPPED) {
- pci_unmap_single(il->pci_dev,
+ dma_unmap_single(&il->pci_dev->dev,
dma_unmap_addr(&txq->meta[i], mapping),
dma_unmap_len(&txq->meta[i], len),
- PCI_DMA_BIDIRECTIONAL);
+ DMA_BIDIRECTIONAL);
txq->meta[i].flags = 0;
}
@@ -2831,10 +2831,10 @@ il_cmd_queue_unmap(struct il_priv *il)
i = q->n_win;
if (txq->meta[i].flags & CMD_MAPPED) {
- pci_unmap_single(il->pci_dev,
+ dma_unmap_single(&il->pci_dev->dev,
dma_unmap_addr(&txq->meta[i], mapping),
dma_unmap_len(&txq->meta[i], len),
- PCI_DMA_BIDIRECTIONAL);
+ DMA_BIDIRECTIONAL);
txq->meta[i].flags = 0;
}
}
@@ -3197,10 +3197,9 @@ il_enqueue_hcmd(struct il_priv *il, struct il_host_cmd *cmd)
}
#endif
- phys_addr =
- pci_map_single(il->pci_dev, &out_cmd->hdr, fix_size,
- PCI_DMA_BIDIRECTIONAL);
- if (unlikely(pci_dma_mapping_error(il->pci_dev, phys_addr))) {
+ phys_addr = dma_map_single(&il->pci_dev->dev, &out_cmd->hdr, fix_size,
+ DMA_BIDIRECTIONAL);
+ if (unlikely(dma_mapping_error(&il->pci_dev->dev, phys_addr))) {
idx = -ENOMEM;
goto out;
}
@@ -3298,8 +3297,8 @@ il_tx_cmd_complete(struct il_priv *il, struct il_rx_buf *rxb)
txq->time_stamp = jiffies;
- pci_unmap_single(il->pci_dev, dma_unmap_addr(meta, mapping),
- dma_unmap_len(meta, len), PCI_DMA_BIDIRECTIONAL);
+ dma_unmap_single(&il->pci_dev->dev, dma_unmap_addr(meta, mapping),
+ dma_unmap_len(meta, len), DMA_BIDIRECTIONAL);
/* Input error checking is done when commands are added to queue. */
if (meta->flags & CMD_WANT_SKB) {
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index 7f1faa9d97b4..52d1d391f4c6 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -9,7 +9,7 @@
#include "iwl-prph.h"
/* Highest firmware API version supported */
-#define IWL_22000_UCODE_API_MAX 64
+#define IWL_22000_UCODE_API_MAX 65
/* Lowest firmware API version supported */
#define IWL_22000_UCODE_API_MIN 39
@@ -154,7 +154,7 @@ static const struct iwl_ht_params iwl_22000_ht_params = {
.apmg_not_supported = true, \
.trans.mq_rx_supported = true, \
.vht_mu_mimo_supported = true, \
- .mac_addr_from_csr = true, \
+ .mac_addr_from_csr = 0x380, \
.ht_params = &iwl_22000_ht_params, \
.nvm_ver = IWL_22000_NVM_VERSION, \
.trans.use_tfh = true, \
@@ -215,6 +215,67 @@ static const struct iwl_ht_params iwl_22000_ht_params = {
}, \
}
+#define IWL_DEVICE_BZ_COMMON \
+ .ucode_api_max = IWL_22000_UCODE_API_MAX, \
+ .ucode_api_min = IWL_22000_UCODE_API_MIN, \
+ .led_mode = IWL_LED_RF_STATE, \
+ .nvm_hw_section_num = 10, \
+ .non_shared_ant = ANT_B, \
+ .dccm_offset = IWL_22000_DCCM_OFFSET, \
+ .dccm_len = IWL_22000_DCCM_LEN, \
+ .dccm2_offset = IWL_22000_DCCM2_OFFSET, \
+ .dccm2_len = IWL_22000_DCCM2_LEN, \
+ .smem_offset = IWL_22000_SMEM_OFFSET, \
+ .smem_len = IWL_22000_SMEM_LEN, \
+ .features = IWL_TX_CSUM_NETIF_FLAGS | NETIF_F_RXCSUM, \
+ .apmg_not_supported = true, \
+ .trans.mq_rx_supported = true, \
+ .vht_mu_mimo_supported = true, \
+ .mac_addr_from_csr = 0x30, \
+ .ht_params = &iwl_22000_ht_params, \
+ .nvm_ver = IWL_22000_NVM_VERSION, \
+ .trans.use_tfh = true, \
+ .trans.rf_id = true, \
+ .trans.gen2 = true, \
+ .nvm_type = IWL_NVM_EXT, \
+ .dbgc_supported = true, \
+ .min_umac_error_event_table = 0x400000, \
+ .d3_debug_data_base_addr = 0x401000, \
+ .d3_debug_data_length = 60 * 1024, \
+ .mon_smem_regs = { \
+ .write_ptr = { \
+ .addr = LDBG_M2S_BUF_WPTR, \
+ .mask = LDBG_M2S_BUF_WPTR_VAL_MSK, \
+ }, \
+ .cycle_cnt = { \
+ .addr = LDBG_M2S_BUF_WRAP_CNT, \
+ .mask = LDBG_M2S_BUF_WRAP_CNT_VAL_MSK, \
+ }, \
+ }
+
+#define IWL_DEVICE_BZ \
+ IWL_DEVICE_BZ_COMMON, \
+ .trans.umac_prph_offset = 0x300000, \
+ .trans.device_family = IWL_DEVICE_FAMILY_BZ, \
+ .trans.base_params = &iwl_ax210_base_params, \
+ .min_txq_size = 128, \
+ .gp2_reg_addr = 0xd02c68, \
+ .min_256_ba_txq_size = 1024, \
+ .mon_dram_regs = { \
+ .write_ptr = { \
+ .addr = DBGC_CUR_DBGBUF_STATUS, \
+ .mask = DBGC_CUR_DBGBUF_STATUS_OFFSET_MSK, \
+ }, \
+ .cycle_cnt = { \
+ .addr = DBGC_DBGBUF_WRAP_AROUND, \
+ .mask = 0xffffffff, \
+ }, \
+ .cur_frag = { \
+ .addr = DBGC_CUR_DBGBUF_STATUS, \
+ .mask = DBGC_CUR_DBGBUF_STATUS_IDX_MSK, \
+ }, \
+ }
+
const struct iwl_cfg_trans_params iwl_qnj_trans_cfg = {
.mq_rx_supported = true,
.use_tfh = true,
@@ -373,7 +434,7 @@ const struct iwl_cfg_trans_params iwl_ma_trans_cfg = {
};
const struct iwl_cfg_trans_params iwl_bz_trans_cfg = {
- .device_family = IWL_DEVICE_FAMILY_AX210,
+ .device_family = IWL_DEVICE_FAMILY_BZ,
.base_params = &iwl_ax210_base_params,
.mq_rx_supported = true,
.use_tfh = true,
@@ -394,6 +455,7 @@ const char iwl_ax211_name[] = "Intel(R) Wi-Fi 6E AX211 160MHz";
const char iwl_ax221_name[] = "Intel(R) Wi-Fi 6E AX221 160MHz";
const char iwl_ax231_name[] = "Intel(R) Wi-Fi 6E AX231 160MHz";
const char iwl_ax411_name[] = "Intel(R) Wi-Fi 6E AX411 160MHz";
+const char iwl_bz_name[] = "Intel(R) TBD Bz device";
const char iwl_ax200_killer_1650w_name[] =
"Killer(R) Wi-Fi 6 AX1650w 160MHz Wireless Network Adapter (200D2W)";
@@ -763,28 +825,28 @@ const struct iwl_cfg iwl_cfg_quz_a0_hr_b0 = {
const struct iwl_cfg iwl_cfg_bz_a0_hr_b0 = {
.fw_name_pre = IWL_BZ_A_HR_B_FW_PRE,
.uhb_supported = true,
- IWL_DEVICE_AX210,
+ IWL_DEVICE_BZ,
.num_rbds = IWL_NUM_RBDS_AX210_HE,
};
const struct iwl_cfg iwl_cfg_bz_a0_gf_a0 = {
.fw_name_pre = IWL_BZ_A_GF_A_FW_PRE,
.uhb_supported = true,
- IWL_DEVICE_AX210,
+ IWL_DEVICE_BZ,
.num_rbds = IWL_NUM_RBDS_AX210_HE,
};
const struct iwl_cfg iwl_cfg_bz_a0_gf4_a0 = {
.fw_name_pre = IWL_BZ_A_GF4_A_FW_PRE,
.uhb_supported = true,
- IWL_DEVICE_AX210,
+ IWL_DEVICE_BZ,
.num_rbds = IWL_NUM_RBDS_AX210_HE,
};
const struct iwl_cfg iwl_cfg_bz_a0_mr_a0 = {
.fw_name_pre = IWL_BZ_A_MR_A_FW_PRE,
.uhb_supported = true,
- IWL_DEVICE_AX210,
+ IWL_DEVICE_BZ,
.num_rbds = IWL_NUM_RBDS_AX210_HE,
};
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
index 871533beff30..7a7ca06d46c1 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
@@ -89,7 +89,7 @@ static const struct iwl_tt_params iwl9000_tt_params = {
.apmg_not_supported = true, \
.num_rbds = 512, \
.vht_mu_mimo_supported = true, \
- .mac_addr_from_csr = true, \
+ .mac_addr_from_csr = 0x380, \
.nvm_type = IWL_NVM_EXT, \
.dbgc_supported = true, \
.min_umac_error_event_table = 0x800000, \
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/main.c b/drivers/net/wireless/intel/iwlwifi/dvm/main.c
index c01523f64bfc..cc7b69fd14d3 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/main.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/main.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
*
- * Copyright(c) 2003 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
+ * Copyright(c) 2003 - 2014, 2018 - 2021 Intel Corporation. All rights reserved.
* Copyright(c) 2015 Intel Deutschland GmbH
*
* Portions of this file are derived from the ipw3945 project, as well
@@ -1950,7 +1950,7 @@ static void iwlagn_fw_error(struct iwl_priv *priv, bool ondemand)
}
}
-static void iwl_nic_error(struct iwl_op_mode *op_mode)
+static void iwl_nic_error(struct iwl_op_mode *op_mode, bool sync)
{
struct iwl_priv *priv = IWL_OP_MODE_GET_DVM(op_mode);
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/rx.c b/drivers/net/wireless/intel/iwlwifi/dvm/rx.c
index 80475c7a6fba..3cd7b423c588 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/rx.c
@@ -318,7 +318,7 @@ iwlagn_accumulative_statistics(struct iwl_priv *priv,
(__le32 *)&priv->delta_stats._name, \
(__le32 *)&priv->max_delta_stats._name, \
(__le32 *)&priv->accum_stats._name, \
- sizeof(*_name));
+ sizeof(*_name))
ACCUM(common);
ACCUM(rx_non_phy);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index 34933f133a0a..1efac0b2a94d 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -264,7 +264,7 @@ int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt,
goto out_free;
}
- enabled = !!wifi_pkg->package.elements[0].integer.value;
+ enabled = !!wifi_pkg->package.elements[1].integer.value;
if (!enabled) {
*block_list_size = -1;
@@ -273,15 +273,15 @@ int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt,
goto out_free;
}
- if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER ||
- wifi_pkg->package.elements[1].integer.value >
+ if (wifi_pkg->package.elements[2].type != ACPI_TYPE_INTEGER ||
+ wifi_pkg->package.elements[2].integer.value >
APCI_WTAS_BLACK_LIST_MAX) {
IWL_DEBUG_RADIO(fwrt, "TAS invalid array size %llu\n",
wifi_pkg->package.elements[1].integer.value);
ret = -EINVAL;
goto out_free;
}
- *block_list_size = wifi_pkg->package.elements[1].integer.value;
+ *block_list_size = wifi_pkg->package.elements[2].integer.value;
IWL_DEBUG_RADIO(fwrt, "TAS array size %d\n", *block_list_size);
if (*block_list_size > APCI_WTAS_BLACK_LIST_MAX) {
@@ -294,15 +294,15 @@ int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt,
for (i = 0; i < *block_list_size; i++) {
u32 country;
- if (wifi_pkg->package.elements[2 + i].type !=
+ if (wifi_pkg->package.elements[3 + i].type !=
ACPI_TYPE_INTEGER) {
IWL_DEBUG_RADIO(fwrt,
- "TAS invalid array elem %d\n", 2 + i);
+ "TAS invalid array elem %d\n", 3 + i);
ret = -EINVAL;
goto out_free;
}
- country = wifi_pkg->package.elements[2 + i].integer.value;
+ country = wifi_pkg->package.elements[3 + i].integer.value;
block_list_array[i] = cpu_to_le32(country);
IWL_DEBUG_RADIO(fwrt, "TAS block list country %d\n", country);
}
@@ -412,20 +412,35 @@ IWL_EXPORT_SYMBOL(iwl_acpi_get_eckv);
static int iwl_sar_set_profile(union acpi_object *table,
struct iwl_sar_profile *profile,
- bool enabled)
+ bool enabled, u8 num_chains, u8 num_sub_bands)
{
- int i;
-
- profile->enabled = enabled;
-
- for (i = 0; i < ACPI_SAR_TABLE_SIZE; i++) {
- if (table[i].type != ACPI_TYPE_INTEGER ||
- table[i].integer.value > U8_MAX)
- return -EINVAL;
+ int i, j, idx = 0; /* idx walks the flat ACPI table */
- profile->table[i] = table[i].integer.value;
+ /*
+ * The table from ACPI is flat, but we store it in a
+ * structured array.
+ */
+ for (i = 0; i < ACPI_SAR_NUM_CHAINS_REV2; i++) {
+ for (j = 0; j < ACPI_SAR_NUM_SUB_BANDS_REV2; j++) {
+ /* if we don't have the values, use the default */
+ if (i >= num_chains || j >= num_sub_bands) {
+ profile->chains[i].subbands[j] = 0;
+ } else {
+ if (table[idx].type != ACPI_TYPE_INTEGER ||
+ table[idx].integer.value > U8_MAX)
+ return -EINVAL; /* bail out before profile->enabled is updated */
+
+ profile->chains[i].subbands[j] =
+ table[idx].integer.value;
+
+ idx++; /* advance only when a flat entry was consumed */
+ }
+ }
}
+ /* Only if all values were valid can the profile be enabled */
+ profile->enabled = enabled;
+
return 0;
}
@@ -433,10 +448,10 @@ static int iwl_sar_fill_table(struct iwl_fw_runtime *fwrt,
__le16 *per_chain, u32 n_subbands,
int prof_a, int prof_b)
{
- int profs[ACPI_SAR_NUM_CHAIN_LIMITS] = { prof_a, prof_b };
- int i, j, idx;
+ int profs[ACPI_SAR_NUM_CHAINS_REV0] = { prof_a, prof_b };
+ int i, j;
- for (i = 0; i < ACPI_SAR_NUM_CHAIN_LIMITS; i++) {
+ for (i = 0; i < ACPI_SAR_NUM_CHAINS_REV0; i++) {
struct iwl_sar_profile *prof;
/* don't allow SAR to be disabled (profile 0 means disable) */
@@ -467,11 +482,10 @@ static int iwl_sar_fill_table(struct iwl_fw_runtime *fwrt,
i, profs[i]);
IWL_DEBUG_RADIO(fwrt, " Chain[%d]:\n", i);
for (j = 0; j < n_subbands; j++) {
- idx = i * ACPI_SAR_NUM_SUB_BANDS + j;
per_chain[i * n_subbands + j] =
- cpu_to_le16(prof->table[idx]);
+ cpu_to_le16(prof->chains[i].subbands[j]);
IWL_DEBUG_RADIO(fwrt, " Band[%d] = %d * .125dBm\n",
- j, prof->table[idx]);
+ j, prof->chains[i].subbands[j]);
}
}
@@ -486,7 +500,7 @@ int iwl_sar_select_profile(struct iwl_fw_runtime *fwrt,
for (i = 0; i < n_tables; i++) {
ret = iwl_sar_fill_table(fwrt,
- &per_chain[i * n_subbands * ACPI_SAR_NUM_CHAIN_LIMITS],
+ &per_chain[i * n_subbands * ACPI_SAR_NUM_CHAINS_REV0],
n_subbands, prof_a, prof_b);
if (ret)
break;
@@ -501,28 +515,71 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt)
union acpi_object *wifi_pkg, *table, *data;
bool enabled;
int ret, tbl_rev;
+ u8 num_chains, num_sub_bands;
data = iwl_acpi_get_object(fwrt->dev, ACPI_WRDS_METHOD);
if (IS_ERR(data))
return PTR_ERR(data);
+ /* start by trying to read revision 2 */
wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
- ACPI_WRDS_WIFI_DATA_SIZE, &tbl_rev);
- if (IS_ERR(wifi_pkg)) {
- ret = PTR_ERR(wifi_pkg);
- goto out_free;
+ ACPI_WRDS_WIFI_DATA_SIZE_REV2,
+ &tbl_rev);
+ if (!IS_ERR(wifi_pkg)) {
+ if (tbl_rev != 2) {
+ ret = PTR_ERR(wifi_pkg);
+ goto out_free;
+ }
+
+ num_chains = ACPI_SAR_NUM_CHAINS_REV2;
+ num_sub_bands = ACPI_SAR_NUM_SUB_BANDS_REV2;
+
+ goto read_table;
}
- if (tbl_rev != 0) {
- ret = -EINVAL;
- goto out_free;
+ /* then try revision 1 */
+ wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
+ ACPI_WRDS_WIFI_DATA_SIZE_REV1,
+ &tbl_rev);
+ if (!IS_ERR(wifi_pkg)) {
+ if (tbl_rev != 1) {
+ ret = PTR_ERR(wifi_pkg);
+ goto out_free;
+ }
+
+ num_chains = ACPI_SAR_NUM_CHAINS_REV1;
+ num_sub_bands = ACPI_SAR_NUM_SUB_BANDS_REV1;
+
+ goto read_table;
+ }
+
+ /* then finally revision 0 */
+ wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
+ ACPI_WRDS_WIFI_DATA_SIZE_REV0,
+ &tbl_rev);
+ if (!IS_ERR(wifi_pkg)) {
+ if (tbl_rev != 0) {
+ ret = PTR_ERR(wifi_pkg);
+ goto out_free;
+ }
+
+ num_chains = ACPI_SAR_NUM_CHAINS_REV0;
+ num_sub_bands = ACPI_SAR_NUM_SUB_BANDS_REV0;
+
+ goto read_table;
}
+ ret = PTR_ERR(wifi_pkg);
+ goto out_free;
+
+read_table:
if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) {
ret = -EINVAL;
goto out_free;
}
+ IWL_DEBUG_RADIO(fwrt, "Reading WRDS tbl_rev=%d\n", tbl_rev);
+
enabled = !!(wifi_pkg->package.elements[1].integer.value);
/* position of the actual table */
@@ -531,7 +588,8 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt)
/* The profile from WRDS is officially profile 1, but goes
* into sar_profiles[0] (because we don't have a profile 0).
*/
- ret = iwl_sar_set_profile(table, &fwrt->sar_profiles[0], enabled);
+ ret = iwl_sar_set_profile(table, &fwrt->sar_profiles[0], enabled,
+ num_chains, num_sub_bands);
out_free:
kfree(data);
return ret;
@@ -544,23 +602,64 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
bool enabled;
int i, n_profiles, tbl_rev, pos;
int ret = 0;
+ u8 num_chains, num_sub_bands;
data = iwl_acpi_get_object(fwrt->dev, ACPI_EWRD_METHOD);
if (IS_ERR(data))
return PTR_ERR(data);
+ /* start by trying to read revision 2 */
wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
- ACPI_EWRD_WIFI_DATA_SIZE, &tbl_rev);
- if (IS_ERR(wifi_pkg)) {
- ret = PTR_ERR(wifi_pkg);
- goto out_free;
+ ACPI_EWRD_WIFI_DATA_SIZE_REV2,
+ &tbl_rev);
+ if (!IS_ERR(wifi_pkg)) {
+ if (tbl_rev != 2) {
+ ret = PTR_ERR(wifi_pkg);
+ goto out_free;
+ }
+
+ num_chains = ACPI_SAR_NUM_CHAINS_REV2;
+ num_sub_bands = ACPI_SAR_NUM_SUB_BANDS_REV2;
+
+ goto read_table;
}
- if (tbl_rev != 0) {
- ret = -EINVAL;
- goto out_free;
+ /* then try revision 1 */
+ wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
+ ACPI_EWRD_WIFI_DATA_SIZE_REV1,
+ &tbl_rev);
+ if (!IS_ERR(wifi_pkg)) {
+ if (tbl_rev != 1) {
+ ret = PTR_ERR(wifi_pkg);
+ goto out_free;
+ }
+
+ num_chains = ACPI_SAR_NUM_CHAINS_REV1;
+ num_sub_bands = ACPI_SAR_NUM_SUB_BANDS_REV1;
+
+ goto read_table;
+ }
+
+ /* then finally revision 0 */
+ wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
+ ACPI_EWRD_WIFI_DATA_SIZE_REV0,
+ &tbl_rev);
+ if (!IS_ERR(wifi_pkg)) {
+ if (tbl_rev != 0) {
+ ret = PTR_ERR(wifi_pkg);
+ goto out_free;
+ }
+
+ num_chains = ACPI_SAR_NUM_CHAINS_REV0;
+ num_sub_bands = ACPI_SAR_NUM_SUB_BANDS_REV0;
+
+ goto read_table;
}
+ ret = PTR_ERR(wifi_pkg);
+ goto out_free;
+
+read_table:
if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER ||
wifi_pkg->package.elements[2].type != ACPI_TYPE_INTEGER) {
ret = -EINVAL;
@@ -589,13 +688,13 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
* have profile 0). So in the array we start from 1.
*/
ret = iwl_sar_set_profile(&wifi_pkg->package.elements[pos],
- &fwrt->sar_profiles[i + 1],
- enabled);
+ &fwrt->sar_profiles[i + 1], enabled,
+ num_chains, num_sub_bands);
if (ret < 0)
break;
/* go to the next table */
- pos += ACPI_SAR_TABLE_SIZE;
+ pos += num_chains * num_sub_bands;
}
out_free:
@@ -607,41 +706,93 @@ IWL_EXPORT_SYMBOL(iwl_sar_get_ewrd_table);
int iwl_sar_get_wgds_table(struct iwl_fw_runtime *fwrt)
{
union acpi_object *wifi_pkg, *data;
- int i, j, ret, tbl_rev;
- int idx = 1;
+ int i, j, k, ret, tbl_rev;
+ int idx = 1; /* start from one to skip the domain */
+ u8 num_bands;
data = iwl_acpi_get_object(fwrt->dev, ACPI_WGDS_METHOD);
if (IS_ERR(data))
return PTR_ERR(data);
+ /* start by trying to read revision 2 */
wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
- ACPI_WGDS_WIFI_DATA_SIZE, &tbl_rev);
+ ACPI_WGDS_WIFI_DATA_SIZE_REV2,
+ &tbl_rev);
+ if (!IS_ERR(wifi_pkg)) {
+ if (tbl_rev != 2) {
+ ret = PTR_ERR(wifi_pkg);
+ goto out_free;
+ }
- if (IS_ERR(wifi_pkg)) {
- ret = PTR_ERR(wifi_pkg);
- goto out_free;
+ num_bands = ACPI_GEO_NUM_BANDS_REV2;
+
+ goto read_table;
}
- if (tbl_rev > 1) {
- ret = -EINVAL;
- goto out_free;
+ /* then try revision 0 (which is the same as 1) */
+ wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
+ ACPI_WGDS_WIFI_DATA_SIZE_REV0,
+ &tbl_rev);
+ if (!IS_ERR(wifi_pkg)) {
+ if (tbl_rev != 0 && tbl_rev != 1) {
+ ret = PTR_ERR(wifi_pkg);
+ goto out_free;
+ }
+
+ num_bands = ACPI_GEO_NUM_BANDS_REV0;
+
+ goto read_table;
}
+ ret = PTR_ERR(wifi_pkg);
+ goto out_free;
+
+read_table:
fwrt->geo_rev = tbl_rev;
for (i = 0; i < ACPI_NUM_GEO_PROFILES; i++) {
- for (j = 0; j < ACPI_GEO_TABLE_SIZE; j++) {
+ for (j = 0; j < ACPI_GEO_NUM_BANDS_REV2; j++) {
union acpi_object *entry;
- entry = &wifi_pkg->package.elements[idx++];
- if (entry->type != ACPI_TYPE_INTEGER ||
- entry->integer.value > U8_MAX) {
- ret = -EINVAL;
- goto out_free;
+ /*
+ * num_bands is either 2 or 3, if it's only 2 then
+ * fill the third band (6 GHz) with the values from
+ * 5 GHz (second band)
+ */
+ if (j >= num_bands) {
+ fwrt->geo_profiles[i].bands[j].max =
+ fwrt->geo_profiles[i].bands[1].max;
+ } else {
+ entry = &wifi_pkg->package.elements[idx++];
+ if (entry->type != ACPI_TYPE_INTEGER ||
+ entry->integer.value > U8_MAX) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+
+ fwrt->geo_profiles[i].bands[j].max =
+ entry->integer.value;
}
- fwrt->geo_profiles[i].values[j] = entry->integer.value;
+ for (k = 0; k < ACPI_GEO_NUM_CHAINS; k++) {
+ /* same here as above */
+ if (j >= num_bands) {
+ fwrt->geo_profiles[i].bands[j].chains[k] =
+ fwrt->geo_profiles[i].bands[1].chains[k];
+ } else {
+ entry = &wifi_pkg->package.elements[idx++];
+ if (entry->type != ACPI_TYPE_INTEGER ||
+ entry->integer.value > U8_MAX) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+
+ fwrt->geo_profiles[i].bands[j].chains[k] =
+ entry->integer.value;
+ }
+ }
}
}
+
ret = 0;
out_free:
kfree(data);
@@ -673,43 +824,26 @@ IWL_EXPORT_SYMBOL(iwl_sar_geo_support);
int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
struct iwl_per_chain_offset *table, u32 n_bands)
{
- int ret, i, j;
+ int i, j;
if (!iwl_sar_geo_support(fwrt))
return -EOPNOTSUPP;
- ret = iwl_sar_get_wgds_table(fwrt);
- if (ret < 0) {
- IWL_DEBUG_RADIO(fwrt,
- "Geo SAR BIOS table invalid or unavailable. (%d)\n",
- ret);
- /* we don't fail if the table is not available */
- return -ENOENT;
- }
-
for (i = 0; i < ACPI_NUM_GEO_PROFILES; i++) {
for (j = 0; j < n_bands; j++) {
struct iwl_per_chain_offset *chain =
&table[i * n_bands + j];
- u8 *value;
-
- if (j * ACPI_GEO_PER_CHAIN_SIZE >=
- ARRAY_SIZE(fwrt->geo_profiles[0].values))
- /*
- * Currently we only store lb an hb values, and
- * don't have any special ones for uhb. So leave
- * those empty for the time being
- */
- break;
-
- value = &fwrt->geo_profiles[i].values[j *
- ACPI_GEO_PER_CHAIN_SIZE];
- chain->max_tx_power = cpu_to_le16(value[0]);
- chain->chain_a = value[1];
- chain->chain_b = value[2];
+
+ chain->max_tx_power =
+ cpu_to_le16(fwrt->geo_profiles[i].bands[j].max);
+ chain->chain_a = fwrt->geo_profiles[i].bands[j].chains[0];
+ chain->chain_b = fwrt->geo_profiles[i].bands[j].chains[1];
IWL_DEBUG_RADIO(fwrt,
"SAR geographic profile[%d] Band[%d]: chain A = %d chain B = %d max_tx_power = %d\n",
- i, j, value[1], value[2], value[0]);
+ i, j,
+ fwrt->geo_profiles[i].bands[j].chains[0],
+ fwrt->geo_profiles[i].bands[j].chains[1],
+ fwrt->geo_profiles[i].bands[j].max);
}
}
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
index b858e998999c..16ed0995b51e 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
@@ -26,21 +26,46 @@
#define ACPI_WIFI_DOMAIN (0x07)
-#define ACPI_SAR_TABLE_SIZE 10
#define ACPI_SAR_PROFILE_NUM 4
-#define ACPI_GEO_TABLE_SIZE 6
#define ACPI_NUM_GEO_PROFILES 3
#define ACPI_GEO_PER_CHAIN_SIZE 3
-#define ACPI_SAR_NUM_CHAIN_LIMITS 2
-#define ACPI_SAR_NUM_SUB_BANDS 5
-#define ACPI_SAR_NUM_TABLES 1
+#define ACPI_SAR_NUM_CHAINS_REV0 2
+#define ACPI_SAR_NUM_CHAINS_REV1 2
+#define ACPI_SAR_NUM_CHAINS_REV2 4
+#define ACPI_SAR_NUM_SUB_BANDS_REV0 5
+#define ACPI_SAR_NUM_SUB_BANDS_REV1 11
+#define ACPI_SAR_NUM_SUB_BANDS_REV2 11
+
+#define ACPI_WRDS_WIFI_DATA_SIZE_REV0 (ACPI_SAR_NUM_CHAINS_REV0 * \
+ ACPI_SAR_NUM_SUB_BANDS_REV0 + 2)
+#define ACPI_WRDS_WIFI_DATA_SIZE_REV1 (ACPI_SAR_NUM_CHAINS_REV1 * \
+ ACPI_SAR_NUM_SUB_BANDS_REV1 + 2)
+#define ACPI_WRDS_WIFI_DATA_SIZE_REV2 (ACPI_SAR_NUM_CHAINS_REV2 * \
+ ACPI_SAR_NUM_SUB_BANDS_REV2 + 2)
+#define ACPI_EWRD_WIFI_DATA_SIZE_REV0 ((ACPI_SAR_PROFILE_NUM - 1) * \
+ ACPI_SAR_NUM_CHAINS_REV0 * \
+ ACPI_SAR_NUM_SUB_BANDS_REV0 + 3)
+#define ACPI_EWRD_WIFI_DATA_SIZE_REV1 ((ACPI_SAR_PROFILE_NUM - 1) * \
+ ACPI_SAR_NUM_CHAINS_REV1 * \
+ ACPI_SAR_NUM_SUB_BANDS_REV1 + 3)
+#define ACPI_EWRD_WIFI_DATA_SIZE_REV2 ((ACPI_SAR_PROFILE_NUM - 1) * \
+ ACPI_SAR_NUM_CHAINS_REV2 * \
+ ACPI_SAR_NUM_SUB_BANDS_REV2 + 3)
+
+/* revision 0 and 1 are identical, except for the semantics in the FW */
+#define ACPI_GEO_NUM_BANDS_REV0 2
+#define ACPI_GEO_NUM_BANDS_REV2 3
+#define ACPI_GEO_NUM_CHAINS 2
+
+#define ACPI_WGDS_WIFI_DATA_SIZE_REV0 (ACPI_NUM_GEO_PROFILES * \
+ ACPI_GEO_NUM_BANDS_REV0 * \
+ ACPI_GEO_PER_CHAIN_SIZE + 1)
+#define ACPI_WGDS_WIFI_DATA_SIZE_REV2 (ACPI_NUM_GEO_PROFILES * \
+ ACPI_GEO_NUM_BANDS_REV2 * \
+ ACPI_GEO_PER_CHAIN_SIZE + 1)
-#define ACPI_WRDS_WIFI_DATA_SIZE (ACPI_SAR_TABLE_SIZE + 2)
-#define ACPI_EWRD_WIFI_DATA_SIZE ((ACPI_SAR_PROFILE_NUM - 1) * \
- ACPI_SAR_TABLE_SIZE + 3)
-#define ACPI_WGDS_WIFI_DATA_SIZE 19
#define ACPI_WRDD_WIFI_DATA_SIZE 2
#define ACPI_SPLC_WIFI_DATA_SIZE 2
#define ACPI_ECKV_WIFI_DATA_SIZE 2
@@ -51,8 +76,6 @@
#define APCI_WTAS_BLACK_LIST_MAX 16
#define ACPI_WTAS_WIFI_DATA_SIZE (3 + APCI_WTAS_BLACK_LIST_MAX)
-#define ACPI_WGDS_TABLE_SIZE 3
-
#define ACPI_PPAG_WIFI_DATA_SIZE_V1 ((IWL_NUM_CHAIN_LIMITS * \
IWL_NUM_SUB_BANDS_V1) + 2)
#define ACPI_PPAG_WIFI_DATA_SIZE_V2 ((IWL_NUM_CHAIN_LIMITS * \
@@ -64,13 +87,28 @@
#define ACPI_PPAG_MIN_HB -16
#define ACPI_PPAG_MAX_HB 40
+/*
+ * The profile for revision 2 is a superset of revision 1, which is in
+ * turn a superset of revision 0. So we can store all revisions
+ * inside revision 2, which is what we represent here.
+ */
+struct iwl_sar_profile_chain {
+ u8 subbands[ACPI_SAR_NUM_SUB_BANDS_REV2];
+};
+
struct iwl_sar_profile {
bool enabled;
- u8 table[ACPI_SAR_TABLE_SIZE];
+ struct iwl_sar_profile_chain chains[ACPI_SAR_NUM_CHAINS_REV2];
+};
+
+/* Same thing as with SAR, all revisions fit in revision 2 */
+struct iwl_geo_profile_band {
+ u8 max;
+ u8 chains[ACPI_GEO_NUM_CHAINS];
};
struct iwl_geo_profile {
- u8 values[ACPI_GEO_TABLE_SIZE];
+ struct iwl_geo_profile_band bands[ACPI_GEO_NUM_BANDS_REV2];
};
enum iwl_dsm_funcs_rev_0 {
@@ -234,7 +272,7 @@ static inline int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
static inline int iwl_sar_get_wgds_table(struct iwl_fw_runtime *fwrt)
{
- return -ENOENT;
+ return 1;
}
static inline bool iwl_sar_geo_support(struct iwl_fw_runtime *fwrt)
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/coex.h b/drivers/net/wireless/intel/iwlwifi/fw/api/coex.h
index 01580c9175f3..3e81e9369224 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/coex.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/coex.h
@@ -142,7 +142,7 @@ enum iwl_bt_mxbox_dw3 {
"\t%s: %d%s", \
#_field, \
BT_MBOX_MSG(notif, _num, _field), \
- true ? "\n" : ", ");
+ true ? "\n" : ", ")
enum iwl_bt_activity_grading {
BT_OFF = 0,
BT_ON_NO_CONNECTION = 1,
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
index ce060c3dfd7b..ee6b5844a871 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
@@ -550,7 +550,8 @@ enum iwl_legacy_cmds {
WOWLAN_CONFIGURATION = 0xe1,
/**
- * @WOWLAN_TSC_RSC_PARAM: &struct iwl_wowlan_rsc_tsc_params_cmd
+ * @WOWLAN_TSC_RSC_PARAM: &struct iwl_wowlan_rsc_tsc_params_cmd_v4,
+ * &struct iwl_wowlan_rsc_tsc_params_cmd
*/
WOWLAN_TSC_RSC_PARAM = 0xe2,
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
index b2e7ef3ddc88..3ec82cae3981 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
@@ -6,6 +6,7 @@
*/
#ifndef __iwl_fw_api_d3_h__
#define __iwl_fw_api_d3_h__
+#include <iwl-trans.h>
/**
* enum iwl_d0i3_flags - d0i3 flags
@@ -389,11 +390,14 @@ struct iwl_wowlan_config_cmd {
u8 reserved;
} __packed; /* WOWLAN_CONFIG_API_S_VER_5 */
+#define IWL_NUM_RSC 16
+#define WOWLAN_KEY_MAX_SIZE 32
+#define WOWLAN_GTK_KEYS_NUM 2
+#define WOWLAN_IGTK_KEYS_NUM 2
+
/*
* WOWLAN_TSC_RSC_PARAMS
*/
-#define IWL_NUM_RSC 16
-
struct tkip_sc {
__le16 iv16;
__le16 pad;
@@ -425,11 +429,19 @@ struct iwl_wowlan_rsc_tsc_params_cmd_ver_2 {
union iwl_all_tsc_rsc all_tsc_rsc;
} __packed; /* ALL_TSC_RSC_API_S_VER_2 */
-struct iwl_wowlan_rsc_tsc_params_cmd {
+struct iwl_wowlan_rsc_tsc_params_cmd_v4 {
struct iwl_wowlan_rsc_tsc_params_cmd_ver_2 params;
__le32 sta_id;
} __packed; /* ALL_TSC_RSC_API_S_VER_4 */
+struct iwl_wowlan_rsc_tsc_params_cmd {
+ __le64 ucast_rsc[IWL_MAX_TID_COUNT];
+ __le64 mcast_rsc[WOWLAN_GTK_KEYS_NUM][IWL_MAX_TID_COUNT];
+ __le32 sta_id;
+#define IWL_MCAST_KEY_MAP_INVALID 0xff
+ u8 mcast_key_id_map[4];
+} __packed; /* ALL_TSC_RSC_API_S_VER_5 */
+
#define IWL_MIC_KEY_SIZE 8
struct iwl_mic_keys {
u8 tx[IWL_MIC_KEY_SIZE];
@@ -541,10 +553,6 @@ struct iwl_wowlan_gtk_status_v1 {
struct iwl_wowlan_rsc_tsc_params_cmd_ver_2 rsc;
} __packed; /* WOWLAN_GTK_MATERIAL_VER_1 */
-#define WOWLAN_KEY_MAX_SIZE 32
-#define WOWLAN_GTK_KEYS_NUM 2
-#define WOWLAN_IGTK_KEYS_NUM 2
-
/**
* struct iwl_wowlan_gtk_status - GTK status
* @key: GTK material
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h b/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h
index 5a2d9a1f7e73..d8b5870d6e9a 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h
@@ -33,12 +33,11 @@ struct iwl_fw_ini_hcmd {
*
* @version: TLV version
* @domain: domain of the TLV. One of &enum iwl_fw_ini_dbg_domain
- * @data: TLV data
*/
struct iwl_fw_ini_header {
__le32 version;
__le32 domain;
- u8 data[];
+ /* followed by the data */
} __packed; /* FW_TLV_DEBUG_HEADER_S_VER_1 */
/**
@@ -130,6 +129,7 @@ struct iwl_fw_ini_region_internal_buffer {
* &IWL_FW_INI_REGION_PERIPHERY_PHY, &IWL_FW_INI_REGION_PERIPHERY_AUX,
* &IWL_FW_INI_REGION_PAGING, &IWL_FW_INI_REGION_CSR,
* &IWL_FW_INI_REGION_DRAM_IMR and &IWL_FW_INI_REGION_PCI_IOSF_CONFIG
+ * &IWL_FW_INI_REGION_DBGI_SRAM, &FW_TLV_DEBUG_REGION_TYPE_DBGI_SRAM,
* @fifos: fifos configuration. Used by &IWL_FW_INI_REGION_TXF and
* &IWL_FW_INI_REGION_RXF
* @err_table: error table configuration. Used by
@@ -249,7 +249,6 @@ struct iwl_fw_ini_hcmd_tlv {
* @IWL_FW_INI_ALLOCATION_ID_DBGC1: allocation meant for DBGC1 configuration
* @IWL_FW_INI_ALLOCATION_ID_DBGC2: allocation meant for DBGC2 configuration
* @IWL_FW_INI_ALLOCATION_ID_DBGC3: allocation meant for DBGC3 configuration
- * @IWL_FW_INI_ALLOCATION_ID_INTERNAL: allocation meant for Intreanl SMEM in D3
* @IWL_FW_INI_ALLOCATION_NUM: number of allocation ids
*/
enum iwl_fw_ini_allocation_id {
@@ -257,7 +256,6 @@ enum iwl_fw_ini_allocation_id {
IWL_FW_INI_ALLOCATION_ID_DBGC1,
IWL_FW_INI_ALLOCATION_ID_DBGC2,
IWL_FW_INI_ALLOCATION_ID_DBGC3,
- IWL_FW_INI_ALLOCATION_ID_INTERNAL,
IWL_FW_INI_ALLOCATION_NUM,
}; /* FW_DEBUG_TLV_ALLOCATION_ID_E_VER_1 */
@@ -298,6 +296,7 @@ enum iwl_fw_ini_buffer_location {
* @IWL_FW_INI_REGION_DRAM_IMR: IMR memory
* @IWL_FW_INI_REGION_PCI_IOSF_CONFIG: PCI/IOSF config
* @IWL_FW_INI_REGION_SPECIAL_DEVICE_MEMORY: special device memory
+ * @IWL_FW_INI_REGION_DBGI_SRAM: periphery registers of DBGI SRAM
* @IWL_FW_INI_REGION_NUM: number of region types
*/
enum iwl_fw_ini_region_type {
@@ -319,6 +318,7 @@ enum iwl_fw_ini_region_type {
IWL_FW_INI_REGION_DRAM_IMR,
IWL_FW_INI_REGION_PCI_IOSF_CONFIG,
IWL_FW_INI_REGION_SPECIAL_DEVICE_MEMORY,
+ IWL_FW_INI_REGION_DBGI_SRAM,
IWL_FW_INI_REGION_NUM
}; /* FW_TLV_DEBUG_REGION_TYPE_API_E */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
index 0e38eb1cd75d..6bbb8b8c91cd 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#ifndef __iwl_fw_api_location_h__
#define __iwl_fw_api_location_h__
@@ -151,6 +151,10 @@ enum iwl_tof_mcsi_enable {
* is valid
* @IWL_TOF_RESPONDER_CMD_VALID_NDP_PARAMS: NDP parameters are valid
* @IWL_TOF_RESPONDER_CMD_VALID_LMR_FEEDBACK: LMR feedback support is valid
+ * @IWL_TOF_RESPONDER_CMD_VALID_SESSION_ID: session id flag is valid
+ * @IWL_TOF_RESPONDER_CMD_VALID_BSS_COLOR: the bss_color field is valid
+ * @IWL_TOF_RESPONDER_CMD_VALID_MIN_MAX_TIME_BETWEEN_MSR: the
+ * min_time_between_msr and max_time_between_msr fields are valid
*/
enum iwl_tof_responder_cmd_valid_field {
IWL_TOF_RESPONDER_CMD_VALID_CHAN_INFO = BIT(0),
@@ -169,6 +173,9 @@ enum iwl_tof_responder_cmd_valid_field {
IWL_TOF_RESPONDER_CMD_VALID_NDP_SUPPORT = BIT(22),
IWL_TOF_RESPONDER_CMD_VALID_NDP_PARAMS = BIT(23),
IWL_TOF_RESPONDER_CMD_VALID_LMR_FEEDBACK = BIT(24),
+ IWL_TOF_RESPONDER_CMD_VALID_SESSION_ID = BIT(25),
+ IWL_TOF_RESPONDER_CMD_VALID_BSS_COLOR = BIT(26),
+ IWL_TOF_RESPONDER_CMD_VALID_MIN_MAX_TIME_BETWEEN_MSR = BIT(27),
};
/**
@@ -186,6 +193,8 @@ enum iwl_tof_responder_cmd_valid_field {
* @IWL_TOF_RESPONDER_FLAGS_NDP_SUPPORT: support NDP ranging
* @IWL_TOF_RESPONDER_FLAGS_LMR_FEEDBACK: request for LMR feedback if the
* initiator supports it
+ * @IWL_TOF_RESPONDER_FLAGS_SESSION_ID: send the session id in the initial FTM
+ * frame.
*/
enum iwl_tof_responder_cfg_flags {
IWL_TOF_RESPONDER_FLAGS_NON_ASAP_SUPPORT = BIT(0),
@@ -200,6 +209,7 @@ enum iwl_tof_responder_cfg_flags {
IWL_TOF_RESPONDER_FLAGS_FTM_TX_ANT = RATE_MCS_ANT_ABC_MSK,
IWL_TOF_RESPONDER_FLAGS_NDP_SUPPORT = BIT(24),
IWL_TOF_RESPONDER_FLAGS_LMR_FEEDBACK = BIT(25),
+ IWL_TOF_RESPONDER_FLAGS_SESSION_ID = BIT(27),
};
/**
@@ -297,13 +307,13 @@ struct iwl_tof_responder_config_cmd_v7 {
* @r2i_ndp_params: parameters for R2I NDP.
* bits 0 - 2: max number of LTF repetitions
* bits 3 - 5: max number of spatial streams (supported values are < 2)
- * bits 6 - 7: max number of total LTFs
- * (&enum ieee80211_range_params_max_total_ltf)
+ * bits 6 - 7: max number of total LTFs see
+ * &enum ieee80211_range_params_max_total_ltf
* @i2r_ndp_params: parameters for I2R NDP.
* bits 0 - 2: max number of LTF repetitions
* bits 3 - 5: max number of spatial streams
- * bits 6 - 7: max number of total LTFs
- * (&enum ieee80211_range_params_max_total_ltf)
+ * bits 6 - 7: max number of total LTFs see
+ * &enum ieee80211_range_params_max_total_ltf
*/
struct iwl_tof_responder_config_cmd_v8 {
__le32 cmd_valid_fields;
@@ -322,6 +332,58 @@ struct iwl_tof_responder_config_cmd_v8 {
u8 i2r_ndp_params;
} __packed; /* TOF_RESPONDER_CONFIG_CMD_API_S_VER_8 */
+/**
+ * struct iwl_tof_responder_config_cmd_v9 - ToF AP mode (for debug)
+ * @cmd_valid_fields: &enum iwl_tof_responder_cmd_valid_field
+ * @responder_cfg_flags: &enum iwl_tof_responder_cfg_flags
+ * @format_bw: bits 0 - 3: &enum iwl_location_frame_format.
+ * bits 4 - 7: &enum iwl_location_bw.
+ * @bss_color: current AP bss_color
+ * @channel_num: current AP Channel
+ * @ctrl_ch_position: coding of the control channel position relative to
+ * the center frequency, see iwl_mvm_get_ctrl_pos()
+ * @sta_id: index of the AP STA when in AP mode
+ * @reserved1: reserved
+ * @toa_offset: Artificial addition [pSec] for the ToA - to be used for debug
+ * purposes, simulating station movement by adding various values
+ * to this field
+ * @common_calib: XVT: common calibration value
+ * @specific_calib: XVT: specific calibration value
+ * @bssid: Current AP BSSID
+ * @r2i_ndp_params: parameters for R2I NDP.
+ * bits 0 - 2: max number of LTF repetitions
+ * bits 3 - 5: max number of spatial streams (supported values are < 2)
+ * bits 6 - 7: max number of total LTFs see
+ * &enum ieee80211_range_params_max_total_ltf
+ * @i2r_ndp_params: parameters for I2R NDP.
+ * bits 0 - 2: max number of LTF repetitions
+ * bits 3 - 5: max number of spatial streams
+ * bits 6 - 7: max number of total LTFs see
+ * &enum ieee80211_range_params_max_total_ltf
+ * @min_time_between_msr: for non trigger based NDP ranging, minimum time
+ * between measurements in milliseconds.
+ * @max_time_between_msr: for non trigger based NDP ranging, maximum time
+ * between measurements in milliseconds.
+ */
+struct iwl_tof_responder_config_cmd_v9 {
+ __le32 cmd_valid_fields;
+ __le32 responder_cfg_flags;
+ u8 format_bw;
+ u8 bss_color;
+ u8 channel_num;
+ u8 ctrl_ch_position;
+ u8 sta_id;
+ u8 reserved1;
+ __le16 toa_offset;
+ __le16 common_calib;
+ __le16 specific_calib;
+ u8 bssid[ETH_ALEN];
+ u8 r2i_ndp_params;
+ u8 i2r_ndp_params;
+ __le16 min_time_between_msr;
+ __le16 max_time_between_msr;
+} __packed; /* TOF_RESPONDER_CONFIG_CMD_API_S_VER_9 */
+
#define IWL_LCI_CIVIC_IE_MAX_SIZE 400
/**
@@ -489,6 +551,10 @@ struct iwl_tof_range_req_ap_entry_v2 {
* instead of fw internal values.
* @IWL_INITIATOR_AP_FLAGS_PMF: request to protect the negotiation and LMR
* frames with protected management frames.
+ * @IWL_INITIATOR_AP_FLAGS_TERMINATE_ON_LMR_FEEDBACK: terminate the session if
+ * the responder asked for LMR feedback although the initiator did not set
+ * the LMR feedback bit in the FTM request. If not set, the initiator will
+ * continue with the session and will provide the LMR feedback.
*/
enum iwl_initiator_ap_flags {
IWL_INITIATOR_AP_FLAGS_ASAP = BIT(1),
@@ -504,6 +570,7 @@ enum iwl_initiator_ap_flags {
IWL_INITIATOR_AP_FLAGS_LMR_FEEDBACK = BIT(12),
IWL_INITIATOR_AP_FLAGS_USE_CALIB = BIT(13),
IWL_INITIATOR_AP_FLAGS_PMF = BIT(14),
+ IWL_INITIATOR_AP_FLAGS_TERMINATE_ON_LMR_FEEDBACK = BIT(15),
};
/**
@@ -795,6 +862,90 @@ struct iwl_tof_range_req_ap_entry_v8 {
} __packed; /* LOCATION_RANGE_REQ_AP_ENTRY_CMD_API_S_VER_8 */
/**
+ * struct iwl_tof_range_req_ap_entry_v9 - AP configuration parameters
+ * @initiator_ap_flags: see &enum iwl_initiator_ap_flags.
+ * @channel_num: AP Channel number
+ * @format_bw: bits 0 - 3: &enum iwl_location_frame_format.
+ * bits 4 - 7: &enum iwl_location_bw.
+ * @ctrl_ch_position: Coding of the control channel position relative to the
+ * center frequency, see iwl_mvm_get_ctrl_pos().
+ * @ftmr_max_retries: Max number of retries to send the FTMR in case of no
+ * reply from the AP.
+ * @bssid: AP's BSSID
+ * @burst_period: For EDCA based ranging: Recommended value to be sent to the
+ * AP. Measurement periodicity In units of 100ms. ignored if
+ * num_of_bursts_exp = 0.
+ * For non trigger based NDP ranging, the maximum time between
+ * measurements in units of milliseconds.
+ * @samples_per_burst: the number of FTM pairs in a single burst (1-31).
+ * @num_of_bursts: Recommended value to be sent to the AP. Base-2 exponent of
+ * the number of measurement iterations (min 2^0 = 1, max 2^14)
+ * @sta_id: the station id of the AP. Only relevant when associated to the AP,
+ * otherwise should be set to &IWL_MVM_INVALID_STA.
+ * @cipher: pairwise cipher suite for secured measurement.
+ * &enum iwl_location_cipher.
+ * @hltk: HLTK to be used for secured 11az measurement
+ * @tk: TK to be used for secured 11az measurement
+ * @calib: An array of calibration values per FTM rx bandwidth.
+ * If &IWL_INITIATOR_AP_FLAGS_USE_CALIB is set, the fw will use the
+ * calibration value that corresponds to the rx bandwidth of the FTM
+ * frame.
+ * @beacon_interval: beacon interval of the AP in TUs. Only required if
+ * &IWL_INITIATOR_AP_FLAGS_TB is set.
+ * @bss_color: the BSS color of the responder. Only valid if
+ * &IWL_INITIATOR_AP_FLAGS_TB or &IWL_INITIATOR_AP_FLAGS_NON_TB is set.
+ * @rx_pn: the next expected PN for protected management frames Rx. LE byte
+ * order. Only valid if &IWL_INITIATOR_AP_FLAGS_SECURED is set and sta_id
+ * is set to &IWL_MVM_INVALID_STA.
+ * @tx_pn: the next PN to use for protected management frames Tx. LE byte
+ * order. Only valid if &IWL_INITIATOR_AP_FLAGS_SECURED is set and sta_id
+ * is set to &IWL_MVM_INVALID_STA.
+ * @r2i_ndp_params: parameters for R2I NDP ranging negotiation.
+ * bits 0 - 2: max LTF repetitions
+ * bits 3 - 5: max number of spatial streams
+ * bits 6 - 7: reserved
+ * @i2r_ndp_params: parameters for I2R NDP ranging negotiation.
+ * bits 0 - 2: max LTF repetitions
+ * bits 3 - 5: max number of spatial streams (supported values are < 2)
+ * bits 6 - 7: reserved
+ * @r2i_max_total_ltf: R2I Max Total LTFs for NDP ranging negotiation.
+ * One of &enum ieee80211_range_params_max_total_ltf.
+ * @i2r_max_total_ltf: I2R Max Total LTFs for NDP ranging negotiation.
+ * One of &enum ieee80211_range_params_max_total_ltf.
+ * @bss_color: the BSS color of the responder. Only valid if
+ * &IWL_INITIATOR_AP_FLAGS_NON_TB or &IWL_INITIATOR_AP_FLAGS_TB is set.
+ * @band: 0 for 5.2 GHz, 1 for 2.4 GHz, 2 for 6GHz
+ * @min_time_between_msr: For non trigger based NDP ranging, the minimum time
+ * between measurements in units of milliseconds
+ */
+struct iwl_tof_range_req_ap_entry_v9 {
+ __le32 initiator_ap_flags;
+ u8 channel_num;
+ u8 format_bw;
+ u8 ctrl_ch_position;
+ u8 ftmr_max_retries;
+ u8 bssid[ETH_ALEN];
+ __le16 burst_period;
+ u8 samples_per_burst;
+ u8 num_of_bursts;
+ u8 sta_id;
+ u8 cipher;
+ u8 hltk[HLTK_11AZ_LEN];
+ u8 tk[TK_11AZ_LEN];
+ __le16 calib[IWL_TOF_BW_NUM];
+ u16 beacon_interval;
+ u8 rx_pn[IEEE80211_CCMP_PN_LEN];
+ u8 tx_pn[IEEE80211_CCMP_PN_LEN];
+ u8 r2i_ndp_params;
+ u8 i2r_ndp_params;
+ u8 r2i_max_total_ltf;
+ u8 i2r_max_total_ltf;
+ u8 bss_color;
+ u8 band;
+ __le16 min_time_between_msr;
+} __packed; /* LOCATION_RANGE_REQ_AP_ENTRY_CMD_API_S_VER_9 */
+
+/**
* enum iwl_tof_response_mode
* @IWL_MVM_TOF_RESPONSE_ASAP: report each AP measurement separately as soon as
* possible (not supported for this release)
@@ -1043,6 +1194,34 @@ struct iwl_tof_range_req_cmd_v12 {
struct iwl_tof_range_req_ap_entry_v8 ap[IWL_MVM_TOF_MAX_APS];
} __packed; /* LOCATION_RANGE_REQ_CMD_API_S_VER_12 */
+/**
+ * struct iwl_tof_range_req_cmd_v13 - start measurement cmd
+ * @initiator_flags: see flags @ iwl_tof_initiator_flags
+ * @request_id: A Token incremented per request. The same Token will be
+ * sent back in the range response
+ * @num_of_ap: Number of APs to measure (error if > IWL_MVM_TOF_MAX_APS)
+ * @range_req_bssid: ranging request BSSID
+ * @macaddr_mask: Bits set to 0 shall be copied from the MAC address template.
+ * Bits set to 1 shall be randomized by the UMAC
+ * @macaddr_template: MAC address template to use for non-randomized bits
+ * @req_timeout_ms: Requested timeout of the response in units of milliseconds.
+ * This is the session time for completing the measurement.
+ * @tsf_mac_id: report the measurement start time for each ap in terms of the
+ * TSF of this mac id. 0xff to disable TSF reporting.
+ * @ap: per-AP request data, see &struct iwl_tof_range_req_ap_entry_v9.
+ */
+struct iwl_tof_range_req_cmd_v13 {
+ __le32 initiator_flags;
+ u8 request_id;
+ u8 num_of_ap;
+ u8 range_req_bssid[ETH_ALEN];
+ u8 macaddr_mask[ETH_ALEN];
+ u8 macaddr_template[ETH_ALEN];
+ __le32 req_timeout_ms;
+ __le32 tsf_mac_id;
+ struct iwl_tof_range_req_ap_entry_v9 ap[IWL_MVM_TOF_MAX_APS];
+} __packed; /* LOCATION_RANGE_REQ_CMD_API_S_VER_13 */
+
/*
* enum iwl_tof_range_request_status - status of the sent request
* @IWL_TOF_RANGE_REQUEST_STATUS_SUCCESSFUL - FW successfully received the
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h b/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h
index 93084bbad534..7be7715b431d 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
* Copyright (C) 2017 Intel Deutschland GmbH
*/
#ifndef __iwl_fw_api_mac_h__
@@ -137,12 +137,14 @@ struct iwl_mac_data_ibss {
* early termination detection.
* @FLEXIBLE_TWT_SUPPORTED: AP supports flexible TWT schedule
* @PROTECTED_TWT_SUPPORTED: AP supports protected TWT frames (with 11w)
+ * @BROADCAST_TWT_SUPPORTED: AP and STA support broadcast TWT
*/
enum iwl_mac_data_policy {
TWT_SUPPORTED = BIT(0),
MORE_DATA_ACK_SUPPORTED = BIT(1),
FLEXIBLE_TWT_SUPPORTED = BIT(2),
PROTECTED_TWT_SUPPORTED = BIT(3),
+ BROADCAST_TWT_SUPPORTED = BIT(4),
};
/**
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/offload.h b/drivers/net/wireless/intel/iwlwifi/fw/api/offload.h
index f06214d418aa..5204aa94e72a 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/offload.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/offload.h
@@ -3,6 +3,7 @@
* Copyright (C) 2012-2014 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
+ * Copyright (C) 2021 Intel Corporation
*/
#ifndef __iwl_fw_api_offload_h__
#define __iwl_fw_api_offload_h__
@@ -20,7 +21,7 @@ enum iwl_prot_offload_subcmd_ids {
#define MAX_STORED_BEACON_SIZE 600
/**
- * struct iwl_stored_beacon_notif - Stored beacon notification
+ * struct iwl_stored_beacon_notif_common - Stored beacon notif common fields
*
* @system_time: system time on air rise
* @tsf: TSF on air rise
@@ -29,9 +30,8 @@ enum iwl_prot_offload_subcmd_ids {
* @channel: channel this beacon was received on
* @rates: rate in ucode internal format
* @byte_count: frame's byte count
- * @data: beacon data, length in @byte_count
*/
-struct iwl_stored_beacon_notif {
+struct iwl_stored_beacon_notif_common {
__le32 system_time;
__le64 tsf;
__le32 beacon_timestamp;
@@ -39,7 +39,32 @@ struct iwl_stored_beacon_notif {
__le16 channel;
__le32 rates;
__le32 byte_count;
+} __packed;
+
+/**
+ * struct iwl_stored_beacon_notif_v2 - Stored beacon notification
+ *
+ * @common: fields common for all versions
+ * @data: beacon data, length in @byte_count
+ */
+struct iwl_stored_beacon_notif_v2 {
+ struct iwl_stored_beacon_notif_common common;
u8 data[MAX_STORED_BEACON_SIZE];
} __packed; /* WOWLAN_STROED_BEACON_INFO_S_VER_2 */
+/**
+ * struct iwl_stored_beacon_notif_v3 - Stored beacon notification
+ *
+ * @common: fields common for all versions
+ * @sta_id: station for which the beacon was received
+ * @reserved: reserved for alignment
+ * @data: beacon data, length in @byte_count
+ */
+struct iwl_stored_beacon_notif_v3 {
+ struct iwl_stored_beacon_notif_common common;
+ u8 sta_id;
+ u8 reserved[3];
+ u8 data[MAX_STORED_BEACON_SIZE];
+} __packed; /* WOWLAN_STROED_BEACON_INFO_S_VER_3 */
+
#endif /* __iwl_fw_api_offload_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
index b2605aefc290..8b200379f7c2 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -874,7 +874,7 @@ struct iwl_scan_probe_params_v3 {
u8 reserved;
struct iwl_ssid_ie direct_scan[PROBE_OPTION_MAX];
__le32 short_ssid[SCAN_SHORT_SSID_MAX_SIZE];
- u8 bssid_array[ETH_ALEN][SCAN_BSSID_MAX_SIZE];
+ u8 bssid_array[SCAN_BSSID_MAX_SIZE][ETH_ALEN];
} __packed; /* SCAN_PROBE_PARAMS_API_S_VER_3 */
/**
@@ -894,7 +894,7 @@ struct iwl_scan_probe_params_v4 {
__le16 reserved;
struct iwl_ssid_ie direct_scan[PROBE_OPTION_MAX];
__le32 short_ssid[SCAN_SHORT_SSID_MAX_SIZE];
- u8 bssid_array[ETH_ALEN][SCAN_BSSID_MAX_SIZE];
+ u8 bssid_array[SCAN_BSSID_MAX_SIZE][ETH_ALEN];
} __packed; /* SCAN_PROBE_PARAMS_API_S_VER_4 */
#define SCAN_MAX_NUM_CHANS_V3 67
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h b/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h
index 12b2f2c48387..f1a3e14880e7 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
* Copyright (C) 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -384,13 +384,17 @@ struct iwl_mvm_add_sta_key_cmd_v1 {
* @rx_mic_key: TKIP RX unicast or multicast key
* @tx_mic_key: TKIP TX key
* @transmit_seq_cnt: TSC, transmit packet number
+ *
+ * Note: This is used for both v2 and v3. The difference is in how
+ * common.rx_secur_seq_cnt is used: in v2 it has the strange hole
+ * format, while in v3 it's just a u64.
*/
struct iwl_mvm_add_sta_key_cmd {
struct iwl_mvm_add_sta_key_common common;
__le64 rx_mic_key;
__le64 tx_mic_key;
__le64 transmit_seq_cnt;
-} __packed; /* ADD_MODIFY_STA_KEY_API_S_VER_2 */
+} __packed; /* ADD_MODIFY_STA_KEY_API_S_VER_2, ADD_MODIFY_STA_KEY_API_S_VER_3 */
/**
* enum iwl_mvm_add_sta_rsp_status - status in the response to ADD_STA command
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index df7c55e06f54..6dcafd0a3d4b 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -1517,6 +1517,37 @@ iwl_dump_ini_special_mem_iter(struct iwl_fw_runtime *fwrt,
return sizeof(*range) + le32_to_cpu(range->range_data_size);
}
+static int
+iwl_dump_ini_dbgi_sram_iter(struct iwl_fw_runtime *fwrt,
+ struct iwl_dump_ini_region_data *reg_data,
+ void *range_ptr, int idx)
+{
+ struct iwl_fw_ini_region_tlv *reg = (void *)reg_data->reg_tlv->data;
+ struct iwl_fw_ini_error_dump_range *range = range_ptr;
+ __le32 *val = range->data;
+ u32 prph_data;
+ int i;
+
+ if (!iwl_trans_grab_nic_access(fwrt->trans))
+ return -EBUSY;
+
+ range->range_data_size = reg->dev_addr.size;
+ iwl_write_prph_no_grab(fwrt->trans, DBGI_SRAM_TARGET_ACCESS_CFG,
+ DBGI_SRAM_TARGET_ACCESS_CFG_RESET_ADDRESS_MSK);
+ for (i = 0; i < (le32_to_cpu(reg->dev_addr.size) / 4); i++) {
+ prph_data = iwl_read_prph(fwrt->trans, (i % 2) ?
+ DBGI_SRAM_TARGET_ACCESS_RDATA_MSB :
+ DBGI_SRAM_TARGET_ACCESS_RDATA_LSB);
+ if (prph_data == 0x5a5a5a5a) {
+ iwl_trans_release_nic_access(fwrt->trans);
+ return -EBUSY;
+ }
+ *val++ = cpu_to_le32(prph_data);
+ }
+ iwl_trans_release_nic_access(fwrt->trans);
+ return sizeof(*range) + le32_to_cpu(range->range_data_size);
+}
+
static int iwl_dump_ini_fw_pkt_iter(struct iwl_fw_runtime *fwrt,
struct iwl_dump_ini_region_data *reg_data,
void *range_ptr, int idx)
@@ -1547,7 +1578,7 @@ iwl_dump_ini_mem_fill_header(struct iwl_fw_runtime *fwrt,
dump->header.version = cpu_to_le32(IWL_INI_DUMP_VER);
- return dump->ranges;
+ return dump->data;
}
/**
@@ -1611,7 +1642,7 @@ iwl_dump_ini_mon_fill_header(struct iwl_fw_runtime *fwrt,
data->header.version = cpu_to_le32(IWL_INI_DUMP_VER);
- return data->ranges;
+ return data->data;
}
static void *
@@ -1647,7 +1678,7 @@ iwl_dump_ini_err_table_fill_header(struct iwl_fw_runtime *fwrt,
dump->header.version = cpu_to_le32(IWL_INI_DUMP_VER);
dump->version = reg->err_table.version;
- return dump->ranges;
+ return dump->data;
}
static void *
@@ -1662,7 +1693,7 @@ iwl_dump_ini_special_mem_fill_header(struct iwl_fw_runtime *fwrt,
dump->type = reg->special_mem.type;
dump->version = reg->special_mem.version;
- return dump->ranges;
+ return dump->data;
}
static u32 iwl_dump_ini_mem_ranges(struct iwl_fw_runtime *fwrt,
@@ -2189,6 +2220,12 @@ static const struct iwl_dump_ini_mem_ops iwl_dump_ini_region_ops[] = {
.fill_mem_hdr = iwl_dump_ini_special_mem_fill_header,
.fill_range = iwl_dump_ini_special_mem_iter,
},
+ [IWL_FW_INI_REGION_DBGI_SRAM] = {
+ .get_num_of_ranges = iwl_dump_ini_mem_ranges,
+ .get_size = iwl_dump_ini_mem_get_size,
+ .fill_mem_hdr = iwl_dump_ini_mem_fill_header,
+ .fill_range = iwl_dump_ini_dbgi_sram_iter,
+ },
};
static u32 iwl_dump_ini_trigger(struct iwl_fw_runtime *fwrt,
@@ -2321,7 +2358,7 @@ static void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt,
return;
if (dump_data->monitor_only)
- dump_mask &= IWL_FW_ERROR_DUMP_FW_MONITOR;
+ dump_mask &= BIT(IWL_FW_ERROR_DUMP_FW_MONITOR);
fw_error_dump.trans_ptr = iwl_trans_dump_data(fwrt->trans, dump_mask);
file_len = le32_to_cpu(dump_file->file_len);
@@ -2530,51 +2567,6 @@ int iwl_fw_dbg_collect(struct iwl_fw_runtime *fwrt,
}
IWL_EXPORT_SYMBOL(iwl_fw_dbg_collect);
-int iwl_fw_dbg_ini_collect(struct iwl_fw_runtime *fwrt,
- struct iwl_fwrt_dump_data *dump_data)
-{
- struct iwl_fw_ini_trigger_tlv *trig = dump_data->trig;
- enum iwl_fw_ini_time_point tp_id = le32_to_cpu(trig->time_point);
- u32 occur, delay;
- unsigned long idx;
-
- if (!iwl_fw_ini_trigger_on(fwrt, trig)) {
- IWL_WARN(fwrt, "WRT: Trigger %d is not active, aborting dump\n",
- tp_id);
- return -EINVAL;
- }
-
- delay = le32_to_cpu(trig->dump_delay);
- occur = le32_to_cpu(trig->occurrences);
- if (!occur)
- return 0;
-
- trig->occurrences = cpu_to_le32(--occur);
-
- /* Check there is an available worker.
- * ffz return value is undefined if no zero exists,
- * so check against ~0UL first.
- */
- if (fwrt->dump.active_wks == ~0UL)
- return -EBUSY;
-
- idx = ffz(fwrt->dump.active_wks);
-
- if (idx >= IWL_FW_RUNTIME_DUMP_WK_NUM ||
- test_and_set_bit(fwrt->dump.wks[idx].idx, &fwrt->dump.active_wks))
- return -EBUSY;
-
- fwrt->dump.wks[idx].dump_data = *dump_data;
-
- IWL_WARN(fwrt,
- "WRT: Collecting data: ini trigger %d fired (delay=%dms).\n",
- tp_id, (u32)(delay / USEC_PER_MSEC));
-
- schedule_delayed_work(&fwrt->dump.wks[idx].wk, usecs_to_jiffies(delay));
-
- return 0;
-}
-
int iwl_fw_dbg_collect_trig(struct iwl_fw_runtime *fwrt,
struct iwl_fw_dbg_trigger_tlv *trigger,
const char *fmt, ...)
@@ -2703,6 +2695,58 @@ out:
clear_bit(wk_idx, &fwrt->dump.active_wks);
}
+int iwl_fw_dbg_ini_collect(struct iwl_fw_runtime *fwrt,
+ struct iwl_fwrt_dump_data *dump_data,
+ bool sync)
+{
+ struct iwl_fw_ini_trigger_tlv *trig = dump_data->trig;
+ enum iwl_fw_ini_time_point tp_id = le32_to_cpu(trig->time_point);
+ u32 occur, delay;
+ unsigned long idx;
+
+ if (!iwl_fw_ini_trigger_on(fwrt, trig)) {
+ IWL_WARN(fwrt, "WRT: Trigger %d is not active, aborting dump\n",
+ tp_id);
+ return -EINVAL;
+ }
+
+ delay = le32_to_cpu(trig->dump_delay);
+ occur = le32_to_cpu(trig->occurrences);
+ if (!occur)
+ return 0;
+
+ trig->occurrences = cpu_to_le32(--occur);
+
+ /* Check there is an available worker.
+ * ffz return value is undefined if no zero exists,
+ * so check against ~0UL first.
+ */
+ if (fwrt->dump.active_wks == ~0UL)
+ return -EBUSY;
+
+ idx = ffz(fwrt->dump.active_wks);
+
+ if (idx >= IWL_FW_RUNTIME_DUMP_WK_NUM ||
+ test_and_set_bit(fwrt->dump.wks[idx].idx, &fwrt->dump.active_wks))
+ return -EBUSY;
+
+ fwrt->dump.wks[idx].dump_data = *dump_data;
+
+ if (sync)
+ delay = 0;
+
+ IWL_WARN(fwrt,
+ "WRT: Collecting data: ini trigger %d fired (delay=%dms).\n",
+ tp_id, (u32)(delay / USEC_PER_MSEC));
+
+ schedule_delayed_work(&fwrt->dump.wks[idx].wk, usecs_to_jiffies(delay));
+
+ if (sync)
+ iwl_fw_dbg_collect_sync(fwrt, idx);
+
+ return 0;
+}
+
void iwl_fw_error_dump_wk(struct work_struct *work)
{
struct iwl_fwrt_wk_data *wks =
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
index c0e84ef84f5d..8c3c890066b0 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
@@ -46,7 +46,8 @@ int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt,
int iwl_fw_dbg_error_collect(struct iwl_fw_runtime *fwrt,
enum iwl_fw_dbg_trigger trig_type);
int iwl_fw_dbg_ini_collect(struct iwl_fw_runtime *fwrt,
- struct iwl_fwrt_dump_data *dump_data);
+ struct iwl_fwrt_dump_data *dump_data,
+ bool sync);
int iwl_fw_dbg_collect(struct iwl_fw_runtime *fwrt,
enum iwl_fw_dbg_trigger trig, const char *str,
size_t len, struct iwl_fw_dbg_trigger_tlv *trigger);
@@ -284,7 +285,7 @@ static inline void iwl_fw_umac_set_alive_err_table(struct iwl_trans *trans,
trans->dbg.umac_error_event_table = umac_error_event_table;
}
-static inline void iwl_fw_error_collect(struct iwl_fw_runtime *fwrt)
+static inline void iwl_fw_error_collect(struct iwl_fw_runtime *fwrt, bool sync)
{
enum iwl_fw_ini_time_point tp_id;
@@ -300,7 +301,7 @@ static inline void iwl_fw_error_collect(struct iwl_fw_runtime *fwrt)
tp_id = IWL_FW_INI_TIME_POINT_FW_ASSERT;
}
- iwl_dbg_tlv_time_point(fwrt, tp_id, NULL);
+ _iwl_dbg_tlv_time_point(fwrt, tp_id, NULL, sync);
}
void iwl_fw_error_print_fseq_regs(struct iwl_fw_runtime *fwrt);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
index 9fffac903b93..521ca2bb0e92 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2014, 2018-2021 Intel Corporation
* Copyright (C) 2014-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -305,11 +305,12 @@ struct iwl_fw_ini_error_dump_header {
/**
* struct iwl_fw_ini_error_dump - ini region dump
* @header: the header of this region
- * @ranges: the memory ranges of this region
+ * @data: data of memory ranges in this region,
+ * see &struct iwl_fw_ini_error_dump_range
*/
struct iwl_fw_ini_error_dump {
struct iwl_fw_ini_error_dump_header header;
- struct iwl_fw_ini_error_dump_range ranges[];
+ u8 data[];
} __packed;
/* This bit is used to differentiate between lmac and umac rxf */
@@ -399,12 +400,13 @@ struct iwl_fw_ini_dump_info {
* struct iwl_fw_ini_err_table_dump - ini error table dump
* @header: header of the region
* @version: error table version
- * @ranges: the memory ranges of this this region
+ * @data: data of memory ranges in this region,
+ * see &struct iwl_fw_ini_error_dump_range
*/
struct iwl_fw_ini_err_table_dump {
struct iwl_fw_ini_error_dump_header header;
__le32 version;
- struct iwl_fw_ini_error_dump_range ranges[];
+ u8 data[];
} __packed;
/**
@@ -427,14 +429,15 @@ struct iwl_fw_error_dump_rb {
* @write_ptr: write pointer position in the buffer
* @cycle_cnt: cycles count
* @cur_frag: current fragment in use
- * @ranges: the memory ranges of this this region
+ * @data: data of memory ranges in this region,
+ * see &struct iwl_fw_ini_error_dump_range
*/
struct iwl_fw_ini_monitor_dump {
struct iwl_fw_ini_error_dump_header header;
__le32 write_ptr;
__le32 cycle_cnt;
__le32 cur_frag;
- struct iwl_fw_ini_error_dump_range ranges[];
+ u8 data[];
} __packed;
/**
@@ -442,13 +445,14 @@ struct iwl_fw_ini_monitor_dump {
* @header: header of the region
* @type: type of special memory
* @version: struct special memory version
- * @ranges: the memory ranges of this this region
+ * @data: data of memory ranges in this region,
+ * see &struct iwl_fw_ini_error_dump_range
*/
struct iwl_fw_ini_special_device_memory {
struct iwl_fw_ini_error_dump_header header;
__le16 type;
__le16 version;
- struct iwl_fw_ini_error_dump_range ranges[];
+ u8 data[];
} __packed;
/**
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index 9a8c7b7a0816..6c8e9f3a6af2 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -414,6 +414,7 @@ enum iwl_ucode_tlv_capa {
IWL_UCODE_TLV_CAPA_PROTECTED_TWT = (__force iwl_ucode_tlv_capa_t)56,
IWL_UCODE_TLV_CAPA_FW_RESET_HANDSHAKE = (__force iwl_ucode_tlv_capa_t)57,
IWL_UCODE_TLV_CAPA_PASSIVE_6GHZ_SCAN = (__force iwl_ucode_tlv_capa_t)58,
+ IWL_UCODE_TLV_CAPA_HIDDEN_6GHZ_SCAN = (__force iwl_ucode_tlv_capa_t)59,
IWL_UCODE_TLV_CAPA_BROADCAST_TWT = (__force iwl_ucode_tlv_capa_t)60,
/* set 2 */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
index 2403490cbc26..314ed90c23dd 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
@@ -24,7 +24,7 @@ static bool iwl_pnvm_complete_fn(struct iwl_notif_wait_data *notif_wait,
struct iwl_pnvm_init_complete_ntfy *pnvm_ntf = (void *)pkt->data;
IWL_DEBUG_FW(trans,
- "PNVM complete notification received with status %d\n",
+ "PNVM complete notification received with status 0x%0x\n",
le32_to_cpu(pnvm_ntf->status));
return true;
@@ -37,6 +37,7 @@ static int iwl_pnvm_handle_section(struct iwl_trans *trans, const u8 *data,
u32 sha1 = 0;
u16 mac_type = 0, rf_id = 0;
u8 *pnvm_data = NULL, *tmp;
+ bool hw_match = false;
u32 size = 0;
int ret;
@@ -83,6 +84,9 @@ static int iwl_pnvm_handle_section(struct iwl_trans *trans, const u8 *data,
break;
}
+ if (hw_match)
+ break;
+
mac_type = le16_to_cpup((__le16 *)data);
rf_id = le16_to_cpup((__le16 *)(data + sizeof(__le16)));
@@ -90,15 +94,9 @@ static int iwl_pnvm_handle_section(struct iwl_trans *trans, const u8 *data,
"Got IWL_UCODE_TLV_HW_TYPE mac_type 0x%0x rf_id 0x%0x\n",
mac_type, rf_id);
- if (mac_type != CSR_HW_REV_TYPE(trans->hw_rev) ||
- rf_id != CSR_HW_RFID_TYPE(trans->hw_rf_id)) {
- IWL_DEBUG_FW(trans,
- "HW mismatch, skipping PNVM section, mac_type 0x%0x, rf_id 0x%0x.\n",
- CSR_HW_REV_TYPE(trans->hw_rev), trans->hw_rf_id);
- ret = -ENOENT;
- goto out;
- }
-
+ if (mac_type == CSR_HW_REV_TYPE(trans->hw_rev) &&
+ rf_id == CSR_HW_RFID_TYPE(trans->hw_rf_id))
+ hw_match = true;
break;
case IWL_UCODE_TLV_SEC_RT: {
struct iwl_pnvm_section *section = (void *)data;
@@ -149,6 +147,15 @@ static int iwl_pnvm_handle_section(struct iwl_trans *trans, const u8 *data,
}
done:
+ if (!hw_match) {
+ IWL_DEBUG_FW(trans,
+ "HW mismatch, skipping PNVM section (need mac_type 0x%x rf_id 0x%x)\n",
+ CSR_HW_REV_TYPE(trans->hw_rev),
+ CSR_HW_RFID_TYPE(trans->hw_rf_id));
+ ret = -ENOENT;
+ goto out;
+ }
+
if (!size) {
IWL_DEBUG_FW(trans, "Empty PNVM, skipping.\n");
ret = -ENOENT;
@@ -223,19 +230,10 @@ static int iwl_pnvm_parse(struct iwl_trans *trans, const u8 *data,
static int iwl_pnvm_get_from_fs(struct iwl_trans *trans, u8 **data, size_t *len)
{
const struct firmware *pnvm;
- char pnvm_name[64];
+ char pnvm_name[MAX_PNVM_NAME];
int ret;
- /*
- * The prefix unfortunately includes a hyphen at the end, so
- * don't add the dot here...
- */
- snprintf(pnvm_name, sizeof(pnvm_name), "%spnvm",
- trans->cfg->fw_name_pre);
-
- /* ...but replace the hyphen with the dot here. */
- if (strlen(trans->cfg->fw_name_pre) < sizeof(pnvm_name))
- pnvm_name[strlen(trans->cfg->fw_name_pre) - 1] = '.';
+ iwl_pnvm_get_fs_name(trans, pnvm_name, sizeof(pnvm_name));
ret = firmware_request_nowarn(&pnvm, pnvm_name, trans->dev);
if (ret) {
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.h b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.h
index 61d3d4e0b7d9..203c367dd4de 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.h
@@ -12,7 +12,27 @@
#define MVM_UCODE_PNVM_TIMEOUT (HZ / 4)
+#define MAX_PNVM_NAME 64
+
int iwl_pnvm_load(struct iwl_trans *trans,
struct iwl_notif_wait_data *notif_wait);
+static inline
+void iwl_pnvm_get_fs_name(struct iwl_trans *trans,
+ u8 *pnvm_name, size_t max_len)
+{
+ int pre_len;
+
+ /*
+ * The prefix unfortunately includes a hyphen at the end, so
+ * don't add the dot here...
+ */
+ snprintf(pnvm_name, max_len, "%spnvm", trans->cfg->fw_name_pre);
+
+ /* ...but replace the hyphen with the dot here. */
+ pre_len = strlen(trans->cfg->fw_name_pre);
+ if (pre_len < max_len && pre_len > 0)
+ pnvm_name[pre_len - 1] = '.';
+}
+
#endif /* __IWL_PNVM_H__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index bf6ee56d4d96..7eb534df5331 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -33,6 +33,7 @@ enum iwl_device_family {
IWL_DEVICE_FAMILY_9000,
IWL_DEVICE_FAMILY_22000,
IWL_DEVICE_FAMILY_AX210,
+ IWL_DEVICE_FAMILY_BZ,
};
/*
@@ -321,7 +322,7 @@ struct iwl_fw_mon_regs {
* @host_interrupt_operation_mode: device needs host interrupt operation
* mode set
* @nvm_hw_section_num: the ID of the HW NVM section
- * @mac_addr_from_csr: read HW address from CSR registers
+ * @mac_addr_from_csr: read HW address from CSR registers at this offset
* @features: hw features, any combination of feature_passlist
* @pwr_tx_backoffs: translation table between power limits and backoffs
* @max_tx_agg_size: max TX aggregation size of the ADDBA request/response
@@ -343,6 +344,8 @@ struct iwl_fw_mon_regs {
* supports 256 BA aggregation
* @num_rbds: number of receive buffer descriptors to use
* (only used for multi-queue capable devices)
+ * @mac_addr_csr_base: CSR base register for MAC address access, if not set
+ * assume 0x380
*
* We enable the driver to be backward compatible wrt. hardware features.
* API differences in uCode shouldn't be handled here but through TLVs
@@ -378,7 +381,7 @@ struct iwl_cfg {
internal_wimax_coex:1,
host_interrupt_operation_mode:1,
high_temp:1,
- mac_addr_from_csr:1,
+ mac_addr_from_csr:10,
lp_xtal_workaround:1,
disable_dummy_notification:1,
apmg_not_supported:1,
@@ -512,6 +515,7 @@ extern const char iwl_ax211_name[];
extern const char iwl_ax221_name[];
extern const char iwl_ax231_name[];
extern const char iwl_ax411_name[];
+extern const char iwl_bz_name[];
#if IS_ENABLED(CONFIG_IWLDVM)
extern const struct iwl_cfg iwl5300_agn_cfg;
extern const struct iwl_cfg iwl5100_agn_cfg;
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
index 47e5a17c0f48..cf796403c45c 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
@@ -104,6 +104,10 @@
/* GIO Chicken Bits (PCI Express bus link power management) */
#define CSR_GIO_CHICKEN_BITS (CSR_BASE+0x100)
+/* Doorbell NMI (since Bz) */
+#define CSR_DOORBELL_VECTOR (CSR_BASE + 0x130)
+#define CSR_DOORBELL_VECTOR_NMI BIT(1)
+
/* host chicken bits */
#define CSR_HOST_CHICKEN (CSR_BASE + 0x204)
#define CSR_HOST_CHICKEN_PM_IDLE_SRC_DIS_SB_PME BIT(19)
@@ -266,6 +270,14 @@
#define CSR_GP_CNTRL_REG_FLAG_RFKILL_WAKE_L1A_EN (0x04000000)
#define CSR_GP_CNTRL_REG_FLAG_HW_RF_KILL_SW (0x08000000)
+/* From Bz we use these instead during init/reset flow */
+#define CSR_GP_CNTRL_REG_FLAG_MAC_INIT BIT(6)
+#define CSR_GP_CNTRL_REG_FLAG_ROM_START BIT(7)
+#define CSR_GP_CNTRL_REG_FLAG_MAC_STATUS BIT(20)
+#define CSR_GP_CNTRL_REG_FLAG_BZ_MAC_ACCESS_REQ BIT(21)
+#define CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_STATUS BIT(28)
+#define CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_REQ BIT(29)
+#define CSR_GP_CNTRL_REG_FLAG_SW_RESET BIT(31)
/* HW REV */
#define CSR_HW_REV_DASH(_val) (((_val) & 0x0000003) >> 0)
@@ -604,10 +616,10 @@ enum msix_hw_int_causes {
* HW address related registers *
*****************************************************************************/
-#define CSR_ADDR_BASE (0x380)
-#define CSR_MAC_ADDR0_OTP (CSR_ADDR_BASE)
-#define CSR_MAC_ADDR1_OTP (CSR_ADDR_BASE + 4)
-#define CSR_MAC_ADDR0_STRAP (CSR_ADDR_BASE + 8)
-#define CSR_MAC_ADDR1_STRAP (CSR_ADDR_BASE + 0xC)
+#define CSR_ADDR_BASE(trans) ((trans)->cfg->mac_addr_from_csr)
+#define CSR_MAC_ADDR0_OTP(trans) (CSR_ADDR_BASE(trans) + 0x00)
+#define CSR_MAC_ADDR1_OTP(trans) (CSR_ADDR_BASE(trans) + 0x04)
+#define CSR_MAC_ADDR0_STRAP(trans) (CSR_ADDR_BASE(trans) + 0x08)
+#define CSR_MAC_ADDR1_STRAP(trans) (CSR_ADDR_BASE(trans) + 0x0c)
#endif /* !__iwl_csr_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
index 0ddd255a8cc1..125479b5c0d6 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
@@ -131,8 +131,7 @@ static int iwl_dbg_tlv_alloc_buf_alloc(struct iwl_trans *trans,
goto err;
if (buf_location == IWL_FW_INI_LOCATION_SRAM_PATH &&
- alloc_id != IWL_FW_INI_ALLOCATION_ID_DBGC1 &&
- alloc_id != IWL_FW_INI_ALLOCATION_ID_INTERNAL)
+ alloc_id != IWL_FW_INI_ALLOCATION_ID_DBGC1)
goto err;
trans->dbg.fw_mon_cfg[alloc_id] = *alloc;
@@ -435,13 +434,16 @@ static int iwl_dbg_tlv_parse_bin(struct iwl_trans *trans, const u8 *data,
void iwl_dbg_tlv_load_bin(struct device *dev, struct iwl_trans *trans)
{
const struct firmware *fw;
+ const char *yoyo_bin = "iwl-debug-yoyo.bin";
int res;
if (!iwlwifi_mod_params.enable_ini ||
trans->trans_cfg->device_family <= IWL_DEVICE_FAMILY_9000)
return;
- res = firmware_request_nowarn(&fw, "iwl-debug-yoyo.bin", dev);
+ res = firmware_request_nowarn(&fw, yoyo_bin, dev);
+ IWL_DEBUG_FW(trans, "%s %s\n", res ? "didn't load" : "loaded", yoyo_bin);
+
if (res)
return;
@@ -621,6 +623,7 @@ static int iwl_dbg_tlv_apply_buffer(struct iwl_fw_runtime *fwrt,
.id = WIDE_ID(DEBUG_GROUP, BUFFER_ALLOCATION),
.data[0] = &data,
.len[0] = sizeof(data),
+ .flags = CMD_SEND_IN_RFKILL,
};
int ret, j;
@@ -683,7 +686,7 @@ static void iwl_dbg_tlv_periodic_trig_handler(struct timer_list *t)
};
int ret;
- ret = iwl_fw_dbg_ini_collect(timer_node->fwrt, &dump_data);
+ ret = iwl_fw_dbg_ini_collect(timer_node->fwrt, &dump_data, false);
if (!ret || ret == -EBUSY) {
u32 occur = le32_to_cpu(dump_data.trig->occurrences);
u32 collect_interval = le32_to_cpu(dump_data.trig->data[0]);
@@ -927,7 +930,7 @@ static bool iwl_dbg_tlv_check_fw_pkt(struct iwl_fw_runtime *fwrt,
}
static int
-iwl_dbg_tlv_tp_trigger(struct iwl_fw_runtime *fwrt,
+iwl_dbg_tlv_tp_trigger(struct iwl_fw_runtime *fwrt, bool sync,
struct list_head *active_trig_list,
union iwl_dbg_tlv_tp_data *tp_data,
bool (*data_check)(struct iwl_fw_runtime *fwrt,
@@ -946,7 +949,7 @@ iwl_dbg_tlv_tp_trigger(struct iwl_fw_runtime *fwrt,
int ret, i;
if (!num_data) {
- ret = iwl_fw_dbg_ini_collect(fwrt, &dump_data);
+ ret = iwl_fw_dbg_ini_collect(fwrt, &dump_data, sync);
if (ret)
return ret;
}
@@ -955,7 +958,7 @@ iwl_dbg_tlv_tp_trigger(struct iwl_fw_runtime *fwrt,
if (!data_check ||
data_check(fwrt, &dump_data, tp_data,
le32_to_cpu(dump_data.trig->data[i]))) {
- ret = iwl_fw_dbg_ini_collect(fwrt, &dump_data);
+ ret = iwl_fw_dbg_ini_collect(fwrt, &dump_data, sync);
if (ret)
return ret;
@@ -1043,9 +1046,10 @@ static void iwl_dbg_tlv_init_cfg(struct iwl_fw_runtime *fwrt)
}
}
-void iwl_dbg_tlv_time_point(struct iwl_fw_runtime *fwrt,
- enum iwl_fw_ini_time_point tp_id,
- union iwl_dbg_tlv_tp_data *tp_data)
+void _iwl_dbg_tlv_time_point(struct iwl_fw_runtime *fwrt,
+ enum iwl_fw_ini_time_point tp_id,
+ union iwl_dbg_tlv_tp_data *tp_data,
+ bool sync)
{
struct list_head *hcmd_list, *trig_list;
@@ -1060,12 +1064,12 @@ void iwl_dbg_tlv_time_point(struct iwl_fw_runtime *fwrt,
switch (tp_id) {
case IWL_FW_INI_TIME_POINT_EARLY:
iwl_dbg_tlv_init_cfg(fwrt);
- iwl_dbg_tlv_tp_trigger(fwrt, trig_list, tp_data, NULL);
+ iwl_dbg_tlv_tp_trigger(fwrt, sync, trig_list, tp_data, NULL);
break;
case IWL_FW_INI_TIME_POINT_AFTER_ALIVE:
iwl_dbg_tlv_apply_buffers(fwrt);
iwl_dbg_tlv_send_hcmds(fwrt, hcmd_list);
- iwl_dbg_tlv_tp_trigger(fwrt, trig_list, tp_data, NULL);
+ iwl_dbg_tlv_tp_trigger(fwrt, sync, trig_list, tp_data, NULL);
break;
case IWL_FW_INI_TIME_POINT_PERIODIC:
iwl_dbg_tlv_set_periodic_trigs(fwrt);
@@ -1075,13 +1079,13 @@ void iwl_dbg_tlv_time_point(struct iwl_fw_runtime *fwrt,
case IWL_FW_INI_TIME_POINT_MISSED_BEACONS:
case IWL_FW_INI_TIME_POINT_FW_DHC_NOTIFICATION:
iwl_dbg_tlv_send_hcmds(fwrt, hcmd_list);
- iwl_dbg_tlv_tp_trigger(fwrt, trig_list, tp_data,
+ iwl_dbg_tlv_tp_trigger(fwrt, sync, trig_list, tp_data,
iwl_dbg_tlv_check_fw_pkt);
break;
default:
iwl_dbg_tlv_send_hcmds(fwrt, hcmd_list);
- iwl_dbg_tlv_tp_trigger(fwrt, trig_list, tp_data, NULL);
+ iwl_dbg_tlv_tp_trigger(fwrt, sync, trig_list, tp_data, NULL);
break;
}
}
-IWL_EXPORT_SYMBOL(iwl_dbg_tlv_time_point);
+IWL_EXPORT_SYMBOL(_iwl_dbg_tlv_time_point);
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.h b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.h
index 92c720527946..c12b1fd3f479 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#ifndef __iwl_dbg_tlv_h__
#define __iwl_dbg_tlv_h__
@@ -48,9 +48,25 @@ void iwl_dbg_tlv_free(struct iwl_trans *trans);
void iwl_dbg_tlv_alloc(struct iwl_trans *trans, const struct iwl_ucode_tlv *tlv,
bool ext);
void iwl_dbg_tlv_init(struct iwl_trans *trans);
-void iwl_dbg_tlv_time_point(struct iwl_fw_runtime *fwrt,
- enum iwl_fw_ini_time_point tp_id,
- union iwl_dbg_tlv_tp_data *tp_data);
+void _iwl_dbg_tlv_time_point(struct iwl_fw_runtime *fwrt,
+ enum iwl_fw_ini_time_point tp_id,
+ union iwl_dbg_tlv_tp_data *tp_data,
+ bool sync);
+
+static inline void iwl_dbg_tlv_time_point(struct iwl_fw_runtime *fwrt,
+ enum iwl_fw_ini_time_point tp_id,
+ union iwl_dbg_tlv_tp_data *tp_data)
+{
+ _iwl_dbg_tlv_time_point(fwrt, tp_id, tp_data, false);
+}
+
+static inline void iwl_dbg_tlv_time_point_sync(struct iwl_fw_runtime *fwrt,
+ enum iwl_fw_ini_time_point tp_id,
+ union iwl_dbg_tlv_tp_data *tp_data)
+{
+ _iwl_dbg_tlv_time_point(fwrt, tp_id, tp_data, true);
+}
+
void iwl_dbg_tlv_del_timers(struct iwl_trans *trans);
#endif /* __iwl_dbg_tlv_h__*/
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index 977dce686bdb..77124b8b235e 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -78,7 +78,7 @@ enum {
};
/* Protects the table contents, i.e. the ops pointer & drv list */
-static struct mutex iwlwifi_opmode_table_mtx;
+static DEFINE_MUTEX(iwlwifi_opmode_table_mtx);
static struct iwlwifi_opmode_table {
const char *name; /* name: iwldvm, iwlmvm, etc */
const struct iwl_op_mode_ops *ops; /* pointer to op_mode ops */
@@ -1754,8 +1754,6 @@ static int __init iwl_drv_init(void)
{
int i, err;
- mutex_init(&iwlwifi_opmode_table_mtx);
-
for (i = 0; i < ARRAY_SIZE(iwlwifi_opmode_table); i++)
INIT_LIST_HEAD(&iwlwifi_opmode_table[i].drv);
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.c b/drivers/net/wireless/intel/iwlwifi/iwl-io.c
index 33d42e08d5b8..2517c4ae07ab 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-io.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2003-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2003-2014, 2018-2021 Intel Corporation
* Copyright (C) 2015-2016 Intel Deutschland GmbH
*/
#include <linux/delay.h>
@@ -213,9 +213,12 @@ void iwl_force_nmi(struct iwl_trans *trans)
else if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
iwl_write_umac_prph(trans, UREG_NIC_SET_NMI_DRIVER,
UREG_NIC_SET_NMI_DRIVER_NMI_FROM_DRIVER);
- else
+ else if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_BZ)
iwl_write_umac_prph(trans, UREG_DOORBELL_TO_ISR6,
UREG_DOORBELL_TO_ISR6_NMI_BIT);
+ else
+ iwl_write32(trans, CSR_DOORBELL_VECTOR,
+ CSR_DOORBELL_VECTOR_NMI);
}
IWL_EXPORT_SYMBOL(iwl_force_nmi);
@@ -398,6 +401,7 @@ int iwl_dump_fh(struct iwl_trans *trans, char **buf)
int iwl_finish_nic_init(struct iwl_trans *trans,
const struct iwl_cfg_trans_params *cfg_trans)
{
+ u32 poll_ready;
int err;
if (cfg_trans->bisr_workaround) {
@@ -409,7 +413,16 @@ int iwl_finish_nic_init(struct iwl_trans *trans,
* Set "initialization complete" bit to move adapter from
* D0U* --> D0A* (powered-up active) state.
*/
- iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+ if (cfg_trans->device_family >= IWL_DEVICE_FAMILY_BZ) {
+ iwl_set_bit(trans, CSR_GP_CNTRL,
+ CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY |
+ CSR_GP_CNTRL_REG_FLAG_MAC_INIT);
+ poll_ready = CSR_GP_CNTRL_REG_FLAG_MAC_STATUS;
+ } else {
+ iwl_set_bit(trans, CSR_GP_CNTRL,
+ CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+ poll_ready = CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY;
+ }
if (cfg_trans->device_family == IWL_DEVICE_FAMILY_8000)
udelay(2);
@@ -419,10 +432,7 @@ int iwl_finish_nic_init(struct iwl_trans *trans,
* device-internal resources is supported, e.g. iwl_write_prph()
* and accesses to uCode SRAM.
*/
- err = iwl_poll_bit(trans, CSR_GP_CNTRL,
- CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY,
- CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY,
- 25000);
+ err = iwl_poll_bit(trans, CSR_GP_CNTRL, poll_ready, poll_ready, 25000);
if (err < 0)
IWL_DEBUG_INFO(trans, "Failed to wake NIC\n");
@@ -468,5 +478,5 @@ void iwl_trans_sync_nmi_with_addr(struct iwl_trans *trans, u32 inta_addr,
if (interrupts_enabled)
iwl_trans_interrupts(trans, true);
- iwl_trans_fw_error(trans);
+ iwl_trans_fw_error(trans, false);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index 850648ebd61c..475f951d4b1e 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -549,7 +549,8 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
.mac_cap_info[2] =
IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP,
.mac_cap_info[3] =
- IEEE80211_HE_MAC_CAP3_OMI_CONTROL,
+ IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
+ IEEE80211_HE_MAC_CAP3_RX_CTRL_FRAME_TO_MULTIBSS,
.mac_cap_info[4] =
IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU |
IEEE80211_HE_MAC_CAP4_MULTI_TID_AGG_TX_QOS_B39,
@@ -568,7 +569,8 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A |
IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD,
.phy_cap_info[2] =
- IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US,
+ IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US |
+ IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ,
.phy_cap_info[3] =
IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_NO_DCM |
IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_1 |
@@ -595,6 +597,8 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB |
IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB |
IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_RESERVED,
+ .phy_cap_info[10] =
+ IEEE80211_HE_PHY_CAP10_HE_MU_M1RU_MAX_LTF,
},
/*
* Set default Tx/Rx HE MCS NSS Support field.
@@ -634,6 +638,7 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
.phy_cap_info[1] =
IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD,
.phy_cap_info[2] =
+ IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ |
IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US,
.phy_cap_info[3] =
IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_NO_DCM |
@@ -742,6 +747,8 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans,
IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
if ((tx_chains & rx_chains) == ANT_AB) {
+ iftype_data->he_cap.he_cap_elem.phy_cap_info[2] |=
+ IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ;
iftype_data->he_cap.he_cap_elem.phy_cap_info[5] |=
IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_2 |
IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2;
@@ -958,8 +965,10 @@ static void iwl_flip_hw_address(__le32 mac_addr0, __le32 mac_addr1, u8 *dest)
static void iwl_set_hw_address_from_csr(struct iwl_trans *trans,
struct iwl_nvm_data *data)
{
- __le32 mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_STRAP));
- __le32 mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_STRAP));
+ __le32 mac_addr0 = cpu_to_le32(iwl_read32(trans,
+ CSR_MAC_ADDR0_STRAP(trans)));
+ __le32 mac_addr1 = cpu_to_le32(iwl_read32(trans,
+ CSR_MAC_ADDR1_STRAP(trans)));
iwl_flip_hw_address(mac_addr0, mac_addr1, data->hw_addr);
/*
@@ -969,8 +978,8 @@ static void iwl_set_hw_address_from_csr(struct iwl_trans *trans,
if (is_valid_ether_addr(data->hw_addr))
return;
- mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_OTP));
- mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_OTP));
+ mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_OTP(trans)));
+ mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_OTP(trans)));
iwl_flip_hw_address(mac_addr0, mac_addr1, data->hw_addr);
}
@@ -1373,6 +1382,25 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
reg_query_regdb_wmm(regd->alpha2, center_freq, rule);
}
+ /*
+ * Certain firmware versions might report no valid channels
+ * if booted in RF-kill, i.e. not all calibrations etc. are
+ * running. We'll get out of this situation later when the
+ * rfkill is removed and we update the regdomain again, but
+ * since cfg80211 doesn't accept an empty regdomain, add a
+ * dummy (unusable) rule here in this case so we can init.
+ */
+ if (!valid_rules) {
+ valid_rules = 1;
+ rule = &regd->reg_rules[valid_rules - 1];
+ rule->freq_range.start_freq_khz = MHZ_TO_KHZ(2412);
+ rule->freq_range.end_freq_khz = MHZ_TO_KHZ(2413);
+ rule->freq_range.max_bandwidth_khz = MHZ_TO_KHZ(1);
+ rule->power_rule.max_antenna_gain = DBI_TO_MBI(6);
+ rule->power_rule.max_eirp =
+ DBM_TO_MBM(IWL_DEFAULT_MAX_TX_POWER);
+ }
+
regd->n_reg_rules = valid_rules;
/*
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h b/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h
index cf9c64090014..af5f9b210f22 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h
@@ -78,7 +78,7 @@ struct iwl_cfg;
* there are Tx packets pending in the transport layer.
* Must be atomic
* @nic_error: error notification. Must be atomic and must be called with BH
- * disabled.
+ * disabled, unless the sync parameter is true.
* @cmd_queue_full: Called when the command queue gets full. Must be atomic and
* called with BH disabled.
* @nic_config: configure NIC, called before firmware is started.
@@ -102,7 +102,7 @@ struct iwl_op_mode_ops {
void (*queue_not_full)(struct iwl_op_mode *op_mode, int queue);
bool (*hw_rf_kill)(struct iwl_op_mode *op_mode, bool state);
void (*free_skb)(struct iwl_op_mode *op_mode, struct sk_buff *skb);
- void (*nic_error)(struct iwl_op_mode *op_mode);
+ void (*nic_error)(struct iwl_op_mode *op_mode, bool sync);
void (*cmd_queue_full)(struct iwl_op_mode *op_mode);
void (*nic_config)(struct iwl_op_mode *op_mode);
void (*wimax_active)(struct iwl_op_mode *op_mode);
@@ -181,9 +181,9 @@ static inline void iwl_op_mode_free_skb(struct iwl_op_mode *op_mode,
op_mode->ops->free_skb(op_mode, skb);
}
-static inline void iwl_op_mode_nic_error(struct iwl_op_mode *op_mode)
+static inline void iwl_op_mode_nic_error(struct iwl_op_mode *op_mode, bool sync)
{
- op_mode->ops->nic_error(op_mode);
+ op_mode->ops->nic_error(op_mode, sync);
}
static inline void iwl_op_mode_cmd_queue_full(struct iwl_op_mode *op_mode)
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
index 9a9e714bf9af..d0a7d58336a9 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
@@ -348,6 +348,13 @@
#define RFIC_REG_RD 0xAD0470
#define WFPM_CTRL_REG 0xA03030
#define WFPM_GP2 0xA030B4
+
+/* DBGI SRAM Register details */
+#define DBGI_SRAM_TARGET_ACCESS_CFG 0x00A2E14C
+#define DBGI_SRAM_TARGET_ACCESS_CFG_RESET_ADDRESS_MSK 0x10000
+#define DBGI_SRAM_TARGET_ACCESS_RDATA_LSB 0x00A2E154
+#define DBGI_SRAM_TARGET_ACCESS_RDATA_MSB 0x00A2E158
+
enum {
ENABLE_WFPM = BIT(31),
WFPM_AUX_CTL_AUX_IF_MAC_OWNER_MSK = 0x80000000,
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
index 0199d7a5a648..8f0ff540f439 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
@@ -887,7 +887,7 @@ struct iwl_trans_txqs {
bool bc_table_dword;
u8 page_offs;
u8 dev_cmd_offs;
- struct __percpu iwl_tso_hdr_page * tso_hdr_page;
+ struct iwl_tso_hdr_page __percpu *tso_hdr_page;
struct {
u8 fifo;
@@ -1385,14 +1385,14 @@ iwl_trans_release_nic_access(struct iwl_trans *trans)
__release(nic_access);
}
-static inline void iwl_trans_fw_error(struct iwl_trans *trans)
+static inline void iwl_trans_fw_error(struct iwl_trans *trans, bool sync)
{
if (WARN_ON_ONCE(!trans->op_mode))
return;
/* prevent double restarts due to the same erroneous FW */
if (!test_and_set_bit(STATUS_FW_ERROR, &trans->status)) {
- iwl_op_mode_nic_error(trans->op_mode);
+ iwl_op_mode_nic_error(trans->op_mode, sync);
trans->state = IWL_TRANS_NO_FW;
}
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
index 1343f25f1090..9d0d01f27d92 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
- * Copyright (C) 2013-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2013-2014, 2018-2021 Intel Corporation
* Copyright (C) 2015 Intel Deutschland GmbH
*/
#ifndef __MVM_CONSTANTS_H
@@ -93,6 +93,7 @@
#define IWL_MVM_ENABLE_EBS 1
#define IWL_MVM_FTM_INITIATOR_ALGO IWL_TOF_ALGO_TYPE_MAX_LIKE
#define IWL_MVM_FTM_INITIATOR_DYNACK true
+#define IWL_MVM_FTM_LMR_FEEDBACK_TERMINATE false
#define IWL_MVM_FTM_R2I_MAX_REP 7
#define IWL_MVM_FTM_I2R_MAX_REP 7
#define IWL_MVM_FTM_R2I_MAX_STS 1
@@ -102,6 +103,8 @@
#define IWL_MVM_FTM_INITIATOR_SECURE_LTF false
#define IWL_MVM_FTM_RESP_NDP_SUPPORT true
#define IWL_MVM_FTM_RESP_LMR_FEEDBACK_SUPPORT true
+#define IWL_MVM_FTM_NON_TB_MIN_TIME_BETWEEN_MSR 5
+#define IWL_MVM_FTM_NON_TB_MAX_TIME_BETWEEN_MSR 1000
#define IWL_MVM_D3_DEBUG false
#define IWL_MVM_USE_TWT true
#define IWL_MVM_AMPDU_CONSEC_DROPS_DELBA 10
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 6a259d867d90..0e97d5e6c644 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -101,11 +101,8 @@ static const u8 *iwl_mvm_find_max_pn(struct ieee80211_key_conf *key,
return ret;
}
-struct wowlan_key_data {
- struct iwl_wowlan_rsc_tsc_params_cmd *rsc_tsc;
- struct iwl_wowlan_tkip_params_cmd *tkip;
- struct iwl_wowlan_kek_kck_material_cmd_v4 *kek_kck_cmd;
- bool error, use_rsc_tsc, use_tkip, configure_keys;
+struct wowlan_key_reprogram_data {
+ bool error;
int wep_key_idx;
};
@@ -117,15 +114,8 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
{
struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
- struct wowlan_key_data *data = _data;
- struct aes_sc *aes_sc, *aes_tx_sc = NULL;
- struct tkip_sc *tkip_sc, *tkip_tx_sc = NULL;
- struct iwl_p1k_cache *rx_p1ks;
- u8 *rx_mic_key;
- struct ieee80211_key_seq seq;
- u32 cur_rx_iv32 = 0;
- u16 p1k[IWL_P1K_SIZE];
- int ret, i;
+ struct wowlan_key_reprogram_data *data = _data;
+ int ret;
switch (key->cipher) {
case WLAN_CIPHER_SUITE_WEP40:
@@ -162,18 +152,14 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
wkc.wep_key.key_offset = data->wep_key_idx;
}
- if (data->configure_keys) {
- mutex_lock(&mvm->mutex);
- ret = iwl_mvm_send_cmd_pdu(mvm, WEP_KEY, 0,
- sizeof(wkc), &wkc);
- data->error = ret != 0;
-
- mvm->ptk_ivlen = key->iv_len;
- mvm->ptk_icvlen = key->icv_len;
- mvm->gtk_ivlen = key->iv_len;
- mvm->gtk_icvlen = key->icv_len;
- mutex_unlock(&mvm->mutex);
- }
+ /*
+ * The WEP key command and the IV/ICV length bookkeeping must be
+ * done under mvm->mutex; drop it again before returning so the
+ * lock is not leaked out of the key iterator.
+ */
+ mutex_lock(&mvm->mutex);
+ ret = iwl_mvm_send_cmd_pdu(mvm, WEP_KEY, 0, sizeof(wkc), &wkc);
+ data->error = ret != 0;
+
+ mvm->ptk_ivlen = key->iv_len;
+ mvm->ptk_icvlen = key->icv_len;
+ mvm->gtk_ivlen = key->iv_len;
+ mvm->gtk_icvlen = key->icv_len;
+ mutex_unlock(&mvm->mutex);
/* don't upload key again */
return;
@@ -183,10 +169,8 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
return;
case WLAN_CIPHER_SUITE_BIP_GMAC_256:
case WLAN_CIPHER_SUITE_BIP_GMAC_128:
- data->kek_kck_cmd->igtk_cipher = cpu_to_le32(STA_KEY_FLG_GCMP);
return;
case WLAN_CIPHER_SUITE_AES_CMAC:
- data->kek_kck_cmd->igtk_cipher = cpu_to_le32(STA_KEY_FLG_CCM);
/*
* Ignore CMAC keys -- the WoWLAN firmware doesn't support them
* but we also shouldn't abort suspend due to that. It does have
@@ -196,6 +180,58 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
*/
return;
case WLAN_CIPHER_SUITE_TKIP:
+ case WLAN_CIPHER_SUITE_CCMP:
+ case WLAN_CIPHER_SUITE_GCMP:
+ case WLAN_CIPHER_SUITE_GCMP_256:
+ break;
+ }
+
+ mutex_lock(&mvm->mutex);
+ /*
+ * The D3 firmware hardcodes the key offset 0 as the key it
+ * uses to transmit packets to the AP, i.e. the PTK.
+ */
+ if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) {
+ mvm->ptk_ivlen = key->iv_len;
+ mvm->ptk_icvlen = key->icv_len;
+ ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, 0);
+ } else {
+ /*
+ * firmware only supports TSC/RSC for a single key,
+ * so if there are multiple keep overwriting them
+ * with new ones -- this relies on mac80211 doing
+ * list_add_tail().
+ */
+ mvm->gtk_ivlen = key->iv_len;
+ mvm->gtk_icvlen = key->icv_len;
+ ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, 1);
+ }
+ mutex_unlock(&mvm->mutex);
+ data->error = ret != 0;
+}
+
+struct wowlan_key_rsc_tsc_data {
+ struct iwl_wowlan_rsc_tsc_params_cmd_v4 *rsc_tsc;
+ bool have_rsc_tsc;
+};
+
+static void iwl_mvm_wowlan_get_rsc_tsc_data(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta,
+ struct ieee80211_key_conf *key,
+ void *_data)
+{
+ struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+ struct wowlan_key_rsc_tsc_data *data = _data;
+ struct aes_sc *aes_sc;
+ struct tkip_sc *tkip_sc, *tkip_tx_sc = NULL;
+ struct ieee80211_key_seq seq;
+ int i;
+
+ switch (key->cipher) {
+ default:
+ break;
+ case WLAN_CIPHER_SUITE_TKIP:
if (sta) {
u64 pn64;
@@ -204,28 +240,12 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
tkip_tx_sc =
&data->rsc_tsc->params.all_tsc_rsc.tkip.tsc;
- rx_p1ks = data->tkip->rx_uni;
-
pn64 = atomic64_read(&key->tx_pn);
tkip_tx_sc->iv16 = cpu_to_le16(TKIP_PN_TO_IV16(pn64));
tkip_tx_sc->iv32 = cpu_to_le32(TKIP_PN_TO_IV32(pn64));
-
- ieee80211_get_tkip_p1k_iv(key, TKIP_PN_TO_IV32(pn64),
- p1k);
- iwl_mvm_convert_p1k(p1k, data->tkip->tx.p1k);
-
- memcpy(data->tkip->mic_keys.tx,
- &key->key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY],
- IWL_MIC_KEY_SIZE);
-
- rx_mic_key = data->tkip->mic_keys.rx_unicast;
} else {
tkip_sc =
data->rsc_tsc->params.all_tsc_rsc.tkip.multicast_rsc;
- rx_p1ks = data->tkip->rx_multi;
- rx_mic_key = data->tkip->mic_keys.rx_mcast;
- data->kek_kck_cmd->gtk_cipher =
- cpu_to_le32(STA_KEY_FLG_TKIP);
}
/*
@@ -237,29 +257,15 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
ieee80211_get_key_rx_seq(key, i, &seq);
tkip_sc[i].iv16 = cpu_to_le16(seq.tkip.iv16);
tkip_sc[i].iv32 = cpu_to_le32(seq.tkip.iv32);
- /* wrapping isn't allowed, AP must rekey */
- if (seq.tkip.iv32 > cur_rx_iv32)
- cur_rx_iv32 = seq.tkip.iv32;
}
- ieee80211_get_tkip_rx_p1k(key, vif->bss_conf.bssid,
- cur_rx_iv32, p1k);
- iwl_mvm_convert_p1k(p1k, rx_p1ks[0].p1k);
- ieee80211_get_tkip_rx_p1k(key, vif->bss_conf.bssid,
- cur_rx_iv32 + 1, p1k);
- iwl_mvm_convert_p1k(p1k, rx_p1ks[1].p1k);
-
- memcpy(rx_mic_key,
- &key->key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY],
- IWL_MIC_KEY_SIZE);
-
- data->use_tkip = true;
- data->use_rsc_tsc = true;
+ data->have_rsc_tsc = true;
break;
case WLAN_CIPHER_SUITE_CCMP:
case WLAN_CIPHER_SUITE_GCMP:
case WLAN_CIPHER_SUITE_GCMP_256:
if (sta) {
+ struct aes_sc *aes_tx_sc;
u64 pn64;
aes_sc =
@@ -272,10 +278,6 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
} else {
aes_sc =
data->rsc_tsc->params.all_tsc_rsc.aes.multicast_rsc;
- data->kek_kck_cmd->gtk_cipher =
- key->cipher == WLAN_CIPHER_SUITE_CCMP ?
- cpu_to_le32(STA_KEY_FLG_CCM) :
- cpu_to_le32(STA_KEY_FLG_GCMP);
}
/*
@@ -320,35 +322,301 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
((u64)pn[0] << 40));
}
}
- data->use_rsc_tsc = true;
+ data->have_rsc_tsc = true;
break;
}
+}
- IWL_DEBUG_WOWLAN(mvm, "GTK cipher %d\n", data->kek_kck_cmd->gtk_cipher);
+struct wowlan_key_rsc_v5_data {
+ struct iwl_wowlan_rsc_tsc_params_cmd *rsc;
+ bool have_rsc;
+ int gtks;
+ int gtk_ids[4];
+};
- if (data->configure_keys) {
- mutex_lock(&mvm->mutex);
+static void iwl_mvm_wowlan_get_rsc_v5_data(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta,
+ struct ieee80211_key_conf *key,
+ void *_data)
+{
+ struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+ struct wowlan_key_rsc_v5_data *data = _data;
+ struct ieee80211_key_seq seq;
+ __le64 *rsc;
+ int i;
+
+ /* only for ciphers that can be PTK/GTK */
+ switch (key->cipher) {
+ default:
+ return;
+ case WLAN_CIPHER_SUITE_TKIP:
+ case WLAN_CIPHER_SUITE_CCMP:
+ case WLAN_CIPHER_SUITE_GCMP:
+ case WLAN_CIPHER_SUITE_GCMP_256:
+ break;
+ }
+
+ if (sta) {
+ rsc = data->rsc->ucast_rsc;
+ } else {
+ /* gtks is used as an index below, so it must be < array size */
+ if (WARN_ON(data->gtks >= ARRAY_SIZE(data->gtk_ids)))
+ return;
+ data->gtk_ids[data->gtks] = key->keyidx;
+ rsc = data->rsc->mcast_rsc[data->gtks % 2];
+ /* keyidx indexes mcast_key_id_map, reject idx == array size too */
+ if (WARN_ON(key->keyidx >=
+ ARRAY_SIZE(data->rsc->mcast_key_id_map)))
+ return;
+ data->rsc->mcast_key_id_map[key->keyidx] = data->gtks % 2;
+ if (data->gtks >= 2) {
+ int prev = data->gtks - 2;
+ int prev_idx = data->gtk_ids[prev];
+
+ data->rsc->mcast_key_id_map[prev_idx] =
+ IWL_MCAST_KEY_MAP_INVALID;
+ }
+ data->gtks++;
+ }
+
+ switch (key->cipher) {
+ default:
+ WARN_ON(1);
+ break;
+ case WLAN_CIPHER_SUITE_TKIP:
+
+ /*
+ * For non-QoS this relies on the fact that both the uCode and
+ * mac80211 use TID 0 (as they need to to avoid replay attacks)
+ * for checking the IV in the frames.
+ */
+ for (i = 0; i < IWL_MAX_TID_COUNT; i++) {
+ ieee80211_get_key_rx_seq(key, i, &seq);
+
+ rsc[i] = cpu_to_le64(((u64)seq.tkip.iv32 << 16) |
+ seq.tkip.iv16);
+ }
+
+ data->have_rsc = true;
+ break;
+ case WLAN_CIPHER_SUITE_CCMP:
+ case WLAN_CIPHER_SUITE_GCMP:
+ case WLAN_CIPHER_SUITE_GCMP_256:
/*
- * The D3 firmware hardcodes the key offset 0 as the key it
- * uses to transmit packets to the AP, i.e. the PTK.
+ * For non-QoS this relies on the fact that both the uCode and
+ * mac80211/our RX code use TID 0 for checking the PN.
*/
- if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) {
- mvm->ptk_ivlen = key->iv_len;
- mvm->ptk_icvlen = key->icv_len;
- ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, 0);
+ if (sta) {
+ struct iwl_mvm_sta *mvmsta;
+ struct iwl_mvm_key_pn *ptk_pn;
+ const u8 *pn;
+
+ mvmsta = iwl_mvm_sta_from_mac80211(sta);
+ rcu_read_lock();
+ ptk_pn = rcu_dereference(mvmsta->ptk_pn[key->keyidx]);
+ if (WARN_ON(!ptk_pn)) {
+ rcu_read_unlock();
+ break;
+ }
+
+ for (i = 0; i < IWL_MAX_TID_COUNT; i++) {
+ pn = iwl_mvm_find_max_pn(key, ptk_pn, &seq, i,
+ mvm->trans->num_rx_queues);
+ rsc[i] = cpu_to_le64((u64)pn[5] |
+ ((u64)pn[4] << 8) |
+ ((u64)pn[3] << 16) |
+ ((u64)pn[2] << 24) |
+ ((u64)pn[1] << 32) |
+ ((u64)pn[0] << 40));
+ }
+
+ rcu_read_unlock();
} else {
- /*
- * firmware only supports TSC/RSC for a single key,
- * so if there are multiple keep overwriting them
- * with new ones -- this relies on mac80211 doing
- * list_add_tail().
- */
- mvm->gtk_ivlen = key->iv_len;
- mvm->gtk_icvlen = key->icv_len;
- ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, 1);
+ for (i = 0; i < IWL_MAX_TID_COUNT; i++) {
+ u8 *pn = seq.ccmp.pn;
+
+ ieee80211_get_key_rx_seq(key, i, &seq);
+ rsc[i] = cpu_to_le64((u64)pn[5] |
+ ((u64)pn[4] << 8) |
+ ((u64)pn[3] << 16) |
+ ((u64)pn[2] << 24) |
+ ((u64)pn[1] << 32) |
+ ((u64)pn[0] << 40));
+ }
}
- mutex_unlock(&mvm->mutex);
- data->error = ret != 0;
+ data->have_rsc = true;
+ break;
+ }
+}
+
+static int iwl_mvm_wowlan_config_rsc_tsc(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif)
+{
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ int ver = iwl_fw_lookup_cmd_ver(mvm->fw, LONG_GROUP,
+ WOWLAN_TSC_RSC_PARAM,
+ IWL_FW_CMD_VER_UNKNOWN);
+ int ret;
+
+ if (ver == 5) {
+ struct wowlan_key_rsc_v5_data data = {};
+ int i;
+
+ data.rsc = kmalloc(sizeof(*data.rsc), GFP_KERNEL);
+ if (!data.rsc)
+ return -ENOMEM;
+
+ memset(data.rsc, 0xff, sizeof(*data.rsc));
+
+ for (i = 0; i < ARRAY_SIZE(data.rsc->mcast_key_id_map); i++)
+ data.rsc->mcast_key_id_map[i] =
+ IWL_MCAST_KEY_MAP_INVALID;
+ data.rsc->sta_id = cpu_to_le32(mvmvif->ap_sta_id);
+
+ ieee80211_iter_keys(mvm->hw, vif,
+ iwl_mvm_wowlan_get_rsc_v5_data,
+ &data);
+
+ if (data.have_rsc)
+ ret = iwl_mvm_send_cmd_pdu(mvm, WOWLAN_TSC_RSC_PARAM,
+ CMD_ASYNC, sizeof(*data.rsc),
+ data.rsc);
+ else
+ ret = 0;
+ kfree(data.rsc);
+ } else if (ver == 4 || ver == 2 || ver == IWL_FW_CMD_VER_UNKNOWN) {
+ struct wowlan_key_rsc_tsc_data data = {};
+ int size;
+
+ data.rsc_tsc = kzalloc(sizeof(*data.rsc_tsc), GFP_KERNEL);
+ if (!data.rsc_tsc)
+ return -ENOMEM;
+
+ if (ver == 4) {
+ size = sizeof(*data.rsc_tsc);
+ data.rsc_tsc->sta_id = cpu_to_le32(mvmvif->ap_sta_id);
+ } else {
+ /* ver == 2 || ver == IWL_FW_CMD_VER_UNKNOWN */
+ size = sizeof(data.rsc_tsc->params);
+ }
+
+ ieee80211_iter_keys(mvm->hw, vif,
+ iwl_mvm_wowlan_get_rsc_tsc_data,
+ &data);
+
+ if (data.have_rsc_tsc)
+ ret = iwl_mvm_send_cmd_pdu(mvm, WOWLAN_TSC_RSC_PARAM,
+ CMD_ASYNC, size,
+ data.rsc_tsc);
+ else
+ ret = 0;
+ kfree(data.rsc_tsc);
+ } else {
+ ret = 0;
+ WARN_ON_ONCE(1);
+ }
+
+ return ret;
+}
+
+struct wowlan_key_tkip_data {
+ struct iwl_wowlan_tkip_params_cmd tkip;
+ bool have_tkip_keys;
+};
+
+static void iwl_mvm_wowlan_get_tkip_data(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta,
+ struct ieee80211_key_conf *key,
+ void *_data)
+{
+ struct wowlan_key_tkip_data *data = _data;
+ struct iwl_p1k_cache *rx_p1ks;
+ u8 *rx_mic_key;
+ struct ieee80211_key_seq seq;
+ u32 cur_rx_iv32 = 0;
+ u16 p1k[IWL_P1K_SIZE];
+ int i;
+
+ switch (key->cipher) {
+ default:
+ break;
+ case WLAN_CIPHER_SUITE_TKIP:
+ if (sta) {
+ u64 pn64;
+
+ rx_p1ks = data->tkip.rx_uni;
+
+ pn64 = atomic64_read(&key->tx_pn);
+
+ ieee80211_get_tkip_p1k_iv(key, TKIP_PN_TO_IV32(pn64),
+ p1k);
+ iwl_mvm_convert_p1k(p1k, data->tkip.tx.p1k);
+
+ memcpy(data->tkip.mic_keys.tx,
+ &key->key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY],
+ IWL_MIC_KEY_SIZE);
+
+ rx_mic_key = data->tkip.mic_keys.rx_unicast;
+ } else {
+ rx_p1ks = data->tkip.rx_multi;
+ rx_mic_key = data->tkip.mic_keys.rx_mcast;
+ }
+
+ for (i = 0; i < IWL_NUM_RSC; i++) {
+ /* load the RX counter for this index; seq is otherwise unset */
+ ieee80211_get_key_rx_seq(key, i, &seq);
+ /* wrapping isn't allowed, AP must rekey */
+ if (seq.tkip.iv32 > cur_rx_iv32)
+ cur_rx_iv32 = seq.tkip.iv32;
+ }
+
+ ieee80211_get_tkip_rx_p1k(key, vif->bss_conf.bssid,
+ cur_rx_iv32, p1k);
+ iwl_mvm_convert_p1k(p1k, rx_p1ks[0].p1k);
+ ieee80211_get_tkip_rx_p1k(key, vif->bss_conf.bssid,
+ cur_rx_iv32 + 1, p1k);
+ iwl_mvm_convert_p1k(p1k, rx_p1ks[1].p1k);
+
+ memcpy(rx_mic_key,
+ &key->key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY],
+ IWL_MIC_KEY_SIZE);
+
+ data->have_tkip_keys = true;
+ break;
+ }
+}
+
+struct wowlan_key_gtk_type_iter {
+ struct iwl_wowlan_kek_kck_material_cmd_v4 *kek_kck_cmd;
+};
+
+static void iwl_mvm_wowlan_gtk_type_iter(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta,
+ struct ieee80211_key_conf *key,
+ void *_data)
+{
+ struct wowlan_key_gtk_type_iter *data = _data;
+
+ switch (key->cipher) {
+ default:
+ return;
+ case WLAN_CIPHER_SUITE_BIP_GMAC_256:
+ case WLAN_CIPHER_SUITE_BIP_GMAC_128:
+ data->kek_kck_cmd->igtk_cipher = cpu_to_le32(STA_KEY_FLG_GCMP);
+ return;
+ case WLAN_CIPHER_SUITE_AES_CMAC:
+ data->kek_kck_cmd->igtk_cipher = cpu_to_le32(STA_KEY_FLG_CCM);
+ return;
+ case WLAN_CIPHER_SUITE_CCMP:
+ if (!sta)
+ data->kek_kck_cmd->gtk_cipher =
+ cpu_to_le32(STA_KEY_FLG_CCM);
+ break;
+ case WLAN_CIPHER_SUITE_GCMP:
+ case WLAN_CIPHER_SUITE_GCMP_256:
+ if (!sta)
+ data->kek_kck_cmd->gtk_cipher =
+ cpu_to_le32(STA_KEY_FLG_GCMP);
+ break;
}
}
@@ -713,109 +981,81 @@ iwl_mvm_get_wowlan_config(struct iwl_mvm *mvm,
}
static int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm,
- struct ieee80211_vif *vif,
- u32 cmd_flags)
+ struct ieee80211_vif *vif)
{
- struct iwl_wowlan_kek_kck_material_cmd_v4 kek_kck_cmd = {};
- struct iwl_wowlan_kek_kck_material_cmd_v4 *_kek_kck_cmd = &kek_kck_cmd;
- struct iwl_wowlan_tkip_params_cmd tkip_cmd = {};
bool unified = fw_has_capa(&mvm->fw->ucode_capa,
IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG);
- struct wowlan_key_data key_data = {
- .configure_keys = !unified,
- .use_rsc_tsc = false,
- .tkip = &tkip_cmd,
- .use_tkip = false,
- .kek_kck_cmd = _kek_kck_cmd,
- };
+ struct wowlan_key_reprogram_data key_data = {};
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
int ret;
u8 cmd_ver;
size_t cmd_size;
- key_data.rsc_tsc = kzalloc(sizeof(*key_data.rsc_tsc), GFP_KERNEL);
- if (!key_data.rsc_tsc)
- return -ENOMEM;
-
- /*
- * if we have to configure keys, call ieee80211_iter_keys(),
- * as we need non-atomic context in order to take the
- * required locks.
- */
- /*
- * Note that currently we don't propagate cmd_flags
- * to the iterator. In case of key_data.configure_keys,
- * all the configured commands are SYNC, and
- * iwl_mvm_wowlan_program_keys() will take care of
- * locking/unlocking mvm->mutex.
- */
- ieee80211_iter_keys(mvm->hw, vif, iwl_mvm_wowlan_program_keys,
- &key_data);
+ if (!unified) {
+ /*
+ * if we have to configure keys, call ieee80211_iter_keys(),
+ * as we need non-atomic context in order to take the
+ * required locks.
+ */
+ /*
+ * Note that currently we don't use CMD_ASYNC in the iterator.
+ * When the keys have to be reprogrammed (non-unified image), all
+ * the commands it sends are SYNC, and iwl_mvm_wowlan_program_keys()
+ * will take care of locking/unlocking mvm->mutex.
+ */
+ ieee80211_iter_keys(mvm->hw, vif, iwl_mvm_wowlan_program_keys,
+ &key_data);
- if (key_data.error) {
- ret = -EIO;
- goto out;
+ if (key_data.error)
+ return -EIO;
}
- if (key_data.use_rsc_tsc) {
- int ver = iwl_fw_lookup_cmd_ver(mvm->fw, LONG_GROUP,
- WOWLAN_TSC_RSC_PARAM,
- IWL_FW_CMD_VER_UNKNOWN);
- int size;
-
- if (ver == 4) {
- size = sizeof(*key_data.rsc_tsc);
- key_data.rsc_tsc->sta_id =
- cpu_to_le32(mvmvif->ap_sta_id);
-
- } else if (ver == 2 || ver == IWL_FW_CMD_VER_UNKNOWN) {
- size = sizeof(key_data.rsc_tsc->params);
- } else {
- ret = 0;
- WARN_ON_ONCE(1);
- goto out;
- }
-
- ret = iwl_mvm_send_cmd_pdu(mvm, WOWLAN_TSC_RSC_PARAM,
- cmd_flags,
- size,
- key_data.rsc_tsc);
-
- if (ret)
- goto out;
- }
+ ret = iwl_mvm_wowlan_config_rsc_tsc(mvm, vif);
+ if (ret)
+ return ret;
- if (key_data.use_tkip &&
- !fw_has_api(&mvm->fw->ucode_capa,
+ if (!fw_has_api(&mvm->fw->ucode_capa,
IWL_UCODE_TLV_API_TKIP_MIC_KEYS)) {
int ver = iwl_fw_lookup_cmd_ver(mvm->fw, LONG_GROUP,
WOWLAN_TKIP_PARAM,
IWL_FW_CMD_VER_UNKNOWN);
+ struct wowlan_key_tkip_data tkip_data = {};
int size;
if (ver == 2) {
- size = sizeof(tkip_cmd);
- key_data.tkip->sta_id =
+ size = sizeof(tkip_data.tkip);
+ tkip_data.tkip.sta_id =
cpu_to_le32(mvmvif->ap_sta_id);
} else if (ver == 1 || ver == IWL_FW_CMD_VER_UNKNOWN) {
size = sizeof(struct iwl_wowlan_tkip_params_cmd_ver_1);
} else {
- ret = -EINVAL;
WARN_ON_ONCE(1);
- goto out;
+ return -EINVAL;
}
- /* send relevant data according to CMD version */
- ret = iwl_mvm_send_cmd_pdu(mvm,
- WOWLAN_TKIP_PARAM,
- cmd_flags, size,
- &tkip_cmd);
- if (ret)
- goto out;
+ ieee80211_iter_keys(mvm->hw, vif, iwl_mvm_wowlan_get_tkip_data,
+ &tkip_data);
+
+ if (tkip_data.have_tkip_keys) {
+ /* send relevant data according to CMD version */
+ ret = iwl_mvm_send_cmd_pdu(mvm,
+ WOWLAN_TKIP_PARAM,
+ CMD_ASYNC, size,
+ &tkip_data.tkip);
+ if (ret)
+ return ret;
+ }
}
/* configure rekey data only if offloaded rekey is supported (d3) */
if (mvmvif->rekey_data.valid) {
+ struct iwl_wowlan_kek_kck_material_cmd_v4 kek_kck_cmd = {};
+ struct iwl_wowlan_kek_kck_material_cmd_v4 *_kek_kck_cmd =
+ &kek_kck_cmd;
+ struct wowlan_key_gtk_type_iter gtk_type_data = {
+ .kek_kck_cmd = _kek_kck_cmd,
+ };
+
cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw,
IWL_ALWAYS_LONG_GROUP,
WOWLAN_KEK_KCK_MATERIAL,
@@ -824,6 +1064,9 @@ static int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm,
cmd_ver != IWL_FW_CMD_VER_UNKNOWN))
return -EINVAL;
+ ieee80211_iter_keys(mvm->hw, vif, iwl_mvm_wowlan_gtk_type_iter,
+ &gtk_type_data);
+
memcpy(kek_kck_cmd.kck, mvmvif->rekey_data.kck,
mvmvif->rekey_data.kck_len);
kek_kck_cmd.kck_len = cpu_to_le16(mvmvif->rekey_data.kck_len);
@@ -851,17 +1094,13 @@ static int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm,
IWL_DEBUG_WOWLAN(mvm, "setting akm %d\n",
mvmvif->rekey_data.akm);
- ret = iwl_mvm_send_cmd_pdu(mvm,
- WOWLAN_KEK_KCK_MATERIAL, cmd_flags,
- cmd_size,
- _kek_kck_cmd);
+ ret = iwl_mvm_send_cmd_pdu(mvm, WOWLAN_KEK_KCK_MATERIAL,
+ CMD_ASYNC, cmd_size, _kek_kck_cmd);
if (ret)
- goto out;
+ return ret;
}
- ret = 0;
-out:
- kfree(key_data.rsc_tsc);
- return ret;
+
+ return 0;
}
static int
@@ -893,7 +1132,7 @@ iwl_mvm_wowlan_config(struct iwl_mvm *mvm,
* that isn't really a problem though.
*/
mutex_unlock(&mvm->mutex);
- ret = iwl_mvm_wowlan_config_key_params(mvm, vif, CMD_ASYNC);
+ ret = iwl_mvm_wowlan_config_key_params(mvm, vif);
mutex_lock(&mvm->mutex);
if (ret)
return ret;
@@ -1694,9 +1933,12 @@ iwl_mvm_send_wowlan_get_status(struct iwl_mvm *mvm, u8 sta_id)
status->gtk[0] = v7->gtk[0];
status->igtk[0] = v7->igtk[0];
- } else if (notif_ver == 9 || notif_ver == 10) {
+ } else if (notif_ver == 9 || notif_ver == 10 || notif_ver == 11) {
struct iwl_wowlan_status_v9 *v9 = (void *)cmd.resp_pkt->data;
+ /* these three command versions have same layout and size, the
+ * difference is only in a few not used (reserved) fields.
+ */
status = iwl_mvm_parse_wowlan_status_common_v9(mvm,
cmd.resp_pkt->data,
len);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
index 95f883aba148..5dc39fbb74d6 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
@@ -305,7 +305,6 @@ static ssize_t iwl_dbgfs_sar_geo_profile_read(struct file *file,
int pos = 0;
int bufsz = sizeof(buf);
int tbl_idx;
- u8 *value;
if (!iwl_mvm_firmware_running(mvm))
return -EIO;
@@ -321,16 +320,18 @@ static ssize_t iwl_dbgfs_sar_geo_profile_read(struct file *file,
pos = scnprintf(buf, bufsz,
"SAR geographic profile disabled\n");
} else {
- value = &mvm->fwrt.geo_profiles[tbl_idx - 1].values[0];
-
pos += scnprintf(buf + pos, bufsz - pos,
"Use geographic profile %d\n", tbl_idx);
pos += scnprintf(buf + pos, bufsz - pos,
"2.4GHz:\n\tChain A offset: %hhu dBm\n\tChain B offset: %hhu dBm\n\tmax tx power: %hhu dBm\n",
- value[1], value[2], value[0]);
+ mvm->fwrt.geo_profiles[tbl_idx - 1].bands[0].chains[0],
+ mvm->fwrt.geo_profiles[tbl_idx - 1].bands[0].chains[1],
+ mvm->fwrt.geo_profiles[tbl_idx - 1].bands[0].max);
pos += scnprintf(buf + pos, bufsz - pos,
"5.2GHz:\n\tChain A offset: %hhu dBm\n\tChain B offset: %hhu dBm\n\tmax tx power: %hhu dBm\n",
- value[4], value[5], value[3]);
+ mvm->fwrt.geo_profiles[tbl_idx - 1].bands[1].chains[0],
+ mvm->fwrt.geo_profiles[tbl_idx - 1].bands[1].chains[1],
+ mvm->fwrt.geo_profiles[tbl_idx - 1].bands[1].max);
}
mutex_unlock(&mvm->mutex);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
index 59cef0d89a6d..03e5bf5cb909 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
@@ -754,6 +754,33 @@ iwl_mvm_ftm_set_ndp_params(struct iwl_mvm *mvm,
target->i2r_max_total_ltf = IWL_MVM_FTM_I2R_MAX_TOTAL_LTF;
}
+static int
+iwl_mvm_ftm_put_target_v8(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+ struct cfg80211_pmsr_request_peer *peer,
+ struct iwl_tof_range_req_ap_entry_v8 *target)
+{
+ u32 flags;
+ int ret = iwl_mvm_ftm_put_target_v7(mvm, vif, peer, (void *)target);
+
+ if (ret)
+ return ret;
+
+ iwl_mvm_ftm_set_ndp_params(mvm, target);
+
+ /*
+ * If secure LTF is turned off, replace the flag with PMF only
+ */
+ flags = le32_to_cpu(target->initiator_ap_flags);
+ if ((flags & IWL_INITIATOR_AP_FLAGS_SECURED) &&
+ !IWL_MVM_FTM_INITIATOR_SECURE_LTF) {
+ flags &= ~IWL_INITIATOR_AP_FLAGS_SECURED;
+ flags |= IWL_INITIATOR_AP_FLAGS_PMF;
+ target->initiator_ap_flags = cpu_to_le32(flags);
+ }
+
+ return 0;
+}
+
static int iwl_mvm_ftm_start_v12(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
struct cfg80211_pmsr_request *req)
@@ -773,24 +800,53 @@ static int iwl_mvm_ftm_start_v12(struct iwl_mvm *mvm,
for (i = 0; i < cmd.num_of_ap; i++) {
struct cfg80211_pmsr_request_peer *peer = &req->peers[i];
struct iwl_tof_range_req_ap_entry_v8 *target = &cmd.ap[i];
- u32 flags;
- err = iwl_mvm_ftm_put_target_v7(mvm, vif, peer, (void *)target);
+ err = iwl_mvm_ftm_put_target_v8(mvm, vif, peer, target);
if (err)
return err;
+ }
- iwl_mvm_ftm_set_ndp_params(mvm, target);
-
- /*
- * If secure LTF is turned off, replace the flag with PMF only
- */
- flags = le32_to_cpu(target->initiator_ap_flags);
- if ((flags & IWL_INITIATOR_AP_FLAGS_SECURED) &&
- !IWL_MVM_FTM_INITIATOR_SECURE_LTF) {
- flags &= ~IWL_INITIATOR_AP_FLAGS_SECURED;
- flags |= IWL_INITIATOR_AP_FLAGS_PMF;
- target->initiator_ap_flags = cpu_to_le32(flags);
+ return iwl_mvm_ftm_send_cmd(mvm, &hcmd);
+}
+
+static int iwl_mvm_ftm_start_v13(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ struct cfg80211_pmsr_request *req)
+{
+ struct iwl_tof_range_req_cmd_v13 cmd;
+ struct iwl_host_cmd hcmd = {
+ .id = iwl_cmd_id(TOF_RANGE_REQ_CMD, LOCATION_GROUP, 0),
+ .dataflags[0] = IWL_HCMD_DFL_DUP,
+ .data[0] = &cmd,
+ .len[0] = sizeof(cmd),
+ };
+ u8 i;
+ int err;
+
+ iwl_mvm_ftm_cmd_common(mvm, vif, (void *)&cmd, req);
+
+ for (i = 0; i < cmd.num_of_ap; i++) {
+ struct cfg80211_pmsr_request_peer *peer = &req->peers[i];
+ struct iwl_tof_range_req_ap_entry_v9 *target = &cmd.ap[i];
+
+ err = iwl_mvm_ftm_put_target_v8(mvm, vif, peer, (void *)target);
+ if (err)
+ return err;
+
+ if (peer->ftm.trigger_based || peer->ftm.non_trigger_based)
+ target->bss_color = peer->ftm.bss_color;
+
+ if (peer->ftm.non_trigger_based) {
+ target->min_time_between_msr =
+ cpu_to_le16(IWL_MVM_FTM_NON_TB_MIN_TIME_BETWEEN_MSR);
+ target->burst_period =
+ cpu_to_le16(IWL_MVM_FTM_NON_TB_MAX_TIME_BETWEEN_MSR);
+ } else {
+ target->min_time_between_msr = cpu_to_le16(0);
}
+
+ target->band =
+ iwl_mvm_phy_band_from_nl80211(peer->chandef.chan->band);
}
return iwl_mvm_ftm_send_cmd(mvm, &hcmd);
@@ -814,6 +870,9 @@ int iwl_mvm_ftm_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
IWL_FW_CMD_VER_UNKNOWN);
switch (cmd_ver) {
+ case 13:
+ err = iwl_mvm_ftm_start_v13(mvm, vif, req);
+ break;
case 12:
err = iwl_mvm_ftm_start_v12(mvm, vif, req);
break;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c
index 5a249ea97eb2..eba5433c2626 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#include <net/cfg80211.h>
#include <linux/etherdevice.h>
@@ -77,7 +77,7 @@ static int iwl_mvm_ftm_responder_set_bw_v2(struct cfg80211_chan_def *chandef,
static void
iwl_mvm_ftm_responder_set_ndp(struct iwl_mvm *mvm,
- struct iwl_tof_responder_config_cmd_v8 *cmd)
+ struct iwl_tof_responder_config_cmd_v9 *cmd)
{
/* Up to 2 R2I STS are allowed on the responder */
u32 r2i_max_sts = IWL_MVM_FTM_R2I_MAX_STS < 2 ?
@@ -104,7 +104,7 @@ iwl_mvm_ftm_responder_cmd(struct iwl_mvm *mvm,
* field interpretation is different), so the same struct can be use
* for all cases.
*/
- struct iwl_tof_responder_config_cmd_v8 cmd = {
+ struct iwl_tof_responder_config_cmd_v9 cmd = {
.channel_num = chandef->chan->hw_value,
.cmd_valid_fields =
cpu_to_le32(IWL_TOF_RESPONDER_CMD_VALID_CHAN_INFO |
@@ -115,10 +115,27 @@ iwl_mvm_ftm_responder_cmd(struct iwl_mvm *mvm,
u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, LOCATION_GROUP,
TOF_RESPONDER_CONFIG_CMD, 6);
int err;
+ int cmd_size;
lockdep_assert_held(&mvm->mutex);
-if (cmd_ver == 8)
+ /* Use a default of bss_color=1 for now */
+ if (cmd_ver == 9) {
+ cmd.cmd_valid_fields |=
+ cpu_to_le32(IWL_TOF_RESPONDER_CMD_VALID_BSS_COLOR |
+ IWL_TOF_RESPONDER_CMD_VALID_MIN_MAX_TIME_BETWEEN_MSR);
+ cmd.bss_color = 1;
+ cmd.min_time_between_msr =
+ cpu_to_le16(IWL_MVM_FTM_NON_TB_MIN_TIME_BETWEEN_MSR);
+ cmd.max_time_between_msr =
+ cpu_to_le16(IWL_MVM_FTM_NON_TB_MAX_TIME_BETWEEN_MSR);
+ cmd_size = sizeof(struct iwl_tof_responder_config_cmd_v9);
+ } else {
+ /* All versions up to version 8 have the same size */
+ cmd_size = sizeof(struct iwl_tof_responder_config_cmd_v8);
+ }
+
+ if (cmd_ver >= 8)
iwl_mvm_ftm_responder_set_ndp(mvm, &cmd);
if (cmd_ver >= 7)
@@ -137,7 +154,7 @@ if (cmd_ver == 8)
return iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(TOF_RESPONDER_CONFIG_CMD,
LOCATION_GROUP, 0),
- 0, sizeof(cmd), &cmd);
+ 0, cmd_size, &cmd);
}
static int
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 38fd5886af2d..74404c96063b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -743,7 +743,8 @@ int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b)
/* all structs have the same common part, add it */
len += sizeof(cmd.common);
- ret = iwl_sar_select_profile(&mvm->fwrt, per_chain, ACPI_SAR_NUM_TABLES,
+ ret = iwl_sar_select_profile(&mvm->fwrt, per_chain,
+ IWL_NUM_CHAIN_TABLES,
n_subbands, prof_a, prof_b);
/* return on error or if the profile is disabled (positive number) */
@@ -1057,16 +1058,7 @@ static const struct dmi_system_id dmi_ppag_approved_list[] = {
static int iwl_mvm_ppag_init(struct iwl_mvm *mvm)
{
- int ret;
-
- ret = iwl_mvm_get_ppag_table(mvm);
- if (ret < 0) {
- IWL_DEBUG_RADIO(mvm,
- "PPAG BIOS table invalid or unavailable. (%d)\n",
- ret);
- return 0;
- }
-
+ /* no need to read the table, done in INIT stage */
if (!dmi_check_system(dmi_ppag_approved_list)) {
IWL_DEBUG_RADIO(mvm,
"System vendor '%s' is not in the approved list, disabling PPAG.\n",
@@ -1191,12 +1183,65 @@ static void iwl_mvm_lari_cfg(struct iwl_mvm *mvm)
ret);
}
}
+
+void iwl_mvm_get_acpi_tables(struct iwl_mvm *mvm)
+{
+ int ret;
+
+ /* read PPAG table */
+ ret = iwl_mvm_get_ppag_table(mvm);
+ if (ret < 0) {
+ IWL_DEBUG_RADIO(mvm,
+ "PPAG BIOS table invalid or unavailable. (%d)\n",
+ ret);
+ }
+
+ /* read SAR tables */
+ ret = iwl_sar_get_wrds_table(&mvm->fwrt);
+ if (ret < 0) {
+ IWL_DEBUG_RADIO(mvm,
+ "WRDS SAR BIOS table invalid or unavailable. (%d)\n",
+ ret);
+ /*
+ * If not available, don't fail and don't bother with EWRD and
+ * WGDS */
+
+ if (!iwl_sar_get_wgds_table(&mvm->fwrt)) {
+ /*
+ * If basic SAR is not available, we check for WGDS,
+ * which should *not* be available either. If it is
+ * available, issue an error, because we can't use SAR
+ * Geo without basic SAR.
+ */
+ IWL_ERR(mvm, "BIOS contains WGDS but no WRDS\n");
+ }
+
+ } else {
+ ret = iwl_sar_get_ewrd_table(&mvm->fwrt);
+ /* if EWRD is not available, we can still use
+ * WRDS, so don't fail */
+ if (ret < 0)
+ IWL_DEBUG_RADIO(mvm,
+ "EWRD SAR BIOS table invalid or unavailable. (%d)\n",
+ ret);
+
+ /* read geo SAR table */
+ if (iwl_sar_geo_support(&mvm->fwrt)) {
+ ret = iwl_sar_get_wgds_table(&mvm->fwrt);
+ if (ret < 0)
+ IWL_DEBUG_RADIO(mvm,
+ "Geo SAR BIOS table invalid or unavailable. (%d)\n",
+ ret);
+ /* we don't fail if the table is not available */
+ }
+ }
+}
#else /* CONFIG_ACPI */
inline int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm,
int prof_a, int prof_b)
{
- return -ENOENT;
+ return 1;
}
inline int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm)
@@ -1231,6 +1276,10 @@ static u8 iwl_mvm_eval_dsm_rfi(struct iwl_mvm *mvm)
{
return DSM_VALUE_RFI_DISABLE;
}
+
+void iwl_mvm_get_acpi_tables(struct iwl_mvm *mvm)
+{
+}
#endif /* CONFIG_ACPI */
void iwl_mvm_send_recovery_cmd(struct iwl_mvm *mvm, u32 flags)
@@ -1286,27 +1335,6 @@ void iwl_mvm_send_recovery_cmd(struct iwl_mvm *mvm, u32 flags)
static int iwl_mvm_sar_init(struct iwl_mvm *mvm)
{
- int ret;
-
- ret = iwl_sar_get_wrds_table(&mvm->fwrt);
- if (ret < 0) {
- IWL_DEBUG_RADIO(mvm,
- "WRDS SAR BIOS table invalid or unavailable. (%d)\n",
- ret);
- /*
- * If not available, don't fail and don't bother with EWRD.
- * Return 1 to tell that we can't use WGDS either.
- */
- return 1;
- }
-
- ret = iwl_sar_get_ewrd_table(&mvm->fwrt);
- /* if EWRD is not available, we can still use WRDS, so don't fail */
- if (ret < 0)
- IWL_DEBUG_RADIO(mvm,
- "EWRD SAR BIOS table invalid or unavailable. (%d)\n",
- ret);
-
return iwl_mvm_sar_select_profile(mvm, 1, 1);
}
@@ -1542,19 +1570,9 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
goto error;
ret = iwl_mvm_sar_init(mvm);
- if (ret == 0) {
+ if (ret == 0)
ret = iwl_mvm_sar_geo_init(mvm);
- } else if (ret == -ENOENT && !iwl_sar_get_wgds_table(&mvm->fwrt)) {
- /*
- * If basic SAR is not available, we check for WGDS,
- * which should *not* be available either. If it is
- * available, issue an error, because we can't use SAR
- * Geo without basic SAR.
- */
- IWL_ERR(mvm, "BIOS contains WGDS but no WRDS\n");
- }
-
- if (ret < 0)
+ else if (ret < 0)
goto error;
iwl_mvm_tas_init(mvm);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
index fd5e08961651..fd352b2624a6 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
* Copyright (C) 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
*/
@@ -647,12 +647,14 @@ static int iwl_mvm_mac_ctxt_cmd_sta(struct iwl_mvm *mvm,
if (vif->bss_conf.he_support && !iwlwifi_mod_params.disable_11ax) {
cmd.filter_flags |= cpu_to_le32(MAC_FILTER_IN_11AX);
- if (vif->bss_conf.twt_requester && IWL_MVM_USE_TWT) {
+ if (vif->bss_conf.twt_requester && IWL_MVM_USE_TWT)
ctxt_sta->data_policy |= cpu_to_le32(TWT_SUPPORTED);
- if (vif->bss_conf.twt_protected)
- ctxt_sta->data_policy |=
- cpu_to_le32(PROTECTED_TWT_SUPPORTED);
- }
+ if (vif->bss_conf.twt_protected)
+ ctxt_sta->data_policy |=
+ cpu_to_le32(PROTECTED_TWT_SUPPORTED);
+ if (vif->bss_conf.twt_broadcast)
+ ctxt_sta->data_policy |=
+ cpu_to_le32(BROADCAST_TWT_SUPPORTED);
}
@@ -1005,8 +1007,10 @@ int iwl_mvm_mac_ctxt_beacon_changed(struct iwl_mvm *mvm,
return -ENOMEM;
#ifdef CONFIG_IWLWIFI_DEBUGFS
- if (mvm->beacon_inject_active)
+ if (mvm->beacon_inject_active) {
+ dev_kfree_skb(beacon);
return -EBUSY;
+ }
#endif
ret = iwl_mvm_mac_ctxt_send_beacon(mvm, vif, beacon);
@@ -1427,14 +1431,34 @@ void iwl_mvm_rx_stored_beacon_notif(struct iwl_mvm *mvm,
{
struct iwl_rx_packet *pkt = rxb_addr(rxb);
unsigned int pkt_len = iwl_rx_packet_payload_len(pkt);
- struct iwl_stored_beacon_notif *sb = (void *)pkt->data;
+ struct iwl_stored_beacon_notif_common *sb = (void *)pkt->data;
struct ieee80211_rx_status rx_status;
struct sk_buff *skb;
+ u8 *data;
u32 size = le32_to_cpu(sb->byte_count);
+ int ver = iwl_fw_lookup_cmd_ver(mvm->fw, PROT_OFFLOAD_GROUP,
+ STORED_BEACON_NTF, 0);
- if (size == 0 || pkt_len < struct_size(sb, data, size))
+ if (size == 0)
return;
+ /* handle per-version differences */
+ if (ver <= 2) {
+ struct iwl_stored_beacon_notif_v2 *sb_v2 = (void *)pkt->data;
+
+ if (pkt_len < struct_size(sb_v2, data, size))
+ return;
+
+ data = sb_v2->data;
+ } else {
+ struct iwl_stored_beacon_notif_v3 *sb_v3 = (void *)pkt->data;
+
+ if (pkt_len < struct_size(sb_v3, data, size))
+ return;
+
+ data = sb_v3->data;
+ }
+
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb) {
IWL_ERR(mvm, "alloc_skb failed\n");
@@ -1455,7 +1479,7 @@ void iwl_mvm_rx_stored_beacon_notif(struct iwl_mvm *mvm,
rx_status.band);
/* copy the data */
- skb_put_data(skb, sb->data, size);
+ skb_put_data(skb, data, size);
memcpy(IEEE80211_SKB_RXCB(skb), &rx_status, sizeof(rx_status));
/* pass it as regular rx to mac80211 */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 70ebecb73c24..3a4585222d6d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -390,7 +390,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
if (mvm->trans->max_skb_frags)
hw->netdev_features = NETIF_F_HIGHDMA | NETIF_F_SG;
- hw->queues = IEEE80211_MAX_QUEUES;
+ hw->queues = IEEE80211_NUM_ACS;
hw->offchannel_tx_hw_queue = IWL_MVM_OFFCHANNEL_QUEUE;
hw->radiotap_mcs_details |= IEEE80211_RADIOTAP_MCS_HAVE_FEC |
IEEE80211_RADIOTAP_MCS_HAVE_STBC;
@@ -762,11 +762,11 @@ static void iwl_mvm_mac_tx(struct ieee80211_hw *hw,
!test_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status))
goto drop;
- /* treat non-bufferable MMPDUs on AP interfaces as broadcast */
- if ((info->control.vif->type == NL80211_IFTYPE_AP ||
- info->control.vif->type == NL80211_IFTYPE_ADHOC) &&
- ieee80211_is_mgmt(hdr->frame_control) &&
- !ieee80211_is_bufferable_mmpdu(hdr->frame_control))
+ /*
+ * bufferable MMPDUs or MMPDUs on STA interfaces come via TXQs
+ * so we treat the others as broadcast
+ */
+ if (ieee80211_is_mgmt(hdr->frame_control))
sta = NULL;
/* If there is no sta, and it's not offchannel - send through AP */
@@ -2440,6 +2440,9 @@ static void iwl_mvm_bss_info_changed_station(struct iwl_mvm *mvm,
IWL_DEBUG_MAC80211(mvm, "arp filter changed\n");
iwl_mvm_configure_bcast_filter(mvm);
}
+
+ if (changes & BSS_CHANGED_BANDWIDTH)
+ iwl_mvm_apply_fw_smps_request(vif);
}
static int iwl_mvm_start_ap_ibss(struct ieee80211_hw *hw,
@@ -2987,16 +2990,20 @@ static void iwl_mvm_check_he_obss_narrow_bw_ru_iter(struct wiphy *wiphy,
void *_data)
{
struct iwl_mvm_he_obss_narrow_bw_ru_data *data = _data;
+ const struct cfg80211_bss_ies *ies;
const struct element *elem;
- elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, bss->ies->data,
- bss->ies->len);
+ rcu_read_lock();
+ ies = rcu_dereference(bss->ies);
+ elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, ies->data,
+ ies->len);
if (!elem || elem->datalen < 10 ||
!(elem->data[10] &
WLAN_EXT_CAPA10_OBSS_NARROW_BW_RU_TOLERANCE_SUPPORT)) {
data->tolerated = false;
}
+ rcu_read_unlock();
}
static void iwl_mvm_check_he_obss_narrow_bw_ru(struct ieee80211_hw *hw,
@@ -5035,22 +5042,14 @@ static void iwl_mvm_event_mlme_callback_ini(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
const struct ieee80211_mlme_event *mlme)
{
- if (mlme->data == ASSOC_EVENT && (mlme->status == MLME_DENIED ||
- mlme->status == MLME_TIMEOUT)) {
+ if ((mlme->data == ASSOC_EVENT || mlme->data == AUTH_EVENT) &&
+ (mlme->status == MLME_DENIED || mlme->status == MLME_TIMEOUT)) {
iwl_dbg_tlv_time_point(&mvm->fwrt,
IWL_FW_INI_TIME_POINT_ASSOC_FAILED,
NULL);
return;
}
- if (mlme->data == AUTH_EVENT && (mlme->status == MLME_DENIED ||
- mlme->status == MLME_TIMEOUT)) {
- iwl_dbg_tlv_time_point(&mvm->fwrt,
- IWL_FW_INI_TIME_POINT_EAPOL_FAILED,
- NULL);
- return;
- }
-
if (mlme->data == DEAUTH_RX_EVENT || mlme->data == DEAUTH_TX_EVENT) {
iwl_dbg_tlv_time_point(&mvm->fwrt,
IWL_FW_INI_TIME_POINT_DEASSOC,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index b50942f28bb7..f877d86b038e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -431,8 +431,6 @@ struct iwl_mvm_vif {
static inline struct iwl_mvm_vif *
iwl_mvm_vif_from_mac80211(struct ieee80211_vif *vif)
{
- if (!vif)
- return NULL;
return (void *)vif->drv_priv;
}
@@ -2045,6 +2043,7 @@ void iwl_mvm_event_frame_timeout_callback(struct iwl_mvm *mvm,
int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b);
int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm);
int iwl_mvm_ppag_send_cmd(struct iwl_mvm *mvm);
+void iwl_mvm_get_acpi_tables(struct iwl_mvm *mvm);
#ifdef CONFIG_IWLWIFI_DEBUGFS
void iwl_mvm_sta_add_debugfs(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
index 7fb4e618f76e..da705fcaf0fc 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2019 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2019, 2021 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -416,7 +416,7 @@ iwl_mvm_update_mcc(struct iwl_mvm *mvm, const char *alpha2,
struct iwl_rx_packet *pkt;
struct iwl_host_cmd cmd = {
.id = MCC_UPDATE_CMD,
- .flags = CMD_WANT_SKB,
+ .flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL,
.data = { &mcc_update_cmd },
};
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 20e8d343a950..6f60018feed1 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -78,7 +78,6 @@ module_exit(iwl_mvm_exit);
static void iwl_mvm_nic_config(struct iwl_op_mode *op_mode)
{
struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
- struct iwl_trans_debug *dbg = &mvm->trans->dbg;
u8 radio_cfg_type, radio_cfg_step, radio_cfg_dash;
u32 reg_val = 0;
u32 phy_config = iwl_mvm_get_phy_config(mvm);
@@ -115,10 +114,7 @@ static void iwl_mvm_nic_config(struct iwl_op_mode *op_mode)
if (mvm->trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_8000)
reg_val |= CSR_HW_IF_CONFIG_REG_BIT_RADIO_SI;
- if (iwl_fw_dbg_is_d3_debug_enabled(&mvm->fwrt) ||
- (iwl_trans_dbg_ini_valid(mvm->trans) &&
- dbg->fw_mon_cfg[IWL_FW_INI_ALLOCATION_ID_INTERNAL].buf_location)
- )
+ if (iwl_fw_dbg_is_d3_debug_enabled(&mvm->fwrt))
reg_val |= CSR_HW_IF_CONFIG_REG_D3_DEBUG;
iwl_trans_set_bits_mask(mvm->trans, CSR_HW_IF_CONFIG_REG,
@@ -214,11 +210,14 @@ void iwl_mvm_apply_fw_smps_request(struct ieee80211_vif *vif)
{
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
struct iwl_mvm *mvm = mvmvif->mvm;
+ enum ieee80211_smps_mode mode = IEEE80211_SMPS_AUTOMATIC;
- iwl_mvm_update_smps(mvm, vif, IWL_MVM_SMPS_REQ_FW,
- mvm->fw_static_smps_request ?
- IEEE80211_SMPS_STATIC :
- IEEE80211_SMPS_AUTOMATIC);
+ if (mvm->fw_static_smps_request &&
+ vif->bss_conf.chandef.width == NL80211_CHAN_WIDTH_160 &&
+ vif->bss_conf.he_support)
+ mode = IEEE80211_SMPS_STATIC;
+
+ iwl_mvm_update_smps(mvm, vif, IWL_MVM_SMPS_REQ_FW, mode);
}
static void iwl_mvm_intf_dual_chain_req(void *data, u8 *mac,
@@ -374,7 +373,7 @@ static const struct iwl_rx_handlers iwl_mvm_rx_handlers[] = {
struct iwl_mfu_assert_dump_notif),
RX_HANDLER_GRP(PROT_OFFLOAD_GROUP, STORED_BEACON_NTF,
iwl_mvm_rx_stored_beacon_notif, RX_HANDLER_SYNC,
- struct iwl_stored_beacon_notif),
+ struct iwl_stored_beacon_notif_v2),
RX_HANDLER_GRP(DATA_PATH_GROUP, MU_GROUP_MGMT_NOTIF,
iwl_mvm_mu_mimo_grp_notif, RX_HANDLER_SYNC,
struct iwl_mu_group_mgmt_notif),
@@ -693,11 +692,16 @@ static int iwl_mvm_start_get_nvm(struct iwl_mvm *mvm)
if (ret && ret != -ERFKILL)
iwl_fw_dbg_error_collect(&mvm->fwrt, FW_DBG_TRIGGER_DRIVER);
+ if (!ret && iwl_mvm_is_lar_supported(mvm)) {
+ mvm->hw->wiphy->regulatory_flags |= REGULATORY_WIPHY_SELF_MANAGED;
+ ret = iwl_mvm_init_mcc(mvm);
+ }
if (!iwlmvm_mod_params.init_dbg || !ret)
iwl_mvm_stop_device(mvm);
mutex_unlock(&mvm->mutex);
+ rtnl_unlock();
if (ret < 0)
IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", ret);
@@ -772,6 +776,8 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
iwl_fw_runtime_init(&mvm->fwrt, trans, fw, &iwl_mvm_fwrt_ops, mvm,
dbgfs_dir);
+ iwl_mvm_get_acpi_tables(mvm);
+
mvm->init_status = 0;
if (iwl_mvm_has_new_rx_api(mvm)) {
@@ -792,10 +798,26 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
mvm->fw_restart = iwlwifi_mod_params.fw_restart ? -1 : 0;
- mvm->aux_queue = IWL_MVM_DQA_AUX_QUEUE;
- mvm->snif_queue = IWL_MVM_DQA_INJECT_MONITOR_QUEUE;
- mvm->probe_queue = IWL_MVM_DQA_AP_PROBE_RESP_QUEUE;
- mvm->p2p_dev_queue = IWL_MVM_DQA_P2P_DEVICE_QUEUE;
+ if (iwl_mvm_has_new_tx_api(mvm)) {
+ /*
+ * If we have the new TX/queue allocation API initialize them
+ * all to invalid numbers. We'll rewrite the ones that we need
+ * later, but that doesn't happen for all of them all of the
+ * time (e.g. P2P Device is optional), and if a dynamic queue
+ * ends up getting number 2 (IWL_MVM_DQA_P2P_DEVICE_QUEUE) then
+ * iwl_mvm_is_static_queue() erroneously returns true, and we
+ * might have things getting stuck.
+ */
+ mvm->aux_queue = IWL_MVM_INVALID_QUEUE;
+ mvm->snif_queue = IWL_MVM_INVALID_QUEUE;
+ mvm->probe_queue = IWL_MVM_INVALID_QUEUE;
+ mvm->p2p_dev_queue = IWL_MVM_INVALID_QUEUE;
+ } else {
+ mvm->aux_queue = IWL_MVM_DQA_AUX_QUEUE;
+ mvm->snif_queue = IWL_MVM_DQA_INJECT_MONITOR_QUEUE;
+ mvm->probe_queue = IWL_MVM_DQA_AP_PROBE_RESP_QUEUE;
+ mvm->p2p_dev_queue = IWL_MVM_DQA_P2P_DEVICE_QUEUE;
+ }
mvm->sf_state = SF_UNINIT;
if (iwl_mvm_has_unified_ucode(mvm))
@@ -1400,7 +1422,7 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error)
* can't recover this since we're already half suspended.
*/
if (!mvm->fw_restart && fw_error) {
- iwl_fw_error_collect(&mvm->fwrt);
+ iwl_fw_error_collect(&mvm->fwrt, false);
} else if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) {
struct iwl_mvm_reprobe *reprobe;
@@ -1451,7 +1473,7 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error)
}
}
- iwl_fw_error_collect(&mvm->fwrt);
+ iwl_fw_error_collect(&mvm->fwrt, false);
if (fw_error && mvm->fw_restart > 0)
mvm->fw_restart--;
@@ -1459,13 +1481,31 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error)
}
}
-static void iwl_mvm_nic_error(struct iwl_op_mode *op_mode)
+static void iwl_mvm_nic_error(struct iwl_op_mode *op_mode, bool sync)
{
struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
if (!test_bit(STATUS_TRANS_DEAD, &mvm->trans->status))
iwl_mvm_dump_nic_error_log(mvm);
+ if (sync) {
+ iwl_fw_error_collect(&mvm->fwrt, true);
+ /*
+ * Currently, the only case for sync=true is during
+ * shutdown, so just stop in this case. If/when that
+ * changes, we need to be a bit smarter here.
+ */
+ return;
+ }
+
+ /*
+ * If the firmware crashes while we're already considering it
+ * to be dead then don't ask for a restart, that cannot do
+ * anything useful anyway.
+ */
+ if (!test_bit(IWL_MVM_STATUS_FIRMWARE_RUNNING, &mvm->status))
+ return;
+
iwl_mvm_nic_restart(mvm, true);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
index 0b818067067c..44344216a1a9 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
@@ -11,7 +11,7 @@
* DDR needs frequency in units of 16.666MHz, so provide FW with the
* frequency values in the adjusted format.
*/
-const static struct iwl_rfi_lut_entry iwl_rfi_table[IWL_RFI_LUT_SIZE] = {
+static const struct iwl_rfi_lut_entry iwl_rfi_table[IWL_RFI_LUT_SIZE] = {
/* LPDDR4 */
/* frequency 3733MHz */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index c0babb8d5b5c..c12f303cf652 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -69,8 +69,8 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb,
/* if we are here - this for sure is either CCMP or GCMP */
if (IS_ERR_OR_NULL(sta)) {
- IWL_ERR(mvm,
- "expected hw-decrypted unicast frame for station\n");
+ IWL_DEBUG_DROP(mvm,
+ "expected hw-decrypted unicast frame for station\n");
return -1;
}
@@ -279,7 +279,6 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta,
{
struct iwl_mvm_sta *mvmsta;
struct iwl_mvm_vif *mvmvif;
- u8 fwkeyid = u32_get_bits(status, IWL_RX_MPDU_STATUS_KEY);
u8 keyid;
struct ieee80211_key_conf *key;
u32 len = le16_to_cpu(desc->mpdu_len);
@@ -299,6 +298,10 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta,
if (!ieee80211_is_beacon(hdr->frame_control))
return 0;
+ /* key mismatch - will also report !MIC_OK but we shouldn't count it */
+ if (!(status & IWL_RX_MPDU_STATUS_KEY_VALID))
+ return -1;
+
/* good cases */
if (likely(status & IWL_RX_MPDU_STATUS_MIC_OK &&
!(status & IWL_RX_MPDU_STATUS_REPLAY_ERROR)))
@@ -309,26 +312,36 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta,
mvmsta = iwl_mvm_sta_from_mac80211(sta);
- /* what? */
- if (fwkeyid != 6 && fwkeyid != 7)
- return -1;
-
mvmvif = iwl_mvm_vif_from_mac80211(mvmsta->vif);
- key = rcu_dereference(mvmvif->bcn_prot.keys[fwkeyid - 6]);
- if (!key)
- return -1;
+ /*
+ * both keys will have the same cipher and MIC length, use
+ * whichever one is available
+ */
+ key = rcu_dereference(mvmvif->bcn_prot.keys[0]);
+ if (!key) {
+ key = rcu_dereference(mvmvif->bcn_prot.keys[1]);
+ if (!key)
+ return -1;
+ }
if (len < key->icv_len + IEEE80211_GMAC_PN_LEN + 2)
return -1;
- /*
- * See if the key ID matches - if not this may be due to a
- * switch and the firmware may erroneously report !MIC_OK.
- */
+ /* get the real key ID */
keyid = frame[len - key->icv_len - IEEE80211_GMAC_PN_LEN - 2];
- if (keyid != fwkeyid)
- return -1;
+ /* and if that's the other key, look it up */
+ if (keyid != key->keyidx) {
+ /*
+ * shouldn't happen since firmware checked, but be safe
+ * in case the MIC length is wrong too, for example
+ */
+ if (keyid != 6 && keyid != 7)
+ return -1;
+ key = rcu_dereference(mvmvif->bcn_prot.keys[keyid - 6]);
+ if (!key)
+ return -1;
+ }
/* Report status to mac80211 */
if (!(status & IWL_RX_MPDU_STATUS_MIC_OK))
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index 0368b7101222..d78e436fa8b5 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -1648,7 +1648,7 @@ iwl_mvm_umac_scan_cfg_channels_v6(struct iwl_mvm *mvm,
struct iwl_scan_channel_cfg_umac *cfg = &cp->channel_config[i];
u32 n_aps_flag =
iwl_mvm_scan_ch_n_aps_flag(vif_type,
- cfg->v2.channel_num);
+ channels[i]->hw_value);
cfg->flags = cpu_to_le32(flags | n_aps_flag);
cfg->v2.channel_num = channels[i]->hw_value;
@@ -1661,22 +1661,32 @@ iwl_mvm_umac_scan_cfg_channels_v6(struct iwl_mvm *mvm,
}
static int
-iwl_mvm_umac_scan_fill_6g_chan_list(struct iwl_mvm_scan_params *params,
- __le32 *cmd_short_ssid, u8 *cmd_bssid,
- u8 *scan_ssid_num, u8 *bssid_num)
+iwl_mvm_umac_scan_fill_6g_chan_list(struct iwl_mvm *mvm,
+ struct iwl_mvm_scan_params *params,
+ struct iwl_scan_probe_params_v4 *pp)
{
int j, idex_s = 0, idex_b = 0;
struct cfg80211_scan_6ghz_params *scan_6ghz_params =
params->scan_6ghz_params;
+ bool hidden_supported = fw_has_capa(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_CAPA_HIDDEN_6GHZ_SCAN);
- if (!params->n_6ghz_params) {
- for (j = 0; j < params->n_ssids; j++) {
- cmd_short_ssid[idex_s++] =
- cpu_to_le32(~crc32_le(~0, params->ssids[j].ssid,
- params->ssids[j].ssid_len));
- (*scan_ssid_num)++;
+ for (j = 0; j < params->n_ssids && idex_s < SCAN_SHORT_SSID_MAX_SIZE;
+ j++) {
+ if (!params->ssids[j].ssid_len)
+ continue;
+
+ pp->short_ssid[idex_s] =
+ cpu_to_le32(~crc32_le(~0, params->ssids[j].ssid,
+ params->ssids[j].ssid_len));
+
+ if (hidden_supported) {
+ pp->direct_scan[idex_s].id = WLAN_EID_SSID;
+ pp->direct_scan[idex_s].len = params->ssids[j].ssid_len;
+ memcpy(pp->direct_scan[idex_s].ssid, params->ssids[j].ssid,
+ params->ssids[j].ssid_len);
}
- return 0;
+ idex_s++;
}
/*
@@ -1693,40 +1703,40 @@ iwl_mvm_umac_scan_fill_6g_chan_list(struct iwl_mvm_scan_params *params,
/* First, try to place the short SSID */
if (scan_6ghz_params[j].short_ssid_valid) {
for (k = 0; k < idex_s; k++) {
- if (cmd_short_ssid[k] ==
+ if (pp->short_ssid[k] ==
cpu_to_le32(scan_6ghz_params[j].short_ssid))
break;
}
if (k == idex_s && idex_s < SCAN_SHORT_SSID_MAX_SIZE) {
- cmd_short_ssid[idex_s++] =
+ pp->short_ssid[idex_s++] =
cpu_to_le32(scan_6ghz_params[j].short_ssid);
- (*scan_ssid_num)++;
}
}
/* try to place BSSID for the same entry */
for (k = 0; k < idex_b; k++) {
- if (!memcmp(&cmd_bssid[ETH_ALEN * k],
+ if (!memcmp(&pp->bssid_array[k],
scan_6ghz_params[j].bssid, ETH_ALEN))
break;
}
if (k == idex_b && idex_b < SCAN_BSSID_MAX_SIZE) {
- memcpy(&cmd_bssid[ETH_ALEN * idex_b++],
+ memcpy(&pp->bssid_array[idex_b++],
scan_6ghz_params[j].bssid, ETH_ALEN);
- (*bssid_num)++;
}
}
+
+ pp->short_ssid_num = idex_s;
+ pp->bssid_num = idex_b;
return 0;
}
/* TODO: this function can be merged with iwl_mvm_scan_umac_fill_ch_p_v6 */
static void
iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm_scan_params *params,
- u32 n_channels, __le32 *cmd_short_ssid,
- u8 *cmd_bssid, u8 scan_ssid_num,
- u8 bssid_num,
+ u32 n_channels,
+ struct iwl_scan_probe_params_v4 *pp,
struct iwl_scan_channel_params_v6 *cp,
enum nl80211_iftype vif_type)
{
@@ -1741,7 +1751,7 @@ iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm_scan_params *params,
u32 s_ssid_bitmap = 0, bssid_bitmap = 0, flags = 0;
u8 j, k, s_max = 0, b_max = 0, n_used_bssid_entries;
- bool force_passive, found = false,
+ bool force_passive, found = false, allow_passive = true,
unsolicited_probe_on_chan = false, psc_no_listen = false;
cfg->v1.channel_num = params->channels[i]->hw_value;
@@ -1766,9 +1776,9 @@ iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm_scan_params *params,
scan_6ghz_params[j].unsolicited_probe;
psc_no_listen |= scan_6ghz_params[j].psc_no_listen;
- for (k = 0; k < scan_ssid_num; k++) {
+ for (k = 0; k < pp->short_ssid_num; k++) {
if (!scan_6ghz_params[j].unsolicited_probe &&
- le32_to_cpu(cmd_short_ssid[k]) ==
+ le32_to_cpu(pp->short_ssid[k]) ==
scan_6ghz_params[j].short_ssid) {
/* Relevant short SSID bit set */
if (s_ssid_bitmap & BIT(k)) {
@@ -1778,7 +1788,10 @@ iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm_scan_params *params,
/*
* Use short SSID only to create a new
- * iteration during channel dwell.
+ * iteration during channel dwell or in
+ * case that the short SSID has a
+ * matching SSID, i.e., scan for hidden
+ * APs.
*/
if (n_used_bssid_entries >= 3) {
s_ssid_bitmap |= BIT(k);
@@ -1786,6 +1799,12 @@ iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm_scan_params *params,
n_used_bssid_entries -= 3;
found = true;
break;
+ } else if (pp->direct_scan[k].len) {
+ s_ssid_bitmap |= BIT(k);
+ s_max++;
+ found = true;
+ allow_passive = false;
+ break;
}
}
}
@@ -1793,8 +1812,8 @@ iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm_scan_params *params,
if (found)
continue;
- for (k = 0; k < bssid_num; k++) {
- if (!memcmp(&cmd_bssid[ETH_ALEN * k],
+ for (k = 0; k < pp->bssid_num; k++) {
+ if (!memcmp(&pp->bssid_array[k],
scan_6ghz_params[j].bssid,
ETH_ALEN)) {
if (!(bssid_bitmap & BIT(k))) {
@@ -1849,7 +1868,7 @@ iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm_scan_params *params,
force_passive |= (unsolicited_probe_on_chan &&
(s_max > 1 || b_max > 3));
}
- if (force_passive ||
+ if ((allow_passive && force_passive) ||
(!flags && !cfg80211_channel_is_psc(params->channels[i])))
flags |= IWL_UHB_CHAN_CFG_FLAG_FORCE_PASSIVE;
@@ -2368,32 +2387,28 @@ static int iwl_mvm_scan_umac_v14(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
if (ret)
return ret;
- iwl_mvm_scan_umac_fill_probe_p_v4(params, &scan_p->probe_params,
- &bitmap_ssid);
if (!params->scan_6ghz) {
+ iwl_mvm_scan_umac_fill_probe_p_v4(params, &scan_p->probe_params,
+ &bitmap_ssid);
iwl_mvm_scan_umac_fill_ch_p_v6(mvm, params, vif,
- &scan_p->channel_params, bitmap_ssid);
+ &scan_p->channel_params, bitmap_ssid);
return 0;
+ } else {
+ pb->preq = params->preq;
}
+
cp->flags = iwl_mvm_scan_umac_chan_flags_v2(mvm, params, vif);
cp->n_aps_override[0] = IWL_SCAN_ADWELL_N_APS_GO_FRIENDLY;
cp->n_aps_override[1] = IWL_SCAN_ADWELL_N_APS_SOCIAL_CHS;
- ret = iwl_mvm_umac_scan_fill_6g_chan_list(params, pb->short_ssid,
- pb->bssid_array[0],
- &pb->short_ssid_num,
- &pb->bssid_num);
+ ret = iwl_mvm_umac_scan_fill_6g_chan_list(mvm, params, pb);
if (ret)
return ret;
iwl_mvm_umac_scan_cfg_channels_v6_6g(params,
params->n_channels,
- pb->short_ssid,
- pb->bssid_array[0],
- pb->short_ssid_num,
- pb->bssid_num, cp,
- vif->type);
+ pb, cp, vif->type);
cp->count = params->n_channels;
if (!params->n_ssids ||
(params->n_ssids == 1 && !params->ssids[0].ssid_len))
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 9c45a64c5009..a64874c05ced 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -316,8 +316,9 @@ static int iwl_mvm_invalidate_sta_queue(struct iwl_mvm *mvm, int queue,
}
static int iwl_mvm_disable_txq(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
- int queue, u8 tid, u8 flags)
+ u16 *queueptr, u8 tid, u8 flags)
{
+ int queue = *queueptr;
struct iwl_scd_txq_cfg_cmd cmd = {
.scd_queue = queue,
.action = SCD_CFG_DISABLE_QUEUE,
@@ -326,6 +327,7 @@ static int iwl_mvm_disable_txq(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
if (iwl_mvm_has_new_tx_api(mvm)) {
iwl_trans_txq_free(mvm->trans, queue);
+ *queueptr = IWL_MVM_INVALID_QUEUE;
return 0;
}
@@ -487,6 +489,7 @@ static int iwl_mvm_free_inactive_queue(struct iwl_mvm *mvm, int queue,
u8 sta_id, tid;
unsigned long disable_agg_tids = 0;
bool same_sta;
+ u16 queue_tmp = queue;
int ret;
lockdep_assert_held(&mvm->mutex);
@@ -509,7 +512,7 @@ static int iwl_mvm_free_inactive_queue(struct iwl_mvm *mvm, int queue,
iwl_mvm_invalidate_sta_queue(mvm, queue,
disable_agg_tids, false);
- ret = iwl_mvm_disable_txq(mvm, old_sta, queue, tid, 0);
+ ret = iwl_mvm_disable_txq(mvm, old_sta, &queue_tmp, tid, 0);
if (ret) {
IWL_ERR(mvm,
"Failed to free inactive queue %d (ret=%d)\n",
@@ -1184,6 +1187,7 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm,
unsigned int wdg_timeout =
iwl_mvm_get_wd_timeout(mvm, mvmsta->vif, false, false);
int queue = -1;
+ u16 queue_tmp;
unsigned long disable_agg_tids = 0;
enum iwl_mvm_agg_state queue_state;
bool shared_queue = false, inc_ssn;
@@ -1332,7 +1336,8 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm,
return 0;
out_err:
- iwl_mvm_disable_txq(mvm, sta, queue, tid, 0);
+ queue_tmp = queue;
+ iwl_mvm_disable_txq(mvm, sta, &queue_tmp, tid, 0);
return ret;
}
@@ -1779,7 +1784,7 @@ static void iwl_mvm_disable_sta_queues(struct iwl_mvm *mvm,
if (mvm_sta->tid_data[i].txq_id == IWL_MVM_INVALID_QUEUE)
continue;
- iwl_mvm_disable_txq(mvm, sta, mvm_sta->tid_data[i].txq_id, i,
+ iwl_mvm_disable_txq(mvm, sta, &mvm_sta->tid_data[i].txq_id, i,
0);
mvm_sta->tid_data[i].txq_id = IWL_MVM_INVALID_QUEUE;
}
@@ -1987,7 +1992,7 @@ static int iwl_mvm_add_int_sta_with_queue(struct iwl_mvm *mvm, int macidx,
ret = iwl_mvm_add_int_sta_common(mvm, sta, addr, macidx, maccolor);
if (ret) {
if (!iwl_mvm_has_new_tx_api(mvm))
- iwl_mvm_disable_txq(mvm, NULL, *queue,
+ iwl_mvm_disable_txq(mvm, NULL, queue,
IWL_MAX_TID_COUNT, 0);
return ret;
}
@@ -2060,7 +2065,7 @@ int iwl_mvm_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
if (WARN_ON_ONCE(mvm->snif_sta.sta_id == IWL_MVM_INVALID_STA))
return -EINVAL;
- iwl_mvm_disable_txq(mvm, NULL, mvm->snif_queue, IWL_MAX_TID_COUNT, 0);
+ iwl_mvm_disable_txq(mvm, NULL, &mvm->snif_queue, IWL_MAX_TID_COUNT, 0);
ret = iwl_mvm_rm_sta_common(mvm, mvm->snif_sta.sta_id);
if (ret)
IWL_WARN(mvm, "Failed sending remove station\n");
@@ -2077,7 +2082,7 @@ int iwl_mvm_rm_aux_sta(struct iwl_mvm *mvm)
if (WARN_ON_ONCE(mvm->aux_sta.sta_id == IWL_MVM_INVALID_STA))
return -EINVAL;
- iwl_mvm_disable_txq(mvm, NULL, mvm->aux_queue, IWL_MAX_TID_COUNT, 0);
+ iwl_mvm_disable_txq(mvm, NULL, &mvm->aux_queue, IWL_MAX_TID_COUNT, 0);
ret = iwl_mvm_rm_sta_common(mvm, mvm->aux_sta.sta_id);
if (ret)
IWL_WARN(mvm, "Failed sending remove station\n");
@@ -2173,7 +2178,7 @@ static void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm,
struct ieee80211_vif *vif)
{
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
- int queue;
+ u16 *queueptr, queue;
lockdep_assert_held(&mvm->mutex);
@@ -2182,10 +2187,10 @@ static void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm,
switch (vif->type) {
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_ADHOC:
- queue = mvm->probe_queue;
+ queueptr = &mvm->probe_queue;
break;
case NL80211_IFTYPE_P2P_DEVICE:
- queue = mvm->p2p_dev_queue;
+ queueptr = &mvm->p2p_dev_queue;
break;
default:
WARN(1, "Can't free bcast queue on vif type %d\n",
@@ -2193,7 +2198,8 @@ static void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm,
return;
}
- iwl_mvm_disable_txq(mvm, NULL, queue, IWL_MAX_TID_COUNT, 0);
+ queue = *queueptr;
+ iwl_mvm_disable_txq(mvm, NULL, queueptr, IWL_MAX_TID_COUNT, 0);
if (iwl_mvm_has_new_tx_api(mvm))
return;
@@ -2428,7 +2434,7 @@ int iwl_mvm_rm_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
iwl_mvm_flush_sta(mvm, &mvmvif->mcast_sta, true);
- iwl_mvm_disable_txq(mvm, NULL, mvmvif->cab_queue, 0, 0);
+ iwl_mvm_disable_txq(mvm, NULL, &mvmvif->cab_queue, 0, 0);
ret = iwl_mvm_rm_sta_common(mvm, mvmvif->mcast_sta.sta_id);
if (ret)
@@ -3190,6 +3196,20 @@ static struct iwl_mvm_sta *iwl_mvm_get_key_sta(struct iwl_mvm *mvm,
return NULL;
}
+static int iwl_mvm_pn_cmp(const u8 *pn1, const u8 *pn2, int len)
+{
+ int i;
+
+ for (i = len - 1; i >= 0; i--) {
+ if (pn1[i] > pn2[i])
+ return 1;
+ if (pn1[i] < pn2[i])
+ return -1;
+ }
+
+ return 0;
+}
+
static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm,
u32 sta_id,
struct ieee80211_key_conf *key, bool mcast,
@@ -3208,6 +3228,9 @@ static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm,
int i, size;
bool new_api = fw_has_api(&mvm->fw->ucode_capa,
IWL_UCODE_TLV_API_TKIP_MIC_KEYS);
+ int api_ver = iwl_fw_lookup_cmd_ver(mvm->fw, LONG_GROUP,
+ ADD_STA_KEY,
+ new_api ? 2 : 1);
if (sta_id == IWL_MVM_INVALID_STA)
return -EINVAL;
@@ -3220,7 +3243,7 @@ static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm,
switch (key->cipher) {
case WLAN_CIPHER_SUITE_TKIP:
key_flags |= cpu_to_le16(STA_KEY_FLG_TKIP);
- if (new_api) {
+ if (api_ver >= 2) {
memcpy((void *)&u.cmd.tx_mic_key,
&key->key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY],
IWL_MIC_KEY_SIZE);
@@ -3241,7 +3264,7 @@ static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm,
case WLAN_CIPHER_SUITE_CCMP:
key_flags |= cpu_to_le16(STA_KEY_FLG_CCM);
memcpy(u.cmd.common.key, key->key, key->keylen);
- if (new_api)
+ if (api_ver >= 2)
pn = atomic64_read(&key->tx_pn);
break;
case WLAN_CIPHER_SUITE_WEP104:
@@ -3257,7 +3280,7 @@ static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm,
case WLAN_CIPHER_SUITE_GCMP:
key_flags |= cpu_to_le16(STA_KEY_FLG_GCMP);
memcpy(u.cmd.common.key, key->key, key->keylen);
- if (new_api)
+ if (api_ver >= 2)
pn = atomic64_read(&key->tx_pn);
break;
default:
@@ -3274,7 +3297,46 @@ static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm,
u.cmd.common.key_flags = key_flags;
u.cmd.common.sta_id = sta_id;
- if (new_api) {
+ if (key->cipher == WLAN_CIPHER_SUITE_TKIP)
+ i = 0;
+ else
+ i = -1;
+
+ for (; i < IEEE80211_NUM_TIDS; i++) {
+ struct ieee80211_key_seq seq = {};
+ u8 _rx_pn[IEEE80211_MAX_PN_LEN] = {}, *rx_pn = _rx_pn;
+ int rx_pn_len = 8;
+ /* there's a hole at 2/3 in FW format depending on version */
+ int hole = api_ver >= 3 ? 0 : 2;
+
+ ieee80211_get_key_rx_seq(key, i, &seq);
+
+ if (key->cipher == WLAN_CIPHER_SUITE_TKIP) {
+ rx_pn[0] = seq.tkip.iv16;
+ rx_pn[1] = seq.tkip.iv16 >> 8;
+ rx_pn[2 + hole] = seq.tkip.iv32;
+ rx_pn[3 + hole] = seq.tkip.iv32 >> 8;
+ rx_pn[4 + hole] = seq.tkip.iv32 >> 16;
+ rx_pn[5 + hole] = seq.tkip.iv32 >> 24;
+ } else if (key_flags & cpu_to_le16(STA_KEY_FLG_EXT)) {
+ rx_pn = seq.hw.seq;
+ rx_pn_len = seq.hw.seq_len;
+ } else {
+ rx_pn[0] = seq.ccmp.pn[0];
+ rx_pn[1] = seq.ccmp.pn[1];
+ rx_pn[2 + hole] = seq.ccmp.pn[2];
+ rx_pn[3 + hole] = seq.ccmp.pn[3];
+ rx_pn[4 + hole] = seq.ccmp.pn[4];
+ rx_pn[5 + hole] = seq.ccmp.pn[5];
+ }
+
+ if (iwl_mvm_pn_cmp(rx_pn, (u8 *)&u.cmd.common.rx_secur_seq_cnt,
+ rx_pn_len) > 0)
+ memcpy(&u.cmd.common.rx_secur_seq_cnt, rx_pn,
+ rx_pn_len);
+ }
+
+ if (api_ver >= 2) {
u.cmd.transmit_seq_cnt = cpu_to_le64(pn);
size = sizeof(u.cmd);
} else {
@@ -3411,7 +3473,6 @@ static int __iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
u8 key_offset,
bool mcast)
{
- int ret;
const u8 *addr;
struct ieee80211_key_seq seq;
u16 p1k[5];
@@ -3433,30 +3494,19 @@ static int __iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
return -EINVAL;
}
- switch (keyconf->cipher) {
- case WLAN_CIPHER_SUITE_TKIP:
+ if (keyconf->cipher == WLAN_CIPHER_SUITE_TKIP) {
addr = iwl_mvm_get_mac_addr(mvm, vif, sta);
/* get phase 1 key from mac80211 */
ieee80211_get_key_rx_seq(keyconf, 0, &seq);
ieee80211_get_tkip_rx_p1k(keyconf, addr, seq.tkip.iv32, p1k);
- ret = iwl_mvm_send_sta_key(mvm, sta_id, keyconf, mcast,
- seq.tkip.iv32, p1k, 0, key_offset,
- mfp);
- break;
- case WLAN_CIPHER_SUITE_CCMP:
- case WLAN_CIPHER_SUITE_WEP40:
- case WLAN_CIPHER_SUITE_WEP104:
- case WLAN_CIPHER_SUITE_GCMP:
- case WLAN_CIPHER_SUITE_GCMP_256:
- ret = iwl_mvm_send_sta_key(mvm, sta_id, keyconf, mcast,
- 0, NULL, 0, key_offset, mfp);
- break;
- default:
- ret = iwl_mvm_send_sta_key(mvm, sta_id, keyconf, mcast,
- 0, NULL, 0, key_offset, mfp);
+
+ return iwl_mvm_send_sta_key(mvm, sta_id, keyconf, mcast,
+ seq.tkip.iv32, p1k, 0, key_offset,
+ mfp);
}
- return ret;
+ return iwl_mvm_send_sta_key(mvm, sta_id, keyconf, mcast,
+ 0, NULL, 0, key_offset, mfp);
}
int iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index d3307a11fcac..25af88a3edce 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -168,6 +168,16 @@ static bool iwl_mvm_te_check_disconnect(struct iwl_mvm *mvm,
rcu_read_unlock();
}
+ if (vif->bss_conf.assoc) {
+ /*
+ * When not associated, this will be called from
+ * iwl_mvm_event_mlme_callback_ini()
+ */
+ iwl_dbg_tlv_time_point(&mvm->fwrt,
+ IWL_FW_INI_TIME_POINT_ASSOC_FAILED,
+ NULL);
+ }
+
iwl_mvm_connection_loss(mvm, vif, errmsg);
return true;
}
@@ -246,6 +256,18 @@ static void iwl_mvm_te_check_trigger(struct iwl_mvm *mvm,
}
}
+static void iwl_mvm_p2p_roc_finished(struct iwl_mvm *mvm)
+{
+ /*
+ * If the IWL_MVM_STATUS_NEED_FLUSH_P2P is already set, then the
+ * roc_done_wk is already scheduled or running, so don't schedule it
+ * again to avoid a race where the roc_done_wk clears this bit after
+ * it is set here, affecting the next run of the roc_done_wk.
+ */
+ if (!test_and_set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status))
+ iwl_mvm_roc_finished(mvm);
+}
+
/*
* Handles a FW notification for an event that is known to the driver.
*
@@ -297,8 +319,7 @@ static void iwl_mvm_te_handle_notif(struct iwl_mvm *mvm,
switch (te_data->vif->type) {
case NL80211_IFTYPE_P2P_DEVICE:
ieee80211_remain_on_channel_expired(mvm->hw);
- set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status);
- iwl_mvm_roc_finished(mvm);
+ iwl_mvm_p2p_roc_finished(mvm);
break;
case NL80211_IFTYPE_STATION:
/*
@@ -674,8 +695,7 @@ static bool __iwl_mvm_remove_time_event(struct iwl_mvm *mvm,
/* Session protection is still ongoing. Cancel it */
iwl_mvm_cancel_session_protection(mvm, mvmvif, id);
if (iftype == NL80211_IFTYPE_P2P_DEVICE) {
- set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status);
- iwl_mvm_roc_finished(mvm);
+ iwl_mvm_p2p_roc_finished(mvm);
}
}
return false;
@@ -842,8 +862,7 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm,
/* End TE, notify mac80211 */
mvmvif->time_event_data.id = SESSION_PROTECT_CONF_MAX_ID;
ieee80211_remain_on_channel_expired(mvm->hw);
- set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status);
- iwl_mvm_roc_finished(mvm);
+ iwl_mvm_p2p_roc_finished(mvm);
} else if (le32_to_cpu(notif->start)) {
if (WARN_ON(mvmvif->time_event_data.id !=
le32_to_cpu(notif->conf_id)))
@@ -1004,14 +1023,13 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
if (vif->type == NL80211_IFTYPE_P2P_DEVICE) {
iwl_mvm_cancel_session_protection(mvm, mvmvif,
mvmvif->time_event_data.id);
- set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status);
+ iwl_mvm_p2p_roc_finished(mvm);
} else {
iwl_mvm_remove_aux_roc_te(mvm, mvmvif,
&mvmvif->time_event_data);
+ iwl_mvm_roc_finished(mvm);
}
- iwl_mvm_roc_finished(mvm);
-
return;
}
@@ -1025,12 +1043,11 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
if (te_data->vif->type == NL80211_IFTYPE_P2P_DEVICE) {
iwl_mvm_remove_time_event(mvm, mvmvif, te_data);
- set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status);
+ iwl_mvm_p2p_roc_finished(mvm);
} else {
iwl_mvm_remove_aux_roc_te(mvm, mvmvif, te_data);
+ iwl_mvm_roc_finished(mvm);
}
-
- iwl_mvm_roc_finished(mvm);
}
void iwl_mvm_remove_csa_period(struct iwl_mvm *mvm,
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 16baee3d52ae..8dc1b8eecb86 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -1093,29 +1093,97 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY,
IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY,
IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
- iwl_cfg_bz_a0_hr_b0, iwl_ax201_name),
+ iwl_cfg_bz_a0_hr_b0, iwl_bz_name),
_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY,
IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY,
IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
- iwl_cfg_bz_a0_gf_a0, iwl_ax211_name),
+ iwl_cfg_bz_a0_gf_a0, iwl_bz_name),
_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY,
IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY,
IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_CDB,
- iwl_cfg_bz_a0_gf4_a0, iwl_ax211_name),
+ iwl_cfg_bz_a0_gf4_a0, iwl_bz_name),
_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY,
IWL_CFG_RF_TYPE_MR, IWL_CFG_ANY,
IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
- iwl_cfg_bz_a0_mr_a0, iwl_ax211_name),
+ iwl_cfg_bz_a0_mr_a0, iwl_bz_name),
+
+/* SoF with JF2 */
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY,
+ IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF,
+ IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB,
+ iwlax210_2ax_cfg_so_jf_b0, iwl9560_160_name),
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY,
+ IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF,
+ IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB,
+ iwlax210_2ax_cfg_so_jf_b0, iwl9560_name),
+
+/* SoF with JF */
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY,
+ IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1,
+ IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB,
+ iwlax210_2ax_cfg_so_jf_b0, iwl9461_160_name),
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY,
+ IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV,
+ IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB,
+ iwlax210_2ax_cfg_so_jf_b0, iwl9462_160_name),
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY,
+ IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1,
+ IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB,
+ iwlax210_2ax_cfg_so_jf_b0, iwl9461_name),
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY,
+ IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV,
+ IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB,
+ iwlax210_2ax_cfg_so_jf_b0, iwl9462_name),
/* So with GF */
_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY,
IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY,
IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB,
- iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_name)
+ iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_name),
+
+/* So with JF2 */
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY,
+ IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF,
+ IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB,
+ iwlax210_2ax_cfg_so_jf_b0, iwl9560_160_name),
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY,
+ IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF,
+ IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB,
+ iwlax210_2ax_cfg_so_jf_b0, iwl9560_name),
+
+/* So with JF */
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY,
+ IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1,
+ IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB,
+ iwlax210_2ax_cfg_so_jf_b0, iwl9461_160_name),
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY,
+ IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV,
+ IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB,
+ iwlax210_2ax_cfg_so_jf_b0, iwl9462_160_name),
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY,
+ IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1,
+ IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB,
+ iwlax210_2ax_cfg_so_jf_b0, iwl9461_name),
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY,
+ IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV,
+ IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB,
+ iwlax210_2ax_cfg_so_jf_b0, iwl9462_name)
#endif /* CONFIG_IWLMVM */
};
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index cc550f6ef957..a43e56c7689f 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -42,6 +42,7 @@ struct iwl_host_cmd;
* struct iwl_rx_mem_buffer
* @page_dma: bus address of rxb page
* @page: driver's pointer to the rxb page
+ * @list: list entry for the membuffer
* @invalid: rxb is in driver ownership - not owned by HW
* @vid: index of this rxb in the global table
* @offset: indicates which offset of the page (in bytes)
@@ -50,10 +51,10 @@ struct iwl_host_cmd;
struct iwl_rx_mem_buffer {
dma_addr_t page_dma;
struct page *page;
- u16 vid;
- bool invalid;
struct list_head list;
u32 offset;
+ u16 vid;
+ bool invalid;
};
/**
@@ -253,6 +254,13 @@ struct cont_rec {
};
#endif
+enum iwl_pcie_fw_reset_state {
+ FW_RESET_IDLE,
+ FW_RESET_REQUESTED,
+ FW_RESET_OK,
+ FW_RESET_ERROR,
+};
+
/**
* struct iwl_trans_pcie - PCIe transport specific data
* @rxq: all the RX queue data
@@ -404,7 +412,7 @@ struct iwl_trans_pcie {
dma_addr_t base_rb_stts_dma;
bool fw_reset_handshake;
- bool fw_reset_done;
+ enum iwl_pcie_fw_reset_state fw_reset_state;
wait_queue_head_t fw_reset_waitq;
char rf_name[32];
@@ -670,19 +678,19 @@ static inline const char *queue_name(struct device *dev,
IWL_SHARED_IRQ_FIRST_RSS ? 1 : 0;
if (i == 0)
- return DRV_NAME ": shared IRQ";
+ return DRV_NAME ":shared_IRQ";
return devm_kasprintf(dev, GFP_KERNEL,
- DRV_NAME ": queue %d", i + vec);
+ DRV_NAME ":queue_%d", i + vec);
}
if (i == 0)
- return DRV_NAME ": default queue";
+ return DRV_NAME ":default_queue";
if (i == trans_p->alloc_vecs - 1)
- return DRV_NAME ": exception";
+ return DRV_NAME ":exception";
return devm_kasprintf(dev, GFP_KERNEL,
- DRV_NAME ": queue %d", i);
+ DRV_NAME ":queue_%d", i);
}
static inline void iwl_enable_rfkill_int(struct iwl_trans *trans)
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index 4f6f4b2720f0..8e45eb38304b 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
@@ -487,6 +487,9 @@ void iwl_pcie_free_rbs_pool(struct iwl_trans *trans)
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
int i;
+ if (!trans_pcie->rx_pool)
+ return;
+
for (i = 0; i < RX_POOL_SIZE(trans_pcie->num_rx_bufs); i++) {
if (!trans_pcie->rx_pool[i].page)
continue;
@@ -1062,7 +1065,7 @@ static int _iwl_pcie_rx_init(struct iwl_trans *trans)
INIT_LIST_HEAD(&rba->rbd_empty);
spin_unlock_bh(&rba->lock);
- /* free all first - we might be reconfigured for a different size */
+ /* free all first - we overwrite everything here */
iwl_pcie_free_rbs_pool(trans);
for (i = 0; i < RX_QUEUE_SIZE; i++)
@@ -1653,7 +1656,7 @@ static void iwl_pcie_irq_handle_error(struct iwl_trans *trans)
/* The STATUS_FW_ERROR bit is set in this function. This must happen
* before we wake up the command caller, to ensure a proper cleanup. */
- iwl_trans_fw_error(trans);
+ iwl_trans_fw_error(trans, false);
clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
wake_up(&trans->wait_command_queue);
@@ -2225,7 +2228,13 @@ irqreturn_t iwl_pcie_irq_msix_handler(int irq, void *dev_id)
"Microcode SW error detected. Restarting 0x%X.\n",
inta_fh);
isr_stats->sw++;
- iwl_pcie_irq_handle_error(trans);
+ /* during FW reset flow report errors from there */
+ if (trans_pcie->fw_reset_state == FW_RESET_REQUESTED) {
+ trans_pcie->fw_reset_state = FW_RESET_ERROR;
+ wake_up(&trans_pcie->fw_reset_waitq);
+ } else {
+ iwl_pcie_irq_handle_error(trans);
+ }
}
/* After checking FH register check HW register */
@@ -2293,7 +2302,7 @@ irqreturn_t iwl_pcie_irq_msix_handler(int irq, void *dev_id)
if (inta_hw & MSIX_HW_INT_CAUSES_REG_RESET_DONE) {
IWL_DEBUG_ISR(trans, "Reset flow completed\n");
- trans_pcie->fw_reset_done = true;
+ trans_pcie->fw_reset_state = FW_RESET_OK;
wake_up(&trans_pcie->fw_reset_waitq);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
index a34009357227..bf0c32a74ca4 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
@@ -87,7 +87,12 @@ static void iwl_pcie_gen2_apm_stop(struct iwl_trans *trans, bool op_mode_leave)
* Clear "initialization complete" bit to move adapter from
* D0A* (powered-up Active) --> D0U* (Uninitialized) state.
*/
- iwl_clear_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+ if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ)
+ iwl_clear_bit(trans, CSR_GP_CNTRL,
+ CSR_GP_CNTRL_REG_FLAG_MAC_INIT);
+ else
+ iwl_clear_bit(trans, CSR_GP_CNTRL,
+ CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
}
static void iwl_trans_pcie_fw_reset_handshake(struct iwl_trans *trans)
@@ -95,7 +100,7 @@ static void iwl_trans_pcie_fw_reset_handshake(struct iwl_trans *trans)
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
int ret;
- trans_pcie->fw_reset_done = false;
+ trans_pcie->fw_reset_state = FW_RESET_REQUESTED;
if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
iwl_write_umac_prph(trans, UREG_NIC_SET_NMI_DRIVER,
@@ -106,10 +111,15 @@ static void iwl_trans_pcie_fw_reset_handshake(struct iwl_trans *trans)
/* wait 200ms */
ret = wait_event_timeout(trans_pcie->fw_reset_waitq,
- trans_pcie->fw_reset_done, FW_RESET_TIMEOUT);
- if (!ret)
+ trans_pcie->fw_reset_state != FW_RESET_REQUESTED,
+ FW_RESET_TIMEOUT);
+ if (!ret || trans_pcie->fw_reset_state == FW_RESET_ERROR) {
IWL_INFO(trans,
"firmware didn't ACK the reset - continue anyway\n");
+ iwl_trans_fw_error(trans, true);
+ }
+
+ trans_pcie->fw_reset_state = FW_RESET_IDLE;
}
void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans)
@@ -121,9 +131,21 @@ void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans)
if (trans_pcie->is_down)
return;
- if (trans_pcie->fw_reset_handshake &&
- trans->state >= IWL_TRANS_FW_STARTED)
- iwl_trans_pcie_fw_reset_handshake(trans);
+ if (trans->state >= IWL_TRANS_FW_STARTED) {
+ if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) {
+ iwl_set_bit(trans, CSR_GP_CNTRL,
+ CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_REQ);
+ iwl_poll_bit(trans, CSR_GP_CNTRL,
+ CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_STATUS,
+ CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_STATUS,
+ 5000);
+ msleep(100);
+ iwl_set_bit(trans, CSR_GP_CNTRL,
+ CSR_GP_CNTRL_REG_FLAG_SW_RESET);
+ } else if (trans_pcie->fw_reset_handshake) {
+ iwl_trans_pcie_fw_reset_handshake(trans);
+ }
+ }
trans_pcie->is_down = true;
@@ -154,9 +176,17 @@ void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans)
iwl_pcie_ctxt_info_free(trans);
/* Make sure (redundant) we've released our request to stay awake */
- iwl_clear_bit(trans, CSR_GP_CNTRL,
- CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+ if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ)
+ iwl_clear_bit(trans, CSR_GP_CNTRL,
+ CSR_GP_CNTRL_REG_FLAG_BZ_MAC_ACCESS_REQ);
+ else
+ iwl_clear_bit(trans, CSR_GP_CNTRL,
+ CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+ if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) {
+ iwl_set_bit(trans, CSR_GP_CNTRL,
+ CSR_GP_CNTRL_REG_FLAG_SW_RESET);
+ }
/* Stop the device, and put it in low power state */
iwl_pcie_gen2_apm_stop(trans, false);
@@ -436,7 +466,10 @@ int iwl_trans_pcie_gen2_start_fw(struct iwl_trans *trans,
iwl_pcie_set_ltr(trans);
- if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210)
+ if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ)
+ iwl_set_bit(trans, CSR_GP_CNTRL,
+ CSR_GP_CNTRL_REG_FLAG_ROM_START);
+ else if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210)
iwl_write_umac_prph(trans, UREG_CPU_INIT_RUN, 1);
else
iwl_write_prph(trans, UREG_CPU_INIT_RUN, 1);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index bee6b4574226..f252680f18e8 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -449,11 +449,23 @@ void iwl_pcie_apm_stop_master(struct iwl_trans *trans)
int ret;
/* stop device's busmaster DMA activity */
- iwl_set_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_STOP_MASTER);
- ret = iwl_poll_bit(trans, CSR_RESET,
- CSR_RESET_REG_FLAG_MASTER_DISABLED,
- CSR_RESET_REG_FLAG_MASTER_DISABLED, 100);
+ if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) {
+ iwl_set_bit(trans, CSR_GP_CNTRL,
+ CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_REQ);
+
+ ret = iwl_poll_bit(trans, CSR_GP_CNTRL,
+ CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_STATUS,
+ CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_STATUS,
+ 100);
+ } else {
+ iwl_set_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_STOP_MASTER);
+
+ ret = iwl_poll_bit(trans, CSR_RESET,
+ CSR_RESET_REG_FLAG_MASTER_DISABLED,
+ CSR_RESET_REG_FLAG_MASTER_DISABLED, 100);
+ }
+
if (ret < 0)
IWL_WARN(trans, "Master Disable Timed Out, 100 usec\n");
@@ -1866,6 +1878,9 @@ static void iwl_trans_pcie_configure(struct iwl_trans *trans,
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+ /* free all first - we might be reconfigured for a different size */
+ iwl_pcie_free_rbs_pool(trans);
+
trans->txqs.cmd.q_id = trans_cfg->cmd_queue;
trans->txqs.cmd.fifo = trans_cfg->cmd_fifo;
trans->txqs.cmd.wdg_timeout = trans_cfg->cmd_q_wdg_timeout;
@@ -1992,15 +2007,24 @@ bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans)
{
int ret;
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+ u32 write = CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ;
+ u32 mask = CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY |
+ CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP;
+ u32 poll = CSR_GP_CNTRL_REG_VAL_MAC_ACCESS_EN;
spin_lock(&trans_pcie->reg_lock);
if (trans_pcie->cmd_hold_nic_awake)
goto out;
+ if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) {
+ write = CSR_GP_CNTRL_REG_FLAG_BZ_MAC_ACCESS_REQ;
+ mask = CSR_GP_CNTRL_REG_FLAG_MAC_STATUS;
+ poll = CSR_GP_CNTRL_REG_FLAG_MAC_STATUS;
+ }
+
/* this bit wakes up the NIC */
- __iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL,
- CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+ __iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL, write);
if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_8000)
udelay(2);
@@ -2024,10 +2048,7 @@ bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans)
* 5000 series and later (including 1000 series) have non-volatile SRAM,
* and do not save/restore SRAM when power cycling.
*/
- ret = iwl_poll_bit(trans, CSR_GP_CNTRL,
- CSR_GP_CNTRL_REG_VAL_MAC_ACCESS_EN,
- (CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY |
- CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000);
+ ret = iwl_poll_bit(trans, CSR_GP_CNTRL, poll, mask, 15000);
if (unlikely(ret < 0)) {
u32 cntrl = iwl_read32(trans, CSR_GP_CNTRL);
@@ -2947,8 +2968,8 @@ static u32 iwl_trans_pcie_dump_rbs(struct iwl_trans *trans,
struct iwl_rx_mem_buffer *rxb = rxq->queue[i];
struct iwl_fw_error_dump_rb *rb;
- dma_unmap_page(trans->dev, rxb->page_dma, max_len,
- DMA_FROM_DEVICE);
+ dma_sync_single_for_cpu(trans->dev, rxb->page_dma,
+ max_len, DMA_FROM_DEVICE);
rb_len += sizeof(**data) + sizeof(*rb) + max_len;
@@ -2957,10 +2978,6 @@ static u32 iwl_trans_pcie_dump_rbs(struct iwl_trans *trans,
rb = (void *)(*data)->data;
rb->index = cpu_to_le32(i);
memcpy(rb->data, page_address(rxb->page), max_len);
- /* remap the page for the free benefit */
- rxb->page_dma = dma_map_page(trans->dev, rxb->page,
- rxb->offset, max_len,
- DMA_FROM_DEVICE);
*data = iwl_fw_error_next_data(*data);
}
@@ -3489,15 +3506,9 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
pci_set_master(pdev);
addr_size = trans->txqs.tfd.addr_size;
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(addr_size));
- if (!ret)
- ret = pci_set_consistent_dma_mask(pdev,
- DMA_BIT_MASK(addr_size));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_size));
if (ret) {
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
- if (!ret)
- ret = pci_set_consistent_dma_mask(pdev,
- DMA_BIT_MASK(32));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
/* both attempts failed: */
if (ret) {
dev_err(&pdev->dev, "No suitable DMA available\n");
diff --git a/drivers/net/wireless/intersil/Kconfig b/drivers/net/wireless/intersil/Kconfig
index c52d9b535623..bd6bf70ece03 100644
--- a/drivers/net/wireless/intersil/Kconfig
+++ b/drivers/net/wireless/intersil/Kconfig
@@ -16,24 +16,4 @@ source "drivers/net/wireless/intersil/hostap/Kconfig"
source "drivers/net/wireless/intersil/orinoco/Kconfig"
source "drivers/net/wireless/intersil/p54/Kconfig"
-config PRISM54
- tristate 'Intersil Prism GT/Duette/Indigo PCI/Cardbus (DEPRECATED)'
- depends on PCI
- select WIRELESS_EXT
- select WEXT_SPY
- select WEXT_PRIV
- select FW_LOADER
- help
- This enables support for FullMAC PCI/Cardbus prism54 devices. This
- driver is now deprecated in favor for the SoftMAC driver, p54pci.
- p54pci supports FullMAC PCI/Cardbus devices as well.
-
- For more information refer to the p54 wiki:
-
- http://wireless.wiki.kernel.org/en/users/Drivers/p54
-
- Note: You need a motherboard with DMA support to use any of these cards
-
- When built as module you get the module prism54
-
endif # WLAN_VENDOR_INTERSIL
diff --git a/drivers/net/wireless/intersil/Makefile b/drivers/net/wireless/intersil/Makefile
index aa630e9c3d3d..65281d1b3d85 100644
--- a/drivers/net/wireless/intersil/Makefile
+++ b/drivers/net/wireless/intersil/Makefile
@@ -2,4 +2,3 @@
obj-$(CONFIG_HOSTAP) += hostap/
obj-$(CONFIG_HERMES) += orinoco/
obj-$(CONFIG_P54_COMMON) += p54/
-obj-$(CONFIG_PRISM54) += prism54/
diff --git a/drivers/net/wireless/intersil/hostap/hostap.h b/drivers/net/wireless/intersil/hostap/hostap.h
index c4b81ff7d7e4..c17ab6dbbb53 100644
--- a/drivers/net/wireless/intersil/hostap/hostap.h
+++ b/drivers/net/wireless/intersil/hostap/hostap.h
@@ -93,6 +93,7 @@ extern const struct iw_handler_def hostap_iw_handler_def;
extern const struct ethtool_ops prism2_ethtool_ops;
int hostap_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
-
+int hostap_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd);
#endif /* HOSTAP_H */
diff --git a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
index 49766b285230..0a376f112db9 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
@@ -3941,7 +3941,8 @@ const struct iw_handler_def hostap_iw_handler_def =
.get_wireless_stats = hostap_get_wireless_stats,
};
-
+/* Private ioctls (iwpriv) that have not yet been converted
+ * into new wireless extensions API */
int hostap_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
struct iwreq *wrq = (struct iwreq *) ifr;
@@ -3953,9 +3954,6 @@ int hostap_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
local = iface->local;
switch (cmd) {
- /* Private ioctls (iwpriv) that have not yet been converted
- * into new wireless extensions API */
-
case PRISM2_IOCTL_INQUIRE:
if (!capable(CAP_NET_ADMIN)) ret = -EPERM;
else ret = prism2_ioctl_priv_inquire(dev, (int *) wrq->u.name);
@@ -4009,11 +4007,31 @@ int hostap_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
wrq->u.ap_addr.sa_data);
break;
#endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ return ret;
+}
+/* Private ioctls that are not used with iwpriv;
+ * in SIOCDEVPRIVATE range */
+int hostap_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
+{
+ struct iwreq *wrq = (struct iwreq *)ifr;
+ struct hostap_interface *iface;
+ local_info_t *local;
+ int ret = 0;
- /* Private ioctls that are not used with iwpriv;
- * in SIOCDEVPRIVATE range */
+ iface = netdev_priv(dev);
+ local = iface->local;
+
+ if (in_compat_syscall()) /* not implemented yet */
+ return -EOPNOTSUPP;
+ switch (cmd) {
#ifdef PRISM2_DOWNLOAD_SUPPORT
case PRISM2_IOCTL_DOWNLOAD:
if (!capable(CAP_NET_ADMIN)) ret = -EPERM;
diff --git a/drivers/net/wireless/intersil/hostap/hostap_main.c b/drivers/net/wireless/intersil/hostap/hostap_main.c
index de97b3304115..54f67b682b6a 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_main.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_main.c
@@ -797,6 +797,7 @@ static const struct net_device_ops hostap_netdev_ops = {
.ndo_open = prism2_open,
.ndo_stop = prism2_close,
.ndo_do_ioctl = hostap_ioctl,
+ .ndo_siocdevprivate = hostap_siocdevprivate,
.ndo_set_mac_address = prism2_set_mac_address,
.ndo_set_rx_mode = hostap_set_multicast_list,
.ndo_tx_timeout = prism2_tx_timeout,
@@ -809,6 +810,7 @@ static const struct net_device_ops hostap_mgmt_netdev_ops = {
.ndo_open = prism2_open,
.ndo_stop = prism2_close,
.ndo_do_ioctl = hostap_ioctl,
+ .ndo_siocdevprivate = hostap_siocdevprivate,
.ndo_set_mac_address = prism2_set_mac_address,
.ndo_set_rx_mode = hostap_set_multicast_list,
.ndo_tx_timeout = prism2_tx_timeout,
@@ -821,6 +823,7 @@ static const struct net_device_ops hostap_master_ops = {
.ndo_open = prism2_open,
.ndo_stop = prism2_close,
.ndo_do_ioctl = hostap_ioctl,
+ .ndo_siocdevprivate = hostap_siocdevprivate,
.ndo_set_mac_address = prism2_set_mac_address,
.ndo_set_rx_mode = hostap_set_multicast_list,
.ndo_tx_timeout = prism2_tx_timeout,
diff --git a/drivers/net/wireless/intersil/prism54/Makefile b/drivers/net/wireless/intersil/prism54/Makefile
deleted file mode 100644
index 4f5572dffb5e..000000000000
--- a/drivers/net/wireless/intersil/prism54/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-# $Id: Makefile.k26,v 1.7 2004/01/30 16:24:00 ajfa Exp $
-
-prism54-objs := islpci_eth.o islpci_mgt.o \
- isl_38xx.o isl_ioctl.o islpci_dev.o \
- islpci_hotplug.o oid_mgt.o
-
-obj-$(CONFIG_PRISM54) += prism54.o
-
diff --git a/drivers/net/wireless/intersil/prism54/isl_38xx.c b/drivers/net/wireless/intersil/prism54/isl_38xx.c
deleted file mode 100644
index ae964de347f7..000000000000
--- a/drivers/net/wireless/intersil/prism54/isl_38xx.c
+++ /dev/null
@@ -1,245 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2002 Intersil Americas Inc.
- * Copyright (C) 2003-2004 Luis R. Rodriguez <mcgrof@ruslug.rutgers.edu>_
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/delay.h>
-#include <linux/ktime.h>
-
-#include <linux/uaccess.h>
-#include <asm/io.h>
-
-#include "prismcompat.h"
-#include "isl_38xx.h"
-#include "islpci_dev.h"
-#include "islpci_mgt.h"
-
-/******************************************************************************
- Device Interface & Control functions
-******************************************************************************/
-
-/**
- * isl38xx_disable_interrupts - disable all interrupts
- * @device: pci memory base address
- *
- * Instructs the device to disable all interrupt reporting by asserting
- * the IRQ line. New events may still show up in the interrupt identification
- * register located at offset %ISL38XX_INT_IDENT_REG.
- */
-void
-isl38xx_disable_interrupts(void __iomem *device)
-{
- isl38xx_w32_flush(device, 0x00000000, ISL38XX_INT_EN_REG);
- udelay(ISL38XX_WRITEIO_DELAY);
-}
-
-void
-isl38xx_handle_sleep_request(isl38xx_control_block *control_block,
- int *powerstate, void __iomem *device_base)
-{
- /* device requests to go into sleep mode
- * check whether the transmit queues for data and management are empty */
- if (isl38xx_in_queue(control_block, ISL38XX_CB_TX_DATA_LQ))
- /* data tx queue not empty */
- return;
-
- if (isl38xx_in_queue(control_block, ISL38XX_CB_TX_MGMTQ))
- /* management tx queue not empty */
- return;
-
- /* check also whether received frames are pending */
- if (isl38xx_in_queue(control_block, ISL38XX_CB_RX_DATA_LQ))
- /* data rx queue not empty */
- return;
-
- if (isl38xx_in_queue(control_block, ISL38XX_CB_RX_MGMTQ))
- /* management rx queue not empty */
- return;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING, "Device going to sleep mode\n");
-#endif
-
- /* all queues are empty, allow the device to go into sleep mode */
- *powerstate = ISL38XX_PSM_POWERSAVE_STATE;
-
- /* assert the Sleep interrupt in the Device Interrupt Register */
- isl38xx_w32_flush(device_base, ISL38XX_DEV_INT_SLEEP,
- ISL38XX_DEV_INT_REG);
- udelay(ISL38XX_WRITEIO_DELAY);
-}
-
-void
-isl38xx_handle_wakeup(isl38xx_control_block *control_block,
- int *powerstate, void __iomem *device_base)
-{
- /* device is in active state, update the powerstate flag */
- *powerstate = ISL38XX_PSM_ACTIVE_STATE;
-
- /* now check whether there are frames pending for the card */
- if (!isl38xx_in_queue(control_block, ISL38XX_CB_TX_DATA_LQ)
- && !isl38xx_in_queue(control_block, ISL38XX_CB_TX_MGMTQ))
- return;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_ANYTHING, "Wake up handler trigger the device\n");
-#endif
-
- /* either data or management transmit queue has a frame pending
- * trigger the device by setting the Update bit in the Device Int reg */
- isl38xx_w32_flush(device_base, ISL38XX_DEV_INT_UPDATE,
- ISL38XX_DEV_INT_REG);
- udelay(ISL38XX_WRITEIO_DELAY);
-}
-
-void
-isl38xx_trigger_device(int asleep, void __iomem *device_base)
-{
- u32 reg;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- u32 counter = 0;
- struct timespec64 current_ts64;
- DEBUG(SHOW_FUNCTION_CALLS, "isl38xx trigger device\n");
-#endif
-
- /* check whether the device is in power save mode */
- if (asleep) {
- /* device is in powersave, trigger the device for wakeup */
-#if VERBOSE > SHOW_ERROR_MESSAGES
- ktime_get_real_ts64(&current_ts64);
- DEBUG(SHOW_TRACING, "%lld.%09ld Device wakeup triggered\n",
- (s64)current_ts64.tv_sec, current_ts64.tv_nsec);
-
- DEBUG(SHOW_TRACING, "%lld.%09ld Device register read %08x\n",
- (s64)current_ts64.tv_sec, current_ts64.tv_nsec,
- readl(device_base + ISL38XX_CTRL_STAT_REG));
-#endif
-
- reg = readl(device_base + ISL38XX_INT_IDENT_REG);
- if (reg == 0xabadface) {
-#if VERBOSE > SHOW_ERROR_MESSAGES
- ktime_get_real_ts64(&current_ts64);
- DEBUG(SHOW_TRACING,
- "%lld.%09ld Device register abadface\n",
- (s64)current_ts64.tv_sec, current_ts64.tv_nsec);
-#endif
- /* read the Device Status Register until Sleepmode bit is set */
- while (reg = readl(device_base + ISL38XX_CTRL_STAT_REG),
- (reg & ISL38XX_CTRL_STAT_SLEEPMODE) == 0) {
- udelay(ISL38XX_WRITEIO_DELAY);
-#if VERBOSE > SHOW_ERROR_MESSAGES
- counter++;
-#endif
- }
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING,
- "%lld.%09ld Device register read %08x\n",
- (s64)current_ts64.tv_sec, current_ts64.tv_nsec,
- readl(device_base + ISL38XX_CTRL_STAT_REG));
- ktime_get_real_ts64(&current_ts64);
- DEBUG(SHOW_TRACING,
- "%lld.%09ld Device asleep counter %i\n",
- (s64)current_ts64.tv_sec, current_ts64.tv_nsec,
- counter);
-#endif
- }
- /* assert the Wakeup interrupt in the Device Interrupt Register */
- isl38xx_w32_flush(device_base, ISL38XX_DEV_INT_WAKEUP,
- ISL38XX_DEV_INT_REG);
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- udelay(ISL38XX_WRITEIO_DELAY);
-
- /* perform another read on the Device Status Register */
- reg = readl(device_base + ISL38XX_CTRL_STAT_REG);
- ktime_get_real_ts64(&current_ts64);
- DEBUG(SHOW_TRACING, "%lld.%00ld Device register read %08x\n",
- (s64)current_ts64.tv_sec, current_ts64.tv_nsec, reg);
-#endif
- } else {
- /* device is (still) awake */
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING, "Device is in active state\n");
-#endif
- /* trigger the device by setting the Update bit in the Device Int reg */
-
- isl38xx_w32_flush(device_base, ISL38XX_DEV_INT_UPDATE,
- ISL38XX_DEV_INT_REG);
- }
-}
-
-void
-isl38xx_interface_reset(void __iomem *device_base, dma_addr_t host_address)
-{
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_FUNCTION_CALLS, "isl38xx_interface_reset\n");
-#endif
-
- /* load the address of the control block in the device */
- isl38xx_w32_flush(device_base, host_address, ISL38XX_CTRL_BLK_BASE_REG);
- udelay(ISL38XX_WRITEIO_DELAY);
-
- /* set the reset bit in the Device Interrupt Register */
- isl38xx_w32_flush(device_base, ISL38XX_DEV_INT_RESET, ISL38XX_DEV_INT_REG);
- udelay(ISL38XX_WRITEIO_DELAY);
-
- /* enable the interrupt for detecting initialization */
-
- /* Note: Do not enable other interrupts here. We want the
- * device to have come up first 100% before allowing any other
- * interrupts. */
- isl38xx_w32_flush(device_base, ISL38XX_INT_IDENT_INIT, ISL38XX_INT_EN_REG);
- udelay(ISL38XX_WRITEIO_DELAY); /* allow complete full reset */
-}
-
-void
-isl38xx_enable_common_interrupts(void __iomem *device_base)
-{
- u32 reg;
-
- reg = ISL38XX_INT_IDENT_UPDATE | ISL38XX_INT_IDENT_SLEEP |
- ISL38XX_INT_IDENT_WAKEUP;
- isl38xx_w32_flush(device_base, reg, ISL38XX_INT_EN_REG);
- udelay(ISL38XX_WRITEIO_DELAY);
-}
-
-int
-isl38xx_in_queue(isl38xx_control_block *cb, int queue)
-{
- const s32 delta = (le32_to_cpu(cb->driver_curr_frag[queue]) -
- le32_to_cpu(cb->device_curr_frag[queue]));
-
- /* determine the amount of fragments in the queue depending on the type
- * of the queue, either transmit or receive */
-
- BUG_ON(delta < 0); /* driver ptr must be ahead of device ptr */
-
- switch (queue) {
- /* send queues */
- case ISL38XX_CB_TX_MGMTQ:
- BUG_ON(delta > ISL38XX_CB_MGMT_QSIZE);
- fallthrough;
-
- case ISL38XX_CB_TX_DATA_LQ:
- case ISL38XX_CB_TX_DATA_HQ:
- BUG_ON(delta > ISL38XX_CB_TX_QSIZE);
- return delta;
-
- /* receive queues */
- case ISL38XX_CB_RX_MGMTQ:
- BUG_ON(delta > ISL38XX_CB_MGMT_QSIZE);
- return ISL38XX_CB_MGMT_QSIZE - delta;
-
- case ISL38XX_CB_RX_DATA_LQ:
- case ISL38XX_CB_RX_DATA_HQ:
- BUG_ON(delta > ISL38XX_CB_RX_QSIZE);
- return ISL38XX_CB_RX_QSIZE - delta;
- }
- BUG();
- return 0;
-}
diff --git a/drivers/net/wireless/intersil/prism54/isl_38xx.h b/drivers/net/wireless/intersil/prism54/isl_38xx.h
deleted file mode 100644
index 69218b8b2b23..000000000000
--- a/drivers/net/wireless/intersil/prism54/isl_38xx.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2002 Intersil Americas Inc.
- */
-
-#ifndef _ISL_38XX_H
-#define _ISL_38XX_H
-
-#include <asm/io.h>
-#include <asm/byteorder.h>
-
-#define ISL38XX_CB_RX_QSIZE 8
-#define ISL38XX_CB_TX_QSIZE 32
-
-/* ISL38XX Access Point Specific definitions */
-#define ISL38XX_MAX_WDS_LINKS 8
-
-/* ISL38xx Client Specific definitions */
-#define ISL38XX_PSM_ACTIVE_STATE 0
-#define ISL38XX_PSM_POWERSAVE_STATE 1
-
-/* ISL38XX Host Interface Definitions */
-#define ISL38XX_PCI_MEM_SIZE 0x02000
-#define ISL38XX_MEMORY_WINDOW_SIZE 0x01000
-#define ISL38XX_DEV_FIRMWARE_ADDRES 0x20000
-#define ISL38XX_WRITEIO_DELAY 10 /* in us */
-#define ISL38XX_RESET_DELAY 50 /* in ms */
-#define ISL38XX_WAIT_CYCLE 10 /* in 10ms */
-#define ISL38XX_MAX_WAIT_CYCLES 10
-
-/* PCI Memory Area */
-#define ISL38XX_HARDWARE_REG 0x0000
-#define ISL38XX_CARDBUS_CIS 0x0800
-#define ISL38XX_DIRECT_MEM_WIN 0x1000
-
-/* Hardware registers */
-#define ISL38XX_DEV_INT_REG 0x0000
-#define ISL38XX_INT_IDENT_REG 0x0010
-#define ISL38XX_INT_ACK_REG 0x0014
-#define ISL38XX_INT_EN_REG 0x0018
-#define ISL38XX_GEN_PURP_COM_REG_1 0x0020
-#define ISL38XX_GEN_PURP_COM_REG_2 0x0024
-#define ISL38XX_CTRL_BLK_BASE_REG ISL38XX_GEN_PURP_COM_REG_1
-#define ISL38XX_DIR_MEM_BASE_REG 0x0030
-#define ISL38XX_CTRL_STAT_REG 0x0078
-
-/* High end mobos queue up pci writes, the following
- * is used to "read" from after a write to force flush */
-#define ISL38XX_PCI_POSTING_FLUSH ISL38XX_INT_EN_REG
-
-/**
- * isl38xx_w32_flush - PCI iomem write helper
- * @base: (host) memory base address of the device
- * @val: 32bit value (host order) to write
- * @offset: byte offset into @base to write value to
- *
- * This helper takes care of writing a 32bit datum to the
- * specified offset into the device's pci memory space, and making sure
- * the pci memory buffers get flushed by performing one harmless read
- * from the %ISL38XX_PCI_POSTING_FLUSH offset.
- */
-static inline void
-isl38xx_w32_flush(void __iomem *base, u32 val, unsigned long offset)
-{
- writel(val, base + offset);
- (void) readl(base + ISL38XX_PCI_POSTING_FLUSH);
-}
-
-/* Device Interrupt register bits */
-#define ISL38XX_DEV_INT_RESET 0x0001
-#define ISL38XX_DEV_INT_UPDATE 0x0002
-#define ISL38XX_DEV_INT_WAKEUP 0x0008
-#define ISL38XX_DEV_INT_SLEEP 0x0010
-
-/* Interrupt Identification/Acknowledge/Enable register bits */
-#define ISL38XX_INT_IDENT_UPDATE 0x0002
-#define ISL38XX_INT_IDENT_INIT 0x0004
-#define ISL38XX_INT_IDENT_WAKEUP 0x0008
-#define ISL38XX_INT_IDENT_SLEEP 0x0010
-#define ISL38XX_INT_SOURCES 0x001E
-
-/* Control/Status register bits */
-/* Looks like there are other meaningful bits
- 0x20004400 seen in normal operation,
- 0x200044db at 'timeout waiting for mgmt response'
-*/
-#define ISL38XX_CTRL_STAT_SLEEPMODE 0x00000200
-#define ISL38XX_CTRL_STAT_CLKRUN 0x00800000
-#define ISL38XX_CTRL_STAT_RESET 0x10000000
-#define ISL38XX_CTRL_STAT_RAMBOOT 0x20000000
-#define ISL38XX_CTRL_STAT_STARTHALTED 0x40000000
-#define ISL38XX_CTRL_STAT_HOST_OVERRIDE 0x80000000
-
-/* Control Block definitions */
-#define ISL38XX_CB_RX_DATA_LQ 0
-#define ISL38XX_CB_TX_DATA_LQ 1
-#define ISL38XX_CB_RX_DATA_HQ 2
-#define ISL38XX_CB_TX_DATA_HQ 3
-#define ISL38XX_CB_RX_MGMTQ 4
-#define ISL38XX_CB_TX_MGMTQ 5
-#define ISL38XX_CB_QCOUNT 6
-#define ISL38XX_CB_MGMT_QSIZE 4
-#define ISL38XX_MIN_QTHRESHOLD 4 /* fragments */
-
-/* Memory Manager definitions */
-#define MGMT_FRAME_SIZE 1500 /* >= size struct obj_bsslist */
-#define MGMT_TX_FRAME_COUNT 24 /* max 4 + spare 4 + 8 init */
-#define MGMT_RX_FRAME_COUNT 24 /* 4*4 + spare 8 */
-#define MGMT_FRAME_COUNT (MGMT_TX_FRAME_COUNT + MGMT_RX_FRAME_COUNT)
-#define CONTROL_BLOCK_SIZE 1024 /* should be enough */
-#define PSM_FRAME_SIZE 1536
-#define PSM_MINIMAL_STATION_COUNT 64
-#define PSM_FRAME_COUNT PSM_MINIMAL_STATION_COUNT
-#define PSM_BUFFER_SIZE PSM_FRAME_SIZE * PSM_FRAME_COUNT
-#define MAX_TRAP_RX_QUEUE 4
-#define HOST_MEM_BLOCK CONTROL_BLOCK_SIZE + PSM_BUFFER_SIZE
-
-/* Fragment package definitions */
-#define FRAGMENT_FLAG_MF 0x0001
-#define MAX_FRAGMENT_SIZE 1536
-
-/* In monitor mode frames have a header. I don't know exactly how big those
- * frame can be but I've never seen any frame bigger than 1584... :
- */
-#define MAX_FRAGMENT_SIZE_RX 1600
-
-typedef struct {
- __le32 address; /* physical address on host */
- __le16 size; /* packet size */
- __le16 flags; /* set of bit-wise flags */
-} isl38xx_fragment;
-
-struct isl38xx_cb {
- __le32 driver_curr_frag[ISL38XX_CB_QCOUNT];
- __le32 device_curr_frag[ISL38XX_CB_QCOUNT];
- isl38xx_fragment rx_data_low[ISL38XX_CB_RX_QSIZE];
- isl38xx_fragment tx_data_low[ISL38XX_CB_TX_QSIZE];
- isl38xx_fragment rx_data_high[ISL38XX_CB_RX_QSIZE];
- isl38xx_fragment tx_data_high[ISL38XX_CB_TX_QSIZE];
- isl38xx_fragment rx_data_mgmt[ISL38XX_CB_MGMT_QSIZE];
- isl38xx_fragment tx_data_mgmt[ISL38XX_CB_MGMT_QSIZE];
-};
-
-typedef struct isl38xx_cb isl38xx_control_block;
-
-/* determine number of entries currently in queue */
-int isl38xx_in_queue(isl38xx_control_block *cb, int queue);
-
-void isl38xx_disable_interrupts(void __iomem *);
-void isl38xx_enable_common_interrupts(void __iomem *);
-
-void isl38xx_handle_sleep_request(isl38xx_control_block *, int *,
- void __iomem *);
-void isl38xx_handle_wakeup(isl38xx_control_block *, int *, void __iomem *);
-void isl38xx_trigger_device(int, void __iomem *);
-void isl38xx_interface_reset(void __iomem *, dma_addr_t);
-
-#endif /* _ISL_38XX_H */
diff --git a/drivers/net/wireless/intersil/prism54/isl_ioctl.c b/drivers/net/wireless/intersil/prism54/isl_ioctl.c
deleted file mode 100644
index 5e5ceafe098b..000000000000
--- a/drivers/net/wireless/intersil/prism54/isl_ioctl.c
+++ /dev/null
@@ -1,2909 +0,0 @@
-/*
- * Copyright (C) 2002 Intersil Americas Inc.
- * (C) 2003,2004 Aurelien Alleaume <slts@free.fr>
- * (C) 2003 Herbert Valerio Riedel <hvr@gnu.org>
- * (C) 2003 Luis R. Rodriguez <mcgrof@ruslug.rutgers.edu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- */
-
-#include <linux/capability.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/if_arp.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/etherdevice.h>
-
-#include <linux/uaccess.h>
-
-#include "prismcompat.h"
-#include "isl_ioctl.h"
-#include "islpci_mgt.h"
-#include "isl_oid.h" /* additional types and defs for isl38xx fw */
-#include "oid_mgt.h"
-
-#include <net/iw_handler.h> /* New driver API */
-
-#define KEY_SIZE_WEP104 13 /* 104/128-bit WEP keys */
-#define KEY_SIZE_WEP40 5 /* 40/64-bit WEP keys */
-/* KEY_SIZE_TKIP should match isl_oid.h, struct obj_key.key[] size */
-#define KEY_SIZE_TKIP 32 /* TKIP keys */
-
-static void prism54_wpa_bss_ie_add(islpci_private *priv, u8 *bssid,
- u8 *wpa_ie, size_t wpa_ie_len);
-static size_t prism54_wpa_bss_ie_get(islpci_private *priv, u8 *bssid, u8 *wpa_ie);
-static int prism54_set_wpa(struct net_device *, struct iw_request_info *,
- __u32 *, char *);
-
-/* In 500 kbps */
-static const unsigned char scan_rate_list[] = { 2, 4, 11, 22,
- 12, 18, 24, 36,
- 48, 72, 96, 108 };
-
-/**
- * prism54_mib_mode_helper - MIB change mode helper function
- * @priv: the &struct islpci_private object to modify
- * @iw_mode: new mode (%IW_MODE_*)
- *
- * This is a helper function, hence it does not lock. Make sure
- * caller deals with locking *if* necessary. This function sets the
- * mode-dependent mib values and does the mapping of the Linux
- * Wireless API modes to Device firmware modes. It also checks for
- * correct valid Linux wireless modes.
- */
-static int
-prism54_mib_mode_helper(islpci_private *priv, u32 iw_mode)
-{
- u32 config = INL_CONFIG_MANUALRUN;
- u32 mode, bsstype;
-
- /* For now, just catch early the Repeater and Secondary modes here */
- if (iw_mode == IW_MODE_REPEAT || iw_mode == IW_MODE_SECOND) {
- printk(KERN_DEBUG
- "%s(): Sorry, Repeater mode and Secondary mode "
- "are not yet supported by this driver.\n", __func__);
- return -EINVAL;
- }
-
- priv->iw_mode = iw_mode;
-
- switch (iw_mode) {
- case IW_MODE_AUTO:
- mode = INL_MODE_CLIENT;
- bsstype = DOT11_BSSTYPE_ANY;
- break;
- case IW_MODE_ADHOC:
- mode = INL_MODE_CLIENT;
- bsstype = DOT11_BSSTYPE_IBSS;
- break;
- case IW_MODE_INFRA:
- mode = INL_MODE_CLIENT;
- bsstype = DOT11_BSSTYPE_INFRA;
- break;
- case IW_MODE_MASTER:
- mode = INL_MODE_AP;
- bsstype = DOT11_BSSTYPE_INFRA;
- break;
- case IW_MODE_MONITOR:
- mode = INL_MODE_PROMISCUOUS;
- bsstype = DOT11_BSSTYPE_ANY;
- config |= INL_CONFIG_RXANNEX;
- break;
- default:
- return -EINVAL;
- }
-
- if (init_wds)
- config |= INL_CONFIG_WDS;
- mgt_set(priv, DOT11_OID_BSSTYPE, &bsstype);
- mgt_set(priv, OID_INL_CONFIG, &config);
- mgt_set(priv, OID_INL_MODE, &mode);
-
- return 0;
-}
-
-/*
- * prism54_mib_init - fill MIB cache with defaults
- *
- * this function initializes the struct given as @mib with defaults,
- * of which many are retrieved from the global module parameter
- * variables.
- */
-void
-prism54_mib_init(islpci_private *priv)
-{
- u32 channel, authen, wep, filter, dot1x, mlme, conformance, power, mode;
- struct obj_buffer psm_buffer = {
- .size = PSM_BUFFER_SIZE,
- .addr = priv->device_psm_buffer
- };
-
- channel = CARD_DEFAULT_CHANNEL;
- authen = CARD_DEFAULT_AUTHEN;
- wep = CARD_DEFAULT_WEP;
- filter = CARD_DEFAULT_FILTER; /* (0) Do not filter un-encrypted data */
- dot1x = CARD_DEFAULT_DOT1X;
- mlme = CARD_DEFAULT_MLME_MODE;
- conformance = CARD_DEFAULT_CONFORMANCE;
- power = 127;
- mode = CARD_DEFAULT_IW_MODE;
-
- mgt_set(priv, DOT11_OID_CHANNEL, &channel);
- mgt_set(priv, DOT11_OID_AUTHENABLE, &authen);
- mgt_set(priv, DOT11_OID_PRIVACYINVOKED, &wep);
- mgt_set(priv, DOT11_OID_PSMBUFFER, &psm_buffer);
- mgt_set(priv, DOT11_OID_EXUNENCRYPTED, &filter);
- mgt_set(priv, DOT11_OID_DOT1XENABLE, &dot1x);
- mgt_set(priv, DOT11_OID_MLMEAUTOLEVEL, &mlme);
- mgt_set(priv, OID_INL_DOT11D_CONFORMANCE, &conformance);
- mgt_set(priv, OID_INL_OUTPUTPOWER, &power);
-
- /* This sets all of the mode-dependent values */
- prism54_mib_mode_helper(priv, mode);
-}
-
-/* this will be executed outside of atomic context thanks to
- * schedule_work(), thus we can as well use sleeping semaphore
- * locking */
-void
-prism54_update_stats(struct work_struct *work)
-{
- islpci_private *priv = container_of(work, islpci_private, stats_work);
- char *data;
- struct obj_bss bss, *bss2;
- union oid_res_t r;
-
- mutex_lock(&priv->stats_lock);
-
-/* Noise floor.
- * I'm not sure if the unit is dBm.
- * Note : If we are not connected, this value seems to be irrelevant. */
-
- mgt_get_request(priv, DOT11_OID_NOISEFLOOR, 0, NULL, &r);
- priv->local_iwstatistics.qual.noise = r.u;
-
-/* Get the rssi of the link. To do this we need to retrieve a bss. */
-
- /* First get the MAC address of the AP we are associated with. */
- mgt_get_request(priv, DOT11_OID_BSSID, 0, NULL, &r);
- data = r.ptr;
-
- /* copy this MAC to the bss */
- memcpy(bss.address, data, ETH_ALEN);
- kfree(data);
-
- /* now ask for the corresponding bss */
- mgt_get_request(priv, DOT11_OID_BSSFIND, 0, (void *) &bss, &r);
- bss2 = r.ptr;
- /* report the rssi and use it to calculate
- * link quality through a signal-noise
- * ratio */
- priv->local_iwstatistics.qual.level = bss2->rssi;
- priv->local_iwstatistics.qual.qual =
- bss2->rssi - priv->iwstatistics.qual.noise;
-
- kfree(bss2);
-
- /* report that the stats are new */
- priv->local_iwstatistics.qual.updated = 0x7;
-
-/* Rx : unable to decrypt the MPDU */
- mgt_get_request(priv, DOT11_OID_PRIVRXFAILED, 0, NULL, &r);
- priv->local_iwstatistics.discard.code = r.u;
-
-/* Tx : Max MAC retries num reached */
- mgt_get_request(priv, DOT11_OID_MPDUTXFAILED, 0, NULL, &r);
- priv->local_iwstatistics.discard.retries = r.u;
-
- mutex_unlock(&priv->stats_lock);
-}
-
-struct iw_statistics *
-prism54_get_wireless_stats(struct net_device *ndev)
-{
- islpci_private *priv = netdev_priv(ndev);
-
- /* If the stats are being updated return old data */
- if (mutex_trylock(&priv->stats_lock)) {
- memcpy(&priv->iwstatistics, &priv->local_iwstatistics,
- sizeof (struct iw_statistics));
- /* They won't be marked updated for the next time */
- priv->local_iwstatistics.qual.updated = 0;
- mutex_unlock(&priv->stats_lock);
- } else
- priv->iwstatistics.qual.updated = 0;
-
- /* Update our wireless stats, but do not schedule to often
- * (max 1 HZ) */
- if ((priv->stats_timestamp == 0) ||
- time_after(jiffies, priv->stats_timestamp + 1 * HZ)) {
- schedule_work(&priv->stats_work);
- priv->stats_timestamp = jiffies;
- }
-
- return &priv->iwstatistics;
-}
-
-static int
-prism54_commit(struct net_device *ndev, struct iw_request_info *info,
- char *cwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
-
- /* simply re-set the last set SSID, this should commit most stuff */
-
- /* Commit in Monitor mode is not necessary, also setting essid
- * in Monitor mode does not make sense and isn't allowed for this
- * device's firmware */
- if (priv->iw_mode != IW_MODE_MONITOR)
- return mgt_set_request(priv, DOT11_OID_SSID, 0, NULL);
- return 0;
-}
-
-static int
-prism54_get_name(struct net_device *ndev, struct iw_request_info *info,
- char *cwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- char *capabilities;
- union oid_res_t r;
- int rvalue;
-
- if (islpci_get_state(priv) < PRV_STATE_INIT) {
- strncpy(cwrq, "NOT READY!", IFNAMSIZ);
- return 0;
- }
- rvalue = mgt_get_request(priv, OID_INL_PHYCAPABILITIES, 0, NULL, &r);
-
- switch (r.u) {
- case INL_PHYCAP_5000MHZ:
- capabilities = "IEEE 802.11a/b/g";
- break;
- case INL_PHYCAP_FAA:
- capabilities = "IEEE 802.11b/g - FAA Support";
- break;
- case INL_PHYCAP_2400MHZ:
- default:
- capabilities = "IEEE 802.11b/g"; /* Default */
- break;
- }
- strncpy(cwrq, capabilities, IFNAMSIZ);
- return rvalue;
-}
-
-static int
-prism54_set_freq(struct net_device *ndev, struct iw_request_info *info,
- struct iw_freq *fwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- int rvalue;
- u32 c;
-
- if (fwrq->m < 1000)
- /* we have a channel number */
- c = fwrq->m;
- else
- c = (fwrq->e == 1) ? channel_of_freq(fwrq->m / 100000) : 0;
-
- rvalue = c ? mgt_set_request(priv, DOT11_OID_CHANNEL, 0, &c) : -EINVAL;
-
- /* Call commit handler */
- return (rvalue ? rvalue : -EINPROGRESS);
-}
-
-static int
-prism54_get_freq(struct net_device *ndev, struct iw_request_info *info,
- struct iw_freq *fwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- union oid_res_t r;
- int rvalue;
-
- rvalue = mgt_get_request(priv, DOT11_OID_CHANNEL, 0, NULL, &r);
- fwrq->i = r.u;
- rvalue |= mgt_get_request(priv, DOT11_OID_FREQUENCY, 0, NULL, &r);
- fwrq->m = r.u;
- fwrq->e = 3;
-
- return rvalue;
-}
-
-static int
-prism54_set_mode(struct net_device *ndev, struct iw_request_info *info,
- __u32 * uwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- u32 mlmeautolevel = CARD_DEFAULT_MLME_MODE;
-
- /* Let's see if the user passed a valid Linux Wireless mode */
- if (*uwrq > IW_MODE_MONITOR || *uwrq < IW_MODE_AUTO) {
- printk(KERN_DEBUG
- "%s: %s() You passed a non-valid init_mode.\n",
- priv->ndev->name, __func__);
- return -EINVAL;
- }
-
- down_write(&priv->mib_sem);
-
- if (prism54_mib_mode_helper(priv, *uwrq)) {
- up_write(&priv->mib_sem);
- return -EOPNOTSUPP;
- }
-
- /* the ACL code needs an intermediate mlmeautolevel. The wpa stuff an
- * extended one.
- */
- if ((*uwrq == IW_MODE_MASTER) && (priv->acl.policy != MAC_POLICY_OPEN))
- mlmeautolevel = DOT11_MLME_INTERMEDIATE;
- if (priv->wpa)
- mlmeautolevel = DOT11_MLME_EXTENDED;
-
- mgt_set(priv, DOT11_OID_MLMEAUTOLEVEL, &mlmeautolevel);
-
- if (mgt_commit(priv)) {
- up_write(&priv->mib_sem);
- return -EIO;
- }
- priv->ndev->type = (priv->iw_mode == IW_MODE_MONITOR)
- ? priv->monitor_type : ARPHRD_ETHER;
- up_write(&priv->mib_sem);
-
- return 0;
-}
-
-/* Use mib cache */
-static int
-prism54_get_mode(struct net_device *ndev, struct iw_request_info *info,
- __u32 * uwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
-
- BUG_ON((priv->iw_mode < IW_MODE_AUTO) || (priv->iw_mode >
- IW_MODE_MONITOR));
- *uwrq = priv->iw_mode;
-
- return 0;
-}
-
-/* we use DOT11_OID_EDTHRESHOLD. From what I guess the card will not try to
- * emit data if (sensitivity > rssi - noise) (in dBm).
- * prism54_set_sens does not seem to work.
- */
-
-static int
-prism54_set_sens(struct net_device *ndev, struct iw_request_info *info,
- struct iw_param *vwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- u32 sens;
-
- /* by default the card sets this to 20. */
- sens = vwrq->disabled ? 20 : vwrq->value;
-
- return mgt_set_request(priv, DOT11_OID_EDTHRESHOLD, 0, &sens);
-}
-
-static int
-prism54_get_sens(struct net_device *ndev, struct iw_request_info *info,
- struct iw_param *vwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- union oid_res_t r;
- int rvalue;
-
- rvalue = mgt_get_request(priv, DOT11_OID_EDTHRESHOLD, 0, NULL, &r);
-
- vwrq->value = r.u;
- vwrq->disabled = (vwrq->value == 0);
- vwrq->fixed = 1;
-
- return rvalue;
-}
-
-static int
-prism54_get_range(struct net_device *ndev, struct iw_request_info *info,
- struct iw_point *dwrq, char *extra)
-{
- struct iw_range *range = (struct iw_range *) extra;
- islpci_private *priv = netdev_priv(ndev);
- u8 *data;
- int i, m, rvalue;
- struct obj_frequencies *freq;
- union oid_res_t r;
-
- memset(range, 0, sizeof (struct iw_range));
- dwrq->length = sizeof (struct iw_range);
-
- /* set the wireless extension version number */
- range->we_version_source = SUPPORTED_WIRELESS_EXT;
- range->we_version_compiled = WIRELESS_EXT;
-
- /* Now the encoding capabilities */
- range->num_encoding_sizes = 3;
- /* 64(40) bits WEP */
- range->encoding_size[0] = 5;
- /* 128(104) bits WEP */
- range->encoding_size[1] = 13;
- /* 256 bits for WPA-PSK */
- range->encoding_size[2] = 32;
- /* 4 keys are allowed */
- range->max_encoding_tokens = 4;
-
- /* we don't know the quality range... */
- range->max_qual.level = 0;
- range->max_qual.noise = 0;
- range->max_qual.qual = 0;
- /* these value describe an average quality. Needs more tweaking... */
- range->avg_qual.level = -80; /* -80 dBm */
- range->avg_qual.noise = 0; /* don't know what to put here */
- range->avg_qual.qual = 0;
-
- range->sensitivity = 200;
-
- /* retry limit capabilities */
- range->retry_capa = IW_RETRY_LIMIT | IW_RETRY_LIFETIME;
- range->retry_flags = IW_RETRY_LIMIT;
- range->r_time_flags = IW_RETRY_LIFETIME;
-
- /* I don't know the range. Put stupid things here */
- range->min_retry = 1;
- range->max_retry = 65535;
- range->min_r_time = 1024;
- range->max_r_time = 65535 * 1024;
-
- /* txpower is supported in dBm's */
- range->txpower_capa = IW_TXPOW_DBM;
-
- /* Event capability (kernel + driver) */
- range->event_capa[0] = (IW_EVENT_CAPA_K_0 |
- IW_EVENT_CAPA_MASK(SIOCGIWTHRSPY) |
- IW_EVENT_CAPA_MASK(SIOCGIWAP));
- range->event_capa[1] = IW_EVENT_CAPA_K_1;
- range->event_capa[4] = IW_EVENT_CAPA_MASK(IWEVCUSTOM);
-
- range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 |
- IW_ENC_CAPA_CIPHER_TKIP;
-
- if (islpci_get_state(priv) < PRV_STATE_INIT)
- return 0;
-
- /* Request the device for the supported frequencies
- * not really relevant since some devices will report the 5 GHz band
- * frequencies even if they don't support them.
- */
- rvalue =
- mgt_get_request(priv, DOT11_OID_SUPPORTEDFREQUENCIES, 0, NULL, &r);
- freq = r.ptr;
-
- range->num_channels = freq->nr;
- range->num_frequency = freq->nr;
-
- m = min(IW_MAX_FREQUENCIES, (int) freq->nr);
- for (i = 0; i < m; i++) {
- range->freq[i].m = freq->mhz[i];
- range->freq[i].e = 6;
- range->freq[i].i = channel_of_freq(freq->mhz[i]);
- }
- kfree(freq);
-
- rvalue |= mgt_get_request(priv, DOT11_OID_SUPPORTEDRATES, 0, NULL, &r);
- data = r.ptr;
-
- /* We got an array of char. It is NULL terminated. */
- i = 0;
- while ((i < IW_MAX_BITRATES) && (*data != 0)) {
- /* the result must be in bps. The card gives us 500Kbps */
- range->bitrate[i] = *data * 500000;
- i++;
- data++;
- }
- range->num_bitrates = i;
- kfree(r.ptr);
-
- return rvalue;
-}
-
-/* Set AP address*/
-
-static int
-prism54_set_wap(struct net_device *ndev, struct iw_request_info *info,
- struct sockaddr *awrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- char bssid[6];
- int rvalue;
-
- if (awrq->sa_family != ARPHRD_ETHER)
- return -EINVAL;
-
- /* prepare the structure for the set object */
- memcpy(&bssid[0], awrq->sa_data, ETH_ALEN);
-
- /* set the bssid -- does this make sense when in AP mode? */
- rvalue = mgt_set_request(priv, DOT11_OID_BSSID, 0, &bssid);
-
- return (rvalue ? rvalue : -EINPROGRESS); /* Call commit handler */
-}
-
-/* get AP address*/
-
-static int
-prism54_get_wap(struct net_device *ndev, struct iw_request_info *info,
- struct sockaddr *awrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- union oid_res_t r;
- int rvalue;
-
- rvalue = mgt_get_request(priv, DOT11_OID_BSSID, 0, NULL, &r);
- memcpy(awrq->sa_data, r.ptr, ETH_ALEN);
- awrq->sa_family = ARPHRD_ETHER;
- kfree(r.ptr);
-
- return rvalue;
-}
-
-static int
-prism54_set_scan(struct net_device *dev, struct iw_request_info *info,
- struct iw_param *vwrq, char *extra)
-{
- /* hehe the device does this automagicaly */
- return 0;
-}
-
-/* a little helper that will translate our data into a card independent
- * format that the Wireless Tools will understand. This was inspired by
- * the "Aironet driver for 4500 and 4800 series cards" (GPL)
- */
-
-static char *
-prism54_translate_bss(struct net_device *ndev, struct iw_request_info *info,
- char *current_ev, char *end_buf, struct obj_bss *bss,
- char noise)
-{
- struct iw_event iwe; /* Temporary buffer */
- short cap;
- islpci_private *priv = netdev_priv(ndev);
- u8 wpa_ie[MAX_WPA_IE_LEN];
- size_t wpa_ie_len;
-
- /* The first entry must be the MAC address */
- memcpy(iwe.u.ap_addr.sa_data, bss->address, ETH_ALEN);
- iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
- iwe.cmd = SIOCGIWAP;
- current_ev = iwe_stream_add_event(info, current_ev, end_buf,
- &iwe, IW_EV_ADDR_LEN);
-
- /* The following entries will be displayed in the same order we give them */
-
- /* The ESSID. */
- iwe.u.data.length = bss->ssid.length;
- iwe.u.data.flags = 1;
- iwe.cmd = SIOCGIWESSID;
- current_ev = iwe_stream_add_point(info, current_ev, end_buf,
- &iwe, bss->ssid.octets);
-
- /* Capabilities */
-#define CAP_ESS 0x01
-#define CAP_IBSS 0x02
-#define CAP_CRYPT 0x10
-
- /* Mode */
- cap = bss->capinfo;
- iwe.u.mode = 0;
- if (cap & CAP_ESS)
- iwe.u.mode = IW_MODE_MASTER;
- else if (cap & CAP_IBSS)
- iwe.u.mode = IW_MODE_ADHOC;
- iwe.cmd = SIOCGIWMODE;
- if (iwe.u.mode)
- current_ev = iwe_stream_add_event(info, current_ev, end_buf,
- &iwe, IW_EV_UINT_LEN);
-
- /* Encryption capability */
- if (cap & CAP_CRYPT)
- iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY;
- else
- iwe.u.data.flags = IW_ENCODE_DISABLED;
- iwe.u.data.length = 0;
- iwe.cmd = SIOCGIWENCODE;
- current_ev = iwe_stream_add_point(info, current_ev, end_buf,
- &iwe, NULL);
-
- /* Add frequency. (short) bss->channel is the frequency in MHz */
- iwe.u.freq.m = bss->channel;
- iwe.u.freq.e = 6;
- iwe.cmd = SIOCGIWFREQ;
- current_ev = iwe_stream_add_event(info, current_ev, end_buf,
- &iwe, IW_EV_FREQ_LEN);
-
- /* Add quality statistics */
- iwe.u.qual.level = bss->rssi;
- iwe.u.qual.noise = noise;
- /* do a simple SNR for quality */
- iwe.u.qual.qual = bss->rssi - noise;
- iwe.cmd = IWEVQUAL;
- current_ev = iwe_stream_add_event(info, current_ev, end_buf,
- &iwe, IW_EV_QUAL_LEN);
-
- /* Add WPA/RSN Information Element, if any */
- wpa_ie_len = prism54_wpa_bss_ie_get(priv, bss->address, wpa_ie);
- if (wpa_ie_len > 0) {
- iwe.cmd = IWEVGENIE;
- iwe.u.data.length = min_t(size_t, wpa_ie_len, MAX_WPA_IE_LEN);
- current_ev = iwe_stream_add_point(info, current_ev, end_buf,
- &iwe, wpa_ie);
- }
- /* Do the bitrates */
- {
- char *current_val = current_ev + iwe_stream_lcp_len(info);
- int i;
- int mask;
-
- iwe.cmd = SIOCGIWRATE;
- /* Those two flags are ignored... */
- iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0;
-
- /* Parse the bitmask */
- mask = 0x1;
- for(i = 0; i < sizeof(scan_rate_list); i++) {
- if(bss->rates & mask) {
- iwe.u.bitrate.value = (scan_rate_list[i] * 500000);
- current_val = iwe_stream_add_value(
- info, current_ev, current_val,
- end_buf, &iwe, IW_EV_PARAM_LEN);
- }
- mask <<= 1;
- }
- /* Check if we added any event */
- if ((current_val - current_ev) > iwe_stream_lcp_len(info))
- current_ev = current_val;
- }
-
- return current_ev;
-}
-
-static int
-prism54_get_scan(struct net_device *ndev, struct iw_request_info *info,
- struct iw_point *dwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- int i, rvalue;
- struct obj_bsslist *bsslist;
- u32 noise = 0;
- char *current_ev = extra;
- union oid_res_t r;
-
- if (islpci_get_state(priv) < PRV_STATE_INIT) {
- /* device is not ready, fail gently */
- dwrq->length = 0;
- return 0;
- }
-
- /* first get the noise value. We will use it to report the link quality */
- rvalue = mgt_get_request(priv, DOT11_OID_NOISEFLOOR, 0, NULL, &r);
- noise = r.u;
-
- /* Ask the device for a list of known bss.
- * The old API, using SIOCGIWAPLIST, had a hard limit of IW_MAX_AP=64.
- * The new API, using SIOCGIWSCAN, is only limited by the buffer size.
- * WE-14->WE-16, the buffer is limited to IW_SCAN_MAX_DATA bytes.
- * Starting with WE-17, the buffer can be as big as needed.
- * But the device won't repport anything if you change the value
- * of IWMAX_BSS=24. */
-
- rvalue |= mgt_get_request(priv, DOT11_OID_BSSLIST, 0, NULL, &r);
- bsslist = r.ptr;
-
- /* ok now, scan the list and translate its info */
- for (i = 0; i < (int) bsslist->nr; i++) {
- current_ev = prism54_translate_bss(ndev, info, current_ev,
- extra + dwrq->length,
- &(bsslist->bsslist[i]),
- noise);
-
- /* Check if there is space for one more entry */
- if((extra + dwrq->length - current_ev) <= IW_EV_ADDR_LEN) {
- /* Ask user space to try again with a bigger buffer */
- rvalue = -E2BIG;
- break;
- }
- }
-
- kfree(bsslist);
- dwrq->length = (current_ev - extra);
- dwrq->flags = 0; /* todo */
-
- return rvalue;
-}
-
-static int
-prism54_set_essid(struct net_device *ndev, struct iw_request_info *info,
- struct iw_point *dwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- struct obj_ssid essid;
-
- memset(essid.octets, 0, 33);
-
- /* Check if we were asked for `any' */
- if (dwrq->flags && dwrq->length) {
- if (dwrq->length > 32)
- return -E2BIG;
- essid.length = dwrq->length;
- memcpy(essid.octets, extra, dwrq->length);
- } else
- essid.length = 0;
-
- if (priv->iw_mode != IW_MODE_MONITOR)
- return mgt_set_request(priv, DOT11_OID_SSID, 0, &essid);
-
- /* If in monitor mode, just save to mib */
- mgt_set(priv, DOT11_OID_SSID, &essid);
- return 0;
-
-}
-
-static int
-prism54_get_essid(struct net_device *ndev, struct iw_request_info *info,
- struct iw_point *dwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- struct obj_ssid *essid;
- union oid_res_t r;
- int rvalue;
-
- rvalue = mgt_get_request(priv, DOT11_OID_SSID, 0, NULL, &r);
- essid = r.ptr;
-
- if (essid->length) {
- dwrq->flags = 1; /* set ESSID to ON for Wireless Extensions */
- /* if it is too big, trunk it */
- dwrq->length = min((u8)IW_ESSID_MAX_SIZE, essid->length);
- } else {
- dwrq->flags = 0;
- dwrq->length = 0;
- }
- essid->octets[dwrq->length] = '\0';
- memcpy(extra, essid->octets, dwrq->length);
- kfree(essid);
-
- return rvalue;
-}
-
-/* Provides no functionality, just completes the ioctl. In essence this is a
- * just a cosmetic ioctl.
- */
-static int
-prism54_set_nick(struct net_device *ndev, struct iw_request_info *info,
- struct iw_point *dwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
-
- if (dwrq->length > IW_ESSID_MAX_SIZE)
- return -E2BIG;
-
- down_write(&priv->mib_sem);
- memset(priv->nickname, 0, sizeof (priv->nickname));
- memcpy(priv->nickname, extra, dwrq->length);
- up_write(&priv->mib_sem);
-
- return 0;
-}
-
-static int
-prism54_get_nick(struct net_device *ndev, struct iw_request_info *info,
- struct iw_point *dwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
-
- dwrq->length = 0;
-
- down_read(&priv->mib_sem);
- dwrq->length = strlen(priv->nickname);
- memcpy(extra, priv->nickname, dwrq->length);
- up_read(&priv->mib_sem);
-
- return 0;
-}
-
-/* Set the allowed Bitrates */
-
-static int
-prism54_set_rate(struct net_device *ndev,
- struct iw_request_info *info,
- struct iw_param *vwrq, char *extra)
-{
-
- islpci_private *priv = netdev_priv(ndev);
- u32 rate, profile;
- char *data;
- int ret, i;
- union oid_res_t r;
-
- if (vwrq->value == -1) {
- /* auto mode. No limit. */
- profile = 1;
- return mgt_set_request(priv, DOT11_OID_PROFILES, 0, &profile);
- }
-
- ret = mgt_get_request(priv, DOT11_OID_SUPPORTEDRATES, 0, NULL, &r);
- if (ret) {
- kfree(r.ptr);
- return ret;
- }
-
- rate = (u32) (vwrq->value / 500000);
- data = r.ptr;
- i = 0;
-
- while (data[i]) {
- if (rate && (data[i] == rate)) {
- break;
- }
- if (vwrq->value == i) {
- break;
- }
- data[i] |= 0x80;
- i++;
- }
-
- if (!data[i]) {
- kfree(r.ptr);
- return -EINVAL;
- }
-
- data[i] |= 0x80;
- data[i + 1] = 0;
-
- /* Now, check if we want a fixed or auto value */
- if (vwrq->fixed) {
- data[0] = data[i];
- data[1] = 0;
- }
-
-/*
- i = 0;
- printk("prism54 rate: ");
- while(data[i]) {
- printk("%u ", data[i]);
- i++;
- }
- printk("0\n");
-*/
- profile = -1;
- ret = mgt_set_request(priv, DOT11_OID_PROFILES, 0, &profile);
- ret |= mgt_set_request(priv, DOT11_OID_EXTENDEDRATES, 0, data);
- ret |= mgt_set_request(priv, DOT11_OID_RATES, 0, data);
-
- kfree(r.ptr);
-
- return ret;
-}
-
-/* Get the current bit rate */
-static int
-prism54_get_rate(struct net_device *ndev,
- struct iw_request_info *info,
- struct iw_param *vwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- int rvalue;
- char *data;
- union oid_res_t r;
-
- /* Get the current bit rate */
- if ((rvalue = mgt_get_request(priv, GEN_OID_LINKSTATE, 0, NULL, &r)))
- return rvalue;
- vwrq->value = r.u * 500000;
-
- /* request the device for the enabled rates */
- rvalue = mgt_get_request(priv, DOT11_OID_RATES, 0, NULL, &r);
- if (rvalue) {
- kfree(r.ptr);
- return rvalue;
- }
- data = r.ptr;
- vwrq->fixed = (data[0] != 0) && (data[1] == 0);
- kfree(r.ptr);
-
- return 0;
-}
-
-static int
-prism54_set_rts(struct net_device *ndev, struct iw_request_info *info,
- struct iw_param *vwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
-
- return mgt_set_request(priv, DOT11_OID_RTSTHRESH, 0, &vwrq->value);
-}
-
-static int
-prism54_get_rts(struct net_device *ndev, struct iw_request_info *info,
- struct iw_param *vwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- union oid_res_t r;
- int rvalue;
-
- /* get the rts threshold */
- rvalue = mgt_get_request(priv, DOT11_OID_RTSTHRESH, 0, NULL, &r);
- vwrq->value = r.u;
-
- return rvalue;
-}
-
-static int
-prism54_set_frag(struct net_device *ndev, struct iw_request_info *info,
- struct iw_param *vwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
-
- return mgt_set_request(priv, DOT11_OID_FRAGTHRESH, 0, &vwrq->value);
-}
-
-static int
-prism54_get_frag(struct net_device *ndev, struct iw_request_info *info,
- struct iw_param *vwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- union oid_res_t r;
- int rvalue;
-
- rvalue = mgt_get_request(priv, DOT11_OID_FRAGTHRESH, 0, NULL, &r);
- vwrq->value = r.u;
-
- return rvalue;
-}
-
-/* Here we have (min,max) = max retries for (small frames, big frames). Where
- * big frame <=> bigger than the rts threshold
- * small frame <=> smaller than the rts threshold
- * This is not really the behavior expected by the wireless tool but it seems
- * to be a common behavior in other drivers.
- */
-
-static int
-prism54_set_retry(struct net_device *ndev, struct iw_request_info *info,
- struct iw_param *vwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- u32 slimit = 0, llimit = 0; /* short and long limit */
- u32 lifetime = 0;
- int rvalue = 0;
-
- if (vwrq->disabled)
- /* we cannot disable this feature */
- return -EINVAL;
-
- if (vwrq->flags & IW_RETRY_LIMIT) {
- if (vwrq->flags & IW_RETRY_SHORT)
- slimit = vwrq->value;
- else if (vwrq->flags & IW_RETRY_LONG)
- llimit = vwrq->value;
- else {
- /* we are asked to set both */
- slimit = vwrq->value;
- llimit = vwrq->value;
- }
- }
- if (vwrq->flags & IW_RETRY_LIFETIME)
- /* Wireless tools use us unit while the device uses 1024 us unit */
- lifetime = vwrq->value / 1024;
-
- /* now set what is requested */
- if (slimit)
- rvalue =
- mgt_set_request(priv, DOT11_OID_SHORTRETRIES, 0, &slimit);
- if (llimit)
- rvalue |=
- mgt_set_request(priv, DOT11_OID_LONGRETRIES, 0, &llimit);
- if (lifetime)
- rvalue |=
- mgt_set_request(priv, DOT11_OID_MAXTXLIFETIME, 0,
- &lifetime);
- return rvalue;
-}
-
-static int
-prism54_get_retry(struct net_device *ndev, struct iw_request_info *info,
- struct iw_param *vwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- union oid_res_t r;
- int rvalue = 0;
- vwrq->disabled = 0; /* It cannot be disabled */
-
- if ((vwrq->flags & IW_RETRY_TYPE) == IW_RETRY_LIFETIME) {
- /* we are asked for the life time */
- rvalue =
- mgt_get_request(priv, DOT11_OID_MAXTXLIFETIME, 0, NULL, &r);
- vwrq->value = r.u * 1024;
- vwrq->flags = IW_RETRY_LIFETIME;
- } else if ((vwrq->flags & IW_RETRY_LONG)) {
- /* we are asked for the long retry limit */
- rvalue |=
- mgt_get_request(priv, DOT11_OID_LONGRETRIES, 0, NULL, &r);
- vwrq->value = r.u;
- vwrq->flags = IW_RETRY_LIMIT | IW_RETRY_LONG;
- } else {
- /* default. get the short retry limit */
- rvalue |=
- mgt_get_request(priv, DOT11_OID_SHORTRETRIES, 0, NULL, &r);
- vwrq->value = r.u;
- vwrq->flags = IW_RETRY_LIMIT | IW_RETRY_SHORT;
- }
-
- return rvalue;
-}
-
-static int
-prism54_set_encode(struct net_device *ndev, struct iw_request_info *info,
- struct iw_point *dwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- int rvalue = 0, force = 0;
- int authen = DOT11_AUTH_OS, invoke = 0, exunencrypt = 0;
- union oid_res_t r;
-
- /* with the new API, it's impossible to get a NULL pointer.
- * New version of iwconfig set the IW_ENCODE_NOKEY flag
- * when no key is given, but older versions don't. */
-
- if (dwrq->length > 0) {
- /* we have a key to set */
- int index = (dwrq->flags & IW_ENCODE_INDEX) - 1;
- int current_index;
- struct obj_key key = { DOT11_PRIV_WEP, 0, "" };
-
- /* get the current key index */
- rvalue = mgt_get_request(priv, DOT11_OID_DEFKEYID, 0, NULL, &r);
- current_index = r.u;
- /* Verify that the key is not marked as invalid */
- if (!(dwrq->flags & IW_ENCODE_NOKEY)) {
- if (dwrq->length > KEY_SIZE_TKIP) {
- /* User-provided key data too big */
- return -EINVAL;
- }
- if (dwrq->length > KEY_SIZE_WEP104) {
- /* WPA-PSK TKIP */
- key.type = DOT11_PRIV_TKIP;
- key.length = KEY_SIZE_TKIP;
- } else if (dwrq->length > KEY_SIZE_WEP40) {
- /* WEP 104/128 */
- key.length = KEY_SIZE_WEP104;
- } else {
- /* WEP 40/64 */
- key.length = KEY_SIZE_WEP40;
- }
- memset(key.key, 0, sizeof (key.key));
- memcpy(key.key, extra, dwrq->length);
-
- if ((index < 0) || (index > 3))
- /* no index provided use the current one */
- index = current_index;
-
- /* now send the key to the card */
- rvalue |=
- mgt_set_request(priv, DOT11_OID_DEFKEYX, index,
- &key);
- }
- /*
- * If a valid key is set, encryption should be enabled
- * (user may turn it off later).
- * This is also how "iwconfig ethX key on" works
- */
- if ((index == current_index) && (key.length > 0))
- force = 1;
- } else {
- int index = (dwrq->flags & IW_ENCODE_INDEX) - 1;
- if ((index >= 0) && (index <= 3)) {
- /* we want to set the key index */
- rvalue |=
- mgt_set_request(priv, DOT11_OID_DEFKEYID, 0,
- &index);
- } else {
- if (!(dwrq->flags & IW_ENCODE_MODE)) {
- /* we cannot do anything. Complain. */
- return -EINVAL;
- }
- }
- }
- /* now read the flags */
- if (dwrq->flags & IW_ENCODE_DISABLED) {
- /* Encoding disabled,
- * authen = DOT11_AUTH_OS;
- * invoke = 0;
- * exunencrypt = 0; */
- }
- if (dwrq->flags & IW_ENCODE_OPEN)
- /* Encode but accept non-encoded packets. No auth */
- invoke = 1;
- if ((dwrq->flags & IW_ENCODE_RESTRICTED) || force) {
- /* Refuse non-encoded packets. Auth */
- authen = DOT11_AUTH_BOTH;
- invoke = 1;
- exunencrypt = 1;
- }
- /* do the change if requested */
- if ((dwrq->flags & IW_ENCODE_MODE) || force) {
- rvalue |=
- mgt_set_request(priv, DOT11_OID_AUTHENABLE, 0, &authen);
- rvalue |=
- mgt_set_request(priv, DOT11_OID_PRIVACYINVOKED, 0, &invoke);
- rvalue |=
- mgt_set_request(priv, DOT11_OID_EXUNENCRYPTED, 0,
- &exunencrypt);
- }
- return rvalue;
-}
-
-static int
-prism54_get_encode(struct net_device *ndev, struct iw_request_info *info,
- struct iw_point *dwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- struct obj_key *key;
- u32 devindex, index = (dwrq->flags & IW_ENCODE_INDEX) - 1;
- u32 authen = 0, invoke = 0, exunencrypt = 0;
- int rvalue;
- union oid_res_t r;
-
- /* first get the flags */
- rvalue = mgt_get_request(priv, DOT11_OID_AUTHENABLE, 0, NULL, &r);
- authen = r.u;
- rvalue |= mgt_get_request(priv, DOT11_OID_PRIVACYINVOKED, 0, NULL, &r);
- invoke = r.u;
- rvalue |= mgt_get_request(priv, DOT11_OID_EXUNENCRYPTED, 0, NULL, &r);
- exunencrypt = r.u;
-
- if (invoke && (authen == DOT11_AUTH_BOTH) && exunencrypt)
- dwrq->flags = IW_ENCODE_RESTRICTED;
- else if ((authen == DOT11_AUTH_OS) && !exunencrypt) {
- if (invoke)
- dwrq->flags = IW_ENCODE_OPEN;
- else
- dwrq->flags = IW_ENCODE_DISABLED;
- } else
- /* The card should not work in this state */
- dwrq->flags = 0;
-
- /* get the current device key index */
- rvalue |= mgt_get_request(priv, DOT11_OID_DEFKEYID, 0, NULL, &r);
- devindex = r.u;
- /* Now get the key, return it */
- if (index == -1 || index > 3)
- /* no index provided, use the current one */
- index = devindex;
- rvalue |= mgt_get_request(priv, DOT11_OID_DEFKEYX, index, NULL, &r);
- key = r.ptr;
- dwrq->length = key->length;
- memcpy(extra, key->key, dwrq->length);
- kfree(key);
- /* return the used key index */
- dwrq->flags |= devindex + 1;
-
- return rvalue;
-}
-
-static int
-prism54_get_txpower(struct net_device *ndev, struct iw_request_info *info,
- struct iw_param *vwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- union oid_res_t r;
- int rvalue;
-
- rvalue = mgt_get_request(priv, OID_INL_OUTPUTPOWER, 0, NULL, &r);
- /* intersil firmware operates in 0.25 dBm (1/4 dBm) */
- vwrq->value = (s32) r.u / 4;
- vwrq->fixed = 1;
- /* radio is not turned of
- * btw: how is possible to turn off only the radio
- */
- vwrq->disabled = 0;
-
- return rvalue;
-}
-
-static int
-prism54_set_txpower(struct net_device *ndev, struct iw_request_info *info,
- struct iw_param *vwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- s32 u = vwrq->value;
-
- /* intersil firmware operates in 0.25 dBm (1/4) */
- u *= 4;
- if (vwrq->disabled) {
- /* don't know how to disable radio */
- printk(KERN_DEBUG
- "%s: %s() disabling radio is not yet supported.\n",
- priv->ndev->name, __func__);
- return -ENOTSUPP;
- } else if (vwrq->fixed)
- /* currently only fixed value is supported */
- return mgt_set_request(priv, OID_INL_OUTPUTPOWER, 0, &u);
- else {
- printk(KERN_DEBUG
- "%s: %s() auto power will be implemented later.\n",
- priv->ndev->name, __func__);
- return -ENOTSUPP;
- }
-}
-
-static int prism54_set_genie(struct net_device *ndev,
- struct iw_request_info *info,
- struct iw_point *data, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- int alen, ret = 0;
- struct obj_attachment *attach;
-
- if (data->length > MAX_WPA_IE_LEN ||
- (data->length && extra == NULL))
- return -EINVAL;
-
- memcpy(priv->wpa_ie, extra, data->length);
- priv->wpa_ie_len = data->length;
-
- alen = sizeof(*attach) + priv->wpa_ie_len;
- attach = kzalloc(alen, GFP_KERNEL);
- if (attach == NULL)
- return -ENOMEM;
-
-#define WLAN_FC_TYPE_MGMT 0
-#define WLAN_FC_STYPE_ASSOC_REQ 0
-#define WLAN_FC_STYPE_REASSOC_REQ 2
-
- /* Note: endianness is covered by mgt_set_varlen */
- attach->type = (WLAN_FC_TYPE_MGMT << 2) |
- (WLAN_FC_STYPE_ASSOC_REQ << 4);
- attach->id = -1;
- attach->size = priv->wpa_ie_len;
- memcpy(attach->data, extra, priv->wpa_ie_len);
-
- ret = mgt_set_varlen(priv, DOT11_OID_ATTACHMENT, attach,
- priv->wpa_ie_len);
- if (ret == 0) {
- attach->type = (WLAN_FC_TYPE_MGMT << 2) |
- (WLAN_FC_STYPE_REASSOC_REQ << 4);
-
- ret = mgt_set_varlen(priv, DOT11_OID_ATTACHMENT, attach,
- priv->wpa_ie_len);
- if (ret == 0)
- printk(KERN_DEBUG "%s: WPA IE Attachment was set\n",
- ndev->name);
- }
-
- kfree(attach);
- return ret;
-}
-
-
-static int prism54_get_genie(struct net_device *ndev,
- struct iw_request_info *info,
- struct iw_point *data, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- int len = priv->wpa_ie_len;
-
- if (len <= 0) {
- data->length = 0;
- return 0;
- }
-
- if (data->length < len)
- return -E2BIG;
-
- data->length = len;
- memcpy(extra, priv->wpa_ie, len);
-
- return 0;
-}
-
-static int prism54_set_auth(struct net_device *ndev,
- struct iw_request_info *info,
- union iwreq_data *wrqu, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- struct iw_param *param = &wrqu->param;
- u32 mlmelevel = 0, authen = 0, dot1x = 0;
- u32 exunencrypt = 0, privinvoked = 0, wpa = 0;
- u32 old_wpa;
- int ret = 0;
- union oid_res_t r;
-
- if (islpci_get_state(priv) < PRV_STATE_INIT)
- return 0;
-
- /* first get the flags */
- down_write(&priv->mib_sem);
- wpa = old_wpa = priv->wpa;
- up_write(&priv->mib_sem);
- ret = mgt_get_request(priv, DOT11_OID_AUTHENABLE, 0, NULL, &r);
- authen = r.u;
- ret = mgt_get_request(priv, DOT11_OID_PRIVACYINVOKED, 0, NULL, &r);
- privinvoked = r.u;
- ret = mgt_get_request(priv, DOT11_OID_EXUNENCRYPTED, 0, NULL, &r);
- exunencrypt = r.u;
- ret = mgt_get_request(priv, DOT11_OID_DOT1XENABLE, 0, NULL, &r);
- dot1x = r.u;
- ret = mgt_get_request(priv, DOT11_OID_MLMEAUTOLEVEL, 0, NULL, &r);
- mlmelevel = r.u;
-
- if (ret < 0)
- goto out;
-
- switch (param->flags & IW_AUTH_INDEX) {
- case IW_AUTH_CIPHER_PAIRWISE:
- case IW_AUTH_CIPHER_GROUP:
- case IW_AUTH_KEY_MGMT:
- break;
-
- case IW_AUTH_WPA_ENABLED:
- /* Do the same thing as IW_AUTH_WPA_VERSION */
- if (param->value) {
- wpa = 1;
- privinvoked = 1; /* For privacy invoked */
- exunencrypt = 1; /* Filter out all unencrypted frames */
- dot1x = 0x01; /* To enable eap filter */
- mlmelevel = DOT11_MLME_EXTENDED;
- authen = DOT11_AUTH_OS; /* Only WEP uses _SK and _BOTH */
- } else {
- wpa = 0;
- privinvoked = 0;
- exunencrypt = 0; /* Do not filter un-encrypted data */
- dot1x = 0;
- mlmelevel = DOT11_MLME_AUTO;
- }
- break;
-
- case IW_AUTH_WPA_VERSION:
- if (param->value & IW_AUTH_WPA_VERSION_DISABLED) {
- wpa = 0;
- privinvoked = 0;
- exunencrypt = 0; /* Do not filter un-encrypted data */
- dot1x = 0;
- mlmelevel = DOT11_MLME_AUTO;
- } else {
- if (param->value & IW_AUTH_WPA_VERSION_WPA)
- wpa = 1;
- else if (param->value & IW_AUTH_WPA_VERSION_WPA2)
- wpa = 2;
- privinvoked = 1; /* For privacy invoked */
- exunencrypt = 1; /* Filter out all unencrypted frames */
- dot1x = 0x01; /* To enable eap filter */
- mlmelevel = DOT11_MLME_EXTENDED;
- authen = DOT11_AUTH_OS; /* Only WEP uses _SK and _BOTH */
- }
- break;
-
- case IW_AUTH_RX_UNENCRYPTED_EAPOL:
- /* dot1x should be the opposite of RX_UNENCRYPTED_EAPOL;
- * turn off dot1x when allowing receipt of unencrypted EAPOL
- * frames, turn on dot1x when receipt should be disallowed
- */
- dot1x = param->value ? 0 : 0x01;
- break;
-
- case IW_AUTH_PRIVACY_INVOKED:
- privinvoked = param->value ? 1 : 0;
- break;
-
- case IW_AUTH_DROP_UNENCRYPTED:
- exunencrypt = param->value ? 1 : 0;
- break;
-
- case IW_AUTH_80211_AUTH_ALG:
- if (param->value & IW_AUTH_ALG_SHARED_KEY) {
- /* Only WEP uses _SK and _BOTH */
- if (wpa > 0) {
- ret = -EINVAL;
- goto out;
- }
- authen = DOT11_AUTH_SK;
- } else if (param->value & IW_AUTH_ALG_OPEN_SYSTEM) {
- authen = DOT11_AUTH_OS;
- } else {
- ret = -EINVAL;
- goto out;
- }
- break;
-
- default:
- return -EOPNOTSUPP;
- }
-
- /* Set all the values */
- down_write(&priv->mib_sem);
- priv->wpa = wpa;
- up_write(&priv->mib_sem);
- mgt_set_request(priv, DOT11_OID_AUTHENABLE, 0, &authen);
- mgt_set_request(priv, DOT11_OID_PRIVACYINVOKED, 0, &privinvoked);
- mgt_set_request(priv, DOT11_OID_EXUNENCRYPTED, 0, &exunencrypt);
- mgt_set_request(priv, DOT11_OID_DOT1XENABLE, 0, &dot1x);
- mgt_set_request(priv, DOT11_OID_MLMEAUTOLEVEL, 0, &mlmelevel);
-
-out:
- return ret;
-}
-
-static int prism54_get_auth(struct net_device *ndev,
- struct iw_request_info *info,
- union iwreq_data *wrqu, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- struct iw_param *param = &wrqu->param;
- u32 wpa = 0;
- int ret = 0;
- union oid_res_t r;
-
- if (islpci_get_state(priv) < PRV_STATE_INIT)
- return 0;
-
- /* first get the flags */
- down_write(&priv->mib_sem);
- wpa = priv->wpa;
- up_write(&priv->mib_sem);
-
- switch (param->flags & IW_AUTH_INDEX) {
- case IW_AUTH_CIPHER_PAIRWISE:
- case IW_AUTH_CIPHER_GROUP:
- case IW_AUTH_KEY_MGMT:
- /*
- * wpa_supplicant will control these internally
- */
- ret = -EOPNOTSUPP;
- break;
-
- case IW_AUTH_WPA_VERSION:
- switch (wpa) {
- case 1:
- param->value = IW_AUTH_WPA_VERSION_WPA;
- break;
- case 2:
- param->value = IW_AUTH_WPA_VERSION_WPA2;
- break;
- case 0:
- default:
- param->value = IW_AUTH_WPA_VERSION_DISABLED;
- break;
- }
- break;
-
- case IW_AUTH_DROP_UNENCRYPTED:
- ret = mgt_get_request(priv, DOT11_OID_EXUNENCRYPTED, 0, NULL, &r);
- if (ret >= 0)
- param->value = r.u > 0 ? 1 : 0;
- break;
-
- case IW_AUTH_80211_AUTH_ALG:
- ret = mgt_get_request(priv, DOT11_OID_AUTHENABLE, 0, NULL, &r);
- if (ret >= 0) {
- switch (r.u) {
- case DOT11_AUTH_OS:
- param->value = IW_AUTH_ALG_OPEN_SYSTEM;
- break;
- case DOT11_AUTH_BOTH:
- case DOT11_AUTH_SK:
- param->value = IW_AUTH_ALG_SHARED_KEY;
- break;
- case DOT11_AUTH_NONE:
- default:
- param->value = 0;
- break;
- }
- }
- break;
-
- case IW_AUTH_WPA_ENABLED:
- param->value = wpa > 0 ? 1 : 0;
- break;
-
- case IW_AUTH_RX_UNENCRYPTED_EAPOL:
- ret = mgt_get_request(priv, DOT11_OID_DOT1XENABLE, 0, NULL, &r);
- if (ret >= 0)
- param->value = r.u > 0 ? 1 : 0;
- break;
-
- case IW_AUTH_PRIVACY_INVOKED:
- ret = mgt_get_request(priv, DOT11_OID_PRIVACYINVOKED, 0, NULL, &r);
- if (ret >= 0)
- param->value = r.u > 0 ? 1 : 0;
- break;
-
- default:
- return -EOPNOTSUPP;
- }
- return ret;
-}
-
-static int prism54_set_encodeext(struct net_device *ndev,
- struct iw_request_info *info,
- union iwreq_data *wrqu,
- char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- struct iw_point *encoding = &wrqu->encoding;
- struct iw_encode_ext *ext = (struct iw_encode_ext *)extra;
- int idx, alg = ext->alg, set_key = 1;
- union oid_res_t r;
- int authen = DOT11_AUTH_OS, invoke = 0, exunencrypt = 0;
- int ret = 0;
-
- if (islpci_get_state(priv) < PRV_STATE_INIT)
- return 0;
-
- /* Determine and validate the key index */
- idx = (encoding->flags & IW_ENCODE_INDEX) - 1;
- if (idx) {
- if (idx < 0 || idx > 3)
- return -EINVAL;
- } else {
- ret = mgt_get_request(priv, DOT11_OID_DEFKEYID, 0, NULL, &r);
- if (ret < 0)
- goto out;
- idx = r.u;
- }
-
- if (encoding->flags & IW_ENCODE_DISABLED)
- alg = IW_ENCODE_ALG_NONE;
-
- if (ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY) {
- /* Only set transmit key index here, actual
- * key is set below if needed.
- */
- ret = mgt_set_request(priv, DOT11_OID_DEFKEYID, 0, &idx);
- set_key = ext->key_len > 0 ? 1 : 0;
- }
-
- if (set_key) {
- struct obj_key key = { DOT11_PRIV_WEP, 0, "" };
- switch (alg) {
- case IW_ENCODE_ALG_NONE:
- break;
- case IW_ENCODE_ALG_WEP:
- if (ext->key_len > KEY_SIZE_WEP104) {
- ret = -EINVAL;
- goto out;
- }
- if (ext->key_len > KEY_SIZE_WEP40)
- key.length = KEY_SIZE_WEP104;
- else
- key.length = KEY_SIZE_WEP40;
- break;
- case IW_ENCODE_ALG_TKIP:
- if (ext->key_len > KEY_SIZE_TKIP) {
- ret = -EINVAL;
- goto out;
- }
- key.type = DOT11_PRIV_TKIP;
- key.length = KEY_SIZE_TKIP;
- break;
- default:
- return -EINVAL;
- }
-
- if (key.length) {
- memset(key.key, 0, sizeof(key.key));
- memcpy(key.key, ext->key, ext->key_len);
- ret = mgt_set_request(priv, DOT11_OID_DEFKEYX, idx,
- &key);
- if (ret < 0)
- goto out;
- }
- }
-
- /* Read the flags */
- if (encoding->flags & IW_ENCODE_DISABLED) {
- /* Encoding disabled,
- * authen = DOT11_AUTH_OS;
- * invoke = 0;
- * exunencrypt = 0; */
- }
- if (encoding->flags & IW_ENCODE_OPEN) {
- /* Encode but accept non-encoded packets. No auth */
- invoke = 1;
- }
- if (encoding->flags & IW_ENCODE_RESTRICTED) {
- /* Refuse non-encoded packets. Auth */
- authen = DOT11_AUTH_BOTH;
- invoke = 1;
- exunencrypt = 1;
- }
-
- /* do the change if requested */
- if (encoding->flags & IW_ENCODE_MODE) {
- ret = mgt_set_request(priv, DOT11_OID_AUTHENABLE, 0,
- &authen);
- ret = mgt_set_request(priv, DOT11_OID_PRIVACYINVOKED, 0,
- &invoke);
- ret = mgt_set_request(priv, DOT11_OID_EXUNENCRYPTED, 0,
- &exunencrypt);
- }
-
-out:
- return ret;
-}
-
-
-static int prism54_get_encodeext(struct net_device *ndev,
- struct iw_request_info *info,
- union iwreq_data *wrqu,
- char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- struct iw_point *encoding = &wrqu->encoding;
- struct iw_encode_ext *ext = (struct iw_encode_ext *)extra;
- int idx, max_key_len;
- union oid_res_t r;
- int authen = DOT11_AUTH_OS, invoke = 0, exunencrypt = 0, wpa = 0;
- int ret = 0;
-
- if (islpci_get_state(priv) < PRV_STATE_INIT)
- return 0;
-
- /* first get the flags */
- ret = mgt_get_request(priv, DOT11_OID_AUTHENABLE, 0, NULL, &r);
- authen = r.u;
- ret = mgt_get_request(priv, DOT11_OID_PRIVACYINVOKED, 0, NULL, &r);
- invoke = r.u;
- ret = mgt_get_request(priv, DOT11_OID_EXUNENCRYPTED, 0, NULL, &r);
- exunencrypt = r.u;
- if (ret < 0)
- goto out;
-
- max_key_len = encoding->length - sizeof(*ext);
- if (max_key_len < 0)
- return -EINVAL;
-
- idx = (encoding->flags & IW_ENCODE_INDEX) - 1;
- if (idx) {
- if (idx < 0 || idx > 3)
- return -EINVAL;
- } else {
- ret = mgt_get_request(priv, DOT11_OID_DEFKEYID, 0, NULL, &r);
- if (ret < 0)
- goto out;
- idx = r.u;
- }
-
- encoding->flags = idx + 1;
- memset(ext, 0, sizeof(*ext));
-
- switch (authen) {
- case DOT11_AUTH_BOTH:
- case DOT11_AUTH_SK:
- wrqu->encoding.flags |= IW_ENCODE_RESTRICTED;
- fallthrough;
- case DOT11_AUTH_OS:
- default:
- wrqu->encoding.flags |= IW_ENCODE_OPEN;
- break;
- }
-
- down_write(&priv->mib_sem);
- wpa = priv->wpa;
- up_write(&priv->mib_sem);
-
- if (authen == DOT11_AUTH_OS && !exunencrypt && !invoke && !wpa) {
- /* No encryption */
- ext->alg = IW_ENCODE_ALG_NONE;
- ext->key_len = 0;
- wrqu->encoding.flags |= IW_ENCODE_DISABLED;
- } else {
- struct obj_key *key;
-
- ret = mgt_get_request(priv, DOT11_OID_DEFKEYX, idx, NULL, &r);
- if (ret < 0)
- goto out;
- key = r.ptr;
- if (max_key_len < key->length) {
- ret = -E2BIG;
- goto out;
- }
- memcpy(ext->key, key->key, key->length);
- ext->key_len = key->length;
-
- switch (key->type) {
- case DOT11_PRIV_TKIP:
- ext->alg = IW_ENCODE_ALG_TKIP;
- break;
- default:
- case DOT11_PRIV_WEP:
- ext->alg = IW_ENCODE_ALG_WEP;
- break;
- }
- wrqu->encoding.flags |= IW_ENCODE_ENABLED;
- }
-
-out:
- return ret;
-}
-
-
-static int
-prism54_reset(struct net_device *ndev, struct iw_request_info *info,
- __u32 * uwrq, char *extra)
-{
- islpci_reset(netdev_priv(ndev), 0);
-
- return 0;
-}
-
-static int
-prism54_get_oid(struct net_device *ndev, struct iw_request_info *info,
- struct iw_point *dwrq, char *extra)
-{
- union oid_res_t r;
- int rvalue;
- enum oid_num_t n = dwrq->flags;
-
- rvalue = mgt_get_request(netdev_priv(ndev), n, 0, NULL, &r);
- dwrq->length = mgt_response_to_str(n, &r, extra);
- if ((isl_oid[n].flags & OID_FLAG_TYPE) != OID_TYPE_U32)
- kfree(r.ptr);
- return rvalue;
-}
-
-static int
-prism54_set_u32(struct net_device *ndev, struct iw_request_info *info,
- __u32 * uwrq, char *extra)
-{
- u32 oid = uwrq[0], u = uwrq[1];
-
- return mgt_set_request(netdev_priv(ndev), oid, 0, &u);
-}
-
-static int
-prism54_set_raw(struct net_device *ndev, struct iw_request_info *info,
- struct iw_point *dwrq, char *extra)
-{
- u32 oid = dwrq->flags;
-
- return mgt_set_request(netdev_priv(ndev), oid, 0, extra);
-}
-
-void
-prism54_acl_init(struct islpci_acl *acl)
-{
- mutex_init(&acl->lock);
- INIT_LIST_HEAD(&acl->mac_list);
- acl->size = 0;
- acl->policy = MAC_POLICY_OPEN;
-}
-
-static void
-prism54_clear_mac(struct islpci_acl *acl)
-{
- struct list_head *ptr, *next;
- struct mac_entry *entry;
-
- mutex_lock(&acl->lock);
-
- if (acl->size == 0) {
- mutex_unlock(&acl->lock);
- return;
- }
-
- for (ptr = acl->mac_list.next, next = ptr->next;
- ptr != &acl->mac_list; ptr = next, next = ptr->next) {
- entry = list_entry(ptr, struct mac_entry, _list);
- list_del(ptr);
- kfree(entry);
- }
- acl->size = 0;
- mutex_unlock(&acl->lock);
-}
-
-void
-prism54_acl_clean(struct islpci_acl *acl)
-{
- prism54_clear_mac(acl);
-}
-
-static int
-prism54_add_mac(struct net_device *ndev, struct iw_request_info *info,
- struct sockaddr *awrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- struct islpci_acl *acl = &priv->acl;
- struct mac_entry *entry;
- struct sockaddr *addr = (struct sockaddr *) extra;
-
- if (addr->sa_family != ARPHRD_ETHER)
- return -EOPNOTSUPP;
-
- entry = kmalloc(sizeof (struct mac_entry), GFP_KERNEL);
- if (entry == NULL)
- return -ENOMEM;
-
- memcpy(entry->addr, addr->sa_data, ETH_ALEN);
-
- if (mutex_lock_interruptible(&acl->lock)) {
- kfree(entry);
- return -ERESTARTSYS;
- }
- list_add_tail(&entry->_list, &acl->mac_list);
- acl->size++;
- mutex_unlock(&acl->lock);
-
- return 0;
-}
-
-static int
-prism54_del_mac(struct net_device *ndev, struct iw_request_info *info,
- struct sockaddr *awrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- struct islpci_acl *acl = &priv->acl;
- struct mac_entry *entry;
- struct sockaddr *addr = (struct sockaddr *) extra;
-
- if (addr->sa_family != ARPHRD_ETHER)
- return -EOPNOTSUPP;
-
- if (mutex_lock_interruptible(&acl->lock))
- return -ERESTARTSYS;
- list_for_each_entry(entry, &acl->mac_list, _list) {
- if (ether_addr_equal(entry->addr, addr->sa_data)) {
- list_del(&entry->_list);
- acl->size--;
- kfree(entry);
- mutex_unlock(&acl->lock);
- return 0;
- }
- }
- mutex_unlock(&acl->lock);
- return -EINVAL;
-}
-
-static int
-prism54_get_mac(struct net_device *ndev, struct iw_request_info *info,
- struct iw_point *dwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- struct islpci_acl *acl = &priv->acl;
- struct mac_entry *entry;
- struct sockaddr *dst = (struct sockaddr *) extra;
-
- dwrq->length = 0;
-
- if (mutex_lock_interruptible(&acl->lock))
- return -ERESTARTSYS;
-
- list_for_each_entry(entry, &acl->mac_list, _list) {
- memcpy(dst->sa_data, entry->addr, ETH_ALEN);
- dst->sa_family = ARPHRD_ETHER;
- dwrq->length++;
- dst++;
- }
- mutex_unlock(&acl->lock);
- return 0;
-}
-
-/* Setting policy also clears the MAC acl, even if we don't change the default
- * policy
- */
-
-static int
-prism54_set_policy(struct net_device *ndev, struct iw_request_info *info,
- __u32 * uwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- struct islpci_acl *acl = &priv->acl;
- u32 mlmeautolevel;
-
- prism54_clear_mac(acl);
-
- if ((*uwrq < MAC_POLICY_OPEN) || (*uwrq > MAC_POLICY_REJECT))
- return -EINVAL;
-
- down_write(&priv->mib_sem);
-
- acl->policy = *uwrq;
-
- /* the ACL code needs an intermediate mlmeautolevel */
- if ((priv->iw_mode == IW_MODE_MASTER) &&
- (acl->policy != MAC_POLICY_OPEN))
- mlmeautolevel = DOT11_MLME_INTERMEDIATE;
- else
- mlmeautolevel = CARD_DEFAULT_MLME_MODE;
- if (priv->wpa)
- mlmeautolevel = DOT11_MLME_EXTENDED;
- mgt_set(priv, DOT11_OID_MLMEAUTOLEVEL, &mlmeautolevel);
- /* restart the card with our new policy */
- if (mgt_commit(priv)) {
- up_write(&priv->mib_sem);
- return -EIO;
- }
- up_write(&priv->mib_sem);
-
- return 0;
-}
-
-static int
-prism54_get_policy(struct net_device *ndev, struct iw_request_info *info,
- __u32 * uwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- struct islpci_acl *acl = &priv->acl;
-
- *uwrq = acl->policy;
-
- return 0;
-}
-
-/* Return 1 only if client should be accepted. */
-
-static int
-prism54_mac_accept(struct islpci_acl *acl, char *mac)
-{
- struct mac_entry *entry;
- int res = 0;
-
- if (mutex_lock_interruptible(&acl->lock))
- return -ERESTARTSYS;
-
- if (acl->policy == MAC_POLICY_OPEN) {
- mutex_unlock(&acl->lock);
- return 1;
- }
-
- list_for_each_entry(entry, &acl->mac_list, _list) {
- if (memcmp(entry->addr, mac, ETH_ALEN) == 0) {
- res = 1;
- break;
- }
- }
- res = (acl->policy == MAC_POLICY_ACCEPT) ? !res : res;
- mutex_unlock(&acl->lock);
-
- return res;
-}
-
-static int
-prism54_kick_all(struct net_device *ndev, struct iw_request_info *info,
- struct iw_point *dwrq, char *extra)
-{
- struct obj_mlme *mlme;
- int rvalue;
-
- mlme = kmalloc(sizeof (struct obj_mlme), GFP_KERNEL);
- if (mlme == NULL)
- return -ENOMEM;
-
- /* Tell the card to kick every client */
- mlme->id = 0;
- rvalue =
- mgt_set_request(netdev_priv(ndev), DOT11_OID_DISASSOCIATE, 0, mlme);
- kfree(mlme);
-
- return rvalue;
-}
-
-static int
-prism54_kick_mac(struct net_device *ndev, struct iw_request_info *info,
- struct sockaddr *awrq, char *extra)
-{
- struct obj_mlme *mlme;
- struct sockaddr *addr = (struct sockaddr *) extra;
- int rvalue;
-
- if (addr->sa_family != ARPHRD_ETHER)
- return -EOPNOTSUPP;
-
- mlme = kmalloc(sizeof (struct obj_mlme), GFP_KERNEL);
- if (mlme == NULL)
- return -ENOMEM;
-
- /* Tell the card to only kick the corresponding bastard */
- memcpy(mlme->address, addr->sa_data, ETH_ALEN);
- mlme->id = -1;
- rvalue =
- mgt_set_request(netdev_priv(ndev), DOT11_OID_DISASSOCIATE, 0, mlme);
-
- kfree(mlme);
-
- return rvalue;
-}
-
-/* Translate a TRAP oid into a wireless event. Called in islpci_mgt_receive. */
-
-static void
-format_event(islpci_private *priv, char *dest, const char *str,
- const struct obj_mlme *mlme, u16 *length, int error)
-{
- int n = snprintf(dest, IW_CUSTOM_MAX,
- "%s %s %pM %s (%2.2X)",
- str,
- ((priv->iw_mode == IW_MODE_MASTER) ? "from" : "to"),
- mlme->address,
- (error ? (mlme->code ? " : REJECTED " : " : ACCEPTED ")
- : ""), mlme->code);
- WARN_ON(n >= IW_CUSTOM_MAX);
- *length = n;
-}
-
-static void
-send_formatted_event(islpci_private *priv, const char *str,
- const struct obj_mlme *mlme, int error)
-{
- union iwreq_data wrqu;
- char *memptr;
-
- memptr = kmalloc(IW_CUSTOM_MAX, GFP_KERNEL);
- if (!memptr)
- return;
- wrqu.data.pointer = memptr;
- wrqu.data.length = 0;
- format_event(priv, memptr, str, mlme, &wrqu.data.length,
- error);
- wireless_send_event(priv->ndev, IWEVCUSTOM, &wrqu, memptr);
- kfree(memptr);
-}
-
-static void
-send_simple_event(islpci_private *priv, const char *str)
-{
- union iwreq_data wrqu;
- char *memptr;
- int n = strlen(str);
-
- memptr = kmalloc(IW_CUSTOM_MAX, GFP_KERNEL);
- if (!memptr)
- return;
- BUG_ON(n >= IW_CUSTOM_MAX);
- wrqu.data.pointer = memptr;
- wrqu.data.length = n;
- strcpy(memptr, str);
- wireless_send_event(priv->ndev, IWEVCUSTOM, &wrqu, memptr);
- kfree(memptr);
-}
-
-static void
-link_changed(struct net_device *ndev, u32 bitrate)
-{
- islpci_private *priv = netdev_priv(ndev);
-
- if (bitrate) {
- netif_carrier_on(ndev);
- if (priv->iw_mode == IW_MODE_INFRA) {
- union iwreq_data uwrq;
- prism54_get_wap(ndev, NULL, (struct sockaddr *) &uwrq,
- NULL);
- wireless_send_event(ndev, SIOCGIWAP, &uwrq, NULL);
- } else
- send_simple_event(netdev_priv(ndev),
- "Link established");
- } else {
- netif_carrier_off(ndev);
- send_simple_event(netdev_priv(ndev), "Link lost");
- }
-}
-
-/* Beacon/ProbeResp payload header */
-struct ieee80211_beacon_phdr {
- u8 timestamp[8];
- u16 beacon_int;
- u16 capab_info;
-} __packed;
-
-#define WLAN_EID_GENERIC 0xdd
-static u8 wpa_oid[4] = { 0x00, 0x50, 0xf2, 1 };
-
-static void
-prism54_wpa_bss_ie_add(islpci_private *priv, u8 *bssid,
- u8 *wpa_ie, size_t wpa_ie_len)
-{
- struct list_head *ptr;
- struct islpci_bss_wpa_ie *bss = NULL;
-
- if (wpa_ie_len > MAX_WPA_IE_LEN)
- wpa_ie_len = MAX_WPA_IE_LEN;
-
- mutex_lock(&priv->wpa_lock);
-
- /* try to use existing entry */
- list_for_each(ptr, &priv->bss_wpa_list) {
- bss = list_entry(ptr, struct islpci_bss_wpa_ie, list);
- if (memcmp(bss->bssid, bssid, ETH_ALEN) == 0) {
- list_move(&bss->list, &priv->bss_wpa_list);
- break;
- }
- bss = NULL;
- }
-
- if (bss == NULL) {
- /* add a new BSS entry; if max number of entries is already
- * reached, replace the least recently updated */
- if (priv->num_bss_wpa >= MAX_BSS_WPA_IE_COUNT) {
- bss = list_entry(priv->bss_wpa_list.prev,
- struct islpci_bss_wpa_ie, list);
- list_del(&bss->list);
- } else {
- bss = kzalloc(sizeof (*bss), GFP_ATOMIC);
- if (bss != NULL)
- priv->num_bss_wpa++;
- }
- if (bss != NULL) {
- memcpy(bss->bssid, bssid, ETH_ALEN);
- list_add(&bss->list, &priv->bss_wpa_list);
- }
- }
-
- if (bss != NULL) {
- memcpy(bss->wpa_ie, wpa_ie, wpa_ie_len);
- bss->wpa_ie_len = wpa_ie_len;
- bss->last_update = jiffies;
- } else {
- printk(KERN_DEBUG "Failed to add BSS WPA entry for "
- "%pM\n", bssid);
- }
-
- /* expire old entries from WPA list */
- while (priv->num_bss_wpa > 0) {
- bss = list_entry(priv->bss_wpa_list.prev,
- struct islpci_bss_wpa_ie, list);
- if (!time_after(jiffies, bss->last_update + 60 * HZ))
- break;
-
- list_del(&bss->list);
- priv->num_bss_wpa--;
- kfree(bss);
- }
-
- mutex_unlock(&priv->wpa_lock);
-}
-
-static size_t
-prism54_wpa_bss_ie_get(islpci_private *priv, u8 *bssid, u8 *wpa_ie)
-{
- struct list_head *ptr;
- struct islpci_bss_wpa_ie *bss = NULL;
- size_t len = 0;
-
- mutex_lock(&priv->wpa_lock);
-
- list_for_each(ptr, &priv->bss_wpa_list) {
- bss = list_entry(ptr, struct islpci_bss_wpa_ie, list);
- if (memcmp(bss->bssid, bssid, ETH_ALEN) == 0)
- break;
- bss = NULL;
- }
- if (bss) {
- len = bss->wpa_ie_len;
- memcpy(wpa_ie, bss->wpa_ie, len);
- }
- mutex_unlock(&priv->wpa_lock);
-
- return len;
-}
-
-void
-prism54_wpa_bss_ie_init(islpci_private *priv)
-{
- INIT_LIST_HEAD(&priv->bss_wpa_list);
- mutex_init(&priv->wpa_lock);
-}
-
-void
-prism54_wpa_bss_ie_clean(islpci_private *priv)
-{
- struct islpci_bss_wpa_ie *bss, *n;
-
- list_for_each_entry_safe(bss, n, &priv->bss_wpa_list, list) {
- kfree(bss);
- }
-}
-
-static void
-prism54_process_bss_data(islpci_private *priv, u32 oid, u8 *addr,
- u8 *payload, size_t len)
-{
- struct ieee80211_beacon_phdr *hdr;
- u8 *pos, *end;
-
- if (!priv->wpa)
- return;
-
- hdr = (struct ieee80211_beacon_phdr *) payload;
- pos = (u8 *) (hdr + 1);
- end = payload + len;
- while (pos < end) {
- if (pos + 2 + pos[1] > end) {
- printk(KERN_DEBUG "Parsing Beacon/ProbeResp failed "
- "for %pM\n", addr);
- return;
- }
- if (pos[0] == WLAN_EID_GENERIC && pos[1] >= 4 &&
- memcmp(pos + 2, wpa_oid, 4) == 0) {
- prism54_wpa_bss_ie_add(priv, addr, pos, pos[1] + 2);
- return;
- }
- pos += 2 + pos[1];
- }
-}
-
-static void
-handle_request(islpci_private *priv, struct obj_mlme *mlme, enum oid_num_t oid)
-{
- if (((mlme->state == DOT11_STATE_AUTHING) ||
- (mlme->state == DOT11_STATE_ASSOCING))
- && mgt_mlme_answer(priv)) {
- /* Someone is requesting auth and we must respond. Just send back
- * the trap with error code set accordingly.
- */
- mlme->code = prism54_mac_accept(&priv->acl,
- mlme->address) ? 0 : 1;
- mgt_set_request(priv, oid, 0, mlme);
- }
-}
-
-static int
-prism54_process_trap_helper(islpci_private *priv, enum oid_num_t oid,
- char *data)
-{
- struct obj_mlme *mlme = (struct obj_mlme *) data;
- struct obj_mlmeex *mlmeex = (struct obj_mlmeex *) data;
- struct obj_mlmeex *confirm;
- u8 wpa_ie[MAX_WPA_IE_LEN];
- int wpa_ie_len;
- size_t len = 0; /* u16, better? */
- u8 *payload = NULL, *pos = NULL;
- int ret;
-
- /* I think all trapable objects are listed here.
- * Some oids have a EX version. The difference is that they are emitted
- * in DOT11_MLME_EXTENDED mode (set with DOT11_OID_MLMEAUTOLEVEL)
- * with more info.
- * The few events already defined by the wireless tools are not really
- * suited. We use the more flexible custom event facility.
- */
-
- if (oid >= DOT11_OID_BEACON) {
- len = mlmeex->size;
- payload = pos = mlmeex->data;
- }
-
- /* I fear prism54_process_bss_data won't work with big endian data */
- if ((oid == DOT11_OID_BEACON) || (oid == DOT11_OID_PROBE))
- prism54_process_bss_data(priv, oid, mlmeex->address,
- payload, len);
-
- mgt_le_to_cpu(isl_oid[oid].flags & OID_FLAG_TYPE, (void *) mlme);
-
- switch (oid) {
-
- case GEN_OID_LINKSTATE:
- link_changed(priv->ndev, (u32) *data);
- break;
-
- case DOT11_OID_MICFAILURE:
- send_simple_event(priv, "Mic failure");
- break;
-
- case DOT11_OID_DEAUTHENTICATE:
- send_formatted_event(priv, "DeAuthenticate request", mlme, 0);
- break;
-
- case DOT11_OID_AUTHENTICATE:
- handle_request(priv, mlme, oid);
- send_formatted_event(priv, "Authenticate request", mlme, 1);
- break;
-
- case DOT11_OID_DISASSOCIATE:
- send_formatted_event(priv, "Disassociate request", mlme, 0);
- break;
-
- case DOT11_OID_ASSOCIATE:
- handle_request(priv, mlme, oid);
- send_formatted_event(priv, "Associate request", mlme, 1);
- break;
-
- case DOT11_OID_REASSOCIATE:
- handle_request(priv, mlme, oid);
- send_formatted_event(priv, "ReAssociate request", mlme, 1);
- break;
-
- case DOT11_OID_BEACON:
- send_formatted_event(priv,
- "Received a beacon from an unknown AP",
- mlme, 0);
- break;
-
- case DOT11_OID_PROBE:
- /* we received a probe from a client. */
- send_formatted_event(priv, "Received a probe from client", mlme,
- 0);
- break;
-
- /* Note : "mlme" is actually a "struct obj_mlmeex *" here, but this
- * is backward compatible layout-wise with "struct obj_mlme".
- */
-
- case DOT11_OID_DEAUTHENTICATEEX:
- send_formatted_event(priv, "DeAuthenticate request", mlme, 0);
- break;
-
- case DOT11_OID_AUTHENTICATEEX:
- handle_request(priv, mlme, oid);
- send_formatted_event(priv, "Authenticate request (ex)", mlme, 1);
-
- if (priv->iw_mode != IW_MODE_MASTER
- && mlmeex->state != DOT11_STATE_AUTHING)
- break;
-
- confirm = kmalloc(sizeof(struct obj_mlmeex) + 6, GFP_ATOMIC);
-
- if (!confirm)
- break;
-
- memcpy(&confirm->address, mlmeex->address, ETH_ALEN);
- printk(KERN_DEBUG "Authenticate from: address:\t%pM\n",
- mlmeex->address);
- confirm->id = -1; /* or mlmeex->id ? */
- confirm->state = 0; /* not used */
- confirm->code = 0;
- confirm->size = 6;
- confirm->data[0] = 0x00;
- confirm->data[1] = 0x00;
- confirm->data[2] = 0x02;
- confirm->data[3] = 0x00;
- confirm->data[4] = 0x00;
- confirm->data[5] = 0x00;
-
- ret = mgt_set_varlen(priv, DOT11_OID_ASSOCIATEEX, confirm, 6);
-
- kfree(confirm);
- if (ret)
- return ret;
- break;
-
- case DOT11_OID_DISASSOCIATEEX:
- send_formatted_event(priv, "Disassociate request (ex)", mlme, 0);
- break;
-
- case DOT11_OID_ASSOCIATEEX:
- handle_request(priv, mlme, oid);
- send_formatted_event(priv, "Associate request (ex)", mlme, 1);
-
- if (priv->iw_mode != IW_MODE_MASTER
- && mlmeex->state != DOT11_STATE_ASSOCING)
- break;
-
- confirm = kmalloc(sizeof(struct obj_mlmeex), GFP_ATOMIC);
-
- if (!confirm)
- break;
-
- memcpy(&confirm->address, mlmeex->address, ETH_ALEN);
-
- confirm->id = ((struct obj_mlmeex *)mlme)->id;
- confirm->state = 0; /* not used */
- confirm->code = 0;
-
- wpa_ie_len = prism54_wpa_bss_ie_get(priv, mlmeex->address, wpa_ie);
-
- if (!wpa_ie_len) {
- printk(KERN_DEBUG "No WPA IE found from address:\t%pM\n",
- mlmeex->address);
- kfree(confirm);
- break;
- }
-
- confirm->size = wpa_ie_len;
- memcpy(&confirm->data, wpa_ie, wpa_ie_len);
-
- mgt_set_varlen(priv, oid, confirm, wpa_ie_len);
-
- kfree(confirm);
-
- break;
-
- case DOT11_OID_REASSOCIATEEX:
- handle_request(priv, mlme, oid);
- send_formatted_event(priv, "Reassociate request (ex)", mlme, 1);
-
- if (priv->iw_mode != IW_MODE_MASTER
- && mlmeex->state != DOT11_STATE_ASSOCING)
- break;
-
- confirm = kmalloc(sizeof(struct obj_mlmeex), GFP_ATOMIC);
-
- if (!confirm)
- break;
-
- memcpy(&confirm->address, mlmeex->address, ETH_ALEN);
-
- confirm->id = mlmeex->id;
- confirm->state = 0; /* not used */
- confirm->code = 0;
-
- wpa_ie_len = prism54_wpa_bss_ie_get(priv, mlmeex->address, wpa_ie);
-
- if (!wpa_ie_len) {
- printk(KERN_DEBUG "No WPA IE found from address:\t%pM\n",
- mlmeex->address);
- kfree(confirm);
- break;
- }
-
- confirm->size = wpa_ie_len;
- memcpy(&confirm->data, wpa_ie, wpa_ie_len);
-
- mgt_set_varlen(priv, oid, confirm, wpa_ie_len);
-
- kfree(confirm);
-
- break;
-
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-/*
- * Process a device trap. This is called via schedule_work(), outside of
- * interrupt context, no locks held.
- */
-void
-prism54_process_trap(struct work_struct *work)
-{
- struct islpci_mgmtframe *frame =
- container_of(work, struct islpci_mgmtframe, ws);
- struct net_device *ndev = frame->ndev;
- enum oid_num_t n = mgt_oidtonum(frame->header->oid);
-
- if (n != OID_NUM_LAST)
- prism54_process_trap_helper(netdev_priv(ndev), n, frame->data);
- islpci_mgt_release(frame);
-}
-
-int
-prism54_set_mac_address(struct net_device *ndev, void *addr)
-{
- islpci_private *priv = netdev_priv(ndev);
- int ret;
-
- if (ndev->addr_len != 6)
- return -EINVAL;
- ret = mgt_set_request(priv, GEN_OID_MACADDRESS, 0,
- &((struct sockaddr *) addr)->sa_data);
- if (!ret)
- memcpy(priv->ndev->dev_addr,
- &((struct sockaddr *) addr)->sa_data, ETH_ALEN);
-
- return ret;
-}
-
-#define PRISM54_SET_WPA SIOCIWFIRSTPRIV+12
-
-static int
-prism54_set_wpa(struct net_device *ndev, struct iw_request_info *info,
- __u32 * uwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- u32 mlme, authen, dot1x, filter, wep;
-
- if (islpci_get_state(priv) < PRV_STATE_INIT)
- return 0;
-
- wep = 1; /* For privacy invoked */
- filter = 1; /* Filter out all unencrypted frames */
- dot1x = 0x01; /* To enable eap filter */
- mlme = DOT11_MLME_EXTENDED;
- authen = DOT11_AUTH_OS; /* Only WEP uses _SK and _BOTH */
-
- down_write(&priv->mib_sem);
- priv->wpa = *uwrq;
-
- switch (priv->wpa) {
- default:
- case 0: /* Clears/disables WPA and friends */
- wep = 0;
- filter = 0; /* Do not filter un-encrypted data */
- dot1x = 0;
- mlme = DOT11_MLME_AUTO;
- printk("%s: Disabling WPA\n", ndev->name);
- break;
- case 2:
- case 1: /* WPA */
- printk("%s: Enabling WPA\n", ndev->name);
- break;
- }
- up_write(&priv->mib_sem);
-
- mgt_set_request(priv, DOT11_OID_AUTHENABLE, 0, &authen);
- mgt_set_request(priv, DOT11_OID_PRIVACYINVOKED, 0, &wep);
- mgt_set_request(priv, DOT11_OID_EXUNENCRYPTED, 0, &filter);
- mgt_set_request(priv, DOT11_OID_DOT1XENABLE, 0, &dot1x);
- mgt_set_request(priv, DOT11_OID_MLMEAUTOLEVEL, 0, &mlme);
-
- return 0;
-}
-
-static int
-prism54_get_wpa(struct net_device *ndev, struct iw_request_info *info,
- __u32 * uwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- *uwrq = priv->wpa;
- return 0;
-}
-
-static int
-prism54_set_prismhdr(struct net_device *ndev, struct iw_request_info *info,
- __u32 * uwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- priv->monitor_type =
- (*uwrq ? ARPHRD_IEEE80211_PRISM : ARPHRD_IEEE80211);
- if (priv->iw_mode == IW_MODE_MONITOR)
- priv->ndev->type = priv->monitor_type;
-
- return 0;
-}
-
-static int
-prism54_get_prismhdr(struct net_device *ndev, struct iw_request_info *info,
- __u32 * uwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- *uwrq = (priv->monitor_type == ARPHRD_IEEE80211_PRISM);
- return 0;
-}
-
-static int
-prism54_debug_oid(struct net_device *ndev, struct iw_request_info *info,
- __u32 * uwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
-
- priv->priv_oid = *uwrq;
- printk("%s: oid 0x%08X\n", ndev->name, *uwrq);
-
- return 0;
-}
-
-static int
-prism54_debug_get_oid(struct net_device *ndev, struct iw_request_info *info,
- struct iw_point *data, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- struct islpci_mgmtframe *response;
- int ret = -EIO;
-
- printk("%s: get_oid 0x%08X\n", ndev->name, priv->priv_oid);
- data->length = 0;
-
- if (islpci_get_state(priv) >= PRV_STATE_INIT) {
- ret =
- islpci_mgt_transaction(priv->ndev, PIMFOR_OP_GET,
- priv->priv_oid, extra, 256,
- &response);
- printk("%s: ret: %i\n", ndev->name, ret);
- if (ret || !response
- || response->header->operation == PIMFOR_OP_ERROR) {
- if (response) {
- islpci_mgt_release(response);
- }
- printk("%s: EIO\n", ndev->name);
- ret = -EIO;
- }
- if (!ret) {
- data->length = response->header->length;
- memcpy(extra, response->data, data->length);
- islpci_mgt_release(response);
- printk("%s: len: %i\n", ndev->name, data->length);
- }
- }
-
- return ret;
-}
-
-static int
-prism54_debug_set_oid(struct net_device *ndev, struct iw_request_info *info,
- struct iw_point *data, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- struct islpci_mgmtframe *response;
- int ret = 0, response_op = PIMFOR_OP_ERROR;
-
- printk("%s: set_oid 0x%08X\tlen: %d\n", ndev->name, priv->priv_oid,
- data->length);
-
- if (islpci_get_state(priv) >= PRV_STATE_INIT) {
- ret =
- islpci_mgt_transaction(priv->ndev, PIMFOR_OP_SET,
- priv->priv_oid, extra, data->length,
- &response);
- printk("%s: ret: %i\n", ndev->name, ret);
- if (ret || !response
- || response->header->operation == PIMFOR_OP_ERROR) {
- if (response) {
- islpci_mgt_release(response);
- }
- printk("%s: EIO\n", ndev->name);
- ret = -EIO;
- }
- if (!ret) {
- response_op = response->header->operation;
- printk("%s: response_op: %i\n", ndev->name,
- response_op);
- islpci_mgt_release(response);
- }
- }
-
- return (ret ? ret : -EINPROGRESS);
-}
-
-static int
-prism54_set_spy(struct net_device *ndev,
- struct iw_request_info *info,
- union iwreq_data *uwrq, char *extra)
-{
- islpci_private *priv = netdev_priv(ndev);
- u32 u;
- enum oid_num_t oid = OID_INL_CONFIG;
-
- down_write(&priv->mib_sem);
- mgt_get(priv, OID_INL_CONFIG, &u);
-
- if ((uwrq->data.length == 0) && (priv->spy_data.spy_number > 0))
- /* disable spy */
- u &= ~INL_CONFIG_RXANNEX;
- else if ((uwrq->data.length > 0) && (priv->spy_data.spy_number == 0))
- /* enable spy */
- u |= INL_CONFIG_RXANNEX;
-
- mgt_set(priv, OID_INL_CONFIG, &u);
- mgt_commit_list(priv, &oid, 1);
- up_write(&priv->mib_sem);
-
- return iw_handler_set_spy(ndev, info, uwrq, extra);
-}
-
-static const iw_handler prism54_handler[] = {
- (iw_handler) prism54_commit, /* SIOCSIWCOMMIT */
- (iw_handler) prism54_get_name, /* SIOCGIWNAME */
- (iw_handler) NULL, /* SIOCSIWNWID */
- (iw_handler) NULL, /* SIOCGIWNWID */
- (iw_handler) prism54_set_freq, /* SIOCSIWFREQ */
- (iw_handler) prism54_get_freq, /* SIOCGIWFREQ */
- (iw_handler) prism54_set_mode, /* SIOCSIWMODE */
- (iw_handler) prism54_get_mode, /* SIOCGIWMODE */
- (iw_handler) prism54_set_sens, /* SIOCSIWSENS */
- (iw_handler) prism54_get_sens, /* SIOCGIWSENS */
- (iw_handler) NULL, /* SIOCSIWRANGE */
- (iw_handler) prism54_get_range, /* SIOCGIWRANGE */
- (iw_handler) NULL, /* SIOCSIWPRIV */
- (iw_handler) NULL, /* SIOCGIWPRIV */
- (iw_handler) NULL, /* SIOCSIWSTATS */
- (iw_handler) NULL, /* SIOCGIWSTATS */
- prism54_set_spy, /* SIOCSIWSPY */
- iw_handler_get_spy, /* SIOCGIWSPY */
- iw_handler_set_thrspy, /* SIOCSIWTHRSPY */
- iw_handler_get_thrspy, /* SIOCGIWTHRSPY */
- (iw_handler) prism54_set_wap, /* SIOCSIWAP */
- (iw_handler) prism54_get_wap, /* SIOCGIWAP */
- (iw_handler) NULL, /* -- hole -- */
- (iw_handler) NULL, /* SIOCGIWAPLIST deprecated */
- (iw_handler) prism54_set_scan, /* SIOCSIWSCAN */
- (iw_handler) prism54_get_scan, /* SIOCGIWSCAN */
- (iw_handler) prism54_set_essid, /* SIOCSIWESSID */
- (iw_handler) prism54_get_essid, /* SIOCGIWESSID */
- (iw_handler) prism54_set_nick, /* SIOCSIWNICKN */
- (iw_handler) prism54_get_nick, /* SIOCGIWNICKN */
- (iw_handler) NULL, /* -- hole -- */
- (iw_handler) NULL, /* -- hole -- */
- (iw_handler) prism54_set_rate, /* SIOCSIWRATE */
- (iw_handler) prism54_get_rate, /* SIOCGIWRATE */
- (iw_handler) prism54_set_rts, /* SIOCSIWRTS */
- (iw_handler) prism54_get_rts, /* SIOCGIWRTS */
- (iw_handler) prism54_set_frag, /* SIOCSIWFRAG */
- (iw_handler) prism54_get_frag, /* SIOCGIWFRAG */
- (iw_handler) prism54_set_txpower, /* SIOCSIWTXPOW */
- (iw_handler) prism54_get_txpower, /* SIOCGIWTXPOW */
- (iw_handler) prism54_set_retry, /* SIOCSIWRETRY */
- (iw_handler) prism54_get_retry, /* SIOCGIWRETRY */
- (iw_handler) prism54_set_encode, /* SIOCSIWENCODE */
- (iw_handler) prism54_get_encode, /* SIOCGIWENCODE */
- (iw_handler) NULL, /* SIOCSIWPOWER */
- (iw_handler) NULL, /* SIOCGIWPOWER */
- NULL, /* -- hole -- */
- NULL, /* -- hole -- */
- (iw_handler) prism54_set_genie, /* SIOCSIWGENIE */
- (iw_handler) prism54_get_genie, /* SIOCGIWGENIE */
- (iw_handler) prism54_set_auth, /* SIOCSIWAUTH */
- (iw_handler) prism54_get_auth, /* SIOCGIWAUTH */
- (iw_handler) prism54_set_encodeext, /* SIOCSIWENCODEEXT */
- (iw_handler) prism54_get_encodeext, /* SIOCGIWENCODEEXT */
- NULL, /* SIOCSIWPMKSA */
-};
-
-/* The low order bit identify a SET (0) or a GET (1) ioctl. */
-
-#define PRISM54_RESET SIOCIWFIRSTPRIV
-#define PRISM54_GET_POLICY SIOCIWFIRSTPRIV+1
-#define PRISM54_SET_POLICY SIOCIWFIRSTPRIV+2
-#define PRISM54_GET_MAC SIOCIWFIRSTPRIV+3
-#define PRISM54_ADD_MAC SIOCIWFIRSTPRIV+4
-
-#define PRISM54_DEL_MAC SIOCIWFIRSTPRIV+6
-
-#define PRISM54_KICK_MAC SIOCIWFIRSTPRIV+8
-
-#define PRISM54_KICK_ALL SIOCIWFIRSTPRIV+10
-
-#define PRISM54_GET_WPA SIOCIWFIRSTPRIV+11
-#define PRISM54_SET_WPA SIOCIWFIRSTPRIV+12
-
-#define PRISM54_DBG_OID SIOCIWFIRSTPRIV+14
-#define PRISM54_DBG_GET_OID SIOCIWFIRSTPRIV+15
-#define PRISM54_DBG_SET_OID SIOCIWFIRSTPRIV+16
-
-#define PRISM54_GET_OID SIOCIWFIRSTPRIV+17
-#define PRISM54_SET_OID_U32 SIOCIWFIRSTPRIV+18
-#define PRISM54_SET_OID_STR SIOCIWFIRSTPRIV+20
-#define PRISM54_SET_OID_ADDR SIOCIWFIRSTPRIV+22
-
-#define PRISM54_GET_PRISMHDR SIOCIWFIRSTPRIV+23
-#define PRISM54_SET_PRISMHDR SIOCIWFIRSTPRIV+24
-
-#define IWPRIV_SET_U32(n,x) { n, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, 0, "s_"x }
-#define IWPRIV_SET_SSID(n,x) { n, IW_PRIV_TYPE_CHAR | IW_PRIV_SIZE_FIXED | 1, 0, "s_"x }
-#define IWPRIV_SET_ADDR(n,x) { n, IW_PRIV_TYPE_ADDR | IW_PRIV_SIZE_FIXED | 1, 0, "s_"x }
-#define IWPRIV_GET(n,x) { n, 0, IW_PRIV_TYPE_CHAR | IW_PRIV_SIZE_FIXED | PRIV_STR_SIZE, "g_"x }
-
-#define IWPRIV_U32(n,x) IWPRIV_SET_U32(n,x), IWPRIV_GET(n,x)
-#define IWPRIV_SSID(n,x) IWPRIV_SET_SSID(n,x), IWPRIV_GET(n,x)
-#define IWPRIV_ADDR(n,x) IWPRIV_SET_ADDR(n,x), IWPRIV_GET(n,x)
-
-/* Note : limited to 128 private ioctls (wireless tools 26) */
-
-static const struct iw_priv_args prism54_private_args[] = {
-/*{ cmd, set_args, get_args, name } */
- {PRISM54_RESET, 0, 0, "reset"},
- {PRISM54_GET_PRISMHDR, 0, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1,
- "get_prismhdr"},
- {PRISM54_SET_PRISMHDR, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, 0,
- "set_prismhdr"},
- {PRISM54_GET_POLICY, 0, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1,
- "getPolicy"},
- {PRISM54_SET_POLICY, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, 0,
- "setPolicy"},
- {PRISM54_GET_MAC, 0, IW_PRIV_TYPE_ADDR | 64, "getMac"},
- {PRISM54_ADD_MAC, IW_PRIV_TYPE_ADDR | IW_PRIV_SIZE_FIXED | 1, 0,
- "addMac"},
- {PRISM54_DEL_MAC, IW_PRIV_TYPE_ADDR | IW_PRIV_SIZE_FIXED | 1, 0,
- "delMac"},
- {PRISM54_KICK_MAC, IW_PRIV_TYPE_ADDR | IW_PRIV_SIZE_FIXED | 1, 0,
- "kickMac"},
- {PRISM54_KICK_ALL, 0, 0, "kickAll"},
- {PRISM54_GET_WPA, 0, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1,
- "get_wpa"},
- {PRISM54_SET_WPA, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, 0,
- "set_wpa"},
- {PRISM54_DBG_OID, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, 0,
- "dbg_oid"},
- {PRISM54_DBG_GET_OID, 0, IW_PRIV_TYPE_BYTE | 256, "dbg_get_oid"},
- {PRISM54_DBG_SET_OID, IW_PRIV_TYPE_BYTE | 256, 0, "dbg_set_oid"},
- /* --- sub-ioctls handlers --- */
- {PRISM54_GET_OID,
- 0, IW_PRIV_TYPE_CHAR | IW_PRIV_SIZE_FIXED | PRIV_STR_SIZE, ""},
- {PRISM54_SET_OID_U32,
- IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, 0, ""},
- {PRISM54_SET_OID_STR,
- IW_PRIV_TYPE_CHAR | IW_PRIV_SIZE_FIXED | 1, 0, ""},
- {PRISM54_SET_OID_ADDR,
- IW_PRIV_TYPE_ADDR | IW_PRIV_SIZE_FIXED | 1, 0, ""},
- /* --- sub-ioctls definitions --- */
- IWPRIV_ADDR(GEN_OID_MACADDRESS, "addr"),
- IWPRIV_GET(GEN_OID_LINKSTATE, "linkstate"),
- IWPRIV_U32(DOT11_OID_BSSTYPE, "bsstype"),
- IWPRIV_ADDR(DOT11_OID_BSSID, "bssid"),
- IWPRIV_U32(DOT11_OID_STATE, "state"),
- IWPRIV_U32(DOT11_OID_AID, "aid"),
-
- IWPRIV_SSID(DOT11_OID_SSIDOVERRIDE, "ssidoverride"),
-
- IWPRIV_U32(DOT11_OID_MEDIUMLIMIT, "medlimit"),
- IWPRIV_U32(DOT11_OID_BEACONPERIOD, "beacon"),
- IWPRIV_U32(DOT11_OID_DTIMPERIOD, "dtimperiod"),
-
- IWPRIV_U32(DOT11_OID_AUTHENABLE, "authenable"),
- IWPRIV_U32(DOT11_OID_PRIVACYINVOKED, "privinvok"),
- IWPRIV_U32(DOT11_OID_EXUNENCRYPTED, "exunencrypt"),
-
- IWPRIV_U32(DOT11_OID_REKEYTHRESHOLD, "rekeythresh"),
-
- IWPRIV_U32(DOT11_OID_MAXTXLIFETIME, "maxtxlife"),
- IWPRIV_U32(DOT11_OID_MAXRXLIFETIME, "maxrxlife"),
- IWPRIV_U32(DOT11_OID_ALOFT_FIXEDRATE, "fixedrate"),
- IWPRIV_U32(DOT11_OID_MAXFRAMEBURST, "frameburst"),
- IWPRIV_U32(DOT11_OID_PSM, "psm"),
-
- IWPRIV_U32(DOT11_OID_BRIDGELOCAL, "bridge"),
- IWPRIV_U32(DOT11_OID_CLIENTS, "clients"),
- IWPRIV_U32(DOT11_OID_CLIENTSASSOCIATED, "clientassoc"),
- IWPRIV_U32(DOT11_OID_DOT1XENABLE, "dot1xenable"),
- IWPRIV_U32(DOT11_OID_ANTENNARX, "rxant"),
- IWPRIV_U32(DOT11_OID_ANTENNATX, "txant"),
- IWPRIV_U32(DOT11_OID_ANTENNADIVERSITY, "antdivers"),
- IWPRIV_U32(DOT11_OID_EDTHRESHOLD, "edthresh"),
- IWPRIV_U32(DOT11_OID_PREAMBLESETTINGS, "preamble"),
- IWPRIV_GET(DOT11_OID_RATES, "rates"),
- IWPRIV_U32(DOT11_OID_OUTPUTPOWER, ".11outpower"),
- IWPRIV_GET(DOT11_OID_SUPPORTEDRATES, "supprates"),
- IWPRIV_GET(DOT11_OID_SUPPORTEDFREQUENCIES, "suppfreq"),
-
- IWPRIV_U32(DOT11_OID_NOISEFLOOR, "noisefloor"),
- IWPRIV_GET(DOT11_OID_FREQUENCYACTIVITY, "freqactivity"),
- IWPRIV_U32(DOT11_OID_NONERPPROTECTION, "nonerpprotec"),
- IWPRIV_U32(DOT11_OID_PROFILES, "profile"),
- IWPRIV_GET(DOT11_OID_EXTENDEDRATES, "extrates"),
- IWPRIV_U32(DOT11_OID_MLMEAUTOLEVEL, "mlmelevel"),
-
- IWPRIV_GET(DOT11_OID_BSSS, "bsss"),
- IWPRIV_GET(DOT11_OID_BSSLIST, "bsslist"),
- IWPRIV_U32(OID_INL_MODE, "mode"),
- IWPRIV_U32(OID_INL_CONFIG, "config"),
- IWPRIV_U32(OID_INL_DOT11D_CONFORMANCE, ".11dconform"),
- IWPRIV_GET(OID_INL_PHYCAPABILITIES, "phycapa"),
- IWPRIV_U32(OID_INL_OUTPUTPOWER, "outpower"),
-};
-
-static const iw_handler prism54_private_handler[] = {
- (iw_handler) prism54_reset,
- (iw_handler) prism54_get_policy,
- (iw_handler) prism54_set_policy,
- (iw_handler) prism54_get_mac,
- (iw_handler) prism54_add_mac,
- (iw_handler) NULL,
- (iw_handler) prism54_del_mac,
- (iw_handler) NULL,
- (iw_handler) prism54_kick_mac,
- (iw_handler) NULL,
- (iw_handler) prism54_kick_all,
- (iw_handler) prism54_get_wpa,
- (iw_handler) prism54_set_wpa,
- (iw_handler) NULL,
- (iw_handler) prism54_debug_oid,
- (iw_handler) prism54_debug_get_oid,
- (iw_handler) prism54_debug_set_oid,
- (iw_handler) prism54_get_oid,
- (iw_handler) prism54_set_u32,
- (iw_handler) NULL,
- (iw_handler) prism54_set_raw,
- (iw_handler) NULL,
- (iw_handler) prism54_set_raw,
- (iw_handler) prism54_get_prismhdr,
- (iw_handler) prism54_set_prismhdr,
-};
-
-const struct iw_handler_def prism54_handler_def = {
- .num_standard = ARRAY_SIZE(prism54_handler),
- .num_private = ARRAY_SIZE(prism54_private_handler),
- .num_private_args = ARRAY_SIZE(prism54_private_args),
- .standard = (iw_handler *) prism54_handler,
- .private = (iw_handler *) prism54_private_handler,
- .private_args = (struct iw_priv_args *) prism54_private_args,
- .get_wireless_stats = prism54_get_wireless_stats,
-};
diff --git a/drivers/net/wireless/intersil/prism54/isl_ioctl.h b/drivers/net/wireless/intersil/prism54/isl_ioctl.h
deleted file mode 100644
index 3f85fd75ac19..000000000000
--- a/drivers/net/wireless/intersil/prism54/isl_ioctl.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2002 Intersil Americas Inc.
- * (C) 2003 Aurelien Alleaume <slts@free.fr>
- * (C) 2003 Luis R. Rodriguez <mcgrof@ruslug.rutgers.edu>
- */
-
-#ifndef _ISL_IOCTL_H
-#define _ISL_IOCTL_H
-
-#include "islpci_mgt.h"
-#include "islpci_dev.h"
-
-#include <net/iw_handler.h> /* New driver API */
-
-#define SUPPORTED_WIRELESS_EXT 19
-
-void prism54_mib_init(islpci_private *);
-
-struct iw_statistics *prism54_get_wireless_stats(struct net_device *);
-void prism54_update_stats(struct work_struct *);
-
-void prism54_acl_init(struct islpci_acl *);
-void prism54_acl_clean(struct islpci_acl *);
-
-void prism54_process_trap(struct work_struct *);
-
-void prism54_wpa_bss_ie_init(islpci_private *priv);
-void prism54_wpa_bss_ie_clean(islpci_private *priv);
-
-int prism54_set_mac_address(struct net_device *, void *);
-
-extern const struct iw_handler_def prism54_handler_def;
-
-#endif /* _ISL_IOCTL_H */
diff --git a/drivers/net/wireless/intersil/prism54/isl_oid.h b/drivers/net/wireless/intersil/prism54/isl_oid.h
deleted file mode 100644
index b889bb73a485..000000000000
--- a/drivers/net/wireless/intersil/prism54/isl_oid.h
+++ /dev/null
@@ -1,492 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2003 Herbert Valerio Riedel <hvr@gnu.org>
- * Copyright (C) 2004 Luis R. Rodriguez <mcgrof@ruslug.rutgers.edu>
- * Copyright (C) 2004 Aurelien Alleaume <slts@free.fr>
- */
-
-#if !defined(_ISL_OID_H)
-#define _ISL_OID_H
-
-/*
- * MIB related constant and structure definitions for communicating
- * with the device firmware
- */
-
-struct obj_ssid {
- u8 length;
- char octets[33];
-} __packed;
-
-struct obj_key {
- u8 type; /* dot11_priv_t */
- u8 length;
- char key[32];
-} __packed;
-
-struct obj_mlme {
- u8 address[6];
- u16 id;
- u16 state;
- u16 code;
-} __packed;
-
-struct obj_mlmeex {
- u8 address[6];
- u16 id;
- u16 state;
- u16 code;
- u16 size;
- u8 data[];
-} __packed;
-
-struct obj_buffer {
- u32 size;
- u32 addr; /* 32bit bus address */
-} __packed;
-
-struct obj_bss {
- u8 address[6];
- int:16; /* padding */
-
- char state;
- char reserved;
- short age;
-
- char quality;
- char rssi;
-
- struct obj_ssid ssid;
- short channel;
- char beacon_period;
- char dtim_period;
- short capinfo;
- short rates;
- short basic_rates;
- int:16; /* padding */
-} __packed;
-
-struct obj_bsslist {
- u32 nr;
- struct obj_bss bsslist[];
-} __packed;
-
-struct obj_frequencies {
- u16 nr;
- u16 mhz[];
-} __packed;
-
-struct obj_attachment {
- char type;
- char reserved;
- short id;
- short size;
- char data[];
-} __packed;
-
-/*
- * in case everything's ok, the inlined function below will be
- * optimized away by the compiler...
- */
-static inline void
-__bug_on_wrong_struct_sizes(void)
-{
- BUILD_BUG_ON(sizeof (struct obj_ssid) != 34);
- BUILD_BUG_ON(sizeof (struct obj_key) != 34);
- BUILD_BUG_ON(sizeof (struct obj_mlme) != 12);
- BUILD_BUG_ON(sizeof (struct obj_mlmeex) != 14);
- BUILD_BUG_ON(sizeof (struct obj_buffer) != 8);
- BUILD_BUG_ON(sizeof (struct obj_bss) != 60);
- BUILD_BUG_ON(sizeof (struct obj_bsslist) != 4);
- BUILD_BUG_ON(sizeof (struct obj_frequencies) != 2);
-}
-
-enum dot11_state_t {
- DOT11_STATE_NONE = 0,
- DOT11_STATE_AUTHING = 1,
- DOT11_STATE_AUTH = 2,
- DOT11_STATE_ASSOCING = 3,
-
- DOT11_STATE_ASSOC = 5,
- DOT11_STATE_IBSS = 6,
- DOT11_STATE_WDS = 7
-};
-
-enum dot11_bsstype_t {
- DOT11_BSSTYPE_NONE = 0,
- DOT11_BSSTYPE_INFRA = 1,
- DOT11_BSSTYPE_IBSS = 2,
- DOT11_BSSTYPE_ANY = 3
-};
-
-enum dot11_auth_t {
- DOT11_AUTH_NONE = 0,
- DOT11_AUTH_OS = 1,
- DOT11_AUTH_SK = 2,
- DOT11_AUTH_BOTH = 3
-};
-
-enum dot11_mlme_t {
- DOT11_MLME_AUTO = 0,
- DOT11_MLME_INTERMEDIATE = 1,
- DOT11_MLME_EXTENDED = 2
-};
-
-enum dot11_priv_t {
- DOT11_PRIV_WEP = 0,
- DOT11_PRIV_TKIP = 1
-};
-
-/* Prism "Nitro" / Frameburst / "Packet Frame Grouping"
- * Value is in microseconds. Represents the # microseconds
- * the firmware will take to group frames before sending out then out
- * together with a CSMA contention. Without this all frames are
- * sent with a CSMA contention.
- * Bibliography:
- * https://www.hpl.hp.com/personal/Jean_Tourrilhes/Papers/Packet.Frame.Grouping.html
- */
-enum dot11_maxframeburst_t {
- /* Values for DOT11_OID_MAXFRAMEBURST */
- DOT11_MAXFRAMEBURST_OFF = 0, /* Card firmware default */
- DOT11_MAXFRAMEBURST_MIXED_SAFE = 650, /* 802.11 a,b,g safe */
- DOT11_MAXFRAMEBURST_IDEAL = 1300, /* Theoretical ideal level */
- DOT11_MAXFRAMEBURST_MAX = 5000, /* Use this as max,
- * Note: firmware allows for greater values. This is a
- * recommended max. I'll update this as I find
- * out what the real MAX is. Also note that you don't necessarily
- * get better results with a greater value here.
- */
-};
-
-/* Support for 802.11 long and short frame preambles.
- * Long preamble uses 128-bit sync field, 8-bit CRC
- * Short preamble uses 56-bit sync field, 16-bit CRC
- *
- * 802.11a -- not sure, both optionally ?
- * 802.11b supports long and optionally short
- * 802.11g supports both */
-enum dot11_preamblesettings_t {
- DOT11_PREAMBLESETTING_LONG = 0,
- /* Allows *only* long 802.11 preambles */
- DOT11_PREAMBLESETTING_SHORT = 1,
- /* Allows *only* short 802.11 preambles */
- DOT11_PREAMBLESETTING_DYNAMIC = 2
- /* AutomatiGically set */
-};
-
-/* Support for 802.11 slot timing (time between packets).
- *
- * Long uses 802.11a slot timing (9 usec ?)
- * Short uses 802.11b slot timing (20 use ?) */
-enum dot11_slotsettings_t {
- DOT11_SLOTSETTINGS_LONG = 0,
- /* Allows *only* long 802.11b slot timing */
- DOT11_SLOTSETTINGS_SHORT = 1,
- /* Allows *only* long 802.11a slot timing */
- DOT11_SLOTSETTINGS_DYNAMIC = 2
- /* AutomatiGically set */
-};
-
-/* All you need to know, ERP is "Extended Rate PHY".
- * An Extended Rate PHY (ERP) STA or AP shall support three different
- * preamble and header formats:
- * Long preamble (refer to above)
- * Short preamble (refer to above)
- * OFDM preamble ( ? )
- *
- * I'm assuming here Protection tells the AP
- * to be careful, a STA which cannot handle the long pre-amble
- * has joined.
- */
-enum do11_nonerpstatus_t {
- DOT11_ERPSTAT_NONEPRESENT = 0,
- DOT11_ERPSTAT_USEPROTECTION = 1
-};
-
-/* (ERP is "Extended Rate PHY") Way to read NONERP is NON-ERP-*
- * The key here is DOT11 NON ERP NEVER protects against
- * NON ERP STA's. You *don't* want this unless
- * you know what you are doing. It means you will only
- * get Extended Rate capabilities */
-enum dot11_nonerpprotection_t {
- DOT11_NONERP_NEVER = 0,
- DOT11_NONERP_ALWAYS = 1,
- DOT11_NONERP_DYNAMIC = 2
-};
-
-/* Preset OID configuration for 802.11 modes
- * Note: DOT11_OID_CW[MIN|MAX] hold the values of the
- * DCS MIN|MAX backoff used */
-enum dot11_profile_t { /* And set/allowed values */
- /* Allowed values for DOT11_OID_PROFILES */
- DOT11_PROFILE_B_ONLY = 0,
- /* DOT11_OID_RATES: 1, 2, 5.5, 11Mbps
- * DOT11_OID_PREAMBLESETTINGS: DOT11_PREAMBLESETTING_DYNAMIC
- * DOT11_OID_CWMIN: 31
- * DOT11_OID_NONEPROTECTION: DOT11_NOERP_DYNAMIC
- * DOT11_OID_SLOTSETTINGS: DOT11_SLOTSETTINGS_LONG
- */
- DOT11_PROFILE_MIXED_G_WIFI = 1,
- /* DOT11_OID_RATES: 1, 2, 5.5, 11, 6, 9, 12, 18, 24, 36, 48, 54Mbs
- * DOT11_OID_PREAMBLESETTINGS: DOT11_PREAMBLESETTING_DYNAMIC
- * DOT11_OID_CWMIN: 15
- * DOT11_OID_NONEPROTECTION: DOT11_NOERP_DYNAMIC
- * DOT11_OID_SLOTSETTINGS: DOT11_SLOTSETTINGS_DYNAMIC
- */
- DOT11_PROFILE_MIXED_LONG = 2, /* "Long range" */
- /* Same as Profile MIXED_G_WIFI */
- DOT11_PROFILE_G_ONLY = 3,
- /* Same as Profile MIXED_G_WIFI */
- DOT11_PROFILE_TEST = 4,
- /* Same as Profile MIXED_G_WIFI except:
- * DOT11_OID_PREAMBLESETTINGS: DOT11_PREAMBLESETTING_SHORT
- * DOT11_OID_NONEPROTECTION: DOT11_NOERP_NEVER
- * DOT11_OID_SLOTSETTINGS: DOT11_SLOTSETTINGS_SHORT
- */
- DOT11_PROFILE_B_WIFI = 5,
- /* Same as Profile B_ONLY */
- DOT11_PROFILE_A_ONLY = 6,
- /* Same as Profile MIXED_G_WIFI except:
- * DOT11_OID_RATES: 6, 9, 12, 18, 24, 36, 48, 54Mbs
- */
- DOT11_PROFILE_MIXED_SHORT = 7
- /* Same as MIXED_G_WIFI */
-};
-
-
-/* The dot11d conformance level configures the 802.11d conformance levels.
- * The following conformance levels exist:*/
-enum oid_inl_conformance_t {
- OID_INL_CONFORMANCE_NONE = 0, /* Perform active scanning */
- OID_INL_CONFORMANCE_STRICT = 1, /* Strictly adhere to 802.11d */
- OID_INL_CONFORMANCE_FLEXIBLE = 2, /* Use passed 802.11d info to
- * determine channel AND/OR just make assumption that active
- * channels are valid channels */
-};
-
-enum oid_inl_mode_t {
- INL_MODE_NONE = -1,
- INL_MODE_PROMISCUOUS = 0,
- INL_MODE_CLIENT = 1,
- INL_MODE_AP = 2,
- INL_MODE_SNIFFER = 3
-};
-
-enum oid_inl_config_t {
- INL_CONFIG_NOTHING = 0x00,
- INL_CONFIG_MANUALRUN = 0x01,
- INL_CONFIG_FRAMETRAP = 0x02,
- INL_CONFIG_RXANNEX = 0x04,
- INL_CONFIG_TXANNEX = 0x08,
- INL_CONFIG_WDS = 0x10
-};
-
-enum oid_inl_phycap_t {
- INL_PHYCAP_2400MHZ = 1,
- INL_PHYCAP_5000MHZ = 2,
- INL_PHYCAP_FAA = 0x80000000, /* Means card supports the FAA switch */
-};
-
-
-enum oid_num_t {
- GEN_OID_MACADDRESS = 0,
- GEN_OID_LINKSTATE,
- GEN_OID_WATCHDOG,
- GEN_OID_MIBOP,
- GEN_OID_OPTIONS,
- GEN_OID_LEDCONFIG,
-
- /* 802.11 */
- DOT11_OID_BSSTYPE,
- DOT11_OID_BSSID,
- DOT11_OID_SSID,
- DOT11_OID_STATE,
- DOT11_OID_AID,
- DOT11_OID_COUNTRYSTRING,
- DOT11_OID_SSIDOVERRIDE,
-
- DOT11_OID_MEDIUMLIMIT,
- DOT11_OID_BEACONPERIOD,
- DOT11_OID_DTIMPERIOD,
- DOT11_OID_ATIMWINDOW,
- DOT11_OID_LISTENINTERVAL,
- DOT11_OID_CFPPERIOD,
- DOT11_OID_CFPDURATION,
-
- DOT11_OID_AUTHENABLE,
- DOT11_OID_PRIVACYINVOKED,
- DOT11_OID_EXUNENCRYPTED,
- DOT11_OID_DEFKEYID,
- DOT11_OID_DEFKEYX, /* DOT11_OID_DEFKEY1,...DOT11_OID_DEFKEY4 */
- DOT11_OID_STAKEY,
- DOT11_OID_REKEYTHRESHOLD,
- DOT11_OID_STASC,
-
- DOT11_OID_PRIVTXREJECTED,
- DOT11_OID_PRIVRXPLAIN,
- DOT11_OID_PRIVRXFAILED,
- DOT11_OID_PRIVRXNOKEY,
-
- DOT11_OID_RTSTHRESH,
- DOT11_OID_FRAGTHRESH,
- DOT11_OID_SHORTRETRIES,
- DOT11_OID_LONGRETRIES,
- DOT11_OID_MAXTXLIFETIME,
- DOT11_OID_MAXRXLIFETIME,
- DOT11_OID_AUTHRESPTIMEOUT,
- DOT11_OID_ASSOCRESPTIMEOUT,
-
- DOT11_OID_ALOFT_TABLE,
- DOT11_OID_ALOFT_CTRL_TABLE,
- DOT11_OID_ALOFT_RETREAT,
- DOT11_OID_ALOFT_PROGRESS,
- DOT11_OID_ALOFT_FIXEDRATE,
- DOT11_OID_ALOFT_RSSIGRAPH,
- DOT11_OID_ALOFT_CONFIG,
-
- DOT11_OID_VDCFX,
- DOT11_OID_MAXFRAMEBURST,
-
- DOT11_OID_PSM,
- DOT11_OID_CAMTIMEOUT,
- DOT11_OID_RECEIVEDTIMS,
- DOT11_OID_ROAMPREFERENCE,
-
- DOT11_OID_BRIDGELOCAL,
- DOT11_OID_CLIENTS,
- DOT11_OID_CLIENTSASSOCIATED,
- DOT11_OID_CLIENTX, /* DOT11_OID_CLIENTX,...DOT11_OID_CLIENT2007 */
-
- DOT11_OID_CLIENTFIND,
- DOT11_OID_WDSLINKADD,
- DOT11_OID_WDSLINKREMOVE,
- DOT11_OID_EAPAUTHSTA,
- DOT11_OID_EAPUNAUTHSTA,
- DOT11_OID_DOT1XENABLE,
- DOT11_OID_MICFAILURE,
- DOT11_OID_REKEYINDICATE,
-
- DOT11_OID_MPDUTXSUCCESSFUL,
- DOT11_OID_MPDUTXONERETRY,
- DOT11_OID_MPDUTXMULTIPLERETRIES,
- DOT11_OID_MPDUTXFAILED,
- DOT11_OID_MPDURXSUCCESSFUL,
- DOT11_OID_MPDURXDUPS,
- DOT11_OID_RTSSUCCESSFUL,
- DOT11_OID_RTSFAILED,
- DOT11_OID_ACKFAILED,
- DOT11_OID_FRAMERECEIVES,
- DOT11_OID_FRAMEERRORS,
- DOT11_OID_FRAMEABORTS,
- DOT11_OID_FRAMEABORTSPHY,
-
- DOT11_OID_SLOTTIME,
- DOT11_OID_CWMIN, /* MIN DCS backoff */
- DOT11_OID_CWMAX, /* MAX DCS backoff */
- DOT11_OID_ACKWINDOW,
- DOT11_OID_ANTENNARX,
- DOT11_OID_ANTENNATX,
- DOT11_OID_ANTENNADIVERSITY,
- DOT11_OID_CHANNEL,
- DOT11_OID_EDTHRESHOLD,
- DOT11_OID_PREAMBLESETTINGS,
- DOT11_OID_RATES,
- DOT11_OID_CCAMODESUPPORTED,
- DOT11_OID_CCAMODE,
- DOT11_OID_RSSIVECTOR,
- DOT11_OID_OUTPUTPOWERTABLE,
- DOT11_OID_OUTPUTPOWER,
- DOT11_OID_SUPPORTEDRATES,
- DOT11_OID_FREQUENCY,
- DOT11_OID_SUPPORTEDFREQUENCIES,
- DOT11_OID_NOISEFLOOR,
- DOT11_OID_FREQUENCYACTIVITY,
- DOT11_OID_IQCALIBRATIONTABLE,
- DOT11_OID_NONERPPROTECTION,
- DOT11_OID_SLOTSETTINGS,
- DOT11_OID_NONERPTIMEOUT,
- DOT11_OID_PROFILES,
- DOT11_OID_EXTENDEDRATES,
-
- DOT11_OID_DEAUTHENTICATE,
- DOT11_OID_AUTHENTICATE,
- DOT11_OID_DISASSOCIATE,
- DOT11_OID_ASSOCIATE,
- DOT11_OID_SCAN,
- DOT11_OID_BEACON,
- DOT11_OID_PROBE,
- DOT11_OID_DEAUTHENTICATEEX,
- DOT11_OID_AUTHENTICATEEX,
- DOT11_OID_DISASSOCIATEEX,
- DOT11_OID_ASSOCIATEEX,
- DOT11_OID_REASSOCIATE,
- DOT11_OID_REASSOCIATEEX,
-
- DOT11_OID_NONERPSTATUS,
-
- DOT11_OID_STATIMEOUT,
- DOT11_OID_MLMEAUTOLEVEL,
- DOT11_OID_BSSTIMEOUT,
- DOT11_OID_ATTACHMENT,
- DOT11_OID_PSMBUFFER,
-
- DOT11_OID_BSSS,
- DOT11_OID_BSSX, /*DOT11_OID_BSS1,...,DOT11_OID_BSS64 */
- DOT11_OID_BSSFIND,
- DOT11_OID_BSSLIST,
-
- OID_INL_TUNNEL,
- OID_INL_MEMADDR,
- OID_INL_MEMORY,
- OID_INL_MODE,
- OID_INL_COMPONENT_NR,
- OID_INL_VERSION,
- OID_INL_INTERFACE_ID,
- OID_INL_COMPONENT_ID,
- OID_INL_CONFIG,
- OID_INL_DOT11D_CONFORMANCE,
- OID_INL_PHYCAPABILITIES,
- OID_INL_OUTPUTPOWER,
-
- OID_NUM_LAST
-};
-
-#define OID_FLAG_CACHED 0x80
-#define OID_FLAG_TYPE 0x7f
-
-#define OID_TYPE_U32 0x01
-#define OID_TYPE_SSID 0x02
-#define OID_TYPE_KEY 0x03
-#define OID_TYPE_BUFFER 0x04
-#define OID_TYPE_BSS 0x05
-#define OID_TYPE_BSSLIST 0x06
-#define OID_TYPE_FREQUENCIES 0x07
-#define OID_TYPE_MLME 0x08
-#define OID_TYPE_MLMEEX 0x09
-#define OID_TYPE_ADDR 0x0A
-#define OID_TYPE_RAW 0x0B
-#define OID_TYPE_ATTACH 0x0C
-
-/* OID_TYPE_MLMEEX is special because of a variable size field when sending.
- * Not yet implemented (not used in driver anyway).
- */
-
-struct oid_t {
- enum oid_num_t oid;
- short range; /* to define a range of oid */
- short size; /* max size of the associated data */
- char flags;
-};
-
-union oid_res_t {
- void *ptr;
- u32 u;
-};
-
-#define IWMAX_BITRATES 20
-#define IWMAX_BSS 24
-#define IWMAX_FREQ 30
-#define PRIV_STR_SIZE 1024
-
-#endif /* !defined(_ISL_OID_H) */
-/* EOF */
diff --git a/drivers/net/wireless/intersil/prism54/islpci_dev.c b/drivers/net/wireless/intersil/prism54/islpci_dev.c
deleted file mode 100644
index 8eb6d5e4bd57..000000000000
--- a/drivers/net/wireless/intersil/prism54/islpci_dev.c
+++ /dev/null
@@ -1,951 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2002 Intersil Americas Inc.
- * Copyright (C) 2003 Herbert Valerio Riedel <hvr@gnu.org>
- * Copyright (C) 2003 Luis R. Rodriguez <mcgrof@ruslug.rutgers.edu>
- */
-
-#include <linux/hardirq.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-
-#include <linux/netdevice.h>
-#include <linux/ethtool.h>
-#include <linux/pci.h>
-#include <linux/sched.h>
-#include <linux/etherdevice.h>
-#include <linux/delay.h>
-#include <linux/if_arp.h>
-
-#include <asm/io.h>
-
-#include "prismcompat.h"
-#include "isl_38xx.h"
-#include "isl_ioctl.h"
-#include "islpci_dev.h"
-#include "islpci_mgt.h"
-#include "islpci_eth.h"
-#include "oid_mgt.h"
-
-#define ISL3877_IMAGE_FILE "isl3877"
-#define ISL3886_IMAGE_FILE "isl3886"
-#define ISL3890_IMAGE_FILE "isl3890"
-MODULE_FIRMWARE(ISL3877_IMAGE_FILE);
-MODULE_FIRMWARE(ISL3886_IMAGE_FILE);
-MODULE_FIRMWARE(ISL3890_IMAGE_FILE);
-
-static int prism54_bring_down(islpci_private *);
-static int islpci_alloc_memory(islpci_private *);
-
-/* Temporary dummy MAC address to use until firmware is loaded.
- * The idea there is that some tools (such as nameif) may query
- * the MAC address before the netdev is 'open'. By using a valid
- * OUI prefix, they can process the netdev properly.
- * Of course, this is not the final/real MAC address. It doesn't
- * matter, as you are suppose to be able to change it anytime via
- * ndev->set_mac_address. Jean II */
-static const unsigned char dummy_mac[6] = { 0x00, 0x30, 0xB4, 0x00, 0x00, 0x00 };
-
-static int
-isl_upload_firmware(islpci_private *priv)
-{
- u32 reg, rc;
- void __iomem *device_base = priv->device_base;
-
- /* clear the RAMBoot and the Reset bit */
- reg = readl(device_base + ISL38XX_CTRL_STAT_REG);
- reg &= ~ISL38XX_CTRL_STAT_RESET;
- reg &= ~ISL38XX_CTRL_STAT_RAMBOOT;
- writel(reg, device_base + ISL38XX_CTRL_STAT_REG);
- wmb();
- udelay(ISL38XX_WRITEIO_DELAY);
-
- /* set the Reset bit without reading the register ! */
- reg |= ISL38XX_CTRL_STAT_RESET;
- writel(reg, device_base + ISL38XX_CTRL_STAT_REG);
- wmb();
- udelay(ISL38XX_WRITEIO_DELAY);
-
- /* clear the Reset bit */
- reg &= ~ISL38XX_CTRL_STAT_RESET;
- writel(reg, device_base + ISL38XX_CTRL_STAT_REG);
- wmb();
-
- /* wait a while for the device to reboot */
- mdelay(50);
-
- {
- const struct firmware *fw_entry = NULL;
- long fw_len;
- const u32 *fw_ptr;
-
- rc = request_firmware(&fw_entry, priv->firmware, PRISM_FW_PDEV);
- if (rc) {
- printk(KERN_ERR
- "%s: request_firmware() failed for '%s'\n",
- "prism54", priv->firmware);
- return rc;
- }
- /* prepare the Direct Memory Base register */
- reg = ISL38XX_DEV_FIRMWARE_ADDRES;
-
- fw_ptr = (u32 *) fw_entry->data;
- fw_len = fw_entry->size;
-
- if (fw_len % 4) {
- printk(KERN_ERR
- "%s: firmware '%s' size is not multiple of 32bit, aborting!\n",
- "prism54", priv->firmware);
- release_firmware(fw_entry);
- return -EILSEQ; /* Illegal byte sequence */;
- }
-
- while (fw_len > 0) {
- long _fw_len =
- (fw_len >
- ISL38XX_MEMORY_WINDOW_SIZE) ?
- ISL38XX_MEMORY_WINDOW_SIZE : fw_len;
- u32 __iomem *dev_fw_ptr = device_base + ISL38XX_DIRECT_MEM_WIN;
-
- /* set the card's base address for writing the data */
- isl38xx_w32_flush(device_base, reg,
- ISL38XX_DIR_MEM_BASE_REG);
- wmb(); /* be paranoid */
-
- /* increment the write address for next iteration */
- reg += _fw_len;
- fw_len -= _fw_len;
-
- /* write the data to the Direct Memory Window 32bit-wise */
- /* memcpy_toio() doesn't guarantee 32bit writes :-| */
- while (_fw_len > 0) {
- /* use non-swapping writel() */
- __raw_writel(*fw_ptr, dev_fw_ptr);
- fw_ptr++, dev_fw_ptr++;
- _fw_len -= 4;
- }
-
- /* flush PCI posting */
- (void) readl(device_base + ISL38XX_PCI_POSTING_FLUSH);
- wmb(); /* be paranoid again */
-
- BUG_ON(_fw_len != 0);
- }
-
- BUG_ON(fw_len != 0);
-
- /* Firmware version is at offset 40 (also for "newmac") */
- printk(KERN_DEBUG "%s: firmware version: %.8s\n",
- priv->ndev->name, fw_entry->data + 40);
-
- release_firmware(fw_entry);
- }
-
- /* now reset the device
- * clear the Reset & ClkRun bit, set the RAMBoot bit */
- reg = readl(device_base + ISL38XX_CTRL_STAT_REG);
- reg &= ~ISL38XX_CTRL_STAT_CLKRUN;
- reg &= ~ISL38XX_CTRL_STAT_RESET;
- reg |= ISL38XX_CTRL_STAT_RAMBOOT;
- isl38xx_w32_flush(device_base, reg, ISL38XX_CTRL_STAT_REG);
- wmb();
- udelay(ISL38XX_WRITEIO_DELAY);
-
- /* set the reset bit latches the host override and RAMBoot bits
- * into the device for operation when the reset bit is reset */
- reg |= ISL38XX_CTRL_STAT_RESET;
- writel(reg, device_base + ISL38XX_CTRL_STAT_REG);
- /* don't do flush PCI posting here! */
- wmb();
- udelay(ISL38XX_WRITEIO_DELAY);
-
- /* clear the reset bit should start the whole circus */
- reg &= ~ISL38XX_CTRL_STAT_RESET;
- writel(reg, device_base + ISL38XX_CTRL_STAT_REG);
- /* don't do flush PCI posting here! */
- wmb();
- udelay(ISL38XX_WRITEIO_DELAY);
-
- return 0;
-}
-
-/******************************************************************************
- Device Interrupt Handler
-******************************************************************************/
-
-irqreturn_t
-islpci_interrupt(int irq, void *config)
-{
- u32 reg;
- islpci_private *priv = config;
- struct net_device *ndev = priv->ndev;
- void __iomem *device = priv->device_base;
- int powerstate = ISL38XX_PSM_POWERSAVE_STATE;
-
- /* lock the interrupt handler */
- spin_lock(&priv->slock);
-
- /* received an interrupt request on a shared IRQ line
- * first check whether the device is in sleep mode */
- reg = readl(device + ISL38XX_CTRL_STAT_REG);
- if (reg & ISL38XX_CTRL_STAT_SLEEPMODE)
- /* device is in sleep mode, IRQ was generated by someone else */
- {
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING, "Assuming someone else called the IRQ\n");
-#endif
- spin_unlock(&priv->slock);
- return IRQ_NONE;
- }
-
-
- /* check whether there is any source of interrupt on the device */
- reg = readl(device + ISL38XX_INT_IDENT_REG);
-
- /* also check the contents of the Interrupt Enable Register, because this
- * will filter out interrupt sources from other devices on the same irq ! */
- reg &= readl(device + ISL38XX_INT_EN_REG);
- reg &= ISL38XX_INT_SOURCES;
-
- if (reg != 0) {
- if (islpci_get_state(priv) != PRV_STATE_SLEEP)
- powerstate = ISL38XX_PSM_ACTIVE_STATE;
-
- /* reset the request bits in the Identification register */
- isl38xx_w32_flush(device, reg, ISL38XX_INT_ACK_REG);
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_FUNCTION_CALLS,
- "IRQ: Identification register 0x%p 0x%x\n", device, reg);
-#endif
-
- /* check for each bit in the register separately */
- if (reg & ISL38XX_INT_IDENT_UPDATE) {
-#if VERBOSE > SHOW_ERROR_MESSAGES
- /* Queue has been updated */
- DEBUG(SHOW_TRACING, "IRQ: Update flag\n");
-
- DEBUG(SHOW_QUEUE_INDEXES,
- "CB drv Qs: [%i][%i][%i][%i][%i][%i]\n",
- le32_to_cpu(priv->control_block->
- driver_curr_frag[0]),
- le32_to_cpu(priv->control_block->
- driver_curr_frag[1]),
- le32_to_cpu(priv->control_block->
- driver_curr_frag[2]),
- le32_to_cpu(priv->control_block->
- driver_curr_frag[3]),
- le32_to_cpu(priv->control_block->
- driver_curr_frag[4]),
- le32_to_cpu(priv->control_block->
- driver_curr_frag[5])
- );
-
- DEBUG(SHOW_QUEUE_INDEXES,
- "CB dev Qs: [%i][%i][%i][%i][%i][%i]\n",
- le32_to_cpu(priv->control_block->
- device_curr_frag[0]),
- le32_to_cpu(priv->control_block->
- device_curr_frag[1]),
- le32_to_cpu(priv->control_block->
- device_curr_frag[2]),
- le32_to_cpu(priv->control_block->
- device_curr_frag[3]),
- le32_to_cpu(priv->control_block->
- device_curr_frag[4]),
- le32_to_cpu(priv->control_block->
- device_curr_frag[5])
- );
-#endif
-
- /* cleanup the data low transmit queue */
- islpci_eth_cleanup_transmit(priv, priv->control_block);
-
- /* device is in active state, update the
- * powerstate flag if necessary */
- powerstate = ISL38XX_PSM_ACTIVE_STATE;
-
- /* check all three queues in priority order
- * call the PIMFOR receive function until the
- * queue is empty */
- if (isl38xx_in_queue(priv->control_block,
- ISL38XX_CB_RX_MGMTQ) != 0) {
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING,
- "Received frame in Management Queue\n");
-#endif
- islpci_mgt_receive(ndev);
-
- islpci_mgt_cleanup_transmit(ndev);
-
- /* Refill slots in receive queue */
- islpci_mgmt_rx_fill(ndev);
-
- /* no need to trigger the device, next
- islpci_mgt_transaction does it */
- }
-
- while (isl38xx_in_queue(priv->control_block,
- ISL38XX_CB_RX_DATA_LQ) != 0) {
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING,
- "Received frame in Data Low Queue\n");
-#endif
- islpci_eth_receive(priv);
- }
-
- /* check whether the data transmit queues were full */
- if (priv->data_low_tx_full) {
- /* check whether the transmit is not full anymore */
- if (ISL38XX_CB_TX_QSIZE -
- isl38xx_in_queue(priv->control_block,
- ISL38XX_CB_TX_DATA_LQ) >=
- ISL38XX_MIN_QTHRESHOLD) {
- /* nope, the driver is ready for more network frames */
- netif_wake_queue(priv->ndev);
-
- /* reset the full flag */
- priv->data_low_tx_full = 0;
- }
- }
- }
-
- if (reg & ISL38XX_INT_IDENT_INIT) {
- /* Device has been initialized */
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING,
- "IRQ: Init flag, device initialized\n");
-#endif
- wake_up(&priv->reset_done);
- }
-
- if (reg & ISL38XX_INT_IDENT_SLEEP) {
- /* Device intends to move to powersave state */
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING, "IRQ: Sleep flag\n");
-#endif
- isl38xx_handle_sleep_request(priv->control_block,
- &powerstate,
- priv->device_base);
- }
-
- if (reg & ISL38XX_INT_IDENT_WAKEUP) {
- /* Device has been woken up to active state */
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING, "IRQ: Wakeup flag\n");
-#endif
-
- isl38xx_handle_wakeup(priv->control_block,
- &powerstate, priv->device_base);
- }
- } else {
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING, "Assuming someone else called the IRQ\n");
-#endif
- spin_unlock(&priv->slock);
- return IRQ_NONE;
- }
-
- /* sleep -> ready */
- if (islpci_get_state(priv) == PRV_STATE_SLEEP
- && powerstate == ISL38XX_PSM_ACTIVE_STATE)
- islpci_set_state(priv, PRV_STATE_READY);
-
- /* !sleep -> sleep */
- if (islpci_get_state(priv) != PRV_STATE_SLEEP
- && powerstate == ISL38XX_PSM_POWERSAVE_STATE)
- islpci_set_state(priv, PRV_STATE_SLEEP);
-
- /* unlock the interrupt handler */
- spin_unlock(&priv->slock);
-
- return IRQ_HANDLED;
-}
-
-/******************************************************************************
- Network Interface Control & Statistical functions
-******************************************************************************/
-static int
-islpci_open(struct net_device *ndev)
-{
- u32 rc;
- islpci_private *priv = netdev_priv(ndev);
-
- /* reset data structures, upload firmware and reset device */
- rc = islpci_reset(priv,1);
- if (rc) {
- prism54_bring_down(priv);
- return rc; /* Returns informative message */
- }
-
- netif_start_queue(ndev);
-
- /* Turn off carrier if in STA or Ad-hoc mode. It will be turned on
- * once the firmware receives a trap of being associated
- * (GEN_OID_LINKSTATE). In other modes (AP or WDS or monitor) we
- * should just leave the carrier on as its expected the firmware
- * won't send us a trigger. */
- if (priv->iw_mode == IW_MODE_INFRA || priv->iw_mode == IW_MODE_ADHOC)
- netif_carrier_off(ndev);
- else
- netif_carrier_on(ndev);
-
- return 0;
-}
-
-static int
-islpci_close(struct net_device *ndev)
-{
- islpci_private *priv = netdev_priv(ndev);
-
- printk(KERN_DEBUG "%s: islpci_close ()\n", ndev->name);
-
- netif_stop_queue(ndev);
-
- return prism54_bring_down(priv);
-}
-
-static int
-prism54_bring_down(islpci_private *priv)
-{
- void __iomem *device_base = priv->device_base;
- u32 reg;
- /* we are going to shutdown the device */
- islpci_set_state(priv, PRV_STATE_PREBOOT);
-
- /* disable all device interrupts in case they weren't */
- isl38xx_disable_interrupts(priv->device_base);
-
- /* For safety reasons, we may want to ensure that no DMA transfer is
- * currently in progress by emptying the TX and RX queues. */
-
- /* wait until interrupts have finished executing on other CPUs */
- synchronize_irq(priv->pdev->irq);
-
- reg = readl(device_base + ISL38XX_CTRL_STAT_REG);
- reg &= ~(ISL38XX_CTRL_STAT_RESET | ISL38XX_CTRL_STAT_RAMBOOT);
- writel(reg, device_base + ISL38XX_CTRL_STAT_REG);
- wmb();
- udelay(ISL38XX_WRITEIO_DELAY);
-
- reg |= ISL38XX_CTRL_STAT_RESET;
- writel(reg, device_base + ISL38XX_CTRL_STAT_REG);
- wmb();
- udelay(ISL38XX_WRITEIO_DELAY);
-
- /* clear the Reset bit */
- reg &= ~ISL38XX_CTRL_STAT_RESET;
- writel(reg, device_base + ISL38XX_CTRL_STAT_REG);
- wmb();
-
- /* wait a while for the device to reset */
- schedule_timeout_uninterruptible(msecs_to_jiffies(50));
-
- return 0;
-}
-
-static int
-islpci_upload_fw(islpci_private *priv)
-{
- islpci_state_t old_state;
- u32 rc;
-
- old_state = islpci_set_state(priv, PRV_STATE_BOOT);
-
- printk(KERN_DEBUG "%s: uploading firmware...\n", priv->ndev->name);
-
- rc = isl_upload_firmware(priv);
- if (rc) {
- /* error uploading the firmware */
- printk(KERN_ERR "%s: could not upload firmware ('%s')\n",
- priv->ndev->name, priv->firmware);
-
- islpci_set_state(priv, old_state);
- return rc;
- }
-
- printk(KERN_DEBUG "%s: firmware upload complete\n",
- priv->ndev->name);
-
- islpci_set_state(priv, PRV_STATE_POSTBOOT);
-
- return 0;
-}
-
-static int
-islpci_reset_if(islpci_private *priv)
-{
- long remaining;
- int result = -ETIME;
- int count;
-
- DEFINE_WAIT(wait);
- prepare_to_wait(&priv->reset_done, &wait, TASK_UNINTERRUPTIBLE);
-
- /* now the last step is to reset the interface */
- isl38xx_interface_reset(priv->device_base, priv->device_host_address);
- islpci_set_state(priv, PRV_STATE_PREINIT);
-
- for(count = 0; count < 2 && result; count++) {
- /* The software reset acknowledge needs about 220 msec here.
- * Be conservative and wait for up to one second. */
-
- remaining = schedule_timeout_uninterruptible(HZ);
-
- if(remaining > 0) {
- result = 0;
- break;
- }
-
- /* If we're here it's because our IRQ hasn't yet gone through.
- * Retry a bit more...
- */
- printk(KERN_ERR "%s: no 'reset complete' IRQ seen - retrying\n",
- priv->ndev->name);
- }
-
- finish_wait(&priv->reset_done, &wait);
-
- if (result) {
- printk(KERN_ERR "%s: interface reset failure\n", priv->ndev->name);
- return result;
- }
-
- islpci_set_state(priv, PRV_STATE_INIT);
-
- /* Now that the device is 100% up, let's allow
- * for the other interrupts --
- * NOTE: this is not *yet* true since we've only allowed the
- * INIT interrupt on the IRQ line. We can perhaps poll
- * the IRQ line until we know for sure the reset went through */
- isl38xx_enable_common_interrupts(priv->device_base);
-
- down_write(&priv->mib_sem);
- result = mgt_commit(priv);
- if (result) {
- printk(KERN_ERR "%s: interface reset failure\n", priv->ndev->name);
- up_write(&priv->mib_sem);
- return result;
- }
- up_write(&priv->mib_sem);
-
- islpci_set_state(priv, PRV_STATE_READY);
-
- printk(KERN_DEBUG "%s: interface reset complete\n", priv->ndev->name);
- return 0;
-}
-
-int
-islpci_reset(islpci_private *priv, int reload_firmware)
-{
- isl38xx_control_block *cb = /* volatile not needed */
- (isl38xx_control_block *) priv->control_block;
- unsigned counter;
- int rc;
-
- if (reload_firmware)
- islpci_set_state(priv, PRV_STATE_PREBOOT);
- else
- islpci_set_state(priv, PRV_STATE_POSTBOOT);
-
- printk(KERN_DEBUG "%s: resetting device...\n", priv->ndev->name);
-
- /* disable all device interrupts in case they weren't */
- isl38xx_disable_interrupts(priv->device_base);
-
- /* flush all management queues */
- priv->index_mgmt_tx = 0;
- priv->index_mgmt_rx = 0;
-
- /* clear the indexes in the frame pointer */
- for (counter = 0; counter < ISL38XX_CB_QCOUNT; counter++) {
- cb->driver_curr_frag[counter] = cpu_to_le32(0);
- cb->device_curr_frag[counter] = cpu_to_le32(0);
- }
-
- /* reset the mgmt receive queue */
- for (counter = 0; counter < ISL38XX_CB_MGMT_QSIZE; counter++) {
- isl38xx_fragment *frag = &cb->rx_data_mgmt[counter];
- frag->size = cpu_to_le16(MGMT_FRAME_SIZE);
- frag->flags = 0;
- frag->address = cpu_to_le32(priv->mgmt_rx[counter].pci_addr);
- }
-
- for (counter = 0; counter < ISL38XX_CB_RX_QSIZE; counter++) {
- cb->rx_data_low[counter].address =
- cpu_to_le32((u32) priv->pci_map_rx_address[counter]);
- }
-
- /* since the receive queues are filled with empty fragments, now we can
- * set the corresponding indexes in the Control Block */
- priv->control_block->driver_curr_frag[ISL38XX_CB_RX_DATA_LQ] =
- cpu_to_le32(ISL38XX_CB_RX_QSIZE);
- priv->control_block->driver_curr_frag[ISL38XX_CB_RX_MGMTQ] =
- cpu_to_le32(ISL38XX_CB_MGMT_QSIZE);
-
- /* reset the remaining real index registers and full flags */
- priv->free_data_rx = 0;
- priv->free_data_tx = 0;
- priv->data_low_tx_full = 0;
-
- if (reload_firmware) { /* Should we load the firmware ? */
- /* now that the data structures are cleaned up, upload
- * firmware and reset interface */
- rc = islpci_upload_fw(priv);
- if (rc) {
- printk(KERN_ERR "%s: islpci_reset: failure\n",
- priv->ndev->name);
- return rc;
- }
- }
-
- /* finally reset interface */
- rc = islpci_reset_if(priv);
- if (rc)
- printk(KERN_ERR "prism54: Your card/socket may be faulty, or IRQ line too busy :(\n");
- return rc;
-}
-
-/******************************************************************************
- Network device configuration functions
-******************************************************************************/
-static int
-islpci_alloc_memory(islpci_private *priv)
-{
- int counter;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- printk(KERN_DEBUG "islpci_alloc_memory\n");
-#endif
-
- /* remap the PCI device base address to accessible */
- if (!(priv->device_base =
- ioremap(pci_resource_start(priv->pdev, 0),
- ISL38XX_PCI_MEM_SIZE))) {
- /* error in remapping the PCI device memory address range */
- printk(KERN_ERR "PCI memory remapping failed\n");
- return -1;
- }
-
- /* memory layout for consistent DMA region:
- *
- * Area 1: Control Block for the device interface
- * Area 2: Power Save Mode Buffer for temporary frame storage. Be aware that
- * the number of supported stations in the AP determines the minimal
- * size of the buffer !
- */
-
- /* perform the allocation */
- priv->driver_mem_address = dma_alloc_coherent(&priv->pdev->dev,
- HOST_MEM_BLOCK,
- &priv->device_host_address,
- GFP_KERNEL);
-
- if (!priv->driver_mem_address) {
- /* error allocating the block of PCI memory */
- printk(KERN_ERR "%s: could not allocate DMA memory, aborting!",
- "prism54");
- return -1;
- }
-
- /* assign the Control Block to the first address of the allocated area */
- priv->control_block =
- (isl38xx_control_block *) priv->driver_mem_address;
-
- /* set the Power Save Buffer pointer directly behind the CB */
- priv->device_psm_buffer =
- priv->device_host_address + CONTROL_BLOCK_SIZE;
-
- /* make sure all buffer pointers are initialized */
- for (counter = 0; counter < ISL38XX_CB_QCOUNT; counter++) {
- priv->control_block->driver_curr_frag[counter] = cpu_to_le32(0);
- priv->control_block->device_curr_frag[counter] = cpu_to_le32(0);
- }
-
- priv->index_mgmt_rx = 0;
- memset(priv->mgmt_rx, 0, sizeof(priv->mgmt_rx));
- memset(priv->mgmt_tx, 0, sizeof(priv->mgmt_tx));
-
- /* allocate rx queue for management frames */
- if (islpci_mgmt_rx_fill(priv->ndev) < 0)
- goto out_free;
-
- /* now get the data rx skb's */
- memset(priv->data_low_rx, 0, sizeof (priv->data_low_rx));
- memset(priv->pci_map_rx_address, 0, sizeof (priv->pci_map_rx_address));
-
- for (counter = 0; counter < ISL38XX_CB_RX_QSIZE; counter++) {
- struct sk_buff *skb;
-
- /* allocate an sk_buff for received data frames storage
- * each frame on receive size consists of 1 fragment
- * include any required allignment operations */
- if (!(skb = dev_alloc_skb(MAX_FRAGMENT_SIZE_RX + 2))) {
- /* error allocating an sk_buff structure elements */
- printk(KERN_ERR "Error allocating skb.\n");
- skb = NULL;
- goto out_free;
- }
- skb_reserve(skb, (4 - (long) skb->data) & 0x03);
- /* add the new allocated sk_buff to the buffer array */
- priv->data_low_rx[counter] = skb;
-
- /* map the allocated skb data area to pci */
- priv->pci_map_rx_address[counter] =
- dma_map_single(&priv->pdev->dev, (void *)skb->data,
- MAX_FRAGMENT_SIZE_RX + 2, DMA_FROM_DEVICE);
- if (dma_mapping_error(&priv->pdev->dev, priv->pci_map_rx_address[counter])) {
- priv->pci_map_rx_address[counter] = 0;
- /* error mapping the buffer to device
- accessible memory address */
- printk(KERN_ERR "failed to map skb DMA'able\n");
- goto out_free;
- }
- }
-
- prism54_acl_init(&priv->acl);
- prism54_wpa_bss_ie_init(priv);
- if (mgt_init(priv))
- goto out_free;
-
- return 0;
- out_free:
- islpci_free_memory(priv);
- return -1;
-}
-
-int
-islpci_free_memory(islpci_private *priv)
-{
- int counter;
-
- if (priv->device_base)
- iounmap(priv->device_base);
- priv->device_base = NULL;
-
- /* free consistent DMA area... */
- if (priv->driver_mem_address)
- dma_free_coherent(&priv->pdev->dev, HOST_MEM_BLOCK,
- priv->driver_mem_address,
- priv->device_host_address);
-
- /* clear some dangling pointers */
- priv->driver_mem_address = NULL;
- priv->device_host_address = 0;
- priv->device_psm_buffer = 0;
- priv->control_block = NULL;
-
- /* clean up mgmt rx buffers */
- for (counter = 0; counter < ISL38XX_CB_MGMT_QSIZE; counter++) {
- struct islpci_membuf *buf = &priv->mgmt_rx[counter];
- if (buf->pci_addr)
- dma_unmap_single(&priv->pdev->dev, buf->pci_addr,
- buf->size, DMA_FROM_DEVICE);
- buf->pci_addr = 0;
- kfree(buf->mem);
- buf->size = 0;
- buf->mem = NULL;
- }
-
- /* clean up data rx buffers */
- for (counter = 0; counter < ISL38XX_CB_RX_QSIZE; counter++) {
- if (priv->pci_map_rx_address[counter])
- dma_unmap_single(&priv->pdev->dev,
- priv->pci_map_rx_address[counter],
- MAX_FRAGMENT_SIZE_RX + 2,
- DMA_FROM_DEVICE);
- priv->pci_map_rx_address[counter] = 0;
-
- if (priv->data_low_rx[counter])
- dev_kfree_skb(priv->data_low_rx[counter]);
- priv->data_low_rx[counter] = NULL;
- }
-
- /* Free the access control list and the WPA list */
- prism54_acl_clean(&priv->acl);
- prism54_wpa_bss_ie_clean(priv);
- mgt_clean(priv);
-
- return 0;
-}
-
-#if 0
-static void
-islpci_set_multicast_list(struct net_device *dev)
-{
- /* put device into promisc mode and let network layer handle it */
-}
-#endif
-
-static void islpci_ethtool_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *info)
-{
- strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
- strlcpy(info->version, DRV_VERSION, sizeof(info->version));
-}
-
-static const struct ethtool_ops islpci_ethtool_ops = {
- .get_drvinfo = islpci_ethtool_get_drvinfo,
-};
-
-static const struct net_device_ops islpci_netdev_ops = {
- .ndo_open = islpci_open,
- .ndo_stop = islpci_close,
- .ndo_start_xmit = islpci_eth_transmit,
- .ndo_tx_timeout = islpci_eth_tx_timeout,
- .ndo_set_mac_address = prism54_set_mac_address,
- .ndo_validate_addr = eth_validate_addr,
-};
-
-static struct device_type wlan_type = {
- .name = "wlan",
-};
-
-struct net_device *
-islpci_setup(struct pci_dev *pdev)
-{
- islpci_private *priv;
- struct net_device *ndev = alloc_etherdev(sizeof (islpci_private));
-
- if (!ndev)
- return ndev;
-
- pci_set_drvdata(pdev, ndev);
- SET_NETDEV_DEV(ndev, &pdev->dev);
- SET_NETDEV_DEVTYPE(ndev, &wlan_type);
-
- /* setup the structure members */
- ndev->base_addr = pci_resource_start(pdev, 0);
- ndev->irq = pdev->irq;
-
- /* initialize the function pointers */
- ndev->netdev_ops = &islpci_netdev_ops;
- ndev->wireless_handlers = &prism54_handler_def;
- ndev->ethtool_ops = &islpci_ethtool_ops;
-
- /* ndev->set_multicast_list = &islpci_set_multicast_list; */
- ndev->addr_len = ETH_ALEN;
- /* Get a non-zero dummy MAC address for nameif. Jean II */
- memcpy(ndev->dev_addr, dummy_mac, ETH_ALEN);
-
- ndev->watchdog_timeo = ISLPCI_TX_TIMEOUT;
-
- /* allocate a private device structure to the network device */
- priv = netdev_priv(ndev);
- priv->ndev = ndev;
- priv->pdev = pdev;
- priv->monitor_type = ARPHRD_IEEE80211;
- priv->ndev->type = (priv->iw_mode == IW_MODE_MONITOR) ?
- priv->monitor_type : ARPHRD_ETHER;
-
- /* Add pointers to enable iwspy support. */
- priv->wireless_data.spy_data = &priv->spy_data;
- ndev->wireless_data = &priv->wireless_data;
-
- /* save the start and end address of the PCI memory area */
- ndev->mem_start = (unsigned long) priv->device_base;
- ndev->mem_end = ndev->mem_start + ISL38XX_PCI_MEM_SIZE;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING, "PCI Memory remapped to 0x%p\n", priv->device_base);
-#endif
-
- init_waitqueue_head(&priv->reset_done);
-
- /* init the queue read locks, process wait counter */
- mutex_init(&priv->mgmt_lock);
- priv->mgmt_received = NULL;
- init_waitqueue_head(&priv->mgmt_wqueue);
- mutex_init(&priv->stats_lock);
- spin_lock_init(&priv->slock);
-
- /* init state machine with off#1 state */
- priv->state = PRV_STATE_OFF;
- priv->state_off = 1;
-
- /* initialize workqueue's */
- INIT_WORK(&priv->stats_work, prism54_update_stats);
- priv->stats_timestamp = 0;
-
- INIT_WORK(&priv->reset_task, islpci_do_reset_and_wake);
- priv->reset_task_pending = 0;
-
- /* allocate various memory areas */
- if (islpci_alloc_memory(priv))
- goto do_free_netdev;
-
- /* select the firmware file depending on the device id */
- switch (pdev->device) {
- case 0x3877:
- strcpy(priv->firmware, ISL3877_IMAGE_FILE);
- break;
-
- case 0x3886:
- strcpy(priv->firmware, ISL3886_IMAGE_FILE);
- break;
-
- default:
- strcpy(priv->firmware, ISL3890_IMAGE_FILE);
- break;
- }
-
- if (register_netdev(ndev)) {
- DEBUG(SHOW_ERROR_MESSAGES,
- "ERROR: register_netdev() failed\n");
- goto do_islpci_free_memory;
- }
-
- return ndev;
-
- do_islpci_free_memory:
- islpci_free_memory(priv);
- do_free_netdev:
- free_netdev(ndev);
- priv = NULL;
- return NULL;
-}
-
-islpci_state_t
-islpci_set_state(islpci_private *priv, islpci_state_t new_state)
-{
- islpci_state_t old_state;
-
- /* lock */
- old_state = priv->state;
-
- /* this means either a race condition or some serious error in
- * the driver code */
- switch (new_state) {
- case PRV_STATE_OFF:
- priv->state_off++;
- fallthrough;
- default:
- priv->state = new_state;
- break;
-
- case PRV_STATE_PREBOOT:
- /* there are actually many off-states, enumerated by
- * state_off */
- if (old_state == PRV_STATE_OFF)
- priv->state_off--;
-
- /* only if hw_unavailable is zero now it means we either
- * were in off#1 state, or came here from
- * somewhere else */
- if (!priv->state_off)
- priv->state = new_state;
- break;
- }
-#if 0
- printk(KERN_DEBUG "%s: state transition %d -> %d (off#%d)\n",
- priv->ndev->name, old_state, new_state, priv->state_off);
-#endif
-
- /* invariants */
- BUG_ON(priv->state_off < 0);
- BUG_ON(priv->state_off && (priv->state != PRV_STATE_OFF));
- BUG_ON(!priv->state_off && (priv->state == PRV_STATE_OFF));
-
- /* unlock */
- return old_state;
-}
diff --git a/drivers/net/wireless/intersil/prism54/islpci_dev.h b/drivers/net/wireless/intersil/prism54/islpci_dev.h
deleted file mode 100644
index 4753418dce62..000000000000
--- a/drivers/net/wireless/intersil/prism54/islpci_dev.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2002 Intersil Americas Inc.
- * Copyright (C) 2003 Herbert Valerio Riedel <hvr@gnu.org>
- * Copyright (C) 2003 Luis R. Rodriguez <mcgrof@ruslug.rutgers.edu>
- * Copyright (C) 2003 Aurelien Alleaume <slts@free.fr>
- */
-
-#ifndef _ISLPCI_DEV_H
-#define _ISLPCI_DEV_H
-
-#include <linux/irqreturn.h>
-#include <linux/netdevice.h>
-#include <linux/wireless.h>
-#include <net/iw_handler.h>
-#include <linux/list.h>
-#include <linux/mutex.h>
-
-#include "isl_38xx.h"
-#include "isl_oid.h"
-#include "islpci_mgt.h"
-
-/* some states might not be superflous and may be removed when
- design is finalized (hvr) */
-typedef enum {
- PRV_STATE_OFF = 0, /* this means hw_unavailable is != 0 */
- PRV_STATE_PREBOOT, /* we are in a pre-boot state (empty RAM) */
- PRV_STATE_BOOT, /* boot state (fw upload, run fw) */
- PRV_STATE_POSTBOOT, /* after boot state, need reset now */
- PRV_STATE_PREINIT, /* pre-init state */
- PRV_STATE_INIT, /* init state (restore MIB backup to device) */
- PRV_STATE_READY, /* driver&device are in operational state */
- PRV_STATE_SLEEP /* device in sleep mode */
-} islpci_state_t;
-
-/* ACL using MAC address */
-struct mac_entry {
- struct list_head _list;
- char addr[ETH_ALEN];
-};
-
-struct islpci_acl {
- enum { MAC_POLICY_OPEN=0, MAC_POLICY_ACCEPT=1, MAC_POLICY_REJECT=2 } policy;
- struct list_head mac_list; /* a list of mac_entry */
- int size; /* size of queue */
- struct mutex lock; /* accessed in ioctls and trap_work */
-};
-
-struct islpci_membuf {
- int size; /* size of memory */
- void *mem; /* address of memory as seen by CPU */
- dma_addr_t pci_addr; /* address of memory as seen by device */
-};
-
-#define MAX_BSS_WPA_IE_COUNT 64
-#define MAX_WPA_IE_LEN 64
-struct islpci_bss_wpa_ie {
- struct list_head list;
- unsigned long last_update;
- u8 bssid[ETH_ALEN];
- u8 wpa_ie[MAX_WPA_IE_LEN];
- size_t wpa_ie_len;
-
-};
-
-typedef struct {
- spinlock_t slock; /* generic spinlock; */
-
- u32 priv_oid;
-
- /* our mib cache */
- u32 iw_mode;
- struct rw_semaphore mib_sem;
- void **mib;
- char nickname[IW_ESSID_MAX_SIZE+1];
-
- /* Take care of the wireless stats */
- struct work_struct stats_work;
- struct mutex stats_lock;
- /* remember when we last updated the stats */
- unsigned long stats_timestamp;
- /* The first is accessed under semaphore locking.
- * The second is the clean one we return to iwconfig.
- */
- struct iw_statistics local_iwstatistics;
- struct iw_statistics iwstatistics;
-
- struct iw_spy_data spy_data; /* iwspy support */
-
- struct iw_public_data wireless_data;
-
- int monitor_type; /* ARPHRD_IEEE80211 or ARPHRD_IEEE80211_PRISM */
-
- struct islpci_acl acl;
-
- /* PCI bus allocation & configuration members */
- struct pci_dev *pdev; /* PCI structure information */
- char firmware[33];
-
- void __iomem *device_base; /* ioremapped device base address */
-
- /* consistent DMA region */
- void *driver_mem_address; /* base DMA address */
- dma_addr_t device_host_address; /* base DMA address (bus address) */
- dma_addr_t device_psm_buffer; /* host memory for PSM buffering (bus address) */
-
- /* our network_device structure */
- struct net_device *ndev;
-
- /* device queue interface members */
- struct isl38xx_cb *control_block; /* device control block
- (== driver_mem_address!) */
-
- /* Each queue has three indexes:
- * free/index_mgmt/data_rx/tx (called index, see below),
- * driver_curr_frag, and device_curr_frag (in the control block)
- * All indexes are ever-increasing, but interpreted modulo the
- * device queue size when used.
- * index <= device_curr_frag <= driver_curr_frag at all times
- * For rx queues, [index, device_curr_frag) contains fragments
- * that the interrupt processing needs to handle (owned by driver).
- * [device_curr_frag, driver_curr_frag) is the free space in the
- * rx queue, waiting for data (owned by device). The driver
- * increments driver_curr_frag to indicate to the device that more
- * buffers are available.
- * If device_curr_frag == driver_curr_frag, no more rx buffers are
- * available, and the rx DMA engine of the device is halted.
- * For tx queues, [index, device_curr_frag) contains fragments
- * where tx is done; they need to be freed (owned by driver).
- * [device_curr_frag, driver_curr_frag) contains the frames
- * that are being transferred (owned by device). The driver
- * increments driver_curr_frag to indicate that more tx work
- * needs to be done.
- */
- u32 index_mgmt_rx; /* real index mgmt rx queue */
- u32 index_mgmt_tx; /* read index mgmt tx queue */
- u32 free_data_rx; /* free pointer data rx queue */
- u32 free_data_tx; /* free pointer data tx queue */
- u32 data_low_tx_full; /* full detected flag */
-
- /* frame memory buffers for the device queues */
- struct islpci_membuf mgmt_tx[ISL38XX_CB_MGMT_QSIZE];
- struct islpci_membuf mgmt_rx[ISL38XX_CB_MGMT_QSIZE];
- struct sk_buff *data_low_tx[ISL38XX_CB_TX_QSIZE];
- struct sk_buff *data_low_rx[ISL38XX_CB_RX_QSIZE];
- dma_addr_t pci_map_tx_address[ISL38XX_CB_TX_QSIZE];
- dma_addr_t pci_map_rx_address[ISL38XX_CB_RX_QSIZE];
-
- /* wait for a reset interrupt */
- wait_queue_head_t reset_done;
-
- /* used by islpci_mgt_transaction */
- struct mutex mgmt_lock; /* serialize access to mailbox and wqueue */
- struct islpci_mgmtframe *mgmt_received; /* mbox for incoming frame */
- wait_queue_head_t mgmt_wqueue; /* waitqueue for mbox */
-
- /* state machine */
- islpci_state_t state;
- int state_off; /* enumeration of off-state, if 0 then
- * we're not in any off-state */
-
- /* WPA stuff */
- int wpa; /* WPA mode enabled */
- struct list_head bss_wpa_list;
- int num_bss_wpa;
- struct mutex wpa_lock;
- u8 wpa_ie[MAX_WPA_IE_LEN];
- size_t wpa_ie_len;
-
- struct work_struct reset_task;
- int reset_task_pending;
-} islpci_private;
-
-static inline islpci_state_t
-islpci_get_state(islpci_private *priv)
-{
- /* lock */
- return priv->state;
- /* unlock */
-}
-
-islpci_state_t islpci_set_state(islpci_private *priv, islpci_state_t new_state);
-
-#define ISLPCI_TX_TIMEOUT (2*HZ)
-
-irqreturn_t islpci_interrupt(int, void *);
-
-int prism54_post_setup(islpci_private *, int);
-int islpci_reset(islpci_private *, int);
-
-static inline void
-islpci_trigger(islpci_private *priv)
-{
- isl38xx_trigger_device(islpci_get_state(priv) == PRV_STATE_SLEEP,
- priv->device_base);
-}
-
-int islpci_free_memory(islpci_private *);
-struct net_device *islpci_setup(struct pci_dev *);
-
-#define DRV_NAME "prism54"
-#define DRV_VERSION "1.2"
-
-#endif /* _ISLPCI_DEV_H */
diff --git a/drivers/net/wireless/intersil/prism54/islpci_eth.c b/drivers/net/wireless/intersil/prism54/islpci_eth.c
deleted file mode 100644
index 74dd65716afd..000000000000
--- a/drivers/net/wireless/intersil/prism54/islpci_eth.c
+++ /dev/null
@@ -1,489 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2002 Intersil Americas Inc.
- * Copyright (C) 2004 Aurelien Alleaume <slts@free.fr>
- */
-
-#include <linux/module.h>
-#include <linux/gfp.h>
-
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/if_arp.h>
-#include <asm/byteorder.h>
-
-#include "prismcompat.h"
-#include "isl_38xx.h"
-#include "islpci_eth.h"
-#include "islpci_mgt.h"
-#include "oid_mgt.h"
-
-/******************************************************************************
- Network Interface functions
-******************************************************************************/
-void
-islpci_eth_cleanup_transmit(islpci_private *priv,
- isl38xx_control_block *control_block)
-{
- struct sk_buff *skb;
- u32 index;
-
- /* compare the control block read pointer with the free pointer */
- while (priv->free_data_tx !=
- le32_to_cpu(control_block->
- device_curr_frag[ISL38XX_CB_TX_DATA_LQ])) {
- /* read the index of the first fragment to be freed */
- index = priv->free_data_tx % ISL38XX_CB_TX_QSIZE;
-
- /* check for holes in the arrays caused by multi fragment frames
- * searching for the last fragment of a frame */
- if (priv->pci_map_tx_address[index]) {
- /* entry is the last fragment of a frame
- * free the skb structure and unmap pci memory */
- skb = priv->data_low_tx[index];
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING,
- "cleanup skb %p skb->data %p skb->len %u truesize %u\n",
- skb, skb->data, skb->len, skb->truesize);
-#endif
-
- dma_unmap_single(&priv->pdev->dev,
- priv->pci_map_tx_address[index],
- skb->len, DMA_TO_DEVICE);
- dev_kfree_skb_irq(skb);
- skb = NULL;
- }
- /* increment the free data low queue pointer */
- priv->free_data_tx++;
- }
-}
-
-netdev_tx_t
-islpci_eth_transmit(struct sk_buff *skb, struct net_device *ndev)
-{
- islpci_private *priv = netdev_priv(ndev);
- isl38xx_control_block *cb = priv->control_block;
- u32 index;
- dma_addr_t pci_map_address;
- int frame_size;
- isl38xx_fragment *fragment;
- int offset;
- struct sk_buff *newskb;
- int newskb_offset;
- unsigned long flags;
- unsigned char wds_mac[6];
- u32 curr_frag;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_FUNCTION_CALLS, "islpci_eth_transmit\n");
-#endif
-
- /* lock the driver code */
- spin_lock_irqsave(&priv->slock, flags);
-
- /* check whether the destination queue has enough fragments for the frame */
- curr_frag = le32_to_cpu(cb->driver_curr_frag[ISL38XX_CB_TX_DATA_LQ]);
- if (unlikely(curr_frag - priv->free_data_tx >= ISL38XX_CB_TX_QSIZE)) {
- printk(KERN_ERR "%s: transmit device queue full when awake\n",
- ndev->name);
- netif_stop_queue(ndev);
-
- /* trigger the device */
- isl38xx_w32_flush(priv->device_base, ISL38XX_DEV_INT_UPDATE,
- ISL38XX_DEV_INT_REG);
- udelay(ISL38XX_WRITEIO_DELAY);
- goto drop_free;
- }
- /* Check alignment and WDS frame formatting. The start of the packet should
- * be aligned on a 4-byte boundary. If WDS is enabled add another 6 bytes
- * and add WDS address information */
- if (likely(((long) skb->data & 0x03) | init_wds)) {
- /* get the number of bytes to add and re-align */
- offset = (4 - (long) skb->data) & 0x03;
- offset += init_wds ? 6 : 0;
-
- /* check whether the current skb can be used */
- if (!skb_cloned(skb) && (skb_tailroom(skb) >= offset)) {
- unsigned char *src = skb->data;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING, "skb offset %i wds %i\n", offset,
- init_wds);
-#endif
-
- /* align the buffer on 4-byte boundary */
- skb_reserve(skb, (4 - (long) skb->data) & 0x03);
- if (init_wds) {
- /* wds requires an additional address field of 6 bytes */
- skb_put(skb, 6);
-#ifdef ISLPCI_ETH_DEBUG
- printk("islpci_eth_transmit:wds_mac\n");
-#endif
- memmove(skb->data + 6, src, skb->len);
- skb_copy_to_linear_data(skb, wds_mac, 6);
- } else {
- memmove(skb->data, src, skb->len);
- }
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING, "memmove %p %p %i\n", skb->data,
- src, skb->len);
-#endif
- } else {
- newskb =
- dev_alloc_skb(init_wds ? skb->len + 6 : skb->len);
- if (unlikely(newskb == NULL)) {
- printk(KERN_ERR "%s: Cannot allocate skb\n",
- ndev->name);
- goto drop_free;
- }
- newskb_offset = (4 - (long) newskb->data) & 0x03;
-
- /* Check if newskb->data is aligned */
- if (newskb_offset)
- skb_reserve(newskb, newskb_offset);
-
- skb_put(newskb, init_wds ? skb->len + 6 : skb->len);
- if (init_wds) {
- skb_copy_from_linear_data(skb,
- newskb->data + 6,
- skb->len);
- skb_copy_to_linear_data(newskb, wds_mac, 6);
-#ifdef ISLPCI_ETH_DEBUG
- printk("islpci_eth_transmit:wds_mac\n");
-#endif
- } else
- skb_copy_from_linear_data(skb, newskb->data,
- skb->len);
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING, "memcpy %p %p %i wds %i\n",
- newskb->data, skb->data, skb->len, init_wds);
-#endif
-
- newskb->dev = skb->dev;
- dev_kfree_skb_irq(skb);
- skb = newskb;
- }
- }
- /* display the buffer contents for debugging */
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_BUFFER_CONTENTS, "\ntx %p ", skb->data);
- display_buffer((char *) skb->data, skb->len);
-#endif
-
- /* map the skb buffer to pci memory for DMA operation */
- pci_map_address = dma_map_single(&priv->pdev->dev, (void *)skb->data,
- skb->len, DMA_TO_DEVICE);
- if (dma_mapping_error(&priv->pdev->dev, pci_map_address)) {
- printk(KERN_WARNING "%s: cannot map buffer to PCI\n",
- ndev->name);
- goto drop_free;
- }
- /* Place the fragment in the control block structure. */
- index = curr_frag % ISL38XX_CB_TX_QSIZE;
- fragment = &cb->tx_data_low[index];
-
- priv->pci_map_tx_address[index] = pci_map_address;
- /* store the skb address for future freeing */
- priv->data_low_tx[index] = skb;
- /* set the proper fragment start address and size information */
- frame_size = skb->len;
- fragment->size = cpu_to_le16(frame_size);
- fragment->flags = cpu_to_le16(0); /* set to 1 if more fragments */
- fragment->address = cpu_to_le32(pci_map_address);
- curr_frag++;
-
- /* The fragment address in the control block must have been
- * written before announcing the frame buffer to device. */
- wmb();
- cb->driver_curr_frag[ISL38XX_CB_TX_DATA_LQ] = cpu_to_le32(curr_frag);
-
- if (curr_frag - priv->free_data_tx + ISL38XX_MIN_QTHRESHOLD
- > ISL38XX_CB_TX_QSIZE) {
- /* stop sends from upper layers */
- netif_stop_queue(ndev);
-
- /* set the full flag for the transmission queue */
- priv->data_low_tx_full = 1;
- }
-
- ndev->stats.tx_packets++;
- ndev->stats.tx_bytes += skb->len;
-
- /* trigger the device */
- islpci_trigger(priv);
-
- /* unlock the driver code */
- spin_unlock_irqrestore(&priv->slock, flags);
-
- return NETDEV_TX_OK;
-
- drop_free:
- ndev->stats.tx_dropped++;
- spin_unlock_irqrestore(&priv->slock, flags);
- dev_kfree_skb(skb);
- return NETDEV_TX_OK;
-}
-
-static inline int
-islpci_monitor_rx(islpci_private *priv, struct sk_buff **skb)
-{
- /* The card reports full 802.11 packets but with a 20 bytes
- * header and without the FCS. But there a is a bit that
- * indicates if the packet is corrupted :-) */
- struct rfmon_header *hdr = (struct rfmon_header *) (*skb)->data;
-
- if (hdr->flags & 0x01)
- /* This one is bad. Drop it ! */
- return -1;
- if (priv->ndev->type == ARPHRD_IEEE80211_PRISM) {
- struct avs_80211_1_header *avs;
- /* extract the relevant data from the header */
- u32 clock = le32_to_cpu(hdr->clock);
- u8 rate = hdr->rate;
- u16 freq = le16_to_cpu(hdr->freq);
- u8 rssi = hdr->rssi;
-
- skb_pull(*skb, sizeof (struct rfmon_header));
-
- if (skb_headroom(*skb) < sizeof (struct avs_80211_1_header)) {
- struct sk_buff *newskb = skb_copy_expand(*skb,
- sizeof (struct
- avs_80211_1_header),
- 0, GFP_ATOMIC);
- if (newskb) {
- dev_kfree_skb_irq(*skb);
- *skb = newskb;
- } else
- return -1;
- /* This behavior is not very subtile... */
- }
-
- /* make room for the new header and fill it. */
- avs = skb_push(*skb, sizeof(struct avs_80211_1_header));
-
- avs->version = cpu_to_be32(P80211CAPTURE_VERSION);
- avs->length = cpu_to_be32(sizeof (struct avs_80211_1_header));
- avs->mactime = cpu_to_be64(clock);
- avs->hosttime = cpu_to_be64(jiffies);
- avs->phytype = cpu_to_be32(6); /*OFDM: 6 for (g), 8 for (a) */
- avs->channel = cpu_to_be32(channel_of_freq(freq));
- avs->datarate = cpu_to_be32(rate * 5);
- avs->antenna = cpu_to_be32(0); /*unknown */
- avs->priority = cpu_to_be32(0); /*unknown */
- avs->ssi_type = cpu_to_be32(3); /*2: dBm, 3: raw RSSI */
- avs->ssi_signal = cpu_to_be32(rssi & 0x7f);
- avs->ssi_noise = cpu_to_be32(priv->local_iwstatistics.qual.noise); /*better than 'undefined', I assume */
- avs->preamble = cpu_to_be32(0); /*unknown */
- avs->encoding = cpu_to_be32(0); /*unknown */
- } else
- skb_pull(*skb, sizeof (struct rfmon_header));
-
- (*skb)->protocol = htons(ETH_P_802_2);
- skb_reset_mac_header(*skb);
- (*skb)->pkt_type = PACKET_OTHERHOST;
-
- return 0;
-}
-
-int
-islpci_eth_receive(islpci_private *priv)
-{
- struct net_device *ndev = priv->ndev;
- isl38xx_control_block *control_block = priv->control_block;
- struct sk_buff *skb;
- u16 size;
- u32 index, offset;
- unsigned char *src;
- int discard = 0;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_FUNCTION_CALLS, "islpci_eth_receive\n");
-#endif
-
- /* the device has written an Ethernet frame in the data area
- * of the sk_buff without updating the structure, do it now */
- index = priv->free_data_rx % ISL38XX_CB_RX_QSIZE;
- size = le16_to_cpu(control_block->rx_data_low[index].size);
- skb = priv->data_low_rx[index];
- offset = ((unsigned long)
- le32_to_cpu(control_block->rx_data_low[index].address) -
- (unsigned long) skb->data) & 3;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING,
- "frq->addr %x skb->data %p skb->len %u offset %u truesize %u\n",
- control_block->rx_data_low[priv->free_data_rx].address, skb->data,
- skb->len, offset, skb->truesize);
-#endif
-
- /* delete the streaming DMA mapping before processing the skb */
- dma_unmap_single(&priv->pdev->dev, priv->pci_map_rx_address[index],
- MAX_FRAGMENT_SIZE_RX + 2, DMA_FROM_DEVICE);
-
- /* update the skb structure and align the buffer */
- skb_put(skb, size);
- if (offset) {
- /* shift the buffer allocation offset bytes to get the right frame */
- skb_pull(skb, 2);
- skb_put(skb, 2);
- }
-#if VERBOSE > SHOW_ERROR_MESSAGES
- /* display the buffer contents for debugging */
- DEBUG(SHOW_BUFFER_CONTENTS, "\nrx %p ", skb->data);
- display_buffer((char *) skb->data, skb->len);
-#endif
-
- /* check whether WDS is enabled and whether the data frame is a WDS frame */
-
- if (init_wds) {
- /* WDS enabled, check for the wds address on the first 6 bytes of the buffer */
- src = skb->data + 6;
- memmove(skb->data, src, skb->len - 6);
- skb_trim(skb, skb->len - 6);
- }
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING, "Fragment size %i in skb at %p\n", size, skb);
- DEBUG(SHOW_TRACING, "Skb data at %p, length %i\n", skb->data, skb->len);
-
- /* display the buffer contents for debugging */
- DEBUG(SHOW_BUFFER_CONTENTS, "\nrx %p ", skb->data);
- display_buffer((char *) skb->data, skb->len);
-#endif
- /* take care of monitor mode and spy monitoring. */
- if (unlikely(priv->iw_mode == IW_MODE_MONITOR)) {
- skb->dev = ndev;
- discard = islpci_monitor_rx(priv, &skb);
- } else {
- if (unlikely(skb->data[2 * ETH_ALEN] == 0)) {
- /* The packet has a rx_annex. Read it for spy monitoring, Then
- * remove it, while keeping the 2 leading MAC addr.
- */
- struct iw_quality wstats;
- struct rx_annex_header *annex =
- (struct rx_annex_header *) skb->data;
- wstats.level = annex->rfmon.rssi;
- /* The noise value can be a bit outdated if nobody's
- * reading wireless stats... */
- wstats.noise = priv->local_iwstatistics.qual.noise;
- wstats.qual = wstats.level - wstats.noise;
- wstats.updated = 0x07;
- /* Update spy records */
- wireless_spy_update(ndev, annex->addr2, &wstats);
-
- skb_copy_from_linear_data(skb,
- (skb->data +
- sizeof(struct rfmon_header)),
- 2 * ETH_ALEN);
- skb_pull(skb, sizeof (struct rfmon_header));
- }
- skb->protocol = eth_type_trans(skb, ndev);
- }
- skb->ip_summed = CHECKSUM_NONE;
- ndev->stats.rx_packets++;
- ndev->stats.rx_bytes += size;
-
- /* deliver the skb to the network layer */
-#ifdef ISLPCI_ETH_DEBUG
- printk
- ("islpci_eth_receive:netif_rx %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n",
- skb->data[0], skb->data[1], skb->data[2], skb->data[3],
- skb->data[4], skb->data[5]);
-#endif
- if (unlikely(discard)) {
- dev_kfree_skb_irq(skb);
- skb = NULL;
- } else
- netif_rx(skb);
-
- /* increment the read index for the rx data low queue */
- priv->free_data_rx++;
-
- /* add one or more sk_buff structures */
- while (index =
- le32_to_cpu(control_block->
- driver_curr_frag[ISL38XX_CB_RX_DATA_LQ]),
- index - priv->free_data_rx < ISL38XX_CB_RX_QSIZE) {
- /* allocate an sk_buff for received data frames storage
- * include any required allignment operations */
- skb = dev_alloc_skb(MAX_FRAGMENT_SIZE_RX + 2);
- if (unlikely(skb == NULL)) {
- /* error allocating an sk_buff structure elements */
- DEBUG(SHOW_ERROR_MESSAGES, "Error allocating skb\n");
- break;
- }
- skb_reserve(skb, (4 - (long) skb->data) & 0x03);
- /* store the new skb structure pointer */
- index = index % ISL38XX_CB_RX_QSIZE;
- priv->data_low_rx[index] = skb;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING,
- "new alloc skb %p skb->data %p skb->len %u index %u truesize %u\n",
- skb, skb->data, skb->len, index, skb->truesize);
-#endif
-
- /* set the streaming DMA mapping for proper PCI bus operation */
- priv->pci_map_rx_address[index] =
- dma_map_single(&priv->pdev->dev, (void *)skb->data,
- MAX_FRAGMENT_SIZE_RX + 2, DMA_FROM_DEVICE);
- if (dma_mapping_error(&priv->pdev->dev, priv->pci_map_rx_address[index])) {
- /* error mapping the buffer to device accessible memory address */
- DEBUG(SHOW_ERROR_MESSAGES,
- "Error mapping DMA address\n");
-
- /* free the skbuf structure before aborting */
- dev_kfree_skb_irq(skb);
- skb = NULL;
- break;
- }
- /* update the fragment address */
- control_block->rx_data_low[index].address =
- cpu_to_le32((u32)priv->pci_map_rx_address[index]);
- wmb();
-
- /* increment the driver read pointer */
- le32_add_cpu(&control_block->
- driver_curr_frag[ISL38XX_CB_RX_DATA_LQ], 1);
- }
-
- /* trigger the device */
- islpci_trigger(priv);
-
- return 0;
-}
-
-void
-islpci_do_reset_and_wake(struct work_struct *work)
-{
- islpci_private *priv = container_of(work, islpci_private, reset_task);
-
- islpci_reset(priv, 1);
- priv->reset_task_pending = 0;
- smp_wmb();
- netif_wake_queue(priv->ndev);
-}
-
-void
-islpci_eth_tx_timeout(struct net_device *ndev, unsigned int txqueue)
-{
- islpci_private *priv = netdev_priv(ndev);
-
- /* increment the transmit error counter */
- ndev->stats.tx_errors++;
-
- if (!priv->reset_task_pending) {
- printk(KERN_WARNING
- "%s: tx_timeout, scheduling reset", ndev->name);
- netif_stop_queue(ndev);
- priv->reset_task_pending = 1;
- schedule_work(&priv->reset_task);
- } else {
- printk(KERN_WARNING
- "%s: tx_timeout, waiting for reset", ndev->name);
- }
-}
diff --git a/drivers/net/wireless/intersil/prism54/islpci_eth.h b/drivers/net/wireless/intersil/prism54/islpci_eth.h
deleted file mode 100644
index e433ccdc526b..000000000000
--- a/drivers/net/wireless/intersil/prism54/islpci_eth.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2002 Intersil Americas Inc.
- */
-
-#ifndef _ISLPCI_ETH_H
-#define _ISLPCI_ETH_H
-
-#include "isl_38xx.h"
-#include "islpci_dev.h"
-
-struct rfmon_header {
- __le16 unk0; /* = 0x0000 */
- __le16 length; /* = 0x1400 */
- __le32 clock; /* 1MHz clock */
- u8 flags;
- u8 unk1;
- u8 rate;
- u8 unk2;
- __le16 freq;
- __le16 unk3;
- u8 rssi;
- u8 padding[3];
-} __packed;
-
-struct rx_annex_header {
- u8 addr1[ETH_ALEN];
- u8 addr2[ETH_ALEN];
- struct rfmon_header rfmon;
-} __packed;
-
-/* wlan-ng (and hopefully others) AVS header, version one. Fields in
- * network byte order. */
-#define P80211CAPTURE_VERSION 0x80211001
-
-struct avs_80211_1_header {
- __be32 version;
- __be32 length;
- __be64 mactime;
- __be64 hosttime;
- __be32 phytype;
- __be32 channel;
- __be32 datarate;
- __be32 antenna;
- __be32 priority;
- __be32 ssi_type;
- __be32 ssi_signal;
- __be32 ssi_noise;
- __be32 preamble;
- __be32 encoding;
-};
-
-void islpci_eth_cleanup_transmit(islpci_private *, isl38xx_control_block *);
-netdev_tx_t islpci_eth_transmit(struct sk_buff *, struct net_device *);
-int islpci_eth_receive(islpci_private *);
-void islpci_eth_tx_timeout(struct net_device *, unsigned int txqueue);
-void islpci_do_reset_and_wake(struct work_struct *);
-
-#endif /* _ISL_GEN_H */
diff --git a/drivers/net/wireless/intersil/prism54/islpci_hotplug.c b/drivers/net/wireless/intersil/prism54/islpci_hotplug.c
deleted file mode 100644
index 31a1e61326ff..000000000000
--- a/drivers/net/wireless/intersil/prism54/islpci_hotplug.c
+++ /dev/null
@@ -1,316 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2002 Intersil Americas Inc.
- * Copyright (C) 2003 Herbert Valerio Riedel <hvr@gnu.org>
- */
-
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/init.h> /* For __init, __exit */
-#include <linux/dma-mapping.h>
-
-#include "prismcompat.h"
-#include "islpci_dev.h"
-#include "islpci_mgt.h" /* for pc_debug */
-#include "isl_oid.h"
-
-MODULE_AUTHOR("[Intersil] R.Bastings and W.Termorshuizen, The prism54.org Development Team <prism54-devel@prism54.org>");
-MODULE_DESCRIPTION("The Prism54 802.11 Wireless LAN adapter");
-MODULE_LICENSE("GPL");
-
-static int init_pcitm = 0;
-module_param(init_pcitm, int, 0);
-
-/* In this order: vendor, device, subvendor, subdevice, class, class_mask,
- * driver_data
- * If you have an update for this please contact prism54-devel@prism54.org
- * The latest list can be found at http://wireless.wiki.kernel.org/en/users/Drivers/p54
- */
-static const struct pci_device_id prism54_id_tbl[] = {
- /* Intersil PRISM Duette/Prism GT Wireless LAN adapter */
- {
- 0x1260, 0x3890,
- PCI_ANY_ID, PCI_ANY_ID,
- 0, 0, 0
- },
-
- /* 3COM 3CRWE154G72 Wireless LAN adapter */
- {
- PCI_VDEVICE(3COM, 0x6001), 0
- },
-
- /* Intersil PRISM Indigo Wireless LAN adapter */
- {
- 0x1260, 0x3877,
- PCI_ANY_ID, PCI_ANY_ID,
- 0, 0, 0
- },
-
- /* Intersil PRISM Javelin/Xbow Wireless LAN adapter */
- {
- 0x1260, 0x3886,
- PCI_ANY_ID, PCI_ANY_ID,
- 0, 0, 0
- },
-
- /* End of list */
- {0,0,0,0,0,0,0}
-};
-
-/* register the device with the Hotplug facilities of the kernel */
-MODULE_DEVICE_TABLE(pci, prism54_id_tbl);
-
-static int prism54_probe(struct pci_dev *, const struct pci_device_id *);
-static void prism54_remove(struct pci_dev *);
-static int __maybe_unused prism54_suspend(struct device *);
-static int __maybe_unused prism54_resume(struct device *);
-
-static SIMPLE_DEV_PM_OPS(prism54_pm_ops, prism54_suspend, prism54_resume);
-
-static struct pci_driver prism54_driver = {
- .name = DRV_NAME,
- .id_table = prism54_id_tbl,
- .probe = prism54_probe,
- .remove = prism54_remove,
- .driver.pm = &prism54_pm_ops,
-};
-
-/******************************************************************************
- Module initialization functions
-******************************************************************************/
-
-static int
-prism54_probe(struct pci_dev *pdev, const struct pci_device_id *id)
-{
- struct net_device *ndev;
- u8 latency_tmr;
- u32 mem_addr;
- islpci_private *priv;
- int rvalue;
-
- /* Enable the pci device */
- if (pci_enable_device(pdev)) {
- printk(KERN_ERR "%s: pci_enable_device() failed.\n", DRV_NAME);
- return -ENODEV;
- }
-
- /* check whether the latency timer is set correctly */
- pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &latency_tmr);
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING, "latency timer: %x\n", latency_tmr);
-#endif
- if (latency_tmr < PCIDEVICE_LATENCY_TIMER_MIN) {
- /* set the latency timer */
- pci_write_config_byte(pdev, PCI_LATENCY_TIMER,
- PCIDEVICE_LATENCY_TIMER_VAL);
- }
-
- /* enable PCI DMA */
- if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) {
- printk(KERN_ERR "%s: 32-bit PCI DMA not supported", DRV_NAME);
- goto do_pci_disable_device;
- }
-
- /* 0x40 is the programmable timer to configure the response timeout (TRDY_TIMEOUT)
- * 0x41 is the programmable timer to configure the retry timeout (RETRY_TIMEOUT)
- * The RETRY_TIMEOUT is used to set the number of retries that the core, as a
- * Master, will perform before abandoning a cycle. The default value for
- * RETRY_TIMEOUT is 0x80, which far exceeds the PCI 2.1 requirement for new
- * devices. A write of zero to the RETRY_TIMEOUT register disables this
- * function to allow use with any non-compliant legacy devices that may
- * execute more retries.
- *
- * Writing zero to both these two registers will disable both timeouts and
- * *can* solve problems caused by devices that are slow to respond.
- * Make this configurable - MSW
- */
- if ( init_pcitm >= 0 ) {
- pci_write_config_byte(pdev, 0x40, (u8)init_pcitm);
- pci_write_config_byte(pdev, 0x41, (u8)init_pcitm);
- } else {
- printk(KERN_INFO "PCI TRDY/RETRY unchanged\n");
- }
-
- /* request the pci device I/O regions */
- rvalue = pci_request_regions(pdev, DRV_NAME);
- if (rvalue) {
- printk(KERN_ERR "%s: pci_request_regions failure (rc=%d)\n",
- DRV_NAME, rvalue);
- goto do_pci_disable_device;
- }
-
- /* check if the memory window is indeed set */
- rvalue = pci_read_config_dword(pdev, PCI_BASE_ADDRESS_0, &mem_addr);
- if (rvalue || !mem_addr) {
- printk(KERN_ERR "%s: PCI device memory region not configured; fix your BIOS or CardBus bridge/drivers\n",
- DRV_NAME);
- goto do_pci_release_regions;
- }
-
- /* enable PCI bus-mastering */
- DEBUG(SHOW_TRACING, "%s: pci_set_master(pdev)\n", DRV_NAME);
- pci_set_master(pdev);
-
- /* enable MWI */
- pci_try_set_mwi(pdev);
-
- /* setup the network device interface and its structure */
- if (!(ndev = islpci_setup(pdev))) {
- /* error configuring the driver as a network device */
- printk(KERN_ERR "%s: could not configure network device\n",
- DRV_NAME);
- goto do_pci_clear_mwi;
- }
-
- priv = netdev_priv(ndev);
- islpci_set_state(priv, PRV_STATE_PREBOOT); /* we are attempting to boot */
-
- /* card is in unknown state yet, might have some interrupts pending */
- isl38xx_disable_interrupts(priv->device_base);
-
- /* request for the interrupt before uploading the firmware */
- rvalue = request_irq(pdev->irq, islpci_interrupt,
- IRQF_SHARED, ndev->name, priv);
-
- if (rvalue) {
- /* error, could not hook the handler to the irq */
- printk(KERN_ERR "%s: could not install IRQ handler\n",
- ndev->name);
- goto do_unregister_netdev;
- }
-
- /* firmware upload is triggered in islpci_open */
-
- return 0;
-
- do_unregister_netdev:
- unregister_netdev(ndev);
- islpci_free_memory(priv);
- free_netdev(ndev);
- priv = NULL;
- do_pci_clear_mwi:
- pci_clear_mwi(pdev);
- do_pci_release_regions:
- pci_release_regions(pdev);
- do_pci_disable_device:
- pci_disable_device(pdev);
- return -EIO;
-}
-
-/* set by cleanup_module */
-static volatile int __in_cleanup_module = 0;
-
-/* this one removes one(!!) instance only */
-static void
-prism54_remove(struct pci_dev *pdev)
-{
- struct net_device *ndev = pci_get_drvdata(pdev);
- islpci_private *priv = ndev ? netdev_priv(ndev) : NULL;
- BUG_ON(!priv);
-
- if (!__in_cleanup_module) {
- printk(KERN_DEBUG "%s: hot unplug detected\n", ndev->name);
- islpci_set_state(priv, PRV_STATE_OFF);
- }
-
- printk(KERN_DEBUG "%s: removing device\n", ndev->name);
-
- unregister_netdev(ndev);
-
- /* free the interrupt request */
-
- if (islpci_get_state(priv) != PRV_STATE_OFF) {
- isl38xx_disable_interrupts(priv->device_base);
- islpci_set_state(priv, PRV_STATE_OFF);
- /* This bellow causes a lockup at rmmod time. It might be
- * because some interrupts still linger after rmmod time,
- * see bug #17 */
- /* pci_set_power_state(pdev, 3);*/ /* try to power-off */
- }
-
- free_irq(pdev->irq, priv);
-
- /* free the PCI memory and unmap the remapped page */
- islpci_free_memory(priv);
-
- free_netdev(ndev);
- priv = NULL;
-
- pci_clear_mwi(pdev);
-
- pci_release_regions(pdev);
-
- pci_disable_device(pdev);
-}
-
-static int __maybe_unused
-prism54_suspend(struct device *dev)
-{
- struct net_device *ndev = dev_get_drvdata(dev);
- islpci_private *priv = ndev ? netdev_priv(ndev) : NULL;
- BUG_ON(!priv);
-
- /* tell the device not to trigger interrupts for now... */
- isl38xx_disable_interrupts(priv->device_base);
-
- /* from now on assume the hardware was already powered down
- and don't touch it anymore */
- islpci_set_state(priv, PRV_STATE_OFF);
-
- netif_stop_queue(ndev);
- netif_device_detach(ndev);
-
- return 0;
-}
-
-static int __maybe_unused
-prism54_resume(struct device *dev)
-{
- struct net_device *ndev = dev_get_drvdata(dev);
- islpci_private *priv = ndev ? netdev_priv(ndev) : NULL;
-
- BUG_ON(!priv);
-
- printk(KERN_NOTICE "%s: got resume request\n", ndev->name);
-
- /* alright let's go into the PREBOOT state */
- islpci_reset(priv, 1);
-
- netif_device_attach(ndev);
- netif_start_queue(ndev);
-
- return 0;
-}
-
-static int __init
-prism54_module_init(void)
-{
- printk(KERN_INFO "Loaded %s driver, version %s\n",
- DRV_NAME, DRV_VERSION);
-
- __bug_on_wrong_struct_sizes ();
-
- return pci_register_driver(&prism54_driver);
-}
-
-/* by the time prism54_module_exit() terminates, as a postcondition
- * all instances will have been destroyed by calls to
- * prism54_remove() */
-static void __exit
-prism54_module_exit(void)
-{
- __in_cleanup_module = 1;
-
- pci_unregister_driver(&prism54_driver);
-
- printk(KERN_INFO "Unloaded %s driver\n", DRV_NAME);
-
- __in_cleanup_module = 0;
-}
-
-/* register entry points */
-module_init(prism54_module_init);
-module_exit(prism54_module_exit);
-/* EOF */
diff --git a/drivers/net/wireless/intersil/prism54/islpci_mgt.c b/drivers/net/wireless/intersil/prism54/islpci_mgt.c
deleted file mode 100644
index 0c7fb76c7d1c..000000000000
--- a/drivers/net/wireless/intersil/prism54/islpci_mgt.c
+++ /dev/null
@@ -1,491 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2002 Intersil Americas Inc.
- * Copyright 2004 Jens Maurer <Jens.Maurer@gmx.net>
- */
-
-#include <linux/netdevice.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-#include <linux/if_arp.h>
-
-#include "prismcompat.h"
-#include "isl_38xx.h"
-#include "islpci_mgt.h"
-#include "isl_oid.h" /* additional types and defs for isl38xx fw */
-#include "isl_ioctl.h"
-
-#include <net/iw_handler.h>
-
-/******************************************************************************
- Global variable definition section
-******************************************************************************/
-int pc_debug = VERBOSE;
-module_param(pc_debug, int, 0);
-
-/******************************************************************************
- Driver general functions
-******************************************************************************/
-#if VERBOSE > SHOW_ERROR_MESSAGES
-void
-display_buffer(char *buffer, int length)
-{
- if ((pc_debug & SHOW_BUFFER_CONTENTS) == 0)
- return;
-
- while (length > 0) {
- printk("[%02x]", *buffer & 255);
- length--;
- buffer++;
- }
-
- printk("\n");
-}
-#endif
-
-/*****************************************************************************
- Queue handling for management frames
-******************************************************************************/
-
-/*
- * Helper function to create a PIMFOR management frame header.
- */
-static void
-pimfor_encode_header(int operation, u32 oid, u32 length, pimfor_header_t *h)
-{
- h->version = PIMFOR_VERSION;
- h->operation = operation;
- h->device_id = PIMFOR_DEV_ID_MHLI_MIB;
- h->flags = 0;
- h->oid = cpu_to_be32(oid);
- h->length = cpu_to_be32(length);
-}
-
-/*
- * Helper function to analyze a PIMFOR management frame header.
- */
-static pimfor_header_t *
-pimfor_decode_header(void *data, int len)
-{
- pimfor_header_t *h = data;
-
- while ((void *) h < data + len) {
- if (h->flags & PIMFOR_FLAG_LITTLE_ENDIAN) {
- le32_to_cpus(&h->oid);
- le32_to_cpus(&h->length);
- } else {
- be32_to_cpus(&h->oid);
- be32_to_cpus(&h->length);
- }
- if (h->oid != OID_INL_TUNNEL)
- return h;
- h++;
- }
- return NULL;
-}
-
-/*
- * Fill the receive queue for management frames with fresh buffers.
- */
-int
-islpci_mgmt_rx_fill(struct net_device *ndev)
-{
- islpci_private *priv = netdev_priv(ndev);
- isl38xx_control_block *cb = /* volatile not needed */
- (isl38xx_control_block *) priv->control_block;
- u32 curr = le32_to_cpu(cb->driver_curr_frag[ISL38XX_CB_RX_MGMTQ]);
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_FUNCTION_CALLS, "islpci_mgmt_rx_fill\n");
-#endif
-
- while (curr - priv->index_mgmt_rx < ISL38XX_CB_MGMT_QSIZE) {
- u32 index = curr % ISL38XX_CB_MGMT_QSIZE;
- struct islpci_membuf *buf = &priv->mgmt_rx[index];
- isl38xx_fragment *frag = &cb->rx_data_mgmt[index];
-
- if (buf->mem == NULL) {
- buf->mem = kmalloc(MGMT_FRAME_SIZE, GFP_ATOMIC);
- if (!buf->mem)
- return -ENOMEM;
- buf->size = MGMT_FRAME_SIZE;
- }
- if (buf->pci_addr == 0) {
- buf->pci_addr = dma_map_single(&priv->pdev->dev,
- buf->mem,
- MGMT_FRAME_SIZE,
- DMA_FROM_DEVICE);
- if (dma_mapping_error(&priv->pdev->dev, buf->pci_addr)) {
- printk(KERN_WARNING
- "Failed to make memory DMA'able.\n");
- return -ENOMEM;
- }
- }
-
- /* be safe: always reset control block information */
- frag->size = cpu_to_le16(MGMT_FRAME_SIZE);
- frag->flags = 0;
- frag->address = cpu_to_le32(buf->pci_addr);
- curr++;
-
- /* The fragment address in the control block must have
- * been written before announcing the frame buffer to
- * device */
- wmb();
- cb->driver_curr_frag[ISL38XX_CB_RX_MGMTQ] = cpu_to_le32(curr);
- }
- return 0;
-}
-
-/*
- * Create and transmit a management frame using "operation" and "oid",
- * with arguments data/length.
- * We either return an error and free the frame, or we return 0 and
- * islpci_mgt_cleanup_transmit() frees the frame in the tx-done
- * interrupt.
- */
-static int
-islpci_mgt_transmit(struct net_device *ndev, int operation, unsigned long oid,
- void *data, int length)
-{
- islpci_private *priv = netdev_priv(ndev);
- isl38xx_control_block *cb =
- (isl38xx_control_block *) priv->control_block;
- void *p;
- int err = -EINVAL;
- unsigned long flags;
- isl38xx_fragment *frag;
- struct islpci_membuf buf;
- u32 curr_frag;
- int index;
- int frag_len = length + PIMFOR_HEADER_SIZE;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_FUNCTION_CALLS, "islpci_mgt_transmit\n");
-#endif
-
- if (frag_len > MGMT_FRAME_SIZE) {
- printk(KERN_DEBUG "%s: mgmt frame too large %d\n",
- ndev->name, frag_len);
- goto error;
- }
-
- err = -ENOMEM;
- p = buf.mem = kmalloc(frag_len, GFP_KERNEL);
- if (!buf.mem)
- goto error;
-
- buf.size = frag_len;
-
- /* create the header directly in the fragment data area */
- pimfor_encode_header(operation, oid, length, (pimfor_header_t *) p);
- p += PIMFOR_HEADER_SIZE;
-
- if (data)
- memcpy(p, data, length);
- else
- memset(p, 0, length);
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- {
- pimfor_header_t *h = buf.mem;
- DEBUG(SHOW_PIMFOR_FRAMES,
- "PIMFOR: op %i, oid 0x%08lx, device %i, flags 0x%x length 0x%x\n",
- h->operation, oid, h->device_id, h->flags, length);
-
- /* display the buffer contents for debugging */
- display_buffer((char *) h, sizeof (pimfor_header_t));
- display_buffer(p, length);
- }
-#endif
-
- err = -ENOMEM;
- buf.pci_addr = dma_map_single(&priv->pdev->dev, buf.mem, frag_len,
- DMA_TO_DEVICE);
- if (dma_mapping_error(&priv->pdev->dev, buf.pci_addr)) {
- printk(KERN_WARNING "%s: cannot map PCI memory for mgmt\n",
- ndev->name);
- goto error_free;
- }
-
- /* Protect the control block modifications against interrupts. */
- spin_lock_irqsave(&priv->slock, flags);
- curr_frag = le32_to_cpu(cb->driver_curr_frag[ISL38XX_CB_TX_MGMTQ]);
- if (curr_frag - priv->index_mgmt_tx >= ISL38XX_CB_MGMT_QSIZE) {
- printk(KERN_WARNING "%s: mgmt tx queue is still full\n",
- ndev->name);
- goto error_unlock;
- }
-
- /* commit the frame to the tx device queue */
- index = curr_frag % ISL38XX_CB_MGMT_QSIZE;
- priv->mgmt_tx[index] = buf;
- frag = &cb->tx_data_mgmt[index];
- frag->size = cpu_to_le16(frag_len);
- frag->flags = 0; /* for any other than the last fragment, set to 1 */
- frag->address = cpu_to_le32(buf.pci_addr);
-
- /* The fragment address in the control block must have
- * been written before announcing the frame buffer to
- * device */
- wmb();
- cb->driver_curr_frag[ISL38XX_CB_TX_MGMTQ] = cpu_to_le32(curr_frag + 1);
- spin_unlock_irqrestore(&priv->slock, flags);
-
- /* trigger the device */
- islpci_trigger(priv);
- return 0;
-
- error_unlock:
- spin_unlock_irqrestore(&priv->slock, flags);
- error_free:
- kfree(buf.mem);
- error:
- return err;
-}
-
-/*
- * Receive a management frame from the device.
- * This can be an arbitrary number of traps, and at most one response
- * frame for a previous request sent via islpci_mgt_transmit().
- */
-int
-islpci_mgt_receive(struct net_device *ndev)
-{
- islpci_private *priv = netdev_priv(ndev);
- isl38xx_control_block *cb =
- (isl38xx_control_block *) priv->control_block;
- u32 curr_frag;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_FUNCTION_CALLS, "islpci_mgt_receive\n");
-#endif
-
- /* Only once per interrupt, determine fragment range to
- * process. This avoids an endless loop (i.e. lockup) if
- * frames come in faster than we can process them. */
- curr_frag = le32_to_cpu(cb->device_curr_frag[ISL38XX_CB_RX_MGMTQ]);
- barrier();
-
- for (; priv->index_mgmt_rx < curr_frag; priv->index_mgmt_rx++) {
- pimfor_header_t *header;
- u32 index = priv->index_mgmt_rx % ISL38XX_CB_MGMT_QSIZE;
- struct islpci_membuf *buf = &priv->mgmt_rx[index];
- u16 frag_len;
- int size;
- struct islpci_mgmtframe *frame;
-
- /* I have no idea (and no documentation) if flags != 0
- * is possible. Drop the frame, reuse the buffer. */
- if (le16_to_cpu(cb->rx_data_mgmt[index].flags) != 0) {
- printk(KERN_WARNING "%s: unknown flags 0x%04x\n",
- ndev->name,
- le16_to_cpu(cb->rx_data_mgmt[index].flags));
- continue;
- }
-
- /* The device only returns the size of the header(s) here. */
- frag_len = le16_to_cpu(cb->rx_data_mgmt[index].size);
-
- /*
- * We appear to have no way to tell the device the
- * size of a receive buffer. Thus, if this check
- * triggers, we likely have kernel heap corruption. */
- if (frag_len > MGMT_FRAME_SIZE) {
- printk(KERN_WARNING
- "%s: Bogus packet size of %d (%#x).\n",
- ndev->name, frag_len, frag_len);
- frag_len = MGMT_FRAME_SIZE;
- }
-
- /* Ensure the results of device DMA are visible to the CPU. */
- dma_sync_single_for_cpu(&priv->pdev->dev, buf->pci_addr,
- buf->size, DMA_FROM_DEVICE);
-
- /* Perform endianess conversion for PIMFOR header in-place. */
- header = pimfor_decode_header(buf->mem, frag_len);
- if (!header) {
- printk(KERN_WARNING "%s: no PIMFOR header found\n",
- ndev->name);
- continue;
- }
-
- /* The device ID from the PIMFOR packet received from
- * the MVC is always 0. We forward a sensible device_id.
- * Not that anyone upstream would care... */
- header->device_id = priv->ndev->ifindex;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_PIMFOR_FRAMES,
- "PIMFOR: op %i, oid 0x%08x, device %i, flags 0x%x length 0x%x\n",
- header->operation, header->oid, header->device_id,
- header->flags, header->length);
-
- /* display the buffer contents for debugging */
- display_buffer((char *) header, PIMFOR_HEADER_SIZE);
- display_buffer((char *) header + PIMFOR_HEADER_SIZE,
- header->length);
-#endif
-
- /* nobody sends these */
- if (header->flags & PIMFOR_FLAG_APPLIC_ORIGIN) {
- printk(KERN_DEBUG
- "%s: errant PIMFOR application frame\n",
- ndev->name);
- continue;
- }
-
- /* Determine frame size, skipping OID_INL_TUNNEL headers. */
- size = PIMFOR_HEADER_SIZE + header->length;
- frame = kmalloc(sizeof(struct islpci_mgmtframe) + size,
- GFP_ATOMIC);
- if (!frame)
- continue;
-
- frame->ndev = ndev;
- memcpy(&frame->buf, header, size);
- frame->header = (pimfor_header_t *) frame->buf;
- frame->data = frame->buf + PIMFOR_HEADER_SIZE;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_PIMFOR_FRAMES,
- "frame: header: %p, data: %p, size: %d\n",
- frame->header, frame->data, size);
-#endif
-
- if (header->operation == PIMFOR_OP_TRAP) {
-#if VERBOSE > SHOW_ERROR_MESSAGES
- printk(KERN_DEBUG
- "TRAP: oid 0x%x, device %i, flags 0x%x length %i\n",
- header->oid, header->device_id, header->flags,
- header->length);
-#endif
-
- /* Create work to handle trap out of interrupt
- * context. */
- INIT_WORK(&frame->ws, prism54_process_trap);
- schedule_work(&frame->ws);
-
- } else {
- /* Signal the one waiting process that a response
- * has been received. */
- if ((frame = xchg(&priv->mgmt_received, frame)) != NULL) {
- printk(KERN_WARNING
- "%s: mgmt response not collected\n",
- ndev->name);
- kfree(frame);
- }
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_TRACING, "Wake up Mgmt Queue\n");
-#endif
- wake_up(&priv->mgmt_wqueue);
- }
-
- }
-
- return 0;
-}
-
-/*
- * Cleanup the transmit queue by freeing all frames handled by the device.
- */
-void
-islpci_mgt_cleanup_transmit(struct net_device *ndev)
-{
- islpci_private *priv = netdev_priv(ndev);
- isl38xx_control_block *cb = /* volatile not needed */
- (isl38xx_control_block *) priv->control_block;
- u32 curr_frag;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
- DEBUG(SHOW_FUNCTION_CALLS, "islpci_mgt_cleanup_transmit\n");
-#endif
-
- /* Only once per cleanup, determine fragment range to
- * process. This avoids an endless loop (i.e. lockup) if
- * the device became confused, incrementing device_curr_frag
- * rapidly. */
- curr_frag = le32_to_cpu(cb->device_curr_frag[ISL38XX_CB_TX_MGMTQ]);
- barrier();
-
- for (; priv->index_mgmt_tx < curr_frag; priv->index_mgmt_tx++) {
- int index = priv->index_mgmt_tx % ISL38XX_CB_MGMT_QSIZE;
- struct islpci_membuf *buf = &priv->mgmt_tx[index];
- dma_unmap_single(&priv->pdev->dev, buf->pci_addr, buf->size,
- DMA_TO_DEVICE);
- buf->pci_addr = 0;
- kfree(buf->mem);
- buf->mem = NULL;
- buf->size = 0;
- }
-}
-
-/*
- * Perform one request-response transaction to the device.
- */
-int
-islpci_mgt_transaction(struct net_device *ndev,
- int operation, unsigned long oid,
- void *senddata, int sendlen,
- struct islpci_mgmtframe **recvframe)
-{
- islpci_private *priv = netdev_priv(ndev);
- const long wait_cycle_jiffies = msecs_to_jiffies(ISL38XX_WAIT_CYCLE * 10);
- long timeout_left = ISL38XX_MAX_WAIT_CYCLES * wait_cycle_jiffies;
- int err;
- DEFINE_WAIT(wait);
-
- *recvframe = NULL;
-
- if (mutex_lock_interruptible(&priv->mgmt_lock))
- return -ERESTARTSYS;
-
- prepare_to_wait(&priv->mgmt_wqueue, &wait, TASK_UNINTERRUPTIBLE);
- err = islpci_mgt_transmit(ndev, operation, oid, senddata, sendlen);
- if (err)
- goto out;
-
- err = -ETIMEDOUT;
- while (timeout_left > 0) {
- int timeleft;
- struct islpci_mgmtframe *frame;
-
- timeleft = schedule_timeout_uninterruptible(wait_cycle_jiffies);
- frame = xchg(&priv->mgmt_received, NULL);
- if (frame) {
- if (frame->header->oid == oid) {
- *recvframe = frame;
- err = 0;
- goto out;
- } else {
- printk(KERN_DEBUG
- "%s: expecting oid 0x%x, received 0x%x.\n",
- ndev->name, (unsigned int) oid,
- frame->header->oid);
- kfree(frame);
- frame = NULL;
- }
- }
- if (timeleft == 0) {
- printk(KERN_DEBUG
- "%s: timeout waiting for mgmt response %lu, "
- "triggering device\n",
- ndev->name, timeout_left);
- islpci_trigger(priv);
- }
- timeout_left += timeleft - wait_cycle_jiffies;
- }
- printk(KERN_WARNING "%s: timeout waiting for mgmt response\n",
- ndev->name);
-
- /* TODO: we should reset the device here */
- out:
- finish_wait(&priv->mgmt_wqueue, &wait);
- mutex_unlock(&priv->mgmt_lock);
- return err;
-}
-
diff --git a/drivers/net/wireless/intersil/prism54/islpci_mgt.h b/drivers/net/wireless/intersil/prism54/islpci_mgt.h
deleted file mode 100644
index 1f87d0aea60c..000000000000
--- a/drivers/net/wireless/intersil/prism54/islpci_mgt.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2002 Intersil Americas Inc.
- * Copyright (C) 2003 Luis R. Rodriguez <mcgrof@ruslug.rutgers.edu>
- */
-
-#ifndef _ISLPCI_MGT_H
-#define _ISLPCI_MGT_H
-
-#include <linux/wireless.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-
-/*
- * Function definitions
- */
-
-#define K_DEBUG(f, m, args...) do { if(f & m) printk(KERN_DEBUG args); } while(0)
-#define DEBUG(f, args...) K_DEBUG(f, pc_debug, args)
-
-extern int pc_debug;
-#define init_wds 0 /* help compiler optimize away dead code */
-
-
-/* General driver definitions */
-#define PCIDEVICE_LATENCY_TIMER_MIN 0x40
-#define PCIDEVICE_LATENCY_TIMER_VAL 0x50
-
-/* Debugging verbose definitions */
-#define SHOW_NOTHING 0x00 /* overrules everything */
-#define SHOW_ANYTHING 0xFF
-#define SHOW_ERROR_MESSAGES 0x01
-#define SHOW_TRAPS 0x02
-#define SHOW_FUNCTION_CALLS 0x04
-#define SHOW_TRACING 0x08
-#define SHOW_QUEUE_INDEXES 0x10
-#define SHOW_PIMFOR_FRAMES 0x20
-#define SHOW_BUFFER_CONTENTS 0x40
-#define VERBOSE 0x01
-
-/* Default card definitions */
-#define CARD_DEFAULT_CHANNEL 6
-#define CARD_DEFAULT_MODE INL_MODE_CLIENT
-#define CARD_DEFAULT_IW_MODE IW_MODE_INFRA
-#define CARD_DEFAULT_BSSTYPE DOT11_BSSTYPE_INFRA
-#define CARD_DEFAULT_CLIENT_SSID ""
-#define CARD_DEFAULT_AP_SSID "default"
-#define CARD_DEFAULT_KEY1 "default_key_1"
-#define CARD_DEFAULT_KEY2 "default_key_2"
-#define CARD_DEFAULT_KEY3 "default_key_3"
-#define CARD_DEFAULT_KEY4 "default_key_4"
-#define CARD_DEFAULT_WEP 0
-#define CARD_DEFAULT_FILTER 0
-#define CARD_DEFAULT_WDS 0
-#define CARD_DEFAULT_AUTHEN DOT11_AUTH_OS
-#define CARD_DEFAULT_DOT1X 0
-#define CARD_DEFAULT_MLME_MODE DOT11_MLME_AUTO
-#define CARD_DEFAULT_CONFORMANCE OID_INL_CONFORMANCE_NONE
-#define CARD_DEFAULT_PROFILE DOT11_PROFILE_MIXED_G_WIFI
-#define CARD_DEFAULT_MAXFRAMEBURST DOT11_MAXFRAMEBURST_MIXED_SAFE
-
-/* PIMFOR package definitions */
-#define PIMFOR_ETHERTYPE 0x8828
-#define PIMFOR_HEADER_SIZE 12
-#define PIMFOR_VERSION 1
-#define PIMFOR_OP_GET 0
-#define PIMFOR_OP_SET 1
-#define PIMFOR_OP_RESPONSE 2
-#define PIMFOR_OP_ERROR 3
-#define PIMFOR_OP_TRAP 4
-#define PIMFOR_OP_RESERVED 5 /* till 255 */
-#define PIMFOR_DEV_ID_MHLI_MIB 0
-#define PIMFOR_FLAG_APPLIC_ORIGIN 0x01
-#define PIMFOR_FLAG_LITTLE_ENDIAN 0x02
-
-void display_buffer(char *, int);
-
-/*
- * Type definition section
- *
- * the structure defines only the header allowing copyless
- * frame handling
- */
-typedef struct {
- u8 version;
- u8 operation;
- u32 oid;
- u8 device_id;
- u8 flags;
- u32 length;
-} __packed
-pimfor_header_t;
-
-/* A received and interrupt-processed management frame, either for
- * schedule_work(prism54_process_trap) or for priv->mgmt_received,
- * processed by islpci_mgt_transaction(). */
-struct islpci_mgmtframe {
- struct net_device *ndev; /* pointer to network device */
- pimfor_header_t *header; /* payload header, points into buf */
- void *data; /* payload ex header, points into buf */
- struct work_struct ws; /* argument for schedule_work() */
- char buf[]; /* fragment buffer */
-};
-
-int
-islpci_mgt_receive(struct net_device *ndev);
-
-int
-islpci_mgmt_rx_fill(struct net_device *ndev);
-
-void
-islpci_mgt_cleanup_transmit(struct net_device *ndev);
-
-int
-islpci_mgt_transaction(struct net_device *ndev,
- int operation, unsigned long oid,
- void *senddata, int sendlen,
- struct islpci_mgmtframe **recvframe);
-
-static inline void
-islpci_mgt_release(struct islpci_mgmtframe *frame)
-{
- kfree(frame);
-}
-
-#endif /* _ISLPCI_MGT_H */
diff --git a/drivers/net/wireless/intersil/prism54/oid_mgt.c b/drivers/net/wireless/intersil/prism54/oid_mgt.c
deleted file mode 100644
index 9fd307ca4b6d..000000000000
--- a/drivers/net/wireless/intersil/prism54/oid_mgt.c
+++ /dev/null
@@ -1,889 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2003,2004 Aurelien Alleaume <slts@free.fr>
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-
-#include "prismcompat.h"
-#include "islpci_dev.h"
-#include "islpci_mgt.h"
-#include "isl_oid.h"
-#include "oid_mgt.h"
-#include "isl_ioctl.h"
-
-/* to convert between channel and freq */
-static const int frequency_list_bg[] = { 2412, 2417, 2422, 2427, 2432,
- 2437, 2442, 2447, 2452, 2457, 2462, 2467, 2472, 2484
-};
-
-int
-channel_of_freq(int f)
-{
- int c = 0;
-
- if ((f >= 2412) && (f <= 2484)) {
- while ((c < 14) && (f != frequency_list_bg[c]))
- c++;
- return (c >= 14) ? 0 : ++c;
- } else if ((f >= (int) 5000) && (f <= (int) 6000)) {
- return ( (f - 5000) / 5 );
- } else
- return 0;
-}
-
-#define OID_STRUCT(name,oid,s,t) [name] = {oid, 0, sizeof(s), t}
-#define OID_STRUCT_C(name,oid,s,t) OID_STRUCT(name,oid,s,t | OID_FLAG_CACHED)
-#define OID_U32(name,oid) OID_STRUCT(name,oid,u32,OID_TYPE_U32)
-#define OID_U32_C(name,oid) OID_STRUCT_C(name,oid,u32,OID_TYPE_U32)
-#define OID_STRUCT_MLME(name,oid) OID_STRUCT(name,oid,struct obj_mlme,OID_TYPE_MLME)
-#define OID_STRUCT_MLMEEX(name,oid) OID_STRUCT(name,oid,struct obj_mlmeex,OID_TYPE_MLMEEX)
-
-#define OID_UNKNOWN(name,oid) OID_STRUCT(name,oid,0,0)
-
-struct oid_t isl_oid[] = {
- OID_STRUCT(GEN_OID_MACADDRESS, 0x00000000, u8[6], OID_TYPE_ADDR),
- OID_U32(GEN_OID_LINKSTATE, 0x00000001),
- OID_UNKNOWN(GEN_OID_WATCHDOG, 0x00000002),
- OID_UNKNOWN(GEN_OID_MIBOP, 0x00000003),
- OID_UNKNOWN(GEN_OID_OPTIONS, 0x00000004),
- OID_UNKNOWN(GEN_OID_LEDCONFIG, 0x00000005),
-
- /* 802.11 */
- OID_U32_C(DOT11_OID_BSSTYPE, 0x10000000),
- OID_STRUCT_C(DOT11_OID_BSSID, 0x10000001, u8[6], OID_TYPE_RAW),
- OID_STRUCT_C(DOT11_OID_SSID, 0x10000002, struct obj_ssid,
- OID_TYPE_SSID),
- OID_U32(DOT11_OID_STATE, 0x10000003),
- OID_U32(DOT11_OID_AID, 0x10000004),
- OID_STRUCT(DOT11_OID_COUNTRYSTRING, 0x10000005, u8[4], OID_TYPE_RAW),
- OID_STRUCT_C(DOT11_OID_SSIDOVERRIDE, 0x10000006, struct obj_ssid,
- OID_TYPE_SSID),
-
- OID_U32(DOT11_OID_MEDIUMLIMIT, 0x11000000),
- OID_U32_C(DOT11_OID_BEACONPERIOD, 0x11000001),
- OID_U32(DOT11_OID_DTIMPERIOD, 0x11000002),
- OID_U32(DOT11_OID_ATIMWINDOW, 0x11000003),
- OID_U32(DOT11_OID_LISTENINTERVAL, 0x11000004),
- OID_U32(DOT11_OID_CFPPERIOD, 0x11000005),
- OID_U32(DOT11_OID_CFPDURATION, 0x11000006),
-
- OID_U32_C(DOT11_OID_AUTHENABLE, 0x12000000),
- OID_U32_C(DOT11_OID_PRIVACYINVOKED, 0x12000001),
- OID_U32_C(DOT11_OID_EXUNENCRYPTED, 0x12000002),
- OID_U32_C(DOT11_OID_DEFKEYID, 0x12000003),
- [DOT11_OID_DEFKEYX] = {0x12000004, 3, sizeof (struct obj_key),
- OID_FLAG_CACHED | OID_TYPE_KEY}, /* DOT11_OID_DEFKEY1,...DOT11_OID_DEFKEY4 */
- OID_UNKNOWN(DOT11_OID_STAKEY, 0x12000008),
- OID_U32(DOT11_OID_REKEYTHRESHOLD, 0x12000009),
- OID_UNKNOWN(DOT11_OID_STASC, 0x1200000a),
-
- OID_U32(DOT11_OID_PRIVTXREJECTED, 0x1a000000),
- OID_U32(DOT11_OID_PRIVRXPLAIN, 0x1a000001),
- OID_U32(DOT11_OID_PRIVRXFAILED, 0x1a000002),
- OID_U32(DOT11_OID_PRIVRXNOKEY, 0x1a000003),
-
- OID_U32_C(DOT11_OID_RTSTHRESH, 0x13000000),
- OID_U32_C(DOT11_OID_FRAGTHRESH, 0x13000001),
- OID_U32_C(DOT11_OID_SHORTRETRIES, 0x13000002),
- OID_U32_C(DOT11_OID_LONGRETRIES, 0x13000003),
- OID_U32_C(DOT11_OID_MAXTXLIFETIME, 0x13000004),
- OID_U32(DOT11_OID_MAXRXLIFETIME, 0x13000005),
- OID_U32(DOT11_OID_AUTHRESPTIMEOUT, 0x13000006),
- OID_U32(DOT11_OID_ASSOCRESPTIMEOUT, 0x13000007),
-
- OID_UNKNOWN(DOT11_OID_ALOFT_TABLE, 0x1d000000),
- OID_UNKNOWN(DOT11_OID_ALOFT_CTRL_TABLE, 0x1d000001),
- OID_UNKNOWN(DOT11_OID_ALOFT_RETREAT, 0x1d000002),
- OID_UNKNOWN(DOT11_OID_ALOFT_PROGRESS, 0x1d000003),
- OID_U32(DOT11_OID_ALOFT_FIXEDRATE, 0x1d000004),
- OID_UNKNOWN(DOT11_OID_ALOFT_RSSIGRAPH, 0x1d000005),
- OID_UNKNOWN(DOT11_OID_ALOFT_CONFIG, 0x1d000006),
-
- [DOT11_OID_VDCFX] = {0x1b000000, 7, 0, 0},
- OID_U32(DOT11_OID_MAXFRAMEBURST, 0x1b000008),
-
- OID_U32(DOT11_OID_PSM, 0x14000000),
- OID_U32(DOT11_OID_CAMTIMEOUT, 0x14000001),
- OID_U32(DOT11_OID_RECEIVEDTIMS, 0x14000002),
- OID_U32(DOT11_OID_ROAMPREFERENCE, 0x14000003),
-
- OID_U32(DOT11_OID_BRIDGELOCAL, 0x15000000),
- OID_U32(DOT11_OID_CLIENTS, 0x15000001),
- OID_U32(DOT11_OID_CLIENTSASSOCIATED, 0x15000002),
- [DOT11_OID_CLIENTX] = {0x15000003, 2006, 0, 0}, /* DOT11_OID_CLIENTX,...DOT11_OID_CLIENT2007 */
-
- OID_STRUCT(DOT11_OID_CLIENTFIND, 0x150007DB, u8[6], OID_TYPE_ADDR),
- OID_STRUCT(DOT11_OID_WDSLINKADD, 0x150007DC, u8[6], OID_TYPE_ADDR),
- OID_STRUCT(DOT11_OID_WDSLINKREMOVE, 0x150007DD, u8[6], OID_TYPE_ADDR),
- OID_STRUCT(DOT11_OID_EAPAUTHSTA, 0x150007DE, u8[6], OID_TYPE_ADDR),
- OID_STRUCT(DOT11_OID_EAPUNAUTHSTA, 0x150007DF, u8[6], OID_TYPE_ADDR),
- OID_U32_C(DOT11_OID_DOT1XENABLE, 0x150007E0),
- OID_UNKNOWN(DOT11_OID_MICFAILURE, 0x150007E1),
- OID_UNKNOWN(DOT11_OID_REKEYINDICATE, 0x150007E2),
-
- OID_U32(DOT11_OID_MPDUTXSUCCESSFUL, 0x16000000),
- OID_U32(DOT11_OID_MPDUTXONERETRY, 0x16000001),
- OID_U32(DOT11_OID_MPDUTXMULTIPLERETRIES, 0x16000002),
- OID_U32(DOT11_OID_MPDUTXFAILED, 0x16000003),
- OID_U32(DOT11_OID_MPDURXSUCCESSFUL, 0x16000004),
- OID_U32(DOT11_OID_MPDURXDUPS, 0x16000005),
- OID_U32(DOT11_OID_RTSSUCCESSFUL, 0x16000006),
- OID_U32(DOT11_OID_RTSFAILED, 0x16000007),
- OID_U32(DOT11_OID_ACKFAILED, 0x16000008),
- OID_U32(DOT11_OID_FRAMERECEIVES, 0x16000009),
- OID_U32(DOT11_OID_FRAMEERRORS, 0x1600000A),
- OID_U32(DOT11_OID_FRAMEABORTS, 0x1600000B),
- OID_U32(DOT11_OID_FRAMEABORTSPHY, 0x1600000C),
-
- OID_U32(DOT11_OID_SLOTTIME, 0x17000000),
- OID_U32(DOT11_OID_CWMIN, 0x17000001),
- OID_U32(DOT11_OID_CWMAX, 0x17000002),
- OID_U32(DOT11_OID_ACKWINDOW, 0x17000003),
- OID_U32(DOT11_OID_ANTENNARX, 0x17000004),
- OID_U32(DOT11_OID_ANTENNATX, 0x17000005),
- OID_U32(DOT11_OID_ANTENNADIVERSITY, 0x17000006),
- OID_U32_C(DOT11_OID_CHANNEL, 0x17000007),
- OID_U32_C(DOT11_OID_EDTHRESHOLD, 0x17000008),
- OID_U32(DOT11_OID_PREAMBLESETTINGS, 0x17000009),
- OID_STRUCT(DOT11_OID_RATES, 0x1700000A, u8[IWMAX_BITRATES + 1],
- OID_TYPE_RAW),
- OID_U32(DOT11_OID_CCAMODESUPPORTED, 0x1700000B),
- OID_U32(DOT11_OID_CCAMODE, 0x1700000C),
- OID_UNKNOWN(DOT11_OID_RSSIVECTOR, 0x1700000D),
- OID_UNKNOWN(DOT11_OID_OUTPUTPOWERTABLE, 0x1700000E),
- OID_U32(DOT11_OID_OUTPUTPOWER, 0x1700000F),
- OID_STRUCT(DOT11_OID_SUPPORTEDRATES, 0x17000010,
- u8[IWMAX_BITRATES + 1], OID_TYPE_RAW),
- OID_U32_C(DOT11_OID_FREQUENCY, 0x17000011),
- [DOT11_OID_SUPPORTEDFREQUENCIES] =
- {0x17000012, 0, sizeof (struct obj_frequencies)
- + sizeof (u16) * IWMAX_FREQ, OID_TYPE_FREQUENCIES},
-
- OID_U32(DOT11_OID_NOISEFLOOR, 0x17000013),
- OID_STRUCT(DOT11_OID_FREQUENCYACTIVITY, 0x17000014, u8[IWMAX_FREQ + 1],
- OID_TYPE_RAW),
- OID_UNKNOWN(DOT11_OID_IQCALIBRATIONTABLE, 0x17000015),
- OID_U32(DOT11_OID_NONERPPROTECTION, 0x17000016),
- OID_U32(DOT11_OID_SLOTSETTINGS, 0x17000017),
- OID_U32(DOT11_OID_NONERPTIMEOUT, 0x17000018),
- OID_U32(DOT11_OID_PROFILES, 0x17000019),
- OID_STRUCT(DOT11_OID_EXTENDEDRATES, 0x17000020,
- u8[IWMAX_BITRATES + 1], OID_TYPE_RAW),
-
- OID_STRUCT_MLME(DOT11_OID_DEAUTHENTICATE, 0x18000000),
- OID_STRUCT_MLME(DOT11_OID_AUTHENTICATE, 0x18000001),
- OID_STRUCT_MLME(DOT11_OID_DISASSOCIATE, 0x18000002),
- OID_STRUCT_MLME(DOT11_OID_ASSOCIATE, 0x18000003),
- OID_UNKNOWN(DOT11_OID_SCAN, 0x18000004),
- OID_STRUCT_MLMEEX(DOT11_OID_BEACON, 0x18000005),
- OID_STRUCT_MLMEEX(DOT11_OID_PROBE, 0x18000006),
- OID_STRUCT_MLMEEX(DOT11_OID_DEAUTHENTICATEEX, 0x18000007),
- OID_STRUCT_MLMEEX(DOT11_OID_AUTHENTICATEEX, 0x18000008),
- OID_STRUCT_MLMEEX(DOT11_OID_DISASSOCIATEEX, 0x18000009),
- OID_STRUCT_MLMEEX(DOT11_OID_ASSOCIATEEX, 0x1800000A),
- OID_STRUCT_MLMEEX(DOT11_OID_REASSOCIATE, 0x1800000B),
- OID_STRUCT_MLMEEX(DOT11_OID_REASSOCIATEEX, 0x1800000C),
-
- OID_U32(DOT11_OID_NONERPSTATUS, 0x1E000000),
-
- OID_U32(DOT11_OID_STATIMEOUT, 0x19000000),
- OID_U32_C(DOT11_OID_MLMEAUTOLEVEL, 0x19000001),
- OID_U32(DOT11_OID_BSSTIMEOUT, 0x19000002),
- [DOT11_OID_ATTACHMENT] = {0x19000003, 0,
- sizeof(struct obj_attachment), OID_TYPE_ATTACH},
- OID_STRUCT_C(DOT11_OID_PSMBUFFER, 0x19000004, struct obj_buffer,
- OID_TYPE_BUFFER),
-
- OID_U32(DOT11_OID_BSSS, 0x1C000000),
- [DOT11_OID_BSSX] = {0x1C000001, 63, sizeof (struct obj_bss),
- OID_TYPE_BSS}, /*DOT11_OID_BSS1,...,DOT11_OID_BSS64 */
- OID_STRUCT(DOT11_OID_BSSFIND, 0x1C000042, struct obj_bss, OID_TYPE_BSS),
- [DOT11_OID_BSSLIST] = {0x1C000043, 0, sizeof (struct
- obj_bsslist) +
- sizeof (struct obj_bss[IWMAX_BSS]),
- OID_TYPE_BSSLIST},
-
- OID_UNKNOWN(OID_INL_TUNNEL, 0xFF020000),
- OID_UNKNOWN(OID_INL_MEMADDR, 0xFF020001),
- OID_UNKNOWN(OID_INL_MEMORY, 0xFF020002),
- OID_U32_C(OID_INL_MODE, 0xFF020003),
- OID_UNKNOWN(OID_INL_COMPONENT_NR, 0xFF020004),
- OID_STRUCT(OID_INL_VERSION, 0xFF020005, u8[8], OID_TYPE_RAW),
- OID_UNKNOWN(OID_INL_INTERFACE_ID, 0xFF020006),
- OID_UNKNOWN(OID_INL_COMPONENT_ID, 0xFF020007),
- OID_U32_C(OID_INL_CONFIG, 0xFF020008),
- OID_U32_C(OID_INL_DOT11D_CONFORMANCE, 0xFF02000C),
- OID_U32(OID_INL_PHYCAPABILITIES, 0xFF02000D),
- OID_U32_C(OID_INL_OUTPUTPOWER, 0xFF02000F),
-
-};
-
-int
-mgt_init(islpci_private *priv)
-{
- int i;
-
- priv->mib = kcalloc(OID_NUM_LAST, sizeof (void *), GFP_KERNEL);
- if (!priv->mib)
- return -ENOMEM;
-
- /* Alloc the cache */
- for (i = 0; i < OID_NUM_LAST; i++) {
- if (isl_oid[i].flags & OID_FLAG_CACHED) {
- priv->mib[i] = kcalloc(isl_oid[i].size,
- (isl_oid[i].range + 1),
- GFP_KERNEL);
- if (!priv->mib[i])
- return -ENOMEM;
- } else
- priv->mib[i] = NULL;
- }
-
- init_rwsem(&priv->mib_sem);
- prism54_mib_init(priv);
-
- return 0;
-}
-
-void
-mgt_clean(islpci_private *priv)
-{
- int i;
-
- if (!priv->mib)
- return;
- for (i = 0; i < OID_NUM_LAST; i++) {
- kfree(priv->mib[i]);
- priv->mib[i] = NULL;
- }
- kfree(priv->mib);
- priv->mib = NULL;
-}
-
-void
-mgt_le_to_cpu(int type, void *data)
-{
- switch (type) {
- case OID_TYPE_U32:
- *(u32 *) data = le32_to_cpu(*(u32 *) data);
- break;
- case OID_TYPE_BUFFER:{
- struct obj_buffer *buff = data;
- buff->size = le32_to_cpu(buff->size);
- buff->addr = le32_to_cpu(buff->addr);
- break;
- }
- case OID_TYPE_BSS:{
- struct obj_bss *bss = data;
- bss->age = le16_to_cpu(bss->age);
- bss->channel = le16_to_cpu(bss->channel);
- bss->capinfo = le16_to_cpu(bss->capinfo);
- bss->rates = le16_to_cpu(bss->rates);
- bss->basic_rates = le16_to_cpu(bss->basic_rates);
- break;
- }
- case OID_TYPE_BSSLIST:{
- struct obj_bsslist *list = data;
- int i;
- list->nr = le32_to_cpu(list->nr);
- for (i = 0; i < list->nr; i++)
- mgt_le_to_cpu(OID_TYPE_BSS, &list->bsslist[i]);
- break;
- }
- case OID_TYPE_FREQUENCIES:{
- struct obj_frequencies *freq = data;
- int i;
- freq->nr = le16_to_cpu(freq->nr);
- for (i = 0; i < freq->nr; i++)
- freq->mhz[i] = le16_to_cpu(freq->mhz[i]);
- break;
- }
- case OID_TYPE_MLME:{
- struct obj_mlme *mlme = data;
- mlme->id = le16_to_cpu(mlme->id);
- mlme->state = le16_to_cpu(mlme->state);
- mlme->code = le16_to_cpu(mlme->code);
- break;
- }
- case OID_TYPE_MLMEEX:{
- struct obj_mlmeex *mlme = data;
- mlme->id = le16_to_cpu(mlme->id);
- mlme->state = le16_to_cpu(mlme->state);
- mlme->code = le16_to_cpu(mlme->code);
- mlme->size = le16_to_cpu(mlme->size);
- break;
- }
- case OID_TYPE_ATTACH:{
- struct obj_attachment *attach = data;
- attach->id = le16_to_cpu(attach->id);
- attach->size = le16_to_cpu(attach->size);
- break;
- }
- case OID_TYPE_SSID:
- case OID_TYPE_KEY:
- case OID_TYPE_ADDR:
- case OID_TYPE_RAW:
- break;
- default:
- BUG();
- }
-}
-
-static void
-mgt_cpu_to_le(int type, void *data)
-{
- switch (type) {
- case OID_TYPE_U32:
- *(u32 *) data = cpu_to_le32(*(u32 *) data);
- break;
- case OID_TYPE_BUFFER:{
- struct obj_buffer *buff = data;
- buff->size = cpu_to_le32(buff->size);
- buff->addr = cpu_to_le32(buff->addr);
- break;
- }
- case OID_TYPE_BSS:{
- struct obj_bss *bss = data;
- bss->age = cpu_to_le16(bss->age);
- bss->channel = cpu_to_le16(bss->channel);
- bss->capinfo = cpu_to_le16(bss->capinfo);
- bss->rates = cpu_to_le16(bss->rates);
- bss->basic_rates = cpu_to_le16(bss->basic_rates);
- break;
- }
- case OID_TYPE_BSSLIST:{
- struct obj_bsslist *list = data;
- int i;
- list->nr = cpu_to_le32(list->nr);
- for (i = 0; i < list->nr; i++)
- mgt_cpu_to_le(OID_TYPE_BSS, &list->bsslist[i]);
- break;
- }
- case OID_TYPE_FREQUENCIES:{
- struct obj_frequencies *freq = data;
- int i;
- freq->nr = cpu_to_le16(freq->nr);
- for (i = 0; i < freq->nr; i++)
- freq->mhz[i] = cpu_to_le16(freq->mhz[i]);
- break;
- }
- case OID_TYPE_MLME:{
- struct obj_mlme *mlme = data;
- mlme->id = cpu_to_le16(mlme->id);
- mlme->state = cpu_to_le16(mlme->state);
- mlme->code = cpu_to_le16(mlme->code);
- break;
- }
- case OID_TYPE_MLMEEX:{
- struct obj_mlmeex *mlme = data;
- mlme->id = cpu_to_le16(mlme->id);
- mlme->state = cpu_to_le16(mlme->state);
- mlme->code = cpu_to_le16(mlme->code);
- mlme->size = cpu_to_le16(mlme->size);
- break;
- }
- case OID_TYPE_ATTACH:{
- struct obj_attachment *attach = data;
- attach->id = cpu_to_le16(attach->id);
- attach->size = cpu_to_le16(attach->size);
- break;
- }
- case OID_TYPE_SSID:
- case OID_TYPE_KEY:
- case OID_TYPE_ADDR:
- case OID_TYPE_RAW:
- break;
- default:
- BUG();
- }
-}
-
-/* Note : data is modified during this function */
-
-int
-mgt_set_request(islpci_private *priv, enum oid_num_t n, int extra, void *data)
-{
- int ret = 0;
- struct islpci_mgmtframe *response = NULL;
- int response_op = PIMFOR_OP_ERROR;
- int dlen;
- void *cache, *_data = data;
- u32 oid;
-
- BUG_ON(n >= OID_NUM_LAST);
- BUG_ON(extra > isl_oid[n].range);
-
- if (!priv->mib)
- /* memory has been freed */
- return -1;
-
- dlen = isl_oid[n].size;
- cache = priv->mib[n];
- cache += (cache ? extra * dlen : 0);
- oid = isl_oid[n].oid + extra;
-
- if (_data == NULL)
- /* we are requested to re-set a cached value */
- _data = cache;
- else
- mgt_cpu_to_le(isl_oid[n].flags & OID_FLAG_TYPE, _data);
- /* If we are going to write to the cache, we don't want anyone to read
- * it -> acquire write lock.
- * Else we could acquire a read lock to be sure we don't bother the
- * commit process (which takes a write lock). But I'm not sure if it's
- * needed.
- */
- if (cache)
- down_write(&priv->mib_sem);
-
- if (islpci_get_state(priv) >= PRV_STATE_READY) {
- ret = islpci_mgt_transaction(priv->ndev, PIMFOR_OP_SET, oid,
- _data, dlen, &response);
- if (!ret) {
- response_op = response->header->operation;
- islpci_mgt_release(response);
- }
- if (ret || response_op == PIMFOR_OP_ERROR)
- ret = -EIO;
- } else if (!cache)
- ret = -EIO;
-
- if (cache) {
- if (!ret && data)
- memcpy(cache, _data, dlen);
- up_write(&priv->mib_sem);
- }
-
- /* re-set given data to what it was */
- if (data)
- mgt_le_to_cpu(isl_oid[n].flags & OID_FLAG_TYPE, data);
-
- return ret;
-}
-
-/* None of these are cached */
-int
-mgt_set_varlen(islpci_private *priv, enum oid_num_t n, void *data, int extra_len)
-{
- int ret = 0;
- struct islpci_mgmtframe *response;
- int response_op = PIMFOR_OP_ERROR;
- int dlen;
- u32 oid;
-
- BUG_ON(n >= OID_NUM_LAST);
-
- dlen = isl_oid[n].size;
- oid = isl_oid[n].oid;
-
- mgt_cpu_to_le(isl_oid[n].flags & OID_FLAG_TYPE, data);
-
- if (islpci_get_state(priv) >= PRV_STATE_READY) {
- ret = islpci_mgt_transaction(priv->ndev, PIMFOR_OP_SET, oid,
- data, dlen + extra_len, &response);
- if (!ret) {
- response_op = response->header->operation;
- islpci_mgt_release(response);
- }
- if (ret || response_op == PIMFOR_OP_ERROR)
- ret = -EIO;
- } else
- ret = -EIO;
-
- /* re-set given data to what it was */
- if (data)
- mgt_le_to_cpu(isl_oid[n].flags & OID_FLAG_TYPE, data);
-
- return ret;
-}
-
-int
-mgt_get_request(islpci_private *priv, enum oid_num_t n, int extra, void *data,
- union oid_res_t *res)
-{
-
- int ret = -EIO;
- int reslen = 0;
- struct islpci_mgmtframe *response = NULL;
-
- int dlen;
- void *cache, *_res = NULL;
- u32 oid;
-
- BUG_ON(n >= OID_NUM_LAST);
- BUG_ON(extra > isl_oid[n].range);
-
- res->ptr = NULL;
-
- if (!priv->mib)
- /* memory has been freed */
- return -1;
-
- dlen = isl_oid[n].size;
- cache = priv->mib[n];
- cache += cache ? extra * dlen : 0;
- oid = isl_oid[n].oid + extra;
- reslen = dlen;
-
- if (cache)
- down_read(&priv->mib_sem);
-
- if (islpci_get_state(priv) >= PRV_STATE_READY) {
- ret = islpci_mgt_transaction(priv->ndev, PIMFOR_OP_GET,
- oid, data, dlen, &response);
- if (ret || !response ||
- response->header->operation == PIMFOR_OP_ERROR) {
- if (response)
- islpci_mgt_release(response);
- ret = -EIO;
- }
- if (!ret) {
- _res = response->data;
- reslen = response->header->length;
- }
- } else if (cache) {
- _res = cache;
- ret = 0;
- }
- if ((isl_oid[n].flags & OID_FLAG_TYPE) == OID_TYPE_U32)
- res->u = ret ? 0 : le32_to_cpu(*(u32 *) _res);
- else {
- res->ptr = kmalloc(reslen, GFP_KERNEL);
- BUG_ON(res->ptr == NULL);
- if (ret)
- memset(res->ptr, 0, reslen);
- else {
- memcpy(res->ptr, _res, reslen);
- mgt_le_to_cpu(isl_oid[n].flags & OID_FLAG_TYPE,
- res->ptr);
- }
- }
- if (cache)
- up_read(&priv->mib_sem);
-
- if (response && !ret)
- islpci_mgt_release(response);
-
- if (reslen > isl_oid[n].size)
- printk(KERN_DEBUG
- "mgt_get_request(0x%x): received data length was bigger "
- "than expected (%d > %d). Memory is probably corrupted...",
- oid, reslen, isl_oid[n].size);
-
- return ret;
-}
-
-/* lock outside */
-int
-mgt_commit_list(islpci_private *priv, enum oid_num_t *l, int n)
-{
- int i, ret = 0;
- struct islpci_mgmtframe *response;
-
- for (i = 0; i < n; i++) {
- struct oid_t *t = &(isl_oid[l[i]]);
- void *data = priv->mib[l[i]];
- int j = 0;
- u32 oid = t->oid;
- BUG_ON(data == NULL);
- while (j <= t->range) {
- int r = islpci_mgt_transaction(priv->ndev, PIMFOR_OP_SET,
- oid, data, t->size,
- &response);
- if (response) {
- r |= (response->header->operation == PIMFOR_OP_ERROR);
- islpci_mgt_release(response);
- }
- if (r)
- printk(KERN_ERR "%s: mgt_commit_list: failure. "
- "oid=%08x err=%d\n",
- priv->ndev->name, oid, r);
- ret |= r;
- j++;
- oid++;
- data += t->size;
- }
- }
- return ret;
-}
-
-/* Lock outside */
-
-void
-mgt_set(islpci_private *priv, enum oid_num_t n, void *data)
-{
- BUG_ON(n >= OID_NUM_LAST);
- BUG_ON(priv->mib[n] == NULL);
-
- memcpy(priv->mib[n], data, isl_oid[n].size);
- mgt_cpu_to_le(isl_oid[n].flags & OID_FLAG_TYPE, priv->mib[n]);
-}
-
-void
-mgt_get(islpci_private *priv, enum oid_num_t n, void *res)
-{
- BUG_ON(n >= OID_NUM_LAST);
- BUG_ON(priv->mib[n] == NULL);
- BUG_ON(res == NULL);
-
- memcpy(res, priv->mib[n], isl_oid[n].size);
- mgt_le_to_cpu(isl_oid[n].flags & OID_FLAG_TYPE, res);
-}
-
-/* Commits the cache. Lock outside. */
-
-static enum oid_num_t commit_part1[] = {
- OID_INL_CONFIG,
- OID_INL_MODE,
- DOT11_OID_BSSTYPE,
- DOT11_OID_CHANNEL,
- DOT11_OID_MLMEAUTOLEVEL
-};
-
-static enum oid_num_t commit_part2[] = {
- DOT11_OID_SSID,
- DOT11_OID_PSMBUFFER,
- DOT11_OID_AUTHENABLE,
- DOT11_OID_PRIVACYINVOKED,
- DOT11_OID_EXUNENCRYPTED,
- DOT11_OID_DEFKEYX, /* MULTIPLE */
- DOT11_OID_DEFKEYID,
- DOT11_OID_DOT1XENABLE,
- OID_INL_DOT11D_CONFORMANCE,
- /* Do not initialize this - fw < 1.0.4.3 rejects it
- OID_INL_OUTPUTPOWER,
- */
-};
-
-/* update the MAC addr. */
-static int
-mgt_update_addr(islpci_private *priv)
-{
- struct islpci_mgmtframe *res;
- int ret;
-
- ret = islpci_mgt_transaction(priv->ndev, PIMFOR_OP_GET,
- isl_oid[GEN_OID_MACADDRESS].oid, NULL,
- isl_oid[GEN_OID_MACADDRESS].size, &res);
-
- if ((ret == 0) && res && (res->header->operation != PIMFOR_OP_ERROR))
- memcpy(priv->ndev->dev_addr, res->data, ETH_ALEN);
- else
- ret = -EIO;
- if (res)
- islpci_mgt_release(res);
-
- if (ret)
- printk(KERN_ERR "%s: mgt_update_addr: failure\n", priv->ndev->name);
- return ret;
-}
-
-int
-mgt_commit(islpci_private *priv)
-{
- int rvalue;
- enum oid_num_t u;
-
- if (islpci_get_state(priv) < PRV_STATE_INIT)
- return 0;
-
- rvalue = mgt_commit_list(priv, commit_part1, ARRAY_SIZE(commit_part1));
-
- if (priv->iw_mode != IW_MODE_MONITOR)
- rvalue |= mgt_commit_list(priv, commit_part2, ARRAY_SIZE(commit_part2));
-
- u = OID_INL_MODE;
- rvalue |= mgt_commit_list(priv, &u, 1);
- rvalue |= mgt_update_addr(priv);
-
- if (rvalue) {
- /* some request have failed. The device might be in an
- incoherent state. We should reset it ! */
- printk(KERN_DEBUG "%s: mgt_commit: failure\n", priv->ndev->name);
- }
- return rvalue;
-}
-
-/* The following OIDs need to be "unlatched":
- *
- * MEDIUMLIMIT,BEACONPERIOD,DTIMPERIOD,ATIMWINDOW,LISTENINTERVAL
- * FREQUENCY,EXTENDEDRATES.
- *
- * The way to do this is to set ESSID. Note though that they may get
- * unlatch before though by setting another OID. */
-#if 0
-void
-mgt_unlatch_all(islpci_private *priv)
-{
- u32 u;
- int rvalue = 0;
-
- if (islpci_get_state(priv) < PRV_STATE_INIT)
- return;
-
- u = DOT11_OID_SSID;
- rvalue = mgt_commit_list(priv, &u, 1);
- /* Necessary if in MANUAL RUN mode? */
-#if 0
- u = OID_INL_MODE;
- rvalue |= mgt_commit_list(priv, &u, 1);
-
- u = DOT11_OID_MLMEAUTOLEVEL;
- rvalue |= mgt_commit_list(priv, &u, 1);
-
- u = OID_INL_MODE;
- rvalue |= mgt_commit_list(priv, &u, 1);
-#endif
-
- if (rvalue)
- printk(KERN_DEBUG "%s: Unlatching OIDs failed\n", priv->ndev->name);
-}
-#endif
-
-/* This will tell you if you are allowed to answer a mlme(ex) request .*/
-
-int
-mgt_mlme_answer(islpci_private *priv)
-{
- u32 mlmeautolevel;
- /* Acquire a read lock because if we are in a mode change, it's
- * possible to answer true, while the card is leaving master to managed
- * mode. Answering to a mlme in this situation could hang the card.
- */
- down_read(&priv->mib_sem);
- mlmeautolevel =
- le32_to_cpu(*(u32 *) priv->mib[DOT11_OID_MLMEAUTOLEVEL]);
- up_read(&priv->mib_sem);
-
- return ((priv->iw_mode == IW_MODE_MASTER) &&
- (mlmeautolevel >= DOT11_MLME_INTERMEDIATE));
-}
-
-enum oid_num_t
-mgt_oidtonum(u32 oid)
-{
- int i;
-
- for (i = 0; i < OID_NUM_LAST; i++)
- if (isl_oid[i].oid == oid)
- return i;
-
- printk(KERN_DEBUG "looking for an unknown oid 0x%x", oid);
-
- return OID_NUM_LAST;
-}
-
-int
-mgt_response_to_str(enum oid_num_t n, union oid_res_t *r, char *str)
-{
- switch (isl_oid[n].flags & OID_FLAG_TYPE) {
- case OID_TYPE_U32:
- return scnprintf(str, PRIV_STR_SIZE, "%u\n", r->u);
- case OID_TYPE_BUFFER:{
- struct obj_buffer *buff = r->ptr;
- return scnprintf(str, PRIV_STR_SIZE,
- "size=%u\naddr=0x%X\n", buff->size,
- buff->addr);
- }
- break;
- case OID_TYPE_BSS:{
- struct obj_bss *bss = r->ptr;
- return scnprintf(str, PRIV_STR_SIZE,
- "age=%u\nchannel=%u\n"
- "capinfo=0x%X\nrates=0x%X\n"
- "basic_rates=0x%X\n", bss->age,
- bss->channel, bss->capinfo,
- bss->rates, bss->basic_rates);
- }
- break;
- case OID_TYPE_BSSLIST:{
- struct obj_bsslist *list = r->ptr;
- int i, k;
- k = scnprintf(str, PRIV_STR_SIZE, "nr=%u\n", list->nr);
- for (i = 0; i < list->nr; i++)
- k += scnprintf(str + k, PRIV_STR_SIZE - k,
- "bss[%u] :\nage=%u\nchannel=%u\n"
- "capinfo=0x%X\nrates=0x%X\n"
- "basic_rates=0x%X\n",
- i, list->bsslist[i].age,
- list->bsslist[i].channel,
- list->bsslist[i].capinfo,
- list->bsslist[i].rates,
- list->bsslist[i].basic_rates);
- return k;
- }
- break;
- case OID_TYPE_FREQUENCIES:{
- struct obj_frequencies *freq = r->ptr;
- int i, t;
- printk("nr : %u\n", freq->nr);
- t = scnprintf(str, PRIV_STR_SIZE, "nr=%u\n", freq->nr);
- for (i = 0; i < freq->nr; i++)
- t += scnprintf(str + t, PRIV_STR_SIZE - t,
- "mhz[%u]=%u\n", i, freq->mhz[i]);
- return t;
- }
- break;
- case OID_TYPE_MLME:{
- struct obj_mlme *mlme = r->ptr;
- return scnprintf(str, PRIV_STR_SIZE,
- "id=0x%X\nstate=0x%X\ncode=0x%X\n",
- mlme->id, mlme->state, mlme->code);
- }
- break;
- case OID_TYPE_MLMEEX:{
- struct obj_mlmeex *mlme = r->ptr;
- return scnprintf(str, PRIV_STR_SIZE,
- "id=0x%X\nstate=0x%X\n"
- "code=0x%X\nsize=0x%X\n", mlme->id,
- mlme->state, mlme->code, mlme->size);
- }
- break;
- case OID_TYPE_ATTACH:{
- struct obj_attachment *attach = r->ptr;
- return scnprintf(str, PRIV_STR_SIZE,
- "id=%d\nsize=%d\n",
- attach->id,
- attach->size);
- }
- break;
- case OID_TYPE_SSID:{
- struct obj_ssid *ssid = r->ptr;
- return scnprintf(str, PRIV_STR_SIZE,
- "length=%u\noctets=%.*s\n",
- ssid->length, ssid->length,
- ssid->octets);
- }
- break;
- case OID_TYPE_KEY:{
- struct obj_key *key = r->ptr;
- int t, i;
- t = scnprintf(str, PRIV_STR_SIZE,
- "type=0x%X\nlength=0x%X\nkey=0x",
- key->type, key->length);
- for (i = 0; i < key->length; i++)
- t += scnprintf(str + t, PRIV_STR_SIZE - t,
- "%02X:", key->key[i]);
- t += scnprintf(str + t, PRIV_STR_SIZE - t, "\n");
- return t;
- }
- break;
- case OID_TYPE_RAW:
- case OID_TYPE_ADDR:{
- unsigned char *buff = r->ptr;
- int t, i;
- t = scnprintf(str, PRIV_STR_SIZE, "hex data=");
- for (i = 0; i < isl_oid[n].size; i++)
- t += scnprintf(str + t, PRIV_STR_SIZE - t,
- "%02X:", buff[i]);
- t += scnprintf(str + t, PRIV_STR_SIZE - t, "\n");
- return t;
- }
- break;
- default:
- BUG();
- }
- return 0;
-}
diff --git a/drivers/net/wireless/intersil/prism54/oid_mgt.h b/drivers/net/wireless/intersil/prism54/oid_mgt.h
deleted file mode 100644
index a7dc9e24c0bf..000000000000
--- a/drivers/net/wireless/intersil/prism54/oid_mgt.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2003 Aurelien Alleaume <slts@free.fr>
- */
-
-#if !defined(_OID_MGT_H)
-#define _OID_MGT_H
-
-#include "isl_oid.h"
-#include "islpci_dev.h"
-
-extern struct oid_t isl_oid[];
-
-int mgt_init(islpci_private *);
-
-void mgt_clean(islpci_private *);
-
-/* I don't know where to put these 2 */
-extern const int frequency_list_a[];
-int channel_of_freq(int);
-
-void mgt_le_to_cpu(int, void *);
-
-int mgt_set_request(islpci_private *, enum oid_num_t, int, void *);
-int mgt_set_varlen(islpci_private *, enum oid_num_t, void *, int);
-
-
-int mgt_get_request(islpci_private *, enum oid_num_t, int, void *,
- union oid_res_t *);
-
-int mgt_commit_list(islpci_private *, enum oid_num_t *, int);
-
-void mgt_set(islpci_private *, enum oid_num_t, void *);
-
-void mgt_get(islpci_private *, enum oid_num_t, void *);
-
-int mgt_commit(islpci_private *);
-
-int mgt_mlme_answer(islpci_private *);
-
-enum oid_num_t mgt_oidtonum(u32 oid);
-
-int mgt_response_to_str(enum oid_num_t, union oid_res_t *, char *);
-
-#endif /* !defined(_OID_MGT_H) */
-/* EOF */
diff --git a/drivers/net/wireless/intersil/prism54/prismcompat.h b/drivers/net/wireless/intersil/prism54/prismcompat.h
deleted file mode 100644
index c4489b66d07e..000000000000
--- a/drivers/net/wireless/intersil/prism54/prismcompat.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * (C) 2004 Margit Schubert-While <margitsw@t-online.de>
- */
-
-/*
- * Compatibility header file to aid support of different kernel versions
- */
-
-#ifdef PRISM54_COMPAT24
-#include "prismcompat24.h"
-#else /* PRISM54_COMPAT24 */
-
-#ifndef _PRISM_COMPAT_H
-#define _PRISM_COMPAT_H
-
-#include <linux/device.h>
-#include <linux/firmware.h>
-#include <linux/moduleparam.h>
-#include <linux/workqueue.h>
-#include <linux/compiler.h>
-
-#ifndef __iomem
-#define __iomem
-#endif
-
-#define PRISM_FW_PDEV &priv->pdev->dev
-
-#endif /* _PRISM_COMPAT_H */
-#endif /* PRISM54_COMPAT24 */
diff --git a/drivers/net/wireless/marvell/libertas/ethtool.c b/drivers/net/wireless/marvell/libertas/ethtool.c
index 1bb8746a0b23..d8e4f29b690d 100644
--- a/drivers/net/wireless/marvell/libertas/ethtool.c
+++ b/drivers/net/wireless/marvell/libertas/ethtool.c
@@ -43,10 +43,8 @@ static int lbs_ethtool_get_eeprom(struct net_device *dev,
int ret;
if (eeprom->offset + eeprom->len > LBS_EEPROM_LEN ||
- eeprom->len > LBS_EEPROM_READ_LEN) {
- ret = -EINVAL;
- goto out;
- }
+ eeprom->len > LBS_EEPROM_READ_LEN)
+ return -EINVAL;
cmd.hdr.size = cpu_to_le16(sizeof(struct cmd_ds_802_11_eeprom_access) -
LBS_EEPROM_READ_LEN + eeprom->len);
@@ -57,8 +55,7 @@ static int lbs_ethtool_get_eeprom(struct net_device *dev,
if (!ret)
memcpy(bytes, cmd.value, eeprom->len);
-out:
- return ret;
+ return ret;
}
static void lbs_ethtool_get_wol(struct net_device *dev,
diff --git a/drivers/net/wireless/marvell/mwifiex/Makefile b/drivers/net/wireless/marvell/mwifiex/Makefile
index 162d557b78af..2bd00f40958e 100644
--- a/drivers/net/wireless/marvell/mwifiex/Makefile
+++ b/drivers/net/wireless/marvell/mwifiex/Makefile
@@ -49,6 +49,7 @@ mwifiex_sdio-y += sdio.o
obj-$(CONFIG_MWIFIEX_SDIO) += mwifiex_sdio.o
mwifiex_pcie-y += pcie.o
+mwifiex_pcie-y += pcie_quirks.o
obj-$(CONFIG_MWIFIEX_PCIE) += mwifiex_pcie.o
mwifiex_usb-y += usb.o
diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
index 3a11342a6bde..171a25742600 100644
--- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c
+++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
@@ -187,7 +187,7 @@ static int mwifiex_dnld_cmd_to_fw(struct mwifiex_private *priv,
host_cmd = (struct host_cmd_ds_command *) (cmd_node->cmd_skb->data);
/* Sanity test */
- if (host_cmd == NULL || host_cmd->size == 0) {
+ if (host_cmd->size == 0) {
mwifiex_dbg(adapter, ERROR,
"DNLD_CMD: host_cmd is null\t"
"or cmd size is 0, not sending\n");
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c
index 46517515ba72..c6ccce426b49 100644
--- a/drivers/net/wireless/marvell/mwifiex/pcie.c
+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
@@ -27,6 +27,7 @@
#include "wmm.h"
#include "11n.h"
#include "pcie.h"
+#include "pcie_quirks.h"
#define PCIE_VERSION "1.0"
#define DRV_NAME "Marvell mwifiex PCIe"
@@ -410,6 +411,9 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev,
return ret;
}
+ /* check quirks */
+ mwifiex_initialize_quirks(card);
+
if (mwifiex_add_card(card, &card->fw_done, &pcie_ops,
MWIFIEX_PCIE, &pdev->dev)) {
pr_err("%s failed\n", __func__);
@@ -524,6 +528,13 @@ static void mwifiex_pcie_reset_prepare(struct pci_dev *pdev)
mwifiex_shutdown_sw(adapter);
clear_bit(MWIFIEX_IFACE_WORK_DEVICE_DUMP, &card->work_flags);
clear_bit(MWIFIEX_IFACE_WORK_CARD_RESET, &card->work_flags);
+
+ /* On MS Surface gen4+ devices FLR isn't effective to recover from
+ * hangups, so we power-cycle the card instead.
+ */
+ if (card->quirks & QUIRK_FW_RST_D3COLD)
+ mwifiex_pcie_reset_d3cold_quirk(pdev);
+
mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__);
card->pci_reset_ongoing = true;
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.h b/drivers/net/wireless/marvell/mwifiex/pcie.h
index 5ed613d65709..981e330c77d7 100644
--- a/drivers/net/wireless/marvell/mwifiex/pcie.h
+++ b/drivers/net/wireless/marvell/mwifiex/pcie.h
@@ -244,6 +244,7 @@ struct pcie_service_card {
unsigned long work_flags;
bool pci_reset_ongoing;
+ unsigned long quirks;
};
static inline int
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
new file mode 100644
index 000000000000..0234cf3c2974
--- /dev/null
+++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
@@ -0,0 +1,161 @@
+/*
+ * NXP Wireless LAN device driver: PCIE and platform specific quirks
+ *
+ * This software file (the "File") is distributed by NXP
+ * under the terms of the GNU General Public License Version 2, June 1991
+ * (the "License"). You may use, redistribute and/or modify this File in
+ * accordance with the terms and conditions of the License, a copy of which
+ * is available by writing to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA or on the
+ * worldwide web at http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
+ *
+ * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
+ * ARE EXPRESSLY DISCLAIMED. The License provides additional details about
+ * this warranty disclaimer.
+ */
+
+#include <linux/dmi.h>
+
+#include "pcie_quirks.h"
+
+/* quirk table based on DMI matching */
+static const struct dmi_system_id mwifiex_quirk_table[] = {
+ {
+ .ident = "Surface Pro 4",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 4"),
+ },
+ .driver_data = (void *)QUIRK_FW_RST_D3COLD,
+ },
+ {
+ .ident = "Surface Pro 5",
+ .matches = {
+ /* match for SKU here due to generic product name "Surface Pro" */
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_1796"),
+ },
+ .driver_data = (void *)QUIRK_FW_RST_D3COLD,
+ },
+ {
+ .ident = "Surface Pro 5 (LTE)",
+ .matches = {
+ /* match for SKU here due to generic product name "Surface Pro" */
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_1807"),
+ },
+ .driver_data = (void *)QUIRK_FW_RST_D3COLD,
+ },
+ {
+ .ident = "Surface Pro 6",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 6"),
+ },
+ .driver_data = (void *)QUIRK_FW_RST_D3COLD,
+ },
+ {
+ .ident = "Surface Book 1",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Book"),
+ },
+ .driver_data = (void *)QUIRK_FW_RST_D3COLD,
+ },
+ {
+ .ident = "Surface Book 2",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Book 2"),
+ },
+ .driver_data = (void *)QUIRK_FW_RST_D3COLD,
+ },
+ {
+ .ident = "Surface Laptop 1",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Laptop"),
+ },
+ .driver_data = (void *)QUIRK_FW_RST_D3COLD,
+ },
+ {
+ .ident = "Surface Laptop 2",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Laptop 2"),
+ },
+ .driver_data = (void *)QUIRK_FW_RST_D3COLD,
+ },
+ {}
+};
+
+void mwifiex_initialize_quirks(struct pcie_service_card *card)
+{
+ struct pci_dev *pdev = card->dev;
+ const struct dmi_system_id *dmi_id;
+
+ dmi_id = dmi_first_match(mwifiex_quirk_table);
+ if (dmi_id)
+ card->quirks = (uintptr_t)dmi_id->driver_data;
+
+ if (!card->quirks)
+ dev_info(&pdev->dev, "no quirks enabled\n");
+ if (card->quirks & QUIRK_FW_RST_D3COLD)
+ dev_info(&pdev->dev, "quirk reset_d3cold enabled\n");
+}
+
+static void mwifiex_pcie_set_power_d3cold(struct pci_dev *pdev)
+{
+ dev_info(&pdev->dev, "putting into D3cold...\n");
+
+ pci_save_state(pdev);
+ if (pci_is_enabled(pdev))
+ pci_disable_device(pdev);
+ pci_set_power_state(pdev, PCI_D3cold);
+}
+
+static int mwifiex_pcie_set_power_d0(struct pci_dev *pdev)
+{
+ int ret;
+
+ dev_info(&pdev->dev, "putting into D0...\n");
+
+ pci_set_power_state(pdev, PCI_D0);
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ dev_err(&pdev->dev, "pci_enable_device failed\n");
+ return ret;
+ }
+ pci_restore_state(pdev);
+
+ return 0;
+}
+
+int mwifiex_pcie_reset_d3cold_quirk(struct pci_dev *pdev)
+{
+ struct pci_dev *parent_pdev = pci_upstream_bridge(pdev);
+ int ret;
+
+ /* Power-cycle (put into D3cold then D0) */
+ dev_info(&pdev->dev, "Using reset_d3cold quirk to perform FW reset\n");
+
+ /* We need to perform power-cycle also for bridge of wifi because
+ * on some devices (e.g. Surface Book 1), the OS for some reasons
+ * can't know the real power state of the bridge.
+ * When tried to power-cycle only wifi, the reset failed with the
+ * following dmesg log:
+ * "Cannot transition to power state D0 for parent in D3hot".
+ */
+ mwifiex_pcie_set_power_d3cold(pdev);
+ mwifiex_pcie_set_power_d3cold(parent_pdev);
+
+ ret = mwifiex_pcie_set_power_d0(parent_pdev);
+ if (ret)
+ return ret;
+ ret = mwifiex_pcie_set_power_d0(pdev);
+ if (ret)
+ return ret;
+
+ return 0;
+}
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
new file mode 100644
index 000000000000..8ec4176d698f
--- /dev/null
+++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
@@ -0,0 +1,23 @@
+/*
+ * NXP Wireless LAN device driver: PCIE and platform specific quirks
+ *
+ * This software file (the "File") is distributed by NXP
+ * under the terms of the GNU General Public License Version 2, June 1991
+ * (the "License"). You may use, redistribute and/or modify this File in
+ * accordance with the terms and conditions of the License, a copy of which
+ * is available by writing to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA or on the
+ * worldwide web at http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
+ *
+ * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
+ * ARE EXPRESSLY DISCLAIMED. The License provides additional details about
+ * this warranty disclaimer.
+ */
+
+#include "pcie.h"
+
+#define QUIRK_FW_RST_D3COLD BIT(0)
+
+void mwifiex_initialize_quirks(struct pcie_service_card *card);
+int mwifiex_pcie_reset_d3cold_quirk(struct pci_dev *pdev);
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
index 653f9e094256..4062e515697a 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
@@ -1325,8 +1325,8 @@ mwifiex_set_gen_ie_helper(struct mwifiex_private *priv, u8 *ie_data_ptr,
u16 ie_len)
{
struct ieee_types_vendor_header *pvendor_ie;
- const u8 wpa_oui[] = { 0x00, 0x50, 0xf2, 0x01 };
- const u8 wps_oui[] = { 0x00, 0x50, 0xf2, 0x04 };
+ static const u8 wpa_oui[] = { 0x00, 0x50, 0xf2, 0x01 };
+ static const u8 wps_oui[] = { 0x00, 0x50, 0xf2, 0x04 };
u16 unparsed_len = ie_len, cur_ie_len;
/* If the passed length is zero, reset the buffer */
diff --git a/drivers/net/wireless/marvell/mwifiex/usb.h b/drivers/net/wireless/marvell/mwifiex/usb.h
index d822ec15b7e6..61a96b7fbf21 100644
--- a/drivers/net/wireless/marvell/mwifiex/usb.h
+++ b/drivers/net/wireless/marvell/mwifiex/usb.h
@@ -134,7 +134,7 @@ struct fw_sync_header {
struct fw_data {
struct fw_header fw_hdr;
__le32 seq_num;
- u8 data[1];
+ u8 data[];
} __packed;
#endif /*_MWIFIEX_USB_H */
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 863aa18b3024..43960770a9af 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -111,7 +111,7 @@ mt7915_mcu_get_cipher(int cipher)
case WLAN_CIPHER_SUITE_SMS4:
return MCU_CIPHER_WAPI;
default:
- return MT_CIPHER_NONE;
+ return MCU_CIPHER_NONE;
}
}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index edd3ba3a0c2d..e68a562cc5b4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -1073,7 +1073,8 @@ enum {
};
enum mcu_cipher_type {
- MCU_CIPHER_WEP40 = 1,
+ MCU_CIPHER_NONE = 0,
+ MCU_CIPHER_WEP40,
MCU_CIPHER_WEP104,
MCU_CIPHER_WEP128,
MCU_CIPHER_TKIP,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index cd690c64f65b..9fbaacc67cfa 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -111,7 +111,7 @@ mt7921_mcu_get_cipher(int cipher)
case WLAN_CIPHER_SUITE_SMS4:
return MCU_CIPHER_WAPI;
default:
- return MT_CIPHER_NONE;
+ return MCU_CIPHER_NONE;
}
}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
index d76cf8f8dfdf..de3c091f6736 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
@@ -199,7 +199,8 @@ struct sta_rec_sec {
} __packed;
enum mcu_cipher_type {
- MCU_CIPHER_WEP40 = 1,
+ MCU_CIPHER_NONE = 0,
+ MCU_CIPHER_WEP40,
MCU_CIPHER_WEP104,
MCU_CIPHER_WEP128,
MCU_CIPHER_TKIP,
diff --git a/drivers/net/wireless/microchip/wilc1000/sdio.c b/drivers/net/wireless/microchip/wilc1000/sdio.c
index e14b9fc2c67a..42e03a701ae1 100644
--- a/drivers/net/wireless/microchip/wilc1000/sdio.c
+++ b/drivers/net/wireless/microchip/wilc1000/sdio.c
@@ -129,10 +129,8 @@ static int wilc_sdio_probe(struct sdio_func *func,
ret = wilc_cfg80211_init(&wilc, &func->dev, WILC_HIF_SDIO,
&wilc_hif_sdio);
- if (ret) {
- kfree(sdio_priv);
- return ret;
- }
+ if (ret)
+ goto free;
if (IS_ENABLED(CONFIG_WILC1000_HW_OOB_INTR)) {
struct device_node *np = func->card->dev.of_node;
@@ -148,24 +146,29 @@ static int wilc_sdio_probe(struct sdio_func *func,
wilc->bus_data = sdio_priv;
wilc->dev = &func->dev;
- wilc->rtc_clk = devm_clk_get(&func->card->dev, "rtc");
- if (PTR_ERR_OR_ZERO(wilc->rtc_clk) == -EPROBE_DEFER) {
- kfree(sdio_priv);
- return -EPROBE_DEFER;
- } else if (!IS_ERR(wilc->rtc_clk))
- clk_prepare_enable(wilc->rtc_clk);
+ wilc->rtc_clk = devm_clk_get_optional(&func->card->dev, "rtc");
+ if (IS_ERR(wilc->rtc_clk)) {
+ ret = PTR_ERR(wilc->rtc_clk);
+ goto dispose_irq;
+ }
+ clk_prepare_enable(wilc->rtc_clk);
dev_info(&func->dev, "Driver Initializing success\n");
return 0;
+
+dispose_irq:
+ irq_dispose_mapping(wilc->dev_irq_num);
+ wilc_netdev_cleanup(wilc);
+free:
+ kfree(sdio_priv);
+ return ret;
}
static void wilc_sdio_remove(struct sdio_func *func)
{
struct wilc *wilc = sdio_get_drvdata(func);
- if (!IS_ERR(wilc->rtc_clk))
- clk_disable_unprepare(wilc->rtc_clk);
-
+ clk_disable_unprepare(wilc->rtc_clk);
wilc_netdev_cleanup(wilc);
}
diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c
index 8e9aaf03a6fa..dd481dc0b5ce 100644
--- a/drivers/net/wireless/microchip/wilc1000/spi.c
+++ b/drivers/net/wireless/microchip/wilc1000/spi.c
@@ -39,6 +39,7 @@ MODULE_PARM_DESC(enable_crc16,
#define WILC_SPI_RSP_HDR_EXTRA_DATA 8
struct wilc_spi {
+ bool isinit; /* true if SPI protocol has been configured */
bool probing_crc; /* true if we're probing chip's CRC config */
bool crc7_enabled; /* true if crc7 is currently enabled */
bool crc16_enabled; /* true if crc16 is currently enabled */
@@ -154,34 +155,37 @@ static int wilc_bus_probe(struct spi_device *spi)
return -ENOMEM;
ret = wilc_cfg80211_init(&wilc, &spi->dev, WILC_HIF_SPI, &wilc_hif_spi);
- if (ret) {
- kfree(spi_priv);
- return ret;
- }
+ if (ret)
+ goto free;
spi_set_drvdata(spi, wilc);
wilc->dev = &spi->dev;
wilc->bus_data = spi_priv;
wilc->dev_irq_num = spi->irq;
- wilc->rtc_clk = devm_clk_get(&spi->dev, "rtc");
- if (PTR_ERR_OR_ZERO(wilc->rtc_clk) == -EPROBE_DEFER) {
- kfree(spi_priv);
- return -EPROBE_DEFER;
- } else if (!IS_ERR(wilc->rtc_clk))
- clk_prepare_enable(wilc->rtc_clk);
+ wilc->rtc_clk = devm_clk_get_optional(&spi->dev, "rtc");
+ if (IS_ERR(wilc->rtc_clk)) {
+ ret = PTR_ERR(wilc->rtc_clk);
+ goto netdev_cleanup;
+ }
+ clk_prepare_enable(wilc->rtc_clk);
return 0;
+
+netdev_cleanup:
+ wilc_netdev_cleanup(wilc);
+free:
+ kfree(spi_priv);
+ return ret;
}
static int wilc_bus_remove(struct spi_device *spi)
{
struct wilc *wilc = spi_get_drvdata(spi);
- if (!IS_ERR(wilc->rtc_clk))
- clk_disable_unprepare(wilc->rtc_clk);
-
+ clk_disable_unprepare(wilc->rtc_clk);
wilc_netdev_cleanup(wilc);
+
return 0;
}
@@ -905,15 +909,15 @@ static int wilc_spi_init(struct wilc *wilc, bool resume)
struct wilc_spi *spi_priv = wilc->bus_data;
u32 reg;
u32 chipid;
- static int isinit;
int ret, i;
- if (isinit) {
+ if (spi_priv->isinit) {
+ /* Confirm we can read chipid register without error: */
ret = wilc_spi_read_reg(wilc, WILC_CHIPID, &chipid);
- if (ret)
- dev_err(&spi->dev, "Fail cmd read chip id...\n");
+ if (ret == 0)
+ return 0;
- return ret;
+ dev_err(&spi->dev, "Fail cmd read chip id...\n");
}
/*
@@ -971,7 +975,7 @@ static int wilc_spi_init(struct wilc *wilc, bool resume)
spi_priv->probing_crc = false;
/*
- * make sure can read back chip id correctly
+ * make sure can read chip id without protocol error
*/
ret = wilc_spi_read_reg(wilc, WILC_CHIPID, &chipid);
if (ret) {
@@ -979,7 +983,7 @@ static int wilc_spi_init(struct wilc *wilc, bool resume)
return ret;
}
- isinit = 1;
+ spi_priv->isinit = true;
return 0;
}
diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.c b/drivers/net/wireless/microchip/wilc1000/wlan.c
index 2030fc7f53ca..200a103a0a85 100644
--- a/drivers/net/wireless/microchip/wilc1000/wlan.c
+++ b/drivers/net/wireless/microchip/wilc1000/wlan.c
@@ -1127,27 +1127,22 @@ int wilc_wlan_start(struct wilc *wilc)
}
acquire_bus(wilc, WILC_BUS_ACQUIRE_ONLY);
ret = wilc->hif_func->hif_write_reg(wilc, WILC_VMM_CORE_CFG, reg);
- if (ret) {
- release_bus(wilc, WILC_BUS_RELEASE_ONLY);
- return ret;
- }
+ if (ret)
+ goto release;
+
reg = 0;
if (wilc->io_type == WILC_HIF_SDIO && wilc->dev_irq_num)
reg |= WILC_HAVE_SDIO_IRQ_GPIO;
ret = wilc->hif_func->hif_write_reg(wilc, WILC_GP_REG_1, reg);
- if (ret) {
- release_bus(wilc, WILC_BUS_RELEASE_ONLY);
- return ret;
- }
+ if (ret)
+ goto release;
wilc->hif_func->hif_sync_ext(wilc, NUM_INT_EXT);
ret = wilc->hif_func->hif_read_reg(wilc, WILC_CHIPID, &chipid);
- if (ret) {
- release_bus(wilc, WILC_BUS_RELEASE_ONLY);
- return ret;
- }
+ if (ret)
+ goto release;
wilc->hif_func->hif_read_reg(wilc, WILC_GLB_RESET_0, &reg);
if ((reg & BIT(10)) == BIT(10)) {
@@ -1159,8 +1154,9 @@ int wilc_wlan_start(struct wilc *wilc)
reg |= BIT(10);
ret = wilc->hif_func->hif_write_reg(wilc, WILC_GLB_RESET_0, reg);
wilc->hif_func->hif_read_reg(wilc, WILC_GLB_RESET_0, &reg);
- release_bus(wilc, WILC_BUS_RELEASE_ONLY);
+release:
+ release_bus(wilc, WILC_BUS_RELEASE_ONLY);
return ret;
}
@@ -1174,36 +1170,34 @@ int wilc_wlan_stop(struct wilc *wilc, struct wilc_vif *vif)
ret = wilc->hif_func->hif_read_reg(wilc, WILC_GP_REG_0, &reg);
if (ret) {
netdev_err(vif->ndev, "Error while reading reg\n");
- release_bus(wilc, WILC_BUS_RELEASE_ALLOW_SLEEP);
- return ret;
+ goto release;
}
ret = wilc->hif_func->hif_write_reg(wilc, WILC_GP_REG_0,
(reg | WILC_ABORT_REQ_BIT));
if (ret) {
netdev_err(vif->ndev, "Error while writing reg\n");
- release_bus(wilc, WILC_BUS_RELEASE_ALLOW_SLEEP);
- return ret;
+ goto release;
}
ret = wilc->hif_func->hif_read_reg(wilc, WILC_FW_HOST_COMM, &reg);
if (ret) {
netdev_err(vif->ndev, "Error while reading reg\n");
- release_bus(wilc, WILC_BUS_RELEASE_ALLOW_SLEEP);
- return ret;
+ goto release;
}
reg = BIT(0);
ret = wilc->hif_func->hif_write_reg(wilc, WILC_FW_HOST_COMM, reg);
if (ret) {
netdev_err(vif->ndev, "Error while writing reg\n");
- release_bus(wilc, WILC_BUS_RELEASE_ALLOW_SLEEP);
- return ret;
+ goto release;
}
+ ret = 0;
+release:
release_bus(wilc, WILC_BUS_RELEASE_ALLOW_SLEEP);
- return 0;
+ return ret;
}
void wilc_wlan_cleanup(struct net_device *dev)
diff --git a/drivers/net/wireless/quantenna/qtnfmac/pcie/pearl_pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pcie/pearl_pcie.c
index 0003df577cb3..840728ed57b2 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pcie/pearl_pcie.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/pcie/pearl_pcie.c
@@ -295,9 +295,9 @@ static int pearl_skb2rbd_attach(struct qtnf_pcie_pearl_state *ps, u16 index)
priv->rx_skb[index] = skb;
rxbd = &ps->rx_bd_vbase[index];
- paddr = pci_map_single(priv->pdev, skb->data,
- SKB_BUF_SIZE, PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(priv->pdev, paddr)) {
+ paddr = dma_map_single(&priv->pdev->dev, skb->data, SKB_BUF_SIZE,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(&priv->pdev->dev, paddr)) {
pr_err("skb DMA mapping error: %pad\n", &paddr);
return -ENOMEM;
}
@@ -357,8 +357,8 @@ static void qtnf_pearl_free_xfer_buffers(struct qtnf_pcie_pearl_state *ps)
skb = priv->rx_skb[i];
paddr = QTN_HOST_ADDR(le32_to_cpu(rxbd->addr_h),
le32_to_cpu(rxbd->addr));
- pci_unmap_single(priv->pdev, paddr, SKB_BUF_SIZE,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&priv->pdev->dev, paddr,
+ SKB_BUF_SIZE, DMA_FROM_DEVICE);
dev_kfree_skb_any(skb);
priv->rx_skb[i] = NULL;
}
@@ -371,8 +371,8 @@ static void qtnf_pearl_free_xfer_buffers(struct qtnf_pcie_pearl_state *ps)
skb = priv->tx_skb[i];
paddr = QTN_HOST_ADDR(le32_to_cpu(txbd->addr_h),
le32_to_cpu(txbd->addr));
- pci_unmap_single(priv->pdev, paddr, skb->len,
- PCI_DMA_TODEVICE);
+ dma_unmap_single(&priv->pdev->dev, paddr, skb->len,
+ DMA_TO_DEVICE);
dev_kfree_skb_any(skb);
priv->tx_skb[i] = NULL;
}
@@ -485,8 +485,8 @@ static void qtnf_pearl_data_tx_reclaim(struct qtnf_pcie_pearl_state *ps)
txbd = &ps->tx_bd_vbase[i];
paddr = QTN_HOST_ADDR(le32_to_cpu(txbd->addr_h),
le32_to_cpu(txbd->addr));
- pci_unmap_single(priv->pdev, paddr, skb->len,
- PCI_DMA_TODEVICE);
+ dma_unmap_single(&priv->pdev->dev, paddr, skb->len,
+ DMA_TO_DEVICE);
if (skb->dev) {
dev_sw_netstats_tx_add(skb->dev, 1, skb->len);
@@ -559,9 +559,9 @@ static int qtnf_pcie_skb_send(struct qtnf_bus *bus, struct sk_buff *skb)
priv->tx_skb[i] = skb;
len = skb->len;
- skb_paddr = pci_map_single(priv->pdev, skb->data,
- skb->len, PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(priv->pdev, skb_paddr)) {
+ skb_paddr = dma_map_single(&priv->pdev->dev, skb->data, skb->len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&priv->pdev->dev, skb_paddr)) {
pr_err("skb DMA mapping error: %pad\n", &skb_paddr);
ret = -ENOMEM;
goto tx_done;
@@ -748,8 +748,8 @@ static int qtnf_pcie_pearl_rx_poll(struct napi_struct *napi, int budget)
if (skb) {
skb_paddr = QTN_HOST_ADDR(le32_to_cpu(rxbd->addr_h),
le32_to_cpu(rxbd->addr));
- pci_unmap_single(priv->pdev, skb_paddr, SKB_BUF_SIZE,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&priv->pdev->dev, skb_paddr,
+ SKB_BUF_SIZE, DMA_FROM_DEVICE);
}
if (consume) {
diff --git a/drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c
index 24f1be8ddcef..9534e1b33780 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c
@@ -255,9 +255,9 @@ topaz_skb2rbd_attach(struct qtnf_pcie_topaz_state *ts, u16 index, u32 wrap)
ts->base.rx_skb[index] = skb;
- paddr = pci_map_single(ts->base.pdev, skb->data,
- SKB_BUF_SIZE, PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(ts->base.pdev, paddr)) {
+ paddr = dma_map_single(&ts->base.pdev->dev, skb->data, SKB_BUF_SIZE,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(&ts->base.pdev->dev, paddr)) {
pr_err("skb mapping error: %pad\n", &paddr);
return -ENOMEM;
}
@@ -306,8 +306,8 @@ static void qtnf_topaz_free_xfer_buffers(struct qtnf_pcie_topaz_state *ts)
rxbd = &ts->rx_bd_vbase[i];
skb = priv->rx_skb[i];
paddr = QTN_HOST_ADDR(0x0, le32_to_cpu(rxbd->addr));
- pci_unmap_single(priv->pdev, paddr, SKB_BUF_SIZE,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&priv->pdev->dev, paddr,
+ SKB_BUF_SIZE, DMA_FROM_DEVICE);
dev_kfree_skb_any(skb);
priv->rx_skb[i] = NULL;
rxbd->addr = 0;
@@ -321,8 +321,8 @@ static void qtnf_topaz_free_xfer_buffers(struct qtnf_pcie_topaz_state *ts)
txbd = &ts->tx_bd_vbase[i];
skb = priv->tx_skb[i];
paddr = QTN_HOST_ADDR(0x0, le32_to_cpu(txbd->addr));
- pci_unmap_single(priv->pdev, paddr, SKB_BUF_SIZE,
- PCI_DMA_TODEVICE);
+ dma_unmap_single(&priv->pdev->dev, paddr,
+ SKB_BUF_SIZE, DMA_TO_DEVICE);
dev_kfree_skb_any(skb);
priv->tx_skb[i] = NULL;
txbd->addr = 0;
@@ -414,8 +414,8 @@ static void qtnf_topaz_data_tx_reclaim(struct qtnf_pcie_topaz_state *ts)
if (likely(skb)) {
txbd = &ts->tx_bd_vbase[i];
paddr = QTN_HOST_ADDR(0x0, le32_to_cpu(txbd->addr));
- pci_unmap_single(priv->pdev, paddr, skb->len,
- PCI_DMA_TODEVICE);
+ dma_unmap_single(&priv->pdev->dev, paddr, skb->len,
+ DMA_TO_DEVICE);
if (skb->dev) {
dev_sw_netstats_tx_add(skb->dev, 1, skb->len);
@@ -522,9 +522,9 @@ static int qtnf_pcie_data_tx(struct qtnf_bus *bus, struct sk_buff *skb,
priv->tx_skb[i] = skb;
len = skb->len;
- skb_paddr = pci_map_single(priv->pdev, skb->data,
- skb->len, PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(priv->pdev, skb_paddr)) {
+ skb_paddr = dma_map_single(&priv->pdev->dev, skb->data, skb->len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&priv->pdev->dev, skb_paddr)) {
ret = -ENOMEM;
goto tx_done;
}
@@ -653,8 +653,8 @@ static int qtnf_topaz_rx_poll(struct napi_struct *napi, int budget)
if (skb) {
skb_paddr = QTN_HOST_ADDR(0x0, le32_to_cpu(rxbd->addr));
- pci_unmap_single(priv->pdev, skb_paddr, SKB_BUF_SIZE,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&priv->pdev->dev, skb_paddr,
+ SKB_BUF_SIZE, DMA_FROM_DEVICE);
}
if (consume) {
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 590bd974d94f..0f5009c47cd0 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -982,7 +982,9 @@ AP to AP 1 1 dest AP src AP dest source
if (local->net_type == ADHOC) {
writeb(0, &ptx->mac.frame_ctl_2);
memcpy_toio(ptx->mac.addr_1, ((struct ethhdr *)data)->h_dest,
- 2 * ADDRLEN);
+ ADDRLEN);
+ memcpy_toio(ptx->mac.addr_2, ((struct ethhdr *)data)->h_source,
+ ADDRLEN);
memcpy_toio(ptx->mac.addr_3, local->bss_id, ADDRLEN);
} else { /* infrastructure */
@@ -2424,9 +2426,7 @@ static void rx_authenticate(ray_dev_t *local, struct rcs __iomem *prcs,
copy_from_rx_buff(local, buff, pkt_addr, rx_len & 0xff);
/* if we are trying to get authenticated */
if (local->sparm.b4.a_network_type == ADHOC) {
- pr_debug("ray_cs rx_auth var= %02x %02x %02x %02x %02x %02x\n",
- msg->var[0], msg->var[1], msg->var[2], msg->var[3],
- msg->var[4], msg->var[5]);
+ pr_debug("ray_cs rx_auth var= %6ph\n", msg->var);
if (msg->var[2] == 1) {
pr_debug("ray_cs Sending authentication response.\n");
if (!build_auth_frame
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
index 01735776345a..7ddce3c3f0c4 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
@@ -1378,6 +1378,8 @@ struct rtl8xxxu_priv {
u8 no_pape:1;
u8 int_buf[USB_INTR_CONTENT_LENGTH];
u8 rssi_level;
+ DECLARE_BITMAP(tx_aggr_started, IEEE80211_NUM_TIDS);
+ DECLARE_BITMAP(tid_tx_operational, IEEE80211_NUM_TIDS);
/*
* Only one virtual interface permitted because only STA mode
* is supported and no iface_combinations are provided.
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
index ac1061caacd6..774341b0005a 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
@@ -1670,7 +1670,7 @@ static int rtl8xxxu_identify_chip(struct rtl8xxxu_priv *priv)
priv->rf_paths = 2;
priv->rx_paths = 2;
priv->tx_paths = 2;
- priv->usb_interrupts = 1;
+ priv->usb_interrupts = 0;
priv->rtl_chip = RTL8192C;
}
priv->has_wifi = 1;
@@ -1680,7 +1680,7 @@ static int rtl8xxxu_identify_chip(struct rtl8xxxu_priv *priv)
priv->rx_paths = 1;
priv->tx_paths = 1;
priv->rtl_chip = RTL8188C;
- priv->usb_interrupts = 1;
+ priv->usb_interrupts = 0;
priv->has_wifi = 1;
}
@@ -4805,6 +4805,8 @@ rtl8xxxu_fill_txdesc_v1(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
struct ieee80211_rate *tx_rate = ieee80211_get_tx_rate(hw, tx_info);
struct rtl8xxxu_priv *priv = hw->priv;
struct device *dev = &priv->udev->dev;
+ u8 *qc = ieee80211_get_qos_ctl(hdr);
+ u8 tid = qc[0] & IEEE80211_QOS_CTL_TID_MASK;
u32 rate;
u16 rate_flags = tx_info->control.rates[0].flags;
u16 seq_number;
@@ -4828,7 +4830,7 @@ rtl8xxxu_fill_txdesc_v1(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
tx_desc->txdw3 = cpu_to_le32((u32)seq_number << TXDESC32_SEQ_SHIFT);
- if (ampdu_enable)
+ if (ampdu_enable && test_bit(tid, priv->tid_tx_operational))
tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_ENABLE);
else
tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_BREAK);
@@ -4876,6 +4878,8 @@ rtl8xxxu_fill_txdesc_v2(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
struct rtl8xxxu_priv *priv = hw->priv;
struct device *dev = &priv->udev->dev;
struct rtl8xxxu_txdesc40 *tx_desc40;
+ u8 *qc = ieee80211_get_qos_ctl(hdr);
+ u8 tid = qc[0] & IEEE80211_QOS_CTL_TID_MASK;
u32 rate;
u16 rate_flags = tx_info->control.rates[0].flags;
u16 seq_number;
@@ -4902,7 +4906,7 @@ rtl8xxxu_fill_txdesc_v2(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
tx_desc40->txdw9 = cpu_to_le32((u32)seq_number << TXDESC40_SEQ_SHIFT);
- if (ampdu_enable)
+ if (ampdu_enable && test_bit(tid, priv->tid_tx_operational))
tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_ENABLE);
else
tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_BREAK);
@@ -5015,12 +5019,19 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw,
if (ieee80211_is_data_qos(hdr->frame_control) && sta) {
if (sta->ht_cap.ht_supported) {
u32 ampdu, val32;
+ u8 *qc = ieee80211_get_qos_ctl(hdr);
+ u8 tid = qc[0] & IEEE80211_QOS_CTL_TID_MASK;
ampdu = (u32)sta->ht_cap.ampdu_density;
val32 = ampdu << TXDESC_AMPDU_DENSITY_SHIFT;
tx_desc->txdw2 |= cpu_to_le32(val32);
ampdu_enable = true;
+
+ if (!test_bit(tid, priv->tx_aggr_started) &&
+ !(skb->protocol == cpu_to_be16(ETH_P_PAE)))
+ if (!ieee80211_start_tx_ba_session(sta, tid, 0))
+ set_bit(tid, priv->tx_aggr_started);
}
}
@@ -6096,6 +6107,7 @@ rtl8xxxu_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
struct device *dev = &priv->udev->dev;
u8 ampdu_factor, ampdu_density;
struct ieee80211_sta *sta = params->sta;
+ u16 tid = params->tid;
enum ieee80211_ampdu_mlme_action action = params->action;
switch (action) {
@@ -6108,17 +6120,20 @@ rtl8xxxu_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
dev_dbg(dev,
"Changed HT: ampdu_factor %02x, ampdu_density %02x\n",
ampdu_factor, ampdu_density);
- break;
+ return IEEE80211_AMPDU_TX_START_IMMEDIATE;
+ case IEEE80211_AMPDU_TX_STOP_CONT:
case IEEE80211_AMPDU_TX_STOP_FLUSH:
- dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH\n", __func__);
- rtl8xxxu_set_ampdu_factor(priv, 0);
- rtl8xxxu_set_ampdu_min_space(priv, 0);
- break;
case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT:
- dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH_CONT\n",
- __func__);
+ dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_STOP\n", __func__);
rtl8xxxu_set_ampdu_factor(priv, 0);
rtl8xxxu_set_ampdu_min_space(priv, 0);
+ clear_bit(tid, priv->tx_aggr_started);
+ clear_bit(tid, priv->tid_tx_operational);
+ ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
+ break;
+ case IEEE80211_AMPDU_TX_OPERATIONAL:
+ dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_OPERATIONAL\n", __func__);
+ set_bit(tid, priv->tid_tx_operational);
break;
case IEEE80211_AMPDU_RX_START:
dev_dbg(dev, "%s: IEEE80211_AMPDU_RX_START\n", __func__);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c
index 76dd881ef9bb..9b83c710c9b8 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c
@@ -160,6 +160,15 @@ static u32 targetchnl_2g[TARGET_CHNL_NUM_2G] = {
25711, 25658, 25606, 25554, 25502, 25451, 25328
};
+static const u8 channel_all[59] = {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58,
+ 60, 62, 64, 100, 102, 104, 106, 108, 110, 112,
+ 114, 116, 118, 120, 122, 124, 126, 128, 130,
+ 132, 134, 136, 138, 140, 149, 151, 153, 155,
+ 157, 159, 161, 163, 165
+};
+
static u32 _rtl92d_phy_calculate_bit_shift(u32 bitmask)
{
u32 i = ffs(bitmask);
@@ -681,7 +690,7 @@ static bool _rtl92d_phy_bb_config(struct ieee80211_hw *hw)
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_phy *rtlphy = &(rtlpriv->phy);
struct rtl_efuse *rtlefuse = rtl_efuse(rtl_priv(hw));
- bool rtstatus = true;
+ bool rtstatus;
rtl_dbg(rtlpriv, COMP_INIT, DBG_TRACE, "==>\n");
rtstatus = _rtl92d_phy_config_bb_with_headerfile(hw,
@@ -1354,15 +1363,7 @@ static void _rtl92d_phy_switch_rf_setting(struct ieee80211_hw *hw, u8 channel)
u8 rtl92d_get_rightchnlplace_for_iqk(u8 chnl)
{
- u8 channel_all[59] = {
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58,
- 60, 62, 64, 100, 102, 104, 106, 108, 110, 112,
- 114, 116, 118, 120, 122, 124, 126, 128, 130,
- 132, 134, 136, 138, 140, 149, 151, 153, 155,
- 157, 159, 161, 163, 165
- };
- u8 place = chnl;
+ u8 place;
if (chnl > 14) {
for (place = 14; place < sizeof(channel_all); place++) {
@@ -3220,37 +3221,28 @@ void rtl92d_phy_config_macphymode_info(struct ieee80211_hw *hw)
u8 rtl92d_get_chnlgroup_fromarray(u8 chnl)
{
u8 group;
- u8 channel_info[59] = {
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56,
- 58, 60, 62, 64, 100, 102, 104, 106, 108,
- 110, 112, 114, 116, 118, 120, 122, 124,
- 126, 128, 130, 132, 134, 136, 138, 140,
- 149, 151, 153, 155, 157, 159, 161, 163,
- 165
- };
- if (channel_info[chnl] <= 3)
+ if (channel_all[chnl] <= 3)
group = 0;
- else if (channel_info[chnl] <= 9)
+ else if (channel_all[chnl] <= 9)
group = 1;
- else if (channel_info[chnl] <= 14)
+ else if (channel_all[chnl] <= 14)
group = 2;
- else if (channel_info[chnl] <= 44)
+ else if (channel_all[chnl] <= 44)
group = 3;
- else if (channel_info[chnl] <= 54)
+ else if (channel_all[chnl] <= 54)
group = 4;
- else if (channel_info[chnl] <= 64)
+ else if (channel_all[chnl] <= 64)
group = 5;
- else if (channel_info[chnl] <= 112)
+ else if (channel_all[chnl] <= 112)
group = 6;
- else if (channel_info[chnl] <= 126)
+ else if (channel_all[chnl] <= 126)
group = 7;
- else if (channel_info[chnl] <= 140)
+ else if (channel_all[chnl] <= 140)
group = 8;
- else if (channel_info[chnl] <= 153)
+ else if (channel_all[chnl] <= 153)
group = 9;
- else if (channel_info[chnl] <= 159)
+ else if (channel_all[chnl] <= 159)
group = 10;
else
group = 11;
diff --git a/drivers/net/wireless/realtek/rtw88/Makefile b/drivers/net/wireless/realtek/rtw88/Makefile
index c0e4b111c8b4..73d6807a8cdf 100644
--- a/drivers/net/wireless/realtek/rtw88/Makefile
+++ b/drivers/net/wireless/realtek/rtw88/Makefile
@@ -15,9 +15,9 @@ rtw88_core-y += main.o \
ps.o \
sec.o \
bf.o \
- wow.o \
regd.o
+rtw88_core-$(CONFIG_PM) += wow.o
obj-$(CONFIG_RTW88_8822B) += rtw88_8822b.o
rtw88_8822b-objs := rtw8822b.o rtw8822b_table.o
diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
index 3bfa5ecc0053..e6399519584b 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.c
+++ b/drivers/net/wireless/realtek/rtw88/fw.c
@@ -819,7 +819,7 @@ static u16 rtw_get_rsvd_page_probe_req_size(struct rtw_dev *rtwdev,
continue;
if ((!ssid && !rsvd_pkt->ssid) ||
rtw_ssid_equal(rsvd_pkt->ssid, ssid))
- size = rsvd_pkt->skb->len;
+ size = rsvd_pkt->probe_req_size;
}
return size;
@@ -1047,6 +1047,8 @@ static struct sk_buff *rtw_get_rsvd_page_skb(struct ieee80211_hw *hw,
ssid->ssid_len, 0);
else
skb_new = ieee80211_probereq_get(hw, vif->addr, NULL, 0, 0);
+ if (skb_new)
+ rsvd_pkt->probe_req_size = (u16)skb_new->len;
break;
case RSVD_NLO_INFO:
skb_new = rtw_nlo_info_get(hw);
@@ -1643,6 +1645,7 @@ int rtw_fw_dump_fifo(struct rtw_dev *rtwdev, u8 fifo_sel, u32 addr, u32 size,
static void __rtw_fw_update_pkt(struct rtw_dev *rtwdev, u8 pkt_id, u16 size,
u8 location)
{
+ struct rtw_chip_info *chip = rtwdev->chip;
u8 h2c_pkt[H2C_PKT_SIZE] = {0};
u16 total_size = H2C_PKT_HDR_SIZE + H2C_PKT_UPDATE_PKT_LEN;
@@ -1653,6 +1656,7 @@ static void __rtw_fw_update_pkt(struct rtw_dev *rtwdev, u8 pkt_id, u16 size,
UPDATE_PKT_SET_LOCATION(h2c_pkt, location);
/* include txdesc size */
+ size += chip->tx_pkt_desc_sz;
UPDATE_PKT_SET_SIZE(h2c_pkt, size);
rtw_fw_send_h2c_packet(rtwdev, h2c_pkt);
@@ -1662,7 +1666,7 @@ void rtw_fw_update_pkt_probe_req(struct rtw_dev *rtwdev,
struct cfg80211_ssid *ssid)
{
u8 loc;
- u32 size;
+ u16 size;
loc = rtw_get_rsvd_page_probe_req_location(rtwdev, ssid);
if (!loc) {
diff --git a/drivers/net/wireless/realtek/rtw88/fw.h b/drivers/net/wireless/realtek/rtw88/fw.h
index a8a7162fbe64..64dcde35a021 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.h
+++ b/drivers/net/wireless/realtek/rtw88/fw.h
@@ -99,7 +99,7 @@ enum rtw_beacon_filter_offload_mode {
BCN_FILTER_OFFLOAD_MODE_2,
BCN_FILTER_OFFLOAD_MODE_3,
- BCN_FILTER_OFFLOAD_MODE_DEFAULT = BCN_FILTER_OFFLOAD_MODE_1,
+ BCN_FILTER_OFFLOAD_MODE_DEFAULT = BCN_FILTER_OFFLOAD_MODE_0,
};
struct rtw_coex_info_req {
@@ -147,6 +147,7 @@ struct rtw_rsvd_page {
u8 page;
bool add_txdesc;
struct cfg80211_ssid *ssid;
+ u16 probe_req_size;
};
enum rtw_keep_alive_pkt_type {
diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index c6364837e83b..6bb55e663fc3 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -1338,6 +1338,8 @@ static void rtw_init_ht_cap(struct rtw_dev *rtwdev,
if (rtw_chip_has_rx_ldpc(rtwdev))
ht_cap->cap |= IEEE80211_HT_CAP_LDPC_CODING;
+ if (rtw_chip_has_tx_stbc(rtwdev))
+ ht_cap->cap |= IEEE80211_HT_CAP_TX_STBC;
if (efuse->hw_cap.bw & BIT(RTW_CHANNEL_WIDTH_40))
ht_cap->cap |= IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index e5af375b3dd0..56812127a053 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -1146,6 +1146,7 @@ struct rtw_chip_info {
u8 txgi_factor;
bool is_pwr_by_rate_dec;
bool rx_ldpc;
+ bool tx_stbc;
u8 max_power_index;
u16 fw_fifo_addr[RTW_FW_FIFO_MAX];
@@ -1959,6 +1960,11 @@ static inline bool rtw_chip_has_rx_ldpc(struct rtw_dev *rtwdev)
return rtwdev->chip->rx_ldpc;
}
+/* Return the tx_stbc flag stored in the chip info attached to @rtwdev. */
+static inline bool rtw_chip_has_tx_stbc(struct rtw_dev *rtwdev)
+{
+	const struct rtw_chip_info *chip_info = rtwdev->chip;
+
+	return chip_info->tx_stbc;
+}
+
static inline void rtw_release_macid(struct rtw_dev *rtwdev, u8 mac_id)
{
clear_bit(mac_id, rtwdev->mac_id_map);
diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index e7d17ab8f113..a7a6ebfaa203 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -268,11 +268,6 @@ static int rtw_pci_init_rx_ring(struct rtw_dev *rtwdev,
int i, allocated;
int ret = 0;
- if (len > TRX_BD_IDX_MASK) {
- rtw_err(rtwdev, "len %d exceeds maximum RX entries\n", len);
- return -EINVAL;
- }
-
head = dma_alloc_coherent(&pdev->dev, ring_sz, &dma, GFP_KERNEL);
if (!head) {
rtw_err(rtwdev, "failed to allocate rx ring\n");
@@ -1359,6 +1354,25 @@ static void rtw_pci_clkreq_set(struct rtw_dev *rtwdev, bool enable)
rtw_dbi_write8(rtwdev, RTK_PCIE_LINK_CFG, value);
}
+/* Clear (@enable true) or set (@enable false) BIT_CLKREQ_N_PAD in the
+ * RTK_PCIE_LINK_CFG DBI register using a read-modify-write.
+ *
+ * If the current register value cannot be read, an error is logged and the
+ * register is left untouched.
+ */
+static void rtw_pci_clkreq_pad_low(struct rtw_dev *rtwdev, bool enable)
+{
+	u8 value;
+	int ret;
+
+	ret = rtw_dbi_read8(rtwdev, RTK_PCIE_LINK_CFG, &value);
+	if (ret) {
+		/* Newline-terminate so the message is not merged with the
+		 * next log line.
+		 */
+		rtw_err(rtwdev, "failed to read CLKREQ_L1, ret=%d\n", ret);
+		return;
+	}
+
+	if (enable)
+		value &= ~BIT_CLKREQ_N_PAD;
+	else
+		value |= BIT_CLKREQ_N_PAD;
+
+	rtw_dbi_write8(rtwdev, RTK_PCIE_LINK_CFG, value);
+}
+
static void rtw_pci_aspm_set(struct rtw_dev *rtwdev, bool enable)
{
u8 value;
@@ -1500,11 +1514,25 @@ static void rtw_pci_phy_cfg(struct rtw_dev *rtwdev)
static int __maybe_unused rtw_pci_suspend(struct device *dev)
{
+ struct ieee80211_hw *hw = dev_get_drvdata(dev);
+ struct rtw_dev *rtwdev = hw->priv;
+ struct rtw_chip_info *chip = rtwdev->chip;
+ struct rtw_efuse *efuse = &rtwdev->efuse;
+
+ if (chip->id == RTW_CHIP_TYPE_8822C && efuse->rfe_option == 6)
+ rtw_pci_clkreq_pad_low(rtwdev, true);
return 0;
}
static int __maybe_unused rtw_pci_resume(struct device *dev)
{
+ struct ieee80211_hw *hw = dev_get_drvdata(dev);
+ struct rtw_dev *rtwdev = hw->priv;
+ struct rtw_chip_info *chip = rtwdev->chip;
+ struct rtw_efuse *efuse = &rtwdev->efuse;
+
+ if (chip->id == RTW_CHIP_TYPE_8822C && efuse->rfe_option == 6)
+ rtw_pci_clkreq_pad_low(rtwdev, false);
return 0;
}
@@ -1701,6 +1729,15 @@ static const struct dmi_system_id rtw88_pci_quirks[] = {
},
.driver_data = (void *)BIT(QUIRK_DIS_PCI_CAP_ASPM),
},
+ {
+ .callback = disable_pci_caps,
+ .ident = "HP HP Pavilion Laptop 14-ce0xxx",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion Laptop 14-ce0xxx"),
+ },
+ .driver_data = (void *)BIT(QUIRK_DIS_PCI_CAP_ASPM),
+ },
{}
};
diff --git a/drivers/net/wireless/realtek/rtw88/pci.h b/drivers/net/wireless/realtek/rtw88/pci.h
index 0ffae887527a..66f78eb7757c 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.h
+++ b/drivers/net/wireless/realtek/rtw88/pci.h
@@ -37,6 +37,7 @@
#define RTK_PCIE_LINK_CFG 0x0719
#define BIT_CLKREQ_SW_EN BIT(4)
#define BIT_L1_SW_EN BIT(3)
+#define BIT_CLKREQ_N_PAD BIT(0)
#define RTK_PCIE_CLKDLY_CTRL 0x0725
#define BIT_PCI_BCNQ_FLAG BIT(4)
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index 8bf3cd3a3678..f3ad079967a6 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -5288,6 +5288,7 @@ struct rtw_chip_info rtw8822c_hw_spec = {
.bfer_su_max_num = 2,
.bfer_mu_max_num = 1,
.rx_ldpc = true,
+ .tx_stbc = true,
#ifdef CONFIG_PM
.wow_fw_name = "rtw88/rtw8822c_wow_fw.bin",
diff --git a/drivers/net/wireless/realtek/rtw88/tx.c b/drivers/net/wireless/realtek/rtw88/tx.c
index 0193708fc013..3a101aa139ed 100644
--- a/drivers/net/wireless/realtek/rtw88/tx.c
+++ b/drivers/net/wireless/realtek/rtw88/tx.c
@@ -162,7 +162,7 @@ void rtw_tx_report_purge_timer(struct timer_list *t)
if (skb_queue_len(&tx_report->queue) == 0)
return;
- rtw_dbg(rtwdev, RTW_DBG_TX, "purge skb(s) not reported by firmware\n");
+ rtw_warn(rtwdev, "failed to get tx report from firmware\n");
spin_lock_irqsave(&tx_report->q_lock, flags);
skb_queue_purge(&tx_report->queue);
diff --git a/drivers/net/wireless/realtek/rtw88/wow.c b/drivers/net/wireless/realtek/rtw88/wow.c
index fc9544f4e5e4..89dc595094d5 100644
--- a/drivers/net/wireless/realtek/rtw88/wow.c
+++ b/drivers/net/wireless/realtek/rtw88/wow.c
@@ -12,26 +12,54 @@
static void rtw_wow_show_wakeup_reason(struct rtw_dev *rtwdev)
{
+ struct cfg80211_wowlan_nd_info nd_info;
+ struct cfg80211_wowlan_wakeup wakeup = {
+ .pattern_idx = -1,
+ };
u8 reason;
reason = rtw_read8(rtwdev, REG_WOWLAN_WAKE_REASON);
- if (reason == RTW_WOW_RSN_RX_DEAUTH)
+ switch (reason) {
+ case RTW_WOW_RSN_RX_DEAUTH:
+ wakeup.disconnect = true;
rtw_dbg(rtwdev, RTW_DBG_WOW, "WOW: Rx deauth\n");
- else if (reason == RTW_WOW_RSN_DISCONNECT)
+ break;
+ case RTW_WOW_RSN_DISCONNECT:
+ wakeup.disconnect = true;
rtw_dbg(rtwdev, RTW_DBG_WOW, "WOW: AP is off\n");
- else if (reason == RTW_WOW_RSN_RX_MAGIC_PKT)
+ break;
+ case RTW_WOW_RSN_RX_MAGIC_PKT:
+ wakeup.magic_pkt = true;
rtw_dbg(rtwdev, RTW_DBG_WOW, "WOW: Rx magic packet\n");
- else if (reason == RTW_WOW_RSN_RX_GTK_REKEY)
+ break;
+ case RTW_WOW_RSN_RX_GTK_REKEY:
+ wakeup.gtk_rekey_failure = true;
rtw_dbg(rtwdev, RTW_DBG_WOW, "WOW: Rx gtk rekey\n");
- else if (reason == RTW_WOW_RSN_RX_PTK_REKEY)
- rtw_dbg(rtwdev, RTW_DBG_WOW, "WOW: Rx ptk rekey\n");
- else if (reason == RTW_WOW_RSN_RX_PATTERN_MATCH)
+ break;
+ case RTW_WOW_RSN_RX_PATTERN_MATCH:
+ /* Current firmware and driver don't report pattern index
+ * Use pattern_idx to 0 defaultly.
+ */
+ wakeup.pattern_idx = 0;
rtw_dbg(rtwdev, RTW_DBG_WOW, "WOW: Rx pattern match packet\n");
- else if (reason == RTW_WOW_RSN_RX_NLO)
+ break;
+ case RTW_WOW_RSN_RX_NLO:
+ /* Current firmware and driver don't report ssid index.
+ * Use 0 for n_matches based on its comment.
+ */
+ nd_info.n_matches = 0;
+ wakeup.net_detect = &nd_info;
rtw_dbg(rtwdev, RTW_DBG_WOW, "Rx NLO\n");
- else
+ break;
+ default:
rtw_warn(rtwdev, "Unknown wakeup reason %x\n", reason);
+ ieee80211_report_wowlan_wakeup(rtwdev->wow.wow_vif, NULL,
+ GFP_KERNEL);
+ return;
+ }
+ ieee80211_report_wowlan_wakeup(rtwdev->wow.wow_vif, &wakeup,
+ GFP_KERNEL);
}
static void rtw_wow_pattern_write_cam(struct rtw_dev *rtwdev, u8 addr,
@@ -283,15 +311,26 @@ static void rtw_wow_rx_dma_start(struct rtw_dev *rtwdev)
static int rtw_wow_check_fw_status(struct rtw_dev *rtwdev, bool wow_enable)
{
- /* wait 100ms for wow firmware to finish work */
- msleep(100);
+ int ret;
+ u8 check;
+ u32 check_dis;
if (wow_enable) {
- if (rtw_read8(rtwdev, REG_WOWLAN_WAKE_REASON))
+ ret = read_poll_timeout(rtw_read8, check, !check, 1000,
+ 100000, true, rtwdev,
+ REG_WOWLAN_WAKE_REASON);
+ if (ret)
goto wow_fail;
} else {
- if (rtw_read32_mask(rtwdev, REG_FE1IMR, BIT_FS_RXDONE) ||
- rtw_read32_mask(rtwdev, REG_RXPKT_NUM, BIT_RW_RELEASE))
+ ret = read_poll_timeout(rtw_read32_mask, check_dis,
+ !check_dis, 1000, 100000, true, rtwdev,
+ REG_FE1IMR, BIT_FS_RXDONE);
+ if (ret)
+ goto wow_fail;
+ ret = read_poll_timeout(rtw_read32_mask, check_dis,
+ !check_dis, 1000, 100000, false, rtwdev,
+ REG_RXPKT_NUM, BIT_RW_RELEASE);
+ if (ret)
goto wow_fail;
}
@@ -432,37 +471,31 @@ static void rtw_wow_fw_media_status(struct rtw_dev *rtwdev, bool connect)
rtw_iterate_stas_atomic(rtwdev, rtw_wow_fw_media_status_iter, &data);
}
-static void rtw_wow_config_pno_rsvd_page(struct rtw_dev *rtwdev,
- struct rtw_vif *rtwvif)
+static int rtw_wow_config_wow_fw_rsvd_page(struct rtw_dev *rtwdev)
{
- rtw_add_rsvd_page_pno(rtwdev, rtwvif);
-}
-
-static void rtw_wow_config_linked_rsvd_page(struct rtw_dev *rtwdev,
- struct rtw_vif *rtwvif)
-{
- rtw_add_rsvd_page_sta(rtwdev, rtwvif);
-}
+ struct ieee80211_vif *wow_vif = rtwdev->wow.wow_vif;
+ struct rtw_vif *rtwvif = (struct rtw_vif *)wow_vif->drv_priv;
-static void rtw_wow_config_rsvd_page(struct rtw_dev *rtwdev,
- struct rtw_vif *rtwvif)
-{
rtw_remove_rsvd_page(rtwdev, rtwvif);
- if (rtw_wow_mgd_linked(rtwdev)) {
- rtw_wow_config_linked_rsvd_page(rtwdev, rtwvif);
- } else if (test_bit(RTW_FLAG_WOWLAN, rtwdev->flags) &&
- rtw_wow_no_link(rtwdev)) {
- rtw_wow_config_pno_rsvd_page(rtwdev, rtwvif);
- }
+ if (rtw_wow_no_link(rtwdev))
+ rtw_add_rsvd_page_pno(rtwdev, rtwvif);
+ else
+ rtw_add_rsvd_page_sta(rtwdev, rtwvif);
+
+ return rtw_fw_download_rsvd_page(rtwdev);
}
-static int rtw_wow_dl_fw_rsvd_page(struct rtw_dev *rtwdev)
+static int rtw_wow_config_normal_fw_rsvd_page(struct rtw_dev *rtwdev)
{
struct ieee80211_vif *wow_vif = rtwdev->wow.wow_vif;
struct rtw_vif *rtwvif = (struct rtw_vif *)wow_vif->drv_priv;
- rtw_wow_config_rsvd_page(rtwdev, rtwvif);
+ rtw_remove_rsvd_page(rtwdev, rtwvif);
+ rtw_add_rsvd_page_sta(rtwdev, rtwvif);
+
+ if (rtw_wow_no_link(rtwdev))
+ return 0;
return rtw_fw_download_rsvd_page(rtwdev);
}
@@ -660,7 +693,7 @@ static int rtw_wow_enable(struct rtw_dev *rtwdev)
set_bit(RTW_FLAG_WOWLAN, rtwdev->flags);
- ret = rtw_wow_dl_fw_rsvd_page(rtwdev);
+ ret = rtw_wow_config_wow_fw_rsvd_page(rtwdev);
if (ret) {
rtw_err(rtwdev, "failed to download wowlan rsvd page\n");
goto error;
@@ -733,7 +766,7 @@ static int rtw_wow_disable(struct rtw_dev *rtwdev)
goto out;
}
- ret = rtw_wow_dl_fw_rsvd_page(rtwdev);
+ ret = rtw_wow_config_normal_fw_rsvd_page(rtwdev);
if (ret)
rtw_err(rtwdev, "failed to download normal rsvd page\n");
diff --git a/drivers/net/wireless/rsi/rsi_91x_debugfs.c b/drivers/net/wireless/rsi/rsi_91x_debugfs.c
index 24a417ea2ae7..bf22fd948276 100644
--- a/drivers/net/wireless/rsi/rsi_91x_debugfs.c
+++ b/drivers/net/wireless/rsi/rsi_91x_debugfs.c
@@ -117,7 +117,7 @@ static int rsi_stats_read(struct seq_file *seq, void *data)
{
struct rsi_common *common = seq->private;
- unsigned char fsm_state[][32] = {
+ static const unsigned char fsm_state[][32] = {
"FSM_FW_NOT_LOADED",
"FSM_CARD_NOT_READY",
"FSM_COMMON_DEV_PARAMS_SENT",
diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c
index 99b21a2c8386..f4a26f16f00f 100644
--- a/drivers/net/wireless/rsi/rsi_91x_hal.c
+++ b/drivers/net/wireless/rsi/rsi_91x_hal.c
@@ -1038,8 +1038,10 @@ static int rsi_load_9116_firmware(struct rsi_hw *adapter)
}
ta_firmware = kmemdup(fw_entry->data, fw_entry->size, GFP_KERNEL);
- if (!ta_firmware)
+ if (!ta_firmware) {
+ status = -ENOMEM;
goto fail_release_fw;
+ }
fw_p = ta_firmware;
instructions_sz = fw_entry->size;
rsi_dbg(INFO_ZONE, "FW Length = %d bytes\n", instructions_sz);
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c
index 3fbe2a3c1455..416976f09888 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb.c
@@ -816,6 +816,7 @@ static int rsi_probe(struct usb_interface *pfunction,
} else {
rsi_dbg(ERR_ZONE, "%s: Unsupported RSI device id 0x%x\n",
__func__, id->idProduct);
+ status = -ENODEV;
goto err1;
}
diff --git a/drivers/net/wwan/Kconfig b/drivers/net/wwan/Kconfig
index de9384326bc8..77dbfc418bce 100644
--- a/drivers/net/wwan/Kconfig
+++ b/drivers/net/wwan/Kconfig
@@ -38,6 +38,18 @@ config MHI_WWAN_CTRL
To compile this driver as a module, choose M here: the module will be
called mhi_wwan_ctrl.
+config MHI_WWAN_MBIM
+ tristate "MHI WWAN MBIM network driver for QCOM-based PCIe modems"
+ depends on MHI_BUS
+ help
+ MHI WWAN MBIM is a WWAN network driver for QCOM-based PCIe modems.
+ It implements MBIM over MHI, for IP data aggregation and muxing.
+ A default wwan0 network interface is created for MBIM data session
+ ID 0. Additional links can be created via wwan rtnetlink type.
+
+ To compile this driver as a module, choose M here: the module will be
+ called mhi_wwan_mbim.
+
config RPMSG_WWAN_CTRL
tristate "RPMSG WWAN control driver"
depends on RPMSG
diff --git a/drivers/net/wwan/Makefile b/drivers/net/wwan/Makefile
index d90ac33abaef..fe51feedac21 100644
--- a/drivers/net/wwan/Makefile
+++ b/drivers/net/wwan/Makefile
@@ -9,5 +9,6 @@ wwan-objs += wwan_core.o
obj-$(CONFIG_WWAN_HWSIM) += wwan_hwsim.o
obj-$(CONFIG_MHI_WWAN_CTRL) += mhi_wwan_ctrl.o
+obj-$(CONFIG_MHI_WWAN_MBIM) += mhi_wwan_mbim.o
obj-$(CONFIG_RPMSG_WWAN_CTRL) += rpmsg_wwan_ctrl.o
obj-$(CONFIG_IOSM) += iosm/
diff --git a/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c b/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c
index 804e6c4f2c78..519361ec40df 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c
@@ -64,10 +64,9 @@ static struct ipc_chnl_cfg modem_cfg[] = {
int ipc_chnl_cfg_get(struct ipc_chnl_cfg *chnl_cfg, int index)
{
- int array_size = ARRAY_SIZE(modem_cfg);
-
- if (index >= array_size) {
- pr_err("index: %d and array_size %d", index, array_size);
+ if (index >= ARRAY_SIZE(modem_cfg)) {
+ pr_err("index: %d and array size %zu", index,
+ ARRAY_SIZE(modem_cfg));
return -ECHRNG;
}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c
index 7f7d364d3a51..2fe88b8be348 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c
@@ -479,6 +479,7 @@ static struct pci_driver iosm_ipc_driver = {
},
.id_table = iosm_ipc_ids,
};
+module_pci_driver(iosm_ipc_driver);
int ipc_pcie_addr_map(struct iosm_pcie *ipc_pcie, unsigned char *data,
size_t size, dma_addr_t *mapping, int direction)
@@ -560,21 +561,3 @@ void ipc_pcie_kfree_skb(struct iosm_pcie *ipc_pcie, struct sk_buff *skb)
IPC_CB(skb)->mapping = 0;
dev_kfree_skb(skb);
}
-
-static int __init iosm_ipc_driver_init(void)
-{
- if (pci_register_driver(&iosm_ipc_driver)) {
- pr_err("registering of IOSM PCIe driver failed");
- return -1;
- }
-
- return 0;
-}
-
-static void __exit iosm_ipc_driver_exit(void)
-{
- pci_unregister_driver(&iosm_ipc_driver);
-}
-
-module_init(iosm_ipc_driver_init);
-module_exit(iosm_ipc_driver_exit);
diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol.c b/drivers/net/wwan/iosm/iosm_ipc_protocol.c
index 834d8b146a94..63fc7012f09f 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_protocol.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_protocol.c
@@ -239,9 +239,9 @@ struct iosm_protocol *ipc_protocol_init(struct iosm_imem *ipc_imem)
ipc_protocol->old_msg_tail = 0;
ipc_protocol->p_ap_shm =
- pci_alloc_consistent(ipc_protocol->pcie->pci,
- sizeof(*ipc_protocol->p_ap_shm),
- &ipc_protocol->phy_ap_shm);
+ dma_alloc_coherent(&ipc_protocol->pcie->pci->dev,
+ sizeof(*ipc_protocol->p_ap_shm),
+ &ipc_protocol->phy_ap_shm, GFP_KERNEL);
if (!ipc_protocol->p_ap_shm) {
dev_err(ipc_protocol->dev, "pci shm alloc error");
@@ -275,8 +275,8 @@ struct iosm_protocol *ipc_protocol_init(struct iosm_imem *ipc_imem)
void ipc_protocol_deinit(struct iosm_protocol *proto)
{
- pci_free_consistent(proto->pcie->pci, sizeof(*proto->p_ap_shm),
- proto->p_ap_shm, proto->phy_ap_shm);
+ dma_free_coherent(&proto->pcie->pci->dev, sizeof(*proto->p_ap_shm),
+ proto->p_ap_shm, proto->phy_ap_shm);
ipc_pm_deinit(proto);
kfree(proto);
diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
index 35d590743d3a..c6b032f95d2e 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
@@ -74,9 +74,9 @@ static int ipc_protocol_msg_prepipe_open(struct iosm_protocol *ipc_protocol,
return -ENOMEM;
/* Allocate the transfer descriptors for the pipe. */
- tdr = pci_alloc_consistent(ipc_protocol->pcie->pci,
- pipe->nr_of_entries * sizeof(*tdr),
- &pipe->phy_tdr_start);
+ tdr = dma_alloc_coherent(&ipc_protocol->pcie->pci->dev,
+ pipe->nr_of_entries * sizeof(*tdr),
+ &pipe->phy_tdr_start, GFP_ATOMIC);
if (!tdr) {
kfree(skbr);
dev_err(ipc_protocol->dev, "tdr alloc error");
@@ -492,10 +492,9 @@ void ipc_protocol_pipe_cleanup(struct iosm_protocol *ipc_protocol,
/* Free and reset the td and skbuf circular buffers. kfree is save! */
if (pipe->tdr_start) {
- pci_free_consistent(ipc_protocol->pcie->pci,
- sizeof(*pipe->tdr_start) *
- pipe->nr_of_entries,
- pipe->tdr_start, pipe->phy_tdr_start);
+ dma_free_coherent(&ipc_protocol->pcie->pci->dev,
+ sizeof(*pipe->tdr_start) * pipe->nr_of_entries,
+ pipe->tdr_start, pipe->phy_tdr_start);
pipe->tdr_start = NULL;
}
diff --git a/drivers/net/wwan/mhi_wwan_ctrl.c b/drivers/net/wwan/mhi_wwan_ctrl.c
index 1e18420ce404..e4d0f696687f 100644
--- a/drivers/net/wwan/mhi_wwan_ctrl.c
+++ b/drivers/net/wwan/mhi_wwan_ctrl.c
@@ -41,14 +41,14 @@ struct mhi_wwan_dev {
/* Increment RX budget and schedule RX refill if necessary */
static void mhi_wwan_rx_budget_inc(struct mhi_wwan_dev *mhiwwan)
{
- spin_lock(&mhiwwan->rx_lock);
+ spin_lock_bh(&mhiwwan->rx_lock);
mhiwwan->rx_budget++;
if (test_bit(MHI_WWAN_RX_REFILL, &mhiwwan->flags))
schedule_work(&mhiwwan->rx_refill);
- spin_unlock(&mhiwwan->rx_lock);
+ spin_unlock_bh(&mhiwwan->rx_lock);
}
/* Decrement RX budget if non-zero and return true on success */
@@ -56,7 +56,7 @@ static bool mhi_wwan_rx_budget_dec(struct mhi_wwan_dev *mhiwwan)
{
bool ret = false;
- spin_lock(&mhiwwan->rx_lock);
+ spin_lock_bh(&mhiwwan->rx_lock);
if (mhiwwan->rx_budget) {
mhiwwan->rx_budget--;
@@ -64,7 +64,7 @@ static bool mhi_wwan_rx_budget_dec(struct mhi_wwan_dev *mhiwwan)
ret = true;
}
- spin_unlock(&mhiwwan->rx_lock);
+ spin_unlock_bh(&mhiwwan->rx_lock);
return ret;
}
@@ -110,7 +110,7 @@ static int mhi_wwan_ctrl_start(struct wwan_port *port)
int ret;
/* Start mhi device's channel(s) */
- ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev, 0);
+ ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev);
if (ret)
return ret;
@@ -130,9 +130,9 @@ static void mhi_wwan_ctrl_stop(struct wwan_port *port)
{
struct mhi_wwan_dev *mhiwwan = wwan_port_get_drvdata(port);
- spin_lock(&mhiwwan->rx_lock);
+ spin_lock_bh(&mhiwwan->rx_lock);
clear_bit(MHI_WWAN_RX_REFILL, &mhiwwan->flags);
- spin_unlock(&mhiwwan->rx_lock);
+ spin_unlock_bh(&mhiwwan->rx_lock);
cancel_work_sync(&mhiwwan->rx_refill);
diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c
new file mode 100644
index 000000000000..71bf9b4f769f
--- /dev/null
+++ b/drivers/net/wwan/mhi_wwan_mbim.c
@@ -0,0 +1,658 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* MHI MBIM Network driver - Network/MBIM over MHI bus
+ *
+ * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org>
+ *
+ * This driver copies some code from cdc_ncm, which is:
+ * Copyright (C) ST-Ericsson 2010-2012
+ * and cdc_mbim, which is:
+ * Copyright (c) 2012 Smith Micro Software, Inc.
+ * Copyright (c) 2012 Bjørn Mork <bjorn@mork.no>
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/mhi.h>
+#include <linux/mii.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/usb.h>
+#include <linux/usb/cdc.h>
+#include <linux/usb/usbnet.h>
+#include <linux/usb/cdc_ncm.h>
+#include <linux/wwan.h>
+
+/* An MRU of 3500 bytes optimizes skb allocation: each skb basically fits in
+ * one 4K page. Large MBIM packets are simply split over several MHI
+ * transfers and chained by the MHI net layer (zerocopy).
+ */
+#define MHI_DEFAULT_MRU 3500
+
+#define MHI_MBIM_DEFAULT_MTU 1500
+#define MHI_MAX_BUF_SZ 0xffff
+
+#define MBIM_NDP16_SIGN_MASK 0x00ffffff
+
+#define MHI_MBIM_LINK_HASH_SIZE 8
+#define LINK_HASH(session) ((session) % MHI_MBIM_LINK_HASH_SIZE)
+
+/* State of one MBIM link: back-pointer to the shared context, its
+ * net_device, the session id, traffic counters and hash-list linkage.
+ * This is the structure returned by wwan_netdev_drvpriv() for the netdev.
+ */
+struct mhi_mbim_link {
+ /* Shared MBIM context this link transmits and receives through */
+ struct mhi_mbim_context *mbim;
+ /* Network device that received datagrams are allocated for */
+ struct net_device *ndev;
+ /* Session id: encoded into the NDP16 signature on TX and used as the
+  * lookup key on RX
+  */
+ unsigned int session;
+
+ /* stats */
+ u64_stats_t rx_packets;
+ u64_stats_t rx_bytes;
+ u64_stats_t rx_errors;
+ u64_stats_t tx_packets;
+ u64_stats_t tx_bytes;
+ u64_stats_t tx_errors;
+ u64_stats_t tx_dropped;
+ /* Sync points bracketing updates/reads of the tx_* and rx_* counters */
+ struct u64_stats_sync tx_syncp;
+ struct u64_stats_sync rx_syncp;
+
+ /* Linkage in mhi_mbim_context.link_list[], bucket LINK_HASH(session) */
+ struct hlist_node hlnode;
+};
+
+/* Per-MHI-device MBIM state shared by all links (sessions). */
+struct mhi_mbim_context {
+ /* MHI device used for queueing TX and RX buffers */
+ struct mhi_device *mdev;
+ /* Head and tail of the skb chain being assembled from an RX packet that
+  * was split over several MHI transfers
+  */
+ struct sk_buff *skbagg_head;
+ struct sk_buff *skbagg_tail;
+ /* NOTE(review): not referenced in the visible code, which sizes RX
+  * buffers with MHI_DEFAULT_MRU - confirm intended use
+  */
+ unsigned int mru;
+ /* Compared against the free RX descriptor count to decide when the RX
+  * queue needs refilling
+  */
+ u32 rx_queue_sz;
+ /* Sequence number of the last received NTB */
+ u16 rx_seq;
+ /* Sequence number for the next transmitted NTB; advanced only after a
+  * successful queueing
+  */
+ u16 tx_seq;
+ /* Deferred work that queues fresh RX buffers */
+ struct delayed_work rx_refill;
+ /* Serializes MHI TX channel queueing and tx_seq updates */
+ spinlock_t tx_lock;
+ /* Links hashed by session id, looked up with the RCU list iterator */
+ struct hlist_head link_list[MHI_MBIM_LINK_HASH_SIZE];
+};
+
+/* Header pushed in front of every transmitted datagram by mbim_tx_fixup():
+ * the NTB header, a single NDP16, and two datagram pointer entries (the
+ * datagram itself followed by the null terminator).
+ */
+struct mbim_tx_hdr {
+ struct usb_cdc_ncm_nth16 nth16;
+ struct usb_cdc_ncm_ndp16 ndp16;
+ struct usb_cdc_ncm_dpe16 dpe16[2];
+} __packed;
+
+/* Look up the link registered for @session in @mbim's hash table.
+ *
+ * The bucket is walked with the RCU list iterator. Returns the matching
+ * link, or NULL when no link uses @session.
+ */
+static struct mhi_mbim_link *mhi_mbim_get_link_rcu(struct mhi_mbim_context *mbim,
+						   unsigned int session)
+{
+	struct hlist_head *bucket = &mbim->link_list[LINK_HASH(session)];
+	struct mhi_mbim_link *entry;
+
+	hlist_for_each_entry_rcu(entry, bucket, hlnode) {
+		if (entry->session != session)
+			continue;
+
+		return entry;
+	}
+
+	return NULL;
+}
+
+/* Prepend the MBIM framing (NTH16 + one NDP16 + two DPE16) to @skb.
+ *
+ * Exactly one NDP carrying the single IP packet is emitted (no aggregation).
+ * @session is placed in the top byte of the NDP signature and @tx_seq in the
+ * NTB header. Returns @skb on success; if headroom cannot be obtained the
+ * skb is freed and NULL is returned.
+ */
+static struct sk_buff *mbim_tx_fixup(struct sk_buff *skb, unsigned int session,
+				     u16 tx_seq)
+{
+	const unsigned int hdr_len = sizeof(struct mbim_tx_hdr);
+	/* Datagram length must be sampled before the header is pushed */
+	const unsigned int payload_len = skb->len;
+	struct usb_cdc_ncm_nth16 *nth;
+	struct usb_cdc_ncm_ndp16 *ndp;
+	struct mbim_tx_hdr *hdr;
+
+	/* Make sure there is writable headroom for the MBIM header */
+	if (skb_cow_head(skb, hdr_len)) {
+		dev_kfree_skb_any(skb);
+		return NULL;
+	}
+
+	hdr = skb_push(skb, hdr_len);
+	nth = &hdr->nth16;
+	ndp = &hdr->ndp16;
+
+	/* NTB header; skb->len now covers header plus datagram */
+	nth->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN);
+	nth->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
+	nth->wSequence = cpu_to_le16(tx_seq);
+	nth->wBlockLength = cpu_to_le16(skb->len);
+	nth->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
+
+	/* The one and only NDP */
+	ndp->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN | (session << 24));
+	ndp->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16)
+				   + sizeof(struct usb_cdc_ncm_dpe16) * 2);
+	ndp->wNextNdpIndex = 0;
+
+	/* Entry 0: the datagram, located right after the MBIM header */
+	ndp->dpe16[0].wDatagramIndex = cpu_to_le16(hdr_len);
+	ndp->dpe16[0].wDatagramLength = cpu_to_le16(payload_len);
+
+	/* Entry 1: null terminator */
+	ndp->dpe16[1].wDatagramIndex = 0;
+	ndp->dpe16[1].wDatagramLength = 0;
+
+	return skb;
+}
+
+/* ndo_start_xmit handler: frame @skb as MBIM and queue it on the MHI TX
+ * channel.
+ *
+ * Framing, queueing and the tx_seq increment happen under tx_lock. The
+ * netdev queue is stopped when the MHI TX queue reports full. On any failure
+ * the skb is freed and tx_dropped is incremented. NETDEV_TX_OK is returned
+ * in every case.
+ */
+static netdev_tx_t mhi_mbim_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+	struct mhi_mbim_context *mbim = link->mbim;
+	unsigned long irq_flags;
+	int rc = -ENOMEM;
+
+	/* Serialize MHI channel queuing and MBIM seq */
+	spin_lock_irqsave(&mbim->tx_lock, irq_flags);
+
+	skb = mbim_tx_fixup(skb, link->session, mbim->tx_seq);
+	if (likely(skb)) {
+		rc = mhi_queue_skb(mbim->mdev, DMA_TO_DEVICE, skb, skb->len,
+				   MHI_EOT);
+
+		if (mhi_queue_is_full(mbim->mdev, DMA_TO_DEVICE))
+			netif_stop_queue(ndev);
+
+		/* Only consume a sequence number for frames actually queued */
+		if (!rc)
+			mbim->tx_seq++;
+	}
+
+	spin_unlock_irqrestore(&mbim->tx_lock, irq_flags);
+
+	if (likely(!rc))
+		return NETDEV_TX_OK;
+
+	/* skb is NULL here when the header fixup already freed it */
+	net_err_ratelimited("%s: Failed to queue TX buf (%d)\n",
+			    ndev->name, rc);
+	dev_kfree_skb_any(skb);
+
+	u64_stats_update_begin(&link->tx_syncp);
+	u64_stats_inc(&link->tx_dropped);
+	u64_stats_update_end(&link->tx_syncp);
+
+	return NETDEV_TX_OK;
+}
+
+/* Validate the NTH16 header at the start of @skb.
+ *
+ * Checks the minimum frame size, the NTH16 signature and that the advertised
+ * block length fits in the skb. A sequence-number discontinuity is only
+ * logged; the received sequence number is then recorded in @mbim->rx_seq.
+ *
+ * Returns the offset of the first NDP (wNdpIndex) on success, or -EINVAL.
+ */
+static int mbim_rx_verify_nth16(struct mhi_mbim_context *mbim, struct sk_buff *skb)
+{
+	struct usb_cdc_ncm_nth16 *nth16;
+	unsigned int len;	/* unsigned: compared with skb->len, printed with %u */
+	u16 seq;
+
+	if (skb->len < sizeof(struct usb_cdc_ncm_nth16) +
+			sizeof(struct usb_cdc_ncm_ndp16)) {
+		net_err_ratelimited("frame too short\n");
+		return -EINVAL;
+	}
+
+	nth16 = (struct usb_cdc_ncm_nth16 *)skb->data;
+
+	if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) {
+		net_err_ratelimited("invalid NTH16 signature <%#010x>\n",
+				    le32_to_cpu(nth16->dwSignature));
+		return -EINVAL;
+	}
+
+	/* No limit on the block length, except the size of the data pkt */
+	len = le16_to_cpu(nth16->wBlockLength);
+	if (len > skb->len) {
+		net_err_ratelimited("NTB does not fit into the skb %u/%u\n",
+				    len, skb->len);
+		return -EINVAL;
+	}
+
+	/* Convert the sequence number once instead of at every use */
+	seq = le16_to_cpu(nth16->wSequence);
+
+	/* Tolerate the initial 0/0 pair and the 0xffff -> 0 wrap-around */
+	if (mbim->rx_seq + 1 != seq &&
+	    (mbim->rx_seq || seq) &&
+	    !(mbim->rx_seq == 0xffff && !seq)) {
+		net_err_ratelimited("sequence number glitch prev=%d curr=%d\n",
+				    mbim->rx_seq, seq);
+	}
+	mbim->rx_seq = seq;
+
+	return le16_to_cpu(nth16->wNdpIndex);
+}
+
+/* Validate an NDP16 header and compute how many datagram entries it holds.
+ *
+ * Returns the number of datagram pointer entries, excluding the trailing
+ * null terminator, or -EINVAL when the NDP length is below the minimum or
+ * the entries cannot fit within @skb.
+ */
+static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16)
+{
+	const u16 ndp_len = le16_to_cpu(ndp16->wLength);
+	int nframes;
+
+	if (ndp_len < USB_CDC_NCM_NDP16_LENGTH_MIN) {
+		net_err_ratelimited("invalid DPT16 length <%u>\n", ndp_len);
+		return -EINVAL;
+	}
+
+	/* Number of entries following the fixed part of the NDP */
+	nframes = (ndp_len - sizeof(struct usb_cdc_ncm_ndp16)) /
+		  sizeof(struct usb_cdc_ncm_dpe16);
+	/* Last entry is always a NULL terminator */
+	nframes--;
+
+	if (sizeof(struct usb_cdc_ncm_ndp16) +
+	    nframes * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) {
+		net_err_ratelimited("Invalid nframes = %d\n", nframes);
+		return -EINVAL;
+	}
+
+	return nframes;
+}
+
+/* Parse one received MBIM NTB held in @skb and deliver its IP datagrams.
+ *
+ * The NTB header is validated, then every NDP in the chain is walked. For
+ * each IPS-type NDP the session's link is looked up under RCU and every
+ * datagram is copied into a freshly allocated skb, tagged as IPv4 or IPv6
+ * from its first nibble, and handed to netif_rx(). NDPs of another type or
+ * for an unknown session are skipped.
+ *
+ * @skb is always released here: consumed after normal processing, freed on
+ * a header or NDP error.
+ *
+ * Hardening against malformed device data:
+ *  - a datagram whose offset/length falls outside @skb is dropped and
+ *    counted in rx_errors instead of being delivered uninitialised;
+ *  - the NDP chain walk is capped, so a wNextNdpIndex cycle cannot loop
+ *    forever.
+ */
+static void mhi_mbim_rx(struct mhi_mbim_context *mbim, struct sk_buff *skb)
+{
+	int loopcount = 50; /* arbitrary max preventing infinite loop */
+	int ndpoffset;
+
+	/* Check NTB header and retrieve first NDP offset */
+	ndpoffset = mbim_rx_verify_nth16(mbim, skb);
+	if (ndpoffset < 0) {
+		net_err_ratelimited("mbim: Incorrect NTB header\n");
+		goto error;
+	}
+
+	/* Process each NDP */
+	while (1) {
+		struct usb_cdc_ncm_ndp16 ndp16;
+		struct usb_cdc_ncm_dpe16 dpe16;
+		struct mhi_mbim_link *link;
+		int nframes, n, dpeoffset;
+		unsigned int session;
+
+		if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) {
+			net_err_ratelimited("mbim: Incorrect NDP offset (%u)\n",
+					    ndpoffset);
+			goto error;
+		}
+
+		/* Check NDP header and retrieve number of datagrams */
+		nframes = mbim_rx_verify_ndp16(skb, &ndp16);
+		if (nframes < 0) {
+			net_err_ratelimited("mbim: Incorrect NDP16\n");
+			goto error;
+		}
+
+		/* Only IP data type supported, no DSS in MHI context */
+		if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK))
+				!= cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) {
+			net_err_ratelimited("mbim: Unsupported NDP type\n");
+			goto next_ndp;
+		}
+
+		/* Session id lives in the top byte of the signature */
+		session = (le32_to_cpu(ndp16.dwSignature) & ~MBIM_NDP16_SIGN_MASK) >> 24;
+
+		rcu_read_lock();
+
+		link = mhi_mbim_get_link_rcu(mbim, session);
+		if (!link) {
+			net_err_ratelimited("mbim: bad packet session (%u)\n", session);
+			goto unlock;
+		}
+
+		/* de-aggregate and deliver IP packets */
+		dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16);
+		for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) {
+			u16 dgram_offset, dgram_len;
+			struct sk_buff *skbn;
+
+			if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16)))
+				break;
+
+			dgram_offset = le16_to_cpu(dpe16.wDatagramIndex);
+			dgram_len = le16_to_cpu(dpe16.wDatagramLength);
+
+			if (!dgram_offset || !dgram_len)
+				break; /* null terminator */
+
+			skbn = netdev_alloc_skb(link->ndev, dgram_len);
+			if (!skbn)
+				continue;
+
+			skb_put(skbn, dgram_len);
+
+			/* Never deliver a buffer the copy could not fill */
+			if (skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len)) {
+				net_err_ratelimited("%s: datagram out of bounds\n",
+						    link->ndev->name);
+				dev_kfree_skb_any(skbn);
+				u64_stats_update_begin(&link->rx_syncp);
+				u64_stats_inc(&link->rx_errors);
+				u64_stats_update_end(&link->rx_syncp);
+				continue;
+			}
+
+			/* IP version nibble selects the protocol */
+			switch (skbn->data[0] & 0xf0) {
+			case 0x40:
+				skbn->protocol = htons(ETH_P_IP);
+				break;
+			case 0x60:
+				skbn->protocol = htons(ETH_P_IPV6);
+				break;
+			default:
+				net_err_ratelimited("%s: unknown protocol\n",
+						    link->ndev->name);
+				dev_kfree_skb_any(skbn);
+				u64_stats_update_begin(&link->rx_syncp);
+				u64_stats_inc(&link->rx_errors);
+				u64_stats_update_end(&link->rx_syncp);
+				continue;
+			}
+
+			u64_stats_update_begin(&link->rx_syncp);
+			u64_stats_inc(&link->rx_packets);
+			u64_stats_add(&link->rx_bytes, skbn->len);
+			u64_stats_update_end(&link->rx_syncp);
+
+			netif_rx(skbn);
+		}
+unlock:
+		rcu_read_unlock();
+next_ndp:
+		/* Other NDP to process? Stop as well once the cap is reached. */
+		ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex);
+		if (!ndpoffset || !--loopcount)
+			break;
+	}
+
+	/* free skb */
+	dev_consume_skb_any(skb);
+	return;
+error:
+	dev_kfree_skb_any(skb);
+}
+
+/* Append @skb to the RX aggregation chain kept in @mbim.
+ *
+ * The first fragment simply becomes the chain head. Later fragments are
+ * linked through the head's frag_list (non-paged chaining) and the head's
+ * len, data_len and truesize are grown accordingly. Returns the chain head.
+ */
+static struct sk_buff *mhi_net_skb_agg(struct mhi_mbim_context *mbim,
+				       struct sk_buff *skb)
+{
+	struct sk_buff *first = mbim->skbagg_head;
+	struct sk_buff *last = mbim->skbagg_tail;
+
+	/* Nothing pending yet: this fragment starts a new chain */
+	if (!first) {
+		mbim->skbagg_head = skb;
+		return skb;
+	}
+
+	/* Second fragment opens the frag_list, later ones extend it */
+	if (skb_shinfo(first)->frag_list)
+		last->next = skb;
+	else
+		skb_shinfo(first)->frag_list = skb;
+
+	first->truesize += skb->truesize;
+	first->data_len += skb->len;
+	first->len += skb->len;
+
+	mbim->skbagg_tail = skb;
+
+	return first;
+}
+
+/* Delayed-work handler that tops up the MHI RX queue.
+ *
+ * Buffers of MHI_DEFAULT_MRU bytes are allocated and queued until the RX
+ * queue is full or an allocation/queueing attempt fails. If every RX
+ * descriptor is still free afterwards, the work re-arms itself in HZ / 2.
+ */
+static void mhi_net_rx_refill_work(struct work_struct *work)
+{
+	struct mhi_mbim_context *mbim = container_of(work, struct mhi_mbim_context,
+						     rx_refill.work);
+	struct mhi_device *mdev = mbim->mdev;
+
+	for (;;) {
+		struct sk_buff *rx_skb;
+
+		if (mhi_queue_is_full(mdev, DMA_FROM_DEVICE))
+			break;
+
+		rx_skb = alloc_skb(MHI_DEFAULT_MRU, GFP_KERNEL);
+		if (unlikely(!rx_skb))
+			break;
+
+		if (unlikely(mhi_queue_skb(mdev, DMA_FROM_DEVICE, rx_skb,
+					   MHI_DEFAULT_MRU, MHI_EOT))) {
+			kfree_skb(rx_skb);
+			break;
+		}
+
+		/* Do not hog the CPU if rx buffers are consumed faster than
+		 * queued (unlikely).
+		 */
+		cond_resched();
+	}
+
+	/* If we're still starved of rx buffers, reschedule later */
+	if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mbim->rx_queue_sz)
+		schedule_delayed_work(&mbim->rx_refill, HZ / 2);
+}
+
+/* MHI downlink (RX) completion callback.
+ *
+ * Dispatches on the transfer status:
+ *   0           - final (or only) fragment: finish any pending aggregation
+ *                 and hand the packet to mhi_mbim_rx();
+ *   -EOVERFLOW  - packet continues in a later transfer: chain the fragment;
+ *   -ENOTCONN   - free the buffer and return without refilling;
+ *   other       - free the buffer.
+ * Except for -ENOTCONN, a refill is scheduled when at least half of the RX
+ * descriptors were free on entry.
+ */
+static void mhi_mbim_dl_callback(struct mhi_device *mhi_dev,
+				 struct mhi_result *mhi_res)
+{
+	struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev);
+	struct sk_buff *skb = mhi_res->buf_addr;
+	/* Sampled before the buffer is processed */
+	int free_descs = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+	switch (mhi_res->transaction_status) {
+	case 0:
+		skb_put(skb, mhi_res->bytes_xferd);
+
+		if (mbim->skbagg_head) {
+			/* Aggregate the final fragment */
+			skb = mhi_net_skb_agg(mbim, skb);
+			mbim->skbagg_head = NULL;
+		}
+
+		mhi_mbim_rx(mbim, skb);
+		break;
+	case -EOVERFLOW:
+		/* Packet has been split over multiple transfers */
+		skb_put(skb, mhi_res->bytes_xferd);
+		mhi_net_skb_agg(mbim, skb);
+		break;
+	case -ENOTCONN:
+		/* MHI layer stopping/resetting the DL channel */
+		dev_kfree_skb_any(skb);
+		return;
+	default:
+		/* Unknown error, simply drop */
+		dev_kfree_skb_any(skb);
+		break;
+	}
+
+	/* Refill if RX buffers queue becomes low */
+	if (free_descs >= mbim->rx_queue_sz / 2)
+		schedule_delayed_work(&mbim->rx_refill, 0);
+}
+
+/*
+ * .ndo_get_stats64 handler: copy the per-link RX and TX counters into @stats.
+ *
+ * RX and TX counters are guarded by separate sync points (rx_syncp and
+ * tx_syncp); each group is re-read until u64_stats_fetch_retry_irq()
+ * reports that the read did not need to be retried.
+ */
+static void mhi_mbim_ndo_get_stats64(struct net_device *ndev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+ unsigned int start;
+
+ /* RX counters */
+ do {
+ start = u64_stats_fetch_begin_irq(&link->rx_syncp);
+ stats->rx_packets = u64_stats_read(&link->rx_packets);
+ stats->rx_bytes = u64_stats_read(&link->rx_bytes);
+ stats->rx_errors = u64_stats_read(&link->rx_errors);
+ } while (u64_stats_fetch_retry_irq(&link->rx_syncp, start));
+
+ /* TX counters */
+ do {
+ start = u64_stats_fetch_begin_irq(&link->tx_syncp);
+ stats->tx_packets = u64_stats_read(&link->tx_packets);
+ stats->tx_bytes = u64_stats_read(&link->tx_bytes);
+ stats->tx_errors = u64_stats_read(&link->tx_errors);
+ stats->tx_dropped = u64_stats_read(&link->tx_dropped);
+ } while (u64_stats_fetch_retry_irq(&link->tx_syncp, start));
+}
+
+/*
+ * Uplink transfer completion handler.
+ *
+ * Releases the transmitted skb, accounts the transfer in the TX statistics
+ * of the link owning the skb, and wakes the netdev queue when it was
+ * stopped and the MHI uplink queue has room again. When the channel is
+ * being stopped/reset (-ENOTCONN) nothing is accounted and the queue is
+ * left untouched.
+ */
+static void mhi_mbim_ul_callback(struct mhi_device *mhi_dev,
+				 struct mhi_result *mhi_res)
+{
+	struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev);
+	struct sk_buff *skb = mhi_res->buf_addr;
+	/* Grab the owning netdev before the skb is released below */
+	struct net_device *ndev = skb->dev;
+	struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+	int status = mhi_res->transaction_status;
+
+	/* Hardware has consumed the buffer, so free the skb (which is not
+	 * freed by the MHI stack) and perform accounting.
+	 */
+	dev_consume_skb_any(skb);
+
+	u64_stats_update_begin(&link->tx_syncp);
+	if (likely(!status)) {
+		u64_stats_inc(&link->tx_packets);
+		u64_stats_add(&link->tx_bytes, mhi_res->bytes_xferd);
+	} else if (status != -ENOTCONN) {
+		u64_stats_inc(&link->tx_errors);
+	}
+	u64_stats_update_end(&link->tx_syncp);
+
+	/* MHI layer stopping/resetting the UL channel */
+	if (status == -ENOTCONN)
+		return;
+
+	if (!netif_queue_stopped(ndev) || mhi_queue_is_full(mbim->mdev, DMA_TO_DEVICE))
+		return;
+
+	netif_wake_queue(ndev);
+}
+
+/*
+ * .ndo_open handler: kick the RX refill work of the owning MBIM context,
+ * mark the carrier as up and start the TX queue. Always returns 0.
+ */
+static int mhi_mbim_ndo_open(struct net_device *ndev)
+{
+ struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+
+ /* Feed the MHI rx buffer pool */
+ schedule_delayed_work(&link->mbim->rx_refill, 0);
+
+ /* Carrier is established via out-of-band channel (e.g. qmi) */
+ netif_carrier_on(ndev);
+
+ netif_start_queue(ndev);
+
+ return 0;
+}
+
+/*
+ * .ndo_stop handler: stop the TX queue and mark the carrier as down.
+ * The RX refill work is not touched here. Always returns 0.
+ */
+static int mhi_mbim_ndo_stop(struct net_device *ndev)
+{
+ netif_stop_queue(ndev);
+ netif_carrier_off(ndev);
+
+ return 0;
+}
+
+/* Network device operations installed on every MBIM link by mhi_mbim_setup() */
+static const struct net_device_ops mhi_mbim_ndo = {
+ .ndo_open = mhi_mbim_ndo_open,
+ .ndo_stop = mhi_mbim_ndo_stop,
+ .ndo_start_xmit = mhi_mbim_ndo_xmit,
+ .ndo_get_stats64 = mhi_mbim_ndo_get_stats64,
+};
+
+/*
+ * WWAN .newlink handler: bind @ndev to MBIM session @if_id.
+ *
+ * @ctxt is the MBIM context that was handed to wwan_register_ops().
+ * Returns -EEXIST when a link already exists for @if_id, otherwise the
+ * result of register_netdevice().
+ *
+ * The link is published on the session hash list before the netdev is
+ * registered. If registration fails, the link is unpublished again and an
+ * RCU grace period is awaited, so that no lookup can still return a link
+ * whose netdev did not get registered (the link is stored in the netdev
+ * private area and presumably goes away with it once the caller releases
+ * the failed netdev).
+ */
+static int mhi_mbim_newlink(void *ctxt, struct net_device *ndev, u32 if_id,
+			    struct netlink_ext_ack *extack)
+{
+	struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+	struct mhi_mbim_context *mbim = ctxt;
+	int err;
+
+	link->session = if_id;
+	link->mbim = mbim;
+	link->ndev = ndev;
+	u64_stats_init(&link->rx_syncp);
+	u64_stats_init(&link->tx_syncp);
+
+	/* Refuse a second link for an already used session id */
+	rcu_read_lock();
+	if (mhi_mbim_get_link_rcu(mbim, if_id)) {
+		rcu_read_unlock();
+		return -EEXIST;
+	}
+	rcu_read_unlock();
+
+	/* Already protected by RTNL lock */
+	hlist_add_head_rcu(&link->hlnode, &mbim->link_list[LINK_HASH(if_id)]);
+
+	err = register_netdevice(ndev);
+	if (err) {
+		/* Undo the publication, same sequence as mhi_mbim_dellink() */
+		hlist_del_init_rcu(&link->hlnode);
+		synchronize_rcu();
+	}
+
+	return err;
+}
+
+/*
+ * WWAN .dellink handler: remove the link from the session hash list, wait
+ * for an RCU grace period so that concurrent RCU lookups are done with it,
+ * then queue the netdev for unregistration on @head.
+ */
+static void mhi_mbim_dellink(void *ctxt, struct net_device *ndev,
+ struct list_head *head)
+{
+ struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+
+ hlist_del_init_rcu(&link->hlnode);
+ synchronize_rcu();
+
+ unregister_netdevice_queue(ndev, head);
+}
+
+/*
+ * WWAN .setup handler: initialise @ndev as a header-less, point-to-point
+ * raw IP interface that reserves headroom for the MBIM TX header.
+ */
+static void mhi_mbim_setup(struct net_device *ndev)
+{
+	/* Operations and interface flags */
+	ndev->netdev_ops = &mhi_mbim_ndo;
+	ndev->flags = IFF_POINTOPOINT | IFF_NOARP;
+	ndev->tx_queue_len = 1000;
+
+	/* Raw IP link: no link-layer header and no hardware address */
+	ndev->type = ARPHRD_RAWIP;
+	ndev->header_ops = NULL; /* No header */
+	ndev->hard_header_len = 0;
+	ndev->addr_len = 0;
+
+	/* Room in front of the payload for the MBIM TX header */
+	ndev->needed_headroom = sizeof(struct mbim_tx_hdr);
+
+	/* MTU limits; the upper bound accounts for the headroom set above */
+	ndev->min_mtu = ETH_MIN_MTU;
+	ndev->max_mtu = MHI_MAX_BUF_SZ - ndev->needed_headroom;
+	ndev->mtu = MHI_MBIM_DEFAULT_MTU;
+}
+
+/*
+ * WWAN link operations registered by mhi_mbim_probe(). priv_size makes each
+ * link netdev carry a struct mhi_mbim_link as its driver private data.
+ */
+static const struct wwan_ops mhi_mbim_wwan_ops = {
+ .priv_size = sizeof(struct mhi_mbim_link),
+ .setup = mhi_mbim_setup,
+ .newlink = mhi_mbim_newlink,
+ .dellink = mhi_mbim_dellink,
+};
+
+/*
+ * MHI driver probe: allocate and initialise the per-device MBIM context,
+ * start the MHI channels and register the WWAN link operations.
+ *
+ * Returns 0 on success, -ENOMEM if the context cannot be allocated, or the
+ * error returned by mhi_prepare_for_transfer() / wwan_register_ops().
+ */
+static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+ struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
+ struct mhi_mbim_context *mbim;
+ int err;
+
+ /* Device-managed allocation, tied to &mhi_dev->dev */
+ mbim = devm_kzalloc(&mhi_dev->dev, sizeof(*mbim), GFP_KERNEL);
+ if (!mbim)
+ return -ENOMEM;
+
+ spin_lock_init(&mbim->tx_lock);
+ /* The transfer callbacks retrieve the context through drvdata */
+ dev_set_drvdata(&mhi_dev->dev, mbim);
+ mbim->mdev = mhi_dev;
+ /* Use the controller's MRU when it provides one, else the default */
+ mbim->mru = mhi_dev->mhi_cntrl->mru ? mhi_dev->mhi_cntrl->mru : MHI_DEFAULT_MRU;
+
+ INIT_DELAYED_WORK(&mbim->rx_refill, mhi_net_rx_refill_work);
+
+ /* Start MHI channels */
+ err = mhi_prepare_for_transfer(mhi_dev);
+ if (err)
+ return err;
+
+ /* Number of transfer descriptors determines size of the queue */
+ mbim->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+ /* Register wwan link ops with MHI controller representing WWAN instance */
+ return wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_mbim_wwan_ops, mbim, 0);
+}
+
+/*
+ * MHI driver remove: stop the MHI channels, make sure the RX refill work is
+ * no longer running, unregister the WWAN link operations, release any
+ * partially aggregated RX packet and clear the driver data pointer.
+ */
+static void mhi_mbim_remove(struct mhi_device *mhi_dev)
+{
+ struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev);
+ struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
+
+ mhi_unprepare_from_transfer(mhi_dev);
+ cancel_delayed_work_sync(&mbim->rx_refill);
+ wwan_unregister_ops(&cntrl->mhi_dev->dev);
+ /* Drop fragments of a packet whose final part never arrived */
+ kfree_skb(mbim->skbagg_head);
+ dev_set_drvdata(&mhi_dev->dev, NULL);
+}
+
+/* MHI channels this driver binds to; the empty entry terminates the table */
+static const struct mhi_device_id mhi_mbim_id_table[] = {
+ /* Hardware accelerated data PATH (to modem IPA), MBIM protocol */
+ { .chan = "IP_HW0_MBIM", .driver_data = 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(mhi, mhi_mbim_id_table);
+
+/*
+ * MHI driver description: probe/remove entry points plus the downlink and
+ * uplink transfer completion callbacks, matched against mhi_mbim_id_table.
+ */
+static struct mhi_driver mhi_mbim_driver = {
+ .probe = mhi_mbim_probe,
+ .remove = mhi_mbim_remove,
+ .dl_xfer_cb = mhi_mbim_dl_callback,
+ .ul_xfer_cb = mhi_mbim_ul_callback,
+ .id_table = mhi_mbim_id_table,
+ .driver = {
+ .name = "mhi_wwan_mbim",
+ .owner = THIS_MODULE,
+ },
+};
+
+module_mhi_driver(mhi_mbim_driver);
+
+MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
+MODULE_DESCRIPTION("Network/MBIM over MHI");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 674a81d79db3..d293ab688044 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -164,11 +164,14 @@ static struct wwan_device *wwan_create_dev(struct device *parent)
goto done_unlock;
id = ida_alloc(&wwan_dev_ids, GFP_KERNEL);
- if (id < 0)
+ if (id < 0) {
+ wwandev = ERR_PTR(id);
goto done_unlock;
+ }
wwandev = kzalloc(sizeof(*wwandev), GFP_KERNEL);
if (!wwandev) {
+ wwandev = ERR_PTR(-ENOMEM);
ida_free(&wwan_dev_ids, id);
goto done_unlock;
}
@@ -182,7 +185,8 @@ static struct wwan_device *wwan_create_dev(struct device *parent)
err = device_register(&wwandev->dev);
if (err) {
put_device(&wwandev->dev);
- wwandev = NULL;
+ wwandev = ERR_PTR(err);
+ goto done_unlock;
}
done_unlock:
@@ -355,8 +359,8 @@ struct wwan_port *wwan_create_port(struct device *parent,
{
struct wwan_device *wwandev;
struct wwan_port *port;
- int minor, err = -ENOMEM;
char namefmt[0x20];
+ int minor, err;
if (type > WWAN_PORT_MAX || !ops)
return ERR_PTR(-EINVAL);
@@ -370,11 +374,14 @@ struct wwan_port *wwan_create_port(struct device *parent,
/* A port is exposed as character device, get a minor */
minor = ida_alloc_range(&minors, 0, WWAN_MAX_MINORS - 1, GFP_KERNEL);
- if (minor < 0)
+ if (minor < 0) {
+ err = minor;
goto error_wwandev_remove;
+ }
port = kzalloc(sizeof(*port), GFP_KERNEL);
if (!port) {
+ err = -ENOMEM;
ida_free(&minors, minor);
goto error_wwandev_remove;
}
@@ -1014,8 +1021,8 @@ int wwan_register_ops(struct device *parent, const struct wwan_ops *ops,
return -EINVAL;
wwandev = wwan_create_dev(parent);
- if (!wwandev)
- return -ENOMEM;
+ if (IS_ERR(wwandev))
+ return PTR_ERR(wwandev);
if (WARN_ON(wwandev->ops)) {
wwan_remove_dev(wwandev);
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 44275908d61a..e31b98403f31 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -126,21 +126,17 @@ struct netfront_queue {
/*
* {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
- * are linked from tx_skb_freelist through skb_entry.link.
- *
- * NB. Freelist index entries are always going to be less than
- * PAGE_OFFSET, whereas pointers to skbs will always be equal or
- * greater than PAGE_OFFSET: we use this property to distinguish
- * them.
+ * are linked from tx_skb_freelist through tx_link.
*/
- union skb_entry {
- struct sk_buff *skb;
- unsigned long link;
- } tx_skbs[NET_TX_RING_SIZE];
+ struct sk_buff *tx_skbs[NET_TX_RING_SIZE];
+ unsigned short tx_link[NET_TX_RING_SIZE];
+#define TX_LINK_NONE 0xffff
+#define TX_PENDING 0xfffe
grant_ref_t gref_tx_head;
grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
struct page *grant_tx_page[NET_TX_RING_SIZE];
unsigned tx_skb_freelist;
+ unsigned int tx_pend_queue;
spinlock_t rx_lock ____cacheline_aligned_in_smp;
struct xen_netif_rx_front_ring rx;
@@ -173,6 +169,9 @@ struct netfront_info {
bool netback_has_xdp_headroom;
bool netfront_xdp_enabled;
+ /* Is device behaving sane? */
+ bool broken;
+
atomic_t rx_gso_checksum_fixup;
};
@@ -181,33 +180,25 @@ struct netfront_rx_info {
struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
};
-static void skb_entry_set_link(union skb_entry *list, unsigned short id)
-{
- list->link = id;
-}
-
-static int skb_entry_is_link(const union skb_entry *list)
-{
- BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link));
- return (unsigned long)list->skb < PAGE_OFFSET;
-}
-
/*
* Access macros for acquiring freeing slots in tx_skbs[].
*/
-static void add_id_to_freelist(unsigned *head, union skb_entry *list,
- unsigned short id)
+static void add_id_to_list(unsigned *head, unsigned short *list,
+ unsigned short id)
{
- skb_entry_set_link(&list[id], *head);
+ list[id] = *head;
*head = id;
}
-static unsigned short get_id_from_freelist(unsigned *head,
- union skb_entry *list)
+static unsigned short get_id_from_list(unsigned *head, unsigned short *list)
{
unsigned int id = *head;
- *head = list[id].link;
+
+ if (id != TX_LINK_NONE) {
+ *head = list[id];
+ list[id] = TX_LINK_NONE;
+ }
return id;
}
@@ -363,7 +354,7 @@ static int xennet_open(struct net_device *dev)
unsigned int i = 0;
struct netfront_queue *queue = NULL;
- if (!np->queues)
+ if (!np->queues || np->broken)
return -ENODEV;
for (i = 0; i < num_queues; ++i) {
@@ -391,27 +382,47 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
unsigned short id;
struct sk_buff *skb;
bool more_to_do;
+ const struct device *dev = &queue->info->netdev->dev;
BUG_ON(!netif_carrier_ok(queue->info->netdev));
do {
prod = queue->tx.sring->rsp_prod;
+ if (RING_RESPONSE_PROD_OVERFLOW(&queue->tx, prod)) {
+ dev_alert(dev, "Illegal number of responses %u\n",
+ prod - queue->tx.rsp_cons);
+ goto err;
+ }
rmb(); /* Ensure we see responses up to 'rp'. */
for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
- struct xen_netif_tx_response *txrsp;
+ struct xen_netif_tx_response txrsp;
- txrsp = RING_GET_RESPONSE(&queue->tx, cons);
- if (txrsp->status == XEN_NETIF_RSP_NULL)
+ RING_COPY_RESPONSE(&queue->tx, cons, &txrsp);
+ if (txrsp.status == XEN_NETIF_RSP_NULL)
continue;
- id = txrsp->id;
- skb = queue->tx_skbs[id].skb;
+ id = txrsp.id;
+ if (id >= RING_SIZE(&queue->tx)) {
+ dev_alert(dev,
+ "Response has incorrect id (%u)\n",
+ id);
+ goto err;
+ }
+ if (queue->tx_link[id] != TX_PENDING) {
+ dev_alert(dev,
+ "Response for inactive request\n");
+ goto err;
+ }
+
+ queue->tx_link[id] = TX_LINK_NONE;
+ skb = queue->tx_skbs[id];
+ queue->tx_skbs[id] = NULL;
if (unlikely(gnttab_query_foreign_access(
queue->grant_tx_ref[id]) != 0)) {
- pr_alert("%s: warning -- grant still in use by backend domain\n",
- __func__);
- BUG();
+ dev_alert(dev,
+ "Grant still in use by backend domain\n");
+ goto err;
}
gnttab_end_foreign_access_ref(
queue->grant_tx_ref[id], GNTMAP_readonly);
@@ -419,7 +430,7 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
&queue->gref_tx_head, queue->grant_tx_ref[id]);
queue->grant_tx_ref[id] = GRANT_INVALID_REF;
queue->grant_tx_page[id] = NULL;
- add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id);
+ add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, id);
dev_kfree_skb_irq(skb);
}
@@ -429,13 +440,20 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
} while (more_to_do);
xennet_maybe_wake_tx(queue);
+
+ return;
+
+ err:
+ queue->info->broken = true;
+ dev_alert(dev, "Disabled for further use\n");
}
struct xennet_gnttab_make_txreq {
struct netfront_queue *queue;
struct sk_buff *skb;
struct page *page;
- struct xen_netif_tx_request *tx; /* Last request */
+ struct xen_netif_tx_request *tx; /* Last request on ring page */
+ struct xen_netif_tx_request tx_local; /* Last request local copy*/
unsigned int size;
};
@@ -451,7 +469,7 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset,
struct netfront_queue *queue = info->queue;
struct sk_buff *skb = info->skb;
- id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
+ id = get_id_from_list(&queue->tx_skb_freelist, queue->tx_link);
tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
@@ -459,34 +477,37 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset,
gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
gfn, GNTMAP_readonly);
- queue->tx_skbs[id].skb = skb;
+ queue->tx_skbs[id] = skb;
queue->grant_tx_page[id] = page;
queue->grant_tx_ref[id] = ref;
- tx->id = id;
- tx->gref = ref;
- tx->offset = offset;
- tx->size = len;
- tx->flags = 0;
+ info->tx_local.id = id;
+ info->tx_local.gref = ref;
+ info->tx_local.offset = offset;
+ info->tx_local.size = len;
+ info->tx_local.flags = 0;
+
+ *tx = info->tx_local;
+
+ /*
+ * Put the request in the pending queue, it will be set to be pending
+ * when the producer index is about to be raised.
+ */
+ add_id_to_list(&queue->tx_pend_queue, queue->tx_link, id);
info->tx = tx;
- info->size += tx->size;
+ info->size += info->tx_local.size;
}
static struct xen_netif_tx_request *xennet_make_first_txreq(
- struct netfront_queue *queue, struct sk_buff *skb,
- struct page *page, unsigned int offset, unsigned int len)
+ struct xennet_gnttab_make_txreq *info,
+ unsigned int offset, unsigned int len)
{
- struct xennet_gnttab_make_txreq info = {
- .queue = queue,
- .skb = skb,
- .page = page,
- .size = 0,
- };
+ info->size = 0;
- gnttab_for_one_grant(page, offset, len, xennet_tx_setup_grant, &info);
+ gnttab_for_one_grant(info->page, offset, len, xennet_tx_setup_grant, info);
- return info.tx;
+ return info->tx;
}
static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset,
@@ -499,35 +520,27 @@ static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset,
xennet_tx_setup_grant(gfn, offset, len, data);
}
-static struct xen_netif_tx_request *xennet_make_txreqs(
- struct netfront_queue *queue, struct xen_netif_tx_request *tx,
- struct sk_buff *skb, struct page *page,
+static void xennet_make_txreqs(
+ struct xennet_gnttab_make_txreq *info,
+ struct page *page,
unsigned int offset, unsigned int len)
{
- struct xennet_gnttab_make_txreq info = {
- .queue = queue,
- .skb = skb,
- .tx = tx,
- };
-
/* Skip unused frames from start of page */
page += offset >> PAGE_SHIFT;
offset &= ~PAGE_MASK;
while (len) {
- info.page = page;
- info.size = 0;
+ info->page = page;
+ info->size = 0;
gnttab_foreach_grant_in_range(page, offset, len,
xennet_make_one_txreq,
- &info);
+ info);
page++;
offset = 0;
- len -= info.size;
+ len -= info->size;
}
-
- return info.tx;
}
/*
@@ -574,19 +587,34 @@ static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
return queue_idx;
}
+/*
+ * Drain the queue's tx_pend_queue list: every id taken from it has its
+ * tx_link[] entry set to TX_PENDING. The loop ends when
+ * get_id_from_list() returns TX_LINK_NONE, i.e. the list is empty.
+ */
+static void xennet_mark_tx_pending(struct netfront_queue *queue)
+{
+ unsigned int i;
+
+ while ((i = get_id_from_list(&queue->tx_pend_queue, queue->tx_link)) !=
+ TX_LINK_NONE)
+ queue->tx_link[i] = TX_PENDING;
+}
+
static int xennet_xdp_xmit_one(struct net_device *dev,
struct netfront_queue *queue,
struct xdp_frame *xdpf)
{
struct netfront_info *np = netdev_priv(dev);
struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
+ struct xennet_gnttab_make_txreq info = {
+ .queue = queue,
+ .skb = NULL,
+ .page = virt_to_page(xdpf->data),
+ };
int notify;
- xennet_make_first_txreq(queue, NULL,
- virt_to_page(xdpf->data),
+ xennet_make_first_txreq(&info,
offset_in_page(xdpf->data),
xdpf->len);
+ xennet_mark_tx_pending(queue);
+
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
if (notify)
notify_remote_via_irq(queue->tx_irq);
@@ -611,6 +639,8 @@ static int xennet_xdp_xmit(struct net_device *dev, int n,
int nxmit = 0;
int i;
+ if (unlikely(np->broken))
+ return -ENODEV;
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
return -EINVAL;
@@ -638,7 +668,7 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
{
struct netfront_info *np = netdev_priv(dev);
struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
- struct xen_netif_tx_request *tx, *first_tx;
+ struct xen_netif_tx_request *first_tx;
unsigned int i;
int notify;
int slots;
@@ -647,6 +677,7 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
unsigned int len;
unsigned long flags;
struct netfront_queue *queue = NULL;
+ struct xennet_gnttab_make_txreq info = { };
unsigned int num_queues = dev->real_num_tx_queues;
u16 queue_index;
struct sk_buff *nskb;
@@ -654,6 +685,8 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
/* Drop the packet if no queues are set up */
if (num_queues < 1)
goto drop;
+ if (unlikely(np->broken))
+ goto drop;
/* Determine which queue to transmit this SKB on */
queue_index = skb_get_queue_mapping(skb);
queue = &np->queues[queue_index];
@@ -704,21 +737,24 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
}
/* First request for the linear area. */
- first_tx = tx = xennet_make_first_txreq(queue, skb,
- page, offset, len);
- offset += tx->size;
+ info.queue = queue;
+ info.skb = skb;
+ info.page = page;
+ first_tx = xennet_make_first_txreq(&info, offset, len);
+ offset += info.tx_local.size;
if (offset == PAGE_SIZE) {
page++;
offset = 0;
}
- len -= tx->size;
+ len -= info.tx_local.size;
if (skb->ip_summed == CHECKSUM_PARTIAL)
/* local packet? */
- tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
+ first_tx->flags |= XEN_NETTXF_csum_blank |
+ XEN_NETTXF_data_validated;
else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
/* remote but checksummed. */
- tx->flags |= XEN_NETTXF_data_validated;
+ first_tx->flags |= XEN_NETTXF_data_validated;
/* Optional extra info after the first request. */
if (skb_shinfo(skb)->gso_size) {
@@ -727,7 +763,7 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
gso = (struct xen_netif_extra_info *)
RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
- tx->flags |= XEN_NETTXF_extra_info;
+ first_tx->flags |= XEN_NETTXF_extra_info;
gso->u.gso.size = skb_shinfo(skb)->gso_size;
gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
@@ -741,12 +777,12 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
}
/* Requests for the rest of the linear area. */
- tx = xennet_make_txreqs(queue, tx, skb, page, offset, len);
+ xennet_make_txreqs(&info, page, offset, len);
/* Requests for all the frags. */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- tx = xennet_make_txreqs(queue, tx, skb, skb_frag_page(frag),
+ xennet_make_txreqs(&info, skb_frag_page(frag),
skb_frag_off(frag),
skb_frag_size(frag));
}
@@ -757,6 +793,8 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
/* timestamp packet in software */
skb_tx_timestamp(skb);
+ xennet_mark_tx_pending(queue);
+
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
if (notify)
notify_remote_via_irq(queue->tx_irq);
@@ -814,7 +852,7 @@ static int xennet_get_extras(struct netfront_queue *queue,
RING_IDX rp)
{
- struct xen_netif_extra_info *extra;
+ struct xen_netif_extra_info extra;
struct device *dev = &queue->info->netdev->dev;
RING_IDX cons = queue->rx.rsp_cons;
int err = 0;
@@ -830,24 +868,22 @@ static int xennet_get_extras(struct netfront_queue *queue,
break;
}
- extra = (struct xen_netif_extra_info *)
- RING_GET_RESPONSE(&queue->rx, ++cons);
+ RING_COPY_RESPONSE(&queue->rx, ++cons, &extra);
- if (unlikely(!extra->type ||
- extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+ if (unlikely(!extra.type ||
+ extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
if (net_ratelimit())
dev_warn(dev, "Invalid extra type: %d\n",
- extra->type);
+ extra.type);
err = -EINVAL;
} else {
- memcpy(&extras[extra->type - 1], extra,
- sizeof(*extra));
+ extras[extra.type - 1] = extra;
}
skb = xennet_get_rx_skb(queue, cons);
ref = xennet_get_rx_ref(queue, cons);
xennet_move_rx_slot(queue, skb, ref);
- } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
+ } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
queue->rx.rsp_cons = cons;
return err;
@@ -905,7 +941,7 @@ static int xennet_get_responses(struct netfront_queue *queue,
struct sk_buff_head *list,
bool *need_xdp_flush)
{
- struct xen_netif_rx_response *rx = &rinfo->rx;
+ struct xen_netif_rx_response *rx = &rinfo->rx, rx_local;
int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD);
RING_IDX cons = queue->rx.rsp_cons;
struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
@@ -989,7 +1025,8 @@ next:
break;
}
- rx = RING_GET_RESPONSE(&queue->rx, cons + slots);
+ RING_COPY_RESPONSE(&queue->rx, cons + slots, &rx_local);
+ rx = &rx_local;
skb = xennet_get_rx_skb(queue, cons + slots);
ref = xennet_get_rx_ref(queue, cons + slots);
slots++;
@@ -1044,10 +1081,11 @@ static int xennet_fill_frags(struct netfront_queue *queue,
struct sk_buff *nskb;
while ((nskb = __skb_dequeue(list))) {
- struct xen_netif_rx_response *rx =
- RING_GET_RESPONSE(&queue->rx, ++cons);
+ struct xen_netif_rx_response rx;
skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
+ RING_COPY_RESPONSE(&queue->rx, ++cons, &rx);
+
if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
@@ -1062,7 +1100,7 @@ static int xennet_fill_frags(struct netfront_queue *queue,
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
skb_frag_page(nfrag),
- rx->offset, rx->status, PAGE_SIZE);
+ rx.offset, rx.status, PAGE_SIZE);
skb_shinfo(nskb)->nr_frags = 0;
kfree_skb(nskb);
@@ -1156,12 +1194,19 @@ static int xennet_poll(struct napi_struct *napi, int budget)
skb_queue_head_init(&tmpq);
rp = queue->rx.sring->rsp_prod;
+ if (RING_RESPONSE_PROD_OVERFLOW(&queue->rx, rp)) {
+ dev_alert(&dev->dev, "Illegal number of responses %u\n",
+ rp - queue->rx.rsp_cons);
+ queue->info->broken = true;
+ spin_unlock(&queue->rx_lock);
+ return 0;
+ }
rmb(); /* Ensure we see queued responses up to 'rp'. */
i = queue->rx.rsp_cons;
work_done = 0;
while ((i != rp) && (work_done < budget)) {
- memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx));
+ RING_COPY_RESPONSE(&queue->rx, i, rx);
memset(extras, 0, sizeof(rinfo.extras));
err = xennet_get_responses(queue, &rinfo, rp, &tmpq,
@@ -1286,17 +1331,18 @@ static void xennet_release_tx_bufs(struct netfront_queue *queue)
for (i = 0; i < NET_TX_RING_SIZE; i++) {
/* Skip over entries which are actually freelist references */
- if (skb_entry_is_link(&queue->tx_skbs[i]))
+ if (!queue->tx_skbs[i])
continue;
- skb = queue->tx_skbs[i].skb;
+ skb = queue->tx_skbs[i];
+ queue->tx_skbs[i] = NULL;
get_page(queue->grant_tx_page[i]);
gnttab_end_foreign_access(queue->grant_tx_ref[i],
GNTMAP_readonly,
(unsigned long)page_address(queue->grant_tx_page[i]));
queue->grant_tx_page[i] = NULL;
queue->grant_tx_ref[i] = GRANT_INVALID_REF;
- add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, i);
+ add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, i);
dev_kfree_skb_irq(skb);
}
}
@@ -1376,6 +1422,9 @@ static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
struct netfront_queue *queue = dev_id;
unsigned long flags;
+ if (queue->info->broken)
+ return IRQ_HANDLED;
+
spin_lock_irqsave(&queue->tx_lock, flags);
xennet_tx_buf_gc(queue);
spin_unlock_irqrestore(&queue->tx_lock, flags);
@@ -1388,6 +1437,9 @@ static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
struct netfront_queue *queue = dev_id;
struct net_device *dev = queue->info->netdev;
+ if (queue->info->broken)
+ return IRQ_HANDLED;
+
if (likely(netif_carrier_ok(dev) &&
RING_HAS_UNCONSUMED_RESPONSES(&queue->rx)))
napi_schedule(&queue->napi);
@@ -1409,6 +1461,10 @@ static void xennet_poll_controller(struct net_device *dev)
struct netfront_info *info = netdev_priv(dev);
unsigned int num_queues = dev->real_num_tx_queues;
unsigned int i;
+
+ if (info->broken)
+ return;
+
for (i = 0; i < num_queues; ++i)
xennet_interrupt(0, &info->queues[i]);
}
@@ -1480,6 +1536,11 @@ static int xennet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
static int xennet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
+ struct netfront_info *np = netdev_priv(dev);
+
+ if (np->broken)
+ return -ENODEV;
+
switch (xdp->command) {
case XDP_SETUP_PROG:
return xennet_xdp_set(dev, xdp->prog, xdp->extack);
@@ -1853,13 +1914,15 @@ static int xennet_init_queue(struct netfront_queue *queue)
snprintf(queue->name, sizeof(queue->name), "vif%s-q%u",
devid, queue->id);
- /* Initialise tx_skbs as a free chain containing every entry. */
+ /* Initialise tx_skb_freelist as a free chain containing every entry. */
queue->tx_skb_freelist = 0;
+ queue->tx_pend_queue = TX_LINK_NONE;
for (i = 0; i < NET_TX_RING_SIZE; i++) {
- skb_entry_set_link(&queue->tx_skbs[i], i+1);
+ queue->tx_link[i] = i + 1;
queue->grant_tx_ref[i] = GRANT_INVALID_REF;
queue->grant_tx_page[i] = NULL;
}
+ queue->tx_link[NET_TX_RING_SIZE - 1] = TX_LINK_NONE;
/* Clear out rx_skbs */
for (i = 0; i < NET_RX_RING_SIZE; i++) {
@@ -2128,6 +2191,9 @@ static int talk_to_netback(struct xenbus_device *dev,
if (info->queues)
xennet_destroy_queues(info);
+ /* For the case of a reconnect reset the "broken" indicator. */
+ info->broken = false;
+
err = xennet_create_queues(info, &num_queues);
if (err < 0) {
xenbus_dev_fatal(dev, err, "creating queues");
diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c
index 528745862738..c6b3334f24c9 100644
--- a/drivers/nfc/fdp/fdp.c
+++ b/drivers/nfc/fdp/fdp.c
@@ -38,7 +38,7 @@
#define NCI_OP_PROP_SET_PDATA_OID 0x23
struct fdp_nci_info {
- struct nfc_phy_ops *phy_ops;
+ const struct nfc_phy_ops *phy_ops;
struct fdp_i2c_phy *phy;
struct nci_dev *ndev;
@@ -52,7 +52,7 @@ struct fdp_nci_info {
u32 limited_otp_version;
u8 key_index;
- u8 *fw_vsc_cfg;
+ const u8 *fw_vsc_cfg;
u8 clock_type;
u32 clock_freq;
@@ -65,7 +65,7 @@ struct fdp_nci_info {
wait_queue_head_t setup_wq;
};
-static u8 nci_core_get_config_otp_ram_version[5] = {
+static const u8 nci_core_get_config_otp_ram_version[5] = {
0x04,
NCI_PARAM_ID_FW_RAM_VERSION,
NCI_PARAM_ID_FW_OTP_VERSION,
@@ -111,7 +111,7 @@ static inline int fdp_nci_patch_cmd(struct nci_dev *ndev, u8 type)
}
static inline int fdp_nci_set_production_data(struct nci_dev *ndev, u8 len,
- char *data)
+ const char *data)
{
return nci_prop_cmd(ndev, NCI_OP_PROP_SET_PDATA_OID, len, data);
}
@@ -236,7 +236,7 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type)
static int fdp_nci_open(struct nci_dev *ndev)
{
- struct fdp_nci_info *info = nci_get_drvdata(ndev);
+ const struct fdp_nci_info *info = nci_get_drvdata(ndev);
return info->phy_ops->enable(info->phy);
}
@@ -260,7 +260,7 @@ static int fdp_nci_request_firmware(struct nci_dev *ndev)
{
struct fdp_nci_info *info = nci_get_drvdata(ndev);
struct device *dev = &info->phy->i2c_dev->dev;
- u8 *data;
+ const u8 *data;
int r;
r = request_firmware(&info->ram_patch, FDP_RAM_PATCH_NAME, dev);
@@ -269,15 +269,15 @@ static int fdp_nci_request_firmware(struct nci_dev *ndev)
return r;
}
- data = (u8 *) info->ram_patch->data;
+ data = info->ram_patch->data;
info->ram_patch_version =
data[FDP_FW_HEADER_SIZE] |
(data[FDP_FW_HEADER_SIZE + 1] << 8) |
(data[FDP_FW_HEADER_SIZE + 2] << 16) |
(data[FDP_FW_HEADER_SIZE + 3] << 24);
- dev_dbg(dev, "RAM patch version: %d, size: %d\n",
- info->ram_patch_version, (int) info->ram_patch->size);
+ dev_dbg(dev, "RAM patch version: %d, size: %zu\n",
+ info->ram_patch_version, info->ram_patch->size);
r = request_firmware(&info->otp_patch, FDP_OTP_PATCH_NAME, dev);
@@ -293,8 +293,8 @@ static int fdp_nci_request_firmware(struct nci_dev *ndev)
(data[FDP_FW_HEADER_SIZE+2] << 16) |
(data[FDP_FW_HEADER_SIZE+3] << 24);
- dev_dbg(dev, "OTP patch version: %d, size: %d\n",
- info->otp_patch_version, (int) info->otp_patch->size);
+ dev_dbg(dev, "OTP patch version: %d, size: %zu\n",
+ info->otp_patch_version, info->otp_patch->size);
return 0;
}
@@ -610,8 +610,9 @@ static int fdp_nci_core_get_config_rsp_packet(struct nci_dev *ndev,
{
struct fdp_nci_info *info = nci_get_drvdata(ndev);
struct device *dev = &info->phy->i2c_dev->dev;
- struct nci_core_get_config_rsp *rsp = (void *) skb->data;
- u8 i, *p;
+ const struct nci_core_get_config_rsp *rsp = (void *) skb->data;
+ unsigned int i;
+ const u8 *p;
if (rsp->status == NCI_STATUS_OK) {
@@ -651,7 +652,7 @@ static int fdp_nci_core_get_config_rsp_packet(struct nci_dev *ndev,
return 0;
}
-static struct nci_driver_ops fdp_core_ops[] = {
+static const struct nci_driver_ops fdp_core_ops[] = {
{
.opcode = NCI_OP_CORE_GET_CONFIG_RSP,
.rsp = fdp_nci_core_get_config_rsp_packet,
@@ -662,7 +663,7 @@ static struct nci_driver_ops fdp_core_ops[] = {
},
};
-static struct nci_driver_ops fdp_prop_ops[] = {
+static const struct nci_driver_ops fdp_prop_ops[] = {
{
.opcode = nci_opcode_pack(NCI_GID_PROP, NCI_OP_PROP_PATCH_OID),
.rsp = fdp_nci_prop_patch_rsp_packet,
@@ -675,7 +676,7 @@ static struct nci_driver_ops fdp_prop_ops[] = {
},
};
-static struct nci_ops nci_ops = {
+static const struct nci_ops nci_ops = {
.open = fdp_nci_open,
.close = fdp_nci_close,
.send = fdp_nci_send,
@@ -687,10 +688,10 @@ static struct nci_ops nci_ops = {
.n_core_ops = ARRAY_SIZE(fdp_core_ops),
};
-int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops,
+int fdp_nci_probe(struct fdp_i2c_phy *phy, const struct nfc_phy_ops *phy_ops,
struct nci_dev **ndevp, int tx_headroom,
int tx_tailroom, u8 clock_type, u32 clock_freq,
- u8 *fw_vsc_cfg)
+ const u8 *fw_vsc_cfg)
{
struct device *dev = &phy->i2c_dev->dev;
struct fdp_nci_info *info;
@@ -718,6 +719,7 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops,
NFC_PROTO_NFC_DEP_MASK |
NFC_PROTO_ISO15693_MASK;
+ BUILD_BUG_ON(ARRAY_SIZE(fdp_prop_ops) > NCI_MAX_PROPRIETARY_CMD);
ndev = nci_allocate_device(&nci_ops, protocols, tx_headroom,
tx_tailroom);
if (!ndev) {
diff --git a/drivers/nfc/fdp/fdp.h b/drivers/nfc/fdp/fdp.h
index ead3b21ccae6..2e9161a4d7bf 100644
--- a/drivers/nfc/fdp/fdp.h
+++ b/drivers/nfc/fdp/fdp.h
@@ -21,9 +21,9 @@ struct fdp_i2c_phy {
uint16_t next_read_size;
};
-int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops,
+int fdp_nci_probe(struct fdp_i2c_phy *phy, const struct nfc_phy_ops *phy_ops,
struct nci_dev **ndev, int tx_headroom, int tx_tailroom,
- u8 clock_type, u32 clock_freq, u8 *fw_vsc_cfg);
+ u8 clock_type, u32 clock_freq, const u8 *fw_vsc_cfg);
void fdp_nci_remove(struct nci_dev *ndev);
#endif /* __LOCAL_FDP_H_ */
diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c
index c5596e514648..051c43a2a52f 100644
--- a/drivers/nfc/fdp/i2c.c
+++ b/drivers/nfc/fdp/i2c.c
@@ -36,7 +36,7 @@
print_hex_dump(KERN_DEBUG, prefix": ", DUMP_PREFIX_OFFSET, \
16, 1, (skb)->data, (skb)->len, 0)
-static void fdp_nci_i2c_reset(struct fdp_i2c_phy *phy)
+static void fdp_nci_i2c_reset(const struct fdp_i2c_phy *phy)
{
/* Reset RST/WakeUP for at least 100 micro-second */
gpiod_set_value_cansleep(phy->power_gpio, FDP_POWER_OFF);
@@ -47,7 +47,7 @@ static void fdp_nci_i2c_reset(struct fdp_i2c_phy *phy)
static int fdp_nci_i2c_enable(void *phy_id)
{
- struct fdp_i2c_phy *phy = phy_id;
+ const struct fdp_i2c_phy *phy = phy_id;
fdp_nci_i2c_reset(phy);
@@ -56,7 +56,7 @@ static int fdp_nci_i2c_enable(void *phy_id)
static void fdp_nci_i2c_disable(void *phy_id)
{
- struct fdp_i2c_phy *phy = phy_id;
+ const struct fdp_i2c_phy *phy = phy_id;
fdp_nci_i2c_reset(phy);
}
@@ -120,7 +120,7 @@ static int fdp_nci_i2c_write(void *phy_id, struct sk_buff *skb)
return r;
}
-static struct nfc_phy_ops i2c_phy_ops = {
+static const struct nfc_phy_ops i2c_phy_ops = {
.write = fdp_nci_i2c_write,
.enable = fdp_nci_i2c_enable,
.disable = fdp_nci_i2c_disable,
diff --git a/drivers/nfc/mei_phy.c b/drivers/nfc/mei_phy.c
index e56cea716cd2..f9cca885beec 100644
--- a/drivers/nfc/mei_phy.c
+++ b/drivers/nfc/mei_phy.c
@@ -202,7 +202,7 @@ err:
return r;
}
-static int mei_nfc_send(struct nfc_mei_phy *phy, u8 *buf, size_t length)
+static int mei_nfc_send(struct nfc_mei_phy *phy, const u8 *buf, size_t length)
{
struct mei_nfc_hdr *hdr;
u8 *mei_buf;
@@ -362,7 +362,7 @@ static void nfc_mei_phy_disable(void *phy_id)
phy->powered = 0;
}
-struct nfc_phy_ops mei_phy_ops = {
+const struct nfc_phy_ops mei_phy_ops = {
.write = nfc_mei_phy_write,
.enable = nfc_mei_phy_enable,
.disable = nfc_mei_phy_disable,
diff --git a/drivers/nfc/mei_phy.h b/drivers/nfc/mei_phy.h
index 51bd44f5f3b8..2b1edb3eba15 100644
--- a/drivers/nfc/mei_phy.h
+++ b/drivers/nfc/mei_phy.h
@@ -45,7 +45,7 @@ struct nfc_mei_phy {
int hard_fault;
};
-extern struct nfc_phy_ops mei_phy_ops;
+extern const struct nfc_phy_ops mei_phy_ops;
struct nfc_mei_phy *nfc_mei_phy_alloc(struct mei_cl_device *device);
void nfc_mei_phy_free(struct nfc_mei_phy *phy);
diff --git a/drivers/nfc/microread/i2c.c b/drivers/nfc/microread/i2c.c
index dd78d987e6c9..86f593c73ed6 100644
--- a/drivers/nfc/microread/i2c.c
+++ b/drivers/nfc/microread/i2c.c
@@ -73,7 +73,7 @@ static void microread_i2c_remove_len_crc(struct sk_buff *skb)
skb_trim(skb, MICROREAD_I2C_FRAME_TAILROOM);
}
-static int check_crc(struct sk_buff *skb)
+static int check_crc(const struct sk_buff *skb)
{
int i;
u8 crc = 0;
@@ -225,7 +225,7 @@ static irqreturn_t microread_i2c_irq_thread_fn(int irq, void *phy_id)
return IRQ_HANDLED;
}
-static struct nfc_phy_ops i2c_phy_ops = {
+static const struct nfc_phy_ops i2c_phy_ops = {
.write = microread_i2c_write,
.enable = microread_i2c_enable,
.disable = microread_i2c_disable,
diff --git a/drivers/nfc/microread/mei.c b/drivers/nfc/microread/mei.c
index 8fa7771085eb..8edf761a6b2a 100644
--- a/drivers/nfc/microread/mei.c
+++ b/drivers/nfc/microread/mei.c
@@ -10,7 +10,6 @@
#include <linux/module.h>
#include <linux/mod_devicetable.h>
#include <linux/nfc.h>
-#include <net/nfc/hci.h>
#include <net/nfc/llc.h>
#include "../mei_phy.h"
diff --git a/drivers/nfc/microread/microread.c b/drivers/nfc/microread/microread.c
index b1d3975e8a81..bb4d029bb888 100644
--- a/drivers/nfc/microread/microread.c
+++ b/drivers/nfc/microread/microread.c
@@ -15,7 +15,6 @@
#include <linux/nfc.h>
#include <net/nfc/nfc.h>
#include <net/nfc/hci.h>
-#include <net/nfc/llc.h>
#include "microread.h"
@@ -131,7 +130,7 @@
#define MICROREAD_ELT_ID_SE2 0x04
#define MICROREAD_ELT_ID_SE3 0x05
-static struct nfc_hci_gate microread_gates[] = {
+static const struct nfc_hci_gate microread_gates[] = {
{MICROREAD_GATE_ID_ADM, MICROREAD_PIPE_ID_ADMIN},
{MICROREAD_GATE_ID_LOOPBACK, MICROREAD_PIPE_ID_HDS_LOOPBACK},
{MICROREAD_GATE_ID_IDT, MICROREAD_PIPE_ID_HDS_IDT},
@@ -152,7 +151,7 @@ static struct nfc_hci_gate microread_gates[] = {
#define MICROREAD_CMD_TAILROOM 2
struct microread_info {
- struct nfc_phy_ops *phy_ops;
+ const struct nfc_phy_ops *phy_ops;
void *phy_id;
struct nfc_hci_dev *hdev;
@@ -358,7 +357,7 @@ static int microread_complete_target_discovered(struct nfc_hci_dev *hdev,
static void microread_im_transceive_cb(void *context, struct sk_buff *skb,
int err)
{
- struct microread_info *info = context;
+ const struct microread_info *info = context;
switch (info->async_cb_type) {
case MICROREAD_CB_TYPE_READER_ALL:
@@ -625,7 +624,7 @@ static int microread_event_received(struct nfc_hci_dev *hdev, u8 pipe,
return r;
}
-static struct nfc_hci_ops microread_hci_ops = {
+static const struct nfc_hci_ops microread_hci_ops = {
.open = microread_open,
.close = microread_close,
.hci_ready = microread_hci_ready,
@@ -641,9 +640,9 @@ static struct nfc_hci_ops microread_hci_ops = {
.event_received = microread_event_received,
};
-int microread_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name,
- int phy_headroom, int phy_tailroom, int phy_payload,
- struct nfc_hci_dev **hdev)
+int microread_probe(void *phy_id, const struct nfc_phy_ops *phy_ops,
+ const char *llc_name, int phy_headroom, int phy_tailroom,
+ int phy_payload, struct nfc_hci_dev **hdev)
{
struct microread_info *info;
unsigned long quirks = 0;
diff --git a/drivers/nfc/microread/microread.h b/drivers/nfc/microread/microread.h
index 044f5e456375..2ee7ccfa22dd 100644
--- a/drivers/nfc/microread/microread.h
+++ b/drivers/nfc/microread/microread.h
@@ -10,9 +10,9 @@
#define DRIVER_DESC "NFC driver for microread"
-int microread_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name,
- int phy_headroom, int phy_tailroom, int phy_payload,
- struct nfc_hci_dev **hdev);
+int microread_probe(void *phy_id, const struct nfc_phy_ops *phy_ops,
+ const char *llc_name, int phy_headroom, int phy_tailroom,
+ int phy_payload, struct nfc_hci_dev **hdev);
void microread_remove(struct nfc_hci_dev *hdev);
diff --git a/drivers/nfc/nfcmrvl/fw_dnld.c b/drivers/nfc/nfcmrvl/fw_dnld.c
index aaccb8b76b3e..edac56b01fd1 100644
--- a/drivers/nfc/nfcmrvl/fw_dnld.c
+++ b/drivers/nfc/nfcmrvl/fw_dnld.c
@@ -129,7 +129,7 @@ static void fw_dnld_timeout(struct timer_list *t)
}
static int process_state_reset(struct nfcmrvl_private *priv,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
if (sizeof(nci_pattern_core_reset_ntf) != skb->len ||
memcmp(skb->data, nci_pattern_core_reset_ntf,
@@ -145,7 +145,8 @@ static int process_state_reset(struct nfcmrvl_private *priv,
return 0;
}
-static int process_state_init(struct nfcmrvl_private *priv, struct sk_buff *skb)
+static int process_state_init(struct nfcmrvl_private *priv,
+ const struct sk_buff *skb)
{
struct nci_core_set_config_cmd cmd;
@@ -175,7 +176,7 @@ static void create_lc(struct nfcmrvl_private *priv)
}
static int process_state_set_ref_clock(struct nfcmrvl_private *priv,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
struct nci_core_set_config_cmd cmd;
@@ -221,7 +222,7 @@ static int process_state_set_ref_clock(struct nfcmrvl_private *priv,
}
static int process_state_set_hi_config(struct nfcmrvl_private *priv,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
if (sizeof(nci_pattern_core_set_config_rsp) != skb->len ||
memcmp(skb->data, nci_pattern_core_set_config_rsp, skb->len))
@@ -232,7 +233,7 @@ static int process_state_set_hi_config(struct nfcmrvl_private *priv,
}
static int process_state_open_lc(struct nfcmrvl_private *priv,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
if (sizeof(nci_pattern_core_conn_create_rsp) >= skb->len ||
memcmp(skb->data, nci_pattern_core_conn_create_rsp,
@@ -347,7 +348,7 @@ static int process_state_fw_dnld(struct nfcmrvl_private *priv,
}
static int process_state_close_lc(struct nfcmrvl_private *priv,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
if (sizeof(nci_pattern_core_conn_close_rsp) != skb->len ||
memcmp(skb->data, nci_pattern_core_conn_close_rsp, skb->len))
@@ -358,7 +359,8 @@ static int process_state_close_lc(struct nfcmrvl_private *priv,
return 0;
}
-static int process_state_boot(struct nfcmrvl_private *priv, struct sk_buff *skb)
+static int process_state_boot(struct nfcmrvl_private *priv,
+ const struct sk_buff *skb)
{
if (sizeof(nci_pattern_proprietary_boot_rsp) != skb->len ||
memcmp(skb->data, nci_pattern_proprietary_boot_rsp, skb->len))
diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c
index 59a529e72d96..ceef81d93ac9 100644
--- a/drivers/nfc/nfcmrvl/i2c.c
+++ b/drivers/nfc/nfcmrvl/i2c.c
@@ -8,12 +8,9 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/i2c.h>
-#include <linux/pm_runtime.h>
#include <linux/nfc.h>
-#include <linux/gpio.h>
#include <linux/delay.h>
#include <linux/of_irq.h>
-#include <linux/of_gpio.h>
#include <net/nfc/nci.h>
#include <net/nfc/nci_core.h>
#include "nfcmrvl.h"
@@ -146,7 +143,7 @@ static void nfcmrvl_i2c_nci_update_config(struct nfcmrvl_private *priv,
{
}
-static struct nfcmrvl_if_ops i2c_ops = {
+static const struct nfcmrvl_if_ops i2c_ops = {
.nci_open = nfcmrvl_i2c_nci_open,
.nci_close = nfcmrvl_i2c_nci_close,
.nci_send = nfcmrvl_i2c_nci_send,
@@ -182,8 +179,8 @@ static int nfcmrvl_i2c_parse_dt(struct device_node *node,
static int nfcmrvl_i2c_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
+ const struct nfcmrvl_platform_data *pdata;
struct nfcmrvl_i2c_drv_data *drv_data;
- struct nfcmrvl_platform_data *pdata;
struct nfcmrvl_platform_data config;
int ret;
diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c
index a4620b480c4f..2fcf545012b1 100644
--- a/drivers/nfc/nfcmrvl/main.c
+++ b/drivers/nfc/nfcmrvl/main.c
@@ -81,7 +81,7 @@ static int nfcmrvl_nci_fw_download(struct nci_dev *ndev,
return nfcmrvl_fw_dnld_start(ndev, firmware_name);
}
-static struct nci_ops nfcmrvl_nci_ops = {
+static const struct nci_ops nfcmrvl_nci_ops = {
.open = nfcmrvl_nci_open,
.close = nfcmrvl_nci_close,
.send = nfcmrvl_nci_send,
@@ -91,9 +91,9 @@ static struct nci_ops nfcmrvl_nci_ops = {
struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy,
void *drv_data,
- struct nfcmrvl_if_ops *ops,
+ const struct nfcmrvl_if_ops *ops,
struct device *dev,
- struct nfcmrvl_platform_data *pdata)
+ const struct nfcmrvl_platform_data *pdata)
{
struct nfcmrvl_private *priv;
int rc;
diff --git a/drivers/nfc/nfcmrvl/nfcmrvl.h b/drivers/nfc/nfcmrvl/nfcmrvl.h
index a715543bc9bf..165bd0a95190 100644
--- a/drivers/nfc/nfcmrvl/nfcmrvl.h
+++ b/drivers/nfc/nfcmrvl/nfcmrvl.h
@@ -77,7 +77,7 @@ struct nfcmrvl_private {
/* PHY type */
enum nfcmrvl_phy phy;
/* Low level driver ops */
- struct nfcmrvl_if_ops *if_ops;
+ const struct nfcmrvl_if_ops *if_ops;
};
struct nfcmrvl_if_ops {
@@ -92,9 +92,9 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv);
int nfcmrvl_nci_recv_frame(struct nfcmrvl_private *priv, struct sk_buff *skb);
struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy,
void *drv_data,
- struct nfcmrvl_if_ops *ops,
+ const struct nfcmrvl_if_ops *ops,
struct device *dev,
- struct nfcmrvl_platform_data *pdata);
+ const struct nfcmrvl_platform_data *pdata);
void nfcmrvl_chip_reset(struct nfcmrvl_private *priv);
diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c
index 66696321c645..5b833a9a83f8 100644
--- a/drivers/nfc/nfcmrvl/spi.c
+++ b/drivers/nfc/nfcmrvl/spi.c
@@ -7,11 +7,8 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-#include <linux/pm_runtime.h>
#include <linux/nfc.h>
-#include <linux/gpio.h>
#include <linux/of_irq.h>
-#include <linux/of_gpio.h>
#include <net/nfc/nci.h>
#include <net/nfc/nci_core.h>
#include <linux/spi/spi.h>
@@ -99,7 +96,7 @@ static void nfcmrvl_spi_nci_update_config(struct nfcmrvl_private *priv,
drv_data->nci_spi->xfer_speed_hz = config->clk;
}
-static struct nfcmrvl_if_ops spi_ops = {
+static const struct nfcmrvl_if_ops spi_ops = {
.nci_open = nfcmrvl_spi_nci_open,
.nci_close = nfcmrvl_spi_nci_close,
.nci_send = nfcmrvl_spi_nci_send,
@@ -129,7 +126,7 @@ static int nfcmrvl_spi_parse_dt(struct device_node *node,
static int nfcmrvl_spi_probe(struct spi_device *spi)
{
- struct nfcmrvl_platform_data *pdata;
+ const struct nfcmrvl_platform_data *pdata;
struct nfcmrvl_platform_data config;
struct nfcmrvl_spi_drv_data *drv_data;
int ret = 0;
diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c
index 50d86c90b9dd..9c92cbdc42f0 100644
--- a/drivers/nfc/nfcmrvl/uart.c
+++ b/drivers/nfc/nfcmrvl/uart.c
@@ -49,7 +49,7 @@ static void nfcmrvl_uart_nci_update_config(struct nfcmrvl_private *priv,
config->flow_control);
}
-static struct nfcmrvl_if_ops uart_ops = {
+static const struct nfcmrvl_if_ops uart_ops = {
.nci_open = nfcmrvl_uart_nci_open,
.nci_close = nfcmrvl_uart_nci_close,
.nci_send = nfcmrvl_uart_nci_send,
@@ -98,8 +98,8 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node,
static int nfcmrvl_nci_uart_open(struct nci_uart *nu)
{
struct nfcmrvl_private *priv;
- struct nfcmrvl_platform_data *pdata = NULL;
struct nfcmrvl_platform_data config;
+ const struct nfcmrvl_platform_data *pdata = NULL;
struct device *dev = nu->tty->dev;
/*
diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c
index 9d649b45300b..a99aedff795d 100644
--- a/drivers/nfc/nfcmrvl/usb.c
+++ b/drivers/nfc/nfcmrvl/usb.c
@@ -264,7 +264,7 @@ done:
return err;
}
-static struct nfcmrvl_if_ops usb_ops = {
+static const struct nfcmrvl_if_ops usb_ops = {
.nci_open = nfcmrvl_usb_nci_open,
.nci_close = nfcmrvl_usb_nci_close,
.nci_send = nfcmrvl_usb_nci_send,
diff --git a/drivers/nfc/nfcsim.c b/drivers/nfc/nfcsim.c
index dd27c85190d3..85bf8d586c70 100644
--- a/drivers/nfc/nfcsim.c
+++ b/drivers/nfc/nfcsim.c
@@ -239,7 +239,7 @@ static int nfcsim_send(struct nfc_digital_dev *ddev, struct sk_buff *skb,
static void nfcsim_abort_cmd(struct nfc_digital_dev *ddev)
{
- struct nfcsim *dev = nfc_digital_get_drvdata(ddev);
+ const struct nfcsim *dev = nfc_digital_get_drvdata(ddev);
nfcsim_link_recv_cancel(dev->link_in);
}
@@ -319,7 +319,7 @@ static int nfcsim_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
return nfcsim_send(ddev, NULL, timeout, cb, arg);
}
-static struct nfc_digital_ops nfcsim_digital_ops = {
+static const struct nfc_digital_ops nfcsim_digital_ops = {
.in_configure_hw = nfcsim_in_configure_hw,
.in_send_cmd = nfcsim_in_send_cmd,
diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c
index 2b0c7232e91f..518e2afb43a8 100644
--- a/drivers/nfc/nxp-nci/core.c
+++ b/drivers/nfc/nxp-nci/core.c
@@ -83,7 +83,7 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb)
return r;
}
-static struct nci_ops nxp_nci_ops = {
+static const struct nci_ops nxp_nci_ops = {
.open = nxp_nci_open,
.close = nxp_nci_close,
.send = nxp_nci_send,
diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c
index cd64bfe20402..2f3f3fe9a0ba 100644
--- a/drivers/nfc/pn533/pn533.c
+++ b/drivers/nfc/pn533/pn533.c
@@ -2623,7 +2623,7 @@ static int pn533_dev_down(struct nfc_dev *nfc_dev)
return ret;
}
-static struct nfc_ops pn533_nfc_ops = {
+static const struct nfc_ops pn533_nfc_ops = {
.dev_up = pn533_dev_up,
.dev_down = pn533_dev_down,
.dep_link_up = pn533_dep_link_up,
diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c
index de59e439c369..37d26f01986b 100644
--- a/drivers/nfc/pn544/i2c.c
+++ b/drivers/nfc/pn544/i2c.c
@@ -515,7 +515,7 @@ static irqreturn_t pn544_hci_i2c_irq_thread_fn(int irq, void *phy_id)
return IRQ_HANDLED;
}
-static struct nfc_phy_ops i2c_phy_ops = {
+static const struct nfc_phy_ops i2c_phy_ops = {
.write = pn544_hci_i2c_write,
.enable = pn544_hci_i2c_enable,
.disable = pn544_hci_i2c_disable,
diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c
index b788870473e8..32a61a185142 100644
--- a/drivers/nfc/pn544/pn544.c
+++ b/drivers/nfc/pn544/pn544.c
@@ -13,7 +13,6 @@
#include <linux/nfc.h>
#include <net/nfc/hci.h>
-#include <net/nfc/llc.h>
#include "pn544.h"
@@ -86,7 +85,7 @@ enum pn544_state {
#define PN544_HCI_CMD_ATTREQUEST 0x12
#define PN544_HCI_CMD_CONTINUE_ACTIVATION 0x13
-static struct nfc_hci_gate pn544_gates[] = {
+static const struct nfc_hci_gate pn544_gates[] = {
{NFC_HCI_ADMIN_GATE, NFC_HCI_INVALID_PIPE},
{NFC_HCI_LOOPBACK_GATE, NFC_HCI_INVALID_PIPE},
{NFC_HCI_ID_MGMT_GATE, NFC_HCI_INVALID_PIPE},
@@ -108,7 +107,7 @@ static struct nfc_hci_gate pn544_gates[] = {
#define PN544_CMDS_HEADROOM 2
struct pn544_hci_info {
- struct nfc_phy_ops *phy_ops;
+ const struct nfc_phy_ops *phy_ops;
void *phy_id;
struct nfc_hci_dev *hdev;
@@ -809,7 +808,7 @@ static int pn544_hci_discover_se(struct nfc_hci_dev *hdev)
#define PN544_SE_MODE_ON 0x01
static int pn544_hci_enable_se(struct nfc_hci_dev *hdev, u32 se_idx)
{
- struct nfc_se *se;
+ const struct nfc_se *se;
u8 enable = PN544_SE_MODE_ON;
static struct uicc_gatelist {
u8 head;
@@ -864,7 +863,7 @@ static int pn544_hci_enable_se(struct nfc_hci_dev *hdev, u32 se_idx)
static int pn544_hci_disable_se(struct nfc_hci_dev *hdev, u32 se_idx)
{
- struct nfc_se *se;
+ const struct nfc_se *se;
u8 disable = PN544_SE_MODE_OFF;
se = nfc_find_se(hdev->ndev, se_idx);
@@ -881,7 +880,7 @@ static int pn544_hci_disable_se(struct nfc_hci_dev *hdev, u32 se_idx)
}
}
-static struct nfc_hci_ops pn544_hci_ops = {
+static const struct nfc_hci_ops pn544_hci_ops = {
.open = pn544_hci_open,
.close = pn544_hci_close,
.hci_ready = pn544_hci_ready,
@@ -901,9 +900,10 @@ static struct nfc_hci_ops pn544_hci_ops = {
.disable_se = pn544_hci_disable_se,
};
-int pn544_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name,
- int phy_headroom, int phy_tailroom, int phy_payload,
- fw_download_t fw_download, struct nfc_hci_dev **hdev)
+int pn544_hci_probe(void *phy_id, const struct nfc_phy_ops *phy_ops,
+ char *llc_name, int phy_headroom, int phy_tailroom,
+ int phy_payload, fw_download_t fw_download,
+ struct nfc_hci_dev **hdev)
{
struct pn544_hci_info *info;
u32 protocols;
diff --git a/drivers/nfc/pn544/pn544.h b/drivers/nfc/pn544/pn544.h
index 5634ba215ead..c6fe3e11e0c8 100644
--- a/drivers/nfc/pn544/pn544.h
+++ b/drivers/nfc/pn544/pn544.h
@@ -16,9 +16,10 @@
typedef int (*fw_download_t)(void *context, const char *firmware_name,
u8 hw_variant);
-int pn544_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name,
- int phy_headroom, int phy_tailroom, int phy_payload,
- fw_download_t fw_download, struct nfc_hci_dev **hdev);
+int pn544_hci_probe(void *phy_id, const struct nfc_phy_ops *phy_ops,
+ char *llc_name, int phy_headroom, int phy_tailroom,
+ int phy_payload, fw_download_t fw_download,
+ struct nfc_hci_dev **hdev);
void pn544_hci_remove(struct nfc_hci_dev *hdev);
#endif /* __LOCAL_PN544_H_ */
diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c
index 4df926cc37d0..517376c43b86 100644
--- a/drivers/nfc/port100.c
+++ b/drivers/nfc/port100.c
@@ -217,7 +217,7 @@ struct port100_protocol {
u8 value;
} __packed;
-static struct port100_protocol
+static const struct port100_protocol
in_protocols[][PORT100_IN_MAX_NUM_PROTOCOLS + 1] = {
[NFC_DIGITAL_FRAMING_NFCA_SHORT] = {
{ PORT100_IN_PROT_INITIAL_GUARD_TIME, 6 },
@@ -391,7 +391,7 @@ in_protocols[][PORT100_IN_MAX_NUM_PROTOCOLS + 1] = {
},
};
-static struct port100_protocol
+static const struct port100_protocol
tg_protocols[][PORT100_TG_MAX_NUM_PROTOCOLS + 1] = {
[NFC_DIGITAL_FRAMING_NFCA_SHORT] = {
{ PORT100_TG_PROT_END, 0 },
@@ -526,7 +526,7 @@ static inline u8 port100_checksum(u16 value)
}
/* The rule: sum(data elements) + checksum = 0 */
-static u8 port100_data_checksum(u8 *data, int datalen)
+static u8 port100_data_checksum(const u8 *data, int datalen)
{
u8 sum = 0;
int i;
@@ -568,10 +568,10 @@ static void port100_tx_update_payload_len(void *_frame, int len)
le16_add_cpu(&frame->datalen, len);
}
-static bool port100_rx_frame_is_valid(void *_frame)
+static bool port100_rx_frame_is_valid(const void *_frame)
{
u8 checksum;
- struct port100_frame *frame = _frame;
+ const struct port100_frame *frame = _frame;
if (frame->start_frame != cpu_to_be16(PORT100_FRAME_SOF) ||
frame->extended_frame != cpu_to_be16(PORT100_FRAME_EXT))
@@ -589,23 +589,24 @@ static bool port100_rx_frame_is_valid(void *_frame)
return true;
}
-static bool port100_rx_frame_is_ack(struct port100_ack_frame *frame)
+static bool port100_rx_frame_is_ack(const struct port100_ack_frame *frame)
{
return (frame->start_frame == cpu_to_be16(PORT100_FRAME_SOF) &&
frame->ack_frame == cpu_to_be16(PORT100_FRAME_ACK));
}
-static inline int port100_rx_frame_size(void *frame)
+static inline int port100_rx_frame_size(const void *frame)
{
- struct port100_frame *f = frame;
+ const struct port100_frame *f = frame;
return sizeof(struct port100_frame) + le16_to_cpu(f->datalen) +
PORT100_FRAME_TAIL_LEN;
}
-static bool port100_rx_frame_is_cmd_response(struct port100 *dev, void *frame)
+static bool port100_rx_frame_is_cmd_response(const struct port100 *dev,
+ const void *frame)
{
- struct port100_frame *f = frame;
+ const struct port100_frame *f = frame;
return (PORT100_FRAME_CMD(f) == PORT100_CMD_RESPONSE(dev->cmd->code));
}
@@ -655,7 +656,8 @@ sched_wq:
schedule_work(&dev->cmd_complete_work);
}
-static int port100_submit_urb_for_response(struct port100 *dev, gfp_t flags)
+static int port100_submit_urb_for_response(const struct port100 *dev,
+ gfp_t flags)
{
dev->in_urb->complete = port100_recv_response;
@@ -666,7 +668,7 @@ static void port100_recv_ack(struct urb *urb)
{
struct port100 *dev = urb->context;
struct port100_cmd *cmd = dev->cmd;
- struct port100_ack_frame *in_frame;
+ const struct port100_ack_frame *in_frame;
int rc;
cmd->status = urb->status;
@@ -708,7 +710,7 @@ sched_wq:
schedule_work(&dev->cmd_complete_work);
}
-static int port100_submit_urb_for_ack(struct port100 *dev, gfp_t flags)
+static int port100_submit_urb_for_ack(const struct port100 *dev, gfp_t flags)
{
dev->in_urb->complete = port100_recv_ack;
@@ -753,8 +755,9 @@ static int port100_send_ack(struct port100 *dev)
return rc;
}
-static int port100_send_frame_async(struct port100 *dev, struct sk_buff *out,
- struct sk_buff *in, int in_len)
+static int port100_send_frame_async(struct port100 *dev,
+ const struct sk_buff *out,
+ const struct sk_buff *in, int in_len)
{
int rc;
@@ -960,7 +963,7 @@ static void port100_abort_cmd(struct nfc_digital_dev *ddev)
usb_kill_urb(dev->in_urb);
}
-static struct sk_buff *port100_alloc_skb(struct port100 *dev, unsigned int size)
+static struct sk_buff *port100_alloc_skb(const struct port100 *dev, unsigned int size)
{
struct sk_buff *skb;
@@ -1098,7 +1101,7 @@ static int port100_in_set_rf(struct nfc_digital_dev *ddev, u8 rf)
static int port100_in_set_framing(struct nfc_digital_dev *ddev, int param)
{
struct port100 *dev = nfc_digital_get_drvdata(ddev);
- struct port100_protocol *protocols;
+ const struct port100_protocol *protocols;
struct sk_buff *skb;
struct sk_buff *resp;
int num_protocols;
@@ -1152,7 +1155,7 @@ static int port100_in_configure_hw(struct nfc_digital_dev *ddev, int type,
static void port100_in_comm_rf_complete(struct port100 *dev, void *arg,
struct sk_buff *resp)
{
- struct port100_cb_arg *cb_arg = arg;
+ const struct port100_cb_arg *cb_arg = arg;
nfc_digital_cmd_complete_t cb = cb_arg->complete_cb;
u32 status;
int rc;
@@ -1255,7 +1258,7 @@ static int port100_tg_set_rf(struct nfc_digital_dev *ddev, u8 rf)
static int port100_tg_set_framing(struct nfc_digital_dev *ddev, int param)
{
struct port100 *dev = nfc_digital_get_drvdata(ddev);
- struct port100_protocol *protocols;
+ const struct port100_protocol *protocols;
struct sk_buff *skb;
struct sk_buff *resp;
int rc;
@@ -1330,7 +1333,7 @@ static void port100_tg_comm_rf_complete(struct port100 *dev, void *arg,
struct sk_buff *resp)
{
u32 status;
- struct port100_cb_arg *cb_arg = arg;
+ const struct port100_cb_arg *cb_arg = arg;
nfc_digital_cmd_complete_t cb = cb_arg->complete_cb;
struct port100_tg_comm_rf_res *hdr;
@@ -1453,7 +1456,7 @@ static int port100_listen_mdaa(struct nfc_digital_dev *ddev,
static int port100_listen(struct nfc_digital_dev *ddev, u16 timeout,
nfc_digital_cmd_complete_t cb, void *arg)
{
- struct port100 *dev = nfc_digital_get_drvdata(ddev);
+ const struct port100 *dev = nfc_digital_get_drvdata(ddev);
struct sk_buff *skb;
skb = port100_alloc_skb(dev, 0);
@@ -1463,7 +1466,7 @@ static int port100_listen(struct nfc_digital_dev *ddev, u16 timeout,
return port100_tg_send_cmd(ddev, skb, timeout, cb, arg);
}
-static struct nfc_digital_ops port100_digital_ops = {
+static const struct nfc_digital_ops port100_digital_ops = {
.in_configure_hw = port100_in_configure_hw,
.in_send_cmd = port100_in_send_cmd,
diff --git a/drivers/nfc/s3fwrn5/core.c b/drivers/nfc/s3fwrn5/core.c
index 865d3e3d1528..1c412007fabb 100644
--- a/drivers/nfc/s3fwrn5/core.c
+++ b/drivers/nfc/s3fwrn5/core.c
@@ -143,11 +143,13 @@ static int s3fwrn5_nci_post_setup(struct nci_dev *ndev)
return nci_core_init(info->ndev);
}
-static struct nci_ops s3fwrn5_nci_ops = {
+static const struct nci_ops s3fwrn5_nci_ops = {
.open = s3fwrn5_nci_open,
.close = s3fwrn5_nci_close,
.send = s3fwrn5_nci_send,
.post_setup = s3fwrn5_nci_post_setup,
+ .prop_ops = s3fwrn5_nci_prop_ops,
+ .n_prop_ops = ARRAY_SIZE(s3fwrn5_nci_prop_ops),
};
int s3fwrn5_probe(struct nci_dev **ndev, void *phy_id, struct device *pdev,
@@ -167,9 +169,6 @@ int s3fwrn5_probe(struct nci_dev **ndev, void *phy_id, struct device *pdev,
s3fwrn5_set_mode(info, S3FWRN5_MODE_COLD);
- s3fwrn5_nci_get_prop_ops(&s3fwrn5_nci_ops.prop_ops,
- &s3fwrn5_nci_ops.n_prop_ops);
-
info->ndev = nci_allocate_device(&s3fwrn5_nci_ops,
S3FWRN5_NFC_PROTOCOLS, 0, 0);
if (!info->ndev)
diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c
index e3e72b8a29e3..1af7a1e632cf 100644
--- a/drivers/nfc/s3fwrn5/firmware.c
+++ b/drivers/nfc/s3fwrn5/firmware.c
@@ -421,10 +421,9 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info)
tfm = crypto_alloc_shash("sha1", 0, 0);
if (IS_ERR(tfm)) {
- ret = PTR_ERR(tfm);
dev_err(&fw_info->ndev->nfc_dev->dev,
"Cannot allocate shash (code=%pe)\n", tfm);
- goto out;
+ return PTR_ERR(tfm);
}
ret = crypto_shash_tfm_digest(tfm, fw->image, image_size, hash_data);
@@ -433,7 +432,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info)
if (ret) {
dev_err(&fw_info->ndev->nfc_dev->dev,
"Cannot compute hash (code=%d)\n", ret);
- goto out;
+ return ret;
}
/* Firmware update process */
@@ -446,7 +445,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info)
if (ret < 0) {
dev_err(&fw_info->ndev->nfc_dev->dev,
"Unable to enter update mode\n");
- goto out;
+ return ret;
}
for (off = 0; off < image_size; off += fw_info->sector_size) {
@@ -455,7 +454,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info)
if (ret < 0) {
dev_err(&fw_info->ndev->nfc_dev->dev,
"Firmware update error (code=%d)\n", ret);
- goto out;
+ return ret;
}
}
@@ -463,13 +462,12 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info)
if (ret < 0) {
dev_err(&fw_info->ndev->nfc_dev->dev,
"Unable to complete update mode\n");
- goto out;
+ return ret;
}
dev_info(&fw_info->ndev->nfc_dev->dev,
"Firmware update: success\n");
-out:
return ret;
}
diff --git a/drivers/nfc/s3fwrn5/nci.c b/drivers/nfc/s3fwrn5/nci.c
index f042d3eaf8f6..e374e670b36b 100644
--- a/drivers/nfc/s3fwrn5/nci.c
+++ b/drivers/nfc/s3fwrn5/nci.c
@@ -20,7 +20,7 @@ static int s3fwrn5_nci_prop_rsp(struct nci_dev *ndev, struct sk_buff *skb)
return 0;
}
-static struct nci_driver_ops s3fwrn5_nci_prop_ops[] = {
+const struct nci_driver_ops s3fwrn5_nci_prop_ops[4] = {
{
.opcode = nci_opcode_pack(NCI_GID_PROPRIETARY,
NCI_PROP_SET_RFREG),
@@ -43,12 +43,6 @@ static struct nci_driver_ops s3fwrn5_nci_prop_ops[] = {
},
};
-void s3fwrn5_nci_get_prop_ops(struct nci_driver_ops **ops, size_t *n)
-{
- *ops = s3fwrn5_nci_prop_ops;
- *n = ARRAY_SIZE(s3fwrn5_nci_prop_ops);
-}
-
#define S3FWRN5_RFREG_SECTION_SIZE 252
int s3fwrn5_nci_rf_configure(struct s3fwrn5_info *info, const char *fw_name)
diff --git a/drivers/nfc/s3fwrn5/nci.h b/drivers/nfc/s3fwrn5/nci.h
index a80f0fb082a8..c2d906591e9e 100644
--- a/drivers/nfc/s3fwrn5/nci.h
+++ b/drivers/nfc/s3fwrn5/nci.h
@@ -50,7 +50,7 @@ struct nci_prop_fw_cfg_rsp {
__u8 status;
};
-void s3fwrn5_nci_get_prop_ops(struct nci_driver_ops **ops, size_t *n);
+extern const struct nci_driver_ops s3fwrn5_nci_prop_ops[4];
int s3fwrn5_nci_rf_configure(struct s3fwrn5_info *info, const char *fw_name);
#endif /* __LOCAL_S3FWRN5_NCI_H_ */
diff --git a/drivers/nfc/st-nci/core.c b/drivers/nfc/st-nci/core.c
index 110ff1281e5f..a367136d4330 100644
--- a/drivers/nfc/st-nci/core.c
+++ b/drivers/nfc/st-nci/core.c
@@ -9,8 +9,6 @@
#include <linux/nfc.h>
#include <net/nfc/nci.h>
#include <net/nfc/nci_core.h>
-#include <linux/gpio.h>
-#include <linux/delay.h>
#include "st-nci.h"
@@ -86,7 +84,7 @@ static int st_nci_prop_rsp_packet(struct nci_dev *ndev,
return 0;
}
-static struct nci_driver_ops st_nci_prop_ops[] = {
+static const struct nci_driver_ops st_nci_prop_ops[] = {
{
.opcode = nci_opcode_pack(NCI_GID_PROPRIETARY,
ST_NCI_CORE_PROP),
@@ -94,7 +92,7 @@ static struct nci_driver_ops st_nci_prop_ops[] = {
},
};
-static struct nci_ops st_nci_ops = {
+static const struct nci_ops st_nci_ops = {
.init = st_nci_init,
.open = st_nci_open,
.close = st_nci_close,
@@ -131,6 +129,7 @@ int st_nci_probe(struct llt_ndlc *ndlc, int phy_headroom,
| NFC_PROTO_ISO15693_MASK
| NFC_PROTO_NFC_DEP_MASK;
+ BUILD_BUG_ON(ARRAY_SIZE(st_nci_prop_ops) > NCI_MAX_PROPRIETARY_CMD);
ndlc->ndev = nci_allocate_device(&st_nci_ops, protocols,
phy_headroom, phy_tailroom);
if (!ndlc->ndev) {
diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c
index 46981405e8b1..ccf6152ebb9f 100644
--- a/drivers/nfc/st-nci/i2c.c
+++ b/drivers/nfc/st-nci/i2c.c
@@ -186,7 +186,7 @@ static irqreturn_t st_nci_irq_thread_fn(int irq, void *phy_id)
return IRQ_HANDLED;
}
-static struct nfc_phy_ops i2c_phy_ops = {
+static const struct nfc_phy_ops i2c_phy_ops = {
.write = st_nci_i2c_write,
.enable = st_nci_i2c_enable,
.disable = st_nci_i2c_disable,
diff --git a/drivers/nfc/st-nci/ndlc.c b/drivers/nfc/st-nci/ndlc.c
index 5d74c674368a..e9dc313b333e 100644
--- a/drivers/nfc/st-nci/ndlc.c
+++ b/drivers/nfc/st-nci/ndlc.c
@@ -253,9 +253,9 @@ static void ndlc_t2_timeout(struct timer_list *t)
schedule_work(&ndlc->sm_work);
}
-int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev,
- int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id,
- struct st_nci_se_status *se_status)
+int ndlc_probe(void *phy_id, const struct nfc_phy_ops *phy_ops,
+ struct device *dev, int phy_headroom, int phy_tailroom,
+ struct llt_ndlc **ndlc_id, struct st_nci_se_status *se_status)
{
struct llt_ndlc *ndlc;
diff --git a/drivers/nfc/st-nci/ndlc.h b/drivers/nfc/st-nci/ndlc.h
index 066e2fd75238..c24ce9b0df52 100644
--- a/drivers/nfc/st-nci/ndlc.h
+++ b/drivers/nfc/st-nci/ndlc.h
@@ -16,7 +16,7 @@ struct st_nci_se_status;
/* Low Level Transport description */
struct llt_ndlc {
struct nci_dev *ndev;
- struct nfc_phy_ops *ops;
+ const struct nfc_phy_ops *ops;
void *phy_id;
struct timer_list t1_timer;
@@ -45,8 +45,8 @@ int ndlc_open(struct llt_ndlc *ndlc);
void ndlc_close(struct llt_ndlc *ndlc);
int ndlc_send(struct llt_ndlc *ndlc, struct sk_buff *skb);
void ndlc_recv(struct llt_ndlc *ndlc, struct sk_buff *skb);
-int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev,
- int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id,
- struct st_nci_se_status *se_status);
+int ndlc_probe(void *phy_id, const struct nfc_phy_ops *phy_ops,
+ struct device *dev, int phy_headroom, int phy_tailroom,
+ struct llt_ndlc **ndlc_id, struct st_nci_se_status *se_status);
void ndlc_remove(struct llt_ndlc *ndlc);
#endif /* __LOCAL_NDLC_H__ */
diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c
index 250d56f204c3..a620c34790e6 100644
--- a/drivers/nfc/st-nci/spi.c
+++ b/drivers/nfc/st-nci/spi.c
@@ -198,7 +198,7 @@ static irqreturn_t st_nci_irq_thread_fn(int irq, void *phy_id)
return IRQ_HANDLED;
}
-static struct nfc_phy_ops spi_phy_ops = {
+static const struct nfc_phy_ops spi_phy_ops = {
.write = st_nci_spi_write,
.enable = st_nci_spi_enable,
.disable = st_nci_spi_disable,
diff --git a/drivers/nfc/st-nci/vendor_cmds.c b/drivers/nfc/st-nci/vendor_cmds.c
index 94b600029a2a..30d2912d1a05 100644
--- a/drivers/nfc/st-nci/vendor_cmds.c
+++ b/drivers/nfc/st-nci/vendor_cmds.c
@@ -371,7 +371,7 @@ static int st_nci_manufacturer_specific(struct nfc_dev *dev, void *data,
return nfc_vendor_cmd_reply(msg);
}
-static struct nfc_vendor_cmd st_nci_vendor_cmds[] = {
+static const struct nfc_vendor_cmd st_nci_vendor_cmds[] = {
{
.vendor_id = ST_NCI_VENDOR_OUI,
.subcmd = FACTORY_MODE,
diff --git a/drivers/nfc/st21nfca/core.c b/drivers/nfc/st21nfca/core.c
index 6ca0d2f56b18..161caf2675cf 100644
--- a/drivers/nfc/st21nfca/core.c
+++ b/drivers/nfc/st21nfca/core.c
@@ -8,7 +8,6 @@
#include <linux/module.h>
#include <linux/nfc.h>
#include <net/nfc/hci.h>
-#include <net/nfc/llc.h>
#include "st21nfca.h"
@@ -72,7 +71,7 @@
static DECLARE_BITMAP(dev_mask, ST21NFCA_NUM_DEVICES);
-static struct nfc_hci_gate st21nfca_gates[] = {
+static const struct nfc_hci_gate st21nfca_gates[] = {
{NFC_HCI_ADMIN_GATE, NFC_HCI_ADMIN_PIPE},
{NFC_HCI_LINK_MGMT_GATE, NFC_HCI_LINK_MGMT_PIPE},
{ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_DEVICE_MGNT_PIPE},
@@ -912,7 +911,7 @@ static int st21nfca_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe,
}
}
-static struct nfc_hci_ops st21nfca_hci_ops = {
+static const struct nfc_hci_ops st21nfca_hci_ops = {
.open = st21nfca_hci_open,
.close = st21nfca_hci_close,
.load_session = st21nfca_hci_load_session,
@@ -935,7 +934,7 @@ static struct nfc_hci_ops st21nfca_hci_ops = {
.se_io = st21nfca_hci_se_io,
};
-int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops,
+int st21nfca_hci_probe(void *phy_id, const struct nfc_phy_ops *phy_ops,
char *llc_name, int phy_headroom, int phy_tailroom,
int phy_payload, struct nfc_hci_dev **hdev,
struct st21nfca_se_status *se_status)
diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
index 7a9f4d71707e..279d88128b2e 100644
--- a/drivers/nfc/st21nfca/i2c.c
+++ b/drivers/nfc/st21nfca/i2c.c
@@ -18,8 +18,6 @@
#include <linux/nfc.h>
#include <linux/firmware.h>
-#include <asm/unaligned.h>
-
#include <net/nfc/hci.h>
#include <net/nfc/llc.h>
#include <net/nfc/nfc.h>
@@ -76,8 +74,8 @@ struct st21nfca_i2c_phy {
struct mutex phy_lock;
};
-static u8 len_seq[] = { 16, 24, 12, 29 };
-static u16 wait_tab[] = { 2, 3, 5, 15, 20, 40};
+static const u8 len_seq[] = { 16, 24, 12, 29 };
+static const u16 wait_tab[] = { 2, 3, 5, 15, 20, 40};
#define I2C_DUMP_SKB(info, skb) \
do { \
@@ -482,7 +480,7 @@ static irqreturn_t st21nfca_hci_irq_thread_fn(int irq, void *phy_id)
return IRQ_HANDLED;
}
-static struct nfc_phy_ops i2c_phy_ops = {
+static const struct nfc_phy_ops i2c_phy_ops = {
.write = st21nfca_hci_i2c_write,
.enable = st21nfca_hci_i2c_enable,
.disable = st21nfca_hci_i2c_disable,
diff --git a/drivers/nfc/st21nfca/st21nfca.h b/drivers/nfc/st21nfca/st21nfca.h
index 5e0de0fef1d4..cb6ad916be91 100644
--- a/drivers/nfc/st21nfca/st21nfca.h
+++ b/drivers/nfc/st21nfca/st21nfca.h
@@ -144,7 +144,7 @@ struct st21nfca_se_info {
};
struct st21nfca_hci_info {
- struct nfc_phy_ops *phy_ops;
+ const struct nfc_phy_ops *phy_ops;
void *phy_id;
struct nfc_hci_dev *hdev;
@@ -163,7 +163,7 @@ struct st21nfca_hci_info {
struct st21nfca_vendor_info vendor_info;
};
-int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops,
+int st21nfca_hci_probe(void *phy_id, const struct nfc_phy_ops *phy_ops,
char *llc_name, int phy_headroom, int phy_tailroom,
int phy_payload, struct nfc_hci_dev **hdev,
struct st21nfca_se_status *se_status);
diff --git a/drivers/nfc/st21nfca/vendor_cmds.c b/drivers/nfc/st21nfca/vendor_cmds.c
index 62332ca91554..74882866dbaf 100644
--- a/drivers/nfc/st21nfca/vendor_cmds.c
+++ b/drivers/nfc/st21nfca/vendor_cmds.c
@@ -295,7 +295,7 @@ exit:
return r;
}
-static struct nfc_vendor_cmd st21nfca_vendor_cmds[] = {
+static const struct nfc_vendor_cmd st21nfca_vendor_cmds[] = {
{
.vendor_id = ST21NFCA_VENDOR_OUI,
.subcmd = FACTORY_MODE,
diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c
index 2dc788c363fd..d16cf3ff644e 100644
--- a/drivers/nfc/st95hf/core.c
+++ b/drivers/nfc/st95hf/core.c
@@ -16,7 +16,6 @@
#include <linux/nfc.h>
#include <linux/of_gpio.h>
#include <linux/of.h>
-#include <linux/of_irq.h>
#include <linux/property.h>
#include <linux/regulator/consumer.h>
#include <linux/wait.h>
@@ -1037,7 +1036,7 @@ static void st95hf_abort_cmd(struct nfc_digital_dev *ddev)
{
}
-static struct nfc_digital_ops st95hf_nfc_digital_ops = {
+static const struct nfc_digital_ops st95hf_nfc_digital_ops = {
.in_configure_hw = st95hf_in_configure_hw,
.in_send_cmd = st95hf_in_send_cmd,
diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c
index 33978022ae47..8890fcd59c39 100644
--- a/drivers/nfc/trf7970a.c
+++ b/drivers/nfc/trf7970a.c
@@ -643,7 +643,7 @@ static void trf7970a_send_err_upstream(struct trf7970a *trf, int errno)
}
static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb,
- unsigned int len, u8 *prefix,
+ unsigned int len, const u8 *prefix,
unsigned int prefix_len)
{
struct spi_transfer t[2];
@@ -1387,9 +1387,10 @@ static int trf7970a_is_iso15693_write_or_lock(u8 cmd)
}
}
-static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb)
+static int trf7970a_per_cmd_config(struct trf7970a *trf,
+ const struct sk_buff *skb)
{
- u8 *req = skb->data;
+ const u8 *req = skb->data;
u8 special_fcn_reg1, iso_ctrl;
int ret;
@@ -1791,7 +1792,7 @@ out_err:
static int trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
nfc_digital_cmd_complete_t cb, void *arg)
{
- struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
+ const struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
dev_dbg(trf->dev, "Listen - state: %d, timeout: %d ms\n",
trf->state, timeout);
@@ -1803,7 +1804,7 @@ static int trf7970a_tg_listen_md(struct nfc_digital_dev *ddev,
u16 timeout, nfc_digital_cmd_complete_t cb,
void *arg)
{
- struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
+ const struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
int ret;
dev_dbg(trf->dev, "Listen MD - state: %d, timeout: %d ms\n",
@@ -1824,7 +1825,7 @@ static int trf7970a_tg_listen_md(struct nfc_digital_dev *ddev,
static int trf7970a_tg_get_rf_tech(struct nfc_digital_dev *ddev, u8 *rf_tech)
{
- struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
+ const struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
dev_dbg(trf->dev, "Get RF Tech - state: %d, rf_tech: %d\n",
trf->state, trf->md_rf_tech);
@@ -1861,7 +1862,7 @@ static void trf7970a_abort_cmd(struct nfc_digital_dev *ddev)
mutex_unlock(&trf->lock);
}
-static struct nfc_digital_ops trf7970a_nfc_ops = {
+static const struct nfc_digital_ops trf7970a_nfc_ops = {
.in_configure_hw = trf7970a_in_configure_hw,
.in_send_cmd = trf7970a_send_cmd,
.tg_configure_hw = trf7970a_tg_configure_hw,
@@ -1974,7 +1975,7 @@ static void trf7970a_shutdown(struct trf7970a *trf)
trf7970a_power_down(trf);
}
-static int trf7970a_get_autosuspend_delay(struct device_node *np)
+static int trf7970a_get_autosuspend_delay(const struct device_node *np)
{
int autosuspend_delay, ret;
@@ -1987,7 +1988,7 @@ static int trf7970a_get_autosuspend_delay(struct device_node *np)
static int trf7970a_probe(struct spi_device *spi)
{
- struct device_node *np = spi->dev.of_node;
+ const struct device_node *np = spi->dev.of_node;
struct trf7970a *trf;
int uvolts, autosuspend_delay, ret;
u32 clk_freq = TRF7970A_13MHZ_CLOCK_FREQUENCY;
diff --git a/drivers/nfc/virtual_ncidev.c b/drivers/nfc/virtual_ncidev.c
index f73ee0bf3593..221fa3bb8705 100644
--- a/drivers/nfc/virtual_ncidev.c
+++ b/drivers/nfc/virtual_ncidev.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
+#include <linux/wait.h>
#include <net/nfc/nci_core.h>
enum virtual_ncidev_mode {
@@ -27,6 +28,7 @@ enum virtual_ncidev_mode {
NFC_PROTO_ISO15693_MASK)
static enum virtual_ncidev_mode state;
+static DECLARE_WAIT_QUEUE_HEAD(wq);
static struct miscdevice miscdev;
static struct sk_buff *send_buff;
static struct nci_dev *ndev;
@@ -61,11 +63,12 @@ static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb)
}
send_buff = skb_copy(skb, GFP_KERNEL);
mutex_unlock(&nci_mutex);
+ wake_up_interruptible(&wq);
return 0;
}
-static struct nci_ops virtual_nci_ops = {
+static const struct nci_ops virtual_nci_ops = {
.open = virtual_nci_open,
.close = virtual_nci_close,
.send = virtual_nci_send
@@ -77,9 +80,11 @@ static ssize_t virtual_ncidev_read(struct file *file, char __user *buf,
size_t actual_len;
mutex_lock(&nci_mutex);
- if (!send_buff) {
+ while (!send_buff) {
mutex_unlock(&nci_mutex);
- return 0;
+ if (wait_event_interruptible(wq, send_buff))
+ return -EFAULT;
+ mutex_lock(&nci_mutex);
}
actual_len = min_t(size_t, count, send_buff->len);
@@ -170,7 +175,7 @@ static int virtual_ncidev_close(struct inode *inode, struct file *file)
static long virtual_ncidev_ioctl(struct file *flip, unsigned int cmd,
unsigned long arg)
{
- struct nfc_dev *nfc_dev = ndev->nfc_dev;
+ const struct nfc_dev *nfc_dev = ndev->nfc_dev;
void __user *p = (void __user *)arg;
if (cmd != IOCTL_GET_NCIDEV_IDX)
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 2403b71b601e..745478213ff2 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -2527,7 +2527,7 @@ static void deactivate_labels(void *region)
static int init_active_labels(struct nd_region *nd_region)
{
- int i;
+ int i, rc = 0;
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
@@ -2546,13 +2546,14 @@ static int init_active_labels(struct nd_region *nd_region)
else if (test_bit(NDD_LABELING, &nvdimm->flags))
/* fail, labels needed to disambiguate dpa */;
else
- return 0;
+ continue;
dev_err(&nd_region->dev, "%s: is %s, failing probe\n",
dev_name(&nd_mapping->nvdimm->dev),
test_bit(NDD_LOCKED, &nvdimm->flags)
? "locked" : "disabled");
- return -ENXIO;
+ rc = -ENXIO;
+ goto out;
}
nd_mapping->ndd = ndd;
atomic_inc(&nvdimm->busy);
@@ -2586,13 +2587,17 @@ static int init_active_labels(struct nd_region *nd_region)
break;
}
- if (i < nd_region->ndr_mappings) {
+ if (i < nd_region->ndr_mappings)
+ rc = -ENOMEM;
+
+out:
+ if (rc) {
deactivate_labels(nd_region);
- return -ENOMEM;
+ return rc;
}
return devm_add_action_or_reset(&nd_region->dev, deactivate_labels,
- nd_region);
+ nd_region);
}
int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index c3f3d77f1aac..dc0450ca23a3 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -33,12 +33,12 @@ config NVME_HWMON
in the system.
config NVME_FABRICS
+ select NVME_CORE
tristate
config NVME_RDMA
tristate "NVM Express over Fabrics RDMA host driver"
depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK
- select NVME_CORE
select NVME_FABRICS
select SG_POOL
help
@@ -55,7 +55,6 @@ config NVME_FC
tristate "NVM Express over Fabrics FC host driver"
depends on BLOCK
depends on HAS_DMA
- select NVME_CORE
select NVME_FABRICS
select SG_POOL
help
@@ -72,7 +71,6 @@ config NVME_TCP
tristate "NVM Express over Fabrics TCP host driver"
depends on INET
depends on BLOCK
- select NVME_CORE
select NVME_FABRICS
select CRYPTO
select CRYPTO_CRC32C
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index cbc509784b2e..dfaacd472e5d 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -12,7 +12,6 @@ obj-$(CONFIG_NVME_TCP) += nvme-tcp.o
nvme-core-y := core.o ioctl.o
nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
-nvme-core-$(CONFIG_NVM) += lightnvm.o
nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o
nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index dfd9dec0c1f6..8679a108f571 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -587,9 +587,6 @@ static void nvme_free_ns(struct kref *kref)
{
struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
- if (ns->ndev)
- nvme_nvm_unregister(ns);
-
put_disk(ns->disk);
nvme_put_ns_head(ns->head);
nvme_put_ctrl(ns->ctrl);
@@ -968,12 +965,11 @@ void nvme_cleanup_cmd(struct request *req)
{
if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
- struct page *page = req->special_vec.bv_page;
- if (page == ctrl->discard_page)
+ if (req->special_vec.bv_page == ctrl->discard_page)
clear_bit_unlock(0, &ctrl->discard_page_busy);
else
- kfree(page_address(page) + req->special_vec.bv_offset);
+ kfree(bvec_virt(&req->special_vec));
}
}
EXPORT_SYMBOL_GPL(nvme_cleanup_cmd);
@@ -1029,7 +1025,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
return BLK_STS_IOERR;
}
- cmd->common.command_id = req->tag;
+ nvme_req(req)->genctr++;
+ cmd->common.command_id = nvme_cid(req);
trace_nvme_setup_cmd(req, cmd);
return ret;
}
@@ -1822,7 +1819,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
static inline bool nvme_first_scan(struct gendisk *disk)
{
/* nvme_alloc_ns() scans the disk prior to adding it */
- return !(disk->flags & GENHD_FL_UP);
+ return !disk_live(disk);
}
static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
@@ -1890,7 +1887,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
nvme_update_disk_info(ns->head->disk, ns, id);
blk_stack_limits(&ns->head->disk->queue->limits,
&ns->queue->limits, 0);
- blk_queue_update_readahead(ns->head->disk->queue);
+ disk_update_readahead(ns->head->disk);
blk_mq_unfreeze_queue(ns->head->disk->queue);
}
return 0;
@@ -3218,9 +3215,6 @@ static const struct attribute_group nvme_ns_id_attr_group = {
const struct attribute_group *nvme_ns_id_attr_groups[] = {
&nvme_ns_id_attr_group,
-#ifdef CONFIG_NVM
- &nvme_nvm_attr_group,
-#endif
NULL,
};
@@ -3729,9 +3723,14 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
if (!ns)
goto out_free_id;
- ns->queue = blk_mq_init_queue(ctrl->tagset);
- if (IS_ERR(ns->queue))
+ disk = blk_mq_alloc_disk(ctrl->tagset, ns);
+ if (IS_ERR(disk))
goto out_free_ns;
+ disk->fops = &nvme_bdev_ops;
+ disk->private_data = ns;
+
+ ns->disk = disk;
+ ns->queue = disk->queue;
if (ctrl->opts && ctrl->opts->data_digest)
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue);
@@ -3740,20 +3739,12 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
if (ctrl->ops->flags & NVME_F_PCI_P2PDMA)
blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue);
- ns->queue->queuedata = ns;
ns->ctrl = ctrl;
kref_init(&ns->kref);
if (nvme_init_ns_head(ns, nsid, ids, id->nmic & NVME_NS_NMIC_SHARED))
- goto out_free_queue;
+ goto out_cleanup_disk;
- disk = alloc_disk_node(0, node);
- if (!disk)
- goto out_unlink_ns;
-
- disk->fops = &nvme_bdev_ops;
- disk->private_data = ns;
- disk->queue = ns->queue;
/*
* Without the multipath code enabled, multiple controller per
* subsystems are visible as devices and thus we cannot use the
@@ -3762,17 +3753,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
if (!nvme_mpath_set_disk_name(ns, disk->disk_name, &disk->flags))
sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance,
ns->head->instance);
- ns->disk = disk;
if (nvme_update_ns_info(ns, id))
- goto out_put_disk;
-
- if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
- if (nvme_nvm_register(ns, disk->disk_name, node)) {
- dev_warn(ctrl->device, "LightNVM init failure\n");
- goto out_put_disk;
- }
- }
+ goto out_unlink_ns;
down_write(&ctrl->namespaces_rwsem);
list_add_tail(&ns->list, &ctrl->namespaces);
@@ -3789,10 +3772,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
kfree(id);
return;
- out_put_disk:
- /* prevent double queue cleanup */
- ns->disk->queue = NULL;
- put_disk(ns->disk);
+
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
@@ -3800,8 +3780,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
list_del_init(&ns->head->entry);
mutex_unlock(&ctrl->subsys->lock);
nvme_put_ns_head(ns->head);
- out_free_queue:
- blk_cleanup_queue(ns->queue);
+ out_cleanup_disk:
+ blk_cleanup_disk(disk);
out_free_ns:
kfree(ns);
out_free_id:
@@ -3826,14 +3806,12 @@ static void nvme_ns_remove(struct nvme_ns *ns)
nvme_mpath_clear_current_path(ns);
synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
- if (ns->disk->flags & GENHD_FL_UP) {
- if (!nvme_ns_head_multipath(ns->head))
- nvme_cdev_del(&ns->cdev, &ns->cdev_device);
- del_gendisk(ns->disk);
- blk_cleanup_queue(ns->queue);
- if (blk_get_integrity(ns->disk))
- blk_integrity_unregister(ns->disk);
- }
+ if (!nvme_ns_head_multipath(ns->head))
+ nvme_cdev_del(&ns->cdev, &ns->cdev_device);
+ del_gendisk(ns->disk);
+ blk_cleanup_queue(ns->queue);
+ if (blk_get_integrity(ns->disk))
+ blk_integrity_unregister(ns->disk);
down_write(&ns->ctrl->namespaces_rwsem);
list_del_init(&ns->list);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index a5469fd9d4c3..668c6bb7a567 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -719,7 +719,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -EINVAL;
goto out;
}
- nvmf_host_put(opts->host);
opts->host = nvmf_host_add(p);
kfree(p);
if (!opts->host) {
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 305ddd415e45..22314962842d 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -342,9 +342,7 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
case NVME_IOCTL_IO64_CMD:
return nvme_user_cmd64(ns->ctrl, ns, argp);
default:
- if (!ns->ndev)
- return -ENOTTY;
- return nvme_nvm_ioctl(ns, cmd, argp);
+ return -ENOTTY;
}
}
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
deleted file mode 100644
index e9d9ad47f70f..000000000000
--- a/drivers/nvme/host/lightnvm.c
+++ /dev/null
@@ -1,1274 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * nvme-lightnvm.c - LightNVM NVMe device
- *
- * Copyright (C) 2014-2015 IT University of Copenhagen
- * Initial release: Matias Bjorling <mb@lightnvm.io>
- */
-
-#include "nvme.h"
-
-#include <linux/nvme.h>
-#include <linux/bitops.h>
-#include <linux/lightnvm.h>
-#include <linux/vmalloc.h>
-#include <linux/sched/sysctl.h>
-#include <uapi/linux/lightnvm.h>
-
-enum nvme_nvm_admin_opcode {
- nvme_nvm_admin_identity = 0xe2,
- nvme_nvm_admin_get_bb_tbl = 0xf2,
- nvme_nvm_admin_set_bb_tbl = 0xf1,
-};
-
-enum nvme_nvm_log_page {
- NVME_NVM_LOG_REPORT_CHUNK = 0xca,
-};
-
-struct nvme_nvm_ph_rw {
- __u8 opcode;
- __u8 flags;
- __u16 command_id;
- __le32 nsid;
- __u64 rsvd2;
- __le64 metadata;
- __le64 prp1;
- __le64 prp2;
- __le64 spba;
- __le16 length;
- __le16 control;
- __le32 dsmgmt;
- __le64 resv;
-};
-
-struct nvme_nvm_erase_blk {
- __u8 opcode;
- __u8 flags;
- __u16 command_id;
- __le32 nsid;
- __u64 rsvd[2];
- __le64 prp1;
- __le64 prp2;
- __le64 spba;
- __le16 length;
- __le16 control;
- __le32 dsmgmt;
- __le64 resv;
-};
-
-struct nvme_nvm_identity {
- __u8 opcode;
- __u8 flags;
- __u16 command_id;
- __le32 nsid;
- __u64 rsvd[2];
- __le64 prp1;
- __le64 prp2;
- __u32 rsvd11[6];
-};
-
-struct nvme_nvm_getbbtbl {
- __u8 opcode;
- __u8 flags;
- __u16 command_id;
- __le32 nsid;
- __u64 rsvd[2];
- __le64 prp1;
- __le64 prp2;
- __le64 spba;
- __u32 rsvd4[4];
-};
-
-struct nvme_nvm_setbbtbl {
- __u8 opcode;
- __u8 flags;
- __u16 command_id;
- __le32 nsid;
- __le64 rsvd[2];
- __le64 prp1;
- __le64 prp2;
- __le64 spba;
- __le16 nlb;
- __u8 value;
- __u8 rsvd3;
- __u32 rsvd4[3];
-};
-
-struct nvme_nvm_command {
- union {
- struct nvme_common_command common;
- struct nvme_nvm_ph_rw ph_rw;
- struct nvme_nvm_erase_blk erase;
- struct nvme_nvm_identity identity;
- struct nvme_nvm_getbbtbl get_bb;
- struct nvme_nvm_setbbtbl set_bb;
- };
-};
-
-struct nvme_nvm_id12_grp {
- __u8 mtype;
- __u8 fmtype;
- __le16 res16;
- __u8 num_ch;
- __u8 num_lun;
- __u8 num_pln;
- __u8 rsvd1;
- __le16 num_chk;
- __le16 num_pg;
- __le16 fpg_sz;
- __le16 csecs;
- __le16 sos;
- __le16 rsvd2;
- __le32 trdt;
- __le32 trdm;
- __le32 tprt;
- __le32 tprm;
- __le32 tbet;
- __le32 tbem;
- __le32 mpos;
- __le32 mccap;
- __le16 cpar;
- __u8 reserved[906];
-} __packed;
-
-struct nvme_nvm_id12_addrf {
- __u8 ch_offset;
- __u8 ch_len;
- __u8 lun_offset;
- __u8 lun_len;
- __u8 pln_offset;
- __u8 pln_len;
- __u8 blk_offset;
- __u8 blk_len;
- __u8 pg_offset;
- __u8 pg_len;
- __u8 sec_offset;
- __u8 sec_len;
- __u8 res[4];
-} __packed;
-
-struct nvme_nvm_id12 {
- __u8 ver_id;
- __u8 vmnt;
- __u8 cgrps;
- __u8 res;
- __le32 cap;
- __le32 dom;
- struct nvme_nvm_id12_addrf ppaf;
- __u8 resv[228];
- struct nvme_nvm_id12_grp grp;
- __u8 resv2[2880];
-} __packed;
-
-struct nvme_nvm_bb_tbl {
- __u8 tblid[4];
- __le16 verid;
- __le16 revid;
- __le32 rvsd1;
- __le32 tblks;
- __le32 tfact;
- __le32 tgrown;
- __le32 tdresv;
- __le32 thresv;
- __le32 rsvd2[8];
- __u8 blk[];
-};
-
-struct nvme_nvm_id20_addrf {
- __u8 grp_len;
- __u8 pu_len;
- __u8 chk_len;
- __u8 lba_len;
- __u8 resv[4];
-};
-
-struct nvme_nvm_id20 {
- __u8 mjr;
- __u8 mnr;
- __u8 resv[6];
-
- struct nvme_nvm_id20_addrf lbaf;
-
- __le32 mccap;
- __u8 resv2[12];
-
- __u8 wit;
- __u8 resv3[31];
-
- /* Geometry */
- __le16 num_grp;
- __le16 num_pu;
- __le32 num_chk;
- __le32 clba;
- __u8 resv4[52];
-
- /* Write data requirements */
- __le32 ws_min;
- __le32 ws_opt;
- __le32 mw_cunits;
- __le32 maxoc;
- __le32 maxocpu;
- __u8 resv5[44];
-
- /* Performance related metrics */
- __le32 trdt;
- __le32 trdm;
- __le32 twrt;
- __le32 twrm;
- __le32 tcrst;
- __le32 tcrsm;
- __u8 resv6[40];
-
- /* Reserved area */
- __u8 resv7[2816];
-
- /* Vendor specific */
- __u8 vs[1024];
-};
-
-struct nvme_nvm_chk_meta {
- __u8 state;
- __u8 type;
- __u8 wi;
- __u8 rsvd[5];
- __le64 slba;
- __le64 cnlb;
- __le64 wp;
-};
-
-/*
- * Check we didn't inadvertently grow the command struct
- */
-static inline void _nvme_nvm_check_size(void)
-{
- BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_grp) != 960);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_addrf) != 16);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_id12) != NVME_IDENTIFY_DATA_SIZE);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_id20_addrf) != 8);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_id20) != NVME_IDENTIFY_DATA_SIZE);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != 32);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) !=
- sizeof(struct nvm_chk_meta));
-}
-
-static void nvme_nvm_set_addr_12(struct nvm_addrf_12 *dst,
- struct nvme_nvm_id12_addrf *src)
-{
- dst->ch_len = src->ch_len;
- dst->lun_len = src->lun_len;
- dst->blk_len = src->blk_len;
- dst->pg_len = src->pg_len;
- dst->pln_len = src->pln_len;
- dst->sec_len = src->sec_len;
-
- dst->ch_offset = src->ch_offset;
- dst->lun_offset = src->lun_offset;
- dst->blk_offset = src->blk_offset;
- dst->pg_offset = src->pg_offset;
- dst->pln_offset = src->pln_offset;
- dst->sec_offset = src->sec_offset;
-
- dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
- dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
- dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset;
- dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset;
- dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset;
- dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
-}
-
-static int nvme_nvm_setup_12(struct nvme_nvm_id12 *id,
- struct nvm_geo *geo)
-{
- struct nvme_nvm_id12_grp *src;
- int sec_per_pg, sec_per_pl, pg_per_blk;
-
- if (id->cgrps != 1)
- return -EINVAL;
-
- src = &id->grp;
-
- if (src->mtype != 0) {
- pr_err("nvm: memory type not supported\n");
- return -EINVAL;
- }
-
- /* 1.2 spec. only reports a single version id - unfold */
- geo->major_ver_id = id->ver_id;
- geo->minor_ver_id = 2;
-
- /* Set compacted version for upper layers */
- geo->version = NVM_OCSSD_SPEC_12;
-
- geo->num_ch = src->num_ch;
- geo->num_lun = src->num_lun;
- geo->all_luns = geo->num_ch * geo->num_lun;
-
- geo->num_chk = le16_to_cpu(src->num_chk);
-
- geo->csecs = le16_to_cpu(src->csecs);
- geo->sos = le16_to_cpu(src->sos);
-
- pg_per_blk = le16_to_cpu(src->num_pg);
- sec_per_pg = le16_to_cpu(src->fpg_sz) / geo->csecs;
- sec_per_pl = sec_per_pg * src->num_pln;
- geo->clba = sec_per_pl * pg_per_blk;
-
- geo->all_chunks = geo->all_luns * geo->num_chk;
- geo->total_secs = geo->clba * geo->all_chunks;
-
- geo->ws_min = sec_per_pg;
- geo->ws_opt = sec_per_pg;
- geo->mw_cunits = geo->ws_opt << 3; /* default to MLC safe values */
-
- /* Do not impose values for maximum number of open blocks as it is
- * unspecified in 1.2. Users of 1.2 must be aware of this and eventually
- * specify these values through a quirk if restrictions apply.
- */
- geo->maxoc = geo->all_luns * geo->num_chk;
- geo->maxocpu = geo->num_chk;
-
- geo->mccap = le32_to_cpu(src->mccap);
-
- geo->trdt = le32_to_cpu(src->trdt);
- geo->trdm = le32_to_cpu(src->trdm);
- geo->tprt = le32_to_cpu(src->tprt);
- geo->tprm = le32_to_cpu(src->tprm);
- geo->tbet = le32_to_cpu(src->tbet);
- geo->tbem = le32_to_cpu(src->tbem);
-
- /* 1.2 compatibility */
- geo->vmnt = id->vmnt;
- geo->cap = le32_to_cpu(id->cap);
- geo->dom = le32_to_cpu(id->dom);
-
- geo->mtype = src->mtype;
- geo->fmtype = src->fmtype;
-
- geo->cpar = le16_to_cpu(src->cpar);
- geo->mpos = le32_to_cpu(src->mpos);
-
- geo->pln_mode = NVM_PLANE_SINGLE;
-
- if (geo->mpos & 0x020202) {
- geo->pln_mode = NVM_PLANE_DOUBLE;
- geo->ws_opt <<= 1;
- } else if (geo->mpos & 0x040404) {
- geo->pln_mode = NVM_PLANE_QUAD;
- geo->ws_opt <<= 2;
- }
-
- geo->num_pln = src->num_pln;
- geo->num_pg = le16_to_cpu(src->num_pg);
- geo->fpg_sz = le16_to_cpu(src->fpg_sz);
-
- nvme_nvm_set_addr_12((struct nvm_addrf_12 *)&geo->addrf, &id->ppaf);
-
- return 0;
-}
-
-static void nvme_nvm_set_addr_20(struct nvm_addrf *dst,
- struct nvme_nvm_id20_addrf *src)
-{
- dst->ch_len = src->grp_len;
- dst->lun_len = src->pu_len;
- dst->chk_len = src->chk_len;
- dst->sec_len = src->lba_len;
-
- dst->sec_offset = 0;
- dst->chk_offset = dst->sec_len;
- dst->lun_offset = dst->chk_offset + dst->chk_len;
- dst->ch_offset = dst->lun_offset + dst->lun_len;
-
- dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
- dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
- dst->chk_mask = ((1ULL << dst->chk_len) - 1) << dst->chk_offset;
- dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
-}
-
-static int nvme_nvm_setup_20(struct nvme_nvm_id20 *id,
- struct nvm_geo *geo)
-{
- geo->major_ver_id = id->mjr;
- geo->minor_ver_id = id->mnr;
-
- /* Set compacted version for upper layers */
- geo->version = NVM_OCSSD_SPEC_20;
-
- geo->num_ch = le16_to_cpu(id->num_grp);
- geo->num_lun = le16_to_cpu(id->num_pu);
- geo->all_luns = geo->num_ch * geo->num_lun;
-
- geo->num_chk = le32_to_cpu(id->num_chk);
- geo->clba = le32_to_cpu(id->clba);
-
- geo->all_chunks = geo->all_luns * geo->num_chk;
- geo->total_secs = geo->clba * geo->all_chunks;
-
- geo->ws_min = le32_to_cpu(id->ws_min);
- geo->ws_opt = le32_to_cpu(id->ws_opt);
- geo->mw_cunits = le32_to_cpu(id->mw_cunits);
- geo->maxoc = le32_to_cpu(id->maxoc);
- geo->maxocpu = le32_to_cpu(id->maxocpu);
-
- geo->trdt = le32_to_cpu(id->trdt);
- geo->trdm = le32_to_cpu(id->trdm);
- geo->tprt = le32_to_cpu(id->twrt);
- geo->tprm = le32_to_cpu(id->twrm);
- geo->tbet = le32_to_cpu(id->tcrst);
- geo->tbem = le32_to_cpu(id->tcrsm);
-
- nvme_nvm_set_addr_20(&geo->addrf, &id->lbaf);
-
- return 0;
-}
-
-static int nvme_nvm_identity(struct nvm_dev *nvmdev)
-{
- struct nvme_ns *ns = nvmdev->q->queuedata;
- struct nvme_nvm_id12 *id;
- struct nvme_nvm_command c = {};
- int ret;
-
- c.identity.opcode = nvme_nvm_admin_identity;
- c.identity.nsid = cpu_to_le32(ns->head->ns_id);
-
- id = kmalloc(sizeof(struct nvme_nvm_id12), GFP_KERNEL);
- if (!id)
- return -ENOMEM;
-
- ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
- id, sizeof(struct nvme_nvm_id12));
- if (ret) {
- ret = -EIO;
- goto out;
- }
-
- /*
- * The 1.2 and 2.0 specifications share the first byte in their geometry
- * command to make it possible to know what version a device implements.
- */
- switch (id->ver_id) {
- case 1:
- ret = nvme_nvm_setup_12(id, &nvmdev->geo);
- break;
- case 2:
- ret = nvme_nvm_setup_20((struct nvme_nvm_id20 *)id,
- &nvmdev->geo);
- break;
- default:
- dev_err(ns->ctrl->device, "OCSSD revision not supported (%d)\n",
- id->ver_id);
- ret = -EINVAL;
- }
-
-out:
- kfree(id);
- return ret;
-}
-
-static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
- u8 *blks)
-{
- struct request_queue *q = nvmdev->q;
- struct nvm_geo *geo = &nvmdev->geo;
- struct nvme_ns *ns = q->queuedata;
- struct nvme_ctrl *ctrl = ns->ctrl;
- struct nvme_nvm_command c = {};
- struct nvme_nvm_bb_tbl *bb_tbl;
- int nr_blks = geo->num_chk * geo->num_pln;
- int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks;
- int ret = 0;
-
- c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
- c.get_bb.nsid = cpu_to_le32(ns->head->ns_id);
- c.get_bb.spba = cpu_to_le64(ppa.ppa);
-
- bb_tbl = kzalloc(tblsz, GFP_KERNEL);
- if (!bb_tbl)
- return -ENOMEM;
-
- ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c,
- bb_tbl, tblsz);
- if (ret) {
- dev_err(ctrl->device, "get bad block table failed (%d)\n", ret);
- ret = -EIO;
- goto out;
- }
-
- if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' ||
- bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') {
- dev_err(ctrl->device, "bbt format mismatch\n");
- ret = -EINVAL;
- goto out;
- }
-
- if (le16_to_cpu(bb_tbl->verid) != 1) {
- ret = -EINVAL;
- dev_err(ctrl->device, "bbt version not supported\n");
- goto out;
- }
-
- if (le32_to_cpu(bb_tbl->tblks) != nr_blks) {
- ret = -EINVAL;
- dev_err(ctrl->device,
- "bbt unsuspected blocks returned (%u!=%u)",
- le32_to_cpu(bb_tbl->tblks), nr_blks);
- goto out;
- }
-
- memcpy(blks, bb_tbl->blk, geo->num_chk * geo->num_pln);
-out:
- kfree(bb_tbl);
- return ret;
-}
-
-static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
- int nr_ppas, int type)
-{
- struct nvme_ns *ns = nvmdev->q->queuedata;
- struct nvme_nvm_command c = {};
- int ret = 0;
-
- c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl;
- c.set_bb.nsid = cpu_to_le32(ns->head->ns_id);
- c.set_bb.spba = cpu_to_le64(ppas->ppa);
- c.set_bb.nlb = cpu_to_le16(nr_ppas - 1);
- c.set_bb.value = type;
-
- ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
- NULL, 0);
- if (ret)
- dev_err(ns->ctrl->device, "set bad block table failed (%d)\n",
- ret);
- return ret;
-}
-
-/*
- * Expect the lba in device format
- */
-static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
- sector_t slba, int nchks,
- struct nvm_chk_meta *meta)
-{
- struct nvm_geo *geo = &ndev->geo;
- struct nvme_ns *ns = ndev->q->queuedata;
- struct nvme_ctrl *ctrl = ns->ctrl;
- struct nvme_nvm_chk_meta *dev_meta, *dev_meta_off;
- struct ppa_addr ppa;
- size_t left = nchks * sizeof(struct nvme_nvm_chk_meta);
- size_t log_pos, offset, len;
- int i, max_len;
- int ret = 0;
-
- /*
- * limit requests to maximum 256K to avoid issuing arbitrary large
- * requests when the device does not specific a maximum transfer size.
- */
- max_len = min_t(unsigned int, ctrl->max_hw_sectors << 9, 256 * 1024);
-
- dev_meta = kmalloc(max_len, GFP_KERNEL);
- if (!dev_meta)
- return -ENOMEM;
-
- /* Normalize lba address space to obtain log offset */
- ppa.ppa = slba;
- ppa = dev_to_generic_addr(ndev, ppa);
-
- log_pos = ppa.m.chk;
- log_pos += ppa.m.pu * geo->num_chk;
- log_pos += ppa.m.grp * geo->num_lun * geo->num_chk;
-
- offset = log_pos * sizeof(struct nvme_nvm_chk_meta);
-
- while (left) {
- len = min_t(unsigned int, left, max_len);
-
- memset(dev_meta, 0, max_len);
- dev_meta_off = dev_meta;
-
- ret = nvme_get_log(ctrl, ns->head->ns_id,
- NVME_NVM_LOG_REPORT_CHUNK, 0, NVME_CSI_NVM,
- dev_meta, len, offset);
- if (ret) {
- dev_err(ctrl->device, "Get REPORT CHUNK log error\n");
- break;
- }
-
- for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) {
- meta->state = dev_meta_off->state;
- meta->type = dev_meta_off->type;
- meta->wi = dev_meta_off->wi;
- meta->slba = le64_to_cpu(dev_meta_off->slba);
- meta->cnlb = le64_to_cpu(dev_meta_off->cnlb);
- meta->wp = le64_to_cpu(dev_meta_off->wp);
-
- meta++;
- dev_meta_off++;
- }
-
- offset += len;
- left -= len;
- }
-
- kfree(dev_meta);
-
- return ret;
-}
-
-static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
- struct nvme_nvm_command *c)
-{
- c->ph_rw.opcode = rqd->opcode;
- c->ph_rw.nsid = cpu_to_le32(ns->head->ns_id);
- c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa);
- c->ph_rw.metadata = cpu_to_le64(rqd->dma_meta_list);
- c->ph_rw.control = cpu_to_le16(rqd->flags);
- c->ph_rw.length = cpu_to_le16(rqd->nr_ppas - 1);
-}
-
-static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
-{
- struct nvm_rq *rqd = rq->end_io_data;
-
- rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64);
- rqd->error = nvme_req(rq)->status;
- nvm_end_io(rqd);
-
- kfree(nvme_req(rq)->cmd);
- blk_mq_free_request(rq);
-}
-
-static struct request *nvme_nvm_alloc_request(struct request_queue *q,
- struct nvm_rq *rqd,
- struct nvme_nvm_command *cmd)
-{
- struct nvme_ns *ns = q->queuedata;
- struct request *rq;
-
- nvme_nvm_rqtocmd(rqd, ns, cmd);
-
- rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0);
- if (IS_ERR(rq))
- return rq;
-
- rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
-
- if (rqd->bio)
- blk_rq_append_bio(rq, rqd->bio);
- else
- rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
-
- return rq;
-}
-
-static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd,
- void *buf)
-{
- struct nvm_geo *geo = &dev->geo;
- struct request_queue *q = dev->q;
- struct nvme_nvm_command *cmd;
- struct request *rq;
- int ret;
-
- cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- rq = nvme_nvm_alloc_request(q, rqd, cmd);
- if (IS_ERR(rq)) {
- ret = PTR_ERR(rq);
- goto err_free_cmd;
- }
-
- if (buf) {
- ret = blk_rq_map_kern(q, rq, buf, geo->csecs * rqd->nr_ppas,
- GFP_KERNEL);
- if (ret)
- goto err_free_cmd;
- }
-
- rq->end_io_data = rqd;
-
- blk_execute_rq_nowait(NULL, rq, 0, nvme_nvm_end_io);
-
- return 0;
-
-err_free_cmd:
- kfree(cmd);
- return ret;
-}
-
-static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name,
- int size)
-{
- struct nvme_ns *ns = nvmdev->q->queuedata;
-
- return dma_pool_create(name, ns->ctrl->dev, size, PAGE_SIZE, 0);
-}
-
-static void nvme_nvm_destroy_dma_pool(void *pool)
-{
- struct dma_pool *dma_pool = pool;
-
- dma_pool_destroy(dma_pool);
-}
-
-static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
- gfp_t mem_flags, dma_addr_t *dma_handler)
-{
- return dma_pool_alloc(pool, mem_flags, dma_handler);
-}
-
-static void nvme_nvm_dev_dma_free(void *pool, void *addr,
- dma_addr_t dma_handler)
-{
- dma_pool_free(pool, addr, dma_handler);
-}
-
-static struct nvm_dev_ops nvme_nvm_dev_ops = {
- .identity = nvme_nvm_identity,
-
- .get_bb_tbl = nvme_nvm_get_bb_tbl,
- .set_bb_tbl = nvme_nvm_set_bb_tbl,
-
- .get_chk_meta = nvme_nvm_get_chk_meta,
-
- .submit_io = nvme_nvm_submit_io,
-
- .create_dma_pool = nvme_nvm_create_dma_pool,
- .destroy_dma_pool = nvme_nvm_destroy_dma_pool,
- .dev_dma_alloc = nvme_nvm_dev_dma_alloc,
- .dev_dma_free = nvme_nvm_dev_dma_free,
-};
-
-static int nvme_nvm_submit_user_cmd(struct request_queue *q,
- struct nvme_ns *ns,
- struct nvme_nvm_command *vcmd,
- void __user *ubuf, unsigned int bufflen,
- void __user *meta_buf, unsigned int meta_len,
- void __user *ppa_buf, unsigned int ppa_len,
- u32 *result, u64 *status, unsigned int timeout)
-{
- bool write = nvme_is_write((struct nvme_command *)vcmd);
- struct nvm_dev *dev = ns->ndev;
- struct request *rq;
- struct bio *bio = NULL;
- __le64 *ppa_list = NULL;
- dma_addr_t ppa_dma;
- __le64 *metadata = NULL;
- dma_addr_t metadata_dma;
- DECLARE_COMPLETION_ONSTACK(wait);
- int ret = 0;
-
- rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0);
- if (IS_ERR(rq)) {
- ret = -ENOMEM;
- goto err_cmd;
- }
-
- if (timeout)
- rq->timeout = timeout;
-
- if (ppa_buf && ppa_len) {
- ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
- if (!ppa_list) {
- ret = -ENOMEM;
- goto err_rq;
- }
- if (copy_from_user(ppa_list, (void __user *)ppa_buf,
- sizeof(u64) * (ppa_len + 1))) {
- ret = -EFAULT;
- goto err_ppa;
- }
- vcmd->ph_rw.spba = cpu_to_le64(ppa_dma);
- } else {
- vcmd->ph_rw.spba = cpu_to_le64((uintptr_t)ppa_buf);
- }
-
- if (ubuf && bufflen) {
- ret = blk_rq_map_user(q, rq, NULL, ubuf, bufflen, GFP_KERNEL);
- if (ret)
- goto err_ppa;
- bio = rq->bio;
-
- if (meta_buf && meta_len) {
- metadata = dma_pool_alloc(dev->dma_pool, GFP_KERNEL,
- &metadata_dma);
- if (!metadata) {
- ret = -ENOMEM;
- goto err_map;
- }
-
- if (write) {
- if (copy_from_user(metadata,
- (void __user *)meta_buf,
- meta_len)) {
- ret = -EFAULT;
- goto err_meta;
- }
- }
- vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma);
- }
-
- bio_set_dev(bio, ns->disk->part0);
- }
-
- blk_execute_rq(NULL, rq, 0);
-
- if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
- ret = -EINTR;
- else if (nvme_req(rq)->status & 0x7ff)
- ret = -EIO;
- if (result)
- *result = nvme_req(rq)->status & 0x7ff;
- if (status)
- *status = le64_to_cpu(nvme_req(rq)->result.u64);
-
- if (metadata && !ret && !write) {
- if (copy_to_user(meta_buf, (void *)metadata, meta_len))
- ret = -EFAULT;
- }
-err_meta:
- if (meta_buf && meta_len)
- dma_pool_free(dev->dma_pool, metadata, metadata_dma);
-err_map:
- if (bio)
- blk_rq_unmap_user(bio);
-err_ppa:
- if (ppa_buf && ppa_len)
- dma_pool_free(dev->dma_pool, ppa_list, ppa_dma);
-err_rq:
- blk_mq_free_request(rq);
-err_cmd:
- return ret;
-}
-
-static int nvme_nvm_submit_vio(struct nvme_ns *ns,
- struct nvm_user_vio __user *uvio)
-{
- struct nvm_user_vio vio;
- struct nvme_nvm_command c;
- unsigned int length;
- int ret;
-
- if (copy_from_user(&vio, uvio, sizeof(vio)))
- return -EFAULT;
- if (vio.flags)
- return -EINVAL;
-
- memset(&c, 0, sizeof(c));
- c.ph_rw.opcode = vio.opcode;
- c.ph_rw.nsid = cpu_to_le32(ns->head->ns_id);
- c.ph_rw.control = cpu_to_le16(vio.control);
- c.ph_rw.length = cpu_to_le16(vio.nppas);
-
- length = (vio.nppas + 1) << ns->lba_shift;
-
- ret = nvme_nvm_submit_user_cmd(ns->queue, ns, &c,
- (void __user *)(uintptr_t)vio.addr, length,
- (void __user *)(uintptr_t)vio.metadata,
- vio.metadata_len,
- (void __user *)(uintptr_t)vio.ppa_list, vio.nppas,
- &vio.result, &vio.status, 0);
-
- if (ret && copy_to_user(uvio, &vio, sizeof(vio)))
- return -EFAULT;
-
- return ret;
-}
-
-static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin,
- struct nvm_passthru_vio __user *uvcmd)
-{
- struct nvm_passthru_vio vcmd;
- struct nvme_nvm_command c;
- struct request_queue *q;
- unsigned int timeout = 0;
- int ret;
-
- if (copy_from_user(&vcmd, uvcmd, sizeof(vcmd)))
- return -EFAULT;
- if ((vcmd.opcode != 0xF2) && (!capable(CAP_SYS_ADMIN)))
- return -EACCES;
- if (vcmd.flags)
- return -EINVAL;
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = vcmd.opcode;
- c.common.nsid = cpu_to_le32(ns->head->ns_id);
- c.common.cdw2[0] = cpu_to_le32(vcmd.cdw2);
- c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3);
- /* cdw11-12 */
- c.ph_rw.length = cpu_to_le16(vcmd.nppas);
- c.ph_rw.control = cpu_to_le16(vcmd.control);
- c.common.cdw13 = cpu_to_le32(vcmd.cdw13);
- c.common.cdw14 = cpu_to_le32(vcmd.cdw14);
- c.common.cdw15 = cpu_to_le32(vcmd.cdw15);
-
- if (vcmd.timeout_ms)
- timeout = msecs_to_jiffies(vcmd.timeout_ms);
-
- q = admin ? ns->ctrl->admin_q : ns->queue;
-
- ret = nvme_nvm_submit_user_cmd(q, ns,
- (struct nvme_nvm_command *)&c,
- (void __user *)(uintptr_t)vcmd.addr, vcmd.data_len,
- (void __user *)(uintptr_t)vcmd.metadata,
- vcmd.metadata_len,
- (void __user *)(uintptr_t)vcmd.ppa_list, vcmd.nppas,
- &vcmd.result, &vcmd.status, timeout);
-
- if (ret && copy_to_user(uvcmd, &vcmd, sizeof(vcmd)))
- return -EFAULT;
-
- return ret;
-}
-
-int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp)
-{
- switch (cmd) {
- case NVME_NVM_IOCTL_ADMIN_VIO:
- return nvme_nvm_user_vcmd(ns, 1, argp);
- case NVME_NVM_IOCTL_IO_VIO:
- return nvme_nvm_user_vcmd(ns, 0, argp);
- case NVME_NVM_IOCTL_SUBMIT_VIO:
- return nvme_nvm_submit_vio(ns, argp);
- default:
- return -ENOTTY;
- }
-}
-
-int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
-{
- struct request_queue *q = ns->queue;
- struct nvm_dev *dev;
- struct nvm_geo *geo;
-
- _nvme_nvm_check_size();
-
- dev = nvm_alloc_dev(node);
- if (!dev)
- return -ENOMEM;
-
- /* Note that csecs and sos will be overridden if it is a 1.2 drive. */
- geo = &dev->geo;
- geo->csecs = 1 << ns->lba_shift;
- geo->sos = ns->ms;
- if (ns->features & NVME_NS_EXT_LBAS)
- geo->ext = true;
- else
- geo->ext = false;
- geo->mdts = ns->ctrl->max_hw_sectors;
-
- dev->q = q;
- memcpy(dev->name, disk_name, DISK_NAME_LEN);
- dev->ops = &nvme_nvm_dev_ops;
- dev->private_data = ns;
- ns->ndev = dev;
-
- return nvm_register(dev);
-}
-
-void nvme_nvm_unregister(struct nvme_ns *ns)
-{
- nvm_unregister(ns->ndev);
-}
-
-static ssize_t nvm_dev_attr_show(struct device *dev,
- struct device_attribute *dattr, char *page)
-{
- struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
- struct nvm_dev *ndev = ns->ndev;
- struct nvm_geo *geo = &ndev->geo;
- struct attribute *attr;
-
- if (!ndev)
- return 0;
-
- attr = &dattr->attr;
-
- if (strcmp(attr->name, "version") == 0) {
- if (geo->major_ver_id == 1)
- return scnprintf(page, PAGE_SIZE, "%u\n",
- geo->major_ver_id);
- else
- return scnprintf(page, PAGE_SIZE, "%u.%u\n",
- geo->major_ver_id,
- geo->minor_ver_id);
- } else if (strcmp(attr->name, "capabilities") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->cap);
- } else if (strcmp(attr->name, "read_typ") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdt);
- } else if (strcmp(attr->name, "read_max") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdm);
- } else {
- return scnprintf(page,
- PAGE_SIZE,
- "Unhandled attr(%s) in `%s`\n",
- attr->name, __func__);
- }
-}
-
-static ssize_t nvm_dev_attr_show_ppaf(struct nvm_addrf_12 *ppaf, char *page)
-{
- return scnprintf(page, PAGE_SIZE,
- "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
- ppaf->ch_offset, ppaf->ch_len,
- ppaf->lun_offset, ppaf->lun_len,
- ppaf->pln_offset, ppaf->pln_len,
- ppaf->blk_offset, ppaf->blk_len,
- ppaf->pg_offset, ppaf->pg_len,
- ppaf->sec_offset, ppaf->sec_len);
-}
-
-static ssize_t nvm_dev_attr_show_12(struct device *dev,
- struct device_attribute *dattr, char *page)
-{
- struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
- struct nvm_dev *ndev = ns->ndev;
- struct nvm_geo *geo = &ndev->geo;
- struct attribute *attr;
-
- if (!ndev)
- return 0;
-
- attr = &dattr->attr;
-
- if (strcmp(attr->name, "vendor_opcode") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->vmnt);
- } else if (strcmp(attr->name, "device_mode") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->dom);
- /* kept for compatibility */
- } else if (strcmp(attr->name, "media_manager") == 0) {
- return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm");
- } else if (strcmp(attr->name, "ppa_format") == 0) {
- return nvm_dev_attr_show_ppaf((void *)&geo->addrf, page);
- } else if (strcmp(attr->name, "media_type") == 0) { /* u8 */
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->mtype);
- } else if (strcmp(attr->name, "flash_media_type") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->fmtype);
- } else if (strcmp(attr->name, "num_channels") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
- } else if (strcmp(attr->name, "num_luns") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
- } else if (strcmp(attr->name, "num_planes") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pln);
- } else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
- } else if (strcmp(attr->name, "num_pages") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pg);
- } else if (strcmp(attr->name, "page_size") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->fpg_sz);
- } else if (strcmp(attr->name, "hw_sector_size") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->csecs);
- } else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->sos);
- } else if (strcmp(attr->name, "prog_typ") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
- } else if (strcmp(attr->name, "prog_max") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
- } else if (strcmp(attr->name, "erase_typ") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
- } else if (strcmp(attr->name, "erase_max") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
- } else if (strcmp(attr->name, "multiplane_modes") == 0) {
- return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mpos);
- } else if (strcmp(attr->name, "media_capabilities") == 0) {
- return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mccap);
- } else if (strcmp(attr->name, "max_phys_secs") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", NVM_MAX_VLBA);
- } else {
- return scnprintf(page, PAGE_SIZE,
- "Unhandled attr(%s) in `%s`\n",
- attr->name, __func__);
- }
-}
-
-static ssize_t nvm_dev_attr_show_20(struct device *dev,
- struct device_attribute *dattr, char *page)
-{
- struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
- struct nvm_dev *ndev = ns->ndev;
- struct nvm_geo *geo = &ndev->geo;
- struct attribute *attr;
-
- if (!ndev)
- return 0;
-
- attr = &dattr->attr;
-
- if (strcmp(attr->name, "groups") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
- } else if (strcmp(attr->name, "punits") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
- } else if (strcmp(attr->name, "chunks") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
- } else if (strcmp(attr->name, "clba") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->clba);
- } else if (strcmp(attr->name, "ws_min") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_min);
- } else if (strcmp(attr->name, "ws_opt") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_opt);
- } else if (strcmp(attr->name, "maxoc") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxoc);
- } else if (strcmp(attr->name, "maxocpu") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxocpu);
- } else if (strcmp(attr->name, "mw_cunits") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->mw_cunits);
- } else if (strcmp(attr->name, "write_typ") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
- } else if (strcmp(attr->name, "write_max") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
- } else if (strcmp(attr->name, "reset_typ") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
- } else if (strcmp(attr->name, "reset_max") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
- } else {
- return scnprintf(page, PAGE_SIZE,
- "Unhandled attr(%s) in `%s`\n",
- attr->name, __func__);
- }
-}
-
-#define NVM_DEV_ATTR_RO(_name) \
- DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL)
-#define NVM_DEV_ATTR_12_RO(_name) \
- DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_12, NULL)
-#define NVM_DEV_ATTR_20_RO(_name) \
- DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_20, NULL)
-
-/* general attributes */
-static NVM_DEV_ATTR_RO(version);
-static NVM_DEV_ATTR_RO(capabilities);
-
-static NVM_DEV_ATTR_RO(read_typ);
-static NVM_DEV_ATTR_RO(read_max);
-
-/* 1.2 values */
-static NVM_DEV_ATTR_12_RO(vendor_opcode);
-static NVM_DEV_ATTR_12_RO(device_mode);
-static NVM_DEV_ATTR_12_RO(ppa_format);
-static NVM_DEV_ATTR_12_RO(media_manager);
-static NVM_DEV_ATTR_12_RO(media_type);
-static NVM_DEV_ATTR_12_RO(flash_media_type);
-static NVM_DEV_ATTR_12_RO(num_channels);
-static NVM_DEV_ATTR_12_RO(num_luns);
-static NVM_DEV_ATTR_12_RO(num_planes);
-static NVM_DEV_ATTR_12_RO(num_blocks);
-static NVM_DEV_ATTR_12_RO(num_pages);
-static NVM_DEV_ATTR_12_RO(page_size);
-static NVM_DEV_ATTR_12_RO(hw_sector_size);
-static NVM_DEV_ATTR_12_RO(oob_sector_size);
-static NVM_DEV_ATTR_12_RO(prog_typ);
-static NVM_DEV_ATTR_12_RO(prog_max);
-static NVM_DEV_ATTR_12_RO(erase_typ);
-static NVM_DEV_ATTR_12_RO(erase_max);
-static NVM_DEV_ATTR_12_RO(multiplane_modes);
-static NVM_DEV_ATTR_12_RO(media_capabilities);
-static NVM_DEV_ATTR_12_RO(max_phys_secs);
-
-/* 2.0 values */
-static NVM_DEV_ATTR_20_RO(groups);
-static NVM_DEV_ATTR_20_RO(punits);
-static NVM_DEV_ATTR_20_RO(chunks);
-static NVM_DEV_ATTR_20_RO(clba);
-static NVM_DEV_ATTR_20_RO(ws_min);
-static NVM_DEV_ATTR_20_RO(ws_opt);
-static NVM_DEV_ATTR_20_RO(maxoc);
-static NVM_DEV_ATTR_20_RO(maxocpu);
-static NVM_DEV_ATTR_20_RO(mw_cunits);
-static NVM_DEV_ATTR_20_RO(write_typ);
-static NVM_DEV_ATTR_20_RO(write_max);
-static NVM_DEV_ATTR_20_RO(reset_typ);
-static NVM_DEV_ATTR_20_RO(reset_max);
-
-static struct attribute *nvm_dev_attrs[] = {
- /* version agnostic attrs */
- &dev_attr_version.attr,
- &dev_attr_capabilities.attr,
- &dev_attr_read_typ.attr,
- &dev_attr_read_max.attr,
-
- /* 1.2 attrs */
- &dev_attr_vendor_opcode.attr,
- &dev_attr_device_mode.attr,
- &dev_attr_media_manager.attr,
- &dev_attr_ppa_format.attr,
- &dev_attr_media_type.attr,
- &dev_attr_flash_media_type.attr,
- &dev_attr_num_channels.attr,
- &dev_attr_num_luns.attr,
- &dev_attr_num_planes.attr,
- &dev_attr_num_blocks.attr,
- &dev_attr_num_pages.attr,
- &dev_attr_page_size.attr,
- &dev_attr_hw_sector_size.attr,
- &dev_attr_oob_sector_size.attr,
- &dev_attr_prog_typ.attr,
- &dev_attr_prog_max.attr,
- &dev_attr_erase_typ.attr,
- &dev_attr_erase_max.attr,
- &dev_attr_multiplane_modes.attr,
- &dev_attr_media_capabilities.attr,
- &dev_attr_max_phys_secs.attr,
-
- /* 2.0 attrs */
- &dev_attr_groups.attr,
- &dev_attr_punits.attr,
- &dev_attr_chunks.attr,
- &dev_attr_clba.attr,
- &dev_attr_ws_min.attr,
- &dev_attr_ws_opt.attr,
- &dev_attr_maxoc.attr,
- &dev_attr_maxocpu.attr,
- &dev_attr_mw_cunits.attr,
-
- &dev_attr_write_typ.attr,
- &dev_attr_write_max.attr,
- &dev_attr_reset_typ.attr,
- &dev_attr_reset_max.attr,
-
- NULL,
-};
-
-static umode_t nvm_dev_attrs_visible(struct kobject *kobj,
- struct attribute *attr, int index)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct gendisk *disk = dev_to_disk(dev);
- struct nvme_ns *ns = disk->private_data;
- struct nvm_dev *ndev = ns->ndev;
- struct device_attribute *dev_attr =
- container_of(attr, typeof(*dev_attr), attr);
-
- if (!ndev)
- return 0;
-
- if (dev_attr->show == nvm_dev_attr_show)
- return attr->mode;
-
- switch (ndev->geo.major_ver_id) {
- case 1:
- if (dev_attr->show == nvm_dev_attr_show_12)
- return attr->mode;
- break;
- case 2:
- if (dev_attr->show == nvm_dev_attr_show_20)
- return attr->mode;
- break;
- }
-
- return 0;
-}
-
-const struct attribute_group nvme_nvm_attr_group = {
- .name = "lightnvm",
- .attrs = nvm_dev_attrs,
- .is_visible = nvm_dev_attrs_visible,
-};
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 3f32c5e86bfc..37ce3e8b1db2 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -765,7 +765,7 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
if (!head->disk)
return;
kblockd_schedule_work(&head->requeue_work);
- if (head->disk->flags & GENHD_FL_UP) {
+ if (test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
nvme_cdev_del(&head->cdev, &head->cdev_device);
del_gendisk(head->disk);
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 5cd1fa3b8464..a2e1f298b217 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -11,7 +11,6 @@
#include <linux/pci.h>
#include <linux/kref.h>
#include <linux/blk-mq.h>
-#include <linux/lightnvm.h>
#include <linux/sed-opal.h>
#include <linux/fault-inject.h>
#include <linux/rcupdate.h>
@@ -48,11 +47,6 @@ extern struct workqueue_struct *nvme_wq;
extern struct workqueue_struct *nvme_reset_wq;
extern struct workqueue_struct *nvme_delete_wq;
-enum {
- NVME_NS_LBA = 0,
- NVME_NS_LIGHTNVM = 1,
-};
-
/*
* List of workarounds for devices that required behavior not specified in
* the standard.
@@ -93,11 +87,6 @@ enum nvme_quirks {
NVME_QUIRK_NO_DEEPEST_PS = (1 << 5),
/*
- * Supports the LighNVM command set if indicated in vs[1].
- */
- NVME_QUIRK_LIGHTNVM = (1 << 6),
-
- /*
* Set MEDIUM priority on SQ creation
*/
NVME_QUIRK_MEDIUM_PRIO_SQ = (1 << 7),
@@ -158,6 +147,7 @@ enum nvme_quirks {
struct nvme_request {
struct nvme_command *cmd;
union nvme_result result;
+ u8 genctr;
u8 retries;
u8 flags;
u16 status;
@@ -449,7 +439,6 @@ struct nvme_ns {
u32 ana_grpid;
#endif
struct list_head siblings;
- struct nvm_dev *ndev;
struct kref kref;
struct nvme_ns_head *head;
@@ -497,6 +486,49 @@ struct nvme_ctrl_ops {
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
};
+/*
+ * nvme command_id is constructed as such:
+ * | xxxx | xxxxxxxxxxxx |
+ * gen request tag
+ */
+#define nvme_genctr_mask(gen) (gen & 0xf)
+#define nvme_cid_install_genctr(gen) (nvme_genctr_mask(gen) << 12)
+#define nvme_genctr_from_cid(cid) ((cid & 0xf000) >> 12)
+#define nvme_tag_from_cid(cid) (cid & 0xfff)
+
+static inline u16 nvme_cid(struct request *rq)
+{
+ return nvme_cid_install_genctr(nvme_req(rq)->genctr) | rq->tag;
+}
+
+static inline struct request *nvme_find_rq(struct blk_mq_tags *tags,
+ u16 command_id)
+{
+ u8 genctr = nvme_genctr_from_cid(command_id);
+ u16 tag = nvme_tag_from_cid(command_id);
+ struct request *rq;
+
+ rq = blk_mq_tag_to_rq(tags, tag);
+ if (unlikely(!rq)) {
+ pr_err("could not locate request for tag %#x\n",
+ tag);
+ return NULL;
+ }
+ if (unlikely(nvme_genctr_mask(nvme_req(rq)->genctr) != genctr)) {
+ dev_err(nvme_req(rq)->ctrl->device,
+ "request %#x genctr mismatch (got %#x expected %#x)\n",
+ tag, genctr, nvme_genctr_mask(nvme_req(rq)->genctr));
+ return NULL;
+ }
+ return rq;
+}
+
+static inline struct request *nvme_cid_to_rq(struct blk_mq_tags *tags,
+ u16 command_id)
+{
+ return blk_mq_tag_to_rq(tags, nvme_tag_from_cid(command_id));
+}
+
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
const char *dev_name);
@@ -594,7 +626,8 @@ static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl)
static inline bool nvme_is_aen_req(u16 qid, __u16 command_id)
{
- return !qid && command_id >= NVME_AQ_BLK_MQ_DEPTH;
+ return !qid &&
+ nvme_tag_from_cid(command_id) >= NVME_AQ_BLK_MQ_DEPTH;
}
void nvme_complete_rq(struct request *req);
@@ -823,26 +856,6 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
}
#endif
-#ifdef CONFIG_NVM
-int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
-void nvme_nvm_unregister(struct nvme_ns *ns);
-extern const struct attribute_group nvme_nvm_attr_group;
-int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp);
-#else
-static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
- int node)
-{
- return 0;
-}
-
-static inline void nvme_nvm_unregister(struct nvme_ns *ns) {};
-static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
- void __user *argp)
-{
- return -ENOTTY;
-}
-#endif /* CONFIG_NVM */
-
static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
{
return dev_to_disk(dev)->private_data;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 51852085239e..b82492cd7503 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -60,6 +60,8 @@ MODULE_PARM_DESC(sgl_threshold,
"Use SGLs when average request segment size is larger or equal to "
"this size. Use 0 to disable SGLs.");
+#define NVME_PCI_MIN_QUEUE_SIZE 2
+#define NVME_PCI_MAX_QUEUE_SIZE 4095
static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
static const struct kernel_param_ops io_queue_depth_ops = {
.set = io_queue_depth_set,
@@ -68,7 +70,7 @@ static const struct kernel_param_ops io_queue_depth_ops = {
static unsigned int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
-MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
+MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2 and < 4096");
static int io_queue_count_set(const char *val, const struct kernel_param *kp)
{
@@ -135,6 +137,7 @@ struct nvme_dev {
u32 cmbloc;
struct nvme_ctrl ctrl;
u32 last_ps;
+ bool hmb;
mempool_t *iod_mempool;
@@ -153,18 +156,14 @@ struct nvme_dev {
unsigned int nr_allocated_queues;
unsigned int nr_write_queues;
unsigned int nr_poll_queues;
+
+ bool attrs_added;
};
static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
{
- int ret;
- u32 n;
-
- ret = kstrtou32(val, 10, &n);
- if (ret != 0 || n < 2)
- return -EINVAL;
-
- return param_set_uint(val, kp);
+ return param_set_uint_minmax(val, kp, NVME_PCI_MIN_QUEUE_SIZE,
+ NVME_PCI_MAX_QUEUE_SIZE);
}
static inline unsigned int sq_idx(unsigned int qid, u32 stride)
@@ -1014,7 +1013,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
return;
}
- req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), command_id);
+ req = nvme_find_rq(nvme_queue_tagset(nvmeq), command_id);
if (unlikely(!req)) {
dev_warn(nvmeq->dev->ctrl.device,
"invalid id %d completed on queue %d\n",
@@ -1808,17 +1807,6 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
return ret >= 0 ? 0 : ret;
}
-static ssize_t nvme_cmb_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
-
- return scnprintf(buf, PAGE_SIZE, "cmbloc : x%08x\ncmbsz : x%08x\n",
- ndev->cmbloc, ndev->cmbsz);
-}
-static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL);
-
static u64 nvme_cmb_size_unit(struct nvme_dev *dev)
{
u8 szu = (dev->cmbsz >> NVME_CMBSZ_SZU_SHIFT) & NVME_CMBSZ_SZU_MASK;
@@ -1887,20 +1875,6 @@ static void nvme_map_cmb(struct nvme_dev *dev)
if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
(NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
pci_p2pmem_publish(pdev, true);
-
- if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
- &dev_attr_cmb.attr, NULL))
- dev_warn(dev->ctrl.device,
- "failed to add sysfs attribute for CMB\n");
-}
-
-static inline void nvme_release_cmb(struct nvme_dev *dev)
-{
- if (dev->cmb_size) {
- sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
- &dev_attr_cmb.attr, NULL);
- dev->cmb_size = 0;
- }
}
static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
@@ -1923,7 +1897,9 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
dev_warn(dev->ctrl.device,
"failed to set host mem (err %d, flags %#x).\n",
ret, bits);
- }
+ } else
+ dev->hmb = bits & NVME_HOST_MEM_ENABLE;
+
return ret;
}
@@ -2080,6 +2056,102 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
return ret;
}
+static ssize_t cmb_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "cmbloc : x%08x\ncmbsz : x%08x\n",
+ ndev->cmbloc, ndev->cmbsz);
+}
+static DEVICE_ATTR_RO(cmb);
+
+static ssize_t cmbloc_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "%u\n", ndev->cmbloc);
+}
+static DEVICE_ATTR_RO(cmbloc);
+
+static ssize_t cmbsz_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "%u\n", ndev->cmbsz);
+}
+static DEVICE_ATTR_RO(cmbsz);
+
+static ssize_t hmb_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "%d\n", ndev->hmb);
+}
+
+static ssize_t hmb_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+ bool new;
+ int ret;
+
+ if (strtobool(buf, &new) < 0)
+ return -EINVAL;
+
+ if (new == ndev->hmb)
+ return count;
+
+ if (new) {
+ ret = nvme_setup_host_mem(ndev);
+ } else {
+ ret = nvme_set_host_mem(ndev, 0);
+ if (!ret)
+ nvme_free_host_mem(ndev);
+ }
+
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+static DEVICE_ATTR_RW(hmb);
+
+static umode_t nvme_pci_attrs_are_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct nvme_ctrl *ctrl =
+ dev_get_drvdata(container_of(kobj, struct device, kobj));
+ struct nvme_dev *dev = to_nvme_dev(ctrl);
+
+ if (a == &dev_attr_cmb.attr ||
+ a == &dev_attr_cmbloc.attr ||
+ a == &dev_attr_cmbsz.attr) {
+ if (!dev->cmbsz)
+ return 0;
+ }
+ if (a == &dev_attr_hmb.attr && !ctrl->hmpre)
+ return 0;
+
+ return a->mode;
+}
+
+static struct attribute *nvme_pci_attrs[] = {
+ &dev_attr_cmb.attr,
+ &dev_attr_cmbloc.attr,
+ &dev_attr_cmbsz.attr,
+ &dev_attr_hmb.attr,
+ NULL,
+};
+
+static const struct attribute_group nvme_pci_attr_group = {
+ .attrs = nvme_pci_attrs,
+ .is_visible = nvme_pci_attrs_are_visible,
+};
+
/*
* nirqs is the number of interrupts available for write and read
* queues. The core already reserved an interrupt for the admin queue.
@@ -2751,6 +2823,10 @@ static void nvme_reset_work(struct work_struct *work)
goto out;
}
+ if (!dev->attrs_added && !sysfs_create_group(&dev->ctrl.device->kobj,
+ &nvme_pci_attr_group))
+ dev->attrs_added = true;
+
nvme_start_ctrl(&dev->ctrl);
return;
@@ -2999,6 +3075,13 @@ static void nvme_shutdown(struct pci_dev *pdev)
nvme_disable_prepare_reset(dev, true);
}
+static void nvme_remove_attrs(struct nvme_dev *dev)
+{
+ if (dev->attrs_added)
+ sysfs_remove_group(&dev->ctrl.device->kobj,
+ &nvme_pci_attr_group);
+}
+
/*
* The driver's remove may be called on a device in a partially initialized
* state. This function must not have any dependencies on the device state in
@@ -3020,7 +3103,7 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_stop_ctrl(&dev->ctrl);
nvme_remove_namespaces(&dev->ctrl);
nvme_dev_disable(dev, true);
- nvme_release_cmb(dev);
+ nvme_remove_attrs(dev);
nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
@@ -3047,8 +3130,13 @@ static int nvme_resume(struct device *dev)
if (ndev->last_ps == U32_MAX ||
nvme_set_power_state(ctrl, ndev->last_ps) != 0)
- return nvme_try_sched_reset(&ndev->ctrl);
+ goto reset;
+ if (ctrl->hmpre && nvme_setup_host_mem(ndev))
+ goto reset;
+
return 0;
+reset:
+ return nvme_try_sched_reset(ctrl);
}
static int nvme_suspend(struct device *dev)
@@ -3072,15 +3160,9 @@ static int nvme_suspend(struct device *dev)
* the PCI bus layer to put it into D3 in order to take the PCIe link
* down, so as to allow the platform to achieve its minimum low-power
* state (which may not be possible if the link is up).
- *
- * If a host memory buffer is enabled, shut down the device as the NVMe
- * specification allows the device to access the host memory buffer in
- * host DRAM from all power states, but hosts will fail access to DRAM
- * during S3.
*/
if (pm_suspend_via_firmware() || !ctrl->npss ||
!pcie_aspm_enabled(pdev) ||
- ndev->nr_host_mem_descs ||
(ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
return nvme_disable_prepare_reset(ndev, true);
@@ -3091,6 +3173,17 @@ static int nvme_suspend(struct device *dev)
if (ctrl->state != NVME_CTRL_LIVE)
goto unfreeze;
+ /*
+ * Host memory access may not be successful in a system suspend state,
+ * but the specification allows the controller to access memory in a
+ * non-operational power state.
+ */
+ if (ndev->hmb) {
+ ret = nvme_set_host_mem(ndev, 0);
+ if (ret < 0)
+ goto unfreeze;
+ }
+
ret = nvme_get_power_state(ctrl, &ndev->last_ps);
if (ret < 0)
goto unfreeze;
@@ -3243,12 +3336,6 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */
.driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
- { PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */
- .driver_data = NVME_QUIRK_LIGHTNVM, },
- { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */
- .driver_data = NVME_QUIRK_LIGHTNVM, },
- { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */
- .driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 7f6b3a991501..a68704e39084 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -735,13 +735,13 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
if (ret)
return ret;
- ctrl->ctrl.queue_count = nr_io_queues + 1;
- if (ctrl->ctrl.queue_count < 2) {
+ if (nr_io_queues == 0) {
dev_err(ctrl->ctrl.device,
"unable to set any I/O queues\n");
return -ENOMEM;
}
+ ctrl->ctrl.queue_count = nr_io_queues + 1;
dev_info(ctrl->ctrl.device,
"creating %d I/O queues.\n", nr_io_queues);
@@ -1730,10 +1730,10 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
struct request *rq;
struct nvme_rdma_request *req;
- rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id);
+ rq = nvme_find_rq(nvme_rdma_tagset(queue), cqe->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
- "tag 0x%x on QP %#x not found\n",
+ "got bad command_id %#x on QP %#x\n",
cqe->command_id, queue->qp->qp_num);
nvme_rdma_error_recovery(queue->ctrl);
return;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 8cb15ee5b249..645025620154 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -487,11 +487,11 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
{
struct request *rq;
- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), cqe->command_id);
+ rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
- "queue %d tag 0x%x not found\n",
- nvme_tcp_queue_id(queue), cqe->command_id);
+ "got bad cqe.command_id %#x on queue %d\n",
+ cqe->command_id, nvme_tcp_queue_id(queue));
nvme_tcp_error_recovery(&queue->ctrl->ctrl);
return -EINVAL;
}
@@ -508,11 +508,11 @@ static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
{
struct request *rq;
- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+ rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
- "queue %d tag %#x not found\n",
- nvme_tcp_queue_id(queue), pdu->command_id);
+ "got bad c2hdata.command_id %#x on queue %d\n",
+ pdu->command_id, nvme_tcp_queue_id(queue));
return -ENOENT;
}
@@ -606,7 +606,7 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
data->hdr.plen =
cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
data->ttag = pdu->ttag;
- data->command_id = rq->tag;
+ data->command_id = nvme_cid(rq);
data->data_offset = cpu_to_le32(req->data_sent);
data->data_length = cpu_to_le32(req->pdu_len);
return 0;
@@ -619,11 +619,11 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
struct request *rq;
int ret;
- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+ rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
- "queue %d tag %#x not found\n",
- nvme_tcp_queue_id(queue), pdu->command_id);
+ "got bad r2t.command_id %#x on queue %d\n",
+ pdu->command_id, nvme_tcp_queue_id(queue));
return -ENOENT;
}
req = blk_mq_rq_to_pdu(rq);
@@ -702,17 +702,9 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
unsigned int *offset, size_t *len)
{
struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
- struct nvme_tcp_request *req;
- struct request *rq;
-
- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
- if (!rq) {
- dev_err(queue->ctrl->ctrl.device,
- "queue %d tag %#x not found\n",
- nvme_tcp_queue_id(queue), pdu->command_id);
- return -ENOENT;
- }
- req = blk_mq_rq_to_pdu(rq);
+ struct request *rq =
+ nvme_cid_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+ struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
while (true) {
int recv_len, ret;
@@ -804,8 +796,8 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
}
if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
- struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
- pdu->command_id);
+ struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
+ pdu->command_id);
nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
queue->nr_cqe++;
@@ -1228,6 +1220,7 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
sock_release(queue->sock);
kfree(queue->pdu);
+ mutex_destroy(&queue->send_mutex);
mutex_destroy(&queue->queue_lock);
}
@@ -1533,6 +1526,7 @@ err_sock:
sock_release(queue->sock);
queue->sock = NULL;
err_destroy_mutex:
+ mutex_destroy(&queue->send_mutex);
mutex_destroy(&queue->queue_lock);
return ret;
}
@@ -1769,13 +1763,13 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
if (ret)
return ret;
- ctrl->queue_count = nr_io_queues + 1;
- if (ctrl->queue_count < 2) {
+ if (nr_io_queues == 0) {
dev_err(ctrl->device,
"unable to set any I/O queues\n");
return -ENOMEM;
}
+ ctrl->queue_count = nr_io_queues + 1;
dev_info(ctrl->device,
"creating %d I/O queues.\n", nr_io_queues);
diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c
index 6543015b6121..2a89c5aa0790 100644
--- a/drivers/nvme/host/trace.c
+++ b/drivers/nvme/host/trace.c
@@ -72,6 +72,20 @@ static const char *nvme_trace_admin_identify(struct trace_seq *p, u8 *cdw10)
return ret;
}
+/*
+ * Format the Set Features admin command for the trace output.
+ *
+ * @p:     trace sequence that receives the formatted text
+ * @cdw10: command bytes starting at CDW10 (little-endian); CDW11 is read
+ *         from the four bytes that follow
+ *
+ * Returns a pointer to the start of the NUL-terminated text appended to @p.
+ */
+static const char *nvme_trace_admin_set_features(struct trace_seq *p,
+						 u8 *cdw10)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+	u8 fid = cdw10[0];
+	/* SV (Save) is CDW10 bit 31, i.e. the top bit of the fourth byte. */
+	u8 sv = cdw10[3] >> 7;
+	u32 cdw11 = get_unaligned_le32(cdw10 + 4);
+
+	trace_seq_printf(p, "fid=0x%x, sv=0x%x, cdw11=0x%x", fid, sv, cdw11);
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+
static const char *nvme_trace_admin_get_features(struct trace_seq *p,
u8 *cdw10)
{
@@ -80,7 +94,7 @@ static const char *nvme_trace_admin_get_features(struct trace_seq *p,
u8 sel = cdw10[1] & 0x7;
u32 cdw11 = get_unaligned_le32(cdw10 + 4);
- trace_seq_printf(p, "fid=0x%x sel=0x%x cdw11=0x%x", fid, sel, cdw11);
+ trace_seq_printf(p, "fid=0x%x, sel=0x%x, cdw11=0x%x", fid, sel, cdw11);
trace_seq_putc(p, 0);
return ret;
@@ -201,6 +215,8 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p,
return nvme_trace_create_cq(p, cdw10);
case nvme_admin_identify:
return nvme_trace_admin_identify(p, cdw10);
+ case nvme_admin_set_features:
+ return nvme_trace_admin_set_features(p, cdw10);
case nvme_admin_get_features:
return nvme_trace_admin_get_features(p, cdw10);
case nvme_admin_get_lba_status:
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index 4be2ececbc45..973561c93888 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -31,7 +31,6 @@ config NVME_TARGET_PASSTHRU
config NVME_TARGET_LOOP
tristate "NVMe loopback device support"
depends on NVME_TARGET
- select NVME_CORE
select NVME_FABRICS
select SG_POOL
help
@@ -65,7 +64,6 @@ config NVME_TARGET_FC
config NVME_TARGET_FCLOOP
tristate "NVMe over Fabrics FC Transport Loopback Test driver"
depends on NVME_TARGET
- select NVME_CORE
select NVME_FABRICS
select SG_POOL
depends on NVME_FC
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index ac7210a3ea1c..66d05eecc2a9 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -802,6 +802,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
* controller teardown as a result of a keep-alive expiration.
*/
ctrl->reset_tbkas = true;
+ sq->ctrl->sqs[sq->qid] = NULL;
nvmet_ctrl_put(ctrl);
sq->ctrl = NULL; /* allows reusing the queue later */
}
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 7d0f3523fdab..7d0454cee920 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -109,21 +109,38 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
u16 qid = le16_to_cpu(c->qid);
u16 sqsize = le16_to_cpu(c->sqsize);
struct nvmet_ctrl *old;
+ u16 mqes = NVME_CAP_MQES(ctrl->cap);
u16 ret;
- old = cmpxchg(&req->sq->ctrl, NULL, ctrl);
- if (old) {
- pr_warn("queue already connected!\n");
- req->error_loc = offsetof(struct nvmf_connect_command, opcode);
- return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
- }
if (!sqsize) {
pr_warn("queue size zero!\n");
req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
+ req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(sqsize);
ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
goto err;
}
+ if (ctrl->sqs[qid] != NULL) {
+ pr_warn("qid %u has already been created\n", qid);
+ req->error_loc = offsetof(struct nvmf_connect_command, qid);
+ return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+ }
+
+ if (sqsize > mqes) {
+ pr_warn("sqsize %u is larger than MQES supported %u cntlid %d\n",
+ sqsize, mqes, ctrl->cntlid);
+ req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
+ req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(sqsize);
+ return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ }
+
+ old = cmpxchg(&req->sq->ctrl, NULL, ctrl);
+ if (old) {
+ pr_warn("queue already connected!\n");
+ req->error_loc = offsetof(struct nvmf_connect_command, opcode);
+ return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
+ }
+
/* note: convert queue size from 0's-based value to 1's-based value */
nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1);
nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1);
@@ -138,6 +155,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
if (ret) {
pr_err("failed to install queue %d cntlid %d ret %x\n",
qid, ctrl->cntlid, ret);
+ ctrl->sqs[qid] = NULL;
goto err;
}
}
@@ -260,11 +278,11 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
}
status = nvmet_install_queue(ctrl, req);
- if (status) {
- /* pass back cntlid that had the issue of installing queue */
- req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
+ if (status)
goto out_ctrl_put;
- }
+
+ /* pass back cntlid for successful completion */
+ req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 3a17a7e26bbf..0285ccc7541f 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -107,10 +107,10 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
} else {
struct request *rq;
- rq = blk_mq_tag_to_rq(nvme_loop_tagset(queue), cqe->command_id);
+ rq = nvme_find_rq(nvme_loop_tagset(queue), cqe->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
- "tag 0x%x on queue %d not found\n",
+ "got bad command_id %#x on queue %d\n",
cqe->command_id, nvme_loop_queue_idx(queue));
return;
}
diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c
index 1373a3c67962..bff454d46255 100644
--- a/drivers/nvme/target/trace.c
+++ b/drivers/nvme/target/trace.c
@@ -27,7 +27,7 @@ static const char *nvmet_trace_admin_get_features(struct trace_seq *p,
u8 sel = cdw10[1] & 0x7;
u32 cdw11 = get_unaligned_le32(cdw10 + 4);
- trace_seq_printf(p, "fid=0x%x sel=0x%x cdw11=0x%x", fid, sel, cdw11);
+ trace_seq_printf(p, "fid=0x%x, sel=0x%x, cdw11=0x%x", fid, sel, cdw11);
trace_seq_putc(p, 0);
return ret;
@@ -49,6 +49,20 @@ static const char *nvmet_trace_get_lba_status(struct trace_seq *p,
return ret;
}
+/*
+ * Format the Set Features admin command for the target-side trace output.
+ *
+ * @p:     trace sequence that receives the formatted text
+ * @cdw10: command bytes starting at CDW10 (little-endian); CDW11 is read
+ *         from the four bytes that follow
+ *
+ * Returns a pointer to the start of the NUL-terminated text appended to @p.
+ */
+static const char *nvmet_trace_admin_set_features(struct trace_seq *p,
+						  u8 *cdw10)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+	u8 fid = cdw10[0];
+	/* SV (Save) is CDW10 bit 31, i.e. the top bit of the fourth byte. */
+	u8 sv = cdw10[3] >> 7;
+	u32 cdw11 = get_unaligned_le32(cdw10 + 4);
+
+	trace_seq_printf(p, "fid=0x%x, sv=0x%x, cdw11=0x%x", fid, sv, cdw11);
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+
static const char *nvmet_trace_read_write(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
@@ -94,6 +108,8 @@ const char *nvmet_trace_parse_admin_cmd(struct trace_seq *p,
switch (opcode) {
case nvme_admin_identify:
return nvmet_trace_admin_identify(p, cdw10);
+ case nvme_admin_set_features:
+ return nvmet_trace_admin_set_features(p, cdw10);
case nvme_admin_get_features:
return nvmet_trace_admin_get_features(p, cdw10);
case nvme_admin_get_lba_status:
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index 17f8b7a45f21..46bc30fe85d2 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -115,14 +115,11 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
}
status = nvmet_req_find_ns(req);
- if (status) {
- status = NVME_SC_INTERNAL;
+ if (status)
goto done;
- }
if (!bdev_is_zoned(req->ns->bdev)) {
req->error_loc = offsetof(struct nvme_identify, nsid);
- status = NVME_SC_INVALID_NS | NVME_SC_DNR;
goto done;
}
diff --git a/drivers/nvmem/Kconfig b/drivers/nvmem/Kconfig
index dd2019006838..39854d43758b 100644
--- a/drivers/nvmem/Kconfig
+++ b/drivers/nvmem/Kconfig
@@ -107,6 +107,17 @@ config MTK_EFUSE
This driver can also be built as a module. If so, the module
will be called efuse-mtk.
+config NVMEM_NINTENDO_OTP
+ tristate "Nintendo Wii and Wii U OTP Support"
+ help
+ This is a driver exposing the OTP of a Nintendo Wii or Wii U console.
+
+ This memory contains common and per-console keys, signatures and
+ related data required to access peripherals.
+
+ This driver can also be built as a module. If so, the module
+ will be called nvmem-nintendo-otp.
+
config QCOM_QFPROM
tristate "QCOM QFPROM Support"
depends on ARCH_QCOM || COMPILE_TEST
diff --git a/drivers/nvmem/Makefile b/drivers/nvmem/Makefile
index bbea1410240a..dcbbde35b6a8 100644
--- a/drivers/nvmem/Makefile
+++ b/drivers/nvmem/Makefile
@@ -23,6 +23,8 @@ obj-$(CONFIG_NVMEM_LPC18XX_OTP) += nvmem_lpc18xx_otp.o
nvmem_lpc18xx_otp-y := lpc18xx_otp.o
obj-$(CONFIG_NVMEM_MXS_OCOTP) += nvmem-mxs-ocotp.o
nvmem-mxs-ocotp-y := mxs-ocotp.o
+obj-$(CONFIG_NVMEM_NINTENDO_OTP) += nvmem-nintendo-otp.o
+nvmem-nintendo-otp-y := nintendo-otp.o
obj-$(CONFIG_MTK_EFUSE) += nvmem_mtk-efuse.o
nvmem_mtk-efuse-y := mtk-efuse.o
obj-$(CONFIG_QCOM_QFPROM) += nvmem_qfprom.o
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index b3bc30a04ed7..3d87fadaa160 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -824,8 +824,11 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
if (nvmem->nkeepout) {
rval = nvmem_validate_keepouts(nvmem);
- if (rval)
- goto err_put_device;
+ if (rval) {
+ ida_free(&nvmem_ida, nvmem->id);
+ kfree(nvmem);
+ return ERR_PTR(rval);
+ }
}
dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
diff --git a/drivers/nvmem/nintendo-otp.c b/drivers/nvmem/nintendo-otp.c
new file mode 100644
index 000000000000..33961b17f9f1
--- /dev/null
+++ b/drivers/nvmem/nintendo-otp.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Nintendo Wii and Wii U OTP driver
+ *
+ * This is a driver exposing the OTP of a Nintendo Wii or Wii U console.
+ *
+ * This memory contains common and per-console keys, signatures and
+ * related data required to access peripherals.
+ *
+ * Based on reversed documentation from https://wiiubrew.org/wiki/Hardware/OTP
+ *
+ * Copyright (C) 2021 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
+ */
+
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/nvmem-provider.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+/* Register offsets, added to the mapped base in priv->regs. */
+#define HW_OTPCMD 0
+#define HW_OTPDATA 4
+/* Flag OR-ed into every command word the reader writes to HW_OTPCMD. */
+#define OTP_READ 0x80000000
+/* Bytes per bank; the exposed device size is num_banks * BANK_SIZE. */
+#define BANK_SIZE 128
+/* Bytes per OTP word; also used as the nvmem stride and word size. */
+#define WORD_SIZE 4
+
+/* Per-device state; passed to the nvmem core as the read callback context. */
+struct nintendo_otp_priv {
+ /* Mapped register window holding HW_OTPCMD and HW_OTPDATA. */
+ void __iomem *regs;
+};
+
+/* Per-compatible parameters, attached to the OF match table entries. */
+struct nintendo_otp_devtype_data {
+ /* Name given to the registered nvmem device. */
+ const char *name;
+ /* Number of BANK_SIZE-byte banks; determines the nvmem size. */
+ unsigned int num_banks;
+};
+
+/* Parameters for "nintendo,hollywood-otp": a single bank named "wii-otp". */
+static const struct nintendo_otp_devtype_data hollywood_otp_data = {
+ .name = "wii-otp",
+ .num_banks = 1,
+};
+
+/* Parameters for "nintendo,latte-otp": eight banks named "wiiu-otp". */
+static const struct nintendo_otp_devtype_data latte_otp_data = {
+ .name = "wiiu-otp",
+ .num_banks = 8,
+};
+
+/*
+ * nvmem read callback: fetch whole words from the OTP, one command per word.
+ *
+ * @context: the struct nintendo_otp_priv registered as config.priv
+ * @reg:     byte offset of the first word to read
+ * @_val:    destination buffer, filled with one u32 per word
+ * @bytes:   number of bytes requested; bytes / WORD_SIZE words are read
+ *
+ * Always returns 0.
+ */
+static int nintendo_otp_reg_read(void *context,
+				 unsigned int reg, void *_val, size_t bytes)
+{
+	struct nintendo_otp_priv *priv = context;
+	u32 *out = _val;
+	int remaining;
+
+	for (remaining = bytes / WORD_SIZE; remaining; remaining--) {
+		/*
+		 * The bank number is shifted into bits 8 and up; the low
+		 * bits select the word within that bank.
+		 */
+		u32 bank = (reg / BANK_SIZE) << 8;
+		u32 word = (reg / WORD_SIZE) % (BANK_SIZE / WORD_SIZE);
+
+		/* Issue the read command, then collect the resulting word. */
+		iowrite32be(OTP_READ | bank | word, priv->regs + HW_OTPCMD);
+		*out++ = ioread32be(priv->regs + HW_OTPDATA);
+		reg += WORD_SIZE;
+	}
+
+	return 0;
+}
+
+/*
+ * Device-tree compatibles handled by this driver. Each entry's .data points
+ * at the nintendo_otp_devtype_data that probe uses for the nvmem name and
+ * size.
+ */
+static const struct of_device_id nintendo_otp_of_table[] = {
+ { .compatible = "nintendo,hollywood-otp", .data = &hollywood_otp_data },
+ { .compatible = "nintendo,latte-otp", .data = &latte_otp_data },
+ {/* sentinel */},
+};
+MODULE_DEVICE_TABLE(of, nintendo_otp_of_table);
+
+/*
+ * Bind to an OTP block: map its registers and register a read-only,
+ * root-only nvmem device whose name and size come from the match data.
+ *
+ * Returns 0 on success, -ENODEV when the device has no match data,
+ * -ENOMEM on allocation failure, or the error reported by the register
+ * mapping or nvmem registration helpers.
+ */
+static int nintendo_otp_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	const struct nintendo_otp_devtype_data *data;
+	struct nvmem_device *nvmem;
+	struct nintendo_otp_priv *priv;
+	struct nvmem_config config = {
+		.stride = WORD_SIZE,
+		.word_size = WORD_SIZE,
+		.reg_read = nintendo_otp_reg_read,
+		.read_only = true,
+		.root_only = true,
+	};
+
+	/*
+	 * The lookup yields NULL when the device was not matched through the
+	 * OF table; bail out rather than dereference a missing match entry.
+	 */
+	data = of_device_get_match_data(dev);
+	if (!data)
+		return -ENODEV;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(priv->regs))
+		return PTR_ERR(priv->regs);
+
+	config.name = data->name;
+	config.size = data->num_banks * BANK_SIZE;
+	config.dev = dev;
+	config.priv = priv;
+
+	nvmem = devm_nvmem_register(dev, &config);
+
+	return PTR_ERR_OR_ZERO(nvmem);
+}
+
+/*
+ * Platform driver glue: devices are matched through nintendo_otp_of_table
+ * and set up by nintendo_otp_probe. No remove callback is provided.
+ */
+static struct platform_driver nintendo_otp_driver = {
+ .probe = nintendo_otp_probe,
+ .driver = {
+ .name = "nintendo-otp",
+ .of_match_table = nintendo_otp_of_table,
+ },
+};
+module_platform_driver(nintendo_otp_driver);
+MODULE_AUTHOR("Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>");
+MODULE_DESCRIPTION("Nintendo Wii and Wii U OTP driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvmem/qfprom.c b/drivers/nvmem/qfprom.c
index 81fbad5e939d..c500d6235bf6 100644
--- a/drivers/nvmem/qfprom.c
+++ b/drivers/nvmem/qfprom.c
@@ -12,6 +12,8 @@
#include <linux/mod_devicetable.h>
#include <linux/nvmem-provider.h>
#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
#include <linux/property.h>
#include <linux/regulator/consumer.h>
@@ -139,6 +141,12 @@ static void qfprom_disable_fuse_blowing(const struct qfprom_priv *priv,
{
int ret;
+ writel(old->timer_val, priv->qfpconf + QFPROM_BLOW_TIMER_OFFSET);
+ writel(old->accel_val, priv->qfpconf + QFPROM_ACCEL_OFFSET);
+
+ dev_pm_genpd_set_performance_state(priv->dev, 0);
+ pm_runtime_put(priv->dev);
+
/*
* This may be a shared rail and may be able to run at a lower rate
* when we're not blowing fuses. At the moment, the regulator framework
@@ -159,9 +167,6 @@ static void qfprom_disable_fuse_blowing(const struct qfprom_priv *priv,
"Failed to set clock rate for disable (ignoring)\n");
clk_disable_unprepare(priv->secclk);
-
- writel(old->timer_val, priv->qfpconf + QFPROM_BLOW_TIMER_OFFSET);
- writel(old->accel_val, priv->qfpconf + QFPROM_ACCEL_OFFSET);
}
/**
@@ -212,6 +217,14 @@ static int qfprom_enable_fuse_blowing(const struct qfprom_priv *priv,
goto err_clk_rate_set;
}
+ ret = pm_runtime_get_sync(priv->dev);
+ if (ret < 0) {
+ pm_runtime_put_noidle(priv->dev);
+ dev_err(priv->dev, "Failed to enable power-domain\n");
+ goto err_reg_enable;
+ }
+ dev_pm_genpd_set_performance_state(priv->dev, INT_MAX);
+
old->timer_val = readl(priv->qfpconf + QFPROM_BLOW_TIMER_OFFSET);
old->accel_val = readl(priv->qfpconf + QFPROM_ACCEL_OFFSET);
writel(priv->soc_data->qfprom_blow_timer_value,
@@ -221,6 +234,8 @@ static int qfprom_enable_fuse_blowing(const struct qfprom_priv *priv,
return 0;
+err_reg_enable:
+ regulator_disable(priv->vcc);
err_clk_rate_set:
clk_set_rate(priv->secclk, old->clk_rate);
err_clk_prepared:
@@ -320,6 +335,11 @@ static int qfprom_reg_read(void *context,
return 0;
}
+/* devm action: disable runtime PM for the device passed in as @data. */
+static void qfprom_runtime_disable(void *data)
+{
+	struct device *dev = data;
+
+	pm_runtime_disable(dev);
+}
+
static const struct qfprom_soc_data qfprom_7_8_data = {
.accel_value = 0xD10,
.qfprom_blow_timer_value = 25,
@@ -420,6 +440,11 @@ static int qfprom_probe(struct platform_device *pdev)
econfig.reg_write = qfprom_reg_write;
}
+ pm_runtime_enable(dev);
+ ret = devm_add_action_or_reset(dev, qfprom_runtime_disable, dev);
+ if (ret)
+ return ret;
+
nvmem = devm_nvmem_register(dev, &econfig);
return PTR_ERR_OR_ZERO(nvmem);
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index b335c077f215..04b4691a8aac 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -893,6 +893,10 @@ static int _set_required_opps(struct device *dev,
if (!required_opp_tables)
return 0;
+ /* required-opps not fully initialized yet */
+ if (lazy_linking_pending(opp_table))
+ return -EBUSY;
+
/*
* We only support genpd's OPPs in the "required-opps" for now, as we
* don't know much about other use cases. Error out if the required OPP
@@ -903,10 +907,6 @@ static int _set_required_opps(struct device *dev,
return -ENOENT;
}
- /* required-opps not fully initialized yet */
- if (lazy_linking_pending(opp_table))
- return -EBUSY;
-
/* Single genpd case */
if (!genpd_virt_devs)
return _set_required_opp(dev, dev, opp, 0);
@@ -1856,9 +1856,6 @@ void dev_pm_opp_put_supported_hw(struct opp_table *opp_table)
if (unlikely(!opp_table))
return;
- /* Make sure there are no concurrent readers while updating opp_table */
- WARN_ON(!list_empty(&opp_table->opp_list));
-
kfree(opp_table->supported_hw);
opp_table->supported_hw = NULL;
opp_table->supported_hw_count = 0;
@@ -1944,9 +1941,6 @@ void dev_pm_opp_put_prop_name(struct opp_table *opp_table)
if (unlikely(!opp_table))
return;
- /* Make sure there are no concurrent readers while updating opp_table */
- WARN_ON(!list_empty(&opp_table->opp_list));
-
kfree(opp_table->prop_name);
opp_table->prop_name = NULL;
@@ -2056,9 +2050,6 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table)
if (!opp_table->regulators)
goto put_opp_table;
- /* Make sure there are no concurrent readers while updating opp_table */
- WARN_ON(!list_empty(&opp_table->opp_list));
-
if (opp_table->enabled) {
for (i = opp_table->regulator_count - 1; i >= 0; i--)
regulator_disable(opp_table->regulators[i]);
@@ -2178,9 +2169,6 @@ void dev_pm_opp_put_clkname(struct opp_table *opp_table)
if (unlikely(!opp_table))
return;
- /* Make sure there are no concurrent readers while updating opp_table */
- WARN_ON(!list_empty(&opp_table->opp_list));
-
clk_put(opp_table->clk);
opp_table->clk = ERR_PTR(-EINVAL);
@@ -2279,9 +2267,6 @@ void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table)
if (unlikely(!opp_table))
return;
- /* Make sure there are no concurrent readers while updating opp_table */
- WARN_ON(!list_empty(&opp_table->opp_list));
-
opp_table->set_opp = NULL;
mutex_lock(&opp_table->lock);
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index d298e38aaf7e..2a97c6535c4c 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -95,15 +95,7 @@ static struct dev_pm_opp *_find_opp_of_np(struct opp_table *opp_table,
static struct device_node *of_parse_required_opp(struct device_node *np,
int index)
{
- struct device_node *required_np;
-
- required_np = of_parse_phandle(np, "required-opps", index);
- if (unlikely(!required_np)) {
- pr_err("%s: Unable to parse required-opps: %pOF, index: %d\n",
- __func__, np, index);
- }
-
- return required_np;
+ return of_parse_phandle(np, "required-opps", index);
}
/* The caller must call dev_pm_opp_put_opp_table() after the table is used */
@@ -964,8 +956,9 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table)
}
}
- /* There should be one of more OPP defined */
- if (WARN_ON(!count)) {
+ /* There should be one or more OPPs defined */
+ if (!count) {
+ dev_err(dev, "%s: no supported OPPs", __func__);
ret = -ENOENT;
goto remove_static_opp;
}
@@ -1327,7 +1320,7 @@ int of_get_required_opp_performance_state(struct device_node *np, int index)
required_np = of_parse_required_opp(np, index);
if (!required_np)
- return -EINVAL;
+ return -ENODEV;
opp_table = _find_table_of_opp_np(required_np);
if (IS_ERR(opp_table)) {
diff --git a/drivers/parport/ieee1284_ops.c b/drivers/parport/ieee1284_ops.c
index 2c11bd3fe1fd..17061f1df0f4 100644
--- a/drivers/parport/ieee1284_ops.c
+++ b/drivers/parport/ieee1284_ops.c
@@ -518,7 +518,7 @@ size_t parport_ieee1284_ecp_read_data (struct parport *port,
goto out;
/* Yield the port for a while. */
- if (count && dev->port->irq != PARPORT_IRQ_NONE) {
+ if (dev->port->irq != PARPORT_IRQ_NONE) {
parport_release (dev);
schedule_timeout_interruptible(msecs_to_jiffies(40));
parport_claim_or_block (dev);
diff --git a/drivers/parport/parport_serial.c b/drivers/parport/parport_serial.c
index 96b888bb49c6..9f5d784cd95d 100644
--- a/drivers/parport/parport_serial.c
+++ b/drivers/parport/parport_serial.c
@@ -606,12 +606,15 @@ static int parport_register(struct pci_dev *dev, const struct pci_device_id *id)
"hi" as an offset (see SYBA
def.) */
/* TODO: test if sharing interrupts works */
- irq = dev->irq;
- if (irq == IRQ_NONE) {
+ irq = pci_irq_vector(dev, 0);
+ if (irq < 0)
+ return irq;
+ if (irq == 0)
+ irq = PARPORT_IRQ_NONE;
+ if (irq == PARPORT_IRQ_NONE) {
dev_dbg(&dev->dev,
"PCI parallel port detected: I/O at %#lx(%#lx)\n",
io_lo, io_hi);
- irq = PARPORT_IRQ_NONE;
} else {
dev_dbg(&dev->dev,
"PCI parallel port detected: I/O at %#lx(%#lx), IRQ %d\n",
diff --git a/drivers/pci/controller/pci-ixp4xx.c b/drivers/pci/controller/pci-ixp4xx.c
index 896a45b24236..654ac4a82beb 100644
--- a/drivers/pci/controller/pci-ixp4xx.c
+++ b/drivers/pci/controller/pci-ixp4xx.c
@@ -145,7 +145,7 @@ static int ixp4xx_pci_check_master_abort(struct ixp4xx_pci *p)
return 0;
}
-static int ixp4xx_pci_read(struct ixp4xx_pci *p, u32 addr, u32 cmd, u32 *data)
+static int ixp4xx_pci_read_indirect(struct ixp4xx_pci *p, u32 addr, u32 cmd, u32 *data)
{
ixp4xx_writel(p, IXP4XX_PCI_NP_AD, addr);
@@ -170,7 +170,7 @@ static int ixp4xx_pci_read(struct ixp4xx_pci *p, u32 addr, u32 cmd, u32 *data)
return ixp4xx_pci_check_master_abort(p);
}
-static int ixp4xx_pci_write(struct ixp4xx_pci *p, u32 addr, u32 cmd, u32 data)
+static int ixp4xx_pci_write_indirect(struct ixp4xx_pci *p, u32 addr, u32 cmd, u32 data)
{
ixp4xx_writel(p, IXP4XX_PCI_NP_AD, addr);
@@ -308,7 +308,7 @@ static int ixp4xx_pci_read_config(struct pci_bus *bus, unsigned int devfn,
dev_dbg(p->dev, "read_config from %d size %d dev %d:%d:%d address: %08x cmd: %08x\n",
where, size, bus_num, PCI_SLOT(devfn), PCI_FUNC(devfn), addr, cmd);
- ret = ixp4xx_pci_read(p, addr, cmd, &val);
+ ret = ixp4xx_pci_read_indirect(p, addr, cmd, &val);
if (ret)
return PCIBIOS_DEVICE_NOT_FOUND;
@@ -356,7 +356,7 @@ static int ixp4xx_pci_write_config(struct pci_bus *bus, unsigned int devfn,
dev_dbg(p->dev, "write_config_byte %#x to %d size %d dev %d:%d:%d addr: %08x cmd %08x\n",
value, where, size, bus_num, PCI_SLOT(devfn), PCI_FUNC(devfn), addr, cmd);
- ret = ixp4xx_pci_write(p, addr, cmd, val);
+ ret = ixp4xx_pci_write_indirect(p, addr, cmd, val);
if (ret)
return PCIBIOS_DEVICE_NOT_FOUND;
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 9232255c8515..0099a00af361 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -129,93 +129,95 @@ void __weak arch_restore_msi_irqs(struct pci_dev *dev)
return default_restore_msi_irqs(dev);
}
-static inline __attribute_const__ u32 msi_mask(unsigned x)
-{
- /* Don't shift by >= width of type */
- if (x >= 5)
- return 0xffffffff;
- return (1 << (1 << x)) - 1;
-}
-
/*
* PCI 2.3 does not specify mask bits for each MSI interrupt. Attempting to
* mask all MSI interrupts by clearing the MSI enable bit does not work
* reliably as devices without an INTx disable bit will then generate a
* level IRQ which will never be cleared.
*/
-u32 __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
+static inline __attribute_const__ u32 msi_multi_mask(struct msi_desc *desc)
{
- u32 mask_bits = desc->masked;
+ /* Don't shift by >= width of type */
+ if (desc->msi_attrib.multi_cap >= 5)
+ return 0xffffffff;
+ return (1 << (1 << desc->msi_attrib.multi_cap)) - 1;
+}
- if (pci_msi_ignore_mask || !desc->msi_attrib.maskbit)
- return 0;
+static noinline void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set)
+{
+ raw_spinlock_t *lock = &desc->dev->msi_lock;
+ unsigned long flags;
- mask_bits &= ~mask;
- mask_bits |= flag;
+ raw_spin_lock_irqsave(lock, flags);
+ desc->msi_mask &= ~clear;
+ desc->msi_mask |= set;
pci_write_config_dword(msi_desc_to_pci_dev(desc), desc->mask_pos,
- mask_bits);
-
- return mask_bits;
+ desc->msi_mask);
+ raw_spin_unlock_irqrestore(lock, flags);
}
-static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
+static inline void pci_msi_mask(struct msi_desc *desc, u32 mask)
{
- desc->masked = __pci_msi_desc_mask_irq(desc, mask, flag);
+ pci_msi_update_mask(desc, 0, mask);
}
-static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
+static inline void pci_msi_unmask(struct msi_desc *desc, u32 mask)
{
- if (desc->msi_attrib.is_virtual)
- return NULL;
+ pci_msi_update_mask(desc, mask, 0);
+}
- return desc->mask_base +
- desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+static inline void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
+{
+ return desc->mask_base + desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
}
/*
- * This internal function does not flush PCI writes to the device.
- * All users must ensure that they read from the device before either
- * assuming that the device state is up to date, or returning out of this
- * file. This saves a few milliseconds when initialising devices with lots
- * of MSI-X interrupts.
+ * This internal function does not flush PCI writes to the device. All
+ * users must ensure that they read from the device before either assuming
+ * that the device state is up to date, or returning out of this file.
+ * It does not affect the msi_desc::msix_ctrl cache either. Use with care!
*/
-u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag)
+static void pci_msix_write_vector_ctrl(struct msi_desc *desc, u32 ctrl)
{
- u32 mask_bits = desc->masked;
- void __iomem *desc_addr;
+ void __iomem *desc_addr = pci_msix_desc_addr(desc);
- if (pci_msi_ignore_mask)
- return 0;
-
- desc_addr = pci_msix_desc_addr(desc);
- if (!desc_addr)
- return 0;
-
- mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
- if (flag & PCI_MSIX_ENTRY_CTRL_MASKBIT)
- mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
+ writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
+}
- writel(mask_bits, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
+static inline void pci_msix_mask(struct msi_desc *desc)
+{
+ desc->msix_ctrl |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
+ pci_msix_write_vector_ctrl(desc, desc->msix_ctrl);
+ /* Flush write to device */
+ readl(desc->mask_base);
+}
- return mask_bits;
+static inline void pci_msix_unmask(struct msi_desc *desc)
+{
+ desc->msix_ctrl &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
+ pci_msix_write_vector_ctrl(desc, desc->msix_ctrl);
}
-static void msix_mask_irq(struct msi_desc *desc, u32 flag)
+static void __pci_msi_mask_desc(struct msi_desc *desc, u32 mask)
{
- desc->masked = __pci_msix_desc_mask_irq(desc, flag);
+ if (pci_msi_ignore_mask || desc->msi_attrib.is_virtual)
+ return;
+
+ if (desc->msi_attrib.is_msix)
+ pci_msix_mask(desc);
+ else if (desc->msi_attrib.maskbit)
+ pci_msi_mask(desc, mask);
}
-static void msi_set_mask_bit(struct irq_data *data, u32 flag)
+static void __pci_msi_unmask_desc(struct msi_desc *desc, u32 mask)
{
- struct msi_desc *desc = irq_data_get_msi_desc(data);
+ if (pci_msi_ignore_mask || desc->msi_attrib.is_virtual)
+ return;
- if (desc->msi_attrib.is_msix) {
- msix_mask_irq(desc, flag);
- readl(desc->mask_base); /* Flush write to device */
- } else {
- unsigned offset = data->irq - desc->irq;
- msi_mask_irq(desc, 1 << offset, flag << offset);
- }
+ if (desc->msi_attrib.is_msix)
+ pci_msix_unmask(desc);
+ else if (desc->msi_attrib.maskbit)
+ pci_msi_unmask(desc, mask);
}
/**
@@ -224,7 +226,9 @@ static void msi_set_mask_bit(struct irq_data *data, u32 flag)
*/
void pci_msi_mask_irq(struct irq_data *data)
{
- msi_set_mask_bit(data, 1);
+ struct msi_desc *desc = irq_data_get_msi_desc(data);
+
+ __pci_msi_mask_desc(desc, BIT(data->irq - desc->irq));
}
EXPORT_SYMBOL_GPL(pci_msi_mask_irq);
@@ -234,7 +238,9 @@ EXPORT_SYMBOL_GPL(pci_msi_mask_irq);
*/
void pci_msi_unmask_irq(struct irq_data *data)
{
- msi_set_mask_bit(data, 0);
+ struct msi_desc *desc = irq_data_get_msi_desc(data);
+
+ __pci_msi_unmask_desc(desc, BIT(data->irq - desc->irq));
}
EXPORT_SYMBOL_GPL(pci_msi_unmask_irq);
@@ -255,10 +261,8 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
if (entry->msi_attrib.is_msix) {
void __iomem *base = pci_msix_desc_addr(entry);
- if (!base) {
- WARN_ON(1);
+ if (WARN_ON_ONCE(entry->msi_attrib.is_virtual))
return;
- }
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
@@ -289,13 +293,32 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
/* Don't touch the hardware now */
} else if (entry->msi_attrib.is_msix) {
void __iomem *base = pci_msix_desc_addr(entry);
+ u32 ctrl = entry->msix_ctrl;
+ bool unmasked = !(ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT);
- if (!base)
+ if (entry->msi_attrib.is_virtual)
goto skip;
+ /*
+ * The specification mandates that the entry is masked
+ * when the message is modified:
+ *
+ * "If software changes the Address or Data value of an
+ * entry while the entry is unmasked, the result is
+ * undefined."
+ */
+ if (unmasked)
+ pci_msix_write_vector_ctrl(entry, ctrl | PCI_MSIX_ENTRY_CTRL_MASKBIT);
+
writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
+
+ if (unmasked)
+ pci_msix_write_vector_ctrl(entry, ctrl);
+
+ /* Ensure that the writes are visible in the device */
+ readl(base + PCI_MSIX_ENTRY_DATA);
} else {
int pos = dev->msi_cap;
u16 msgctl;
@@ -316,6 +339,8 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
pci_write_config_word(dev, pos + PCI_MSI_DATA_32,
msg->data);
}
+ /* Ensure that the writes are visible in the device */
+ pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
}
skip:
@@ -338,9 +363,7 @@ static void free_msi_irqs(struct pci_dev *dev)
{
struct list_head *msi_list = dev_to_msi_list(&dev->dev);
struct msi_desc *entry, *tmp;
- struct attribute **msi_attrs;
- struct device_attribute *dev_attr;
- int i, count = 0;
+ int i;
for_each_pci_msi_entry(entry, dev)
if (entry->irq)
@@ -360,18 +383,7 @@ static void free_msi_irqs(struct pci_dev *dev)
}
if (dev->msi_irq_groups) {
- sysfs_remove_groups(&dev->dev.kobj, dev->msi_irq_groups);
- msi_attrs = dev->msi_irq_groups[0]->attrs;
- while (msi_attrs[count]) {
- dev_attr = container_of(msi_attrs[count],
- struct device_attribute, attr);
- kfree(dev_attr->attr.name);
- kfree(dev_attr);
- ++count;
- }
- kfree(msi_attrs);
- kfree(dev->msi_irq_groups[0]);
- kfree(dev->msi_irq_groups);
+ msi_destroy_sysfs(&dev->dev, dev->msi_irq_groups);
dev->msi_irq_groups = NULL;
}
}
@@ -408,8 +420,7 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
arch_restore_msi_irqs(dev);
pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
- msi_mask_irq(entry, msi_mask(entry->msi_attrib.multi_cap),
- entry->masked);
+ pci_msi_update_mask(entry, 0, 0);
control &= ~PCI_MSI_FLAGS_QSIZE;
control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
@@ -440,7 +451,7 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
arch_restore_msi_irqs(dev);
for_each_pci_msi_entry(entry, dev)
- msix_mask_irq(entry, entry->masked);
+ pci_msix_write_vector_ctrl(entry, entry->msix_ctrl);
pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
}
@@ -452,102 +463,6 @@ void pci_restore_msi_state(struct pci_dev *dev)
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);
-static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct msi_desc *entry;
- unsigned long irq;
- int retval;
-
- retval = kstrtoul(attr->attr.name, 10, &irq);
- if (retval)
- return retval;
-
- entry = irq_get_msi_desc(irq);
- if (!entry)
- return -ENODEV;
-
- return sysfs_emit(buf, "%s\n",
- entry->msi_attrib.is_msix ? "msix" : "msi");
-}
-
-static int populate_msi_sysfs(struct pci_dev *pdev)
-{
- struct attribute **msi_attrs;
- struct attribute *msi_attr;
- struct device_attribute *msi_dev_attr;
- struct attribute_group *msi_irq_group;
- const struct attribute_group **msi_irq_groups;
- struct msi_desc *entry;
- int ret = -ENOMEM;
- int num_msi = 0;
- int count = 0;
- int i;
-
- /* Determine how many msi entries we have */
- for_each_pci_msi_entry(entry, pdev)
- num_msi += entry->nvec_used;
- if (!num_msi)
- return 0;
-
- /* Dynamically create the MSI attributes for the PCI device */
- msi_attrs = kcalloc(num_msi + 1, sizeof(void *), GFP_KERNEL);
- if (!msi_attrs)
- return -ENOMEM;
- for_each_pci_msi_entry(entry, pdev) {
- for (i = 0; i < entry->nvec_used; i++) {
- msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
- if (!msi_dev_attr)
- goto error_attrs;
- msi_attrs[count] = &msi_dev_attr->attr;
-
- sysfs_attr_init(&msi_dev_attr->attr);
- msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
- entry->irq + i);
- if (!msi_dev_attr->attr.name)
- goto error_attrs;
- msi_dev_attr->attr.mode = S_IRUGO;
- msi_dev_attr->show = msi_mode_show;
- ++count;
- }
- }
-
- msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL);
- if (!msi_irq_group)
- goto error_attrs;
- msi_irq_group->name = "msi_irqs";
- msi_irq_group->attrs = msi_attrs;
-
- msi_irq_groups = kcalloc(2, sizeof(void *), GFP_KERNEL);
- if (!msi_irq_groups)
- goto error_irq_group;
- msi_irq_groups[0] = msi_irq_group;
-
- ret = sysfs_create_groups(&pdev->dev.kobj, msi_irq_groups);
- if (ret)
- goto error_irq_groups;
- pdev->msi_irq_groups = msi_irq_groups;
-
- return 0;
-
-error_irq_groups:
- kfree(msi_irq_groups);
-error_irq_group:
- kfree(msi_irq_group);
-error_attrs:
- count = 0;
- msi_attr = msi_attrs[count];
- while (msi_attr) {
- msi_dev_attr = container_of(msi_attr, struct device_attribute, attr);
- kfree(msi_attr->name);
- kfree(msi_dev_attr);
- ++count;
- msi_attr = msi_attrs[count];
- }
- kfree(msi_attrs);
- return ret;
-}
-
static struct msi_desc *
msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
{
@@ -581,7 +496,7 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
/* Save the initial mask status */
if (entry->msi_attrib.maskbit)
- pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
+ pci_read_config_dword(dev, entry->mask_pos, &entry->msi_mask);
out:
kfree(masks);
@@ -592,8 +507,11 @@ static int msi_verify_entries(struct pci_dev *dev)
{
struct msi_desc *entry;
+ if (!dev->no_64bit_msi)
+ return 0;
+
for_each_pci_msi_entry(entry, dev) {
- if (entry->msg.address_hi && dev->no_64bit_msi) {
+ if (entry->msg.address_hi) {
pci_err(dev, "arch assigned 64-bit MSI address %#x%08x but device only supports 32 bits\n",
entry->msg.address_hi, entry->msg.address_lo);
return -EIO;
@@ -619,7 +537,6 @@ static int msi_capability_init(struct pci_dev *dev, int nvec,
{
struct msi_desc *entry;
int ret;
- unsigned mask;
pci_msi_set_enable(dev, 0); /* Disable MSI during set up */
@@ -628,31 +545,23 @@ static int msi_capability_init(struct pci_dev *dev, int nvec,
return -ENOMEM;
/* All MSIs are unmasked by default; mask them all */
- mask = msi_mask(entry->msi_attrib.multi_cap);
- msi_mask_irq(entry, mask, mask);
+ pci_msi_mask(entry, msi_multi_mask(entry));
list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));
/* Configure MSI capability structure */
ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
- if (ret) {
- msi_mask_irq(entry, mask, ~mask);
- free_msi_irqs(dev);
- return ret;
- }
+ if (ret)
+ goto err;
ret = msi_verify_entries(dev);
- if (ret) {
- msi_mask_irq(entry, mask, ~mask);
- free_msi_irqs(dev);
- return ret;
- }
+ if (ret)
+ goto err;
- ret = populate_msi_sysfs(dev);
- if (ret) {
- msi_mask_irq(entry, mask, ~mask);
- free_msi_irqs(dev);
- return ret;
+ dev->msi_irq_groups = msi_populate_sysfs(&dev->dev);
+ if (IS_ERR(dev->msi_irq_groups)) {
+ ret = PTR_ERR(dev->msi_irq_groups);
+ goto err;
}
/* Set MSI enabled bits */
@@ -663,6 +572,11 @@ static int msi_capability_init(struct pci_dev *dev, int nvec,
pcibios_free_irq(dev);
dev->irq = entry->irq;
return 0;
+
+err:
+ pci_msi_unmask(entry, msi_multi_mask(entry));
+ free_msi_irqs(dev);
+ return ret;
}
static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
@@ -691,6 +605,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
{
struct irq_affinity_desc *curmsk, *masks = NULL;
struct msi_desc *entry;
+ void __iomem *addr;
int ret, i;
int vec_count = pci_msix_vec_count(dev);
@@ -711,6 +626,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
entry->msi_attrib.is_msix = 1;
entry->msi_attrib.is_64 = 1;
+
if (entries)
entry->msi_attrib.entry_nr = entries[i].entry;
else
@@ -722,6 +638,11 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
entry->msi_attrib.default_irq = dev->irq;
entry->mask_base = base;
+ if (!entry->msi_attrib.is_virtual) {
+ addr = pci_msix_desc_addr(entry);
+ entry->msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
+ }
+
list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));
if (masks)
curmsk++;
@@ -732,26 +653,28 @@ out:
return ret;
}
-static void msix_program_entries(struct pci_dev *dev,
- struct msix_entry *entries)
+static void msix_update_entries(struct pci_dev *dev, struct msix_entry *entries)
{
struct msi_desc *entry;
- int i = 0;
- void __iomem *desc_addr;
for_each_pci_msi_entry(entry, dev) {
- if (entries)
- entries[i++].vector = entry->irq;
+ if (entries) {
+ entries->vector = entry->irq;
+ entries++;
+ }
+ }
+}
- desc_addr = pci_msix_desc_addr(entry);
- if (desc_addr)
- entry->masked = readl(desc_addr +
- PCI_MSIX_ENTRY_VECTOR_CTRL);
- else
- entry->masked = 0;
+static void msix_mask_all(void __iomem *base, int tsize)
+{
+ u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT;
+ int i;
- msix_mask_irq(entry, 1);
- }
+ if (pci_msi_ignore_mask)
+ return;
+
+ for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE)
+ writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL);
}
/**
@@ -768,22 +691,33 @@ static void msix_program_entries(struct pci_dev *dev,
static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
int nvec, struct irq_affinity *affd)
{
- int ret;
- u16 control;
void __iomem *base;
+ int ret, tsize;
+ u16 control;
- /* Ensure MSI-X is disabled while it is set up */
- pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
+ /*
+ * Some devices require MSI-X to be enabled before the MSI-X
+ * registers can be accessed. Mask all the vectors to prevent
+ * interrupts coming in before they're fully set up.
+ */
+ pci_msix_clear_and_set_ctrl(dev, 0, PCI_MSIX_FLAGS_MASKALL |
+ PCI_MSIX_FLAGS_ENABLE);
pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
/* Request & Map MSI-X table region */
- base = msix_map_region(dev, msix_table_size(control));
- if (!base)
- return -ENOMEM;
+ tsize = msix_table_size(control);
+ base = msix_map_region(dev, tsize);
+ if (!base) {
+ ret = -ENOMEM;
+ goto out_disable;
+ }
+
+ /* Ensure that all table entries are masked. */
+ msix_mask_all(base, tsize);
ret = msix_setup_entries(dev, base, entries, nvec, affd);
if (ret)
- return ret;
+ goto out_disable;
ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
if (ret)
@@ -794,19 +728,13 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
if (ret)
goto out_free;
- /*
- * Some devices require MSI-X to be enabled before we can touch the
- * MSI-X registers. We need to mask all the vectors to prevent
- * interrupts coming in before they're fully set up.
- */
- pci_msix_clear_and_set_ctrl(dev, 0,
- PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE);
-
- msix_program_entries(dev, entries);
+ msix_update_entries(dev, entries);
- ret = populate_msi_sysfs(dev);
- if (ret)
+ dev->msi_irq_groups = msi_populate_sysfs(&dev->dev);
+ if (IS_ERR(dev->msi_irq_groups)) {
+ ret = PTR_ERR(dev->msi_irq_groups);
goto out_free;
+ }
/* Set MSI-X enabled bits and unmask the function */
pci_intx_for_msi(dev, 0);
@@ -836,6 +764,9 @@ out_avail:
out_free:
free_msi_irqs(dev);
+out_disable:
+ pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
+
return ret;
}
@@ -916,7 +847,6 @@ EXPORT_SYMBOL(pci_msi_vec_count);
static void pci_msi_shutdown(struct pci_dev *dev)
{
struct msi_desc *desc;
- u32 mask;
if (!pci_msi_enable || !dev || !dev->msi_enabled)
return;
@@ -929,9 +859,7 @@ static void pci_msi_shutdown(struct pci_dev *dev)
dev->msi_enabled = 0;
/* Return the device with MSI unmasked as initial states */
- mask = msi_mask(desc->msi_attrib.multi_cap);
- /* Keep cached state to be restored */
- __pci_msi_desc_mask_irq(desc, mask, ~mask);
+ pci_msi_unmask(desc, msi_multi_mask(desc));
/* Restore dev->irq to its default pin-assertion IRQ */
dev->irq = desc->msi_attrib.default_irq;
@@ -1016,10 +944,8 @@ static void pci_msix_shutdown(struct pci_dev *dev)
}
/* Return the device with MSI-X masked as initial states */
- for_each_pci_msi_entry(entry, dev) {
- /* Keep cached states to be restored */
- __pci_msix_desc_mask_irq(entry, 1);
- }
+ for_each_pci_msi_entry(entry, dev)
+ pci_msix_mask(entry);
pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
pci_intx_for_msi(dev, 1);
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index f65382915f01..b70f61fbcd4b 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -978,7 +978,7 @@ void pci_create_legacy_files(struct pci_bus *b)
b->legacy_mem->size = 1024*1024;
b->legacy_mem->attr.mode = 0600;
b->legacy_mem->mmap = pci_mmap_legacy_mem;
- b->legacy_io->f_mapping = iomem_get_mapping;
+ b->legacy_mem->f_mapping = iomem_get_mapping;
pci_adjust_legacy_attr(b, pci_mmap_mem);
error = device_create_bin_file(&b->dev, b->legacy_mem);
if (error)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index aacf575c15cf..a5e6759c407b 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1906,11 +1906,7 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags)
* so that things like MSI message writing will behave as expected
* (e.g. if the device really is in D0 at enable time).
*/
- if (dev->pm_cap) {
- u16 pmcsr;
- pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
- dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
- }
+ pci_update_current_state(dev, dev->current_state);
if (atomic_inc_return(&dev->enable_cnt) > 1)
return 0; /* already enabled */
@@ -2495,7 +2491,14 @@ static int __pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable
if (enable) {
int error;
- if (pci_pme_capable(dev, state))
+ /*
+ * Enable PME signaling if the device can signal PME from
+ * D3cold regardless of whether or not it can signal PME from
+ * the current target state, because that will allow it to
+ * signal PME when the hierarchy above it goes into D3cold and
+ * the device itself ends up in D3cold as a result of that.
+ */
+ if (pci_pme_capable(dev, state) || pci_pme_capable(dev, PCI_D3cold))
pci_pme_active(dev, true);
else
ret = 1;
@@ -2599,16 +2602,20 @@ static pci_power_t pci_target_state(struct pci_dev *dev, bool wakeup)
if (dev->current_state == PCI_D3cold)
target_state = PCI_D3cold;
- if (wakeup) {
+ if (wakeup && dev->pme_support) {
+ pci_power_t state = target_state;
+
/*
* Find the deepest state from which the device can generate
* PME#.
*/
- if (dev->pme_support) {
- while (target_state
- && !(dev->pme_support & (1 << target_state)))
- target_state--;
- }
+ while (state && !(dev->pme_support & (1 << state)))
+ state--;
+
+ if (state)
+ return state;
+ else if (dev->pme_support & 1)
+ return PCI_D0;
}
return target_state;
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 93dcdd431072..2f52110cac97 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -597,11 +597,8 @@ static inline void pcie_ecrc_get_policy(char *str) { }
#ifdef CONFIG_PCIE_PTM
void pci_ptm_init(struct pci_dev *dev);
-int pci_enable_ptm(struct pci_dev *dev, u8 *granularity);
#else
static inline void pci_ptm_init(struct pci_dev *dev) { }
-static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
-{ return -EINVAL; }
#endif
struct pci_dev_reset_methods {
diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c
index 95d4eef2c9e8..8a4ad974c5ac 100644
--- a/drivers/pci/pcie/ptm.c
+++ b/drivers/pci/pcie/ptm.c
@@ -204,3 +204,12 @@ int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
return 0;
}
EXPORT_SYMBOL(pci_enable_ptm);
+
+bool pcie_ptm_enabled(struct pci_dev *dev)
+{
+ if (!dev)
+ return false;
+
+ return dev->ptm_enabled;
+}
+EXPORT_SYMBOL(pcie_ptm_enabled);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 6d74386eadc2..ab3de1551b50 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1900,6 +1900,7 @@ static void quirk_ryzen_xhci_d3hot(struct pci_dev *dev)
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15e0, quirk_ryzen_xhci_d3hot);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15e1, quirk_ryzen_xhci_d3hot);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1639, quirk_ryzen_xhci_d3hot);
#ifdef CONFIG_X86_IO_APIC
static int dmi_disable_ioapicreroute(const struct dmi_system_id *d)
diff --git a/drivers/phy/amlogic/phy-meson8b-usb2.c b/drivers/phy/amlogic/phy-meson8b-usb2.c
index 03c061dd5f0d..cf10bed40528 100644
--- a/drivers/phy/amlogic/phy-meson8b-usb2.c
+++ b/drivers/phy/amlogic/phy-meson8b-usb2.c
@@ -219,6 +219,10 @@ static int phy_meson8b_usb2_power_off(struct phy *phy)
clk_disable_unprepare(priv->clk_usb);
clk_disable_unprepare(priv->clk_usb_general);
+ /* power off the PHY by putting it into reset mode */
+ regmap_update_bits(priv->regmap, REG_CTRL, REG_CTRL_POWER_ON_RESET,
+ REG_CTRL_POWER_ON_RESET);
+
return 0;
}
@@ -273,8 +277,8 @@ static int phy_meson8b_usb2_probe(struct platform_device *pdev)
phy = devm_phy_create(&pdev->dev, NULL, &phy_meson8b_usb2_ops);
if (IS_ERR(phy)) {
- dev_err(&pdev->dev, "failed to create PHY\n");
- return PTR_ERR(phy);
+ return dev_err_probe(&pdev->dev, PTR_ERR(phy),
+ "failed to create PHY\n");
}
phy_set_drvdata(phy, priv);
diff --git a/drivers/phy/cadence/phy-cadence-torrent.c b/drivers/phy/cadence/phy-cadence-torrent.c
index 0477e7beebbf..415ace64adc5 100644
--- a/drivers/phy/cadence/phy-cadence-torrent.c
+++ b/drivers/phy/cadence/phy-cadence-torrent.c
@@ -24,13 +24,15 @@
#include <linux/reset.h>
#include <linux/regmap.h>
-#define REF_CLK_19_2MHz 19200000
-#define REF_CLK_25MHz 25000000
+#define REF_CLK_19_2MHZ 19200000
+#define REF_CLK_25MHZ 25000000
+#define REF_CLK_100MHZ 100000000
#define MAX_NUM_LANES 4
#define DEFAULT_MAX_BIT_RATE 8100 /* in Mbps */
#define NUM_SSC_MODE 3
+#define NUM_REF_CLK 3
#define NUM_PHY_TYPE 6
#define POLL_TIMEOUT_US 5000
@@ -49,6 +51,10 @@
#define TORRENT_PHY_PCS_COMMON_OFFSET(block_offset) \
(0xC000 << (block_offset))
+#define TORRENT_PHY_PCS_LANE_CDB_OFFSET(ln, block_offset, reg_offset) \
+ ((0xD000 << (block_offset)) + \
+ (((ln) << 8) << (reg_offset)))
+
#define TORRENT_PHY_PMA_COMMON_OFFSET(block_offset) \
(0xE000 << (block_offset))
@@ -101,6 +107,7 @@
#define CMN_PLL0_FRACDIVH_M0 0x0092U
#define CMN_PLL0_HIGH_THR_M0 0x0093U
#define CMN_PLL0_DSM_DIAG_M0 0x0094U
+#define CMN_PLL0_DSM_FBH_OVRD_M0 0x0095U
#define CMN_PLL0_SS_CTRL1_M0 0x0098U
#define CMN_PLL0_SS_CTRL2_M0 0x0099U
#define CMN_PLL0_SS_CTRL3_M0 0x009AU
@@ -220,6 +227,9 @@
#define PHY_PIPE_USB3_GEN2_POST_CFG0 0x0022U
#define PHY_PIPE_USB3_GEN2_POST_CFG1 0x0023U
+/* PHY PCS lane registers */
+#define PHY_PCS_ISO_LINK_CTRL 0x000BU
+
/* PHY PMA common registers */
#define PHY_PMA_CMN_CTRL1 0x0000U
#define PHY_PMA_CMN_CTRL2 0x0001U
@@ -244,6 +254,9 @@ static const struct reg_field phy_pma_pll_raw_ctrl =
static const struct reg_field phy_reset_ctrl =
REG_FIELD(PHY_RESET, 8, 8);
+static const struct reg_field phy_pcs_iso_link_ctrl_1 =
+ REG_FIELD(PHY_PCS_ISO_LINK_CTRL, 1, 1);
+
static const struct reg_field phy_pipe_cmn_ctrl1_0 = REG_FIELD(PHY_PIPE_CMN_CTRL1, 0, 0);
#define REFCLK_OUT_NUM_CMN_CONFIG 5
@@ -273,6 +286,12 @@ enum cdns_torrent_phy_type {
TYPE_USB,
};
+enum cdns_torrent_ref_clk {
+ CLK_19_2_MHZ,
+ CLK_25_MHZ,
+ CLK_100_MHZ
+};
+
enum cdns_torrent_ssc_mode {
NO_SSC,
EXTERNAL_SSC,
@@ -296,7 +315,7 @@ struct cdns_torrent_phy {
struct reset_control *apb_rst;
struct device *dev;
struct clk *clk;
- unsigned long ref_clk_rate;
+ enum cdns_torrent_ref_clk ref_clk_rate;
struct cdns_torrent_inst phys[MAX_NUM_LANES];
int nsubnodes;
const struct cdns_torrent_data *init_data;
@@ -306,12 +325,14 @@ struct cdns_torrent_phy {
struct regmap *regmap_phy_pma_common_cdb;
struct regmap *regmap_tx_lane_cdb[MAX_NUM_LANES];
struct regmap *regmap_rx_lane_cdb[MAX_NUM_LANES];
+ struct regmap *regmap_phy_pcs_lane_cdb[MAX_NUM_LANES];
struct regmap *regmap_dptx_phy_reg;
struct regmap_field *phy_pll_cfg;
struct regmap_field *phy_pma_cmn_ctrl_1;
struct regmap_field *phy_pma_cmn_ctrl_2;
struct regmap_field *phy_pma_pll_raw_ctrl;
struct regmap_field *phy_reset_ctrl;
+ struct regmap_field *phy_pcs_iso_link_ctrl_1[MAX_NUM_LANES];
struct clk *clks[CDNS_TORRENT_REFCLK_DRIVER + 1];
struct clk_onecell_data clk_data;
};
@@ -333,57 +354,6 @@ struct cdns_torrent_derived_refclk {
#define to_cdns_torrent_derived_refclk(_hw) \
container_of(_hw, struct cdns_torrent_derived_refclk, hw)
-static int cdns_torrent_phy_init(struct phy *phy);
-static int cdns_torrent_dp_init(struct phy *phy);
-static int cdns_torrent_dp_run(struct cdns_torrent_phy *cdns_phy,
- u32 num_lanes);
-static
-int cdns_torrent_dp_wait_pma_cmn_ready(struct cdns_torrent_phy *cdns_phy);
-static void cdns_torrent_dp_pma_cfg(struct cdns_torrent_phy *cdns_phy,
- struct cdns_torrent_inst *inst);
-static
-void cdns_torrent_dp_pma_cmn_cfg_19_2mhz(struct cdns_torrent_phy *cdns_phy);
-static
-void cdns_torrent_dp_pma_cmn_vco_cfg_19_2mhz(struct cdns_torrent_phy *cdns_phy,
- u32 rate, bool ssc);
-static
-void cdns_torrent_dp_pma_cmn_cfg_25mhz(struct cdns_torrent_phy *cdns_phy);
-static
-void cdns_torrent_dp_pma_cmn_vco_cfg_25mhz(struct cdns_torrent_phy *cdns_phy,
- u32 rate, bool ssc);
-static void cdns_torrent_dp_pma_lane_cfg(struct cdns_torrent_phy *cdns_phy,
- unsigned int lane);
-static void cdns_torrent_dp_pma_cmn_rate(struct cdns_torrent_phy *cdns_phy,
- u32 rate, u32 num_lanes);
-static int cdns_torrent_dp_configure(struct phy *phy,
- union phy_configure_opts *opts);
-static int cdns_torrent_dp_set_power_state(struct cdns_torrent_phy *cdns_phy,
- u32 num_lanes,
- enum phy_powerstate powerstate);
-static int cdns_torrent_phy_on(struct phy *phy);
-static int cdns_torrent_phy_off(struct phy *phy);
-
-static const struct phy_ops cdns_torrent_phy_ops = {
- .init = cdns_torrent_phy_init,
- .configure = cdns_torrent_dp_configure,
- .power_on = cdns_torrent_phy_on,
- .power_off = cdns_torrent_phy_off,
- .owner = THIS_MODULE,
-};
-
-static int cdns_torrent_noop_phy_on(struct phy *phy)
-{
- /* Give 5ms to 10ms delay for the PIPE clock to be stable */
- usleep_range(5000, 10000);
-
- return 0;
-}
-
-static const struct phy_ops noop_ops = {
- .power_on = cdns_torrent_noop_phy_on,
- .owner = THIS_MODULE,
-};
-
struct cdns_reg_pairs {
u32 val;
u32 off;
@@ -403,12 +373,12 @@ struct cdns_torrent_data {
[NUM_SSC_MODE];
struct cdns_torrent_vals *pcs_cmn_vals[NUM_PHY_TYPE][NUM_PHY_TYPE]
[NUM_SSC_MODE];
- struct cdns_torrent_vals *cmn_vals[NUM_PHY_TYPE][NUM_PHY_TYPE]
- [NUM_SSC_MODE];
- struct cdns_torrent_vals *tx_ln_vals[NUM_PHY_TYPE][NUM_PHY_TYPE]
- [NUM_SSC_MODE];
- struct cdns_torrent_vals *rx_ln_vals[NUM_PHY_TYPE][NUM_PHY_TYPE]
- [NUM_SSC_MODE];
+ struct cdns_torrent_vals *cmn_vals[NUM_REF_CLK][NUM_PHY_TYPE]
+ [NUM_PHY_TYPE][NUM_SSC_MODE];
+ struct cdns_torrent_vals *tx_ln_vals[NUM_REF_CLK][NUM_PHY_TYPE]
+ [NUM_PHY_TYPE][NUM_SSC_MODE];
+ struct cdns_torrent_vals *rx_ln_vals[NUM_REF_CLK][NUM_PHY_TYPE]
+ [NUM_PHY_TYPE][NUM_SSC_MODE];
};
struct cdns_regmap_cdb_context {
@@ -497,6 +467,22 @@ static const struct regmap_config cdns_torrent_common_cdb_config = {
.reg_read = cdns_regmap_read,
};
+#define TORRENT_PHY_PCS_LANE_CDB_REGMAP_CONF(n) \
+{ \
+ .name = "torrent_phy_pcs_lane" n "_cdb", \
+ .reg_stride = 1, \
+ .fast_io = true, \
+ .reg_write = cdns_regmap_write, \
+ .reg_read = cdns_regmap_read, \
+}
+
+static const struct regmap_config cdns_torrent_phy_pcs_lane_cdb_config[] = {
+ TORRENT_PHY_PCS_LANE_CDB_REGMAP_CONF("0"),
+ TORRENT_PHY_PCS_LANE_CDB_REGMAP_CONF("1"),
+ TORRENT_PHY_PCS_LANE_CDB_REGMAP_CONF("2"),
+ TORRENT_PHY_PCS_LANE_CDB_REGMAP_CONF("3"),
+};
+
static const struct regmap_config cdns_torrent_phy_pcs_cmn_cdb_config = {
.name = "torrent_phy_pcs_cmn_cdb",
.reg_stride = 1,
@@ -615,6 +601,351 @@ static const struct coefficients vltg_coeff[4][4] = {
}
};
+/*
+ * Map a PHY type to a human-readable protocol name.
+ *
+ * Returns a pointer to a string literal; any type without an explicit case
+ * below yields "None".
+ */
+static const char *cdns_torrent_get_phy_type(enum cdns_torrent_phy_type phy_type)
+{
+	const char *label = "None";
+
+	switch (phy_type) {
+	case TYPE_DP:
+		label = "DisplayPort";
+		break;
+	case TYPE_PCIE:
+		label = "PCIe";
+		break;
+	case TYPE_SGMII:
+		label = "SGMII";
+		break;
+	case TYPE_QSGMII:
+		label = "QSGMII";
+		break;
+	case TYPE_USB:
+		label = "USB";
+		break;
+	default:
+		break;
+	}
+
+	return label;
+}
+
+/*
+ * Set registers responsible for enabling and configuring SSC on PLL0 and
+ * PLL1, with second and third register values provided by parameters.
+ *
+ * @cdns_phy:  PHY instance; writes go through its regmap_common_cdb.
+ * @ctrl2_val: value for CMN_PLLx_SS_CTRL2_M0.
+ * @ctrl3_val: value for CMN_PLLx_SS_CTRL3_M0.
+ *
+ * SS_CTRL1 receives 0x0001 and SS_CTRL4 receives 0x0003 for both PLLs.
+ *
+ * Each value is written to its own control register. Previously all three
+ * writes targeted SS_CTRL1, so the enable value was overwritten and
+ * SS_CTRL2/SS_CTRL3 were never programmed.
+ */
+static
+void cdns_torrent_dp_enable_ssc_19_2mhz(struct cdns_torrent_phy *cdns_phy,
+					u32 ctrl2_val, u32 ctrl3_val)
+{
+	struct regmap *regmap = cdns_phy->regmap_common_cdb;
+
+	cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL1_M0, 0x0001);
+	cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL2_M0, ctrl2_val);
+	cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL3_M0, ctrl3_val);
+	cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL4_M0, 0x0003);
+	cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL1_M0, 0x0001);
+	cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL2_M0, ctrl2_val);
+	cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL3_M0, ctrl3_val);
+	cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL4_M0, 0x0003);
+}
+
+static
+void cdns_torrent_dp_pma_cmn_vco_cfg_19_2mhz(struct cdns_torrent_phy *cdns_phy,
+ u32 rate, bool ssc)
+{
+ struct regmap *regmap = cdns_phy->regmap_common_cdb;
+
+ /*
+ * Assumes 19.2 MHz refclock
+ *
+ * Programs the PLL0 and PLL1 divider/threshold registers of the common
+ * block for the requested DP link rate; both PLLs always receive the
+ * same values. `rate` is matched against the literal case values below
+ * (presumably the link rate in Mb/s - TODO confirm against callers).
+ * There is no default case, so an unlisted rate leaves the divider
+ * registers untouched while the SSC/lock-counter writes after the
+ * switch are still performed.
+ *
+ * When `ssc` is true the per-VCO SSC values are handed to
+ * cdns_torrent_dp_enable_ssc_19_2mhz(); otherwise the SS_CTRL1..4
+ * registers are written with their "disable" values further down.
+ */
+ switch (rate) {
+ /* Setting VCO for 10.8GHz */
+ case 2700:
+ case 5400:
+ cdns_torrent_phy_write(regmap, CMN_PLL0_INTDIV_M0, 0x0119);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVL_M0, 0x4000);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_HIGH_THR_M0, 0x00BC);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CTRL_M0, 0x0012); /* only this VCO setting uses 0x0012; the others use 0x0002 */
+ cdns_torrent_phy_write(regmap, CMN_PLL1_INTDIV_M0, 0x0119);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVL_M0, 0x4000);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_HIGH_THR_M0, 0x00BC);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CTRL_M0, 0x0012);
+ if (ssc)
+ cdns_torrent_dp_enable_ssc_19_2mhz(cdns_phy, 0x033A, 0x006A);
+ break;
+ /* Setting VCO for 9.72GHz */
+ case 1620:
+ case 2430:
+ case 3240:
+ cdns_torrent_phy_write(regmap, CMN_PLL0_INTDIV_M0, 0x01FA);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVL_M0, 0x4000);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_HIGH_THR_M0, 0x0152);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CTRL_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_INTDIV_M0, 0x01FA);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVL_M0, 0x4000);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_HIGH_THR_M0, 0x0152);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CTRL_M0, 0x0002);
+ if (ssc)
+ cdns_torrent_dp_enable_ssc_19_2mhz(cdns_phy, 0x05DD, 0x0069);
+ break;
+ /* Setting VCO for 8.64GHz */
+ case 2160:
+ case 4320:
+ cdns_torrent_phy_write(regmap, CMN_PLL0_INTDIV_M0, 0x01C2);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVL_M0, 0x0000);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_HIGH_THR_M0, 0x012C);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CTRL_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_INTDIV_M0, 0x01C2);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVL_M0, 0x0000);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_HIGH_THR_M0, 0x012C);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CTRL_M0, 0x0002);
+ if (ssc)
+ cdns_torrent_dp_enable_ssc_19_2mhz(cdns_phy, 0x0536, 0x0069);
+ break;
+ /* Setting VCO for 8.1GHz */
+ case 8100:
+ cdns_torrent_phy_write(regmap, CMN_PLL0_INTDIV_M0, 0x01A5);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVL_M0, 0xE000);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_HIGH_THR_M0, 0x011A);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CTRL_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_INTDIV_M0, 0x01A5);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVL_M0, 0xE000);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_HIGH_THR_M0, 0x011A);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CTRL_M0, 0x0002);
+ if (ssc)
+ cdns_torrent_dp_enable_ssc_19_2mhz(cdns_phy, 0x04D7, 0x006A);
+ break;
+ }
+
+ if (ssc) { /* SSC on: VCO calibration count start 0x025E, lock count threshold 0x0005 */
+ cdns_torrent_phy_write(regmap, CMN_PLL0_VCOCAL_PLLCNT_START, 0x025E);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_LOCK_PLLCNT_THR, 0x0005);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_VCOCAL_PLLCNT_START, 0x025E);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_LOCK_PLLCNT_THR, 0x0005);
+ } else { /* SSC off: count start 0x0260, threshold 0x0003, SS_CTRL1..4 reset */
+ cdns_torrent_phy_write(regmap, CMN_PLL0_VCOCAL_PLLCNT_START, 0x0260);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_VCOCAL_PLLCNT_START, 0x0260);
+ /* Set reset register values to disable SSC */
+ cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL1_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL2_M0, 0x0000);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL3_M0, 0x0000);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL4_M0, 0x0000);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_LOCK_PLLCNT_THR, 0x0003);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL1_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL2_M0, 0x0000);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL3_M0, 0x0000);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL4_M0, 0x0000);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_LOCK_PLLCNT_THR, 0x0003);
+ }
+
+ cdns_torrent_phy_write(regmap, CMN_PLL0_LOCK_REFCNT_START, 0x0099); /* lock counter start values are rate-independent for this refclock */
+ cdns_torrent_phy_write(regmap, CMN_PLL0_LOCK_PLLCNT_START, 0x0099);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_LOCK_REFCNT_START, 0x0099);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_LOCK_PLLCNT_START, 0x0099);
+}
+
+/*
+ * Set registers responsible for enabling and configuring SSC on PLL0 and
+ * PLL1, with second register value provided by a parameter.
+ *
+ * @cdns_phy:  PHY instance; writes go through its regmap_common_cdb.
+ * @ctrl2_val: value for CMN_PLLx_SS_CTRL2_M0.
+ *
+ * SS_CTRL1 receives 0x0001, SS_CTRL3 receives 0x007F and SS_CTRL4 receives
+ * 0x0003 for both PLLs.
+ *
+ * Each value is written to its own control register. Previously the first
+ * three writes all targeted SS_CTRL1, so the enable value was overwritten
+ * and SS_CTRL2/SS_CTRL3 were never programmed.
+ */
+static void cdns_torrent_dp_enable_ssc_25mhz(struct cdns_torrent_phy *cdns_phy,
+					     u32 ctrl2_val)
+{
+	struct regmap *regmap = cdns_phy->regmap_common_cdb;
+
+	cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL1_M0, 0x0001);
+	cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL2_M0, ctrl2_val);
+	cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL3_M0, 0x007F);
+	cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL4_M0, 0x0003);
+	cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL1_M0, 0x0001);
+	cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL2_M0, ctrl2_val);
+	cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL3_M0, 0x007F);
+	cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL4_M0, 0x0003);
+}
+
+static
+void cdns_torrent_dp_pma_cmn_vco_cfg_25mhz(struct cdns_torrent_phy *cdns_phy,
+ u32 rate, bool ssc)
+{
+ struct regmap *regmap = cdns_phy->regmap_common_cdb;
+
+ /*
+ * Assumes 25 MHz refclock
+ *
+ * Programs the PLL0 and PLL1 divider/threshold registers of the common
+ * block for the requested DP link rate; both PLLs always receive the
+ * same values. `rate` is matched against the literal case values below
+ * (presumably the link rate in Mb/s - TODO confirm against callers).
+ * There is no default case, so an unlisted rate skips the divider
+ * writes but still performs every write after the switch.
+ *
+ * When `ssc` is true the per-VCO SS_CTRL2 value is handed to
+ * cdns_torrent_dp_enable_ssc_25mhz(); otherwise SS_CTRL1..4 are
+ * written with their "disable" values further down.
+ */
+ switch (rate) {
+ /* Setting VCO for 10.8GHz */
+ case 2700:
+ case 5400:
+ cdns_torrent_phy_write(regmap, CMN_PLL0_INTDIV_M0, 0x01B0);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVL_M0, 0x0000);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_HIGH_THR_M0, 0x0120);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_INTDIV_M0, 0x01B0);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVL_M0, 0x0000);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_HIGH_THR_M0, 0x0120);
+ if (ssc)
+ cdns_torrent_dp_enable_ssc_25mhz(cdns_phy, 0x0423);
+ break;
+ /* Setting VCO for 9.72GHz */
+ case 1620:
+ case 2430:
+ case 3240:
+ cdns_torrent_phy_write(regmap, CMN_PLL0_INTDIV_M0, 0x0184);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVL_M0, 0xCCCD);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_HIGH_THR_M0, 0x0104);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_INTDIV_M0, 0x0184);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVL_M0, 0xCCCD);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_HIGH_THR_M0, 0x0104);
+ if (ssc)
+ cdns_torrent_dp_enable_ssc_25mhz(cdns_phy, 0x03B9);
+ break;
+ /* Setting VCO for 8.64GHz */
+ case 2160:
+ case 4320:
+ cdns_torrent_phy_write(regmap, CMN_PLL0_INTDIV_M0, 0x0159);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVL_M0, 0x999A);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_HIGH_THR_M0, 0x00E7);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_INTDIV_M0, 0x0159);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVL_M0, 0x999A);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_HIGH_THR_M0, 0x00E7);
+ if (ssc)
+ cdns_torrent_dp_enable_ssc_25mhz(cdns_phy, 0x034F);
+ break;
+ /* Setting VCO for 8.1GHz */
+ case 8100:
+ cdns_torrent_phy_write(regmap, CMN_PLL0_INTDIV_M0, 0x0144);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVL_M0, 0x0000);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_HIGH_THR_M0, 0x00D8);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_INTDIV_M0, 0x0144);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVL_M0, 0x0000);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_HIGH_THR_M0, 0x00D8);
+ if (ssc)
+ cdns_torrent_dp_enable_ssc_25mhz(cdns_phy, 0x031A);
+ break;
+ }
+
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CTRL_M0, 0x0002); /* same value for every rate, so written once after the switch */
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CTRL_M0, 0x0002);
+
+ if (ssc) { /* SSC on: VCO calibration count start 0x0315, lock count threshold 0x0005 */
+ cdns_torrent_phy_write(regmap,
+ CMN_PLL0_VCOCAL_PLLCNT_START, 0x0315);
+ cdns_torrent_phy_write(regmap,
+ CMN_PLL0_LOCK_PLLCNT_THR, 0x0005);
+ cdns_torrent_phy_write(regmap,
+ CMN_PLL1_VCOCAL_PLLCNT_START, 0x0315);
+ cdns_torrent_phy_write(regmap,
+ CMN_PLL1_LOCK_PLLCNT_THR, 0x0005);
+ } else { /* SSC off: count start 0x0317, threshold 0x0003, SS_CTRL1..4 reset */
+ cdns_torrent_phy_write(regmap,
+ CMN_PLL0_VCOCAL_PLLCNT_START, 0x0317);
+ cdns_torrent_phy_write(regmap,
+ CMN_PLL1_VCOCAL_PLLCNT_START, 0x0317);
+ /* Set reset register values to disable SSC */
+ cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL1_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL2_M0, 0x0000);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL3_M0, 0x0000);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL4_M0, 0x0000);
+ cdns_torrent_phy_write(regmap,
+ CMN_PLL0_LOCK_PLLCNT_THR, 0x0003);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL1_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL2_M0, 0x0000);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL3_M0, 0x0000);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL4_M0, 0x0000);
+ cdns_torrent_phy_write(regmap,
+ CMN_PLL1_LOCK_PLLCNT_THR, 0x0003);
+ }
+
+ cdns_torrent_phy_write(regmap, CMN_PLL0_LOCK_REFCNT_START, 0x00C7); /* lock counter start values are rate-independent for this refclock */
+ cdns_torrent_phy_write(regmap, CMN_PLL0_LOCK_PLLCNT_START, 0x00C7);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_LOCK_REFCNT_START, 0x00C7);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_LOCK_PLLCNT_START, 0x00C7);
+}
+
+static
+void cdns_torrent_dp_pma_cmn_vco_cfg_100mhz(struct cdns_torrent_phy *cdns_phy,
+ u32 rate, bool ssc)
+{
+ struct regmap *regmap = cdns_phy->regmap_common_cdb;
+
+ /*
+ * Assumes 100 MHz refclock
+ *
+ * Programs PLL0/PLL1 common-block registers for the requested DP link
+ * rate. `rate` is matched against the literal case values below
+ * (presumably the link rate in Mb/s - TODO confirm against callers);
+ * there is no default case, so an unlisted rate writes nothing.
+ *
+ * `ssc` is accepted for signature parity with the 19.2 MHz and 25 MHz
+ * variants but is not read anywhere in this function, and no SS_CTRL
+ * or lock-counter registers are written here.
+ *
+ * NOTE(review): the 2700/5400 case writes only four registers and
+ * touches CMN_PDIAG_PLL1_CP_PADJ_M0 without a PLL0 counterpart, and
+ * the 8100 case does not write CMN_PLLx_FRACDIVL_M0 while the 1620-
+ * 4320 cases do. Presumably the remaining registers rely on values
+ * programmed elsewhere or on reset defaults - confirm.
+ */
+ switch (rate) {
+ /* Setting VCO for 10.8GHz */
+ case 2700:
+ case 5400:
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CP_PADJ_M0, 0x0028);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_DSM_FBH_OVRD_M0, 0x0022);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_DSM_FBH_OVRD_M0, 0x0022);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_DSM_FBL_OVRD_M0, 0x000C);
+ break;
+ /* Setting VCO for 9.72GHz */
+ case 1620:
+ case 2430:
+ case 3240:
+ cdns_torrent_phy_write(regmap, CMN_PLL0_DSM_DIAG_M0, 0x0004);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_DSM_DIAG_M0, 0x0004);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CP_PADJ_M0, 0x0509);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CP_PADJ_M0, 0x0509);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CP_IADJ_M0, 0x0F00);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CP_IADJ_M0, 0x0F00);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_FILT_PADJ_M0, 0x0F08);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_FILT_PADJ_M0, 0x0F08);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_INTDIV_M0, 0x0061);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_INTDIV_M0, 0x0061);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVL_M0, 0x3333);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVL_M0, 0x3333);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_HIGH_THR_M0, 0x0042);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_HIGH_THR_M0, 0x0042);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CTRL_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CTRL_M0, 0x0002);
+ break;
+ /* Setting VCO for 8.64GHz */
+ case 2160:
+ case 4320:
+ cdns_torrent_phy_write(regmap, CMN_PLL0_DSM_DIAG_M0, 0x0004);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_DSM_DIAG_M0, 0x0004);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CP_PADJ_M0, 0x0509);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CP_PADJ_M0, 0x0509);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CP_IADJ_M0, 0x0F00);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CP_IADJ_M0, 0x0F00);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_FILT_PADJ_M0, 0x0F08);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_FILT_PADJ_M0, 0x0F08);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_INTDIV_M0, 0x0056);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_INTDIV_M0, 0x0056);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVL_M0, 0x6666);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVL_M0, 0x6666);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_HIGH_THR_M0, 0x003A);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_HIGH_THR_M0, 0x003A);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CTRL_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CTRL_M0, 0x0002);
+ break;
+ /* Setting VCO for 8.1GHz */
+ case 8100:
+ cdns_torrent_phy_write(regmap, CMN_PLL0_DSM_DIAG_M0, 0x0004);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_DSM_DIAG_M0, 0x0004);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CP_PADJ_M0, 0x0509);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CP_PADJ_M0, 0x0509);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CP_IADJ_M0, 0x0F00);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CP_IADJ_M0, 0x0F00);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_FILT_PADJ_M0, 0x0F08);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_FILT_PADJ_M0, 0x0F08);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_INTDIV_M0, 0x0051);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_INTDIV_M0, 0x0051);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVH_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PLL0_HIGH_THR_M0, 0x0036);
+ cdns_torrent_phy_write(regmap, CMN_PLL1_HIGH_THR_M0, 0x0036);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CTRL_M0, 0x0002);
+ cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CTRL_M0, 0x0002);
+ break;
+ }
+}
+
/*
* Enable or disable PLL for selected lanes.
*/
@@ -669,6 +1000,161 @@ static int cdns_torrent_dp_set_pll_en(struct cdns_torrent_phy *cdns_phy,
return ret;
}
+/*
+ * Request a PMA transceiver power state on the enabled lanes and wait for
+ * the PHY to acknowledge it.
+ *
+ * @cdns_phy:   PHY instance; accesses go through its regmap_dptx_phy_reg.
+ * @num_lanes:  1 selects lane 0, 2 selects lanes 0-1, any other value
+ *              selects all four lanes.
+ * @powerstate: POWERSTATE_A0 or POWERSTATE_A2; any other value is treated
+ *              as A3.
+ *
+ * Returns 0 once the acknowledge register matches the request, or the
+ * negative error code produced by regmap_read_poll_timeout(). The request
+ * register is cleared and a 100 ns delay is applied in either case.
+ */
+static int cdns_torrent_dp_set_power_state(struct cdns_torrent_phy *cdns_phy,
+					   u32 num_lanes,
+					   enum phy_powerstate powerstate)
+{
+	/* Register value for power state for a single byte. */
+	u32 value_part;
+	u32 value;
+	u32 mask;
+	u32 read_val;
+	/*
+	 * Signed, to match both the poll helper's negative error codes and
+	 * this function's return type (was u32).
+	 */
+	int ret;
+	struct regmap *regmap = cdns_phy->regmap_dptx_phy_reg;
+
+	switch (powerstate) {
+	case (POWERSTATE_A0):
+		value_part = 0x01U;
+		break;
+	case (POWERSTATE_A2):
+		value_part = 0x04U;
+		break;
+	default:
+		/* Powerstate A3 */
+		value_part = 0x08U;
+		break;
+	}
+
+	/*
+	 * Select values of registers and mask, depending on enabled
+	 * lane count. Each lane occupies one byte of the register.
+	 */
+	switch (num_lanes) {
+	/* lane 0 */
+	case (1):
+		value = value_part;
+		mask = 0x0000003FU;
+		break;
+	/* lanes 0-1 */
+	case (2):
+		value = (value_part
+			 | (value_part << 8));
+		mask = 0x00003F3FU;
+		break;
+	/* lanes 0-3, all */
+	default:
+		value = (value_part
+			 | (value_part << 8)
+			 | (value_part << 16)
+			 | (value_part << 24));
+		mask = 0x3F3F3F3FU;
+		break;
+	}
+
+	/* Set power state A<n>. */
+	cdns_torrent_dp_write(regmap, PHY_PMA_XCVR_POWER_STATE_REQ, value);
+	/* Wait, until PHY acknowledges power state completion. */
+	ret = regmap_read_poll_timeout(regmap, PHY_PMA_XCVR_POWER_STATE_ACK,
+				       read_val, (read_val & mask) == value, 0,
+				       POLL_TIMEOUT_US);
+	/* Drop the request whether or not it was acknowledged. */
+	cdns_torrent_dp_write(regmap, PHY_PMA_XCVR_POWER_STATE_REQ, 0x00000000);
+	ndelay(100);
+
+	return ret;
+}
+
+/*
+ * Wait for the link PLL clock enable acknowledge, then step the enabled
+ * lanes through power state A2 and on to A0.
+ *
+ * Returns 0 on success, -ETIMEDOUT if the PLL clock enable acknowledge never
+ * arrives, or the error returned by cdns_torrent_dp_set_power_state().
+ */
+static int cdns_torrent_dp_run(struct cdns_torrent_phy *cdns_phy, u32 num_lanes)
+{
+	struct regmap *dptx = cdns_phy->regmap_dptx_phy_reg;
+	unsigned int ack;
+	int err;
+
+	/*
+	 * waiting for ACK of pma_xcvr_pllclk_en_ln_*, only for the
+	 * master lane
+	 */
+	err = regmap_read_poll_timeout(dptx, PHY_PMA_XCVR_PLLCLK_EN_ACK,
+				       ack, ack & 1,
+				       0, POLL_TIMEOUT_US);
+	if (err == -ETIMEDOUT) {
+		dev_err(cdns_phy->dev,
+			"timeout waiting for link PLL clock enable ack\n");
+		return err;
+	}
+
+	ndelay(100);
+
+	/* A2 first; A0 is only requested once A2 has been acknowledged. */
+	err = cdns_torrent_dp_set_power_state(cdns_phy, num_lanes,
+					      POWERSTATE_A2);
+	if (err)
+		return err;
+
+	return cdns_torrent_dp_set_power_state(cdns_phy, num_lanes,
+					       POWERSTATE_A0);
+}
+
+/*
+ * Poll PHY_PMA_CMN_READY until bit 0 is set.
+ *
+ * Returns 0 when the bit is observed (or when polling ends for any reason
+ * other than a timeout), -ETIMEDOUT after POLL_TIMEOUT_US without it.
+ */
+static int cdns_torrent_dp_wait_pma_cmn_ready(struct cdns_torrent_phy *cdns_phy)
+{
+	struct regmap *dptx = cdns_phy->regmap_dptx_phy_reg;
+	unsigned int ready;
+	int err;
+
+	err = regmap_read_poll_timeout(dptx, PHY_PMA_CMN_READY, ready,
+				       ready & 1, 0, POLL_TIMEOUT_US);
+	if (err != -ETIMEDOUT)
+		return 0;
+
+	dev_err(cdns_phy->dev,
+		"timeout waiting for PMA common ready\n");
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * Program the rate-dependent PLL clock select (both PLLs) and the per-lane
+ * high-speed clock divider for the first @num_lanes TX lanes.
+ *
+ * A @rate that matches none of the cases below programs 0 into all of these
+ * registers.
+ */
+static void cdns_torrent_dp_pma_cmn_rate(struct cdns_torrent_phy *cdns_phy,
+					 u32 rate, u32 num_lanes)
+{
+	struct regmap *cmn = cdns_phy->regmap_common_cdb;
+	unsigned int clk_sel = 0;
+	unsigned int hsclk_div = 0;
+	unsigned int lane;
+
+	switch (rate) {
+	case 1620:
+		clk_sel = 0x0f01;
+		hsclk_div = 2;
+		break;
+	case 2160:
+	case 2430:
+	case 2700:
+		clk_sel = 0x0701;
+		hsclk_div = 1;
+		break;
+	case 3240:
+		clk_sel = 0x0b00;
+		hsclk_div = 2;
+		break;
+	case 4320:
+	case 5400:
+		clk_sel = 0x0301;
+		hsclk_div = 0;
+		break;
+	case 8100:
+		clk_sel = 0x0200;
+		hsclk_div = 0;
+		break;
+	default:
+		/* Keep the zero defaults. */
+		break;
+	}
+
+	cdns_torrent_phy_write(cmn, CMN_PDIAG_PLL0_CLK_SEL_M0, clk_sel);
+	cdns_torrent_phy_write(cmn, CMN_PDIAG_PLL1_CLK_SEL_M0, clk_sel);
+
+	/* PMA lane configuration to deal with multi-link operation */
+	for (lane = 0; lane < num_lanes; lane++)
+		cdns_torrent_phy_write(cdns_phy->regmap_tx_lane_cdb[lane],
+				       XCVR_DIAG_HSCLK_DIV, hsclk_div);
+}
+
/*
* Perform register operations related to setting link rate, once powerstate is
* set and PLL disable request was processed.
@@ -676,8 +1162,7 @@ static int cdns_torrent_dp_set_pll_en(struct cdns_torrent_phy *cdns_phy,
static int cdns_torrent_dp_configure_rate(struct cdns_torrent_phy *cdns_phy,
struct phy_configure_opts_dp *dp)
{
- u32 ret;
- u32 read_val;
+ u32 read_val, ret;
/* Disable the cmn_pll0_en before re-programming the new data rate. */
regmap_field_write(cdns_phy->phy_pma_pll_raw_ctrl, 0x0);
@@ -695,17 +1180,16 @@ static int cdns_torrent_dp_configure_rate(struct cdns_torrent_phy *cdns_phy,
ndelay(200);
/* DP Rate Change - VCO Output settings. */
- if (cdns_phy->ref_clk_rate == REF_CLK_19_2MHz) {
+ if (cdns_phy->ref_clk_rate == CLK_19_2_MHZ)
/* PMA common configuration 19.2MHz */
- cdns_torrent_dp_pma_cmn_vco_cfg_19_2mhz(cdns_phy, dp->link_rate,
- dp->ssc);
- cdns_torrent_dp_pma_cmn_cfg_19_2mhz(cdns_phy);
- } else if (cdns_phy->ref_clk_rate == REF_CLK_25MHz) {
+ cdns_torrent_dp_pma_cmn_vco_cfg_19_2mhz(cdns_phy, dp->link_rate, dp->ssc);
+ else if (cdns_phy->ref_clk_rate == CLK_25_MHZ)
/* PMA common configuration 25MHz */
- cdns_torrent_dp_pma_cmn_vco_cfg_25mhz(cdns_phy, dp->link_rate,
- dp->ssc);
- cdns_torrent_dp_pma_cmn_cfg_25mhz(cdns_phy);
- }
+ cdns_torrent_dp_pma_cmn_vco_cfg_25mhz(cdns_phy, dp->link_rate, dp->ssc);
+ else if (cdns_phy->ref_clk_rate == CLK_100_MHZ)
+ /* PMA common configuration 100MHz */
+ cdns_torrent_dp_pma_cmn_vco_cfg_100mhz(cdns_phy, dp->link_rate, dp->ssc);
+
cdns_torrent_dp_pma_cmn_rate(cdns_phy, dp->link_rate, dp->lanes);
/* Enable the cmn_pll0_en. */
@@ -984,28 +1468,71 @@ static int cdns_torrent_dp_configure(struct phy *phy,
return ret;
}
-static int cdns_torrent_dp_init(struct phy *phy)
+static int cdns_torrent_phy_on(struct phy *phy)
{
- unsigned char lane_bits;
- int ret;
struct cdns_torrent_inst *inst = phy_get_drvdata(phy);
struct cdns_torrent_phy *cdns_phy = dev_get_drvdata(phy->dev.parent);
- struct regmap *regmap = cdns_phy->regmap_dptx_phy_reg;
+ u32 read_val;
+ int ret;
- switch (cdns_phy->ref_clk_rate) {
- case REF_CLK_19_2MHz:
- case REF_CLK_25MHz:
- /* Valid Ref Clock Rate */
- break;
- default:
- dev_err(cdns_phy->dev, "Unsupported Ref Clock Rate\n");
- return -EINVAL;
+ if (cdns_phy->nsubnodes == 1) {
+ /* Take the PHY lane group out of reset */
+ reset_control_deassert(inst->lnk_rst);
+
+ /* Take the PHY out of reset */
+ ret = reset_control_deassert(cdns_phy->phy_rst);
+ if (ret)
+ return ret;
}
- cdns_torrent_dp_write(regmap, PHY_AUX_CTRL, 0x0003); /* enable AUX */
+ /*
+ * Wait for cmn_ready assertion
+ * PHY_PMA_CMN_CTRL1[0] == 1
+ */
+ ret = regmap_field_read_poll_timeout(cdns_phy->phy_pma_cmn_ctrl_1,
+ read_val, read_val, 1000,
+ PLL_LOCK_TIMEOUT);
+ if (ret) {
+ dev_err(cdns_phy->dev, "Timeout waiting for CMN ready\n");
+ return ret;
+ }
+
+ if (inst->phy_type == TYPE_PCIE || inst->phy_type == TYPE_USB) {
+ ret = regmap_field_read_poll_timeout(cdns_phy->phy_pcs_iso_link_ctrl_1[inst->mlane],
+ read_val, !read_val, 1000,
+ PLL_LOCK_TIMEOUT);
+ if (ret == -ETIMEDOUT) {
+ dev_err(cdns_phy->dev, "Timeout waiting for PHY status ready\n");
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int cdns_torrent_phy_off(struct phy *phy)
+{
+ struct cdns_torrent_inst *inst = phy_get_drvdata(phy);
+ struct cdns_torrent_phy *cdns_phy = dev_get_drvdata(phy->dev.parent);
+ int ret;
- /* PHY PMA registers configuration function */
- cdns_torrent_dp_pma_cfg(cdns_phy, inst);
+ if (cdns_phy->nsubnodes != 1)
+ return 0;
+
+ ret = reset_control_assert(cdns_phy->phy_rst);
+ if (ret)
+ return ret;
+
+ return reset_control_assert(inst->lnk_rst);
+}
+
+static void cdns_torrent_dp_common_init(struct cdns_torrent_phy *cdns_phy,
+ struct cdns_torrent_inst *inst)
+{
+ struct regmap *regmap = cdns_phy->regmap_dptx_phy_reg;
+ unsigned char lane_bits;
+
+ cdns_torrent_dp_write(regmap, PHY_AUX_CTRL, 0x0003); /* enable AUX */
/*
* Set lines power state to A0
@@ -1024,21 +1551,35 @@ static int cdns_torrent_dp_init(struct phy *phy)
/* release pma_xcvr_pllclk_en_ln_*, only for the master lane */
cdns_torrent_dp_write(regmap, PHY_PMA_XCVR_PLLCLK_EN, 0x0001);
- /* PHY PMA registers configuration functions */
- /* Initialize PHY with max supported link rate, without SSC. */
- if (cdns_phy->ref_clk_rate == REF_CLK_19_2MHz)
+ /*
+ * PHY PMA registers configuration functions
+ * Initialize PHY with max supported link rate, without SSC.
+ */
+ if (cdns_phy->ref_clk_rate == CLK_19_2_MHZ)
cdns_torrent_dp_pma_cmn_vco_cfg_19_2mhz(cdns_phy,
cdns_phy->max_bit_rate,
false);
- else if (cdns_phy->ref_clk_rate == REF_CLK_25MHz)
+ else if (cdns_phy->ref_clk_rate == CLK_25_MHZ)
cdns_torrent_dp_pma_cmn_vco_cfg_25mhz(cdns_phy,
cdns_phy->max_bit_rate,
false);
+ else if (cdns_phy->ref_clk_rate == CLK_100_MHZ)
+ cdns_torrent_dp_pma_cmn_vco_cfg_100mhz(cdns_phy,
+ cdns_phy->max_bit_rate,
+ false);
+
cdns_torrent_dp_pma_cmn_rate(cdns_phy, cdns_phy->max_bit_rate,
inst->num_lanes);
/* take out of reset */
regmap_field_write(cdns_phy->phy_reset_ctrl, 0x1);
+}
+
+static int cdns_torrent_dp_start(struct cdns_torrent_phy *cdns_phy,
+ struct cdns_torrent_inst *inst,
+ struct phy *phy)
+{
+ int ret;
cdns_torrent_phy_on(phy);
@@ -1051,615 +1592,25 @@ static int cdns_torrent_dp_init(struct phy *phy)
return ret;
}
-static
-int cdns_torrent_dp_wait_pma_cmn_ready(struct cdns_torrent_phy *cdns_phy)
-{
- unsigned int reg;
- int ret;
- struct regmap *regmap = cdns_phy->regmap_dptx_phy_reg;
-
- ret = regmap_read_poll_timeout(regmap, PHY_PMA_CMN_READY, reg,
- reg & 1, 0, POLL_TIMEOUT_US);
- if (ret == -ETIMEDOUT) {
- dev_err(cdns_phy->dev,
- "timeout waiting for PMA common ready\n");
- return -ETIMEDOUT;
- }
-
- return 0;
-}
-
-static void cdns_torrent_dp_pma_cfg(struct cdns_torrent_phy *cdns_phy,
- struct cdns_torrent_inst *inst)
-{
- unsigned int i;
-
- if (cdns_phy->ref_clk_rate == REF_CLK_19_2MHz)
- /* PMA common configuration 19.2MHz */
- cdns_torrent_dp_pma_cmn_cfg_19_2mhz(cdns_phy);
- else if (cdns_phy->ref_clk_rate == REF_CLK_25MHz)
- /* PMA common configuration 25MHz */
- cdns_torrent_dp_pma_cmn_cfg_25mhz(cdns_phy);
-
- /* PMA lane configuration to deal with multi-link operation */
- for (i = 0; i < inst->num_lanes; i++)
- cdns_torrent_dp_pma_lane_cfg(cdns_phy, i);
-}
-
-static
-void cdns_torrent_dp_pma_cmn_cfg_19_2mhz(struct cdns_torrent_phy *cdns_phy)
-{
- struct regmap *regmap = cdns_phy->regmap_common_cdb;
-
- /* refclock registers - assumes 19.2 MHz refclock */
- cdns_torrent_phy_write(regmap, CMN_SSM_BIAS_TMR, 0x0014);
- cdns_torrent_phy_write(regmap, CMN_PLLSM0_PLLPRE_TMR, 0x0027);
- cdns_torrent_phy_write(regmap, CMN_PLLSM0_PLLLOCK_TMR, 0x00A1);
- cdns_torrent_phy_write(regmap, CMN_PLLSM1_PLLPRE_TMR, 0x0027);
- cdns_torrent_phy_write(regmap, CMN_PLLSM1_PLLLOCK_TMR, 0x00A1);
- cdns_torrent_phy_write(regmap, CMN_BGCAL_INIT_TMR, 0x0060);
- cdns_torrent_phy_write(regmap, CMN_BGCAL_ITER_TMR, 0x0060);
- cdns_torrent_phy_write(regmap, CMN_IBCAL_INIT_TMR, 0x0014);
- cdns_torrent_phy_write(regmap, CMN_TXPUCAL_INIT_TMR, 0x0018);
- cdns_torrent_phy_write(regmap, CMN_TXPUCAL_ITER_TMR, 0x0005);
- cdns_torrent_phy_write(regmap, CMN_TXPDCAL_INIT_TMR, 0x0018);
- cdns_torrent_phy_write(regmap, CMN_TXPDCAL_ITER_TMR, 0x0005);
- cdns_torrent_phy_write(regmap, CMN_RXCAL_INIT_TMR, 0x0240);
- cdns_torrent_phy_write(regmap, CMN_RXCAL_ITER_TMR, 0x0005);
- cdns_torrent_phy_write(regmap, CMN_SD_CAL_INIT_TMR, 0x0002);
- cdns_torrent_phy_write(regmap, CMN_SD_CAL_ITER_TMR, 0x0002);
- cdns_torrent_phy_write(regmap, CMN_SD_CAL_REFTIM_START, 0x000B);
- cdns_torrent_phy_write(regmap, CMN_SD_CAL_PLLCNT_START, 0x0137);
-
- /* PLL registers */
- cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CP_PADJ_M0, 0x0509);
- cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CP_IADJ_M0, 0x0F00);
- cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_FILT_PADJ_M0, 0x0F08);
- cdns_torrent_phy_write(regmap, CMN_PLL0_DSM_DIAG_M0, 0x0004);
- cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CP_PADJ_M0, 0x0509);
- cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CP_IADJ_M0, 0x0F00);
- cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_FILT_PADJ_M0, 0x0F08);
- cdns_torrent_phy_write(regmap, CMN_PLL1_DSM_DIAG_M0, 0x0004);
- cdns_torrent_phy_write(regmap, CMN_PLL0_VCOCAL_INIT_TMR, 0x00C0);
- cdns_torrent_phy_write(regmap, CMN_PLL0_VCOCAL_ITER_TMR, 0x0004);
- cdns_torrent_phy_write(regmap, CMN_PLL1_VCOCAL_INIT_TMR, 0x00C0);
- cdns_torrent_phy_write(regmap, CMN_PLL1_VCOCAL_ITER_TMR, 0x0004);
- cdns_torrent_phy_write(regmap, CMN_PLL0_VCOCAL_REFTIM_START, 0x0260);
- cdns_torrent_phy_write(regmap, CMN_PLL0_VCOCAL_TCTRL, 0x0003);
- cdns_torrent_phy_write(regmap, CMN_PLL1_VCOCAL_REFTIM_START, 0x0260);
- cdns_torrent_phy_write(regmap, CMN_PLL1_VCOCAL_TCTRL, 0x0003);
-}
-
-/*
- * Set registers responsible for enabling and configuring SSC, with second and
- * third register values provided by parameters.
- */
-static
-void cdns_torrent_dp_enable_ssc_19_2mhz(struct cdns_torrent_phy *cdns_phy,
- u32 ctrl2_val, u32 ctrl3_val)
-{
- struct regmap *regmap = cdns_phy->regmap_common_cdb;
-
- cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL1_M0, 0x0001);
- cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL1_M0, ctrl2_val);
- cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL1_M0, ctrl3_val);
- cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL4_M0, 0x0003);
- cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL1_M0, 0x0001);
- cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL1_M0, ctrl2_val);
- cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL1_M0, ctrl3_val);
- cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL4_M0, 0x0003);
-}
-
-static
-void cdns_torrent_dp_pma_cmn_vco_cfg_19_2mhz(struct cdns_torrent_phy *cdns_phy,
- u32 rate, bool ssc)
-{
- struct regmap *regmap = cdns_phy->regmap_common_cdb;
-
- /* Assumes 19.2 MHz refclock */
- switch (rate) {
- /* Setting VCO for 10.8GHz */
- case 2700:
- case 5400:
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_INTDIV_M0, 0x0119);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_FRACDIVL_M0, 0x4000);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_FRACDIVH_M0, 0x0002);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_HIGH_THR_M0, 0x00BC);
- cdns_torrent_phy_write(regmap,
- CMN_PDIAG_PLL0_CTRL_M0, 0x0012);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_INTDIV_M0, 0x0119);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_FRACDIVL_M0, 0x4000);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_FRACDIVH_M0, 0x0002);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_HIGH_THR_M0, 0x00BC);
- cdns_torrent_phy_write(regmap,
- CMN_PDIAG_PLL1_CTRL_M0, 0x0012);
- if (ssc)
- cdns_torrent_dp_enable_ssc_19_2mhz(cdns_phy, 0x033A,
- 0x006A);
- break;
- /* Setting VCO for 9.72GHz */
- case 1620:
- case 2430:
- case 3240:
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_INTDIV_M0, 0x01FA);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_FRACDIVL_M0, 0x4000);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_FRACDIVH_M0, 0x0002);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_HIGH_THR_M0, 0x0152);
- cdns_torrent_phy_write(regmap,
- CMN_PDIAG_PLL0_CTRL_M0, 0x0002);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_INTDIV_M0, 0x01FA);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_FRACDIVL_M0, 0x4000);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_FRACDIVH_M0, 0x0002);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_HIGH_THR_M0, 0x0152);
- cdns_torrent_phy_write(regmap,
- CMN_PDIAG_PLL1_CTRL_M0, 0x0002);
- if (ssc)
- cdns_torrent_dp_enable_ssc_19_2mhz(cdns_phy, 0x05DD,
- 0x0069);
- break;
- /* Setting VCO for 8.64GHz */
- case 2160:
- case 4320:
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_INTDIV_M0, 0x01C2);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_FRACDIVL_M0, 0x0000);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_FRACDIVH_M0, 0x0002);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_HIGH_THR_M0, 0x012C);
- cdns_torrent_phy_write(regmap,
- CMN_PDIAG_PLL0_CTRL_M0, 0x0002);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_INTDIV_M0, 0x01C2);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_FRACDIVL_M0, 0x0000);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_FRACDIVH_M0, 0x0002);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_HIGH_THR_M0, 0x012C);
- cdns_torrent_phy_write(regmap,
- CMN_PDIAG_PLL1_CTRL_M0, 0x0002);
- if (ssc)
- cdns_torrent_dp_enable_ssc_19_2mhz(cdns_phy, 0x0536,
- 0x0069);
- break;
- /* Setting VCO for 8.1GHz */
- case 8100:
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_INTDIV_M0, 0x01A5);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_FRACDIVL_M0, 0xE000);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_FRACDIVH_M0, 0x0002);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_HIGH_THR_M0, 0x011A);
- cdns_torrent_phy_write(regmap,
- CMN_PDIAG_PLL0_CTRL_M0, 0x0002);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_INTDIV_M0, 0x01A5);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_FRACDIVL_M0, 0xE000);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_FRACDIVH_M0, 0x0002);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_HIGH_THR_M0, 0x011A);
- cdns_torrent_phy_write(regmap,
- CMN_PDIAG_PLL1_CTRL_M0, 0x0002);
- if (ssc)
- cdns_torrent_dp_enable_ssc_19_2mhz(cdns_phy, 0x04D7,
- 0x006A);
- break;
- }
-
- if (ssc) {
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_VCOCAL_PLLCNT_START, 0x025E);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_LOCK_PLLCNT_THR, 0x0005);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_VCOCAL_PLLCNT_START, 0x025E);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_LOCK_PLLCNT_THR, 0x0005);
- } else {
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_VCOCAL_PLLCNT_START, 0x0260);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_VCOCAL_PLLCNT_START, 0x0260);
- /* Set reset register values to disable SSC */
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_SS_CTRL1_M0, 0x0002);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_SS_CTRL2_M0, 0x0000);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_SS_CTRL3_M0, 0x0000);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_SS_CTRL4_M0, 0x0000);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_LOCK_PLLCNT_THR, 0x0003);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_SS_CTRL1_M0, 0x0002);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_SS_CTRL2_M0, 0x0000);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_SS_CTRL3_M0, 0x0000);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_SS_CTRL4_M0, 0x0000);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_LOCK_PLLCNT_THR, 0x0003);
- }
-
- cdns_torrent_phy_write(regmap, CMN_PLL0_LOCK_REFCNT_START, 0x0099);
- cdns_torrent_phy_write(regmap, CMN_PLL0_LOCK_PLLCNT_START, 0x0099);
- cdns_torrent_phy_write(regmap, CMN_PLL1_LOCK_REFCNT_START, 0x0099);
- cdns_torrent_phy_write(regmap, CMN_PLL1_LOCK_PLLCNT_START, 0x0099);
-}
-
-static
-void cdns_torrent_dp_pma_cmn_cfg_25mhz(struct cdns_torrent_phy *cdns_phy)
-{
- struct regmap *regmap = cdns_phy->regmap_common_cdb;
-
- /* refclock registers - assumes 25 MHz refclock */
- cdns_torrent_phy_write(regmap, CMN_SSM_BIAS_TMR, 0x0019);
- cdns_torrent_phy_write(regmap, CMN_PLLSM0_PLLPRE_TMR, 0x0032);
- cdns_torrent_phy_write(regmap, CMN_PLLSM0_PLLLOCK_TMR, 0x00D1);
- cdns_torrent_phy_write(regmap, CMN_PLLSM1_PLLPRE_TMR, 0x0032);
- cdns_torrent_phy_write(regmap, CMN_PLLSM1_PLLLOCK_TMR, 0x00D1);
- cdns_torrent_phy_write(regmap, CMN_BGCAL_INIT_TMR, 0x007D);
- cdns_torrent_phy_write(regmap, CMN_BGCAL_ITER_TMR, 0x007D);
- cdns_torrent_phy_write(regmap, CMN_IBCAL_INIT_TMR, 0x0019);
- cdns_torrent_phy_write(regmap, CMN_TXPUCAL_INIT_TMR, 0x001E);
- cdns_torrent_phy_write(regmap, CMN_TXPUCAL_ITER_TMR, 0x0006);
- cdns_torrent_phy_write(regmap, CMN_TXPDCAL_INIT_TMR, 0x001E);
- cdns_torrent_phy_write(regmap, CMN_TXPDCAL_ITER_TMR, 0x0006);
- cdns_torrent_phy_write(regmap, CMN_RXCAL_INIT_TMR, 0x02EE);
- cdns_torrent_phy_write(regmap, CMN_RXCAL_ITER_TMR, 0x0006);
- cdns_torrent_phy_write(regmap, CMN_SD_CAL_INIT_TMR, 0x0002);
- cdns_torrent_phy_write(regmap, CMN_SD_CAL_ITER_TMR, 0x0002);
- cdns_torrent_phy_write(regmap, CMN_SD_CAL_REFTIM_START, 0x000E);
- cdns_torrent_phy_write(regmap, CMN_SD_CAL_PLLCNT_START, 0x012B);
-
- /* PLL registers */
- cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CP_PADJ_M0, 0x0509);
- cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CP_IADJ_M0, 0x0F00);
- cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_FILT_PADJ_M0, 0x0F08);
- cdns_torrent_phy_write(regmap, CMN_PLL0_DSM_DIAG_M0, 0x0004);
- cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CP_PADJ_M0, 0x0509);
- cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CP_IADJ_M0, 0x0F00);
- cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_FILT_PADJ_M0, 0x0F08);
- cdns_torrent_phy_write(regmap, CMN_PLL1_DSM_DIAG_M0, 0x0004);
- cdns_torrent_phy_write(regmap, CMN_PLL0_VCOCAL_INIT_TMR, 0x00FA);
- cdns_torrent_phy_write(regmap, CMN_PLL0_VCOCAL_ITER_TMR, 0x0004);
- cdns_torrent_phy_write(regmap, CMN_PLL1_VCOCAL_INIT_TMR, 0x00FA);
- cdns_torrent_phy_write(regmap, CMN_PLL1_VCOCAL_ITER_TMR, 0x0004);
- cdns_torrent_phy_write(regmap, CMN_PLL0_VCOCAL_REFTIM_START, 0x0317);
- cdns_torrent_phy_write(regmap, CMN_PLL0_VCOCAL_TCTRL, 0x0003);
- cdns_torrent_phy_write(regmap, CMN_PLL1_VCOCAL_REFTIM_START, 0x0317);
- cdns_torrent_phy_write(regmap, CMN_PLL1_VCOCAL_TCTRL, 0x0003);
-}
-
-/*
- * Set registers responsible for enabling and configuring SSC, with second
- * register value provided by a parameter.
- */
-static void cdns_torrent_dp_enable_ssc_25mhz(struct cdns_torrent_phy *cdns_phy,
- u32 ctrl2_val)
-{
- struct regmap *regmap = cdns_phy->regmap_common_cdb;
-
- cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL1_M0, 0x0001);
- cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL1_M0, ctrl2_val);
- cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL1_M0, 0x007F);
- cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL4_M0, 0x0003);
- cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL1_M0, 0x0001);
- cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL1_M0, ctrl2_val);
- cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL1_M0, 0x007F);
- cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL4_M0, 0x0003);
-}
-
-static
-void cdns_torrent_dp_pma_cmn_vco_cfg_25mhz(struct cdns_torrent_phy *cdns_phy,
- u32 rate, bool ssc)
-{
- struct regmap *regmap = cdns_phy->regmap_common_cdb;
-
- /* Assumes 25 MHz refclock */
- switch (rate) {
- /* Setting VCO for 10.8GHz */
- case 2700:
- case 5400:
- cdns_torrent_phy_write(regmap, CMN_PLL0_INTDIV_M0, 0x01B0);
- cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVL_M0, 0x0000);
- cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVH_M0, 0x0002);
- cdns_torrent_phy_write(regmap, CMN_PLL0_HIGH_THR_M0, 0x0120);
- cdns_torrent_phy_write(regmap, CMN_PLL1_INTDIV_M0, 0x01B0);
- cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVL_M0, 0x0000);
- cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVH_M0, 0x0002);
- cdns_torrent_phy_write(regmap, CMN_PLL1_HIGH_THR_M0, 0x0120);
- if (ssc)
- cdns_torrent_dp_enable_ssc_25mhz(cdns_phy, 0x0423);
- break;
- /* Setting VCO for 9.72GHz */
- case 1620:
- case 2430:
- case 3240:
- cdns_torrent_phy_write(regmap, CMN_PLL0_INTDIV_M0, 0x0184);
- cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVL_M0, 0xCCCD);
- cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVH_M0, 0x0002);
- cdns_torrent_phy_write(regmap, CMN_PLL0_HIGH_THR_M0, 0x0104);
- cdns_torrent_phy_write(regmap, CMN_PLL1_INTDIV_M0, 0x0184);
- cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVL_M0, 0xCCCD);
- cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVH_M0, 0x0002);
- cdns_torrent_phy_write(regmap, CMN_PLL1_HIGH_THR_M0, 0x0104);
- if (ssc)
- cdns_torrent_dp_enable_ssc_25mhz(cdns_phy, 0x03B9);
- break;
- /* Setting VCO for 8.64GHz */
- case 2160:
- case 4320:
- cdns_torrent_phy_write(regmap, CMN_PLL0_INTDIV_M0, 0x0159);
- cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVL_M0, 0x999A);
- cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVH_M0, 0x0002);
- cdns_torrent_phy_write(regmap, CMN_PLL0_HIGH_THR_M0, 0x00E7);
- cdns_torrent_phy_write(regmap, CMN_PLL1_INTDIV_M0, 0x0159);
- cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVL_M0, 0x999A);
- cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVH_M0, 0x0002);
- cdns_torrent_phy_write(regmap, CMN_PLL1_HIGH_THR_M0, 0x00E7);
- if (ssc)
- cdns_torrent_dp_enable_ssc_25mhz(cdns_phy, 0x034F);
- break;
- /* Setting VCO for 8.1GHz */
- case 8100:
- cdns_torrent_phy_write(regmap, CMN_PLL0_INTDIV_M0, 0x0144);
- cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVL_M0, 0x0000);
- cdns_torrent_phy_write(regmap, CMN_PLL0_FRACDIVH_M0, 0x0002);
- cdns_torrent_phy_write(regmap, CMN_PLL0_HIGH_THR_M0, 0x00D8);
- cdns_torrent_phy_write(regmap, CMN_PLL1_INTDIV_M0, 0x0144);
- cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVL_M0, 0x0000);
- cdns_torrent_phy_write(regmap, CMN_PLL1_FRACDIVH_M0, 0x0002);
- cdns_torrent_phy_write(regmap, CMN_PLL1_HIGH_THR_M0, 0x00D8);
- if (ssc)
- cdns_torrent_dp_enable_ssc_25mhz(cdns_phy, 0x031A);
- break;
- }
-
- cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL0_CTRL_M0, 0x0002);
- cdns_torrent_phy_write(regmap, CMN_PDIAG_PLL1_CTRL_M0, 0x0002);
-
- if (ssc) {
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_VCOCAL_PLLCNT_START, 0x0315);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_LOCK_PLLCNT_THR, 0x0005);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_VCOCAL_PLLCNT_START, 0x0315);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_LOCK_PLLCNT_THR, 0x0005);
- } else {
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_VCOCAL_PLLCNT_START, 0x0317);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_VCOCAL_PLLCNT_START, 0x0317);
- /* Set reset register values to disable SSC */
- cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL1_M0, 0x0002);
- cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL2_M0, 0x0000);
- cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL3_M0, 0x0000);
- cdns_torrent_phy_write(regmap, CMN_PLL0_SS_CTRL4_M0, 0x0000);
- cdns_torrent_phy_write(regmap,
- CMN_PLL0_LOCK_PLLCNT_THR, 0x0003);
- cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL1_M0, 0x0002);
- cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL2_M0, 0x0000);
- cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL3_M0, 0x0000);
- cdns_torrent_phy_write(regmap, CMN_PLL1_SS_CTRL4_M0, 0x0000);
- cdns_torrent_phy_write(regmap,
- CMN_PLL1_LOCK_PLLCNT_THR, 0x0003);
- }
-
- cdns_torrent_phy_write(regmap, CMN_PLL0_LOCK_REFCNT_START, 0x00C7);
- cdns_torrent_phy_write(regmap, CMN_PLL0_LOCK_PLLCNT_START, 0x00C7);
- cdns_torrent_phy_write(regmap, CMN_PLL1_LOCK_REFCNT_START, 0x00C7);
- cdns_torrent_phy_write(regmap, CMN_PLL1_LOCK_PLLCNT_START, 0x00C7);
-}
-
-static void cdns_torrent_dp_pma_cmn_rate(struct cdns_torrent_phy *cdns_phy,
- u32 rate, u32 num_lanes)
-{
- unsigned int clk_sel_val = 0;
- unsigned int hsclk_div_val = 0;
- unsigned int i;
-
- /* 16'h0000 for single DP link configuration */
- regmap_field_write(cdns_phy->phy_pll_cfg, 0x0);
-
- switch (rate) {
- case 1620:
- clk_sel_val = 0x0f01;
- hsclk_div_val = 2;
- break;
- case 2160:
- case 2430:
- case 2700:
- clk_sel_val = 0x0701;
- hsclk_div_val = 1;
- break;
- case 3240:
- clk_sel_val = 0x0b00;
- hsclk_div_val = 2;
- break;
- case 4320:
- case 5400:
- clk_sel_val = 0x0301;
- hsclk_div_val = 0;
- break;
- case 8100:
- clk_sel_val = 0x0200;
- hsclk_div_val = 0;
- break;
- }
-
- cdns_torrent_phy_write(cdns_phy->regmap_common_cdb,
- CMN_PDIAG_PLL0_CLK_SEL_M0, clk_sel_val);
- cdns_torrent_phy_write(cdns_phy->regmap_common_cdb,
- CMN_PDIAG_PLL1_CLK_SEL_M0, clk_sel_val);
-
- /* PMA lane configuration to deal with multi-link operation */
- for (i = 0; i < num_lanes; i++)
- cdns_torrent_phy_write(cdns_phy->regmap_tx_lane_cdb[i],
- XCVR_DIAG_HSCLK_DIV, hsclk_div_val);
-}
-
-static void cdns_torrent_dp_pma_lane_cfg(struct cdns_torrent_phy *cdns_phy,
- unsigned int lane)
-{
- /* Per lane, refclock-dependent receiver detection setting */
- if (cdns_phy->ref_clk_rate == REF_CLK_19_2MHz)
- cdns_torrent_phy_write(cdns_phy->regmap_tx_lane_cdb[lane],
- TX_RCVDET_ST_TMR, 0x0780);
- else if (cdns_phy->ref_clk_rate == REF_CLK_25MHz)
- cdns_torrent_phy_write(cdns_phy->regmap_tx_lane_cdb[lane],
- TX_RCVDET_ST_TMR, 0x09C4);
-
- /* Writing Tx/Rx Power State Controllers registers */
- cdns_torrent_phy_write(cdns_phy->regmap_tx_lane_cdb[lane],
- TX_PSC_A0, 0x00FB);
- cdns_torrent_phy_write(cdns_phy->regmap_tx_lane_cdb[lane],
- TX_PSC_A2, 0x04AA);
- cdns_torrent_phy_write(cdns_phy->regmap_tx_lane_cdb[lane],
- TX_PSC_A3, 0x04AA);
- cdns_torrent_phy_write(cdns_phy->regmap_rx_lane_cdb[lane],
- RX_PSC_A0, 0x0000);
- cdns_torrent_phy_write(cdns_phy->regmap_rx_lane_cdb[lane],
- RX_PSC_A2, 0x0000);
- cdns_torrent_phy_write(cdns_phy->regmap_rx_lane_cdb[lane],
- RX_PSC_A3, 0x0000);
-
- cdns_torrent_phy_write(cdns_phy->regmap_rx_lane_cdb[lane],
- RX_PSC_CAL, 0x0000);
-
- cdns_torrent_phy_write(cdns_phy->regmap_rx_lane_cdb[lane],
- RX_REE_GCSM1_CTRL, 0x0000);
- cdns_torrent_phy_write(cdns_phy->regmap_rx_lane_cdb[lane],
- RX_REE_GCSM2_CTRL, 0x0000);
- cdns_torrent_phy_write(cdns_phy->regmap_rx_lane_cdb[lane],
- RX_REE_PERGCSM_CTRL, 0x0000);
-
- cdns_torrent_phy_write(cdns_phy->regmap_tx_lane_cdb[lane],
- XCVR_DIAG_BIDI_CTRL, 0x000F);
- cdns_torrent_phy_write(cdns_phy->regmap_tx_lane_cdb[lane],
- XCVR_DIAG_PLLDRC_CTRL, 0x0001);
- cdns_torrent_phy_write(cdns_phy->regmap_tx_lane_cdb[lane],
- XCVR_DIAG_HSCLK_SEL, 0x0000);
-}
-
-static int cdns_torrent_dp_set_power_state(struct cdns_torrent_phy *cdns_phy,
- u32 num_lanes,
- enum phy_powerstate powerstate)
+static int cdns_torrent_dp_init(struct phy *phy)
{
- /* Register value for power state for a single byte. */
- u32 value_part;
- u32 value;
- u32 mask;
- u32 read_val;
- u32 ret;
- struct regmap *regmap = cdns_phy->regmap_dptx_phy_reg;
-
- switch (powerstate) {
- case (POWERSTATE_A0):
- value_part = 0x01U;
- break;
- case (POWERSTATE_A2):
- value_part = 0x04U;
- break;
- default:
- /* Powerstate A3 */
- value_part = 0x08U;
- break;
- }
+ struct cdns_torrent_inst *inst = phy_get_drvdata(phy);
+ struct cdns_torrent_phy *cdns_phy = dev_get_drvdata(phy->dev.parent);
- /* Select values of registers and mask, depending on enabled
- * lane count.
- */
- switch (num_lanes) {
- /* lane 0 */
- case (1):
- value = value_part;
- mask = 0x0000003FU;
- break;
- /* lanes 0-1 */
- case (2):
- value = (value_part
- | (value_part << 8));
- mask = 0x00003F3FU;
+ switch (cdns_phy->ref_clk_rate) {
+ case CLK_19_2_MHZ:
+ case CLK_25_MHZ:
+ case CLK_100_MHZ:
+ /* Valid Ref Clock Rate */
break;
- /* lanes 0-3, all */
default:
- value = (value_part
- | (value_part << 8)
- | (value_part << 16)
- | (value_part << 24));
- mask = 0x3F3F3F3FU;
- break;
- }
-
- /* Set power state A<n>. */
- cdns_torrent_dp_write(regmap, PHY_PMA_XCVR_POWER_STATE_REQ, value);
- /* Wait, until PHY acknowledges power state completion. */
- ret = regmap_read_poll_timeout(regmap, PHY_PMA_XCVR_POWER_STATE_ACK,
- read_val, (read_val & mask) == value, 0,
- POLL_TIMEOUT_US);
- cdns_torrent_dp_write(regmap, PHY_PMA_XCVR_POWER_STATE_REQ, 0x00000000);
- ndelay(100);
-
- return ret;
-}
-
-static int cdns_torrent_dp_run(struct cdns_torrent_phy *cdns_phy, u32 num_lanes)
-{
- unsigned int read_val;
- int ret;
- struct regmap *regmap = cdns_phy->regmap_dptx_phy_reg;
-
- /*
- * waiting for ACK of pma_xcvr_pllclk_en_ln_*, only for the
- * master lane
- */
- ret = regmap_read_poll_timeout(regmap, PHY_PMA_XCVR_PLLCLK_EN_ACK,
- read_val, read_val & 1,
- 0, POLL_TIMEOUT_US);
- if (ret == -ETIMEDOUT) {
- dev_err(cdns_phy->dev,
- "timeout waiting for link PLL clock enable ack\n");
- return ret;
+ dev_err(cdns_phy->dev, "Unsupported Ref Clock Rate\n");
+ return -EINVAL;
}
- ndelay(100);
-
- ret = cdns_torrent_dp_set_power_state(cdns_phy, num_lanes,
- POWERSTATE_A2);
- if (ret)
- return ret;
-
- ret = cdns_torrent_dp_set_power_state(cdns_phy, num_lanes,
- POWERSTATE_A0);
+ cdns_torrent_dp_common_init(cdns_phy, inst);
- return ret;
+ return cdns_torrent_dp_start(cdns_phy, inst, phy);
}
static int cdns_torrent_derived_refclk_enable(struct clk_hw *hw)
@@ -1764,56 +1715,6 @@ static int cdns_torrent_derived_refclk_register(struct cdns_torrent_phy *cdns_ph
return 0;
}
-static int cdns_torrent_phy_on(struct phy *phy)
-{
- struct cdns_torrent_inst *inst = phy_get_drvdata(phy);
- struct cdns_torrent_phy *cdns_phy = dev_get_drvdata(phy->dev.parent);
- u32 read_val;
- int ret;
-
- if (cdns_phy->nsubnodes == 1) {
- /* Take the PHY lane group out of reset */
- reset_control_deassert(inst->lnk_rst);
-
- /* Take the PHY out of reset */
- ret = reset_control_deassert(cdns_phy->phy_rst);
- if (ret)
- return ret;
- }
-
- /*
- * Wait for cmn_ready assertion
- * PHY_PMA_CMN_CTRL1[0] == 1
- */
- ret = regmap_field_read_poll_timeout(cdns_phy->phy_pma_cmn_ctrl_1,
- read_val, read_val, 1000,
- PLL_LOCK_TIMEOUT);
- if (ret) {
- dev_err(cdns_phy->dev, "Timeout waiting for CMN ready\n");
- return ret;
- }
-
- mdelay(10);
-
- return 0;
-}
-
-static int cdns_torrent_phy_off(struct phy *phy)
-{
- struct cdns_torrent_inst *inst = phy_get_drvdata(phy);
- struct cdns_torrent_phy *cdns_phy = dev_get_drvdata(phy->dev.parent);
- int ret;
-
- if (cdns_phy->nsubnodes != 1)
- return 0;
-
- ret = reset_control_assert(cdns_phy->phy_rst);
- if (ret)
- return ret;
-
- return reset_control_assert(inst->lnk_rst);
-}
-
static struct regmap *cdns_regmap_init(struct device *dev, void __iomem *base,
u32 block_offset,
u8 reg_offset_shift,
@@ -1854,6 +1755,7 @@ static int cdns_torrent_regfield_init(struct cdns_torrent_phy *cdns_phy)
struct device *dev = cdns_phy->dev;
struct regmap_field *field;
struct regmap *regmap;
+ int i;
regmap = cdns_phy->regmap_phy_pcs_common_cdb;
field = devm_regmap_field_alloc(dev, regmap, phy_pll_cfg);
@@ -1887,6 +1789,16 @@ static int cdns_torrent_regfield_init(struct cdns_torrent_phy *cdns_phy)
}
cdns_phy->phy_pma_pll_raw_ctrl = field;
+ for (i = 0; i < MAX_NUM_LANES; i++) {
+ regmap = cdns_phy->regmap_phy_pcs_lane_cdb[i];
+ field = devm_regmap_field_alloc(dev, regmap, phy_pcs_iso_link_ctrl_1);
+ if (IS_ERR(field)) {
+ dev_err(dev, "PHY_PCS_ISO_LINK_CTRL reg field init for ln %d failed\n", i);
+ return PTR_ERR(field);
+ }
+ cdns_phy->phy_pcs_iso_link_ctrl_1[i] = field;
+ }
+
return 0;
}
@@ -1947,6 +1859,17 @@ static int cdns_torrent_regmap_init(struct cdns_torrent_phy *cdns_phy)
return PTR_ERR(regmap);
}
cdns_phy->regmap_rx_lane_cdb[i] = regmap;
+
+ block_offset = TORRENT_PHY_PCS_LANE_CDB_OFFSET(i, block_offset_shift,
+ reg_offset_shift);
+ regmap = cdns_regmap_init(dev, sd_base, block_offset,
+ reg_offset_shift,
+ &cdns_torrent_phy_pcs_lane_cdb_config[i]);
+ if (IS_ERR(regmap)) {
+ dev_err(dev, "Failed to init PHY PCS lane CDB regmap\n");
+ return PTR_ERR(regmap);
+ }
+ cdns_phy->regmap_phy_pcs_lane_cdb[i] = regmap;
}
block_offset = TORRENT_COMMON_CDB_OFFSET;
@@ -1987,6 +1910,7 @@ static int cdns_torrent_phy_init(struct phy *phy)
struct cdns_torrent_phy *cdns_phy = dev_get_drvdata(phy->dev.parent);
const struct cdns_torrent_data *init_data = cdns_phy->init_data;
struct cdns_torrent_vals *cmn_vals, *tx_ln_vals, *rx_ln_vals;
+ enum cdns_torrent_ref_clk ref_clk = cdns_phy->ref_clk_rate;
struct cdns_torrent_vals *link_cmn_vals, *xcvr_diag_vals;
struct cdns_torrent_inst *inst = phy_get_drvdata(phy);
enum cdns_torrent_phy_type phy_type = inst->phy_type;
@@ -2000,9 +1924,6 @@ static int cdns_torrent_phy_init(struct phy *phy)
if (cdns_phy->nsubnodes > 1)
return 0;
- if (phy_type == TYPE_DP)
- return cdns_torrent_dp_init(phy);
-
/**
* Spread spectrum generation is not required or supported
* for SGMII/QSGMII
@@ -2052,7 +1973,7 @@ static int cdns_torrent_phy_init(struct phy *phy)
}
/* PMA common registers configurations */
- cmn_vals = init_data->cmn_vals[phy_type][TYPE_NONE][ssc];
+ cmn_vals = init_data->cmn_vals[ref_clk][phy_type][TYPE_NONE][ssc];
if (cmn_vals) {
reg_pairs = cmn_vals->reg_pairs;
num_regs = cmn_vals->num_regs;
@@ -2063,7 +1984,7 @@ static int cdns_torrent_phy_init(struct phy *phy)
}
/* PMA TX lane registers configurations */
- tx_ln_vals = init_data->tx_ln_vals[phy_type][TYPE_NONE][ssc];
+ tx_ln_vals = init_data->tx_ln_vals[ref_clk][phy_type][TYPE_NONE][ssc];
if (tx_ln_vals) {
reg_pairs = tx_ln_vals->reg_pairs;
num_regs = tx_ln_vals->num_regs;
@@ -2076,7 +1997,7 @@ static int cdns_torrent_phy_init(struct phy *phy)
}
/* PMA RX lane registers configurations */
- rx_ln_vals = init_data->rx_ln_vals[phy_type][TYPE_NONE][ssc];
+ rx_ln_vals = init_data->rx_ln_vals[ref_clk][phy_type][TYPE_NONE][ssc];
if (rx_ln_vals) {
reg_pairs = rx_ln_vals->reg_pairs;
num_regs = rx_ln_vals->num_regs;
@@ -2088,14 +2009,39 @@ static int cdns_torrent_phy_init(struct phy *phy)
}
}
+ if (phy_type == TYPE_DP)
+ return cdns_torrent_dp_init(phy);
+
+ return 0;
+}
+
+static const struct phy_ops cdns_torrent_phy_ops = {
+ .init = cdns_torrent_phy_init,
+ .configure = cdns_torrent_dp_configure,
+ .power_on = cdns_torrent_phy_on,
+ .power_off = cdns_torrent_phy_off,
+ .owner = THIS_MODULE,
+};
+
+static int cdns_torrent_noop_phy_on(struct phy *phy)
+{
+ /* Give 5ms to 10ms delay for the PIPE clock to be stable */
+ usleep_range(5000, 10000);
+
return 0;
}
+static const struct phy_ops noop_ops = {
+ .power_on = cdns_torrent_noop_phy_on,
+ .owner = THIS_MODULE,
+};
+
static
int cdns_torrent_phy_configure_multilink(struct cdns_torrent_phy *cdns_phy)
{
const struct cdns_torrent_data *init_data = cdns_phy->init_data;
struct cdns_torrent_vals *cmn_vals, *tx_ln_vals, *rx_ln_vals;
+ enum cdns_torrent_ref_clk ref_clk = cdns_phy->ref_clk_rate;
struct cdns_torrent_vals *link_cmn_vals, *xcvr_diag_vals;
enum cdns_torrent_phy_type phy_t1, phy_t2, tmp_phy_type;
struct cdns_torrent_vals *pcs_cmn_vals;
@@ -2184,7 +2130,7 @@ int cdns_torrent_phy_configure_multilink(struct cdns_torrent_phy *cdns_phy)
}
/* PMA common registers configurations */
- cmn_vals = init_data->cmn_vals[phy_t1][phy_t2][ssc];
+ cmn_vals = init_data->cmn_vals[ref_clk][phy_t1][phy_t2][ssc];
if (cmn_vals) {
reg_pairs = cmn_vals->reg_pairs;
num_regs = cmn_vals->num_regs;
@@ -2195,7 +2141,7 @@ int cdns_torrent_phy_configure_multilink(struct cdns_torrent_phy *cdns_phy)
}
/* PMA TX lane registers configurations */
- tx_ln_vals = init_data->tx_ln_vals[phy_t1][phy_t2][ssc];
+ tx_ln_vals = init_data->tx_ln_vals[ref_clk][phy_t1][phy_t2][ssc];
if (tx_ln_vals) {
reg_pairs = tx_ln_vals->reg_pairs;
num_regs = tx_ln_vals->num_regs;
@@ -2208,7 +2154,7 @@ int cdns_torrent_phy_configure_multilink(struct cdns_torrent_phy *cdns_phy)
}
/* PMA RX lane registers configurations */
- rx_ln_vals = init_data->rx_ln_vals[phy_t1][phy_t2][ssc];
+ rx_ln_vals = init_data->rx_ln_vals[ref_clk][phy_t1][phy_t2][ssc];
if (rx_ln_vals) {
reg_pairs = rx_ln_vals->reg_pairs;
num_regs = rx_ln_vals->num_regs;
@@ -2286,6 +2232,7 @@ static int cdns_torrent_reset(struct cdns_torrent_phy *cdns_phy)
static int cdns_torrent_clk(struct cdns_torrent_phy *cdns_phy)
{
struct device *dev = cdns_phy->dev;
+ unsigned long ref_clk_rate;
int ret;
cdns_phy->clk = devm_clk_get(dev, "refclk");
@@ -2300,13 +2247,29 @@ static int cdns_torrent_clk(struct cdns_torrent_phy *cdns_phy)
return ret;
}
- cdns_phy->ref_clk_rate = clk_get_rate(cdns_phy->clk);
- if (!(cdns_phy->ref_clk_rate)) {
+ ref_clk_rate = clk_get_rate(cdns_phy->clk);
+ if (!ref_clk_rate) {
dev_err(cdns_phy->dev, "Failed to get ref clock rate\n");
clk_disable_unprepare(cdns_phy->clk);
return -EINVAL;
}
+ switch (ref_clk_rate) {
+ case REF_CLK_19_2MHZ:
+ cdns_phy->ref_clk_rate = CLK_19_2_MHZ;
+ break;
+ case REF_CLK_25MHZ:
+ cdns_phy->ref_clk_rate = CLK_25_MHZ;
+ break;
+ case REF_CLK_100MHZ:
+ cdns_phy->ref_clk_rate = CLK_100_MHZ;
+ break;
+ default:
+ dev_err(cdns_phy->dev, "Invalid Ref Clock Rate\n");
+ clk_disable_unprepare(cdns_phy->clk);
+ return -EINVAL;
+ }
+
return 0;
}
@@ -2505,10 +2468,9 @@ static int cdns_torrent_phy_probe(struct platform_device *pdev)
init_dp_regmap++;
}
- dev_info(dev, "%d lanes, max bit rate %d.%03d Gbps\n",
- cdns_phy->phys[node].num_lanes,
- cdns_phy->max_bit_rate / 1000,
- cdns_phy->max_bit_rate % 1000);
+ dev_dbg(dev, "DP max bit rate %d.%03d Gbps\n",
+ cdns_phy->max_bit_rate / 1000,
+ cdns_phy->max_bit_rate % 1000);
gphy->attrs.bus_width = cdns_phy->phys[node].num_lanes;
gphy->attrs.max_link_rate = cdns_phy->max_bit_rate;
@@ -2540,6 +2502,17 @@ static int cdns_torrent_phy_probe(struct platform_device *pdev)
goto put_lnk_rst;
}
+ if (cdns_phy->nsubnodes > 1)
+ dev_dbg(dev, "Multi-link: %s (%d lanes) & %s (%d lanes)",
+ cdns_torrent_get_phy_type(cdns_phy->phys[0].phy_type),
+ cdns_phy->phys[0].num_lanes,
+ cdns_torrent_get_phy_type(cdns_phy->phys[1].phy_type),
+ cdns_phy->phys[1].num_lanes);
+ else
+ dev_dbg(dev, "Single link: %s (%d lanes)",
+ cdns_torrent_get_phy_type(cdns_phy->phys[0].phy_type),
+ cdns_phy->phys[0].num_lanes);
+
return 0;
put_child:
@@ -2573,6 +2546,206 @@ static int cdns_torrent_phy_remove(struct platform_device *pdev)
return 0;
}
+/* Single DisplayPort(DP) link configuration */
+static struct cdns_reg_pairs sl_dp_link_cmn_regs[] = {
+ {0x0000, PHY_PLL_CFG},
+};
+
+static struct cdns_reg_pairs sl_dp_xcvr_diag_ln_regs[] = {
+ {0x0000, XCVR_DIAG_HSCLK_SEL},
+ {0x0001, XCVR_DIAG_PLLDRC_CTRL}
+};
+
+static struct cdns_torrent_vals sl_dp_link_cmn_vals = {
+ .reg_pairs = sl_dp_link_cmn_regs,
+ .num_regs = ARRAY_SIZE(sl_dp_link_cmn_regs),
+};
+
+static struct cdns_torrent_vals sl_dp_xcvr_diag_ln_vals = {
+ .reg_pairs = sl_dp_xcvr_diag_ln_regs,
+ .num_regs = ARRAY_SIZE(sl_dp_xcvr_diag_ln_regs),
+};
+
+/* Single DP, 19.2 MHz Ref clk, no SSC */
+static struct cdns_reg_pairs sl_dp_19_2_no_ssc_cmn_regs[] = {
+ {0x0014, CMN_SSM_BIAS_TMR},
+ {0x0027, CMN_PLLSM0_PLLPRE_TMR},
+ {0x00A1, CMN_PLLSM0_PLLLOCK_TMR},
+ {0x0027, CMN_PLLSM1_PLLPRE_TMR},
+ {0x00A1, CMN_PLLSM1_PLLLOCK_TMR},
+ {0x0060, CMN_BGCAL_INIT_TMR},
+ {0x0060, CMN_BGCAL_ITER_TMR},
+ {0x0014, CMN_IBCAL_INIT_TMR},
+ {0x0018, CMN_TXPUCAL_INIT_TMR},
+ {0x0005, CMN_TXPUCAL_ITER_TMR},
+ {0x0018, CMN_TXPDCAL_INIT_TMR},
+ {0x0005, CMN_TXPDCAL_ITER_TMR},
+ {0x0240, CMN_RXCAL_INIT_TMR},
+ {0x0005, CMN_RXCAL_ITER_TMR},
+ {0x0002, CMN_SD_CAL_INIT_TMR},
+ {0x0002, CMN_SD_CAL_ITER_TMR},
+ {0x000B, CMN_SD_CAL_REFTIM_START},
+ {0x0137, CMN_SD_CAL_PLLCNT_START},
+ {0x0509, CMN_PDIAG_PLL0_CP_PADJ_M0},
+ {0x0F00, CMN_PDIAG_PLL0_CP_IADJ_M0},
+ {0x0F08, CMN_PDIAG_PLL0_FILT_PADJ_M0},
+ {0x0004, CMN_PLL0_DSM_DIAG_M0},
+ {0x0509, CMN_PDIAG_PLL1_CP_PADJ_M0},
+ {0x0F00, CMN_PDIAG_PLL1_CP_IADJ_M0},
+ {0x0F08, CMN_PDIAG_PLL1_FILT_PADJ_M0},
+ {0x0004, CMN_PLL1_DSM_DIAG_M0},
+ {0x00C0, CMN_PLL0_VCOCAL_INIT_TMR},
+ {0x0004, CMN_PLL0_VCOCAL_ITER_TMR},
+ {0x00C0, CMN_PLL1_VCOCAL_INIT_TMR},
+ {0x0004, CMN_PLL1_VCOCAL_ITER_TMR},
+ {0x0260, CMN_PLL0_VCOCAL_REFTIM_START},
+ {0x0003, CMN_PLL0_VCOCAL_TCTRL},
+ {0x0260, CMN_PLL1_VCOCAL_REFTIM_START},
+ {0x0003, CMN_PLL1_VCOCAL_TCTRL}
+};
+
+static struct cdns_reg_pairs sl_dp_19_2_no_ssc_tx_ln_regs[] = {
+ {0x0780, TX_RCVDET_ST_TMR},
+ {0x00FB, TX_PSC_A0},
+ {0x04AA, TX_PSC_A2},
+ {0x04AA, TX_PSC_A3},
+ {0x000F, XCVR_DIAG_BIDI_CTRL}
+};
+
+static struct cdns_reg_pairs sl_dp_19_2_no_ssc_rx_ln_regs[] = {
+ {0x0000, RX_PSC_A0},
+ {0x0000, RX_PSC_A2},
+ {0x0000, RX_PSC_A3},
+ {0x0000, RX_PSC_CAL},
+ {0x0000, RX_REE_GCSM1_CTRL},
+ {0x0000, RX_REE_GCSM2_CTRL},
+ {0x0000, RX_REE_PERGCSM_CTRL}
+};
+
+static struct cdns_torrent_vals sl_dp_19_2_no_ssc_cmn_vals = {
+ .reg_pairs = sl_dp_19_2_no_ssc_cmn_regs,
+ .num_regs = ARRAY_SIZE(sl_dp_19_2_no_ssc_cmn_regs),
+};
+
+static struct cdns_torrent_vals sl_dp_19_2_no_ssc_tx_ln_vals = {
+ .reg_pairs = sl_dp_19_2_no_ssc_tx_ln_regs,
+ .num_regs = ARRAY_SIZE(sl_dp_19_2_no_ssc_tx_ln_regs),
+};
+
+static struct cdns_torrent_vals sl_dp_19_2_no_ssc_rx_ln_vals = {
+ .reg_pairs = sl_dp_19_2_no_ssc_rx_ln_regs,
+ .num_regs = ARRAY_SIZE(sl_dp_19_2_no_ssc_rx_ln_regs),
+};
+
+/* Single DP, 25 MHz Ref clk, no SSC */
+static struct cdns_reg_pairs sl_dp_25_no_ssc_cmn_regs[] = {
+ {0x0019, CMN_SSM_BIAS_TMR},
+ {0x0032, CMN_PLLSM0_PLLPRE_TMR},
+ {0x00D1, CMN_PLLSM0_PLLLOCK_TMR},
+ {0x0032, CMN_PLLSM1_PLLPRE_TMR},
+ {0x00D1, CMN_PLLSM1_PLLLOCK_TMR},
+ {0x007D, CMN_BGCAL_INIT_TMR},
+ {0x007D, CMN_BGCAL_ITER_TMR},
+ {0x0019, CMN_IBCAL_INIT_TMR},
+ {0x001E, CMN_TXPUCAL_INIT_TMR},
+ {0x0006, CMN_TXPUCAL_ITER_TMR},
+ {0x001E, CMN_TXPDCAL_INIT_TMR},
+ {0x0006, CMN_TXPDCAL_ITER_TMR},
+ {0x02EE, CMN_RXCAL_INIT_TMR},
+ {0x0006, CMN_RXCAL_ITER_TMR},
+ {0x0002, CMN_SD_CAL_INIT_TMR},
+ {0x0002, CMN_SD_CAL_ITER_TMR},
+ {0x000E, CMN_SD_CAL_REFTIM_START},
+ {0x012B, CMN_SD_CAL_PLLCNT_START},
+ {0x0509, CMN_PDIAG_PLL0_CP_PADJ_M0},
+ {0x0F00, CMN_PDIAG_PLL0_CP_IADJ_M0},
+ {0x0F08, CMN_PDIAG_PLL0_FILT_PADJ_M0},
+ {0x0004, CMN_PLL0_DSM_DIAG_M0},
+ {0x0509, CMN_PDIAG_PLL1_CP_PADJ_M0},
+ {0x0F00, CMN_PDIAG_PLL1_CP_IADJ_M0},
+ {0x0F08, CMN_PDIAG_PLL1_FILT_PADJ_M0},
+ {0x0004, CMN_PLL1_DSM_DIAG_M0},
+ {0x00FA, CMN_PLL0_VCOCAL_INIT_TMR},
+ {0x0004, CMN_PLL0_VCOCAL_ITER_TMR},
+ {0x00FA, CMN_PLL1_VCOCAL_INIT_TMR},
+ {0x0004, CMN_PLL1_VCOCAL_ITER_TMR},
+ {0x0317, CMN_PLL0_VCOCAL_REFTIM_START},
+ {0x0003, CMN_PLL0_VCOCAL_TCTRL},
+ {0x0317, CMN_PLL1_VCOCAL_REFTIM_START},
+ {0x0003, CMN_PLL1_VCOCAL_TCTRL}
+};
+
+static struct cdns_reg_pairs sl_dp_25_no_ssc_tx_ln_regs[] = {
+ {0x09C4, TX_RCVDET_ST_TMR},
+ {0x00FB, TX_PSC_A0},
+ {0x04AA, TX_PSC_A2},
+ {0x04AA, TX_PSC_A3},
+ {0x000F, XCVR_DIAG_BIDI_CTRL}
+};
+
+static struct cdns_reg_pairs sl_dp_25_no_ssc_rx_ln_regs[] = {
+ {0x0000, RX_PSC_A0},
+ {0x0000, RX_PSC_A2},
+ {0x0000, RX_PSC_A3},
+ {0x0000, RX_PSC_CAL},
+ {0x0000, RX_REE_GCSM1_CTRL},
+ {0x0000, RX_REE_GCSM2_CTRL},
+ {0x0000, RX_REE_PERGCSM_CTRL}
+};
+
+static struct cdns_torrent_vals sl_dp_25_no_ssc_cmn_vals = {
+ .reg_pairs = sl_dp_25_no_ssc_cmn_regs,
+ .num_regs = ARRAY_SIZE(sl_dp_25_no_ssc_cmn_regs),
+};
+
+static struct cdns_torrent_vals sl_dp_25_no_ssc_tx_ln_vals = {
+ .reg_pairs = sl_dp_25_no_ssc_tx_ln_regs,
+ .num_regs = ARRAY_SIZE(sl_dp_25_no_ssc_tx_ln_regs),
+};
+
+static struct cdns_torrent_vals sl_dp_25_no_ssc_rx_ln_vals = {
+ .reg_pairs = sl_dp_25_no_ssc_rx_ln_regs,
+ .num_regs = ARRAY_SIZE(sl_dp_25_no_ssc_rx_ln_regs),
+};
+
+/* Single DP, 100 MHz Ref clk, no SSC */
+static struct cdns_reg_pairs sl_dp_100_no_ssc_cmn_regs[] = {
+ {0x0003, CMN_PLL0_VCOCAL_TCTRL},
+ {0x0003, CMN_PLL1_VCOCAL_TCTRL}
+};
+
+static struct cdns_reg_pairs sl_dp_100_no_ssc_tx_ln_regs[] = {
+ {0x00FB, TX_PSC_A0},
+ {0x04AA, TX_PSC_A2},
+ {0x04AA, TX_PSC_A3},
+ {0x000F, XCVR_DIAG_BIDI_CTRL}
+};
+
+static struct cdns_reg_pairs sl_dp_100_no_ssc_rx_ln_regs[] = {
+ {0x0000, RX_PSC_A0},
+ {0x0000, RX_PSC_A2},
+ {0x0000, RX_PSC_A3},
+ {0x0000, RX_PSC_CAL},
+ {0x0000, RX_REE_GCSM1_CTRL},
+ {0x0000, RX_REE_GCSM2_CTRL},
+ {0x0000, RX_REE_PERGCSM_CTRL}
+};
+
+static struct cdns_torrent_vals sl_dp_100_no_ssc_cmn_vals = {
+ .reg_pairs = sl_dp_100_no_ssc_cmn_regs,
+ .num_regs = ARRAY_SIZE(sl_dp_100_no_ssc_cmn_regs),
+};
+
+static struct cdns_torrent_vals sl_dp_100_no_ssc_tx_ln_vals = {
+ .reg_pairs = sl_dp_100_no_ssc_tx_ln_regs,
+ .num_regs = ARRAY_SIZE(sl_dp_100_no_ssc_tx_ln_regs),
+};
+
+static struct cdns_torrent_vals sl_dp_100_no_ssc_rx_ln_vals = {
+ .reg_pairs = sl_dp_100_no_ssc_rx_ln_regs,
+ .num_regs = ARRAY_SIZE(sl_dp_100_no_ssc_rx_ln_regs),
+};
+
/* USB and SGMII/QSGMII link configuration */
static struct cdns_reg_pairs usb_sgmii_link_cmn_regs[] = {
{0x0002, PHY_PLL_CFG},
@@ -3311,6 +3484,11 @@ static const struct cdns_torrent_data cdns_map_torrent = {
.block_offset_shift = 0x2,
.reg_offset_shift = 0x2,
.link_cmn_vals = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_link_cmn_vals,
+ },
+ },
[TYPE_PCIE] = {
[TYPE_NONE] = {
[NO_SSC] = NULL,
@@ -3387,6 +3565,11 @@ static const struct cdns_torrent_data cdns_map_torrent = {
},
},
.xcvr_diag_vals = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_xcvr_diag_ln_vals,
+ },
+ },
[TYPE_PCIE] = {
[TYPE_NONE] = {
[NO_SSC] = NULL,
@@ -3487,230 +3670,293 @@ static const struct cdns_torrent_data cdns_map_torrent = {
},
},
.cmn_vals = {
- [TYPE_PCIE] = {
- [TYPE_NONE] = {
- [NO_SSC] = NULL,
- [EXTERNAL_SSC] = NULL,
- [INTERNAL_SSC] = &sl_pcie_100_int_ssc_cmn_vals,
- },
- [TYPE_SGMII] = {
- [NO_SSC] = &pcie_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &pcie_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &pcie_100_int_ssc_cmn_vals,
- },
- [TYPE_QSGMII] = {
- [NO_SSC] = &pcie_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &pcie_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &pcie_100_int_ssc_cmn_vals,
- },
- [TYPE_USB] = {
- [NO_SSC] = &pcie_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &pcie_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &pcie_100_int_ssc_cmn_vals,
- },
- },
- [TYPE_SGMII] = {
- [TYPE_NONE] = {
- [NO_SSC] = &sl_sgmii_100_no_ssc_cmn_vals,
- },
- [TYPE_PCIE] = {
- [NO_SSC] = &sgmii_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &sgmii_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &sgmii_100_int_ssc_cmn_vals,
- },
- [TYPE_USB] = {
- [NO_SSC] = &sgmii_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &sgmii_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &sgmii_100_no_ssc_cmn_vals,
+ [CLK_19_2_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_19_2_no_ssc_cmn_vals,
+ },
},
},
- [TYPE_QSGMII] = {
- [TYPE_NONE] = {
- [NO_SSC] = &sl_qsgmii_100_no_ssc_cmn_vals,
- },
- [TYPE_PCIE] = {
- [NO_SSC] = &qsgmii_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &qsgmii_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &qsgmii_100_int_ssc_cmn_vals,
- },
- [TYPE_USB] = {
- [NO_SSC] = &qsgmii_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &qsgmii_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &qsgmii_100_no_ssc_cmn_vals,
+ [CLK_25_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_25_no_ssc_cmn_vals,
+ },
},
},
- [TYPE_USB] = {
- [TYPE_NONE] = {
- [NO_SSC] = &sl_usb_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &sl_usb_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &sl_usb_100_int_ssc_cmn_vals,
+ [CLK_100_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_100_no_ssc_cmn_vals,
+ },
},
[TYPE_PCIE] = {
- [NO_SSC] = &usb_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &usb_100_int_ssc_cmn_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = NULL,
+ [EXTERNAL_SSC] = NULL,
+ [INTERNAL_SSC] = &sl_pcie_100_int_ssc_cmn_vals,
+ },
+ [TYPE_SGMII] = {
+ [NO_SSC] = &pcie_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &pcie_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &pcie_100_int_ssc_cmn_vals,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = &pcie_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &pcie_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &pcie_100_int_ssc_cmn_vals,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = &pcie_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &pcie_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &pcie_100_int_ssc_cmn_vals,
+ },
},
[TYPE_SGMII] = {
- [NO_SSC] = &sl_usb_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &sl_usb_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &sl_usb_100_int_ssc_cmn_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_sgmii_100_no_ssc_cmn_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &sgmii_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &sgmii_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &sgmii_100_int_ssc_cmn_vals,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = &sgmii_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &sgmii_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &sgmii_100_no_ssc_cmn_vals,
+ },
},
[TYPE_QSGMII] = {
- [NO_SSC] = &sl_usb_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &sl_usb_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &sl_usb_100_int_ssc_cmn_vals,
- },
- },
- },
- .tx_ln_vals = {
- [TYPE_PCIE] = {
- [TYPE_NONE] = {
- [NO_SSC] = NULL,
- [EXTERNAL_SSC] = NULL,
- [INTERNAL_SSC] = NULL,
- },
- [TYPE_SGMII] = {
- [NO_SSC] = NULL,
- [EXTERNAL_SSC] = NULL,
- [INTERNAL_SSC] = NULL,
- },
- [TYPE_QSGMII] = {
- [NO_SSC] = NULL,
- [EXTERNAL_SSC] = NULL,
- [INTERNAL_SSC] = NULL,
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_qsgmii_100_no_ssc_cmn_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &qsgmii_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &qsgmii_100_int_ssc_cmn_vals,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &qsgmii_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &qsgmii_100_no_ssc_cmn_vals,
+ },
},
[TYPE_USB] = {
- [NO_SSC] = NULL,
- [EXTERNAL_SSC] = NULL,
- [INTERNAL_SSC] = NULL,
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_usb_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &sl_usb_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &sl_usb_100_int_ssc_cmn_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &usb_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &usb_100_int_ssc_cmn_vals,
+ },
+ [TYPE_SGMII] = {
+ [NO_SSC] = &sl_usb_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &sl_usb_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &sl_usb_100_int_ssc_cmn_vals,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = &sl_usb_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &sl_usb_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &sl_usb_100_int_ssc_cmn_vals,
+ },
},
},
- [TYPE_SGMII] = {
- [TYPE_NONE] = {
- [NO_SSC] = &sgmii_100_no_ssc_tx_ln_vals,
- },
- [TYPE_PCIE] = {
- [NO_SSC] = &sgmii_100_no_ssc_tx_ln_vals,
- [EXTERNAL_SSC] = &sgmii_100_no_ssc_tx_ln_vals,
- [INTERNAL_SSC] = &sgmii_100_no_ssc_tx_ln_vals,
- },
- [TYPE_USB] = {
- [NO_SSC] = &sgmii_100_no_ssc_tx_ln_vals,
- [EXTERNAL_SSC] = &sgmii_100_no_ssc_tx_ln_vals,
- [INTERNAL_SSC] = &sgmii_100_no_ssc_tx_ln_vals,
+ },
+ .tx_ln_vals = {
+ [CLK_19_2_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_19_2_no_ssc_tx_ln_vals,
+ },
},
},
- [TYPE_QSGMII] = {
- [TYPE_NONE] = {
- [NO_SSC] = &qsgmii_100_no_ssc_tx_ln_vals,
- },
- [TYPE_PCIE] = {
- [NO_SSC] = &qsgmii_100_no_ssc_tx_ln_vals,
- [EXTERNAL_SSC] = &qsgmii_100_no_ssc_tx_ln_vals,
- [INTERNAL_SSC] = &qsgmii_100_no_ssc_tx_ln_vals,
- },
- [TYPE_USB] = {
- [NO_SSC] = &qsgmii_100_no_ssc_tx_ln_vals,
- [EXTERNAL_SSC] = &qsgmii_100_no_ssc_tx_ln_vals,
- [INTERNAL_SSC] = &qsgmii_100_no_ssc_tx_ln_vals,
+ [CLK_25_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_25_no_ssc_tx_ln_vals,
+ },
},
},
- [TYPE_USB] = {
- [TYPE_NONE] = {
- [NO_SSC] = &usb_100_no_ssc_tx_ln_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
- [INTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [CLK_100_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_100_no_ssc_tx_ln_vals,
+ },
},
[TYPE_PCIE] = {
- [NO_SSC] = &usb_100_no_ssc_tx_ln_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
- [INTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = NULL,
+ [EXTERNAL_SSC] = NULL,
+ [INTERNAL_SSC] = NULL,
+ },
+ [TYPE_SGMII] = {
+ [NO_SSC] = NULL,
+ [EXTERNAL_SSC] = NULL,
+ [INTERNAL_SSC] = NULL,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = NULL,
+ [EXTERNAL_SSC] = NULL,
+ [INTERNAL_SSC] = NULL,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = NULL,
+ [EXTERNAL_SSC] = NULL,
+ [INTERNAL_SSC] = NULL,
+ },
},
[TYPE_SGMII] = {
- [NO_SSC] = &usb_100_no_ssc_tx_ln_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
- [INTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = &sgmii_100_no_ssc_tx_ln_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &sgmii_100_no_ssc_tx_ln_vals,
+ [EXTERNAL_SSC] = &sgmii_100_no_ssc_tx_ln_vals,
+ [INTERNAL_SSC] = &sgmii_100_no_ssc_tx_ln_vals,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = &sgmii_100_no_ssc_tx_ln_vals,
+ [EXTERNAL_SSC] = &sgmii_100_no_ssc_tx_ln_vals,
+ [INTERNAL_SSC] = &sgmii_100_no_ssc_tx_ln_vals,
+ },
},
[TYPE_QSGMII] = {
- [NO_SSC] = &usb_100_no_ssc_tx_ln_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
- [INTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
- },
- },
- },
- .rx_ln_vals = {
- [TYPE_PCIE] = {
- [TYPE_NONE] = {
- [NO_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- },
- [TYPE_SGMII] = {
- [NO_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- },
- [TYPE_QSGMII] = {
- [NO_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_tx_ln_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_tx_ln_vals,
+ [EXTERNAL_SSC] = &qsgmii_100_no_ssc_tx_ln_vals,
+ [INTERNAL_SSC] = &qsgmii_100_no_ssc_tx_ln_vals,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_tx_ln_vals,
+ [EXTERNAL_SSC] = &qsgmii_100_no_ssc_tx_ln_vals,
+ [INTERNAL_SSC] = &qsgmii_100_no_ssc_tx_ln_vals,
+ },
},
[TYPE_USB] = {
- [NO_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [INTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [INTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ },
+ [TYPE_SGMII] = {
+ [NO_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [INTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [INTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ },
},
},
- [TYPE_SGMII] = {
- [TYPE_NONE] = {
- [NO_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
- },
- [TYPE_PCIE] = {
- [NO_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
- },
- [TYPE_USB] = {
- [NO_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
+ },
+ .rx_ln_vals = {
+ [CLK_19_2_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_19_2_no_ssc_rx_ln_vals,
+ },
},
},
- [TYPE_QSGMII] = {
- [TYPE_NONE] = {
- [NO_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
- },
- [TYPE_PCIE] = {
- [NO_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
- },
- [TYPE_USB] = {
- [NO_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
+ [CLK_25_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_25_no_ssc_rx_ln_vals,
+ },
},
},
- [TYPE_USB] = {
- [TYPE_NONE] = {
- [NO_SSC] = &usb_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [CLK_100_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_100_no_ssc_rx_ln_vals,
+ },
},
[TYPE_PCIE] = {
- [NO_SSC] = &usb_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_SGMII] = {
+ [NO_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ },
},
[TYPE_SGMII] = {
- [NO_SSC] = &usb_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
+ },
},
[TYPE_QSGMII] = {
- [NO_SSC] = &usb_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
+ },
+ },
+ [TYPE_USB] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_SGMII] = {
+ [NO_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ },
},
},
},
@@ -3720,6 +3966,11 @@ static const struct cdns_torrent_data ti_j721e_map_torrent = {
.block_offset_shift = 0x0,
.reg_offset_shift = 0x1,
.link_cmn_vals = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_link_cmn_vals,
+ },
+ },
[TYPE_PCIE] = {
[TYPE_NONE] = {
[NO_SSC] = NULL,
@@ -3796,6 +4047,11 @@ static const struct cdns_torrent_data ti_j721e_map_torrent = {
},
},
.xcvr_diag_vals = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_xcvr_diag_ln_vals,
+ },
+ },
[TYPE_PCIE] = {
[TYPE_NONE] = {
[NO_SSC] = NULL,
@@ -3896,230 +4152,293 @@ static const struct cdns_torrent_data ti_j721e_map_torrent = {
},
},
.cmn_vals = {
- [TYPE_PCIE] = {
- [TYPE_NONE] = {
- [NO_SSC] = NULL,
- [EXTERNAL_SSC] = NULL,
- [INTERNAL_SSC] = &sl_pcie_100_int_ssc_cmn_vals,
- },
- [TYPE_SGMII] = {
- [NO_SSC] = &pcie_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &pcie_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &pcie_100_int_ssc_cmn_vals,
- },
- [TYPE_QSGMII] = {
- [NO_SSC] = &pcie_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &pcie_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &pcie_100_int_ssc_cmn_vals,
- },
- [TYPE_USB] = {
- [NO_SSC] = &pcie_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &pcie_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &pcie_100_int_ssc_cmn_vals,
- },
- },
- [TYPE_SGMII] = {
- [TYPE_NONE] = {
- [NO_SSC] = &sl_sgmii_100_no_ssc_cmn_vals,
- },
- [TYPE_PCIE] = {
- [NO_SSC] = &sgmii_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &sgmii_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &sgmii_100_int_ssc_cmn_vals,
- },
- [TYPE_USB] = {
- [NO_SSC] = &sgmii_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &sgmii_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &sgmii_100_no_ssc_cmn_vals,
+ [CLK_19_2_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_19_2_no_ssc_cmn_vals,
+ },
},
},
- [TYPE_QSGMII] = {
- [TYPE_NONE] = {
- [NO_SSC] = &sl_qsgmii_100_no_ssc_cmn_vals,
- },
- [TYPE_PCIE] = {
- [NO_SSC] = &qsgmii_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &qsgmii_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &qsgmii_100_int_ssc_cmn_vals,
- },
- [TYPE_USB] = {
- [NO_SSC] = &qsgmii_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &qsgmii_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &qsgmii_100_no_ssc_cmn_vals,
+ [CLK_25_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_25_no_ssc_cmn_vals,
+ },
},
},
- [TYPE_USB] = {
- [TYPE_NONE] = {
- [NO_SSC] = &sl_usb_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &sl_usb_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &sl_usb_100_int_ssc_cmn_vals,
+ [CLK_100_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_100_no_ssc_cmn_vals,
+ },
},
[TYPE_PCIE] = {
- [NO_SSC] = &usb_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &usb_100_int_ssc_cmn_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = NULL,
+ [EXTERNAL_SSC] = NULL,
+ [INTERNAL_SSC] = &sl_pcie_100_int_ssc_cmn_vals,
+ },
+ [TYPE_SGMII] = {
+ [NO_SSC] = &pcie_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &pcie_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &pcie_100_int_ssc_cmn_vals,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = &pcie_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &pcie_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &pcie_100_int_ssc_cmn_vals,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = &pcie_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &pcie_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &pcie_100_int_ssc_cmn_vals,
+ },
},
[TYPE_SGMII] = {
- [NO_SSC] = &sl_usb_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &sl_usb_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &sl_usb_100_int_ssc_cmn_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_sgmii_100_no_ssc_cmn_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &sgmii_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &sgmii_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &sgmii_100_int_ssc_cmn_vals,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = &sgmii_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &sgmii_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &sgmii_100_no_ssc_cmn_vals,
+ },
},
[TYPE_QSGMII] = {
- [NO_SSC] = &sl_usb_100_no_ssc_cmn_vals,
- [EXTERNAL_SSC] = &sl_usb_100_no_ssc_cmn_vals,
- [INTERNAL_SSC] = &sl_usb_100_int_ssc_cmn_vals,
- },
- },
- },
- .tx_ln_vals = {
- [TYPE_PCIE] = {
- [TYPE_NONE] = {
- [NO_SSC] = NULL,
- [EXTERNAL_SSC] = NULL,
- [INTERNAL_SSC] = NULL,
- },
- [TYPE_SGMII] = {
- [NO_SSC] = NULL,
- [EXTERNAL_SSC] = NULL,
- [INTERNAL_SSC] = NULL,
- },
- [TYPE_QSGMII] = {
- [NO_SSC] = NULL,
- [EXTERNAL_SSC] = NULL,
- [INTERNAL_SSC] = NULL,
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_qsgmii_100_no_ssc_cmn_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &qsgmii_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &qsgmii_100_int_ssc_cmn_vals,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &qsgmii_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &qsgmii_100_no_ssc_cmn_vals,
+ },
},
[TYPE_USB] = {
- [NO_SSC] = NULL,
- [EXTERNAL_SSC] = NULL,
- [INTERNAL_SSC] = NULL,
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_usb_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &sl_usb_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &sl_usb_100_int_ssc_cmn_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &usb_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &usb_100_int_ssc_cmn_vals,
+ },
+ [TYPE_SGMII] = {
+ [NO_SSC] = &sl_usb_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &sl_usb_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &sl_usb_100_int_ssc_cmn_vals,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = &sl_usb_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &sl_usb_100_no_ssc_cmn_vals,
+ [INTERNAL_SSC] = &sl_usb_100_int_ssc_cmn_vals,
+ },
},
},
- [TYPE_SGMII] = {
- [TYPE_NONE] = {
- [NO_SSC] = &ti_sgmii_100_no_ssc_tx_ln_vals,
- },
- [TYPE_PCIE] = {
- [NO_SSC] = &ti_sgmii_100_no_ssc_tx_ln_vals,
- [EXTERNAL_SSC] = &ti_sgmii_100_no_ssc_tx_ln_vals,
- [INTERNAL_SSC] = &ti_sgmii_100_no_ssc_tx_ln_vals,
- },
- [TYPE_USB] = {
- [NO_SSC] = &ti_sgmii_100_no_ssc_tx_ln_vals,
- [EXTERNAL_SSC] = &ti_sgmii_100_no_ssc_tx_ln_vals,
- [INTERNAL_SSC] = &ti_sgmii_100_no_ssc_tx_ln_vals,
+ },
+ .tx_ln_vals = {
+ [CLK_19_2_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_19_2_no_ssc_tx_ln_vals,
+ },
},
},
- [TYPE_QSGMII] = {
- [TYPE_NONE] = {
- [NO_SSC] = &ti_qsgmii_100_no_ssc_tx_ln_vals,
- },
- [TYPE_PCIE] = {
- [NO_SSC] = &ti_qsgmii_100_no_ssc_tx_ln_vals,
- [EXTERNAL_SSC] = &ti_qsgmii_100_no_ssc_tx_ln_vals,
- [INTERNAL_SSC] = &ti_qsgmii_100_no_ssc_tx_ln_vals,
- },
- [TYPE_USB] = {
- [NO_SSC] = &ti_qsgmii_100_no_ssc_tx_ln_vals,
- [EXTERNAL_SSC] = &ti_qsgmii_100_no_ssc_tx_ln_vals,
- [INTERNAL_SSC] = &ti_qsgmii_100_no_ssc_tx_ln_vals,
+ [CLK_25_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_25_no_ssc_tx_ln_vals,
+ },
},
},
- [TYPE_USB] = {
- [TYPE_NONE] = {
- [NO_SSC] = &usb_100_no_ssc_tx_ln_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
- [INTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [CLK_100_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_100_no_ssc_tx_ln_vals,
+ },
},
[TYPE_PCIE] = {
- [NO_SSC] = &usb_100_no_ssc_tx_ln_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
- [INTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
- },
- [TYPE_SGMII] = {
- [NO_SSC] = &usb_100_no_ssc_tx_ln_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
- [INTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
- },
- [TYPE_QSGMII] = {
- [NO_SSC] = &usb_100_no_ssc_tx_ln_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
- [INTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
- },
- },
- },
- .rx_ln_vals = {
- [TYPE_PCIE] = {
- [TYPE_NONE] = {
- [NO_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = NULL,
+ [EXTERNAL_SSC] = NULL,
+ [INTERNAL_SSC] = NULL,
+ },
+ [TYPE_SGMII] = {
+ [NO_SSC] = NULL,
+ [EXTERNAL_SSC] = NULL,
+ [INTERNAL_SSC] = NULL,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = NULL,
+ [EXTERNAL_SSC] = NULL,
+ [INTERNAL_SSC] = NULL,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = NULL,
+ [EXTERNAL_SSC] = NULL,
+ [INTERNAL_SSC] = NULL,
+ },
},
[TYPE_SGMII] = {
- [NO_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = &ti_sgmii_100_no_ssc_tx_ln_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &ti_sgmii_100_no_ssc_tx_ln_vals,
+ [EXTERNAL_SSC] = &ti_sgmii_100_no_ssc_tx_ln_vals,
+ [INTERNAL_SSC] = &ti_sgmii_100_no_ssc_tx_ln_vals,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = &ti_sgmii_100_no_ssc_tx_ln_vals,
+ [EXTERNAL_SSC] = &ti_sgmii_100_no_ssc_tx_ln_vals,
+ [INTERNAL_SSC] = &ti_sgmii_100_no_ssc_tx_ln_vals,
+ },
},
[TYPE_QSGMII] = {
- [NO_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = &ti_qsgmii_100_no_ssc_tx_ln_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &ti_qsgmii_100_no_ssc_tx_ln_vals,
+ [EXTERNAL_SSC] = &ti_qsgmii_100_no_ssc_tx_ln_vals,
+ [INTERNAL_SSC] = &ti_qsgmii_100_no_ssc_tx_ln_vals,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = &ti_qsgmii_100_no_ssc_tx_ln_vals,
+ [EXTERNAL_SSC] = &ti_qsgmii_100_no_ssc_tx_ln_vals,
+ [INTERNAL_SSC] = &ti_qsgmii_100_no_ssc_tx_ln_vals,
+ },
},
[TYPE_USB] = {
- [NO_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [INTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [INTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ },
+ [TYPE_SGMII] = {
+ [NO_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [INTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ [INTERNAL_SSC] = &usb_100_no_ssc_tx_ln_vals,
+ },
},
},
- [TYPE_SGMII] = {
- [TYPE_NONE] = {
- [NO_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
- },
- [TYPE_PCIE] = {
- [NO_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
- },
- [TYPE_USB] = {
- [NO_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
+ },
+ .rx_ln_vals = {
+ [CLK_19_2_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_19_2_no_ssc_rx_ln_vals,
+ },
},
},
- [TYPE_QSGMII] = {
- [TYPE_NONE] = {
- [NO_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
- },
- [TYPE_PCIE] = {
- [NO_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
- },
- [TYPE_USB] = {
- [NO_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
+ [CLK_25_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_25_no_ssc_rx_ln_vals,
+ },
},
},
- [TYPE_USB] = {
- [TYPE_NONE] = {
- [NO_SSC] = &usb_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [CLK_100_MHZ] = {
+ [TYPE_DP] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &sl_dp_100_no_ssc_rx_ln_vals,
+ },
},
[TYPE_PCIE] = {
- [NO_SSC] = &usb_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_SGMII] = {
+ [NO_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &pcie_100_no_ssc_rx_ln_vals,
+ },
},
[TYPE_SGMII] = {
- [NO_SSC] = &usb_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &sgmii_100_no_ssc_rx_ln_vals,
+ },
},
[TYPE_QSGMII] = {
- [NO_SSC] = &usb_100_no_ssc_rx_ln_vals,
- [EXTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
- [INTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [TYPE_NONE] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_USB] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &qsgmii_100_no_ssc_rx_ln_vals,
+ },
+ },
+ [TYPE_USB] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_PCIE] = {
+ [NO_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_SGMII] = {
+ [NO_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [EXTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ [INTERNAL_SSC] = &usb_100_no_ssc_rx_ln_vals,
+ },
},
},
},
diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
index 810f25a47632..6781488cfc58 100644
--- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
+++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
@@ -29,22 +29,16 @@
#define COMPHY_FW_MODE_SATA 0x1
#define COMPHY_FW_MODE_SGMII 0x2
-#define COMPHY_FW_MODE_HS_SGMII 0x3
+#define COMPHY_FW_MODE_2500BASEX 0x3
#define COMPHY_FW_MODE_USB3H 0x4
#define COMPHY_FW_MODE_USB3D 0x5
#define COMPHY_FW_MODE_PCIE 0x6
-#define COMPHY_FW_MODE_RXAUI 0x7
-#define COMPHY_FW_MODE_XFI 0x8
-#define COMPHY_FW_MODE_SFI 0x9
#define COMPHY_FW_MODE_USB3 0xa
#define COMPHY_FW_SPEED_1_25G 0 /* SGMII 1G */
#define COMPHY_FW_SPEED_2_5G 1
-#define COMPHY_FW_SPEED_3_125G 2 /* SGMII 2.5G */
+#define COMPHY_FW_SPEED_3_125G 2 /* 2500BASE-X */
#define COMPHY_FW_SPEED_5G 3
-#define COMPHY_FW_SPEED_5_15625G 4 /* XFI 5G */
-#define COMPHY_FW_SPEED_6G 5
-#define COMPHY_FW_SPEED_10_3125G 6 /* XFI 10G */
#define COMPHY_FW_SPEED_MAX 0x3F
#define COMPHY_FW_MODE(mode) ((mode) << 12)
@@ -84,14 +78,14 @@ static const struct mvebu_a3700_comphy_conf mvebu_a3700_comphy_modes[] = {
MVEBU_A3700_COMPHY_CONF_ETH(0, PHY_INTERFACE_MODE_SGMII, 1,
COMPHY_FW_MODE_SGMII),
MVEBU_A3700_COMPHY_CONF_ETH(0, PHY_INTERFACE_MODE_2500BASEX, 1,
- COMPHY_FW_MODE_HS_SGMII),
+ COMPHY_FW_MODE_2500BASEX),
/* lane 1 */
MVEBU_A3700_COMPHY_CONF_GEN(1, PHY_MODE_PCIE, 0,
COMPHY_FW_MODE_PCIE),
MVEBU_A3700_COMPHY_CONF_ETH(1, PHY_INTERFACE_MODE_SGMII, 0,
COMPHY_FW_MODE_SGMII),
MVEBU_A3700_COMPHY_CONF_ETH(1, PHY_INTERFACE_MODE_2500BASEX, 0,
- COMPHY_FW_MODE_HS_SGMII),
+ COMPHY_FW_MODE_2500BASEX),
/* lane 2 */
MVEBU_A3700_COMPHY_CONF_GEN(2, PHY_MODE_SATA, 0,
COMPHY_FW_MODE_SATA),
@@ -205,7 +199,7 @@ static int mvebu_a3700_comphy_power_on(struct phy *phy)
COMPHY_FW_SPEED_1_25G);
break;
case PHY_INTERFACE_MODE_2500BASEX:
- dev_dbg(lane->dev, "set lane %d to HS SGMII mode\n",
+ dev_dbg(lane->dev, "set lane %d to 2500BASEX mode\n",
lane->id);
fw_param = COMPHY_FW_NET(fw_mode, lane->port,
COMPHY_FW_SPEED_3_125G);
diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
index 53ad127b100f..bbd6f2ad6f24 100644
--- a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
+++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
@@ -167,7 +167,7 @@
#define COMPHY_FW_MODE_SATA 0x1
#define COMPHY_FW_MODE_SGMII 0x2 /* SGMII 1G */
-#define COMPHY_FW_MODE_HS_SGMII 0x3 /* SGMII 2.5G */
+#define COMPHY_FW_MODE_2500BASEX 0x3 /* 2500BASE-X */
#define COMPHY_FW_MODE_USB3H 0x4
#define COMPHY_FW_MODE_USB3D 0x5
#define COMPHY_FW_MODE_PCIE 0x6
@@ -207,7 +207,7 @@ static const struct mvebu_comphy_conf mvebu_comphy_cp110_modes[] = {
/* lane 0 */
GEN_CONF(0, 0, PHY_MODE_PCIE, COMPHY_FW_MODE_PCIE),
ETH_CONF(0, 1, PHY_INTERFACE_MODE_SGMII, 0x1, COMPHY_FW_MODE_SGMII),
- ETH_CONF(0, 1, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_HS_SGMII),
+ ETH_CONF(0, 1, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_2500BASEX),
GEN_CONF(0, 1, PHY_MODE_SATA, COMPHY_FW_MODE_SATA),
/* lane 1 */
GEN_CONF(1, 0, PHY_MODE_USB_HOST_SS, COMPHY_FW_MODE_USB3H),
@@ -215,10 +215,10 @@ static const struct mvebu_comphy_conf mvebu_comphy_cp110_modes[] = {
GEN_CONF(1, 0, PHY_MODE_SATA, COMPHY_FW_MODE_SATA),
GEN_CONF(1, 0, PHY_MODE_PCIE, COMPHY_FW_MODE_PCIE),
ETH_CONF(1, 2, PHY_INTERFACE_MODE_SGMII, 0x1, COMPHY_FW_MODE_SGMII),
- ETH_CONF(1, 2, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_HS_SGMII),
+ ETH_CONF(1, 2, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_2500BASEX),
/* lane 2 */
ETH_CONF(2, 0, PHY_INTERFACE_MODE_SGMII, 0x1, COMPHY_FW_MODE_SGMII),
- ETH_CONF(2, 0, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_HS_SGMII),
+ ETH_CONF(2, 0, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_2500BASEX),
ETH_CONF(2, 0, PHY_INTERFACE_MODE_RXAUI, 0x1, COMPHY_FW_MODE_RXAUI),
ETH_CONF(2, 0, PHY_INTERFACE_MODE_10GBASER, 0x1, COMPHY_FW_MODE_XFI),
GEN_CONF(2, 0, PHY_MODE_USB_HOST_SS, COMPHY_FW_MODE_USB3H),
@@ -227,26 +227,26 @@ static const struct mvebu_comphy_conf mvebu_comphy_cp110_modes[] = {
/* lane 3 */
GEN_CONF(3, 0, PHY_MODE_PCIE, COMPHY_FW_MODE_PCIE),
ETH_CONF(3, 1, PHY_INTERFACE_MODE_SGMII, 0x2, COMPHY_FW_MODE_SGMII),
- ETH_CONF(3, 1, PHY_INTERFACE_MODE_2500BASEX, 0x2, COMPHY_FW_MODE_HS_SGMII),
+ ETH_CONF(3, 1, PHY_INTERFACE_MODE_2500BASEX, 0x2, COMPHY_FW_MODE_2500BASEX),
ETH_CONF(3, 1, PHY_INTERFACE_MODE_RXAUI, 0x1, COMPHY_FW_MODE_RXAUI),
GEN_CONF(3, 1, PHY_MODE_USB_HOST_SS, COMPHY_FW_MODE_USB3H),
GEN_CONF(3, 1, PHY_MODE_SATA, COMPHY_FW_MODE_SATA),
/* lane 4 */
ETH_CONF(4, 0, PHY_INTERFACE_MODE_SGMII, 0x2, COMPHY_FW_MODE_SGMII),
- ETH_CONF(4, 0, PHY_INTERFACE_MODE_2500BASEX, 0x2, COMPHY_FW_MODE_HS_SGMII),
+ ETH_CONF(4, 0, PHY_INTERFACE_MODE_2500BASEX, 0x2, COMPHY_FW_MODE_2500BASEX),
ETH_CONF(4, 0, PHY_INTERFACE_MODE_10GBASER, 0x2, COMPHY_FW_MODE_XFI),
ETH_CONF(4, 0, PHY_INTERFACE_MODE_RXAUI, 0x2, COMPHY_FW_MODE_RXAUI),
GEN_CONF(4, 0, PHY_MODE_USB_DEVICE_SS, COMPHY_FW_MODE_USB3D),
GEN_CONF(4, 1, PHY_MODE_USB_HOST_SS, COMPHY_FW_MODE_USB3H),
GEN_CONF(4, 1, PHY_MODE_PCIE, COMPHY_FW_MODE_PCIE),
ETH_CONF(4, 1, PHY_INTERFACE_MODE_SGMII, 0x1, COMPHY_FW_MODE_SGMII),
- ETH_CONF(4, 1, PHY_INTERFACE_MODE_2500BASEX, -1, COMPHY_FW_MODE_HS_SGMII),
+ ETH_CONF(4, 1, PHY_INTERFACE_MODE_2500BASEX, -1, COMPHY_FW_MODE_2500BASEX),
ETH_CONF(4, 1, PHY_INTERFACE_MODE_10GBASER, -1, COMPHY_FW_MODE_XFI),
/* lane 5 */
ETH_CONF(5, 1, PHY_INTERFACE_MODE_RXAUI, 0x2, COMPHY_FW_MODE_RXAUI),
GEN_CONF(5, 1, PHY_MODE_SATA, COMPHY_FW_MODE_SATA),
ETH_CONF(5, 2, PHY_INTERFACE_MODE_SGMII, 0x1, COMPHY_FW_MODE_SGMII),
- ETH_CONF(5, 2, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_HS_SGMII),
+ ETH_CONF(5, 2, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_2500BASEX),
GEN_CONF(5, 2, PHY_MODE_PCIE, COMPHY_FW_MODE_PCIE),
};
diff --git a/drivers/phy/mediatek/phy-mtk-hdmi.c b/drivers/phy/mediatek/phy-mtk-hdmi.c
index 8ad8f717ef43..5fb4217fb8e0 100644
--- a/drivers/phy/mediatek/phy-mtk-hdmi.c
+++ b/drivers/phy/mediatek/phy-mtk-hdmi.c
@@ -100,7 +100,6 @@ static int mtk_hdmi_phy_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct mtk_hdmi_phy *hdmi_phy;
- struct resource *mem;
struct clk *ref_clk;
const char *ref_clk_name;
struct clk_init_data clk_init = {
@@ -116,11 +115,9 @@ static int mtk_hdmi_phy_probe(struct platform_device *pdev)
if (!hdmi_phy)
return -ENOMEM;
- mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- hdmi_phy->regs = devm_ioremap_resource(dev, mem);
- if (IS_ERR(hdmi_phy->regs)) {
+ hdmi_phy->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(hdmi_phy->regs))
return PTR_ERR(hdmi_phy->regs);
- }
ref_clk = devm_clk_get(dev, "pll_ref");
if (IS_ERR(ref_clk)) {
diff --git a/drivers/phy/mediatek/phy-mtk-mipi-dsi.c b/drivers/phy/mediatek/phy-mtk-mipi-dsi.c
index 01cf31633019..28ad9403c441 100644
--- a/drivers/phy/mediatek/phy-mtk-mipi-dsi.c
+++ b/drivers/phy/mediatek/phy-mtk-mipi-dsi.c
@@ -130,7 +130,6 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct mtk_mipi_tx *mipi_tx;
- struct resource *mem;
const char *ref_clk_name;
struct clk *ref_clk;
struct clk_init_data clk_init = {
@@ -148,11 +147,9 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev)
mipi_tx->driver_data = of_device_get_match_data(dev);
- mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- mipi_tx->regs = devm_ioremap_resource(dev, mem);
- if (IS_ERR(mipi_tx->regs)) {
+ mipi_tx->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(mipi_tx->regs))
return PTR_ERR(mipi_tx->regs);
- }
ref_clk = devm_clk_get(dev, NULL);
if (IS_ERR(ref_clk)) {
@@ -203,10 +200,8 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev)
phy_set_drvdata(phy, mipi_tx);
phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
- if (IS_ERR(phy_provider)) {
- ret = PTR_ERR(phy_provider);
- return ret;
- }
+ if (IS_ERR(phy_provider))
+ return PTR_ERR(phy_provider);
mipi_tx->dev = dev;
diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c
index 731c483a04de..cdcef865fe9e 100644
--- a/drivers/phy/mediatek/phy-mtk-tphy.c
+++ b/drivers/phy/mediatek/phy-mtk-tphy.c
@@ -10,11 +10,13 @@
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/iopoll.h>
+#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/phy/phy.h>
#include <linux/platform_device.h>
+#include <linux/regmap.h>
/* version V1 sub-banks offset base address */
/* banks shared by multiple phys */
@@ -27,7 +29,8 @@
#define SSUSB_SIFSLV_V1_U3PHYD 0x000
#define SSUSB_SIFSLV_V1_U3PHYA 0x200
-/* version V2 sub-banks offset base address */
+/* version V2/V3 sub-banks offset base address */
+/* V3: U2FREQ is not used anymore, but reserved */
/* u2 phy banks */
#define SSUSB_SIFSLV_V2_MISC 0x000
#define SSUSB_SIFSLV_V2_U2FREQ 0x100
@@ -40,6 +43,8 @@
#define U3P_USBPHYACR0 0x000
#define PA0_RG_U2PLL_FORCE_ON BIT(15)
+#define PA0_USB20_PLL_PREDIV GENMASK(7, 6)
+#define PA0_USB20_PLL_PREDIV_VAL(x) ((0x3 & (x)) << 6)
#define PA0_RG_USB20_INTR_EN BIT(5)
#define U3P_USBPHYACR1 0x004
@@ -51,6 +56,8 @@
#define PA1_RG_TERM_SEL_VAL(x) ((0x7 & (x)) << 8)
#define U3P_USBPHYACR2 0x008
+#define PA2_RG_U2PLL_BW GENMASK(21, 19)
+#define PA2_RG_U2PLL_BW_VAL(x) ((0x7 & (x)) << 19)
#define PA2_RG_SIF_U2PLL_FORCE_EN BIT(18)
#define U3P_USBPHYACR5 0x014
@@ -72,6 +79,14 @@
#define P2C_USB20_GPIO_MODE BIT(8)
#define P2C_U2_GPIO_CTR_MSK (P2C_RG_USB20_GPIO_CTL | P2C_USB20_GPIO_MODE)
+#define U3P_U2PHYA_RESV 0x030
+#define P2R_RG_U2PLL_FBDIV_26M 0x1bb13b
+#define P2R_RG_U2PLL_FBDIV_48M 0x3c0000
+
+#define U3P_U2PHYA_RESV1 0x044
+#define P2R_RG_U2PLL_REFCLK_SEL BIT(5)
+#define P2R_RG_U2PLL_FRA_EN BIT(3)
+
#define U3D_U2PHYDCR0 0x060
#define P2C_RG_SIF_U2PLL_FORCE_ON BIT(24)
@@ -267,14 +282,31 @@
#define RG_CDR_BIRLTD0_GEN3_MSK GENMASK(4, 0)
#define RG_CDR_BIRLTD0_GEN3_VAL(x) (0x1f & (x))
+/* PHY switch between pcie/usb3/sgmii/sata */
+#define USB_PHY_SWITCH_CTRL 0x0
+#define RG_PHY_SW_TYPE GENMASK(3, 0)
+#define RG_PHY_SW_PCIE 0x0
+#define RG_PHY_SW_USB3 0x1
+#define RG_PHY_SW_SGMII 0x2
+#define RG_PHY_SW_SATA 0x3
+
+#define TPHY_CLKS_CNT 2
+
enum mtk_phy_version {
MTK_PHY_V1 = 1,
MTK_PHY_V2,
+ MTK_PHY_V3,
};
struct mtk_phy_pdata {
/* avoid RX sensitivity level degradation only for mt8173 */
bool avoid_rx_sen_degradation;
+ /*
+ * workaround only for mt8195, HW fix it for others of V3,
+ * u2phy should use integer mode instead of fractional mode of
+ * 48M PLL, fix it by switching PLL to 26M from default 48M
+ */
+ bool sw_pll_48m_to_26m;
enum mtk_phy_version version;
};
@@ -298,10 +330,12 @@ struct mtk_phy_instance {
struct u2phy_banks u2_banks;
struct u3phy_banks u3_banks;
};
- struct clk *ref_clk; /* reference clock of (digital) phy */
- struct clk *da_ref_clk; /* reference clock of analog phy */
+ struct clk_bulk_data clks[TPHY_CLKS_CNT];
u32 index;
- u8 type;
+ u32 type;
+ struct regmap *type_sw;
+ u32 type_sw_reg;
+ u32 type_sw_index;
int eye_src;
int eye_vrt;
int eye_term;
@@ -330,6 +364,10 @@ static void hs_slew_rate_calibrate(struct mtk_tphy *tphy,
int fm_out;
u32 tmp;
+ /* HW V3 doesn't support slew rate cal anymore */
+ if (tphy->pdata->version == MTK_PHY_V3)
+ return;
+
/* use force value */
if (instance->eye_src)
return;
@@ -450,6 +488,33 @@ static void u3_phy_instance_init(struct mtk_tphy *tphy,
dev_dbg(tphy->dev, "%s(%d)\n", __func__, instance->index);
}
+/*
+ * Reprogram the u2phy PLL registers with the 26M settings.
+ * Does nothing unless the platform data sets sw_pll_48m_to_26m.
+ */
+static void u2_phy_pll_26m_set(struct mtk_tphy *tphy,
+ struct mtk_phy_instance *instance)
+{
+ struct u2phy_banks *u2_banks = &instance->u2_banks;
+ void __iomem *com = u2_banks->com;
+ u32 tmp;
+
+ if (!tphy->pdata->sw_pll_48m_to_26m)
+ return;
+
+ /* clear the PLL pre-divider field (written back as 0) */
+ tmp = readl(com + U3P_USBPHYACR0);
+ tmp &= ~PA0_USB20_PLL_PREDIV;
+ tmp |= PA0_USB20_PLL_PREDIV_VAL(0);
+ writel(tmp, com + U3P_USBPHYACR0);
+
+ /* set the PLL bandwidth field to 3 */
+ tmp = readl(com + U3P_USBPHYACR2);
+ tmp &= ~PA2_RG_U2PLL_BW;
+ tmp |= PA2_RG_U2PLL_BW_VAL(3);
+ writel(tmp, com + U3P_USBPHYACR2);
+
+ /* whole register is overwritten with the 26M feedback divider value */
+ writel(P2R_RG_U2PLL_FBDIV_26M, com + U3P_U2PHYA_RESV);
+
+ /* set the FRA_EN and REFCLK_SEL bits, other bits are preserved */
+ tmp = readl(com + U3P_U2PHYA_RESV1);
+ tmp |= P2R_RG_U2PLL_FRA_EN | P2R_RG_U2PLL_REFCLK_SEL;
+ writel(tmp, com + U3P_U2PHYA_RESV1);
+}
+
static void u2_phy_instance_init(struct mtk_tphy *tphy,
struct mtk_phy_instance *instance)
{
@@ -509,6 +574,9 @@ static void u2_phy_instance_init(struct mtk_tphy *tphy,
tmp |= PA6_RG_U2_SQTH_VAL(2);
writel(tmp, com + U3P_USBPHYACR6);
+ /* Workaround only for mt8195, HW fix it for others (V3) */
+ u2_phy_pll_26m_set(tphy, instance);
+
dev_dbg(tphy->dev, "%s(%d)\n", __func__, index);
}
@@ -878,7 +946,7 @@ static void u2_phy_props_set(struct mtk_tphy *tphy,
writel(tmp, com + U3P_U2PHYBC12C);
}
- if (instance->eye_src) {
+ if (tphy->pdata->version < MTK_PHY_V3 && instance->eye_src) {
tmp = readl(com + U3P_USBPHYACR5);
tmp &= ~PA5_RG_U2_HSTX_SRCTRL;
tmp |= PA5_RG_U2_HSTX_SRCTRL_VAL(instance->eye_src);
@@ -914,24 +982,73 @@ static void u2_phy_props_set(struct mtk_tphy *tphy,
}
}
-static int mtk_phy_init(struct phy *phy)
+/*
+ * Type switch for usb3/pcie/sgmii/sata.
+ *
+ * Look up the optional "mediatek,syscon-type" phandle of a sub-phy node and
+ * record the syscon regmap, the register offset and the index in @instance.
+ *
+ * Returns 0 when the property is absent or was resolved successfully,
+ * otherwise the error from phandle parsing or from the regmap lookup.
+ */
+static int phy_type_syscon_get(struct mtk_phy_instance *instance,
+ struct device_node *dn)
{
- struct mtk_phy_instance *instance = phy_get_drvdata(phy);
- struct mtk_tphy *tphy = dev_get_drvdata(phy->dev.parent);
+ struct of_phandle_args args;
int ret;
- ret = clk_prepare_enable(instance->ref_clk);
- if (ret) {
- dev_err(tphy->dev, "failed to enable ref_clk\n");
+ /* type switch function is optional */
+ if (!of_property_read_bool(dn, "mediatek,syscon-type"))
+ return 0;
+
+ /* the phandle carries two fixed cells: <register offset, index> */
+ ret = of_parse_phandle_with_fixed_args(dn, "mediatek,syscon-type",
+ 2, 0, &args);
+ if (ret)
return ret;
+
+ instance->type_sw_reg = args.args[0];
+ instance->type_sw_index = args.args[1] & 0x3; /* <=3 */
+ instance->type_sw = syscon_node_to_regmap(args.np);
+ /* the node reference is dropped whether or not the lookup succeeded */
+ of_node_put(args.np);
+ dev_info(&instance->phy->dev, "type_sw - reg %#x, index %d\n",
+ instance->type_sw_reg, instance->type_sw_index);
+
+ /* type_sw may be an error pointer here; convert it to an errno */
+ return PTR_ERR_OR_ZERO(instance->type_sw);
+}
+
+/*
+ * Program the PHY type selected by instance->type into the syscon type
+ * switch register, if one was configured for this instance.
+ *
+ * Each instance owns one byte-wide slot in the register, selected by
+ * type_sw_index; both the field mask and the new value have to be shifted
+ * into that slot, otherwise only index 0 would ever be programmed.
+ *
+ * USB2 and unknown types have no switch setting and are left untouched.
+ * Always returns 0.
+ */
+static int phy_type_set(struct mtk_phy_instance *instance)
+{
+ int type;
+ u32 offset;
+ u32 mask;
+
+ if (!instance->type_sw)
+ return 0;
+
+ switch (instance->type) {
+ case PHY_TYPE_USB3:
+ type = RG_PHY_SW_USB3;
+ break;
+ case PHY_TYPE_PCIE:
+ type = RG_PHY_SW_PCIE;
+ break;
+ case PHY_TYPE_SGMII:
+ type = RG_PHY_SW_SGMII;
+ break;
+ case PHY_TYPE_SATA:
+ type = RG_PHY_SW_SATA;
+ break;
+ case PHY_TYPE_USB2:
+ default:
+ return 0;
}
- ret = clk_prepare_enable(instance->da_ref_clk);
- if (ret) {
- dev_err(tphy->dev, "failed to enable da_ref\n");
- clk_disable_unprepare(instance->ref_clk);
+ /* shift mask and value alike into the slot owned by this instance */
+ offset = instance->type_sw_index * BITS_PER_BYTE;
+ mask = RG_PHY_SW_TYPE << offset;
+ regmap_update_bits(instance->type_sw, instance->type_sw_reg, mask,
+ type << offset);
+
+ return 0;
+}
+
+static int mtk_phy_init(struct phy *phy)
+{
+ struct mtk_phy_instance *instance = phy_get_drvdata(phy);
+ struct mtk_tphy *tphy = dev_get_drvdata(phy->dev.parent);
+ int ret;
+
+ ret = clk_bulk_prepare_enable(TPHY_CLKS_CNT, instance->clks);
+ if (ret)
return ret;
- }
switch (instance->type) {
case PHY_TYPE_USB2:
@@ -947,10 +1064,12 @@ static int mtk_phy_init(struct phy *phy)
case PHY_TYPE_SATA:
sata_phy_instance_init(tphy, instance);
break;
+ case PHY_TYPE_SGMII:
+ /* nothing to do, only used to set type */
+ break;
default:
dev_err(tphy->dev, "incompatible PHY type\n");
- clk_disable_unprepare(instance->ref_clk);
- clk_disable_unprepare(instance->da_ref_clk);
+ clk_bulk_disable_unprepare(TPHY_CLKS_CNT, instance->clks);
return -EINVAL;
}
@@ -993,8 +1112,7 @@ static int mtk_phy_exit(struct phy *phy)
if (instance->type == PHY_TYPE_USB2)
u2_phy_instance_exit(tphy, instance);
- clk_disable_unprepare(instance->ref_clk);
- clk_disable_unprepare(instance->da_ref_clk);
+ clk_bulk_disable_unprepare(TPHY_CLKS_CNT, instance->clks);
return 0;
}
@@ -1037,21 +1155,27 @@ static struct phy *mtk_phy_xlate(struct device *dev,
if (!(instance->type == PHY_TYPE_USB2 ||
instance->type == PHY_TYPE_USB3 ||
instance->type == PHY_TYPE_PCIE ||
- instance->type == PHY_TYPE_SATA)) {
+ instance->type == PHY_TYPE_SATA ||
+ instance->type == PHY_TYPE_SGMII)) {
dev_err(dev, "unsupported device type: %d\n", instance->type);
return ERR_PTR(-EINVAL);
}
- if (tphy->pdata->version == MTK_PHY_V1) {
+ switch (tphy->pdata->version) {
+ case MTK_PHY_V1:
phy_v1_banks_init(tphy, instance);
- } else if (tphy->pdata->version == MTK_PHY_V2) {
+ break;
+ case MTK_PHY_V2:
+ case MTK_PHY_V3:
phy_v2_banks_init(tphy, instance);
- } else {
+ break;
+ default:
dev_err(dev, "phy version is not supported\n");
return ERR_PTR(-EINVAL);
}
phy_parse_property(tphy, instance);
+ phy_type_set(instance);
return instance->phy;
}
@@ -1075,17 +1199,28 @@ static const struct mtk_phy_pdata tphy_v2_pdata = {
.version = MTK_PHY_V2,
};
+/* generic V3 PHY: no extra workaround flags set */
+static const struct mtk_phy_pdata tphy_v3_pdata = {
+ .version = MTK_PHY_V3,
+};
+
static const struct mtk_phy_pdata mt8173_pdata = {
.avoid_rx_sen_degradation = true,
.version = MTK_PHY_V1,
};
+/* mt8195: V3 PHY that additionally enables the u2phy 48M -> 26M PLL switch */
+static const struct mtk_phy_pdata mt8195_pdata = {
+ .sw_pll_48m_to_26m = true,
+ .version = MTK_PHY_V3,
+};
+
static const struct of_device_id mtk_tphy_id_table[] = {
{ .compatible = "mediatek,mt2701-u3phy", .data = &tphy_v1_pdata },
{ .compatible = "mediatek,mt2712-u3phy", .data = &tphy_v2_pdata },
{ .compatible = "mediatek,mt8173-u3phy", .data = &mt8173_pdata },
+ { .compatible = "mediatek,mt8195-tphy", .data = &mt8195_pdata },
{ .compatible = "mediatek,generic-tphy-v1", .data = &tphy_v1_pdata },
{ .compatible = "mediatek,generic-tphy-v2", .data = &tphy_v2_pdata },
+ { .compatible = "mediatek,generic-tphy-v3", .data = &tphy_v3_pdata },
{ },
};
MODULE_DEVICE_TABLE(of, mtk_tphy_id_table);
@@ -1129,16 +1264,21 @@ static int mtk_tphy_probe(struct platform_device *pdev)
}
}
- tphy->src_ref_clk = U3P_REF_CLK;
- tphy->src_coef = U3P_SLEW_RATE_COEF;
- /* update parameters of slew rate calibrate if exist */
- device_property_read_u32(dev, "mediatek,src-ref-clk-mhz",
- &tphy->src_ref_clk);
- device_property_read_u32(dev, "mediatek,src-coef", &tphy->src_coef);
+ if (tphy->pdata->version < MTK_PHY_V3) {
+ tphy->src_ref_clk = U3P_REF_CLK;
+ tphy->src_coef = U3P_SLEW_RATE_COEF;
+ /* update parameters of slew rate calibrate if exist */
+ device_property_read_u32(dev, "mediatek,src-ref-clk-mhz",
+ &tphy->src_ref_clk);
+ device_property_read_u32(dev, "mediatek,src-coef",
+ &tphy->src_coef);
+ }
port = 0;
for_each_child_of_node(np, child_np) {
struct mtk_phy_instance *instance;
+ struct clk_bulk_data *clks;
+ struct device *subdev;
struct phy *phy;
instance = devm_kzalloc(dev, sizeof(*instance), GFP_KERNEL);
@@ -1156,16 +1296,16 @@ static int mtk_tphy_probe(struct platform_device *pdev)
goto put_child;
}
+ subdev = &phy->dev;
retval = of_address_to_resource(child_np, 0, &res);
if (retval) {
- dev_err(dev, "failed to get address resource(id-%d)\n",
+ dev_err(subdev, "failed to get address resource(id-%d)\n",
port);
goto put_child;
}
- instance->port_base = devm_ioremap_resource(&phy->dev, &res);
+ instance->port_base = devm_ioremap_resource(subdev, &res);
if (IS_ERR(instance->port_base)) {
- dev_err(dev, "failed to remap phy regs\n");
retval = PTR_ERR(instance->port_base);
goto put_child;
}
@@ -1175,20 +1315,16 @@ static int mtk_tphy_probe(struct platform_device *pdev)
phy_set_drvdata(phy, instance);
port++;
- instance->ref_clk = devm_clk_get_optional(&phy->dev, "ref");
- if (IS_ERR(instance->ref_clk)) {
- dev_err(dev, "failed to get ref_clk(id-%d)\n", port);
- retval = PTR_ERR(instance->ref_clk);
+ clks = instance->clks;
+ clks[0].id = "ref"; /* digital (& analog) clock */
+ clks[1].id = "da_ref"; /* analog clock */
+ retval = devm_clk_bulk_get_optional(subdev, TPHY_CLKS_CNT, clks);
+ if (retval)
goto put_child;
- }
- instance->da_ref_clk =
- devm_clk_get_optional(&phy->dev, "da_ref");
- if (IS_ERR(instance->da_ref_clk)) {
- dev_err(dev, "failed to get da_ref_clk(id-%d)\n", port);
- retval = PTR_ERR(instance->da_ref_clk);
+ retval = phy_type_syscon_get(instance, child_np);
+ if (retval)
goto put_child;
- }
}
provider = devm_of_phy_provider_register(dev, mtk_phy_xlate);
diff --git a/drivers/phy/mediatek/phy-mtk-ufs.c b/drivers/phy/mediatek/phy-mtk-ufs.c
index 769b00b038d8..a6af06941203 100644
--- a/drivers/phy/mediatek/phy-mtk-ufs.c
+++ b/drivers/phy/mediatek/phy-mtk-ufs.c
@@ -31,11 +31,12 @@
#define FRC_CDR_ISO_EN BIT(19)
#define CDR_ISO_EN BIT(20)
+#define UFSPHY_CLKS_CNT 2
+
struct ufs_mtk_phy {
struct device *dev;
void __iomem *mmio;
- struct clk *mp_clk;
- struct clk *unipro_clk;
+ /* [0] is "unipro", [1] is "mp"; ids are set in ufs_mtk_phy_clk_init() */
+ struct clk_bulk_data clks[UFSPHY_CLKS_CNT];
};
static inline u32 mphy_readl(struct ufs_mtk_phy *phy, u32 reg)
@@ -74,20 +75,11 @@ static struct ufs_mtk_phy *get_ufs_mtk_phy(struct phy *generic_phy)
+/*
+ * Name the two PHY clocks ("unipro", "mp") and acquire both with a single
+ * device-managed bulk get. Returns the result of devm_clk_bulk_get().
+ */
static int ufs_mtk_phy_clk_init(struct ufs_mtk_phy *phy)
{
struct device *dev = phy->dev;
+ struct clk_bulk_data *clks = phy->clks;
- phy->unipro_clk = devm_clk_get(dev, "unipro");
- if (IS_ERR(phy->unipro_clk)) {
- dev_err(dev, "failed to get clock: unipro");
- return PTR_ERR(phy->unipro_clk);
- }
-
- phy->mp_clk = devm_clk_get(dev, "mp");
- if (IS_ERR(phy->mp_clk)) {
- dev_err(dev, "failed to get clock: mp");
- return PTR_ERR(phy->mp_clk);
- }
-
- return 0;
+ clks[0].id = "unipro";
+ clks[1].id = "mp";
+ return devm_clk_bulk_get(dev, UFSPHY_CLKS_CNT, clks);
}
static void ufs_mtk_phy_set_active(struct ufs_mtk_phy *phy)
@@ -150,26 +142,13 @@ static int ufs_mtk_phy_power_on(struct phy *generic_phy)
struct ufs_mtk_phy *phy = get_ufs_mtk_phy(generic_phy);
int ret;
- ret = clk_prepare_enable(phy->unipro_clk);
- if (ret) {
- dev_err(phy->dev, "unipro_clk enable failed %d\n", ret);
- goto out;
- }
-
- ret = clk_prepare_enable(phy->mp_clk);
- if (ret) {
- dev_err(phy->dev, "mp_clk enable failed %d\n", ret);
- goto out_unprepare_unipro_clk;
- }
+ ret = clk_bulk_prepare_enable(UFSPHY_CLKS_CNT, phy->clks);
+ if (ret)
+ return ret;
ufs_mtk_phy_set_active(phy);
return 0;
-
-out_unprepare_unipro_clk:
- clk_disable_unprepare(phy->unipro_clk);
-out:
- return ret;
}
static int ufs_mtk_phy_power_off(struct phy *generic_phy)
@@ -178,8 +157,7 @@ static int ufs_mtk_phy_power_off(struct phy *generic_phy)
ufs_mtk_phy_set_deep_hibern(phy);
- clk_disable_unprepare(phy->unipro_clk);
- clk_disable_unprepare(phy->mp_clk);
+ clk_bulk_disable_unprepare(UFSPHY_CLKS_CNT, phy->clks);
return 0;
}
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.c b/drivers/phy/qualcomm/phy-qcom-qmp.c
index cfe359488f5c..f14032170b1c 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp.c
@@ -234,6 +234,11 @@ static const unsigned int sdm845_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = {
[QPHY_PCS_READY_STATUS] = 0x160,
};
+static const unsigned int sm6115_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = {
+ [QPHY_START_CTRL] = 0x00,
+ [QPHY_PCS_READY_STATUS] = 0x168,
+};
+
static const unsigned int sm8250_pcie_regs_layout[QPHY_LAYOUT_SIZE] = {
[QPHY_SW_RESET] = 0x00,
[QPHY_START_CTRL] = 0x44,
@@ -1329,6 +1334,97 @@ static const struct qmp_phy_init_tbl qmp_v3_usb3_uniphy_pcs_tbl[] = {
QMP_PHY_INIT_CFG(QPHY_V3_PCS_REFGEN_REQ_CONFIG2, 0x60),
};
+static const struct qmp_phy_init_tbl sm6115_ufsphy_serdes_tbl[] = {
+ QMP_PHY_INIT_CFG(QSERDES_COM_CMN_CONFIG, 0x0e),
+ QMP_PHY_INIT_CFG(QSERDES_COM_SYSCLK_EN_SEL, 0x14),
+ QMP_PHY_INIT_CFG(QSERDES_COM_CLK_SELECT, 0x30),
+ QMP_PHY_INIT_CFG(QSERDES_COM_SYS_CLK_CTRL, 0x02),
+ QMP_PHY_INIT_CFG(QSERDES_COM_BIAS_EN_CLKBUFLR_EN, 0x08),
+ QMP_PHY_INIT_CFG(QSERDES_COM_BG_TIMER, 0x0a),
+ QMP_PHY_INIT_CFG(QSERDES_COM_HSCLK_SEL, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_COM_CORECLK_DIV, 0x0a),
+ QMP_PHY_INIT_CFG(QSERDES_COM_CORECLK_DIV_MODE1, 0x0a),
+ QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP_EN, 0x01),
+ QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_CTRL, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_COM_RESETSM_CNTRL, 0x20),
+ QMP_PHY_INIT_CFG(QSERDES_COM_CORE_CLK_EN, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP_CFG, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_TIMER1, 0xff),
+ QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_TIMER2, 0x3f),
+ QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_MAP, 0x04),
+ QMP_PHY_INIT_CFG(QSERDES_COM_SVS_MODE_CLK_SEL, 0x05),
+ QMP_PHY_INIT_CFG(QSERDES_COM_DEC_START_MODE0, 0x82),
+ QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START1_MODE0, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START2_MODE0, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START3_MODE0, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_COM_CP_CTRL_MODE0, 0x0b),
+ QMP_PHY_INIT_CFG(QSERDES_COM_PLL_RCTRL_MODE0, 0x16),
+ QMP_PHY_INIT_CFG(QSERDES_COM_PLL_CCTRL_MODE0, 0x28),
+ QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_GAIN0_MODE0, 0x80),
+ QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_GAIN1_MODE0, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE1_MODE0, 0x28),
+ QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE2_MODE0, 0x02),
+ QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP1_MODE0, 0xff),
+ QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP2_MODE0, 0x0c),
+ QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP3_MODE0, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_COM_DEC_START_MODE1, 0x98),
+ QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START1_MODE1, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START2_MODE1, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START3_MODE1, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_COM_CP_CTRL_MODE1, 0x0b),
+ QMP_PHY_INIT_CFG(QSERDES_COM_PLL_RCTRL_MODE1, 0x16),
+ QMP_PHY_INIT_CFG(QSERDES_COM_PLL_CCTRL_MODE1, 0x28),
+ QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_GAIN0_MODE1, 0x80),
+ QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_GAIN1_MODE1, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE1_MODE1, 0xd6),
+ QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE2_MODE1, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP1_MODE1, 0x32),
+ QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP2_MODE1, 0x0f),
+ QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP3_MODE1, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_COM_PLL_IVCO, 0x0f),
+ QMP_PHY_INIT_CFG(QSERDES_COM_BG_TRIM, 0x0f),
+ QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_INITVAL1, 0xff),
+ QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_INITVAL2, 0x00),
+
+ /* Rate B */
+ QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_MAP, 0x44),
+};
+
+static const struct qmp_phy_init_tbl sm6115_ufsphy_tx_tbl[] = {
+ QMP_PHY_INIT_CFG(QSERDES_TX_HIGHZ_TRANSCEIVEREN_BIAS_DRVR_EN, 0x45),
+ QMP_PHY_INIT_CFG(QSERDES_TX_LANE_MODE, 0x06),
+};
+
+static const struct qmp_phy_init_tbl sm6115_ufsphy_rx_tbl[] = {
+ QMP_PHY_INIT_CFG(QSERDES_RX_SIGDET_LVL, 0x24),
+ QMP_PHY_INIT_CFG(QSERDES_RX_SIGDET_CNTRL, 0x0F),
+ QMP_PHY_INIT_CFG(QSERDES_RX_RX_INTERFACE_MODE, 0x40),
+ QMP_PHY_INIT_CFG(QSERDES_RX_SIGDET_DEGLITCH_CNTRL, 0x1E),
+ QMP_PHY_INIT_CFG(QSERDES_RX_UCDR_FASTLOCK_FO_GAIN, 0x0B),
+ QMP_PHY_INIT_CFG(QSERDES_RX_RX_TERM_BW, 0x5B),
+ QMP_PHY_INIT_CFG(QSERDES_RX_RX_EQ_GAIN1_LSB, 0xFF),
+ QMP_PHY_INIT_CFG(QSERDES_RX_RX_EQ_GAIN1_MSB, 0x3F),
+ QMP_PHY_INIT_CFG(QSERDES_RX_RX_EQ_GAIN2_LSB, 0xFF),
+ QMP_PHY_INIT_CFG(QSERDES_RX_RX_EQ_GAIN2_MSB, 0x3F),
+ QMP_PHY_INIT_CFG(QSERDES_RX_RX_EQU_ADAPTOR_CNTRL2, 0x0D),
+ QMP_PHY_INIT_CFG(QSERDES_RX_UCDR_SVS_SO_GAIN_HALF, 0x04),
+ QMP_PHY_INIT_CFG(QSERDES_RX_UCDR_SVS_SO_GAIN_QUARTER, 0x04),
+ QMP_PHY_INIT_CFG(QSERDES_RX_UCDR_SVS_SO_GAIN, 0x04),
+ QMP_PHY_INIT_CFG(QSERDES_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x5B),
+};
+
+static const struct qmp_phy_init_tbl sm6115_ufsphy_pcs_tbl[] = {
+ QMP_PHY_INIT_CFG(QPHY_RX_PWM_GEAR_BAND, 0x15),
+ QMP_PHY_INIT_CFG(QPHY_RX_SIGDET_CTRL2, 0x6d),
+ QMP_PHY_INIT_CFG(QPHY_TX_LARGE_AMP_DRV_LVL, 0x0f),
+ QMP_PHY_INIT_CFG(QPHY_TX_SMALL_AMP_DRV_LVL, 0x02),
+ QMP_PHY_INIT_CFG(QPHY_RX_MIN_STALL_NOCONFIG_TIME_CAP, 0x28),
+ QMP_PHY_INIT_CFG(QPHY_RX_SYM_RESYNC_CTRL, 0x03),
+ QMP_PHY_INIT_CFG(QPHY_TX_LARGE_AMP_POST_EMP_LVL, 0x12),
+ QMP_PHY_INIT_CFG(QPHY_TX_SMALL_AMP_POST_EMP_LVL, 0x0f),
+ QMP_PHY_INIT_CFG(QPHY_RX_MIN_HIBERN8_TIME, 0x9a), /* 8 us */
+};
+
static const struct qmp_phy_init_tbl sdm845_ufsphy_serdes_tbl[] = {
QMP_PHY_INIT_CFG(QSERDES_V3_COM_SYS_CLK_CTRL, 0x02),
QMP_PHY_INIT_CFG(QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN, 0x04),
@@ -2035,6 +2131,113 @@ static const struct qmp_phy_init_tbl qmp_v4_dp_tx_tbl[] = {
QMP_PHY_INIT_CFG(QSERDES_V4_TX_TX_EMP_POST1_LVL, 0x20),
};
+static const struct qmp_phy_init_tbl sc8180x_qmp_pcie_serdes_tbl[] = {
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_SYSCLK_EN_SEL, 0x08),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_CLK_SELECT, 0x34),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_CORECLK_DIV_MODE1, 0x08),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_PLL_IVCO, 0x0f),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_LOCK_CMP_EN, 0x42),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_VCO_TUNE1_MODE0, 0x24),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_VCO_TUNE2_MODE1, 0x03),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_VCO_TUNE1_MODE1, 0xb4),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_VCO_TUNE_MAP, 0x02),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_BIN_VCOCAL_HSCLK_SEL, 0x11),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_DEC_START_MODE0, 0x82),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_DIV_FRAC_START3_MODE0, 0x03),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_DIV_FRAC_START2_MODE0, 0x55),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_DIV_FRAC_START1_MODE0, 0x55),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_LOCK_CMP2_MODE0, 0x1a),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_LOCK_CMP1_MODE0, 0x0a),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_DEC_START_MODE1, 0x68),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_DIV_FRAC_START3_MODE1, 0x02),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_DIV_FRAC_START2_MODE1, 0xaa),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_DIV_FRAC_START1_MODE1, 0xab),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_LOCK_CMP2_MODE1, 0x34),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_LOCK_CMP1_MODE1, 0x14),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_HSCLK_SEL, 0x01),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_CP_CTRL_MODE0, 0x06),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_PLL_RCTRL_MODE0, 0x16),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_PLL_CCTRL_MODE0, 0x36),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_CP_CTRL_MODE1, 0x06),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_PLL_RCTRL_MODE1, 0x16),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_PLL_CCTRL_MODE1, 0x36),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x1e),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0xca),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_BIN_VCOCAL_CMP_CODE2_MODE1, 0x18),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_BIN_VCOCAL_CMP_CODE1_MODE1, 0xa2),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_SYSCLK_BUF_ENABLE, 0x07),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_SSC_EN_CENTER, 0x01),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_SSC_PER1, 0x31),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_SSC_PER2, 0x01),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_SSC_STEP_SIZE1_MODE0, 0xde),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_SSC_STEP_SIZE2_MODE0, 0x07),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_SSC_STEP_SIZE1_MODE1, 0x4c),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_SSC_STEP_SIZE2_MODE1, 0x06),
+ QMP_PHY_INIT_CFG(QSERDES_V4_COM_CLK_ENABLE1, 0x90),
+};
+
+static const struct qmp_phy_init_tbl sc8180x_qmp_pcie_tx_tbl[] = {
+ QMP_PHY_INIT_CFG(QSERDES_V4_TX_RCV_DETECT_LVL_2, 0x12),
+ QMP_PHY_INIT_CFG(QSERDES_V4_TX_LANE_MODE_1, 0x5),
+};
+
+static const struct qmp_phy_init_tbl sc8180x_qmp_pcie_rx_tbl[] = {
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_SIGDET_CNTRL, 0x03),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_SIGDET_ENABLES, 0x1c),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_SIGDET_DEGLITCH_CNTRL, 0x14),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_EQU_ADAPTOR_CNTRL1, 0x07),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_EQU_ADAPTOR_CNTRL2, 0x6e),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_EQU_ADAPTOR_CNTRL3, 0x6e),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_EQU_ADAPTOR_CNTRL4, 0x4a),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_DFE_EN_TIMER, 0x04),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x7f),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_PI_CONTROLS, 0x70),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x17),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_VGA_CAL_CNTRL1, 0x54),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_VGA_CAL_CNTRL2, 0x37),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_10_LOW, 0xd4),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_10_HIGH, 0x54),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_10_HIGH2, 0xdb),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_10_HIGH3, 0x39),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_10_HIGH4, 0x31),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_01_LOW, 0x24),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_01_HIGH, 0xe4),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_01_HIGH2, 0xec),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_01_HIGH3, 0x39),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_01_HIGH4, 0x36),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_00_LOW, 0x7f),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_00_HIGH, 0xff),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_00_HIGH2, 0xff),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_00_HIGH3, 0xdb),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_00_HIGH4, 0x75),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_IDAC_TSETTLE_HIGH, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_IDAC_TSETTLE_LOW, 0xc0),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_AUX_DATA_TCOARSE_TFINE, 0xa0),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_RCLK_AUXDATA_SEL, 0xc0),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_DCC_CTRL1, 0x0c),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_GM_CAL, 0x05),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_FO_GAIN, 0x0c),
+ QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_SO_GAIN, 0x03),
+};
+
+static const struct qmp_phy_init_tbl sc8180x_qmp_pcie_pcs_tbl[] = {
+ QMP_PHY_INIT_CFG(QPHY_V4_PCS_P2U3_WAKEUP_DLY_TIME_AUXCLK_L, 0x01),
+ QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_SIGDET_LVL, 0xaa),
+ QMP_PHY_INIT_CFG(QPHY_V4_PCS_RATE_SLEW_CNTRL1, 0x0b),
+ QMP_PHY_INIT_CFG(QPHY_V4_PCS_REFGEN_REQ_CONFIG1, 0x0d),
+ QMP_PHY_INIT_CFG(QPHY_V4_PCS_EQ_CONFIG5, 0x01),
+};
+
+static const struct qmp_phy_init_tbl sc8180x_qmp_pcie_pcs_misc_tbl[] = {
+ QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00),
+ QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_L1P1_WAKEUP_DLY_TIME_AUXCLK_L, 0x01),
+ QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_L1P2_WAKEUP_DLY_TIME_AUXCLK_L, 0x01),
+ QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_INT_AUX_CLK_CONFIG1, 0x00),
+ QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_PRESET_P10_PRE, 0x00),
+ QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_PRESET_P10_POST, 0x58),
+ QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1),
+};
+
static const struct qmp_phy_init_tbl sm8250_qmp_pcie_serdes_tbl[] = {
QMP_PHY_INIT_CFG(QSERDES_V4_COM_SYSCLK_EN_SEL, 0x08),
QMP_PHY_INIT_CFG(QSERDES_V4_COM_CLK_SELECT, 0x34),
@@ -3289,6 +3492,31 @@ static const struct qmp_phy_cfg sdm845_ufsphy_cfg = {
.no_pcs_sw_reset = true,
};
+/*
+ * SM6115 UFS PHY: single lane, programmed from the sm6115_ufsphy_* init
+ * tables with its own register layout; the clock list is shared with sdm845.
+ */
+static const struct qmp_phy_cfg sm6115_ufsphy_cfg = {
+ .type = PHY_TYPE_UFS,
+ .nlanes = 1,
+
+ .serdes_tbl = sm6115_ufsphy_serdes_tbl,
+ .serdes_tbl_num = ARRAY_SIZE(sm6115_ufsphy_serdes_tbl),
+ .tx_tbl = sm6115_ufsphy_tx_tbl,
+ .tx_tbl_num = ARRAY_SIZE(sm6115_ufsphy_tx_tbl),
+ .rx_tbl = sm6115_ufsphy_rx_tbl,
+ .rx_tbl_num = ARRAY_SIZE(sm6115_ufsphy_rx_tbl),
+ .pcs_tbl = sm6115_ufsphy_pcs_tbl,
+ .pcs_tbl_num = ARRAY_SIZE(sm6115_ufsphy_pcs_tbl),
+ .clk_list = sdm845_ufs_phy_clk_l,
+ .num_clks = ARRAY_SIZE(sdm845_ufs_phy_clk_l),
+ .vreg_list = qmp_phy_vreg_l,
+ .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l),
+ .regs = sm6115_ufsphy_regs_layout,
+
+ .start_ctrl = SERDES_START,
+ .pwrdn_ctrl = SW_PWRDN,
+
+ .is_dual_lane_phy = false,
+ .no_pcs_sw_reset = true,
+};
+
static const struct qmp_phy_cfg msm8998_pciephy_cfg = {
.type = PHY_TYPE_PCIE,
.nlanes = 1,
@@ -3399,6 +3627,76 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
.is_dual_lane_phy = true,
};
+/*
+ * SC8180X PCIe PHY: single lane, programmed from the sc8180x_qmp_pcie_* init
+ * tables. The clock and reset lists are shared with sdm845 and the register
+ * layout with sm8250. Every *_tbl_num has to be the ARRAY_SIZE() of the very
+ * table it is paired with.
+ */
+static const struct qmp_phy_cfg sc8180x_pciephy_cfg = {
+ .type = PHY_TYPE_PCIE,
+ .nlanes = 1,
+
+ .serdes_tbl = sc8180x_qmp_pcie_serdes_tbl,
+ .serdes_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_serdes_tbl),
+ .tx_tbl = sc8180x_qmp_pcie_tx_tbl,
+ .tx_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_tx_tbl),
+ .rx_tbl = sc8180x_qmp_pcie_rx_tbl,
+ .rx_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_rx_tbl),
+ .pcs_tbl = sc8180x_qmp_pcie_pcs_tbl,
+ .pcs_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_pcs_tbl),
+ .pcs_misc_tbl = sc8180x_qmp_pcie_pcs_misc_tbl,
+ .pcs_misc_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_pcs_misc_tbl),
+ .clk_list = sdm845_pciephy_clk_l,
+ .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l),
+ .reset_list = sdm845_pciephy_reset_l,
+ .num_resets = ARRAY_SIZE(sdm845_pciephy_reset_l),
+ .vreg_list = qmp_phy_vreg_l,
+ .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l),
+ .regs = sm8250_pcie_regs_layout,
+
+ .start_ctrl = PCS_START | SERDES_START,
+ .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL,
+
+ .has_pwrdn_delay = true,
+ .pwrdn_delay_min = 995, /* us */
+ .pwrdn_delay_max = 1005, /* us */
+};
+
+/*
+ * DP configuration used as the dp_cfg half of the SC8180X USB3/DP combo PHY.
+ * It is built entirely from the shared qmp_v4 DP init tables and callbacks.
+ */
+static const struct qmp_phy_cfg sc8180x_dpphy_cfg = {
+ .type = PHY_TYPE_DP,
+ .nlanes = 1,
+
+ .serdes_tbl = qmp_v4_dp_serdes_tbl,
+ .serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl),
+ .tx_tbl = qmp_v4_dp_tx_tbl,
+ .tx_tbl_num = ARRAY_SIZE(qmp_v4_dp_tx_tbl),
+
+ /* additional serdes tables, one per DP link rate (rbr/hbr/hbr2/hbr3) */
+ .serdes_tbl_rbr = qmp_v4_dp_serdes_tbl_rbr,
+ .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl_rbr),
+ .serdes_tbl_hbr = qmp_v4_dp_serdes_tbl_hbr,
+ .serdes_tbl_hbr_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl_hbr),
+ .serdes_tbl_hbr2 = qmp_v4_dp_serdes_tbl_hbr2,
+ .serdes_tbl_hbr2_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl_hbr2),
+ .serdes_tbl_hbr3 = qmp_v4_dp_serdes_tbl_hbr3,
+ .serdes_tbl_hbr3_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl_hbr3),
+
+ .clk_list = qmp_v3_phy_clk_l,
+ .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l),
+ .reset_list = sc7180_usb3phy_reset_l,
+ .num_resets = ARRAY_SIZE(sc7180_usb3phy_reset_l),
+ .vreg_list = qmp_phy_vreg_l,
+ .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l),
+ .regs = qmp_v3_usb3phy_regs_layout,
+
+ .has_phy_dp_com_ctrl = true,
+ .is_dual_lane_phy = true,
+
+ .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init,
+ .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx,
+ .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy,
+ .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate,
+};
+
+/* SC8180X combo PHY: USB3 side reuses the sm8150 config, DP side is sc8180x */
+static const struct qmp_phy_combo_cfg sc8180x_usb3dpphy_cfg = {
+ .usb_cfg = &sm8150_usb3phy_cfg,
+ .dp_cfg = &sc8180x_dpphy_cfg,
+};
+
static const struct qmp_phy_cfg sm8150_usb3_uniphy_cfg = {
.type = PHY_TYPE_USB3,
.nlanes = 1,
@@ -5018,6 +5316,7 @@ static int phy_dp_clks_register(struct qcom_qmp *qmp, struct qmp_phy *qphy,
{
struct clk_init_data init = { };
struct qmp_phy_dp_clks *dp_clks;
+ char name[64];
int ret;
dp_clks = devm_kzalloc(qmp->dev, sizeof(*dp_clks), GFP_KERNEL);
@@ -5027,15 +5326,17 @@ static int phy_dp_clks_register(struct qcom_qmp *qmp, struct qmp_phy *qphy,
dp_clks->qphy = qphy;
qphy->dp_clks = dp_clks;
+ snprintf(name, sizeof(name), "%s::link_clk", dev_name(qmp->dev));
init.ops = &qcom_qmp_dp_link_clk_ops;
- init.name = "qmp_dp_phy_pll_link_clk";
+ init.name = name;
dp_clks->dp_link_hw.init = &init;
ret = devm_clk_hw_register(qmp->dev, &dp_clks->dp_link_hw);
if (ret)
return ret;
+ snprintf(name, sizeof(name), "%s::vco_div_clk", dev_name(qmp->dev));
init.ops = &qcom_qmp_dp_pixel_clk_ops;
- init.name = "qmp_dp_phy_pll_vco_div_clk";
+ init.name = name;
dp_clks->dp_pixel_hw.init = &init;
ret = devm_clk_hw_register(qmp->dev, &dp_clks->dp_pixel_hw);
if (ret)
@@ -5226,18 +5527,27 @@ static const struct of_device_id qcom_qmp_phy_of_match_table[] = {
.compatible = "qcom,ipq6018-qmp-pcie-phy",
.data = &ipq6018_pciephy_cfg,
}, {
+ .compatible = "qcom,ipq6018-qmp-usb3-phy",
+ .data = &ipq8074_usb3phy_cfg,
+ }, {
.compatible = "qcom,sc7180-qmp-usb3-phy",
.data = &sc7180_usb3phy_cfg,
}, {
.compatible = "qcom,sc7180-qmp-usb3-dp-phy",
/* It's a combo phy */
}, {
+ .compatible = "qcom,sc8180x-qmp-pcie-phy",
+ .data = &sc8180x_pciephy_cfg,
+ }, {
.compatible = "qcom,sc8180x-qmp-ufs-phy",
.data = &sm8150_ufsphy_cfg,
}, {
.compatible = "qcom,sc8180x-qmp-usb3-phy",
.data = &sm8150_usb3phy_cfg,
}, {
+ .compatible = "qcom,sc8180x-qmp-usb3-dp-phy",
+ /* It's a combo phy */
+ }, {
.compatible = "qcom,sdm845-qhp-pcie-phy",
.data = &sdm845_qhp_pciephy_cfg,
}, {
@@ -5256,6 +5566,9 @@ static const struct of_device_id qcom_qmp_phy_of_match_table[] = {
.compatible = "qcom,msm8998-qmp-usb3-phy",
.data = &msm8998_usb3phy_cfg,
}, {
+ .compatible = "qcom,sm6115-qmp-ufs-phy",
+ .data = &sm6115_ufsphy_cfg,
+ }, {
.compatible = "qcom,sm8150-qmp-ufs-phy",
.data = &sm8150_ufsphy_cfg,
}, {
@@ -5314,6 +5627,10 @@ static const struct of_device_id qcom_qmp_combo_phy_of_match_table[] = {
.compatible = "qcom,sm8250-qmp-usb3-dp-phy",
.data = &sm8250_usb3dpphy_cfg,
},
+ {
+ .compatible = "qcom,sc8180x-qmp-usb3-dp-phy",
+ .data = &sc8180x_usb3dpphy_cfg,
+ },
{ }
};
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h
index 6592b58b13f6..bebeac2c091c 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp.h
+++ b/drivers/phy/qualcomm/phy-qcom-qmp.h
@@ -191,6 +191,8 @@
#define QSERDES_COM_VCO_TUNE2_MODE0 0x130
#define QSERDES_COM_VCO_TUNE1_MODE1 0x134
#define QSERDES_COM_VCO_TUNE2_MODE1 0x138
+#define QSERDES_COM_VCO_TUNE_INITVAL1 0x13c
+#define QSERDES_COM_VCO_TUNE_INITVAL2 0x140
#define QSERDES_COM_VCO_TUNE_TIMER1 0x144
#define QSERDES_COM_VCO_TUNE_TIMER2 0x148
#define QSERDES_COM_BG_CTRL 0x170
@@ -220,6 +222,10 @@
/* Only for QMP V2 PHY - RX registers */
#define QSERDES_RX_UCDR_SO_GAIN_HALF 0x010
#define QSERDES_RX_UCDR_SO_GAIN 0x01c
+#define QSERDES_RX_UCDR_SVS_SO_GAIN_HALF 0x030
+#define QSERDES_RX_UCDR_SVS_SO_GAIN_QUARTER 0x034
+#define QSERDES_RX_UCDR_SVS_SO_GAIN_EIGHTH 0x038
+#define QSERDES_RX_UCDR_SVS_SO_GAIN 0x03c
#define QSERDES_RX_UCDR_FASTLOCK_FO_GAIN 0x040
#define QSERDES_RX_UCDR_SO_SATURATION_AND_ENABLE 0x048
#define QSERDES_RX_RX_TERM_BW 0x090
@@ -243,6 +249,10 @@
#define QPHY_POWER_DOWN_CONTROL 0x04
#define QPHY_TXDEEMPH_M6DB_V0 0x24
#define QPHY_TXDEEMPH_M3P5DB_V0 0x28
+#define QPHY_TX_LARGE_AMP_DRV_LVL 0x34
+#define QPHY_TX_LARGE_AMP_POST_EMP_LVL 0x38
+#define QPHY_TX_SMALL_AMP_DRV_LVL 0x3c
+#define QPHY_TX_SMALL_AMP_POST_EMP_LVL 0x40
#define QPHY_ENDPOINT_REFCLK_DRIVE 0x54
#define QPHY_RX_IDLE_DTCT_CNTRL 0x58
#define QPHY_POWER_STATE_CONFIG1 0x60
@@ -253,6 +263,11 @@
#define QPHY_LOCK_DETECT_CONFIG3 0x88
#define QPHY_PWRUP_RESET_DLY_TIME_AUXCLK 0xa0
#define QPHY_LP_WAKEUP_DLY_TIME_AUXCLK 0xa4
+#define QPHY_RX_MIN_STALL_NOCONFIG_TIME_CAP 0xcc
+#define QPHY_RX_SYM_RESYNC_CTRL 0x13c
+#define QPHY_RX_MIN_HIBERN8_TIME 0x140
+#define QPHY_RX_SIGDET_CTRL2 0x148
+#define QPHY_RX_PWM_GEAR_BAND 0x154
#define QPHY_PLL_LOCK_CHK_DLY_TIME_AUXCLK_LSB 0x1A8
#define QPHY_OSC_DTCT_ACTIONS 0x1AC
#define QPHY_RX_SIGDET_LVL 0x1D8
@@ -280,6 +295,8 @@
#define QSERDES_V3_COM_SSC_PER2 0x020
#define QSERDES_V3_COM_SSC_STEP_SIZE1 0x024
#define QSERDES_V3_COM_SSC_STEP_SIZE2 0x028
+#define QSERDES_V3_COM_POST_DIV 0x02c
+#define QSERDES_V3_COM_POST_DIV_MUX 0x030
#define QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN 0x034
# define QSERDES_V3_COM_BIAS_EN 0x0001
# define QSERDES_V3_COM_BIAS_EN_MUX 0x0002
@@ -291,6 +308,7 @@
#define QSERDES_V3_COM_CLK_ENABLE1 0x038
#define QSERDES_V3_COM_SYS_CLK_CTRL 0x03c
#define QSERDES_V3_COM_SYSCLK_BUF_ENABLE 0x040
+#define QSERDES_V3_COM_PLL_EN 0x044
#define QSERDES_V3_COM_PLL_IVCO 0x048
#define QSERDES_V3_COM_LOCK_CMP1_MODE0 0x098
#define QSERDES_V3_COM_LOCK_CMP2_MODE0 0x09c
diff --git a/drivers/phy/qualcomm/phy-qcom-usb-hs.c b/drivers/phy/qualcomm/phy-qcom-usb-hs.c
index 5c6c17673396..53e46c220a3a 100644
--- a/drivers/phy/qualcomm/phy-qcom-usb-hs.c
+++ b/drivers/phy/qualcomm/phy-qcom-usb-hs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/**
+/*
* Copyright (C) 2016 Linaro Ltd
*/
#include <linux/module.h>
diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c
index fbc55232120e..9de617ca9daa 100644
--- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c
+++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c
@@ -64,6 +64,7 @@
/* VBCTRL */
#define USB2_VBCTRL_OCCLREN BIT(16)
#define USB2_VBCTRL_DRVVBUSSEL BIT(8)
+#define USB2_VBCTRL_VBOUT BIT(0)
/* LINECTRL1 */
#define USB2_LINECTRL1_DPRPD_EN BIT(19)
@@ -78,6 +79,10 @@
#define USB2_ADPCTRL_IDPULLUP BIT(5) /* 1 = ID sampling is enabled */
#define USB2_ADPCTRL_DRVVBUS BIT(4)
+/* RZ/G2L specific */
+#define USB2_OBINT_IDCHG_EN BIT(0)
+#define USB2_LINECTRL1_USB2_IDMON BIT(0)
+
#define NUM_OF_PHYS 4
enum rcar_gen3_phy_index {
PHY_INDEX_BOTH_HC,
@@ -112,9 +117,16 @@ struct rcar_gen3_chan {
struct mutex lock; /* protects rphys[...].powered */
enum usb_dr_mode dr_mode;
int irq;
+ u32 obint_enable_bits;
bool extcon_host;
bool is_otg_channel;
bool uses_otg_pins;
+ bool soc_no_adp_ctrl;
+};
+
+struct rcar_gen3_phy_drv_data {
+ const struct phy_ops *phy_usb2_ops;
+ bool no_adp_ctrl;
};
/*
@@ -172,14 +184,22 @@ static void rcar_gen3_set_linectrl(struct rcar_gen3_chan *ch, int dp, int dm)
static void rcar_gen3_enable_vbus_ctrl(struct rcar_gen3_chan *ch, int vbus)
{
void __iomem *usb2_base = ch->base;
- u32 val = readl(usb2_base + USB2_ADPCTRL);
+ u32 vbus_ctrl_reg = USB2_ADPCTRL;
+ u32 vbus_ctrl_val = USB2_ADPCTRL_DRVVBUS;
+ u32 val;
dev_vdbg(ch->dev, "%s: %08x, %d\n", __func__, val, vbus);
+ if (ch->soc_no_adp_ctrl) {
+ vbus_ctrl_reg = USB2_VBCTRL;
+ vbus_ctrl_val = USB2_VBCTRL_VBOUT;
+ }
+
+ val = readl(usb2_base + vbus_ctrl_reg);
if (vbus)
- val |= USB2_ADPCTRL_DRVVBUS;
+ val |= vbus_ctrl_val;
else
- val &= ~USB2_ADPCTRL_DRVVBUS;
- writel(val, usb2_base + USB2_ADPCTRL);
+ val &= ~vbus_ctrl_val;
+ writel(val, usb2_base + vbus_ctrl_reg);
}
static void rcar_gen3_control_otg_irq(struct rcar_gen3_chan *ch, int enable)
@@ -188,9 +208,9 @@ static void rcar_gen3_control_otg_irq(struct rcar_gen3_chan *ch, int enable)
u32 val = readl(usb2_base + USB2_OBINTEN);
if (ch->uses_otg_pins && enable)
- val |= USB2_OBINT_BITS;
+ val |= ch->obint_enable_bits;
else
- val &= ~USB2_OBINT_BITS;
+ val &= ~ch->obint_enable_bits;
writel(val, usb2_base + USB2_OBINTEN);
}
@@ -252,6 +272,9 @@ static bool rcar_gen3_check_id(struct rcar_gen3_chan *ch)
if (!ch->uses_otg_pins)
return (ch->dr_mode == USB_DR_MODE_HOST) ? false : true;
+ if (ch->soc_no_adp_ctrl)
+ return !!(readl(ch->base + USB2_LINECTRL1) & USB2_LINECTRL1_USB2_IDMON);
+
return !!(readl(ch->base + USB2_ADPCTRL) & USB2_ADPCTRL_IDDIG);
}
@@ -376,16 +399,17 @@ static void rcar_gen3_init_otg(struct rcar_gen3_chan *ch)
USB2_LINECTRL1_DMRPD_EN | USB2_LINECTRL1_DM_RPD;
writel(val, usb2_base + USB2_LINECTRL1);
- val = readl(usb2_base + USB2_VBCTRL);
- val &= ~USB2_VBCTRL_OCCLREN;
- writel(val | USB2_VBCTRL_DRVVBUSSEL, usb2_base + USB2_VBCTRL);
- val = readl(usb2_base + USB2_ADPCTRL);
- writel(val | USB2_ADPCTRL_IDPULLUP, usb2_base + USB2_ADPCTRL);
-
+ if (!ch->soc_no_adp_ctrl) {
+ val = readl(usb2_base + USB2_VBCTRL);
+ val &= ~USB2_VBCTRL_OCCLREN;
+ writel(val | USB2_VBCTRL_DRVVBUSSEL, usb2_base + USB2_VBCTRL);
+ val = readl(usb2_base + USB2_ADPCTRL);
+ writel(val | USB2_ADPCTRL_IDPULLUP, usb2_base + USB2_ADPCTRL);
+ }
msleep(20);
writel(0xffffffff, usb2_base + USB2_OBINTSTA);
- writel(USB2_OBINT_BITS, usb2_base + USB2_OBINTEN);
+ writel(ch->obint_enable_bits, usb2_base + USB2_OBINTEN);
rcar_gen3_device_recognition(ch);
}
@@ -397,9 +421,9 @@ static irqreturn_t rcar_gen3_phy_usb2_irq(int irq, void *_ch)
u32 status = readl(usb2_base + USB2_OBINTSTA);
irqreturn_t ret = IRQ_NONE;
- if (status & USB2_OBINT_BITS) {
+ if (status & ch->obint_enable_bits) {
dev_vdbg(ch->dev, "%s: %08x\n", __func__, status);
- writel(USB2_OBINT_BITS, usb2_base + USB2_OBINTSTA);
+ writel(ch->obint_enable_bits, usb2_base + USB2_OBINTSTA);
rcar_gen3_device_recognition(ch);
ret = IRQ_HANDLED;
}
@@ -535,26 +559,45 @@ static const struct phy_ops rz_g1c_phy_usb2_ops = {
.owner = THIS_MODULE,
};
+static const struct rcar_gen3_phy_drv_data rcar_gen3_phy_usb2_data = {
+ .phy_usb2_ops = &rcar_gen3_phy_usb2_ops,
+ .no_adp_ctrl = false,
+};
+
+static const struct rcar_gen3_phy_drv_data rz_g1c_phy_usb2_data = {
+ .phy_usb2_ops = &rz_g1c_phy_usb2_ops,
+ .no_adp_ctrl = false,
+};
+
+static const struct rcar_gen3_phy_drv_data rz_g2l_phy_usb2_data = {
+ .phy_usb2_ops = &rcar_gen3_phy_usb2_ops,
+ .no_adp_ctrl = true,
+};
+
static const struct of_device_id rcar_gen3_phy_usb2_match_table[] = {
{
.compatible = "renesas,usb2-phy-r8a77470",
- .data = &rz_g1c_phy_usb2_ops,
+ .data = &rz_g1c_phy_usb2_data,
},
{
.compatible = "renesas,usb2-phy-r8a7795",
- .data = &rcar_gen3_phy_usb2_ops,
+ .data = &rcar_gen3_phy_usb2_data,
},
{
.compatible = "renesas,usb2-phy-r8a7796",
- .data = &rcar_gen3_phy_usb2_ops,
+ .data = &rcar_gen3_phy_usb2_data,
},
{
.compatible = "renesas,usb2-phy-r8a77965",
- .data = &rcar_gen3_phy_usb2_ops,
+ .data = &rcar_gen3_phy_usb2_data,
+ },
+ {
+ .compatible = "renesas,rzg2l-usb2-phy",
+ .data = &rz_g2l_phy_usb2_data,
},
{
.compatible = "renesas,rcar-gen3-usb2-phy",
- .data = &rcar_gen3_phy_usb2_ops,
+ .data = &rcar_gen3_phy_usb2_data,
},
{ /* sentinel */ },
};
@@ -608,10 +651,10 @@ static enum usb_dr_mode rcar_gen3_get_dr_mode(struct device_node *np)
static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev)
{
+ const struct rcar_gen3_phy_drv_data *phy_data;
struct device *dev = &pdev->dev;
struct rcar_gen3_chan *channel;
struct phy_provider *provider;
- const struct phy_ops *phy_usb2_ops;
int ret = 0, i;
if (!dev->of_node) {
@@ -627,6 +670,7 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev)
if (IS_ERR(channel->base))
return PTR_ERR(channel->base);
+ channel->obint_enable_bits = USB2_OBINT_BITS;
/* get irq number here and request_irq for OTG in phy_init */
channel->irq = platform_get_irq_optional(pdev, 0);
channel->dr_mode = rcar_gen3_get_dr_mode(dev->of_node);
@@ -653,16 +697,21 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev)
* And then, phy-core will manage runtime pm for this device.
*/
pm_runtime_enable(dev);
- phy_usb2_ops = of_device_get_match_data(dev);
- if (!phy_usb2_ops) {
+
+ phy_data = of_device_get_match_data(dev);
+ if (!phy_data) {
ret = -EINVAL;
goto error;
}
+ channel->soc_no_adp_ctrl = phy_data->no_adp_ctrl;
+ if (phy_data->no_adp_ctrl)
+ channel->obint_enable_bits = USB2_OBINT_IDCHG_EN;
+
mutex_init(&channel->lock);
for (i = 0; i < NUM_OF_PHYS; i++) {
channel->rphys[i].phy = devm_phy_create(dev, NULL,
- phy_usb2_ops);
+ phy_data->phy_usb2_ops);
if (IS_ERR(channel->rphys[i].phy)) {
dev_err(dev, "Failed to create USB2 PHY\n");
ret = PTR_ERR(channel->rphys[i].phy);
diff --git a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
index beacac1dd253..4f569d9307b9 100644
--- a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
+++ b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
@@ -1180,8 +1180,10 @@ static int rockchip_usb2phy_probe(struct platform_device *pdev)
next_child:
/* to prevent out of boundary */
- if (++index >= rphy->phy_cfg->num_ports)
+ if (++index >= rphy->phy_cfg->num_ports) {
+ of_node_put(child_np);
break;
+ }
}
provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
diff --git a/drivers/phy/samsung/Makefile b/drivers/phy/samsung/Makefile
index 3959100fe8a2..65e4cc59403f 100644
--- a/drivers/phy/samsung/Makefile
+++ b/drivers/phy/samsung/Makefile
@@ -2,7 +2,10 @@
obj-$(CONFIG_PHY_EXYNOS_DP_VIDEO) += phy-exynos-dp-video.o
obj-$(CONFIG_PHY_EXYNOS_MIPI_VIDEO) += phy-exynos-mipi-video.o
obj-$(CONFIG_PHY_EXYNOS_PCIE) += phy-exynos-pcie.o
-obj-$(CONFIG_PHY_SAMSUNG_UFS) += phy-samsung-ufs.o
+obj-$(CONFIG_PHY_SAMSUNG_UFS) += phy-exynos-ufs.o
+phy-exynos-ufs-y += phy-samsung-ufs.o
+phy-exynos-ufs-y += phy-exynos7-ufs.o
+phy-exynos-ufs-y += phy-exynosautov9-ufs.o
obj-$(CONFIG_PHY_SAMSUNG_USB2) += phy-exynos-usb2.o
phy-exynos-usb2-y += phy-samsung-usb2.o
phy-exynos-usb2-$(CONFIG_PHY_EXYNOS4210_USB2) += phy-exynos4210-usb2.o
diff --git a/drivers/phy/samsung/phy-exynos7-ufs.h b/drivers/phy/samsung/phy-exynos7-ufs.c
index 518923141958..7c9008e163db 100644
--- a/drivers/phy/samsung/phy-exynos7-ufs.h
+++ b/drivers/phy/samsung/phy-exynos7-ufs.c
@@ -1,11 +1,9 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+// SPDX-License-Identifier: GPL-2.0-only
/*
* UFS PHY driver data for Samsung EXYNOS7 SoC
*
* Copyright (C) 2020 Samsung Electronics Co., Ltd.
*/
-#ifndef _PHY_EXYNOS7_UFS_H_
-#define _PHY_EXYNOS7_UFS_H_
#include "phy-samsung-ufs.h"
@@ -68,7 +66,7 @@ static const struct samsung_ufs_phy_cfg *exynos7_ufs_phy_cfgs[CFG_TAG_MAX] = {
[CFG_POST_PWR_HS] = exynos7_post_pwr_hs_cfg,
};
-static struct samsung_ufs_phy_drvdata exynos7_ufs_phy = {
+const struct samsung_ufs_phy_drvdata exynos7_ufs_phy = {
.cfg = exynos7_ufs_phy_cfgs,
.isol = {
.offset = EXYNOS7_EMBEDDED_COMBO_PHY_CTRL,
@@ -77,5 +75,3 @@ static struct samsung_ufs_phy_drvdata exynos7_ufs_phy = {
},
.has_symbol_clk = 1,
};
-
-#endif /* _PHY_EXYNOS7_UFS_H_ */
diff --git a/drivers/phy/samsung/phy-exynosautov9-ufs.c b/drivers/phy/samsung/phy-exynosautov9-ufs.c
new file mode 100644
index 000000000000..36398a15c2db
--- /dev/null
+++ b/drivers/phy/samsung/phy-exynosautov9-ufs.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * UFS PHY driver data for Samsung EXYNOSAUTO v9 SoC
+ *
+ * Copyright (C) 2021 Samsung Electronics Co., Ltd.
+ */
+
+#include "phy-samsung-ufs.h"
+
+#define EXYNOSAUTOV9_EMBEDDED_COMBO_PHY_CTRL 0x728
+#define EXYNOSAUTOV9_EMBEDDED_COMBO_PHY_CTRL_MASK 0x1
+#define EXYNOSAUTOV9_EMBEDDED_COMBO_PHY_CTRL_EN BIT(0)
+
+#define PHY_TRSV_REG_CFG_AUTOV9(o, v, d) \
+ PHY_TRSV_REG_CFG_OFFSET(o, v, d, 0x50)
+
+/* Calibration for phy initialization */
+static const struct samsung_ufs_phy_cfg exynosautov9_pre_init_cfg[] = {
+ PHY_COMN_REG_CFG(0x023, 0x80, PWR_MODE_ANY),
+ PHY_COMN_REG_CFG(0x01d, 0x10, PWR_MODE_ANY),
+
+ PHY_TRSV_REG_CFG_AUTOV9(0x044, 0xb5, PWR_MODE_ANY),
+ PHY_TRSV_REG_CFG_AUTOV9(0x04d, 0x43, PWR_MODE_ANY),
+ PHY_TRSV_REG_CFG_AUTOV9(0x05b, 0x20, PWR_MODE_ANY),
+ PHY_TRSV_REG_CFG_AUTOV9(0x05e, 0xc0, PWR_MODE_ANY),
+ PHY_TRSV_REG_CFG_AUTOV9(0x038, 0x12, PWR_MODE_ANY),
+ PHY_TRSV_REG_CFG_AUTOV9(0x059, 0x58, PWR_MODE_ANY),
+ PHY_TRSV_REG_CFG_AUTOV9(0x06c, 0x18, PWR_MODE_ANY),
+ PHY_TRSV_REG_CFG_AUTOV9(0x06d, 0x02, PWR_MODE_ANY),
+
+ PHY_COMN_REG_CFG(0x023, 0xc0, PWR_MODE_ANY),
+ PHY_COMN_REG_CFG(0x023, 0x00, PWR_MODE_ANY),
+
+ PHY_TRSV_REG_CFG(0x042, 0x5d, PWR_MODE_ANY),
+ PHY_TRSV_REG_CFG(0x043, 0x80, PWR_MODE_ANY),
+
+ END_UFS_PHY_CFG,
+};
+
+/* Calibration for HS mode series A/B */
+static const struct samsung_ufs_phy_cfg exynosautov9_pre_pwr_hs_cfg[] = {
+ PHY_TRSV_REG_CFG(0x032, 0xbc, PWR_MODE_HS_ANY),
+ PHY_TRSV_REG_CFG(0x03c, 0x7f, PWR_MODE_HS_ANY),
+ PHY_TRSV_REG_CFG(0x048, 0xc0, PWR_MODE_HS_ANY),
+
+ PHY_TRSV_REG_CFG(0x04a, 0x00, PWR_MODE_HS_G3_SER_B),
+ PHY_TRSV_REG_CFG(0x04b, 0x10, PWR_MODE_HS_G1_SER_B |
+ PWR_MODE_HS_G3_SER_B),
+ PHY_TRSV_REG_CFG(0x04d, 0x63, PWR_MODE_HS_G3_SER_B),
+
+ END_UFS_PHY_CFG,
+};
+
+static const struct samsung_ufs_phy_cfg *exynosautov9_ufs_phy_cfgs[CFG_TAG_MAX] = {
+ [CFG_PRE_INIT] = exynosautov9_pre_init_cfg,
+ [CFG_PRE_PWR_HS] = exynosautov9_pre_pwr_hs_cfg,
+};
+
+const struct samsung_ufs_phy_drvdata exynosautov9_ufs_phy = {
+ .cfg = exynosautov9_ufs_phy_cfgs,
+ .isol = {
+ .offset = EXYNOSAUTOV9_EMBEDDED_COMBO_PHY_CTRL,
+ .mask = EXYNOSAUTOV9_EMBEDDED_COMBO_PHY_CTRL_MASK,
+ .en = EXYNOSAUTOV9_EMBEDDED_COMBO_PHY_CTRL_EN,
+ },
+ .has_symbol_clk = 0,
+};
diff --git a/drivers/phy/samsung/phy-samsung-ufs.c b/drivers/phy/samsung/phy-samsung-ufs.c
index dd9ab1519d83..602ddef259eb 100644
--- a/drivers/phy/samsung/phy-samsung-ufs.c
+++ b/drivers/phy/samsung/phy-samsung-ufs.c
@@ -347,6 +347,9 @@ static const struct of_device_id samsung_ufs_phy_match[] = {
{
.compatible = "samsung,exynos7-ufs-phy",
.data = &exynos7_ufs_phy,
+ }, {
+ .compatible = "samsung,exynosautov9-ufs-phy",
+ .data = &exynosautov9_ufs_phy,
},
{},
};
diff --git a/drivers/phy/samsung/phy-samsung-ufs.h b/drivers/phy/samsung/phy-samsung-ufs.h
index 5de78710524c..91a0e9f94f98 100644
--- a/drivers/phy/samsung/phy-samsung-ufs.h
+++ b/drivers/phy/samsung/phy-samsung-ufs.h
@@ -10,6 +10,9 @@
#ifndef _PHY_SAMSUNG_UFS_
#define _PHY_SAMSUNG_UFS_
+#include <linux/phy/phy.h>
+#include <linux/regmap.h>
+
#define PHY_COMN_BLK 1
#define PHY_TRSV_BLK 2
#define END_UFS_PHY_CFG { 0 }
@@ -24,14 +27,17 @@
.id = PHY_COMN_BLK, \
}
-#define PHY_TRSV_REG_CFG(o, v, d) { \
+#define PHY_TRSV_REG_CFG_OFFSET(o, v, d, c) { \
.off_0 = PHY_APB_ADDR((o)), \
- .off_1 = PHY_APB_ADDR((o) + PHY_TRSV_CH_OFFSET), \
+ .off_1 = PHY_APB_ADDR((o) + (c)), \
.val = (v), \
.desc = (d), \
.id = PHY_TRSV_BLK, \
}
+#define PHY_TRSV_REG_CFG(o, v, d) \
+ PHY_TRSV_REG_CFG_OFFSET(o, v, d, PHY_TRSV_CH_OFFSET)
+
/* UFS PHY registers */
#define PHY_PLL_LOCK_STATUS 0x1e
#define PHY_CDR_LOCK_STATUS 0x5e
@@ -134,6 +140,7 @@ static inline void samsung_ufs_phy_ctrl_isol(
phy->isol->mask, isol ? 0 : phy->isol->en);
}
-#include "phy-exynos7-ufs.h"
+extern const struct samsung_ufs_phy_drvdata exynos7_ufs_phy;
+extern const struct samsung_ufs_phy_drvdata exynosautov9_ufs_phy;
#endif /* _PHY_SAMSUNG_UFS_ */
diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c
index 0aadac678191..963de5913e50 100644
--- a/drivers/phy/tegra/xusb.c
+++ b/drivers/phy/tegra/xusb.c
@@ -1273,7 +1273,7 @@ static int tegra_xusb_padctl_remove(struct platform_device *pdev)
return err;
}
-static int tegra_xusb_padctl_suspend_noirq(struct device *dev)
+static __maybe_unused int tegra_xusb_padctl_suspend_noirq(struct device *dev)
{
struct tegra_xusb_padctl *padctl = dev_get_drvdata(dev);
@@ -1283,7 +1283,7 @@ static int tegra_xusb_padctl_suspend_noirq(struct device *dev)
return 0;
}
-static int tegra_xusb_padctl_resume_noirq(struct device *dev)
+static __maybe_unused int tegra_xusb_padctl_resume_noirq(struct device *dev)
{
struct tegra_xusb_padctl *padctl = dev_get_drvdata(dev);
diff --git a/drivers/phy/ti/phy-twl4030-usb.c b/drivers/phy/ti/phy-twl4030-usb.c
index 5771e2486a3b..ac71017a0bc1 100644
--- a/drivers/phy/ti/phy-twl4030-usb.c
+++ b/drivers/phy/ti/phy-twl4030-usb.c
@@ -162,6 +162,8 @@ struct twl4030_usb {
atomic_t connected;
bool vbus_supplied;
bool musb_mailbox_pending;
+ unsigned long runtime_suspended:1;
+ unsigned long needs_resume:1;
struct delayed_work id_workaround_work;
};
@@ -384,6 +386,9 @@ static void __twl4030_phy_power(struct twl4030_usb *twl, int on)
WARN_ON(twl4030_usb_write_verify(twl, PHY_PWR_CTRL, pwr) < 0);
}
+static int twl4030_usb_runtime_suspend(struct device *dev);
+static int twl4030_usb_runtime_resume(struct device *dev);
+
static int __maybe_unused twl4030_usb_suspend(struct device *dev)
{
struct twl4030_usb *twl = dev_get_drvdata(dev);
@@ -395,6 +400,10 @@ static int __maybe_unused twl4030_usb_suspend(struct device *dev)
*/
dev_dbg(twl->dev, "%s\n", __func__);
disable_irq(twl->irq);
+ if (!twl->runtime_suspended && !atomic_read(&twl->connected)) {
+ twl4030_usb_runtime_suspend(dev);
+ twl->needs_resume = 1;
+ }
return 0;
}
@@ -405,9 +414,13 @@ static int __maybe_unused twl4030_usb_resume(struct device *dev)
dev_dbg(twl->dev, "%s\n", __func__);
enable_irq(twl->irq);
+ if (twl->needs_resume)
+ twl4030_usb_runtime_resume(dev);
/* check whether cable status changed */
twl4030_usb_irq(0, twl);
+ twl->runtime_suspended = 0;
+
return 0;
}
@@ -422,6 +435,8 @@ static int __maybe_unused twl4030_usb_runtime_suspend(struct device *dev)
regulator_disable(twl->usb1v8);
regulator_disable(twl->usb3v1);
+ twl->runtime_suspended = 1;
+
return 0;
}
diff --git a/drivers/phy/xilinx/phy-zynqmp.c b/drivers/phy/xilinx/phy-zynqmp.c
index 35652152ce5d..f478d8a17115 100644
--- a/drivers/phy/xilinx/phy-zynqmp.c
+++ b/drivers/phy/xilinx/phy-zynqmp.c
@@ -626,6 +626,9 @@ static int xpsgtr_phy_power_on(struct phy *phy)
struct xpsgtr_phy *gtr_phy = phy_get_drvdata(phy);
int ret = 0;
+ /* Skip initialization if not required. */
+ if (!xpsgtr_phy_init_required(gtr_phy))
+ return ret;
/*
* Wait for the PLL to lock. For DP, only wait on DP0 to avoid
* cumulating waits for both lanes. The user is expected to initialize
diff --git a/drivers/pinctrl/actions/pinctrl-owl.c b/drivers/pinctrl/actions/pinctrl-owl.c
index c8b3e396ea27..781f2200ed58 100644
--- a/drivers/pinctrl/actions/pinctrl-owl.c
+++ b/drivers/pinctrl/actions/pinctrl-owl.c
@@ -833,7 +833,7 @@ static void owl_gpio_irq_handler(struct irq_desc *desc)
unsigned int parent = irq_desc_get_irq(desc);
const struct owl_gpio_port *port;
void __iomem *base;
- unsigned int pin, irq, offset = 0, i;
+ unsigned int pin, offset = 0, i;
unsigned long pending_irq;
chained_irq_enter(chip, desc);
@@ -849,8 +849,7 @@ static void owl_gpio_irq_handler(struct irq_desc *desc)
pending_irq = readl_relaxed(base + port->intc_pd);
for_each_set_bit(pin, &pending_irq, port->pins) {
- irq = irq_find_mapping(domain, offset + pin);
- generic_handle_irq(irq);
+ generic_handle_domain_irq(domain, offset + pin);
/* clear pending interrupt */
owl_gpio_update_reg(base + port->intc_pd, pin, true);
diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
index 2c87af1180c4..8b34d2c308c7 100644
--- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c
+++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
@@ -395,8 +395,8 @@ static void bcm2835_gpio_irq_handle_bank(struct bcm2835_pinctrl *pc,
events &= pc->enabled_irq_map[bank];
for_each_set_bit(offset, &events, 32) {
gpio = (32 * bank) + offset;
- generic_handle_irq(irq_linear_revmap(pc->gpio_chip.irq.domain,
- gpio));
+ generic_handle_domain_irq(pc->gpio_chip.irq.domain,
+ gpio);
}
}
diff --git a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
index dc511b9a6b43..a7a0dd638a26 100644
--- a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
+++ b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
@@ -176,7 +176,6 @@ static void iproc_gpio_irq_handler(struct irq_desc *desc)
for_each_set_bit(bit, &val, NGPIOS_PER_BANK) {
unsigned pin = NGPIOS_PER_BANK * i + bit;
- int child_irq = irq_find_mapping(gc->irq.domain, pin);
/*
* Clear the interrupt before invoking the
@@ -185,7 +184,7 @@ static void iproc_gpio_irq_handler(struct irq_desc *desc)
writel(BIT(bit), chip->base + (i * GPIO_BANK_SIZE) +
IPROC_GPIO_INT_CLR_OFFSET);
- generic_handle_irq(child_irq);
+ generic_handle_domain_irq(gc->irq.domain, pin);
}
}
diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
index a00a42a61a90..e03142895f61 100644
--- a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
+++ b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
@@ -155,8 +155,7 @@ static irqreturn_t nsp_gpio_irq_handler(int irq, void *data)
int_bits = level | event;
for_each_set_bit(bit, &int_bits, gc->ngpio)
- generic_handle_irq(
- irq_linear_revmap(gc->irq.domain, bit));
+ generic_handle_domain_irq(gc->irq.domain, bit);
}
return int_bits ? IRQ_HANDLED : IRQ_NONE;
diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c
index 394a421a19d5..8f23d126c6a7 100644
--- a/drivers/pinctrl/intel/pinctrl-baytrail.c
+++ b/drivers/pinctrl/intel/pinctrl-baytrail.c
@@ -1444,7 +1444,6 @@ static void byt_gpio_irq_handler(struct irq_desc *desc)
u32 base, pin;
void __iomem *reg;
unsigned long pending;
- unsigned int virq;
/* check from GPIO controller which pin triggered the interrupt */
for (base = 0; base < vg->chip.ngpio; base += 32) {
@@ -1460,10 +1459,8 @@ static void byt_gpio_irq_handler(struct irq_desc *desc)
raw_spin_lock(&byt_lock);
pending = readl(reg);
raw_spin_unlock(&byt_lock);
- for_each_set_bit(pin, &pending, 32) {
- virq = irq_find_mapping(vg->chip.irq.domain, base + pin);
- generic_handle_irq(virq);
- }
+ for_each_set_bit(pin, &pending, 32)
+ generic_handle_domain_irq(vg->chip.irq.domain, base + pin);
}
chip->irq_eoi(data);
}
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index 2ed17cdf946d..980099028cf8 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -1409,11 +1409,10 @@ static void chv_gpio_irq_handler(struct irq_desc *desc)
raw_spin_unlock_irqrestore(&chv_lock, flags);
for_each_set_bit(intr_line, &pending, community->nirqs) {
- unsigned int irq, offset;
+ unsigned int offset;
offset = cctx->intr_lines[intr_line];
- irq = irq_find_mapping(gc->irq.domain, offset);
- generic_handle_irq(irq);
+ generic_handle_domain_irq(gc->irq.domain, offset);
}
chained_irq_exit(chip, desc);
diff --git a/drivers/pinctrl/intel/pinctrl-lynxpoint.c b/drivers/pinctrl/intel/pinctrl-lynxpoint.c
index 0a48ca46ab59..561fa322b0b4 100644
--- a/drivers/pinctrl/intel/pinctrl-lynxpoint.c
+++ b/drivers/pinctrl/intel/pinctrl-lynxpoint.c
@@ -653,12 +653,8 @@ static void lp_gpio_irq_handler(struct irq_desc *desc)
/* Only interrupts that are enabled */
pending = ioread32(reg) & ioread32(ena);
- for_each_set_bit(pin, &pending, 32) {
- unsigned int irq;
-
- irq = irq_find_mapping(lg->chip.irq.domain, base + pin);
- generic_handle_irq(irq);
- }
+ for_each_set_bit(pin, &pending, 32)
+ generic_handle_domain_irq(lg->chip.irq.domain, base + pin);
}
chip->irq_eoi(data);
}
diff --git a/drivers/pinctrl/intel/pinctrl-tigerlake.c b/drivers/pinctrl/intel/pinctrl-tigerlake.c
index 3e4ef2b87526..0bcd19597e4a 100644
--- a/drivers/pinctrl/intel/pinctrl-tigerlake.c
+++ b/drivers/pinctrl/intel/pinctrl-tigerlake.c
@@ -701,32 +701,32 @@ static const struct pinctrl_pin_desc tglh_pins[] = {
static const struct intel_padgroup tglh_community0_gpps[] = {
TGL_GPP(0, 0, 24, 0), /* GPP_A */
- TGL_GPP(1, 25, 44, 128), /* GPP_R */
- TGL_GPP(2, 45, 70, 32), /* GPP_B */
- TGL_GPP(3, 71, 78, INTEL_GPIO_BASE_NOMAP), /* vGPIO_0 */
+ TGL_GPP(1, 25, 44, 32), /* GPP_R */
+ TGL_GPP(2, 45, 70, 64), /* GPP_B */
+ TGL_GPP(3, 71, 78, 96), /* vGPIO_0 */
};
static const struct intel_padgroup tglh_community1_gpps[] = {
- TGL_GPP(0, 79, 104, 96), /* GPP_D */
- TGL_GPP(1, 105, 128, 64), /* GPP_C */
- TGL_GPP(2, 129, 136, 160), /* GPP_S */
- TGL_GPP(3, 137, 153, 192), /* GPP_G */
- TGL_GPP(4, 154, 180, 224), /* vGPIO */
+ TGL_GPP(0, 79, 104, 128), /* GPP_D */
+ TGL_GPP(1, 105, 128, 160), /* GPP_C */
+ TGL_GPP(2, 129, 136, 192), /* GPP_S */
+ TGL_GPP(3, 137, 153, 224), /* GPP_G */
+ TGL_GPP(4, 154, 180, 256), /* vGPIO */
};
static const struct intel_padgroup tglh_community3_gpps[] = {
- TGL_GPP(0, 181, 193, 256), /* GPP_E */
- TGL_GPP(1, 194, 217, 288), /* GPP_F */
+ TGL_GPP(0, 181, 193, 288), /* GPP_E */
+ TGL_GPP(1, 194, 217, 320), /* GPP_F */
};
static const struct intel_padgroup tglh_community4_gpps[] = {
- TGL_GPP(0, 218, 241, 320), /* GPP_H */
+ TGL_GPP(0, 218, 241, 352), /* GPP_H */
TGL_GPP(1, 242, 251, 384), /* GPP_J */
- TGL_GPP(2, 252, 266, 352), /* GPP_K */
+ TGL_GPP(2, 252, 266, 416), /* GPP_K */
};
static const struct intel_padgroup tglh_community5_gpps[] = {
- TGL_GPP(0, 267, 281, 416), /* GPP_I */
+ TGL_GPP(0, 267, 281, 448), /* GPP_I */
TGL_GPP(1, 282, 290, INTEL_GPIO_BASE_NOMAP), /* JTAG */
};
diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c
index 3b9b5dbd7968..f7b54a551764 100644
--- a/drivers/pinctrl/mediatek/mtk-eint.c
+++ b/drivers/pinctrl/mediatek/mtk-eint.c
@@ -319,7 +319,7 @@ static void mtk_eint_irq_handler(struct irq_desc *desc)
struct irq_chip *chip = irq_desc_get_chip(desc);
struct mtk_eint *eint = irq_desc_get_handler_data(desc);
unsigned int status, eint_num;
- int offset, mask_offset, index, virq;
+ int offset, mask_offset, index;
void __iomem *reg = mtk_eint_get_offset(eint, 0, eint->regs->stat);
int dual_edge, start_level, curr_level;
@@ -331,7 +331,6 @@ static void mtk_eint_irq_handler(struct irq_desc *desc)
offset = __ffs(status);
mask_offset = eint_num >> 5;
index = eint_num + offset;
- virq = irq_find_mapping(eint->domain, index);
status &= ~BIT(offset);
/*
@@ -361,7 +360,7 @@ static void mtk_eint_irq_handler(struct irq_desc *desc)
index);
}
- generic_handle_irq(virq);
+ generic_handle_domain_irq(eint->domain, index);
if (dual_edge) {
curr_level = mtk_eint_flip_edge(eint, index);
diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c
index 5b3b048725cc..45ebdeba985a 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c
@@ -925,12 +925,10 @@ int mtk_pinconf_adv_pull_set(struct mtk_pinctrl *hw,
err = hw->soc->bias_set(hw, desc, pullup);
if (err)
return err;
- } else if (hw->soc->bias_set_combo) {
- err = hw->soc->bias_set_combo(hw, desc, pullup, arg);
- if (err)
- return err;
} else {
- return -ENOTSUPP;
+ err = mtk_pinconf_bias_set_rev1(hw, desc, pullup);
+ if (err)
+ err = mtk_pinconf_bias_set(hw, desc, pullup);
}
}
diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
index abfe11c7b49f..39828e9c3120 100644
--- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c
+++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
@@ -815,7 +815,7 @@ static void nmk_gpio_irq_handler(struct irq_desc *desc)
while (status) {
int bit = __ffs(status);
- generic_handle_irq(irq_find_mapping(chip->irq.domain, bit));
+ generic_handle_domain_irq(chip->irq.domain, bit);
status &= ~BIT(bit);
}
diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c
index bb1ea47ec4c6..4d81908d6725 100644
--- a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c
+++ b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c
@@ -231,7 +231,7 @@ static void npcmgpio_irq_handler(struct irq_desc *desc)
sts &= en;
for_each_set_bit(bit, (const void *)&sts, NPCM7XX_GPIO_PER_BANK)
- generic_handle_irq(irq_linear_revmap(gc->irq.domain, bit));
+ generic_handle_domain_irq(gc->irq.domain, bit);
chained_irq_exit(chip, desc);
}
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index a76be6cc26ee..c001f2ed20f8 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -444,8 +444,7 @@ static int amd_gpio_irq_set_wake(struct irq_data *d, unsigned int on)
unsigned long flags;
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct amd_gpio *gpio_dev = gpiochip_get_data(gc);
- u32 wake_mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) |
- BIT(WAKE_CNTRL_OFF_S4);
+ u32 wake_mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3);
raw_spin_lock_irqsave(&gpio_dev->lock, flags);
pin_reg = readl(gpio_dev->base + (d->hwirq)*4);
@@ -621,14 +620,12 @@ static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id)
if (!(regval & PIN_IRQ_PENDING) ||
!(regval & BIT(INTERRUPT_MASK_OFF)))
continue;
- irq = irq_find_mapping(gc->irq.domain, irqnr + i);
- if (irq != 0)
- generic_handle_irq(irq);
+ generic_handle_domain_irq(gc->irq.domain, irqnr + i);
/* Clear interrupt.
* We must read the pin register again, in case the
* value was changed while executing
- * generic_handle_irq() above.
+ * generic_handle_domain_irq() above.
* If we didn't find a mapping for the interrupt,
* disable it in order to avoid a system hang caused
* by an interrupt storm.
diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c
index 72e6df7abe8c..6022496bb6a9 100644
--- a/drivers/pinctrl/pinctrl-at91.c
+++ b/drivers/pinctrl/pinctrl-at91.c
@@ -1712,10 +1712,8 @@ static void gpio_irq_handler(struct irq_desc *desc)
continue;
}
- for_each_set_bit(n, &isr, BITS_PER_LONG) {
- generic_handle_irq(irq_find_mapping(
- gpio_chip->irq.domain, n));
- }
+ for_each_set_bit(n, &isr, BITS_PER_LONG)
+ generic_handle_domain_irq(gpio_chip->irq.domain, n);
}
chained_irq_exit(chip, desc);
/* now it may re-trigger */
diff --git a/drivers/pinctrl/pinctrl-equilibrium.c b/drivers/pinctrl/pinctrl-equilibrium.c
index 38cc20fa9d5a..fb713f9c53d0 100644
--- a/drivers/pinctrl/pinctrl-equilibrium.c
+++ b/drivers/pinctrl/pinctrl-equilibrium.c
@@ -155,7 +155,7 @@ static void eqbr_irq_handler(struct irq_desc *desc)
pins = readl(gctrl->membase + GPIO_IRNCR);
for_each_set_bit(offset, &pins, gc->ngpio)
- generic_handle_irq(irq_find_mapping(gc->irq.domain, offset));
+ generic_handle_domain_irq(gc->irq.domain, offset);
chained_irq_exit(ic, desc);
}
diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c
index 983ba9865f77..ce9cc719c395 100644
--- a/drivers/pinctrl/pinctrl-ingenic.c
+++ b/drivers/pinctrl/pinctrl-ingenic.c
@@ -3080,7 +3080,7 @@ static void ingenic_gpio_irq_handler(struct irq_desc *desc)
flag = ingenic_gpio_read_reg(jzgc, JZ4730_GPIO_GPFR);
for_each_set_bit(i, &flag, 32)
- generic_handle_irq(irq_linear_revmap(gc->irq.domain, i));
+ generic_handle_domain_irq(gc->irq.domain, i);
chained_irq_exit(irq_chip, desc);
}
diff --git a/drivers/pinctrl/pinctrl-k210.c b/drivers/pinctrl/pinctrl-k210.c
index f831526d06ff..49e32684dbb2 100644
--- a/drivers/pinctrl/pinctrl-k210.c
+++ b/drivers/pinctrl/pinctrl-k210.c
@@ -950,23 +950,37 @@ static int k210_fpioa_probe(struct platform_device *pdev)
return ret;
pdata->pclk = devm_clk_get_optional(dev, "pclk");
- if (!IS_ERR(pdata->pclk))
- clk_prepare_enable(pdata->pclk);
+ if (!IS_ERR(pdata->pclk)) {
+ ret = clk_prepare_enable(pdata->pclk);
+ if (ret)
+ goto disable_clk;
+ }
pdata->sysctl_map =
syscon_regmap_lookup_by_phandle_args(np,
"canaan,k210-sysctl-power",
1, &pdata->power_offset);
- if (IS_ERR(pdata->sysctl_map))
- return PTR_ERR(pdata->sysctl_map);
+ if (IS_ERR(pdata->sysctl_map)) {
+ ret = PTR_ERR(pdata->sysctl_map);
+ goto disable_pclk;
+ }
k210_fpioa_init_ties(pdata);
pdata->pctl = pinctrl_register(&k210_pinctrl_desc, dev, (void *)pdata);
- if (IS_ERR(pdata->pctl))
- return PTR_ERR(pdata->pctl);
+ if (IS_ERR(pdata->pctl)) {
+ ret = PTR_ERR(pdata->pctl);
+ goto disable_pclk;
+ }
return 0;
+
+disable_pclk:
+ clk_disable_unprepare(pdata->pclk);
+disable_clk:
+ clk_disable_unprepare(pdata->clk);
+
+ return ret;
}
static const struct of_device_id k210_fpioa_dt_ids[] = {
diff --git a/drivers/pinctrl/pinctrl-microchip-sgpio.c b/drivers/pinctrl/pinctrl-microchip-sgpio.c
index 165cb7a59715..072bccdea2a5 100644
--- a/drivers/pinctrl/pinctrl-microchip-sgpio.c
+++ b/drivers/pinctrl/pinctrl-microchip-sgpio.c
@@ -673,7 +673,7 @@ static void sgpio_irq_handler(struct irq_desc *desc)
for_each_set_bit(port, &val, SGPIO_BITS_PER_WORD) {
gpio = sgpio_addr_to_pin(priv, port, bit);
- generic_handle_irq(irq_linear_revmap(chip->irq.domain, gpio));
+ generic_handle_domain_irq(chip->irq.domain, gpio);
}
chained_irq_exit(parent_chip, desc);
diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c
index e470c16718de..0a36ec8775a3 100644
--- a/drivers/pinctrl/pinctrl-ocelot.c
+++ b/drivers/pinctrl/pinctrl-ocelot.c
@@ -1290,8 +1290,7 @@ static void ocelot_irq_handler(struct irq_desc *desc)
for_each_set_bit(irq, &irqs,
min(32U, info->desc->npins - 32 * i))
- generic_handle_irq(irq_linear_revmap(chip->irq.domain,
- irq + 32 * i));
+ generic_handle_domain_irq(chip->irq.domain, irq + 32 * i);
chained_irq_exit(parent_chip, desc);
}
diff --git a/drivers/pinctrl/pinctrl-oxnas.c b/drivers/pinctrl/pinctrl-oxnas.c
index 5a312279b3c7..cebd810bd6d1 100644
--- a/drivers/pinctrl/pinctrl-oxnas.c
+++ b/drivers/pinctrl/pinctrl-oxnas.c
@@ -1055,7 +1055,7 @@ static void oxnas_gpio_irq_handler(struct irq_desc *desc)
stat = readl(bank->reg_base + IRQ_PENDING);
for_each_set_bit(pin, &stat, BITS_PER_LONG)
- generic_handle_irq(irq_linear_revmap(gc->irq.domain, pin));
+ generic_handle_domain_irq(gc->irq.domain, pin);
chained_irq_exit(chip, desc);
}
diff --git a/drivers/pinctrl/pinctrl-pic32.c b/drivers/pinctrl/pinctrl-pic32.c
index a6e2a4a4ca95..748dabd8db6e 100644
--- a/drivers/pinctrl/pinctrl-pic32.c
+++ b/drivers/pinctrl/pinctrl-pic32.c
@@ -2101,7 +2101,7 @@ static void pic32_gpio_irq_handler(struct irq_desc *desc)
pending = pic32_gpio_get_pending(gc, stat);
for_each_set_bit(pin, &pending, BITS_PER_LONG)
- generic_handle_irq(irq_linear_revmap(gc->irq.domain, pin));
+ generic_handle_domain_irq(gc->irq.domain, pin);
chained_irq_exit(chip, desc);
}
diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c
index ec761ba2a2da..8d271c6b0ca4 100644
--- a/drivers/pinctrl/pinctrl-pistachio.c
+++ b/drivers/pinctrl/pinctrl-pistachio.c
@@ -1306,7 +1306,7 @@ static void pistachio_gpio_irq_handler(struct irq_desc *desc)
pending = gpio_readl(bank, GPIO_INTERRUPT_STATUS) &
gpio_readl(bank, GPIO_INTERRUPT_EN);
for_each_set_bit(pin, &pending, 16)
- generic_handle_irq(irq_linear_revmap(gc->irq.domain, pin));
+ generic_handle_domain_irq(gc->irq.domain, pin);
chained_irq_exit(chip, desc);
}
diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c
index 067fc4208de4..ae33e376695f 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -21,8 +21,8 @@
#include <linux/io.h>
#include <linux/bitops.h>
#include <linux/gpio/driver.h>
-#include <linux/of_device.h>
#include <linux/of_address.h>
+#include <linux/of_device.h>
#include <linux/of_irq.h>
#include <linux/pinctrl/machine.h>
#include <linux/pinctrl/pinconf.h>
@@ -37,35 +37,7 @@
#include "core.h"
#include "pinconf.h"
-
-/* GPIO control registers */
-#define GPIO_SWPORT_DR 0x00
-#define GPIO_SWPORT_DDR 0x04
-#define GPIO_INTEN 0x30
-#define GPIO_INTMASK 0x34
-#define GPIO_INTTYPE_LEVEL 0x38
-#define GPIO_INT_POLARITY 0x3c
-#define GPIO_INT_STATUS 0x40
-#define GPIO_INT_RAWSTATUS 0x44
-#define GPIO_DEBOUNCE 0x48
-#define GPIO_PORTS_EOI 0x4c
-#define GPIO_EXT_PORT 0x50
-#define GPIO_LS_SYNC 0x60
-
-enum rockchip_pinctrl_type {
- PX30,
- RV1108,
- RK2928,
- RK3066B,
- RK3128,
- RK3188,
- RK3288,
- RK3308,
- RK3368,
- RK3399,
- RK3568,
-};
-
+#include "pinctrl-rockchip.h"
/**
* Generate a bitmask for setting a value (v) with a write mask bit in hiword
@@ -84,103 +56,6 @@ enum rockchip_pinctrl_type {
#define IOMUX_WIDTH_3BIT BIT(4)
#define IOMUX_WIDTH_2BIT BIT(5)
-/**
- * struct rockchip_iomux
- * @type: iomux variant using IOMUX_* constants
- * @offset: if initialized to -1 it will be autocalculated, by specifying
- * an initial offset value the relevant source offset can be reset
- * to a new value for autocalculating the following iomux registers.
- */
-struct rockchip_iomux {
- int type;
- int offset;
-};
-
-/*
- * enum type index corresponding to rockchip_perpin_drv_list arrays index.
- */
-enum rockchip_pin_drv_type {
- DRV_TYPE_IO_DEFAULT = 0,
- DRV_TYPE_IO_1V8_OR_3V0,
- DRV_TYPE_IO_1V8_ONLY,
- DRV_TYPE_IO_1V8_3V0_AUTO,
- DRV_TYPE_IO_3V3_ONLY,
- DRV_TYPE_MAX
-};
-
-/*
- * enum type index corresponding to rockchip_pull_list arrays index.
- */
-enum rockchip_pin_pull_type {
- PULL_TYPE_IO_DEFAULT = 0,
- PULL_TYPE_IO_1V8_ONLY,
- PULL_TYPE_MAX
-};
-
-/**
- * struct rockchip_drv
- * @drv_type: drive strength variant using rockchip_perpin_drv_type
- * @offset: if initialized to -1 it will be autocalculated, by specifying
- * an initial offset value the relevant source offset can be reset
- * to a new value for autocalculating the following drive strength
- * registers. if used chips own cal_drv func instead to calculate
- * registers offset, the variant could be ignored.
- */
-struct rockchip_drv {
- enum rockchip_pin_drv_type drv_type;
- int offset;
-};
-
-/**
- * struct rockchip_pin_bank
- * @reg_base: register base of the gpio bank
- * @regmap_pull: optional separate register for additional pull settings
- * @clk: clock of the gpio bank
- * @irq: interrupt of the gpio bank
- * @saved_masks: Saved content of GPIO_INTEN at suspend time.
- * @pin_base: first pin number
- * @nr_pins: number of pins in this bank
- * @name: name of the bank
- * @bank_num: number of the bank, to account for holes
- * @iomux: array describing the 4 iomux sources of the bank
- * @drv: array describing the 4 drive strength sources of the bank
- * @pull_type: array describing the 4 pull type sources of the bank
- * @valid: is all necessary information present
- * @of_node: dt node of this bank
- * @drvdata: common pinctrl basedata
- * @domain: irqdomain of the gpio bank
- * @gpio_chip: gpiolib chip
- * @grange: gpio range
- * @slock: spinlock for the gpio bank
- * @toggle_edge_mode: bit mask to toggle (falling/rising) edge mode
- * @recalced_mask: bit mask to indicate a need to recalulate the mask
- * @route_mask: bits describing the routing pins of per bank
- */
-struct rockchip_pin_bank {
- void __iomem *reg_base;
- struct regmap *regmap_pull;
- struct clk *clk;
- int irq;
- u32 saved_masks;
- u32 pin_base;
- u8 nr_pins;
- char *name;
- u8 bank_num;
- struct rockchip_iomux iomux[4];
- struct rockchip_drv drv[4];
- enum rockchip_pin_pull_type pull_type[4];
- bool valid;
- struct device_node *of_node;
- struct rockchip_pinctrl *drvdata;
- struct irq_domain *domain;
- struct gpio_chip gpio_chip;
- struct pinctrl_gpio_range grange;
- raw_spinlock_t slock;
- u32 toggle_edge_mode;
- u32 recalced_mask;
- u32 route_mask;
-};
-
#define PIN_BANK(id, pins, label) \
{ \
.bank_num = id, \
@@ -320,119 +195,6 @@ struct rockchip_pin_bank {
#define RK_MUXROUTE_PMU(ID, PIN, FUNC, REG, VAL) \
PIN_BANK_MUX_ROUTE_FLAGS(ID, PIN, FUNC, REG, VAL, ROCKCHIP_ROUTE_PMU)
-/**
- * struct rockchip_mux_recalced_data: represent a pin iomux data.
- * @num: bank number.
- * @pin: pin number.
- * @bit: index at register.
- * @reg: register offset.
- * @mask: mask bit
- */
-struct rockchip_mux_recalced_data {
- u8 num;
- u8 pin;
- u32 reg;
- u8 bit;
- u8 mask;
-};
-
-enum rockchip_mux_route_location {
- ROCKCHIP_ROUTE_SAME = 0,
- ROCKCHIP_ROUTE_PMU,
- ROCKCHIP_ROUTE_GRF,
-};
-
-/**
- * struct rockchip_mux_recalced_data: represent a pin iomux data.
- * @bank_num: bank number.
- * @pin: index at register or used to calc index.
- * @func: the min pin.
- * @route_location: the mux route location (same, pmu, grf).
- * @route_offset: the max pin.
- * @route_val: the register offset.
- */
-struct rockchip_mux_route_data {
- u8 bank_num;
- u8 pin;
- u8 func;
- enum rockchip_mux_route_location route_location;
- u32 route_offset;
- u32 route_val;
-};
-
-struct rockchip_pin_ctrl {
- struct rockchip_pin_bank *pin_banks;
- u32 nr_banks;
- u32 nr_pins;
- char *label;
- enum rockchip_pinctrl_type type;
- int grf_mux_offset;
- int pmu_mux_offset;
- int grf_drv_offset;
- int pmu_drv_offset;
- struct rockchip_mux_recalced_data *iomux_recalced;
- u32 niomux_recalced;
- struct rockchip_mux_route_data *iomux_routes;
- u32 niomux_routes;
-
- void (*pull_calc_reg)(struct rockchip_pin_bank *bank,
- int pin_num, struct regmap **regmap,
- int *reg, u8 *bit);
- void (*drv_calc_reg)(struct rockchip_pin_bank *bank,
- int pin_num, struct regmap **regmap,
- int *reg, u8 *bit);
- int (*schmitt_calc_reg)(struct rockchip_pin_bank *bank,
- int pin_num, struct regmap **regmap,
- int *reg, u8 *bit);
-};
-
-struct rockchip_pin_config {
- unsigned int func;
- unsigned long *configs;
- unsigned int nconfigs;
-};
-
-/**
- * struct rockchip_pin_group: represent group of pins of a pinmux function.
- * @name: name of the pin group, used to lookup the group.
- * @pins: the pins included in this group.
- * @npins: number of pins included in this group.
- * @data: local pin configuration
- */
-struct rockchip_pin_group {
- const char *name;
- unsigned int npins;
- unsigned int *pins;
- struct rockchip_pin_config *data;
-};
-
-/**
- * struct rockchip_pmx_func: represent a pin function.
- * @name: name of the pin function, used to lookup the function.
- * @groups: one or more names of pin groups that provide this function.
- * @ngroups: number of groups included in @groups.
- */
-struct rockchip_pmx_func {
- const char *name;
- const char **groups;
- u8 ngroups;
-};
-
-struct rockchip_pinctrl {
- struct regmap *regmap_base;
- int reg_size;
- struct regmap *regmap_pull;
- struct regmap *regmap_pmu;
- struct device *dev;
- struct rockchip_pin_ctrl *ctrl;
- struct pinctrl_desc pctl;
- struct pinctrl_dev *pctl_dev;
- struct rockchip_pin_group *groups;
- unsigned int ngroups;
- struct rockchip_pmx_func *functions;
- unsigned int nfunctions;
-};
-
static struct regmap_config rockchip_regmap_config = {
.reg_bits = 32,
.val_bits = 32,
@@ -2295,86 +2057,11 @@ static int rockchip_pmx_set(struct pinctrl_dev *pctldev, unsigned selector,
return 0;
}
-static int rockchip_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
-{
- struct rockchip_pin_bank *bank = gpiochip_get_data(chip);
- u32 data;
- int ret;
-
- ret = clk_enable(bank->clk);
- if (ret < 0) {
- dev_err(bank->drvdata->dev,
- "failed to enable clock for bank %s\n", bank->name);
- return ret;
- }
- data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR);
- clk_disable(bank->clk);
-
- if (data & BIT(offset))
- return GPIO_LINE_DIRECTION_OUT;
-
- return GPIO_LINE_DIRECTION_IN;
-}
-
-/*
- * The calls to gpio_direction_output() and gpio_direction_input()
- * leads to this function call (via the pinctrl_gpio_direction_{input|output}()
- * function called from the gpiolib interface).
- */
-static int _rockchip_pmx_gpio_set_direction(struct gpio_chip *chip,
- int pin, bool input)
-{
- struct rockchip_pin_bank *bank;
- int ret;
- unsigned long flags;
- u32 data;
-
- bank = gpiochip_get_data(chip);
-
- ret = rockchip_set_mux(bank, pin, RK_FUNC_GPIO);
- if (ret < 0)
- return ret;
-
- clk_enable(bank->clk);
- raw_spin_lock_irqsave(&bank->slock, flags);
-
- data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR);
- /* set bit to 1 for output, 0 for input */
- if (!input)
- data |= BIT(pin);
- else
- data &= ~BIT(pin);
- writel_relaxed(data, bank->reg_base + GPIO_SWPORT_DDR);
-
- raw_spin_unlock_irqrestore(&bank->slock, flags);
- clk_disable(bank->clk);
-
- return 0;
-}
-
-static int rockchip_pmx_gpio_set_direction(struct pinctrl_dev *pctldev,
- struct pinctrl_gpio_range *range,
- unsigned offset, bool input)
-{
- struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
- struct gpio_chip *chip;
- int pin;
-
- chip = range->gc;
- pin = offset - chip->base;
- dev_dbg(info->dev, "gpio_direction for pin %u as %s-%d to %s\n",
- offset, range->name, pin, input ? "input" : "output");
-
- return _rockchip_pmx_gpio_set_direction(chip, offset - chip->base,
- input);
-}
-
static const struct pinmux_ops rockchip_pmx_ops = {
.get_functions_count = rockchip_pmx_get_funcs_count,
.get_function_name = rockchip_pmx_get_func_name,
.get_function_groups = rockchip_pmx_get_groups,
.set_mux = rockchip_pmx_set,
- .gpio_set_direction = rockchip_pmx_gpio_set_direction,
};
/*
@@ -2405,15 +2092,13 @@ static bool rockchip_pinconf_pull_valid(struct rockchip_pin_ctrl *ctrl,
return false;
}
-static void rockchip_gpio_set(struct gpio_chip *gc, unsigned offset, int value);
-static int rockchip_gpio_get(struct gpio_chip *gc, unsigned offset);
-
/* set the pin config settings for a specified pin */
static int rockchip_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
unsigned long *configs, unsigned num_configs)
{
struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
struct rockchip_pin_bank *bank = pin_to_bank(info, pin);
+ struct gpio_chip *gpio = &bank->gpio_chip;
enum pin_config_param param;
u32 arg;
int i;
@@ -2446,10 +2131,13 @@ static int rockchip_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
return rc;
break;
case PIN_CONFIG_OUTPUT:
- rockchip_gpio_set(&bank->gpio_chip,
- pin - bank->pin_base, arg);
- rc = _rockchip_pmx_gpio_set_direction(&bank->gpio_chip,
- pin - bank->pin_base, false);
+ rc = rockchip_set_mux(bank, pin - bank->pin_base,
+ RK_FUNC_GPIO);
+ if (rc != RK_FUNC_GPIO)
+ return -EINVAL;
+
+ rc = gpio->direction_output(gpio, pin - bank->pin_base,
+ arg);
if (rc)
return rc;
break;
@@ -2487,6 +2175,7 @@ static int rockchip_pinconf_get(struct pinctrl_dev *pctldev, unsigned int pin,
{
struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
struct rockchip_pin_bank *bank = pin_to_bank(info, pin);
+ struct gpio_chip *gpio = &bank->gpio_chip;
enum pin_config_param param = pinconf_to_config_param(*config);
u16 arg;
int rc;
@@ -2515,7 +2204,7 @@ static int rockchip_pinconf_get(struct pinctrl_dev *pctldev, unsigned int pin,
if (rc != RK_FUNC_GPIO)
return -EINVAL;
- rc = rockchip_gpio_get(&bank->gpio_chip, pin - bank->pin_base);
+ rc = gpio->get(gpio, pin - bank->pin_base);
if (rc < 0)
return rc;
@@ -2753,7 +2442,7 @@ static int rockchip_pinctrl_register(struct platform_device *pdev,
ctrldesc->npins = info->ctrl->nr_pins;
pdesc = pindesc;
- for (bank = 0 , k = 0; bank < info->ctrl->nr_banks; bank++) {
+ for (bank = 0, k = 0; bank < info->ctrl->nr_banks; bank++) {
pin_bank = &info->ctrl->pin_banks[bank];
for (pin = 0; pin < pin_bank->nr_pins; pin++, k++) {
pdesc->number = k;
@@ -2773,553 +2462,9 @@ static int rockchip_pinctrl_register(struct platform_device *pdev,
return PTR_ERR(info->pctl_dev);
}
- for (bank = 0; bank < info->ctrl->nr_banks; ++bank) {
- pin_bank = &info->ctrl->pin_banks[bank];
- pin_bank->grange.name = pin_bank->name;
- pin_bank->grange.id = bank;
- pin_bank->grange.pin_base = pin_bank->pin_base;
- pin_bank->grange.base = pin_bank->gpio_chip.base;
- pin_bank->grange.npins = pin_bank->gpio_chip.ngpio;
- pin_bank->grange.gc = &pin_bank->gpio_chip;
- pinctrl_add_gpio_range(info->pctl_dev, &pin_bank->grange);
- }
-
return 0;
}
-/*
- * GPIO handling
- */
-
-static void rockchip_gpio_set(struct gpio_chip *gc, unsigned offset, int value)
-{
- struct rockchip_pin_bank *bank = gpiochip_get_data(gc);
- void __iomem *reg = bank->reg_base + GPIO_SWPORT_DR;
- unsigned long flags;
- u32 data;
-
- clk_enable(bank->clk);
- raw_spin_lock_irqsave(&bank->slock, flags);
-
- data = readl(reg);
- data &= ~BIT(offset);
- if (value)
- data |= BIT(offset);
- writel(data, reg);
-
- raw_spin_unlock_irqrestore(&bank->slock, flags);
- clk_disable(bank->clk);
-}
-
-/*
- * Returns the level of the pin for input direction and setting of the DR
- * register for output gpios.
- */
-static int rockchip_gpio_get(struct gpio_chip *gc, unsigned offset)
-{
- struct rockchip_pin_bank *bank = gpiochip_get_data(gc);
- u32 data;
-
- clk_enable(bank->clk);
- data = readl(bank->reg_base + GPIO_EXT_PORT);
- clk_disable(bank->clk);
- data >>= offset;
- data &= 1;
- return data;
-}
-
-/*
- * gpiolib gpio_direction_input callback function. The setting of the pin
- * mux function as 'gpio input' will be handled by the pinctrl subsystem
- * interface.
- */
-static int rockchip_gpio_direction_input(struct gpio_chip *gc, unsigned offset)
-{
- return pinctrl_gpio_direction_input(gc->base + offset);
-}
-
-/*
- * gpiolib gpio_direction_output callback function. The setting of the pin
- * mux function as 'gpio output' will be handled by the pinctrl subsystem
- * interface.
- */
-static int rockchip_gpio_direction_output(struct gpio_chip *gc,
- unsigned offset, int value)
-{
- rockchip_gpio_set(gc, offset, value);
- return pinctrl_gpio_direction_output(gc->base + offset);
-}
-
-static void rockchip_gpio_set_debounce(struct gpio_chip *gc,
- unsigned int offset, bool enable)
-{
- struct rockchip_pin_bank *bank = gpiochip_get_data(gc);
- void __iomem *reg = bank->reg_base + GPIO_DEBOUNCE;
- unsigned long flags;
- u32 data;
-
- clk_enable(bank->clk);
- raw_spin_lock_irqsave(&bank->slock, flags);
-
- data = readl(reg);
- if (enable)
- data |= BIT(offset);
- else
- data &= ~BIT(offset);
- writel(data, reg);
-
- raw_spin_unlock_irqrestore(&bank->slock, flags);
- clk_disable(bank->clk);
-}
-
-/*
- * gpiolib set_config callback function. The setting of the pin
- * mux function as 'gpio output' will be handled by the pinctrl subsystem
- * interface.
- */
-static int rockchip_gpio_set_config(struct gpio_chip *gc, unsigned int offset,
- unsigned long config)
-{
- enum pin_config_param param = pinconf_to_config_param(config);
-
- switch (param) {
- case PIN_CONFIG_INPUT_DEBOUNCE:
- rockchip_gpio_set_debounce(gc, offset, true);
- /*
- * Rockchip's gpio could only support up to one period
- * of the debounce clock(pclk), which is far away from
- * satisftying the requirement, as pclk is usually near
- * 100MHz shared by all peripherals. So the fact is it
- * has crippled debounce capability could only be useful
- * to prevent any spurious glitches from waking up the system
- * if the gpio is conguired as wakeup interrupt source. Let's
- * still return -ENOTSUPP as before, to make sure the caller
- * of gpiod_set_debounce won't change its behaviour.
- */
- return -ENOTSUPP;
- default:
- return -ENOTSUPP;
- }
-}
-
-/*
- * gpiolib gpio_to_irq callback function. Creates a mapping between a GPIO pin
- * and a virtual IRQ, if not already present.
- */
-static int rockchip_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
-{
- struct rockchip_pin_bank *bank = gpiochip_get_data(gc);
- unsigned int virq;
-
- if (!bank->domain)
- return -ENXIO;
-
- clk_enable(bank->clk);
- virq = irq_create_mapping(bank->domain, offset);
- clk_disable(bank->clk);
-
- return (virq) ? : -ENXIO;
-}
-
-static const struct gpio_chip rockchip_gpiolib_chip = {
- .request = gpiochip_generic_request,
- .free = gpiochip_generic_free,
- .set = rockchip_gpio_set,
- .get = rockchip_gpio_get,
- .get_direction = rockchip_gpio_get_direction,
- .direction_input = rockchip_gpio_direction_input,
- .direction_output = rockchip_gpio_direction_output,
- .set_config = rockchip_gpio_set_config,
- .to_irq = rockchip_gpio_to_irq,
- .owner = THIS_MODULE,
-};
-
-/*
- * Interrupt handling
- */
-
-static void rockchip_irq_demux(struct irq_desc *desc)
-{
- struct irq_chip *chip = irq_desc_get_chip(desc);
- struct rockchip_pin_bank *bank = irq_desc_get_handler_data(desc);
- u32 pend;
-
- dev_dbg(bank->drvdata->dev, "got irq for bank %s\n", bank->name);
-
- chained_irq_enter(chip, desc);
-
- pend = readl_relaxed(bank->reg_base + GPIO_INT_STATUS);
-
- while (pend) {
- unsigned int irq, virq;
-
- irq = __ffs(pend);
- pend &= ~BIT(irq);
- virq = irq_find_mapping(bank->domain, irq);
-
- if (!virq) {
- dev_err(bank->drvdata->dev, "unmapped irq %d\n", irq);
- continue;
- }
-
- dev_dbg(bank->drvdata->dev, "handling irq %d\n", irq);
-
- /*
- * Triggering IRQ on both rising and falling edge
- * needs manual intervention.
- */
- if (bank->toggle_edge_mode & BIT(irq)) {
- u32 data, data_old, polarity;
- unsigned long flags;
-
- data = readl_relaxed(bank->reg_base + GPIO_EXT_PORT);
- do {
- raw_spin_lock_irqsave(&bank->slock, flags);
-
- polarity = readl_relaxed(bank->reg_base +
- GPIO_INT_POLARITY);
- if (data & BIT(irq))
- polarity &= ~BIT(irq);
- else
- polarity |= BIT(irq);
- writel(polarity,
- bank->reg_base + GPIO_INT_POLARITY);
-
- raw_spin_unlock_irqrestore(&bank->slock, flags);
-
- data_old = data;
- data = readl_relaxed(bank->reg_base +
- GPIO_EXT_PORT);
- } while ((data & BIT(irq)) != (data_old & BIT(irq)));
- }
-
- generic_handle_irq(virq);
- }
-
- chained_irq_exit(chip, desc);
-}
-
-static int rockchip_irq_set_type(struct irq_data *d, unsigned int type)
-{
- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
- struct rockchip_pin_bank *bank = gc->private;
- u32 mask = BIT(d->hwirq);
- u32 polarity;
- u32 level;
- u32 data;
- unsigned long flags;
- int ret;
-
- /* make sure the pin is configured as gpio input */
- ret = rockchip_set_mux(bank, d->hwirq, RK_FUNC_GPIO);
- if (ret < 0)
- return ret;
-
- clk_enable(bank->clk);
- raw_spin_lock_irqsave(&bank->slock, flags);
-
- data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR);
- data &= ~mask;
- writel_relaxed(data, bank->reg_base + GPIO_SWPORT_DDR);
-
- raw_spin_unlock_irqrestore(&bank->slock, flags);
-
- if (type & IRQ_TYPE_EDGE_BOTH)
- irq_set_handler_locked(d, handle_edge_irq);
- else
- irq_set_handler_locked(d, handle_level_irq);
-
- raw_spin_lock_irqsave(&bank->slock, flags);
- irq_gc_lock(gc);
-
- level = readl_relaxed(gc->reg_base + GPIO_INTTYPE_LEVEL);
- polarity = readl_relaxed(gc->reg_base + GPIO_INT_POLARITY);
-
- switch (type) {
- case IRQ_TYPE_EDGE_BOTH:
- bank->toggle_edge_mode |= mask;
- level |= mask;
-
- /*
- * Determine gpio state. If 1 next interrupt should be falling
- * otherwise rising.
- */
- data = readl(bank->reg_base + GPIO_EXT_PORT);
- if (data & mask)
- polarity &= ~mask;
- else
- polarity |= mask;
- break;
- case IRQ_TYPE_EDGE_RISING:
- bank->toggle_edge_mode &= ~mask;
- level |= mask;
- polarity |= mask;
- break;
- case IRQ_TYPE_EDGE_FALLING:
- bank->toggle_edge_mode &= ~mask;
- level |= mask;
- polarity &= ~mask;
- break;
- case IRQ_TYPE_LEVEL_HIGH:
- bank->toggle_edge_mode &= ~mask;
- level &= ~mask;
- polarity |= mask;
- break;
- case IRQ_TYPE_LEVEL_LOW:
- bank->toggle_edge_mode &= ~mask;
- level &= ~mask;
- polarity &= ~mask;
- break;
- default:
- irq_gc_unlock(gc);
- raw_spin_unlock_irqrestore(&bank->slock, flags);
- clk_disable(bank->clk);
- return -EINVAL;
- }
-
- writel_relaxed(level, gc->reg_base + GPIO_INTTYPE_LEVEL);
- writel_relaxed(polarity, gc->reg_base + GPIO_INT_POLARITY);
-
- irq_gc_unlock(gc);
- raw_spin_unlock_irqrestore(&bank->slock, flags);
- clk_disable(bank->clk);
-
- return 0;
-}
-
-static void rockchip_irq_suspend(struct irq_data *d)
-{
- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
- struct rockchip_pin_bank *bank = gc->private;
-
- clk_enable(bank->clk);
- bank->saved_masks = irq_reg_readl(gc, GPIO_INTMASK);
- irq_reg_writel(gc, ~gc->wake_active, GPIO_INTMASK);
- clk_disable(bank->clk);
-}
-
-static void rockchip_irq_resume(struct irq_data *d)
-{
- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
- struct rockchip_pin_bank *bank = gc->private;
-
- clk_enable(bank->clk);
- irq_reg_writel(gc, bank->saved_masks, GPIO_INTMASK);
- clk_disable(bank->clk);
-}
-
-static void rockchip_irq_enable(struct irq_data *d)
-{
- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
- struct rockchip_pin_bank *bank = gc->private;
-
- clk_enable(bank->clk);
- irq_gc_mask_clr_bit(d);
-}
-
-static void rockchip_irq_disable(struct irq_data *d)
-{
- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
- struct rockchip_pin_bank *bank = gc->private;
-
- irq_gc_mask_set_bit(d);
- clk_disable(bank->clk);
-}
-
-static int rockchip_interrupts_register(struct platform_device *pdev,
- struct rockchip_pinctrl *info)
-{
- struct rockchip_pin_ctrl *ctrl = info->ctrl;
- struct rockchip_pin_bank *bank = ctrl->pin_banks;
- unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
- struct irq_chip_generic *gc;
- int ret;
- int i;
-
- for (i = 0; i < ctrl->nr_banks; ++i, ++bank) {
- if (!bank->valid) {
- dev_warn(&pdev->dev, "bank %s is not valid\n",
- bank->name);
- continue;
- }
-
- ret = clk_enable(bank->clk);
- if (ret) {
- dev_err(&pdev->dev, "failed to enable clock for bank %s\n",
- bank->name);
- continue;
- }
-
- bank->domain = irq_domain_add_linear(bank->of_node, 32,
- &irq_generic_chip_ops, NULL);
- if (!bank->domain) {
- dev_warn(&pdev->dev, "could not initialize irq domain for bank %s\n",
- bank->name);
- clk_disable(bank->clk);
- continue;
- }
-
- ret = irq_alloc_domain_generic_chips(bank->domain, 32, 1,
- "rockchip_gpio_irq", handle_level_irq,
- clr, 0, 0);
- if (ret) {
- dev_err(&pdev->dev, "could not alloc generic chips for bank %s\n",
- bank->name);
- irq_domain_remove(bank->domain);
- clk_disable(bank->clk);
- continue;
- }
-
- gc = irq_get_domain_generic_chip(bank->domain, 0);
- gc->reg_base = bank->reg_base;
- gc->private = bank;
- gc->chip_types[0].regs.mask = GPIO_INTMASK;
- gc->chip_types[0].regs.ack = GPIO_PORTS_EOI;
- gc->chip_types[0].chip.irq_ack = irq_gc_ack_set_bit;
- gc->chip_types[0].chip.irq_mask = irq_gc_mask_set_bit;
- gc->chip_types[0].chip.irq_unmask = irq_gc_mask_clr_bit;
- gc->chip_types[0].chip.irq_enable = rockchip_irq_enable;
- gc->chip_types[0].chip.irq_disable = rockchip_irq_disable;
- gc->chip_types[0].chip.irq_set_wake = irq_gc_set_wake;
- gc->chip_types[0].chip.irq_suspend = rockchip_irq_suspend;
- gc->chip_types[0].chip.irq_resume = rockchip_irq_resume;
- gc->chip_types[0].chip.irq_set_type = rockchip_irq_set_type;
- gc->wake_enabled = IRQ_MSK(bank->nr_pins);
-
- /*
- * Linux assumes that all interrupts start out disabled/masked.
- * Our driver only uses the concept of masked and always keeps
- * things enabled, so for us that's all masked and all enabled.
- */
- writel_relaxed(0xffffffff, bank->reg_base + GPIO_INTMASK);
- writel_relaxed(0xffffffff, bank->reg_base + GPIO_PORTS_EOI);
- writel_relaxed(0xffffffff, bank->reg_base + GPIO_INTEN);
- gc->mask_cache = 0xffffffff;
-
- irq_set_chained_handler_and_data(bank->irq,
- rockchip_irq_demux, bank);
- clk_disable(bank->clk);
- }
-
- return 0;
-}
-
-static int rockchip_gpiolib_register(struct platform_device *pdev,
- struct rockchip_pinctrl *info)
-{
- struct rockchip_pin_ctrl *ctrl = info->ctrl;
- struct rockchip_pin_bank *bank = ctrl->pin_banks;
- struct gpio_chip *gc;
- int ret;
- int i;
-
- for (i = 0; i < ctrl->nr_banks; ++i, ++bank) {
- if (!bank->valid) {
- dev_warn(&pdev->dev, "bank %s is not valid\n",
- bank->name);
- continue;
- }
-
- bank->gpio_chip = rockchip_gpiolib_chip;
-
- gc = &bank->gpio_chip;
- gc->base = bank->pin_base;
- gc->ngpio = bank->nr_pins;
- gc->parent = &pdev->dev;
- gc->of_node = bank->of_node;
- gc->label = bank->name;
-
- ret = gpiochip_add_data(gc, bank);
- if (ret) {
- dev_err(&pdev->dev, "failed to register gpio_chip %s, error code: %d\n",
- gc->label, ret);
- goto fail;
- }
- }
-
- rockchip_interrupts_register(pdev, info);
-
- return 0;
-
-fail:
- for (--i, --bank; i >= 0; --i, --bank) {
- if (!bank->valid)
- continue;
- gpiochip_remove(&bank->gpio_chip);
- }
- return ret;
-}
-
-static int rockchip_gpiolib_unregister(struct platform_device *pdev,
- struct rockchip_pinctrl *info)
-{
- struct rockchip_pin_ctrl *ctrl = info->ctrl;
- struct rockchip_pin_bank *bank = ctrl->pin_banks;
- int i;
-
- for (i = 0; i < ctrl->nr_banks; ++i, ++bank) {
- if (!bank->valid)
- continue;
- gpiochip_remove(&bank->gpio_chip);
- }
-
- return 0;
-}
-
-static int rockchip_get_bank_data(struct rockchip_pin_bank *bank,
- struct rockchip_pinctrl *info)
-{
- struct resource res;
- void __iomem *base;
-
- if (of_address_to_resource(bank->of_node, 0, &res)) {
- dev_err(info->dev, "cannot find IO resource for bank\n");
- return -ENOENT;
- }
-
- bank->reg_base = devm_ioremap_resource(info->dev, &res);
- if (IS_ERR(bank->reg_base))
- return PTR_ERR(bank->reg_base);
-
- /*
- * special case, where parts of the pull setting-registers are
- * part of the PMU register space
- */
- if (of_device_is_compatible(bank->of_node,
- "rockchip,rk3188-gpio-bank0")) {
- struct device_node *node;
-
- node = of_parse_phandle(bank->of_node->parent,
- "rockchip,pmu", 0);
- if (!node) {
- if (of_address_to_resource(bank->of_node, 1, &res)) {
- dev_err(info->dev, "cannot find IO resource for bank\n");
- return -ENOENT;
- }
-
- base = devm_ioremap_resource(info->dev, &res);
- if (IS_ERR(base))
- return PTR_ERR(base);
- rockchip_regmap_config.max_register =
- resource_size(&res) - 4;
- rockchip_regmap_config.name =
- "rockchip,rk3188-gpio-bank0-pull";
- bank->regmap_pull = devm_regmap_init_mmio(info->dev,
- base,
- &rockchip_regmap_config);
- }
- of_node_put(node);
- }
-
- bank->irq = irq_of_parse_and_map(bank->of_node, 0);
-
- bank->clk = of_clk_get(bank->of_node, 0);
- if (IS_ERR(bank->clk))
- return PTR_ERR(bank->clk);
-
- return clk_prepare(bank->clk);
-}
-
static const struct of_device_id rockchip_pinctrl_dt_match[];
/* retrieve the soc specific data */
@@ -3329,7 +2474,6 @@ static struct rockchip_pin_ctrl *rockchip_pinctrl_get_soc_data(
{
const struct of_device_id *match;
struct device_node *node = pdev->dev.of_node;
- struct device_node *np;
struct rockchip_pin_ctrl *ctrl;
struct rockchip_pin_bank *bank;
int grf_offs, pmu_offs, drv_grf_offs, drv_pmu_offs, i, j;
@@ -3337,23 +2481,6 @@ static struct rockchip_pin_ctrl *rockchip_pinctrl_get_soc_data(
match = of_match_node(rockchip_pinctrl_dt_match, node);
ctrl = (struct rockchip_pin_ctrl *)match->data;
- for_each_child_of_node(node, np) {
- if (!of_find_property(np, "gpio-controller", NULL))
- continue;
-
- bank = ctrl->pin_banks;
- for (i = 0; i < ctrl->nr_banks; ++i, ++bank) {
- if (!strcmp(bank->name, np->name)) {
- bank->of_node = np;
-
- if (!rockchip_get_bank_data(bank, d))
- bank->valid = true;
-
- break;
- }
- }
- }
-
grf_offs = ctrl->grf_mux_offset;
pmu_offs = ctrl->pmu_mux_offset;
drv_pmu_offs = ctrl->pmu_drv_offset;
@@ -3574,18 +2701,18 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev)
return PTR_ERR(info->regmap_pmu);
}
- ret = rockchip_gpiolib_register(pdev, info);
+ ret = rockchip_pinctrl_register(pdev, info);
if (ret)
return ret;
- ret = rockchip_pinctrl_register(pdev, info);
+ platform_set_drvdata(pdev, info);
+
+ ret = of_platform_populate(np, rockchip_bank_match, NULL, NULL);
if (ret) {
- rockchip_gpiolib_unregister(pdev, info);
+ dev_err(&pdev->dev, "failed to register gpio device\n");
return ret;
}
- platform_set_drvdata(pdev, info);
-
return 0;
}
diff --git a/drivers/pinctrl/pinctrl-rockchip.h b/drivers/pinctrl/pinctrl-rockchip.h
new file mode 100644
index 000000000000..589d4d2a98c9
--- /dev/null
+++ b/drivers/pinctrl/pinctrl-rockchip.h
@@ -0,0 +1,287 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020-2021 Rockchip Electronics Co. Ltd.
+ *
+ * Copyright (c) 2013 MundoReader S.L.
+ * Author: Heiko Stuebner <heiko@sntech.de>
+ *
+ * With some ideas taken from pinctrl-samsung:
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com
+ * Copyright (c) 2012 Linaro Ltd
+ * https://www.linaro.org
+ *
+ * and pinctrl-at91:
+ * Copyright (C) 2011-2012 Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
+ */
+
+#ifndef _PINCTRL_ROCKCHIP_H
+#define _PINCTRL_ROCKCHIP_H
+
+enum rockchip_pinctrl_type {
+ PX30,
+ RV1108,
+ RK2928,
+ RK3066B,
+ RK3128,
+ RK3188,
+ RK3288,
+ RK3308,
+ RK3368,
+ RK3399,
+ RK3568,
+};
+
+/**
+ * struct rockchip_gpio_regs
+ * @port_dr: data register
+ * @port_ddr: data direction register
+ * @int_en: interrupt enable
+ * @int_mask: interrupt mask
+ * @int_type: interrupt trigger type, such as high, low, edge trigger type.
+ * @int_polarity: interrupt polarity enable register
+ * @int_bothedge: interrupt bothedge enable register
+ * @int_status: interrupt status register
+ * @int_rawstatus: int_status = int_rawstatus & int_mask
+ * @debounce: enable debounce for interrupt signal
+ * @dbclk_div_en: enable divider for debounce clock
+ * @dbclk_div_con: setting for divider of debounce clock
+ * @port_eoi: end of interrupt of the port
+ * @ext_port: port data from external
+ * @version_id: controller version register
+ */
+struct rockchip_gpio_regs {
+ u32 port_dr;
+ u32 port_ddr;
+ u32 int_en;
+ u32 int_mask;
+ u32 int_type;
+ u32 int_polarity;
+ u32 int_bothedge;
+ u32 int_status;
+ u32 int_rawstatus;
+ u32 debounce;
+ u32 dbclk_div_en;
+ u32 dbclk_div_con;
+ u32 port_eoi;
+ u32 ext_port;
+ u32 version_id;
+};
+
+/**
+ * struct rockchip_iomux
+ * @type: iomux variant using IOMUX_* constants
+ * @offset: if initialized to -1 it will be autocalculated, by specifying
+ * an initial offset value the relevant source offset can be reset
+ * to a new value for autocalculating the following iomux registers.
+ */
+struct rockchip_iomux {
+ int type;
+ int offset;
+};
+
+/*
+ * enum type index corresponding to rockchip_perpin_drv_list arrays index.
+ */
+enum rockchip_pin_drv_type {
+ DRV_TYPE_IO_DEFAULT = 0,
+ DRV_TYPE_IO_1V8_OR_3V0,
+ DRV_TYPE_IO_1V8_ONLY,
+ DRV_TYPE_IO_1V8_3V0_AUTO,
+ DRV_TYPE_IO_3V3_ONLY,
+ DRV_TYPE_MAX
+};
+
+/*
+ * enum type index corresponding to rockchip_pull_list arrays index.
+ */
+enum rockchip_pin_pull_type {
+ PULL_TYPE_IO_DEFAULT = 0,
+ PULL_TYPE_IO_1V8_ONLY,
+ PULL_TYPE_MAX
+};
+
+/**
+ * struct rockchip_drv
+ * @drv_type: drive strength variant using rockchip_pin_drv_type
+ * @offset: if initialized to -1 it will be autocalculated, by specifying
+ * an initial offset value the relevant source offset can be reset
+ * to a new value for autocalculating the following drive strength
+ * registers. If a chip uses its own drv calc function instead to
+ * calculate the register offset, the variant can be ignored.
+ */
+struct rockchip_drv {
+ enum rockchip_pin_drv_type drv_type;
+ int offset;
+};
+
+/**
+ * struct rockchip_pin_bank
+ * @dev: the pinctrl device bound to the bank
+ * @reg_base: register base of the gpio bank
+ * @regmap_pull: optional separate register for additional pull settings
+ * @clk: clock of the gpio bank
+ * @db_clk: clock of the gpio debounce
+ * @irq: interrupt of the gpio bank
+ * @saved_masks: Saved content of GPIO_INTEN at suspend time.
+ * @pin_base: first pin number
+ * @nr_pins: number of pins in this bank
+ * @name: name of the bank
+ * @bank_num: number of the bank, to account for holes
+ * @iomux: array describing the 4 iomux sources of the bank
+ * @drv: array describing the 4 drive strength sources of the bank
+ * @pull_type: array describing the 4 pull type sources of the bank
+ * @valid: is all necessary information present
+ * @of_node: dt node of this bank
+ * @drvdata: common pinctrl basedata
+ * @domain: irqdomain of the gpio bank
+ * @gpio_chip: gpiolib chip
+ * @grange: gpio range
+ * @slock: spinlock for the gpio bank
+ * @toggle_edge_mode: bit mask to toggle (falling/rising) edge mode
+ * @recalced_mask: bit mask to indicate a need to recalculate the mask
+ * @route_mask: bits describing the routing pins of per bank
+ */
+struct rockchip_pin_bank {
+ struct device *dev;
+ void __iomem *reg_base;
+ struct regmap *regmap_pull;
+ struct clk *clk;
+ struct clk *db_clk;
+ int irq;
+ u32 saved_masks;
+ u32 pin_base;
+ u8 nr_pins;
+ char *name;
+ u8 bank_num;
+ struct rockchip_iomux iomux[4];
+ struct rockchip_drv drv[4];
+ enum rockchip_pin_pull_type pull_type[4];
+ bool valid;
+ struct device_node *of_node;
+ struct rockchip_pinctrl *drvdata;
+ struct irq_domain *domain;
+ struct gpio_chip gpio_chip;
+ struct pinctrl_gpio_range grange;
+ raw_spinlock_t slock;
+ const struct rockchip_gpio_regs *gpio_regs;
+ u32 gpio_type;
+ u32 toggle_edge_mode;
+ u32 recalced_mask;
+ u32 route_mask;
+};
+
+/**
+ * struct rockchip_mux_recalced_data: represent a pin iomux data.
+ * @num: bank number.
+ * @pin: pin number.
+ * @bit: index at register.
+ * @reg: register offset.
+ * @mask: mask bit
+ */
+struct rockchip_mux_recalced_data {
+ u8 num;
+ u8 pin;
+ u32 reg;
+ u8 bit;
+ u8 mask;
+};
+
+enum rockchip_mux_route_location {
+ ROCKCHIP_ROUTE_SAME = 0,
+ ROCKCHIP_ROUTE_PMU,
+ ROCKCHIP_ROUTE_GRF,
+};
+
+/**
+ * struct rockchip_mux_route_data: represents the iomux route data of a pin.
+ * @bank_num: bank number.
+ * @pin: index at register or used to calc index.
+ * @func: the mux function of the pin.
+ * @route_location: the mux route location (same, pmu, grf).
+ * @route_offset: the route register offset.
+ * @route_val: the value for the route register.
+ */
+struct rockchip_mux_route_data {
+ u8 bank_num;
+ u8 pin;
+ u8 func;
+ enum rockchip_mux_route_location route_location;
+ u32 route_offset;
+ u32 route_val;
+};
+
+struct rockchip_pin_ctrl {
+ struct rockchip_pin_bank *pin_banks;
+ u32 nr_banks;
+ u32 nr_pins;
+ char *label;
+ enum rockchip_pinctrl_type type;
+ int grf_mux_offset;
+ int pmu_mux_offset;
+ int grf_drv_offset;
+ int pmu_drv_offset;
+ struct rockchip_mux_recalced_data *iomux_recalced;
+ u32 niomux_recalced;
+ struct rockchip_mux_route_data *iomux_routes;
+ u32 niomux_routes;
+
+ void (*pull_calc_reg)(struct rockchip_pin_bank *bank,
+ int pin_num, struct regmap **regmap,
+ int *reg, u8 *bit);
+ void (*drv_calc_reg)(struct rockchip_pin_bank *bank,
+ int pin_num, struct regmap **regmap,
+ int *reg, u8 *bit);
+ int (*schmitt_calc_reg)(struct rockchip_pin_bank *bank,
+ int pin_num, struct regmap **regmap,
+ int *reg, u8 *bit);
+};
+
+struct rockchip_pin_config {
+ unsigned int func;
+ unsigned long *configs;
+ unsigned int nconfigs;
+};
+
+/**
+ * struct rockchip_pin_group: represent group of pins of a pinmux function.
+ * @name: name of the pin group, used to lookup the group.
+ * @pins: the pins included in this group.
+ * @npins: number of pins included in this group.
+ * @data: local pin configuration
+ */
+struct rockchip_pin_group {
+ const char *name;
+ unsigned int npins;
+ unsigned int *pins;
+ struct rockchip_pin_config *data;
+};
+
+/**
+ * struct rockchip_pmx_func: represent a pin function.
+ * @name: name of the pin function, used to lookup the function.
+ * @groups: one or more names of pin groups that provide this function.
+ * @ngroups: number of groups included in @groups.
+ */
+struct rockchip_pmx_func {
+ const char *name;
+ const char **groups;
+ u8 ngroups;
+};
+
+struct rockchip_pinctrl {
+ struct regmap *regmap_base;
+ int reg_size;
+ struct regmap *regmap_pull;
+ struct regmap *regmap_pmu;
+ struct device *dev;
+ struct rockchip_pin_ctrl *ctrl;
+ struct pinctrl_desc pctl;
+ struct pinctrl_dev *pctl_dev;
+ struct rockchip_pin_group *groups;
+ unsigned int ngroups;
+ struct rockchip_pmx_func *functions;
+ unsigned int nfunctions;
+};
+
+#endif
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index e3aa64798f7d..aa6e72214609 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -1491,8 +1491,8 @@ static int pcs_irq_handle(struct pcs_soc_data *pcs_soc)
mask = pcs->read(pcswi->reg);
raw_spin_unlock(&pcs->lock);
if (mask & pcs_soc->irq_status_mask) {
- generic_handle_irq(irq_find_mapping(pcs->domain,
- pcswi->hwirq));
+ generic_handle_domain_irq(pcs->domain,
+ pcswi->hwirq);
count++;
}
}
diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c
index 43d9e6c7fd81..fa3edb4b898a 100644
--- a/drivers/pinctrl/pinctrl-st.c
+++ b/drivers/pinctrl/pinctrl-st.c
@@ -1420,7 +1420,7 @@ static void __gpio_irq_handler(struct st_gpio_bank *bank)
continue;
}
- generic_handle_irq(irq_find_mapping(bank->gpio_chip.irq.domain, n));
+ generic_handle_domain_irq(bank->gpio_chip.irq.domain, n);
}
}
}
diff --git a/drivers/pinctrl/qcom/Kconfig b/drivers/pinctrl/qcom/Kconfig
index 2f51b4f99393..cad4e60df618 100644
--- a/drivers/pinctrl/qcom/Kconfig
+++ b/drivers/pinctrl/qcom/Kconfig
@@ -13,7 +13,7 @@ config PINCTRL_MSM
config PINCTRL_APQ8064
tristate "Qualcomm APQ8064 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -21,7 +21,7 @@ config PINCTRL_APQ8064
config PINCTRL_APQ8084
tristate "Qualcomm APQ8084 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -29,7 +29,7 @@ config PINCTRL_APQ8084
config PINCTRL_IPQ4019
tristate "Qualcomm IPQ4019 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -37,7 +37,7 @@ config PINCTRL_IPQ4019
config PINCTRL_IPQ8064
tristate "Qualcomm IPQ8064 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -45,7 +45,7 @@ config PINCTRL_IPQ8064
config PINCTRL_IPQ8074
tristate "Qualcomm Technologies, Inc. IPQ8074 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for
@@ -55,7 +55,7 @@ config PINCTRL_IPQ8074
config PINCTRL_IPQ6018
tristate "Qualcomm Technologies, Inc. IPQ6018 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for
@@ -65,7 +65,7 @@ config PINCTRL_IPQ6018
config PINCTRL_MSM8226
tristate "Qualcomm 8226 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -74,7 +74,7 @@ config PINCTRL_MSM8226
config PINCTRL_MSM8660
tristate "Qualcomm 8660 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -82,7 +82,7 @@ config PINCTRL_MSM8660
config PINCTRL_MSM8960
tristate "Qualcomm 8960 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -90,7 +90,7 @@ config PINCTRL_MSM8960
config PINCTRL_MDM9615
tristate "Qualcomm 9615 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -98,7 +98,7 @@ config PINCTRL_MDM9615
config PINCTRL_MSM8X74
tristate "Qualcomm 8x74 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -106,7 +106,7 @@ config PINCTRL_MSM8X74
config PINCTRL_MSM8916
tristate "Qualcomm 8916 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -114,7 +114,7 @@ config PINCTRL_MSM8916
config PINCTRL_MSM8953
tristate "Qualcomm 8953 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -124,7 +124,7 @@ config PINCTRL_MSM8953
config PINCTRL_MSM8976
tristate "Qualcomm 8976 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -134,7 +134,7 @@ config PINCTRL_MSM8976
config PINCTRL_MSM8994
tristate "Qualcomm 8994 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -143,7 +143,7 @@ config PINCTRL_MSM8994
config PINCTRL_MSM8996
tristate "Qualcomm MSM8996 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -151,7 +151,7 @@ config PINCTRL_MSM8996
config PINCTRL_MSM8998
tristate "Qualcomm MSM8998 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -159,7 +159,7 @@ config PINCTRL_MSM8998
config PINCTRL_QCS404
tristate "Qualcomm QCS404 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -167,7 +167,7 @@ config PINCTRL_QCS404
config PINCTRL_QDF2XXX
tristate "Qualcomm Technologies QDF2xxx pin controller driver"
- depends on GPIOLIB && ACPI
+ depends on ACPI
depends on PINCTRL_MSM
help
This is the GPIO driver for the TLMM block found on the
@@ -175,7 +175,7 @@ config PINCTRL_QDF2XXX
config PINCTRL_QCOM_SPMI_PMIC
tristate "Qualcomm SPMI PMIC pin controller driver"
- depends on GPIOLIB && OF && SPMI
+ depends on OF && SPMI
select REGMAP_SPMI
select PINMUX
select PINCONF
@@ -190,7 +190,7 @@ config PINCTRL_QCOM_SPMI_PMIC
config PINCTRL_QCOM_SSBI_PMIC
tristate "Qualcomm SSBI PMIC pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
select PINMUX
select PINCONF
select GENERIC_PINCONF
@@ -204,7 +204,7 @@ config PINCTRL_QCOM_SSBI_PMIC
config PINCTRL_SC7180
tristate "Qualcomm Technologies Inc SC7180 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -213,7 +213,7 @@ config PINCTRL_SC7180
config PINCTRL_SC7280
tristate "Qualcomm Technologies Inc SC7280 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -222,7 +222,7 @@ config PINCTRL_SC7280
config PINCTRL_SC8180X
tristate "Qualcomm Technologies Inc SC8180x pin controller driver"
- depends on GPIOLIB && (OF || ACPI)
+ depends on (OF || ACPI)
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -231,7 +231,7 @@ config PINCTRL_SC8180X
config PINCTRL_SDM660
tristate "Qualcomm Technologies Inc SDM660 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -240,7 +240,7 @@ config PINCTRL_SDM660
config PINCTRL_SDM845
tristate "Qualcomm Technologies Inc SDM845 pin controller driver"
- depends on GPIOLIB && (OF || ACPI)
+ depends on (OF || ACPI)
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -249,7 +249,7 @@ config PINCTRL_SDM845
config PINCTRL_SDX55
tristate "Qualcomm Technologies Inc SDX55 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -258,7 +258,7 @@ config PINCTRL_SDX55
config PINCTRL_SM6125
tristate "Qualcomm Technologies Inc SM6125 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -267,7 +267,7 @@ config PINCTRL_SM6125
config PINCTRL_SM8150
tristate "Qualcomm Technologies Inc SM8150 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -276,7 +276,7 @@ config PINCTRL_SM8150
config PINCTRL_SM8250
tristate "Qualcomm Technologies Inc SM8250 pin controller driver"
- depends on GPIOLIB && OF
+ depends on OF
depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
@@ -285,8 +285,7 @@ config PINCTRL_SM8250
config PINCTRL_SM8350
tristate "Qualcomm Technologies Inc SM8350 pin controller driver"
- depends on GPIOLIB && OF
- select PINCTRL_MSM
+ depends on PINCTRL_MSM
help
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
Qualcomm Technologies Inc TLMM block found on the Qualcomm
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index d70caecd21d2..8476a8ac4451 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -1177,7 +1177,6 @@ static void msm_gpio_irq_handler(struct irq_desc *desc)
const struct msm_pingroup *g;
struct msm_pinctrl *pctrl = gpiochip_get_data(gc);
struct irq_chip *chip = irq_desc_get_chip(desc);
- int irq_pin;
int handled = 0;
u32 val;
int i;
@@ -1192,8 +1191,7 @@ static void msm_gpio_irq_handler(struct irq_desc *desc)
g = &pctrl->soc->groups[i];
val = msm_readl_intr_status(pctrl, g);
if (val & BIT(g->intr_status_bit)) {
- irq_pin = irq_find_mapping(gc->irq.domain, i);
- generic_handle_irq(irq_pin);
+ generic_handle_domain_irq(gc->irq.domain, i);
handled++;
}
}
diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c
index 2b99f4130e1e..0489c899b401 100644
--- a/drivers/pinctrl/samsung/pinctrl-exynos.c
+++ b/drivers/pinctrl/samsung/pinctrl-exynos.c
@@ -246,7 +246,8 @@ static irqreturn_t exynos_eint_gpio_irq(int irq, void *data)
{
struct samsung_pinctrl_drv_data *d = data;
struct samsung_pin_bank *bank = d->pin_banks;
- unsigned int svc, group, pin, virq;
+ unsigned int svc, group, pin;
+ int ret;
svc = readl(bank->eint_base + EXYNOS_SVC_OFFSET);
group = EXYNOS_SVC_GROUP(svc);
@@ -256,10 +257,10 @@ static irqreturn_t exynos_eint_gpio_irq(int irq, void *data)
return IRQ_HANDLED;
bank += (group - 1);
- virq = irq_linear_revmap(bank->irq_domain, pin);
- if (!virq)
+ ret = generic_handle_domain_irq(bank->irq_domain, pin);
+ if (ret)
return IRQ_NONE;
- generic_handle_irq(virq);
+
return IRQ_HANDLED;
}
@@ -473,12 +474,10 @@ static void exynos_irq_eint0_15(struct irq_desc *desc)
struct exynos_weint_data *eintd = irq_desc_get_handler_data(desc);
struct samsung_pin_bank *bank = eintd->bank;
struct irq_chip *chip = irq_desc_get_chip(desc);
- int eint_irq;
chained_irq_enter(chip, desc);
- eint_irq = irq_linear_revmap(bank->irq_domain, eintd->irq);
- generic_handle_irq(eint_irq);
+ generic_handle_domain_irq(bank->irq_domain, eintd->irq);
chained_irq_exit(chip, desc);
}
@@ -490,7 +489,7 @@ static inline void exynos_irq_demux_eint(unsigned int pend,
while (pend) {
irq = fls(pend) - 1;
- generic_handle_irq(irq_find_mapping(domain, irq));
+ generic_handle_domain_irq(domain, irq);
pend &= ~(1 << irq);
}
}
diff --git a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c
index 00d77d6946b5..ac1eba30cf40 100644
--- a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c
+++ b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c
@@ -234,14 +234,12 @@ static void s3c2410_demux_eint0_3(struct irq_desc *desc)
{
struct irq_data *data = irq_desc_get_irq_data(desc);
struct s3c24xx_eint_data *eint_data = irq_desc_get_handler_data(desc);
- unsigned int virq;
+ int ret;
/* the first 4 eints have a simple 1 to 1 mapping */
- virq = irq_linear_revmap(eint_data->domains[data->hwirq], data->hwirq);
+ ret = generic_handle_domain_irq(eint_data->domains[data->hwirq], data->hwirq);
/* Something must be really wrong if an unmapped EINT is unmasked */
- BUG_ON(!virq);
-
- generic_handle_irq(virq);
+ BUG_ON(ret);
}
/* Handling of EINTs 0-3 on S3C2412 and S3C2413 */
@@ -290,16 +288,14 @@ static void s3c2412_demux_eint0_3(struct irq_desc *desc)
struct s3c24xx_eint_data *eint_data = irq_desc_get_handler_data(desc);
struct irq_data *data = irq_desc_get_irq_data(desc);
struct irq_chip *chip = irq_data_get_irq_chip(data);
- unsigned int virq;
+ int ret;
chained_irq_enter(chip, desc);
/* the first 4 eints have a simple 1 to 1 mapping */
- virq = irq_linear_revmap(eint_data->domains[data->hwirq], data->hwirq);
+ ret = generic_handle_domain_irq(eint_data->domains[data->hwirq], data->hwirq);
/* Something must be really wrong if an unmapped EINT is unmasked */
- BUG_ON(!virq);
-
- generic_handle_irq(virq);
+ BUG_ON(ret);
chained_irq_exit(chip, desc);
}
@@ -364,15 +360,14 @@ static inline void s3c24xx_demux_eint(struct irq_desc *desc,
pend &= range;
while (pend) {
- unsigned int virq, irq;
+ unsigned int irq;
+ int ret;
irq = __ffs(pend);
pend &= ~(1 << irq);
- virq = irq_linear_revmap(data->domains[irq], irq - offset);
+ ret = generic_handle_domain_irq(data->domains[irq], irq - offset);
/* Something is really wrong if an unmapped EINT is unmasked */
- BUG_ON(!virq);
-
- generic_handle_irq(virq);
+ BUG_ON(ret);
}
chained_irq_exit(chip, desc);
diff --git a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c
index 53e2a6412add..c5f95a1071ae 100644
--- a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c
+++ b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c
@@ -414,7 +414,7 @@ static void s3c64xx_eint_gpio_irq(struct irq_desc *desc)
unsigned int svc;
unsigned int group;
unsigned int pin;
- unsigned int virq;
+ int ret;
svc = readl(drvdata->virt_base + SERVICE_REG);
group = SVC_GROUP(svc);
@@ -431,14 +431,12 @@ static void s3c64xx_eint_gpio_irq(struct irq_desc *desc)
pin -= 8;
}
- virq = irq_linear_revmap(data->domains[group], pin);
+ ret = generic_handle_domain_irq(data->domains[group], pin);
/*
* Something must be really wrong if an unmapped EINT
* was unmasked...
*/
- BUG_ON(!virq);
-
- generic_handle_irq(virq);
+ BUG_ON(ret);
} while (1);
chained_irq_exit(chip, desc);
@@ -607,18 +605,17 @@ static inline void s3c64xx_irq_demux_eint(struct irq_desc *desc, u32 range)
pend &= range;
while (pend) {
- unsigned int virq, irq;
+ unsigned int irq;
+ int ret;
irq = fls(pend) - 1;
pend &= ~(1 << irq);
- virq = irq_linear_revmap(data->domains[irq], data->pins[irq]);
+ ret = generic_handle_domain_irq(data->domains[irq], data->pins[irq]);
/*
* Something must be really wrong if an unmapped EINT
* was unmasked...
*/
- BUG_ON(!virq);
-
- generic_handle_irq(virq);
+ BUG_ON(ret);
}
chained_irq_exit(chip, desc);
diff --git a/drivers/pinctrl/spear/pinctrl-plgpio.c b/drivers/pinctrl/spear/pinctrl-plgpio.c
index 1ebbc49b16f1..43bb334af1e1 100644
--- a/drivers/pinctrl/spear/pinctrl-plgpio.c
+++ b/drivers/pinctrl/spear/pinctrl-plgpio.c
@@ -400,8 +400,7 @@ static void plgpio_irq_handler(struct irq_desc *desc)
/* get correct irq line number */
pin = i * MAX_GPIO_PER_REG + pin;
- generic_handle_irq(
- irq_find_mapping(gc->irq.domain, pin));
+ generic_handle_domain_irq(gc->irq.domain, pin);
}
}
chained_irq_exit(irqchip, desc);
diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
index dc8d39ae045b..862c84efb718 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
@@ -1149,11 +1149,9 @@ static void sunxi_pinctrl_irq_handler(struct irq_desc *desc)
if (val) {
int irqoffset;
- for_each_set_bit(irqoffset, &val, IRQ_PER_BANK) {
- int pin_irq = irq_find_mapping(pctl->domain,
- bank * IRQ_PER_BANK + irqoffset);
- generic_handle_irq(pin_irq);
- }
+ for_each_set_bit(irqoffset, &val, IRQ_PER_BANK)
+ generic_handle_domain_irq(pctl->domain,
+ bank * IRQ_PER_BANK + irqoffset);
}
chained_irq_exit(chip, desc);
@@ -1219,10 +1217,12 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev)
}
/*
- * We suppose that we won't have any more functions than pins,
- * we'll reallocate that later anyway
+ * Find an upper bound for the maximum number of functions: in
+ * the worst case we have gpio_in, gpio_out, irq and up to four
+ * special functions per pin, plus one entry for the sentinel.
+ * We'll reallocate that later anyway.
*/
- pctl->functions = kcalloc(pctl->ngroups,
+ pctl->functions = kcalloc(4 * pctl->ngroups + 4,
sizeof(*pctl->functions),
GFP_KERNEL);
if (!pctl->functions)
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 7d385c3b2239..d12db6c316ea 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -508,6 +508,7 @@ config THINKPAD_ACPI
depends on RFKILL || RFKILL = n
depends on ACPI_VIDEO || ACPI_VIDEO = n
depends on BACKLIGHT_CLASS_DEVICE
+ depends on I2C
select ACPI_PLATFORM_PROFILE
select HWMON
select NVRAM
@@ -691,6 +692,7 @@ config INTEL_HID_EVENT
tristate "INTEL HID Event"
depends on ACPI
depends on INPUT
+ depends on I2C
select INPUT_SPARSEKMAP
help
This driver provides support for the Intel HID Event hotkey interface.
@@ -742,6 +744,7 @@ config INTEL_VBTN
tristate "INTEL VIRTUAL BUTTON"
depends on ACPI
depends on INPUT
+ depends on I2C
select INPUT_SPARSEKMAP
help
This driver provides support for the Intel Virtual Button interface.
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index 0cb927f0f301..a81dc4b191b7 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -41,6 +41,10 @@ static int wapf = -1;
module_param(wapf, uint, 0444);
MODULE_PARM_DESC(wapf, "WAPF value");
+static int tablet_mode_sw = -1;
+module_param(tablet_mode_sw, uint, 0444);
+MODULE_PARM_DESC(tablet_mode_sw, "Tablet mode detect: -1:auto 0:disable 1:kbd-dock 2:lid-flip");
+
static struct quirk_entry *quirks;
static bool asus_q500a_i8042_filter(unsigned char data, unsigned char str,
@@ -458,6 +462,15 @@ static const struct dmi_system_id asus_quirks[] = {
},
.driver_data = &quirk_asus_use_lid_flip_devid,
},
+ {
+ .callback = dmi_matched,
+ .ident = "ASUS TP200s / E205SA",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "E205SA"),
+ },
+ .driver_data = &quirk_asus_use_lid_flip_devid,
+ },
{},
};
@@ -477,6 +490,21 @@ static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver)
else
wapf = quirks->wapf;
+ switch (tablet_mode_sw) {
+ case 0:
+ quirks->use_kbd_dock_devid = false;
+ quirks->use_lid_flip_devid = false;
+ break;
+ case 1:
+ quirks->use_kbd_dock_devid = true;
+ quirks->use_lid_flip_devid = false;
+ break;
+ case 2:
+ quirks->use_kbd_dock_devid = false;
+ quirks->use_lid_flip_devid = true;
+ break;
+ }
+
if (quirks->i8042_filter) {
ret = i8042_install_filter(quirks->i8042_filter);
if (ret) {
diff --git a/drivers/platform/x86/dual_accel_detect.h b/drivers/platform/x86/dual_accel_detect.h
new file mode 100644
index 000000000000..a9eae17cc43d
--- /dev/null
+++ b/drivers/platform/x86/dual_accel_detect.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Helper code to detect 360 degree hinges (yoga) style 2-in-1 devices using 2 accelerometers
+ * to allow the OS to determine the angle between the display and the base of the device.
+ *
+ * On Windows these are read by a special HingeAngleService process which calls undocumented
+ * ACPI methods, to let the firmware know if the 2-in-1 is in tablet- or laptop-mode.
+ * The firmware may use this to disable the kbd and touchpad to avoid spurious input in
+ * tablet-mode as well as to report SW_TABLET_MODE info to the OS.
+ *
+ * Since Linux does not call these undocumented methods, the SW_TABLET_MODE info reported
+ * by various drivers/platform/x86 drivers is incorrect. These drivers use the detection
+ * code in this file to disable SW_TABLET_MODE reporting to avoid reporting broken info
+ * (instead userspace can derive the status itself by directly reading the 2 accels).
+ */
+
+#include <linux/acpi.h>
+#include <linux/i2c.h>
+
+static int dual_accel_i2c_resource_count(struct acpi_resource *ares, void *data)
+{
+ struct acpi_resource_i2c_serialbus *sb;
+ int *count = data;
+
+ if (i2c_acpi_get_i2c_resource(ares, &sb))
+ *count = *count + 1;
+
+ return 1;
+}
+
+static int dual_accel_i2c_client_count(struct acpi_device *adev)
+{
+ int ret, count = 0;
+ LIST_HEAD(r);
+
+ ret = acpi_dev_get_resources(adev, &r, dual_accel_i2c_resource_count, &count);
+ if (ret < 0)
+ return ret;
+
+ acpi_dev_free_resource_list(&r);
+ return count;
+}
+
+static bool dual_accel_detect_bosc0200(void)
+{
+ struct acpi_device *adev;
+ int count;
+
+ adev = acpi_dev_get_first_match_dev("BOSC0200", NULL, -1);
+ if (!adev)
+ return false;
+
+ count = dual_accel_i2c_client_count(adev);
+
+ acpi_dev_put(adev);
+
+ return count == 2;
+}
+
+static bool dual_accel_detect(void)
+{
+ /* Systems which use a pair of accels with KIOX010A / KIOX020A ACPI ids */
+ if (acpi_dev_present("KIOX010A", NULL, -1) &&
+ acpi_dev_present("KIOX020A", NULL, -1))
+ return true;
+
+ /* Systems which use a single DUAL250E ACPI device to model 2 accels */
+ if (acpi_dev_present("DUAL250E", NULL, -1))
+ return true;
+
+ /* Systems which use a single BOSC0200 ACPI device to model 2 accels */
+ if (dual_accel_detect_bosc0200())
+ return true;
+
+ return false;
+}
diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c
index fbb224a82e34..7f3a03f937f6 100644
--- a/drivers/platform/x86/gigabyte-wmi.c
+++ b/drivers/platform/x86/gigabyte-wmi.c
@@ -140,6 +140,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev)
}}
static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M S2H V2"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE V2"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"),
@@ -147,6 +148,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"),
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 UD"),
{ }
diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c
index e5fbe017f8e1..2e4e97a626a5 100644
--- a/drivers/platform/x86/intel-hid.c
+++ b/drivers/platform/x86/intel-hid.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/suspend.h>
+#include "dual_accel_detect.h"
/* When NOT in tablet mode, VGBS returns with the flag 0x40 */
#define TABLET_MODE_FLAG BIT(6)
@@ -122,6 +123,7 @@ struct intel_hid_priv {
struct input_dev *array;
struct input_dev *switches;
bool wakeup_mode;
+ bool dual_accel;
};
#define HID_EVENT_FILTER_UUID "eeec56b3-4442-408f-a792-4edd4d758054"
@@ -451,22 +453,9 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
* SW_TABLET_MODE report, in these cases we enable support when receiving
* the first event instead of during driver setup.
*
- * Some 360 degree hinges (yoga) style 2-in-1 devices use 2 accelerometers
- * to allow the OS to determine the angle between the display and the base
- * of the device. On Windows these are read by a special HingeAngleService
- * process which calls an ACPI DSM (Device Specific Method) on the
- * ACPI KIOX010A device node for the sensor in the display, to let the
- * firmware know if the 2-in-1 is in tablet- or laptop-mode so that it can
- * disable the kbd and touchpad to avoid spurious input in tablet-mode.
- *
- * The linux kxcjk1013 driver calls the DSM for this once at probe time
- * to ensure that the builtin kbd and touchpad work. On some devices this
- * causes a "spurious" 0xcd event on the intel-hid ACPI dev. In this case
- * there is not a functional tablet-mode switch, so we should not register
- * the tablet-mode switch device.
+ * See dual_accel_detect.h for more info on the dual_accel check.
*/
- if (!priv->switches && (event == 0xcc || event == 0xcd) &&
- !acpi_dev_present("KIOX010A", NULL, -1)) {
+ if (!priv->switches && !priv->dual_accel && (event == 0xcc || event == 0xcd)) {
dev_info(&device->dev, "switch event received, enable switches supports\n");
err = intel_hid_switches_setup(device);
if (err)
@@ -607,6 +596,8 @@ static int intel_hid_probe(struct platform_device *device)
return -ENOMEM;
dev_set_drvdata(&device->dev, priv);
+ priv->dual_accel = dual_accel_detect();
+
err = intel_hid_input_setup(device);
if (err) {
pr_err("Failed to setup Intel HID hotkeys\n");
diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c
index 888a764efad1..309166431063 100644
--- a/drivers/platform/x86/intel-vbtn.c
+++ b/drivers/platform/x86/intel-vbtn.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/suspend.h>
+#include "dual_accel_detect.h"
/* Returned when NOT in tablet mode on some HP Stream x360 11 models */
#define VGBS_TABLET_MODE_FLAG_ALT 0x10
@@ -66,6 +67,7 @@ static const struct key_entry intel_vbtn_switchmap[] = {
struct intel_vbtn_priv {
struct input_dev *buttons_dev;
struct input_dev *switches_dev;
+ bool dual_accel;
bool has_buttons;
bool has_switches;
bool wakeup_mode;
@@ -160,6 +162,10 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
input_dev = priv->buttons_dev;
} else if ((ke = sparse_keymap_entry_from_scancode(priv->switches_dev, event))) {
if (!priv->has_switches) {
+ /* See dual_accel_detect.h for more info */
+ if (priv->dual_accel)
+ return;
+
dev_info(&device->dev, "Registering Intel Virtual Switches input-dev after receiving a switch event\n");
ret = input_register_device(priv->switches_dev);
if (ret)
@@ -248,11 +254,15 @@ static const struct dmi_system_id dmi_switches_allow_list[] = {
{} /* Array terminator */
};
-static bool intel_vbtn_has_switches(acpi_handle handle)
+static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel)
{
unsigned long long vgbs;
acpi_status status;
+ /* See dual_accel_detect.h for more info */
+ if (dual_accel)
+ return false;
+
if (!dmi_check_system(dmi_switches_allow_list))
return false;
@@ -263,13 +273,14 @@ static bool intel_vbtn_has_switches(acpi_handle handle)
static int intel_vbtn_probe(struct platform_device *device)
{
acpi_handle handle = ACPI_HANDLE(&device->dev);
- bool has_buttons, has_switches;
+ bool dual_accel, has_buttons, has_switches;
struct intel_vbtn_priv *priv;
acpi_status status;
int err;
+ dual_accel = dual_accel_detect();
has_buttons = acpi_has_method(handle, "VBDL");
- has_switches = intel_vbtn_has_switches(handle);
+ has_switches = intel_vbtn_has_switches(handle, dual_accel);
if (!has_buttons && !has_switches) {
dev_warn(&device->dev, "failed to read Intel Virtual Button driver\n");
@@ -281,6 +292,7 @@ static int intel_vbtn_probe(struct platform_device *device)
return -ENOMEM;
dev_set_drvdata(&device->dev, priv);
+ priv->dual_accel = dual_accel;
priv->has_buttons = has_buttons;
priv->has_switches = has_switches;
diff --git a/drivers/platform/x86/pcengines-apuv2.c b/drivers/platform/x86/pcengines-apuv2.c
index c37349f97bb8..d063d91db9bc 100644
--- a/drivers/platform/x86/pcengines-apuv2.c
+++ b/drivers/platform/x86/pcengines-apuv2.c
@@ -94,6 +94,7 @@ static struct gpiod_lookup_table gpios_led_table = {
NULL, 1, GPIO_ACTIVE_LOW),
GPIO_LOOKUP_IDX(AMD_FCH_GPIO_DRIVER_NAME, APU2_GPIO_LINE_LED3,
NULL, 2, GPIO_ACTIVE_LOW),
+ {} /* Terminating entry */
}
};
@@ -123,6 +124,7 @@ static struct gpiod_lookup_table gpios_key_table = {
.table = {
GPIO_LOOKUP_IDX(AMD_FCH_GPIO_DRIVER_NAME, APU2_GPIO_LINE_MODESW,
NULL, 0, GPIO_ACTIVE_LOW),
+ {} /* Terminating entry */
}
};
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 603156a6e3ed..50ff04c84650 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -73,6 +73,7 @@
#include <linux/uaccess.h>
#include <acpi/battery.h>
#include <acpi/video.h>
+#include "dual_accel_detect.h"
/* ThinkPad CMOS commands */
#define TP_CMOS_VOLUME_DOWN 0
@@ -3232,7 +3233,7 @@ static int hotkey_init_tablet_mode(void)
* the laptop/tent/tablet mode to the EC. The bmc150 iio driver
* does not support this, so skip the hotkey on these models.
*/
- if (has_tablet_mode && !acpi_dev_present("BOSC0200", "1", -1))
+ if (has_tablet_mode && !dual_accel_detect())
tp_features.hotkey_tablet = TP_HOTKEY_TABLET_USES_GMMS;
type = "GMMS";
} else if (acpi_evalf(hkey_handle, &res, "MHKG", "qd")) {
diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig
index 4d1192062508..4b563db3ab3e 100644
--- a/drivers/power/reset/Kconfig
+++ b/drivers/power/reset/Kconfig
@@ -204,6 +204,12 @@ config POWER_RESET_ST
help
Reset support for STMicroelectronics boards.
+config POWER_RESET_TPS65086
+ bool "TPS65086 restart driver"
+ depends on MFD_TPS65086
+ help
+ This driver adds support for resetting the TPS65086 PMIC on restart.
+
config POWER_RESET_VERSATILE
bool "ARM Versatile family reboot driver"
depends on ARM
diff --git a/drivers/power/reset/Makefile b/drivers/power/reset/Makefile
index cf3f4d02d8a5..f606a2f60539 100644
--- a/drivers/power/reset/Makefile
+++ b/drivers/power/reset/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_POWER_RESET_QNAP) += qnap-poweroff.o
obj-$(CONFIG_POWER_RESET_REGULATOR) += regulator-poweroff.o
obj-$(CONFIG_POWER_RESET_RESTART) += restart-poweroff.o
obj-$(CONFIG_POWER_RESET_ST) += st-poweroff.o
+obj-$(CONFIG_POWER_RESET_TPS65086) += tps65086-restart.o
obj-$(CONFIG_POWER_RESET_VERSATILE) += arm-versatile-reboot.o
obj-$(CONFIG_POWER_RESET_VEXPRESS) += vexpress-poweroff.o
obj-$(CONFIG_POWER_RESET_XGENE) += xgene-reboot.o
diff --git a/drivers/power/reset/linkstation-poweroff.c b/drivers/power/reset/linkstation-poweroff.c
index f1e843df0e16..02f5fdb8ffc4 100644
--- a/drivers/power/reset/linkstation-poweroff.c
+++ b/drivers/power/reset/linkstation-poweroff.c
@@ -19,6 +19,7 @@
#define MII_MARVELL_PHY_PAGE 22
#define MII_PHY_LED_CTRL 16
+#define MII_PHY_LED_POL_CTRL 17
#define MII_88E1318S_PHY_LED_TCR 18
#define MII_88E1318S_PHY_WOL_CTRL 16
#define MII_M1011_IEVENT 19
@@ -29,11 +30,23 @@
#define LED2_FORCE_ON (0x8 << 8)
#define LEDMASK GENMASK(11,8)
+#define MII_88E1318S_PHY_LED_POL_LED2 BIT(4)
+
+/*
+ * Per-board power-off configuration, selected through the .data pointer of
+ * the matching ls_poweroff_of_match[] entry.
+ *
+ * @mdio_node_name: name of the device-tree node that is looked up with
+ *                  of_find_node_by_name(); only ever points at string
+ *                  literals, hence const.
+ * @phy_set_reg:    PHY register hook; called with true from the reboot
+ *                  notifier on SYS_RESTART and with false from
+ *                  linkstation_poweroff().
+ */
+struct power_off_cfg {
+ const char *mdio_node_name;
+ void (*phy_set_reg)(bool restart);
+};
+
static struct phy_device *phydev;
+static const struct power_off_cfg *cfg;
-static void mvphy_reg_intn(u16 data)
+static void linkstation_mvphy_reg_intn(bool restart)
{
int rc = 0, saved_page;
+ u16 data = 0;
+
+ if (restart)
+ data = MII_88E1318S_PHY_LED_TCR_FORCE_INT;
saved_page = phy_select_page(phydev, MII_MARVELL_LED_PAGE);
if (saved_page < 0)
@@ -66,11 +79,52 @@ err:
dev_err(&phydev->mdio.dev, "Write register failed, %d\n", rc);
}
+/*
+ * ReadyNAS variant of the PHY hook: program the LED[2] polarity bit on the
+ * Marvell LED page. @restart == true sets MII_88E1318S_PHY_LED_POL_LED2,
+ * @restart == false clears it and additionally clears the WOL status.
+ *
+ * Every register access is checked; the first failure skips the remaining
+ * accesses and is reported through the dev_err() at the end, after the
+ * original page has been restored.
+ */
+static void readynas_mvphy_set_reg(bool restart)
+{
+ int rc = 0, saved_page;
+ u16 data = 0;
+
+ if (restart)
+ data = MII_88E1318S_PHY_LED_POL_LED2;
+
+ saved_page = phy_select_page(phydev, MII_MARVELL_LED_PAGE);
+ if (saved_page < 0)
+ goto err;
+
+ /* Set the LED[2].0 Polarity bit to the required state */
+ rc = __phy_modify(phydev, MII_PHY_LED_POL_CTRL,
+ MII_88E1318S_PHY_LED_POL_LED2, data);
+ if (rc < 0)
+ goto err;
+
+ if (!data) {
+ /* If WOL was enabled and a magic packet was received before powering
+ * off, we won't be able to wake up by sending another magic packet.
+ * Clear WOL status.
+ */
+ rc = __phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_MARVELL_WOL_PAGE);
+ if (rc < 0)
+ goto err;
+
+ rc = __phy_set_bits(phydev, MII_88E1318S_PHY_WOL_CTRL,
+ MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS);
+ }
+err:
+ /* rc is handed over so a failed access above is not lost on restore */
+ rc = phy_restore_page(phydev, saved_page, rc);
+ if (rc < 0)
+ dev_err(&phydev->mdio.dev, "Write register failed, %d\n", rc);
+}
+
+/* Used by the "buffalo,ls421d" and "buffalo,ls421de" match entries */
+static const struct power_off_cfg linkstation_power_off_cfg = {
+ .phy_set_reg = linkstation_mvphy_reg_intn,
+ .mdio_node_name = "mdio",
+};
+
+/* Used by the "netgear,readynas-duo-v2" match entry */
+static const struct power_off_cfg readynas_power_off_cfg = {
+ .phy_set_reg = readynas_mvphy_set_reg,
+ .mdio_node_name = "mdio-bus",
+};
+
static int linkstation_reboot_notifier(struct notifier_block *nb,
unsigned long action, void *unused)
{
if (action == SYS_RESTART)
- mvphy_reg_intn(MII_88E1318S_PHY_LED_TCR_FORCE_INT);
+ cfg->phy_set_reg(true);
return NOTIFY_DONE;
}
@@ -82,14 +136,21 @@ static struct notifier_block linkstation_reboot_nb = {
static void linkstation_poweroff(void)
{
unregister_reboot_notifier(&linkstation_reboot_nb);
- mvphy_reg_intn(0);
+ cfg->phy_set_reg(false);
kernel_restart("Power off");
}
static const struct of_device_id ls_poweroff_of_match[] = {
- { .compatible = "buffalo,ls421d" },
- { .compatible = "buffalo,ls421de" },
+ { .compatible = "buffalo,ls421d",
+ .data = &linkstation_power_off_cfg,
+ },
+ { .compatible = "buffalo,ls421de",
+ .data = &linkstation_power_off_cfg,
+ },
+ { .compatible = "netgear,readynas-duo-v2",
+ .data = &readynas_power_off_cfg,
+ },
{ },
};
@@ -97,13 +158,17 @@ static int __init linkstation_poweroff_init(void)
{
struct mii_bus *bus;
struct device_node *dn;
+ const struct of_device_id *match;
dn = of_find_matching_node(NULL, ls_poweroff_of_match);
if (!dn)
return -ENODEV;
of_node_put(dn);
- dn = of_find_node_by_name(NULL, "mdio");
+ match = of_match_node(ls_poweroff_of_match, dn);
+ cfg = match->data;
+
+ dn = of_find_node_by_name(NULL, cfg->mdio_node_name);
if (!dn)
return -ENODEV;
diff --git a/drivers/power/reset/tps65086-restart.c b/drivers/power/reset/tps65086-restart.c
new file mode 100644
index 000000000000..78b89f745a3d
--- /dev/null
+++ b/drivers/power/reset/tps65086-restart.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Emil Renner Berthing
+ */
+
+#include <linux/mfd/tps65086.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/reboot.h>
+
+/*
+ * Per-device state of the restart driver.
+ *
+ * @dev:     the platform device's struct device (set in probe)
+ * @handler: notifier block registered with register_restart_handler();
+ *           the callback recovers this structure via container_of()
+ */
+struct tps65086_restart {
+ struct device *dev;
+ struct notifier_block handler;
+};
+
+/*
+ * Restart handler: write 1 to the TPS65086_FORCESHUTDN register through the
+ * regmap found in the parent device's driver data, then delay 200 ms with
+ * mdelay(). If execution continues past that delay a WARN is emitted.
+ * A failed register write is logged. Always returns NOTIFY_DONE.
+ */
+static int tps65086_restart_notify(struct notifier_block *this,
+ unsigned long mode, void *cmd)
+{
+ struct tps65086_restart *restart =
+ container_of(this, struct tps65086_restart, handler);
+ struct device *dev = restart->dev;
+ struct tps65086 *pmic = dev_get_drvdata(dev->parent);
+ int err;
+
+ err = regmap_write(pmic->regmap, TPS65086_FORCESHUTDN, 1);
+ if (!err) {
+ /* give it a little time */
+ mdelay(200);
+
+ WARN_ON(1);
+ } else {
+ dev_err(dev, "%s: error writing to tps65086 pmic: %d\n",
+ __func__, err);
+ }
+
+ return NOTIFY_DONE;
+}
+
+/*
+ * Allocate the per-device state, store it as platform driver data and
+ * register tps65086_restart_notify() as a restart handler with priority 192.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error code
+ * reported by register_restart_handler().
+ */
+static int tps65086_restart_probe(struct platform_device *pdev)
+{
+ struct tps65086_restart *tps65086_restart;
+ int ret;
+
+ tps65086_restart = devm_kzalloc(&pdev->dev, sizeof(*tps65086_restart), GFP_KERNEL);
+ if (!tps65086_restart)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, tps65086_restart);
+
+ tps65086_restart->handler.notifier_call = tps65086_restart_notify;
+ tps65086_restart->handler.priority = 192;
+ tps65086_restart->dev = &pdev->dev;
+
+ ret = register_restart_handler(&tps65086_restart->handler);
+ if (ret) {
+ dev_err(&pdev->dev, "%s: cannot register restart handler: %d\n",
+ __func__, ret);
+ /* Propagate the real cause instead of masking it as -ENODEV */
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Unregister the restart handler installed by probe. Returns 0 on success;
+ * a failure is logged and reported as -ENODEV.
+ */
+static int tps65086_restart_remove(struct platform_device *pdev)
+{
+ struct tps65086_restart *restart = platform_get_drvdata(pdev);
+ int err = unregister_restart_handler(&restart->handler);
+
+ if (!err)
+ return 0;
+
+ dev_err(&pdev->dev, "%s: cannot unregister restart handler: %d\n",
+ __func__, err);
+ return -ENODEV;
+}
+
+/* Platform device names this driver binds to */
+static const struct platform_device_id tps65086_restart_id_table[] = {
+ { .name = "tps65086-reset" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(platform, tps65086_restart_id_table);
+
+/* Platform driver glue; matching is done through tps65086_restart_id_table */
+static struct platform_driver tps65086_restart_driver = {
+ .probe = tps65086_restart_probe,
+ .remove = tps65086_restart_remove,
+ .id_table = tps65086_restart_id_table,
+ .driver = {
+ .name = "tps65086-restart",
+ },
+};
+module_platform_driver(tps65086_restart_driver);
+
+MODULE_AUTHOR("Emil Renner Berthing <kernel@esmil.dk>");
+MODULE_DESCRIPTION("TPS65086 restart driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig
index 11f5368e810e..fcc7534edcb2 100644
--- a/drivers/power/supply/Kconfig
+++ b/drivers/power/supply/Kconfig
@@ -358,7 +358,7 @@ config AXP288_CHARGER
config AXP288_FUEL_GAUGE
tristate "X-Powers AXP288 Fuel Gauge"
- depends on MFD_AXP20X && IIO
+ depends on MFD_AXP20X && IIO && IOSF_MBI
help
Say yes here to have support for X-Power power management IC (PMIC)
Fuel Gauge. The device provides battery statistics and status
@@ -577,6 +577,17 @@ config CHARGER_MP2629
Battery charger. This driver provides Battery charger power management
functions on the systems.
+config CHARGER_MT6360
+ tristate "Mediatek MT6360 Charger Driver"
+ depends on MFD_MT6360
+ depends on REGULATOR
+ select LINEAR_RANGES
+ help
+ Say Y here to enable MT6360 Charger Part.
+ The device supports High-Accuracy Voltage/Current Regulation,
+ Average Input Current Regulation, Battery Temperature Sensing,
+ Over-Temperature Protection, DPDM Detection for BC1.2.
+
config CHARGER_QCOM_SMBB
tristate "Qualcomm Switch-Mode Battery Charger and Boost"
depends on MFD_SPMI_PMIC || COMPILE_TEST
@@ -669,6 +680,7 @@ config CHARGER_BQ256XX
config CHARGER_SMB347
tristate "Summit Microelectronics SMB3XX Battery Charger"
depends on I2C
+ depends on REGULATOR
select REGMAP_I2C
help
Say Y to include support for Summit Microelectronics SMB345,
@@ -736,6 +748,16 @@ config CHARGER_CROS_USBPD
what is connected to USB PD ports from the EC and converts
that into power_supply properties.
+config CHARGER_CROS_PCHG
+ tristate "ChromeOS EC based peripheral charger"
+ depends on MFD_CROS_EC_DEV
+ default MFD_CROS_EC_DEV
+ help
+ Say Y here to enable the ChromeOS EC based peripheral charger driver.
+ This driver gets various information about the devices connected to
+ the peripheral charge ports from the EC and converts that into
+ power_supply properties.
+
config CHARGER_SC2731
tristate "Spreadtrum SC2731 charger driver"
depends on MFD_SC27XX_PMIC || COMPILE_TEST
@@ -782,6 +804,8 @@ config CHARGER_WILCO
config RN5T618_POWER
tristate "RN5T618 charger/fuel gauge support"
depends on MFD_RN5T618
+ depends on RN5T618_ADC
+ depends on IIO
help
Say Y here to have support for RN5T618 PMIC family fuel gauge and charger.
This driver can also be built as a module. If so, the module will be
diff --git a/drivers/power/supply/Makefile b/drivers/power/supply/Makefile
index 33059a91f60c..4e55a11aab79 100644
--- a/drivers/power/supply/Makefile
+++ b/drivers/power/supply/Makefile
@@ -60,7 +60,7 @@ obj-$(CONFIG_BATTERY_TWL4030_MADC) += twl4030_madc_battery.o
obj-$(CONFIG_CHARGER_88PM860X) += 88pm860x_charger.o
obj-$(CONFIG_CHARGER_PCF50633) += pcf50633-charger.o
obj-$(CONFIG_BATTERY_RX51) += rx51_battery.o
-obj-$(CONFIG_AB8500_BM) += ab8500_bmdata.o ab8500_charger.o ab8500_fg.o ab8500_btemp.o abx500_chargalg.o
+obj-$(CONFIG_AB8500_BM) += ab8500_bmdata.o ab8500_charger.o ab8500_fg.o ab8500_btemp.o ab8500_chargalg.o
obj-$(CONFIG_CHARGER_CPCAP) += cpcap-charger.o
obj-$(CONFIG_CHARGER_ISP1704) += isp1704_charger.o
obj-$(CONFIG_CHARGER_MAX8903) += max8903_charger.o
@@ -78,6 +78,7 @@ obj-$(CONFIG_CHARGER_MAX77693) += max77693_charger.o
obj-$(CONFIG_CHARGER_MAX8997) += max8997_charger.o
obj-$(CONFIG_CHARGER_MAX8998) += max8998_charger.o
obj-$(CONFIG_CHARGER_MP2629) += mp2629_charger.o
+obj-$(CONFIG_CHARGER_MT6360) += mt6360_charger.o
obj-$(CONFIG_CHARGER_QCOM_SMBB) += qcom_smbb.o
obj-$(CONFIG_CHARGER_BQ2415X) += bq2415x_charger.o
obj-$(CONFIG_CHARGER_BQ24190) += bq24190_charger.o
@@ -93,6 +94,7 @@ obj-$(CONFIG_CHARGER_TPS65217) += tps65217_charger.o
obj-$(CONFIG_AXP288_FUEL_GAUGE) += axp288_fuel_gauge.o
obj-$(CONFIG_AXP288_CHARGER) += axp288_charger.o
obj-$(CONFIG_CHARGER_CROS_USBPD) += cros_usbpd-charger.o
+obj-$(CONFIG_CHARGER_CROS_PCHG) += cros_peripheral_charger.o
obj-$(CONFIG_CHARGER_SC2731) += sc2731_charger.o
obj-$(CONFIG_FUEL_GAUGE_SC27XX) += sc27xx_fuel_gauge.o
obj-$(CONFIG_CHARGER_UCS1002) += ucs1002_power.o
diff --git a/drivers/power/supply/ab8500-bm.h b/drivers/power/supply/ab8500-bm.h
index 0c940571e5b0..d11405b7ee1a 100644
--- a/drivers/power/supply/ab8500-bm.h
+++ b/drivers/power/supply/ab8500-bm.h
@@ -269,43 +269,43 @@ enum bup_vch_sel {
/*
* ADC for the battery thermistor.
- * When using the ABx500_ADC_THERM_BATCTRL the battery ID resistor is combined
+ * When using the AB8500_ADC_THERM_BATCTRL the battery ID resistor is combined
* with a NTC resistor to both identify the battery and to measure its
* temperature. Different phone manufactures uses different techniques to both
* identify the battery and to read its temperature.
*/
-enum abx500_adc_therm {
- ABx500_ADC_THERM_BATCTRL,
- ABx500_ADC_THERM_BATTEMP,
+enum ab8500_adc_therm {
+ AB8500_ADC_THERM_BATCTRL,
+ AB8500_ADC_THERM_BATTEMP,
};
/**
- * struct abx500_res_to_temp - defines one point in a temp to res curve. To
+ * struct ab8500_res_to_temp - defines one point in a temp to res curve. To
* be used in battery packs that combines the identification resistor with a
* NTC resistor.
* @temp: battery pack temperature in Celsius
* @resist: NTC resistor net total resistance
*/
-struct abx500_res_to_temp {
+struct ab8500_res_to_temp {
int temp;
int resist;
};
/**
- * struct abx500_v_to_cap - Table for translating voltage to capacity
+ * struct ab8500_v_to_cap - Table for translating voltage to capacity
* @voltage: Voltage in mV
* @capacity: Capacity in percent
*/
-struct abx500_v_to_cap {
+struct ab8500_v_to_cap {
int voltage;
int capacity;
};
/* Forward declaration */
-struct abx500_fg;
+struct ab8500_fg;
/**
- * struct abx500_fg_parameters - Fuel gauge algorithm parameters, in seconds
+ * struct ab8500_fg_parameters - Fuel gauge algorithm parameters, in seconds
* if not specified
* @recovery_sleep_timer: Time between measurements while recovering
* @recovery_total_time: Total recovery time
@@ -333,7 +333,7 @@ struct abx500_fg;
* @pcut_max_restart: Max number of restarts
* @pcut_debounce_time: Sets battery debounce time
*/
-struct abx500_fg_parameters {
+struct ab8500_fg_parameters {
int recovery_sleep_timer;
int recovery_total_time;
int init_timer;
@@ -357,13 +357,13 @@ struct abx500_fg_parameters {
};
/**
- * struct abx500_charger_maximization - struct used by the board config.
+ * struct ab8500_maxim_parameters - struct used by the board config.
* @use_maxi: Enable maximization for this battery type
* @maxi_chg_curr: Maximum charger current allowed
* @maxi_wait_cycles: cycles to wait before setting charger current
* @charger_curr_step delta between two charger current settings (mA)
*/
-struct abx500_maxim_parameters {
+struct ab8500_maxim_parameters {
bool ena_maxi;
int chg_curr;
int wait_cycles;
@@ -371,7 +371,7 @@ struct abx500_maxim_parameters {
};
/**
- * struct abx500_battery_type - different batteries supported
+ * struct ab8500_battery_type - different batteries supported
* @name: battery technology
* @resis_high: battery upper resistance limit
* @resis_low: battery lower resistance limit
@@ -400,7 +400,7 @@ struct abx500_maxim_parameters {
* @n_batres_tbl_elements number of elements in the batres_tbl
* @batres_tbl battery internal resistance vs temperature table
*/
-struct abx500_battery_type {
+struct ab8500_battery_type {
int name;
int resis_high;
int resis_low;
@@ -421,22 +421,22 @@ struct abx500_battery_type {
int low_high_vol_lvl;
int battery_resistance;
int n_temp_tbl_elements;
- const struct abx500_res_to_temp *r_to_t_tbl;
+ const struct ab8500_res_to_temp *r_to_t_tbl;
int n_v_cap_tbl_elements;
- const struct abx500_v_to_cap *v_to_cap_tbl;
+ const struct ab8500_v_to_cap *v_to_cap_tbl;
int n_batres_tbl_elements;
const struct batres_vs_temp *batres_tbl;
};
/**
- * struct abx500_bm_capacity_levels - abx500 capacity level data
+ * struct ab8500_bm_capacity_levels - ab8500 capacity level data
* @critical: critical capacity level in percent
* @low: low capacity level in percent
* @normal: normal capacity level in percent
* @high: high capacity level in percent
* @full: full capacity level in percent
*/
-struct abx500_bm_capacity_levels {
+struct ab8500_bm_capacity_levels {
int critical;
int low;
int normal;
@@ -445,13 +445,13 @@ struct abx500_bm_capacity_levels {
};
/**
- * struct abx500_bm_charger_parameters - Charger specific parameters
+ * struct ab8500_bm_charger_parameters - Charger specific parameters
* @usb_volt_max: maximum allowed USB charger voltage in mV
* @usb_curr_max: maximum allowed USB charger current in mA
* @ac_volt_max: maximum allowed AC charger voltage in mV
* @ac_curr_max: maximum allowed AC charger current in mA
*/
-struct abx500_bm_charger_parameters {
+struct ab8500_bm_charger_parameters {
int usb_volt_max;
int usb_curr_max;
int ac_volt_max;
@@ -459,7 +459,7 @@ struct abx500_bm_charger_parameters {
};
/**
- * struct abx500_bm_data - abx500 battery management data
+ * struct ab8500_bm_data - ab8500 battery management data
* @temp_under under this temp, charging is stopped
* @temp_low between this temp and temp_under charging is reduced
* @temp_high between this temp and temp_over charging is reduced
@@ -473,7 +473,7 @@ struct abx500_bm_charger_parameters {
* @bkup_bat_i current which we charge the backup battery with
* @no_maintenance indicates that maintenance charging is disabled
* @capacity_scaling indicates whether capacity scaling is to be used
- * @abx500_adc_therm placement of thermistor, batctrl or battemp adc
+ * @ab8500_adc_therm placement of thermistor, batctrl or battemp adc
* @chg_unknown_bat flag to enable charging of unknown batteries
* @enable_overshoot flag to enable VBAT overshoot control
* @auto_trig flag to enable auto adc trigger
@@ -494,7 +494,7 @@ struct abx500_bm_charger_parameters {
* @chg_params charger parameters
* @fg_params fuel gauge parameters
*/
-struct abx500_bm_data {
+struct ab8500_bm_data {
int temp_under;
int temp_low;
int temp_high;
@@ -511,7 +511,7 @@ struct abx500_bm_data {
bool chg_unknown_bat;
bool enable_overshoot;
bool auto_trig;
- enum abx500_adc_therm adc_therm;
+ enum ab8500_adc_therm adc_therm;
int fg_res;
int n_btypes;
int batt_id;
@@ -523,11 +523,11 @@ struct abx500_bm_data {
int n_chg_in_curr;
int *chg_output_curr;
int *chg_input_curr;
- const struct abx500_maxim_parameters *maxi;
- const struct abx500_bm_capacity_levels *cap_levels;
- struct abx500_battery_type *bat_type;
- const struct abx500_bm_charger_parameters *chg_params;
- const struct abx500_fg_parameters *fg_params;
+ const struct ab8500_maxim_parameters *maxi;
+ const struct ab8500_bm_capacity_levels *cap_levels;
+ struct ab8500_battery_type *bat_type;
+ const struct ab8500_bm_charger_parameters *chg_params;
+ const struct ab8500_fg_parameters *fg_params;
};
enum {
@@ -561,160 +561,7 @@ struct batres_vs_temp {
/* Forward declaration */
struct ab8500_fg;
-/**
- * struct ab8500_fg_parameters - Fuel gauge algorithm parameters, in seconds
- * if not specified
- * @recovery_sleep_timer: Time between measurements while recovering
- * @recovery_total_time: Total recovery time
- * @init_timer: Measurement interval during startup
- * @init_discard_time: Time we discard voltage measurement at startup
- * @init_total_time: Total init time during startup
- * @high_curr_time: Time current has to be high to go to recovery
- * @accu_charging: FG accumulation time while charging
- * @accu_high_curr: FG accumulation time in high current mode
- * @high_curr_threshold: High current threshold, in mA
- * @lowbat_threshold: Low battery threshold, in mV
- * @battok_falling_th_sel0 Threshold in mV for battOk signal sel0
- * Resolution in 50 mV step.
- * @battok_raising_th_sel1 Threshold in mV for battOk signal sel1
- * Resolution in 50 mV step.
- * @user_cap_limit Capacity reported from user must be within this
- * limit to be considered as sane, in percentage
- * points.
- * @maint_thres This is the threshold where we stop reporting
- * battery full while in maintenance, in per cent
- * @pcut_enable: Enable power cut feature in ab8505
- * @pcut_max_time: Max time threshold
- * @pcut_flag_time: Flagtime threshold
- * @pcut_max_restart: Max number of restarts
- * @pcut_debunce_time: Sets battery debounce time
- */
-struct ab8500_fg_parameters {
- int recovery_sleep_timer;
- int recovery_total_time;
- int init_timer;
- int init_discard_time;
- int init_total_time;
- int high_curr_time;
- int accu_charging;
- int accu_high_curr;
- int high_curr_threshold;
- int lowbat_threshold;
- int battok_falling_th_sel0;
- int battok_raising_th_sel1;
- int user_cap_limit;
- int maint_thres;
- bool pcut_enable;
- u8 pcut_max_time;
- u8 pcut_flag_time;
- u8 pcut_max_restart;
- u8 pcut_debunce_time;
-};
-
-/**
- * struct ab8500_charger_maximization - struct used by the board config.
- * @use_maxi: Enable maximization for this battery type
- * @maxi_chg_curr: Maximum charger current allowed
- * @maxi_wait_cycles: cycles to wait before setting charger current
- * @charger_curr_step delta between two charger current settings (mA)
- */
-struct ab8500_maxim_parameters {
- bool ena_maxi;
- int chg_curr;
- int wait_cycles;
- int charger_curr_step;
-};
-
-/**
- * struct ab8500_bm_capacity_levels - ab8500 capacity level data
- * @critical: critical capacity level in percent
- * @low: low capacity level in percent
- * @normal: normal capacity level in percent
- * @high: high capacity level in percent
- * @full: full capacity level in percent
- */
-struct ab8500_bm_capacity_levels {
- int critical;
- int low;
- int normal;
- int high;
- int full;
-};
-
-/**
- * struct ab8500_bm_charger_parameters - Charger specific parameters
- * @usb_volt_max: maximum allowed USB charger voltage in mV
- * @usb_curr_max: maximum allowed USB charger current in mA
- * @ac_volt_max: maximum allowed AC charger voltage in mV
- * @ac_curr_max: maximum allowed AC charger current in mA
- */
-struct ab8500_bm_charger_parameters {
- int usb_volt_max;
- int usb_curr_max;
- int ac_volt_max;
- int ac_curr_max;
-};
-
-/**
- * struct ab8500_bm_data - ab8500 battery management data
- * @temp_under under this temp, charging is stopped
- * @temp_low between this temp and temp_under charging is reduced
- * @temp_high between this temp and temp_over charging is reduced
- * @temp_over over this temp, charging is stopped
- * @temp_interval_chg temperature measurement interval in s when charging
- * @temp_interval_nochg temperature measurement interval in s when not charging
- * @main_safety_tmr_h safety timer for main charger
- * @usb_safety_tmr_h safety timer for usb charger
- * @bkup_bat_v voltage which we charge the backup battery with
- * @bkup_bat_i current which we charge the backup battery with
- * @no_maintenance indicates that maintenance charging is disabled
- * @capacity_scaling indicates whether capacity scaling is to be used
- * @adc_therm placement of thermistor, batctrl or battemp adc
- * @chg_unknown_bat flag to enable charging of unknown batteries
- * @enable_overshoot flag to enable VBAT overshoot control
- * @fg_res resistance of FG resistor in 0.1mOhm
- * @n_btypes number of elements in array bat_type
- * @batt_id index of the identified battery in array bat_type
- * @interval_charging charge alg cycle period time when charging (sec)
- * @interval_not_charging charge alg cycle period time when not charging (sec)
- * @temp_hysteresis temperature hysteresis
- * @gnd_lift_resistance Battery ground to phone ground resistance (mOhm)
- * @maxi: maximization parameters
- * @cap_levels capacity in percent for the different capacity levels
- * @bat_type table of supported battery types
- * @chg_params charger parameters
- * @fg_params fuel gauge parameters
- */
-struct ab8500_bm_data {
- int temp_under;
- int temp_low;
- int temp_high;
- int temp_over;
- int temp_interval_chg;
- int temp_interval_nochg;
- int main_safety_tmr_h;
- int usb_safety_tmr_h;
- int bkup_bat_v;
- int bkup_bat_i;
- bool no_maintenance;
- bool capacity_scaling;
- bool chg_unknown_bat;
- bool enable_overshoot;
- enum abx500_adc_therm adc_therm;
- int fg_res;
- int n_btypes;
- int batt_id;
- int interval_charging;
- int interval_not_charging;
- int temp_hysteresis;
- int gnd_lift_resistance;
- const struct ab8500_maxim_parameters *maxi;
- const struct ab8500_bm_capacity_levels *cap_levels;
- const struct ab8500_bm_charger_parameters *chg_params;
- const struct ab8500_fg_parameters *fg_params;
-};
-
-extern struct abx500_bm_data ab8500_bm_data;
+extern struct ab8500_bm_data ab8500_bm_data;
void ab8500_charger_usb_state_changed(u8 bm_usb_state, u16 mA);
struct ab8500_fg *ab8500_fg_get(void);
@@ -725,10 +572,10 @@ int ab8500_fg_inst_curr_started(struct ab8500_fg *di);
int ab8500_fg_inst_curr_done(struct ab8500_fg *di);
int ab8500_bm_of_probe(struct device *dev,
struct device_node *np,
- struct abx500_bm_data *bm);
+ struct ab8500_bm_data *bm);
extern struct platform_driver ab8500_fg_driver;
extern struct platform_driver ab8500_btemp_driver;
-extern struct platform_driver abx500_chargalg_driver;
+extern struct platform_driver ab8500_chargalg_driver;
#endif /* _AB8500_CHARGER_H_ */
diff --git a/drivers/power/supply/ab8500_bmdata.c b/drivers/power/supply/ab8500_bmdata.c
index c2b8c0bb77e2..6f5fb794042c 100644
--- a/drivers/power/supply/ab8500_bmdata.c
+++ b/drivers/power/supply/ab8500_bmdata.c
@@ -2,8 +2,6 @@
#include <linux/export.h>
#include <linux/power_supply.h>
#include <linux/of.h>
-#include <linux/mfd/abx500.h>
-#include <linux/mfd/abx500/ab8500.h>
#include "ab8500-bm.h"
@@ -13,7 +11,7 @@
* Note that the res_to_temp table must be strictly sorted by falling resistance
* values to work.
*/
-const struct abx500_res_to_temp ab8500_temp_tbl_a_thermistor[] = {
+const struct ab8500_res_to_temp ab8500_temp_tbl_a_thermistor[] = {
{-5, 53407},
{ 0, 48594},
{ 5, 43804},
@@ -35,7 +33,7 @@ EXPORT_SYMBOL(ab8500_temp_tbl_a_thermistor);
const int ab8500_temp_tbl_a_size = ARRAY_SIZE(ab8500_temp_tbl_a_thermistor);
EXPORT_SYMBOL(ab8500_temp_tbl_a_size);
-const struct abx500_res_to_temp ab8500_temp_tbl_b_thermistor[] = {
+const struct ab8500_res_to_temp ab8500_temp_tbl_b_thermistor[] = {
{-5, 200000},
{ 0, 159024},
{ 5, 151921},
@@ -57,7 +55,7 @@ EXPORT_SYMBOL(ab8500_temp_tbl_b_thermistor);
const int ab8500_temp_tbl_b_size = ARRAY_SIZE(ab8500_temp_tbl_b_thermistor);
EXPORT_SYMBOL(ab8500_temp_tbl_b_size);
-static const struct abx500_v_to_cap cap_tbl_a_thermistor[] = {
+static const struct ab8500_v_to_cap cap_tbl_a_thermistor[] = {
{4171, 100},
{4114, 95},
{4009, 83},
@@ -80,7 +78,7 @@ static const struct abx500_v_to_cap cap_tbl_a_thermistor[] = {
{3247, 0},
};
-static const struct abx500_v_to_cap cap_tbl_b_thermistor[] = {
+static const struct ab8500_v_to_cap cap_tbl_b_thermistor[] = {
{4161, 100},
{4124, 98},
{4044, 90},
@@ -103,7 +101,7 @@ static const struct abx500_v_to_cap cap_tbl_b_thermistor[] = {
{3250, 0},
};
-static const struct abx500_v_to_cap cap_tbl[] = {
+static const struct ab8500_v_to_cap cap_tbl[] = {
{4186, 100},
{4163, 99},
{4114, 95},
@@ -134,7 +132,7 @@ static const struct abx500_v_to_cap cap_tbl[] = {
* Note that the res_to_temp table must be strictly sorted by falling
* resistance values to work.
*/
-static const struct abx500_res_to_temp temp_tbl[] = {
+static const struct ab8500_res_to_temp temp_tbl[] = {
{-5, 214834},
{ 0, 162943},
{ 5, 124820},
@@ -191,7 +189,7 @@ static const struct batres_vs_temp temp_to_batres_tbl_9100[] = {
{-20, 180},
};
-static struct abx500_battery_type bat_type_thermistor[] = {
+static struct ab8500_battery_type bat_type_thermistor[] = {
[BATTERY_UNKNOWN] = {
/* First element always represent the UNKNOWN battery */
.name = POWER_SUPPLY_TECHNOLOGY_UNKNOWN,
@@ -277,7 +275,7 @@ static struct abx500_battery_type bat_type_thermistor[] = {
},
};
-static struct abx500_battery_type bat_type_ext_thermistor[] = {
+static struct ab8500_battery_type bat_type_ext_thermistor[] = {
[BATTERY_UNKNOWN] = {
/* First element always represent the UNKNOWN battery */
.name = POWER_SUPPLY_TECHNOLOGY_UNKNOWN,
@@ -394,7 +392,7 @@ static struct abx500_battery_type bat_type_ext_thermistor[] = {
},
};
-static const struct abx500_bm_capacity_levels cap_levels = {
+static const struct ab8500_bm_capacity_levels cap_levels = {
.critical = 2,
.low = 10,
.normal = 70,
@@ -402,7 +400,7 @@ static const struct abx500_bm_capacity_levels cap_levels = {
.full = 100,
};
-static const struct abx500_fg_parameters fg = {
+static const struct ab8500_fg_parameters fg = {
.recovery_sleep_timer = 10,
.recovery_total_time = 100,
.init_timer = 1,
@@ -424,14 +422,14 @@ static const struct abx500_fg_parameters fg = {
.pcut_debounce_time = 2,
};
-static const struct abx500_maxim_parameters ab8500_maxi_params = {
+static const struct ab8500_maxim_parameters ab8500_maxi_params = {
.ena_maxi = true,
.chg_curr = 910,
.wait_cycles = 10,
.charger_curr_step = 100,
};
-static const struct abx500_bm_charger_parameters chg = {
+static const struct ab8500_bm_charger_parameters chg = {
.usb_volt_max = 5500,
.usb_curr_max = 1500,
.ac_volt_max = 7500,
@@ -456,7 +454,7 @@ static int ab8500_charge_input_curr_map[] = {
700, 800, 900, 1000, 1100, 1300, 1400, 1500,
};
-struct abx500_bm_data ab8500_bm_data = {
+struct ab8500_bm_data ab8500_bm_data = {
.temp_under = 3,
.temp_low = 8,
.temp_high = 43,
@@ -469,7 +467,7 @@ struct abx500_bm_data ab8500_bm_data = {
.bkup_bat_i = BUP_ICH_SEL_150UA,
.no_maintenance = false,
.capacity_scaling = false,
- .adc_therm = ABx500_ADC_THERM_BATCTRL,
+ .adc_therm = AB8500_ADC_THERM_BATCTRL,
.chg_unknown_bat = false,
.enable_overshoot = false,
.fg_res = 100,
@@ -492,7 +490,7 @@ struct abx500_bm_data ab8500_bm_data = {
int ab8500_bm_of_probe(struct device *dev,
struct device_node *np,
- struct abx500_bm_data *bm)
+ struct ab8500_bm_data *bm)
{
const struct batres_vs_temp *tmp_batres_tbl;
struct device_node *battery_node;
@@ -531,7 +529,7 @@ int ab8500_bm_of_probe(struct device *dev,
} else {
bm->n_btypes = 4;
bm->bat_type = bat_type_ext_thermistor;
- bm->adc_therm = ABx500_ADC_THERM_BATTEMP;
+ bm->adc_therm = AB8500_ADC_THERM_BATTEMP;
tmp_batres_tbl = temp_to_batres_tbl_ext_thermistor;
}
diff --git a/drivers/power/supply/ab8500_btemp.c b/drivers/power/supply/ab8500_btemp.c
index dbdcff32f353..b6c9111d77d7 100644
--- a/drivers/power/supply/ab8500_btemp.c
+++ b/drivers/power/supply/ab8500_btemp.c
@@ -27,6 +27,7 @@
#include <linux/mfd/abx500.h>
#include <linux/mfd/abx500/ab8500.h>
#include <linux/iio/consumer.h>
+#include <linux/fixp-arith.h>
#include "ab8500-bm.h"
@@ -102,7 +103,7 @@ struct ab8500_btemp {
struct iio_channel *btemp_ball;
struct iio_channel *bat_ctrl;
struct ab8500_fg *fg;
- struct abx500_bm_data *bm;
+ struct ab8500_bm_data *bm;
struct power_supply *btemp_psy;
struct ab8500_btemp_events events;
struct ab8500_btemp_ranges btemp_ranges;
@@ -144,7 +145,7 @@ static int ab8500_btemp_batctrl_volt_to_res(struct ab8500_btemp *di,
return (450000 * (v_batctrl)) / (1800 - v_batctrl);
}
- if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL) {
+ if (di->bm->adc_therm == AB8500_ADC_THERM_BATCTRL) {
/*
* If the battery has internal NTC, we use the current
* source to calculate the resistance.
@@ -206,7 +207,7 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di,
return 0;
/* Only do this for batteries with internal NTC */
- if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && enable) {
+ if (di->bm->adc_therm == AB8500_ADC_THERM_BATCTRL && enable) {
if (di->curr_source == BTEMP_BATCTRL_CURR_SRC_7UA)
curr = BAT_CTRL_7U_ENA;
@@ -239,7 +240,7 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di,
__func__);
goto disable_curr_source;
}
- } else if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && !enable) {
+ } else if (di->bm->adc_therm == AB8500_ADC_THERM_BATCTRL && !enable) {
dev_dbg(di->dev, "Disable BATCTRL curr source\n");
/* Write 0 to the curr bits */
@@ -417,7 +418,7 @@ static int ab8500_btemp_get_batctrl_res(struct ab8500_btemp *di)
* based on the NTC resistance.
*/
static int ab8500_btemp_res_to_temp(struct ab8500_btemp *di,
- const struct abx500_res_to_temp *tbl, int tbl_size, int res)
+ const struct ab8500_res_to_temp *tbl, int tbl_size, int res)
{
int i;
/*
@@ -437,8 +438,9 @@ static int ab8500_btemp_res_to_temp(struct ab8500_btemp *di,
i++;
}
- return tbl[i].temp + ((tbl[i + 1].temp - tbl[i].temp) *
- (res - tbl[i].resist)) / (tbl[i + 1].resist - tbl[i].resist);
+ return fixp_linear_interpolate(tbl[i].resist, tbl[i].temp,
+ tbl[i + 1].resist, tbl[i + 1].temp,
+ res);
}
/**
@@ -456,7 +458,7 @@ static int ab8500_btemp_measure_temp(struct ab8500_btemp *di)
id = di->bm->batt_id;
- if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL &&
+ if (di->bm->adc_therm == AB8500_ADC_THERM_BATCTRL &&
id != BATTERY_UNKNOWN) {
rbat = ab8500_btemp_get_batctrl_res(di);
@@ -525,7 +527,7 @@ static int ab8500_btemp_id(struct ab8500_btemp *di)
dev_dbg(di->dev, "Battery detected on %s"
" low %d < res %d < high: %d"
" index: %d\n",
- di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL ?
+ di->bm->adc_therm == AB8500_ADC_THERM_BATCTRL ?
"BATCTRL" : "BATTEMP",
di->bm->bat_type[i].resis_low, res,
di->bm->bat_type[i].resis_high, i);
@@ -545,7 +547,7 @@ static int ab8500_btemp_id(struct ab8500_btemp *di)
* We only have to change current source if the
* detected type is Type 1.
*/
- if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL &&
+ if (di->bm->adc_therm == AB8500_ADC_THERM_BATCTRL &&
di->bm->batt_id == 1) {
dev_dbg(di->dev, "Set BATCTRL current source to 20uA\n");
di->curr_source = BTEMP_BATCTRL_CURR_SRC_20UA;
diff --git a/drivers/power/supply/abx500_chargalg.c b/drivers/power/supply/ab8500_chargalg.c
index b72826cf6794..ff4b26b1ceca 100644
--- a/drivers/power/supply/abx500_chargalg.c
+++ b/drivers/power/supply/ab8500_chargalg.c
@@ -3,7 +3,7 @@
* Copyright (C) ST-Ericsson SA 2012
* Copyright (c) 2012 Sony Mobile Communications AB
*
- * Charging algorithm driver for abx500 variants
+ * Charging algorithm driver for AB8500
*
* Authors:
* Johan Palsson <johan.palsson@stericsson.com>
@@ -49,18 +49,18 @@
#define CHARGALG_CURR_STEP_LOW 0
#define CHARGALG_CURR_STEP_HIGH 100
-enum abx500_chargers {
+enum ab8500_chargers {
NO_CHG,
AC_CHG,
USB_CHG,
};
-struct abx500_chargalg_charger_info {
- enum abx500_chargers conn_chg;
- enum abx500_chargers prev_conn_chg;
- enum abx500_chargers online_chg;
- enum abx500_chargers prev_online_chg;
- enum abx500_chargers charger_type;
+struct ab8500_chargalg_charger_info {
+ enum ab8500_chargers conn_chg;
+ enum ab8500_chargers prev_conn_chg;
+ enum ab8500_chargers online_chg;
+ enum ab8500_chargers prev_online_chg;
+ enum ab8500_chargers charger_type;
bool usb_chg_ok;
bool ac_chg_ok;
int usb_volt;
@@ -73,18 +73,18 @@ struct abx500_chargalg_charger_info {
int ac_iset;
};
-struct abx500_chargalg_suspension_status {
+struct ab8500_chargalg_suspension_status {
bool suspended_change;
bool ac_suspended;
bool usb_suspended;
};
-struct abx500_chargalg_current_step_status {
+struct ab8500_chargalg_current_step_status {
bool curr_step_change;
int curr_step;
};
-struct abx500_chargalg_battery_data {
+struct ab8500_chargalg_battery_data {
int temp;
int volt;
int avg_curr;
@@ -92,7 +92,7 @@ struct abx500_chargalg_battery_data {
int percent;
};
-enum abx500_chargalg_states {
+enum ab8500_chargalg_states {
STATE_HANDHELD_INIT,
STATE_HANDHELD,
STATE_CHG_NOT_OK_INIT,
@@ -123,7 +123,7 @@ enum abx500_chargalg_states {
STATE_WD_EXPIRED,
};
-static const char *states[] = {
+static const char * const states[] = {
"HANDHELD_INIT",
"HANDHELD",
"CHG_NOT_OK_INIT",
@@ -154,7 +154,7 @@ static const char *states[] = {
"WD_EXPIRED",
};
-struct abx500_chargalg_events {
+struct ab8500_chargalg_events {
bool batt_unknown;
bool mainextchnotok;
bool batt_ovv;
@@ -176,7 +176,7 @@ struct abx500_chargalg_events {
};
/**
- * struct abx500_charge_curr_maximization - Charger maximization parameters
+ * struct ab8500_charge_curr_maximization - Charger maximization parameters
* @original_iset: the non optimized/maximised charger current
* @current_iset: the charging current used at this moment
* @test_delta_i: the delta between the current we want to charge and the
@@ -190,7 +190,7 @@ struct abx500_chargalg_events {
* @level: tells in how many steps the charging current has been
increased
*/
-struct abx500_charge_curr_maximization {
+struct ab8500_charge_curr_maximization {
int original_iset;
int current_iset;
int test_delta_i;
@@ -207,7 +207,7 @@ enum maxim_ret {
};
/**
- * struct abx500_chargalg - abx500 Charging algorithm device information
+ * struct ab8500_chargalg - ab8500 Charging algorithm device information
* @dev: pointer to the structure device
* @charge_status: battery operating status
* @eoc_cnt: counter used to determine end-of_charge
@@ -223,7 +223,7 @@ enum maxim_ret {
* @susp_status: current charger suspension status
* @bm: Platform specific battery management information
* @curr_status: Current step status for over-current protection
- * @parent: pointer to the struct abx500
+ * @parent: pointer to the struct ab8500
* @chargalg_psy: structure that holds the battery properties exposed by
* the charging algorithm
* @events: structure for information about events triggered
@@ -235,25 +235,25 @@ enum maxim_ret {
* @maintenance_timer: maintenance charging timer
* @chargalg_kobject: structure of type kobject
*/
-struct abx500_chargalg {
+struct ab8500_chargalg {
struct device *dev;
int charge_status;
int eoc_cnt;
bool maintenance_chg;
int t_hyst_norm;
int t_hyst_lowhigh;
- enum abx500_chargalg_states charge_state;
- struct abx500_charge_curr_maximization ccm;
- struct abx500_chargalg_charger_info chg_info;
- struct abx500_chargalg_battery_data batt_data;
- struct abx500_chargalg_suspension_status susp_status;
+ enum ab8500_chargalg_states charge_state;
+ struct ab8500_charge_curr_maximization ccm;
+ struct ab8500_chargalg_charger_info chg_info;
+ struct ab8500_chargalg_battery_data batt_data;
+ struct ab8500_chargalg_suspension_status susp_status;
struct ab8500 *parent;
- struct abx500_chargalg_current_step_status curr_status;
- struct abx500_bm_data *bm;
+ struct ab8500_chargalg_current_step_status curr_status;
+ struct ab8500_bm_data *bm;
struct power_supply *chargalg_psy;
struct ux500_charger *ac_chg;
struct ux500_charger *usb_chg;
- struct abx500_chargalg_events events;
+ struct ab8500_chargalg_events events;
struct workqueue_struct *chargalg_wq;
struct delayed_work chargalg_periodic_work;
struct delayed_work chargalg_wd_work;
@@ -267,28 +267,28 @@ struct abx500_chargalg {
BLOCKING_NOTIFIER_HEAD(charger_notifier_list);
/* Main battery properties */
-static enum power_supply_property abx500_chargalg_props[] = {
+static enum power_supply_property ab8500_chargalg_props[] = {
POWER_SUPPLY_PROP_STATUS,
POWER_SUPPLY_PROP_HEALTH,
};
-struct abx500_chargalg_sysfs_entry {
+struct ab8500_chargalg_sysfs_entry {
struct attribute attr;
- ssize_t (*show)(struct abx500_chargalg *, char *);
- ssize_t (*store)(struct abx500_chargalg *, const char *, size_t);
+ ssize_t (*show)(struct ab8500_chargalg *di, char *buf);
+ ssize_t (*store)(struct ab8500_chargalg *di, const char *buf, size_t length);
};
/**
- * abx500_chargalg_safety_timer_expired() - Expiration of the safety timer
+ * ab8500_chargalg_safety_timer_expired() - Expiration of the safety timer
* @timer: pointer to the hrtimer structure
*
* This function gets called when the safety timer for the charger
* expires
*/
static enum hrtimer_restart
-abx500_chargalg_safety_timer_expired(struct hrtimer *timer)
+ab8500_chargalg_safety_timer_expired(struct hrtimer *timer)
{
- struct abx500_chargalg *di = container_of(timer, struct abx500_chargalg,
+ struct ab8500_chargalg *di = container_of(timer, struct ab8500_chargalg,
safety_timer);
dev_err(di->dev, "Safety timer expired\n");
di->events.safety_timer_expired = true;
@@ -300,7 +300,7 @@ abx500_chargalg_safety_timer_expired(struct hrtimer *timer)
}
/**
- * abx500_chargalg_maintenance_timer_expired() - Expiration of
+ * ab8500_chargalg_maintenance_timer_expired() - Expiration of
* the maintenance timer
* @timer: pointer to the timer structure
*
@@ -308,10 +308,10 @@ abx500_chargalg_safety_timer_expired(struct hrtimer *timer)
* expires
*/
static enum hrtimer_restart
-abx500_chargalg_maintenance_timer_expired(struct hrtimer *timer)
+ab8500_chargalg_maintenance_timer_expired(struct hrtimer *timer)
{
- struct abx500_chargalg *di = container_of(timer, struct abx500_chargalg,
+ struct ab8500_chargalg *di = container_of(timer, struct ab8500_chargalg,
maintenance_timer);
dev_dbg(di->dev, "Maintenance timer expired\n");
@@ -324,13 +324,13 @@ abx500_chargalg_maintenance_timer_expired(struct hrtimer *timer)
}
/**
- * abx500_chargalg_state_to() - Change charge state
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_state_to() - Change charge state
+ * @di: pointer to the ab8500_chargalg structure
*
* This function gets called when a charge state change should occur
*/
-static void abx500_chargalg_state_to(struct abx500_chargalg *di,
- enum abx500_chargalg_states state)
+static void ab8500_chargalg_state_to(struct ab8500_chargalg *di,
+ enum ab8500_chargalg_states state)
{
dev_dbg(di->dev,
"State changed: %s (From state: [%d] %s =to=> [%d] %s )\n",
@@ -343,7 +343,7 @@ static void abx500_chargalg_state_to(struct abx500_chargalg *di,
di->charge_state = state;
}
-static int abx500_chargalg_check_charger_enable(struct abx500_chargalg *di)
+static int ab8500_chargalg_check_charger_enable(struct ab8500_chargalg *di)
{
switch (di->charge_state) {
case STATE_NORMAL:
@@ -368,13 +368,13 @@ static int abx500_chargalg_check_charger_enable(struct abx500_chargalg *di)
}
/**
- * abx500_chargalg_check_charger_connection() - Check charger connection change
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_check_charger_connection() - Check charger connection change
+ * @di: pointer to the ab8500_chargalg structure
*
* This function will check if there is a change in the charger connection
* and change charge state accordingly. AC has precedence over USB.
*/
-static int abx500_chargalg_check_charger_connection(struct abx500_chargalg *di)
+static int ab8500_chargalg_check_charger_connection(struct ab8500_chargalg *di)
{
if (di->chg_info.conn_chg != di->chg_info.prev_conn_chg ||
di->susp_status.suspended_change) {
@@ -387,23 +387,23 @@ static int abx500_chargalg_check_charger_connection(struct abx500_chargalg *di)
dev_dbg(di->dev, "Charging source is AC\n");
if (di->chg_info.charger_type != AC_CHG) {
di->chg_info.charger_type = AC_CHG;
- abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
+ ab8500_chargalg_state_to(di, STATE_NORMAL_INIT);
}
} else if ((di->chg_info.conn_chg & USB_CHG) &&
!di->susp_status.usb_suspended) {
dev_dbg(di->dev, "Charging source is USB\n");
di->chg_info.charger_type = USB_CHG;
- abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
+ ab8500_chargalg_state_to(di, STATE_NORMAL_INIT);
} else if (di->chg_info.conn_chg &&
(di->susp_status.ac_suspended ||
di->susp_status.usb_suspended)) {
dev_dbg(di->dev, "Charging is suspended\n");
di->chg_info.charger_type = NO_CHG;
- abx500_chargalg_state_to(di, STATE_SUSPENDED_INIT);
+ ab8500_chargalg_state_to(di, STATE_SUSPENDED_INIT);
} else {
dev_dbg(di->dev, "Charging source is OFF\n");
di->chg_info.charger_type = NO_CHG;
- abx500_chargalg_state_to(di, STATE_HANDHELD_INIT);
+ ab8500_chargalg_state_to(di, STATE_HANDHELD_INIT);
}
di->chg_info.prev_conn_chg = di->chg_info.conn_chg;
di->susp_status.suspended_change = false;
@@ -412,29 +412,29 @@ static int abx500_chargalg_check_charger_connection(struct abx500_chargalg *di)
}
/**
- * abx500_chargalg_check_current_step_status() - Check charging current
+ * ab8500_chargalg_check_current_step_status() - Check charging current
* step status.
- * @di: pointer to the abx500_chargalg structure
+ * @di: pointer to the ab8500_chargalg structure
*
* This function will check if there is a change in the charging current step
* and change charge state accordingly.
*/
-static void abx500_chargalg_check_current_step_status
- (struct abx500_chargalg *di)
+static void ab8500_chargalg_check_current_step_status
+ (struct ab8500_chargalg *di)
{
if (di->curr_status.curr_step_change)
- abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
+ ab8500_chargalg_state_to(di, STATE_NORMAL_INIT);
di->curr_status.curr_step_change = false;
}
/**
- * abx500_chargalg_start_safety_timer() - Start charging safety timer
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_start_safety_timer() - Start charging safety timer
+ * @di: pointer to the ab8500_chargalg structure
*
* The safety timer is used to avoid overcharging of old or bad batteries.
* There are different timers for AC and USB
*/
-static void abx500_chargalg_start_safety_timer(struct abx500_chargalg *di)
+static void ab8500_chargalg_start_safety_timer(struct ab8500_chargalg *di)
{
/* Charger-dependent expiration time in hours*/
int timer_expiration = 0;
@@ -461,27 +461,27 @@ static void abx500_chargalg_start_safety_timer(struct abx500_chargalg *di)
}
/**
- * abx500_chargalg_stop_safety_timer() - Stop charging safety timer
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_stop_safety_timer() - Stop charging safety timer
+ * @di: pointer to the ab8500_chargalg structure
*
* The safety timer is stopped whenever the NORMAL state is exited
*/
-static void abx500_chargalg_stop_safety_timer(struct abx500_chargalg *di)
+static void ab8500_chargalg_stop_safety_timer(struct ab8500_chargalg *di)
{
if (hrtimer_try_to_cancel(&di->safety_timer) >= 0)
di->events.safety_timer_expired = false;
}
/**
- * abx500_chargalg_start_maintenance_timer() - Start charging maintenance timer
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_start_maintenance_timer() - Start charging maintenance timer
+ * @di: pointer to the ab8500_chargalg structure
* @duration: duration of ther maintenance timer in hours
*
* The maintenance timer is used to maintain the charge in the battery once
* the battery is considered full. These timers are chosen to match the
* discharge curve of the battery
*/
-static void abx500_chargalg_start_maintenance_timer(struct abx500_chargalg *di,
+static void ab8500_chargalg_start_maintenance_timer(struct ab8500_chargalg *di,
int duration)
{
hrtimer_set_expires_range(&di->maintenance_timer,
@@ -492,26 +492,26 @@ static void abx500_chargalg_start_maintenance_timer(struct abx500_chargalg *di,
}
/**
- * abx500_chargalg_stop_maintenance_timer() - Stop maintenance timer
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_stop_maintenance_timer() - Stop maintenance timer
+ * @di: pointer to the ab8500_chargalg structure
*
* The maintenance timer is stopped whenever maintenance ends or when another
* state is entered
*/
-static void abx500_chargalg_stop_maintenance_timer(struct abx500_chargalg *di)
+static void ab8500_chargalg_stop_maintenance_timer(struct ab8500_chargalg *di)
{
if (hrtimer_try_to_cancel(&di->maintenance_timer) >= 0)
di->events.maintenance_timer_expired = false;
}
/**
- * abx500_chargalg_kick_watchdog() - Kick charger watchdog
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_kick_watchdog() - Kick charger watchdog
+ * @di: pointer to the ab8500_chargalg structure
*
* The charger watchdog have to be kicked periodically whenever the charger is
* on, else the ABB will reset the system
*/
-static int abx500_chargalg_kick_watchdog(struct abx500_chargalg *di)
+static int ab8500_chargalg_kick_watchdog(struct ab8500_chargalg *di)
{
/* Check if charger exists and kick watchdog if charging */
if (di->ac_chg && di->ac_chg->ops.kick_wd &&
@@ -526,8 +526,7 @@ static int abx500_chargalg_kick_watchdog(struct abx500_chargalg *di)
di->usb_chg->ops.kick_wd(di->usb_chg);
return di->ac_chg->ops.kick_wd(di->ac_chg);
- }
- else if (di->usb_chg && di->usb_chg->ops.kick_wd &&
+ } else if (di->usb_chg && di->usb_chg->ops.kick_wd &&
di->chg_info.online_chg & USB_CHG)
return di->usb_chg->ops.kick_wd(di->usb_chg);
@@ -535,8 +534,8 @@ static int abx500_chargalg_kick_watchdog(struct abx500_chargalg *di)
}
/**
- * abx500_chargalg_ac_en() - Turn on/off the AC charger
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_ac_en() - Turn on/off the AC charger
+ * @di: pointer to the ab8500_chargalg structure
* @enable: charger on/off
* @vset: requested charger output voltage
* @iset: requested charger output current
@@ -544,10 +543,10 @@ static int abx500_chargalg_kick_watchdog(struct abx500_chargalg *di)
* The AC charger will be turned on/off with the requested charge voltage and
* current
*/
-static int abx500_chargalg_ac_en(struct abx500_chargalg *di, int enable,
+static int ab8500_chargalg_ac_en(struct ab8500_chargalg *di, int enable,
int vset, int iset)
{
- static int abx500_chargalg_ex_ac_enable_toggle;
+ static int ab8500_chargalg_ex_ac_enable_toggle;
if (!di->ac_chg || !di->ac_chg->ops.enable)
return -ENXIO;
@@ -563,18 +562,18 @@ static int abx500_chargalg_ac_en(struct abx500_chargalg *di, int enable,
/* Enable external charger */
if (enable && di->ac_chg->external &&
- !abx500_chargalg_ex_ac_enable_toggle) {
+ !ab8500_chargalg_ex_ac_enable_toggle) {
blocking_notifier_call_chain(&charger_notifier_list,
0, di->dev);
- abx500_chargalg_ex_ac_enable_toggle++;
+ ab8500_chargalg_ex_ac_enable_toggle++;
}
return di->ac_chg->ops.enable(di->ac_chg, enable, vset, iset);
}
/**
- * abx500_chargalg_usb_en() - Turn on/off the USB charger
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_usb_en() - Turn on/off the USB charger
+ * @di: pointer to the ab8500_chargalg structure
* @enable: charger on/off
* @vset: requested charger output voltage
* @iset: requested charger output current
@@ -582,7 +581,7 @@ static int abx500_chargalg_ac_en(struct abx500_chargalg *di, int enable,
* The USB charger will be turned on/off with the requested charge voltage and
* current
*/
-static int abx500_chargalg_usb_en(struct abx500_chargalg *di, int enable,
+static int ab8500_chargalg_usb_en(struct ab8500_chargalg *di, int enable,
int vset, int iset)
{
if (!di->usb_chg || !di->usb_chg->ops.enable)
@@ -601,14 +600,14 @@ static int abx500_chargalg_usb_en(struct abx500_chargalg *di, int enable,
}
/**
- * abx500_chargalg_update_chg_curr() - Update charger current
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_update_chg_curr() - Update charger current
+ * @di: pointer to the ab8500_chargalg structure
* @iset: requested charger output current
*
* The charger output current will be updated for the charger
* that is currently in use
*/
-static int abx500_chargalg_update_chg_curr(struct abx500_chargalg *di,
+static int ab8500_chargalg_update_chg_curr(struct ab8500_chargalg *di,
int iset)
{
/* Check if charger exists and update current if charging */
@@ -642,19 +641,19 @@ static int abx500_chargalg_update_chg_curr(struct abx500_chargalg *di,
}
/**
- * abx500_chargalg_stop_charging() - Stop charging
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_stop_charging() - Stop charging
+ * @di: pointer to the ab8500_chargalg structure
*
* This function is called from any state where charging should be stopped.
* All charging is disabled and all status parameters and timers are changed
* accordingly
*/
-static void abx500_chargalg_stop_charging(struct abx500_chargalg *di)
+static void ab8500_chargalg_stop_charging(struct ab8500_chargalg *di)
{
- abx500_chargalg_ac_en(di, false, 0, 0);
- abx500_chargalg_usb_en(di, false, 0, 0);
- abx500_chargalg_stop_safety_timer(di);
- abx500_chargalg_stop_maintenance_timer(di);
+ ab8500_chargalg_ac_en(di, false, 0, 0);
+ ab8500_chargalg_usb_en(di, false, 0, 0);
+ ab8500_chargalg_stop_safety_timer(di);
+ ab8500_chargalg_stop_maintenance_timer(di);
di->charge_status = POWER_SUPPLY_STATUS_NOT_CHARGING;
di->maintenance_chg = false;
cancel_delayed_work(&di->chargalg_wd_work);
@@ -662,19 +661,19 @@ static void abx500_chargalg_stop_charging(struct abx500_chargalg *di)
}
/**
- * abx500_chargalg_hold_charging() - Pauses charging
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_hold_charging() - Pauses charging
+ * @di: pointer to the ab8500_chargalg structure
*
* This function is called in the case where maintenance charging has been
* disabled and instead a battery voltage mode is entered to check when the
* battery voltage has reached a certain recharge voltage
*/
-static void abx500_chargalg_hold_charging(struct abx500_chargalg *di)
+static void ab8500_chargalg_hold_charging(struct ab8500_chargalg *di)
{
- abx500_chargalg_ac_en(di, false, 0, 0);
- abx500_chargalg_usb_en(di, false, 0, 0);
- abx500_chargalg_stop_safety_timer(di);
- abx500_chargalg_stop_maintenance_timer(di);
+ ab8500_chargalg_ac_en(di, false, 0, 0);
+ ab8500_chargalg_usb_en(di, false, 0, 0);
+ ab8500_chargalg_stop_safety_timer(di);
+ ab8500_chargalg_stop_maintenance_timer(di);
di->charge_status = POWER_SUPPLY_STATUS_CHARGING;
di->maintenance_chg = false;
cancel_delayed_work(&di->chargalg_wd_work);
@@ -682,30 +681,30 @@ static void abx500_chargalg_hold_charging(struct abx500_chargalg *di)
}
/**
- * abx500_chargalg_start_charging() - Start the charger
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_start_charging() - Start the charger
+ * @di: pointer to the ab8500_chargalg structure
* @vset: requested charger output voltage
* @iset: requested charger output current
*
* A charger will be enabled depending on the requested charger type that was
* detected previously.
*/
-static void abx500_chargalg_start_charging(struct abx500_chargalg *di,
+static void ab8500_chargalg_start_charging(struct ab8500_chargalg *di,
int vset, int iset)
{
switch (di->chg_info.charger_type) {
case AC_CHG:
dev_dbg(di->dev,
"AC parameters: Vset %d, Ich %d\n", vset, iset);
- abx500_chargalg_usb_en(di, false, 0, 0);
- abx500_chargalg_ac_en(di, true, vset, iset);
+ ab8500_chargalg_usb_en(di, false, 0, 0);
+ ab8500_chargalg_ac_en(di, true, vset, iset);
break;
case USB_CHG:
dev_dbg(di->dev,
"USB parameters: Vset %d, Ich %d\n", vset, iset);
- abx500_chargalg_ac_en(di, false, 0, 0);
- abx500_chargalg_usb_en(di, true, vset, iset);
+ ab8500_chargalg_ac_en(di, false, 0, 0);
+ ab8500_chargalg_usb_en(di, true, vset, iset);
break;
default:
@@ -715,13 +714,13 @@ static void abx500_chargalg_start_charging(struct abx500_chargalg *di,
}
/**
- * abx500_chargalg_check_temp() - Check battery temperature ranges
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_check_temp() - Check battery temperature ranges
+ * @di: pointer to the ab8500_chargalg structure
*
* The battery temperature is checked against the predefined limits and the
* charge state is changed accordingly
*/
-static void abx500_chargalg_check_temp(struct abx500_chargalg *di)
+static void ab8500_chargalg_check_temp(struct ab8500_chargalg *di)
{
if (di->batt_data.temp > (di->bm->temp_low + di->t_hyst_norm) &&
di->batt_data.temp < (di->bm->temp_high - di->t_hyst_norm)) {
@@ -750,8 +749,8 @@ static void abx500_chargalg_check_temp(struct abx500_chargalg *di)
di->t_hyst_norm = 0;
di->t_hyst_lowhigh = di->bm->temp_hysteresis;
} else {
- /* Within hysteresis */
- dev_dbg(di->dev, "Within hysteresis limit temp: %d "
+ /* Within hysteresis */
+ dev_dbg(di->dev, "Within hysteresis limit temp: %d "
"hyst_lowhigh %d, hyst normal %d\n",
di->batt_data.temp, di->t_hyst_lowhigh,
di->t_hyst_norm);
@@ -760,12 +759,12 @@ static void abx500_chargalg_check_temp(struct abx500_chargalg *di)
}
/**
- * abx500_chargalg_check_charger_voltage() - Check charger voltage
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_check_charger_voltage() - Check charger voltage
+ * @di: pointer to the ab8500_chargalg structure
*
* Charger voltage is checked against maximum limit
*/
-static void abx500_chargalg_check_charger_voltage(struct abx500_chargalg *di)
+static void ab8500_chargalg_check_charger_voltage(struct ab8500_chargalg *di)
{
if (di->chg_info.usb_volt > di->bm->chg_params->usb_volt_max)
di->chg_info.usb_chg_ok = false;
@@ -780,14 +779,14 @@ static void abx500_chargalg_check_charger_voltage(struct abx500_chargalg *di)
}
/**
- * abx500_chargalg_end_of_charge() - Check if end-of-charge criteria is fulfilled
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_end_of_charge() - Check if end-of-charge criteria is fulfilled
+ * @di: pointer to the ab8500_chargalg structure
*
* End-of-charge criteria is fulfilled when the battery voltage is above a
* certain limit and the battery current is below a certain limit for a
* predefined number of consecutive seconds. If true, the battery is full
*/
-static void abx500_chargalg_end_of_charge(struct abx500_chargalg *di)
+static void ab8500_chargalg_end_of_charge(struct ab8500_chargalg *di)
{
if (di->charge_status == POWER_SUPPLY_STATUS_CHARGING &&
di->charge_state == STATE_NORMAL &&
@@ -815,7 +814,7 @@ static void abx500_chargalg_end_of_charge(struct abx500_chargalg *di)
}
}
-static void init_maxim_chg_curr(struct abx500_chargalg *di)
+static void init_maxim_chg_curr(struct ab8500_chargalg *di)
{
di->ccm.original_iset =
di->bm->bat_type[di->bm->batt_id].normal_cur_lvl;
@@ -828,15 +827,15 @@ static void init_maxim_chg_curr(struct abx500_chargalg *di)
}
/**
- * abx500_chargalg_chg_curr_maxim - increases the charger current to
+ * ab8500_chargalg_chg_curr_maxim - increases the charger current to
* compensate for the system load
- * @di pointer to the abx500_chargalg structure
+ * @di pointer to the ab8500_chargalg structure
*
* This maximization function is used to raise the charger current to get the
* battery current as close to the optimal value as possible. The battery
* current during charging is affected by the system load
*/
-static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di)
+static enum maxim_ret ab8500_chargalg_chg_curr_maxim(struct ab8500_chargalg *di)
{
int delta_i;
@@ -867,7 +866,7 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di)
di->ccm.wait_cnt = 0;
- if ((di->batt_data.inst_curr > di->ccm.original_iset)) {
+ if (di->batt_data.inst_curr > di->ccm.original_iset) {
dev_dbg(di->dev, " Maximization Ibat (%dmA) too high"
" (limit %dmA) (current iset: %dmA)!\n",
di->batt_data.inst_curr, di->ccm.original_iset,
@@ -908,21 +907,21 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di)
}
}
-static void handle_maxim_chg_curr(struct abx500_chargalg *di)
+static void handle_maxim_chg_curr(struct ab8500_chargalg *di)
{
enum maxim_ret ret;
int result;
- ret = abx500_chargalg_chg_curr_maxim(di);
+ ret = ab8500_chargalg_chg_curr_maxim(di);
switch (ret) {
case MAXIM_RET_CHANGE:
- result = abx500_chargalg_update_chg_curr(di,
+ result = ab8500_chargalg_update_chg_curr(di,
di->ccm.current_iset);
if (result)
dev_err(di->dev, "failed to set chg curr\n");
break;
case MAXIM_RET_IBAT_TOO_HIGH:
- result = abx500_chargalg_update_chg_curr(di,
+ result = ab8500_chargalg_update_chg_curr(di,
di->bm->bat_type[di->bm->batt_id].normal_cur_lvl);
if (result)
dev_err(di->dev, "failed to set chg curr\n");
@@ -935,12 +934,12 @@ static void handle_maxim_chg_curr(struct abx500_chargalg *di)
}
}
-static int abx500_chargalg_get_ext_psy_data(struct device *dev, void *data)
+static int ab8500_chargalg_get_ext_psy_data(struct device *dev, void *data)
{
struct power_supply *psy;
struct power_supply *ext = dev_get_drvdata(dev);
const char **supplicants = (const char **)ext->supplied_to;
- struct abx500_chargalg *di;
+ struct ab8500_chargalg *di;
union power_supply_propval ret;
int j;
bool capacity_updated = false;
@@ -1260,7 +1259,7 @@ static int abx500_chargalg_get_ext_psy_data(struct device *dev, void *data)
}
/**
- * abx500_chargalg_external_power_changed() - callback for power supply changes
+ * ab8500_chargalg_external_power_changed() - callback for power supply changes
* @psy: pointer to the structure power_supply
*
* This function is the entry point of the pointer external_power_changed
@@ -1268,26 +1267,27 @@ static int abx500_chargalg_get_ext_psy_data(struct device *dev, void *data)
* This function gets executed when there is a change in any external power
* supply that this driver needs to be notified of.
*/
-static void abx500_chargalg_external_power_changed(struct power_supply *psy)
+static void ab8500_chargalg_external_power_changed(struct power_supply *psy)
{
- struct abx500_chargalg *di = power_supply_get_drvdata(psy);
+ struct ab8500_chargalg *di = power_supply_get_drvdata(psy);
/*
* Trigger execution of the algorithm instantly and read
* all power_supply properties there instead
*/
- queue_work(di->chargalg_wq, &di->chargalg_work);
+ if (di->chargalg_wq)
+ queue_work(di->chargalg_wq, &di->chargalg_work);
}
/**
- * abx500_chargalg_algorithm() - Main function for the algorithm
- * @di: pointer to the abx500_chargalg structure
+ * ab8500_chargalg_algorithm() - Main function for the algorithm
+ * @di: pointer to the ab8500_chargalg structure
*
* This is the main control function for the charging algorithm.
* It is called periodically or when something happens that will
* trigger a state change
*/
-static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
+static void ab8500_chargalg_algorithm(struct ab8500_chargalg *di)
{
int charger_status;
int ret;
@@ -1295,17 +1295,17 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
/* Collect data from all power_supply class devices */
class_for_each_device(power_supply_class, NULL,
- di->chargalg_psy, abx500_chargalg_get_ext_psy_data);
+ di->chargalg_psy, ab8500_chargalg_get_ext_psy_data);
- abx500_chargalg_end_of_charge(di);
- abx500_chargalg_check_temp(di);
- abx500_chargalg_check_charger_voltage(di);
+ ab8500_chargalg_end_of_charge(di);
+ ab8500_chargalg_check_temp(di);
+ ab8500_chargalg_check_charger_voltage(di);
- charger_status = abx500_chargalg_check_charger_connection(di);
- abx500_chargalg_check_current_step_status(di);
+ charger_status = ab8500_chargalg_check_charger_connection(di);
+ ab8500_chargalg_check_current_step_status(di);
if (is_ab8500(di->parent)) {
- ret = abx500_chargalg_check_charger_enable(di);
+ ret = ab8500_chargalg_check_charger_enable(di);
if (ret < 0)
dev_err(di->dev, "Checking charger is enabled error"
": Returned Value %d\n", ret);
@@ -1320,7 +1320,7 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
(di->events.batt_unknown && !di->bm->chg_unknown_bat)) {
if (di->charge_state != STATE_HANDHELD) {
di->events.safety_timer_expired = false;
- abx500_chargalg_state_to(di, STATE_HANDHELD_INIT);
+ ab8500_chargalg_state_to(di, STATE_HANDHELD_INIT);
}
}
@@ -1333,7 +1333,7 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
/* Safety timer expiration */
else if (di->events.safety_timer_expired) {
if (di->charge_state != STATE_SAFETY_TIMER_EXPIRED)
- abx500_chargalg_state_to(di,
+ ab8500_chargalg_state_to(di,
STATE_SAFETY_TIMER_EXPIRED_INIT);
}
/*
@@ -1344,7 +1344,7 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
/* Battery removed */
else if (di->events.batt_rem) {
if (di->charge_state != STATE_BATT_REMOVED)
- abx500_chargalg_state_to(di, STATE_BATT_REMOVED_INIT);
+ ab8500_chargalg_state_to(di, STATE_BATT_REMOVED_INIT);
}
/* Main or USB charger not ok. */
else if (di->events.mainextchnotok || di->events.usbchargernotok) {
@@ -1354,7 +1354,7 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
*/
if (di->charge_state != STATE_CHG_NOT_OK &&
!di->events.vbus_collapsed)
- abx500_chargalg_state_to(di, STATE_CHG_NOT_OK_INIT);
+ ab8500_chargalg_state_to(di, STATE_CHG_NOT_OK_INIT);
}
/* VBUS, Main or VBAT OVV. */
else if (di->events.vbus_ovv ||
@@ -1363,31 +1363,31 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
!di->chg_info.usb_chg_ok ||
!di->chg_info.ac_chg_ok) {
if (di->charge_state != STATE_OVV_PROTECT)
- abx500_chargalg_state_to(di, STATE_OVV_PROTECT_INIT);
+ ab8500_chargalg_state_to(di, STATE_OVV_PROTECT_INIT);
}
/* USB Thermal, stop charging */
else if (di->events.main_thermal_prot ||
di->events.usb_thermal_prot) {
if (di->charge_state != STATE_HW_TEMP_PROTECT)
- abx500_chargalg_state_to(di,
+ ab8500_chargalg_state_to(di,
STATE_HW_TEMP_PROTECT_INIT);
}
/* Battery temp over/under */
else if (di->events.btemp_underover) {
if (di->charge_state != STATE_TEMP_UNDEROVER)
- abx500_chargalg_state_to(di,
+ ab8500_chargalg_state_to(di,
STATE_TEMP_UNDEROVER_INIT);
}
/* Watchdog expired */
else if (di->events.ac_wd_expired ||
di->events.usb_wd_expired) {
if (di->charge_state != STATE_WD_EXPIRED)
- abx500_chargalg_state_to(di, STATE_WD_EXPIRED_INIT);
+ ab8500_chargalg_state_to(di, STATE_WD_EXPIRED_INIT);
}
/* Battery temp high/low */
else if (di->events.btemp_lowhigh) {
if (di->charge_state != STATE_TEMP_LOWHIGH)
- abx500_chargalg_state_to(di, STATE_TEMP_LOWHIGH_INIT);
+ ab8500_chargalg_state_to(di, STATE_TEMP_LOWHIGH_INIT);
}
dev_dbg(di->dev,
@@ -1419,9 +1419,9 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
switch (di->charge_state) {
case STATE_HANDHELD_INIT:
- abx500_chargalg_stop_charging(di);
+ ab8500_chargalg_stop_charging(di);
di->charge_status = POWER_SUPPLY_STATUS_DISCHARGING;
- abx500_chargalg_state_to(di, STATE_HANDHELD);
+ ab8500_chargalg_state_to(di, STATE_HANDHELD);
fallthrough;
case STATE_HANDHELD:
@@ -1429,14 +1429,14 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
case STATE_SUSPENDED_INIT:
if (di->susp_status.ac_suspended)
- abx500_chargalg_ac_en(di, false, 0, 0);
+ ab8500_chargalg_ac_en(di, false, 0, 0);
if (di->susp_status.usb_suspended)
- abx500_chargalg_usb_en(di, false, 0, 0);
- abx500_chargalg_stop_safety_timer(di);
- abx500_chargalg_stop_maintenance_timer(di);
+ ab8500_chargalg_usb_en(di, false, 0, 0);
+ ab8500_chargalg_stop_safety_timer(di);
+ ab8500_chargalg_stop_maintenance_timer(di);
di->charge_status = POWER_SUPPLY_STATUS_NOT_CHARGING;
di->maintenance_chg = false;
- abx500_chargalg_state_to(di, STATE_SUSPENDED);
+ ab8500_chargalg_state_to(di, STATE_SUSPENDED);
power_supply_changed(di->chargalg_psy);
fallthrough;
@@ -1445,29 +1445,29 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
break;
case STATE_BATT_REMOVED_INIT:
- abx500_chargalg_stop_charging(di);
- abx500_chargalg_state_to(di, STATE_BATT_REMOVED);
+ ab8500_chargalg_stop_charging(di);
+ ab8500_chargalg_state_to(di, STATE_BATT_REMOVED);
fallthrough;
case STATE_BATT_REMOVED:
if (!di->events.batt_rem)
- abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
+ ab8500_chargalg_state_to(di, STATE_NORMAL_INIT);
break;
case STATE_HW_TEMP_PROTECT_INIT:
- abx500_chargalg_stop_charging(di);
- abx500_chargalg_state_to(di, STATE_HW_TEMP_PROTECT);
+ ab8500_chargalg_stop_charging(di);
+ ab8500_chargalg_state_to(di, STATE_HW_TEMP_PROTECT);
fallthrough;
case STATE_HW_TEMP_PROTECT:
if (!di->events.main_thermal_prot &&
!di->events.usb_thermal_prot)
- abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
+ ab8500_chargalg_state_to(di, STATE_NORMAL_INIT);
break;
case STATE_OVV_PROTECT_INIT:
- abx500_chargalg_stop_charging(di);
- abx500_chargalg_state_to(di, STATE_OVV_PROTECT);
+ ab8500_chargalg_stop_charging(di);
+ ab8500_chargalg_state_to(di, STATE_OVV_PROTECT);
fallthrough;
case STATE_OVV_PROTECT:
@@ -1476,23 +1476,23 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
!di->events.batt_ovv &&
di->chg_info.usb_chg_ok &&
di->chg_info.ac_chg_ok)
- abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
+ ab8500_chargalg_state_to(di, STATE_NORMAL_INIT);
break;
case STATE_CHG_NOT_OK_INIT:
- abx500_chargalg_stop_charging(di);
- abx500_chargalg_state_to(di, STATE_CHG_NOT_OK);
+ ab8500_chargalg_stop_charging(di);
+ ab8500_chargalg_state_to(di, STATE_CHG_NOT_OK);
fallthrough;
case STATE_CHG_NOT_OK:
if (!di->events.mainextchnotok &&
!di->events.usbchargernotok)
- abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
+ ab8500_chargalg_state_to(di, STATE_NORMAL_INIT);
break;
case STATE_SAFETY_TIMER_EXPIRED_INIT:
- abx500_chargalg_stop_charging(di);
- abx500_chargalg_state_to(di, STATE_SAFETY_TIMER_EXPIRED);
+ ab8500_chargalg_stop_charging(di);
+ ab8500_chargalg_state_to(di, STATE_SAFETY_TIMER_EXPIRED);
fallthrough;
case STATE_SAFETY_TIMER_EXPIRED:
@@ -1501,20 +1501,20 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
case STATE_NORMAL_INIT:
if (di->curr_status.curr_step == CHARGALG_CURR_STEP_LOW)
- abx500_chargalg_stop_charging(di);
+ ab8500_chargalg_stop_charging(di);
else {
curr_step_lvl = di->bm->bat_type[
di->bm->batt_id].normal_cur_lvl
* di->curr_status.curr_step
/ CHARGALG_CURR_STEP_HIGH;
- abx500_chargalg_start_charging(di,
+ ab8500_chargalg_start_charging(di,
di->bm->bat_type[di->bm->batt_id]
.normal_vol_lvl, curr_step_lvl);
}
- abx500_chargalg_state_to(di, STATE_NORMAL);
- abx500_chargalg_start_safety_timer(di);
- abx500_chargalg_stop_maintenance_timer(di);
+ ab8500_chargalg_state_to(di, STATE_NORMAL);
+ ab8500_chargalg_start_safety_timer(di);
+ ab8500_chargalg_stop_maintenance_timer(di);
init_maxim_chg_curr(di);
di->charge_status = POWER_SUPPLY_STATUS_CHARGING;
di->eoc_cnt = 0;
@@ -1528,104 +1528,103 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
if (di->charge_status == POWER_SUPPLY_STATUS_FULL &&
di->maintenance_chg) {
if (di->bm->no_maintenance)
- abx500_chargalg_state_to(di,
+ ab8500_chargalg_state_to(di,
STATE_WAIT_FOR_RECHARGE_INIT);
else
- abx500_chargalg_state_to(di,
+ ab8500_chargalg_state_to(di,
STATE_MAINTENANCE_A_INIT);
}
break;
/* This state will be used when the maintenance state is disabled */
case STATE_WAIT_FOR_RECHARGE_INIT:
- abx500_chargalg_hold_charging(di);
- abx500_chargalg_state_to(di, STATE_WAIT_FOR_RECHARGE);
+ ab8500_chargalg_hold_charging(di);
+ ab8500_chargalg_state_to(di, STATE_WAIT_FOR_RECHARGE);
fallthrough;
case STATE_WAIT_FOR_RECHARGE:
if (di->batt_data.percent <=
- di->bm->bat_type[di->bm->batt_id].
- recharge_cap)
- abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
+ di->bm->bat_type[di->bm->batt_id].recharge_cap)
+ ab8500_chargalg_state_to(di, STATE_NORMAL_INIT);
break;
case STATE_MAINTENANCE_A_INIT:
- abx500_chargalg_stop_safety_timer(di);
- abx500_chargalg_start_maintenance_timer(di,
+ ab8500_chargalg_stop_safety_timer(di);
+ ab8500_chargalg_start_maintenance_timer(di,
di->bm->bat_type[
di->bm->batt_id].maint_a_chg_timer_h);
- abx500_chargalg_start_charging(di,
+ ab8500_chargalg_start_charging(di,
di->bm->bat_type[
di->bm->batt_id].maint_a_vol_lvl,
di->bm->bat_type[
di->bm->batt_id].maint_a_cur_lvl);
- abx500_chargalg_state_to(di, STATE_MAINTENANCE_A);
+ ab8500_chargalg_state_to(di, STATE_MAINTENANCE_A);
power_supply_changed(di->chargalg_psy);
fallthrough;
case STATE_MAINTENANCE_A:
if (di->events.maintenance_timer_expired) {
- abx500_chargalg_stop_maintenance_timer(di);
- abx500_chargalg_state_to(di, STATE_MAINTENANCE_B_INIT);
+ ab8500_chargalg_stop_maintenance_timer(di);
+ ab8500_chargalg_state_to(di, STATE_MAINTENANCE_B_INIT);
}
break;
case STATE_MAINTENANCE_B_INIT:
- abx500_chargalg_start_maintenance_timer(di,
+ ab8500_chargalg_start_maintenance_timer(di,
di->bm->bat_type[
di->bm->batt_id].maint_b_chg_timer_h);
- abx500_chargalg_start_charging(di,
+ ab8500_chargalg_start_charging(di,
di->bm->bat_type[
di->bm->batt_id].maint_b_vol_lvl,
di->bm->bat_type[
di->bm->batt_id].maint_b_cur_lvl);
- abx500_chargalg_state_to(di, STATE_MAINTENANCE_B);
+ ab8500_chargalg_state_to(di, STATE_MAINTENANCE_B);
power_supply_changed(di->chargalg_psy);
fallthrough;
case STATE_MAINTENANCE_B:
if (di->events.maintenance_timer_expired) {
- abx500_chargalg_stop_maintenance_timer(di);
- abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
+ ab8500_chargalg_stop_maintenance_timer(di);
+ ab8500_chargalg_state_to(di, STATE_NORMAL_INIT);
}
break;
case STATE_TEMP_LOWHIGH_INIT:
- abx500_chargalg_start_charging(di,
+ ab8500_chargalg_start_charging(di,
di->bm->bat_type[
di->bm->batt_id].low_high_vol_lvl,
di->bm->bat_type[
di->bm->batt_id].low_high_cur_lvl);
- abx500_chargalg_stop_maintenance_timer(di);
+ ab8500_chargalg_stop_maintenance_timer(di);
di->charge_status = POWER_SUPPLY_STATUS_CHARGING;
- abx500_chargalg_state_to(di, STATE_TEMP_LOWHIGH);
+ ab8500_chargalg_state_to(di, STATE_TEMP_LOWHIGH);
power_supply_changed(di->chargalg_psy);
fallthrough;
case STATE_TEMP_LOWHIGH:
if (!di->events.btemp_lowhigh)
- abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
+ ab8500_chargalg_state_to(di, STATE_NORMAL_INIT);
break;
case STATE_WD_EXPIRED_INIT:
- abx500_chargalg_stop_charging(di);
- abx500_chargalg_state_to(di, STATE_WD_EXPIRED);
+ ab8500_chargalg_stop_charging(di);
+ ab8500_chargalg_state_to(di, STATE_WD_EXPIRED);
fallthrough;
case STATE_WD_EXPIRED:
if (!di->events.ac_wd_expired &&
!di->events.usb_wd_expired)
- abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
+ ab8500_chargalg_state_to(di, STATE_NORMAL_INIT);
break;
case STATE_TEMP_UNDEROVER_INIT:
- abx500_chargalg_stop_charging(di);
- abx500_chargalg_state_to(di, STATE_TEMP_UNDEROVER);
+ ab8500_chargalg_stop_charging(di);
+ ab8500_chargalg_state_to(di, STATE_TEMP_UNDEROVER);
fallthrough;
case STATE_TEMP_UNDEROVER:
if (!di->events.btemp_underover)
- abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
+ ab8500_chargalg_state_to(di, STATE_NORMAL_INIT);
break;
}
@@ -1637,17 +1636,17 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
}
/**
- * abx500_chargalg_periodic_work() - Periodic work for the algorithm
+ * ab8500_chargalg_periodic_work() - Periodic work for the algorithm
* @work: pointer to the work_struct structure
*
* Work queue function for the charging algorithm
*/
-static void abx500_chargalg_periodic_work(struct work_struct *work)
+static void ab8500_chargalg_periodic_work(struct work_struct *work)
{
- struct abx500_chargalg *di = container_of(work,
- struct abx500_chargalg, chargalg_periodic_work.work);
+ struct ab8500_chargalg *di = container_of(work,
+ struct ab8500_chargalg, chargalg_periodic_work.work);
- abx500_chargalg_algorithm(di);
+ ab8500_chargalg_algorithm(di);
/*
* If a charger is connected then the battery has to be monitored
@@ -1664,20 +1663,18 @@ static void abx500_chargalg_periodic_work(struct work_struct *work)
}
/**
- * abx500_chargalg_wd_work() - periodic work to kick the charger watchdog
+ * ab8500_chargalg_wd_work() - periodic work to kick the charger watchdog
* @work: pointer to the work_struct structure
*
* Work queue function for kicking the charger watchdog
*/
-static void abx500_chargalg_wd_work(struct work_struct *work)
+static void ab8500_chargalg_wd_work(struct work_struct *work)
{
int ret;
- struct abx500_chargalg *di = container_of(work,
- struct abx500_chargalg, chargalg_wd_work.work);
-
- dev_dbg(di->dev, "abx500_chargalg_wd_work\n");
+ struct ab8500_chargalg *di = container_of(work,
+ struct ab8500_chargalg, chargalg_wd_work.work);
- ret = abx500_chargalg_kick_watchdog(di);
+ ret = ab8500_chargalg_kick_watchdog(di);
if (ret < 0)
dev_err(di->dev, "failed to kick watchdog\n");
@@ -1686,21 +1683,21 @@ static void abx500_chargalg_wd_work(struct work_struct *work)
}
/**
- * abx500_chargalg_work() - Work to run the charging algorithm instantly
+ * ab8500_chargalg_work() - Work to run the charging algorithm instantly
* @work: pointer to the work_struct structure
*
* Work queue function for calling the charging algorithm
*/
-static void abx500_chargalg_work(struct work_struct *work)
+static void ab8500_chargalg_work(struct work_struct *work)
{
- struct abx500_chargalg *di = container_of(work,
- struct abx500_chargalg, chargalg_work);
+ struct ab8500_chargalg *di = container_of(work,
+ struct ab8500_chargalg, chargalg_work);
- abx500_chargalg_algorithm(di);
+ ab8500_chargalg_algorithm(di);
}
/**
- * abx500_chargalg_get_property() - get the chargalg properties
+ * ab8500_chargalg_get_property() - get the chargalg properties
* @psy: pointer to the power_supply structure
* @psp: pointer to the power_supply_property structure
* @val: pointer to the power_supply_propval union
@@ -1711,11 +1708,11 @@ static void abx500_chargalg_work(struct work_struct *work)
* health: health of the battery
* Returns error code in case of failure else 0 on success
*/
-static int abx500_chargalg_get_property(struct power_supply *psy,
+static int ab8500_chargalg_get_property(struct power_supply *psy,
enum power_supply_property psp,
union power_supply_propval *val)
{
- struct abx500_chargalg *di = power_supply_get_drvdata(psy);
+ struct ab8500_chargalg *di = power_supply_get_drvdata(psy);
switch (psp) {
case POWER_SUPPLY_PROP_STATUS:
@@ -1744,16 +1741,16 @@ static int abx500_chargalg_get_property(struct power_supply *psy,
/* Exposure to the sysfs interface */
-static ssize_t abx500_chargalg_curr_step_show(struct abx500_chargalg *di,
+static ssize_t ab8500_chargalg_curr_step_show(struct ab8500_chargalg *di,
char *buf)
{
return sprintf(buf, "%d\n", di->curr_status.curr_step);
}
-static ssize_t abx500_chargalg_curr_step_store(struct abx500_chargalg *di,
+static ssize_t ab8500_chargalg_curr_step_store(struct ab8500_chargalg *di,
const char *buf, size_t length)
{
- long int param;
+ long param;
int ret;
ret = kstrtol(buf, 10, &param);
@@ -1775,7 +1772,7 @@ static ssize_t abx500_chargalg_curr_step_store(struct abx500_chargalg *di,
}
-static ssize_t abx500_chargalg_en_show(struct abx500_chargalg *di,
+static ssize_t ab8500_chargalg_en_show(struct ab8500_chargalg *di,
char *buf)
{
return sprintf(buf, "%d\n",
@@ -1783,10 +1780,10 @@ static ssize_t abx500_chargalg_en_show(struct abx500_chargalg *di,
di->susp_status.usb_suspended);
}
-static ssize_t abx500_chargalg_en_store(struct abx500_chargalg *di,
+static ssize_t ab8500_chargalg_en_store(struct ab8500_chargalg *di,
const char *buf, size_t length)
{
- long int param;
+ long param;
int ac_usb;
int ret;
@@ -1830,22 +1827,22 @@ static ssize_t abx500_chargalg_en_store(struct abx500_chargalg *di,
return strlen(buf);
}
-static struct abx500_chargalg_sysfs_entry abx500_chargalg_en_charger =
- __ATTR(chargalg, 0644, abx500_chargalg_en_show,
- abx500_chargalg_en_store);
+static struct ab8500_chargalg_sysfs_entry ab8500_chargalg_en_charger =
+ __ATTR(chargalg, 0644, ab8500_chargalg_en_show,
+ ab8500_chargalg_en_store);
-static struct abx500_chargalg_sysfs_entry abx500_chargalg_curr_step =
- __ATTR(chargalg_curr_step, 0644, abx500_chargalg_curr_step_show,
- abx500_chargalg_curr_step_store);
+static struct ab8500_chargalg_sysfs_entry ab8500_chargalg_curr_step =
+ __ATTR(chargalg_curr_step, 0644, ab8500_chargalg_curr_step_show,
+ ab8500_chargalg_curr_step_store);
-static ssize_t abx500_chargalg_sysfs_show(struct kobject *kobj,
+static ssize_t ab8500_chargalg_sysfs_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
- struct abx500_chargalg_sysfs_entry *entry = container_of(attr,
- struct abx500_chargalg_sysfs_entry, attr);
+ struct ab8500_chargalg_sysfs_entry *entry = container_of(attr,
+ struct ab8500_chargalg_sysfs_entry, attr);
- struct abx500_chargalg *di = container_of(kobj,
- struct abx500_chargalg, chargalg_kobject);
+ struct ab8500_chargalg *di = container_of(kobj,
+ struct ab8500_chargalg, chargalg_kobject);
if (!entry->show)
return -EIO;
@@ -1853,14 +1850,14 @@ static ssize_t abx500_chargalg_sysfs_show(struct kobject *kobj,
return entry->show(di, buf);
}
-static ssize_t abx500_chargalg_sysfs_charger(struct kobject *kobj,
+static ssize_t ab8500_chargalg_sysfs_charger(struct kobject *kobj,
struct attribute *attr, const char *buf, size_t length)
{
- struct abx500_chargalg_sysfs_entry *entry = container_of(attr,
- struct abx500_chargalg_sysfs_entry, attr);
+ struct ab8500_chargalg_sysfs_entry *entry = container_of(attr,
+ struct ab8500_chargalg_sysfs_entry, attr);
- struct abx500_chargalg *di = container_of(kobj,
- struct abx500_chargalg, chargalg_kobject);
+ struct ab8500_chargalg *di = container_of(kobj,
+ struct ab8500_chargalg, chargalg_kobject);
if (!entry->store)
return -EIO;
@@ -1868,47 +1865,47 @@ static ssize_t abx500_chargalg_sysfs_charger(struct kobject *kobj,
return entry->store(di, buf, length);
}
-static struct attribute *abx500_chargalg_chg[] = {
- &abx500_chargalg_en_charger.attr,
- &abx500_chargalg_curr_step.attr,
+static struct attribute *ab8500_chargalg_chg[] = {
+ &ab8500_chargalg_en_charger.attr,
+ &ab8500_chargalg_curr_step.attr,
NULL,
};
-static const struct sysfs_ops abx500_chargalg_sysfs_ops = {
- .show = abx500_chargalg_sysfs_show,
- .store = abx500_chargalg_sysfs_charger,
+static const struct sysfs_ops ab8500_chargalg_sysfs_ops = {
+ .show = ab8500_chargalg_sysfs_show,
+ .store = ab8500_chargalg_sysfs_charger,
};
-static struct kobj_type abx500_chargalg_ktype = {
- .sysfs_ops = &abx500_chargalg_sysfs_ops,
- .default_attrs = abx500_chargalg_chg,
+static struct kobj_type ab8500_chargalg_ktype = {
+ .sysfs_ops = &ab8500_chargalg_sysfs_ops,
+ .default_attrs = ab8500_chargalg_chg,
};
/**
- * abx500_chargalg_sysfs_exit() - de-init of sysfs entry
- * @di: pointer to the struct abx500_chargalg
+ * ab8500_chargalg_sysfs_exit() - de-init of sysfs entry
+ * @di: pointer to the struct ab8500_chargalg
*
* This function removes the entry in sysfs.
*/
-static void abx500_chargalg_sysfs_exit(struct abx500_chargalg *di)
+static void ab8500_chargalg_sysfs_exit(struct ab8500_chargalg *di)
{
kobject_del(&di->chargalg_kobject);
}
/**
- * abx500_chargalg_sysfs_init() - init of sysfs entry
- * @di: pointer to the struct abx500_chargalg
+ * ab8500_chargalg_sysfs_init() - init of sysfs entry
+ * @di: pointer to the struct ab8500_chargalg
*
* This function adds an entry in sysfs.
* Returns error code in case of failure else 0(on success)
*/
-static int abx500_chargalg_sysfs_init(struct abx500_chargalg *di)
+static int ab8500_chargalg_sysfs_init(struct ab8500_chargalg *di)
{
int ret = 0;
ret = kobject_init_and_add(&di->chargalg_kobject,
- &abx500_chargalg_ktype,
- NULL, "abx500_chargalg");
+ &ab8500_chargalg_ktype,
+ NULL, "ab8500_chargalg");
if (ret < 0)
dev_err(di->dev, "failed to create sysfs entry\n");
@@ -1916,9 +1913,9 @@ static int abx500_chargalg_sysfs_init(struct abx500_chargalg *di)
}
/* Exposure to the sysfs interface <<END>> */
-static int __maybe_unused abx500_chargalg_resume(struct device *dev)
+static int __maybe_unused ab8500_chargalg_resume(struct device *dev)
{
- struct abx500_chargalg *di = dev_get_drvdata(dev);
+ struct ab8500_chargalg *di = dev_get_drvdata(dev);
/* Kick charger watchdog if charging (any charger online) */
if (di->chg_info.online_chg)
@@ -1933,9 +1930,9 @@ static int __maybe_unused abx500_chargalg_resume(struct device *dev)
return 0;
}
-static int __maybe_unused abx500_chargalg_suspend(struct device *dev)
+static int __maybe_unused ab8500_chargalg_suspend(struct device *dev)
{
- struct abx500_chargalg *di = dev_get_drvdata(dev);
+ struct ab8500_chargalg *di = dev_get_drvdata(dev);
if (di->chg_info.online_chg)
cancel_delayed_work_sync(&di->chargalg_wd_work);
@@ -1949,22 +1946,22 @@ static char *supply_interface[] = {
"ab8500_fg",
};
-static const struct power_supply_desc abx500_chargalg_desc = {
- .name = "abx500_chargalg",
+static const struct power_supply_desc ab8500_chargalg_desc = {
+ .name = "ab8500_chargalg",
.type = POWER_SUPPLY_TYPE_BATTERY,
- .properties = abx500_chargalg_props,
- .num_properties = ARRAY_SIZE(abx500_chargalg_props),
- .get_property = abx500_chargalg_get_property,
- .external_power_changed = abx500_chargalg_external_power_changed,
+ .properties = ab8500_chargalg_props,
+ .num_properties = ARRAY_SIZE(ab8500_chargalg_props),
+ .get_property = ab8500_chargalg_get_property,
+ .external_power_changed = ab8500_chargalg_external_power_changed,
};
-static int abx500_chargalg_bind(struct device *dev, struct device *master,
+static int ab8500_chargalg_bind(struct device *dev, struct device *master,
void *data)
{
- struct abx500_chargalg *di = dev_get_drvdata(dev);
+ struct ab8500_chargalg *di = dev_get_drvdata(dev);
/* Create a work queue for the chargalg */
- di->chargalg_wq = alloc_ordered_workqueue("abx500_chargalg_wq",
+ di->chargalg_wq = alloc_ordered_workqueue("ab8500_chargalg_wq",
WQ_MEM_RECLAIM);
if (di->chargalg_wq == NULL) {
dev_err(di->dev, "failed to create work queue\n");
@@ -1977,10 +1974,10 @@ static int abx500_chargalg_bind(struct device *dev, struct device *master,
return 0;
}
-static void abx500_chargalg_unbind(struct device *dev, struct device *master,
+static void ab8500_chargalg_unbind(struct device *dev, struct device *master,
void *data)
{
- struct abx500_chargalg *di = dev_get_drvdata(dev);
+ struct ab8500_chargalg *di = dev_get_drvdata(dev);
/* Stop all timers and work */
hrtimer_cancel(&di->safety_timer);
@@ -1995,16 +1992,16 @@ static void abx500_chargalg_unbind(struct device *dev, struct device *master,
flush_scheduled_work();
}
-static const struct component_ops abx500_chargalg_component_ops = {
- .bind = abx500_chargalg_bind,
- .unbind = abx500_chargalg_unbind,
+static const struct component_ops ab8500_chargalg_component_ops = {
+ .bind = ab8500_chargalg_bind,
+ .unbind = ab8500_chargalg_unbind,
};
-static int abx500_chargalg_probe(struct platform_device *pdev)
+static int ab8500_chargalg_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct power_supply_config psy_cfg = {};
- struct abx500_chargalg *di;
+ struct ab8500_chargalg *di;
int ret = 0;
di = devm_kzalloc(dev, sizeof(*di), GFP_KERNEL);
@@ -2023,28 +2020,28 @@ static int abx500_chargalg_probe(struct platform_device *pdev)
/* Initilialize safety timer */
hrtimer_init(&di->safety_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
- di->safety_timer.function = abx500_chargalg_safety_timer_expired;
+ di->safety_timer.function = ab8500_chargalg_safety_timer_expired;
/* Initilialize maintenance timer */
hrtimer_init(&di->maintenance_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
di->maintenance_timer.function =
- abx500_chargalg_maintenance_timer_expired;
+ ab8500_chargalg_maintenance_timer_expired;
/* Init work for chargalg */
INIT_DEFERRABLE_WORK(&di->chargalg_periodic_work,
- abx500_chargalg_periodic_work);
+ ab8500_chargalg_periodic_work);
INIT_DEFERRABLE_WORK(&di->chargalg_wd_work,
- abx500_chargalg_wd_work);
+ ab8500_chargalg_wd_work);
/* Init work for chargalg */
- INIT_WORK(&di->chargalg_work, abx500_chargalg_work);
+ INIT_WORK(&di->chargalg_work, ab8500_chargalg_work);
/* To detect charger at startup */
di->chg_info.prev_conn_chg = -1;
/* Register chargalg power supply class */
di->chargalg_psy = devm_power_supply_register(di->dev,
- &abx500_chargalg_desc,
+ &ab8500_chargalg_desc,
&psy_cfg);
if (IS_ERR(di->chargalg_psy)) {
dev_err(di->dev, "failed to register chargalg psy\n");
@@ -2054,7 +2051,7 @@ static int abx500_chargalg_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, di);
/* sysfs interface to enable/disable charging from user space */
- ret = abx500_chargalg_sysfs_init(di);
+ ret = ab8500_chargalg_sysfs_init(di);
if (ret) {
dev_err(di->dev, "failed to create sysfs entry\n");
return ret;
@@ -2062,38 +2059,38 @@ static int abx500_chargalg_probe(struct platform_device *pdev)
di->curr_status.curr_step = CHARGALG_CURR_STEP_HIGH;
dev_info(di->dev, "probe success\n");
- return component_add(dev, &abx500_chargalg_component_ops);
+ return component_add(dev, &ab8500_chargalg_component_ops);
}
-static int abx500_chargalg_remove(struct platform_device *pdev)
+static int ab8500_chargalg_remove(struct platform_device *pdev)
{
- struct abx500_chargalg *di = platform_get_drvdata(pdev);
+ struct ab8500_chargalg *di = platform_get_drvdata(pdev);
- component_del(&pdev->dev, &abx500_chargalg_component_ops);
+ component_del(&pdev->dev, &ab8500_chargalg_component_ops);
/* sysfs interface to enable/disable charging from user space */
- abx500_chargalg_sysfs_exit(di);
+ ab8500_chargalg_sysfs_exit(di);
return 0;
}
-static SIMPLE_DEV_PM_OPS(abx500_chargalg_pm_ops, abx500_chargalg_suspend, abx500_chargalg_resume);
+static SIMPLE_DEV_PM_OPS(ab8500_chargalg_pm_ops, ab8500_chargalg_suspend, ab8500_chargalg_resume);
static const struct of_device_id ab8500_chargalg_match[] = {
{ .compatible = "stericsson,ab8500-chargalg", },
{ },
};
-struct platform_driver abx500_chargalg_driver = {
- .probe = abx500_chargalg_probe,
- .remove = abx500_chargalg_remove,
+struct platform_driver ab8500_chargalg_driver = {
+ .probe = ab8500_chargalg_probe,
+ .remove = ab8500_chargalg_remove,
.driver = {
- .name = "ab8500-chargalg",
+ .name = "ab8500_chargalg",
.of_match_table = ab8500_chargalg_match,
- .pm = &abx500_chargalg_pm_ops,
+ .pm = &ab8500_chargalg_pm_ops,
},
};
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Johan Palsson, Karl Komierowski");
-MODULE_ALIAS("platform:abx500-chargalg");
-MODULE_DESCRIPTION("abx500 battery charging algorithm");
+MODULE_ALIAS("platform:ab8500-chargalg");
+MODULE_DESCRIPTION("ab8500 battery charging algorithm");
diff --git a/drivers/power/supply/ab8500_charger.c b/drivers/power/supply/ab8500_charger.c
index fa49e12e5a60..15eadaf46f14 100644
--- a/drivers/power/supply/ab8500_charger.c
+++ b/drivers/power/supply/ab8500_charger.c
@@ -292,7 +292,7 @@ struct ab8500_charger {
struct iio_channel *adc_main_charger_c;
struct iio_channel *adc_vbus_v;
struct iio_channel *adc_usb_charger_c;
- struct abx500_bm_data *bm;
+ struct ab8500_bm_data *bm;
struct ab8500_charger_event_flags flags;
struct ab8500_charger_usb_state usb_state;
struct ab8500_charger_max_usb_in_curr max_usb_in_curr;
@@ -3388,7 +3388,7 @@ static const struct component_master_ops ab8500_charger_comp_ops = {
static struct platform_driver *const ab8500_charger_component_drivers[] = {
&ab8500_fg_driver,
&ab8500_btemp_driver,
- &abx500_chargalg_driver,
+ &ab8500_chargalg_driver,
};
static int ab8500_charger_compare_dev(struct device *dev, void *data)
diff --git a/drivers/power/supply/ab8500_fg.c b/drivers/power/supply/ab8500_fg.c
index a6ebdb269fdd..05fe9724ba50 100644
--- a/drivers/power/supply/ab8500_fg.c
+++ b/drivers/power/supply/ab8500_fg.c
@@ -34,6 +34,7 @@
#include <linux/mfd/abx500/ab8500.h>
#include <linux/iio/consumer.h>
#include <linux/kernel.h>
+#include <linux/fixp-arith.h>
#include "ab8500-bm.h"
@@ -56,9 +57,6 @@
/* FG constants */
#define BATT_OVV 0x01
-#define interpolate(x, x1, y1, x2, y2) \
- ((y1) + ((((y2) - (y1)) * ((x) - (x1))) / ((x2) - (x1))));
-
/**
* struct ab8500_fg_interrupts - ab8500 fg interrupts
* @name: name of the interrupt
@@ -227,7 +225,7 @@ struct ab8500_fg {
struct ab8500_fg_avg_cap avg_cap;
struct ab8500 *parent;
struct iio_channel *main_bat_v;
- struct abx500_bm_data *bm;
+ struct ab8500_bm_data *bm;
struct power_supply *fg_psy;
struct workqueue_struct *fg_wq;
struct delayed_work fg_periodic_work;
@@ -856,7 +854,7 @@ static int ab8500_fg_bat_voltage(struct ab8500_fg *di)
static int ab8500_fg_volt_to_capacity(struct ab8500_fg *di, int voltage)
{
int i, tbl_size;
- const struct abx500_v_to_cap *tbl;
+ const struct ab8500_v_to_cap *tbl;
int cap = 0;
tbl = di->bm->bat_type[di->bm->batt_id].v_to_cap_tbl;
@@ -868,11 +866,12 @@ static int ab8500_fg_volt_to_capacity(struct ab8500_fg *di, int voltage)
}
if ((i > 0) && (i < tbl_size)) {
- cap = interpolate(voltage,
+ cap = fixp_linear_interpolate(
tbl[i].voltage,
tbl[i].capacity * 10,
tbl[i-1].voltage,
- tbl[i-1].capacity * 10);
+ tbl[i-1].capacity * 10,
+ voltage);
} else if (i == 0) {
cap = 1000;
} else {
@@ -920,11 +919,12 @@ static int ab8500_fg_battery_resistance(struct ab8500_fg *di)
}
if ((i > 0) && (i < tbl_size)) {
- resist = interpolate(di->bat_temp / 10,
+ resist = fixp_linear_interpolate(
tbl[i].temp,
tbl[i].resist,
tbl[i-1].temp,
- tbl[i-1].resist);
+ tbl[i-1].resist,
+ di->bat_temp / 10);
} else if (i == 0) {
resist = tbl[0].resist;
} else {
@@ -2235,7 +2235,7 @@ static int ab8500_fg_get_ext_psy_data(struct device *dev, void *data)
case POWER_SUPPLY_TYPE_BATTERY:
if (!di->flags.batt_id_received &&
di->bm->batt_id != BATTERY_UNKNOWN) {
- const struct abx500_battery_type *b;
+ const struct ab8500_battery_type *b;
b = &(di->bm->bat_type[di->bm->batt_id]);
diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c
index a4df1ea92386..b9553be9bed5 100644
--- a/drivers/power/supply/axp288_charger.c
+++ b/drivers/power/supply/axp288_charger.c
@@ -813,7 +813,7 @@ static int axp288_charger_probe(struct platform_device *pdev)
if (val == 0)
return -ENODEV;
- info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
+ info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
@@ -823,7 +823,7 @@ static int axp288_charger_probe(struct platform_device *pdev)
info->cable.edev = extcon_get_extcon_dev(AXP288_EXTCON_DEV_NAME);
if (info->cable.edev == NULL) {
- dev_dbg(&pdev->dev, "%s is not ready, probe deferred\n",
+ dev_dbg(dev, "%s is not ready, probe deferred\n",
AXP288_EXTCON_DEV_NAME);
return -EPROBE_DEFER;
}
@@ -834,8 +834,7 @@ static int axp288_charger_probe(struct platform_device *pdev)
dev_dbg(dev, "EXTCON_USB_HOST is not ready, probe deferred\n");
return -EPROBE_DEFER;
}
- dev_info(&pdev->dev,
- "Using " USB_HOST_EXTCON_HID " extcon for usb-id\n");
+ dev_info(dev, "Using " USB_HOST_EXTCON_HID " extcon for usb-id\n");
}
platform_set_drvdata(pdev, info);
@@ -874,7 +873,7 @@ static int axp288_charger_probe(struct platform_device *pdev)
INIT_WORK(&info->otg.work, axp288_charger_otg_evt_worker);
info->otg.id_nb.notifier_call = axp288_charger_handle_otg_evt;
if (info->otg.cable) {
- ret = devm_extcon_register_notifier(&pdev->dev, info->otg.cable,
+ ret = devm_extcon_register_notifier(dev, info->otg.cable,
EXTCON_USB_HOST, &info->otg.id_nb);
if (ret) {
dev_err(dev, "failed to register EXTCON_USB_HOST notifier\n");
@@ -899,7 +898,7 @@ static int axp288_charger_probe(struct platform_device *pdev)
NULL, axp288_charger_irq_thread_handler,
IRQF_ONESHOT, info->pdev->name, info);
if (ret) {
- dev_err(&pdev->dev, "failed to request interrupt=%d\n",
+ dev_err(dev, "failed to request interrupt=%d\n",
info->irq[i]);
return ret;
}
diff --git a/drivers/power/supply/axp288_fuel_gauge.c b/drivers/power/supply/axp288_fuel_gauge.c
index 2ba2d8d6b8e6..c1da217fdb0e 100644
--- a/drivers/power/supply/axp288_fuel_gauge.c
+++ b/drivers/power/supply/axp288_fuel_gauge.c
@@ -2,7 +2,8 @@
/*
* axp288_fuel_gauge.c - Xpower AXP288 PMIC Fuel Gauge Driver
*
- * Copyright (C) 2016-2017 Hans de Goede <hdegoede@redhat.com>
+ * Copyright (C) 2020-2021 Andrejus Basovas <xxx@yyy.tld>
+ * Copyright (C) 2016-2021 Hans de Goede <hdegoede@redhat.com>
* Copyright (C) 2014 Intel Corporation
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -19,38 +20,37 @@
#include <linux/platform_device.h>
#include <linux/power_supply.h>
#include <linux/iio/consumer.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
#include <asm/unaligned.h>
+#include <asm/iosf_mbi.h>
-#define PS_STAT_VBUS_TRIGGER (1 << 0)
-#define PS_STAT_BAT_CHRG_DIR (1 << 2)
-#define PS_STAT_VBAT_ABOVE_VHOLD (1 << 3)
-#define PS_STAT_VBUS_VALID (1 << 4)
-#define PS_STAT_VBUS_PRESENT (1 << 5)
+#define PS_STAT_VBUS_TRIGGER (1 << 0)
+#define PS_STAT_BAT_CHRG_DIR (1 << 2)
+#define PS_STAT_VBAT_ABOVE_VHOLD (1 << 3)
+#define PS_STAT_VBUS_VALID (1 << 4)
+#define PS_STAT_VBUS_PRESENT (1 << 5)
-#define CHRG_STAT_BAT_SAFE_MODE (1 << 3)
+#define CHRG_STAT_BAT_SAFE_MODE (1 << 3)
#define CHRG_STAT_BAT_VALID (1 << 4)
-#define CHRG_STAT_BAT_PRESENT (1 << 5)
+#define CHRG_STAT_BAT_PRESENT (1 << 5)
#define CHRG_STAT_CHARGING (1 << 6)
#define CHRG_STAT_PMIC_OTP (1 << 7)
#define CHRG_CCCV_CC_MASK 0xf /* 4 bits */
-#define CHRG_CCCV_CC_BIT_POS 0
+#define CHRG_CCCV_CC_BIT_POS 0
#define CHRG_CCCV_CC_OFFSET 200 /* 200mA */
-#define CHRG_CCCV_CC_LSB_RES 200 /* 200mA */
+#define CHRG_CCCV_CC_LSB_RES 200 /* 200mA */
#define CHRG_CCCV_ITERM_20P (1 << 4) /* 20% of CC */
#define CHRG_CCCV_CV_MASK 0x60 /* 2 bits */
-#define CHRG_CCCV_CV_BIT_POS 5
+#define CHRG_CCCV_CV_BIT_POS 5
#define CHRG_CCCV_CV_4100MV 0x0 /* 4.10V */
#define CHRG_CCCV_CV_4150MV 0x1 /* 4.15V */
#define CHRG_CCCV_CV_4200MV 0x2 /* 4.20V */
#define CHRG_CCCV_CV_4350MV 0x3 /* 4.35V */
#define CHRG_CCCV_CHG_EN (1 << 7)
-#define FG_CNTL_OCV_ADJ_STAT (1 << 2)
+#define FG_CNTL_OCV_ADJ_STAT (1 << 2)
#define FG_CNTL_OCV_ADJ_EN (1 << 3)
-#define FG_CNTL_CAP_ADJ_STAT (1 << 4)
+#define FG_CNTL_CAP_ADJ_STAT (1 << 4)
#define FG_CNTL_CAP_ADJ_EN (1 << 5)
#define FG_CNTL_CC_EN (1 << 6)
#define FG_CNTL_GAUGE_EN (1 << 7)
@@ -71,23 +71,23 @@
#define FG_CC_CAP_VALID (1 << 7)
#define FG_CC_CAP_VAL_MASK 0x7F
-#define FG_LOW_CAP_THR1_MASK 0xf0 /* 5% tp 20% */
+#define FG_LOW_CAP_THR1_MASK 0xf0 /* 5% tp 20% */
#define FG_LOW_CAP_THR1_VAL 0xa0 /* 15 perc */
-#define FG_LOW_CAP_THR2_MASK 0x0f /* 0% to 15% */
+#define FG_LOW_CAP_THR2_MASK 0x0f /* 0% to 15% */
#define FG_LOW_CAP_WARN_THR 14 /* 14 perc */
#define FG_LOW_CAP_CRIT_THR 4 /* 4 perc */
#define FG_LOW_CAP_SHDN_THR 0 /* 0 perc */
-#define NR_RETRY_CNT 3
-#define DEV_NAME "axp288_fuel_gauge"
+#define DEV_NAME "axp288_fuel_gauge"
/* 1.1mV per LSB expressed in uV */
#define VOLTAGE_FROM_ADC(a) ((a * 11) / 10)
/* properties converted to uV, uA */
-#define PROP_VOLT(a) ((a) * 1000)
-#define PROP_CURR(a) ((a) * 1000)
+#define PROP_VOLT(a) ((a) * 1000)
+#define PROP_CURR(a) ((a) * 1000)
-#define AXP288_FG_INTR_NUM 6
+#define AXP288_REG_UPDATE_INTERVAL (60 * HZ)
+#define AXP288_FG_INTR_NUM 6
enum {
QWBTU_IRQ = 0,
WBTU_IRQ,
@@ -98,9 +98,6 @@ enum {
};
enum {
- BAT_TEMP = 0,
- PMIC_TEMP,
- SYSTEM_TEMP,
BAT_CHRG_CURR,
BAT_D_CURR,
BAT_VOLT,
@@ -108,7 +105,7 @@ enum {
};
struct axp288_fg_info {
- struct platform_device *pdev;
+ struct device *dev;
struct regmap *regmap;
struct regmap_irq_chip_data *regmap_irqc;
int irq[AXP288_FG_INTR_NUM];
@@ -117,7 +114,21 @@ struct axp288_fg_info {
struct mutex lock;
int status;
int max_volt;
+ int pwr_op;
+ int low_cap;
struct dentry *debug_file;
+
+ char valid; /* zero until following fields are valid */
+ unsigned long last_updated; /* in jiffies */
+
+ int pwr_stat;
+ int fg_res;
+ int bat_volt;
+ int d_curr;
+ int c_curr;
+ int ocv;
+ int fg_cc_mtr1;
+ int fg_des_cap1;
};
static enum power_supply_property fuel_gauge_props[] = {
@@ -137,17 +148,12 @@ static enum power_supply_property fuel_gauge_props[] = {
static int fuel_gauge_reg_readb(struct axp288_fg_info *info, int reg)
{
- int ret, i;
unsigned int val;
+ int ret;
- for (i = 0; i < NR_RETRY_CNT; i++) {
- ret = regmap_read(info->regmap, reg, &val);
- if (ret != -EBUSY)
- break;
- }
-
+ ret = regmap_read(info->regmap, reg, &val);
if (ret < 0) {
- dev_err(&info->pdev->dev, "axp288 reg read err:%d\n", ret);
+ dev_err(info->dev, "Error reading reg 0x%02x err: %d\n", reg, ret);
return ret;
}
@@ -161,7 +167,7 @@ static int fuel_gauge_reg_writeb(struct axp288_fg_info *info, int reg, u8 val)
ret = regmap_write(info->regmap, reg, (unsigned int)val);
if (ret < 0)
- dev_err(&info->pdev->dev, "axp288 reg write err:%d\n", ret);
+ dev_err(info->dev, "Error writing reg 0x%02x err: %d\n", reg, ret);
return ret;
}
@@ -173,15 +179,13 @@ static int fuel_gauge_read_15bit_word(struct axp288_fg_info *info, int reg)
ret = regmap_bulk_read(info->regmap, reg, buf, 2);
if (ret < 0) {
- dev_err(&info->pdev->dev, "Error reading reg 0x%02x err: %d\n",
- reg, ret);
+ dev_err(info->dev, "Error reading reg 0x%02x err: %d\n", reg, ret);
return ret;
}
ret = get_unaligned_be16(buf);
if (!(ret & FG_15BIT_WORD_VALID)) {
- dev_err(&info->pdev->dev, "Error reg 0x%02x contents not valid\n",
- reg);
+ dev_err(info->dev, "Error reg 0x%02x contents not valid\n", reg);
return -ENXIO;
}
@@ -195,8 +199,7 @@ static int fuel_gauge_read_12bit_word(struct axp288_fg_info *info, int reg)
ret = regmap_bulk_read(info->regmap, reg, buf, 2);
if (ret < 0) {
- dev_err(&info->pdev->dev, "Error reading reg 0x%02x err: %d\n",
- reg, ret);
+ dev_err(info->dev, "Error reading reg 0x%02x err: %d\n", reg, ret);
return ret;
}
@@ -204,139 +207,78 @@ static int fuel_gauge_read_12bit_word(struct axp288_fg_info *info, int reg)
return (buf[0] << 4) | ((buf[1] >> 4) & 0x0f);
}
-#ifdef CONFIG_DEBUG_FS
-static int fuel_gauge_debug_show(struct seq_file *s, void *data)
+static int fuel_gauge_update_registers(struct axp288_fg_info *info)
{
- struct axp288_fg_info *info = s->private;
- int raw_val, ret;
-
- seq_printf(s, " PWR_STATUS[%02x] : %02x\n",
- AXP20X_PWR_INPUT_STATUS,
- fuel_gauge_reg_readb(info, AXP20X_PWR_INPUT_STATUS));
- seq_printf(s, "PWR_OP_MODE[%02x] : %02x\n",
- AXP20X_PWR_OP_MODE,
- fuel_gauge_reg_readb(info, AXP20X_PWR_OP_MODE));
- seq_printf(s, " CHRG_CTRL1[%02x] : %02x\n",
- AXP20X_CHRG_CTRL1,
- fuel_gauge_reg_readb(info, AXP20X_CHRG_CTRL1));
- seq_printf(s, " VLTF[%02x] : %02x\n",
- AXP20X_V_LTF_DISCHRG,
- fuel_gauge_reg_readb(info, AXP20X_V_LTF_DISCHRG));
- seq_printf(s, " VHTF[%02x] : %02x\n",
- AXP20X_V_HTF_DISCHRG,
- fuel_gauge_reg_readb(info, AXP20X_V_HTF_DISCHRG));
- seq_printf(s, " CC_CTRL[%02x] : %02x\n",
- AXP20X_CC_CTRL,
- fuel_gauge_reg_readb(info, AXP20X_CC_CTRL));
- seq_printf(s, "BATTERY CAP[%02x] : %02x\n",
- AXP20X_FG_RES,
- fuel_gauge_reg_readb(info, AXP20X_FG_RES));
- seq_printf(s, " FG_RDC1[%02x] : %02x\n",
- AXP288_FG_RDC1_REG,
- fuel_gauge_reg_readb(info, AXP288_FG_RDC1_REG));
- seq_printf(s, " FG_RDC0[%02x] : %02x\n",
- AXP288_FG_RDC0_REG,
- fuel_gauge_reg_readb(info, AXP288_FG_RDC0_REG));
- seq_printf(s, " FG_OCV[%02x] : %04x\n",
- AXP288_FG_OCVH_REG,
- fuel_gauge_read_12bit_word(info, AXP288_FG_OCVH_REG));
- seq_printf(s, " FG_DES_CAP[%02x] : %04x\n",
- AXP288_FG_DES_CAP1_REG,
- fuel_gauge_read_15bit_word(info, AXP288_FG_DES_CAP1_REG));
- seq_printf(s, " FG_CC_MTR[%02x] : %04x\n",
- AXP288_FG_CC_MTR1_REG,
- fuel_gauge_read_15bit_word(info, AXP288_FG_CC_MTR1_REG));
- seq_printf(s, " FG_OCV_CAP[%02x] : %02x\n",
- AXP288_FG_OCV_CAP_REG,
- fuel_gauge_reg_readb(info, AXP288_FG_OCV_CAP_REG));
- seq_printf(s, " FG_CC_CAP[%02x] : %02x\n",
- AXP288_FG_CC_CAP_REG,
- fuel_gauge_reg_readb(info, AXP288_FG_CC_CAP_REG));
- seq_printf(s, " FG_LOW_CAP[%02x] : %02x\n",
- AXP288_FG_LOW_CAP_REG,
- fuel_gauge_reg_readb(info, AXP288_FG_LOW_CAP_REG));
- seq_printf(s, "TUNING_CTL0[%02x] : %02x\n",
- AXP288_FG_TUNE0,
- fuel_gauge_reg_readb(info, AXP288_FG_TUNE0));
- seq_printf(s, "TUNING_CTL1[%02x] : %02x\n",
- AXP288_FG_TUNE1,
- fuel_gauge_reg_readb(info, AXP288_FG_TUNE1));
- seq_printf(s, "TUNING_CTL2[%02x] : %02x\n",
- AXP288_FG_TUNE2,
- fuel_gauge_reg_readb(info, AXP288_FG_TUNE2));
- seq_printf(s, "TUNING_CTL3[%02x] : %02x\n",
- AXP288_FG_TUNE3,
- fuel_gauge_reg_readb(info, AXP288_FG_TUNE3));
- seq_printf(s, "TUNING_CTL4[%02x] : %02x\n",
- AXP288_FG_TUNE4,
- fuel_gauge_reg_readb(info, AXP288_FG_TUNE4));
- seq_printf(s, "TUNING_CTL5[%02x] : %02x\n",
- AXP288_FG_TUNE5,
- fuel_gauge_reg_readb(info, AXP288_FG_TUNE5));
-
- ret = iio_read_channel_raw(info->iio_channel[BAT_TEMP], &raw_val);
- if (ret >= 0)
- seq_printf(s, "axp288-batttemp : %d\n", raw_val);
- ret = iio_read_channel_raw(info->iio_channel[PMIC_TEMP], &raw_val);
- if (ret >= 0)
- seq_printf(s, "axp288-pmictemp : %d\n", raw_val);
- ret = iio_read_channel_raw(info->iio_channel[SYSTEM_TEMP], &raw_val);
- if (ret >= 0)
- seq_printf(s, "axp288-systtemp : %d\n", raw_val);
- ret = iio_read_channel_raw(info->iio_channel[BAT_CHRG_CURR], &raw_val);
- if (ret >= 0)
- seq_printf(s, "axp288-chrgcurr : %d\n", raw_val);
- ret = iio_read_channel_raw(info->iio_channel[BAT_D_CURR], &raw_val);
- if (ret >= 0)
- seq_printf(s, "axp288-dchrgcur : %d\n", raw_val);
- ret = iio_read_channel_raw(info->iio_channel[BAT_VOLT], &raw_val);
- if (ret >= 0)
- seq_printf(s, "axp288-battvolt : %d\n", raw_val);
+ int ret;
- return 0;
-}
+ if (info->valid && time_before(jiffies, info->last_updated + AXP288_REG_UPDATE_INTERVAL))
+ return 0;
-DEFINE_SHOW_ATTRIBUTE(fuel_gauge_debug);
+ dev_dbg(info->dev, "Fuel Gauge updating register values...\n");
-static void fuel_gauge_create_debugfs(struct axp288_fg_info *info)
-{
- info->debug_file = debugfs_create_file("fuelgauge", 0666, NULL,
- info, &fuel_gauge_debug_fops);
-}
+ ret = iosf_mbi_block_punit_i2c_access();
+ if (ret < 0)
+ return ret;
-static void fuel_gauge_remove_debugfs(struct axp288_fg_info *info)
-{
- debugfs_remove(info->debug_file);
-}
-#else
-static inline void fuel_gauge_create_debugfs(struct axp288_fg_info *info)
-{
-}
-static inline void fuel_gauge_remove_debugfs(struct axp288_fg_info *info)
-{
+ ret = fuel_gauge_reg_readb(info, AXP20X_PWR_INPUT_STATUS);
+ if (ret < 0)
+ goto out;
+ info->pwr_stat = ret;
+
+ ret = fuel_gauge_reg_readb(info, AXP20X_FG_RES);
+ if (ret < 0)
+ goto out;
+ info->fg_res = ret;
+
+ ret = iio_read_channel_raw(info->iio_channel[BAT_VOLT], &info->bat_volt);
+ if (ret < 0)
+ goto out;
+
+ if (info->pwr_stat & PS_STAT_BAT_CHRG_DIR) {
+ info->d_curr = 0;
+ ret = iio_read_channel_raw(info->iio_channel[BAT_CHRG_CURR], &info->c_curr);
+ if (ret < 0)
+ goto out;
+ } else {
+ info->c_curr = 0;
+ ret = iio_read_channel_raw(info->iio_channel[BAT_D_CURR], &info->d_curr);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = fuel_gauge_read_12bit_word(info, AXP288_FG_OCVH_REG);
+ if (ret < 0)
+ goto out;
+ info->ocv = ret;
+
+ ret = fuel_gauge_read_15bit_word(info, AXP288_FG_CC_MTR1_REG);
+ if (ret < 0)
+ goto out;
+ info->fg_cc_mtr1 = ret;
+
+ ret = fuel_gauge_read_15bit_word(info, AXP288_FG_DES_CAP1_REG);
+ if (ret < 0)
+ goto out;
+ info->fg_des_cap1 = ret;
+
+ info->last_updated = jiffies;
+ info->valid = 1;
+ ret = 0;
+out:
+ iosf_mbi_unblock_punit_i2c_access();
+ return ret;
}
-#endif
static void fuel_gauge_get_status(struct axp288_fg_info *info)
{
- int pwr_stat, fg_res, curr, ret;
-
- pwr_stat = fuel_gauge_reg_readb(info, AXP20X_PWR_INPUT_STATUS);
- if (pwr_stat < 0) {
- dev_err(&info->pdev->dev,
- "PWR STAT read failed:%d\n", pwr_stat);
- return;
- }
+ int pwr_stat = info->pwr_stat;
+ int fg_res = info->fg_res;
+ int curr = info->d_curr;
/* Report full if Vbus is valid and the reported capacity is 100% */
if (!(pwr_stat & PS_STAT_VBUS_VALID))
goto not_full;
- fg_res = fuel_gauge_reg_readb(info, AXP20X_FG_RES);
- if (fg_res < 0) {
- dev_err(&info->pdev->dev, "FG RES read failed: %d\n", fg_res);
- return;
- }
if (!(fg_res & FG_REP_CAP_VALID))
goto not_full;
@@ -354,11 +296,6 @@ static void fuel_gauge_get_status(struct axp288_fg_info *info)
if (fg_res < 90 || (pwr_stat & PS_STAT_BAT_CHRG_DIR))
goto not_full;
- ret = iio_read_channel_raw(info->iio_channel[BAT_D_CURR], &curr);
- if (ret < 0) {
- dev_err(&info->pdev->dev, "FG get current failed: %d\n", ret);
- return;
- }
if (curr == 0) {
info->status = POWER_SUPPLY_STATUS_FULL;
return;
@@ -371,61 +308,16 @@ not_full:
info->status = POWER_SUPPLY_STATUS_DISCHARGING;
}
-static int fuel_gauge_get_vbatt(struct axp288_fg_info *info, int *vbatt)
-{
- int ret = 0, raw_val;
-
- ret = iio_read_channel_raw(info->iio_channel[BAT_VOLT], &raw_val);
- if (ret < 0)
- goto vbatt_read_fail;
-
- *vbatt = VOLTAGE_FROM_ADC(raw_val);
-vbatt_read_fail:
- return ret;
-}
-
-static int fuel_gauge_get_current(struct axp288_fg_info *info, int *cur)
-{
- int ret, discharge;
-
- /* First check discharge current, so that we do only 1 read on bat. */
- ret = iio_read_channel_raw(info->iio_channel[BAT_D_CURR], &discharge);
- if (ret < 0)
- return ret;
-
- if (discharge > 0) {
- *cur = -1 * discharge;
- return 0;
- }
-
- return iio_read_channel_raw(info->iio_channel[BAT_CHRG_CURR], cur);
-}
-
-static int fuel_gauge_get_vocv(struct axp288_fg_info *info, int *vocv)
-{
- int ret;
-
- ret = fuel_gauge_read_12bit_word(info, AXP288_FG_OCVH_REG);
- if (ret >= 0)
- *vocv = VOLTAGE_FROM_ADC(ret);
-
- return ret;
-}
-
static int fuel_gauge_battery_health(struct axp288_fg_info *info)
{
- int ret, vocv, health = POWER_SUPPLY_HEALTH_UNKNOWN;
-
- ret = fuel_gauge_get_vocv(info, &vocv);
- if (ret < 0)
- goto health_read_fail;
+ int vocv = VOLTAGE_FROM_ADC(info->ocv);
+ int health = POWER_SUPPLY_HEALTH_UNKNOWN;
if (vocv > info->max_volt)
health = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
else
health = POWER_SUPPLY_HEALTH_GOOD;
-health_read_fail:
return health;
}
@@ -434,9 +326,14 @@ static int fuel_gauge_get_property(struct power_supply *ps,
union power_supply_propval *val)
{
struct axp288_fg_info *info = power_supply_get_drvdata(ps);
- int ret = 0, value;
+ int ret, value;
mutex_lock(&info->lock);
+
+ ret = fuel_gauge_update_registers(info);
+ if (ret < 0)
+ goto out;
+
switch (prop) {
case POWER_SUPPLY_PROP_STATUS:
fuel_gauge_get_status(info);
@@ -446,78 +343,52 @@ static int fuel_gauge_get_property(struct power_supply *ps,
val->intval = fuel_gauge_battery_health(info);
break;
case POWER_SUPPLY_PROP_VOLTAGE_NOW:
- ret = fuel_gauge_get_vbatt(info, &value);
- if (ret < 0)
- goto fuel_gauge_read_err;
+ value = VOLTAGE_FROM_ADC(info->bat_volt);
val->intval = PROP_VOLT(value);
break;
case POWER_SUPPLY_PROP_VOLTAGE_OCV:
- ret = fuel_gauge_get_vocv(info, &value);
- if (ret < 0)
- goto fuel_gauge_read_err;
+ value = VOLTAGE_FROM_ADC(info->ocv);
val->intval = PROP_VOLT(value);
break;
case POWER_SUPPLY_PROP_CURRENT_NOW:
- ret = fuel_gauge_get_current(info, &value);
- if (ret < 0)
- goto fuel_gauge_read_err;
+ if (info->d_curr > 0)
+ value = -1 * info->d_curr;
+ else
+ value = info->c_curr;
+
val->intval = PROP_CURR(value);
break;
case POWER_SUPPLY_PROP_PRESENT:
- ret = fuel_gauge_reg_readb(info, AXP20X_PWR_OP_MODE);
- if (ret < 0)
- goto fuel_gauge_read_err;
-
- if (ret & CHRG_STAT_BAT_PRESENT)
+ if (info->pwr_op & CHRG_STAT_BAT_PRESENT)
val->intval = 1;
else
val->intval = 0;
break;
case POWER_SUPPLY_PROP_CAPACITY:
- ret = fuel_gauge_reg_readb(info, AXP20X_FG_RES);
- if (ret < 0)
- goto fuel_gauge_read_err;
-
- if (!(ret & FG_REP_CAP_VALID))
- dev_err(&info->pdev->dev,
- "capacity measurement not valid\n");
- val->intval = (ret & FG_REP_CAP_VAL_MASK);
+ if (!(info->fg_res & FG_REP_CAP_VALID))
+ dev_err(info->dev, "capacity measurement not valid\n");
+ val->intval = (info->fg_res & FG_REP_CAP_VAL_MASK);
break;
case POWER_SUPPLY_PROP_CAPACITY_ALERT_MIN:
- ret = fuel_gauge_reg_readb(info, AXP288_FG_LOW_CAP_REG);
- if (ret < 0)
- goto fuel_gauge_read_err;
- val->intval = (ret & 0x0f);
+ val->intval = (info->low_cap & 0x0f);
break;
case POWER_SUPPLY_PROP_TECHNOLOGY:
val->intval = POWER_SUPPLY_TECHNOLOGY_LION;
break;
case POWER_SUPPLY_PROP_CHARGE_NOW:
- ret = fuel_gauge_read_15bit_word(info, AXP288_FG_CC_MTR1_REG);
- if (ret < 0)
- goto fuel_gauge_read_err;
-
- val->intval = ret * FG_DES_CAP_RES_LSB;
+ val->intval = info->fg_cc_mtr1 * FG_DES_CAP_RES_LSB;
break;
case POWER_SUPPLY_PROP_CHARGE_FULL:
- ret = fuel_gauge_read_15bit_word(info, AXP288_FG_DES_CAP1_REG);
- if (ret < 0)
- goto fuel_gauge_read_err;
-
- val->intval = ret * FG_DES_CAP_RES_LSB;
+ val->intval = info->fg_des_cap1 * FG_DES_CAP_RES_LSB;
break;
case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
val->intval = PROP_VOLT(info->max_volt);
break;
default:
- mutex_unlock(&info->lock);
- return -EINVAL;
+ ret = -EINVAL;
}
- mutex_unlock(&info->lock);
- return 0;
-
-fuel_gauge_read_err:
+out:
mutex_unlock(&info->lock);
return ret;
}
@@ -527,7 +398,7 @@ static int fuel_gauge_set_property(struct power_supply *ps,
const union power_supply_propval *val)
{
struct axp288_fg_info *info = power_supply_get_drvdata(ps);
- int ret = 0;
+ int new_low_cap, ret = 0;
mutex_lock(&info->lock);
switch (prop) {
@@ -536,12 +407,12 @@ static int fuel_gauge_set_property(struct power_supply *ps,
ret = -EINVAL;
break;
}
- ret = fuel_gauge_reg_readb(info, AXP288_FG_LOW_CAP_REG);
- if (ret < 0)
- break;
- ret &= 0xf0;
- ret |= (val->intval & 0xf);
- ret = fuel_gauge_reg_writeb(info, AXP288_FG_LOW_CAP_REG, ret);
+ new_low_cap = info->low_cap;
+ new_low_cap &= 0xf0;
+ new_low_cap |= (val->intval & 0xf);
+ ret = fuel_gauge_reg_writeb(info, AXP288_FG_LOW_CAP_REG, new_low_cap);
+ if (ret == 0)
+ info->low_cap = new_low_cap;
break;
default:
ret = -EINVAL;
@@ -579,37 +450,35 @@ static irqreturn_t fuel_gauge_thread_handler(int irq, void *dev)
}
if (i >= AXP288_FG_INTR_NUM) {
- dev_warn(&info->pdev->dev, "spurious interrupt!!\n");
+ dev_warn(info->dev, "spurious interrupt!!\n");
return IRQ_NONE;
}
switch (i) {
case QWBTU_IRQ:
- dev_info(&info->pdev->dev,
- "Quit Battery under temperature in work mode IRQ (QWBTU)\n");
+ dev_info(info->dev, "Quit Battery under temperature in work mode IRQ (QWBTU)\n");
break;
case WBTU_IRQ:
- dev_info(&info->pdev->dev,
- "Battery under temperature in work mode IRQ (WBTU)\n");
+ dev_info(info->dev, "Battery under temperature in work mode IRQ (WBTU)\n");
break;
case QWBTO_IRQ:
- dev_info(&info->pdev->dev,
- "Quit Battery over temperature in work mode IRQ (QWBTO)\n");
+ dev_info(info->dev, "Quit Battery over temperature in work mode IRQ (QWBTO)\n");
break;
case WBTO_IRQ:
- dev_info(&info->pdev->dev,
- "Battery over temperature in work mode IRQ (WBTO)\n");
+ dev_info(info->dev, "Battery over temperature in work mode IRQ (WBTO)\n");
break;
case WL2_IRQ:
- dev_info(&info->pdev->dev, "Low Batt Warning(2) INTR\n");
+ dev_info(info->dev, "Low Batt Warning(2) INTR\n");
break;
case WL1_IRQ:
- dev_info(&info->pdev->dev, "Low Batt Warning(1) INTR\n");
+ dev_info(info->dev, "Low Batt Warning(1) INTR\n");
break;
default:
- dev_warn(&info->pdev->dev, "Spurious Interrupt!!!\n");
+ dev_warn(info->dev, "Spurious Interrupt!!!\n");
}
+ info->valid = 0; /* Force updating of the cached registers */
+
power_supply_changed(info->bat);
return IRQ_HANDLED;
}
@@ -618,6 +487,7 @@ static void fuel_gauge_external_power_changed(struct power_supply *psy)
{
struct axp288_fg_info *info = power_supply_get_drvdata(psy);
+ info->valid = 0; /* Force updating of the cached registers */
power_supply_changed(info->bat);
}
@@ -632,16 +502,15 @@ static const struct power_supply_desc fuel_gauge_desc = {
.external_power_changed = fuel_gauge_external_power_changed,
};
-static void fuel_gauge_init_irq(struct axp288_fg_info *info)
+static void fuel_gauge_init_irq(struct axp288_fg_info *info, struct platform_device *pdev)
{
int ret, i, pirq;
for (i = 0; i < AXP288_FG_INTR_NUM; i++) {
- pirq = platform_get_irq(info->pdev, i);
+ pirq = platform_get_irq(pdev, i);
info->irq[i] = regmap_irq_get_virq(info->regmap_irqc, pirq);
if (info->irq[i] < 0) {
- dev_warn(&info->pdev->dev,
- "regmap_irq get virq failed for IRQ %d: %d\n",
+ dev_warn(info->dev, "regmap_irq get virq failed for IRQ %d: %d\n",
pirq, info->irq[i]);
info->irq[i] = -1;
goto intr_failed;
@@ -650,14 +519,10 @@ static void fuel_gauge_init_irq(struct axp288_fg_info *info)
NULL, fuel_gauge_thread_handler,
IRQF_ONESHOT, DEV_NAME, info);
if (ret) {
- dev_warn(&info->pdev->dev,
- "request irq failed for IRQ %d: %d\n",
+ dev_warn(info->dev, "request irq failed for IRQ %d: %d\n",
pirq, info->irq[i]);
info->irq[i] = -1;
goto intr_failed;
- } else {
- dev_info(&info->pdev->dev, "HW IRQ %d -> VIRQ %d\n",
- pirq, info->irq[i]);
}
}
return;
@@ -753,9 +618,6 @@ static int axp288_fuel_gauge_probe(struct platform_device *pdev)
struct axp20x_dev *axp20x = dev_get_drvdata(pdev->dev.parent);
struct power_supply_config psy_cfg = {};
static const char * const iio_chan_name[] = {
- [BAT_TEMP] = "axp288-batt-temp",
- [PMIC_TEMP] = "axp288-pmic-temp",
- [SYSTEM_TEMP] = "axp288-system-temp",
[BAT_CHRG_CURR] = "axp288-chrg-curr",
[BAT_D_CURR] = "axp288-chrg-d-curr",
[BAT_VOLT] = "axp288-batt-volt",
@@ -765,24 +627,15 @@ static int axp288_fuel_gauge_probe(struct platform_device *pdev)
if (dmi_check_system(axp288_no_battery_list))
return -ENODEV;
- /*
- * On some devices the fuelgauge and charger parts of the axp288 are
- * not used, check that the fuelgauge is enabled (CC_CTRL != 0).
- */
- ret = regmap_read(axp20x->regmap, AXP20X_CC_CTRL, &val);
- if (ret < 0)
- return ret;
- if (val == 0)
- return -ENODEV;
-
info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
- info->pdev = pdev;
+ info->dev = &pdev->dev;
info->regmap = axp20x->regmap;
info->regmap_irqc = axp20x->regmap_irqc;
info->status = POWER_SUPPLY_STATUS_UNKNOWN;
+ info->valid = 0;
platform_set_drvdata(pdev, info);
@@ -808,19 +661,35 @@ static int axp288_fuel_gauge_probe(struct platform_device *pdev)
}
}
- ret = fuel_gauge_reg_readb(info, AXP288_FG_DES_CAP1_REG);
+ ret = iosf_mbi_block_punit_i2c_access();
if (ret < 0)
goto out_free_iio_chan;
+ /*
+ * On some devices the fuelgauge and charger parts of the axp288 are
+ * not used, check that the fuelgauge is enabled (CC_CTRL != 0).
+ */
+ ret = regmap_read(axp20x->regmap, AXP20X_CC_CTRL, &val);
+ if (ret < 0)
+ goto unblock_punit_i2c_access;
+ if (val == 0) {
+ ret = -ENODEV;
+ goto unblock_punit_i2c_access;
+ }
+
+ ret = fuel_gauge_reg_readb(info, AXP288_FG_DES_CAP1_REG);
+ if (ret < 0)
+ goto unblock_punit_i2c_access;
+
if (!(ret & FG_DES_CAP1_VALID)) {
dev_err(&pdev->dev, "axp288 not configured by firmware\n");
ret = -ENODEV;
- goto out_free_iio_chan;
+ goto unblock_punit_i2c_access;
}
ret = fuel_gauge_reg_readb(info, AXP20X_CHRG_CTRL1);
if (ret < 0)
- goto out_free_iio_chan;
+ goto unblock_punit_i2c_access;
switch ((ret & CHRG_CCCV_CV_MASK) >> CHRG_CCCV_CV_BIT_POS) {
case CHRG_CCCV_CV_4100MV:
info->max_volt = 4100;
@@ -836,6 +705,22 @@ static int axp288_fuel_gauge_probe(struct platform_device *pdev)
break;
}
+ ret = fuel_gauge_reg_readb(info, AXP20X_PWR_OP_MODE);
+ if (ret < 0)
+ goto unblock_punit_i2c_access;
+ info->pwr_op = ret;
+
+ ret = fuel_gauge_reg_readb(info, AXP288_FG_LOW_CAP_REG);
+ if (ret < 0)
+ goto unblock_punit_i2c_access;
+ info->low_cap = ret;
+
+unblock_punit_i2c_access:
+ iosf_mbi_unblock_punit_i2c_access();
+ /* In case we arrive here by goto because of a register access error */
+ if (ret < 0)
+ goto out_free_iio_chan;
+
psy_cfg.drv_data = info;
info->bat = power_supply_register(&pdev->dev, &fuel_gauge_desc, &psy_cfg);
if (IS_ERR(info->bat)) {
@@ -844,8 +729,7 @@ static int axp288_fuel_gauge_probe(struct platform_device *pdev)
goto out_free_iio_chan;
}
- fuel_gauge_create_debugfs(info);
- fuel_gauge_init_irq(info);
+ fuel_gauge_init_irq(info, pdev);
return 0;
@@ -869,7 +753,6 @@ static int axp288_fuel_gauge_remove(struct platform_device *pdev)
int i;
power_supply_unregister(info->bat);
- fuel_gauge_remove_debugfs(info);
for (i = 0; i < AXP288_FG_INTR_NUM; i++)
if (info->irq[i] >= 0)
diff --git a/drivers/power/supply/bq24735-charger.c b/drivers/power/supply/bq24735-charger.c
index b5d619db79f6..3ce36d09c017 100644
--- a/drivers/power/supply/bq24735-charger.c
+++ b/drivers/power/supply/bq24735-charger.c
@@ -31,9 +31,8 @@
#include <linux/power/bq24735-charger.h>
-#define BQ24735_CHG_OPT 0x12
-#define BQ24735_CHG_OPT_CHARGE_DISABLE (1 << 0)
-#define BQ24735_CHG_OPT_AC_PRESENT (1 << 4)
+/* BQ24735 available commands and their respective masks */
+#define BQ24735_CHARGE_OPT 0x12
#define BQ24735_CHARGE_CURRENT 0x14
#define BQ24735_CHARGE_CURRENT_MASK 0x1fc0
#define BQ24735_CHARGE_VOLTAGE 0x15
@@ -43,6 +42,10 @@
#define BQ24735_MANUFACTURER_ID 0xfe
#define BQ24735_DEVICE_ID 0xff
+/* ChargeOptions bits of interest */
+#define BQ24735_CHARGE_OPT_CHG_DISABLE (1 << 0)
+#define BQ24735_CHARGE_OPT_AC_PRESENT (1 << 4)
+
struct bq24735 {
struct power_supply *charger;
struct power_supply_desc charger_desc;
@@ -167,8 +170,8 @@ static inline int bq24735_enable_charging(struct bq24735 *charger)
if (ret)
return ret;
- return bq24735_update_word(charger->client, BQ24735_CHG_OPT,
- BQ24735_CHG_OPT_CHARGE_DISABLE, 0);
+ return bq24735_update_word(charger->client, BQ24735_CHARGE_OPT,
+ BQ24735_CHARGE_OPT_CHG_DISABLE, 0);
}
static inline int bq24735_disable_charging(struct bq24735 *charger)
@@ -176,9 +179,9 @@ static inline int bq24735_disable_charging(struct bq24735 *charger)
if (charger->pdata->ext_control)
return 0;
- return bq24735_update_word(charger->client, BQ24735_CHG_OPT,
- BQ24735_CHG_OPT_CHARGE_DISABLE,
- BQ24735_CHG_OPT_CHARGE_DISABLE);
+ return bq24735_update_word(charger->client, BQ24735_CHARGE_OPT,
+ BQ24735_CHARGE_OPT_CHG_DISABLE,
+ BQ24735_CHARGE_OPT_CHG_DISABLE);
}
static bool bq24735_charger_is_present(struct bq24735 *charger)
@@ -188,14 +191,14 @@ static bool bq24735_charger_is_present(struct bq24735 *charger)
} else {
int ac = 0;
- ac = bq24735_read_word(charger->client, BQ24735_CHG_OPT);
+ ac = bq24735_read_word(charger->client, BQ24735_CHARGE_OPT);
if (ac < 0) {
dev_dbg(&charger->client->dev,
"Failed to read charger options : %d\n",
ac);
return false;
}
- return (ac & BQ24735_CHG_OPT_AC_PRESENT) ? true : false;
+ return (ac & BQ24735_CHARGE_OPT_AC_PRESENT) ? true : false;
}
return false;
@@ -208,11 +211,11 @@ static int bq24735_charger_is_charging(struct bq24735 *charger)
if (!bq24735_charger_is_present(charger))
return 0;
- ret = bq24735_read_word(charger->client, BQ24735_CHG_OPT);
+ ret = bq24735_read_word(charger->client, BQ24735_CHARGE_OPT);
if (ret < 0)
return ret;
- return !(ret & BQ24735_CHG_OPT_CHARGE_DISABLE);
+ return !(ret & BQ24735_CHARGE_OPT_CHG_DISABLE);
}
static void bq24735_update(struct bq24735 *charger)
diff --git a/drivers/power/supply/cros_peripheral_charger.c b/drivers/power/supply/cros_peripheral_charger.c
new file mode 100644
index 000000000000..305f10dfc06d
--- /dev/null
+++ b/drivers/power/supply/cros_peripheral_charger.c
@@ -0,0 +1,386 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Power supply driver for ChromeOS EC based Peripheral Device Charger.
+ *
+ * Copyright 2020 Google LLC.
+ */
+
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/slab.h>
+#include <linux/stringify.h>
+#include <linux/types.h>
+
+#define DRV_NAME "cros-ec-pchg"
+#define PCHG_DIR_PREFIX "peripheral"
+#define PCHG_DIR_NAME PCHG_DIR_PREFIX "%d"
+#define PCHG_DIR_NAME_LENGTH \
+ sizeof(PCHG_DIR_PREFIX __stringify(EC_PCHG_MAX_PORTS))
+#define PCHG_CACHE_UPDATE_DELAY msecs_to_jiffies(500)
+
+/*
+ * State of one peripheral charge port. cros_pchg_probe() allocates one
+ * instance per port and passes it to the power supply core as drv_data.
+ */
+struct port_data {
+ /* Port index placed in the EC_CMD_PCHG request. */
+ int port_number;
+ /* Power supply name, formatted from PCHG_DIR_NAME in probe. */
+ char name[PCHG_DIR_NAME_LENGTH];
+ /* Handle returned by devm_power_supply_register(). */
+ struct power_supply *psy;
+ struct power_supply_desc psy_desc;
+ /* Values cached by cros_pchg_get_status() from the last EC response. */
+ int psy_status;
+ int battery_percentage;
+ int charge_type;
+ /* Back-pointer to the owning charger. */
+ struct charger_data *charger;
+ /* jiffies value of the last successful status refresh. */
+ unsigned long last_update;
+};
+
+/* Driver-wide state, allocated once in cros_pchg_probe(). */
+struct charger_data {
+ struct device *dev;
+ /* Parent's drvdata; supplies cmd_offset for EC commands. */
+ struct cros_ec_dev *ec_dev;
+ /* Device used for command transfers and for the event notifier chain. */
+ struct cros_ec_device *ec_device;
+ /* Number of valid entries in ports[]. */
+ int num_registered_psy;
+ struct port_data *ports[EC_PCHG_MAX_PORTS];
+ /* Notifier block whose callback is cros_ec_notify(). */
+ struct notifier_block notifier;
+};
+
+/* Properties exposed by each port; all are served by cros_pchg_get_prop(). */
+static enum power_supply_property cros_pchg_props[] = {
+ POWER_SUPPLY_PROP_STATUS,
+ POWER_SUPPLY_PROP_CHARGE_TYPE,
+ POWER_SUPPLY_PROP_CAPACITY,
+ POWER_SUPPLY_PROP_SCOPE,
+};
+
+/*
+ * Send one host command to the EC and copy the reply back to the caller.
+ *
+ * A temporary message large enough for the bigger of the request and the
+ * response payload is allocated, filled from @outdata, transferred with
+ * cros_ec_cmd_xfer_status() and freed again.  @indata is only written when
+ * the transfer did not fail.
+ *
+ * Returns the result of cros_ec_cmd_xfer_status(), or -ENOMEM when the
+ * message could not be allocated.
+ */
+static int cros_pchg_ec_command(const struct charger_data *charger,
+				unsigned int version,
+				unsigned int command,
+				const void *outdata,
+				unsigned int outsize,
+				void *indata,
+				unsigned int insize)
+{
+	struct cros_ec_command *cmd;
+	int rc;
+
+	cmd = kzalloc(sizeof(*cmd) + max(outsize, insize), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	cmd->version = version;
+	cmd->command = charger->ec_dev->cmd_offset + command;
+	cmd->outsize = outsize;
+	cmd->insize = insize;
+	if (outsize)
+		memcpy(cmd->data, outdata, outsize);
+
+	rc = cros_ec_cmd_xfer_status(charger->ec_device, cmd);
+	if (insize && rc >= 0)
+		memcpy(indata, cmd->data, insize);
+
+	kfree(cmd);
+
+	return rc;
+}
+
+static const unsigned int pchg_cmd_version = 1;
+
+/*
+ * Ask the EC which versions of EC_CMD_PCHG it implements.
+ *
+ * Returns true when the bit for pchg_cmd_version is set in the reported
+ * version mask, false otherwise or when the query itself fails.
+ */
+static bool cros_pchg_cmd_ver_check(const struct charger_data *charger)
+{
+	struct ec_params_get_cmd_versions_v1 params;
+	struct ec_response_get_cmd_versions resp;
+	int rc;
+
+	params.cmd = EC_CMD_PCHG;
+	rc = cros_pchg_ec_command(charger, 1, EC_CMD_GET_CMD_VERSIONS,
+				  &params, sizeof(params),
+				  &resp, sizeof(resp));
+	if (rc >= 0)
+		return (resp.version_mask & BIT(pchg_cmd_version)) != 0;
+
+	dev_warn(charger->dev,
+		 "Unable to get versions of EC_CMD_PCHG (err:%d)\n",
+		 rc);
+
+	return false;
+}
+
+/*
+ * Query the EC for the number of peripheral charge ports.
+ *
+ * Returns the port count reported by EC_CMD_PCHG_COUNT, or the negative
+ * error from the command transfer.
+ */
+static int cros_pchg_port_count(const struct charger_data *charger)
+{
+	struct ec_response_pchg_count rsp;
+	int ret;
+
+	ret = cros_pchg_ec_command(charger, 0, EC_CMD_PCHG_COUNT,
+				   NULL, 0, &rsp, sizeof(rsp));
+	if (ret < 0) {
+		/* Message previously read "number or ports" (typo). */
+		dev_warn(charger->dev,
+			 "Unable to get number of ports (err:%d)\n", ret);
+		return ret;
+	}
+
+	return rsp.port_count;
+}
+
+/*
+ * Fetch the state of @port from the EC and refresh the cached values.
+ *
+ * The EC state is mapped onto a power supply status and charge type, the
+ * battery percentage is stored as reported, and power_supply_changed() is
+ * called when either the status or the percentage differs from the values
+ * cached before the call.
+ *
+ * Returns 0 on success or the negative error from the EC command.
+ */
+static int cros_pchg_get_status(struct port_data *port)
+{
+	struct charger_data *charger = port->charger;
+	struct device *dev = charger->dev;
+	const int prev_status = port->psy_status;
+	const int prev_percentage = port->battery_percentage;
+	struct ec_params_pchg params;
+	struct ec_response_pchg resp;
+	int rc;
+
+	params.port = port->port_number;
+	rc = cros_pchg_ec_command(charger, pchg_cmd_version, EC_CMD_PCHG,
+				  &params, sizeof(params),
+				  &resp, sizeof(resp));
+	if (rc < 0) {
+		dev_err(dev, "Unable to get port.%d status (err:%d)\n",
+			port->port_number, rc);
+		return rc;
+	}
+
+	switch (resp.state) {
+	case PCHG_STATE_DETECTED:
+		port->psy_status = POWER_SUPPLY_STATUS_CHARGING;
+		port->charge_type = POWER_SUPPLY_CHARGE_TYPE_TRICKLE;
+		break;
+	case PCHG_STATE_CHARGING:
+		port->psy_status = POWER_SUPPLY_STATUS_CHARGING;
+		port->charge_type = POWER_SUPPLY_CHARGE_TYPE_STANDARD;
+		break;
+	case PCHG_STATE_FULL:
+		port->psy_status = POWER_SUPPLY_STATUS_FULL;
+		port->charge_type = POWER_SUPPLY_CHARGE_TYPE_NONE;
+		break;
+	case PCHG_STATE_RESET:
+	case PCHG_STATE_INITIALIZED:
+	case PCHG_STATE_ENABLED:
+	default:
+		/* Everything else, including unknown states, is "unknown". */
+		port->psy_status = POWER_SUPPLY_STATUS_UNKNOWN;
+		port->charge_type = POWER_SUPPLY_CHARGE_TYPE_NONE;
+		break;
+	}
+
+	port->battery_percentage = resp.battery_percentage;
+
+	if (prev_status != port->psy_status ||
+	    prev_percentage != port->battery_percentage)
+		power_supply_changed(port->psy);
+
+	dev_dbg(dev,
+		"Port %d: state=%d battery=%d%%\n",
+		port->port_number, resp.state, resp.battery_percentage);
+
+	return 0;
+}
+
+/*
+ * Refresh the cached status of @port.
+ *
+ * With @ratelimit set, the EC is not queried again while less than
+ * PCHG_CACHE_UPDATE_DELAY has passed since the last successful refresh;
+ * in that case 0 is returned and the cache is left untouched.
+ *
+ * Returns the result of cros_pchg_get_status() otherwise.
+ */
+static int cros_pchg_get_port_status(struct port_data *port, bool ratelimit)
+{
+	int rc;
+
+	if (ratelimit) {
+		unsigned long next = port->last_update + PCHG_CACHE_UPDATE_DELAY;
+
+		if (time_is_after_jiffies(next))
+			return 0;
+	}
+
+	rc = cros_pchg_get_status(port);
+	if (rc >= 0)
+		port->last_update = jiffies;
+
+	return rc;
+}
+
+/*
+ * power_supply get_property callback.
+ *
+ * STATUS, CAPACITY and CHARGE_TYPE trigger a rate-limited refresh from the
+ * EC first; the refresh result is ignored, so the cached value is reported
+ * even when the refresh fails.  SCOPE is constant.
+ *
+ * Returns 0, or -EINVAL for a property this driver does not provide.
+ */
+static int cros_pchg_get_prop(struct power_supply *psy,
+			      enum power_supply_property psp,
+			      union power_supply_propval *val)
+{
+	struct port_data *port = power_supply_get_drvdata(psy);
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		cros_pchg_get_port_status(port, true);
+		val->intval = port->psy_status;
+		return 0;
+	case POWER_SUPPLY_PROP_CAPACITY:
+		cros_pchg_get_port_status(port, true);
+		val->intval = port->battery_percentage;
+		return 0;
+	case POWER_SUPPLY_PROP_CHARGE_TYPE:
+		cros_pchg_get_port_status(port, true);
+		val->intval = port->charge_type;
+		return 0;
+	case POWER_SUPPLY_PROP_SCOPE:
+		val->intval = POWER_SUPPLY_SCOPE_DEVICE;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Refresh every registered port without rate limiting.
+ * @host_event is not used.  Always returns NOTIFY_OK.
+ */
+static int cros_pchg_event(const struct charger_data *charger,
+			   unsigned long host_event)
+{
+	int idx = 0;
+
+	while (idx < charger->num_registered_psy) {
+		cros_pchg_get_port_status(charger->ports[idx], false);
+		idx++;
+	}
+
+	return NOTIFY_OK;
+}
+
+/*
+ * Read the current device event mask from the EC.
+ * Returns the mask, or 0 when the command fails.
+ */
+static u32 cros_get_device_event(const struct charger_data *charger)
+{
+	struct ec_params_device_event params;
+	struct ec_response_device_event resp;
+	int rc;
+
+	params.param = EC_DEVICE_EVENT_PARAM_GET_CURRENT_EVENTS;
+	rc = cros_pchg_ec_command(charger, 0, EC_CMD_DEVICE_EVENT,
+				  &params, sizeof(params),
+				  &resp, sizeof(resp));
+	if (rc >= 0)
+		return resp.event_mask;
+
+	dev_warn(charger->dev, "Unable to get device events (err:%d)\n", rc);
+
+	return 0;
+}
+
+/*
+ * EC event notifier callback.
+ *
+ * Only host events carrying EC_HOST_EVENT_DEVICE are of interest, and of
+ * those only the ones whose device event mask contains
+ * EC_DEVICE_EVENT_WLC; everything else yields NOTIFY_DONE.  Matching
+ * events are handed to cros_pchg_event().
+ */
+static int cros_ec_notify(struct notifier_block *nb,
+			  unsigned long queued_during_suspend,
+			  void *data)
+{
+	struct cros_ec_device *ec_dev = data;
+	u32 host_event = cros_ec_get_host_event(ec_dev);
+	struct charger_data *charger =
+		container_of(nb, struct charger_data, notifier);
+	u32 wlc_mask = EC_DEVICE_EVENT_MASK(EC_DEVICE_EVENT_WLC);
+
+	/* A zero host_event also fails this test. */
+	if (!(host_event & EC_HOST_EVENT_MASK(EC_HOST_EVENT_DEVICE)))
+		return NOTIFY_DONE;
+
+	/*
+	 * todo: Retrieve device event mask in common place
+	 * (e.g. cros_ec_proto.c).
+	 */
+	if (!(cros_get_device_event(charger) & wlc_mask))
+		return NOTIFY_DONE;
+
+	return cros_pchg_event(charger, host_event);
+}
+
+/*
+ * devm action: take the notifier off the EC event chain.  It is registered
+ * last, so it runs first on teardown, before the power supplies are
+ * unregistered and before the devm-allocated charger_data is freed.
+ */
+static void cros_pchg_unregister_notifier(void *data)
+{
+	struct charger_data *charger = data;
+
+	blocking_notifier_chain_unregister(&charger->ec_device->event_notifier,
+					   &charger->notifier);
+}
+
+/*
+ * Probe: query the EC for peripheral charge ports, register one power
+ * supply per port and subscribe to EC events.
+ *
+ * Returns 0 on success, -ENODEV when the EC reports no ports (or the query
+ * fails), -EOPNOTSUPP when the required EC_CMD_PCHG version is missing,
+ * -ENOBUFS when the EC reports more than EC_PCHG_MAX_PORTS ports, -ENOMEM
+ * on allocation failure, or the error from power supply registration.
+ */
+static int cros_pchg_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct cros_ec_dev *ec_dev = dev_get_drvdata(dev->parent);
+	struct cros_ec_device *ec_device = ec_dev->ec_dev;
+	struct power_supply_desc *psy_desc;
+	struct charger_data *charger;
+	struct power_supply *psy;
+	struct port_data *port;
+	struct notifier_block *nb;
+	int num_ports;
+	int ret;
+	int i;
+
+	charger = devm_kzalloc(dev, sizeof(*charger), GFP_KERNEL);
+	if (!charger)
+		return -ENOMEM;
+
+	charger->dev = dev;
+	charger->ec_dev = ec_dev;
+	charger->ec_device = ec_device;
+
+	ret = cros_pchg_port_count(charger);
+	if (ret <= 0) {
+		/*
+		 * This feature is enabled by the EC and the kernel driver is
+		 * included by default for CrOS devices. Don't need to be loud
+		 * since this error can be normal.
+		 */
+		dev_info(dev, "No peripheral charge ports (err:%d)\n", ret);
+		return -ENODEV;
+	}
+
+	if (!cros_pchg_cmd_ver_check(charger)) {
+		dev_err(dev, "EC_CMD_PCHG version %d isn't available.\n",
+			pchg_cmd_version);
+		return -EOPNOTSUPP;
+	}
+
+	num_ports = ret;
+	if (num_ports > EC_PCHG_MAX_PORTS) {
+		dev_err(dev, "Too many peripheral charge ports (%d)\n",
+			num_ports);
+		return -ENOBUFS;
+	}
+
+	dev_info(dev, "%d peripheral charge ports found\n", num_ports);
+
+	/*
+	 * num_ports is at least 1 here and every iteration either returns
+	 * an error or registers a port, so at least one port is registered
+	 * once the loop completes.
+	 */
+	for (i = 0; i < num_ports; i++) {
+		struct power_supply_config psy_cfg = {};
+
+		port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
+		if (!port)
+			return -ENOMEM;
+
+		port->charger = charger;
+		port->port_number = i;
+		snprintf(port->name, sizeof(port->name), PCHG_DIR_NAME, i);
+
+		psy_desc = &port->psy_desc;
+		psy_desc->name = port->name;
+		psy_desc->type = POWER_SUPPLY_TYPE_BATTERY;
+		psy_desc->get_property = cros_pchg_get_prop;
+		psy_desc->external_power_changed = NULL;
+		psy_desc->properties = cros_pchg_props;
+		psy_desc->num_properties = ARRAY_SIZE(cros_pchg_props);
+		psy_cfg.drv_data = port;
+
+		psy = devm_power_supply_register(dev, psy_desc, &psy_cfg);
+		if (IS_ERR(psy))
+			return dev_err_probe(dev, PTR_ERR(psy),
+					     "Failed to register power supply\n");
+		port->psy = psy;
+
+		charger->ports[charger->num_registered_psy++] = port;
+	}
+
+	nb = &charger->notifier;
+	nb->notifier_call = cros_ec_notify;
+	ret = blocking_notifier_chain_register(&ec_device->event_notifier, nb);
+	if (ret < 0) {
+		/*
+		 * Best effort: without the notifier the ports still update
+		 * on property reads, so keep the device bound.
+		 */
+		dev_err(dev, "Failed to register notifier (err:%d)\n", ret);
+		return 0;
+	}
+
+	/*
+	 * The notifier block lives inside the devm-allocated charger; make
+	 * sure it is removed from the chain when the device is unbound,
+	 * otherwise the chain keeps a pointer to freed memory.
+	 */
+	return devm_add_action_or_reset(dev, cros_pchg_unregister_notifier,
+					charger);
+}
+
+/* Platform driver glue; only a probe callback is provided. */
+static struct platform_driver cros_pchg_driver = {
+ .driver = {
+ .name = DRV_NAME,
+ },
+ .probe = cros_pchg_probe
+};
+
+module_platform_driver(cros_pchg_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ChromeOS EC peripheral device charger");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/drivers/power/supply/cw2015_battery.c b/drivers/power/supply/cw2015_battery.c
index d110597746b0..091868e9e9e8 100644
--- a/drivers/power/supply/cw2015_battery.c
+++ b/drivers/power/supply/cw2015_battery.c
@@ -679,7 +679,9 @@ static int cw_bat_probe(struct i2c_client *client)
&cw2015_bat_desc,
&psy_cfg);
if (IS_ERR(cw_bat->rk_bat)) {
- dev_err(cw_bat->dev, "Failed to register power supply\n");
+ /* try again if this happens */
+ dev_err_probe(&client->dev, PTR_ERR(cw_bat->rk_bat),
+ "Failed to register power supply\n");
return PTR_ERR(cw_bat->rk_bat);
}
diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index ce2041b30a06..8dffae76b6a3 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -36,8 +36,6 @@
/* Interrupt mask bits */
#define CONFIG_ALRT_BIT_ENBL (1 << 2)
-#define STATUS_INTR_SOCMIN_BIT (1 << 10)
-#define STATUS_INTR_SOCMAX_BIT (1 << 14)
#define VFSOC0_LOCK 0x0000
#define VFSOC0_UNLOCK 0x0080
@@ -285,8 +283,6 @@ static int max17042_get_property(struct power_supply *psy,
case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
if (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17042)
ret = regmap_read(map, MAX17042_V_empty, &data);
- else if (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17055)
- ret = regmap_read(map, MAX17055_V_empty, &data);
else
ret = regmap_read(map, MAX17047_V_empty, &data);
if (ret < 0)
@@ -748,7 +744,7 @@ static inline void max17042_override_por_values(struct max17042_chip *chip)
struct max17042_config_data *config = chip->pdata->config_data;
max17042_override_por(map, MAX17042_TGAIN, config->tgain);
- max17042_override_por(map, MAx17042_TOFF, config->toff);
+ max17042_override_por(map, MAX17042_TOFF, config->toff);
max17042_override_por(map, MAX17042_CGAIN, config->cgain);
max17042_override_por(map, MAX17042_COFF, config->coff);
@@ -767,36 +763,36 @@ static inline void max17042_override_por_values(struct max17042_chip *chip)
max17042_override_por(map, MAX17042_FilterCFG, config->filter_cfg);
max17042_override_por(map, MAX17042_RelaxCFG, config->relax_cfg);
max17042_override_por(map, MAX17042_MiscCFG, config->misc_cfg);
- max17042_override_por(map, MAX17042_MaskSOC, config->masksoc);
max17042_override_por(map, MAX17042_FullCAP, config->fullcap);
max17042_override_por(map, MAX17042_FullCAPNom, config->fullcapnom);
- if (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17042)
- max17042_override_por(map, MAX17042_SOC_empty,
- config->socempty);
- max17042_override_por(map, MAX17042_LAvg_empty, config->lavg_empty);
max17042_override_por(map, MAX17042_dQacc, config->dqacc);
max17042_override_por(map, MAX17042_dPacc, config->dpacc);
- if (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17042)
- max17042_override_por(map, MAX17042_V_empty, config->vempty);
- if (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17055)
- max17042_override_por(map, MAX17055_V_empty, config->vempty);
- else
- max17042_override_por(map, MAX17047_V_empty, config->vempty);
- max17042_override_por(map, MAX17042_TempNom, config->temp_nom);
- max17042_override_por(map, MAX17042_TempLim, config->temp_lim);
- max17042_override_por(map, MAX17042_FCTC, config->fctc);
max17042_override_por(map, MAX17042_RCOMP0, config->rcomp0);
max17042_override_por(map, MAX17042_TempCo, config->tcompc0);
- if (chip->chip_type &&
- ((chip->chip_type == MAXIM_DEVICE_TYPE_MAX17042) ||
+
+ if (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17042) {
+ max17042_override_por(map, MAX17042_MaskSOC, config->masksoc);
+ max17042_override_por(map, MAX17042_SOC_empty, config->socempty);
+ max17042_override_por(map, MAX17042_V_empty, config->vempty);
+ max17042_override_por(map, MAX17042_EmptyTempCo, config->empty_tempco);
+ max17042_override_por(map, MAX17042_K_empty0, config->kempty0);
+ }
+
+ if ((chip->chip_type == MAXIM_DEVICE_TYPE_MAX17042) ||
(chip->chip_type == MAXIM_DEVICE_TYPE_MAX17047) ||
- (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17050))) {
- max17042_override_por(map, MAX17042_EmptyTempCo,
- config->empty_tempco);
- max17042_override_por(map, MAX17042_K_empty0,
- config->kempty0);
+ (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17050)) {
+ max17042_override_por(map, MAX17042_LAvg_empty, config->lavg_empty);
+ max17042_override_por(map, MAX17042_TempNom, config->temp_nom);
+ max17042_override_por(map, MAX17042_TempLim, config->temp_lim);
+ max17042_override_por(map, MAX17042_FCTC, config->fctc);
+ }
+
+ if ((chip->chip_type == MAXIM_DEVICE_TYPE_MAX17047) ||
+ (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17050) ||
+ (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17055)) {
+ max17042_override_por(map, MAX17047_V_empty, config->vempty);
}
}
@@ -869,11 +865,14 @@ static irqreturn_t max17042_thread_handler(int id, void *dev)
{
struct max17042_chip *chip = dev;
u32 val;
+ int ret;
- regmap_read(chip->regmap, MAX17042_STATUS, &val);
- if ((val & STATUS_INTR_SOCMIN_BIT) ||
- (val & STATUS_INTR_SOCMAX_BIT)) {
- dev_info(&chip->client->dev, "SOC threshold INTR\n");
+ ret = regmap_read(chip->regmap, MAX17042_STATUS, &val);
+ if (ret)
+ return IRQ_HANDLED;
+
+ if ((val & STATUS_SMN_BIT) || (val & STATUS_SMX_BIT)) {
+ dev_dbg(&chip->client->dev, "SOC threshold INTR\n");
max17042_set_soc_threshold(chip, 1);
}
@@ -1196,6 +1195,7 @@ static const struct of_device_id max17042_dt_match[] = {
{ .compatible = "maxim,max17047" },
{ .compatible = "maxim,max17050" },
{ .compatible = "maxim,max17055" },
+ { .compatible = "maxim,max77849-battery" },
{ },
};
MODULE_DEVICE_TABLE(of, max17042_dt_match);
@@ -1206,6 +1206,7 @@ static const struct i2c_device_id max17042_id[] = {
{ "max17047", MAXIM_DEVICE_TYPE_MAX17047 },
{ "max17050", MAXIM_DEVICE_TYPE_MAX17050 },
{ "max17055", MAXIM_DEVICE_TYPE_MAX17055 },
+ { "max77849-battery", MAXIM_DEVICE_TYPE_MAX17047 },
{ }
};
MODULE_DEVICE_TABLE(i2c, max17042_id);
diff --git a/drivers/power/supply/mt6360_charger.c b/drivers/power/supply/mt6360_charger.c
new file mode 100644
index 000000000000..3abaa72e0668
--- /dev/null
+++ b/drivers/power/supply/mt6360_charger.c
@@ -0,0 +1,867 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 MediaTek Inc.
+ */
+
+#include <linux/devm-helpers.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/linear_range.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+
+#define MT6360_PMU_CHG_CTRL1 0x311
+#define MT6360_PMU_CHG_CTRL2 0x312
+#define MT6360_PMU_CHG_CTRL3 0x313
+#define MT6360_PMU_CHG_CTRL4 0x314
+#define MT6360_PMU_CHG_CTRL5 0x315
+#define MT6360_PMU_CHG_CTRL6 0x316
+#define MT6360_PMU_CHG_CTRL7 0x317
+#define MT6360_PMU_CHG_CTRL8 0x318
+#define MT6360_PMU_CHG_CTRL9 0x319
+#define MT6360_PMU_CHG_CTRL10 0x31A
+#define MT6360_PMU_DEVICE_TYPE 0x322
+#define MT6360_PMU_USB_STATUS1 0x327
+#define MT6360_PMU_CHG_STAT 0x34A
+#define MT6360_PMU_CHG_CTRL19 0x361
+#define MT6360_PMU_FOD_STAT 0x3E7
+
+/* MT6360_PMU_CHG_CTRL1 */
+#define MT6360_FSLP_SHFT (3)
+#define MT6360_FSLP_MASK BIT(MT6360_FSLP_SHFT)
+#define MT6360_OPA_MODE_SHFT (0)
+#define MT6360_OPA_MODE_MASK BIT(MT6360_OPA_MODE_SHFT)
+/* MT6360_PMU_CHG_CTRL2 */
+#define MT6360_IINLMTSEL_SHFT (2)
+#define MT6360_IINLMTSEL_MASK GENMASK(3, 2)
+/* MT6360_PMU_CHG_CTRL3 */
+#define MT6360_IAICR_SHFT (2)
+#define MT6360_IAICR_MASK GENMASK(7, 2)
+#define MT6360_ILIM_EN_MASK BIT(0)
+/* MT6360_PMU_CHG_CTRL4 */
+#define MT6360_VOREG_SHFT (1)
+#define MT6360_VOREG_MASK GENMASK(7, 1)
+/* MT6360_PMU_CHG_CTRL5 */
+#define MT6360_VOBST_MASK GENMASK(7, 2)
+/* MT6360_PMU_CHG_CTRL6 */
+#define MT6360_VMIVR_SHFT (1)
+#define MT6360_VMIVR_MASK GENMASK(7, 1)
+/* MT6360_PMU_CHG_CTRL7 */
+#define MT6360_ICHG_SHFT (2)
+#define MT6360_ICHG_MASK GENMASK(7, 2)
+/* MT6360_PMU_CHG_CTRL8 */
+#define MT6360_IPREC_SHFT (0)
+#define MT6360_IPREC_MASK GENMASK(3, 0)
+/* MT6360_PMU_CHG_CTRL9 */
+#define MT6360_IEOC_SHFT (4)
+#define MT6360_IEOC_MASK GENMASK(7, 4)
+/* MT6360_PMU_CHG_CTRL10 */
+#define MT6360_OTG_OC_MASK GENMASK(3, 0)
+/* MT6360_PMU_DEVICE_TYPE */
+#define MT6360_USBCHGEN_MASK BIT(7)
+/* MT6360_PMU_USB_STATUS1 */
+#define MT6360_USB_STATUS_SHFT (4)
+#define MT6360_USB_STATUS_MASK GENMASK(6, 4)
+/* MT6360_PMU_CHG_STAT */
+#define MT6360_CHG_STAT_SHFT (6)
+#define MT6360_CHG_STAT_MASK GENMASK(7, 6)
+#define MT6360_VBAT_LVL_MASK BIT(5)
+/* MT6360_PMU_CHG_CTRL19 */
+#define MT6360_VINOVP_SHFT (5)
+#define MT6360_VINOVP_MASK GENMASK(6, 5)
+/* MT6360_PMU_FOD_STAT */
+#define MT6360_CHRDET_EXT_MASK BIT(4)
+
+/* uV */
+#define MT6360_VMIVR_MIN 3900000
+#define MT6360_VMIVR_MAX 13400000
+#define MT6360_VMIVR_STEP 100000
+/* uA */
+#define MT6360_ICHG_MIN 100000
+#define MT6360_ICHG_MAX 5000000
+#define MT6360_ICHG_STEP 100000
+/* uV */
+#define MT6360_VOREG_MIN 3900000
+#define MT6360_VOREG_MAX 4710000
+#define MT6360_VOREG_STEP 10000
+/* uA */
+#define MT6360_AICR_MIN 100000
+#define MT6360_AICR_MAX 3250000
+#define MT6360_AICR_STEP 50000
+/* uA */
+#define MT6360_IPREC_MIN 100000
+#define MT6360_IPREC_MAX 850000
+#define MT6360_IPREC_STEP 50000
+/* uA */
+#define MT6360_IEOC_MIN 100000
+#define MT6360_IEOC_MAX 850000
+#define MT6360_IEOC_STEP 50000
+
+/* Indices into mt6360_chg_range[]; MT6360_RANGE_MAX is the table size. */
+enum {
+ MT6360_RANGE_VMIVR,
+ MT6360_RANGE_ICHG,
+ MT6360_RANGE_VOREG,
+ MT6360_RANGE_AICR,
+ MT6360_RANGE_IPREC,
+ MT6360_RANGE_IEOC,
+ MT6360_RANGE_MAX,
+};
+
+/* Place one linear range at slot @idx of the range table. */
+#define MT6360_LINEAR_RANGE(idx, _min, _min_sel, _max_sel, _step) \
+	[idx] = REGULATOR_LINEAR_RANGE(_min, _min_sel, _max_sel, _step)
+
+/*
+ * Selector <-> value tables for the charger settings, indexed by
+ * MT6360_RANGE_*.  Minimum and step come from the MT6360_*_MIN/_STEP
+ * constants (uV or uA); the last selector of each range maps to the
+ * corresponding MT6360_*_MAX value.
+ */
+static const struct linear_range mt6360_chg_range[MT6360_RANGE_MAX] = {
+	MT6360_LINEAR_RANGE(MT6360_RANGE_VMIVR,
+			    MT6360_VMIVR_MIN, 0, 0x5F, MT6360_VMIVR_STEP),
+	MT6360_LINEAR_RANGE(MT6360_RANGE_ICHG,
+			    MT6360_ICHG_MIN, 0, 0x31, MT6360_ICHG_STEP),
+	MT6360_LINEAR_RANGE(MT6360_RANGE_VOREG,
+			    MT6360_VOREG_MIN, 0, 0x51, MT6360_VOREG_STEP),
+	MT6360_LINEAR_RANGE(MT6360_RANGE_AICR,
+			    MT6360_AICR_MIN, 0, 0x3F, MT6360_AICR_STEP),
+	MT6360_LINEAR_RANGE(MT6360_RANGE_IPREC,
+			    MT6360_IPREC_MIN, 0, 0x0F, MT6360_IPREC_STEP),
+	MT6360_LINEAR_RANGE(MT6360_RANGE_IEOC,
+			    MT6360_IEOC_MIN, 0, 0x0F, MT6360_IEOC_STEP),
+};
+
+/* Driver state; handed to the power supply core as drvdata. */
+struct mt6360_chg_info {
+ struct device *dev;
+ /* Register map used for all MT6360_PMU_* accesses. */
+ struct regmap *regmap;
+ struct power_supply_desc psy_desc;
+ struct power_supply *psy;
+ struct regulator_dev *otg_rdev;
+ /* Taken by the attach interrupt handler around bc12_en/psy_usb_type. */
+ struct mutex chgdet_lock;
+ u32 vinovp;
+ bool pwr_rdy;
+ /* When false, the attach interrupt handler ignores the interrupt. */
+ bool bc12_en;
+ /* POWER_SUPPLY_USB_TYPE_* reported for POWER_SUPPLY_PROP_USB_TYPE. */
+ int psy_usb_type;
+ struct work_struct chrdet_work;
+};
+
+/*
+ * NOTE(review): presumably the values of the IINLMTSEL field
+ * (MT6360_IINLMTSEL_MASK, CHG_CTRL2) - no user is visible here; confirm.
+ */
+enum mt6360_iinlmtsel {
+ MT6360_IINLMTSEL_AICR_3250 = 0,
+ MT6360_IINLMTSEL_CHG_TYPE,
+ MT6360_IINLMTSEL_AICR,
+ MT6360_IINLMTSEL_LOWER_LEVEL,
+};
+
+/*
+ * Values of the USB status field (MT6360_USB_STATUS_MASK in
+ * MT6360_PMU_USB_STATUS1) as decoded by the attach interrupt handler.
+ */
+enum mt6360_pmu_chg_type {
+ MT6360_CHG_TYPE_NOVBUS = 0,
+ MT6360_CHG_TYPE_UNDER_GOING,
+ MT6360_CHG_TYPE_SDP,
+ MT6360_CHG_TYPE_SDPNSTD,
+ MT6360_CHG_TYPE_DCP,
+ MT6360_CHG_TYPE_CDP,
+ MT6360_CHG_TYPE_DISABLE_BC12,
+ MT6360_CHG_TYPE_MAX,
+};
+
+/* USB types advertised through mt6360_charger_desc.usb_types. */
+static enum power_supply_usb_type mt6360_charger_usb_types[] = {
+ POWER_SUPPLY_USB_TYPE_UNKNOWN,
+ POWER_SUPPLY_USB_TYPE_SDP,
+ POWER_SUPPLY_USB_TYPE_DCP,
+ POWER_SUPPLY_USB_TYPE_CDP,
+};
+
+/*
+ * Read MT6360_PMU_FOD_STAT and report whether the CHRDET_EXT bit is set.
+ * Returns 0 with *@pwr_rdy updated, or the negative regmap error.
+ */
+static int mt6360_get_chrdet_ext_stat(struct mt6360_chg_info *mci,
+				      bool *pwr_rdy)
+{
+	unsigned int fod_stat;
+	int rc;
+
+	rc = regmap_read(mci->regmap, MT6360_PMU_FOD_STAT, &fod_stat);
+	if (rc < 0)
+		return rc;
+
+	*pwr_rdy = !!(fod_stat & MT6360_CHRDET_EXT_MASK);
+
+	return 0;
+}
+
+/*
+ * POWER_SUPPLY_PROP_ONLINE: 1 when the CHRDET_EXT status bit is set,
+ * 0 otherwise.  Returns 0 or the negative error from the status read.
+ */
+static int mt6360_charger_get_online(struct mt6360_chg_info *mci,
+				     union power_supply_propval *val)
+{
+	bool ready;
+	int rc = mt6360_get_chrdet_ext_stat(mci, &ready);
+
+	if (rc < 0)
+		return rc;
+
+	val->intval = ready;
+
+	return 0;
+}
+
+/*
+ * POWER_SUPPLY_PROP_STATUS.
+ *
+ * Reports DISCHARGING when the CHRDET_EXT bit is clear; otherwise the
+ * CHG_STAT field is decoded: 0 -> NOT_CHARGING, 1 -> CHARGING, 2 -> FULL.
+ * Any other field value yields -EIO and leaves @val untouched.
+ *
+ * Returns 0 on success or a negative error.
+ */
+static int mt6360_charger_get_status(struct mt6360_chg_info *mci,
+				     union power_supply_propval *val)
+{
+	unsigned int chg_stat;
+	bool ready;
+	int rc;
+
+	rc = mt6360_get_chrdet_ext_stat(mci, &ready);
+	if (rc < 0)
+		return rc;
+
+	if (!ready) {
+		val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+		return 0;
+	}
+
+	rc = regmap_read(mci->regmap, MT6360_PMU_CHG_STAT, &chg_stat);
+	if (rc < 0)
+		return rc;
+
+	chg_stat = (chg_stat & MT6360_CHG_STAT_MASK) >> MT6360_CHG_STAT_SHFT;
+	switch (chg_stat) {
+	case 0x0:
+		val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+		break;
+	case 0x1:
+		val->intval = POWER_SUPPLY_STATUS_CHARGING;
+		break;
+	case 0x2:
+		val->intval = POWER_SUPPLY_STATUS_FULL;
+		break;
+	default:
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * POWER_SUPPLY_PROP_CHARGE_TYPE.
+ *
+ * While the CHG_STAT field reads 1 (charge in progress) the VBAT_LVL bit
+ * selects FAST (set) or TRICKLE (clear); every other state reports NONE.
+ *
+ * Returns 0 or the negative regmap error.
+ */
+static int mt6360_charger_get_charge_type(struct mt6360_chg_info *mci,
+					  union power_supply_propval *val)
+{
+	unsigned int stat_reg;
+	u8 state;
+	int rc;
+
+	rc = regmap_read(mci->regmap, MT6360_PMU_CHG_STAT, &stat_reg);
+	if (rc < 0)
+		return rc;
+
+	state = (stat_reg & MT6360_CHG_STAT_MASK) >> MT6360_CHG_STAT_SHFT;
+	if (state != 0x01) {
+		/* Not charging, charge done or charge fault. */
+		val->intval = POWER_SUPPLY_CHARGE_TYPE_NONE;
+		return 0;
+	}
+
+	if (stat_reg & MT6360_VBAT_LVL_MASK)
+		val->intval = POWER_SUPPLY_CHARGE_TYPE_FAST;
+	else
+		val->intval = POWER_SUPPLY_CHARGE_TYPE_TRICKLE;
+
+	return 0;
+}
+
+/*
+ * Read the ICHG selector from CHG_CTRL7 and convert it to a value through
+ * the MT6360_RANGE_ICHG table.  Returns 0 or the error from the register
+ * read / range lookup; @val is only written on success.
+ */
+static int mt6360_charger_get_ichg(struct mt6360_chg_info *mci,
+				   union power_supply_propval *val)
+{
+	const struct linear_range *range = &mt6360_chg_range[MT6360_RANGE_ICHG];
+	u32 reg, microamp;
+	int rc;
+
+	rc = regmap_read(mci->regmap, MT6360_PMU_CHG_CTRL7, &reg);
+	if (rc < 0)
+		return rc;
+
+	rc = linear_range_get_value(range,
+				    (reg & MT6360_ICHG_MASK) >> MT6360_ICHG_SHFT,
+				    &microamp);
+	if (rc)
+		return rc;
+
+	val->intval = microamp;
+
+	return 0;
+}
+
+/* Report the constant MT6360_ICHG_MAX; @mci is unused. Always returns 0. */
+static int mt6360_charger_get_max_ichg(struct mt6360_chg_info *mci,
+ union power_supply_propval *val)
+{
+ val->intval = MT6360_ICHG_MAX;
+ return 0;
+}
+
+/*
+ * Read the VOREG selector from CHG_CTRL4 and convert it to a value through
+ * the MT6360_RANGE_VOREG table.  Returns 0 or the error from the register
+ * read / range lookup; @val is only written on success.
+ */
+static int mt6360_charger_get_cv(struct mt6360_chg_info *mci,
+				 union power_supply_propval *val)
+{
+	const struct linear_range *range = &mt6360_chg_range[MT6360_RANGE_VOREG];
+	u32 reg, microvolt;
+	int rc;
+
+	rc = regmap_read(mci->regmap, MT6360_PMU_CHG_CTRL4, &reg);
+	if (rc < 0)
+		return rc;
+
+	rc = linear_range_get_value(range,
+				    (reg & MT6360_VOREG_MASK) >> MT6360_VOREG_SHFT,
+				    &microvolt);
+	if (rc)
+		return rc;
+
+	val->intval = microvolt;
+
+	return 0;
+}
+
+/* Report the constant MT6360_VOREG_MAX; @mci is unused. Always returns 0. */
+static int mt6360_charger_get_max_cv(struct mt6360_chg_info *mci,
+ union power_supply_propval *val)
+{
+ val->intval = MT6360_VOREG_MAX;
+ return 0;
+}
+
+/*
+ * Read the IAICR selector from CHG_CTRL3 and convert it to a value through
+ * the MT6360_RANGE_AICR table.  Returns 0 or the error from the register
+ * read / range lookup; @val is only written on success.
+ */
+static int mt6360_charger_get_aicr(struct mt6360_chg_info *mci,
+				   union power_supply_propval *val)
+{
+	const struct linear_range *range = &mt6360_chg_range[MT6360_RANGE_AICR];
+	u32 reg, microamp;
+	int rc;
+
+	rc = regmap_read(mci->regmap, MT6360_PMU_CHG_CTRL3, &reg);
+	if (rc < 0)
+		return rc;
+
+	rc = linear_range_get_value(range,
+				    (reg & MT6360_IAICR_MASK) >> MT6360_IAICR_SHFT,
+				    &microamp);
+	if (rc)
+		return rc;
+
+	val->intval = microamp;
+
+	return 0;
+}
+
+/*
+ * Read the VMIVR selector from CHG_CTRL6 and convert it to a value through
+ * the MT6360_RANGE_VMIVR table.  Returns 0 or the error from the register
+ * read / range lookup; @val is only written on success.
+ */
+static int mt6360_charger_get_mivr(struct mt6360_chg_info *mci,
+				   union power_supply_propval *val)
+{
+	const struct linear_range *range = &mt6360_chg_range[MT6360_RANGE_VMIVR];
+	u32 reg, microvolt;
+	int rc;
+
+	rc = regmap_read(mci->regmap, MT6360_PMU_CHG_CTRL6, &reg);
+	if (rc < 0)
+		return rc;
+
+	rc = linear_range_get_value(range,
+				    (reg & MT6360_VMIVR_MASK) >> MT6360_VMIVR_SHFT,
+				    &microvolt);
+	if (rc)
+		return rc;
+
+	val->intval = microvolt;
+
+	return 0;
+}
+
+/*
+ * Read the IPREC selector from CHG_CTRL8 and convert it to a value through
+ * the MT6360_RANGE_IPREC table.  Returns 0 or the error from the register
+ * read / range lookup; @val is only written on success.
+ */
+static int mt6360_charger_get_iprechg(struct mt6360_chg_info *mci,
+				      union power_supply_propval *val)
+{
+	const struct linear_range *range = &mt6360_chg_range[MT6360_RANGE_IPREC];
+	u32 reg, microamp;
+	int rc;
+
+	rc = regmap_read(mci->regmap, MT6360_PMU_CHG_CTRL8, &reg);
+	if (rc < 0)
+		return rc;
+
+	rc = linear_range_get_value(range,
+				    (reg & MT6360_IPREC_MASK) >> MT6360_IPREC_SHFT,
+				    &microamp);
+	if (rc)
+		return rc;
+
+	val->intval = microamp;
+
+	return 0;
+}
+
+/*
+ * Read the IEOC selector from CHG_CTRL9 and convert it to a value through
+ * the MT6360_RANGE_IEOC table.  Returns 0 or the error from the register
+ * read / range lookup; @val is only written on success.
+ */
+static int mt6360_charger_get_ieoc(struct mt6360_chg_info *mci,
+				   union power_supply_propval *val)
+{
+	const struct linear_range *range = &mt6360_chg_range[MT6360_RANGE_IEOC];
+	u32 reg, microamp;
+	int rc;
+
+	rc = regmap_read(mci->regmap, MT6360_PMU_CHG_CTRL9, &reg);
+	if (rc < 0)
+		return rc;
+
+	rc = linear_range_get_value(range,
+				    (reg & MT6360_IEOC_MASK) >> MT6360_IEOC_SHFT,
+				    &microamp);
+	if (rc)
+		return rc;
+
+	val->intval = microamp;
+
+	return 0;
+}
+
+/*
+ * POWER_SUPPLY_PROP_ONLINE write: a non-zero value clears the FSLP bit in
+ * CHG_CTRL1, zero sets it.  Returns the regmap_update_bits() result.
+ */
+static int mt6360_charger_set_online(struct mt6360_chg_info *mci,
+				     const union power_supply_propval *val)
+{
+	unsigned int fslp_bits = 0;
+
+	if (!val->intval)
+		fslp_bits = MT6360_FSLP_MASK;
+
+	return regmap_update_bits(mci->regmap, MT6360_PMU_CHG_CTRL1,
+				  MT6360_FSLP_MASK, fslp_bits);
+}
+
+/*
+ * Translate the requested value into an MT6360_RANGE_ICHG selector with
+ * linear_range_get_selector_within() and write it to the ICHG field of
+ * CHG_CTRL7.  Returns the regmap_update_bits() result.
+ */
+static int mt6360_charger_set_ichg(struct mt6360_chg_info *mci,
+				   const union power_supply_propval *val)
+{
+	const struct linear_range *range = &mt6360_chg_range[MT6360_RANGE_ICHG];
+	u32 selector;
+
+	linear_range_get_selector_within(range, val->intval, &selector);
+
+	return regmap_update_bits(mci->regmap, MT6360_PMU_CHG_CTRL7,
+				  MT6360_ICHG_MASK,
+				  selector << MT6360_ICHG_SHFT);
+}
+
+/*
+ * Translate the requested value into an MT6360_RANGE_VOREG selector with
+ * linear_range_get_selector_within() and write it to the VOREG field of
+ * CHG_CTRL4.  Returns the regmap_update_bits() result.
+ */
+static int mt6360_charger_set_cv(struct mt6360_chg_info *mci,
+				 const union power_supply_propval *val)
+{
+	const struct linear_range *range = &mt6360_chg_range[MT6360_RANGE_VOREG];
+	u32 selector;
+
+	linear_range_get_selector_within(range, val->intval, &selector);
+
+	return regmap_update_bits(mci->regmap, MT6360_PMU_CHG_CTRL4,
+				  MT6360_VOREG_MASK,
+				  selector << MT6360_VOREG_SHFT);
+}
+
+/*
+ * Translate the requested value into an MT6360_RANGE_AICR selector with
+ * linear_range_get_selector_within() and write it to the IAICR field of
+ * CHG_CTRL3.  Returns the regmap_update_bits() result.
+ */
+static int mt6360_charger_set_aicr(struct mt6360_chg_info *mci,
+				   const union power_supply_propval *val)
+{
+	const struct linear_range *range = &mt6360_chg_range[MT6360_RANGE_AICR];
+	u32 selector;
+
+	linear_range_get_selector_within(range, val->intval, &selector);
+
+	return regmap_update_bits(mci->regmap, MT6360_PMU_CHG_CTRL3,
+				  MT6360_IAICR_MASK,
+				  selector << MT6360_IAICR_SHFT);
+}
+
+/*
+ * Translate the requested value into an MT6360_RANGE_VMIVR selector with
+ * linear_range_get_selector_within() and write it to the VMIVR field.
+ *
+ * The VMIVR field lives in CHG_CTRL6 (see the MT6360_VMIVR_* definitions
+ * and mt6360_charger_get_mivr()).  Writing it to CHG_CTRL3, as this
+ * function used to, clobbered the IAICR input current limit bits instead.
+ *
+ * Returns the regmap_update_bits() result.
+ */
+static int mt6360_charger_set_mivr(struct mt6360_chg_info *mci,
+				   const union power_supply_propval *val)
+{
+	u32 sel;
+
+	linear_range_get_selector_within(&mt6360_chg_range[MT6360_RANGE_VMIVR],
+					 val->intval, &sel);
+
+	return regmap_update_bits(mci->regmap,
+				  MT6360_PMU_CHG_CTRL6,
+				  MT6360_VMIVR_MASK,
+				  sel << MT6360_VMIVR_SHFT);
+}
+
+/*
+ * Translate the requested value into an MT6360_RANGE_IPREC selector with
+ * linear_range_get_selector_within() and write it to the IPREC field of
+ * CHG_CTRL8.  Returns the regmap_update_bits() result.
+ */
+static int mt6360_charger_set_iprechg(struct mt6360_chg_info *mci,
+				      const union power_supply_propval *val)
+{
+	const struct linear_range *range = &mt6360_chg_range[MT6360_RANGE_IPREC];
+	u32 selector;
+
+	linear_range_get_selector_within(range, val->intval, &selector);
+
+	return regmap_update_bits(mci->regmap, MT6360_PMU_CHG_CTRL8,
+				  MT6360_IPREC_MASK,
+				  selector << MT6360_IPREC_SHFT);
+}
+
+/*
+ * Translate the requested value into an MT6360_RANGE_IEOC selector with
+ * linear_range_get_selector_within() and write it to the IEOC field of
+ * CHG_CTRL9.  Returns the regmap_update_bits() result.
+ */
+static int mt6360_charger_set_ieoc(struct mt6360_chg_info *mci,
+				   const union power_supply_propval *val)
+{
+	const struct linear_range *range = &mt6360_chg_range[MT6360_RANGE_IEOC];
+	u32 selector;
+
+	linear_range_get_selector_within(range, val->intval, &selector);
+
+	return regmap_update_bits(mci->regmap, MT6360_PMU_CHG_CTRL9,
+				  MT6360_IEOC_MASK,
+				  selector << MT6360_IEOC_SHFT);
+}
+
+/*
+ * power_supply get_property callback: dispatch @psp to the matching
+ * getter.  USB_TYPE is answered from the cached psy_usb_type.
+ *
+ * Returns the getter's result, or -ENODATA for an unhandled property.
+ */
+static int mt6360_charger_get_property(struct power_supply *psy,
+				       enum power_supply_property psp,
+				       union power_supply_propval *val)
+{
+	struct mt6360_chg_info *mci = power_supply_get_drvdata(psy);
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		return mt6360_charger_get_online(mci, val);
+	case POWER_SUPPLY_PROP_STATUS:
+		return mt6360_charger_get_status(mci, val);
+	case POWER_SUPPLY_PROP_CHARGE_TYPE:
+		return mt6360_charger_get_charge_type(mci, val);
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
+		return mt6360_charger_get_ichg(mci, val);
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX:
+		return mt6360_charger_get_max_ichg(mci, val);
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:
+		return mt6360_charger_get_cv(mci, val);
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX:
+		return mt6360_charger_get_max_cv(mci, val);
+	case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
+		return mt6360_charger_get_aicr(mci, val);
+	case POWER_SUPPLY_PROP_INPUT_VOLTAGE_LIMIT:
+		return mt6360_charger_get_mivr(mci, val);
+	case POWER_SUPPLY_PROP_PRECHARGE_CURRENT:
+		return mt6360_charger_get_iprechg(mci, val);
+	case POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT:
+		return mt6360_charger_get_ieoc(mci, val);
+	case POWER_SUPPLY_PROP_USB_TYPE:
+		val->intval = mci->psy_usb_type;
+		return 0;
+	default:
+		return -ENODATA;
+	}
+}
+
+/*
+ * power_supply set_property callback: dispatch @psp to the matching
+ * setter.  Returns the setter's result, or -EINVAL for a property that
+ * has no setter.
+ */
+static int mt6360_charger_set_property(struct power_supply *psy,
+				       enum power_supply_property psp,
+				       const union power_supply_propval *val)
+{
+	struct mt6360_chg_info *mci = power_supply_get_drvdata(psy);
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		return mt6360_charger_set_online(mci, val);
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
+		return mt6360_charger_set_ichg(mci, val);
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:
+		return mt6360_charger_set_cv(mci, val);
+	case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
+		return mt6360_charger_set_aicr(mci, val);
+	case POWER_SUPPLY_PROP_INPUT_VOLTAGE_LIMIT:
+		return mt6360_charger_set_mivr(mci, val);
+	case POWER_SUPPLY_PROP_PRECHARGE_CURRENT:
+		return mt6360_charger_set_iprechg(mci, val);
+	case POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT:
+		return mt6360_charger_set_ieoc(mci, val);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * power_supply property_is_writeable callback.
+ * Returns 1 for the properties mt6360_charger_set_property() handles,
+ * 0 for everything else.
+ */
+static int mt6360_charger_property_is_writeable(struct power_supply *psy,
+						enum power_supply_property psp)
+{
+	int writeable = 0;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:
+	case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
+	case POWER_SUPPLY_PROP_INPUT_VOLTAGE_LIMIT:
+	case POWER_SUPPLY_PROP_PRECHARGE_CURRENT:
+	case POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT:
+		writeable = 1;
+		break;
+	default:
+		break;
+	}
+
+	return writeable;
+}
+
+/* Properties served by mt6360_charger_get_property(). */
+static enum power_supply_property mt6360_charger_properties[] = {
+ POWER_SUPPLY_PROP_ONLINE,
+ POWER_SUPPLY_PROP_STATUS,
+ POWER_SUPPLY_PROP_CHARGE_TYPE,
+ POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT,
+ POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX,
+ POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE,
+ POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX,
+ POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT,
+ POWER_SUPPLY_PROP_INPUT_VOLTAGE_LIMIT,
+ POWER_SUPPLY_PROP_PRECHARGE_CURRENT,
+ POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT,
+ POWER_SUPPLY_PROP_USB_TYPE,
+};
+
+/*
+ * Power supply descriptor template. No .name is set here.
+ * NOTE(review): presumably copied into mt6360_chg_info.psy_desc and named
+ * at probe time - the probe code is outside this chunk; confirm.
+ */
+static const struct power_supply_desc mt6360_charger_desc = {
+ .type = POWER_SUPPLY_TYPE_USB,
+ .properties = mt6360_charger_properties,
+ .num_properties = ARRAY_SIZE(mt6360_charger_properties),
+ .get_property = mt6360_charger_get_property,
+ .set_property = mt6360_charger_set_property,
+ .property_is_writeable = mt6360_charger_property_is_writeable,
+ .usb_types = mt6360_charger_usb_types,
+ .num_usb_types = ARRAY_SIZE(mt6360_charger_usb_types),
+};
+
+/* OTG regulator operations; all of them are the generic regmap helpers. */
+static const struct regulator_ops mt6360_chg_otg_ops = {
+ .list_voltage = regulator_list_voltage_linear,
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+};
+
+/*
+ * Descriptor of the "usb-otg-vbus" regulator.
+ *
+ * The voltage table is linear: 57 selectors starting at 4.425 V in 25 mV
+ * steps, i.e. the last selector corresponds to 5.825 V.
+ */
+static const struct regulator_desc mt6360_otg_rdesc = {
+	.name		= "usb-otg-vbus",
+	.of_match	= "usb-otg-vbus",
+	.type		= REGULATOR_VOLTAGE,
+	.owner		= THIS_MODULE,
+	.ops		= &mt6360_chg_otg_ops,
+
+	/* linear voltage range */
+	.n_voltages	= 57,
+	.min_uV		= 4425000,
+	.uV_step	= 25000,
+
+	/* register fields used by the regmap helpers */
+	.vsel_reg	= MT6360_PMU_CHG_CTRL5,
+	.vsel_mask	= MT6360_VOBST_MASK,
+	.enable_reg	= MT6360_PMU_CHG_CTRL1,
+	.enable_mask	= MT6360_OPA_MODE_MASK,
+};
+
+/*
+ * Handler for the "attach_i" interrupt (registered as a threaded handler by
+ * mt6360_chg_irq_register()).
+ *
+ * Under chgdet_lock: reads MT6360_PMU_USB_STATUS1, maps the reported charger
+ * type to a POWER_SUPPLY_USB_TYPE_* value stored in mci->psy_usb_type, and
+ * calls power_supply_changed() when that value differs from the one stored
+ * before.  The interrupt is ignored while mci->bc12_en is false.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t mt6360_pmu_attach_i_handler(int irq, void *data)
+{
+	struct mt6360_chg_info *mci = data;
+	unsigned int status;
+	int prev_type;
+	int err;
+
+	mutex_lock(&mci->chgdet_lock);
+
+	if (!mci->bc12_en) {
+		dev_warn(mci->dev, "Received attach interrupt, bc12 disabled, ignore irq\n");
+		goto unlock;
+	}
+
+	prev_type = mci->psy_usb_type;
+
+	/* Plug in */
+	err = regmap_read(mci->regmap, MT6360_PMU_USB_STATUS1, &status);
+	if (err < 0)
+		goto unlock;
+
+	status = (status & MT6360_USB_STATUS_MASK) >> MT6360_USB_STATUS_SHFT;
+
+	switch (status) {
+	/* States that carry no usable charger type: leave everything as is. */
+	case MT6360_CHG_TYPE_NOVBUS:
+		dev_dbg(mci->dev, "Received attach interrupt, no vbus\n");
+		goto unlock;
+	case MT6360_CHG_TYPE_UNDER_GOING:
+		dev_dbg(mci->dev, "Received attach interrupt, under going...\n");
+		goto unlock;
+	case MT6360_CHG_TYPE_DISABLE_BC12:
+		dev_dbg(mci->dev, "Received attach interrupt, bc12 detect not enable\n");
+		goto unlock;
+
+	/* Detected charger types. */
+	case MT6360_CHG_TYPE_SDP:
+	case MT6360_CHG_TYPE_SDPNSTD:
+		mci->psy_usb_type = POWER_SUPPLY_USB_TYPE_SDP;
+		break;
+	case MT6360_CHG_TYPE_CDP:
+		mci->psy_usb_type = POWER_SUPPLY_USB_TYPE_CDP;
+		break;
+	case MT6360_CHG_TYPE_DCP:
+		mci->psy_usb_type = POWER_SUPPLY_USB_TYPE_DCP;
+		break;
+
+	default:
+		/* The stored type is reset, but no change event is sent here. */
+		mci->psy_usb_type = POWER_SUPPLY_USB_TYPE_UNKNOWN;
+		dev_dbg(mci->dev, "Received attach interrupt, reserved address\n");
+		goto unlock;
+	}
+
+	dev_dbg(mci->dev, "Received attach interrupt, chg_type = %d\n", mci->psy_usb_type);
+	if (mci->psy_usb_type != prev_type)
+		power_supply_changed(mci->psy);
+
+unlock:
+	mutex_unlock(&mci->chgdet_lock);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Re-read the external charger-detect status and act on a change.
+ *
+ * Under chgdet_lock:
+ *  - nothing happens if the status cannot be read or equals mci->pwr_rdy;
+ *  - otherwise mci->pwr_rdy is updated; when power went away the USB type is
+ *    reset to UNKNOWN and a power-supply change event is sent;
+ *  - the USBCHGEN bit in MT6360_PMU_DEVICE_TYPE is set to follow the new
+ *    state, and mci->bc12_en is updated only if that write succeeded.
+ */
+static void mt6360_handle_chrdet_ext_evt(struct mt6360_chg_info *mci)
+{
+	bool ready;
+	int err;
+
+	mutex_lock(&mci->chgdet_lock);
+
+	err = mt6360_get_chrdet_ext_stat(mci, &ready);
+	if (err < 0)
+		goto unlock;
+
+	if (ready == mci->pwr_rdy) {
+		dev_dbg(mci->dev, "Received vbus interrupt, pwr_rdy is same(%d)\n", ready);
+		goto unlock;
+	}
+
+	mci->pwr_rdy = ready;
+	dev_dbg(mci->dev, "Received vbus interrupt, pwr_rdy = %d\n", ready);
+
+	if (!ready) {
+		mci->psy_usb_type = POWER_SUPPLY_USB_TYPE_UNKNOWN;
+		power_supply_changed(mci->psy);
+	}
+
+	err = regmap_update_bits(mci->regmap, MT6360_PMU_DEVICE_TYPE,
+				 MT6360_USBCHGEN_MASK,
+				 ready ? MT6360_USBCHGEN_MASK : 0);
+	if (err >= 0)
+		mci->bc12_en = ready;
+
+unlock:
+	mutex_unlock(&mci->chgdet_lock);
+}
+
+/* Work item body: run the charger-detect evaluation for the owning device. */
+static void mt6360_chrdet_work(struct work_struct *work)
+{
+	struct mt6360_chg_info *mci;
+
+	mci = container_of(work, struct mt6360_chg_info, chrdet_work);
+	mt6360_handle_chrdet_ext_evt(mci);
+}
+
+/*
+ * Handler for the "chrdet_ext_evt" interrupt: @data is the
+ * struct mt6360_chg_info passed at request time.  Always returns IRQ_HANDLED.
+ */
+static irqreturn_t mt6360_pmu_chrdet_ext_evt_handler(int irq, void *data)
+{
+	mt6360_handle_chrdet_ext_evt(data);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Look up and request every interrupt used by the charger.
+ *
+ * Each interrupt is requested as a device-managed threaded IRQ (no primary
+ * handler) with the platform driver data as cookie.
+ *
+ * Returns 0 on success, or the negative error from the failing lookup or
+ * request.
+ */
+static int mt6360_chg_irq_register(struct platform_device *pdev)
+{
+	const struct {
+		const char *name;
+		irq_handler_t handler;
+	} irq_descs[] = {
+		{ "attach_i", mt6360_pmu_attach_i_handler },
+		{ "chrdet_ext_evt", mt6360_pmu_chrdet_ext_evt_handler }
+	};
+	struct device *dev = &pdev->dev;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(irq_descs); i++) {
+		const char *name = irq_descs[i].name;
+		int irq, err;
+
+		irq = platform_get_irq_byname(pdev, name);
+		if (irq < 0)
+			return irq;
+
+		err = devm_request_threaded_irq(dev, irq, NULL,
+						irq_descs[i].handler,
+						IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+						name,
+						platform_get_drvdata(pdev));
+		if (err < 0)
+			return dev_err_probe(dev, err, "Failed to request %s irq\n",
+					     name);
+	}
+
+	return 0;
+}
+
+/*
+ * Translate a VIN over-voltage threshold in microvolts into the register
+ * selector.
+ *
+ * Returns the index of the largest supported threshold that does not exceed
+ * @val; values below the smallest threshold map to selector 0.
+ */
+static u32 mt6360_vinovp_trans_to_sel(u32 val)
+{
+	/* Supported thresholds in uV, ascending; the index is the selector. */
+	static const u32 vinovp_tbl[] = { 5500000, 6500000, 11000000, 14500000 };
+	unsigned int i;
+
+	/* Stop at the last entry that is still less than or equal to val. */
+	for (i = 0; i < ARRAY_SIZE(vinovp_tbl) - 1; i++) {
+		if (val < vinovp_tbl[i + 1])
+			break;
+	}
+
+	return i;
+}
+
+/*
+ * Apply the initial charger register configuration, in this order:
+ *  1. VIN OVP selector derived from mci->vinovp
+ *  2. clear USBCHGEN (BC1.2 detection off)
+ *  3. select AICR as input current limit source, then wait 5-6 ms
+ *  4. clear ILIM_EN
+ *  5. set the OTG OC field (3A according to the error message below)
+ *
+ * Returns 0 on success or the negative error of the first failing write.
+ */
+static int mt6360_chg_init_setting(struct mt6360_chg_info *mci)
+{
+	struct regmap *map = mci->regmap;
+	u32 ovp_sel = mt6360_vinovp_trans_to_sel(mci->vinovp);
+	int err;
+
+	err = regmap_update_bits(map, MT6360_PMU_CHG_CTRL19,
+				 MT6360_VINOVP_MASK,
+				 ovp_sel << MT6360_VINOVP_SHFT);
+	if (err)
+		return dev_err_probe(mci->dev, err, "%s: Failed to apply vinovp\n", __func__);
+
+	err = regmap_update_bits(map, MT6360_PMU_DEVICE_TYPE,
+				 MT6360_USBCHGEN_MASK, 0);
+	if (err)
+		return dev_err_probe(mci->dev, err, "%s: Failed to disable bc12\n", __func__);
+
+	err = regmap_update_bits(map, MT6360_PMU_CHG_CTRL2,
+				 MT6360_IINLMTSEL_MASK,
+				 MT6360_IINLMTSEL_AICR << MT6360_IINLMTSEL_SHFT);
+	if (err)
+		return dev_err_probe(mci->dev, err,
+				     "%s: Failed to switch iinlmtsel to aicr\n", __func__);
+
+	usleep_range(5000, 6000);
+
+	err = regmap_update_bits(map, MT6360_PMU_CHG_CTRL3,
+				 MT6360_ILIM_EN_MASK, 0);
+	if (err)
+		return dev_err_probe(mci->dev, err,
+				     "%s: Failed to disable ilim\n", __func__);
+
+	err = regmap_update_bits(map, MT6360_PMU_CHG_CTRL10,
+				 MT6360_OTG_OC_MASK, MT6360_OTG_OC_MASK);
+	if (err)
+		return dev_err_probe(mci->dev, err,
+				     "%s: Failed to config otg oc to 3A\n", __func__);
+
+	return 0;
+}
+
+/*
+ * Probe the MT6360 charger platform device.
+ *
+ * Allocates the driver state, sets up the charger-detect work, reads the
+ * optional "richtek,vinovp-microvolt" property (default 6.5 V), applies the
+ * initial register settings through the parent's regmap, then registers the
+ * power supply, the interrupts and the OTG VBUS regulator.  Finally the
+ * charger-detect work is scheduled once to pick up the current state.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int mt6360_charger_probe(struct platform_device *pdev)
+{
+	struct mt6360_chg_info *mci;
+	struct power_supply_config charger_cfg = {};
+	struct regulator_config config = { };
+	int ret;
+
+	mci = devm_kzalloc(&pdev->dev, sizeof(*mci), GFP_KERNEL);
+	if (!mci)
+		return -ENOMEM;
+
+	mci->dev = &pdev->dev;
+	mci->vinovp = 6500000;
+	mutex_init(&mci->chgdet_lock);
+	platform_set_drvdata(pdev, mci);
+
+	/*
+	 * devm_work_autocancel() registers a devm action and can fail; without
+	 * it the work would not be cancelled on unbind, so do not go on.
+	 */
+	ret = devm_work_autocancel(&pdev->dev, &mci->chrdet_work, mt6360_chrdet_work);
+	if (ret)
+		return dev_err_probe(&pdev->dev, ret,
+				     "Failed to set up charger detect work\n");
+
+	/* Optional property: on failure the default set above is kept. */
+	ret = device_property_read_u32(&pdev->dev, "richtek,vinovp-microvolt", &mci->vinovp);
+	if (ret)
+		dev_warn(&pdev->dev, "Failed to parse vinovp in DT, keep default 6.5v\n");
+
+	mci->regmap = dev_get_regmap(pdev->dev.parent, NULL);
+	if (!mci->regmap)
+		return dev_err_probe(&pdev->dev, -ENODEV, "Failed to get parent regmap\n");
+
+	ret = mt6360_chg_init_setting(mci);
+	if (ret)
+		return dev_err_probe(&pdev->dev, ret, "Failed to initial setting\n");
+
+	/* Per-device copy of the descriptor so the name can be set. */
+	memcpy(&mci->psy_desc, &mt6360_charger_desc, sizeof(mci->psy_desc));
+	mci->psy_desc.name = dev_name(&pdev->dev);
+	charger_cfg.drv_data = mci;
+	charger_cfg.of_node = pdev->dev.of_node;
+	mci->psy = devm_power_supply_register(&pdev->dev,
+					      &mci->psy_desc, &charger_cfg);
+	if (IS_ERR(mci->psy))
+		return dev_err_probe(&pdev->dev, PTR_ERR(mci->psy),
+				     "Failed to register power supply dev\n");
+
+	ret = mt6360_chg_irq_register(pdev);
+	if (ret)
+		return dev_err_probe(&pdev->dev, ret, "Failed to register irqs\n");
+
+	config.dev = &pdev->dev;
+	config.regmap = mci->regmap;
+	mci->otg_rdev = devm_regulator_register(&pdev->dev, &mt6360_otg_rdesc,
+						&config);
+	if (IS_ERR(mci->otg_rdev))
+		return dev_err_probe(&pdev->dev, PTR_ERR(mci->otg_rdev),
+				     "Failed to register otg regulator\n");
+
+	/* Evaluate the charger-detect state once after probing. */
+	schedule_work(&mci->chrdet_work);
+
+	return 0;
+}
+
+/* Device-tree match table; referenced through of_match_ptr() in the driver. */
+static const struct of_device_id __maybe_unused mt6360_charger_of_id[] = {
+	{
+		.compatible = "mediatek,mt6360-chg",
+	},
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, mt6360_charger_of_id);
+
+/* Platform-bus id table used as the driver's .id_table. */
+static const struct platform_device_id mt6360_charger_id[] = {
+	{
+		"mt6360-chg",
+		0
+	},
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(platform, mt6360_charger_id);
+
+/* Platform driver definition; only a probe callback is provided. */
+static struct platform_driver mt6360_charger_driver = {
+	.probe		= mt6360_charger_probe,
+	.id_table	= mt6360_charger_id,
+	.driver		= {
+		.name		= "mt6360-chg",
+		.of_match_table	= of_match_ptr(mt6360_charger_of_id),
+	},
+};
+module_platform_driver(mt6360_charger_driver);
+
+MODULE_AUTHOR("Gene Chen <gene_chen@richtek.com>");
+MODULE_DESCRIPTION("MT6360 Charger Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index d99e2f11c183..0c2132c7f5d4 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -571,6 +571,7 @@ int power_supply_get_battery_info(struct power_supply *psy,
int err, len, index;
const __be32 *list;
+ info->technology = POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
info->energy_full_design_uwh = -EINVAL;
info->charge_full_design_uah = -EINVAL;
info->voltage_min_design_uv = -EINVAL;
@@ -618,6 +619,24 @@ int power_supply_get_battery_info(struct power_supply *psy,
* Documentation/power/power_supply_class.rst.
*/
+ if (!of_property_read_string(battery_np, "device-chemistry", &value)) {
+ if (!strcmp("nickel-cadmium", value))
+ info->technology = POWER_SUPPLY_TECHNOLOGY_NiCd;
+ else if (!strcmp("nickel-metal-hydride", value))
+ info->technology = POWER_SUPPLY_TECHNOLOGY_NiMH;
+ else if (!strcmp("lithium-ion", value))
+ /* Imprecise lithium-ion type */
+ info->technology = POWER_SUPPLY_TECHNOLOGY_LION;
+ else if (!strcmp("lithium-ion-polymer", value))
+ info->technology = POWER_SUPPLY_TECHNOLOGY_LIPO;
+ else if (!strcmp("lithium-ion-iron-phosphate", value))
+ info->technology = POWER_SUPPLY_TECHNOLOGY_LiFe;
+ else if (!strcmp("lithium-ion-manganese-oxide", value))
+ info->technology = POWER_SUPPLY_TECHNOLOGY_LiMn;
+ else
+ dev_warn(&psy->dev, "%s unknown battery type\n", value);
+ }
+
of_property_read_u32(battery_np, "energy-full-design-microwatt-hours",
&info->energy_full_design_uwh);
of_property_read_u32(battery_np, "charge-full-design-microamp-hours",
diff --git a/drivers/power/supply/qcom_smbb.c b/drivers/power/supply/qcom_smbb.c
index c890e1cec720..84cc9fba029d 100644
--- a/drivers/power/supply/qcom_smbb.c
+++ b/drivers/power/supply/qcom_smbb.c
@@ -929,11 +929,8 @@ static int smbb_charger_probe(struct platform_device *pdev)
int irq;
irq = platform_get_irq_byname(pdev, smbb_charger_irqs[i].name);
- if (irq < 0) {
- dev_err(&pdev->dev, "failed to get irq '%s'\n",
- smbb_charger_irqs[i].name);
+ if (irq < 0)
return irq;
- }
smbb_charger_irqs[i].handler(irq, chg);
diff --git a/drivers/power/supply/rn5t618_power.c b/drivers/power/supply/rn5t618_power.c
index 819061918b2a..a5e09ac78a50 100644
--- a/drivers/power/supply/rn5t618_power.c
+++ b/drivers/power/supply/rn5t618_power.c
@@ -9,10 +9,12 @@
#include <linux/device.h>
#include <linux/bitops.h>
#include <linux/errno.h>
+#include <linux/iio/consumer.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/mfd/rn5t618.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/power_supply.h>
#include <linux/regmap.h>
@@ -64,6 +66,8 @@ struct rn5t618_power_info {
struct power_supply *battery;
struct power_supply *usb;
struct power_supply *adp;
+ struct iio_channel *channel_vusb;
+ struct iio_channel *channel_vadp;
int irq;
};
@@ -77,6 +81,7 @@ static enum power_supply_usb_type rn5t618_usb_types[] = {
static enum power_supply_property rn5t618_usb_props[] = {
/* input current limit is not very accurate */
POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT,
+ POWER_SUPPLY_PROP_VOLTAGE_NOW,
POWER_SUPPLY_PROP_STATUS,
POWER_SUPPLY_PROP_USB_TYPE,
POWER_SUPPLY_PROP_ONLINE,
@@ -85,6 +90,7 @@ static enum power_supply_property rn5t618_usb_props[] = {
static enum power_supply_property rn5t618_adp_props[] = {
/* input current limit is not very accurate */
POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT,
+ POWER_SUPPLY_PROP_VOLTAGE_NOW,
POWER_SUPPLY_PROP_STATUS,
POWER_SUPPLY_PROP_ONLINE,
};
@@ -464,6 +470,15 @@ static int rn5t618_adp_get_property(struct power_supply *psy,
val->intval = FROM_CUR_REG(regval);
break;
+ case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+ if (!info->channel_vadp)
+ return -ENODATA;
+
+ ret = iio_read_channel_processed_scale(info->channel_vadp, &val->intval, 1000);
+ if (ret < 0)
+ return ret;
+
+ break;
default:
return -EINVAL;
}
@@ -589,6 +604,15 @@ static int rn5t618_usb_get_property(struct power_supply *psy,
val->intval = FROM_CUR_REG(regval);
}
break;
+ case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+ if (!info->channel_vusb)
+ return -ENODATA;
+
+ ret = iio_read_channel_processed_scale(info->channel_vusb, &val->intval, 1000);
+ if (ret < 0)
+ return ret;
+
+ break;
default:
return -EINVAL;
}
@@ -711,6 +735,20 @@ static int rn5t618_power_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, info);
+ info->channel_vusb = devm_iio_channel_get(&pdev->dev, "vusb");
+ if (IS_ERR(info->channel_vusb)) {
+ if (PTR_ERR(info->channel_vusb) == -ENODEV)
+ return -EPROBE_DEFER;
+ return PTR_ERR(info->channel_vusb);
+ }
+
+ info->channel_vadp = devm_iio_channel_get(&pdev->dev, "vadp");
+ if (IS_ERR(info->channel_vadp)) {
+ if (PTR_ERR(info->channel_vadp) == -ENODEV)
+ return -EPROBE_DEFER;
+ return PTR_ERR(info->channel_vadp);
+ }
+
ret = regmap_read(info->rn5t618->regmap, RN5T618_CONTROL, &v);
if (ret)
return ret;
diff --git a/drivers/power/supply/sbs-battery.c b/drivers/power/supply/sbs-battery.c
index f84dbaab283a..c4a95b01463a 100644
--- a/drivers/power/supply/sbs-battery.c
+++ b/drivers/power/supply/sbs-battery.c
@@ -31,8 +31,9 @@ enum {
REG_CURRENT_AVG,
REG_MAX_ERR,
REG_CAPACITY,
- REG_TIME_TO_EMPTY,
- REG_TIME_TO_FULL,
+ REG_TIME_TO_EMPTY_NOW,
+ REG_TIME_TO_EMPTY_AVG,
+ REG_TIME_TO_FULL_AVG,
REG_STATUS,
REG_CAPACITY_LEVEL,
REG_CYCLE_COUNT,
@@ -102,7 +103,7 @@ static const struct chip_data {
[REG_TEMPERATURE] =
SBS_DATA(POWER_SUPPLY_PROP_TEMP, 0x08, 0, 65535),
[REG_VOLTAGE] =
- SBS_DATA(POWER_SUPPLY_PROP_VOLTAGE_NOW, 0x09, 0, 20000),
+ SBS_DATA(POWER_SUPPLY_PROP_VOLTAGE_NOW, 0x09, 0, 65535),
[REG_CURRENT_NOW] =
SBS_DATA(POWER_SUPPLY_PROP_CURRENT_NOW, 0x0A, -32768, 32767),
[REG_CURRENT_AVG] =
@@ -119,9 +120,11 @@ static const struct chip_data {
SBS_DATA(POWER_SUPPLY_PROP_ENERGY_FULL, 0x10, 0, 65535),
[REG_FULL_CHARGE_CAPACITY_CHARGE] =
SBS_DATA(POWER_SUPPLY_PROP_CHARGE_FULL, 0x10, 0, 65535),
- [REG_TIME_TO_EMPTY] =
+ [REG_TIME_TO_EMPTY_NOW] =
+ SBS_DATA(POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, 0x11, 0, 65535),
+ [REG_TIME_TO_EMPTY_AVG] =
SBS_DATA(POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG, 0x12, 0, 65535),
- [REG_TIME_TO_FULL] =
+ [REG_TIME_TO_FULL_AVG] =
SBS_DATA(POWER_SUPPLY_PROP_TIME_TO_FULL_AVG, 0x13, 0, 65535),
[REG_CHARGE_CURRENT] =
SBS_DATA(POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX, 0x14, 0, 65535),
@@ -165,6 +168,7 @@ static const enum power_supply_property sbs_properties[] = {
POWER_SUPPLY_PROP_CAPACITY,
POWER_SUPPLY_PROP_CAPACITY_ERROR_MARGIN,
POWER_SUPPLY_PROP_TEMP,
+ POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW,
POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG,
POWER_SUPPLY_PROP_TIME_TO_FULL_AVG,
POWER_SUPPLY_PROP_SERIAL_NUMBER,
@@ -748,6 +752,7 @@ static void sbs_unit_adjustment(struct i2c_client *client,
val->intval -= TEMP_KELVIN_TO_CELSIUS;
break;
+ case POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW:
case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
case POWER_SUPPLY_PROP_TIME_TO_FULL_AVG:
/* sbs provides time to empty and time to full in minutes.
@@ -966,6 +971,7 @@ static int sbs_get_property(struct power_supply *psy,
case POWER_SUPPLY_PROP_CURRENT_NOW:
case POWER_SUPPLY_PROP_CURRENT_AVG:
case POWER_SUPPLY_PROP_TEMP:
+ case POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW:
case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
case POWER_SUPPLY_PROP_TIME_TO_FULL_AVG:
case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
diff --git a/drivers/power/supply/sc27xx_fuel_gauge.c b/drivers/power/supply/sc27xx_fuel_gauge.c
index 1ae8374e1ceb..ae45069bd5e1 100644
--- a/drivers/power/supply/sc27xx_fuel_gauge.c
+++ b/drivers/power/supply/sc27xx_fuel_gauge.c
@@ -1229,10 +1229,8 @@ static int sc27xx_fgu_probe(struct platform_device *pdev)
}
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(dev, "no irq resource specified\n");
+ if (irq < 0)
return irq;
- }
ret = devm_request_threaded_irq(data->dev, irq, NULL,
sc27xx_fgu_interrupt,
diff --git a/drivers/power/supply/smb347-charger.c b/drivers/power/supply/smb347-charger.c
index df240420f2de..753944e774c4 100644
--- a/drivers/power/supply/smb347-charger.c
+++ b/drivers/power/supply/smb347-charger.c
@@ -18,6 +18,7 @@
#include <linux/power_supply.h>
#include <linux/property.h>
#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
#include <dt-bindings/power/summit,smb347-charger.h>
@@ -55,6 +56,7 @@
#define CFG_PIN_EN_CTRL_ACTIVE_LOW 0x60
#define CFG_PIN_EN_APSD_IRQ BIT(1)
#define CFG_PIN_EN_CHARGER_ERROR BIT(2)
+#define CFG_PIN_EN_CTRL BIT(4)
#define CFG_THERM 0x07
#define CFG_THERM_SOFT_HOT_COMPENSATION_MASK 0x03
#define CFG_THERM_SOFT_HOT_COMPENSATION_SHIFT 0
@@ -62,12 +64,15 @@
#define CFG_THERM_SOFT_COLD_COMPENSATION_SHIFT 2
#define CFG_THERM_MONITOR_DISABLED BIT(4)
#define CFG_SYSOK 0x08
+#define CFG_SYSOK_INOK_ACTIVE_HIGH BIT(0)
#define CFG_SYSOK_SUSPEND_HARD_LIMIT_DISABLED BIT(2)
#define CFG_OTHER 0x09
#define CFG_OTHER_RID_MASK 0xc0
#define CFG_OTHER_RID_ENABLED_AUTO_OTG 0xc0
#define CFG_OTG 0x0a
#define CFG_OTG_TEMP_THRESHOLD_MASK 0x30
+#define CFG_OTG_CURRENT_LIMIT_250mA BIT(2)
+#define CFG_OTG_CURRENT_LIMIT_750mA BIT(3)
#define CFG_OTG_TEMP_THRESHOLD_SHIFT 4
#define CFG_OTG_CC_COMPENSATION_MASK 0xc0
#define CFG_OTG_CC_COMPENSATION_SHIFT 6
@@ -91,6 +96,7 @@
#define CMD_A 0x30
#define CMD_A_CHG_ENABLED BIT(1)
#define CMD_A_SUSPEND_ENABLED BIT(2)
+#define CMD_A_OTG_ENABLED BIT(4)
#define CMD_A_ALLOW_WRITE BIT(7)
#define CMD_B 0x31
#define CMD_C 0x33
@@ -132,11 +138,12 @@
* @regmap: pointer to driver regmap
* @mains: power_supply instance for AC/DC power
* @usb: power_supply instance for USB power
+ * @usb_rdev: USB VBUS regulator device
* @id: SMB charger ID
* @mains_online: is AC/DC input connected
* @usb_online: is USB input connected
- * @charging_enabled: is charging enabled
* @irq_unsupported: is interrupt unsupported by SMB hardware
+ * @usb_vbus_enabled: is USB VBUS powered by SMB charger
* @max_charge_current: maximum current (in uA) the battery can be charged
* @max_charge_voltage: maximum voltage (in uV) the battery can be charged
* @pre_charge_current: current (in uA) to use in pre-charging phase
@@ -167,6 +174,8 @@
* @use_usb_otg: USB OTG output can be used (not implemented yet)
* @enable_control: how charging enable/disable is controlled
* (driver/pin controls)
+ * @inok_polarity: polarity of INOK signal which denotes presence of external
+ * power supply
*
* @use_main, @use_usb, and @use_usb_otg are means to enable/disable
* hardware support for these. This is useful when we want to have for
@@ -189,11 +198,12 @@ struct smb347_charger {
struct regmap *regmap;
struct power_supply *mains;
struct power_supply *usb;
+ struct regulator_dev *usb_rdev;
unsigned int id;
bool mains_online;
bool usb_online;
- bool charging_enabled;
bool irq_unsupported;
+ bool usb_vbus_enabled;
unsigned int max_charge_current;
unsigned int max_charge_voltage;
@@ -214,6 +224,7 @@ struct smb347_charger {
bool use_usb;
bool use_usb_otg;
unsigned int enable_control;
+ unsigned int inok_polarity;
};
enum smb_charger_chipid {
@@ -358,21 +369,18 @@ static int smb347_charging_status(struct smb347_charger *smb)
static int smb347_charging_set(struct smb347_charger *smb, bool enable)
{
- int ret = 0;
-
if (smb->enable_control != SMB3XX_CHG_ENABLE_SW) {
dev_dbg(smb->dev, "charging enable/disable in SW disabled\n");
return 0;
}
- if (smb->charging_enabled != enable) {
- ret = regmap_update_bits(smb->regmap, CMD_A, CMD_A_CHG_ENABLED,
- enable ? CMD_A_CHG_ENABLED : 0);
- if (!ret)
- smb->charging_enabled = enable;
+ if (enable && smb->usb_vbus_enabled) {
+ dev_dbg(smb->dev, "charging not enabled because USB is in host mode\n");
+ return 0;
}
- return ret;
+ return regmap_update_bits(smb->regmap, CMD_A, CMD_A_CHG_ENABLED,
+ enable ? CMD_A_CHG_ENABLED : 0);
}
static inline int smb347_charging_enable(struct smb347_charger *smb)
@@ -671,10 +679,22 @@ static int smb347_set_temp_limits(struct smb347_charger *smb)
*
* Returns %0 on success and negative errno in case of failure.
*/
-static int smb347_set_writable(struct smb347_charger *smb, bool writable)
+static int smb347_set_writable(struct smb347_charger *smb, bool writable,
+ bool irq_toggle)
{
- return regmap_update_bits(smb->regmap, CMD_A, CMD_A_ALLOW_WRITE,
- writable ? CMD_A_ALLOW_WRITE : 0);
+ struct i2c_client *client = to_i2c_client(smb->dev);
+ int ret;
+
+ if (writable && irq_toggle && !smb->irq_unsupported)
+ disable_irq(client->irq);
+
+ ret = regmap_update_bits(smb->regmap, CMD_A, CMD_A_ALLOW_WRITE,
+ writable ? CMD_A_ALLOW_WRITE : 0);
+
+ if ((!writable || ret) && irq_toggle && !smb->irq_unsupported)
+ enable_irq(client->irq);
+
+ return ret;
}
static int smb347_hw_init(struct smb347_charger *smb)
@@ -682,7 +702,7 @@ static int smb347_hw_init(struct smb347_charger *smb)
unsigned int val;
int ret;
- ret = smb347_set_writable(smb, true);
+ ret = smb347_set_writable(smb, true, false);
if (ret < 0)
return ret;
@@ -724,6 +744,15 @@ static int smb347_hw_init(struct smb347_charger *smb)
if (ret < 0)
goto fail;
+ /* Activate pin control, making it writable. */
+ switch (smb->enable_control) {
+ case SMB3XX_CHG_ENABLE_PIN_ACTIVE_LOW:
+ case SMB3XX_CHG_ENABLE_PIN_ACTIVE_HIGH:
+ ret = regmap_set_bits(smb->regmap, CFG_PIN, CFG_PIN_EN_CTRL);
+ if (ret < 0)
+ goto fail;
+ }
+
/*
* Make the charging functionality controllable by a write to the
* command register unless pin control is specified in the platform
@@ -758,7 +787,7 @@ static int smb347_hw_init(struct smb347_charger *smb)
ret = smb347_start_stop_charging(smb);
fail:
- smb347_set_writable(smb, false);
+ smb347_set_writable(smb, false, false);
return ret;
}
@@ -866,7 +895,7 @@ static int smb347_irq_set(struct smb347_charger *smb, bool enable)
if (smb->irq_unsupported)
return 0;
- ret = smb347_set_writable(smb, true);
+ ret = smb347_set_writable(smb, true, true);
if (ret < 0)
return ret;
@@ -891,7 +920,7 @@ static int smb347_irq_set(struct smb347_charger *smb, bool enable)
ret = regmap_update_bits(smb->regmap, CFG_PIN, CFG_PIN_EN_CHARGER_ERROR,
enable ? CFG_PIN_EN_CHARGER_ERROR : 0);
fail:
- smb347_set_writable(smb, false);
+ smb347_set_writable(smb, false, true);
return ret;
}
@@ -919,7 +948,7 @@ static int smb347_irq_init(struct smb347_charger *smb,
if (!client->irq)
return 0;
- ret = smb347_set_writable(smb, true);
+ ret = smb347_set_writable(smb, true, false);
if (ret < 0)
return ret;
@@ -931,7 +960,7 @@ static int smb347_irq_init(struct smb347_charger *smb,
CFG_STAT_ACTIVE_HIGH | CFG_STAT_DISABLED,
CFG_STAT_DISABLED);
- smb347_set_writable(smb, false);
+ smb347_set_writable(smb, false, false);
if (ret < 0) {
dev_warn(smb->dev, "failed to initialize IRQ: %d\n", ret);
@@ -1241,6 +1270,13 @@ static void smb347_dt_parse_dev_info(struct smb347_charger *smb)
/* Select charging control */
device_property_read_u32(dev, "summit,enable-charge-control",
&smb->enable_control);
+
+ /*
+ * Polarity of INOK signal indicating presence of external power
+ * supply connected to the charger.
+ */
+ device_property_read_u32(dev, "summit,inok-polarity",
+ &smb->inok_polarity);
}
static int smb347_get_battery_info(struct smb347_charger *smb)
@@ -1292,12 +1328,176 @@ static int smb347_get_battery_info(struct smb347_charger *smb)
return 0;
}
+/*
+ * get_current_limit regulator op for USB VBUS.
+ *
+ * Returns the limit in uA decoded from CFG_OTG (750000 or 250000), 0 when
+ * the 250mA bit is clear, or a negative errno if the register read fails.
+ */
+static int smb347_usb_vbus_get_current_limit(struct regulator_dev *rdev)
+{
+	struct smb347_charger *smb = rdev_get_drvdata(rdev);
+	unsigned int cfg_otg;
+	int err;
+
+	err = regmap_read(smb->regmap, CFG_OTG, &cfg_otg);
+	if (err < 0)
+		return err;
+
+	/*
+	 * It's unknown what happens if this bit is unset due to lack of
+	 * access to the datasheet, assume it's limit-enable.
+	 */
+	if (cfg_otg & CFG_OTG_CURRENT_LIMIT_250mA) {
+		if (cfg_otg & CFG_OTG_CURRENT_LIMIT_750mA)
+			return 750000;
+
+		return 250000;
+	}
+
+	return 0;
+}
+
+/*
+ * Program the USB VBUS current-limit bits in CFG_OTG.
+ *
+ * The 250mA bit is always set; the 750mA bit is set in addition when
+ * @max_uA is at least 750000.  The caller is expected to have made the
+ * configuration registers writable.
+ *
+ * Returns the result of the register update (negative errno on failure).
+ */
+static int smb347_usb_vbus_set_new_current_limit(struct smb347_charger *smb,
+						  int max_uA)
+{
+	unsigned int bits = CFG_OTG_CURRENT_LIMIT_250mA;
+	int err;
+
+	if (max_uA >= 750000)
+		bits |= CFG_OTG_CURRENT_LIMIT_750mA;
+
+	err = regmap_update_bits(smb->regmap, CFG_OTG,
+				 CFG_OTG_CURRENT_LIMIT_750mA |
+				 CFG_OTG_CURRENT_LIMIT_250mA,
+				 bits);
+	if (err < 0)
+		dev_err(smb->dev, "failed to change USB current limit\n");
+
+	return err;
+}
+
+/*
+ * set_current_limit regulator op for USB VBUS.
+ *
+ * Opens the write window, programs the limit derived from @max_uA and closes
+ * the window again.  @min_uA is not used.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int smb347_usb_vbus_set_current_limit(struct regulator_dev *rdev,
+					      int min_uA, int max_uA)
+{
+	struct smb347_charger *smb = rdev_get_drvdata(rdev);
+	int err = smb347_set_writable(smb, true, true);
+
+	if (err < 0)
+		return err;
+
+	err = smb347_usb_vbus_set_new_current_limit(smb, max_uA);
+
+	/* Close the write window whether or not the update succeeded. */
+	smb347_set_writable(smb, false, true);
+
+	return err;
+}
+
+/*
+ * enable regulator op: start powering USB VBUS from the charger.
+ *
+ * With the write window open, the sequence is:
+ *  1. disable charging (result not checked);
+ *  2. if "summit,needs-inok-toggle" is set, flip the INOK polarity bit so
+ *     that INOK reads as inactive;
+ *  3. remember the configured current limit and preset the limit to 250 mA;
+ *  4. set CMD_A_OTG_ENABLED and record usb_vbus_enabled;
+ *  5. restore the remembered current limit.
+ *
+ * The write window is closed on every path after it was opened.
+ * Returns a negative errno from the failing step; on success the value of
+ * the last register update is returned.
+ */
+static int smb347_usb_vbus_regulator_enable(struct regulator_dev *rdev)
+{
+	struct smb347_charger *smb = rdev_get_drvdata(rdev);
+	int saved_uA;
+	int err;
+
+	err = smb347_set_writable(smb, true, true);
+	if (err < 0)
+		return err;
+
+	smb347_charging_disable(smb);
+
+	if (device_property_read_bool(&rdev->dev, "summit,needs-inok-toggle")) {
+		unsigned int polarity_bit;
+
+		if (smb->inok_polarity == SMB3XX_SYSOK_INOK_ACTIVE_LOW)
+			polarity_bit = CFG_SYSOK_INOK_ACTIVE_HIGH;
+		else
+			polarity_bit = 0;
+
+		/*
+		 * VBUS won't be powered if INOK is active, so we need to
+		 * manually disable INOK on some platforms.
+		 */
+		err = regmap_update_bits(smb->regmap, CFG_SYSOK,
+					 CFG_SYSOK_INOK_ACTIVE_HIGH,
+					 polarity_bit);
+		if (err < 0) {
+			dev_err(smb->dev, "failed to disable INOK\n");
+			goto out;
+		}
+	}
+
+	err = smb347_usb_vbus_get_current_limit(rdev);
+	if (err < 0) {
+		dev_err(smb->dev, "failed to get USB VBUS current limit\n");
+		goto out;
+	}
+
+	/* A non-negative result is the currently configured limit. */
+	saved_uA = err;
+
+	err = smb347_usb_vbus_set_new_current_limit(smb, 250000);
+	if (err < 0) {
+		dev_err(smb->dev, "failed to preset USB VBUS current limit\n");
+		goto out;
+	}
+
+	err = regmap_set_bits(smb->regmap, CMD_A, CMD_A_OTG_ENABLED);
+	if (err < 0) {
+		dev_err(smb->dev, "failed to enable USB VBUS\n");
+		goto out;
+	}
+
+	smb->usb_vbus_enabled = true;
+
+	err = smb347_usb_vbus_set_new_current_limit(smb, saved_uA);
+	if (err < 0)
+		dev_err(smb->dev, "failed to restore USB VBUS current limit\n");
+
+out:
+	smb347_set_writable(smb, false, true);
+
+	return err;
+}
+
+/*
+ * disable regulator op: stop powering USB VBUS from the charger.
+ *
+ * With the write window open: clears CMD_A_OTG_ENABLED, records the new
+ * state, puts the INOK polarity bit back to the configured polarity when
+ * "summit,needs-inok-toggle" is set, and finally re-evaluates charging via
+ * smb347_start_stop_charging() (its result is not checked).
+ *
+ * The write window is closed on every path after it was opened.
+ * Returns a negative errno from the failing step, otherwise the value of the
+ * last register update.
+ */
+static int smb347_usb_vbus_regulator_disable(struct regulator_dev *rdev)
+{
+	struct smb347_charger *smb = rdev_get_drvdata(rdev);
+	int err;
+
+	err = smb347_set_writable(smb, true, true);
+	if (err < 0)
+		return err;
+
+	err = regmap_clear_bits(smb->regmap, CMD_A, CMD_A_OTG_ENABLED);
+	if (err < 0) {
+		dev_err(smb->dev, "failed to disable USB VBUS\n");
+		goto out;
+	}
+
+	smb->usb_vbus_enabled = false;
+
+	if (device_property_read_bool(&rdev->dev, "summit,needs-inok-toggle")) {
+		unsigned int polarity_bit;
+
+		if (smb->inok_polarity == SMB3XX_SYSOK_INOK_ACTIVE_HIGH)
+			polarity_bit = CFG_SYSOK_INOK_ACTIVE_HIGH;
+		else
+			polarity_bit = 0;
+
+		err = regmap_update_bits(smb->regmap, CFG_SYSOK,
+					 CFG_SYSOK_INOK_ACTIVE_HIGH,
+					 polarity_bit);
+		if (err < 0) {
+			dev_err(smb->dev, "failed to enable INOK\n");
+			goto out;
+		}
+	}
+
+	smb347_start_stop_charging(smb);
+
+out:
+	smb347_set_writable(smb, false, true);
+
+	return err;
+}
+
static const struct regmap_config smb347_regmap = {
.reg_bits = 8,
.val_bits = 8,
.max_register = SMB347_MAX_REGISTER,
.volatile_reg = smb347_volatile_reg,
.readable_reg = smb347_readable_reg,
+ .cache_type = REGCACHE_FLAT,
+ .num_reg_defaults_raw = SMB347_MAX_REGISTER,
+};
+
+/*
+ * USB VBUS regulator operations.  Only the state query uses the generic
+ * regmap helper; enable/disable and the current limit are driver-specific.
+ */
+static const struct regulator_ops smb347_usb_vbus_regulator_ops = {
+	.enable			= smb347_usb_vbus_regulator_enable,
+	.disable		= smb347_usb_vbus_regulator_disable,
+	.is_enabled		= regulator_is_enabled_regmap,
+	.set_current_limit	= smb347_usb_vbus_set_current_limit,
+	.get_current_limit	= smb347_usb_vbus_get_current_limit,
+};
static const struct power_supply_desc smb347_mains_desc = {
@@ -1316,10 +1516,24 @@ static const struct power_supply_desc smb347_usb_desc = {
.num_properties = ARRAY_SIZE(smb347_properties),
};
+/* Fixed 5 V USB VBUS regulator, switched by CMD_A_OTG_ENABLED in CMD_A. */
+static const struct regulator_desc smb347_usb_vbus_regulator_desc = {
+	.name		= "smb347-usb-vbus",
+	.of_match	= of_match_ptr("usb-vbus"),
+	.type		= REGULATOR_VOLTAGE,
+	.owner		= THIS_MODULE,
+	.ops		= &smb347_usb_vbus_regulator_ops,
+
+	/* single fixed output voltage */
+	.n_voltages	= 1,
+	.fixed_uV	= 5000000,
+
+	/* enable bit read by regulator_is_enabled_regmap() */
+	.enable_reg	= CMD_A,
+	.enable_mask	= CMD_A_OTG_ENABLED,
+	.enable_val	= CMD_A_OTG_ENABLED,
+};
+
static int smb347_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
struct power_supply_config mains_usb_cfg = {};
+ struct regulator_config usb_rdev_cfg = {};
struct device *dev = &client->dev;
struct smb347_charger *smb;
int ret;
@@ -1367,6 +1581,18 @@ static int smb347_probe(struct i2c_client *client,
if (ret)
return ret;
+ usb_rdev_cfg.dev = dev;
+ usb_rdev_cfg.driver_data = smb;
+ usb_rdev_cfg.regmap = smb->regmap;
+
+ smb->usb_rdev = devm_regulator_register(dev,
+ &smb347_usb_vbus_regulator_desc,
+ &usb_rdev_cfg);
+ if (IS_ERR(smb->usb_rdev)) {
+ smb347_irq_disable(smb);
+ return PTR_ERR(smb->usb_rdev);
+ }
+
return 0;
}
@@ -1374,11 +1600,17 @@ static int smb347_remove(struct i2c_client *client)
{
struct smb347_charger *smb = i2c_get_clientdata(client);
+ smb347_usb_vbus_regulator_disable(smb->usb_rdev);
smb347_irq_disable(smb);
return 0;
}
+/* Shutdown callback: reuses the remove path; its return value is ignored. */
+static void smb347_shutdown(struct i2c_client *client)
+{
+ smb347_remove(client);
+}
+
static const struct i2c_device_id smb347_id[] = {
{ "smb345", SMB345 },
{ "smb347", SMB347 },
@@ -1402,6 +1634,7 @@ static struct i2c_driver smb347_driver = {
},
.probe = smb347_probe,
.remove = smb347_remove,
+ .shutdown = smb347_shutdown,
.id_table = smb347_id,
};
module_i2c_driver(smb347_driver);
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 73cf68af9770..7c0099e7a6d7 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -158,16 +158,16 @@ static int get_energy_counter(struct powercap_zone *power_zone,
/* prevent CPU hotplug, make sure the RAPL domain does not go
* away while reading the counter.
*/
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now)) {
*energy_raw = energy_now;
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
- put_online_cpus();
+ cpus_read_unlock();
return -EIO;
}
@@ -216,11 +216,11 @@ static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
return -EACCES;
- get_online_cpus();
+ cpus_read_lock();
rapl_write_data_raw(rd, PL1_ENABLE, mode);
if (rapl_defaults->set_floor_freq)
rapl_defaults->set_floor_freq(rd, mode);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
@@ -234,13 +234,13 @@ static int get_domain_enable(struct powercap_zone *power_zone, bool *mode)
*mode = false;
return 0;
}
- get_online_cpus();
+ cpus_read_lock();
if (rapl_read_data_raw(rd, PL1_ENABLE, true, &val)) {
- put_online_cpus();
+ cpus_read_unlock();
return -EIO;
}
*mode = val;
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
@@ -317,7 +317,7 @@ static int set_power_limit(struct powercap_zone *power_zone, int cid,
int ret = 0;
int id;
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
@@ -350,7 +350,7 @@ static int set_power_limit(struct powercap_zone *power_zone, int cid,
if (!ret)
package_power_limit_irq_save(rp);
set_exit:
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -363,7 +363,7 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
int ret = 0;
int id;
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
@@ -382,7 +382,7 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
prim = POWER_LIMIT4;
break;
default:
- put_online_cpus();
+ cpus_read_unlock();
return -EINVAL;
}
if (rapl_read_data_raw(rd, prim, true, &val))
@@ -391,7 +391,7 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
*data = val;
get_exit:
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -403,7 +403,7 @@ static int set_time_window(struct powercap_zone *power_zone, int cid,
int ret = 0;
int id;
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
@@ -423,7 +423,7 @@ static int set_time_window(struct powercap_zone *power_zone, int cid,
}
set_time_exit:
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -435,7 +435,7 @@ static int get_time_window(struct powercap_zone *power_zone, int cid,
int ret = 0;
int id;
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
@@ -458,14 +458,14 @@ static int get_time_window(struct powercap_zone *power_zone, int cid,
val = 0;
break;
default:
- put_online_cpus();
+ cpus_read_unlock();
return -EINVAL;
}
if (!ret)
*data = val;
get_time_exit:
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -491,7 +491,7 @@ static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data)
int prim;
int ret = 0;
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
switch (rd->rpl[id].prim_id) {
case PL1_ENABLE:
@@ -504,7 +504,7 @@ static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data)
prim = MAX_POWER;
break;
default:
- put_online_cpus();
+ cpus_read_unlock();
return -EINVAL;
}
if (rapl_read_data_raw(rd, prim, true, &val))
@@ -516,7 +516,7 @@ static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data)
if (rd->rpl[id].prim_id == PL4_ENABLE)
*data = *data * 2;
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -1358,7 +1358,7 @@ static void power_limit_state_save(void)
struct rapl_domain *rd;
int nr_pl, ret, i;
- get_online_cpus();
+ cpus_read_lock();
list_for_each_entry(rp, &rapl_packages, plist) {
if (!rp->power_zone)
continue;
@@ -1390,7 +1390,7 @@ static void power_limit_state_save(void)
}
}
}
- put_online_cpus();
+ cpus_read_unlock();
}
static void power_limit_state_restore(void)
@@ -1399,7 +1399,7 @@ static void power_limit_state_restore(void)
struct rapl_domain *rd;
int nr_pl, i;
- get_online_cpus();
+ cpus_read_lock();
list_for_each_entry(rp, &rapl_packages, plist) {
if (!rp->power_zone)
continue;
@@ -1425,7 +1425,7 @@ static void power_limit_state_restore(void)
}
}
}
- put_online_cpus();
+ cpus_read_unlock();
}
static int rapl_pm_callback(struct notifier_block *nb,
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index cc3b22881bfe..1be45f36ab6c 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -138,6 +138,8 @@ static int rapl_msr_write_raw(int cpu, struct reg_action *ra)
/* List of verified CPUs. */
static const struct x86_cpu_id pl4_support_ids[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_TIGERLAKE_L, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_L, X86_FEATURE_ANY },
{}
};
diff --git a/drivers/pps/clients/pps_parport.c b/drivers/pps/clients/pps_parport.c
index 7a41fb7b0dec..42f93d4c6ee3 100644
--- a/drivers/pps/clients/pps_parport.c
+++ b/drivers/pps/clients/pps_parport.c
@@ -22,8 +22,6 @@
#include <linux/parport.h>
#include <linux/pps_kernel.h>
-#define DRVDESC "parallel port PPS client"
-
/* module parameters */
#define CLEAR_WAIT_MAX 100
@@ -138,6 +136,12 @@ static void parport_attach(struct parport *port)
.dev = NULL
};
+ if (clear_wait > CLEAR_WAIT_MAX) {
+ pr_err("clear_wait value should be not greater then %d\n",
+ CLEAR_WAIT_MAX);
+ return;
+ }
+
device = kzalloc(sizeof(struct pps_client_pp), GFP_KERNEL);
if (!device) {
pr_err("memory allocation failed, not attaching\n");
@@ -214,38 +218,8 @@ static struct parport_driver pps_parport_driver = {
.detach = parport_detach,
.devmodel = true,
};
-
-/* module staff */
-
-static int __init pps_parport_init(void)
-{
- int ret;
-
- pr_info(DRVDESC "\n");
-
- if (clear_wait > CLEAR_WAIT_MAX) {
- pr_err("clear_wait value should be not greater"
- " then %d\n", CLEAR_WAIT_MAX);
- return -EINVAL;
- }
-
- ret = parport_register_driver(&pps_parport_driver);
- if (ret) {
- pr_err("unable to register with parport\n");
- return ret;
- }
-
- return 0;
-}
-
-static void __exit pps_parport_exit(void)
-{
- parport_unregister_driver(&pps_parport_driver);
-}
-
-module_init(pps_parport_init);
-module_exit(pps_parport_exit);
+module_parport_driver(pps_parport_driver);
MODULE_AUTHOR("Alexander Gordeev <lasaine@lvk.cs.msu.su>");
-MODULE_DESCRIPTION(DRVDESC);
+MODULE_DESCRIPTION("parallel port PPS client");
MODULE_LICENSE("GPL");
diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index 8c20e524e9ad..f02bedf41264 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -8,6 +8,7 @@ menu "PTP clock support"
config PTP_1588_CLOCK
tristate "PTP clock support"
depends on NET && POSIX_TIMERS
+ default ETHERNET
select PPS
select NET_PTP_CLASSIFY
help
@@ -26,6 +27,18 @@ config PTP_1588_CLOCK
To compile this driver as a module, choose M here: the module
will be called ptp.
+config PTP_1588_CLOCK_OPTIONAL
+ tristate
+ default y if PTP_1588_CLOCK=n
+ default PTP_1588_CLOCK
+ help
+ Drivers that can optionally use the PTP_1588_CLOCK framework
+ should depend on this symbol to prevent them from being built
+ into vmlinux while the PTP support itself is in a loadable
+ module.
+ If PTP support is disabled, this dependency will still be
+ met, and drivers refer to dummy helpers.
+
config PTP_1588_CLOCK_DTE
tristate "Broadcom DTE as PTP clock"
depends on PTP_1588_CLOCK
@@ -90,8 +103,9 @@ config PTP_1588_CLOCK_INES
config PTP_1588_CLOCK_PCH
tristate "Intel PCH EG20T as PTP clock"
depends on X86_32 || COMPILE_TEST
- depends on HAS_IOMEM && NET
- imply PTP_1588_CLOCK
+ depends on HAS_IOMEM && PCI
+ depends on NET
+ depends on PTP_1588_CLOCK
help
This driver adds support for using the PCH EG20T as a PTP
clock. The hardware supports time stamping of PTP packets
@@ -157,7 +171,10 @@ config PTP_1588_CLOCK_OCP
tristate "OpenCompute TimeCard as PTP clock"
depends on PTP_1588_CLOCK
depends on HAS_IOMEM && PCI
- default n
+ depends on I2C && MTD
+ depends on SERIAL_8250
+ depends on !S390
+ select NET_DEVLINK
help
This driver adds support for an OpenCompute time card.
diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
index 0d1034e3ed0f..caf9b37c5eb1 100644
--- a/drivers/ptp/ptp_ocp.c
+++ b/drivers/ptp/ptp_ocp.c
@@ -6,15 +6,29 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
+#include <linux/serial_8250.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
#include <linux/ptp_clock_kernel.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/xilinx_spi.h>
+#include <net/devlink.h>
+#include <linux/i2c.h>
+#include <linux/mtd/mtd.h>
-static const struct pci_device_id ptp_ocp_pcidev_id[] = {
- { PCI_DEVICE(0x1d9b, 0x0400) },
- { 0 }
-};
-MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id);
+#ifndef PCI_VENDOR_ID_FACEBOOK
+#define PCI_VENDOR_ID_FACEBOOK 0x1d9b
+#endif
-#define OCP_REGISTER_OFFSET 0x01000000
+#ifndef PCI_DEVICE_ID_FACEBOOK_TIMECARD
+#define PCI_DEVICE_ID_FACEBOOK_TIMECARD 0x0400
+#endif
+
+/* Device class assigned to each card's "ocp%d" device in ptp_ocp_device_init(). */
+static struct class timecard_class = {
+ .owner = THIS_MODULE,
+ .name = "timecard",
+};
struct ocp_reg {
u32 ctrl;
@@ -29,18 +43,29 @@ struct ocp_reg {
u32 __pad1[2];
u32 offset_ns;
u32 offset_window_ns;
+ u32 __pad2[2];
+ u32 drift_ns;
+ u32 drift_window_ns;
+ u32 __pad3[6];
+ u32 servo_offset_p;
+ u32 servo_offset_i;
+ u32 servo_drift_p;
+ u32 servo_drift_i;
};
#define OCP_CTRL_ENABLE BIT(0)
#define OCP_CTRL_ADJUST_TIME BIT(1)
#define OCP_CTRL_ADJUST_OFFSET BIT(2)
+#define OCP_CTRL_ADJUST_DRIFT BIT(3)
+#define OCP_CTRL_ADJUST_SERVO BIT(8)
#define OCP_CTRL_READ_TIME_REQ BIT(30)
#define OCP_CTRL_READ_TIME_DONE BIT(31)
#define OCP_STATUS_IN_SYNC BIT(0)
+#define OCP_STATUS_IN_HOLDOVER BIT(1)
#define OCP_SELECT_CLK_NONE 0
-#define OCP_SELECT_CLK_REG 6
+#define OCP_SELECT_CLK_REG 0xfe
struct tod_reg {
u32 ctrl;
@@ -55,8 +80,6 @@ struct tod_reg {
u32 leap;
};
-#define TOD_REGISTER_OFFSET 0x01050000
-
#define TOD_CTRL_PROTOCOL BIT(28)
#define TOD_CTRL_DISABLE_FMT_A BIT(17)
#define TOD_CTRL_DISABLE_FMT_B BIT(16)
@@ -68,16 +91,264 @@ struct tod_reg {
#define TOD_STATUS_UTC_VALID BIT(8)
#define TOD_STATUS_LEAP_VALID BIT(16)
+/*
+ * Register layout of one timestamper unit, accessed through
+ * ptp_ocp_ext_src::mem.  This file uses enable/intr_mask/intr to switch
+ * the unit on and off and time_sec/time_ns to fetch a captured timestamp.
+ */
+struct ts_reg {
+ u32 enable;
+ u32 error;
+ u32 polarity;
+ u32 version;
+ u32 __pad0[4];
+ u32 cable_delay;
+ u32 __pad1[3];
+ u32 intr;
+ u32 intr_mask;
+ u32 event_count;
+ u32 __pad2[1];
+ u32 ts_count;
+ u32 time_ns;
+ u32 time_sec;
+ u32 data_width;
+ u32 data;
+};
+
+/*
+ * Register layout shared by the pps_to_ext and pps_to_clk blocks.
+ * The watchdog polls ->status for the PPS_STATUS_* bits.
+ */
+struct pps_reg {
+ u32 ctrl;
+ u32 status;
+ u32 __pad0[6];
+ u32 cable_delay;
+};
+
+#define PPS_STATUS_FILTER_ERR BIT(0)
+#define PPS_STATUS_SUPERV_ERR BIT(1)
+
+/*
+ * Image information block.  Readers in this file treat a non-zero low
+ * 16 bits of ->version as a regular image and otherwise use the high
+ * 16 bits (reported as "loader" / "golden image").
+ */
+struct img_reg {
+ u32 version;
+};
+
+/*
+ * Per-resource description consumed by ptp_ocp_register_spi():
+ * @name is the platform device name, @pci_offset is added to the
+ * generated platform device id, and @data/@data_size are handed over as
+ * the platform data blob.
+ */
+struct ptp_ocp_flash_info {
+ const char *name;
+ int pci_offset;
+ int data_size;
+ void *data;
+};
+
+/*
+ * Static description of an external source:
+ * @name:    used in the irq name ("ocp%d.%s")
+ * @index:   reported as the event index by the irq handler
+ * @irq_fcn: interrupt handler passed to pci_request_irq()
+ * @enable:  on/off hook; called with the owning ptp_ocp_ext_src as @priv
+ */
+struct ptp_ocp_ext_info {
+ const char *name;
+ int index;
+ irqreturn_t (*irq_fcn)(int irq, void *priv);
+ int (*enable)(void *priv, bool enable);
+};
+
+/*
+ * Runtime state of one registered external source (allocated in
+ * ptp_ocp_register_ext(), freed in ptp_ocp_unregister_ext()).
+ */
+struct ptp_ocp_ext_src {
+ void __iomem *mem;
+ struct ptp_ocp *bp;
+ struct ptp_ocp_ext_info *info;
+ int irq_vec;
+};
+
struct ptp_ocp {
struct pci_dev *pdev;
+ struct device dev;
spinlock_t lock;
- void __iomem *base;
struct ocp_reg __iomem *reg;
struct tod_reg __iomem *tod;
+ struct pps_reg __iomem *pps_to_ext;
+ struct pps_reg __iomem *pps_to_clk;
+ struct ptp_ocp_ext_src *pps;
+ struct ptp_ocp_ext_src *ts0;
+ struct ptp_ocp_ext_src *ts1;
+ struct img_reg __iomem *image;
struct ptp_clock *ptp;
struct ptp_clock_info ptp_info;
+ struct platform_device *i2c_ctrl;
+ struct platform_device *spi_flash;
+ struct clk_hw *i2c_clk;
+ struct timer_list watchdog;
+ time64_t gnss_lost;
+ int id;
+ int n_irqs;
+ int gnss_port;
+ int mac_port; /* miniature atomic clock */
+ u8 serial[6];
+ int flash_start;
+ bool has_serial;
};
+/*
+ * One entry of a board resource table.
+ * @offset:    offset from the start of PCI BAR 0
+ * @size:      length of the register window
+ * @irq_vec:   PCI irq vector index used by the resource, if any
+ * @setup:     registration callback; a NULL @setup terminates the table
+ * @extra:     type-specific description (e.g. ptp_ocp_ext_info)
+ * @bp_offset: byte offset inside struct ptp_ocp where the setup callback
+ *             stores its result via bp_assign_entry()
+ */
+struct ocp_resource {
+ unsigned long offset;
+ int size;
+ int irq_vec;
+ int (*setup)(struct ptp_ocp *bp, struct ocp_resource *r);
+ void *extra;
+ unsigned long bp_offset;
+};
+
+static int ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_serial(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r);
+static irqreturn_t ptp_ocp_ts_irq(int irq, void *priv);
+static int ptp_ocp_ts_enable(void *priv, bool enable);
+
+/*
+ * Store @val into the struct ptp_ocp member located at (res)->bp_offset.
+ * The store is done through a pointer of typeof(val), so @val must have
+ * exactly the type of the destination member.
+ */
+#define bp_assign_entry(bp, res, val) ({ \
+ uintptr_t addr = (uintptr_t)(bp) + (res)->bp_offset; \
+ *(typeof(val) *)addr = val; \
+})
+
+/*
+ * Designated-initializer helpers for struct ocp_resource entries:
+ * OCP_RES_LOCATION() records where in struct ptp_ocp the result lives,
+ * and each OCP_*_RESOURCE() variant additionally selects the matching
+ * ptp_ocp_register_*() setup callback.
+ */
+#define OCP_RES_LOCATION(member) \
+ .bp_offset = offsetof(struct ptp_ocp, member)
+
+#define OCP_MEM_RESOURCE(member) \
+ OCP_RES_LOCATION(member), .setup = ptp_ocp_register_mem
+
+#define OCP_SERIAL_RESOURCE(member) \
+ OCP_RES_LOCATION(member), .setup = ptp_ocp_register_serial
+
+#define OCP_I2C_RESOURCE(member) \
+ OCP_RES_LOCATION(member), .setup = ptp_ocp_register_i2c
+
+#define OCP_SPI_RESOURCE(member) \
+ OCP_RES_LOCATION(member), .setup = ptp_ocp_register_spi
+
+#define OCP_EXT_RESOURCE(member) \
+ OCP_RES_LOCATION(member), .setup = ptp_ocp_register_ext
+
+/* This is the MSI vector mapping used.
+ * 0: N/C
+ * 1: TS0
+ * 2: TS1
+ * 3: GPS
+ * 4: GPS2 (n/c)
+ * 5: MAC
+ * 6: SPI IMU (inertial measurement unit)
+ * 7: I2C oscillator
+ * 8: HWICAP
+ * 9: SPI Flash
+ */
+
+/*
+ * Resource table for the Facebook TimeCard.  Entries are set up in array
+ * order by ptp_ocp_register_resources(); the entry with only .setup set
+ * runs the board init last, and the empty entry terminates the table.
+ */
+static struct ocp_resource ocp_fb_resource[] = {
+ {
+ OCP_MEM_RESOURCE(reg),
+ .offset = 0x01000000, .size = 0x10000,
+ },
+ {
+ OCP_EXT_RESOURCE(ts0),
+ .offset = 0x01010000, .size = 0x10000, .irq_vec = 1,
+ .extra = &(struct ptp_ocp_ext_info) {
+ .name = "ts0", .index = 0,
+ .irq_fcn = ptp_ocp_ts_irq,
+ .enable = ptp_ocp_ts_enable,
+ },
+ },
+ {
+ OCP_EXT_RESOURCE(ts1),
+ .offset = 0x01020000, .size = 0x10000, .irq_vec = 2,
+ .extra = &(struct ptp_ocp_ext_info) {
+ .name = "ts1", .index = 1,
+ .irq_fcn = ptp_ocp_ts_irq,
+ .enable = ptp_ocp_ts_enable,
+ },
+ },
+ {
+ OCP_MEM_RESOURCE(pps_to_ext),
+ .offset = 0x01030000, .size = 0x10000,
+ },
+ {
+ OCP_MEM_RESOURCE(pps_to_clk),
+ .offset = 0x01040000, .size = 0x10000,
+ },
+ {
+ OCP_MEM_RESOURCE(tod),
+ .offset = 0x01050000, .size = 0x10000,
+ },
+ {
+ OCP_MEM_RESOURCE(image),
+ .offset = 0x00020000, .size = 0x1000,
+ },
+ {
+ OCP_I2C_RESOURCE(i2c_ctrl),
+ .offset = 0x00150000, .size = 0x10000, .irq_vec = 7,
+ },
+ {
+ OCP_SERIAL_RESOURCE(gnss_port),
+ .offset = 0x00160000 + 0x1000, .irq_vec = 3,
+ },
+ {
+ OCP_SERIAL_RESOURCE(mac_port),
+ .offset = 0x00180000 + 0x1000, .irq_vec = 5,
+ },
+ {
+ OCP_SPI_RESOURCE(spi_flash),
+ .offset = 0x00310000, .size = 0x10000, .irq_vec = 9,
+ .extra = &(struct ptp_ocp_flash_info) {
+ .name = "xilinx_spi", .pci_offset = 0,
+ .data_size = sizeof(struct xspi_platform_data),
+ .data = &(struct xspi_platform_data) {
+ .num_chipselect = 1,
+ .bits_per_word = 8,
+ .num_devices = 1,
+ .devices = &(struct spi_board_info) {
+ .modalias = "spi-nor",
+ },
+ },
+ },
+ },
+ {
+ .setup = ptp_ocp_fb_board_init,
+ },
+ { }
+};
+
+/*
+ * Supported PCI ids.  driver_data carries a pointer to the board's
+ * resource table, the form ptp_ocp_register_resources() expects.
+ */
+static const struct pci_device_id ptp_ocp_pcidev_id[] = {
+ { PCI_DEVICE_DATA(FACEBOOK, TIMECARD, &ocp_fb_resource) },
+ { 0 }
+};
+MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id);
+
+static DEFINE_MUTEX(ptp_ocp_lock);
+static DEFINE_IDR(ptp_ocp_idr);
+
+/*
+ * Name <-> value table for the clock source selection, used by the
+ * clock_source / available_clock_sources sysfs attributes and by the
+ * probe-time info message.
+ */
+static struct {
+ const char *name;
+ int value;
+} ptp_ocp_clock[] = {
+ { .name = "NONE", .value = 0 },
+ { .name = "TOD", .value = 1 },
+ { .name = "IRIG", .value = 2 },
+ { .name = "PPS", .value = 3 },
+ { .name = "PTP", .value = 4 },
+ { .name = "RTC", .value = 5 },
+ { .name = "DCF", .value = 6 },
+ { .name = "REGS", .value = 0xfe },
+ { .name = "EXT", .value = 0xff },
+};
+
+/* Translate a clock selection value into its name; NULL if it is not in ptp_ocp_clock[]. */
+static const char *
+ptp_ocp_clock_name_from_val(int val)
+{
+	const char *found = NULL;
+	int idx = 0;
+
+	while (idx < ARRAY_SIZE(ptp_ocp_clock)) {
+		if (ptp_ocp_clock[idx].value == val) {
+			found = ptp_ocp_clock[idx].name;
+			break;
+		}
+		idx++;
+	}
+	return found;
+}
+
+/*
+ * Translate a clock name into its selection value.  The comparison is
+ * case-insensitive and only covers the length of each table name, so
+ * trailing input (such as a newline) is ignored.  Returns -EINVAL when
+ * no table entry matches.
+ */
+static int
+ptp_ocp_clock_val_from_name(const char *name)
+{
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(ptp_ocp_clock); idx++) {
+		const char *candidate = ptp_ocp_clock[idx].name;
+
+		if (strncasecmp(name, candidate, strlen(candidate)) == 0)
+			return ptp_ocp_clock[idx].value;
+	}
+	return -EINVAL;
+}
+
static int
__ptp_ocp_gettime_locked(struct ptp_ocp *bp, struct timespec64 *ts,
struct ptp_system_timestamp *sts)
@@ -192,6 +463,45 @@ ptp_ocp_null_adjfine(struct ptp_clock_info *ptp_info, long scaled_ppm)
return -EOPNOTSUPP;
}
+/* Phase adjustment is not implemented; every request is rejected. */
+static int
+ptp_ocp_adjphase(struct ptp_clock_info *ptp_info, s32 phase_ns)
+{
+	return -EOPNOTSUPP;
+}
+
+/*
+ * ptp_clock_info::enable hook.  Picks the external source addressed by
+ * the request (ts0/ts1 for EXTTS index 0/1, pps for PPS) and forwards
+ * the on/off request to its enable callback.
+ *
+ * Returns -EOPNOTSUPP for other request types, -ENXIO when the selected
+ * source does not exist, otherwise the callback's result.
+ */
+static int
+ptp_ocp_enable(struct ptp_clock_info *ptp_info, struct ptp_clock_request *rq,
+	       int on)
+{
+	struct ptp_ocp *bp = container_of(ptp_info, struct ptp_ocp, ptp_info);
+	struct ptp_ocp_ext_src *src;
+
+	if (rq->type == PTP_CLK_REQ_PPS) {
+		src = bp->pps;
+	} else if (rq->type == PTP_CLK_REQ_EXTTS) {
+		if (rq->extts.index == 0)
+			src = bp->ts0;
+		else if (rq->extts.index == 1)
+			src = bp->ts1;
+		else
+			src = NULL;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	if (!src)
+		return -ENXIO;
+
+	return src->info->enable(src, on);
+}
+
static const struct ptp_clock_info ptp_ocp_clock_info = {
.owner = THIS_MODULE,
.name = KBUILD_MODNAME,
@@ -200,10 +510,57 @@ static const struct ptp_clock_info ptp_ocp_clock_info = {
.settime64 = ptp_ocp_settime,
.adjtime = ptp_ocp_adjtime,
.adjfine = ptp_ocp_null_adjfine,
+ .adjphase = ptp_ocp_adjphase,
+ .enable = ptp_ocp_enable,
+ .pps = true,
+ .n_ext_ts = 2,
};
+/*
+ * Zero the drift register and latch it with OCP_CTRL_ADJUST_DRIFT.
+ * The clock selection is switched to OCP_SELECT_CLK_REG for the update
+ * and restored afterwards.  The only caller here holds bp->lock.
+ */
+static void
+__ptp_ocp_clear_drift_locked(struct ptp_ocp *bp)
+{
+ u32 ctrl, select;
+
+ select = ioread32(&bp->reg->select);
+ iowrite32(OCP_SELECT_CLK_REG, &bp->reg->select);
+
+ iowrite32(0, &bp->reg->drift_ns);
+
+ ctrl = ioread32(&bp->reg->ctrl);
+ ctrl |= OCP_CTRL_ADJUST_DRIFT;
+ iowrite32(ctrl, &bp->reg->ctrl);
+
+ /* restore clock selection */
+ /* NOTE(review): readers of this register all use "select >> 16", so the
+  * active source presumably reads back in the upper half - confirm.
+  */
+ iowrite32(select >> 16, &bp->reg->select);
+}
+
+/*
+ * Self re-arming timer callback (period HZ) that polls the pps_to_clk
+ * status register.  While PPS_STATUS_SUPERV_ERR is set the status value
+ * is written back (presumably write-to-clear - confirm), and on the
+ * first occurrence the drift is cleared and the current wall-clock
+ * second is stored in bp->gnss_lost.  When the error bit is clear,
+ * bp->gnss_lost is reset to 0.
+ */
+static void
+ptp_ocp_watchdog(struct timer_list *t)
+{
+ struct ptp_ocp *bp = from_timer(bp, t, watchdog);
+ unsigned long flags;
+ u32 status;
+
+ status = ioread32(&bp->pps_to_clk->status);
+
+ if (status & PPS_STATUS_SUPERV_ERR) {
+ iowrite32(status, &bp->pps_to_clk->status);
+ if (!bp->gnss_lost) {
+ spin_lock_irqsave(&bp->lock, flags);
+ __ptp_ocp_clear_drift_locked(bp);
+ spin_unlock_irqrestore(&bp->lock, flags);
+ bp->gnss_lost = ktime_get_real_seconds();
+ }
+
+ } else if (bp->gnss_lost) {
+ bp->gnss_lost = 0;
+ }
+
+ mod_timer(&bp->watchdog, jiffies + HZ);
+}
+
static int
-ptp_ocp_check_clock(struct ptp_ocp *bp)
+ptp_ocp_init_clock(struct ptp_ocp *bp)
{
struct timespec64 ts;
bool sync;
@@ -214,6 +571,17 @@ ptp_ocp_check_clock(struct ptp_ocp *bp)
ctrl |= OCP_CTRL_ENABLE;
iowrite32(ctrl, &bp->reg->ctrl);
+ /* NO DRIFT Correction */
+ /* offset_p: 1/8, offset_i: 1/16, drift_p: 0, drift_i: 0 */
+ iowrite32(0x2000, &bp->reg->servo_offset_p);
+ iowrite32(0x1000, &bp->reg->servo_offset_i);
+ iowrite32(0, &bp->reg->servo_drift_p);
+ iowrite32(0, &bp->reg->servo_drift_i);
+
+ /* latch servo values */
+ ctrl |= OCP_CTRL_ADJUST_SERVO;
+ iowrite32(ctrl, &bp->reg->ctrl);
+
if ((ioread32(&bp->reg->ctrl) & OCP_CTRL_ENABLE) == 0) {
dev_err(&bp->pdev->dev, "clock not enabled\n");
return -ENODEV;
@@ -229,6 +597,9 @@ ptp_ocp_check_clock(struct ptp_ocp *bp)
ts.tv_sec, ts.tv_nsec,
sync ? "in-sync" : "UNSYNCED");
+ timer_setup(&bp->watchdog, ptp_ocp_watchdog, 0);
+ mod_timer(&bp->watchdog, jiffies + HZ);
+
return 0;
}
@@ -278,82 +649,840 @@ ptp_ocp_tod_info(struct ptp_ocp *bp)
reg & TOD_STATUS_LEAP_VALID ? 1 : 0);
}
+/* device_find_child() match callback that accepts the first child it is shown. */
+static int
+ptp_ocp_firstchild(struct device *dev, void *data)
+{
+	return 1;
+}
+
+/*
+ * Read @sz bytes, starting at device register @reg, from the I2C device
+ * at address @addr into @data.  The read is split into transactions of
+ * at most two bytes, each consisting of a register-address write
+ * followed by a read.
+ *
+ * Returns 0 on success or a negative errno on failure.
+ */
+static int
+ptp_ocp_read_i2c(struct i2c_adapter *adap, u8 addr, u8 reg, u8 sz, u8 *data)
+{
+	struct i2c_msg msgs[2] = {
+		{
+			.addr = addr,
+			.len = 1,
+			.buf = &reg,
+		},
+		{
+			.addr = addr,
+			.flags = I2C_M_RD,
+			.len = 2,
+			.buf = data,
+		},
+	};
+	int err;
+	u8 len;
+
+	/* xiic-i2c for some stupid reason only does 2 byte reads. */
+	while (sz) {
+		len = min_t(u8, sz, 2);
+		msgs[1].len = len;
+		err = i2c_transfer(adap, msgs, ARRAY_SIZE(msgs));
+		/*
+		 * i2c_transfer() returns the number of messages executed
+		 * (or a negative errno), not a byte count, so success is
+		 * "both messages done" regardless of the chunk length.
+		 */
+		if (err != ARRAY_SIZE(msgs))
+			return err < 0 ? err : -EIO;
+		msgs[1].buf += len;
+		reg += len;	/* msgs[0].buf points at reg */
+		sz -= len;
+	}
+	return 0;
+}
+
+/*
+ * Read the 6-byte serial number from the EEPROM (I2C address 0x58,
+ * register 0x9A) behind the card's I2C controller into bp->serial and
+ * set bp->has_serial on success.  Failures are logged and otherwise
+ * ignored.
+ */
+static void
+ptp_ocp_get_serial_number(struct ptp_ocp *bp)
+{
+	struct i2c_adapter *adap;
+	struct device *dev;
+	int err;
+
+	/*
+	 * ptp_ocp_register_i2c() returns success without creating the
+	 * controller when its irq is unavailable, leaving i2c_ctrl NULL.
+	 */
+	if (!bp->i2c_ctrl)
+		return;
+
+	dev = device_find_child(&bp->i2c_ctrl->dev, NULL, ptp_ocp_firstchild);
+	if (!dev) {
+		dev_err(&bp->pdev->dev, "Can't find I2C adapter\n");
+		return;
+	}
+
+	adap = i2c_verify_adapter(dev);
+	if (!adap) {
+		dev_err(&bp->pdev->dev, "device '%s' isn't an I2C adapter\n",
+			dev_name(dev));
+		goto out;
+	}
+
+	err = ptp_ocp_read_i2c(adap, 0x58, 0x9A, 6, bp->serial);
+	if (err) {
+		dev_err(&bp->pdev->dev, "could not read eeprom: %d\n", err);
+		goto out;
+	}
+
+	bp->has_serial = true;
+
+out:
+	/* drop the reference taken by device_find_child() */
+	put_device(dev);
+}
+
static void
ptp_ocp_info(struct ptp_ocp *bp)
{
- static const char * const clock_name[] = {
- "NO", "TOD", "IRIG", "PPS", "PTP", "RTC", "REGS", "EXT"
- };
u32 version, select;
version = ioread32(&bp->reg->version);
select = ioread32(&bp->reg->select);
dev_info(&bp->pdev->dev, "Version %d.%d.%d, clock %s, device ptp%d\n",
version >> 24, (version >> 16) & 0xff, version & 0xffff,
- clock_name[select & 7],
+ ptp_ocp_clock_name_from_val(select >> 16),
ptp_clock_index(bp->ptp));
ptp_ocp_tod_info(bp);
}
+/*
+ * Walk down the chain of first children below the SPI flash platform
+ * device until a device on the "mtd" bus is found.
+ *
+ * Returns that device with the reference taken by device_find_child()
+ * still held (the caller must put_device() it), or NULL if the chain
+ * ends first.  References on intermediate devices are dropped here.
+ */
+static struct device *
+ptp_ocp_find_flash(struct ptp_ocp *bp)
+{
+ struct device *dev, *last;
+
+ last = NULL;
+ dev = &bp->spi_flash->dev;
+
+ while ((dev = device_find_child(dev, NULL, ptp_ocp_firstchild))) {
+ if (!strcmp("mtd", dev_bus_name(dev)))
+ break;
+ put_device(last);
+ last = dev;
+ }
+ put_device(last);
+
+ return dev;
+}
+
+/*
+ * Write firmware image @fw to the MTD device behind @dev, starting at
+ * bp->flash_start.  Works in 4096-byte steps: each step erases one
+ * block and then writes up to one block of image data, reporting
+ * progress through devlink.  Returns 0 or the first mtd error.
+ */
static int
-ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ptp_ocp_devlink_flash(struct devlink *devlink, struct device *dev,
+ const struct firmware *fw)
{
- struct ptp_ocp *bp;
+ struct mtd_info *mtd = dev_get_drvdata(dev);
+ struct ptp_ocp *bp = devlink_priv(devlink);
+ size_t off, len, resid, wrote;
+ struct erase_info erase;
+ size_t base, blksz;
+ int err = 0;
+
+ off = 0;
+ base = bp->flash_start;
+ blksz = 4096;
+ resid = fw->size;
+
+ while (resid) {
+ devlink_flash_update_status_notify(devlink, "Flashing",
+ NULL, off, fw->size);
+
+ len = min_t(size_t, resid, blksz);
+ erase.addr = base + off;
+ erase.len = blksz;
+
+ err = mtd_erase(mtd, &erase);
+ if (err)
+ goto out;
+
+ /* NOTE(review): the byte count returned in "wrote" is not checked. */
+ err = mtd_write(mtd, base + off, len, &wrote, &fw->data[off]);
+ if (err)
+ goto out;
+
+ off += blksz;
+ resid -= len;
+ }
+out:
+ return err;
+}
+
+/*
+ * devlink flash_update callback: locate the flash MTD device, write
+ * params->fw to it and report the outcome through devlink status
+ * notifications.  Returns -ENODEV when no flash device is found,
+ * otherwise the result of the write.
+ */
+static int
+ptp_ocp_devlink_flash_update(struct devlink *devlink,
+ struct devlink_flash_update_params *params,
+ struct netlink_ext_ack *extack)
+{
+ struct ptp_ocp *bp = devlink_priv(devlink);
+ struct device *dev;
+ const char *msg;
+ int err;
+
+ dev = ptp_ocp_find_flash(bp);
+ if (!dev) {
+ dev_err(&bp->pdev->dev, "Can't find Flash SPI adapter\n");
+ return -ENODEV;
+ }
+
+ devlink_flash_update_status_notify(devlink, "Preparing to flash",
+ NULL, 0, 0);
+
+ err = ptp_ocp_devlink_flash(devlink, dev, params->fw);
+
+ msg = err ? "Flash error" : "Flash complete";
+ devlink_flash_update_status_notify(devlink, msg, NULL, 0, 0);
+
+ /* drop the reference returned by ptp_ocp_find_flash() */
+ put_device(dev);
+ return err;
+}
+
+/*
+ * devlink info_get callback: reports the driver name, the running image
+ * version ("fw" when the low 16 bits of the version register are
+ * non-zero, "loader" otherwise) and, if it can be read, the serial
+ * number.  Returns 0 or the first error from a devlink_info_*() helper.
+ */
+static int
+ptp_ocp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
+ struct netlink_ext_ack *extack)
+{
+ struct ptp_ocp *bp = devlink_priv(devlink);
+ char buf[32];
+ int err;
+
+ err = devlink_info_driver_name_put(req, KBUILD_MODNAME);
+ if (err)
+ return err;
+
+ if (bp->image) {
+ u32 ver = ioread32(&bp->image->version);
+
+ /* NOTE(review): the "fw" branch prints the whole 32-bit register,
+  * while ptp_ocp_resource_summary() prints only ver & 0xffff for a
+  * regular image - confirm which is intended.
+  */
+ if (ver & 0xffff) {
+ sprintf(buf, "%d", ver);
+ err = devlink_info_version_running_put(req,
+ "fw",
+ buf);
+ } else {
+ sprintf(buf, "%d", ver >> 16);
+ err = devlink_info_version_running_put(req,
+ "loader",
+ buf);
+ }
+ if (err)
+ return err;
+ }
+
+ /* the serial number is read lazily, on first use */
+ if (!bp->has_serial)
+ ptp_ocp_get_serial_number(bp);
+
+ if (bp->has_serial) {
+ sprintf(buf, "%pM", bp->serial);
+ err = devlink_info_serial_number_put(req, buf);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static const struct devlink_ops ptp_ocp_devlink_ops = {
+ .flash_update = ptp_ocp_devlink_flash_update,
+ .info_get = ptp_ocp_devlink_info_get,
+};
+
+/*
+ * Claim and map the physical range [@start, @start + @size) as a
+ * device-managed mapping owned by the PCI device.
+ * NOTE(review): devm_ioremap_resource() is documented to report failure
+ * with an ERR_PTR() value rather than NULL - callers need IS_ERR().
+ */
+static void __iomem *
+__ptp_ocp_get_mem(struct ptp_ocp *bp, unsigned long start, int size)
+{
+ struct resource res = DEFINE_RES_MEM_NAMED(start, size, "ptp_ocp");
+
+ return devm_ioremap_resource(&bp->pdev->dev, &res);
+}
+
+/* Map the register window described by @r, which is relative to PCI BAR 0. */
+static void __iomem *
+ptp_ocp_get_mem(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+	unsigned long phys = pci_resource_start(bp->pdev, 0) + r->offset;
+
+	return __ptp_ocp_get_mem(bp, phys, r->size);
+}
+
+/* Fill *@res with an IRQ resource describing interrupt number @irq. */
+static void
+ptp_ocp_set_irq_resource(struct resource *res, int irq)
+{
+	struct resource irq_res = DEFINE_RES_IRQ(irq);
+
+	*res = irq_res;
+}
+
+/* Fill *@res with a memory resource covering @size bytes from @start. */
+static void
+ptp_ocp_set_mem_resource(struct resource *res, unsigned long start, int size)
+{
+	struct resource mem_res = DEFINE_RES_MEM(start, size);
+
+	*res = mem_res;
+}
+
+/*
+ * Register the SPI flash controller described by @r as a platform
+ * device (name and platform data come from the ptp_ocp_flash_info in
+ * r->extra) and store it in the struct ptp_ocp member named by @r.
+ *
+ * Returns 0 on success, and also when the device is skipped because
+ * the FPGA does not provide the required irq vector; otherwise a
+ * negative errno from the platform device registration.
+ */
+static int
+ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+	struct ptp_ocp_flash_info *info;
+	struct pci_dev *pdev = bp->pdev;
+	struct platform_device *p;
+	struct resource res[2];
+	unsigned long start;
+	int id;
+
+	/* XXX hack to work around old FPGA */
+	if (bp->n_irqs < 10) {
+		dev_err(&bp->pdev->dev, "FPGA does not have SPI devices\n");
+		return 0;
+	}
+
+	/* Vectors are numbered 0..n_irqs-1, so n_irqs itself is out of range. */
+	if (r->irq_vec >= bp->n_irqs) {
+		dev_err(&bp->pdev->dev, "spi device irq %d out of range\n",
+			r->irq_vec);
+		return 0;
+	}
+
+	start = pci_resource_start(pdev, 0) + r->offset;
+	ptp_ocp_set_mem_resource(&res[0], start, r->size);
+	ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec));
+
+	info = r->extra;
+	id = pci_dev_id(pdev) << 1;
+	id += info->pci_offset;
+
+	p = platform_device_register_resndata(&pdev->dev, info->name, id,
+					      res, 2, info->data,
+					      info->data_size);
+	if (IS_ERR(p))
+		return PTR_ERR(p);
+
+	bp_assign_entry(bp, r, p);
+
+	return 0;
+}
+
+/*
+ * Register an "xiic-i2c" platform device with id @id below @pdev, using
+ * the memory window and irq vector described by @r.  Returns the
+ * result of platform_device_register_resndata(); the caller checks it
+ * with IS_ERR().
+ */
+static struct platform_device *
+ptp_ocp_i2c_bus(struct pci_dev *pdev, struct ocp_resource *r, int id)
+{
+ struct resource res[2];
+ unsigned long start;
+
+ start = pci_resource_start(pdev, 0) + r->offset;
+ ptp_ocp_set_mem_resource(&res[0], start, r->size);
+ ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec));
+
+ return platform_device_register_resndata(&pdev->dev, "xiic-i2c",
+ id, res, 2, NULL, 0);
+}
+
+/*
+ * Register the I2C controller described by @r: a fixed-rate 50 MHz
+ * clock named "AXI.<id>", a clkdev lookup binding it to
+ * "xiic-i2c.<id>", and the xiic-i2c platform device itself, which is
+ * stored in the struct ptp_ocp member named by @r.
+ *
+ * Returns 0 on success, and also when the device is skipped because
+ * its irq vector is not available; otherwise a negative errno.
+ */
+static int
+ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+	struct pci_dev *pdev = bp->pdev;
+	struct platform_device *p;
+	struct clk_hw *clk;
+	char buf[32];
+	int id;
+
+	/* Vectors are numbered 0..n_irqs-1, so n_irqs itself is out of range. */
+	if (r->irq_vec >= bp->n_irqs) {
+		dev_err(&bp->pdev->dev, "i2c device irq %d out of range\n",
+			r->irq_vec);
+		return 0;
+	}
+
+	id = pci_dev_id(bp->pdev);
+
+	sprintf(buf, "AXI.%d", id);
+	clk = clk_hw_register_fixed_rate(&pdev->dev, buf, NULL, 0, 50000000);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+	bp->i2c_clk = clk;
+
+	sprintf(buf, "xiic-i2c.%d", id);
+	devm_clk_hw_register_clkdev(&pdev->dev, clk, NULL, buf);
+	p = ptp_ocp_i2c_bus(bp->pdev, r, id);
+	if (IS_ERR(p))
+		return PTR_ERR(p);
+
+	bp_assign_entry(bp, r, p);
+
+	return 0;
+}
+
+/*
+ * Timestamper interrupt handler.  @priv is the ptp_ocp_ext_src that was
+ * registered with the irq.  Reads the captured seconds/nanoseconds,
+ * delivers them to the PTP core as a PTP_CLOCK_EXTTS event (timestamp
+ * in nanoseconds) and acknowledges the interrupt.
+ */
+static irqreturn_t
+ptp_ocp_ts_irq(int irq, void *priv)
+{
+ struct ptp_ocp_ext_src *ext = priv;
+ struct ts_reg __iomem *reg = ext->mem;
+ struct ptp_clock_event ev;
+ u32 sec, nsec;
+
+ /* XXX should fix API - this converts s/ns -> ts -> s/ns */
+ sec = ioread32(&reg->time_sec);
+ nsec = ioread32(&reg->time_ns);
+
+ ev.type = PTP_CLOCK_EXTTS;
+ ev.index = ext->info->index;
+ ev.timestamp = sec * 1000000000ULL + nsec;
+
+ ptp_clock_event(ext->bp->ptp, &ev);
+
+ iowrite32(1, &reg->intr); /* write 1 to ack */
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Enable or disable a timestamper.  @priv is the ptp_ocp_ext_src.
+ * Enabling switches the unit on, sets the interrupt mask register and
+ * writes 1 to the interrupt register (the irq handler uses the same
+ * write as an ack); disabling clears the mask and then the enable
+ * register.  Always returns 0.
+ */
+static int
+ptp_ocp_ts_enable(void *priv, bool enable)
+{
+ struct ptp_ocp_ext_src *ext = priv;
+ struct ts_reg __iomem *reg = ext->mem;
+
+ if (enable) {
+ iowrite32(1, &reg->enable);
+ iowrite32(1, &reg->intr_mask);
+ iowrite32(1, &reg->intr);
+ } else {
+ iowrite32(0, &reg->intr_mask);
+ iowrite32(0, &reg->enable);
+ }
+
+ return 0;
+}
+
+/*
+ * Tear down an external source created by ptp_ocp_register_ext():
+ * disable it, release its irq and free the bookkeeping structure.
+ */
+static void
+ptp_ocp_unregister_ext(struct ptp_ocp_ext_src *ext)
+{
+ ext->info->enable(ext, false);
+ pci_free_irq(ext->bp->pdev, ext->irq_vec, ext);
+ kfree(ext);
+}
+
+/*
+ * Set up the external source described by @r: map its registers,
+ * request its irq with the handler from the ptp_ocp_ext_info in
+ * r->extra, and store the new ptp_ocp_ext_src in the struct ptp_ocp
+ * member named by @r.
+ *
+ * Returns 0 on success or a negative errno; nothing is left allocated
+ * on failure.
+ */
+static int
+ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+ struct pci_dev *pdev = bp->pdev;
+ struct ptp_ocp_ext_src *ext;
int err;
- bp = kzalloc(sizeof(*bp), GFP_KERNEL);
- if (!bp)
+ ext = kzalloc(sizeof(*ext), GFP_KERNEL);
+ if (!ext)
return -ENOMEM;
+
+ /* ptp_ocp_get_mem() reports failure as an ERR_PTR(), never as NULL. */
+ ext->mem = ptp_ocp_get_mem(bp, r);
+ if (IS_ERR(ext->mem)) {
+ err = PTR_ERR(ext->mem);
+ goto out;
+ }
+
+ ext->bp = bp;
+ ext->info = r->extra;
+ ext->irq_vec = r->irq_vec;
+
+ err = pci_request_irq(pdev, r->irq_vec, ext->info->irq_fcn, NULL,
+ ext, "ocp%d.%s", bp->id, ext->info->name);
+ if (err) {
+ dev_err(&pdev->dev, "Could not get irq %d\n", r->irq_vec);
+ goto out;
+ }
+
+ bp_assign_entry(bp, r, ext);
+
+ return 0;
+
+out:
+ kfree(ext);
+ return err;
+}
+
+/*
+ * Register a fixed-type 16550A UART (memory mapped, register shift 2,
+ * 50 MHz uartclk) at the BAR 0 offset and irq vector given by @r.
+ * Returns the value of serial8250_register_8250_port(); the caller
+ * treats a negative value as an error and anything else as the port
+ * number.
+ */
+static int
+ptp_ocp_serial_line(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+ struct pci_dev *pdev = bp->pdev;
+ struct uart_8250_port uart;
+
+ /* Setting UPF_IOREMAP and leaving port.membase unspecified lets
+ * the serial port device claim and release the pci resource.
+ */
+ memset(&uart, 0, sizeof(uart));
+ uart.port.dev = &pdev->dev;
+ uart.port.iotype = UPIO_MEM;
+ uart.port.regshift = 2;
+ uart.port.mapbase = pci_resource_start(pdev, 0) + r->offset;
+ uart.port.irq = pci_irq_vector(pdev, r->irq_vec);
+ uart.port.uartclk = 50000000;
+ uart.port.flags = UPF_FIXED_TYPE | UPF_IOREMAP;
+ uart.port.type = PORT_16550A;
+
+ return serial8250_register_8250_port(&uart);
+}
+
+/*
+ * Register the serial port described by @r and store the resulting
+ * port number in the struct ptp_ocp member named by @r.
+ *
+ * Returns 0 on success, and also when the port is skipped because its
+ * irq vector is not available; otherwise the negative error from the
+ * 8250 registration.
+ */
+static int
+ptp_ocp_register_serial(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+	int port;
+
+	/* Vectors are numbered 0..n_irqs-1, so n_irqs itself is out of range. */
+	if (r->irq_vec >= bp->n_irqs) {
+		dev_err(&bp->pdev->dev, "serial device irq %d out of range\n",
+			r->irq_vec);
+		return 0;
+	}
+
+	port = ptp_ocp_serial_line(bp, r);
+	if (port < 0)
+		return port;
+
+	bp_assign_entry(bp, r, port);
+
+	return 0;
+}
+
+/*
+ * Map the register window described by @r and store the mapping in the
+ * struct ptp_ocp member named by @r.  Returns 0 or a negative errno.
+ */
+static int
+ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+	void __iomem *mem;
+
+	/* ptp_ocp_get_mem() reports failure as an ERR_PTR(), never as NULL. */
+	mem = ptp_ocp_get_mem(bp, r);
+	if (IS_ERR(mem))
+		return PTR_ERR(mem);
+
+	bp_assign_entry(bp, r, mem);
+
+	return 0;
+}
+
+/* FB specific board initializers; last "resource" registered. */
+/*
+ * Sets the flash offset used for firmware updates (1024 blocks of
+ * 4096 bytes) and initializes the clock.  Returns the result of
+ * ptp_ocp_init_clock().
+ */
+static int
+ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+ bp->flash_start = 1024 * 4096;
+
+ return ptp_ocp_init_clock(bp);
+}
+
+/*
+ * Run the setup callback of every entry in the resource table that
+ * @driver_data points to, in order, stopping at the first entry without
+ * a callback.  Returns 0, or the first non-zero setup result (later
+ * entries are then not processed).
+ */
+static int
+ptp_ocp_register_resources(struct ptp_ocp *bp, kernel_ulong_t driver_data)
+{
+	struct ocp_resource *res = (struct ocp_resource *)driver_data;
+	int rc;
+
+	while (res->setup) {
+		rc = res->setup(bp, res);
+		if (rc)
+			return rc;
+		res++;
+	}
+	return 0;
+}
+
+/*
+ * sysfs "serialnum" (read-only): prints bp->serial in %pM format,
+ * attempting to read it from the EEPROM first if that has not succeeded
+ * yet.  The buffer is printed even when the read fails.
+ */
+static ssize_t
+serialnum_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct ptp_ocp *bp = dev_get_drvdata(dev);
+
+ if (!bp->has_serial)
+ ptp_ocp_get_serial_number(bp);
+
+ return sysfs_emit(buf, "%pM\n", bp->serial);
+}
+static DEVICE_ATTR_RO(serialnum);
+
+/*
+ * sysfs "gnss_sync" (read-only): "SYNC" while no loss is recorded,
+ * otherwise "LOST @ <time>" with the time stored in bp->gnss_lost.
+ */
+static ssize_t
+gnss_sync_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct ptp_ocp *bp = dev_get_drvdata(dev);
+
+	if (!bp->gnss_lost)
+		return sysfs_emit(buf, "SYNC\n");
+
+	return sysfs_emit(buf, "LOST @ %ptT\n", &bp->gnss_lost);
+}
+static DEVICE_ATTR_RO(gnss_sync);
+
+/*
+ * sysfs "clock_source" read side: prints the name of the clock source
+ * taken from the upper 16 bits of the select register.
+ * NOTE(review): the lookup returns NULL for a value that is not in
+ * ptp_ocp_clock[], and that NULL is passed straight to "%s".
+ */
+static ssize_t
+clock_source_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct ptp_ocp *bp = dev_get_drvdata(dev);
+ const char *p;
+ u32 select;
+
+ select = ioread32(&bp->reg->select);
+ p = ptp_ocp_clock_name_from_val(select >> 16);
+
+ return sysfs_emit(buf, "%s\n", p);
+}
+
+/*
+ * sysfs "clock_source" write side: translates the written name into a
+ * selection value and writes it to the select register under bp->lock.
+ * Returns @count, or -EINVAL for an unknown name.
+ */
+static ssize_t
+clock_source_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct ptp_ocp *bp = dev_get_drvdata(dev);
+ unsigned long flags;
+ int val;
+
+ val = ptp_ocp_clock_val_from_name(buf);
+ if (val < 0)
+ return val;
+
+ spin_lock_irqsave(&bp->lock, flags);
+ iowrite32(val, &bp->reg->select);
+ spin_unlock_irqrestore(&bp->lock, flags);
+
+ return count;
+}
+static DEVICE_ATTR_RW(clock_source);
+
+/*
+ * sysfs "available_clock_sources" (read-only): all names from
+ * ptp_ocp_clock[], separated by single spaces and ended by a newline.
+ */
+static ssize_t
+available_clock_sources_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	ssize_t len = 0;
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(ptp_ocp_clock); idx++)
+		len += sysfs_emit_at(buf, len, "%s ", ptp_ocp_clock[idx].name);
+
+	/* overwrite the trailing separator with the newline */
+	if (len)
+		len--;
+	len += sysfs_emit_at(buf, len, "\n");
+
+	return len;
+}
+static DEVICE_ATTR_RO(available_clock_sources);
+
+static struct attribute *timecard_attrs[] = {
+ &dev_attr_serialnum.attr,
+ &dev_attr_gnss_sync.attr,
+ &dev_attr_clock_source.attr,
+ &dev_attr_available_clock_sources.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(timecard);
+
+/*
+ * Release callback of the "ocp%d" device: returns the card's id to
+ * ptp_ocp_idr.
+ */
+static void
+ptp_ocp_dev_release(struct device *dev)
+{
+ struct ptp_ocp *bp = dev_get_drvdata(dev);
+
+ mutex_lock(&ptp_ocp_lock);
+ idr_remove(&ptp_ocp_idr, bp->id);
+ mutex_unlock(&ptp_ocp_lock);
+}
+
+/*
+ * Allocate an id for the card, initialize the software state in @bp
+ * and add the "ocp%d" class device below the PCI device.
+ *
+ * Returns 0 on success or a negative errno.  If device_add() fails the
+ * id is released again through the device's release callback.
+ */
+static int
+ptp_ocp_device_init(struct ptp_ocp *bp, struct pci_dev *pdev)
+{
+ int err;
+
+ mutex_lock(&ptp_ocp_lock);
+ err = idr_alloc(&ptp_ocp_idr, bp, 0, 0, GFP_KERNEL);
+ mutex_unlock(&ptp_ocp_lock);
+ if (err < 0) {
+ dev_err(&pdev->dev, "idr_alloc failed: %d\n", err);
+ return err;
+ }
+ bp->id = err;
+
+ bp->ptp_info = ptp_ocp_clock_info;
+ spin_lock_init(&bp->lock);
+ bp->gnss_port = -1;
+ bp->mac_port = -1;
bp->pdev = pdev;
+
+ device_initialize(&bp->dev);
+ dev_set_name(&bp->dev, "ocp%d", bp->id);
+ bp->dev.class = &timecard_class;
+ bp->dev.parent = &pdev->dev;
+ bp->dev.release = ptp_ocp_dev_release;
+ dev_set_drvdata(&bp->dev, bp);
+
+ err = device_add(&bp->dev);
+ if (err) {
+ dev_err(&bp->dev, "device add failed: %d\n", err);
+ goto out;
+ }
+
pci_set_drvdata(pdev, bp);
+ return 0;
+
+out:
+ /*
+  * Dropping the last reference invokes ptp_ocp_dev_release(), which
+  * removes the id; calling it by hand as well would remove the id
+  * twice and could drop an id that was re-allocated in between.
+  */
+ put_device(&bp->dev);
+ return err;
+}
+
+/*
+ * Create a sysfs symlink named @link in the card's "ocp%d" device
+ * directory pointing at @child.  Failure is only logged.
+ */
+static void
+ptp_ocp_symlink(struct ptp_ocp *bp, struct device *child, const char *link)
+{
+ struct device *dev = &bp->dev;
+
+ if (sysfs_create_link(&dev->kobj, &child->kobj, link))
+ dev_err(dev, "%s symlink failed\n", link);
+}
+
+static void
+ptp_ocp_link_child(struct ptp_ocp *bp, const char *name, const char *link)
+{
+ struct device *dev, *child;
+
+ dev = &bp->pdev->dev;
+
+ child = device_find_child_by_name(dev, name);
+ if (!child) {
+ dev_err(dev, "Could not find device %s\n", name);
+ return;
+ }
+
+ ptp_ocp_symlink(bp, child, link);
+ put_device(child);
+}
+
+static int
+ptp_ocp_complete(struct ptp_ocp *bp)
+{
+ struct pps_device *pps;
+ char buf[32];
+
+ if (bp->gnss_port != -1) {
+ sprintf(buf, "ttyS%d", bp->gnss_port);
+ ptp_ocp_link_child(bp, buf, "ttyGNSS");
+ }
+ if (bp->mac_port != -1) {
+ sprintf(buf, "ttyS%d", bp->mac_port);
+ ptp_ocp_link_child(bp, buf, "ttyMAC");
+ }
+ sprintf(buf, "ptp%d", ptp_clock_index(bp->ptp));
+ ptp_ocp_link_child(bp, buf, "ptp");
+
+ pps = pps_lookup_dev(bp->ptp);
+ if (pps)
+ ptp_ocp_symlink(bp, pps->dev, "pps");
+
+ if (device_add_groups(&bp->dev, timecard_groups))
+ pr_err("device add groups failed\n");
+
+ return 0;
+}
+
+static void
+ptp_ocp_resource_summary(struct ptp_ocp *bp)
+{
+ struct device *dev = &bp->pdev->dev;
+
+ if (bp->image) {
+ u32 ver = ioread32(&bp->image->version);
+
+ dev_info(dev, "version %x\n", ver);
+ if (ver & 0xffff)
+ dev_info(dev, "regular image, version %d\n",
+ ver & 0xffff);
+ else
+ dev_info(dev, "golden image, version %d\n",
+ ver >> 16);
+ }
+ if (bp->gnss_port != -1)
+ dev_info(dev, "GNSS @ /dev/ttyS%d 115200\n", bp->gnss_port);
+ if (bp->mac_port != -1)
+ dev_info(dev, "MAC @ /dev/ttyS%d 57600\n", bp->mac_port);
+}
+
+static void
+ptp_ocp_detach_sysfs(struct ptp_ocp *bp)
+{
+ struct device *dev = &bp->dev;
+
+ sysfs_remove_link(&dev->kobj, "ttyGNSS");
+ sysfs_remove_link(&dev->kobj, "ttyMAC");
+ sysfs_remove_link(&dev->kobj, "ptp");
+ sysfs_remove_link(&dev->kobj, "pps");
+ device_remove_groups(dev, timecard_groups);
+}
+
+static void
+ptp_ocp_detach(struct ptp_ocp *bp)
+{
+ ptp_ocp_detach_sysfs(bp);
+ if (timer_pending(&bp->watchdog))
+ del_timer_sync(&bp->watchdog);
+ if (bp->ts0)
+ ptp_ocp_unregister_ext(bp->ts0);
+ if (bp->ts1)
+ ptp_ocp_unregister_ext(bp->ts1);
+ if (bp->pps)
+ ptp_ocp_unregister_ext(bp->pps);
+ if (bp->gnss_port != -1)
+ serial8250_unregister_port(bp->gnss_port);
+ if (bp->mac_port != -1)
+ serial8250_unregister_port(bp->mac_port);
+ if (bp->spi_flash)
+ platform_device_unregister(bp->spi_flash);
+ if (bp->i2c_ctrl)
+ platform_device_unregister(bp->i2c_ctrl);
+ if (bp->i2c_clk)
+ clk_hw_unregister_fixed_rate(bp->i2c_clk);
+ if (bp->n_irqs)
+ pci_free_irq_vectors(bp->pdev);
+ if (bp->ptp)
+ ptp_clock_unregister(bp->ptp);
+ device_unregister(&bp->dev);
+}
+
+static int
+ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct devlink *devlink;
+ struct ptp_ocp *bp;
+ int err;
+
+ devlink = devlink_alloc(&ptp_ocp_devlink_ops, sizeof(*bp), &pdev->dev);
+ if (!devlink) {
+ dev_err(&pdev->dev, "devlink_alloc failed\n");
+ return -ENOMEM;
+ }
+
+ err = devlink_register(devlink);
+ if (err)
+ goto out_free;
+
err = pci_enable_device(pdev);
if (err) {
dev_err(&pdev->dev, "pci_enable_device\n");
- goto out_free;
+ goto out_unregister;
}
- err = pci_request_regions(pdev, KBUILD_MODNAME);
- if (err) {
- dev_err(&pdev->dev, "pci_request_region\n");
+ bp = devlink_priv(devlink);
+ err = ptp_ocp_device_init(bp, pdev);
+ if (err)
goto out_disable;
- }
- bp->base = pci_ioremap_bar(pdev, 0);
- if (!bp->base) {
- dev_err(&pdev->dev, "io_remap bar0\n");
- err = -ENOMEM;
- goto out_release_regions;
+ /* compat mode.
+ * Older FPGA firmware only returns 2 irq's.
+ * allow this - if not all of the IRQ's are returned, skip the
+ * extra devices and just register the clock.
+ */
+ err = pci_alloc_irq_vectors(pdev, 1, 10, PCI_IRQ_MSI | PCI_IRQ_MSIX);
+ if (err < 0) {
+ dev_err(&pdev->dev, "alloc_irq_vectors err: %d\n", err);
+ goto out;
}
- bp->reg = bp->base + OCP_REGISTER_OFFSET;
- bp->tod = bp->base + TOD_REGISTER_OFFSET;
- bp->ptp_info = ptp_ocp_clock_info;
- spin_lock_init(&bp->lock);
+ bp->n_irqs = err;
+ pci_set_master(pdev);
- err = ptp_ocp_check_clock(bp);
+ err = ptp_ocp_register_resources(bp, id->driver_data);
if (err)
goto out;
bp->ptp = ptp_clock_register(&bp->ptp_info, &pdev->dev);
if (IS_ERR(bp->ptp)) {
- dev_err(&pdev->dev, "ptp_clock_register\n");
err = PTR_ERR(bp->ptp);
+ dev_err(&pdev->dev, "ptp_clock_register: %d\n", err);
+ bp->ptp = NULL;
goto out;
}
+ err = ptp_ocp_complete(bp);
+ if (err)
+ goto out;
+
ptp_ocp_info(bp);
+ ptp_ocp_resource_summary(bp);
return 0;
out:
- pci_iounmap(pdev, bp->base);
-out_release_regions:
- pci_release_regions(pdev);
+ ptp_ocp_detach(bp);
+ pci_set_drvdata(pdev, NULL);
out_disable:
pci_disable_device(pdev);
+out_unregister:
+ devlink_unregister(devlink);
out_free:
- kfree(bp);
+ devlink_free(devlink);
return err;
}
@@ -362,13 +1491,14 @@ static void
ptp_ocp_remove(struct pci_dev *pdev)
{
struct ptp_ocp *bp = pci_get_drvdata(pdev);
+ struct devlink *devlink = priv_to_devlink(bp);
- ptp_clock_unregister(bp->ptp);
- pci_iounmap(pdev, bp->base);
- pci_release_regions(pdev);
- pci_disable_device(pdev);
+ ptp_ocp_detach(bp);
pci_set_drvdata(pdev, NULL);
- kfree(bp);
+ pci_disable_device(pdev);
+
+ devlink_unregister(devlink);
+ devlink_free(devlink);
}
static struct pci_driver ptp_ocp_driver = {
@@ -378,19 +1508,84 @@ static struct pci_driver ptp_ocp_driver = {
.remove = ptp_ocp_remove,
};
+static int
+ptp_ocp_i2c_notifier_call(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev, *child = data;
+ struct ptp_ocp *bp;
+ bool add;
+
+ switch (action) {
+ case BUS_NOTIFY_ADD_DEVICE:
+ case BUS_NOTIFY_DEL_DEVICE:
+ add = action == BUS_NOTIFY_ADD_DEVICE;
+ break;
+ default:
+ return 0;
+ }
+
+ if (!i2c_verify_adapter(child))
+ return 0;
+
+ dev = child;
+ while ((dev = dev->parent))
+ if (dev->driver && !strcmp(dev->driver->name, KBUILD_MODNAME))
+ goto found;
+ return 0;
+
+found:
+ bp = dev_get_drvdata(dev);
+ if (add)
+ ptp_ocp_symlink(bp, child, "i2c");
+ else
+ sysfs_remove_link(&bp->dev.kobj, "i2c");
+
+ return 0;
+}
+
+static struct notifier_block ptp_ocp_i2c_notifier = {
+ .notifier_call = ptp_ocp_i2c_notifier_call,
+};
+
static int __init
ptp_ocp_init(void)
{
+ const char *what;
int err;
+ what = "timecard class";
+ err = class_register(&timecard_class);
+ if (err)
+ goto out;
+
+ what = "i2c notifier";
+ err = bus_register_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier);
+ if (err)
+ goto out_notifier;
+
+ what = "ptp_ocp driver";
err = pci_register_driver(&ptp_ocp_driver);
+ if (err)
+ goto out_register;
+
+ return 0;
+
+out_register:
+ bus_unregister_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier);
+out_notifier:
+ class_unregister(&timecard_class);
+out:
+ pr_err(KBUILD_MODNAME ": failed to register %s: %d\n", what, err);
return err;
}
static void __exit
ptp_ocp_fini(void)
{
+ bus_unregister_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier);
pci_unregister_driver(&ptp_ocp_driver);
+ class_unregister(&timecard_class);
}
module_init(ptp_ocp_init);
diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c
index b3d96b747292..41b92dc2f011 100644
--- a/drivers/ptp/ptp_sysfs.c
+++ b/drivers/ptp/ptp_sysfs.c
@@ -154,7 +154,7 @@ static int unregister_vclock(struct device *dev, void *data)
struct ptp_clock *ptp = dev_get_drvdata(dev);
struct ptp_clock_info *info = ptp->info;
struct ptp_vclock *vclock;
- u8 *num = data;
+ u32 *num = data;
vclock = info_to_vclock(info);
dev_info(dev->parent, "delete virtual clock ptp%d\n",
diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c
index e0f87c57749a..baee0379482b 100644
--- a/drivers/ptp/ptp_vclock.c
+++ b/drivers/ptp/ptp_vclock.c
@@ -149,6 +149,7 @@ void ptp_vclock_unregister(struct ptp_vclock *vclock)
kfree(vclock);
}
+#if IS_BUILTIN(CONFIG_PTP_1588_CLOCK)
int ptp_get_vclocks_index(int pclock_index, int **vclock_index)
{
char name[PTP_CLOCK_NAME_LEN] = "";
@@ -217,3 +218,4 @@ void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps,
hwtstamps->hwtstamp = ns_to_ktime(ns);
}
EXPORT_SYMBOL(ptp_convert_timestamp);
+#endif
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 24ce9a17ab4f..4fd13b06231f 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -1044,7 +1044,7 @@ config REGULATOR_RT6160
help
This adds support for voltage regulator in Richtek RT6160.
This device automatically change voltage output mode from
- Buck or Boost. The mode transistion depend on the input source voltage.
+ Buck or Boost. The mode transition depend on the input source voltage.
The wide output range is from 2025mV to 5200mV and can be used on most
common application scenario.
@@ -1053,10 +1053,21 @@ config REGULATOR_RT6245
depends on I2C
select REGMAP_I2C
help
- This adds supprot for Richtek RT6245 voltage regulator.
+ This adds support for Richtek RT6245 voltage regulator.
It can support up to 14A output current and adjustable output voltage
from 0.4375V to 1.3875V, per step 12.5mV.
+config REGULATOR_RTQ2134
+ tristate "Richtek RTQ2134 SubPMIC Regulator"
+ depends on I2C
+ select REGMAP_I2C
+ help
+ This driver adds support for RTQ2134 SubPMIC regulators.
+ The RTQ2134 is a multi-phase, programmable power management IC that
+ integrate with four high efficient, synchronous step-down converter
+ cores. It features wide output voltage range and the capability to
+ configure the corresponding power stages.
+
config REGULATOR_RTMV20
tristate "Richtek RTMV20 Laser Diode Regulator"
depends on I2C
@@ -1066,6 +1077,15 @@ config REGULATOR_RTMV20
the Richtek RTMV20. It can support the load current up to 6A and
integrate strobe/vsync/fsin signal to synchronize the IR camera.
+config REGULATOR_RTQ6752
+ tristate "Richtek RTQ6752 TFT LCD voltage regulator"
+ depends on I2C
+ select REGMAP_I2C
+ help
+ This driver adds support for Richtek RTQ6752. RTQ6752 includes two
+ synchronous boost converters for PAVDD, and one synchronous NAVDD
+ buck-boost. This device is suitable for automotive TFT-LCD panel.
+
config REGULATOR_S2MPA01
tristate "Samsung S2MPA01 voltage regulator"
depends on MFD_SEC_CORE || COMPILE_TEST
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 8c2f82206b94..9e382b50a5ef 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -128,6 +128,8 @@ obj-$(CONFIG_REGULATOR_RT5033) += rt5033-regulator.o
obj-$(CONFIG_REGULATOR_RT6160) += rt6160-regulator.o
obj-$(CONFIG_REGULATOR_RT6245) += rt6245-regulator.o
obj-$(CONFIG_REGULATOR_RTMV20) += rtmv20-regulator.o
+obj-$(CONFIG_REGULATOR_RTQ2134) += rtq2134-regulator.o
+obj-$(CONFIG_REGULATOR_RTQ6752) += rtq6752-regulator.o
obj-$(CONFIG_REGULATOR_S2MPA01) += s2mpa01.o
obj-$(CONFIG_REGULATOR_S2MPS11) += s2mps11.o
obj-$(CONFIG_REGULATOR_S5M8767) += s5m8767.o
diff --git a/drivers/regulator/bd718x7-regulator.c b/drivers/regulator/bd718x7-regulator.c
index b1eb46961993..d60fccedb250 100644
--- a/drivers/regulator/bd718x7-regulator.c
+++ b/drivers/regulator/bd718x7-regulator.c
@@ -55,7 +55,8 @@
#define BD718XX_HWOPNAME(swopname) swopname##_hwcontrol
#define BD718XX_OPS(name, _list_voltage, _map_voltage, _set_voltage_sel, \
- _get_voltage_sel, _set_voltage_time_sel, _set_ramp_delay) \
+ _get_voltage_sel, _set_voltage_time_sel, _set_ramp_delay, \
+ _set_uvp, _set_ovp) \
static const struct regulator_ops name = { \
.enable = regulator_enable_regmap, \
.disable = regulator_disable_regmap, \
@@ -66,6 +67,8 @@ static const struct regulator_ops name = { \
.get_voltage_sel = (_get_voltage_sel), \
.set_voltage_time_sel = (_set_voltage_time_sel), \
.set_ramp_delay = (_set_ramp_delay), \
+ .set_under_voltage_protection = (_set_uvp), \
+ .set_over_voltage_protection = (_set_ovp), \
}; \
\
static const struct regulator_ops BD718XX_HWOPNAME(name) = { \
@@ -76,6 +79,8 @@ static const struct regulator_ops BD718XX_HWOPNAME(name) = { \
.get_voltage_sel = (_get_voltage_sel), \
.set_voltage_time_sel = (_set_voltage_time_sel), \
.set_ramp_delay = (_set_ramp_delay), \
+ .set_under_voltage_protection = (_set_uvp), \
+ .set_over_voltage_protection = (_set_ovp), \
} \
/*
@@ -154,17 +159,9 @@ static void voltage_change_done(struct regulator_dev *rdev, unsigned int sel,
* exceed it due to the scheduling.
*/
msleep(1);
- /*
- * Note for next hacker. The PWRGOOD should not be masked on
- * BD71847 so we will just unconditionally enable detection
- * when voltage is set.
- * If someone want's to disable PWRGOOD he must implement
- * caching and restoring the old value here. I am not
- * aware of such use-cases so for the sake of the simplicity
- * we just always enable PWRGOOD here.
- */
- ret = regmap_update_bits(rdev->regmap, BD718XX_REG_MVRFLTMASK2,
- *mask, 0);
+
+ ret = regmap_clear_bits(rdev->regmap, BD718XX_REG_MVRFLTMASK2,
+ *mask);
if (ret)
dev_err(&rdev->dev,
"Failed to re-enable voltage monitoring (%d)\n",
@@ -208,12 +205,27 @@ static int voltage_change_prepare(struct regulator_dev *rdev, unsigned int sel,
* time configurable.
*/
if (new > now) {
+ int tmp;
+ int prot_bit;
int ldo_offset = rdev->desc->id - BD718XX_LDO1;
- *mask = BD718XX_LDO1_VRMON80 << ldo_offset;
- ret = regmap_update_bits(rdev->regmap,
- BD718XX_REG_MVRFLTMASK2,
- *mask, *mask);
+ prot_bit = BD718XX_LDO1_VRMON80 << ldo_offset;
+ ret = regmap_read(rdev->regmap, BD718XX_REG_MVRFLTMASK2,
+ &tmp);
+ if (ret) {
+ dev_err(&rdev->dev,
+ "Failed to read voltage monitoring state\n");
+ return ret;
+ }
+
+ if (!(tmp & prot_bit)) {
+ /* We disable protection if it was enabled... */
+ ret = regmap_set_bits(rdev->regmap,
+ BD718XX_REG_MVRFLTMASK2,
+ prot_bit);
+ /* ...and we also want to re-enable it */
+ *mask = prot_bit;
+ }
if (ret) {
dev_err(&rdev->dev,
"Failed to stop voltage monitoring\n");
@@ -267,99 +279,6 @@ static int bd71837_set_voltage_sel_pickable_restricted(
}
/*
- * OPS common for BD71847 and BD71850
- */
-BD718XX_OPS(bd718xx_pickable_range_ldo_ops,
- regulator_list_voltage_pickable_linear_range, NULL,
- bd718xx_set_voltage_sel_pickable_restricted,
- regulator_get_voltage_sel_pickable_regmap, NULL, NULL);
-
-/* BD71847 and BD71850 LDO 5 is by default OFF at RUN state */
-static const struct regulator_ops bd718xx_ldo5_ops_hwstate = {
- .is_enabled = never_enabled_by_hwstate,
- .list_voltage = regulator_list_voltage_pickable_linear_range,
- .set_voltage_sel = bd718xx_set_voltage_sel_pickable_restricted,
- .get_voltage_sel = regulator_get_voltage_sel_pickable_regmap,
-};
-
-BD718XX_OPS(bd718xx_pickable_range_buck_ops,
- regulator_list_voltage_pickable_linear_range, NULL,
- regulator_set_voltage_sel_pickable_regmap,
- regulator_get_voltage_sel_pickable_regmap,
- regulator_set_voltage_time_sel, NULL);
-
-BD718XX_OPS(bd718xx_ldo_regulator_ops, regulator_list_voltage_linear_range,
- NULL, bd718xx_set_voltage_sel_restricted,
- regulator_get_voltage_sel_regmap, NULL, NULL);
-
-BD718XX_OPS(bd718xx_ldo_regulator_nolinear_ops, regulator_list_voltage_table,
- NULL, bd718xx_set_voltage_sel_restricted,
- regulator_get_voltage_sel_regmap, NULL, NULL);
-
-BD718XX_OPS(bd718xx_buck_regulator_ops, regulator_list_voltage_linear_range,
- NULL, regulator_set_voltage_sel_regmap,
- regulator_get_voltage_sel_regmap, regulator_set_voltage_time_sel,
- NULL);
-
-BD718XX_OPS(bd718xx_buck_regulator_nolinear_ops, regulator_list_voltage_table,
- regulator_map_voltage_ascend, regulator_set_voltage_sel_regmap,
- regulator_get_voltage_sel_regmap, regulator_set_voltage_time_sel,
- NULL);
-
-/*
- * OPS for BD71837
- */
-BD718XX_OPS(bd71837_pickable_range_ldo_ops,
- regulator_list_voltage_pickable_linear_range, NULL,
- bd71837_set_voltage_sel_pickable_restricted,
- regulator_get_voltage_sel_pickable_regmap, NULL, NULL);
-
-BD718XX_OPS(bd71837_pickable_range_buck_ops,
- regulator_list_voltage_pickable_linear_range, NULL,
- bd71837_set_voltage_sel_pickable_restricted,
- regulator_get_voltage_sel_pickable_regmap,
- regulator_set_voltage_time_sel, NULL);
-
-BD718XX_OPS(bd71837_ldo_regulator_ops, regulator_list_voltage_linear_range,
- NULL, bd71837_set_voltage_sel_restricted,
- regulator_get_voltage_sel_regmap, NULL, NULL);
-
-BD718XX_OPS(bd71837_ldo_regulator_nolinear_ops, regulator_list_voltage_table,
- NULL, bd71837_set_voltage_sel_restricted,
- regulator_get_voltage_sel_regmap, NULL, NULL);
-
-BD718XX_OPS(bd71837_buck_regulator_ops, regulator_list_voltage_linear_range,
- NULL, bd71837_set_voltage_sel_restricted,
- regulator_get_voltage_sel_regmap, regulator_set_voltage_time_sel,
- NULL);
-
-BD718XX_OPS(bd71837_buck_regulator_nolinear_ops, regulator_list_voltage_table,
- regulator_map_voltage_ascend, bd71837_set_voltage_sel_restricted,
- regulator_get_voltage_sel_regmap, regulator_set_voltage_time_sel,
- NULL);
-/*
- * BD71837 bucks 3 and 4 support defining their enable/disable state also
- * when buck enable state is under HW state machine control. In that case the
- * bit [2] in CTRL register is used to indicate if regulator should be ON.
- */
-static const struct regulator_ops bd71837_buck34_ops_hwctrl = {
- .is_enabled = bd71837_get_buck34_enable_hwctrl,
- .list_voltage = regulator_list_voltage_linear_range,
- .set_voltage_sel = regulator_set_voltage_sel_regmap,
- .get_voltage_sel = regulator_get_voltage_sel_regmap,
- .set_voltage_time_sel = regulator_set_voltage_time_sel,
- .set_ramp_delay = regulator_set_ramp_delay_regmap,
-};
-
-/*
- * OPS for all of the ICs - BD718(37/47/50)
- */
-BD718XX_OPS(bd718xx_dvs_buck_regulator_ops, regulator_list_voltage_linear_range,
- NULL, regulator_set_voltage_sel_regmap,
- regulator_get_voltage_sel_regmap, regulator_set_voltage_time_sel,
- /* bd718xx_buck1234_set_ramp_delay */ regulator_set_ramp_delay_regmap);
-
-/*
* BD71837 BUCK1/2/3/4
* BD71847 BUCK1/2
* 0.70 to 1.30V (10mV step)
@@ -536,6 +455,238 @@ struct bd718xx_regulator_data {
int additional_init_amnt;
};
+static int bd718x7_xvp_sanity_check(struct regulator_dev *rdev, int lim_uV,
+ int severity)
+{
+ /*
+ * BD71837/47/50 ... (ICs supported by this driver) do not provide
+ * warnings, only protection
+ */
+ if (severity != REGULATOR_SEVERITY_PROT) {
+ dev_err(&rdev->dev,
+ "Unsupported Under Voltage protection level\n");
+ return -EINVAL;
+ }
+
+ /*
+ * And protection limit is not changeable. It can only be enabled
+ * or disabled
+ */
+ if (lim_uV)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int bd718x7_set_ldo_uvp(struct regulator_dev *rdev, int lim_uV,
+ int severity, bool enable)
+{
+ int ldo_offset = rdev->desc->id - BD718XX_LDO1;
+ int prot_bit, ret;
+
+ ret = bd718x7_xvp_sanity_check(rdev, lim_uV, severity);
+ if (ret)
+ return ret;
+
+ prot_bit = BD718XX_LDO1_VRMON80 << ldo_offset;
+
+ if (enable)
+ return regmap_clear_bits(rdev->regmap, BD718XX_REG_MVRFLTMASK2,
+ prot_bit);
+
+ return regmap_set_bits(rdev->regmap, BD718XX_REG_MVRFLTMASK2,
+ prot_bit);
+}
+
+static int bd718x7_get_buck_prot_reg(int id, int *reg)
+{
+
+ if (id > BD718XX_BUCK8) {
+ WARN_ON(id > BD718XX_BUCK8);
+ return -EINVAL;
+ }
+
+ if (id > BD718XX_BUCK4)
+ *reg = BD718XX_REG_MVRFLTMASK0;
+ else
+ *reg = BD718XX_REG_MVRFLTMASK1;
+
+ return 0;
+}
+
+static int bd718x7_get_buck_ovp_info(int id, int *reg, int *bit)
+{
+ int ret;
+
+ ret = bd718x7_get_buck_prot_reg(id, reg);
+ if (ret)
+ return ret;
+
+ *bit = BIT((id % 4) * 2 + 1);
+
+ return 0;
+}
+
+static int bd718x7_get_buck_uvp_info(int id, int *reg, int *bit)
+{
+ int ret;
+
+ ret = bd718x7_get_buck_prot_reg(id, reg);
+ if (ret)
+ return ret;
+
+ *bit = BIT((id % 4) * 2);
+
+ return 0;
+}
+
+static int bd718x7_set_buck_uvp(struct regulator_dev *rdev, int lim_uV,
+ int severity, bool enable)
+{
+ int bit, reg, ret;
+
+ ret = bd718x7_xvp_sanity_check(rdev, lim_uV, severity);
+ if (ret)
+ return ret;
+
+ ret = bd718x7_get_buck_uvp_info(rdev->desc->id, &reg, &bit);
+ if (ret)
+ return ret;
+
+ if (enable)
+ return regmap_clear_bits(rdev->regmap, reg, bit);
+
+ return regmap_set_bits(rdev->regmap, reg, bit);
+
+}
+
+static int bd718x7_set_buck_ovp(struct regulator_dev *rdev, int lim_uV,
+ int severity,
+ bool enable)
+{
+ int bit, reg, ret;
+
+ ret = bd718x7_xvp_sanity_check(rdev, lim_uV, severity);
+ if (ret)
+ return ret;
+
+ ret = bd718x7_get_buck_ovp_info(rdev->desc->id, &reg, &bit);
+ if (ret)
+ return ret;
+
+ if (enable)
+ return regmap_clear_bits(rdev->regmap, reg, bit);
+
+ return regmap_set_bits(rdev->regmap, reg, bit);
+}
+
+/*
+ * OPS common for BD71847 and BD71850
+ */
+BD718XX_OPS(bd718xx_pickable_range_ldo_ops,
+ regulator_list_voltage_pickable_linear_range, NULL,
+ bd718xx_set_voltage_sel_pickable_restricted,
+ regulator_get_voltage_sel_pickable_regmap, NULL, NULL,
+ bd718x7_set_ldo_uvp, NULL);
+
+/* BD71847 and BD71850 LDO 5 is by default OFF at RUN state */
+static const struct regulator_ops bd718xx_ldo5_ops_hwstate = {
+ .is_enabled = never_enabled_by_hwstate,
+ .list_voltage = regulator_list_voltage_pickable_linear_range,
+ .set_voltage_sel = bd718xx_set_voltage_sel_pickable_restricted,
+ .get_voltage_sel = regulator_get_voltage_sel_pickable_regmap,
+ .set_under_voltage_protection = bd718x7_set_ldo_uvp,
+};
+
+BD718XX_OPS(bd718xx_pickable_range_buck_ops,
+ regulator_list_voltage_pickable_linear_range, NULL,
+ regulator_set_voltage_sel_pickable_regmap,
+ regulator_get_voltage_sel_pickable_regmap,
+ regulator_set_voltage_time_sel, NULL, bd718x7_set_buck_uvp,
+ bd718x7_set_buck_ovp);
+
+BD718XX_OPS(bd718xx_ldo_regulator_ops, regulator_list_voltage_linear_range,
+ NULL, bd718xx_set_voltage_sel_restricted,
+ regulator_get_voltage_sel_regmap, NULL, NULL, bd718x7_set_ldo_uvp,
+ NULL);
+
+BD718XX_OPS(bd718xx_ldo_regulator_nolinear_ops, regulator_list_voltage_table,
+ NULL, bd718xx_set_voltage_sel_restricted,
+ regulator_get_voltage_sel_regmap, NULL, NULL, bd718x7_set_ldo_uvp,
+ NULL);
+
+BD718XX_OPS(bd718xx_buck_regulator_ops, regulator_list_voltage_linear_range,
+ NULL, regulator_set_voltage_sel_regmap,
+ regulator_get_voltage_sel_regmap, regulator_set_voltage_time_sel,
+ NULL, bd718x7_set_buck_uvp, bd718x7_set_buck_ovp);
+
+BD718XX_OPS(bd718xx_buck_regulator_nolinear_ops, regulator_list_voltage_table,
+ regulator_map_voltage_ascend, regulator_set_voltage_sel_regmap,
+ regulator_get_voltage_sel_regmap, regulator_set_voltage_time_sel,
+ NULL, bd718x7_set_buck_uvp, bd718x7_set_buck_ovp);
+
+/*
+ * OPS for BD71837
+ */
+BD718XX_OPS(bd71837_pickable_range_ldo_ops,
+ regulator_list_voltage_pickable_linear_range, NULL,
+ bd71837_set_voltage_sel_pickable_restricted,
+ regulator_get_voltage_sel_pickable_regmap, NULL, NULL,
+ bd718x7_set_ldo_uvp, NULL);
+
+BD718XX_OPS(bd71837_pickable_range_buck_ops,
+ regulator_list_voltage_pickable_linear_range, NULL,
+ bd71837_set_voltage_sel_pickable_restricted,
+ regulator_get_voltage_sel_pickable_regmap,
+ regulator_set_voltage_time_sel, NULL, bd718x7_set_buck_uvp,
+ bd718x7_set_buck_ovp);
+
+BD718XX_OPS(bd71837_ldo_regulator_ops, regulator_list_voltage_linear_range,
+ NULL, bd71837_set_voltage_sel_restricted,
+ regulator_get_voltage_sel_regmap, NULL, NULL, bd718x7_set_ldo_uvp,
+ NULL);
+
+BD718XX_OPS(bd71837_ldo_regulator_nolinear_ops, regulator_list_voltage_table,
+ NULL, bd71837_set_voltage_sel_restricted,
+ regulator_get_voltage_sel_regmap, NULL, NULL, bd718x7_set_ldo_uvp,
+ NULL);
+
+BD718XX_OPS(bd71837_buck_regulator_ops, regulator_list_voltage_linear_range,
+ NULL, bd71837_set_voltage_sel_restricted,
+ regulator_get_voltage_sel_regmap, regulator_set_voltage_time_sel,
+ NULL, bd718x7_set_buck_uvp, bd718x7_set_buck_ovp);
+
+BD718XX_OPS(bd71837_buck_regulator_nolinear_ops, regulator_list_voltage_table,
+ regulator_map_voltage_ascend, bd71837_set_voltage_sel_restricted,
+ regulator_get_voltage_sel_regmap, regulator_set_voltage_time_sel,
+ NULL, bd718x7_set_buck_uvp, bd718x7_set_buck_ovp);
+/*
+ * BD71837 bucks 3 and 4 support defining their enable/disable state also
+ * when buck enable state is under HW state machine control. In that case the
+ * bit [2] in CTRL register is used to indicate if regulator should be ON.
+ */
+static const struct regulator_ops bd71837_buck34_ops_hwctrl = {
+ .is_enabled = bd71837_get_buck34_enable_hwctrl,
+ .list_voltage = regulator_list_voltage_linear_range,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .set_voltage_time_sel = regulator_set_voltage_time_sel,
+ .set_ramp_delay = regulator_set_ramp_delay_regmap,
+ .set_under_voltage_protection = bd718x7_set_buck_uvp,
+ .set_over_voltage_protection = bd718x7_set_buck_ovp,
+};
+
+/*
+ * OPS for all of the ICs - BD718(37/47/50)
+ */
+BD718XX_OPS(bd718xx_dvs_buck_regulator_ops, regulator_list_voltage_linear_range,
+ NULL, regulator_set_voltage_sel_regmap,
+ regulator_get_voltage_sel_regmap, regulator_set_voltage_time_sel,
+ regulator_set_ramp_delay_regmap, bd718x7_set_buck_uvp,
+ bd718x7_set_buck_ovp);
+
+
+
/*
* There is a HW quirk in BD71837. The shutdown sequence timings for
* bucks/LDOs which are controlled via register interface are changed.
diff --git a/drivers/regulator/da9063-regulator.c b/drivers/regulator/da9063-regulator.c
index cf7d5341750e..82f52a2a031a 100644
--- a/drivers/regulator/da9063-regulator.c
+++ b/drivers/regulator/da9063-regulator.c
@@ -412,6 +412,134 @@ static int da9063_ldo_set_suspend_mode(struct regulator_dev *rdev,
return regmap_field_write(regl->suspend_sleep, val);
}
+static unsigned int da9063_get_overdrive_mask(const struct regulator_desc *desc)
+{
+ switch (desc->id) {
+ case DA9063_ID_BCORES_MERGED:
+ case DA9063_ID_BCORE1:
+ return DA9063_BCORE1_OD;
+ case DA9063_ID_BCORE2:
+ return DA9063_BCORE2_OD;
+ case DA9063_ID_BPRO:
+ return DA9063_BPRO_OD;
+ default:
+ return 0;
+ }
+}
+
+static int da9063_buck_set_limit_set_overdrive(struct regulator_dev *rdev,
+ int min_uA, int max_uA,
+ unsigned int overdrive_mask)
+{
+ /*
+ * When enabling overdrive, do it before changing the current limit to
+ * ensure sufficient supply throughout the switch.
+ */
+ struct da9063_regulator *regl = rdev_get_drvdata(rdev);
+ int ret;
+ unsigned int orig_overdrive;
+
+ ret = regmap_read(regl->hw->regmap, DA9063_REG_CONFIG_H,
+ &orig_overdrive);
+ if (ret < 0)
+ return ret;
+ orig_overdrive &= overdrive_mask;
+
+ if (orig_overdrive == 0) {
+ ret = regmap_set_bits(regl->hw->regmap, DA9063_REG_CONFIG_H,
+ overdrive_mask);
+ if (ret < 0)
+ return ret;
+ }
+
+ ret = regulator_set_current_limit_regmap(rdev, min_uA / 2, max_uA / 2);
+ if (ret < 0 && orig_overdrive == 0)
+ /*
+ * regulator_set_current_limit_regmap may have rejected the
+ * change because of unusable min_uA and/or max_uA inputs.
+ * Attempt to restore original overdrive state, ignore failure-
+ * on-failure.
+ */
+ regmap_clear_bits(regl->hw->regmap, DA9063_REG_CONFIG_H,
+ overdrive_mask);
+
+ return ret;
+}
+
+static int da9063_buck_set_limit_clear_overdrive(struct regulator_dev *rdev,
+ int min_uA, int max_uA,
+ unsigned int overdrive_mask)
+{
+ /*
+ * When disabling overdrive, do it after changing the current limit to
+ * ensure sufficient supply throughout the switch.
+ */
+ struct da9063_regulator *regl = rdev_get_drvdata(rdev);
+ int ret, orig_limit;
+
+ ret = regmap_read(rdev->regmap, rdev->desc->csel_reg, &orig_limit);
+ if (ret < 0)
+ return ret;
+
+ ret = regulator_set_current_limit_regmap(rdev, min_uA, max_uA);
+ if (ret < 0)
+ return ret;
+
+ ret = regmap_clear_bits(regl->hw->regmap, DA9063_REG_CONFIG_H,
+ overdrive_mask);
+ if (ret < 0)
+ /*
+ * Attempt to restore original current limit, ignore failure-
+ * on-failure.
+ */
+ regmap_write(rdev->regmap, rdev->desc->csel_reg, orig_limit);
+
+ return ret;
+}
+
+static int da9063_buck_set_current_limit(struct regulator_dev *rdev,
+ int min_uA, int max_uA)
+{
+ unsigned int overdrive_mask, n_currents;
+
+ overdrive_mask = da9063_get_overdrive_mask(rdev->desc);
+ if (overdrive_mask) {
+ n_currents = rdev->desc->n_current_limits;
+ if (n_currents == 0)
+ return -EINVAL;
+
+ if (max_uA > rdev->desc->curr_table[n_currents - 1])
+ return da9063_buck_set_limit_set_overdrive(rdev, min_uA,
+ max_uA,
+ overdrive_mask);
+
+ return da9063_buck_set_limit_clear_overdrive(rdev, min_uA,
+ max_uA,
+ overdrive_mask);
+ }
+ return regulator_set_current_limit_regmap(rdev, min_uA, max_uA);
+}
+
+static int da9063_buck_get_current_limit(struct regulator_dev *rdev)
+{
+ struct da9063_regulator *regl = rdev_get_drvdata(rdev);
+ int val, ret, limit;
+ unsigned int mask;
+
+ limit = regulator_get_current_limit_regmap(rdev);
+ if (limit < 0)
+ return limit;
+ mask = da9063_get_overdrive_mask(rdev->desc);
+ if (mask) {
+ ret = regmap_read(regl->hw->regmap, DA9063_REG_CONFIG_H, &val);
+ if (ret < 0)
+ return ret;
+ if (val & mask)
+ limit *= 2;
+ }
+ return limit;
+}
+
static const struct regulator_ops da9063_buck_ops = {
.enable = regulator_enable_regmap,
.disable = regulator_disable_regmap,
@@ -419,8 +547,8 @@ static const struct regulator_ops da9063_buck_ops = {
.get_voltage_sel = regulator_get_voltage_sel_regmap,
.set_voltage_sel = regulator_set_voltage_sel_regmap,
.list_voltage = regulator_list_voltage_linear,
- .set_current_limit = regulator_set_current_limit_regmap,
- .get_current_limit = regulator_get_current_limit_regmap,
+ .set_current_limit = da9063_buck_set_current_limit,
+ .get_current_limit = da9063_buck_get_current_limit,
.set_mode = da9063_buck_set_mode,
.get_mode = da9063_buck_get_mode,
.get_status = da9063_buck_get_status,
diff --git a/drivers/regulator/dbx500-prcmu.c b/drivers/regulator/dbx500-prcmu.c
index 8b70bfe88019..a45c1e1ac7ef 100644
--- a/drivers/regulator/dbx500-prcmu.c
+++ b/drivers/regulator/dbx500-prcmu.c
@@ -117,11 +117,11 @@ ux500_regulator_debug_init(struct platform_device *pdev,
rdebug.dir = debugfs_create_dir("ux500-regulator", NULL);
/* create "status" file */
- debugfs_create_file("status", S_IRUGO, rdebug.dir, &pdev->dev,
+ debugfs_create_file("status", 0444, rdebug.dir, &pdev->dev,
&ux500_regulator_status_fops);
/* create "power-state-count" file */
- debugfs_create_file("power-state-count", S_IRUGO, rdebug.dir,
+ debugfs_create_file("power-state-count", 0444, rdebug.dir,
&pdev->dev, &ux500_regulator_power_state_cnt_fops);
rdebug.regulator_array = regulator_info;
diff --git a/drivers/regulator/devres.c b/drivers/regulator/devres.c
index a8de0aa88bad..9113233f41cd 100644
--- a/drivers/regulator/devres.c
+++ b/drivers/regulator/devres.c
@@ -205,35 +205,6 @@ struct regulator_dev *devm_regulator_register(struct device *dev,
}
EXPORT_SYMBOL_GPL(devm_regulator_register);
-static int devm_rdev_match(struct device *dev, void *res, void *data)
-{
- struct regulator_dev **r = res;
- if (!r || !*r) {
- WARN_ON(!r || !*r);
- return 0;
- }
- return *r == data;
-}
-
-/**
- * devm_regulator_unregister - Resource managed regulator_unregister()
- * @dev: device to supply
- * @rdev: regulator to free
- *
- * Unregister a regulator registered with devm_regulator_register().
- * Normally this function will not need to be called and the resource
- * management code will ensure that the resource is freed.
- */
-void devm_regulator_unregister(struct device *dev, struct regulator_dev *rdev)
-{
- int rc;
-
- rc = devres_release(dev, devm_rdev_release, devm_rdev_match, rdev);
- if (rc != 0)
- WARN_ON(rc);
-}
-EXPORT_SYMBOL_GPL(devm_regulator_unregister);
-
struct regulator_supply_alias_match {
struct device *dev;
const char *id;
@@ -296,19 +267,8 @@ int devm_regulator_register_supply_alias(struct device *dev, const char *id,
}
EXPORT_SYMBOL_GPL(devm_regulator_register_supply_alias);
-/**
- * devm_regulator_unregister_supply_alias - Resource managed
- * regulator_unregister_supply_alias()
- *
- * @dev: device to supply
- * @id: supply name or regulator ID
- *
- * Unregister an alias registered with
- * devm_regulator_register_supply_alias(). Normally this function
- * will not need to be called and the resource management code
- * will ensure that the resource is freed.
- */
-void devm_regulator_unregister_supply_alias(struct device *dev, const char *id)
+static void devm_regulator_unregister_supply_alias(struct device *dev,
+ const char *id)
{
struct regulator_supply_alias_match match;
int rc;
@@ -321,7 +281,6 @@ void devm_regulator_unregister_supply_alias(struct device *dev, const char *id)
if (rc != 0)
WARN_ON(rc);
}
-EXPORT_SYMBOL_GPL(devm_regulator_unregister_supply_alias);
/**
* devm_regulator_bulk_register_supply_alias - Managed register
@@ -373,30 +332,6 @@ err:
}
EXPORT_SYMBOL_GPL(devm_regulator_bulk_register_supply_alias);
-/**
- * devm_regulator_bulk_unregister_supply_alias - Managed unregister
- * multiple aliases
- *
- * @dev: device to supply
- * @id: list of supply names or regulator IDs
- * @num_id: number of aliases to unregister
- *
- * Unregister aliases registered with
- * devm_regulator_bulk_register_supply_alias(). Normally this function
- * will not need to be called and the resource management code
- * will ensure that the resource is freed.
- */
-void devm_regulator_bulk_unregister_supply_alias(struct device *dev,
- const char *const *id,
- int num_id)
-{
- int i;
-
- for (i = 0; i < num_id; ++i)
- devm_regulator_unregister_supply_alias(dev, id[i]);
-}
-EXPORT_SYMBOL_GPL(devm_regulator_bulk_unregister_supply_alias);
-
struct regulator_notifier_match {
struct regulator *regulator;
struct notifier_block *nb;
diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index 39284610a536..599ad201dca7 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -287,8 +287,9 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
drvdata->dev = devm_regulator_register(&pdev->dev, &drvdata->desc,
&cfg);
if (IS_ERR(drvdata->dev)) {
- ret = PTR_ERR(drvdata->dev);
- dev_err(&pdev->dev, "Failed to register regulator: %d\n", ret);
+ ret = dev_err_probe(&pdev->dev, PTR_ERR(drvdata->dev),
+ "Failed to register regulator: %ld\n",
+ PTR_ERR(drvdata->dev));
return ret;
}
diff --git a/drivers/regulator/hi6421v600-regulator.c b/drivers/regulator/hi6421v600-regulator.c
index 845bc3b4026d..662d87ae61cb 100644
--- a/drivers/regulator/hi6421v600-regulator.c
+++ b/drivers/regulator/hi6421v600-regulator.c
@@ -4,7 +4,7 @@
//
// Copyright (c) 2013 Linaro Ltd.
// Copyright (c) 2011 HiSilicon Ltd.
-// Copyright (c) 2020-2021 Huawei Technologies Co., Ltd
+// Copyright (c) 2020-2021 Huawei Technologies Co., Ltd.
//
// Guodong Xu <guodong.xu@linaro.org>
@@ -27,34 +27,34 @@ struct hi6421_spmi_reg_info {
u32 eco_uA;
};
-static const unsigned int ldo3_voltages[] = {
+static const unsigned int range_1v5_to_2v0[] = {
1500000, 1550000, 1600000, 1650000,
1700000, 1725000, 1750000, 1775000,
1800000, 1825000, 1850000, 1875000,
1900000, 1925000, 1950000, 2000000
};
-static const unsigned int ldo4_voltages[] = {
+static const unsigned int range_1v725_to_1v9[] = {
1725000, 1750000, 1775000, 1800000,
1825000, 1850000, 1875000, 1900000
};
-static const unsigned int ldo9_voltages[] = {
+static const unsigned int range_1v75_to_3v3[] = {
1750000, 1800000, 1825000, 2800000,
2850000, 2950000, 3000000, 3300000
};
-static const unsigned int ldo15_voltages[] = {
+static const unsigned int range_1v8_to_3v0[] = {
1800000, 1850000, 2400000, 2600000,
2700000, 2850000, 2950000, 3000000
};
-static const unsigned int ldo17_voltages[] = {
+static const unsigned int range_2v5_to_3v3[] = {
2500000, 2600000, 2700000, 2800000,
3000000, 3100000, 3200000, 3300000
};
-static const unsigned int ldo34_voltages[] = {
+static const unsigned int range_2v6_to_3v3[] = {
2600000, 2700000, 2800000, 2900000,
3000000, 3100000, 3200000, 3300000
};
@@ -73,14 +73,14 @@ static const unsigned int ldo34_voltages[] = {
*/
#define HI6421V600_LDO(_id, vtable, ereg, emask, vreg, \
odelay, etime, ecomask, ecoamp) \
- [HI6421V600_##_id] = { \
+ [hi6421v600_##_id] = { \
.desc = { \
.name = #_id, \
.of_match = of_match_ptr(#_id), \
.regulators_node = of_match_ptr("regulators"), \
.ops = &hi6421_spmi_ldo_rops, \
.type = REGULATOR_VOLTAGE, \
- .id = HI6421V600_##_id, \
+ .id = hi6421v600_##_id, \
.owner = THIS_MODULE, \
.volt_table = vtable, \
.n_voltages = ARRAY_SIZE(vtable), \
@@ -185,46 +185,46 @@ static const struct regulator_ops hi6421_spmi_ldo_rops = {
/* HI6421v600 regulators with known registers */
enum hi6421_spmi_regulator_id {
- HI6421V600_LDO3,
- HI6421V600_LDO4,
- HI6421V600_LDO9,
- HI6421V600_LDO15,
- HI6421V600_LDO16,
- HI6421V600_LDO17,
- HI6421V600_LDO33,
- HI6421V600_LDO34,
+ hi6421v600_ldo3,
+ hi6421v600_ldo4,
+ hi6421v600_ldo9,
+ hi6421v600_ldo15,
+ hi6421v600_ldo16,
+ hi6421v600_ldo17,
+ hi6421v600_ldo33,
+ hi6421v600_ldo34,
};
static struct hi6421_spmi_reg_info regulator_info[] = {
- HI6421V600_LDO(LDO3, ldo3_voltages,
+ HI6421V600_LDO(ldo3, range_1v5_to_2v0,
0x16, 0x01, 0x51,
20000, 120,
0, 0),
- HI6421V600_LDO(LDO4, ldo4_voltages,
+ HI6421V600_LDO(ldo4, range_1v725_to_1v9,
0x17, 0x01, 0x52,
20000, 120,
0x10, 10000),
- HI6421V600_LDO(LDO9, ldo9_voltages,
+ HI6421V600_LDO(ldo9, range_1v75_to_3v3,
0x1c, 0x01, 0x57,
20000, 360,
0x10, 10000),
- HI6421V600_LDO(LDO15, ldo15_voltages,
+ HI6421V600_LDO(ldo15, range_1v8_to_3v0,
0x21, 0x01, 0x5c,
20000, 360,
0x10, 10000),
- HI6421V600_LDO(LDO16, ldo15_voltages,
+ HI6421V600_LDO(ldo16, range_1v8_to_3v0,
0x22, 0x01, 0x5d,
20000, 360,
0x10, 10000),
- HI6421V600_LDO(LDO17, ldo17_voltages,
+ HI6421V600_LDO(ldo17, range_2v5_to_3v3,
0x23, 0x01, 0x5e,
20000, 120,
0x10, 10000),
- HI6421V600_LDO(LDO33, ldo17_voltages,
+ HI6421V600_LDO(ldo33, range_2v5_to_3v3,
0x32, 0x01, 0x6d,
20000, 120,
0, 0),
- HI6421V600_LDO(LDO34, ldo34_voltages,
+ HI6421V600_LDO(ldo34, range_2v6_to_3v3,
0x33, 0x01, 0x6e,
20000, 120,
0, 0),
diff --git a/drivers/regulator/irq_helpers.c b/drivers/regulator/irq_helpers.c
index fabe2e53093e..522764435575 100644
--- a/drivers/regulator/irq_helpers.c
+++ b/drivers/regulator/irq_helpers.c
@@ -184,7 +184,7 @@ static irqreturn_t regulator_notifier_isr(int irq, void *data)
* If retry_count exceeds the given safety limit we call IC specific die
* handler which can try disabling regulator(s).
*
- * If no die handler is given we will just bug() as a last resort.
+ * If no die handler is given we will just power-off as a last resort.
*
* We could try disabling all associated rdevs - but we might shoot
* ourselves in the head and leave the problematic regulator enabled. So
diff --git a/drivers/regulator/mt6358-regulator.c b/drivers/regulator/mt6358-regulator.c
index 0d35be4e0e5a..eb8027813b99 100644
--- a/drivers/regulator/mt6358-regulator.c
+++ b/drivers/regulator/mt6358-regulator.c
@@ -28,18 +28,15 @@ struct mt6358_regulator_info {
u32 qi;
const u32 *index_table;
unsigned int n_table;
- u32 vsel_shift;
u32 da_vsel_reg;
u32 da_vsel_mask;
- u32 da_vsel_shift;
u32 modeset_reg;
u32 modeset_mask;
- u32 modeset_shift;
};
#define MT6358_BUCK(match, vreg, min, max, step, \
volt_ranges, vosel_mask, _da_vsel_reg, _da_vsel_mask, \
- _da_vsel_shift, _modeset_reg, _modeset_shift) \
+ _modeset_reg, _modeset_shift) \
[MT6358_ID_##vreg] = { \
.desc = { \
.name = #vreg, \
@@ -61,15 +58,13 @@ struct mt6358_regulator_info {
.qi = BIT(0), \
.da_vsel_reg = _da_vsel_reg, \
.da_vsel_mask = _da_vsel_mask, \
- .da_vsel_shift = _da_vsel_shift, \
.modeset_reg = _modeset_reg, \
.modeset_mask = BIT(_modeset_shift), \
- .modeset_shift = _modeset_shift \
}
#define MT6358_LDO(match, vreg, ldo_volt_table, \
ldo_index_table, enreg, enbit, vosel, \
- vosel_mask, vosel_shift) \
+ vosel_mask) \
[MT6358_ID_##vreg] = { \
.desc = { \
.name = #vreg, \
@@ -89,12 +84,11 @@ struct mt6358_regulator_info {
.qi = BIT(15), \
.index_table = ldo_index_table, \
.n_table = ARRAY_SIZE(ldo_index_table), \
- .vsel_shift = vosel_shift, \
}
#define MT6358_LDO1(match, vreg, min, max, step, \
volt_ranges, _da_vsel_reg, _da_vsel_mask, \
- _da_vsel_shift, vosel, vosel_mask) \
+ vosel, vosel_mask) \
[MT6358_ID_##vreg] = { \
.desc = { \
.name = #vreg, \
@@ -113,7 +107,6 @@ struct mt6358_regulator_info {
}, \
.da_vsel_reg = _da_vsel_reg, \
.da_vsel_mask = _da_vsel_mask, \
- .da_vsel_shift = _da_vsel_shift, \
.status_reg = MT6358_LDO_##vreg##_DBG1, \
.qi = BIT(0), \
}
@@ -260,9 +253,9 @@ static int mt6358_set_voltage_sel(struct regulator_dev *rdev,
pvol = info->index_table;
idx = pvol[selector];
+ idx <<= ffs(info->desc.vsel_mask) - 1;
ret = regmap_update_bits(rdev->regmap, info->desc.vsel_reg,
- info->desc.vsel_mask,
- idx << info->vsel_shift);
+ info->desc.vsel_mask, idx);
return ret;
}
@@ -282,7 +275,8 @@ static int mt6358_get_voltage_sel(struct regulator_dev *rdev)
return ret;
}
- selector = (selector & info->desc.vsel_mask) >> info->vsel_shift;
+ selector = (selector & info->desc.vsel_mask) >>
+ (ffs(info->desc.vsel_mask) - 1);
pvol = info->index_table;
for (idx = 0; idx < info->desc.n_voltages; idx++) {
if (pvol[idx] == selector)
@@ -305,7 +299,7 @@ static int mt6358_get_buck_voltage_sel(struct regulator_dev *rdev)
return ret;
}
- ret = (regval >> info->da_vsel_shift) & info->da_vsel_mask;
+ ret = (regval & info->da_vsel_mask) >> (ffs(info->da_vsel_mask) - 1);
return ret;
}
@@ -342,11 +336,10 @@ static int mt6358_regulator_set_mode(struct regulator_dev *rdev,
return -EINVAL;
}
- dev_dbg(&rdev->dev, "mt6358 buck set_mode %#x, %#x, %#x, %#x\n",
- info->modeset_reg, info->modeset_mask,
- info->modeset_shift, val);
+ dev_dbg(&rdev->dev, "mt6358 buck set_mode %#x, %#x, %#x\n",
+ info->modeset_reg, info->modeset_mask, val);
- val <<= info->modeset_shift;
+ val <<= ffs(info->modeset_mask) - 1;
return regmap_update_bits(rdev->regmap, info->modeset_reg,
info->modeset_mask, val);
@@ -364,7 +357,7 @@ static unsigned int mt6358_regulator_get_mode(struct regulator_dev *rdev)
return ret;
}
- switch ((regval & info->modeset_mask) >> info->modeset_shift) {
+ switch ((regval & info->modeset_mask) >> (ffs(info->modeset_mask) - 1)) {
case MT6358_BUCK_MODE_AUTO:
return REGULATOR_MODE_NORMAL;
case MT6358_BUCK_MODE_FORCE_PWM:
@@ -412,30 +405,30 @@ static const struct regulator_ops mt6358_volt_fixed_ops = {
static struct mt6358_regulator_info mt6358_regulators[] = {
MT6358_BUCK("buck_vdram1", VDRAM1, 500000, 2087500, 12500,
buck_volt_range2, 0x7f, MT6358_BUCK_VDRAM1_DBG0, 0x7f,
- 0, MT6358_VDRAM1_ANA_CON0, 8),
+ MT6358_VDRAM1_ANA_CON0, 8),
MT6358_BUCK("buck_vcore", VCORE, 500000, 1293750, 6250,
buck_volt_range1, 0x7f, MT6358_BUCK_VCORE_DBG0, 0x7f,
- 0, MT6358_VCORE_VGPU_ANA_CON0, 1),
+ MT6358_VCORE_VGPU_ANA_CON0, 1),
MT6358_BUCK("buck_vpa", VPA, 500000, 3650000, 50000,
- buck_volt_range3, 0x3f, MT6358_BUCK_VPA_DBG0, 0x3f, 0,
+ buck_volt_range3, 0x3f, MT6358_BUCK_VPA_DBG0, 0x3f,
MT6358_VPA_ANA_CON0, 3),
MT6358_BUCK("buck_vproc11", VPROC11, 500000, 1293750, 6250,
buck_volt_range1, 0x7f, MT6358_BUCK_VPROC11_DBG0, 0x7f,
- 0, MT6358_VPROC_ANA_CON0, 1),
+ MT6358_VPROC_ANA_CON0, 1),
MT6358_BUCK("buck_vproc12", VPROC12, 500000, 1293750, 6250,
buck_volt_range1, 0x7f, MT6358_BUCK_VPROC12_DBG0, 0x7f,
- 0, MT6358_VPROC_ANA_CON0, 2),
+ MT6358_VPROC_ANA_CON0, 2),
MT6358_BUCK("buck_vgpu", VGPU, 500000, 1293750, 6250,
- buck_volt_range1, 0x7f, MT6358_BUCK_VGPU_ELR0, 0x7f, 0,
+ buck_volt_range1, 0x7f, MT6358_BUCK_VGPU_ELR0, 0x7f,
MT6358_VCORE_VGPU_ANA_CON0, 2),
MT6358_BUCK("buck_vs2", VS2, 500000, 2087500, 12500,
- buck_volt_range2, 0x7f, MT6358_BUCK_VS2_DBG0, 0x7f, 0,
+ buck_volt_range2, 0x7f, MT6358_BUCK_VS2_DBG0, 0x7f,
MT6358_VS2_ANA_CON0, 8),
MT6358_BUCK("buck_vmodem", VMODEM, 500000, 1293750, 6250,
buck_volt_range1, 0x7f, MT6358_BUCK_VMODEM_DBG0, 0x7f,
- 0, MT6358_VMODEM_ANA_CON0, 8),
+ MT6358_VMODEM_ANA_CON0, 8),
MT6358_BUCK("buck_vs1", VS1, 1000000, 2587500, 12500,
- buck_volt_range4, 0x7f, MT6358_BUCK_VS1_DBG0, 0x7f, 0,
+ buck_volt_range4, 0x7f, MT6358_BUCK_VS1_DBG0, 0x7f,
MT6358_VS1_ANA_CON0, 8),
MT6358_REG_FIXED("ldo_vrf12", VRF12,
MT6358_LDO_VRF12_CON0, 0, 1200000),
@@ -457,49 +450,49 @@ static struct mt6358_regulator_info mt6358_regulators[] = {
MT6358_REG_FIXED("ldo_vaud28", VAUD28,
MT6358_LDO_VAUD28_CON0, 0, 2800000),
MT6358_LDO("ldo_vdram2", VDRAM2, vdram2_voltages, vdram2_idx,
- MT6358_LDO_VDRAM2_CON0, 0, MT6358_LDO_VDRAM2_ELR0, 0xf, 0),
+ MT6358_LDO_VDRAM2_CON0, 0, MT6358_LDO_VDRAM2_ELR0, 0xf),
MT6358_LDO("ldo_vsim1", VSIM1, vsim_voltages, vsim_idx,
- MT6358_LDO_VSIM1_CON0, 0, MT6358_VSIM1_ANA_CON0, 0xf00, 8),
+ MT6358_LDO_VSIM1_CON0, 0, MT6358_VSIM1_ANA_CON0, 0xf00),
MT6358_LDO("ldo_vibr", VIBR, vibr_voltages, vibr_idx,
- MT6358_LDO_VIBR_CON0, 0, MT6358_VIBR_ANA_CON0, 0xf00, 8),
+ MT6358_LDO_VIBR_CON0, 0, MT6358_VIBR_ANA_CON0, 0xf00),
MT6358_LDO("ldo_vusb", VUSB, vusb_voltages, vusb_idx,
- MT6358_LDO_VUSB_CON0_0, 0, MT6358_VUSB_ANA_CON0, 0x700, 8),
+ MT6358_LDO_VUSB_CON0_0, 0, MT6358_VUSB_ANA_CON0, 0x700),
MT6358_LDO("ldo_vcamd", VCAMD, vcamd_voltages, vcamd_idx,
- MT6358_LDO_VCAMD_CON0, 0, MT6358_VCAMD_ANA_CON0, 0xf00, 8),
+ MT6358_LDO_VCAMD_CON0, 0, MT6358_VCAMD_ANA_CON0, 0xf00),
MT6358_LDO("ldo_vefuse", VEFUSE, vefuse_voltages, vefuse_idx,
- MT6358_LDO_VEFUSE_CON0, 0, MT6358_VEFUSE_ANA_CON0, 0xf00, 8),
+ MT6358_LDO_VEFUSE_CON0, 0, MT6358_VEFUSE_ANA_CON0, 0xf00),
MT6358_LDO("ldo_vmch", VMCH, vmch_vemc_voltages, vmch_vemc_idx,
- MT6358_LDO_VMCH_CON0, 0, MT6358_VMCH_ANA_CON0, 0x700, 8),
+ MT6358_LDO_VMCH_CON0, 0, MT6358_VMCH_ANA_CON0, 0x700),
MT6358_LDO("ldo_vcama1", VCAMA1, vcama_voltages, vcama_idx,
- MT6358_LDO_VCAMA1_CON0, 0, MT6358_VCAMA1_ANA_CON0, 0xf00, 8),
+ MT6358_LDO_VCAMA1_CON0, 0, MT6358_VCAMA1_ANA_CON0, 0xf00),
MT6358_LDO("ldo_vemc", VEMC, vmch_vemc_voltages, vmch_vemc_idx,
- MT6358_LDO_VEMC_CON0, 0, MT6358_VEMC_ANA_CON0, 0x700, 8),
+ MT6358_LDO_VEMC_CON0, 0, MT6358_VEMC_ANA_CON0, 0x700),
MT6358_LDO("ldo_vcn33_bt", VCN33_BT, vcn33_bt_wifi_voltages,
vcn33_bt_wifi_idx, MT6358_LDO_VCN33_CON0_0,
- 0, MT6358_VCN33_ANA_CON0, 0x300, 8),
+ 0, MT6358_VCN33_ANA_CON0, 0x300),
MT6358_LDO("ldo_vcn33_wifi", VCN33_WIFI, vcn33_bt_wifi_voltages,
vcn33_bt_wifi_idx, MT6358_LDO_VCN33_CON0_1,
- 0, MT6358_VCN33_ANA_CON0, 0x300, 8),
+ 0, MT6358_VCN33_ANA_CON0, 0x300),
MT6358_LDO("ldo_vcama2", VCAMA2, vcama_voltages, vcama_idx,
- MT6358_LDO_VCAMA2_CON0, 0, MT6358_VCAMA2_ANA_CON0, 0xf00, 8),
+ MT6358_LDO_VCAMA2_CON0, 0, MT6358_VCAMA2_ANA_CON0, 0xf00),
MT6358_LDO("ldo_vmc", VMC, vmc_voltages, vmc_idx,
- MT6358_LDO_VMC_CON0, 0, MT6358_VMC_ANA_CON0, 0xf00, 8),
+ MT6358_LDO_VMC_CON0, 0, MT6358_VMC_ANA_CON0, 0xf00),
MT6358_LDO("ldo_vldo28", VLDO28, vldo28_voltages, vldo28_idx,
MT6358_LDO_VLDO28_CON0_0, 0,
- MT6358_VLDO28_ANA_CON0, 0x300, 8),
+ MT6358_VLDO28_ANA_CON0, 0x300),
MT6358_LDO("ldo_vsim2", VSIM2, vsim_voltages, vsim_idx,
- MT6358_LDO_VSIM2_CON0, 0, MT6358_VSIM2_ANA_CON0, 0xf00, 8),
+ MT6358_LDO_VSIM2_CON0, 0, MT6358_VSIM2_ANA_CON0, 0xf00),
MT6358_LDO1("ldo_vsram_proc11", VSRAM_PROC11, 500000, 1293750, 6250,
- buck_volt_range1, MT6358_LDO_VSRAM_PROC11_DBG0, 0x7f, 8,
+ buck_volt_range1, MT6358_LDO_VSRAM_PROC11_DBG0, 0x7f00,
MT6358_LDO_VSRAM_CON0, 0x7f),
MT6358_LDO1("ldo_vsram_others", VSRAM_OTHERS, 500000, 1293750, 6250,
- buck_volt_range1, MT6358_LDO_VSRAM_OTHERS_DBG0, 0x7f, 8,
+ buck_volt_range1, MT6358_LDO_VSRAM_OTHERS_DBG0, 0x7f00,
MT6358_LDO_VSRAM_CON2, 0x7f),
MT6358_LDO1("ldo_vsram_gpu", VSRAM_GPU, 500000, 1293750, 6250,
- buck_volt_range1, MT6358_LDO_VSRAM_GPU_DBG0, 0x7f, 8,
+ buck_volt_range1, MT6358_LDO_VSRAM_GPU_DBG0, 0x7f00,
MT6358_LDO_VSRAM_CON3, 0x7f),
MT6358_LDO1("ldo_vsram_proc12", VSRAM_PROC12, 500000, 1293750, 6250,
- buck_volt_range1, MT6358_LDO_VSRAM_PROC12_DBG0, 0x7f, 8,
+ buck_volt_range1, MT6358_LDO_VSRAM_PROC12_DBG0, 0x7f00,
MT6358_LDO_VSRAM_CON1, 0x7f),
};
diff --git a/drivers/regulator/mt6359-regulator.c b/drivers/regulator/mt6359-regulator.c
index 7ce0bd377a08..de3b0462832c 100644
--- a/drivers/regulator/mt6359-regulator.c
+++ b/drivers/regulator/mt6359-regulator.c
@@ -27,7 +27,6 @@
* @qi: Mask for query enable signal status of regulators.
* @modeset_reg: for operating AUTO/PWM mode register.
* @modeset_mask: MASK for operating modeset register.
- * @modeset_shift: SHIFT for operating modeset register.
*/
struct mt6359_regulator_info {
struct regulator_desc desc;
@@ -35,10 +34,8 @@ struct mt6359_regulator_info {
u32 qi;
u32 modeset_reg;
u32 modeset_mask;
- u32 modeset_shift;
u32 lp_mode_reg;
u32 lp_mode_mask;
- u32 lp_mode_shift;
};
#define MT6359_BUCK(match, _name, min, max, step, \
@@ -68,10 +65,8 @@ struct mt6359_regulator_info {
.qi = BIT(0), \
.lp_mode_reg = _lp_mode_reg, \
.lp_mode_mask = BIT(_lp_mode_shift), \
- .lp_mode_shift = _lp_mode_shift, \
.modeset_reg = _modeset_reg, \
.modeset_mask = BIT(_modeset_shift), \
- .modeset_shift = _modeset_shift \
}
#define MT6359_LDO_LINEAR(match, _name, min, max, step, \
@@ -282,8 +277,10 @@ static unsigned int mt6359_regulator_get_mode(struct regulator_dev *rdev)
return ret;
}
- if ((regval & info->modeset_mask) >> info->modeset_shift ==
- MT6359_BUCK_MODE_FORCE_PWM)
+ regval &= info->modeset_mask;
+ regval >>= ffs(info->modeset_mask) - 1;
+
+ if (regval == MT6359_BUCK_MODE_FORCE_PWM)
return REGULATOR_MODE_FAST;
ret = regmap_read(rdev->regmap, info->lp_mode_reg, &regval);
@@ -310,7 +307,7 @@ static int mt6359_regulator_set_mode(struct regulator_dev *rdev,
switch (mode) {
case REGULATOR_MODE_FAST:
val = MT6359_BUCK_MODE_FORCE_PWM;
- val <<= info->modeset_shift;
+ val <<= ffs(info->modeset_mask) - 1;
ret = regmap_update_bits(rdev->regmap,
info->modeset_reg,
info->modeset_mask,
@@ -319,14 +316,14 @@ static int mt6359_regulator_set_mode(struct regulator_dev *rdev,
case REGULATOR_MODE_NORMAL:
if (curr_mode == REGULATOR_MODE_FAST) {
val = MT6359_BUCK_MODE_AUTO;
- val <<= info->modeset_shift;
+ val <<= ffs(info->modeset_mask) - 1;
ret = regmap_update_bits(rdev->regmap,
info->modeset_reg,
info->modeset_mask,
val);
} else if (curr_mode == REGULATOR_MODE_IDLE) {
val = MT6359_BUCK_MODE_NORMAL;
- val <<= info->lp_mode_shift;
+ val <<= ffs(info->lp_mode_mask) - 1;
ret = regmap_update_bits(rdev->regmap,
info->lp_mode_reg,
info->lp_mode_mask,
@@ -336,7 +333,7 @@ static int mt6359_regulator_set_mode(struct regulator_dev *rdev,
break;
case REGULATOR_MODE_IDLE:
val = MT6359_BUCK_MODE_LP >> 1;
- val <<= info->lp_mode_shift;
+ val <<= ffs(info->lp_mode_mask) - 1;
ret = regmap_update_bits(rdev->regmap,
info->lp_mode_reg,
info->lp_mode_mask,
diff --git a/drivers/regulator/mt6397-regulator.c b/drivers/regulator/mt6397-regulator.c
index 0a30df5e414f..b9bf7ade1f8a 100644
--- a/drivers/regulator/mt6397-regulator.c
+++ b/drivers/regulator/mt6397-regulator.c
@@ -32,7 +32,6 @@ struct mt6397_regulator_info {
u32 vselctrl_mask;
u32 modeset_reg;
u32 modeset_mask;
- u32 modeset_shift;
};
#define MT6397_BUCK(match, vreg, min, max, step, volt_ranges, enreg, \
@@ -61,7 +60,6 @@ struct mt6397_regulator_info {
.vselctrl_mask = BIT(1), \
.modeset_reg = _modeset_reg, \
.modeset_mask = BIT(_modeset_shift), \
- .modeset_shift = _modeset_shift \
}
#define MT6397_LDO(match, vreg, ldo_volt_table, enreg, enbit, vosel, \
@@ -175,11 +173,11 @@ static int mt6397_regulator_set_mode(struct regulator_dev *rdev,
goto err_mode;
}
- dev_dbg(&rdev->dev, "mt6397 buck set_mode %#x, %#x, %#x, %#x\n",
- info->modeset_reg, info->modeset_mask,
- info->modeset_shift, val);
+ dev_dbg(&rdev->dev, "mt6397 buck set_mode %#x, %#x, %#x\n",
+ info->modeset_reg, info->modeset_mask, val);
+
+ val <<= ffs(info->modeset_mask) - 1;
- val <<= info->modeset_shift;
ret = regmap_update_bits(rdev->regmap, info->modeset_reg,
info->modeset_mask, val);
err_mode:
@@ -204,7 +202,10 @@ static unsigned int mt6397_regulator_get_mode(struct regulator_dev *rdev)
return ret;
}
- switch ((regval & info->modeset_mask) >> info->modeset_shift) {
+ regval &= info->modeset_mask;
+ regval >>= ffs(info->modeset_mask) - 1;
+
+ switch (regval) {
case MT6397_BUCK_MODE_AUTO:
return REGULATOR_MODE_NORMAL;
case MT6397_BUCK_MODE_FORCE_PWM:
diff --git a/drivers/regulator/rt5033-regulator.c b/drivers/regulator/rt5033-regulator.c
index 0e7311629165..da4cf5a6acc2 100644
--- a/drivers/regulator/rt5033-regulator.c
+++ b/drivers/regulator/rt5033-regulator.c
@@ -13,6 +13,16 @@
#include <linux/mfd/rt5033-private.h>
#include <linux/regulator/of_regulator.h>
+static const struct linear_range rt5033_buck_ranges[] = {
+ REGULATOR_LINEAR_RANGE(1000000, 0, 20, 100000),
+ REGULATOR_LINEAR_RANGE(3000000, 21, 31, 0),
+};
+
+static const struct linear_range rt5033_ldo_ranges[] = {
+ REGULATOR_LINEAR_RANGE(1200000, 0, 18, 100000),
+ REGULATOR_LINEAR_RANGE(3000000, 19, 31, 0),
+};
+
static const struct regulator_ops rt5033_safe_ldo_ops = {
.is_enabled = regulator_is_enabled_regmap,
.enable = regulator_enable_regmap,
@@ -24,8 +34,7 @@ static const struct regulator_ops rt5033_buck_ops = {
.is_enabled = regulator_is_enabled_regmap,
.enable = regulator_enable_regmap,
.disable = regulator_disable_regmap,
- .list_voltage = regulator_list_voltage_linear,
- .map_voltage = regulator_map_voltage_linear,
+ .list_voltage = regulator_list_voltage_linear_range,
.get_voltage_sel = regulator_get_voltage_sel_regmap,
.set_voltage_sel = regulator_set_voltage_sel_regmap,
};
@@ -40,8 +49,8 @@ static const struct regulator_desc rt5033_supported_regulators[] = {
.type = REGULATOR_VOLTAGE,
.owner = THIS_MODULE,
.n_voltages = RT5033_REGULATOR_BUCK_VOLTAGE_STEP_NUM,
- .min_uV = RT5033_REGULATOR_BUCK_VOLTAGE_MIN,
- .uV_step = RT5033_REGULATOR_BUCK_VOLTAGE_STEP,
+ .linear_ranges = rt5033_buck_ranges,
+ .n_linear_ranges = ARRAY_SIZE(rt5033_buck_ranges),
.enable_reg = RT5033_REG_CTRL,
.enable_mask = RT5033_CTRL_EN_BUCK_MASK,
.vsel_reg = RT5033_REG_BUCK_CTRL,
@@ -56,8 +65,8 @@ static const struct regulator_desc rt5033_supported_regulators[] = {
.type = REGULATOR_VOLTAGE,
.owner = THIS_MODULE,
.n_voltages = RT5033_REGULATOR_LDO_VOLTAGE_STEP_NUM,
- .min_uV = RT5033_REGULATOR_LDO_VOLTAGE_MIN,
- .uV_step = RT5033_REGULATOR_LDO_VOLTAGE_STEP,
+ .linear_ranges = rt5033_ldo_ranges,
+ .n_linear_ranges = ARRAY_SIZE(rt5033_ldo_ranges),
.enable_reg = RT5033_REG_CTRL,
.enable_mask = RT5033_CTRL_EN_LDO_MASK,
.vsel_reg = RT5033_REG_LDO_CTRL,
diff --git a/drivers/regulator/rt6245-regulator.c b/drivers/regulator/rt6245-regulator.c
index d3299a72fd10..cb22a207e9ff 100644
--- a/drivers/regulator/rt6245-regulator.c
+++ b/drivers/regulator/rt6245-regulator.c
@@ -144,7 +144,7 @@ static int rt6245_init_device_properties(struct device *dev)
static int rt6245_reg_write(void *context, unsigned int reg, unsigned int val)
{
struct i2c_client *i2c = context;
- const u8 func_base[] = { 0x6F, 0x73, 0x78, 0x61, 0x7C, 0 };
+ static const u8 func_base[] = { 0x6F, 0x73, 0x78, 0x61, 0x7C, 0 };
unsigned int code, bit_count;
code = func_base[reg];
diff --git a/drivers/regulator/rtq2134-regulator.c b/drivers/regulator/rtq2134-regulator.c
new file mode 100644
index 000000000000..f21e3f8b21f2
--- /dev/null
+++ b/drivers/regulator/rtq2134-regulator.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/bitops.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+
+enum {
+ RTQ2134_IDX_BUCK1 = 0,
+ RTQ2134_IDX_BUCK2,
+ RTQ2134_IDX_BUCK3,
+ RTQ2134_IDX_MAX
+};
+
+#define RTQ2134_AUTO_MODE 0
+#define RTQ2134_FCCM_MODE 1
+
+#define RTQ2134_BUCK_DVS0_CTRL 0
+#define RTQ2134_BUCK_VSEL_CTRL 2
+
+#define RTQ2134_REG_IO_CHIPNAME 0x01
+#define RTQ2134_REG_FLT_RECORDTEMP 0x13
+#define RTQ2134_REG_FLT_RECORDBUCK(_id) (0x14 + (_id))
+#define RTQ2134_REG_FLT_BUCKCTRL(_id) (0x37 + (_id))
+#define RTQ2134_REG_BUCK1_CFG0 0x42
+#define RTQ2134_REG_BUCK1_DVS0CFG1 0x48
+#define RTQ2134_REG_BUCK1_DVS0CFG0 0x49
+#define RTQ2134_REG_BUCK1_DVS1CFG1 0x4A
+#define RTQ2134_REG_BUCK1_DVS1CFG0 0x4B
+#define RTQ2134_REG_BUCK1_DVSCFG 0x52
+#define RTQ2134_REG_BUCK1_RSPCFG 0x54
+#define RTQ2134_REG_BUCK2_CFG0 0x5F
+#define RTQ2134_REG_BUCK2_DVS0CFG1 0x62
+#define RTQ2134_REG_BUCK2_DVS0CFG0 0x63
+#define RTQ2134_REG_BUCK2_DVS1CFG1 0x64
+#define RTQ2134_REG_BUCK2_DVS1CFG0 0x65
+#define RTQ2134_REG_BUCK2_DVSCFG 0x6C
+#define RTQ2134_REG_BUCK2_RSPCFG 0x6E
+#define RTQ2134_REG_BUCK3_CFG0 0x79
+#define RTQ2134_REG_BUCK3_DVS0CFG1 0x7C
+#define RTQ2134_REG_BUCK3_DVS0CFG0 0x7D
+#define RTQ2134_REG_BUCK3_DVS1CFG1 0x7E
+#define RTQ2134_REG_BUCK3_DVS1CFG0 0x7F
+#define RTQ2134_REG_BUCK3_DVSCFG 0x86
+#define RTQ2134_REG_BUCK3_RSPCFG 0x88
+#define RTQ2134_REG_BUCK3_SLEWCTRL 0x89
+
+#define RTQ2134_VOUT_MAXNUM 256
+#define RTQ2134_VOUT_MASK 0xFF
+#define RTQ2134_VOUTEN_MASK BIT(0)
+#define RTQ2134_ACTDISCHG_MASK BIT(0)
+#define RTQ2134_RSPUP_MASK GENMASK(6, 4)
+#define RTQ2134_FCCM_MASK BIT(5)
+#define RTQ2134_UVHICCUP_MASK BIT(3)
+#define RTQ2134_BUCKDVS_CTRL_MASK GENMASK(1, 0)
+#define RTQ2134_CHIPOT_MASK BIT(2)
+#define RTQ2134_BUCKOV_MASK BIT(5)
+#define RTQ2134_BUCKUV_MASK BIT(4)
+
+struct rtq2134_regulator_desc {
+ struct regulator_desc desc;
+ /* Extension for proprietary register and mask */
+ unsigned int mode_reg;
+ unsigned int mode_mask;
+ unsigned int suspend_enable_reg;
+ unsigned int suspend_enable_mask;
+ unsigned int suspend_vsel_reg;
+ unsigned int suspend_vsel_mask;
+ unsigned int suspend_mode_reg;
+ unsigned int suspend_mode_mask;
+ unsigned int dvs_ctrl_reg;
+};
+
+static int rtq2134_buck_set_mode(struct regulator_dev *rdev, unsigned int mode)
+{
+ struct rtq2134_regulator_desc *desc =
+ (struct rtq2134_regulator_desc *)rdev->desc;
+ unsigned int val;
+
+ if (mode == REGULATOR_MODE_NORMAL)
+ val = RTQ2134_AUTO_MODE;
+ else if (mode == REGULATOR_MODE_FAST)
+ val = RTQ2134_FCCM_MODE;
+ else
+ return -EINVAL;
+
+ val <<= ffs(desc->mode_mask) - 1;
+ return regmap_update_bits(rdev->regmap, desc->mode_reg, desc->mode_mask,
+ val);
+}
+
+static unsigned int rtq2134_buck_get_mode(struct regulator_dev *rdev)
+{
+ struct rtq2134_regulator_desc *desc =
+ (struct rtq2134_regulator_desc *)rdev->desc;
+ unsigned int mode;
+ int ret;
+
+ ret = regmap_read(rdev->regmap, desc->mode_reg, &mode);
+ if (ret)
+ return ret;
+
+ if (mode & desc->mode_mask)
+ return REGULATOR_MODE_FAST;
+ return REGULATOR_MODE_NORMAL;
+}
+
+static int rtq2134_buck_set_suspend_voltage(struct regulator_dev *rdev, int uV)
+{
+ struct rtq2134_regulator_desc *desc =
+ (struct rtq2134_regulator_desc *)rdev->desc;
+ int sel;
+
+ sel = regulator_map_voltage_linear_range(rdev, uV, uV);
+ if (sel < 0)
+ return sel;
+
+ sel <<= ffs(desc->suspend_vsel_mask) - 1;
+
+ return regmap_update_bits(rdev->regmap, desc->suspend_vsel_reg,
+ desc->suspend_vsel_mask, sel);
+}
+
+static int rtq2134_buck_set_suspend_enable(struct regulator_dev *rdev)
+{
+ struct rtq2134_regulator_desc *desc =
+ (struct rtq2134_regulator_desc *)rdev->desc;
+ unsigned int val = desc->suspend_enable_mask;
+
+ return regmap_update_bits(rdev->regmap, desc->suspend_enable_reg,
+ desc->suspend_enable_mask, val);
+}
+
+static int rtq2134_buck_set_suspend_disable(struct regulator_dev *rdev)
+{
+ struct rtq2134_regulator_desc *desc =
+ (struct rtq2134_regulator_desc *)rdev->desc;
+
+ return regmap_update_bits(rdev->regmap, desc->suspend_enable_reg,
+ desc->suspend_enable_mask, 0);
+}
+
+static int rtq2134_buck_set_suspend_mode(struct regulator_dev *rdev,
+ unsigned int mode)
+{
+ struct rtq2134_regulator_desc *desc =
+ (struct rtq2134_regulator_desc *)rdev->desc;
+ unsigned int val;
+
+ if (mode == REGULATOR_MODE_NORMAL)
+ val = RTQ2134_AUTO_MODE;
+ else if (mode == REGULATOR_MODE_FAST)
+ val = RTQ2134_FCCM_MODE;
+ else
+ return -EINVAL;
+
+ val <<= ffs(desc->suspend_mode_mask) - 1;
+ return regmap_update_bits(rdev->regmap, desc->suspend_mode_reg,
+ desc->suspend_mode_mask, val);
+}
+
+static int rtq2134_buck_get_error_flags(struct regulator_dev *rdev,
+ unsigned int *flags)
+{
+ int rid = rdev_get_id(rdev);
+ unsigned int chip_error, buck_error, events = 0;
+ int ret;
+
+ ret = regmap_read(rdev->regmap, RTQ2134_REG_FLT_RECORDTEMP,
+ &chip_error);
+ if (ret) {
+ dev_err(&rdev->dev, "Failed to get chip error flag\n");
+ return ret;
+ }
+
+ ret = regmap_read(rdev->regmap, RTQ2134_REG_FLT_RECORDBUCK(rid),
+ &buck_error);
+ if (ret) {
+ dev_err(&rdev->dev, "Failed to get buck error flag\n");
+ return ret;
+ }
+
+ if (chip_error & RTQ2134_CHIPOT_MASK)
+ events |= REGULATOR_ERROR_OVER_TEMP;
+
+ if (buck_error & RTQ2134_BUCKUV_MASK)
+ events |= REGULATOR_ERROR_UNDER_VOLTAGE;
+
+ if (buck_error & RTQ2134_BUCKOV_MASK)
+ events |= REGULATOR_ERROR_REGULATION_OUT;
+
+ *flags = events;
+ return 0;
+}
+
+static const struct regulator_ops rtq2134_buck_ops = {
+ .list_voltage = regulator_list_voltage_linear_range,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .set_active_discharge = regulator_set_active_discharge_regmap,
+ .set_ramp_delay = regulator_set_ramp_delay_regmap,
+ .set_mode = rtq2134_buck_set_mode,
+ .get_mode = rtq2134_buck_get_mode,
+ .set_suspend_voltage = rtq2134_buck_set_suspend_voltage,
+ .set_suspend_enable = rtq2134_buck_set_suspend_enable,
+ .set_suspend_disable = rtq2134_buck_set_suspend_disable,
+ .set_suspend_mode = rtq2134_buck_set_suspend_mode,
+ .get_error_flags = rtq2134_buck_get_error_flags,
+};
+
+static const struct linear_range rtq2134_buck_vout_ranges[] = {
+ REGULATOR_LINEAR_RANGE(300000, 0, 200, 5000),
+ REGULATOR_LINEAR_RANGE(1310000, 201, 255, 10000)
+};
+
+static unsigned int rtq2134_buck_of_map_mode(unsigned int mode)
+{
+ switch (mode) {
+ case RTQ2134_AUTO_MODE:
+ return REGULATOR_MODE_NORMAL;
+ case RTQ2134_FCCM_MODE:
+ return REGULATOR_MODE_FAST;
+ }
+
+ return REGULATOR_MODE_INVALID;
+}
+
+static int rtq2134_buck_of_parse_cb(struct device_node *np,
+ const struct regulator_desc *desc,
+ struct regulator_config *cfg)
+{
+ struct rtq2134_regulator_desc *rdesc =
+ (struct rtq2134_regulator_desc *)desc;
+ int rid = desc->id;
+ bool uv_shutdown, vsel_dvs;
+ unsigned int val;
+ int ret;
+
+ vsel_dvs = of_property_read_bool(np, "richtek,use-vsel-dvs");
+ if (vsel_dvs)
+ val = RTQ2134_BUCK_VSEL_CTRL;
+ else
+ val = RTQ2134_BUCK_DVS0_CTRL;
+
+ ret = regmap_update_bits(cfg->regmap, rdesc->dvs_ctrl_reg,
+ RTQ2134_BUCKDVS_CTRL_MASK, val);
+ if (ret)
+ return ret;
+
+ uv_shutdown = of_property_read_bool(np, "richtek,uv-shutdown");
+ if (uv_shutdown)
+ val = 0;
+ else
+ val = RTQ2134_UVHICCUP_MASK;
+
+ return regmap_update_bits(cfg->regmap, RTQ2134_REG_FLT_BUCKCTRL(rid),
+ RTQ2134_UVHICCUP_MASK, val);
+}
+
+static const unsigned int rtq2134_buck_ramp_delay_table[] = {
+ 0, 16000, 0, 8000, 4000, 2000, 1000, 500
+};
+
+#define RTQ2134_BUCK_DESC(_id) { \
+ .desc = { \
+ .name = "rtq2134_buck" #_id, \
+ .of_match = of_match_ptr("buck" #_id), \
+ .regulators_node = of_match_ptr("regulators"), \
+ .id = RTQ2134_IDX_BUCK##_id, \
+ .type = REGULATOR_VOLTAGE, \
+ .owner = THIS_MODULE, \
+ .ops = &rtq2134_buck_ops, \
+ .n_voltages = RTQ2134_VOUT_MAXNUM, \
+ .linear_ranges = rtq2134_buck_vout_ranges, \
+ .n_linear_ranges = ARRAY_SIZE(rtq2134_buck_vout_ranges), \
+ .vsel_reg = RTQ2134_REG_BUCK##_id##_DVS0CFG1, \
+ .vsel_mask = RTQ2134_VOUT_MASK, \
+ .enable_reg = RTQ2134_REG_BUCK##_id##_DVS0CFG0, \
+ .enable_mask = RTQ2134_VOUTEN_MASK, \
+ .active_discharge_reg = RTQ2134_REG_BUCK##_id##_CFG0, \
+ .active_discharge_mask = RTQ2134_ACTDISCHG_MASK, \
+ .ramp_reg = RTQ2134_REG_BUCK##_id##_RSPCFG, \
+ .ramp_mask = RTQ2134_RSPUP_MASK, \
+ .ramp_delay_table = rtq2134_buck_ramp_delay_table, \
+ .n_ramp_values = ARRAY_SIZE(rtq2134_buck_ramp_delay_table), \
+ .of_map_mode = rtq2134_buck_of_map_mode, \
+ .of_parse_cb = rtq2134_buck_of_parse_cb, \
+ }, \
+ .mode_reg = RTQ2134_REG_BUCK##_id##_DVS0CFG0, \
+ .mode_mask = RTQ2134_FCCM_MASK, \
+ .suspend_mode_reg = RTQ2134_REG_BUCK##_id##_DVS1CFG0, \
+ .suspend_mode_mask = RTQ2134_FCCM_MASK, \
+ .suspend_enable_reg = RTQ2134_REG_BUCK##_id##_DVS1CFG0, \
+ .suspend_enable_mask = RTQ2134_VOUTEN_MASK, \
+ .suspend_vsel_reg = RTQ2134_REG_BUCK##_id##_DVS1CFG1, \
+ .suspend_vsel_mask = RTQ2134_VOUT_MASK, \
+ .dvs_ctrl_reg = RTQ2134_REG_BUCK##_id##_DVSCFG, \
+}
+
+static const struct rtq2134_regulator_desc rtq2134_regulator_descs[] = {
+ RTQ2134_BUCK_DESC(1),
+ RTQ2134_BUCK_DESC(2),
+ RTQ2134_BUCK_DESC(3)
+};
+
+static bool rtq2134_is_accissible_reg(struct device *dev, unsigned int reg)
+{
+ if (reg >= RTQ2134_REG_IO_CHIPNAME && reg <= RTQ2134_REG_BUCK3_SLEWCTRL)
+ return true;
+ return false;
+}
+
+static const struct regmap_config rtq2134_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = RTQ2134_REG_BUCK3_SLEWCTRL,
+
+ .readable_reg = rtq2134_is_accissible_reg,
+ .writeable_reg = rtq2134_is_accissible_reg,
+};
+
+static int rtq2134_probe(struct i2c_client *i2c)
+{
+ struct regmap *regmap;
+ struct regulator_dev *rdev;
+ struct regulator_config regulator_cfg = {};
+ int i;
+
+ regmap = devm_regmap_init_i2c(i2c, &rtq2134_regmap_config);
+ if (IS_ERR(regmap)) {
+ dev_err(&i2c->dev, "Failed to allocate regmap\n");
+ return PTR_ERR(regmap);
+ }
+
+ regulator_cfg.dev = &i2c->dev;
+ regulator_cfg.regmap = regmap;
+ for (i = 0; i < ARRAY_SIZE(rtq2134_regulator_descs); i++) {
+ rdev = devm_regulator_register(&i2c->dev,
+ &rtq2134_regulator_descs[i].desc,
+ &regulator_cfg);
+ if (IS_ERR(rdev)) {
+ dev_err(&i2c->dev, "Failed to init %d regulator\n", i);
+ return PTR_ERR(rdev);
+ }
+ }
+
+ return 0;
+}
+
+static const struct of_device_id __maybe_unused rtq2134_device_tables[] = {
+ { .compatible = "richtek,rtq2134", },
+ {}
+};
+MODULE_DEVICE_TABLE(of, rtq2134_device_tables);
+
+static struct i2c_driver rtq2134_driver = {
+ .driver = {
+ .name = "rtq2134",
+ .of_match_table = rtq2134_device_tables,
+ },
+ .probe_new = rtq2134_probe,
+};
+module_i2c_driver(rtq2134_driver);
+
+MODULE_AUTHOR("ChiYuan Huang <cy_huang@richtek.com>");
+MODULE_DESCRIPTION("Richtek RTQ2134 Regulator Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/regulator/rtq6752-regulator.c b/drivers/regulator/rtq6752-regulator.c
new file mode 100644
index 000000000000..609d3fcf4923
--- /dev/null
+++ b/drivers/regulator/rtq6752-regulator.c
@@ -0,0 +1,289 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+
+enum {
+ RTQ6752_IDX_PAVDD = 0, /* regulator ids; also used as bit numbers in enable_flag */
+ RTQ6752_IDX_NAVDD = 1,
+ RTQ6752_IDX_MAX
+};
+
+#define RTQ6752_REG_PAVDD 0x00 /* PAVDD voltage selector */
+#define RTQ6752_REG_NAVDD 0x01 /* NAVDD voltage selector */
+#define RTQ6752_REG_PAVDDONDLY 0x07
+#define RTQ6752_REG_PAVDDSSTIME 0x08
+#define RTQ6752_REG_NAVDDONDLY 0x0D
+#define RTQ6752_REG_NAVDDSSTIME 0x0E
+#define RTQ6752_REG_OPTION1 0x12 /* holds the active-discharge bits */
+#define RTQ6752_REG_CHSWITCH 0x16 /* holds the per-rail enable bits */
+#define RTQ6752_REG_FAULT 0x1D /* holds the per-rail fault bits */
+
+#define RTQ6752_VOUT_MASK GENMASK(5, 0) /* selector field in PAVDD/NAVDD */
+#define RTQ6752_NAVDDEN_MASK BIT(3) /* in CHSWITCH */
+#define RTQ6752_PAVDDEN_MASK BIT(0) /* in CHSWITCH */
+#define RTQ6752_PAVDDAD_MASK BIT(4) /* in OPTION1 */
+#define RTQ6752_NAVDDAD_MASK BIT(3) /* in OPTION1 */
+#define RTQ6752_PAVDDF_MASK BIT(3) /* in FAULT */
+#define RTQ6752_NAVDDF_MASK BIT(0) /* in FAULT */
+#define RTQ6752_ENABLE_MASK (BIT(RTQ6752_IDX_MAX) - 1) /* all enable_flag bits set */
+
+#define RTQ6752_VOUT_MINUV 5000000 /* selector 0 = 5.0 V */
+#define RTQ6752_VOUT_STEPUV 50000 /* 50 mV per selector step */
+#define RTQ6752_VOUT_NUM 47 /* selectors 0..46, i.e. up to 7.3 V */
+#define RTQ6752_I2CRDY_TIMEUS 1000 /* wait after driving the enable GPIO high */
+#define RTQ6752_MINSS_TIMEUS 5000 /* used as the regulators' enable_time */
+
+struct rtq6752_priv {
+ struct regmap *regmap;
+ struct gpio_desc *enable_gpio; /* optional "enable" GPIO; tested for NULL before use */
+ struct mutex lock; /* guards enable_flag and the GPIO/regcache transitions */
+ unsigned char enable_flag; /* BIT(rid) is set while that rail is enabled */
+};
+
+/* Enable one rail; the first rail up also powers the chip and resyncs it. */
+static int rtq6752_set_vdd_enable(struct regulator_dev *rdev)
+{
+	struct rtq6752_priv *priv = rdev_get_drvdata(rdev);
+	int rid = rdev_get_id(rdev), ret;
+
+	mutex_lock(&priv->lock);
+	if (!priv->enable_flag) {
+		/* No rail is on, so EN may be low: raise it before any I2C. */
+		if (priv->enable_gpio) {
+			gpiod_set_value(priv->enable_gpio, 1);
+			usleep_range(RTQ6752_I2CRDY_TIMEUS,
+				     RTQ6752_I2CRDY_TIMEUS + 100);
+		}
+		/* Leave cache-only mode and replay the cached registers. */
+		regcache_cache_only(priv->regmap, false);
+		ret = regcache_sync(priv->regmap);
+		if (ret) {
+			mutex_unlock(&priv->lock);
+			return ret;
+		}
+	}
+
+	priv->enable_flag |= BIT(rid);
+	mutex_unlock(&priv->lock);
+
+	return regulator_enable_regmap(rdev);
+}
+
+/* Disable one rail; only the last rail down may power the chip off. */
+static int rtq6752_set_vdd_disable(struct regulator_dev *rdev)
+{
+	struct rtq6752_priv *priv = rdev_get_drvdata(rdev);
+	int rid = rdev_get_id(rdev), ret;
+
+	ret = regulator_disable_regmap(rdev);
+	if (ret)
+		return ret;
+
+	mutex_lock(&priv->lock);
+	priv->enable_flag &= ~BIT(rid);
+	if (!priv->enable_flag) {
+		regcache_cache_only(priv->regmap, true);
+		regcache_mark_dirty(priv->regmap);
+		/* EN is shared: drop it only once both rails are off. */
+		if (priv->enable_gpio)
+			gpiod_set_value(priv->enable_gpio, 0);
+	}
+	mutex_unlock(&priv->lock);
+
+	return 0;
+}
+
+static int rtq6752_get_error_flags(struct regulator_dev *rdev,
+				   unsigned int *flags)
+{
+	/* Per-rail fault bit in the FAULT register, indexed by regulator id. */
+	const unsigned int rail_fault[] = {
+		RTQ6752_PAVDDF_MASK, RTQ6752_NAVDDF_MASK };
+	unsigned int status;
+	int err;
+
+	err = regmap_read(rdev->regmap, RTQ6752_REG_FAULT, &status);
+	if (err)
+		return err;
+
+	/* The only condition reported is "output out of regulation". */
+	*flags = (status & rail_fault[rdev_get_id(rdev)]) ?
+		 REGULATOR_ERROR_REGULATION_OUT : 0;
+	return 0;
+}
+
+static const struct regulator_ops rtq6752_regulator_ops = {
+ .list_voltage = regulator_list_voltage_linear,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .enable = rtq6752_set_vdd_enable, /* driver-specific: EN GPIO + regcache */
+ .disable = rtq6752_set_vdd_disable, /* driver-specific counterpart */
+ .is_enabled = regulator_is_enabled_regmap,
+ .set_active_discharge = regulator_set_active_discharge_regmap,
+ .get_error_flags = rtq6752_get_error_flags, /* reads the FAULT register */
+};
+
+static const struct regulator_desc rtq6752_regulator_descs[] = {
+ { /* PAVDD rail */
+ .name = "rtq6752-pavdd",
+ .of_match = of_match_ptr("pavdd"),
+ .regulators_node = of_match_ptr("regulators"),
+ .id = RTQ6752_IDX_PAVDD,
+ .n_voltages = RTQ6752_VOUT_NUM,
+ .ops = &rtq6752_regulator_ops,
+ .owner = THIS_MODULE,
+ .min_uV = RTQ6752_VOUT_MINUV,
+ .uV_step = RTQ6752_VOUT_STEPUV,
+ .enable_time = RTQ6752_MINSS_TIMEUS,
+ .vsel_reg = RTQ6752_REG_PAVDD,
+ .vsel_mask = RTQ6752_VOUT_MASK,
+ .enable_reg = RTQ6752_REG_CHSWITCH, /* both rails share this register */
+ .enable_mask = RTQ6752_PAVDDEN_MASK,
+ .active_discharge_reg = RTQ6752_REG_OPTION1,
+ .active_discharge_mask = RTQ6752_PAVDDAD_MASK,
+ .active_discharge_off = RTQ6752_PAVDDAD_MASK, /* bit set = discharge off */
+ },
+ { /* NAVDD rail: same layout, different selector register and bits */
+ .name = "rtq6752-navdd",
+ .of_match = of_match_ptr("navdd"),
+ .regulators_node = of_match_ptr("regulators"),
+ .id = RTQ6752_IDX_NAVDD,
+ .n_voltages = RTQ6752_VOUT_NUM,
+ .ops = &rtq6752_regulator_ops,
+ .owner = THIS_MODULE,
+ .min_uV = RTQ6752_VOUT_MINUV,
+ .uV_step = RTQ6752_VOUT_STEPUV,
+ .enable_time = RTQ6752_MINSS_TIMEUS,
+ .vsel_reg = RTQ6752_REG_NAVDD,
+ .vsel_mask = RTQ6752_VOUT_MASK,
+ .enable_reg = RTQ6752_REG_CHSWITCH,
+ .enable_mask = RTQ6752_NAVDDEN_MASK,
+ .active_discharge_reg = RTQ6752_REG_OPTION1,
+ .active_discharge_mask = RTQ6752_NAVDDAD_MASK,
+ .active_discharge_off = RTQ6752_NAVDDAD_MASK, /* bit set = discharge off */
+ }
+};
+
+static int rtq6752_init_device_properties(struct rtq6752_priv *priv)
+{
+ u8 raw_vals[] = { 0, 0 }; /* two bytes: the ONDLY register and the one after it */
+ int ret;
+
+ /* Configure PAVDD on and softstart delay time to the minimum */
+ ret = regmap_raw_write(priv->regmap, RTQ6752_REG_PAVDDONDLY, raw_vals,
+ ARRAY_SIZE(raw_vals)); /* covers 0x07 (ONDLY) and 0x08 (SSTIME) */
+ if (ret)
+ return ret;
+
+ /* Configure NAVDD on and softstart delay time to the minimum */
+ return regmap_raw_write(priv->regmap, RTQ6752_REG_NAVDDONDLY, raw_vals,
+ ARRAY_SIZE(raw_vals)); /* covers 0x0D (ONDLY) and 0x0E (SSTIME) */
+}
+
+static bool rtq6752_is_volatile_reg(struct device *dev, unsigned int reg)
+{
+	/* Only FAULT is volatile: regmap reads it from the chip every time */
+	/* instead of serving it from the register cache. */
+	return reg == RTQ6752_REG_FAULT;
+}
+
+static const struct reg_default rtq6752_reg_defaults[] = { /* initial regmap cache contents */
+ { RTQ6752_REG_PAVDD, 0x14 }, /* selector 0x14 -> 6.0 V (5.0 V + 20 * 50 mV) */
+ { RTQ6752_REG_NAVDD, 0x14 },
+ { RTQ6752_REG_PAVDDONDLY, 0x01 }, /* the four delay registers are */
+ { RTQ6752_REG_PAVDDSSTIME, 0x01 }, /* overwritten with 0 during probe by */
+ { RTQ6752_REG_NAVDDONDLY, 0x01 }, /* rtq6752_init_device_properties() */
+ { RTQ6752_REG_NAVDDSSTIME, 0x01 },
+ { RTQ6752_REG_OPTION1, 0x07 },
+ { RTQ6752_REG_CHSWITCH, 0x29 }, /* has PAVDDEN (bit 0) and NAVDDEN (bit 3) set */
+};
+
+static const struct regmap_config rtq6752_regmap_config = {
+ .reg_bits = 8, /* 8-bit register addresses */
+ .val_bits = 8, /* 8-bit register values */
+ .cache_type = REGCACHE_RBTREE, /* the cache is what enable/disable sync and mark dirty */
+ .max_register = RTQ6752_REG_FAULT,
+ .reg_defaults = rtq6752_reg_defaults,
+ .num_reg_defaults = ARRAY_SIZE(rtq6752_reg_defaults),
+ .volatile_reg = rtq6752_is_volatile_reg, /* FAULT bypasses the cache */
+};
+
+static int rtq6752_probe(struct i2c_client *i2c)
+{ /* Claim the EN GPIO, build the cached regmap, zero the delays, register both rails. */
+ struct rtq6752_priv *priv;
+ struct regulator_config reg_cfg = {};
+ struct regulator_dev *rdev;
+ int i, ret;
+
+ priv = devm_kzalloc(&i2c->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ mutex_init(&priv->lock);
+
+ priv->enable_gpio = devm_gpiod_get_optional(&i2c->dev, "enable",
+ GPIOD_OUT_HIGH); /* requested as an output driven high */
+ if (IS_ERR(priv->enable_gpio)) {
+ dev_err(&i2c->dev, "Failed to get 'enable' gpio\n");
+ return PTR_ERR(priv->enable_gpio);
+ }
+
+ usleep_range(RTQ6752_I2CRDY_TIMEUS, RTQ6752_I2CRDY_TIMEUS + 100); /* runs even when no GPIO was found */
+ /* Default EN pin to high, PAVDD and NAVDD will be on */
+ priv->enable_flag = RTQ6752_ENABLE_MASK; /* bookkeeping starts with both rails counted as on */
+
+ priv->regmap = devm_regmap_init_i2c(i2c, &rtq6752_regmap_config);
+ if (IS_ERR(priv->regmap)) {
+ dev_err(&i2c->dev, "Failed to init regmap\n");
+ return PTR_ERR(priv->regmap);
+ }
+
+ ret = rtq6752_init_device_properties(priv); /* first I2C writes to the chip */
+ if (ret) {
+ dev_err(&i2c->dev, "Failed to init device properties\n");
+ return ret;
+ }
+
+ reg_cfg.dev = &i2c->dev;
+ reg_cfg.regmap = priv->regmap;
+ reg_cfg.driver_data = priv; /* both rails share one priv (lock, flag, GPIO) */
+
+ for (i = 0; i < ARRAY_SIZE(rtq6752_regulator_descs); i++) {
+ rdev = devm_regulator_register(&i2c->dev,
+ rtq6752_regulator_descs + i,
+ &reg_cfg);
+ if (IS_ERR(rdev)) { /* first failure aborts probe with that error */
+ dev_err(&i2c->dev, "Failed to init %d regulator\n", i);
+ return PTR_ERR(rdev);
+ }
+ }
+
+ return 0;
+}
+
+static const struct of_device_id __maybe_unused rtq6752_device_table[] = {
+ { .compatible = "richtek,rtq6752", }, /* devicetree match string */
+ {} /* empty terminating entry */
+};
+MODULE_DEVICE_TABLE(of, rtq6752_device_table);
+
+static struct i2c_driver rtq6752_driver = {
+ .driver = {
+ .name = "rtq6752",
+ .of_match_table = rtq6752_device_table,
+ },
+ .probe_new = rtq6752_probe, /* no remove callback is set */
+};
+module_i2c_driver(rtq6752_driver);
+
+MODULE_AUTHOR("ChiYuan Huang <cy_huang@richtek.com>");
+MODULE_DESCRIPTION("Richtek RTQ6752 Regulator Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/regulator/sy7636a-regulator.c b/drivers/regulator/sy7636a-regulator.c
index e021ae08cbaa..8360b3947ead 100644
--- a/drivers/regulator/sy7636a-regulator.c
+++ b/drivers/regulator/sy7636a-regulator.c
@@ -13,7 +13,10 @@
#include <linux/gpio/consumer.h>
#include <linux/mfd/sy7636a.h>
-#define SY7636A_POLL_ENABLED_TIME 500
+struct sy7636a_data {
+ struct regmap *regmap; /* taken from the parent device's drvdata in probe */
+ struct gpio_desc *pgood_gpio; /* "epd-pwr-good" input, read by sy7636a_get_status() */
+};
static int sy7636a_get_vcom_voltage_op(struct regulator_dev *rdev)
{
@@ -35,10 +38,10 @@ static int sy7636a_get_vcom_voltage_op(struct regulator_dev *rdev)
static int sy7636a_get_status(struct regulator_dev *rdev)
{
- struct sy7636a *sy7636a = rdev_get_drvdata(rdev);
+ struct sy7636a_data *data = dev_get_drvdata(rdev->dev.parent);
int ret = 0;
- ret = gpiod_get_value_cansleep(sy7636a->pgood_gpio);
+ ret = gpiod_get_value_cansleep(data->pgood_gpio);
if (ret < 0)
dev_err(&rdev->dev, "Failed to read pgood gpio: %d\n", ret);
@@ -61,46 +64,50 @@ static const struct regulator_desc desc = {
.owner = THIS_MODULE,
.enable_reg = SY7636A_REG_OPERATION_MODE_CRL,
.enable_mask = SY7636A_OPERATION_MODE_CRL_ONOFF,
- .poll_enabled_time = SY7636A_POLL_ENABLED_TIME,
.regulators_node = of_match_ptr("regulators"),
.of_match = of_match_ptr("vcom"),
};
static int sy7636a_regulator_probe(struct platform_device *pdev)
{
- struct sy7636a *sy7636a = dev_get_drvdata(pdev->dev.parent);
+ struct regmap *regmap = dev_get_drvdata(pdev->dev.parent);
struct regulator_config config = { };
struct regulator_dev *rdev;
struct gpio_desc *gdp;
+ struct sy7636a_data *data;
int ret;
- if (!sy7636a)
+ if (!regmap)
return -EPROBE_DEFER;
- platform_set_drvdata(pdev, sy7636a);
-
- gdp = devm_gpiod_get(sy7636a->dev, "epd-pwr-good", GPIOD_IN);
+ gdp = devm_gpiod_get(pdev->dev.parent, "epd-pwr-good", GPIOD_IN);
if (IS_ERR(gdp)) {
- dev_err(sy7636a->dev, "Power good GPIO fault %ld\n", PTR_ERR(gdp));
+ dev_err(pdev->dev.parent, "Power good GPIO fault %ld\n", PTR_ERR(gdp));
return PTR_ERR(gdp);
}
- sy7636a->pgood_gpio = gdp;
+ data = devm_kzalloc(&pdev->dev, sizeof(struct sy7636a_data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->regmap = regmap;
+ data->pgood_gpio = gdp;
+
+ platform_set_drvdata(pdev, data);
- ret = regmap_write(sy7636a->regmap, SY7636A_REG_POWER_ON_DELAY_TIME, 0x0);
+ ret = regmap_write(regmap, SY7636A_REG_POWER_ON_DELAY_TIME, 0x0);
if (ret) {
- dev_err(sy7636a->dev, "Failed to initialize regulator: %d\n", ret);
+ dev_err(pdev->dev.parent, "Failed to initialize regulator: %d\n", ret);
return ret;
}
config.dev = &pdev->dev;
- config.dev->of_node = sy7636a->dev->of_node;
- config.driver_data = sy7636a;
- config.regmap = sy7636a->regmap;
+ config.dev->of_node = pdev->dev.parent->of_node;
+ config.regmap = regmap;
rdev = devm_regulator_register(&pdev->dev, &desc, &config);
if (IS_ERR(rdev)) {
- dev_err(sy7636a->dev, "Failed to register %s regulator\n",
+ dev_err(pdev->dev.parent, "Failed to register %s regulator\n",
pdev->name);
return PTR_ERR(rdev);
}
diff --git a/drivers/regulator/sy8824x.c b/drivers/regulator/sy8824x.c
index 62d243f3b904..5e915cf307b3 100644
--- a/drivers/regulator/sy8824x.c
+++ b/drivers/regulator/sy8824x.c
@@ -25,6 +25,7 @@ struct sy8824_config {
unsigned int vsel_min;
unsigned int vsel_step;
unsigned int vsel_count;
+ const struct regmap_config *config;
};
struct sy8824_device_info {
@@ -110,6 +111,15 @@ static int sy8824_regulator_register(struct sy8824_device_info *di,
static const struct regmap_config sy8824_regmap_config = {
.reg_bits = 8,
.val_bits = 8,
+ .num_reg_defaults_raw = 1,
+ .cache_type = REGCACHE_FLAT,
+};
+
+static const struct regmap_config sy20276_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .num_reg_defaults_raw = 2,
+ .cache_type = REGCACHE_FLAT,
};
static int sy8824_i2c_probe(struct i2c_client *client)
@@ -134,7 +144,7 @@ static int sy8824_i2c_probe(struct i2c_client *client)
di->dev = dev;
di->cfg = of_device_get_match_data(dev);
- regmap = devm_regmap_init_i2c(client, &sy8824_regmap_config);
+ regmap = devm_regmap_init_i2c(client, di->cfg->config);
if (IS_ERR(regmap)) {
dev_err(dev, "Failed to allocate regmap!\n");
return PTR_ERR(regmap);
@@ -160,6 +170,7 @@ static const struct sy8824_config sy8824c_cfg = {
.vsel_min = 762500,
.vsel_step = 12500,
.vsel_count = 64,
+ .config = &sy8824_regmap_config,
};
static const struct sy8824_config sy8824e_cfg = {
@@ -169,6 +180,7 @@ static const struct sy8824_config sy8824e_cfg = {
.vsel_min = 700000,
.vsel_step = 12500,
.vsel_count = 64,
+ .config = &sy8824_regmap_config,
};
static const struct sy8824_config sy20276_cfg = {
@@ -178,6 +190,7 @@ static const struct sy8824_config sy20276_cfg = {
.vsel_min = 600000,
.vsel_step = 10000,
.vsel_count = 128,
+ .config = &sy20276_regmap_config,
};
static const struct sy8824_config sy20278_cfg = {
@@ -187,6 +200,7 @@ static const struct sy8824_config sy20278_cfg = {
.vsel_min = 762500,
.vsel_step = 12500,
.vsel_count = 64,
+ .config = &sy20276_regmap_config,
};
static const struct of_device_id sy8824_dt_ids[] = {
diff --git a/drivers/regulator/sy8827n.c b/drivers/regulator/sy8827n.c
index 52e8c17afe24..7d5d9f879ce3 100644
--- a/drivers/regulator/sy8827n.c
+++ b/drivers/regulator/sy8827n.c
@@ -19,6 +19,10 @@
#define SY8827N_MODE (1 << 6)
#define SY8827N_VSEL1 1
#define SY8827N_CTRL 2
+#define SY8827N_ID1 3
+#define SY8827N_ID2 4
+#define SY8827N_PGOOD 5
+#define SY8827N_MAX (SY8827N_PGOOD + 1)
#define SY8827N_NVOLTAGES 64
#define SY8827N_VSELMIN 600000
@@ -102,9 +106,19 @@ static int sy8827n_regulator_register(struct sy8827n_device_info *di,
return PTR_ERR_OR_ZERO(rdev);
}
+static bool sy8827n_volatile_reg(struct device *dev, unsigned int reg)
+{
+	/* Only PGOOD is volatile: regmap reads it from the chip every time */
+	/* instead of serving it from the register cache. */
+	return reg == SY8827N_PGOOD;
+}
+
static const struct regmap_config sy8827n_regmap_config = {
.reg_bits = 8,
.val_bits = 8,
+ .volatile_reg = sy8827n_volatile_reg,
+ .num_reg_defaults_raw = SY8827N_MAX,
+ .cache_type = REGCACHE_FLAT,
};
static int sy8827n_i2c_probe(struct i2c_client *client)
diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c
index 1d5b0a1b86f7..06cbe60c990f 100644
--- a/drivers/regulator/tps65910-regulator.c
+++ b/drivers/regulator/tps65910-regulator.c
@@ -1211,12 +1211,10 @@ static int tps65910_probe(struct platform_device *pdev)
rdev = devm_regulator_register(&pdev->dev, &pmic->desc[i],
&config);
- if (IS_ERR(rdev)) {
- dev_err(tps65910->dev,
- "failed to register %s regulator\n",
- pdev->name);
- return PTR_ERR(rdev);
- }
+ if (IS_ERR(rdev))
+ return dev_err_probe(tps65910->dev, PTR_ERR(rdev),
+ "failed to register %s regulator\n",
+ pdev->name);
/* Save regulator for cleanup */
pmic->rdev[i] = rdev;
diff --git a/drivers/regulator/vctrl-regulator.c b/drivers/regulator/vctrl-regulator.c
index cbadb1c99679..d2a37978fc3a 100644
--- a/drivers/regulator/vctrl-regulator.c
+++ b/drivers/regulator/vctrl-regulator.c
@@ -37,7 +37,6 @@ struct vctrl_voltage_table {
struct vctrl_data {
struct regulator_dev *rdev;
struct regulator_desc desc;
- struct regulator *ctrl_reg;
bool enabled;
unsigned int min_slew_down_rate;
unsigned int ovp_threshold;
@@ -82,7 +81,12 @@ static int vctrl_calc_output_voltage(struct vctrl_data *vctrl, int ctrl_uV)
static int vctrl_get_voltage(struct regulator_dev *rdev)
{
struct vctrl_data *vctrl = rdev_get_drvdata(rdev);
- int ctrl_uV = regulator_get_voltage_rdev(vctrl->ctrl_reg->rdev);
+ int ctrl_uV;
+
+ if (!rdev->supply)
+ return -EPROBE_DEFER;
+
+ ctrl_uV = regulator_get_voltage_rdev(rdev->supply->rdev);
return vctrl_calc_output_voltage(vctrl, ctrl_uV);
}
@@ -92,14 +96,19 @@ static int vctrl_set_voltage(struct regulator_dev *rdev,
unsigned int *selector)
{
struct vctrl_data *vctrl = rdev_get_drvdata(rdev);
- struct regulator *ctrl_reg = vctrl->ctrl_reg;
- int orig_ctrl_uV = regulator_get_voltage_rdev(ctrl_reg->rdev);
- int uV = vctrl_calc_output_voltage(vctrl, orig_ctrl_uV);
+ int orig_ctrl_uV;
+ int uV;
int ret;
+ if (!rdev->supply)
+ return -EPROBE_DEFER;
+
+ orig_ctrl_uV = regulator_get_voltage_rdev(rdev->supply->rdev);
+ uV = vctrl_calc_output_voltage(vctrl, orig_ctrl_uV);
+
if (req_min_uV >= uV || !vctrl->ovp_threshold)
/* voltage rising or no OVP */
- return regulator_set_voltage_rdev(ctrl_reg->rdev,
+ return regulator_set_voltage_rdev(rdev->supply->rdev,
vctrl_calc_ctrl_voltage(vctrl, req_min_uV),
vctrl_calc_ctrl_voltage(vctrl, req_max_uV),
PM_SUSPEND_ON);
@@ -117,7 +126,7 @@ static int vctrl_set_voltage(struct regulator_dev *rdev,
next_uV = max_t(int, req_min_uV, uV - max_drop_uV);
next_ctrl_uV = vctrl_calc_ctrl_voltage(vctrl, next_uV);
- ret = regulator_set_voltage_rdev(ctrl_reg->rdev,
+ ret = regulator_set_voltage_rdev(rdev->supply->rdev,
next_ctrl_uV,
next_ctrl_uV,
PM_SUSPEND_ON);
@@ -134,7 +143,7 @@ static int vctrl_set_voltage(struct regulator_dev *rdev,
err:
/* Try to go back to original voltage */
- regulator_set_voltage_rdev(ctrl_reg->rdev, orig_ctrl_uV, orig_ctrl_uV,
+ regulator_set_voltage_rdev(rdev->supply->rdev, orig_ctrl_uV, orig_ctrl_uV,
PM_SUSPEND_ON);
return ret;
@@ -151,16 +160,18 @@ static int vctrl_set_voltage_sel(struct regulator_dev *rdev,
unsigned int selector)
{
struct vctrl_data *vctrl = rdev_get_drvdata(rdev);
- struct regulator *ctrl_reg = vctrl->ctrl_reg;
unsigned int orig_sel = vctrl->sel;
int ret;
+ if (!rdev->supply)
+ return -EPROBE_DEFER;
+
if (selector >= rdev->desc->n_voltages)
return -EINVAL;
if (selector >= vctrl->sel || !vctrl->ovp_threshold) {
/* voltage rising or no OVP */
- ret = regulator_set_voltage_rdev(ctrl_reg->rdev,
+ ret = regulator_set_voltage_rdev(rdev->supply->rdev,
vctrl->vtable[selector].ctrl,
vctrl->vtable[selector].ctrl,
PM_SUSPEND_ON);
@@ -179,7 +190,7 @@ static int vctrl_set_voltage_sel(struct regulator_dev *rdev,
else
next_sel = vctrl->vtable[vctrl->sel].ovp_min_sel;
- ret = regulator_set_voltage_rdev(ctrl_reg->rdev,
+ ret = regulator_set_voltage_rdev(rdev->supply->rdev,
vctrl->vtable[next_sel].ctrl,
vctrl->vtable[next_sel].ctrl,
PM_SUSPEND_ON);
@@ -202,7 +213,7 @@ static int vctrl_set_voltage_sel(struct regulator_dev *rdev,
err:
if (vctrl->sel != orig_sel) {
/* Try to go back to original voltage */
- if (!regulator_set_voltage_rdev(ctrl_reg->rdev,
+ if (!regulator_set_voltage_rdev(rdev->supply->rdev,
vctrl->vtable[orig_sel].ctrl,
vctrl->vtable[orig_sel].ctrl,
PM_SUSPEND_ON))
@@ -234,10 +245,6 @@ static int vctrl_parse_dt(struct platform_device *pdev,
u32 pval;
u32 vrange_ctrl[2];
- vctrl->ctrl_reg = devm_regulator_get(&pdev->dev, "ctrl");
- if (IS_ERR(vctrl->ctrl_reg))
- return PTR_ERR(vctrl->ctrl_reg);
-
ret = of_property_read_u32(np, "ovp-threshold-percent", &pval);
if (!ret) {
vctrl->ovp_threshold = pval;
@@ -315,11 +322,11 @@ static int vctrl_cmp_ctrl_uV(const void *a, const void *b)
return at->ctrl - bt->ctrl;
}
-static int vctrl_init_vtable(struct platform_device *pdev)
+static int vctrl_init_vtable(struct platform_device *pdev,
+ struct regulator *ctrl_reg)
{
struct vctrl_data *vctrl = platform_get_drvdata(pdev);
struct regulator_desc *rdesc = &vctrl->desc;
- struct regulator *ctrl_reg = vctrl->ctrl_reg;
struct vctrl_voltage_range *vrange_ctrl = &vctrl->vrange.ctrl;
int n_voltages;
int ctrl_uV;
@@ -395,23 +402,19 @@ static int vctrl_init_vtable(struct platform_device *pdev)
static int vctrl_enable(struct regulator_dev *rdev)
{
struct vctrl_data *vctrl = rdev_get_drvdata(rdev);
- int ret = regulator_enable(vctrl->ctrl_reg);
- if (!ret)
- vctrl->enabled = true;
+ vctrl->enabled = true;
- return ret;
+ return 0;
}
static int vctrl_disable(struct regulator_dev *rdev)
{
struct vctrl_data *vctrl = rdev_get_drvdata(rdev);
- int ret = regulator_disable(vctrl->ctrl_reg);
- if (!ret)
- vctrl->enabled = false;
+ vctrl->enabled = false;
- return ret;
+ return 0;
}
static int vctrl_is_enabled(struct regulator_dev *rdev)
@@ -447,6 +450,7 @@ static int vctrl_probe(struct platform_device *pdev)
struct regulator_desc *rdesc;
struct regulator_config cfg = { };
struct vctrl_voltage_range *vrange_ctrl;
+ struct regulator *ctrl_reg;
int ctrl_uV;
int ret;
@@ -461,15 +465,20 @@ static int vctrl_probe(struct platform_device *pdev)
if (ret)
return ret;
+ ctrl_reg = devm_regulator_get(&pdev->dev, "ctrl");
+ if (IS_ERR(ctrl_reg))
+ return PTR_ERR(ctrl_reg);
+
vrange_ctrl = &vctrl->vrange.ctrl;
rdesc = &vctrl->desc;
rdesc->name = "vctrl";
rdesc->type = REGULATOR_VOLTAGE;
rdesc->owner = THIS_MODULE;
+ rdesc->supply_name = "ctrl";
- if ((regulator_get_linear_step(vctrl->ctrl_reg) == 1) ||
- (regulator_count_voltages(vctrl->ctrl_reg) == -EINVAL)) {
+ if ((regulator_get_linear_step(ctrl_reg) == 1) ||
+ (regulator_count_voltages(ctrl_reg) == -EINVAL)) {
rdesc->continuous_voltage_range = true;
rdesc->ops = &vctrl_ops_cont;
} else {
@@ -486,11 +495,12 @@ static int vctrl_probe(struct platform_device *pdev)
cfg.init_data = init_data;
if (!rdesc->continuous_voltage_range) {
- ret = vctrl_init_vtable(pdev);
+ ret = vctrl_init_vtable(pdev, ctrl_reg);
if (ret)
return ret;
- ctrl_uV = regulator_get_voltage_rdev(vctrl->ctrl_reg->rdev);
+ /* Use locked consumer API when not in regulator framework */
+ ctrl_uV = regulator_get_voltage(ctrl_reg);
if (ctrl_uV < 0) {
dev_err(&pdev->dev, "failed to get control voltage\n");
return ctrl_uV;
@@ -513,6 +523,9 @@ static int vctrl_probe(struct platform_device *pdev)
}
}
+ /* Drop ctrl-supply here in favor of regulator core managed supply */
+ devm_regulator_put(ctrl_reg);
+
vctrl->rdev = devm_regulator_register(&pdev->dev, rdesc, &cfg);
if (IS_ERR(vctrl->rdev)) {
ret = PTR_ERR(vctrl->rdev);
diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig
index 328f70f633eb..5656cac04b4c 100644
--- a/drivers/reset/Kconfig
+++ b/drivers/reset/Kconfig
@@ -116,7 +116,7 @@ config RESET_LPC18XX
config RESET_MCHP_SPARX5
bool "Microchip Sparx5 reset driver"
- depends on HAS_IOMEM || COMPILE_TEST
+ depends on ARCH_SPARX5 || COMPILE_TEST
default y if SPARX5_SWITCH
select MFD_SYSCON
help
diff --git a/drivers/reset/reset-zynqmp.c b/drivers/reset/reset-zynqmp.c
index daa425e74c96..59dc0ff9af9e 100644
--- a/drivers/reset/reset-zynqmp.c
+++ b/drivers/reset/reset-zynqmp.c
@@ -53,7 +53,8 @@ static int zynqmp_reset_status(struct reset_controller_dev *rcdev,
unsigned long id)
{
struct zynqmp_reset_data *priv = to_zynqmp_reset_data(rcdev);
- int val, err;
+ int err;
+ u32 val;
err = zynqmp_pm_reset_get_status(priv->data->reset_id + id, &val);
if (err)
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index 6bb775236c16..db5987281010 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -552,7 +552,7 @@ static struct dasd_ccw_req *dasd_diag_build_cp(struct dasd_device *memdev,
dbio = dreq->bio;
recid = first_rec;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
for (off = 0; off < bv.bv_len; off += blksize) {
memset(dbio, 0, sizeof (struct dasd_diag_bio));
dbio->type = rw_cmd;
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index fb5d8152652d..460e0f1cca53 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -3276,7 +3276,7 @@ static int dasd_eckd_ese_read(struct dasd_ccw_req *cqr, struct irb *irb)
end_blk = (curr_trk + 1) * recs_per_trk;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
for (off = 0; off < bv.bv_len; off += blksize) {
if (first_blk + blk_count >= end_blk) {
cqr->proc_bytes = blk_count * blksize;
@@ -4008,7 +4008,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single(
last_rec - recid + 1, cmd, basedev, blksize);
}
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
if (dasd_page_cache) {
char *copy = kmem_cache_alloc(dasd_page_cache,
GFP_DMA | __GFP_NOWARN);
@@ -4175,7 +4175,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track(
idaw_dst = NULL;
idaw_len = 0;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
seg_len = bv.bv_len;
while (seg_len) {
if (new_track) {
@@ -4518,7 +4518,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track(
new_track = 1;
recid = first_rec;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
seg_len = bv.bv_len;
while (seg_len) {
if (new_track) {
@@ -4551,7 +4551,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track(
}
} else {
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
last_tidaw = itcw_add_tidaw(itcw, 0x00,
dst, bv.bv_len);
if (IS_ERR(last_tidaw)) {
@@ -4787,7 +4787,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev,
idaws = idal_create_words(idaws, rawpadpage, PAGE_SIZE);
}
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
seg_len = bv.bv_len;
if (cmd == DASD_ECKD_CCW_READ_TRACK)
memset(dst, 0, seg_len);
@@ -4848,7 +4848,7 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req)
if (private->uses_cdl == 0 || recid > 2*blk_per_trk)
ccw++;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
for (off = 0; off < bv.bv_len; off += blksize) {
/* Skip locate record. */
if (private->uses_cdl && recid <= 2*blk_per_trk)
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index 3ad319aee51e..e084f4dedddd 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -501,7 +501,7 @@ static struct dasd_ccw_req *dasd_fba_build_cp_regular(
}
recid = first_rec;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
if (dasd_page_cache) {
char *copy = kmem_cache_alloc(dasd_page_cache,
GFP_DMA | __GFP_NOWARN);
@@ -583,7 +583,7 @@ dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req)
if (private->rdc_data.mode.bits.data_chain != 0)
ccw++;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv.bv_page) + bv.bv_offset;
+ dst = bvec_virt(&bv);
for (off = 0; off < bv.bv_len; off += blksize) {
/* Skip locate record. */
if (private->rdc_data.mode.bits.data_chain == 0)
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index 493e8469893c..fa966e0db6ca 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -24,6 +24,8 @@
#include "dasd_int.h"
+static struct lock_class_key dasd_bio_compl_lkclass;
+
/*
* Allocate and register gendisk structure for device.
*/
@@ -38,13 +40,15 @@ int dasd_gendisk_alloc(struct dasd_block *block)
if (base->devindex >= DASD_PER_MAJOR)
return -EBUSY;
- gdp = alloc_disk(1 << DASD_PARTN_BITS);
+ gdp = __alloc_disk_node(block->request_queue, NUMA_NO_NODE,
+ &dasd_bio_compl_lkclass);
if (!gdp)
return -ENOMEM;
/* Initialize gendisk structure. */
gdp->major = DASD_MAJOR;
gdp->first_minor = base->devindex << DASD_PARTN_BITS;
+ gdp->minors = 1 << DASD_PARTN_BITS;
gdp->fops = &dasd_device_operations;
/*
@@ -73,7 +77,6 @@ int dasd_gendisk_alloc(struct dasd_block *block)
test_bit(DASD_FLAG_DEVICE_RO, &base->flags))
set_disk_ro(gdp, 1);
dasd_add_link_to_gendisk(gdp, base);
- gdp->queue = block->request_queue;
block->gdp = gdp;
set_capacity(block->gdp, 0);
device_add_disk(&base->cdev->dev, block->gdp, NULL);
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 9f6424408946..468cbeb539ff 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -575,10 +575,8 @@ int dasd_ioctl(struct block_device *bdev, fmode_t mode,
else
argp = (void __user *)arg;
- if ((_IOC_DIR(cmd) != _IOC_NONE) && !arg) {
- PRINT_DEBUG("empty data ptr");
+ if ((_IOC_DIR(cmd) != _IOC_NONE) && !arg)
return -EINVAL;
- }
base = dasd_device_from_gendisk(bdev->bd_disk);
if (!base)
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 29180bdf0977..5be3d1c39a78 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -892,8 +892,7 @@ dcssblk_submit_bio(struct bio *bio)
index = (bio->bi_iter.bi_sector >> 3);
bio_for_each_segment(bvec, bio, iter) {
- page_addr = (unsigned long)
- page_address(bvec.bv_page) + bvec.bv_offset;
+ page_addr = (unsigned long)bvec_virt(&bvec);
source_addr = dev_info->start + (index<<12) + bytes_done;
if (unlikely((page_addr & 4095) != 0) || (bvec.bv_len & 4095) != 0)
// More paranoia.
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index 792b4bfa6d9a..b4b84e3e0949 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -21,11 +21,30 @@
#include <linux/platform_device.h>
#include <asm/types.h>
#include <asm/irq.h>
+#include <asm/debug.h>
#include "sclp.h"
#define SCLP_HEADER "sclp: "
+struct sclp_trace_entry {
+ char id[4]; /* short event tag such as "SRV1"; a 4-char tag leaves no room for a NUL */
+ u32 a; /* first event-specific value */
+ u64 b; /* second event-specific value */
+};
+
+#define SCLP_TRACE_ENTRY_SIZE sizeof(struct sclp_trace_entry)
+#define SCLP_TRACE_MAX_SIZE 128
+#define SCLP_TRACE_EVENT_MAX_SIZE 64
+
+/* Debug trace area intended for all entries in abbreviated form. */
+DEFINE_STATIC_DEBUG_INFO(sclp_debug, "sclp", 8, 1, SCLP_TRACE_ENTRY_SIZE,
+ &debug_hex_ascii_view);
+
+/* Error trace area intended for full entries relating to failed requests. */
+DEFINE_STATIC_DEBUG_INFO(sclp_debug_err, "sclp_err", 4, 1,
+ SCLP_TRACE_ENTRY_SIZE, &debug_hex_ascii_view);
+
/* Lock to protect internal data consistency. */
static DEFINE_SPINLOCK(sclp_lock);
@@ -54,6 +73,114 @@ int sclp_console_drop = 1;
/* Number of times the console dropped buffer pages */
unsigned long sclp_console_full;
+/* The currently active SCLP command word. */
+static sclp_cmdw_t active_cmd;
+
+static inline void sclp_trace(int prio, char *id, u32 a, u64 b, bool err)
+{ /* Log one fixed-size (id, a, b) record; mirror it to the error area if err. */
+ struct sclp_trace_entry e;
+
+ memset(&e, 0, sizeof(e));
+ strncpy(e.id, id, sizeof(e.id)); /* fixed-width tag: a 4-char id fills the field, no NUL */
+ e.a = a;
+ e.b = b;
+ debug_event(&sclp_debug, prio, &e, sizeof(e));
+ if (err)
+ debug_event(&sclp_debug_err, 0, &e, sizeof(e)); /* error copies are logged at level 0 */
+}
+
+static inline int no_zeroes_len(void *data, int len)
+{ /* Return len with trailing zero bytes dropped, never trimming below one trace entry. */
+ char *d = data;
+
+ /* Minimize trace area usage by not tracing trailing zeroes. */
+ while (len > SCLP_TRACE_ENTRY_SIZE && d[len - 1] == 0)
+ len--;
+
+ return len; /* unchanged if len was already <= SCLP_TRACE_ENTRY_SIZE */
+}
+
+static inline void sclp_trace_bin(int prio, void *d, int len, int errlen)
+{ /* Dump len raw bytes to the debug area; a non-zero errlen also dumps to the error area. */
+ debug_event(&sclp_debug, prio, d, no_zeroes_len(d, len));
+ if (errlen)
+ debug_event(&sclp_debug_err, 0, d, no_zeroes_len(d, errlen)); /* level 0, own length */
+}
+
+static inline int abbrev_len(sclp_cmdw_t cmd, struct sccb_header *sccb)
+{ /* Return how many SCCB bytes to trace for cmd at the current debug level. */
+ struct evbuf_header *evbuf = (struct evbuf_header *)(sccb + 1); /* bytes right after the SCCB header */
+ int len = sccb->length, limit = SCLP_TRACE_MAX_SIZE;
+
+ /* Full SCCB tracing if debug level is set to max. */
+ if (sclp_debug.level == DEBUG_MAX_LEVEL)
+ return len;
+
+ /* Minimal tracing for console writes. */
+ if (cmd == SCLP_CMDW_WRITE_EVENT_DATA &&
+ (evbuf->type == EVTYP_MSG || evbuf->type == EVTYP_VT220MSG))
+ limit = SCLP_TRACE_ENTRY_SIZE;
+
+ return min(len, limit); /* never more than the SCCB's own length */
+}
+
+static inline void sclp_trace_sccb(int prio, char *id, u32 a, u64 b,
+ sclp_cmdw_t cmd, struct sccb_header *sccb,
+ bool err)
+{ /* Summary record first, then the abbreviated SCCB one level higher (prio + 1). */
+ sclp_trace(prio, id, a, b, err);
+ if (sccb) { /* a NULL sccb yields the summary record only */
+ sclp_trace_bin(prio + 1, sccb, abbrev_len(cmd, sccb),
+ err ? sccb->length : 0); /* on error the error area gets sccb->length bytes */
+ }
+}
+
+static inline void sclp_trace_evbuf(int prio, char *id, u32 a, u64 b,
+ struct evbuf_header *evbuf, bool err)
+{ /* Summary record, then at most SCLP_TRACE_EVENT_MAX_SIZE bytes of the event buffer. */
+ sclp_trace(prio, id, a, b, err);
+ sclp_trace_bin(prio + 1, evbuf,
+ min((int)evbuf->length, (int)SCLP_TRACE_EVENT_MAX_SIZE),
+ err ? evbuf->length : 0); /* on error the error area gets evbuf->length bytes */
+}
+
+static inline void sclp_trace_req(int prio, char *id, struct sclp_req *req,
+ bool err)
+{ /* Trace a request as a=SCCB address, b=packed status/response/timeout/start_count. */
+ struct sccb_header *sccb = req->sccb;
+ union { /* overlays four 16-bit fields on the 64-bit 'b' trace value */
+ struct {
+ u16 status;
+ u16 response;
+ u16 timeout;
+ u16 start_count;
+ };
+ u64 b;
+ } summary;
+
+ summary.status = req->status;
+ summary.response = sccb ? sccb->response_code : 0; /* 0 when the request has no SCCB */
+ summary.timeout = (u16)req->queue_timeout; /* truncated to 16 bits */
+ summary.start_count = (u16)req->start_count; /* truncated to 16 bits */
+
+ sclp_trace(prio, id, (u32)(addr_t)sccb, summary.b, err); /* NOTE(review): address cut to 32 bits - presumably SCCBs sit below 4 GB; confirm */
+}
+
+static inline void sclp_trace_register(int prio, char *id, u32 a, u64 b,
+ struct sclp_register *reg)
+{ /* Summary record followed by reg's receive/send masks, both at the same prio. */
+ struct {
+ u64 receive;
+ u64 send;
+ } d;
+
+ d.receive = reg->receive_mask;
+ d.send = reg->send_mask;
+
+ sclp_trace(prio, id, a, b, false); /* never mirrored to the error area */
+ sclp_trace_bin(prio, &d, sizeof(d), 0);
+}
+
static int __init sclp_setup_console_pages(char *str)
{
int pages, rc;
@@ -162,6 +289,9 @@ static void sclp_request_timeout(bool force_restart)
{
unsigned long flags;
+ /* TMO: A timeout occurred (a=force_restart) */
+ sclp_trace(2, "TMO", force_restart, 0, true);
+
spin_lock_irqsave(&sclp_lock, flags);
if (force_restart) {
if (sclp_running_state == sclp_running_state_running) {
@@ -237,6 +367,12 @@ static void sclp_req_queue_timeout(struct timer_list *unused)
do {
req = __sclp_req_queue_remove_expired_req();
+
+ if (req) {
+ /* RQTM: Request timed out (a=sccb, b=summary) */
+ sclp_trace_req(2, "RQTM", req, true);
+ }
+
if (req && req->callback)
req->callback(req, req->callback_data);
} while (req);
@@ -248,6 +384,25 @@ static void sclp_req_queue_timeout(struct timer_list *unused)
spin_unlock_irqrestore(&sclp_lock, flags);
}
+static int sclp_service_call_trace(sclp_cmdw_t command, void *sccb)
+{
+ static u64 srvc_count;
+ int rc;
+
+ /* SRV1: Service call about to be issued (a=command, b=sccb address) */
+ sclp_trace_sccb(0, "SRV1", command, (u64)sccb, command, sccb, false);
+
+ rc = sclp_service_call(command, sccb);
+
+ /* SRV2: Service call was issued (a=rc, b=SRVC sequence number) */
+ sclp_trace(0, "SRV2", -rc, ++srvc_count, rc != 0);
+
+ if (rc == 0)
+ active_cmd = command;
+
+ return rc;
+}
+
/* Try to start a request. Return zero if the request was successfully
* started or if it will be started at a later time. Return non-zero otherwise.
* Called while sclp_lock is locked. */
@@ -259,7 +414,7 @@ __sclp_start_request(struct sclp_req *req)
if (sclp_running_state != sclp_running_state_idle)
return 0;
del_timer(&sclp_request_timer);
- rc = sclp_service_call(req->command, req->sccb);
+ rc = sclp_service_call_trace(req->command, req->sccb);
req->start_count++;
if (rc == 0) {
@@ -309,6 +464,10 @@ sclp_process_queue(void)
}
/* Post-processing for aborted request */
list_del(&req->list);
+
+ /* RQAB: Request aborted (a=sccb, b=summary) */
+ sclp_trace_req(2, "RQAB", req, true);
+
if (req->callback) {
spin_unlock_irqrestore(&sclp_lock, flags);
req->callback(req, req->callback_data);
@@ -341,6 +500,10 @@ sclp_add_request(struct sclp_req *req)
spin_unlock_irqrestore(&sclp_lock, flags);
return -EIO;
}
+
+ /* RQAD: Request was added (a=sccb, b=caller) */
+ sclp_trace(2, "RQAD", (u32)(addr_t)req->sccb, _RET_IP_, false);
+
req->status = SCLP_REQ_QUEUED;
req->start_count = 0;
list_add_tail(&req->list, &sclp_req_queue);
@@ -394,6 +557,11 @@ sclp_dispatch_evbufs(struct sccb_header *sccb)
else
reg = NULL;
}
+
+ /* EVNT: Event callback (b=receiver) */
+ sclp_trace_evbuf(2, "EVNT", 0, reg ? (u64)reg->receiver_fn : 0,
+ evbuf, !reg);
+
if (reg && reg->receiver_fn) {
spin_unlock_irqrestore(&sclp_lock, flags);
reg->receiver_fn(evbuf);
@@ -455,6 +623,30 @@ __sclp_find_req(u32 sccb)
return NULL;
}
+static bool ok_response(u32 sccb_int, sclp_cmdw_t cmd)
+{
+ struct sccb_header *sccb = (struct sccb_header *)(addr_t)sccb_int;
+ struct evbuf_header *evbuf;
+ u16 response;
+
+ if (!sccb)
+ return true;
+
+ /* Check SCCB response. */
+ response = sccb->response_code & 0xff;
+ if (response != 0x10 && response != 0x20)
+ return false;
+
+ /* Check event-processed flag on outgoing events. */
+ if (cmd == SCLP_CMDW_WRITE_EVENT_DATA) {
+ evbuf = (struct evbuf_header *)(sccb + 1);
+ if (!(evbuf->flags & 0x80))
+ return false;
+ }
+
+ return true;
+}
+
/* Handler for external interruption. Perform request post-processing.
* Prepare read event data request if necessary. Start processing of next
* request on queue. */
@@ -469,6 +661,12 @@ static void sclp_interrupt_handler(struct ext_code ext_code,
spin_lock(&sclp_lock);
finished_sccb = param32 & 0xfffffff8;
evbuf_pending = param32 & 0x3;
+
+ /* INT: Interrupt received (a=intparm, b=cmd) */
+ sclp_trace_sccb(0, "INT", param32, active_cmd, active_cmd,
+ (struct sccb_header *)(addr_t)finished_sccb,
+ !ok_response(finished_sccb, active_cmd));
+
if (finished_sccb) {
del_timer(&sclp_request_timer);
sclp_running_state = sclp_running_state_reset_pending;
@@ -477,13 +675,21 @@ static void sclp_interrupt_handler(struct ext_code ext_code,
/* Request post-processing */
list_del(&req->list);
req->status = SCLP_REQ_DONE;
+
+ /* RQOK: Request success (a=sccb, b=summary) */
+ sclp_trace_req(2, "RQOK", req, false);
+
if (req->callback) {
spin_unlock(&sclp_lock);
req->callback(req, req->callback_data);
spin_lock(&sclp_lock);
}
+ } else {
+ /* UNEX: Unexpected SCCB completion (a=sccb address) */
+ sclp_trace(0, "UNEX", finished_sccb, 0, true);
}
sclp_running_state = sclp_running_state_idle;
+ active_cmd = 0;
}
if (evbuf_pending &&
sclp_activation_state == sclp_activation_state_active)
@@ -507,9 +713,13 @@ sclp_sync_wait(void)
unsigned long long old_tick;
unsigned long flags;
unsigned long cr0, cr0_sync;
+ static u64 sync_count;
u64 timeout;
int irq_context;
+ /* SYN1: Synchronous wait start (a=runstate, b=sync count) */
+ sclp_trace(4, "SYN1", sclp_running_state, ++sync_count, false);
+
/* We'll be disabling timer interrupts, so we need a custom timeout
* mechanism */
timeout = 0;
@@ -547,6 +757,9 @@ sclp_sync_wait(void)
_local_bh_enable();
local_tick_enable(old_tick);
local_irq_restore(flags);
+
+ /* SYN2: Synchronous wait end (a=runstate, b=sync_count) */
+ sclp_trace(4, "SYN2", sclp_running_state, sync_count, false);
}
EXPORT_SYMBOL(sclp_sync_wait);
@@ -576,8 +789,13 @@ sclp_dispatch_state_change(void)
reg = NULL;
}
spin_unlock_irqrestore(&sclp_lock, flags);
- if (reg && reg->state_change_fn)
+ if (reg && reg->state_change_fn) {
+ /* STCG: State-change callback (b=callback) */
+ sclp_trace(2, "STCG", 0, (u64)reg->state_change_fn,
+ false);
+
reg->state_change_fn(reg);
+ }
} while (reg);
}
@@ -651,6 +869,9 @@ sclp_register(struct sclp_register *reg)
sccb_mask_t send_mask;
int rc;
+ /* REG: Event listener registered (b=caller) */
+ sclp_trace_register(2, "REG", 0, _RET_IP_, reg);
+
rc = sclp_init();
if (rc)
return rc;
@@ -683,6 +904,9 @@ sclp_unregister(struct sclp_register *reg)
{
unsigned long flags;
+ /* UREG: Event listener unregistered (b=caller) */
+ sclp_trace_register(2, "UREG", 0, _RET_IP_, reg);
+
spin_lock_irqsave(&sclp_lock, flags);
list_del(&reg->list);
spin_unlock_irqrestore(&sclp_lock, flags);
@@ -932,7 +1156,7 @@ sclp_check_interface(void)
for (retry = 0; retry <= SCLP_INIT_RETRY; retry++) {
__sclp_make_init_req(0, 0);
sccb = (struct init_sccb *) sclp_init_req.sccb;
- rc = sclp_service_call(sclp_init_req.command, sccb);
+ rc = sclp_service_call_trace(sclp_init_req.command, sccb);
if (rc == -EIO)
break;
sclp_init_req.status = SCLP_REQ_RUNNING;
diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h
index 8dd8ad83b78b..5e434108aae6 100644
--- a/drivers/s390/char/sclp.h
+++ b/drivers/s390/char/sclp.h
@@ -310,8 +310,6 @@ extern int sclp_console_drop;
extern unsigned long sclp_console_full;
extern bool sclp_mask_compat_mode;
-extern char *sclp_early_sccb;
-
void sclp_early_wait_irq(void);
int sclp_early_cmd(sclp_cmdw_t cmd, void *sccb);
unsigned int sclp_early_con_check_linemode(struct init_sccb *sccb);
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index ab0518cfdcfe..998933e83610 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -457,7 +457,7 @@ static int __init sclp_detect_standby_memory(void)
struct read_storage_sccb *sccb;
int i, id, assigned, rc;
- if (OLDMEM_BASE) /* No standby memory in kdump mode */
+ if (oldmem_data.start) /* No standby memory in kdump mode */
return 0;
if ((sclp.facilities & 0xe00000000000ULL) != 0xe00000000000ULL)
return 0;
diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c
index 039b2074db7e..c365110f2dae 100644
--- a/drivers/s390/char/sclp_config.c
+++ b/drivers/s390/char/sclp_config.c
@@ -50,12 +50,12 @@ static void sclp_cpu_capability_notify(struct work_struct *work)
s390_update_cpu_mhz();
pr_info("CPU capability may have changed\n");
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
dev = get_cpu_device(cpu);
kobject_uevent(&dev->kobj, KOBJ_CHANGE);
}
- put_online_cpus();
+ cpus_read_unlock();
}
static void __ref sclp_cpu_change_notify(struct work_struct *work)
diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c
index b7329af076a0..676634de65a8 100644
--- a/drivers/s390/char/sclp_early_core.c
+++ b/drivers/s390/char/sclp_early_core.c
@@ -17,7 +17,7 @@
static struct read_info_sccb __bootdata(sclp_info_sccb);
static int __bootdata(sclp_info_sccb_valid);
-char *sclp_early_sccb = (char *) EARLY_SCCB_OFFSET;
+char *__bootdata(sclp_early_sccb);
int sclp_init_state = sclp_init_state_uninitialized;
/*
* Used to keep track of the size of the event masks. Qemu until version 2.11
@@ -211,6 +211,11 @@ static int sclp_early_setup(int disable, int *have_linemode, int *have_vt220)
return rc;
}
+void sclp_early_set_buffer(void *sccb)
+{
+ sclp_early_sccb = sccb;
+}
+
/*
* Output one or more lines of text on the SCLP console (VT220 and /
* or line-mode).
@@ -235,11 +240,20 @@ void sclp_early_printk(const char *str)
__sclp_early_printk(str, strlen(str));
}
+/*
+ * We can't pass sclp_info_sccb to sclp_early_cmd() here directly,
+ * because it might not fulfil the requirements for an SCLP communication buffer:
+ * - lie below 2G in memory
+ * - be page-aligned
+ * Therefore, we use the buffer sclp_early_sccb (which fulfils all those
+ * requirements) temporarily for communication and copy a received response
+ * back into the buffer sclp_info_sccb upon successful completion.
+ */
int __init sclp_early_read_info(void)
{
int i;
int length = test_facility(140) ? EXT_SCCB_READ_SCP : PAGE_SIZE;
- struct read_info_sccb *sccb = &sclp_info_sccb;
+ struct read_info_sccb *sccb = (struct read_info_sccb *)sclp_early_sccb;
sclp_cmdw_t commands[] = {SCLP_CMDW_READ_SCP_INFO_FORCED,
SCLP_CMDW_READ_SCP_INFO};
@@ -251,6 +265,7 @@ int __init sclp_early_read_info(void)
if (sclp_early_cmd(commands[i], sccb))
break;
if (sccb->header.response_code == 0x10) {
+ memcpy(&sclp_info_sccb, sccb, length);
sclp_info_sccb_valid = 1;
return 0;
}
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index b5b0848da93b..3ba2d934a3e8 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -269,7 +269,7 @@ static int __init zcore_init(void)
if (!is_ipl_type_dump())
return -ENODATA;
- if (OLDMEM_BASE)
+ if (oldmem_data.start)
return -ENODATA;
zcore_dbf = debug_register("zcore", 4, 1, 4 * sizeof(long));
diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index 382c5b5f8cd3..2ec741106cb6 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c
@@ -500,28 +500,6 @@ void ccwgroup_driver_unregister(struct ccwgroup_driver *cdriver)
EXPORT_SYMBOL(ccwgroup_driver_unregister);
/**
- * get_ccwgroupdev_by_busid() - obtain device from a bus id
- * @gdrv: driver the device is owned by
- * @bus_id: bus id of the device to be searched
- *
- * This function searches all devices owned by @gdrv for a device with a bus
- * id matching @bus_id.
- * Returns:
- * If a match is found, its reference count of the found device is increased
- * and it is returned; else %NULL is returned.
- */
-struct ccwgroup_device *get_ccwgroupdev_by_busid(struct ccwgroup_driver *gdrv,
- char *bus_id)
-{
- struct device *dev;
-
- dev = driver_find_device_by_name(&gdrv->driver, bus_id);
-
- return dev ? to_ccwgroupdev(dev) : NULL;
-}
-EXPORT_SYMBOL_GPL(get_ccwgroupdev_by_busid);
-
-/**
* ccwgroup_probe_ccwdev() - probe function for slave devices
* @cdev: ccw device to be probed
*
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index ebc321edba51..3377097e65de 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -430,9 +430,26 @@ static ssize_t pimpampom_show(struct device *dev,
}
static DEVICE_ATTR_RO(pimpampom);
+static ssize_t dev_busid_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct subchannel *sch = to_subchannel(dev);
+ struct pmcw *pmcw = &sch->schib.pmcw;
+
+ if ((pmcw->st == SUBCHANNEL_TYPE_IO ||
+ pmcw->st == SUBCHANNEL_TYPE_MSG) && pmcw->dnv)
+ return sysfs_emit(buf, "0.%x.%04x\n", sch->schid.ssid,
+ pmcw->dev);
+ else
+ return sysfs_emit(buf, "none\n");
+}
+static DEVICE_ATTR_RO(dev_busid);
+
static struct attribute *io_subchannel_type_attrs[] = {
&dev_attr_chpids.attr,
&dev_attr_pimpampom.attr,
+ &dev_attr_dev_busid.attr,
NULL,
};
ATTRIBUTE_GROUPS(io_subchannel_type);
@@ -886,6 +903,18 @@ static ssize_t real_cssid_show(struct device *dev, struct device_attribute *a,
}
static DEVICE_ATTR_RO(real_cssid);
+static ssize_t rescan_store(struct device *dev, struct device_attribute *a,
+ const char *buf, size_t count)
+{
+ CIO_TRACE_EVENT(4, "usr-rescan");
+
+ css_schedule_eval_all();
+ css_complete_work();
+
+ return count;
+}
+static DEVICE_ATTR_WO(rescan);
+
static ssize_t cm_enable_show(struct device *dev, struct device_attribute *a,
char *buf)
{
@@ -932,6 +961,7 @@ static umode_t cm_enable_mode(struct kobject *kobj, struct attribute *attr,
static struct attribute *cssdev_attrs[] = {
&dev_attr_real_cssid.attr,
+ &dev_attr_rescan.attr,
NULL,
};
diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h
index f69ffbb8edc9..99c2212dc6a6 100644
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -126,21 +126,9 @@ static inline int do_eqbs(u64 token, unsigned char *state, int queue,
struct qdio_irq;
-struct siga_flag {
- u8 input:1;
- u8 output:1;
- u8 sync:1;
- u8 sync_after_ai:1;
- u8 sync_out_after_pci:1;
- u8:3;
-} __attribute__ ((packed));
-
struct qdio_dev_perf_stat {
unsigned int adapter_int;
unsigned int qdio_int;
- unsigned int pci_request_int;
-
- unsigned int tasklet_outbound;
unsigned int siga_read;
unsigned int siga_write;
@@ -150,7 +138,6 @@ struct qdio_dev_perf_stat {
unsigned int stop_polling;
unsigned int inbound_queue_full;
unsigned int outbound_call;
- unsigned int outbound_handler;
unsigned int outbound_queue_full;
unsigned int fast_requeue;
unsigned int target_full;
@@ -180,12 +167,6 @@ struct qdio_input_q {
};
struct qdio_output_q {
- /* PCIs are enabled for the queue */
- int pci_out_enabled;
- /* timer to check for more outbound work */
- struct timer_list timer;
- /* tasklet to check for completions */
- struct tasklet_struct tasklet;
};
/*
@@ -250,8 +231,7 @@ struct qdio_irq {
unsigned long sch_token; /* QEBSM facility */
enum qdio_irq_states state;
-
- struct siga_flag siga_flag; /* siga sync information from qdioac */
+ u8 qdioac1;
int nr_input_qs;
int nr_output_qs;
@@ -263,7 +243,6 @@ struct qdio_irq {
struct qdio_ssqd_desc ssqd_desc;
void (*orig_handler) (struct ccw_device *, unsigned long, struct irb *);
- unsigned int scan_threshold; /* used SBALs before tasklet schedule */
int perf_stat_enabled;
struct qdr *qdr;
@@ -325,13 +304,9 @@ static inline void qdio_deliver_irq(struct qdio_irq *irq)
#define pci_out_supported(irq) ((irq)->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED)
#define is_qebsm(q) (q->irq_ptr->sch_token != 0)
-#define need_siga_in(q) (q->irq_ptr->siga_flag.input)
-#define need_siga_out(q) (q->irq_ptr->siga_flag.output)
-#define need_siga_sync(q) (unlikely(q->irq_ptr->siga_flag.sync))
-#define need_siga_sync_after_ai(q) \
- (unlikely(q->irq_ptr->siga_flag.sync_after_ai))
-#define need_siga_sync_out_after_pci(q) \
- (unlikely(q->irq_ptr->siga_flag.sync_out_after_pci))
+#define qdio_need_siga_in(irq) ((irq)->qdioac1 & AC1_SIGA_INPUT_NEEDED)
+#define qdio_need_siga_out(irq) ((irq)->qdioac1 & AC1_SIGA_OUTPUT_NEEDED)
+#define qdio_need_siga_sync(irq) (unlikely((irq)->qdioac1 & AC1_SIGA_SYNC_NEEDED))
#define for_each_input_queue(irq_ptr, q, i) \
for (i = 0; i < irq_ptr->nr_input_qs && \
@@ -345,11 +320,6 @@ static inline void qdio_deliver_irq(struct qdio_irq *irq)
#define sub_buf(bufnr, dec) QDIO_BUFNR((bufnr) - (dec))
#define prev_buf(bufnr) sub_buf(bufnr, 1)
-#define queue_irqs_enabled(q) \
- (test_bit(QDIO_QUEUE_IRQS_DISABLED, &q->u.in.queue_irq_state) == 0)
-#define queue_irqs_disabled(q) \
- (test_bit(QDIO_QUEUE_IRQS_DISABLED, &q->u.in.queue_irq_state) != 0)
-
extern u64 last_ai_time;
/* prototypes for thin interrupt */
@@ -360,8 +330,6 @@ void qdio_thinint_exit(void);
int test_nonshared_ind(struct qdio_irq *);
/* prototypes for setup */
-void qdio_outbound_tasklet(struct tasklet_struct *t);
-void qdio_outbound_timer(struct timer_list *t);
void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
struct irb *irb);
int qdio_allocate_qs(struct qdio_irq *irq_ptr, int nr_input_qs,
diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c
index 00384f58f218..4bb7965daa0f 100644
--- a/drivers/s390/cio/qdio_debug.c
+++ b/drivers/s390/cio/qdio_debug.c
@@ -197,8 +197,6 @@ DEFINE_SHOW_ATTRIBUTE(ssqd);
static char *qperf_names[] = {
"Assumed adapter interrupts",
"QDIO interrupts",
- "Requested PCIs",
- "Outbound tasklet runs",
"SIGA read",
"SIGA write",
"SIGA sync",
@@ -206,7 +204,6 @@ static char *qperf_names[] = {
"Inbound stop_polling",
"Inbound queue full",
"Outbound calls",
- "Outbound handler",
"Outbound queue full",
"Outbound fast_requeue",
"Outbound target_full",
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 3052fab00597..45e810c6ea3b 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -10,7 +10,6 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
-#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/gfp.h>
#include <linux/io.h>
@@ -304,12 +303,22 @@ static inline int qdio_siga_sync(struct qdio_q *q, unsigned int output,
return (cc) ? -EIO : 0;
}
+static inline int qdio_sync_input_queue(struct qdio_q *q)
+{
+ return qdio_siga_sync(q, 0, q->mask);
+}
+
+static inline int qdio_sync_output_queue(struct qdio_q *q)
+{
+ return qdio_siga_sync(q, q->mask, 0);
+}
+
static inline int qdio_siga_sync_q(struct qdio_q *q)
{
if (q->is_input_q)
- return qdio_siga_sync(q, 0, q->mask);
+ return qdio_sync_input_queue(q);
else
- return qdio_siga_sync(q, q->mask, 0);
+ return qdio_sync_output_queue(q);
}
static int qdio_siga_output(struct qdio_q *q, unsigned int count,
@@ -373,22 +382,10 @@ static inline int qdio_siga_input(struct qdio_q *q)
return (cc) ? -EIO : 0;
}
-#define qdio_siga_sync_out(q) qdio_siga_sync(q, ~0U, 0)
-#define qdio_siga_sync_all(q) qdio_siga_sync(q, ~0U, ~0U)
-
-static inline void qdio_sync_queues(struct qdio_q *q)
-{
- /* PCI capable outbound queues will also be scanned so sync them too */
- if (pci_out_supported(q->irq_ptr))
- qdio_siga_sync_all(q);
- else
- qdio_siga_sync_q(q);
-}
-
int debug_get_buf_state(struct qdio_q *q, unsigned int bufnr,
unsigned char *state)
{
- if (need_siga_sync(q))
+ if (qdio_need_siga_sync(q->irq_ptr))
qdio_siga_sync_q(q);
return get_buf_state(q, bufnr, state, 0);
}
@@ -455,10 +452,9 @@ static int get_inbound_buffer_frontier(struct qdio_q *q, unsigned int start,
if (!count)
return 0;
- /*
- * No siga sync here, as a PCI or we after a thin interrupt
- * already sync'ed the queues.
- */
+ if (qdio_need_siga_sync(q->irq_ptr))
+ qdio_sync_input_queue(q);
+
count = get_buf_states(q, start, &state, count, 1);
if (!count)
return 0;
@@ -510,8 +506,8 @@ static inline int qdio_inbound_q_done(struct qdio_q *q, unsigned int start)
if (!atomic_read(&q->nr_buf_used))
return 1;
- if (need_siga_sync(q))
- qdio_siga_sync_q(q);
+ if (qdio_need_siga_sync(q->irq_ptr))
+ qdio_sync_input_queue(q);
get_buf_state(q, start, &state, 0);
if (state == SLSB_P_INPUT_PRIMED || state == SLSB_P_INPUT_ERROR)
@@ -521,15 +517,6 @@ static inline int qdio_inbound_q_done(struct qdio_q *q, unsigned int start)
return 1;
}
-static inline int qdio_tasklet_schedule(struct qdio_q *q)
-{
- if (likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE)) {
- tasklet_schedule(&q->u.out.tasklet);
- return 0;
- }
- return -EPERM;
-}
-
static int get_outbound_buffer_frontier(struct qdio_q *q, unsigned int start,
unsigned int *error)
{
@@ -538,17 +525,13 @@ static int get_outbound_buffer_frontier(struct qdio_q *q, unsigned int start,
q->timestamp = get_tod_clock_fast();
- if (need_siga_sync(q))
- if (((queue_type(q) != QDIO_IQDIO_QFMT) &&
- !pci_out_supported(q->irq_ptr)) ||
- (queue_type(q) == QDIO_IQDIO_QFMT &&
- multicast_outbound(q)))
- qdio_siga_sync_q(q);
-
count = atomic_read(&q->nr_buf_used);
if (!count)
return 0;
+ if (qdio_need_siga_sync(q->irq_ptr))
+ qdio_sync_output_queue(q);
+
count = get_buf_states(q, start, &state, count, 0);
if (!count)
return 0;
@@ -595,19 +578,13 @@ static int get_outbound_buffer_frontier(struct qdio_q *q, unsigned int start,
}
}
-/* all buffers processed? */
-static inline int qdio_outbound_q_done(struct qdio_q *q)
-{
- return atomic_read(&q->nr_buf_used) == 0;
-}
-
static int qdio_kick_outbound_q(struct qdio_q *q, unsigned int count,
unsigned long aob)
{
int retries = 0, cc;
unsigned int busy_bit;
- if (!need_siga_out(q))
+ if (!qdio_need_siga_out(q->irq_ptr))
return 0;
DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "siga-w:%1d", q->nr);
@@ -644,75 +621,6 @@ retry:
return cc;
}
-void qdio_outbound_tasklet(struct tasklet_struct *t)
-{
- struct qdio_output_q *out_q = from_tasklet(out_q, t, tasklet);
- struct qdio_q *q = container_of(out_q, struct qdio_q, u.out);
- unsigned int start = q->first_to_check;
- unsigned int error = 0;
- int count;
-
- qperf_inc(q, tasklet_outbound);
- WARN_ON_ONCE(atomic_read(&q->nr_buf_used) < 0);
-
- count = get_outbound_buffer_frontier(q, start, &error);
- if (count) {
- q->first_to_check = add_buf(start, count);
-
- if (q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE) {
- qperf_inc(q, outbound_handler);
- DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "koh: s:%02x c:%02x",
- start, count);
-
- q->handler(q->irq_ptr->cdev, error, q->nr, start,
- count, q->irq_ptr->int_parm);
- }
- }
-
- if (queue_type(q) == QDIO_ZFCP_QFMT && !pci_out_supported(q->irq_ptr) &&
- !qdio_outbound_q_done(q))
- goto sched;
-
- if (q->u.out.pci_out_enabled)
- return;
-
- /*
- * Now we know that queue type is either qeth without pci enabled
- * or HiperSockets. Make sure buffer switch from PRIMED to EMPTY
- * is noticed and outbound_handler is called after some time.
- */
- if (qdio_outbound_q_done(q))
- del_timer_sync(&q->u.out.timer);
- else
- if (!timer_pending(&q->u.out.timer) &&
- likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE))
- mod_timer(&q->u.out.timer, jiffies + 10 * HZ);
- return;
-
-sched:
- qdio_tasklet_schedule(q);
-}
-
-void qdio_outbound_timer(struct timer_list *t)
-{
- struct qdio_q *q = from_timer(q, t, u.out.timer);
-
- qdio_tasklet_schedule(q);
-}
-
-static inline void qdio_check_outbound_pci_queues(struct qdio_irq *irq)
-{
- struct qdio_q *out;
- int i;
-
- if (!pci_out_supported(irq) || !irq->scan_threshold)
- return;
-
- for_each_output_queue(irq, out, i)
- if (!qdio_outbound_q_done(out))
- qdio_tasklet_schedule(out);
-}
-
static inline void qdio_set_state(struct qdio_irq *irq_ptr,
enum qdio_irq_states state)
{
@@ -734,25 +642,11 @@ static void qdio_irq_check_sense(struct qdio_irq *irq_ptr, struct irb *irb)
/* PCI interrupt handler */
static void qdio_int_handler_pci(struct qdio_irq *irq_ptr)
{
- int i;
- struct qdio_q *q;
-
if (unlikely(irq_ptr->state != QDIO_IRQ_STATE_ACTIVE))
return;
qdio_deliver_irq(irq_ptr);
irq_ptr->last_data_irq_time = S390_lowcore.int_clock;
-
- if (!pci_out_supported(irq_ptr) || !irq_ptr->scan_threshold)
- return;
-
- for_each_output_queue(irq_ptr, q, i) {
- if (qdio_outbound_q_done(q))
- continue;
- if (need_siga_sync(q) && need_siga_sync_out_after_pci(q))
- qdio_siga_sync_q(q);
- qdio_tasklet_schedule(q);
- }
}
static void qdio_handle_activate_check(struct qdio_irq *irq_ptr,
@@ -879,15 +773,34 @@ int qdio_get_ssqd_desc(struct ccw_device *cdev,
}
EXPORT_SYMBOL_GPL(qdio_get_ssqd_desc);
-static void qdio_shutdown_queues(struct qdio_irq *irq_ptr)
+static int qdio_cancel_ccw(struct qdio_irq *irq, int how)
{
- struct qdio_q *q;
- int i;
+ struct ccw_device *cdev = irq->cdev;
+ long timeout;
+ int rc;
- for_each_output_queue(irq_ptr, q, i) {
- del_timer_sync(&q->u.out.timer);
- tasklet_kill(&q->u.out.tasklet);
+ spin_lock_irq(get_ccwdev_lock(cdev));
+ qdio_set_state(irq, QDIO_IRQ_STATE_CLEANUP);
+ if (how & QDIO_FLAG_CLEANUP_USING_CLEAR)
+ rc = ccw_device_clear(cdev, QDIO_DOING_CLEANUP);
+ else
+ /* default behaviour is halt */
+ rc = ccw_device_halt(cdev, QDIO_DOING_CLEANUP);
+ spin_unlock_irq(get_ccwdev_lock(cdev));
+ if (rc) {
+ DBF_ERROR("%4x SHUTD ERR", irq->schid.sch_no);
+ DBF_ERROR("rc:%4d", rc);
+ return rc;
}
+
+ timeout = wait_event_interruptible_timeout(cdev->private->wait_q,
+ irq->state == QDIO_IRQ_STATE_INACTIVE ||
+ irq->state == QDIO_IRQ_STATE_ERR,
+ 10 * HZ);
+ if (timeout <= 0)
+ rc = (timeout == -ERESTARTSYS) ? -EINTR : -ETIME;
+
+ return rc;
}
/**
@@ -919,35 +832,13 @@ int qdio_shutdown(struct ccw_device *cdev, int how)
}
/*
- * Indicate that the device is going down. Scheduling the queue
- * tasklets is forbidden from here on.
+ * Indicate that the device is going down.
*/
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED);
- qdio_shutdown_queues(irq_ptr);
qdio_shutdown_debug_entries(irq_ptr);
- /* cleanup subchannel */
- spin_lock_irq(get_ccwdev_lock(cdev));
- qdio_set_state(irq_ptr, QDIO_IRQ_STATE_CLEANUP);
- if (how & QDIO_FLAG_CLEANUP_USING_CLEAR)
- rc = ccw_device_clear(cdev, QDIO_DOING_CLEANUP);
- else
- /* default behaviour is halt */
- rc = ccw_device_halt(cdev, QDIO_DOING_CLEANUP);
- spin_unlock_irq(get_ccwdev_lock(cdev));
- if (rc) {
- DBF_ERROR("%4x SHUTD ERR", irq_ptr->schid.sch_no);
- DBF_ERROR("rc:%4d", rc);
- goto no_cleanup;
- }
-
- wait_event_interruptible_timeout(cdev->private->wait_q,
- irq_ptr->state == QDIO_IRQ_STATE_INACTIVE ||
- irq_ptr->state == QDIO_IRQ_STATE_ERR,
- 10 * HZ);
-
-no_cleanup:
+ rc = qdio_cancel_ccw(irq_ptr, how);
qdio_shutdown_thinint(irq_ptr);
qdio_shutdown_irq(irq_ptr);
@@ -1061,8 +952,6 @@ static void qdio_trace_init_data(struct qdio_irq *irq,
DBF_DEV_EVENT(DBF_ERR, irq, "qfmt:%1u", data->q_format);
DBF_DEV_EVENT(DBF_ERR, irq, "qpff%4x", data->qib_param_field_format);
DBF_DEV_HEX(irq, &data->qib_param_field, sizeof(void *), DBF_ERR);
- DBF_DEV_HEX(irq, &data->input_slib_elements, sizeof(void *), DBF_ERR);
- DBF_DEV_HEX(irq, &data->output_slib_elements, sizeof(void *), DBF_ERR);
DBF_DEV_EVENT(DBF_ERR, irq, "niq:%1u noq:%1u", data->no_input_qs,
data->no_output_qs);
DBF_DEV_HEX(irq, &data->input_handler, sizeof(void *), DBF_ERR);
@@ -1083,6 +972,7 @@ int qdio_establish(struct ccw_device *cdev,
{
struct qdio_irq *irq_ptr = cdev->private->qdio_data;
struct subchannel_id schid;
+ long timeout;
int rc;
ccw_device_get_schid(cdev, &schid);
@@ -1111,17 +1001,14 @@ int qdio_establish(struct ccw_device *cdev,
qdio_setup_irq(irq_ptr, init_data);
rc = qdio_establish_thinint(irq_ptr);
- if (rc) {
- qdio_shutdown_irq(irq_ptr);
- mutex_unlock(&irq_ptr->setup_mutex);
- return rc;
- }
+ if (rc)
+ goto err_thinint;
/* establish q */
irq_ptr->ccw.cmd_code = irq_ptr->equeue.cmd;
irq_ptr->ccw.flags = CCW_FLAG_SLI;
irq_ptr->ccw.count = irq_ptr->equeue.count;
- irq_ptr->ccw.cda = (u32)((addr_t)irq_ptr->qdr);
+ irq_ptr->ccw.cda = (u32) virt_to_phys(irq_ptr->qdr);
spin_lock_irq(get_ccwdev_lock(cdev));
ccw_device_set_options_mask(cdev, 0);
@@ -1131,20 +1018,20 @@ int qdio_establish(struct ccw_device *cdev,
if (rc) {
DBF_ERROR("%4x est IO ERR", irq_ptr->schid.sch_no);
DBF_ERROR("rc:%4x", rc);
- qdio_shutdown_thinint(irq_ptr);
- qdio_shutdown_irq(irq_ptr);
- mutex_unlock(&irq_ptr->setup_mutex);
- return rc;
+ goto err_ccw_start;
}
- wait_event_interruptible_timeout(cdev->private->wait_q,
- irq_ptr->state == QDIO_IRQ_STATE_ESTABLISHED ||
- irq_ptr->state == QDIO_IRQ_STATE_ERR, HZ);
+ timeout = wait_event_interruptible_timeout(cdev->private->wait_q,
+ irq_ptr->state == QDIO_IRQ_STATE_ESTABLISHED ||
+ irq_ptr->state == QDIO_IRQ_STATE_ERR, HZ);
+ if (timeout <= 0) {
+ rc = (timeout == -ERESTARTSYS) ? -EINTR : -ETIME;
+ goto err_ccw_timeout;
+ }
if (irq_ptr->state != QDIO_IRQ_STATE_ESTABLISHED) {
- mutex_unlock(&irq_ptr->setup_mutex);
- qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR);
- return -EIO;
+ rc = -EIO;
+ goto err_ccw_error;
}
qdio_setup_ssqd_info(irq_ptr);
@@ -1156,6 +1043,17 @@ int qdio_establish(struct ccw_device *cdev,
qdio_print_subchannel_info(irq_ptr);
qdio_setup_debug_entries(irq_ptr);
return 0;
+
+err_ccw_timeout:
+ qdio_cancel_ccw(irq_ptr, QDIO_FLAG_CLEANUP_USING_CLEAR);
+err_ccw_error:
+err_ccw_start:
+ qdio_shutdown_thinint(irq_ptr);
+err_thinint:
+ qdio_shutdown_irq(irq_ptr);
+ qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
+ mutex_unlock(&irq_ptr->setup_mutex);
+ return rc;
}
EXPORT_SYMBOL_GPL(qdio_establish);
@@ -1219,12 +1117,10 @@ EXPORT_SYMBOL_GPL(qdio_activate);
/**
* handle_inbound - reset processed input buffers
* @q: queue containing the buffers
- * @callflags: flags
* @bufnr: first buffer to process
* @count: how many buffers are emptied
*/
-static int handle_inbound(struct qdio_q *q, unsigned int callflags,
- int bufnr, int count)
+static int handle_inbound(struct qdio_q *q, int bufnr, int count)
{
int overlap;
@@ -1241,7 +1137,7 @@ static int handle_inbound(struct qdio_q *q, unsigned int callflags,
count = set_buf_states(q, bufnr, SLSB_CU_INPUT_EMPTY, count);
atomic_add(count, &q->nr_buf_used);
- if (need_siga_in(q))
+ if (qdio_need_siga_in(q->irq_ptr))
return qdio_siga_input(q);
return 0;
@@ -1250,16 +1146,13 @@ static int handle_inbound(struct qdio_q *q, unsigned int callflags,
/**
* handle_outbound - process filled outbound buffers
* @q: queue containing the buffers
- * @callflags: flags
* @bufnr: first buffer to process
* @count: how many buffers are filled
* @aob: asynchronous operation block
*/
-static int handle_outbound(struct qdio_q *q, unsigned int callflags,
- unsigned int bufnr, unsigned int count,
+static int handle_outbound(struct qdio_q *q, unsigned int bufnr, unsigned int count,
struct qaob *aob)
{
- const unsigned int scan_threshold = q->irq_ptr->scan_threshold;
unsigned char state = 0;
int used, rc = 0;
@@ -1271,19 +1164,13 @@ static int handle_outbound(struct qdio_q *q, unsigned int callflags,
if (used == QDIO_MAX_BUFFERS_PER_Q)
qperf_inc(q, outbound_queue_full);
- if (callflags & QDIO_FLAG_PCI_OUT) {
- q->u.out.pci_out_enabled = 1;
- qperf_inc(q, pci_request_int);
- } else
- q->u.out.pci_out_enabled = 0;
-
if (queue_type(q) == QDIO_IQDIO_QFMT) {
unsigned long phys_aob = aob ? virt_to_phys(aob) : 0;
WARN_ON_ONCE(!IS_ALIGNED(phys_aob, 256));
rc = qdio_kick_outbound_q(q, count, phys_aob);
- } else if (need_siga_sync(q)) {
- rc = qdio_siga_sync_q(q);
+ } else if (qdio_need_siga_sync(q->irq_ptr)) {
+ rc = qdio_sync_output_queue(q);
} else if (count < QDIO_MAX_BUFFERS_PER_Q &&
get_buf_state(q, prev_buf(bufnr), &state, 0) > 0 &&
state == SLSB_CU_OUTPUT_PRIMED) {
@@ -1293,18 +1180,6 @@ static int handle_outbound(struct qdio_q *q, unsigned int callflags,
rc = qdio_kick_outbound_q(q, count, 0);
}
- /* Let drivers implement their own completion scanning: */
- if (!scan_threshold)
- return rc;
-
- /* in case of SIGA errors we must process the error immediately */
- if (used >= scan_threshold || rc)
- qdio_tasklet_schedule(q);
- else
- /* free the SBALs in case of no further traffic */
- if (!timer_pending(&q->u.out.timer) &&
- likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE))
- mod_timer(&q->u.out.timer, jiffies + HZ);
return rc;
}
@@ -1336,11 +1211,9 @@ int do_QDIO(struct ccw_device *cdev, unsigned int callflags,
if (!count)
return 0;
if (callflags & QDIO_FLAG_SYNC_INPUT)
- return handle_inbound(irq_ptr->input_qs[q_nr],
- callflags, bufnr, count);
+ return handle_inbound(irq_ptr->input_qs[q_nr], bufnr, count);
else if (callflags & QDIO_FLAG_SYNC_OUTPUT)
- return handle_outbound(irq_ptr->output_qs[q_nr],
- callflags, bufnr, count, aob);
+ return handle_outbound(irq_ptr->output_qs[q_nr], bufnr, count, aob);
return -EINVAL;
}
EXPORT_SYMBOL_GPL(do_QDIO);
@@ -1420,53 +1293,11 @@ int qdio_inspect_queue(struct ccw_device *cdev, unsigned int nr, bool is_input,
return -ENODEV;
q = is_input ? irq_ptr->input_qs[nr] : irq_ptr->output_qs[nr];
- if (need_siga_sync(q))
- qdio_siga_sync_q(q);
-
return __qdio_inspect_queue(q, bufnr, error);
}
EXPORT_SYMBOL_GPL(qdio_inspect_queue);
/**
- * qdio_get_next_buffers - process input buffers
- * @cdev: associated ccw_device for the qdio subchannel
- * @nr: input queue number
- * @bufnr: first filled buffer number
- * @error: buffers are in error state
- *
- * Return codes
- * < 0 - error
- * = 0 - no new buffers found
- * > 0 - number of processed buffers
- */
-int qdio_get_next_buffers(struct ccw_device *cdev, int nr, int *bufnr,
- int *error)
-{
- struct qdio_q *q;
- struct qdio_irq *irq_ptr = cdev->private->qdio_data;
-
- if (!irq_ptr)
- return -ENODEV;
- q = irq_ptr->input_qs[nr];
-
- /*
- * Cannot rely on automatic sync after interrupt since queues may
- * also be examined without interrupt.
- */
- if (need_siga_sync(q))
- qdio_sync_queues(q);
-
- qdio_check_outbound_pci_queues(irq_ptr);
-
- /* Note: upper-layer MUST stop processing immediately here ... */
- if (unlikely(q->irq_ptr->state != QDIO_IRQ_STATE_ACTIVE))
- return -EIO;
-
- return __qdio_inspect_queue(q, bufnr, error);
-}
-EXPORT_SYMBOL(qdio_get_next_buffers);
-
-/**
* qdio_stop_irq - disable interrupt processing for the device
* @cdev: associated ccw_device for the qdio subchannel
*
diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c
index da67e4979402..20efafe47897 100644
--- a/drivers/s390/cio/qdio_setup.c
+++ b/drivers/s390/cio/qdio_setup.c
@@ -89,55 +89,6 @@ void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count)
}
EXPORT_SYMBOL_GPL(qdio_reset_buffers);
-/*
- * qebsm is only available under 64bit but the adapter sets the feature
- * flag anyway, so we manually override it.
- */
-static inline int qebsm_possible(void)
-{
- return css_general_characteristics.qebsm;
-}
-
-/*
- * qib_param_field: pointer to 128 bytes or NULL, if no param field
- * nr_input_qs: pointer to nr_queues*128 words of data or NULL
- */
-static void set_impl_params(struct qdio_irq *irq_ptr,
- unsigned int qib_param_field_format,
- unsigned char *qib_param_field,
- unsigned long *input_slib_elements,
- unsigned long *output_slib_elements)
-{
- struct qdio_q *q;
- int i, j;
-
- if (!irq_ptr)
- return;
-
- irq_ptr->qib.pfmt = qib_param_field_format;
- if (qib_param_field)
- memcpy(irq_ptr->qib.parm, qib_param_field,
- sizeof(irq_ptr->qib.parm));
-
- if (!input_slib_elements)
- goto output;
-
- for_each_input_queue(irq_ptr, q, i) {
- for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++)
- q->slib->slibe[j].parms =
- input_slib_elements[i * QDIO_MAX_BUFFERS_PER_Q + j];
- }
-output:
- if (!output_slib_elements)
- return;
-
- for_each_output_queue(irq_ptr, q, i) {
- for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++)
- q->slib->slibe[j].parms =
- output_slib_elements[i * QDIO_MAX_BUFFERS_PER_Q + j];
- }
-}
-
static void __qdio_free_queues(struct qdio_q **queues, unsigned int count)
{
struct qdio_q *q;
@@ -267,26 +218,9 @@ static void setup_queues(struct qdio_irq *irq_ptr,
q->is_input_q = 0;
setup_storage_lists(q, irq_ptr,
qdio_init->output_sbal_addr_array[i], i);
-
- tasklet_setup(&q->u.out.tasklet, qdio_outbound_tasklet);
- timer_setup(&q->u.out.timer, qdio_outbound_timer, 0);
}
}
-static void process_ac_flags(struct qdio_irq *irq_ptr, unsigned char qdioac)
-{
- if (qdioac & AC1_SIGA_INPUT_NEEDED)
- irq_ptr->siga_flag.input = 1;
- if (qdioac & AC1_SIGA_OUTPUT_NEEDED)
- irq_ptr->siga_flag.output = 1;
- if (qdioac & AC1_SIGA_SYNC_NEEDED)
- irq_ptr->siga_flag.sync = 1;
- if (!(qdioac & AC1_AUTOMATIC_SYNC_ON_THININT))
- irq_ptr->siga_flag.sync_after_ai = 1;
- if (!(qdioac & AC1_AUTOMATIC_SYNC_ON_OUT_PCI))
- irq_ptr->siga_flag.sync_out_after_pci = 1;
-}
-
static void check_and_setup_qebsm(struct qdio_irq *irq_ptr,
unsigned char qdioac, unsigned long token)
{
@@ -363,7 +297,7 @@ void qdio_setup_ssqd_info(struct qdio_irq *irq_ptr)
qdioac = irq_ptr->ssqd_desc.qdioac1;
check_and_setup_qebsm(irq_ptr, qdioac, irq_ptr->ssqd_desc.sch_token);
- process_ac_flags(irq_ptr, qdioac);
+ irq_ptr->qdioac1 = qdioac;
DBF_EVENT("ac 1:%2x 2:%4x", qdioac, irq_ptr->ssqd_desc.qdioac2);
DBF_EVENT("3:%4x qib:%4x", irq_ptr->ssqd_desc.qdioac3, irq_ptr->qib.ac);
}
@@ -386,6 +320,8 @@ static void setup_qdr(struct qdio_irq *irq_ptr,
struct qdesfmt0 *desc = &irq_ptr->qdr->qdf0[0];
int i;
+ memset(irq_ptr->qdr, 0, sizeof(struct qdr));
+
irq_ptr->qdr->qfmt = qdio_init->q_format;
irq_ptr->qdr->ac = qdio_init->qdr_ac;
irq_ptr->qdr->iqdcnt = qdio_init->no_input_qs;
@@ -405,12 +341,15 @@ static void setup_qdr(struct qdio_irq *irq_ptr,
static void setup_qib(struct qdio_irq *irq_ptr,
struct qdio_initialize *init_data)
{
- if (qebsm_possible())
- irq_ptr->qib.rflags |= QIB_RFLAGS_ENABLE_QEBSM;
-
- irq_ptr->qib.rflags |= init_data->qib_rflags;
+ memset(&irq_ptr->qib, 0, sizeof(irq_ptr->qib));
irq_ptr->qib.qfmt = init_data->q_format;
+ irq_ptr->qib.pfmt = init_data->qib_param_field_format;
+
+ irq_ptr->qib.rflags = init_data->qib_rflags;
+ if (css_general_characteristics.qebsm)
+ irq_ptr->qib.rflags |= QIB_RFLAGS_ENABLE_QEBSM;
+
if (init_data->no_input_qs)
irq_ptr->qib.isliba =
(unsigned long)(irq_ptr->input_qs[0]->slib);
@@ -419,6 +358,10 @@ static void setup_qib(struct qdio_irq *irq_ptr,
(unsigned long)(irq_ptr->output_qs[0]->slib);
memcpy(irq_ptr->qib.ebcnam, dev_name(&irq_ptr->cdev->dev), 8);
ASCEBC(irq_ptr->qib.ebcnam, 8);
+
+ if (init_data->qib_param_field)
+ memcpy(irq_ptr->qib.parm, init_data->qib_param_field,
+ sizeof(irq_ptr->qib.parm));
}
int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data)
@@ -426,8 +369,7 @@ int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data)
struct ccw_device *cdev = irq_ptr->cdev;
struct ciw *ciw;
- memset(&irq_ptr->qib, 0, sizeof(irq_ptr->qib));
- memset(&irq_ptr->siga_flag, 0, sizeof(irq_ptr->siga_flag));
+ irq_ptr->qdioac1 = 0;
memset(&irq_ptr->ccw, 0, sizeof(irq_ptr->ccw));
memset(&irq_ptr->ssqd_desc, 0, sizeof(irq_ptr->ssqd_desc));
memset(&irq_ptr->perf_stat, 0, sizeof(irq_ptr->perf_stat));
@@ -436,13 +378,9 @@ int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data)
irq_ptr->sch_token = irq_ptr->perf_stat_enabled = 0;
irq_ptr->state = QDIO_IRQ_STATE_INACTIVE;
- /* wipes qib.ac, required by ar7063 */
- memset(irq_ptr->qdr, 0, sizeof(struct qdr));
-
irq_ptr->int_parm = init_data->int_parm;
irq_ptr->nr_input_qs = init_data->no_input_qs;
irq_ptr->nr_output_qs = init_data->no_output_qs;
- irq_ptr->scan_threshold = init_data->scan_threshold;
ccw_device_get_schid(cdev, &irq_ptr->schid);
setup_queues(irq_ptr, init_data);
@@ -450,10 +388,6 @@ int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data)
set_bit(QDIO_IRQ_DISABLED, &irq_ptr->poll_state);
setup_qib(irq_ptr, init_data);
- set_impl_params(irq_ptr, init_data->qib_param_field_format,
- init_data->qib_param_field,
- init_data->input_slib_elements,
- init_data->output_slib_elements);
/* fill input and output descriptors */
setup_qdr(irq_ptr, init_data);
@@ -497,11 +431,8 @@ void qdio_shutdown_irq(struct qdio_irq *irq)
void qdio_print_subchannel_info(struct qdio_irq *irq_ptr)
{
- char s[80];
-
- snprintf(s, 80, "qdio: %s %s on SC %x using "
- "AI:%d QEBSM:%d PRI:%d TDD:%d SIGA:%s%s%s%s%s\n",
- dev_name(&irq_ptr->cdev->dev),
+ dev_info(&irq_ptr->cdev->dev,
+ "qdio: %s on SC %x using AI:%d QEBSM:%d PRI:%d TDD:%d SIGA:%s%s%s\n",
(irq_ptr->qib.qfmt == QDIO_QETH_QFMT) ? "OSA" :
((irq_ptr->qib.qfmt == QDIO_ZFCP_QFMT) ? "ZFCP" : "HS"),
irq_ptr->schid.sch_no,
@@ -509,12 +440,9 @@ void qdio_print_subchannel_info(struct qdio_irq *irq_ptr)
(irq_ptr->sch_token) ? 1 : 0,
pci_out_supported(irq_ptr) ? 1 : 0,
css_general_characteristics.aif_tdd,
- (irq_ptr->siga_flag.input) ? "R" : " ",
- (irq_ptr->siga_flag.output) ? "W" : " ",
- (irq_ptr->siga_flag.sync) ? "S" : " ",
- (irq_ptr->siga_flag.sync_after_ai) ? "A" : " ",
- (irq_ptr->siga_flag.sync_out_after_pci) ? "P" : " ");
- printk(KERN_INFO "%s", s);
+ qdio_need_siga_in(irq_ptr) ? "R" : " ",
+ qdio_need_siga_out(irq_ptr) ? "W" : " ",
+ qdio_need_siga_sync(irq_ptr) ? "S" : " ");
}
int __init qdio_setup_init(void)
@@ -541,7 +469,7 @@ int __init qdio_setup_init(void)
(css_general_characteristics.aif_osa) ? 1 : 0);
/* Check for QEBSM support in general (bit 58). */
- DBF_EVENT("cssQEBSM:%1d", (qebsm_possible()) ? 1 : 0);
+ DBF_EVENT("cssQEBSM:%1d", css_general_characteristics.qebsm);
rc = 0;
out:
return rc;
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 0992edcaf1af..f433428057d9 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -127,7 +127,7 @@ static struct bus_type ap_bus_type;
/* Adapter interrupt definitions */
static void ap_interrupt_handler(struct airq_struct *airq, bool floating);
-static int ap_airq_flag;
+static bool ap_irq_flag;
static struct airq_struct ap_airq = {
.handler = ap_interrupt_handler,
@@ -135,15 +135,6 @@ static struct airq_struct ap_airq = {
};
/**
- * ap_using_interrupts() - Returns non-zero if interrupt support is
- * available.
- */
-static inline int ap_using_interrupts(void)
-{
- return ap_airq_flag;
-}
-
-/**
* ap_airq_ptr() - Get the address of the adapter interrupt indicator
*
* Returns the address of the local-summary-indicator of the adapter
@@ -152,7 +143,7 @@ static inline int ap_using_interrupts(void)
*/
void *ap_airq_ptr(void)
{
- if (ap_using_interrupts())
+ if (ap_irq_flag)
return ap_airq.lsi_ptr;
return NULL;
}
@@ -396,7 +387,7 @@ void ap_wait(enum ap_sm_wait wait)
switch (wait) {
case AP_SM_WAIT_AGAIN:
case AP_SM_WAIT_INTERRUPT:
- if (ap_using_interrupts())
+ if (ap_irq_flag)
break;
if (ap_poll_kthread) {
wake_up(&ap_poll_wait);
@@ -471,7 +462,7 @@ static void ap_tasklet_fn(unsigned long dummy)
* be received. Doing it in the beginning of the tasklet is therefor
* important that no requests on any AP get lost.
*/
- if (ap_using_interrupts())
+ if (ap_irq_flag)
xchg(ap_airq.lsi_ptr, 0);
spin_lock_bh(&ap_queues_lock);
@@ -541,7 +532,7 @@ static int ap_poll_thread_start(void)
{
int rc;
- if (ap_using_interrupts() || ap_poll_kthread)
+ if (ap_irq_flag || ap_poll_kthread)
return 0;
mutex_lock(&ap_poll_thread_mutex);
ap_poll_kthread = kthread_run(ap_poll_thread, NULL, "appoll");
@@ -703,7 +694,7 @@ static int __ap_calc_helper(struct device *dev, void *arg)
if (is_queue_dev(dev)) {
pctrs->apqns++;
- if ((to_ap_dev(dev))->drv)
+ if (dev->driver)
pctrs->bound++;
}
@@ -883,7 +874,6 @@ static int ap_device_probe(struct device *dev)
to_ap_queue(dev)->qid);
spin_unlock_bh(&ap_queues_lock);
- ap_dev->drv = ap_drv;
rc = ap_drv->probe ? ap_drv->probe(ap_dev) : -ENODEV;
if (rc) {
@@ -891,7 +881,6 @@ static int ap_device_probe(struct device *dev)
if (is_queue_dev(dev))
hash_del(&to_ap_queue(dev)->hnode);
spin_unlock_bh(&ap_queues_lock);
- ap_dev->drv = NULL;
} else
ap_check_bindings_complete();
@@ -904,7 +893,7 @@ out:
static void ap_device_remove(struct device *dev)
{
struct ap_device *ap_dev = to_ap_dev(dev);
- struct ap_driver *ap_drv = ap_dev->drv;
+ struct ap_driver *ap_drv = to_ap_drv(dev->driver);
/* prepare ap queue device removal */
if (is_queue_dev(dev))
@@ -923,7 +912,6 @@ static void ap_device_remove(struct device *dev)
if (is_queue_dev(dev))
hash_del(&to_ap_queue(dev)->hnode);
spin_unlock_bh(&ap_queues_lock);
- ap_dev->drv = NULL;
put_device(dev);
}
@@ -1185,7 +1173,7 @@ static BUS_ATTR_RO(ap_adapter_mask);
static ssize_t ap_interrupts_show(struct bus_type *bus, char *buf)
{
return scnprintf(buf, PAGE_SIZE, "%d\n",
- ap_using_interrupts() ? 1 : 0);
+ ap_irq_flag ? 1 : 0);
}
static BUS_ATTR_RO(ap_interrupts);
@@ -1910,7 +1898,7 @@ static int __init ap_module_init(void)
/* enable interrupts if available */
if (ap_interrupts_available()) {
rc = register_adapter_interrupt(&ap_airq);
- ap_airq_flag = (rc == 0);
+ ap_irq_flag = (rc == 0);
}
/* Create /sys/bus/ap. */
@@ -1954,7 +1942,7 @@ out_work:
out_bus:
bus_unregister(&ap_bus_type);
out:
- if (ap_using_interrupts())
+ if (ap_irq_flag)
unregister_adapter_interrupt(&ap_airq);
kfree(ap_qci_info);
return rc;
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index 8f18abdbbc2b..95b577754b35 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -81,12 +81,6 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr)
#define AP_FUNC_APXA 6
/*
- * AP interrupt states
- */
-#define AP_INTR_DISABLED 0 /* AP interrupt disabled */
-#define AP_INTR_ENABLED 1 /* AP interrupt enabled */
-
-/*
* AP queue state machine states
*/
enum ap_sm_state {
@@ -112,7 +106,7 @@ enum ap_sm_event {
* AP queue state wait behaviour
*/
enum ap_sm_wait {
- AP_SM_WAIT_AGAIN, /* retry immediately */
+ AP_SM_WAIT_AGAIN = 0, /* retry immediately */
AP_SM_WAIT_TIMEOUT, /* wait for timeout */
AP_SM_WAIT_INTERRUPT, /* wait for thin interrupt (if available) */
AP_SM_WAIT_NONE, /* no wait */
@@ -157,7 +151,6 @@ void ap_driver_unregister(struct ap_driver *);
struct ap_device {
struct device device;
- struct ap_driver *drv; /* Pointer to AP device driver. */
int device_type; /* AP device type. */
};
@@ -165,7 +158,6 @@ struct ap_device {
struct ap_card {
struct ap_device ap_dev;
- void *private; /* ap driver private pointer. */
int raw_hwtype; /* AP raw hardware type. */
unsigned int functions; /* AP device function bitfield. */
int queue_depth; /* AP queue depth.*/
@@ -182,11 +174,10 @@ struct ap_queue {
struct hlist_node hnode; /* Node for the ap_queues hashtable */
struct ap_card *card; /* Ptr to assoc. AP card. */
spinlock_t lock; /* Per device lock. */
- void *private; /* ap driver private pointer. */
enum ap_dev_state dev_state; /* queue device state */
bool config; /* configured state */
ap_qid_t qid; /* AP queue id. */
- int interrupt; /* indicate if interrupts are enabled */
+ bool interrupt; /* indicate if interrupts are enabled */
int queue_count; /* # messages currently on AP queue. */
int pendingq_count; /* # requests on pendingq list. */
int requestq_count; /* # requests on requestq list. */
diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
index 669f96fddad6..d70c4d3d0907 100644
--- a/drivers/s390/crypto/ap_queue.c
+++ b/drivers/s390/crypto/ap_queue.c
@@ -19,7 +19,7 @@
static void __ap_flush_queue(struct ap_queue *aq);
/**
- * ap_queue_enable_interruption(): Enable interruption on an AP queue.
+ * ap_queue_enable_irq(): Enable interrupt support on this AP queue.
* @qid: The AP queue number
* @ind: the notification indicator byte
*
@@ -27,7 +27,7 @@ static void __ap_flush_queue(struct ap_queue *aq);
* value it waits a while and tests the AP queue if interrupts
* have been switched on using ap_test_queue().
*/
-static int ap_queue_enable_interruption(struct ap_queue *aq, void *ind)
+static int ap_queue_enable_irq(struct ap_queue *aq, void *ind)
{
struct ap_queue_status status;
struct ap_qirq_ctrl qirqctrl = { 0 };
@@ -218,7 +218,8 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq)
return AP_SM_WAIT_NONE;
case AP_RESPONSE_NO_PENDING_REPLY:
if (aq->queue_count > 0)
- return AP_SM_WAIT_INTERRUPT;
+ return aq->interrupt ?
+ AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_TIMEOUT;
aq->sm_state = AP_SM_STATE_IDLE;
return AP_SM_WAIT_NONE;
default:
@@ -272,7 +273,8 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq)
fallthrough;
case AP_RESPONSE_Q_FULL:
aq->sm_state = AP_SM_STATE_QUEUE_FULL;
- return AP_SM_WAIT_INTERRUPT;
+ return aq->interrupt ?
+ AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_TIMEOUT;
case AP_RESPONSE_RESET_IN_PROGRESS:
aq->sm_state = AP_SM_STATE_RESET_WAIT;
return AP_SM_WAIT_TIMEOUT;
@@ -322,7 +324,7 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq)
case AP_RESPONSE_NORMAL:
case AP_RESPONSE_RESET_IN_PROGRESS:
aq->sm_state = AP_SM_STATE_RESET_WAIT;
- aq->interrupt = AP_INTR_DISABLED;
+ aq->interrupt = false;
return AP_SM_WAIT_TIMEOUT;
default:
aq->dev_state = AP_DEV_STATE_ERROR;
@@ -355,7 +357,7 @@ static enum ap_sm_wait ap_sm_reset_wait(struct ap_queue *aq)
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
lsi_ptr = ap_airq_ptr();
- if (lsi_ptr && ap_queue_enable_interruption(aq, lsi_ptr) == 0)
+ if (lsi_ptr && ap_queue_enable_irq(aq, lsi_ptr) == 0)
aq->sm_state = AP_SM_STATE_SETIRQ_WAIT;
else
aq->sm_state = (aq->queue_count > 0) ?
@@ -396,7 +398,7 @@ static enum ap_sm_wait ap_sm_setirq_wait(struct ap_queue *aq)
if (status.irq_enabled == 1) {
/* Irqs are now enabled */
- aq->interrupt = AP_INTR_ENABLED;
+ aq->interrupt = true;
aq->sm_state = (aq->queue_count > 0) ?
AP_SM_STATE_WORKING : AP_SM_STATE_IDLE;
}
@@ -586,7 +588,7 @@ static ssize_t interrupt_show(struct device *dev,
spin_lock_bh(&aq->lock);
if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT)
rc = scnprintf(buf, PAGE_SIZE, "Enable Interrupt pending.\n");
- else if (aq->interrupt == AP_INTR_ENABLED)
+ else if (aq->interrupt)
rc = scnprintf(buf, PAGE_SIZE, "Interrupts enabled.\n");
else
rc = scnprintf(buf, PAGE_SIZE, "Interrupts disabled.\n");
@@ -767,7 +769,7 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type)
aq->ap_dev.device.type = &ap_queue_type;
aq->ap_dev.device_type = device_type;
aq->qid = qid;
- aq->interrupt = AP_INTR_DISABLED;
+ aq->interrupt = false;
spin_lock_init(&aq->lock);
INIT_LIST_HEAD(&aq->pendingq);
INIT_LIST_HEAD(&aq->requestq);
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 122c85c22469..67f145589f58 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -35,7 +35,7 @@ static int match_apqn(struct device *dev, const void *data)
}
/**
- * vfio_ap_get_queue: Retrieve a queue with a specific APQN from a list
+ * vfio_ap_get_queue - retrieve a queue with a specific APQN from a list
* @matrix_mdev: the associated mediated matrix
* @apqn: The queue APQN
*
@@ -43,7 +43,7 @@ static int match_apqn(struct device *dev, const void *data)
* devices of the vfio_ap_drv.
* Verify that the APID and the APQI are set in the matrix.
*
- * Returns the pointer to the associated vfio_ap_queue
+ * Return: the pointer to the associated vfio_ap_queue
*/
static struct vfio_ap_queue *vfio_ap_get_queue(
struct ap_matrix_mdev *matrix_mdev,
@@ -64,7 +64,7 @@ static struct vfio_ap_queue *vfio_ap_get_queue(
}
/**
- * vfio_ap_wait_for_irqclear
+ * vfio_ap_wait_for_irqclear - clears the IR bit or gives up after 5 tries
* @apqn: The AP Queue number
*
* Checks the IRQ bit for the status of this APQN using ap_tapq.
@@ -72,7 +72,6 @@ static struct vfio_ap_queue *vfio_ap_get_queue(
* Returns if ap_tapq function failed with invalid, deconfigured or
* checkstopped AP.
* Otherwise retries up to 5 times after waiting 20ms.
- *
*/
static void vfio_ap_wait_for_irqclear(int apqn)
{
@@ -105,13 +104,12 @@ static void vfio_ap_wait_for_irqclear(int apqn)
}
/**
- * vfio_ap_free_aqic_resources
+ * vfio_ap_free_aqic_resources - free vfio_ap_queue resources
* @q: The vfio_ap_queue
*
* Unregisters the ISC in the GIB when the saved ISC not invalid.
- * Unpin the guest's page holding the NIB when it exist.
- * Reset the saved_pfn and saved_isc to invalid values.
- *
+ * Unpins the guest's page holding the NIB when it exists.
+ * Resets the saved_pfn and saved_isc to invalid values.
*/
static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
{
@@ -130,7 +128,7 @@ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
}
/**
- * vfio_ap_irq_disable
+ * vfio_ap_irq_disable - disables and clears an ap_queue interrupt
* @q: The vfio_ap_queue
*
* Uses ap_aqic to disable the interruption and in case of success, reset
@@ -144,6 +142,8 @@ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
*
* Returns if ap_aqic function failed with invalid, deconfigured or
* checkstopped AP.
+ *
+ * Return: &struct ap_queue_status
*/
static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
{
@@ -183,9 +183,8 @@ end_free:
}
/**
- * vfio_ap_setirq: Enable Interruption for a APQN
+ * vfio_ap_irq_enable - Enable Interruption for an APQN
*
- * @dev: the device associated with the ap_queue
* @q: the vfio_ap_queue holding AQIC parameters
*
* Pin the NIB saved in *q
@@ -197,6 +196,8 @@ end_free:
*
* Otherwise return the ap_queue_status returned by the ap_aqic(),
* all retry handling will be done by the guest.
+ *
+ * Return: &struct ap_queue_status
*/
static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
int isc,
@@ -253,7 +254,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
}
/**
- * handle_pqap: PQAP instruction callback
+ * handle_pqap - PQAP instruction callback
*
* @vcpu: The vcpu on which we received the PQAP instruction
*
@@ -270,8 +271,8 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
* We take the matrix_dev lock to ensure serialization on queues and
* mediated device access.
*
- * Return 0 if we could handle the request inside KVM.
- * otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
+ * Return: 0 if we could handle the request inside KVM.
+ * Otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
*/
static int handle_pqap(struct kvm_vcpu *vcpu)
{
@@ -426,7 +427,7 @@ struct vfio_ap_queue_reserved {
};
/**
- * vfio_ap_has_queue
+ * vfio_ap_has_queue - determines if the AP queue containing the target in @data
*
* @dev: an AP queue device
* @data: a struct vfio_ap_queue_reserved reference
@@ -443,7 +444,7 @@ struct vfio_ap_queue_reserved {
* - If @data contains only an apqi value, @data will be flagged as
* reserved if the APQI field in the AP queue device matches
*
- * Returns 0 to indicate the input to function succeeded. Returns -EINVAL if
+ * Return: 0 to indicate the input to function succeeded. Returns -EINVAL if
* @data does not contain either an apid or apqi.
*/
static int vfio_ap_has_queue(struct device *dev, void *data)
@@ -473,9 +474,9 @@ static int vfio_ap_has_queue(struct device *dev, void *data)
}
/**
- * vfio_ap_verify_queue_reserved
+ * vfio_ap_verify_queue_reserved - verifies that the AP queue containing
+ * @apid or @apqi is reserved
*
- * @matrix_dev: a mediated matrix device
* @apid: an AP adapter ID
* @apqi: an AP queue index
*
@@ -492,7 +493,7 @@ static int vfio_ap_has_queue(struct device *dev, void *data)
* - If only @apqi is not NULL, then there must be an AP queue device bound
* to the vfio_ap driver with an APQN containing @apqi
*
- * Returns 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL.
+ * Return: 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL.
*/
static int vfio_ap_verify_queue_reserved(unsigned long *apid,
unsigned long *apqi)
@@ -536,15 +537,15 @@ vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev,
}
/**
- * vfio_ap_mdev_verify_no_sharing
+ * vfio_ap_mdev_verify_no_sharing - verifies that the AP matrix is not configured
+ *
+ * @matrix_mdev: the mediated matrix device
*
* Verifies that the APQNs derived from the cross product of the AP adapter IDs
* and AP queue indexes comprising the AP matrix are not configured for another
* mediated device. AP queue sharing is not allowed.
*
- * @matrix_mdev: the mediated matrix device
- *
- * Returns 0 if the APQNs are not shared, otherwise; returns -EADDRINUSE.
+ * Return: 0 if the APQNs are not shared; otherwise returns -EADDRINUSE.
*/
static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
{
@@ -578,7 +579,8 @@ static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
}
/**
- * assign_adapter_store
+ * assign_adapter_store - parses the APID from @buf and sets the
+ * corresponding bit in the mediated matrix device's APM
*
* @dev: the matrix device
* @attr: the mediated matrix device's assign_adapter attribute
@@ -586,10 +588,7 @@ static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
* be assigned
* @count: the number of bytes in @buf
*
- * Parses the APID from @buf and sets the corresponding bit in the mediated
- * matrix device's APM.
- *
- * Returns the number of bytes processed if the APID is valid; otherwise,
+ * Return: the number of bytes processed if the APID is valid; otherwise,
* returns one of the following errors:
*
* 1. -EINVAL
@@ -666,17 +665,15 @@ done:
static DEVICE_ATTR_WO(assign_adapter);
/**
- * unassign_adapter_store
+ * unassign_adapter_store - parses the APID from @buf and clears the
+ * corresponding bit in the mediated matrix device's APM
*
* @dev: the matrix device
* @attr: the mediated matrix device's unassign_adapter attribute
* @buf: a buffer containing the adapter number (APID) to be unassigned
* @count: the number of bytes in @buf
*
- * Parses the APID from @buf and clears the corresponding bit in the mediated
- * matrix device's APM.
- *
- * Returns the number of bytes processed if the APID is valid; otherwise,
+ * Return: the number of bytes processed if the APID is valid; otherwise,
* returns one of the following errors:
* -EINVAL if the APID is not a number
* -ENODEV if the APID it exceeds the maximum value configured for the
@@ -740,7 +737,9 @@ vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev,
}
/**
- * assign_domain_store
+ * assign_domain_store - parses the APQI from @buf and sets the
+ * corresponding bit in the mediated matrix device's AQM
+ *
*
* @dev: the matrix device
* @attr: the mediated matrix device's assign_domain attribute
@@ -748,10 +747,7 @@ vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev,
* be assigned
* @count: the number of bytes in @buf
*
- * Parses the APQI from @buf and sets the corresponding bit in the mediated
- * matrix device's AQM.
- *
- * Returns the number of bytes processed if the APQI is valid; otherwise returns
+ * Return: the number of bytes processed if the APQI is valid; otherwise returns
* one of the following errors:
*
* 1. -EINVAL
@@ -824,7 +820,8 @@ static DEVICE_ATTR_WO(assign_domain);
/**
- * unassign_domain_store
+ * unassign_domain_store - parses the APQI from @buf and clears the
+ * corresponding bit in the mediated matrix device's AQM
*
* @dev: the matrix device
* @attr: the mediated matrix device's unassign_domain attribute
@@ -832,10 +829,7 @@ static DEVICE_ATTR_WO(assign_domain);
* be unassigned
* @count: the number of bytes in @buf
*
- * Parses the APQI from @buf and clears the corresponding bit in the
- * mediated matrix device's AQM.
- *
- * Returns the number of bytes processed if the APQI is valid; otherwise,
+ * Return: the number of bytes processed if the APQI is valid; otherwise,
* returns one of the following errors:
* -EINVAL if the APQI is not a number
* -ENODEV if the APQI exceeds the maximum value configured for the system
@@ -879,17 +873,16 @@ done:
static DEVICE_ATTR_WO(unassign_domain);
/**
- * assign_control_domain_store
+ * assign_control_domain_store - parses the domain ID from @buf and sets
+ * the corresponding bit in the mediated matrix device's ADM
+ *
*
* @dev: the matrix device
* @attr: the mediated matrix device's assign_control_domain attribute
* @buf: a buffer containing the domain ID to be assigned
* @count: the number of bytes in @buf
*
- * Parses the domain ID from @buf and sets the corresponding bit in the mediated
- * matrix device's ADM.
- *
- * Returns the number of bytes processed if the domain ID is valid; otherwise,
+ * Return: the number of bytes processed if the domain ID is valid; otherwise,
* returns one of the following errors:
* -EINVAL if the ID is not a number
* -ENODEV if the ID exceeds the maximum value configured for the system
@@ -937,17 +930,15 @@ done:
static DEVICE_ATTR_WO(assign_control_domain);
/**
- * unassign_control_domain_store
+ * unassign_control_domain_store - parses the domain ID from @buf and
+ * clears the corresponding bit in the mediated matrix device's ADM
*
* @dev: the matrix device
* @attr: the mediated matrix device's unassign_control_domain attribute
* @buf: a buffer containing the domain ID to be unassigned
* @count: the number of bytes in @buf
*
- * Parses the domain ID from @buf and clears the corresponding bit in the
- * mediated matrix device's ADM.
- *
- * Returns the number of bytes processed if the domain ID is valid; otherwise,
+ * Return: the number of bytes processed if the domain ID is valid; otherwise,
* returns one of the following errors:
* -EINVAL if the ID is not a number
* -ENODEV if the ID exceeds the maximum value configured for the system
@@ -1085,14 +1076,12 @@ static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
};
/**
- * vfio_ap_mdev_set_kvm
+ * vfio_ap_mdev_set_kvm - sets all data for @matrix_mdev that are needed
+ * to manage AP resources for the guest whose state is represented by @kvm
*
* @matrix_mdev: a mediated matrix device
* @kvm: reference to KVM instance
*
- * Sets all data for @matrix_mdev that are needed to manage AP resources
- * for the guest whose state is represented by @kvm.
- *
* Note: The matrix_dev->lock must be taken prior to calling
* this function; however, the lock will be temporarily released while the
* guest's AP configuration is set to avoid a potential lockdep splat.
@@ -1100,7 +1089,7 @@ static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
* certain circumstances, will result in a circular lock dependency if this is
* done under the @matrix_mdev->lock.
*
- * Return 0 if no other mediated matrix device has a reference to @kvm;
+ * Return: 0 if no other mediated matrix device has a reference to @kvm;
* otherwise, returns an -EPERM.
*/
static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
@@ -1131,8 +1120,8 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
return 0;
}
-/*
- * vfio_ap_mdev_iommu_notifier: IOMMU notifier callback
+/**
+ * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback
*
* @nb: The notifier block
* @action: Action to be taken
@@ -1141,6 +1130,7 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
* For an UNMAP request, unpin the guest IOVA (the NIB guest address we
* pinned before). Other requests are ignored.
*
+ * Return: for an UNMAP request, NOTIFY_OK; otherwise NOTIFY_DONE.
*/
static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
unsigned long action, void *data)
@@ -1161,19 +1151,17 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
}
/**
- * vfio_ap_mdev_unset_kvm
+ * vfio_ap_mdev_unset_kvm - performs clean-up of resources no longer needed
+ * by @matrix_mdev.
*
* @matrix_mdev: a matrix mediated device
*
- * Performs clean-up of resources no longer needed by @matrix_mdev.
- *
* Note: The matrix_dev->lock must be taken prior to calling
* this function; however, the lock will be temporarily released while the
* guest's AP configuration is cleared to avoid a potential lockdep splat.
* The kvm->lock is taken to clear the guest's AP configuration which, under
* certain circumstances, will result in a circular lock dependency if this is
* done under the @matrix_mdev->lock.
- *
*/
static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
{
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index 529ffe26ea9d..fa0cb8633040 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -572,14 +572,14 @@ static inline struct zcrypt_queue *zcrypt_pick_queue(struct zcrypt_card *zc,
struct module **pmod,
unsigned int weight)
{
- if (!zq || !try_module_get(zq->queue->ap_dev.drv->driver.owner))
+ if (!zq || !try_module_get(zq->queue->ap_dev.device.driver->owner))
return NULL;
zcrypt_queue_get(zq);
get_device(&zq->queue->ap_dev.device);
atomic_add(weight, &zc->load);
atomic_add(weight, &zq->load);
zq->request_count++;
- *pmod = zq->queue->ap_dev.drv->driver.owner;
+ *pmod = zq->queue->ap_dev.device.driver->owner;
return zq;
}
diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c
index 40fd5d37d26a..ef11d2a0ca6c 100644
--- a/drivers/s390/crypto/zcrypt_card.c
+++ b/drivers/s390/crypto/zcrypt_card.c
@@ -39,7 +39,7 @@
static ssize_t type_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct zcrypt_card *zc = to_ap_card(dev)->private;
+ struct zcrypt_card *zc = dev_get_drvdata(dev);
return scnprintf(buf, PAGE_SIZE, "%s\n", zc->type_string);
}
@@ -50,8 +50,8 @@ static ssize_t online_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
+ struct zcrypt_card *zc = dev_get_drvdata(dev);
struct ap_card *ac = to_ap_card(dev);
- struct zcrypt_card *zc = ac->private;
int online = ac->config && zc->online ? 1 : 0;
return scnprintf(buf, PAGE_SIZE, "%d\n", online);
@@ -61,8 +61,8 @@ static ssize_t online_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
+ struct zcrypt_card *zc = dev_get_drvdata(dev);
struct ap_card *ac = to_ap_card(dev);
- struct zcrypt_card *zc = ac->private;
struct zcrypt_queue *zq;
int online, id, i = 0, maxzqs = 0;
struct zcrypt_queue **zq_uelist = NULL;
@@ -116,7 +116,7 @@ static ssize_t load_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct zcrypt_card *zc = to_ap_card(dev)->private;
+ struct zcrypt_card *zc = dev_get_drvdata(dev);
return scnprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&zc->load));
}
diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c
index bc34bedf9db8..6a3c2b460965 100644
--- a/drivers/s390/crypto/zcrypt_ccamisc.c
+++ b/drivers/s390/crypto/zcrypt_ccamisc.c
@@ -1724,10 +1724,10 @@ static int fetch_cca_info(u16 cardnr, u16 domain, struct cca_info *ci)
rlen = vlen = PAGE_SIZE/2;
rc = cca_query_crypto_facility(cardnr, domain, "STATICSB",
rarray, &rlen, varray, &vlen);
- if (rc == 0 && rlen >= 10*8 && vlen >= 240) {
- ci->new_apka_mk_state = (char) rarray[7*8];
- ci->cur_apka_mk_state = (char) rarray[8*8];
- ci->old_apka_mk_state = (char) rarray[9*8];
+ if (rc == 0 && rlen >= 13*8 && vlen >= 240) {
+ ci->new_apka_mk_state = (char) rarray[10*8];
+ ci->cur_apka_mk_state = (char) rarray[11*8];
+ ci->old_apka_mk_state = (char) rarray[12*8];
if (ci->old_apka_mk_state == '2')
memcpy(&ci->old_apka_mkvp, varray + 208, 8);
if (ci->cur_apka_mk_state == '2')
diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c
index 62ceeb7fc125..fa8293d37006 100644
--- a/drivers/s390/crypto/zcrypt_cex2a.c
+++ b/drivers/s390/crypto/zcrypt_cex2a.c
@@ -89,7 +89,7 @@ static int zcrypt_cex2a_card_probe(struct ap_device *ap_dev)
if (!zc)
return -ENOMEM;
zc->card = ac;
- ac->private = zc;
+ dev_set_drvdata(&ap_dev->device, zc);
if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX2A) {
zc->min_mod_size = CEX2A_MIN_MOD_SIZE;
@@ -118,7 +118,6 @@ static int zcrypt_cex2a_card_probe(struct ap_device *ap_dev)
rc = zcrypt_card_register(zc);
if (rc) {
- ac->private = NULL;
zcrypt_card_free(zc);
}
@@ -131,10 +130,9 @@ static int zcrypt_cex2a_card_probe(struct ap_device *ap_dev)
*/
static void zcrypt_cex2a_card_remove(struct ap_device *ap_dev)
{
- struct zcrypt_card *zc = to_ap_card(&ap_dev->device)->private;
+ struct zcrypt_card *zc = dev_get_drvdata(&ap_dev->device);
- if (zc)
- zcrypt_card_unregister(zc);
+ zcrypt_card_unregister(zc);
}
static struct ap_driver zcrypt_cex2a_card_driver = {
@@ -176,10 +174,9 @@ static int zcrypt_cex2a_queue_probe(struct ap_device *ap_dev)
ap_queue_init_state(aq);
ap_queue_init_reply(aq, &zq->reply);
aq->request_timeout = CEX2A_CLEANUP_TIME;
- aq->private = zq;
+ dev_set_drvdata(&ap_dev->device, zq);
rc = zcrypt_queue_register(zq);
if (rc) {
- aq->private = NULL;
zcrypt_queue_free(zq);
}
@@ -192,11 +189,9 @@ static int zcrypt_cex2a_queue_probe(struct ap_device *ap_dev)
*/
static void zcrypt_cex2a_queue_remove(struct ap_device *ap_dev)
{
- struct ap_queue *aq = to_ap_queue(&ap_dev->device);
- struct zcrypt_queue *zq = aq->private;
+ struct zcrypt_queue *zq = dev_get_drvdata(&ap_dev->device);
- if (zq)
- zcrypt_queue_unregister(zq);
+ zcrypt_queue_unregister(zq);
}
static struct ap_driver zcrypt_cex2a_queue_driver = {
diff --git a/drivers/s390/crypto/zcrypt_cex2c.c b/drivers/s390/crypto/zcrypt_cex2c.c
index 7a8cbdbe4408..a0b9f1153e12 100644
--- a/drivers/s390/crypto/zcrypt_cex2c.c
+++ b/drivers/s390/crypto/zcrypt_cex2c.c
@@ -66,9 +66,9 @@ static ssize_t cca_serialnr_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
+ struct zcrypt_card *zc = dev_get_drvdata(dev);
struct cca_info ci;
struct ap_card *ac = to_ap_card(dev);
- struct zcrypt_card *zc = ac->private;
memset(&ci, 0, sizeof(ci));
@@ -97,9 +97,9 @@ static ssize_t cca_mkvps_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
+ struct zcrypt_queue *zq = dev_get_drvdata(dev);
int n = 0;
struct cca_info ci;
- struct zcrypt_queue *zq = to_ap_queue(dev)->private;
static const char * const cao_state[] = { "invalid", "valid" };
static const char * const new_state[] = { "empty", "partial", "full" };
@@ -261,7 +261,7 @@ static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev)
if (!zc)
return -ENOMEM;
zc->card = ac;
- ac->private = zc;
+ dev_set_drvdata(&ap_dev->device, zc);
switch (ac->ap_dev.device_type) {
case AP_DEVICE_TYPE_CEX2C:
zc->user_space_type = ZCRYPT_CEX2C;
@@ -287,7 +287,6 @@ static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev)
rc = zcrypt_card_register(zc);
if (rc) {
- ac->private = NULL;
zcrypt_card_free(zc);
return rc;
}
@@ -297,7 +296,6 @@ static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev)
&cca_card_attr_grp);
if (rc) {
zcrypt_card_unregister(zc);
- ac->private = NULL;
zcrypt_card_free(zc);
}
}
@@ -311,13 +309,13 @@ static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev)
*/
static void zcrypt_cex2c_card_remove(struct ap_device *ap_dev)
{
+ struct zcrypt_card *zc = dev_get_drvdata(&ap_dev->device);
struct ap_card *ac = to_ap_card(&ap_dev->device);
- struct zcrypt_card *zc = to_ap_card(&ap_dev->device)->private;
if (ap_test_bit(&ac->functions, AP_FUNC_COPRO))
sysfs_remove_group(&ap_dev->device.kobj, &cca_card_attr_grp);
- if (zc)
- zcrypt_card_unregister(zc);
+
+ zcrypt_card_unregister(zc);
}
static struct ap_driver zcrypt_cex2c_card_driver = {
@@ -359,10 +357,9 @@ static int zcrypt_cex2c_queue_probe(struct ap_device *ap_dev)
ap_queue_init_state(aq);
ap_queue_init_reply(aq, &zq->reply);
aq->request_timeout = CEX2C_CLEANUP_TIME;
- aq->private = zq;
+ dev_set_drvdata(&ap_dev->device, zq);
rc = zcrypt_queue_register(zq);
if (rc) {
- aq->private = NULL;
zcrypt_queue_free(zq);
return rc;
}
@@ -372,7 +369,6 @@ static int zcrypt_cex2c_queue_probe(struct ap_device *ap_dev)
&cca_queue_attr_grp);
if (rc) {
zcrypt_queue_unregister(zq);
- aq->private = NULL;
zcrypt_queue_free(zq);
}
}
@@ -386,13 +382,13 @@ static int zcrypt_cex2c_queue_probe(struct ap_device *ap_dev)
*/
static void zcrypt_cex2c_queue_remove(struct ap_device *ap_dev)
{
+ struct zcrypt_queue *zq = dev_get_drvdata(&ap_dev->device);
struct ap_queue *aq = to_ap_queue(&ap_dev->device);
- struct zcrypt_queue *zq = aq->private;
if (ap_test_bit(&aq->card->functions, AP_FUNC_COPRO))
sysfs_remove_group(&ap_dev->device.kobj, &cca_queue_attr_grp);
- if (zq)
- zcrypt_queue_unregister(zq);
+
+ zcrypt_queue_unregister(zq);
}
static struct ap_driver zcrypt_cex2c_queue_driver = {
diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c
index f518b5fc7e5d..1f7ec54142e1 100644
--- a/drivers/s390/crypto/zcrypt_cex4.c
+++ b/drivers/s390/crypto/zcrypt_cex4.c
@@ -75,9 +75,9 @@ static ssize_t cca_serialnr_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
+ struct zcrypt_card *zc = dev_get_drvdata(dev);
struct cca_info ci;
struct ap_card *ac = to_ap_card(dev);
- struct zcrypt_card *zc = ac->private;
memset(&ci, 0, sizeof(ci));
@@ -106,9 +106,9 @@ static ssize_t cca_mkvps_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
+ struct zcrypt_queue *zq = dev_get_drvdata(dev);
int n = 0;
struct cca_info ci;
- struct zcrypt_queue *zq = to_ap_queue(dev)->private;
static const char * const cao_state[] = { "invalid", "valid" };
static const char * const new_state[] = { "empty", "partial", "full" };
@@ -187,9 +187,9 @@ static ssize_t ep11_api_ordinalnr_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
+ struct zcrypt_card *zc = dev_get_drvdata(dev);
struct ep11_card_info ci;
struct ap_card *ac = to_ap_card(dev);
- struct zcrypt_card *zc = ac->private;
memset(&ci, 0, sizeof(ci));
@@ -208,9 +208,9 @@ static ssize_t ep11_fw_version_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
+ struct zcrypt_card *zc = dev_get_drvdata(dev);
struct ep11_card_info ci;
struct ap_card *ac = to_ap_card(dev);
- struct zcrypt_card *zc = ac->private;
memset(&ci, 0, sizeof(ci));
@@ -231,9 +231,9 @@ static ssize_t ep11_serialnr_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
+ struct zcrypt_card *zc = dev_get_drvdata(dev);
struct ep11_card_info ci;
struct ap_card *ac = to_ap_card(dev);
- struct zcrypt_card *zc = ac->private;
memset(&ci, 0, sizeof(ci));
@@ -264,10 +264,10 @@ static ssize_t ep11_card_op_modes_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
+ struct zcrypt_card *zc = dev_get_drvdata(dev);
int i, n = 0;
struct ep11_card_info ci;
struct ap_card *ac = to_ap_card(dev);
- struct zcrypt_card *zc = ac->private;
memset(&ci, 0, sizeof(ci));
@@ -309,9 +309,9 @@ static ssize_t ep11_mkvps_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
+ struct zcrypt_queue *zq = dev_get_drvdata(dev);
int n = 0;
struct ep11_domain_info di;
- struct zcrypt_queue *zq = to_ap_queue(dev)->private;
static const char * const cwk_state[] = { "invalid", "valid" };
static const char * const nwk_state[] = { "empty", "uncommitted",
"committed" };
@@ -357,9 +357,9 @@ static ssize_t ep11_queue_op_modes_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
+ struct zcrypt_queue *zq = dev_get_drvdata(dev);
int i, n = 0;
struct ep11_domain_info di;
- struct zcrypt_queue *zq = to_ap_queue(dev)->private;
memset(&di, 0, sizeof(di));
@@ -441,7 +441,7 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
if (!zc)
return -ENOMEM;
zc->card = ac;
- ac->private = zc;
+ dev_set_drvdata(&ap_dev->device, zc);
if (ap_test_bit(&ac->functions, AP_FUNC_ACCEL)) {
if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX4) {
zc->type_string = "CEX4A";
@@ -539,7 +539,6 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
rc = zcrypt_card_register(zc);
if (rc) {
- ac->private = NULL;
zcrypt_card_free(zc);
return rc;
}
@@ -549,7 +548,6 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
&cca_card_attr_grp);
if (rc) {
zcrypt_card_unregister(zc);
- ac->private = NULL;
zcrypt_card_free(zc);
}
} else if (ap_test_bit(&ac->functions, AP_FUNC_EP11)) {
@@ -557,7 +555,6 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
&ep11_card_attr_grp);
if (rc) {
zcrypt_card_unregister(zc);
- ac->private = NULL;
zcrypt_card_free(zc);
}
}
@@ -571,15 +568,15 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
*/
static void zcrypt_cex4_card_remove(struct ap_device *ap_dev)
{
+ struct zcrypt_card *zc = dev_get_drvdata(&ap_dev->device);
struct ap_card *ac = to_ap_card(&ap_dev->device);
- struct zcrypt_card *zc = ac->private;
if (ap_test_bit(&ac->functions, AP_FUNC_COPRO))
sysfs_remove_group(&ap_dev->device.kobj, &cca_card_attr_grp);
else if (ap_test_bit(&ac->functions, AP_FUNC_EP11))
sysfs_remove_group(&ap_dev->device.kobj, &ep11_card_attr_grp);
- if (zc)
- zcrypt_card_unregister(zc);
+
+ zcrypt_card_unregister(zc);
}
static struct ap_driver zcrypt_cex4_card_driver = {
@@ -629,10 +626,9 @@ static int zcrypt_cex4_queue_probe(struct ap_device *ap_dev)
ap_queue_init_state(aq);
ap_queue_init_reply(aq, &zq->reply);
aq->request_timeout = CEX4_CLEANUP_TIME;
- aq->private = zq;
+ dev_set_drvdata(&ap_dev->device, zq);
rc = zcrypt_queue_register(zq);
if (rc) {
- aq->private = NULL;
zcrypt_queue_free(zq);
return rc;
}
@@ -642,7 +638,6 @@ static int zcrypt_cex4_queue_probe(struct ap_device *ap_dev)
&cca_queue_attr_grp);
if (rc) {
zcrypt_queue_unregister(zq);
- aq->private = NULL;
zcrypt_queue_free(zq);
}
} else if (ap_test_bit(&aq->card->functions, AP_FUNC_EP11)) {
@@ -650,7 +645,6 @@ static int zcrypt_cex4_queue_probe(struct ap_device *ap_dev)
&ep11_queue_attr_grp);
if (rc) {
zcrypt_queue_unregister(zq);
- aq->private = NULL;
zcrypt_queue_free(zq);
}
}
@@ -664,15 +658,15 @@ static int zcrypt_cex4_queue_probe(struct ap_device *ap_dev)
*/
static void zcrypt_cex4_queue_remove(struct ap_device *ap_dev)
{
+ struct zcrypt_queue *zq = dev_get_drvdata(&ap_dev->device);
struct ap_queue *aq = to_ap_queue(&ap_dev->device);
- struct zcrypt_queue *zq = aq->private;
if (ap_test_bit(&aq->card->functions, AP_FUNC_COPRO))
sysfs_remove_group(&ap_dev->device.kobj, &cca_queue_attr_grp);
else if (ap_test_bit(&aq->card->functions, AP_FUNC_EP11))
sysfs_remove_group(&ap_dev->device.kobj, &ep11_queue_attr_grp);
- if (zq)
- zcrypt_queue_unregister(zq);
+
+ zcrypt_queue_unregister(zq);
}
static struct ap_driver zcrypt_cex4_queue_driver = {
diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c
index 20f12288a8c1..398bde237e37 100644
--- a/drivers/s390/crypto/zcrypt_queue.c
+++ b/drivers/s390/crypto/zcrypt_queue.c
@@ -40,8 +40,8 @@ static ssize_t online_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
+ struct zcrypt_queue *zq = dev_get_drvdata(dev);
struct ap_queue *aq = to_ap_queue(dev);
- struct zcrypt_queue *zq = aq->private;
int online = aq->config && zq->online ? 1 : 0;
return scnprintf(buf, PAGE_SIZE, "%d\n", online);
@@ -51,8 +51,8 @@ static ssize_t online_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
+ struct zcrypt_queue *zq = dev_get_drvdata(dev);
struct ap_queue *aq = to_ap_queue(dev);
- struct zcrypt_queue *zq = aq->private;
struct zcrypt_card *zc = zq->zcard;
int online;
@@ -83,7 +83,7 @@ static ssize_t load_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct zcrypt_queue *zq = to_ap_queue(dev)->private;
+ struct zcrypt_queue *zq = dev_get_drvdata(dev);
return scnprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&zq->load));
}
@@ -170,7 +170,7 @@ int zcrypt_queue_register(struct zcrypt_queue *zq)
int rc;
spin_lock(&zcrypt_list_lock);
- zc = zq->queue->card->private;
+ zc = dev_get_drvdata(&zq->queue->card->ap_dev.device);
zcrypt_card_get(zc);
zq->zcard = zc;
zq->online = 1; /* New devices are online by default. */
diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig
index bf236d474538..9c67b97faba2 100644
--- a/drivers/s390/net/Kconfig
+++ b/drivers/s390/net/Kconfig
@@ -74,6 +74,7 @@ config QETH_L2
def_tristate y
prompt "qeth layer 2 device support"
depends on QETH
+ depends on BRIDGE || BRIDGE=n
help
Select this option to be able to run qeth devices in layer 2 mode.
To compile as a module, choose M. The module name is qeth_l2.
@@ -88,15 +89,6 @@ config QETH_L3
To compile as a module choose M. The module name is qeth_l3.
If unsure, choose Y.
-config QETH_OSN
- def_bool !HAVE_MARCH_Z14_FEATURES
- prompt "qeth OSN device support"
- depends on QETH
- help
- This enables the qeth driver to support devices in OSN mode.
- This feature will be removed in 2021.
- If unsure, choose N.
-
config QETH_OSX
def_bool !HAVE_MARCH_Z15_FEATURES
prompt "qeth OSX device support"
diff --git a/drivers/s390/net/ctcm_fsms.c b/drivers/s390/net/ctcm_fsms.c
index 377e3689d1d4..06281a0a0552 100644
--- a/drivers/s390/net/ctcm_fsms.c
+++ b/drivers/s390/net/ctcm_fsms.c
@@ -1444,7 +1444,7 @@ again:
if (do_debug_ccw)
ctcmpc_dumpit((char *)&ch->ccw[0],
sizeof(struct ccw1) * 3);
- dolock = !in_irq();
+ dolock = !in_hardirq();
if (dolock)
spin_lock_irqsave(
get_ccwdev_lock(ch->cdev), saveflags);
diff --git a/drivers/s390/net/ctcm_mpc.c b/drivers/s390/net/ctcm_mpc.c
index 19ee91acb89d..f0436f555c62 100644
--- a/drivers/s390/net/ctcm_mpc.c
+++ b/drivers/s390/net/ctcm_mpc.c
@@ -1773,7 +1773,7 @@ static void mpc_action_side_xid(fsm_instance *fsm, void *arg, int side)
CTCM_D3_DUMP((char *)ch->xid, XID2_LENGTH);
CTCM_D3_DUMP((char *)ch->xid_id, 4);
- if (!in_irq()) {
+ if (!in_hardirq()) {
/* Such conditional locking is a known problem for
* sparse because its static undeterministic.
* Warnings should be ignored here. */
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index f4d554ea0c93..535a60b3946d 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -259,22 +259,10 @@ struct qeth_hdr_layer2 {
__u8 reserved2[16];
} __attribute__ ((packed));
-struct qeth_hdr_osn {
- __u8 id;
- __u8 reserved;
- __u16 seq_no;
- __u16 reserved2;
- __u16 control_flags;
- __u16 pdu_length;
- __u8 reserved3[18];
- __u32 ccid;
-} __attribute__ ((packed));
-
struct qeth_hdr {
union {
struct qeth_hdr_layer2 l2;
struct qeth_hdr_layer3 l3;
- struct qeth_hdr_osn osn;
} hdr;
} __attribute__ ((packed));
@@ -341,7 +329,6 @@ enum qeth_header_ids {
QETH_HEADER_TYPE_LAYER3 = 0x01,
QETH_HEADER_TYPE_LAYER2 = 0x02,
QETH_HEADER_TYPE_L3_TSO = 0x03,
- QETH_HEADER_TYPE_OSN = 0x04,
QETH_HEADER_TYPE_L2_TSO = 0x06,
QETH_HEADER_MASK_INVAL = 0x80,
};
@@ -779,18 +766,13 @@ enum qeth_threads {
QETH_RECOVER_THREAD = 1,
};
-struct qeth_osn_info {
- int (*assist_cb)(struct net_device *dev, void *data);
- int (*data_cb)(struct sk_buff *skb);
-};
-
struct qeth_discipline {
- const struct device_type *devtype;
int (*setup) (struct ccwgroup_device *);
void (*remove) (struct ccwgroup_device *);
int (*set_online)(struct qeth_card *card, bool carrier_ok);
void (*set_offline)(struct qeth_card *card);
- int (*do_ioctl)(struct net_device *dev, struct ifreq *rq, int cmd);
+ int (*do_ioctl)(struct net_device *dev, struct ifreq *rq,
+ void __user *data, int cmd);
int (*control_event_handler)(struct qeth_card *card,
struct qeth_ipa_cmd *cmd);
};
@@ -865,7 +847,6 @@ struct qeth_card {
/* QDIO buffer handling */
struct qeth_qdio_info qdio;
int read_or_write_problem;
- struct qeth_osn_info osn_info;
const struct qeth_discipline *discipline;
atomic_t force_alloc_skb;
struct service_level qeth_service_level;
@@ -1058,10 +1039,7 @@ int qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb);
extern const struct qeth_discipline qeth_l2_discipline;
extern const struct qeth_discipline qeth_l3_discipline;
extern const struct ethtool_ops qeth_ethtool_ops;
-extern const struct ethtool_ops qeth_osn_ethtool_ops;
extern const struct attribute_group *qeth_dev_groups[];
-extern const struct attribute_group *qeth_osn_dev_groups[];
-extern const struct device_type qeth_generic_devtype;
const char *qeth_get_cardname_short(struct qeth_card *);
int qeth_resize_buffer_pool(struct qeth_card *card, unsigned int count);
@@ -1069,11 +1047,9 @@ int qeth_setup_discipline(struct qeth_card *card, enum qeth_discipline_id disc);
void qeth_remove_discipline(struct qeth_card *card);
/* exports for qeth discipline device drivers */
-extern struct kmem_cache *qeth_core_header_cache;
extern struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS];
struct net_device *qeth_clone_netdev(struct net_device *orig);
-struct qeth_card *qeth_get_card_by_busid(char *bus_id);
void qeth_set_allowed_threads(struct qeth_card *card, unsigned long threads,
int clear_start_mask);
int qeth_threads_running(struct qeth_card *, unsigned long);
@@ -1088,9 +1064,6 @@ struct qeth_cmd_buffer *qeth_ipa_alloc_cmd(struct qeth_card *card,
enum qeth_ipa_cmds cmd_code,
enum qeth_prot_versions prot,
unsigned int data_length);
-struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel,
- unsigned int length, unsigned int ccws,
- long timeout);
struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card,
enum qeth_ipa_funcs ipa_func,
u16 cmd_code,
@@ -1099,18 +1072,12 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card,
struct qeth_cmd_buffer *qeth_get_diag_cmd(struct qeth_card *card,
enum qeth_diags_cmds sub_cmd,
unsigned int data_length);
-void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason);
-void qeth_put_cmd(struct qeth_cmd_buffer *iob);
int qeth_schedule_recovery(struct qeth_card *card);
int qeth_poll(struct napi_struct *napi, int budget);
void qeth_setadp_promisc_mode(struct qeth_card *card, bool enable);
int qeth_setadpparms_change_macaddr(struct qeth_card *);
void qeth_tx_timeout(struct net_device *, unsigned int txqueue);
-void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
- u16 cmd_length,
- bool (*match)(struct qeth_cmd_buffer *iob,
- struct qeth_cmd_buffer *reply));
int qeth_query_switch_attributes(struct qeth_card *card,
struct qeth_switch_info *sw_info);
int qeth_query_card_info(struct qeth_card *card,
@@ -1118,12 +1085,9 @@ int qeth_query_card_info(struct qeth_card *card,
int qeth_setadpparms_set_access_ctrl(struct qeth_card *card,
enum qeth_ipa_isolation_modes mode);
-unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset);
-int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
- struct sk_buff *skb, struct qeth_hdr *hdr,
- unsigned int offset, unsigned int hd_len,
- int elements_needed);
int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+int qeth_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+ void __user *data, int cmd);
void qeth_dbf_longtext(debug_info_t *id, int level, char *text, ...);
int qeth_configure_cq(struct qeth_card *, enum qeth_cq);
int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action);
@@ -1148,11 +1112,4 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
struct qeth_hdr *hdr, struct sk_buff *skb,
__be16 proto, unsigned int data_len));
-/* exports for OSN */
-int qeth_osn_assist(struct net_device *, void *, int);
-int qeth_osn_register(unsigned char *read_dev_no, struct net_device **,
- int (*assist_cb)(struct net_device *, void *),
- int (*data_cb)(struct sk_buff *));
-void qeth_osn_deregister(struct net_device *);
-
#endif /* __QETH_CORE_H__ */
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 62f88ccbd03f..41ca6273b750 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -57,8 +57,7 @@ struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS] = {
};
EXPORT_SYMBOL_GPL(qeth_dbf);
-struct kmem_cache *qeth_core_header_cache;
-EXPORT_SYMBOL_GPL(qeth_core_header_cache);
+static struct kmem_cache *qeth_core_header_cache;
static struct kmem_cache *qeth_qdio_outbuf_cache;
static struct device *qeth_core_root_dev;
@@ -101,8 +100,6 @@ static const char *qeth_get_cardname(struct qeth_card *card)
return " OSD Express";
case QETH_CARD_TYPE_IQD:
return " HiperSockets";
- case QETH_CARD_TYPE_OSN:
- return " OSN QDIO";
case QETH_CARD_TYPE_OSM:
return " OSM QDIO";
case QETH_CARD_TYPE_OSX:
@@ -157,8 +154,6 @@ const char *qeth_get_cardname_short(struct qeth_card *card)
}
case QETH_CARD_TYPE_IQD:
return "HiperSockets";
- case QETH_CARD_TYPE_OSN:
- return "OSN";
case QETH_CARD_TYPE_OSM:
return "OSM_1000";
case QETH_CARD_TYPE_OSX:
@@ -431,6 +426,13 @@ static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15,
return n;
}
+static void qeth_put_cmd(struct qeth_cmd_buffer *iob)
+{
+ if (refcount_dec_and_test(&iob->ref_count)) {
+ kfree(iob->data);
+ kfree(iob);
+ }
+}
static void qeth_setup_ccw(struct ccw1 *ccw, u8 cmd_code, u8 flags, u32 len,
void *data)
{
@@ -499,12 +501,11 @@ static void qeth_dequeue_cmd(struct qeth_card *card,
spin_unlock_irq(&card->lock);
}
-void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason)
+static void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason)
{
iob->rc = reason;
complete(&iob->done);
}
-EXPORT_SYMBOL_GPL(qeth_notify_cmd);
static void qeth_flush_local_addrs4(struct qeth_card *card)
{
@@ -781,10 +782,7 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card,
QETH_CARD_TEXT(card, 5, "chkipad");
if (IS_IPA_REPLY(cmd)) {
- if (cmd->hdr.command != IPA_CMD_SETCCID &&
- cmd->hdr.command != IPA_CMD_DELCCID &&
- cmd->hdr.command != IPA_CMD_MODCCID &&
- cmd->hdr.command != IPA_CMD_SET_DIAG_ASS)
+ if (cmd->hdr.command != IPA_CMD_SET_DIAG_ASS)
qeth_issue_ipa_msg(cmd, cmd->hdr.return_code, card);
return cmd;
}
@@ -819,8 +817,6 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card,
if (card->discipline->control_event_handler(card, cmd))
return cmd;
return NULL;
- case IPA_CMD_MODCCID:
- return cmd;
case IPA_CMD_REGISTER_LOCAL_ADDR:
if (cmd->hdr.prot_version == QETH_PROT_IPV4)
qeth_add_local_addrs4(card, &cmd->data.local_addrs4);
@@ -877,15 +873,6 @@ static int qeth_check_idx_response(struct qeth_card *card,
return 0;
}
-void qeth_put_cmd(struct qeth_cmd_buffer *iob)
-{
- if (refcount_dec_and_test(&iob->ref_count)) {
- kfree(iob->data);
- kfree(iob);
- }
-}
-EXPORT_SYMBOL_GPL(qeth_put_cmd);
-
static void qeth_release_buffer_cb(struct qeth_card *card,
struct qeth_cmd_buffer *iob,
unsigned int data_length)
@@ -899,9 +886,9 @@ static void qeth_cancel_cmd(struct qeth_cmd_buffer *iob, int rc)
qeth_put_cmd(iob);
}
-struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel,
- unsigned int length, unsigned int ccws,
- long timeout)
+static struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel,
+ unsigned int length,
+ unsigned int ccws, long timeout)
{
struct qeth_cmd_buffer *iob;
@@ -927,7 +914,6 @@ struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel,
iob->length = length;
return iob;
}
-EXPORT_SYMBOL_GPL(qeth_alloc_cmd);
static void qeth_issue_next_read_cb(struct qeth_card *card,
struct qeth_cmd_buffer *iob,
@@ -958,11 +944,6 @@ static void qeth_issue_next_read_cb(struct qeth_card *card,
cmd = qeth_check_ipa_data(card, cmd);
if (!cmd)
goto out;
- if (IS_OSN(card) && card->osn_info.assist_cb &&
- cmd->hdr.command != IPA_CMD_STARTLAN) {
- card->osn_info.assist_cb(card->dev, cmd);
- goto out;
- }
}
/* match against pending cmd requests */
@@ -1835,7 +1816,7 @@ static enum qeth_discipline_id qeth_enforce_discipline(struct qeth_card *card)
{
enum qeth_discipline_id disc = QETH_DISCIPLINE_UNDETERMINED;
- if (IS_OSM(card) || IS_OSN(card))
+ if (IS_OSM(card))
disc = QETH_DISCIPLINE_LAYER2;
else if (IS_VM_NIC(card))
disc = IS_IQD(card) ? QETH_DISCIPLINE_LAYER3 :
@@ -1885,7 +1866,6 @@ static void qeth_idx_init(struct qeth_card *card)
card->info.func_level = QETH_IDX_FUNC_LEVEL_IQD;
break;
case QETH_CARD_TYPE_OSD:
- case QETH_CARD_TYPE_OSN:
card->info.func_level = QETH_IDX_FUNC_LEVEL_OSD;
break;
default:
@@ -2442,9 +2422,7 @@ static int qeth_ulp_enable_cb(struct qeth_card *card, struct qeth_reply *reply,
static u8 qeth_mpc_select_prot_type(struct qeth_card *card)
{
- if (IS_OSN(card))
- return QETH_PROT_OSN2;
- return IS_LAYER2(card) ? QETH_PROT_LAYER2 : QETH_PROT_TCPIP;
+ return IS_LAYER2(card) ? QETH_MPC_PROT_L2 : QETH_MPC_PROT_L3;
}
static int qeth_ulp_enable(struct qeth_card *card)
@@ -3000,10 +2978,8 @@ static void qeth_ipa_finalize_cmd(struct qeth_card *card,
__ipa_cmd(iob)->hdr.seqno = card->seqno.ipa++;
}
-void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
- u16 cmd_length,
- bool (*match)(struct qeth_cmd_buffer *iob,
- struct qeth_cmd_buffer *reply))
+static void qeth_prepare_ipa_cmd(struct qeth_card *card,
+ struct qeth_cmd_buffer *iob, u16 cmd_length)
{
u8 prot_type = qeth_mpc_select_prot_type(card);
u16 total_length = iob->length;
@@ -3011,7 +2987,6 @@ void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
qeth_setup_ccw(__ccw_from_cmd(iob), CCW_CMD_WRITE, 0, total_length,
iob->data);
iob->finalize = qeth_ipa_finalize_cmd;
- iob->match = match;
memcpy(iob->data, IPA_PDU_HEADER, IPA_PDU_HEADER_SIZE);
memcpy(QETH_IPA_PDU_LEN_TOTAL(iob->data), &total_length, 2);
@@ -3022,7 +2997,6 @@ void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
&card->token.ulp_connection_r, QETH_MPC_TOKEN_LENGTH);
memcpy(QETH_IPA_PDU_LEN_PDU3(iob->data), &cmd_length, 2);
}
-EXPORT_SYMBOL_GPL(qeth_prepare_ipa_cmd);
static bool qeth_ipa_match_reply(struct qeth_cmd_buffer *iob,
struct qeth_cmd_buffer *reply)
@@ -3046,7 +3020,8 @@ struct qeth_cmd_buffer *qeth_ipa_alloc_cmd(struct qeth_card *card,
if (!iob)
return NULL;
- qeth_prepare_ipa_cmd(card, iob, data_length, qeth_ipa_match_reply);
+ qeth_prepare_ipa_cmd(card, iob, data_length);
+ iob->match = qeth_ipa_match_reply;
hdr = &__ipa_cmd(iob)->hdr;
hdr->command = cmd_code;
@@ -3804,14 +3779,10 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev,
unsigned long card_ptr)
{
struct qeth_card *card = (struct qeth_card *) card_ptr;
- struct net_device *dev = card->dev;
- QETH_CARD_TEXT(card, 6, "qdouhdl");
- if (qdio_error & QDIO_ERROR_FATAL) {
- QETH_CARD_TEXT(card, 2, "achkcond");
- netif_tx_stop_all_queues(dev);
- qeth_schedule_recovery(card);
- }
+ QETH_CARD_TEXT(card, 2, "achkcond");
+ netif_tx_stop_all_queues(card->dev);
+ qeth_schedule_recovery(card);
}
/**
@@ -3894,7 +3865,8 @@ static int qeth_get_elements_for_frags(struct sk_buff *skb)
* Returns the number of pages, and thus QDIO buffer elements, needed to map the
* skb's data (both its linear part and paged fragments).
*/
-unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset)
+static unsigned int qeth_count_elements(struct sk_buff *skb,
+ unsigned int data_offset)
{
unsigned int elements = qeth_get_elements_for_frags(skb);
addr_t end = (addr_t)skb->data + skb_headlen(skb);
@@ -3904,7 +3876,6 @@ unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset)
elements += qeth_get_elements_for_range(start, end);
return elements;
}
-EXPORT_SYMBOL_GPL(qeth_count_elements);
#define QETH_HDR_CACHE_OBJ_SIZE (sizeof(struct qeth_hdr_tso) + \
MAX_TCP_HEADER)
@@ -4192,10 +4163,11 @@ static int __qeth_xmit(struct qeth_card *card, struct qeth_qdio_out_q *queue,
return 0;
}
-int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
- struct sk_buff *skb, struct qeth_hdr *hdr,
- unsigned int offset, unsigned int hd_len,
- int elements_needed)
+static int qeth_do_send_packet(struct qeth_card *card,
+ struct qeth_qdio_out_q *queue,
+ struct sk_buff *skb, struct qeth_hdr *hdr,
+ unsigned int offset, unsigned int hd_len,
+ unsigned int elements_needed)
{
unsigned int start_index = queue->next_buf_to_fill;
struct qeth_qdio_out_buffer *buffer;
@@ -4275,7 +4247,6 @@ out:
netif_tx_start_queue(txq);
return rc;
}
-EXPORT_SYMBOL_GPL(qeth_do_send_packet);
static void qeth_fill_tso_ext(struct qeth_hdr_tso *hdr,
unsigned int payload_len, struct sk_buff *skb,
@@ -4554,7 +4525,6 @@ static int qeth_mdio_read(struct net_device *dev, int phy_id, int regnum)
case MII_BMCR: /* Basic mode control register */
rc = BMCR_FULLDPLX;
if ((card->info.link_type != QETH_LINK_TYPE_GBIT_ETH) &&
- (card->info.link_type != QETH_LINK_TYPE_OSN) &&
(card->info.link_type != QETH_LINK_TYPE_10GBIT_ETH) &&
(card->info.link_type != QETH_LINK_TYPE_25GBIT_ETH))
rc |= BMCR_SPEED100;
@@ -5266,10 +5236,6 @@ static struct ccw_device_id qeth_ids[] = {
.driver_info = QETH_CARD_TYPE_OSD},
{CCW_DEVICE_DEVTYPE(0x1731, 0x05, 0x1732, 0x05),
.driver_info = QETH_CARD_TYPE_IQD},
-#ifdef CONFIG_QETH_OSN
- {CCW_DEVICE_DEVTYPE(0x1731, 0x06, 0x1732, 0x06),
- .driver_info = QETH_CARD_TYPE_OSN},
-#endif
{CCW_DEVICE_DEVTYPE(0x1731, 0x02, 0x1732, 0x03),
.driver_info = QETH_CARD_TYPE_OSM},
#ifdef CONFIG_QETH_OSX
@@ -5628,14 +5594,6 @@ static void qeth_receive_skb(struct qeth_card *card, struct sk_buff *skb,
bool is_cso;
switch (hdr->hdr.l2.id) {
- case QETH_HEADER_TYPE_OSN:
- skb_push(skb, sizeof(*hdr));
- skb_copy_to_linear_data(skb, hdr, sizeof(*hdr));
- QETH_CARD_STAT_ADD(card, rx_bytes, skb->len);
- QETH_CARD_STAT_INC(card, rx_packets);
-
- card->osn_info.data_cb(skb);
- return;
#if IS_ENABLED(CONFIG_QETH_L3)
case QETH_HEADER_TYPE_LAYER3:
qeth_l3_rebuild_skb(card, skb, hdr);
@@ -5750,16 +5708,6 @@ next_packet:
linear_len = sizeof(struct iphdr);
headroom = ETH_HLEN;
break;
- case QETH_HEADER_TYPE_OSN:
- skb_len = hdr->hdr.osn.pdu_length;
- if (!IS_OSN(card)) {
- QETH_CARD_STAT_INC(card, rx_dropped_notsupp);
- goto walk_packet;
- }
-
- linear_len = skb_len;
- headroom = sizeof(struct qeth_hdr);
- break;
default:
if (hdr->hdr.l2.id & QETH_HEADER_MASK_INVAL)
QETH_CARD_STAT_INC(card, rx_frame_errors);
@@ -5777,8 +5725,7 @@ next_packet:
use_rx_sg = (card->options.cq == QETH_CQ_ENABLED) ||
(skb_len > READ_ONCE(priv->rx_copybreak) &&
- !atomic_read(&card->force_alloc_skb) &&
- !IS_OSN(card));
+ !atomic_read(&card->force_alloc_skb));
if (use_rx_sg) {
/* QETH_CQ_ENABLED only: */
@@ -6335,14 +6282,9 @@ void qeth_remove_discipline(struct qeth_card *card)
card->discipline = NULL;
}
-const struct device_type qeth_generic_devtype = {
+static const struct device_type qeth_generic_devtype = {
.name = "qeth_generic",
};
-EXPORT_SYMBOL_GPL(qeth_generic_devtype);
-
-static const struct device_type qeth_osn_devtype = {
- .name = "qeth_osn",
-};
#define DBF_NAME_LEN 20
@@ -6425,10 +6367,6 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card)
case QETH_CARD_TYPE_OSM:
dev = alloc_etherdev(sizeof(*priv));
break;
- case QETH_CARD_TYPE_OSN:
- dev = alloc_netdev(sizeof(*priv), "osn%d", NET_NAME_UNKNOWN,
- ether_setup);
- break;
default:
dev = alloc_etherdev_mqs(sizeof(*priv), QETH_MAX_OUT_QUEUES, 1);
}
@@ -6442,23 +6380,19 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card)
dev->ml_priv = card;
dev->watchdog_timeo = QETH_TX_TIMEOUT;
- dev->min_mtu = IS_OSN(card) ? 64 : 576;
+ dev->min_mtu = 576;
/* initialized when device first goes online: */
dev->max_mtu = 0;
dev->mtu = 0;
SET_NETDEV_DEV(dev, &card->gdev->dev);
netif_carrier_off(dev);
- if (IS_OSN(card)) {
- dev->ethtool_ops = &qeth_osn_ethtool_ops;
- } else {
- dev->ethtool_ops = &qeth_ethtool_ops;
- dev->priv_flags &= ~IFF_TX_SKB_SHARING;
- dev->hw_features |= NETIF_F_SG;
- dev->vlan_features |= NETIF_F_SG;
- if (IS_IQD(card))
- dev->features |= NETIF_F_SG;
- }
+ dev->ethtool_ops = &qeth_ethtool_ops;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->hw_features |= NETIF_F_SG;
+ dev->vlan_features |= NETIF_F_SG;
+ if (IS_IQD(card))
+ dev->features |= NETIF_F_SG;
return dev;
}
@@ -6521,10 +6455,7 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev)
if (rc)
goto err_chp_desc;
- if (IS_OSN(card))
- gdev->dev.groups = qeth_osn_dev_groups;
- else
- gdev->dev.groups = qeth_dev_groups;
+ gdev->dev.groups = qeth_dev_groups;
enforced_disc = qeth_enforce_discipline(card);
switch (enforced_disc) {
@@ -6538,8 +6469,6 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev)
if (rc)
goto err_setup_disc;
- gdev->dev.type = IS_OSN(card) ? &qeth_osn_devtype :
- card->discipline->devtype;
break;
}
@@ -6657,36 +6586,42 @@ static struct ccwgroup_driver qeth_core_ccwgroup_driver = {
.shutdown = qeth_core_shutdown,
};
-struct qeth_card *qeth_get_card_by_busid(char *bus_id)
-{
- struct ccwgroup_device *gdev;
- struct qeth_card *card;
-
- gdev = get_ccwgroupdev_by_busid(&qeth_core_ccwgroup_driver, bus_id);
- if (!gdev)
- return NULL;
-
- card = dev_get_drvdata(&gdev->dev);
- put_device(&gdev->dev);
- return card;
-}
-EXPORT_SYMBOL_GPL(qeth_get_card_by_busid);
-
-int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+int qeth_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd)
{
struct qeth_card *card = dev->ml_priv;
- struct mii_ioctl_data *mii_data;
int rc = 0;
switch (cmd) {
case SIOC_QETH_ADP_SET_SNMP_CONTROL:
- rc = qeth_snmp_command(card, rq->ifr_ifru.ifru_data);
+ rc = qeth_snmp_command(card, data);
break;
case SIOC_QETH_GET_CARD_TYPE:
if ((IS_OSD(card) || IS_OSM(card) || IS_OSX(card)) &&
!IS_VM_NIC(card))
return 1;
return 0;
+ case SIOC_QETH_QUERY_OAT:
+ rc = qeth_query_oat_command(card, data);
+ break;
+ default:
+ if (card->discipline->do_ioctl)
+ rc = card->discipline->do_ioctl(dev, rq, data, cmd);
+ else
+ rc = -EOPNOTSUPP;
+ }
+ if (rc)
+ QETH_CARD_TEXT_(card, 2, "ioce%x", rc);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(qeth_siocdevprivate);
+
+int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+ struct qeth_card *card = dev->ml_priv;
+ struct mii_ioctl_data *mii_data;
+ int rc = 0;
+
+ switch (cmd) {
case SIOCGMIIPHY:
mii_data = if_mii(rq);
mii_data->phy_id = 0;
@@ -6699,14 +6634,8 @@ int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
mii_data->val_out = qeth_mdio_read(dev,
mii_data->phy_id, mii_data->reg_num);
break;
- case SIOC_QETH_QUERY_OAT:
- rc = qeth_query_oat_command(card, rq->ifr_ifru.ifru_data);
- break;
default:
- if (card->discipline->do_ioctl)
- rc = card->discipline->do_ioctl(dev, rq, cmd);
- else
- rc = -EOPNOTSUPP;
+ return -EOPNOTSUPP;
}
if (rc)
QETH_CARD_TEXT_(card, 2, "ioce%x", rc);
diff --git a/drivers/s390/net/qeth_core_mpc.c b/drivers/s390/net/qeth_core_mpc.c
index 68c2588b9dcc..d9266f7d8187 100644
--- a/drivers/s390/net/qeth_core_mpc.c
+++ b/drivers/s390/net/qeth_core_mpc.c
@@ -232,9 +232,6 @@ static const struct ipa_cmd_names qeth_ipa_cmd_names[] = {
{IPA_CMD_DELVLAN, "delvlan"},
{IPA_CMD_VNICC, "vnic_characteristics"},
{IPA_CMD_SETBRIDGEPORT_OSA, "set_bridge_port(osa)"},
- {IPA_CMD_SETCCID, "setccid"},
- {IPA_CMD_DELCCID, "delccid"},
- {IPA_CMD_MODCCID, "modccid"},
{IPA_CMD_SETIP, "setip"},
{IPA_CMD_QIPASSIST, "qipassist"},
{IPA_CMD_SETASSPARMS, "setassparms"},
diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index e4bde7daf083..6257f00786b3 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@ -34,8 +34,6 @@ extern const unsigned char IPA_PDU_HEADER[];
/*****************************************************************************/
#define IPA_CMD_INITIATOR_HOST 0x00
#define IPA_CMD_INITIATOR_OSA 0x01
-#define IPA_CMD_INITIATOR_HOST_REPLY 0x80
-#define IPA_CMD_INITIATOR_OSA_REPLY 0x81
#define IPA_CMD_PRIM_VERSION_NO 0x01
struct qeth_ipa_caps {
@@ -66,7 +64,6 @@ static inline bool qeth_ipa_caps_enabled(struct qeth_ipa_caps *caps, u32 mask)
enum qeth_card_types {
QETH_CARD_TYPE_OSD = 1,
QETH_CARD_TYPE_IQD = 5,
- QETH_CARD_TYPE_OSN = 6,
QETH_CARD_TYPE_OSM = 3,
QETH_CARD_TYPE_OSX = 2,
};
@@ -75,12 +72,6 @@ enum qeth_card_types {
#define IS_OSD(card) ((card)->info.type == QETH_CARD_TYPE_OSD)
#define IS_OSM(card) ((card)->info.type == QETH_CARD_TYPE_OSM)
-#ifdef CONFIG_QETH_OSN
-#define IS_OSN(card) ((card)->info.type == QETH_CARD_TYPE_OSN)
-#else
-#define IS_OSN(card) false
-#endif
-
#ifdef CONFIG_QETH_OSX
#define IS_OSX(card) ((card)->info.type == QETH_CARD_TYPE_OSX)
#else
@@ -95,7 +86,6 @@ enum qeth_link_types {
QETH_LINK_TYPE_FAST_ETH = 0x01,
QETH_LINK_TYPE_HSTR = 0x02,
QETH_LINK_TYPE_GBIT_ETH = 0x03,
- QETH_LINK_TYPE_OSN = 0x04,
QETH_LINK_TYPE_10GBIT_ETH = 0x10,
QETH_LINK_TYPE_25GBIT_ETH = 0x12,
QETH_LINK_TYPE_LANE_ETH100 = 0x81,
@@ -126,9 +116,6 @@ enum qeth_ipa_cmds {
IPA_CMD_DELVLAN = 0x26,
IPA_CMD_VNICC = 0x2a,
IPA_CMD_SETBRIDGEPORT_OSA = 0x2b,
- IPA_CMD_SETCCID = 0x41,
- IPA_CMD_DELCCID = 0x42,
- IPA_CMD_MODCCID = 0x43,
IPA_CMD_SETIP = 0xb1,
IPA_CMD_QIPASSIST = 0xb2,
IPA_CMD_SETASSPARMS = 0xb3,
@@ -879,8 +866,7 @@ extern const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc);
extern const char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd);
/* Helper functions */
-#define IS_IPA_REPLY(cmd) ((cmd->hdr.initiator == IPA_CMD_INITIATOR_HOST) || \
- (cmd->hdr.initiator == IPA_CMD_INITIATOR_OSA_REPLY))
+#define IS_IPA_REPLY(cmd) ((cmd)->hdr.initiator == IPA_CMD_INITIATOR_HOST)
/*****************************************************************************/
/* END OF IP Assist related definitions */
@@ -919,10 +905,9 @@ extern const unsigned char ULP_ENABLE[];
(PDU_ENCAPSULATION(buffer) + 0x17)
#define QETH_ULP_ENABLE_RESP_LINK_TYPE(buffer) \
(PDU_ENCAPSULATION(buffer) + 0x2b)
-/* Layer 2 definitions */
-#define QETH_PROT_LAYER2 0x08
-#define QETH_PROT_TCPIP 0x03
-#define QETH_PROT_OSN2 0x0a
+
+#define QETH_MPC_PROT_L2 0x08
+#define QETH_MPC_PROT_L3 0x03
#define QETH_ULP_ENABLE_PROT_TYPE(buffer) (buffer + 0x50)
#define QETH_IPA_CMD_PROT_TYPE(buffer) (buffer + 0x19)
diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c
index 5815114da468..406be169173c 100644
--- a/drivers/s390/net/qeth_core_sys.c
+++ b/drivers/s390/net/qeth_core_sys.c
@@ -671,11 +671,6 @@ static const struct attribute_group qeth_dev_group = {
.attrs = qeth_dev_attrs,
};
-const struct attribute_group *qeth_osn_dev_groups[] = {
- &qeth_dev_group,
- NULL,
-};
-
const struct attribute_group *qeth_dev_groups[] = {
&qeth_dev_group,
&qeth_dev_extended_group,
diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c
index 2c4cb300a8fc..46d0fe0d0e8a 100644
--- a/drivers/s390/net/qeth_ethtool.c
+++ b/drivers/s390/net/qeth_ethtool.c
@@ -123,7 +123,9 @@ static void __qeth_set_coalesce(struct net_device *dev,
}
static int qeth_set_coalesce(struct net_device *dev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct qeth_card *card = dev->ml_priv;
struct qeth_qdio_out_q *queue;
@@ -469,10 +471,3 @@ const struct ethtool_ops qeth_ethtool_ops = {
.set_per_queue_coalesce = qeth_set_per_queue_coalesce,
.get_link_ksettings = qeth_get_link_ksettings,
};
-
-const struct ethtool_ops qeth_osn_ethtool_ops = {
- .get_strings = qeth_get_strings,
- .get_ethtool_stats = qeth_get_ethtool_stats,
- .get_sset_count = qeth_get_sset_count,
- .get_drvinfo = qeth_get_drvinfo,
-};
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 2abf86c104d5..72e84ff9fea5 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -279,7 +279,7 @@ static void qeth_l2_set_pnso_mode(struct qeth_card *card,
static void qeth_l2_dev2br_fdb_flush(struct qeth_card *card)
{
- struct switchdev_notifier_fdb_info info;
+ struct switchdev_notifier_fdb_info info = {};
QETH_CARD_TEXT(card, 2, "fdbflush");
@@ -309,17 +309,16 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card)
/* fall back to alternative mechanism: */
}
- if (!IS_OSN(card)) {
- rc = qeth_setadpparms_change_macaddr(card);
- if (!rc)
- goto out;
- QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %x: %#x\n",
- CARD_DEVID(card), rc);
- QETH_CARD_TEXT_(card, 2, "1err%04x", rc);
- /* fall back once more: */
- }
+ rc = qeth_setadpparms_change_macaddr(card);
+ if (!rc)
+ goto out;
+ QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %x: %#x\n",
+ CARD_DEVID(card), rc);
+ QETH_CARD_TEXT_(card, 2, "1err%04x", rc);
- /* some devices don't support a custom MAC address: */
+ /* Fall back once more, but some devices don't support a custom MAC
+ * address:
+ */
if (IS_OSM(card) || IS_OSX(card))
return (rc) ? rc : -EADDRNOTAVAIL;
eth_hw_addr_random(card->dev);
@@ -334,7 +333,7 @@ static void qeth_l2_register_dev_addr(struct qeth_card *card)
if (!is_valid_ether_addr(card->dev->dev_addr))
qeth_l2_request_initial_mac(card);
- if (!IS_OSN(card) && !qeth_l2_send_setmac(card, card->dev->dev_addr))
+ if (!qeth_l2_send_setmac(card, card->dev->dev_addr))
card->info.dev_addr_is_registered = 1;
else
card->info.dev_addr_is_registered = 0;
@@ -496,44 +495,6 @@ static void qeth_l2_rx_mode_work(struct work_struct *work)
qeth_l2_set_promisc_mode(card);
}
-static int qeth_l2_xmit_osn(struct qeth_card *card, struct sk_buff *skb,
- struct qeth_qdio_out_q *queue)
-{
- gfp_t gfp = GFP_ATOMIC | (skb_pfmemalloc(skb) ? __GFP_MEMALLOC : 0);
- struct qeth_hdr *hdr = (struct qeth_hdr *)skb->data;
- addr_t end = (addr_t)(skb->data + sizeof(*hdr));
- addr_t start = (addr_t)skb->data;
- unsigned int elements = 0;
- unsigned int hd_len = 0;
- int rc;
-
- if (skb->protocol == htons(ETH_P_IPV6))
- return -EPROTONOSUPPORT;
-
- if (qeth_get_elements_for_range(start, end) > 1) {
- /* Misaligned HW header, move it to its own buffer element. */
- hdr = kmem_cache_alloc(qeth_core_header_cache, gfp);
- if (!hdr)
- return -ENOMEM;
- hd_len = sizeof(*hdr);
- skb_copy_from_linear_data(skb, (char *)hdr, hd_len);
- elements++;
- }
-
- elements += qeth_count_elements(skb, hd_len);
- if (elements > queue->max_elements) {
- rc = -E2BIG;
- goto out;
- }
-
- rc = qeth_do_send_packet(card, queue, skb, hdr, hd_len, hd_len,
- elements);
-out:
- if (rc && hd_len)
- kmem_cache_free(qeth_core_header_cache, hdr);
- return rc;
-}
-
static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
@@ -548,12 +509,8 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
txq = qeth_iqd_translate_txq(dev, txq);
queue = card->qdio.out_qs[txq];
- if (IS_OSN(card))
- rc = qeth_l2_xmit_osn(card, skb, queue);
- else
- rc = qeth_xmit(card, skb, queue, vlan_get_protocol(skb),
- qeth_l2_fill_header);
-
+ rc = qeth_xmit(card, skb, queue, vlan_get_protocol(skb),
+ qeth_l2_fill_header);
if (!rc)
return NETDEV_TX_OK;
@@ -679,7 +636,7 @@ static void qeth_l2_dev2br_fdb_notify(struct qeth_card *card, u8 code,
struct net_if_token *token,
struct mac_addr_lnid *addr_lnid)
{
- struct switchdev_notifier_fdb_info info;
+ struct switchdev_notifier_fdb_info info = {};
u8 ntfy_mac[ETH_ALEN];
ether_addr_copy(ntfy_mac, addr_lnid->mac);
@@ -760,6 +717,227 @@ static int qeth_l2_dev2br_an_set(struct qeth_card *card, bool enable)
return rc;
}
+struct qeth_l2_br2dev_event_work {
+ struct work_struct work;
+ struct net_device *br_dev;
+ struct net_device *lsync_dev;
+ struct net_device *dst_dev;
+ unsigned long event;
+ unsigned char addr[ETH_ALEN];
+};
+
+static const struct net_device_ops qeth_l2_netdev_ops;
+
+static bool qeth_l2_must_learn(struct net_device *netdev,
+ struct net_device *dstdev)
+{
+ struct qeth_priv *priv;
+
+ priv = netdev_priv(netdev);
+ return (netdev != dstdev &&
+ (priv->brport_features & BR_LEARNING_SYNC) &&
+ !(br_port_flag_is_set(netdev, BR_ISOLATED) &&
+ br_port_flag_is_set(dstdev, BR_ISOLATED)) &&
+ netdev->netdev_ops == &qeth_l2_netdev_ops);
+}
+
+/**
+ * qeth_l2_br2dev_worker() - update local MACs
+ * @work: bridge to device FDB update; freed here once @addr is unused
+ *
+ * Update local MACs of a learning_sync bridgeport so it can receive
+ * messages for a destination port.
+ * In case of an isolated learning_sync port, also update its isolated
+ * siblings.
+ */
+static void qeth_l2_br2dev_worker(struct work_struct *work)
+{
+	struct qeth_l2_br2dev_event_work *br2dev_event_work =
+		container_of(work, struct qeth_l2_br2dev_event_work, work);
+	struct net_device *lsyncdev = br2dev_event_work->lsync_dev;
+	struct net_device *dstdev = br2dev_event_work->dst_dev;
+	struct net_device *brdev = br2dev_event_work->br_dev;
+	unsigned long event = br2dev_event_work->event;
+	unsigned char *addr = br2dev_event_work->addr;
+	struct qeth_card *card = lsyncdev->ml_priv;
+	struct net_device *lowerdev;
+	struct list_head *iter;
+	int err = 0;
+
+	QETH_CARD_TEXT_(card, 4, "b2dw%04lx", event);
+	QETH_CARD_TEXT_(card, 4, "ma%012llx", ether_addr_to_u64(addr));
+
+	rcu_read_lock();
+	/* Verify preconditions are still valid: */
+	if (!netif_is_bridge_port(lsyncdev) ||
+	    brdev != netdev_master_upper_dev_get_rcu(lsyncdev))
+		goto unlock;
+	if (!qeth_l2_must_learn(lsyncdev, dstdev))
+		goto unlock;
+
+	if (br_port_flag_is_set(lsyncdev, BR_ISOLATED)) {
+		/* Update lsyncdev and its isolated sibling(s): */
+		iter = &brdev->adj_list.lower;
+		lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+		while (lowerdev) {
+			if (br_port_flag_is_set(lowerdev, BR_ISOLATED)) {
+				switch (event) {
+				case SWITCHDEV_FDB_ADD_TO_DEVICE:
+					err = dev_uc_add(lowerdev, addr);
+					break;
+				case SWITCHDEV_FDB_DEL_TO_DEVICE:
+					err = dev_uc_del(lowerdev, addr);
+					break;
+				default:
+					break;
+				}
+				if (err) {
+					QETH_CARD_TEXT(card, 2, "b2derris");
+					QETH_CARD_TEXT_(card, 2,
+							"err%02lx%03d", event,
+							lowerdev->ifindex);
+				}
+			}
+			lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+		}
+	} else {
+		switch (event) {
+		case SWITCHDEV_FDB_ADD_TO_DEVICE:
+			err = dev_uc_add(lsyncdev, addr);
+			break;
+		case SWITCHDEV_FDB_DEL_TO_DEVICE:
+			err = dev_uc_del(lsyncdev, addr);
+			break;
+		default:
+			break;
+		}
+		if (err)
+			QETH_CARD_TEXT_(card, 2, "b2derr%02lx", event);
+	}
+
+unlock:
+	rcu_read_unlock();
+	dev_put(brdev);
+	dev_put(lsyncdev);
+	dev_put(dstdev);
+	kfree(br2dev_event_work);
+}
+
+static int qeth_l2_br2dev_queue_work(struct net_device *brdev,
+ struct net_device *lsyncdev,
+ struct net_device *dstdev,
+ unsigned long event,
+ const unsigned char *addr)
+{
+ struct qeth_l2_br2dev_event_work *worker_data;
+ struct qeth_card *card;
+
+ worker_data = kzalloc(sizeof(*worker_data), GFP_ATOMIC);
+ if (!worker_data)
+ return -ENOMEM;
+ INIT_WORK(&worker_data->work, qeth_l2_br2dev_worker);
+ worker_data->br_dev = brdev;
+ worker_data->lsync_dev = lsyncdev;
+ worker_data->dst_dev = dstdev;
+ worker_data->event = event;
+ ether_addr_copy(worker_data->addr, addr);
+
+ card = lsyncdev->ml_priv;
+ /* Take a reference on the sw port devices and the bridge */
+ dev_hold(brdev);
+ dev_hold(lsyncdev);
+ dev_hold(dstdev);
+ queue_work(card->event_wq, &worker_data->work);
+ return 0;
+}
+
+/* Called under rtnl_lock */
+static int qeth_l2_switchdev_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dstdev, *brdev, *lowerdev;
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct switchdev_notifier_info *info = ptr;
+ struct list_head *iter;
+ struct qeth_card *card;
+ int rc;
+
+ if (!(event == SWITCHDEV_FDB_ADD_TO_DEVICE ||
+ event == SWITCHDEV_FDB_DEL_TO_DEVICE))
+ return NOTIFY_DONE;
+
+ dstdev = switchdev_notifier_info_to_dev(info);
+ brdev = netdev_master_upper_dev_get_rcu(dstdev);
+ if (!brdev || !netif_is_bridge_master(brdev))
+ return NOTIFY_DONE;
+ fdb_info = container_of(info,
+ struct switchdev_notifier_fdb_info,
+ info);
+ iter = &brdev->adj_list.lower;
+ lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+ while (lowerdev) {
+ if (qeth_l2_must_learn(lowerdev, dstdev)) {
+ card = lowerdev->ml_priv;
+ QETH_CARD_TEXT_(card, 4, "b2dqw%03x", event);
+ rc = qeth_l2_br2dev_queue_work(brdev, lowerdev,
+ dstdev, event,
+ fdb_info->addr);
+ if (rc) {
+ QETH_CARD_TEXT(card, 2, "b2dqwerr");
+ return NOTIFY_BAD;
+ }
+ }
+ lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block qeth_l2_sw_notifier = {
+ .notifier_call = qeth_l2_switchdev_event,
+};
+
+static refcount_t qeth_l2_switchdev_notify_refcnt;
+
+/* Called under rtnl_lock */
+static void qeth_l2_br2dev_get(void)
+{
+ int rc;
+
+ if (!refcount_inc_not_zero(&qeth_l2_switchdev_notify_refcnt)) {
+ rc = register_switchdev_notifier(&qeth_l2_sw_notifier);
+ if (rc) {
+ QETH_DBF_MESSAGE(2,
+ "failed to register qeth_l2_sw_notifier: %d\n",
+ rc);
+ } else {
+ refcount_set(&qeth_l2_switchdev_notify_refcnt, 1);
+ QETH_DBF_MESSAGE(2, "qeth_l2_sw_notifier registered\n");
+ }
+ }
+ QETH_DBF_TEXT_(SETUP, 2, "b2d+%04d",
+ qeth_l2_switchdev_notify_refcnt.refs.counter);
+}
+
+/* Called under rtnl_lock */
+static void qeth_l2_br2dev_put(void)
+{
+ int rc;
+
+ if (refcount_dec_and_test(&qeth_l2_switchdev_notify_refcnt)) {
+ rc = unregister_switchdev_notifier(&qeth_l2_sw_notifier);
+ if (rc) {
+ QETH_DBF_MESSAGE(2,
+ "failed to unregister qeth_l2_sw_notifier: %d\n",
+ rc);
+ } else {
+ QETH_DBF_MESSAGE(2,
+ "qeth_l2_sw_notifier unregistered\n");
+ }
+ }
+ QETH_DBF_TEXT_(SETUP, 2, "b2d-%04d",
+ qeth_l2_switchdev_notify_refcnt.refs.counter);
+}
+
static int qeth_l2_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct net_device *dev, u32 filter_mask,
int nlflags)
@@ -853,16 +1031,19 @@ static int qeth_l2_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
} else if (enable) {
qeth_l2_set_pnso_mode(card, QETH_PNSO_ADDR_INFO);
rc = qeth_l2_dev2br_an_set(card, true);
- if (rc)
+ if (rc) {
qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE);
- else
+ } else {
priv->brport_features |= BR_LEARNING_SYNC;
+ qeth_l2_br2dev_get();
+ }
} else {
rc = qeth_l2_dev2br_an_set(card, false);
if (!rc) {
qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE);
priv->brport_features ^= BR_LEARNING_SYNC;
qeth_l2_dev2br_fdb_flush(card);
+ qeth_l2_br2dev_put();
}
}
mutex_unlock(&card->sbp_lock);
@@ -879,7 +1060,8 @@ static const struct net_device_ops qeth_l2_netdev_ops = {
.ndo_select_queue = qeth_l2_select_queue,
.ndo_validate_addr = qeth_l2_validate_addr,
.ndo_set_rx_mode = qeth_l2_set_rx_mode,
- .ndo_do_ioctl = qeth_do_ioctl,
+ .ndo_eth_ioctl = qeth_do_ioctl,
+ .ndo_siocdevprivate = qeth_siocdevprivate,
.ndo_set_mac_address = qeth_l2_set_mac_address,
.ndo_vlan_rx_add_vid = qeth_l2_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = qeth_l2_vlan_rx_kill_vid,
@@ -890,23 +1072,8 @@ static const struct net_device_ops qeth_l2_netdev_ops = {
.ndo_bridge_setlink = qeth_l2_bridge_setlink,
};
-static const struct net_device_ops qeth_osn_netdev_ops = {
- .ndo_open = qeth_open,
- .ndo_stop = qeth_stop,
- .ndo_get_stats64 = qeth_get_stats64,
- .ndo_start_xmit = qeth_l2_hard_start_xmit,
- .ndo_validate_addr = eth_validate_addr,
- .ndo_tx_timeout = qeth_tx_timeout,
-};
-
static int qeth_l2_setup_netdev(struct qeth_card *card)
{
- if (IS_OSN(card)) {
- card->dev->netdev_ops = &qeth_osn_netdev_ops;
- card->dev->flags |= IFF_NOARP;
- goto add_napi;
- }
-
card->dev->needed_headroom = sizeof(struct qeth_hdr);
card->dev->netdev_ops = &qeth_l2_netdev_ops;
card->dev->priv_flags |= IFF_UNICAST_FLT;
@@ -952,7 +1119,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
PAGE_SIZE * (QDIO_MAX_ELEMENTS_PER_BUFFER - 1));
}
-add_napi:
netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
return register_netdev(card->dev);
}
@@ -1044,84 +1210,6 @@ static void qeth_l2_enable_brport_features(struct qeth_card *card)
}
}
-#ifdef CONFIG_QETH_OSN
-static void qeth_osn_assist_cb(struct qeth_card *card,
- struct qeth_cmd_buffer *iob,
- unsigned int data_length)
-{
- qeth_notify_cmd(iob, 0);
- qeth_put_cmd(iob);
-}
-
-int qeth_osn_assist(struct net_device *dev, void *data, int data_len)
-{
- struct qeth_cmd_buffer *iob;
- struct qeth_card *card;
-
- if (data_len < 0)
- return -EINVAL;
- if (!dev)
- return -ENODEV;
- card = dev->ml_priv;
- if (!card)
- return -ENODEV;
- QETH_CARD_TEXT(card, 2, "osnsdmc");
- if (!qeth_card_hw_is_reachable(card))
- return -ENODEV;
-
- iob = qeth_alloc_cmd(&card->write, IPA_PDU_HEADER_SIZE + data_len, 1,
- QETH_IPA_TIMEOUT);
- if (!iob)
- return -ENOMEM;
-
- qeth_prepare_ipa_cmd(card, iob, (u16) data_len, NULL);
-
- memcpy(__ipa_cmd(iob), data, data_len);
- iob->callback = qeth_osn_assist_cb;
- return qeth_send_ipa_cmd(card, iob, NULL, NULL);
-}
-EXPORT_SYMBOL(qeth_osn_assist);
-
-int qeth_osn_register(unsigned char *read_dev_no, struct net_device **dev,
- int (*assist_cb)(struct net_device *, void *),
- int (*data_cb)(struct sk_buff *))
-{
- struct qeth_card *card;
- char bus_id[16];
- u16 devno;
-
- memcpy(&devno, read_dev_no, 2);
- sprintf(bus_id, "0.0.%04x", devno);
- card = qeth_get_card_by_busid(bus_id);
- if (!card || !IS_OSN(card))
- return -ENODEV;
- *dev = card->dev;
-
- QETH_CARD_TEXT(card, 2, "osnreg");
- if ((assist_cb == NULL) || (data_cb == NULL))
- return -EINVAL;
- card->osn_info.assist_cb = assist_cb;
- card->osn_info.data_cb = data_cb;
- return 0;
-}
-EXPORT_SYMBOL(qeth_osn_register);
-
-void qeth_osn_deregister(struct net_device *dev)
-{
- struct qeth_card *card;
-
- if (!dev)
- return;
- card = dev->ml_priv;
- if (!card)
- return;
- QETH_CARD_TEXT(card, 2, "osndereg");
- card->osn_info.assist_cb = NULL;
- card->osn_info.data_cb = NULL;
-}
-EXPORT_SYMBOL(qeth_osn_deregister);
-#endif
-
/* SETBRIDGEPORT support, async notifications */
enum qeth_an_event_type {anev_reg_unreg, anev_abort, anev_reset};
@@ -2190,16 +2278,15 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
struct qeth_card *card = dev_get_drvdata(&gdev->dev);
int rc;
- if (IS_OSN(card))
- dev_notice(&gdev->dev, "OSN support will be dropped in 2021\n");
-
qeth_l2_vnicc_set_defaults(card);
mutex_init(&card->sbp_lock);
- if (gdev->dev.type == &qeth_generic_devtype) {
+ if (gdev->dev.type) {
rc = device_add_groups(&gdev->dev, qeth_l2_attr_groups);
if (rc)
return rc;
+ } else {
+ gdev->dev.type = &qeth_l2_devtype;
}
INIT_WORK(&card->rx_mode_work, qeth_l2_rx_mode_work);
@@ -2209,9 +2296,11 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
static void qeth_l2_remove_device(struct ccwgroup_device *gdev)
{
struct qeth_card *card = dev_get_drvdata(&gdev->dev);
+ struct qeth_priv *priv;
- if (gdev->dev.type == &qeth_generic_devtype)
+ if (gdev->dev.type != &qeth_l2_devtype)
device_remove_groups(&gdev->dev, qeth_l2_attr_groups);
+
qeth_set_allowed_threads(card, 0, 1);
wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
@@ -2219,8 +2308,15 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev)
qeth_set_offline(card, card->discipline, false);
cancel_work_sync(&card->close_dev_work);
- if (card->dev->reg_state == NETREG_REGISTERED)
+ if (card->dev->reg_state == NETREG_REGISTERED) {
+ priv = netdev_priv(card->dev);
+ if (priv->brport_features & BR_LEARNING_SYNC) {
+ rtnl_lock();
+ qeth_l2_br2dev_put();
+ rtnl_unlock();
+ }
unregister_netdev(card->dev);
+ }
}
static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok)
@@ -2331,7 +2427,6 @@ static int qeth_l2_control_event(struct qeth_card *card,
}
const struct qeth_discipline qeth_l2_discipline = {
- .devtype = &qeth_l2_devtype,
.setup = qeth_l2_probe_device,
.remove = qeth_l2_remove_device,
.set_online = qeth_l2_set_online,
@@ -2344,6 +2439,7 @@ EXPORT_SYMBOL_GPL(qeth_l2_discipline);
static int __init qeth_l2_init(void)
{
pr_info("register layer 2 discipline\n");
+ refcount_set(&qeth_l2_switchdev_notify_refcnt, 0);
return 0;
}
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index f0d6f205c53c..3a523e700a5a 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1512,7 +1512,7 @@ static int qeth_l3_arp_flush_cache(struct qeth_card *card)
return rc;
}
-static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd)
{
struct qeth_card *card = dev->ml_priv;
struct qeth_arp_cache_entry arp_entry;
@@ -1532,13 +1532,13 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
rc = -EPERM;
break;
}
- rc = qeth_l3_arp_query(card, rq->ifr_ifru.ifru_data);
+ rc = qeth_l3_arp_query(card, data);
break;
case SIOC_QETH_ARP_ADD_ENTRY:
case SIOC_QETH_ARP_REMOVE_ENTRY:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- if (copy_from_user(&arp_entry, rq->ifr_data, sizeof(arp_entry)))
+ if (copy_from_user(&arp_entry, data, sizeof(arp_entry)))
return -EFAULT;
arp_cmd = (cmd == SIOC_QETH_ARP_ADD_ENTRY) ?
@@ -1841,7 +1841,8 @@ static const struct net_device_ops qeth_l3_netdev_ops = {
.ndo_select_queue = qeth_l3_iqd_select_queue,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = qeth_l3_set_rx_mode,
- .ndo_do_ioctl = qeth_do_ioctl,
+ .ndo_eth_ioctl = qeth_do_ioctl,
+ .ndo_siocdevprivate = qeth_siocdevprivate,
.ndo_fix_features = qeth_fix_features,
.ndo_set_features = qeth_set_features,
.ndo_tx_timeout = qeth_tx_timeout,
@@ -1856,7 +1857,8 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = {
.ndo_select_queue = qeth_l3_osa_select_queue,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = qeth_l3_set_rx_mode,
- .ndo_do_ioctl = qeth_do_ioctl,
+ .ndo_eth_ioctl = qeth_do_ioctl,
+ .ndo_siocdevprivate = qeth_siocdevprivate,
.ndo_fix_features = qeth_fix_features,
.ndo_set_features = qeth_set_features,
.ndo_tx_timeout = qeth_tx_timeout,
@@ -1940,12 +1942,14 @@ static int qeth_l3_probe_device(struct ccwgroup_device *gdev)
if (!card->cmd_wq)
return -ENOMEM;
- if (gdev->dev.type == &qeth_generic_devtype) {
+ if (gdev->dev.type) {
rc = device_add_groups(&gdev->dev, qeth_l3_attr_groups);
if (rc) {
destroy_workqueue(card->cmd_wq);
return rc;
}
+ } else {
+ gdev->dev.type = &qeth_l3_devtype;
}
INIT_WORK(&card->rx_mode_work, qeth_l3_rx_mode_work);
@@ -1956,7 +1960,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
{
struct qeth_card *card = dev_get_drvdata(&cgdev->dev);
- if (cgdev->dev.type == &qeth_generic_devtype)
+ if (cgdev->dev.type != &qeth_l3_devtype)
device_remove_groups(&cgdev->dev, qeth_l3_attr_groups);
qeth_set_allowed_threads(card, 0, 1);
@@ -2065,7 +2069,6 @@ static int qeth_l3_control_event(struct qeth_card *card,
}
const struct qeth_discipline qeth_l3_discipline = {
- .devtype = &qeth_l3_devtype,
.setup = qeth_l3_probe_device,
.remove = qeth_l3_remove_device,
.set_online = qeth_l3_set_online,
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 6671d9563f6c..8f19bed6384e 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -69,10 +69,7 @@ static void zfcp_qdio_int_req(struct ccw_device *cdev, unsigned int qdio_err,
{
struct zfcp_qdio *qdio = (struct zfcp_qdio *) parm;
- if (unlikely(qdio_err)) {
- zfcp_qdio_handler_error(qdio, "qdireq1", qdio_err);
- return;
- }
+ zfcp_qdio_handler_error(qdio, "qdireq1", qdio_err);
}
static void zfcp_qdio_request_tasklet(struct tasklet_struct *tasklet)
diff --git a/drivers/scsi/cxgbi/cxgb4i/Kconfig b/drivers/scsi/cxgbi/cxgb4i/Kconfig
index 8b0deece9758..63c8a0f3cd0c 100644
--- a/drivers/scsi/cxgbi/cxgb4i/Kconfig
+++ b/drivers/scsi/cxgbi/cxgb4i/Kconfig
@@ -2,6 +2,7 @@
config SCSI_CXGB4_ISCSI
tristate "Chelsio T4 iSCSI support"
depends on PCI && INET && (IPV6 || IPV6=n)
+ depends on PTP_1588_CLOCK_OPTIONAL
depends on THERMAL || !THERMAL
depends on ETHERNET
depends on TLS || TLS=n
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 5983e05b648f..e29523a1b530 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -13193,6 +13193,8 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
if (!phba)
return -ENOMEM;
+ INIT_LIST_HEAD(&phba->poll_list);
+
/* Perform generic PCI device enabling operation */
error = lpfc_enable_pci_dev(phba);
if (error)
@@ -13327,7 +13329,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
/* Enable RAS FW log support */
lpfc_sli4_ras_setup(phba);
- INIT_LIST_HEAD(&phba->poll_list);
timer_setup(&phba->cpuhp_poll_timer, lpfc_sli4_poll_hbtimer, 0);
cpuhp_state_add_instance_nocalls(lpfc_cpuhp_state, &phba->cpuhp);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 19b1c0cf5f2a..cf4a3a2c22ad 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -7851,7 +7851,7 @@ _base_make_ioc_operational(struct MPT3SAS_ADAPTER *ioc)
return r;
}
- rc = _base_static_config_pages(ioc);
+ r = _base_static_config_pages(ioc);
if (r)
return r;
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index ae9bfc658203..c0d31119d6d7 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -808,12 +808,15 @@ store_state_field(struct device *dev, struct device_attribute *attr,
ret = scsi_device_set_state(sdev, state);
/*
* If the device state changes to SDEV_RUNNING, we need to
- * rescan the device to revalidate it, and run the queue to
- * avoid I/O hang.
+ * run the queue to avoid I/O hang, and rescan the device
+ * to revalidate it. Running the queue first is necessary
+ * because another thread may be waiting inside
+ * blk_mq_freeze_queue_wait() and because that call may be
+ * waiting for pending I/O to finish.
*/
if (ret == 0 && state == SDEV_RUNNING) {
- scsi_rescan_device(dev);
blk_mq_run_hw_queues(sdev->request_queue, true);
+ scsi_rescan_device(dev);
}
mutex_unlock(&sdev->state_mutex);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index b8d55af763f9..610ebba0d66e 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -129,6 +129,7 @@ static DEFINE_MUTEX(sd_ref_mutex);
static struct kmem_cache *sd_cdb_cache;
static mempool_t *sd_cdb_pool;
static mempool_t *sd_page_pool;
+static struct lock_class_key sd_bio_compl_lkclass;
static const char *sd_cache_types[] = {
"write through", "none", "write back",
@@ -886,7 +887,7 @@ static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
cmd->cmnd[0] = UNMAP;
cmd->cmnd[8] = 24;
- buf = page_address(rq->special_vec.bv_page);
+ buf = bvec_virt(&rq->special_vec);
put_unaligned_be16(6 + 16, &buf[0]);
put_unaligned_be16(16, &buf[2]);
put_unaligned_be64(lba, &buf[8]);
@@ -3408,7 +3409,8 @@ static int sd_probe(struct device *dev)
if (!sdkp)
goto out;
- gd = alloc_disk(SD_MINORS);
+ gd = __alloc_disk_node(sdp->request_queue, NUMA_NO_NODE,
+ &sd_bio_compl_lkclass);
if (!gd)
goto out_free;
@@ -3454,10 +3456,10 @@ static int sd_probe(struct device *dev)
gd->major = sd_major((index & 0xf0) >> 4);
gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
+ gd->minors = SD_MINORS;
gd->fops = &sd_fops;
gd->private_data = &sdkp->driver;
- gd->queue = sdkp->device->request_queue;
/* defaults, until the device tells us otherwise */
sdp->sector_size = 512;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 91e2221bbb0d..d5889b4f0fd4 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -166,7 +166,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */
bool exclude; /* 1->open(O_EXCL) succeeded and is active */
int open_cnt; /* count of opens (perhaps < num(sfds) ) */
char sgdebug; /* 0->off, 1->sense, 9->dump dev, 10-> all devs */
- struct gendisk *disk;
+ char name[DISK_NAME_LEN];
struct cdev * cdev; /* char_dev [sysfs: /sys/cdev/major/sg<n>] */
struct kref d_ref;
} Sg_device;
@@ -202,8 +202,7 @@ static void sg_device_destroy(struct kref *kref);
#define SZ_SG_REQ_INFO sizeof(sg_req_info_t)
#define sg_printk(prefix, sdp, fmt, a...) \
- sdev_prefix_printk(prefix, (sdp)->device, \
- (sdp)->disk->disk_name, fmt, ##a)
+ sdev_prefix_printk(prefix, (sdp)->device, (sdp)->name, fmt, ##a)
/*
* The SCSI interfaces that use read() and write() as an asynchronous variant of
@@ -832,7 +831,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
srp->rq->timeout = timeout;
kref_get(&sfp->f_ref); /* sg_rq_end_io() does kref_put(). */
- blk_execute_rq_nowait(sdp->disk, srp->rq, at_head, sg_rq_end_io);
+ blk_execute_rq_nowait(NULL, srp->rq, at_head, sg_rq_end_io);
return 0;
}
@@ -1119,8 +1118,7 @@ sg_ioctl_common(struct file *filp, Sg_device *sdp, Sg_fd *sfp,
return put_user(max_sectors_bytes(sdp->device->request_queue),
ip);
case BLKTRACESETUP:
- return blk_trace_setup(sdp->device->request_queue,
- sdp->disk->disk_name,
+ return blk_trace_setup(sdp->device->request_queue, sdp->name,
MKDEV(SCSI_GENERIC_MAJOR, sdp->index),
NULL, p);
case BLKTRACESTART:
@@ -1456,7 +1454,7 @@ static struct class *sg_sysfs_class;
static int sg_sysfs_valid = 0;
static Sg_device *
-sg_alloc(struct gendisk *disk, struct scsi_device *scsidp)
+sg_alloc(struct scsi_device *scsidp)
{
struct request_queue *q = scsidp->request_queue;
Sg_device *sdp;
@@ -1492,9 +1490,7 @@ sg_alloc(struct gendisk *disk, struct scsi_device *scsidp)
SCSI_LOG_TIMEOUT(3, sdev_printk(KERN_INFO, scsidp,
"sg_alloc: dev=%d \n", k));
- sprintf(disk->disk_name, "sg%d", k);
- disk->first_minor = k;
- sdp->disk = disk;
+ sprintf(sdp->name, "sg%d", k);
sdp->device = scsidp;
mutex_init(&sdp->open_rel_lock);
INIT_LIST_HEAD(&sdp->sfds);
@@ -1521,19 +1517,11 @@ static int
sg_add_device(struct device *cl_dev, struct class_interface *cl_intf)
{
struct scsi_device *scsidp = to_scsi_device(cl_dev->parent);
- struct gendisk *disk;
Sg_device *sdp = NULL;
struct cdev * cdev = NULL;
int error;
unsigned long iflags;
- disk = alloc_disk(1);
- if (!disk) {
- pr_warn("%s: alloc_disk failed\n", __func__);
- return -ENOMEM;
- }
- disk->major = SCSI_GENERIC_MAJOR;
-
error = -ENOMEM;
cdev = cdev_alloc();
if (!cdev) {
@@ -1543,7 +1531,7 @@ sg_add_device(struct device *cl_dev, struct class_interface *cl_intf)
cdev->owner = THIS_MODULE;
cdev->ops = &sg_fops;
- sdp = sg_alloc(disk, scsidp);
+ sdp = sg_alloc(scsidp);
if (IS_ERR(sdp)) {
pr_warn("%s: sg_alloc failed\n", __func__);
error = PTR_ERR(sdp);
@@ -1561,7 +1549,7 @@ sg_add_device(struct device *cl_dev, struct class_interface *cl_intf)
sg_class_member = device_create(sg_sysfs_class, cl_dev->parent,
MKDEV(SCSI_GENERIC_MAJOR,
sdp->index),
- sdp, "%s", disk->disk_name);
+ sdp, "%s", sdp->name);
if (IS_ERR(sg_class_member)) {
pr_err("%s: device_create failed\n", __func__);
error = PTR_ERR(sg_class_member);
@@ -1589,7 +1577,6 @@ cdev_add_err:
kfree(sdp);
out:
- put_disk(disk);
if (cdev)
cdev_del(cdev);
return error;
@@ -1613,7 +1600,6 @@ sg_device_destroy(struct kref *kref)
SCSI_LOG_TIMEOUT(3,
sg_printk(KERN_INFO, sdp, "sg_device_destroy\n"));
- put_disk(sdp->disk);
kfree(sdp);
}
@@ -2606,7 +2592,7 @@ static int sg_proc_seq_show_debug(struct seq_file *s, void *v)
goto skip;
read_lock(&sdp->sfd_lock);
if (!list_empty(&sdp->sfds)) {
- seq_printf(s, " >>> device=%s ", sdp->disk->disk_name);
+ seq_printf(s, " >>> device=%s ", sdp->name);
if (atomic_read(&sdp->detaching))
seq_puts(s, "detaching pending close ");
else if (sdp->device) {
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index a6d3ac0a6cbc..2942a4ec9bdd 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -106,6 +106,8 @@ static struct scsi_driver sr_template = {
static unsigned long sr_index_bits[SR_DISKS / BITS_PER_LONG];
static DEFINE_SPINLOCK(sr_index_lock);
+static struct lock_class_key sr_bio_compl_lkclass;
+
/* This semaphore is used to mediate the 0->1 reference get in the
* face of object destruction (i.e. we can't allow a get on an
* object after last put) */
@@ -712,7 +714,8 @@ static int sr_probe(struct device *dev)
kref_init(&cd->kref);
- disk = alloc_disk(1);
+ disk = __alloc_disk_node(sdev->request_queue, NUMA_NO_NODE,
+ &sr_bio_compl_lkclass);
if (!disk)
goto fail_free;
mutex_init(&cd->lock);
@@ -729,6 +732,7 @@ static int sr_probe(struct device *dev)
disk->major = SCSI_CDROM_MAJOR;
disk->first_minor = minor;
+ disk->minors = 1;
sprintf(disk->disk_name, "sr%d", minor);
disk->fops = &sr_bdops;
disk->flags = GENHD_FL_CD | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
@@ -762,7 +766,6 @@ static int sr_probe(struct device *dev)
set_capacity(disk, cd->capacity);
disk->private_data = &cd->driver;
- disk->queue = sdev->request_queue;
if (register_cdrom(disk, &cd->cdi))
goto fail_minor;
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index c6f14540ae03..d1abc020f3c0 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -309,13 +309,8 @@ static char * st_incompatible(struct scsi_device* SDp)
}
-static inline char *tape_name(struct scsi_tape *tape)
-{
- return tape->disk->disk_name;
-}
-
#define st_printk(prefix, t, fmt, a...) \
- sdev_prefix_printk(prefix, (t)->device, tape_name(t), fmt, ##a)
+ sdev_prefix_printk(prefix, (t)->device, (t)->name, fmt, ##a)
#ifdef DEBUG
#define DEBC_printk(t, fmt, a...) \
if (debugging) { st_printk(ST_DEB_MSG, t, fmt, ##a ); }
@@ -363,7 +358,7 @@ static int st_chk_result(struct scsi_tape *STp, struct st_request * SRpnt)
int result = SRpnt->result;
u8 scode;
DEB(const char *stp;)
- char *name = tape_name(STp);
+ char *name = STp->name;
struct st_cmdstatus *cmdstatp;
if (!result)
@@ -3841,8 +3836,9 @@ static long st_ioctl_common(struct file *file, unsigned int cmd_in, void __user
!capable(CAP_SYS_RAWIO))
i = -EPERM;
else
- i = scsi_cmd_ioctl(STp->disk->queue, STp->disk,
- file->f_mode, cmd_in, p);
+ i = scsi_cmd_ioctl(STp->device->request_queue,
+ NULL, file->f_mode, cmd_in,
+ p);
if (i != -ENOTTY)
return i;
break;
@@ -4216,7 +4212,7 @@ static int create_one_cdev(struct scsi_tape *tape, int mode, int rew)
i = mode << (4 - ST_NBR_MODE_BITS);
snprintf(name, 10, "%s%s%s", rew ? "n" : "",
- tape->disk->disk_name, st_formats[i]);
+ tape->name, st_formats[i]);
dev = device_create(&st_sysfs_class, &tape->device->sdev_gendev,
cdev_devno, &tape->modes[mode], "%s", name);
@@ -4271,7 +4267,6 @@ static void remove_cdevs(struct scsi_tape *tape)
static int st_probe(struct device *dev)
{
struct scsi_device *SDp = to_scsi_device(dev);
- struct gendisk *disk = NULL;
struct scsi_tape *tpnt = NULL;
struct st_modedef *STm;
struct st_partstat *STps;
@@ -4301,27 +4296,13 @@ static int st_probe(struct device *dev)
goto out;
}
- disk = alloc_disk(1);
- if (!disk) {
- sdev_printk(KERN_ERR, SDp,
- "st: out of memory. Device not attached.\n");
- goto out_buffer_free;
- }
-
tpnt = kzalloc(sizeof(struct scsi_tape), GFP_KERNEL);
if (tpnt == NULL) {
sdev_printk(KERN_ERR, SDp,
"st: Can't allocate device descriptor.\n");
- goto out_put_disk;
+ goto out_buffer_free;
}
kref_init(&tpnt->kref);
- tpnt->disk = disk;
- disk->private_data = &tpnt->driver;
- /* SCSI tape doesn't register this gendisk via add_disk(). Manually
- * take queue reference that release_disk() expects. */
- if (!blk_get_queue(SDp->request_queue))
- goto out_put_disk;
- disk->queue = SDp->request_queue;
tpnt->driver = &st_template;
tpnt->device = SDp;
@@ -4394,10 +4375,10 @@ static int st_probe(struct device *dev)
idr_preload_end();
if (error < 0) {
pr_warn("st: idr allocation failed: %d\n", error);
- goto out_put_queue;
+ goto out_free_tape;
}
tpnt->index = error;
- sprintf(disk->disk_name, "st%d", tpnt->index);
+ sprintf(tpnt->name, "st%d", tpnt->index);
tpnt->stats = kzalloc(sizeof(struct scsi_tape_stats), GFP_KERNEL);
if (tpnt->stats == NULL) {
sdev_printk(KERN_ERR, SDp,
@@ -4414,9 +4395,9 @@ static int st_probe(struct device *dev)
scsi_autopm_put_device(SDp);
sdev_printk(KERN_NOTICE, SDp,
- "Attached scsi tape %s\n", tape_name(tpnt));
+ "Attached scsi tape %s\n", tpnt->name);
sdev_printk(KERN_INFO, SDp, "%s: try direct i/o: %s (alignment %d B)\n",
- tape_name(tpnt), tpnt->try_dio ? "yes" : "no",
+ tpnt->name, tpnt->try_dio ? "yes" : "no",
queue_dma_alignment(SDp->request_queue) + 1);
return 0;
@@ -4428,10 +4409,7 @@ out_idr_remove:
spin_lock(&st_index_lock);
idr_remove(&st_index_idr, tpnt->index);
spin_unlock(&st_index_lock);
-out_put_queue:
- blk_put_queue(disk->queue);
-out_put_disk:
- put_disk(disk);
+out_free_tape:
kfree(tpnt);
out_buffer_free:
kfree(buffer);
@@ -4470,7 +4448,6 @@ static int st_remove(struct device *dev)
static void scsi_tape_release(struct kref *kref)
{
struct scsi_tape *tpnt = to_scsi_tape(kref);
- struct gendisk *disk = tpnt->disk;
tpnt->device = NULL;
@@ -4480,8 +4457,6 @@ static void scsi_tape_release(struct kref *kref)
kfree(tpnt->buffer);
}
- disk->private_data = NULL;
- put_disk(disk);
kfree(tpnt->stats);
kfree(tpnt);
return;
diff --git a/drivers/scsi/st.h b/drivers/scsi/st.h
index 9d3c38bb0794..c0ef0d9aaf8a 100644
--- a/drivers/scsi/st.h
+++ b/drivers/scsi/st.h
@@ -187,7 +187,7 @@ struct scsi_tape {
unsigned char last_cmnd[6];
unsigned char last_sense[16];
#endif
- struct gendisk *disk;
+ char name[DISK_NAME_LEN];
struct kref kref;
struct scsi_tape_stats *stats;
};
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 328bb961c281..37506b3fe5a9 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1199,14 +1199,24 @@ static void storvsc_on_io_completion(struct storvsc_device *stor_device,
vstor_packet->vm_srb.sense_info_length);
if (vstor_packet->vm_srb.scsi_status != 0 ||
- vstor_packet->vm_srb.srb_status != SRB_STATUS_SUCCESS)
- storvsc_log(device, STORVSC_LOGGING_ERROR,
+ vstor_packet->vm_srb.srb_status != SRB_STATUS_SUCCESS) {
+
+ /*
+ * Log TEST_UNIT_READY errors only as warnings. Hyper-V can
+ * return errors when detecting devices using TEST_UNIT_READY,
+ * and logging these as errors produces unhelpful noise.
+ */
+ int loglevel = (stor_pkt->vm_srb.cdb[0] == TEST_UNIT_READY) ?
+ STORVSC_LOGGING_WARN : STORVSC_LOGGING_ERROR;
+
+ storvsc_log(device, loglevel,
"tag#%d cmd 0x%x status: scsi 0x%x srb 0x%x hv 0x%x\n",
request->cmd->request->tag,
stor_pkt->vm_srb.cdb[0],
vstor_packet->vm_srb.scsi_status,
vstor_packet->vm_srb.srb_status,
vstor_packet->status);
+ }
if (vstor_packet->vm_srb.scsi_status == SAM_STAT_CHECK_CONDITION &&
(vstor_packet->vm_srb.srb_status & SRB_STATUS_AUTOSENSE_VALID))
diff --git a/drivers/slimbus/messaging.c b/drivers/slimbus/messaging.c
index f2b5d347d227..e5ae26227bdb 100644
--- a/drivers/slimbus/messaging.c
+++ b/drivers/slimbus/messaging.c
@@ -66,7 +66,7 @@ int slim_alloc_txn_tid(struct slim_controller *ctrl, struct slim_msg_txn *txn)
int ret = 0;
spin_lock_irqsave(&ctrl->txn_lock, flags);
- ret = idr_alloc_cyclic(&ctrl->tid_idr, txn, 0,
+ ret = idr_alloc_cyclic(&ctrl->tid_idr, txn, 1,
SLIM_MAX_TIDS, GFP_ATOMIC);
if (ret < 0) {
spin_unlock_irqrestore(&ctrl->txn_lock, flags);
@@ -131,7 +131,8 @@ int slim_do_transfer(struct slim_controller *ctrl, struct slim_msg_txn *txn)
goto slim_xfer_err;
}
}
-
+ /* Initialize tid to invalid value */
+ txn->tid = 0;
need_tid = slim_tid_txn(txn->mt, txn->mc);
if (need_tid) {
@@ -163,7 +164,7 @@ int slim_do_transfer(struct slim_controller *ctrl, struct slim_msg_txn *txn)
txn->mt, txn->mc, txn->la, ret);
slim_xfer_err:
- if (!clk_pause_msg && (!need_tid || ret == -ETIMEDOUT)) {
+ if (!clk_pause_msg && (txn->tid == 0 || ret == -ETIMEDOUT)) {
/*
* remove runtime-pm vote if this was TX only, or
* if there was error during this transaction
diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c
index c054e83ab636..7040293c2ee8 100644
--- a/drivers/slimbus/qcom-ngd-ctrl.c
+++ b/drivers/slimbus/qcom-ngd-ctrl.c
@@ -618,7 +618,7 @@ static void qcom_slim_ngd_rx(struct qcom_slim_ngd_ctrl *ctrl, u8 *buf)
(mc == SLIM_USR_MC_GENERIC_ACK &&
mt == SLIM_MSG_MT_SRC_REFERRED_USER)) {
slim_msg_response(&ctrl->ctrl, &buf[4], buf[3], len - 4);
- pm_runtime_mark_last_busy(ctrl->dev);
+ pm_runtime_mark_last_busy(ctrl->ctrl.dev);
}
}
@@ -1080,7 +1080,8 @@ static void qcom_slim_ngd_setup(struct qcom_slim_ngd_ctrl *ctrl)
{
u32 cfg = readl_relaxed(ctrl->ngd->base);
- if (ctrl->state == QCOM_SLIM_NGD_CTRL_DOWN)
+ if (ctrl->state == QCOM_SLIM_NGD_CTRL_DOWN ||
+ ctrl->state == QCOM_SLIM_NGD_CTRL_ASLEEP)
qcom_slim_ngd_init_dma(ctrl);
/* By default enable message queues */
@@ -1131,6 +1132,7 @@ static int qcom_slim_ngd_power_up(struct qcom_slim_ngd_ctrl *ctrl)
dev_info(ctrl->dev, "Subsys restart: ADSP active framer\n");
return 0;
}
+ qcom_slim_ngd_setup(ctrl);
return 0;
}
@@ -1257,13 +1259,14 @@ static int qcom_slim_ngd_enable(struct qcom_slim_ngd_ctrl *ctrl, bool enable)
}
/* controller state should be in sync with framework state */
complete(&ctrl->qmi.qmi_comp);
- if (!pm_runtime_enabled(ctrl->dev) ||
- !pm_runtime_suspended(ctrl->dev))
- qcom_slim_ngd_runtime_resume(ctrl->dev);
+ if (!pm_runtime_enabled(ctrl->ctrl.dev) ||
+ !pm_runtime_suspended(ctrl->ctrl.dev))
+ qcom_slim_ngd_runtime_resume(ctrl->ctrl.dev);
else
- pm_runtime_resume(ctrl->dev);
- pm_runtime_mark_last_busy(ctrl->dev);
- pm_runtime_put(ctrl->dev);
+ pm_runtime_resume(ctrl->ctrl.dev);
+
+ pm_runtime_mark_last_busy(ctrl->ctrl.dev);
+ pm_runtime_put(ctrl->ctrl.dev);
ret = slim_register_controller(&ctrl->ctrl);
if (ret) {
@@ -1389,7 +1392,7 @@ static int qcom_slim_ngd_ssr_pdr_notify(struct qcom_slim_ngd_ctrl *ctrl,
/* Make sure the last dma xfer is finished */
mutex_lock(&ctrl->tx_lock);
if (ctrl->state != QCOM_SLIM_NGD_CTRL_DOWN) {
- pm_runtime_get_noresume(ctrl->dev);
+ pm_runtime_get_noresume(ctrl->ctrl.dev);
ctrl->state = QCOM_SLIM_NGD_CTRL_DOWN;
qcom_slim_ngd_down(ctrl);
qcom_slim_ngd_exit_dma(ctrl);
@@ -1617,6 +1620,7 @@ static int __maybe_unused qcom_slim_ngd_runtime_suspend(struct device *dev)
struct qcom_slim_ngd_ctrl *ctrl = dev_get_drvdata(dev);
int ret = 0;
+ qcom_slim_ngd_exit_dma(ctrl);
if (!ctrl->qmi.handle)
return 0;
diff --git a/drivers/soc/fsl/qe/qe_ic.c b/drivers/soc/fsl/qe/qe_ic.c
index 3f711c1a0996..bbae3d39c7be 100644
--- a/drivers/soc/fsl/qe/qe_ic.c
+++ b/drivers/soc/fsl/qe/qe_ic.c
@@ -23,6 +23,7 @@
#include <linux/signal.h>
#include <linux/device.h>
#include <linux/spinlock.h>
+#include <linux/platform_device.h>
#include <asm/irq.h>
#include <asm/io.h>
#include <soc/fsl/qe/qe.h>
@@ -53,8 +54,8 @@ struct qe_ic {
struct irq_chip hc_irq;
/* VIRQ numbers of QE high/low irqs */
- unsigned int virq_high;
- unsigned int virq_low;
+ int virq_high;
+ int virq_low;
};
/*
@@ -404,42 +405,40 @@ static void qe_ic_cascade_muxed_mpic(struct irq_desc *desc)
chip->irq_eoi(&desc->irq_data);
}
-static void __init qe_ic_init(struct device_node *node)
+static int qe_ic_init(struct platform_device *pdev)
{
+ struct device *dev = &pdev->dev;
void (*low_handler)(struct irq_desc *desc);
void (*high_handler)(struct irq_desc *desc);
struct qe_ic *qe_ic;
- struct resource res;
- u32 ret;
+ struct resource *res;
+ struct device_node *node = pdev->dev.of_node;
- ret = of_address_to_resource(node, 0, &res);
- if (ret)
- return;
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (res == NULL) {
+ dev_err(dev, "no memory resource defined\n");
+ return -ENODEV;
+ }
- qe_ic = kzalloc(sizeof(*qe_ic), GFP_KERNEL);
+ qe_ic = devm_kzalloc(dev, sizeof(*qe_ic), GFP_KERNEL);
if (qe_ic == NULL)
- return;
+ return -ENOMEM;
- qe_ic->irqhost = irq_domain_add_linear(node, NR_QE_IC_INTS,
- &qe_ic_host_ops, qe_ic);
- if (qe_ic->irqhost == NULL) {
- kfree(qe_ic);
- return;
+ qe_ic->regs = devm_ioremap(dev, res->start, resource_size(res));
+ if (qe_ic->regs == NULL) {
+ dev_err(dev, "failed to ioremap() registers\n");
+ return -ENODEV;
}
- qe_ic->regs = ioremap(res.start, resource_size(&res));
-
qe_ic->hc_irq = qe_ic_irq_chip;
- qe_ic->virq_high = irq_of_parse_and_map(node, 0);
- qe_ic->virq_low = irq_of_parse_and_map(node, 1);
+ qe_ic->virq_high = platform_get_irq(pdev, 0);
+ qe_ic->virq_low = platform_get_irq(pdev, 1);
- if (!qe_ic->virq_low) {
- printk(KERN_ERR "Failed to map QE_IC low IRQ\n");
- kfree(qe_ic);
- return;
- }
- if (qe_ic->virq_high != qe_ic->virq_low) {
+ if (qe_ic->virq_low <= 0)
+ return -ENODEV;
+
+ if (qe_ic->virq_high > 0 && qe_ic->virq_high != qe_ic->virq_low) {
low_handler = qe_ic_cascade_low;
high_handler = qe_ic_cascade_high;
} else {
@@ -447,29 +446,42 @@ static void __init qe_ic_init(struct device_node *node)
high_handler = NULL;
}
+ qe_ic->irqhost = irq_domain_add_linear(node, NR_QE_IC_INTS,
+ &qe_ic_host_ops, qe_ic);
+ if (qe_ic->irqhost == NULL) {
+ dev_err(dev, "failed to add irq domain\n");
+ return -ENODEV;
+ }
+
qe_ic_write(qe_ic->regs, QEIC_CICR, 0);
irq_set_handler_data(qe_ic->virq_low, qe_ic);
irq_set_chained_handler(qe_ic->virq_low, low_handler);
- if (qe_ic->virq_high && qe_ic->virq_high != qe_ic->virq_low) {
+ if (high_handler) {
irq_set_handler_data(qe_ic->virq_high, qe_ic);
irq_set_chained_handler(qe_ic->virq_high, high_handler);
}
+ return 0;
}
+static const struct of_device_id qe_ic_ids[] = {
+ { .compatible = "fsl,qe-ic"},
+ { .type = "qeic"},
+ {},
+};
-static int __init qe_ic_of_init(void)
+static struct platform_driver qe_ic_driver =
{
- struct device_node *np;
+ .driver = {
+ .name = "qe-ic",
+ .of_match_table = qe_ic_ids,
+ },
+ .probe = qe_ic_init,
+};
- np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
- if (!np) {
- np = of_find_node_by_type(NULL, "qeic");
- if (!np)
- return -ENODEV;
- }
- qe_ic_init(np);
- of_node_put(np);
+static int __init qe_ic_of_init(void)
+{
+ platform_driver_register(&qe_ic_driver);
return 0;
}
subsys_initcall(qe_ic_of_init);
diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c
index 3e6d4addac2f..1b115734a8f6 100644
--- a/drivers/soundwire/bus.c
+++ b/drivers/soundwire/bus.c
@@ -390,7 +390,10 @@ sdw_nread_no_pm(struct sdw_slave *slave, u32 addr, size_t count, u8 *val)
if (ret < 0)
return ret;
- return sdw_transfer(slave->bus, &msg);
+ ret = sdw_transfer(slave->bus, &msg);
+ if (slave->is_mockup_device)
+ ret = 0;
+ return ret;
}
static int
@@ -404,7 +407,10 @@ sdw_nwrite_no_pm(struct sdw_slave *slave, u32 addr, size_t count, const u8 *val)
if (ret < 0)
return ret;
- return sdw_transfer(slave->bus, &msg);
+ ret = sdw_transfer(slave->bus, &msg);
+ if (slave->is_mockup_device)
+ ret = 0;
+ return ret;
}
int sdw_write_no_pm(struct sdw_slave *slave, u32 addr, u8 value)
@@ -896,7 +902,8 @@ static int sdw_bus_wait_for_clk_prep_deprep(struct sdw_bus *bus, u16 dev_num)
do {
val = sdw_bread_no_pm(bus, dev_num, SDW_SCP_STAT);
if (val < 0) {
- dev_err(bus->dev, "SDW_SCP_STAT bread failed:%d\n", val);
+ if (val != -ENODATA)
+ dev_err(bus->dev, "SDW_SCP_STAT bread failed:%d\n", val);
return val;
}
val &= SDW_SCP_STAT_CLK_STP_NF;
@@ -1853,6 +1860,7 @@ void sdw_clear_slave_status(struct sdw_bus *bus, u32 request)
if (slave->status != SDW_SLAVE_UNATTACHED) {
sdw_modify_slave_status(slave, SDW_SLAVE_UNATTACHED);
slave->first_interrupt_done = false;
+ sdw_update_slave_status(slave, SDW_SLAVE_UNATTACHED);
}
/* keep track of request, used in pm_runtime resume */
diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c
index 25950422b085..4fcc3ba93004 100644
--- a/drivers/soundwire/cadence_master.c
+++ b/drivers/soundwire/cadence_master.c
@@ -450,6 +450,40 @@ static int cdns_parity_error_injection(void *data, u64 value)
DEFINE_DEBUGFS_ATTRIBUTE(cdns_parity_error_fops, NULL,
cdns_parity_error_injection, "%llu\n");
+static int cdns_set_pdi_loopback_source(void *data, u64 value)
+{
+ struct sdw_cdns *cdns = data;
+ unsigned int pdi_out_num = cdns->pcm.num_bd + cdns->pcm.num_out;
+
+ if (value > pdi_out_num)
+ return -EINVAL;
+
+ /* Userspace changed the hardware state behind the kernel's back */
+ add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+
+ cdns->pdi_loopback_source = value;
+
+ return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(cdns_pdi_loopback_source_fops, NULL, cdns_set_pdi_loopback_source, "%llu\n");
+
+static int cdns_set_pdi_loopback_target(void *data, u64 value)
+{
+ struct sdw_cdns *cdns = data;
+ unsigned int pdi_in_num = cdns->pcm.num_bd + cdns->pcm.num_in;
+
+ if (value > pdi_in_num)
+ return -EINVAL;
+
+ /* Userspace changed the hardware state behind the kernel's back */
+ add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+
+ cdns->pdi_loopback_target = value;
+
+ return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(cdns_pdi_loopback_target_fops, NULL, cdns_set_pdi_loopback_target, "%llu\n");
+
/**
* sdw_cdns_debugfs_init() - Cadence debugfs init
* @cdns: Cadence instance
@@ -464,6 +498,16 @@ void sdw_cdns_debugfs_init(struct sdw_cdns *cdns, struct dentry *root)
debugfs_create_file("cdns-parity-error-injection", 0200, root, cdns,
&cdns_parity_error_fops);
+
+ cdns->pdi_loopback_source = -1;
+ cdns->pdi_loopback_target = -1;
+
+ debugfs_create_file("cdns-pdi-loopback-source", 0200, root, cdns,
+ &cdns_pdi_loopback_source_fops);
+
+ debugfs_create_file("cdns-pdi-loopback-target", 0200, root, cdns,
+ &cdns_pdi_loopback_target_fops);
+
}
EXPORT_SYMBOL_GPL(sdw_cdns_debugfs_init);
@@ -822,7 +866,6 @@ irqreturn_t sdw_cdns_irq(int irq, void *dev_id)
{
struct sdw_cdns *cdns = dev_id;
u32 int_status;
- int ret = IRQ_HANDLED;
/* Check if the link is up */
if (!cdns->link_up)
@@ -900,7 +943,7 @@ irqreturn_t sdw_cdns_irq(int irq, void *dev_id)
}
cdns_writel(cdns, CDNS_MCP_INTSTAT, int_status);
- return ret;
+ return IRQ_HANDLED;
}
EXPORT_SYMBOL(sdw_cdns_irq);
@@ -936,6 +979,49 @@ static void cdns_update_slave_status_work(struct work_struct *work)
}
+/* paranoia check to make sure self-cleared bits are indeed cleared */
+void sdw_cdns_check_self_clearing_bits(struct sdw_cdns *cdns, const char *string,
+ bool initial_delay, int reset_iterations)
+{
+ u32 mcp_control;
+ u32 mcp_config_update;
+ int i;
+
+ if (initial_delay)
+ usleep_range(1000, 1500);
+
+ mcp_control = cdns_readl(cdns, CDNS_MCP_CONTROL);
+
+ /* the following bits should be cleared immediately */
+ if (mcp_control & CDNS_MCP_CONTROL_CMD_RST)
+ dev_err(cdns->dev, "%s failed: MCP_CONTROL_CMD_RST is not cleared\n", string);
+ if (mcp_control & CDNS_MCP_CONTROL_SOFT_RST)
+ dev_err(cdns->dev, "%s failed: MCP_CONTROL_SOFT_RST is not cleared\n", string);
+ if (mcp_control & CDNS_MCP_CONTROL_SW_RST)
+ dev_err(cdns->dev, "%s failed: MCP_CONTROL_SW_RST is not cleared\n", string);
+ if (mcp_control & CDNS_MCP_CONTROL_CLK_STOP_CLR)
+ dev_err(cdns->dev, "%s failed: MCP_CONTROL_CLK_STOP_CLR is not cleared\n", string);
+ mcp_config_update = cdns_readl(cdns, CDNS_MCP_CONFIG_UPDATE);
+ if (mcp_config_update & CDNS_MCP_CONFIG_UPDATE_BIT)
+ dev_err(cdns->dev, "%s failed: MCP_CONFIG_UPDATE_BIT is not cleared\n", string);
+
+ i = 0;
+ while (mcp_control & CDNS_MCP_CONTROL_HW_RST) {
+ if (i == reset_iterations) {
+ dev_err(cdns->dev, "%s failed: MCP_CONTROL_HW_RST is not cleared\n", string);
+ break;
+ }
+
+ dev_dbg(cdns->dev, "%s: MCP_CONTROL_HW_RST is not cleared at iteration %d\n", string, i);
+ i++;
+
+ usleep_range(1000, 1500);
+ mcp_control = cdns_readl(cdns, CDNS_MCP_CONTROL);
+ }
+
+}
+EXPORT_SYMBOL(sdw_cdns_check_self_clearing_bits);
+
/*
* init routines
*/
@@ -946,10 +1032,7 @@ static void cdns_update_slave_status_work(struct work_struct *work)
*/
int sdw_cdns_exit_reset(struct sdw_cdns *cdns)
{
- /* program maximum length reset to be safe */
- cdns_updatel(cdns, CDNS_MCP_CONTROL,
- CDNS_MCP_CONTROL_RST_DELAY,
- CDNS_MCP_CONTROL_RST_DELAY);
+ /* keep reset delay unchanged to 4096 cycles */
/* use hardware generated reset */
cdns_updatel(cdns, CDNS_MCP_CONTROL,
@@ -1213,6 +1296,8 @@ int sdw_cdns_init(struct sdw_cdns *cdns)
cdns_init_clock_ctrl(cdns);
+ sdw_cdns_check_self_clearing_bits(cdns, __func__, false, 0);
+
/* reset msg_count to default value of FIFOLEVEL */
cdns->msg_count = cdns_readl(cdns, CDNS_MCP_FIFOLEVEL);
@@ -1286,20 +1371,37 @@ static int cdns_port_params(struct sdw_bus *bus,
struct sdw_port_params *p_params, unsigned int bank)
{
struct sdw_cdns *cdns = bus_to_cdns(bus);
- int dpn_config = 0, dpn_config_off;
+ int dpn_config_off_source;
+ int dpn_config_off_target;
+ int target_num = p_params->num;
+ int source_num = p_params->num;
+ bool override = false;
+ int dpn_config;
+
+ if (target_num == cdns->pdi_loopback_target &&
+ cdns->pdi_loopback_source != -1) {
+ source_num = cdns->pdi_loopback_source;
+ override = true;
+ }
- if (bank)
- dpn_config_off = CDNS_DPN_B1_CONFIG(p_params->num);
- else
- dpn_config_off = CDNS_DPN_B0_CONFIG(p_params->num);
+ if (bank) {
+ dpn_config_off_source = CDNS_DPN_B1_CONFIG(source_num);
+ dpn_config_off_target = CDNS_DPN_B1_CONFIG(target_num);
+ } else {
+ dpn_config_off_source = CDNS_DPN_B0_CONFIG(source_num);
+ dpn_config_off_target = CDNS_DPN_B0_CONFIG(target_num);
+ }
- dpn_config = cdns_readl(cdns, dpn_config_off);
+ dpn_config = cdns_readl(cdns, dpn_config_off_source);
- u32p_replace_bits(&dpn_config, (p_params->bps - 1), CDNS_DPN_CONFIG_WL);
- u32p_replace_bits(&dpn_config, p_params->flow_mode, CDNS_DPN_CONFIG_PORT_FLOW);
- u32p_replace_bits(&dpn_config, p_params->data_mode, CDNS_DPN_CONFIG_PORT_DAT);
+ /* use port params if there is no loopback, otherwise use source as is */
+ if (!override) {
+ u32p_replace_bits(&dpn_config, p_params->bps - 1, CDNS_DPN_CONFIG_WL);
+ u32p_replace_bits(&dpn_config, p_params->flow_mode, CDNS_DPN_CONFIG_PORT_FLOW);
+ u32p_replace_bits(&dpn_config, p_params->data_mode, CDNS_DPN_CONFIG_PORT_DAT);
+ }
- cdns_writel(cdns, dpn_config_off, dpn_config);
+ cdns_writel(cdns, dpn_config_off_target, dpn_config);
return 0;
}
@@ -1309,11 +1411,27 @@ static int cdns_transport_params(struct sdw_bus *bus,
enum sdw_reg_bank bank)
{
struct sdw_cdns *cdns = bus_to_cdns(bus);
- int dpn_offsetctrl = 0, dpn_offsetctrl_off;
- int dpn_config = 0, dpn_config_off;
- int dpn_hctrl = 0, dpn_hctrl_off;
- int num = t_params->port_num;
- int dpn_samplectrl_off;
+ int dpn_config;
+ int dpn_config_off_source;
+ int dpn_config_off_target;
+ int dpn_hctrl;
+ int dpn_hctrl_off_source;
+ int dpn_hctrl_off_target;
+ int dpn_offsetctrl;
+ int dpn_offsetctrl_off_source;
+ int dpn_offsetctrl_off_target;
+ int dpn_samplectrl;
+ int dpn_samplectrl_off_source;
+ int dpn_samplectrl_off_target;
+ int source_num = t_params->port_num;
+ int target_num = t_params->port_num;
+ bool override = false;
+
+ if (target_num == cdns->pdi_loopback_target &&
+ cdns->pdi_loopback_source != -1) {
+ source_num = cdns->pdi_loopback_source;
+ override = true;
+ }
/*
* Note: Only full data port is supported on the Master side for
@@ -1321,32 +1439,59 @@ static int cdns_transport_params(struct sdw_bus *bus,
*/
if (bank) {
- dpn_config_off = CDNS_DPN_B1_CONFIG(num);
- dpn_samplectrl_off = CDNS_DPN_B1_SAMPLE_CTRL(num);
- dpn_hctrl_off = CDNS_DPN_B1_HCTRL(num);
- dpn_offsetctrl_off = CDNS_DPN_B1_OFFSET_CTRL(num);
+ dpn_config_off_source = CDNS_DPN_B1_CONFIG(source_num);
+ dpn_hctrl_off_source = CDNS_DPN_B1_HCTRL(source_num);
+ dpn_offsetctrl_off_source = CDNS_DPN_B1_OFFSET_CTRL(source_num);
+ dpn_samplectrl_off_source = CDNS_DPN_B1_SAMPLE_CTRL(source_num);
+
+ dpn_config_off_target = CDNS_DPN_B1_CONFIG(target_num);
+ dpn_hctrl_off_target = CDNS_DPN_B1_HCTRL(target_num);
+ dpn_offsetctrl_off_target = CDNS_DPN_B1_OFFSET_CTRL(target_num);
+ dpn_samplectrl_off_target = CDNS_DPN_B1_SAMPLE_CTRL(target_num);
+
} else {
- dpn_config_off = CDNS_DPN_B0_CONFIG(num);
- dpn_samplectrl_off = CDNS_DPN_B0_SAMPLE_CTRL(num);
- dpn_hctrl_off = CDNS_DPN_B0_HCTRL(num);
- dpn_offsetctrl_off = CDNS_DPN_B0_OFFSET_CTRL(num);
+ dpn_config_off_source = CDNS_DPN_B0_CONFIG(source_num);
+ dpn_hctrl_off_source = CDNS_DPN_B0_HCTRL(source_num);
+ dpn_offsetctrl_off_source = CDNS_DPN_B0_OFFSET_CTRL(source_num);
+ dpn_samplectrl_off_source = CDNS_DPN_B0_SAMPLE_CTRL(source_num);
+
+ dpn_config_off_target = CDNS_DPN_B0_CONFIG(target_num);
+ dpn_hctrl_off_target = CDNS_DPN_B0_HCTRL(target_num);
+ dpn_offsetctrl_off_target = CDNS_DPN_B0_OFFSET_CTRL(target_num);
+ dpn_samplectrl_off_target = CDNS_DPN_B0_SAMPLE_CTRL(target_num);
}
- dpn_config = cdns_readl(cdns, dpn_config_off);
- u32p_replace_bits(&dpn_config, t_params->blk_grp_ctrl, CDNS_DPN_CONFIG_BGC);
- u32p_replace_bits(&dpn_config, t_params->blk_pkg_mode, CDNS_DPN_CONFIG_BPM);
- cdns_writel(cdns, dpn_config_off, dpn_config);
+ dpn_config = cdns_readl(cdns, dpn_config_off_source);
+ if (!override) {
+ u32p_replace_bits(&dpn_config, t_params->blk_grp_ctrl, CDNS_DPN_CONFIG_BGC);
+ u32p_replace_bits(&dpn_config, t_params->blk_pkg_mode, CDNS_DPN_CONFIG_BPM);
+ }
+ cdns_writel(cdns, dpn_config_off_target, dpn_config);
- u32p_replace_bits(&dpn_offsetctrl, t_params->offset1, CDNS_DPN_OFFSET_CTRL_1);
- u32p_replace_bits(&dpn_offsetctrl, t_params->offset2, CDNS_DPN_OFFSET_CTRL_2);
- cdns_writel(cdns, dpn_offsetctrl_off, dpn_offsetctrl);
+ if (!override) {
+ dpn_offsetctrl = 0;
+ u32p_replace_bits(&dpn_offsetctrl, t_params->offset1, CDNS_DPN_OFFSET_CTRL_1);
+ u32p_replace_bits(&dpn_offsetctrl, t_params->offset2, CDNS_DPN_OFFSET_CTRL_2);
+ } else {
+ dpn_offsetctrl = cdns_readl(cdns, dpn_offsetctrl_off_source);
+ }
+ cdns_writel(cdns, dpn_offsetctrl_off_target, dpn_offsetctrl);
- u32p_replace_bits(&dpn_hctrl, t_params->hstart, CDNS_DPN_HCTRL_HSTART);
- u32p_replace_bits(&dpn_hctrl, t_params->hstop, CDNS_DPN_HCTRL_HSTOP);
- u32p_replace_bits(&dpn_hctrl, t_params->lane_ctrl, CDNS_DPN_HCTRL_LCTRL);
+ if (!override) {
+ dpn_hctrl = 0;
+ u32p_replace_bits(&dpn_hctrl, t_params->hstart, CDNS_DPN_HCTRL_HSTART);
+ u32p_replace_bits(&dpn_hctrl, t_params->hstop, CDNS_DPN_HCTRL_HSTOP);
+ u32p_replace_bits(&dpn_hctrl, t_params->lane_ctrl, CDNS_DPN_HCTRL_LCTRL);
+ } else {
+ dpn_hctrl = cdns_readl(cdns, dpn_hctrl_off_source);
+ }
+ cdns_writel(cdns, dpn_hctrl_off_target, dpn_hctrl);
- cdns_writel(cdns, dpn_hctrl_off, dpn_hctrl);
- cdns_writel(cdns, dpn_samplectrl_off, (t_params->sample_interval - 1));
+ if (!override)
+ dpn_samplectrl = t_params->sample_interval - 1;
+ else
+ dpn_samplectrl = cdns_readl(cdns, dpn_samplectrl_off_source);
+ cdns_writel(cdns, dpn_samplectrl_off_target, dpn_samplectrl);
return 0;
}
@@ -1397,6 +1542,8 @@ int sdw_cdns_clock_stop(struct sdw_cdns *cdns, bool block_wake)
struct sdw_slave *slave;
int ret;
+ sdw_cdns_check_self_clearing_bits(cdns, __func__, false, 0);
+
/* Check suspend status */
if (sdw_cdns_is_clock_stop(cdns)) {
dev_dbg(cdns->dev, "Clock is already stopped\n");
diff --git a/drivers/soundwire/cadence_master.h b/drivers/soundwire/cadence_master.h
index 0e7f8b35bb21..e587aede63bf 100644
--- a/drivers/soundwire/cadence_master.h
+++ b/drivers/soundwire/cadence_master.h
@@ -129,6 +129,9 @@ struct sdw_cdns {
struct sdw_cdns_streams pcm;
struct sdw_cdns_streams pdm;
+ int pdi_loopback_source;
+ int pdi_loopback_target;
+
void __iomem *registers;
bool link_up;
@@ -184,4 +187,8 @@ int cdns_bus_conf(struct sdw_bus *bus, struct sdw_bus_params *params);
int cdns_set_sdw_stream(struct snd_soc_dai *dai,
void *stream, bool pcm, int direction);
+
+void sdw_cdns_check_self_clearing_bits(struct sdw_cdns *cdns, const char *string,
+ bool initial_delay, int reset_iterations);
+
#endif /* __SDW_CADENCE_H */
diff --git a/drivers/soundwire/dmi-quirks.c b/drivers/soundwire/dmi-quirks.c
index 5db0a2443a1d..0ca2a3e3a02e 100644
--- a/drivers/soundwire/dmi-quirks.c
+++ b/drivers/soundwire/dmi-quirks.c
@@ -16,18 +16,18 @@ struct adr_remap {
};
/*
- * HP Spectre 360 Convertible devices do not expose the correct _ADR
- * in the DSDT.
+ * Some TigerLake devices based on an initial Intel BIOS do not expose
+ * the correct _ADR in the DSDT.
* Remap the bad _ADR values to the ones reported by hardware
*/
-static const struct adr_remap hp_spectre_360[] = {
+static const struct adr_remap intel_tgl_bios[] = {
{
- 0x000010025D070100,
- 0x000020025D071100
+ 0x000010025D070100ull,
+ 0x000020025D071100ull
},
{
- 0x000110025d070100,
- 0x000120025D130800
+ 0x000110025d070100ull,
+ 0x000120025D130800ull
},
{}
};
@@ -39,18 +39,18 @@ static const struct adr_remap hp_spectre_360[] = {
static const struct adr_remap dell_sku_0A3E[] = {
/* rt715 on link0 */
{
- 0x00020025d071100,
- 0x00021025d071500
+ 0x00020025d071100ull,
+ 0x00021025d071500ull
},
/* rt711 on link1 */
{
- 0x000120025d130800,
- 0x000120025d071100,
+ 0x000120025d130800ull,
+ 0x000120025d071100ull,
},
/* rt1308 on link2 */
{
- 0x000220025d071500,
- 0x000220025d130800
+ 0x000220025d071500ull,
+ 0x000220025d130800ull
},
{}
};
@@ -61,7 +61,15 @@ static const struct dmi_system_id adr_remap_quirk_table[] = {
DMI_MATCH(DMI_SYS_VENDOR, "HP"),
DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Convertible"),
},
- .driver_data = (void *)hp_spectre_360,
+ .driver_data = (void *)intel_tgl_bios,
+ },
+ {
+ /* quirk used for NUC15 'Bishop County' LAPBC510 and LAPBC710 SKUs */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Intel(R) Client Systems"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "LAPBC"),
+ },
+ .driver_data = (void *)intel_tgl_bios,
},
{
.matches = {
diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index c11e3d8cd308..f66fcbc33a2f 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -23,6 +23,7 @@
#include "intel.h"
#define INTEL_MASTER_SUSPEND_DELAY_MS 3000
+#define INTEL_MASTER_RESET_ITERATIONS 10
/*
* debug/config flags for the Intel SoundWire Master.
@@ -537,12 +538,14 @@ static int intel_link_power_down(struct sdw_intel *sdw)
mutex_lock(sdw->link_res->shim_lock);
- intel_shim_master_ip_to_glue(sdw);
-
if (!(*shim_mask & BIT(link_id)))
dev_err(sdw->cdns.dev,
"%s: Unbalanced power-up/down calls\n", __func__);
+ sdw->cdns.link_up = false;
+
+ intel_shim_master_ip_to_glue(sdw);
+
*shim_mask &= ~BIT(link_id);
if (!*shim_mask) {
@@ -559,18 +562,19 @@ static int intel_link_power_down(struct sdw_intel *sdw)
link_control &= spa_mask;
ret = intel_clear_bit(shim, SDW_SHIM_LCTL, link_control, cpa_mask);
+ if (ret < 0) {
+ dev_err(sdw->cdns.dev, "%s: could not power down link\n", __func__);
+
+ /*
+ * we leave the sdw->cdns.link_up flag as false since we've disabled
+ * the link at this point and cannot handle interrupts any longer.
+ */
+ }
}
mutex_unlock(sdw->link_res->shim_lock);
- if (ret < 0) {
- dev_err(sdw->cdns.dev, "%s: could not power down link\n", __func__);
-
- return ret;
- }
-
- sdw->cdns.link_up = false;
- return 0;
+ return ret;
}
static void intel_shim_sync_arm(struct sdw_intel *sdw)
@@ -1467,6 +1471,8 @@ int intel_link_startup(struct auxiliary_device *auxdev)
goto err_interrupt;
}
}
+ sdw_cdns_check_self_clearing_bits(cdns, __func__,
+ true, INTEL_MASTER_RESET_ITERATIONS);
/* Register DAIs */
ret = intel_register_dai(sdw);
@@ -1519,6 +1525,7 @@ int intel_link_startup(struct auxiliary_device *auxdev)
if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE))
pm_runtime_idle(dev);
+ sdw->startup_done = true;
return 0;
err_interrupt:
@@ -1558,8 +1565,9 @@ int intel_link_process_wakeen_event(struct auxiliary_device *auxdev)
sdw = dev_get_drvdata(dev);
bus = &sdw->cdns.bus;
- if (bus->prop.hw_disabled) {
- dev_dbg(dev, "SoundWire master %d is disabled, ignoring\n", bus->link_id);
+ if (bus->prop.hw_disabled || !sdw->startup_done) {
+ dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
+ bus->link_id);
return 0;
}
@@ -1588,6 +1596,87 @@ int intel_link_process_wakeen_event(struct auxiliary_device *auxdev)
* PM calls
*/
+static int intel_resume_child_device(struct device *dev, void *data)
+{
+ int ret;
+ struct sdw_slave *slave = dev_to_sdw_dev(dev);
+
+ if (!slave->probed) {
+ dev_dbg(dev, "%s: skipping device, no probed driver\n", __func__);
+ return 0;
+ }
+ if (!slave->dev_num_sticky) {
+ dev_dbg(dev, "%s: skipping device, never detected on bus\n", __func__);
+ return 0;
+ }
+
+ ret = pm_request_resume(dev);
+ if (ret < 0)
+ dev_err(dev, "%s: pm_request_resume failed: %d\n", __func__, ret);
+
+ return ret;
+}
+
+static int __maybe_unused intel_pm_prepare(struct device *dev)
+{
+ struct sdw_cdns *cdns = dev_get_drvdata(dev);
+ struct sdw_intel *sdw = cdns_to_intel(cdns);
+ struct sdw_bus *bus = &cdns->bus;
+ u32 clock_stop_quirks;
+ int ret = 0;
+
+ if (bus->prop.hw_disabled || !sdw->startup_done) {
+ dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
+ bus->link_id);
+ return 0;
+ }
+
+ clock_stop_quirks = sdw->link_res->clock_stop_quirks;
+
+ if (pm_runtime_suspended(dev) &&
+ pm_runtime_suspended(dev->parent) &&
+ ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) ||
+ !clock_stop_quirks)) {
+ /*
+ * if we've enabled clock stop, and the parent is suspended, the SHIM registers
+ * are not accessible and the shim wake cannot be disabled.
+ * The only solution is to resume the entire bus to full power
+ */
+
+ /*
+ * If any operation in this block fails, we keep going since we don't want
+ * to prevent system suspend from happening and errors should be recoverable
+ * on resume.
+ */
+
+ /*
+ * first resume the device for this link. This will also by construction
+ * resume the PCI parent device.
+ */
+ ret = pm_request_resume(dev);
+ if (ret < 0) {
+ dev_err(dev, "%s: pm_request_resume failed: %d\n", __func__, ret);
+ return 0;
+ }
+
+ /*
+ * Continue resuming the entire bus (parent + child devices) to exit
+ * the clock stop mode. If there are no devices connected on this link
+ * this is a no-op.
+ * The resume to full power could have been implemented with a .prepare
+ * step in SoundWire codec drivers. This would however require a lot
+ * of code to handle an Intel-specific corner case. It is simpler in
+ * practice to add a loop at the link level.
+ */
+ ret = device_for_each_child(bus->dev, NULL, intel_resume_child_device);
+
+ if (ret < 0)
+ dev_err(dev, "%s: intel_resume_child_device failed: %d\n", __func__, ret);
+ }
+
+ return 0;
+}
+
static int __maybe_unused intel_suspend(struct device *dev)
{
struct sdw_cdns *cdns = dev_get_drvdata(dev);
@@ -1596,8 +1685,8 @@ static int __maybe_unused intel_suspend(struct device *dev)
u32 clock_stop_quirks;
int ret;
- if (bus->prop.hw_disabled) {
- dev_dbg(dev, "SoundWire master %d is disabled, ignoring\n",
+ if (bus->prop.hw_disabled || !sdw->startup_done) {
+ dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
bus->link_id);
return 0;
}
@@ -1607,19 +1696,18 @@ static int __maybe_unused intel_suspend(struct device *dev)
clock_stop_quirks = sdw->link_res->clock_stop_quirks;
- if ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET ||
- !clock_stop_quirks) &&
- !pm_runtime_suspended(dev->parent)) {
-
- /*
- * if we've enabled clock stop, and the parent
- * is still active, disable shim wake. The
- * SHIM registers are not accessible if the
- * parent is already pm_runtime suspended so
- * it's too late to change that configuration
- */
-
- intel_shim_wake(sdw, false);
+ if ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) ||
+ !clock_stop_quirks) {
+
+ if (pm_runtime_suspended(dev->parent)) {
+ /*
+ * paranoia check: this should not happen with the .prepare
+ * resume to full power
+ */
+ dev_err(dev, "%s: invalid config: parent is suspended\n", __func__);
+ } else {
+ intel_shim_wake(sdw, false);
+ }
}
return 0;
@@ -1650,8 +1738,8 @@ static int __maybe_unused intel_suspend_runtime(struct device *dev)
u32 clock_stop_quirks;
int ret;
- if (bus->prop.hw_disabled) {
- dev_dbg(dev, "SoundWire master %d is disabled, ignoring\n",
+ if (bus->prop.hw_disabled || !sdw->startup_done) {
+ dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
bus->link_id);
return 0;
}
@@ -1715,8 +1803,8 @@ static int __maybe_unused intel_resume(struct device *dev)
bool multi_link;
int ret;
- if (bus->prop.hw_disabled) {
- dev_dbg(dev, "SoundWire master %d is disabled, ignoring\n",
+ if (bus->prop.hw_disabled || !sdw->startup_done) {
+ dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
bus->link_id);
return 0;
}
@@ -1783,6 +1871,8 @@ static int __maybe_unused intel_resume(struct device *dev)
return ret;
}
}
+ sdw_cdns_check_self_clearing_bits(cdns, __func__,
+ true, INTEL_MASTER_RESET_ITERATIONS);
/*
* after system resume, the pm_runtime suspend() may kick in
@@ -1811,8 +1901,8 @@ static int __maybe_unused intel_resume_runtime(struct device *dev)
int status;
int ret;
- if (bus->prop.hw_disabled) {
- dev_dbg(dev, "SoundWire master %d is disabled, ignoring\n",
+ if (bus->prop.hw_disabled || !sdw->startup_done) {
+ dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
bus->link_id);
return 0;
}
@@ -1867,6 +1957,9 @@ static int __maybe_unused intel_resume_runtime(struct device *dev)
return ret;
}
}
+ sdw_cdns_check_self_clearing_bits(cdns, "intel_resume_runtime TEARDOWN",
+ true, INTEL_MASTER_RESET_ITERATIONS);
+
} else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) {
ret = intel_init(sdw);
if (ret) {
@@ -1940,6 +2033,9 @@ static int __maybe_unused intel_resume_runtime(struct device *dev)
}
}
}
+ sdw_cdns_check_self_clearing_bits(cdns, "intel_resume_runtime BUS_RESET",
+ true, INTEL_MASTER_RESET_ITERATIONS);
+
} else if (!clock_stop_quirks) {
clock_stop0 = sdw_cdns_is_clock_stop(&sdw->cdns);
@@ -1963,6 +2059,9 @@ static int __maybe_unused intel_resume_runtime(struct device *dev)
dev_err(dev, "unable to resume master during resume\n");
return ret;
}
+
+ sdw_cdns_check_self_clearing_bits(cdns, "intel_resume_runtime no_quirks",
+ true, INTEL_MASTER_RESET_ITERATIONS);
} else {
dev_err(dev, "%s clock_stop_quirks %x unsupported\n",
__func__, clock_stop_quirks);
@@ -1973,6 +2072,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev)
}
static const struct dev_pm_ops intel_pm = {
+ .prepare = intel_pm_prepare,
SET_SYSTEM_SLEEP_PM_OPS(intel_suspend, intel_resume)
SET_RUNTIME_PM_OPS(intel_suspend_runtime, intel_resume_runtime, NULL)
};
diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h
index 0b47b148da3f..cd93a44dba9a 100644
--- a/drivers/soundwire/intel.h
+++ b/drivers/soundwire/intel.h
@@ -41,6 +41,7 @@ struct sdw_intel {
struct sdw_cdns cdns;
int instance;
struct sdw_intel_link_res *link_res;
+ bool startup_done;
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs;
#endif
diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c
index 1a18308f4ef4..5d4f6b308ef7 100644
--- a/drivers/soundwire/stream.c
+++ b/drivers/soundwire/stream.c
@@ -133,6 +133,9 @@ static int sdw_program_slave_port_params(struct sdw_bus *bus,
int ret;
u8 wbuf;
+ if (s_rt->slave->is_mockup_device)
+ return 0;
+
dpn_prop = sdw_get_slave_dpn_prop(s_rt->slave,
s_rt->direction,
t_params->port_num);
@@ -697,7 +700,7 @@ static int sdw_bank_switch(struct sdw_bus *bus, int m_rt_count)
else
ret = sdw_transfer(bus, wr_msg);
- if (ret < 0) {
+ if (ret < 0 && ret != -ENODATA) {
dev_err(bus->dev, "Slave frame_ctrl reg write failed\n");
goto error;
}
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index e71a4c514f7b..83e352b0c8f9 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -658,6 +658,18 @@ config SPI_ROCKCHIP
The main usecase of this controller is to use spi flash as boot
device.
+config SPI_ROCKCHIP_SFC
+ tristate "Rockchip Serial Flash Controller (SFC)"
+ depends on ARCH_ROCKCHIP || COMPILE_TEST
+ depends on HAS_IOMEM && HAS_DMA
+ help
+ This enables support for Rockchip serial flash controller. This
+ is a specialized controller used to access SPI flash on some
+ Rockchip SOCs.
+
+ ROCKCHIP SFC supports DMA and PIO modes. When DMA is not available,
+ the driver automatically falls back to PIO mode.
+
config SPI_RB4XX
tristate "Mikrotik RB4XX SPI master"
depends on SPI_MASTER && ATH79
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 13e54c45e9df..699db95c8441 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -95,6 +95,7 @@ obj-$(CONFIG_SPI_QCOM_GENI) += spi-geni-qcom.o
obj-$(CONFIG_SPI_QCOM_QSPI) += spi-qcom-qspi.o
obj-$(CONFIG_SPI_QUP) += spi-qup.o
obj-$(CONFIG_SPI_ROCKCHIP) += spi-rockchip.o
+obj-$(CONFIG_SPI_ROCKCHIP_SFC) += spi-rockchip-sfc.o
obj-$(CONFIG_SPI_RB4XX) += spi-rb4xx.o
obj-$(CONFIG_MACH_REALTEK_RTL) += spi-realtek-rtl.o
obj-$(CONFIG_SPI_RPCIF) += spi-rpc-if.o
diff --git a/drivers/spi/spi-altera-dfl.c b/drivers/spi/spi-altera-dfl.c
index 39a3e1a032e0..44fc9ee13fc7 100644
--- a/drivers/spi/spi-altera-dfl.c
+++ b/drivers/spi/spi-altera-dfl.c
@@ -104,13 +104,6 @@ static const struct regmap_config indirect_regbus_cfg = {
.reg_read = indirect_bus_reg_read,
};
-static struct spi_board_info m10_bmc_info = {
- .modalias = "m10-d5005",
- .max_speed_hz = 12500000,
- .bus_num = 0,
- .chip_select = 0,
-};
-
static void config_spi_master(void __iomem *base, struct spi_master *master)
{
u64 v;
@@ -130,6 +123,7 @@ static void config_spi_master(void __iomem *base, struct spi_master *master)
static int dfl_spi_altera_probe(struct dfl_device *dfl_dev)
{
+ struct spi_board_info board_info = { 0 };
struct device *dev = &dfl_dev->dev;
struct spi_master *master;
struct altera_spi *hw;
@@ -170,9 +164,18 @@ static int dfl_spi_altera_probe(struct dfl_device *dfl_dev)
goto exit;
}
- if (!spi_new_device(master, &m10_bmc_info)) {
+ if (dfl_dev->revision == FME_FEATURE_REV_MAX10_SPI_N5010)
+ strscpy(board_info.modalias, "m10-n5010", SPI_NAME_SIZE);
+ else
+ strscpy(board_info.modalias, "m10-d5005", SPI_NAME_SIZE);
+
+ board_info.max_speed_hz = 12500000;
+ board_info.bus_num = 0;
+ board_info.chip_select = 0;
+
+ if (!spi_new_device(master, &board_info)) {
dev_err(dev, "%s failed to create SPI device: %s\n",
- __func__, m10_bmc_info.modalias);
+ __func__, board_info.modalias);
}
return 0;
diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c
index 37eab100a7d8..7d709a8c833b 100644
--- a/drivers/spi/spi-bcm2835aux.c
+++ b/drivers/spi/spi-bcm2835aux.c
@@ -143,12 +143,12 @@ static void bcm2835aux_debugfs_remove(struct bcm2835aux_spi *bs)
}
#endif /* CONFIG_DEBUG_FS */
-static inline u32 bcm2835aux_rd(struct bcm2835aux_spi *bs, unsigned reg)
+static inline u32 bcm2835aux_rd(struct bcm2835aux_spi *bs, unsigned int reg)
{
return readl(bs->regs + reg);
}
-static inline void bcm2835aux_wr(struct bcm2835aux_spi *bs, unsigned reg,
+static inline void bcm2835aux_wr(struct bcm2835aux_spi *bs, unsigned int reg,
u32 val)
{
writel(val, bs->regs + reg);
diff --git a/drivers/spi/spi-coldfire-qspi.c b/drivers/spi/spi-coldfire-qspi.c
index 8996115ce736..263ce9047327 100644
--- a/drivers/spi/spi-coldfire-qspi.c
+++ b/drivers/spi/spi-coldfire-qspi.c
@@ -444,7 +444,7 @@ static int mcfqspi_remove(struct platform_device *pdev)
mcfqspi_wr_qmr(mcfqspi, MCFQSPI_QMR_MSTR);
mcfqspi_cs_teardown(mcfqspi);
- clk_disable(mcfqspi->clk);
+ clk_disable_unprepare(mcfqspi->clk);
return 0;
}
diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index e114e6fe5ea5..d112c2cac042 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -213,12 +213,6 @@ static void davinci_spi_chipselect(struct spi_device *spi, int value)
* line for the controller
*/
if (spi->cs_gpiod) {
- /*
- * FIXME: is this code ever executed? This host does not
- * set SPI_MASTER_GPIO_SS so this chipselect callback should
- * not get called from the SPI core when we are using
- * GPIOs for chip select.
- */
if (value == BITBANG_CS_ACTIVE)
gpiod_set_value(spi->cs_gpiod, 1);
else
@@ -945,7 +939,7 @@ static int davinci_spi_probe(struct platform_device *pdev)
master->bus_num = pdev->id;
master->num_chipselect = pdata->num_chipselect;
master->bits_per_word_mask = SPI_BPW_RANGE_MASK(2, 16);
- master->flags = SPI_MASTER_MUST_RX;
+ master->flags = SPI_MASTER_MUST_RX | SPI_MASTER_GPIO_SS;
master->setup = davinci_spi_setup;
master->cleanup = davinci_spi_cleanup;
master->can_dma = davinci_spi_can_dma;
diff --git a/drivers/spi/spi-ep93xx.c b/drivers/spi/spi-ep93xx.c
index aa676559d273..5896a7b2fade 100644
--- a/drivers/spi/spi-ep93xx.c
+++ b/drivers/spi/spi-ep93xx.c
@@ -550,7 +550,7 @@ static int ep93xx_spi_prepare_hardware(struct spi_master *master)
u32 val;
int ret;
- ret = clk_enable(espi->clk);
+ ret = clk_prepare_enable(espi->clk);
if (ret)
return ret;
@@ -570,7 +570,7 @@ static int ep93xx_spi_unprepare_hardware(struct spi_master *master)
val &= ~SSPCR1_SSE;
writel(val, espi->mmio + SSPCR1);
- clk_disable(espi->clk);
+ clk_disable_unprepare(espi->clk);
return 0;
}
diff --git a/drivers/spi/spi-fsi.c b/drivers/spi/spi-fsi.c
index 87f8829c3995..829770b8ec74 100644
--- a/drivers/spi/spi-fsi.c
+++ b/drivers/spi/spi-fsi.c
@@ -25,16 +25,11 @@
#define SPI_FSI_BASE 0x70000
#define SPI_FSI_INIT_TIMEOUT_MS 1000
-#define SPI_FSI_MAX_XFR_SIZE 2048
-#define SPI_FSI_MAX_XFR_SIZE_RESTRICTED 8
+#define SPI_FSI_MAX_RX_SIZE 8
+#define SPI_FSI_MAX_TX_SIZE 40
#define SPI_FSI_ERROR 0x0
#define SPI_FSI_COUNTER_CFG 0x1
-#define SPI_FSI_COUNTER_CFG_LOOPS(x) (((u64)(x) & 0xffULL) << 32)
-#define SPI_FSI_COUNTER_CFG_N2_RX BIT_ULL(8)
-#define SPI_FSI_COUNTER_CFG_N2_TX BIT_ULL(9)
-#define SPI_FSI_COUNTER_CFG_N2_IMPLICIT BIT_ULL(10)
-#define SPI_FSI_COUNTER_CFG_N2_RELOAD BIT_ULL(11)
#define SPI_FSI_CFG1 0x2
#define SPI_FSI_CLOCK_CFG 0x3
#define SPI_FSI_CLOCK_CFG_MM_ENABLE BIT_ULL(32)
@@ -76,8 +71,6 @@ struct fsi_spi {
struct device *dev; /* SPI controller device */
struct fsi_device *fsi; /* FSI2SPI CFAM engine device */
u32 base;
- size_t max_xfr_size;
- bool restricted;
};
struct fsi_spi_sequence {
@@ -241,7 +234,7 @@ static int fsi_spi_reset(struct fsi_spi *ctx)
return fsi_spi_write_reg(ctx, SPI_FSI_STATUS, 0ULL);
}
-static int fsi_spi_sequence_add(struct fsi_spi_sequence *seq, u8 val)
+static void fsi_spi_sequence_add(struct fsi_spi_sequence *seq, u8 val)
{
/*
* Add the next byte of instruction to the 8-byte sequence register.
@@ -251,8 +244,6 @@ static int fsi_spi_sequence_add(struct fsi_spi_sequence *seq, u8 val)
*/
seq->data |= (u64)val << seq->bit;
seq->bit -= 8;
-
- return ((64 - seq->bit) / 8) - 2;
}
static void fsi_spi_sequence_init(struct fsi_spi_sequence *seq)
@@ -261,71 +252,11 @@ static void fsi_spi_sequence_init(struct fsi_spi_sequence *seq)
seq->data = 0ULL;
}
-static int fsi_spi_sequence_transfer(struct fsi_spi *ctx,
- struct fsi_spi_sequence *seq,
- struct spi_transfer *transfer)
-{
- int loops;
- int idx;
- int rc;
- u8 val = 0;
- u8 len = min(transfer->len, 8U);
- u8 rem = transfer->len % len;
-
- loops = transfer->len / len;
-
- if (transfer->tx_buf) {
- val = SPI_FSI_SEQUENCE_SHIFT_OUT(len);
- idx = fsi_spi_sequence_add(seq, val);
-
- if (rem)
- rem = SPI_FSI_SEQUENCE_SHIFT_OUT(rem);
- } else if (transfer->rx_buf) {
- val = SPI_FSI_SEQUENCE_SHIFT_IN(len);
- idx = fsi_spi_sequence_add(seq, val);
-
- if (rem)
- rem = SPI_FSI_SEQUENCE_SHIFT_IN(rem);
- } else {
- return -EINVAL;
- }
-
- if (ctx->restricted && loops > 1) {
- dev_warn(ctx->dev,
- "Transfer too large; no branches permitted.\n");
- return -EINVAL;
- }
-
- if (loops > 1) {
- u64 cfg = SPI_FSI_COUNTER_CFG_LOOPS(loops - 1);
-
- fsi_spi_sequence_add(seq, SPI_FSI_SEQUENCE_BRANCH(idx));
-
- if (transfer->rx_buf)
- cfg |= SPI_FSI_COUNTER_CFG_N2_RX |
- SPI_FSI_COUNTER_CFG_N2_TX |
- SPI_FSI_COUNTER_CFG_N2_IMPLICIT |
- SPI_FSI_COUNTER_CFG_N2_RELOAD;
-
- rc = fsi_spi_write_reg(ctx, SPI_FSI_COUNTER_CFG, cfg);
- if (rc)
- return rc;
- } else {
- fsi_spi_write_reg(ctx, SPI_FSI_COUNTER_CFG, 0ULL);
- }
-
- if (rem)
- fsi_spi_sequence_add(seq, rem);
-
- return 0;
-}
-
static int fsi_spi_transfer_data(struct fsi_spi *ctx,
struct spi_transfer *transfer)
{
int rc = 0;
u64 status = 0ULL;
- u64 cfg = 0ULL;
if (transfer->tx_buf) {
int nb;
@@ -363,16 +294,6 @@ static int fsi_spi_transfer_data(struct fsi_spi *ctx,
u64 in = 0ULL;
u8 *rx = transfer->rx_buf;
- rc = fsi_spi_read_reg(ctx, SPI_FSI_COUNTER_CFG, &cfg);
- if (rc)
- return rc;
-
- if (cfg & SPI_FSI_COUNTER_CFG_N2_IMPLICIT) {
- rc = fsi_spi_write_reg(ctx, SPI_FSI_DATA_TX, 0);
- if (rc)
- return rc;
- }
-
while (transfer->len > recv) {
do {
rc = fsi_spi_read_reg(ctx, SPI_FSI_STATUS,
@@ -439,6 +360,10 @@ static int fsi_spi_transfer_init(struct fsi_spi *ctx)
}
} while (seq_state && (seq_state != SPI_FSI_STATUS_SEQ_STATE_IDLE));
+ rc = fsi_spi_write_reg(ctx, SPI_FSI_COUNTER_CFG, 0ULL);
+ if (rc)
+ return rc;
+
rc = fsi_spi_read_reg(ctx, SPI_FSI_CLOCK_CFG, &clock_cfg);
if (rc)
return rc;
@@ -459,6 +384,7 @@ static int fsi_spi_transfer_one_message(struct spi_controller *ctlr,
{
int rc;
u8 seq_slave = SPI_FSI_SEQUENCE_SEL_SLAVE(mesg->spi->chip_select + 1);
+ unsigned int len;
struct spi_transfer *transfer;
struct fsi_spi *ctx = spi_controller_get_devdata(ctlr);
@@ -471,8 +397,7 @@ static int fsi_spi_transfer_one_message(struct spi_controller *ctlr,
struct spi_transfer *next = NULL;
/* Sequencer must do shift out (tx) first. */
- if (!transfer->tx_buf ||
- transfer->len > (ctx->max_xfr_size + 8)) {
+ if (!transfer->tx_buf || transfer->len > SPI_FSI_MAX_TX_SIZE) {
rc = -EINVAL;
goto error;
}
@@ -486,9 +411,13 @@ static int fsi_spi_transfer_one_message(struct spi_controller *ctlr,
fsi_spi_sequence_init(&seq);
fsi_spi_sequence_add(&seq, seq_slave);
- rc = fsi_spi_sequence_transfer(ctx, &seq, transfer);
- if (rc)
- goto error;
+ len = transfer->len;
+ while (len > 8) {
+ fsi_spi_sequence_add(&seq,
+ SPI_FSI_SEQUENCE_SHIFT_OUT(8));
+ len -= 8;
+ }
+ fsi_spi_sequence_add(&seq, SPI_FSI_SEQUENCE_SHIFT_OUT(len));
if (!list_is_last(&transfer->transfer_list,
&mesg->transfers)) {
@@ -496,7 +425,9 @@ static int fsi_spi_transfer_one_message(struct spi_controller *ctlr,
/* Sequencer can only do shift in (rx) after tx. */
if (next->rx_buf) {
- if (next->len > ctx->max_xfr_size) {
+ u8 shift;
+
+ if (next->len > SPI_FSI_MAX_RX_SIZE) {
rc = -EINVAL;
goto error;
}
@@ -504,10 +435,8 @@ static int fsi_spi_transfer_one_message(struct spi_controller *ctlr,
dev_dbg(ctx->dev, "Sequence rx of %d bytes.\n",
next->len);
- rc = fsi_spi_sequence_transfer(ctx, &seq,
- next);
- if (rc)
- goto error;
+ shift = SPI_FSI_SEQUENCE_SHIFT_IN(next->len);
+ fsi_spi_sequence_add(&seq, shift);
} else {
next = NULL;
}
@@ -541,9 +470,7 @@ error:
static size_t fsi_spi_max_transfer_size(struct spi_device *spi)
{
- struct fsi_spi *ctx = spi_controller_get_devdata(spi->controller);
-
- return ctx->max_xfr_size;
+ return SPI_FSI_MAX_RX_SIZE;
}
static int fsi_spi_probe(struct device *dev)
@@ -582,14 +509,6 @@ static int fsi_spi_probe(struct device *dev)
ctx->fsi = fsi;
ctx->base = base + SPI_FSI_BASE;
- if (of_device_is_compatible(np, "ibm,fsi2spi-restricted")) {
- ctx->restricted = true;
- ctx->max_xfr_size = SPI_FSI_MAX_XFR_SIZE_RESTRICTED;
- } else {
- ctx->restricted = false;
- ctx->max_xfr_size = SPI_FSI_MAX_XFR_SIZE;
- }
-
rc = devm_spi_register_controller(dev, ctlr);
if (rc)
spi_controller_put(ctlr);
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index fb45e6af6638..fd004c9db9dc 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -530,6 +530,7 @@ static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)
goto err_rx_dma_buf;
}
+ memset(&cfg, 0, sizeof(cfg));
cfg.src_addr = phy_addr + SPI_POPR;
cfg.dst_addr = phy_addr + SPI_PUSHR;
cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index b3861fb88711..2f51421e2a71 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -549,12 +549,6 @@ static void setup_fifo_xfer(struct spi_transfer *xfer,
*/
spin_lock_irq(&mas->lock);
geni_se_setup_m_cmd(se, m_cmd, FRAGMENTATION);
-
- /*
- * TX_WATERMARK_REG should be set after SPI configuration and
- * setting up GENI SE engine, as driver starts data transfer
- * for the watermark interrupt.
- */
if (m_cmd & SPI_TX_ONLY) {
if (geni_spi_handle_tx(mas))
writel(mas->tx_wm, se->base + SE_GENI_TX_WATERMARK_REG);
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index fa68e9817929..8d8df51c5466 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -1052,12 +1052,8 @@ static void spi_imx_set_burst_len(struct spi_imx_data *spi_imx, int n_bits)
static void spi_imx_push(struct spi_imx_data *spi_imx)
{
- unsigned int burst_len, fifo_words;
+ unsigned int burst_len;
- if (spi_imx->dynamic_burst)
- fifo_words = 4;
- else
- fifo_words = spi_imx_bytes_per_word(spi_imx->bits_per_word);
/*
* Reload the FIFO when the remaining bytes to be transferred in the
* current burst is 0. This only applies when bits_per_word is a
@@ -1076,7 +1072,7 @@ static void spi_imx_push(struct spi_imx_data *spi_imx)
spi_imx->remainder = burst_len;
} else {
- spi_imx->remainder = fifo_words;
+ spi_imx->remainder = spi_imx_bytes_per_word(spi_imx->bits_per_word);
}
}
@@ -1084,8 +1080,7 @@ static void spi_imx_push(struct spi_imx_data *spi_imx)
if (!spi_imx->count)
break;
if (spi_imx->dynamic_burst &&
- spi_imx->txfifo >= DIV_ROUND_UP(spi_imx->remainder,
- fifo_words))
+ spi_imx->txfifo >= DIV_ROUND_UP(spi_imx->remainder, 4))
break;
spi_imx->tx(spi_imx);
spi_imx->txfifo++;
@@ -1195,6 +1190,7 @@ static int spi_imx_setupxfer(struct spi_device *spi,
* dynamic_burst in that case.
*/
if (spi_imx->devtype_data->dynamic_burst && !spi_imx->slave_mode &&
+ !(spi->mode & SPI_CS_WORD) &&
(spi_imx->bits_per_word == 8 ||
spi_imx->bits_per_word == 16 ||
spi_imx->bits_per_word == 32)) {
@@ -1630,6 +1626,15 @@ static int spi_imx_probe(struct platform_device *pdev)
is_imx53_ecspi(spi_imx))
spi_imx->bitbang.master->mode_bits |= SPI_LOOP | SPI_READY;
+ if (is_imx51_ecspi(spi_imx) &&
+ device_property_read_u32(&pdev->dev, "cs-gpios", NULL))
+ /*
+ * When using HW-CS implementing SPI_CS_WORD can be done by just
+ * setting the burst length to the word size. This is
+ * considerably faster than manually controlling the CS.
+ */
+ spi_imx->bitbang.master->mode_bits |= SPI_CS_WORD;
+
spi_imx->spi_drctl = spi_drctl;
init_completion(&spi_imx->xfer_done);
diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index 7914255521c3..386e8c84be0a 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -42,8 +42,9 @@
#define SPI_CFG1_CS_IDLE_OFFSET 0
#define SPI_CFG1_PACKET_LOOP_OFFSET 8
#define SPI_CFG1_PACKET_LENGTH_OFFSET 16
-#define SPI_CFG1_GET_TICK_DLY_OFFSET 30
+#define SPI_CFG1_GET_TICK_DLY_OFFSET 29
+#define SPI_CFG1_GET_TICK_DLY_MASK 0xe0000000
#define SPI_CFG1_CS_IDLE_MASK 0xff
#define SPI_CFG1_PACKET_LOOP_MASK 0xff00
#define SPI_CFG1_PACKET_LENGTH_MASK 0x3ff0000
@@ -90,6 +91,8 @@ struct mtk_spi_compatible {
bool enhance_timing;
/* some IC support DMA addr extension */
bool dma_ext;
+ /* some ICs do not need the SPI clk to be unprepared */
+ bool no_need_unprepare;
};
struct mtk_spi {
@@ -104,6 +107,7 @@ struct mtk_spi {
struct scatterlist *tx_sgl, *rx_sgl;
u32 tx_sgl_len, rx_sgl_len;
const struct mtk_spi_compatible *dev_comp;
+ u32 spi_clk_hz;
};
static const struct mtk_spi_compatible mtk_common_compat;
@@ -135,12 +139,21 @@ static const struct mtk_spi_compatible mt8183_compat = {
.enhance_timing = true,
};
+static const struct mtk_spi_compatible mt6893_compat = {
+ .need_pad_sel = true,
+ .must_tx = true,
+ .enhance_timing = true,
+ .dma_ext = true,
+ .no_need_unprepare = true,
+};
+
/*
* A piece of default chip info unless the platform
* supplies it.
*/
static const struct mtk_chip_config mtk_default_chip_info = {
.sample_sel = 0,
+ .tick_delay = 0,
};
static const struct of_device_id mtk_spi_of_match[] = {
@@ -174,6 +187,9 @@ static const struct of_device_id mtk_spi_of_match[] = {
{ .compatible = "mediatek,mt8192-spi",
.data = (void *)&mt6765_compat,
},
+ { .compatible = "mediatek,mt6893-spi",
+ .data = (void *)&mt6893_compat,
+ },
{}
};
MODULE_DEVICE_TABLE(of, mtk_spi_of_match);
@@ -192,6 +208,65 @@ static void mtk_spi_reset(struct mtk_spi *mdata)
writel(reg_val, mdata->base + SPI_CMD_REG);
}
+static int mtk_spi_set_hw_cs_timing(struct spi_device *spi)
+{
+ struct mtk_spi *mdata = spi_master_get_devdata(spi->master);
+ struct spi_delay *cs_setup = &spi->cs_setup;
+ struct spi_delay *cs_hold = &spi->cs_hold;
+ struct spi_delay *cs_inactive = &spi->cs_inactive;
+ u32 setup, hold, inactive;
+ u32 reg_val;
+ int delay;
+
+ delay = spi_delay_to_ns(cs_setup, NULL);
+ if (delay < 0)
+ return delay;
+ setup = (delay * DIV_ROUND_UP(mdata->spi_clk_hz, 1000000)) / 1000;
+
+ delay = spi_delay_to_ns(cs_hold, NULL);
+ if (delay < 0)
+ return delay;
+ hold = (delay * DIV_ROUND_UP(mdata->spi_clk_hz, 1000000)) / 1000;
+
+ delay = spi_delay_to_ns(cs_inactive, NULL);
+ if (delay < 0)
+ return delay;
+ inactive = (delay * DIV_ROUND_UP(mdata->spi_clk_hz, 1000000)) / 1000;
+
+ setup = setup ? setup : 1;
+ hold = hold ? hold : 1;
+ inactive = inactive ? inactive : 1;
+
+ reg_val = readl(mdata->base + SPI_CFG0_REG);
+ if (mdata->dev_comp->enhance_timing) {
+ hold = min_t(u32, hold, 0x10000);
+ setup = min_t(u32, setup, 0x10000);
+ reg_val &= ~(0xffff << SPI_ADJUST_CFG0_CS_HOLD_OFFSET);
+ reg_val |= (((hold - 1) & 0xffff)
+ << SPI_ADJUST_CFG0_CS_HOLD_OFFSET);
+ reg_val &= ~(0xffff << SPI_ADJUST_CFG0_CS_SETUP_OFFSET);
+ reg_val |= (((setup - 1) & 0xffff)
+ << SPI_ADJUST_CFG0_CS_SETUP_OFFSET);
+ } else {
+ hold = min_t(u32, hold, 0x100);
+ setup = min_t(u32, setup, 0x100);
+ reg_val &= ~(0xff << SPI_CFG0_CS_HOLD_OFFSET);
+ reg_val |= (((hold - 1) & 0xff) << SPI_CFG0_CS_HOLD_OFFSET);
+ reg_val &= ~(0xff << SPI_CFG0_CS_SETUP_OFFSET);
+ reg_val |= (((setup - 1) & 0xff)
+ << SPI_CFG0_CS_SETUP_OFFSET);
+ }
+ writel(reg_val, mdata->base + SPI_CFG0_REG);
+
+ inactive = min_t(u32, inactive, 0x100);
+ reg_val = readl(mdata->base + SPI_CFG1_REG);
+ reg_val &= ~SPI_CFG1_CS_IDLE_MASK;
+ reg_val |= (((inactive - 1) & 0xff) << SPI_CFG1_CS_IDLE_OFFSET);
+ writel(reg_val, mdata->base + SPI_CFG1_REG);
+
+ return 0;
+}
+
static int mtk_spi_prepare_message(struct spi_master *master,
struct spi_message *msg)
{
@@ -261,6 +336,15 @@ static int mtk_spi_prepare_message(struct spi_master *master,
writel(mdata->pad_sel[spi->chip_select],
mdata->base + SPI_PAD_SEL_REG);
+ /* tick delay */
+ reg_val = readl(mdata->base + SPI_CFG1_REG);
+ reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK;
+ reg_val |= ((chip_config->tick_delay & 0x7)
+ << SPI_CFG1_GET_TICK_DLY_OFFSET);
+ writel(reg_val, mdata->base + SPI_CFG1_REG);
+
+ /* set hw cs timing */
+ mtk_spi_set_hw_cs_timing(spi);
return 0;
}
@@ -287,12 +371,11 @@ static void mtk_spi_set_cs(struct spi_device *spi, bool enable)
static void mtk_spi_prepare_transfer(struct spi_master *master,
struct spi_transfer *xfer)
{
- u32 spi_clk_hz, div, sck_time, reg_val;
+ u32 div, sck_time, reg_val;
struct mtk_spi *mdata = spi_master_get_devdata(master);
- spi_clk_hz = clk_get_rate(mdata->spi_clk);
- if (xfer->speed_hz < spi_clk_hz / 2)
- div = DIV_ROUND_UP(spi_clk_hz, xfer->speed_hz);
+ if (xfer->speed_hz < mdata->spi_clk_hz / 2)
+ div = DIV_ROUND_UP(mdata->spi_clk_hz, xfer->speed_hz);
else
div = 1;
@@ -507,52 +590,6 @@ static bool mtk_spi_can_dma(struct spi_master *master,
(unsigned long)xfer->rx_buf % 4 == 0);
}
-static int mtk_spi_set_hw_cs_timing(struct spi_device *spi,
- struct spi_delay *setup,
- struct spi_delay *hold,
- struct spi_delay *inactive)
-{
- struct mtk_spi *mdata = spi_master_get_devdata(spi->master);
- u16 setup_dly, hold_dly, inactive_dly;
- u32 reg_val;
-
- if ((setup && setup->unit != SPI_DELAY_UNIT_SCK) ||
- (hold && hold->unit != SPI_DELAY_UNIT_SCK) ||
- (inactive && inactive->unit != SPI_DELAY_UNIT_SCK)) {
- dev_err(&spi->dev,
- "Invalid delay unit, should be SPI_DELAY_UNIT_SCK\n");
- return -EINVAL;
- }
-
- setup_dly = setup ? setup->value : 1;
- hold_dly = hold ? hold->value : 1;
- inactive_dly = inactive ? inactive->value : 1;
-
- reg_val = readl(mdata->base + SPI_CFG0_REG);
- if (mdata->dev_comp->enhance_timing) {
- reg_val &= ~(0xffff << SPI_ADJUST_CFG0_CS_HOLD_OFFSET);
- reg_val |= (((hold_dly - 1) & 0xffff)
- << SPI_ADJUST_CFG0_CS_HOLD_OFFSET);
- reg_val &= ~(0xffff << SPI_ADJUST_CFG0_CS_SETUP_OFFSET);
- reg_val |= (((setup_dly - 1) & 0xffff)
- << SPI_ADJUST_CFG0_CS_SETUP_OFFSET);
- } else {
- reg_val &= ~(0xff << SPI_CFG0_CS_HOLD_OFFSET);
- reg_val |= (((hold_dly - 1) & 0xff) << SPI_CFG0_CS_HOLD_OFFSET);
- reg_val &= ~(0xff << SPI_CFG0_CS_SETUP_OFFSET);
- reg_val |= (((setup_dly - 1) & 0xff)
- << SPI_CFG0_CS_SETUP_OFFSET);
- }
- writel(reg_val, mdata->base + SPI_CFG0_REG);
-
- reg_val = readl(mdata->base + SPI_CFG1_REG);
- reg_val &= ~SPI_CFG1_CS_IDLE_MASK;
- reg_val |= (((inactive_dly - 1) & 0xff) << SPI_CFG1_CS_IDLE_OFFSET);
- writel(reg_val, mdata->base + SPI_CFG1_REG);
-
- return 0;
-}
-
static int mtk_spi_setup(struct spi_device *spi)
{
struct mtk_spi *mdata = spi_master_get_devdata(spi->master);
@@ -790,7 +827,12 @@ static int mtk_spi_probe(struct platform_device *pdev)
goto err_put_master;
}
- clk_disable_unprepare(mdata->spi_clk);
+ mdata->spi_clk_hz = clk_get_rate(mdata->spi_clk);
+
+ if (mdata->dev_comp->no_need_unprepare)
+ clk_disable(mdata->spi_clk);
+ else
+ clk_disable_unprepare(mdata->spi_clk);
pm_runtime_enable(&pdev->dev);
@@ -858,6 +900,9 @@ static int mtk_spi_remove(struct platform_device *pdev)
mtk_spi_reset(mdata);
+ if (mdata->dev_comp->no_need_unprepare)
+ clk_unprepare(mdata->spi_clk);
+
return 0;
}
@@ -906,7 +951,10 @@ static int mtk_spi_runtime_suspend(struct device *dev)
struct spi_master *master = dev_get_drvdata(dev);
struct mtk_spi *mdata = spi_master_get_devdata(master);
- clk_disable_unprepare(mdata->spi_clk);
+ if (mdata->dev_comp->no_need_unprepare)
+ clk_disable(mdata->spi_clk);
+ else
+ clk_disable_unprepare(mdata->spi_clk);
return 0;
}
@@ -917,7 +965,10 @@ static int mtk_spi_runtime_resume(struct device *dev)
struct mtk_spi *mdata = spi_master_get_devdata(master);
int ret;
- ret = clk_prepare_enable(mdata->spi_clk);
+ if (mdata->dev_comp->no_need_unprepare)
+ ret = clk_enable(mdata->spi_clk);
+ else
+ ret = clk_prepare_enable(mdata->spi_clk);
if (ret < 0) {
dev_err(dev, "failed to enable spi_clk (%d)\n", ret);
return ret;
diff --git a/drivers/spi/spi-mxic.c b/drivers/spi/spi-mxic.c
index 96b418293bf2..45889947afed 100644
--- a/drivers/spi/spi-mxic.c
+++ b/drivers/spi/spi-mxic.c
@@ -335,8 +335,10 @@ static int mxic_spi_data_xfer(struct mxic_spi *mxic, const void *txbuf,
static bool mxic_spi_mem_supports_op(struct spi_mem *mem,
const struct spi_mem_op *op)
{
- if (op->data.buswidth > 4 || op->addr.buswidth > 4 ||
- op->dummy.buswidth > 4 || op->cmd.buswidth > 4)
+ bool all_false;
+
+ if (op->data.buswidth > 8 || op->addr.buswidth > 8 ||
+ op->dummy.buswidth > 8 || op->cmd.buswidth > 8)
return false;
if (op->data.nbytes && op->dummy.nbytes &&
@@ -346,7 +348,13 @@ static bool mxic_spi_mem_supports_op(struct spi_mem *mem,
if (op->addr.nbytes > 7)
return false;
- return spi_mem_default_supports_op(mem, op);
+ all_false = !op->cmd.dtr && !op->addr.dtr && !op->dummy.dtr &&
+ !op->data.dtr;
+
+ if (all_false)
+ return spi_mem_default_supports_op(mem, op);
+ else
+ return spi_mem_dtr_supports_op(mem, op);
}
static int mxic_spi_mem_exec_op(struct spi_mem *mem,
@@ -355,14 +363,15 @@ static int mxic_spi_mem_exec_op(struct spi_mem *mem,
struct mxic_spi *mxic = spi_master_get_devdata(mem->spi->master);
int nio = 1, i, ret;
u32 ss_ctrl;
- u8 addr[8];
- u8 opcode = op->cmd.opcode;
+ u8 addr[8], cmd[2];
ret = mxic_spi_set_freq(mxic, mem->spi->max_speed_hz);
if (ret)
return ret;
- if (mem->spi->mode & (SPI_TX_QUAD | SPI_RX_QUAD))
+ if (mem->spi->mode & (SPI_TX_OCTAL | SPI_RX_OCTAL))
+ nio = 8;
+ else if (mem->spi->mode & (SPI_TX_QUAD | SPI_RX_QUAD))
nio = 4;
else if (mem->spi->mode & (SPI_TX_DUAL | SPI_RX_DUAL))
nio = 2;
@@ -374,19 +383,26 @@ static int mxic_spi_mem_exec_op(struct spi_mem *mem,
mxic->regs + HC_CFG);
writel(HC_EN_BIT, mxic->regs + HC_EN);
- ss_ctrl = OP_CMD_BYTES(1) | OP_CMD_BUSW(fls(op->cmd.buswidth) - 1);
+ ss_ctrl = OP_CMD_BYTES(op->cmd.nbytes) |
+ OP_CMD_BUSW(fls(op->cmd.buswidth) - 1) |
+ (op->cmd.dtr ? OP_CMD_DDR : 0);
if (op->addr.nbytes)
ss_ctrl |= OP_ADDR_BYTES(op->addr.nbytes) |
- OP_ADDR_BUSW(fls(op->addr.buswidth) - 1);
+ OP_ADDR_BUSW(fls(op->addr.buswidth) - 1) |
+ (op->addr.dtr ? OP_ADDR_DDR : 0);
if (op->dummy.nbytes)
ss_ctrl |= OP_DUMMY_CYC(op->dummy.nbytes);
if (op->data.nbytes) {
- ss_ctrl |= OP_DATA_BUSW(fls(op->data.buswidth) - 1);
- if (op->data.dir == SPI_MEM_DATA_IN)
+ ss_ctrl |= OP_DATA_BUSW(fls(op->data.buswidth) - 1) |
+ (op->data.dtr ? OP_DATA_DDR : 0);
+ if (op->data.dir == SPI_MEM_DATA_IN) {
ss_ctrl |= OP_READ;
+ if (op->data.dtr)
+ ss_ctrl |= OP_DQS_EN;
+ }
}
writel(ss_ctrl, mxic->regs + SS_CTRL(mem->spi->chip_select));
@@ -394,7 +410,10 @@ static int mxic_spi_mem_exec_op(struct spi_mem *mem,
writel(readl(mxic->regs + HC_CFG) | HC_CFG_MAN_CS_ASSERT,
mxic->regs + HC_CFG);
- ret = mxic_spi_data_xfer(mxic, &opcode, NULL, 1);
+ for (i = 0; i < op->cmd.nbytes; i++)
+ cmd[i] = op->cmd.opcode >> (8 * (op->cmd.nbytes - i - 1));
+
+ ret = mxic_spi_data_xfer(mxic, cmd, NULL, op->cmd.nbytes);
if (ret)
goto out;
@@ -567,7 +586,8 @@ static int mxic_spi_probe(struct platform_device *pdev)
master->bits_per_word_mask = SPI_BPW_MASK(8);
master->mode_bits = SPI_CPOL | SPI_CPHA |
SPI_RX_DUAL | SPI_TX_DUAL |
- SPI_RX_QUAD | SPI_TX_QUAD;
+ SPI_RX_QUAD | SPI_TX_QUAD |
+ SPI_RX_OCTAL | SPI_TX_OCTAL;
mxic_spi_hw_init(mxic);
diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c
index 34b31aba3981..e8de3cbbfb2a 100644
--- a/drivers/spi/spi-orion.c
+++ b/drivers/spi/spi-orion.c
@@ -328,8 +328,16 @@ orion_spi_setup_transfer(struct spi_device *spi, struct spi_transfer *t)
static void orion_spi_set_cs(struct spi_device *spi, bool enable)
{
struct orion_spi *orion_spi;
+ void __iomem *ctrl_reg;
+ u32 val;
orion_spi = spi_master_get_devdata(spi->master);
+ ctrl_reg = spi_reg(orion_spi, ORION_SPI_IF_CTRL_REG);
+
+ val = readl(ctrl_reg);
+
+ /* Clear existing chip-select and assertion state */
+ val &= ~(ORION_SPI_CS_MASK | 0x1);
/*
* If this line is using a GPIO to control chip select, this internal
@@ -338,9 +346,7 @@ static void orion_spi_set_cs(struct spi_device *spi, bool enable)
* as it is handled by a GPIO, but that doesn't matter. What we need
* is to deassert the old chip select and assert some other chip select.
*/
- orion_spi_clrbits(orion_spi, ORION_SPI_IF_CTRL_REG, ORION_SPI_CS_MASK);
- orion_spi_setbits(orion_spi, ORION_SPI_IF_CTRL_REG,
- ORION_SPI_CS(spi->chip_select));
+ val |= ORION_SPI_CS(spi->chip_select);
/*
* Chip select logic is inverted from spi_set_cs(). For lines using a
@@ -350,9 +356,13 @@ static void orion_spi_set_cs(struct spi_device *spi, bool enable)
* doesn't matter.
*/
if (!enable)
- orion_spi_setbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1);
- else
- orion_spi_clrbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1);
+ val |= 0x1;
+
+ /*
+ * To avoid toggling unwanted chip selects update the register
+ * with a single write.
+ */
+ writel(val, ctrl_reg);
}
static inline int orion_spi_wait_till_ready(struct orion_spi *orion_spi)
diff --git a/drivers/spi/spi-pic32.c b/drivers/spi/spi-pic32.c
index 104bde153efd..5eb7b61bbb4d 100644
--- a/drivers/spi/spi-pic32.c
+++ b/drivers/spi/spi-pic32.c
@@ -361,6 +361,7 @@ static int pic32_spi_dma_config(struct pic32_spi *pic32s, u32 dma_width)
struct dma_slave_config cfg;
int ret;
+ memset(&cfg, 0, sizeof(cfg));
cfg.device_fc = true;
cfg.src_addr = pic32s->dma_base + buf_offset;
cfg.dst_addr = pic32s->dma_base + buf_offset;
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 974e30744b83..1573f6d8eb48 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -594,24 +594,29 @@ static int u32_reader(struct driver_data *drv_data)
static void reset_sccr1(struct driver_data *drv_data)
{
- struct chip_data *chip =
- spi_get_ctldata(drv_data->controller->cur_msg->spi);
- u32 sccr1_reg;
+ u32 mask = drv_data->int_cr1 | drv_data->dma_cr1, threshold;
+ struct chip_data *chip;
+
+ if (drv_data->controller->cur_msg) {
+ chip = spi_get_ctldata(drv_data->controller->cur_msg->spi);
+ threshold = chip->threshold;
+ } else {
+ threshold = 0;
+ }
- sccr1_reg = pxa2xx_spi_read(drv_data, SSCR1) & ~drv_data->int_cr1;
switch (drv_data->ssp_type) {
case QUARK_X1000_SSP:
- sccr1_reg &= ~QUARK_X1000_SSCR1_RFT;
+ mask |= QUARK_X1000_SSCR1_RFT;
break;
case CE4100_SSP:
- sccr1_reg &= ~CE4100_SSCR1_RFT;
+ mask |= CE4100_SSCR1_RFT;
break;
default:
- sccr1_reg &= ~SSCR1_RFT;
+ mask |= SSCR1_RFT;
break;
}
- sccr1_reg |= chip->threshold;
- pxa2xx_spi_write(drv_data, SSCR1, sccr1_reg);
+
+ pxa2xx_spi_update(drv_data, SSCR1, mask, threshold);
}
static void int_stop_and_reset(struct driver_data *drv_data)
@@ -724,11 +729,8 @@ static irqreturn_t interrupt_transfer(struct driver_data *drv_data)
static void handle_bad_msg(struct driver_data *drv_data)
{
+ int_stop_and_reset(drv_data);
pxa2xx_spi_off(drv_data);
- clear_SSCR1_bits(drv_data, drv_data->int_cr1);
- if (!pxa25x_ssp_comp(drv_data))
- pxa2xx_spi_write(drv_data, SSTO, 0);
- write_SSSR_CS(drv_data, drv_data->clear_sr);
dev_err(drv_data->ssp->dev, "bad message state in interrupt handler\n");
}
@@ -1156,13 +1158,10 @@ static void pxa2xx_spi_handle_err(struct spi_controller *controller,
{
struct driver_data *drv_data = spi_controller_get_devdata(controller);
+ int_stop_and_reset(drv_data);
+
/* Disable the SSP */
pxa2xx_spi_off(drv_data);
- /* Clear and disable interrupts and service requests */
- write_SSSR_CS(drv_data, drv_data->clear_sr);
- clear_SSCR1_bits(drv_data, drv_data->int_cr1 | drv_data->dma_cr1);
- if (!pxa25x_ssp_comp(drv_data))
- pxa2xx_spi_write(drv_data, SSTO, 0);
/*
* Stop the DMA if running. Note DMA callback handler may have unset
diff --git a/drivers/spi/spi-rockchip-sfc.c b/drivers/spi/spi-rockchip-sfc.c
new file mode 100644
index 000000000000..a46b38544027
--- /dev/null
+++ b/drivers/spi/spi-rockchip-sfc.c
@@ -0,0 +1,694 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Rockchip Serial Flash Controller Driver
+ *
+ * Copyright (c) 2017-2021, Rockchip Inc.
+ * Author: Shawn Lin <shawn.lin@rock-chips.com>
+ * Chris Morgan <macroalpha82@gmail.com>
+ * Jon Lin <Jon.lin@rock-chips.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/iopoll.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/spi/spi-mem.h>
+
+/* System control */
+#define SFC_CTRL 0x0
+#define SFC_CTRL_PHASE_SEL_NEGETIVE BIT(1)
+#define SFC_CTRL_CMD_BITS_SHIFT 8
+#define SFC_CTRL_ADDR_BITS_SHIFT 10
+#define SFC_CTRL_DATA_BITS_SHIFT 12
+
+/* Interrupt mask */
+#define SFC_IMR 0x4
+#define SFC_IMR_RX_FULL BIT(0)
+#define SFC_IMR_RX_UFLOW BIT(1)
+#define SFC_IMR_TX_OFLOW BIT(2)
+#define SFC_IMR_TX_EMPTY BIT(3)
+#define SFC_IMR_TRAN_FINISH BIT(4)
+#define SFC_IMR_BUS_ERR BIT(5)
+#define SFC_IMR_NSPI_ERR BIT(6)
+#define SFC_IMR_DMA BIT(7)
+
+/* Interrupt clear */
+#define SFC_ICLR 0x8
+#define SFC_ICLR_RX_FULL BIT(0)
+#define SFC_ICLR_RX_UFLOW BIT(1)
+#define SFC_ICLR_TX_OFLOW BIT(2)
+#define SFC_ICLR_TX_EMPTY BIT(3)
+#define SFC_ICLR_TRAN_FINISH BIT(4)
+#define SFC_ICLR_BUS_ERR BIT(5)
+#define SFC_ICLR_NSPI_ERR BIT(6)
+#define SFC_ICLR_DMA BIT(7)
+
+/* FIFO threshold level */
+#define SFC_FTLR 0xc
+#define SFC_FTLR_TX_SHIFT 0
+#define SFC_FTLR_TX_MASK 0x1f
+#define SFC_FTLR_RX_SHIFT 8
+#define SFC_FTLR_RX_MASK 0x1f
+
+/* Reset FSM and FIFO */
+#define SFC_RCVR 0x10
+#define SFC_RCVR_RESET BIT(0)
+
+/* Enhanced mode */
+#define SFC_AX 0x14
+
+/* Address Bit number */
+#define SFC_ABIT 0x18
+
+/* Interrupt status */
+#define SFC_ISR 0x1c
+#define SFC_ISR_RX_FULL_SHIFT BIT(0)
+#define SFC_ISR_RX_UFLOW_SHIFT BIT(1)
+#define SFC_ISR_TX_OFLOW_SHIFT BIT(2)
+#define SFC_ISR_TX_EMPTY_SHIFT BIT(3)
+#define SFC_ISR_TX_FINISH_SHIFT BIT(4)
+#define SFC_ISR_BUS_ERR_SHIFT BIT(5)
+#define SFC_ISR_NSPI_ERR_SHIFT BIT(6)
+#define SFC_ISR_DMA_SHIFT BIT(7)
+
+/* FIFO status */
+#define SFC_FSR 0x20
+#define SFC_FSR_TX_IS_FULL BIT(0)
+#define SFC_FSR_TX_IS_EMPTY BIT(1)
+#define SFC_FSR_RX_IS_EMPTY BIT(2)
+#define SFC_FSR_RX_IS_FULL BIT(3)
+#define SFC_FSR_TXLV_MASK GENMASK(12, 8)
+#define SFC_FSR_TXLV_SHIFT 8
+#define SFC_FSR_RXLV_MASK GENMASK(20, 16)
+#define SFC_FSR_RXLV_SHIFT 16
+
+/* FSM status */
+#define SFC_SR 0x24
+#define SFC_SR_IS_IDLE 0x0
+#define SFC_SR_IS_BUSY 0x1
+
+/* Raw interrupt status */
+#define SFC_RISR 0x28
+#define SFC_RISR_RX_FULL BIT(0)
+#define SFC_RISR_RX_UNDERFLOW BIT(1)
+#define SFC_RISR_TX_OVERFLOW BIT(2)
+#define SFC_RISR_TX_EMPTY BIT(3)
+#define SFC_RISR_TRAN_FINISH BIT(4)
+#define SFC_RISR_BUS_ERR BIT(5)
+#define SFC_RISR_NSPI_ERR BIT(6)
+#define SFC_RISR_DMA BIT(7)
+
+/* Version */
+#define SFC_VER 0x2C
+#define SFC_VER_3 0x3
+#define SFC_VER_4 0x4
+#define SFC_VER_5 0x5
+
+/* Delay line controller register */
+#define SFC_DLL_CTRL0 0x3C
+#define SFC_DLL_CTRL0_SCLK_SMP_DLL BIT(15)
+#define SFC_DLL_CTRL0_DLL_MAX_VER4 0xFFU
+#define SFC_DLL_CTRL0_DLL_MAX_VER5 0x1FFU
+
+/* Master trigger */
+#define SFC_DMA_TRIGGER 0x80
+#define SFC_DMA_TRIGGER_START 1
+
+/* Src or Dst addr for master */
+#define SFC_DMA_ADDR 0x84
+
+/* Length control register extension 32GB */
+#define SFC_LEN_CTRL 0x88
+#define SFC_LEN_CTRL_TRB_SEL 1
+#define SFC_LEN_EXT 0x8C
+
+/* Command */
+#define SFC_CMD 0x100
+#define SFC_CMD_IDX_SHIFT 0
+#define SFC_CMD_DUMMY_SHIFT 8
+#define SFC_CMD_DIR_SHIFT 12
+#define SFC_CMD_DIR_RD 0
+#define SFC_CMD_DIR_WR 1
+#define SFC_CMD_ADDR_SHIFT 14
+#define SFC_CMD_ADDR_0BITS 0
+#define SFC_CMD_ADDR_24BITS 1
+#define SFC_CMD_ADDR_32BITS 2
+#define SFC_CMD_ADDR_XBITS 3
+#define SFC_CMD_TRAN_BYTES_SHIFT 16
+#define SFC_CMD_CS_SHIFT 30
+
+/* Address */
+#define SFC_ADDR 0x104
+
+/* Data */
+#define SFC_DATA 0x108
+
+/* The controller and documentation reports that it supports up to 4 CS
+ * devices (0-3), however I have only been able to test a single CS (CS 0)
+ * due to the configuration of my device.
+ */
+#define SFC_MAX_CHIPSELECT_NUM 4
+
+/* The SFC can transfer max 16KB - 1 at one time
+ * we set it to 15.5KB here for alignment.
+ */
+#define SFC_MAX_IOSIZE_VER3 (512 * 31)
+
+/* DMA is only enabled for large data transmission */
+#define SFC_DMA_TRANS_THRETHOLD (0x40)
+
+/* Maximum clock values from datasheet suggest keeping clock value under
+ * 150MHz. No minimum or average value is suggested.
+ */
+#define SFC_MAX_SPEED (150 * 1000 * 1000)
+
+struct rockchip_sfc { /* per-controller driver state, stored as the SPI master's devdata */
+ struct device *dev; /* the platform device's struct device; used for logging */
+ void __iomem *regbase; /* ioremapped base of the SFC register block */
+ struct clk *hclk; /* "hclk_sfc" clock (reported as the ahb clk in probe errors) */
+ struct clk *clk; /* "clk_sfc" clock (reported as the interface clk in probe errors) */
+ u32 frequency; /* last max_speed_hz successfully applied to clk via clk_set_rate() */
+ /* virtual mapped addr for dma_buffer */
+ void *buffer;
+ dma_addr_t dma_buffer; /* device address of the coherent bounce buffer */
+ struct completion cp; /* completed by the IRQ handler when the DMA interrupt is raised */
+ bool use_dma; /* false when the "rockchip,sfc-no-dma" DT property is present */
+ u32 max_iosize; /* cap applied to op->data.nbytes by rockchip_sfc_adjust_op_size() */
+ u16 version; /* low 16 bits of SFC_VER, cached at probe time */
+};
+
+/*
+ * Trigger a reset through SFC_RCVR and wait for the reset bit to self-clear.
+ *
+ * Returns 0 on success or the readl_poll_timeout() error if the bit is still
+ * set after one second. All interrupt flags are cleared in either case.
+ */
+static int rockchip_sfc_reset(struct rockchip_sfc *sfc)
+{
+	void __iomem *rcvr = sfc->regbase + SFC_RCVR;
+	u32 rcvr_val;
+	int ret;
+
+	writel_relaxed(SFC_RCVR_RESET, rcvr);
+
+	/* Poll every 20us, for at most one second, until the bit reads as 0 */
+	ret = readl_poll_timeout(rcvr, rcvr_val,
+				 (rcvr_val & SFC_RCVR_RESET) == 0, 20,
+				 jiffies_to_usecs(HZ));
+	if (ret != 0)
+		dev_err(sfc->dev, "SFC reset never finished\n");
+
+	/* The masked interrupts recorded in RISR still have to be cleared */
+	writel_relaxed(0xFFFFFFFF, sfc->regbase + SFC_ICLR);
+
+	dev_dbg(sfc->dev, "reset\n");
+
+	return ret;
+}
+
+/* Return the low 16 bits of the SFC_VER register. */
+static u16 rockchip_sfc_get_version(struct rockchip_sfc *sfc)
+{
+	u32 ver = readl(sfc->regbase + SFC_VER);
+
+	return (u16)(ver & 0xffff);
+}
+
+/*
+ * Largest data phase, in bytes, accepted per operation. The same constant
+ * (SFC_MAX_IOSIZE_VER3) is returned whatever the controller is.
+ */
+static u32 rockchip_sfc_get_max_iosize(struct rockchip_sfc *sfc)
+{
+	return SFC_MAX_IOSIZE_VER3;
+}
+
+/* Enable the interrupt sources in @mask by clearing their bits in SFC_IMR. */
+static void rockchip_sfc_irq_unmask(struct rockchip_sfc *sfc, u32 mask)
+{
+	void __iomem *imr = sfc->regbase + SFC_IMR;
+
+	/* Read-modify-write so the other sources keep their current state */
+	writel(readl(imr) & ~mask, imr);
+}
+
+/* Disable the interrupt sources in @mask by setting their bits in SFC_IMR. */
+static void rockchip_sfc_irq_mask(struct rockchip_sfc *sfc, u32 mask)
+{
+	void __iomem *imr = sfc->regbase + SFC_IMR;
+
+	/* Read-modify-write so the other sources keep their current state */
+	writel(readl(imr) | mask, imr);
+}
+
+/*
+ * Put the controller into a known state: zero SFC_CTRL, clear and mask every
+ * interrupt and, on version 4 or later, write SFC_LEN_CTRL_TRB_SEL to
+ * SFC_LEN_CTRL.
+ *
+ * Always returns 0.
+ */
+static int rockchip_sfc_init(struct rockchip_sfc *sfc)
+{
+	u16 hw_version;
+
+	writel(0, sfc->regbase + SFC_CTRL);
+	writel(0xFFFFFFFF, sfc->regbase + SFC_ICLR);
+	rockchip_sfc_irq_mask(sfc, 0xFFFFFFFF);
+
+	hw_version = rockchip_sfc_get_version(sfc);
+	if (hw_version < SFC_VER_4)
+		return 0;
+
+	writel(SFC_LEN_CTRL_TRB_SEL, sfc->regbase + SFC_LEN_CTRL);
+
+	return 0;
+}
+
+/*
+ * Busy-poll SFC_FSR until the TX level field is non-zero.
+ *
+ * Returns the TX level field value on success, or -ETIMEDOUT if it stays zero
+ * for @timeout_us microseconds.
+ */
+static int rockchip_sfc_wait_txfifo_ready(struct rockchip_sfc *sfc, u32 timeout_us)
+{
+	u32 fsr;
+
+	if (readl_poll_timeout(sfc->regbase + SFC_FSR, fsr,
+			       fsr & SFC_FSR_TXLV_MASK, 0,
+			       timeout_us)) {
+		dev_dbg(sfc->dev, "sfc wait tx fifo timeout\n");
+
+		return -ETIMEDOUT;
+	}
+
+	return (fsr & SFC_FSR_TXLV_MASK) >> SFC_FSR_TXLV_SHIFT;
+}
+
+/*
+ * Busy-poll SFC_FSR until the RX level field is non-zero.
+ *
+ * Returns the RX level field value on success, or -ETIMEDOUT if it stays zero
+ * for @timeout_us microseconds.
+ */
+static int rockchip_sfc_wait_rxfifo_ready(struct rockchip_sfc *sfc, u32 timeout_us)
+{
+	u32 fsr;
+
+	if (readl_poll_timeout(sfc->regbase + SFC_FSR, fsr,
+			       fsr & SFC_FSR_RXLV_MASK, 0,
+			       timeout_us)) {
+		dev_dbg(sfc->dev, "sfc wait rx fifo timeout\n");
+
+		return -ETIMEDOUT;
+	}
+
+	return (fsr & SFC_FSR_RXLV_MASK) >> SFC_FSR_RXLV_SHIFT;
+}
+
+/*
+ * Rewrite an operation that has dummy bytes but no address bytes.
+ *
+ * The SFC does not support emitting DUMMY cycles directly after the CMD
+ * cycles, so the dummy phase is converted into an address phase of the same
+ * length and bus width, filled with ones. @op is modified in place; any other
+ * operation is left untouched.
+ */
+static void rockchip_sfc_adjust_op_work(struct spi_mem_op *op)
+{
+	if (likely(!op->dummy.nbytes || op->addr.nbytes))
+		return;
+
+	op->addr.nbytes = op->dummy.nbytes;
+	op->addr.buswidth = op->dummy.buswidth;
+	/*
+	 * All-ones filler. Only the low 32 bits are ever used: the value is
+	 * written to SFC_ADDR with writel().
+	 */
+	op->addr.val = 0xFFFFFFFF;
+
+	op->dummy.nbytes = 0;
+}
+
+/*
+ * Describe one spi-mem operation to the hardware by programming SFC_CTRL,
+ * SFC_CMD and, when the operation has an address phase, SFC_ADDR.
+ *
+ * @sfc: controller state
+ * @mem: spi-mem device; only its chip select is used here
+ * @op:  operation to program
+ * @len: number of data bytes to transfer, 0 when there is no data phase
+ *
+ * Always returns 0.
+ */
+static int rockchip_sfc_xfer_setup(struct rockchip_sfc *sfc,
+				   struct spi_mem *mem,
+				   const struct spi_mem_op *op,
+				   u32 len)
+{
+	u32 ctrl = 0, cmd = 0;
+
+	/* set CMD; a bus width of 1/2/4 is encoded as 0/1/2 */
+	cmd = op->cmd.opcode;
+	ctrl |= ((op->cmd.buswidth >> 1) << SFC_CTRL_CMD_BITS_SHIFT);
+
+	/* set ADDR */
+	if (op->addr.nbytes) {
+		if (op->addr.nbytes == 4) {
+			cmd |= SFC_CMD_ADDR_32BITS << SFC_CMD_ADDR_SHIFT;
+		} else if (op->addr.nbytes == 3) {
+			cmd |= SFC_CMD_ADDR_24BITS << SFC_CMD_ADDR_SHIFT;
+		} else {
+			/* Any other width: give the bit count (minus one) in SFC_ABIT */
+			cmd |= SFC_CMD_ADDR_XBITS << SFC_CMD_ADDR_SHIFT;
+			writel(op->addr.nbytes * 8 - 1, sfc->regbase + SFC_ABIT);
+		}
+
+		ctrl |= ((op->addr.buswidth >> 1) << SFC_CTRL_ADDR_BITS_SHIFT);
+	}
+
+	/* set DUMMY: the field receives nbytes * 8 / buswidth */
+	if (op->dummy.nbytes) {
+		if (op->dummy.buswidth == 4)
+			cmd |= op->dummy.nbytes * 2 << SFC_CMD_DUMMY_SHIFT;
+		else if (op->dummy.buswidth == 2)
+			cmd |= op->dummy.nbytes * 4 << SFC_CMD_DUMMY_SHIFT;
+		else
+			cmd |= op->dummy.nbytes * 8 << SFC_CMD_DUMMY_SHIFT;
+	}
+
+	/* set DATA */
+	if (sfc->version >= SFC_VER_4) /* Clear it if no data to transfer */
+		writel(len, sfc->regbase + SFC_LEN_EXT);
+	else
+		cmd |= len << SFC_CMD_TRAN_BYTES_SHIFT;
+	if (len) {
+		if (op->data.dir == SPI_MEM_DATA_OUT)
+			cmd |= SFC_CMD_DIR_WR << SFC_CMD_DIR_SHIFT;
+
+		ctrl |= ((op->data.buswidth >> 1) << SFC_CTRL_DATA_BITS_SHIFT);
+	}
+	/* An address phase without any data is flagged as a write */
+	if (!len && op->addr.nbytes)
+		cmd |= SFC_CMD_DIR_WR << SFC_CMD_DIR_SHIFT;
+
+	/* set the Controller */
+	ctrl |= SFC_CTRL_PHASE_SEL_NEGETIVE;
+	/*
+	 * Widen to u32 before shifting: the chip select lands in bits 31:30,
+	 * and shifting the int-promoted value would overflow for CS 2 and 3.
+	 */
+	cmd |= (u32)mem->spi->chip_select << SFC_CMD_CS_SHIFT;
+
+	dev_dbg(sfc->dev, "sfc addr.nbytes=%x(x%d) dummy.nbytes=%x(x%d)\n",
+		op->addr.nbytes, op->addr.buswidth,
+		op->dummy.nbytes, op->dummy.buswidth);
+	dev_dbg(sfc->dev, "sfc ctrl=%x cmd=%x addr=%llx len=%x\n",
+		ctrl, cmd, op->addr.val, len);
+
+	writel(ctrl, sfc->regbase + SFC_CTRL);
+	writel(cmd, sfc->regbase + SFC_CMD);
+	if (op->addr.nbytes)
+		writel(op->addr.val, sfc->regbase + SFC_ADDR);
+
+	return 0;
+}
+
+/*
+ * Push @len bytes from @buf into the data FIFO by PIO.
+ *
+ * Whole 32-bit words are written in bursts limited by the TX level reported
+ * by the hardware; a trailing 1-3 bytes are zero-padded into one final word.
+ *
+ * Returns @len on success or a negative errno if the FIFO wait times out.
+ */
+static int rockchip_sfc_write_fifo(struct rockchip_sfc *sfc, const u8 *buf, int len)
+{
+	u32 words_left = len >> 2;
+	u8 tail = len & 0x3;
+	u32 burst;
+	u32 last_word = 0;
+	int room;
+
+	while (words_left) {
+		room = rockchip_sfc_wait_txfifo_ready(sfc, 1000);
+		if (room < 0)
+			return room;
+
+		burst = min_t(u32, room, words_left);
+		iowrite32_rep(sfc->regbase + SFC_DATA, buf, burst);
+		buf += burst << 2;
+		words_left -= burst;
+	}
+
+	if (!tail)
+		return len;
+
+	/* write the remaining bytes that do not fill a whole word */
+	room = rockchip_sfc_wait_txfifo_ready(sfc, 1000);
+	if (room < 0)
+		return room;
+
+	memcpy(&last_word, buf, tail);
+	writel(last_word, sfc->regbase + SFC_DATA);
+
+	return len;
+}
+
+/*
+ * Drain @len bytes from the data FIFO into @buf by PIO.
+ *
+ * Whole 32-bit words are read in bursts limited by the RX level reported by
+ * the hardware; for a trailing 1-3 bytes one more word is read and only the
+ * needed bytes are copied out.
+ *
+ * Returns @len on success or a negative errno if the FIFO wait times out.
+ */
+static int rockchip_sfc_read_fifo(struct rockchip_sfc *sfc, u8 *buf, int len)
+{
+	u32 words_left = len >> 2;
+	u8 tail = len & 0x3;
+	u8 burst;
+	int avail;
+	int last_word;
+
+	/* the FIFO is accessed one whole word at a time */
+	while (words_left) {
+		avail = rockchip_sfc_wait_rxfifo_ready(sfc, 1000);
+		if (avail < 0)
+			return avail;
+
+		burst = min_t(u32, avail, words_left);
+		ioread32_rep(sfc->regbase + SFC_DATA, buf, burst);
+		buf += burst << 2;
+		words_left -= burst;
+	}
+
+	if (!tail)
+		return len;
+
+	/* read the remaining bytes that do not fill a whole word */
+	avail = rockchip_sfc_wait_rxfifo_ready(sfc, 1000);
+	if (avail < 0)
+		return avail;
+
+	last_word = readl(sfc->regbase + SFC_DATA);
+	memcpy(buf, &last_word, tail);
+
+	return len;
+}
+
+/*
+ * Start a DMA transfer: clear all interrupt flags, program the low 32 bits
+ * of @dma_buf as the DMA address and write the start trigger.
+ *
+ * Does not wait for completion. Returns @len.
+ */
+static int rockchip_sfc_fifo_transfer_dma(struct rockchip_sfc *sfc, dma_addr_t dma_buf, size_t len)
+{
+	u32 bus_addr = (u32)dma_buf;
+
+	writel(0xFFFFFFFF, sfc->regbase + SFC_ICLR);
+	writel(bus_addr, sfc->regbase + SFC_DMA_ADDR);
+	writel(SFC_DMA_TRIGGER_START, sfc->regbase + SFC_DMA_TRIGGER);
+
+	return len;
+}
+
+/*
+ * Run the data phase of @op by PIO: write the FIFO for SPI_MEM_DATA_OUT,
+ * read it for any other direction.
+ *
+ * Returns @len on success or a negative errno.
+ */
+static int rockchip_sfc_xfer_data_poll(struct rockchip_sfc *sfc,
+				       const struct spi_mem_op *op, u32 len)
+{
+	dev_dbg(sfc->dev, "sfc xfer_poll len=%x\n", len);
+
+	if (op->data.dir != SPI_MEM_DATA_OUT)
+		return rockchip_sfc_read_fifo(sfc, op->data.buf.in, len);
+
+	return rockchip_sfc_write_fifo(sfc, op->data.buf.out, len);
+}
+
+/*
+ * Run the data phase of @op by DMA through the driver's bounce buffer.
+ *
+ * Outgoing data is copied into the bounce buffer before the transfer starts;
+ * incoming data is copied out of it afterwards. The DMA interrupt is masked
+ * again before returning.
+ *
+ * Returns @len on success or -ETIMEDOUT if no completion arrives within 2s.
+ */
+static int rockchip_sfc_xfer_data_dma(struct rockchip_sfc *sfc,
+				      const struct spi_mem_op *op, u32 len)
+{
+	unsigned long remaining;
+	int ret;
+
+	dev_dbg(sfc->dev, "sfc xfer_dma len=%x\n", len);
+
+	if (op->data.dir == SPI_MEM_DATA_OUT)
+		memcpy(sfc->buffer, op->data.buf.out, len);
+
+	ret = rockchip_sfc_fifo_transfer_dma(sfc, sfc->dma_buffer, len);
+
+	remaining = wait_for_completion_timeout(&sfc->cp, msecs_to_jiffies(2000));
+	if (remaining == 0) {
+		dev_err(sfc->dev, "DMA wait for transfer finish timeout\n");
+		ret = -ETIMEDOUT;
+	}
+
+	rockchip_sfc_irq_mask(sfc, SFC_IMR_DMA);
+
+	if (op->data.dir == SPI_MEM_DATA_IN)
+		memcpy(op->data.buf.in, sfc->buffer, len);
+
+	return ret;
+}
+
+/*
+ * Wait for the busy bit in SFC_SR to clear, polling every 20us.
+ *
+ * Returns 0 once the bit is clear. If it is still set after @timeout_us the
+ * controller is reset and -EIO is returned.
+ */
+static int rockchip_sfc_xfer_done(struct rockchip_sfc *sfc, u32 timeout_us)
+{
+	u32 sr;
+
+	if (!readl_poll_timeout(sfc->regbase + SFC_SR, sr,
+				!(sr & SFC_SR_IS_BUSY),
+				20, timeout_us))
+		return 0;
+
+	dev_err(sfc->dev, "wait sfc idle timeout\n");
+	rockchip_sfc_reset(sfc);
+
+	return -EIO;
+}
+
+/*
+ * spi-mem exec_op hook: execute one operation on the controller.
+ *
+ * The interface clock is re-rated when the device's max_speed_hz differs
+ * from the cached frequency. The op is then adjusted for the hardware and
+ * programmed, the data phase (if any) is run by DMA or PIO, and finally the
+ * controller is polled until its busy bit clears.
+ *
+ * Returns 0 on success, the clk_set_rate() error, or -EIO when the data
+ * phase fails or the controller stays busy.
+ */
+static int rockchip_sfc_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	struct spi_device *spi = mem->spi;
+	struct rockchip_sfc *sfc = spi_master_get_devdata(spi->master);
+	u32 len = op->data.nbytes;
+	int ret;
+
+	if (unlikely(spi->max_speed_hz != sfc->frequency)) {
+		ret = clk_set_rate(sfc->clk, spi->max_speed_hz);
+		if (ret)
+			return ret;
+		sfc->frequency = spi->max_speed_hz;
+		dev_dbg(sfc->dev, "set_freq=%dHz real_freq=%ldHz\n",
+			sfc->frequency, clk_get_rate(sfc->clk));
+	}
+
+	/* NOTE: this drops the const qualifier and edits the caller's op */
+	rockchip_sfc_adjust_op_work((struct spi_mem_op *)op);
+	rockchip_sfc_xfer_setup(sfc, mem, op, len);
+
+	/* No data phase: only wait for the controller to finish */
+	if (!len)
+		return rockchip_sfc_xfer_done(sfc, 100000);
+
+	/* DMA is used only when enabled and the transfer is large enough */
+	if (likely(sfc->use_dma) && len >= SFC_DMA_TRANS_THRETHOLD) {
+		init_completion(&sfc->cp);
+		rockchip_sfc_irq_unmask(sfc, SFC_IMR_DMA);
+		ret = rockchip_sfc_xfer_data_dma(sfc, op, len);
+	} else {
+		ret = rockchip_sfc_xfer_data_poll(sfc, op, len);
+	}
+
+	if (ret != len) {
+		dev_err(sfc->dev, "xfer data failed ret %d dir %d\n", ret, op->data.dir);
+
+		return -EIO;
+	}
+
+	return rockchip_sfc_xfer_done(sfc, 100000);
+}
+
+/*
+ * spi-mem adjust_op_size hook: clamp the data length of @op to the
+ * controller's max_iosize. Always returns 0.
+ */
+static int rockchip_sfc_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
+{
+	struct rockchip_sfc *sfc = spi_master_get_devdata(mem->spi->master);
+
+	if (op->data.nbytes > sfc->max_iosize)
+		op->data.nbytes = sfc->max_iosize;
+
+	return 0;
+}
+
+/* spi-mem hooks implemented by this driver. */
+static const struct spi_controller_mem_ops rockchip_sfc_mem_ops = {
+	.adjust_op_size	= rockchip_sfc_adjust_op_size,
+	.exec_op	= rockchip_sfc_exec_mem_op,
+};
+
+/*
+ * Interrupt handler: acknowledge every raw interrupt that is pending and
+ * complete sfc->cp when the DMA interrupt is among them.
+ *
+ * Returns IRQ_HANDLED for a DMA interrupt, IRQ_NONE otherwise.
+ */
+static irqreturn_t rockchip_sfc_irq_handler(int irq, void *dev_id)
+{
+	struct rockchip_sfc *sfc = dev_id;
+	u32 pending = readl(sfc->regbase + SFC_RISR);
+
+	/* Clear interrupt */
+	writel_relaxed(pending, sfc->regbase + SFC_ICLR);
+
+	if (!(pending & SFC_RISR_DMA))
+		return IRQ_NONE;
+
+	complete(&sfc->cp);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Bind the driver to an SFC platform device.
+ *
+ * Allocates the SPI master, maps the registers, gets both clocks, optionally
+ * sets up the coherent DMA bounce buffer, enables the clocks, installs the
+ * interrupt handler, initialises the controller and registers the master.
+ *
+ * Returns 0 on success or a negative errno. Clocks that were enabled are
+ * disabled again on every failure path; the remaining resources are
+ * device-managed.
+ */
+static int rockchip_sfc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct spi_master *master;
+	struct resource *res;
+	struct rockchip_sfc *sfc;
+	int irq;
+	int ret;
+
+	master = devm_spi_alloc_master(&pdev->dev, sizeof(*sfc));
+	if (!master)
+		return -ENOMEM;
+
+	master->flags = SPI_MASTER_HALF_DUPLEX;
+	master->mem_ops = &rockchip_sfc_mem_ops;
+	master->dev.of_node = pdev->dev.of_node;
+	master->mode_bits = SPI_TX_QUAD | SPI_TX_DUAL | SPI_RX_QUAD | SPI_RX_DUAL;
+	master->max_speed_hz = SFC_MAX_SPEED;
+	master->num_chipselect = SFC_MAX_CHIPSELECT_NUM;
+
+	sfc = spi_master_get_devdata(master);
+	sfc->dev = dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	sfc->regbase = devm_ioremap_resource(dev, res);
+	if (IS_ERR(sfc->regbase))
+		return PTR_ERR(sfc->regbase);
+
+	sfc->clk = devm_clk_get(&pdev->dev, "clk_sfc");
+	if (IS_ERR(sfc->clk)) {
+		dev_err(&pdev->dev, "Failed to get sfc interface clk\n");
+		return PTR_ERR(sfc->clk);
+	}
+
+	sfc->hclk = devm_clk_get(&pdev->dev, "hclk_sfc");
+	if (IS_ERR(sfc->hclk)) {
+		dev_err(&pdev->dev, "Failed to get sfc ahb clk\n");
+		return PTR_ERR(sfc->hclk);
+	}
+
+	/* DMA is used unless the device tree explicitly opts out */
+	sfc->use_dma = !of_property_read_bool(sfc->dev->of_node,
+					      "rockchip,sfc-no-dma");
+
+	if (sfc->use_dma) {
+		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+		if (ret) {
+			dev_warn(dev, "Unable to set dma mask\n");
+			return ret;
+		}
+
+		sfc->buffer = dmam_alloc_coherent(dev, SFC_MAX_IOSIZE_VER3,
+						  &sfc->dma_buffer,
+						  GFP_KERNEL);
+		if (!sfc->buffer)
+			return -ENOMEM;
+	}
+
+	ret = clk_prepare_enable(sfc->hclk);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to enable ahb clk\n");
+		goto err_hclk;
+	}
+
+	ret = clk_prepare_enable(sfc->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to enable interface clk\n");
+		goto err_clk;
+	}
+
+	/* Find the irq */
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "Failed to get the irq\n");
+		ret = irq;
+		goto err_irq;
+	}
+
+	ret = devm_request_irq(dev, irq, rockchip_sfc_irq_handler,
+			       0, pdev->name, sfc);
+	if (ret) {
+		dev_err(dev, "Failed to request irq\n");
+		/* Both clocks are running at this point: unwind them */
+		goto err_irq;
+	}
+
+	ret = rockchip_sfc_init(sfc);
+	if (ret)
+		goto err_irq;
+
+	sfc->max_iosize = rockchip_sfc_get_max_iosize(sfc);
+	sfc->version = rockchip_sfc_get_version(sfc);
+
+	/* rockchip_sfc_remove() retrieves the master through drvdata */
+	platform_set_drvdata(pdev, master);
+
+	ret = spi_register_master(master);
+	if (ret)
+		goto err_irq;
+
+	return 0;
+
+err_irq:
+	clk_disable_unprepare(sfc->clk);
+err_clk:
+	clk_disable_unprepare(sfc->hclk);
+err_hclk:
+	return ret;
+}
+
+/*
+ * Unbind the driver: unregister the SPI master stored in drvdata by probe,
+ * then disable both clocks. Always returns 0.
+ */
+static int rockchip_sfc_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct rockchip_sfc *sfc = spi_master_get_devdata(master);
+
+	spi_unregister_master(master);
+
+	clk_disable_unprepare(sfc->clk);
+	clk_disable_unprepare(sfc->hclk);
+
+	return 0;
+}
+
+static const struct of_device_id rockchip_sfc_dt_ids[] = { /* device-tree match table used by rockchip_sfc_driver */
+ { .compatible = "rockchip,sfc"},
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, rockchip_sfc_dt_ids);
+
+static struct platform_driver rockchip_sfc_driver = { /* platform driver registered via module_platform_driver() */
+ .driver = {
+ .name = "rockchip-sfc",
+ .of_match_table = rockchip_sfc_dt_ids, /* matches "rockchip,sfc" nodes */
+ },
+ .probe = rockchip_sfc_probe,
+ .remove = rockchip_sfc_remove,
+};
+module_platform_driver(rockchip_sfc_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Rockchip Serial Flash Controller Driver");
+MODULE_AUTHOR("Shawn Lin <shawn.lin@rock-chips.com>");
+MODULE_AUTHOR("Chris Morgan <macromorgan@hotmail.com>");
+MODULE_AUTHOR("Jon Lin <Jon.lin@rock-chips.com>");
diff --git a/drivers/spi/spi-sprd-adi.c b/drivers/spi/spi-sprd-adi.c
index ab19068be867..1edbf44c05a7 100644
--- a/drivers/spi/spi-sprd-adi.c
+++ b/drivers/spi/spi-sprd-adi.c
@@ -52,10 +52,20 @@
/*
* ADI slave devices include RTC, ADC, regulator, charger, thermal and so on.
- * The slave devices address offset is always 0x8000 and size is 4K.
+ * ADI supports 12/14bit address for r2p0, and additional 17bit for r3p0 or
+ * later versions. Since bit[1:0] are always zero, the spec describes them
+ * as 10/12/15bit address modes.
+ * The 10bit mode supports a single slave; the 12/15bit modes support 3
+ * slaves, with the high two bits holding the slave_id.
+ * The slave devices address offset is 0x8000 for 10/12bit address mode,
+ * and 0x20000 for 15bit mode.
*/
-#define ADI_SLAVE_ADDR_SIZE SZ_4K
-#define ADI_SLAVE_OFFSET 0x8000
+#define ADI_10BIT_SLAVE_ADDR_SIZE SZ_4K
+#define ADI_10BIT_SLAVE_OFFSET 0x8000
+#define ADI_12BIT_SLAVE_ADDR_SIZE SZ_16K
+#define ADI_12BIT_SLAVE_OFFSET 0x8000
+#define ADI_15BIT_SLAVE_ADDR_SIZE SZ_128K
+#define ADI_15BIT_SLAVE_OFFSET 0x20000
/* Timeout (ms) for the trylock of hardware spinlocks */
#define ADI_HWSPINLOCK_TIMEOUT 5000
@@ -67,24 +77,35 @@
#define ADI_FIFO_DRAIN_TIMEOUT 1000
#define ADI_READ_TIMEOUT 2000
-#define REG_ADDR_LOW_MASK GENMASK(11, 0)
+
+/*
+ * Read back address from REG_ADI_RD_DATA bit[30:16] which maps to:
+ * REG_ADI_RD_CMD bit[14:0] for r2p0
+ * REG_ADI_RD_CMD bit[16:2] for r3p0
+ */
+#define RDBACK_ADDR_MASK_R2 GENMASK(14, 0)
+#define RDBACK_ADDR_MASK_R3 GENMASK(16, 2)
+#define RDBACK_ADDR_SHIFT_R3 2
/* Registers definitions for PMIC watchdog controller */
-#define REG_WDG_LOAD_LOW 0x80
-#define REG_WDG_LOAD_HIGH 0x84
-#define REG_WDG_CTRL 0x88
-#define REG_WDG_LOCK 0xa0
+#define REG_WDG_LOAD_LOW 0x0
+#define REG_WDG_LOAD_HIGH 0x4
+#define REG_WDG_CTRL 0x8
+#define REG_WDG_LOCK 0x20
/* Bits definitions for register REG_WDG_CTRL */
#define BIT_WDG_RUN BIT(1)
#define BIT_WDG_NEW BIT(2)
#define BIT_WDG_RST BIT(3)
+/* Bits definitions for register REG_MODULE_EN */
+#define BIT_WDG_EN BIT(2)
+
/* Registers definitions for PMIC */
#define PMIC_RST_STATUS 0xee8
#define PMIC_MODULE_EN 0xc08
#define PMIC_CLK_EN 0xc18
-#define BIT_WDG_EN BIT(2)
+#define PMIC_WDG_BASE 0x80
/* Definition of PMIC reset status register */
#define HWRST_STATUS_SECURITY 0x02
@@ -103,10 +124,26 @@
#define HWRST_STATUS_WATCHDOG 0xf0
/* Use default timeout 50 ms that converts to watchdog values */
-#define WDG_LOAD_VAL ((50 * 1000) / 32768)
+#define WDG_LOAD_VAL ((50 * 32768) / 1000)
#define WDG_LOAD_MASK GENMASK(15, 0)
#define WDG_UNLOCK_KEY 0xe551
+struct sprd_adi_wdg { /* NOTE(review): presumably per-PMIC watchdog register offsets; fields are not used in the visible hunk, confirm */
+ u32 base; /* presumably the watchdog register base (cf. PMIC_WDG_BASE) - TODO confirm */
+ u32 rst_sts; /* presumably the reset status register (cf. PMIC_RST_STATUS) - TODO confirm */
+ u32 wdg_en; /* presumably the module enable register (cf. PMIC_MODULE_EN) - TODO confirm */
+ u32 wdg_clk; /* presumably the clock enable register (cf. PMIC_CLK_EN) - TODO confirm */
+};
+
+struct sprd_adi_data { /* per-variant parameters, referenced through sprd_adi.data */
+ u32 slave_offset; /* presumably one of the ADI_*_SLAVE_OFFSET values - TODO confirm */
+ u32 slave_addr_size; /* exclusive upper bound for register offsets, enforced by sprd_adi_check_addr() */
+ int (*read_check)(u32 val, u32 reg); /* same signature as sprd_adi_read_check_r2/_r3 */
+ int (*restart)(struct notifier_block *this, /* notifier-style restart callback */
+ unsigned long mode, void *cmd);
+ void (*wdg_rst)(void *p); /* NOTE(review): presumably arms a watchdog reset; not shown here, confirm */
+};
+
struct sprd_adi {
struct spi_controller *ctlr;
struct device *dev;
@@ -115,26 +152,21 @@ struct sprd_adi {
unsigned long slave_vbase;
unsigned long slave_pbase;
struct notifier_block restart_handler;
+ const struct sprd_adi_data *data;
};
-static int sprd_adi_check_paddr(struct sprd_adi *sadi, u32 paddr)
+static int sprd_adi_check_addr(struct sprd_adi *sadi, u32 reg)
{
- if (paddr < sadi->slave_pbase || paddr >
- (sadi->slave_pbase + ADI_SLAVE_ADDR_SIZE)) {
+ if (reg >= sadi->data->slave_addr_size) {
dev_err(sadi->dev,
- "slave physical address is incorrect, addr = 0x%x\n",
- paddr);
+ "slave address offset is incorrect, reg = 0x%x\n",
+ reg);
return -EINVAL;
}
return 0;
}
-static unsigned long sprd_adi_to_vaddr(struct sprd_adi *sadi, u32 paddr)
-{
- return (paddr - sadi->slave_pbase + sadi->slave_vbase);
-}
-
static int sprd_adi_drain_fifo(struct sprd_adi *sadi)
{
u32 timeout = ADI_FIFO_DRAIN_TIMEOUT;
@@ -161,11 +193,35 @@ static int sprd_adi_fifo_is_full(struct sprd_adi *sadi)
return readl_relaxed(sadi->base + REG_ADI_ARM_FIFO_STS) & BIT_FIFO_FULL;
}
-static int sprd_adi_read(struct sprd_adi *sadi, u32 reg_paddr, u32 *read_val)
+static int sprd_adi_read_check(u32 val, u32 addr)
+{
+ u32 rd_addr;
+
+ rd_addr = (val & RD_ADDR_MASK) >> RD_ADDR_SHIFT;
+
+ if (rd_addr != addr) {
+ pr_err("ADI read error, addr = 0x%x, val = 0x%x\n", addr, val);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int sprd_adi_read_check_r2(u32 val, u32 reg)
+{
+ return sprd_adi_read_check(val, reg & RDBACK_ADDR_MASK_R2);
+}
+
+static int sprd_adi_read_check_r3(u32 val, u32 reg)
+{
+ return sprd_adi_read_check(val, (reg & RDBACK_ADDR_MASK_R3) >> RDBACK_ADDR_SHIFT_R3);
+}
+
+static int sprd_adi_read(struct sprd_adi *sadi, u32 reg, u32 *read_val)
{
int read_timeout = ADI_READ_TIMEOUT;
unsigned long flags;
- u32 val, rd_addr;
+ u32 val;
int ret = 0;
if (sadi->hwlock) {
@@ -178,11 +234,15 @@ static int sprd_adi_read(struct sprd_adi *sadi, u32 reg_paddr, u32 *read_val)
}
}
+ ret = sprd_adi_check_addr(sadi, reg);
+ if (ret)
+ goto out;
+
/*
- * Set the physical register address need to read into RD_CMD register,
+ * Set the slave address offset to be read into the RD_CMD register,
* then ADI controller will start to transfer automatically.
*/
- writel_relaxed(reg_paddr, sadi->base + REG_ADI_RD_CMD);
+ writel_relaxed(reg, sadi->base + REG_ADI_RD_CMD);
/*
* Wait read operation complete, the BIT_RD_CMD_BUSY will be set
@@ -205,18 +265,15 @@ static int sprd_adi_read(struct sprd_adi *sadi, u32 reg_paddr, u32 *read_val)
}
/*
- * The return value includes data and read register address, from bit 0
- * to bit 15 are data, and from bit 16 to bit 30 are read register
- * address. Then we can check the returned register address to validate
- * data.
+ * The return value before adi r5p0 includes data and read register
+ * address, from bit 0 to bit 15 are data, and from bit 16 to bit 30
+ * are read register address. Then we can check the returned register
+ * address to validate data.
*/
- rd_addr = (val & RD_ADDR_MASK) >> RD_ADDR_SHIFT;
-
- if (rd_addr != (reg_paddr & REG_ADDR_LOW_MASK)) {
- dev_err(sadi->dev, "read error, reg addr = 0x%x, val = 0x%x\n",
- reg_paddr, val);
- ret = -EIO;
- goto out;
+ if (sadi->data->read_check) {
+ ret = sadi->data->read_check(val, reg);
+ if (ret < 0)
+ goto out;
}
*read_val = val & RD_VALUE_MASK;
@@ -227,9 +284,8 @@ out:
return ret;
}
-static int sprd_adi_write(struct sprd_adi *sadi, u32 reg_paddr, u32 val)
+static int sprd_adi_write(struct sprd_adi *sadi, u32 reg, u32 val)
{
- unsigned long reg = sprd_adi_to_vaddr(sadi, reg_paddr);
u32 timeout = ADI_FIFO_DRAIN_TIMEOUT;
unsigned long flags;
int ret;
@@ -244,6 +300,10 @@ static int sprd_adi_write(struct sprd_adi *sadi, u32 reg_paddr, u32 val)
}
}
+ ret = sprd_adi_check_addr(sadi, reg);
+ if (ret)
+ goto out;
+
ret = sprd_adi_drain_fifo(sadi);
if (ret < 0)
goto out;
@@ -254,7 +314,8 @@ static int sprd_adi_write(struct sprd_adi *sadi, u32 reg_paddr, u32 val)
*/
do {
if (!sprd_adi_fifo_is_full(sadi)) {
- writel_relaxed(val, (void __iomem *)reg);
+ /* Writes go through the virtual register address. */
+ writel_relaxed(val, (void __iomem *)(sadi->slave_vbase + reg));
break;
}
@@ -277,60 +338,41 @@ static int sprd_adi_transfer_one(struct spi_controller *ctlr,
struct spi_transfer *t)
{
struct sprd_adi *sadi = spi_controller_get_devdata(ctlr);
- u32 phy_reg, val;
+ u32 reg, val;
int ret;
if (t->rx_buf) {
- phy_reg = *(u32 *)t->rx_buf + sadi->slave_pbase;
-
- ret = sprd_adi_check_paddr(sadi, phy_reg);
- if (ret)
- return ret;
-
- ret = sprd_adi_read(sadi, phy_reg, &val);
- if (ret)
- return ret;
-
+ reg = *(u32 *)t->rx_buf;
+ ret = sprd_adi_read(sadi, reg, &val);
*(u32 *)t->rx_buf = val;
} else if (t->tx_buf) {
u32 *p = (u32 *)t->tx_buf;
-
- /*
- * Get the physical register address need to write and convert
- * the physical address to virtual address. Since we need
- * virtual register address to write.
- */
- phy_reg = *p++ + sadi->slave_pbase;
- ret = sprd_adi_check_paddr(sadi, phy_reg);
- if (ret)
- return ret;
-
+ reg = *p++;
val = *p;
- ret = sprd_adi_write(sadi, phy_reg, val);
- if (ret)
- return ret;
+ ret = sprd_adi_write(sadi, reg, val);
} else {
dev_err(sadi->dev, "no buffer for transfer\n");
- return -EINVAL;
+ ret = -EINVAL;
}
- return 0;
+ return ret;
}
-static void sprd_adi_set_wdt_rst_mode(struct sprd_adi *sadi)
+static void sprd_adi_set_wdt_rst_mode(void *p)
{
#if IS_ENABLED(CONFIG_SPRD_WATCHDOG)
u32 val;
+ struct sprd_adi *sadi = (struct sprd_adi *)p;
- /* Set default watchdog reboot mode */
- sprd_adi_read(sadi, sadi->slave_pbase + PMIC_RST_STATUS, &val);
+ /* Init watchdog reset mode */
+ sprd_adi_read(sadi, PMIC_RST_STATUS, &val);
val |= HWRST_STATUS_WATCHDOG;
- sprd_adi_write(sadi, sadi->slave_pbase + PMIC_RST_STATUS, val);
+ sprd_adi_write(sadi, PMIC_RST_STATUS, val);
#endif
}
-static int sprd_adi_restart_handler(struct notifier_block *this,
- unsigned long mode, void *cmd)
+static int sprd_adi_restart(struct notifier_block *this, unsigned long mode,
+ void *cmd, struct sprd_adi_wdg *wdg)
{
struct sprd_adi *sadi = container_of(this, struct sprd_adi,
restart_handler);
@@ -366,40 +408,40 @@ static int sprd_adi_restart_handler(struct notifier_block *this,
reboot_mode = HWRST_STATUS_NORMAL;
/* Record the reboot mode */
- sprd_adi_read(sadi, sadi->slave_pbase + PMIC_RST_STATUS, &val);
+ sprd_adi_read(sadi, wdg->rst_sts, &val);
val &= ~HWRST_STATUS_WATCHDOG;
val |= reboot_mode;
- sprd_adi_write(sadi, sadi->slave_pbase + PMIC_RST_STATUS, val);
+ sprd_adi_write(sadi, wdg->rst_sts, val);
/* Enable the interface clock of the watchdog */
- sprd_adi_read(sadi, sadi->slave_pbase + PMIC_MODULE_EN, &val);
+ sprd_adi_read(sadi, wdg->wdg_en, &val);
val |= BIT_WDG_EN;
- sprd_adi_write(sadi, sadi->slave_pbase + PMIC_MODULE_EN, val);
+ sprd_adi_write(sadi, wdg->wdg_en, val);
/* Enable the work clock of the watchdog */
- sprd_adi_read(sadi, sadi->slave_pbase + PMIC_CLK_EN, &val);
+ sprd_adi_read(sadi, wdg->wdg_clk, &val);
val |= BIT_WDG_EN;
- sprd_adi_write(sadi, sadi->slave_pbase + PMIC_CLK_EN, val);
+ sprd_adi_write(sadi, wdg->wdg_clk, val);
/* Unlock the watchdog */
- sprd_adi_write(sadi, sadi->slave_pbase + REG_WDG_LOCK, WDG_UNLOCK_KEY);
+ sprd_adi_write(sadi, wdg->base + REG_WDG_LOCK, WDG_UNLOCK_KEY);
- sprd_adi_read(sadi, sadi->slave_pbase + REG_WDG_CTRL, &val);
+ sprd_adi_read(sadi, wdg->base + REG_WDG_CTRL, &val);
val |= BIT_WDG_NEW;
- sprd_adi_write(sadi, sadi->slave_pbase + REG_WDG_CTRL, val);
+ sprd_adi_write(sadi, wdg->base + REG_WDG_CTRL, val);
/* Load the watchdog timeout value, 50ms is always enough. */
- sprd_adi_write(sadi, sadi->slave_pbase + REG_WDG_LOAD_HIGH, 0);
- sprd_adi_write(sadi, sadi->slave_pbase + REG_WDG_LOAD_LOW,
+ sprd_adi_write(sadi, wdg->base + REG_WDG_LOAD_HIGH, 0);
+ sprd_adi_write(sadi, wdg->base + REG_WDG_LOAD_LOW,
WDG_LOAD_VAL & WDG_LOAD_MASK);
/* Start the watchdog to reset system */
- sprd_adi_read(sadi, sadi->slave_pbase + REG_WDG_CTRL, &val);
+ sprd_adi_read(sadi, wdg->base + REG_WDG_CTRL, &val);
val |= BIT_WDG_RUN | BIT_WDG_RST;
- sprd_adi_write(sadi, sadi->slave_pbase + REG_WDG_CTRL, val);
+ sprd_adi_write(sadi, wdg->base + REG_WDG_CTRL, val);
/* Lock the watchdog */
- sprd_adi_write(sadi, sadi->slave_pbase + REG_WDG_LOCK, ~WDG_UNLOCK_KEY);
+ sprd_adi_write(sadi, wdg->base + REG_WDG_LOCK, ~WDG_UNLOCK_KEY);
mdelay(1000);
@@ -407,6 +449,19 @@ static int sprd_adi_restart_handler(struct notifier_block *this,
return NOTIFY_DONE;
}
+static int sprd_adi_restart_sc9860(struct notifier_block *this,
+ unsigned long mode, void *cmd)
+{
+ struct sprd_adi_wdg wdg = {
+ .base = PMIC_WDG_BASE,
+ .rst_sts = PMIC_RST_STATUS,
+ .wdg_en = PMIC_MODULE_EN,
+ .wdg_clk = PMIC_CLK_EN,
+ };
+
+ return sprd_adi_restart(this, mode, cmd, &wdg);
+}
+
static void sprd_adi_hw_init(struct sprd_adi *sadi)
{
struct device_node *np = sadi->dev->of_node;
@@ -458,10 +513,11 @@ static void sprd_adi_hw_init(struct sprd_adi *sadi)
static int sprd_adi_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
+ const struct sprd_adi_data *data;
struct spi_controller *ctlr;
struct sprd_adi *sadi;
struct resource *res;
- u32 num_chipselect;
+ u16 num_chipselect;
int ret;
if (!np) {
@@ -469,6 +525,12 @@ static int sprd_adi_probe(struct platform_device *pdev)
return -ENODEV;
}
+ data = of_device_get_match_data(&pdev->dev);
+ if (!data) {
+ dev_err(&pdev->dev, "no matching driver data found\n");
+ return -EINVAL;
+ }
+
pdev->id = of_alias_get_id(np, "spi");
num_chipselect = of_get_child_count(np);
@@ -486,10 +548,12 @@ static int sprd_adi_probe(struct platform_device *pdev)
goto put_ctlr;
}
- sadi->slave_vbase = (unsigned long)sadi->base + ADI_SLAVE_OFFSET;
- sadi->slave_pbase = res->start + ADI_SLAVE_OFFSET;
+ sadi->slave_vbase = (unsigned long)sadi->base +
+ data->slave_offset;
+ sadi->slave_pbase = res->start + data->slave_offset;
sadi->ctlr = ctlr;
sadi->dev = &pdev->dev;
+ sadi->data = data;
ret = of_hwspin_lock_get_id(np, 0);
if (ret > 0 || (IS_ENABLED(CONFIG_HWSPINLOCK) && ret == 0)) {
sadi->hwlock =
@@ -510,7 +574,9 @@ static int sprd_adi_probe(struct platform_device *pdev)
}
sprd_adi_hw_init(sadi);
- sprd_adi_set_wdt_rst_mode(sadi);
+
+ if (sadi->data->wdg_rst)
+ sadi->data->wdg_rst(sadi);
ctlr->dev.of_node = pdev->dev.of_node;
ctlr->bus_num = pdev->id;
@@ -525,12 +591,14 @@ static int sprd_adi_probe(struct platform_device *pdev)
goto put_ctlr;
}
- sadi->restart_handler.notifier_call = sprd_adi_restart_handler;
- sadi->restart_handler.priority = 128;
- ret = register_restart_handler(&sadi->restart_handler);
- if (ret) {
- dev_err(&pdev->dev, "can not register restart handler\n");
- goto put_ctlr;
+ if (sadi->data->restart) {
+ sadi->restart_handler.notifier_call = sadi->data->restart;
+ sadi->restart_handler.priority = 128;
+ ret = register_restart_handler(&sadi->restart_handler);
+ if (ret) {
+ dev_err(&pdev->dev, "can not register restart handler\n");
+ goto put_ctlr;
+ }
}
return 0;
@@ -549,9 +617,38 @@ static int sprd_adi_remove(struct platform_device *pdev)
return 0;
}
+static struct sprd_adi_data sc9860_data = {
+ .slave_offset = ADI_10BIT_SLAVE_OFFSET,
+ .slave_addr_size = ADI_10BIT_SLAVE_ADDR_SIZE,
+ .read_check = sprd_adi_read_check_r2,
+ .restart = sprd_adi_restart_sc9860,
+ .wdg_rst = sprd_adi_set_wdt_rst_mode,
+};
+
+static struct sprd_adi_data sc9863_data = {
+ .slave_offset = ADI_12BIT_SLAVE_OFFSET,
+ .slave_addr_size = ADI_12BIT_SLAVE_ADDR_SIZE,
+ .read_check = sprd_adi_read_check_r3,
+};
+
+static struct sprd_adi_data ums512_data = {
+ .slave_offset = ADI_15BIT_SLAVE_OFFSET,
+ .slave_addr_size = ADI_15BIT_SLAVE_ADDR_SIZE,
+ .read_check = sprd_adi_read_check_r3,
+};
+
static const struct of_device_id sprd_adi_of_match[] = {
{
.compatible = "sprd,sc9860-adi",
+ .data = &sc9860_data,
+ },
+ {
+ .compatible = "sprd,sc9863-adi",
+ .data = &sc9863_data,
+ },
+ {
+ .compatible = "sprd,ums512-adi",
+ .data = &ums512_data,
},
{ },
};
diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 05618a618939..9bd3fd1652f7 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -162,6 +162,8 @@
#define SPI_3WIRE_TX 3
#define SPI_3WIRE_RX 4
+#define STM32_SPI_AUTOSUSPEND_DELAY 1 /* 1 ms */
+
/*
* use PIO for small transfers, avoiding DMA setup/teardown overhead for drivers
* without fifo buffers.
@@ -568,29 +570,30 @@ static void stm32f4_spi_read_rx(struct stm32_spi *spi)
/**
* stm32h7_spi_read_rxfifo - Read bytes in Receive Data Register
* @spi: pointer to the spi controller data structure
- * @flush: boolean indicating that FIFO should be flushed
*
* Write in rx_buf depends on remaining bytes to avoid to write beyond
* rx_buf end.
*/
-static void stm32h7_spi_read_rxfifo(struct stm32_spi *spi, bool flush)
+static void stm32h7_spi_read_rxfifo(struct stm32_spi *spi)
{
u32 sr = readl_relaxed(spi->base + STM32H7_SPI_SR);
u32 rxplvl = FIELD_GET(STM32H7_SPI_SR_RXPLVL, sr);
while ((spi->rx_len > 0) &&
((sr & STM32H7_SPI_SR_RXP) ||
- (flush && ((sr & STM32H7_SPI_SR_RXWNE) || (rxplvl > 0))))) {
+ ((sr & STM32H7_SPI_SR_EOT) &&
+ ((sr & STM32H7_SPI_SR_RXWNE) || (rxplvl > 0))))) {
u32 offs = spi->cur_xferlen - spi->rx_len;
if ((spi->rx_len >= sizeof(u32)) ||
- (flush && (sr & STM32H7_SPI_SR_RXWNE))) {
+ (sr & STM32H7_SPI_SR_RXWNE)) {
u32 *rx_buf32 = (u32 *)(spi->rx_buf + offs);
*rx_buf32 = readl_relaxed(spi->base + STM32H7_SPI_RXDR);
spi->rx_len -= sizeof(u32);
} else if ((spi->rx_len >= sizeof(u16)) ||
- (flush && (rxplvl >= 2 || spi->cur_bpw > 8))) {
+ (!(sr & STM32H7_SPI_SR_RXWNE) &&
+ (rxplvl >= 2 || spi->cur_bpw > 8))) {
u16 *rx_buf16 = (u16 *)(spi->rx_buf + offs);
*rx_buf16 = readw_relaxed(spi->base + STM32H7_SPI_RXDR);
@@ -606,8 +609,8 @@ static void stm32h7_spi_read_rxfifo(struct stm32_spi *spi, bool flush)
rxplvl = FIELD_GET(STM32H7_SPI_SR_RXPLVL, sr);
}
- dev_dbg(spi->dev, "%s%s: %d bytes left\n", __func__,
- flush ? "(flush)" : "", spi->rx_len);
+ dev_dbg(spi->dev, "%s: %d bytes left (sr=%08x)\n",
+ __func__, spi->rx_len, sr);
}
/**
@@ -674,18 +677,12 @@ static void stm32f4_spi_disable(struct stm32_spi *spi)
* stm32h7_spi_disable - Disable SPI controller
* @spi: pointer to the spi controller data structure
*
- * RX-Fifo is flushed when SPI controller is disabled. To prevent any data
- * loss, use stm32h7_spi_read_rxfifo(flush) to read the remaining bytes in
- * RX-Fifo.
- * Normally, if TSIZE has been configured, we should relax the hardware at the
- * reception of the EOT interrupt. But in case of error, EOT will not be
- * raised. So the subsystem unprepare_message call allows us to properly
- * complete the transfer from an hardware point of view.
+ * RX-Fifo is flushed when SPI controller is disabled.
*/
static void stm32h7_spi_disable(struct stm32_spi *spi)
{
unsigned long flags;
- u32 cr1, sr;
+ u32 cr1;
dev_dbg(spi->dev, "disable controller\n");
@@ -698,25 +695,6 @@ static void stm32h7_spi_disable(struct stm32_spi *spi)
return;
}
- /* Wait on EOT or suspend the flow */
- if (readl_relaxed_poll_timeout_atomic(spi->base + STM32H7_SPI_SR,
- sr, !(sr & STM32H7_SPI_SR_EOT),
- 10, 100000) < 0) {
- if (cr1 & STM32H7_SPI_CR1_CSTART) {
- writel_relaxed(cr1 | STM32H7_SPI_CR1_CSUSP,
- spi->base + STM32H7_SPI_CR1);
- if (readl_relaxed_poll_timeout_atomic(
- spi->base + STM32H7_SPI_SR,
- sr, !(sr & STM32H7_SPI_SR_SUSP),
- 10, 100000) < 0)
- dev_warn(spi->dev,
- "Suspend request timeout\n");
- }
- }
-
- if (!spi->cur_usedma && spi->rx_buf && (spi->rx_len > 0))
- stm32h7_spi_read_rxfifo(spi, true);
-
if (spi->cur_usedma && spi->dma_tx)
dmaengine_terminate_all(spi->dma_tx);
if (spi->cur_usedma && spi->dma_rx)
@@ -911,7 +889,7 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id)
if (__ratelimit(&rs))
dev_dbg_ratelimited(spi->dev, "Communication suspended\n");
if (!spi->cur_usedma && (spi->rx_buf && (spi->rx_len > 0)))
- stm32h7_spi_read_rxfifo(spi, false);
+ stm32h7_spi_read_rxfifo(spi);
/*
* If communication is suspended while using DMA, it means
* that something went wrong, so stop the current transfer
@@ -932,8 +910,10 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id)
if (sr & STM32H7_SPI_SR_EOT) {
if (!spi->cur_usedma && (spi->rx_buf && (spi->rx_len > 0)))
- stm32h7_spi_read_rxfifo(spi, true);
- end = true;
+ stm32h7_spi_read_rxfifo(spi);
+ if (!spi->cur_usedma ||
+ (spi->cur_comm == SPI_SIMPLEX_TX || spi->cur_comm == SPI_3WIRE_TX))
+ end = true;
}
if (sr & STM32H7_SPI_SR_TXP)
@@ -942,7 +922,7 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id)
if (sr & STM32H7_SPI_SR_RXP)
if (!spi->cur_usedma && (spi->rx_buf && (spi->rx_len > 0)))
- stm32h7_spi_read_rxfifo(spi, false);
+ stm32h7_spi_read_rxfifo(spi);
writel_relaxed(sr & mask, spi->base + STM32H7_SPI_IFCR);
@@ -1041,42 +1021,17 @@ static void stm32f4_spi_dma_tx_cb(void *data)
}
/**
- * stm32f4_spi_dma_rx_cb - dma callback
+ * stm32_spi_dma_rx_cb - dma callback
* @data: pointer to the spi controller data structure
*
* DMA callback is called when the transfer is complete for DMA RX channel.
*/
-static void stm32f4_spi_dma_rx_cb(void *data)
+static void stm32_spi_dma_rx_cb(void *data)
{
struct stm32_spi *spi = data;
spi_finalize_current_transfer(spi->master);
- stm32f4_spi_disable(spi);
-}
-
-/**
- * stm32h7_spi_dma_cb - dma callback
- * @data: pointer to the spi controller data structure
- *
- * DMA callback is called when the transfer is complete or when an error
- * occurs. If the transfer is complete, EOT flag is raised.
- */
-static void stm32h7_spi_dma_cb(void *data)
-{
- struct stm32_spi *spi = data;
- unsigned long flags;
- u32 sr;
-
- spin_lock_irqsave(&spi->lock, flags);
-
- sr = readl_relaxed(spi->base + STM32H7_SPI_SR);
-
- spin_unlock_irqrestore(&spi->lock, flags);
-
- if (!(sr & STM32H7_SPI_SR_EOT))
- dev_warn(spi->dev, "DMA error (sr=0x%08x)\n", sr);
-
- /* Now wait for EOT, or SUSP or OVR in case of error */
+ spi->cfg->disable(spi);
}
/**
@@ -1242,11 +1197,13 @@ static void stm32f4_spi_transfer_one_dma_start(struct stm32_spi *spi)
*/
static void stm32h7_spi_transfer_one_dma_start(struct stm32_spi *spi)
{
- /* Enable the interrupts relative to the end of transfer */
- stm32_spi_set_bits(spi, STM32H7_SPI_IER, STM32H7_SPI_IER_EOTIE |
- STM32H7_SPI_IER_TXTFIE |
- STM32H7_SPI_IER_OVRIE |
- STM32H7_SPI_IER_MODFIE);
+ uint32_t ier = STM32H7_SPI_IER_OVRIE | STM32H7_SPI_IER_MODFIE;
+
+ /* Enable the interrupts */
+ if (spi->cur_comm == SPI_SIMPLEX_TX || spi->cur_comm == SPI_3WIRE_TX)
+ ier |= STM32H7_SPI_IER_EOTIE | STM32H7_SPI_IER_TXTFIE;
+
+ stm32_spi_set_bits(spi, STM32H7_SPI_IER, ier);
stm32_spi_enable(spi);
@@ -1645,10 +1602,6 @@ static int stm32_spi_transfer_one(struct spi_master *master,
struct stm32_spi *spi = spi_master_get_devdata(master);
int ret;
- /* Don't do anything on 0 bytes transfers */
- if (transfer->len == 0)
- return 0;
-
spi->tx_buf = transfer->tx_buf;
spi->rx_buf = transfer->rx_buf;
spi->tx_len = spi->tx_buf ? transfer->len : 0;
@@ -1762,7 +1715,7 @@ static const struct stm32_spi_cfg stm32f4_spi_cfg = {
.set_mode = stm32f4_spi_set_mode,
.transfer_one_dma_start = stm32f4_spi_transfer_one_dma_start,
.dma_tx_cb = stm32f4_spi_dma_tx_cb,
- .dma_rx_cb = stm32f4_spi_dma_rx_cb,
+ .dma_rx_cb = stm32_spi_dma_rx_cb,
.transfer_one_irq = stm32f4_spi_transfer_one_irq,
.irq_handler_event = stm32f4_spi_irq_event,
.irq_handler_thread = stm32f4_spi_irq_thread,
@@ -1782,8 +1735,11 @@ static const struct stm32_spi_cfg stm32h7_spi_cfg = {
.set_data_idleness = stm32h7_spi_data_idleness,
.set_number_of_data = stm32h7_spi_number_of_data,
.transfer_one_dma_start = stm32h7_spi_transfer_one_dma_start,
- .dma_rx_cb = stm32h7_spi_dma_cb,
- .dma_tx_cb = stm32h7_spi_dma_cb,
+ .dma_rx_cb = stm32_spi_dma_rx_cb,
+ /*
+ * dma_tx_cb is not necessary since in case of TX, dma is followed by
+ * SPI access hence handling is performed within the SPI interrupt
+ */
.transfer_one_irq = stm32h7_spi_transfer_one_irq,
.irq_handler_thread = stm32h7_spi_irq_thread,
.baud_rate_div_min = STM32H7_SPI_MBR_DIV_MIN,
@@ -1927,6 +1883,9 @@ static int stm32_spi_probe(struct platform_device *pdev)
if (spi->dma_tx || spi->dma_rx)
master->can_dma = stm32_spi_can_dma;
+ pm_runtime_set_autosuspend_delay(&pdev->dev,
+ STM32_SPI_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(&pdev->dev);
pm_runtime_set_active(&pdev->dev);
pm_runtime_get_noresume(&pdev->dev);
pm_runtime_enable(&pdev->dev);
@@ -1938,6 +1897,9 @@ static int stm32_spi_probe(struct platform_device *pdev)
goto err_pm_disable;
}
+ pm_runtime_mark_last_busy(&pdev->dev);
+ pm_runtime_put_autosuspend(&pdev->dev);
+
dev_info(&pdev->dev, "driver initialized\n");
return 0;
@@ -1946,6 +1908,7 @@ err_pm_disable:
pm_runtime_disable(&pdev->dev);
pm_runtime_put_noidle(&pdev->dev);
pm_runtime_set_suspended(&pdev->dev);
+ pm_runtime_dont_use_autosuspend(&pdev->dev);
err_dma_release:
if (spi->dma_tx)
dma_release_channel(spi->dma_tx);
@@ -1970,6 +1933,8 @@ static int stm32_spi_remove(struct platform_device *pdev)
pm_runtime_disable(&pdev->dev);
pm_runtime_put_noidle(&pdev->dev);
pm_runtime_set_suspended(&pdev->dev);
+ pm_runtime_dont_use_autosuspend(&pdev->dev);
+
if (master->dma_tx)
dma_release_channel(master->dma_tx);
if (master->dma_rx)
diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c
index 5131141bbf0d..e9de1d958bbd 100644
--- a/drivers/spi/spi-tegra114.c
+++ b/drivers/spi/spi-tegra114.c
@@ -717,12 +717,12 @@ static void tegra_spi_deinit_dma_param(struct tegra_spi_data *tspi,
dma_release_channel(dma_chan);
}
-static int tegra_spi_set_hw_cs_timing(struct spi_device *spi,
- struct spi_delay *setup,
- struct spi_delay *hold,
- struct spi_delay *inactive)
+static int tegra_spi_set_hw_cs_timing(struct spi_device *spi)
{
struct tegra_spi_data *tspi = spi_master_get_devdata(spi->master);
+ struct spi_delay *setup = &spi->cs_setup;
+ struct spi_delay *hold = &spi->cs_hold;
+ struct spi_delay *inactive = &spi->cs_inactive;
u8 setup_dly, hold_dly, inactive_dly;
u32 setup_hold;
u32 spi_cs_timing;
diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c
index 6a726c95ac7a..ebd27f883033 100644
--- a/drivers/spi/spi-tegra20-slink.c
+++ b/drivers/spi/spi-tegra20-slink.c
@@ -1061,33 +1061,12 @@ static int tegra_slink_probe(struct platform_device *pdev)
dev_err(&pdev->dev, "Can not get clock %d\n", ret);
goto exit_free_master;
}
- ret = clk_prepare(tspi->clk);
- if (ret < 0) {
- dev_err(&pdev->dev, "Clock prepare failed %d\n", ret);
- goto exit_free_master;
- }
- ret = clk_enable(tspi->clk);
- if (ret < 0) {
- dev_err(&pdev->dev, "Clock enable failed %d\n", ret);
- goto exit_clk_unprepare;
- }
-
- spi_irq = platform_get_irq(pdev, 0);
- tspi->irq = spi_irq;
- ret = request_threaded_irq(tspi->irq, tegra_slink_isr,
- tegra_slink_isr_thread, IRQF_ONESHOT,
- dev_name(&pdev->dev), tspi);
- if (ret < 0) {
- dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n",
- tspi->irq);
- goto exit_clk_disable;
- }
tspi->rst = devm_reset_control_get_exclusive(&pdev->dev, "spi");
if (IS_ERR(tspi->rst)) {
dev_err(&pdev->dev, "can not get reset\n");
ret = PTR_ERR(tspi->rst);
- goto exit_free_irq;
+ goto exit_free_master;
}
tspi->max_buf_size = SLINK_FIFO_DEPTH << 2;
@@ -1095,7 +1074,7 @@ static int tegra_slink_probe(struct platform_device *pdev)
ret = tegra_slink_init_dma_param(tspi, true);
if (ret < 0)
- goto exit_free_irq;
+ goto exit_free_master;
ret = tegra_slink_init_dma_param(tspi, false);
if (ret < 0)
goto exit_rx_dma_free;
@@ -1106,16 +1085,9 @@ static int tegra_slink_probe(struct platform_device *pdev)
init_completion(&tspi->xfer_completion);
pm_runtime_enable(&pdev->dev);
- if (!pm_runtime_enabled(&pdev->dev)) {
- ret = tegra_slink_runtime_resume(&pdev->dev);
- if (ret)
- goto exit_pm_disable;
- }
-
- ret = pm_runtime_get_sync(&pdev->dev);
- if (ret < 0) {
+ ret = pm_runtime_resume_and_get(&pdev->dev);
+ if (ret) {
dev_err(&pdev->dev, "pm runtime get failed, e = %d\n", ret);
- pm_runtime_put_noidle(&pdev->dev);
goto exit_pm_disable;
}
@@ -1123,33 +1095,43 @@ static int tegra_slink_probe(struct platform_device *pdev)
udelay(2);
reset_control_deassert(tspi->rst);
+ spi_irq = platform_get_irq(pdev, 0);
+ tspi->irq = spi_irq;
+ ret = request_threaded_irq(tspi->irq, tegra_slink_isr,
+ tegra_slink_isr_thread, IRQF_ONESHOT,
+ dev_name(&pdev->dev), tspi);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n",
+ tspi->irq);
+ goto exit_pm_put;
+ }
+
tspi->def_command_reg = SLINK_M_S;
tspi->def_command2_reg = SLINK_CS_ACTIVE_BETWEEN;
tegra_slink_writel(tspi, tspi->def_command_reg, SLINK_COMMAND);
tegra_slink_writel(tspi, tspi->def_command2_reg, SLINK_COMMAND2);
- pm_runtime_put(&pdev->dev);
master->dev.of_node = pdev->dev.of_node;
- ret = devm_spi_register_master(&pdev->dev, master);
+ ret = spi_register_master(master);
if (ret < 0) {
dev_err(&pdev->dev, "can not register to master err %d\n", ret);
- goto exit_pm_disable;
+ goto exit_free_irq;
}
+
+ pm_runtime_put(&pdev->dev);
+
return ret;
+exit_free_irq:
+ free_irq(spi_irq, tspi);
+exit_pm_put:
+ pm_runtime_put(&pdev->dev);
exit_pm_disable:
pm_runtime_disable(&pdev->dev);
- if (!pm_runtime_status_suspended(&pdev->dev))
- tegra_slink_runtime_suspend(&pdev->dev);
+
tegra_slink_deinit_dma_param(tspi, false);
exit_rx_dma_free:
tegra_slink_deinit_dma_param(tspi, true);
-exit_free_irq:
- free_irq(spi_irq, tspi);
-exit_clk_disable:
- clk_disable(tspi->clk);
-exit_clk_unprepare:
- clk_unprepare(tspi->clk);
exit_free_master:
spi_master_put(master);
return ret;
@@ -1160,10 +1142,11 @@ static int tegra_slink_remove(struct platform_device *pdev)
struct spi_master *master = platform_get_drvdata(pdev);
struct tegra_slink_data *tspi = spi_master_get_devdata(master);
+ spi_unregister_master(master);
+
free_irq(tspi->irq, tspi);
- clk_disable(tspi->clk);
- clk_unprepare(tspi->clk);
+ pm_runtime_disable(&pdev->dev);
if (tspi->tx_dma_chan)
tegra_slink_deinit_dma_param(tspi, false);
@@ -1171,10 +1154,6 @@ static int tegra_slink_remove(struct platform_device *pdev)
if (tspi->rx_dma_chan)
tegra_slink_deinit_dma_param(tspi, true);
- pm_runtime_disable(&pdev->dev);
- if (!pm_runtime_status_suspended(&pdev->dev))
- tegra_slink_runtime_suspend(&pdev->dev);
-
return 0;
}
diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c
index 9262c6418463..cfa222c9bd5e 100644
--- a/drivers/spi/spi-zynq-qspi.c
+++ b/drivers/spi/spi-zynq-qspi.c
@@ -545,7 +545,7 @@ static int zynq_qspi_exec_mem_op(struct spi_mem *mem,
zynq_qspi_write_op(xqspi, ZYNQ_QSPI_FIFO_DEPTH, true);
zynq_qspi_write(xqspi, ZYNQ_QSPI_IEN_OFFSET,
ZYNQ_QSPI_IXR_RXTX_MASK);
- if (!wait_for_completion_interruptible_timeout(&xqspi->data_completion,
+ if (!wait_for_completion_timeout(&xqspi->data_completion,
msecs_to_jiffies(1000)))
err = -ETIMEDOUT;
}
@@ -563,7 +563,7 @@ static int zynq_qspi_exec_mem_op(struct spi_mem *mem,
zynq_qspi_write_op(xqspi, ZYNQ_QSPI_FIFO_DEPTH, true);
zynq_qspi_write(xqspi, ZYNQ_QSPI_IEN_OFFSET,
ZYNQ_QSPI_IXR_RXTX_MASK);
- if (!wait_for_completion_interruptible_timeout(&xqspi->data_completion,
+ if (!wait_for_completion_timeout(&xqspi->data_completion,
msecs_to_jiffies(1000)))
err = -ETIMEDOUT;
}
@@ -579,7 +579,7 @@ static int zynq_qspi_exec_mem_op(struct spi_mem *mem,
zynq_qspi_write_op(xqspi, ZYNQ_QSPI_FIFO_DEPTH, true);
zynq_qspi_write(xqspi, ZYNQ_QSPI_IEN_OFFSET,
ZYNQ_QSPI_IXR_RXTX_MASK);
- if (!wait_for_completion_interruptible_timeout(&xqspi->data_completion,
+ if (!wait_for_completion_timeout(&xqspi->data_completion,
msecs_to_jiffies(1000)))
err = -ETIMEDOUT;
@@ -603,7 +603,7 @@ static int zynq_qspi_exec_mem_op(struct spi_mem *mem,
zynq_qspi_write_op(xqspi, ZYNQ_QSPI_FIFO_DEPTH, true);
zynq_qspi_write(xqspi, ZYNQ_QSPI_IEN_OFFSET,
ZYNQ_QSPI_IXR_RXTX_MASK);
- if (!wait_for_completion_interruptible_timeout(&xqspi->data_completion,
+ if (!wait_for_completion_timeout(&xqspi->data_completion,
msecs_to_jiffies(1000)))
err = -ETIMEDOUT;
}
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 45b7f6f7a245..57e2499ec1ed 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -844,9 +844,9 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force)
if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio) ||
!spi->controller->set_cs_timing) {
if (activate)
- spi_delay_exec(&spi->controller->cs_setup, NULL);
+ spi_delay_exec(&spi->cs_setup, NULL);
else
- spi_delay_exec(&spi->controller->cs_hold, NULL);
+ spi_delay_exec(&spi->cs_hold, NULL);
}
if (spi->mode & SPI_CS_HIGH)
@@ -889,7 +889,7 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force)
if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio) ||
!spi->controller->set_cs_timing) {
if (!activate)
- spi_delay_exec(&spi->controller->cs_inactive, NULL);
+ spi_delay_exec(&spi->cs_inactive, NULL);
}
}
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 24e9469ea35b..6dc29ce3b4bf 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -677,7 +677,6 @@ static struct class *spidev_class;
static const struct of_device_id spidev_dt_ids[] = {
{ .compatible = "rohm,dh2228fv" },
{ .compatible = "lineartechnology,ltc2488" },
- { .compatible = "ge,achc" },
{ .compatible = "semtech,sx1301" },
{ .compatible = "lwn,bk4" },
{ .compatible = "dh,dhcom-board" },
diff --git a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c
index 6f5fe5092154..c8a625667e81 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c
@@ -1904,8 +1904,8 @@ int __atomisp_streamoff(struct file *file, void *fh, enum v4l2_buf_type type)
dev_dbg(isp->dev, "Stop stream on pad %d for asd%d\n",
atomisp_subdev_source_pad(vdev), asd->index);
- BUG_ON(!rt_mutex_is_locked(&isp->mutex));
- BUG_ON(!mutex_is_locked(&isp->streamoff_mutex));
+ lockdep_assert_held(&isp->mutex);
+ lockdep_assert_held(&isp->streamoff_mutex);
if (type != V4L2_BUF_TYPE_VIDEO_CAPTURE) {
dev_dbg(isp->dev, "unsupported v4l2 buf type\n");
diff --git a/drivers/staging/media/av7110/av7110.h b/drivers/staging/media/av7110/av7110.h
index b8e8fc8ddbe9..809d938ae166 100644
--- a/drivers/staging/media/av7110/av7110.h
+++ b/drivers/staging/media/av7110/av7110.h
@@ -9,12 +9,11 @@
#include <linux/input.h>
#include <linux/time.h>
-#include "video.h"
-#include "audio.h"
-#include "osd.h"
-
+#include <linux/dvb/video.h>
+#include <linux/dvb/audio.h>
#include <linux/dvb/dmx.h>
#include <linux/dvb/ca.h>
+#include <linux/dvb/osd.h>
#include <linux/dvb/net.h>
#include <linux/mutex.h>
diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c
index dcbba9621b21..5d24c1b6663b 100644
--- a/drivers/staging/octeon/ethernet.c
+++ b/drivers/staging/octeon/ethernet.c
@@ -524,7 +524,7 @@ static const struct net_device_ops cvm_oct_npi_netdev_ops = {
.ndo_start_xmit = cvm_oct_xmit,
.ndo_set_rx_mode = cvm_oct_common_set_multicast_list,
.ndo_set_mac_address = cvm_oct_common_set_mac_address,
- .ndo_do_ioctl = cvm_oct_ioctl,
+ .ndo_eth_ioctl = cvm_oct_ioctl,
.ndo_change_mtu = cvm_oct_common_change_mtu,
.ndo_get_stats = cvm_oct_common_get_stats,
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -540,7 +540,7 @@ static const struct net_device_ops cvm_oct_xaui_netdev_ops = {
.ndo_start_xmit = cvm_oct_xmit,
.ndo_set_rx_mode = cvm_oct_common_set_multicast_list,
.ndo_set_mac_address = cvm_oct_common_set_mac_address,
- .ndo_do_ioctl = cvm_oct_ioctl,
+ .ndo_eth_ioctl = cvm_oct_ioctl,
.ndo_change_mtu = cvm_oct_common_change_mtu,
.ndo_get_stats = cvm_oct_common_get_stats,
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -556,7 +556,7 @@ static const struct net_device_ops cvm_oct_sgmii_netdev_ops = {
.ndo_start_xmit = cvm_oct_xmit,
.ndo_set_rx_mode = cvm_oct_common_set_multicast_list,
.ndo_set_mac_address = cvm_oct_common_set_mac_address,
- .ndo_do_ioctl = cvm_oct_ioctl,
+ .ndo_eth_ioctl = cvm_oct_ioctl,
.ndo_change_mtu = cvm_oct_common_change_mtu,
.ndo_get_stats = cvm_oct_common_get_stats,
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -570,7 +570,7 @@ static const struct net_device_ops cvm_oct_spi_netdev_ops = {
.ndo_start_xmit = cvm_oct_xmit,
.ndo_set_rx_mode = cvm_oct_common_set_multicast_list,
.ndo_set_mac_address = cvm_oct_common_set_mac_address,
- .ndo_do_ioctl = cvm_oct_ioctl,
+ .ndo_eth_ioctl = cvm_oct_ioctl,
.ndo_change_mtu = cvm_oct_common_change_mtu,
.ndo_get_stats = cvm_oct_common_get_stats,
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -586,7 +586,7 @@ static const struct net_device_ops cvm_oct_rgmii_netdev_ops = {
.ndo_start_xmit = cvm_oct_xmit,
.ndo_set_rx_mode = cvm_oct_common_set_multicast_list,
.ndo_set_mac_address = cvm_oct_common_set_mac_address,
- .ndo_do_ioctl = cvm_oct_ioctl,
+ .ndo_eth_ioctl = cvm_oct_ioctl,
.ndo_change_mtu = cvm_oct_common_change_mtu,
.ndo_get_stats = cvm_oct_common_get_stats,
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -599,7 +599,7 @@ static const struct net_device_ops cvm_oct_pow_netdev_ops = {
.ndo_start_xmit = cvm_oct_xmit_pow,
.ndo_set_rx_mode = cvm_oct_common_set_multicast_list,
.ndo_set_mac_address = cvm_oct_common_set_mac_address,
- .ndo_do_ioctl = cvm_oct_ioctl,
+ .ndo_eth_ioctl = cvm_oct_ioctl,
.ndo_change_mtu = cvm_oct_common_change_mtu,
.ndo_get_stats = cvm_oct_common_get_stats,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/staging/qlge/qlge_ethtool.c b/drivers/staging/qlge/qlge_ethtool.c
index 87d60115ac67..12efcd1057ba 100644
--- a/drivers/staging/qlge/qlge_ethtool.c
+++ b/drivers/staging/qlge/qlge_ethtool.c
@@ -621,7 +621,10 @@ static void qlge_get_regs(struct net_device *ndev,
regs->len = sizeof(struct qlge_reg_dump);
}
-static int qlge_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *c)
+static int qlge_get_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *c,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct qlge_adapter *qdev = netdev_to_qdev(ndev);
@@ -644,7 +647,10 @@ static int qlge_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *c
return 0;
}
-static int qlge_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *c)
+static int qlge_set_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *c,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct qlge_adapter *qdev = netdev_to_qdev(ndev);
diff --git a/drivers/staging/qlge/qlge_main.c b/drivers/staging/qlge/qlge_main.c
index 19a02e958865..8fcdf89da8aa 100644
--- a/drivers/staging/qlge/qlge_main.c
+++ b/drivers/staging/qlge/qlge_main.c
@@ -4547,7 +4547,8 @@ static int qlge_probe(struct pci_dev *pdev,
static int cards_found;
int err;
- devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter));
+ devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter),
+ &pdev->dev);
if (!devlink)
return -ENOMEM;
@@ -4613,7 +4614,7 @@ static int qlge_probe(struct pci_dev *pdev,
goto netdev_free;
}
- err = devlink_register(devlink, &pdev->dev);
+ err = devlink_register(devlink);
if (err)
goto netdev_free;
diff --git a/drivers/staging/rtl8188eu/include/osdep_intf.h b/drivers/staging/rtl8188eu/include/osdep_intf.h
index 5012b9176526..34decb03e92f 100644
--- a/drivers/staging/rtl8188eu/include/osdep_intf.h
+++ b/drivers/staging/rtl8188eu/include/osdep_intf.h
@@ -22,6 +22,8 @@ void rtw_stop_drv_threads(struct adapter *padapter);
void rtw_cancel_all_timer(struct adapter *padapter);
int rtw_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+int rtw_android_priv_cmd(struct net_device *dev, struct ifreq *rq,
+ void __user *data, int cmd);
struct net_device *rtw_init_netdev(void);
u16 rtw_recv_select_queue(struct sk_buff *skb);
diff --git a/drivers/staging/rtl8188eu/include/rtw_android.h b/drivers/staging/rtl8188eu/include/rtw_android.h
index 2c26993b8205..3018fc1e8de8 100644
--- a/drivers/staging/rtl8188eu/include/rtw_android.h
+++ b/drivers/staging/rtl8188eu/include/rtw_android.h
@@ -45,6 +45,7 @@ enum ANDROID_WIFI_CMD {
ANDROID_WIFI_CMD_MAX
};
-int rtw_android_priv_cmd(struct net_device *net, struct ifreq *ifr, int cmd);
+int rtw_android_priv_cmd(struct net_device *net, struct ifreq *ifr,
+ void __user *data, int cmd);
#endif /* __RTW_ANDROID_H__ */
diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
index b958a8d882b0..193a3dde462c 100644
--- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
+++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
@@ -2769,9 +2769,6 @@ int rtw_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
ret = rtw_hostapd_ioctl(dev, &wrq->u.data);
break;
#endif /* CONFIG_88EU_AP_MODE */
- case (SIOCDEVPRIVATE + 1):
- ret = rtw_android_priv_cmd(dev, rq, cmd);
- break;
default:
ret = -EOPNOTSUPP;
break;
diff --git a/drivers/staging/rtl8188eu/os_dep/os_intfs.c b/drivers/staging/rtl8188eu/os_dep/os_intfs.c
index 423c382e3d20..596e03e7b286 100644
--- a/drivers/staging/rtl8188eu/os_dep/os_intfs.c
+++ b/drivers/staging/rtl8188eu/os_dep/os_intfs.c
@@ -288,6 +288,7 @@ static const struct net_device_ops rtw_netdev_ops = {
.ndo_set_mac_address = rtw_net_set_mac_address,
.ndo_get_stats = rtw_net_get_stats,
.ndo_do_ioctl = rtw_ioctl,
+ .ndo_siocdevprivate = rtw_android_priv_cmd,
};
static const struct device_type wlan_type = {
diff --git a/drivers/staging/rtl8188eu/os_dep/rtw_android.c b/drivers/staging/rtl8188eu/os_dep/rtw_android.c
index 3c5446999686..a13df3880378 100644
--- a/drivers/staging/rtl8188eu/os_dep/rtw_android.c
+++ b/drivers/staging/rtl8188eu/os_dep/rtw_android.c
@@ -5,6 +5,7 @@
*
******************************************************************************/
+#include <linux/compat.h>
#include <linux/module.h>
#include <linux/netdevice.h>
@@ -116,7 +117,8 @@ static int android_get_p2p_addr(struct net_device *net, char *command,
return ETH_ALEN;
}
-int rtw_android_priv_cmd(struct net_device *net, struct ifreq *ifr, int cmd)
+int rtw_android_priv_cmd(struct net_device *net, struct ifreq *ifr,
+ void __user *data, int cmd)
{
int ret = 0;
char *command;
@@ -124,9 +126,15 @@ int rtw_android_priv_cmd(struct net_device *net, struct ifreq *ifr, int cmd)
int bytes_written = 0;
struct android_wifi_priv_cmd priv_cmd;
- if (!ifr->ifr_data)
+ if (cmd != SIOCDEVPRIVATE)
+ return -EOPNOTSUPP;
+
+ if (in_compat_syscall()) /* to be implemented */
+ return -EOPNOTSUPP;
+
+ if (!data)
return -EINVAL;
- if (copy_from_user(&priv_cmd, ifr->ifr_data, sizeof(priv_cmd)))
+ if (copy_from_user(&priv_cmd, data, sizeof(priv_cmd)))
return -EFAULT;
if (priv_cmd.total_len < 1)
return -EINVAL;
diff --git a/drivers/staging/rtl8723bs/include/osdep_intf.h b/drivers/staging/rtl8723bs/include/osdep_intf.h
index 111e0179712a..5badd441c14b 100644
--- a/drivers/staging/rtl8723bs/include/osdep_intf.h
+++ b/drivers/staging/rtl8723bs/include/osdep_intf.h
@@ -48,6 +48,8 @@ void rtw_stop_drv_threads(struct adapter *padapter);
void rtw_cancel_all_timer(struct adapter *padapter);
int rtw_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+int rtw_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+ void __user *data, int cmd);
int rtw_init_netdev_name(struct net_device *pnetdev, const char *ifname);
struct net_device *rtw_init_netdev(struct adapter *padapter);
diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
index f95000df8942..aa7bd76bb5f1 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
@@ -4485,6 +4485,21 @@ exit:
return err;
}
+int rtw_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+ void __user *data, int cmd)
+{
+ struct iwreq *wrq = (struct iwreq *)rq;
+
+ /* little hope of fixing this, better remove the whole function */
+ if (in_compat_syscall())
+ return -EOPNOTSUPP;
+
+ if (cmd != SIOCDEVPRIVATE)
+ return -EOPNOTSUPP;
+
+ return rtw_ioctl_wext_private(dev, &wrq->u);
+}
+
int rtw_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
struct iwreq *wrq = (struct iwreq *)rq;
@@ -4497,9 +4512,6 @@ int rtw_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
case RTL_IOCTL_HOSTAPD:
ret = rtw_hostapd_ioctl(dev, &wrq->u.data);
break;
- case SIOCDEVPRIVATE:
- ret = rtw_ioctl_wext_private(dev, &wrq->u);
- break;
default:
ret = -EOPNOTSUPP;
break;
diff --git a/drivers/staging/rtl8723bs/os_dep/os_intfs.c b/drivers/staging/rtl8723bs/os_dep/os_intfs.c
index 648456b992bb..9e38b53d3b4a 100644
--- a/drivers/staging/rtl8723bs/os_dep/os_intfs.c
+++ b/drivers/staging/rtl8723bs/os_dep/os_intfs.c
@@ -459,6 +459,7 @@ static const struct net_device_ops rtw_netdev_ops = {
.ndo_set_mac_address = rtw_net_set_mac_address,
.ndo_get_stats = rtw_net_get_stats,
.ndo_do_ioctl = rtw_ioctl,
+ .ndo_siocdevprivate = rtw_siocdevprivate,
};
int rtw_init_netdev_name(struct net_device *pnetdev, const char *ifname)
diff --git a/drivers/staging/wlan-ng/p80211netdev.c b/drivers/staging/wlan-ng/p80211netdev.c
index 6f470e7ba647..1c62130a5eee 100644
--- a/drivers/staging/wlan-ng/p80211netdev.c
+++ b/drivers/staging/wlan-ng/p80211netdev.c
@@ -98,8 +98,8 @@ static int p80211knetdev_stop(struct net_device *netdev);
static netdev_tx_t p80211knetdev_hard_start_xmit(struct sk_buff *skb,
struct net_device *netdev);
static void p80211knetdev_set_multicast_list(struct net_device *dev);
-static int p80211knetdev_do_ioctl(struct net_device *dev, struct ifreq *ifr,
- int cmd);
+static int p80211knetdev_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd);
static int p80211knetdev_set_mac_address(struct net_device *dev, void *addr);
static void p80211knetdev_tx_timeout(struct net_device *netdev, unsigned int txqueue);
static int p80211_rx_typedrop(struct wlandevice *wlandev, u16 fc);
@@ -461,56 +461,8 @@ static void p80211knetdev_set_multicast_list(struct net_device *dev)
wlandev->set_multicast_list(wlandev, dev);
}
-#ifdef SIOCETHTOOL
-
-static int p80211netdev_ethtool(struct wlandevice *wlandev,
- void __user *useraddr)
-{
- u32 ethcmd;
- struct ethtool_drvinfo info;
- struct ethtool_value edata;
-
- memset(&info, 0, sizeof(info));
- memset(&edata, 0, sizeof(edata));
-
- if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
- return -EFAULT;
-
- switch (ethcmd) {
- case ETHTOOL_GDRVINFO:
- info.cmd = ethcmd;
- snprintf(info.driver, sizeof(info.driver), "p80211_%s",
- wlandev->nsdname);
- snprintf(info.version, sizeof(info.version), "%s",
- WLAN_RELEASE);
-
- if (copy_to_user(useraddr, &info, sizeof(info)))
- return -EFAULT;
- return 0;
-#ifdef ETHTOOL_GLINK
- case ETHTOOL_GLINK:
- edata.cmd = ethcmd;
-
- if (wlandev->linkstatus &&
- (wlandev->macmode != WLAN_MACMODE_NONE)) {
- edata.data = 1;
- } else {
- edata.data = 0;
- }
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-#endif
- }
-
- return -EOPNOTSUPP;
-}
-
-#endif
-
/*----------------------------------------------------------------
- * p80211knetdev_do_ioctl
+ * p80211knetdev_siocdevprivate
*
* Handle an ioctl call on one of our devices. Everything Linux
* ioctl specific is done here. Then we pass the contents of the
@@ -537,8 +489,9 @@ static int p80211netdev_ethtool(struct wlandevice *wlandev,
* locks.
*----------------------------------------------------------------
*/
-static int p80211knetdev_do_ioctl(struct net_device *dev,
- struct ifreq *ifr, int cmd)
+static int p80211knetdev_siocdevprivate(struct net_device *dev,
+ struct ifreq *ifr,
+ void __user *data, int cmd)
{
int result = 0;
struct p80211ioctl_req *req = (struct p80211ioctl_req *)ifr;
@@ -547,13 +500,8 @@ static int p80211knetdev_do_ioctl(struct net_device *dev,
netdev_dbg(dev, "rx'd ioctl, cmd=%d, len=%d\n", cmd, req->len);
-#ifdef SIOCETHTOOL
- if (cmd == SIOCETHTOOL) {
- result =
- p80211netdev_ethtool(wlandev, (void __user *)ifr->ifr_data);
- goto bail;
- }
-#endif
+ if (in_compat_syscall())
+ return -EOPNOTSUPP;
/* Test the magic, assume ifr is good if it's there */
if (req->magic != P80211_IOCTL_MAGIC) {
@@ -569,7 +517,7 @@ static int p80211knetdev_do_ioctl(struct net_device *dev,
goto bail;
}
- msgbuf = memdup_user(req->data, req->len);
+ msgbuf = memdup_user(data, req->len);
if (IS_ERR(msgbuf)) {
result = PTR_ERR(msgbuf);
goto bail;
@@ -578,10 +526,8 @@ static int p80211knetdev_do_ioctl(struct net_device *dev,
result = p80211req_dorequest(wlandev, msgbuf);
if (result == 0) {
- if (copy_to_user
- (req->data, msgbuf, req->len)) {
+ if (copy_to_user(data, msgbuf, req->len))
result = -EFAULT;
- }
}
kfree(msgbuf);
@@ -682,7 +628,7 @@ static const struct net_device_ops p80211_netdev_ops = {
.ndo_stop = p80211knetdev_stop,
.ndo_start_xmit = p80211knetdev_hard_start_xmit,
.ndo_set_rx_mode = p80211knetdev_set_multicast_list,
- .ndo_do_ioctl = p80211knetdev_do_ioctl,
+ .ndo_siocdevprivate = p80211knetdev_siocdevprivate,
.ndo_set_mac_address = p80211knetdev_set_mac_address,
.ndo_tx_timeout = p80211knetdev_tx_timeout,
.ndo_validate_addr = eth_validate_addr,
diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
index 99abdc03c44c..dab7e8fb1059 100644
--- a/drivers/thermal/intel/therm_throt.c
+++ b/drivers/thermal/intel/therm_throt.c
@@ -569,13 +569,18 @@ static void notify_thresholds(__u64 msr_val)
platform_thermal_notify(msr_val);
}
+void __weak notify_hwp_interrupt(void)
+{
+ wrmsrl_safe(MSR_HWP_STATUS, 0);
+}
+
/* Thermal transition interrupt handler */
void intel_thermal_interrupt(void)
{
__u64 msr_val;
if (static_cpu_has(X86_FEATURE_HWP))
- wrmsrl_safe(MSR_HWP_STATUS, 0);
+ notify_hwp_interrupt();
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
diff --git a/drivers/thermal/intel/thermal_interrupt.h b/drivers/thermal/intel/thermal_interrupt.h
index 53f427bb58dc..01e7bed2ffc7 100644
--- a/drivers/thermal/intel/thermal_interrupt.h
+++ b/drivers/thermal/intel/thermal_interrupt.h
@@ -12,4 +12,7 @@ extern int (*platform_thermal_notify)(__u64 msr_val);
* callback has rate control */
extern bool (*platform_thermal_package_rate_control)(void);
+/* Handle HWP interrupt */
+extern void notify_hwp_interrupt(void);
+
#endif /* _INTEL_THERMAL_INTERRUPT_H */
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index 5bb928b7873e..3e3b8873fa29 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -1524,11 +1524,11 @@ static int hdlcdev_close(struct net_device *dev)
*
* Return: 0 if success, otherwise error code
*/
-static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int hdlcdev_ioctl(struct net_device *dev, struct if_settings *ifs)
{
const size_t size = sizeof(sync_serial_settings);
sync_serial_settings new_line;
- sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync;
+ sync_serial_settings __user *line = ifs->ifs_ifsu.sync;
struct slgt_info *info = dev_to_port(dev);
unsigned int flags;
@@ -1538,17 +1538,14 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (info->port.count)
return -EBUSY;
- if (cmd != SIOCWANDEV)
- return hdlc_ioctl(dev, ifr, cmd);
-
memset(&new_line, 0, sizeof(new_line));
- switch(ifr->ifr_settings.type) {
+ switch (ifs->type) {
case IF_GET_IFACE: /* return current sync_serial_settings */
- ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL;
- if (ifr->ifr_settings.size < size) {
- ifr->ifr_settings.size = size; /* data size wanted */
+ ifs->type = IF_IFACE_SYNC_SERIAL;
+ if (ifs->size < size) {
+ ifs->size = size; /* data size wanted */
return -ENOBUFS;
}
@@ -1615,7 +1612,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return 0;
default:
- return hdlc_ioctl(dev, ifr, cmd);
+ return hdlc_ioctl(dev, ifs);
}
}
@@ -1688,7 +1685,7 @@ static const struct net_device_ops hdlcdev_ops = {
.ndo_open = hdlcdev_open,
.ndo_stop = hdlcdev_close,
.ndo_start_xmit = hdlc_start_xmit,
- .ndo_do_ioctl = hdlcdev_ioctl,
+ .ndo_siocwandev = hdlcdev_ioctl,
.ndo_tx_timeout = hdlcdev_tx_timeout,
};
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index ef981d3b7bb4..cb72393f92d3 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -2059,7 +2059,7 @@ static void restore_cur(struct vc_data *vc)
enum { ESnormal, ESesc, ESsquare, ESgetpars, ESfunckey,
EShash, ESsetG0, ESsetG1, ESpercent, EScsiignore, ESnonstd,
- ESpalette, ESosc };
+ ESpalette, ESosc, ESapc, ESpm, ESdcs };
/* console_lock is held (except via vc_init()) */
static void reset_terminal(struct vc_data *vc, int do_clear)
@@ -2133,20 +2133,28 @@ static void vc_setGx(struct vc_data *vc, unsigned int which, int c)
vc->vc_translate = set_translate(*charset, vc);
}
+/* is this state an ANSI control string? */
+static bool ansi_control_string(unsigned int state)
+{
+ if (state == ESosc || state == ESapc || state == ESpm || state == ESdcs)
+ return true;
+ return false;
+}
+
/* console_lock is held */
static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c)
{
/*
* Control characters can be used in the _middle_
- * of an escape sequence.
+ * of an escape sequence, aside from ANSI control strings.
*/
- if (vc->vc_state == ESosc && c>=8 && c<=13) /* ... except for OSC */
+ if (ansi_control_string(vc->vc_state) && c >= 8 && c <= 13)
return;
switch (c) {
case 0:
return;
case 7:
- if (vc->vc_state == ESosc)
+ if (ansi_control_string(vc->vc_state))
vc->vc_state = ESnormal;
else if (vc->vc_bell_duration)
kd_mksound(vc->vc_bell_pitch, vc->vc_bell_duration);
@@ -2207,6 +2215,12 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c)
case ']':
vc->vc_state = ESnonstd;
return;
+ case '_':
+ vc->vc_state = ESapc;
+ return;
+ case '^':
+ vc->vc_state = ESpm;
+ return;
case '%':
vc->vc_state = ESpercent;
return;
@@ -2224,6 +2238,9 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c)
if (vc->state.x < VC_TABSTOPS_COUNT)
set_bit(vc->state.x, vc->vc_tab_stop);
return;
+ case 'P':
+ vc->vc_state = ESdcs;
+ return;
case 'Z':
respond_ID(tty);
return;
@@ -2520,8 +2537,14 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c)
vc_setGx(vc, 1, c);
vc->vc_state = ESnormal;
return;
+ case ESapc:
+ return;
case ESosc:
return;
+ case ESpm:
+ return;
+ case ESdcs:
+ return;
default:
vc->vc_state = ESnormal;
}
diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
index 0e0cd9e9e589..3639bb6dc372 100644
--- a/drivers/tty/vt/vt_ioctl.c
+++ b/drivers/tty/vt/vt_ioctl.c
@@ -246,6 +246,8 @@ int vt_waitactive(int n)
*
* XXX It should at least call into the driver, fbdev's definitely need to
* restore their engine state. --BenH
+ *
+ * Called with the console lock held.
*/
static int vt_kdsetmode(struct vc_data *vc, unsigned long mode)
{
@@ -262,7 +264,6 @@ static int vt_kdsetmode(struct vc_data *vc, unsigned long mode)
return -EINVAL;
}
- /* FIXME: this needs the console lock extending */
if (vc->vc_mode == mode)
return 0;
@@ -271,12 +272,10 @@ static int vt_kdsetmode(struct vc_data *vc, unsigned long mode)
return 0;
/* explicitly blank/unblank the screen if switching modes */
- console_lock();
if (mode == KD_TEXT)
do_unblank_screen(1);
else
do_blank_screen(1);
- console_unlock();
return 0;
}
@@ -378,7 +377,10 @@ static int vt_k_ioctl(struct tty_struct *tty, unsigned int cmd,
if (!perm)
return -EPERM;
- return vt_kdsetmode(vc, arg);
+ console_lock();
+ ret = vt_kdsetmode(vc, arg);
+ console_unlock();
+ return ret;
case KDGETMODE:
return put_user(vc->vc_mode, (int __user *)arg);
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index b8d4b2d327b2..ccb68fe6202e 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -940,19 +940,19 @@ static struct dwc3_trb *dwc3_ep_prev_trb(struct dwc3_ep *dep, u8 index)
static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep)
{
- struct dwc3_trb *tmp;
u8 trbs_left;
/*
- * If enqueue & dequeue are equal than it is either full or empty.
- *
- * One way to know for sure is if the TRB right before us has HWO bit
- * set or not. If it has, then we're definitely full and can't fit any
- * more transfers in our ring.
+ * If the enqueue & dequeue are equal then the TRB ring is either full
+ * or empty. It's considered full when there are DWC3_TRB_NUM-1 of TRBs
+ * pending to be processed by the driver.
*/
if (dep->trb_enqueue == dep->trb_dequeue) {
- tmp = dwc3_ep_prev_trb(dep, dep->trb_enqueue);
- if (tmp->ctrl & DWC3_TRB_CTRL_HWO)
+ /*
+ * If there is any request remained in the started_list at
+ * this point, that means there is no TRB available.
+ */
+ if (!list_empty(&dep->started_list))
return 0;
return DWC3_TRB_NUM - 1;
@@ -1741,13 +1741,9 @@ static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep)
{
struct dwc3_request *req;
struct dwc3_request *tmp;
- struct list_head local;
struct dwc3 *dwc = dep->dwc;
-restart:
- list_replace_init(&dep->cancelled_list, &local);
-
- list_for_each_entry_safe(req, tmp, &local, list) {
+ list_for_each_entry_safe(req, tmp, &dep->cancelled_list, list) {
dwc3_gadget_ep_skip_trbs(dep, req);
switch (req->status) {
case DWC3_REQUEST_STATUS_DISCONNECTED:
@@ -1765,9 +1761,6 @@ restart:
break;
}
}
-
- if (!list_empty(&dep->cancelled_list))
- goto restart;
}
static int dwc3_gadget_ep_dequeue(struct usb_ep *ep,
@@ -2250,10 +2243,8 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
ret = wait_for_completion_timeout(&dwc->ep0_in_setup,
msecs_to_jiffies(DWC3_PULL_UP_TIMEOUT));
- if (ret == 0) {
- dev_err(dwc->dev, "timed out waiting for SETUP phase\n");
- return -ETIMEDOUT;
- }
+ if (ret == 0)
+ dev_warn(dwc->dev, "timed out waiting for SETUP phase\n");
}
/*
@@ -2465,6 +2456,7 @@ static int __dwc3_gadget_start(struct dwc3 *dwc)
/* begin to receive SETUP packets */
dwc->ep0state = EP0_SETUP_PHASE;
dwc->link_state = DWC3_LINK_STATE_SS_DIS;
+ dwc->delayed_status = false;
dwc3_ep0_out_start(dwc);
dwc3_gadget_enable_irq(dwc);
@@ -2976,12 +2968,8 @@ static void dwc3_gadget_ep_cleanup_completed_requests(struct dwc3_ep *dep,
{
struct dwc3_request *req;
struct dwc3_request *tmp;
- struct list_head local;
-restart:
- list_replace_init(&dep->started_list, &local);
-
- list_for_each_entry_safe(req, tmp, &local, list) {
+ list_for_each_entry_safe(req, tmp, &dep->started_list, list) {
int ret;
ret = dwc3_gadget_ep_cleanup_completed_request(dep, event,
@@ -2989,9 +2977,6 @@ restart:
if (ret)
break;
}
-
- if (!list_empty(&dep->started_list))
- goto restart;
}
static bool dwc3_gadget_ep_should_continue(struct dwc3_ep *dep)
diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c
index 018dd0978995..9e5c950612d0 100644
--- a/drivers/usb/gadget/function/u_audio.c
+++ b/drivers/usb/gadget/function/u_audio.c
@@ -230,7 +230,13 @@ static void u_audio_iso_fback_complete(struct usb_ep *ep,
int status = req->status;
/* i/f shutting down */
- if (!prm->fb_ep_enabled || req->status == -ESHUTDOWN)
+ if (!prm->fb_ep_enabled) {
+ kfree(req->buf);
+ usb_ep_free_request(ep, req);
+ return;
+ }
+
+ if (req->status == -ESHUTDOWN)
return;
/*
@@ -388,8 +394,6 @@ static inline void free_ep(struct uac_rtd_params *prm, struct usb_ep *ep)
if (!prm->ep_enabled)
return;
- prm->ep_enabled = false;
-
audio_dev = uac->audio_dev;
params = &audio_dev->params;
@@ -407,6 +411,8 @@ static inline void free_ep(struct uac_rtd_params *prm, struct usb_ep *ep)
}
}
+ prm->ep_enabled = false;
+
if (usb_ep_disable(ep))
dev_err(uac->card->dev, "%s:%d Error!\n", __func__, __LINE__);
}
@@ -418,15 +424,16 @@ static inline void free_ep_fback(struct uac_rtd_params *prm, struct usb_ep *ep)
if (!prm->fb_ep_enabled)
return;
- prm->fb_ep_enabled = false;
-
if (prm->req_fback) {
- usb_ep_dequeue(ep, prm->req_fback);
- kfree(prm->req_fback->buf);
- usb_ep_free_request(ep, prm->req_fback);
+ if (usb_ep_dequeue(ep, prm->req_fback)) {
+ kfree(prm->req_fback->buf);
+ usb_ep_free_request(ep, prm->req_fback);
+ }
prm->req_fback = NULL;
}
+ prm->fb_ep_enabled = false;
+
if (usb_ep_disable(ep))
dev_err(uac->card->dev, "%s:%d Error!\n", __func__, __LINE__);
}
diff --git a/drivers/usb/host/xhci-pci-renesas.c b/drivers/usb/host/xhci-pci-renesas.c
index 5923844ed821..ef5e91a5542d 100644
--- a/drivers/usb/host/xhci-pci-renesas.c
+++ b/drivers/usb/host/xhci-pci-renesas.c
@@ -207,7 +207,8 @@ static int renesas_check_rom_state(struct pci_dev *pdev)
return 0;
case RENESAS_ROM_STATUS_NO_RESULT: /* No result yet */
- return 0;
+ dev_dbg(&pdev->dev, "Unknown ROM status ...\n");
+ return -ENOENT;
case RENESAS_ROM_STATUS_ERROR: /* Error State */
default: /* All other states are marked as "Reserved states" */
@@ -224,14 +225,6 @@ static int renesas_fw_check_running(struct pci_dev *pdev)
u8 fw_state;
int err;
- /* Check if device has ROM and loaded, if so skip everything */
- err = renesas_check_rom(pdev);
- if (err) { /* we have rom */
- err = renesas_check_rom_state(pdev);
- if (!err)
- return err;
- }
-
/*
* Test if the device is actually needing the firmware. As most
* BIOSes will initialize the device for us. If the device is
@@ -591,21 +584,39 @@ int renesas_xhci_check_request_fw(struct pci_dev *pdev,
(struct xhci_driver_data *)id->driver_data;
const char *fw_name = driver_data->firmware;
const struct firmware *fw;
+ bool has_rom;
int err;
+ /* Check if device has ROM and loaded, if so skip everything */
+ has_rom = renesas_check_rom(pdev);
+ if (has_rom) {
+ err = renesas_check_rom_state(pdev);
+ if (!err)
+ return 0;
+ else if (err != -ENOENT)
+ has_rom = false;
+ }
+
err = renesas_fw_check_running(pdev);
/* Continue ahead, if the firmware is already running. */
if (err == 0)
return 0;
+ /* no firmware interface available */
if (err != 1)
- return err;
+ return has_rom ? 0 : err;
pci_dev_get(pdev);
- err = request_firmware(&fw, fw_name, &pdev->dev);
+ err = firmware_request_nowarn(&fw, fw_name, &pdev->dev);
pci_dev_put(pdev);
if (err) {
- dev_err(&pdev->dev, "request_firmware failed: %d\n", err);
+ if (has_rom) {
+ dev_info(&pdev->dev, "failed to load firmware %s, fallback to ROM\n",
+ fw_name);
+ return 0;
+ }
+ dev_err(&pdev->dev, "failed to load firmware %s: %d\n",
+ fw_name, err);
return err;
}
diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 8a521b5ea769..2db917eab799 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -851,7 +851,6 @@ static struct usb_serial_driver ch341_device = {
.owner = THIS_MODULE,
.name = "ch341-uart",
},
- .bulk_in_size = 512,
.id_table = id_table,
.num_ports = 1,
.open = ch341_open,
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 039450069ca4..29c765cc8495 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -2074,6 +2074,8 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(4) | RSVD(5) },
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */
.driver_info = RSVD(6) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */
{ USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */
{ USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index b9bb63d749ec..5d05de666597 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -341,6 +341,7 @@ struct tcpm_port {
bool vbus_source;
bool vbus_charge;
+ /* Set to true when Discover_Identity Command is expected to be sent in Ready states. */
bool send_discover;
bool op_vsafe5v;
@@ -370,6 +371,7 @@ struct tcpm_port {
struct hrtimer send_discover_timer;
struct kthread_work send_discover_work;
bool state_machine_running;
+ /* Set to true when VDM State Machine has following actions. */
bool vdm_sm_running;
struct completion tx_complete;
@@ -1431,6 +1433,7 @@ static void tcpm_queue_vdm(struct tcpm_port *port, const u32 header,
/* Set ready, vdm state machine will actually send */
port->vdm_retries = 0;
port->vdm_state = VDM_STATE_READY;
+ port->vdm_sm_running = true;
mod_vdm_delayed_work(port, 0);
}
@@ -1673,7 +1676,6 @@ static int tcpm_pd_svdm(struct tcpm_port *port, struct typec_altmode *adev,
rlen = 1;
} else {
tcpm_register_partner_altmodes(port);
- port->vdm_sm_running = false;
}
break;
case CMD_ENTER_MODE:
@@ -1721,14 +1723,12 @@ static int tcpm_pd_svdm(struct tcpm_port *port, struct typec_altmode *adev,
(VDO_SVDM_VERS(svdm_version));
break;
}
- port->vdm_sm_running = false;
break;
default:
response[0] = p[0] | VDO_CMDT(CMDT_RSP_NAK);
rlen = 1;
response[0] = (response[0] & ~VDO_SVDM_VERS_MASK) |
(VDO_SVDM_VERS(svdm_version));
- port->vdm_sm_running = false;
break;
}
@@ -1737,6 +1737,10 @@ static int tcpm_pd_svdm(struct tcpm_port *port, struct typec_altmode *adev,
return rlen;
}
+static void tcpm_pd_handle_msg(struct tcpm_port *port,
+ enum pd_msg_request message,
+ enum tcpm_ams ams);
+
static void tcpm_handle_vdm_request(struct tcpm_port *port,
const __le32 *payload, int cnt)
{
@@ -1764,11 +1768,25 @@ static void tcpm_handle_vdm_request(struct tcpm_port *port,
port->vdm_state = VDM_STATE_DONE;
}
- if (PD_VDO_SVDM(p[0])) {
+ if (PD_VDO_SVDM(p[0]) && (adev || tcpm_vdm_ams(port) || port->nr_snk_vdo)) {
+ /*
+ * Here a SVDM is received (INIT or RSP or unknown). Set the vdm_sm_running in
+ * advance because we are dropping the lock but may send VDMs soon.
+ * For the cases of INIT received:
+ * - If no response to send, it will be cleared later in this function.
+ * - If there are responses to send, it will be cleared in the state machine.
+ * For the cases of RSP received:
+ * - If no further INIT to send, it will be cleared later in this function.
+ * - Otherwise, it will be cleared in the state machine if timeout or it will go
+ * back here until no further INIT to send.
+ * For the cases of unknown type received:
+ * - We will send NAK and the flag will be cleared in the state machine.
+ */
+ port->vdm_sm_running = true;
rlen = tcpm_pd_svdm(port, adev, p, cnt, response, &adev_action);
} else {
if (port->negotiated_rev >= PD_REV30)
- tcpm_queue_message(port, PD_MSG_CTRL_NOT_SUPP);
+ tcpm_pd_handle_msg(port, PD_MSG_CTRL_NOT_SUPP, NONE_AMS);
}
/*
@@ -1833,6 +1851,8 @@ static void tcpm_handle_vdm_request(struct tcpm_port *port,
if (rlen > 0)
tcpm_queue_vdm(port, response[0], &response[1], rlen - 1);
+ else
+ port->vdm_sm_running = false;
}
static void tcpm_send_vdm(struct tcpm_port *port, u32 vid, int cmd,
@@ -1898,8 +1918,10 @@ static void vdm_run_state_machine(struct tcpm_port *port)
* if there's traffic or we're not in PDO ready state don't send
* a VDM.
*/
- if (port->state != SRC_READY && port->state != SNK_READY)
+ if (port->state != SRC_READY && port->state != SNK_READY) {
+ port->vdm_sm_running = false;
break;
+ }
/* TODO: AMS operation for Unstructured VDM */
if (PD_VDO_SVDM(vdo_hdr) && PD_VDO_CMDT(vdo_hdr) == CMDT_INIT) {
@@ -2471,10 +2493,7 @@ static void tcpm_pd_data_request(struct tcpm_port *port,
NONE_AMS);
break;
case PD_DATA_VENDOR_DEF:
- if (tcpm_vdm_ams(port) || port->nr_snk_vdo)
- tcpm_handle_vdm_request(port, msg->payload, cnt);
- else if (port->negotiated_rev > PD_REV20)
- tcpm_pd_handle_msg(port, PD_MSG_CTRL_NOT_SUPP, NONE_AMS);
+ tcpm_handle_vdm_request(port, msg->payload, cnt);
break;
case PD_DATA_BIST:
port->bist_request = le32_to_cpu(msg->payload[0]);
@@ -2555,10 +2574,6 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
TYPEC_PWR_MODE_PD,
port->pps_data.active,
port->supply_voltage);
- /* Set VDM running flag ASAP */
- if (port->data_role == TYPEC_HOST &&
- port->send_discover)
- port->vdm_sm_running = true;
tcpm_set_state(port, SNK_READY, 0);
} else {
/*
@@ -2596,14 +2611,10 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
switch (port->state) {
case SNK_NEGOTIATE_CAPABILITIES:
/* USB PD specification, Figure 8-43 */
- if (port->explicit_contract) {
+ if (port->explicit_contract)
next_state = SNK_READY;
- if (port->data_role == TYPEC_HOST &&
- port->send_discover)
- port->vdm_sm_running = true;
- } else {
+ else
next_state = SNK_WAIT_CAPABILITIES;
- }
/* Threshold was relaxed before sending Request. Restore it back. */
tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_PD,
@@ -2618,10 +2629,6 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
port->pps_status = (type == PD_CTRL_WAIT ?
-EAGAIN : -EOPNOTSUPP);
- if (port->data_role == TYPEC_HOST &&
- port->send_discover)
- port->vdm_sm_running = true;
-
/* Threshold was relaxed before sending Request. Restore it back. */
tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_PD,
port->pps_data.active,
@@ -2697,10 +2704,6 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
}
break;
case DR_SWAP_SEND:
- if (port->data_role == TYPEC_DEVICE &&
- port->send_discover)
- port->vdm_sm_running = true;
-
tcpm_set_state(port, DR_SWAP_CHANGE_DR, 0);
break;
case PR_SWAP_SEND:
@@ -2738,7 +2741,7 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
PD_MSG_CTRL_NOT_SUPP,
NONE_AMS);
} else {
- if (port->vdm_sm_running) {
+ if (port->send_discover) {
tcpm_queue_message(port, PD_MSG_CTRL_WAIT);
break;
}
@@ -2754,7 +2757,7 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
PD_MSG_CTRL_NOT_SUPP,
NONE_AMS);
} else {
- if (port->vdm_sm_running) {
+ if (port->send_discover) {
tcpm_queue_message(port, PD_MSG_CTRL_WAIT);
break;
}
@@ -2763,7 +2766,7 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
}
break;
case PD_CTRL_VCONN_SWAP:
- if (port->vdm_sm_running) {
+ if (port->send_discover) {
tcpm_queue_message(port, PD_MSG_CTRL_WAIT);
break;
}
@@ -4479,18 +4482,20 @@ static void run_state_machine(struct tcpm_port *port)
/* DR_Swap states */
case DR_SWAP_SEND:
tcpm_pd_send_control(port, PD_CTRL_DR_SWAP);
+ if (port->data_role == TYPEC_DEVICE || port->negotiated_rev > PD_REV20)
+ port->send_discover = true;
tcpm_set_state_cond(port, DR_SWAP_SEND_TIMEOUT,
PD_T_SENDER_RESPONSE);
break;
case DR_SWAP_ACCEPT:
tcpm_pd_send_control(port, PD_CTRL_ACCEPT);
- /* Set VDM state machine running flag ASAP */
- if (port->data_role == TYPEC_DEVICE && port->send_discover)
- port->vdm_sm_running = true;
+ if (port->data_role == TYPEC_DEVICE || port->negotiated_rev > PD_REV20)
+ port->send_discover = true;
tcpm_set_state_cond(port, DR_SWAP_CHANGE_DR, 0);
break;
case DR_SWAP_SEND_TIMEOUT:
tcpm_swap_complete(port, -ETIMEDOUT);
+ port->send_discover = false;
tcpm_ams_finish(port);
tcpm_set_state(port, ready_state(port), 0);
break;
@@ -4502,7 +4507,6 @@ static void run_state_machine(struct tcpm_port *port)
} else {
tcpm_set_roles(port, true, port->pwr_role,
TYPEC_HOST);
- port->send_discover = true;
}
tcpm_ams_finish(port);
tcpm_set_state(port, ready_state(port), 0);
@@ -4645,8 +4649,6 @@ static void run_state_machine(struct tcpm_port *port)
break;
case VCONN_SWAP_SEND_TIMEOUT:
tcpm_swap_complete(port, -ETIMEDOUT);
- if (port->data_role == TYPEC_HOST && port->send_discover)
- port->vdm_sm_running = true;
tcpm_set_state(port, ready_state(port), 0);
break;
case VCONN_SWAP_START:
@@ -4662,14 +4664,10 @@ static void run_state_machine(struct tcpm_port *port)
case VCONN_SWAP_TURN_ON_VCONN:
tcpm_set_vconn(port, true);
tcpm_pd_send_control(port, PD_CTRL_PS_RDY);
- if (port->data_role == TYPEC_HOST && port->send_discover)
- port->vdm_sm_running = true;
tcpm_set_state(port, ready_state(port), 0);
break;
case VCONN_SWAP_TURN_OFF_VCONN:
tcpm_set_vconn(port, false);
- if (port->data_role == TYPEC_HOST && port->send_discover)
- port->vdm_sm_running = true;
tcpm_set_state(port, ready_state(port), 0);
break;
@@ -4677,8 +4675,6 @@ static void run_state_machine(struct tcpm_port *port)
case PR_SWAP_CANCEL:
case VCONN_SWAP_CANCEL:
tcpm_swap_complete(port, port->swap_status);
- if (port->data_role == TYPEC_HOST && port->send_discover)
- port->vdm_sm_running = true;
if (port->pwr_role == TYPEC_SOURCE)
tcpm_set_state(port, SRC_READY, 0);
else
@@ -5028,9 +5024,6 @@ static void _tcpm_pd_vbus_on(struct tcpm_port *port)
switch (port->state) {
case SNK_TRANSITION_SINK_VBUS:
port->explicit_contract = true;
- /* Set the VDM flag ASAP */
- if (port->data_role == TYPEC_HOST && port->send_discover)
- port->vdm_sm_running = true;
tcpm_set_state(port, SNK_READY, 0);
break;
case SNK_DISCOVERY:
@@ -5425,15 +5418,18 @@ static void tcpm_send_discover_work(struct kthread_work *work)
if (!port->send_discover)
goto unlock;
+ if (port->data_role == TYPEC_DEVICE && port->negotiated_rev < PD_REV30) {
+ port->send_discover = false;
+ goto unlock;
+ }
+
/* Retry if the port is not idle */
if ((port->state != SRC_READY && port->state != SNK_READY) || port->vdm_sm_running) {
mod_send_discover_delayed_work(port, SEND_DISCOVER_RETRY_MS);
goto unlock;
}
- /* Only send the Message if the port is host for PD rev2.0 */
- if (port->data_role == TYPEC_HOST || port->negotiated_rev > PD_REV20)
- tcpm_send_vdm(port, USB_SID_PD, CMD_DISCOVER_IDENT, NULL, 0);
+ tcpm_send_vdm(port, USB_SID_PD, CMD_DISCOVER_IDENT, NULL, 0);
unlock:
mutex_unlock(&port->lock);
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 21b78f1cd521..351c6cfb24c3 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -493,9 +493,9 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
dev, &ifc_vdpa_ops, NULL);
- if (adapter == NULL) {
+ if (IS_ERR(adapter)) {
IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
- return -ENOMEM;
+ return PTR_ERR(adapter);
}
pci_set_master(pdev);
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
index dcee6039e966..e59135fa867e 100644
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -512,11 +512,6 @@ out:
mutex_unlock(&mr->mkey_mtx);
}
-static bool map_empty(struct vhost_iotlb *iotlb)
-{
- return !vhost_iotlb_itree_first(iotlb, 0, U64_MAX);
-}
-
int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
bool *change_map)
{
@@ -524,10 +519,6 @@ int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *io
int err = 0;
*change_map = false;
- if (map_empty(iotlb)) {
- mlx5_vdpa_destroy_mr(mvdev);
- return 0;
- }
mutex_lock(&mr->mkey_mtx);
if (mr->initialized) {
mlx5_vdpa_info(mvdev, "memory map update\n");
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 2a31467f7ac5..5906cada2293 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -526,7 +526,6 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
void __iomem *uar_page = ndev->mvdev.res.uar->map;
u32 out[MLX5_ST_SZ_DW(create_cq_out)];
struct mlx5_vdpa_cq *vcq = &mvq->cq;
- unsigned int irqn;
__be64 *pas;
int inlen;
void *cqc;
@@ -566,14 +565,14 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
/* Use vector 0 by default. Consider adding code to choose least used
* vector.
*/
- err = mlx5_vector2eqn(mdev, 0, &eqn, &irqn);
+ err = mlx5_vector2eqn(mdev, 0, &eqn);
if (err)
goto err_vec;
cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
@@ -753,12 +752,12 @@ static int get_queue_type(struct mlx5_vdpa_net *ndev)
type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
/* prefer split queue */
- if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED)
- return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
+ if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
+ return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
- WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT));
+ WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
- return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
+ return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
}
static bool vq_is_tx(u16 idx)
@@ -2030,6 +2029,12 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
return -ENOSPC;
mdev = mgtdev->madev->mdev;
+ if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
+ MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
+ dev_warn(mdev->device, "missing support for split virtqueues\n");
+ return -EOPNOTSUPP;
+ }
+
/* we save one virtqueue for control virtqueue should we require it */
max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 14e024de5cbf..c621cf7feec0 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -251,8 +251,10 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
dev_attr->name);
- if (!vdpasim)
+ if (IS_ERR(vdpasim)) {
+ ret = PTR_ERR(vdpasim);
goto err_alloc;
+ }
vdpasim->dev_attr = *dev_attr;
INIT_WORK(&vdpasim->work, dev_attr->work_fn);
diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
index 7b4a6396c553..fe0527329857 100644
--- a/drivers/vdpa/virtio_pci/vp_vdpa.c
+++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
@@ -436,9 +436,9 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa,
dev, &vp_vdpa_ops, NULL);
- if (vp_vdpa == NULL) {
+ if (IS_ERR(vp_vdpa)) {
dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n");
- return -ENOMEM;
+ return PTR_ERR(vp_vdpa);
}
mdev = &vp_vdpa->mdev;
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 6414bd5741b8..3a249ee7e144 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -643,8 +643,6 @@ static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len)
!vhost_vq_avail_empty(vq->dev, vq);
}
-#define SKB_FRAG_PAGE_ORDER get_order(32768)
-
static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz,
struct page_frag *pfrag, gfp_t gfp)
{
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 210ab35a7ebf..9479f7f79217 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -614,7 +614,8 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
long pinned;
int ret = 0;
- if (msg->iova < v->range.first ||
+ if (msg->iova < v->range.first || !msg->size ||
+ msg->iova > U64_MAX - msg->size + 1 ||
msg->iova + msg->size - 1 > v->range.last)
return -EINVAL;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index b9e853e6094d..59edb5a1ffe2 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -735,10 +735,16 @@ static bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
(sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8);
}
+/* Make sure 64 bit math will not overflow. */
static bool vhost_overflow(u64 uaddr, u64 size)
{
- /* Make sure 64 bit math will not overflow. */
- return uaddr > ULONG_MAX || size > ULONG_MAX || uaddr > ULONG_MAX - size;
+ if (uaddr > ULONG_MAX || size > ULONG_MAX)
+ return true;
+
+ if (!size)
+ return false;
+
+ return uaddr > ULONG_MAX - size + 1;
}
/* Caller should have vq mutex and device mutex. */
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 4af8fa259d65..14e2043d7685 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -359,7 +359,7 @@ __vringh_iov(struct vringh *vrh, u16 i,
iov = wiov;
else {
iov = riov;
- if (unlikely(wiov && wiov->i)) {
+ if (unlikely(wiov && wiov->used)) {
vringh_bad("Readable desc %p after writable",
&descs[i]);
err = -EINVAL;
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 2a6055c0d4d3..1ea0c1f6a1fd 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -354,6 +354,7 @@ int register_virtio_device(struct virtio_device *dev)
virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
INIT_LIST_HEAD(&dev->vqs);
+ spin_lock_init(&dev->vqs_list_lock);
/*
* device_add() causes the bus infrastructure to look for a matching
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 09ed55de07d7..b91bc810a87e 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -1242,12 +1242,19 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
do_online = virtio_mem_bbm_get_bb_state(vm, id) !=
VIRTIO_MEM_BBM_BB_FAKE_OFFLINE;
}
+
+ /*
+ * virtio_mem_set_fake_offline() might sleep, we don't need
+ * the device anymore. See virtio_mem_remove() how races
+ * between memory onlining and device removal are handled.
+ */
+ rcu_read_unlock();
+
if (do_online)
generic_online_page(page, order);
else
virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order,
false);
- rcu_read_unlock();
return;
}
rcu_read_unlock();
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 222d630c41fc..b35bb2d57f62 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -576,6 +576,13 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
struct device *dev = get_device(&vp_dev->vdev.dev);
+ /*
+ * Device is marked broken on surprise removal so that virtio upper
+ * layers can abort any ongoing operation.
+ */
+ if (!pci_device_is_present(pci_dev))
+ virtio_break_device(&vp_dev->vdev);
+
pci_disable_sriov(pci_dev);
unregister_virtio_device(&vp_dev->vdev);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 89bfe46a8a7f..dd95dfd85e98 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/hrtimer.h>
#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
#include <xen/xen.h>
#ifdef DEBUG
@@ -1755,7 +1756,9 @@ static struct virtqueue *vring_create_virtqueue_packed(
cpu_to_le16(vq->packed.event_flags_shadow);
}
+ spin_lock(&vdev->vqs_list_lock);
list_add_tail(&vq->vq.list, &vdev->vqs);
+ spin_unlock(&vdev->vqs_list_lock);
return &vq->vq;
err_desc_extra:
@@ -2229,7 +2232,9 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
memset(vq->split.desc_state, 0, vring.num *
sizeof(struct vring_desc_state_split));
+ spin_lock(&vdev->vqs_list_lock);
list_add_tail(&vq->vq.list, &vdev->vqs);
+ spin_unlock(&vdev->vqs_list_lock);
return &vq->vq;
err_extra:
@@ -2291,6 +2296,10 @@ void vring_del_virtqueue(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
+ spin_lock(&vq->vq.vdev->vqs_list_lock);
+ list_del(&_vq->list);
+ spin_unlock(&vq->vq.vdev->vqs_list_lock);
+
if (vq->we_own_ring) {
if (vq->packed_ring) {
vring_free_queue(vq->vq.vdev,
@@ -2321,7 +2330,6 @@ void vring_del_virtqueue(struct virtqueue *_vq)
kfree(vq->split.desc_state);
kfree(vq->split.desc_extra);
}
- list_del(&_vq->list);
kfree(vq);
}
EXPORT_SYMBOL_GPL(vring_del_virtqueue);
@@ -2373,7 +2381,7 @@ bool virtqueue_is_broken(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
- return vq->broken;
+ return READ_ONCE(vq->broken);
}
EXPORT_SYMBOL_GPL(virtqueue_is_broken);
@@ -2385,10 +2393,14 @@ void virtio_break_device(struct virtio_device *dev)
{
struct virtqueue *_vq;
+ spin_lock(&dev->vqs_list_lock);
list_for_each_entry(_vq, &dev->vqs, list) {
struct vring_virtqueue *vq = to_vvq(_vq);
- vq->broken = true;
+
+ /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
+ WRITE_ONCE(vq->broken, true);
}
+ spin_unlock(&dev->vqs_list_lock);
}
EXPORT_SYMBOL_GPL(virtio_break_device);
diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
index e1a141135992..72eaef2caeb1 100644
--- a/drivers/virtio/virtio_vdpa.c
+++ b/drivers/virtio/virtio_vdpa.c
@@ -151,6 +151,9 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
if (!name)
return NULL;
+ if (index >= vdpa->nvqs)
+ return ERR_PTR(-ENOENT);
+
/* Queue shouldn't already be set up. */
if (ops->get_vq_ready(vdpa, index))
return ERR_PTR(-ENOENT);
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index d7e361fb0548..a78704ae3618 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -198,12 +198,12 @@ static void disable_dynirq(struct irq_data *data);
static DEFINE_PER_CPU(unsigned int, irq_epoch);
-static void clear_evtchn_to_irq_row(unsigned row)
+static void clear_evtchn_to_irq_row(int *evtchn_row)
{
unsigned col;
for (col = 0; col < EVTCHN_PER_ROW; col++)
- WRITE_ONCE(evtchn_to_irq[row][col], -1);
+ WRITE_ONCE(evtchn_row[col], -1);
}
static void clear_evtchn_to_irq_all(void)
@@ -213,7 +213,7 @@ static void clear_evtchn_to_irq_all(void)
for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
if (evtchn_to_irq[row] == NULL)
continue;
- clear_evtchn_to_irq_row(row);
+ clear_evtchn_to_irq_row(evtchn_to_irq[row]);
}
}
@@ -221,6 +221,7 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
{
unsigned row;
unsigned col;
+ int *evtchn_row;
if (evtchn >= xen_evtchn_max_channels())
return -EINVAL;
@@ -233,11 +234,18 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
if (irq == -1)
return 0;
- evtchn_to_irq[row] = (int *)get_zeroed_page(GFP_KERNEL);
- if (evtchn_to_irq[row] == NULL)
+ evtchn_row = (int *) __get_free_pages(GFP_KERNEL, 0);
+ if (evtchn_row == NULL)
return -ENOMEM;
- clear_evtchn_to_irq_row(row);
+ clear_evtchn_to_irq_row(evtchn_row);
+
+ /*
+ * We've prepared an empty row for the mapping. If a different
+ * thread was faster inserting it, we can drop ours.
+ */
+ if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL)
+ free_page((unsigned long) evtchn_row);
}
WRITE_ONCE(evtchn_to_irq[row][col], irq);
@@ -1009,7 +1017,7 @@ static void __unbind_from_irq(unsigned int irq)
int xen_bind_pirq_gsi_to_irq(unsigned gsi,
unsigned pirq, int shareable, char *name)
{
- int irq = -1;
+ int irq;
struct physdev_irq irq_op;
int ret;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 59c32c9b799f..aab5e6538660 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -121,10 +121,6 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
p9_debug(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
- /* No mandatory locks */
- if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
- return -ENOLCK;
-
if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
filemap_write_and_wait(inode->i_mapping);
invalidate_mapping_pages(&inode->i_data, 0, -1);
@@ -312,10 +308,6 @@ static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl)
p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %pD\n",
filp, cmd, fl, filp);
- /* No mandatory locks */
- if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
- goto out_err;
-
if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
filemap_write_and_wait(inode->i_mapping);
invalidate_mapping_pages(&inode->i_data, 0, -1);
@@ -327,7 +319,6 @@ static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl)
ret = v9fs_file_getlock(filp, fl);
else
ret = -EINVAL;
-out_err:
return ret;
}
@@ -348,10 +339,6 @@ static int v9fs_file_flock_dotl(struct file *filp, int cmd,
p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %pD\n",
filp, cmd, fl, filp);
- /* No mandatory locks */
- if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
- goto out_err;
-
if (!(fl->fl_flags & FL_FLOCK))
goto out_err;
@@ -625,12 +612,7 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma);
inode = file_inode(vma->vm_file);
-
- if (!mapping_can_writeback(inode->i_mapping))
- wbc.nr_to_write = 0;
-
- might_sleep();
- sync_inode(inode, &wbc);
+ filemap_fdatawrite_wbc(inode->i_mapping, &wbc);
}
diff --git a/fs/Kconfig b/fs/Kconfig
index a7749c126b8e..b11bd4b387e1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -101,16 +101,6 @@ config FILE_LOCKING
for filesystems like NFS and for the flock() system
call. Disabling this option saves about 11k.
-config MANDATORY_FILE_LOCKING
- bool "Enable Mandatory file locking"
- depends on FILE_LOCKING
- default y
- help
- This option enables files appropriately marked files on appropriely
- mounted filesystems to support mandatory locking.
-
- To the best of my knowledge this is dead code that no one cares about.
-
source "fs/crypto/Kconfig"
source "fs/verity/Kconfig"
@@ -358,7 +348,15 @@ config NFS_V4_2_SSC_HELPER
source "net/sunrpc/Kconfig"
source "fs/ceph/Kconfig"
+
source "fs/cifs/Kconfig"
+source "fs/ksmbd/Kconfig"
+
+config CIFS_COMMON
+ tristate
+ default y if CIFS=y
+ default m if CIFS=m
+
source "fs/coda/Kconfig"
source "fs/afs/Kconfig"
source "fs/9p/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index f98f3e691c37..354e2ba3ee67 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -96,7 +96,9 @@ obj-$(CONFIG_LOCKD) += lockd/
obj-$(CONFIG_NLS) += nls/
obj-$(CONFIG_UNICODE) += unicode/
obj-$(CONFIG_SYSV_FS) += sysv/
+obj-$(CONFIG_CIFS_COMMON) += cifs_common/
obj-$(CONFIG_CIFS) += cifs/
+obj-$(CONFIG_SMB_SERVER) += ksmbd/
obj-$(CONFIG_HPFS_FS) += hpfs/
obj-$(CONFIG_NTFS_FS) += ntfs/
obj-$(CONFIG_UFS_FS) += ufs/
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index cb3054c7843e..c4210a3964d8 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -772,10 +772,6 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
fl->fl_type, fl->fl_flags,
(long long) fl->fl_start, (long long) fl->fl_end);
- /* AFS doesn't support mandatory locks */
- if (__mandatory_lock(&vnode->vfs_inode) && fl->fl_type != F_UNLCK)
- return -ENOLCK;
-
if (IS_GETLK(cmd))
return afs_do_getlk(file, fl);
diff --git a/fs/aio.c b/fs/aio.c
index 76ce0cc3ee4e..51b08ab01dff 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1695,7 +1695,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
list_del(&iocb->ki_list);
iocb->ki_res.res = mangle_poll(mask);
req->done = true;
- if (iocb->ki_eventfd && eventfd_signal_count()) {
+ if (iocb->ki_eventfd && eventfd_signal_allowed()) {
iocb = NULL;
INIT_WORK(&req->work, aio_poll_put_work);
schedule_work(&req->work);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9ef4f1fc2cb0..45df6cbccf12 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -35,6 +35,7 @@
#include <linux/uaccess.h>
#include <linux/suspend.h>
#include "internal.h"
+#include "../block/blk.h"
struct bdev_inode {
struct block_device bdev;
@@ -385,7 +386,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
(bdev_logical_block_size(bdev) - 1))
return -EINVAL;
- bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
+ bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
dio = container_of(bio, struct blkdev_dio, bio);
dio->is_sync = is_sync = is_sync_kiocb(iocb);
@@ -513,7 +514,9 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
static __init int blkdev_init(void)
{
- return bioset_init(&blkdev_dio_pool, 4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
+ return bioset_init(&blkdev_dio_pool, 4,
+ offsetof(struct blkdev_dio, bio),
+ BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE);
}
module_init(blkdev_init);
@@ -686,7 +689,8 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence)
return retval;
}
-int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
+static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
struct inode *bd_inode = bdev_file_inode(filp);
struct block_device *bdev = I_BDEV(bd_inode);
@@ -707,7 +711,6 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
return error;
}
-EXPORT_SYMBOL(blkdev_fsync);
/**
* bdev_read_page() - Start reading a page from a block device
@@ -801,7 +804,6 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
if (!ei)
return NULL;
memset(&ei->bdev, 0, sizeof(ei->bdev));
- ei->bdev.bd_bdi = &noop_backing_dev_info;
return &ei->vfs_inode;
}
@@ -812,8 +814,15 @@ static void bdev_free_inode(struct inode *inode)
free_percpu(bdev->bd_stats);
kfree(bdev->bd_meta_info);
- if (!bdev_is_partition(bdev))
+ if (!bdev_is_partition(bdev)) {
+ if (bdev->bd_disk && bdev->bd_disk->bdi)
+ bdi_put(bdev->bd_disk->bdi);
kfree(bdev->bd_disk);
+ }
+
+ if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR)
+ blk_free_ext_minor(MINOR(bdev->bd_dev));
+
kmem_cache_free(bdev_cachep, BDEV_I(inode));
}
@@ -826,16 +835,9 @@ static void init_once(void *data)
static void bdev_evict_inode(struct inode *inode)
{
- struct block_device *bdev = &BDEV_I(inode)->bdev;
truncate_inode_pages_final(&inode->i_data);
invalidate_inode_buffers(inode); /* is it needed here? */
clear_inode(inode);
- /* Detach inode from wb early as bdi_put() may free bdi->wb */
- inode_detach_wb(inode);
- if (bdev->bd_bdi != &noop_backing_dev_info) {
- bdi_put(bdev->bd_bdi);
- bdev->bd_bdi = &noop_backing_dev_info;
- }
}
static const struct super_operations bdev_sops = {
@@ -902,9 +904,6 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
bdev->bd_disk = disk;
bdev->bd_partno = partno;
bdev->bd_inode = inode;
-#ifdef CONFIG_SYSFS
- INIT_LIST_HEAD(&bdev->bd_holder_disks);
-#endif
bdev->bd_stats = alloc_percpu(struct disk_stats);
if (!bdev->bd_stats) {
iput(inode);
@@ -921,31 +920,6 @@ void bdev_add(struct block_device *bdev, dev_t dev)
insert_inode_hash(bdev->bd_inode);
}
-static struct block_device *bdget(dev_t dev)
-{
- struct inode *inode;
-
- inode = ilookup(blockdev_superblock, dev);
- if (!inode)
- return NULL;
- return &BDEV_I(inode)->bdev;
-}
-
-/**
- * bdgrab -- Grab a reference to an already referenced block device
- * @bdev: Block device to grab a reference to.
- *
- * Returns the block_device with an additional reference when successful,
- * or NULL if the inode is already beeing freed.
- */
-struct block_device *bdgrab(struct block_device *bdev)
-{
- if (!igrab(bdev->bd_inode))
- return NULL;
- return bdev;
-}
-EXPORT_SYMBOL(bdgrab);
-
long nr_blockdev_pages(void)
{
struct inode *inode;
@@ -959,12 +933,6 @@ long nr_blockdev_pages(void)
return ret;
}
-void bdput(struct block_device *bdev)
-{
- iput(bdev->bd_inode);
-}
-EXPORT_SYMBOL(bdput);
-
/**
* bd_may_claim - test whether a block device can be claimed
* @bdev: block device of interest
@@ -1094,148 +1062,6 @@ void bd_abort_claiming(struct block_device *bdev, void *holder)
}
EXPORT_SYMBOL(bd_abort_claiming);
-#ifdef CONFIG_SYSFS
-struct bd_holder_disk {
- struct list_head list;
- struct gendisk *disk;
- int refcnt;
-};
-
-static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
- struct gendisk *disk)
-{
- struct bd_holder_disk *holder;
-
- list_for_each_entry(holder, &bdev->bd_holder_disks, list)
- if (holder->disk == disk)
- return holder;
- return NULL;
-}
-
-static int add_symlink(struct kobject *from, struct kobject *to)
-{
- return sysfs_create_link(from, to, kobject_name(to));
-}
-
-static void del_symlink(struct kobject *from, struct kobject *to)
-{
- sysfs_remove_link(from, kobject_name(to));
-}
-
-/**
- * bd_link_disk_holder - create symlinks between holding disk and slave bdev
- * @bdev: the claimed slave bdev
- * @disk: the holding disk
- *
- * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
- *
- * This functions creates the following sysfs symlinks.
- *
- * - from "slaves" directory of the holder @disk to the claimed @bdev
- * - from "holders" directory of the @bdev to the holder @disk
- *
- * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
- * passed to bd_link_disk_holder(), then:
- *
- * /sys/block/dm-0/slaves/sda --> /sys/block/sda
- * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
- *
- * The caller must have claimed @bdev before calling this function and
- * ensure that both @bdev and @disk are valid during the creation and
- * lifetime of these symlinks.
- *
- * CONTEXT:
- * Might sleep.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
-{
- struct bd_holder_disk *holder;
- int ret = 0;
-
- mutex_lock(&bdev->bd_disk->open_mutex);
-
- WARN_ON_ONCE(!bdev->bd_holder);
-
- /* FIXME: remove the following once add_disk() handles errors */
- if (WARN_ON(!disk->slave_dir || !bdev->bd_holder_dir))
- goto out_unlock;
-
- holder = bd_find_holder_disk(bdev, disk);
- if (holder) {
- holder->refcnt++;
- goto out_unlock;
- }
-
- holder = kzalloc(sizeof(*holder), GFP_KERNEL);
- if (!holder) {
- ret = -ENOMEM;
- goto out_unlock;
- }
-
- INIT_LIST_HEAD(&holder->list);
- holder->disk = disk;
- holder->refcnt = 1;
-
- ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
- if (ret)
- goto out_free;
-
- ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
- if (ret)
- goto out_del;
- /*
- * bdev could be deleted beneath us which would implicitly destroy
- * the holder directory. Hold on to it.
- */
- kobject_get(bdev->bd_holder_dir);
-
- list_add(&holder->list, &bdev->bd_holder_disks);
- goto out_unlock;
-
-out_del:
- del_symlink(disk->slave_dir, bdev_kobj(bdev));
-out_free:
- kfree(holder);
-out_unlock:
- mutex_unlock(&bdev->bd_disk->open_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(bd_link_disk_holder);
-
-/**
- * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
- * @bdev: the calimed slave bdev
- * @disk: the holding disk
- *
- * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
- *
- * CONTEXT:
- * Might sleep.
- */
-void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
-{
- struct bd_holder_disk *holder;
-
- mutex_lock(&bdev->bd_disk->open_mutex);
-
- holder = bd_find_holder_disk(bdev, disk);
-
- if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
- del_symlink(disk->slave_dir, bdev_kobj(bdev));
- del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
- kobject_put(bdev->bd_holder_dir);
- list_del_init(&holder->list);
- kfree(holder);
- }
-
- mutex_unlock(&bdev->bd_disk->open_mutex);
-}
-EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
-#endif
-
static void blkdev_flush_mapping(struct block_device *bdev)
{
WARN_ON_ONCE(bdev->bd_holders);
@@ -1260,11 +1086,8 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
}
}
- if (!bdev->bd_openers) {
+ if (!bdev->bd_openers)
set_init_blocksize(bdev);
- if (bdev->bd_bdi == &noop_backing_dev_info)
- bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
- }
if (test_bit(GD_NEED_PART_SCAN, &disk->state))
bdev_disk_changed(disk, false);
bdev->bd_openers++;
@@ -1282,16 +1105,14 @@ static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
static int blkdev_get_part(struct block_device *part, fmode_t mode)
{
struct gendisk *disk = part->bd_disk;
- struct block_device *whole;
int ret;
if (part->bd_openers)
goto done;
- whole = bdgrab(disk->part0);
- ret = blkdev_get_whole(whole, mode);
+ ret = blkdev_get_whole(bdev_whole(part), mode);
if (ret)
- goto out_put_whole;
+ return ret;
ret = -ENXIO;
if (!bdev_nr_sectors(part))
@@ -1299,16 +1120,12 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
disk->open_partitions++;
set_init_blocksize(part);
- if (part->bd_bdi == &noop_backing_dev_info)
- part->bd_bdi = bdi_get(disk->queue->backing_dev_info);
done:
part->bd_openers++;
return 0;
out_blkdev_put:
- blkdev_put_whole(whole, mode);
-out_put_whole:
- bdput(whole);
+ blkdev_put_whole(bdev_whole(part), mode);
return ret;
}
@@ -1321,42 +1138,42 @@ static void blkdev_put_part(struct block_device *part, fmode_t mode)
blkdev_flush_mapping(part);
whole->bd_disk->open_partitions--;
blkdev_put_whole(whole, mode);
- bdput(whole);
}
struct block_device *blkdev_get_no_open(dev_t dev)
{
struct block_device *bdev;
- struct gendisk *disk;
+ struct inode *inode;
- bdev = bdget(dev);
- if (!bdev) {
+ inode = ilookup(blockdev_superblock, dev);
+ if (!inode) {
blk_request_module(dev);
- bdev = bdget(dev);
- if (!bdev)
+ inode = ilookup(blockdev_superblock, dev);
+ if (!inode)
return NULL;
}
- disk = bdev->bd_disk;
- if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj))
- goto bdput;
- if ((disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
- goto put_disk;
- if (!try_module_get(bdev->bd_disk->fops->owner))
- goto put_disk;
+ /* switch from the inode reference to a device mode one: */
+ bdev = &BDEV_I(inode)->bdev;
+ if (!kobject_get_unless_zero(&bdev->bd_device.kobj))
+ bdev = NULL;
+ iput(inode);
+
+ if (!bdev)
+ return NULL;
+ if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN) ||
+ !try_module_get(bdev->bd_disk->fops->owner)) {
+ put_device(&bdev->bd_device);
+ return NULL;
+ }
+
return bdev;
-put_disk:
- put_disk(disk);
-bdput:
- bdput(bdev);
- return NULL;
}
void blkdev_put_no_open(struct block_device *bdev)
{
module_put(bdev->bd_disk->fops->owner);
- put_disk(bdev->bd_disk);
- bdput(bdev);
+ put_device(&bdev->bd_device);
}
/**
@@ -1409,7 +1226,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
mutex_lock(&disk->open_mutex);
ret = -ENXIO;
- if (!(disk->flags & GENHD_FL_UP))
+ if (!disk_live(disk))
goto abort_claiming;
if (bdev_is_partition(bdev))
ret = blkdev_get_part(bdev, mode);
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index cec88a66bd6c..3dcf9bcc2326 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -36,6 +36,7 @@ btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
btrfs-$(CONFIG_BLK_DEV_ZONED) += zoned.o
+btrfs-$(CONFIG_FS_VERITY) += verity.o
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index d95eb5c8cb37..c9f9789e828f 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -53,7 +53,8 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
}
static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
- struct inode *inode, struct posix_acl *acl, int type)
+ struct user_namespace *mnt_userns,
+ struct inode *inode, struct posix_acl *acl, int type)
{
int ret, size = 0;
const char *name;
@@ -114,12 +115,12 @@ int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
umode_t old_mode = inode->i_mode;
if (type == ACL_TYPE_ACCESS && acl) {
- ret = posix_acl_update_mode(&init_user_ns, inode,
+ ret = posix_acl_update_mode(mnt_userns, inode,
&inode->i_mode, &acl);
if (ret)
return ret;
}
- ret = __btrfs_set_acl(NULL, inode, acl, type);
+ ret = __btrfs_set_acl(NULL, mnt_userns, inode, acl, type);
if (ret)
inode->i_mode = old_mode;
return ret;
@@ -140,14 +141,14 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
return ret;
if (default_acl) {
- ret = __btrfs_set_acl(trans, inode, default_acl,
+ ret = __btrfs_set_acl(trans, &init_user_ns, inode, default_acl,
ACL_TYPE_DEFAULT);
posix_acl_release(default_acl);
}
if (acl) {
if (!ret)
- ret = __btrfs_set_acl(trans, inode, acl,
+ ret = __btrfs_set_acl(trans, &init_user_ns, inode, acl,
ACL_TYPE_ACCESS);
posix_acl_release(acl);
}
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 78b202d198b8..f735b8798ba1 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1211,7 +1211,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
again:
head = NULL;
- ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
if (ret < 0)
goto out;
BUG_ON(ret == 0);
@@ -1488,14 +1488,14 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots,
- bool ignore_offset, bool skip_commit_root_sem)
+ bool skip_commit_root_sem)
{
int ret;
if (!trans && !skip_commit_root_sem)
down_read(&fs_info->commit_root_sem);
ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
- time_seq, roots, ignore_offset);
+ time_seq, roots, false);
if (!trans && !skip_commit_root_sem)
up_read(&fs_info->commit_root_sem);
return ret;
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index ff5f07f9940b..ba454032dbe2 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -47,7 +47,7 @@ int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
const u64 *extent_item_pos, bool ignore_offset);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 time_seq, struct ulist **roots, bool ignore_offset,
+ u64 time_seq, struct ulist **roots,
bool skip_commit_root_sem);
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
u32 name_len, unsigned long name_off,
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 9e7d9d0c763d..a3b830b8410a 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1561,7 +1561,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
div64_u64(zone_unusable * 100, bg->length));
trace_btrfs_reclaim_block_group(bg);
ret = btrfs_relocate_chunk(fs_info, bg->start);
- if (ret)
+ if (ret && ret != -EAGAIN)
btrfs_err(fs_info, "error relocating chunk %llu",
bg->start);
@@ -2105,11 +2105,22 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
bg->used = em->len;
bg->flags = map->type;
ret = btrfs_add_block_group_cache(fs_info, bg);
+ /*
+ * We may have some valid block group cache added already, in
+ * that case we skip to the next one.
+ */
+ if (ret == -EEXIST) {
+ ret = 0;
+ btrfs_put_block_group(bg);
+ continue;
+ }
+
if (ret) {
btrfs_remove_free_space_cache(bg);
btrfs_put_block_group(bg);
break;
}
+
btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
0, 0, &space_info);
bg->space_info = space_info;
@@ -2212,6 +2223,14 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
ret = check_chunk_block_group_mappings(info);
error:
btrfs_free_path(path);
+ /*
+ * We've hit some error while reading the extent tree, and have
+ * rescue=ibadroots mount option.
+ * Try to fill the tree using dummy block groups so that the user can
+ * continue to mount and grab their data.
+ */
+ if (ret && btrfs_test_opt(info, IGNOREBADROOTS))
+ ret = fill_dummy_bgs(info);
return ret;
}
@@ -2244,6 +2263,95 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans,
return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi));
}
+static int insert_dev_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_device *device, u64 chunk_offset,
+ u64 start, u64 num_bytes)
+{
+ struct btrfs_fs_info *fs_info = device->fs_info;
+ struct btrfs_root *root = fs_info->dev_root;
+ struct btrfs_path *path;
+ struct btrfs_dev_extent *extent;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ int ret;
+
+ WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
+ WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = device->devid;
+ key.type = BTRFS_DEV_EXTENT_KEY;
+ key.offset = start;
+ ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*extent));
+ if (ret)
+ goto out;
+
+ leaf = path->nodes[0];
+ extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent);
+ btrfs_set_dev_extent_chunk_tree(leaf, extent, BTRFS_CHUNK_TREE_OBJECTID);
+ btrfs_set_dev_extent_chunk_objectid(leaf, extent,
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
+
+ btrfs_set_dev_extent_length(leaf, extent, num_bytes);
+ btrfs_mark_buffer_dirty(leaf);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * This function belongs to phase 2.
+ *
+ * See the comment at btrfs_chunk_alloc() for details about the chunk allocation
+ * phases.
+ */
+static int insert_dev_extents(struct btrfs_trans_handle *trans,
+ u64 chunk_offset, u64 chunk_size)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_device *device;
+ struct extent_map *em;
+ struct map_lookup *map;
+ u64 dev_offset;
+ u64 stripe_size;
+ int i;
+ int ret = 0;
+
+ em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
+ if (IS_ERR(em))
+ return PTR_ERR(em);
+
+ map = em->map_lookup;
+ stripe_size = em->orig_block_len;
+
+ /*
+ * Take the device list mutex to prevent races with the final phase of
+ * a device replace operation that replaces the device object associated
+ * with the map's stripes, because the device object's id can change
+ * at any time during that final phase of the device replace operation
+ * (dev-replace.c:btrfs_dev_replace_finishing()), so we could grab the
+ * replaced device and then see it with an ID of BTRFS_DEV_REPLACE_DEVID,
+ * resulting in persisting a device extent item with such ID.
+ */
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ for (i = 0; i < map->num_stripes; i++) {
+ device = map->stripes[i].dev;
+ dev_offset = map->stripes[i].physical;
+
+ ret = insert_dev_extent(trans, device, chunk_offset, dev_offset,
+ stripe_size);
+ if (ret)
+ break;
+ }
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+
+ free_extent_map(em);
+ return ret;
+}
+
/*
* This function, btrfs_create_pending_block_groups(), belongs to the phase 2 of
* chunk allocation.
@@ -2278,8 +2386,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
if (ret)
btrfs_abort_transaction(trans, ret);
}
- ret = btrfs_finish_chunk_alloc(trans, block_group->start,
- block_group->length);
+ ret = insert_dev_extents(trans, block_group->start,
+ block_group->length);
if (ret)
btrfs_abort_transaction(trans, ret);
add_block_group_free_space(trans, block_group);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index c652e19ad74e..76ee1452c57b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -51,6 +51,13 @@ enum {
* the file range, inode's io_tree).
*/
BTRFS_INODE_NO_DELALLOC_FLUSH,
+ /*
+ * Set when we are working on enabling verity for a file. Computing and
+ * writing the whole Merkle tree can take a while so we want to prevent
+ * races where two separate tasks attempt to simultaneously start verity
+ * on the same file.
+ */
+ BTRFS_INODE_VERITY_IN_PROGRESS,
};
/* in memory btrfs inode */
@@ -189,8 +196,10 @@ struct btrfs_inode {
*/
u64 csum_bytes;
- /* flags field from the on disk inode */
+ /* Backwards incompatible flags, lower half of inode_item::flags */
u32 flags;
+ /* Read-only compatibility flags, upper half of inode_item::flags */
+ u32 ro_flags;
/*
* Counters to keep track of the number of extent item's we may use due
@@ -348,6 +357,22 @@ struct btrfs_dio_private {
u8 csums[];
};
+/*
+ * btrfs_inode_item stores flags in a u64, btrfs_inode stores them in two
+ * separate u32s. These two functions convert between the two representations.
+ */
+static inline u64 btrfs_inode_combine_flags(u32 flags, u32 ro_flags)
+{
+ return (flags | ((u64)ro_flags << 32));
+}
+
+static inline void btrfs_inode_split_flags(u64 inode_item_flags,
+ u32 *flags, u32 *ro_flags)
+{
+ *flags = (u32)inode_item_flags;
+ *ro_flags = (u32)(inode_item_flags >> 32);
+}
+
/* Array of bytes with variable length, hexadecimal format 0x1234 */
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 169508609324..86816088927f 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -243,47 +243,6 @@ struct btrfsic_state {
u32 datablock_size;
};
-static void btrfsic_block_init(struct btrfsic_block *b);
-static struct btrfsic_block *btrfsic_block_alloc(void);
-static void btrfsic_block_free(struct btrfsic_block *b);
-static void btrfsic_block_link_init(struct btrfsic_block_link *n);
-static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
-static void btrfsic_block_link_free(struct btrfsic_block_link *n);
-static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
-static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
-static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
-static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
-static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
- struct btrfsic_block_hashtable *h);
-static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
-static struct btrfsic_block *btrfsic_block_hashtable_lookup(
- struct block_device *bdev,
- u64 dev_bytenr,
- struct btrfsic_block_hashtable *h);
-static void btrfsic_block_link_hashtable_init(
- struct btrfsic_block_link_hashtable *h);
-static void btrfsic_block_link_hashtable_add(
- struct btrfsic_block_link *l,
- struct btrfsic_block_link_hashtable *h);
-static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
-static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
- struct block_device *bdev_ref_to,
- u64 dev_bytenr_ref_to,
- struct block_device *bdev_ref_from,
- u64 dev_bytenr_ref_from,
- struct btrfsic_block_link_hashtable *h);
-static void btrfsic_dev_state_hashtable_init(
- struct btrfsic_dev_state_hashtable *h);
-static void btrfsic_dev_state_hashtable_add(
- struct btrfsic_dev_state *ds,
- struct btrfsic_dev_state_hashtable *h);
-static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
-static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev,
- struct btrfsic_dev_state_hashtable *h);
-static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
-static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
-static int btrfsic_process_superblock(struct btrfsic_state *state,
- struct btrfs_fs_devices *fs_devices);
static int btrfsic_process_metablock(struct btrfsic_state *state,
struct btrfsic_block *block,
struct btrfsic_block_data_ctx *block_ctx,
@@ -313,14 +272,6 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
static int btrfsic_read_block(struct btrfsic_state *state,
struct btrfsic_block_data_ctx *block_ctx);
-static void btrfsic_dump_database(struct btrfsic_state *state);
-static int btrfsic_test_for_metadata(struct btrfsic_state *state,
- char **datav, unsigned int num_pages);
-static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
- u64 dev_bytenr, char **mapped_datav,
- unsigned int num_pages,
- struct bio *bio, int *bio_is_patched,
- int submit_bio_bh_rw);
static int btrfsic_process_written_superblock(
struct btrfsic_state *state,
struct btrfsic_block *const block,
@@ -1558,10 +1509,8 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
/* Pages must be unmapped in reverse order */
while (num_pages > 0) {
num_pages--;
- if (block_ctx->datav[num_pages]) {
- kunmap_local(block_ctx->datav[num_pages]);
+ if (block_ctx->datav[num_pages])
block_ctx->datav[num_pages] = NULL;
- }
if (block_ctx->pagev[num_pages]) {
__free_page(block_ctx->pagev[num_pages]);
block_ctx->pagev[num_pages] = NULL;
@@ -1638,7 +1587,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
i = j;
}
for (i = 0; i < num_pages; i++)
- block_ctx->datav[i] = kmap_local_page(block_ctx->pagev[i]);
+ block_ctx->datav[i] = page_address(block_ctx->pagev[i]);
return block_ctx->len;
}
@@ -2703,7 +2652,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
bio_for_each_segment(bvec, bio, iter) {
BUG_ON(bvec.bv_len != PAGE_SIZE);
- mapped_datav[i] = kmap_local_page(bvec.bv_page);
+ mapped_datav[i] = page_address(bvec.bv_page);
i++;
if (dev_state->state->print_mask &
@@ -2716,9 +2665,6 @@ static void __btrfsic_submit_bio(struct bio *bio)
mapped_datav, segs,
bio, &bio_is_patched,
bio->bi_opf);
- /* Unmap in reverse order */
- for (--i; i >= 0; i--)
- kunmap_local(mapped_datav[i]);
kfree(mapped_datav);
} else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) {
if (dev_state->state->print_mask &
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 30d82cdf128c..7869ad12bc6e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -172,10 +172,9 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
/* Hash through the page sector by sector */
for (pg_offset = 0; pg_offset < bytes_left;
pg_offset += sectorsize) {
- kaddr = kmap_atomic(page);
+ kaddr = page_address(page);
crypto_shash_digest(shash, kaddr + pg_offset,
sectorsize, csum);
- kunmap_atomic(kaddr);
if (memcmp(&csum, cb_sum, csum_size) != 0) {
btrfs_print_data_csum_error(inode, disk_start,
@@ -565,6 +564,16 @@ static noinline int add_ra_bio_pages(struct inode *inode,
if (isize == 0)
return 0;
+ /*
+ * For current subpage support, we only support 64K page size,
+ * which means maximum compressed extent size (128K) is just 2x page
+ * size.
+ * This makes readahead less effective, so here disable readahead for
+ * subpage for now, until full compressed write is supported.
+ */
+ if (btrfs_sb(inode->i_sb)->sectorsize < PAGE_SIZE)
+ return 0;
+
end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
while (last_offset < compressed_end) {
@@ -673,6 +682,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
struct page *page;
struct bio *comp_bio;
u64 cur_disk_byte = bio->bi_iter.bi_sector << 9;
+ u64 file_offset;
u64 em_len;
u64 em_start;
struct extent_map *em;
@@ -682,15 +692,17 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
em_tree = &BTRFS_I(inode)->extent_tree;
+ file_offset = bio_first_bvec_all(bio)->bv_offset +
+ page_offset(bio_first_page_all(bio));
+
/* we need the actual starting offset of this extent in the file */
read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree,
- page_offset(bio_first_page_all(bio)),
- fs_info->sectorsize);
+ em = lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize);
read_unlock(&em_tree->lock);
if (!em)
return BLK_STS_IOERR;
+ ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
if (!cb)
@@ -721,8 +733,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
goto fail1;
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
- cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS |
- __GFP_HIGHMEM);
+ cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS);
if (!cb->compressed_pages[pg_index]) {
faili = pg_index - 1;
ret = BLK_STS_RESOURCE;
@@ -1261,96 +1272,82 @@ void __cold btrfs_exit_compress(void)
}
/*
- * Copy uncompressed data from working buffer to pages.
+ * Copy decompressed data from working buffer to pages.
+ *
+ * @buf: The decompressed data buffer
+ * @buf_len: The decompressed data length
+ * @decompressed: Number of bytes that are already decompressed inside the
+ * compressed extent
+ * @cb: The compressed extent descriptor
+ * @orig_bio: The original bio that the caller wants to read for
+ *
+ * An easier to understand graph is like below:
+ *
+ * |<- orig_bio ->| |<- orig_bio->|
+ * |<------- full decompressed extent ----->|
+ * |<----------- @cb range ---->|
+ * | |<-- @buf_len -->|
+ * |<--- @decompressed --->|
+ *
+ * Note that, @cb can be a subpage of the full decompressed extent, but
+ * @cb->start always has the same as the orig_file_offset value of the full
+ * decompressed extent.
*
- * buf_start is the byte offset we're of the start of our workspace buffer.
+ * When reading compressed extent, we have to read the full compressed extent,
+ * while @orig_bio may only want part of the range.
+ * Thus this function will ensure only data covered by @orig_bio will be copied
+ * to.
*
- * total_out is the last byte of the buffer
+ * Return 0 if we have copied all needed contents for @orig_bio.
+ * Return >0 if we need continue decompress.
*/
-int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
- unsigned long total_out, u64 disk_start,
- struct bio *bio)
+int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
+ struct compressed_bio *cb, u32 decompressed)
{
- unsigned long buf_offset;
- unsigned long current_buf_start;
- unsigned long start_byte;
- unsigned long prev_start_byte;
- unsigned long working_bytes = total_out - buf_start;
- unsigned long bytes;
- struct bio_vec bvec = bio_iter_iovec(bio, bio->bi_iter);
-
- /*
- * start byte is the first byte of the page we're currently
- * copying into relative to the start of the compressed data.
- */
- start_byte = page_offset(bvec.bv_page) - disk_start;
-
- /* we haven't yet hit data corresponding to this page */
- if (total_out <= start_byte)
- return 1;
-
- /*
- * the start of the data we care about is offset into
- * the middle of our working buffer
- */
- if (total_out > start_byte && buf_start < start_byte) {
- buf_offset = start_byte - buf_start;
- working_bytes -= buf_offset;
- } else {
- buf_offset = 0;
- }
- current_buf_start = buf_start;
-
- /* copy bytes from the working buffer into the pages */
- while (working_bytes > 0) {
- bytes = min_t(unsigned long, bvec.bv_len,
- PAGE_SIZE - (buf_offset % PAGE_SIZE));
- bytes = min(bytes, working_bytes);
-
- memcpy_to_page(bvec.bv_page, bvec.bv_offset, buf + buf_offset,
- bytes);
- flush_dcache_page(bvec.bv_page);
+ struct bio *orig_bio = cb->orig_bio;
+ /* Offset inside the full decompressed extent */
+ u32 cur_offset;
+
+ cur_offset = decompressed;
+ /* The main loop to do the copy */
+ while (cur_offset < decompressed + buf_len) {
+ struct bio_vec bvec;
+ size_t copy_len;
+ u32 copy_start;
+ /* Offset inside the full decompressed extent */
+ u32 bvec_offset;
+
+ bvec = bio_iter_iovec(orig_bio, orig_bio->bi_iter);
+ /*
+ * cb->start may underflow, but subtracting that value can still
+ * give us correct offset inside the full decompressed extent.
+ */
+ bvec_offset = page_offset(bvec.bv_page) + bvec.bv_offset - cb->start;
- buf_offset += bytes;
- working_bytes -= bytes;
- current_buf_start += bytes;
+ /* Haven't reached the bvec range, exit */
+ if (decompressed + buf_len <= bvec_offset)
+ return 1;
- /* check if we need to pick another page */
- bio_advance(bio, bytes);
- if (!bio->bi_iter.bi_size)
- return 0;
- bvec = bio_iter_iovec(bio, bio->bi_iter);
- prev_start_byte = start_byte;
- start_byte = page_offset(bvec.bv_page) - disk_start;
+ copy_start = max(cur_offset, bvec_offset);
+ copy_len = min(bvec_offset + bvec.bv_len,
+ decompressed + buf_len) - copy_start;
+ ASSERT(copy_len);
/*
- * We need to make sure we're only adjusting
- * our offset into compression working buffer when
- * we're switching pages. Otherwise we can incorrectly
- * keep copying when we were actually done.
+ * Extra range check to ensure we didn't go beyond
+ * @buf + @buf_len.
*/
- if (start_byte != prev_start_byte) {
- /*
- * make sure our new page is covered by this
- * working buffer
- */
- if (total_out <= start_byte)
- return 1;
+ ASSERT(copy_start - decompressed < buf_len);
+ memcpy_to_page(bvec.bv_page, bvec.bv_offset,
+ buf + copy_start - decompressed, copy_len);
+ flush_dcache_page(bvec.bv_page);
+ cur_offset += copy_len;
- /*
- * the next page in the biovec might not be adjacent
- * to the last page, but it might still be found
- * inside this working buffer. bump our offset pointer
- */
- if (total_out > start_byte &&
- current_buf_start < start_byte) {
- buf_offset = start_byte - buf_start;
- working_bytes = total_out - start_byte;
- current_buf_start = buf_start + buf_offset;
- }
- }
+ bio_advance(orig_bio, copy_len);
+ /* Finished the bio */
+ if (!orig_bio->bi_iter.bi_size)
+ return 0;
}
-
return 1;
}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index c359f20920d0..399be0b435bf 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -86,9 +86,8 @@ int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
unsigned long *total_out);
int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
unsigned long start_byte, size_t srclen, size_t destlen);
-int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
- unsigned long total_out, u64 disk_start,
- struct bio *bio);
+int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
+ struct compressed_bio *cb, u32 decompressed);
blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
unsigned int len, u64 disk_start,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c5c08c87e130..84627cbd5b5b 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -726,21 +726,21 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
/*
* search for key in the extent_buffer. The items start at offset p,
- * and they are item_size apart. There are 'max' items in p.
+ * and they are item_size apart.
*
* the slot in the array is returned via slot, and it points to
* the place where you would insert key if it is not found in
* the array.
*
- * slot may point to max if the key is bigger than all of the keys
+ * Slot may point to total number of items if the key is bigger than
+ * all of the keys
*/
static noinline int generic_bin_search(struct extent_buffer *eb,
unsigned long p, int item_size,
- const struct btrfs_key *key,
- int max, int *slot)
+ const struct btrfs_key *key, int *slot)
{
int low = 0;
- int high = max;
+ int high = btrfs_header_nritems(eb);
int ret;
const int key_size = sizeof(struct btrfs_disk_key);
@@ -799,15 +799,11 @@ int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
if (btrfs_header_level(eb) == 0)
return generic_bin_search(eb,
offsetof(struct btrfs_leaf, items),
- sizeof(struct btrfs_item),
- key, btrfs_header_nritems(eb),
- slot);
+ sizeof(struct btrfs_item), key, slot);
else
return generic_bin_search(eb,
offsetof(struct btrfs_node, ptrs),
- sizeof(struct btrfs_key_ptr),
- key, btrfs_header_nritems(eb),
- slot);
+ sizeof(struct btrfs_key_ptr), key, slot);
}
static void root_add_used(struct btrfs_root *root, u32 size)
@@ -1237,7 +1233,6 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
u64 target;
u64 nread = 0;
u64 nread_max;
- struct extent_buffer *eb;
u32 nr;
u32 blocksize;
u32 nscan = 0;
@@ -1266,10 +1261,14 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
search = btrfs_node_blockptr(node, slot);
blocksize = fs_info->nodesize;
- eb = find_extent_buffer(fs_info, search);
- if (eb) {
- free_extent_buffer(eb);
- return;
+ if (path->reada != READA_FORWARD_ALWAYS) {
+ struct extent_buffer *eb;
+
+ eb = find_extent_buffer(fs_info, search);
+ if (eb) {
+ free_extent_buffer(eb);
+ return;
+ }
}
target = search;
@@ -2103,6 +2102,27 @@ again:
}
/*
+ * Execute search and call btrfs_previous_item to traverse backwards if the item
+ * was not found.
+ *
+ * Return 0 if found, 1 if not found and < 0 if error.
+ */
+int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
+ struct btrfs_path *path)
+{
+ int ret;
+
+ ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+ if (ret > 0)
+ ret = btrfs_previous_item(root, path, key->objectid, key->type);
+
+ if (ret == 0)
+ btrfs_item_key_to_cpu(path->nodes[0], key, path->slots[0]);
+
+ return ret;
+}
+
+/*
* adjust the pointers going up the tree, starting at level
* making sure the right key of each node is points to 'key'.
* This is used after shifting pointers to the left, so it stops
@@ -4358,16 +4378,6 @@ next:
return 1;
}
-/*
- * search the tree again to find a leaf with greater keys
- * returns 0 if it found something or 1 if there are no greater leaves.
- * returns < 0 on io errors.
- */
-int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
-{
- return btrfs_next_old_leaf(root, path, 0);
-}
-
int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
u64 time_seq)
{
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e5e53e592d4f..f07c82fafa04 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -281,7 +281,8 @@ struct btrfs_super_block {
#define BTRFS_FEATURE_COMPAT_RO_SUPP \
(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | \
- BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)
+ BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID | \
+ BTRFS_FEATURE_COMPAT_RO_VERITY)
#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
@@ -1012,8 +1013,6 @@ struct btrfs_fs_info {
u64 zoned;
};
- /* Max size to emit ZONE_APPEND write command */
- u64 max_zone_append_size;
struct mutex zoned_meta_io_lock;
spinlock_t treelog_bg_lock;
u64 treelog_bg;
@@ -1484,20 +1483,20 @@ do { \
/*
* Inode flags
*/
-#define BTRFS_INODE_NODATASUM (1 << 0)
-#define BTRFS_INODE_NODATACOW (1 << 1)
-#define BTRFS_INODE_READONLY (1 << 2)
-#define BTRFS_INODE_NOCOMPRESS (1 << 3)
-#define BTRFS_INODE_PREALLOC (1 << 4)
-#define BTRFS_INODE_SYNC (1 << 5)
-#define BTRFS_INODE_IMMUTABLE (1 << 6)
-#define BTRFS_INODE_APPEND (1 << 7)
-#define BTRFS_INODE_NODUMP (1 << 8)
-#define BTRFS_INODE_NOATIME (1 << 9)
-#define BTRFS_INODE_DIRSYNC (1 << 10)
-#define BTRFS_INODE_COMPRESS (1 << 11)
-
-#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31)
+#define BTRFS_INODE_NODATASUM (1U << 0)
+#define BTRFS_INODE_NODATACOW (1U << 1)
+#define BTRFS_INODE_READONLY (1U << 2)
+#define BTRFS_INODE_NOCOMPRESS (1U << 3)
+#define BTRFS_INODE_PREALLOC (1U << 4)
+#define BTRFS_INODE_SYNC (1U << 5)
+#define BTRFS_INODE_IMMUTABLE (1U << 6)
+#define BTRFS_INODE_APPEND (1U << 7)
+#define BTRFS_INODE_NODUMP (1U << 8)
+#define BTRFS_INODE_NOATIME (1U << 9)
+#define BTRFS_INODE_DIRSYNC (1U << 10)
+#define BTRFS_INODE_COMPRESS (1U << 11)
+
+#define BTRFS_INODE_ROOT_ITEM_INIT (1U << 31)
#define BTRFS_INODE_FLAG_MASK \
(BTRFS_INODE_NODATASUM | \
@@ -1514,6 +1513,10 @@ do { \
BTRFS_INODE_COMPRESS | \
BTRFS_INODE_ROOT_ITEM_INIT)
+#define BTRFS_INODE_RO_VERITY (1U << 0)
+
+#define BTRFS_INODE_RO_FLAG_MASK (BTRFS_INODE_RO_VERITY)
+
struct btrfs_map_token {
struct extent_buffer *eb;
char *kaddr;
@@ -2781,10 +2784,11 @@ enum btrfs_flush_state {
FLUSH_DELAYED_REFS = 4,
FLUSH_DELALLOC = 5,
FLUSH_DELALLOC_WAIT = 6,
- ALLOC_CHUNK = 7,
- ALLOC_CHUNK_FORCE = 8,
- RUN_DELAYED_IPUTS = 9,
- COMMIT_TRANS = 10,
+ FLUSH_DELALLOC_FULL = 7,
+ ALLOC_CHUNK = 8,
+ ALLOC_CHUNK_FORCE = 9,
+ RUN_DELAYED_IPUTS = 10,
+ COMMIT_TRANS = 11,
};
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
@@ -2901,10 +2905,13 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1);
}
-int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
u64 time_seq);
+
+int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
+ struct btrfs_path *path);
+
static inline int btrfs_next_old_item(struct btrfs_root *root,
struct btrfs_path *p, u64 time_seq)
{
@@ -2913,6 +2920,18 @@ static inline int btrfs_next_old_item(struct btrfs_root *root,
return btrfs_next_old_leaf(root, p, time_seq);
return 0;
}
+
+/*
+ * Search the tree again to find a leaf with greater keys.
+ *
+ * Returns 0 if it found something or 1 if there are no greater leaves.
+ * Returns < 0 on error.
+ */
+static inline int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+ return btrfs_next_old_leaf(root, path, 0);
+}
+
static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
{
return btrfs_next_old_item(root, p, 0);
@@ -3145,7 +3164,8 @@ int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
struct btrfs_root *new_root,
- struct btrfs_root *parent_root);
+ struct btrfs_root *parent_root,
+ struct user_namespace *mnt_userns);
void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
unsigned *bits);
void btrfs_clear_delalloc_extent(struct inode *inode,
@@ -3194,10 +3214,10 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
u64 start, u64 end, int *page_started, unsigned long *nr_written,
struct writeback_control *wbc);
-int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end);
+int btrfs_writepage_cow_fixup(struct page *page);
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
struct page *page, u64 start,
- u64 end, int uptodate);
+ u64 end, bool uptodate);
extern const struct dentry_operations btrfs_dentry_operations;
extern const struct iomap_ops btrfs_dio_iomap_ops;
extern const struct iomap_dio_ops btrfs_dio_ops;
@@ -3779,6 +3799,30 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
return signal_pending(current);
}
+/* verity.c */
+#ifdef CONFIG_FS_VERITY
+
+extern const struct fsverity_operations btrfs_verityops;
+int btrfs_drop_verity_items(struct btrfs_inode *inode);
+
+BTRFS_SETGET_FUNCS(verity_descriptor_encryption, struct btrfs_verity_descriptor_item,
+ encryption, 8);
+BTRFS_SETGET_FUNCS(verity_descriptor_size, struct btrfs_verity_descriptor_item,
+ size, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_encryption,
+ struct btrfs_verity_descriptor_item, encryption, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_size,
+ struct btrfs_verity_descriptor_item, size, 64);
+
+#else
+
+static inline int btrfs_drop_verity_items(struct btrfs_inode *inode)
+{
+ return 0;
+}
+
+#endif
+
/* Sanity test specific functions */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
void btrfs_test_destroy_inode(struct inode *inode);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 257c1e18abd4..1e08eb2b27f0 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -6,7 +6,6 @@
#include <linux/slab.h>
#include <linux/iversion.h>
-#include <linux/sched/mm.h>
#include "misc.h"
#include "delayed-inode.h"
#include "disk-io.h"
@@ -672,176 +671,119 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
}
/*
- * This helper will insert some continuous items into the same leaf according
- * to the free space of the leaf.
+ * Insert a single delayed item or a batch of delayed items that have consecutive
+ * keys if they exist.
*/
-static int btrfs_batch_insert_items(struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_delayed_item *item)
+static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_delayed_item *first_item)
{
- struct btrfs_delayed_item *curr, *next;
- int free_space;
- int total_size = 0;
- struct extent_buffer *leaf;
- char *data_ptr;
- struct btrfs_key *keys;
- u32 *data_size;
- struct list_head head;
- int slot;
+ LIST_HEAD(batch);
+ struct btrfs_delayed_item *curr;
+ struct btrfs_delayed_item *next;
+ const int max_size = BTRFS_LEAF_DATA_SIZE(root->fs_info);
+ int total_size;
int nitems;
- int i;
- int ret = 0;
-
- BUG_ON(!path->nodes[0]);
+ char *ins_data = NULL;
+ struct btrfs_key *ins_keys;
+ u32 *ins_sizes;
+ int ret;
- leaf = path->nodes[0];
- free_space = btrfs_leaf_free_space(leaf);
- INIT_LIST_HEAD(&head);
+ list_add_tail(&first_item->tree_list, &batch);
+ nitems = 1;
+ total_size = first_item->data_len + sizeof(struct btrfs_item);
+ curr = first_item;
- next = item;
- nitems = 0;
+ while (true) {
+ int next_size;
- /*
- * count the number of the continuous items that we can insert in batch
- */
- while (total_size + next->data_len + sizeof(struct btrfs_item) <=
- free_space) {
- total_size += next->data_len + sizeof(struct btrfs_item);
- list_add_tail(&next->tree_list, &head);
- nitems++;
-
- curr = next;
next = __btrfs_next_delayed_item(curr);
- if (!next)
+ if (!next || !btrfs_is_continuous_delayed_item(curr, next))
break;
- if (!btrfs_is_continuous_delayed_item(curr, next))
+ next_size = next->data_len + sizeof(struct btrfs_item);
+ if (total_size + next_size > max_size)
break;
- }
- if (!nitems) {
- ret = 0;
- goto out;
+ list_add_tail(&next->tree_list, &batch);
+ nitems++;
+ total_size += next_size;
+ curr = next;
}
- keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
- if (!keys) {
- ret = -ENOMEM;
- goto out;
- }
+ if (nitems == 1) {
+ ins_keys = &first_item->key;
+ ins_sizes = &first_item->data_len;
+ } else {
+ int i = 0;
- data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
- if (!data_size) {
- ret = -ENOMEM;
- goto error;
+ ins_data = kmalloc(nitems * sizeof(u32) +
+ nitems * sizeof(struct btrfs_key), GFP_NOFS);
+ if (!ins_data) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ins_sizes = (u32 *)ins_data;
+ ins_keys = (struct btrfs_key *)(ins_data + nitems * sizeof(u32));
+ list_for_each_entry(curr, &batch, tree_list) {
+ ins_keys[i] = curr->key;
+ ins_sizes[i] = curr->data_len;
+ i++;
+ }
}
- /* get keys of all the delayed items */
- i = 0;
- list_for_each_entry(next, &head, tree_list) {
- keys[i] = next->key;
- data_size[i] = next->data_len;
- i++;
- }
+ ret = btrfs_insert_empty_items(trans, root, path, ins_keys, ins_sizes,
+ nitems);
+ if (ret)
+ goto out;
- /* insert the keys of the items */
- setup_items_for_insert(root, path, keys, data_size, nitems);
+ list_for_each_entry(curr, &batch, tree_list) {
+ char *data_ptr;
- /* insert the dir index items */
- slot = path->slots[0];
- list_for_each_entry_safe(curr, next, &head, tree_list) {
- data_ptr = btrfs_item_ptr(leaf, slot, char);
- write_extent_buffer(leaf, &curr->data,
- (unsigned long)data_ptr,
- curr->data_len);
- slot++;
+ data_ptr = btrfs_item_ptr(path->nodes[0], path->slots[0], char);
+ write_extent_buffer(path->nodes[0], &curr->data,
+ (unsigned long)data_ptr, curr->data_len);
+ path->slots[0]++;
+ }
- btrfs_delayed_item_release_metadata(root, curr);
+ /*
+ * Now release our path before releasing the delayed items and their
+ * metadata reservations, so that we don't block other tasks for more
+ * time than needed.
+ */
+ btrfs_release_path(path);
+ list_for_each_entry_safe(curr, next, &batch, tree_list) {
list_del(&curr->tree_list);
+ btrfs_delayed_item_release_metadata(root, curr);
btrfs_release_delayed_item(curr);
}
-
-error:
- kfree(data_size);
- kfree(keys);
out:
+ kfree(ins_data);
return ret;
}
-/*
- * This helper can just do simple insertion that needn't extend item for new
- * data, such as directory name index insertion, inode insertion.
- */
-static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_delayed_item *delayed_item)
-{
- struct extent_buffer *leaf;
- unsigned int nofs_flag;
- char *ptr;
- int ret;
-
- nofs_flag = memalloc_nofs_save();
- ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
- delayed_item->data_len);
- memalloc_nofs_restore(nofs_flag);
- if (ret < 0 && ret != -EEXIST)
- return ret;
-
- leaf = path->nodes[0];
-
- ptr = btrfs_item_ptr(leaf, path->slots[0], char);
-
- write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
- delayed_item->data_len);
- btrfs_mark_buffer_dirty(leaf);
-
- btrfs_delayed_item_release_metadata(root, delayed_item);
- return 0;
-}
-
-/*
- * we insert an item first, then if there are some continuous items, we try
- * to insert those items into the same leaf.
- */
static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_root *root,
struct btrfs_delayed_node *node)
{
- struct btrfs_delayed_item *curr, *prev;
int ret = 0;
-do_again:
- mutex_lock(&node->mutex);
- curr = __btrfs_first_delayed_insertion_item(node);
- if (!curr)
- goto insert_end;
-
- ret = btrfs_insert_delayed_item(trans, root, path, curr);
- if (ret < 0) {
- btrfs_release_path(path);
- goto insert_end;
- }
+ while (ret == 0) {
+ struct btrfs_delayed_item *curr;
- prev = curr;
- curr = __btrfs_next_delayed_item(prev);
- if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
- /* insert the continuous items into the same leaf */
- path->slots[0]++;
- btrfs_batch_insert_items(root, path, curr);
+ mutex_lock(&node->mutex);
+ curr = __btrfs_first_delayed_insertion_item(node);
+ if (!curr) {
+ mutex_unlock(&node->mutex);
+ break;
+ }
+ ret = btrfs_insert_delayed_item(trans, root, path, curr);
+ mutex_unlock(&node->mutex);
}
- btrfs_release_delayed_item(prev);
- btrfs_mark_buffer_dirty(path->nodes[0]);
- btrfs_release_path(path);
- mutex_unlock(&node->mutex);
- goto do_again;
-
-insert_end:
- mutex_unlock(&node->mutex);
return ret;
}
@@ -914,7 +856,6 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *node)
{
struct btrfs_delayed_item *curr, *prev;
- unsigned int nofs_flag;
int ret = 0;
do_again:
@@ -923,9 +864,7 @@ do_again:
if (!curr)
goto delete_fail;
- nofs_flag = memalloc_nofs_save();
ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
- memalloc_nofs_restore(nofs_flag);
if (ret < 0)
goto delete_fail;
else if (ret > 0) {
@@ -994,7 +933,6 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
struct btrfs_key key;
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
- unsigned int nofs_flag;
int mod;
int ret;
@@ -1007,9 +945,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
else
mod = 1;
- nofs_flag = memalloc_nofs_save();
ret = btrfs_lookup_inode(trans, root, path, &key, mod);
- memalloc_nofs_restore(nofs_flag);
if (ret > 0)
ret = -ENOENT;
if (ret < 0)
@@ -1066,9 +1002,7 @@ search:
key.type = BTRFS_INODE_EXTREF_KEY;
key.offset = -1;
- nofs_flag = memalloc_nofs_save();
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- memalloc_nofs_restore(nofs_flag);
if (ret < 0)
goto err_out;
ASSERT(ret);
@@ -1711,6 +1645,8 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode_item *inode_item,
struct inode *inode)
{
+ u64 flags;
+
btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
@@ -1723,7 +1659,9 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
inode_peek_iversion(inode));
btrfs_set_stack_inode_transid(inode_item, trans->transid);
btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
- btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
+ flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
+ BTRFS_I(inode)->ro_flags);
+ btrfs_set_stack_inode_flags(inode_item, flags);
btrfs_set_stack_inode_block_group(inode_item, 0);
btrfs_set_stack_timespec_sec(&inode_item->atime,
@@ -1781,7 +1719,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
btrfs_stack_inode_sequence(inode_item));
inode->i_rdev = 0;
*rdev = btrfs_stack_inode_rdev(inode_item);
- BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
+ btrfs_inode_split_flags(btrfs_stack_inode_flags(inode_item),
+ &BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags);
inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 98b63ebed539..f1274d5c3805 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -170,6 +170,25 @@ out_free:
return 0;
}
+static struct btrfs_dir_item *btrfs_lookup_match_dir(
+ struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *key, const char *name,
+ int name_len, int mod)
+{
+ const int ins_len = (mod < 0 ? -1 : 0);
+ const int cow = (mod != 0);
+ int ret;
+
+ ret = btrfs_search_slot(trans, root, key, path, ins_len, cow);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret > 0)
+ return ERR_PTR(-ENOENT);
+
+ return btrfs_match_dir_item_name(root->fs_info, path, name, name_len);
+}
+
/*
* lookup a directory item based on name. 'dir' is the objectid
* we're searching in, and 'mod' tells us if you plan on deleting the
@@ -181,23 +200,18 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
const char *name, int name_len,
int mod)
{
- int ret;
struct btrfs_key key;
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
+ struct btrfs_dir_item *di;
key.objectid = dir;
key.type = BTRFS_DIR_ITEM_KEY;
-
key.offset = btrfs_name_hash(name, name_len);
- ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
+ di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod);
+ if (IS_ERR(di) && PTR_ERR(di) == -ENOENT)
return NULL;
- return btrfs_match_dir_item_name(root->fs_info, path, name, name_len);
+ return di;
}
int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
@@ -211,7 +225,6 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
int slot;
struct btrfs_path *path;
-
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -220,20 +233,20 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
key.type = BTRFS_DIR_ITEM_KEY;
key.offset = btrfs_name_hash(name, name_len);
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-
- /* return back any errors */
- if (ret < 0)
- goto out;
+ di = btrfs_lookup_match_dir(NULL, root, path, &key, name, name_len, 0);
+ if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
+ /* Nothing found, we're safe */
+ if (ret == -ENOENT) {
+ ret = 0;
+ goto out;
+ }
- /* nothing found, we're safe */
- if (ret > 0) {
- ret = 0;
- goto out;
+ if (ret < 0)
+ goto out;
}
/* we found an item, look for our name in the item */
- di = btrfs_match_dir_item_name(root->fs_info, path, name, name_len);
if (di) {
/* our exact name was found */
ret = -EEXIST;
@@ -274,21 +287,13 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
u64 objectid, const char *name, int name_len,
int mod)
{
- int ret;
struct btrfs_key key;
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
key.objectid = dir;
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = objectid;
- ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
- return ERR_PTR(-ENOENT);
- return btrfs_match_dir_item_name(root->fs_info, path, name, name_len);
+ return btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod);
}
struct btrfs_dir_item *
@@ -345,21 +350,18 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
const char *name, u16 name_len,
int mod)
{
- int ret;
struct btrfs_key key;
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
+ struct btrfs_dir_item *di;
key.objectid = dir;
key.type = BTRFS_XATTR_ITEM_KEY;
key.offset = btrfs_name_hash(name, name_len);
- ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
+
+ di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod);
+ if (IS_ERR(di) && PTR_ERR(di) == -ENOENT)
return NULL;
- return btrfs_match_dir_item_name(root->fs_info, path, name, name_len);
+ return di;
}
/*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a59ab7b9aea0..2f9515dccce0 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3392,11 +3392,16 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
goto fail_alloc;
}
- /* For 4K sector size support, it's only read-only */
- if (PAGE_SIZE == SZ_64K && sectorsize == SZ_4K) {
- if (!sb_rdonly(sb) || btrfs_super_log_root(disk_super)) {
+ if (sectorsize != PAGE_SIZE) {
+ btrfs_warn(fs_info,
+ "read-write for sector size %u with page size %lu is experimental",
+ sectorsize, PAGE_SIZE);
+ }
+ if (sectorsize != PAGE_SIZE) {
+ if (btrfs_super_incompat_flags(fs_info->super_copy) &
+ BTRFS_FEATURE_INCOMPAT_RAID56) {
btrfs_err(fs_info,
- "subpage sectorsize %u only supported read-only for page size %lu",
+ "RAID56 is not yet supported for sector size %u with page size %lu",
sectorsize, PAGE_SIZE);
err = -EINVAL;
goto fail_alloc;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 268ce58d4569..fc3da7585fb7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -153,7 +153,7 @@ search_again:
else
key.type = BTRFS_EXTENT_ITEM_KEY;
- ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
if (ret < 0)
goto out_free;
@@ -5950,9 +5950,9 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
*/
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
{
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_block_group *cache = NULL;
struct btrfs_device *device;
- struct list_head *devices;
u64 group_trimmed;
u64 range_end = U64_MAX;
u64 start;
@@ -6016,9 +6016,9 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
btrfs_warn(fs_info,
"failed to trim %llu block group(s), last error %d",
bg_failed, bg_ret);
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
- devices = &fs_info->fs_devices->devices;
- list_for_each_entry(device, devices, dev_list) {
+
+ mutex_lock(&fs_devices->device_list_mutex);
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
continue;
@@ -6031,7 +6031,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
trimmed += group_trimmed;
}
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ mutex_unlock(&fs_devices->device_list_mutex);
if (dev_failed)
btrfs_warn(fs_info,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 9e81d25dea70..aaddd7225348 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -13,6 +13,7 @@
#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/cleancache.h>
+#include <linux/fsverity.h>
#include "misc.h"
#include "extent_io.h"
#include "extent-io-tree.h"
@@ -172,6 +173,8 @@ int __must_check submit_one_bio(struct bio *bio, int mirror_num,
bio->bi_private = NULL;
+ /* Caller should ensure the bio has at least some range added */
+ ASSERT(bio->bi_iter.bi_size);
if (is_data_inode(tree->private_data))
ret = btrfs_submit_data_bio(tree->private_data, bio, mirror_num,
bio_flags);
@@ -2245,18 +2248,6 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
return bitset;
}
-/*
- * helper function to set a given page up to date if all the
- * extents in the tree for that page are up to date
- */
-static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
-{
- u64 start = page_offset(page);
- u64 end = start + PAGE_SIZE - 1;
- if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
- SetPageUptodate(page);
-}
-
int free_io_failure(struct extent_io_tree *failure_tree,
struct extent_io_tree *io_tree,
struct io_failure_record *rec)
@@ -2688,7 +2679,15 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
start + len <= page_offset(page) + PAGE_SIZE);
if (uptodate) {
- btrfs_page_set_uptodate(fs_info, page, start, len);
+ if (fsverity_active(page->mapping->host) &&
+ !PageError(page) &&
+ !PageUptodate(page) &&
+ start < i_size_read(page->mapping->host) &&
+ !fsverity_verify_page(page)) {
+ btrfs_page_set_error(fs_info, page, start, len);
+ } else {
+ btrfs_page_set_uptodate(fs_info, page, start, len);
+ }
} else {
btrfs_page_clear_uptodate(fs_info, page, start, len);
btrfs_page_set_error(fs_info, page, start, len);
@@ -2779,7 +2778,7 @@ next:
void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
{
struct btrfs_inode *inode;
- int uptodate = (err == 0);
+ const bool uptodate = (err == 0);
int ret = 0;
ASSERT(page && page->mapping);
@@ -2787,8 +2786,14 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
btrfs_writepage_endio_finish_ordered(inode, page, start, end, uptodate);
if (!uptodate) {
- ClearPageUptodate(page);
- SetPageError(page);
+ const struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ u32 len;
+
+ ASSERT(end + 1 - start <= U32_MAX);
+ len = end + 1 - start;
+
+ btrfs_page_clear_uptodate(fs_info, page, start, len);
+ btrfs_page_set_error(fs_info, page, start, len);
ret = err < 0 ? err : -EIO;
mapping_set_error(page->mapping, ret);
}
@@ -3097,7 +3102,7 @@ readpage_ok:
/* Update page status and unlock */
end_page_read(page, uptodate, start, len);
endio_readpage_release_extent(&processed, BTRFS_I(inode),
- start, end, uptodate);
+ start, end, PageUptodate(page));
}
/* Release the last extent */
endio_readpage_release_extent(&processed, NULL, 0, 0, false);
@@ -3153,11 +3158,13 @@ struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
return bio;
}
-struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
+struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
{
struct bio *bio;
struct btrfs_io_bio *btrfs_bio;
+ ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
+
/* this will never fail when it's backed by a bioset */
bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
ASSERT(bio);
@@ -3181,20 +3188,22 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
* @size: portion of page that we want to write
* @prev_bio_flags: flags of previous bio to see if we can merge the current one
* @bio_flags: flags of the current bio to see if we can merge them
- * @return: true if page was added, false otherwise
*
* Attempt to add a page to bio considering stripe alignment etc.
*
- * Return true if successfully page added. Otherwise, return false.
+ * Return >= 0 for the number of bytes added to the bio.
+ * Can return 0 if the current bio is already at stripe/zone boundary.
+ * Return <0 for error.
*/
-static bool btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
- struct page *page,
- u64 disk_bytenr, unsigned int size,
- unsigned int pg_offset,
- unsigned long bio_flags)
+static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
+ struct page *page,
+ u64 disk_bytenr, unsigned int size,
+ unsigned int pg_offset,
+ unsigned long bio_flags)
{
struct bio *bio = bio_ctrl->bio;
u32 bio_size = bio->bi_iter.bi_size;
+ u32 real_size;
const sector_t sector = disk_bytenr >> SECTOR_SHIFT;
bool contig;
int ret;
@@ -3203,29 +3212,36 @@ static bool btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
/* The limit should be calculated when bio_ctrl->bio is allocated */
ASSERT(bio_ctrl->len_to_oe_boundary && bio_ctrl->len_to_stripe_boundary);
if (bio_ctrl->bio_flags != bio_flags)
- return false;
+ return 0;
if (bio_ctrl->bio_flags & EXTENT_BIO_COMPRESSED)
contig = bio->bi_iter.bi_sector == sector;
else
contig = bio_end_sector(bio) == sector;
if (!contig)
- return false;
+ return 0;
- if (bio_size + size > bio_ctrl->len_to_oe_boundary ||
- bio_size + size > bio_ctrl->len_to_stripe_boundary)
- return false;
+ real_size = min(bio_ctrl->len_to_oe_boundary,
+ bio_ctrl->len_to_stripe_boundary) - bio_size;
+ real_size = min(real_size, size);
+
+ /*
+ * If real_size is 0, never call bio_add_*_page(), because even if the
+ * size is 0, the bio will still execute its endio function on the page!
+ */
+ if (real_size == 0)
+ return 0;
if (bio_op(bio) == REQ_OP_ZONE_APPEND)
- ret = bio_add_zone_append_page(bio, page, size, pg_offset);
+ ret = bio_add_zone_append_page(bio, page, real_size, pg_offset);
else
- ret = bio_add_page(bio, page, size, pg_offset);
+ ret = bio_add_page(bio, page, real_size, pg_offset);
- return ret == size;
+ return ret;
}
static int calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
- struct btrfs_inode *inode)
+ struct btrfs_inode *inode, u64 file_offset)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_io_geometry geom;
@@ -3266,9 +3282,8 @@ static int calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
return 0;
}
- ASSERT(fs_info->max_zone_append_size > 0);
/* Ordered extent not yet created, so we're good */
- ordered = btrfs_lookup_ordered_extent(inode, logical);
+ ordered = btrfs_lookup_ordered_extent(inode, file_offset);
if (!ordered) {
bio_ctrl->len_to_oe_boundary = U32_MAX;
return 0;
@@ -3280,6 +3295,62 @@ static int calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
return 0;
}
+static int alloc_new_bio(struct btrfs_inode *inode,
+ struct btrfs_bio_ctrl *bio_ctrl,
+ struct writeback_control *wbc,
+ unsigned int opf,
+ bio_end_io_t end_io_func,
+ u64 disk_bytenr, u32 offset, u64 file_offset,
+ unsigned long bio_flags)
+{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct bio *bio;
+ int ret;
+
+ /*
+ * For a compressed page range, its disk_bytenr is always the @disk_bytenr
+ * passed in, regardless of whether any range was added to a previous bio.
+ */
+ if (bio_flags & EXTENT_BIO_COMPRESSED)
+ bio = btrfs_bio_alloc(disk_bytenr);
+ else
+ bio = btrfs_bio_alloc(disk_bytenr + offset);
+ bio_ctrl->bio = bio;
+ bio_ctrl->bio_flags = bio_flags;
+ bio->bi_end_io = end_io_func;
+ bio->bi_private = &inode->io_tree;
+ bio->bi_write_hint = inode->vfs_inode.i_write_hint;
+ bio->bi_opf = opf;
+ ret = calc_bio_boundaries(bio_ctrl, inode, file_offset);
+ if (ret < 0)
+ goto error;
+ if (wbc) {
+ struct block_device *bdev;
+
+ bdev = fs_info->fs_devices->latest_bdev;
+ bio_set_dev(bio, bdev);
+ wbc_init_bio(wbc, bio);
+ }
+ if (btrfs_is_zoned(fs_info) && bio_op(bio) == REQ_OP_ZONE_APPEND) {
+ struct btrfs_device *device;
+
+ device = btrfs_zoned_get_device(fs_info, disk_bytenr,
+ fs_info->sectorsize);
+ if (IS_ERR(device)) {
+ ret = PTR_ERR(device);
+ goto error;
+ }
+
+ btrfs_io_bio(bio)->device = device;
+ }
+ return 0;
+error:
+ bio_ctrl->bio = NULL;
+ bio->bi_status = errno_to_blk_status(ret);
+ bio_endio(bio);
+ return ret;
+}
+
/*
* @opf: bio REQ_OP_* and REQ_* flags as one value
* @wbc: optional writeback control for io accounting
@@ -3305,61 +3376,67 @@ static int submit_extent_page(unsigned int opf,
bool force_bio_submit)
{
int ret = 0;
- struct bio *bio;
- size_t io_size = min_t(size_t, size, PAGE_SIZE);
struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
- struct extent_io_tree *tree = &inode->io_tree;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ unsigned int cur = pg_offset;
ASSERT(bio_ctrl);
ASSERT(pg_offset < PAGE_SIZE && size <= PAGE_SIZE &&
pg_offset + size <= PAGE_SIZE);
- if (bio_ctrl->bio) {
- bio = bio_ctrl->bio;
- if (force_bio_submit ||
- !btrfs_bio_add_page(bio_ctrl, page, disk_bytenr, io_size,
- pg_offset, bio_flags)) {
- ret = submit_one_bio(bio, mirror_num, bio_ctrl->bio_flags);
+ if (force_bio_submit && bio_ctrl->bio) {
+ ret = submit_one_bio(bio_ctrl->bio, mirror_num, bio_ctrl->bio_flags);
+ bio_ctrl->bio = NULL;
+ if (ret < 0)
+ return ret;
+ }
+
+ while (cur < pg_offset + size) {
+ u32 offset = cur - pg_offset;
+ int added;
+
+ /* Allocate new bio if needed */
+ if (!bio_ctrl->bio) {
+ ret = alloc_new_bio(inode, bio_ctrl, wbc, opf,
+ end_io_func, disk_bytenr, offset,
+ page_offset(page) + cur,
+ bio_flags);
+ if (ret < 0)
+ return ret;
+ }
+ /*
+ * We must go through btrfs_bio_add_page() to ensure each
+ * page range won't cross various boundaries.
+ */
+ if (bio_flags & EXTENT_BIO_COMPRESSED)
+ added = btrfs_bio_add_page(bio_ctrl, page, disk_bytenr,
+ size - offset, pg_offset + offset,
+ bio_flags);
+ else
+ added = btrfs_bio_add_page(bio_ctrl, page,
+ disk_bytenr + offset, size - offset,
+ pg_offset + offset, bio_flags);
+
+ /* Metadata page range should never be split */
+ if (!is_data_inode(&inode->vfs_inode))
+ ASSERT(added == 0 || added == size - offset);
+
+ /* We added at least part of the page, update the accounting */
+ if (wbc && added)
+ wbc_account_cgroup_owner(wbc, page, added);
+
+ /* We have reached boundary, submit right now */
+ if (added < size - offset) {
+ /* The bio should contain some page(s) */
+ ASSERT(bio_ctrl->bio->bi_iter.bi_size);
+ ret = submit_one_bio(bio_ctrl->bio, mirror_num,
+ bio_ctrl->bio_flags);
bio_ctrl->bio = NULL;
if (ret < 0)
return ret;
- } else {
- if (wbc)
- wbc_account_cgroup_owner(wbc, page, io_size);
- return 0;
}
+ cur += added;
}
-
- bio = btrfs_bio_alloc(disk_bytenr);
- bio_add_page(bio, page, io_size, pg_offset);
- bio->bi_end_io = end_io_func;
- bio->bi_private = tree;
- bio->bi_write_hint = page->mapping->host->i_write_hint;
- bio->bi_opf = opf;
- if (wbc) {
- struct block_device *bdev;
-
- bdev = fs_info->fs_devices->latest_bdev;
- bio_set_dev(bio, bdev);
- wbc_init_bio(wbc, bio);
- wbc_account_cgroup_owner(wbc, page, io_size);
- }
- if (btrfs_is_zoned(fs_info) && bio_op(bio) == REQ_OP_ZONE_APPEND) {
- struct btrfs_device *device;
-
- device = btrfs_zoned_get_device(fs_info, disk_bytenr, io_size);
- if (IS_ERR(device))
- return PTR_ERR(device);
-
- btrfs_io_bio(bio)->device = device;
- }
-
- bio_ctrl->bio = bio;
- bio_ctrl->bio_flags = bio_flags;
- ret = calc_bio_boundaries(bio_ctrl, inode);
-
- return ret;
+ return 0;
}
static int attach_extent_buffer_page(struct extent_buffer *eb,
@@ -3488,7 +3565,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
size_t pg_offset = 0;
size_t iosize;
size_t blocksize = inode->i_sb->s_blocksize;
- unsigned long this_bio_flag = 0;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
ret = set_page_extent_mapped(page);
@@ -3519,6 +3595,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
}
begin_page_read(fs_info, page);
while (cur <= end) {
+ unsigned long this_bio_flag = 0;
bool force_bio_submit = false;
u64 disk_bytenr;
@@ -3627,7 +3704,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
/* the get_extent function already copied into the page */
if (test_range_bit(tree, cur, cur_end,
EXTENT_UPTODATE, 1, NULL)) {
- check_page_uptodate(tree, page);
unlock_extent(tree, cur, cur + iosize - 1);
end_page_read(page, true, cur, iosize);
cur = cur + iosize;
@@ -3722,14 +3798,9 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
delalloc_end, &page_started, nr_written, wbc);
if (ret) {
- SetPageError(page);
- /*
- * btrfs_run_delalloc_range should return < 0 for error
- * but just in case, we use > 0 here meaning the IO is
- * started, so we don't want to return > 0 unless
- * things are going well.
- */
- return ret < 0 ? ret : -EIO;
+ btrfs_page_set_error(inode->root->fs_info, page,
+ page_offset(page), PAGE_SIZE);
+ return ret;
}
/*
* delalloc_end is already one less than the total length, so
@@ -3829,9 +3900,8 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
int *nr_ret)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- u64 start = page_offset(page);
- u64 end = start + PAGE_SIZE - 1;
- u64 cur = start;
+ u64 cur = page_offset(page);
+ u64 end = cur + PAGE_SIZE - 1;
u64 extent_offset;
u64 block_start;
struct extent_map *em;
@@ -3841,7 +3911,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
const unsigned int write_flags = wbc_to_write_flags(wbc);
bool compressed;
- ret = btrfs_writepage_cow_fixup(page, start, end);
+ ret = btrfs_writepage_cow_fixup(page);
if (ret) {
/* Fixup worker will requeue */
redirty_page_for_writepage(wbc, page);
@@ -3865,7 +3935,16 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
if (cur >= i_size) {
btrfs_writepage_endio_finish_ordered(inode, page, cur,
- end, 1);
+ end, true);
+ /*
+ * This range is beyond i_size, thus we don't need to
+ * bother writing back.
+ * But we still need to clear the dirty subpage bit, or
+ * the next time the page gets dirtied, we will try to
+ * write back the sectors with subpage dirty bits,
+ * causing writeback without an ordered extent.
+ */
+ btrfs_page_clear_dirty(fs_info, page, cur, end + 1 - cur);
break;
}
@@ -3915,7 +3994,8 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
nr++;
else
btrfs_writepage_endio_finish_ordered(inode,
- page, cur, cur + iosize - 1, 1);
+ page, cur, cur + iosize - 1, true);
+ btrfs_page_clear_dirty(fs_info, page, cur, iosize);
cur += iosize;
continue;
}
@@ -3951,6 +4031,12 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
cur += iosize;
nr++;
}
+ /*
+ * If we finish without problems, we should not only clear the page dirty
+ * flag, but also empty the subpage dirty bits.
+ */
+ if (!ret)
+ btrfs_page_assert_not_dirty(fs_info, page);
*nr_ret = nr;
return ret;
}
@@ -3981,7 +4067,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
WARN_ON(!PageLocked(page));
- ClearPageError(page);
+ btrfs_page_clear_error(btrfs_sb(inode->i_sb), page,
+ page_offset(page), PAGE_SIZE);
pg_offset = offset_in_page(i_size);
if (page->index > end_index ||
@@ -4022,10 +4109,39 @@ done:
set_page_writeback(page);
end_page_writeback(page);
}
- if (PageError(page)) {
- ret = ret < 0 ? ret : -EIO;
+ /*
+ * Here we used to have a check for PageError() and then set @ret and
+ * call end_extent_writepage().
+ *
+ * But in fact setting @ret here will cause different error paths
+ * between subpage and regular sectorsize.
+ *
+ * For regular page size, we never submit current page, but only add
+ * current page to current bio.
+ * The bio submission can only happen in next page.
+ * Thus if we hit the PageError() branch, @ret is already set to
+ * non-zero value and will not get updated for regular sectorsize.
+ *
+ * But for subpage case, it's possible we submit part of current page,
+ * thus can get PageError() set by submitted bio of the same page,
+ * while our @ret is still 0.
+ *
+ * So here we unify the behavior and don't set @ret.
+ * The error can still be properly passed to the higher layer, as the page
+ * will have its error flag set; here we just don't handle the IO failure.
+ *
+ * NOTE: This is just a hotfix for subpage.
+ * The root fix will be properly ending ordered extent when we hit
+ * an error during writeback.
+ *
+ * But that needs a bigger refactoring, as we not only need to grab the
+ * submitted OE, but also need to know exactly at which bytenr we hit
+ * the error.
+ * Currently the full page based __extent_writepage_io() is not
+ * capable of that.
+ */
+ if (PageError(page))
end_extent_writepage(page, ret, start, page_end);
- }
unlock_page(page);
ASSERT(ret <= 0);
return ret;
@@ -4984,7 +5100,7 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
ret = __extent_writepage(page, &wbc_writepages, &epd);
else {
btrfs_writepage_endio_finish_ordered(BTRFS_I(inode),
- page, start, start + PAGE_SIZE - 1, 1);
+ page, start, start + PAGE_SIZE - 1, true);
unlock_page(page);
}
put_page(page);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 62027f551b44..53abdc280451 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -280,7 +280,7 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
struct bio *btrfs_bio_alloc(u64 first_byte);
struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs);
struct bio *btrfs_bio_clone(struct bio *bio);
-struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size);
+struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size);
int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct page *page,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index df6631eefc65..2673c6ba7a4e 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -233,7 +233,6 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_path *path, u64 objectid,
u64 offset, int mod)
{
- int ret;
struct btrfs_key file_key;
int ins_len = mod < 0 ? -1 : 0;
int cow = mod != 0;
@@ -241,8 +240,8 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
file_key.objectid = objectid;
file_key.offset = offset;
file_key.type = BTRFS_EXTENT_DATA_KEY;
- ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
- return ret;
+
+ return btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
}
/*
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ee34497500e1..7ff577005d0f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -16,6 +16,7 @@
#include <linux/btrfs.h>
#include <linux/uio.h>
#include <linux/iversion.h>
+#include <linux/fsverity.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -1340,7 +1341,18 @@ static int prepare_uptodate_page(struct inode *inode,
unlock_page(page);
return -EIO;
}
- if (page->mapping != inode->i_mapping) {
+
+ /*
+ * Since btrfs_readpage() will unlock the page before it
+ * returns, there is a window where btrfs_releasepage() can be
+ * called to release the page. Here we check both inode
+ * mapping and PagePrivate() to make sure the page was not
+ * released.
+ *
+ * The private flag check is essential for subpage as we need
+ * to store extra bitmap using page->private.
+ */
+ if (page->mapping != inode->i_mapping || !PagePrivate(page)) {
unlock_page(page);
return -EAGAIN;
}
@@ -3604,7 +3616,13 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
static int btrfs_file_open(struct inode *inode, struct file *filp)
{
+ int ret;
+
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
+
+ ret = fsverity_file_open(inode, filp);
+ if (ret)
+ return ret;
return generic_file_open(inode, filp);
}
@@ -3633,6 +3651,9 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
+ if (fsverity_active(inode))
+ return 0;
+
if (check_direct_read(btrfs_sb(inode->i_sb), to, iocb->ki_pos))
return 0;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 2131ae5b9ed7..da0eee7c9e5f 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -344,19 +344,13 @@ fail:
static void readahead_cache(struct inode *inode)
{
- struct file_ra_state *ra;
+ struct file_ra_state ra;
unsigned long last_index;
- ra = kzalloc(sizeof(*ra), GFP_NOFS);
- if (!ra)
- return;
-
- file_ra_state_init(ra, inode->i_mapping);
+ file_ra_state_init(&ra, inode->i_mapping);
last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
- page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
-
- kfree(ra);
+ page_cache_sync_readahead(inode->i_mapping, &ra, NULL, 0, last_index);
}
static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
@@ -2544,6 +2538,7 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
u64 offset = bytenr - block_group->start;
u64 to_free, to_unusable;
+ const int bg_reclaim_threshold = READ_ONCE(fs_info->bg_reclaim_threshold);
spin_lock(&ctl->tree_lock);
if (!used)
@@ -2573,9 +2568,9 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
/* All the region is now unusable. Mark it as unused and reclaim */
if (block_group->zone_unusable == block_group->length) {
btrfs_mark_bg_unused(block_group);
- } else if (block_group->zone_unusable >=
- div_factor_fine(block_group->length,
- fs_info->bg_reclaim_threshold)) {
+ } else if (bg_reclaim_threshold &&
+ block_group->zone_unusable >=
+ div_factor_fine(block_group->length, bg_reclaim_threshold)) {
btrfs_mark_bg_to_reclaim(block_group);
}
@@ -2652,8 +2647,11 @@ int btrfs_remove_free_space(struct btrfs_block_group *block_group,
* btrfs_pin_extent_for_log_replay() when replaying the log.
* Advance the pointer not to overwrite the tree-log nodes.
*/
- if (block_group->alloc_offset < offset + bytes)
- block_group->alloc_offset = offset + bytes;
+ if (block_group->start + block_group->alloc_offset <
+ offset + bytes) {
+ block_group->alloc_offset =
+ offset + bytes - block_group->start;
+ }
return 0;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0117d867ecf8..487533c35ddb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -32,6 +32,7 @@
#include <linux/sched/mm.h>
#include <linux/iomap.h>
#include <asm/unaligned.h>
+#include <linux/fsverity.h>
#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
@@ -286,9 +287,8 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
cur_size = min_t(unsigned long, compressed_size,
PAGE_SIZE);
- kaddr = kmap_atomic(cpage);
+ kaddr = page_address(cpage);
write_extent_buffer(leaf, kaddr, ptr, cur_size);
- kunmap_atomic(kaddr);
i++;
ptr += cur_size;
@@ -490,6 +490,9 @@ static noinline int add_async_extent(struct async_chunk *cow,
*/
static inline bool inode_can_compress(struct btrfs_inode *inode)
{
+ /* Subpage doesn't support compression yet */
+ if (inode->root->fs_info->sectorsize < PAGE_SIZE)
+ return false;
if (inode->flags & BTRFS_INODE_NODATACOW ||
inode->flags & BTRFS_INODE_NODATASUM)
return false;
@@ -629,7 +632,7 @@ again:
* inode has not been flagged as nocompress. This flag can
* change at any time if we discover bad compression ratios.
*/
- if (nr_pages > 1 && inode_need_compress(BTRFS_I(inode), start, end)) {
+ if (inode_need_compress(BTRFS_I(inode), start, end)) {
WARN_ON(pages);
pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
if (!pages) {
@@ -682,7 +685,11 @@ again:
}
}
cont:
- if (start == 0) {
+ /*
+ * Check cow_file_range() for why we don't even try to create inline
+ * extent for subpage case.
+ */
+ if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
/* lets try to make an inline extent */
if (ret || total_in < actual_end) {
/* we didn't compress the entire range, try
@@ -973,7 +980,7 @@ retry:
p->mapping = inode->vfs_inode.i_mapping;
btrfs_writepage_endio_finish_ordered(inode, p, start,
- end, 0);
+ end, false);
p->mapping = NULL;
extent_clear_unlock_delalloc(inode, start, end, NULL, 0,
@@ -1080,7 +1087,17 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
- if (start == 0) {
+ /*
+ * Due to the page size limit, for subpage we can only trigger the
+ * writeback for the dirty sectors of page, that means data writeback
+ * is doing more writeback than what we want.
+ *
+ * This is especially unexpected for some call sites like fallocate,
+ * where we only increase i_size after everything is done.
+ * This means we can trigger inline extent even if we didn't want to.
+ * So here we skip inline extent creation completely.
+ */
+ if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
/* lets try to make an inline extent */
ret = cow_file_range_inline(inode, start, end, 0,
BTRFS_COMPRESS_NONE, NULL);
@@ -1290,11 +1307,6 @@ static noinline void async_cow_submit(struct btrfs_work *work)
nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
PAGE_SHIFT;
- /* atomic_sub_return implies a barrier */
- if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
- 5 * SZ_1M)
- cond_wake_up_nomb(&fs_info->async_submit_wait);
-
/*
* ->inode could be NULL if async_chunk_start has failed to compress,
* in which case we don't have anything to submit, yet we need to
@@ -1303,6 +1315,11 @@ static noinline void async_cow_submit(struct btrfs_work *work)
*/
if (async_chunk->inode)
submit_compressed_extents(async_chunk);
+
+ /* atomic_sub_return implies a barrier */
+ if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
+ 5 * SZ_1M)
+ cond_wake_up_nomb(&fs_info->async_submit_wait);
}
static noinline void async_cow_free(struct btrfs_work *work)
@@ -1946,6 +1963,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
ret = cow_file_range_async(inode, wbc, locked_page, start, end,
page_started, nr_written);
}
+ ASSERT(ret <= 0);
if (ret)
btrfs_cleanup_ordered_extents(inode, locked_page, start,
end - start + 1);
@@ -2285,7 +2303,6 @@ static int split_zoned_em(struct btrfs_inode *inode, u64 start, u64 len,
struct extent_map *split_mid = NULL;
struct extent_map *split_post = NULL;
int ret = 0;
- int modified;
unsigned long flags;
/* Sanity check */
@@ -2315,11 +2332,12 @@ static int split_zoned_em(struct btrfs_inode *inode, u64 start, u64 len,
ASSERT(em->len == len);
ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
+ ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
+ ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
+ ASSERT(!list_empty(&em->list));
flags = em->flags;
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
- clear_bit(EXTENT_FLAG_LOGGING, &flags);
- modified = !list_empty(&em->list);
/* First, replace the em with a new extent_map starting from em->start */
split_pre->start = em->start;
@@ -2333,7 +2351,7 @@ static int split_zoned_em(struct btrfs_inode *inode, u64 start, u64 len,
split_pre->compress_type = em->compress_type;
split_pre->generation = em->generation;
- replace_extent_mapping(em_tree, em, split_pre, modified);
+ replace_extent_mapping(em_tree, em, split_pre, 1);
/*
* Now we only have an extent_map at:
@@ -2353,7 +2371,7 @@ static int split_zoned_em(struct btrfs_inode *inode, u64 start, u64 len,
split_mid->flags = flags;
split_mid->compress_type = em->compress_type;
split_mid->generation = em->generation;
- add_extent_mapping(em_tree, split_mid, modified);
+ add_extent_mapping(em_tree, split_mid, 1);
}
if (post) {
@@ -2367,7 +2385,7 @@ static int split_zoned_em(struct btrfs_inode *inode, u64 start, u64 len,
split_post->flags = flags;
split_post->compress_type = em->compress_type;
split_post->generation = em->generation;
- add_extent_mapping(em_tree, split_post, modified);
+ add_extent_mapping(em_tree, split_post, 1);
}
/* Once for us */
@@ -2770,7 +2788,7 @@ out_page:
* to fix it up. The async helper will wait for ordered extents, set
* the delalloc bit and make it safe to write the page.
*/
-int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
+int btrfs_writepage_cow_fixup(struct page *page)
{
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -3171,7 +3189,7 @@ static void finish_ordered_fn(struct btrfs_work *work)
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
struct page *page, u64 start,
- u64 end, int uptodate)
+ u64 end, bool uptodate)
{
trace_btrfs_writepage_end_io_hook(inode, start, end, uptodate);
@@ -3257,25 +3275,44 @@ unsigned int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
return 0;
}
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
+ /*
+ * For subpage case, above PageChecked is not safe as it's not subpage
+ * compatible.
+ * But for now only cow fixup and compressed read utilize PageChecked
+ * flag, while in this context we can easily use io_bio->csum to
+ * determine if we really need to do csum verification.
+ *
+ * So for now, just exit if io_bio->csum is NULL, as it means it's
+ * compressed read, and its compressed data csum has already been
+ * verified.
+ */
+ if (io_bio->csum == NULL)
return 0;
- if (!root->fs_info->csum_root)
+ if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
return 0;
- if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
- test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
- clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);
+ if (!root->fs_info->csum_root)
return 0;
- }
ASSERT(page_offset(page) <= start &&
end <= page_offset(page) + PAGE_SIZE - 1);
for (pg_off = offset_in_page(start);
pg_off < offset_in_page(end);
pg_off += sectorsize, bio_offset += sectorsize) {
+ u64 file_offset = pg_off + page_offset(page);
int ret;
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
+ test_range_bit(io_tree, file_offset,
+ file_offset + sectorsize - 1,
+ EXTENT_NODATASUM, 1, NULL)) {
+ /* Skip the range without csum for data reloc inode */
+ clear_extent_bits(io_tree, file_offset,
+ file_offset + sectorsize - 1,
+ EXTENT_NODATASUM);
+ continue;
+ }
ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off,
page_offset(page) + pg_off);
if (ret < 0) {
@@ -3520,7 +3557,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
/*
* If we have an inode with links, there are a couple of
- * possibilities. Old kernels (before v3.12) used to create an
+ * possibilities:
+ *
+ * 1. We were halfway through creating fsverity metadata for the
+ * file. In that case, the orphan item represents incomplete
+ * fsverity metadata which must be cleaned up with
+ * btrfs_drop_verity_items and deleting the orphan item.
+ *
+ * 2. Old kernels (before v3.12) used to create an
* orphan item for truncate indicating that there were possibly
* extent items past i_size that needed to be deleted. In v3.12,
* truncate was changed to update i_size in sync with the extent
@@ -3538,8 +3582,12 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
* but either way, we can delete the orphan item.
*/
if (ret == -ENOENT || inode->i_nlink) {
- if (!ret)
+ if (!ret) {
+ ret = btrfs_drop_verity_items(BTRFS_I(inode));
iput(inode);
+ if (ret)
+ goto out;
+ }
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -3728,7 +3776,8 @@ static int btrfs_read_locked_inode(struct inode *inode,
rdev = btrfs_inode_rdev(leaf, inode_item);
BTRFS_I(inode)->index_cnt = (u64)-1;
- BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
+ btrfs_inode_split_flags(btrfs_inode_flags(leaf, inode_item),
+ &BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags);
cache_index:
/*
@@ -3859,6 +3908,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
struct inode *inode)
{
struct btrfs_map_token token;
+ u64 flags;
btrfs_init_map_token(&token, leaf);
@@ -3894,7 +3944,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
btrfs_set_token_inode_transid(&token, item, trans->transid);
btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
- btrfs_set_token_inode_flags(&token, item, BTRFS_I(inode)->flags);
+ flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
+ BTRFS_I(inode)->ro_flags);
+ btrfs_set_token_inode_flags(&token, item, flags);
btrfs_set_token_inode_block_group(&token, item, 0);
}
@@ -5088,15 +5140,13 @@ static int maybe_insert_hole(struct btrfs_root *root, struct btrfs_inode *inode,
int ret;
/*
- * Still need to make sure the inode looks like it's been updated so
- * that any holes get logged if we fsync.
+ * If NO_HOLES is enabled, we don't need to do anything.
+ * Later, up in the call chain, either btrfs_set_inode_last_sub_trans()
+ * or btrfs_update_inode() will be called, which guarantees that the next
+ * fsync will know this inode was changed and needs to be logged.
*/
- if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
- inode->last_trans = fs_info->generation;
- inode->last_sub_trans = root->log_transid;
- inode->last_log_commit = root->last_log_commit;
+ if (btrfs_fs_incompat(fs_info, NO_HOLES))
return 0;
- }
/*
* 1 - for the one we're dropping
@@ -5342,7 +5392,7 @@ static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentr
if (btrfs_root_readonly(root))
return -EROFS;
- err = setattr_prepare(&init_user_ns, dentry, attr);
+ err = setattr_prepare(mnt_userns, dentry, attr);
if (err)
return err;
@@ -5353,13 +5403,12 @@ static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentr
}
if (attr->ia_valid) {
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(mnt_userns, inode, attr);
inode_inc_iversion(inode);
err = btrfs_dirty_inode(inode);
if (!err && attr->ia_valid & ATTR_MODE)
- err = posix_acl_chmod(&init_user_ns, inode,
- inode->i_mode);
+ err = posix_acl_chmod(mnt_userns, inode, inode->i_mode);
}
return err;
@@ -5522,6 +5571,7 @@ void btrfs_evict_inode(struct inode *inode)
trace_btrfs_inode_evict(inode);
if (!root) {
+ fsverity_cleanup_inode(inode);
clear_inode(inode);
return;
}
@@ -5604,6 +5654,7 @@ no_delete:
* to retry these periodically in the future.
*/
btrfs_remove_delayed_node(BTRFS_I(inode));
+ fsverity_cleanup_inode(inode);
clear_inode(inode);
}
@@ -6370,6 +6421,7 @@ static void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
+ struct user_namespace *mnt_userns,
struct inode *dir,
const char *name, int name_len,
u64 ref_objectid, u64 objectid,
@@ -6479,7 +6531,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
if (ret != 0)
goto fail_unlock;
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(mnt_userns, inode, dir, mode);
inode_set_bytes(inode, 0);
inode->i_mtime = current_time(inode);
@@ -6664,9 +6716,9 @@ static int btrfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
goto out_unlock;
- inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
- mode, &index);
+ inode = btrfs_new_inode(trans, root, mnt_userns, dir,
+ dentry->d_name.name, dentry->d_name.len,
+ btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
inode = NULL;
@@ -6728,9 +6780,9 @@ static int btrfs_create(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
goto out_unlock;
- inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
- mode, &index);
+ inode = btrfs_new_inode(trans, root, mnt_userns, dir,
+ dentry->d_name.name, dentry->d_name.len,
+ btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
inode = NULL;
@@ -6873,8 +6925,9 @@ static int btrfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
goto out_fail;
- inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
+ inode = btrfs_new_inode(trans, root, mnt_userns, dir,
+ dentry->d_name.name, dentry->d_name.len,
+ btrfs_ino(BTRFS_I(dir)), objectid,
S_IFDIR | mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
@@ -8194,9 +8247,10 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
return dip;
}
-static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
+static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter,
struct bio *dio_bio, loff_t file_offset)
{
+ struct inode *inode = iter->inode;
const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
const bool raid56 = (btrfs_data_alloc_profile(fs_info) &
@@ -8206,13 +8260,13 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
u64 start_sector;
int async_submit = 0;
u64 submit_len;
- int clone_offset = 0;
- int clone_len;
+ u64 clone_offset = 0;
+ u64 clone_len;
u64 logical;
int ret;
blk_status_t status;
struct btrfs_io_geometry geom;
- struct btrfs_dio_data *dio_data = iomap->private;
+ struct btrfs_dio_data *dio_data = iter->iomap.private;
struct extent_map *em = NULL;
dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
@@ -8255,9 +8309,9 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
status = errno_to_blk_status(ret);
goto out_err_em;
}
- ASSERT(geom.len <= INT_MAX);
- clone_len = min_t(int, submit_len, geom.len);
+ clone_len = min(submit_len, geom.len);
+ ASSERT(clone_len <= UINT_MAX);
/*
* This will never fail as it's passing GFP_NOFS and
@@ -8401,11 +8455,47 @@ static void btrfs_readahead(struct readahead_control *rac)
extent_readahead(rac);
}
+/*
+ * For releasepage() and invalidatepage() we have a race window where
+ * end_page_writeback() is called but the subpage spinlock is not yet released.
+ * If we continue to release/invalidate the page, we could cause use-after-free
+ * for subpage spinlock. So this function is to spin and wait for subpage
+ * spinlock.
+ */
+static void wait_subpage_spinlock(struct page *page)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
+ struct btrfs_subpage *subpage;
+
+ if (fs_info->sectorsize == PAGE_SIZE)
+ return;
+
+ ASSERT(PagePrivate(page) && page->private);
+ subpage = (struct btrfs_subpage *)page->private;
+
+ /*
+ * This may look insane as we just acquire the spinlock and release it,
+ * without doing anything. But we just want to make sure no one is
+ * still holding the subpage spinlock.
+ * And since the page is not dirty nor writeback, and we have page
+ * locked, the only possible way to hold a spinlock is from the endio
+ * function to clear page writeback.
+ *
+ * Here we just acquire the spinlock so that all existing callers
+ * should exit and we're safe to release/invalidate the page.
+ */
+ spin_lock_irq(&subpage->lock);
+ spin_unlock_irq(&subpage->lock);
+}
+
static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
{
int ret = try_release_extent_mapping(page, gfp_flags);
- if (ret == 1)
+
+ if (ret == 1) {
+ wait_subpage_spinlock(page);
clear_page_extent_mapped(page);
+ }
return ret;
}
@@ -8469,6 +8559,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
* do double ordered extent accounting on the same page.
*/
wait_on_page_writeback(page);
+ wait_subpage_spinlock(page);
/*
* For subpage case, we have call sites like
@@ -8557,7 +8648,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
spin_unlock_irq(&inode->ordered_tree.lock);
if (btrfs_dec_test_ordered_pending(inode, &ordered,
- cur, range_end + 1 - cur, 1)) {
+ cur, range_end + 1 - cur)) {
btrfs_finish_ordered_io(ordered);
/*
* The ordered extent has finished, now we're again
@@ -8938,7 +9029,8 @@ out:
*/
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
struct btrfs_root *new_root,
- struct btrfs_root *parent_root)
+ struct btrfs_root *parent_root,
+ struct user_namespace *mnt_userns)
{
struct inode *inode;
int err;
@@ -8949,7 +9041,8 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
if (err < 0)
return err;
- inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, ino, ino,
+ inode = btrfs_new_inode(trans, new_root, mnt_userns, NULL, "..", 2,
+ ino, ino,
S_IFDIR | (~current_umask() & S_IRWXUGO),
&index);
if (IS_ERR(inode))
@@ -8993,6 +9086,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->defrag_bytes = 0;
ei->disk_i_size = 0;
ei->flags = 0;
+ ei->ro_flags = 0;
ei->csum_bytes = 0;
ei->index_cnt = (u64)-1;
ei->dir_index = 0;
@@ -9174,6 +9268,7 @@ static int btrfs_getattr(struct user_namespace *mnt_userns,
struct inode *inode = d_inode(path->dentry);
u32 blocksize = inode->i_sb->s_blocksize;
u32 bi_flags = BTRFS_I(inode)->flags;
+ u32 bi_ro_flags = BTRFS_I(inode)->ro_flags;
stat->result_mask |= STATX_BTIME;
stat->btime.tv_sec = BTRFS_I(inode)->i_otime.tv_sec;
@@ -9186,13 +9281,15 @@ static int btrfs_getattr(struct user_namespace *mnt_userns,
stat->attributes |= STATX_ATTR_IMMUTABLE;
if (bi_flags & BTRFS_INODE_NODUMP)
stat->attributes |= STATX_ATTR_NODUMP;
+ if (bi_ro_flags & BTRFS_INODE_RO_VERITY)
+ stat->attributes |= STATX_ATTR_VERITY;
stat->attributes_mask |= (STATX_ATTR_APPEND |
STATX_ATTR_COMPRESSED |
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(mnt_userns, inode, stat);
stat->dev = BTRFS_I(inode)->root->anon_dev;
spin_lock(&BTRFS_I(inode)->lock);
@@ -9226,8 +9323,14 @@ static int btrfs_rename_exchange(struct inode *old_dir,
bool dest_log_pinned = false;
bool need_abort = false;
- /* we only allow rename subvolume link between subvolumes */
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
+ /*
+ * For non-subvolumes allow exchange only within one subvolume, in the
+ * same inode namespace. Two subvolumes (represented as directory) can
+ * be exchanged as they're a logical link and have a fixed inode number.
+ */
+ if (root != dest &&
+ (old_ino != BTRFS_FIRST_FREE_OBJECTID ||
+ new_ino != BTRFS_FIRST_FREE_OBJECTID))
return -EXDEV;
/* close the race window with snapshot create/destroy ioctl */
@@ -9274,8 +9377,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* force full log commit if subvolume involved. */
btrfs_set_log_full_commit(trans);
} else {
- btrfs_pin_log_trans(root);
- root_log_pinned = true;
ret = btrfs_insert_inode_ref(trans, dest,
new_dentry->d_name.name,
new_dentry->d_name.len,
@@ -9292,8 +9393,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* force full log commit if subvolume involved. */
btrfs_set_log_full_commit(trans);
} else {
- btrfs_pin_log_trans(dest);
- dest_log_pinned = true;
ret = btrfs_insert_inode_ref(trans, root,
old_dentry->d_name.name,
old_dentry->d_name.len,
@@ -9324,6 +9423,29 @@ static int btrfs_rename_exchange(struct inode *old_dir,
BTRFS_I(new_inode), 1);
}
+ /*
+ * Now pin the logs of the roots. We do it to ensure that no other task
+ * can sync the logs while we are in progress with the rename, because
+ * that could result in an inconsistency in case any of the inodes that
+ * are part of this rename operation were logged before.
+ *
+ * We pin the logs even if at this precise moment none of the inodes was
+ * logged before. This is because right after we checked for that, some
+ * other task fsyncing some other inode not involved with this rename
+ * operation could log that one of our inodes exists.
+ *
+ * We don't need to pin the logs before the above calls to
+ * btrfs_insert_inode_ref(), since those don't ever need to change a log.
+ */
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ btrfs_pin_log_trans(root);
+ root_log_pinned = true;
+ }
+ if (new_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ btrfs_pin_log_trans(dest);
+ dest_log_pinned = true;
+ }
+
/* src is a subvolume */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
@@ -9405,8 +9527,7 @@ out_fail:
if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
- (new_inode &&
- btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
+ btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))
btrfs_set_log_full_commit(trans);
if (root_log_pinned) {
@@ -9430,6 +9551,7 @@ out_notrans:
static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
+ struct user_namespace *mnt_userns,
struct inode *dir,
struct dentry *dentry)
{
@@ -9442,7 +9564,7 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
if (ret)
return ret;
- inode = btrfs_new_inode(trans, root, dir,
+ inode = btrfs_new_inode(trans, root, mnt_userns, dir,
dentry->d_name.name,
dentry->d_name.len,
btrfs_ino(BTRFS_I(dir)),
@@ -9479,9 +9601,10 @@ out:
return ret;
}
-static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int btrfs_rename(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
struct btrfs_trans_handle *trans;
@@ -9576,8 +9699,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
/* force full log commit if subvolume involved. */
btrfs_set_log_full_commit(trans);
} else {
- btrfs_pin_log_trans(root);
- log_pinned = true;
ret = btrfs_insert_inode_ref(trans, dest,
new_dentry->d_name.name,
new_dentry->d_name.len,
@@ -9601,6 +9722,25 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
} else {
+ /*
+ * Now pin the log. We do it to ensure that no other task can
+ * sync the log while we are in progress with the rename, as
+ * that could result in an inconsistency in case any of the
+ * inodes that are part of this rename operation were logged
+ * before.
+ *
+ * We pin the log even if at this precise moment none of the
+ * inodes was logged before. This is because right after we
+ * checked for that, some other task fsyncing some other inode
+ * not involved with this rename operation could log that one of
+ * our inodes exists.
+ *
+ * We don't need to pin the logs before the above call to
+ * btrfs_insert_inode_ref(), since that does not need to change
+ * a log.
+ */
+ btrfs_pin_log_trans(root);
+ log_pinned = true;
ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
BTRFS_I(d_inode(old_dentry)),
old_dentry->d_name.name,
@@ -9654,8 +9794,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
if (flags & RENAME_WHITEOUT) {
- ret = btrfs_whiteout_for_rename(trans, root, old_dir,
- old_dentry);
+ ret = btrfs_whiteout_for_rename(trans, root, mnt_userns,
+ old_dir, old_dentry);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -9705,7 +9845,8 @@ static int btrfs_rename2(struct user_namespace *mnt_userns, struct inode *old_di
return btrfs_rename_exchange(old_dir, old_dentry, new_dir,
new_dentry);
- return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
+ return btrfs_rename(mnt_userns, old_dir, old_dentry, new_dir,
+ new_dentry, flags);
}
struct btrfs_delalloc_work {
@@ -9802,11 +9943,7 @@ static int start_delalloc_inodes(struct btrfs_root *root,
btrfs_queue_work(root->fs_info->flush_workers,
&work->work);
} else {
- ret = sync_inode(inode, wbc);
- if (!ret &&
- test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
- &BTRFS_I(inode)->runtime_flags))
- ret = sync_inode(inode, wbc);
+ ret = filemap_fdatawrite_wbc(inode->i_mapping, wbc);
btrfs_add_delayed_iput(inode);
if (ret || wbc->nr_to_write <= 0)
goto out;
@@ -9941,9 +10078,10 @@ static int btrfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
goto out_unlock;
- inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(BTRFS_I(dir)),
- objectid, S_IFLNK|S_IRWXUGO, &index);
+ inode = btrfs_new_inode(trans, root, mnt_userns, dir,
+ dentry->d_name.name, dentry->d_name.len,
+ btrfs_ino(BTRFS_I(dir)), objectid,
+ S_IFLNK | S_IRWXUGO, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
inode = NULL;
@@ -10267,7 +10405,7 @@ static int btrfs_permission(struct user_namespace *mnt_userns,
if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
return -EACCES;
}
- return generic_permission(&init_user_ns, inode, mask);
+ return generic_permission(mnt_userns, inode, mask);
}
static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
@@ -10292,7 +10430,7 @@ static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
if (ret)
goto out;
- inode = btrfs_new_inode(trans, root, dir, NULL, 0,
+ inode = btrfs_new_inode(trans, root, mnt_userns, dir, NULL, 0,
btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0ba98e08a029..41524f9aeac3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -27,6 +27,7 @@
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include <linux/fileattr.h>
+#include <linux/fsverity.h>
#include "ctree.h"
#include "disk-io.h"
#include "export.h"
@@ -103,9 +104,11 @@ static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
* Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
* ioctl.
*/
-static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)
+static unsigned int btrfs_inode_flags_to_fsflags(struct btrfs_inode *binode)
{
unsigned int iflags = 0;
+ u32 flags = binode->flags;
+ u32 ro_flags = binode->ro_flags;
if (flags & BTRFS_INODE_SYNC)
iflags |= FS_SYNC_FL;
@@ -121,6 +124,8 @@ static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)
iflags |= FS_DIRSYNC_FL;
if (flags & BTRFS_INODE_NODATACOW)
iflags |= FS_NOCOW_FL;
+ if (ro_flags & BTRFS_INODE_RO_VERITY)
+ iflags |= FS_VERITY_FL;
if (flags & BTRFS_INODE_NOCOMPRESS)
iflags |= FS_NOCOMP_FL;
@@ -148,10 +153,12 @@ void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (binode->flags & BTRFS_INODE_DIRSYNC)
new_fl |= S_DIRSYNC;
+ if (binode->ro_flags & BTRFS_INODE_RO_VERITY)
+ new_fl |= S_VERITY;
set_mask_bits(&inode->i_flags,
- S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
- new_fl);
+ S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC |
+ S_VERITY, new_fl);
}
/*
@@ -200,7 +207,7 @@ int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
{
struct btrfs_inode *binode = BTRFS_I(d_inode(dentry));
- fileattr_fill_flags(fa, btrfs_inode_flags_to_fsflags(binode->flags));
+ fileattr_fill_flags(fa, btrfs_inode_flags_to_fsflags(binode));
return 0;
}
@@ -224,7 +231,7 @@ int btrfs_fileattr_set(struct user_namespace *mnt_userns,
return -EOPNOTSUPP;
fsflags = btrfs_mask_fsflags_for_type(inode, fa->flags);
- old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
+ old_fsflags = btrfs_inode_flags_to_fsflags(binode);
ret = check_fsflags(old_fsflags, fsflags);
if (ret)
return ret;
@@ -492,8 +499,8 @@ int __pure btrfs_is_empty_uuid(u8 *uuid)
return 1;
}
-static noinline int create_subvol(struct inode *dir,
- struct dentry *dentry,
+static noinline int create_subvol(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *dentry,
const char *name, int namelen,
struct btrfs_qgroup_inherit *inherit)
{
@@ -638,7 +645,7 @@ static noinline int create_subvol(struct inode *dir,
goto fail;
}
- ret = btrfs_create_subvol_root(trans, new_root, root);
+ ret = btrfs_create_subvol_root(trans, new_root, root, mnt_userns);
btrfs_put_root(new_root);
if (ret) {
/* We potentially lose an unused inode item here */
@@ -830,7 +837,8 @@ free_pending:
* nfs_async_unlink().
*/
-static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
+static int btrfs_may_delete(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *victim, int isdir)
{
int error;
@@ -840,12 +848,12 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
BUG_ON(d_inode(victim->d_parent) != dir);
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
- error = inode_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
+ error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
if (error)
return error;
if (IS_APPEND(dir))
return -EPERM;
- if (check_sticky(&init_user_ns, dir, d_inode(victim)) ||
+ if (check_sticky(mnt_userns, dir, d_inode(victim)) ||
IS_APPEND(d_inode(victim)) || IS_IMMUTABLE(d_inode(victim)) ||
IS_SWAPFILE(d_inode(victim)))
return -EPERM;
@@ -864,13 +872,16 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
}
/* copy of may_create in fs/namei.c() */
-static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
+static inline int btrfs_may_create(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *child)
{
if (d_really_is_positive(child))
return -EEXIST;
if (IS_DEADDIR(dir))
return -ENOENT;
- return inode_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
+ if (!fsuidgid_has_mapping(dir->i_sb, mnt_userns))
+ return -EOVERFLOW;
+ return inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
}
/*
@@ -879,6 +890,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
* inside this filesystem so it's quite a bit simpler.
*/
static noinline int btrfs_mksubvol(const struct path *parent,
+ struct user_namespace *mnt_userns,
const char *name, int namelen,
struct btrfs_root *snap_src,
bool readonly,
@@ -893,12 +905,12 @@ static noinline int btrfs_mksubvol(const struct path *parent,
if (error == -EINTR)
return error;
- dentry = lookup_one_len(name, parent->dentry, namelen);
+ dentry = lookup_one(mnt_userns, name, parent->dentry, namelen);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_unlock;
- error = btrfs_may_create(dir, dentry);
+ error = btrfs_may_create(mnt_userns, dir, dentry);
if (error)
goto out_dput;
@@ -920,7 +932,7 @@ static noinline int btrfs_mksubvol(const struct path *parent,
if (snap_src)
error = create_snapshot(snap_src, dir, dentry, readonly, inherit);
else
- error = create_subvol(dir, dentry, name, namelen, inherit);
+ error = create_subvol(mnt_userns, dir, dentry, name, namelen, inherit);
if (!error)
fsnotify_mkdir(dir, dentry);
@@ -934,6 +946,7 @@ out_unlock:
}
static noinline int btrfs_mksnapshot(const struct path *parent,
+ struct user_namespace *mnt_userns,
const char *name, int namelen,
struct btrfs_root *root,
bool readonly,
@@ -963,7 +976,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
- ret = btrfs_mksubvol(parent, name, namelen,
+ ret = btrfs_mksubvol(parent, mnt_userns, name, namelen,
root, readonly, inherit);
out:
if (snapshot_force_cow)
@@ -1792,6 +1805,7 @@ out_drop:
}
static noinline int __btrfs_ioctl_snap_create(struct file *file,
+ struct user_namespace *mnt_userns,
const char *name, unsigned long fd, int subvol,
bool readonly,
struct btrfs_qgroup_inherit *inherit)
@@ -1819,8 +1833,8 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
}
if (subvol) {
- ret = btrfs_mksubvol(&file->f_path, name, namelen,
- NULL, readonly, inherit);
+ ret = btrfs_mksubvol(&file->f_path, mnt_userns, name,
+ namelen, NULL, readonly, inherit);
} else {
struct fd src = fdget(fd);
struct inode *src_inode;
@@ -1834,16 +1848,17 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
"Snapshot src from another FS");
ret = -EXDEV;
- } else if (!inode_owner_or_capable(&init_user_ns, src_inode)) {
+ } else if (!inode_owner_or_capable(mnt_userns, src_inode)) {
/*
* Subvolume creation is not restricted, but snapshots
* are limited to own subvolumes only
*/
ret = -EPERM;
} else {
- ret = btrfs_mksnapshot(&file->f_path, name, namelen,
- BTRFS_I(src_inode)->root,
- readonly, inherit);
+ ret = btrfs_mksnapshot(&file->f_path, mnt_userns,
+ name, namelen,
+ BTRFS_I(src_inode)->root,
+ readonly, inherit);
}
fdput(src);
}
@@ -1867,8 +1882,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
return PTR_ERR(vol_args);
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd,
- subvol, false, NULL);
+ ret = __btrfs_ioctl_snap_create(file, file_mnt_user_ns(file),
+ vol_args->name, vol_args->fd, subvol,
+ false, NULL);
kfree(vol_args);
return ret;
@@ -1926,8 +1942,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
}
}
- ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd,
- subvol, readonly, inherit);
+ ret = __btrfs_ioctl_snap_create(file, file_mnt_user_ns(file),
+ vol_args->name, vol_args->fd, subvol,
+ readonly, inherit);
if (ret)
goto free_inherit;
free_inherit:
@@ -1971,7 +1988,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
u64 flags;
int ret = 0;
- if (!inode_owner_or_capable(&init_user_ns, inode))
+ if (!inode_owner_or_capable(file_mnt_user_ns(file), inode))
return -EPERM;
ret = mnt_want_write_file(file);
@@ -2382,23 +2399,16 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
key.offset = (u64)-1;
while (1) {
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ ret = btrfs_search_backwards(root, &key, path);
if (ret < 0)
goto out;
else if (ret > 0) {
- ret = btrfs_previous_item(root, path, dirid,
- BTRFS_INODE_REF_KEY);
- if (ret < 0)
- goto out;
- else if (ret > 0) {
- ret = -ENOENT;
- goto out;
- }
+ ret = -ENOENT;
+ goto out;
}
l = path->nodes[0];
slot = path->slots[0];
- btrfs_item_key_to_cpu(l, &key, slot);
iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
len = btrfs_inode_ref_name_len(l, iref);
@@ -2429,7 +2439,8 @@ out:
return ret;
}
-static int btrfs_search_path_in_tree_user(struct inode *inode,
+static int btrfs_search_path_in_tree_user(struct user_namespace *mnt_userns,
+ struct inode *inode,
struct btrfs_ioctl_ino_lookup_user_args *args)
{
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
@@ -2473,23 +2484,16 @@ static int btrfs_search_path_in_tree_user(struct inode *inode,
key.type = BTRFS_INODE_REF_KEY;
key.offset = (u64)-1;
while (1) {
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0) {
+ ret = btrfs_search_backwards(root, &key, path);
+ if (ret < 0)
+ goto out_put;
+ else if (ret > 0) {
+ ret = -ENOENT;
goto out_put;
- } else if (ret > 0) {
- ret = btrfs_previous_item(root, path, dirid,
- BTRFS_INODE_REF_KEY);
- if (ret < 0) {
- goto out_put;
- } else if (ret > 0) {
- ret = -ENOENT;
- goto out_put;
- }
}
leaf = path->nodes[0];
slot = path->slots[0];
- btrfs_item_key_to_cpu(leaf, &key, slot);
iref = btrfs_item_ptr(leaf, slot, struct btrfs_inode_ref);
len = btrfs_inode_ref_name_len(leaf, iref);
@@ -2527,7 +2531,7 @@ static int btrfs_search_path_in_tree_user(struct inode *inode,
ret = PTR_ERR(temp_inode);
goto out_put;
}
- ret = inode_permission(&init_user_ns, temp_inode,
+ ret = inode_permission(mnt_userns, temp_inode,
MAY_READ | MAY_EXEC);
iput(temp_inode);
if (ret) {
@@ -2669,7 +2673,7 @@ static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
return -EACCES;
}
- ret = btrfs_search_path_in_tree_user(inode, args);
+ ret = btrfs_search_path_in_tree_user(file_mnt_user_ns(file), inode, args);
if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
ret = -EFAULT;
@@ -2905,6 +2909,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
struct btrfs_root *dest = NULL;
struct btrfs_ioctl_vol_args *vol_args = NULL;
struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL;
+ struct user_namespace *mnt_userns = file_mnt_user_ns(file);
char *subvol_name, *subvol_name_ptr = NULL;
int subvol_namelen;
int err = 0;
@@ -2932,6 +2937,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (err)
goto out;
} else {
+ struct inode *old_dir;
+
if (vol_args2->subvolid < BTRFS_FIRST_FREE_OBJECTID) {
err = -EINVAL;
goto out;
@@ -2968,6 +2975,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
err = PTR_ERR(parent);
goto out_drop_write;
}
+ old_dir = dir;
dir = d_inode(parent);
/*
@@ -2978,6 +2986,20 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
*/
destroy_parent = true;
+ /*
+ * On idmapped mounts, deletion via subvolid is
+ * restricted to subvolumes that are immediate
+ * ancestors of the inode referenced by the file
+ * descriptor in the ioctl. Otherwise the idmapping
+ * could potentially be abused to delete subvolumes
+ * anywhere in the filesystem the user wouldn't be able
+ * to delete without an idmapped mount.
+ */
+ if (old_dir != dir && mnt_userns != &init_user_ns) {
+ err = -EOPNOTSUPP;
+ goto free_parent;
+ }
+
subvol_name_ptr = btrfs_get_subvol_name_from_objectid(
fs_info, vol_args2->subvolid);
if (IS_ERR(subvol_name_ptr)) {
@@ -3016,7 +3038,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
if (err == -EINTR)
goto free_subvol_name;
- dentry = lookup_one_len(subvol_name, parent, subvol_namelen);
+ dentry = lookup_one(mnt_userns, subvol_name, parent, subvol_namelen);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
goto out_unlock_dir;
@@ -3058,14 +3080,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (root == dest)
goto out_dput;
- err = inode_permission(&init_user_ns, inode,
- MAY_WRITE | MAY_EXEC);
+ err = inode_permission(mnt_userns, inode, MAY_WRITE | MAY_EXEC);
if (err)
goto out_dput;
}
/* check if subvolume may be deleted by a user */
- err = btrfs_may_delete(dir, dentry, 1);
+ err = btrfs_may_delete(mnt_userns, dir, dentry, 1);
if (err)
goto out_dput;
@@ -3103,7 +3124,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
{
struct inode *inode = file_inode(file);
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_ioctl_defrag_range_args *range;
+ struct btrfs_ioctl_defrag_range_args range = {0};
int ret;
ret = mnt_want_write_file(file);
@@ -3115,6 +3136,12 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
goto out;
}
+ /* Subpage defrag will be supported in later commits */
+ if (root->fs_info->sectorsize < PAGE_SIZE) {
+ ret = -ENOTTY;
+ goto out;
+ }
+
switch (inode->i_mode & S_IFMT) {
case S_IFDIR:
if (!capable(CAP_SYS_ADMIN)) {
@@ -3135,33 +3162,24 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
goto out;
}
- range = kzalloc(sizeof(*range), GFP_KERNEL);
- if (!range) {
- ret = -ENOMEM;
- goto out;
- }
-
if (argp) {
- if (copy_from_user(range, argp,
- sizeof(*range))) {
+ if (copy_from_user(&range, argp, sizeof(range))) {
ret = -EFAULT;
- kfree(range);
goto out;
}
/* compression requires us to start the IO */
- if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
- range->flags |= BTRFS_DEFRAG_RANGE_START_IO;
- range->extent_thresh = (u32)-1;
+ if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
+ range.flags |= BTRFS_DEFRAG_RANGE_START_IO;
+ range.extent_thresh = (u32)-1;
}
} else {
/* the rest are all set to zero by kzalloc */
- range->len = (u64)-1;
+ range.len = (u64)-1;
}
ret = btrfs_defrag_file(file_inode(file), file,
- range, BTRFS_OLDEST_GENERATION, 0);
+ &range, BTRFS_OLDEST_GENERATION, 0);
if (ret > 0)
ret = 0;
- kfree(range);
break;
default:
ret = -EINVAL;
@@ -4404,25 +4422,20 @@ drop_write:
static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info,
void __user *arg)
{
- struct btrfs_ioctl_quota_rescan_args *qsa;
+ struct btrfs_ioctl_quota_rescan_args qsa = {0};
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- qsa = kzalloc(sizeof(*qsa), GFP_KERNEL);
- if (!qsa)
- return -ENOMEM;
-
if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
- qsa->flags = 1;
- qsa->progress = fs_info->qgroup_rescan_progress.objectid;
+ qsa.flags = 1;
+ qsa.progress = fs_info->qgroup_rescan_progress.objectid;
}
- if (copy_to_user(arg, qsa, sizeof(*qsa)))
+ if (copy_to_user(arg, &qsa, sizeof(qsa)))
ret = -EFAULT;
- kfree(qsa);
return ret;
}
@@ -4436,6 +4449,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info,
}
static long _btrfs_ioctl_set_received_subvol(struct file *file,
+ struct user_namespace *mnt_userns,
struct btrfs_ioctl_received_subvol_args *sa)
{
struct inode *inode = file_inode(file);
@@ -4447,7 +4461,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
int ret = 0;
int received_uuid_changed;
- if (!inode_owner_or_capable(&init_user_ns, inode))
+ if (!inode_owner_or_capable(mnt_userns, inode))
return -EPERM;
ret = mnt_want_write_file(file);
@@ -4552,7 +4566,7 @@ static long btrfs_ioctl_set_received_subvol_32(struct file *file,
args64->rtime.nsec = args32->rtime.nsec;
args64->flags = args32->flags;
- ret = _btrfs_ioctl_set_received_subvol(file, args64);
+ ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_user_ns(file), args64);
if (ret)
goto out;
@@ -4586,7 +4600,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
if (IS_ERR(sa))
return PTR_ERR(sa);
- ret = _btrfs_ioctl_set_received_subvol(file, sa);
+ ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_user_ns(file), sa);
if (ret)
goto out;
@@ -5013,6 +5027,10 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_get_subvol_rootref(file, argp);
case BTRFS_IOC_INO_LOOKUP_USER:
return btrfs_ioctl_ino_lookup_user(file, argp);
+ case FS_IOC_ENABLE_VERITY:
+ return fsverity_ioctl_enable(file, (const void __user *)argp);
+ case FS_IOC_MEASURE_VERITY:
+ return fsverity_ioctl_measure(file, argp);
}
return -ENOTTY;
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cd042c7567a4..c25dfd1a8a54 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -14,6 +14,7 @@
#include <linux/lzo.h>
#include <linux/refcount.h>
#include "compression.h"
+#include "ctree.h"
#define LZO_LEN 4
@@ -140,18 +141,18 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
*total_in = 0;
in_page = find_get_page(mapping, start >> PAGE_SHIFT);
- data_in = kmap(in_page);
+ data_in = page_address(in_page);
/*
* store the size of all chunks of compressed data in
* the first 4 bytes
*/
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
- cpage_out = kmap(out_page);
+ cpage_out = page_address(out_page);
out_offset = LZO_LEN;
tot_out = LZO_LEN;
pages[0] = out_page;
@@ -209,19 +210,18 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
if (out_len == 0 && tot_in >= len)
break;
- kunmap(out_page);
if (nr_pages == nr_dest_pages) {
out_page = NULL;
ret = -E2BIG;
goto out;
}
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
- cpage_out = kmap(out_page);
+ cpage_out = page_address(out_page);
pages[nr_pages++] = out_page;
pg_bytes_left = PAGE_SIZE;
@@ -243,12 +243,11 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
break;
bytes_left = len - tot_in;
- kunmap(in_page);
put_page(in_page);
start += PAGE_SIZE;
in_page = find_get_page(mapping, start >> PAGE_SHIFT);
- data_in = kmap(in_page);
+ data_in = page_address(in_page);
in_len = min(bytes_left, PAGE_SIZE);
}
@@ -258,164 +257,130 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
}
/* store the size of all chunks of compressed data */
- sizes_ptr = kmap_local_page(pages[0]);
+ sizes_ptr = page_address(pages[0]);
write_compress_length(sizes_ptr, tot_out);
- kunmap_local(sizes_ptr);
ret = 0;
*total_out = tot_out;
*total_in = tot_in;
out:
*out_pages = nr_pages;
- if (out_page)
- kunmap(out_page);
- if (in_page) {
- kunmap(in_page);
+ if (in_page)
put_page(in_page);
- }
return ret;
}
+/*
+ * Copy the compressed segment payload into @dest.
+ *
+ * For the payload there will be no padding, just need to do page switching.
+ */
+static void copy_compressed_segment(struct compressed_bio *cb,
+ char *dest, u32 len, u32 *cur_in)
+{
+ u32 orig_in = *cur_in;
+
+ while (*cur_in < orig_in + len) {
+ struct page *cur_page;
+ u32 copy_len = min_t(u32, PAGE_SIZE - offset_in_page(*cur_in),
+ orig_in + len - *cur_in);
+
+ ASSERT(copy_len);
+ cur_page = cb->compressed_pages[*cur_in / PAGE_SIZE];
+
+ memcpy(dest + *cur_in - orig_in,
+ page_address(cur_page) + offset_in_page(*cur_in),
+ copy_len);
+
+ *cur_in += copy_len;
+ }
+}
+
int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
- int ret = 0, ret2;
- char *data_in;
- unsigned long page_in_index = 0;
- size_t srclen = cb->compressed_len;
- unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
- unsigned long buf_start;
- unsigned long buf_offset = 0;
- unsigned long bytes;
- unsigned long working_bytes;
- size_t in_len;
- size_t out_len;
- const size_t max_segment_len = lzo1x_worst_compress(PAGE_SIZE);
- unsigned long in_offset;
- unsigned long in_page_bytes_left;
- unsigned long tot_in;
- unsigned long tot_out;
- unsigned long tot_len;
- char *buf;
- bool may_late_unmap, need_unmap;
- struct page **pages_in = cb->compressed_pages;
- u64 disk_start = cb->start;
- struct bio *orig_bio = cb->orig_bio;
+ const struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
+ const u32 sectorsize = fs_info->sectorsize;
+ int ret;
+ /* Compressed data length, can be unaligned */
+ u32 len_in;
+ /* Offset inside the compressed data */
+ u32 cur_in = 0;
+ /* Bytes decompressed so far */
+ u32 cur_out = 0;
+
+ len_in = read_compress_length(page_address(cb->compressed_pages[0]));
+ cur_in += LZO_LEN;
- data_in = kmap(pages_in[0]);
- tot_len = read_compress_length(data_in);
/*
- * Compressed data header check.
+ * LZO header length check
*
- * The real compressed size can't exceed the maximum extent length, and
- * all pages should be used (whole unused page with just the segment
- * header is not possible). If this happens it means the compressed
- * extent is corrupted.
+ * The total length should not exceed the maximum extent length,
+ * and all sectors should be used.
+ * If this happens, it means the compressed extent is corrupted.
*/
- if (tot_len > min_t(size_t, BTRFS_MAX_COMPRESSED, srclen) ||
- tot_len < srclen - PAGE_SIZE) {
- ret = -EUCLEAN;
- goto done;
+ if (len_in > min_t(size_t, BTRFS_MAX_COMPRESSED, cb->compressed_len) ||
+ round_up(len_in, sectorsize) < cb->compressed_len) {
+ btrfs_err(fs_info,
+ "invalid lzo header, lzo len %u compressed len %u",
+ len_in, cb->compressed_len);
+ return -EUCLEAN;
}
- tot_in = LZO_LEN;
- in_offset = LZO_LEN;
- in_page_bytes_left = PAGE_SIZE - LZO_LEN;
-
- tot_out = 0;
-
- while (tot_in < tot_len) {
- in_len = read_compress_length(data_in + in_offset);
- in_page_bytes_left -= LZO_LEN;
- in_offset += LZO_LEN;
- tot_in += LZO_LEN;
+ /* Go through each lzo segment */
+ while (cur_in < len_in) {
+ struct page *cur_page;
+ /* Length of the compressed segment */
+ u32 seg_len;
+ u32 sector_bytes_left;
+ size_t out_len = lzo1x_worst_compress(sectorsize);
/*
- * Segment header check.
- *
- * The segment length must not exceed the maximum LZO
- * compression size, nor the total compressed size.
+ * We should always have enough space for one segment header
+ * inside current sector.
*/
- if (in_len > max_segment_len || tot_in + in_len > tot_len) {
- ret = -EUCLEAN;
- goto done;
- }
-
- tot_in += in_len;
- working_bytes = in_len;
- may_late_unmap = need_unmap = false;
-
- /* fast path: avoid using the working buffer */
- if (in_page_bytes_left >= in_len) {
- buf = data_in + in_offset;
- bytes = in_len;
- may_late_unmap = true;
- goto cont;
- }
-
- /* copy bytes from the pages into the working buffer */
- buf = workspace->cbuf;
- buf_offset = 0;
- while (working_bytes) {
- bytes = min(working_bytes, in_page_bytes_left);
-
- memcpy(buf + buf_offset, data_in + in_offset, bytes);
- buf_offset += bytes;
-cont:
- working_bytes -= bytes;
- in_page_bytes_left -= bytes;
- in_offset += bytes;
-
- /* check if we need to pick another page */
- if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN)
- || in_page_bytes_left == 0) {
- tot_in += in_page_bytes_left;
-
- if (working_bytes == 0 && tot_in >= tot_len)
- break;
-
- if (page_in_index + 1 >= total_pages_in) {
- ret = -EIO;
- goto done;
- }
-
- if (may_late_unmap)
- need_unmap = true;
- else
- kunmap(pages_in[page_in_index]);
-
- data_in = kmap(pages_in[++page_in_index]);
-
- in_page_bytes_left = PAGE_SIZE;
- in_offset = 0;
- }
- }
-
- out_len = max_segment_len;
- ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
- &out_len);
- if (need_unmap)
- kunmap(pages_in[page_in_index - 1]);
+ ASSERT(cur_in / sectorsize ==
+ (cur_in + LZO_LEN - 1) / sectorsize);
+ cur_page = cb->compressed_pages[cur_in / PAGE_SIZE];
+ ASSERT(cur_page);
+ seg_len = read_compress_length(page_address(cur_page) +
+ offset_in_page(cur_in));
+ cur_in += LZO_LEN;
+
+ /* Copy the compressed segment payload into workspace */
+ copy_compressed_segment(cb, workspace->cbuf, seg_len, &cur_in);
+
+ /* Decompress the data */
+ ret = lzo1x_decompress_safe(workspace->cbuf, seg_len,
+ workspace->buf, &out_len);
if (ret != LZO_E_OK) {
- pr_warn("BTRFS: decompress failed\n");
+ btrfs_err(fs_info, "failed to decompress");
ret = -EIO;
- break;
+ goto out;
}
- buf_start = tot_out;
- tot_out += out_len;
+ /* Copy the data into inode pages */
+ ret = btrfs_decompress_buf2page(workspace->buf, out_len, cb, cur_out);
+ cur_out += out_len;
- ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
- tot_out, disk_start, orig_bio);
- if (ret2 == 0)
- break;
+ /* All data read, exit */
+ if (ret == 0)
+ goto out;
+ ret = 0;
+
+ /* Check if the sector has enough space for a segment header */
+ sector_bytes_left = sectorsize - (cur_in % sectorsize);
+ if (sector_bytes_left >= LZO_LEN)
+ continue;
+
+ /* Skip the padding zeros */
+ cur_in += sector_bytes_left;
}
-done:
- kunmap(pages_in[page_in_index]);
+out:
if (!ret)
- zero_fill_bio(orig_bio);
+ zero_fill_bio(cb->orig_bio);
return ret;
}
@@ -466,7 +431,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
destlen = min_t(unsigned long, destlen, PAGE_SIZE);
bytes = min_t(unsigned long, destlen, out_len - start_byte);
- kaddr = kmap_local_page(dest_page);
+ kaddr = page_address(dest_page);
memcpy(kaddr, workspace->buf + start_byte, bytes);
/*
@@ -476,7 +441,6 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
*/
if (bytes < destlen)
memset(kaddr+bytes, 0, destlen-bytes);
- kunmap_local(kaddr);
out:
return ret;
}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 5c0f8481e25e..edb65abf0393 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -446,7 +446,6 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
* Will be also used to store the finished ordered extent.
* @file_offset: File offset for the finished IO
* @io_size: Length of the finish IO range
- * @uptodate: If the IO finishes without problem
*
* Return true if the ordered extent is finished in the range, and update
* @cached.
@@ -457,7 +456,7 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
*/
bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
struct btrfs_ordered_extent **cached,
- u64 file_offset, u64 io_size, int uptodate)
+ u64 file_offset, u64 io_size)
{
struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
struct rb_node *node;
@@ -486,8 +485,6 @@ have_entry:
entry->bytes_left, io_size);
entry->bytes_left -= io_size;
- if (!uptodate)
- set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
if (entry->bytes_left == 0) {
/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index b2d88aba8420..4194e960ff61 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -177,7 +177,7 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
bool uptodate);
bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
struct btrfs_ordered_extent **cached,
- u64 file_offset, u64 io_size, int uptodate);
+ u64 file_offset, u64 io_size);
int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
int type);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 0fa121171ca1..db680f5be745 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1733,7 +1733,7 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
ASSERT(trans != NULL);
ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root,
- false, true);
+ true);
if (ret < 0) {
trans->fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
btrfs_warn(trans->fs_info,
@@ -2651,7 +2651,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
/* Search commit root to find old_roots */
ret = btrfs_find_all_roots(NULL, fs_info,
record->bytenr, 0,
- &record->old_roots, false, false);
+ &record->old_roots, false);
if (ret < 0)
goto cleanup;
}
@@ -2667,7 +2667,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
* current root. It's safe inside commit_transaction().
*/
ret = btrfs_find_all_roots(trans, fs_info,
- record->bytenr, BTRFS_SEQ_LAST, &new_roots, false, false);
+ record->bytenr, BTRFS_SEQ_LAST, &new_roots, false);
if (ret < 0)
goto cleanup;
if (qgroup_to_skip) {
@@ -3201,7 +3201,7 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
num_bytes = found.offset;
ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
- &roots, false, false);
+ &roots, false);
if (ret < 0)
goto out;
/* For rescan, just pass old_roots as NULL */
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 244d499ebc72..d8d268ca8aa7 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1035,7 +1035,7 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
for (i = 0; i < rbio->nr_pages; i++) {
if (rbio->stripe_pages[i])
continue;
- page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ page = alloc_page(GFP_NOFS);
if (!page)
return -ENOMEM;
rbio->stripe_pages[i] = page;
@@ -1054,7 +1054,7 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
for (; i < rbio->nr_pages; i++) {
if (rbio->stripe_pages[i])
continue;
- page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ page = alloc_page(GFP_NOFS);
if (!page)
return -ENOMEM;
rbio->stripe_pages[i] = page;
@@ -1636,10 +1636,10 @@ struct btrfs_plug_cb {
static int plug_cmp(void *priv, const struct list_head *a,
const struct list_head *b)
{
- struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
- plug_list);
- struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
- plug_list);
+ const struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
+ plug_list);
+ const struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
+ plug_list);
u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;
@@ -2300,7 +2300,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
if (rbio->stripe_pages[index])
continue;
- page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ page = alloc_page(GFP_NOFS);
if (!page)
return -ENOMEM;
rbio->stripe_pages[index] = page;
@@ -2350,14 +2350,14 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
if (!need_check)
goto writeback;
- p_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ p_page = alloc_page(GFP_NOFS);
if (!p_page)
goto cleanup;
SetPageUptodate(p_page);
if (has_qstripe) {
/* RAID6, allocate and map temp space for the Q stripe */
- q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ q_page = alloc_page(GFP_NOFS);
if (!q_page) {
__free_page(p_page);
goto cleanup;
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index 8e026de74c44..d2062d5f71dd 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -264,8 +264,8 @@ static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
struct block_entry *be = NULL, *exist;
struct root_entry *re = NULL;
- re = kzalloc(sizeof(struct root_entry), GFP_KERNEL);
- be = kzalloc(sizeof(struct block_entry), GFP_KERNEL);
+ re = kzalloc(sizeof(struct root_entry), GFP_NOFS);
+ be = kzalloc(sizeof(struct block_entry), GFP_NOFS);
if (!be || !re) {
kfree(re);
kfree(be);
@@ -313,7 +313,7 @@ static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
struct root_entry *re;
struct ref_entry *ref = NULL, *exist;
- ref = kmalloc(sizeof(struct ref_entry), GFP_KERNEL);
+ ref = kmalloc(sizeof(struct ref_entry), GFP_NOFS);
if (!ref)
return -ENOMEM;
@@ -358,7 +358,7 @@ static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
struct block_entry *be;
struct ref_entry *ref;
- ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
+ ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
if (!ref)
return -ENOMEM;
be = add_block_entry(fs_info, bytenr, num_bytes, 0);
@@ -393,7 +393,7 @@ static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);
- ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
+ ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
if (!ref)
return -ENOMEM;
be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index fc831597cb22..914d403b4415 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -24,6 +24,7 @@
#include "block-group.h"
#include "backref.h"
#include "misc.h"
+#include "subpage.h"
/*
* Relocation overview
@@ -2781,10 +2782,70 @@ static noinline_for_stack int prealloc_file_extent_cluster(
u64 num_bytes;
int nr;
int ret = 0;
+ u64 i_size = i_size_read(&inode->vfs_inode);
u64 prealloc_start = cluster->start - offset;
u64 prealloc_end = cluster->end - offset;
u64 cur_offset = prealloc_start;
+ /*
+ * For subpage case, previous i_size may not be aligned to PAGE_SIZE.
+ * This means the range [i_size, PAGE_END + 1) is filled with zeros by
+ * btrfs_do_readpage() call of previously relocated file cluster.
+ *
+ * If the current cluster starts in the above range, btrfs_do_readpage()
+ * will skip the read, and relocate_one_page() will later writeback
+ * the padding zeros as new data, causing data corruption.
+ *
+ * Here we have to manually invalidate the range [i_size, PAGE_END + 1).
+ */
+ if (!IS_ALIGNED(i_size, PAGE_SIZE)) {
+ struct address_space *mapping = inode->vfs_inode.i_mapping;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ const u32 sectorsize = fs_info->sectorsize;
+ struct page *page;
+
+ ASSERT(sectorsize < PAGE_SIZE);
+ ASSERT(IS_ALIGNED(i_size, sectorsize));
+
+ /*
+ * Subpage can't handle page with DIRTY but without UPTODATE
+ * bit as it can lead to the following deadlock:
+ *
+ * btrfs_readpage()
+ * | Page already *locked*
+ * |- btrfs_lock_and_flush_ordered_range()
+ * |- btrfs_start_ordered_extent()
+ * |- extent_write_cache_pages()
+ * |- lock_page()
+ * We try to lock the page we already hold.
+ *
+ * Here we just write back the whole data reloc inode, so that
+ * we are guaranteed to have no dirty range in the page, and
+ * it is safe to clear the uptodate bits.
+ *
+ * This shouldn't cause too much overhead, as we need to write
+ * the data back anyway.
+ */
+ ret = filemap_write_and_wait(mapping);
+ if (ret < 0)
+ return ret;
+
+ clear_extent_bits(&inode->io_tree, i_size,
+ round_up(i_size, PAGE_SIZE) - 1,
+ EXTENT_UPTODATE);
+ page = find_lock_page(mapping, i_size >> PAGE_SHIFT);
+ /*
+ * If page is freed we don't need to do anything then, as we
+ * will re-read the whole page anyway.
+ */
+ if (page) {
+ btrfs_subpage_clear_uptodate(fs_info, page, i_size,
+ round_up(i_size, PAGE_SIZE) - i_size);
+ unlock_page(page);
+ put_page(page);
+ }
+ }
+
BUG_ON(cluster->start != cluster->boundary[0]);
ret = btrfs_alloc_data_chunk_ondemand(inode,
prealloc_end + 1 - prealloc_start);
@@ -2886,19 +2947,149 @@ noinline int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
}
ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
-static int relocate_file_extent_cluster(struct inode *inode,
- struct file_extent_cluster *cluster)
+static u64 get_cluster_boundary_end(struct file_extent_cluster *cluster,
+ int cluster_nr)
+{
+ /* Last extent, use cluster end directly */
+ if (cluster_nr >= cluster->nr - 1)
+ return cluster->end;
+
+ /* Otherwise end right before the next boundary start */
+ return cluster->boundary[cluster_nr + 1] - 1;
+}
+
+static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
+ struct file_extent_cluster *cluster,
+ int *cluster_nr, unsigned long page_index)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ u64 offset = BTRFS_I(inode)->index_cnt;
+ const unsigned long last_index = (cluster->end - offset) >> PAGE_SHIFT;
+ gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
+ struct page *page;
u64 page_start;
u64 page_end;
+ u64 cur;
+ int ret;
+
+ ASSERT(page_index <= last_index);
+ page = find_lock_page(inode->i_mapping, page_index);
+ if (!page) {
+ page_cache_sync_readahead(inode->i_mapping, ra, NULL,
+ page_index, last_index + 1 - page_index);
+ page = find_or_create_page(inode->i_mapping, page_index, mask);
+ if (!page)
+ return -ENOMEM;
+ }
+ ret = set_page_extent_mapped(page);
+ if (ret < 0)
+ goto release_page;
+
+ if (PageReadahead(page))
+ page_cache_async_readahead(inode->i_mapping, ra, NULL, page,
+ page_index, last_index + 1 - page_index);
+
+ if (!PageUptodate(page)) {
+ btrfs_readpage(NULL, page);
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ ret = -EIO;
+ goto release_page;
+ }
+ }
+
+ page_start = page_offset(page);
+ page_end = page_start + PAGE_SIZE - 1;
+
+ /*
+ * Start from the cluster, as for subpage case, the cluster can start
+ * inside the page.
+ */
+ cur = max(page_start, cluster->boundary[*cluster_nr] - offset);
+ while (cur <= page_end) {
+ u64 extent_start = cluster->boundary[*cluster_nr] - offset;
+ u64 extent_end = get_cluster_boundary_end(cluster,
+ *cluster_nr) - offset;
+ u64 clamped_start = max(page_start, extent_start);
+ u64 clamped_end = min(page_end, extent_end);
+ u32 clamped_len = clamped_end + 1 - clamped_start;
+
+ /* Reserve metadata for this range */
+ ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
+ clamped_len);
+ if (ret)
+ goto release_page;
+
+ /* Mark the range delalloc and dirty for later writeback */
+ lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end);
+ ret = btrfs_set_extent_delalloc(BTRFS_I(inode), clamped_start,
+ clamped_end, 0, NULL);
+ if (ret) {
+ clear_extent_bits(&BTRFS_I(inode)->io_tree,
+ clamped_start, clamped_end,
+ EXTENT_LOCKED | EXTENT_BOUNDARY);
+ btrfs_delalloc_release_metadata(BTRFS_I(inode),
+ clamped_len, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode),
+ clamped_len);
+ goto release_page;
+ }
+ btrfs_page_set_dirty(fs_info, page, clamped_start, clamped_len);
+
+ /*
+ * Set the boundary if it's inside the page.
+ * Data relocation requires the destination extents to have the
+ * same size as the source.
+ * EXTENT_BOUNDARY bit prevents current extent from being merged
+ * with previous extent.
+ */
+ if (in_range(cluster->boundary[*cluster_nr] - offset,
+ page_start, PAGE_SIZE)) {
+ u64 boundary_start = cluster->boundary[*cluster_nr] -
+ offset;
+ u64 boundary_end = boundary_start +
+ fs_info->sectorsize - 1;
+
+ set_extent_bits(&BTRFS_I(inode)->io_tree,
+ boundary_start, boundary_end,
+ EXTENT_BOUNDARY);
+ }
+ unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), clamped_len);
+ cur += clamped_len;
+
+ /* Crossed extent end, go to next extent */
+ if (cur >= extent_end) {
+ (*cluster_nr)++;
+ /* Just finished the last extent of the cluster, exit. */
+ if (*cluster_nr >= cluster->nr)
+ break;
+ }
+ }
+ unlock_page(page);
+ put_page(page);
+
+ balance_dirty_pages_ratelimited(inode->i_mapping);
+ btrfs_throttle(fs_info);
+ if (btrfs_should_cancel_balance(fs_info))
+ ret = -ECANCELED;
+ return ret;
+
+release_page:
+ unlock_page(page);
+ put_page(page);
+ return ret;
+}
+
+static int relocate_file_extent_cluster(struct inode *inode,
+ struct file_extent_cluster *cluster)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 offset = BTRFS_I(inode)->index_cnt;
unsigned long index;
unsigned long last_index;
- struct page *page;
struct file_ra_state *ra;
- gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
- int nr = 0;
+ int cluster_nr = 0;
int ret = 0;
if (!cluster->nr)
@@ -2919,109 +3110,14 @@ static int relocate_file_extent_cluster(struct inode *inode,
if (ret)
goto out;
- index = (cluster->start - offset) >> PAGE_SHIFT;
last_index = (cluster->end - offset) >> PAGE_SHIFT;
- while (index <= last_index) {
- ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
- PAGE_SIZE);
- if (ret)
- goto out;
-
- page = find_lock_page(inode->i_mapping, index);
- if (!page) {
- page_cache_sync_readahead(inode->i_mapping,
- ra, NULL, index,
- last_index + 1 - index);
- page = find_or_create_page(inode->i_mapping, index,
- mask);
- if (!page) {
- btrfs_delalloc_release_metadata(BTRFS_I(inode),
- PAGE_SIZE, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode),
- PAGE_SIZE);
- ret = -ENOMEM;
- goto out;
- }
- }
- ret = set_page_extent_mapped(page);
- if (ret < 0) {
- btrfs_delalloc_release_metadata(BTRFS_I(inode),
- PAGE_SIZE, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
- unlock_page(page);
- put_page(page);
- goto out;
- }
-
- if (PageReadahead(page)) {
- page_cache_async_readahead(inode->i_mapping,
- ra, NULL, page, index,
- last_index + 1 - index);
- }
-
- if (!PageUptodate(page)) {
- btrfs_readpage(NULL, page);
- lock_page(page);
- if (!PageUptodate(page)) {
- unlock_page(page);
- put_page(page);
- btrfs_delalloc_release_metadata(BTRFS_I(inode),
- PAGE_SIZE, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode),
- PAGE_SIZE);
- ret = -EIO;
- goto out;
- }
- }
-
- page_start = page_offset(page);
- page_end = page_start + PAGE_SIZE - 1;
-
- lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
-
- if (nr < cluster->nr &&
- page_start + offset == cluster->boundary[nr]) {
- set_extent_bits(&BTRFS_I(inode)->io_tree,
- page_start, page_end,
- EXTENT_BOUNDARY);
- nr++;
- }
-
- ret = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start,
- page_end, 0, NULL);
- if (ret) {
- unlock_page(page);
- put_page(page);
- btrfs_delalloc_release_metadata(BTRFS_I(inode),
- PAGE_SIZE, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode),
- PAGE_SIZE);
-
- clear_extent_bits(&BTRFS_I(inode)->io_tree,
- page_start, page_end,
- EXTENT_LOCKED | EXTENT_BOUNDARY);
- goto out;
-
- }
- set_page_dirty(page);
-
- unlock_extent(&BTRFS_I(inode)->io_tree,
- page_start, page_end);
- unlock_page(page);
- put_page(page);
-
- index++;
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
- balance_dirty_pages_ratelimited(inode->i_mapping);
- btrfs_throttle(fs_info);
- if (btrfs_should_cancel_balance(fs_info)) {
- ret = -ECANCELED;
- goto out;
- }
- }
- WARN_ON(nr != cluster->nr);
+ for (index = (cluster->start - offset) >> PAGE_SHIFT;
+ index <= last_index && !ret; index++)
+ ret = relocate_one_page(inode, ra, cluster, &cluster_nr, index);
if (btrfs_is_zoned(fs_info) && !ret)
ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ if (ret == 0)
+ WARN_ON(cluster_nr != cluster->nr);
out:
kfree(ra);
return ret;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 6ac37ae6c811..72f9b865e847 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1198,7 +1198,7 @@ struct backref_ctx {
static int __clone_root_cmp_bsearch(const void *key, const void *elt)
{
u64 root = (u64)(uintptr_t)key;
- struct clone_root *cr = (struct clone_root *)elt;
+ const struct clone_root *cr = elt;
if (root < cr->root->root_key.objectid)
return -1;
@@ -1209,8 +1209,8 @@ static int __clone_root_cmp_bsearch(const void *key, const void *elt)
static int __clone_root_cmp_sort(const void *e1, const void *e2)
{
- struct clone_root *cr1 = (struct clone_root *)e1;
- struct clone_root *cr2 = (struct clone_root *)e2;
+ const struct clone_root *cr1 = e1;
+ const struct clone_root *cr2 = e2;
if (cr1->root->root_key.objectid < cr2->root->root_key.objectid)
return -1;
@@ -1307,7 +1307,7 @@ static int find_extent_clone(struct send_ctx *sctx,
u64 flags = 0;
struct btrfs_file_extent_item *fi;
struct extent_buffer *eb = path->nodes[0];
- struct backref_ctx *backref_ctx = NULL;
+ struct backref_ctx backref_ctx = {0};
struct clone_root *cur_clone_root;
struct btrfs_key found_key;
struct btrfs_path *tmp_path;
@@ -1322,12 +1322,6 @@ static int find_extent_clone(struct send_ctx *sctx,
/* We only use this path under the commit sem */
tmp_path->need_commit_sem = 0;
- backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_KERNEL);
- if (!backref_ctx) {
- ret = -ENOMEM;
- goto out;
- }
-
if (data_offset >= ino_size) {
/*
* There may be extents that lie behind the file's size.
@@ -1392,12 +1386,12 @@ static int find_extent_clone(struct send_ctx *sctx,
cur_clone_root->found_refs = 0;
}
- backref_ctx->sctx = sctx;
- backref_ctx->found = 0;
- backref_ctx->cur_objectid = ino;
- backref_ctx->cur_offset = data_offset;
- backref_ctx->found_itself = 0;
- backref_ctx->extent_len = num_bytes;
+ backref_ctx.sctx = sctx;
+ backref_ctx.found = 0;
+ backref_ctx.cur_objectid = ino;
+ backref_ctx.cur_offset = data_offset;
+ backref_ctx.found_itself = 0;
+ backref_ctx.extent_len = num_bytes;
/*
* The last extent of a file may be too large due to page alignment.
@@ -1405,7 +1399,7 @@ static int find_extent_clone(struct send_ctx *sctx,
* __iterate_backrefs work.
*/
if (data_offset + num_bytes >= ino_size)
- backref_ctx->extent_len = ino_size - data_offset;
+ backref_ctx.extent_len = ino_size - data_offset;
/*
* Now collect all backrefs.
@@ -1416,12 +1410,12 @@ static int find_extent_clone(struct send_ctx *sctx,
extent_item_pos = 0;
ret = iterate_extent_inodes(fs_info, found_key.objectid,
extent_item_pos, 1, __iterate_backrefs,
- backref_ctx, false);
+ &backref_ctx, false);
if (ret < 0)
goto out;
- if (!backref_ctx->found_itself) {
+ if (!backref_ctx.found_itself) {
/* found a bug in backref code? */
ret = -EIO;
btrfs_err(fs_info,
@@ -1434,7 +1428,7 @@ static int find_extent_clone(struct send_ctx *sctx,
"find_extent_clone: data_offset=%llu, ino=%llu, num_bytes=%llu, logical=%llu",
data_offset, ino, num_bytes, logical);
- if (!backref_ctx->found)
+ if (!backref_ctx.found)
btrfs_debug(fs_info, "no clones found");
cur_clone_root = NULL;
@@ -1458,7 +1452,6 @@ static int find_extent_clone(struct send_ctx *sctx,
out:
btrfs_free_path(tmp_path);
- kfree(backref_ctx);
return ret;
}
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index f79bf85f2439..5ada02e0e629 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -493,6 +493,11 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
long time_left;
int loops;
+ delalloc_bytes = percpu_counter_sum_positive(&fs_info->delalloc_bytes);
+ ordered_bytes = percpu_counter_sum_positive(&fs_info->ordered_bytes);
+ if (delalloc_bytes == 0 && ordered_bytes == 0)
+ return;
+
/* Calc the number of the pages we need flush for space reservation */
if (to_reclaim == U64_MAX) {
items = U64_MAX;
@@ -500,22 +505,21 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
/*
* to_reclaim is set to however much metadata we need to
* reclaim, but reclaiming that much data doesn't really track
- * exactly, so increase the amount to reclaim by 2x in order to
- * make sure we're flushing enough delalloc to hopefully reclaim
- * some metadata reservations.
+ * exactly. What we really want to do is reclaim full inode's
+ * worth of reservations, however that's not available to us
+ * here. We will take a fraction of the delalloc bytes for our
+ * flushing loops and hope for the best. Delalloc will expand
+ * the amount we write to cover an entire dirty extent, which
+ * will reclaim the metadata reservation for that range. If
+ * it's not enough subsequent flush stages will be more
+ * aggressive.
*/
+ to_reclaim = max(to_reclaim, delalloc_bytes >> 3);
items = calc_reclaim_items_nr(fs_info, to_reclaim) * 2;
- to_reclaim = items * EXTENT_SIZE_PER_ITEM;
}
trans = (struct btrfs_trans_handle *)current->journal_info;
- delalloc_bytes = percpu_counter_sum_positive(
- &fs_info->delalloc_bytes);
- ordered_bytes = percpu_counter_sum_positive(&fs_info->ordered_bytes);
- if (delalloc_bytes == 0 && ordered_bytes == 0)
- return;
-
/*
* If we are doing more ordered than delalloc we need to just wait on
* ordered extents, otherwise we'll waste time trying to flush delalloc
@@ -528,9 +532,49 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
while ((delalloc_bytes || ordered_bytes) && loops < 3) {
u64 temp = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
long nr_pages = min_t(u64, temp, LONG_MAX);
+ int async_pages;
btrfs_start_delalloc_roots(fs_info, nr_pages, true);
+ /*
+ * We need to make sure any outstanding async pages are now
+ * processed before we continue. This is because things like
+ * sync_inode() try to be smart and skip writing if the inode is
+ * marked clean. We don't use filemap_fwrite for flushing
+ * because we want to control how many pages we write out at a
+ * time, thus this is the only safe way to make sure we've
+ * waited for outstanding compressed workers to have started
+ * their jobs and thus have ordered extents set up properly.
+ *
+ * This exists because we do not want to wait for each
+ * individual inode to finish its async work, we simply want to
+ * start the IO on everybody, and then come back here and wait
+ * for all of the async work to catch up. Once we're done with
+ * that we know we'll have ordered extents for everything and we
+ * can decide if we wait for that or not.
+ *
+ * If we choose to replace this in the future, make absolutely
+ * sure that the proper waiting is being done in the async case,
+ * as there have been bugs in that area before.
+ */
+ async_pages = atomic_read(&fs_info->async_delalloc_pages);
+ if (!async_pages)
+ goto skip_async;
+
+ /*
+ * We don't want to wait forever; if we wrote fewer pages in this
+ * loop than we have outstanding, only wait for that number of
+ * pages, otherwise we can wait for all async pages to finish
+ * before continuing.
+ */
+ if (async_pages > nr_pages)
+ async_pages -= nr_pages;
+ else
+ async_pages = 0;
+ wait_event(fs_info->async_submit_wait,
+ atomic_read(&fs_info->async_delalloc_pages) <=
+ async_pages);
+skip_async:
loops++;
if (wait_ordered && !trans) {
btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
@@ -595,8 +639,11 @@ static void flush_space(struct btrfs_fs_info *fs_info,
break;
case FLUSH_DELALLOC:
case FLUSH_DELALLOC_WAIT:
+ case FLUSH_DELALLOC_FULL:
+ if (state == FLUSH_DELALLOC_FULL)
+ num_bytes = U64_MAX;
shrink_delalloc(fs_info, space_info, num_bytes,
- state == FLUSH_DELALLOC_WAIT, for_preempt);
+ state != FLUSH_DELALLOC, for_preempt);
break;
case FLUSH_DELAYED_REFS_NR:
case FLUSH_DELAYED_REFS:
@@ -686,7 +733,7 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
{
u64 global_rsv_size = fs_info->global_block_rsv.reserved;
u64 ordered, delalloc;
- u64 thresh = div_factor_fine(space_info->total_bytes, 98);
+ u64 thresh = div_factor_fine(space_info->total_bytes, 90);
u64 used;
/* If we're just plain full then async reclaim just slows us down. */
@@ -694,6 +741,20 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
global_rsv_size) >= thresh)
return false;
+ used = space_info->bytes_may_use + space_info->bytes_pinned;
+
+ /* The total flushable belongs to the global rsv, don't flush. */
+ if (global_rsv_size >= used)
+ return false;
+
+ /*
+ * 128MiB is 1/4 of the maximum global rsv size. If we have less than
+ * that devoted to other reservations then there's no sense in flushing,
+ * we don't have a lot of things that need flushing.
+ */
+ if (used - global_rsv_size <= SZ_128M)
+ return false;
+
/*
* We have tickets queued, bail so we don't compete with the async
* flushers.
@@ -824,6 +885,8 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
struct reserve_ticket *ticket;
u64 tickets_id = space_info->tickets_id;
+ trace_btrfs_fail_all_tickets(fs_info, space_info);
+
if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
btrfs_info(fs_info, "cannot satisfy tickets, dumping space info");
__btrfs_dump_space_info(fs_info, space_info);
@@ -905,6 +968,14 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
}
/*
+ * We do not want to empty the system of delalloc unless we're
+ * under heavy pressure, so allow one trip through the flushing
+ * logic before we start doing a FLUSH_DELALLOC_FULL.
+ */
+ if (flush_state == FLUSH_DELALLOC_FULL && !commit_cycles)
+ flush_state++;
+
+ /*
* We don't want to force a chunk allocation until we've tried
* pretty hard to reclaim space. Think of the case where we
* freed up a bunch of space and so have a lot of pinned space
@@ -1067,7 +1138,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
* so if we now have space to allocate do the force chunk allocation.
*/
static const enum btrfs_flush_state data_flush_states[] = {
- FLUSH_DELALLOC_WAIT,
+ FLUSH_DELALLOC_FULL,
RUN_DELAYED_IPUTS,
COMMIT_TRANS,
ALLOC_CHUNK_FORCE,
@@ -1156,6 +1227,7 @@ static const enum btrfs_flush_state evict_flush_states[] = {
FLUSH_DELAYED_REFS,
FLUSH_DELALLOC,
FLUSH_DELALLOC_WAIT,
+ FLUSH_DELALLOC_FULL,
ALLOC_CHUNK,
COMMIT_TRANS,
};
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index 8260f8bb3ff0..f429256f56db 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -73,7 +73,7 @@ u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \
} \
token->kaddr = page_address(token->eb->pages[idx]); \
token->offset = idx << PAGE_SHIFT; \
- if (oip + size <= PAGE_SIZE) \
+ if (INLINE_EXTENT_BUFFER_PAGES == 1 || oip + size <= PAGE_SIZE ) \
return get_unaligned_le##bits(token->kaddr + oip); \
\
memcpy(lebytes, token->kaddr + oip, part); \
@@ -94,7 +94,7 @@ u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
u8 lebytes[sizeof(u##bits)]; \
\
ASSERT(check_setget_bounds(eb, ptr, off, size)); \
- if (oip + size <= PAGE_SIZE) \
+ if (INLINE_EXTENT_BUFFER_PAGES == 1 || oip + size <= PAGE_SIZE) \
return get_unaligned_le##bits(kaddr + oip); \
\
memcpy(lebytes, kaddr + oip, part); \
@@ -124,7 +124,7 @@ void btrfs_set_token_##bits(struct btrfs_map_token *token, \
} \
token->kaddr = page_address(token->eb->pages[idx]); \
token->offset = idx << PAGE_SHIFT; \
- if (oip + size <= PAGE_SIZE) { \
+ if (INLINE_EXTENT_BUFFER_PAGES == 1 || oip + size <= PAGE_SIZE) { \
put_unaligned_le##bits(val, token->kaddr + oip); \
return; \
} \
@@ -146,7 +146,7 @@ void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \
u8 lebytes[sizeof(u##bits)]; \
\
ASSERT(check_setget_bounds(eb, ptr, off, size)); \
- if (oip + size <= PAGE_SIZE) { \
+ if (INLINE_EXTENT_BUFFER_PAGES == 1 || oip + size <= PAGE_SIZE) { \
put_unaligned_le##bits(val, kaddr + oip); \
return; \
} \
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 640bcd21bf28..cb10e56ee31e 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -435,8 +435,10 @@ void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
spin_lock_irqsave(&subpage->lock, flags);
subpage->writeback_bitmap &= ~tmp;
- if (subpage->writeback_bitmap == 0)
+ if (subpage->writeback_bitmap == 0) {
+ ASSERT(PageWriteback(page));
end_page_writeback(page);
+ }
spin_unlock_irqrestore(&subpage->lock, flags);
}
@@ -559,3 +561,23 @@ IMPLEMENT_BTRFS_PAGE_OPS(writeback, set_page_writeback, end_page_writeback,
PageWriteback);
IMPLEMENT_BTRFS_PAGE_OPS(ordered, SetPageOrdered, ClearPageOrdered,
PageOrdered);
+
+/*
+ * Make sure not only the page dirty bit is cleared, but also subpage dirty bit
+ * is cleared.
+ */
+void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
+ struct page *page)
+{
+ struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
+
+ if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
+ return;
+
+ ASSERT(!PageDirty(page));
+ if (fs_info->sectorsize == PAGE_SIZE)
+ return;
+
+ ASSERT(PagePrivate(page) && page->private);
+ ASSERT(subpage->dirty_bitmap == 0);
+}
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index 4d7aca85d915..0120948f37a1 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -126,4 +126,7 @@ DECLARE_BTRFS_SUBPAGE_OPS(ordered);
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len);
+void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
+ struct page *page);
+
#endif
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d07b18b2b250..537d90bf5d84 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1201,21 +1201,14 @@ char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = (u64)-1;
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ ret = btrfs_search_backwards(root, &key, path);
if (ret < 0) {
goto err;
} else if (ret > 0) {
- ret = btrfs_previous_item(root, path, subvol_objectid,
- BTRFS_ROOT_BACKREF_KEY);
- if (ret < 0) {
- goto err;
- } else if (ret > 0) {
- ret = -ENOENT;
- goto err;
- }
+ ret = -ENOENT;
+ goto err;
}
- btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
subvol_objectid = key.offset;
root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
@@ -1248,21 +1241,14 @@ char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
key.type = BTRFS_INODE_REF_KEY;
key.offset = (u64)-1;
- ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
+ ret = btrfs_search_backwards(fs_root, &key, path);
if (ret < 0) {
goto err;
} else if (ret > 0) {
- ret = btrfs_previous_item(fs_root, path, dirid,
- BTRFS_INODE_REF_KEY);
- if (ret < 0) {
- goto err;
- } else if (ret > 0) {
- ret = -ENOENT;
- goto err;
- }
+ ret = -ENOENT;
+ goto err;
}
- btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
dirid = key.offset;
inode_ref = btrfs_item_ptr(path->nodes[0],
@@ -1353,6 +1339,9 @@ static int btrfs_fill_super(struct super_block *sb,
sb->s_op = &btrfs_super_ops;
sb->s_d_op = &btrfs_dentry_operations;
sb->s_export_op = &btrfs_export_ops;
+#ifdef CONFIG_FS_VERITY
+ sb->s_vop = &btrfs_verityops;
+#endif
sb->s_xattr = btrfs_xattr_handlers;
sb->s_time_gran = 1;
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
@@ -2041,13 +2030,6 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
ret = -EINVAL;
goto restore;
}
- if (fs_info->sectorsize < PAGE_SIZE) {
- btrfs_warn(fs_info,
- "read-write mount is not yet allowed for sectorsize %u page size %lu",
- fs_info->sectorsize, PAGE_SIZE);
- ret = -EINVAL;
- goto restore;
- }
/*
* NOTE: when remounting with a change that does writes, don't
@@ -2096,16 +2078,15 @@ restore:
}
/* Used to sort the devices by max_avail(descending sort) */
-static inline int btrfs_cmp_device_free_bytes(const void *dev_info1,
- const void *dev_info2)
+static int btrfs_cmp_device_free_bytes(const void *a, const void *b)
{
- if (((struct btrfs_device_info *)dev_info1)->max_avail >
- ((struct btrfs_device_info *)dev_info2)->max_avail)
+ const struct btrfs_device_info *dev_info1 = a;
+ const struct btrfs_device_info *dev_info2 = b;
+
+ if (dev_info1->max_avail > dev_info2->max_avail)
return -1;
- else if (((struct btrfs_device_info *)dev_info1)->max_avail <
- ((struct btrfs_device_info *)dev_info2)->max_avail)
+ else if (dev_info1->max_avail < dev_info2->max_avail)
return 1;
- else
return 0;
}
@@ -2381,7 +2362,7 @@ static struct file_system_type btrfs_root_fs_type = {
.name = "btrfs",
.mount = btrfs_mount_root,
.kill_sb = btrfs_kill_super,
- .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("btrfs");
@@ -2572,6 +2553,11 @@ static void __init btrfs_print_mod_info(void)
#else
", zoned=no"
#endif
+#ifdef CONFIG_FS_VERITY
+ ", fsverity=yes"
+#else
+ ", fsverity=no"
+#endif
;
pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options);
}
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 9d1d140118ff..25a6f587852b 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -22,6 +22,26 @@
#include "block-group.h"
#include "qgroup.h"
+/*
+ * Structure name Path
+ * --------------------------------------------------------------------------
+ * btrfs_supported_static_feature_attrs /sys/fs/btrfs/features
+ * btrfs_supported_feature_attrs /sys/fs/btrfs/features and
+ * /sys/fs/btrfs/<uuid>/features
+ * btrfs_attrs /sys/fs/btrfs/<uuid>
+ * devid_attrs /sys/fs/btrfs/<uuid>/devinfo/<devid>
+ * allocation_attrs /sys/fs/btrfs/<uuid>/allocation
+ * qgroup_attrs /sys/fs/btrfs/<uuid>/qgroups/<level>_<qgroupid>
+ * space_info_attrs /sys/fs/btrfs/<uuid>/allocation/<bg-type>
+ * raid_attrs /sys/fs/btrfs/<uuid>/allocation/<bg-type>/<bg-profile>
+ *
+ * When built with CONFIG_BTRFS_DEBUG:
+ *
+ * btrfs_debug_feature_attrs /sys/fs/btrfs/debug
+ * btrfs_debug_mount_attrs /sys/fs/btrfs/<uuid>/debug
+ * discard_debug_attrs /sys/fs/btrfs/<uuid>/debug/discard
+ */
+
struct btrfs_feature_attr {
struct kobj_attribute kobj_attr;
enum btrfs_feature_set feature_set;
@@ -267,7 +287,17 @@ BTRFS_FEAT_ATTR_INCOMPAT(raid1c34, RAID1C34);
#ifdef CONFIG_BTRFS_DEBUG
BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED);
#endif
+#ifdef CONFIG_FS_VERITY
+BTRFS_FEAT_ATTR_COMPAT_RO(verity, VERITY);
+#endif
+/*
+ * Features which depend on feature bits and may differ between each fs.
+ *
+ * /sys/fs/btrfs/features - all available features implemented by this version
+ * /sys/fs/btrfs/UUID/features - features of the fs which are enabled or
+ * can be changed on a mounted filesystem.
+ */
static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(mixed_backref),
BTRFS_FEAT_ATTR_PTR(default_subvol),
@@ -285,16 +315,12 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
#ifdef CONFIG_BTRFS_DEBUG
BTRFS_FEAT_ATTR_PTR(zoned),
#endif
+#ifdef CONFIG_FS_VERITY
+ BTRFS_FEAT_ATTR_PTR(verity),
+#endif
NULL
};
-/*
- * Features which depend on feature bits and may differ between each fs.
- *
- * /sys/fs/btrfs/features lists all available features of this kernel while
- * /sys/fs/btrfs/UUID/features shows features of the fs which are enabled or
- * can be changed online.
- */
static const struct attribute_group btrfs_feature_attr_group = {
.name = "features",
.is_visible = btrfs_feature_visible,
@@ -366,6 +392,10 @@ static ssize_t supported_sectorsizes_show(struct kobject *kobj,
{
ssize_t ret = 0;
+ /* 4K sector size is also supported with 64K page size */
+ if (PAGE_SIZE == SZ_64K)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%u ", SZ_4K);
+
/* Only sectorsize == PAGE_SIZE is now supported */
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%lu\n", PAGE_SIZE);
@@ -374,6 +404,12 @@ static ssize_t supported_sectorsizes_show(struct kobject *kobj,
BTRFS_ATTR(static_feature, supported_sectorsizes,
supported_sectorsizes_show);
+/*
+ * Features which only depend on kernel version.
+ *
+ * These are listed in /sys/fs/btrfs/features along with
+ * btrfs_supported_feature_attrs.
+ */
static struct attribute *btrfs_supported_static_feature_attrs[] = {
BTRFS_ATTR_PTR(static_feature, rmdir_subvol),
BTRFS_ATTR_PTR(static_feature, supported_checksums),
@@ -383,12 +419,6 @@ static struct attribute *btrfs_supported_static_feature_attrs[] = {
NULL
};
-/*
- * Features which only depend on kernel version.
- *
- * These are listed in /sys/fs/btrfs/features along with
- * btrfs_feature_attr_group
- */
static const struct attribute_group btrfs_static_feature_attr_group = {
.name = "features",
.attrs = btrfs_supported_static_feature_attrs,
@@ -547,6 +577,11 @@ static ssize_t btrfs_discard_max_discard_size_store(struct kobject *kobj,
BTRFS_ATTR_RW(discard, max_discard_size, btrfs_discard_max_discard_size_show,
btrfs_discard_max_discard_size_store);
+/*
+ * Per-filesystem debugging of discard (when mounted with discard=async).
+ *
+ * Path: /sys/fs/btrfs/<uuid>/debug/discard/
+ */
static const struct attribute *discard_debug_attrs[] = {
BTRFS_ATTR_PTR(discard, discardable_bytes),
BTRFS_ATTR_PTR(discard, discardable_extents),
@@ -560,15 +595,19 @@ static const struct attribute *discard_debug_attrs[] = {
};
/*
- * Runtime debugging exported via sysfs
+ * Per-filesystem runtime debugging exported via sysfs.
*
- * /sys/fs/btrfs/debug - applies to module or all filesystems
- * /sys/fs/btrfs/UUID - applies only to the given filesystem
+ * Path: /sys/fs/btrfs/UUID/debug/
*/
static const struct attribute *btrfs_debug_mount_attrs[] = {
NULL,
};
+/*
+ * Runtime debugging exported via sysfs, applies to all mounted filesystems.
+ *
+ * Path: /sys/fs/btrfs/debug
+ */
static struct attribute *btrfs_debug_feature_attrs[] = {
NULL
};
@@ -637,6 +676,11 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}
+/*
+ * Allocation information about block group profiles.
+ *
+ * Path: /sys/fs/btrfs/<uuid>/allocation/<bg-type>/<bg-profile>/
+ */
static struct attribute *raid_attrs[] = {
BTRFS_ATTR_PTR(raid, total_bytes),
BTRFS_ATTR_PTR(raid, used_bytes),
@@ -676,6 +720,11 @@ SPACE_INFO_ATTR(bytes_zone_unusable);
SPACE_INFO_ATTR(disk_used);
SPACE_INFO_ATTR(disk_total);
+/*
+ * Allocation information about block group types.
+ *
+ * Path: /sys/fs/btrfs/<uuid>/allocation/<bg-type>/
+ */
static struct attribute *space_info_attrs[] = {
BTRFS_ATTR_PTR(space_info, flags),
BTRFS_ATTR_PTR(space_info, total_bytes),
@@ -703,6 +752,11 @@ static struct kobj_type space_info_ktype = {
.default_groups = space_info_groups,
};
+/*
+ * Allocation information about block groups.
+ *
+ * Path: /sys/fs/btrfs/<uuid>/allocation/
+ */
static const struct attribute *allocation_attrs[] = {
BTRFS_ATTR_PTR(allocation, global_rsv_reserved),
BTRFS_ATTR_PTR(allocation, global_rsv_size),
@@ -974,7 +1028,8 @@ static ssize_t btrfs_bg_reclaim_threshold_show(struct kobject *kobj,
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
ssize_t ret;
- ret = scnprintf(buf, PAGE_SIZE, "%d\n", fs_info->bg_reclaim_threshold);
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n",
+ READ_ONCE(fs_info->bg_reclaim_threshold));
return ret;
}
@@ -991,16 +1046,21 @@ static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
if (ret)
return ret;
- if (thresh <= 50 || thresh > 100)
+ if (thresh != 0 && (thresh <= 50 || thresh > 100))
return -EINVAL;
- fs_info->bg_reclaim_threshold = thresh;
+ WRITE_ONCE(fs_info->bg_reclaim_threshold, thresh);
return len;
}
BTRFS_ATTR_RW(, bg_reclaim_threshold, btrfs_bg_reclaim_threshold_show,
btrfs_bg_reclaim_threshold_store);
+/*
+ * Per-filesystem information and stats.
+ *
+ * Path: /sys/fs/btrfs/<uuid>/
+ */
static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(, label),
BTRFS_ATTR_PTR(, nodesize),
@@ -1510,6 +1570,11 @@ static ssize_t btrfs_devinfo_error_stats_show(struct kobject *kobj,
}
BTRFS_ATTR(devid, error_stats, btrfs_devinfo_error_stats_show);
+/*
+ * Information about one device.
+ *
+ * Path: /sys/fs/btrfs/<uuid>/devinfo/<devid>/
+ */
static struct attribute *devid_attrs[] = {
BTRFS_ATTR_PTR(devid, error_stats),
BTRFS_ATTR_PTR(devid, in_fs_metadata),
@@ -1799,6 +1864,11 @@ QGROUP_RSV_ATTR(data, BTRFS_QGROUP_RSV_DATA);
QGROUP_RSV_ATTR(meta_pertrans, BTRFS_QGROUP_RSV_META_PERTRANS);
QGROUP_RSV_ATTR(meta_prealloc, BTRFS_QGROUP_RSV_META_PREALLOC);
+/*
+ * Qgroup information.
+ *
+ * Path: /sys/fs/btrfs/<uuid>/qgroups/<level>_<qgroupid>/
+ */
static struct attribute *qgroup_attrs[] = {
BTRFS_ATTR_PTR(qgroup, referenced),
BTRFS_ATTR_PTR(qgroup, exclusive),
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 98b5aaba46f1..19ba7d5b7d8f 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -223,8 +223,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
* we can only call btrfs_qgroup_account_extent() directly to test
* quota.
*/
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
@@ -236,8 +235,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
if (ret)
return ret;
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -260,8 +258,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
old_roots = NULL;
new_roots = NULL;
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
@@ -272,8 +269,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
if (ret)
return -EINVAL;
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -324,8 +320,7 @@ static int test_multiple_refs(struct btrfs_root *root,
return ret;
}
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
@@ -337,8 +332,7 @@ static int test_multiple_refs(struct btrfs_root *root,
if (ret)
return ret;
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -359,8 +353,7 @@ static int test_multiple_refs(struct btrfs_root *root,
return -EINVAL;
}
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
@@ -372,8 +365,7 @@ static int test_multiple_refs(struct btrfs_root *root,
if (ret)
return ret;
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -400,8 +392,7 @@ static int test_multiple_refs(struct btrfs_root *root,
return -EINVAL;
}
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
@@ -413,8 +404,7 @@ static int test_multiple_refs(struct btrfs_root *root,
if (ret)
return ret;
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index a8b2e0d2c025..7733e8ac0a69 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -24,6 +24,7 @@
#include "compression.h"
#include "volumes.h"
#include "misc.h"
+#include "btrfs_inode.h"
/*
* Error message should follow the following format:
@@ -873,13 +874,22 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
}
}
- if (unlikely((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
- (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) ||
- (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
- (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
- (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) ||
+ if (unlikely((type & BTRFS_BLOCK_GROUP_RAID10 &&
+ sub_stripes != btrfs_raid_array[BTRFS_RAID_RAID10].sub_stripes) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1 &&
+ num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1].devs_min) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1C3 &&
+ num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1C3].devs_min) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1C4 &&
+ num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1C4].devs_min) ||
+ (type & BTRFS_BLOCK_GROUP_RAID5 &&
+ num_stripes < btrfs_raid_array[BTRFS_RAID_RAID5].devs_min) ||
+ (type & BTRFS_BLOCK_GROUP_RAID6 &&
+ num_stripes < btrfs_raid_array[BTRFS_RAID_RAID6].devs_min) ||
+ (type & BTRFS_BLOCK_GROUP_DUP &&
+ num_stripes != btrfs_raid_array[BTRFS_RAID_DUP].dev_stripes) ||
((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
- num_stripes != 1))) {
+ num_stripes != btrfs_raid_array[BTRFS_RAID_SINGLE].dev_stripes))) {
chunk_err(leaf, chunk, logical,
"invalid num_stripes:sub_stripes %u:%u for profile %llu",
num_stripes, sub_stripes,
@@ -999,6 +1009,8 @@ static int check_inode_item(struct extent_buffer *leaf,
u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
u32 mode;
int ret;
+ u32 flags;
+ u32 ro_flags;
ret = check_inode_key(leaf, key, slot);
if (unlikely(ret < 0))
@@ -1054,11 +1066,17 @@ static int check_inode_item(struct extent_buffer *leaf,
btrfs_inode_nlink(leaf, iitem));
return -EUCLEAN;
}
- if (unlikely(btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK)) {
+ btrfs_inode_split_flags(btrfs_inode_flags(leaf, iitem), &flags, &ro_flags);
+ if (unlikely(flags & ~BTRFS_INODE_FLAG_MASK)) {
inode_item_err(leaf, slot,
- "unknown flags detected: 0x%llx",
- btrfs_inode_flags(leaf, iitem) &
- ~BTRFS_INODE_FLAG_MASK);
+ "unknown incompat flags detected: 0x%x", flags);
+ return -EUCLEAN;
+ }
+ if (unlikely(!sb_rdonly(fs_info->sb) &&
+ (ro_flags & ~BTRFS_INODE_RO_FLAG_MASK))) {
+ inode_item_err(leaf, slot,
+ "unknown ro-compat flags detected on writeable mount: 0x%x",
+ ro_flags);
return -EUCLEAN;
}
return 0;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index e6430ac9bbe8..f7efc26aa82a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -753,7 +753,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
*/
ret = btrfs_lookup_data_extent(fs_info, ins.objectid,
ins.offset);
- if (ret == 0) {
+ if (ret < 0) {
+ goto out;
+ } else if (ret == 0) {
btrfs_init_generic_ref(&ref,
BTRFS_ADD_DELAYED_REF,
ins.objectid, ins.offset, 0);
@@ -3039,8 +3041,6 @@ static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root,
list_del_init(&ctx->list);
ctx->log_ret = error;
}
-
- INIT_LIST_HEAD(&root->log_ctxs[index]);
}
/*
@@ -3328,10 +3328,16 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
goto out_wake_log_root;
}
- mutex_lock(&root->log_mutex);
- if (root->last_log_commit < log_transid)
- root->last_log_commit = log_transid;
- mutex_unlock(&root->log_mutex);
+ /*
+ * We know there can only be one task here, since we have not yet set
+ * root->log_commit[index1] to 0 and any task attempting to sync the
+ * log must wait for the previous log transaction to commit if it's
+ * still in progress or wait for the current log transaction commit if
+ * someone else already started it. We use <= and not < because the
+ * first log transaction has an ID of 0.
+ */
+ ASSERT(root->last_log_commit <= log_transid);
+ root->last_log_commit = log_transid;
out_wake_log_root:
mutex_lock(&log_root_tree->log_mutex);
@@ -3417,14 +3423,10 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
}
/*
- * Check if an inode was logged in the current transaction. We can't always rely
- * on an inode's logged_trans value, because it's an in-memory only field and
- * therefore not persisted. This means that its value is lost if the inode gets
- * evicted and loaded again from disk (in which case it has a value of 0, and
- * certainly it is smaller then any possible transaction ID), when that happens
- * the full_sync flag is set in the inode's runtime flags, so on that case we
- * assume eviction happened and ignore the logged_trans value, assuming the
- * worst case, that the inode was logged before in the current transaction.
+ * Check if an inode was logged in the current transaction. This may often
+ * return some false positives, because logged_trans is an in memory only field,
+ * not persisted anywhere. This is meant to be used in contexts where a false
+ * positive has no functional consequences.
*/
static bool inode_logged(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode)
@@ -3432,8 +3434,17 @@ static bool inode_logged(struct btrfs_trans_handle *trans,
if (inode->logged_trans == trans->transid)
return true;
- if (inode->last_trans == trans->transid &&
- test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) &&
+ /*
+ * The inode's logged_trans is always 0 when we load it (because it is
+ * not persisted in the inode item or elsewhere). So if it is 0 and the
+ * inode was last modified in the current transaction, then the inode may
+ * have been logged before in the current transaction, then evicted and
+ * loaded again in the current transaction - or may have never been logged
+ * in the current transaction, but since we can not be sure, we have to
+ * assume it was, otherwise our callers can leave an inconsistent log.
+ */
+ if (inode->logged_trans == 0 &&
+ inode->last_trans == trans->transid &&
!test_bit(BTRFS_FS_LOG_RECOVERING, &trans->fs_info->flags))
return true;
@@ -3913,6 +3924,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
u64 logged_isize)
{
struct btrfs_map_token token;
+ u64 flags;
btrfs_init_map_token(&token, leaf);
@@ -3962,20 +3974,49 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
btrfs_set_token_inode_transid(&token, item, trans->transid);
btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
- btrfs_set_token_inode_flags(&token, item, BTRFS_I(inode)->flags);
+ flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
+ BTRFS_I(inode)->ro_flags);
+ btrfs_set_token_inode_flags(&token, item, flags);
btrfs_set_token_inode_block_group(&token, item, 0);
}
static int log_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_root *log, struct btrfs_path *path,
- struct btrfs_inode *inode)
+ struct btrfs_inode *inode, bool inode_item_dropped)
{
struct btrfs_inode_item *inode_item;
int ret;
- ret = btrfs_insert_empty_item(trans, log, path,
- &inode->location, sizeof(*inode_item));
- if (ret && ret != -EEXIST)
+ /*
+ * If we are doing a fast fsync and the inode was logged before in the
+ * current transaction, then we know the inode was previously logged and
+ * it exists in the log tree. For performance reasons, in this case use
+ * btrfs_search_slot() directly with ins_len set to 0 so that we never
+ * attempt a write lock on the leaf's parent, which adds unnecessary lock
+ * contention in case there are concurrent fsyncs for other inodes of the
+ * same subvolume. Using btrfs_insert_empty_item() when the inode item
+ * already exists can also result in unnecessarily splitting a leaf.
+ */
+ if (!inode_item_dropped && inode->logged_trans == trans->transid) {
+ ret = btrfs_search_slot(trans, log, &inode->location, path, 0, 1);
+ ASSERT(ret <= 0);
+ if (ret > 0)
+ ret = -ENOENT;
+ } else {
+ /*
+ * This means it is the first fsync in the current transaction,
+ * so the inode item is not in the log and we need to insert it.
+ * We can never get -EEXIST because we are only called for a fast
+ * fsync and in case an inode eviction happens after the inode was
+ * logged before in the current transaction, when we load again
+ * the inode, we set BTRFS_INODE_NEEDS_FULL_SYNC on its runtime
+ * flags and set ->logged_trans to 0.
+ */
+ ret = btrfs_insert_empty_item(trans, log, path, &inode->location,
+ sizeof(*inode_item));
+ ASSERT(ret != -EEXIST);
+ }
+ if (ret)
return ret;
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
@@ -4160,7 +4201,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
static int extent_cmp(void *priv, const struct list_head *a,
const struct list_head *b)
{
- struct extent_map *em1, *em2;
+ const struct extent_map *em1, *em2;
em1 = list_entry(a, struct extent_map, list);
em2 = list_entry(b, struct extent_map, list);
@@ -5053,8 +5094,8 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
/*
* Check the inode's logged_trans only instead of
* btrfs_inode_in_log(). This is because the last_log_commit of
- * the inode is not updated when we only log that it exists and
- * it has the full sync bit set (see btrfs_log_inode()).
+ * the inode is not updated when we only log that it exists (see
+ * btrfs_log_inode()).
*/
if (BTRFS_I(inode)->logged_trans == trans->transid) {
spin_unlock(&BTRFS_I(inode)->lock);
@@ -5299,6 +5340,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
bool need_log_inode_item = true;
bool xattrs_logged = false;
bool recursive_logging = false;
+ bool inode_item_dropped = true;
path = btrfs_alloc_path();
if (!path)
@@ -5433,6 +5475,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
} else {
if (inode_only == LOG_INODE_ALL)
fast_search = true;
+ inode_item_dropped = false;
goto log_extents;
}
@@ -5466,7 +5509,7 @@ log_extents:
btrfs_release_path(path);
btrfs_release_path(dst_path);
if (need_log_inode_item) {
- err = log_inode_item(trans, log, dst_path, inode);
+ err = log_inode_item(trans, log, dst_path, inode, inode_item_dropped);
if (err)
goto out_unlock;
/*
@@ -5573,6 +5616,13 @@ static bool need_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode)
{
/*
+ * If a directory was not modified, no dentries added or removed, we can
+ * and should avoid logging it.
+ */
+ if (S_ISDIR(inode->vfs_inode.i_mode) && inode->last_trans < trans->transid)
+ return false;
+
+ /*
* If this inode does not have new/updated/deleted xattrs since the last
* time it was logged and is flagged as logged in the current transaction,
* we can skip logging it. As for new/deleted names, those are updated in
diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c
new file mode 100644
index 000000000000..28d443d3ef93
--- /dev/null
+++ b/fs/btrfs/verity.c
@@ -0,0 +1,811 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/rwsem.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/iversion.h>
+#include <linux/fsverity.h>
+#include <linux/sched/mm.h>
+#include "ctree.h"
+#include "btrfs_inode.h"
+#include "transaction.h"
+#include "disk-io.h"
+#include "locking.h"
+
+/*
+ * Implementation of the interface defined in struct fsverity_operations.
+ *
+ * The main question is how and where to store the verity descriptor and the
+ * Merkle tree. We store both in dedicated btree items in the filesystem tree,
+ * together with the rest of the inode metadata. This means we'll need to do
+ * extra work to encrypt them once encryption is supported in btrfs, but btrfs
+ * has a lot of careful code around i_size and it seems better to make a new key
+ * type than try and adjust all of our expectations for i_size.
+ *
+ * Note that this differs from the implementation in ext4 and f2fs, where
+ * this data is stored as if it were in the file, but past EOF. However, btrfs
+ * does not have a widespread mechanism for caching opaque metadata pages, so we
+ * do pretend that the Merkle tree pages themselves are past EOF for the
+ * purposes of caching them (as opposed to creating a virtual inode).
+ *
+ * fs verity items are stored under two different key types on disk.
+ * The descriptor items:
+ * [ inode objectid, BTRFS_VERITY_DESC_ITEM_KEY, offset ]
+ *
+ * At offset 0, we store a btrfs_verity_descriptor_item which tracks the
+ * size of the descriptor item and some extra data for encryption.
+ * Starting at offset 1, these hold the generic fs verity descriptor.
+ * The latter are opaque to btrfs, we just read and write them as a blob for
+ * the higher level verity code. The most common descriptor size is 256 bytes.
+ *
+ * The merkle tree items:
+ * [ inode objectid, BTRFS_VERITY_MERKLE_ITEM_KEY, offset ]
+ *
+ * These also start at offset 0, and correspond to the merkle tree bytes.
+ * So when fsverity asks for page 0 of the merkle tree, we pull up one page
+ * starting at offset 0 for this key type. These are also opaque to btrfs,
+ * we're blindly storing whatever fsverity sends down.
+ *
+ * Another important consideration is the fact that the Merkle tree data scales
+ * linearly with the size of the file (with 4K pages/blocks and SHA-256, it's
+ * ~1/127th the size) so for large files, writing the tree can be a lengthy
+ * operation. For that reason, we guard the whole enable verity operation
+ * (between begin_enable_verity and end_enable_verity) with an orphan item.
+ * Again, because the data can be pretty large, it's quite possible that we
+ * could run out of space writing it, so we try our best to handle errors by
+ * stopping and rolling back rather than aborting the victim transaction.
+ */
+
+#define MERKLE_START_ALIGN 65536
+
+/*
+ * Get the logical file offset at which the Merkle tree pages are cached.
+ *
+ * @inode:  inode of the verity file
+ *
+ * fs-verity requires the Merkle tree pages to be cached, and it is simpler to
+ * reason about where they live as a file offset than as a page index.
+ *
+ * The offset is i_size rounded up to MERKLE_START_ALIGN (64K), which places it
+ * beyond the last page of file data even with 64K pages.  The rounding is done
+ * in u64 because it could overflow loff_t, and the result is then checked
+ * against the superblock's s_maxbytes.
+ *
+ * Returns the file offset on success, -EFBIG if the rounded offset is larger
+ * than s_maxbytes.
+ */
+static loff_t merkle_file_pos(const struct inode *inode)
+{
+	const u64 merkle_pos = round_up((u64)inode->i_size, MERKLE_START_ALIGN);
+
+	if (merkle_pos <= inode->i_sb->s_maxbytes)
+		return merkle_pos;
+
+	return -EFBIG;
+}
+
+/*
+ * Drop all the items for this inode with this key_type.
+ *
+ * @inode: inode to drop items for
+ * @key_type: type of items to drop (BTRFS_VERITY_DESC_ITEM_KEY or
+ * BTRFS_VERITY_MERKLE_ITEM_KEY)
+ *
+ * Before doing a verity enable we cleanup any existing verity items.
+ * This is also used to clean up if a verity enable failed half way through.
+ *
+ * Each item is deleted in its own transaction: every loop iteration starts a
+ * transaction, searches for the highest remaining key of this type, deletes
+ * that single item and ends the transaction. If a search or a delete fails,
+ * the items removed by earlier iterations are not restored.
+ *
+ * Returns number of dropped items on success, negative error code on failure.
+ */
+static int drop_verity_items(struct btrfs_inode *inode, u8 key_type)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root = inode->root;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ int count = 0;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ while (1) {
+ /* 1 for the item being dropped */
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+
+ /*
+ * Walk backwards through all the items until we find one that
+ * isn't from our key type or objectid. Searching for the
+ * maximum offset lands just past the last item of this type,
+ * so on an inexact match we step back one slot to reach it.
+ */
+ key.objectid = btrfs_ino(inode);
+ key.type = key_type;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret > 0) {
+ ret = 0;
+ /* No more keys of this type, we're done */
+ if (path->slots[0] == 0)
+ break;
+ path->slots[0]--;
+ } else if (ret < 0) {
+ btrfs_end_transaction(trans);
+ goto out;
+ }
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+ /* No more keys of this type, we're done */
+ if (key.objectid != btrfs_ino(inode) || key.type != key_type)
+ break;
+
+ /*
+ * This shouldn't be a performance sensitive function because
+ * it's not used as part of truncate. If it ever becomes
+ * perf sensitive, change this to walk forward and bulk delete
+ * items
+ */
+ ret = btrfs_del_items(trans, root, path, path->slots[0], 1);
+ if (ret) {
+ btrfs_end_transaction(trans);
+ goto out;
+ }
+ count++;
+ btrfs_release_path(path);
+ btrfs_end_transaction(trans);
+ }
+ ret = count;
+ btrfs_end_transaction(trans);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * Remove every verity item that belongs to an inode.
+ *
+ * @inode:  inode to drop verity items for
+ *
+ * Callers that drop verity items generally want all types of them gone rather
+ * than one particular type, so this drops the descriptor items first and then
+ * the Merkle tree items, stopping at the first failure.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int btrfs_drop_verity_items(struct btrfs_inode *inode)
+{
+	int dropped;
+
+	dropped = drop_verity_items(inode, BTRFS_VERITY_DESC_ITEM_KEY);
+	if (dropped >= 0)
+		dropped = drop_verity_items(inode, BTRFS_VERITY_MERKLE_ITEM_KEY);
+
+	/* The helper reports a count on success; callers only get 0 or an error */
+	return dropped < 0 ? dropped : 0;
+}
+
+/*
+ * Store a byte range as a run of btree items of one key type.
+ *
+ * @inode:     inode the items belong to
+ * @key_type:  key type of the items to insert
+ * @offset:    key offset of the first item
+ * @src:       data to store
+ * @len:       number of bytes of @src to store
+ *
+ * The data is cut into pieces of at most 2K.  Each piece becomes one item with
+ * key (ino, key_type, offset + off), where off is the position of the piece
+ * within @src, so the last item ends at offset + len.  Every item is inserted
+ * in a transaction of its own; if an insertion fails, the items written by
+ * earlier iterations are left in place.
+ *
+ * Returns 0 on success and a negative error code on failure.
+ */
+static int write_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
+			   const char *src, u64 len)
+{
+	struct btrfs_root *root = inode->root;
+	struct btrfs_path *path;
+	const char *cur = src;
+	int ret = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	while (len > 0) {
+		struct btrfs_trans_handle *trans;
+		struct extent_buffer *leaf;
+		struct btrfs_key key;
+		unsigned long chunk;
+		void *item_data;
+
+		/* 1 for the new item being inserted */
+		trans = btrfs_start_transaction(root, 1);
+		if (IS_ERR(trans)) {
+			ret = PTR_ERR(trans);
+			break;
+		}
+
+		key.objectid = btrfs_ino(inode);
+		key.type = key_type;
+		key.offset = offset;
+
+		/*
+		 * Limit each item to 2K, mostly to be friendly to filesystems
+		 * with a small leaf size
+		 */
+		chunk = min_t(u64, len, 2048);
+
+		ret = btrfs_insert_empty_item(trans, root, path, &key, chunk);
+		if (ret) {
+			btrfs_end_transaction(trans);
+			break;
+		}
+
+		leaf = path->nodes[0];
+		item_data = btrfs_item_ptr(leaf, path->slots[0], void);
+		write_extent_buffer(leaf, cur, (unsigned long)item_data, chunk);
+
+		/* Advance the source cursor and the key offset in lockstep */
+		cur += chunk;
+		offset += chunk;
+		len -= chunk;
+
+		btrfs_release_path(path);
+		btrfs_end_transaction(trans);
+	}
+
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Read inode items of the given key type and offset from the btree.
+ *
+ * @inode: inode to read items of
+ * @key_type: key type to read
+ * @offset: item offset to read from
+ * @dest: Buffer to read into. This parameter has slightly tricky
+ * semantics. If it is NULL, the function will not do any copying
+ * and will just return the size of all the items up to len bytes.
+ * If dest_page is passed, then the function will kmap_local the
+ * page and ignore dest, but it must still be non-NULL to avoid the
+ * counting-only behavior.
+ * @len: length in bytes to read
+ * @dest_page: copy into this page instead of the dest buffer
+ *
+ * Helper function to read items from the btree. We can return short reads if
+ * the items don't exist on disk or aren't big enough to fill the desired
+ * length. Supports reading into a provided buffer (dest) or into the page
+ * cache (dest_page).
+ *
+ * The read may start in the middle of an item. After the first item, every
+ * following item must begin exactly where the previous one ended, otherwise
+ * the read stops early.
+ *
+ * NOTE(review): in counting-only mode (dest == NULL) a whole item is consumed
+ * per iteration, so copy_bytes can be larger than len and "len -= copy_bytes"
+ * would wrap around; confirm how callers use that mode.
+ *
+ * Returns number of bytes read or a negative error code on failure.
+ */
+static int read_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
+ char *dest, u64 len, struct page *dest_page)
+{
+ struct btrfs_path *path;
+ struct btrfs_root *root = inode->root;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ u64 item_end;
+ u64 copy_end;
+ int copied = 0;
+ u32 copy_offset;
+ unsigned long copy_bytes;
+ unsigned long dest_offset = 0;
+ void *data;
+ char *kaddr = dest;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ if (dest_page)
+ path->reada = READA_FORWARD;
+
+ key.objectid = btrfs_ino(inode);
+ key.type = key_type;
+ key.offset = offset;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = 0;
+ if (path->slots[0] == 0)
+ goto out;
+ path->slots[0]--;
+ }
+
+ while (len > 0) {
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+ if (key.objectid != btrfs_ino(inode) || key.type != key_type)
+ break;
+
+ item_end = btrfs_item_size_nr(leaf, path->slots[0]) + key.offset;
+
+ if (copied > 0) {
+ /*
+ * Once we've copied something, we want all of the items
+ * to be sequential
+ */
+ if (key.offset != offset)
+ break;
+ } else {
+ /*
+ * Our initial offset might be in the middle of an
+ * item. Make sure it all makes sense.
+ */
+ if (key.offset > offset)
+ break;
+ if (item_end <= offset)
+ break;
+ }
+
+ /* dest == NULL means just sum all the item lengths */
+ if (!dest)
+ copy_end = item_end;
+ else
+ copy_end = min(offset + len, item_end);
+
+ /* Number of bytes in this item we want to copy */
+ copy_bytes = copy_end - offset;
+
+ /* Offset from the start of item for copying */
+ copy_offset = offset - key.offset;
+
+ if (dest) {
+ if (dest_page)
+ kaddr = kmap_local_page(dest_page);
+
+ data = btrfs_item_ptr(leaf, path->slots[0], void);
+ read_extent_buffer(leaf, kaddr + dest_offset,
+ (unsigned long)data + copy_offset,
+ copy_bytes);
+
+ if (dest_page)
+ kunmap_local(kaddr);
+ }
+
+ offset += copy_bytes;
+ dest_offset += copy_bytes;
+ len -= copy_bytes;
+ copied += copy_bytes;
+
+ path->slots[0]++;
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+ /*
+ * We've reached the last slot in this leaf and we need
+ * to go to the next leaf. A positive return from
+ * btrfs_next_leaf() is not treated as an error here: the
+ * read simply ends with what was copied so far.
+ */
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0) {
+ break;
+ } else if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ }
+ }
+out:
+ btrfs_free_path(path);
+ if (!ret)
+ ret = copied;
+ return ret;
+}
+
+/*
+ * Delete the orphan item that guards an in-progress verity enable.
+ *
+ * @trans:  transaction to do the delete in
+ * @inode:  inode whose orphan item should be deleted
+ *
+ * Gathers the verity specific orphan handling that would otherwise be
+ * repeated at each place a verity orphan is deleted: a missing orphan item
+ * (-ENOENT) is not an error, and inodes with 0 links are left alone.
+ *
+ * Returns zero on success or a negative error code on failure.
+ */
+static int del_orphan(struct btrfs_trans_handle *trans, struct btrfs_inode *inode)
+{
+	int err;
+
+	/*
+	 * An inode without links was either unlinked already or created with
+	 * O_TMPFILE, and in both cases that other operation should have left
+	 * an orphan of its own.  Instead of reference counting the orphans we
+	 * simply skip such inodes, because the orphan logic only invokes the
+	 * verity path when i_nlink is 1.
+	 */
+	if (inode->vfs_inode.i_nlink == 0)
+		return 0;
+
+	err = btrfs_del_orphan_item(trans, inode->root, btrfs_ino(inode));
+
+	return err == -ENOENT ? 0 : err;
+}
+
+/*
+ * Rollback in-progress verity if we encounter an error.
+ *
+ * @inode:	inode verity had an error for
+ *
+ * We try to handle recoverable errors while enabling verity by rolling it back
+ * and just failing the operation, rather than having an fs level error no
+ * matter what. However, any error in rollback is unrecoverable.
+ *
+ * The caller must hold the inode lock.
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int rollback_verity(struct btrfs_inode *inode)
+{
+	struct btrfs_trans_handle *trans = NULL;
+	struct btrfs_root *root = inode->root;
+	int ret;
+
+	ASSERT(inode_is_locked(&inode->vfs_inode));
+	/* Drop every cached page at or beyond i_size. */
+	truncate_inode_pages(inode->vfs_inode.i_mapping, inode->vfs_inode.i_size);
+	clear_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags);
+	ret = btrfs_drop_verity_items(inode);
+	if (ret) {
+		btrfs_handle_fs_error(root->fs_info, ret,
+				"failed to drop verity items in rollback %llu",
+				(u64)inode->vfs_inode.i_ino);
+		goto out;
+	}
+
+	/*
+	 * 1 for updating the inode flag
+	 * 1 for deleting the orphan
+	 */
+	trans = btrfs_start_transaction(root, 2);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		/* No handle was obtained, so there is nothing to end below. */
+		trans = NULL;
+		btrfs_handle_fs_error(root->fs_info, ret,
+			"failed to start transaction in verity rollback %llu",
+			(u64)inode->vfs_inode.i_ino);
+		goto out;
+	}
+	inode->ro_flags &= ~BTRFS_INODE_RO_VERITY;
+	btrfs_sync_inode_flags_to_i_flags(&inode->vfs_inode);
+	ret = btrfs_update_inode(trans, root, inode);
+	if (ret) {
+		btrfs_abort_transaction(trans, ret);
+		goto out;
+	}
+	ret = del_orphan(trans, inode);
+	if (ret) {
+		btrfs_abort_transaction(trans, ret);
+		goto out;
+	}
+out:
+	/*
+	 * Aborting a transaction does not release our handle on it, so the
+	 * handle must be ended on the error paths as well as on success.
+	 */
+	if (trans)
+		btrfs_end_transaction(trans);
+	return ret;
+}
+
+/*
+ * Finalize making the file a valid verity file
+ *
+ * @inode:	inode to be marked as verity
+ * @desc:	contents of the verity descriptor to write (not NULL)
+ * @desc_size:	size of the verity descriptor
+ *
+ * Runs once the Merkle tree has been written successfully and performs, in
+ * order:
+ *
+ * - write the descriptor size item, then the descriptor itself
+ * - set the verity flag on the inode and persist the inode
+ * - delete the orphan item
+ * - clear the in progress bit
+ * - set the VERITY compat_ro bit on the filesystem
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int finish_verity(struct btrfs_inode *inode, const void *desc,
+			 size_t desc_size)
+{
+	struct btrfs_root *root = inode->root;
+	struct btrfs_verity_descriptor_item item;
+	struct btrfs_trans_handle *trans;
+	int ret;
+
+	/* Offset 0 of the descriptor key holds the size item */
+	memset(&item, 0, sizeof(item));
+	btrfs_set_stack_verity_descriptor_size(&item, desc_size);
+	ret = write_key_bytes(inode, BTRFS_VERITY_DESC_ITEM_KEY, 0,
+			      (const char *)&item, sizeof(item));
+	if (ret)
+		return ret;
+
+	/* The descriptor contents are written at offset 1 */
+	ret = write_key_bytes(inode, BTRFS_VERITY_DESC_ITEM_KEY, 1,
+			      desc, desc_size);
+	if (ret)
+		return ret;
+
+	/*
+	 * 1 for updating the inode flag
+	 * 1 for deleting the orphan
+	 */
+	trans = btrfs_start_transaction(root, 2);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+
+	inode->ro_flags |= BTRFS_INODE_RO_VERITY;
+	btrfs_sync_inode_flags_to_i_flags(&inode->vfs_inode);
+
+	/* Each step only runs if every earlier one succeeded */
+	ret = btrfs_update_inode(trans, root, inode);
+	if (!ret)
+		ret = del_orphan(trans, inode);
+	if (!ret) {
+		clear_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags);
+		btrfs_set_fs_compat_ro(root->fs_info, VERITY);
+	}
+
+	btrfs_end_transaction(trans);
+	return ret;
+}
+
+/*
+ * fsverity op that begins enabling verity.
+ *
+ * @filp:	file to enable verity on
+ *
+ * Begin enabling fsverity for the file. We drop any existing verity items, add
+ * an orphan and set the in progress bit.
+ *
+ * The caller must hold the inode lock.
+ *
+ * Returns 0 on success, -EBUSY if enabling is already in progress on this
+ * inode, or another negative error code on failure.
+ */
+static int btrfs_begin_enable_verity(struct file *filp)
+{
+	struct btrfs_inode *inode = BTRFS_I(file_inode(filp));
+	struct btrfs_root *root = inode->root;
+	struct btrfs_trans_handle *trans;
+	int ret;
+
+	ASSERT(inode_is_locked(file_inode(filp)));
+
+	if (test_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags))
+		return -EBUSY;
+
+	/*
+	 * This should almost never do anything, but theoretically, it's
+	 * possible that we failed to enable verity on a file, then were
+	 * interrupted or failed while rolling back, failed to cleanup the
+	 * orphan, and finally attempt to enable verity again.
+	 */
+	ret = btrfs_drop_verity_items(inode);
+	if (ret)
+		return ret;
+
+	/* 1 for the orphan item */
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+
+	ret = btrfs_orphan_add(trans, inode);
+	if (!ret)
+		set_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags);
+	btrfs_end_transaction(trans);
+
+	/*
+	 * Report a failed orphan insertion to the caller: without the orphan
+	 * and the in progress bit the enable must not be treated as started.
+	 */
+	return ret;
+}
+
+/*
+ * fsverity op that ends enabling verity.
+ *
+ * @filp:		file we are finishing enabling verity on
+ * @desc:		verity descriptor to write out (NULL in error conditions)
+ * @desc_size:		size of the verity descriptor (variable with signatures)
+ * @merkle_tree_size:	size of the merkle tree in bytes (not used here)
+ *
+ * A NULL desc means VFS is signaling that verity enable failed, so we only
+ * roll back. Otherwise we try to finish verity and roll back if that fails.
+ * A rollback failure is logged but does not replace the returned value.
+ *
+ * The caller must hold the inode lock.
+ *
+ * Returns 0 on success (including a rollback requested through a NULL desc),
+ * negative error code if finishing verity failed.
+ */
+static int btrfs_end_enable_verity(struct file *filp, const void *desc,
+				   size_t desc_size, u64 merkle_tree_size)
+{
+	struct btrfs_inode *inode = BTRFS_I(file_inode(filp));
+	int rollback_ret;
+	int ret = 0;
+
+	ASSERT(inode_is_locked(file_inode(filp)));
+
+	if (desc != NULL) {
+		ret = finish_verity(inode, desc, desc_size);
+		if (!ret)
+			return ret;
+	}
+
+	/* Either VFS reported an error or finishing verity failed */
+	rollback_ret = rollback_verity(inode);
+	if (rollback_ret)
+		btrfs_err(inode->root->fs_info,
+			  "failed to rollback verity items: %d", rollback_ret);
+	return ret;
+}
+
+/*
+ * fsverity op that gets the struct fsverity_descriptor.
+ *
+ * @inode:	inode to get the descriptor of
+ * @buf:	output buffer for the descriptor contents
+ * @buf_size:	size of the output buffer. 0 to query the size
+ *
+ * fsverity reads the descriptor in two passes: the first call passes
+ * buf_size = 0 to learn the descriptor size, the second call reads the
+ * descriptor contents into a buffer of at least that size.
+ *
+ * Returns the descriptor size on success, -ERANGE if the buffer is too small,
+ * -EUCLEAN if the stored size item is invalid, -EIO if fewer or more bytes
+ * than the recorded size were read, or another negative error code.
+ */
+static int btrfs_get_verity_descriptor(struct inode *inode, void *buf,
+				       size_t buf_size)
+{
+	struct btrfs_inode *binode = BTRFS_I(inode);
+	struct btrfs_verity_descriptor_item item;
+	u64 true_size;
+	int ret;
+
+	/* The size item is stored at offset 0 of the descriptor key */
+	memset(&item, 0, sizeof(item));
+	ret = read_key_bytes(binode, BTRFS_VERITY_DESC_ITEM_KEY, 0,
+			     (char *)&item, sizeof(item), NULL);
+	if (ret < 0)
+		return ret;
+
+	/* Reserved fields must be zero and the size must fit the int return */
+	if (item.reserved[0] || item.reserved[1])
+		return -EUCLEAN;
+	true_size = btrfs_stack_verity_descriptor_size(&item);
+	if (true_size > INT_MAX)
+		return -EUCLEAN;
+
+	/* First pass: the caller only wants the size */
+	if (!buf_size)
+		return true_size;
+	if (buf_size < true_size)
+		return -ERANGE;
+
+	/* Second pass: the descriptor contents are stored at offset 1 */
+	ret = read_key_bytes(binode, BTRFS_VERITY_DESC_ITEM_KEY, 1,
+			     buf, buf_size, NULL);
+	if (ret < 0)
+		return ret;
+	if (ret != true_size)
+		return -EIO;
+
+	return true_size;
+}
+
+/*
+ * fsverity op that reads and caches a merkle tree page.
+ *
+ * @inode: inode to read a merkle tree page for
+ * @index: page index relative to the start of the merkle tree
+ * @num_ra_pages: number of pages to readahead. Optional, we ignore it
+ *
+ * The Merkle tree is stored in the filesystem btree, but its pages are cached
+ * with a logical position past EOF in the inode's mapping.
+ *
+ * A page found in the cache is returned if it is uptodate. Otherwise a new
+ * page is allocated, filled from the Merkle items, zero padded if the read was
+ * short, and inserted into the page cache.
+ *
+ * Returns the page we read, or an ERR_PTR on error.
+ */
+static struct page *btrfs_read_merkle_tree_page(struct inode *inode,
+ pgoff_t index,
+ unsigned long num_ra_pages)
+{
+ struct page *page;
+ /* Byte offset of the requested page within the Merkle tree */
+ u64 off = (u64)index << PAGE_SHIFT;
+ loff_t merkle_pos = merkle_file_pos(inode);
+ int ret;
+
+ /* A negative position is an error code from merkle_file_pos() */
+ if (merkle_pos < 0)
+ return ERR_PTR(merkle_pos);
+ /* A full page at merkle_pos + off must not go past s_maxbytes */
+ if (merkle_pos > inode->i_sb->s_maxbytes - off - PAGE_SIZE)
+ return ERR_PTR(-EFBIG);
+ /* From here on index is the page cache index, not the tree index */
+ index += merkle_pos >> PAGE_SHIFT;
+again:
+ page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
+ if (page) {
+ if (PageUptodate(page))
+ return page;
+
+ lock_page(page);
+ /*
+ * We only insert uptodate pages, so !Uptodate has to be
+ * an error
+ */
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ put_page(page);
+ return ERR_PTR(-EIO);
+ }
+ unlock_page(page);
+ return page;
+ }
+
+ /* Not cached: allocate a page with __GFP_FS masked out of the gfp mask */
+ page = __page_cache_alloc(mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * Merkle item keys are indexed from byte 0 in the merkle tree.
+ * They have the form:
+ *
+ * [ inode objectid, BTRFS_MERKLE_ITEM_KEY, offset in bytes ]
+ */
+ ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY, off,
+ page_address(page), PAGE_SIZE, page);
+ if (ret < 0) {
+ put_page(page);
+ return ERR_PTR(ret);
+ }
+ /* ret is the number of bytes copied; zero the rest of a short read */
+ if (ret < PAGE_SIZE)
+ memzero_page(page, ret, PAGE_SIZE - ret);
+
+ /* Mark uptodate before inserting: the lookup above relies on it */
+ SetPageUptodate(page);
+ ret = add_to_page_cache_lru(page, inode->i_mapping, index, GFP_NOFS);
+
+ if (!ret) {
+ /* Inserted and ready for fsverity */
+ unlock_page(page);
+ } else {
+ /* Insertion failed, drop the page we allocated */
+ put_page(page);
+ /* Did someone race us into inserting this page? */
+ if (ret == -EEXIST)
+ goto again;
+ page = ERR_PTR(ret);
+ }
+ return page;
+}
+
+/*
+ * fsverity op that writes a Merkle tree block into the btree.
+ *
+ * @inode:		inode to write a Merkle tree block for
+ * @buf:		Merkle tree data block to write
+ * @index:		index of the block in the Merkle tree
+ * @log_blocksize:	log base 2 of the Merkle tree block size
+ *
+ * The block size may differ from the page size, so index must not be treated
+ * as a page index; the byte offset is derived from index and log_blocksize.
+ *
+ * Returns 0 on success, -EFBIG if the block would end past s_maxbytes, or
+ * another negative error code on failure.
+ */
+static int btrfs_write_merkle_tree_block(struct inode *inode, const void *buf,
+					 u64 index, int log_blocksize)
+{
+	const u64 tree_off = index << log_blocksize;
+	const u64 block_size = 1ULL << log_blocksize;
+	const loff_t tree_start = merkle_file_pos(inode);
+
+	/* A negative position is an error code from merkle_file_pos() */
+	if (tree_start < 0)
+		return tree_start;
+
+	/* The whole block at tree_start + tree_off must fit in s_maxbytes */
+	if (tree_start > inode->i_sb->s_maxbytes - tree_off - block_size)
+		return -EFBIG;
+
+	return write_key_bytes(BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY,
+			       tree_off, buf, block_size);
+}
+
+/* fsverity operations table wiring the btrfs verity callbacks defined above. */
+const struct fsverity_operations btrfs_verityops = {
+ .begin_enable_verity = btrfs_begin_enable_verity,
+ .end_enable_verity = btrfs_end_enable_verity,
+ .get_verity_descriptor = btrfs_get_verity_descriptor,
+ .read_merkle_tree_page = btrfs_read_merkle_tree_page,
+ .write_merkle_tree_block = btrfs_write_merkle_tree_block,
+};
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 70f94b75f25a..ec3a874165de 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -38,7 +38,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.sub_stripes = 2,
.dev_stripes = 1,
.devs_max = 0, /* 0 == as many as possible */
- .devs_min = 4,
+ .devs_min = 2,
.tolerated_failures = 1,
.devs_increment = 2,
.ncopies = 2,
@@ -103,7 +103,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.sub_stripes = 1,
.dev_stripes = 1,
.devs_max = 0,
- .devs_min = 2,
+ .devs_min = 1,
.tolerated_failures = 0,
.devs_increment = 1,
.ncopies = 1,
@@ -153,6 +153,32 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
},
};
+/*
+ * Map block group flags (BTRFS_BLOCK_GROUP_*) to the matching
+ * btrfs_raid_types value, which can be used to index btrfs_raid_array[].
+ *
+ * The profile bits are tested in a fixed order and the first one that is set
+ * decides the result. Flags without any profile bit map to BTRFS_RAID_SINGLE.
+ */
+enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags)
+{
+	if (flags & BTRFS_BLOCK_GROUP_RAID10)
+		return BTRFS_RAID_RAID10;
+	if (flags & BTRFS_BLOCK_GROUP_RAID1)
+		return BTRFS_RAID_RAID1;
+	if (flags & BTRFS_BLOCK_GROUP_RAID1C3)
+		return BTRFS_RAID_RAID1C3;
+	if (flags & BTRFS_BLOCK_GROUP_RAID1C4)
+		return BTRFS_RAID_RAID1C4;
+	if (flags & BTRFS_BLOCK_GROUP_DUP)
+		return BTRFS_RAID_DUP;
+	if (flags & BTRFS_BLOCK_GROUP_RAID0)
+		return BTRFS_RAID_RAID0;
+	if (flags & BTRFS_BLOCK_GROUP_RAID5)
+		return BTRFS_RAID_RAID5;
+	if (flags & BTRFS_BLOCK_GROUP_RAID6)
+		return BTRFS_RAID_RAID6;
+
+	/* No profile bit set: BTRFS_BLOCK_GROUP_SINGLE */
+	return BTRFS_RAID_SINGLE;
+}
+
const char *btrfs_bg_type_to_raid_name(u64 flags)
{
const int index = btrfs_bg_flags_to_raid_index(flags);
@@ -404,44 +430,6 @@ void __exit btrfs_cleanup_fs_uuids(void)
}
}
-/*
- * Returns a pointer to a new btrfs_device on success; ERR_PTR() on error.
- * Returned struct is not linked onto any lists and must be destroyed using
- * btrfs_free_device.
- */
-static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_device *dev;
-
- dev = kzalloc(sizeof(*dev), GFP_KERNEL);
- if (!dev)
- return ERR_PTR(-ENOMEM);
-
- /*
- * Preallocate a bio that's always going to be used for flushing device
- * barriers and matches the device lifespan
- */
- dev->flush_bio = bio_kmalloc(GFP_KERNEL, 0);
- if (!dev->flush_bio) {
- kfree(dev);
- return ERR_PTR(-ENOMEM);
- }
-
- INIT_LIST_HEAD(&dev->dev_list);
- INIT_LIST_HEAD(&dev->dev_alloc_list);
- INIT_LIST_HEAD(&dev->post_commit_list);
-
- atomic_set(&dev->reada_in_flight, 0);
- atomic_set(&dev->dev_stats_ccnt, 0);
- btrfs_device_data_ordered_init(dev);
- INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
- INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
- extent_io_tree_init(fs_info, &dev->alloc_state,
- IO_TREE_DEVICE_ALLOC_STATE, NULL);
-
- return dev;
-}
-
static noinline struct btrfs_fs_devices *find_fsid(
const u8 *fsid, const u8 *metadata_fsid)
{
@@ -1130,6 +1118,9 @@ static void btrfs_close_one_device(struct btrfs_device *device)
fs_devices->rw_devices--;
}
+ if (device->devid == BTRFS_DEV_REPLACE_DEVID)
+ clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
+
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
fs_devices->missing_devices--;
@@ -1228,7 +1219,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
static int devid_cmp(void *priv, const struct list_head *a,
const struct list_head *b)
{
- struct btrfs_device *dev1, *dev2;
+ const struct btrfs_device *dev1, *dev2;
dev1 = list_entry(a, struct btrfs_device, dev_list);
dev2 = list_entry(b, struct btrfs_device, dev_list);
@@ -1598,14 +1589,9 @@ again:
key.offset = search_start;
key.type = BTRFS_DEV_EXTENT_KEY;
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ ret = btrfs_search_backwards(root, &key, path);
if (ret < 0)
goto out;
- if (ret > 0) {
- ret = btrfs_previous_item(root, path, key.objectid, key.type);
- if (ret < 0)
- goto out;
- }
while (1) {
l = path->nodes[0];
@@ -1759,48 +1745,6 @@ out:
return ret;
}
-static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
- struct btrfs_device *device,
- u64 chunk_offset, u64 start, u64 num_bytes)
-{
- int ret;
- struct btrfs_path *path;
- struct btrfs_fs_info *fs_info = device->fs_info;
- struct btrfs_root *root = fs_info->dev_root;
- struct btrfs_dev_extent *extent;
- struct extent_buffer *leaf;
- struct btrfs_key key;
-
- WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
- WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = device->devid;
- key.offset = start;
- key.type = BTRFS_DEV_EXTENT_KEY;
- ret = btrfs_insert_empty_item(trans, root, path, &key,
- sizeof(*extent));
- if (ret)
- goto out;
-
- leaf = path->nodes[0];
- extent = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_dev_extent);
- btrfs_set_dev_extent_chunk_tree(leaf, extent,
- BTRFS_CHUNK_TREE_OBJECTID);
- btrfs_set_dev_extent_chunk_objectid(leaf, extent,
- BTRFS_FIRST_CHUNK_TREE_OBJECTID);
- btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
-
- btrfs_set_dev_extent_length(leaf, extent, num_bytes);
- btrfs_mark_buffer_dirty(leaf);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
{
struct extent_map_tree *em_tree;
@@ -2003,12 +1947,8 @@ static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
if (!(all_avail & btrfs_raid_array[i].bg_flag))
continue;
- if (num_devices < btrfs_raid_array[i].devs_min) {
- int ret = btrfs_raid_array[i].mindev_error;
-
- if (ret)
- return ret;
- }
+ if (num_devices < btrfs_raid_array[i].devs_min)
+ return btrfs_raid_array[i].mindev_error;
}
return 0;
@@ -2137,7 +2077,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
if (IS_ERR(device)) {
if (PTR_ERR(device) == -ENOENT &&
- strcmp(device_path, "missing") == 0)
+ device_path && strcmp(device_path, "missing") == 0)
ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
else
ret = PTR_ERR(device);
@@ -3622,10 +3562,7 @@ static u64 calc_data_stripes(u64 type, int num_stripes)
const int ncopies = btrfs_raid_array[index].ncopies;
const int nparity = btrfs_raid_array[index].nparity;
- if (nparity)
- return num_stripes - nparity;
- else
- return num_stripes / ncopies;
+ return (num_stripes - nparity) / ncopies;
}
/* [pstart, pend) */
@@ -4025,6 +3962,13 @@ static inline int validate_convert_profile(struct btrfs_fs_info *fs_info,
if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
return true;
+ if (fs_info->sectorsize < PAGE_SIZE &&
+ bargs->target & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+ btrfs_err(fs_info,
+ "RAID56 is not yet supported for sectorsize %u with page size %lu",
+ fs_info->sectorsize, PAGE_SIZE);
+ return false;
+ }
/* Profile is valid and does not have bits outside of the allowed set */
if (alloc_profile_is_valid(bargs->target, 1) &&
(bargs->target & ~allowed) == 0)
@@ -5464,56 +5408,6 @@ out:
}
/*
- * This function, btrfs_finish_chunk_alloc(), belongs to phase 2.
- *
- * See the comment at btrfs_chunk_alloc() for details about the chunk allocation
- * phases.
- */
-int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
- u64 chunk_offset, u64 chunk_size)
-{
- struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_device *device;
- struct extent_map *em;
- struct map_lookup *map;
- u64 dev_offset;
- u64 stripe_size;
- int i;
- int ret = 0;
-
- em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
- if (IS_ERR(em))
- return PTR_ERR(em);
-
- map = em->map_lookup;
- stripe_size = em->orig_block_len;
-
- /*
- * Take the device list mutex to prevent races with the final phase of
- * a device replace operation that replaces the device object associated
- * with the map's stripes, because the device object's id can change
- * at any time during that final phase of the device replace operation
- * (dev-replace.c:btrfs_dev_replace_finishing()), so we could grab the
- * replaced device and then see it with an ID of BTRFS_DEV_REPLACE_DEVID,
- * resulting in persisting a device extent item with such ID.
- */
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
- for (i = 0; i < map->num_stripes; i++) {
- device = map->stripes[i].dev;
- dev_offset = map->stripes[i].physical;
-
- ret = btrfs_alloc_dev_extent(trans, device, chunk_offset,
- dev_offset, stripe_size);
- if (ret)
- break;
- }
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-
- free_extent_map(em);
- return ret;
-}
-
-/*
* This function, btrfs_chunk_alloc_add_chunk_item(), typically belongs to the
* phase 1 of chunk allocation. It belongs to phase 2 only when allocating system
* chunks.
@@ -6923,9 +6817,31 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
if (WARN_ON(!devid && !fs_info))
return ERR_PTR(-EINVAL);
- dev = __alloc_device(fs_info);
- if (IS_ERR(dev))
- return dev;
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * Preallocate a bio that's always going to be used for flushing device
+ * barriers and matches the device lifespan
+ */
+ dev->flush_bio = bio_kmalloc(GFP_KERNEL, 0);
+ if (!dev->flush_bio) {
+ kfree(dev);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ INIT_LIST_HEAD(&dev->dev_list);
+ INIT_LIST_HEAD(&dev->dev_alloc_list);
+ INIT_LIST_HEAD(&dev->post_commit_list);
+
+ atomic_set(&dev->reada_in_flight, 0);
+ atomic_set(&dev->dev_stats_ccnt, 0);
+ btrfs_device_data_ordered_init(dev);
+ INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+ INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+ extent_io_tree_init(fs_info, &dev->alloc_state,
+ IO_TREE_DEVICE_ALLOC_STATE, NULL);
if (devid)
tmp = *devid;
@@ -6961,15 +6877,7 @@ static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
{
- int index = btrfs_bg_flags_to_raid_index(type);
- int ncopies = btrfs_raid_array[index].ncopies;
- const int nparity = btrfs_raid_array[index].nparity;
- int data_stripes;
-
- if (nparity)
- data_stripes = num_stripes - nparity;
- else
- data_stripes = num_stripes / ncopies;
+ const int data_stripes = calc_data_stripes(type, num_stripes);
return div_u64(chunk_len, data_stripes);
}
@@ -8144,7 +8052,7 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
goto out;
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
- ret = btrfs_next_item(root, path);
+ ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto out;
/* No dev extents at all? Not good */
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 55a8ba244716..b082250b42e0 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -508,8 +508,6 @@ int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
u64 logical, u64 len);
unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
u64 logical);
-int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
- u64 chunk_offset, u64 chunk_size);
int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans,
struct btrfs_block_group *bg);
int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset);
@@ -568,32 +566,6 @@ static inline void btrfs_dev_stat_set(struct btrfs_device *dev,
atomic_inc(&dev->dev_stats_ccnt);
}
-/*
- * Convert block group flags (BTRFS_BLOCK_GROUP_*) to btrfs_raid_types, which
- * can be used as index to access btrfs_raid_array[].
- */
-static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
-{
- if (flags & BTRFS_BLOCK_GROUP_RAID10)
- return BTRFS_RAID_RAID10;
- else if (flags & BTRFS_BLOCK_GROUP_RAID1)
- return BTRFS_RAID_RAID1;
- else if (flags & BTRFS_BLOCK_GROUP_RAID1C3)
- return BTRFS_RAID_RAID1C3;
- else if (flags & BTRFS_BLOCK_GROUP_RAID1C4)
- return BTRFS_RAID_RAID1C4;
- else if (flags & BTRFS_BLOCK_GROUP_DUP)
- return BTRFS_RAID_DUP;
- else if (flags & BTRFS_BLOCK_GROUP_RAID0)
- return BTRFS_RAID_RAID0;
- else if (flags & BTRFS_BLOCK_GROUP_RAID5)
- return BTRFS_RAID_RAID5;
- else if (flags & BTRFS_BLOCK_GROUP_RAID6)
- return BTRFS_RAID_RAID6;
-
- return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
-}
-
void btrfs_commit_device_sizes(struct btrfs_transaction *trans);
struct list_head * __attribute_const__ btrfs_get_fs_uuids(void);
@@ -603,6 +575,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
struct block_device *bdev,
const char *device_path);
+enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags);
int btrfs_bg_type_to_factor(u64 flags);
const char *btrfs_bg_type_to_raid_name(u64 flags);
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index c3fa7d3fa770..8afa90074891 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -121,12 +121,12 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
workspace->strm.total_in = 0;
workspace->strm.total_out = 0;
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
- cpage_out = kmap(out_page);
+ cpage_out = page_address(out_page);
pages[0] = out_page;
nr_pages = 1;
@@ -148,26 +148,22 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
int i;
for (i = 0; i < in_buf_pages; i++) {
- if (in_page) {
- kunmap(in_page);
+ if (in_page)
put_page(in_page);
- }
in_page = find_get_page(mapping,
start >> PAGE_SHIFT);
- data_in = kmap(in_page);
+ data_in = page_address(in_page);
memcpy(workspace->buf + i * PAGE_SIZE,
data_in, PAGE_SIZE);
start += PAGE_SIZE;
}
workspace->strm.next_in = workspace->buf;
} else {
- if (in_page) {
- kunmap(in_page);
+ if (in_page)
put_page(in_page);
- }
in_page = find_get_page(mapping,
start >> PAGE_SHIFT);
- data_in = kmap(in_page);
+ data_in = page_address(in_page);
start += PAGE_SIZE;
workspace->strm.next_in = data_in;
}
@@ -196,18 +192,17 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
* the stream end if required
*/
if (workspace->strm.avail_out == 0) {
- kunmap(out_page);
if (nr_pages == nr_dest_pages) {
out_page = NULL;
ret = -E2BIG;
goto out;
}
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
- cpage_out = kmap(out_page);
+ cpage_out = page_address(out_page);
pages[nr_pages] = out_page;
nr_pages++;
workspace->strm.avail_out = PAGE_SIZE;
@@ -234,18 +229,17 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
goto out;
} else if (workspace->strm.avail_out == 0) {
/* get another page for the stream end */
- kunmap(out_page);
if (nr_pages == nr_dest_pages) {
out_page = NULL;
ret = -E2BIG;
goto out;
}
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
- cpage_out = kmap(out_page);
+ cpage_out = page_address(out_page);
pages[nr_pages] = out_page;
nr_pages++;
workspace->strm.avail_out = PAGE_SIZE;
@@ -264,13 +258,8 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
*total_in = workspace->strm.total_in;
out:
*out_pages = nr_pages;
- if (out_page)
- kunmap(out_page);
-
- if (in_page) {
- kunmap(in_page);
+ if (in_page)
put_page(in_page);
- }
return ret;
}
@@ -286,10 +275,8 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
unsigned long buf_start;
struct page **pages_in = cb->compressed_pages;
- u64 disk_start = cb->start;
- struct bio *orig_bio = cb->orig_bio;
- data_in = kmap(pages_in[page_in_index]);
+ data_in = page_address(pages_in[page_in_index]);
workspace->strm.next_in = data_in;
workspace->strm.avail_in = min_t(size_t, srclen, PAGE_SIZE);
workspace->strm.total_in = 0;
@@ -311,7 +298,6 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) {
pr_warn("BTRFS: inflateInit failed\n");
- kunmap(pages_in[page_in_index]);
return -EIO;
}
while (workspace->strm.total_in < srclen) {
@@ -326,9 +312,8 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
if (buf_start == total_out)
break;
- ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
- total_out, disk_start,
- orig_bio);
+ ret2 = btrfs_decompress_buf2page(workspace->buf,
+ total_out - buf_start, cb, buf_start);
if (ret2 == 0) {
ret = 0;
goto done;
@@ -339,17 +324,16 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
if (workspace->strm.avail_in == 0) {
unsigned long tmp;
- kunmap(pages_in[page_in_index]);
+
page_in_index++;
if (page_in_index >= total_pages_in) {
data_in = NULL;
break;
}
- data_in = kmap(pages_in[page_in_index]);
+ data_in = page_address(pages_in[page_in_index]);
workspace->strm.next_in = data_in;
tmp = srclen - workspace->strm.total_in;
- workspace->strm.avail_in = min(tmp,
- PAGE_SIZE);
+ workspace->strm.avail_in = min(tmp, PAGE_SIZE);
}
}
if (ret != Z_STREAM_END)
@@ -358,10 +342,8 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
ret = 0;
done:
zlib_inflateEnd(&workspace->strm);
- if (data_in)
- kunmap(pages_in[page_in_index]);
if (!ret)
- zero_fill_bio(orig_bio);
+ zero_fill_bio(cb->orig_bio);
return ret;
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 907c2cc45c9c..47af1ab3bf12 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -245,7 +245,7 @@ static int calculate_emulated_zone_size(struct btrfs_fs_info *fs_info)
goto out;
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
- ret = btrfs_next_item(root, path);
+ ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto out;
/* No dev extents at all? Not good */
@@ -296,7 +296,6 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
struct btrfs_fs_info *fs_info = device->fs_info;
struct btrfs_zoned_device_info *zone_info = NULL;
struct block_device *bdev = device->bdev;
- struct request_queue *queue = bdev_get_queue(bdev);
sector_t nr_sectors;
sector_t sector = 0;
struct blk_zone *zones = NULL;
@@ -348,19 +347,10 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
nr_sectors = bdev_nr_sectors(bdev);
zone_info->zone_size_shift = ilog2(zone_info->zone_size);
- zone_info->max_zone_append_size =
- (u64)queue_max_zone_append_sectors(queue) << SECTOR_SHIFT;
zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors);
if (!IS_ALIGNED(nr_sectors, zone_sectors))
zone_info->nr_zones++;
- if (bdev_is_zoned(bdev) && zone_info->max_zone_append_size == 0) {
- btrfs_err(fs_info, "zoned: device %pg does not support zone append",
- bdev);
- ret = -EINVAL;
- goto out;
- }
-
zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
if (!zone_info->seq_zones) {
ret = -ENOMEM;
@@ -529,7 +519,6 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
u64 zoned_devices = 0;
u64 nr_devices = 0;
u64 zone_size = 0;
- u64 max_zone_append_size = 0;
const bool incompat_zoned = btrfs_fs_incompat(fs_info, ZONED);
int ret = 0;
@@ -565,11 +554,6 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
ret = -EINVAL;
goto out;
}
- if (!max_zone_append_size ||
- (zone_info->max_zone_append_size &&
- zone_info->max_zone_append_size < max_zone_append_size))
- max_zone_append_size =
- zone_info->max_zone_append_size;
}
nr_devices++;
}
@@ -619,7 +603,6 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
}
fs_info->zone_size = zone_size;
- fs_info->max_zone_append_size = max_zone_append_size;
fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
/*
@@ -1318,9 +1301,6 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
if (!btrfs_is_zoned(fs_info))
return false;
- if (!fs_info->max_zone_append_size)
- return false;
-
if (!is_data_inode(&inode->vfs_inode))
return false;
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index b0ae2608cb6b..4b299705bb12 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -22,7 +22,6 @@ struct btrfs_zoned_device_info {
*/
u64 zone_size;
u8 zone_size_shift;
- u64 max_zone_append_size;
u32 nr_zones;
unsigned long *seq_zones;
unsigned long *empty_zones;
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index 3e26b466476a..56dce9f00988 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -399,19 +399,19 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
/* map in the first page of input data */
in_page = find_get_page(mapping, start >> PAGE_SHIFT);
- workspace->in_buf.src = kmap(in_page);
+ workspace->in_buf.src = page_address(in_page);
workspace->in_buf.pos = 0;
workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
/* Allocate and map in the output buffer */
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
pages[nr_pages++] = out_page;
- workspace->out_buf.dst = kmap(out_page);
+ workspace->out_buf.dst = page_address(out_page);
workspace->out_buf.pos = 0;
workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
@@ -446,19 +446,18 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
if (workspace->out_buf.pos == workspace->out_buf.size) {
tot_out += PAGE_SIZE;
max_out -= PAGE_SIZE;
- kunmap(out_page);
if (nr_pages == nr_dest_pages) {
out_page = NULL;
ret = -E2BIG;
goto out;
}
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
pages[nr_pages++] = out_page;
- workspace->out_buf.dst = kmap(out_page);
+ workspace->out_buf.dst = page_address(out_page);
workspace->out_buf.pos = 0;
workspace->out_buf.size = min_t(size_t, max_out,
PAGE_SIZE);
@@ -473,13 +472,12 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
/* Check if we need more input */
if (workspace->in_buf.pos == workspace->in_buf.size) {
tot_in += PAGE_SIZE;
- kunmap(in_page);
put_page(in_page);
start += PAGE_SIZE;
len -= PAGE_SIZE;
in_page = find_get_page(mapping, start >> PAGE_SHIFT);
- workspace->in_buf.src = kmap(in_page);
+ workspace->in_buf.src = page_address(in_page);
workspace->in_buf.pos = 0;
workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
}
@@ -506,19 +504,18 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
tot_out += PAGE_SIZE;
max_out -= PAGE_SIZE;
- kunmap(out_page);
if (nr_pages == nr_dest_pages) {
out_page = NULL;
ret = -E2BIG;
goto out;
}
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
pages[nr_pages++] = out_page;
- workspace->out_buf.dst = kmap(out_page);
+ workspace->out_buf.dst = page_address(out_page);
workspace->out_buf.pos = 0;
workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
}
@@ -534,12 +531,8 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
out:
*out_pages = nr_pages;
/* Cleanup */
- if (in_page) {
- kunmap(in_page);
+ if (in_page)
put_page(in_page);
- }
- if (out_page)
- kunmap(out_page);
return ret;
}
@@ -547,8 +540,6 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
struct page **pages_in = cb->compressed_pages;
- u64 disk_start = cb->start;
- struct bio *orig_bio = cb->orig_bio;
size_t srclen = cb->compressed_len;
ZSTD_DStream *stream;
int ret = 0;
@@ -565,7 +556,7 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
goto done;
}
- workspace->in_buf.src = kmap(pages_in[page_in_index]);
+ workspace->in_buf.src = page_address(pages_in[page_in_index]);
workspace->in_buf.pos = 0;
workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
@@ -589,7 +580,7 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
workspace->out_buf.pos = 0;
ret = btrfs_decompress_buf2page(workspace->out_buf.dst,
- buf_start, total_out, disk_start, orig_bio);
+ total_out - buf_start, cb, buf_start);
if (ret == 0)
break;
@@ -601,23 +592,21 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
break;
if (workspace->in_buf.pos == workspace->in_buf.size) {
- kunmap(pages_in[page_in_index++]);
+ page_in_index++;
if (page_in_index >= total_pages_in) {
workspace->in_buf.src = NULL;
ret = -EIO;
goto done;
}
srclen -= PAGE_SIZE;
- workspace->in_buf.src = kmap(pages_in[page_in_index]);
+ workspace->in_buf.src = page_address(pages_in[page_in_index]);
workspace->in_buf.pos = 0;
workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
}
}
ret = 0;
- zero_fill_bio(orig_bio);
+ zero_fill_bio(cb->orig_bio);
done:
- if (workspace->in_buf.src)
- kunmap(pages_in[page_in_index]);
return ret;
}
diff --git a/fs/buffer.c b/fs/buffer.c
index 6290c3afdba4..bd6a9e9fbd64 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1912,7 +1912,7 @@ EXPORT_SYMBOL(page_zero_new_buffers);
static void
iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
- struct iomap *iomap)
+ const struct iomap *iomap)
{
loff_t offset = block << inode->i_blkbits;
@@ -1966,7 +1966,7 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
}
int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
- get_block_t *get_block, struct iomap *iomap)
+ get_block_t *get_block, const struct iomap *iomap)
{
unsigned from = pos & (PAGE_SIZE - 1);
unsigned to = from + len;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index a1e2813731d1..7e7a897ae0d3 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1395,9 +1395,11 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
ret = VM_FAULT_SIGBUS;
} else {
struct address_space *mapping = inode->i_mapping;
- struct page *page = find_or_create_page(mapping, 0,
- mapping_gfp_constraint(mapping,
- ~__GFP_FS));
+ struct page *page;
+
+ filemap_invalidate_lock_shared(mapping);
+ page = find_or_create_page(mapping, 0,
+ mapping_gfp_constraint(mapping, ~__GFP_FS));
if (!page) {
ret = VM_FAULT_OOM;
goto out_inline;
@@ -1418,6 +1420,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
vmf->page = page;
ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
out_inline:
+ filemap_invalidate_unlock_shared(mapping);
dout("filemap_fault %p %llu read inline data ret %x\n",
inode, off, ret);
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7bdefd0c789a..39db97f149b9 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1743,7 +1743,15 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
struct ceph_cap_flush *ceph_alloc_cap_flush(void)
{
- return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
+ struct ceph_cap_flush *cf;
+
+ cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
+ /* The allocation can fail: return NULL rather than dereference it. */
+ if (!cf)
+ return NULL;
+ /* Standalone flushes are not capsnaps; ceph_queue_cap_snap() marks those. */
+ cf->is_capsnap = false;
+ return cf;
}
void ceph_free_cap_flush(struct ceph_cap_flush *cf)
@@ -1778,7 +1782,7 @@ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
prev->wake = true;
wake = false;
}
- list_del(&cf->g_list);
+ list_del_init(&cf->g_list);
return wake;
}
@@ -1793,7 +1797,7 @@ static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
prev->wake = true;
wake = false;
}
- list_del(&cf->i_list);
+ list_del_init(&cf->i_list);
return wake;
}
@@ -2352,7 +2356,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) {
- if (!cf->caps) {
+ if (cf->is_capsnap) {
last_snap_flush = cf->tid;
break;
}
@@ -2371,7 +2375,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
first_tid = cf->tid + 1;
- if (cf->caps) {
+ if (!cf->is_capsnap) {
struct cap_msg_args arg;
dout("kick_flushing_caps %p cap %p tid %llu %s\n",
@@ -3516,7 +3520,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
cleaned = cf->caps;
/* Is this a capsnap? */
- if (cf->caps == 0)
+ if (cf->is_capsnap)
continue;
if (cf->tid <= flush_tid) {
@@ -3589,8 +3593,9 @@ out:
while (!list_empty(&to_remove)) {
cf = list_first_entry(&to_remove,
struct ceph_cap_flush, i_list);
- list_del(&cf->i_list);
- ceph_free_cap_flush(cf);
+ list_del_init(&cf->i_list);
+ if (!cf->is_capsnap)
+ ceph_free_cap_flush(cf);
}
if (wake_ci)
@@ -4150,11 +4155,19 @@ bad:
/*
* Delayed work handler to process end of delayed cap release LRU list.
+ *
+ * If new caps are added to the list while processing it, these won't get
+ * processed in this run. In this case, the ci->i_hold_caps_max will be
+ * returned so that the work can be scheduled accordingly.
*/
-void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
+unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
{
struct inode *inode;
struct ceph_inode_info *ci;
+ struct ceph_mount_options *opt = mdsc->fsc->mount_options;
+ unsigned long delay_max = opt->caps_wanted_delay_max * HZ;
+ unsigned long loop_start = jiffies;
+ unsigned long delay = 0;
dout("check_delayed_caps\n");
spin_lock(&mdsc->cap_delay_lock);
@@ -4162,6 +4175,11 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
ci = list_first_entry(&mdsc->cap_delay_list,
struct ceph_inode_info,
i_cap_delay_list);
+ if (time_before(loop_start, ci->i_hold_caps_max - delay_max)) {
+ dout("%s caps added recently. Exiting loop", __func__);
+ delay = ci->i_hold_caps_max;
+ break;
+ }
if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 &&
time_before(jiffies, ci->i_hold_caps_max))
break;
@@ -4177,6 +4195,8 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
}
}
spin_unlock(&mdsc->cap_delay_lock);
+
+ return delay;
}
/*
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d1755ac1d964..e1d605a02d4a 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -2088,6 +2088,7 @@ static long ceph_fallocate(struct file *file, int mode,
if (ret < 0)
goto unlock;
+ filemap_invalidate_lock(inode->i_mapping);
ceph_zero_pagecache_range(inode, offset, length);
ret = ceph_zero_objects(inode, offset, length);
@@ -2100,6 +2101,7 @@ static long ceph_fallocate(struct file *file, int mode,
if (dirty)
__mark_inode_dirty(inode, dirty);
}
+ filemap_invalidate_unlock(inode->i_mapping);
ceph_put_cap_refs(ci, got);
unlock:
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index fa8a847743d0..bdeb271f47d9 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -240,9 +240,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
if (!(fl->fl_flags & FL_POSIX))
return -ENOLCK;
- /* No mandatory locks */
- if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
- return -ENOLCK;
dout("ceph_lock, fl_owner: %p\n", fl->fl_owner);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9db1b39df773..0b69aec23e5c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1616,7 +1616,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
spin_lock(&mdsc->cap_dirty_lock);
list_for_each_entry(cf, &to_remove, i_list)
- list_del(&cf->g_list);
+ list_del_init(&cf->g_list);
if (!list_empty(&ci->i_dirty_item)) {
pr_warn_ratelimited(
@@ -1668,8 +1668,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_cap_flush *cf;
cf = list_first_entry(&to_remove,
struct ceph_cap_flush, i_list);
- list_del(&cf->i_list);
- ceph_free_cap_flush(cf);
+ list_del_init(&cf->i_list);
+ if (!cf->is_capsnap)
+ ceph_free_cap_flush(cf);
}
wake_up_all(&ci->i_cap_wq);
@@ -4490,22 +4491,29 @@ void inc_session_sequence(struct ceph_mds_session *s)
}
/*
- * delayed work -- periodically trim expired leases, renew caps with mds
+ * delayed work -- periodically trim expired leases, renew caps with mds. If
+ * the @delay parameter is set to 0 or if it's more than 5 secs, the default
+ * workqueue delay value of 5 secs will be used.
*/
-static void schedule_delayed(struct ceph_mds_client *mdsc)
+static void schedule_delayed(struct ceph_mds_client *mdsc, unsigned long delay)
{
- int delay = 5;
- unsigned hz = round_jiffies_relative(HZ * delay);
- schedule_delayed_work(&mdsc->delayed_work, hz);
+ unsigned long max_delay = HZ * 5;
+
+ /* 5 secs default delay */
+ if (!delay || (delay > max_delay))
+ delay = max_delay;
+ schedule_delayed_work(&mdsc->delayed_work,
+ round_jiffies_relative(delay));
}
static void delayed_work(struct work_struct *work)
{
- int i;
struct ceph_mds_client *mdsc =
container_of(work, struct ceph_mds_client, delayed_work.work);
+ unsigned long delay;
int renew_interval;
int renew_caps;
+ int i;
dout("mdsc delayed_work\n");
@@ -4545,7 +4553,7 @@ static void delayed_work(struct work_struct *work)
}
mutex_unlock(&mdsc->mutex);
- ceph_check_delayed_caps(mdsc);
+ delay = ceph_check_delayed_caps(mdsc);
ceph_queue_cap_reclaim_work(mdsc);
@@ -4553,7 +4561,7 @@ static void delayed_work(struct work_struct *work)
maybe_recover_session(mdsc);
- schedule_delayed(mdsc);
+ schedule_delayed(mdsc, delay);
}
int ceph_mdsc_init(struct ceph_fs_client *fsc)
@@ -5030,7 +5038,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
mdsc->mdsmap->m_epoch);
mutex_unlock(&mdsc->mutex);
- schedule_delayed(mdsc);
+ schedule_delayed(mdsc, 0);
return;
bad_unlock:
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index abd9af7727ad..3c444b9cb17b 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -394,9 +394,11 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
{
int i;
- for (i = 0; i < m->possible_max_rank; i++)
- kfree(m->m_info[i].export_targets);
- kfree(m->m_info);
+ if (m->m_info) {
+ for (i = 0; i < m->possible_max_rank; i++)
+ kfree(m->m_info[i].export_targets);
+ kfree(m->m_info);
+ }
kfree(m->m_data_pg_pools);
kfree(m);
}
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 4ac0606dcbd4..15105f9da3fd 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -67,19 +67,19 @@ void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
{
lockdep_assert_held(&mdsc->snap_rwsem);
- dout("get_realm %p %d -> %d\n", realm,
- atomic_read(&realm->nref), atomic_read(&realm->nref)+1);
/*
- * since we _only_ increment realm refs or empty the empty
- * list with snap_rwsem held, adjusting the empty list here is
- * safe. we do need to protect against concurrent empty list
- * additions, however.
+ * The 0->1 and 1->0 transitions must take the snap_empty_lock
+ * atomically with the refcount change. Go ahead and bump the
+ * nref here, unless it's 0, in which case we take the spinlock
+ * and then do the increment and remove it from the list.
*/
- if (atomic_inc_return(&realm->nref) == 1) {
- spin_lock(&mdsc->snap_empty_lock);
+ if (atomic_inc_not_zero(&realm->nref))
+ return;
+
+ spin_lock(&mdsc->snap_empty_lock);
+ if (atomic_inc_return(&realm->nref) == 1)
list_del_init(&realm->empty_item);
- spin_unlock(&mdsc->snap_empty_lock);
- }
+ spin_unlock(&mdsc->snap_empty_lock);
}
static void __insert_snap_realm(struct rb_root *root,
@@ -208,28 +208,28 @@ static void __put_snap_realm(struct ceph_mds_client *mdsc,
{
lockdep_assert_held_write(&mdsc->snap_rwsem);
- dout("__put_snap_realm %llx %p %d -> %d\n", realm->ino, realm,
- atomic_read(&realm->nref), atomic_read(&realm->nref)-1);
+ /*
+ * We do not require the snap_empty_lock here, as any caller that
+ * increments the value must hold the snap_rwsem.
+ */
if (atomic_dec_and_test(&realm->nref))
__destroy_snap_realm(mdsc, realm);
}
/*
- * caller needn't hold any locks
+ * See comments in ceph_get_snap_realm. Caller needn't hold any locks.
*/
void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
struct ceph_snap_realm *realm)
{
- dout("put_snap_realm %llx %p %d -> %d\n", realm->ino, realm,
- atomic_read(&realm->nref), atomic_read(&realm->nref)-1);
- if (!atomic_dec_and_test(&realm->nref))
+ if (!atomic_dec_and_lock(&realm->nref, &mdsc->snap_empty_lock))
return;
if (down_write_trylock(&mdsc->snap_rwsem)) {
+ spin_unlock(&mdsc->snap_empty_lock);
__destroy_snap_realm(mdsc, realm);
up_write(&mdsc->snap_rwsem);
} else {
- spin_lock(&mdsc->snap_empty_lock);
list_add(&realm->empty_item, &mdsc->snap_empty);
spin_unlock(&mdsc->snap_empty_lock);
}
@@ -487,6 +487,9 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci)
pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode);
return;
}
+ capsnap->cap_flush.is_capsnap = true;
+ INIT_LIST_HEAD(&capsnap->cap_flush.i_list);
+ INIT_LIST_HEAD(&capsnap->cap_flush.g_list);
spin_lock(&ci->i_ceph_lock);
used = __ceph_caps_used(ci);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 6b6332a5c113..b1a363641beb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -182,8 +182,9 @@ struct ceph_cap {
struct ceph_cap_flush {
u64 tid;
- int caps; /* 0 means capsnap */
+ int caps;
bool wake; /* wake up flush waiters when finish ? */
+ bool is_capsnap; /* true means capsnap */
struct list_head g_list; // global
struct list_head i_list; // per inode
};
@@ -1167,7 +1168,7 @@ extern void ceph_flush_snaps(struct ceph_inode_info *ci,
extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_mds_session *session);
-extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
+extern unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
extern int ceph_drop_caps_for_unlink(struct inode *inode);
extern int ceph_encode_inode_release(void **p, struct inode *inode,
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 7364950a9ef4..3b7e3b9e4fd2 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -4,19 +4,16 @@ config CIFS
depends on INET
select NLS
select CRYPTO
- select CRYPTO_MD4
select CRYPTO_MD5
select CRYPTO_SHA256
select CRYPTO_SHA512
select CRYPTO_CMAC
select CRYPTO_HMAC
- select CRYPTO_LIB_ARC4
select CRYPTO_AEAD2
select CRYPTO_CCM
select CRYPTO_GCM
select CRYPTO_ECB
select CRYPTO_AES
- select CRYPTO_LIB_DES
select KEYS
select DNS_RESOLVER
select ASN1
@@ -85,33 +82,6 @@ config CIFS_ALLOW_INSECURE_LEGACY
If unsure, say Y.
-config CIFS_WEAK_PW_HASH
- bool "Support legacy servers which use weaker LANMAN security"
- depends on CIFS && CIFS_ALLOW_INSECURE_LEGACY
- help
- Modern CIFS servers including Samba and most Windows versions
- (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
- security mechanisms. These hash the password more securely
- than the mechanisms used in the older LANMAN version of the
- SMB protocol but LANMAN based authentication is needed to
- establish sessions with some old SMB servers.
-
- Enabling this option allows the cifs module to mount to older
- LANMAN based servers such as OS/2 and Windows 95, but such
- mounts may be less secure than mounts using NTLM or more recent
- security mechanisms if you are on a public network. Unless you
- have a need to access old SMB servers (and are on a private
- network) you probably want to say N. Even if this support
- is enabled in the kernel build, LANMAN authentication will not be
- used automatically. At runtime LANMAN mounts are disabled but
- can be set to required (or optional) either in
- /proc/fs/cifs (see Documentation/admin-guide/cifs/usage.rst for
- more detail) or via an option on the mount command. This support
- is disabled by default in order to reduce the possibility of a
- downgrade attack.
-
- If unsure, say N.
-
config CIFS_UPCALL
bool "Kerberos/SPNEGO advanced session setup"
depends on CIFS
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 8857ac7e7a14..51a824fc926a 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -250,9 +250,6 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
seq_printf(m, ",ALLOW_INSECURE_LEGACY");
#endif
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
- seq_printf(m, ",WEAK_PW_HASH");
-#endif
#ifdef CONFIG_CIFS_POSIX
seq_printf(m, ",CIFS_POSIX");
#endif
@@ -929,14 +926,6 @@ cifs_security_flags_handle_must_flags(unsigned int *flags)
*flags = CIFSSEC_MUST_NTLMSSP;
else if ((*flags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2)
*flags = CIFSSEC_MUST_NTLMV2;
- else if ((*flags & CIFSSEC_MUST_NTLM) == CIFSSEC_MUST_NTLM)
- *flags = CIFSSEC_MUST_NTLM;
- else if (CIFSSEC_MUST_LANMAN &&
- (*flags & CIFSSEC_MUST_LANMAN) == CIFSSEC_MUST_LANMAN)
- *flags = CIFSSEC_MUST_LANMAN;
- else if (CIFSSEC_MUST_PLNTXT &&
- (*flags & CIFSSEC_MUST_PLNTXT) == CIFSSEC_MUST_PLNTXT)
- *flags = CIFSSEC_MUST_PLNTXT;
*flags |= signflags;
}
diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
index 93b47818c6c2..12bde7bfda86 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/cifs/cifs_swn.c
@@ -147,8 +147,6 @@ static int cifs_swn_send_register_message(struct cifs_swn_reg *swnreg)
goto nlmsg_fail;
}
break;
- case LANMAN:
- case NTLM:
case NTLMv2:
case RawNTLMSSP:
ret = cifs_swn_auth_info_ntlm(swnreg->tcon, skb);
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 9bd03a231032..171ad8b42107 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -358,14 +358,9 @@ cifs_strndup_from_utf16(const char *src, const int maxlen,
if (!dst)
return NULL;
cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage,
- NO_MAP_UNI_RSVD);
+ NO_MAP_UNI_RSVD);
} else {
- len = strnlen(src, maxlen);
- len++;
- dst = kmalloc(len, GFP_KERNEL);
- if (!dst)
- return NULL;
- strlcpy(dst, src, len);
+ dst = kstrndup(src, maxlen, GFP_KERNEL);
}
return dst;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index ecf15d845dbd..6679e07e533e 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -22,7 +22,7 @@
#include <linux/random.h>
#include <linux/highmem.h>
#include <linux/fips.h>
-#include <crypto/arc4.h>
+#include "../cifs_common/arc4.h"
#include <crypto/aead.h>
int __cifs_calc_signature(struct smb_rqst *rqst,
@@ -250,87 +250,6 @@ int cifs_verify_signature(struct smb_rqst *rqst,
}
-/* first calculate 24 bytes ntlm response and then 16 byte session key */
-int setup_ntlm_response(struct cifs_ses *ses, const struct nls_table *nls_cp)
-{
- int rc = 0;
- unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
- char temp_key[CIFS_SESS_KEY_SIZE];
-
- if (!ses)
- return -EINVAL;
-
- ses->auth_key.response = kmalloc(temp_len, GFP_KERNEL);
- if (!ses->auth_key.response)
- return -ENOMEM;
-
- ses->auth_key.len = temp_len;
-
- rc = SMBNTencrypt(ses->password, ses->server->cryptkey,
- ses->auth_key.response + CIFS_SESS_KEY_SIZE, nls_cp);
- if (rc) {
- cifs_dbg(FYI, "%s Can't generate NTLM response, error: %d\n",
- __func__, rc);
- return rc;
- }
-
- rc = E_md4hash(ses->password, temp_key, nls_cp);
- if (rc) {
- cifs_dbg(FYI, "%s Can't generate NT hash, error: %d\n",
- __func__, rc);
- return rc;
- }
-
- rc = mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE);
- if (rc)
- cifs_dbg(FYI, "%s Can't generate NTLM session key, error: %d\n",
- __func__, rc);
-
- return rc;
-}
-
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
- char *lnm_session_key)
-{
- int i, len;
- int rc;
- char password_with_pad[CIFS_ENCPWD_SIZE] = {0};
-
- if (password) {
- for (len = 0; len < CIFS_ENCPWD_SIZE; len++)
- if (!password[len])
- break;
-
- memcpy(password_with_pad, password, len);
- }
-
- if (!encrypt && global_secflags & CIFSSEC_MAY_PLNTXT) {
- memcpy(lnm_session_key, password_with_pad,
- CIFS_ENCPWD_SIZE);
- return 0;
- }
-
- /* calculate old style session key */
- /* calling toupper is less broken than repeatedly
- calling nls_toupper would be since that will never
- work for UTF8, but neither handles multibyte code pages
- but the only alternative would be converting to UCS-16 (Unicode)
- (using a routine something like UniStrupr) then
- uppercasing and then converting back from Unicode - which
- would only worth doing it if we knew it were utf8. Basically
- utf8 and other multibyte codepages each need their own strupper
- function since a byte at a time will ont work. */
-
- for (i = 0; i < CIFS_ENCPWD_SIZE; i++)
- password_with_pad[i] = toupper(password_with_pad[i]);
-
- rc = SMBencrypt(password_with_pad, cryptkey, lnm_session_key);
-
- return rc;
-}
-#endif /* CIFS_WEAK_PW_HASH */
-
/* Build a proper attribute value/target info pairs blob.
* Fill in netbios and dns domain name and workstation name
* and client time (total five av pairs and + one end of fields indicator.
@@ -780,9 +699,9 @@ calc_seckey(struct cifs_ses *ses)
return -ENOMEM;
}
- arc4_setkey(ctx_arc4, ses->auth_key.response, CIFS_SESS_KEY_SIZE);
- arc4_crypt(ctx_arc4, ses->ntlmssp->ciphertext, sec_key,
- CIFS_CPHTXT_SIZE);
+ cifs_arc4_setkey(ctx_arc4, ses->auth_key.response, CIFS_SESS_KEY_SIZE);
+ cifs_arc4_crypt(ctx_arc4, ses->ntlmssp->ciphertext, sec_key,
+ CIFS_CPHTXT_SIZE);
/* make secondary_key/nonce as session key */
memcpy(ses->auth_key.response, sec_key, CIFS_SESS_KEY_SIZE);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 64b71c4e2a9d..8c20bfa187ac 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -399,7 +399,6 @@ cifs_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
- cifs_fscache_release_inode_cookie(inode);
}
static void
@@ -438,15 +437,9 @@ cifs_show_security(struct seq_file *s, struct cifs_ses *ses)
seq_puts(s, ",sec=");
switch (ses->sectype) {
- case LANMAN:
- seq_puts(s, "lanman");
- break;
case NTLMv2:
seq_puts(s, "ntlmv2");
break;
- case NTLM:
- seq_puts(s, "ntlm");
- break;
case Kerberos:
seq_puts(s, "krb5");
break;
@@ -1755,7 +1748,6 @@ MODULE_DESCRIPTION
MODULE_VERSION(CIFS_VERSION);
MODULE_SOFTDEP("ecb");
MODULE_SOFTDEP("hmac");
-MODULE_SOFTDEP("md4");
MODULE_SOFTDEP("md5");
MODULE_SOFTDEP("nls");
MODULE_SOFTDEP("aes");
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index c0bfc2f01030..c068f7d8d879 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -114,8 +114,6 @@ enum statusEnum {
enum securityEnum {
Unspecified = 0, /* not specified */
- LANMAN, /* Legacy LANMAN auth */
- NTLM, /* Legacy NTLM012 auth with NTLM hash */
NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */
Kerberos, /* Kerberos via SPNEGO */
@@ -634,7 +632,6 @@ struct TCP_Server_Info {
struct session_key session_key;
unsigned long lstrp; /* when we got last response from this server */
struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
-#define CIFS_NEGFLAVOR_LANMAN 0 /* wct == 13, LANMAN */
#define CIFS_NEGFLAVOR_UNENCAP 1 /* wct == 17, but no ext_sec */
#define CIFS_NEGFLAVOR_EXTENDED 2 /* wct == 17, ext_sec bit set */
char negflavor; /* NEGOTIATE response flavor */
@@ -1611,6 +1608,11 @@ struct dfs_info3_param {
int ttl;
};
+struct file_list {
+ struct list_head list;
+ struct cifsFileInfo *cfile;
+};
+
/*
* common struct for holding inode info when searching for or updating an
* inode with new info
@@ -1729,16 +1731,8 @@ static inline bool is_retryable_error(int error)
/* Security Flags: indicate type of session setup needed */
#define CIFSSEC_MAY_SIGN 0x00001
-#define CIFSSEC_MAY_NTLM 0x00002
#define CIFSSEC_MAY_NTLMV2 0x00004
#define CIFSSEC_MAY_KRB5 0x00008
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-#define CIFSSEC_MAY_LANMAN 0x00010
-#define CIFSSEC_MAY_PLNTXT 0x00020
-#else
-#define CIFSSEC_MAY_LANMAN 0
-#define CIFSSEC_MAY_PLNTXT 0
-#endif /* weak passwords */
#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */
#define CIFSSEC_MAY_NTLMSSP 0x00080 /* raw ntlmssp with ntlmv2 */
@@ -1746,32 +1740,19 @@ static inline bool is_retryable_error(int error)
/* note that only one of the following can be set so the
result of setting MUST flags more than once will be to
require use of the stronger protocol */
-#define CIFSSEC_MUST_NTLM 0x02002
#define CIFSSEC_MUST_NTLMV2 0x04004
#define CIFSSEC_MUST_KRB5 0x08008
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-#define CIFSSEC_MUST_LANMAN 0x10010
-#define CIFSSEC_MUST_PLNTXT 0x20020
-#ifdef CONFIG_CIFS_UPCALL
-#define CIFSSEC_MASK 0xBF0BF /* allows weak security but also krb5 */
-#else
-#define CIFSSEC_MASK 0xB70B7 /* current flags supported if weak */
-#endif /* UPCALL */
-#else /* do not allow weak pw hash */
-#define CIFSSEC_MUST_LANMAN 0
-#define CIFSSEC_MUST_PLNTXT 0
#ifdef CONFIG_CIFS_UPCALL
#define CIFSSEC_MASK 0x8F08F /* flags supported if no weak allowed */
#else
#define CIFSSEC_MASK 0x87087 /* flags supported if no weak allowed */
#endif /* UPCALL */
-#endif /* WEAK_PW_HASH */
#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */
#define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */
#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP)
-#define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2)
-#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP)
+#define CIFSSEC_MAX (CIFSSEC_MUST_NTLMV2)
+#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP)
/*
*****************************************************************
* All constants go here
@@ -1935,10 +1916,6 @@ static inline char *get_security_type_str(enum securityEnum sectype)
return "Kerberos";
case NTLMv2:
return "NTLMv2";
- case NTLM:
- return "NTLM";
- case LANMAN:
- return "LANMAN";
default:
return "Unknown";
}
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index f6e235001358..dc920e206336 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -14,13 +14,7 @@
#include <asm/unaligned.h>
#include "smbfsctl.h"
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-#define LANMAN_PROT 0
-#define LANMAN2_PROT 1
-#define CIFS_PROT 2
-#else
#define CIFS_PROT 0
-#endif
#define POSIX_PROT (CIFS_PROT+1)
#define BAD_PROT 0xFFFF
@@ -505,30 +499,8 @@ typedef struct negotiate_req {
unsigned char DialectsArray[1];
} __attribute__((packed)) NEGOTIATE_REQ;
-/* Dialect index is 13 for LANMAN */
-
#define MIN_TZ_ADJ (15 * 60) /* minimum grid for timezones in seconds */
-typedef struct lanman_neg_rsp {
- struct smb_hdr hdr; /* wct = 13 */
- __le16 DialectIndex;
- __le16 SecurityMode;
- __le16 MaxBufSize;
- __le16 MaxMpxCount;
- __le16 MaxNumberVcs;
- __le16 RawMode;
- __le32 SessionKey;
- struct {
- __le16 Time;
- __le16 Date;
- } __attribute__((packed)) SrvTime;
- __le16 ServerTimeZone;
- __le16 EncryptionKeyLength;
- __le16 Reserved;
- __u16 ByteCount;
- unsigned char EncryptionKey[1];
-} __attribute__((packed)) LANMAN_NEG_RSP;
-
#define READ_RAW_ENABLE 1
#define WRITE_RAW_ENABLE 2
#define RAW_ENABLE (READ_RAW_ENABLE | WRITE_RAW_ENABLE)
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e0def0f0714b..f9740c21ca3d 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -498,19 +498,12 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
extern int cifs_verify_signature(struct smb_rqst *rqst,
struct TCP_Server_Info *server,
__u32 expected_sequence_number);
-extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *,
- const struct nls_table *);
-extern int setup_ntlm_response(struct cifs_ses *, const struct nls_table *);
extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
extern void cifs_crypto_secmech_release(struct TCP_Server_Info *server);
extern int calc_seckey(struct cifs_ses *);
extern int generate_smb30signingkey(struct cifs_ses *);
extern int generate_smb311signingkey(struct cifs_ses *);
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-extern int calc_lanman_hash(const char *password, const char *cryptkey,
- bool encrypt, char *lnm_session_key);
-#endif /* CIFS_WEAK_PW_HASH */
extern int CIFSSMBCopy(unsigned int xid,
struct cifs_tcon *source_tcon,
const char *fromName,
@@ -547,11 +540,8 @@ extern int check_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb,
struct cifs_fattr *fattr,
const unsigned char *path);
-extern int mdfour(unsigned char *, unsigned char *, int);
extern int E_md4hash(const unsigned char *passwd, unsigned char *p16,
const struct nls_table *codepage);
-extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
- unsigned char *p24);
extern int
cifs_setup_volume_info(struct smb3_fs_context *ctx, const char *mntopts, const char *devname);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 65d1a65bfc37..a8e41c1e80ca 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -42,10 +42,6 @@ static struct {
int index;
char *name;
} protocols[] = {
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
- {LANMAN_PROT, "\2LM1.2X002"},
- {LANMAN2_PROT, "\2LANMAN2.1"},
-#endif /* weak password hashing for legacy clients */
{CIFS_PROT, "\2NT LM 0.12"},
{POSIX_PROT, "\2POSIX 2"},
{BAD_PROT, "\2"}
@@ -55,10 +51,6 @@ static struct {
int index;
char *name;
} protocols[] = {
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
- {LANMAN_PROT, "\2LM1.2X002"},
- {LANMAN2_PROT, "\2LANMAN2.1"},
-#endif /* weak password hashing for legacy clients */
{CIFS_PROT, "\2NT LM 0.12"},
{BAD_PROT, "\2"}
};
@@ -66,17 +58,9 @@ static struct {
/* define the number of elements in the cifs dialect array */
#ifdef CONFIG_CIFS_POSIX
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-#define CIFS_NUM_PROT 4
-#else
#define CIFS_NUM_PROT 2
-#endif /* CIFS_WEAK_PW_HASH */
#else /* not posix */
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-#define CIFS_NUM_PROT 3
-#else
#define CIFS_NUM_PROT 1
-#endif /* CONFIG_CIFS_WEAK_PW_HASH */
#endif /* CIFS_POSIX */
/*
@@ -475,89 +459,6 @@ cifs_enable_signing(struct TCP_Server_Info *server, bool mnt_sign_required)
return 0;
}
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-static int
-decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr)
-{
- __s16 tmp;
- struct lanman_neg_rsp *rsp = (struct lanman_neg_rsp *)pSMBr;
-
- if (server->dialect != LANMAN_PROT && server->dialect != LANMAN2_PROT)
- return -EOPNOTSUPP;
-
- server->sec_mode = le16_to_cpu(rsp->SecurityMode);
- server->maxReq = min_t(unsigned int,
- le16_to_cpu(rsp->MaxMpxCount),
- cifs_max_pending);
- set_credits(server, server->maxReq);
- server->maxBuf = le16_to_cpu(rsp->MaxBufSize);
- /* set up max_read for readpages check */
- server->max_read = server->maxBuf;
- /* even though we do not use raw we might as well set this
- accurately, in case we ever find a need for it */
- if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) {
- server->max_rw = 0xFF00;
- server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE;
- } else {
- server->max_rw = 0;/* do not need to use raw anyway */
- server->capabilities = CAP_MPX_MODE;
- }
- tmp = (__s16)le16_to_cpu(rsp->ServerTimeZone);
- if (tmp == -1) {
- /* OS/2 often does not set timezone therefore
- * we must use server time to calc time zone.
- * Could deviate slightly from the right zone.
- * Smallest defined timezone difference is 15 minutes
- * (i.e. Nepal). Rounding up/down is done to match
- * this requirement.
- */
- int val, seconds, remain, result;
- struct timespec64 ts;
- time64_t utc = ktime_get_real_seconds();
- ts = cnvrtDosUnixTm(rsp->SrvTime.Date,
- rsp->SrvTime.Time, 0);
- cifs_dbg(FYI, "SrvTime %lld sec since 1970 (utc: %lld) diff: %lld\n",
- ts.tv_sec, utc,
- utc - ts.tv_sec);
- val = (int)(utc - ts.tv_sec);
- seconds = abs(val);
- result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ;
- remain = seconds % MIN_TZ_ADJ;
- if (remain >= (MIN_TZ_ADJ / 2))
- result += MIN_TZ_ADJ;
- if (val < 0)
- result = -result;
- server->timeAdj = result;
- } else {
- server->timeAdj = (int)tmp;
- server->timeAdj *= 60; /* also in seconds */
- }
- cifs_dbg(FYI, "server->timeAdj: %d seconds\n", server->timeAdj);
-
-
- /* BB get server time for time conversions and add
- code to use it and timezone since this is not UTC */
-
- if (rsp->EncryptionKeyLength ==
- cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
- memcpy(server->cryptkey, rsp->EncryptionKey,
- CIFS_CRYPTO_KEY_SIZE);
- } else if (server->sec_mode & SECMODE_PW_ENCRYPT) {
- return -EIO; /* need cryptkey unless plain text */
- }
-
- cifs_dbg(FYI, "LANMAN negotiated\n");
- return 0;
-}
-#else
-static inline int
-decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr)
-{
- cifs_dbg(VFS, "mount failed, cifs module not built with CIFS_WEAK_PW_HASH support\n");
- return -EOPNOTSUPP;
-}
-#endif
-
static bool
should_set_ext_sec_flag(enum securityEnum sectype)
{
@@ -626,16 +527,12 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses)
server->dialect = le16_to_cpu(pSMBr->DialectIndex);
cifs_dbg(FYI, "Dialect: %d\n", server->dialect);
/* Check wct = 1 error case */
- if ((pSMBr->hdr.WordCount < 13) || (server->dialect == BAD_PROT)) {
+ if ((pSMBr->hdr.WordCount <= 13) || (server->dialect == BAD_PROT)) {
/* core returns wct = 1, but we do not ask for core - otherwise
small wct just comes when dialect index is -1 indicating we
could not negotiate a common dialect */
rc = -EOPNOTSUPP;
goto neg_err_exit;
- } else if (pSMBr->hdr.WordCount == 13) {
- server->negflavor = CIFS_NEGFLAVOR_LANMAN;
- rc = decode_lanman_negprot_rsp(server, pSMBr);
- goto signing_check;
} else if (pSMBr->hdr.WordCount != 17) {
/* unknown wct */
rc = -EOPNOTSUPP;
@@ -677,7 +574,6 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses)
server->capabilities &= ~CAP_EXTENDED_SECURITY;
}
-signing_check:
if (!rc)
rc = cifs_enable_signing(server, ses->sign);
neg_err_exit:
@@ -2101,6 +1997,7 @@ cifs_writev_complete(struct work_struct *work)
else if (wdata->result < 0)
SetPageError(page);
end_page_writeback(page);
+ cifs_readpage_to_fscache(inode, page);
put_page(page);
}
if (wdata->result != -EAGAIN)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 3781eee9360a..0db344807ef1 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3684,38 +3684,6 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
*bcc_ptr = 0; /* password is null byte */
bcc_ptr++; /* skip password */
/* already aligned so no need to do it below */
- } else {
- pSMB->PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
- /* BB FIXME add code to fail this if NTLMv2 or Kerberos
- specified as required (when that support is added to
- the vfs in the future) as only NTLM or the much
- weaker LANMAN (which we do not send by default) is accepted
- by Samba (not sure whether other servers allow
- NTLMv2 password here) */
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
- if ((global_secflags & CIFSSEC_MAY_LANMAN) &&
- (ses->sectype == LANMAN))
- calc_lanman_hash(tcon->password, ses->server->cryptkey,
- ses->server->sec_mode &
- SECMODE_PW_ENCRYPT ? true : false,
- bcc_ptr);
- else
-#endif /* CIFS_WEAK_PW_HASH */
- rc = SMBNTencrypt(tcon->password, ses->server->cryptkey,
- bcc_ptr, nls_codepage);
- if (rc) {
- cifs_dbg(FYI, "%s Can't generate NTLM rsp. Error: %d\n",
- __func__, rc);
- cifs_buf_release(smb_buffer);
- return rc;
- }
-
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
- if (ses->capabilities & CAP_UNICODE) {
- /* must align unicode strings */
- *bcc_ptr = 0; /* null byte password */
- bcc_ptr++;
- }
}
if (ses->server->sign)
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 79402ca0ddfa..5f8a302ffcb2 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -100,7 +100,7 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page,
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
pplen = cifs_sb->prepath ? strlen(cifs_sb->prepath) + 1 : 0;
- s = dentry_path_raw(direntry, page, PAGE_SIZE);
+ s = dentry_path_raw(direntry, page, PATH_MAX);
if (IS_ERR(s))
return s;
if (!s[1]) // for root we want "", not "/"
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0a72840a88f1..d0216472f1c6 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -377,6 +377,8 @@ static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
struct cifsLockInfo *li, *tmp;
struct super_block *sb = inode->i_sb;
+ cifs_fscache_release_inode_cookie(inode);
+
/*
* Delete any outstanding lock records. We'll lose them when the file
* is closed anyway.
@@ -882,8 +884,10 @@ int cifs_close(struct inode *inode, struct file *file)
if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
cinode->lease_granted &&
dclose) {
- if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags))
+ if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
inode->i_ctime = inode->i_mtime = current_time(inode);
+ cifs_fscache_update_inode_cookie(inode);
+ }
spin_lock(&cinode->deferred_lock);
cifs_add_deferred_close(cfile, dclose);
if (cfile->deferred_close_scheduled &&
@@ -4170,6 +4174,10 @@ static vm_fault_t
cifs_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
+ struct file *file = vmf->vma->vm_file;
+ struct inode *inode = file_inode(file);
+
+ cifs_fscache_wait_on_page_write(inode, page);
lock_page(page);
return VM_FAULT_LOCKED;
@@ -4235,13 +4243,16 @@ cifs_readv_complete(struct work_struct *work)
(rdata->result == -EAGAIN && got_bytes)) {
flush_dcache_page(page);
SetPageUptodate(page);
- }
+ } else
+ SetPageError(page);
unlock_page(page);
if (rdata->result == 0 ||
(rdata->result == -EAGAIN && got_bytes))
cifs_readpage_to_fscache(rdata->mapping->host, page);
+ else
+ cifs_fscache_uncache_page(rdata->mapping->host, page);
got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
@@ -4848,34 +4859,33 @@ void cifs_oplock_break(struct work_struct *work)
oplock_break_ack:
/*
- * releasing stale oplock after recent reconnect of smb session using
- * a now incorrect file handle is not a data integrity issue but do
- * not bother sending an oplock release if session to server still is
- * disconnected since oplock already released by the server
- */
- if (!cfile->oplock_break_cancelled) {
- rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
- cinode);
- cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
- }
- /*
* When oplock break is received and there are no active
* file handles but cached, then schedule deferred close immediately.
* So, new open will not use cached handle.
*/
spin_lock(&CIFS_I(inode)->deferred_lock);
is_deferred = cifs_is_deferred_close(cfile, &dclose);
+ spin_unlock(&CIFS_I(inode)->deferred_lock);
if (is_deferred &&
cfile->deferred_close_scheduled &&
delayed_work_pending(&cfile->deferred)) {
- /*
- * If there is no pending work, mod_delayed_work queues new work.
- * So, Increase the ref count to avoid use-after-free.
- */
- if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0))
- cifsFileInfo_get(cfile);
+ if (cancel_delayed_work(&cfile->deferred)) {
+ _cifsFileInfo_put(cfile, false, false);
+ goto oplock_break_done;
+ }
}
- spin_unlock(&CIFS_I(inode)->deferred_lock);
+ /*
+ * releasing stale oplock after recent reconnect of smb session using
+ * a now incorrect file handle is not a data integrity issue but do
+ * not bother sending an oplock release if session to server still is
+ * disconnected since oplock already released by the server
+ */
+ if (!cfile->oplock_break_cancelled) {
+ rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
+ cinode);
+ cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
+ }
+oplock_break_done:
_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
cifs_done_oplock_break(cinode);
}
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index eed59bc1d913..3109def8e199 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -57,12 +57,9 @@ static const match_table_t cifs_secflavor_tokens = {
{ Opt_sec_krb5p, "krb5p" },
{ Opt_sec_ntlmsspi, "ntlmsspi" },
{ Opt_sec_ntlmssp, "ntlmssp" },
- { Opt_ntlm, "ntlm" },
- { Opt_sec_ntlmi, "ntlmi" },
{ Opt_sec_ntlmv2, "nontlm" },
{ Opt_sec_ntlmv2, "ntlmv2" },
{ Opt_sec_ntlmv2i, "ntlmv2i" },
- { Opt_sec_lanman, "lanman" },
{ Opt_sec_none, "none" },
{ Opt_sec_err, NULL }
@@ -221,23 +218,12 @@ cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_c
case Opt_sec_ntlmssp:
ctx->sectype = RawNTLMSSP;
break;
- case Opt_sec_ntlmi:
- ctx->sign = true;
- fallthrough;
- case Opt_ntlm:
- ctx->sectype = NTLM;
- break;
case Opt_sec_ntlmv2i:
ctx->sign = true;
fallthrough;
case Opt_sec_ntlmv2:
ctx->sectype = NTLMv2;
break;
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
- case Opt_sec_lanman:
- ctx->sectype = LANMAN;
- break;
-#endif
case Opt_sec_none:
ctx->nullauth = 1;
break;
@@ -1266,10 +1252,17 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
ctx->posix_paths = 1;
break;
case Opt_unix:
- if (result.negated)
+ if (result.negated) {
+ if (ctx->linux_ext == 1)
+ pr_warn_once("conflicting posix mount options specified\n");
ctx->linux_ext = 0;
- else
ctx->no_linux_ext = 1;
+ } else {
+ if (ctx->no_linux_ext == 1)
+ pr_warn_once("conflicting posix mount options specified\n");
+ ctx->linux_ext = 1;
+ ctx->no_linux_ext = 0;
+ }
break;
case Opt_nocase:
ctx->nocase = 1;
diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h
index b6243972edf3..a42ba71d7a81 100644
--- a/fs/cifs/fs_context.h
+++ b/fs/cifs/fs_context.h
@@ -47,11 +47,8 @@ enum cifs_sec_param {
Opt_sec_krb5p,
Opt_sec_ntlmsspi,
Opt_sec_ntlmssp,
- Opt_ntlm,
- Opt_sec_ntlmi,
Opt_sec_ntlmv2,
Opt_sec_ntlmv2i,
- Opt_sec_lanman,
Opt_sec_none,
Opt_sec_err
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index dd625033cd6b..fab47fa7df74 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -176,29 +176,34 @@ void cifs_fscache_release_inode_cookie(struct inode *inode)
auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
+ /* fscache_relinquish_cookie does not seem to update auxdata */
+ fscache_update_cookie(cifsi->fscache, &auxdata);
fscache_relinquish_cookie(cifsi->fscache, &auxdata, false);
cifsi->fscache = NULL;
}
}
-static void cifs_fscache_disable_inode_cookie(struct inode *inode)
+void cifs_fscache_update_inode_cookie(struct inode *inode)
{
+ struct cifs_fscache_inode_auxdata auxdata;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
if (cifsi->fscache) {
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.eof = cifsi->server_eof;
+ auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
+ auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
+ auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
+ auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
+
cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
- fscache_uncache_all_inode_pages(cifsi->fscache, inode);
- fscache_relinquish_cookie(cifsi->fscache, NULL, true);
- cifsi->fscache = NULL;
+ fscache_update_cookie(cifsi->fscache, &auxdata);
}
}
void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
{
- if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
- cifs_fscache_disable_inode_cookie(inode);
- else
- cifs_fscache_enable_inode_cookie(inode);
+ cifs_fscache_enable_inode_cookie(inode);
}
void cifs_fscache_reset_inode_cookie(struct inode *inode)
@@ -310,6 +315,8 @@ void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
struct cifsInodeInfo *cifsi = CIFS_I(inode);
int ret;
+ WARN_ON(!cifsi->fscache);
+
cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n",
__func__, cifsi->fscache, page, inode);
ret = fscache_write_page(cifsi->fscache, page,
@@ -334,3 +341,21 @@ void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
fscache_wait_on_page_write(cookie, page);
fscache_uncache_page(cookie, page);
}
+
+void __cifs_fscache_wait_on_page_write(struct inode *inode, struct page *page)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct fscache_cookie *cookie = cifsi->fscache;
+
+ cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie);
+ fscache_wait_on_page_write(cookie, page);
+}
+
+void __cifs_fscache_uncache_page(struct inode *inode, struct page *page)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct fscache_cookie *cookie = cifsi->fscache;
+
+ cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie);
+ fscache_uncache_page(cookie, page);
+}
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index 3d55cb2ef055..82e856b9cf89 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -55,10 +55,13 @@ extern void cifs_fscache_get_super_cookie(struct cifs_tcon *);
extern void cifs_fscache_release_super_cookie(struct cifs_tcon *);
extern void cifs_fscache_release_inode_cookie(struct inode *);
+extern void cifs_fscache_update_inode_cookie(struct inode *inode);
extern void cifs_fscache_set_inode_cookie(struct inode *, struct file *);
extern void cifs_fscache_reset_inode_cookie(struct inode *);
extern void __cifs_fscache_invalidate_page(struct page *, struct inode *);
+extern void __cifs_fscache_wait_on_page_write(struct inode *inode, struct page *page);
+extern void __cifs_fscache_uncache_page(struct inode *inode, struct page *page);
extern int cifs_fscache_release_page(struct page *page, gfp_t gfp);
extern int __cifs_readpage_from_fscache(struct inode *, struct page *);
extern int __cifs_readpages_from_fscache(struct inode *,
@@ -76,6 +79,20 @@ static inline void cifs_fscache_invalidate_page(struct page *page,
__cifs_fscache_invalidate_page(page, inode);
}
+static inline void cifs_fscache_wait_on_page_write(struct inode *inode,
+ struct page *page)
+{
+ if (PageFsCache(page))
+ __cifs_fscache_wait_on_page_write(inode, page);
+}
+
+static inline void cifs_fscache_uncache_page(struct inode *inode,
+ struct page *page)
+{
+ if (PageFsCache(page))
+ __cifs_fscache_uncache_page(inode, page);
+}
+
static inline int cifs_readpage_from_fscache(struct inode *inode,
struct page *page)
{
@@ -123,6 +140,7 @@ static inline void
cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) {}
static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
+static inline void cifs_fscache_update_inode_cookie(struct inode *inode) {}
static inline void cifs_fscache_set_inode_cookie(struct inode *inode,
struct file *filp) {}
static inline void cifs_fscache_reset_inode_cookie(struct inode *inode) {}
@@ -133,6 +151,11 @@ static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp)
static inline void cifs_fscache_invalidate_page(struct page *page,
struct inode *inode) {}
+static inline void cifs_fscache_wait_on_page_write(struct inode *inode,
+ struct page *page) {}
+static inline void cifs_fscache_uncache_page(struct inode *inode,
+ struct page *page) {}
+
static inline int
cifs_readpage_from_fscache(struct inode *inode, struct page *page)
{
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b96b253e7635..50c01cff4c84 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1625,7 +1625,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
goto unlink_out;
}
- cifs_close_all_deferred_files(tcon);
+ cifs_close_deferred_file(CIFS_I(inode));
if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
rc = CIFSPOSIXDelFile(xid, tcon, full_path,
@@ -2084,6 +2084,7 @@ cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir,
FILE_UNIX_BASIC_INFO *info_buf_target;
unsigned int xid;
int rc, tmprc;
+ int retry_count = 0;
if (flags & ~RENAME_NOREPLACE)
return -EINVAL;
@@ -2113,10 +2114,24 @@ cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir,
goto cifs_rename_exit;
}
- cifs_close_all_deferred_files(tcon);
+ cifs_close_deferred_file(CIFS_I(d_inode(source_dentry)));
+ if (d_inode(target_dentry) != NULL)
+ cifs_close_deferred_file(CIFS_I(d_inode(target_dentry)));
+
rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry,
to_name);
+ if (rc == -EACCES) {
+ while (retry_count < 3) {
+ cifs_close_all_deferred_files(tcon);
+ rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry,
+ to_name);
+ if (rc != -EACCES)
+ break;
+ retry_count++;
+ }
+ }
+
/*
* No-replace is the natural behavior for CIFS, so skip unlink hacks.
*/
@@ -2282,6 +2297,7 @@ cifs_revalidate_mapping(struct inode *inode)
{
int rc;
unsigned long *flags = &CIFS_I(inode)->flags;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
/* swapfiles are not supposed to be shared */
if (IS_SWAPFILE(inode))
@@ -2293,11 +2309,16 @@ cifs_revalidate_mapping(struct inode *inode)
return rc;
if (test_and_clear_bit(CIFS_INO_INVALID_MAPPING, flags)) {
+ /* for cache=singleclient, do not invalidate */
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE)
+ goto skip_invalidate;
+
rc = cifs_invalidate_mapping(inode);
if (rc)
set_bit(CIFS_INO_INVALID_MAPPING, flags);
}
+skip_invalidate:
clear_bit_unlock(CIFS_INO_LOCK, flags);
smp_mb__after_atomic();
wake_up_bit(flags, CIFS_INO_LOCK);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 844abeb2b48f..9469f1cf0b46 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -723,13 +723,31 @@ void
cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode)
{
struct cifsFileInfo *cfile = NULL;
- struct cifs_deferred_close *dclose;
+ struct file_list *tmp_list, *tmp_next_list;
+ struct list_head file_head;
+
+ if (cifs_inode == NULL)
+ return;
+ INIT_LIST_HEAD(&file_head);
+ spin_lock(&cifs_inode->open_file_lock);
list_for_each_entry(cfile, &cifs_inode->openFileList, flist) {
- spin_lock(&cifs_inode->deferred_lock);
- if (cifs_is_deferred_close(cfile, &dclose))
- mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
- spin_unlock(&cifs_inode->deferred_lock);
+ if (delayed_work_pending(&cfile->deferred)) {
+ if (cancel_delayed_work(&cfile->deferred)) {
+ tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC);
+ if (tmp_list == NULL)
+ continue;
+ tmp_list->cfile = cfile;
+ list_add_tail(&tmp_list->list, &file_head);
+ }
+ }
+ }
+ spin_unlock(&cifs_inode->open_file_lock);
+
+ list_for_each_entry_safe(tmp_list, tmp_next_list, &file_head, list) {
+ _cifsFileInfo_put(tmp_list->cfile, true, false);
+ list_del(&tmp_list->list);
+ kfree(tmp_list);
}
}
@@ -738,20 +756,30 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon)
{
struct cifsFileInfo *cfile;
struct list_head *tmp;
+ struct file_list *tmp_list, *tmp_next_list;
+ struct list_head file_head;
+ INIT_LIST_HEAD(&file_head);
spin_lock(&tcon->open_file_lock);
list_for_each(tmp, &tcon->openFileList) {
cfile = list_entry(tmp, struct cifsFileInfo, tlist);
if (delayed_work_pending(&cfile->deferred)) {
- /*
- * If there is no pending work, mod_delayed_work queues new work.
- * So, Increase the ref count to avoid use-after-free.
- */
- if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0))
- cifsFileInfo_get(cfile);
+ if (cancel_delayed_work(&cfile->deferred)) {
+ tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC);
+ if (tmp_list == NULL)
+ continue;
+ tmp_list->cfile = cfile;
+ list_add_tail(&tmp_list->list, &file_head);
+ }
}
}
spin_unlock(&tcon->open_file_lock);
+
+ list_for_each_entry_safe(tmp_list, tmp_next_list, &file_head, list) {
+ _cifsFileInfo_put(tmp_list->cfile, true, false);
+ list_del(&tmp_list->list);
+ kfree(tmp_list);
+ }
}
/* parses DFS refferal V3 structure
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index bfee176b901d..54d77c99e21c 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -369,7 +369,7 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb,
*/
static int
-initiate_cifs_search(const unsigned int xid, struct file *file,
+_initiate_cifs_search(const unsigned int xid, struct file *file,
const char *full_path)
{
__u16 search_flags;
@@ -451,6 +451,27 @@ error_exit:
return rc;
}
+static int
+initiate_cifs_search(const unsigned int xid, struct file *file,
+ const char *full_path)
+{
+ int rc, retry_count = 0;
+
+ do {
+ rc = _initiate_cifs_search(xid, file, full_path);
+ /*
+ * If we don't have enough credits to start reading the
+ * directory just try again after short wait.
+ */
+ if (rc != -EDEADLK)
+ break;
+
+ usleep_range(512, 2048);
+ } while (retry_count++ < 5);
+
+ return rc;
+}
+
/* return length of unicode string in bytes */
static int cifs_unicode_bytelen(const char *str)
{
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index c5785fd3f52e..118403fbeda2 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -799,30 +799,16 @@ cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
}
case CIFS_NEGFLAVOR_UNENCAP:
switch (requested) {
- case NTLM:
case NTLMv2:
return requested;
case Unspecified:
if (global_secflags & CIFSSEC_MAY_NTLMV2)
return NTLMv2;
- if (global_secflags & CIFSSEC_MAY_NTLM)
- return NTLM;
break;
default:
break;
}
- fallthrough; /* to attempt LANMAN authentication next */
- case CIFS_NEGFLAVOR_LANMAN:
- switch (requested) {
- case LANMAN:
- return requested;
- case Unspecified:
- if (global_secflags & CIFSSEC_MAY_LANMAN)
- return LANMAN;
- fallthrough;
- default:
- return Unspecified;
- }
+ fallthrough;
default:
return Unspecified;
}
@@ -877,7 +863,7 @@ sess_alloc_buffer(struct sess_data *sess_data, int wct)
return 0;
out_free_smb_buf:
- kfree(smb_buf);
+ cifs_small_buf_release(smb_buf);
sess_data->iov[0].iov_base = NULL;
sess_data->iov[0].iov_len = 0;
sess_data->buf0_type = CIFS_NO_BUFFER;
@@ -947,230 +933,6 @@ sess_sendreceive(struct sess_data *sess_data)
return rc;
}
-/*
- * LANMAN and plaintext are less secure and off by default.
- * So we make this explicitly be turned on in kconfig (in the
- * build) and turned on at runtime (changed from the default)
- * in proc/fs/cifs or via mount parm. Unfortunately this is
- * needed for old Win (e.g. Win95), some obscure NAS and OS/2
- */
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-static void
-sess_auth_lanman(struct sess_data *sess_data)
-{
- int rc = 0;
- struct smb_hdr *smb_buf;
- SESSION_SETUP_ANDX *pSMB;
- char *bcc_ptr;
- struct cifs_ses *ses = sess_data->ses;
- char lnm_session_key[CIFS_AUTH_RESP_SIZE];
- __u16 bytes_remaining;
-
- /* lanman 2 style sessionsetup */
- /* wct = 10 */
- rc = sess_alloc_buffer(sess_data, 10);
- if (rc)
- goto out;
-
- pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
- bcc_ptr = sess_data->iov[2].iov_base;
- (void)cifs_ssetup_hdr(ses, pSMB);
-
- pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
-
- if (ses->user_name != NULL) {
- /* no capabilities flags in old lanman negotiation */
- pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
-
- /* Calculate hash with password and copy into bcc_ptr.
- * Encryption Key (stored as in cryptkey) gets used if the
- * security mode bit in Negotiate Protocol response states
- * to use challenge/response method (i.e. Password bit is 1).
- */
- rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
- ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
- true : false, lnm_session_key);
- if (rc)
- goto out;
-
- memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
- } else {
- pSMB->old_req.PasswordLength = 0;
- }
-
- /*
- * can not sign if LANMAN negotiated so no need
- * to calculate signing key? but what if server
- * changed to do higher than lanman dialect and
- * we reconnected would we ever calc signing_key?
- */
-
- cifs_dbg(FYI, "Negotiating LANMAN setting up strings\n");
- /* Unicode not allowed for LANMAN dialects */
- ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
-
- sess_data->iov[2].iov_len = (long) bcc_ptr -
- (long) sess_data->iov[2].iov_base;
-
- rc = sess_sendreceive(sess_data);
- if (rc)
- goto out;
-
- pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
- smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
-
- /* lanman response has a word count of 3 */
- if (smb_buf->WordCount != 3) {
- rc = -EIO;
- cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
- goto out;
- }
-
- if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
- cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
-
- ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
- cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
-
- bytes_remaining = get_bcc(smb_buf);
- bcc_ptr = pByteArea(smb_buf);
-
- /* BB check if Unicode and decode strings */
- if (bytes_remaining == 0) {
- /* no string area to decode, do nothing */
- } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
- /* unicode string area must be word-aligned */
- if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
- ++bcc_ptr;
- --bytes_remaining;
- }
- decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
- sess_data->nls_cp);
- } else {
- decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
- sess_data->nls_cp);
- }
-
- rc = sess_establish_session(sess_data);
-out:
- sess_data->result = rc;
- sess_data->func = NULL;
- sess_free_buffer(sess_data);
-}
-
-#endif
-
-static void
-sess_auth_ntlm(struct sess_data *sess_data)
-{
- int rc = 0;
- struct smb_hdr *smb_buf;
- SESSION_SETUP_ANDX *pSMB;
- char *bcc_ptr;
- struct cifs_ses *ses = sess_data->ses;
- __u32 capabilities;
- __u16 bytes_remaining;
-
- /* old style NTLM sessionsetup */
- /* wct = 13 */
- rc = sess_alloc_buffer(sess_data, 13);
- if (rc)
- goto out;
-
- pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
- bcc_ptr = sess_data->iov[2].iov_base;
- capabilities = cifs_ssetup_hdr(ses, pSMB);
-
- pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
- if (ses->user_name != NULL) {
- pSMB->req_no_secext.CaseInsensitivePasswordLength =
- cpu_to_le16(CIFS_AUTH_RESP_SIZE);
- pSMB->req_no_secext.CaseSensitivePasswordLength =
- cpu_to_le16(CIFS_AUTH_RESP_SIZE);
-
- /* calculate ntlm response and session key */
- rc = setup_ntlm_response(ses, sess_data->nls_cp);
- if (rc) {
- cifs_dbg(VFS, "Error %d during NTLM authentication\n",
- rc);
- goto out;
- }
-
- /* copy ntlm response */
- memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
- CIFS_AUTH_RESP_SIZE);
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
- memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
- CIFS_AUTH_RESP_SIZE);
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
- } else {
- pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
- pSMB->req_no_secext.CaseSensitivePasswordLength = 0;
- }
-
- if (ses->capabilities & CAP_UNICODE) {
- /* unicode strings must be word aligned */
- if (sess_data->iov[0].iov_len % 2) {
- *bcc_ptr = 0;
- bcc_ptr++;
- }
- unicode_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
- } else {
- ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
- }
-
-
- sess_data->iov[2].iov_len = (long) bcc_ptr -
- (long) sess_data->iov[2].iov_base;
-
- rc = sess_sendreceive(sess_data);
- if (rc)
- goto out;
-
- pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
- smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
-
- if (smb_buf->WordCount != 3) {
- rc = -EIO;
- cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
- goto out;
- }
-
- if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
- cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
-
- ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
- cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
-
- bytes_remaining = get_bcc(smb_buf);
- bcc_ptr = pByteArea(smb_buf);
-
- /* BB check if Unicode and decode strings */
- if (bytes_remaining == 0) {
- /* no string area to decode, do nothing */
- } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
- /* unicode string area must be word-aligned */
- if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
- ++bcc_ptr;
- --bytes_remaining;
- }
- decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
- sess_data->nls_cp);
- } else {
- decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
- sess_data->nls_cp);
- }
-
- rc = sess_establish_session(sess_data);
-out:
- sess_data->result = rc;
- sess_data->func = NULL;
- sess_free_buffer(sess_data);
- kfree(ses->auth_key.response);
- ses->auth_key.response = NULL;
-}
-
static void
sess_auth_ntlmv2(struct sess_data *sess_data)
{
@@ -1675,21 +1437,6 @@ static int select_sec(struct cifs_ses *ses, struct sess_data *sess_data)
}
switch (type) {
- case LANMAN:
- /* LANMAN and plaintext are less secure and off by default.
- * So we make this explicitly be turned on in kconfig (in the
- * build) and turned on at runtime (changed from the default)
- * in proc/fs/cifs or via mount parm. Unfortunately this is
- * needed for old Win (e.g. Win95), some obscure NAS and OS/2 */
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
- sess_data->func = sess_auth_lanman;
- break;
-#else
- return -EOPNOTSUPP;
-#endif
- case NTLM:
- sess_data->func = sess_auth_ntlm;
- break;
case NTLMv2:
sess_data->func = sess_auth_ntlmv2;
break;
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index cea39bcecbab..181514b8770d 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: LGPL-2.1
/*
- * fs/smb2/smb2maperror.c
*
* Functions which do error mapping of SMB2 status codes to POSIX errors
*
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 2dfd0d8297eb..ddc0e8f97872 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3590,6 +3590,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
return rc;
}
+ filemap_invalidate_lock(inode->i_mapping);
/*
* We implement the punch hole through ioctl, so we need remove the page
* caches first, otherwise the data may be inconsistent with the server.
@@ -3607,6 +3608,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
sizeof(struct file_zero_data_information),
CIFSMaxBufSize, NULL, NULL);
free_xid(xid);
+ filemap_invalidate_unlock(inode->i_mapping);
return rc;
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 781d14e5f2af..b6d2e3591927 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2426,7 +2426,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len)
memcpy(aclptr, &acl, sizeof(struct cifs_acl));
buf->ccontext.DataLength = cpu_to_le32(ptr - (__u8 *)&buf->sd);
- *len = ptr - (__u8 *)buf;
+ *len = roundup(ptr - (__u8 *)buf, 8);
return buf;
}
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 39a938443e3e..10047cc55286 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -18,13 +18,13 @@
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/random.h>
-#include <crypto/des.h>
#include "cifs_fs_sb.h"
#include "cifs_unicode.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifs_debug.h"
#include "cifsproto.h"
+#include "../cifs_common/md4.h"
#ifndef false
#define false 0
@@ -38,126 +38,29 @@
#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8)
#define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val)))
-static void
-str_to_key(unsigned char *str, unsigned char *key)
-{
- int i;
-
- key[0] = str[0] >> 1;
- key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
- key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
- key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
- key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
- key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
- key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
- key[7] = str[6] & 0x7F;
- for (i = 0; i < 8; i++)
- key[i] = (key[i] << 1);
-}
-
-static int
-smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
-{
- unsigned char key2[8];
- struct des_ctx ctx;
-
- str_to_key(key, key2);
-
- if (fips_enabled) {
- cifs_dbg(VFS, "FIPS compliance enabled: DES not permitted\n");
- return -ENOENT;
- }
-
- des_expand_key(&ctx, key2, DES_KEY_SIZE);
- des_encrypt(&ctx, out, in);
- memzero_explicit(&ctx, sizeof(ctx));
-
- return 0;
-}
-
-static int
-E_P16(unsigned char *p14, unsigned char *p16)
-{
- int rc;
- unsigned char sp8[8] =
- { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
-
- rc = smbhash(p16, sp8, p14);
- if (rc)
- return rc;
- rc = smbhash(p16 + 8, sp8, p14 + 7);
- return rc;
-}
-
-static int
-E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
-{
- int rc;
-
- rc = smbhash(p24, c8, p21);
- if (rc)
- return rc;
- rc = smbhash(p24 + 8, c8, p21 + 7);
- if (rc)
- return rc;
- rc = smbhash(p24 + 16, c8, p21 + 14);
- return rc;
-}
-
/* produce a md4 message digest from data of length n bytes */
-int
+static int
mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
{
int rc;
- struct crypto_shash *md4 = NULL;
- struct sdesc *sdescmd4 = NULL;
-
- rc = cifs_alloc_hash("md4", &md4, &sdescmd4);
- if (rc)
- goto mdfour_err;
+ struct md4_ctx mctx;
- rc = crypto_shash_init(&sdescmd4->shash);
+ rc = cifs_md4_init(&mctx);
if (rc) {
- cifs_dbg(VFS, "%s: Could not init md4 shash\n", __func__);
+ cifs_dbg(VFS, "%s: Could not init MD4\n", __func__);
goto mdfour_err;
}
- rc = crypto_shash_update(&sdescmd4->shash, link_str, link_len);
+ rc = cifs_md4_update(&mctx, link_str, link_len);
if (rc) {
- cifs_dbg(VFS, "%s: Could not update with link_str\n", __func__);
+ cifs_dbg(VFS, "%s: Could not update MD4\n", __func__);
goto mdfour_err;
}
- rc = crypto_shash_final(&sdescmd4->shash, md4_hash);
+ rc = cifs_md4_final(&mctx, md4_hash);
if (rc)
- cifs_dbg(VFS, "%s: Could not generate md4 hash\n", __func__);
-
-mdfour_err:
- cifs_free_hash(&md4, &sdescmd4);
- return rc;
-}
-
-/*
- This implements the X/Open SMB password encryption
- It takes a password, a 8 byte "crypt key" and puts 24 bytes of
- encrypted password into p24 */
-/* Note that password must be uppercased and null terminated */
-int
-SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24)
-{
- int rc;
- unsigned char p14[14], p16[16], p21[21];
+ cifs_dbg(VFS, "%s: Could not finalize MD4\n", __func__);
- memset(p14, '\0', 14);
- memset(p16, '\0', 16);
- memset(p21, '\0', 21);
-
- memcpy(p14, passwd, 14);
- rc = E_P16(p14, p16);
- if (rc)
- return rc;
-
- memcpy(p21, p16, 16);
- rc = E_P24(p21, c8, p24);
+mdfour_err:
return rc;
}
@@ -186,25 +89,3 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16,
return rc;
}
-
-/* Does the NT MD4 hash then des encryption. */
-int
-SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24,
- const struct nls_table *codepage)
-{
- int rc;
- unsigned char p16[16], p21[21];
-
- memset(p16, '\0', 16);
- memset(p21, '\0', 21);
-
- rc = E_md4hash(passwd, p16, codepage);
- if (rc) {
- cifs_dbg(FYI, "%s Can't generate NT hash, error: %d\n",
- __func__, rc);
- return rc;
- }
- memcpy(p21, p16, 16);
- rc = E_P24(p21, c8, p24);
- return rc;
-}
diff --git a/fs/cifs_common/Makefile b/fs/cifs_common/Makefile
new file mode 100644
index 000000000000..6fedd2f88a25
--- /dev/null
+++ b/fs/cifs_common/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for Linux filesystem routines that are shared by client and server.
+#
+
+obj-$(CONFIG_CIFS_COMMON) += cifs_arc4.o
+obj-$(CONFIG_CIFS_COMMON) += cifs_md4.o
diff --git a/fs/cifs_common/arc4.h b/fs/cifs_common/arc4.h
new file mode 100644
index 000000000000..12e71ec033a1
--- /dev/null
+++ b/fs/cifs_common/arc4.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Common values for ARC4 Cipher Algorithm
+ */
+
+#ifndef _CRYPTO_ARC4_H
+#define _CRYPTO_ARC4_H
+
+#include <linux/types.h>
+
+#define ARC4_MIN_KEY_SIZE 1
+#define ARC4_MAX_KEY_SIZE 256
+#define ARC4_BLOCK_SIZE 1
+
+struct arc4_ctx {
+ u32 S[256];
+ u32 x, y;
+};
+
+int cifs_arc4_setkey(struct arc4_ctx *ctx, const u8 *in_key, unsigned int key_len);
+void cifs_arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int len);
+
+#endif /* _CRYPTO_ARC4_H */
diff --git a/fs/cifs_common/cifs_arc4.c b/fs/cifs_common/cifs_arc4.c
new file mode 100644
index 000000000000..b964cc682944
--- /dev/null
+++ b/fs/cifs_common/cifs_arc4.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Cryptographic API
+ *
+ * ARC4 Cipher Algorithm
+ *
+ * Jon Oberheide <jon@oberheide.org>
+ */
+
+#include <linux/module.h>
+#include "arc4.h"
+
+MODULE_LICENSE("GPL");
+
+int cifs_arc4_setkey(struct arc4_ctx *ctx, const u8 *in_key, unsigned int key_len)
+{
+ int i, j = 0, k = 0;
+
+ ctx->x = 1;
+ ctx->y = 0;
+
+ for (i = 0; i < 256; i++)
+ ctx->S[i] = i;
+
+ for (i = 0; i < 256; i++) {
+ u32 a = ctx->S[i];
+
+ j = (j + in_key[k] + a) & 0xff;
+ ctx->S[i] = ctx->S[j];
+ ctx->S[j] = a;
+ if (++k >= key_len)
+ k = 0;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cifs_arc4_setkey);
+
+void cifs_arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int len)
+{
+ u32 *const S = ctx->S;
+ u32 x, y, a, b;
+ u32 ty, ta, tb;
+
+ if (len == 0)
+ return;
+
+ x = ctx->x;
+ y = ctx->y;
+
+ a = S[x];
+ y = (y + a) & 0xff;
+ b = S[y];
+
+ do {
+ S[y] = a;
+ a = (a + b) & 0xff;
+ S[x] = b;
+ x = (x + 1) & 0xff;
+ ta = S[x];
+ ty = (y + ta) & 0xff;
+ tb = S[ty];
+ *out++ = *in++ ^ S[a];
+ if (--len == 0)
+ break;
+ y = ty;
+ a = ta;
+ b = tb;
+ } while (true);
+
+ ctx->x = x;
+ ctx->y = y;
+}
+EXPORT_SYMBOL_GPL(cifs_arc4_crypt);
+
+static int __init
+init_cifs_common(void)
+{
+ return 0;
+}
+/* Unload hook: tagged __exit, since __init text is discarded once loading finishes. */
+static void __exit exit_cifs_common(void)
+{
+}
+
+module_init(init_cifs_common)
+module_exit(exit_cifs_common)
diff --git a/fs/cifs_common/cifs_md4.c b/fs/cifs_common/cifs_md4.c
new file mode 100644
index 000000000000..50f78cfc6ce9
--- /dev/null
+++ b/fs/cifs_common/cifs_md4.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Cryptographic API.
+ *
+ * MD4 Message Digest Algorithm (RFC1320).
+ *
+ * Implementation derived from Andrew Tridgell and Steve French's
+ * CIFS MD4 implementation, and the cryptoapi implementation
+ * originally based on the public domain implementation written
+ * by Colin Plumb in 1993.
+ *
+ * Copyright (c) Andrew Tridgell 1997-1998.
+ * Modified by Steve French (sfrench@us.ibm.com) 2002
+ * Copyright (c) Cryptoapi developers.
+ * Copyright (c) 2002 David S. Miller (davem@redhat.com)
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ *
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include "md4.h"
+
+MODULE_LICENSE("GPL");
+
+static inline u32 lshift(u32 x, unsigned int s)
+{
+ x &= 0xFFFFFFFF;
+ return ((x << s) & 0xFFFFFFFF) | (x >> (32 - s));
+}
+
+static inline u32 F(u32 x, u32 y, u32 z)
+{
+ return (x & y) | ((~x) & z);
+}
+
+static inline u32 G(u32 x, u32 y, u32 z)
+{
+ return (x & y) | (x & z) | (y & z);
+}
+
+static inline u32 H(u32 x, u32 y, u32 z)
+{
+ return x ^ y ^ z;
+}
+
+#define ROUND1(a,b,c,d,k,s) (a = lshift(a + F(b,c,d) + k, s))
+#define ROUND2(a,b,c,d,k,s) (a = lshift(a + G(b,c,d) + k + (u32)0x5A827999,s))
+#define ROUND3(a,b,c,d,k,s) (a = lshift(a + H(b,c,d) + k + (u32)0x6ED9EBA1,s))
+
+static void md4_transform(u32 *hash, u32 const *in)
+{
+ u32 a, b, c, d;
+
+ a = hash[0];
+ b = hash[1];
+ c = hash[2];
+ d = hash[3];
+
+ ROUND1(a, b, c, d, in[0], 3);
+ ROUND1(d, a, b, c, in[1], 7);
+ ROUND1(c, d, a, b, in[2], 11);
+ ROUND1(b, c, d, a, in[3], 19);
+ ROUND1(a, b, c, d, in[4], 3);
+ ROUND1(d, a, b, c, in[5], 7);
+ ROUND1(c, d, a, b, in[6], 11);
+ ROUND1(b, c, d, a, in[7], 19);
+ ROUND1(a, b, c, d, in[8], 3);
+ ROUND1(d, a, b, c, in[9], 7);
+ ROUND1(c, d, a, b, in[10], 11);
+ ROUND1(b, c, d, a, in[11], 19);
+ ROUND1(a, b, c, d, in[12], 3);
+ ROUND1(d, a, b, c, in[13], 7);
+ ROUND1(c, d, a, b, in[14], 11);
+ ROUND1(b, c, d, a, in[15], 19);
+
+ ROUND2(a, b, c, d, in[0], 3);
+ ROUND2(d, a, b, c, in[4], 5);
+ ROUND2(c, d, a, b, in[8], 9);
+ ROUND2(b, c, d, a, in[12], 13);
+ ROUND2(a, b, c, d, in[1], 3);
+ ROUND2(d, a, b, c, in[5], 5);
+ ROUND2(c, d, a, b, in[9], 9);
+ ROUND2(b, c, d, a, in[13], 13);
+ ROUND2(a, b, c, d, in[2], 3);
+ ROUND2(d, a, b, c, in[6], 5);
+ ROUND2(c, d, a, b, in[10], 9);
+ ROUND2(b, c, d, a, in[14], 13);
+ ROUND2(a, b, c, d, in[3], 3);
+ ROUND2(d, a, b, c, in[7], 5);
+ ROUND2(c, d, a, b, in[11], 9);
+ ROUND2(b, c, d, a, in[15], 13);
+
+ ROUND3(a, b, c, d, in[0], 3);
+ ROUND3(d, a, b, c, in[8], 9);
+ ROUND3(c, d, a, b, in[4], 11);
+ ROUND3(b, c, d, a, in[12], 15);
+ ROUND3(a, b, c, d, in[2], 3);
+ ROUND3(d, a, b, c, in[10], 9);
+ ROUND3(c, d, a, b, in[6], 11);
+ ROUND3(b, c, d, a, in[14], 15);
+ ROUND3(a, b, c, d, in[1], 3);
+ ROUND3(d, a, b, c, in[9], 9);
+ ROUND3(c, d, a, b, in[5], 11);
+ ROUND3(b, c, d, a, in[13], 15);
+ ROUND3(a, b, c, d, in[3], 3);
+ ROUND3(d, a, b, c, in[11], 9);
+ ROUND3(c, d, a, b, in[7], 11);
+ ROUND3(b, c, d, a, in[15], 15);
+
+ hash[0] += a;
+ hash[1] += b;
+ hash[2] += c;
+ hash[3] += d;
+}
+
+static inline void md4_transform_helper(struct md4_ctx *ctx)
+{
+ le32_to_cpu_array(ctx->block, ARRAY_SIZE(ctx->block));
+ md4_transform(ctx->hash, ctx->block);
+}
+
+int cifs_md4_init(struct md4_ctx *mctx)
+{
+ memset(mctx, 0, sizeof(struct md4_ctx));
+ mctx->hash[0] = 0x67452301;
+ mctx->hash[1] = 0xefcdab89;
+ mctx->hash[2] = 0x98badcfe;
+ mctx->hash[3] = 0x10325476;
+ mctx->byte_count = 0;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cifs_md4_init);
+
+int cifs_md4_update(struct md4_ctx *mctx, const u8 *data, unsigned int len)
+{
+ const u32 avail = sizeof(mctx->block) - (mctx->byte_count & 0x3f);
+
+ mctx->byte_count += len;
+
+ if (avail > len) {
+ memcpy((char *)mctx->block + (sizeof(mctx->block) - avail),
+ data, len);
+ return 0;
+ }
+
+ memcpy((char *)mctx->block + (sizeof(mctx->block) - avail),
+ data, avail);
+
+ md4_transform_helper(mctx);
+ data += avail;
+ len -= avail;
+
+ while (len >= sizeof(mctx->block)) {
+ memcpy(mctx->block, data, sizeof(mctx->block));
+ md4_transform_helper(mctx);
+ data += sizeof(mctx->block);
+ len -= sizeof(mctx->block);
+ }
+
+ memcpy(mctx->block, data, len);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cifs_md4_update);
+
+int cifs_md4_final(struct md4_ctx *mctx, u8 *out)
+{
+ const unsigned int offset = mctx->byte_count & 0x3f;
+ char *p = (char *)mctx->block + offset;
+ int padding = 56 - (offset + 1);
+
+ *p++ = 0x80;
+ if (padding < 0) {
+ memset(p, 0x00, padding + sizeof(u64));
+ md4_transform_helper(mctx);
+ p = (char *)mctx->block;
+ padding = 56;
+ }
+
+ memset(p, 0, padding);
+ mctx->block[14] = mctx->byte_count << 3;
+ mctx->block[15] = mctx->byte_count >> 29;
+ le32_to_cpu_array(mctx->block, (sizeof(mctx->block) -
+ sizeof(u64)) / sizeof(u32));
+ md4_transform(mctx->hash, mctx->block);
+ cpu_to_le32_array(mctx->hash, ARRAY_SIZE(mctx->hash));
+ memcpy(out, mctx->hash, sizeof(mctx->hash));
+ memset(mctx, 0, sizeof(*mctx));
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cifs_md4_final);
diff --git a/fs/cifs_common/md4.h b/fs/cifs_common/md4.h
new file mode 100644
index 000000000000..5337becc699a
--- /dev/null
+++ b/fs/cifs_common/md4.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Common values for MD4 Message Digest Algorithm
+ */
+
+#ifndef _CIFS_MD4_H
+#define _CIFS_MD4_H
+
+#include <linux/types.h>
+
+#define MD4_DIGEST_SIZE 16
+#define MD4_HMAC_BLOCK_SIZE 64
+#define MD4_BLOCK_WORDS 16
+#define MD4_HASH_WORDS 4
+
+struct md4_ctx {
+ u32 hash[MD4_HASH_WORDS];
+ u32 block[MD4_BLOCK_WORDS];
+ u64 byte_count;
+};
+
+
+int cifs_md4_init(struct md4_ctx *mctx);
+int cifs_md4_update(struct md4_ctx *mctx, const u8 *data, unsigned int len);
+int cifs_md4_final(struct md4_ctx *mctx, u8 *out);
+
+#endif /* _CIFS_MD4_H */
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 5a0be9985bae..0ad32150611e 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -177,28 +177,22 @@ out:
return retval;
}
-/* Fill [buffer, buffer + pos) with data coming from @from. */
-static int fill_write_buffer(struct configfs_buffer *buffer, loff_t pos,
+/* Fill @buffer with data coming from @from. */
+static int fill_write_buffer(struct configfs_buffer *buffer,
struct iov_iter *from)
{
- loff_t to_copy;
int copied;
- u8 *to;
if (!buffer->page)
buffer->page = (char *)__get_free_pages(GFP_KERNEL, 0);
if (!buffer->page)
return -ENOMEM;
- to_copy = SIMPLE_ATTR_SIZE - 1 - pos;
- if (to_copy <= 0)
- return 0;
- to = buffer->page + pos;
- copied = copy_from_iter(to, to_copy, from);
+ copied = copy_from_iter(buffer->page, SIMPLE_ATTR_SIZE - 1, from);
buffer->needs_read_fill = 1;
/* if buf is assumed to contain a string, terminate it by \0,
* so e.g. sscanf() can scan the string easily */
- to[copied] = 0;
+ buffer->page[copied] = 0;
return copied ? : -EFAULT;
}
@@ -227,10 +221,10 @@ static ssize_t configfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct configfs_buffer *buffer = file->private_data;
- ssize_t len;
+ int len;
mutex_lock(&buffer->mutex);
- len = fill_write_buffer(buffer, iocb->ki_pos, from);
+ len = fill_write_buffer(buffer, from);
if (len > 0)
len = flush_write_buffer(file, buffer, len);
if (len > 0)
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index d00455440d08..eb538c28df94 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -26,7 +26,7 @@
* it to find the directory entry again if requested. Naively, that would just
* mean using the ciphertext filenames. However, since the ciphertext filenames
* can contain illegal characters ('\0' and '/'), they must be encoded in some
- * way. We use base64. But that can cause names to exceed NAME_MAX (255
+ * way. We use base64url. But that can cause names to exceed NAME_MAX (255
* bytes), so we also need to use a strong hash to abbreviate long names.
*
* The filesystem may also need another kind of hash, the "dirhash", to quickly
@@ -38,7 +38,7 @@
* casefolded directories use this type of dirhash. At least in these cases,
* each no-key name must include the name's dirhash too.
*
- * To meet all these requirements, we base64-encode the following
+ * To meet all these requirements, we base64url-encode the following
* variable-length structure. It contains the dirhash, or 0's if the filesystem
* didn't provide one; up to 149 bytes of the ciphertext name; and for
* ciphertexts longer than 149 bytes, also the SHA-256 of the remaining bytes.
@@ -52,15 +52,19 @@ struct fscrypt_nokey_name {
u32 dirhash[2];
u8 bytes[149];
u8 sha256[SHA256_DIGEST_SIZE];
-}; /* 189 bytes => 252 bytes base64-encoded, which is <= NAME_MAX (255) */
+}; /* 189 bytes => 252 bytes base64url-encoded, which is <= NAME_MAX (255) */
/*
- * Decoded size of max-size nokey name, i.e. a name that was abbreviated using
+ * Decoded size of max-size no-key name, i.e. a name that was abbreviated using
* the strong hash and thus includes the 'sha256' field. This isn't simply
* sizeof(struct fscrypt_nokey_name), as the padding at the end isn't included.
*/
#define FSCRYPT_NOKEY_NAME_MAX offsetofend(struct fscrypt_nokey_name, sha256)
+/* Encoded size of max-size no-key name */
+#define FSCRYPT_NOKEY_NAME_MAX_ENCODED \
+ FSCRYPT_BASE64URL_CHARS(FSCRYPT_NOKEY_NAME_MAX)
+
static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
{
if (str->len == 1 && str->name[0] == '.')
@@ -175,62 +179,82 @@ static int fname_decrypt(const struct inode *inode,
return 0;
}
-static const char lookup_table[65] =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+static const char base64url_table[65] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
-#define BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
+#define FSCRYPT_BASE64URL_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
/**
- * base64_encode() - base64-encode some bytes
- * @src: the bytes to encode
- * @len: number of bytes to encode
- * @dst: (output) the base64-encoded string. Not NUL-terminated.
+ * fscrypt_base64url_encode() - base64url-encode some binary data
+ * @src: the binary data to encode
+ * @srclen: the length of @src in bytes
+ * @dst: (output) the base64url-encoded string. Not NUL-terminated.
*
- * Encodes the input string using characters from the set [A-Za-z0-9+,].
- * The encoded string is roughly 4/3 times the size of the input string.
+ * Encodes data using base64url encoding, i.e. the "Base 64 Encoding with URL
+ * and Filename Safe Alphabet" specified by RFC 4648. '='-padding isn't used,
+ * as it's unneeded and not required by the RFC. base64url is used instead of
+ * base64 to avoid the '/' character, which isn't allowed in filenames.
*
- * Return: length of the encoded string
+ * Return: the length of the resulting base64url-encoded string in bytes.
+ * This will be equal to FSCRYPT_BASE64URL_CHARS(srclen).
*/
-static int base64_encode(const u8 *src, int len, char *dst)
+static int fscrypt_base64url_encode(const u8 *src, int srclen, char *dst)
{
- int i, bits = 0, ac = 0;
+ u32 ac = 0;
+ int bits = 0;
+ int i;
char *cp = dst;
- for (i = 0; i < len; i++) {
- ac += src[i] << bits;
+ for (i = 0; i < srclen; i++) {
+ ac = (ac << 8) | src[i];
bits += 8;
do {
- *cp++ = lookup_table[ac & 0x3f];
- ac >>= 6;
bits -= 6;
+ *cp++ = base64url_table[(ac >> bits) & 0x3f];
} while (bits >= 6);
}
if (bits)
- *cp++ = lookup_table[ac & 0x3f];
+ *cp++ = base64url_table[(ac << (6 - bits)) & 0x3f];
return cp - dst;
}
-static int base64_decode(const char *src, int len, u8 *dst)
+/**
+ * fscrypt_base64url_decode() - base64url-decode a string
+ * @src: the string to decode. Doesn't need to be NUL-terminated.
+ * @srclen: the length of @src in bytes
+ * @dst: (output) the decoded binary data
+ *
+ * Decodes a string using base64url encoding, i.e. the "Base 64 Encoding with
+ * URL and Filename Safe Alphabet" specified by RFC 4648. '='-padding isn't
+ * accepted, nor are non-encoding characters such as whitespace.
+ *
+ * This implementation hasn't been optimized for performance.
+ *
+ * Return: the length of the resulting decoded binary data in bytes,
+ * or -1 if the string isn't a valid base64url string.
+ */
+static int fscrypt_base64url_decode(const char *src, int srclen, u8 *dst)
{
- int i, bits = 0, ac = 0;
- const char *p;
- u8 *cp = dst;
+ u32 ac = 0;
+ int bits = 0;
+ int i;
+ u8 *bp = dst;
+
+ for (i = 0; i < srclen; i++) {
+ const char *p = strchr(base64url_table, src[i]);
- for (i = 0; i < len; i++) {
- p = strchr(lookup_table, src[i]);
if (p == NULL || src[i] == 0)
- return -2;
- ac += (p - lookup_table) << bits;
+ return -1;
+ ac = (ac << 6) | (p - base64url_table);
bits += 6;
if (bits >= 8) {
- *cp++ = ac & 0xff;
- ac >>= 8;
bits -= 8;
+ *bp++ = (u8)(ac >> bits);
}
}
- if (ac)
+ if (ac & ((1 << bits) - 1))
return -1;
- return cp - dst;
+ return bp - dst;
}
bool fscrypt_fname_encrypted_size(const union fscrypt_policy *policy,
@@ -263,10 +287,8 @@ bool fscrypt_fname_encrypted_size(const union fscrypt_policy *policy,
int fscrypt_fname_alloc_buffer(u32 max_encrypted_len,
struct fscrypt_str *crypto_str)
{
- const u32 max_encoded_len = BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX);
- u32 max_presented_len;
-
- max_presented_len = max(max_encoded_len, max_encrypted_len);
+ u32 max_presented_len = max_t(u32, FSCRYPT_NOKEY_NAME_MAX_ENCODED,
+ max_encrypted_len);
crypto_str->name = kmalloc(max_presented_len + 1, GFP_NOFS);
if (!crypto_str->name)
@@ -342,7 +364,7 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode,
offsetof(struct fscrypt_nokey_name, bytes));
BUILD_BUG_ON(offsetofend(struct fscrypt_nokey_name, bytes) !=
offsetof(struct fscrypt_nokey_name, sha256));
- BUILD_BUG_ON(BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX) > NAME_MAX);
+ BUILD_BUG_ON(FSCRYPT_NOKEY_NAME_MAX_ENCODED > NAME_MAX);
nokey_name.dirhash[0] = hash;
nokey_name.dirhash[1] = minor_hash;
@@ -358,7 +380,8 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode,
nokey_name.sha256);
size = FSCRYPT_NOKEY_NAME_MAX;
}
- oname->len = base64_encode((const u8 *)&nokey_name, size, oname->name);
+ oname->len = fscrypt_base64url_encode((const u8 *)&nokey_name, size,
+ oname->name);
return 0;
}
EXPORT_SYMBOL(fscrypt_fname_disk_to_usr);
@@ -432,14 +455,15 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
* user-supplied name
*/
- if (iname->len > BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX))
+ if (iname->len > FSCRYPT_NOKEY_NAME_MAX_ENCODED)
return -ENOENT;
fname->crypto_buf.name = kmalloc(FSCRYPT_NOKEY_NAME_MAX, GFP_KERNEL);
if (fname->crypto_buf.name == NULL)
return -ENOMEM;
- ret = base64_decode(iname->name, iname->len, fname->crypto_buf.name);
+ ret = fscrypt_base64url_decode(iname->name, iname->len,
+ fname->crypto_buf.name);
if (ret < (int)offsetof(struct fscrypt_nokey_name, bytes[1]) ||
(ret > offsetof(struct fscrypt_nokey_name, sha256) &&
ret != FSCRYPT_NOKEY_NAME_MAX)) {
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index a73b0376e6f3..af74599ae1cf 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -384,3 +384,47 @@ err_kfree:
return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(fscrypt_get_symlink);
+
+/**
+ * fscrypt_symlink_getattr() - set the correct st_size for encrypted symlinks
+ * @path: the path for the encrypted symlink being queried
+ * @stat: the struct being filled with the symlink's attributes
+ *
+ * Override st_size of encrypted symlinks to be the length of the decrypted
+ * symlink target (or the no-key encoded symlink target, if the key is
+ * unavailable) rather than the length of the encrypted symlink target. This is
+ * necessary for st_size to match the symlink target that userspace actually
+ * sees. POSIX requires this, and some userspace programs depend on it.
+ *
+ * This requires reading the symlink target from disk if needed, setting up the
+ * inode's encryption key if possible, and then decrypting or encoding the
+ * symlink target. This makes lstat() more heavyweight than is normally the
+ * case. However, decrypted symlink targets will be cached in ->i_link, so
+ * usually the symlink won't have to be read and decrypted again later if/when
+ * it is actually followed, readlink() is called, or lstat() is called again.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fscrypt_symlink_getattr(const struct path *path, struct kstat *stat)
+{
+ struct dentry *dentry = path->dentry;
+ struct inode *inode = d_inode(dentry);
+ const char *link;
+ DEFINE_DELAYED_CALL(done);
+
+ /*
+ * To get the symlink target that userspace will see (whether it's the
+ * decrypted target or the no-key encoded target), we can just get it in
+ * the same way the VFS does during path resolution and readlink().
+ */
+ link = READ_ONCE(inode->i_link);
+ if (!link) {
+ link = inode->i_op->get_link(dentry, inode, &done);
+ if (IS_ERR(link))
+ return PTR_ERR(link);
+ }
+ stat->size = strlen(link);
+ do_delayed_call(&done);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fscrypt_symlink_getattr);
diff --git a/fs/dax.c b/fs/dax.c
index da41f9363568..4e3e5a283a91 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -722,7 +722,7 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d
return rc;
id = dax_read_lock();
- rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(PAGE_SIZE), &kaddr, NULL);
+ rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
if (rc < 0) {
dax_read_unlock(id);
return rc;
@@ -1005,12 +1005,12 @@ int dax_writeback_mapping_range(struct address_space *mapping,
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
-static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
+static sector_t dax_iomap_sector(const struct iomap *iomap, loff_t pos)
{
return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
}
-static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
+static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size,
pfn_t *pfnp)
{
const sector_t sector = dax_iomap_sector(iomap, pos);
@@ -1066,6 +1066,66 @@ static vm_fault_t dax_load_hole(struct xa_state *xas,
return ret;
}
+#ifdef CONFIG_FS_DAX_PMD
+static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
+ const struct iomap *iomap, void **entry)
+{
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
+ unsigned long pmd_addr = vmf->address & PMD_MASK;
+ struct vm_area_struct *vma = vmf->vma;
+ struct inode *inode = mapping->host;
+ pgtable_t pgtable = NULL;
+ struct page *zero_page;
+ spinlock_t *ptl;
+ pmd_t pmd_entry;
+ pfn_t pfn;
+
+ zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
+
+ if (unlikely(!zero_page))
+ goto fallback;
+
+ pfn = page_to_pfn_t(zero_page);
+ *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
+ DAX_PMD | DAX_ZERO_PAGE, false);
+
+ if (arch_needs_pgtable_deposit()) {
+ pgtable = pte_alloc_one(vma->vm_mm);
+ if (!pgtable)
+ return VM_FAULT_OOM;
+ }
+
+ ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
+ if (!pmd_none(*(vmf->pmd))) {
+ spin_unlock(ptl);
+ goto fallback;
+ }
+
+ if (pgtable) {
+ pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
+ mm_inc_nr_ptes(vma->vm_mm);
+ }
+ pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
+ pmd_entry = pmd_mkhuge(pmd_entry);
+ set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
+ spin_unlock(ptl);
+ trace_dax_pmd_load_hole(inode, vmf, zero_page, *entry);
+ return VM_FAULT_NOPAGE;
+
+fallback:
+ if (pgtable)
+ pte_free(vma->vm_mm, pgtable);
+ trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry);
+ return VM_FAULT_FALLBACK;
+}
+#else
+static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
+ const struct iomap *iomap, void **entry)
+{
+ return VM_FAULT_FALLBACK;
+}
+#endif /* CONFIG_FS_DAX_PMD */
+
s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap)
{
sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
@@ -1103,20 +1163,21 @@ s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap)
return size;
}
-static loff_t
-dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
- struct iomap *iomap, struct iomap *srcmap)
+static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
+ struct iov_iter *iter)
{
+ const struct iomap *iomap = &iomi->iomap;
+ loff_t length = iomap_length(iomi);
+ loff_t pos = iomi->pos;
struct block_device *bdev = iomap->bdev;
struct dax_device *dax_dev = iomap->dax_dev;
- struct iov_iter *iter = data;
loff_t end = pos + length, done = 0;
ssize_t ret = 0;
size_t xfer;
int id;
if (iov_iter_rw(iter) == READ) {
- end = min(end, i_size_read(inode));
+ end = min(end, i_size_read(iomi->inode));
if (pos >= end)
return 0;
@@ -1133,7 +1194,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
* written by write(2) is visible in mmap.
*/
if (iomap->flags & IOMAP_F_NEW) {
- invalidate_inode_pages2_range(inode->i_mapping,
+ invalidate_inode_pages2_range(iomi->inode->i_mapping,
pos >> PAGE_SHIFT,
(end - 1) >> PAGE_SHIFT);
}
@@ -1209,31 +1270,29 @@ ssize_t
dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops)
{
- struct address_space *mapping = iocb->ki_filp->f_mapping;
- struct inode *inode = mapping->host;
- loff_t pos = iocb->ki_pos, ret = 0, done = 0;
- unsigned flags = 0;
+ struct iomap_iter iomi = {
+ .inode = iocb->ki_filp->f_mapping->host,
+ .pos = iocb->ki_pos,
+ .len = iov_iter_count(iter),
+ };
+ loff_t done = 0;
+ int ret;
if (iov_iter_rw(iter) == WRITE) {
- lockdep_assert_held_write(&inode->i_rwsem);
- flags |= IOMAP_WRITE;
+ lockdep_assert_held_write(&iomi.inode->i_rwsem);
+ iomi.flags |= IOMAP_WRITE;
} else {
- lockdep_assert_held(&inode->i_rwsem);
+ lockdep_assert_held(&iomi.inode->i_rwsem);
}
if (iocb->ki_flags & IOCB_NOWAIT)
- flags |= IOMAP_NOWAIT;
+ iomi.flags |= IOMAP_NOWAIT;
- while (iov_iter_count(iter)) {
- ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
- iter, dax_iomap_actor);
- if (ret <= 0)
- break;
- pos += ret;
- done += ret;
- }
+ while ((ret = iomap_iter(&iomi, ops)) > 0)
+ iomi.processed = dax_iomap_iter(&iomi, iter);
- iocb->ki_pos += done;
+ done = iomi.pos - iocb->ki_pos;
+ iocb->ki_pos = iomi.pos;
return done ? done : ret;
}
EXPORT_SYMBOL_GPL(dax_iomap_rw);
@@ -1250,44 +1309,146 @@ static vm_fault_t dax_fault_return(int error)
* flushed on write-faults (non-cow), but not read-faults.
*/
static bool dax_fault_is_synchronous(unsigned long flags,
- struct vm_area_struct *vma, struct iomap *iomap)
+ struct vm_area_struct *vma, const struct iomap *iomap)
{
return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC)
&& (iomap->flags & IOMAP_F_DIRTY);
}
+/*
+ * When handling a synchronous page fault and the inode needs an fsync, we can
+ * insert the PTE/PMD into page tables only after that fsync happened. Skip
+ * insertion for now and return the pfn so that caller can insert it after the
+ * fsync is done.
+ */
+static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn)
+{
+ if (WARN_ON_ONCE(!pfnp))
+ return VM_FAULT_SIGBUS;
+ *pfnp = pfn;
+ return VM_FAULT_NEEDDSYNC;
+}
+
+static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf,
+ const struct iomap_iter *iter)
+{
+ sector_t sector = dax_iomap_sector(&iter->iomap, iter->pos);
+ unsigned long vaddr = vmf->address;
+ vm_fault_t ret;
+ int error = 0;
+
+ switch (iter->iomap.type) {
+ case IOMAP_HOLE:
+ case IOMAP_UNWRITTEN:
+ clear_user_highpage(vmf->cow_page, vaddr);
+ break;
+ case IOMAP_MAPPED:
+ error = copy_cow_page_dax(iter->iomap.bdev, iter->iomap.dax_dev,
+ sector, vmf->cow_page, vaddr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ error = -EIO;
+ break;
+ }
+
+ if (error)
+ return dax_fault_return(error);
+
+ __SetPageUptodate(vmf->cow_page);
+ ret = finish_fault(vmf);
+ if (!ret)
+ return VM_FAULT_DONE_COW;
+ return ret;
+}
+
+/**
+ * dax_fault_iter - Common actor to handle pfn insertion in PTE/PMD fault.
+ * @vmf: vm fault instance
+ * @iter: iomap iter
+ * @pfnp: pfn to be returned
+ * @xas: the dax mapping tree of a file
+ * @entry: an unlocked dax entry to be inserted
+ * @pmd: distinguish whether it is a pmd fault
+ */
+static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
+ const struct iomap_iter *iter, pfn_t *pfnp,
+ struct xa_state *xas, void **entry, bool pmd)
+{
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
+ const struct iomap *iomap = &iter->iomap;
+ size_t size = pmd ? PMD_SIZE : PAGE_SIZE;
+ loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT;
+ bool write = vmf->flags & FAULT_FLAG_WRITE;
+ bool sync = dax_fault_is_synchronous(iter->flags, vmf->vma, iomap);
+ unsigned long entry_flags = pmd ? DAX_PMD : 0;
+ int err = 0;
+ pfn_t pfn;
+
+ if (!pmd && vmf->cow_page)
+ return dax_fault_cow_page(vmf, iter);
+
+ /* if we are reading UNWRITTEN and HOLE, return a hole. */
+ if (!write &&
+ (iomap->type == IOMAP_UNWRITTEN || iomap->type == IOMAP_HOLE)) {
+ if (!pmd)
+ return dax_load_hole(xas, mapping, entry, vmf);
+ return dax_pmd_load_hole(xas, vmf, iomap, entry);
+ }
+
+ if (iomap->type != IOMAP_MAPPED) {
+ WARN_ON_ONCE(1);
+ return pmd ? VM_FAULT_FALLBACK : VM_FAULT_SIGBUS;
+ }
+
+ err = dax_iomap_pfn(&iter->iomap, pos, size, &pfn);
+ if (err)
+ return pmd ? VM_FAULT_FALLBACK : dax_fault_return(err);
+
+ *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, entry_flags,
+ write && !sync);
+
+ if (sync)
+ return dax_fault_synchronous_pfnp(pfnp, pfn);
+
+ /* insert PMD pfn */
+ if (pmd)
+ return vmf_insert_pfn_pmd(vmf, pfn, write);
+
+ /* insert PTE pfn */
+ if (write)
+ return vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
+ return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
+}
+
static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
int *iomap_errp, const struct iomap_ops *ops)
{
- struct vm_area_struct *vma = vmf->vma;
- struct address_space *mapping = vma->vm_file->f_mapping;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
- struct inode *inode = mapping->host;
- unsigned long vaddr = vmf->address;
- loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
- struct iomap iomap = { .type = IOMAP_HOLE };
- struct iomap srcmap = { .type = IOMAP_HOLE };
- unsigned flags = IOMAP_FAULT;
- int error, major = 0;
- bool write = vmf->flags & FAULT_FLAG_WRITE;
- bool sync;
+ struct iomap_iter iter = {
+ .inode = mapping->host,
+ .pos = (loff_t)vmf->pgoff << PAGE_SHIFT,
+ .len = PAGE_SIZE,
+ .flags = IOMAP_FAULT,
+ };
vm_fault_t ret = 0;
void *entry;
- pfn_t pfn;
+ int error;
- trace_dax_pte_fault(inode, vmf, ret);
+ trace_dax_pte_fault(iter.inode, vmf, ret);
/*
* Check whether offset isn't beyond end of file now. Caller is supposed
* to hold locks serializing us with truncate / punch hole so this is
* a reliable test.
*/
- if (pos >= i_size_read(inode)) {
+ if (iter.pos >= i_size_read(iter.inode)) {
ret = VM_FAULT_SIGBUS;
goto out;
}
- if (write && !vmf->cow_page)
- flags |= IOMAP_WRITE;
+ if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
+ iter.flags |= IOMAP_WRITE;
entry = grab_mapping_entry(&xas, mapping, 0);
if (xa_is_internal(entry)) {
@@ -1306,234 +1467,103 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
goto unlock_entry;
}
- /*
- * Note that we don't bother to use iomap_apply here: DAX required
- * the file system block size to be equal the page size, which means
- * that we never have to deal with more than a single extent here.
- */
- error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap, &srcmap);
- if (iomap_errp)
- *iomap_errp = error;
- if (error) {
- ret = dax_fault_return(error);
- goto unlock_entry;
- }
- if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
- error = -EIO; /* fs corruption? */
- goto error_finish_iomap;
- }
-
- if (vmf->cow_page) {
- sector_t sector = dax_iomap_sector(&iomap, pos);
-
- switch (iomap.type) {
- case IOMAP_HOLE:
- case IOMAP_UNWRITTEN:
- clear_user_highpage(vmf->cow_page, vaddr);
- break;
- case IOMAP_MAPPED:
- error = copy_cow_page_dax(iomap.bdev, iomap.dax_dev,
- sector, vmf->cow_page, vaddr);
- break;
- default:
- WARN_ON_ONCE(1);
- error = -EIO;
- break;
+ while ((error = iomap_iter(&iter, ops)) > 0) {
+ if (WARN_ON_ONCE(iomap_length(&iter) < PAGE_SIZE)) {
+ iter.processed = -EIO; /* fs corruption? */
+ continue;
}
- if (error)
- goto error_finish_iomap;
-
- __SetPageUptodate(vmf->cow_page);
- ret = finish_fault(vmf);
- if (!ret)
- ret = VM_FAULT_DONE_COW;
- goto finish_iomap;
- }
-
- sync = dax_fault_is_synchronous(flags, vma, &iomap);
-
- switch (iomap.type) {
- case IOMAP_MAPPED:
- if (iomap.flags & IOMAP_F_NEW) {
+ ret = dax_fault_iter(vmf, &iter, pfnp, &xas, &entry, false);
+ if (ret != VM_FAULT_SIGBUS &&
+ (iter.iomap.flags & IOMAP_F_NEW)) {
count_vm_event(PGMAJFAULT);
- count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
- major = VM_FAULT_MAJOR;
+ count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
+ ret |= VM_FAULT_MAJOR;
}
- error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn);
- if (error < 0)
- goto error_finish_iomap;
-
- entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
- 0, write && !sync);
- /*
- * If we are doing synchronous page fault and inode needs fsync,
- * we can insert PTE into page tables only after that happens.
- * Skip insertion for now and return the pfn so that caller can
- * insert it after fsync is done.
- */
- if (sync) {
- if (WARN_ON_ONCE(!pfnp)) {
- error = -EIO;
- goto error_finish_iomap;
- }
- *pfnp = pfn;
- ret = VM_FAULT_NEEDDSYNC | major;
- goto finish_iomap;
- }
- trace_dax_insert_mapping(inode, vmf, entry);
- if (write)
- ret = vmf_insert_mixed_mkwrite(vma, vaddr, pfn);
- else
- ret = vmf_insert_mixed(vma, vaddr, pfn);
-
- goto finish_iomap;
- case IOMAP_UNWRITTEN:
- case IOMAP_HOLE:
- if (!write) {
- ret = dax_load_hole(&xas, mapping, &entry, vmf);
- goto finish_iomap;
- }
- fallthrough;
- default:
- WARN_ON_ONCE(1);
- error = -EIO;
- break;
+ if (!(ret & VM_FAULT_ERROR))
+ iter.processed = PAGE_SIZE;
}
- error_finish_iomap:
- ret = dax_fault_return(error);
- finish_iomap:
- if (ops->iomap_end) {
- int copied = PAGE_SIZE;
+ if (iomap_errp)
+ *iomap_errp = error;
+ if (!ret && error)
+ ret = dax_fault_return(error);
- if (ret & VM_FAULT_ERROR)
- copied = 0;
- /*
- * The fault is done by now and there's no way back (other
- * thread may be already happily using PTE we have installed).
- * Just ignore error from ->iomap_end since we cannot do much
- * with it.
- */
- ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
- }
- unlock_entry:
+unlock_entry:
dax_unlock_entry(&xas, entry);
- out:
- trace_dax_pte_fault_done(inode, vmf, ret);
- return ret | major;
+out:
+ trace_dax_pte_fault_done(iter.inode, vmf, ret);
+ return ret;
}
#ifdef CONFIG_FS_DAX_PMD
-static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
- struct iomap *iomap, void **entry)
+static bool dax_fault_check_fallback(struct vm_fault *vmf, struct xa_state *xas,
+ pgoff_t max_pgoff)
{
- struct address_space *mapping = vmf->vma->vm_file->f_mapping;
unsigned long pmd_addr = vmf->address & PMD_MASK;
- struct vm_area_struct *vma = vmf->vma;
- struct inode *inode = mapping->host;
- pgtable_t pgtable = NULL;
- struct page *zero_page;
- spinlock_t *ptl;
- pmd_t pmd_entry;
- pfn_t pfn;
-
- zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
-
- if (unlikely(!zero_page))
- goto fallback;
+ bool write = vmf->flags & FAULT_FLAG_WRITE;
- pfn = page_to_pfn_t(zero_page);
- *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
- DAX_PMD | DAX_ZERO_PAGE, false);
+ /*
+ * Make sure that the faulting address's PMD offset (color) matches
+ * the PMD offset from the start of the file. This is necessary so
+ * that a PMD range in the page table overlaps exactly with a PMD
+ * range in the page cache.
+ */
+ if ((vmf->pgoff & PG_PMD_COLOUR) !=
+ ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
+ return true;
- if (arch_needs_pgtable_deposit()) {
- pgtable = pte_alloc_one(vma->vm_mm);
- if (!pgtable)
- return VM_FAULT_OOM;
- }
+ /* Fall back to PTEs if we're going to COW */
+ if (write && !(vmf->vma->vm_flags & VM_SHARED))
+ return true;
- ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
- if (!pmd_none(*(vmf->pmd))) {
- spin_unlock(ptl);
- goto fallback;
- }
+ /* If the PMD would extend outside the VMA */
+ if (pmd_addr < vmf->vma->vm_start)
+ return true;
+ if ((pmd_addr + PMD_SIZE) > vmf->vma->vm_end)
+ return true;
- if (pgtable) {
- pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
- mm_inc_nr_ptes(vma->vm_mm);
- }
- pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
- pmd_entry = pmd_mkhuge(pmd_entry);
- set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
- spin_unlock(ptl);
- trace_dax_pmd_load_hole(inode, vmf, zero_page, *entry);
- return VM_FAULT_NOPAGE;
+ /* If the PMD would extend beyond the file size */
+ if ((xas->xa_index | PG_PMD_COLOUR) >= max_pgoff)
+ return true;
-fallback:
- if (pgtable)
- pte_free(vma->vm_mm, pgtable);
- trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry);
- return VM_FAULT_FALLBACK;
+ return false;
}
static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
const struct iomap_ops *ops)
{
- struct vm_area_struct *vma = vmf->vma;
- struct address_space *mapping = vma->vm_file->f_mapping;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER);
- unsigned long pmd_addr = vmf->address & PMD_MASK;
- bool write = vmf->flags & FAULT_FLAG_WRITE;
- bool sync;
- unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
- struct inode *inode = mapping->host;
- vm_fault_t result = VM_FAULT_FALLBACK;
- struct iomap iomap = { .type = IOMAP_HOLE };
- struct iomap srcmap = { .type = IOMAP_HOLE };
+ struct iomap_iter iter = {
+ .inode = mapping->host,
+ .len = PMD_SIZE,
+ .flags = IOMAP_FAULT,
+ };
+ vm_fault_t ret = VM_FAULT_FALLBACK;
pgoff_t max_pgoff;
void *entry;
- loff_t pos;
int error;
- pfn_t pfn;
+
+ if (vmf->flags & FAULT_FLAG_WRITE)
+ iter.flags |= IOMAP_WRITE;
/*
* Check whether offset isn't beyond end of file now. Caller is
* supposed to hold locks serializing us with truncate / punch hole so
* this is a reliable test.
*/
- max_pgoff = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
-
- trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
-
- /*
- * Make sure that the faulting address's PMD offset (color) matches
- * the PMD offset from the start of the file. This is necessary so
- * that a PMD range in the page table overlaps exactly with a PMD
- * range in the page cache.
- */
- if ((vmf->pgoff & PG_PMD_COLOUR) !=
- ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
- goto fallback;
+ max_pgoff = DIV_ROUND_UP(i_size_read(iter.inode), PAGE_SIZE);
- /* Fall back to PTEs if we're going to COW */
- if (write && !(vma->vm_flags & VM_SHARED))
- goto fallback;
-
- /* If the PMD would extend outside the VMA */
- if (pmd_addr < vma->vm_start)
- goto fallback;
- if ((pmd_addr + PMD_SIZE) > vma->vm_end)
- goto fallback;
+ trace_dax_pmd_fault(iter.inode, vmf, max_pgoff, 0);
if (xas.xa_index >= max_pgoff) {
- result = VM_FAULT_SIGBUS;
+ ret = VM_FAULT_SIGBUS;
goto out;
}
- /* If the PMD would extend beyond the file size */
- if ((xas.xa_index | PG_PMD_COLOUR) >= max_pgoff)
+ if (dax_fault_check_fallback(vmf, &xas, max_pgoff))
goto fallback;
/*
@@ -1544,7 +1574,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
*/
entry = grab_mapping_entry(&xas, mapping, PMD_ORDER);
if (xa_is_internal(entry)) {
- result = xa_to_internal(entry);
+ ret = xa_to_internal(entry);
goto fallback;
}
@@ -1556,88 +1586,30 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
*/
if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
!pmd_devmap(*vmf->pmd)) {
- result = 0;
+ ret = 0;
goto unlock_entry;
}
- /*
- * Note that we don't use iomap_apply here. We aren't doing I/O, only
- * setting up a mapping, so really we're using iomap_begin() as a way
- * to look up our filesystem block.
- */
- pos = (loff_t)xas.xa_index << PAGE_SHIFT;
- error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap,
- &srcmap);
- if (error)
- goto unlock_entry;
-
- if (iomap.offset + iomap.length < pos + PMD_SIZE)
- goto finish_iomap;
-
- sync = dax_fault_is_synchronous(iomap_flags, vma, &iomap);
-
- switch (iomap.type) {
- case IOMAP_MAPPED:
- error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn);
- if (error < 0)
- goto finish_iomap;
-
- entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
- DAX_PMD, write && !sync);
-
- /*
- * If we are doing synchronous page fault and inode needs fsync,
- * we can insert PMD into page tables only after that happens.
- * Skip insertion for now and return the pfn so that caller can
- * insert it after fsync is done.
- */
- if (sync) {
- if (WARN_ON_ONCE(!pfnp))
- goto finish_iomap;
- *pfnp = pfn;
- result = VM_FAULT_NEEDDSYNC;
- goto finish_iomap;
- }
+ iter.pos = (loff_t)xas.xa_index << PAGE_SHIFT;
+ while ((error = iomap_iter(&iter, ops)) > 0) {
+ if (iomap_length(&iter) < PMD_SIZE)
+ continue; /* actually breaks out of the loop */
- trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry);
- result = vmf_insert_pfn_pmd(vmf, pfn, write);
- break;
- case IOMAP_UNWRITTEN:
- case IOMAP_HOLE:
- if (WARN_ON_ONCE(write))
- break;
- result = dax_pmd_load_hole(&xas, vmf, &iomap, &entry);
- break;
- default:
- WARN_ON_ONCE(1);
- break;
+ ret = dax_fault_iter(vmf, &iter, pfnp, &xas, &entry, true);
+ if (ret != VM_FAULT_FALLBACK)
+ iter.processed = PMD_SIZE;
}
- finish_iomap:
- if (ops->iomap_end) {
- int copied = PMD_SIZE;
-
- if (result == VM_FAULT_FALLBACK)
- copied = 0;
- /*
- * The fault is done by now and there's no way back (other
- * thread may be already happily using PMD we have installed).
- * Just ignore error from ->iomap_end since we cannot do much
- * with it.
- */
- ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
- &iomap);
- }
- unlock_entry:
+unlock_entry:
dax_unlock_entry(&xas, entry);
- fallback:
- if (result == VM_FAULT_FALLBACK) {
- split_huge_pmd(vma, vmf->pmd, vmf->address);
+fallback:
+ if (ret == VM_FAULT_FALLBACK) {
+ split_huge_pmd(vmf->vma, vmf->pmd, vmf->address);
count_vm_event(THP_FAULT_FALLBACK);
}
out:
- trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result);
- return result;
+ trace_dax_pmd_fault_done(iter.inode, vmf, max_pgoff, ret);
+ return ret;
}
#else
static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
diff --git a/fs/eventfd.c b/fs/eventfd.c
index e265b6dd4f34..3627dd7d25db 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -25,8 +25,6 @@
#include <linux/idr.h>
#include <linux/uio.h>
-DEFINE_PER_CPU(int, eventfd_wake_count);
-
static DEFINE_IDA(eventfd_ida);
struct eventfd_ctx {
@@ -67,21 +65,21 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
* Deadlock or stack overflow issues can happen if we recurse here
* through waitqueue wakeup handlers. If the caller users potentially
* nested waitqueues with custom wakeup handlers, then it should
- * check eventfd_signal_count() before calling this function. If
- * it returns true, the eventfd_signal() call should be deferred to a
+ * check eventfd_signal_allowed() before calling this function. If
+ * it returns false, the eventfd_signal() call should be deferred to a
* safe context.
*/
- if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
+ if (WARN_ON_ONCE(current->in_eventfd_signal))
return 0;
spin_lock_irqsave(&ctx->wqh.lock, flags);
- this_cpu_inc(eventfd_wake_count);
+ current->in_eventfd_signal = 1;
if (ULLONG_MAX - ctx->count < n)
n = ULLONG_MAX - ctx->count;
ctx->count += n;
if (waitqueue_active(&ctx->wqh))
wake_up_locked_poll(&ctx->wqh, EPOLLIN);
- this_cpu_dec(eventfd_wake_count);
+ current->in_eventfd_signal = 0;
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
return n;
diff --git a/fs/exec.c b/fs/exec.c
index 38f63451b928..3b78b22addfb 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -2070,10 +2070,8 @@ SYSCALL_DEFINE5(execveat,
const char __user *const __user *, envp,
int, flags)
{
- int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
-
return do_execveat(fd,
- getname_flags(filename, lookup_flags, NULL),
+ getname_uflags(filename, flags),
argv, envp, flags);
}
@@ -2091,10 +2089,8 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
const compat_uptr_t __user *, envp,
int, flags)
{
- int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
-
return compat_do_execveat(fd,
- getname_flags(filename, lookup_flags, NULL),
+ getname_uflags(filename, flags),
argv, envp, flags);
}
#endif
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
index 54eec9185627..1248ff4ef562 100644
--- a/fs/ext2/Kconfig
+++ b/fs/ext2/Kconfig
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config EXT2_FS
tristate "Second extended fs support"
+ select FS_IOMAP
help
Ext2 is a standard Linux file system for hard disks.
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index e512630cb63e..3be9dd6412b7 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -667,9 +667,6 @@ struct ext2_inode_info {
struct rw_semaphore xattr_sem;
#endif
rwlock_t i_meta_lock;
-#ifdef CONFIG_FS_DAX
- struct rw_semaphore dax_sem;
-#endif
/*
* truncate_mutex is for serialising ext2_truncate() against
@@ -685,14 +682,6 @@ struct ext2_inode_info {
#endif
};
-#ifdef CONFIG_FS_DAX
-#define dax_sem_down_write(ext2_inode) down_write(&(ext2_inode)->dax_sem)
-#define dax_sem_up_write(ext2_inode) up_write(&(ext2_inode)->dax_sem)
-#else
-#define dax_sem_down_write(ext2_inode)
-#define dax_sem_up_write(ext2_inode)
-#endif
-
/*
* Inode dynamic state flags
*/
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index f98466acc672..eb97aa3d700e 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -81,7 +81,7 @@ out_unlock:
*
* mmap_lock (MM)
* sb_start_pagefault (vfs, freeze)
- * ext2_inode_info->dax_sem
+ * address_space->invalidate_lock
* address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX)
* ext2_inode_info->truncate_mutex
*
@@ -91,7 +91,6 @@ out_unlock:
static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
- struct ext2_inode_info *ei = EXT2_I(inode);
vm_fault_t ret;
bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
(vmf->vma->vm_flags & VM_SHARED);
@@ -100,11 +99,11 @@ static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
sb_start_pagefault(inode->i_sb);
file_update_time(vmf->vma->vm_file);
}
- down_read(&ei->dax_sem);
+ filemap_invalidate_lock_shared(inode->i_mapping);
ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, NULL, &ext2_iomap_ops);
- up_read(&ei->dax_sem);
+ filemap_invalidate_unlock_shared(inode->i_mapping);
if (write)
sb_end_pagefault(inode->i_sb);
return ret;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index dadb121beb22..333fa62661d5 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -799,7 +799,6 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
}
-#ifdef CONFIG_FS_DAX
static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned flags, struct iomap *iomap, struct iomap *srcmap)
{
@@ -852,16 +851,18 @@ const struct iomap_ops ext2_iomap_ops = {
.iomap_begin = ext2_iomap_begin,
.iomap_end = ext2_iomap_end,
};
-#else
-/* Define empty ops for !CONFIG_FS_DAX case to avoid ugly ifdefs */
-const struct iomap_ops ext2_iomap_ops;
-#endif /* CONFIG_FS_DAX */
int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
- return generic_block_fiemap(inode, fieinfo, start, len,
- ext2_get_block);
+ int ret;
+
+ inode_lock(inode);
+ len = min_t(u64, len, i_size_read(inode));
+ ret = iomap_fiemap(inode, fieinfo, start, len, &ext2_iomap_ops);
+ inode_unlock(inode);
+
+ return ret;
}
static int ext2_writepage(struct page *page, struct writeback_control *wbc)
@@ -1177,7 +1178,7 @@ static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int de
ext2_free_data(inode, p, q);
}
-/* dax_sem must be held when calling this function */
+/* mapping->invalidate_lock must be held when calling this function */
static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
{
__le32 *i_data = EXT2_I(inode)->i_data;
@@ -1194,7 +1195,7 @@ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
#ifdef CONFIG_FS_DAX
- WARN_ON(!rwsem_is_locked(&ei->dax_sem));
+ WARN_ON(!rwsem_is_locked(&inode->i_mapping->invalidate_lock));
#endif
n = ext2_block_to_path(inode, iblock, offsets, NULL);
@@ -1276,9 +1277,9 @@ static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
if (ext2_inode_is_fast_symlink(inode))
return;
- dax_sem_down_write(EXT2_I(inode));
+ filemap_invalidate_lock(inode->i_mapping);
__ext2_truncate_blocks(inode, offset);
- dax_sem_up_write(EXT2_I(inode));
+ filemap_invalidate_unlock(inode->i_mapping);
}
static int ext2_setsize(struct inode *inode, loff_t newsize)
@@ -1308,10 +1309,10 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
if (error)
return error;
- dax_sem_down_write(EXT2_I(inode));
+ filemap_invalidate_lock(inode->i_mapping);
truncate_setsize(inode, newsize);
__ext2_truncate_blocks(inode, newsize);
- dax_sem_up_write(EXT2_I(inode));
+ filemap_invalidate_unlock(inode->i_mapping);
inode->i_mtime = inode->i_ctime = current_time(inode);
if (inode_needs_sync(inode)) {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 21e09fbaa46f..987bcf32ed46 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -206,9 +206,6 @@ static void init_once(void *foo)
init_rwsem(&ei->xattr_sem);
#endif
mutex_init(&ei->truncate_mutex);
-#ifdef CONFIG_FS_DAX
- init_rwsem(&ei->dax_sem);
-#endif
inode_init_once(&ei->vfs_inode);
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3c51e243450d..7ebaf66b6e31 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1086,15 +1086,6 @@ struct ext4_inode_info {
* by other means, so we have i_data_sem.
*/
struct rw_semaphore i_data_sem;
- /*
- * i_mmap_sem is for serializing page faults with truncate / punch hole
- * operations. We have to make sure that new page cannot be faulted in
- * a section of the inode that is being punched. We cannot easily use
- * i_data_sem for this since we need protection for the whole punch
- * operation and i_data_sem ranks below transaction start so we have
- * to occasionally drop it.
- */
- struct rw_semaphore i_mmap_sem;
struct inode vfs_inode;
struct jbd2_inode *jinode;
@@ -2972,7 +2963,6 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
loff_t lstart, loff_t lend);
extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf);
-extern vm_fault_t ext4_filemap_fault(struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
extern void ext4_da_release_space(struct inode *inode, int to_free);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 92ad64b89d9b..c33e0a2cb6c3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4474,6 +4474,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
loff_t len, int mode)
{
struct inode *inode = file_inode(file);
+ struct address_space *mapping = file->f_mapping;
handle_t *handle = NULL;
unsigned int max_blocks;
loff_t new_size = 0;
@@ -4560,17 +4561,17 @@ static long ext4_zero_range(struct file *file, loff_t offset,
* Prevent page faults from reinstantiating pages we have
* released from page cache.
*/
- down_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
ret = ext4_break_layouts(inode);
if (ret) {
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
goto out_mutex;
}
ret = ext4_update_disksize_before_punch(inode, offset, len);
if (ret) {
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
goto out_mutex;
}
/* Now release the pages and zero block aligned part of pages */
@@ -4579,7 +4580,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
flags);
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
if (ret)
goto out_mutex;
}
@@ -5221,6 +5222,7 @@ out:
static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{
struct super_block *sb = inode->i_sb;
+ struct address_space *mapping = inode->i_mapping;
ext4_lblk_t punch_start, punch_stop;
handle_t *handle;
unsigned int credits;
@@ -5274,7 +5276,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
* Prevent page faults from reinstantiating pages we have released from
* page cache.
*/
- down_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
ret = ext4_break_layouts(inode);
if (ret)
@@ -5289,15 +5291,15 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
* Write tail of the last page before removed range since it will get
* removed from the page cache below.
*/
- ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
+ ret = filemap_write_and_wait_range(mapping, ioffset, offset);
if (ret)
goto out_mmap;
/*
* Write data that will be shifted to preserve them when discarding
* page cache below. We are also protected from pages becoming dirty
- * by i_mmap_sem.
+ * by i_rwsem and invalidate_lock.
*/
- ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
+ ret = filemap_write_and_wait_range(mapping, offset + len,
LLONG_MAX);
if (ret)
goto out_mmap;
@@ -5350,7 +5352,7 @@ out_stop:
ext4_journal_stop(handle);
ext4_fc_stop_ineligible(sb);
out_mmap:
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
out_mutex:
inode_unlock(inode);
return ret;
@@ -5367,6 +5369,7 @@ out_mutex:
static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
{
struct super_block *sb = inode->i_sb;
+ struct address_space *mapping = inode->i_mapping;
handle_t *handle;
struct ext4_ext_path *path;
struct ext4_extent *extent;
@@ -5425,7 +5428,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
* Prevent page faults from reinstantiating pages we have released from
* page cache.
*/
- down_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
ret = ext4_break_layouts(inode);
if (ret)
@@ -5526,7 +5529,7 @@ out_stop:
ext4_journal_stop(handle);
ext4_fc_stop_ineligible(sb);
out_mmap:
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
out_mutex:
inode_unlock(inode);
return ret;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 816dedcbd541..d3b4ed91aa68 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -704,22 +704,23 @@ static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
*/
bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
(vmf->vma->vm_flags & VM_SHARED);
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
pfn_t pfn;
if (write) {
sb_start_pagefault(sb);
file_update_time(vmf->vma->vm_file);
- down_read(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock_shared(mapping);
retry:
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
EXT4_DATA_TRANS_BLOCKS(sb));
if (IS_ERR(handle)) {
- up_read(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock_shared(mapping);
sb_end_pagefault(sb);
return VM_FAULT_SIGBUS;
}
} else {
- down_read(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock_shared(mapping);
}
result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops);
if (write) {
@@ -731,10 +732,10 @@ retry:
/* Handling synchronous page fault? */
if (result & VM_FAULT_NEEDDSYNC)
result = dax_finish_sync_fault(vmf, pe_size, pfn);
- up_read(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock_shared(mapping);
sb_end_pagefault(sb);
} else {
- up_read(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock_shared(mapping);
}
return result;
@@ -756,7 +757,7 @@ static const struct vm_operations_struct ext4_dax_vm_ops = {
#endif
static const struct vm_operations_struct ext4_file_vm_ops = {
- .fault = ext4_filemap_fault,
+ .fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = ext4_page_mkwrite,
};
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d8de607849df..325c038e7b23 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3950,20 +3950,19 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
return ret;
}
-static void ext4_wait_dax_page(struct ext4_inode_info *ei)
+static void ext4_wait_dax_page(struct inode *inode)
{
- up_write(&ei->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
schedule();
- down_write(&ei->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
}
int ext4_break_layouts(struct inode *inode)
{
- struct ext4_inode_info *ei = EXT4_I(inode);
struct page *page;
int error;
- if (WARN_ON_ONCE(!rwsem_is_locked(&ei->i_mmap_sem)))
+ if (WARN_ON_ONCE(!rwsem_is_locked(&inode->i_mapping->invalidate_lock)))
return -EINVAL;
do {
@@ -3974,7 +3973,7 @@ int ext4_break_layouts(struct inode *inode)
error = ___wait_var_event(&page->_refcount,
atomic_read(&page->_refcount) == 1,
TASK_INTERRUPTIBLE, 0, 0,
- ext4_wait_dax_page(ei));
+ ext4_wait_dax_page(inode));
} while (error == 0);
return error;
@@ -4005,9 +4004,9 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
if (ext4_has_inline_data(inode)) {
- down_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
ret = ext4_convert_inline_data(inode);
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
if (ret)
return ret;
}
@@ -4058,7 +4057,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
* Prevent page faults from reinstantiating pages we have released from
* page cache.
*/
- down_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
ret = ext4_break_layouts(inode);
if (ret)
@@ -4131,7 +4130,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
out_stop:
ext4_journal_stop(handle);
out_dio:
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
out_mutex:
inode_unlock(inode);
return ret;
@@ -5426,11 +5425,11 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
inode_dio_wait(inode);
}
- down_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
rc = ext4_break_layouts(inode);
if (rc) {
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
goto err_out;
}
@@ -5506,7 +5505,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
error = rc;
}
out_mmap_sem:
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
}
if (!error) {
@@ -5983,10 +5982,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
* data (and journalled aops don't know how to handle these cases).
*/
if (val) {
- down_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
err = filemap_write_and_wait(inode->i_mapping);
if (err < 0) {
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
return err;
}
}
@@ -6019,7 +6018,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
percpu_up_write(&sbi->s_writepages_rwsem);
if (val)
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
/* Finally we can mark the inode as dirty. */
@@ -6063,7 +6062,7 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
- down_read(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock_shared(mapping);
err = ext4_convert_inline_data(inode);
if (err)
@@ -6176,7 +6175,7 @@ retry_alloc:
out_ret:
ret = block_page_mkwrite_return(err);
out:
- up_read(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock_shared(mapping);
sb_end_pagefault(inode->i_sb);
return ret;
out_error:
@@ -6184,15 +6183,3 @@ out_error:
ext4_journal_stop(handle);
goto out;
}
-
-vm_fault_t ext4_filemap_fault(struct vm_fault *vmf)
-{
- struct inode *inode = file_inode(vmf->vma->vm_file);
- vm_fault_t ret;
-
- down_read(&EXT4_I(inode)->i_mmap_sem);
- ret = filemap_fault(vmf);
- up_read(&EXT4_I(inode)->i_mmap_sem);
-
- return ret;
-}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 6eed6170aded..4fb5fe083c2b 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -148,7 +148,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
goto journal_err_out;
}
- down_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
err = filemap_write_and_wait(inode->i_mapping);
if (err)
goto err_out;
@@ -256,7 +256,7 @@ err_out1:
ext4_double_up_write_data_sem(inode, inode_bl);
err_out:
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
journal_err_out:
unlock_two_nondirectories(inode, inode_bl);
iput(inode_bl);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dfa09a277b56..d6df62fc810c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -90,12 +90,9 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
/*
* Lock ordering
*
- * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
- * i_mmap_rwsem (inode->i_mmap_rwsem)!
- *
* page fault path:
- * mmap_lock -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
- * page lock -> i_data_sem (rw)
+ * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
+ * -> page lock -> i_data_sem (rw)
*
* buffered write path:
* sb_start_write -> i_mutex -> mmap_lock
@@ -103,8 +100,9 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
* i_data_sem (rw)
*
* truncate:
- * sb_start_write -> i_mutex -> i_mmap_sem (w) -> i_mmap_rwsem (w) -> page lock
- * sb_start_write -> i_mutex -> i_mmap_sem (w) -> transaction start ->
+ * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
+ * page lock
+ * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
* i_data_sem (rw)
*
* direct IO:
@@ -1360,7 +1358,6 @@ static void init_once(void *foo)
INIT_LIST_HEAD(&ei->i_orphan);
init_rwsem(&ei->xattr_sem);
init_rwsem(&ei->i_data_sem);
- init_rwsem(&ei->i_mmap_sem);
inode_init_once(&ei->vfs_inode);
ext4_fc_init_inode(&ei->vfs_inode);
}
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index dd05af983092..69109746e6e2 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -52,10 +52,20 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
return paddr;
}
+static int ext4_encrypted_symlink_getattr(struct user_namespace *mnt_userns,
+ const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
+{
+ ext4_getattr(mnt_userns, path, stat, request_mask, query_flags);
+
+ return fscrypt_symlink_getattr(path, stat);
+}
+
const struct inode_operations ext4_encrypted_symlink_inode_operations = {
.get_link = ext4_encrypted_get_link,
.setattr = ext4_setattr,
- .getattr = ext4_getattr,
+ .getattr = ext4_encrypted_symlink_getattr,
.listxattr = ext4_listxattr,
};
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
index bcbe3668c1d4..ce84aa2786c7 100644
--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
@@ -11,14 +11,16 @@
*/
static inline void ext4_truncate_failed_write(struct inode *inode)
{
+ struct address_space *mapping = inode->i_mapping;
+
/*
* We don't need to call ext4_break_layouts() because the blocks we
* are truncating were never visible to userspace.
*/
- down_write(&EXT4_I(inode)->i_mmap_sem);
- truncate_inode_pages(inode->i_mapping, inode->i_size);
+ filemap_invalidate_lock(mapping);
+ truncate_inode_pages(mapping, inode->i_size);
ext4_truncate(inode);
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
}
/*
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index d2cf48c5a2e4..eb222b35edef 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3187,12 +3187,12 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
if (to > i_size && !f2fs_verity_in_progress(inode)) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
truncate_pagecache(inode, i_size);
f2fs_truncate_blocks(inode, i_size, true);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -3852,7 +3852,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
int ret = 0;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
set_inode_flag(inode, FI_ALIGNED_WRITE);
@@ -3894,7 +3894,7 @@ done:
clear_inode_flag(inode, FI_DO_DEFRAG);
clear_inode_flag(inode, FI_ALIGNED_WRITE);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ee8eb33e2c25..906b2c4b50e7 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -754,7 +754,6 @@ struct f2fs_inode_info {
/* avoid racing between foreground op and gc */
struct rw_semaphore i_gc_rwsem[2];
- struct rw_semaphore i_mmap_sem;
struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */
int i_extra_isize; /* size of extra space located in i_addr */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 6afd4562335f..1ff333755721 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -38,10 +38,7 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
struct inode *inode = file_inode(vmf->vma->vm_file);
vm_fault_t ret;
- down_read(&F2FS_I(inode)->i_mmap_sem);
ret = filemap_fault(vmf);
- up_read(&F2FS_I(inode)->i_mmap_sem);
-
if (!ret)
f2fs_update_iostat(F2FS_I_SB(inode), APP_MAPPED_READ_IO,
F2FS_BLKSIZE);
@@ -101,7 +98,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
file_update_time(vmf->vma->vm_file);
- down_read(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock_shared(inode->i_mapping);
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping ||
page_offset(page) > i_size_read(inode) ||
@@ -159,7 +156,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
trace_f2fs_vm_page_mkwrite(page, DATA);
out_sem:
- up_read(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock_shared(inode->i_mapping);
sb_end_pagefault(inode->i_sb);
err:
@@ -940,7 +937,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
}
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
truncate_setsize(inode, attr->ia_size);
@@ -950,7 +947,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
* do not trim all blocks after i_size if target size is
* larger than i_size.
*/
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (err)
return err;
@@ -1095,7 +1092,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
blk_end = (loff_t)pg_end << PAGE_SHIFT;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
truncate_inode_pages_range(mapping, blk_start,
blk_end - 1);
@@ -1104,7 +1101,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
ret = f2fs_truncate_hole(inode, pg_start, pg_end);
f2fs_unlock_op(sbi);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -1339,7 +1336,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
f2fs_lock_op(sbi);
f2fs_drop_extent_tree(inode);
@@ -1347,7 +1344,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
f2fs_unlock_op(sbi);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
@@ -1378,13 +1375,13 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
return ret;
/* write out all moved pages, if possible */
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
new_size = i_size_read(inode) - len;
ret = f2fs_truncate_blocks(inode, new_size, true);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
if (!ret)
f2fs_i_size_write(inode, new_size);
return ret;
@@ -1484,7 +1481,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
pgoff_t end;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
truncate_pagecache_range(inode,
(loff_t)index << PAGE_SHIFT,
@@ -1496,7 +1493,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
if (ret) {
f2fs_unlock_op(sbi);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
@@ -1508,7 +1505,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_balance_fs(sbi, dn.node_changed);
@@ -1543,6 +1540,7 @@ out:
static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct address_space *mapping = inode->i_mapping;
pgoff_t nr, pg_start, pg_end, delta, idx;
loff_t new_size;
int ret = 0;
@@ -1565,14 +1563,14 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_balance_fs(sbi, true);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
if (ret)
return ret;
/* write out all dirty pages from offset */
- ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
+ ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
if (ret)
return ret;
@@ -1583,7 +1581,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
truncate_pagecache(inode, offset);
while (!ret && idx > pg_start) {
@@ -1599,14 +1597,14 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
idx + delta, nr, false);
f2fs_unlock_op(sbi);
}
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
/* write out all moved pages, if possible */
- down_write(&F2FS_I(inode)->i_mmap_sem);
- filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
+ filemap_invalidate_lock(mapping);
+ filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
if (!ret)
f2fs_i_size_write(inode, new_size);
@@ -3440,7 +3438,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
goto out;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
@@ -3476,7 +3474,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
}
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
out:
inode_unlock(inode);
@@ -3593,7 +3591,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
}
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
@@ -3629,7 +3627,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
}
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
if (ret >= 0) {
clear_inode_flag(inode, FI_COMPRESS_RELEASED);
@@ -3748,7 +3746,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
goto err;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
ret = filemap_write_and_wait_range(mapping, range.start,
to_end ? LLONG_MAX : end_addr - 1);
@@ -3835,7 +3833,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
ret = f2fs_secure_erase(prev_bdev, inode, prev_index,
prev_block, len, range.flags);
out:
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
err:
inode_unlock(inode);
@@ -4313,9 +4311,9 @@ write:
/* if we couldn't write data, we should deallocate blocks. */
if (preallocated && i_size_read(inode) < target_size) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
f2fs_truncate(inode);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index e149c8c66a71..9c528e583c9d 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -1323,9 +1323,19 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
return target;
}
+static int f2fs_encrypted_symlink_getattr(struct user_namespace *mnt_userns,
+ const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
+{
+ f2fs_getattr(mnt_userns, path, stat, request_mask, query_flags);
+
+ return fscrypt_symlink_getattr(path, stat);
+}
+
const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
.get_link = f2fs_encrypted_get_link,
- .getattr = f2fs_getattr,
+ .getattr = f2fs_encrypted_symlink_getattr,
.setattr = f2fs_setattr,
.listxattr = f2fs_listxattr,
};
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8fecd3050ccd..ce2ab1b85c11 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1289,7 +1289,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
mutex_init(&fi->inmem_lock);
init_rwsem(&fi->i_gc_rwsem[READ]);
init_rwsem(&fi->i_gc_rwsem[WRITE]);
- init_rwsem(&fi->i_mmap_sem);
init_rwsem(&fi->i_xattr_sem);
/* Will be used by directory only */
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 6642246206bd..daad532a4e2b 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -378,7 +378,7 @@ out:
ret = kstrtol(name, 10, &data);
if (ret)
return ret;
- if (data >= IOPRIO_BE_NR || data < 0)
+ if (data >= IOPRIO_NR_LEVELS || data < 0)
return -EINVAL;
cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, data);
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 860e884e56e8..978ac6751aeb 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -5,6 +5,7 @@
#include <linux/blkdev.h>
#include <linux/sched/signal.h>
+#include <linux/backing-dev-defs.h>
#include "fat.h"
struct fatent_operations {
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f946bec8f1f1..68added37c15 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -150,7 +150,8 @@ void f_delown(struct file *filp)
pid_t f_getown(struct file *filp)
{
pid_t pid = 0;
- read_lock(&filp->f_owner.lock);
+
+ read_lock_irq(&filp->f_owner.lock);
rcu_read_lock();
if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) {
pid = pid_vnr(filp->f_owner.pid);
@@ -158,7 +159,7 @@ pid_t f_getown(struct file *filp)
pid = -pid;
}
rcu_read_unlock();
- read_unlock(&filp->f_owner.lock);
+ read_unlock_irq(&filp->f_owner.lock);
return pid;
}
@@ -208,7 +209,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
struct f_owner_ex owner = {};
int ret = 0;
- read_lock(&filp->f_owner.lock);
+ read_lock_irq(&filp->f_owner.lock);
rcu_read_lock();
if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type))
owner.pid = pid_vnr(filp->f_owner.pid);
@@ -231,7 +232,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
ret = -EINVAL;
break;
}
- read_unlock(&filp->f_owner.lock);
+ read_unlock_irq(&filp->f_owner.lock);
if (!ret) {
ret = copy_to_user(owner_p, &owner, sizeof(owner));
@@ -249,10 +250,10 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
uid_t src[2];
int err;
- read_lock(&filp->f_owner.lock);
+ read_lock_irq(&filp->f_owner.lock);
src[0] = from_kuid(user_ns, filp->f_owner.uid);
src[1] = from_kuid(user_ns, filp->f_owner.euid);
- read_unlock(&filp->f_owner.lock);
+ read_unlock_irq(&filp->f_owner.lock);
err = put_user(src[0], &dst[0]);
err |= put_user(src[1], &dst[1]);
@@ -1003,13 +1004,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
while (fa) {
struct fown_struct *fown;
+ unsigned long flags;
if (fa->magic != FASYNC_MAGIC) {
printk(KERN_ERR "kill_fasync: bad magic number in "
"fasync_struct!\n");
return;
}
- read_lock(&fa->fa_lock);
+ read_lock_irqsave(&fa->fa_lock, flags);
if (fa->fa_file) {
fown = &fa->fa_file->f_owner;
/* Don't send SIGURG to processes which have not set a
@@ -1018,7 +1020,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
if (!(sig == SIGURG && fown->signum == 0))
send_sigio(fown, fa->fa_fd, band);
}
- read_unlock(&fa->fa_lock);
+ read_unlock_irqrestore(&fa->fa_lock, flags);
fa = rcu_dereference(fa->fa_next);
}
}
diff --git a/fs/file.c b/fs/file.c
index 86dc9956af32..d8afa8266859 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -596,18 +596,32 @@ void fd_install(unsigned int fd, struct file *file)
EXPORT_SYMBOL(fd_install);
+/**
+ * pick_file - return file associated with fd
+ * @files: file struct to retrieve file from
+ * @fd: file descriptor to retrieve file for
+ *
+ * If this function returns an EINVAL error pointer, the fd was beyond the
+ * current maximum number of file descriptors for that fdtable.
+ *
+ * Returns: The file associated with @fd, on error returns an error pointer.
+ */
static struct file *pick_file(struct files_struct *files, unsigned fd)
{
- struct file *file = NULL;
+ struct file *file;
struct fdtable *fdt;
spin_lock(&files->file_lock);
fdt = files_fdtable(files);
- if (fd >= fdt->max_fds)
+ if (fd >= fdt->max_fds) {
+ file = ERR_PTR(-EINVAL);
goto out_unlock;
+ }
file = fdt->fd[fd];
- if (!file)
+ if (!file) {
+ file = ERR_PTR(-EBADF);
goto out_unlock;
+ }
rcu_assign_pointer(fdt->fd[fd], NULL);
__put_unused_fd(files, fd);
@@ -622,7 +636,7 @@ int close_fd(unsigned fd)
struct file *file;
file = pick_file(files, fd);
- if (!file)
+ if (IS_ERR(file))
return -EBADF;
return filp_close(file, files);
@@ -663,11 +677,16 @@ static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
struct file *file;
file = pick_file(cur_fds, fd++);
- if (!file)
+ if (!IS_ERR(file)) {
+ /* found a valid file to close */
+ filp_close(file, cur_fds);
+ cond_resched();
continue;
+ }
- filp_close(file, cur_fds);
- cond_resched();
+ /* beyond the last fd in that table */
+ if (PTR_ERR(file) == -EINVAL)
+ return;
}
}
@@ -682,7 +701,6 @@ static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
*/
int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
{
- unsigned int cur_max;
struct task_struct *me = current;
struct files_struct *cur_fds = me->files, *fds = NULL;
@@ -692,26 +710,26 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
if (fd > max_fd)
return -EINVAL;
- rcu_read_lock();
- cur_max = files_fdtable(cur_fds)->max_fds;
- rcu_read_unlock();
-
- /* cap to last valid index into fdtable */
- cur_max--;
-
if (flags & CLOSE_RANGE_UNSHARE) {
int ret;
unsigned int max_unshare_fds = NR_OPEN_MAX;
/*
- * If the requested range is greater than the current maximum,
- * we're closing everything so only copy all file descriptors
- * beneath the lowest file descriptor.
- * If the caller requested all fds to be made cloexec copy all
- * of the file descriptors since they still want to use them.
+ * If the caller requested all fds to be made cloexec we always
+ * copy all of the file descriptors since they still want to
+ * use them.
*/
- if (!(flags & CLOSE_RANGE_CLOEXEC) && (max_fd >= cur_max))
- max_unshare_fds = fd;
+ if (!(flags & CLOSE_RANGE_CLOEXEC)) {
+ /*
+ * If the requested range is greater than the current
+ * maximum, we're closing everything so only copy all
+ * file descriptors beneath the lowest file descriptor.
+ */
+ rcu_read_lock();
+ if (max_fd >= last_fd(files_fdtable(cur_fds)))
+ max_unshare_fds = fd;
+ rcu_read_unlock();
+ }
ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds);
if (ret)
@@ -725,8 +743,6 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
swap(cur_fds, fds);
}
- max_fd = min(max_fd, cur_max);
-
if (flags & CLOSE_RANGE_CLOEXEC)
__range_cloexec(cur_fds, fd, max_fd);
else
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 4c3370548982..eb57dade6076 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2730,23 +2730,6 @@ int write_inode_now(struct inode *inode, int sync)
EXPORT_SYMBOL(write_inode_now);
/**
- * sync_inode - write an inode and its pages to disk.
- * @inode: the inode to sync
- * @wbc: controls the writeback mode
- *
- * sync_inode() will write an inode and its pages to disk. It will also
- * correctly update the inode on its superblock's dirty inode lists and will
- * update inode->i_state.
- *
- * The caller must have a ref on the inode.
- */
-int sync_inode(struct inode *inode, struct writeback_control *wbc)
-{
- return writeback_single_inode(inode, wbc);
-}
-EXPORT_SYMBOL(sync_inode);
-
-/**
* sync_inode_metadata - write an inode to disk
* @inode: the inode to sync
* @wait: wait for I/O to complete.
@@ -2762,6 +2745,6 @@ int sync_inode_metadata(struct inode *inode, int wait)
.nr_to_write = 0, /* metadata-only */
};
- return sync_inode(inode, &wbc);
+ return writeback_single_inode(inode, &wbc);
}
EXPORT_SYMBOL(sync_inode_metadata);
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index e55723744f58..281d79f8b3d3 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -444,12 +444,12 @@ static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos,
/*
* Can't do inline reclaim in fault path. We call
* dax_layout_busy_page() before we free a range. And
- * fuse_wait_dax_page() drops fi->i_mmap_sem lock and requires it.
- * In fault path we enter with fi->i_mmap_sem held and can't drop
- * it. Also in fault path we hold fi->i_mmap_sem shared and not
- * exclusive, so that creates further issues with fuse_wait_dax_page().
- * Hence return -EAGAIN and fuse_dax_fault() will wait for a memory
- * range to become free and retry.
+ * fuse_wait_dax_page() drops mapping->invalidate_lock and requires it.
+ * In fault path we enter with mapping->invalidate_lock held and can't
+ * drop it. Also in fault path we hold mapping->invalidate_lock shared
+ * and not exclusive, so that creates further issues with
+ * fuse_wait_dax_page(). Hence return -EAGAIN and fuse_dax_fault()
+ * will wait for a memory range to become free and retry.
*/
if (flags & IOMAP_FAULT) {
alloc_dmap = alloc_dax_mapping(fcd);
@@ -513,7 +513,7 @@ static int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos,
down_write(&fi->dax->sem);
node = interval_tree_iter_first(&fi->dax->tree, idx, idx);
- /* We are holding either inode lock or i_mmap_sem, and that should
+ /* We are holding either inode lock or invalidate_lock, and that should
* ensure that dmap can't be truncated. We are holding a reference
* on dmap and that should make sure it can't be reclaimed. So dmap
* should still be there in tree despite the fact we dropped and
@@ -660,14 +660,12 @@ static const struct iomap_ops fuse_iomap_ops = {
static void fuse_wait_dax_page(struct inode *inode)
{
- struct fuse_inode *fi = get_fuse_inode(inode);
-
- up_write(&fi->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
schedule();
- down_write(&fi->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
}
-/* Should be called with fi->i_mmap_sem lock held exclusively */
+/* Should be called with mapping->invalidate_lock held exclusively */
static int __fuse_dax_break_layouts(struct inode *inode, bool *retry,
loff_t start, loff_t end)
{
@@ -813,18 +811,18 @@ retry:
* we do not want any read/write/mmap to make progress and try
* to populate page cache or access memory we are trying to free.
*/
- down_read(&get_fuse_inode(inode)->i_mmap_sem);
+ filemap_invalidate_lock_shared(inode->i_mapping);
ret = dax_iomap_fault(vmf, pe_size, &pfn, &error, &fuse_iomap_ops);
if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) {
error = 0;
retry = true;
- up_read(&get_fuse_inode(inode)->i_mmap_sem);
+ filemap_invalidate_unlock_shared(inode->i_mapping);
goto retry;
}
if (ret & VM_FAULT_NEEDDSYNC)
ret = dax_finish_sync_fault(vmf, pe_size, pfn);
- up_read(&get_fuse_inode(inode)->i_mmap_sem);
+ filemap_invalidate_unlock_shared(inode->i_mapping);
if (write)
sb_end_pagefault(sb);
@@ -960,7 +958,7 @@ inode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode,
int ret;
struct interval_tree_node *node;
- down_write(&fi->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
/* Lookup a dmap and corresponding file offset to reclaim. */
down_read(&fi->dax->sem);
@@ -1021,7 +1019,7 @@ inode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode,
out_write_dmap_sem:
up_write(&fi->dax->sem);
out_mmap_sem:
- up_write(&fi->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
return dmap;
}
@@ -1050,10 +1048,10 @@ alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode)
* had a reference or some other temporary failure,
* Try again. We want to give up inline reclaim only
* if there is no range assigned to this node. Otherwise
- * if a deadlock is possible if we sleep with fi->i_mmap_sem
- * held and worker to free memory can't make progress due
- * to unavailability of fi->i_mmap_sem lock. So sleep
- * only if fi->dax->nr=0
+ * a deadlock is possible if we sleep with
+ * mapping->invalidate_lock held and worker to free memory
+ * can't make progress due to unavailability of
+ * mapping->invalidate_lock. So sleep only if fi->dax->nr=0
*/
if (retry)
continue;
@@ -1061,8 +1059,8 @@ alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode)
* There are no mappings which can be reclaimed. Wait for one.
* We are not holding fi->dax->sem. So it is possible
* that range gets added now. But as we are not holding
- * fi->i_mmap_sem, worker should still be able to free up
- * a range and wake us up.
+ * mapping->invalidate_lock, worker should still be able to
+ * free up a range and wake us up.
*/
if (!fi->dax->nr && !(fcd->nr_free_ranges > 0)) {
if (wait_event_killable_exclusive(fcd->range_waitq,
@@ -1108,7 +1106,7 @@ static int lookup_and_reclaim_dmap_locked(struct fuse_conn_dax *fcd,
/*
* Free a range of memory.
* Locking:
- * 1. Take fi->i_mmap_sem to block dax faults.
+ * 1. Take mapping->invalidate_lock to block dax faults.
* 2. Take fi->dax->sem to protect interval tree and also to make sure
* read/write can not reuse a dmap which we might be freeing.
*/
@@ -1122,7 +1120,7 @@ static int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd,
loff_t dmap_start = start_idx << FUSE_DAX_SHIFT;
loff_t dmap_end = (dmap_start + FUSE_DAX_SZ) - 1;
- down_write(&fi->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end);
if (ret) {
pr_debug("virtio_fs: fuse_dax_break_layouts() failed. err=%d\n",
@@ -1134,7 +1132,7 @@ static int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd,
ret = lookup_and_reclaim_dmap_locked(fcd, inode, start_idx);
up_write(&fi->dax->sem);
out_mmap_sem:
- up_write(&fi->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
return ret;
}
@@ -1235,8 +1233,6 @@ void fuse_dax_conn_free(struct fuse_conn *fc)
static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
{
long nr_pages, nr_ranges;
- void *kaddr;
- pfn_t pfn;
struct fuse_dax_mapping *range;
int ret, id;
size_t dax_size = -1;
@@ -1248,8 +1244,8 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker);
id = dax_read_lock();
- nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), &kaddr,
- &pfn);
+ nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), NULL,
+ NULL);
dax_read_unlock(id);
if (nr_pages < 0) {
pr_debug("dax_direct_access() returned %ld\n", nr_pages);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index eade6f965b2e..d9b977c0f38d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1556,6 +1556,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_conn *fc = fm->fc;
struct fuse_inode *fi = get_fuse_inode(inode);
+ struct address_space *mapping = inode->i_mapping;
FUSE_ARGS(args);
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
@@ -1580,11 +1581,11 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
}
if (FUSE_IS_DAX(inode) && is_truncate) {
- down_write(&fi->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
fault_blocked = true;
err = fuse_dax_break_layouts(inode, 0, 0);
if (err) {
- up_write(&fi->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
return err;
}
}
@@ -1694,13 +1695,13 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
if ((is_truncate || !is_wb) &&
S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
truncate_pagecache(inode, outarg.attr.size);
- invalidate_inode_pages2(inode->i_mapping);
+ invalidate_inode_pages2(mapping);
}
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
out:
if (fault_blocked)
- up_write(&fi->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
return 0;
@@ -1711,7 +1712,7 @@ error:
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
if (fault_blocked)
- up_write(&fi->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
return err;
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 97f860cfc195..621a662c19fb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -243,7 +243,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
}
if (dax_truncate) {
- down_write(&get_fuse_inode(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
err = fuse_dax_break_layouts(inode, 0, 0);
if (err)
goto out;
@@ -255,7 +255,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
out:
if (dax_truncate)
- up_write(&get_fuse_inode(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
if (is_wb_truncate | dax_truncate) {
fuse_release_nowrite(inode);
@@ -2920,7 +2920,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (lock_inode) {
inode_lock(inode);
if (block_faults) {
- down_write(&fi->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
err = fuse_dax_break_layouts(inode, 0, 0);
if (err)
goto out;
@@ -2976,7 +2976,7 @@ out:
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
if (block_faults)
- up_write(&fi->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
if (lock_inode)
inode_unlock(inode);
@@ -3045,7 +3045,7 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
* modifications. Yet this does give less guarantees than if the
* copying was performed with write(2).
*
- * To fix this a i_mmap_sem style lock could be used to prevent new
+ * To fix this a mapping->invalidate_lock could be used to prevent new
* faults while the copy is ongoing.
*/
err = fuse_writeback_range(inode_out, pos_out, pos_out + len - 1);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 07829ce78695..6fb639b97ea8 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -149,13 +149,6 @@ struct fuse_inode {
/** Lock to protect write related fields */
spinlock_t lock;
- /**
- * Can't take inode lock in fault path (leads to circular dependency).
- * Introduce another semaphore which can be taken in fault path and
- * then other filesystem paths can take this to block faults.
- */
- struct rw_semaphore i_mmap_sem;
-
#ifdef CONFIG_FUSE_DAX
/*
* Dax specific inode data
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index b9beb39a4a18..e07e429f32e1 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -85,7 +85,6 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi->orig_ino = 0;
fi->state = 0;
mutex_init(&fi->mutex);
- init_rwsem(&fi->i_mmap_sem);
spin_lock_init(&fi->lock);
fi->forget = fuse_alloc_forget();
if (!fi->forget)
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 81d8f064126e..005e920f5d4a 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -574,10 +574,9 @@ void adjust_fs_space(struct inode *inode)
{
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
- struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
- struct buffer_head *m_bh, *l_bh;
+ struct buffer_head *m_bh;
u64 fs_total, new_free;
if (gfs2_trans_begin(sdp, 2 * RES_STATFS, 0) != 0)
@@ -600,11 +599,7 @@ void adjust_fs_space(struct inode *inode)
(unsigned long long)new_free);
gfs2_statfs_change(sdp, new_free, new_free, 0);
- if (gfs2_meta_inode_buffer(l_ip, &l_bh) != 0)
- goto out2;
- update_statfs(sdp, m_bh, l_bh);
- brelse(l_bh);
-out2:
+ update_statfs(sdp, m_bh);
brelse(m_bh);
out:
sdp->sd_rindex_uptodate = 0;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index ed8b67b21718..5414c2c33580 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1002,7 +1002,7 @@ static void gfs2_write_unlock(struct inode *inode)
}
static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
- unsigned len, struct iomap *iomap)
+ unsigned len)
{
unsigned int blockmask = i_blocksize(inode) - 1;
struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1013,8 +1013,7 @@ static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
}
static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
- unsigned copied, struct page *page,
- struct iomap *iomap)
+ unsigned copied, struct page *page)
{
struct gfs2_trans *tr = current->journal_info;
struct gfs2_inode *ip = GFS2_I(inode);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 84ec053d43b4..c559827cb6f9 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1237,9 +1237,6 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
if (!(fl->fl_flags & FL_POSIX))
return -ENOLCK;
- if (__mandatory_lock(&ip->i_inode) && fl->fl_type != F_UNLCK)
- return -ENOLCK;
-
if (cmd == F_CANCELLK) {
/* Hack: */
cmd = F_SETLK;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1f3902ecdded..e0eaa9cf9fb6 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1494,12 +1494,11 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
list_del_init(&gh->gh_list);
clear_bit(HIF_HOLDER, &gh->gh_iflags);
- if (find_first_holder(gl) == NULL) {
- if (list_empty(&gl->gl_holders) &&
- !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
- !test_bit(GLF_DEMOTE, &gl->gl_flags))
- fast_path = 1;
- }
+ if (list_empty(&gl->gl_holders) &&
+ !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
+ !test_bit(GLF_DEMOTE, &gl->gl_flags))
+ fast_path = 1;
+
if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
gfs2_glock_add_to_lru(gl);
@@ -2077,8 +2076,6 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
*p++ = 'H';
if (test_bit(HIF_WAIT, &iflags))
*p++ = 'W';
- if (test_bit(HIF_FIRST, &iflags))
- *p++ = 'F';
*p = 0;
return buf;
}
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 54d3fbeb3002..79c621c7863d 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -33,16 +33,18 @@ extern struct workqueue_struct *gfs2_control_wq;
static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
{
- fs_err(gl->gl_name.ln_sbd,
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+ fs_err(sdp,
"AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page "
"state 0x%lx\n",
bh, (unsigned long long)bh->b_blocknr, bh->b_state,
bh->b_page->mapping, bh->b_page->flags);
- fs_err(gl->gl_name.ln_sbd, "AIL glock %u:%llu mapping %p\n",
+ fs_err(sdp, "AIL glock %u:%llu mapping %p\n",
gl->gl_name.ln_type, gl->gl_name.ln_number,
gfs2_glock2aspace(gl));
- gfs2_lm(gl->gl_name.ln_sbd, "AIL error\n");
- gfs2_withdraw(gl->gl_name.ln_sbd);
+ gfs2_lm(sdp, "AIL error\n");
+ gfs2_withdraw_delayed(sdp);
}
/**
@@ -610,16 +612,13 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl)
j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
- if (error)
- gfs2_consist(sdp);
- if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
- gfs2_consist(sdp);
-
- /* Initialize some head of the log stuff */
- if (!gfs2_withdrawn(sdp)) {
- sdp->sd_log_sequence = head.lh_sequence + 1;
- gfs2_log_pointers_init(sdp, head.lh_blkno);
- }
+ if (gfs2_assert_withdraw_delayed(sdp, !error))
+ return error;
+ if (gfs2_assert_withdraw_delayed(sdp, head.lh_flags &
+ GFS2_LOG_HEAD_UNMOUNT))
+ return -EIO;
+ sdp->sd_log_sequence = head.lh_sequence + 1;
+ gfs2_log_pointers_init(sdp, head.lh_blkno);
}
return 0;
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index e6f820f146cb..0fe49770166e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -253,7 +253,6 @@ struct gfs2_lkstats {
enum {
/* States */
HIF_HOLDER = 6, /* Set for gh that "holds" the glock */
- HIF_FIRST = 7,
HIF_WAIT = 10,
};
@@ -768,6 +767,7 @@ struct gfs2_sbd {
struct gfs2_glock *sd_jinode_gl;
struct gfs2_holder sd_sc_gh;
+ struct buffer_head *sd_sc_bh;
struct gfs2_holder sd_qc_gh;
struct completion sd_journal_ready;
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index dac040162ecc..50578f881e6d 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -299,6 +299,11 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_update_request_times(gl);
+ /* don't want to call dlm if we've unmounted the lock protocol */
+ if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) {
+ gfs2_glock_free(gl);
+ return;
+ }
/* don't want to skip dlm_unlock writing the lvb when lock has one */
if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 42c15cfc0821..f0ee3ff6f9a8 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -594,7 +594,7 @@ void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
{
unsigned int blks = tr->tr_reserved;
unsigned int revokes = tr->tr_revokes;
- unsigned int revoke_blks = 0;
+ unsigned int revoke_blks;
*extra_revokes = 0;
if (revokes) {
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 8ee05d25dfa6..ca0bb3a73912 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -761,6 +761,32 @@ static void buf_lo_before_scan(struct gfs2_jdesc *jd,
jd->jd_replayed_blocks = 0;
}
+#define obsolete_rgrp_replay \
+"Replaying 0x%llx from jid=%d/0x%llx but we already have a bh!\n"
+#define obsolete_rgrp_replay2 \
+"busy:%d, pinned:%d rg_gen:0x%llx, j_gen:0x%llx\n"
+
+static void obsolete_rgrp(struct gfs2_jdesc *jd, struct buffer_head *bh_log,
+ u64 blkno)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+ struct gfs2_rgrpd *rgd;
+ struct gfs2_rgrp *jrgd = (struct gfs2_rgrp *)bh_log->b_data;
+
+ rgd = gfs2_blk2rgrpd(sdp, blkno, false);
+ if (rgd && rgd->rd_addr == blkno &&
+ rgd->rd_bits && rgd->rd_bits->bi_bh) {
+ fs_info(sdp, obsolete_rgrp_replay, (unsigned long long)blkno,
+ jd->jd_jid, bh_log->b_blocknr);
+ fs_info(sdp, obsolete_rgrp_replay2,
+ buffer_busy(rgd->rd_bits->bi_bh) ? 1 : 0,
+ buffer_pinned(rgd->rd_bits->bi_bh),
+ rgd->rd_igeneration,
+ be64_to_cpu(jrgd->rg_igeneration));
+ gfs2_dump_glock(NULL, rgd->rd_gl, true);
+ }
+}
+
static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
struct gfs2_log_descriptor *ld, __be64 *ptr,
int pass)
@@ -799,21 +825,9 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
struct gfs2_meta_header *mh =
(struct gfs2_meta_header *)bh_ip->b_data;
- if (mh->mh_type == cpu_to_be32(GFS2_METATYPE_RG)) {
- struct gfs2_rgrpd *rgd;
-
- rgd = gfs2_blk2rgrpd(sdp, blkno, false);
- if (rgd && rgd->rd_addr == blkno &&
- rgd->rd_bits && rgd->rd_bits->bi_bh) {
- fs_info(sdp, "Replaying 0x%llx but we "
- "already have a bh!\n",
- (unsigned long long)blkno);
- fs_info(sdp, "busy:%d, pinned:%d\n",
- buffer_busy(rgd->rd_bits->bi_bh) ? 1 : 0,
- buffer_pinned(rgd->rd_bits->bi_bh));
- gfs2_dump_glock(NULL, rgd->rd_gl, true);
- }
- }
+ if (mh->mh_type == cpu_to_be32(GFS2_METATYPE_RG))
+ obsolete_rgrp(jd, bh_log, blkno);
+
mark_buffer_dirty(bh_ip);
}
brelse(bh_log);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 7c9619997355..72d30a682ece 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -258,8 +258,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
struct buffer_head *bh, *bhs[2];
int num = 0;
- if (unlikely(gfs2_withdrawn(sdp)) &&
- (!sdp->sd_jdesc || gl != sdp->sd_jinode_gl)) {
+ if (unlikely(gfs2_withdrawn(sdp)) && !gfs2_withdraw_in_prog(sdp)) {
*bhp = NULL;
return -EIO;
}
@@ -317,7 +316,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
- if (unlikely(gfs2_withdrawn(sdp)))
+ if (unlikely(gfs2_withdrawn(sdp)) && !gfs2_withdraw_in_prog(sdp))
return -EIO;
wait_on_buffer(bh);
@@ -328,7 +327,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
gfs2_io_error_bh_wd(sdp, bh);
return -EIO;
}
- if (unlikely(gfs2_withdrawn(sdp)))
+ if (unlikely(gfs2_withdrawn(sdp)) && !gfs2_withdraw_in_prog(sdp))
return -EIO;
return 0;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 5f4504dd0875..7f8410d8fdc1 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -614,6 +614,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
break;
}
+ d_mark_dontcache(jd->jd_inode);
spin_lock(&sdp->sd_jindex_spin);
jd->jd_jid = sdp->sd_journals++;
jip = GFS2_I(jd->jd_inode);
@@ -677,6 +678,7 @@ static int init_statfs(struct gfs2_sbd *sdp)
error = PTR_ERR(lsi->si_sc_inode);
fs_err(sdp, "can't find local \"sc\" file#%u: %d\n",
jd->jd_jid, error);
+ kfree(lsi);
goto free_local;
}
lsi->si_jid = jd->jd_jid;
@@ -695,8 +697,16 @@ static int init_statfs(struct gfs2_sbd *sdp)
fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
goto free_local;
}
+ /* read in the local statfs buffer - other nodes don't change it. */
+ error = gfs2_meta_inode_buffer(ip, &sdp->sd_sc_bh);
+ if (error) {
+ fs_err(sdp, "Cannot read in local statfs: %d\n", error);
+ goto unlock_sd_gh;
+ }
return 0;
+unlock_sd_gh:
+ gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
free_local:
free_local_statfs_inodes(sdp);
iput(pn);
@@ -710,6 +720,7 @@ out:
static void uninit_statfs(struct gfs2_sbd *sdp)
{
if (!sdp->sd_args.ar_spectator) {
+ brelse(sdp->sd_sc_bh);
gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
free_local_statfs_inodes(sdp);
}
@@ -1088,6 +1099,34 @@ void gfs2_online_uevent(struct gfs2_sbd *sdp)
kobject_uevent_env(&sdp->sd_kobj, KOBJ_ONLINE, envp);
}
+static int init_threads(struct gfs2_sbd *sdp)
+{
+ struct task_struct *p;
+ int error = 0;
+
+ p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
+ if (IS_ERR(p)) {
+ error = PTR_ERR(p);
+ fs_err(sdp, "can't start logd thread: %d\n", error);
+ return error;
+ }
+ sdp->sd_logd_process = p;
+
+ p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
+ if (IS_ERR(p)) {
+ error = PTR_ERR(p);
+ fs_err(sdp, "can't start quotad thread: %d\n", error);
+ goto fail;
+ }
+ sdp->sd_quotad_process = p;
+ return 0;
+
+fail:
+ kthread_stop(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
+ return error;
+}
+
/**
* gfs2_fill_super - Read in superblock
* @sb: The VFS superblock
@@ -1216,6 +1255,14 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
goto fail_per_node;
}
+ if (!sb_rdonly(sb)) {
+ error = init_threads(sdp);
+ if (error) {
+ gfs2_withdraw_delayed(sdp);
+ goto fail_per_node;
+ }
+ }
+
error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
if (error)
goto fail_per_node;
@@ -1225,6 +1272,12 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
gfs2_freeze_unlock(&freeze_gh);
if (error) {
+ if (sdp->sd_quotad_process)
+ kthread_stop(sdp->sd_quotad_process);
+ sdp->sd_quotad_process = NULL;
+ if (sdp->sd_logd_process)
+ kthread_stop(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
fs_err(sdp, "can't make FS RW: %d\n", error);
goto fail_per_node;
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4d4ceb0b6903..6e00d15ef0a8 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -119,34 +119,6 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd)
return 0;
}
-static int init_threads(struct gfs2_sbd *sdp)
-{
- struct task_struct *p;
- int error = 0;
-
- p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
- if (IS_ERR(p)) {
- error = PTR_ERR(p);
- fs_err(sdp, "can't start logd thread: %d\n", error);
- return error;
- }
- sdp->sd_logd_process = p;
-
- p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
- if (IS_ERR(p)) {
- error = PTR_ERR(p);
- fs_err(sdp, "can't start quotad thread: %d\n", error);
- goto fail;
- }
- sdp->sd_quotad_process = p;
- return 0;
-
-fail:
- kthread_stop(sdp->sd_logd_process);
- sdp->sd_logd_process = NULL;
- return error;
-}
-
/**
* gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
* @sdp: the filesystem
@@ -161,26 +133,17 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
struct gfs2_log_header_host head;
int error;
- error = init_threads(sdp);
- if (error) {
- gfs2_withdraw_delayed(sdp);
- return error;
- }
-
j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
- if (gfs2_withdrawn(sdp)) {
- error = -EIO;
- goto fail;
- }
+ if (gfs2_withdrawn(sdp))
+ return -EIO;
error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
if (error || gfs2_withdrawn(sdp))
- goto fail;
+ return error;
if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
gfs2_consist(sdp);
- error = -EIO;
- goto fail;
+ return -EIO;
}
/* Initialize some head of the log stuff */
@@ -188,20 +151,8 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
gfs2_log_pointers_init(sdp, head.lh_blkno);
error = gfs2_quota_init(sdp);
- if (error || gfs2_withdrawn(sdp))
- goto fail;
-
- set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
-
- return 0;
-
-fail:
- if (sdp->sd_quotad_process)
- kthread_stop(sdp->sd_quotad_process);
- sdp->sd_quotad_process = NULL;
- if (sdp->sd_logd_process)
- kthread_stop(sdp->sd_logd_process);
- sdp->sd_logd_process = NULL;
+ if (!error && !gfs2_withdrawn(sdp))
+ set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
return error;
}
@@ -227,9 +178,8 @@ int gfs2_statfs_init(struct gfs2_sbd *sdp)
{
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
- struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
- struct buffer_head *m_bh, *l_bh;
+ struct buffer_head *m_bh;
struct gfs2_holder gh;
int error;
@@ -248,21 +198,15 @@ int gfs2_statfs_init(struct gfs2_sbd *sdp)
sizeof(struct gfs2_dinode));
spin_unlock(&sdp->sd_statfs_spin);
} else {
- error = gfs2_meta_inode_buffer(l_ip, &l_bh);
- if (error)
- goto out_m_bh;
-
spin_lock(&sdp->sd_statfs_spin);
gfs2_statfs_change_in(m_sc, m_bh->b_data +
sizeof(struct gfs2_dinode));
- gfs2_statfs_change_in(l_sc, l_bh->b_data +
+ gfs2_statfs_change_in(l_sc, sdp->sd_sc_bh->b_data +
sizeof(struct gfs2_dinode));
spin_unlock(&sdp->sd_statfs_spin);
- brelse(l_bh);
}
-out_m_bh:
brelse(m_bh);
out:
gfs2_glock_dq_uninit(&gh);
@@ -275,22 +219,17 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
- struct buffer_head *l_bh;
s64 x, y;
int need_sync = 0;
- int error;
-
- error = gfs2_meta_inode_buffer(l_ip, &l_bh);
- if (error)
- return;
- gfs2_trans_add_meta(l_ip->i_gl, l_bh);
+ gfs2_trans_add_meta(l_ip->i_gl, sdp->sd_sc_bh);
spin_lock(&sdp->sd_statfs_spin);
l_sc->sc_total += total;
l_sc->sc_free += free;
l_sc->sc_dinodes += dinodes;
- gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode));
+ gfs2_statfs_change_out(l_sc, sdp->sd_sc_bh->b_data +
+ sizeof(struct gfs2_dinode));
if (sdp->sd_args.ar_statfs_percent) {
x = 100 * l_sc->sc_free;
y = m_sc->sc_free * sdp->sd_args.ar_statfs_percent;
@@ -299,20 +238,18 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
}
spin_unlock(&sdp->sd_statfs_spin);
- brelse(l_bh);
if (need_sync)
gfs2_wake_up_statfs(sdp);
}
-void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
- struct buffer_head *l_bh)
+void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh)
{
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
- gfs2_trans_add_meta(l_ip->i_gl, l_bh);
+ gfs2_trans_add_meta(l_ip->i_gl, sdp->sd_sc_bh);
gfs2_trans_add_meta(m_ip->i_gl, m_bh);
spin_lock(&sdp->sd_statfs_spin);
@@ -320,7 +257,7 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
m_sc->sc_free += l_sc->sc_free;
m_sc->sc_dinodes += l_sc->sc_dinodes;
memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
- memset(l_bh->b_data + sizeof(struct gfs2_dinode),
+ memset(sdp->sd_sc_bh->b_data + sizeof(struct gfs2_dinode),
0, sizeof(struct gfs2_statfs_change));
gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
spin_unlock(&sdp->sd_statfs_spin);
@@ -330,11 +267,10 @@ int gfs2_statfs_sync(struct super_block *sb, int type)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
- struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
struct gfs2_holder gh;
- struct buffer_head *m_bh, *l_bh;
+ struct buffer_head *m_bh;
int error;
error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
@@ -355,21 +291,15 @@ int gfs2_statfs_sync(struct super_block *sb, int type)
}
spin_unlock(&sdp->sd_statfs_spin);
- error = gfs2_meta_inode_buffer(l_ip, &l_bh);
- if (error)
- goto out_bh;
-
error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
if (error)
- goto out_bh2;
+ goto out_bh;
- update_statfs(sdp, m_bh, l_bh);
+ update_statfs(sdp, m_bh);
sdp->sd_statfs_force_sync = 0;
gfs2_trans_end(sdp);
-out_bh2:
- brelse(l_bh);
out_bh:
brelse(m_bh);
out_unlock:
@@ -675,6 +605,7 @@ restart:
gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
if (gfs2_holder_initialized(&sdp->sd_jinode_gh))
gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
+ brelse(sdp->sd_sc_bh);
gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
free_local_statfs_inodes(sdp);
@@ -1016,7 +947,7 @@ static int gfs2_drop_inode(struct inode *inode)
gfs2_glock_hold(gl);
if (!gfs2_queue_delete_work(gl, 0))
gfs2_glock_queue_put(gl);
- return false;
+ return 0;
}
return generic_drop_inode(inode);
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index ec4affb33ed5..58d13fd77aed 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -43,8 +43,7 @@ extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc,
const void *buf);
extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc,
void *buf);
-extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
- struct buffer_head *l_bh);
+extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh);
extern int gfs2_statfs_sync(struct super_block *sb, int type);
extern void gfs2_freeze_func(struct work_struct *work);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index f4325b44956d..cf345a86ef67 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -278,6 +278,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
goto skip_recovery;
}
sdp->sd_jdesc->jd_inode = inode;
+ d_mark_dontcache(inode);
/*
* Now wait until recovery is complete.
@@ -295,7 +296,7 @@ skip_recovery:
fs_warn(sdp, "Journal recovery complete for jid %d.\n",
sdp->sd_lockstruct.ls_jid);
else
- fs_warn(sdp, "Journal recovery skipped for %d until next "
+ fs_warn(sdp, "Journal recovery skipped for jid %d until next "
"mount.\n", sdp->sd_lockstruct.ls_jid);
fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held);
sdp->sd_glock_dqs_held = 0;
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 69e1a0ae5a4d..78ec190f4155 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -218,6 +218,11 @@ static inline bool gfs2_withdrawing(struct gfs2_sbd *sdp)
!test_bit(SDF_WITHDRAWN, &sdp->sd_flags);
}
+static inline bool gfs2_withdraw_in_prog(struct gfs2_sbd *sdp)
+{
+ return test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
+}
+
#define gfs2_tune_get(sdp, field) \
gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
diff --git a/fs/hpfs/Kconfig b/fs/hpfs/Kconfig
index 2b36dc6f0a10..ec975f466877 100644
--- a/fs/hpfs/Kconfig
+++ b/fs/hpfs/Kconfig
@@ -2,6 +2,7 @@
config HPFS_FS
tristate "OS/2 HPFS file system support"
depends on BLOCK
+ select FS_IOMAP
help
OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
is the file system used for organizing files on OS/2 hard disk
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index c3a49aacf20a..fb37f57130aa 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -9,6 +9,7 @@
#include "hpfs_fn.h"
#include <linux/mpage.h>
+#include <linux/iomap.h>
#include <linux/fiemap.h>
#define BLOCKS(size) (((size) + 511) >> 9)
@@ -116,6 +117,47 @@ static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_he
return r;
}
+static int hpfs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ unsigned flags, struct iomap *iomap, struct iomap *srcmap)
+{
+ struct super_block *sb = inode->i_sb;
+ unsigned int blkbits = inode->i_blkbits;
+ unsigned int n_secs;
+ secno s;
+
+ if (WARN_ON_ONCE(flags & (IOMAP_WRITE | IOMAP_ZERO)))
+ return -EINVAL;
+
+ iomap->bdev = inode->i_sb->s_bdev;
+ iomap->offset = offset;
+
+ hpfs_lock(sb);
+ s = hpfs_bmap(inode, offset >> blkbits, &n_secs);
+ if (s) {
+ n_secs = hpfs_search_hotfix_map_for_range(sb, s,
+ min_t(loff_t, n_secs, length));
+ if (unlikely(!n_secs)) {
+ s = hpfs_search_hotfix_map(sb, s);
+ n_secs = 1;
+ }
+ iomap->type = IOMAP_MAPPED;
+ iomap->flags = IOMAP_F_MERGED;
+ iomap->addr = (u64)s << blkbits;
+ iomap->length = (u64)n_secs << blkbits;
+ } else {
+ iomap->type = IOMAP_HOLE;
+ iomap->addr = IOMAP_NULL_ADDR;
+ iomap->length = 1 << blkbits;
+ }
+
+ hpfs_unlock(sb);
+ return 0;
+}
+
+static const struct iomap_ops hpfs_iomap_ops = {
+ .iomap_begin = hpfs_iomap_begin,
+};
+
static int hpfs_readpage(struct file *file, struct page *page)
{
return mpage_readpage(page, hpfs_get_block);
@@ -192,7 +234,14 @@ static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
static int hpfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len)
{
- return generic_block_fiemap(inode, fieinfo, start, len, hpfs_get_block);
+ int ret;
+
+ inode_lock(inode);
+ len = min_t(u64, len, i_size_read(inode));
+ ret = iomap_fiemap(inode, fieinfo, start, len, &hpfs_iomap_ops);
+ inode_unlock(inode);
+
+ return ret;
}
const struct address_space_operations hpfs_aops = {
diff --git a/fs/inode.c b/fs/inode.c
index c93500d84264..84c528cd1955 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -190,6 +190,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
mapping->private_data = NULL;
mapping->writeback_index = 0;
+ __init_rwsem(&mapping->invalidate_lock, "mapping.invalidate_lock",
+ &sb->s_type->invalidate_lock_key);
inode->i_private = NULL;
inode->i_mapping = mapping;
INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */
diff --git a/fs/internal.h b/fs/internal.h
index 82e8eb32ff3d..68a2ae029a27 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -48,8 +48,8 @@ static inline int emergency_thaw_bdev(struct super_block *sb)
/*
* buffer.c
*/
-extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
- get_block_t *get_block, struct iomap *iomap);
+int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
+ get_block_t *get_block, const struct iomap *iomap);
/*
* char_dev.c
@@ -71,11 +71,15 @@ extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
struct path *path, struct path *root);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *);
-long do_rmdir(int dfd, struct filename *name);
-long do_unlinkat(int dfd, struct filename *name);
+int do_rmdir(int dfd, struct filename *name);
+int do_unlinkat(int dfd, struct filename *name);
int may_linkat(struct user_namespace *mnt_userns, struct path *link);
int do_renameat2(int olddfd, struct filename *oldname, int newdfd,
struct filename *newname, unsigned int flags);
+int do_mkdirat(int dfd, struct filename *name, umode_t mode);
+int do_symlinkat(struct filename *from, int newdfd, struct filename *to);
+int do_linkat(int olddfd, struct filename *old, int newdfd,
+ struct filename *new, int flags);
/*
* namespace.c
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 12fc19353bb0..cd9bd095fb1b 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -51,6 +51,10 @@ struct io_worker {
struct completion ref_done;
+ unsigned long create_state;
+ struct callback_head create_work;
+ int create_index;
+
struct rcu_head rcu;
};
@@ -129,7 +133,7 @@ struct io_cb_cancel_data {
bool cancel_all;
};
-static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
+static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first);
static void io_wqe_dec_running(struct io_worker *worker);
static bool io_worker_get(struct io_worker *worker)
@@ -174,7 +178,7 @@ static void io_worker_exit(struct io_worker *worker)
complete(&worker->ref_done);
wait_for_completion(&worker->ref_done);
- raw_spin_lock_irq(&wqe->lock);
+ raw_spin_lock(&wqe->lock);
if (worker->flags & IO_WORKER_F_FREE)
hlist_nulls_del_rcu(&worker->nulls_node);
list_del_rcu(&worker->all_list);
@@ -184,7 +188,7 @@ static void io_worker_exit(struct io_worker *worker)
worker->flags = 0;
current->flags &= ~PF_IO_WORKER;
preempt_enable();
- raw_spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock(&wqe->lock);
kfree_rcu(worker, rcu);
io_worker_ref_put(wqe->wq);
@@ -248,18 +252,21 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
rcu_read_unlock();
if (!ret) {
- bool do_create = false;
+ bool do_create = false, first = false;
- raw_spin_lock_irq(&wqe->lock);
+ raw_spin_lock(&wqe->lock);
if (acct->nr_workers < acct->max_workers) {
- atomic_inc(&acct->nr_running);
- atomic_inc(&wqe->wq->worker_refs);
+ if (!acct->nr_workers)
+ first = true;
acct->nr_workers++;
do_create = true;
}
- raw_spin_unlock_irq(&wqe->lock);
- if (do_create)
- create_io_worker(wqe->wq, wqe, acct->index);
+ raw_spin_unlock(&wqe->lock);
+ if (do_create) {
+ atomic_inc(&acct->nr_running);
+ atomic_inc(&wqe->wq->worker_refs);
+ create_io_worker(wqe->wq, wqe, acct->index, first);
+ }
}
}
@@ -270,50 +277,63 @@ static void io_wqe_inc_running(struct io_worker *worker)
atomic_inc(&acct->nr_running);
}
-struct create_worker_data {
- struct callback_head work;
- struct io_wqe *wqe;
- int index;
-};
-
static void create_worker_cb(struct callback_head *cb)
{
- struct create_worker_data *cwd;
+ struct io_worker *worker;
struct io_wq *wq;
struct io_wqe *wqe;
struct io_wqe_acct *acct;
+ bool do_create = false, first = false;
- cwd = container_of(cb, struct create_worker_data, work);
- wqe = cwd->wqe;
+ worker = container_of(cb, struct io_worker, create_work);
+ wqe = worker->wqe;
wq = wqe->wq;
- acct = &wqe->acct[cwd->index];
- raw_spin_lock_irq(&wqe->lock);
- if (acct->nr_workers < acct->max_workers)
+ acct = &wqe->acct[worker->create_index];
+ raw_spin_lock(&wqe->lock);
+ if (acct->nr_workers < acct->max_workers) {
+ if (!acct->nr_workers)
+ first = true;
acct->nr_workers++;
- raw_spin_unlock_irq(&wqe->lock);
- create_io_worker(wq, cwd->wqe, cwd->index);
- kfree(cwd);
+ do_create = true;
+ }
+ raw_spin_unlock(&wqe->lock);
+ if (do_create) {
+ create_io_worker(wq, wqe, worker->create_index, first);
+ } else {
+ atomic_dec(&acct->nr_running);
+ io_worker_ref_put(wq);
+ }
+ clear_bit_unlock(0, &worker->create_state);
+ io_worker_release(worker);
}
-static void io_queue_worker_create(struct io_wqe *wqe, struct io_wqe_acct *acct)
+static void io_queue_worker_create(struct io_wqe *wqe, struct io_worker *worker,
+ struct io_wqe_acct *acct)
{
- struct create_worker_data *cwd;
struct io_wq *wq = wqe->wq;
/* raced with exit, just ignore create call */
if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
goto fail;
+ if (!io_worker_get(worker))
+ goto fail;
+ /*
+ * create_state manages ownership of create_work/index. We should
+ * only need one entry per worker, as the worker going to sleep
+ * will trigger the condition, and waking will clear it once it
+ * runs the task_work.
+ */
+ if (test_bit(0, &worker->create_state) ||
+ test_and_set_bit_lock(0, &worker->create_state))
+ goto fail_release;
- cwd = kmalloc(sizeof(*cwd), GFP_ATOMIC);
- if (cwd) {
- init_task_work(&cwd->work, create_worker_cb);
- cwd->wqe = wqe;
- cwd->index = acct->index;
- if (!task_work_add(wq->task, &cwd->work, TWA_SIGNAL))
- return;
-
- kfree(cwd);
- }
+ init_task_work(&worker->create_work, create_worker_cb);
+ worker->create_index = acct->index;
+ if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL))
+ return;
+ clear_bit_unlock(0, &worker->create_state);
+fail_release:
+ io_worker_release(worker);
fail:
atomic_dec(&acct->nr_running);
io_worker_ref_put(wq);
@@ -331,7 +351,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe)) {
atomic_inc(&acct->nr_running);
atomic_inc(&wqe->wq->worker_refs);
- io_queue_worker_create(wqe, acct);
+ io_queue_worker_create(wqe, worker, acct);
}
}
@@ -404,7 +424,28 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
spin_unlock(&wq->hash->wait.lock);
}
-static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
+/*
+ * We can always run the work if the worker is currently the same type as
+ * the work (eg both are bound, or both are unbound). If they are not the
+ * same, only allow it if incrementing the worker count would be allowed.
+ */
+static bool io_worker_can_run_work(struct io_worker *worker,
+ struct io_wq_work *work)
+{
+ struct io_wqe_acct *acct;
+
+ if (!(worker->flags & IO_WORKER_F_BOUND) !=
+ !(work->flags & IO_WQ_WORK_UNBOUND))
+ return true;
+
+ /* not the same type, check if we'd go over the limit */
+ acct = io_work_get_acct(worker->wqe, work);
+ return acct->nr_workers < acct->max_workers;
+}
+
+static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
+ struct io_worker *worker,
+ bool *stalled)
__must_hold(wqe->lock)
{
struct io_wq_work_node *node, *prev;
@@ -416,6 +457,9 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
work = container_of(node, struct io_wq_work, list);
+ if (!io_worker_can_run_work(worker, work))
+ break;
+
/* not hashed, can run anytime */
if (!io_wq_is_hashed(work)) {
wq_list_del(&wqe->work_list, node, prev);
@@ -442,6 +486,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
raw_spin_unlock(&wqe->lock);
io_wait_on_hash(wqe, stall_hash);
raw_spin_lock(&wqe->lock);
+ *stalled = true;
}
return NULL;
@@ -465,9 +510,9 @@ static void io_assign_current_work(struct io_worker *worker,
cond_resched();
}
- spin_lock_irq(&worker->lock);
+ spin_lock(&worker->lock);
worker->cur_work = work;
- spin_unlock_irq(&worker->lock);
+ spin_unlock(&worker->lock);
}
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
@@ -481,6 +526,7 @@ static void io_worker_handle_work(struct io_worker *worker)
do {
struct io_wq_work *work;
+ bool stalled;
get_next:
/*
* If we got some work, mark us as busy. If we didn't, but
@@ -489,13 +535,14 @@ get_next:
* can't make progress, any work completion or insertion will
* clear the stalled flag.
*/
- work = io_get_next_work(wqe);
+ stalled = false;
+ work = io_get_next_work(wqe, worker, &stalled);
if (work)
__io_worker_busy(wqe, worker, work);
- else if (!wq_list_empty(&wqe->work_list))
+ else if (stalled)
wqe->flags |= IO_WQE_FLAG_STALLED;
- raw_spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock(&wqe->lock);
if (!work)
break;
io_assign_current_work(worker, work);
@@ -527,16 +574,16 @@ get_next:
clear_bit(hash, &wq->hash->map);
if (wq_has_sleeper(&wq->hash->wait))
wake_up(&wq->hash->wait);
- raw_spin_lock_irq(&wqe->lock);
+ raw_spin_lock(&wqe->lock);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
/* skip unnecessary unlock-lock wqe->lock */
if (!work)
goto get_next;
- raw_spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock(&wqe->lock);
}
} while (work);
- raw_spin_lock_irq(&wqe->lock);
+ raw_spin_lock(&wqe->lock);
} while (1);
}
@@ -557,13 +604,13 @@ static int io_wqe_worker(void *data)
set_current_state(TASK_INTERRUPTIBLE);
loop:
- raw_spin_lock_irq(&wqe->lock);
+ raw_spin_lock(&wqe->lock);
if (io_wqe_run_queue(wqe)) {
io_worker_handle_work(worker);
goto loop;
}
__io_worker_idle(wqe, worker);
- raw_spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock(&wqe->lock);
if (io_flush_signals())
continue;
ret = schedule_timeout(WORKER_IDLE_TIMEOUT);
@@ -582,7 +629,7 @@ loop:
}
if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
- raw_spin_lock_irq(&wqe->lock);
+ raw_spin_lock(&wqe->lock);
io_worker_handle_work(worker);
}
@@ -624,12 +671,12 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
worker->flags &= ~IO_WORKER_F_RUNNING;
- raw_spin_lock_irq(&worker->wqe->lock);
+ raw_spin_lock(&worker->wqe->lock);
io_wqe_dec_running(worker);
- raw_spin_unlock_irq(&worker->wqe->lock);
+ raw_spin_unlock(&worker->wqe->lock);
}
-static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
+static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first)
{
struct io_wqe_acct *acct = &wqe->acct[index];
struct io_worker *worker;
@@ -652,9 +699,9 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
kfree(worker);
fail:
atomic_dec(&acct->nr_running);
- raw_spin_lock_irq(&wqe->lock);
+ raw_spin_lock(&wqe->lock);
acct->nr_workers--;
- raw_spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock(&wqe->lock);
io_worker_ref_put(wq);
return;
}
@@ -664,15 +711,15 @@ fail:
set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
tsk->flags |= PF_NO_SETAFFINITY;
- raw_spin_lock_irq(&wqe->lock);
+ raw_spin_lock(&wqe->lock);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
list_add_tail_rcu(&worker->all_list, &wqe->all_list);
worker->flags |= IO_WORKER_F_FREE;
if (index == IO_WQ_ACCT_BOUND)
worker->flags |= IO_WORKER_F_BOUND;
- if ((acct->nr_workers == 1) && (worker->flags & IO_WORKER_F_BOUND))
+ if (first && (worker->flags & IO_WORKER_F_BOUND))
worker->flags |= IO_WORKER_F_FIXED;
- raw_spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock(&wqe->lock);
wake_up_new_task(tsk);
}
@@ -747,8 +794,7 @@ append:
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
{
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
- int work_flags;
- unsigned long flags;
+ bool do_wake;
/*
* If io-wq is exiting for this task, or if the request has explicitly
@@ -760,14 +806,14 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
return;
}
- work_flags = work->flags;
- raw_spin_lock_irqsave(&wqe->lock, flags);
+ raw_spin_lock(&wqe->lock);
io_wqe_insert_work(wqe, work);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
- raw_spin_unlock_irqrestore(&wqe->lock, flags);
+ do_wake = (work->flags & IO_WQ_WORK_CONCURRENT) ||
+ !atomic_read(&acct->nr_running);
+ raw_spin_unlock(&wqe->lock);
- if ((work_flags & IO_WQ_WORK_CONCURRENT) ||
- !atomic_read(&acct->nr_running))
+ if (do_wake)
io_wqe_wake_worker(wqe, acct);
}
@@ -793,19 +839,18 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
{
struct io_cb_cancel_data *match = data;
- unsigned long flags;
/*
* Hold the lock to avoid ->cur_work going out of scope, caller
* may dereference the passed in work.
*/
- spin_lock_irqsave(&worker->lock, flags);
+ spin_lock(&worker->lock);
if (worker->cur_work &&
match->fn(worker->cur_work, match->data)) {
set_notify_signal(worker->task);
match->nr_running++;
}
- spin_unlock_irqrestore(&worker->lock, flags);
+ spin_unlock(&worker->lock);
return match->nr_running && !match->cancel_all;
}
@@ -833,16 +878,15 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
{
struct io_wq_work_node *node, *prev;
struct io_wq_work *work;
- unsigned long flags;
retry:
- raw_spin_lock_irqsave(&wqe->lock, flags);
+ raw_spin_lock(&wqe->lock);
wq_list_for_each(node, prev, &wqe->work_list) {
work = container_of(node, struct io_wq_work, list);
if (!match->fn(work, match->data))
continue;
io_wqe_remove_pending(wqe, work, prev);
- raw_spin_unlock_irqrestore(&wqe->lock, flags);
+ raw_spin_unlock(&wqe->lock);
io_run_cancel(work, wqe);
match->nr_pending++;
if (!match->cancel_all)
@@ -851,7 +895,7 @@ retry:
/* not safe to continue after unlock */
goto retry;
}
- raw_spin_unlock_irqrestore(&wqe->lock, flags);
+ raw_spin_unlock(&wqe->lock);
}
static void io_wqe_cancel_running_work(struct io_wqe *wqe,
@@ -992,12 +1036,12 @@ err_wq:
static bool io_task_work_match(struct callback_head *cb, void *data)
{
- struct create_worker_data *cwd;
+ struct io_worker *worker;
if (cb->func != create_worker_cb)
return false;
- cwd = container_of(cb, struct create_worker_data, work);
- return cwd->wqe->wq == data;
+ worker = container_of(cb, struct io_worker, create_work);
+ return worker->wqe->wq == data;
}
void io_wq_exit_start(struct io_wq *wq)
@@ -1014,12 +1058,13 @@ static void io_wq_exit_workers(struct io_wq *wq)
return;
while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
- struct create_worker_data *cwd;
+ struct io_worker *worker;
- cwd = container_of(cb, struct create_worker_data, work);
- atomic_dec(&cwd->wqe->acct[cwd->index].nr_running);
+ worker = container_of(cb, struct io_worker, create_work);
+ atomic_dec(&worker->wqe->acct[worker->create_index].nr_running);
io_worker_ref_put(wq);
- kfree(cwd);
+ clear_bit_unlock(0, &worker->create_state);
+ io_worker_release(worker);
}
rcu_read_lock();
@@ -1131,6 +1176,35 @@ int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask)
return 0;
}
+/*
+ * Set max number of unbounded workers, returns old value. If new_count is 0,
+ * then just return the old value.
+ */
+int io_wq_max_workers(struct io_wq *wq, int *new_count)
+{
+ int i, node, prev = 0;
+
+ for (i = 0; i < 2; i++) {
+ if (new_count[i] > task_rlimit(current, RLIMIT_NPROC))
+ new_count[i] = task_rlimit(current, RLIMIT_NPROC);
+ }
+
+ rcu_read_lock();
+ for_each_node(node) {
+ struct io_wqe_acct *acct;
+
+ for (i = 0; i < 2; i++) {
+ acct = &wq->wqes[node]->acct[i];
+ prev = max_t(int, acct->max_workers, prev);
+ if (new_count[i])
+ acct->max_workers = new_count[i];
+ new_count[i] = prev;
+ }
+ }
+ rcu_read_unlock();
+ return 0;
+}
+
static __init int io_wq_init(void)
{
int ret;
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 3999ee58ff26..bf5c4c533760 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -44,6 +44,7 @@ static inline void wq_list_add_after(struct io_wq_work_node *node,
static inline void wq_list_add_tail(struct io_wq_work_node *node,
struct io_wq_work_list *list)
{
+ node->next = NULL;
if (!list->first) {
list->last = node;
WRITE_ONCE(list->first, node);
@@ -51,7 +52,6 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node,
list->last->next = node;
list->last = node;
}
- node->next = NULL;
}
static inline void wq_list_cut(struct io_wq_work_list *list,
@@ -128,6 +128,7 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
void io_wq_hash_work(struct io_wq_work *work, void *val);
int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
+int io_wq_max_workers(struct io_wq *wq, int *new_count);
static inline bool io_wq_is_hashed(struct io_wq_work *work)
{
diff --git a/fs/io_uring.c b/fs/io_uring.c
index bf548af0426c..6f35b1285865 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -78,6 +78,7 @@
#include <linux/task_work.h>
#include <linux/pagemap.h>
#include <linux/io_uring.h>
+#include <linux/tracehook.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -91,17 +92,12 @@
#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES)
#define IORING_SQPOLL_CAP_ENTRIES_VALUE 8
-/*
- * Shift of 9 is 512 entries, or exactly one page on 64-bit archs
- */
-#define IORING_FILE_TABLE_SHIFT 9
-#define IORING_MAX_FILES_TABLE (1U << IORING_FILE_TABLE_SHIFT)
-#define IORING_FILE_TABLE_MASK (IORING_MAX_FILES_TABLE - 1)
-#define IORING_MAX_FIXED_FILES (64 * IORING_MAX_FILES_TABLE)
+/* only define max */
+#define IORING_MAX_FIXED_FILES (1U << 15)
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
IORING_REGISTER_LAST + IORING_OP_LAST)
-#define IO_RSRC_TAG_TABLE_SHIFT 9
+#define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3)
#define IO_RSRC_TAG_TABLE_MAX (1U << IO_RSRC_TAG_TABLE_SHIFT)
#define IO_RSRC_TAG_TABLE_MASK (IO_RSRC_TAG_TABLE_MAX - 1)
@@ -234,8 +230,7 @@ struct io_rsrc_put {
};
struct io_file_table {
- /* two level table */
- struct io_fixed_file **files;
+ struct io_fixed_file *files;
};
struct io_rsrc_node {
@@ -300,18 +295,10 @@ struct io_sq_data {
struct completion exited;
};
-#define IO_IOPOLL_BATCH 8
#define IO_COMPL_BATCH 32
#define IO_REQ_CACHE_SIZE 32
#define IO_REQ_ALLOC_BATCH 8
-struct io_comp_state {
- struct io_kiocb *reqs[IO_COMPL_BATCH];
- unsigned int nr;
- /* inline/task_work completion list, under ->uring_lock */
- struct list_head free_list;
-};
-
struct io_submit_link {
struct io_kiocb *head;
struct io_kiocb *last;
@@ -332,14 +319,11 @@ struct io_submit_state {
/*
* Batch completion logic
*/
- struct io_comp_state comp;
+ struct io_kiocb *compl_reqs[IO_COMPL_BATCH];
+ unsigned int compl_nr;
+ /* inline/task_work completion list, under ->uring_lock */
+ struct list_head free_list;
- /*
- * File reference cache
- */
- struct file *file;
- unsigned int fd;
- unsigned int file_refs;
unsigned int ios_left;
};
@@ -391,6 +375,7 @@ struct io_ring_ctx {
struct io_submit_state submit_state;
struct list_head timeout_list;
+ struct list_head ltimeout_list;
struct list_head cq_overflow_list;
struct xarray io_buffers;
struct xarray personalities;
@@ -425,6 +410,8 @@ struct io_ring_ctx {
struct {
spinlock_t completion_lock;
+ spinlock_t timeout_lock;
+
/*
* ->iopoll_list is protected by the ctx->uring_lock for
* io_uring instances that don't use IORING_SETUP_SQPOLL.
@@ -486,8 +473,8 @@ struct io_uring_task {
spinlock_t task_lock;
struct io_wq_work_list task_list;
- unsigned long task_state;
struct callback_head task_work;
+ bool task_running;
};
/*
@@ -522,6 +509,7 @@ struct io_timeout_data {
struct hrtimer timer;
struct timespec64 ts;
enum hrtimer_mode mode;
+ u32 flags;
};
struct io_accept {
@@ -529,6 +517,7 @@ struct io_accept {
struct sockaddr __user *addr;
int __user *addr_len;
int flags;
+ u32 file_slot;
unsigned long nofile;
};
@@ -552,6 +541,8 @@ struct io_timeout {
struct list_head list;
/* head of the link, used by linked timeouts only */
struct io_kiocb *head;
+ /* for linked completions */
+ struct io_kiocb *prev;
};
struct io_timeout_rem {
@@ -561,6 +552,7 @@ struct io_timeout_rem {
/* timeout update */
struct timespec64 ts;
u32 flags;
+ bool ltimeout;
};
struct io_rw {
@@ -592,6 +584,7 @@ struct io_sr_msg {
struct io_open {
struct file *file;
int dfd;
+ u32 file_slot;
struct filename *filename;
struct open_how how;
unsigned long nofile;
@@ -674,9 +667,31 @@ struct io_unlink {
struct filename *filename;
};
+struct io_mkdir {
+ struct file *file;
+ int dfd;
+ umode_t mode;
+ struct filename *filename;
+};
+
+struct io_symlink {
+ struct file *file;
+ int new_dfd;
+ struct filename *oldpath;
+ struct filename *newpath;
+};
+
+struct io_hardlink {
+ struct file *file;
+ int old_dfd;
+ int new_dfd;
+ struct filename *oldpath;
+ struct filename *newpath;
+ int flags;
+};
+
struct io_completion {
struct file *file;
- struct list_head list;
u32 cflags;
};
@@ -718,14 +733,15 @@ enum {
REQ_F_NEED_CLEANUP_BIT,
REQ_F_POLLED_BIT,
REQ_F_BUFFER_SELECTED_BIT,
- REQ_F_LTIMEOUT_ACTIVE_BIT,
REQ_F_COMPLETE_INLINE_BIT,
REQ_F_REISSUE_BIT,
REQ_F_DONT_REISSUE_BIT,
REQ_F_CREDS_BIT,
+ REQ_F_REFCOUNT_BIT,
+ REQ_F_ARM_LTIMEOUT_BIT,
/* keep async read/write and isreg together and in order */
- REQ_F_ASYNC_READ_BIT,
- REQ_F_ASYNC_WRITE_BIT,
+ REQ_F_NOWAIT_READ_BIT,
+ REQ_F_NOWAIT_WRITE_BIT,
REQ_F_ISREG_BIT,
/* not a real bit, just to check we're not overflowing the space */
@@ -762,8 +778,6 @@ enum {
REQ_F_POLLED = BIT(REQ_F_POLLED_BIT),
/* buffer already selected */
REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT),
- /* linked timeout is active, i.e. prepared by link's head */
- REQ_F_LTIMEOUT_ACTIVE = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
/* completion is deferred through io_comp_state */
REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT),
/* caller should reissue async */
@@ -771,13 +785,17 @@ enum {
/* don't attempt request reissue, see io_rw_reissue() */
REQ_F_DONT_REISSUE = BIT(REQ_F_DONT_REISSUE_BIT),
/* supports async reads */
- REQ_F_ASYNC_READ = BIT(REQ_F_ASYNC_READ_BIT),
+ REQ_F_NOWAIT_READ = BIT(REQ_F_NOWAIT_READ_BIT),
/* supports async writes */
- REQ_F_ASYNC_WRITE = BIT(REQ_F_ASYNC_WRITE_BIT),
+ REQ_F_NOWAIT_WRITE = BIT(REQ_F_NOWAIT_WRITE_BIT),
/* regular file */
REQ_F_ISREG = BIT(REQ_F_ISREG_BIT),
/* has creds assigned */
REQ_F_CREDS = BIT(REQ_F_CREDS_BIT),
+ /* skip refcounting if not set */
+ REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT),
+ /* there is a linked timeout that has to be armed */
+ REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT),
};
struct async_poll {
@@ -785,7 +803,7 @@ struct async_poll {
struct io_poll_iocb *double_poll;
};
-typedef void (*io_req_tw_func_t)(struct io_kiocb *req);
+typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked);
struct io_task_work {
union {
@@ -831,6 +849,9 @@ struct io_kiocb {
struct io_shutdown shutdown;
struct io_rename rename;
struct io_unlink unlink;
+ struct io_mkdir mkdir;
+ struct io_symlink symlink;
+ struct io_hardlink hardlink;
/* use only after cleaning per-op data, see io_clean_op() */
struct io_completion compl;
};
@@ -1042,39 +1063,43 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_RENAMEAT] = {},
[IORING_OP_UNLINKAT] = {},
+ [IORING_OP_MKDIRAT] = {},
+ [IORING_OP_SYMLINKAT] = {},
+ [IORING_OP_LINKAT] = {},
};
+/* requests with any of those set should undergo io_disarm_next() */
+#define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL)
+
static bool io_disarm_next(struct io_kiocb *req);
static void io_uring_del_tctx_node(unsigned long index);
static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
struct task_struct *task,
bool cancel_all);
static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
-static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx);
static bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
long res, unsigned int cflags);
static void io_put_req(struct io_kiocb *req);
-static void io_put_req_deferred(struct io_kiocb *req, int nr);
+static void io_put_req_deferred(struct io_kiocb *req);
static void io_dismantle_req(struct io_kiocb *req);
-static void io_put_task(struct task_struct *task, int nr);
-static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
static void io_queue_linked_timeout(struct io_kiocb *req);
static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
struct io_uring_rsrc_update2 *up,
unsigned nr_args);
static void io_clean_op(struct io_kiocb *req);
-static struct file *io_file_get(struct io_submit_state *state,
+static struct file *io_file_get(struct io_ring_ctx *ctx,
struct io_kiocb *req, int fd, bool fixed);
static void __io_queue_sqe(struct io_kiocb *req);
static void io_rsrc_put_work(struct work_struct *work);
static void io_req_task_queue(struct io_kiocb *req);
static void io_submit_flush_completions(struct io_ring_ctx *ctx);
-static bool io_poll_remove_waitqs(struct io_kiocb *req);
static int io_req_prep_async(struct io_kiocb *req);
-static void io_fallback_req_func(struct work_struct *unused);
+static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
+ unsigned int issue_flags, u32 slot_index);
+static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
static struct kmem_cache *req_cachep;
@@ -1093,9 +1118,65 @@ struct sock *io_uring_get_socket(struct file *file)
}
EXPORT_SYMBOL(io_uring_get_socket);
+static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked)
+{
+ if (!*locked) {
+ mutex_lock(&ctx->uring_lock);
+ *locked = true;
+ }
+}
+
#define io_for_each_link(pos, head) \
for (pos = (head); pos; pos = pos->link)
+/*
+ * Shamelessly stolen from the mm implementation of page reference checking,
+ * see commit f958d7b528b1 for details.
+ */
+#define req_ref_zero_or_close_to_overflow(req) \
+ ((unsigned int) atomic_read(&(req->refs)) + 127u <= 127u)
+
+static inline bool req_ref_inc_not_zero(struct io_kiocb *req)
+{
+ WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
+ return atomic_inc_not_zero(&req->refs);
+}
+
+static inline bool req_ref_put_and_test(struct io_kiocb *req)
+{
+ if (likely(!(req->flags & REQ_F_REFCOUNT)))
+ return true;
+
+ WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
+ return atomic_dec_and_test(&req->refs);
+}
+
+static inline void req_ref_put(struct io_kiocb *req)
+{
+ WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
+ WARN_ON_ONCE(req_ref_put_and_test(req));
+}
+
+static inline void req_ref_get(struct io_kiocb *req)
+{
+ WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
+ WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
+ atomic_inc(&req->refs);
+}
+
+static inline void __io_req_set_refcount(struct io_kiocb *req, int nr)
+{
+ if (!(req->flags & REQ_F_REFCOUNT)) {
+ req->flags |= REQ_F_REFCOUNT;
+ atomic_set(&req->refs, nr);
+ }
+}
+
+static inline void io_req_set_refcount(struct io_kiocb *req)
+{
+ __io_req_set_refcount(req, 1);
+}
+
static inline void io_req_set_rsrc_node(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -1140,6 +1221,12 @@ static inline void req_set_fail(struct io_kiocb *req)
req->flags |= REQ_F_FAIL;
}
+static inline void req_fail_link_node(struct io_kiocb *req, int res)
+{
+ req_set_fail(req);
+ req->result = res;
+}
+
static void io_ring_ctx_ref_free(struct percpu_ref *ref)
{
struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
@@ -1152,6 +1239,27 @@ static inline bool io_is_timeout_noseq(struct io_kiocb *req)
return !req->timeout.off;
}
+static void io_fallback_req_func(struct work_struct *work)
+{
+ struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
+ fallback_work.work);
+ struct llist_node *node = llist_del_all(&ctx->fallback_llist);
+ struct io_kiocb *req, *tmp;
+ bool locked = false;
+
+ percpu_ref_get(&ctx->refs);
+ llist_for_each_entry_safe(req, tmp, node, io_task_work.fallback_node)
+ req->io_task_work.func(req, &locked);
+
+ if (locked) {
+ if (ctx->submit_state.compl_nr)
+ io_submit_flush_completions(ctx);
+ mutex_unlock(&ctx->uring_lock);
+ }
+ percpu_ref_put(&ctx->refs);
+
+}
+
static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
{
struct io_ring_ctx *ctx;
@@ -1197,15 +1305,17 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->cq_wait);
spin_lock_init(&ctx->completion_lock);
+ spin_lock_init(&ctx->timeout_lock);
INIT_LIST_HEAD(&ctx->iopoll_list);
INIT_LIST_HEAD(&ctx->defer_list);
INIT_LIST_HEAD(&ctx->timeout_list);
+ INIT_LIST_HEAD(&ctx->ltimeout_list);
spin_lock_init(&ctx->rsrc_ref_lock);
INIT_LIST_HEAD(&ctx->rsrc_ref_list);
INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work);
init_llist_head(&ctx->rsrc_put_llist);
INIT_LIST_HEAD(&ctx->tctx_list);
- INIT_LIST_HEAD(&ctx->submit_state.comp.free_list);
+ INIT_LIST_HEAD(&ctx->submit_state.free_list);
INIT_LIST_HEAD(&ctx->locked_free_list);
INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
return ctx;
@@ -1235,6 +1345,20 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
return false;
}
+#define FFS_ASYNC_READ 0x1UL
+#define FFS_ASYNC_WRITE 0x2UL
+#ifdef CONFIG_64BIT
+#define FFS_ISREG 0x4UL
+#else
+#define FFS_ISREG 0x0UL
+#endif
+#define FFS_MASK ~(FFS_ASYNC_READ|FFS_ASYNC_WRITE|FFS_ISREG)
+
+static inline bool io_req_ffs_set(struct io_kiocb *req)
+{
+ return IS_ENABLED(CONFIG_64BIT) && (req->flags & REQ_F_FIXED_FILE);
+}
+
static void io_req_track_inflight(struct io_kiocb *req)
{
if (!(req->flags & REQ_F_INFLIGHT)) {
@@ -1243,6 +1367,32 @@ static void io_req_track_inflight(struct io_kiocb *req)
}
}
+static inline void io_unprep_linked_timeout(struct io_kiocb *req)
+{
+ req->flags &= ~REQ_F_LINK_TIMEOUT;
+}
+
+static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
+{
+ if (WARN_ON_ONCE(!req->link))
+ return NULL;
+
+ req->flags &= ~REQ_F_ARM_LTIMEOUT;
+ req->flags |= REQ_F_LINK_TIMEOUT;
+
+ /* linked timeouts should have two refs once prep'ed */
+ io_req_set_refcount(req);
+ __io_req_set_refcount(req->link, 2);
+ return req->link;
+}
+
+static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
+{
+ if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT)))
+ return NULL;
+ return __io_prep_linked_timeout(req);
+}
+
static void io_prep_async_work(struct io_kiocb *req)
{
const struct io_op_def *def = &io_op_defs[req->opcode];
@@ -1282,22 +1432,25 @@ static void io_prep_async_link(struct io_kiocb *req)
if (req->flags & REQ_F_LINK_TIMEOUT) {
struct io_ring_ctx *ctx = req->ctx;
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
io_for_each_link(cur, req)
io_prep_async_work(cur);
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
} else {
io_for_each_link(cur, req)
io_prep_async_work(cur);
}
}
-static void io_queue_async_work(struct io_kiocb *req)
+static void io_queue_async_work(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *link = io_prep_linked_timeout(req);
struct io_uring_task *tctx = req->task->io_uring;
+ /* must not take the lock, NULL it as a precaution */
+ locked = NULL;
+
BUG_ON(!tctx);
BUG_ON(!tctx->io_wq);
@@ -1323,6 +1476,7 @@ static void io_queue_async_work(struct io_kiocb *req)
static void io_kill_timeout(struct io_kiocb *req, int status)
__must_hold(&req->ctx->completion_lock)
+ __must_hold(&req->ctx->timeout_lock)
{
struct io_timeout_data *io = req->async_data;
@@ -1331,7 +1485,7 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
atomic_read(&req->ctx->cq_timeouts) + 1);
list_del_init(&req->timeout.list);
io_cqring_fill_event(req->ctx, req->user_data, status, 0);
- io_put_req_deferred(req, 1);
+ io_put_req_deferred(req);
}
}
@@ -1350,9 +1504,11 @@ static void io_queue_deferred(struct io_ring_ctx *ctx)
}
static void io_flush_timeouts(struct io_ring_ctx *ctx)
+ __must_hold(&ctx->completion_lock)
{
u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+ spin_lock_irq(&ctx->timeout_lock);
while (!list_empty(&ctx->timeout_list)) {
u32 events_needed, events_got;
struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
@@ -1377,6 +1533,7 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
io_kill_timeout(req, 0);
}
ctx->cq_last_tm_flush = seq;
+ spin_unlock_irq(&ctx->timeout_lock);
}
static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
@@ -1433,13 +1590,22 @@ static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
return !ctx->eventfd_async || io_wq_current_is_worker();
}
+/*
+ * This should only get called when at least one event has been posted.
+ * Some applications rely on the eventfd notification count only changing
+ * IFF a new CQE has been added to the CQ ring. There's no dependency on
+ * 1:1 relationship between how many times this function is called (and
+ * hence the eventfd count) and number of CQEs posted to the CQ ring.
+ */
static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
{
- /* see waitqueue_active() comment */
- smp_mb();
-
- if (waitqueue_active(&ctx->cq_wait))
- wake_up(&ctx->cq_wait);
+ /*
+ * wake_up_all() may seem excessive, but io_wake_function() and
+ * io_should_wake() handle the termination of the loop and only
+ * wake as many waiters as we need to.
+ */
+ if (wq_has_sleeper(&ctx->cq_wait))
+ wake_up_all(&ctx->cq_wait);
if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait))
wake_up(&ctx->sq_data->wait);
if (io_should_trigger_evfd(ctx))
@@ -1452,12 +1618,9 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
{
- /* see waitqueue_active() comment */
- smp_mb();
-
if (ctx->flags & IORING_SETUP_SQPOLL) {
- if (waitqueue_active(&ctx->cq_wait))
- wake_up(&ctx->cq_wait);
+ if (wq_has_sleeper(&ctx->cq_wait))
+ wake_up_all(&ctx->cq_wait);
}
if (io_should_trigger_evfd(ctx))
eventfd_signal(ctx->cq_ev_fd, 1);
@@ -1470,14 +1633,13 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
/* Returns true if there are no backlogged entries after the flush */
static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
{
- unsigned long flags;
bool all_flushed, posted;
if (!force && __io_cqring_events(ctx) == ctx->cq_entries)
return false;
posted = false;
- spin_lock_irqsave(&ctx->completion_lock, flags);
+ spin_lock(&ctx->completion_lock);
while (!list_empty(&ctx->cq_overflow_list)) {
struct io_uring_cqe *cqe = io_get_cqe(ctx);
struct io_overflow_cqe *ocqe;
@@ -1499,18 +1661,19 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
all_flushed = list_empty(&ctx->cq_overflow_list);
if (all_flushed) {
clear_bit(0, &ctx->check_cq_overflow);
- ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
+ WRITE_ONCE(ctx->rings->sq_flags,
+ ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW);
}
if (posted)
io_commit_cqring(ctx);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ spin_unlock(&ctx->completion_lock);
if (posted)
io_cqring_ev_posted(ctx);
return all_flushed;
}
-static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
+static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx)
{
bool ret = true;
@@ -1518,7 +1681,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
/* iopoll syncs against uring_lock, not completion_lock */
if (ctx->flags & IORING_SETUP_IOPOLL)
mutex_lock(&ctx->uring_lock);
- ret = __io_cqring_overflow_flush(ctx, force);
+ ret = __io_cqring_overflow_flush(ctx, false);
if (ctx->flags & IORING_SETUP_IOPOLL)
mutex_unlock(&ctx->uring_lock);
}
@@ -1526,39 +1689,37 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
return ret;
}
-/*
- * Shamelessly stolen from the mm implementation of page reference checking,
- * see commit f958d7b528b1 for details.
- */
-#define req_ref_zero_or_close_to_overflow(req) \
- ((unsigned int) atomic_read(&(req->refs)) + 127u <= 127u)
-
-static inline bool req_ref_inc_not_zero(struct io_kiocb *req)
+/* must be called somewhat shortly after putting a request */
+static inline void io_put_task(struct task_struct *task, int nr)
{
- return atomic_inc_not_zero(&req->refs);
-}
+ struct io_uring_task *tctx = task->io_uring;
-static inline bool req_ref_sub_and_test(struct io_kiocb *req, int refs)
-{
- WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
- return atomic_sub_and_test(refs, &req->refs);
+ if (likely(task == current)) {
+ tctx->cached_refs += nr;
+ } else {
+ percpu_counter_sub(&tctx->inflight, nr);
+ if (unlikely(atomic_read(&tctx->in_idle)))
+ wake_up(&tctx->wait);
+ put_task_struct_many(task, nr);
+ }
}
-static inline bool req_ref_put_and_test(struct io_kiocb *req)
+static void io_task_refs_refill(struct io_uring_task *tctx)
{
- WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
- return atomic_dec_and_test(&req->refs);
-}
+ unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR;
-static inline void req_ref_put(struct io_kiocb *req)
-{
- WARN_ON_ONCE(req_ref_put_and_test(req));
+ percpu_counter_add(&tctx->inflight, refill);
+ refcount_add(refill, &current->usage);
+ tctx->cached_refs += refill;
}
-static inline void req_ref_get(struct io_kiocb *req)
+static inline void io_get_task_refs(int nr)
{
- WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
- atomic_inc(&req->refs);
+ struct io_uring_task *tctx = current->io_uring;
+
+ tctx->cached_refs -= nr;
+ if (unlikely(tctx->cached_refs < 0))
+ io_task_refs_refill(tctx);
}
static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
@@ -1578,7 +1739,9 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
}
if (list_empty(&ctx->cq_overflow_list)) {
set_bit(0, &ctx->check_cq_overflow);
- ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
+ WRITE_ONCE(ctx->rings->sq_flags,
+ ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW);
+
}
ocqe->cqe.user_data = user_data;
ocqe->cqe.res = res;
@@ -1620,9 +1783,8 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
unsigned int cflags)
{
struct io_ring_ctx *ctx = req->ctx;
- unsigned long flags;
- spin_lock_irqsave(&ctx->completion_lock, flags);
+ spin_lock(&ctx->completion_lock);
__io_cqring_fill_event(ctx, req->user_data, res, cflags);
/*
* If we're the last reference to this request, add to our locked
@@ -1630,7 +1792,7 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
*/
if (req_ref_put_and_test(req)) {
if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
- if (req->flags & (REQ_F_LINK_TIMEOUT | REQ_F_FAIL))
+ if (req->flags & IO_DISARM_MASK)
io_disarm_next(req);
if (req->link) {
io_req_task_queue(req->link);
@@ -1639,14 +1801,14 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
}
io_dismantle_req(req);
io_put_task(req->task, 1);
- list_add(&req->compl.list, &ctx->locked_free_list);
+ list_add(&req->inflight_entry, &ctx->locked_free_list);
ctx->locked_free_nr++;
} else {
if (!percpu_ref_tryget(&ctx->refs))
req = NULL;
}
io_commit_cqring(ctx);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ spin_unlock(&ctx->completion_lock);
if (req) {
io_cqring_ev_posted(ctx);
@@ -1686,24 +1848,35 @@ static inline void io_req_complete(struct io_kiocb *req, long res)
static void io_req_complete_failed(struct io_kiocb *req, long res)
{
req_set_fail(req);
- io_put_req(req);
io_req_complete_post(req, res, 0);
}
+/*
+ * Don't initialise the fields below on every allocation, but do that in
+ * advance and keep them valid across allocations.
+ */
+static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx)
+{
+ req->ctx = ctx;
+ req->link = NULL;
+ req->async_data = NULL;
+ /* not necessary, but safer to zero */
+ req->result = 0;
+}
+
static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx,
- struct io_comp_state *cs)
+ struct io_submit_state *state)
{
- spin_lock_irq(&ctx->completion_lock);
- list_splice_init(&ctx->locked_free_list, &cs->free_list);
+ spin_lock(&ctx->completion_lock);
+ list_splice_init(&ctx->locked_free_list, &state->free_list);
ctx->locked_free_nr = 0;
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
}
/* Returns true IFF there are requests in the cache */
static bool io_flush_cached_reqs(struct io_ring_ctx *ctx)
{
struct io_submit_state *state = &ctx->submit_state;
- struct io_comp_state *cs = &state->comp;
int nr;
/*
@@ -1712,14 +1885,14 @@ static bool io_flush_cached_reqs(struct io_ring_ctx *ctx)
* side cache.
*/
if (READ_ONCE(ctx->locked_free_nr) > IO_COMPL_BATCH)
- io_flush_cached_locked_reqs(ctx, cs);
+ io_flush_cached_locked_reqs(ctx, state);
nr = state->free_reqs;
- while (!list_empty(&cs->free_list)) {
- struct io_kiocb *req = list_first_entry(&cs->free_list,
- struct io_kiocb, compl.list);
+ while (!list_empty(&state->free_list)) {
+ struct io_kiocb *req = list_first_entry(&state->free_list,
+ struct io_kiocb, inflight_entry);
- list_del(&req->compl.list);
+ list_del(&req->inflight_entry);
state->reqs[nr++] = req;
if (nr == ARRAY_SIZE(state->reqs))
break;
@@ -1729,48 +1902,41 @@ static bool io_flush_cached_reqs(struct io_ring_ctx *ctx)
return nr != 0;
}
+/*
+ * A request might get retired back into the request caches even before opcode
+ * handlers and io_issue_sqe() are done with it, e.g. inline completion path.
+ * Because of that, io_alloc_req() should be called only under ->uring_lock
+ * and with extra caution to not get a request that is still worked on.
+ */
static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx)
+ __must_hold(&ctx->uring_lock)
{
struct io_submit_state *state = &ctx->submit_state;
+ gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
+ int ret, i;
BUILD_BUG_ON(ARRAY_SIZE(state->reqs) < IO_REQ_ALLOC_BATCH);
- if (!state->free_reqs) {
- gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
- int ret, i;
-
- if (io_flush_cached_reqs(ctx))
- goto got_req;
-
- ret = kmem_cache_alloc_bulk(req_cachep, gfp, IO_REQ_ALLOC_BATCH,
- state->reqs);
+ if (likely(state->free_reqs || io_flush_cached_reqs(ctx)))
+ goto got_req;
- /*
- * Bulk alloc is all-or-nothing. If we fail to get a batch,
- * retry single alloc to be on the safe side.
- */
- if (unlikely(ret <= 0)) {
- state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
- if (!state->reqs[0])
- return NULL;
- ret = 1;
- }
+ ret = kmem_cache_alloc_bulk(req_cachep, gfp, IO_REQ_ALLOC_BATCH,
+ state->reqs);
- /*
- * Don't initialise the fields below on every allocation, but
- * do that in advance and keep valid on free.
- */
- for (i = 0; i < ret; i++) {
- struct io_kiocb *req = state->reqs[i];
-
- req->ctx = ctx;
- req->link = NULL;
- req->async_data = NULL;
- /* not necessary, but safer to zero */
- req->result = 0;
- }
- state->free_reqs = ret;
+ /*
+ * Bulk alloc is all-or-nothing. If we fail to get a batch,
+ * retry single alloc to be on the safe side.
+ */
+ if (unlikely(ret <= 0)) {
+ state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
+ if (!state->reqs[0])
+ return NULL;
+ ret = 1;
}
+
+ for (i = 0; i < ret; i++)
+ io_preinit_req(state->reqs[i], ctx);
+ state->free_reqs = ret;
got_req:
state->free_reqs--;
return state->reqs[state->free_reqs];
@@ -1798,17 +1964,6 @@ static void io_dismantle_req(struct io_kiocb *req)
}
}
-/* must to be called somewhat shortly after putting a request */
-static inline void io_put_task(struct task_struct *task, int nr)
-{
- struct io_uring_task *tctx = task->io_uring;
-
- percpu_counter_sub(&tctx->inflight, nr);
- if (unlikely(atomic_read(&tctx->in_idle)))
- wake_up(&tctx->wait);
- put_task_struct_many(task, nr);
-}
-
static void __io_free_req(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -1816,7 +1971,11 @@ static void __io_free_req(struct io_kiocb *req)
io_dismantle_req(req);
io_put_task(req->task, 1);
- kmem_cache_free(req_cachep, req);
+ spin_lock(&ctx->completion_lock);
+ list_add(&req->inflight_entry, &ctx->locked_free_list);
+ ctx->locked_free_nr++;
+ spin_unlock(&ctx->completion_lock);
+
percpu_ref_put(&ctx->refs);
}
@@ -1830,22 +1989,20 @@ static inline void io_remove_next_linked(struct io_kiocb *req)
static bool io_kill_linked_timeout(struct io_kiocb *req)
__must_hold(&req->ctx->completion_lock)
+ __must_hold(&req->ctx->timeout_lock)
{
struct io_kiocb *link = req->link;
- /*
- * Can happen if a linked timeout fired and link had been like
- * req -> link t-out -> link t-out [-> ...]
- */
- if (link && (link->flags & REQ_F_LTIMEOUT_ACTIVE)) {
+ if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
struct io_timeout_data *io = link->async_data;
io_remove_next_linked(req);
link->timeout.head = NULL;
if (hrtimer_try_to_cancel(&io->timer) != -1) {
+ list_del(&link->timeout.list);
io_cqring_fill_event(link->ctx, link->user_data,
-ECANCELED, 0);
- io_put_req_deferred(link, 1);
+ io_put_req_deferred(link);
return true;
}
}
@@ -1859,12 +2016,17 @@ static void io_fail_links(struct io_kiocb *req)
req->link = NULL;
while (link) {
+ long res = -ECANCELED;
+
+ if (link->flags & REQ_F_FAIL)
+ res = link->result;
+
nxt = link->link;
link->link = NULL;
trace_io_uring_fail_link(req, link);
- io_cqring_fill_event(link->ctx, link->user_data, -ECANCELED, 0);
- io_put_req_deferred(link, 2);
+ io_cqring_fill_event(link->ctx, link->user_data, res, 0);
+ io_put_req_deferred(link);
link = nxt;
}
}
@@ -1874,8 +2036,24 @@ static bool io_disarm_next(struct io_kiocb *req)
{
bool posted = false;
- if (likely(req->flags & REQ_F_LINK_TIMEOUT))
+ if (req->flags & REQ_F_ARM_LTIMEOUT) {
+ struct io_kiocb *link = req->link;
+
+ req->flags &= ~REQ_F_ARM_LTIMEOUT;
+ if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
+ io_remove_next_linked(req);
+ io_cqring_fill_event(link->ctx, link->user_data,
+ -ECANCELED, 0);
+ io_put_req_deferred(link);
+ posted = true;
+ }
+ } else if (req->flags & REQ_F_LINK_TIMEOUT) {
+ struct io_ring_ctx *ctx = req->ctx;
+
+ spin_lock_irq(&ctx->timeout_lock);
posted = io_kill_linked_timeout(req);
+ spin_unlock_irq(&ctx->timeout_lock);
+ }
if (unlikely((req->flags & REQ_F_FAIL) &&
!(req->flags & REQ_F_HARDLINK))) {
posted |= (req->link != NULL);
@@ -1894,16 +2072,15 @@ static struct io_kiocb *__io_req_find_next(struct io_kiocb *req)
* dependencies to the next request. In case of failure, fail the rest
* of the chain.
*/
- if (req->flags & (REQ_F_LINK_TIMEOUT | REQ_F_FAIL)) {
+ if (req->flags & IO_DISARM_MASK) {
struct io_ring_ctx *ctx = req->ctx;
- unsigned long flags;
bool posted;
- spin_lock_irqsave(&ctx->completion_lock, flags);
+ spin_lock(&ctx->completion_lock);
posted = io_disarm_next(req);
if (posted)
io_commit_cqring(req->ctx);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ spin_unlock(&ctx->completion_lock);
if (posted)
io_cqring_ev_posted(ctx);
}
@@ -1919,20 +2096,22 @@ static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
return __io_req_find_next(req);
}
-static void ctx_flush_and_put(struct io_ring_ctx *ctx)
+static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked) /* @locked: in/out, true while the caller holds ctx->uring_lock */
{
if (!ctx)
return;
- if (ctx->submit_state.comp.nr) {
- mutex_lock(&ctx->uring_lock);
- io_submit_flush_completions(ctx);
+ if (*locked) { /* batched completions are only touched while uring_lock is held */
+ if (ctx->submit_state.compl_nr)
+ io_submit_flush_completions(ctx);
mutex_unlock(&ctx->uring_lock);
+ *locked = false; /* report back that the mutex has been released */
}
percpu_ref_put(&ctx->refs);
}
static void tctx_task_work(struct callback_head *cb)
{
+ bool locked = false;
struct io_ring_ctx *ctx = NULL;
struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
task_work);
@@ -1943,37 +2122,32 @@ static void tctx_task_work(struct callback_head *cb)
spin_lock_irq(&tctx->task_lock);
node = tctx->task_list.first;
INIT_WQ_LIST(&tctx->task_list);
+ if (!node)
+ tctx->task_running = false;
spin_unlock_irq(&tctx->task_lock);
+ if (!node)
+ break;
- while (node) {
+ do {
struct io_wq_work_node *next = node->next;
struct io_kiocb *req = container_of(node, struct io_kiocb,
io_task_work.node);
if (req->ctx != ctx) {
- ctx_flush_and_put(ctx);
+ ctx_flush_and_put(ctx, &locked);
ctx = req->ctx;
+ /* if not contended, grab and improve batching */
+ locked = mutex_trylock(&ctx->uring_lock);
percpu_ref_get(&ctx->refs);
}
- req->io_task_work.func(req);
+ req->io_task_work.func(req, &locked);
node = next;
- }
- if (wq_list_empty(&tctx->task_list)) {
- spin_lock_irq(&tctx->task_lock);
- clear_bit(0, &tctx->task_state);
- if (wq_list_empty(&tctx->task_list)) {
- spin_unlock_irq(&tctx->task_lock);
- break;
- }
- spin_unlock_irq(&tctx->task_lock);
- /* another tctx_task_work() is enqueued, yield */
- if (test_and_set_bit(0, &tctx->task_state))
- break;
- }
+ } while (node);
+
cond_resched();
}
- ctx_flush_and_put(ctx);
+ ctx_flush_and_put(ctx, &locked);
}
static void io_req_task_work_add(struct io_kiocb *req)
@@ -1983,19 +2157,20 @@ static void io_req_task_work_add(struct io_kiocb *req)
enum task_work_notify_mode notify;
struct io_wq_work_node *node;
unsigned long flags;
+ bool running;
WARN_ON_ONCE(!tctx);
spin_lock_irqsave(&tctx->task_lock, flags);
wq_list_add_tail(&req->io_task_work.node, &tctx->task_list);
+ running = tctx->task_running;
+ if (!running)
+ tctx->task_running = true;
spin_unlock_irqrestore(&tctx->task_lock, flags);
/* task_work already pending, we're done */
- if (test_bit(0, &tctx->task_state) ||
- test_and_set_bit(0, &tctx->task_state))
+ if (running)
return;
- if (unlikely(tsk->flags & PF_EXITING))
- goto fail;
/*
* SQPOLL kernel thread doesn't need notification, just a wakeup. For
@@ -2008,9 +2183,9 @@ static void io_req_task_work_add(struct io_kiocb *req)
wake_up_process(tsk);
return;
}
-fail:
- clear_bit(0, &tctx->task_state);
+
spin_lock_irqsave(&tctx->task_lock, flags);
+ tctx->task_running = false;
node = tctx->task_list.first;
INIT_WQ_LIST(&tctx->task_list);
spin_unlock_irqrestore(&tctx->task_lock, flags);
@@ -2024,27 +2199,25 @@ fail:
}
}
-static void io_req_task_cancel(struct io_kiocb *req)
+static void io_req_task_cancel(struct io_kiocb *req, bool *locked) /* fail @req with the error code stored in req->result */
{
struct io_ring_ctx *ctx = req->ctx;
- /* ctx is guaranteed to stay alive while we hold uring_lock */
- mutex_lock(&ctx->uring_lock);
+ /* not needed for normal modes, but SQPOLL depends on it */
+ io_tw_lock(ctx, locked); /* NOTE(review): presumably takes uring_lock when *locked is false - confirm in io_tw_lock() */
io_req_complete_failed(req, req->result);
- mutex_unlock(&ctx->uring_lock);
}
-static void io_req_task_submit(struct io_kiocb *req)
+static void io_req_task_submit(struct io_kiocb *req, bool *locked) /* queue @req, or fail it with -EFAULT when its task is exiting */
{
struct io_ring_ctx *ctx = req->ctx;
- /* ctx stays valid until unlock, even if we drop all ours ctx->refs */
- mutex_lock(&ctx->uring_lock);
- if (!(req->task->flags & PF_EXITING) && !req->task->in_execve)
+ io_tw_lock(ctx, locked); /* NOTE(review): presumably takes uring_lock when *locked is false - confirm in io_tw_lock() */
+ /* req->task == current here, checking PF_EXITING is safe */
+ if (likely(!(req->task->flags & PF_EXITING))) /* the in_execve test of the old code is no longer made */
__io_queue_sqe(req);
else
io_req_complete_failed(req, -EFAULT);
- mutex_unlock(&ctx->uring_lock);
}
static void io_req_task_queue_fail(struct io_kiocb *req, int ret)
@@ -2080,6 +2253,11 @@ static void io_free_req(struct io_kiocb *req)
__io_free_req(req);
}
+static void io_free_req_work(struct io_kiocb *req, bool *locked)
+{
+ io_free_req(req); /* @locked is unused: this only adapts io_free_req() to the (req, locked) callback signature used for io_task_work.func */
+}
+
struct req_batch {
struct task_struct *task;
int task_refs;
@@ -2096,10 +2274,10 @@ static inline void io_init_req_batch(struct req_batch *rb)
static void io_req_free_batch_finish(struct io_ring_ctx *ctx,
struct req_batch *rb)
{
- if (rb->task)
- io_put_task(rb->task, rb->task_refs);
if (rb->ctx_refs)
percpu_ref_put_many(&ctx->refs, rb->ctx_refs);
+ if (rb->task) /* accumulated task references are now dropped after the ctx references */
+ io_put_task(rb->task, rb->task_refs);
}
static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req,
@@ -2120,37 +2298,37 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req,
if (state->free_reqs != ARRAY_SIZE(state->reqs))
state->reqs[state->free_reqs++] = req;
else
- list_add(&req->compl.list, &state->comp.free_list);
+ list_add(&req->inflight_entry, &state->free_list);
}
static void io_submit_flush_completions(struct io_ring_ctx *ctx)
+ __must_hold(&ctx->uring_lock)
{
- struct io_comp_state *cs = &ctx->submit_state.comp;
- int i, nr = cs->nr;
+ struct io_submit_state *state = &ctx->submit_state;
+ int i, nr = state->compl_nr; /* number of batched completions to flush */
struct req_batch rb;
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock); /* first pass: fill one CQE per batched request, then commit */
for (i = 0; i < nr; i++) {
- struct io_kiocb *req = cs->reqs[i];
+ struct io_kiocb *req = state->compl_reqs[i];
__io_cqring_fill_event(ctx, req->user_data, req->result,
req->compl.cflags);
}
io_commit_cqring(ctx);
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
io_init_req_batch(&rb);
for (i = 0; i < nr; i++) {
- struct io_kiocb *req = cs->reqs[i];
+ struct io_kiocb *req = state->compl_reqs[i];
- /* submission and completion refs */
- if (req_ref_sub_and_test(req, 2))
+ if (req_ref_put_and_test(req)) /* second pass: drop a single reference; when the helper reports true the request joins the free batch */
io_req_free_batch(&rb, req, &ctx->submit_state);
}
io_req_free_batch_finish(ctx, &rb);
- cs->nr = 0;
+ state->compl_nr = 0; /* the completion batch is empty again */
}
/*
@@ -2174,16 +2352,12 @@ static inline void io_put_req(struct io_kiocb *req)
io_free_req(req);
}
-static void io_free_req_deferred(struct io_kiocb *req)
+static inline void io_put_req_deferred(struct io_kiocb *req) /* drop one reference; the free itself is pushed to task_work */
{
- req->io_task_work.func = io_free_req;
- io_req_task_work_add(req);
-}
-
-static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
-{
- if (req_ref_sub_and_test(req, refs))
- io_free_req_deferred(req);
+ if (req_ref_put_and_test(req)) {
+ req->io_task_work.func = io_free_req_work; /* wrapper around io_free_req() with the (req, locked) callback signature */
+ io_req_task_work_add(req);
+ }
}
static unsigned io_cqring_events(struct io_ring_ctx *ctx)
@@ -2216,15 +2390,17 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
{
struct io_buffer *kbuf;
+ if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+ return 0;
kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
return io_put_kbuf(req, kbuf);
}
static inline bool io_run_task_work(void)
{
- if (current->task_works) {
+ if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) {
__set_current_state(TASK_RUNNING);
- task_work_run();
+ tracehook_notify_signal();
return true;
}
@@ -2235,7 +2411,7 @@ static inline bool io_run_task_work(void)
* Find and free completed poll iocbs
*/
static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
- struct list_head *done, bool resubmit)
+ struct list_head *done)
{
struct req_batch rb;
struct io_kiocb *req;
@@ -2245,23 +2421,18 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
io_init_req_batch(&rb);
while (!list_empty(done)) {
- int cflags = 0;
-
req = list_first_entry(done, struct io_kiocb, inflight_entry);
list_del(&req->inflight_entry);
- if (READ_ONCE(req->result) == -EAGAIN && resubmit &&
+ if (READ_ONCE(req->result) == -EAGAIN &&
!(req->flags & REQ_F_DONT_REISSUE)) {
req->iopoll_completed = 0;
- req_ref_get(req);
io_req_task_queue_reissue(req);
continue;
}
- if (req->flags & REQ_F_BUFFER_SELECTED)
- cflags = io_put_rw_kbuf(req);
-
- __io_cqring_fill_event(ctx, req->user_data, req->result, cflags);
+ __io_cqring_fill_event(ctx, req->user_data, req->result,
+ io_put_rw_kbuf(req));
(*nr_events)++;
if (req_ref_put_and_test(req))
@@ -2274,12 +2445,11 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
}
static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
- long min, bool resubmit)
+ long min)
{
struct io_kiocb *req, *tmp;
LIST_HEAD(done);
bool spin;
- int ret;
/*
* Only spin for completions if we don't have multiple devices hanging
@@ -2287,9 +2457,9 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
*/
spin = !ctx->poll_multi_queue && *nr_events < min;
- ret = 0;
list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) {
struct kiocb *kiocb = &req->rw.kiocb;
+ int ret;
/*
* Move completed and retryable entries to our local lists.
@@ -2304,22 +2474,20 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
break;
ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin);
- if (ret < 0)
- break;
+ if (unlikely(ret < 0))
+ return ret;
+ else if (ret)
+ spin = false;
/* iopoll may have completed current req */
if (READ_ONCE(req->iopoll_completed))
list_move_tail(&req->inflight_entry, &done);
-
- if (ret && spin)
- spin = false;
- ret = 0;
}
if (!list_empty(&done))
- io_iopoll_complete(ctx, nr_events, &done, resubmit);
+ io_iopoll_complete(ctx, nr_events, &done);
- return ret;
+ return 0;
}
/*
@@ -2335,7 +2503,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
while (!list_empty(&ctx->iopoll_list)) {
unsigned int nr_events = 0;
- io_do_iopoll(ctx, &nr_events, 0, false);
+ io_do_iopoll(ctx, &nr_events, 0);
/* let it sleep and repeat later if can't complete a request */
if (nr_events == 0)
@@ -2397,7 +2565,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
list_empty(&ctx->iopoll_list))
break;
}
- ret = io_do_iopoll(ctx, &nr_events, min, true);
+ ret = io_do_iopoll(ctx, &nr_events, min);
} while (!ret && nr_events < min && !need_resched());
out:
mutex_unlock(&ctx->uring_lock);
@@ -2466,42 +2634,57 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
}
#endif
-static void io_fallback_req_func(struct work_struct *work)
+static bool __io_complete_rw_common(struct io_kiocb *req, long res) /* returns true when @req was flagged REQ_F_REISSUE and must not be completed by the caller */
{
- struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
- fallback_work.work);
- struct llist_node *node = llist_del_all(&ctx->fallback_llist);
- struct io_kiocb *req, *tmp;
-
- llist_for_each_entry_safe(req, tmp, node, io_task_work.fallback_node)
- req->io_task_work.func(req);
-}
-
-static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
- unsigned int issue_flags)
-{
- int cflags = 0;
-
if (req->rw.kiocb.ki_flags & IOCB_WRITE)
kiocb_end_write(req);
if (res != req->result) {
if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
io_rw_should_reissue(req)) {
req->flags |= REQ_F_REISSUE;
- return;
+ return true;
}
req_set_fail(req);
+ req->result = res; /* store the mismatching result so callers can complete with req->result */
+ }
+ return false;
+}
+
+static void io_req_task_complete(struct io_kiocb *req, bool *locked)
+{
+ unsigned int cflags = io_put_rw_kbuf(req); /* 0 unless REQ_F_BUFFER_SELECTED is set on the request */
+ long res = req->result;
+
+ if (*locked) { /* uring_lock held by the caller: batch the completion in submit_state */
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_submit_state *state = &ctx->submit_state;
+
+ io_req_complete_state(req, res, cflags);
+ state->compl_reqs[state->compl_nr++] = req;
+ if (state->compl_nr == ARRAY_SIZE(state->compl_reqs)) /* batch array is full, flush it now */
+ io_submit_flush_completions(ctx);
+ } else { /* lock not held: post the completion directly */
+ io_req_complete_post(req, res, cflags);
}
- if (req->flags & REQ_F_BUFFER_SELECTED)
- cflags = io_put_rw_kbuf(req);
- __io_req_complete(req, issue_flags, res, cflags);
+}
+
+/*
+ * Complete a read/write request from the issue path.
+ *
+ * @req:         request being completed
+ * @res:         result of the I/O
+ * @res2:        unused
+ * @issue_flags: flags of the current issue attempt; forwarded to
+ *               __io_req_complete() so it can pick its completion mode
+ *
+ * Does nothing when __io_complete_rw_common() reports that the request was
+ * flagged for reissue. The old code forwarded @issue_flags as well; passing
+ * a literal 0 here left the parameter dead.
+ */
+static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
+			     unsigned int issue_flags)
+{
+	if (__io_complete_rw_common(req, res))
+		return;
+	__io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req));
}
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
- __io_complete_rw(req, res, res2, 0);
+ if (__io_complete_rw_common(req, res)) /* true: flagged for reissue, nothing to complete here */
+ return;
+ req->result = res; /* the common helper already leaves req->result == res on this path, so this store changes nothing */
+ req->io_task_work.func = io_req_task_complete; /* the completion is finished from task_work rather than inline */
+ io_req_task_work_add(req);
}
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
@@ -2587,40 +2770,6 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
}
}
-static inline void io_state_file_put(struct io_submit_state *state)
-{
- if (state->file_refs) {
- fput_many(state->file, state->file_refs);
- state->file_refs = 0;
- }
-}
-
-/*
- * Get as many references to a file as we have IOs left in this submission,
- * assuming most submissions are for one file, or at least that each file
- * has more than one submission.
- */
-static struct file *__io_file_get(struct io_submit_state *state, int fd)
-{
- if (!state)
- return fget(fd);
-
- if (state->file_refs) {
- if (state->fd == fd) {
- state->file_refs--;
- return state->file;
- }
- io_state_file_put(state);
- }
- state->file = fget_many(fd, state->ios_left);
- if (unlikely(!state->file))
- return NULL;
-
- state->fd = fd;
- state->file_refs = state->ios_left - 1;
- return state->file;
-}
-
static bool io_bdev_nowait(struct block_device *bdev)
{
return !bdev || blk_queue_nowait(bdev_get_queue(bdev));
@@ -2631,7 +2780,7 @@ static bool io_bdev_nowait(struct block_device *bdev)
* any file. For now, just ensure that anything potentially problematic is done
* inline.
*/
-static bool __io_file_supports_async(struct file *file, int rw)
+static bool __io_file_supports_nowait(struct file *file, int rw)
{
umode_t mode = file_inode(file)->i_mode;
@@ -2664,14 +2813,14 @@ static bool __io_file_supports_async(struct file *file, int rw)
return file->f_op->write_iter != NULL;
}
-static bool io_file_supports_async(struct io_kiocb *req, int rw)
+static bool io_file_supports_nowait(struct io_kiocb *req, int rw) /* @rw is compared against READ and WRITE */
{
- if (rw == READ && (req->flags & REQ_F_ASYNC_READ))
+ if (rw == READ && (req->flags & REQ_F_NOWAIT_READ)) /* flag already says yes, skip the per-file check */
return true;
- else if (rw == WRITE && (req->flags & REQ_F_ASYNC_WRITE))
+ else if (rw == WRITE && (req->flags & REQ_F_NOWAIT_WRITE))
return true;
- return __io_file_supports_async(req->file, rw);
+ return __io_file_supports_nowait(req->file, rw); /* no flag set: inspect req->file itself */
}
static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -2682,7 +2831,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
unsigned ioprio;
int ret;
- if (!(req->flags & REQ_F_ISREG) && S_ISREG(file_inode(file)->i_mode))
+ if (!io_req_ffs_set(req) && S_ISREG(file_inode(file)->i_mode))
req->flags |= REQ_F_ISREG;
kiocb->ki_pos = READ_ONCE(sqe->off);
@@ -2715,7 +2864,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
!kiocb->ki_filp->f_op->iopoll)
return -EOPNOTSUPP;
- kiocb->ki_flags |= IOCB_HIPRI;
+ kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE;
kiocb->ki_complete = io_complete_rw_iopoll;
req->iopoll_completed = 0;
} else {
@@ -2782,15 +2931,11 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
if (check_reissue && (req->flags & REQ_F_REISSUE)) {
req->flags &= ~REQ_F_REISSUE;
if (io_resubmit_prep(req)) {
- req_ref_get(req);
io_req_task_queue_reissue(req);
} else {
- int cflags = 0;
-
req_set_fail(req);
- if (req->flags & REQ_F_BUFFER_SELECTED)
- cflags = io_put_rw_kbuf(req);
- __io_req_complete(req, issue_flags, ret, cflags);
+ __io_req_complete(req, issue_flags, ret,
+ io_put_rw_kbuf(req));
}
}
}
@@ -3208,9 +3353,6 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
req->rw.kiocb.ki_flags &= ~IOCB_WAITQ;
list_del_init(&wait->entry);
-
- /* submit ref gets dropped, acquire a new one */
- req_ref_get(req);
io_req_task_queue(req);
return 1;
}
@@ -3295,7 +3437,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
kiocb->ki_flags |= IOCB_NOWAIT;
/* If the file doesn't support async, just async punt */
- if (force_nonblock && !io_file_supports_async(req, READ)) {
+ if (force_nonblock && !io_file_supports_nowait(req, READ)) {
ret = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
return ret ?: -EAGAIN;
}
@@ -3400,7 +3542,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
kiocb->ki_flags |= IOCB_NOWAIT;
/* If the file doesn't support async, just async punt */
- if (force_nonblock && !io_file_supports_async(req, WRITE))
+ if (force_nonblock && !io_file_supports_nowait(req, WRITE))
goto copy_iov;
/* file path doesn't support NOWAIT for non-direct_IO */
@@ -3475,7 +3617,7 @@ static int io_renameat_prep(struct io_kiocb *req,
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->buf_index)
+ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -3526,7 +3668,8 @@ static int io_unlinkat_prep(struct io_kiocb *req,
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
+ if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
+ sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -3566,14 +3709,157 @@ static int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
+/*
+ * Validate the SQE of a mkdirat request and capture its arguments.
+ *
+ * sqe->fd supplies the directory fd, sqe->len the mode and sqe->addr the
+ * user pointer to the path, which is copied in with getname(). On success
+ * REQ_F_NEED_CLEANUP is set on the request.
+ *
+ * Returns 0, -EINVAL for IOPOLL rings or non-zero unused SQE fields, -EBADF
+ * for fixed-file requests, or the error encoded by getname().
+ */
+static int io_mkdirat_prep(struct io_kiocb *req,
+			    const struct io_uring_sqe *sqe)
+{
+	struct io_mkdir *md = &req->mkdir;
+	const char __user *upath;
+
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+	if (sqe->ioprio || sqe->off || sqe->rw_flags || sqe->buf_index ||
+	    sqe->splice_fd_in)
+		return -EINVAL;
+	if (unlikely(req->flags & REQ_F_FIXED_FILE))
+		return -EBADF;
+
+	md->dfd = READ_ONCE(sqe->fd);
+	md->mode = READ_ONCE(sqe->len);
+	upath = u64_to_user_ptr(READ_ONCE(sqe->addr));
+
+	md->filename = getname(upath);
+	if (!IS_ERR(md->filename)) {
+		req->flags |= REQ_F_NEED_CLEANUP;
+		return 0;
+	}
+	return PTR_ERR(md->filename);
+}
+
+/*
+ * Issue handler for a request prepared by io_mkdirat_prep().
+ *
+ * @issue_flags is a bitmask and is declared unsigned int like the other
+ * issue handlers in this file.
+ *
+ * Returns -EAGAIN without attempting anything when IO_URING_F_NONBLOCK is
+ * set; otherwise calls do_mkdirat(), completes the request with its result
+ * (marking it failed on a negative result) and returns 0.
+ */
+static int io_mkdirat(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_mkdir *mkd = &req->mkdir;
+	int ret;
+
+	if (issue_flags & IO_URING_F_NONBLOCK)
+		return -EAGAIN;
+
+	ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode);
+
+	/* NOTE(review): presumably do_mkdirat() consumed ->filename - confirm */
+	req->flags &= ~REQ_F_NEED_CLEANUP;
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_complete(req, ret);
+	return 0;
+}
+
+/*
+ * Validate the SQE of a symlinkat request and capture its arguments.
+ *
+ * sqe->fd supplies the new directory fd, sqe->addr the user pointer to the
+ * old path and sqe->addr2 the user pointer to the new path. Both strings
+ * are copied in with getname(); on success REQ_F_NEED_CLEANUP is set.
+ *
+ * Returns 0, -EINVAL for IOPOLL rings or non-zero unused SQE fields, -EBADF
+ * for fixed-file requests, or the error encoded by a failing getname().
+ */
+static int io_symlinkat_prep(struct io_kiocb *req,
+			      const struct io_uring_sqe *sqe)
+{
+	struct io_symlink *slink = &req->symlink;
+	const char __user *from, *to;
+
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+	if (sqe->ioprio || sqe->len || sqe->rw_flags || sqe->buf_index ||
+	    sqe->splice_fd_in)
+		return -EINVAL;
+	if (unlikely(req->flags & REQ_F_FIXED_FILE))
+		return -EBADF;
+
+	slink->new_dfd = READ_ONCE(sqe->fd);
+	from = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	to = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+
+	slink->oldpath = getname(from);
+	if (IS_ERR(slink->oldpath))
+		return PTR_ERR(slink->oldpath);
+
+	slink->newpath = getname(to);
+	if (!IS_ERR(slink->newpath)) {
+		req->flags |= REQ_F_NEED_CLEANUP;
+		return 0;
+	}
+
+	/* second name failed: release the first one before bailing out */
+	putname(slink->oldpath);
+	return PTR_ERR(slink->newpath);
+}
+
+/*
+ * Issue handler for a request prepared by io_symlinkat_prep().
+ *
+ * @issue_flags is a bitmask and is declared unsigned int like the other
+ * issue handlers in this file.
+ *
+ * Returns -EAGAIN without attempting anything when IO_URING_F_NONBLOCK is
+ * set; otherwise calls do_symlinkat(), completes the request with its
+ * result (marking it failed on a negative result) and returns 0.
+ */
+static int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_symlink *sl = &req->symlink;
+	int ret;
+
+	if (issue_flags & IO_URING_F_NONBLOCK)
+		return -EAGAIN;
+
+	ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath);
+
+	/* NOTE(review): presumably do_symlinkat() consumed both names - confirm */
+	req->flags &= ~REQ_F_NEED_CLEANUP;
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_complete(req, ret);
+	return 0;
+}
+
+/*
+ * Validate the SQE of a linkat request and capture its arguments.
+ *
+ * sqe->fd supplies the old directory fd, sqe->len the new directory fd,
+ * sqe->addr / sqe->addr2 the user pointers to the old and new paths and
+ * sqe->hardlink_flags the flags. Both paths are copied in with getname();
+ * on success REQ_F_NEED_CLEANUP is set.
+ *
+ * Returns 0, -EINVAL for IOPOLL rings or non-zero unused SQE fields, -EBADF
+ * for fixed-file requests, or the error encoded by a failing getname().
+ */
+static int io_linkat_prep(struct io_kiocb *req,
+			   const struct io_uring_sqe *sqe)
+{
+	struct io_hardlink *hl = &req->hardlink;
+	const char __user *from, *to;
+
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+	if (sqe->ioprio || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+		return -EINVAL;
+	if (unlikely(req->flags & REQ_F_FIXED_FILE))
+		return -EBADF;
+
+	hl->old_dfd = READ_ONCE(sqe->fd);
+	hl->new_dfd = READ_ONCE(sqe->len);
+	from = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	to = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+	hl->flags = READ_ONCE(sqe->hardlink_flags);
+
+	hl->oldpath = getname(from);
+	if (IS_ERR(hl->oldpath))
+		return PTR_ERR(hl->oldpath);
+
+	hl->newpath = getname(to);
+	if (!IS_ERR(hl->newpath)) {
+		req->flags |= REQ_F_NEED_CLEANUP;
+		return 0;
+	}
+
+	/* second name failed: release the first one before bailing out */
+	putname(hl->oldpath);
+	return PTR_ERR(hl->newpath);
+}
+
+/*
+ * Issue handler for a request prepared by io_linkat_prep().
+ *
+ * @issue_flags is a bitmask and is declared unsigned int like the other
+ * issue handlers in this file.
+ *
+ * Returns -EAGAIN without attempting anything when IO_URING_F_NONBLOCK is
+ * set; otherwise calls do_linkat(), completes the request with its result
+ * (marking it failed on a negative result) and returns 0.
+ */
+static int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_hardlink *lnk = &req->hardlink;
+	int ret;
+
+	if (issue_flags & IO_URING_F_NONBLOCK)
+		return -EAGAIN;
+
+	ret = do_linkat(lnk->old_dfd, lnk->oldpath, lnk->new_dfd,
+			lnk->newpath, lnk->flags);
+
+	/* NOTE(review): presumably do_linkat() consumed both names - confirm */
+	req->flags &= ~REQ_F_NEED_CLEANUP;
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_complete(req, ret);
+	return 0;
+}
+
static int io_shutdown_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags ||
- sqe->buf_index)
+ if (unlikely(sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags ||
+ sqe->buf_index || sqe->splice_fd_in))
return -EINVAL;
req->shutdown.how = READ_ONCE(sqe->len);
@@ -3622,7 +3908,7 @@ static int __io_splice_prep(struct io_kiocb *req,
if (unlikely(sp->flags & ~valid_flags))
return -EINVAL;
- sp->file_in = io_file_get(NULL, req, READ_ONCE(sqe->splice_fd_in),
+ sp->file_in = io_file_get(req->ctx, req, READ_ONCE(sqe->splice_fd_in),
(sp->flags & SPLICE_F_FD_IN_FIXED));
if (!sp->file_in)
return -EBADF;
@@ -3721,7 +4007,8 @@ static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
+ if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
+ sqe->splice_fd_in))
return -EINVAL;
req->sync.flags = READ_ONCE(sqe->fsync_flags);
@@ -3754,7 +4041,8 @@ static int io_fsync(struct io_kiocb *req, unsigned int issue_flags)
static int io_fallocate_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
- if (sqe->ioprio || sqe->buf_index || sqe->rw_flags)
+ if (sqe->ioprio || sqe->buf_index || sqe->rw_flags ||
+ sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -3785,6 +4073,8 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
const char __user *fname;
int ret;
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
if (unlikely(sqe->ioprio || sqe->buf_index))
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
@@ -3802,6 +4092,11 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
req->open.filename = NULL;
return ret;
}
+
+ req->open.file_slot = READ_ONCE(sqe->file_index);
+ if (req->open.file_slot && (req->open.how.flags & O_CLOEXEC))
+ return -EINVAL;
+
req->open.nofile = rlimit(RLIMIT_NOFILE);
req->flags |= REQ_F_NEED_CLEANUP;
return 0;
@@ -3809,12 +4104,9 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- u64 flags, mode;
+ u64 mode = READ_ONCE(sqe->len); /* the mode argument is carried in sqe->len */
+ u64 flags = READ_ONCE(sqe->open_flags); /* the IORING_SETUP_IOPOLL rejection now lives in __io_openat_prep() */
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- mode = READ_ONCE(sqe->len);
- flags = READ_ONCE(sqe->open_flags);
req->open.how = build_open_how(flags, mode);
return __io_openat_prep(req, sqe);
}
@@ -3825,8 +4117,6 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
size_t len;
int ret;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
how = u64_to_user_ptr(READ_ONCE(sqe->addr2));
len = READ_ONCE(sqe->len);
if (len < OPEN_HOW_SIZE_VER0)
@@ -3844,8 +4134,8 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
{
struct open_flags op;
struct file *file;
- bool nonblock_set;
- bool resolve_nonblock;
+ bool resolve_nonblock, nonblock_set;
+ bool fixed = !!req->open.file_slot;
int ret;
ret = build_open_flags(&req->open.how, &op);
@@ -3864,9 +4154,11 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
op.open_flag |= O_NONBLOCK;
}
- ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
- if (ret < 0)
- goto err;
+ if (!fixed) {
+ ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
+ if (ret < 0)
+ goto err;
+ }
file = do_filp_open(req->open.dfd, req->open.filename, &op);
if (IS_ERR(file)) {
@@ -3875,7 +4167,8 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
* marginal gain for something that is now known to be a slower
* path. So just put it, and we'll get a new one when we retry.
*/
- put_unused_fd(ret);
+ if (!fixed)
+ put_unused_fd(ret);
ret = PTR_ERR(file);
/* only retry if RESOLVE_CACHED wasn't already set by application */
@@ -3888,7 +4181,12 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set)
file->f_flags &= ~O_NONBLOCK;
fsnotify_open(file);
- fd_install(ret, file);
+
+ if (!fixed)
+ fd_install(ret, file);
+ else
+ ret = io_install_fixed_file(req, file, issue_flags,
+ req->open.file_slot - 1);
err:
putname(req->open.filename);
req->flags &= ~REQ_F_NEED_CLEANUP;
@@ -3909,7 +4207,8 @@ static int io_remove_buffers_prep(struct io_kiocb *req,
struct io_provide_buf *p = &req->pbuf;
u64 tmp;
- if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off)
+ if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
+ sqe->splice_fd_in)
return -EINVAL;
tmp = READ_ONCE(sqe->fd);
@@ -3980,7 +4279,7 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
struct io_provide_buf *p = &req->pbuf;
u64 tmp;
- if (sqe->ioprio || sqe->rw_flags)
+ if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
return -EINVAL;
tmp = READ_ONCE(sqe->fd);
@@ -4067,7 +4366,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_EPOLL)
- if (sqe->ioprio || sqe->buf_index)
+ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -4113,7 +4412,7 @@ static int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
- if (sqe->ioprio || sqe->buf_index || sqe->off)
+ if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -4148,7 +4447,7 @@ static int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- if (sqe->ioprio || sqe->buf_index || sqe->addr)
+ if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -4186,7 +4485,7 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->buf_index)
+ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
@@ -4222,7 +4521,7 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
- sqe->rw_flags || sqe->buf_index)
+ sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
@@ -4283,7 +4582,8 @@ static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
+ if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
+ sqe->splice_fd_in))
return -EINVAL;
req->sync.off = READ_ONCE(sqe->off);
@@ -4717,6 +5017,15 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
accept->flags = READ_ONCE(sqe->accept_flags);
accept->nofile = rlimit(RLIMIT_NOFILE);
+
+ accept->file_slot = READ_ONCE(sqe->file_index);
+ if (accept->file_slot && ((req->open.how.flags & O_CLOEXEC) ||
+ (accept->flags & SOCK_CLOEXEC)))
+ return -EINVAL;
+ if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ return -EINVAL;
+ if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
+ accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
return 0;
}
@@ -4725,20 +5034,35 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
struct io_accept *accept = &req->accept;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
- int ret;
+ bool fixed = !!accept->file_slot;
+ struct file *file;
+ int ret, fd;
if (req->file->f_flags & O_NONBLOCK)
req->flags |= REQ_F_NOWAIT;
- ret = __sys_accept4_file(req->file, file_flags, accept->addr,
- accept->addr_len, accept->flags,
- accept->nofile);
- if (ret == -EAGAIN && force_nonblock)
- return -EAGAIN;
- if (ret < 0) {
+ if (!fixed) {
+ fd = __get_unused_fd_flags(accept->flags, accept->nofile);
+ if (unlikely(fd < 0))
+ return fd;
+ }
+ file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
+ accept->flags);
+ if (IS_ERR(file)) {
+ if (!fixed)
+ put_unused_fd(fd);
+ ret = PTR_ERR(file);
+ if (ret == -EAGAIN && force_nonblock)
+ return -EAGAIN;
if (ret == -ERESTARTSYS)
ret = -EINTR;
req_set_fail(req);
+ } else if (!fixed) {
+ fd_install(fd, file);
+ ret = fd;
+ } else {
+ ret = io_install_fixed_file(req, file, issue_flags,
+ accept->file_slot - 1);
}
__io_req_complete(req, issue_flags, ret, 0);
return 0;
@@ -4758,7 +5082,8 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags)
+ if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags ||
+ sqe->splice_fd_in)
return -EINVAL;
conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -4871,6 +5196,7 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
{
struct io_ring_ctx *ctx = req->ctx;
+ /* req->task == current here, checking PF_EXITING is safe */
if (unlikely(req->task->flags & PF_EXITING))
WRITE_ONCE(poll->canceled, true);
@@ -4880,7 +5206,7 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
req->result = vfs_poll(req->file, &pt) & poll->events;
}
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
if (!req->result && !READ_ONCE(poll->canceled)) {
add_wait_queue(poll->head, &poll->wait);
return true;
@@ -4914,12 +5240,12 @@ static void io_poll_remove_double(struct io_kiocb *req)
if (poll && poll->head) {
struct wait_queue_head *head = poll->head;
- spin_lock(&head->lock);
+ spin_lock_irq(&head->lock);
list_del_init(&poll->wait.entry);
if (poll->wait.private)
req_ref_put(req);
poll->head = NULL;
- spin_unlock(&head->lock);
+ spin_unlock_irq(&head->lock);
}
}
@@ -4949,13 +5275,13 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
return !(flags & IORING_CQE_F_MORE);
}
-static void io_poll_task_func(struct io_kiocb *req)
+static void io_poll_task_func(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *nxt;
if (io_poll_rewait(req, &req->poll)) {
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
} else {
bool done;
@@ -4967,13 +5293,13 @@ static void io_poll_task_func(struct io_kiocb *req)
req->result = 0;
add_wait_queue(req->poll.head, &req->poll.wait);
}
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
if (done) {
nxt = io_put_req_find_next(req);
if (nxt)
- io_req_task_submit(nxt);
+ io_req_task_submit(nxt, locked);
}
}
}
@@ -4984,6 +5310,7 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
struct io_kiocb *req = wait->private;
struct io_poll_iocb *poll = io_poll_get_single(req);
__poll_t mask = key_to_poll(key);
+ unsigned long flags;
/* for instances that support it check for an event match first: */
if (mask && !(mask & poll->events))
@@ -4996,13 +5323,13 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
if (poll->head) {
bool done;
- spin_lock(&poll->head->lock);
+ spin_lock_irqsave(&poll->head->lock, flags);
done = list_empty(&poll->wait.entry);
if (!done)
list_del_init(&poll->wait.entry);
/* make sure double remove sees this as being gone */
wait->private = NULL;
- spin_unlock(&poll->head->lock);
+ spin_unlock_irqrestore(&poll->head->lock, flags);
if (!done) {
/* use wait func handler, so it matches the rq type */
poll->wait.func(&poll->wait, mode, sync, key);
@@ -5039,8 +5366,13 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
if (unlikely(pt->nr_entries)) {
struct io_poll_iocb *poll_one = poll;
+ /* double add on the same waitqueue head, ignore */
+ if (poll_one->head == head)
+ return;
/* already have a 2nd entry, fail a third attempt */
if (*poll_ptr) {
+ if ((*poll_ptr)->head == head)
+ return;
pt->error = -EINVAL;
return;
}
@@ -5050,9 +5382,6 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
*/
if (!(poll_one->events & EPOLLONESHOT))
poll_one->events |= EPOLLONESHOT;
- /* double add on the same waitqueue head, ignore */
- if (poll_one->head == head)
- return;
poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
if (!poll) {
pt->error = -ENOMEM;
@@ -5082,7 +5411,7 @@ static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}
-static void io_async_task_func(struct io_kiocb *req)
+static void io_async_task_func(struct io_kiocb *req, bool *locked)
{
struct async_poll *apoll = req->apoll;
struct io_ring_ctx *ctx = req->ctx;
@@ -5090,16 +5419,16 @@ static void io_async_task_func(struct io_kiocb *req)
trace_io_uring_task_run(req->ctx, req, req->opcode, req->user_data);
if (io_poll_rewait(req, &apoll->poll)) {
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
return;
}
hash_del(&req->hash_node);
io_poll_remove_double(req);
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
if (!READ_ONCE(apoll->poll.canceled))
- io_req_task_submit(req);
+ io_req_task_submit(req, locked);
else
io_req_complete_failed(req, -ECANCELED);
}
@@ -5148,11 +5477,11 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
if (unlikely(!ipt->nr_entries) && !ipt->error)
ipt->error = -EINVAL;
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
if (ipt->error || (mask && (poll->events & EPOLLONESHOT)))
io_poll_remove_double(req);
if (likely(poll->head)) {
- spin_lock(&poll->head->lock);
+ spin_lock_irq(&poll->head->lock);
if (unlikely(list_empty(&poll->wait.entry))) {
if (ipt->error)
cancel = true;
@@ -5165,7 +5494,7 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
WRITE_ONCE(poll->canceled, true);
else if (!poll->done) /* actually waiting for an event */
io_poll_req_insert(req);
- spin_unlock(&poll->head->lock);
+ spin_unlock_irq(&poll->head->lock);
}
return mask;
@@ -5207,7 +5536,7 @@ static int io_arm_poll_handler(struct io_kiocb *req)
}
/* if we can't nonblock try, then no point in arming a poll handler */
- if (!io_file_supports_async(req, rw))
+ if (!io_file_supports_nowait(req, rw))
return IO_APOLL_ABORTED;
apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
@@ -5217,16 +5546,14 @@ static int io_arm_poll_handler(struct io_kiocb *req)
req->apoll = apoll;
req->flags |= REQ_F_POLLED;
ipt.pt._qproc = io_async_queue_proc;
+ io_req_set_refcount(req);
ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
io_async_wake);
- if (ret || ipt.error) {
- spin_unlock_irq(&ctx->completion_lock);
- if (ret)
- return IO_APOLL_READY;
- return IO_APOLL_ABORTED;
- }
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
+ if (ret || ipt.error)
+ return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;
+
trace_io_uring_poll_arm(ctx, req, req->opcode, req->user_data,
mask, apoll->poll.events);
return IO_APOLL_OK;
@@ -5240,19 +5567,19 @@ static bool __io_poll_remove_one(struct io_kiocb *req,
if (!poll->head)
return false;
- spin_lock(&poll->head->lock);
+ spin_lock_irq(&poll->head->lock);
if (do_cancel)
WRITE_ONCE(poll->canceled, true);
if (!list_empty(&poll->wait.entry)) {
list_del_init(&poll->wait.entry);
do_complete = true;
}
- spin_unlock(&poll->head->lock);
+ spin_unlock_irq(&poll->head->lock);
hash_del(&req->hash_node);
return do_complete;
}
-static bool io_poll_remove_waitqs(struct io_kiocb *req)
+static bool io_poll_remove_one(struct io_kiocb *req)
__must_hold(&req->ctx->completion_lock)
{
bool do_complete;
@@ -5260,26 +5587,12 @@ static bool io_poll_remove_waitqs(struct io_kiocb *req)
io_poll_remove_double(req);
do_complete = __io_poll_remove_one(req, io_poll_get_single(req), true);
- if (req->opcode != IORING_OP_POLL_ADD && do_complete) {
- /* non-poll requests have submit ref still */
- req_ref_put(req);
- }
- return do_complete;
-}
-
-static bool io_poll_remove_one(struct io_kiocb *req)
- __must_hold(&req->ctx->completion_lock)
-{
- bool do_complete;
-
- do_complete = io_poll_remove_waitqs(req);
if (do_complete) {
io_cqring_fill_event(req->ctx, req->user_data, -ECANCELED, 0);
io_commit_cqring(req->ctx);
req_set_fail(req);
- io_put_req_deferred(req, 1);
+ io_put_req_deferred(req);
}
-
return do_complete;
}
@@ -5293,7 +5606,7 @@ static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
struct io_kiocb *req;
int posted = 0, i;
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
struct hlist_head *list;
@@ -5303,7 +5616,7 @@ static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
posted += io_poll_remove_one(req);
}
}
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
if (posted)
io_cqring_ev_posted(ctx);
@@ -5366,7 +5679,7 @@ static int io_poll_update_prep(struct io_kiocb *req,
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->buf_index)
+ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
flags = READ_ONCE(sqe->len);
if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
@@ -5421,6 +5734,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
if (flags & ~IORING_POLL_ADD_MULTI)
return -EINVAL;
+ io_req_set_refcount(req);
poll->events = io_poll_parse_events(sqe, flags);
return 0;
}
@@ -5441,7 +5755,7 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
ipt.error = 0;
io_poll_complete(req, mask);
}
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
if (mask) {
io_cqring_ev_posted(ctx);
@@ -5458,7 +5772,7 @@ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
bool completing;
int ret;
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
preq = io_poll_find(ctx, req->poll_update.old_user_data, true);
if (!preq) {
ret = -ENOENT;
@@ -5485,7 +5799,7 @@ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
ret = 0;
err:
if (ret < 0) {
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
req_set_fail(req);
io_req_complete(req, ret);
return 0;
@@ -5498,7 +5812,7 @@ err:
}
if (req->poll_update.update_user_data)
preq->user_data = req->poll_update.new_user_data;
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
/* complete update request, we're done with it */
io_req_complete(req, ret);
@@ -5513,6 +5827,12 @@ err:
return 0;
}
+static void io_req_task_timeout(struct io_kiocb *req, bool *locked)
+{
+ req_set_fail(req);
+ io_req_complete_post(req, -ETIME, 0);
+}
+
static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
{
struct io_timeout_data *data = container_of(timer,
@@ -5521,24 +5841,20 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
- spin_lock_irqsave(&ctx->completion_lock, flags);
+ spin_lock_irqsave(&ctx->timeout_lock, flags);
list_del_init(&req->timeout.list);
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
+ spin_unlock_irqrestore(&ctx->timeout_lock, flags);
- io_cqring_fill_event(ctx, req->user_data, -ETIME, 0);
- io_commit_cqring(ctx);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
-
- io_cqring_ev_posted(ctx);
- req_set_fail(req);
- io_put_req(req);
+ req->io_task_work.func = io_req_task_timeout;
+ io_req_task_work_add(req);
return HRTIMER_NORESTART;
}
static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
__u64 user_data)
- __must_hold(&ctx->completion_lock)
+ __must_hold(&ctx->timeout_lock)
{
struct io_timeout_data *io;
struct io_kiocb *req;
@@ -5561,6 +5877,7 @@ static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
__must_hold(&ctx->completion_lock)
+ __must_hold(&ctx->timeout_lock)
{
struct io_kiocb *req = io_timeout_extract(ctx, user_data);
@@ -5569,13 +5886,54 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
req_set_fail(req);
io_cqring_fill_event(ctx, req->user_data, -ECANCELED, 0);
- io_put_req_deferred(req, 1);
+ io_put_req_deferred(req);
+ return 0;
+}
+
+static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
+{
+ switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
+ case IORING_TIMEOUT_BOOTTIME:
+ return CLOCK_BOOTTIME;
+ case IORING_TIMEOUT_REALTIME:
+ return CLOCK_REALTIME;
+ default:
+ /* can't happen, vetted at prep time */
+ WARN_ON_ONCE(1);
+ fallthrough;
+ case 0:
+ return CLOCK_MONOTONIC;
+ }
+}
+
+static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
+ struct timespec64 *ts, enum hrtimer_mode mode)
+ __must_hold(&ctx->timeout_lock)
+{
+ struct io_timeout_data *io;
+ struct io_kiocb *req;
+ bool found = false;
+
+ list_for_each_entry(req, &ctx->ltimeout_list, timeout.list) {
+ found = user_data == req->user_data;
+ if (found)
+ break;
+ }
+ if (!found)
+ return -ENOENT;
+
+ io = req->async_data;
+ if (hrtimer_try_to_cancel(&io->timer) == -1)
+ return -EALREADY;
+ hrtimer_init(&io->timer, io_timeout_get_clock(io), mode);
+ io->timer.function = io_link_timeout_fn;
+ hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode);
return 0;
}
static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
struct timespec64 *ts, enum hrtimer_mode mode)
- __must_hold(&ctx->completion_lock)
+ __must_hold(&ctx->timeout_lock)
{
struct io_kiocb *req = io_timeout_extract(ctx, user_data);
struct io_timeout_data *data;
@@ -5586,7 +5944,7 @@ static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
req->timeout.off = 0; /* noseq */
data = req->async_data;
list_add_tail(&req->timeout.list, &ctx->timeout_list);
- hrtimer_init(&data->timer, CLOCK_MONOTONIC, mode);
+ hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
data->timer.function = io_timeout_fn;
hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
return 0;
@@ -5601,13 +5959,18 @@ static int io_timeout_remove_prep(struct io_kiocb *req,
return -EINVAL;
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->len)
+ if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->splice_fd_in)
return -EINVAL;
+ tr->ltimeout = false;
tr->addr = READ_ONCE(sqe->addr);
tr->flags = READ_ONCE(sqe->timeout_flags);
- if (tr->flags & IORING_TIMEOUT_UPDATE) {
- if (tr->flags & ~(IORING_TIMEOUT_UPDATE|IORING_TIMEOUT_ABS))
+ if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
+ if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
+ return -EINVAL;
+ if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
+ tr->ltimeout = true;
+ if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
return -EINVAL;
if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
return -EFAULT;
@@ -5634,20 +5997,26 @@ static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
struct io_ring_ctx *ctx = req->ctx;
int ret;
- spin_lock_irq(&ctx->completion_lock);
- if (!(req->timeout_rem.flags & IORING_TIMEOUT_UPDATE))
+ if (!(req->timeout_rem.flags & IORING_TIMEOUT_UPDATE)) {
+ spin_lock(&ctx->completion_lock);
+ spin_lock_irq(&ctx->timeout_lock);
ret = io_timeout_cancel(ctx, tr->addr);
- else
- ret = io_timeout_update(ctx, tr->addr, &tr->ts,
- io_translate_timeout_mode(tr->flags));
+ spin_unlock_irq(&ctx->timeout_lock);
+ spin_unlock(&ctx->completion_lock);
+ } else {
+ enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);
+
+ spin_lock_irq(&ctx->timeout_lock);
+ if (tr->ltimeout)
+ ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
+ else
+ ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
+ spin_unlock_irq(&ctx->timeout_lock);
+ }
- io_cqring_fill_event(ctx, req->user_data, ret, 0);
- io_commit_cqring(ctx);
- spin_unlock_irq(&ctx->completion_lock);
- io_cqring_ev_posted(ctx);
if (ret < 0)
req_set_fail(req);
- io_put_req(req);
+ io_req_complete_post(req, ret, 0);
return 0;
}
@@ -5660,14 +6029,19 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->len != 1)
+ if (sqe->ioprio || sqe->buf_index || sqe->len != 1 ||
+ sqe->splice_fd_in)
return -EINVAL;
if (off && is_timeout_link)
return -EINVAL;
flags = READ_ONCE(sqe->timeout_flags);
- if (flags & ~IORING_TIMEOUT_ABS)
+ if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK))
+ return -EINVAL;
+ /* more than one clock specified is invalid, obviously */
+ if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
return -EINVAL;
+ INIT_LIST_HEAD(&req->timeout.list);
req->timeout.off = off;
if (unlikely(off && !req->ctx->off_timeout_used))
req->ctx->off_timeout_used = true;
@@ -5677,14 +6051,24 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
data = req->async_data;
data->req = req;
+ data->flags = flags;
if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
return -EFAULT;
data->mode = io_translate_timeout_mode(flags);
- hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode);
- if (is_timeout_link)
- io_req_track_inflight(req);
+ hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);
+
+ if (is_timeout_link) {
+ struct io_submit_link *link = &req->ctx->submit_state.link;
+
+ if (!link->head)
+ return -EINVAL;
+ if (link->last->opcode == IORING_OP_LINK_TIMEOUT)
+ return -EINVAL;
+ req->timeout.head = link->last;
+ link->last->flags |= REQ_F_ARM_LTIMEOUT;
+ }
return 0;
}
@@ -5695,7 +6079,7 @@ static int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
struct list_head *entry;
u32 tail, off = req->timeout.off;
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock_irq(&ctx->timeout_lock);
/*
* sqe->off holds how many events that need to occur for this
@@ -5734,7 +6118,7 @@ add:
list_add(&req->timeout.list, entry);
data->timer.function = io_timeout_fn;
hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock_irq(&ctx->timeout_lock);
return 0;
}
@@ -5777,31 +6161,27 @@ static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data,
return ret;
}
-static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
- struct io_kiocb *req, __u64 sqe_addr,
- int success_ret)
+static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
{
- unsigned long flags;
+ struct io_ring_ctx *ctx = req->ctx;
int ret;
+ WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
+
ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
- spin_lock_irqsave(&ctx->completion_lock, flags);
if (ret != -ENOENT)
- goto done;
+ return ret;
+
+ spin_lock(&ctx->completion_lock);
+ spin_lock_irq(&ctx->timeout_lock);
ret = io_timeout_cancel(ctx, sqe_addr);
+ spin_unlock_irq(&ctx->timeout_lock);
if (ret != -ENOENT)
- goto done;
+ goto out;
ret = io_poll_cancel(ctx, sqe_addr, false);
-done:
- if (!ret)
- ret = success_ret;
- io_cqring_fill_event(ctx, req->user_data, ret, 0);
- io_commit_cqring(ctx);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
- io_cqring_ev_posted(ctx);
-
- if (ret < 0)
- req_set_fail(req);
+out:
+ spin_unlock(&ctx->completion_lock);
+ return ret;
}
static int io_async_cancel_prep(struct io_kiocb *req,
@@ -5811,7 +6191,8 @@ static int io_async_cancel_prep(struct io_kiocb *req,
return -EINVAL;
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags)
+ if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags ||
+ sqe->splice_fd_in)
return -EINVAL;
req->cancel.addr = READ_ONCE(sqe->addr);
@@ -5825,18 +6206,9 @@ static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
struct io_tctx_node *node;
int ret;
- /* tasks should wait for their io-wq threads, so safe w/o sync */
- ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
- spin_lock_irq(&ctx->completion_lock);
- if (ret != -ENOENT)
- goto done;
- ret = io_timeout_cancel(ctx, sqe_addr);
- if (ret != -ENOENT)
- goto done;
- ret = io_poll_cancel(ctx, sqe_addr, false);
+ ret = io_try_cancel_userdata(req, sqe_addr);
if (ret != -ENOENT)
goto done;
- spin_unlock_irq(&ctx->completion_lock);
/* slow path, try all io-wq's */
io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
@@ -5849,17 +6221,10 @@ static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
break;
}
io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
-
- spin_lock_irq(&ctx->completion_lock);
done:
- io_cqring_fill_event(ctx, req->user_data, ret, 0);
- io_commit_cqring(ctx);
- spin_unlock_irq(&ctx->completion_lock);
- io_cqring_ev_posted(ctx);
-
if (ret < 0)
req_set_fail(req);
- io_put_req(req);
+ io_req_complete_post(req, ret, 0);
return 0;
}
@@ -5868,7 +6233,7 @@ static int io_rsrc_update_prep(struct io_kiocb *req,
{
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
return -EINVAL;
- if (sqe->ioprio || sqe->rw_flags)
+ if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
return -EINVAL;
req->rsrc_update.offset = READ_ONCE(sqe->off);
@@ -5976,6 +6341,12 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return io_renameat_prep(req, sqe);
case IORING_OP_UNLINKAT:
return io_unlinkat_prep(req, sqe);
+ case IORING_OP_MKDIRAT:
+ return io_mkdirat_prep(req, sqe);
+ case IORING_OP_SYMLINKAT:
+ return io_symlinkat_prep(req, sqe);
+ case IORING_OP_LINKAT:
+ return io_linkat_prep(req, sqe);
}
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6070,11 +6441,11 @@ fail:
return true;
}
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) {
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
kfree(de);
- io_queue_async_work(req);
+ io_queue_async_work(req, NULL);
return true;
}
@@ -6082,7 +6453,7 @@ fail:
de->req = req;
de->seq = seq;
list_add_tail(&de->list, &ctx->defer_list);
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
return true;
}
@@ -6139,6 +6510,17 @@ static void io_clean_op(struct io_kiocb *req)
case IORING_OP_UNLINKAT:
putname(req->unlink.filename);
break;
+ case IORING_OP_MKDIRAT:
+ putname(req->mkdir.filename);
+ break;
+ case IORING_OP_SYMLINKAT:
+ putname(req->symlink.oldpath);
+ putname(req->symlink.newpath);
+ break;
+ case IORING_OP_LINKAT:
+ putname(req->hardlink.oldpath);
+ putname(req->hardlink.newpath);
+ break;
}
}
if ((req->flags & REQ_F_POLLED) && req->apoll) {
@@ -6267,6 +6649,15 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
case IORING_OP_UNLINKAT:
ret = io_unlinkat(req, issue_flags);
break;
+ case IORING_OP_MKDIRAT:
+ ret = io_mkdirat(req, issue_flags);
+ break;
+ case IORING_OP_SYMLINKAT:
+ ret = io_symlinkat(req, issue_flags);
+ break;
+ case IORING_OP_LINKAT:
+ ret = io_linkat(req, issue_flags);
+ break;
default:
ret = -EINVAL;
break;
@@ -6283,16 +6674,31 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
+static struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
+{
+ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+
+ req = io_put_req_find_next(req);
+ return req ? &req->work : NULL;
+}
+
static void io_wq_submit_work(struct io_wq_work *work)
{
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
struct io_kiocb *timeout;
int ret = 0;
+ /* one will be dropped by ->io_free_work() after returning to io-wq */
+ if (!(req->flags & REQ_F_REFCOUNT))
+ __io_req_set_refcount(req, 2);
+ else
+ req_ref_get(req);
+
timeout = io_prep_linked_timeout(req);
if (timeout)
io_queue_linked_timeout(timeout);
+ /* either cancelled or io-wq is dying, so don't touch tctx->iowq */
if (work->flags & IO_WQ_WORK_CANCEL)
ret = -ECANCELED;
@@ -6311,29 +6717,14 @@ static void io_wq_submit_work(struct io_wq_work *work)
}
/* avoid locking problems by failing it from a clean context */
- if (ret) {
- /* io-wq is going to take one down */
- req_ref_get(req);
+ if (ret)
io_req_task_queue_fail(req, ret);
- }
}
-#define FFS_ASYNC_READ 0x1UL
-#define FFS_ASYNC_WRITE 0x2UL
-#ifdef CONFIG_64BIT
-#define FFS_ISREG 0x4UL
-#else
-#define FFS_ISREG 0x0UL
-#endif
-#define FFS_MASK ~(FFS_ASYNC_READ|FFS_ASYNC_WRITE|FFS_ISREG)
-
static inline struct io_fixed_file *io_fixed_file_slot(struct io_file_table *table,
- unsigned i)
+ unsigned i)
{
- struct io_fixed_file *table_l2;
-
- table_l2 = table->files[i >> IORING_FILE_TABLE_SHIFT];
- return &table_l2[i & IORING_FILE_TABLE_MASK];
+ return &table->files[i];
}
static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
@@ -6348,45 +6739,69 @@ static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file
{
unsigned long file_ptr = (unsigned long) file;
- if (__io_file_supports_async(file, READ))
+ if (__io_file_supports_nowait(file, READ))
file_ptr |= FFS_ASYNC_READ;
- if (__io_file_supports_async(file, WRITE))
+ if (__io_file_supports_nowait(file, WRITE))
file_ptr |= FFS_ASYNC_WRITE;
if (S_ISREG(file_inode(file)->i_mode))
file_ptr |= FFS_ISREG;
file_slot->file_ptr = file_ptr;
}
-static struct file *io_file_get(struct io_submit_state *state,
- struct io_kiocb *req, int fd, bool fixed)
+static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx,
+ struct io_kiocb *req, int fd)
{
- struct io_ring_ctx *ctx = req->ctx;
struct file *file;
+ unsigned long file_ptr;
- if (fixed) {
- unsigned long file_ptr;
+ if (unlikely((unsigned int)fd >= ctx->nr_user_files))
+ return NULL;
+ fd = array_index_nospec(fd, ctx->nr_user_files);
+ file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
+ file = (struct file *) (file_ptr & FFS_MASK);
+ file_ptr &= ~FFS_MASK;
+ /* mask in overlapping REQ_F and FFS bits */
+ req->flags |= (file_ptr << REQ_F_NOWAIT_READ_BIT);
+ io_req_set_rsrc_node(req);
+ return file;
+}
- if (unlikely((unsigned int)fd >= ctx->nr_user_files))
- return NULL;
- fd = array_index_nospec(fd, ctx->nr_user_files);
- file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
- file = (struct file *) (file_ptr & FFS_MASK);
- file_ptr &= ~FFS_MASK;
- /* mask in overlapping REQ_F and FFS bits */
- req->flags |= (file_ptr << REQ_F_ASYNC_READ_BIT);
- io_req_set_rsrc_node(req);
- } else {
- trace_io_uring_file_get(ctx, fd);
- file = __io_file_get(state, fd);
+static struct file *io_file_get_normal(struct io_ring_ctx *ctx,
+ struct io_kiocb *req, int fd)
+{
+ struct file *file = fget(fd);
- /* we don't allow fixed io_uring files */
- if (file && unlikely(file->f_op == &io_uring_fops))
- io_req_track_inflight(req);
- }
+ trace_io_uring_file_get(ctx, fd);
+ /* we don't allow fixed io_uring files */
+ if (file && unlikely(file->f_op == &io_uring_fops))
+ io_req_track_inflight(req);
return file;
}
+static inline struct file *io_file_get(struct io_ring_ctx *ctx,
+ struct io_kiocb *req, int fd, bool fixed)
+{
+ if (fixed)
+ return io_file_get_fixed(ctx, req, fd);
+ else
+ return io_file_get_normal(ctx, req, fd);
+}
+
+static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
+{
+ struct io_kiocb *prev = req->timeout.prev;
+ int ret;
+
+ if (prev) {
+ ret = io_try_cancel_userdata(req, prev->user_data);
+ io_req_complete_post(req, ret ?: -ETIME, 0);
+ io_put_req(prev);
+ } else {
+ io_req_complete_post(req, -ETIME, 0);
+ }
+}
+
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
{
struct io_timeout_data *data = container_of(timer,
@@ -6395,7 +6810,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
- spin_lock_irqsave(&ctx->completion_lock, flags);
+ spin_lock_irqsave(&ctx->timeout_lock, flags);
prev = req->timeout.head;
req->timeout.head = NULL;
@@ -6408,15 +6823,12 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
if (!req_ref_inc_not_zero(prev))
prev = NULL;
}
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ list_del(&req->timeout.list);
+ req->timeout.prev = prev;
+ spin_unlock_irqrestore(&ctx->timeout_lock, flags);
- if (prev) {
- io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
- io_put_req_deferred(prev, 1);
- io_put_req_deferred(req, 1);
- } else {
- io_req_complete_post(req, -ETIME, 0);
- }
+ req->io_task_work.func = io_req_task_link_timeout;
+ io_req_task_work_add(req);
return HRTIMER_NORESTART;
}
@@ -6424,7 +6836,7 @@ static void io_queue_linked_timeout(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock_irq(&ctx->timeout_lock);
/*
* If the back reference is NULL, then our linked request finished
* before we got a chance to setup the timer
@@ -6435,29 +6847,17 @@ static void io_queue_linked_timeout(struct io_kiocb *req)
data->timer.function = io_link_timeout_fn;
hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
data->mode);
+ list_add_tail(&req->timeout.list, &ctx->ltimeout_list);
}
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock_irq(&ctx->timeout_lock);
/* drop submission reference */
io_put_req(req);
}
-static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
-{
- struct io_kiocb *nxt = req->link;
-
- if (!nxt || (req->flags & REQ_F_LINK_TIMEOUT) ||
- nxt->opcode != IORING_OP_LINK_TIMEOUT)
- return NULL;
-
- nxt->timeout.head = req;
- nxt->flags |= REQ_F_LTIMEOUT_ACTIVE;
- req->flags |= REQ_F_LINK_TIMEOUT;
- return nxt;
-}
-
static void __io_queue_sqe(struct io_kiocb *req)
+ __must_hold(&req->ctx->uring_lock)
{
- struct io_kiocb *linked_timeout = io_prep_linked_timeout(req);
+ struct io_kiocb *linked_timeout;
int ret;
issue_sqe:
@@ -6468,50 +6868,60 @@ issue_sqe:
* doesn't support non-blocking read/write attempts
*/
if (likely(!ret)) {
- /* drop submission reference */
if (req->flags & REQ_F_COMPLETE_INLINE) {
struct io_ring_ctx *ctx = req->ctx;
- struct io_comp_state *cs = &ctx->submit_state.comp;
+ struct io_submit_state *state = &ctx->submit_state;
- cs->reqs[cs->nr++] = req;
- if (cs->nr == ARRAY_SIZE(cs->reqs))
+ state->compl_reqs[state->compl_nr++] = req;
+ if (state->compl_nr == ARRAY_SIZE(state->compl_reqs))
io_submit_flush_completions(ctx);
- } else {
- io_put_req(req);
+ return;
}
+
+ linked_timeout = io_prep_linked_timeout(req);
+ if (linked_timeout)
+ io_queue_linked_timeout(linked_timeout);
} else if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
+ linked_timeout = io_prep_linked_timeout(req);
+
switch (io_arm_poll_handler(req)) {
case IO_APOLL_READY:
+ if (linked_timeout)
+ io_unprep_linked_timeout(req);
goto issue_sqe;
case IO_APOLL_ABORTED:
/*
* Queued up for async execution, worker will release
* submit reference when the iocb is actually submitted.
*/
- io_queue_async_work(req);
+ io_queue_async_work(req, NULL);
break;
}
+
+ if (linked_timeout)
+ io_queue_linked_timeout(linked_timeout);
} else {
io_req_complete_failed(req, ret);
}
- if (linked_timeout)
- io_queue_linked_timeout(linked_timeout);
}
static inline void io_queue_sqe(struct io_kiocb *req)
+ __must_hold(&req->ctx->uring_lock)
{
if (unlikely(req->ctx->drain_active) && io_drain_req(req))
return;
- if (likely(!(req->flags & REQ_F_FORCE_ASYNC))) {
+ if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) {
__io_queue_sqe(req);
+ } else if (req->flags & REQ_F_FAIL) {
+ io_req_complete_failed(req, req->result);
} else {
int ret = io_req_prep_async(req);
if (unlikely(ret))
io_req_complete_failed(req, ret);
else
- io_queue_async_work(req);
+ io_queue_async_work(req, NULL);
}
}
@@ -6543,19 +6953,19 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx,
static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
const struct io_uring_sqe *sqe)
+ __must_hold(&ctx->uring_lock)
{
struct io_submit_state *state;
unsigned int sqe_flags;
int personality, ret = 0;
+ /* req is partially pre-initialised, see io_preinit_req() */
req->opcode = READ_ONCE(sqe->opcode);
/* same numerical values with corresponding REQ_F_*, safe to copy */
req->flags = sqe_flags = READ_ONCE(sqe->flags);
req->user_data = READ_ONCE(sqe->user_data);
req->file = NULL;
req->fixed_rsrc_refs = NULL;
- /* one is dropped after submission, the other at completion */
- atomic_set(&req->refs, 2);
req->task = current;
/* enforce forwards compatibility on users */
@@ -6593,9 +7003,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
}
if (io_op_defs[req->opcode].needs_file) {
- bool fixed = req->flags & REQ_F_FIXED_FILE;
-
- req->file = io_file_get(state, req, READ_ONCE(sqe->fd), fixed);
+ req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd),
+ (sqe_flags & IOSQE_FIXED_FILE));
if (unlikely(!req->file))
ret = -EBADF;
}
@@ -6606,6 +7015,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
const struct io_uring_sqe *sqe)
+ __must_hold(&ctx->uring_lock)
{
struct io_submit_link *link = &ctx->submit_state.link;
int ret;
@@ -6613,20 +7023,34 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
ret = io_init_req(ctx, req, sqe);
if (unlikely(ret)) {
fail_req:
+ /* fail even hard links since we don't submit */
if (link->head) {
- /* fail even hard links since we don't submit */
- req_set_fail(link->head);
- io_req_complete_failed(link->head, -ECANCELED);
- link->head = NULL;
+ /*
+ * we can judge a link req is failed or cancelled by if
+ * REQ_F_FAIL is set, but the head is an exception since
+ * it may be set REQ_F_FAIL because of other req's failure
+ * so let's leverage req->result to distinguish if a head
+ * is set REQ_F_FAIL because of its failure or other req's
+ * failure so that we can set the correct ret code for it.
+ * init result here to avoid affecting the normal path.
+ */
+ if (!(link->head->flags & REQ_F_FAIL))
+ req_fail_link_node(link->head, -ECANCELED);
+ } else if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
+ /*
+ * the current req is a normal req, we should return
+ * error and thus break the submittion loop.
+ */
+ io_req_complete_failed(req, ret);
+ return ret;
}
- io_req_complete_failed(req, ret);
- return ret;
+ req_fail_link_node(req, ret);
+ } else {
+ ret = io_req_prep(req, sqe);
+ if (unlikely(ret))
+ goto fail_req;
}
- ret = io_req_prep(req, sqe);
- if (unlikely(ret))
- goto fail_req;
-
/* don't need @sqe from now on */
trace_io_uring_submit_sqe(ctx, req, req->opcode, req->user_data,
req->flags, true,
@@ -6642,9 +7066,14 @@ fail_req:
if (link->head) {
struct io_kiocb *head = link->head;
- ret = io_req_prep_async(req);
- if (unlikely(ret))
- goto fail_req;
+ if (!(req->flags & REQ_F_FAIL)) {
+ ret = io_req_prep_async(req);
+ if (unlikely(ret)) {
+ req_fail_link_node(req, ret);
+ if (!(head->flags & REQ_F_FAIL))
+ req_fail_link_node(head, -ECANCELED);
+ }
+ }
trace_io_uring_link(ctx, req, head);
link->last->link = req;
link->last = req;
@@ -6674,11 +7103,10 @@ static void io_submit_state_end(struct io_submit_state *state,
{
if (state->link.head)
io_queue_sqe(state->link.head);
- if (state->comp.nr)
+ if (state->compl_nr)
io_submit_flush_completions(ctx);
if (state->plug_started)
blk_finish_plug(&state->plug);
- io_state_file_put(state);
}
/*
@@ -6738,26 +7166,17 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
}
static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
+ __must_hold(&ctx->uring_lock)
{
- struct io_uring_task *tctx;
int submitted = 0;
/* make sure SQ entry isn't read before tail */
nr = min3(nr, ctx->sq_entries, io_sqring_entries(ctx));
if (!percpu_ref_tryget_many(&ctx->refs, nr))
return -EAGAIN;
+ io_get_task_refs(nr);
- tctx = current->io_uring;
- tctx->cached_refs -= nr;
- if (unlikely(tctx->cached_refs < 0)) {
- unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR;
-
- percpu_counter_add(&tctx->inflight, refill);
- refcount_add(refill, &current->usage);
- tctx->cached_refs += refill;
- }
io_submit_state_start(&ctx->submit_state, nr);
-
while (submitted < nr) {
const struct io_uring_sqe *sqe;
struct io_kiocb *req;
@@ -6770,7 +7189,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
}
sqe = io_get_sqe(ctx);
if (unlikely(!sqe)) {
- kmem_cache_free(req_cachep, req);
+ list_add(&req->inflight_entry, &ctx->submit_state.free_list);
break;
}
/* will complete beyond this point, count as submitted */
@@ -6802,16 +7221,18 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx)
{
/* Tell userspace we may need a wakeup call */
- spin_lock_irq(&ctx->completion_lock);
- ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
- spin_unlock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
+ WRITE_ONCE(ctx->rings->sq_flags,
+ ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP);
+ spin_unlock(&ctx->completion_lock);
}
static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx)
{
- spin_lock_irq(&ctx->completion_lock);
- ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
- spin_unlock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
+ WRITE_ONCE(ctx->rings->sq_flags,
+ ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP);
+ spin_unlock(&ctx->completion_lock);
}
static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
@@ -6833,7 +7254,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
mutex_lock(&ctx->uring_lock);
if (!list_empty(&ctx->iopoll_list))
- io_do_iopoll(ctx, &nr_events, 0, true);
+ io_do_iopoll(ctx, &nr_events, 0);
/*
* Don't submit if refs are dying, good for io_uring_register(),
@@ -6968,21 +7389,21 @@ static int io_sq_thread(void *data)
struct io_wait_queue {
struct wait_queue_entry wq;
struct io_ring_ctx *ctx;
- unsigned to_wait;
+ unsigned cq_tail;
unsigned nr_timeouts;
};
static inline bool io_should_wake(struct io_wait_queue *iowq)
{
struct io_ring_ctx *ctx = iowq->ctx;
+ int dist = ctx->cached_cq_tail - (int) iowq->cq_tail;
/*
* Wake up if we have enough events, or if a timeout occurred since we
* started waiting. For timeouts, we always want to return to userspace,
* regardless of event count.
*/
- return io_cqring_events(ctx) >= iowq->to_wait ||
- atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
+ return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
}
static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
@@ -7038,21 +7459,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
const sigset_t __user *sig, size_t sigsz,
struct __kernel_timespec __user *uts)
{
- struct io_wait_queue iowq = {
- .wq = {
- .private = current,
- .func = io_wake_function,
- .entry = LIST_HEAD_INIT(iowq.wq.entry),
- },
- .ctx = ctx,
- .to_wait = min_events,
- };
+ struct io_wait_queue iowq;
struct io_rings *rings = ctx->rings;
signed long timeout = MAX_SCHEDULE_TIMEOUT;
int ret;
do {
- io_cqring_overflow_flush(ctx, false);
+ io_cqring_overflow_flush(ctx);
if (io_cqring_events(ctx) >= min_events)
return 0;
if (!io_run_task_work())
@@ -7080,11 +7493,17 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
timeout = timespec64_to_jiffies(&ts);
}
+ init_waitqueue_func_entry(&iowq.wq, io_wake_function);
+ iowq.wq.private = current;
+ INIT_LIST_HEAD(&iowq.wq.entry);
+ iowq.ctx = ctx;
iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
+ iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events;
+
trace_io_uring_cqring_wait(ctx, min_events);
do {
/* if we can't even flush overflow, don't wait for more */
- if (!io_cqring_overflow_flush(ctx, false)) {
+ if (!io_cqring_overflow_flush(ctx)) {
ret = -EBUSY;
break;
}
@@ -7115,14 +7534,14 @@ static void **io_alloc_page_table(size_t size)
size_t init_size = size;
void **table;
- table = kcalloc(nr_tables, sizeof(*table), GFP_KERNEL);
+ table = kcalloc(nr_tables, sizeof(*table), GFP_KERNEL_ACCOUNT);
if (!table)
return NULL;
for (i = 0; i < nr_tables; i++) {
unsigned int this_size = min_t(size_t, size, PAGE_SIZE);
- table[i] = kzalloc(this_size, GFP_KERNEL);
+ table[i] = kzalloc(this_size, GFP_KERNEL_ACCOUNT);
if (!table[i]) {
io_free_page_table(table, init_size);
return NULL;
@@ -7132,20 +7551,54 @@ static void **io_alloc_page_table(size_t size)
return table;
}
-static inline void io_rsrc_ref_lock(struct io_ring_ctx *ctx)
+static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node)
{
- spin_lock_bh(&ctx->rsrc_ref_lock);
+ percpu_ref_exit(&ref_node->refs);
+ kfree(ref_node);
}
-static inline void io_rsrc_ref_unlock(struct io_ring_ctx *ctx)
+static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
{
- spin_unlock_bh(&ctx->rsrc_ref_lock);
+ struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs);
+ struct io_ring_ctx *ctx = node->rsrc_data->ctx;
+ unsigned long flags;
+ bool first_add = false;
+
+ spin_lock_irqsave(&ctx->rsrc_ref_lock, flags);
+ node->done = true;
+
+ while (!list_empty(&ctx->rsrc_ref_list)) {
+ node = list_first_entry(&ctx->rsrc_ref_list,
+ struct io_rsrc_node, node);
+ /* recycle ref nodes in order */
+ if (!node->done)
+ break;
+ list_del(&node->node);
+ first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist);
+ }
+ spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags);
+
+ if (first_add)
+ mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ);
}
-static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node)
+static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
{
- percpu_ref_exit(&ref_node->refs);
- kfree(ref_node);
+ struct io_rsrc_node *ref_node;
+
+ ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL);
+ if (!ref_node)
+ return NULL;
+
+ if (percpu_ref_init(&ref_node->refs, io_rsrc_node_ref_zero,
+ 0, GFP_KERNEL)) {
+ kfree(ref_node);
+ return NULL;
+ }
+ INIT_LIST_HEAD(&ref_node->node);
+ INIT_LIST_HEAD(&ref_node->rsrc_list);
+ ref_node->done = false;
+ return ref_node;
}
static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
@@ -7158,9 +7611,9 @@ static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
struct io_rsrc_node *rsrc_node = ctx->rsrc_node;
rsrc_node->rsrc_data = data_to_kill;
- io_rsrc_ref_lock(ctx);
+ spin_lock_irq(&ctx->rsrc_ref_lock);
list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
- io_rsrc_ref_unlock(ctx);
+ spin_unlock_irq(&ctx->rsrc_ref_lock);
atomic_inc(&data_to_kill->refs);
percpu_ref_kill(&rsrc_node->refs);
@@ -7199,17 +7652,19 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct
/* kill initial ref, already quiesced if zero */
if (atomic_dec_and_test(&data->refs))
break;
+ mutex_unlock(&ctx->uring_lock);
flush_delayed_work(&ctx->rsrc_put_work);
ret = wait_for_completion_interruptible(&data->done);
- if (!ret)
+ if (!ret) {
+ mutex_lock(&ctx->uring_lock);
break;
+ }
atomic_inc(&data->refs);
/* wait for all works potentially completing data->done */
flush_delayed_work(&ctx->rsrc_put_work);
reinit_completion(&data->done);
- mutex_unlock(&ctx->uring_lock);
ret = io_run_task_work_sig();
mutex_lock(&ctx->uring_lock);
} while (ret >= 0);
@@ -7277,17 +7732,14 @@ fail:
static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
{
- size_t size = nr_files * sizeof(struct io_fixed_file);
-
- table->files = (struct io_fixed_file **)io_alloc_page_table(size);
+ table->files = kvcalloc(nr_files, sizeof(table->files[0]),
+ GFP_KERNEL_ACCOUNT);
return !!table->files;
}
-static void io_free_file_tables(struct io_file_table *table, unsigned nr_files)
+static void io_free_file_tables(struct io_file_table *table)
{
- size_t size = nr_files * sizeof(struct io_fixed_file);
-
- io_free_page_table((void **)table->files, size);
+ kvfree(table->files);
table->files = NULL;
}
@@ -7312,7 +7764,7 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
fput(file);
}
#endif
- io_free_file_tables(&ctx->file_table, ctx->nr_user_files);
+ io_free_file_tables(&ctx->file_table);
io_rsrc_data_free(ctx->file_data);
ctx->file_data = NULL;
ctx->nr_user_files = 0;
@@ -7628,11 +8080,11 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
bool lock_ring = ctx->flags & IORING_SETUP_IOPOLL;
io_ring_submit_lock(ctx, lock_ring);
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
io_cqring_fill_event(ctx, prsrc->tag, 0, 0);
ctx->cq_extra++;
io_commit_cqring(ctx);
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
io_ring_submit_unlock(ctx, lock_ring);
}
@@ -7664,49 +8116,6 @@ static void io_rsrc_put_work(struct work_struct *work)
}
}
-static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
-{
- struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs);
- struct io_ring_ctx *ctx = node->rsrc_data->ctx;
- bool first_add = false;
-
- io_rsrc_ref_lock(ctx);
- node->done = true;
-
- while (!list_empty(&ctx->rsrc_ref_list)) {
- node = list_first_entry(&ctx->rsrc_ref_list,
- struct io_rsrc_node, node);
- /* recycle ref nodes in order */
- if (!node->done)
- break;
- list_del(&node->node);
- first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist);
- }
- io_rsrc_ref_unlock(ctx);
-
- if (first_add)
- mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ);
-}
-
-static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
-{
- struct io_rsrc_node *ref_node;
-
- ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL);
- if (!ref_node)
- return NULL;
-
- if (percpu_ref_init(&ref_node->refs, io_rsrc_node_ref_zero,
- 0, GFP_KERNEL)) {
- kfree(ref_node);
- return NULL;
- }
- INIT_LIST_HEAD(&ref_node->node);
- INIT_LIST_HEAD(&ref_node->rsrc_list);
- ref_node->done = false;
- return ref_node;
-}
-
static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_args, u64 __user *tags)
{
@@ -7721,6 +8130,8 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return -EINVAL;
if (nr_args > IORING_MAX_FIXED_FILES)
return -EMFILE;
+ if (nr_args > rlimit(RLIMIT_NOFILE))
+ return -EMFILE;
ret = io_rsrc_node_switch_start(ctx);
if (ret)
return ret;
@@ -7779,7 +8190,7 @@ out_fput:
if (file)
fput(file);
}
- io_free_file_tables(&ctx->file_table, nr_args);
+ io_free_file_tables(&ctx->file_table);
ctx->nr_user_files = 0;
out_free:
io_rsrc_data_free(ctx->file_data);
@@ -7830,6 +8241,46 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
#endif
}
+static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
+ unsigned int issue_flags, u32 slot_index)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+ struct io_fixed_file *file_slot;
+ int ret = -EBADF;
+
+ io_ring_submit_lock(ctx, !force_nonblock);
+ if (file->f_op == &io_uring_fops)
+ goto err;
+ ret = -ENXIO;
+ if (!ctx->file_data)
+ goto err;
+ ret = -EINVAL;
+ if (slot_index >= ctx->nr_user_files)
+ goto err;
+
+ slot_index = array_index_nospec(slot_index, ctx->nr_user_files);
+ file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
+ ret = -EBADF;
+ if (file_slot->file_ptr)
+ goto err;
+
+ *io_get_tag_slot(ctx->file_data, slot_index) = 0;
+ io_fixed_file_set(file_slot, file);
+ ret = io_sqe_file_register(ctx, file, slot_index);
+ if (ret) {
+ file_slot->file_ptr = 0;
+ goto err;
+ }
+
+ ret = 0;
+err:
+ io_ring_submit_unlock(ctx, !force_nonblock);
+ if (ret)
+ fput(file);
+ return ret;
+}
+
static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
struct io_rsrc_node *node, void *rsrc)
{
@@ -7925,14 +8376,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
return done ? done : err;
}
-static struct io_wq_work *io_free_work(struct io_wq_work *work)
-{
- struct io_kiocb *req = container_of(work, struct io_kiocb, work);
-
- req = io_put_req_find_next(req);
- return req ? &req->work : NULL;
-}
-
static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
struct task_struct *task)
{
@@ -7956,7 +8399,7 @@ static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
data.hash = hash;
data.task = task;
- data.free_work = io_free_work;
+ data.free_work = io_wq_free_work;
data.do_work = io_wq_submit_work;
/* Do QD, or 4 * CPUS, whatever is smallest */
@@ -8623,43 +9066,36 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
__io_remove_buffers(ctx, buf, index, -1U);
}
-static void io_req_cache_free(struct list_head *list, struct task_struct *tsk)
+static void io_req_cache_free(struct list_head *list)
{
struct io_kiocb *req, *nxt;
- list_for_each_entry_safe(req, nxt, list, compl.list) {
- if (tsk && req->task != tsk)
- continue;
- list_del(&req->compl.list);
+ list_for_each_entry_safe(req, nxt, list, inflight_entry) {
+ list_del(&req->inflight_entry);
kmem_cache_free(req_cachep, req);
}
}
static void io_req_caches_free(struct io_ring_ctx *ctx)
{
- struct io_submit_state *submit_state = &ctx->submit_state;
- struct io_comp_state *cs = &ctx->submit_state.comp;
+ struct io_submit_state *state = &ctx->submit_state;
mutex_lock(&ctx->uring_lock);
- if (submit_state->free_reqs) {
- kmem_cache_free_bulk(req_cachep, submit_state->free_reqs,
- submit_state->reqs);
- submit_state->free_reqs = 0;
+ if (state->free_reqs) {
+ kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
+ state->free_reqs = 0;
}
- io_flush_cached_locked_reqs(ctx, cs);
- io_req_cache_free(&cs->free_list, NULL);
+ io_flush_cached_locked_reqs(ctx, state);
+ io_req_cache_free(&state->free_list);
mutex_unlock(&ctx->uring_lock);
}
-static bool io_wait_rsrc_data(struct io_rsrc_data *data)
+static void io_wait_rsrc_data(struct io_rsrc_data *data)
{
- if (!data)
- return false;
- if (!atomic_dec_and_test(&data->refs))
+ if (data && !atomic_dec_and_test(&data->refs))
wait_for_completion(&data->done);
- return true;
}
static void io_ring_ctx_free(struct io_ring_ctx *ctx)
@@ -8671,10 +9107,14 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
ctx->mm_account = NULL;
}
+ /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */
+ io_wait_rsrc_data(ctx->buf_data);
+ io_wait_rsrc_data(ctx->file_data);
+
mutex_lock(&ctx->uring_lock);
- if (io_wait_rsrc_data(ctx->buf_data))
+ if (ctx->buf_data)
__io_sqe_buffers_unregister(ctx);
- if (io_wait_rsrc_data(ctx->file_data))
+ if (ctx->file_data)
__io_sqe_files_unregister(ctx);
if (ctx->rings)
__io_cqring_overflow_flush(ctx, true);
@@ -8700,6 +9140,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
sock_release(ctx->ring_sock);
}
#endif
+ WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
io_mem_free(ctx->rings);
io_mem_free(ctx->sq_sqes);
@@ -8799,6 +9240,7 @@ static void io_ring_exit_work(struct work_struct *work)
{
struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work);
unsigned long timeout = jiffies + HZ * 60 * 5;
+ unsigned long interval = HZ / 20;
struct io_tctx_exit exit;
struct io_tctx_node *node;
int ret;
@@ -8823,8 +9265,11 @@ static void io_ring_exit_work(struct work_struct *work)
io_sq_thread_unpark(sqd);
}
- WARN_ON_ONCE(time_after(jiffies, timeout));
- } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
+ if (WARN_ON_ONCE(time_after(jiffies, timeout))) {
+ /* there is little hope left, don't run it too often */
+ interval = HZ * 60;
+ }
+ } while (!wait_for_completion_timeout(&ctx->ref_comp, interval));
init_completion(&exit.completion);
init_task_work(&exit.task_work, io_tctx_exit_cb);
@@ -8853,8 +9298,8 @@ static void io_ring_exit_work(struct work_struct *work)
mutex_lock(&ctx->uring_lock);
}
mutex_unlock(&ctx->uring_lock);
- spin_lock_irq(&ctx->completion_lock);
- spin_unlock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
io_ring_ctx_free(ctx);
}
@@ -8866,16 +9311,18 @@ static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
struct io_kiocb *req, *tmp;
int canceled = 0;
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
+ spin_lock_irq(&ctx->timeout_lock);
list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
if (io_match_task(req, tsk, cancel_all)) {
io_kill_timeout(req, -ECANCELED);
canceled++;
}
}
+ spin_unlock_irq(&ctx->timeout_lock);
if (canceled != 0)
io_commit_cqring(ctx);
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
if (canceled != 0)
io_cqring_ev_posted(ctx);
return canceled != 0;
@@ -8931,13 +9378,12 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
bool ret;
if (!cancel->all && (req->flags & REQ_F_LINK_TIMEOUT)) {
- unsigned long flags;
struct io_ring_ctx *ctx = req->ctx;
/* protect against races with linked timeouts */
- spin_lock_irqsave(&ctx->completion_lock, flags);
+ spin_lock(&ctx->completion_lock);
ret = io_match_task(req, cancel->task, cancel->all);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ spin_unlock(&ctx->completion_lock);
} else {
ret = io_match_task(req, cancel->task, cancel->all);
}
@@ -8950,14 +9396,14 @@ static bool io_cancel_defer_files(struct io_ring_ctx *ctx,
struct io_defer_entry *de;
LIST_HEAD(list);
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
list_for_each_entry_reverse(de, &ctx->defer_list, list) {
if (io_match_task(de->req, task, cancel_all)) {
list_cut_position(&list, &ctx->defer_list, &de->list);
break;
}
}
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
if (list_empty(&list))
return false;
@@ -9122,8 +9568,8 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx)
* Must be after io_uring_del_task_file() (removes nodes under
* uring_lock) to avoid race with io_uring_try_cancel_iowq().
*/
- tctx->io_wq = NULL;
io_wq_put_and_exit(wq);
+ tctx->io_wq = NULL;
}
}
@@ -9139,9 +9585,11 @@ static void io_uring_drop_tctx_refs(struct task_struct *task)
struct io_uring_task *tctx = task->io_uring;
unsigned int refs = tctx->cached_refs;
- tctx->cached_refs = 0;
- percpu_counter_sub(&tctx->inflight, refs);
- put_task_struct_many(task, refs);
+ if (refs) {
+ tctx->cached_refs = 0;
+ percpu_counter_sub(&tctx->inflight, refs);
+ put_task_struct_many(task, refs);
+ }
}
/*
@@ -9162,9 +9610,9 @@ static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
if (tctx->io_wq)
io_wq_exit_start(tctx->io_wq);
- io_uring_drop_tctx_refs(current);
atomic_inc(&tctx->in_idle);
do {
+ io_uring_drop_tctx_refs(current);
/* read completions before cancelations */
inflight = tctx_inflight(tctx, !cancel_all);
if (!inflight)
@@ -9188,6 +9636,7 @@ static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
}
prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
+ io_uring_drop_tctx_refs(current);
/*
* If we've seen completions, retry without waiting. This
* avoids a race where a completion comes in before we did
@@ -9206,9 +9655,9 @@ static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
}
}
-void __io_uring_cancel(struct files_struct *files)
+void __io_uring_cancel(bool cancel_all)
{
- io_uring_cancel_generic(!files, NULL);
+ io_uring_cancel_generic(cancel_all, NULL);
}
static void *io_uring_validate_mmap_request(struct file *file,
@@ -9368,11 +9817,12 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
*/
ret = 0;
if (ctx->flags & IORING_SETUP_SQPOLL) {
- io_cqring_overflow_flush(ctx, false);
+ io_cqring_overflow_flush(ctx);
- ret = -EOWNERDEAD;
- if (unlikely(ctx->sq_data->thread == NULL))
+ if (unlikely(ctx->sq_data->thread == NULL)) {
+ ret = -EOWNERDEAD;
goto out;
+ }
if (flags & IORING_ENTER_SQ_WAKEUP)
wake_up(&ctx->sq_data->wait);
if (flags & IORING_ENTER_SQ_WAIT) {
@@ -9503,7 +9953,7 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
io_uring_show_cred(m, index, cred);
}
seq_printf(m, "PollList:\n");
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
struct hlist_head *list = &ctx->cancel_hash[i];
struct io_kiocb *req;
@@ -9512,7 +9962,7 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
seq_printf(m, " op=%d, task_works=%d\n", req->opcode,
req->task->task_works != NULL);
}
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
if (has_lock)
mutex_unlock(&ctx->uring_lock);
}
@@ -9840,10 +10290,11 @@ static int io_register_personality(struct io_ring_ctx *ctx)
ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
- if (!ret)
- return id;
- put_cred(creds);
- return ret;
+ if (ret < 0) {
+ put_cred(creds);
+ return ret;
+ }
+ return id;
}
static int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg,
@@ -10044,6 +10495,31 @@ static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
return io_wq_cpu_affinity(tctx->io_wq, NULL);
}
+static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
+ void __user *arg)
+{
+ struct io_uring_task *tctx = current->io_uring;
+ __u32 new_count[2];
+ int i, ret;
+
+ if (!tctx || !tctx->io_wq)
+ return -EINVAL;
+ if (copy_from_user(new_count, arg, sizeof(new_count)))
+ return -EFAULT;
+ for (i = 0; i < ARRAY_SIZE(new_count); i++)
+ if (new_count[i] > INT_MAX)
+ return -EINVAL;
+
+ ret = io_wq_max_workers(tctx->io_wq, new_count);
+ if (ret)
+ return ret;
+
+ if (copy_to_user(arg, new_count, sizeof(new_count)))
+ return -EFAULT;
+
+ return 0;
+}
+
static bool io_register_op_must_quiesce(int op)
{
switch (op) {
@@ -10061,12 +10537,40 @@ static bool io_register_op_must_quiesce(int op)
case IORING_REGISTER_BUFFERS_UPDATE:
case IORING_REGISTER_IOWQ_AFF:
case IORING_UNREGISTER_IOWQ_AFF:
+ case IORING_REGISTER_IOWQ_MAX_WORKERS:
return false;
default:
return true;
}
}
+static int io_ctx_quiesce(struct io_ring_ctx *ctx)
+{
+ long ret;
+
+ percpu_ref_kill(&ctx->refs);
+
+ /*
+ * Drop uring mutex before waiting for references to exit. If another
+ * thread is currently inside io_uring_enter() it might need to grab the
+ * uring_lock to make progress. If we hold it here across the drain
+ * wait, then we can deadlock. It's safe to drop the mutex here, since
+ * no new references will come in after we've killed the percpu ref.
+ */
+ mutex_unlock(&ctx->uring_lock);
+ do {
+ ret = wait_for_completion_interruptible(&ctx->ref_comp);
+ if (!ret)
+ break;
+ ret = io_run_task_work_sig();
+ } while (ret >= 0);
+ mutex_lock(&ctx->uring_lock);
+
+ if (ret)
+ io_refs_resurrect(&ctx->refs, &ctx->ref_comp);
+ return ret;
+}
+
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
void __user *arg, unsigned nr_args)
__releases(ctx->uring_lock)
@@ -10091,31 +10595,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
}
if (io_register_op_must_quiesce(opcode)) {
- percpu_ref_kill(&ctx->refs);
-
- /*
- * Drop uring mutex before waiting for references to exit. If
- * another thread is currently inside io_uring_enter() it might
- * need to grab the uring_lock to make progress. If we hold it
- * here across the drain wait, then we can deadlock. It's safe
- * to drop the mutex here, since no new references will come in
- * after we've killed the percpu ref.
- */
- mutex_unlock(&ctx->uring_lock);
- do {
- ret = wait_for_completion_interruptible(&ctx->ref_comp);
- if (!ret)
- break;
- ret = io_run_task_work_sig();
- if (ret < 0)
- break;
- } while (1);
- mutex_lock(&ctx->uring_lock);
-
- if (ret) {
- io_refs_resurrect(&ctx->refs, &ctx->ref_comp);
+ ret = io_ctx_quiesce(ctx);
+ if (ret)
return ret;
- }
}
switch (opcode) {
@@ -10212,6 +10694,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_unregister_iowq_aff(ctx);
break;
+ case IORING_REGISTER_IOWQ_MAX_WORKERS:
+ ret = -EINVAL;
+ if (!arg || nr_args != 2)
+ break;
+ ret = io_register_iowq_max_workers(ctx, arg);
+ break;
default:
ret = -EINVAL;
break;
@@ -10293,11 +10781,16 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(40, __u16, buf_group);
BUILD_BUG_SQE_ELEM(42, __u16, personality);
BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
+ BUILD_BUG_SQE_ELEM(44, __u32, file_index);
BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
sizeof(struct io_uring_rsrc_update));
BUILD_BUG_ON(sizeof(struct io_uring_rsrc_update) >
sizeof(struct io_uring_rsrc_update2));
+
+ /* ->buf_index is u16 */
+ BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16));
+
/* should fit into one byte */
BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1e2204fa9963..504e69578112 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -263,209 +263,6 @@ static long ioctl_file_clone_range(struct file *file,
args.src_length, args.dest_offset);
}
-#ifdef CONFIG_BLOCK
-
-static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
-{
- return (offset >> inode->i_blkbits);
-}
-
-static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
-{
- return (blk << inode->i_blkbits);
-}
-
-/**
- * __generic_block_fiemap - FIEMAP for block based inodes (no locking)
- * @inode: the inode to map
- * @fieinfo: the fiemap info struct that will be passed back to userspace
- * @start: where to start mapping in the inode
- * @len: how much space to map
- * @get_block: the fs's get_block function
- *
- * This does FIEMAP for block based inodes. Basically it will just loop
- * through get_block until we hit the number of extents we want to map, or we
- * go past the end of the file and hit a hole.
- *
- * If it is possible to have data blocks beyond a hole past @inode->i_size, then
- * please do not use this function, it will stop at the first unmapped block
- * beyond i_size.
- *
- * If you use this function directly, you need to do your own locking. Use
- * generic_block_fiemap if you want the locking done for you.
- */
-static int __generic_block_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo, loff_t start,
- loff_t len, get_block_t *get_block)
-{
- struct buffer_head map_bh;
- sector_t start_blk, last_blk;
- loff_t isize = i_size_read(inode);
- u64 logical = 0, phys = 0, size = 0;
- u32 flags = FIEMAP_EXTENT_MERGED;
- bool past_eof = false, whole_file = false;
- int ret = 0;
-
- ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_SYNC);
- if (ret)
- return ret;
-
- /*
- * Either the i_mutex or other appropriate locking needs to be held
- * since we expect isize to not change at all through the duration of
- * this call.
- */
- if (len >= isize) {
- whole_file = true;
- len = isize;
- }
-
- /*
- * Some filesystems can't deal with being asked to map less than
- * blocksize, so make sure our len is at least block length.
- */
- if (logical_to_blk(inode, len) == 0)
- len = blk_to_logical(inode, 1);
-
- start_blk = logical_to_blk(inode, start);
- last_blk = logical_to_blk(inode, start + len - 1);
-
- do {
- /*
- * we set b_size to the total size we want so it will map as
- * many contiguous blocks as possible at once
- */
- memset(&map_bh, 0, sizeof(struct buffer_head));
- map_bh.b_size = len;
-
- ret = get_block(inode, start_blk, &map_bh, 0);
- if (ret)
- break;
-
- /* HOLE */
- if (!buffer_mapped(&map_bh)) {
- start_blk++;
-
- /*
- * We want to handle the case where there is an
- * allocated block at the front of the file, and then
- * nothing but holes up to the end of the file properly,
- * to make sure that extent at the front gets properly
- * marked with FIEMAP_EXTENT_LAST
- */
- if (!past_eof &&
- blk_to_logical(inode, start_blk) >= isize)
- past_eof = 1;
-
- /*
- * First hole after going past the EOF, this is our
- * last extent
- */
- if (past_eof && size) {
- flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST;
- ret = fiemap_fill_next_extent(fieinfo, logical,
- phys, size,
- flags);
- } else if (size) {
- ret = fiemap_fill_next_extent(fieinfo, logical,
- phys, size, flags);
- size = 0;
- }
-
- /* if we have holes up to/past EOF then we're done */
- if (start_blk > last_blk || past_eof || ret)
- break;
- } else {
- /*
- * We have gone over the length of what we wanted to
- * map, and it wasn't the entire file, so add the extent
- * we got last time and exit.
- *
- * This is for the case where say we want to map all the
- * way up to the second to the last block in a file, but
- * the last block is a hole, making the second to last
- * block FIEMAP_EXTENT_LAST. In this case we want to
- * see if there is a hole after the second to last block
- * so we can mark it properly. If we found data after
- * we exceeded the length we were requesting, then we
- * are good to go, just add the extent to the fieinfo
- * and break
- */
- if (start_blk > last_blk && !whole_file) {
- ret = fiemap_fill_next_extent(fieinfo, logical,
- phys, size,
- flags);
- break;
- }
-
- /*
- * if size != 0 then we know we already have an extent
- * to add, so add it.
- */
- if (size) {
- ret = fiemap_fill_next_extent(fieinfo, logical,
- phys, size,
- flags);
- if (ret)
- break;
- }
-
- logical = blk_to_logical(inode, start_blk);
- phys = blk_to_logical(inode, map_bh.b_blocknr);
- size = map_bh.b_size;
- flags = FIEMAP_EXTENT_MERGED;
-
- start_blk += logical_to_blk(inode, size);
-
- /*
- * If we are past the EOF, then we need to make sure as
- * soon as we find a hole that the last extent we found
- * is marked with FIEMAP_EXTENT_LAST
- */
- if (!past_eof && logical + size >= isize)
- past_eof = true;
- }
- cond_resched();
- if (fatal_signal_pending(current)) {
- ret = -EINTR;
- break;
- }
-
- } while (1);
-
- /* If ret is 1 then we just hit the end of the extent array */
- if (ret == 1)
- ret = 0;
-
- return ret;
-}
-
-/**
- * generic_block_fiemap - FIEMAP for block based inodes
- * @inode: The inode to map
- * @fieinfo: The mapping information
- * @start: The initial block to map
- * @len: The length of the extect to attempt to map
- * @get_block: The block mapping function for the fs
- *
- * Calls __generic_block_fiemap to map the inode, after taking
- * the inode's mutex lock.
- */
-
-int generic_block_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo, u64 start,
- u64 len, get_block_t *get_block)
-{
- int ret;
- inode_lock(inode);
- ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
- inode_unlock(inode);
- return ret;
-}
-EXPORT_SYMBOL(generic_block_fiemap);
-
-#endif /* CONFIG_BLOCK */
-
/*
* This provides compatibility with legacy XFS pre-allocation ioctls
* which predate the fallocate syscall.
@@ -817,6 +614,14 @@ static int fileattr_set_prepare(struct inode *inode,
if ((old_ma->fsx_xflags ^ fa->fsx_xflags) &
FS_XFLAG_PROJINHERIT)
return -EINVAL;
+ } else {
+ /*
+ * Caller is allowed to change the project ID. If it is being
+ * changed, make sure that the new value is valid.
+ */
+ if (old_ma->fsx_projid != fa->fsx_projid &&
+ !projid_valid(make_kprojid(&init_user_ns, fa->fsx_projid)))
+ return -EINVAL;
}
/* Check extent size hints. */
diff --git a/fs/iomap/Makefile b/fs/iomap/Makefile
index eef2722d93a1..4143a3ff89db 100644
--- a/fs/iomap/Makefile
+++ b/fs/iomap/Makefile
@@ -9,9 +9,9 @@ ccflags-y += -I $(srctree)/$(src) # needed for trace events
obj-$(CONFIG_FS_IOMAP) += iomap.o
iomap-y += trace.o \
- apply.o \
buffered-io.o \
direct-io.o \
fiemap.o \
+ iter.o \
seek.o
iomap-$(CONFIG_SWAP) += swapfile.o
diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c
deleted file mode 100644
index 26ab6563181f..000000000000
--- a/fs/iomap/apply.c
+++ /dev/null
@@ -1,99 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2010 Red Hat, Inc.
- * Copyright (c) 2016-2018 Christoph Hellwig.
- */
-#include <linux/module.h>
-#include <linux/compiler.h>
-#include <linux/fs.h>
-#include <linux/iomap.h>
-#include "trace.h"
-
-/*
- * Execute a iomap write on a segment of the mapping that spans a
- * contiguous range of pages that have identical block mapping state.
- *
- * This avoids the need to map pages individually, do individual allocations
- * for each page and most importantly avoid the need for filesystem specific
- * locking per page. Instead, all the operations are amortised over the entire
- * range of pages. It is assumed that the filesystems will lock whatever
- * resources they require in the iomap_begin call, and release them in the
- * iomap_end call.
- */
-loff_t
-iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
- const struct iomap_ops *ops, void *data, iomap_actor_t actor)
-{
- struct iomap iomap = { .type = IOMAP_HOLE };
- struct iomap srcmap = { .type = IOMAP_HOLE };
- loff_t written = 0, ret;
- u64 end;
-
- trace_iomap_apply(inode, pos, length, flags, ops, actor, _RET_IP_);
-
- /*
- * Need to map a range from start position for length bytes. This can
- * span multiple pages - it is only guaranteed to return a range of a
- * single type of pages (e.g. all into a hole, all mapped or all
- * unwritten). Failure at this point has nothing to undo.
- *
- * If allocation is required for this range, reserve the space now so
- * that the allocation is guaranteed to succeed later on. Once we copy
- * the data into the page cache pages, then we cannot fail otherwise we
- * expose transient stale data. If the reserve fails, we can safely
- * back out at this point as there is nothing to undo.
- */
- ret = ops->iomap_begin(inode, pos, length, flags, &iomap, &srcmap);
- if (ret)
- return ret;
- if (WARN_ON(iomap.offset > pos)) {
- written = -EIO;
- goto out;
- }
- if (WARN_ON(iomap.length == 0)) {
- written = -EIO;
- goto out;
- }
-
- trace_iomap_apply_dstmap(inode, &iomap);
- if (srcmap.type != IOMAP_HOLE)
- trace_iomap_apply_srcmap(inode, &srcmap);
-
- /*
- * Cut down the length to the one actually provided by the filesystem,
- * as it might not be able to give us the whole size that we requested.
- */
- end = iomap.offset + iomap.length;
- if (srcmap.type != IOMAP_HOLE)
- end = min(end, srcmap.offset + srcmap.length);
- if (pos + length > end)
- length = end - pos;
-
- /*
- * Now that we have guaranteed that the space allocation will succeed,
- * we can do the copy-in page by page without having to worry about
- * failures exposing transient data.
- *
- * To support COW operations, we read in data for partially blocks from
- * the srcmap if the file system filled it in. In that case we the
- * length needs to be limited to the earlier of the ends of the iomaps.
- * If the file system did not provide a srcmap we pass in the normal
- * iomap into the actors so that they don't need to have special
- * handling for the two cases.
- */
- written = actor(inode, pos, length, data, &iomap,
- srcmap.type != IOMAP_HOLE ? &srcmap : &iomap);
-
-out:
- /*
- * Now the data has been copied, commit the range we've copied. This
- * should not fail unless the filesystem has had a fatal error.
- */
- if (ops->iomap_end) {
- ret = ops->iomap_end(inode, pos, length,
- written > 0 ? written : 0,
- flags, &iomap);
- }
-
- return written ? written : ret;
-}
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 87ccb3438bec..9cc5798423d1 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -36,7 +36,7 @@ static inline struct iomap_page *to_iomap_page(struct page *page)
{
/*
* per-block data is stored in the head page. Callers should
- * not be dealing with tail pages (and if they are, they can
+ * not be dealing with tail pages, and if they are, they can
* call thp_head() first.
*/
VM_BUG_ON_PGFLAGS(PageTail(page), page);
@@ -98,7 +98,7 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
unsigned last = (poff + plen - 1) >> block_bits;
/*
- * If the block size is smaller than the page size we need to check the
+ * If the block size is smaller than the page size, we need to check the
* per-block uptodate status and adjust the offset and length if needed
* to avoid reading in already uptodate ranges.
*/
@@ -126,7 +126,7 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
}
/*
- * If the extent spans the block that contains the i_size we need to
+ * If the extent spans the block that contains the i_size, we need to
* handle both halves separately so that we properly zero data in the
* page cache for blocks that are entirely outside of i_size.
*/
@@ -205,60 +205,67 @@ struct iomap_readpage_ctx {
struct readahead_control *rac;
};
-static void
-iomap_read_inline_data(struct inode *inode, struct page *page,
- struct iomap *iomap)
+static loff_t iomap_read_inline_data(const struct iomap_iter *iter,
+ struct page *page)
{
- size_t size = i_size_read(inode);
+ const struct iomap *iomap = iomap_iter_srcmap(iter);
+ size_t size = i_size_read(iter->inode) - iomap->offset;
+ size_t poff = offset_in_page(iomap->offset);
void *addr;
if (PageUptodate(page))
- return;
-
- BUG_ON(page_has_private(page));
- BUG_ON(page->index);
- BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
-
- addr = kmap_atomic(page);
+ return PAGE_SIZE - poff;
+
+ if (WARN_ON_ONCE(size > PAGE_SIZE - poff))
+ return -EIO;
+ if (WARN_ON_ONCE(size > PAGE_SIZE -
+ offset_in_page(iomap->inline_data)))
+ return -EIO;
+ if (WARN_ON_ONCE(size > iomap->length))
+ return -EIO;
+ if (poff > 0)
+ iomap_page_create(iter->inode, page);
+
+ addr = kmap_local_page(page) + poff;
memcpy(addr, iomap->inline_data, size);
- memset(addr + size, 0, PAGE_SIZE - size);
- kunmap_atomic(addr);
- SetPageUptodate(page);
+ memset(addr + size, 0, PAGE_SIZE - poff - size);
+ kunmap_local(addr);
+ iomap_set_range_uptodate(page, poff, PAGE_SIZE - poff);
+ return PAGE_SIZE - poff;
}
-static inline bool iomap_block_needs_zeroing(struct inode *inode,
- struct iomap *iomap, loff_t pos)
+static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter,
+ loff_t pos)
{
- return iomap->type != IOMAP_MAPPED ||
- (iomap->flags & IOMAP_F_NEW) ||
- pos >= i_size_read(inode);
+ const struct iomap *srcmap = iomap_iter_srcmap(iter);
+
+ return srcmap->type != IOMAP_MAPPED ||
+ (srcmap->flags & IOMAP_F_NEW) ||
+ pos >= i_size_read(iter->inode);
}
-static loff_t
-iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
- struct iomap *iomap, struct iomap *srcmap)
+static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
+ struct iomap_readpage_ctx *ctx, loff_t offset)
{
- struct iomap_readpage_ctx *ctx = data;
+ const struct iomap *iomap = &iter->iomap;
+ loff_t pos = iter->pos + offset;
+ loff_t length = iomap_length(iter) - offset;
struct page *page = ctx->cur_page;
struct iomap_page *iop;
- bool same_page = false, is_contig = false;
loff_t orig_pos = pos;
unsigned poff, plen;
sector_t sector;
- if (iomap->type == IOMAP_INLINE) {
- WARN_ON_ONCE(pos);
- iomap_read_inline_data(inode, page, iomap);
- return PAGE_SIZE;
- }
+ if (iomap->type == IOMAP_INLINE)
+ return min(iomap_read_inline_data(iter, page), length);
/* zero post-eof blocks as the page may be mapped */
- iop = iomap_page_create(inode, page);
- iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
+ iop = iomap_page_create(iter->inode, page);
+ iomap_adjust_read_range(iter->inode, iop, &pos, length, &poff, &plen);
if (plen == 0)
goto done;
- if (iomap_block_needs_zeroing(inode, iomap, pos)) {
+ if (iomap_block_needs_zeroing(iter, pos)) {
zero_user(page, poff, plen);
iomap_set_range_uptodate(page, poff, plen);
goto done;
@@ -268,16 +275,10 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
if (iop)
atomic_add(plen, &iop->read_bytes_pending);
- /* Try to merge into a previous segment if we can */
sector = iomap_sector(iomap, pos);
- if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
- if (__bio_try_merge_page(ctx->bio, page, plen, poff,
- &same_page))
- goto done;
- is_contig = true;
- }
-
- if (!is_contig || bio_full(ctx->bio, plen)) {
+ if (!ctx->bio ||
+ bio_end_sector(ctx->bio) != sector ||
+ bio_add_page(ctx->bio, page, plen, poff) != plen) {
gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
gfp_t orig_gfp = gfp;
unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE);
@@ -301,13 +302,12 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
ctx->bio->bi_iter.bi_sector = sector;
bio_set_dev(ctx->bio, iomap->bdev);
ctx->bio->bi_end_io = iomap_read_end_io;
+ __bio_add_page(ctx->bio, page, plen, poff);
}
-
- bio_add_page(ctx->bio, page, plen, poff);
done:
/*
* Move the caller beyond our range so that it keeps making progress.
- * For that we have to include any leading non-uptodate ranges, but
+ * For that, we have to include any leading non-uptodate ranges, but
* we can skip trailing ones as they will be handled in the next
* iteration.
*/
@@ -317,23 +317,23 @@ done:
int
iomap_readpage(struct page *page, const struct iomap_ops *ops)
{
- struct iomap_readpage_ctx ctx = { .cur_page = page };
- struct inode *inode = page->mapping->host;
- unsigned poff;
- loff_t ret;
+ struct iomap_iter iter = {
+ .inode = page->mapping->host,
+ .pos = page_offset(page),
+ .len = PAGE_SIZE,
+ };
+ struct iomap_readpage_ctx ctx = {
+ .cur_page = page,
+ };
+ int ret;
trace_iomap_readpage(page->mapping->host, 1);
- for (poff = 0; poff < PAGE_SIZE; poff += ret) {
- ret = iomap_apply(inode, page_offset(page) + poff,
- PAGE_SIZE - poff, 0, ops, &ctx,
- iomap_readpage_actor);
- if (ret <= 0) {
- WARN_ON_ONCE(ret == 0);
- SetPageError(page);
- break;
- }
- }
+ while ((ret = iomap_iter(&iter, ops)) > 0)
+ iter.processed = iomap_readpage_iter(&iter, &ctx, 0);
+
+ if (ret < 0)
+ SetPageError(page);
if (ctx.bio) {
submit_bio(ctx.bio);
@@ -344,23 +344,22 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops)
}
/*
- * Just like mpage_readahead and block_read_full_page we always
+ * Just like mpage_readahead and block_read_full_page, we always
* return 0 and just mark the page as PageError on errors. This
- * should be cleaned up all through the stack eventually.
+ * should be cleaned up throughout the stack eventually.
*/
return 0;
}
EXPORT_SYMBOL_GPL(iomap_readpage);
-static loff_t
-iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
- void *data, struct iomap *iomap, struct iomap *srcmap)
+static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
+ struct iomap_readpage_ctx *ctx)
{
- struct iomap_readpage_ctx *ctx = data;
+ loff_t length = iomap_length(iter);
loff_t done, ret;
for (done = 0; done < length; done += ret) {
- if (ctx->cur_page && offset_in_page(pos + done) == 0) {
+ if (ctx->cur_page && offset_in_page(iter->pos + done) == 0) {
if (!ctx->cur_page_in_bio)
unlock_page(ctx->cur_page);
put_page(ctx->cur_page);
@@ -370,8 +369,7 @@ iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
ctx->cur_page = readahead_page(ctx->rac);
ctx->cur_page_in_bio = false;
}
- ret = iomap_readpage_actor(inode, pos + done, length - done,
- ctx, iomap, srcmap);
+ ret = iomap_readpage_iter(iter, ctx, done);
}
return done;
@@ -394,25 +392,19 @@ iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
*/
void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
{
- struct inode *inode = rac->mapping->host;
- loff_t pos = readahead_pos(rac);
- size_t length = readahead_length(rac);
+ struct iomap_iter iter = {
+ .inode = rac->mapping->host,
+ .pos = readahead_pos(rac),
+ .len = readahead_length(rac),
+ };
struct iomap_readpage_ctx ctx = {
.rac = rac,
};
- trace_iomap_readahead(inode, readahead_count(rac));
+ trace_iomap_readahead(rac->mapping->host, readahead_count(rac));
- while (length > 0) {
- ssize_t ret = iomap_apply(inode, pos, length, 0, ops,
- &ctx, iomap_readahead_actor);
- if (ret <= 0) {
- WARN_ON_ONCE(ret == 0);
- break;
- }
- pos += ret;
- length -= ret;
- }
+ while (iomap_iter(&iter, ops) > 0)
+ iter.processed = iomap_readahead_iter(&iter, &ctx);
if (ctx.bio)
submit_bio(ctx.bio);
@@ -467,7 +459,7 @@ iomap_releasepage(struct page *page, gfp_t gfp_mask)
/*
* mm accommodates an old ext3 case where clean pages might not have had
* the dirty bit cleared. Thus, it can send actual dirty pages to
- * ->releasepage() via shrink_active_list(), skip those here.
+ * ->releasepage() via shrink_active_list(); skip those here.
*/
if (PageDirty(page) || PageWriteback(page))
return 0;
@@ -482,7 +474,7 @@ iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len)
trace_iomap_invalidatepage(page->mapping->host, offset, len);
/*
- * If we are invalidating the entire page, clear the dirty state from it
+ * If we're invalidating the entire page, clear the dirty state from it
* and release it to avoid unnecessary buildup of the LRU.
*/
if (offset == 0 && len == PAGE_SIZE) {
@@ -516,10 +508,6 @@ iomap_migrate_page(struct address_space *mapping, struct page *newpage,
EXPORT_SYMBOL_GPL(iomap_migrate_page);
#endif /* CONFIG_MIGRATION */
-enum {
- IOMAP_WRITE_F_UNSHARE = (1 << 0),
-};
-
static void
iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
{
@@ -535,7 +523,7 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
static int
iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff,
- unsigned plen, struct iomap *iomap)
+ unsigned plen, const struct iomap *iomap)
{
struct bio_vec bvec;
struct bio bio;
@@ -548,12 +536,12 @@ iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff,
return submit_bio_wait(&bio);
}
-static int
-__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
- struct page *page, struct iomap *srcmap)
+static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
+ unsigned len, struct page *page)
{
- struct iomap_page *iop = iomap_page_create(inode, page);
- loff_t block_size = i_blocksize(inode);
+ const struct iomap *srcmap = iomap_iter_srcmap(iter);
+ struct iomap_page *iop = iomap_page_create(iter->inode, page);
+ loff_t block_size = i_blocksize(iter->inode);
loff_t block_start = round_down(pos, block_size);
loff_t block_end = round_up(pos + len, block_size);
unsigned from = offset_in_page(pos), to = from + len, poff, plen;
@@ -563,18 +551,18 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
ClearPageError(page);
do {
- iomap_adjust_read_range(inode, iop, &block_start,
+ iomap_adjust_read_range(iter->inode, iop, &block_start,
block_end - block_start, &poff, &plen);
if (plen == 0)
break;
- if (!(flags & IOMAP_WRITE_F_UNSHARE) &&
+ if (!(iter->flags & IOMAP_UNSHARE) &&
(from <= poff || from >= poff + plen) &&
(to <= poff || to >= poff + plen))
continue;
- if (iomap_block_needs_zeroing(inode, srcmap, block_start)) {
- if (WARN_ON_ONCE(flags & IOMAP_WRITE_F_UNSHARE))
+ if (iomap_block_needs_zeroing(iter, block_start)) {
+ if (WARN_ON_ONCE(iter->flags & IOMAP_UNSHARE))
return -EIO;
zero_user_segments(page, poff, from, to, poff + plen);
} else {
@@ -589,41 +577,54 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
return 0;
}
-static int
-iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
+static int iomap_write_begin_inline(const struct iomap_iter *iter,
+ struct page *page)
{
- const struct iomap_page_ops *page_ops = iomap->page_ops;
+ int ret;
+
+ /* needs more work for the tailpacking case; disable for now */
+ if (WARN_ON_ONCE(iomap_iter_srcmap(iter)->offset != 0))
+ return -EIO;
+ ret = iomap_read_inline_data(iter, page);
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
+static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
+ unsigned len, struct page **pagep)
+{
+ const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
+ const struct iomap *srcmap = iomap_iter_srcmap(iter);
struct page *page;
int status = 0;
- BUG_ON(pos + len > iomap->offset + iomap->length);
- if (srcmap != iomap)
+ BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
+ if (srcmap != &iter->iomap)
BUG_ON(pos + len > srcmap->offset + srcmap->length);
if (fatal_signal_pending(current))
return -EINTR;
if (page_ops && page_ops->page_prepare) {
- status = page_ops->page_prepare(inode, pos, len, iomap);
+ status = page_ops->page_prepare(iter->inode, pos, len);
if (status)
return status;
}
- page = grab_cache_page_write_begin(inode->i_mapping, pos >> PAGE_SHIFT,
- AOP_FLAG_NOFS);
+ page = grab_cache_page_write_begin(iter->inode->i_mapping,
+ pos >> PAGE_SHIFT, AOP_FLAG_NOFS);
if (!page) {
status = -ENOMEM;
goto out_no_page;
}
if (srcmap->type == IOMAP_INLINE)
- iomap_read_inline_data(inode, page, srcmap);
- else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
+ status = iomap_write_begin_inline(iter, page);
+ else if (srcmap->flags & IOMAP_F_BUFFER_HEAD)
status = __block_write_begin_int(page, pos, len, NULL, srcmap);
else
- status = __iomap_write_begin(inode, pos, len, flags, page,
- srcmap);
+ status = __iomap_write_begin(iter, pos, len, page);
if (unlikely(status))
goto out_unlock;
@@ -634,11 +635,11 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
out_unlock:
unlock_page(page);
put_page(page);
- iomap_write_failed(inode, pos, len);
+ iomap_write_failed(iter->inode, pos, len);
out_no_page:
if (page_ops && page_ops->page_done)
- page_ops->page_done(inode, pos, 0, NULL, iomap);
+ page_ops->page_done(iter->inode, pos, 0, NULL);
return status;
}
@@ -650,13 +651,13 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
/*
* The blocks that were entirely written will now be uptodate, so we
* don't have to worry about a readpage reading them and overwriting a
- * partial write. However if we have encountered a short write and only
+ * partial write. However, if we've encountered a short write and only
* partially written into a block, it will not be marked uptodate, so a
* readpage might come in and destroy our partial write.
*
- * Do the simplest thing, and just treat any short write to a non
- * uptodate page as a zero-length write, and force the caller to redo
- * the whole thing.
+ * Do the simplest thing and just treat any short write to a
+ * non-uptodate page as a zero-length write, and force the caller to
+ * redo the whole thing.
*/
if (unlikely(copied < len && !PageUptodate(page)))
return 0;
@@ -665,39 +666,40 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
return copied;
}
-static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
- struct iomap *iomap, loff_t pos, size_t copied)
+static size_t iomap_write_end_inline(const struct iomap_iter *iter,
+ struct page *page, loff_t pos, size_t copied)
{
+ const struct iomap *iomap = &iter->iomap;
void *addr;
WARN_ON_ONCE(!PageUptodate(page));
- BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
+ BUG_ON(!iomap_inline_data_valid(iomap));
flush_dcache_page(page);
- addr = kmap_atomic(page);
- memcpy(iomap->inline_data + pos, addr + pos, copied);
- kunmap_atomic(addr);
+ addr = kmap_local_page(page) + pos;
+ memcpy(iomap_inline_data(iomap, pos), addr, copied);
+ kunmap_local(addr);
- mark_inode_dirty(inode);
+ mark_inode_dirty(iter->inode);
return copied;
}
/* Returns the number of bytes copied. May be 0. Cannot be an errno. */
-static size_t iomap_write_end(struct inode *inode, loff_t pos, size_t len,
- size_t copied, struct page *page, struct iomap *iomap,
- struct iomap *srcmap)
+static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
+ size_t copied, struct page *page)
{
- const struct iomap_page_ops *page_ops = iomap->page_ops;
- loff_t old_size = inode->i_size;
+ const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
+ const struct iomap *srcmap = iomap_iter_srcmap(iter);
+ loff_t old_size = iter->inode->i_size;
size_t ret;
if (srcmap->type == IOMAP_INLINE) {
- ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
+ ret = iomap_write_end_inline(iter, page, pos, copied);
} else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
- ret = block_write_end(NULL, inode->i_mapping, pos, len, copied,
- page, NULL);
+ ret = block_write_end(NULL, iter->inode->i_mapping, pos, len,
+ copied, page, NULL);
} else {
- ret = __iomap_write_end(inode, pos, len, copied, page);
+ ret = __iomap_write_end(iter->inode, pos, len, copied, page);
}
/*
@@ -706,29 +708,28 @@ static size_t iomap_write_end(struct inode *inode, loff_t pos, size_t len,
* preferably after I/O completion so that no stale data is exposed.
*/
if (pos + ret > old_size) {
- i_size_write(inode, pos + ret);
- iomap->flags |= IOMAP_F_SIZE_CHANGED;
+ i_size_write(iter->inode, pos + ret);
+ iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
}
unlock_page(page);
if (old_size < pos)
- pagecache_isize_extended(inode, old_size, pos);
+ pagecache_isize_extended(iter->inode, old_size, pos);
if (page_ops && page_ops->page_done)
- page_ops->page_done(inode, pos, ret, page, iomap);
+ page_ops->page_done(iter->inode, pos, ret, page);
put_page(page);
if (ret < len)
- iomap_write_failed(inode, pos, len);
+ iomap_write_failed(iter->inode, pos, len);
return ret;
}
-static loff_t
-iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
- struct iomap *iomap, struct iomap *srcmap)
+static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
{
- struct iov_iter *i = data;
- long status = 0;
+ loff_t length = iomap_length(iter);
+ loff_t pos = iter->pos;
ssize_t written = 0;
+ long status = 0;
do {
struct page *page;
@@ -744,7 +745,7 @@ again:
bytes = length;
/*
- * Bring in the user page that we will copy from _first_.
+ * Bring in the user page that we'll copy from _first_.
* Otherwise there's a nasty deadlock on copying from the
* same page as we're writing to, without it being marked
* up-to-date.
@@ -754,18 +755,16 @@ again:
break;
}
- status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap,
- srcmap);
+ status = iomap_write_begin(iter, pos, bytes, &page);
if (unlikely(status))
break;
- if (mapping_writably_mapped(inode->i_mapping))
+ if (mapping_writably_mapped(iter->inode->i_mapping))
flush_dcache_page(page);
copied = copy_page_from_iter_atomic(page, offset, bytes, i);
- status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
- srcmap);
+ status = iomap_write_end(iter, pos, bytes, copied, page);
if (unlikely(copied != status))
iov_iter_revert(i, copied - status);
@@ -786,36 +785,38 @@ again:
written += status;
length -= status;
- balance_dirty_pages_ratelimited(inode->i_mapping);
+ balance_dirty_pages_ratelimited(iter->inode->i_mapping);
} while (iov_iter_count(i) && length);
return written ? written : status;
}
ssize_t
-iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter,
+iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
const struct iomap_ops *ops)
{
- struct inode *inode = iocb->ki_filp->f_mapping->host;
- loff_t pos = iocb->ki_pos, ret = 0, written = 0;
-
- while (iov_iter_count(iter)) {
- ret = iomap_apply(inode, pos, iov_iter_count(iter),
- IOMAP_WRITE, ops, iter, iomap_write_actor);
- if (ret <= 0)
- break;
- pos += ret;
- written += ret;
- }
+ struct iomap_iter iter = {
+ .inode = iocb->ki_filp->f_mapping->host,
+ .pos = iocb->ki_pos,
+ .len = iov_iter_count(i),
+ .flags = IOMAP_WRITE,
+ };
+ int ret;
- return written ? written : ret;
+ while ((ret = iomap_iter(&iter, ops)) > 0)
+ iter.processed = iomap_write_iter(&iter, i);
+ if (iter.pos == iocb->ki_pos)
+ return ret;
+ return iter.pos - iocb->ki_pos;
}
EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
-static loff_t
-iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
- struct iomap *iomap, struct iomap *srcmap)
+static loff_t iomap_unshare_iter(struct iomap_iter *iter)
{
+ struct iomap *iomap = &iter->iomap;
+ const struct iomap *srcmap = iomap_iter_srcmap(iter);
+ loff_t pos = iter->pos;
+ loff_t length = iomap_length(iter);
long status = 0;
loff_t written = 0;
@@ -831,13 +832,11 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
unsigned long bytes = min_t(loff_t, PAGE_SIZE - offset, length);
struct page *page;
- status = iomap_write_begin(inode, pos, bytes,
- IOMAP_WRITE_F_UNSHARE, &page, iomap, srcmap);
+ status = iomap_write_begin(iter, pos, bytes, &page);
if (unlikely(status))
return status;
- status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
- srcmap);
+ status = iomap_write_end(iter, pos, bytes, bytes, page);
if (WARN_ON_ONCE(status == 0))
return -EIO;
@@ -847,7 +846,7 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
written += status;
length -= status;
- balance_dirty_pages_ratelimited(inode->i_mapping);
+ balance_dirty_pages_ratelimited(iter->inode->i_mapping);
} while (length);
return written;
@@ -857,44 +856,43 @@ int
iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
const struct iomap_ops *ops)
{
- loff_t ret;
-
- while (len) {
- ret = iomap_apply(inode, pos, len, IOMAP_WRITE, ops, NULL,
- iomap_unshare_actor);
- if (ret <= 0)
- return ret;
- pos += ret;
- len -= ret;
- }
+ struct iomap_iter iter = {
+ .inode = inode,
+ .pos = pos,
+ .len = len,
+ .flags = IOMAP_WRITE | IOMAP_UNSHARE,
+ };
+ int ret;
- return 0;
+ while ((ret = iomap_iter(&iter, ops)) > 0)
+ iter.processed = iomap_unshare_iter(&iter);
+ return ret;
}
EXPORT_SYMBOL_GPL(iomap_file_unshare);
-static s64 iomap_zero(struct inode *inode, loff_t pos, u64 length,
- struct iomap *iomap, struct iomap *srcmap)
+static s64 __iomap_zero_iter(struct iomap_iter *iter, loff_t pos, u64 length)
{
struct page *page;
int status;
unsigned offset = offset_in_page(pos);
unsigned bytes = min_t(u64, PAGE_SIZE - offset, length);
- status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
+ status = iomap_write_begin(iter, pos, bytes, &page);
if (status)
return status;
zero_user(page, offset, bytes);
mark_page_accessed(page);
- return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
+ return iomap_write_end(iter, pos, bytes, bytes, page);
}
-static loff_t iomap_zero_range_actor(struct inode *inode, loff_t pos,
- loff_t length, void *data, struct iomap *iomap,
- struct iomap *srcmap)
+static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
{
- bool *did_zero = data;
+ struct iomap *iomap = &iter->iomap;
+ const struct iomap *srcmap = iomap_iter_srcmap(iter);
+ loff_t pos = iter->pos;
+ loff_t length = iomap_length(iter);
loff_t written = 0;
/* already zeroed? we're done. */
@@ -904,10 +902,10 @@ static loff_t iomap_zero_range_actor(struct inode *inode, loff_t pos,
do {
s64 bytes;
- if (IS_DAX(inode))
+ if (IS_DAX(iter->inode))
bytes = dax_iomap_zero(pos, length, iomap);
else
- bytes = iomap_zero(inode, pos, length, iomap, srcmap);
+ bytes = __iomap_zero_iter(iter, pos, length);
if (bytes < 0)
return bytes;
@@ -925,19 +923,17 @@ int
iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
const struct iomap_ops *ops)
{
- loff_t ret;
-
- while (len > 0) {
- ret = iomap_apply(inode, pos, len, IOMAP_ZERO,
- ops, did_zero, iomap_zero_range_actor);
- if (ret <= 0)
- return ret;
-
- pos += ret;
- len -= ret;
- }
+ struct iomap_iter iter = {
+ .inode = inode,
+ .pos = pos,
+ .len = len,
+ .flags = IOMAP_ZERO,
+ };
+ int ret;
- return 0;
+ while ((ret = iomap_iter(&iter, ops)) > 0)
+ iter.processed = iomap_zero_iter(&iter, did_zero);
+ return ret;
}
EXPORT_SYMBOL_GPL(iomap_zero_range);
@@ -955,15 +951,15 @@ iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
}
EXPORT_SYMBOL_GPL(iomap_truncate_page);
-static loff_t
-iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
- void *data, struct iomap *iomap, struct iomap *srcmap)
+static loff_t iomap_page_mkwrite_iter(struct iomap_iter *iter,
+ struct page *page)
{
- struct page *page = data;
+ loff_t length = iomap_length(iter);
int ret;
- if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
- ret = __block_write_begin_int(page, pos, length, NULL, iomap);
+ if (iter->iomap.flags & IOMAP_F_BUFFER_HEAD) {
+ ret = __block_write_begin_int(page, iter->pos, length, NULL,
+ &iter->iomap);
if (ret)
return ret;
block_commit_write(page, 0, length);
@@ -977,29 +973,24 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
{
+ struct iomap_iter iter = {
+ .inode = file_inode(vmf->vma->vm_file),
+ .flags = IOMAP_WRITE | IOMAP_FAULT,
+ };
struct page *page = vmf->page;
- struct inode *inode = file_inode(vmf->vma->vm_file);
- unsigned long length;
- loff_t offset;
ssize_t ret;
lock_page(page);
- ret = page_mkwrite_check_truncate(page, inode);
+ ret = page_mkwrite_check_truncate(page, iter.inode);
if (ret < 0)
goto out_unlock;
- length = ret;
-
- offset = page_offset(page);
- while (length > 0) {
- ret = iomap_apply(inode, offset, length,
- IOMAP_WRITE | IOMAP_FAULT, ops, page,
- iomap_page_mkwrite_actor);
- if (unlikely(ret <= 0))
- goto out_unlock;
- offset += ret;
- length -= ret;
- }
+ iter.pos = page_offset(page);
+ iter.len = ret;
+ while ((ret = iomap_iter(&iter, ops)) > 0)
+ iter.processed = iomap_page_mkwrite_iter(&iter, page);
+ if (ret < 0)
+ goto out_unlock;
wait_for_stable_page(page);
return VM_FAULT_LOCKED;
out_unlock:
@@ -1016,7 +1007,7 @@ iomap_finish_page_writeback(struct inode *inode, struct page *page,
if (error) {
SetPageError(page);
- mapping_set_error(inode->i_mapping, -EIO);
+ mapping_set_error(inode->i_mapping, error);
}
WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop);
@@ -1153,7 +1144,7 @@ static void iomap_writepage_end_bio(struct bio *bio)
* Submit the final bio for an ioend.
*
* If @error is non-zero, it means that we have a situation where some part of
- * the submission process has failed after we have marked paged for writeback
+ * the submission process has failed after we've marked pages for writeback
* and unlocked them. In this situation, we need to fail the bio instead of
* submitting it. This typically only happens on a filesystem shutdown.
*/
@@ -1168,7 +1159,7 @@ iomap_submit_ioend(struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend,
error = wpc->ops->prepare_ioend(ioend, error);
if (error) {
/*
- * If we are failing the IO now, just mark the ioend with an
+ * If we're failing the IO now, just mark the ioend with an
* error and finish it. This will run IO completion immediately
* as there is only one reference to the ioend at this point in
* time.
@@ -1210,7 +1201,7 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
/*
* Allocate a new bio, and chain the old bio to the new one.
*
- * Note that we have to do perform the chaining in this unintuitive order
+ * Note that we have to perform the chaining in this unintuitive order
* so that the bi_private linkage is set up in the right direction for the
* traversal in iomap_finish_ioend().
*/
@@ -1249,7 +1240,7 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset,
/*
* Test to see if we have an existing ioend structure that we could append to
- * first, otherwise finish off the current ioend and start another.
+ * first; otherwise finish off the current ioend and start another.
*/
static void
iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
@@ -1259,7 +1250,6 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
sector_t sector = iomap_sector(&wpc->iomap, offset);
unsigned len = i_blocksize(inode);
unsigned poff = offset & (PAGE_SIZE - 1);
- bool merged, same_page = false;
if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, offset, sector)) {
if (wpc->ioend)
@@ -1267,19 +1257,13 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
wpc->ioend = iomap_alloc_ioend(inode, wpc, offset, sector, wbc);
}
- merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
- &same_page);
- if (iop)
- atomic_add(len, &iop->write_bytes_pending);
-
- if (!merged) {
- if (bio_full(wpc->ioend->io_bio, len)) {
- wpc->ioend->io_bio =
- iomap_chain_bio(wpc->ioend->io_bio);
- }
- bio_add_page(wpc->ioend->io_bio, page, len, poff);
+ if (bio_add_page(wpc->ioend->io_bio, page, len, poff) != len) {
+ wpc->ioend->io_bio = iomap_chain_bio(wpc->ioend->io_bio);
+ __bio_add_page(wpc->ioend->io_bio, page, len, poff);
}
+ if (iop)
+ atomic_add(len, &iop->write_bytes_pending);
wpc->ioend->io_size += len;
wbc_account_cgroup_owner(wbc, page, len);
}
@@ -1287,9 +1271,9 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
/*
* We implement an immediate ioend submission policy here to avoid needing to
* chain multiple ioends and hence nest mempool allocations which can violate
- * forward progress guarantees we need to provide. The current ioend we are
- * adding blocks to is cached on the writepage context, and if the new block
- * does not append to the cached ioend it will create a new ioend and cache that
+ * the forward progress guarantees we need to provide. The current ioend we're
+ * adding blocks to is cached in the writepage context, and if the new block
+ * doesn't append to the cached ioend, it will create a new ioend and cache that
* instead.
*
* If a new ioend is created and cached, the old ioend is returned and queued
@@ -1351,7 +1335,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
if (unlikely(error)) {
/*
* Let the filesystem know what portion of the current page
- * failed to map. If the page wasn't been added to ioend, it
+ * failed to map. If the page hasn't been added to ioend, it
* won't be affected by I/O completion and we must unlock it
* now.
*/
@@ -1368,7 +1352,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
unlock_page(page);
/*
- * Preserve the original error if there was one, otherwise catch
+ * Preserve the original error if there was one; catch
* submission errors here and propagate into subsequent ioend
* submissions.
*/
@@ -1395,8 +1379,8 @@ done:
/*
* Write out a dirty page.
*
- * For delalloc space on the page we need to allocate space and flush it.
- * For unwritten space on the page we need to start the conversion to
+ * For delalloc space on the page, we need to allocate space and flush it.
+ * For unwritten space on the page, we need to start the conversion to
* regular allocated space.
*/
static int
@@ -1411,7 +1395,7 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data)
trace_iomap_writepage(inode, page_offset(page), PAGE_SIZE);
/*
- * Refuse to write the page out if we are called from reclaim context.
+ * Refuse to write the page out if we're called from reclaim context.
*
* This avoids stack overflows when called from deeply used stacks in
* random callers for direct reclaim or memcg reclaim. We explicitly
@@ -1456,20 +1440,20 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data)
unsigned offset_into_page = offset & (PAGE_SIZE - 1);
/*
- * Skip the page if it is fully outside i_size, e.g. due to a
- * truncate operation that is in progress. We must redirty the
+ * Skip the page if it's fully outside i_size, e.g. due to a
+ * truncate operation that's in progress. We must redirty the
* page so that reclaim stops reclaiming it. Otherwise
* iomap_vm_releasepage() is called on it and gets confused.
*
- * Note that the end_index is unsigned long, it would overflow
- * if the given offset is greater than 16TB on 32-bit system
- * and if we do check the page is fully outside i_size or not
- * via "if (page->index >= end_index + 1)" as "end_index + 1"
- * will be evaluated to 0. Hence this page will be redirtied
- * and be written out repeatedly which would result in an
- * infinite loop, the user program that perform this operation
- * will hang. Instead, we can verify this situation by checking
- * if the page to write is totally beyond the i_size or if it's
+ * Note that the end_index is unsigned long. If the given
+ * offset is greater than 16TB on a 32-bit system then if we
+ * checked if the page is fully outside i_size with
+ * "if (page->index >= end_index + 1)", "end_index + 1" would
+ * overflow and evaluate to 0. Hence this page would be
+ * redirtied and written out repeatedly, which would result in
+ * an infinite loop; the user program performing this operation
+ * would hang. Instead, we can detect this situation by
+ * checking if the page is totally beyond i_size or if its
* offset is just equal to the EOF.
*/
if (page->index > end_index ||
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 9398b8c31323..4ecd255e0511 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2010 Red Hat, Inc.
- * Copyright (c) 2016-2018 Christoph Hellwig.
+ * Copyright (c) 2016-2021 Christoph Hellwig.
*/
#include <linux/module.h>
#include <linux/compiler.h>
@@ -59,19 +59,17 @@ int iomap_dio_iopoll(struct kiocb *kiocb, bool spin)
}
EXPORT_SYMBOL_GPL(iomap_dio_iopoll);
-static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
- struct bio *bio, loff_t pos)
+static void iomap_dio_submit_bio(const struct iomap_iter *iter,
+ struct iomap_dio *dio, struct bio *bio, loff_t pos)
{
atomic_inc(&dio->ref);
if (dio->iocb->ki_flags & IOCB_HIPRI)
bio_set_polled(bio, dio->iocb);
- dio->submit.last_queue = bdev_get_queue(iomap->bdev);
+ dio->submit.last_queue = bdev_get_queue(iter->iomap.bdev);
if (dio->dops && dio->dops->submit_io)
- dio->submit.cookie = dio->dops->submit_io(
- file_inode(dio->iocb->ki_filp),
- iomap, bio, pos);
+ dio->submit.cookie = dio->dops->submit_io(iter, bio, pos);
else
dio->submit.cookie = submit_bio(bio);
}
@@ -181,24 +179,23 @@ static void iomap_dio_bio_end_io(struct bio *bio)
}
}
-static void
-iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
- unsigned len)
+static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
+ loff_t pos, unsigned len)
{
struct page *page = ZERO_PAGE(0);
int flags = REQ_SYNC | REQ_IDLE;
struct bio *bio;
bio = bio_alloc(GFP_KERNEL, 1);
- bio_set_dev(bio, iomap->bdev);
- bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
+ bio_set_dev(bio, iter->iomap.bdev);
+ bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos);
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
get_page(page);
__bio_add_page(bio, page, len, 0);
bio_set_op_attrs(bio, REQ_OP_WRITE, flags);
- iomap_dio_submit_bio(dio, iomap, bio, pos);
+ iomap_dio_submit_bio(iter, dio, bio, pos);
}
/*
@@ -206,8 +203,8 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
* mapping, and whether or not we want FUA. Note that we can end up
* clearing the WRITE_FUA flag in the dio request.
*/
-static inline unsigned int
-iomap_dio_bio_opflags(struct iomap_dio *dio, struct iomap *iomap, bool use_fua)
+static inline unsigned int iomap_dio_bio_opflags(struct iomap_dio *dio,
+ const struct iomap *iomap, bool use_fua)
{
unsigned int opflags = REQ_SYNC | REQ_IDLE;
@@ -229,13 +226,16 @@ iomap_dio_bio_opflags(struct iomap_dio *dio, struct iomap *iomap, bool use_fua)
return opflags;
}
-static loff_t
-iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
- struct iomap_dio *dio, struct iomap *iomap)
+static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
+ struct iomap_dio *dio)
{
+ const struct iomap *iomap = &iter->iomap;
+ struct inode *inode = iter->inode;
unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
unsigned int fs_block_size = i_blocksize(inode), pad;
unsigned int align = iov_iter_alignment(dio->submit.iter);
+ loff_t length = iomap_length(iter);
+ loff_t pos = iter->pos;
unsigned int bio_opf;
struct bio *bio;
bool need_zeroout = false;
@@ -286,7 +286,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
/* zero out from the start of the block to the write offset */
pad = pos & (fs_block_size - 1);
if (pad)
- iomap_dio_zero(dio, iomap, pos - pad, pad);
+ iomap_dio_zero(iter, dio, pos - pad, pad);
}
/*
@@ -339,7 +339,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter,
BIO_MAX_VECS);
- iomap_dio_submit_bio(dio, iomap, bio, pos);
+ iomap_dio_submit_bio(iter, dio, bio, pos);
pos += n;
} while (nr_pages);
@@ -355,7 +355,7 @@ zero_tail:
/* zero out from the end of the write to the end of the block */
pad = pos & (fs_block_size - 1);
if (pad)
- iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
+ iomap_dio_zero(iter, dio, pos, fs_block_size - pad);
}
out:
/* Undo iter limitation to current extent */
@@ -365,65 +365,67 @@ out:
return ret;
}
-static loff_t
-iomap_dio_hole_actor(loff_t length, struct iomap_dio *dio)
+static loff_t iomap_dio_hole_iter(const struct iomap_iter *iter,
+ struct iomap_dio *dio)
{
- length = iov_iter_zero(length, dio->submit.iter);
+ loff_t length = iov_iter_zero(iomap_length(iter), dio->submit.iter);
+
dio->size += length;
return length;
}
-static loff_t
-iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
- struct iomap_dio *dio, struct iomap *iomap)
+static loff_t iomap_dio_inline_iter(const struct iomap_iter *iomi,
+ struct iomap_dio *dio)
{
+ const struct iomap *iomap = &iomi->iomap;
struct iov_iter *iter = dio->submit.iter;
+ void *inline_data = iomap_inline_data(iomap, iomi->pos);
+ loff_t length = iomap_length(iomi);
+ loff_t pos = iomi->pos;
size_t copied;
- BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
+ if (WARN_ON_ONCE(!iomap_inline_data_valid(iomap)))
+ return -EIO;
if (dio->flags & IOMAP_DIO_WRITE) {
- loff_t size = inode->i_size;
+ loff_t size = iomi->inode->i_size;
if (pos > size)
- memset(iomap->inline_data + size, 0, pos - size);
- copied = copy_from_iter(iomap->inline_data + pos, length, iter);
+ memset(iomap_inline_data(iomap, size), 0, pos - size);
+ copied = copy_from_iter(inline_data, length, iter);
if (copied) {
if (pos + copied > size)
- i_size_write(inode, pos + copied);
- mark_inode_dirty(inode);
+ i_size_write(iomi->inode, pos + copied);
+ mark_inode_dirty(iomi->inode);
}
} else {
- copied = copy_to_iter(iomap->inline_data + pos, length, iter);
+ copied = copy_to_iter(inline_data, length, iter);
}
dio->size += copied;
return copied;
}
-static loff_t
-iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
- void *data, struct iomap *iomap, struct iomap *srcmap)
+static loff_t iomap_dio_iter(const struct iomap_iter *iter,
+ struct iomap_dio *dio)
{
- struct iomap_dio *dio = data;
-
- switch (iomap->type) {
+ switch (iter->iomap.type) {
case IOMAP_HOLE:
if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
return -EIO;
- return iomap_dio_hole_actor(length, dio);
+ return iomap_dio_hole_iter(iter, dio);
case IOMAP_UNWRITTEN:
if (!(dio->flags & IOMAP_DIO_WRITE))
- return iomap_dio_hole_actor(length, dio);
- return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
+ return iomap_dio_hole_iter(iter, dio);
+ return iomap_dio_bio_iter(iter, dio);
case IOMAP_MAPPED:
- return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
+ return iomap_dio_bio_iter(iter, dio);
case IOMAP_INLINE:
- return iomap_dio_inline_actor(inode, pos, length, dio, iomap);
+ return iomap_dio_inline_iter(iter, dio);
case IOMAP_DELALLOC:
/*
* DIO is not serialised against mmap() access at all, and so
* if the page_mkwrite occurs between the writeback and the
- * iomap_apply() call in the DIO path, then it will see the
+ * iomap_iter() call in the DIO path, then it will see the
* DELALLOC block that the page-mkwrite allocated.
*/
pr_warn_ratelimited("Direct I/O collision with buffered writes! File: %pD4 Comm: %.20s\n",
@@ -454,16 +456,19 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = file_inode(iocb->ki_filp);
- size_t count = iov_iter_count(iter);
- loff_t pos = iocb->ki_pos;
- loff_t end = iocb->ki_pos + count - 1, ret = 0;
+ struct iomap_iter iomi = {
+ .inode = inode,
+ .pos = iocb->ki_pos,
+ .len = iov_iter_count(iter),
+ .flags = IOMAP_DIRECT,
+ };
+ loff_t end = iomi.pos + iomi.len - 1, ret = 0;
bool wait_for_completion =
is_sync_kiocb(iocb) || (dio_flags & IOMAP_DIO_FORCE_WAIT);
- unsigned int iomap_flags = IOMAP_DIRECT;
struct blk_plug plug;
struct iomap_dio *dio;
- if (!count)
+ if (!iomi.len)
return NULL;
dio = kmalloc(sizeof(*dio), GFP_KERNEL);
@@ -484,29 +489,30 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
dio->submit.last_queue = NULL;
if (iov_iter_rw(iter) == READ) {
- if (pos >= dio->i_size)
+ if (iomi.pos >= dio->i_size)
goto out_free_dio;
if (iocb->ki_flags & IOCB_NOWAIT) {
- if (filemap_range_needs_writeback(mapping, pos, end)) {
+ if (filemap_range_needs_writeback(mapping, iomi.pos,
+ end)) {
ret = -EAGAIN;
goto out_free_dio;
}
- iomap_flags |= IOMAP_NOWAIT;
+ iomi.flags |= IOMAP_NOWAIT;
}
if (iter_is_iovec(iter))
dio->flags |= IOMAP_DIO_DIRTY;
} else {
- iomap_flags |= IOMAP_WRITE;
+ iomi.flags |= IOMAP_WRITE;
dio->flags |= IOMAP_DIO_WRITE;
if (iocb->ki_flags & IOCB_NOWAIT) {
- if (filemap_range_has_page(mapping, pos, end)) {
+ if (filemap_range_has_page(mapping, iomi.pos, end)) {
ret = -EAGAIN;
goto out_free_dio;
}
- iomap_flags |= IOMAP_NOWAIT;
+ iomi.flags |= IOMAP_NOWAIT;
}
/* for data sync or sync, we need sync completion processing */
@@ -525,12 +531,13 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
ret = -EAGAIN;
- if (pos >= dio->i_size || pos + count > dio->i_size)
+ if (iomi.pos >= dio->i_size ||
+ iomi.pos + iomi.len > dio->i_size)
goto out_free_dio;
- iomap_flags |= IOMAP_OVERWRITE_ONLY;
+ iomi.flags |= IOMAP_OVERWRITE_ONLY;
}
- ret = filemap_write_and_wait_range(mapping, pos, end);
+ ret = filemap_write_and_wait_range(mapping, iomi.pos, end);
if (ret)
goto out_free_dio;
@@ -540,9 +547,10 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
* If this invalidation fails, let the caller fall back to
* buffered I/O.
*/
- if (invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
- end >> PAGE_SHIFT)) {
- trace_iomap_dio_invalidate_fail(inode, pos, count);
+ if (invalidate_inode_pages2_range(mapping,
+ iomi.pos >> PAGE_SHIFT, end >> PAGE_SHIFT)) {
+ trace_iomap_dio_invalidate_fail(inode, iomi.pos,
+ iomi.len);
ret = -ENOTBLK;
goto out_free_dio;
}
@@ -557,31 +565,23 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
inode_dio_begin(inode);
blk_start_plug(&plug);
- do {
- ret = iomap_apply(inode, pos, count, iomap_flags, ops, dio,
- iomap_dio_actor);
- if (ret <= 0) {
- /* magic error code to fall back to buffered I/O */
- if (ret == -ENOTBLK) {
- wait_for_completion = true;
- ret = 0;
- }
- break;
- }
- pos += ret;
-
- if (iov_iter_rw(iter) == READ && pos >= dio->i_size) {
- /*
- * We only report that we've read data up to i_size.
- * Revert iter to a state corresponding to that as
- * some callers (such as splice code) rely on it.
- */
- iov_iter_revert(iter, pos - dio->i_size);
- break;
- }
- } while ((count = iov_iter_count(iter)) > 0);
+ while ((ret = iomap_iter(&iomi, ops)) > 0)
+ iomi.processed = iomap_dio_iter(&iomi, dio);
blk_finish_plug(&plug);
+ /*
+ * We only report that we've read data up to i_size.
+ * Revert iter to a state corresponding to that as some callers (such
+ * as the splice code) rely on it.
+ */
+ if (iov_iter_rw(iter) == READ && iomi.pos >= dio->i_size)
+ iov_iter_revert(iter, iomi.pos - dio->i_size);
+
+ /* magic error code to fall back to buffered I/O */
+ if (ret == -ENOTBLK) {
+ wait_for_completion = true;
+ ret = 0;
+ }
if (ret < 0)
iomap_dio_set_error(dio, ret);
diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c
index aab070df4a21..66cf267c68ae 100644
--- a/fs/iomap/fiemap.c
+++ b/fs/iomap/fiemap.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2016-2018 Christoph Hellwig.
+ * Copyright (c) 2016-2021 Christoph Hellwig.
*/
#include <linux/module.h>
#include <linux/compiler.h>
@@ -8,13 +8,8 @@
#include <linux/iomap.h>
#include <linux/fiemap.h>
-struct fiemap_ctx {
- struct fiemap_extent_info *fi;
- struct iomap prev;
-};
-
static int iomap_to_fiemap(struct fiemap_extent_info *fi,
- struct iomap *iomap, u32 flags)
+ const struct iomap *iomap, u32 flags)
{
switch (iomap->type) {
case IOMAP_HOLE:
@@ -43,24 +38,22 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
iomap->length, flags);
}
-static loff_t
-iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
- struct iomap *iomap, struct iomap *srcmap)
+static loff_t iomap_fiemap_iter(const struct iomap_iter *iter,
+ struct fiemap_extent_info *fi, struct iomap *prev)
{
- struct fiemap_ctx *ctx = data;
- loff_t ret = length;
+ int ret;
- if (iomap->type == IOMAP_HOLE)
- return length;
+ if (iter->iomap.type == IOMAP_HOLE)
+ return iomap_length(iter);
- ret = iomap_to_fiemap(ctx->fi, &ctx->prev, 0);
- ctx->prev = *iomap;
+ ret = iomap_to_fiemap(fi, prev, 0);
+ *prev = iter->iomap;
switch (ret) {
case 0: /* success */
- return length;
+ return iomap_length(iter);
case 1: /* extent array full */
return 0;
- default:
+ default: /* error */
return ret;
}
}
@@ -68,73 +61,63 @@ iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
u64 start, u64 len, const struct iomap_ops *ops)
{
- struct fiemap_ctx ctx;
- loff_t ret;
-
- memset(&ctx, 0, sizeof(ctx));
- ctx.fi = fi;
- ctx.prev.type = IOMAP_HOLE;
+ struct iomap_iter iter = {
+ .inode = inode,
+ .pos = start,
+ .len = len,
+ .flags = IOMAP_REPORT,
+ };
+ struct iomap prev = {
+ .type = IOMAP_HOLE,
+ };
+ int ret;
- ret = fiemap_prep(inode, fi, start, &len, 0);
+ ret = fiemap_prep(inode, fi, start, &iter.len, 0);
if (ret)
return ret;
- while (len > 0) {
- ret = iomap_apply(inode, start, len, IOMAP_REPORT, ops, &ctx,
- iomap_fiemap_actor);
- /* inode with no (attribute) mapping will give ENOENT */
- if (ret == -ENOENT)
- break;
- if (ret < 0)
- return ret;
- if (ret == 0)
- break;
-
- start += ret;
- len -= ret;
- }
+ while ((ret = iomap_iter(&iter, ops)) > 0)
+ iter.processed = iomap_fiemap_iter(&iter, fi, &prev);
- if (ctx.prev.type != IOMAP_HOLE) {
- ret = iomap_to_fiemap(fi, &ctx.prev, FIEMAP_EXTENT_LAST);
+ if (prev.type != IOMAP_HOLE) {
+ ret = iomap_to_fiemap(fi, &prev, FIEMAP_EXTENT_LAST);
if (ret < 0)
return ret;
}
+ /* inode with no (attribute) mapping will give ENOENT */
+ if (ret < 0 && ret != -ENOENT)
+ return ret;
return 0;
}
EXPORT_SYMBOL_GPL(iomap_fiemap);
-static loff_t
-iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length,
- void *data, struct iomap *iomap, struct iomap *srcmap)
-{
- sector_t *bno = data, addr;
-
- if (iomap->type == IOMAP_MAPPED) {
- addr = (pos - iomap->offset + iomap->addr) >> inode->i_blkbits;
- *bno = addr;
- }
- return 0;
-}
-
/* legacy ->bmap interface. 0 is the error return (!) */
sector_t
iomap_bmap(struct address_space *mapping, sector_t bno,
const struct iomap_ops *ops)
{
- struct inode *inode = mapping->host;
- loff_t pos = bno << inode->i_blkbits;
- unsigned blocksize = i_blocksize(inode);
+ struct iomap_iter iter = {
+ .inode = mapping->host,
+ .pos = (loff_t)bno << mapping->host->i_blkbits,
+ .len = i_blocksize(mapping->host),
+ .flags = IOMAP_REPORT,
+ };
+ const unsigned int blkshift = mapping->host->i_blkbits - SECTOR_SHIFT;
int ret;
if (filemap_write_and_wait(mapping))
return 0;
bno = 0;
- ret = iomap_apply(inode, pos, blocksize, 0, ops, &bno,
- iomap_bmap_actor);
+ while ((ret = iomap_iter(&iter, ops)) > 0) {
+ if (iter.iomap.type == IOMAP_MAPPED)
+ bno = iomap_sector(&iter.iomap, iter.pos) >> blkshift;
+ /* leave iter.processed unset to abort loop */
+ }
if (ret)
return 0;
+
return bno;
}
EXPORT_SYMBOL_GPL(iomap_bmap);
diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c
new file mode 100644
index 000000000000..a1c7592d2ade
--- /dev/null
+++ b/fs/iomap/iter.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2010 Red Hat, Inc.
+ * Copyright (c) 2016-2021 Christoph Hellwig.
+ */
+#include <linux/fs.h>
+#include <linux/iomap.h>
+#include "trace.h"
+
+static inline int iomap_iter_advance(struct iomap_iter *iter)
+{
+ /* handle the previous iteration (if any) */
+ if (iter->iomap.length) {
+ if (iter->processed <= 0)
+ return iter->processed;
+ if (WARN_ON_ONCE(iter->processed > iomap_length(iter)))
+ return -EIO;
+ iter->pos += iter->processed;
+ iter->len -= iter->processed;
+ if (!iter->len)
+ return 0;
+ }
+
+ /* clear the state for the next iteration */
+ iter->processed = 0;
+ memset(&iter->iomap, 0, sizeof(iter->iomap));
+ memset(&iter->srcmap, 0, sizeof(iter->srcmap));
+ return 1;
+}
+
+static inline void iomap_iter_done(struct iomap_iter *iter)
+{
+ WARN_ON_ONCE(iter->iomap.offset > iter->pos);
+ WARN_ON_ONCE(iter->iomap.length == 0);
+ WARN_ON_ONCE(iter->iomap.offset + iter->iomap.length <= iter->pos);
+
+ trace_iomap_iter_dstmap(iter->inode, &iter->iomap);
+ if (iter->srcmap.type != IOMAP_HOLE)
+ trace_iomap_iter_srcmap(iter->inode, &iter->srcmap);
+}
+
+/**
+ * iomap_iter - iterate over ranges in a file
+ * @iter: iteration structure
+ * @ops: iomap ops provided by the file system
+ *
+ * Iterate over filesystem-provided space mappings for the provided file range.
+ *
+ * This function handles cleanup of resources acquired for iteration when the
+ * filesystem indicates there are no more space mappings, which means that this
+ * function must be called in a loop that continues as long as it returns a
+ * positive value. If 0 or a negative value is returned, the caller must not
+ * return to the loop body. Within a loop body, there are two ways to break out
+ * of the loop body: leave @iter.processed unchanged, or set it to a negative
+ * errno.
+ */
+int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops)
+{
+ int ret;
+
+ if (iter->iomap.length && ops->iomap_end) {
+ ret = ops->iomap_end(iter->inode, iter->pos, iomap_length(iter),
+ iter->processed > 0 ? iter->processed : 0,
+ iter->flags, &iter->iomap);
+ if (ret < 0 && !iter->processed)
+ return ret;
+ }
+
+ trace_iomap_iter(iter, ops, _RET_IP_);
+ ret = iomap_iter_advance(iter);
+ if (ret <= 0)
+ return ret;
+
+ ret = ops->iomap_begin(iter->inode, iter->pos, iter->len, iter->flags,
+ &iter->iomap, &iter->srcmap);
+ if (ret < 0)
+ return ret;
+ iomap_iter_done(iter);
+ return 1;
+}
diff --git a/fs/iomap/seek.c b/fs/iomap/seek.c
index ce6fb810854f..a845c012b50c 100644
--- a/fs/iomap/seek.c
+++ b/fs/iomap/seek.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2017 Red Hat, Inc.
- * Copyright (c) 2018 Christoph Hellwig.
+ * Copyright (c) 2018-2021 Christoph Hellwig.
*/
#include <linux/module.h>
#include <linux/compiler.h>
@@ -10,21 +10,20 @@
#include <linux/pagemap.h>
#include <linux/pagevec.h>
-static loff_t
-iomap_seek_hole_actor(struct inode *inode, loff_t start, loff_t length,
- void *data, struct iomap *iomap, struct iomap *srcmap)
+static loff_t iomap_seek_hole_iter(const struct iomap_iter *iter,
+ loff_t *hole_pos)
{
- loff_t offset = start;
+ loff_t length = iomap_length(iter);
- switch (iomap->type) {
+ switch (iter->iomap.type) {
case IOMAP_UNWRITTEN:
- offset = mapping_seek_hole_data(inode->i_mapping, start,
- start + length, SEEK_HOLE);
- if (offset == start + length)
+ *hole_pos = mapping_seek_hole_data(iter->inode->i_mapping,
+ iter->pos, iter->pos + length, SEEK_HOLE);
+ if (*hole_pos == iter->pos + length)
return length;
- fallthrough;
+ return 0;
case IOMAP_HOLE:
- *(loff_t *)data = offset;
+ *hole_pos = iter->pos;
return 0;
default:
return length;
@@ -32,70 +31,73 @@ iomap_seek_hole_actor(struct inode *inode, loff_t start, loff_t length,
}
loff_t
-iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
+iomap_seek_hole(struct inode *inode, loff_t pos, const struct iomap_ops *ops)
{
loff_t size = i_size_read(inode);
- loff_t ret;
+ struct iomap_iter iter = {
+ .inode = inode,
+ .pos = pos,
+ .flags = IOMAP_REPORT,
+ };
+ int ret;
/* Nothing to be found before or beyond the end of the file. */
- if (offset < 0 || offset >= size)
+ if (pos < 0 || pos >= size)
return -ENXIO;
- while (offset < size) {
- ret = iomap_apply(inode, offset, size - offset, IOMAP_REPORT,
- ops, &offset, iomap_seek_hole_actor);
- if (ret < 0)
- return ret;
- if (ret == 0)
- break;
- offset += ret;
- }
-
- return offset;
+ iter.len = size - pos;
+ while ((ret = iomap_iter(&iter, ops)) > 0)
+ iter.processed = iomap_seek_hole_iter(&iter, &pos);
+ if (ret < 0)
+ return ret;
+ if (iter.len) /* found hole before EOF */
+ return pos;
+ return size;
}
EXPORT_SYMBOL_GPL(iomap_seek_hole);
-static loff_t
-iomap_seek_data_actor(struct inode *inode, loff_t start, loff_t length,
- void *data, struct iomap *iomap, struct iomap *srcmap)
+static loff_t iomap_seek_data_iter(const struct iomap_iter *iter,
+ loff_t *hole_pos)
{
- loff_t offset = start;
+ loff_t length = iomap_length(iter);
- switch (iomap->type) {
+ switch (iter->iomap.type) {
case IOMAP_HOLE:
return length;
case IOMAP_UNWRITTEN:
- offset = mapping_seek_hole_data(inode->i_mapping, start,
- start + length, SEEK_DATA);
- if (offset < 0)
+ *hole_pos = mapping_seek_hole_data(iter->inode->i_mapping,
+ iter->pos, iter->pos + length, SEEK_DATA);
+ if (*hole_pos < 0)
return length;
- fallthrough;
+ return 0;
default:
- *(loff_t *)data = offset;
+ *hole_pos = iter->pos;
return 0;
}
}
loff_t
-iomap_seek_data(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
+iomap_seek_data(struct inode *inode, loff_t pos, const struct iomap_ops *ops)
{
loff_t size = i_size_read(inode);
- loff_t ret;
+ struct iomap_iter iter = {
+ .inode = inode,
+ .pos = pos,
+ .flags = IOMAP_REPORT,
+ };
+ int ret;
/* Nothing to be found before or beyond the end of the file. */
- if (offset < 0 || offset >= size)
+ if (pos < 0 || pos >= size)
return -ENXIO;
- while (offset < size) {
- ret = iomap_apply(inode, offset, size - offset, IOMAP_REPORT,
- ops, &offset, iomap_seek_data_actor);
- if (ret < 0)
- return ret;
- if (ret == 0)
- return offset;
- offset += ret;
- }
-
+ iter.len = size - pos;
+ while ((ret = iomap_iter(&iter, ops)) > 0)
+ iter.processed = iomap_seek_data_iter(&iter, &pos);
+ if (ret < 0)
+ return ret;
+ if (iter.len) /* found data before EOF */
+ return pos;
/* We've reached the end of the file without finding data */
return -ENXIO;
}
diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c
index 6250ca6a1f85..5fc0ac36dee3 100644
--- a/fs/iomap/swapfile.c
+++ b/fs/iomap/swapfile.c
@@ -31,11 +31,16 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
{
struct iomap *iomap = &isi->iomap;
unsigned long nr_pages;
+ unsigned long max_pages;
uint64_t first_ppage;
uint64_t first_ppage_reported;
uint64_t next_ppage;
int error;
+ if (unlikely(isi->nr_pages >= isi->sis->max))
+ return 0;
+ max_pages = isi->sis->max - isi->nr_pages;
+
/*
* Round the start up and the end down so that the physical
* extent aligns to a page boundary.
@@ -48,6 +53,7 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
if (first_ppage >= next_ppage)
return 0;
nr_pages = next_ppage - first_ppage;
+ nr_pages = min(nr_pages, max_pages);
/*
* Calculate how much swap space we're adding; the first page contains
@@ -88,13 +94,9 @@ static int iomap_swapfile_fail(struct iomap_swapfile_info *isi, const char *str)
* swap only cares about contiguous page-aligned physical extents and makes no
* distinction between written and unwritten extents.
*/
-static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
- loff_t count, void *data, struct iomap *iomap,
- struct iomap *srcmap)
+static loff_t iomap_swapfile_iter(const struct iomap_iter *iter,
+ struct iomap *iomap, struct iomap_swapfile_info *isi)
{
- struct iomap_swapfile_info *isi = data;
- int error;
-
switch (iomap->type) {
case IOMAP_MAPPED:
case IOMAP_UNWRITTEN:
@@ -125,12 +127,12 @@ static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
isi->iomap.length += iomap->length;
} else {
/* Otherwise, add the retained iomap and store this one. */
- error = iomap_swapfile_add_extent(isi);
+ int error = iomap_swapfile_add_extent(isi);
if (error)
return error;
memcpy(&isi->iomap, iomap, sizeof(isi->iomap));
}
- return count;
+ return iomap_length(iter);
}
/*
@@ -141,16 +143,19 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
struct file *swap_file, sector_t *pagespan,
const struct iomap_ops *ops)
{
+ struct inode *inode = swap_file->f_mapping->host;
+ struct iomap_iter iter = {
+ .inode = inode,
+ .pos = 0,
+ .len = ALIGN_DOWN(i_size_read(inode), PAGE_SIZE),
+ .flags = IOMAP_REPORT,
+ };
struct iomap_swapfile_info isi = {
.sis = sis,
.lowest_ppage = (sector_t)-1ULL,
.file = swap_file,
};
- struct address_space *mapping = swap_file->f_mapping;
- struct inode *inode = mapping->host;
- loff_t pos = 0;
- loff_t len = ALIGN_DOWN(i_size_read(inode), PAGE_SIZE);
- loff_t ret;
+ int ret;
/*
* Persist all file mapping metadata so that we won't have any
@@ -160,15 +165,10 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
if (ret)
return ret;
- while (len > 0) {
- ret = iomap_apply(inode, pos, len, IOMAP_REPORT,
- ops, &isi, iomap_swapfile_activate_actor);
- if (ret <= 0)
- return ret;
-
- pos += ret;
- len -= ret;
- }
+ while ((ret = iomap_iter(&iter, ops)) > 0)
+ iter.processed = iomap_swapfile_iter(&iter, &iter.iomap, &isi);
+ if (ret < 0)
+ return ret;
if (isi.iomap.length) {
ret = iomap_swapfile_add_extent(&isi);
diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
index fdc7ae388476..65e39785c284 100644
--- a/fs/iomap/trace.h
+++ b/fs/iomap/trace.h
@@ -1,9 +1,18 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Copyright (c) 2009-2019 Christoph Hellwig
+ * Copyright (c) 2009-2021 Christoph Hellwig
*
- * NOTE: none of these tracepoints shall be consider a stable kernel ABI
+ * NOTE: none of these tracepoints shall be considered a stable kernel ABI
* as they can change at any time.
+ *
+ * Current conventions for printing numbers measuring specific units:
+ *
+ * offset: byte offset into a subcomponent of a file operation
+ * pos: file offset, in bytes
+ * length: length of a file operation, in bytes
+ * ino: inode number
+ *
+ * Numbers describing space allocations should be formatted in hexadecimal.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM iomap
@@ -42,14 +51,14 @@ DEFINE_READPAGE_EVENT(iomap_readpage);
DEFINE_READPAGE_EVENT(iomap_readahead);
DECLARE_EVENT_CLASS(iomap_range_class,
- TP_PROTO(struct inode *inode, unsigned long off, unsigned int len),
+ TP_PROTO(struct inode *inode, loff_t off, u64 len),
TP_ARGS(inode, off, len),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(u64, ino)
__field(loff_t, size)
- __field(unsigned long, offset)
- __field(unsigned int, length)
+ __field(loff_t, offset)
+ __field(u64, length)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
@@ -58,8 +67,7 @@ DECLARE_EVENT_CLASS(iomap_range_class,
__entry->offset = off;
__entry->length = len;
),
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset %lx "
- "length %x",
+ TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx length 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->size,
@@ -69,7 +77,7 @@ DECLARE_EVENT_CLASS(iomap_range_class,
#define DEFINE_RANGE_EVENT(name) \
DEFINE_EVENT(iomap_range_class, name, \
- TP_PROTO(struct inode *inode, unsigned long off, unsigned int len),\
+ TP_PROTO(struct inode *inode, loff_t off, u64 len),\
TP_ARGS(inode, off, len))
DEFINE_RANGE_EVENT(iomap_writepage);
DEFINE_RANGE_EVENT(iomap_releasepage);
@@ -122,8 +130,8 @@ DECLARE_EVENT_CLASS(iomap_class,
__entry->flags = iomap->flags;
__entry->bdev = iomap->bdev ? iomap->bdev->bd_dev : 0;
),
- TP_printk("dev %d:%d ino 0x%llx bdev %d:%d addr %lld offset %lld "
- "length %llu type %s flags %s",
+ TP_printk("dev %d:%d ino 0x%llx bdev %d:%d addr 0x%llx offset 0x%llx "
+ "length 0x%llx type %s flags %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
MAJOR(__entry->bdev), MINOR(__entry->bdev),
@@ -138,36 +146,32 @@ DECLARE_EVENT_CLASS(iomap_class,
DEFINE_EVENT(iomap_class, name, \
TP_PROTO(struct inode *inode, struct iomap *iomap), \
TP_ARGS(inode, iomap))
-DEFINE_IOMAP_EVENT(iomap_apply_dstmap);
-DEFINE_IOMAP_EVENT(iomap_apply_srcmap);
+DEFINE_IOMAP_EVENT(iomap_iter_dstmap);
+DEFINE_IOMAP_EVENT(iomap_iter_srcmap);
-TRACE_EVENT(iomap_apply,
- TP_PROTO(struct inode *inode, loff_t pos, loff_t length,
- unsigned int flags, const void *ops, void *actor,
- unsigned long caller),
- TP_ARGS(inode, pos, length, flags, ops, actor, caller),
+TRACE_EVENT(iomap_iter,
+ TP_PROTO(struct iomap_iter *iter, const void *ops,
+ unsigned long caller),
+ TP_ARGS(iter, ops, caller),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(u64, ino)
__field(loff_t, pos)
- __field(loff_t, length)
+ __field(u64, length)
__field(unsigned int, flags)
__field(const void *, ops)
- __field(void *, actor)
__field(unsigned long, caller)
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
- __entry->ino = inode->i_ino;
- __entry->pos = pos;
- __entry->length = length;
- __entry->flags = flags;
+ __entry->dev = iter->inode->i_sb->s_dev;
+ __entry->ino = iter->inode->i_ino;
+ __entry->pos = iter->pos;
+ __entry->length = iomap_length(iter);
+ __entry->flags = iter->flags;
__entry->ops = ops;
- __entry->actor = actor;
__entry->caller = caller;
),
- TP_printk("dev %d:%d ino 0x%llx pos %lld length %lld flags %s (0x%x) "
- "ops %ps caller %pS actor %ps",
+ TP_printk("dev %d:%d ino 0x%llx pos 0x%llx length 0x%llx flags %s (0x%x) ops %ps caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->pos,
@@ -175,8 +179,7 @@ TRACE_EVENT(iomap_apply,
__print_flags(__entry->flags, "|", IOMAP_FLAGS_STRINGS),
__entry->flags,
__entry->ops,
- (void *)__entry->caller,
- __entry->actor)
+ (void *)__entry->caller)
);
#endif /* _IOMAP_TRACE_H */
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 21edc423b79f..678e2c51b855 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -155,7 +155,6 @@ struct iso9660_options{
unsigned int overriderockperm:1;
unsigned int uid_set:1;
unsigned int gid_set:1;
- unsigned int utf8:1;
unsigned char map;
unsigned char check;
unsigned int blocksize;
@@ -356,7 +355,6 @@ static int parse_options(char *options, struct iso9660_options *popt)
popt->gid = GLOBAL_ROOT_GID;
popt->uid = GLOBAL_ROOT_UID;
popt->iocharset = NULL;
- popt->utf8 = 0;
popt->overriderockperm = 0;
popt->session=-1;
popt->sbsector=-1;
@@ -389,10 +387,13 @@ static int parse_options(char *options, struct iso9660_options *popt)
case Opt_cruft:
popt->cruft = 1;
break;
+#ifdef CONFIG_JOLIET
case Opt_utf8:
- popt->utf8 = 1;
+ kfree(popt->iocharset);
+ popt->iocharset = kstrdup("utf8", GFP_KERNEL);
+ if (!popt->iocharset)
+ return 0;
break;
-#ifdef CONFIG_JOLIET
case Opt_iocharset:
kfree(popt->iocharset);
popt->iocharset = match_strdup(&args[0]);
@@ -495,7 +496,6 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
if (sbi->s_nocompress) seq_puts(m, ",nocompress");
if (sbi->s_overriderockperm) seq_puts(m, ",overriderockperm");
if (sbi->s_showassoc) seq_puts(m, ",showassoc");
- if (sbi->s_utf8) seq_puts(m, ",utf8");
if (sbi->s_check) seq_printf(m, ",check=%c", sbi->s_check);
if (sbi->s_mapping) seq_printf(m, ",map=%c", sbi->s_mapping);
@@ -518,9 +518,10 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
seq_printf(m, ",fmode=%o", sbi->s_fmode);
#ifdef CONFIG_JOLIET
- if (sbi->s_nls_iocharset &&
- strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0)
+ if (sbi->s_nls_iocharset)
seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset);
+ else
+ seq_puts(m, ",iocharset=utf8");
#endif
return 0;
}
@@ -863,14 +864,13 @@ root_found:
sbi->s_nls_iocharset = NULL;
#ifdef CONFIG_JOLIET
- if (joliet_level && opt.utf8 == 0) {
+ if (joliet_level) {
char *p = opt.iocharset ? opt.iocharset : CONFIG_NLS_DEFAULT;
- sbi->s_nls_iocharset = load_nls(p);
- if (! sbi->s_nls_iocharset) {
- /* Fail only if explicit charset specified */
- if (opt.iocharset)
+ if (strcmp(p, "utf8") != 0) {
+ sbi->s_nls_iocharset = opt.iocharset ?
+ load_nls(opt.iocharset) : load_nls_default();
+ if (!sbi->s_nls_iocharset)
goto out_freesbi;
- sbi->s_nls_iocharset = load_nls_default();
}
}
#endif
@@ -886,7 +886,6 @@ root_found:
sbi->s_gid = opt.gid;
sbi->s_uid_set = opt.uid_set;
sbi->s_gid_set = opt.gid_set;
- sbi->s_utf8 = opt.utf8;
sbi->s_nocompress = opt.nocompress;
sbi->s_overriderockperm = opt.overriderockperm;
/*
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 055ec6c586f7..dcdc191ed183 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -44,7 +44,6 @@ struct isofs_sb_info {
unsigned char s_session;
unsigned int s_high_sierra:1;
unsigned int s_rock:2;
- unsigned int s_utf8:1;
unsigned int s_cruft:1; /* Broken disks with high byte of length
* containing junk */
unsigned int s_nocompress:1;
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c
index be8b6a9d0b92..c0f04a1e7f69 100644
--- a/fs/isofs/joliet.c
+++ b/fs/isofs/joliet.c
@@ -41,14 +41,12 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls)
int
get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
{
- unsigned char utf8;
struct nls_table *nls;
unsigned char len = 0;
- utf8 = ISOFS_SB(inode->i_sb)->s_utf8;
nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;
- if (utf8) {
+ if (!nls) {
len = utf16s_to_utf8s((const wchar_t *) de->name,
de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
outname, PAGE_SIZE);
diff --git a/fs/ksmbd/Kconfig b/fs/ksmbd/Kconfig
new file mode 100644
index 000000000000..b83cbd756ae5
--- /dev/null
+++ b/fs/ksmbd/Kconfig
@@ -0,0 +1,68 @@
+config SMB_SERVER
+ tristate "SMB3 server support (EXPERIMENTAL)"
+ depends on INET
+ depends on MULTIUSER
+ depends on FILE_LOCKING
+ select NLS
+ select NLS_UTF8
+ select CRYPTO
+ select CRYPTO_MD4
+ select CRYPTO_MD5
+ select CRYPTO_HMAC
+ select CRYPTO_ECB
+ select CRYPTO_LIB_DES
+ select CRYPTO_SHA256
+ select CRYPTO_CMAC
+ select CRYPTO_SHA512
+ select CRYPTO_AEAD2
+ select CRYPTO_CCM
+ select CRYPTO_GCM
+ select ASN1
+ select OID_REGISTRY
+ default n
+ help
+ Choose Y here if you want to allow SMB3 compliant clients
+ to access files residing on this system using SMB3 protocol.
+ To compile the SMB3 server support as a module,
+ choose M here: the module will be called ksmbd.
+
+ You may choose to use a samba server instead, in which
+ case you can choose N here.
+
+ You also need to install user space programs which can be found
+ in ksmbd-tools, available from
+ https://github.com/cifsd-team/ksmbd-tools.
+ More detail about how to run the ksmbd kernel server is
+ available via README file
+ (https://github.com/cifsd-team/ksmbd-tools/blob/master/README).
+
+ ksmbd kernel server includes support for auto-negotiation,
+ Secure negotiate, Pre-authentication integrity, oplock/lease,
+ compound requests, multi-credit, packet signing, RDMA(smbdirect),
+ smb3 encryption, copy-offload, secure per-user session
+ establishment via NTLM or NTLMv2.
+
+config SMB_SERVER_SMBDIRECT
+ bool "Support for SMB Direct protocol"
+ depends on SMB_SERVER=m && INFINIBAND && INFINIBAND_ADDR_TRANS || SMB_SERVER=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y
+ select SG_POOL
+ default n
+
+ help
+ Enables SMB Direct support for SMB 3.0, 3.02 and 3.1.1.
+
+ SMB Direct allows transferring SMB packets over RDMA. If unsure,
+ say N.
+
+config SMB_SERVER_CHECK_CAP_NET_ADMIN
+ bool "Enable check network administration capability"
+ depends on SMB_SERVER
+ default y
+
+ help
+ Prevent unprivileged processes from starting the ksmbd kernel server.
+
+config SMB_SERVER_KERBEROS5
+ bool "Support for Kerberos 5"
+ depends on SMB_SERVER
+ default n
diff --git a/fs/ksmbd/Makefile b/fs/ksmbd/Makefile
new file mode 100644
index 000000000000..7d6337a7dee4
--- /dev/null
+++ b/fs/ksmbd/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Makefile for Linux SMB3 kernel server
+#
+obj-$(CONFIG_SMB_SERVER) += ksmbd.o
+
+ksmbd-y := unicode.o auth.o vfs.o vfs_cache.o server.o ndr.o \
+ misc.o oplock.o connection.o ksmbd_work.o crypto_ctx.o \
+ mgmt/ksmbd_ida.o mgmt/user_config.o mgmt/share_config.o \
+ mgmt/tree_connect.o mgmt/user_session.o smb_common.o \
+ transport_tcp.o transport_ipc.o smbacl.o smb2pdu.o \
+ smb2ops.o smb2misc.o ksmbd_spnego_negtokeninit.asn1.o \
+ ksmbd_spnego_negtokentarg.asn1.o asn1.o
+
+$(obj)/asn1.o: $(obj)/ksmbd_spnego_negtokeninit.asn1.h $(obj)/ksmbd_spnego_negtokentarg.asn1.h
+
+$(obj)/ksmbd_spnego_negtokeninit.asn1.o: $(obj)/ksmbd_spnego_negtokeninit.asn1.c $(obj)/ksmbd_spnego_negtokeninit.asn1.h
+$(obj)/ksmbd_spnego_negtokentarg.asn1.o: $(obj)/ksmbd_spnego_negtokentarg.asn1.c $(obj)/ksmbd_spnego_negtokentarg.asn1.h
+
+ksmbd-$(CONFIG_SMB_SERVER_SMBDIRECT) += transport_rdma.o
diff --git a/fs/ksmbd/asn1.c b/fs/ksmbd/asn1.c
new file mode 100644
index 000000000000..b014f4638610
--- /dev/null
+++ b/fs/ksmbd/asn1.c
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * The ASN.1/BER parsing code is derived from ip_nat_snmp_basic.c which was in
+ * turn derived from the gxsnmp package by Gregory McLean & Jochen Friedrich
+ *
+ * Copyright (c) 2000 RP Internet (www.rpi.net.au).
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/oid_registry.h>
+
+#include "glob.h"
+
+#include "asn1.h"
+#include "connection.h"
+#include "auth.h"
+#include "ksmbd_spnego_negtokeninit.asn1.h"
+#include "ksmbd_spnego_negtokentarg.asn1.h"
+
+/* Number of arcs in each of the decoded OID tables below. */
+#define SPNEGO_OID_LEN 7
+#define NTLMSSP_OID_LEN 10
+#define KRB5_OID_LEN 7
+#define KRB5U2U_OID_LEN 8
+#define MSKRB5_OID_LEN 7
+/*
+ * Mechanism OIDs in decoded form, one array element per arc, in the same
+ * layout asn1_oid_decode() produces so they can be compared with oid_eq().
+ */
+static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 };
+static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 };
+static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 };
+static unsigned long KRB5U2U_OID[8] = { 1, 2, 840, 113554, 1, 2, 2, 3 };
+static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 };
+
+/*
+ * BER content bytes of NTLMSSP_OID (0x2b packs the leading "1.3",
+ * 0x82 0x37 is arc 311), copied verbatim into the blob built by
+ * build_spnego_ntlmssp_neg_blob().
+ */
+static char NTLMSSP_OID_STR[NTLMSSP_OID_LEN] = { 0x2b, 0x06, 0x01, 0x04, 0x01,
+ 0x82, 0x37, 0x02, 0x02, 0x0a };
+
+/*
+ * Decode one base-128 sub-identifier starting at *@begin.
+ *
+ * Each byte contributes its low seven bits, most significant group first;
+ * a set top bit means another byte follows.  On success the value is stored
+ * in *@subid and *@begin is moved past the bytes consumed.  If @end is
+ * reached before a terminating byte is seen, false is returned and *@begin
+ * is left untouched (*@subid then holds a partial value).
+ */
+static bool
+asn1_subid_decode(const unsigned char **begin, const unsigned char *end,
+		  unsigned long *subid)
+{
+	const unsigned char *cur = *begin;
+	unsigned char octet;
+
+	*subid = 0;
+
+	for (;;) {
+		if (cur >= end)
+			return false;
+
+		octet = *cur++;
+		*subid = (*subid << 7) | (octet & 0x7F);
+
+		/* a clear continuation bit marks the last byte */
+		if ((octet & 0x80) != 0x80)
+			break;
+	}
+
+	*begin = cur;
+	return true;
+}
+
+/*
+ * Decode the content bytes of a BER OBJECT IDENTIFIER into an array of arcs.
+ *
+ * @value/@vlen: encoded OID content
+ * @oid: receives a kmalloc()ed array of arcs on success, NULL on failure
+ * @oidlen: receives the number of arcs stored in *@oid on success
+ *
+ * Returns true on success; the callers in this file release *@oid with
+ * kfree(). Returns false on empty or oversized input, allocation failure,
+ * or a truncated sub-identifier.
+ */
+static bool asn1_oid_decode(const unsigned char *value, size_t vlen,
+ unsigned long **oid, size_t *oidlen)
+{
+ const unsigned char *iptr = value, *end = value + vlen;
+ unsigned long *optr;
+ unsigned long subid;
+
+ /*
+ * The first sub-identifier expands into two arcs and every further
+ * input byte yields at most one, so vlen + 1 slots always suffice.
+ * The check rejects empty input and sizes whose byte count would
+ * exceed UINT_MAX.
+ */
+ vlen += 1;
+ if (vlen < 2 || vlen > UINT_MAX / sizeof(unsigned long))
+ goto fail_nullify;
+
+ *oid = kmalloc(vlen * sizeof(unsigned long), GFP_KERNEL);
+ if (!*oid)
+ return false;
+
+ optr = *oid;
+
+ if (!asn1_subid_decode(&iptr, end, &subid))
+ goto fail;
+
+ /* the first sub-identifier packs the first two arcs as 40 * X + Y */
+ if (subid < 40) {
+ optr[0] = 0;
+ optr[1] = subid;
+ } else if (subid < 80) {
+ optr[0] = 1;
+ optr[1] = subid - 40;
+ } else {
+ optr[0] = 2;
+ optr[1] = subid - 80;
+ }
+
+ *oidlen = 2;
+ optr += 2;
+
+ /* every remaining sub-identifier is one arc */
+ while (iptr < end) {
+ /* never write past the vlen slots allocated above */
+ if (++(*oidlen) > vlen)
+ goto fail;
+
+ if (!asn1_subid_decode(&iptr, end, optr++))
+ goto fail;
+ }
+ return true;
+
+fail:
+ kfree(*oid);
+fail_nullify:
+ *oid = NULL;
+ return false;
+}
+
+/*
+ * oid_eq() - compare two decoded object identifiers
+ * @oid1, @oid1len: first OID and its number of arcs
+ * @oid2, @oid2len: second OID and its number of arcs
+ *
+ * The lengths count array elements, not bytes, so the byte count handed to
+ * memcmp() has to be scaled by the element size.  Comparing only @oid1len
+ * bytes would look at little more than the first arc and treat different
+ * OIDs of equal length as identical.
+ *
+ * Return: true when both OIDs have the same length and the same arcs.
+ */
+static bool oid_eq(unsigned long *oid1, unsigned int oid1len,
+		   unsigned long *oid2, unsigned int oid2len)
+{
+	if (oid1len != oid2len)
+		return false;
+
+	return memcmp(oid1, oid2, oid1len * sizeof(*oid1)) == 0;
+}
+
+/*
+ * Run the generated negTokenInit BER decoder over @security_blob (@length
+ * bytes). @conn is passed through as the decoder's context argument.
+ * Returns whatever asn1_ber_decoder() returns.
+ */
+int
+ksmbd_decode_negTokenInit(unsigned char *security_blob, int length,
+ struct ksmbd_conn *conn)
+{
+ return asn1_ber_decoder(&ksmbd_spnego_negtokeninit_decoder, conn,
+ security_blob, length);
+}
+
+/*
+ * Run the generated negTokenTarg BER decoder over @security_blob (@length
+ * bytes). @conn is passed through as the decoder's context argument.
+ * Returns whatever asn1_ber_decoder() returns.
+ */
+int
+ksmbd_decode_negTokenTarg(unsigned char *security_blob, int length,
+ struct ksmbd_conn *conn)
+{
+ return asn1_ber_decoder(&ksmbd_spnego_negtokentarg_decoder, conn,
+ security_blob, length);
+}
+
+/*
+ * Number of extra length bytes needed to encode @len: 0 when it fits the
+ * single-byte form (up to 0x7F, negative values included), otherwise the
+ * count of bytes, 1 to 4, needed to hold the value.
+ */
+static int compute_asn_hdr_len_bytes(int len)
+{
+	if (len <= 0x7F)
+		return 0;
+	if (len <= 0xFF)
+		return 1;
+	if (len <= 0xFFFF)
+		return 2;
+	if (len <= 0xFFFFFF)
+		return 3;
+	return 4;
+}
+
+/*
+ * Write two nested headers into @buf starting at *@ofs: an outer one with
+ * identifier byte @tag and, directly inside it, one with identifier byte
+ * @seq that announces @length content bytes.  *@ofs is advanced past both
+ * headers; the caller appends the content itself.
+ *
+ * NOTE(review): the number of length bytes is derived from @length and is
+ * reused for the outer header, whose value is slightly larger - confirm no
+ * caller passes a @length close enough to a size boundary for that to matter.
+ */
+static void encode_asn_tag(char *buf, unsigned int *ofs, char tag, char seq,
+			   int length)
+{
+	const char ident[2] = { tag, seq };
+	int nbytes = compute_asn_hdr_len_bytes(length);
+	/* the outer element also spans the inner identifier and length bytes */
+	int value = length + 2 + nbytes;
+	unsigned int pos = *ofs;
+	int k, shift;
+
+	for (k = 0; k < 2; k++) {
+		buf[pos++] = ident[k];
+
+		if (nbytes == 0) {
+			/* single length byte */
+			buf[pos++] = value;
+		} else {
+			/* 0x80 | count, then the length, most significant byte first */
+			buf[pos++] = 0x80 | nbytes;
+			for (shift = (nbytes - 1) * 8; shift >= 0; shift -= 8)
+				buf[pos++] = (value >> shift) & 0xFF;
+		}
+
+		/* the inner header announces just the content length */
+		value = length;
+	}
+
+	*ofs = pos;
+}
+
+/*
+ * Build a SPNEGO blob wrapping an NTLMSSP message.
+ *
+ * The blob consists of a 0xa1/0x30 header followed by three elements: a
+ * 0xa0/0x0a element with the single value byte 1, a 0xa1/0x06 element
+ * holding the encoded NTLMSSP OID, and a 0xa2/0x04 element holding
+ * @ntlm_blob (@ntlm_blob_len bytes).
+ *
+ * On success *@pbuffer points to a kmalloc()ed buffer and *@buflen holds
+ * its length.  Returns 0 on success or -ENOMEM.
+ */
+int build_spnego_ntlmssp_neg_blob(unsigned char **pbuffer, u16 *buflen,
+				  char *ntlm_blob, int ntlm_blob_len)
+{
+	/* every element costs two identifier/length pairs plus its content */
+	int result_sz = 4 + compute_asn_hdr_len_bytes(1) * 2 + 1;
+	int mech_sz = 4 + compute_asn_hdr_len_bytes(NTLMSSP_OID_LEN) * 2 +
+		      NTLMSSP_OID_LEN;
+	int token_sz = 4 + compute_asn_hdr_len_bytes(ntlm_blob_len) * 2 +
+		       ntlm_blob_len;
+	int inner_sz = result_sz + mech_sz + token_sz;
+	int total_len = 4 + compute_asn_hdr_len_bytes(inner_sz) * 2 + inner_sz;
+	unsigned int pos = 0;
+	char *out;
+
+	out = kmalloc(total_len, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	/* outer header spanning all three elements */
+	encode_asn_tag(out, &pos, 0xa1, 0x30, inner_sz);
+
+	/* negotiation result: one byte, value 1 */
+	encode_asn_tag(out, &pos, 0xa0, 0x0a, 1);
+	out[pos++] = 1;
+
+	/* mechanism OID */
+	encode_asn_tag(out, &pos, 0xa1, 0x06, NTLMSSP_OID_LEN);
+	memcpy(out + pos, NTLMSSP_OID_STR, NTLMSSP_OID_LEN);
+	pos += NTLMSSP_OID_LEN;
+
+	/* response token: the NTLMSSP blob itself */
+	encode_asn_tag(out, &pos, 0xa2, 0x04, ntlm_blob_len);
+	memcpy(out + pos, ntlm_blob, ntlm_blob_len);
+	pos += ntlm_blob_len;
+
+	*pbuffer = out;
+	*buflen = total_len;
+	return 0;
+}
+
+/*
+ * Build a SPNEGO blob that carries only a negotiation result.
+ *
+ * The result byte is 2 when @neg_result is non-zero and 0 otherwise.  On
+ * success *@pbuffer points to a kmalloc()ed buffer and *@buflen holds its
+ * length.  Returns 0 on success or -ENOMEM.
+ */
+int build_spnego_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
+				   int neg_result)
+{
+	/* one element: two identifier/length pairs plus a single value byte */
+	int result_sz = 4 + compute_asn_hdr_len_bytes(1) * 2 + 1;
+	int total_len = 4 + compute_asn_hdr_len_bytes(result_sz) * 2 +
+			result_sz;
+	unsigned int pos = 0;
+	char *out;
+
+	out = kmalloc(total_len, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	/* outer header */
+	encode_asn_tag(out, &pos, 0xa1, 0x30, result_sz);
+
+	/* negotiation result */
+	encode_asn_tag(out, &pos, 0xa0, 0x0a, 1);
+	out[pos++] = neg_result ? 2 : 0;
+
+	*pbuffer = out;
+	*buflen = total_len;
+	return 0;
+}
+
+/*
+ * Decoder callback: accept the OID in @value/@vlen only if it is the SPNEGO
+ * OID.  Returns 0 on a match; otherwise logs the OID at AUTH debug level and
+ * returns -EBADMSG (also when the OID cannot be decoded).
+ */
+int ksmbd_gssapi_this_mech(void *context, size_t hdrlen, unsigned char tag,
+			   const void *value, size_t vlen)
+{
+	unsigned long *oid;
+	size_t oidlen;
+	bool is_spnego = false;
+	char text[50];
+
+	if (asn1_oid_decode(value, vlen, &oid, &oidlen)) {
+		is_spnego = oid_eq(oid, oidlen, SPNEGO_OID, SPNEGO_OID_LEN);
+		kfree(oid);
+	}
+
+	if (is_spnego)
+		return 0;
+
+	sprint_oid(value, vlen, text, sizeof(text));
+	ksmbd_debug(AUTH, "Unexpected OID: %s\n", text);
+	return -EBADMSG;
+}
+
+/*
+ * Decoder callback: map the mechanism OID in @value/@vlen to a KSMBD_AUTH_*
+ * value and record it on the connection passed as @context.
+ *
+ * The mechanism is OR-ed into conn->auth_mechs and becomes
+ * conn->preferred_auth_mech if none has been recorded yet.  Returns 0 on
+ * success; an undecodable or unrecognised OID is logged at AUTH debug level
+ * and rejected with -EBADMSG.
+ */
+int ksmbd_neg_token_init_mech_type(void *context, size_t hdrlen,
+				   unsigned char tag, const void *value,
+				   size_t vlen)
+{
+	struct ksmbd_conn *conn = context;
+	unsigned long *oid;
+	size_t oidlen;
+	int mech = 0;
+	bool known;
+	char text[50];
+
+	known = asn1_oid_decode(value, vlen, &oid, &oidlen);
+	if (known) {
+		if (oid_eq(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN))
+			mech = KSMBD_AUTH_NTLMSSP;
+		else if (oid_eq(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN))
+			mech = KSMBD_AUTH_MSKRB5;
+		else if (oid_eq(oid, oidlen, KRB5_OID, KRB5_OID_LEN))
+			mech = KSMBD_AUTH_KRB5;
+		else if (oid_eq(oid, oidlen, KRB5U2U_OID, KRB5U2U_OID_LEN))
+			mech = KSMBD_AUTH_KRB5U2U;
+		else
+			known = false;
+	}
+
+	/* oid is NULL when decoding failed, which kfree() accepts */
+	kfree(oid);
+
+	if (!known) {
+		sprint_oid(value, vlen, text, sizeof(text));
+		ksmbd_debug(AUTH, "Unexpected OID: %s\n", text);
+		return -EBADMSG;
+	}
+
+	conn->auth_mechs |= mech;
+	if (conn->preferred_auth_mech == 0)
+		conn->preferred_auth_mech = mech;
+	return 0;
+}
+
+/*
+ * Decoder callback: store a NUL-terminated copy of the @vlen bytes at @value
+ * in conn->mechToken, where conn is the connection passed as @context.
+ * Returns 0 on success or -ENOMEM.
+ *
+ * NOTE(review): any previous conn->mechToken is overwritten without being
+ * freed here - confirm the caller releases it between decodes.
+ */
+int ksmbd_neg_token_init_mech_token(void *context, size_t hdrlen,
+ unsigned char tag, const void *value,
+ size_t vlen)
+{
+ struct ksmbd_conn *conn = context;
+
+ /* one extra byte for the terminator written below */
+ conn->mechToken = kmalloc(vlen + 1, GFP_KERNEL);
+ if (!conn->mechToken)
+ return -ENOMEM;
+
+ memcpy(conn->mechToken, value, vlen);
+ conn->mechToken[vlen] = '\0';
+ return 0;
+}
+
+/*
+ * Decoder callback: store a NUL-terminated copy of the @vlen bytes at @value
+ * in conn->mechToken, where conn is the connection passed as @context.
+ * Returns 0 on success or -ENOMEM.
+ *
+ * NOTE(review): any previous conn->mechToken is overwritten without being
+ * freed here - confirm the caller releases it between decodes.
+ */
+int ksmbd_neg_token_targ_resp_token(void *context, size_t hdrlen,
+ unsigned char tag, const void *value,
+ size_t vlen)
+{
+ struct ksmbd_conn *conn = context;
+
+ /* one extra byte for the terminator written below */
+ conn->mechToken = kmalloc(vlen + 1, GFP_KERNEL);
+ if (!conn->mechToken)
+ return -ENOMEM;
+
+ memcpy(conn->mechToken, value, vlen);
+ conn->mechToken[vlen] = '\0';
+ return 0;
+}
diff --git a/fs/ksmbd/asn1.h b/fs/ksmbd/asn1.h
new file mode 100644
index 000000000000..ce105f4ce305
--- /dev/null
+++ b/fs/ksmbd/asn1.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * The ASN.1/BER parsing code is derived from ip_nat_snmp_basic.c which was in
+ * turn derived from the gxsnmp package by Gregory McLean & Jochen Friedrich
+ *
+ * Copyright (c) 2000 RP Internet (www.rpi.net.au).
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __ASN1_H__
+#define __ASN1_H__
+
+int ksmbd_decode_negTokenInit(unsigned char *security_blob, int length,
+ struct ksmbd_conn *conn);
+int ksmbd_decode_negTokenTarg(unsigned char *security_blob, int length,
+ struct ksmbd_conn *conn);
+int build_spnego_ntlmssp_neg_blob(unsigned char **pbuffer, u16 *buflen,
+ char *ntlm_blob, int ntlm_blob_len);
+int build_spnego_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
+ int neg_result);
+#endif /* __ASN1_H__ */
diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c
new file mode 100644
index 000000000000..de36f12070bf
--- /dev/null
+++ b/fs/ksmbd/auth.c
@@ -0,0 +1,1364 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/backing-dev.h>
+#include <linux/writeback.h>
+#include <linux/uio.h>
+#include <linux/xattr.h>
+#include <crypto/hash.h>
+#include <crypto/aead.h>
+#include <linux/random.h>
+#include <linux/scatterlist.h>
+
+#include "auth.h"
+#include "glob.h"
+
+#include <linux/fips.h>
+#include <crypto/des.h>
+
+#include "server.h"
+#include "smb_common.h"
+#include "connection.h"
+#include "mgmt/user_session.h"
+#include "mgmt/user_config.h"
+#include "crypto_ctx.h"
+#include "transport_ipc.h"
+
+/*
+ * Pre-encoded security blob for the negotiate phase: a GSS header carrying
+ * the SPNEGO OID, the list of offered mechanism OIDs, and the fixed string
+ * "not_defined_in_RFC4178@please_ignore". Keeping it as static data means
+ * the blob never has to be encoded at run time.
+ *
+ * With CONFIG_SMB_SERVER_KERBEROS5 the table is 96 bytes and lists three
+ * mechanism OIDs (1.2.840.113554.1.2.2, 1.2.840.48018.1.2.2 and the
+ * NTLMSSP OID); without it the table is 74 bytes and lists only the
+ * NTLMSSP OID. AUTH_GSS_LENGTH is defined elsewhere and presumably
+ * matches the selected variant - verify when changing either.
+ */
+static char NEGOTIATE_GSS_HEADER[AUTH_GSS_LENGTH] = {
+#ifdef CONFIG_SMB_SERVER_KERBEROS5
+ 0x60, 0x5e, 0x06, 0x06, 0x2b, 0x06, 0x01, 0x05,
+ 0x05, 0x02, 0xa0, 0x54, 0x30, 0x52, 0xa0, 0x24,
+ 0x30, 0x22, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86,
+ 0xf7, 0x12, 0x01, 0x02, 0x02, 0x06, 0x09, 0x2a,
+ 0x86, 0x48, 0x82, 0xf7, 0x12, 0x01, 0x02, 0x02,
+ 0x06, 0x0a, 0x2b, 0x06, 0x01, 0x04, 0x01, 0x82,
+ 0x37, 0x02, 0x02, 0x0a, 0xa3, 0x2a, 0x30, 0x28,
+ 0xa0, 0x26, 0x1b, 0x24, 0x6e, 0x6f, 0x74, 0x5f,
+ 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x64, 0x5f,
+ 0x69, 0x6e, 0x5f, 0x52, 0x46, 0x43, 0x34, 0x31,
+ 0x37, 0x38, 0x40, 0x70, 0x6c, 0x65, 0x61, 0x73,
+ 0x65, 0x5f, 0x69, 0x67, 0x6e, 0x6f, 0x72, 0x65
+#else
+ 0x60, 0x48, 0x06, 0x06, 0x2b, 0x06, 0x01, 0x05,
+ 0x05, 0x02, 0xa0, 0x3e, 0x30, 0x3c, 0xa0, 0x0e,
+ 0x30, 0x0c, 0x06, 0x0a, 0x2b, 0x06, 0x01, 0x04,
+ 0x01, 0x82, 0x37, 0x02, 0x02, 0x0a, 0xa3, 0x2a,
+ 0x30, 0x28, 0xa0, 0x26, 0x1b, 0x24, 0x6e, 0x6f,
+ 0x74, 0x5f, 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65,
+ 0x64, 0x5f, 0x69, 0x6e, 0x5f, 0x52, 0x46, 0x43,
+ 0x34, 0x31, 0x37, 0x38, 0x40, 0x70, 0x6c, 0x65,
+ 0x61, 0x73, 0x65, 0x5f, 0x69, 0x67, 0x6e, 0x6f,
+ 0x72, 0x65
+#endif
+};
+
+/*
+ * Copy the static negotiate GSS header into @buf, which must have room for
+ * AUTH_GSS_LENGTH bytes.
+ */
+void ksmbd_copy_gss_neg_header(void *buf)
+{
+ memcpy(buf, NEGOTIATE_GSS_HEADER, AUTH_GSS_LENGTH);
+}
+
+/*
+ * Spread the 56 bits held in the first seven bytes of @str over the eight
+ * bytes of @key, seven bits per byte, leaving the least significant bit of
+ * every output byte clear.
+ */
+static void
+str_to_key(unsigned char *str, unsigned char *key)
+{
+	int n;
+
+	key[0] = str[0] >> 1;
+	for (n = 1; n < 7; n++) {
+		/* low n bits of the previous byte, then the top 7 - n bits of this one */
+		unsigned char carry = str[n - 1] & ((1 << n) - 1);
+
+		key[n] = (carry << (7 - n)) | (str[n] >> (n + 1));
+	}
+	key[7] = str[6] & 0x7F;
+
+	/* shift every 7-bit group up by one, freeing bit 0 */
+	for (n = 0; n < 8; n++)
+		key[n] <<= 1;
+}
+
+/*
+ * DES-encrypt one block: @in is encrypted into @out with a key built from
+ * the 7 bytes at @key via str_to_key(). Returns 0 on success, or -ENOENT
+ * when FIPS mode is enabled, since DES is refused in that case.
+ */
+static int
+smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
+{
+ unsigned char key2[8];
+ struct des_ctx ctx;
+
+ if (fips_enabled) {
+ ksmbd_debug(AUTH, "FIPS compliance enabled: DES not permitted\n");
+ return -ENOENT;
+ }
+
+ str_to_key(key, key2);
+ des_expand_key(&ctx, key2, DES_KEY_SIZE);
+ des_encrypt(&ctx, out, in);
+ /* wipe the expanded key schedule from the stack */
+ /* NOTE(review): key2 also holds key material and is not wiped here */
+ memzero_explicit(&ctx, sizeof(ctx));
+ return 0;
+}
+
+/*
+ * Produce 24 bytes at @p24 by encrypting the 8-byte block @c8 three times,
+ * each time keyed with the next 7-byte slice of the 21 bytes at @p21.
+ * Returns 0 on success or the first error reported by smbhash().
+ */
+static int ksmbd_enc_p24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
+{
+	int blk, rc;
+
+	for (blk = 0; blk < 3; blk++) {
+		rc = smbhash(p24 + 8 * blk, c8, p21 + 7 * blk);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+/*
+ * Compute the MD4 digest of the @link_len bytes at @link_str into @md4_hash.
+ * Returns 0 on success, -ENOMEM if no md4 crypto context is available, or
+ * the error from the failing crypto_shash_*() step.
+ */
+static int ksmbd_enc_md4(unsigned char *md4_hash, unsigned char *link_str,
+ int link_len)
+{
+ int rc;
+ struct ksmbd_crypto_ctx *ctx;
+
+ ctx = ksmbd_crypto_ctx_find_md4();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "Crypto md4 allocation error\n");
+ return -ENOMEM;
+ }
+
+ rc = crypto_shash_init(CRYPTO_MD4(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not init md4 shash\n");
+ goto out;
+ }
+
+ rc = crypto_shash_update(CRYPTO_MD4(ctx), link_str, link_len);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not update with link_str\n");
+ goto out;
+ }
+
+ rc = crypto_shash_final(CRYPTO_MD4(ctx), md4_hash);
+ if (rc)
+ ksmbd_debug(AUTH, "Could not generate md4 hash\n");
+out:
+ /* the context is handed back on success and on every error path */
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
+
+/*
+ * Compute MD5 over @len bytes of @server_challenge followed by @len bytes
+ * of @nonce and store the digest in @md5_hash. Returns 0 on success,
+ * -ENOMEM if no md5 crypto context is available, or the error from the
+ * failing crypto_shash_*() step.
+ */
+static int ksmbd_enc_update_sess_key(unsigned char *md5_hash, char *nonce,
+ char *server_challenge, int len)
+{
+ int rc;
+ struct ksmbd_crypto_ctx *ctx;
+
+ ctx = ksmbd_crypto_ctx_find_md5();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "Crypto md5 allocation error\n");
+ return -ENOMEM;
+ }
+
+ rc = crypto_shash_init(CRYPTO_MD5(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not init md5 shash\n");
+ goto out;
+ }
+
+ /* the challenge is hashed first, then the nonce */
+ rc = crypto_shash_update(CRYPTO_MD5(ctx), server_challenge, len);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not update with challenge\n");
+ goto out;
+ }
+
+ rc = crypto_shash_update(CRYPTO_MD5(ctx), nonce, len);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not update with nonce\n");
+ goto out;
+ }
+
+ rc = crypto_shash_final(CRYPTO_MD5(ctx), md5_hash);
+ if (rc)
+ ksmbd_debug(AUTH, "Could not generate md5 hash\n");
+out:
+ /* the context is handed back on success and on every error path */
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
+
+/**
+ * ksmbd_gen_sess_key() - derive the session key with HMAC-MD5
+ * @sess:	session whose sess_key field receives the result
+ * @hash:	HMAC key; CIFS_HMAC_MD5_HASH_SIZE bytes are used
+ * @hmac:	message to authenticate; SMB2_NTLMV2_SESSKEY_SIZE bytes are used
+ *
+ * Return:	0 on success, -ENOMEM if no hmacmd5 context is available,
+ *		otherwise the error from the failing crypto_shash_*() call
+ */
+static int ksmbd_gen_sess_key(struct ksmbd_session *sess, char *hash,
+			      char *hmac)
+{
+	struct ksmbd_crypto_ctx *ctx = ksmbd_crypto_ctx_find_hmacmd5();
+	int err;
+
+	if (!ctx) {
+		ksmbd_debug(AUTH, "could not crypto alloc hmacmd5\n");
+		return -ENOMEM;
+	}
+
+	err = crypto_shash_setkey(CRYPTO_HMACMD5_TFM(ctx), hash,
+				  CIFS_HMAC_MD5_HASH_SIZE);
+	if (err) {
+		ksmbd_debug(AUTH, "hmacmd5 set key fail error %d\n", err);
+		goto release;
+	}
+
+	err = crypto_shash_init(CRYPTO_HMACMD5(ctx));
+	if (err) {
+		ksmbd_debug(AUTH, "could not init hmacmd5 error %d\n", err);
+		goto release;
+	}
+
+	err = crypto_shash_update(CRYPTO_HMACMD5(ctx), hmac,
+				  SMB2_NTLMV2_SESSKEY_SIZE);
+	if (err) {
+		ksmbd_debug(AUTH, "Could not update with response error %d\n", err);
+		goto release;
+	}
+
+	err = crypto_shash_final(CRYPTO_HMACMD5(ctx), sess->sess_key);
+	if (err)
+		ksmbd_debug(AUTH, "Could not generate hmacmd5 hash error %d\n", err);
+
+release:
+	/* the context is handed back on success and on every error path */
+	ksmbd_release_crypto_ctx(ctx);
+	return err;
+}
+
+static int calc_ntlmv2_hash(struct ksmbd_session *sess, char *ntlmv2_hash,
+ char *dname)
+{
+ int ret, len, conv_len;
+ wchar_t *domain = NULL;
+ __le16 *uniname = NULL;
+ struct ksmbd_crypto_ctx *ctx;
+
+ ctx = ksmbd_crypto_ctx_find_hmacmd5();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "can't generate ntlmv2 hash\n");
+ return -ENOMEM;
+ }
+
+ ret = crypto_shash_setkey(CRYPTO_HMACMD5_TFM(ctx),
+ user_passkey(sess->user),
+ CIFS_ENCPWD_SIZE);
+ if (ret) {
+ ksmbd_debug(AUTH, "Could not set NT Hash as a key\n");
+ goto out;
+ }
+
+ ret = crypto_shash_init(CRYPTO_HMACMD5(ctx));
+ if (ret) {
+ ksmbd_debug(AUTH, "could not init hmacmd5\n");
+ goto out;
+ }
+
+ /* convert user_name to unicode */
+ len = strlen(user_name(sess->user));
+ uniname = kzalloc(2 + UNICODE_LEN(len), GFP_KERNEL);
+ if (!uniname) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ conv_len = smb_strtoUTF16(uniname, user_name(sess->user), len,
+ sess->conn->local_nls);
+ if (conv_len < 0 || conv_len > len) {
+ ret = -EINVAL;
+ goto out;
+ }
+ UniStrupr(uniname);
+
+ ret = crypto_shash_update(CRYPTO_HMACMD5(ctx),
+ (char *)uniname,
+ UNICODE_LEN(conv_len));
+ if (ret) {
+ ksmbd_debug(AUTH, "Could not update with user\n");
+ goto out;
+ }
+
+ /* Convert domain name or conn name to unicode and uppercase */
+ len = strlen(dname);
+ domain = kzalloc(2 + UNICODE_LEN(len), GFP_KERNEL);
+ if (!domain) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ conv_len = smb_strtoUTF16((__le16 *)domain, dname, len,
+ sess->conn->local_nls);
+ if (conv_len < 0 || conv_len > len) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = crypto_shash_update(CRYPTO_HMACMD5(ctx),
+ (char *)domain,
+ UNICODE_LEN(conv_len));
+ if (ret) {
+ ksmbd_debug(AUTH, "Could not update with domain\n");
+ goto out;
+ }
+
+ ret = crypto_shash_final(CRYPTO_HMACMD5(ctx), ntlmv2_hash);
+ if (ret)
+ ksmbd_debug(AUTH, "Could not generate md5 hash\n");
+out:
+ kfree(uniname);
+ kfree(domain);
+ ksmbd_release_crypto_ctx(ctx);
+ return ret;
+}
+
+/**
+ * ksmbd_auth_ntlm() - NTLM authentication handler
+ * @sess: session of connection
+ * @pw_buf: NTLM challenge response
+ *
+ * The expected response is derived from the user's passkey and the server
+ * challenge and then compared with @pw_buf. sess->sess_key and
+ * sess->sequence_number are updated before the comparison is made.
+ *
+ * Return: 0 on success, error number on error
+ */
+int ksmbd_auth_ntlm(struct ksmbd_session *sess, char *pw_buf)
+{
+ int rc;
+ unsigned char p21[21];
+ char key[CIFS_AUTH_RESP_SIZE];
+
+ memset(p21, '\0', 21);
+ memcpy(p21, user_passkey(sess->user), CIFS_NTHASH_SIZE);
+ rc = ksmbd_enc_p24(p21, sess->ntlmssp.cryptkey, key);
+ if (rc) {
+ pr_err("password processing failed\n");
+ return rc;
+ }
+
+ rc = ksmbd_enc_md4(sess->sess_key, user_passkey(sess->user),
+ CIFS_SMB1_SESSKEY_SIZE);
+ if (rc) {
+ pr_err("session key generation failed\n");
+ return rc;
+ }
+ memcpy(sess->sess_key + CIFS_SMB1_SESSKEY_SIZE, key,
+ CIFS_AUTH_RESP_SIZE);
+ sess->sequence_number = 1;
+
+ /*
+ * The response is binary data that may contain NUL bytes, so it must
+ * be compared over its full length rather than as a C string.
+ */
+ if (memcmp(pw_buf, key, CIFS_AUTH_RESP_SIZE) != 0) {
+ ksmbd_debug(AUTH, "ntlmv1 authentication failed\n");
+ return -EINVAL;
+ }
+
+ ksmbd_debug(AUTH, "ntlmv1 authentication pass\n");
+ return 0;
+}
+
+/**
+ * ksmbd_auth_ntlmv2() - NTLMv2 authentication handler
+ * @sess: session of connection
+ * @ntlmv2: NTLMv2 challenge response
+ * @blen: NTLMv2 blob length
+ * @domain_name: domain name
+ *
+ * Recomputes the response over the server challenge followed by the @blen
+ * bytes starting at @ntlmv2->blob_signature, generates the session key from
+ * it and compares the result with @ntlmv2->ntlmv2_hash.
+ *
+ * Return: 0 on success, error number on error
+ */
+int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
+ int blen, char *domain_name)
+{
+ char ntlmv2_hash[CIFS_ENCPWD_SIZE];
+ char ntlmv2_rsp[CIFS_HMAC_MD5_HASH_SIZE];
+ struct ksmbd_crypto_ctx *ctx;
+ char *construct = NULL;
+ int rc, len;
+
+ /*
+ * A negative length would undersize the allocation below and turn
+ * into a huge size_t in the memcpy() that copies the blob.
+ */
+ if (blen < 0)
+ return -EINVAL;
+
+ ctx = ksmbd_crypto_ctx_find_hmacmd5();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "could not crypto alloc hmacmd5\n");
+ return -ENOMEM;
+ }
+
+ rc = calc_ntlmv2_hash(sess, ntlmv2_hash, domain_name);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not get v2 hash rc %d\n", rc);
+ goto out;
+ }
+
+ rc = crypto_shash_setkey(CRYPTO_HMACMD5_TFM(ctx),
+ ntlmv2_hash,
+ CIFS_HMAC_MD5_HASH_SIZE);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not set NTLMV2 Hash as a key\n");
+ goto out;
+ }
+
+ rc = crypto_shash_init(CRYPTO_HMACMD5(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not init hmacmd5\n");
+ goto out;
+ }
+
+ len = CIFS_CRYPTO_KEY_SIZE + blen;
+ construct = kzalloc(len, GFP_KERNEL);
+ if (!construct) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* hashed data is the server challenge followed by the client blob */
+ memcpy(construct, sess->ntlmssp.cryptkey, CIFS_CRYPTO_KEY_SIZE);
+ memcpy(construct + CIFS_CRYPTO_KEY_SIZE, &ntlmv2->blob_signature, blen);
+
+ rc = crypto_shash_update(CRYPTO_HMACMD5(ctx), construct, len);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not update with response\n");
+ goto out;
+ }
+
+ rc = crypto_shash_final(CRYPTO_HMACMD5(ctx), ntlmv2_rsp);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not generate md5 hash\n");
+ goto out;
+ }
+
+ rc = ksmbd_gen_sess_key(sess, ntlmv2_hash, ntlmv2_rsp);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not generate sess key\n");
+ goto out;
+ }
+
+ if (memcmp(ntlmv2->ntlmv2_hash, ntlmv2_rsp, CIFS_HMAC_MD5_HASH_SIZE) != 0)
+ rc = -EINVAL;
+out:
+ ksmbd_release_crypto_ctx(ctx);
+ kfree(construct);
+ return rc;
+}
+
+/**
+ * __ksmbd_auth_ntlmv2() - NTLM2(extended security) authentication handler
+ * @sess: session of connection
+ * @client_nonce: client nonce from LM response.
+ * @ntlm_resp: ntlm response data from client.
+ *
+ * Return: 0 on success, error number on error
+ */
+static int __ksmbd_auth_ntlmv2(struct ksmbd_session *sess, char *client_nonce,
+ char *ntlm_resp)
+{
+ char sess_key[CIFS_SMB1_SESSKEY_SIZE] = {0};
+ int rc;
+ unsigned char p21[21];
+ char key[CIFS_AUTH_RESP_SIZE];
+
+ rc = ksmbd_enc_update_sess_key(sess_key,
+ client_nonce,
+ (char *)sess->ntlmssp.cryptkey, 8);
+ if (rc) {
+ pr_err("password processing failed\n");
+ goto out;
+ }
+
+ memset(p21, '\0', 21);
+ memcpy(p21, user_passkey(sess->user), CIFS_NTHASH_SIZE);
+ rc = ksmbd_enc_p24(p21, sess_key, key);
+ if (rc) {
+ pr_err("password processing failed\n");
+ goto out;
+ }
+
+ if (memcmp(ntlm_resp, key, CIFS_AUTH_RESP_SIZE) != 0)
+ rc = -EINVAL;
+out:
+ return rc;
+}
+
+/**
+ * ksmbd_decode_ntlmssp_auth_blob() - helper function to decode and verify
+ * authenticate blob
+ * @authblob: authenticate blob source pointer
+ * @blob_len: length of the authenticate blob in bytes
+ * @sess: session of connection
+ *
+ * A response of CIFS_AUTH_RESP_SIZE bytes is handled as NTLM (or NTLM2 when
+ * extended security was negotiated); any other length is handled as NTLMv2.
+ *
+ * Return: 0 on success, error number on error
+ */
+int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
+ int blob_len, struct ksmbd_session *sess)
+{
+ char *domain_name;
+ unsigned int lm_off, nt_off, dn_off;
+ unsigned short nt_len, dn_len;
+ int ret;
+
+ if (blob_len < sizeof(struct authenticate_message)) {
+ ksmbd_debug(AUTH, "authenticate blob len %d too small\n",
+ blob_len);
+ return -EINVAL;
+ }
+
+ if (memcmp(authblob->Signature, "NTLMSSP", 8)) {
+ ksmbd_debug(AUTH, "blob signature incorrect %s\n",
+ authblob->Signature);
+ return -EINVAL;
+ }
+
+ lm_off = le32_to_cpu(authblob->LmChallengeResponse.BufferOffset);
+ nt_off = le32_to_cpu(authblob->NtChallengeResponse.BufferOffset);
+ nt_len = le16_to_cpu(authblob->NtChallengeResponse.Length);
+ dn_off = le32_to_cpu(authblob->DomainName.BufferOffset);
+ dn_len = le16_to_cpu(authblob->DomainName.Length);
+
+ /*
+ * Offsets and lengths come from the client; make sure the buffers
+ * they describe lie inside the blob before dereferencing them.
+ */
+ if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len)
+ return -EINVAL;
+
+ /* process NTLM authentication */
+ if (nt_len == CIFS_AUTH_RESP_SIZE) {
+ if (le32_to_cpu(authblob->NegotiateFlags) &
+ NTLMSSP_NEGOTIATE_EXTENDED_SEC) {
+ /* 8 bytes of client nonce are read at lm_off */
+ if (blob_len < (u64)lm_off + 8)
+ return -EINVAL;
+ return __ksmbd_auth_ntlmv2(sess, (char *)authblob +
+ lm_off, (char *)authblob + nt_off);
+ }
+ return ksmbd_auth_ntlm(sess, (char *)authblob +
+ nt_off);
+ }
+
+ /* the NTLMv2 blob length below is nt_len minus the leading hash */
+ if (nt_len < CIFS_ENCPWD_SIZE)
+ return -EINVAL;
+
+ /* TODO : use domain name that imported from configuration file */
+ domain_name = smb_strndup_from_utf16((const char *)authblob + dn_off,
+ dn_len, true,
+ sess->conn->local_nls);
+ if (IS_ERR(domain_name))
+ return PTR_ERR(domain_name);
+
+ /* process NTLMv2 authentication */
+ ksmbd_debug(AUTH, "decode_ntlmssp_authenticate_blob dname%s\n",
+ domain_name);
+ ret = ksmbd_auth_ntlmv2(sess, (struct ntlmv2_resp *)((char *)authblob + nt_off),
+ nt_len - CIFS_ENCPWD_SIZE,
+ domain_name);
+ kfree(domain_name);
+ return ret;
+}
+
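+/**
+ * ksmbd_decode_ntlmssp_neg_blob() - helper function to validate a
+ * negotiate blob and record the client's negotiate flags
+ * @negblob: negotiate blob source pointer
+ * @blob_len: length of the negotiate blob in bytes
+ * @sess: session of connection
+ *
+ * Return: 0 on success, -EINVAL if the blob is too short or its signature
+ * is not "NTLMSSP"
+ */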
+int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
+ int blob_len, struct ksmbd_session *sess)
+{
+ if (blob_len < sizeof(struct negotiate_message)) {
+ ksmbd_debug(AUTH, "negotiate blob len %d too small\n",
+ blob_len);
+ return -EINVAL;
+ }
+
+ if (memcmp(negblob->Signature, "NTLMSSP", 8)) {
+ ksmbd_debug(AUTH, "blob signature incorrect %s\n",
+ negblob->Signature);
+ return -EINVAL;
+ }
+
+ sess->ntlmssp.client_flags = le32_to_cpu(negblob->NegotiateFlags);
+ return 0;
+}
+
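+/**
+ * ksmbd_build_ntlmssp_challenge_blob() - helper function to construct
+ * challenge blob
+ * @chgblob: challenge blob source pointer to initialize
+ * @sess: session of connection
+ *
+ * Return: length in bytes of the challenge blob that was built. Note that
+ * the error paths return a negative errno through the unsigned return type,
+ * so callers see it as a very large length unless they convert it to int.
+ */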
+unsigned int
+ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
+ struct ksmbd_session *sess)
+{
+ struct target_info *tinfo;
+ wchar_t *name;
+ __u8 *target_name;
+ unsigned int flags, blob_off, blob_len, type, target_info_len = 0;
+ int len, uni_len, conv_len;
+ int cflags = sess->ntlmssp.client_flags;
+
+ memcpy(chgblob->Signature, NTLMSSP_SIGNATURE, 8);
+ chgblob->MessageType = NtLmChallenge;
+
+ flags = NTLMSSP_NEGOTIATE_UNICODE |
+ NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_TARGET_TYPE_SERVER |
+ NTLMSSP_NEGOTIATE_TARGET_INFO;
+
+ if (cflags & NTLMSSP_NEGOTIATE_SIGN) {
+ flags |= NTLMSSP_NEGOTIATE_SIGN;
+ flags |= cflags & (NTLMSSP_NEGOTIATE_128 |
+ NTLMSSP_NEGOTIATE_56);
+ }
+
+ if (cflags & NTLMSSP_NEGOTIATE_ALWAYS_SIGN)
+ flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
+
+ if (cflags & NTLMSSP_REQUEST_TARGET)
+ flags |= NTLMSSP_REQUEST_TARGET;
+
+ if (sess->conn->use_spnego &&
+ (cflags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
+ flags |= NTLMSSP_NEGOTIATE_EXTENDED_SEC;
+
+ chgblob->NegotiateFlags = cpu_to_le32(flags);
+ len = strlen(ksmbd_netbios_name());
+ name = kmalloc(2 + UNICODE_LEN(len), GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ conv_len = smb_strtoUTF16((__le16 *)name, ksmbd_netbios_name(), len,
+ sess->conn->local_nls);
+ if (conv_len < 0 || conv_len > len) {
+ kfree(name);
+ return -EINVAL;
+ }
+
+ uni_len = UNICODE_LEN(conv_len);
+
+ blob_off = sizeof(struct challenge_message);
+ blob_len = blob_off + uni_len;
+
+ chgblob->TargetName.Length = cpu_to_le16(uni_len);
+ chgblob->TargetName.MaximumLength = cpu_to_le16(uni_len);
+ chgblob->TargetName.BufferOffset = cpu_to_le32(blob_off);
+
+ /* Initialize random conn challenge */
+ get_random_bytes(sess->ntlmssp.cryptkey, sizeof(__u64));
+ memcpy(chgblob->Challenge, sess->ntlmssp.cryptkey,
+ CIFS_CRYPTO_KEY_SIZE);
+
+ /* Add Target Information to security buffer */
+ chgblob->TargetInfoArray.BufferOffset = cpu_to_le32(blob_len);
+
+ target_name = (__u8 *)chgblob + blob_off;
+ memcpy(target_name, name, uni_len);
+ tinfo = (struct target_info *)(target_name + uni_len);
+
+ chgblob->TargetInfoArray.Length = 0;
+ /* Add target info list for NetBIOS/DNS settings */
+ for (type = NTLMSSP_AV_NB_COMPUTER_NAME;
+ type <= NTLMSSP_AV_DNS_DOMAIN_NAME; type++) {
+ tinfo->Type = cpu_to_le16(type);
+ tinfo->Length = cpu_to_le16(uni_len);
+ memcpy(tinfo->Content, name, uni_len);
+ tinfo = (struct target_info *)((char *)tinfo + 4 + uni_len);
+ target_info_len += 4 + uni_len;
+ }
+
+ /* Add terminator subblock */
+ tinfo->Type = 0;
+ tinfo->Length = 0;
+ target_info_len += 4;
+
+ chgblob->TargetInfoArray.Length = cpu_to_le16(target_info_len);
+ chgblob->TargetInfoArray.MaximumLength = cpu_to_le16(target_info_len);
+ blob_len += target_info_len;
+ kfree(name);
+ ksmbd_debug(AUTH, "NTLMSSP SecurityBufferLength %d\n", blob_len);
+ return blob_len;
+}
+
+#ifdef CONFIG_SMB_SERVER_KERBEROS5
+int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob,
+ int in_len, char *out_blob, int *out_len)
+{
+ struct ksmbd_spnego_authen_response *resp;
+ struct ksmbd_user *user = NULL;
+ int retval;
+
+ resp = ksmbd_ipc_spnego_authen_request(in_blob, in_len);
+ if (!resp) {
+ ksmbd_debug(AUTH, "SPNEGO_AUTHEN_REQUEST failure\n");
+ return -EINVAL;
+ }
+
+ if (!(resp->login_response.status & KSMBD_USER_FLAG_OK)) {
+ ksmbd_debug(AUTH, "krb5 authentication failure\n");
+ retval = -EPERM;
+ goto out;
+ }
+
+ if (*out_len <= resp->spnego_blob_len) {
+ ksmbd_debug(AUTH, "buf len %d, but blob len %d\n",
+ *out_len, resp->spnego_blob_len);
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (resp->session_key_len > sizeof(sess->sess_key)) {
+ ksmbd_debug(AUTH, "session key is too long\n");
+ retval = -EINVAL;
+ goto out;
+ }
+
+ user = ksmbd_alloc_user(&resp->login_response);
+ if (!user) {
+ ksmbd_debug(AUTH, "login failure\n");
+ retval = -ENOMEM;
+ goto out;
+ }
+ sess->user = user;
+
+ memcpy(sess->sess_key, resp->payload, resp->session_key_len);
+ memcpy(out_blob, resp->payload + resp->session_key_len,
+ resp->spnego_blob_len);
+ *out_len = resp->spnego_blob_len;
+ retval = 0;
+out:
+ kvfree(resp);
+ return retval;
+}
+#else
+int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob,
+ int in_len, char *out_blob, int *out_len)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+/**
+ * ksmbd_sign_smb2_pdu() - function to generate packet signing
+ * @conn: connection
+ * @key: signing key
+ * @iov: buffer iov array
+ * @n_vec: number of iovecs
+ * @sig: signature value generated for client request packet
+ *
+ * Keys the hmacsha256 shash with SMB2_NTLMV2_SESSKEY_SIZE bytes of @key,
+ * feeds every iovec in order and writes the digest to @sig.
+ *
+ * Return: 0 on success, -ENOMEM if no crypto context is available,
+ * otherwise the error returned by the crypto_shash call that failed
+ */
+int ksmbd_sign_smb2_pdu(struct ksmbd_conn *conn, char *key, struct kvec *iov,
+ int n_vec, char *sig)
+{
+ struct ksmbd_crypto_ctx *ctx;
+ int rc, i;
+
+ ctx = ksmbd_crypto_ctx_find_hmacsha256();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "could not crypto alloc hmacsha256\n");
+ return -ENOMEM;
+ }
+
+ rc = crypto_shash_setkey(CRYPTO_HMACSHA256_TFM(ctx),
+ key,
+ SMB2_NTLMV2_SESSKEY_SIZE);
+ if (rc)
+ goto out;
+
+ rc = crypto_shash_init(CRYPTO_HMACSHA256(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "hmacsha256 init error %d\n", rc);
+ goto out;
+ }
+
+ for (i = 0; i < n_vec; i++) {
+ rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx),
+ iov[i].iov_base,
+ iov[i].iov_len);
+ if (rc) {
+ ksmbd_debug(AUTH, "hmacsha256 update error %d\n", rc);
+ goto out;
+ }
+ }
+
+ rc = crypto_shash_final(CRYPTO_HMACSHA256(ctx), sig);
+ if (rc)
+ ksmbd_debug(AUTH, "hmacsha256 generation error %d\n", rc);
+out:
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
+
+/**
+ * ksmbd_sign_smb3_pdu() - function to generate packet signing
+ * @conn: connection
+ * @key: signing key
+ * @iov: buffer iov array
+ * @n_vec: number of iovecs
+ * @sig: signature value generated for client request packet
+ *
+ */
+int ksmbd_sign_smb3_pdu(struct ksmbd_conn *conn, char *key, struct kvec *iov,
+ int n_vec, char *sig)
+{
+ struct ksmbd_crypto_ctx *ctx;
+ int rc, i;
+
+ ctx = ksmbd_crypto_ctx_find_cmacaes();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "could not crypto alloc cmac\n");
+ return -ENOMEM;
+ }
+
+ rc = crypto_shash_setkey(CRYPTO_CMACAES_TFM(ctx),
+ key,
+ SMB2_CMACAES_SIZE);
+ if (rc)
+ goto out;
+
+ rc = crypto_shash_init(CRYPTO_CMACAES(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "cmaces init error %d\n", rc);
+ goto out;
+ }
+
+ for (i = 0; i < n_vec; i++) {
+ rc = crypto_shash_update(CRYPTO_CMACAES(ctx),
+ iov[i].iov_base,
+ iov[i].iov_len);
+ if (rc) {
+ ksmbd_debug(AUTH, "cmaces update error %d\n", rc);
+ goto out;
+ }
+ }
+
+ rc = crypto_shash_final(CRYPTO_CMACAES(ctx), sig);
+ if (rc)
+ ksmbd_debug(AUTH, "cmaces generation error %d\n", rc);
+out:
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
+
+struct derivation {
+ struct kvec label;
+ struct kvec context;
+ bool binding;
+};
+
+/*
+ * Derive @key_size bytes of key material into @key.
+ *
+ * The hmacsha256 shash is keyed with sess->sess_key and fed, in order: the
+ * 4-byte counter {0, 0, 0, 1}, @label, a single zero byte, @context and a
+ * 4-byte length field (256 for the AES-256 cipher types, 128 otherwise).
+ * The leading @key_size bytes of the digest become the key. @key is zeroed
+ * first, so it stays zeroed when an error is returned.
+ *
+ * Return: 0 on success, -ENOMEM if no crypto context is available,
+ * otherwise the error returned by the crypto_shash call that failed.
+ */
+static int generate_key(struct ksmbd_session *sess, struct kvec label,
+ struct kvec context, __u8 *key, unsigned int key_size)
+{
+ unsigned char zero = 0x0;
+ __u8 i[4] = {0, 0, 0, 1};
+ __u8 L128[4] = {0, 0, 0, 128};
+ __u8 L256[4] = {0, 0, 1, 0};
+ int rc;
+ unsigned char prfhash[SMB2_HMACSHA256_SIZE];
+ unsigned char *hashptr = prfhash;
+ struct ksmbd_crypto_ctx *ctx;
+
+ memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE);
+ memset(key, 0x0, key_size);
+
+ ctx = ksmbd_crypto_ctx_find_hmacsha256();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "could not crypto alloc hmacsha256\n");
+ return -ENOMEM;
+ }
+
+ rc = crypto_shash_setkey(CRYPTO_HMACSHA256_TFM(ctx),
+ sess->sess_key,
+ SMB2_NTLMV2_SESSKEY_SIZE);
+ if (rc)
+ goto smb3signkey_ret;
+
+ rc = crypto_shash_init(CRYPTO_HMACSHA256(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "hmacsha256 init error %d\n", rc);
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), i, 4);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not update with n\n");
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx),
+ label.iov_base,
+ label.iov_len);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not update with label\n");
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), &zero, 1);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not update with zero\n");
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx),
+ context.iov_base,
+ context.iov_len);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not update with context\n");
+ goto smb3signkey_ret;
+ }
+
+ if (sess->conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM ||
+ sess->conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM)
+ rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), L256, 4);
+ else
+ rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), L128, 4);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not update with L\n");
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_final(CRYPTO_HMACSHA256(ctx), hashptr);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not generate hmacsha256 hash error %d\n",
+ rc);
+ goto smb3signkey_ret;
+ }
+
+ memcpy(key, hashptr, key_size);
+
+smb3signkey_ret:
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
+
+static int generate_smb3signingkey(struct ksmbd_session *sess,
+ struct ksmbd_conn *conn,
+ const struct derivation *signing)
+{
+ int rc;
+ struct channel *chann;
+ char *key;
+
+ chann = lookup_chann_list(sess, conn);
+ if (!chann)
+ return 0;
+
+ if (sess->conn->dialect >= SMB30_PROT_ID && signing->binding)
+ key = chann->smb3signingkey;
+ else
+ key = sess->smb3signingkey;
+
+ rc = generate_key(sess, signing->label, signing->context, key,
+ SMB3_SIGN_KEY_SIZE);
+ if (rc)
+ return rc;
+
+ if (!(sess->conn->dialect >= SMB30_PROT_ID && signing->binding))
+ memcpy(chann->smb3signingkey, key, SMB3_SIGN_KEY_SIZE);
+
+ ksmbd_debug(AUTH, "dumping generated AES signing keys\n");
+ ksmbd_debug(AUTH, "Session Id %llu\n", sess->id);
+ ksmbd_debug(AUTH, "Session Key %*ph\n",
+ SMB2_NTLMV2_SESSKEY_SIZE, sess->sess_key);
+ ksmbd_debug(AUTH, "Signing Key %*ph\n",
+ SMB3_SIGN_KEY_SIZE, key);
+ return 0;
+}
+
+int ksmbd_gen_smb30_signingkey(struct ksmbd_session *sess,
+ struct ksmbd_conn *conn)
+{
+ struct derivation d;
+
+ d.label.iov_base = "SMB2AESCMAC";
+ d.label.iov_len = 12;
+ d.context.iov_base = "SmbSign";
+ d.context.iov_len = 8;
+ d.binding = conn->binding;
+
+ return generate_smb3signingkey(sess, conn, &d);
+}
+
+int ksmbd_gen_smb311_signingkey(struct ksmbd_session *sess,
+ struct ksmbd_conn *conn)
+{
+ struct derivation d;
+
+ d.label.iov_base = "SMBSigningKey";
+ d.label.iov_len = 14;
+ if (conn->binding) {
+ struct preauth_session *preauth_sess;
+
+ preauth_sess = ksmbd_preauth_session_lookup(conn, sess->id);
+ if (!preauth_sess)
+ return -ENOENT;
+ d.context.iov_base = preauth_sess->Preauth_HashValue;
+ } else {
+ d.context.iov_base = sess->Preauth_HashValue;
+ }
+ d.context.iov_len = 64;
+ d.binding = conn->binding;
+
+ return generate_smb3signingkey(sess, conn, &d);
+}
+
+struct derivation_twin {
+ struct derivation encryption;
+ struct derivation decryption;
+};
+
+static int generate_smb3encryptionkey(struct ksmbd_session *sess,
+ const struct derivation_twin *ptwin)
+{
+ int rc;
+
+ rc = generate_key(sess, ptwin->encryption.label,
+ ptwin->encryption.context, sess->smb3encryptionkey,
+ SMB3_ENC_DEC_KEY_SIZE);
+ if (rc)
+ return rc;
+
+ rc = generate_key(sess, ptwin->decryption.label,
+ ptwin->decryption.context,
+ sess->smb3decryptionkey, SMB3_ENC_DEC_KEY_SIZE);
+ if (rc)
+ return rc;
+
+ ksmbd_debug(AUTH, "dumping generated AES encryption keys\n");
+ ksmbd_debug(AUTH, "Cipher type %d\n", sess->conn->cipher_type);
+ ksmbd_debug(AUTH, "Session Id %llu\n", sess->id);
+ ksmbd_debug(AUTH, "Session Key %*ph\n",
+ SMB2_NTLMV2_SESSKEY_SIZE, sess->sess_key);
+ if (sess->conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM ||
+ sess->conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM) {
+ ksmbd_debug(AUTH, "ServerIn Key %*ph\n",
+ SMB3_GCM256_CRYPTKEY_SIZE, sess->smb3encryptionkey);
+ ksmbd_debug(AUTH, "ServerOut Key %*ph\n",
+ SMB3_GCM256_CRYPTKEY_SIZE, sess->smb3decryptionkey);
+ } else {
+ ksmbd_debug(AUTH, "ServerIn Key %*ph\n",
+ SMB3_GCM128_CRYPTKEY_SIZE, sess->smb3encryptionkey);
+ ksmbd_debug(AUTH, "ServerOut Key %*ph\n",
+ SMB3_GCM128_CRYPTKEY_SIZE, sess->smb3decryptionkey);
+ }
+ return 0;
+}
+
+int ksmbd_gen_smb30_encryptionkey(struct ksmbd_session *sess)
+{
+ struct derivation_twin twin;
+ struct derivation *d;
+
+ d = &twin.encryption;
+ d->label.iov_base = "SMB2AESCCM";
+ d->label.iov_len = 11;
+ d->context.iov_base = "ServerOut";
+ d->context.iov_len = 10;
+
+ d = &twin.decryption;
+ d->label.iov_base = "SMB2AESCCM";
+ d->label.iov_len = 11;
+ d->context.iov_base = "ServerIn ";
+ d->context.iov_len = 10;
+
+ return generate_smb3encryptionkey(sess, &twin);
+}
+
+int ksmbd_gen_smb311_encryptionkey(struct ksmbd_session *sess)
+{
+ struct derivation_twin twin;
+ struct derivation *d;
+
+ d = &twin.encryption;
+ d->label.iov_base = "SMBS2CCipherKey";
+ d->label.iov_len = 16;
+ d->context.iov_base = sess->Preauth_HashValue;
+ d->context.iov_len = 64;
+
+ d = &twin.decryption;
+ d->label.iov_base = "SMBC2SCipherKey";
+ d->label.iov_len = 16;
+ d->context.iov_base = sess->Preauth_HashValue;
+ d->context.iov_len = 64;
+
+ return generate_smb3encryptionkey(sess, &twin);
+}
+
+/*
+ * Fold one message into the preauth integrity hash.
+ *
+ * Computes sha512 over the current 64-byte @pi_hash followed by the message
+ * in @buf (starting at the header's ProtocolId, length taken from
+ * smb2_buf_length) and writes the digest back into @pi_hash.
+ *
+ * Return: 0 on success, -EINVAL if the connection's Preauth_HashId is not
+ * SMB2_PREAUTH_INTEGRITY_SHA512, -ENOMEM if no crypto context is available,
+ * otherwise the error returned by the crypto_shash call that failed.
+ */
+int ksmbd_gen_preauth_integrity_hash(struct ksmbd_conn *conn, char *buf,
+ __u8 *pi_hash)
+{
+ int rc;
+ struct smb2_hdr *rcv_hdr = (struct smb2_hdr *)buf;
+ char *all_bytes_msg = (char *)&rcv_hdr->ProtocolId;
+ int msg_size = be32_to_cpu(rcv_hdr->smb2_buf_length);
+ struct ksmbd_crypto_ctx *ctx = NULL;
+
+ if (conn->preauth_info->Preauth_HashId !=
+ SMB2_PREAUTH_INTEGRITY_SHA512)
+ return -EINVAL;
+
+ ctx = ksmbd_crypto_ctx_find_sha512();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "could not alloc sha512\n");
+ return -ENOMEM;
+ }
+
+ rc = crypto_shash_init(CRYPTO_SHA512(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "could not init shash\n");
+ goto out;
+ }
+
+ rc = crypto_shash_update(CRYPTO_SHA512(ctx), pi_hash, 64);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not update with n\n");
+ goto out;
+ }
+
+ rc = crypto_shash_update(CRYPTO_SHA512(ctx), all_bytes_msg, msg_size);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not update with n\n");
+ goto out;
+ }
+
+ rc = crypto_shash_final(CRYPTO_SHA512(ctx), pi_hash);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not generate hash err : %d\n", rc);
+ goto out;
+ }
+out:
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
+
+/*
+ * Compute the sha256 digest of the @len bytes at @sd_buf into @pi_hash.
+ *
+ * Return: 0 on success, -ENOMEM if no crypto context is available,
+ * otherwise the error returned by the crypto_shash call that failed.
+ */
+int ksmbd_gen_sd_hash(struct ksmbd_conn *conn, char *sd_buf, int len,
+ __u8 *pi_hash)
+{
+ int rc;
+ struct ksmbd_crypto_ctx *ctx = NULL;
+
+ ctx = ksmbd_crypto_ctx_find_sha256();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "could not alloc sha256\n");
+ return -ENOMEM;
+ }
+
+ rc = crypto_shash_init(CRYPTO_SHA256(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "could not init shash\n");
+ goto out;
+ }
+
+ rc = crypto_shash_update(CRYPTO_SHA256(ctx), sd_buf, len);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not update with n\n");
+ goto out;
+ }
+
+ rc = crypto_shash_final(CRYPTO_SHA256(ctx), pi_hash);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not generate hash err : %d\n", rc);
+ goto out;
+ }
+out:
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
+
+static int ksmbd_get_encryption_key(struct ksmbd_conn *conn, __u64 ses_id,
+ int enc, u8 *key)
+{
+ struct ksmbd_session *sess;
+ u8 *ses_enc_key;
+
+ sess = ksmbd_session_lookup_all(conn, ses_id);
+ if (!sess)
+ return -EINVAL;
+
+ ses_enc_key = enc ? sess->smb3encryptionkey :
+ sess->smb3decryptionkey;
+ memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE);
+
+ return 0;
+}
+
+static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf,
+ unsigned int buflen)
+{
+ void *addr;
+
+ if (is_vmalloc_addr(buf))
+ addr = vmalloc_to_page(buf);
+ else
+ addr = virt_to_page(buf);
+ sg_set_page(sg, addr, buflen, offset_in_page(buf));
+}
+
+/*
+ * Build the scatterlist for one encrypted message.
+ *
+ * Entry 0 covers the transform header in iov[0] from byte 24 onwards, the
+ * following entries cover iov[1..nvec-1] (a vmalloc'ed buffer gets one entry
+ * per page it touches), and the last entry covers @sign.
+ *
+ * Return: a kmalloc'ed scatterlist the caller must kfree(), or NULL when
+ * @nvec is 0, when there are more data buffers than nr_entries[] can track,
+ * or when the allocation fails.
+ */
+static struct scatterlist *ksmbd_init_sg(struct kvec *iov, unsigned int nvec,
+ u8 *sign)
+{
+ struct scatterlist *sg;
+ unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
+ int i, nr_entries[3] = {0}, total_entries = 0, sg_idx = 0;
+
+ /* nr_entries[] is indexed by data buffer; reject what it cannot hold */
+ if (!nvec || nvec - 1 > ARRAY_SIZE(nr_entries))
+ return NULL;
+
+ for (i = 0; i < nvec - 1; i++) {
+ unsigned long kaddr = (unsigned long)iov[i + 1].iov_base;
+
+ if (is_vmalloc_addr(iov[i + 1].iov_base)) {
+ nr_entries[i] = ((kaddr + iov[i + 1].iov_len +
+ PAGE_SIZE - 1) >> PAGE_SHIFT) -
+ (kaddr >> PAGE_SHIFT);
+ } else {
+ nr_entries[i]++;
+ }
+ total_entries += nr_entries[i];
+ }
+
+ /* Add two entries for transform header and signature */
+ total_entries += 2;
+
+ sg = kmalloc_array(total_entries, sizeof(struct scatterlist), GFP_KERNEL);
+ if (!sg)
+ return NULL;
+
+ sg_init_table(sg, total_entries);
+ smb2_sg_set_buf(&sg[sg_idx++], iov[0].iov_base + 24, assoc_data_len);
+ for (i = 0; i < nvec - 1; i++) {
+ void *data = iov[i + 1].iov_base;
+ int len = iov[i + 1].iov_len;
+
+ if (is_vmalloc_addr(data)) {
+ int j, offset = offset_in_page(data);
+
+ /* one entry per page, looked up page by page */
+ for (j = 0; j < nr_entries[i]; j++) {
+ unsigned int bytes = PAGE_SIZE - offset;
+
+ if (!len)
+ break;
+
+ if (bytes > len)
+ bytes = len;
+
+ sg_set_page(&sg[sg_idx++],
+ vmalloc_to_page(data), bytes,
+ offset_in_page(data));
+
+ data += bytes;
+ len -= bytes;
+ offset = 0;
+ }
+ } else {
+ sg_set_page(&sg[sg_idx++], virt_to_page(data), len,
+ offset_in_page(data));
+ }
+ }
+ smb2_sg_set_buf(&sg[sg_idx], sign, SMB2_SIGNATURE_SIZE);
+ return sg;
+}
+
+/*
+ * Encrypt (@enc != 0) or decrypt (@enc == 0) a message in place.
+ *
+ * iov[0] holds the transform header; its SessionId selects the session key,
+ * its Nonce seeds the IV and its bytes from offset 24 onwards are the
+ * associated data. The remaining iovecs hold the payload. The GCM or CCM
+ * transform is chosen from conn->cipher_type. On encryption the generated
+ * signature is stored in the header; on decryption the header's signature
+ * is passed to the transform along with the payload.
+ *
+ * Return: 0 on success, otherwise a negative error number.
+ */
+int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
+ unsigned int nvec, int enc)
+{
+ struct smb2_transform_hdr *tr_hdr =
+ (struct smb2_transform_hdr *)iov[0].iov_base;
+ unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
+ int rc;
+ struct scatterlist *sg;
+ u8 sign[SMB2_SIGNATURE_SIZE] = {};
+ u8 key[SMB3_ENC_DEC_KEY_SIZE];
+ struct aead_request *req;
+ char *iv;
+ unsigned int iv_len;
+ struct crypto_aead *tfm;
+ unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
+ struct ksmbd_crypto_ctx *ctx;
+
+ rc = ksmbd_get_encryption_key(conn,
+ le64_to_cpu(tr_hdr->SessionId),
+ enc,
+ key);
+ if (rc) {
+ pr_err("Could not get %scryption key\n", enc ? "en" : "de");
+ return rc;
+ }
+
+ if (conn->cipher_type == SMB2_ENCRYPTION_AES128_GCM ||
+ conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM)
+ ctx = ksmbd_crypto_ctx_find_gcm();
+ else
+ ctx = ksmbd_crypto_ctx_find_ccm();
+ if (!ctx) {
+ pr_err("crypto alloc failed\n");
+ return -ENOMEM;
+ }
+
+ if (conn->cipher_type == SMB2_ENCRYPTION_AES128_GCM ||
+ conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM)
+ tfm = CRYPTO_GCM(ctx);
+ else
+ tfm = CRYPTO_CCM(ctx);
+
+ if (conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM ||
+ conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM)
+ rc = crypto_aead_setkey(tfm, key, SMB3_GCM256_CRYPTKEY_SIZE);
+ else
+ rc = crypto_aead_setkey(tfm, key, SMB3_GCM128_CRYPTKEY_SIZE);
+ if (rc) {
+ pr_err("Failed to set aead key %d\n", rc);
+ goto free_ctx;
+ }
+
+ rc = crypto_aead_setauthsize(tfm, SMB2_SIGNATURE_SIZE);
+ if (rc) {
+ pr_err("Failed to set authsize %d\n", rc);
+ goto free_ctx;
+ }
+
+ req = aead_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ rc = -ENOMEM;
+ goto free_ctx;
+ }
+
+ if (!enc) {
+ /* the signature travels with the data as the auth tag */
+ memcpy(sign, &tr_hdr->Signature, SMB2_SIGNATURE_SIZE);
+ crypt_len += SMB2_SIGNATURE_SIZE;
+ }
+
+ sg = ksmbd_init_sg(iov, nvec, sign);
+ if (!sg) {
+ pr_err("Failed to init sg\n");
+ rc = -ENOMEM;
+ goto free_req;
+ }
+
+ iv_len = crypto_aead_ivsize(tfm);
+ iv = kzalloc(iv_len, GFP_KERNEL);
+ if (!iv) {
+ rc = -ENOMEM;
+ goto free_sg;
+ }
+
+ if (conn->cipher_type == SMB2_ENCRYPTION_AES128_GCM ||
+ conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM) {
+ memcpy(iv, (char *)tr_hdr->Nonce, SMB3_AES_GCM_NONCE);
+ } else {
+ iv[0] = 3;
+ memcpy(iv + 1, (char *)tr_hdr->Nonce, SMB3_AES_CCM_NONCE);
+ }
+
+ /* same scatterlist as source and destination: operate in place */
+ aead_request_set_crypt(req, sg, sg, crypt_len, iv);
+ aead_request_set_ad(req, assoc_data_len);
+ aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+
+ if (enc)
+ rc = crypto_aead_encrypt(req);
+ else
+ rc = crypto_aead_decrypt(req);
+ if (rc)
+ goto free_iv;
+
+ if (enc)
+ memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE);
+
+free_iv:
+ kfree(iv);
+free_sg:
+ kfree(sg);
+free_req:
+ /* counterpart of aead_request_alloc(); zeroizes before freeing */
+ aead_request_free(req);
+free_ctx:
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
diff --git a/fs/ksmbd/auth.h b/fs/ksmbd/auth.h
new file mode 100644
index 000000000000..9c2d4badd05d
--- /dev/null
+++ b/fs/ksmbd/auth.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __AUTH_H__
+#define __AUTH_H__
+
+#include "ntlmssp.h"
+
+#ifdef CONFIG_SMB_SERVER_KERBEROS5
+#define AUTH_GSS_LENGTH 96
+#define AUTH_GSS_PADDING 0
+#else
+#define AUTH_GSS_LENGTH 74
+#define AUTH_GSS_PADDING 6
+#endif
+
+#define CIFS_HMAC_MD5_HASH_SIZE (16)
+#define CIFS_NTHASH_SIZE (16)
+
+/*
+ * Size of the ntlm client response
+ */
+#define CIFS_AUTH_RESP_SIZE 24
+#define CIFS_SMB1_SIGNATURE_SIZE 8
+#define CIFS_SMB1_SESSKEY_SIZE 16
+
+#define KSMBD_AUTH_NTLMSSP 0x0001
+#define KSMBD_AUTH_KRB5 0x0002
+#define KSMBD_AUTH_MSKRB5 0x0004
+#define KSMBD_AUTH_KRB5U2U 0x0008
+
+struct ksmbd_session;
+struct ksmbd_conn;
+struct kvec;
+
+int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
+ unsigned int nvec, int enc);
+void ksmbd_copy_gss_neg_header(void *buf);
+int ksmbd_auth_ntlm(struct ksmbd_session *sess, char *pw_buf);
+int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
+ int blen, char *domain_name);
+int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
+ int blob_len, struct ksmbd_session *sess);
+int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
+ int blob_len, struct ksmbd_session *sess);
+unsigned int
+ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
+ struct ksmbd_session *sess);
+int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob,
+ int in_len, char *out_blob, int *out_len);
+int ksmbd_sign_smb2_pdu(struct ksmbd_conn *conn, char *key, struct kvec *iov,
+ int n_vec, char *sig);
+int ksmbd_sign_smb3_pdu(struct ksmbd_conn *conn, char *key, struct kvec *iov,
+ int n_vec, char *sig);
+int ksmbd_gen_smb30_signingkey(struct ksmbd_session *sess,
+ struct ksmbd_conn *conn);
+int ksmbd_gen_smb311_signingkey(struct ksmbd_session *sess,
+ struct ksmbd_conn *conn);
+int ksmbd_gen_smb30_encryptionkey(struct ksmbd_session *sess);
+int ksmbd_gen_smb311_encryptionkey(struct ksmbd_session *sess);
+int ksmbd_gen_preauth_integrity_hash(struct ksmbd_conn *conn, char *buf,
+ __u8 *pi_hash);
+int ksmbd_gen_sd_hash(struct ksmbd_conn *conn, char *sd_buf, int len,
+ __u8 *pi_hash);
+#endif
diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c
new file mode 100644
index 000000000000..af086d35398a
--- /dev/null
+++ b/fs/ksmbd/connection.c
@@ -0,0 +1,413 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <namjae.jeon@protocolfreedom.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/mutex.h>
+#include <linux/freezer.h>
+#include <linux/module.h>
+
+#include "server.h"
+#include "smb_common.h"
+#include "mgmt/ksmbd_ida.h"
+#include "connection.h"
+#include "transport_tcp.h"
+#include "transport_rdma.h"
+
+static DEFINE_MUTEX(init_lock);
+
+static struct ksmbd_conn_ops default_conn_ops;
+
+LIST_HEAD(conn_list);
+DEFINE_RWLOCK(conn_list_lock);
+
+/**
+ * ksmbd_conn_free() - unlink a connection and release its memory
+ *
+ * @conn:	connection instance to be cleaned up
+ *
+ * The connection is removed from the global conn_list under the write side
+ * of conn_list_lock. Afterwards its request buffer, its preauth info and
+ * finally the conn object itself are freed.
+ */
+void ksmbd_conn_free(struct ksmbd_conn *conn)
+{
+	write_lock(&conn_list_lock);
+	list_del(&conn->conns_list);
+	write_unlock(&conn_list_lock);
+
+	kfree(conn->preauth_info);
+	kvfree(conn->request_buf);
+	kfree(conn);
+}
+
+/**
+ * ksmbd_conn_alloc() - allocate and set up a new connection instance
+ *
+ * The zero-initialized object gets its NLS table, counters, wait queue,
+ * locks, lists and async-id allocator set up and is then linked into the
+ * global conn_list under the write side of conn_list_lock.
+ *
+ * Return:	ksmbd_conn struct on success, otherwise NULL
+ */
+struct ksmbd_conn *ksmbd_conn_alloc(void)
+{
+	struct ksmbd_conn *conn = kzalloc(sizeof(*conn), GFP_KERNEL);
+
+	if (!conn)
+		return NULL;
+
+	conn->need_neg = true;
+	conn->status = KSMBD_SESS_NEW;
+
+	/* Prefer the utf8 table and fall back to the default NLS table. */
+	conn->local_nls = load_nls("utf8");
+	if (!conn->local_nls)
+		conn->local_nls = load_nls_default();
+
+	atomic_set(&conn->req_running, 0);
+	atomic_set(&conn->r_count, 0);
+	init_waitqueue_head(&conn->req_running_q);
+	ida_init(&conn->async_ida);
+
+	spin_lock_init(&conn->request_lock);
+	spin_lock_init(&conn->credits_lock);
+	spin_lock_init(&conn->llist_lock);
+
+	INIT_LIST_HEAD(&conn->conns_list);
+	INIT_LIST_HEAD(&conn->sessions);
+	INIT_LIST_HEAD(&conn->requests);
+	INIT_LIST_HEAD(&conn->async_requests);
+	INIT_LIST_HEAD(&conn->lock_list);
+
+	/* Publish the fully initialized connection on the global list. */
+	write_lock(&conn_list_lock);
+	list_add(&conn->conns_list, &conn_list);
+	write_unlock(&conn_list_lock);
+
+	return conn;
+}
+
+/**
+ * ksmbd_conn_lookup_dialect() - look for a connection with a matching GUID
+ * @c:		connection whose ClientGUID is searched for
+ *
+ * Walks conn_list under the read side of conn_list_lock and compares
+ * SMB2_CLIENT_GUID_SIZE bytes of every entry's ClientGUID with that of @c.
+ *
+ * Return:	true if an entry on conn_list matches, otherwise false
+ */
+bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c)
+{
+	struct ksmbd_conn *cur;
+	bool found = false;
+
+	read_lock(&conn_list_lock);
+	list_for_each_entry(cur, &conn_list, conns_list) {
+		if (!memcmp(cur->ClientGUID, c->ClientGUID,
+			    SMB2_CLIENT_GUID_SIZE)) {
+			found = true;
+			break;
+		}
+	}
+	read_unlock(&conn_list_lock);
+
+	return found;
+}
+
+/**
+ * ksmbd_conn_enqueue_request() - queue a request on its connection
+ * @work:	request to be tracked
+ *
+ * SMB2_CANCEL_HE requests are not tracked at all. Every other request is
+ * flagged as synchronous, counted in req_running and appended to the
+ * connection's requests list under request_lock.
+ */
+void ksmbd_conn_enqueue_request(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+
+	if (conn->ops->get_cmd_val(work) == SMB2_CANCEL_HE)
+		return;
+
+	work->syncronous = true;
+
+	atomic_inc(&conn->req_running);
+	spin_lock(&conn->request_lock);
+	list_add_tail(&work->request_entry, &conn->requests);
+	spin_unlock(&conn->request_lock);
+}
+
+/**
+ * ksmbd_conn_try_dequeue_request() - stop tracking a request if possible
+ * @work:	request to be removed from the connection's request lists
+ *
+ * A request that is on neither list is left alone. Otherwise, unless the
+ * request is flagged multiRsp, req_running is decremented and the request is
+ * unlinked under request_lock. Waiters on req_running_q are woken in both
+ * cases.
+ *
+ * Return:	0 if the request was not queued or has been removed, 1 if it
+ *		is flagged multiRsp and was therefore left queued
+ */
+int ksmbd_conn_try_dequeue_request(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ int ret = 1;
+
+ /* Not on any list: nothing to undo. */
+ if (list_empty(&work->request_entry) &&
+ list_empty(&work->async_request_entry))
+ return 0;
+
+ if (!work->multiRsp)
+ atomic_dec(&conn->req_running);
+ spin_lock(&conn->request_lock);
+ if (!work->multiRsp) {
+ list_del_init(&work->request_entry);
+ /* Asynchronous work is additionally unlinked from the async list. */
+ if (work->syncronous == false)
+ list_del_init(&work->async_request_entry);
+ ret = 0;
+ }
+ spin_unlock(&conn->request_lock);
+
+ wake_up_all(&conn->req_running_q);
+ return ret;
+}
+
+/* Take the per-connection srv_mutex (held around transport writes). */
+static void ksmbd_conn_lock(struct ksmbd_conn *conn)
+{
+ mutex_lock(&conn->srv_mutex);
+}
+
+/* Release the per-connection srv_mutex taken by ksmbd_conn_lock(). */
+static void ksmbd_conn_unlock(struct ksmbd_conn *conn)
+{
+ mutex_unlock(&conn->srv_mutex);
+}
+
+/**
+ * ksmbd_conn_wait_idle() - wait until at most one request is running
+ * @conn:	connection to wait on
+ *
+ * Sleeps on req_running_q until req_running drops below 2.
+ * NOTE(review): the threshold presumably leaves room for the caller's own
+ * in-flight request - confirm against the callers.
+ */
+void ksmbd_conn_wait_idle(struct ksmbd_conn *conn)
+{
+ wait_event(conn->req_running_q, atomic_read(&conn->req_running) < 2);
+}
+
+/**
+ * ksmbd_conn_write() - send the response of a request to the client
+ * @work:	request whose response buffers are to be transmitted
+ *
+ * The request is dequeued first. Up to three vectors are then assembled, in
+ * this order: the transform header (if tr_buf is set), the response header
+ * and the auxiliary payload (if aux_payload_sz is non-zero). They are handed
+ * to the transport's writev callback while srv_mutex is held.
+ *
+ * Return:	0 on success, -EINVAL if there is no response buffer, otherwise
+ *		the negative value returned by the transport
+ */
+int ksmbd_conn_write(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct smb_hdr *rsp_hdr = work->response_buf;
+	struct kvec iov[3];
+	size_t len = 0;
+	int iov_idx = 0;
+	int sent;
+
+	ksmbd_conn_try_dequeue_request(work);
+	if (!rsp_hdr) {
+		pr_err("NULL response header\n");
+		return -EINVAL;
+	}
+
+	if (work->tr_buf) {
+		iov[iov_idx].iov_base = work->tr_buf;
+		iov[iov_idx].iov_len = sizeof(struct smb2_transform_hdr);
+		len += iov[iov_idx].iov_len;
+		iov_idx++;
+	}
+
+	/*
+	 * The response header is sent with resp_hdr_sz bytes whenever it is
+	 * accompanied by a transform header or an auxiliary payload; a plain
+	 * response is sent with its RFC1002 length plus the 4-byte length
+	 * field itself.
+	 */
+	iov[iov_idx].iov_base = rsp_hdr;
+	if (work->aux_payload_sz || work->tr_buf)
+		iov[iov_idx].iov_len = work->resp_hdr_sz;
+	else
+		iov[iov_idx].iov_len = get_rfc1002_len(rsp_hdr) + 4;
+	len += iov[iov_idx].iov_len;
+	iov_idx++;
+
+	if (work->aux_payload_sz) {
+		iov[iov_idx].iov_base = work->aux_payload_buf;
+		iov[iov_idx].iov_len = work->aux_payload_sz;
+		len += iov[iov_idx].iov_len;
+		iov_idx++;
+	}
+
+	ksmbd_conn_lock(conn);
+	sent = conn->transport->ops->writev(conn->transport, &iov[0],
+					    iov_idx, len,
+					    work->need_invalidate_rkey,
+					    work->remote_key);
+	ksmbd_conn_unlock(conn);
+
+	if (sent >= 0)
+		return 0;
+
+	pr_err("Failed to send message: %d\n", sent);
+	return sent;
+}
+
+/**
+ * ksmbd_conn_rdma_read() - forward an RDMA read to the transport
+ * @conn:		connection to read through
+ * @buf:		local buffer
+ * @buflen:		size of @buf
+ * @remote_key:		remote key passed through to the transport
+ * @remote_offset:	remote offset passed through to the transport
+ * @remote_len:		remote length passed through to the transport
+ *
+ * Return:	-EINVAL if the transport has no rdma_read callback, otherwise
+ *		the callback's return value
+ */
+int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, void *buf,
+			 unsigned int buflen, u32 remote_key, u64 remote_offset,
+			 u32 remote_len)
+{
+	if (!conn->transport->ops->rdma_read)
+		return -EINVAL;
+
+	return conn->transport->ops->rdma_read(conn->transport,
+					       buf, buflen,
+					       remote_key, remote_offset,
+					       remote_len);
+}
+
+/**
+ * ksmbd_conn_rdma_write() - forward an RDMA write to the transport
+ * @conn:		connection to write through
+ * @buf:		local buffer
+ * @buflen:		size of @buf
+ * @remote_key:		remote key passed through to the transport
+ * @remote_offset:	remote offset passed through to the transport
+ * @remote_len:		remote length passed through to the transport
+ *
+ * Return:	-EINVAL if the transport has no rdma_write callback, otherwise
+ *		the callback's return value
+ */
+int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, void *buf,
+			  unsigned int buflen, u32 remote_key,
+			  u64 remote_offset, u32 remote_len)
+{
+	if (!conn->transport->ops->rdma_write)
+		return -EINVAL;
+
+	return conn->transport->ops->rdma_write(conn->transport,
+						buf, buflen,
+						remote_key, remote_offset,
+						remote_len);
+}
+
+/**
+ * ksmbd_conn_alive() - decide whether the handler loop should keep running
+ * @conn:	connection to check
+ *
+ * Return:	false if the server is not running, the connection is marked
+ *		KSMBD_SESS_EXITING, the thread was asked to stop, or no file is
+ *		open and the configured deadtime has expired; true otherwise
+ */
+bool ksmbd_conn_alive(struct ksmbd_conn *conn)
+{
+	if (!ksmbd_server_running() ||
+	    conn->status == KSMBD_SESS_EXITING ||
+	    kthread_should_stop())
+		return false;
+
+	/* Open files keep the connection alive regardless of the deadtime. */
+	if (atomic_read(&conn->stats.open_files_count) > 0)
+		return true;
+
+	/*
+	 * With no file open, the session is stopped once more time than the
+	 * user-configured deadtime has passed since last_active.
+	 */
+	if (!(server_conf.deadtime > 0) ||
+	    !time_after(jiffies, conn->last_active + server_conf.deadtime))
+		return true;
+
+	ksmbd_debug(CONN, "No response from client in %lu minutes\n",
+		    server_conf.deadtime / SMB_ECHO_INTERVAL);
+	return false;
+}
+
+/**
+ * ksmbd_conn_handler_loop() - session thread to listen on new smb requests
+ * @p:		connection instance
+ *
+ * One thread each per connection. Every iteration reads the 4-byte RFC1002
+ * header, allocates a request buffer for the announced PDU, reads the PDU
+ * and hands the connection to the registered process_fn callback. Once the
+ * loop ends, the thread waits until all references (r_count) are dropped,
+ * unloads the NLS table, runs the terminate_fn callback and disconnects the
+ * transport.
+ *
+ * Return:	0 on success
+ */
+int ksmbd_conn_handler_loop(void *p)
+{
+	struct ksmbd_conn *conn = (struct ksmbd_conn *)p;
+	struct ksmbd_transport *t = conn->transport;
+	unsigned int pdu_size;
+	char hdr_buf[4] = {0,};
+	int size;
+
+	mutex_init(&conn->srv_mutex);
+	__module_get(THIS_MODULE);
+
+	/* prepare() is optional; a non-zero result aborts the session. */
+	if (t->ops->prepare && t->ops->prepare(t))
+		goto out;
+
+	conn->last_active = jiffies;
+	while (ksmbd_conn_alive(conn)) {
+		if (try_to_freeze())
+			continue;
+
+		/* Drop the buffer of the previous request, if any. */
+		kvfree(conn->request_buf);
+		conn->request_buf = NULL;
+
+		size = t->ops->read(t, hdr_buf, sizeof(hdr_buf));
+		if (size != sizeof(hdr_buf))
+			break;
+
+		pdu_size = get_rfc1002_len(hdr_buf);
+		ksmbd_debug(CONN, "RFC1002 header %u bytes\n", pdu_size);
+
+		/*
+		 * make sure we have enough to get to SMB header end
+		 *
+		 * NOTE(review): the announced bytes are not consumed before
+		 * continuing, so a non-empty short PDU leaves its body in the
+		 * stream to be parsed as the next header - confirm whether
+		 * the connection should be dropped here instead.
+		 */
+		if (!ksmbd_pdu_size_has_room(pdu_size)) {
+			ksmbd_debug(CONN, "SMB request too short (%u bytes)\n",
+				    pdu_size);
+			continue;
+		}
+
+		/* 4 for rfc1002 length field */
+		size = pdu_size + 4;
+		conn->request_buf = kvmalloc(size, GFP_KERNEL);
+		/*
+		 * The PDU body is still unread at this point. Continuing
+		 * would make the next iteration interpret body bytes as an
+		 * RFC1002 header, so give up on the connection instead.
+		 */
+		if (!conn->request_buf)
+			break;
+
+		memcpy(conn->request_buf, hdr_buf, sizeof(hdr_buf));
+		if (!ksmbd_smb_request(conn))
+			break;
+
+		/*
+		 * We already read 4 bytes to find out PDU size, now
+		 * read in PDU
+		 */
+		size = t->ops->read(t, conn->request_buf + 4, pdu_size);
+		if (size < 0) {
+			pr_err("sock_read failed: %d\n", size);
+			break;
+		}
+
+		if (size != pdu_size) {
+			/* pdu_size is unsigned, so it is printed with %u. */
+			pr_err("PDU error. Read: %d, Expected: %u\n",
+			       size, pdu_size);
+			continue;
+		}
+
+		if (!default_conn_ops.process_fn) {
+			pr_err("No connection request callback\n");
+			break;
+		}
+
+		if (default_conn_ops.process_fn(conn)) {
+			pr_err("Cannot handle request\n");
+			break;
+		}
+	}
+
+out:
+	/*
+	 * Wait till all references to the Server object are dropped.
+	 * schedule_timeout() does not sleep while the task state is
+	 * TASK_RUNNING, so use the variant that sets the task state first;
+	 * otherwise this loop spins instead of polling once per second.
+	 */
+	while (atomic_read(&conn->r_count) > 0)
+		schedule_timeout_interruptible(HZ);
+
+	unload_nls(conn->local_nls);
+	if (default_conn_ops.terminate_fn)
+		default_conn_ops.terminate_fn(conn);
+	t->ops->disconnect(t);
+	module_put(THIS_MODULE);
+	return 0;
+}
+
+/**
+ * ksmbd_conn_init_server_callbacks() - register the request callbacks
+ * @ops:	callbacks to copy into the file-local default_conn_ops
+ *
+ * process_fn and terminate_fn are the callbacks invoked by
+ * ksmbd_conn_handler_loop().
+ */
+void ksmbd_conn_init_server_callbacks(struct ksmbd_conn_ops *ops)
+{
+ default_conn_ops.process_fn = ops->process_fn;
+ default_conn_ops.terminate_fn = ops->terminate_fn;
+}
+
+/**
+ * ksmbd_conn_transport_init() - bring up the TCP and RDMA transports
+ *
+ * Both subsystems are initialized under init_lock, TCP first. RDMA is only
+ * attempted when TCP initialization succeeded.
+ *
+ * Return:	0 on success, otherwise the error of the failing subsystem
+ */
+int ksmbd_conn_transport_init(void)
+{
+	int err;
+
+	mutex_lock(&init_lock);
+
+	err = ksmbd_tcp_init();
+	if (err) {
+		pr_err("Failed to init TCP subsystem: %d\n", err);
+		goto unlock;
+	}
+
+	err = ksmbd_rdma_init();
+	if (err)
+		pr_err("Failed to init RDMA subsystem: %d\n", err);
+
+unlock:
+	mutex_unlock(&init_lock);
+	return err;
+}
+
+/*
+ * Mark every connection on conn_list as KSMBD_SESS_EXITING and keep doing so
+ * every 100ms until conn_list is empty.
+ */
+static void stop_sessions(void)
+{
+	struct ksmbd_conn *conn;
+
+	for (;;) {
+		read_lock(&conn_list_lock);
+		list_for_each_entry(conn, &conn_list, conns_list) {
+			struct task_struct *handler = conn->transport->handler;
+
+			if (handler)
+				ksmbd_debug(CONN, "Stop session handler %s/%d\n",
+					    handler->comm, task_pid_nr(handler));
+			conn->status = KSMBD_SESS_EXITING;
+		}
+		read_unlock(&conn_list_lock);
+
+		if (list_empty(&conn_list))
+			break;
+
+		schedule_timeout_interruptible(HZ / 10); /* 100ms */
+	}
+}
+
+/**
+ * ksmbd_conn_transport_destroy() - tear down the transports and sessions
+ *
+ * Under init_lock, destroys the TCP and RDMA transports and then waits in
+ * stop_sessions() until no connection is left on conn_list.
+ */
+void ksmbd_conn_transport_destroy(void)
+{
+ mutex_lock(&init_lock);
+ ksmbd_tcp_destroy();
+ ksmbd_rdma_destroy();
+ stop_sessions();
+ mutex_unlock(&init_lock);
+}
diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h
new file mode 100644
index 000000000000..e5403c587a58
--- /dev/null
+++ b/fs/ksmbd/connection.h
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_CONNECTION_H__
+#define __KSMBD_CONNECTION_H__
+
+#include <linux/list.h>
+#include <linux/ip.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/inet_connection_sock.h>
+#include <net/request_sock.h>
+#include <linux/kthread.h>
+#include <linux/nls.h>
+
+#include "smb_common.h"
+#include "ksmbd_work.h"
+
+#define KSMBD_SOCKET_BACKLOG 16
+
+/*
+ * WARNING
+ *
+ * This is nothing but a HACK. Session status should move to channel
+ * or to session. As of now we have 1 tcp_conn : 1 ksmbd_session, but
+ * we need to change it to 1 tcp_conn : N ksmbd_sessions.
+ *
+ * The values below are stored in ksmbd_conn->status.
+ */
+enum {
+ /* initial status assigned by ksmbd_conn_alloc() */
+ KSMBD_SESS_NEW = 0,
+ KSMBD_SESS_GOOD,
+ /* makes ksmbd_conn_alive() return false, ending the handler loop */
+ KSMBD_SESS_EXITING,
+ KSMBD_SESS_NEED_RECONNECT,
+ KSMBD_SESS_NEED_NEGOTIATE
+};
+
+/* Per-connection counters, embedded in struct ksmbd_conn. */
+struct ksmbd_stats {
+ /* a value above zero keeps ksmbd_conn_alive() from applying deadtime */
+ atomic_t open_files_count;
+ atomic64_t request_served;
+};
+
+struct ksmbd_transport;
+
+/*
+ * Per-connection state. An instance is allocated by ksmbd_conn_alloc(),
+ * served by ksmbd_conn_handler_loop() (one thread per connection) and
+ * released by ksmbd_conn_free().
+ */
+struct ksmbd_conn {
+ struct smb_version_values *vals;
+ struct smb_version_ops *ops;
+ struct smb_version_cmds *cmds;
+ unsigned int max_cmds;
+ /* held around the transport writev call in ksmbd_conn_write() */
+ struct mutex srv_mutex;
+ /* one of the KSMBD_SESS_* values */
+ int status;
+ unsigned int cli_cap;
+ /* RFC1002 length field plus PDU of the request being read */
+ char *request_buf;
+ struct ksmbd_transport *transport;
+ struct nls_table *local_nls;
+ /* entry in the global conn_list, protected by conn_list_lock */
+ struct list_head conns_list;
+ /* smb session 1 per user */
+ struct list_head sessions;
+ /* jiffies value that ksmbd_conn_alive() checks the deadtime against */
+ unsigned long last_active;
+ /* How many requests are currently running */
+ atomic_t req_running;
+ /* References which are made for this Server object */
+ atomic_t r_count;
+ unsigned short total_credits;
+ unsigned short max_credits;
+ spinlock_t credits_lock;
+ /* woken whenever a request is dequeued */
+ wait_queue_head_t req_running_q;
+ /* Lock to protect requests list */
+ spinlock_t request_lock;
+ struct list_head requests;
+ struct list_head async_requests;
+ int connection_type;
+ struct ksmbd_stats stats;
+ char ClientGUID[SMB2_CLIENT_GUID_SIZE];
+ union {
+ /* pending trans request table */
+ struct trans_state *recent_trans;
+ /* Used by ntlmssp */
+ char *ntlmssp_cryptkey;
+ };
+
+ spinlock_t llist_lock;
+ struct list_head lock_list;
+
+ /* freed together with the connection in ksmbd_conn_free() */
+ struct preauth_integrity_info *preauth_info;
+
+ bool need_neg;
+ unsigned int auth_mechs;
+ unsigned int preferred_auth_mech;
+ bool sign;
+ bool use_spnego:1;
+ __u16 cli_sec_mode;
+ __u16 srv_sec_mode;
+ /* dialect index that server chose */
+ __u16 dialect;
+
+ char *mechToken;
+
+ struct ksmbd_conn_ops *conn_ops;
+
+ /* Preauth Session Table */
+ struct list_head preauth_sess_table;
+
+ struct sockaddr_storage peer_addr;
+
+ /* Identifier for async message */
+ struct ida async_ida;
+
+ __le16 cipher_type;
+ __le16 compress_algorithm;
+ bool posix_ext_supported;
+ bool signing_negotiated;
+ __le16 signing_algorithm;
+ bool binding;
+};
+
+/*
+ * Callbacks used by ksmbd_conn_handler_loop(); registered through
+ * ksmbd_conn_init_server_callbacks().
+ */
+struct ksmbd_conn_ops {
+ /* called for every completely read PDU; non-zero ends the loop */
+ int (*process_fn)(struct ksmbd_conn *conn);
+ /* called once, if set, after the loop has ended */
+ int (*terminate_fn)(struct ksmbd_conn *conn);
+};
+
+/*
+ * Operations a transport provides to the connection layer. prepare,
+ * rdma_read and rdma_write are optional: their callers check for NULL.
+ */
+struct ksmbd_transport_ops {
+ /* called once before the handler loop; non-zero aborts the session */
+ int (*prepare)(struct ksmbd_transport *t);
+ /* called when the handler loop exits */
+ void (*disconnect)(struct ksmbd_transport *t);
+ /* the handler loop expects @size on success and negative on failure */
+ int (*read)(struct ksmbd_transport *t, char *buf, unsigned int size);
+ /* ksmbd_conn_write() treats a negative return value as failure */
+ int (*writev)(struct ksmbd_transport *t, struct kvec *iovs, int niov,
+ int size, bool need_invalidate_rkey,
+ unsigned int remote_key);
+ int (*rdma_read)(struct ksmbd_transport *t, void *buf, unsigned int len,
+ u32 remote_key, u64 remote_offset, u32 remote_len);
+ int (*rdma_write)(struct ksmbd_transport *t, void *buf,
+ unsigned int len, u32 remote_key, u64 remote_offset,
+ u32 remote_len);
+};
+
+/* Transport instance behind a connection. */
+struct ksmbd_transport {
+ struct ksmbd_conn *conn;
+ struct ksmbd_transport_ops *ops;
+ /* task reported by stop_sessions() as the session handler; may be NULL */
+ struct task_struct *handler;
+};
+
+#define KSMBD_TCP_RECV_TIMEOUT (7 * HZ)
+#define KSMBD_TCP_SEND_TIMEOUT (5 * HZ)
+#define KSMBD_TCP_PEER_SOCKADDR(c) ((struct sockaddr *)&((c)->peer_addr))
+
+extern struct list_head conn_list;
+extern rwlock_t conn_list_lock;
+
+bool ksmbd_conn_alive(struct ksmbd_conn *conn);
+void ksmbd_conn_wait_idle(struct ksmbd_conn *conn);
+struct ksmbd_conn *ksmbd_conn_alloc(void);
+void ksmbd_conn_free(struct ksmbd_conn *conn);
+bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c);
+int ksmbd_conn_write(struct ksmbd_work *work);
+int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, void *buf,
+ unsigned int buflen, u32 remote_key, u64 remote_offset,
+ u32 remote_len);
+int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, void *buf,
+ unsigned int buflen, u32 remote_key, u64 remote_offset,
+ u32 remote_len);
+void ksmbd_conn_enqueue_request(struct ksmbd_work *work);
+int ksmbd_conn_try_dequeue_request(struct ksmbd_work *work);
+void ksmbd_conn_init_server_callbacks(struct ksmbd_conn_ops *ops);
+int ksmbd_conn_handler_loop(void *p);
+int ksmbd_conn_transport_init(void);
+void ksmbd_conn_transport_destroy(void);
+
+/*
+ * WARNING
+ *
+ * This is a hack. We will move status to a proper place once we land
+ * a multi-sessions support.
+ *
+ * The helpers below test or set the status of the connection that @work
+ * belongs to.
+ */
+
+/* True if the connection status is KSMBD_SESS_GOOD. */
+static inline bool ksmbd_conn_good(struct ksmbd_work *work)
+{
+ return work->conn->status == KSMBD_SESS_GOOD;
+}
+
+/* True if the connection status is KSMBD_SESS_NEED_NEGOTIATE. */
+static inline bool ksmbd_conn_need_negotiate(struct ksmbd_work *work)
+{
+ return work->conn->status == KSMBD_SESS_NEED_NEGOTIATE;
+}
+
+/* True if the connection status is KSMBD_SESS_NEED_RECONNECT. */
+static inline bool ksmbd_conn_need_reconnect(struct ksmbd_work *work)
+{
+ return work->conn->status == KSMBD_SESS_NEED_RECONNECT;
+}
+
+/* True if the connection status is KSMBD_SESS_EXITING. */
+static inline bool ksmbd_conn_exiting(struct ksmbd_work *work)
+{
+ return work->conn->status == KSMBD_SESS_EXITING;
+}
+
+/* Set the connection status to KSMBD_SESS_GOOD. */
+static inline void ksmbd_conn_set_good(struct ksmbd_work *work)
+{
+ work->conn->status = KSMBD_SESS_GOOD;
+}
+
+/* Set the connection status to KSMBD_SESS_NEED_NEGOTIATE. */
+static inline void ksmbd_conn_set_need_negotiate(struct ksmbd_work *work)
+{
+ work->conn->status = KSMBD_SESS_NEED_NEGOTIATE;
+}
+
+/* Set the connection status to KSMBD_SESS_NEED_RECONNECT. */
+static inline void ksmbd_conn_set_need_reconnect(struct ksmbd_work *work)
+{
+ work->conn->status = KSMBD_SESS_NEED_RECONNECT;
+}
+
+/* Set the connection status to KSMBD_SESS_EXITING. */
+static inline void ksmbd_conn_set_exiting(struct ksmbd_work *work)
+{
+ work->conn->status = KSMBD_SESS_EXITING;
+}
+#endif /* __KSMBD_CONNECTION_H__ */
diff --git a/fs/ksmbd/crypto_ctx.c b/fs/ksmbd/crypto_ctx.c
new file mode 100644
index 000000000000..5f4b1008d17e
--- /dev/null
+++ b/fs/ksmbd/crypto_ctx.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+
+#include "glob.h"
+#include "crypto_ctx.h"
+
+/* Pool of crypto contexts shared by all users of this file. */
+struct crypto_ctx_list {
+ /* protects avail_ctx and idle_ctx */
+ spinlock_t ctx_lock;
+ /* number of contexts accounted to the pool, idle or handed out */
+ int avail_ctx;
+ /* contexts that are currently not handed out */
+ struct list_head idle_ctx;
+ /* woken when a context is put back on idle_ctx */
+ wait_queue_head_t ctx_wait;
+};
+
+static struct crypto_ctx_list ctx_list;
+
+/* Release an AEAD transform; a NULL @aead is ignored. */
+static inline void free_aead(struct crypto_aead *aead)
+{
+	if (!aead)
+		return;
+
+	crypto_free_aead(aead);
+}
+
+/*
+ * Release a hash descriptor together with the transform it refers to; a NULL
+ * @shash is ignored.
+ */
+static void free_shash(struct shash_desc *shash)
+{
+	if (!shash)
+		return;
+
+	crypto_free_shash(shash->tfm);
+	kfree(shash);
+}
+
+/*
+ * Allocate the AEAD transform that belongs to @id (CRYPTO_AEAD_AES_GCM or
+ * CRYPTO_AEAD_AES_CCM).
+ *
+ * Returns the transform, or NULL for an unknown @id or a failed allocation;
+ * both failures are logged.
+ */
+static struct crypto_aead *alloc_aead(int id)
+{
+	const char *alg_name;
+	struct crypto_aead *tfm;
+
+	switch (id) {
+	case CRYPTO_AEAD_AES_GCM:
+		alg_name = "gcm(aes)";
+		break;
+	case CRYPTO_AEAD_AES_CCM:
+		alg_name = "ccm(aes)";
+		break;
+	default:
+		pr_err("Does not support encrypt ahead(id : %d)\n", id);
+		return NULL;
+	}
+
+	tfm = crypto_alloc_aead(alg_name, 0, 0);
+	if (IS_ERR(tfm)) {
+		pr_err("Failed to alloc encrypt aead : %ld\n", PTR_ERR(tfm));
+		return NULL;
+	}
+
+	return tfm;
+}
+
+/*
+ * Allocate the hash transform that belongs to @id (one of CRYPTO_SHASH_*)
+ * and a zeroed descriptor sized for it.
+ *
+ * Returns the descriptor with ->tfm set, or NULL for an unknown @id or when
+ * either allocation fails. The transform is released again if the descriptor
+ * cannot be allocated.
+ */
+static struct shash_desc *alloc_shash_desc(int id)
+{
+	const char *alg_name;
+	struct crypto_shash *tfm;
+	struct shash_desc *shash;
+
+	switch (id) {
+	case CRYPTO_SHASH_HMACMD5:
+		alg_name = "hmac(md5)";
+		break;
+	case CRYPTO_SHASH_HMACSHA256:
+		alg_name = "hmac(sha256)";
+		break;
+	case CRYPTO_SHASH_CMACAES:
+		alg_name = "cmac(aes)";
+		break;
+	case CRYPTO_SHASH_SHA256:
+		alg_name = "sha256";
+		break;
+	case CRYPTO_SHASH_SHA512:
+		alg_name = "sha512";
+		break;
+	case CRYPTO_SHASH_MD4:
+		alg_name = "md4";
+		break;
+	case CRYPTO_SHASH_MD5:
+		alg_name = "md5";
+		break;
+	default:
+		return NULL;
+	}
+
+	tfm = crypto_alloc_shash(alg_name, 0, 0);
+	if (IS_ERR(tfm))
+		return NULL;
+
+	shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(tfm),
+			GFP_KERNEL);
+	if (!shash) {
+		crypto_free_shash(tfm);
+		return NULL;
+	}
+
+	shash->tfm = tfm;
+	return shash;
+}
+
+/* Release every hash descriptor and AEAD transform of @ctx, then @ctx. */
+static void ctx_free(struct ksmbd_crypto_ctx *ctx)
+{
+	int id;
+
+	for (id = 0; id < CRYPTO_AEAD_MAX; id++)
+		free_aead(ctx->ccmaes[id]);
+
+	for (id = 0; id < CRYPTO_SHASH_MAX; id++)
+		free_shash(ctx->desc[id]);
+
+	kfree(ctx);
+}
+
+/*
+ * Get a crypto context from the pool. An idle context is reused if there is
+ * one. Otherwise a new zeroed context is allocated, unless avail_ctx already
+ * exceeds the number of online CPUs; in that case, and when the allocation
+ * fails, the caller sleeps until a context is put back on the idle list and
+ * retries. The function therefore never returns NULL.
+ */
+static struct ksmbd_crypto_ctx *ksmbd_find_crypto_ctx(void)
+{
+ struct ksmbd_crypto_ctx *ctx;
+
+ while (1) {
+ spin_lock(&ctx_list.ctx_lock);
+ /* Fast path: take the first idle context. */
+ if (!list_empty(&ctx_list.idle_ctx)) {
+ ctx = list_entry(ctx_list.idle_ctx.next,
+ struct ksmbd_crypto_ctx,
+ list);
+ list_del(&ctx->list);
+ spin_unlock(&ctx_list.ctx_lock);
+ return ctx;
+ }
+
+ /* Pool is at its limit: wait for a release and retry. */
+ if (ctx_list.avail_ctx > num_online_cpus()) {
+ spin_unlock(&ctx_list.ctx_lock);
+ wait_event(ctx_list.ctx_wait,
+ !list_empty(&ctx_list.idle_ctx));
+ continue;
+ }
+
+ /* Account for the new context before allocating it unlocked. */
+ ctx_list.avail_ctx++;
+ spin_unlock(&ctx_list.ctx_lock);
+
+ ctx = kzalloc(sizeof(struct ksmbd_crypto_ctx), GFP_KERNEL);
+ if (!ctx) {
+ /* Undo the accounting and fall back to waiting. */
+ spin_lock(&ctx_list.ctx_lock);
+ ctx_list.avail_ctx--;
+ spin_unlock(&ctx_list.ctx_lock);
+ wait_event(ctx_list.ctx_wait,
+ !list_empty(&ctx_list.idle_ctx));
+ continue;
+ }
+ break;
+ }
+ return ctx;
+}
+
+/**
+ * ksmbd_release_crypto_ctx() - hand a crypto context back to the pool
+ * @ctx:	context to release; NULL is ignored
+ *
+ * While avail_ctx does not exceed the number of online CPUs the context is
+ * put on the idle list and one waiter is woken. Otherwise the pool is shrunk
+ * by freeing the context.
+ */
+void ksmbd_release_crypto_ctx(struct ksmbd_crypto_ctx *ctx)
+{
+	if (!ctx)
+		return;
+
+	spin_lock(&ctx_list.ctx_lock);
+	if (ctx_list.avail_ctx > num_online_cpus()) {
+		ctx_list.avail_ctx--;
+		spin_unlock(&ctx_list.ctx_lock);
+		ctx_free(ctx);
+		return;
+	}
+
+	list_add(&ctx->list, &ctx_list.idle_ctx);
+	spin_unlock(&ctx_list.ctx_lock);
+	wake_up(&ctx_list.ctx_wait);
+}
+
+/*
+ * Get a crypto context whose hash descriptor for @id is ready to use. The
+ * descriptor is allocated on first use and kept in the context afterwards.
+ *
+ * Returns NULL if @id is not below CRYPTO_SHASH_MAX or if the descriptor
+ * cannot be allocated; in the latter case the context is released again.
+ */
+static struct ksmbd_crypto_ctx *____crypto_shash_ctx_find(int id)
+{
+	struct ksmbd_crypto_ctx *ctx;
+
+	if (id >= CRYPTO_SHASH_MAX)
+		return NULL;
+
+	ctx = ksmbd_find_crypto_ctx();
+	if (!ctx->desc[id]) {
+		ctx->desc[id] = alloc_shash_desc(id);
+		if (!ctx->desc[id]) {
+			ksmbd_release_crypto_ctx(ctx);
+			return NULL;
+		}
+	}
+
+	return ctx;
+}
+
+/*
+ * The functions below return a crypto context with the named hash descriptor
+ * set up, or NULL on failure. Contexts are handed back with
+ * ksmbd_release_crypto_ctx().
+ */
+
+/* Context with a hmac(md5) descriptor. */
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_hmacmd5(void)
+{
+ return ____crypto_shash_ctx_find(CRYPTO_SHASH_HMACMD5);
+}
+
+/* Context with a hmac(sha256) descriptor. */
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_hmacsha256(void)
+{
+ return ____crypto_shash_ctx_find(CRYPTO_SHASH_HMACSHA256);
+}
+
+/* Context with a cmac(aes) descriptor. */
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_cmacaes(void)
+{
+ return ____crypto_shash_ctx_find(CRYPTO_SHASH_CMACAES);
+}
+
+/* Context with a sha256 descriptor. */
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_sha256(void)
+{
+ return ____crypto_shash_ctx_find(CRYPTO_SHASH_SHA256);
+}
+
+/* Context with a sha512 descriptor. */
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_sha512(void)
+{
+ return ____crypto_shash_ctx_find(CRYPTO_SHASH_SHA512);
+}
+
+/* Context with an md4 descriptor. */
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_md4(void)
+{
+ return ____crypto_shash_ctx_find(CRYPTO_SHASH_MD4);
+}
+
+/* Context with an md5 descriptor. */
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_md5(void)
+{
+ return ____crypto_shash_ctx_find(CRYPTO_SHASH_MD5);
+}
+
+/*
+ * Get a crypto context whose AEAD transform for @id is ready to use. The
+ * transform is allocated on first use and kept in the context afterwards.
+ *
+ * Returns NULL if @id is not below CRYPTO_AEAD_MAX or if the transform
+ * cannot be allocated; in the latter case the context is released again.
+ */
+static struct ksmbd_crypto_ctx *____crypto_aead_ctx_find(int id)
+{
+	struct ksmbd_crypto_ctx *ctx;
+
+	if (id >= CRYPTO_AEAD_MAX)
+		return NULL;
+
+	ctx = ksmbd_find_crypto_ctx();
+	if (!ctx->ccmaes[id]) {
+		ctx->ccmaes[id] = alloc_aead(id);
+		if (!ctx->ccmaes[id]) {
+			ksmbd_release_crypto_ctx(ctx);
+			return NULL;
+		}
+	}
+
+	return ctx;
+}
+
+/* Context with a gcm(aes) AEAD transform, or NULL on failure. */
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_gcm(void)
+{
+ return ____crypto_aead_ctx_find(CRYPTO_AEAD_AES_GCM);
+}
+
+/* Context with a ccm(aes) AEAD transform, or NULL on failure. */
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_ccm(void)
+{
+ return ____crypto_aead_ctx_find(CRYPTO_AEAD_AES_CCM);
+}
+
+/**
+ * ksmbd_crypto_destroy() - free every idle crypto context
+ *
+ * Empties the idle list without taking ctx_lock; contexts that are handed
+ * out at the time of the call are not touched.
+ */
+void ksmbd_crypto_destroy(void)
+{
+	struct ksmbd_crypto_ctx *idle;
+
+	while (!list_empty(&ctx_list.idle_ctx)) {
+		idle = list_first_entry(&ctx_list.idle_ctx,
+					struct ksmbd_crypto_ctx, list);
+		list_del(&idle->list);
+		ctx_free(idle);
+	}
+}
+
+/**
+ * ksmbd_crypto_create() - set up the crypto context pool
+ *
+ * Initializes the pool's lock, idle list and wait queue and seeds the pool
+ * with one zeroed context, which avail_ctx already accounts for.
+ *
+ * Return:	0 on success, -ENOMEM if the first context cannot be allocated
+ */
+int ksmbd_crypto_create(void)
+{
+	struct ksmbd_crypto_ctx *first;
+
+	spin_lock_init(&ctx_list.ctx_lock);
+	INIT_LIST_HEAD(&ctx_list.idle_ctx);
+	init_waitqueue_head(&ctx_list.ctx_wait);
+	ctx_list.avail_ctx = 1;
+
+	first = kzalloc(sizeof(*first), GFP_KERNEL);
+	if (!first)
+		return -ENOMEM;
+
+	list_add(&first->list, &ctx_list.idle_ctx);
+	return 0;
+}
diff --git a/fs/ksmbd/crypto_ctx.h b/fs/ksmbd/crypto_ctx.h
new file mode 100644
index 000000000000..ef11154b43df
--- /dev/null
+++ b/fs/ksmbd/crypto_ctx.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __CRYPTO_CTX_H__
+#define __CRYPTO_CTX_H__
+
+#include <crypto/hash.h>
+#include <crypto/aead.h>
+
+/* Hash ids; they index ksmbd_crypto_ctx->desc[]. */
+enum {
+ CRYPTO_SHASH_HMACMD5 = 0,
+ CRYPTO_SHASH_HMACSHA256,
+ CRYPTO_SHASH_CMACAES,
+ CRYPTO_SHASH_SHA256,
+ CRYPTO_SHASH_SHA512,
+ CRYPTO_SHASH_MD4,
+ CRYPTO_SHASH_MD5,
+ CRYPTO_SHASH_MAX,
+};
+
+/*
+ * AEAD ids; they index ksmbd_crypto_ctx->ccmaes[]. The numbering starts at
+ * 16, so CRYPTO_AEAD_MAX is 18.
+ */
+enum {
+ CRYPTO_AEAD_AES_GCM = 16,
+ CRYPTO_AEAD_AES_CCM,
+ CRYPTO_AEAD_MAX,
+};
+
+/* Block cipher ids. */
+enum {
+ CRYPTO_BLK_ECBDES = 32,
+ CRYPTO_BLK_MAX,
+};
+
+/*
+ * A pooled set of lazily allocated crypto objects. Slots stay NULL until the
+ * corresponding ksmbd_crypto_ctx_find_*() call allocates them.
+ */
+struct ksmbd_crypto_ctx {
+ /* entry in the pool's idle list while the context is not handed out */
+ struct list_head list;
+
+ /* indexed by CRYPTO_SHASH_* */
+ struct shash_desc *desc[CRYPTO_SHASH_MAX];
+ /*
+ * indexed by CRYPTO_AEAD_*; as those ids start at 16, only the last
+ * two of the CRYPTO_AEAD_MAX slots are ever used
+ */
+ struct crypto_aead *ccmaes[CRYPTO_AEAD_MAX];
+};
+
+#define CRYPTO_HMACMD5(c) ((c)->desc[CRYPTO_SHASH_HMACMD5])
+#define CRYPTO_HMACSHA256(c) ((c)->desc[CRYPTO_SHASH_HMACSHA256])
+#define CRYPTO_CMACAES(c) ((c)->desc[CRYPTO_SHASH_CMACAES])
+#define CRYPTO_SHA256(c) ((c)->desc[CRYPTO_SHASH_SHA256])
+#define CRYPTO_SHA512(c) ((c)->desc[CRYPTO_SHASH_SHA512])
+#define CRYPTO_MD4(c) ((c)->desc[CRYPTO_SHASH_MD4])
+#define CRYPTO_MD5(c) ((c)->desc[CRYPTO_SHASH_MD5])
+
+#define CRYPTO_HMACMD5_TFM(c) ((c)->desc[CRYPTO_SHASH_HMACMD5]->tfm)
+#define CRYPTO_HMACSHA256_TFM(c)\
+ ((c)->desc[CRYPTO_SHASH_HMACSHA256]->tfm)
+#define CRYPTO_CMACAES_TFM(c) ((c)->desc[CRYPTO_SHASH_CMACAES]->tfm)
+#define CRYPTO_SHA256_TFM(c) ((c)->desc[CRYPTO_SHASH_SHA256]->tfm)
+#define CRYPTO_SHA512_TFM(c) ((c)->desc[CRYPTO_SHASH_SHA512]->tfm)
+#define CRYPTO_MD4_TFM(c) ((c)->desc[CRYPTO_SHASH_MD4]->tfm)
+#define CRYPTO_MD5_TFM(c) ((c)->desc[CRYPTO_SHASH_MD5]->tfm)
+
+#define CRYPTO_GCM(c) ((c)->ccmaes[CRYPTO_AEAD_AES_GCM])
+#define CRYPTO_CCM(c) ((c)->ccmaes[CRYPTO_AEAD_AES_CCM])
+
+void ksmbd_release_crypto_ctx(struct ksmbd_crypto_ctx *ctx);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_hmacmd5(void);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_hmacsha256(void);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_cmacaes(void);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_sha512(void);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_sha256(void);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_md4(void);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_md5(void);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_gcm(void);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_ccm(void);
+void ksmbd_crypto_destroy(void);
+int ksmbd_crypto_create(void);
+
+#endif /* __CRYPTO_CTX_H__ */
diff --git a/fs/ksmbd/glob.h b/fs/ksmbd/glob.h
new file mode 100644
index 000000000000..49a5a3afa118
--- /dev/null
+++ b/fs/ksmbd/glob.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_GLOB_H
+#define __KSMBD_GLOB_H
+
+#include <linux/ctype.h>
+
+#include "unicode.h"
+#include "vfs_cache.h"
+
+#define KSMBD_VERSION "3.1.9"
+
+extern int ksmbd_debug_types;
+
+#define KSMBD_DEBUG_SMB BIT(0)
+#define KSMBD_DEBUG_AUTH BIT(1)
+#define KSMBD_DEBUG_VFS BIT(2)
+#define KSMBD_DEBUG_OPLOCK BIT(3)
+#define KSMBD_DEBUG_IPC BIT(4)
+#define KSMBD_DEBUG_CONN BIT(5)
+#define KSMBD_DEBUG_RDMA BIT(6)
+#define KSMBD_DEBUG_ALL (KSMBD_DEBUG_SMB | KSMBD_DEBUG_AUTH | \
+ KSMBD_DEBUG_VFS | KSMBD_DEBUG_OPLOCK | \
+ KSMBD_DEBUG_IPC | KSMBD_DEBUG_CONN | \
+ KSMBD_DEBUG_RDMA)
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#ifdef SUBMOD_NAME
+#define pr_fmt(fmt) "ksmbd: " SUBMOD_NAME ": " fmt
+#else
+#define pr_fmt(fmt) "ksmbd: " fmt
+#endif
+
+/*
+ * Print a message with pr_info() if the KSMBD_DEBUG_<type> bit is set in
+ * ksmbd_debug_types. @type is the suffix of a KSMBD_DEBUG_* flag, e.g.
+ * ksmbd_debug(CONN, ...).
+ */
+#define ksmbd_debug(type, fmt, ...) \
+ do { \
+ if (ksmbd_debug_types & KSMBD_DEBUG_##type) \
+ pr_info(fmt, ##__VA_ARGS__); \
+ } while (0)
+
+#define UNICODE_LEN(x) ((x) * 2)
+
+#endif /* __KSMBD_GLOB_H */
diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h
new file mode 100644
index 000000000000..2fbe2bc1e093
--- /dev/null
+++ b/fs/ksmbd/ksmbd_netlink.h
@@ -0,0 +1,395 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ *
+ * linux-ksmbd-devel@lists.sourceforge.net
+ */
+
+#ifndef _LINUX_KSMBD_SERVER_H
+#define _LINUX_KSMBD_SERVER_H
+
+#include <linux/types.h>
+
+/*
+ * This is a userspace ABI to communicate data between ksmbd and user IPC
+ * daemon using netlink. This is added to track and cache user account DB
+ * and share configuration info from userspace.
+ *
+ * - KSMBD_EVENT_HEARTBEAT_REQUEST(ksmbd_heartbeat)
+ * This event is to check whether the user IPC daemon is alive. If the user
+ * IPC daemon is dead, ksmbd keeps existing connections until they disconnect
+ * and denies new connections.
+ *
+ * - KSMBD_EVENT_STARTING_UP(ksmbd_startup_request)
+ * This event is to receive the information that initializes the ksmbd
+ * server from the user IPC daemon and to start the server. The global
+ * section parameters are given from smb.conf as initialization
+ * information.
+ *
+ * - KSMBD_EVENT_SHUTTING_DOWN(ksmbd_shutdown_request)
+ * This event is to shutdown ksmbd server.
+ *
+ * - KSMBD_EVENT_LOGIN_REQUEST/RESPONSE(ksmbd_login_request/response)
+ * This event is to get user account info from the user IPC daemon.
+ *
+ * - KSMBD_EVENT_SHARE_CONFIG_REQUEST/RESPONSE(ksmbd_share_config_request/response)
+ * This event is to get net share configuration info.
+ *
+ * - KSMBD_EVENT_TREE_CONNECT_REQUEST/RESPONSE(ksmbd_tree_connect_request/response)
+ * This event is to get session and tree connect info.
+ *
+ * - KSMBD_EVENT_TREE_DISCONNECT_REQUEST(ksmbd_tree_disconnect_request)
+ * This event is to send tree disconnect info to user IPC daemon.
+ *
+ * - KSMBD_EVENT_LOGOUT_REQUEST(ksmbd_logout_request)
+ * This event is to send logout request to user IPC daemon.
+ *
+ * - KSMBD_EVENT_RPC_REQUEST/RESPONSE(ksmbd_rpc_command)
+ * This event is to make DCE/RPC request like srvsvc, wkssvc, lsarpc,
+ * samr to be processed in userspace.
+ *
+ * - KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST/RESPONSE(ksmbd_spnego_authen_request/response)
+ * This event is to make kerberos authentication to be processed in
+ * userspace.
+ */
+
+#define KSMBD_GENL_NAME "SMBD_GENL"
+#define KSMBD_GENL_VERSION 0x01
+
+#define KSMBD_REQ_MAX_ACCOUNT_NAME_SZ 48
+#define KSMBD_REQ_MAX_HASH_SZ 18
+#define KSMBD_REQ_MAX_SHARE_NAME 64
+
+/*
+ * IPC heartbeat frame to check whether user IPC daemon is alive.
+ */
+struct ksmbd_heartbeat {
+ __u32 handle;
+};
+
+/*
+ * Global config flags.
+ */
+#define KSMBD_GLOBAL_FLAG_INVALID (0)
+#define KSMBD_GLOBAL_FLAG_SMB2_LEASES BIT(0)
+#define KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION BIT(1)
+#define KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL BIT(2)
+
+/*
+ * IPC request for ksmbd server startup
+ */
+struct ksmbd_startup_request {
+ __u32 flags; /* Flags for global config */
+ __s32 signing; /* Signing enabled */
+ __s8 min_prot[16]; /* The minimum SMB protocol version */
+ __s8 max_prot[16]; /* The maximum SMB protocol version */
+ __s8 netbios_name[16];
+ __s8 work_group[64]; /* Workgroup */
+ __s8 server_string[64]; /* Server string */
+ __u16 tcp_port; /* tcp port */
+ __u16 ipc_timeout; /*
+ * specifies the number of seconds
+ * server will wait for the userspace to
+ * reply to heartbeat frames.
+ */
+ __u32 deadtime; /* Number of minutes of inactivity */
+ __u32 file_max; /* Limits the maximum number of open files */
+ __u32 smb2_max_write; /* MAX write size */
+ __u32 smb2_max_read; /* MAX read size */
+ __u32 smb2_max_trans; /* MAX trans size */
+ __u32 share_fake_fscaps; /*
+ * Support some special application that
+ * makes QFSINFO calls to check whether
+ * we set the SPARSE_FILES bit (0x40).
+ */
+ __u32 sub_auth[3]; /* Subauth value for Security ID */
+ __u32 ifc_list_sz; /* interfaces list size */
+ __s8 ____payload[];
+};
+
+#define KSMBD_STARTUP_CONFIG_INTERFACES(s) ((s)->____payload)
+
+/*
+ * IPC request to shutdown ksmbd server.
+ */
+struct ksmbd_shutdown_request {
+ __s32 reserved;
+};
+
+/*
+ * IPC user login request.
+ */
+struct ksmbd_login_request {
+ __u32 handle;
+ __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
+};
+
+/*
+ * IPC user login response.
+ */
+struct ksmbd_login_response {
+ __u32 handle;
+ __u32 gid; /* group id */
+ __u32 uid; /* user id */
+ __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
+ __u16 status;
+ __u16 hash_sz; /* hash size */
+ __s8 hash[KSMBD_REQ_MAX_HASH_SZ]; /* password hash */
+};
+
+/*
+ * IPC request to fetch net share config.
+ */
+struct ksmbd_share_config_request {
+ __u32 handle;
+ __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; /* share name */
+};
+
+/*
+ * IPC response to the net share config request.
+ *
+ * The payload starts with the veto list (see KSMBD_SHARE_CONFIG_VETO_LIST).
+ * ksmbd_share_config_path() locates the data that follows it.
+ */
+struct ksmbd_share_config_response {
+ __u32 handle;
+ __u32 flags;
+ __u16 create_mask;
+ __u16 directory_mask;
+ __u16 force_create_mode;
+ __u16 force_directory_mode;
+ __u16 force_uid;
+ __u16 force_gid;
+ /* size of the veto list at the start of the payload; 0 if there is none */
+ __u32 veto_list_sz;
+ __s8 ____payload[];
+};
+
+#define KSMBD_SHARE_CONFIG_VETO_LIST(s) ((s)->____payload)
+
+/*
+ * Return a pointer to the data that follows the veto list in the payload of
+ * @sc. Without a veto list this is the start of the payload; otherwise the
+ * veto list and one additional byte are skipped.
+ */
+static inline char *
+ksmbd_share_config_path(struct ksmbd_share_config_response *sc)
+{
+	char *path = sc->____payload;
+
+	if (!sc->veto_list_sz)
+		return path;
+
+	return path + (sc->veto_list_sz + 1);
+}
+
+/*
+ * IPC request for tree connection. This request include session and tree
+ * connect info from client.
+ */
+struct ksmbd_tree_connect_request {
+ __u32 handle;
+ __u16 account_flags;
+ __u16 flags;
+ __u64 session_id;
+ __u64 connect_id;
+ __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ];
+ __s8 share[KSMBD_REQ_MAX_SHARE_NAME];
+ __s8 peer_addr[64];
+};
+
+/*
+ * IPC Response structure for tree connection.
+ */
+struct ksmbd_tree_connect_response {
+ __u32 handle;
+ __u16 status;
+ __u16 connection_flags;
+};
+
+/*
+ * IPC request structure to disconnect a tree connection.
+ */
+struct ksmbd_tree_disconnect_request {
+ __u64 session_id; /* session id */
+ __u64 connect_id; /* tree connection id */
+};
+
+/*
+ * IPC request structure to log out a user account.
+ */
+struct ksmbd_logout_request {
+ __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
+};
+
+/*
+ * RPC command structure to send rpc request like srvsvc or wkssvc to
+ * IPC user daemon.
+ */
+struct ksmbd_rpc_command {
+ __u32 handle;
+ __u32 flags; /* NOTE(review): presumably KSMBD_RPC_* method bits — confirm */
+ __u32 payload_sz; /* NOTE(review): presumably the byte length of payload[] — confirm */
+ __u8 payload[];
+};
+
+/*
+ * IPC Request Kerberos authentication
+ */
+struct ksmbd_spnego_authen_request {
+	__u32 handle;
+	__u16 spnego_blob_len;	/* the length of spnego_blob */
+	__u8 spnego_blob[];	/*
+				 * the GSS token from SecurityBuffer of
+				 * SMB2 SESSION SETUP request
+				 */
+};
+
+/*
+ * Response data which includes the GSS token and the session key generated by
+ * user daemon.
+ */
+struct ksmbd_spnego_authen_response {
+ __u32 handle;
+ struct ksmbd_login_response login_response; /*
+ * the login response with
+ * a user identified by the
+ * GSS token from a client
+ */
+ __u16 session_key_len; /* the length of the session key */
+ __u16 spnego_blob_len; /*
+ * the length of the GSS token which will be
+ * stored in SecurityBuffer of SMB2 SESSION
+ * SETUP response
+ */
+ __u8 payload[]; /* session key + AP_REP; NOTE(review): presumably in that order, sized by the two lengths above — confirm */
+};
+
+/*
+ * This is also used as the NETLINK attribute type value.
+ *
+ * NOTE:
+ * Response message type value should be equal to
+ * request message type value + 1.
+ */
+enum ksmbd_event {
+ KSMBD_EVENT_UNSPEC = 0,
+ KSMBD_EVENT_HEARTBEAT_REQUEST,
+
+ KSMBD_EVENT_STARTING_UP,
+ KSMBD_EVENT_SHUTTING_DOWN,
+
+ KSMBD_EVENT_LOGIN_REQUEST,
+ KSMBD_EVENT_LOGIN_RESPONSE = 5,
+
+ KSMBD_EVENT_SHARE_CONFIG_REQUEST,
+ KSMBD_EVENT_SHARE_CONFIG_RESPONSE,
+
+ KSMBD_EVENT_TREE_CONNECT_REQUEST,
+ KSMBD_EVENT_TREE_CONNECT_RESPONSE,
+
+ KSMBD_EVENT_TREE_DISCONNECT_REQUEST = 10,
+
+ KSMBD_EVENT_LOGOUT_REQUEST,
+
+ KSMBD_EVENT_RPC_REQUEST,
+ KSMBD_EVENT_RPC_RESPONSE,
+
+ KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST,
+ KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE = 15,
+
+ KSMBD_EVENT_MAX
+};
+
+/*
+ * Enumeration for IPC tree connect status (ksmbd_tree_connect_response.status).
+ */
+enum KSMBD_TREE_CONN_STATUS {
+ KSMBD_TREE_CONN_STATUS_OK = 0,
+ KSMBD_TREE_CONN_STATUS_NOMEM,
+ KSMBD_TREE_CONN_STATUS_NO_SHARE,
+ KSMBD_TREE_CONN_STATUS_NO_USER,
+ KSMBD_TREE_CONN_STATUS_INVALID_USER,
+ KSMBD_TREE_CONN_STATUS_HOST_DENIED = 5,
+ KSMBD_TREE_CONN_STATUS_CONN_EXIST,
+ KSMBD_TREE_CONN_STATUS_TOO_MANY_CONNS,
+ KSMBD_TREE_CONN_STATUS_TOO_MANY_SESSIONS,
+ KSMBD_TREE_CONN_STATUS_ERROR,
+};
+
+/*
+ * User config flags (ksmbd_login_response.status, kept in ksmbd_user.flags).
+ */
+#define KSMBD_USER_FLAG_INVALID (0)
+#define KSMBD_USER_FLAG_OK BIT(0)
+#define KSMBD_USER_FLAG_BAD_PASSWORD BIT(1)
+#define KSMBD_USER_FLAG_BAD_UID BIT(2)
+#define KSMBD_USER_FLAG_BAD_USER BIT(3)
+#define KSMBD_USER_FLAG_GUEST_ACCOUNT BIT(4)
+
+/*
+ * Share config flags (ksmbd_share_config_response.flags).
+ */
+#define KSMBD_SHARE_FLAG_INVALID (0)
+#define KSMBD_SHARE_FLAG_AVAILABLE BIT(0)
+#define KSMBD_SHARE_FLAG_BROWSEABLE BIT(1)
+#define KSMBD_SHARE_FLAG_WRITEABLE BIT(2)
+#define KSMBD_SHARE_FLAG_READONLY BIT(3)
+#define KSMBD_SHARE_FLAG_GUEST_OK BIT(4)
+#define KSMBD_SHARE_FLAG_GUEST_ONLY BIT(5)
+#define KSMBD_SHARE_FLAG_STORE_DOS_ATTRS BIT(6)
+#define KSMBD_SHARE_FLAG_OPLOCKS BIT(7)
+#define KSMBD_SHARE_FLAG_PIPE BIT(8)
+#define KSMBD_SHARE_FLAG_HIDE_DOT_FILES BIT(9)
+#define KSMBD_SHARE_FLAG_INHERIT_OWNER BIT(10)
+#define KSMBD_SHARE_FLAG_STREAMS BIT(11)
+#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12)
+#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13)
+
+/*
+ * Tree connect request flags.
+ */
+#define KSMBD_TREE_CONN_FLAG_REQUEST_SMB1 (0) /* zero: cannot be detected with a bitwise AND */
+#define KSMBD_TREE_CONN_FLAG_REQUEST_IPV6 BIT(0)
+#define KSMBD_TREE_CONN_FLAG_REQUEST_SMB2 BIT(1)
+
+/*
+ * Tree connect flags. NOTE(review): presumably ksmbd_tree_connect_response.connection_flags — confirm.
+ */
+#define KSMBD_TREE_CONN_FLAG_GUEST_ACCOUNT BIT(0)
+#define KSMBD_TREE_CONN_FLAG_READ_ONLY BIT(1)
+#define KSMBD_TREE_CONN_FLAG_WRITABLE BIT(2)
+#define KSMBD_TREE_CONN_FLAG_ADMIN_ACCOUNT BIT(3)
+
+/*
+ * RPC over IPC.
+ */
+#define KSMBD_RPC_METHOD_RETURN BIT(0) /* OR-ed into the *_RETURN, IOCTL, READ and RAP values below */
+#define KSMBD_RPC_SRVSVC_METHOD_INVOKE BIT(1)
+#define KSMBD_RPC_SRVSVC_METHOD_RETURN (KSMBD_RPC_SRVSVC_METHOD_INVOKE | KSMBD_RPC_METHOD_RETURN)
+#define KSMBD_RPC_WKSSVC_METHOD_INVOKE BIT(2)
+#define KSMBD_RPC_WKSSVC_METHOD_RETURN (KSMBD_RPC_WKSSVC_METHOD_INVOKE | KSMBD_RPC_METHOD_RETURN)
+#define KSMBD_RPC_IOCTL_METHOD (BIT(3) | KSMBD_RPC_METHOD_RETURN)
+#define KSMBD_RPC_OPEN_METHOD BIT(4)
+#define KSMBD_RPC_WRITE_METHOD BIT(5)
+#define KSMBD_RPC_READ_METHOD (BIT(6) | KSMBD_RPC_METHOD_RETURN)
+#define KSMBD_RPC_CLOSE_METHOD BIT(7)
+#define KSMBD_RPC_RAP_METHOD (BIT(8) | KSMBD_RPC_METHOD_RETURN)
+#define KSMBD_RPC_RESTRICTED_CONTEXT BIT(9)
+#define KSMBD_RPC_SAMR_METHOD_INVOKE BIT(10)
+#define KSMBD_RPC_SAMR_METHOD_RETURN (KSMBD_RPC_SAMR_METHOD_INVOKE | KSMBD_RPC_METHOD_RETURN)
+#define KSMBD_RPC_LSARPC_METHOD_INVOKE BIT(11)
+#define KSMBD_RPC_LSARPC_METHOD_RETURN (KSMBD_RPC_LSARPC_METHOD_INVOKE | KSMBD_RPC_METHOD_RETURN)
+
+/*
+ * RPC status definitions.
+ */
+#define KSMBD_RPC_OK 0
+#define KSMBD_RPC_EBAD_FUNC 0x00000001
+#define KSMBD_RPC_EACCESS_DENIED 0x00000005
+#define KSMBD_RPC_EBAD_FID 0x00000006
+#define KSMBD_RPC_ENOMEM 0x00000008
+#define KSMBD_RPC_EBAD_DATA 0x0000000D
+#define KSMBD_RPC_ENOTIMPLEMENTED 0x00000040
+#define KSMBD_RPC_EINVALID_PARAMETER 0x00000057
+#define KSMBD_RPC_EMORE_DATA 0x000000EA
+#define KSMBD_RPC_EINVALID_LEVEL 0x0000007C
+#define KSMBD_RPC_SOME_NOT_MAPPED 0x00000107
+
+#define KSMBD_CONFIG_OPT_DISABLED 0
+#define KSMBD_CONFIG_OPT_ENABLED 1
+#define KSMBD_CONFIG_OPT_AUTO 2
+#define KSMBD_CONFIG_OPT_MANDATORY 3
+
+#endif /* _LINUX_KSMBD_SERVER_H */
diff --git a/fs/ksmbd/ksmbd_spnego_negtokeninit.asn1 b/fs/ksmbd/ksmbd_spnego_negtokeninit.asn1
new file mode 100644
index 000000000000..0065f191b54b
--- /dev/null
+++ b/fs/ksmbd/ksmbd_spnego_negtokeninit.asn1
@@ -0,0 +1,31 @@
+GSSAPI ::=
+ [APPLICATION 0] IMPLICIT SEQUENCE {
+ thisMech
+ OBJECT IDENTIFIER ({ksmbd_gssapi_this_mech}),
+ negotiationToken
+ NegotiationToken
+ }
+
+MechType ::= OBJECT IDENTIFIER ({ksmbd_neg_token_init_mech_type})
+
+MechTypeList ::= SEQUENCE OF MechType
+
+NegTokenInit ::=
+ SEQUENCE {
+ mechTypes
+ [0] MechTypeList,
+ reqFlags
+ [1] BIT STRING OPTIONAL,
+ mechToken
+ [2] OCTET STRING OPTIONAL ({ksmbd_neg_token_init_mech_token}),
+ mechListMIC
+ [3] OCTET STRING OPTIONAL
+ }
+
+NegotiationToken ::=
+ CHOICE {
+ negTokenInit
+ [0] NegTokenInit,
+ negTokenTarg
+ [1] ANY -- accepted as an opaque element, not decoded by this grammar
+ }
diff --git a/fs/ksmbd/ksmbd_spnego_negtokentarg.asn1 b/fs/ksmbd/ksmbd_spnego_negtokentarg.asn1
new file mode 100644
index 000000000000..1151933e7b9c
--- /dev/null
+++ b/fs/ksmbd/ksmbd_spnego_negtokentarg.asn1
@@ -0,0 +1,19 @@
+GSSAPI ::=
+ CHOICE {
+ negTokenInit
+ [0] ANY, -- accepted as an opaque element, not decoded by this grammar
+ negTokenTarg
+ [1] NegTokenTarg
+ }
+
+NegTokenTarg ::=
+ SEQUENCE {
+ negResult
+ [0] ENUMERATED OPTIONAL,
+ supportedMech
+ [1] OBJECT IDENTIFIER OPTIONAL,
+ responseToken
+ [2] OCTET STRING OPTIONAL ({ksmbd_neg_token_targ_resp_token}),
+ mechListMIC
+ [3] OCTET STRING OPTIONAL
+ }
diff --git a/fs/ksmbd/ksmbd_work.c b/fs/ksmbd/ksmbd_work.c
new file mode 100644
index 000000000000..fd58eb4809f6
--- /dev/null
+++ b/fs/ksmbd/ksmbd_work.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+#include "server.h"
+#include "connection.h"
+#include "ksmbd_work.h"
+#include "mgmt/ksmbd_ida.h"
+
+static struct kmem_cache *work_cache;
+static struct workqueue_struct *ksmbd_wq;
+
+struct ksmbd_work *ksmbd_alloc_work_struct(void)
+{
+	struct ksmbd_work *w = kmem_cache_zalloc(work_cache, GFP_KERNEL);
+
+	if (!w)
+		return NULL;
+	/* Zeroed by the allocator; only non-zero defaults are set here. */
+	w->compound_fid = w->compound_pfid = KSMBD_NO_FID;
+	INIT_LIST_HEAD(&w->request_entry);
+	INIT_LIST_HEAD(&w->async_request_entry);
+	INIT_LIST_HEAD(&w->fp_entry);
+	INIT_LIST_HEAD(&w->interim_entry);
+	return w;
+}
+
+void ksmbd_free_work_struct(struct ksmbd_work *work)
+{
+	/* A work that still carries saved credentials triggers a warning. */
+	WARN_ON(work->saved_cred);
+	kvfree(work->request_buf);
+	kvfree(work->response_buf);
+	kvfree(work->aux_payload_buf);
+	kfree(work->tr_buf);
+	if (work->async_id != 0)
+		ksmbd_release_id(&work->conn->async_ida, work->async_id);
+	kmem_cache_free(work_cache, work);
+}
+
+void ksmbd_work_pool_destroy(void)
+{
+ kmem_cache_destroy(work_cache); /* the cache set up by ksmbd_work_pool_init() */
+}
+
+int ksmbd_work_pool_init(void)
+{
+	/* Dedicated slab cache for struct ksmbd_work objects. */
+	work_cache = kmem_cache_create("ksmbd_work_cache",
+				       sizeof(struct ksmbd_work), 0,
+				       SLAB_HWCACHE_ALIGN, NULL);
+
+	return work_cache ? 0 : -ENOMEM;
+}
+
+int ksmbd_workqueue_init(void)
+{
+	/* The workqueue that ksmbd_queue_work() queues onto. */
+	ksmbd_wq = alloc_workqueue("ksmbd-io", 0, 0);
+
+	return ksmbd_wq ? 0 : -ENOMEM;
+}
+
+void ksmbd_workqueue_destroy(void)
+{
+ flush_workqueue(ksmbd_wq); /* flush the queue before tearing it down */
+ destroy_workqueue(ksmbd_wq);
+ ksmbd_wq = NULL; /* do not leave a stale pointer behind */
+}
+
+bool ksmbd_queue_work(struct ksmbd_work *work)
+{
+ return queue_work(ksmbd_wq, &work->work); /* queue_work()'s result is passed through */
+}
diff --git a/fs/ksmbd/ksmbd_work.h b/fs/ksmbd/ksmbd_work.h
new file mode 100644
index 000000000000..f7156bc50049
--- /dev/null
+++ b/fs/ksmbd/ksmbd_work.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_WORK_H__
+#define __KSMBD_WORK_H__
+
+#include <linux/ctype.h>
+#include <linux/workqueue.h>
+
+struct ksmbd_conn;
+struct ksmbd_session;
+struct ksmbd_tree_connect;
+
+enum {
+ KSMBD_WORK_ACTIVE = 0, /* NOTE(review): presumably the values of ksmbd_work.state — confirm */
+ KSMBD_WORK_CANCELLED,
+ KSMBD_WORK_CLOSED,
+};
+
+/* one of these for every pending CIFS request at the connection */
+struct ksmbd_work {
+ /* Server corresponding to this mid */
+ struct ksmbd_conn *conn;
+ struct ksmbd_session *sess;
+ struct ksmbd_tree_connect *tcon;
+
+ /* Pointer to received SMB header */
+ void *request_buf;
+ /* Response buffer */
+ void *response_buf;
+
+ /* Read data buffer */
+ void *aux_payload_buf;
+
+ /* Offset of the next cmd hdr in the compound request buffer */
+ int next_smb2_rcv_hdr_off;
+ /* Offset of the next cmd hdr in the compound response buffer */
+ int next_smb2_rsp_hdr_off;
+
+ /*
+ * Current Local FID assigned compound response if SMB2 CREATE
+ * command is present in compound request
+ */
+ u64 compound_fid;
+ u64 compound_pfid;
+ u64 compound_sid;
+
+ const struct cred *saved_cred; /* expected to be NULL again by the time the work is freed */
+
+ /* Number of granted credits */
+ unsigned int credits_granted;
+
+ /* response smb header size */
+ unsigned int resp_hdr_sz;
+ unsigned int response_sz;
+ /* Read data count */
+ unsigned int aux_payload_sz;
+
+ void *tr_buf; /* kfree()d together with the work */
+
+ unsigned char state;
+ /* Multiple responses for one request e.g. SMB ECHO */
+ bool multiRsp:1;
+ /* No response for cancelled request */
+ bool send_no_response:1;
+ /* Request is encrypted */
+ bool encrypted:1;
+ /* Is this SYNC or ASYNC ksmbd_work */
+ bool syncronous:1;
+ bool need_invalidate_rkey:1;
+
+ unsigned int remote_key;
+ /* cancel works */
+ int async_id; /* 0 = none; otherwise released to conn->async_ida when the work is freed */
+ void **cancel_argv;
+ void (*cancel_fn)(void **argv);
+
+ struct work_struct work;
+ /* List head at conn->requests */
+ struct list_head request_entry;
+ /* List head at conn->async_requests */
+ struct list_head async_request_entry;
+ struct list_head fp_entry;
+ struct list_head interim_entry;
+};
+
+/**
+ * ksmbd_resp_buf_next - Get next buffer on compound response.
+ * @work: smb work containing response buffer
+ */
+static inline void *ksmbd_resp_buf_next(struct ksmbd_work *work)
+{
+ return work->response_buf + work->next_smb2_rsp_hdr_off; /* response buffer start plus the stored header offset */
+}
+
+/**
+ * ksmbd_req_buf_next - Get next buffer on compound request.
+ * @work: smb work containing request buffer
+ */
+static inline void *ksmbd_req_buf_next(struct ksmbd_work *work)
+{
+ return work->request_buf + work->next_smb2_rcv_hdr_off; /* request buffer start plus the stored header offset */
+}
+
+struct ksmbd_work *ksmbd_alloc_work_struct(void);
+void ksmbd_free_work_struct(struct ksmbd_work *work);
+
+void ksmbd_work_pool_destroy(void);
+int ksmbd_work_pool_init(void);
+
+int ksmbd_workqueue_init(void);
+void ksmbd_workqueue_destroy(void);
+bool ksmbd_queue_work(struct ksmbd_work *work);
+
+#endif /* __KSMBD_WORK_H__ */
diff --git a/fs/ksmbd/mgmt/ksmbd_ida.c b/fs/ksmbd/mgmt/ksmbd_ida.c
new file mode 100644
index 000000000000..54194d959a5e
--- /dev/null
+++ b/fs/ksmbd/mgmt/ksmbd_ida.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include "ksmbd_ida.h"
+
+static inline int __acquire_id(struct ida *ida, int from, int to)
+{
+ return ida_simple_get(ida, from, to, GFP_KERNEL); /* NOTE(review): most callers pass to == 0, presumably "no upper bound" — confirm */
+}
+
+int ksmbd_acquire_smb2_tid(struct ida *ida)
+{
+	/*
+	 * Returns a new id (>= 1) or a negative errno. The bound is given
+	 * to ida_alloc_range() directly rather than via an int parameter.
+	 */
+	return ida_alloc_range(ida, 1, 0xFFFFFFFE, GFP_KERNEL);
+}
+
+int ksmbd_acquire_smb2_uid(struct ida *ida)
+{
+	int id = __acquire_id(ida, 1, 0);
+
+	/*
+	 * 0xFFFE is never handed out: it is left allocated in @ida and the
+	 * next free id is returned in its place.
+	 */
+	return id == 0xFFFE ? __acquire_id(ida, 1, 0) : id;
+}
+
+int ksmbd_acquire_async_msg_id(struct ida *ida)
+{
+ return __acquire_id(ida, 1, 0); /* ids start at 1; ksmbd_free_work_struct() treats async_id 0 as "none" */
+}
+
+int ksmbd_acquire_id(struct ida *ida)
+{
+ return __acquire_id(ida, 0, 0); /* allocation starts at 0, so id 0 may be returned */
+}
+
+void ksmbd_release_id(struct ida *ida, int id)
+{
+ ida_simple_remove(ida, id); /* hands @id back to @ida */
+}
diff --git a/fs/ksmbd/mgmt/ksmbd_ida.h b/fs/ksmbd/mgmt/ksmbd_ida.h
new file mode 100644
index 000000000000..2bc07b16cfde
--- /dev/null
+++ b/fs/ksmbd/mgmt/ksmbd_ida.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_IDA_MANAGEMENT_H__
+#define __KSMBD_IDA_MANAGEMENT_H__
+
+#include <linux/slab.h>
+#include <linux/idr.h>
+
+/*
+ * 2.2.1.6.7 TID Generation
+ * The value 0xFFFF MUST NOT be used as a valid TID. All other
+ * possible values for TID, including zero (0x0000), are valid.
+ * The value 0xFFFF is used to specify all TIDs or no TID,
+ * depending upon the context in which it is used.
+ */
+int ksmbd_acquire_smb2_tid(struct ida *ida);
+
+/*
+ * 2.2.1.6.8 UID Generation
+ * The value 0xFFFE was declared reserved in the LAN Manager 1.0
+ * documentation, so a value of 0xFFFE SHOULD NOT be used as a
+ * valid UID.<21> All other possible values for a UID, excluding
+ * zero (0x0000), are valid.
+ */
+int ksmbd_acquire_smb2_uid(struct ida *ida);
+int ksmbd_acquire_async_msg_id(struct ida *ida);
+
+int ksmbd_acquire_id(struct ida *ida);
+
+void ksmbd_release_id(struct ida *ida, int id);
+#endif /* __KSMBD_IDA_MANAGEMENT_H__ */
diff --git a/fs/ksmbd/mgmt/share_config.c b/fs/ksmbd/mgmt/share_config.c
new file mode 100644
index 000000000000..cb72d30f5b71
--- /dev/null
+++ b/fs/ksmbd/mgmt/share_config.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/rwsem.h>
+#include <linux/parser.h>
+#include <linux/namei.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include "share_config.h"
+#include "user_config.h"
+#include "user_session.h"
+#include "../transport_ipc.h"
+
+#define SHARE_HASH_BITS 3 /* shares_table is sized with this many hash bits */
+static DEFINE_HASHTABLE(shares_table, SHARE_HASH_BITS);
+static DECLARE_RWSEM(shares_table_lock); /* held around every shares_table access in this file */
+
+struct ksmbd_veto_pattern {
+ char *pattern; /* heap copy of one wildcard pattern, freed in kill_share() */
+ struct list_head list; /* entry in ksmbd_share_config.veto_list */
+};
+
+static unsigned int share_name_hash(char *name)
+{
+ return jhash(name, strlen(name), 0); /* hashes the name bytes, terminating NUL excluded */
+}
+
+static void kill_share(struct ksmbd_share_config *share)
+{
+	struct ksmbd_veto_pattern *pat, *next;
+
+	/* Unlink and free every veto pattern attached to the share. */
+	list_for_each_entry_safe(pat, next, &share->veto_list, list) {
+		list_del(&pat->list);
+		kfree(pat->pattern);
+		kfree(pat);
+	}
+	/*
+	 * path != NULL: vfs_path may hold a reference taken by kern_path().
+	 */
+	if (share->path)
+		path_put(&share->vfs_path);
+	kfree(share->path);
+	kfree(share->name);
+	kfree(share);
+}
+
+void __ksmbd_share_config_put(struct ksmbd_share_config *share)
+{
+ down_write(&shares_table_lock);
+ hash_del(&share->hlist); /* unhash first so lookups stop finding the share */
+ up_write(&shares_table_lock);
+
+ kill_share(share); /* then free it outside the lock */
+}
+
+static struct ksmbd_share_config *
+__get_share_config(struct ksmbd_share_config *share)
+{
+	bool pinned = atomic_inc_not_zero(&share->refcount);
+
+	return pinned ? share : NULL;	/* refcount 0: no new reference */
+}
+
+static struct ksmbd_share_config *__share_lookup(char *name)
+{
+	unsigned int bucket_key = share_name_hash(name);
+	struct ksmbd_share_config *cur;
+
+	/* Callers in this file hold shares_table_lock around the walk. */
+	hash_for_each_possible(shares_table, cur, hlist, bucket_key)
+		if (strcmp(cur->name, name) == 0)
+			return cur;
+	return NULL;
+}
+
+static int parse_veto_list(struct ksmbd_share_config *share,
+			   char *veto_list,
+			   int veto_list_sz)
+{
+	/*
+	 * veto_list holds back-to-back NUL-terminated patterns spanning
+	 * veto_list_sz bytes. Lengths are taken with strnlen() so a missing
+	 * terminator cannot drag the scan past the declared size.
+	 */
+	while (veto_list_sz > 0) {
+		struct ksmbd_veto_pattern *p;
+		int sz = strnlen(veto_list, veto_list_sz);
+
+		if (!sz)
+			break;
+
+		p = kzalloc(sizeof(*p), GFP_KERNEL);
+		if (!p)
+			return -ENOMEM;
+
+		p->pattern = kstrndup(veto_list, sz, GFP_KERNEL);
+		if (!p->pattern) {
+			kfree(p);
+			return -ENOMEM;
+		}
+
+		/* On -ENOMEM the patterns linked so far stay on the list. */
+		list_add(&p->list, &share->veto_list);
+		veto_list += sz + 1;
+		veto_list_sz -= sz + 1;
+	}
+
+	return 0;
+}
+
+/* Fetch @name's share config over IPC and publish it in shares_table. */
+static struct ksmbd_share_config *share_config_request(char *name)
+{
+	struct ksmbd_share_config_response *resp;
+	struct ksmbd_share_config *share = NULL;
+	struct ksmbd_share_config *lookup;
+	int ret = 0;
+
+	resp = ksmbd_ipc_share_config_request(name);
+	if (!resp)
+		return NULL;
+
+	if (resp->flags == KSMBD_SHARE_FLAG_INVALID)
+		goto out;
+
+	share = kzalloc(sizeof(struct ksmbd_share_config), GFP_KERNEL);
+	if (!share)
+		goto out;
+
+	share->flags = resp->flags;
+	atomic_set(&share->refcount, 1);
+	INIT_LIST_HEAD(&share->veto_list);
+	share->name = kstrdup(name, GFP_KERNEL);
+
+	if (!test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) {
+		share->path = kstrdup(ksmbd_share_config_path(resp),
+				      GFP_KERNEL);
+		if (share->path)
+			share->path_sz = strlen(share->path);
+		share->create_mask = resp->create_mask;
+		share->directory_mask = resp->directory_mask;
+		share->force_create_mode = resp->force_create_mode;
+		share->force_directory_mode = resp->force_directory_mode;
+		share->force_uid = resp->force_uid;
+		share->force_gid = resp->force_gid;
+		ret = parse_veto_list(share, KSMBD_SHARE_CONFIG_VETO_LIST(resp),
+				      resp->veto_list_sz);
+		if (!ret && share->path) {
+			ret = kern_path(share->path, 0, &share->vfs_path);
+			if (ret) {
+				ksmbd_debug(SMB, "failed to access '%s'\n",
+					    share->path);
+				/* Avoid path_put() */
+				kfree(share->path);
+				share->path = NULL;
+			}
+		}
+	}
+	if (ret || !share->name) {	/* lookups strcmp() the name: pipes need it too */
+		kill_share(share);
+		share = NULL;
+		goto out;
+	}
+
+	down_write(&shares_table_lock);
+	lookup = __share_lookup(name);
+	if (lookup)
+		lookup = __get_share_config(lookup);
+	if (!lookup) {
+		hash_add(shares_table, &share->hlist, share_name_hash(name));
+	} else {
+		kill_share(share);	/* same name already hashed: use that one */
+		share = lookup;
+	}
+	up_write(&shares_table_lock);
+
+out:
+	kvfree(resp);
+	return share;	/* NULL on any failure */
+}
+
+static void strtolower(char *share_name)
+{
+	char *c = share_name;
+
+	for (; *c; c++)	/* lower-case the string in place */
+		*c = tolower(*c);
+}
+
+struct ksmbd_share_config *ksmbd_share_config_get(char *name)
+{
+	struct ksmbd_share_config *share;
+
+	/* @name is lower-cased in place before it is looked up. */
+	strtolower(name);
+
+	down_read(&shares_table_lock);
+	share = __share_lookup(name);
+	if (share)
+		share = __get_share_config(share);
+	up_read(&shares_table_lock);
+
+	/* Not hashed, or no reference could be taken: request the config. */
+	return share ? share : share_config_request(name);
+}
+
+bool ksmbd_share_veto_filename(struct ksmbd_share_config *share,
+			       const char *filename)
+{
+	struct ksmbd_veto_pattern *pat;
+
+	/* Vetoed as soon as any one wildcard pattern matches. */
+	list_for_each_entry(pat, &share->veto_list, list)
+		if (match_wildcard(pat->pattern, filename))
+			return true;
+	return false;
+}
+
+void ksmbd_share_configs_cleanup(void)
+{
+ struct ksmbd_share_config *share;
+ struct hlist_node *tmp;
+ int i;
+
+ down_write(&shares_table_lock);
+ hash_for_each_safe(shares_table, i, tmp, share, hlist) { /* _safe variant: entries are deleted while iterating */
+ hash_del(&share->hlist);
+ kill_share(share); /* freed without consulting share->refcount */
+ }
+ up_write(&shares_table_lock);
+}
diff --git a/fs/ksmbd/mgmt/share_config.h b/fs/ksmbd/mgmt/share_config.h
new file mode 100644
index 000000000000..953befc94e84
--- /dev/null
+++ b/fs/ksmbd/mgmt/share_config.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __SHARE_CONFIG_MANAGEMENT_H__
+#define __SHARE_CONFIG_MANAGEMENT_H__
+
+#include <linux/workqueue.h>
+#include <linux/hashtable.h>
+#include <linux/path.h>
+
+struct ksmbd_share_config {
+ char *name; /* lower-cased share name; compared by __share_lookup() */
+ char *path; /* share path from the IPC response; NULL for pipe shares or when kern_path() failed */
+
+ unsigned int path_sz; /* strlen(path) */
+ unsigned int flags; /* KSMBD_SHARE_FLAG_* bits */
+ struct list_head veto_list; /* list of struct ksmbd_veto_pattern */
+
+ struct path vfs_path; /* filled in by kern_path(path); put in kill_share() while path != NULL */
+
+ atomic_t refcount; /* starts at 1; the share is unhashed and freed when it drops to 0 */
+ struct hlist_node hlist; /* linkage in shares_table */
+ unsigned short create_mask;
+ unsigned short directory_mask;
+ unsigned short force_create_mode;
+ unsigned short force_directory_mode;
+ unsigned short force_uid;
+ unsigned short force_gid;
+};
+
+#define KSMBD_SHARE_INVALID_UID ((__u16)-1) /* all-ones 16-bit value */
+#define KSMBD_SHARE_INVALID_GID ((__u16)-1) /* all-ones 16-bit value */
+
+static inline int share_config_create_mode(struct ksmbd_share_config *share,
+					   umode_t posix_mode)
+{
+	/* Forced mode first, then the caller's mode, then the bare mask. */
+	if (share->force_create_mode)
+		return share->force_create_mode & share->create_mask;
+	if (posix_mode)
+		return posix_mode & share->create_mask;
+
+	return share->create_mask;
+}
+
+static inline int share_config_directory_mode(struct ksmbd_share_config *share,
+					      umode_t posix_mode)
+{
+	/*
+	 * Forced mode first, then the caller's mode, then the bare mask.
+	 */
+	if (share->force_directory_mode)
+		return share->force_directory_mode & share->directory_mask;
+	if (posix_mode)
+		return posix_mode & share->directory_mask;
+	return share->directory_mask;
+}
+
+static inline int test_share_config_flag(struct ksmbd_share_config *share,
+ int flag)
+{
+ return share->flags & flag; /* non-zero when any bit of @flag is set */
+}
+
+void __ksmbd_share_config_put(struct ksmbd_share_config *share);
+
+static inline void ksmbd_share_config_put(struct ksmbd_share_config *share)
+{
+	bool last = atomic_dec_and_test(&share->refcount);
+
+	if (last)	/* final reference: unhash and free the share */
+		__ksmbd_share_config_put(share);
+}
+
+struct ksmbd_share_config *ksmbd_share_config_get(char *name);
+bool ksmbd_share_veto_filename(struct ksmbd_share_config *share,
+ const char *filename);
+void ksmbd_share_configs_cleanup(void);
+
+#endif /* __SHARE_CONFIG_MANAGEMENT_H__ */
diff --git a/fs/ksmbd/mgmt/tree_connect.c b/fs/ksmbd/mgmt/tree_connect.c
new file mode 100644
index 000000000000..0d28e723a28c
--- /dev/null
+++ b/fs/ksmbd/mgmt/tree_connect.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+
+#include "../transport_ipc.h"
+#include "../connection.h"
+
+#include "tree_connect.h"
+#include "user_config.h"
+#include "share_config.h"
+#include "user_session.h"
+
+/*
+ * Connect @sess to @share_name; status.tree_conn is set only on success.
+ */
+struct ksmbd_tree_conn_status
+ksmbd_tree_conn_connect(struct ksmbd_session *sess, char *share_name)
+{
+	struct ksmbd_tree_conn_status status = {-EINVAL, NULL};
+	struct ksmbd_tree_connect_response *resp = NULL;
+	struct ksmbd_share_config *sc;
+	struct ksmbd_tree_connect *tree_conn = NULL;
+	struct sockaddr *peer_addr;
+	int ret;
+
+	sc = ksmbd_share_config_get(share_name);
+	if (!sc)
+		return status;
+
+	tree_conn = kzalloc(sizeof(struct ksmbd_tree_connect), GFP_KERNEL);
+	if (!tree_conn) {
+		status.ret = -ENOMEM;
+		goto out_error;
+	}
+
+	tree_conn->id = ksmbd_acquire_tree_conn_id(sess);
+	if (tree_conn->id < 0) {
+		status.ret = -EINVAL;
+		goto out_error;
+	}
+
+	peer_addr = KSMBD_TCP_PEER_SOCKADDR(sess->conn);
+	resp = ksmbd_ipc_tree_connect_request(sess, sc, tree_conn, peer_addr);
+	if (!resp) {
+		status.ret = -EINVAL;
+		goto out_error;
+	}
+
+	status.ret = resp->status;	/* KSMBD_TREE_CONN_STATUS_* from the reply */
+	if (status.ret != KSMBD_TREE_CONN_STATUS_OK)
+		goto out_error;
+
+	tree_conn->flags = resp->connection_flags;
+	tree_conn->user = sess->user;
+	tree_conn->share_conf = sc;
+
+	ret = xa_err(xa_store(&sess->tree_conns, tree_conn->id, tree_conn,
+			      GFP_KERNEL));
+	if (ret) {
+		status.ret = -ENOMEM;
+		goto out_error;
+	}
+	status.tree_conn = tree_conn;	/* set last: never return a freed object */
+	kvfree(resp);
+	return status;
+
+out_error:
+	if (tree_conn && tree_conn->id >= 0)	/* release only an id we hold */
+		ksmbd_release_tree_conn_id(sess, tree_conn->id);
+	ksmbd_share_config_put(sc);
+	kfree(tree_conn);
+	kvfree(resp);
+	return status;
+}
+
+int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess,
+ struct ksmbd_tree_connect *tree_conn)
+{
+ int ret;
+
+ ret = ksmbd_ipc_tree_disconnect_request(sess->id, tree_conn->id); /* local teardown continues whatever this returns */
+ ksmbd_release_tree_conn_id(sess, tree_conn->id);
+ xa_erase(&sess->tree_conns, tree_conn->id);
+ ksmbd_share_config_put(tree_conn->share_conf); /* drops the share reference held by this connection */
+ kfree(tree_conn);
+ return ret; /* result of the IPC disconnect request */
+}
+
+struct ksmbd_tree_connect *ksmbd_tree_conn_lookup(struct ksmbd_session *sess,
+ unsigned int id)
+{
+ return xa_load(&sess->tree_conns, id); /* whatever is stored under @id in the session's xarray */
+}
+
+struct ksmbd_share_config *ksmbd_tree_conn_share(struct ksmbd_session *sess,
+						  unsigned int id)
+{
+	struct ksmbd_tree_connect *conn = ksmbd_tree_conn_lookup(sess, id);
+
+	/*
+	 * Unknown id: NULL. The share config reference count is untouched.
+	 */
+	return conn ? conn->share_conf : NULL;
+}
+
+int ksmbd_tree_conn_session_logoff(struct ksmbd_session *sess)
+{
+ int ret = 0;
+ struct ksmbd_tree_connect *tc;
+ unsigned long id;
+
+ xa_for_each(&sess->tree_conns, id, tc)
+ ret |= ksmbd_tree_conn_disconnect(sess, tc); /* results are OR-ed: non-zero if any disconnect returned non-zero */
+ xa_destroy(&sess->tree_conns);
+ return ret;
+}
diff --git a/fs/ksmbd/mgmt/tree_connect.h b/fs/ksmbd/mgmt/tree_connect.h
new file mode 100644
index 000000000000..18e2a996e0aa
--- /dev/null
+++ b/fs/ksmbd/mgmt/tree_connect.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __TREE_CONNECT_MANAGEMENT_H__
+#define __TREE_CONNECT_MANAGEMENT_H__
+
+#include <linux/hashtable.h>
+
+#include "../ksmbd_netlink.h"
+
+struct ksmbd_share_config;
+struct ksmbd_user;
+
+struct ksmbd_tree_connect {
+ int id; /* tree id; also the index in the session's tree_conns xarray */
+
+ unsigned int flags; /* connection_flags from the tree connect response */
+ struct ksmbd_share_config *share_conf; /* reference is dropped in ksmbd_tree_conn_disconnect() */
+ struct ksmbd_user *user; /* sess->user at connect time */
+
+ struct list_head list;
+
+ int maximal_access;
+ bool posix_extensions;
+};
+
+struct ksmbd_tree_conn_status {
+ unsigned int ret; /* KSMBD_TREE_CONN_STATUS_* or a negative errno stored as unsigned */
+ struct ksmbd_tree_connect *tree_conn; /* filled in by ksmbd_tree_conn_connect() */
+};
+
+static inline int test_tree_conn_flag(struct ksmbd_tree_connect *tree_conn,
+ int flag)
+{
+ return tree_conn->flags & flag; /* non-zero when any bit of @flag is set */
+}
+
+struct ksmbd_session;
+
+struct ksmbd_tree_conn_status
+ksmbd_tree_conn_connect(struct ksmbd_session *sess, char *share_name);
+
+int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess,
+ struct ksmbd_tree_connect *tree_conn);
+
+struct ksmbd_tree_connect *ksmbd_tree_conn_lookup(struct ksmbd_session *sess,
+ unsigned int id);
+
+struct ksmbd_share_config *ksmbd_tree_conn_share(struct ksmbd_session *sess,
+ unsigned int id);
+
+int ksmbd_tree_conn_session_logoff(struct ksmbd_session *sess);
+
+#endif /* __TREE_CONNECT_MANAGEMENT_H__ */
diff --git a/fs/ksmbd/mgmt/user_config.c b/fs/ksmbd/mgmt/user_config.c
new file mode 100644
index 000000000000..d21629ae5c89
--- /dev/null
+++ b/fs/ksmbd/mgmt/user_config.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include "user_config.h"
+#include "../transport_ipc.h"
+
+struct ksmbd_user *ksmbd_login_user(const char *account)
+{
+	struct ksmbd_login_response *resp = ksmbd_ipc_login_request(account);
+	struct ksmbd_user *user = NULL;
+
+	if (!resp)
+		return NULL;
+
+	/*
+	 * A user object is built only when the response carries
+	 * KSMBD_USER_FLAG_OK; otherwise NULL is returned.
+	 */
+	if (resp->status & KSMBD_USER_FLAG_OK)
+		user = ksmbd_alloc_user(resp);
+	kvfree(resp);
+	return user;
+}
+
+struct ksmbd_user *ksmbd_alloc_user(struct ksmbd_login_response *resp)
+{
+	struct ksmbd_user *user;
+
+	/* hash_sz is supplied by the IPC peer: never read past hash[]. */
+	if (resp->hash_sz > sizeof(resp->hash))
+		return NULL;
+
+	user = kmalloc(sizeof(struct ksmbd_user), GFP_KERNEL);
+	if (!user)
+		return NULL;
+
+	user->name = kstrdup(resp->account, GFP_KERNEL);
+	user->flags = resp->status;
+	user->gid = resp->gid;
+	user->uid = resp->uid;
+	user->passkey_sz = resp->hash_sz;
+	user->passkey = kmemdup(resp->hash, resp->hash_sz, GFP_KERNEL);
+	if (user->name && user->passkey)
+		return user;
+	kfree(user->name);	/* a copy failed: undo and report NULL */
+	kfree(user->passkey);
+	kfree(user);
+	return NULL;
+}
+
+void ksmbd_free_user(struct ksmbd_user *user)
+{
+	ksmbd_ipc_logout_request(user->name);
+	kfree(user->name);
+	kfree_sensitive(user->passkey);	/* scrub the password hash */
+	kfree(user);
+}
+
+int ksmbd_anonymous_user(struct ksmbd_user *user)
+{
+	const char *account = user->name;
+
+	return account[0] == '\0';	/* 1 for an empty name, else 0 */
+}
diff --git a/fs/ksmbd/mgmt/user_config.h b/fs/ksmbd/mgmt/user_config.h
new file mode 100644
index 000000000000..b2bb074a0150
--- /dev/null
+++ b/fs/ksmbd/mgmt/user_config.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __USER_CONFIG_MANAGEMENT_H__
+#define __USER_CONFIG_MANAGEMENT_H__
+
+#include "../glob.h"
+
+struct ksmbd_user {
+ unsigned short flags; /* KSMBD_USER_FLAG_* bits, taken from the login response status */
+
+ unsigned int uid;
+ unsigned int gid;
+
+ char *name; /* account name; ksmbd_anonymous_user() reports an empty string */
+
+ size_t passkey_sz; /* hash_sz from the login response */
+ char *passkey; /* copy of the login response's password hash */
+};
+
+static inline bool user_guest(struct ksmbd_user *user)
+{
+ return user->flags & KSMBD_USER_FLAG_GUEST_ACCOUNT; /* true when the guest-account bit is set */
+}
+
+static inline void set_user_flag(struct ksmbd_user *user, int flag)
+{
+ user->flags |= flag; /* sets the given bit(s) in user->flags */
+}
+
+static inline int test_user_flag(struct ksmbd_user *user, int flag)
+{
+ return user->flags & flag; /* non-zero when any bit of @flag is set */
+}
+
+static inline void set_user_guest(struct ksmbd_user *user)
+{ /* NOTE(review): empty body — nothing is set here; confirm this is intended */
+}
+
+static inline char *user_passkey(struct ksmbd_user *user)
+{
+ return user->passkey;
+}
+
+static inline char *user_name(struct ksmbd_user *user)
+{
+ return user->name;
+}
+
+static inline unsigned int user_uid(struct ksmbd_user *user)
+{
+ return user->uid;
+}
+
+static inline unsigned int user_gid(struct ksmbd_user *user)
+{
+ return user->gid;
+}
+
+struct ksmbd_user *ksmbd_login_user(const char *account);
+struct ksmbd_user *ksmbd_alloc_user(struct ksmbd_login_response *resp);
+void ksmbd_free_user(struct ksmbd_user *user);
+int ksmbd_anonymous_user(struct ksmbd_user *user);
+#endif /* __USER_CONFIG_MANAGEMENT_H__ */
diff --git a/fs/ksmbd/mgmt/user_session.c b/fs/ksmbd/mgmt/user_session.c
new file mode 100644
index 000000000000..8d8ffd8c6f19
--- /dev/null
+++ b/fs/ksmbd/mgmt/user_session.c
@@ -0,0 +1,369 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/rwsem.h>
+#include <linux/xarray.h>
+
+#include "ksmbd_ida.h"
+#include "user_session.h"
+#include "user_config.h"
+#include "tree_connect.h"
+#include "../transport_ipc.h"
+#include "../connection.h"
+#include "../vfs_cache.h"
+
+static DEFINE_IDA(session_ida);
+
+#define SESSION_HASH_BITS 3
+static DEFINE_HASHTABLE(sessions_table, SESSION_HASH_BITS);
+static DECLARE_RWSEM(sessions_table_lock);
+
+/* One RPC pipe handle opened on a session; linked on sess->rpc_handle_list. */
+struct ksmbd_session_rpc {
+ int id; /* handle id obtained from ksmbd_ipc_id_alloc() */
+ unsigned int method; /* value __rpc_method() returned for the pipe name */
+ struct list_head list;
+};
+
+/* Unlink and free every channel on sess->ksmbd_chann_list. */
+static void free_channel_list(struct ksmbd_session *sess)
+{
+	struct list_head *head = &sess->ksmbd_chann_list;
+
+	while (!list_empty(head)) {
+		struct channel *chann;
+
+		chann = list_first_entry(head, struct channel, chann_list);
+		list_del(&chann->chann_list);
+		kfree(chann);
+	}
+}
+
+/*
+ * Close one RPC pipe and free its bookkeeping entry. The caller must already
+ * have removed @entry from the session's handle list.
+ */
+static void __session_rpc_close(struct ksmbd_session *sess,
+				struct ksmbd_session_rpc *entry)
+{
+	int id = entry->id;
+	struct ksmbd_rpc_command *reply = ksmbd_rpc_close(sess, id);
+
+	if (!reply)
+		pr_err("Unable to close RPC pipe %d\n", id);
+
+	/* kvfree() accepts NULL, so the reply is released unconditionally. */
+	kvfree(reply);
+	ksmbd_rpc_id_free(id);
+	kfree(entry);
+}
+
+/* Close every RPC pipe that is still open on the session. */
+static void ksmbd_session_rpc_clear_list(struct ksmbd_session *sess)
+{
+	struct list_head *handles = &sess->rpc_handle_list;
+
+	while (!list_empty(handles)) {
+		struct ksmbd_session_rpc *head;
+
+		head = list_first_entry(handles, struct ksmbd_session_rpc, list);
+		list_del(&head->list);
+		__session_rpc_close(sess, head);
+	}
+}
+
+/*
+ * Map an RPC pipe name to its method constant.
+ *
+ * Each pipe is accepted under the two spellings listed in the table.
+ * Returns 0 (after logging) for a name that is not in the table.
+ */
+static int __rpc_method(char *rpc_name)
+{
+	static const struct {
+		const char *first;
+		const char *second;
+		int method;
+	} pipes[] = {
+		{ "\\srvsvc", "srvsvc", KSMBD_RPC_SRVSVC_METHOD_INVOKE },
+		{ "\\wkssvc", "wkssvc", KSMBD_RPC_WKSSVC_METHOD_INVOKE },
+		{ "LANMAN", "lanman", KSMBD_RPC_RAP_METHOD },
+		{ "\\samr", "samr", KSMBD_RPC_SAMR_METHOD_INVOKE },
+		{ "\\lsarpc", "lsarpc", KSMBD_RPC_LSARPC_METHOD_INVOKE },
+	};
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(pipes); i++) {
+		if (!strcmp(rpc_name, pipes[i].first) ||
+		    !strcmp(rpc_name, pipes[i].second))
+			return pipes[i].method;
+	}
+
+	pr_err("Unsupported RPC: %s\n", rpc_name);
+	return 0;
+}
+
+/**
+ * ksmbd_session_rpc_open() - open an RPC pipe on a session
+ * @sess:	session that will own the pipe handle
+ * @rpc_name:	pipe name, looked up with __rpc_method()
+ *
+ * Return: the new handle id (>= 0) on success, -EINVAL on any failure
+ */
+int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name)
+{
+	struct ksmbd_session_rpc *entry;
+	struct ksmbd_rpc_command *resp;
+	int method;
+
+	method = __rpc_method(rpc_name);
+	if (!method)
+		return -EINVAL;
+
+	entry = kzalloc(sizeof(struct ksmbd_session_rpc), GFP_KERNEL);
+	if (!entry)
+		return -EINVAL;
+
+	/*
+	 * The entry is linked before ksmbd_rpc_open() is called, as in the
+	 * original code. NOTE(review): presumably the open request resolves
+	 * the handle through sess->rpc_handle_list -- do not reorder without
+	 * checking.
+	 */
+	list_add(&entry->list, &sess->rpc_handle_list);
+	entry->method = method;
+	entry->id = ksmbd_ipc_id_alloc();
+	if (entry->id < 0)
+		goto free_entry;
+
+	resp = ksmbd_rpc_open(sess, entry->id);
+	if (!resp)
+		goto free_id;
+
+	kvfree(resp);
+	return entry->id;
+free_id:
+	/* Hand the id back; it was previously leaked on this path. */
+	ksmbd_rpc_id_free(entry->id);
+free_entry:
+	list_del(&entry->list);
+	kfree(entry);
+	return -EINVAL;
+}
+
+/* Close the RPC pipe with handle @id, if the session has one. */
+void ksmbd_session_rpc_close(struct ksmbd_session *sess, int id)
+{
+	struct ksmbd_session_rpc *entry;
+	struct ksmbd_session_rpc *victim = NULL;
+
+	list_for_each_entry(entry, &sess->rpc_handle_list, list) {
+		if (entry->id != id)
+			continue;
+		victim = entry;
+		break;
+	}
+
+	if (!victim)
+		return;
+
+	list_del(&victim->list);
+	__session_rpc_close(sess, victim);
+}
+
+/* Return the method recorded for handle @id, or 0 when @id is unknown. */
+int ksmbd_session_rpc_method(struct ksmbd_session *sess, int id)
+{
+	struct ksmbd_session_rpc *entry;
+	int method = 0;
+
+	list_for_each_entry(entry, &sess->rpc_handle_list, list) {
+		if (entry->id != id)
+			continue;
+		method = entry->method;
+		break;
+	}
+	return method;
+}
+
+/**
+ * ksmbd_session_destroy() - drop a reference and tear the session down
+ * @sess:	session to release; NULL is accepted and ignored
+ *
+ * Only the call that drops sess->refcnt to zero performs the teardown; any
+ * other call just decrements the counter and returns.
+ */
+void ksmbd_session_destroy(struct ksmbd_session *sess)
+{
+ if (!sess)
+ return;
+
+ if (!atomic_dec_and_test(&sess->refcnt))
+ return;
+
+ /* Unlink from the list ksmbd_session_register() put the session on. */
+ list_del(&sess->sessions_entry);
+
+ /* Remove from the global id hash so slow-path lookups stop finding it. */
+ down_write(&sessions_table_lock);
+ hash_del(&sess->hlist);
+ up_write(&sessions_table_lock);
+
+ if (sess->user)
+ ksmbd_free_user(sess->user);
+
+ ksmbd_tree_conn_session_logoff(sess);
+ ksmbd_destroy_file_table(&sess->file_table);
+ ksmbd_session_rpc_clear_list(sess);
+ free_channel_list(sess);
+ kfree(sess->Preauth_HashValue);
+ /* Give the session id back to the allocator used by __init_smb2_session(). */
+ ksmbd_release_id(&session_ida, sess->id);
+ kfree(sess);
+}
+
+/*
+ * Find a session by id in the global hash table. No lock is taken here;
+ * ksmbd_session_lookup_slowpath() calls this under sessions_table_lock.
+ */
+static struct ksmbd_session *__session_lookup(unsigned long long id)
+{
+	struct ksmbd_session *cur;
+	struct ksmbd_session *found = NULL;
+
+	hash_for_each_possible(sessions_table, cur, hlist, id) {
+		if (cur->id != id)
+			continue;
+		found = cur;
+		break;
+	}
+	return found;
+}
+
+/* Attach @sess to @conn: link it on conn->sessions and record the owner. */
+void ksmbd_session_register(struct ksmbd_conn *conn,
+			    struct ksmbd_session *sess)
+{
+	list_add(&sess->sessions_entry, &conn->sessions);
+	sess->conn = conn;
+}
+
+/*
+ * Destroy every session still linked on conn->sessions.
+ *
+ * NOTE(review): the loop only makes progress because
+ * ksmbd_session_destroy() unlinks the entry. When that call returns early
+ * (reference count not yet zero) the same entry stays at the head of the
+ * list -- confirm that no extra reference can be outstanding here.
+ */
+void ksmbd_sessions_deregister(struct ksmbd_conn *conn)
+{
+ struct ksmbd_session *sess;
+
+ while (!list_empty(&conn->sessions)) {
+ sess = list_entry(conn->sessions.next,
+ struct ksmbd_session,
+ sessions_entry);
+
+ ksmbd_session_destroy(sess);
+ }
+}
+
+/* True when @sess carries session id @id. */
+static bool ksmbd_session_id_match(struct ksmbd_session *sess,
+				   unsigned long long id)
+{
+	return id == sess->id;
+}
+
+/*
+ * Look @id up among the sessions registered on @conn.
+ * Returns the session (no reference is taken) or NULL.
+ */
+struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn,
+					   unsigned long long id)
+{
+	struct ksmbd_session *cur;
+	struct ksmbd_session *match = NULL;
+
+	list_for_each_entry(cur, &conn->sessions, sessions_entry) {
+		if (!ksmbd_session_id_match(cur, id))
+			continue;
+		match = cur;
+		break;
+	}
+	return match;
+}
+
+/*
+ * Take a reference unless the count already dropped to zero.
+ * Returns non-zero when the reference was taken.
+ */
+int get_session(struct ksmbd_session *sess)
+{
+	int taken = atomic_inc_not_zero(&sess->refcnt);
+
+	return taken;
+}
+
+/*
+ * Drop a reference taken with get_session().
+ *
+ * This helper never frees the session. If the count reaches zero here, the
+ * get/put calls were unbalanced, which is only reported.
+ */
+void put_session(struct ksmbd_session *sess)
+{
+	/* Terminate the message with '\n' so the log line is flushed as one record. */
+	if (atomic_dec_and_test(&sess->refcnt))
+		pr_err("get/%s seems to be mismatched.\n", __func__);
+}
+
+/*
+ * Look @id up in the global session table and take a reference on the result.
+ * Returns NULL when the id is unknown or the reference could not be taken.
+ */
+struct ksmbd_session *ksmbd_session_lookup_slowpath(unsigned long long id)
+{
+	struct ksmbd_session *found;
+
+	down_read(&sessions_table_lock);
+	found = __session_lookup(id);
+	if (found && !get_session(found))
+		found = NULL;
+	up_read(&sessions_table_lock);
+
+	return found;
+}
+
+/*
+ * Try the connection-local list first; fall back to the global table only
+ * when conn->binding is set.
+ */
+struct ksmbd_session *ksmbd_session_lookup_all(struct ksmbd_conn *conn,
+					       unsigned long long id)
+{
+	struct ksmbd_session *local = ksmbd_session_lookup(conn, id);
+
+	if (local)
+		return local;
+	if (!conn->binding)
+		return NULL;
+	return ksmbd_session_lookup_slowpath(id);
+}
+
+/*
+ * Allocate a preauth session for @sess_id, seed its hash from
+ * conn->preauth_info and link it on conn->preauth_sess_table.
+ * Returns NULL when the allocation fails.
+ */
+struct preauth_session *ksmbd_preauth_session_alloc(struct ksmbd_conn *conn,
+						    u64 sess_id)
+{
+	struct preauth_session *preauth;
+
+	preauth = kmalloc(sizeof(struct preauth_session), GFP_KERNEL);
+	if (preauth) {
+		preauth->id = sess_id;
+		memcpy(preauth->Preauth_HashValue,
+		       conn->preauth_info->Preauth_HashValue,
+		       PREAUTH_HASHVALUE_SIZE);
+		list_add(&preauth->preauth_entry, &conn->preauth_sess_table);
+	}
+
+	return preauth;
+}
+
+/* True when the preauth session carries id @id. */
+static bool ksmbd_preauth_session_id_match(struct preauth_session *sess,
+					   unsigned long long id)
+{
+	return id == sess->id;
+}
+
+/* Find the preauth session with id @id on @conn, or return NULL. */
+struct preauth_session *ksmbd_preauth_session_lookup(struct ksmbd_conn *conn,
+						     unsigned long long id)
+{
+	struct preauth_session *cur;
+	struct preauth_session *match = NULL;
+
+	list_for_each_entry(cur, &conn->preauth_sess_table, preauth_entry) {
+		if (!ksmbd_preauth_session_id_match(cur, id))
+			continue;
+		match = cur;
+		break;
+	}
+	return match;
+}
+
+/* Assign a fresh id from session_ida; returns 0 or -EINVAL. */
+static int __init_smb2_session(struct ksmbd_session *sess)
+{
+	int uid;
+
+	uid = ksmbd_acquire_smb2_uid(&session_ida);
+	if (uid >= 0) {
+		sess->id = uid;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+/*
+ * Allocate and initialise a session for @protocol.
+ *
+ * On success the session holds one reference and, for SMB2, is hashed in
+ * sessions_table under its id. Returns NULL on any failure.
+ */
+static struct ksmbd_session *__session_create(int protocol)
+{
+	struct ksmbd_session *sess;
+	int ret;
+
+	sess = kzalloc(sizeof(struct ksmbd_session), GFP_KERNEL);
+	if (!sess)
+		return NULL;
+
+	/*
+	 * At this point refcnt is still 0 and no list head is initialised, so
+	 * ksmbd_session_destroy() would see atomic_dec_and_test() fail and
+	 * return without freeing anything. Free the bare allocation directly.
+	 * NOTE(review): assumes ksmbd_init_file_table() leaves nothing behind
+	 * when it fails -- confirm.
+	 */
+	if (ksmbd_init_file_table(&sess->file_table)) {
+		kfree(sess);
+		return NULL;
+	}
+
+	set_session_flag(sess, protocol);
+	INIT_LIST_HEAD(&sess->sessions_entry);
+	xa_init(&sess->tree_conns);
+	INIT_LIST_HEAD(&sess->ksmbd_chann_list);
+	INIT_LIST_HEAD(&sess->rpc_handle_list);
+	sess->sequence_number = 1;
+	atomic_set(&sess->refcnt, 1);
+
+	switch (protocol) {
+	case CIFDS_SESSION_FLAG_SMB2:
+		ret = __init_smb2_session(sess);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret)
+		goto error;
+
+	ida_init(&sess->tree_conn_ida);
+
+	if (protocol == CIFDS_SESSION_FLAG_SMB2) {
+		down_write(&sessions_table_lock);
+		hash_add(sessions_table, &sess->hlist, sess->id);
+		up_write(&sessions_table_lock);
+	}
+	return sess;
+
+error:
+	/* refcnt is 1 here, so this call really tears the session down. */
+	ksmbd_session_destroy(sess);
+	return NULL;
+}
+
+/* Create a session flagged as SMB2; returns NULL on failure. */
+struct ksmbd_session *ksmbd_smb2_session_create(void)
+{
+	struct ksmbd_session *sess = __session_create(CIFDS_SESSION_FLAG_SMB2);
+
+	return sess;
+}
+
+/*
+ * Allocate a tree-connect id from the session's own IDA.
+ * Returns -EINVAL for a session that is not flagged as SMB2.
+ */
+int ksmbd_acquire_tree_conn_id(struct ksmbd_session *sess)
+{
+	if (!test_session_flag(sess, CIFDS_SESSION_FLAG_SMB2))
+		return -EINVAL;
+
+	return ksmbd_acquire_smb2_tid(&sess->tree_conn_ida);
+}
+
+/* Return a tree-connect id to the session's IDA; negative ids are ignored. */
+void ksmbd_release_tree_conn_id(struct ksmbd_session *sess, int id)
+{
+	if (id < 0)
+		return;
+
+	ksmbd_release_id(&sess->tree_conn_ida, id);
+}
diff --git a/fs/ksmbd/mgmt/user_session.h b/fs/ksmbd/mgmt/user_session.h
new file mode 100644
index 000000000000..82289c3cbd2b
--- /dev/null
+++ b/fs/ksmbd/mgmt/user_session.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __USER_SESSION_MANAGEMENT_H__
+#define __USER_SESSION_MANAGEMENT_H__
+
+#include <linux/hashtable.h>
+#include <linux/xarray.h>
+
+#include "../smb_common.h"
+#include "../ntlmssp.h"
+
+#define CIFDS_SESSION_FLAG_SMB2 BIT(1)
+
+#define PREAUTH_HASHVALUE_SIZE 64
+
+struct ksmbd_file_table;
+
+/* Per-connection channel of a session; linked on sess->ksmbd_chann_list. */
+struct channel {
+ __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
+ struct ksmbd_conn *conn;
+ struct list_head chann_list;
+};
+
+/* Preauth hash kept per session id; linked on conn->preauth_sess_table. */
+struct preauth_session {
+ __u8 Preauth_HashValue[PREAUTH_HASHVALUE_SIZE];
+ u64 id;
+ struct list_head preauth_entry;
+};
+
+/* State of one SMB session; created by ksmbd_smb2_session_create(). */
+struct ksmbd_session {
+ u64 id; /* session id, key of the global session hash */
+
+ struct ksmbd_user *user; /* freed by ksmbd_session_destroy() when set */
+ struct ksmbd_conn *conn; /* set by ksmbd_session_register() */
+ unsigned int sequence_number;
+ unsigned int flags; /* see test/set/clear_session_flag() */
+
+ bool sign;
+ bool enc;
+ bool is_anonymous;
+
+ int state;
+ __u8 *Preauth_HashValue; /* kfree()d on destroy */
+
+ struct ntlmssp_auth ntlmssp;
+ char sess_key[CIFS_KEY_SIZE];
+
+ struct hlist_node hlist; /* link in the global session hash */
+ struct list_head ksmbd_chann_list; /* list of struct channel */
+ struct xarray tree_conns;
+ struct ida tree_conn_ida; /* allocator for tree-connect ids */
+ struct list_head rpc_handle_list; /* open RPC pipe handles */
+
+ __u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE];
+ __u8 smb3decryptionkey[SMB3_ENC_DEC_KEY_SIZE];
+ __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
+
+ struct list_head sessions_entry; /* link on conn->sessions */
+ struct ksmbd_file_table file_table;
+ atomic_t refcnt; /* see get_session()/put_session()/ksmbd_session_destroy() */
+};
+
+/* Bit helpers for ksmbd_session::flags. */
+static inline int test_session_flag(struct ksmbd_session *sess, int bit)
+{
+	int masked = sess->flags & bit;
+
+	return masked;
+}
+
+static inline void set_session_flag(struct ksmbd_session *sess, int bit)
+{
+	sess->flags = sess->flags | bit;
+}
+
+static inline void clear_session_flag(struct ksmbd_session *sess, int bit)
+{
+	sess->flags = sess->flags & ~bit;
+}
+
+struct ksmbd_session *ksmbd_smb2_session_create(void);
+
+void ksmbd_session_destroy(struct ksmbd_session *sess);
+
+struct ksmbd_session *ksmbd_session_lookup_slowpath(unsigned long long id);
+struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn,
+ unsigned long long id);
+void ksmbd_session_register(struct ksmbd_conn *conn,
+ struct ksmbd_session *sess);
+void ksmbd_sessions_deregister(struct ksmbd_conn *conn);
+struct ksmbd_session *ksmbd_session_lookup_all(struct ksmbd_conn *conn,
+ unsigned long long id);
+struct preauth_session *ksmbd_preauth_session_alloc(struct ksmbd_conn *conn,
+ u64 sess_id);
+struct preauth_session *ksmbd_preauth_session_lookup(struct ksmbd_conn *conn,
+ unsigned long long id);
+
+int ksmbd_acquire_tree_conn_id(struct ksmbd_session *sess);
+void ksmbd_release_tree_conn_id(struct ksmbd_session *sess, int id);
+
+int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name);
+void ksmbd_session_rpc_close(struct ksmbd_session *sess, int id);
+int ksmbd_session_rpc_method(struct ksmbd_session *sess, int id);
+int get_session(struct ksmbd_session *sess);
+void put_session(struct ksmbd_session *sess);
+#endif /* __USER_SESSION_MANAGEMENT_H__ */
diff --git a/fs/ksmbd/misc.c b/fs/ksmbd/misc.c
new file mode 100644
index 000000000000..0b307ca28a19
--- /dev/null
+++ b/fs/ksmbd/misc.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/xattr.h>
+#include <linux/fs.h>
+
+#include "misc.h"
+#include "smb_common.h"
+#include "connection.h"
+#include "vfs.h"
+
+#include "mgmt/share_config.h"
+
+/**
+ * match_pattern() - compare a string with a pattern which might include
+ * wildcard '*' and '?'
+ * TODO : implement consideration about DOS_DOT, DOS_QM and DOS_STAR
+ *
+ * @str:	string to compare with a pattern
+ * @len:	maximum number of characters of @str to examine
+ * @pattern:	pattern string which might include wildcard '*' and '?'
+ *
+ * Comparison of literal characters is case-insensitive.
+ *
+ * Return: non-zero if the pattern matched the string, otherwise 0
+ */
+int match_pattern(const char *str, size_t len, const char *pattern)
+{
+	const char *s = str;
+	const char *p = pattern;
+	size_t anchor_len = len;	/* characters left at the '*' anchor */
+	bool star = false;
+
+	while (*s && len) {
+		switch (*p) {
+		case '?':
+			s++;
+			len--;
+			p++;
+			break;
+		case '*':
+			star = true;
+			str = s;
+			anchor_len = len;
+			if (!*++p)
+				return true;
+			pattern = p;
+			break;
+		default:
+			if (tolower(*s) == tolower(*p)) {
+				s++;
+				len--;
+				p++;
+			} else {
+				if (!star)
+					return false;
+				/*
+				 * Retry one character further into the string.
+				 * The remaining length must be rewound along
+				 * with the position, otherwise characters
+				 * consumed by the failed attempt stay counted
+				 * and e.g. "aaab" fails to match "*aab".
+				 */
+				str++;
+				anchor_len--;
+				s = str;
+				len = anchor_len;
+				p = pattern;
+			}
+			break;
+		}
+	}
+
+	if (*p == '*')
+		++p;
+	return !*p;
+}
+
+/*
+ * is_char_allowed() - check for valid character
+ * @ch: input character to be checked
+ *
+ * Bytes with the top bit set are always accepted; otherwise control
+ * characters (<= 0x1f) and ? " < > | * are rejected.
+ *
+ * Return: 1 if char is allowed, otherwise 0
+ */
+static inline int is_char_allowed(char ch)
+{
+	if (ch & 0x80)
+		return 1;
+
+	if (ch <= 0x1f)
+		return 0;
+
+	switch (ch) {
+	case '?':
+	case '"':
+	case '<':
+	case '>':
+	case '|':
+	case '*':
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+/*
+ * Check every character of @filename with is_char_allowed().
+ * Returns 0 when all are allowed, -ENOENT at the first rejected character.
+ */
+int ksmbd_validate_filename(char *filename)
+{
+	for (; *filename; filename++) {
+		char c = *filename;
+
+		if (is_char_allowed(c))
+			continue;
+
+		ksmbd_debug(VFS, "File name validation failed: 0x%x\n", c);
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+/*
+ * Reject stream names containing '/', ':' or '\\'.
+ * Returns 0 when the name is clean, -ENOENT otherwise.
+ */
+static int ksmbd_validate_stream_name(char *stream_name)
+{
+	const char *bad = strpbrk(stream_name, "/:\\");
+
+	if (!bad)
+		return 0;
+
+	pr_err("Stream name validation failed: %c\n", *bad);
+	return -ENOENT;
+}
+
+/*
+ * parse_stream_name() - split "file:stream[:type]" in place
+ * @filename:	 name to split; the ':' separators are overwritten with NULs
+ * @stream_name: receives a pointer to the stream-name part inside @filename
+ * @s_type:	 receives DATA_STREAM or DIR_STREAM when a type suffix
+ *		 ("$data" / "$index_allocation", case-insensitive) is present;
+ *		 left untouched otherwise
+ *
+ * Return: 0 on success, -ENOENT when @filename has no ':' at all, when the
+ * stream name contains a forbidden character or when the type is unknown
+ */
+int parse_stream_name(char *filename, char **stream_name, int *s_type)
+{
+	char *stream_type;
+	char *s_name;
+	int rc = 0;
+
+	s_name = filename;
+	filename = strsep(&s_name, ":");
+	ksmbd_debug(SMB, "filename : %s, streams : %s\n", filename, s_name);
+	/*
+	 * strsep() leaves s_name NULL when there was no ':' to split on.
+	 * Bail out instead of handing NULL to strchr() below.
+	 */
+	if (!s_name)
+		return -ENOENT;
+
+	if (strchr(s_name, ':')) {
+		stream_type = s_name;
+		s_name = strsep(&stream_type, ":");
+
+		rc = ksmbd_validate_stream_name(s_name);
+		if (rc < 0) {
+			rc = -ENOENT;
+			goto out;
+		}
+
+		ksmbd_debug(SMB, "stream name : %s, stream type : %s\n", s_name,
+			    stream_type);
+		if (!strncasecmp("$data", stream_type, 5))
+			*s_type = DATA_STREAM;
+		else if (!strncasecmp("$index_allocation", stream_type, 17))
+			*s_type = DIR_STREAM;
+		else
+			rc = -ENOENT;
+	}
+
+	*stream_name = s_name;
+out:
+	return rc;
+}
+
+/**
+ * convert_to_nt_pathname() - extract and return windows path string
+ *      whose share directory prefix was removed from file path
+ * @filename : unix filename
+ * @sharepath: share path string
+ *
+ * When @filename does not start with @sharepath, or is identical to it, the
+ * result is the bare root "\\".
+ *
+ * Return : newly allocated windows path string that the caller must free,
+ * or NULL when the allocation fails
+ */
+
+char *convert_to_nt_pathname(char *filename, char *sharepath)
+{
+	char *ab_pathname;
+	int len, name_len, buf_len;
+
+	name_len = strlen(filename);
+	/*
+	 * The default result "\\" needs two bytes. Sizing the buffer from
+	 * strlen(filename) alone overflowed it for names of 0 or 1 characters.
+	 */
+	buf_len = name_len + 2;
+	ab_pathname = kmalloc(buf_len, GFP_KERNEL);
+	if (!ab_pathname)
+		return NULL;
+
+	ab_pathname[0] = '\\';
+	ab_pathname[1] = '\0';
+
+	len = strlen(sharepath);
+	if (!strncmp(filename, sharepath, len) && name_len != len) {
+		strscpy(ab_pathname, &filename[len], buf_len);
+		ksmbd_conv_path_to_windows(ab_pathname);
+	}
+
+	return ab_pathname;
+}
+
+/* Link count from @st, reduced by one for directories. */
+int get_nlink(struct kstat *st)
+{
+	int links = st->nlink;
+
+	return S_ISDIR(st->mode) ? links - 1 : links;
+}
+
+/* Replace every backslash in @path with a forward slash, in place. */
+void ksmbd_conv_path_to_unix(char *path)
+{
+	const char win_sep = '\\';
+	const char unix_sep = '/';
+
+	strreplace(path, win_sep, unix_sep);
+}
+
+/* Remove all trailing '/' characters from @path, in place. */
+void ksmbd_strip_last_slash(char *path)
+{
+	int remaining;
+
+	for (remaining = strlen(path); remaining > 0; remaining--) {
+		if (path[remaining - 1] != '/')
+			break;
+		path[remaining - 1] = '\0';
+	}
+}
+
+/* Replace every forward slash in @path with a backslash, in place. */
+void ksmbd_conv_path_to_windows(char *path)
+{
+	const char unix_sep = '/';
+	const char win_sep = '\\';
+
+	strreplace(path, unix_sep, win_sep);
+}
+
+/**
+ * ksmbd_extract_sharename() - get share name from tree connect request
+ * @treename:	buffer containing tree name and share name
+ *
+ * The share name is whatever follows the last backslash, or the whole
+ * string when there is none.
+ *
+ * Return: newly allocated copy of the share name (caller frees), or
+ * ERR_PTR(-ENOMEM) when the allocation fails
+ */
+char *ksmbd_extract_sharename(char *treename)
+{
+	char *last_sep = strrchr(treename, '\\');
+	char *share = last_sep ? last_sep + 1 : treename;
+	char *copy;
+
+	/* caller has to free the memory */
+	copy = kstrdup(share, GFP_KERNEL);
+	if (copy)
+		return copy;
+	return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * convert_to_unix_name() - build a path below a share's directory
+ * @share:	share configuration supplying the base path (path, path_sz)
+ * @name:	name relative to the share; one leading '/' is skipped
+ *
+ * A '/' is inserted between the share path and @name unless the share path
+ * already ends with one.
+ *
+ * Return: newly allocated path that the caller must free, or NULL when the
+ * allocation fails
+ */
+char *convert_to_unix_name(struct ksmbd_share_config *share, char *name)
+{
+	int no_slash = 0, name_len, path_len;
+	char *new_name;
+
+	if (name[0] == '/')
+		name++;
+
+	path_len = share->path_sz;
+	name_len = strlen(name);
+	new_name = kmalloc(path_len + name_len + 2, GFP_KERNEL);
+	if (!new_name)
+		return new_name;
+
+	memcpy(new_name, share->path, path_len);
+	/* An empty share path has no last character to inspect. */
+	if (!path_len || new_name[path_len - 1] != '/') {
+		new_name[path_len] = '/';
+		no_slash = 1;
+	}
+
+	memcpy(new_name + path_len + no_slash, name, name_len);
+	path_len += name_len + no_slash;
+	new_name[path_len] = 0x00;
+	return new_name;
+}
+
+/*
+ * Convert d_info->name to UTF-16 in a newly allocated buffer.
+ *
+ * @conv_len receives the converted length in bytes (the value returned by
+ * smbConvertToUTF16() times two); two zero bytes are stored right after it.
+ *
+ * Returns the buffer (caller frees), or NULL when the name is empty or the
+ * allocation fails.
+ */
+char *ksmbd_convert_dir_info_name(struct ksmbd_dir_info *d_info,
+ const struct nls_table *local_nls,
+ int *conv_len)
+{
+ char *conv;
+ int sz = min(4 * d_info->name_len, PATH_MAX);
+
+ if (!sz)
+ return NULL;
+
+ conv = kmalloc(sz, GFP_KERNEL);
+ if (!conv)
+ return NULL;
+
+ /*
+  * XXX
+  * NOTE(review): the conversion is not given the buffer size and its
+  * result is not checked against @sz before the terminator is written.
+  */
+ *conv_len = smbConvertToUTF16((__le16 *)conv, d_info->name,
+ d_info->name_len, local_nls, 0);
+ *conv_len *= 2;
+
+ /* We allocate buffer twice bigger than needed. */
+ conv[*conv_len] = 0x00;
+ conv[*conv_len + 1] = 0x00;
+ return conv;
+}
+
+/*
+ * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units)
+ * into Unix UTC (based 1970-01-01, in seconds).
+ */
+struct timespec64 ksmbd_NTtimeToUnix(__le64 ntutc)
+{
+	struct timespec64 ts;
+	/* Subtract the NTFS time offset; the result is in 100ns units. */
+	s64 t = le64_to_cpu(ntutc) - NTFS_TIME_OFFSET;
+	bool before_epoch = t < 0;
+	u64 ticks;
+	long nsec;
+
+	/*
+	 * do_div() is used because plain 64 bit division is not available on
+	 * 32 bit architectures; it only handles unsigned values, so divide the
+	 * magnitude and restore the sign afterwards.
+	 */
+	if (before_epoch)
+		ticks = -t;
+	else
+		ticks = t;
+
+	nsec = do_div(ticks, 10000000) * 100;
+
+	if (before_epoch) {
+		ts.tv_nsec = -nsec;
+		ts.tv_sec = -ticks;
+	} else {
+		ts.tv_nsec = nsec;
+		ts.tv_sec = ticks;
+	}
+
+	return ts;
+}
+
+/* Convert the Unix UTC into NT UTC (100ns units since 1601-01-01). */
+inline u64 ksmbd_UnixTimeToNT(struct timespec64 t)
+{
+	/* Seconds and nanoseconds are both scaled to 100ns intervals ... */
+	u64 ticks = (u64)t.tv_sec * 10000000;
+
+	ticks += t.tv_nsec / 100;
+	/* ... and then shifted by the NTFS time offset. */
+	return ticks + NTFS_TIME_OFFSET;
+}
+
+/* Current wall-clock time expressed as an NT timestamp. */
+inline long long ksmbd_systime(void)
+{
+	struct timespec64 now;
+	u64 nt_now;
+
+	ktime_get_real_ts64(&now);
+	nt_now = ksmbd_UnixTimeToNT(now);
+	return nt_now;
+}
diff --git a/fs/ksmbd/misc.h b/fs/ksmbd/misc.h
new file mode 100644
index 000000000000..af8717d4d85b
--- /dev/null
+++ b/fs/ksmbd/misc.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_MISC_H__
+#define __KSMBD_MISC_H__
+
+struct ksmbd_share_config;
+struct nls_table;
+struct kstat;
+struct ksmbd_file;
+
+int match_pattern(const char *str, size_t len, const char *pattern);
+int ksmbd_validate_filename(char *filename);
+int parse_stream_name(char *filename, char **stream_name, int *s_type);
+char *convert_to_nt_pathname(char *filename, char *sharepath);
+int get_nlink(struct kstat *st);
+void ksmbd_conv_path_to_unix(char *path);
+void ksmbd_strip_last_slash(char *path);
+void ksmbd_conv_path_to_windows(char *path);
+char *ksmbd_extract_sharename(char *treename);
+char *convert_to_unix_name(struct ksmbd_share_config *share, char *name);
+
+#define KSMBD_DIR_INFO_ALIGNMENT 8
+struct ksmbd_dir_info;
+char *ksmbd_convert_dir_info_name(struct ksmbd_dir_info *d_info,
+ const struct nls_table *local_nls,
+ int *conv_len);
+
+#define NTFS_TIME_OFFSET ((u64)(369 * 365 + 89) * 24 * 3600 * 10000000)
+struct timespec64 ksmbd_NTtimeToUnix(__le64 ntutc);
+u64 ksmbd_UnixTimeToNT(struct timespec64 t);
+long long ksmbd_systime(void);
+#endif /* __KSMBD_MISC_H__ */
diff --git a/fs/ksmbd/ndr.c b/fs/ksmbd/ndr.c
new file mode 100644
index 000000000000..2243a2c64b37
--- /dev/null
+++ b/fs/ksmbd/ndr.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Samsung Electronics Co., Ltd.
+ * Author(s): Namjae Jeon <linkinjeon@kernel.org>
+ */
+
+#include <linux/fs.h>
+
+#include "glob.h"
+#include "ndr.h"
+
+/* Address of the byte at the current offset inside the blob. */
+static inline char *ndr_get_field(struct ndr *n)
+{
+	return &n->data[n->offset];
+}
+
+/*
+ * Grow the blob so that @sz more bytes fit at the current offset, plus 1024
+ * bytes of headroom.
+ *
+ * n->length is set to the real allocation size and the whole area from the
+ * current offset to the new end is zeroed, so that alignment gaps skipped by
+ * later writes never expose uninitialised memory.
+ *
+ * Returns 0 on success or -ENOMEM; on failure the old buffer stays valid.
+ */
+static int try_to_realloc_ndr_blob(struct ndr *n, size_t sz)
+{
+	size_t new_len = n->offset + sz + 1024;
+	char *data;
+
+	data = krealloc(n->data, new_len, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	n->data = data;
+	n->length = new_len;
+	memset(n->data + n->offset, 0, new_len - n->offset);
+	return 0;
+}
+
+/*
+ * Append @value as a little-endian 16 bit field, growing the blob if needed.
+ * Returns 0, or -ENOMEM when the blob could not be grown; in that case
+ * nothing is written.
+ */
+static int ndr_write_int16(struct ndr *n, __u16 value)
+{
+	if (n->length <= n->offset + sizeof(value)) {
+		int ret = try_to_realloc_ndr_blob(n, sizeof(value));
+
+		/* Writing anyway would run past the end of the old buffer. */
+		if (ret)
+			return ret;
+	}
+
+	*(__le16 *)ndr_get_field(n) = cpu_to_le16(value);
+	n->offset += sizeof(value);
+	return 0;
+}
+
+/*
+ * Append @value as a little-endian 32 bit field, growing the blob if needed.
+ * Returns 0, or -ENOMEM when the blob could not be grown; in that case
+ * nothing is written.
+ */
+static int ndr_write_int32(struct ndr *n, __u32 value)
+{
+	if (n->length <= n->offset + sizeof(value)) {
+		int ret = try_to_realloc_ndr_blob(n, sizeof(value));
+
+		/* Writing anyway would run past the end of the old buffer. */
+		if (ret)
+			return ret;
+	}
+
+	*(__le32 *)ndr_get_field(n) = cpu_to_le32(value);
+	n->offset += sizeof(value);
+	return 0;
+}
+
+/*
+ * Append @value as a little-endian 64 bit field, growing the blob if needed.
+ * Returns 0, or -ENOMEM when the blob could not be grown; in that case
+ * nothing is written.
+ */
+static int ndr_write_int64(struct ndr *n, __u64 value)
+{
+	if (n->length <= n->offset + sizeof(value)) {
+		int ret = try_to_realloc_ndr_blob(n, sizeof(value));
+
+		/* Writing anyway would run past the end of the old buffer. */
+		if (ret)
+			return ret;
+	}
+
+	*(__le64 *)ndr_get_field(n) = cpu_to_le64(value);
+	n->offset += sizeof(value);
+	return 0;
+}
+
+/*
+ * Append @sz raw bytes from @value, growing the blob if needed.
+ * Returns 0, or -ENOMEM when the blob could not be grown; in that case
+ * nothing is written.
+ */
+static int ndr_write_bytes(struct ndr *n, void *value, size_t sz)
+{
+	if (n->length <= n->offset + sz) {
+		int ret = try_to_realloc_ndr_blob(n, sz);
+
+		/* Copying anyway would run past the end of the old buffer. */
+		if (ret)
+			return ret;
+	}
+
+	memcpy(ndr_get_field(n), value, sz);
+	n->offset += sz;
+	return 0;
+}
+
+/*
+ * Append the NUL-terminated string @value (terminator included) and round the
+ * offset up to a multiple of 2.
+ * Returns 0, or -ENOMEM when the blob could not be grown; in that case
+ * nothing is written.
+ */
+static int ndr_write_string(struct ndr *n, char *value)
+{
+	size_t sz;
+
+	sz = strlen(value) + 1;
+	if (n->length <= n->offset + sz) {
+		int ret = try_to_realloc_ndr_blob(n, sz);
+
+		/* Copying anyway would run past the end of the old buffer. */
+		if (ret)
+			return ret;
+	}
+
+	memcpy(ndr_get_field(n), value, sz);
+	n->offset += sz;
+	n->offset = ALIGN(n->offset, 2);
+	return 0;
+}
+
+/*
+ * Read a NUL-terminated string at the current offset.
+ *
+ * The string bytes (without the terminator) are copied to @value, which must
+ * have room for @sz bytes. The offset then moves past the terminator and is
+ * rounded up to a multiple of 2.
+ *
+ * Returns 0 on success. Returns -EINVAL, leaving the offset unchanged, when
+ * the offset is outside the blob or no terminator is found within @sz bytes
+ * or before the end of the blob.
+ */
+static int ndr_read_string(struct ndr *n, void *value, size_t sz)
+{
+	size_t avail, len;
+
+	if (n->offset < 0 || n->offset >= n->length)
+		return -EINVAL;
+
+	/* Never scan beyond the bytes that really belong to the blob. */
+	avail = min_t(size_t, sz, n->length - n->offset);
+	len = strnlen(ndr_get_field(n), avail);
+	if (len == avail)
+		return -EINVAL;
+
+	memcpy(value, ndr_get_field(n), len);
+	n->offset += len + 1;
+	n->offset = ALIGN(n->offset, 2);
+	return 0;
+}
+
+/*
+ * Copy @sz raw bytes from the current offset into @value and advance.
+ * Returns 0, or -EINVAL (nothing copied) when fewer than @sz bytes remain in
+ * the blob.
+ */
+static int ndr_read_bytes(struct ndr *n, void *value, size_t sz)
+{
+	if (n->offset < 0 || n->offset > n->length ||
+	    sz > (size_t)(n->length - n->offset))
+		return -EINVAL;
+
+	memcpy(value, ndr_get_field(n), sz);
+	n->offset += sz;
+	return 0;
+}
+
+/* Read a little-endian 16 bit field and advance; performs no bounds check. */
+static __u16 ndr_read_int16(struct ndr *n)
+{
+	const __le16 *field = (__le16 *)ndr_get_field(n);
+
+	n->offset += sizeof(__u16);
+	return le16_to_cpu(*field);
+}
+
+/* Read a little-endian 32 bit field and advance; performs no bounds check. */
+static __u32 ndr_read_int32(struct ndr *n)
+{
+	const __le32 *field = (__le32 *)ndr_get_field(n);
+
+	n->offset += sizeof(__u32);
+	return le32_to_cpu(*field);
+}
+
+/* Read a little-endian 64 bit field and advance; performs no bounds check. */
+static __u64 ndr_read_int64(struct ndr *n)
+{
+	const __le64 *field = (__le64 *)ndr_get_field(n);
+
+	n->offset += sizeof(__u64);
+	return le64_to_cpu(*field);
+}
+
+/**
+ * ndr_encode_dos_attr() - serialise DOS attributes into a new blob
+ * @n:	receives the blob in n->data; n->offset is the encoded size
+ * @da:	attributes to encode
+ *
+ * For version 3 the blob starts with the attribute value as a "0x%x" string,
+ * otherwise with an empty string.
+ *
+ * Return: 0 on success, -ENOMEM on allocation failure. When the leading
+ * string cannot be written the blob is freed and n->data is set to NULL.
+ */
+int ndr_encode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da)
+{
+	char hex_attr[12] = {0};
+	int ret;
+
+	n->offset = 0;
+	n->length = 1024;
+	n->data = kzalloc(n->length, GFP_KERNEL);
+	if (!n->data)
+		return -ENOMEM;
+
+	if (da->version == 3) {
+		/*
+		 * "0x" plus eight hex digits plus NUL needs 11 bytes; a limit
+		 * of 10 cut the last digit off attribute values that use all
+		 * eight digits.
+		 */
+		snprintf(hex_attr, sizeof(hex_attr), "0x%x", da->attr);
+		ret = ndr_write_string(n, hex_attr);
+	} else {
+		ret = ndr_write_string(n, "");
+	}
+	if (ret) {
+		kfree(n->data);
+		n->data = NULL;
+		return ret;
+	}
+
+	ndr_write_int16(n, da->version);
+	ndr_write_int32(n, da->version);
+
+	ndr_write_int32(n, da->flags);
+	ndr_write_int32(n, da->attr);
+	if (da->version == 3) {
+		ndr_write_int32(n, da->ea_size);
+		ndr_write_int64(n, da->size);
+		ndr_write_int64(n, da->alloc_size);
+	} else {
+		ndr_write_int64(n, da->itime);
+	}
+	ndr_write_int64(n, da->create_time);
+	if (da->version == 3)
+		ndr_write_int64(n, da->change_time);
+	return 0;
+}
+
+/**
+ * ndr_decode_dos_attr() - parse a DOS attribute blob
+ * @n:	blob to parse; n->data and n->length must describe the stored data
+ * @da:	receives version, attr, create_time and, for version 4, itime
+ *
+ * Return: 0 on success, -ENOMEM on allocation failure, -EINVAL for an
+ * unsupported version, mismatching version fields or a blob that is too
+ * short for the fields that are read
+ */
+int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da)
+{
+	char *hex_attr;
+	int version2;
+	int need;
+	int ret;
+
+	if (n->length <= 0)
+		return -EINVAL;
+
+	hex_attr = kzalloc(n->length, GFP_KERNEL);
+	if (!hex_attr)
+		return -ENOMEM;
+
+	n->offset = 0;
+	/* The leading string is skipped; its content is not used. */
+	ret = ndr_read_string(n, hex_attr, n->length);
+	kfree(hex_attr);
+	if (ret)
+		return ret;
+
+	/* Both version fields must lie inside the blob. */
+	need = sizeof(__u16) + sizeof(__u32);
+	if (n->offset < 0 || n->length - n->offset < need)
+		return -EINVAL;
+
+	da->version = ndr_read_int16(n);
+
+	if (da->version != 3 && da->version != 4) {
+		pr_err("v%d version is not supported\n", da->version);
+		return -EINVAL;
+	}
+
+	version2 = ndr_read_int32(n);
+	if (da->version != version2) {
+		pr_err("ndr version mismatched(version: %d, version2: %d)\n",
+		       da->version, version2);
+		return -EINVAL;
+	}
+
+	/* Size of the fixed fields read below for each version. */
+	if (da->version == 4)
+		need = 2 * sizeof(__u32) + 2 * sizeof(__u64);
+	else
+		need = 3 * sizeof(__u32) + 4 * sizeof(__u64);
+	if (n->length - n->offset < need)
+		return -EINVAL;
+
+	ndr_read_int32(n);
+	da->attr = ndr_read_int32(n);
+	if (da->version == 4) {
+		da->itime = ndr_read_int64(n);
+		da->create_time = ndr_read_int64(n);
+	} else {
+		ndr_read_int32(n);
+		ndr_read_int64(n);
+		ndr_read_int64(n);
+		da->create_time = ndr_read_int64(n);
+		ndr_read_int64(n);
+	}
+
+	return 0;
+}
+
+/*
+ * Append one ACL: the entry count (twice, with 8 byte alignment in between),
+ * a zero word, then every entry as its type (twice), an 8 byte aligned
+ * uid/gid for user/group entries, and the permission bits.
+ * Always returns 0.
+ */
+static int ndr_encode_posix_acl_entry(struct ndr *n, struct xattr_smb_acl *acl)
+{
+	int idx;
+
+	ndr_write_int32(n, acl->count);
+	n->offset = ALIGN(n->offset, 8);
+	ndr_write_int32(n, acl->count);
+	ndr_write_int32(n, 0);
+
+	for (idx = 0; idx < acl->count; idx++) {
+		n->offset = ALIGN(n->offset, 8);
+		ndr_write_int16(n, acl->entries[idx].type);
+		ndr_write_int16(n, acl->entries[idx].type);
+
+		switch (acl->entries[idx].type) {
+		case SMB_ACL_USER:
+			n->offset = ALIGN(n->offset, 8);
+			ndr_write_int64(n, acl->entries[idx].uid);
+			break;
+		case SMB_ACL_GROUP:
+			n->offset = ALIGN(n->offset, 8);
+			ndr_write_int64(n, acl->entries[idx].gid);
+			break;
+		default:
+			break;
+		}
+
+		/* push permission */
+		ndr_write_int32(n, acl->entries[idx].perm);
+	}
+
+	return 0;
+}
+
+/*
+ * Serialise the owner, group, mode and POSIX ACLs of @inode into a new blob.
+ *
+ * Two reference ids come first: non-zero for each ACL that is present, in
+ * steps of 4 starting at 0x00020000. The default ACL body is only written
+ * when an access ACL is present as well.
+ *
+ * Returns 0, or -ENOMEM when the initial buffer cannot be allocated.
+ */
+int ndr_encode_posix_acl(struct ndr *n,
+			 struct user_namespace *user_ns,
+			 struct inode *inode,
+			 struct xattr_smb_acl *acl,
+			 struct xattr_smb_acl *def_acl)
+{
+	int next_ref = 0x00020000;
+	int access_ref = 0;
+	int default_ref = 0;
+
+	n->offset = 0;
+	n->length = 1024;
+	n->data = kzalloc(n->length, GFP_KERNEL);
+	if (!n->data)
+		return -ENOMEM;
+
+	/* ACL ACCESS */
+	if (acl) {
+		access_ref = next_ref;
+		next_ref += 4;
+	}
+	/* DEFAULT ACL ACCESS */
+	if (def_acl)
+		default_ref = next_ref;
+
+	ndr_write_int32(n, access_ref);
+	ndr_write_int32(n, default_ref);
+
+	ndr_write_int64(n, from_kuid(user_ns, inode->i_uid));
+	ndr_write_int64(n, from_kgid(user_ns, inode->i_gid));
+	ndr_write_int32(n, inode->i_mode);
+
+	if (!acl)
+		return 0;
+
+	ndr_encode_posix_acl_entry(n, acl);
+	if (def_acl)
+		ndr_encode_posix_acl_entry(n, def_acl);
+	return 0;
+}
+
+/*
+ * Serialise @acl into a new 2048 byte blob.
+ *
+ * Field order: version (16 bit), version (32 bit), level 2, reference id,
+ * hash type, hash, description, current time, POSIX ACL hash, security
+ * descriptor. ndr_decode_v4_ntacl() reads the fields back in the same order.
+ *
+ * Returns 0, or -ENOMEM when the initial buffer cannot be allocated.
+ */
+int ndr_encode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
+{
+ int ref_id = 0x00020004;
+
+ n->offset = 0;
+ n->length = 2048;
+ n->data = kzalloc(n->length, GFP_KERNEL);
+ if (!n->data)
+ return -ENOMEM;
+
+ ndr_write_int16(n, acl->version);
+ ndr_write_int32(n, acl->version);
+ ndr_write_int16(n, 2);
+ ndr_write_int32(n, ref_id);
+
+ /* push hash type and hash 64bytes */
+ ndr_write_int16(n, acl->hash_type);
+ ndr_write_bytes(n, acl->hash, XATTR_SD_HASH_SIZE);
+ ndr_write_bytes(n, acl->desc, acl->desc_len);
+ ndr_write_int64(n, acl->current_time);
+ ndr_write_bytes(n, acl->posix_acl_hash, XATTR_SD_HASH_SIZE);
+
+ /* push ndr for security descriptor */
+ ndr_write_bytes(n, acl->sd_buf, acl->sd_size);
+
+ return 0;
+}
+
+/**
+ * ndr_decode_v4_ntacl() - parse a version 4 NT ACL blob
+ * @n:	 blob to parse; n->data and n->length must describe the stored data
+ * @acl: receives version, hash type, hashes, description and a newly
+ *	 allocated copy of the security descriptor in sd_buf / sd_size
+ *
+ * Return: 0 on success, -EINVAL for a truncated blob, an unsupported
+ * version or a wrong description, -ENOMEM on allocation failure
+ */
+int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
+{
+	/* Bytes taken by the fixed-size fields read before the descriptor. */
+	const size_t hdr_len = sizeof(__u16) + sizeof(__u32) +	/* versions */
+			       sizeof(__u16) + sizeof(__u32) +	/* level, ref id */
+			       sizeof(__u16) + XATTR_SD_HASH_SIZE + /* hash */
+			       10 + sizeof(__u64) +		/* desc, time */
+			       XATTR_SD_HASH_SIZE;		/* posix acl hash */
+	int version2;
+	int ret;
+
+	/* Refuse blobs too short for the fixed header before reading it. */
+	if (n->length < 0 || (size_t)n->length < hdr_len)
+		return -EINVAL;
+
+	n->offset = 0;
+	acl->version = ndr_read_int16(n);
+	if (acl->version != 4) {
+		pr_err("v%d version is not supported\n", acl->version);
+		return -EINVAL;
+	}
+
+	version2 = ndr_read_int32(n);
+	if (acl->version != version2) {
+		pr_err("ndr version mismatched(version: %d, version2: %d)\n",
+		       acl->version, version2);
+		return -EINVAL;
+	}
+
+	/* Read Level */
+	ndr_read_int16(n);
+	/* Read Ref Id */
+	ndr_read_int32(n);
+	acl->hash_type = ndr_read_int16(n);
+	ret = ndr_read_bytes(n, acl->hash, XATTR_SD_HASH_SIZE);
+	if (ret)
+		return ret;
+
+	ret = ndr_read_bytes(n, acl->desc, 10);
+	if (ret)
+		return ret;
+	if (strncmp(acl->desc, "posix_acl", 9)) {
+		/* The 10 bytes just read need not contain a terminator. */
+		pr_err("Invalid acl description : %.10s\n", acl->desc);
+		return -EINVAL;
+	}
+
+	/* Read Time */
+	ndr_read_int64(n);
+	/* Read Posix ACL hash */
+	ret = ndr_read_bytes(n, acl->posix_acl_hash, XATTR_SD_HASH_SIZE);
+	if (ret)
+		return ret;
+
+	/* Everything after the header is the security descriptor. */
+	acl->sd_size = n->length - n->offset;
+	acl->sd_buf = kzalloc(acl->sd_size, GFP_KERNEL);
+	if (!acl->sd_buf)
+		return -ENOMEM;
+
+	ret = ndr_read_bytes(n, acl->sd_buf, acl->sd_size);
+	if (ret) {
+		kfree(acl->sd_buf);
+		acl->sd_buf = NULL;
+		return ret;
+	}
+
+	return 0;
+}
diff --git a/fs/ksmbd/ndr.h b/fs/ksmbd/ndr.h
new file mode 100644
index 000000000000..60ca265d1bb0
--- /dev/null
+++ b/fs/ksmbd/ndr.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 Samsung Electronics Co., Ltd.
+ * Author(s): Namjae Jeon <linkinjeon@kernel.org>
+ */
+
+/* Cursor over a serialised blob, used by the ndr_encode_*()/ndr_decode_*() helpers. */
+struct ndr {
+ char *data; /* blob buffer; the encoders allocate it with kzalloc() */
+ int offset; /* current read/write position in bytes */
+ int length; /* size of @data in bytes as tracked by the helpers */
+};
+
+#define NDR_NTSD_OFFSETOF 0xA0
+
+int ndr_encode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da);
+int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da);
+int ndr_encode_posix_acl(struct ndr *n, struct user_namespace *user_ns,
+ struct inode *inode, struct xattr_smb_acl *acl,
+ struct xattr_smb_acl *def_acl);
+int ndr_encode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl);
+int ndr_encode_v3_ntacl(struct ndr *n, struct xattr_ntacl *acl);
+int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl);
diff --git a/fs/ksmbd/nterr.h b/fs/ksmbd/nterr.h
new file mode 100644
index 000000000000..2f358f88a018
--- /dev/null
+++ b/fs/ksmbd/nterr.h
@@ -0,0 +1,543 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Unix SMB/Netbios implementation.
+ * Version 1.9.
+ * NT error code constants
+ * Copyright (C) Andrew Tridgell 1992-2000
+ * Copyright (C) John H Terpstra 1996-2000
+ * Copyright (C) Luke Kenneth Casson Leighton 1996-2000
+ * Copyright (C) Paul Ashton 1998-2000
+ */
+
+#ifndef _NTERR_H
+#define _NTERR_H
+
+/* Win32 Status codes. */
+#define NT_STATUS_MORE_ENTRIES 0x0105
+#define NT_ERROR_INVALID_PARAMETER 0x0057
+#define NT_ERROR_INSUFFICIENT_BUFFER 0x007a
+#define NT_STATUS_1804 0x070c
+#define NT_STATUS_NOTIFY_ENUM_DIR 0x010c
+#define NT_STATUS_INVALID_LOCK_RANGE (0xC0000000 | 0x01a1)
+/*
+ * Win32 Error codes extracted using a loop in smbclient then printing a netmon
+ * sniff to a file.
+ */
+
+#define NT_STATUS_OK 0x0000
+#define NT_STATUS_SOME_UNMAPPED 0x0107
+#define NT_STATUS_BUFFER_OVERFLOW 0x80000005
+#define NT_STATUS_NO_MORE_ENTRIES 0x8000001a
+#define NT_STATUS_MEDIA_CHANGED 0x8000001c
+#define NT_STATUS_END_OF_MEDIA 0x8000001e
+#define NT_STATUS_MEDIA_CHECK 0x80000020
+#define NT_STATUS_NO_DATA_DETECTED 0x80000022 /* distinct from MEDIA_CHANGED */
+#define NT_STATUS_STOPPED_ON_SYMLINK 0x8000002d
+#define NT_STATUS_DEVICE_REQUIRES_CLEANING 0x80000288
+#define NT_STATUS_DEVICE_DOOR_OPEN 0x80000289 /* not REQUIRES_CLEANING */
+#define NT_STATUS_UNSUCCESSFUL (0xC0000000 | 0x0001)
+#define NT_STATUS_NOT_IMPLEMENTED (0xC0000000 | 0x0002)
+#define NT_STATUS_INVALID_INFO_CLASS (0xC0000000 | 0x0003)
+#define NT_STATUS_INFO_LENGTH_MISMATCH (0xC0000000 | 0x0004)
+#define NT_STATUS_ACCESS_VIOLATION (0xC0000000 | 0x0005)
+#define NT_STATUS_IN_PAGE_ERROR (0xC0000000 | 0x0006)
+#define NT_STATUS_PAGEFILE_QUOTA (0xC0000000 | 0x0007)
+#define NT_STATUS_INVALID_HANDLE (0xC0000000 | 0x0008)
+#define NT_STATUS_BAD_INITIAL_STACK (0xC0000000 | 0x0009)
+#define NT_STATUS_BAD_INITIAL_PC (0xC0000000 | 0x000a)
+#define NT_STATUS_INVALID_CID (0xC0000000 | 0x000b)
+#define NT_STATUS_TIMER_NOT_CANCELED (0xC0000000 | 0x000c)
+#define NT_STATUS_INVALID_PARAMETER (0xC0000000 | 0x000d)
+#define NT_STATUS_NO_SUCH_DEVICE (0xC0000000 | 0x000e)
+#define NT_STATUS_NO_SUCH_FILE (0xC0000000 | 0x000f)
+#define NT_STATUS_INVALID_DEVICE_REQUEST (0xC0000000 | 0x0010)
+#define NT_STATUS_END_OF_FILE (0xC0000000 | 0x0011)
+#define NT_STATUS_WRONG_VOLUME (0xC0000000 | 0x0012)
+#define NT_STATUS_NO_MEDIA_IN_DEVICE (0xC0000000 | 0x0013)
+#define NT_STATUS_UNRECOGNIZED_MEDIA (0xC0000000 | 0x0014)
+#define NT_STATUS_NONEXISTENT_SECTOR (0xC0000000 | 0x0015)
+#define NT_STATUS_MORE_PROCESSING_REQUIRED (0xC0000000 | 0x0016)
+#define NT_STATUS_NO_MEMORY (0xC0000000 | 0x0017)
+#define NT_STATUS_CONFLICTING_ADDRESSES (0xC0000000 | 0x0018)
+#define NT_STATUS_NOT_MAPPED_VIEW (0xC0000000 | 0x0019)
+#define NT_STATUS_UNABLE_TO_FREE_VM (0xC0000000 | 0x001a) /* error severity */
+#define NT_STATUS_UNABLE_TO_DELETE_SECTION (0xC0000000 | 0x001b)
+#define NT_STATUS_INVALID_SYSTEM_SERVICE (0xC0000000 | 0x001c)
+#define NT_STATUS_ILLEGAL_INSTRUCTION (0xC0000000 | 0x001d)
+#define NT_STATUS_INVALID_LOCK_SEQUENCE (0xC0000000 | 0x001e)
+#define NT_STATUS_INVALID_VIEW_SIZE (0xC0000000 | 0x001f)
+#define NT_STATUS_INVALID_FILE_FOR_SECTION (0xC0000000 | 0x0020)
+#define NT_STATUS_ALREADY_COMMITTED (0xC0000000 | 0x0021)
+#define NT_STATUS_ACCESS_DENIED (0xC0000000 | 0x0022)
+#define NT_STATUS_BUFFER_TOO_SMALL (0xC0000000 | 0x0023)
+#define NT_STATUS_OBJECT_TYPE_MISMATCH (0xC0000000 | 0x0024)
+#define NT_STATUS_NONCONTINUABLE_EXCEPTION (0xC0000000 | 0x0025)
+#define NT_STATUS_INVALID_DISPOSITION (0xC0000000 | 0x0026)
+#define NT_STATUS_UNWIND (0xC0000000 | 0x0027)
+#define NT_STATUS_BAD_STACK (0xC0000000 | 0x0028)
+#define NT_STATUS_INVALID_UNWIND_TARGET (0xC0000000 | 0x0029)
+#define NT_STATUS_NOT_LOCKED (0xC0000000 | 0x002a)
+#define NT_STATUS_PARITY_ERROR (0xC0000000 | 0x002b)
+#define NT_STATUS_UNABLE_TO_DECOMMIT_VM (0xC0000000 | 0x002c)
+#define NT_STATUS_NOT_COMMITTED (0xC0000000 | 0x002d)
+#define NT_STATUS_INVALID_PORT_ATTRIBUTES (0xC0000000 | 0x002e)
+#define NT_STATUS_PORT_MESSAGE_TOO_LONG (0xC0000000 | 0x002f)
+#define NT_STATUS_INVALID_PARAMETER_MIX (0xC0000000 | 0x0030)
+#define NT_STATUS_INVALID_QUOTA_LOWER (0xC0000000 | 0x0031)
+#define NT_STATUS_DISK_CORRUPT_ERROR (0xC0000000 | 0x0032)
+#define NT_STATUS_OBJECT_NAME_INVALID (0xC0000000 | 0x0033)
+#define NT_STATUS_OBJECT_NAME_NOT_FOUND (0xC0000000 | 0x0034)
+#define NT_STATUS_OBJECT_NAME_COLLISION (0xC0000000 | 0x0035)
+#define NT_STATUS_HANDLE_NOT_WAITABLE (0xC0000000 | 0x0036)
+#define NT_STATUS_PORT_DISCONNECTED (0xC0000000 | 0x0037)
+#define NT_STATUS_DEVICE_ALREADY_ATTACHED (0xC0000000 | 0x0038)
+#define NT_STATUS_OBJECT_PATH_INVALID (0xC0000000 | 0x0039)
+#define NT_STATUS_OBJECT_PATH_NOT_FOUND (0xC0000000 | 0x003a)
+#define NT_STATUS_OBJECT_PATH_SYNTAX_BAD (0xC0000000 | 0x003b)
+#define NT_STATUS_DATA_OVERRUN (0xC0000000 | 0x003c)
+#define NT_STATUS_DATA_LATE_ERROR (0xC0000000 | 0x003d)
+#define NT_STATUS_DATA_ERROR (0xC0000000 | 0x003e)
+#define NT_STATUS_CRC_ERROR (0xC0000000 | 0x003f)
+#define NT_STATUS_SECTION_TOO_BIG (0xC0000000 | 0x0040)
+#define NT_STATUS_PORT_CONNECTION_REFUSED (0xC0000000 | 0x0041)
+#define NT_STATUS_INVALID_PORT_HANDLE (0xC0000000 | 0x0042)
+#define NT_STATUS_SHARING_VIOLATION (0xC0000000 | 0x0043)
+#define NT_STATUS_QUOTA_EXCEEDED (0xC0000000 | 0x0044)
+#define NT_STATUS_INVALID_PAGE_PROTECTION (0xC0000000 | 0x0045)
+#define NT_STATUS_MUTANT_NOT_OWNED (0xC0000000 | 0x0046)
+#define NT_STATUS_SEMAPHORE_LIMIT_EXCEEDED (0xC0000000 | 0x0047)
+#define NT_STATUS_PORT_ALREADY_SET (0xC0000000 | 0x0048)
+#define NT_STATUS_SECTION_NOT_IMAGE (0xC0000000 | 0x0049)
+#define NT_STATUS_SUSPEND_COUNT_EXCEEDED (0xC0000000 | 0x004a)
+#define NT_STATUS_THREAD_IS_TERMINATING (0xC0000000 | 0x004b)
+#define NT_STATUS_BAD_WORKING_SET_LIMIT (0xC0000000 | 0x004c)
+#define NT_STATUS_INCOMPATIBLE_FILE_MAP (0xC0000000 | 0x004d)
+#define NT_STATUS_SECTION_PROTECTION (0xC0000000 | 0x004e)
+#define NT_STATUS_EAS_NOT_SUPPORTED (0xC0000000 | 0x004f)
+#define NT_STATUS_EA_TOO_LARGE (0xC0000000 | 0x0050)
+#define NT_STATUS_NONEXISTENT_EA_ENTRY (0xC0000000 | 0x0051)
+#define NT_STATUS_NO_EAS_ON_FILE (0xC0000000 | 0x0052)
+#define NT_STATUS_EA_CORRUPT_ERROR (0xC0000000 | 0x0053)
+#define NT_STATUS_FILE_LOCK_CONFLICT (0xC0000000 | 0x0054)
+#define NT_STATUS_LOCK_NOT_GRANTED (0xC0000000 | 0x0055)
+#define NT_STATUS_DELETE_PENDING (0xC0000000 | 0x0056)
+#define NT_STATUS_CTL_FILE_NOT_SUPPORTED (0xC0000000 | 0x0057)
+#define NT_STATUS_UNKNOWN_REVISION (0xC0000000 | 0x0058)
+#define NT_STATUS_REVISION_MISMATCH (0xC0000000 | 0x0059)
+#define NT_STATUS_INVALID_OWNER (0xC0000000 | 0x005a)
+#define NT_STATUS_INVALID_PRIMARY_GROUP (0xC0000000 | 0x005b)
+#define NT_STATUS_NO_IMPERSONATION_TOKEN (0xC0000000 | 0x005c)
+#define NT_STATUS_CANT_DISABLE_MANDATORY (0xC0000000 | 0x005d)
+#define NT_STATUS_NO_LOGON_SERVERS (0xC0000000 | 0x005e)
+#define NT_STATUS_NO_SUCH_LOGON_SESSION (0xC0000000 | 0x005f)
+#define NT_STATUS_NO_SUCH_PRIVILEGE (0xC0000000 | 0x0060)
+#define NT_STATUS_PRIVILEGE_NOT_HELD (0xC0000000 | 0x0061)
+#define NT_STATUS_INVALID_ACCOUNT_NAME (0xC0000000 | 0x0062)
+#define NT_STATUS_USER_EXISTS (0xC0000000 | 0x0063)
+#define NT_STATUS_NO_SUCH_USER (0xC0000000 | 0x0064)
+#define NT_STATUS_GROUP_EXISTS (0xC0000000 | 0x0065)
+#define NT_STATUS_NO_SUCH_GROUP (0xC0000000 | 0x0066)
+#define NT_STATUS_MEMBER_IN_GROUP (0xC0000000 | 0x0067)
+#define NT_STATUS_MEMBER_NOT_IN_GROUP (0xC0000000 | 0x0068)
+#define NT_STATUS_LAST_ADMIN (0xC0000000 | 0x0069)
+#define NT_STATUS_WRONG_PASSWORD (0xC0000000 | 0x006a)
+#define NT_STATUS_ILL_FORMED_PASSWORD (0xC0000000 | 0x006b)
+#define NT_STATUS_PASSWORD_RESTRICTION (0xC0000000 | 0x006c)
+#define NT_STATUS_LOGON_FAILURE (0xC0000000 | 0x006d)
+#define NT_STATUS_ACCOUNT_RESTRICTION (0xC0000000 | 0x006e)
+#define NT_STATUS_INVALID_LOGON_HOURS (0xC0000000 | 0x006f)
+#define NT_STATUS_INVALID_WORKSTATION (0xC0000000 | 0x0070)
+#define NT_STATUS_PASSWORD_EXPIRED (0xC0000000 | 0x0071)
+#define NT_STATUS_ACCOUNT_DISABLED (0xC0000000 | 0x0072)
+#define NT_STATUS_NONE_MAPPED (0xC0000000 | 0x0073)
+#define NT_STATUS_TOO_MANY_LUIDS_REQUESTED (0xC0000000 | 0x0074)
+#define NT_STATUS_LUIDS_EXHAUSTED (0xC0000000 | 0x0075)
+#define NT_STATUS_INVALID_SUB_AUTHORITY (0xC0000000 | 0x0076)
+#define NT_STATUS_INVALID_ACL (0xC0000000 | 0x0077)
+#define NT_STATUS_INVALID_SID (0xC0000000 | 0x0078)
+#define NT_STATUS_INVALID_SECURITY_DESCR (0xC0000000 | 0x0079)
+#define NT_STATUS_PROCEDURE_NOT_FOUND (0xC0000000 | 0x007a)
+#define NT_STATUS_INVALID_IMAGE_FORMAT (0xC0000000 | 0x007b)
+#define NT_STATUS_NO_TOKEN (0xC0000000 | 0x007c)
+#define NT_STATUS_BAD_INHERITANCE_ACL (0xC0000000 | 0x007d)
+#define NT_STATUS_RANGE_NOT_LOCKED (0xC0000000 | 0x007e)
+#define NT_STATUS_DISK_FULL (0xC0000000 | 0x007f)
+#define NT_STATUS_SERVER_DISABLED (0xC0000000 | 0x0080)
+#define NT_STATUS_SERVER_NOT_DISABLED (0xC0000000 | 0x0081)
+#define NT_STATUS_TOO_MANY_GUIDS_REQUESTED (0xC0000000 | 0x0082)
+#define NT_STATUS_GUIDS_EXHAUSTED (0xC0000000 | 0x0083)
+#define NT_STATUS_INVALID_ID_AUTHORITY (0xC0000000 | 0x0084)
+#define NT_STATUS_AGENTS_EXHAUSTED (0xC0000000 | 0x0085)
+#define NT_STATUS_INVALID_VOLUME_LABEL (0xC0000000 | 0x0086)
+#define NT_STATUS_SECTION_NOT_EXTENDED (0xC0000000 | 0x0087)
+#define NT_STATUS_NOT_MAPPED_DATA (0xC0000000 | 0x0088)
+#define NT_STATUS_RESOURCE_DATA_NOT_FOUND (0xC0000000 | 0x0089)
+#define NT_STATUS_RESOURCE_TYPE_NOT_FOUND (0xC0000000 | 0x008a)
+#define NT_STATUS_RESOURCE_NAME_NOT_FOUND (0xC0000000 | 0x008b)
+#define NT_STATUS_ARRAY_BOUNDS_EXCEEDED (0xC0000000 | 0x008c)
+#define NT_STATUS_FLOAT_DENORMAL_OPERAND (0xC0000000 | 0x008d)
+#define NT_STATUS_FLOAT_DIVIDE_BY_ZERO (0xC0000000 | 0x008e)
+#define NT_STATUS_FLOAT_INEXACT_RESULT (0xC0000000 | 0x008f)
+#define NT_STATUS_FLOAT_INVALID_OPERATION (0xC0000000 | 0x0090)
+#define NT_STATUS_FLOAT_OVERFLOW (0xC0000000 | 0x0091)
+#define NT_STATUS_FLOAT_STACK_CHECK (0xC0000000 | 0x0092)
+#define NT_STATUS_FLOAT_UNDERFLOW (0xC0000000 | 0x0093)
+#define NT_STATUS_INTEGER_DIVIDE_BY_ZERO (0xC0000000 | 0x0094)
+#define NT_STATUS_INTEGER_OVERFLOW (0xC0000000 | 0x0095)
+#define NT_STATUS_PRIVILEGED_INSTRUCTION (0xC0000000 | 0x0096)
+#define NT_STATUS_TOO_MANY_PAGING_FILES (0xC0000000 | 0x0097)
+#define NT_STATUS_FILE_INVALID (0xC0000000 | 0x0098)
+#define NT_STATUS_ALLOTTED_SPACE_EXCEEDED (0xC0000000 | 0x0099)
+#define NT_STATUS_INSUFFICIENT_RESOURCES (0xC0000000 | 0x009a)
+#define NT_STATUS_DFS_EXIT_PATH_FOUND (0xC0000000 | 0x009b)
+#define NT_STATUS_DEVICE_DATA_ERROR (0xC0000000 | 0x009c)
+#define NT_STATUS_DEVICE_NOT_CONNECTED (0xC0000000 | 0x009d)
+#define NT_STATUS_DEVICE_POWER_FAILURE (0xC0000000 | 0x009e)
+#define NT_STATUS_FREE_VM_NOT_AT_BASE (0xC0000000 | 0x009f)
+#define NT_STATUS_MEMORY_NOT_ALLOCATED (0xC0000000 | 0x00a0)
+#define NT_STATUS_WORKING_SET_QUOTA (0xC0000000 | 0x00a1)
+#define NT_STATUS_MEDIA_WRITE_PROTECTED (0xC0000000 | 0x00a2)
+#define NT_STATUS_DEVICE_NOT_READY (0xC0000000 | 0x00a3)
+#define NT_STATUS_INVALID_GROUP_ATTRIBUTES (0xC0000000 | 0x00a4)
+#define NT_STATUS_BAD_IMPERSONATION_LEVEL (0xC0000000 | 0x00a5)
+#define NT_STATUS_CANT_OPEN_ANONYMOUS (0xC0000000 | 0x00a6)
+#define NT_STATUS_BAD_VALIDATION_CLASS (0xC0000000 | 0x00a7)
+#define NT_STATUS_BAD_TOKEN_TYPE (0xC0000000 | 0x00a8)
+#define NT_STATUS_BAD_MASTER_BOOT_RECORD (0xC0000000 | 0x00a9)
+#define NT_STATUS_INSTRUCTION_MISALIGNMENT (0xC0000000 | 0x00aa)
+#define NT_STATUS_INSTANCE_NOT_AVAILABLE (0xC0000000 | 0x00ab)
+#define NT_STATUS_PIPE_NOT_AVAILABLE (0xC0000000 | 0x00ac)
+#define NT_STATUS_INVALID_PIPE_STATE (0xC0000000 | 0x00ad)
+#define NT_STATUS_PIPE_BUSY (0xC0000000 | 0x00ae)
+#define NT_STATUS_ILLEGAL_FUNCTION (0xC0000000 | 0x00af)
+#define NT_STATUS_PIPE_DISCONNECTED (0xC0000000 | 0x00b0)
+#define NT_STATUS_PIPE_CLOSING (0xC0000000 | 0x00b1)
+#define NT_STATUS_PIPE_CONNECTED (0xC0000000 | 0x00b2)
+#define NT_STATUS_PIPE_LISTENING (0xC0000000 | 0x00b3)
+#define NT_STATUS_INVALID_READ_MODE (0xC0000000 | 0x00b4)
+#define NT_STATUS_IO_TIMEOUT (0xC0000000 | 0x00b5)
+#define NT_STATUS_FILE_FORCED_CLOSED (0xC0000000 | 0x00b6)
+#define NT_STATUS_PROFILING_NOT_STARTED (0xC0000000 | 0x00b7)
+#define NT_STATUS_PROFILING_NOT_STOPPED (0xC0000000 | 0x00b8)
+#define NT_STATUS_COULD_NOT_INTERPRET (0xC0000000 | 0x00b9)
+#define NT_STATUS_FILE_IS_A_DIRECTORY (0xC0000000 | 0x00ba)
+#define NT_STATUS_NOT_SUPPORTED (0xC0000000 | 0x00bb)
+#define NT_STATUS_REMOTE_NOT_LISTENING (0xC0000000 | 0x00bc)
+#define NT_STATUS_DUPLICATE_NAME (0xC0000000 | 0x00bd)
+#define NT_STATUS_BAD_NETWORK_PATH (0xC0000000 | 0x00be)
+#define NT_STATUS_NETWORK_BUSY (0xC0000000 | 0x00bf)
+#define NT_STATUS_DEVICE_DOES_NOT_EXIST (0xC0000000 | 0x00c0)
+#define NT_STATUS_TOO_MANY_COMMANDS (0xC0000000 | 0x00c1)
+#define NT_STATUS_ADAPTER_HARDWARE_ERROR (0xC0000000 | 0x00c2)
+#define NT_STATUS_INVALID_NETWORK_RESPONSE (0xC0000000 | 0x00c3)
+#define NT_STATUS_UNEXPECTED_NETWORK_ERROR (0xC0000000 | 0x00c4)
+#define NT_STATUS_BAD_REMOTE_ADAPTER (0xC0000000 | 0x00c5)
+#define NT_STATUS_PRINT_QUEUE_FULL (0xC0000000 | 0x00c6)
+#define NT_STATUS_NO_SPOOL_SPACE (0xC0000000 | 0x00c7)
+#define NT_STATUS_PRINT_CANCELLED (0xC0000000 | 0x00c8)
+#define NT_STATUS_NETWORK_NAME_DELETED (0xC0000000 | 0x00c9)
+#define NT_STATUS_NETWORK_ACCESS_DENIED (0xC0000000 | 0x00ca)
+#define NT_STATUS_BAD_DEVICE_TYPE (0xC0000000 | 0x00cb)
+#define NT_STATUS_BAD_NETWORK_NAME (0xC0000000 | 0x00cc)
+#define NT_STATUS_TOO_MANY_NAMES (0xC0000000 | 0x00cd)
+#define NT_STATUS_TOO_MANY_SESSIONS (0xC0000000 | 0x00ce)
+#define NT_STATUS_SHARING_PAUSED (0xC0000000 | 0x00cf)
+#define NT_STATUS_REQUEST_NOT_ACCEPTED (0xC0000000 | 0x00d0)
+#define NT_STATUS_REDIRECTOR_PAUSED (0xC0000000 | 0x00d1)
+#define NT_STATUS_NET_WRITE_FAULT (0xC0000000 | 0x00d2)
+#define NT_STATUS_PROFILING_AT_LIMIT (0xC0000000 | 0x00d3)
+#define NT_STATUS_NOT_SAME_DEVICE (0xC0000000 | 0x00d4)
+#define NT_STATUS_FILE_RENAMED (0xC0000000 | 0x00d5)
+#define NT_STATUS_VIRTUAL_CIRCUIT_CLOSED (0xC0000000 | 0x00d6)
+#define NT_STATUS_NO_SECURITY_ON_OBJECT (0xC0000000 | 0x00d7)
+#define NT_STATUS_CANT_WAIT (0xC0000000 | 0x00d8)
+#define NT_STATUS_PIPE_EMPTY (0xC0000000 | 0x00d9)
+#define NT_STATUS_CANT_ACCESS_DOMAIN_INFO (0xC0000000 | 0x00da)
+#define NT_STATUS_CANT_TERMINATE_SELF (0xC0000000 | 0x00db)
+#define NT_STATUS_INVALID_SERVER_STATE (0xC0000000 | 0x00dc)
+#define NT_STATUS_INVALID_DOMAIN_STATE (0xC0000000 | 0x00dd)
+#define NT_STATUS_INVALID_DOMAIN_ROLE (0xC0000000 | 0x00de)
+#define NT_STATUS_NO_SUCH_DOMAIN (0xC0000000 | 0x00df)
+#define NT_STATUS_DOMAIN_EXISTS (0xC0000000 | 0x00e0)
+#define NT_STATUS_DOMAIN_LIMIT_EXCEEDED (0xC0000000 | 0x00e1)
+#define NT_STATUS_OPLOCK_NOT_GRANTED (0xC0000000 | 0x00e2)
+#define NT_STATUS_INVALID_OPLOCK_PROTOCOL (0xC0000000 | 0x00e3)
+#define NT_STATUS_INTERNAL_DB_CORRUPTION (0xC0000000 | 0x00e4)
+#define NT_STATUS_INTERNAL_ERROR (0xC0000000 | 0x00e5)
+#define NT_STATUS_GENERIC_NOT_MAPPED (0xC0000000 | 0x00e6)
+#define NT_STATUS_BAD_DESCRIPTOR_FORMAT (0xC0000000 | 0x00e7)
+#define NT_STATUS_INVALID_USER_BUFFER (0xC0000000 | 0x00e8)
+#define NT_STATUS_UNEXPECTED_IO_ERROR (0xC0000000 | 0x00e9)
+#define NT_STATUS_UNEXPECTED_MM_CREATE_ERR (0xC0000000 | 0x00ea)
+#define NT_STATUS_UNEXPECTED_MM_MAP_ERROR (0xC0000000 | 0x00eb)
+#define NT_STATUS_UNEXPECTED_MM_EXTEND_ERR (0xC0000000 | 0x00ec)
+#define NT_STATUS_NOT_LOGON_PROCESS (0xC0000000 | 0x00ed)
+#define NT_STATUS_LOGON_SESSION_EXISTS (0xC0000000 | 0x00ee)
+#define NT_STATUS_INVALID_PARAMETER_1 (0xC0000000 | 0x00ef)
+#define NT_STATUS_INVALID_PARAMETER_2 (0xC0000000 | 0x00f0)
+#define NT_STATUS_INVALID_PARAMETER_3 (0xC0000000 | 0x00f1)
+#define NT_STATUS_INVALID_PARAMETER_4 (0xC0000000 | 0x00f2)
+#define NT_STATUS_INVALID_PARAMETER_5 (0xC0000000 | 0x00f3)
+#define NT_STATUS_INVALID_PARAMETER_6 (0xC0000000 | 0x00f4)
+#define NT_STATUS_INVALID_PARAMETER_7 (0xC0000000 | 0x00f5)
+#define NT_STATUS_INVALID_PARAMETER_8 (0xC0000000 | 0x00f6)
+#define NT_STATUS_INVALID_PARAMETER_9 (0xC0000000 | 0x00f7)
+#define NT_STATUS_INVALID_PARAMETER_10 (0xC0000000 | 0x00f8)
+#define NT_STATUS_INVALID_PARAMETER_11 (0xC0000000 | 0x00f9)
+#define NT_STATUS_INVALID_PARAMETER_12 (0xC0000000 | 0x00fa)
+#define NT_STATUS_REDIRECTOR_NOT_STARTED (0xC0000000 | 0x00fb)
+#define NT_STATUS_REDIRECTOR_STARTED (0xC0000000 | 0x00fc)
+#define NT_STATUS_STACK_OVERFLOW (0xC0000000 | 0x00fd)
+#define NT_STATUS_NO_SUCH_PACKAGE (0xC0000000 | 0x00fe)
+#define NT_STATUS_BAD_FUNCTION_TABLE (0xC0000000 | 0x00ff)
+#define NT_STATUS_DIRECTORY_NOT_EMPTY (0xC0000000 | 0x0101)
+#define NT_STATUS_FILE_CORRUPT_ERROR (0xC0000000 | 0x0102)
+#define NT_STATUS_NOT_A_DIRECTORY (0xC0000000 | 0x0103)
+#define NT_STATUS_BAD_LOGON_SESSION_STATE (0xC0000000 | 0x0104)
+#define NT_STATUS_LOGON_SESSION_COLLISION (0xC0000000 | 0x0105)
+#define NT_STATUS_NAME_TOO_LONG (0xC0000000 | 0x0106)
+#define NT_STATUS_FILES_OPEN (0xC0000000 | 0x0107)
+#define NT_STATUS_CONNECTION_IN_USE (0xC0000000 | 0x0108)
+#define NT_STATUS_MESSAGE_NOT_FOUND (0xC0000000 | 0x0109)
+#define NT_STATUS_PROCESS_IS_TERMINATING (0xC0000000 | 0x010a)
+#define NT_STATUS_INVALID_LOGON_TYPE (0xC0000000 | 0x010b)
+#define NT_STATUS_NO_GUID_TRANSLATION (0xC0000000 | 0x010c)
+#define NT_STATUS_CANNOT_IMPERSONATE (0xC0000000 | 0x010d)
+#define NT_STATUS_IMAGE_ALREADY_LOADED (0xC0000000 | 0x010e)
+#define NT_STATUS_ABIOS_NOT_PRESENT (0xC0000000 | 0x010f)
+#define NT_STATUS_ABIOS_LID_NOT_EXIST (0xC0000000 | 0x0110)
+#define NT_STATUS_ABIOS_LID_ALREADY_OWNED (0xC0000000 | 0x0111)
+#define NT_STATUS_ABIOS_NOT_LID_OWNER (0xC0000000 | 0x0112)
+#define NT_STATUS_ABIOS_INVALID_COMMAND (0xC0000000 | 0x0113)
+#define NT_STATUS_ABIOS_INVALID_LID (0xC0000000 | 0x0114)
+#define NT_STATUS_ABIOS_SELECTOR_NOT_AVAILABLE (0xC0000000 | 0x0115)
+#define NT_STATUS_ABIOS_INVALID_SELECTOR (0xC0000000 | 0x0116)
+#define NT_STATUS_NO_LDT (0xC0000000 | 0x0117)
+#define NT_STATUS_INVALID_LDT_SIZE (0xC0000000 | 0x0118)
+#define NT_STATUS_INVALID_LDT_OFFSET (0xC0000000 | 0x0119)
+#define NT_STATUS_INVALID_LDT_DESCRIPTOR (0xC0000000 | 0x011a)
+#define NT_STATUS_INVALID_IMAGE_NE_FORMAT (0xC0000000 | 0x011b)
+#define NT_STATUS_RXACT_INVALID_STATE (0xC0000000 | 0x011c)
+#define NT_STATUS_RXACT_COMMIT_FAILURE (0xC0000000 | 0x011d)
+#define NT_STATUS_MAPPED_FILE_SIZE_ZERO (0xC0000000 | 0x011e)
+#define NT_STATUS_TOO_MANY_OPENED_FILES (0xC0000000 | 0x011f)
+#define NT_STATUS_CANCELLED (0xC0000000 | 0x0120)
+#define NT_STATUS_CANNOT_DELETE (0xC0000000 | 0x0121)
+#define NT_STATUS_INVALID_COMPUTER_NAME (0xC0000000 | 0x0122)
+#define NT_STATUS_FILE_DELETED (0xC0000000 | 0x0123)
+#define NT_STATUS_SPECIAL_ACCOUNT (0xC0000000 | 0x0124)
+#define NT_STATUS_SPECIAL_GROUP (0xC0000000 | 0x0125)
+#define NT_STATUS_SPECIAL_USER (0xC0000000 | 0x0126)
+#define NT_STATUS_MEMBERS_PRIMARY_GROUP (0xC0000000 | 0x0127)
+#define NT_STATUS_FILE_CLOSED (0xC0000000 | 0x0128)
+#define NT_STATUS_TOO_MANY_THREADS (0xC0000000 | 0x0129)
+#define NT_STATUS_THREAD_NOT_IN_PROCESS (0xC0000000 | 0x012a)
+#define NT_STATUS_TOKEN_ALREADY_IN_USE (0xC0000000 | 0x012b)
+#define NT_STATUS_PAGEFILE_QUOTA_EXCEEDED (0xC0000000 | 0x012c)
+#define NT_STATUS_COMMITMENT_LIMIT (0xC0000000 | 0x012d)
+#define NT_STATUS_INVALID_IMAGE_LE_FORMAT (0xC0000000 | 0x012e)
+#define NT_STATUS_INVALID_IMAGE_NOT_MZ (0xC0000000 | 0x012f)
+#define NT_STATUS_INVALID_IMAGE_PROTECT (0xC0000000 | 0x0130)
+#define NT_STATUS_INVALID_IMAGE_WIN_16 (0xC0000000 | 0x0131)
+#define NT_STATUS_LOGON_SERVER_CONFLICT (0xC0000000 | 0x0132)
+#define NT_STATUS_TIME_DIFFERENCE_AT_DC (0xC0000000 | 0x0133)
+#define NT_STATUS_SYNCHRONIZATION_REQUIRED (0xC0000000 | 0x0134)
+#define NT_STATUS_DLL_NOT_FOUND (0xC0000000 | 0x0135)
+#define NT_STATUS_OPEN_FAILED (0xC0000000 | 0x0136)
+#define NT_STATUS_IO_PRIVILEGE_FAILED (0xC0000000 | 0x0137)
+#define NT_STATUS_ORDINAL_NOT_FOUND (0xC0000000 | 0x0138)
+#define NT_STATUS_ENTRYPOINT_NOT_FOUND (0xC0000000 | 0x0139)
+#define NT_STATUS_CONTROL_C_EXIT (0xC0000000 | 0x013a)
+#define NT_STATUS_LOCAL_DISCONNECT (0xC0000000 | 0x013b)
+#define NT_STATUS_REMOTE_DISCONNECT (0xC0000000 | 0x013c)
+#define NT_STATUS_REMOTE_RESOURCES (0xC0000000 | 0x013d)
+#define NT_STATUS_LINK_FAILED (0xC0000000 | 0x013e)
+#define NT_STATUS_LINK_TIMEOUT (0xC0000000 | 0x013f)
+#define NT_STATUS_INVALID_CONNECTION (0xC0000000 | 0x0140)
+#define NT_STATUS_INVALID_ADDRESS (0xC0000000 | 0x0141)
+#define NT_STATUS_DLL_INIT_FAILED (0xC0000000 | 0x0142)
+#define NT_STATUS_MISSING_SYSTEMFILE (0xC0000000 | 0x0143)
+#define NT_STATUS_UNHANDLED_EXCEPTION (0xC0000000 | 0x0144)
+#define NT_STATUS_APP_INIT_FAILURE (0xC0000000 | 0x0145)
+#define NT_STATUS_PAGEFILE_CREATE_FAILED (0xC0000000 | 0x0146)
+#define NT_STATUS_NO_PAGEFILE (0xC0000000 | 0x0147)
+#define NT_STATUS_INVALID_LEVEL (0xC0000000 | 0x0148)
+#define NT_STATUS_WRONG_PASSWORD_CORE (0xC0000000 | 0x0149)
+#define NT_STATUS_ILLEGAL_FLOAT_CONTEXT (0xC0000000 | 0x014a)
+#define NT_STATUS_PIPE_BROKEN (0xC0000000 | 0x014b)
+#define NT_STATUS_REGISTRY_CORRUPT (0xC0000000 | 0x014c)
+#define NT_STATUS_REGISTRY_IO_FAILED (0xC0000000 | 0x014d)
+#define NT_STATUS_NO_EVENT_PAIR (0xC0000000 | 0x014e)
+#define NT_STATUS_UNRECOGNIZED_VOLUME (0xC0000000 | 0x014f)
+#define NT_STATUS_SERIAL_NO_DEVICE_INITED (0xC0000000 | 0x0150)
+#define NT_STATUS_NO_SUCH_ALIAS (0xC0000000 | 0x0151)
+#define NT_STATUS_MEMBER_NOT_IN_ALIAS (0xC0000000 | 0x0152)
+#define NT_STATUS_MEMBER_IN_ALIAS (0xC0000000 | 0x0153)
+#define NT_STATUS_ALIAS_EXISTS (0xC0000000 | 0x0154)
+#define NT_STATUS_LOGON_NOT_GRANTED (0xC0000000 | 0x0155)
+#define NT_STATUS_TOO_MANY_SECRETS (0xC0000000 | 0x0156)
+#define NT_STATUS_SECRET_TOO_LONG (0xC0000000 | 0x0157)
+#define NT_STATUS_INTERNAL_DB_ERROR (0xC0000000 | 0x0158)
+#define NT_STATUS_FULLSCREEN_MODE (0xC0000000 | 0x0159)
+#define NT_STATUS_TOO_MANY_CONTEXT_IDS (0xC0000000 | 0x015a)
+#define NT_STATUS_LOGON_TYPE_NOT_GRANTED (0xC0000000 | 0x015b)
+#define NT_STATUS_NOT_REGISTRY_FILE (0xC0000000 | 0x015c)
+#define NT_STATUS_NT_CROSS_ENCRYPTION_REQUIRED (0xC0000000 | 0x015d)
+#define NT_STATUS_DOMAIN_CTRLR_CONFIG_ERROR (0xC0000000 | 0x015e)
+#define NT_STATUS_FT_MISSING_MEMBER (0xC0000000 | 0x015f)
+#define NT_STATUS_ILL_FORMED_SERVICE_ENTRY (0xC0000000 | 0x0160)
+#define NT_STATUS_ILLEGAL_CHARACTER (0xC0000000 | 0x0161)
+#define NT_STATUS_UNMAPPABLE_CHARACTER (0xC0000000 | 0x0162)
+#define NT_STATUS_UNDEFINED_CHARACTER (0xC0000000 | 0x0163)
+#define NT_STATUS_FLOPPY_VOLUME (0xC0000000 | 0x0164)
+#define NT_STATUS_FLOPPY_ID_MARK_NOT_FOUND (0xC0000000 | 0x0165)
+#define NT_STATUS_FLOPPY_WRONG_CYLINDER (0xC0000000 | 0x0166)
+#define NT_STATUS_FLOPPY_UNKNOWN_ERROR (0xC0000000 | 0x0167)
+#define NT_STATUS_FLOPPY_BAD_REGISTERS (0xC0000000 | 0x0168)
+#define NT_STATUS_DISK_RECALIBRATE_FAILED (0xC0000000 | 0x0169)
+#define NT_STATUS_DISK_OPERATION_FAILED (0xC0000000 | 0x016a)
+#define NT_STATUS_DISK_RESET_FAILED (0xC0000000 | 0x016b)
+#define NT_STATUS_SHARED_IRQ_BUSY (0xC0000000 | 0x016c)
+#define NT_STATUS_FT_ORPHANING (0xC0000000 | 0x016d)
+#define NT_STATUS_PARTITION_FAILURE (0xC0000000 | 0x0172)
+#define NT_STATUS_INVALID_BLOCK_LENGTH (0xC0000000 | 0x0173)
+#define NT_STATUS_DEVICE_NOT_PARTITIONED (0xC0000000 | 0x0174)
+#define NT_STATUS_UNABLE_TO_LOCK_MEDIA (0xC0000000 | 0x0175)
+#define NT_STATUS_UNABLE_TO_UNLOAD_MEDIA (0xC0000000 | 0x0176)
+#define NT_STATUS_EOM_OVERFLOW (0xC0000000 | 0x0177)
+#define NT_STATUS_NO_MEDIA (0xC0000000 | 0x0178)
+#define NT_STATUS_NO_SUCH_MEMBER (0xC0000000 | 0x017a)
+#define NT_STATUS_INVALID_MEMBER (0xC0000000 | 0x017b)
+#define NT_STATUS_KEY_DELETED (0xC0000000 | 0x017c)
+#define NT_STATUS_NO_LOG_SPACE (0xC0000000 | 0x017d)
+#define NT_STATUS_TOO_MANY_SIDS (0xC0000000 | 0x017e)
+#define NT_STATUS_LM_CROSS_ENCRYPTION_REQUIRED (0xC0000000 | 0x017f)
+#define NT_STATUS_KEY_HAS_CHILDREN (0xC0000000 | 0x0180)
+#define NT_STATUS_CHILD_MUST_BE_VOLATILE (0xC0000000 | 0x0181)
+#define NT_STATUS_DEVICE_CONFIGURATION_ERROR (0xC0000000 | 0x0182)
+#define NT_STATUS_DRIVER_INTERNAL_ERROR (0xC0000000 | 0x0183)
+#define NT_STATUS_INVALID_DEVICE_STATE (0xC0000000 | 0x0184)
+#define NT_STATUS_IO_DEVICE_ERROR (0xC0000000 | 0x0185)
+#define NT_STATUS_DEVICE_PROTOCOL_ERROR (0xC0000000 | 0x0186)
+#define NT_STATUS_BACKUP_CONTROLLER (0xC0000000 | 0x0187)
+#define NT_STATUS_LOG_FILE_FULL (0xC0000000 | 0x0188)
+#define NT_STATUS_TOO_LATE (0xC0000000 | 0x0189)
+#define NT_STATUS_NO_TRUST_LSA_SECRET (0xC0000000 | 0x018a)
+#define NT_STATUS_NO_TRUST_SAM_ACCOUNT (0xC0000000 | 0x018b)
+#define NT_STATUS_TRUSTED_DOMAIN_FAILURE (0xC0000000 | 0x018c)
+#define NT_STATUS_TRUSTED_RELATIONSHIP_FAILURE (0xC0000000 | 0x018d)
+#define NT_STATUS_EVENTLOG_FILE_CORRUPT (0xC0000000 | 0x018e)
+#define NT_STATUS_EVENTLOG_CANT_START (0xC0000000 | 0x018f)
+#define NT_STATUS_TRUST_FAILURE (0xC0000000 | 0x0190)
+#define NT_STATUS_MUTANT_LIMIT_EXCEEDED (0xC0000000 | 0x0191)
+#define NT_STATUS_NETLOGON_NOT_STARTED (0xC0000000 | 0x0192)
+#define NT_STATUS_ACCOUNT_EXPIRED (0xC0000000 | 0x0193)
+#define NT_STATUS_POSSIBLE_DEADLOCK (0xC0000000 | 0x0194)
+#define NT_STATUS_NETWORK_CREDENTIAL_CONFLICT (0xC0000000 | 0x0195)
+#define NT_STATUS_REMOTE_SESSION_LIMIT (0xC0000000 | 0x0196)
+#define NT_STATUS_EVENTLOG_FILE_CHANGED (0xC0000000 | 0x0197)
+#define NT_STATUS_NOLOGON_INTERDOMAIN_TRUST_ACCOUNT (0xC0000000 | 0x0198)
+#define NT_STATUS_NOLOGON_WORKSTATION_TRUST_ACCOUNT (0xC0000000 | 0x0199)
+#define NT_STATUS_NOLOGON_SERVER_TRUST_ACCOUNT (0xC0000000 | 0x019a)
+#define NT_STATUS_DOMAIN_TRUST_INCONSISTENT (0xC0000000 | 0x019b)
+#define NT_STATUS_FS_DRIVER_REQUIRED (0xC0000000 | 0x019c)
+#define NT_STATUS_NO_USER_SESSION_KEY (0xC0000000 | 0x0202)
+#define NT_STATUS_USER_SESSION_DELETED (0xC0000000 | 0x0203)
+#define NT_STATUS_RESOURCE_LANG_NOT_FOUND (0xC0000000 | 0x0204)
+#define NT_STATUS_INSUFF_SERVER_RESOURCES (0xC0000000 | 0x0205)
+#define NT_STATUS_INVALID_BUFFER_SIZE (0xC0000000 | 0x0206)
+#define NT_STATUS_INVALID_ADDRESS_COMPONENT (0xC0000000 | 0x0207)
+#define NT_STATUS_INVALID_ADDRESS_WILDCARD (0xC0000000 | 0x0208)
+#define NT_STATUS_TOO_MANY_ADDRESSES (0xC0000000 | 0x0209)
+#define NT_STATUS_ADDRESS_ALREADY_EXISTS (0xC0000000 | 0x020a)
+#define NT_STATUS_ADDRESS_CLOSED (0xC0000000 | 0x020b)
+#define NT_STATUS_CONNECTION_DISCONNECTED (0xC0000000 | 0x020c)
+#define NT_STATUS_CONNECTION_RESET (0xC0000000 | 0x020d)
+#define NT_STATUS_TOO_MANY_NODES (0xC0000000 | 0x020e)
+#define NT_STATUS_TRANSACTION_ABORTED (0xC0000000 | 0x020f)
+#define NT_STATUS_TRANSACTION_TIMED_OUT (0xC0000000 | 0x0210)
+#define NT_STATUS_TRANSACTION_NO_RELEASE (0xC0000000 | 0x0211)
+#define NT_STATUS_TRANSACTION_NO_MATCH (0xC0000000 | 0x0212)
+#define NT_STATUS_TRANSACTION_RESPONDED (0xC0000000 | 0x0213)
+#define NT_STATUS_TRANSACTION_INVALID_ID (0xC0000000 | 0x0214)
+#define NT_STATUS_TRANSACTION_INVALID_TYPE (0xC0000000 | 0x0215)
+#define NT_STATUS_NOT_SERVER_SESSION (0xC0000000 | 0x0216)
+#define NT_STATUS_NOT_CLIENT_SESSION (0xC0000000 | 0x0217)
+#define NT_STATUS_CANNOT_LOAD_REGISTRY_FILE (0xC0000000 | 0x0218)
+#define NT_STATUS_DEBUG_ATTACH_FAILED (0xC0000000 | 0x0219)
+#define NT_STATUS_SYSTEM_PROCESS_TERMINATED (0xC0000000 | 0x021a)
+#define NT_STATUS_DATA_NOT_ACCEPTED (0xC0000000 | 0x021b)
+#define NT_STATUS_NO_BROWSER_SERVERS_FOUND (0xC0000000 | 0x021c)
+#define NT_STATUS_VDM_HARD_ERROR (0xC0000000 | 0x021d)
+#define NT_STATUS_DRIVER_CANCEL_TIMEOUT (0xC0000000 | 0x021e)
+#define NT_STATUS_REPLY_MESSAGE_MISMATCH (0xC0000000 | 0x021f)
+#define NT_STATUS_MAPPED_ALIGNMENT (0xC0000000 | 0x0220)
+#define NT_STATUS_IMAGE_CHECKSUM_MISMATCH (0xC0000000 | 0x0221)
+#define NT_STATUS_LOST_WRITEBEHIND_DATA (0xC0000000 | 0x0222)
+#define NT_STATUS_CLIENT_SERVER_PARAMETERS_INVALID (0xC0000000 | 0x0223)
+#define NT_STATUS_PASSWORD_MUST_CHANGE (0xC0000000 | 0x0224)
+#define NT_STATUS_NOT_FOUND (0xC0000000 | 0x0225)
+#define NT_STATUS_NOT_TINY_STREAM (0xC0000000 | 0x0226)
+#define NT_STATUS_RECOVERY_FAILURE (0xC0000000 | 0x0227)
+#define NT_STATUS_STACK_OVERFLOW_READ (0xC0000000 | 0x0228)
+#define NT_STATUS_FAIL_CHECK (0xC0000000 | 0x0229)
+#define NT_STATUS_DUPLICATE_OBJECTID (0xC0000000 | 0x022a)
+#define NT_STATUS_OBJECTID_EXISTS (0xC0000000 | 0x022b)
+#define NT_STATUS_CONVERT_TO_LARGE (0xC0000000 | 0x022c)
+#define NT_STATUS_RETRY (0xC0000000 | 0x022d)
+#define NT_STATUS_FOUND_OUT_OF_SCOPE (0xC0000000 | 0x022e)
+#define NT_STATUS_ALLOCATE_BUCKET (0xC0000000 | 0x022f)
+#define NT_STATUS_PROPSET_NOT_FOUND (0xC0000000 | 0x0230)
+#define NT_STATUS_MARSHALL_OVERFLOW (0xC0000000 | 0x0231)
+#define NT_STATUS_INVALID_VARIANT (0xC0000000 | 0x0232)
+#define NT_STATUS_DOMAIN_CONTROLLER_NOT_FOUND (0xC0000000 | 0x0233)
+#define NT_STATUS_ACCOUNT_LOCKED_OUT (0xC0000000 | 0x0234)
+#define NT_STATUS_HANDLE_NOT_CLOSABLE (0xC0000000 | 0x0235)
+#define NT_STATUS_CONNECTION_REFUSED (0xC0000000 | 0x0236)
+#define NT_STATUS_GRACEFUL_DISCONNECT (0xC0000000 | 0x0237)
+#define NT_STATUS_ADDRESS_ALREADY_ASSOCIATED (0xC0000000 | 0x0238)
+#define NT_STATUS_ADDRESS_NOT_ASSOCIATED (0xC0000000 | 0x0239)
+#define NT_STATUS_CONNECTION_INVALID (0xC0000000 | 0x023a)
+#define NT_STATUS_CONNECTION_ACTIVE (0xC0000000 | 0x023b)
+#define NT_STATUS_NETWORK_UNREACHABLE (0xC0000000 | 0x023c)
+#define NT_STATUS_HOST_UNREACHABLE (0xC0000000 | 0x023d)
+#define NT_STATUS_PROTOCOL_UNREACHABLE (0xC0000000 | 0x023e)
+#define NT_STATUS_PORT_UNREACHABLE (0xC0000000 | 0x023f)
+#define NT_STATUS_REQUEST_ABORTED (0xC0000000 | 0x0240)
+#define NT_STATUS_CONNECTION_ABORTED (0xC0000000 | 0x0241)
+#define NT_STATUS_BAD_COMPRESSION_BUFFER (0xC0000000 | 0x0242)
+#define NT_STATUS_USER_MAPPED_FILE (0xC0000000 | 0x0243)
+#define NT_STATUS_AUDIT_FAILED (0xC0000000 | 0x0244)
+#define NT_STATUS_TIMER_RESOLUTION_NOT_SET (0xC0000000 | 0x0245)
+#define NT_STATUS_CONNECTION_COUNT_LIMIT (0xC0000000 | 0x0246)
+#define NT_STATUS_LOGIN_TIME_RESTRICTION (0xC0000000 | 0x0247)
+#define NT_STATUS_LOGIN_WKSTA_RESTRICTION (0xC0000000 | 0x0248)
+#define NT_STATUS_IMAGE_MP_UP_MISMATCH (0xC0000000 | 0x0249)
+#define NT_STATUS_INSUFFICIENT_LOGON_INFO (0xC0000000 | 0x0250)
+#define NT_STATUS_BAD_DLL_ENTRYPOINT (0xC0000000 | 0x0251)
+#define NT_STATUS_BAD_SERVICE_ENTRYPOINT (0xC0000000 | 0x0252)
+#define NT_STATUS_LPC_REPLY_LOST (0xC0000000 | 0x0253)
+#define NT_STATUS_IP_ADDRESS_CONFLICT1 (0xC0000000 | 0x0254)
+#define NT_STATUS_IP_ADDRESS_CONFLICT2 (0xC0000000 | 0x0255)
+#define NT_STATUS_REGISTRY_QUOTA_LIMIT (0xC0000000 | 0x0256)
+#define NT_STATUS_PATH_NOT_COVERED (0xC0000000 | 0x0257)
+#define NT_STATUS_NO_CALLBACK_ACTIVE (0xC0000000 | 0x0258)
+#define NT_STATUS_LICENSE_QUOTA_EXCEEDED (0xC0000000 | 0x0259)
+#define NT_STATUS_PWD_TOO_SHORT (0xC0000000 | 0x025a)
+#define NT_STATUS_PWD_TOO_RECENT (0xC0000000 | 0x025b)
+#define NT_STATUS_PWD_HISTORY_CONFLICT (0xC0000000 | 0x025c)
+#define NT_STATUS_PLUGPLAY_NO_DEVICE (0xC0000000 | 0x025e)
+#define NT_STATUS_UNSUPPORTED_COMPRESSION (0xC0000000 | 0x025f)
+#define NT_STATUS_INVALID_HW_PROFILE (0xC0000000 | 0x0260)
+#define NT_STATUS_INVALID_PLUGPLAY_DEVICE_PATH (0xC0000000 | 0x0261)
+#define NT_STATUS_DRIVER_ORDINAL_NOT_FOUND (0xC0000000 | 0x0262)
+#define NT_STATUS_DRIVER_ENTRYPOINT_NOT_FOUND (0xC0000000 | 0x0263)
+#define NT_STATUS_RESOURCE_NOT_OWNED (0xC0000000 | 0x0264)
+#define NT_STATUS_TOO_MANY_LINKS (0xC0000000 | 0x0265)
+#define NT_STATUS_QUOTA_LIST_INCONSISTENT (0xC0000000 | 0x0266)
+#define NT_STATUS_FILE_IS_OFFLINE (0xC0000000 | 0x0267)
+#define NT_STATUS_NETWORK_SESSION_EXPIRED (0xC0000000 | 0x035c)
+#define NT_STATUS_NO_SUCH_JOB (0xC0000000 | 0xEDE) /* scheduler */
+#define NT_STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP (0xC0000000 | 0x5D0000)
+#define NT_STATUS_PENDING 0x00000103
+#endif /* _NTERR_H */
diff --git a/fs/ksmbd/ntlmssp.h b/fs/ksmbd/ntlmssp.h
new file mode 100644
index 000000000000..adaf4c0cbe8f
--- /dev/null
+++ b/fs/ksmbd/ntlmssp.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/*
+ * Copyright (c) International Business Machines Corp., 2002,2007
+ * Author(s): Steve French (sfrench@us.ibm.com)
+ */
+
+#ifndef __KSMBD_NTLMSSP_H
+#define __KSMBD_NTLMSSP_H
+
+/*
+ * Signature string that opens every NTLMSSP message. The message structs
+ * in this header size their Signature field with sizeof(), so the
+ * terminating NUL byte is part of the field.
+ */
+#define NTLMSSP_SIGNATURE "NTLMSSP"
+
+/* Security blob target info data */
+#define TGT_Name "KSMBD"
+
+/*
+ * Size of the crypto key returned on the negotiate SMB in bytes
+ */
+#define CIFS_CRYPTO_KEY_SIZE (8)
+#define CIFS_KEY_SIZE (40)
+
+/*
+ * Size of encrypted user password in bytes
+ */
+#define CIFS_ENCPWD_SIZE (16)
+#define CIFS_CPHTXT_SIZE (16)
+
+/*
+ * Message Types. The values are already converted to little endian, so
+ * they can be compared directly against a __le32 MessageType field.
+ */
+#define NtLmNegotiate cpu_to_le32(1)
+#define NtLmChallenge cpu_to_le32(2)
+#define NtLmAuthenticate cpu_to_le32(3)
+#define UnknownMessage cpu_to_le32(8)
+
+/*
+ * Negotiate Flags: single-bit masks, listed from the lowest bit (0x01) to
+ * the highest (0x80000000). Bits documented as reserved are kept as
+ * comments so the gaps in the numbering stay visible.
+ */
+#define NTLMSSP_NEGOTIATE_UNICODE 0x01 /* Text strings are unicode */
+#define NTLMSSP_NEGOTIATE_OEM 0x02 /* Text strings are in OEM */
+#define NTLMSSP_REQUEST_TARGET 0x04 /* Srv returns its auth realm */
+/* #define reserved9 0x08 */
+#define NTLMSSP_NEGOTIATE_SIGN 0x0010 /* Request signing capability */
+#define NTLMSSP_NEGOTIATE_SEAL 0x0020 /* Request confidentiality */
+#define NTLMSSP_NEGOTIATE_DGRAM 0x0040
+#define NTLMSSP_NEGOTIATE_LM_KEY 0x0080 /* Use LM session key */
+/* #define reserved8 0x0100 */
+#define NTLMSSP_NEGOTIATE_NTLM 0x0200 /* NTLM authentication */
+#define NTLMSSP_NEGOTIATE_NT_ONLY 0x0400 /* Lanman not allowed */
+#define NTLMSSP_ANONYMOUS 0x0800
+#define NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED 0x1000 /* reserved6 */
+#define NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED 0x2000
+#define NTLMSSP_NEGOTIATE_LOCAL_CALL 0x4000 /* client/server same machine */
+#define NTLMSSP_NEGOTIATE_ALWAYS_SIGN 0x8000 /* Sign. All security levels */
+#define NTLMSSP_TARGET_TYPE_DOMAIN 0x10000
+#define NTLMSSP_TARGET_TYPE_SERVER 0x20000
+#define NTLMSSP_TARGET_TYPE_SHARE 0x40000
+#define NTLMSSP_NEGOTIATE_EXTENDED_SEC 0x80000 /* NB: not related to NTLMv2 pwd */
+/* #define NTLMSSP_REQUEST_INIT_RESP 0x100000 */
+#define NTLMSSP_NEGOTIATE_IDENTIFY 0x100000
+#define NTLMSSP_REQUEST_ACCEPT_RESP 0x200000 /* reserved5 */
+#define NTLMSSP_REQUEST_NON_NT_KEY 0x400000
+#define NTLMSSP_NEGOTIATE_TARGET_INFO 0x800000
+/* #define reserved4 0x1000000 */
+#define NTLMSSP_NEGOTIATE_VERSION 0x2000000 /* we do not set */
+/* #define reserved3 0x4000000 */
+/* #define reserved2 0x8000000 */
+/* #define reserved1 0x10000000 */
+#define NTLMSSP_NEGOTIATE_128 0x20000000
+#define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000
+#define NTLMSSP_NEGOTIATE_56 0x80000000
+
+/*
+ * AV pair field IDs. The numbering is consecutive starting at zero; the
+ * values are spelled out so each ID can be read off directly.
+ */
+enum av_field_type {
+	NTLMSSP_AV_EOL			= 0,
+	NTLMSSP_AV_NB_COMPUTER_NAME	= 1,
+	NTLMSSP_AV_NB_DOMAIN_NAME	= 2,
+	NTLMSSP_AV_DNS_COMPUTER_NAME	= 3,
+	NTLMSSP_AV_DNS_DOMAIN_NAME	= 4,
+	NTLMSSP_AV_DNS_TREE_NAME	= 5,
+	NTLMSSP_AV_FLAGS		= 6,
+	NTLMSSP_AV_TIMESTAMP		= 7,
+	NTLMSSP_AV_RESTRICTION		= 8,
+	NTLMSSP_AV_TARGET_NAME		= 9,
+	NTLMSSP_AV_CHANNEL_BINDINGS	= 10
+};
+
+/* Although typedefs are not commonly used for structure definitions */
+/* in the Linux kernel, in this particular case they are useful */
+/* to more closely match the standards document for NTLMSSP from */
+/* OpenGroup and to make the code more closely match the standard in */
+/* appearance */
+
+/*
+ * Descriptor for a variable-length field of an NTLMSSP message: two
+ * little-endian 16-bit lengths and a little-endian 32-bit offset.
+ * NOTE(review): the offset is presumably counted from the start of the
+ * message that embeds this descriptor - confirm against the users.
+ */
+struct security_buffer {
+ __le16 Length;
+ __le16 MaximumLength;
+ __le32 BufferOffset; /* offset to buffer */
+} __packed;
+
+/*
+ * Header of one target-info entry: a little-endian type and length
+ * followed by the entry's payload bytes.
+ * NOTE(review): Type presumably carries an enum av_field_type value -
+ * confirm against the users.
+ */
+struct target_info {
+	__le16 Type;
+	__le16 Length;
+	/* C99 flexible array member instead of the deprecated [0] form */
+	__u8 Content[];
+} __packed;
+
+/*
+ * NTLMSSP negotiate message (MessageType == NtLmNegotiate). The fixed
+ * header is followed by the variable-length strings described by the two
+ * security buffers.
+ */
+struct negotiate_message {
+	__u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
+	__le32 MessageType; /* NtLmNegotiate = 1 */
+	__le32 NegotiateFlags;
+	struct security_buffer DomainName; /* RFC 1001 style and ASCII */
+	struct security_buffer WorkstationName; /* RFC 1001 and ASCII */
+	/*
+	 * struct security_buffer for version info not present since we
+	 * do not set the version is present flag
+	 */
+	/* C99 flexible array member instead of the deprecated [0] form */
+	char DomainString[];
+	/* followed by WorkstationString */
+} __packed;
+
+/*
+ * NTLMSSP challenge message (MessageType == NtLmChallenge). Carries the
+ * CIFS_CRYPTO_KEY_SIZE byte challenge plus descriptors for the target name
+ * and the target info array.
+ */
+struct challenge_message {
+ __u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
+ __le32 MessageType; /* NtLmChallenge = 2 */
+ struct security_buffer TargetName;
+ __le32 NegotiateFlags;
+ __u8 Challenge[CIFS_CRYPTO_KEY_SIZE];
+ __u8 Reserved[8];
+ struct security_buffer TargetInfoArray;
+ /*
+ * struct security_buffer for version info not present since we
+ * do not set the version is present flag
+ */
+} __packed;
+
+/*
+ * NTLMSSP authenticate message (MessageType == NtLmAuthenticate). Each
+ * security buffer describes one variable-length field that follows the
+ * fixed header.
+ */
+struct authenticate_message {
+	__u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
+	__le32 MessageType; /* NtLmAuthenticate = 3 */
+	struct security_buffer LmChallengeResponse;
+	struct security_buffer NtChallengeResponse;
+	struct security_buffer DomainName;
+	struct security_buffer UserName;
+	struct security_buffer WorkstationName;
+	struct security_buffer SessionKey;
+	__le32 NegotiateFlags;
+	/*
+	 * struct security_buffer for version info not present since we
+	 * do not set the version is present flag
+	 */
+	/* C99 flexible array member instead of the deprecated [0] form */
+	char UserString[];
+} __packed;
+
+/*
+ * Fixed leading part of an NTLMv2 response: the CIFS_ENCPWD_SIZE byte hash
+ * followed by the blob header (signature, timestamp, client challenge).
+ * The structure is packed; name entries may follow it.
+ */
+struct ntlmv2_resp {
+ char ntlmv2_hash[CIFS_ENCPWD_SIZE];
+ __le32 blob_signature;
+ __u32 reserved;
+ __le64 time;
+ __u64 client_chal; /* random */
+ __u32 reserved2;
+ /* array of name entries could follow ending in minimum 4 byte struct */
+} __packed;
+
+/*
+ * Per smb session NTLMSSP state. Unlike the message structures in this
+ * header it is not packed.
+ */
+struct ntlmssp_auth {
+ /* whether session key is per smb session */
+ bool sesskey_per_smbsess;
+ /* sent by client in type 1 ntlmssp exchange */
+ __u32 client_flags;
+ /* sent by server in type 2 ntlmssp exchange */
+ __u32 conn_flags;
+ /* sent to server */
+ unsigned char ciphertext[CIFS_CPHTXT_SIZE];
+ /* used by ntlmssp */
+ char cryptkey[CIFS_CRYPTO_KEY_SIZE];
+};
+#endif /* __KSMBD_NTLMSSP_H */
diff --git a/fs/ksmbd/oplock.c b/fs/ksmbd/oplock.c
new file mode 100644
index 000000000000..6ace6c2f22dc
--- /dev/null
+++ b/fs/ksmbd/oplock.c
@@ -0,0 +1,1709 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/moduleparam.h>
+
+#include "glob.h"
+#include "oplock.h"
+
+#include "smb_common.h"
+#include "smbstatus.h"
+#include "connection.h"
+#include "mgmt/user_session.h"
+#include "mgmt/share_config.h"
+#include "mgmt/tree_connect.h"
+
+/*
+ * Global list of lease tables (struct lease_table, linked via l_entry).
+ * lease_list_lock is taken for writing when tables are added or destroyed
+ * and for reading when the list is searched.
+ */
+static LIST_HEAD(lease_table_list);
+static DEFINE_RWLOCK(lease_list_lock);
+
+/**
+ * alloc_opinfo() - allocate and initialise an oplock info object
+ * @work:	smb work whose session becomes the owner of the object
+ * @id:		fid of open file
+ * @Tid:	tree id of connection
+ *
+ * The new object holds no oplock (level NONE, state NONE), has no break
+ * pending and starts with a reference count of one.
+ *
+ * Return:	allocated opinfo object on success, otherwise NULL
+ */
+static struct oplock_info *alloc_opinfo(struct ksmbd_work *work,
+					u64 id, __u16 Tid)
+{
+	struct ksmbd_session *owner = work->sess;
+	struct oplock_info *op;
+
+	op = kzalloc(sizeof(struct oplock_info), GFP_KERNEL);
+	if (!op)
+		return NULL;
+
+	/* ownership and identity */
+	op->sess = owner;
+	op->conn = owner->conn;
+	op->fid = id;
+	op->Tid = Tid;
+
+	/* oplock state */
+	op->level = SMB2_OPLOCK_LEVEL_NONE;
+	op->op_state = OPLOCK_STATE_NONE;
+	op->pending_break = 0;
+
+	/* list linkage, wait queues and counters */
+	INIT_LIST_HEAD(&op->op_entry);
+	INIT_LIST_HEAD(&op->interim_list);
+	init_waitqueue_head(&op->oplock_q);
+	init_waitqueue_head(&op->oplock_brk);
+	atomic_set(&op->refcount, 1);
+	atomic_set(&op->breaking_cnt, 0);
+
+	return op;
+}
+
+/*
+ * Link @opinfo into the lease list of the lease table its lease points at
+ * (o_lease->l_lb). The table's spinlock is held around the insertion.
+ */
+static void lease_add_list(struct oplock_info *opinfo)
+{
+	struct lease_table *table = opinfo->o_lease->l_lb;
+
+	spin_lock(&table->lb_lock);
+	list_add_rcu(&opinfo->lease_entry, &table->lease_list);
+	spin_unlock(&table->lb_lock);
+}
+
+/*
+ * Unlink @opinfo from its lease table, if it is linked to one.
+ *
+ * Nothing happens when the lease has no table. Otherwise the entry is
+ * removed under the table's spinlock and the lease's table pointer is
+ * cleared; an entry that is already unlinked is left untouched.
+ */
+static void lease_del_list(struct oplock_info *opinfo)
+{
+	struct lease_table *table = opinfo->o_lease->l_lb;
+
+	if (!table)
+		return;
+
+	spin_lock(&table->lb_lock);
+	if (!list_empty(&opinfo->lease_entry)) {
+		list_del_init(&opinfo->lease_entry);
+		opinfo->o_lease->l_lb = NULL;
+	}
+	spin_unlock(&table->lb_lock);
+}
+
+/* Publish lease table @lb on the global lease_table_list (write-locked). */
+static void lb_add(struct lease_table *lb)
+{
+ write_lock(&lease_list_lock);
+ list_add(&lb->l_entry, &lease_table_list);
+ write_unlock(&lease_list_lock);
+}
+
+/*
+ * alloc_lease() - allocate the lease of @opinfo from a lease context
+ * @opinfo:	oplock info that receives the new lease in o_lease
+ * @lctx:	lease context the initial values are copied from
+ *
+ * The lease starts in the requested state with no pending new state and
+ * epoch 0. It is not linked to any lease table yet, so l_lb is set to NULL
+ * explicitly: kmalloc() does not zero the memory and lease_del_list()
+ * tests that pointer.
+ *
+ * Return:	0 on success, -ENOMEM if the allocation fails
+ */
+static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx)
+{
+	struct lease *lease;
+
+	lease = kmalloc(sizeof(*lease), GFP_KERNEL);
+	if (!lease)
+		return -ENOMEM;
+
+	memcpy(lease->lease_key, lctx->lease_key, SMB2_LEASE_KEY_SIZE);
+	lease->state = lctx->req_state;
+	lease->new_state = 0;
+	lease->flags = lctx->flags;
+	lease->duration = lctx->duration;
+	memcpy(lease->parent_lease_key, lctx->parent_lease_key, SMB2_LEASE_KEY_SIZE);
+	lease->version = lctx->version;
+	lease->epoch = 0;
+	lease->l_lb = NULL;
+	INIT_LIST_HEAD(&opinfo->lease_entry);
+	opinfo->o_lease = lease;
+
+	return 0;
+}
+
+/* Release the lease object attached to @opinfo. */
+static void free_lease(struct oplock_info *opinfo)
+{
+	kfree(opinfo->o_lease);
+}
+
+/* Free @opinfo, including its lease object when it represents a lease. */
+static void free_opinfo(struct oplock_info *opinfo)
+{
+ if (opinfo->is_lease)
+ free_lease(opinfo);
+ kfree(opinfo);
+}
+
+/*
+ * RCU callback: recover the oplock info that embeds @rcu_head and free it.
+ */
+static inline void opinfo_free_rcu(struct rcu_head *rcu_head)
+{
+	free_opinfo(container_of(rcu_head, struct oplock_info, rcu_head));
+}
+
+/*
+ * opinfo_get() - take a reference on the oplock info of an open file
+ * @fp:	ksmbd file pointer
+ *
+ * Return:	the oplock info with its refcount raised, or NULL when the
+ *		file has none or its refcount has already dropped to zero
+ */
+struct oplock_info *opinfo_get(struct ksmbd_file *fp)
+{
+	struct oplock_info *held = NULL;
+	struct oplock_info *cur;
+
+	rcu_read_lock();
+	cur = rcu_dereference(fp->f_opinfo);
+	if (cur && atomic_inc_not_zero(&cur->refcount))
+		held = cur;
+	rcu_read_unlock();
+
+	return held;
+}
+
+/*
+ * Take a reference on the first oplock info queued on @ci->m_op_list.
+ *
+ * Returns NULL when the list is empty or when the first entry's refcount
+ * has already dropped to zero.
+ */
+static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci)
+{
+	struct oplock_info *held = NULL;
+	struct oplock_info *first;
+
+	if (list_empty(&ci->m_op_list))
+		return NULL;
+
+	rcu_read_lock();
+	first = list_first_or_null_rcu(&ci->m_op_list, struct oplock_info,
+				       op_entry);
+	if (first && atomic_inc_not_zero(&first->refcount))
+		held = first;
+	rcu_read_unlock();
+
+	return held;
+}
+
+/*
+ * opinfo_put() - drop a reference on an oplock info
+ * @opinfo:	oplock info object
+ *
+ * When the last reference goes away the object is handed to call_rcu()
+ * and freed by opinfo_free_rcu().
+ */
+void opinfo_put(struct oplock_info *opinfo)
+{
+	if (atomic_dec_and_test(&opinfo->refcount))
+		call_rcu(&opinfo->rcu_head, opinfo_free_rcu);
+}
+
+/*
+ * Queue @opinfo on the oplock list of the inode behind its file
+ * (o_fp->f_ci->m_op_list), holding the inode's m_lock for writing.
+ */
+static void opinfo_add(struct oplock_info *opinfo)
+{
+ struct ksmbd_inode *ci = opinfo->o_fp->f_ci;
+
+ write_lock(&ci->m_lock);
+ list_add_rcu(&opinfo->op_entry, &ci->m_op_list);
+ write_unlock(&ci->m_lock);
+}
+
+/*
+ * Unlink @opinfo from the lists it is on: for a lease, first from its
+ * lease table (under the global lease_list_lock), then from the inode's
+ * oplock list (under the inode's m_lock). The two locks are never held
+ * at the same time here.
+ */
+static void opinfo_del(struct oplock_info *opinfo)
+{
+ struct ksmbd_inode *ci = opinfo->o_fp->f_ci;
+
+ if (opinfo->is_lease) {
+ write_lock(&lease_list_lock);
+ lease_del_list(opinfo);
+ write_unlock(&lease_list_lock);
+ }
+ write_lock(&ci->m_lock);
+ list_del_rcu(&opinfo->op_entry);
+ write_unlock(&ci->m_lock);
+}
+
+/*
+ * Read the oplock counter that applies to @fp: the inode's sop_count when
+ * ksmbd_stream_fd() reports a stream, its op_count otherwise.
+ */
+static unsigned long opinfo_count(struct ksmbd_file *fp)
+{
+	atomic_t *counter;
+
+	if (ksmbd_stream_fd(fp))
+		counter = &fp->f_ci->sop_count;
+	else
+		counter = &fp->f_ci->op_count;
+
+	return atomic_read(counter);
+}
+
+/*
+ * Increment the oplock counter that applies to @fp: the inode's sop_count
+ * when ksmbd_stream_fd() reports a stream, its op_count otherwise.
+ */
+static void opinfo_count_inc(struct ksmbd_file *fp)
+{
+	/* atomic_inc() yields no value, so nothing is returned here */
+	if (ksmbd_stream_fd(fp))
+		atomic_inc(&fp->f_ci->sop_count);
+	else
+		atomic_inc(&fp->f_ci->op_count);
+}
+
+/*
+ * Decrement the oplock counter that applies to @fp: the inode's sop_count
+ * when ksmbd_stream_fd() reports a stream, its op_count otherwise.
+ */
+static void opinfo_count_dec(struct ksmbd_file *fp)
+{
+	/* atomic_dec() yields no value, so nothing is returned here */
+	if (ksmbd_stream_fd(fp))
+		atomic_dec(&fp->f_ci->sop_count);
+	else
+		atomic_dec(&fp->f_ci->op_count);
+}
+
+/**
+ * opinfo_write_to_read() - downgrade a batch/exclusive oplock to level II
+ * @opinfo:	current oplock info
+ *
+ * For a lease the pending new_state also becomes the current lease state.
+ *
+ * Return:	0 on success, -EINVAL if the current level is neither batch
+ *		nor exclusive
+ */
+int opinfo_write_to_read(struct oplock_info *opinfo)
+{
+	struct lease *lease = opinfo->o_lease;
+
+	if (opinfo->level != SMB2_OPLOCK_LEVEL_BATCH &&
+	    opinfo->level != SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
+		pr_err("bad oplock(0x%x)\n", opinfo->level);
+		if (opinfo->is_lease)
+			pr_err("lease state(0x%x)\n", lease->state);
+		return -EINVAL;
+	}
+
+	opinfo->level = SMB2_OPLOCK_LEVEL_II;
+	if (opinfo->is_lease)
+		lease->state = lease->new_state;
+
+	return 0;
+}
+
+/**
+ * opinfo_read_handle_to_read() - convert a read/handle oplock to read oplock
+ * @opinfo: current oplock info
+ *
+ * The lease is dereferenced unconditionally (there is no is_lease check),
+ * so @opinfo must carry a lease.
+ *
+ * Return: always 0
+ */
+int opinfo_read_handle_to_read(struct oplock_info *opinfo)
+{
+ struct lease *lease = opinfo->o_lease;
+
+ lease->state = lease->new_state;
+ opinfo->level = SMB2_OPLOCK_LEVEL_II;
+ return 0;
+}
+
+/**
+ * opinfo_write_to_none() - drop a batch/exclusive oplock to none
+ * @opinfo:	current oplock info
+ *
+ * For a lease the pending new_state also becomes the current lease state.
+ *
+ * Return:	0 on success, -EINVAL if the current level is neither batch
+ *		nor exclusive
+ */
+int opinfo_write_to_none(struct oplock_info *opinfo)
+{
+	struct lease *lease = opinfo->o_lease;
+
+	if (opinfo->level != SMB2_OPLOCK_LEVEL_BATCH &&
+	    opinfo->level != SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
+		pr_err("bad oplock(0x%x)\n", opinfo->level);
+		if (opinfo->is_lease)
+			pr_err("lease state(0x%x)\n", lease->state);
+		return -EINVAL;
+	}
+
+	opinfo->level = SMB2_OPLOCK_LEVEL_NONE;
+	if (opinfo->is_lease)
+		lease->state = lease->new_state;
+
+	return 0;
+}
+
+/**
+ * opinfo_read_to_none() - drop a level II (read) oplock to none
+ * @opinfo:	current oplock info
+ *
+ * For a lease the pending new_state also becomes the current lease state.
+ *
+ * Return:	0 on success, -EINVAL if the current level is not level II
+ */
+int opinfo_read_to_none(struct oplock_info *opinfo)
+{
+	struct lease *lease = opinfo->o_lease;
+
+	if (opinfo->level == SMB2_OPLOCK_LEVEL_II) {
+		opinfo->level = SMB2_OPLOCK_LEVEL_NONE;
+		if (opinfo->is_lease)
+			lease->state = lease->new_state;
+		return 0;
+	}
+
+	pr_err("bad oplock(0x%x)\n", opinfo->level);
+	if (opinfo->is_lease)
+		pr_err("lease state(0x%x)\n", lease->state);
+	return -EINVAL;
+}
+
+/**
+ * lease_read_to_write() - add write caching to a lease that has read caching
+ * @opinfo:	current lease info
+ *
+ * The oplock level becomes batch when the lease also has handle caching,
+ * exclusive otherwise. The pending new_state is reset to none.
+ *
+ * Return:	0 on success, -EINVAL if the lease lacks read caching
+ */
+int lease_read_to_write(struct oplock_info *opinfo)
+{
+	struct lease *lease = opinfo->o_lease;
+
+	if (!(lease->state & SMB2_LEASE_READ_CACHING_LE)) {
+		ksmbd_debug(OPLOCK, "bad lease state(0x%x)\n", lease->state);
+		return -EINVAL;
+	}
+
+	lease->new_state = SMB2_LEASE_NONE_LE;
+	lease->state |= SMB2_LEASE_WRITE_CACHING_LE;
+	opinfo->level = (lease->state & SMB2_LEASE_HANDLE_CACHING_LE) ?
+			SMB2_OPLOCK_LEVEL_BATCH : SMB2_OPLOCK_LEVEL_EXCLUSIVE;
+
+	return 0;
+}
+
+/**
+ * lease_none_upgrade() - upgrade lease state from none
+ * @opinfo:	current lease info
+ * @new_state:	new lease state
+ *
+ * The oplock level follows the new state: handle+write gives batch,
+ * handle without write gives level II, write without handle gives
+ * exclusive and read alone gives level II. With none of these bits set
+ * the level is left as it was.
+ *
+ * Return:	0 on success, -EINVAL if the current lease state is not none
+ */
+static int lease_none_upgrade(struct oplock_info *opinfo, __le32 new_state)
+{
+	struct lease *lease = opinfo->o_lease;
+
+	if (lease->state != SMB2_LEASE_NONE_LE) {
+		ksmbd_debug(OPLOCK, "bad lease state(0x%x)\n", lease->state);
+		return -EINVAL;
+	}
+
+	lease->new_state = SMB2_LEASE_NONE_LE;
+	lease->state = new_state;
+
+	if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE) {
+		if (lease->state & SMB2_LEASE_WRITE_CACHING_LE)
+			opinfo->level = SMB2_OPLOCK_LEVEL_BATCH;
+		else
+			opinfo->level = SMB2_OPLOCK_LEVEL_II;
+	} else if (lease->state & SMB2_LEASE_WRITE_CACHING_LE) {
+		opinfo->level = SMB2_OPLOCK_LEVEL_EXCLUSIVE;
+	} else if (lease->state & SMB2_LEASE_READ_CACHING_LE) {
+		opinfo->level = SMB2_OPLOCK_LEVEL_II;
+	}
+
+	return 0;
+}
+
+/**
+ * close_id_del_oplock() - release oplock object at file close time
+ * @fp: ksmbd file pointer
+ *
+ * Does nothing for directories or when the file has no oplock info.
+ * Otherwise the oplock info is unlinked, detached from @fp and any waiter
+ * for a break acknowledgment is woken with the state set to
+ * OPLOCK_CLOSING.
+ */
+void close_id_del_oplock(struct ksmbd_file *fp)
+{
+ struct oplock_info *opinfo;
+
+ if (S_ISDIR(file_inode(fp->filp)->i_mode))
+ return;
+
+ /* takes a reference that is dropped by opinfo_put() below */
+ opinfo = opinfo_get(fp);
+ if (!opinfo)
+ return;
+
+ opinfo_del(opinfo);
+
+ rcu_assign_pointer(fp->f_opinfo, NULL);
+ if (opinfo->op_state == OPLOCK_ACK_WAIT) {
+ /* a break is waiting for an ack: tell the waiters we are closing */
+ opinfo->op_state = OPLOCK_CLOSING;
+ wake_up_interruptible_all(&opinfo->oplock_q);
+ if (opinfo->is_lease) {
+ atomic_set(&opinfo->breaking_cnt, 0);
+ wake_up_interruptible_all(&opinfo->oplock_brk);
+ }
+ }
+
+ opinfo_count_dec(fp);
+ /*
+ * NOTE(review): presumably this drops the initial reference set in
+ * alloc_opinfo(); the opinfo_put() after it drops the one taken by
+ * opinfo_get() above and frees the object once the count hits zero.
+ */
+ atomic_dec(&opinfo->refcount);
+ opinfo_put(opinfo);
+}
+
+/**
+ * grant_write_oplock() - grant exclusive/batch oplock or write lease
+ * @opinfo_new:	new oplock info object
+ * @req_oplock:	request oplock
+ * @lctx:	lease context information, NULL for a plain oplock
+ *
+ * A batch request is granted batch, anything else exclusive. With a
+ * lease context the lease takes the requested state and the lease key.
+ */
+static void grant_write_oplock(struct oplock_info *opinfo_new, int req_oplock,
+			       struct lease_ctx_info *lctx)
+{
+	struct lease *lease = opinfo_new->o_lease;
+
+	opinfo_new->level = (req_oplock == SMB2_OPLOCK_LEVEL_BATCH) ?
+			    SMB2_OPLOCK_LEVEL_BATCH :
+			    SMB2_OPLOCK_LEVEL_EXCLUSIVE;
+
+	if (!lctx)
+		return;
+
+	lease->state = lctx->req_state;
+	memcpy(lease->lease_key, lctx->lease_key, SMB2_LEASE_KEY_SIZE);
+}
+
+/**
+ * grant_read_oplock() - grant level2 oplock or read lease
+ * @opinfo_new:	new oplock info object
+ * @lctx:	lease context information, NULL for a plain oplock
+ *
+ * With a lease context the lease gets read caching, plus handle caching
+ * if the request asked for it, and the lease key is copied.
+ */
+static void grant_read_oplock(struct oplock_info *opinfo_new,
+			      struct lease_ctx_info *lctx)
+{
+	struct lease *lease = opinfo_new->o_lease;
+
+	opinfo_new->level = SMB2_OPLOCK_LEVEL_II;
+
+	if (!lctx)
+		return;
+
+	lease->state = SMB2_LEASE_READ_CACHING_LE;
+	if (lctx->req_state & SMB2_LEASE_HANDLE_CACHING_LE)
+		lease->state |= SMB2_LEASE_HANDLE_CACHING_LE;
+	memcpy(lease->lease_key, lctx->lease_key, SMB2_LEASE_KEY_SIZE);
+}
+
+/**
+ * grant_none_oplock() - grant none oplock or none lease
+ * @opinfo_new:	new oplock info object
+ * @lctx:	lease context information, NULL for a plain oplock
+ *
+ * With a lease context the lease state is cleared and the lease key is
+ * copied.
+ */
+static void grant_none_oplock(struct oplock_info *opinfo_new,
+			      struct lease_ctx_info *lctx)
+{
+	struct lease *lease = opinfo_new->o_lease;
+
+	opinfo_new->level = SMB2_OPLOCK_LEVEL_NONE;
+
+	if (!lctx)
+		return;
+
+	lease->state = 0;
+	memcpy(lease->lease_key, lctx->lease_key, SMB2_LEASE_KEY_SIZE);
+}
+
+/*
+ * Check whether @opinfo belongs to client @guid1 and uses lease key @key1.
+ *
+ * Returns 1 when both the connection's ClientGUID and the lease key match,
+ * 0 otherwise.
+ */
+static inline int compare_guid_key(struct oplock_info *opinfo,
+				   const char *guid1, const char *key1)
+{
+	const char *own_guid = opinfo->conn->ClientGUID;
+	const char *own_key = opinfo->o_lease->lease_key;
+
+	if (memcmp(guid1, own_guid, SMB2_CLIENT_GUID_SIZE))
+		return 0;
+	if (memcmp(key1, own_key, SMB2_LEASE_KEY_SIZE))
+		return 0;
+
+	return 1;
+}
+
+/**
+ * same_client_has_lease() - check whether current lease request is
+ * from lease owner of file
+ * @ci: ksmbd inode whose oplock list (m_op_list) is searched
+ * @client_guid: Client GUID
+ * @lctx: lease context information
+ *
+ * Besides searching, a matching lease may be upgraded in place according
+ * to the requested state. The inode's m_lock is dropped while an entry is
+ * examined and taken again before the walk continues.
+ *
+ * Return: the last matching oplock(lease) object, or NULL when @lctx is
+ * NULL or nothing matches. No reference is taken on it.
+ */
+static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci,
+ char *client_guid,
+ struct lease_ctx_info *lctx)
+{
+ int ret;
+ struct lease *lease;
+ struct oplock_info *opinfo;
+ struct oplock_info *m_opinfo = NULL;
+
+ if (!lctx)
+ return NULL;
+
+ /*
+ * Compare lease key and client_guid to know request from same owner
+ * of same client
+ */
+ read_lock(&ci->m_lock);
+ list_for_each_entry(opinfo, &ci->m_op_list, op_entry) {
+ if (!opinfo->is_lease)
+ continue;
+ /* the lock is not held while this entry is examined */
+ read_unlock(&ci->m_lock);
+ lease = opinfo->o_lease;
+
+ ret = compare_guid_key(opinfo, client_guid, lctx->lease_key);
+ if (ret) {
+ m_opinfo = opinfo;
+ /* skip upgrading lease about breaking lease */
+ if (atomic_read(&opinfo->breaking_cnt)) {
+ read_lock(&ci->m_lock);
+ continue;
+ }
+
+ /* upgrading lease */
+ if ((atomic_read(&ci->op_count) +
+ atomic_read(&ci->sop_count)) == 1) {
+ /* sole oplock on the inode: only ever add bits to the state */
+ if (lease->state ==
+ (lctx->req_state & lease->state)) {
+ lease->state |= lctx->req_state;
+ if (lctx->req_state &
+ SMB2_LEASE_WRITE_CACHING_LE)
+ lease_read_to_write(opinfo);
+ }
+ } else if ((atomic_read(&ci->op_count) +
+ atomic_read(&ci->sop_count)) > 1) {
+ /* several oplocks: only a read+handle request is taken over */
+ if (lctx->req_state ==
+ (SMB2_LEASE_READ_CACHING_LE |
+ SMB2_LEASE_HANDLE_CACHING_LE))
+ lease->state = lctx->req_state;
+ }
+
+ if (lctx->req_state && lease->state ==
+ SMB2_LEASE_NONE_LE)
+ lease_none_upgrade(opinfo, lctx->req_state);
+ }
+ read_lock(&ci->m_lock);
+ }
+ read_unlock(&ci->m_lock);
+
+ return m_opinfo;
+}
+
+/*
+ * Sleep on oplock_q until the break has been handled (op_state back to
+ * OPLOCK_STATE_NONE) or the file is closing (OPLOCK_CLOSING), for at most
+ * OPLOCK_WAIT_TIME.
+ *
+ * On timeout the oplock is forced down: level and state are reset to none,
+ * and so is the lease state for a lease.
+ */
+static void wait_for_break_ack(struct oplock_info *opinfo)
+{
+	int remaining;
+
+	remaining = wait_event_interruptible_timeout(opinfo->oplock_q,
+				opinfo->op_state == OPLOCK_STATE_NONE ||
+				opinfo->op_state == OPLOCK_CLOSING,
+				OPLOCK_WAIT_TIME);
+
+	/* a non-zero result means the wait did not time out */
+	if (remaining)
+		return;
+
+	if (opinfo->is_lease)
+		opinfo->o_lease->state = SMB2_LEASE_NONE_LE;
+	opinfo->level = SMB2_OPLOCK_LEVEL_NONE;
+	opinfo->op_state = OPLOCK_STATE_NONE;
+}
+
+/*
+ * Release bit 0 of pending_break and wake the tasks that sleep on it in
+ * oplock_break_pending().
+ */
+static void wake_up_oplock_break(struct oplock_info *opinfo)
+{
+ clear_bit_unlock(0, &opinfo->pending_break);
+ /* memory barrier is needed for wake_up_bit() */
+ smp_mb__after_atomic();
+ wake_up_bit(&opinfo->pending_break, 0);
+}
+
+/*
+ * Claim bit 0 of pending_break for the caller, sleeping while another
+ * break holds it.
+ *
+ * Returns 0 when the caller now holds the bit and has to carry out the
+ * break, 1 when no break is needed because a non-lease oplock is already
+ * at or below @req_op_level, and -ENOENT when the oplock is closing. The
+ * bit is not held by the caller on a non-zero return.
+ */
+static int oplock_break_pending(struct oplock_info *opinfo, int req_op_level)
+{
+ while (test_and_set_bit(0, &opinfo->pending_break)) {
+ /* someone else is breaking this oplock: wait for them to finish */
+ wait_on_bit(&opinfo->pending_break, 0, TASK_UNINTERRUPTIBLE);
+
+ /* Not immediately break to none. */
+ opinfo->open_trunc = 0;
+
+ if (opinfo->op_state == OPLOCK_CLOSING)
+ return -ENOENT;
+ else if (!opinfo->is_lease && opinfo->level <= req_op_level)
+ return 1;
+ }
+
+ if (!opinfo->is_lease && opinfo->level <= req_op_level) {
+ /* we own the bit but have nothing to do: give it back */
+ wake_up_oplock_break(opinfo);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Allocate a zeroed response buffer of MAX_CIFS_SMALL_BUFFER_SIZE bytes
+ * for @work. Returns 0 on success and -ENOMEM on failure; on failure
+ * work->response_buf is left NULL and response_sz is not touched.
+ */
+static inline int allocate_oplock_break_buf(struct ksmbd_work *work)
+{
+ work->response_buf = kzalloc(MAX_CIFS_SMALL_BUFFER_SIZE, GFP_KERNEL);
+ if (!work->response_buf)
+ return -ENOMEM;
+ work->response_sz = MAX_CIFS_SMALL_BUFFER_SIZE;
+ return 0;
+}
+
+/**
+ * __smb2_oplock_break_noti() - send smb2 oplock break cmd from conn
+ * to client
+ * @wk:     smb work object
+ *
+ * There are two ways this function can be called. 1- while file open we break
+ * from exclusive/batch lock to levelII oplock and 2- while file write/truncate
+ * we break from levelII oplock no oplock.
+ * work->request_buf contains a struct oplock_break_info.
+ *
+ * On every path the work struct is freed and the connection's r_count is
+ * decremented before returning.
+ */
+static void __smb2_oplock_break_noti(struct work_struct *wk)
+{
+	struct smb2_oplock_break *rsp = NULL;
+	struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work);
+	struct ksmbd_conn *conn = work->conn;
+	struct oplock_break_info *br_info = work->request_buf;
+	struct smb2_hdr *rsp_hdr;
+	struct ksmbd_file *fp;
+
+	/* the file may have gone away since the break was requested */
+	fp = ksmbd_lookup_durable_fd(br_info->fid);
+	if (!fp) {
+		atomic_dec(&conn->r_count);
+		ksmbd_free_work_struct(work);
+		return;
+	}
+
+	if (allocate_oplock_break_buf(work)) {
+		pr_err("smb2_allocate_rsp_buf failed!\n");
+		atomic_dec(&conn->r_count);
+		ksmbd_fd_put(work, fp);
+		ksmbd_free_work_struct(work);
+		return;
+	}
+
+	rsp_hdr = work->response_buf;
+	memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
+	rsp_hdr->smb2_buf_length =
+		cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
+	rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
+	rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
+	rsp_hdr->CreditRequest = cpu_to_le16(0);
+	rsp_hdr->Command = SMB2_OPLOCK_BREAK;
+	rsp_hdr->Flags = (SMB2_FLAGS_SERVER_TO_REDIR);
+	rsp_hdr->NextCommand = 0;
+	rsp_hdr->MessageId = cpu_to_le64(-1);
+	rsp_hdr->Id.SyncId.ProcessId = 0;
+	rsp_hdr->Id.SyncId.TreeId = 0;
+	rsp_hdr->SessionId = 0;
+	memset(rsp_hdr->Signature, 0, 16);
+
+	rsp = work->response_buf;
+
+	rsp->StructureSize = cpu_to_le16(24);
+	/*
+	 * A batch/exclusive oplock is broken to level II unless the break
+	 * was flagged open_trunc; everything else is broken to none.
+	 */
+	if (!br_info->open_trunc &&
+	    (br_info->level == SMB2_OPLOCK_LEVEL_BATCH ||
+	     br_info->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE))
+		rsp->OplockLevel = SMB2_OPLOCK_LEVEL_II;
+	else
+		rsp->OplockLevel = SMB2_OPLOCK_LEVEL_NONE;
+	rsp->Reserved = 0;
+	rsp->Reserved2 = 0;
+	rsp->PersistentFid = cpu_to_le64(fp->persistent_id);
+	rsp->VolatileFid = cpu_to_le64(fp->volatile_id);
+
+	inc_rfc1001_len(rsp, 24);
+
+	/* the ids in rsp are little endian; convert back for printing */
+	ksmbd_debug(OPLOCK,
+		    "sending oplock break v_id %llu p_id = %llu lock level = %d\n",
+		    le64_to_cpu(rsp->VolatileFid),
+		    le64_to_cpu(rsp->PersistentFid), rsp->OplockLevel);
+
+	ksmbd_fd_put(work, fp);
+	ksmbd_conn_write(work);
+	ksmbd_free_work_struct(work);
+	atomic_dec(&conn->r_count);
+}
+
+/**
+ * smb2_oplock_break_noti() - send smb2 exclusive/batch to level2 oplock
+ * break command from server to client
+ * @opinfo:	oplock info object
+ *
+ * When an acknowledgment is expected (op_state is OPLOCK_ACK_WAIT) the
+ * notification is queued as work and the caller waits for the ack.
+ * Otherwise it is sent synchronously and a level II oplock is dropped to
+ * none right away.
+ *
+ * Return:	0 on success, -ENOMEM if an allocation fails
+ */
+static int smb2_oplock_break_noti(struct oplock_info *opinfo)
+{
+	struct ksmbd_conn *conn = opinfo->conn;
+	struct oplock_break_info *info;
+	struct ksmbd_work *work;
+
+	work = ksmbd_alloc_work_struct();
+	if (!work)
+		return -ENOMEM;
+
+	info = kmalloc(sizeof(struct oplock_break_info), GFP_KERNEL);
+	if (!info) {
+		ksmbd_free_work_struct(work);
+		return -ENOMEM;
+	}
+
+	/* snapshot what the notification needs from the oplock info */
+	info->level = opinfo->level;
+	info->fid = opinfo->fid;
+	info->open_trunc = opinfo->open_trunc;
+
+	work->request_buf = (char *)info;
+	work->conn = conn;
+	work->sess = opinfo->sess;
+
+	atomic_inc(&conn->r_count);
+	if (opinfo->op_state != OPLOCK_ACK_WAIT) {
+		__smb2_oplock_break_noti(&work->work);
+		if (opinfo->level == SMB2_OPLOCK_LEVEL_II)
+			opinfo->level = SMB2_OPLOCK_LEVEL_NONE;
+		return 0;
+	}
+
+	INIT_WORK(&work->work, __smb2_oplock_break_noti);
+	ksmbd_queue_work(work);
+
+	wait_for_break_ack(opinfo);
+	return 0;
+}
+
+/**
+ * __smb2_lease_break_noti() - send lease break command from server
+ * to client
+ * @wk:     smb work object
+ *
+ * work->request_buf contains a struct lease_break_info. On every path the
+ * work struct is freed and the connection's r_count is decremented before
+ * returning.
+ */
+static void __smb2_lease_break_noti(struct work_struct *wk)
+{
+	struct smb2_lease_break *rsp = NULL;
+	struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work);
+	struct lease_break_info *br_info = work->request_buf;
+	struct ksmbd_conn *conn = work->conn;
+	struct smb2_hdr *rsp_hdr;
+
+	if (allocate_oplock_break_buf(work)) {
+		ksmbd_debug(OPLOCK, "smb2_allocate_rsp_buf failed!\n");
+		ksmbd_free_work_struct(work);
+		atomic_dec(&conn->r_count);
+		return;
+	}
+
+	rsp_hdr = work->response_buf;
+	memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
+	rsp_hdr->smb2_buf_length =
+		cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
+	rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
+	rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
+	rsp_hdr->CreditRequest = cpu_to_le16(0);
+	rsp_hdr->Command = SMB2_OPLOCK_BREAK;
+	rsp_hdr->Flags = (SMB2_FLAGS_SERVER_TO_REDIR);
+	rsp_hdr->NextCommand = 0;
+	rsp_hdr->MessageId = cpu_to_le64(-1);
+	rsp_hdr->Id.SyncId.ProcessId = 0;
+	rsp_hdr->Id.SyncId.TreeId = 0;
+	rsp_hdr->SessionId = 0;
+	memset(rsp_hdr->Signature, 0, 16);
+
+	rsp = work->response_buf;
+	rsp->StructureSize = cpu_to_le16(44);
+	rsp->Epoch = br_info->epoch;
+	rsp->Flags = 0;
+
+	/* an ack is only requested when write or handle caching is held */
+	if (br_info->curr_state & (SMB2_LEASE_WRITE_CACHING_LE |
+				   SMB2_LEASE_HANDLE_CACHING_LE))
+		rsp->Flags = SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED;
+
+	memcpy(rsp->LeaseKey, br_info->lease_key, SMB2_LEASE_KEY_SIZE);
+	rsp->CurrentLeaseState = br_info->curr_state;
+	rsp->NewLeaseState = br_info->new_state;
+	rsp->BreakReason = 0;
+	rsp->AccessMaskHint = 0;
+	rsp->ShareMaskHint = 0;
+
+	inc_rfc1001_len(rsp, 44);
+
+	ksmbd_conn_write(work);
+	ksmbd_free_work_struct(work);
+	atomic_dec(&conn->r_count);
+}
+
+/**
+ * smb2_lease_break_noti() - break lease when a new client request
+ * write lease
+ * @opinfo: contains lease state information
+ *
+ * When an acknowledgment is expected (op_state is OPLOCK_ACK_WAIT), every
+ * work queued on the interim list first gets a STATUS_PENDING interim
+ * response, then the notification is queued as work and the caller waits
+ * for the ack. Otherwise the notification is sent synchronously.
+ *
+ * Return: 0 on success, -ENOMEM if an allocation fails
+ */
+static int smb2_lease_break_noti(struct oplock_info *opinfo)
+{
+ struct ksmbd_conn *conn = opinfo->conn;
+ struct list_head *tmp, *t;
+ struct ksmbd_work *work;
+ struct lease_break_info *br_info;
+ struct lease *lease = opinfo->o_lease;
+
+ work = ksmbd_alloc_work_struct();
+ if (!work)
+ return -ENOMEM;
+
+ br_info = kmalloc(sizeof(struct lease_break_info), GFP_KERNEL);
+ if (!br_info) {
+ ksmbd_free_work_struct(work);
+ return -ENOMEM;
+ }
+
+ br_info->curr_state = lease->state;
+ br_info->new_state = lease->new_state;
+ /* only version 2 leases carry an epoch; it is bumped on each break */
+ if (lease->version == 2)
+ br_info->epoch = cpu_to_le16(++lease->epoch);
+ else
+ br_info->epoch = 0;
+ memcpy(br_info->lease_key, lease->lease_key, SMB2_LEASE_KEY_SIZE);
+
+ work->request_buf = (char *)br_info;
+ work->conn = conn;
+ work->sess = opinfo->sess;
+
+ atomic_inc(&conn->r_count);
+ if (opinfo->op_state == OPLOCK_ACK_WAIT) {
+ list_for_each_safe(tmp, t, &opinfo->interim_list) {
+ struct ksmbd_work *in_work;
+
+ in_work = list_entry(tmp, struct ksmbd_work,
+ interim_entry);
+ setup_async_work(in_work, NULL, NULL);
+ smb2_send_interim_resp(in_work, STATUS_PENDING);
+ list_del(&in_work->interim_entry);
+ }
+ INIT_WORK(&work->work, __smb2_lease_break_noti);
+ ksmbd_queue_work(work);
+ wait_for_break_ack(opinfo);
+ } else {
+ __smb2_lease_break_noti(&work->work);
+ /* no ack expected: a break to none takes effect immediately */
+ if (opinfo->o_lease->new_state == SMB2_LEASE_NONE_LE) {
+ opinfo->level = SMB2_OPLOCK_LEVEL_NONE;
+ opinfo->o_lease->state = SMB2_LEASE_NONE_LE;
+ }
+ }
+ return 0;
+}
+
+/*
+ * For a lease: wake everybody sleeping on oplock_brk, then, if breaks are
+ * still counted in breaking_cnt, wait up to HZ jiffies for the count to
+ * reach zero. On timeout the count is forced to zero.
+ */
+static void wait_lease_breaking(struct oplock_info *opinfo)
+{
+	int remaining;
+
+	if (!opinfo->is_lease)
+		return;
+
+	wake_up_interruptible_all(&opinfo->oplock_brk);
+	if (!atomic_read(&opinfo->breaking_cnt))
+		return;
+
+	remaining = wait_event_interruptible_timeout(opinfo->oplock_brk,
+				atomic_read(&opinfo->breaking_cnt) == 0,
+				HZ);
+	if (!remaining)
+		atomic_set(&opinfo->breaking_cnt, 0);
+}
+
+/*
+ * oplock_break() - break the oplock or lease described by @brk_opinfo
+ * @brk_opinfo: oplock info to break
+ * @req_op_level: oplock level requested by the opener causing the break
+ *
+ * Serialises with other breaks through oplock_break_pending(), works out
+ * the state to break to, sends the notification and finally releases the
+ * pending-break bit again.
+ *
+ * Return: 0 when the break was done or was not needed, -ENOENT when the
+ * oplock is being closed, or the error of the notification helper.
+ */
+static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level)
+{
+ int err = 0;
+
+ /* Need to break exclusive/batch oplock, write lease or overwrite_if */
+ ksmbd_debug(OPLOCK,
+ "request to send oplock(level : 0x%x) break notification\n",
+ brk_opinfo->level);
+
+ if (brk_opinfo->is_lease) {
+ struct lease *lease = brk_opinfo->o_lease;
+
+ /*
+ * NOTE(review): breaking_cnt is not decremented again on the early
+ * return just below - confirm that this is intended.
+ */
+ atomic_inc(&brk_opinfo->breaking_cnt);
+
+ err = oplock_break_pending(brk_opinfo, req_op_level);
+ if (err)
+ return err < 0 ? err : 0;
+
+ if (brk_opinfo->open_trunc) {
+ /*
+ * Create overwrite break trigger the lease break to
+ * none.
+ */
+ lease->new_state = SMB2_LEASE_NONE_LE;
+ } else {
+ /* otherwise strip write caching first, then handle caching */
+ if (lease->state & SMB2_LEASE_WRITE_CACHING_LE) {
+ if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE)
+ lease->new_state =
+ SMB2_LEASE_READ_CACHING_LE |
+ SMB2_LEASE_HANDLE_CACHING_LE;
+ else
+ lease->new_state =
+ SMB2_LEASE_READ_CACHING_LE;
+ } else {
+ if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE)
+ lease->new_state =
+ SMB2_LEASE_READ_CACHING_LE;
+ else
+ lease->new_state = SMB2_LEASE_NONE_LE;
+ }
+ }
+
+ /* an ack is only awaited when write or handle caching is held */
+ if (lease->state & (SMB2_LEASE_WRITE_CACHING_LE |
+ SMB2_LEASE_HANDLE_CACHING_LE))
+ brk_opinfo->op_state = OPLOCK_ACK_WAIT;
+ else
+ atomic_dec(&brk_opinfo->breaking_cnt);
+ } else {
+ err = oplock_break_pending(brk_opinfo, req_op_level);
+ if (err)
+ return err < 0 ? err : 0;
+
+ if (brk_opinfo->level == SMB2_OPLOCK_LEVEL_BATCH ||
+ brk_opinfo->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE)
+ brk_opinfo->op_state = OPLOCK_ACK_WAIT;
+ }
+
+ if (brk_opinfo->is_lease)
+ err = smb2_lease_break_noti(brk_opinfo);
+ else
+ err = smb2_oplock_break_noti(brk_opinfo);
+
+ ksmbd_debug(OPLOCK, "oplock granted = %d\n", brk_opinfo->level);
+ if (brk_opinfo->op_state == OPLOCK_CLOSING)
+ err = -ENOENT;
+ /* release the bit taken in oplock_break_pending() */
+ wake_up_oplock_break(brk_opinfo);
+
+ wait_lease_breaking(brk_opinfo);
+
+ return err;
+}
+
+/*
+ * destroy_lease_table() - free lease tables and unlink their leases
+ * @conn: connection whose ClientGUID selects the tables to destroy;
+ * with NULL every table on the global list is destroyed
+ *
+ * Runs with lease_list_lock held for writing. The oplock infos themselves
+ * are only unlinked from the table, not freed.
+ */
+void destroy_lease_table(struct ksmbd_conn *conn)
+{
+ struct lease_table *lb, *lbtmp;
+ struct oplock_info *opinfo;
+
+ write_lock(&lease_list_lock);
+ if (list_empty(&lease_table_list)) {
+ write_unlock(&lease_list_lock);
+ return;
+ }
+
+ list_for_each_entry_safe(lb, lbtmp, &lease_table_list, l_entry) {
+ if (conn && memcmp(lb->client_guid, conn->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE))
+ continue;
+again:
+ /*
+ * Unlink the first entry and restart from the head each time, so
+ * the walk never continues from an entry that was just removed.
+ */
+ rcu_read_lock();
+ list_for_each_entry_rcu(opinfo, &lb->lease_list,
+ lease_entry) {
+ rcu_read_unlock();
+ lease_del_list(opinfo);
+ goto again;
+ }
+ rcu_read_unlock();
+ list_del(&lb->l_entry);
+ kfree(lb);
+ }
+ write_unlock(&lease_list_lock);
+}
+
+/*
+ * find_same_lease_key() - check that a lease key is not in use on another file
+ * @sess: session of the requesting client
+ * @ci: ksmbd inode of the file being opened
+ * @lctx: lease context of the request, may be NULL
+ *
+ * Looks up the lease table of the session's client and compares the
+ * requested lease key against every lease in it that belongs to a
+ * different inode than @ci.
+ *
+ * Return: 0 when there is no conflict (also when @lctx is NULL or the
+ * client has no lease table), -EINVAL when the key is already used for
+ * another file.
+ */
+int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci,
+ struct lease_ctx_info *lctx)
+{
+ struct oplock_info *opinfo;
+ int err = 0;
+ struct lease_table *lb;
+
+ if (!lctx)
+ return err;
+
+ read_lock(&lease_list_lock);
+ if (list_empty(&lease_table_list)) {
+ read_unlock(&lease_list_lock);
+ return 0;
+ }
+
+ list_for_each_entry(lb, &lease_table_list, l_entry) {
+ if (!memcmp(lb->client_guid, sess->conn->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE))
+ goto found;
+ }
+ read_unlock(&lease_list_lock);
+
+ return 0;
+
+found:
+ /* lease_list_lock is still read-held here and released at "out" */
+ rcu_read_lock();
+ list_for_each_entry_rcu(opinfo, &lb->lease_list, lease_entry) {
+ /* skip entries whose refcount has already dropped to zero */
+ if (!atomic_inc_not_zero(&opinfo->refcount))
+ continue;
+ rcu_read_unlock();
+ /* leases on the file being opened are not a conflict */
+ if (opinfo->o_fp->f_ci == ci)
+ goto op_next;
+ err = compare_guid_key(opinfo, sess->conn->ClientGUID,
+ lctx->lease_key);
+ if (err) {
+ err = -EINVAL;
+ ksmbd_debug(OPLOCK,
+ "found same lease key is already used in other files\n");
+ opinfo_put(opinfo);
+ goto out;
+ }
+op_next:
+ opinfo_put(opinfo);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+
+out:
+ read_unlock(&lease_list_lock);
+ return err;
+}
+
+/*
+ * Copy the oplock level and the lease's state, key, duration and flags
+ * from @op1 to @op2. Other lease fields of @op2 are left alone.
+ */
+static void copy_lease(struct oplock_info *op1, struct oplock_info *op2)
+{
+	struct lease *src = op1->o_lease;
+	struct lease *dst = op2->o_lease;
+
+	op2->level = op1->level;
+	dst->state = src->state;
+	memcpy(dst->lease_key, src->lease_key, SMB2_LEASE_KEY_SIZE);
+	dst->duration = src->duration;
+	dst->flags = src->flags;
+}
+
+/*
+ * add_lease_global_list() - link a lease into its client's lease table
+ * @opinfo: oplock info carrying the lease
+ *
+ * Uses the existing table whose client_guid matches the connection's
+ * ClientGUID; if there is none, a new table is allocated, the lease is
+ * linked into it and the table is published on the global list.
+ *
+ * Return: 0 on success, -ENOMEM if a new table cannot be allocated
+ */
+static int add_lease_global_list(struct oplock_info *opinfo)
+{
+ struct lease_table *lb;
+
+ read_lock(&lease_list_lock);
+ list_for_each_entry(lb, &lease_table_list, l_entry) {
+ if (!memcmp(lb->client_guid, opinfo->conn->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE)) {
+ opinfo->o_lease->l_lb = lb;
+ lease_add_list(opinfo);
+ read_unlock(&lease_list_lock);
+ return 0;
+ }
+ }
+ read_unlock(&lease_list_lock);
+
+ /*
+ * NOTE(review): the lock is dropped between the search above and
+ * lb_add() below - confirm that two tables for the same client cannot
+ * be created concurrently.
+ */
+ lb = kmalloc(sizeof(struct lease_table), GFP_KERNEL);
+ if (!lb)
+ return -ENOMEM;
+
+ memcpy(lb->client_guid, opinfo->conn->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE);
+ INIT_LIST_HEAD(&lb->lease_list);
+ spin_lock_init(&lb->lb_lock);
+ opinfo->o_lease->l_lb = lb;
+ lease_add_list(opinfo);
+ lb_add(lb);
+ return 0;
+}
+
+/*
+ * Dispatch to the grant helper that corresponds to @level: batch and
+ * exclusive levels get a write oplock, level II gets a read oplock and any
+ * other value results in no oplock.
+ */
+static void set_oplock_level(struct oplock_info *opinfo, int level,
+			     struct lease_ctx_info *lctx)
+{
+	if (level == SMB2_OPLOCK_LEVEL_BATCH ||
+	    level == SMB2_OPLOCK_LEVEL_EXCLUSIVE)
+		grant_write_oplock(opinfo, level, lctx);
+	else if (level == SMB2_OPLOCK_LEVEL_II)
+		grant_read_oplock(opinfo, lctx);
+	else
+		grant_none_oplock(opinfo, lctx);
+}
+
+/**
+ * smb_grant_oplock() - handle oplock/lease request on file open
+ * @work:	smb work
+ * @req_op_level:	oplock level requested by the client
+ * @pid:	id of open file
+ * @fp:		ksmbd file pointer
+ * @tid:	Tree id of connection
+ * @lctx:	lease context information on file open, NULL for plain oplocks
+ * @share_ret:	result of the share mode check; a negative value is returned
+ *		as the error once a conflicting oplock has been dealt with
+ *
+ * Directories are never granted an oplock or lease.  For regular files a new
+ * opinfo is allocated, an existing batch/exclusive holder is broken to
+ * level II if necessary, and the new opinfo is attached to @fp.
+ *
+ * Return:	0 on success, otherwise error
+ */
+int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid,
+		     struct ksmbd_file *fp, __u16 tid,
+		     struct lease_ctx_info *lctx, int share_ret)
+{
+	struct ksmbd_session *sess = work->sess;
+	int err = 0;
+	struct oplock_info *opinfo = NULL, *prev_opinfo = NULL;
+	struct ksmbd_inode *ci = fp->f_ci;
+	bool prev_op_has_lease;
+	__le32 prev_op_state = 0;
+
+	/* not support directory lease */
+	if (S_ISDIR(file_inode(fp->filp)->i_mode))
+		return 0;
+
+	opinfo = alloc_opinfo(work, pid, tid);
+	if (!opinfo)
+		return -ENOMEM;
+
+	if (lctx) {
+		err = alloc_lease(opinfo, lctx);
+		if (err)
+			goto err_out;
+		opinfo->is_lease = 1;
+	}
+
+	/* ci does not have any oplock */
+	if (!opinfo_count(fp))
+		goto set_lev;
+
+	/* grant none-oplock if second open is trunc */
+	if (fp->attrib_only && fp->cdoption != FILE_OVERWRITE_IF_LE &&
+	    fp->cdoption != FILE_OVERWRITE_LE &&
+	    fp->cdoption != FILE_SUPERSEDE_LE) {
+		req_op_level = SMB2_OPLOCK_LEVEL_NONE;
+		goto set_lev;
+	}
+
+	if (lctx) {
+		struct oplock_info *m_opinfo;
+
+		/* is lease already granted ? */
+		m_opinfo = same_client_has_lease(ci, sess->conn->ClientGUID,
+						 lctx);
+		if (m_opinfo) {
+			copy_lease(m_opinfo, opinfo);
+			if (atomic_read(&m_opinfo->breaking_cnt))
+				opinfo->o_lease->flags =
+					SMB2_LEASE_FLAG_BREAK_IN_PROGRESS_LE;
+			goto out;
+		}
+	}
+
+	prev_opinfo = opinfo_get_list(ci);
+	if (!prev_opinfo)
+		goto set_lev;
+	if (prev_opinfo->level == SMB2_OPLOCK_LEVEL_NONE && lctx) {
+		/*
+		 * Every other exit below drops the reference obtained from
+		 * opinfo_get_list(); this one must do so too or the previous
+		 * opinfo is never released.
+		 */
+		opinfo_put(prev_opinfo);
+		goto set_lev;
+	}
+	prev_op_has_lease = prev_opinfo->is_lease;
+	if (prev_op_has_lease)
+		prev_op_state = prev_opinfo->o_lease->state;
+
+	if (share_ret < 0 &&
+	    prev_opinfo->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
+		err = share_ret;
+		opinfo_put(prev_opinfo);
+		goto err_out;
+	}
+
+	if (prev_opinfo->level != SMB2_OPLOCK_LEVEL_BATCH &&
+	    prev_opinfo->level != SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
+		opinfo_put(prev_opinfo);
+		goto op_break_not_needed;
+	}
+
+	/* the current holder has a write oplock: break it to level II */
+	list_add(&work->interim_entry, &prev_opinfo->interim_list);
+	err = oplock_break(prev_opinfo, SMB2_OPLOCK_LEVEL_II);
+	opinfo_put(prev_opinfo);
+	if (err == -ENOENT)
+		goto set_lev;
+	/* Check all oplock was freed by close */
+	else if (err < 0)
+		goto err_out;
+
+op_break_not_needed:
+	if (share_ret < 0) {
+		err = share_ret;
+		goto err_out;
+	}
+
+	if (req_op_level != SMB2_OPLOCK_LEVEL_NONE)
+		req_op_level = SMB2_OPLOCK_LEVEL_II;
+
+	/* grant fixed oplock on stacked locking between lease and oplock */
+	if (prev_op_has_lease && !lctx)
+		if (prev_op_state & SMB2_LEASE_HANDLE_CACHING_LE)
+			req_op_level = SMB2_OPLOCK_LEVEL_NONE;
+
+	if (!prev_op_has_lease && lctx) {
+		req_op_level = SMB2_OPLOCK_LEVEL_II;
+		lctx->req_state = SMB2_LEASE_READ_CACHING_LE;
+	}
+
+set_lev:
+	set_oplock_level(opinfo, req_op_level, lctx);
+
+out:
+	rcu_assign_pointer(fp->f_opinfo, opinfo);
+	opinfo->o_fp = fp;
+
+	opinfo_count_inc(fp);
+	opinfo_add(opinfo);
+	if (opinfo->is_lease) {
+		/*
+		 * NOTE(review): on failure the opinfo has already been
+		 * published through fp->f_opinfo and opinfo_add(); confirm
+		 * that free_opinfo() at err_out is safe in that state.
+		 */
+		err = add_lease_global_list(opinfo);
+		if (err)
+			goto err_out;
+	}
+
+	return 0;
+err_out:
+	free_opinfo(opinfo);
+	return err;
+}
+
+/**
+ * smb_break_all_write_oplock() - break batch/exclusive oplock to level2
+ * @work:	smb work; queued on the holder's interim list during the break
+ * @fp:		ksmbd file pointer
+ * @is_trunc:	truncate on open
+ *
+ * Only the first opinfo of the inode is examined; nothing is done unless it
+ * currently holds a batch or exclusive oplock.
+ */
+static void smb_break_all_write_oplock(struct ksmbd_work *work,
+				       struct ksmbd_file *fp, int is_trunc)
+{
+	struct oplock_info *holder = opinfo_get_list(fp->f_ci);
+
+	if (!holder)
+		return;
+
+	if (holder->level == SMB2_OPLOCK_LEVEL_BATCH ||
+	    holder->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
+		holder->open_trunc = is_trunc;
+		list_add(&work->interim_entry, &holder->interim_list);
+		oplock_break(holder, SMB2_OPLOCK_LEVEL_II);
+	}
+
+	opinfo_put(holder);
+}
+
+/**
+ * smb_break_all_levII_oplock() - send level2 oplock or read lease break command
+ * from server to client
+ * @work: smb work
+ * @fp: ksmbd file pointer
+ * @is_trunc: truncate on open
+ *
+ * Does nothing when the share does not have KSMBD_SHARE_FLAG_OPLOCKS set.
+ * Otherwise every opinfo on the inode's m_op_list that holds a level II
+ * oplock (or a lease without write caching) is broken to none, except a
+ * lease that shares both client GUID and lease key with @fp's own lease.
+ */
+void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
+ int is_trunc)
+{
+ struct oplock_info *op, *brk_op;
+ struct ksmbd_inode *ci;
+ struct ksmbd_conn *conn = work->sess->conn;
+
+ if (!test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_OPLOCKS))
+ return;
+
+ ci = fp->f_ci;
+ /* opinfo of the opener itself, may be NULL; released at the end */
+ op = opinfo_get(fp);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(brk_op, &ci->m_op_list, op_entry) {
+ /* skip entries whose refcount already dropped to zero */
+ if (!atomic_inc_not_zero(&brk_op->refcount))
+ continue;
+ /* the reference taken above is held while RCU is dropped */
+ rcu_read_unlock();
+ /* a lease with bits other than read/handle caching is not expected here */
+ if (brk_op->is_lease && (brk_op->o_lease->state &
+ (~(SMB2_LEASE_READ_CACHING_LE |
+ SMB2_LEASE_HANDLE_CACHING_LE)))) {
+ ksmbd_debug(OPLOCK, "unexpected lease state(0x%x)\n",
+ brk_op->o_lease->state);
+ goto next;
+ } else if (brk_op->level !=
+ SMB2_OPLOCK_LEVEL_II) {
+ ksmbd_debug(OPLOCK, "unexpected oplock(0x%x)\n",
+ brk_op->level);
+ goto next;
+ }
+
+ /* Skip oplock being break to none */
+ if (brk_op->is_lease &&
+ brk_op->o_lease->new_state == SMB2_LEASE_NONE_LE &&
+ atomic_read(&brk_op->breaking_cnt))
+ goto next;
+
+ /* same client GUID and same lease key as the opener: no break */
+ if (op && op->is_lease && brk_op->is_lease &&
+ !memcmp(conn->ClientGUID, brk_op->conn->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE) &&
+ !memcmp(op->o_lease->lease_key, brk_op->o_lease->lease_key,
+ SMB2_LEASE_KEY_SIZE))
+ goto next;
+ brk_op->open_trunc = is_trunc;
+ oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE);
+next:
+ opinfo_put(brk_op);
+ /* re-enter the RCU read section before advancing the iterator */
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+
+ if (op)
+ opinfo_put(op);
+}
+
+/**
+ * smb_break_all_oplock() - break both batch/exclusive and level2 oplock
+ * @work:	smb work
+ * @fp:		ksmbd file pointer
+ *
+ * No-op when oplocks are disabled on the share.  Both break helpers are
+ * invoked with the truncate flag set.
+ */
+void smb_break_all_oplock(struct ksmbd_work *work, struct ksmbd_file *fp)
+{
+	if (test_share_config_flag(work->tcon->share_conf,
+				   KSMBD_SHARE_FLAG_OPLOCKS)) {
+		smb_break_all_write_oplock(work, fp, 1);
+		smb_break_all_levII_oplock(work, fp, 1);
+	}
+}
+
+/**
+ * smb2_map_lease_to_oplock() - map lease state to corresponding oplock type
+ * @lease_state:	lease type
+ *
+ * Read+write+handle caching maps to batch, write caching combined with other
+ * bits but without handle caching maps to exclusive, and read caching without
+ * write caching maps to level II.
+ *
+ * Return:	0 if no mapping, otherwise corresponding oplock type
+ */
+__u8 smb2_map_lease_to_oplock(__le32 lease_state)
+{
+	const __le32 rwh = SMB2_LEASE_HANDLE_CACHING_LE |
+			   SMB2_LEASE_READ_CACHING_LE |
+			   SMB2_LEASE_WRITE_CACHING_LE;
+
+	if (lease_state == rwh)
+		return SMB2_OPLOCK_LEVEL_BATCH;
+
+	if (lease_state & SMB2_LEASE_WRITE_CACHING_LE) {
+		/* write caching alone, or together with handle caching: none */
+		if (lease_state != SMB2_LEASE_WRITE_CACHING_LE &&
+		    !(lease_state & SMB2_LEASE_HANDLE_CACHING_LE))
+			return SMB2_OPLOCK_LEVEL_EXCLUSIVE;
+		return 0;
+	}
+
+	if (lease_state & SMB2_LEASE_READ_CACHING_LE)
+		return SMB2_OPLOCK_LEVEL_II;
+
+	return 0;
+}
+
+/**
+ * create_lease_buf() - create lease context for open cmd response
+ * @rbuf:	buffer the lease create context response is written to
+ * @lease:	granted lease whose key, flags and state are reported
+ *
+ * A version 2 lease produces a struct create_lease_v2 (including the parent
+ * lease key), any other version a struct create_lease.  The context name is
+ * "RqLs" in both cases.
+ */
+void create_lease_buf(u8 *rbuf, struct lease *lease)
+{
+	__le64 *key = (__le64 *)&lease->lease_key;
+
+	if (lease->version != 2) {
+		struct create_lease *v1 = (struct create_lease *)rbuf;
+
+		memset(v1, 0, sizeof(struct create_lease));
+		v1->lcontext.LeaseKeyLow = key[0];
+		v1->lcontext.LeaseKeyHigh = key[1];
+		v1->lcontext.LeaseFlags = lease->flags;
+		v1->lcontext.LeaseState = lease->state;
+		v1->ccontext.DataOffset =
+			cpu_to_le16(offsetof(struct create_lease, lcontext));
+		v1->ccontext.DataLength =
+			cpu_to_le32(sizeof(struct lease_context));
+		v1->ccontext.NameOffset =
+			cpu_to_le16(offsetof(struct create_lease, Name));
+		v1->ccontext.NameLength = cpu_to_le16(4);
+		memcpy(v1->Name, "RqLs", 4);
+	} else {
+		struct create_lease_v2 *v2 = (struct create_lease_v2 *)rbuf;
+		__le64 *parent_key = (__le64 *)&lease->parent_lease_key;
+
+		memset(v2, 0, sizeof(struct create_lease_v2));
+		v2->lcontext.LeaseKeyLow = key[0];
+		v2->lcontext.LeaseKeyHigh = key[1];
+		v2->lcontext.LeaseFlags = lease->flags;
+		v2->lcontext.LeaseState = lease->state;
+		v2->lcontext.ParentLeaseKeyLow = parent_key[0];
+		v2->lcontext.ParentLeaseKeyHigh = parent_key[1];
+		v2->ccontext.DataOffset =
+			cpu_to_le16(offsetof(struct create_lease_v2, lcontext));
+		v2->ccontext.DataLength =
+			cpu_to_le32(sizeof(struct lease_context_v2));
+		v2->ccontext.NameOffset =
+			cpu_to_le16(offsetof(struct create_lease_v2, Name));
+		v2->ccontext.NameLength = cpu_to_le16(4);
+		memcpy(v2->Name, "RqLs", 4);
+	}
+}
+
+/**
+ * parse_lease_state() - parse lease context contained in file open request
+ * @open_req:	buffer containing smb2 file open(create) request
+ *
+ * Walks the chain of create contexts looking for one whose 4 byte name is
+ * SMB2_CREATE_REQUEST_LEASE and copies its lease key, state, flags and
+ * duration (plus the parent lease key for the v2 layout) into a newly
+ * allocated struct lease_ctx_info.
+ *
+ * Return:	allocated lease context that the caller must kfree(), or NULL
+ *		if no lease create context was found or allocation failed
+ */
+struct lease_ctx_info *parse_lease_state(void *open_req)
+{
+	struct smb2_create_req *req = (struct smb2_create_req *)open_req;
+	struct create_context *cc;
+	struct lease_ctx_info *lreq;
+	unsigned int next;
+	bool found = false;
+	char *name;
+
+	lreq = kzalloc(sizeof(struct lease_ctx_info), GFP_KERNEL);
+	if (!lreq)
+		return NULL;
+
+	cc = (struct create_context *)((char *)req + 4 +
+			le32_to_cpu(req->CreateContextsOffset));
+	for (;;) {
+		name = (char *)cc + le16_to_cpu(cc->NameOffset);
+		if (le16_to_cpu(cc->NameLength) == 4 &&
+		    !strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4)) {
+			found = true;
+			break;
+		}
+
+		/* a Next offset of zero terminates the context chain */
+		next = le32_to_cpu(cc->Next);
+		if (!next)
+			break;
+		cc = (struct create_context *)((char *)cc + next);
+	}
+
+	if (!found) {
+		kfree(lreq);
+		return NULL;
+	}
+
+	/* the data length tells the v2 layout apart from the v1 layout */
+	if (le32_to_cpu(cc->DataLength) == sizeof(struct lease_context_v2)) {
+		struct create_lease_v2 *lc = (struct create_lease_v2 *)cc;
+
+		*((__le64 *)lreq->lease_key) = lc->lcontext.LeaseKeyLow;
+		*((__le64 *)(lreq->lease_key + 8)) = lc->lcontext.LeaseKeyHigh;
+		lreq->req_state = lc->lcontext.LeaseState;
+		lreq->flags = lc->lcontext.LeaseFlags;
+		lreq->duration = lc->lcontext.LeaseDuration;
+		*((__le64 *)lreq->parent_lease_key) =
+			lc->lcontext.ParentLeaseKeyLow;
+		*((__le64 *)(lreq->parent_lease_key + 8)) =
+			lc->lcontext.ParentLeaseKeyHigh;
+		lreq->version = 2;
+	} else {
+		struct create_lease *lc = (struct create_lease *)cc;
+
+		*((__le64 *)lreq->lease_key) = lc->lcontext.LeaseKeyLow;
+		*((__le64 *)(lreq->lease_key + 8)) = lc->lcontext.LeaseKeyHigh;
+		lreq->req_state = lc->lcontext.LeaseState;
+		lreq->flags = lc->lcontext.LeaseFlags;
+		lreq->duration = lc->lcontext.LeaseDuration;
+		lreq->version = 1;
+	}
+
+	return lreq;
+}
+
+/**
+ * smb2_find_context_vals() - find a particular context info in open request
+ * @open_req:	buffer containing smb2 file open(create) request
+ * @tag:	context name to search for; must be a NUL-terminated string
+ *		without embedded NUL bytes, since its length is taken with
+ *		strlen()
+ *
+ * A context matches only when its NameLength equals the length of @tag and
+ * the name bytes are identical.  Comparing NameLength bytes unconditionally
+ * would read past the end of @tag whenever the client supplies a longer
+ * name, and would accept a shorter name that is merely a prefix of @tag.
+ *
+ * Return:	pointer to requested context, NULL if @tag context not found
+ *		or error pointer if name length is invalid.
+ */
+struct create_context *smb2_find_context_vals(void *open_req, const char *tag)
+{
+	struct smb2_create_req *req = (struct smb2_create_req *)open_req;
+	struct create_context *cc;
+	size_t tag_len = strlen(tag);
+	unsigned int next = 0;
+	char *data_offset;
+	char *name;
+
+	data_offset = (char *)req + 4 + le32_to_cpu(req->CreateContextsOffset);
+	cc = (struct create_context *)data_offset;
+	do {
+		unsigned int name_len;
+
+		cc = (struct create_context *)((char *)cc + next);
+		name = le16_to_cpu(cc->NameOffset) + (char *)cc;
+		name_len = le16_to_cpu(cc->NameLength);
+		if (name_len < 4)
+			return ERR_PTR(-EINVAL);
+
+		/* never compare more bytes than @tag actually holds */
+		if (name_len == tag_len && memcmp(name, tag, name_len) == 0)
+			return cc;
+		next = le32_to_cpu(cc->Next);
+	} while (next != 0);
+
+	return NULL;
+}
+
+/**
+ * create_durable_rsp_buf() - create durable handle context
+ * @cc:	buffer to create durable context response
+ *
+ * Writes a zeroed struct create_durable_rsp whose context name is "DHnQ"
+ * and whose data length is 8 bytes.
+ */
+void create_durable_rsp_buf(char *cc)
+{
+	struct create_durable_rsp *rsp = (struct create_durable_rsp *)cc;
+
+	memset(rsp, 0, sizeof(struct create_durable_rsp));
+
+	/* SMB2_CREATE_DURABLE_HANDLE_RESPONSE is "DHnQ" */
+	memcpy(rsp->Name, "DHnQ", 4);
+	rsp->ccontext.NameLength = cpu_to_le16(4);
+	rsp->ccontext.NameOffset =
+		cpu_to_le16(offsetof(struct create_durable_rsp, Name));
+
+	rsp->ccontext.DataLength = cpu_to_le32(8);
+	rsp->ccontext.DataOffset =
+		cpu_to_le16(offsetof(struct create_durable_rsp, Data));
+}
+
+/**
+ * create_durable_v2_rsp_buf() - create durable handle v2 context
+ * @cc:	buffer to create durable context response
+ * @fp:	ksmbd file pointer; supplies the durable timeout
+ *
+ * Writes a context named "DH2Q" with 8 bytes of data and the timeout taken
+ * from @fp.
+ */
+void create_durable_v2_rsp_buf(char *cc, struct ksmbd_file *fp)
+{
+	struct create_durable_v2_rsp *rsp = (struct create_durable_v2_rsp *)cc;
+
+	/*
+	 * NOTE(review): the clear size and the data offset below are derived
+	 * from the v1 structure (struct create_durable_rsp), not from the v2
+	 * structure that is actually filled in - confirm both layouts agree.
+	 */
+	memset(rsp, 0, sizeof(struct create_durable_rsp));
+
+	/* SMB2_CREATE_DURABLE_HANDLE_RESPONSE_V2 is "DH2Q" */
+	memcpy(rsp->Name, "DH2Q", 4);
+	rsp->ccontext.NameLength = cpu_to_le16(4);
+	rsp->ccontext.NameOffset =
+		cpu_to_le16(offsetof(struct create_durable_rsp, Name));
+
+	rsp->ccontext.DataLength = cpu_to_le32(8);
+	rsp->ccontext.DataOffset =
+		cpu_to_le16(offsetof(struct create_durable_rsp, Data));
+
+	rsp->Timeout = cpu_to_le32(fp->durable_timeout);
+}
+
+/**
+ * create_mxac_rsp_buf() - create query maximal access context
+ * @cc:			buffer to create maximal access context response
+ * @maximal_access:	maximal access
+ *
+ * Writes a context named "MxAc" with 8 bytes of data: a success query
+ * status followed by @maximal_access.
+ */
+void create_mxac_rsp_buf(char *cc, int maximal_access)
+{
+	struct create_mxac_rsp *rsp = (struct create_mxac_rsp *)cc;
+
+	memset(rsp, 0, sizeof(struct create_mxac_rsp));
+
+	/* SMB2_CREATE_QUERY_MAXIMAL_ACCESS_RESPONSE is "MxAc" */
+	memcpy(rsp->Name, "MxAc", 4);
+	rsp->ccontext.NameLength = cpu_to_le16(4);
+	rsp->ccontext.NameOffset =
+		cpu_to_le16(offsetof(struct create_mxac_rsp, Name));
+
+	rsp->ccontext.DataLength = cpu_to_le32(8);
+	rsp->ccontext.DataOffset =
+		cpu_to_le16(offsetof(struct create_mxac_rsp, QueryStatus));
+
+	rsp->QueryStatus = STATUS_SUCCESS;
+	rsp->MaximalAccess = cpu_to_le32(maximal_access);
+}
+
+/**
+ * create_disk_id_rsp_buf() - create query on disk id context
+ * @cc:		buffer to create the on disk id context response
+ * @file_id:	value reported as DiskFileId
+ * @vol_id:	value reported as VolumeId
+ *
+ * Writes a context named "QFid" with 32 bytes of data.
+ */
+void create_disk_id_rsp_buf(char *cc, __u64 file_id, __u64 vol_id)
+{
+	struct create_disk_id_rsp *buf;
+
+	buf = (struct create_disk_id_rsp *)cc;
+	memset(buf, 0, sizeof(struct create_disk_id_rsp));
+	buf->ccontext.DataOffset = cpu_to_le16(offsetof
+			(struct create_disk_id_rsp, DiskFileId));
+	buf->ccontext.DataLength = cpu_to_le32(32);
+	/* the name offset must come from this structure, not from the MxAc one */
+	buf->ccontext.NameOffset = cpu_to_le16(offsetof
+			(struct create_disk_id_rsp, Name));
+	buf->ccontext.NameLength = cpu_to_le16(4);
+	/* SMB2_CREATE_QUERY_ON_DISK_ID_RESPONSE is "QFid" */
+	buf->Name[0] = 'Q';
+	buf->Name[1] = 'F';
+	buf->Name[2] = 'i';
+	buf->Name[3] = 'd';
+
+	buf->DiskFileId = cpu_to_le64(file_id);
+	buf->VolumeId = cpu_to_le64(vol_id);
+}
+
+/**
+ * create_posix_rsp_buf() - create posix extension context
+ * @cc:	buffer to create posix on posix response
+ * @fp:	ksmbd file pointer
+ *
+ * Fills in the 16 byte POSIX context tag, the link count and mode of the
+ * inode behind @fp, and the owner and group converted to SIDs.
+ */
+void create_posix_rsp_buf(char *cc, struct ksmbd_file *fp)
+{
+	/* SMB2_CREATE_TAG_POSIX is "0x93AD25509CB411E7B42383DE968BCD7C" */
+	static const u8 posix_tag[16] = {
+		0x93, 0xAD, 0x25, 0x50, 0x9C, 0xB4, 0x11, 0xE7,
+		0xB4, 0x23, 0x83, 0xDE, 0x96, 0x8B, 0xCD, 0x7C
+	};
+	struct create_posix_rsp *rsp = (struct create_posix_rsp *)cc;
+	struct inode *inode = file_inode(fp->filp);
+	struct user_namespace *user_ns = file_mnt_user_ns(fp->filp);
+
+	memset(rsp, 0, sizeof(struct create_posix_rsp));
+
+	rsp->ccontext.DataOffset =
+		cpu_to_le16(offsetof(struct create_posix_rsp, nlink));
+	rsp->ccontext.DataLength = cpu_to_le32(52);
+	rsp->ccontext.NameOffset =
+		cpu_to_le16(offsetof(struct create_posix_rsp, Name));
+	rsp->ccontext.NameLength = cpu_to_le16(POSIX_CTXT_DATA_LEN);
+	memcpy(rsp->Name, posix_tag, sizeof(posix_tag));
+
+	rsp->nlink = cpu_to_le32(inode->i_nlink);
+	/* NOTE(review): reparse_tag is filled from the volatile id - confirm */
+	rsp->reparse_tag = cpu_to_le32(fp->volatile_id);
+	rsp->mode = cpu_to_le32(inode->i_mode);
+
+	/* owner SID at offset 0 of SidBuffer, group SID at offset 20 */
+	id_to_sid(from_kuid(user_ns, inode->i_uid),
+		  SIDNFS_USER, (struct smb_sid *)&rsp->SidBuffer[0]);
+	id_to_sid(from_kgid(user_ns, inode->i_gid),
+		  SIDNFS_GROUP, (struct smb_sid *)&rsp->SidBuffer[20]);
+}
+
+/*
+ * Find lease object(opinfo) for given lease key/fid from lease
+ * break/file close path.
+ */
+/**
+ * lookup_lease_in_table() - find a matching lease info object
+ * @conn: connection instance
+ * @lease_key: lease key to be searched for
+ *
+ * Only leases that are in a non-zero op_state other than OPLOCK_CLOSING and
+ * that hold handle or write caching are considered.  A returned opinfo
+ * carries the reference taken during the walk; the caller has to drop it
+ * with opinfo_put().
+ *
+ * Return: opinfo if found matching opinfo, otherwise NULL
+ */
+struct oplock_info *lookup_lease_in_table(struct ksmbd_conn *conn,
+ char *lease_key)
+{
+ struct oplock_info *opinfo = NULL, *ret_op = NULL;
+ struct lease_table *lt;
+ int ret;
+
+ read_lock(&lease_list_lock);
+ /* locate the lease table whose GUID equals this client's GUID */
+ list_for_each_entry(lt, &lease_table_list, l_entry) {
+ if (!memcmp(lt->client_guid, conn->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE))
+ goto found;
+ }
+
+ read_unlock(&lease_list_lock);
+ return NULL;
+
+found:
+ /* lease_list_lock is still read-held here; it is released at out: */
+ rcu_read_lock();
+ list_for_each_entry_rcu(opinfo, &lt->lease_list, lease_entry) {
+ /* skip entries whose refcount already dropped to zero */
+ if (!atomic_inc_not_zero(&opinfo->refcount))
+ continue;
+ rcu_read_unlock();
+ if (!opinfo->op_state || opinfo->op_state == OPLOCK_CLOSING)
+ goto op_next;
+ if (!(opinfo->o_lease->state &
+ (SMB2_LEASE_HANDLE_CACHING_LE |
+ SMB2_LEASE_WRITE_CACHING_LE)))
+ goto op_next;
+ ret = compare_guid_key(opinfo, conn->ClientGUID,
+ lease_key);
+ if (ret) {
+ ksmbd_debug(OPLOCK, "found opinfo\n");
+ /* the reference is kept and handed over to the caller */
+ ret_op = opinfo;
+ goto out;
+ }
+op_next:
+ opinfo_put(opinfo);
+ /* re-enter the RCU read section before advancing the iterator */
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+
+out:
+ read_unlock(&lease_list_lock);
+ return ret_op;
+}
+
+/**
+ * smb2_check_durable_oplock() - validate a reconnect against the stored lease
+ * @fp:		ksmbd file pointer being reconnected
+ * @lctx:	lease context supplied with the open, may be NULL
+ * @name:	file name supplied with the open, may be NULL
+ *
+ * The checks apply only when @fp has an opinfo that is a lease.
+ *
+ * Return:	0 if the open is acceptable, -EBADF if the lease context is
+ *		missing or its key differs, -EINVAL if @name differs from the
+ *		file name stored in @fp
+ */
+int smb2_check_durable_oplock(struct ksmbd_file *fp,
+			      struct lease_ctx_info *lctx, char *name)
+{
+	struct oplock_info *opinfo = opinfo_get(fp);
+	int ret = 0;
+
+	if (!opinfo)
+		return 0;
+
+	if (!opinfo->is_lease)
+		goto out;
+
+	if (!lctx) {
+		pr_err("open does not include lease\n");
+		ret = -EBADF;
+	} else if (memcmp(opinfo->o_lease->lease_key, lctx->lease_key,
+			  SMB2_LEASE_KEY_SIZE)) {
+		pr_err("invalid lease key\n");
+		ret = -EBADF;
+	} else if (name && strcmp(fp->filename, name)) {
+		pr_err("invalid name reconnect %s\n", name);
+		ret = -EINVAL;
+	}
+
+out:
+	opinfo_put(opinfo);
+	return ret;
+}
diff --git a/fs/ksmbd/oplock.h b/fs/ksmbd/oplock.h
new file mode 100644
index 000000000000..119b8047cfbd
--- /dev/null
+++ b/fs/ksmbd/oplock.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_OPLOCK_H
+#define __KSMBD_OPLOCK_H
+
+#include "smb_common.h"
+
+/* 35 seconds expressed in jiffies */
+#define OPLOCK_WAIT_TIME (35 * HZ)
+
+/* SMB2 Oplock levels */
+#define SMB2_OPLOCK_LEVEL_NONE 0x00
+#define SMB2_OPLOCK_LEVEL_II 0x01
+#define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08
+#define SMB2_OPLOCK_LEVEL_BATCH 0x09
+#define SMB2_OPLOCK_LEVEL_LEASE 0xFF
+
+/* Oplock states */
+#define OPLOCK_STATE_NONE 0x00
+#define OPLOCK_ACK_WAIT 0x01
+#define OPLOCK_CLOSING 0x02
+
+/* NOTE(review): presumably the kinds of oplock/lease break transition - confirm */
+#define OPLOCK_WRITE_TO_READ 0x01
+#define OPLOCK_READ_HANDLE_TO_READ 0x02
+#define OPLOCK_WRITE_TO_NONE 0x04
+#define OPLOCK_READ_TO_NONE 0x08
+
+/* size in bytes of a lease key */
+#define SMB2_LEASE_KEY_SIZE 16
+
+/*
+ * Lease parameters taken from the lease create context of an open request.
+ * version is 2 when the request used the v2 layout (which also carries
+ * parent_lease_key) and 1 otherwise.
+ */
+struct lease_ctx_info {
+ __u8 lease_key[SMB2_LEASE_KEY_SIZE];
+ __le32 req_state;
+ __le32 flags;
+ __le64 duration;
+ __u8 parent_lease_key[SMB2_LEASE_KEY_SIZE];
+ int version;
+};
+
+/*
+ * Per-client lease table: client_guid is compared against a connection's
+ * ClientGUID, lease_list holds the client's lease opinfos and l_entry links
+ * the table into the global list of lease tables.
+ */
+struct lease_table {
+ char client_guid[SMB2_CLIENT_GUID_SIZE];
+ struct list_head lease_list;
+ struct list_head l_entry;
+ spinlock_t lb_lock;
+};
+
+/*
+ * Lease state attached to an opinfo.  state is the currently granted lease
+ * state, version selects the v1 or v2 response layout, and l_lb points to the
+ * lease table the owning opinfo is linked into.
+ * NOTE(review): new_state looks like the target state of a pending break -
+ * confirm.
+ */
+struct lease {
+ __u8 lease_key[SMB2_LEASE_KEY_SIZE];
+ __le32 state;
+ __le32 new_state;
+ __le32 flags;
+ __le64 duration;
+ __u8 parent_lease_key[SMB2_LEASE_KEY_SIZE];
+ int version;
+ unsigned short epoch;
+ struct lease_table *l_lb;
+};
+
+/*
+ * Oplock or lease granted to one open file.
+ *
+ * refcount is taken with atomic_inc_not_zero() and dropped with opinfo_put().
+ * o_lease is used when is_lease is set.  op_entry links the opinfo into the
+ * inode's m_op_list, lease_entry into a lease table's lease_list, and works
+ * waiting on a break are queued on interim_list.
+ */
+struct oplock_info {
+ struct ksmbd_conn *conn;
+ struct ksmbd_session *sess;
+ struct ksmbd_work *work;
+ struct ksmbd_file *o_fp;
+ int level;
+ int op_state;
+ unsigned long pending_break;
+ u64 fid;
+ atomic_t breaking_cnt;
+ atomic_t refcount;
+ __u16 Tid;
+ bool is_lease;
+ bool open_trunc; /* truncate on open */
+ struct lease *o_lease;
+ struct list_head interim_list;
+ struct list_head op_entry;
+ struct list_head lease_entry;
+ wait_queue_head_t oplock_q; /* Other server threads */
+ wait_queue_head_t oplock_brk; /* oplock breaking wait */
+ struct rcu_head rcu_head;
+};
+
+/* NOTE(review): presumably the parameters of a lease break notification - confirm */
+struct lease_break_info {
+ __le32 curr_state;
+ __le32 new_state;
+ __le16 epoch;
+ char lease_key[SMB2_LEASE_KEY_SIZE];
+};
+
+/* NOTE(review): presumably the parameters of an oplock break notification - confirm */
+struct oplock_break_info {
+ int level;
+ int open_trunc;
+ int fid;
+};
+
+/* Oplock grant/break entry points */
+int smb_grant_oplock(struct ksmbd_work *work, int req_op_level,
+		     u64 pid, struct ksmbd_file *fp, __u16 tid,
+		     struct lease_ctx_info *lctx, int share_ret);
+void smb_break_all_levII_oplock(struct ksmbd_work *work,
+				struct ksmbd_file *fp, int is_trunc);
+int opinfo_write_to_read(struct oplock_info *opinfo);
+int opinfo_read_handle_to_read(struct oplock_info *opinfo);
+int opinfo_write_to_none(struct oplock_info *opinfo);
+int opinfo_read_to_none(struct oplock_info *opinfo);
+void close_id_del_oplock(struct ksmbd_file *fp);
+void smb_break_all_oplock(struct ksmbd_work *work, struct ksmbd_file *fp);
+struct oplock_info *opinfo_get(struct ksmbd_file *fp);
+void opinfo_put(struct oplock_info *opinfo);
+
+/* Lease related functions */
+void create_lease_buf(u8 *rbuf, struct lease *lease);
+struct lease_ctx_info *parse_lease_state(void *open_req);
+__u8 smb2_map_lease_to_oplock(__le32 lease_state);
+int lease_read_to_write(struct oplock_info *opinfo);
+
+/* Durable related functions */
+void create_durable_rsp_buf(char *cc);
+void create_durable_v2_rsp_buf(char *cc, struct ksmbd_file *fp);
+void create_mxac_rsp_buf(char *cc, int maximal_access);
+void create_disk_id_rsp_buf(char *cc, __u64 file_id, __u64 vol_id);
+void create_posix_rsp_buf(char *cc, struct ksmbd_file *fp);
+/* parameter is named as in the definition and its kernel-doc */
+struct create_context *smb2_find_context_vals(void *open_req, const char *tag);
+struct oplock_info *lookup_lease_in_table(struct ksmbd_conn *conn,
+					  char *lease_key);
+int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci,
+			struct lease_ctx_info *lctx);
+void destroy_lease_table(struct ksmbd_conn *conn);
+int smb2_check_durable_oplock(struct ksmbd_file *fp,
+			      struct lease_ctx_info *lctx, char *name);
+#endif /* __KSMBD_OPLOCK_H */
diff --git a/fs/ksmbd/server.c b/fs/ksmbd/server.c
new file mode 100644
index 000000000000..e6a9f6aa47eb
--- /dev/null
+++ b/fs/ksmbd/server.c
@@ -0,0 +1,633 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include "glob.h"
+#include "oplock.h"
+#include "misc.h"
+#include <linux/sched/signal.h>
+#include <linux/workqueue.h>
+#include <linux/sysfs.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include "server.h"
+#include "smb_common.h"
+#include "smbstatus.h"
+#include "connection.h"
+#include "transport_ipc.h"
+#include "mgmt/user_session.h"
+#include "crypto_ctx.h"
+#include "auth.h"
+
+int ksmbd_debug_types;
+
+/* global server configuration and state */
+struct ksmbd_server_config server_conf;
+
+/* kinds of deferred server control work */
+enum SERVER_CTRL_TYPE {
+ SERVER_CTRL_TYPE_INIT,
+ SERVER_CTRL_TYPE_RESET,
+};
+
+/* one queued control request; type is one of enum SERVER_CTRL_TYPE */
+struct server_ctrl_struct {
+ int type;
+ struct work_struct ctrl_work;
+};
+
+/* held while a control work item is being handled */
+static DEFINE_MUTEX(ctrl_lock);
+
+/*
+ * Replace the configuration string stored in slot @idx with a copy of @val.
+ * The previous string of that slot is freed.
+ *
+ * Return: 0 on success, -EINVAL for an out of range index or a NULL/empty
+ * @val, -ENOMEM if the copy cannot be allocated
+ */
+static int ___server_conf_set(int idx, char *val)
+{
+	if (idx >= ARRAY_SIZE(server_conf.conf) || !val || val[0] == 0x00)
+		return -EINVAL;
+
+	kfree(server_conf.conf[idx]);
+	server_conf.conf[idx] = kstrdup(val, GFP_KERNEL);
+
+	return server_conf.conf[idx] ? 0 : -ENOMEM;
+}
+
+/* Store a copy of @v as the NetBIOS name; returns the ___server_conf_set() result. */
+int ksmbd_set_netbios_name(char *v)
+{
+ return ___server_conf_set(SERVER_CONF_NETBIOS_NAME, v);
+}
+
+/* Store a copy of @v as the server string; returns the ___server_conf_set() result. */
+int ksmbd_set_server_string(char *v)
+{
+ return ___server_conf_set(SERVER_CONF_SERVER_STRING, v);
+}
+
+/* Store a copy of @v as the work group; returns the ___server_conf_set() result. */
+int ksmbd_set_work_group(char *v)
+{
+ return ___server_conf_set(SERVER_CONF_WORK_GROUP, v);
+}
+
+/* Return the stored NetBIOS name string (the pointer held in server_conf, not a copy). */
+char *ksmbd_netbios_name(void)
+{
+ return server_conf.conf[SERVER_CONF_NETBIOS_NAME];
+}
+
+/* Return the stored server string (the pointer held in server_conf, not a copy). */
+char *ksmbd_server_string(void)
+{
+ return server_conf.conf[SERVER_CONF_SERVER_STRING];
+}
+
+/* Return the stored work group string (the pointer held in server_conf, not a copy). */
+char *ksmbd_work_group(void)
+{
+ return server_conf.conf[SERVER_CONF_WORK_GROUP];
+}
+
+/**
+ * check_conn_state() - check state of server thread connection
+ * @work:	smb work containing server thread information
+ *
+ * When the connection is exiting or needs a reconnect, the response header
+ * status is set to STATUS_CONNECTION_DISCONNECTED.
+ *
+ * Return:	0 on valid connection, otherwise 1 to reconnect
+ */
+static inline int check_conn_state(struct ksmbd_work *work)
+{
+	struct smb_hdr *hdr;
+
+	if (!ksmbd_conn_exiting(work) && !ksmbd_conn_need_reconnect(work))
+		return 0;
+
+	hdr = work->response_buf;
+	hdr->Status.CifsError = STATUS_CONNECTION_DISCONNECTED;
+	return 1;
+}
+
+/* results of __process_request(): keep going with the response, or stop */
+#define SERVER_HANDLER_CONTINUE 0
+#define SERVER_HANDLER_ABORT 1
+
+/*
+ * Validate one request and run the handler registered for its command.
+ * The command value that was processed last is stored in *cmd.
+ *
+ * Return: SERVER_HANDLER_ABORT when the message fails verification or the
+ * work is flagged send_no_response, SERVER_HANDLER_CONTINUE otherwise
+ * (including the cases where only an error status was set).
+ */
+static int __process_request(struct ksmbd_work *work, struct ksmbd_conn *conn,
+ u16 *cmd)
+{
+ struct smb_version_cmds *cmds;
+ u16 command;
+ int ret;
+
+ /* check_conn_state() has already set the disconnected status */
+ if (check_conn_state(work))
+ return SERVER_HANDLER_CONTINUE;
+
+ if (ksmbd_verify_smb_message(work))
+ return SERVER_HANDLER_ABORT;
+
+ command = conn->ops->get_cmd_val(work);
+ *cmd = command;
+
+andx_again:
+ /* the command indexes conn->cmds, so reject out of range values */
+ if (command >= conn->max_cmds) {
+ conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER);
+ return SERVER_HANDLER_CONTINUE;
+ }
+
+ cmds = &conn->cmds[command];
+ if (!cmds->proc) {
+ ksmbd_debug(SMB, "*** not implemented yet cmd = %x\n", command);
+ conn->ops->set_rsp_status(work, STATUS_NOT_IMPLEMENTED);
+ return SERVER_HANDLER_CONTINUE;
+ }
+
+ /* a zero result from check_sign_req() is treated as a failed check */
+ if (work->sess && conn->ops->is_sign_req(work, command)) {
+ ret = conn->ops->check_sign_req(work);
+ if (!ret) {
+ conn->ops->set_rsp_status(work, STATUS_ACCESS_DENIED);
+ return SERVER_HANDLER_CONTINUE;
+ }
+ }
+
+ ret = cmds->proc(work);
+
+ if (ret < 0)
+ ksmbd_debug(CONN, "Failed to process %u [%d]\n", command, ret);
+ /* AndX commands - chained request can return positive values */
+ else if (ret > 0) {
+ /* the positive value is taken as the next command to run */
+ command = ret;
+ *cmd = command;
+ goto andx_again;
+ }
+
+ if (work->send_no_response)
+ return SERVER_HANDLER_ABORT;
+ return SERVER_HANDLER_CONTINUE;
+}
+
+/*
+ * Process one queued request: allocate the response buffer, decrypt the
+ * request if it carries a transform header, validate session and tree
+ * connect, run every (possibly chained) command and finally send the
+ * response, encrypting it when the request was encrypted.
+ *
+ * Nothing is sent when the response buffer cannot be allocated or when the
+ * work ends up flagged send_no_response.
+ */
+static void __handle_ksmbd_work(struct ksmbd_work *work,
+				struct ksmbd_conn *conn)
+{
+	u16 command = 0;
+	int rc;
+
+	if (conn->ops->allocate_rsp_buf(work))
+		return;
+
+	if (conn->ops->is_transform_hdr &&
+	    conn->ops->is_transform_hdr(work->request_buf)) {
+		rc = conn->ops->decrypt_req(work);
+		if (rc < 0) {
+			conn->ops->set_rsp_status(work, STATUS_DATA_ERROR);
+			goto send;
+		}
+
+		work->encrypted = true;
+	}
+
+	rc = conn->ops->init_rsp_hdr(work);
+	if (rc) {
+		/* either uid or tid is not correct */
+		conn->ops->set_rsp_status(work, STATUS_INVALID_HANDLE);
+		goto send;
+	}
+
+	if (conn->ops->check_user_session) {
+		rc = conn->ops->check_user_session(work);
+		if (rc < 0) {
+			command = conn->ops->get_cmd_val(work);
+			conn->ops->set_rsp_status(work,
+					STATUS_USER_SESSION_DELETED);
+			goto send;
+		} else if (rc > 0) {
+			rc = conn->ops->get_ksmbd_tcon(work);
+			if (rc < 0) {
+				conn->ops->set_rsp_status(work,
+					STATUS_NETWORK_NAME_DELETED);
+				goto send;
+			}
+		}
+	}
+
+	do {
+		rc = __process_request(work, conn, &command);
+		if (rc == SERVER_HANDLER_ABORT)
+			break;
+
+		/*
+		 * Call smb2_set_rsp_credits() function to set number of credits
+		 * granted in hdr of smb2 response.
+		 */
+		if (conn->ops->set_rsp_credits) {
+			spin_lock(&conn->credits_lock);
+			rc = conn->ops->set_rsp_credits(work);
+			spin_unlock(&conn->credits_lock);
+			if (rc < 0) {
+				conn->ops->set_rsp_status(work,
+					STATUS_INVALID_PARAMETER);
+				goto send;
+			}
+		}
+
+		if (work->sess &&
+		    (work->sess->sign || smb3_11_final_sess_setup_resp(work) ||
+		     conn->ops->is_sign_req(work, command)))
+			conn->ops->set_sign_rsp(work);
+	} while (is_chained_smb2_message(work));
+
+	if (work->send_no_response)
+		return;
+
+send:
+	smb3_preauth_hash_rsp(work);
+	if (work->sess && work->sess->enc && work->encrypted &&
+	    conn->ops->encrypt_resp) {
+		rc = conn->ops->encrypt_resp(work);
+		/*
+		 * Do not jump back to send: on failure.  Retrying would run
+		 * the very same encryption again and could spin forever;
+		 * report the error status and send the response as it is.
+		 */
+		if (rc < 0)
+			conn->ops->set_rsp_status(work, STATUS_DATA_ERROR);
+	}
+
+	ksmbd_conn_write(work);
+}
+
+/**
+ * handle_ksmbd_work() - process pending smb work requests
+ * @wk:	smb work containing request command buffer
+ *
+ * Called by kworker threads.  Once the request has been handled the work is
+ * dequeued and freed and the connection's r_count is dropped.
+ */
+static void handle_ksmbd_work(struct work_struct *wk)
+{
+	struct ksmbd_work *req = container_of(wk, struct ksmbd_work, work);
+	struct ksmbd_conn *owner = req->conn;
+
+	atomic64_inc(&owner->stats.request_served);
+	__handle_ksmbd_work(req, owner);
+
+	/* the work must not be touched after it has been freed */
+	ksmbd_conn_try_dequeue_request(req);
+	ksmbd_free_work_struct(req);
+	atomic_dec(&owner->r_count);
+}
+
+/**
+ * queue_ksmbd_work() - queue a smb request to worker thread queue
+ *		for processing smb command and sending response
+ * @conn:	connection instance
+ *
+ * Wraps the request buffer currently held by @conn in a new work item and
+ * submits it.  Ownership of the buffer moves from @conn to the work.
+ *
+ * Return:	0 on success, -ENOMEM if no work item can be allocated,
+ *		-EINVAL if ksmbd_init_smb_server() rejects the request
+ */
+static int queue_ksmbd_work(struct ksmbd_conn *conn)
+{
+	struct ksmbd_work *work = ksmbd_alloc_work_struct();
+
+	if (!work) {
+		pr_err("allocation for work failed\n");
+		return -ENOMEM;
+	}
+
+	/* hand the receive buffer over to the work item */
+	work->conn = conn;
+	work->request_buf = conn->request_buf;
+	conn->request_buf = NULL;
+
+	if (ksmbd_init_smb_server(work)) {
+		ksmbd_free_work_struct(work);
+		return -EINVAL;
+	}
+
+	ksmbd_conn_enqueue_request(work);
+	atomic_inc(&conn->r_count);
+
+	/* update activity on connection */
+	conn->last_active = jiffies;
+
+	INIT_WORK(&work->work, handle_ksmbd_work);
+	ksmbd_queue_work(work);
+	return 0;
+}
+
+/* Connection callback (process_fn): queue the received request as a work item. */
+static int ksmbd_server_process_request(struct ksmbd_conn *conn)
+{
+ return queue_ksmbd_work(conn);
+}
+
+/*
+ * Connection callback (terminate_fn): deregister the sessions of @conn and
+ * destroy its lease table.  Always returns 0.
+ */
+static int ksmbd_server_terminate_conn(struct ksmbd_conn *conn)
+{
+ ksmbd_sessions_deregister(conn);
+ destroy_lease_table(conn);
+ return 0;
+}
+
+/* Register the request processing and termination callbacks of the server. */
+static void ksmbd_server_tcp_callbacks_init(void)
+{
+	struct ksmbd_conn_ops ops = {
+		.process_fn	= ksmbd_server_process_request,
+		.terminate_fn	= ksmbd_server_terminate_conn,
+	};
+
+	ksmbd_conn_init_server_callbacks(&ops);
+}
+
+/* Free every stored configuration string and reset its slot to NULL. */
+static void server_conf_free(void)
+{
+	int slot = 0;
+
+	while (slot < ARRAY_SIZE(server_conf.conf)) {
+		kfree(server_conf.conf[slot]);
+		server_conf.conf[slot] = NULL;
+		slot++;
+	}
+}
+
+/*
+ * Put the server configuration into its initial state: "starting up", no
+ * enforced signing, the full supported protocol range and NTLMSSP
+ * authentication (plus Kerberos when CONFIG_SMB_SERVER_KERBEROS5 is set).
+ * Always returns 0.
+ */
+static int server_conf_init(void)
+{
+	WRITE_ONCE(server_conf.state, SERVER_STATE_STARTING_UP);
+
+	server_conf.enforced_signing = 0;
+
+	server_conf.min_protocol = ksmbd_min_protocol();
+	server_conf.max_protocol = ksmbd_max_protocol();
+
+	server_conf.auth_mechs = KSMBD_AUTH_NTLMSSP;
+#ifdef CONFIG_SMB_SERVER_KERBEROS5
+	server_conf.auth_mechs |= KSMBD_AUTH_KRB5 | KSMBD_AUTH_MSKRB5;
+#endif
+
+	return 0;
+}
+
+/*
+ * Handle a SERVER_CTRL_TYPE_INIT request: bring up the transports and mark
+ * the server as running.  If transport initialisation fails a reset request
+ * is queued instead.  @ctrl is not used.
+ */
+static void server_ctrl_handle_init(struct server_ctrl_struct *ctrl)
+{
+	if (!ksmbd_conn_transport_init()) {
+		WRITE_ONCE(server_conf.state, SERVER_STATE_RUNNING);
+		return;
+	}
+
+	server_queue_ctrl_reset_work();
+}
+
+/*
+ * Handle a SERVER_CTRL_TYPE_RESET request: reset the IPC layer, tear down the
+ * transports and return the configuration to its initial state.  @ctrl is
+ * not used.
+ */
+static void server_ctrl_handle_reset(struct server_ctrl_struct *ctrl)
+{
+ ksmbd_ipc_soft_reset();
+ ksmbd_conn_transport_destroy();
+ server_conf_free();
+ server_conf_init();
+ /* server_conf_init() has already stored this state; written once more here */
+ WRITE_ONCE(server_conf.state, SERVER_STATE_STARTING_UP);
+}
+
+/*
+ * Workqueue handler for control requests queued by __queue_ctrl_work().
+ *
+ * Dispatches on the request type while holding ctrl_lock, then frees the
+ * request and drops the module reference that was taken when it was queued.
+ */
+static void server_ctrl_handle_work(struct work_struct *work)
+{
+	struct server_ctrl_struct *req =
+		container_of(work, struct server_ctrl_struct, ctrl_work);
+
+	mutex_lock(&ctrl_lock);
+	if (req->type == SERVER_CTRL_TYPE_INIT)
+		server_ctrl_handle_init(req);
+	else if (req->type == SERVER_CTRL_TYPE_RESET)
+		server_ctrl_handle_reset(req);
+	else
+		pr_err("Unknown server work type: %d\n", req->type);
+	mutex_unlock(&ctrl_lock);
+
+	kfree(req);
+	module_put(THIS_MODULE);
+}
+
+/*
+ * Allocate a control request of the given @type and queue it on
+ * system_long_wq.  A module reference is taken for the lifetime of the
+ * request; server_ctrl_handle_work() releases it.
+ *
+ * Returns 0 on success or -ENOMEM if the request cannot be allocated.
+ */
+static int __queue_ctrl_work(int type)
+{
+	struct server_ctrl_struct *req = kmalloc(sizeof(*req), GFP_KERNEL);
+
+	if (!req)
+		return -ENOMEM;
+
+	__module_get(THIS_MODULE);
+	req->type = type;
+	INIT_WORK(&req->ctrl_work, server_ctrl_handle_work);
+	queue_work(system_long_wq, &req->ctrl_work);
+	return 0;
+}
+
+/* Queue an INIT control request; returns 0 or -ENOMEM. */
+int server_queue_ctrl_init_work(void)
+{
+ return __queue_ctrl_work(SERVER_CTRL_TYPE_INIT);
+}
+
+/* Queue a RESET control request; returns 0 or -ENOMEM. */
+int server_queue_ctrl_reset_work(void)
+{
+ return __queue_ctrl_work(SERVER_CTRL_TYPE_RESET);
+}
+
+/*
+ * sysfs "stats" attribute: prints one line of the form
+ * "<stats version> <state name> <tcp port> <ipc_last_active / HZ>".
+ */
+static ssize_t stats_show(struct class *class, struct class_attribute *attr,
+			  char *buf)
+{
+	/*
+	 * Inc this each time you change stats output format,
+	 * so user space will know what to do.
+	 */
+	static int stats_version = 2;
+	/* Names indexed by the SERVER_STATE_* value held in server_conf.state */
+	static const char * const state[] = {
+		"startup",
+		"running",
+		"reset",
+		"shutdown"
+	};
+	const char *state_name = state[server_conf.state];
+	unsigned long ipc_secs = server_conf.ipc_last_active / HZ;
+
+	return scnprintf(buf, PAGE_SIZE, "%d %s %d %lu\n", stats_version,
+			 state_name, server_conf.tcp_port, ipc_secs);
+}
+
+/*
+ * sysfs "kill_server" attribute: writing "hard" resets the server
+ * synchronously under ctrl_lock.  Any other input is accepted and ignored.
+ * Always consumes the whole write.
+ */
+static ssize_t kill_server_store(struct class *class,
+				 struct class_attribute *attr, const char *buf,
+				 size_t len)
+{
+	if (sysfs_streq(buf, "hard")) {
+		pr_info("kill command received\n");
+
+		mutex_lock(&ctrl_lock);
+		WRITE_ONCE(server_conf.state, SERVER_STATE_RESETTING);
+		/* Hold a module reference across the reset. */
+		__module_get(THIS_MODULE);
+		server_ctrl_handle_reset(NULL);
+		module_put(THIS_MODULE);
+		mutex_unlock(&ctrl_lock);
+	}
+
+	return len;
+}
+
+/*
+ * Names of the debug categories accepted by the "debug" sysfs attribute.
+ * The entry at index i corresponds to bit i of ksmbd_debug_types.
+ */
+static const char * const debug_type_strings[] = {"smb", "auth", "vfs",
+ "oplock", "ipc", "conn",
+ "rdma"};
+
+/*
+ * sysfs "debug" attribute (read side): list every debug category on one
+ * line, wrapping the currently enabled ones in square brackets.
+ */
+static ssize_t debug_show(struct class *class, struct class_attribute *attr,
+			  char *buf)
+{
+	ssize_t used = 0;
+	int bit;
+
+	for (bit = 0; bit < ARRAY_SIZE(debug_type_strings); bit++) {
+		bool enabled = (ksmbd_debug_types >> bit) & 1;
+
+		used += scnprintf(buf + used, PAGE_SIZE - used,
+				  enabled ? "[%s] " : "%s ",
+				  debug_type_strings[bit]);
+	}
+
+	used += scnprintf(buf + used, PAGE_SIZE - used, "\n");
+	return used;
+}
+
+/*
+ * sysfs "debug" attribute (write side).
+ *
+ * "all" switches every category on, or everything off if all of them are
+ * already on.  A single category name toggles just that category.  Unknown
+ * input is ignored.  Always consumes the whole write.
+ */
+static ssize_t debug_store(struct class *class, struct class_attribute *attr,
+			   const char *buf, size_t len)
+{
+	int bit;
+
+	if (sysfs_streq(buf, "all")) {
+		if (ksmbd_debug_types == KSMBD_DEBUG_ALL)
+			ksmbd_debug_types = 0;
+		else
+			ksmbd_debug_types = KSMBD_DEBUG_ALL;
+		return len;
+	}
+
+	for (bit = 0; bit < ARRAY_SIZE(debug_type_strings); bit++) {
+		if (!sysfs_streq(buf, debug_type_strings[bit]))
+			continue;
+
+		/* Flip the matching category bit. */
+		ksmbd_debug_types ^= (1 << bit);
+		break;
+	}
+
+	return len;
+}
+
+/*
+ * sysfs control interface: class "ksmbd-control" with a read-only "stats"
+ * attribute, a write-only "kill_server" attribute and a read-write "debug"
+ * attribute, backed by the *_show/*_store handlers above.
+ */
+static CLASS_ATTR_RO(stats);
+static CLASS_ATTR_WO(kill_server);
+static CLASS_ATTR_RW(debug);
+
+static struct attribute *ksmbd_control_class_attrs[] = {
+ &class_attr_stats.attr,
+ &class_attr_kill_server.attr,
+ &class_attr_debug.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(ksmbd_control_class);
+
+static struct class ksmbd_control_class = {
+ .name = "ksmbd-control",
+ .owner = THIS_MODULE,
+ .class_groups = ksmbd_control_class_groups,
+};
+
+/*
+ * Tear down the server: mark it SHUTTING_DOWN, remove the sysfs control
+ * class and release the subsystems set up by ksmbd_server_init().  The
+ * inode hash is not released here; ksmbd_server_exit() does that.
+ * Always returns 0.
+ */
+static int ksmbd_server_shutdown(void)
+{
+ WRITE_ONCE(server_conf.state, SERVER_STATE_SHUTTING_DOWN);
+
+ class_unregister(&ksmbd_control_class);
+ ksmbd_workqueue_destroy();
+ ksmbd_ipc_release();
+ ksmbd_conn_transport_destroy();
+ ksmbd_crypto_destroy();
+ ksmbd_free_global_file_table();
+ destroy_lease_table(NULL);
+ ksmbd_work_pool_destroy();
+ ksmbd_exit_file_cache();
+ server_conf_free();
+ return 0;
+}
+
+/*
+ * Module entry point.
+ *
+ * Registers the sysfs control class, installs the connection callbacks and
+ * initializes each subsystem in turn.  On failure the error labels below
+ * undo, in reverse order, exactly the steps that had already succeeded.
+ *
+ * Returns 0 on success or the error code of the step that failed.
+ */
+static int __init ksmbd_server_init(void)
+{
+ int ret;
+
+ ret = class_register(&ksmbd_control_class);
+ if (ret) {
+ pr_err("Unable to register ksmbd-control class\n");
+ return ret;
+ }
+
+ ksmbd_server_tcp_callbacks_init();
+
+ ret = server_conf_init();
+ if (ret)
+ goto err_unregister;
+
+ ret = ksmbd_work_pool_init();
+ if (ret)
+ goto err_unregister;
+
+ ret = ksmbd_init_file_cache();
+ if (ret)
+ goto err_destroy_work_pools;
+
+ ret = ksmbd_ipc_init();
+ if (ret)
+ goto err_exit_file_cache;
+
+ ret = ksmbd_init_global_file_table();
+ if (ret)
+ goto err_ipc_release;
+
+ ret = ksmbd_inode_hash_init();
+ if (ret)
+ goto err_destroy_file_table;
+
+ ret = ksmbd_crypto_create();
+ if (ret)
+ goto err_release_inode_hash;
+
+ ret = ksmbd_workqueue_init();
+ if (ret)
+ goto err_crypto_destroy;
+ return 0;
+
+ /* Unwind path: each label releases one step and falls through. */
+err_crypto_destroy:
+ ksmbd_crypto_destroy();
+err_release_inode_hash:
+ ksmbd_release_inode_hash();
+err_destroy_file_table:
+ ksmbd_free_global_file_table();
+err_ipc_release:
+ ksmbd_ipc_release();
+err_exit_file_cache:
+ ksmbd_exit_file_cache();
+err_destroy_work_pools:
+ ksmbd_work_pool_destroy();
+err_unregister:
+ class_unregister(&ksmbd_control_class);
+
+ return ret;
+}
+
+/**
+ * ksmbd_server_exit() - module exit handler
+ *
+ * Runs ksmbd_server_shutdown() and then releases the inode hash, which the
+ * shutdown routine itself does not release.
+ */
+static void __exit ksmbd_server_exit(void)
+{
+ ksmbd_server_shutdown();
+ ksmbd_release_inode_hash();
+}
+
+/* Module metadata, soft dependencies and the init/exit entry points. */
+MODULE_AUTHOR("Namjae Jeon <linkinjeon@kernel.org>");
+MODULE_VERSION(KSMBD_VERSION);
+MODULE_DESCRIPTION("Linux kernel CIFS/SMB SERVER");
+MODULE_LICENSE("GPL");
+/* "pre:" soft dependencies: modules to be loaded before this one. */
+MODULE_SOFTDEP("pre: ecb");
+MODULE_SOFTDEP("pre: hmac");
+MODULE_SOFTDEP("pre: md4");
+MODULE_SOFTDEP("pre: md5");
+MODULE_SOFTDEP("pre: nls");
+MODULE_SOFTDEP("pre: aes");
+MODULE_SOFTDEP("pre: cmac");
+MODULE_SOFTDEP("pre: sha256");
+MODULE_SOFTDEP("pre: sha512");
+MODULE_SOFTDEP("pre: aead2");
+MODULE_SOFTDEP("pre: ccm");
+MODULE_SOFTDEP("pre: gcm");
+module_init(ksmbd_server_init)
+module_exit(ksmbd_server_exit)
diff --git a/fs/ksmbd/server.h b/fs/ksmbd/server.h
new file mode 100644
index 000000000000..ac9d932f8c8a
--- /dev/null
+++ b/fs/ksmbd/server.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __SERVER_H__
+#define __SERVER_H__
+
+#include "smbacl.h"
+
+/*
+ * Server state type
+ *
+ * The numeric order is significant: stats_show() in server.c uses the value
+ * as an index into its table of state names, and
+ * ksmbd_server_configurable() relies on the states before
+ * SERVER_STATE_RESETTING comparing lower than it.
+ */
+enum {
+ SERVER_STATE_STARTING_UP,
+ SERVER_STATE_RUNNING,
+ SERVER_STATE_RESETTING,
+ SERVER_STATE_SHUTTING_DOWN,
+};
+
+/*
+ * Server global config string index
+ *
+ * SERVER_CONF_WORK_GROUP must stay the last entry: the conf[] array in
+ * struct ksmbd_server_config is sized as SERVER_CONF_WORK_GROUP + 1.
+ */
+enum {
+ SERVER_CONF_NETBIOS_NAME,
+ SERVER_CONF_SERVER_STRING,
+ SERVER_CONF_WORK_GROUP,
+};
+
+/* Global server configuration and state; the single instance is server_conf. */
+struct ksmbd_server_config {
+ unsigned int flags; /* KSMBD_GLOBAL_FLAG_* bits */
+ unsigned int state; /* one of SERVER_STATE_* */
+ short signing;
+ short enforced_signing;
+ short min_protocol;
+ short max_protocol;
+ unsigned short tcp_port;
+ unsigned short ipc_timeout;
+ unsigned long ipc_last_active; /* divided by HZ when reported by stats_show() */
+ unsigned long deadtime;
+ unsigned int share_fake_fscaps;
+ struct smb_sid domain_sid;
+ unsigned int auth_mechs; /* KSMBD_AUTH_* bits */
+
+ char *conf[SERVER_CONF_WORK_GROUP + 1]; /* indexed by SERVER_CONF_*; freed with kfree() */
+};
+
+/* The global configuration instance (defined outside this header). */
+extern struct ksmbd_server_config server_conf;
+
+/* Setters for the configuration strings - presumably the conf[] entries; definitions are not in this header. */
+int ksmbd_set_netbios_name(char *v);
+int ksmbd_set_server_string(char *v);
+int ksmbd_set_work_group(char *v);
+
+/* Matching getters for the same strings. */
+char *ksmbd_netbios_name(void);
+char *ksmbd_server_string(void);
+char *ksmbd_work_group(void);
+
+/* Returns non-zero iff the server state is currently SERVER_STATE_RUNNING. */
+static inline int ksmbd_server_running(void)
+{
+	unsigned int cur = READ_ONCE(server_conf.state);
+
+	return cur == SERVER_STATE_RUNNING;
+}
+
+/*
+ * Returns non-zero while the server state is ordered before
+ * SERVER_STATE_RESETTING, i.e. while it is starting up or running.
+ */
+static inline int ksmbd_server_configurable(void)
+{
+	unsigned int cur = READ_ONCE(server_conf.state);
+
+	return cur < SERVER_STATE_RESETTING;
+}
+
+/* Queue an asynchronous INIT / RESET control request (see server.c). */
+int server_queue_ctrl_init_work(void);
+int server_queue_ctrl_reset_work(void);
+#endif /* __SERVER_H__ */
diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c
new file mode 100644
index 000000000000..9aa46bb3e10d
--- /dev/null
+++ b/fs/ksmbd/smb2misc.c
@@ -0,0 +1,438 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include "glob.h"
+#include "nterr.h"
+#include "smb2pdu.h"
+#include "smb_common.h"
+#include "smbstatus.h"
+#include "mgmt/user_session.h"
+#include "connection.h"
+
+/*
+ * Check the direction flag of an SMB2 header.
+ *
+ * Returns 1 if SMB2_FLAGS_SERVER_TO_REDIR is set (the PDU is marked as a
+ * response), 0 otherwise.  ksmbd_smb2_check_message() treats a non-zero
+ * result as an invalid message.
+ */
+static int check_smb2_hdr(struct smb2_hdr *hdr)
+{
+	return (hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR) ? 1 : 0;
+}
+
+/*
+ * The following table defines the expected "StructureSize" of SMB2 requests
+ * in order by SMB2 command. This is similar to "wct" in SMB/CIFS requests.
+ *
+ * Note that commands are defined in smb2pdu.h in le16 but the array below is
+ * indexed by command in host byte order.
+ *
+ * The sizes themselves are stored little-endian so that they can be compared
+ * directly against the StructureSize2 field of a received PDU.
+ */
+static const __le16 smb2_req_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
+ /* SMB2_NEGOTIATE */ cpu_to_le16(36),
+ /* SMB2_SESSION_SETUP */ cpu_to_le16(25),
+ /* SMB2_LOGOFF */ cpu_to_le16(4),
+ /* SMB2_TREE_CONNECT */ cpu_to_le16(9),
+ /* SMB2_TREE_DISCONNECT */ cpu_to_le16(4),
+ /* SMB2_CREATE */ cpu_to_le16(57),
+ /* SMB2_CLOSE */ cpu_to_le16(24),
+ /* SMB2_FLUSH */ cpu_to_le16(24),
+ /* SMB2_READ */ cpu_to_le16(49),
+ /* SMB2_WRITE */ cpu_to_le16(49),
+ /* SMB2_LOCK */ cpu_to_le16(48),
+ /* SMB2_IOCTL */ cpu_to_le16(57),
+ /* SMB2_CANCEL */ cpu_to_le16(4),
+ /* SMB2_ECHO */ cpu_to_le16(4),
+ /* SMB2_QUERY_DIRECTORY */ cpu_to_le16(33),
+ /* SMB2_CHANGE_NOTIFY */ cpu_to_le16(32),
+ /* SMB2_QUERY_INFO */ cpu_to_le16(41),
+ /* SMB2_SET_INFO */ cpu_to_le16(33),
+ /* use 44 for lease break */
+ /* SMB2_OPLOCK_BREAK */ cpu_to_le16(36)
+};
+
+/*
+ * Whether a request of the given command (indexed in host byte order, same
+ * order as smb2_req_struct_sizes) can carry a variable-length data area.
+ *
+ * The size of the variable area depends on the offset and length fields,
+ * which are located in different places for the various SMB2 requests.
+ * smb2_calc_size() skips the data area lookup for commands marked false.
+ */
+static const bool has_smb2_data_area[NUMBER_OF_SMB2_COMMANDS] = {
+ /* SMB2_NEGOTIATE */ true,
+ /* SMB2_SESSION_SETUP */ true,
+ /* SMB2_LOGOFF */ false,
+ /* SMB2_TREE_CONNECT */ true,
+ /* SMB2_TREE_DISCONNECT */ false,
+ /* SMB2_CREATE */ true,
+ /* SMB2_CLOSE */ false,
+ /* SMB2_FLUSH */ false,
+ /* SMB2_READ */ true,
+ /* SMB2_WRITE */ true,
+ /* SMB2_LOCK */ true,
+ /* SMB2_IOCTL */ true,
+ /* SMB2_CANCEL */ false, /* BB CHECK this not listed in documentation */
+ /* SMB2_ECHO */ false,
+ /* SMB2_QUERY_DIRECTORY */ true,
+ /* SMB2_CHANGE_NOTIFY */ false,
+ /* SMB2_QUERY_INFO */ true,
+ /* SMB2_SET_INFO */ true,
+ /* SMB2_OPLOCK_BREAK */ false
+};
+
+/*
+ * Locate the variable-length data area of an SMB2 PDU.
+ *
+ * On return *off holds the offset of the data area from the beginning of
+ * the SMB2 header and *len holds its length.  Both start out as 0 and stay
+ * 0 for commands that are not handled in the switch below.  Out-of-range
+ * values are not reported as an error: *len is reset to 0 (and, for a bad
+ * offset, *off as well), so the data area is simply ignored.
+ *
+ * Returns a pointer to the beginning of the data area, or NULL if either
+ * *off or *len ended up as 0.
+ */
+static char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
+{
+ *off = 0;
+ *len = 0;
+
+ /*
+ * PDUs that carry an error status (other than MORE_PROCESSING_REQUIRED)
+ * together with the error-response StructureSize have no data area.
+ */
+ if (hdr->Status && hdr->Status != STATUS_MORE_PROCESSING_REQUIRED &&
+ (((struct smb2_err_rsp *)hdr)->StructureSize) == SMB2_ERROR_STRUCTURE_SIZE2_LE)
+ return NULL;
+
+ /*
+ * Following commands have data areas so we have to get the location
+ * of the data buffer offset and data buffer length for the particular
+ * command.  hdr->Command is compared in its on-the-wire (le16) form.
+ */
+ switch (hdr->Command) {
+ case SMB2_SESSION_SETUP:
+ *off = le16_to_cpu(((struct smb2_sess_setup_req *)hdr)->SecurityBufferOffset);
+ *len = le16_to_cpu(((struct smb2_sess_setup_req *)hdr)->SecurityBufferLength);
+ break;
+ case SMB2_TREE_CONNECT:
+ *off = le16_to_cpu(((struct smb2_tree_connect_req *)hdr)->PathOffset);
+ *len = le16_to_cpu(((struct smb2_tree_connect_req *)hdr)->PathLength);
+ break;
+ case SMB2_CREATE:
+ {
+ /* Create contexts, when present, take precedence over the name. */
+ if (((struct smb2_create_req *)hdr)->CreateContextsLength) {
+ *off = le32_to_cpu(((struct smb2_create_req *)
+ hdr)->CreateContextsOffset);
+ *len = le32_to_cpu(((struct smb2_create_req *)
+ hdr)->CreateContextsLength);
+ break;
+ }
+
+ *off = le16_to_cpu(((struct smb2_create_req *)hdr)->NameOffset);
+ *len = le16_to_cpu(((struct smb2_create_req *)hdr)->NameLength);
+ break;
+ }
+ case SMB2_QUERY_INFO:
+ *off = le16_to_cpu(((struct smb2_query_info_req *)hdr)->InputBufferOffset);
+ *len = le32_to_cpu(((struct smb2_query_info_req *)hdr)->InputBufferLength);
+ break;
+ case SMB2_SET_INFO:
+ *off = le16_to_cpu(((struct smb2_set_info_req *)hdr)->BufferOffset);
+ *len = le32_to_cpu(((struct smb2_set_info_req *)hdr)->BufferLength);
+ break;
+ case SMB2_READ:
+ *off = le16_to_cpu(((struct smb2_read_req *)hdr)->ReadChannelInfoOffset);
+ *len = le16_to_cpu(((struct smb2_read_req *)hdr)->ReadChannelInfoLength);
+ break;
+ case SMB2_WRITE:
+ /* Use the payload if a data offset is given, else the channel info. */
+ if (((struct smb2_write_req *)hdr)->DataOffset) {
+ *off = le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset);
+ *len = le32_to_cpu(((struct smb2_write_req *)hdr)->Length);
+ break;
+ }
+
+ *off = le16_to_cpu(((struct smb2_write_req *)hdr)->WriteChannelInfoOffset);
+ *len = le16_to_cpu(((struct smb2_write_req *)hdr)->WriteChannelInfoLength);
+ break;
+ case SMB2_QUERY_DIRECTORY:
+ *off = le16_to_cpu(((struct smb2_query_directory_req *)hdr)->FileNameOffset);
+ *len = le16_to_cpu(((struct smb2_query_directory_req *)hdr)->FileNameLength);
+ break;
+ case SMB2_LOCK:
+ {
+ int lock_count;
+
+ /*
+ * The 48-byte fixed part of an smb2_lock request already
+ * includes one smb2_lock_element, so only the elements
+ * beyond the first count as data area.
+ */
+ lock_count = le16_to_cpu(((struct smb2_lock_req *)hdr)->LockCount) - 1;
+ if (lock_count > 0) {
+ *off = __SMB2_HEADER_STRUCTURE_SIZE + 48;
+ *len = sizeof(struct smb2_lock_element) * lock_count;
+ }
+ break;
+ }
+ case SMB2_IOCTL:
+ *off = le32_to_cpu(((struct smb2_ioctl_req *)hdr)->InputOffset);
+ *len = le32_to_cpu(((struct smb2_ioctl_req *)hdr)->InputCount);
+
+ break;
+ default:
+ ksmbd_debug(SMB, "no length check for command\n");
+ break;
+ }
+
+ /*
+ * Invalid length or offset probably means data area is invalid, but
+ * we have little choice but to ignore the data area in this case.
+ * Only the first matching condition below is applied.
+ */
+ if (*off > 4096) {
+ ksmbd_debug(SMB, "offset %d too large, data area ignored\n",
+ *off);
+ *len = 0;
+ *off = 0;
+ } else if (*off < 0) {
+ ksmbd_debug(SMB,
+ "negative offset %d to data invalid ignore data area\n",
+ *off);
+ *off = 0;
+ *len = 0;
+ } else if (*len < 0) {
+ ksmbd_debug(SMB,
+ "negative data length %d invalid, data area ignored\n",
+ *len);
+ *len = 0;
+ } else if (*len > 128 * 1024) {
+ ksmbd_debug(SMB, "data area larger than 128K: %d\n", *len);
+ *len = 0;
+ }
+
+ /* return pointer to beginning of data area, ie offset from SMB start */
+ if ((*off != 0) && (*len != 0))
+ return (char *)hdr + *off;
+ else
+ return NULL;
+}
+
+/*
+ * Compute the expected size of an SMB2 message from its fixed header, its
+ * fixed parameter area and, for commands that have one, its data area.
+ *
+ * Both StructureSize (header) and StructureSize2 (fixed parameters) are
+ * expected to have been validated by the caller.
+ */
+static unsigned int smb2_calc_size(void *buf)
+{
+	struct smb2_pdu *pdu = buf;
+	struct smb2_hdr *hdr = &pdu->hdr;
+	int data_off;	/* offset from the beginning of SMB to data area */
+	int data_len;	/* length of the variable length data area */
+	int total;
+
+	total = le16_to_cpu(hdr->StructureSize);
+	total += le16_to_cpu(pdu->StructureSize2);
+
+	if (has_smb2_data_area[le16_to_cpu(hdr->Command)]) {
+		smb2_get_data_area_len(&data_off, &data_len, hdr);
+		ksmbd_debug(SMB, "SMB2 data length %d offset %d\n", data_len,
+			    data_off);
+
+		if (data_len > 0) {
+			/*
+			 * The data area must begin after the fixed area.  For
+			 * some commands (typically those with an odd
+			 * StructureSize) the last byte of the fixed area is
+			 * already part of the data area, hence the "+ 1".
+			 */
+			if (data_off + 1 < total)
+				ksmbd_debug(SMB,
+					    "data area offset %d overlaps SMB2 header %d\n",
+					    data_off + 1, total);
+			else
+				total = data_off + data_len;
+		}
+	}
+
+	ksmbd_debug(SMB, "SMB2 len %d\n", total);
+	return total;
+}
+
+/*
+ * Add two 32-bit wire lengths, saturating at the largest 32-bit value
+ * instead of wrapping around.
+ */
+static inline unsigned int smb2_len_add(u32 a, u32 b)
+{
+	u32 sum = a + b;
+
+	return sum < a ? ~0U : sum;
+}
+
+/*
+ * Per-command payload lengths used for the credit charge check.  All of
+ * them are unsigned: the wire fields are 32-bit and client controlled, so
+ * a signed result could turn a huge length into a negative one.
+ */
+static inline unsigned int smb2_query_info_req_len(struct smb2_query_info_req *h)
+{
+	return smb2_len_add(le32_to_cpu(h->InputBufferLength),
+			    le32_to_cpu(h->OutputBufferLength));
+}
+
+static inline unsigned int smb2_set_info_req_len(struct smb2_set_info_req *h)
+{
+	return le32_to_cpu(h->BufferLength);
+}
+
+static inline unsigned int smb2_read_req_len(struct smb2_read_req *h)
+{
+	return le32_to_cpu(h->Length);
+}
+
+static inline unsigned int smb2_write_req_len(struct smb2_write_req *h)
+{
+	return le32_to_cpu(h->Length);
+}
+
+static inline unsigned int smb2_query_dir_req_len(struct smb2_query_directory_req *h)
+{
+	return le32_to_cpu(h->OutputBufferLength);
+}
+
+static inline unsigned int smb2_ioctl_req_len(struct smb2_ioctl_req *h)
+{
+	return smb2_len_add(le32_to_cpu(h->InputCount),
+			    le32_to_cpu(h->OutputCount));
+}
+
+static inline unsigned int smb2_ioctl_resp_len(struct smb2_ioctl_req *h)
+{
+	return smb2_len_add(le32_to_cpu(h->MaxInputResponse),
+			    le32_to_cpu(h->MaxOutputResponse));
+}
+
+/*
+ * Check that the CreditCharge of a request covers its payload.
+ *
+ * For the commands handled below, the larger of the request length and the
+ * expected response length is divided (rounding up) by
+ * SMB2_MAX_BUFFER_SIZE to obtain the number of credits needed.  A
+ * CreditCharge of 0 is treated as 1.  Other commands are not checked.
+ *
+ * Returns 0 if the charge is sufficient (or the command is not checked),
+ * 1 if it is too small.
+ */
+static int smb2_validate_credit_charge(struct smb2_hdr *hdr)
+{
+	unsigned int req_len = 0, expect_resp_len = 0, calc_credit_num, max_len;
+	unsigned int credit_charge = le16_to_cpu(hdr->CreditCharge);
+	void *__hdr = hdr;
+
+	switch (hdr->Command) {
+	case SMB2_QUERY_INFO:
+		req_len = smb2_query_info_req_len(__hdr);
+		break;
+	case SMB2_SET_INFO:
+		req_len = smb2_set_info_req_len(__hdr);
+		break;
+	case SMB2_READ:
+		req_len = smb2_read_req_len(__hdr);
+		break;
+	case SMB2_WRITE:
+		req_len = smb2_write_req_len(__hdr);
+		break;
+	case SMB2_QUERY_DIRECTORY:
+		req_len = smb2_query_dir_req_len(__hdr);
+		break;
+	case SMB2_IOCTL:
+		req_len = smb2_ioctl_req_len(__hdr);
+		expect_resp_len = smb2_ioctl_resp_len(__hdr);
+		break;
+	default:
+		return 0;
+	}
+
+	credit_charge = max(1U, credit_charge);
+	max_len = max(req_len, expect_resp_len);
+
+	/*
+	 * Round up without adding to max_len first, so that lengths close to
+	 * the 32-bit limit cannot wrap and yield a tiny credit requirement.
+	 */
+	calc_credit_num = max_len / SMB2_MAX_BUFFER_SIZE;
+	if (max_len % SMB2_MAX_BUFFER_SIZE)
+		calc_credit_num++;
+
+	if (credit_charge < calc_credit_num) {
+		pr_err("Insufficient credit charge, given: %u, needed: %u\n",
+		       credit_charge, calc_credit_num);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Sanity-check the SMB2 request that @work currently refers to (the first
+ * PDU, or the next one of a compound when next_smb2_rcv_hdr_off is set).
+ *
+ * Verifies the direction flag, the header StructureSize, the command range,
+ * the per-command StructureSize2, the credit charge (for connections with
+ * the LARGE_MTU capability) and finally compares the received length with
+ * the length calculated from the PDU contents.
+ *
+ * Returns 0 if the message is acceptable and 1 otherwise.
+ */
+int ksmbd_smb2_check_message(struct ksmbd_work *work)
+{
+ struct smb2_pdu *pdu = work->request_buf;
+ struct smb2_hdr *hdr = &pdu->hdr;
+ int command;
+ __u32 clc_len; /* calculated length */
+ __u32 len = get_rfc1002_len(pdu);
+
+ if (work->next_smb2_rcv_hdr_off) {
+ pdu = ksmbd_req_buf_next(work);
+ hdr = &pdu->hdr;
+ }
+
+ /*
+ * Length of this PDU: NextCommand if another compound element follows,
+ * otherwise whatever remains of the transport payload (rounded up to 8
+ * for the last element of a compound).
+ */
+ if (le32_to_cpu(hdr->NextCommand) > 0) {
+ len = le32_to_cpu(hdr->NextCommand);
+ } else if (work->next_smb2_rcv_hdr_off) {
+ len -= work->next_smb2_rcv_hdr_off;
+ len = round_up(len, 8);
+ }
+
+ if (check_smb2_hdr(hdr))
+ return 1;
+
+ if (hdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) {
+ ksmbd_debug(SMB, "Illegal structure size %u\n",
+ le16_to_cpu(hdr->StructureSize));
+ return 1;
+ }
+
+ command = le16_to_cpu(hdr->Command);
+ if (command >= NUMBER_OF_SMB2_COMMANDS) {
+ ksmbd_debug(SMB, "Illegal SMB2 command %d\n", command);
+ return 1;
+ }
+
+ if (smb2_req_struct_sizes[command] != pdu->StructureSize2) {
+ if (command != SMB2_OPLOCK_BREAK_HE &&
+ (hdr->Status == 0 || pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2_LE)) {
+ /* error packets have 9 byte structure size */
+ ksmbd_debug(SMB,
+ "Illegal request size %u for command %d\n",
+ le16_to_cpu(pdu->StructureSize2), command);
+ return 1;
+ } else if (command == SMB2_OPLOCK_BREAK_HE &&
+ hdr->Status == 0 &&
+ le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_20 &&
+ le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_21) {
+ /* special case for SMB2.1 lease break message */
+ ksmbd_debug(SMB,
+ "Illegal request size %d for oplock break\n",
+ le16_to_cpu(pdu->StructureSize2));
+ return 1;
+ }
+ }
+
+ if ((work->conn->vals->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU) &&
+ smb2_validate_credit_charge(hdr)) {
+ work->conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER);
+ return 1;
+ }
+
+ clc_len = smb2_calc_size(hdr);
+ if (len != clc_len) {
+ /*
+ * The calculated length may be one byte more than the received
+ * length due to implied bcc[0].
+ */
+ if (clc_len == len + 1)
+ return 0;
+
+ /*
+ * The final PDU in a compound may also be padded to 8 bytes
+ * (the original note here attributes this to win2016).
+ */
+ if (ALIGN(clc_len, 8) == len)
+ return 0;
+
+ /*
+ * windows client also pad up to 8 bytes when compounding.
+ * If the request is longer than calculated, log it (debug
+ * level) since it may indicate a problem, but allow it and
+ * continue since the frame is parseable.
+ */
+ if (clc_len < len) {
+ ksmbd_debug(SMB,
+ "cli req padded more than expected. Length %d not %d for cmd:%d mid:%llu\n",
+ len, clc_len, command,
+ le64_to_cpu(hdr->MessageId));
+ return 0;
+ }
+
+ /* NOTE(review): 88-byte lock requests are accepted as-is; the rationale is not visible here. */
+ if (command == SMB2_LOCK_HE && len == 88)
+ return 0;
+
+ ksmbd_debug(SMB,
+ "cli req too short, len %d not %d. cmd:%d mid:%llu\n",
+ len, clc_len, command,
+ le64_to_cpu(hdr->MessageId));
+
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * SMB2 NEGOTIATE command handler: forwards to the dialect-independent
+ * negotiate code with the SMB2 negotiate command value.
+ */
+int smb2_negotiate_request(struct ksmbd_work *work)
+{
+ return ksmbd_smb_negotiate_common(work, SMB2_NEGOTIATE_HE);
+}
diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c
new file mode 100644
index 000000000000..197473871aa4
--- /dev/null
+++ b/fs/ksmbd/smb2ops.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/slab.h>
+#include "glob.h"
+#include "smb2pdu.h"
+
+#include "auth.h"
+#include "connection.h"
+#include "smb_common.h"
+#include "server.h"
+
+/*
+ * Protocol parameters for SMB 2.1 connections.  Not const: the max_*_size
+ * fields are overwritten by the init_smb2_max_*_size() setters and
+ * init_smb2_1_server() may OR further bits into .capabilities.
+ */
+static struct smb_version_values smb21_server_values = {
+ .version_string = SMB21_VERSION_STRING,
+ .protocol_id = SMB21_PROT_ID,
+ .capabilities = SMB2_GLOBAL_CAP_LARGE_MTU,
+ .max_read_size = SMB21_DEFAULT_IOSIZE,
+ .max_write_size = SMB21_DEFAULT_IOSIZE,
+ .max_trans_size = SMB21_DEFAULT_IOSIZE,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED,
+ .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+ .header_size = sizeof(struct smb2_hdr),
+ .max_header_size = MAX_SMB2_HDR_SIZE,
+ .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .lock_cmd = SMB2_LOCK,
+ .cap_unix = 0,
+ .cap_nt_find = SMB2_NT_FIND,
+ .cap_large_files = SMB2_LARGE_FILES,
+ .create_lease_size = sizeof(struct create_lease),
+ .create_durable_size = sizeof(struct create_durable_rsp),
+ .create_mxac_size = sizeof(struct create_mxac_rsp),
+ .create_disk_id_size = sizeof(struct create_disk_id_rsp),
+ .create_posix_size = sizeof(struct create_posix_rsp),
+};
+
+/*
+ * Protocol parameters for SMB 3.0 connections.  Not const: the max_*_size
+ * fields are overwritten by the init_smb2_max_*_size() setters and
+ * init_smb3_0_server() may OR further bits into .capabilities.
+ */
+static struct smb_version_values smb30_server_values = {
+ .version_string = SMB30_VERSION_STRING,
+ .protocol_id = SMB30_PROT_ID,
+ .capabilities = SMB2_GLOBAL_CAP_LARGE_MTU,
+ .max_read_size = SMB3_DEFAULT_IOSIZE,
+ .max_write_size = SMB3_DEFAULT_IOSIZE,
+ .max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED,
+ .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+ .header_size = sizeof(struct smb2_hdr),
+ .max_header_size = MAX_SMB2_HDR_SIZE,
+ .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .lock_cmd = SMB2_LOCK,
+ .cap_unix = 0,
+ .cap_nt_find = SMB2_NT_FIND,
+ .cap_large_files = SMB2_LARGE_FILES,
+ .create_lease_size = sizeof(struct create_lease_v2),
+ .create_durable_size = sizeof(struct create_durable_rsp),
+ .create_durable_v2_size = sizeof(struct create_durable_v2_rsp),
+ .create_mxac_size = sizeof(struct create_mxac_rsp),
+ .create_disk_id_size = sizeof(struct create_disk_id_rsp),
+ .create_posix_size = sizeof(struct create_posix_rsp),
+};
+
+/*
+ * Protocol parameters for SMB 3.0.2 connections.  Not const: the max_*_size
+ * fields are overwritten by the init_smb2_max_*_size() setters and
+ * init_smb3_02_server() may OR further bits into .capabilities.
+ */
+static struct smb_version_values smb302_server_values = {
+ .version_string = SMB302_VERSION_STRING,
+ .protocol_id = SMB302_PROT_ID,
+ .capabilities = SMB2_GLOBAL_CAP_LARGE_MTU,
+ .max_read_size = SMB3_DEFAULT_IOSIZE,
+ .max_write_size = SMB3_DEFAULT_IOSIZE,
+ .max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED,
+ .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+ .header_size = sizeof(struct smb2_hdr),
+ .max_header_size = MAX_SMB2_HDR_SIZE,
+ .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .lock_cmd = SMB2_LOCK,
+ .cap_unix = 0,
+ .cap_nt_find = SMB2_NT_FIND,
+ .cap_large_files = SMB2_LARGE_FILES,
+ .create_lease_size = sizeof(struct create_lease_v2),
+ .create_durable_size = sizeof(struct create_durable_rsp),
+ .create_durable_v2_size = sizeof(struct create_durable_v2_rsp),
+ .create_mxac_size = sizeof(struct create_mxac_rsp),
+ .create_disk_id_size = sizeof(struct create_disk_id_rsp),
+ .create_posix_size = sizeof(struct create_posix_rsp),
+};
+
+/*
+ * Protocol parameters for SMB 3.1.1 connections.  Not const: the max_*_size
+ * fields are overwritten by the init_smb2_max_*_size() setters and
+ * init_smb3_11_server() may OR further bits into .capabilities.
+ */
+static struct smb_version_values smb311_server_values = {
+ .version_string = SMB311_VERSION_STRING,
+ .protocol_id = SMB311_PROT_ID,
+ .capabilities = SMB2_GLOBAL_CAP_LARGE_MTU,
+ .max_read_size = SMB3_DEFAULT_IOSIZE,
+ .max_write_size = SMB3_DEFAULT_IOSIZE,
+ .max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED,
+ .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+ .header_size = sizeof(struct smb2_hdr),
+ .max_header_size = MAX_SMB2_HDR_SIZE,
+ .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .lock_cmd = SMB2_LOCK,
+ .cap_unix = 0,
+ .cap_nt_find = SMB2_NT_FIND,
+ .cap_large_files = SMB2_LARGE_FILES,
+ .create_lease_size = sizeof(struct create_lease_v2),
+ .create_durable_size = sizeof(struct create_durable_rsp),
+ .create_durable_v2_size = sizeof(struct create_durable_v2_rsp),
+ .create_mxac_size = sizeof(struct create_mxac_rsp),
+ .create_disk_id_size = sizeof(struct create_disk_id_rsp),
+ .create_posix_size = sizeof(struct create_posix_rsp),
+};
+
+/*
+ * Operations used for SMB 2.1 connections (installed by
+ * init_smb2_1_server()).  No key generation or encryption hooks are set.
+ */
+static struct smb_version_ops smb2_0_server_ops = {
+ .get_cmd_val = get_smb2_cmd_val,
+ .init_rsp_hdr = init_smb2_rsp_hdr,
+ .set_rsp_status = set_smb2_rsp_status,
+ .allocate_rsp_buf = smb2_allocate_rsp_buf,
+ .set_rsp_credits = smb2_set_rsp_credits,
+ .check_user_session = smb2_check_user_session,
+ .get_ksmbd_tcon = smb2_get_ksmbd_tcon,
+ .is_sign_req = smb2_is_sign_req,
+ .check_sign_req = smb2_check_sign_req,
+ .set_sign_rsp = smb2_set_sign_rsp
+};
+
+/*
+ * Operations used for SMB 3.0 and 3.0.2 connections: SMB3 signing plus the
+ * SMB 3.0 key generation and the transform (encryption) hooks.
+ */
+static struct smb_version_ops smb3_0_server_ops = {
+ .get_cmd_val = get_smb2_cmd_val,
+ .init_rsp_hdr = init_smb2_rsp_hdr,
+ .set_rsp_status = set_smb2_rsp_status,
+ .allocate_rsp_buf = smb2_allocate_rsp_buf,
+ .set_rsp_credits = smb2_set_rsp_credits,
+ .check_user_session = smb2_check_user_session,
+ .get_ksmbd_tcon = smb2_get_ksmbd_tcon,
+ .is_sign_req = smb2_is_sign_req,
+ .check_sign_req = smb3_check_sign_req,
+ .set_sign_rsp = smb3_set_sign_rsp,
+ .generate_signingkey = ksmbd_gen_smb30_signingkey,
+ .generate_encryptionkey = ksmbd_gen_smb30_encryptionkey,
+ .is_transform_hdr = smb3_is_transform_hdr,
+ .decrypt_req = smb3_decrypt_req,
+ .encrypt_resp = smb3_encrypt_resp
+};
+
+/*
+ * Operations used for SMB 3.1.1 connections: same as smb3_0_server_ops
+ * except that the SMB 3.1.1 key generation routines are used.
+ */
+static struct smb_version_ops smb3_11_server_ops = {
+ .get_cmd_val = get_smb2_cmd_val,
+ .init_rsp_hdr = init_smb2_rsp_hdr,
+ .set_rsp_status = set_smb2_rsp_status,
+ .allocate_rsp_buf = smb2_allocate_rsp_buf,
+ .set_rsp_credits = smb2_set_rsp_credits,
+ .check_user_session = smb2_check_user_session,
+ .get_ksmbd_tcon = smb2_get_ksmbd_tcon,
+ .is_sign_req = smb2_is_sign_req,
+ .check_sign_req = smb3_check_sign_req,
+ .set_sign_rsp = smb3_set_sign_rsp,
+ .generate_signingkey = ksmbd_gen_smb311_signingkey,
+ .generate_encryptionkey = ksmbd_gen_smb311_encryptionkey,
+ .is_transform_hdr = smb3_is_transform_hdr,
+ .decrypt_req = smb3_decrypt_req,
+ .encrypt_resp = smb3_encrypt_resp
+};
+
+/*
+ * Command dispatch table, indexed by the host-endian SMB2 command value
+ * (SMB2_*_HE).  Every dialect init function in this file installs this
+ * same table.
+ */
+static struct smb_version_cmds smb2_0_server_cmds[NUMBER_OF_SMB2_COMMANDS] = {
+ [SMB2_NEGOTIATE_HE] = { .proc = smb2_negotiate_request, },
+ [SMB2_SESSION_SETUP_HE] = { .proc = smb2_sess_setup, },
+ [SMB2_TREE_CONNECT_HE] = { .proc = smb2_tree_connect,},
+ [SMB2_TREE_DISCONNECT_HE] = { .proc = smb2_tree_disconnect,},
+ [SMB2_LOGOFF_HE] = { .proc = smb2_session_logoff,},
+ [SMB2_CREATE_HE] = { .proc = smb2_open},
+ [SMB2_QUERY_INFO_HE] = { .proc = smb2_query_info},
+ [SMB2_QUERY_DIRECTORY_HE] = { .proc = smb2_query_dir},
+ [SMB2_CLOSE_HE] = { .proc = smb2_close},
+ [SMB2_ECHO_HE] = { .proc = smb2_echo},
+ [SMB2_SET_INFO_HE] = { .proc = smb2_set_info},
+ [SMB2_READ_HE] = { .proc = smb2_read},
+ [SMB2_WRITE_HE] = { .proc = smb2_write},
+ [SMB2_FLUSH_HE] = { .proc = smb2_flush},
+ [SMB2_CANCEL_HE] = { .proc = smb2_cancel},
+ [SMB2_LOCK_HE] = { .proc = smb2_lock},
+ [SMB2_IOCTL_HE] = { .proc = smb2_ioctl},
+ [SMB2_OPLOCK_BREAK_HE] = { .proc = smb2_oplock_break},
+ [SMB2_CHANGE_NOTIFY_HE] = { .proc = smb2_notify},
+};
+
+/*
+ * SMB 2.0 is not set up by this file: @conn is left untouched and
+ * -EOPNOTSUPP is returned.
+ */
+int init_smb2_0_server(struct ksmbd_conn *conn)
+{
+ return -EOPNOTSUPP;
+}
+
+/**
+ * init_smb2_1_server() - set up a connection for the SMB 2.1 dialect
+ * @conn:	connection instance
+ *
+ * Installs the SMB 2.1 value table, operations and command dispatcher,
+ * selects HMAC-SHA256 signing and advertises leasing when it is enabled in
+ * the global server flags.
+ */
+void init_smb2_1_server(struct ksmbd_conn *conn)
+{
+	struct smb_version_values *vals = &smb21_server_values;
+
+	conn->vals = vals;
+	conn->ops = &smb2_0_server_ops;
+	conn->cmds = smb2_0_server_cmds;
+	conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
+	conn->max_credits = SMB2_MAX_CREDITS;
+	conn->signing_algorithm = SIGNING_ALG_HMAC_SHA256;
+
+	/*
+	 * NOTE(review): vals is the file-scope table, not a per-connection
+	 * copy, so a capability bit set here stays set for later connections.
+	 */
+	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
+		vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
+}
+
+/**
+ * init_smb3_0_server() - set up a connection for the SMB 3.0 dialect
+ * @conn:	connection instance
+ *
+ * Installs the SMB 3.0 value table, operations and command dispatcher,
+ * selects AES-CMAC signing and advertises leasing, encryption (only if the
+ * client announced it as well) and multichannel according to the global
+ * server flags.
+ */
+void init_smb3_0_server(struct ksmbd_conn *conn)
+{
+	struct smb_version_values *vals = &smb30_server_values;
+
+	conn->vals = vals;
+	conn->ops = &smb3_0_server_ops;
+	conn->cmds = smb2_0_server_cmds;
+	conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
+	conn->max_credits = SMB2_MAX_CREDITS;
+	conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
+
+	/*
+	 * NOTE(review): vals is the file-scope table, not a per-connection
+	 * copy, so capability bits set here stay set for later connections.
+	 */
+	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
+		vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
+
+	if ((server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION) &&
+	    (conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
+		vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+
+	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
+		vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
+}
+
+/**
+ * init_smb3_02_server() - set up a connection for the SMB 3.0.2 dialect
+ * @conn:	connection instance
+ *
+ * Installs the SMB 3.0.2 value table together with the SMB 3.0 operations
+ * and the common command dispatcher, selects AES-CMAC signing and
+ * advertises leasing, encryption (only if the client announced it as well)
+ * and multichannel according to the global server flags.
+ */
+void init_smb3_02_server(struct ksmbd_conn *conn)
+{
+	struct smb_version_values *vals = &smb302_server_values;
+
+	conn->vals = vals;
+	conn->ops = &smb3_0_server_ops;
+	conn->cmds = smb2_0_server_cmds;
+	conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
+	conn->max_credits = SMB2_MAX_CREDITS;
+	conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
+
+	/*
+	 * NOTE(review): vals is the file-scope table, not a per-connection
+	 * copy, so capability bits set here stay set for later connections.
+	 */
+	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
+		vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
+
+	if ((server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION) &&
+	    (conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
+		vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+
+	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
+		vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
+}
+
+/**
+ * init_smb3_11_server() - set up a connection for the SMB 3.1.1 dialect
+ * @conn:	connection instance
+ *
+ * Installs the SMB 3.1.1 value table, operations and command dispatcher,
+ * selects AES-CMAC signing, advertises leasing and multichannel according
+ * to the global server flags, advertises encryption when a cipher type has
+ * been set on the connection, and initializes the preauth session list.
+ *
+ * Return:	always 0
+ */
+int init_smb3_11_server(struct ksmbd_conn *conn)
+{
+	struct smb_version_values *vals = &smb311_server_values;
+
+	conn->vals = vals;
+	conn->ops = &smb3_11_server_ops;
+	conn->cmds = smb2_0_server_cmds;
+	conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
+	conn->max_credits = SMB2_MAX_CREDITS;
+	conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
+
+	/*
+	 * NOTE(review): vals is the file-scope table, not a per-connection
+	 * copy, so capability bits set here stay set for later connections.
+	 */
+	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
+		vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
+
+	if (conn->cipher_type)
+		vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+
+	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
+		vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
+
+	INIT_LIST_HEAD(&conn->preauth_sess_table);
+	return 0;
+}
+
+/* Set the maximum read size advertised by every supported dialect to @sz. */
+void init_smb2_max_read_size(unsigned int sz)
+{
+	struct smb_version_values * const dialects[] = {
+		&smb21_server_values,
+		&smb30_server_values,
+		&smb302_server_values,
+		&smb311_server_values,
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(dialects); i++)
+		dialects[i]->max_read_size = sz;
+}
+
+/* Set the maximum write size advertised by every supported dialect to @sz. */
+void init_smb2_max_write_size(unsigned int sz)
+{
+	struct smb_version_values * const dialects[] = {
+		&smb21_server_values,
+		&smb30_server_values,
+		&smb302_server_values,
+		&smb311_server_values,
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(dialects); i++)
+		dialects[i]->max_write_size = sz;
+}
+
+/* Set the maximum transact size advertised by every supported dialect to @sz. */
+void init_smb2_max_trans_size(unsigned int sz)
+{
+	struct smb_version_values * const dialects[] = {
+		&smb21_server_values,
+		&smb30_server_values,
+		&smb302_server_values,
+		&smb311_server_values,
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(dialects); i++)
+		dialects[i]->max_trans_size = sz;
+}
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
new file mode 100644
index 000000000000..d329ea49fa14
--- /dev/null
+++ b/fs/ksmbd/smb2pdu.c
@@ -0,0 +1,8373 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#include <linux/syscalls.h>
+#include <linux/namei.h>
+#include <linux/statfs.h>
+#include <linux/ethtool.h>
+#include <linux/falloc.h>
+
+#include "glob.h"
+#include "smb2pdu.h"
+#include "smbfsctl.h"
+#include "oplock.h"
+#include "smbacl.h"
+
+#include "auth.h"
+#include "asn1.h"
+#include "connection.h"
+#include "transport_ipc.h"
+#include "transport_rdma.h"
+#include "vfs.h"
+#include "vfs_cache.h"
+#include "misc.h"
+
+#include "server.h"
+#include "smb_common.h"
+#include "smbstatus.h"
+#include "ksmbd_work.h"
+#include "mgmt/user_config.h"
+#include "mgmt/share_config.h"
+#include "mgmt/tree_connect.h"
+#include "mgmt/user_session.h"
+#include "mgmt/ksmbd_ida.h"
+#include "ndr.h"
+
+/*
+ * Pick the request/response buffers for the command currently being handled:
+ * the base buffers while next_smb2_rcv_hdr_off is zero, otherwise the
+ * buffers returned by ksmbd_req_buf_next()/ksmbd_resp_buf_next().
+ */
+static void __wbuf(struct ksmbd_work *work, void **req, void **rsp)
+{
+	if (!work->next_smb2_rcv_hdr_off) {
+		*req = work->request_buf;
+		*rsp = work->response_buf;
+		return;
+	}
+
+	*req = ksmbd_req_buf_next(work);
+	*rsp = ksmbd_resp_buf_next(work);
+}
+
+/* Stores the selected buffers into the pointer lvalues rq and rs. */
+#define WORK_BUFFERS(w, rq, rs)	__wbuf((w), (void **)&(rq), (void **)&(rs))
+
+/**
+ * check_session_id() - check for valid session id in smb header
+ * @conn:	connection instance
+ * @id:		session id from smb header
+ *
+ * Ids 0 and -1 are rejected without a lookup; any other id is valid only if
+ * ksmbd_session_lookup_all() finds a session for it.
+ *
+ * Return: true if valid session id, otherwise false
+ */
+static inline bool check_session_id(struct ksmbd_conn *conn, u64 id)
+{
+	if (id == 0 || id == -1)
+		return false;
+
+	if (!ksmbd_session_lookup_all(conn, id)) {
+		pr_err("Invalid user session id: %llu\n", id);
+		return false;
+	}
+
+	return true;
+}
+
+/**
+ * lookup_chann_list() - find the channel of a session bound to a connection
+ * @sess:	session whose ksmbd_chann_list is searched
+ * @conn:	connection to match against each channel's conn pointer
+ *
+ * Return: the first matching channel, or NULL if there is none
+ */
+struct channel *lookup_chann_list(struct ksmbd_session *sess, struct ksmbd_conn *conn)
+{
+	struct channel *cur, *found = NULL;
+
+	list_for_each_entry(cur, &sess->ksmbd_chann_list, chann_list) {
+		if (cur->conn != conn)
+			continue;
+		found = cur;
+		break;
+	}
+
+	return found;
+}
+
+/**
+ * smb2_get_ksmbd_tcon() - get tree connection information using a tree id.
+ * @work:	smb work
+ *
+ * work->tcon is reset to NULL first and is only set when a lookup succeeds.
+ *
+ * Return: 0 for commands that are not checked against a tree connection
+ * (TREE_CONNECT, CANCEL, LOGOFF), 1 if the tree id from the request header
+ * matched a tree connection of the session, -ENOENT if the session has no
+ * tree connections, -EINVAL if the tree id is unknown
+ */
+int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
+{
+	struct smb2_hdr *req_hdr = work->request_buf;
+	unsigned int cmd;
+	unsigned int tree_id;
+
+	work->tcon = NULL;
+
+	/* fetch the command once instead of once per comparison */
+	cmd = work->conn->ops->get_cmd_val(work);
+	if (cmd == SMB2_TREE_CONNECT_HE ||
+	    cmd == SMB2_CANCEL_HE ||
+	    cmd == SMB2_LOGOFF_HE) {
+		ksmbd_debug(SMB, "skip to check tree connect request\n");
+		return 0;
+	}
+
+	if (xa_empty(&work->sess->tree_conns)) {
+		ksmbd_debug(SMB, "NO tree connected\n");
+		return -ENOENT;
+	}
+
+	/*
+	 * The tree id is a 32-bit value from the wire; keep it unsigned so
+	 * ids above INT_MAX are not logged as negative numbers.
+	 * NOTE(review): the id is read from the first header in request_buf
+	 * even when a later command of a compound is being processed - confirm
+	 * this is intended.
+	 */
+	tree_id = le32_to_cpu(req_hdr->Id.SyncId.TreeId);
+	work->tcon = ksmbd_tree_conn_lookup(work->sess, tree_id);
+	if (!work->tcon) {
+		pr_err("Invalid tid %u\n", tree_id);
+		return -EINVAL;
+	}
+
+	return 1;
+}
+
+/**
+ * smb2_set_err_rsp() - fill in the body of an smb2 error response
+ * @work:	smb work containing response buffer
+ *
+ * The response is left untouched when its status is
+ * STATUS_STOPPED_ON_SYMLINK. Otherwise an empty error body is written and the
+ * RFC1001 length of the response buffer grows by SMB2_ERROR_STRUCTURE_SIZE2.
+ */
+void smb2_set_err_rsp(struct ksmbd_work *work)
+{
+	struct smb2_err_rsp *err_rsp;
+
+	err_rsp = work->next_smb2_rcv_hdr_off ?
+		  ksmbd_resp_buf_next(work) : work->response_buf;
+
+	if (err_rsp->hdr.Status == STATUS_STOPPED_ON_SYMLINK)
+		return;
+
+	err_rsp->StructureSize = SMB2_ERROR_STRUCTURE_SIZE2_LE;
+	err_rsp->ErrorContextCount = 0;
+	err_rsp->Reserved = 0;
+	err_rsp->ByteCount = 0;
+	err_rsp->ErrorData[0] = 0;
+	inc_rfc1001_len(work->response_buf, SMB2_ERROR_STRUCTURE_SIZE2);
+}
+
+/**
+ * is_smb2_neg_cmd() - is it smb2 negotiation command
+ * @work:	smb work containing smb header
+ *
+ * Return: true if smb2 negotiation command, otherwise false
+ */
+bool is_smb2_neg_cmd(struct ksmbd_work *work)
+{
+	struct smb2_hdr *hdr = work->request_buf;
+
+	/*
+	 * Three conditions, checked in this order: SMB2 protocol id, not a
+	 * server-to-client (response) message, command is NEGOTIATE.
+	 */
+	return hdr->ProtocolId == SMB2_PROTO_NUMBER &&
+	       !(hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR) &&
+	       hdr->Command == SMB2_NEGOTIATE;
+}
+
+/**
+ * is_smb2_rsp() - is it smb2 response
+ * @work:	smb work containing smb response buffer
+ *
+ * Return: true if smb2 response, otherwise false
+ */
+bool is_smb2_rsp(struct ksmbd_work *work)
+{
+	struct smb2_hdr *hdr = work->response_buf;
+
+	/* an SMB2 header that carries the server-to-client flag */
+	if (hdr->ProtocolId == SMB2_PROTO_NUMBER &&
+	    (hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR))
+		return true;
+
+	return false;
+}
+
+/**
+ * get_smb2_cmd_val() - get smb command code from smb header
+ * @work:	smb work containing smb request buffer
+ *
+ * Reads the header returned by ksmbd_req_buf_next() when
+ * next_smb2_rcv_hdr_off is non-zero, otherwise the one at request_buf.
+ *
+ * Return: smb2 request command value in CPU byte order
+ */
+u16 get_smb2_cmd_val(struct ksmbd_work *work)
+{
+	struct smb2_hdr *rcv_hdr = work->next_smb2_rcv_hdr_off ?
+				   ksmbd_req_buf_next(work) : work->request_buf;
+
+	return le16_to_cpu(rcv_hdr->Command);
+}
+
+/**
+ * set_smb2_rsp_status() - set error response code on smb2 header
+ * @work:	smb work containing response buffer
+ * @err:	error response code
+ *
+ * Stores @err in the current response header and then lets
+ * smb2_set_err_rsp() fill in the error body.
+ */
+void set_smb2_rsp_status(struct ksmbd_work *work, __le32 err)
+{
+	struct smb2_hdr *rsp_hdr = work->next_smb2_rcv_hdr_off ?
+				   ksmbd_resp_buf_next(work) : work->response_buf;
+
+	rsp_hdr->Status = err;
+	smb2_set_err_rsp(work);
+}
+
+/**
+ * init_smb2_neg_rsp() - initialize smb2 response for negotiate command
+ * @work: smb work containing smb request buffer
+ *
+ * smb2 negotiate response is sent in reply of smb1 negotiate command for
+ * dialect auto-negotiation.
+ *
+ * Return: 0 on success, -EINVAL if conn->need_neg is false or conn->dialect
+ * is outside the range SMB20_PROT_ID..SMB311_PROT_ID
+ */
+int init_smb2_neg_rsp(struct ksmbd_work *work)
+{
+ struct smb2_hdr *rsp_hdr;
+ struct smb2_negotiate_rsp *rsp;
+ struct ksmbd_conn *conn = work->conn;
+
+ if (conn->need_neg == false)
+ return -EINVAL;
+ if (!(conn->dialect >= SMB20_PROT_ID &&
+ conn->dialect <= SMB311_PROT_ID))
+ return -EINVAL;
+
+ rsp_hdr = work->response_buf;
+
+ /* clear the header and the two bytes that follow it */
+ memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
+
+ rsp_hdr->smb2_buf_length =
+ cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
+
+ /* no request header is mirrored here: ids are zero, CreditRequest is 2 */
+ rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
+ rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
+ rsp_hdr->CreditRequest = cpu_to_le16(2);
+ rsp_hdr->Command = SMB2_NEGOTIATE;
+ rsp_hdr->Flags = (SMB2_FLAGS_SERVER_TO_REDIR);
+ rsp_hdr->NextCommand = 0;
+ rsp_hdr->MessageId = 0;
+ rsp_hdr->Id.SyncId.ProcessId = 0;
+ rsp_hdr->Id.SyncId.TreeId = 0;
+ rsp_hdr->SessionId = 0;
+ memset(rsp_hdr->Signature, 0, 16);
+
+ /* same buffer, now viewed as a negotiate response */
+ rsp = work->response_buf;
+
+ /* warns if the connection is already in the "good" state */
+ WARN_ON(ksmbd_conn_good(work));
+
+ rsp->StructureSize = cpu_to_le16(65);
+ ksmbd_debug(SMB, "conn->dialect 0x%x\n", conn->dialect);
+ rsp->DialectRevision = cpu_to_le16(conn->dialect);
+ /* Not setting conn guid rsp->ServerGUID, as it
+ * not used by client for identifying connection
+ */
+ rsp->Capabilities = cpu_to_le32(conn->vals->capabilities);
+ /* Default Max Message Size till SMB2.0, 64K */
+ rsp->MaxTransactSize = cpu_to_le32(conn->vals->max_trans_size);
+ rsp->MaxReadSize = cpu_to_le32(conn->vals->max_read_size);
+ rsp->MaxWriteSize = cpu_to_le32(conn->vals->max_write_size);
+
+ rsp->SystemTime = cpu_to_le64(ksmbd_systime());
+ rsp->ServerStartTime = 0;
+
+ /*
+ * The security buffer offset is applied below relative to the byte that
+ * follows smb2_buf_length, and AUTH_GSS_LENGTH is used as its length.
+ */
+ rsp->SecurityBufferOffset = cpu_to_le16(128);
+ rsp->SecurityBufferLength = cpu_to_le16(AUTH_GSS_LENGTH);
+ ksmbd_copy_gss_neg_header(((char *)(&rsp->hdr) +
+ sizeof(rsp->hdr.smb2_buf_length)) +
+ le16_to_cpu(rsp->SecurityBufferOffset));
+ /* grow the RFC1001 length by the fixed negotiate body plus the blob */
+ inc_rfc1001_len(rsp, sizeof(struct smb2_negotiate_rsp) -
+ sizeof(struct smb2_hdr) - sizeof(rsp->Buffer) +
+ AUTH_GSS_LENGTH);
+ /* signing is always enabled; required only when configured mandatory */
+ rsp->SecurityMode = SMB2_NEGOTIATE_SIGNING_ENABLED_LE;
+ if (server_conf.signing == KSMBD_CONFIG_OPT_MANDATORY)
+ rsp->SecurityMode |= SMB2_NEGOTIATE_SIGNING_REQUIRED_LE;
+ conn->use_spnego = true;
+
+ ksmbd_conn_set_need_negotiate(work);
+ return 0;
+}
+
+/*
+ * Deduct the credits charged by a request from conn->total_credits.
+ *
+ * Nothing is deducted (and 0 is returned) while total_credits is zero. A
+ * credit_charge of 0 is billed as one credit.
+ *
+ * NOTE(review): the charge is not compared against total_credits before the
+ * subtraction, so a charge larger than the current total makes the counter
+ * go below zero (or wrap, if it is unsigned) - confirm against the field's
+ * declaration.
+ *
+ * Return: number of credits deducted
+ */
+static int smb2_consume_credit_charge(struct ksmbd_work *work,
+				      unsigned short credit_charge)
+{
+	struct ksmbd_conn *conn = work->conn;
+	unsigned int charged;
+
+	if (!conn->total_credits)
+		return 0;
+
+	charged = credit_charge > 0 ? credit_charge : 1;
+	conn->total_credits -= charged;
+
+	return charged;
+}
+
+/**
+ * smb2_set_rsp_credits() - set number of credits in response buffer
+ * @work: smb work containing smb response buffer
+ *
+ * Charges the request's CreditCharge against conn->total_credits, decides how
+ * many credits to grant back and accumulates them in work->credits_granted.
+ * The accumulated value is written to the response header only for a request
+ * whose NextCommand is zero. CANCEL responses are left untouched.
+ *
+ * Return: 0, or -EINVAL if smb2_consume_credit_charge() reports a negative
+ * value
+ */
+int smb2_set_rsp_credits(struct ksmbd_work *work)
+{
+ struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work);
+ struct smb2_hdr *hdr = ksmbd_resp_buf_next(work);
+ struct ksmbd_conn *conn = work->conn;
+ unsigned short credits_requested = le16_to_cpu(req_hdr->CreditRequest);
+ /* credit_charge stays 1 here: it is the base grant added to aux_credits */
+ unsigned short credit_charge = 1, credits_granted = 0;
+ unsigned short aux_max, aux_credits, min_credits;
+ int rsp_credit_charge;
+
+ if (hdr->Command == SMB2_CANCEL)
+ goto out;
+
+ /* get default minimum credits by shifting maximum credits by 4 */
+ min_credits = conn->max_credits >> 4;
+
+ /* the running total reached the limit: log it and restart from min_credits */
+ if (conn->total_credits >= conn->max_credits) {
+ pr_err("Total credits overflow: %d\n", conn->total_credits);
+ conn->total_credits = min_credits;
+ }
+
+ rsp_credit_charge =
+ smb2_consume_credit_charge(work, le16_to_cpu(req_hdr->CreditCharge));
+ if (rsp_credit_charge < 0)
+ return -EINVAL;
+
+ hdr->CreditCharge = cpu_to_le16(rsp_credit_charge);
+
+ if (credits_requested > 0) {
+ /*
+ * Grant one credit plus up to aux_max of the additional credits
+ * requested; NEGOTIATE gets no additional credits.
+ */
+ aux_credits = credits_requested - 1;
+ aux_max = 32;
+ if (hdr->Command == SMB2_NEGOTIATE)
+ aux_max = 0;
+ aux_credits = (aux_credits < aux_max) ? aux_credits : aux_max;
+ credits_granted = aux_credits + credit_charge;
+
+ /* if credits granted per client is getting bigger than default
+ * minimum credits then we should wrap it up within the limits.
+ */
+ if ((conn->total_credits + credits_granted) > min_credits)
+ credits_granted = min_credits - conn->total_credits;
+ /*
+ * TODO: Need to adjust CreditRequest value according to
+ * current cpu load
+ */
+ } else if (conn->total_credits == 0) {
+ /* nothing requested and nothing left: still grant a single credit */
+ credits_granted = 1;
+ }
+
+ conn->total_credits += credits_granted;
+ work->credits_granted += credits_granted;
+
+ if (!req_hdr->NextCommand) {
+ /* Update CreditRequest in last request */
+ hdr->CreditRequest = cpu_to_le16(work->credits_granted);
+ }
+out:
+ ksmbd_debug(SMB,
+ "credits: requested[%d] granted[%d] total_granted[%d]\n",
+ credits_requested, credits_granted,
+ conn->total_credits);
+ return 0;
+}
+
+/**
+ * init_chained_smb2_rsp() - initialize smb2 chained response
+ * @work: smb work containing smb response buffer
+ *
+ * Closes the response of the command just handled (8-byte aligns it and
+ * sets its NextCommand), advances the request and response offsets in @work,
+ * and initializes the header of the next response from the next request
+ * header.
+ */
+static void init_chained_smb2_rsp(struct ksmbd_work *work)
+{
+ struct smb2_hdr *req = ksmbd_req_buf_next(work);
+ struct smb2_hdr *rsp = ksmbd_resp_buf_next(work);
+ struct smb2_hdr *rsp_hdr;
+ struct smb2_hdr *rcv_hdr;
+ int next_hdr_offset = 0;
+ int len, new_len;
+
+ /* Len of this response = updated RFC len - offset of previous cmd
+ * in the compound rsp
+ */
+
+ /* Storing the current local FID which may be needed by subsequent
+ * command in the compound request
+ */
+ if (req->Command == SMB2_CREATE && rsp->Status == STATUS_SUCCESS) {
+ work->compound_fid =
+ le64_to_cpu(((struct smb2_create_rsp *)rsp)->
+ VolatileFileId);
+ work->compound_pfid =
+ le64_to_cpu(((struct smb2_create_rsp *)rsp)->
+ PersistentFileId);
+ work->compound_sid = le64_to_cpu(rsp->SessionId);
+ }
+
+ len = get_rfc1002_len(work->response_buf) - work->next_smb2_rsp_hdr_off;
+ next_hdr_offset = le32_to_cpu(req->NextCommand);
+
+ /*
+ * Pad this response to a multiple of 8 and, in the same step, reserve
+ * room for the next response header (struct smb2_hdr minus 4 bytes).
+ */
+ new_len = ALIGN(len, 8);
+ inc_rfc1001_len(work->response_buf, ((sizeof(struct smb2_hdr) - 4)
+ + new_len - len));
+ rsp->NextCommand = cpu_to_le32(new_len);
+
+ /* from here on the *_buf_next() helpers refer to the next command */
+ work->next_smb2_rcv_hdr_off += next_hdr_offset;
+ work->next_smb2_rsp_hdr_off += new_len;
+ ksmbd_debug(SMB,
+ "Compound req new_len = %d rcv off = %d rsp off = %d\n",
+ new_len, work->next_smb2_rcv_hdr_off,
+ work->next_smb2_rsp_hdr_off);
+
+ rsp_hdr = ksmbd_resp_buf_next(work);
+ rcv_hdr = ksmbd_req_buf_next(work);
+
+ /* next request is not flagged as related: drop the saved compound file ids */
+ if (!(rcv_hdr->Flags & SMB2_FLAGS_RELATED_OPERATIONS)) {
+ ksmbd_debug(SMB, "related flag should be set\n");
+ work->compound_fid = KSMBD_NO_FID;
+ work->compound_pfid = KSMBD_NO_FID;
+ }
+ /* zero sizeof(struct smb2_hdr) + 2 bytes, starting 4 bytes into rsp_hdr */
+ memset((char *)rsp_hdr + 4, 0, sizeof(struct smb2_hdr) + 2);
+ rsp_hdr->ProtocolId = rcv_hdr->ProtocolId;
+ rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
+ rsp_hdr->Command = rcv_hdr->Command;
+
+ /*
+ * Message is response. We don't grant oplock yet.
+ */
+ rsp_hdr->Flags = (SMB2_FLAGS_SERVER_TO_REDIR |
+ SMB2_FLAGS_RELATED_OPERATIONS);
+ rsp_hdr->NextCommand = 0;
+ rsp_hdr->MessageId = rcv_hdr->MessageId;
+ rsp_hdr->Id.SyncId.ProcessId = rcv_hdr->Id.SyncId.ProcessId;
+ rsp_hdr->Id.SyncId.TreeId = rcv_hdr->Id.SyncId.TreeId;
+ rsp_hdr->SessionId = rcv_hdr->SessionId;
+ memcpy(rsp_hdr->Signature, rcv_hdr->Signature, 16);
+}
+
+/**
+ * is_chained_smb2_message() - check for chained command
+ * @work:	smb work containing smb request buffer
+ *
+ * When the current request header has a non-zero NextCommand, the response
+ * for the next command is prepared with init_chained_smb2_rsp(). When the
+ * current request is the last one of a chain, the response length is padded
+ * to a multiple of 8 bytes.
+ *
+ * NextCommand comes from the client, so it is checked against the length of
+ * the received message before it is used to advance to the next header. An
+ * offset that would place the next header outside the message ends the chain.
+ *
+ * Return: true if another chained request follows, otherwise false
+ */
+bool is_chained_smb2_message(struct ksmbd_work *work)
+{
+	struct smb2_hdr *hdr = work->request_buf;
+	unsigned int len, next_cmd;
+
+	if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
+		return false;
+
+	hdr = ksmbd_req_buf_next(work);
+	next_cmd = le32_to_cpu(hdr->NextCommand);
+	if (next_cmd > 0) {
+		/*
+		 * struct smb2_hdr starts with the 4-byte RFC1002 length field,
+		 * which is not counted in get_rfc1002_len(), hence the "- 4".
+		 * The sum is done in 64 bits so it cannot wrap.
+		 */
+		if ((u64)work->next_smb2_rcv_hdr_off + next_cmd +
+		    sizeof(struct smb2_hdr) - 4 >
+		    get_rfc1002_len(work->request_buf)) {
+			pr_err("next command(%u) offset exceeds smb msg size\n",
+			       next_cmd);
+			return false;
+		}
+
+		ksmbd_debug(SMB, "got SMB2 chained command\n");
+		init_chained_smb2_rsp(work);
+		return true;
+	} else if (work->next_smb2_rcv_hdr_off) {
+		/*
+		 * This is last request in chained command,
+		 * align response to 8 byte
+		 */
+		len = ALIGN(get_rfc1002_len(work->response_buf), 8);
+		len = len - get_rfc1002_len(work->response_buf);
+		if (len) {
+			ksmbd_debug(SMB, "padding len %u\n", len);
+			inc_rfc1001_len(work->response_buf, len);
+			if (work->aux_payload_sz)
+				work->aux_payload_sz += len;
+		}
+	}
+	return false;
+}
+
+/**
+ * init_smb2_rsp_hdr() - initialize smb2 response
+ * @work:	smb work containing smb request buffer
+ *
+ * Builds the first response header from the first request header, marks the
+ * work as synchronous and releases any async id it still holds.
+ *
+ * Return: 0
+ */
+int init_smb2_rsp_hdr(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct smb2_hdr *rcv_hdr = work->request_buf;
+	struct smb2_hdr *rsp_hdr = work->response_buf;
+
+	memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
+
+	/* fields that do not depend on the request */
+	rsp_hdr->smb2_buf_length =
+		cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
+	rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
+	/* Message is response. We don't grant oplock yet. */
+	rsp_hdr->Flags = (SMB2_FLAGS_SERVER_TO_REDIR);
+	rsp_hdr->NextCommand = 0;
+
+	/* fields mirrored from the request header */
+	rsp_hdr->ProtocolId = rcv_hdr->ProtocolId;
+	rsp_hdr->Command = rcv_hdr->Command;
+	rsp_hdr->MessageId = rcv_hdr->MessageId;
+	rsp_hdr->Id.SyncId.ProcessId = rcv_hdr->Id.SyncId.ProcessId;
+	rsp_hdr->Id.SyncId.TreeId = rcv_hdr->Id.SyncId.TreeId;
+	rsp_hdr->SessionId = rcv_hdr->SessionId;
+	memcpy(rsp_hdr->Signature, rcv_hdr->Signature, 16);
+
+	work->syncronous = true;
+	if (!work->async_id)
+		return 0;
+
+	ksmbd_release_id(&conn->async_ida, work->async_id);
+	work->async_id = 0;
+	return 0;
+}
+
+/**
+ * smb2_allocate_rsp_buf() - allocate smb2 response buffer
+ * @work:	smb work containing smb request buffer
+ *
+ * A small buffer (MAX_CIFS_SMALL_BUFFER_SIZE) is the default. A large one
+ * (max_trans_size + MAX_SMB2_HDR_SIZE) is used for IOCTL, QUERY_DIRECTORY,
+ * file-class QUERY_INFO of FILE_FULL_EA_INFORMATION or FILE_ALL_INFORMATION,
+ * and whenever the first request has a non-zero NextCommand.
+ *
+ * Return: 0 on success, otherwise -ENOMEM
+ */
+int smb2_allocate_rsp_buf(struct ksmbd_work *work)
+{
+	struct smb2_hdr *hdr = work->request_buf;
+	size_t sz = MAX_CIFS_SMALL_BUFFER_SIZE;
+	bool want_large = false;
+
+	switch (le16_to_cpu(hdr->Command)) {
+	case SMB2_IOCTL_HE:
+	case SMB2_QUERY_DIRECTORY_HE:
+		want_large = true;
+		break;
+	case SMB2_QUERY_INFO_HE: {
+		struct smb2_query_info_req *qi_req = work->request_buf;
+
+		if (qi_req->InfoType == SMB2_O_INFO_FILE &&
+		    (qi_req->FileInfoClass == FILE_FULL_EA_INFORMATION ||
+		     qi_req->FileInfoClass == FILE_ALL_INFORMATION))
+			want_large = true;
+		break;
+	}
+	default:
+		break;
+	}
+
+	/* allocate large response buf for chained commands */
+	if (le32_to_cpu(hdr->NextCommand) > 0)
+		want_large = true;
+
+	if (want_large)
+		sz = work->conn->vals->max_trans_size + MAX_SMB2_HDR_SIZE;
+
+	work->response_buf = kvmalloc(sz, GFP_KERNEL | __GFP_ZERO);
+	if (!work->response_buf)
+		return -ENOMEM;
+
+	work->response_sz = sz;
+	return 0;
+}
+
+/**
+ * smb2_check_user_session() - check for valid session for a user
+ * @work:	smb work containing smb request buffer
+ *
+ * work->sess is reset to NULL and set again only when the lookup succeeds.
+ *
+ * Return: 0 for ECHO, NEGOTIATE and SESSION_SETUP (no session required),
+ * 1 if the session id from the request header was found, -EINVAL if the
+ * connection is not in the good state or the session is unknown
+ */
+int smb2_check_user_session(struct ksmbd_work *work)
+{
+	struct smb2_hdr *req_hdr = work->request_buf;
+	struct ksmbd_conn *conn = work->conn;
+	unsigned int cmd = conn->ops->get_cmd_val(work);
+	unsigned long long sess_id;
+
+	work->sess = NULL;
+
+	switch (cmd) {
+	case SMB2_ECHO_HE:
+	case SMB2_NEGOTIATE_HE:
+	case SMB2_SESSION_SETUP_HE:
+		/* these commands do not require a session id */
+		return 0;
+	default:
+		break;
+	}
+
+	if (!ksmbd_conn_good(work))
+		return -EINVAL;
+
+	sess_id = le64_to_cpu(req_hdr->SessionId);
+	work->sess = ksmbd_session_lookup_all(conn, sess_id);
+	if (!work->sess) {
+		ksmbd_debug(SMB, "Invalid user session, Uid %llu\n", sess_id);
+		return -EINVAL;
+	}
+
+	return 1;
+}
+
+/*
+ * Destroy the session with the given id, but only if it belongs to the same
+ * user: same name, same passkey size and same passkey bytes. The reference
+ * obtained by the lookup is dropped in every case.
+ */
+static void destroy_previous_session(struct ksmbd_user *user, u64 id)
+{
+	struct ksmbd_session *prev_sess = ksmbd_session_lookup_slowpath(id);
+	struct ksmbd_user *prev_user;
+	bool same_user;
+
+	if (!prev_sess)
+		return;
+
+	prev_user = prev_sess->user;
+	same_user = prev_user &&
+		    !strcmp(user->name, prev_user->name) &&
+		    user->passkey_sz == prev_user->passkey_sz &&
+		    !memcmp(user->passkey, prev_user->passkey, user->passkey_sz);
+
+	put_session(prev_sess);
+	if (same_user)
+		ksmbd_session_destroy(prev_sess);
+}
+
+/**
+ * smb2_get_name() - get filename string from on the wire smb format
+ * @share:	ksmbd_share_config pointer
+ * @src:	source buffer
+ * @maxlen:	maxlen of source string
+ * @local_nls:	nls_table pointer
+ *
+ * The wire name is duplicated from UTF-16, converted to unix path form,
+ * stripped of a trailing slash and then passed to convert_to_unix_name()
+ * together with @share. The intermediate string is freed here.
+ *
+ * Return: matching converted filename on success, otherwise error ptr
+ */
+static char *
+smb2_get_name(struct ksmbd_share_config *share, const char *src,
+	      const int maxlen, struct nls_table *local_nls)
+{
+	char *wire_name, *abs_name;
+
+	wire_name = smb_strndup_from_utf16(src, maxlen, 1, local_nls);
+	if (IS_ERR(wire_name)) {
+		pr_err("failed to get name %ld\n", PTR_ERR(wire_name));
+		return wire_name;
+	}
+
+	/* change it to absolute unix name */
+	ksmbd_conv_path_to_unix(wire_name);
+	ksmbd_strip_last_slash(wire_name);
+	abs_name = convert_to_unix_name(share, wire_name);
+	kfree(wire_name);
+
+	if (abs_name) {
+		ksmbd_debug(SMB, "absolute name = %s\n", abs_name);
+		return abs_name;
+	}
+
+	pr_err("can not convert absolute name\n");
+	return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * setup_async_work() - switch a work item to asynchronous processing
+ * @work:	smb work whose first response header is marked asynchronous
+ * @fn:		cancel callback, stored in work->cancel_fn
+ * @arg:	argument vector for @fn, stored in work->cancel_argv
+ *
+ * Acquires an async message id, records it in @work and in the response
+ * header, and queues the work on conn->async_requests if it is not queued
+ * already. Nothing in @work or in the response header is modified when no
+ * id can be acquired.
+ *
+ * Return: 0 on success, otherwise the negative value returned by
+ * ksmbd_acquire_async_msg_id()
+ */
+int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg)
+{
+	struct smb2_hdr *rsp_hdr = work->response_buf;
+	struct ksmbd_conn *conn = work->conn;
+	int id;
+
+	id = ksmbd_acquire_async_msg_id(&conn->async_ida);
+	if (id < 0) {
+		pr_err("Failed to alloc async message id\n");
+		return id;
+	}
+
+	/*
+	 * Flag the header as async only once an id is secured, so the error
+	 * path above leaves a plain synchronous header behind.
+	 */
+	rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND;
+	rsp_hdr->Id.AsyncId = cpu_to_le64(id);
+	work->syncronous = false;
+	work->async_id = id;
+
+	ksmbd_debug(SMB,
+		    "Send interim Response to inform async request id : %d\n",
+		    work->async_id);
+
+	work->cancel_fn = fn;
+	work->cancel_argv = arg;
+
+	if (list_empty(&work->async_request_entry)) {
+		spin_lock(&conn->request_lock);
+		list_add_tail(&work->async_request_entry, &conn->async_requests);
+		spin_unlock(&conn->request_lock);
+	}
+
+	return 0;
+}
+
+/**
+ * smb2_send_interim_resp() - send an interim response with the given status
+ * @work:	smb work containing response buffer
+ * @status:	status placed in the first response header for this send
+ *
+ * The error body is filled in before @status is stored. work->multiRsp is
+ * set only for the duration of the write, and the header status is reset to
+ * 0 afterwards.
+ */
+void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status)
+{
+	struct smb2_hdr *rsp_hdr = work->response_buf;
+
+	smb2_set_err_rsp(work);
+	rsp_hdr->Status = status;
+
+	work->multiRsp = 1;
+	ksmbd_conn_write(work);
+	work->multiRsp = 0;
+
+	rsp_hdr->Status = 0;
+}
+
+/*
+ * Map the file type bits of a unix mode to a reparse tag. Directories,
+ * regular files and any type not listed below yield 0.
+ */
+static __le32 smb2_get_reparse_tag_special_file(umode_t mode)
+{
+	switch (mode & S_IFMT) {
+	case S_IFLNK:
+		return IO_REPARSE_TAG_LX_SYMLINK_LE;
+	case S_IFIFO:
+		return IO_REPARSE_TAG_LX_FIFO_LE;
+	case S_IFSOCK:
+		return IO_REPARSE_TAG_AF_UNIX_LE;
+	case S_IFCHR:
+		return IO_REPARSE_TAG_LX_CHR_LE;
+	case S_IFBLK:
+		return IO_REPARSE_TAG_LX_BLK_LE;
+	default:
+		return 0;
+	}
+}
+
+/**
+ * smb2_get_dos_mode() - get file mode in dos format from unix mode
+ * @stat:	kstat containing file mode
+ * @attribute:	attribute flags
+ *
+ * Directories keep only the HIDDEN and SYSTEM bits of @attribute. Everything
+ * else keeps the bits in mask 0x00005137, always gets ARCHIVE, and may get
+ * SPARSE (regular files, when sparse support is advertised) and REPARSE
+ * (types that have a reparse tag).
+ *
+ * Return: converted dos mode
+ */
+static int smb2_get_dos_mode(struct kstat *stat, int attribute)
+{
+	int dos_attr;
+
+	if (S_ISDIR(stat->mode))
+		return ATTR_DIRECTORY |
+		       (attribute & (ATTR_HIDDEN | ATTR_SYSTEM));
+
+	dos_attr = (attribute & 0x00005137) | ATTR_ARCHIVE;
+	dos_attr &= ~(ATTR_DIRECTORY);
+
+	if (S_ISREG(stat->mode) &&
+	    (server_conf.share_fake_fscaps & FILE_SUPPORTS_SPARSE_FILES))
+		dos_attr |= ATTR_SPARSE;
+
+	if (smb2_get_reparse_tag_special_file(stat->mode))
+		dos_attr |= ATTR_REPARSE;
+
+	return dos_attr;
+}
+
+/*
+ * Fill a preauth integrity negotiate context that advertises the single
+ * hash algorithm hash_id together with a fresh random salt of
+ * SMB311_SALT_SIZE bytes.
+ */
+static void build_preauth_ctxt(struct smb2_preauth_neg_context *pneg_ctxt,
+			       __le16 hash_id)
+{
+	/* generic context header */
+	pneg_ctxt->ContextType = SMB2_PREAUTH_INTEGRITY_CAPABILITIES;
+	pneg_ctxt->DataLength = cpu_to_le16(38);
+	pneg_ctxt->Reserved = cpu_to_le32(0);
+
+	/* one hash algorithm */
+	pneg_ctxt->HashAlgorithmCount = cpu_to_le16(1);
+	pneg_ctxt->HashAlgorithms = hash_id;
+
+	/* salt */
+	pneg_ctxt->SaltLength = cpu_to_le16(SMB311_SALT_SIZE);
+	get_random_bytes(pneg_ctxt->Salt, SMB311_SALT_SIZE);
+}
+
+/* Fill an encryption negotiate context that lists the single cipher cipher_type. */
+static void build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt,
+			       __le16 cipher_type)
+{
+	/* generic context header */
+	pneg_ctxt->ContextType = SMB2_ENCRYPTION_CAPABILITIES;
+	pneg_ctxt->DataLength = cpu_to_le16(4);
+	pneg_ctxt->Reserved = cpu_to_le32(0);
+
+	/* one cipher */
+	pneg_ctxt->Ciphers[0] = cipher_type;
+	pneg_ctxt->CipherCount = cpu_to_le16(1);
+}
+
+/*
+ * Fill a compression negotiate context that lists the single algorithm
+ * comp_algo. DataLength is the size of the context structure without the
+ * generic context header.
+ */
+static void build_compression_ctxt(struct smb2_compression_ctx *pneg_ctxt,
+				   __le16 comp_algo)
+{
+	const size_t data_len = sizeof(struct smb2_compression_ctx) -
+				sizeof(struct smb2_neg_context);
+
+	/* generic context header */
+	pneg_ctxt->ContextType = SMB2_COMPRESSION_CAPABILITIES;
+	pneg_ctxt->DataLength = cpu_to_le16(data_len);
+	pneg_ctxt->Reserved = cpu_to_le32(0);
+
+	/* one algorithm */
+	pneg_ctxt->CompressionAlgorithmCount = cpu_to_le16(1);
+	pneg_ctxt->Reserved1 = cpu_to_le32(0);
+	pneg_ctxt->CompressionAlgorithms[0] = comp_algo;
+}
+
+/*
+ * Fill a signing capabilities negotiate context that lists the single
+ * algorithm sign_algo. DataLength is the structure size plus 2, without the
+ * generic context header.
+ */
+static void build_sign_cap_ctxt(struct smb2_signing_capabilities *pneg_ctxt,
+				__le16 sign_algo)
+{
+	const size_t data_len = (sizeof(struct smb2_signing_capabilities) + 2) -
+				sizeof(struct smb2_neg_context);
+
+	/* generic context header */
+	pneg_ctxt->ContextType = SMB2_SIGNING_CAPABILITIES;
+	pneg_ctxt->DataLength = cpu_to_le16(data_len);
+	pneg_ctxt->Reserved = cpu_to_le32(0);
+
+	/* one algorithm */
+	pneg_ctxt->SigningAlgorithmCount = cpu_to_le16(1);
+	pneg_ctxt->SigningAlgorithms[0] = sign_algo;
+}
+
+/*
+ * Fill a POSIX extensions negotiate context: context type, data length and
+ * the 16-byte tag in Name.
+ */
+static void build_posix_ctxt(struct smb2_posix_neg_context *pneg_ctxt)
+{
+	/* SMB2_CREATE_TAG_POSIX is "0x93AD25509CB411E7B42383DE968BCD7C" */
+	static const unsigned char posix_tag[16] = {
+		0x93, 0xAD, 0x25, 0x50, 0x9C, 0xB4, 0x11, 0xE7,
+		0xB4, 0x23, 0x83, 0xDE, 0x96, 0x8B, 0xCD, 0x7C,
+	};
+
+	pneg_ctxt->ContextType = SMB2_POSIX_EXTENSIONS_AVAILABLE;
+	pneg_ctxt->DataLength = cpu_to_le16(POSIX_CTXT_DATA_LEN);
+	memcpy(pneg_ctxt->Name, posix_tag, sizeof(posix_tag));
+}
+
+/*
+ * Append the negotiate contexts to a negotiate response.
+ *
+ * Contexts are written back to back starting at NegotiateContextOffset. The
+ * preauth integrity context is always written; the encryption, compression,
+ * POSIX and signing contexts follow only when the matching connection field
+ * is set. NegotiateContextCount and the RFC1001 length of the response are
+ * updated to match.
+ */
+static void assemble_neg_contexts(struct ksmbd_conn *conn,
+ struct smb2_negotiate_rsp *rsp)
+{
+ /* +4 is to account for the RFC1001 len field */
+ char *pneg_ctxt = (char *)rsp +
+ le32_to_cpu(rsp->NegotiateContextOffset) + 4;
+ int neg_ctxt_cnt = 1;
+ int ctxt_size;
+
+ ksmbd_debug(SMB,
+ "assemble SMB2_PREAUTH_INTEGRITY_CAPABILITIES context\n");
+ build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt,
+ conn->preauth_info->Preauth_HashId);
+ rsp->NegotiateContextCount = cpu_to_le16(neg_ctxt_cnt);
+ /* NOTE(review): presumably the padding between the security blob and the first context - confirm */
+ inc_rfc1001_len(rsp, AUTH_GSS_PADDING);
+ ctxt_size = sizeof(struct smb2_preauth_neg_context);
+ /* Round to 8 byte boundary */
+ pneg_ctxt += round_up(sizeof(struct smb2_preauth_neg_context), 8);
+
+ /*
+ * Each optional context first rounds the running size up to 8, so the
+ * padding of the previous context is counted only when another one
+ * follows it.
+ */
+ if (conn->cipher_type) {
+ ctxt_size = round_up(ctxt_size, 8);
+ ksmbd_debug(SMB,
+ "assemble SMB2_ENCRYPTION_CAPABILITIES context\n");
+ build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt,
+ conn->cipher_type);
+ rsp->NegotiateContextCount = cpu_to_le16(++neg_ctxt_cnt);
+ ctxt_size += sizeof(struct smb2_encryption_neg_context) + 2;
+ /* Round to 8 byte boundary */
+ pneg_ctxt +=
+ round_up(sizeof(struct smb2_encryption_neg_context) + 2,
+ 8);
+ }
+
+ if (conn->compress_algorithm) {
+ ctxt_size = round_up(ctxt_size, 8);
+ ksmbd_debug(SMB,
+ "assemble SMB2_COMPRESSION_CAPABILITIES context\n");
+ /* Temporarily set to SMB3_COMPRESS_NONE */
+ build_compression_ctxt((struct smb2_compression_ctx *)pneg_ctxt,
+ conn->compress_algorithm);
+ rsp->NegotiateContextCount = cpu_to_le16(++neg_ctxt_cnt);
+ ctxt_size += sizeof(struct smb2_compression_ctx) + 2;
+ /* Round to 8 byte boundary */
+ pneg_ctxt += round_up(sizeof(struct smb2_compression_ctx) + 2,
+ 8);
+ }
+
+ if (conn->posix_ext_supported) {
+ ctxt_size = round_up(ctxt_size, 8);
+ ksmbd_debug(SMB,
+ "assemble SMB2_POSIX_EXTENSIONS_AVAILABLE context\n");
+ build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt);
+ rsp->NegotiateContextCount = cpu_to_le16(++neg_ctxt_cnt);
+ ctxt_size += sizeof(struct smb2_posix_neg_context);
+ /* Round to 8 byte boundary */
+ pneg_ctxt += round_up(sizeof(struct smb2_posix_neg_context), 8);
+ }
+
+ /* last possible context: pneg_ctxt is not advanced past it */
+ if (conn->signing_negotiated) {
+ ctxt_size = round_up(ctxt_size, 8);
+ ksmbd_debug(SMB,
+ "assemble SMB2_SIGNING_CAPABILITIES context\n");
+ build_sign_cap_ctxt((struct smb2_signing_capabilities *)pneg_ctxt,
+ conn->signing_algorithm);
+ rsp->NegotiateContextCount = cpu_to_le16(++neg_ctxt_cnt);
+ ctxt_size += sizeof(struct smb2_signing_capabilities) + 2;
+ }
+
+ /* account for all contexts written above in one step */
+ inc_rfc1001_len(rsp, ctxt_size);
+}
+
+/*
+ * Accept the client's preauth integrity context only when its HashAlgorithms
+ * field is SHA-512, and record that hash id in conn->preauth_info.
+ *
+ * Return: STATUS_SUCCESS, or STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP for any
+ * other algorithm
+ */
+static __le32 decode_preauth_ctxt(struct ksmbd_conn *conn,
+				  struct smb2_preauth_neg_context *pneg_ctxt)
+{
+	if (pneg_ctxt->HashAlgorithms != SMB2_PREAUTH_INTEGRITY_SHA512)
+		return STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP;
+
+	conn->preauth_info->Preauth_HashId = SMB2_PREAUTH_INTEGRITY_SHA512;
+	return STATUS_SUCCESS;
+}
+
+/*
+ * Pick the connection cipher from the client's encryption context.
+ *
+ * conn->cipher_type is reset to 0 first and stays 0 when the cipher list
+ * does not fit in len_of_ctxts, when encryption is disabled in the server
+ * flags, or when none of the listed ciphers is AES-128/256 CCM/GCM.
+ * Otherwise it becomes the first such cipher in the client's list.
+ */
+static void decode_encrypt_ctxt(struct ksmbd_conn *conn,
+				struct smb2_encryption_neg_context *pneg_ctxt,
+				int len_of_ctxts)
+{
+	int nr_ciphers = le16_to_cpu(pneg_ctxt->CipherCount);
+	int ciphers_bytes = nr_ciphers * sizeof(__le16);
+	int idx;
+
+	conn->cipher_type = 0;
+
+	if (sizeof(struct smb2_encryption_neg_context) + ciphers_bytes >
+	    len_of_ctxts) {
+		pr_err("Invalid cipher count(%d)\n", nr_ciphers);
+		return;
+	}
+
+	if (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION))
+		return;
+
+	for (idx = 0; idx < nr_ciphers; idx++) {
+		__le16 cipher = pneg_ctxt->Ciphers[idx];
+
+		if (cipher != SMB2_ENCRYPTION_AES128_GCM &&
+		    cipher != SMB2_ENCRYPTION_AES128_CCM &&
+		    cipher != SMB2_ENCRYPTION_AES256_CCM &&
+		    cipher != SMB2_ENCRYPTION_AES256_GCM)
+			continue;
+
+		ksmbd_debug(SMB, "Cipher ID = 0x%x\n", cipher);
+		conn->cipher_type = cipher;
+		return;
+	}
+}
+
+/*
+ * The client's compression context is not inspected: the connection's
+ * compression algorithm is always set to SMB3_COMPRESS_NONE.
+ */
+static void decode_compress_ctxt(struct ksmbd_conn *conn,
+ struct smb2_compression_ctx *pneg_ctxt)
+{
+ conn->compress_algorithm = SMB3_COMPRESS_NONE;
+}
+
+/*
+ * Pick the signing algorithm from the client's signing capabilities context.
+ *
+ * conn->signing_negotiated is reset to false first and stays false when the
+ * algorithm list does not fit in len_of_ctxts or contains neither
+ * HMAC-SHA256 nor AES-CMAC. Otherwise the first of those in the client's
+ * list is stored in conn->signing_algorithm.
+ */
+static void decode_sign_cap_ctxt(struct ksmbd_conn *conn,
+				 struct smb2_signing_capabilities *pneg_ctxt,
+				 int len_of_ctxts)
+{
+	int nr_algos = le16_to_cpu(pneg_ctxt->SigningAlgorithmCount);
+	int algos_bytes = nr_algos * sizeof(__le16);
+	int idx;
+
+	conn->signing_negotiated = false;
+
+	if (sizeof(struct smb2_signing_capabilities) + algos_bytes >
+	    len_of_ctxts) {
+		pr_err("Invalid signing algorithm count(%d)\n", nr_algos);
+		return;
+	}
+
+	for (idx = 0; idx < nr_algos; idx++) {
+		__le16 algo = pneg_ctxt->SigningAlgorithms[idx];
+
+		if (algo != SIGNING_ALG_HMAC_SHA256 &&
+		    algo != SIGNING_ALG_AES_CMAC)
+			continue;
+
+		ksmbd_debug(SMB, "Signing Algorithm ID = 0x%x\n", algo);
+		conn->signing_negotiated = true;
+		conn->signing_algorithm = algo;
+		return;
+	}
+}
+
+/*
+ * Walk the negotiate contexts of a negotiate request and record what the
+ * client offers in @conn.
+ *
+ * At most NegotiateContextCount contexts are visited. The walk stops early
+ * when the remaining length cannot hold a context header or the context's
+ * DataLength, when a preauth, encryption or compression context is seen
+ * after the matching connection field is already set, or when the preauth
+ * context is rejected.
+ *
+ * Return: the result of decode_preauth_ctxt() if a preauth integrity context
+ * was decoded, otherwise STATUS_INVALID_PARAMETER
+ */
+static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
+ struct smb2_negotiate_req *req)
+{
+ /* +4 is to account for the RFC1001 len field */
+ struct smb2_neg_context *pctx = (struct smb2_neg_context *)((char *)req + 4);
+ int i = 0, len_of_ctxts;
+ int offset = le32_to_cpu(req->NegotiateContextOffset);
+ int neg_ctxt_cnt = le16_to_cpu(req->NegotiateContextCount);
+ int len_of_smb = be32_to_cpu(req->hdr.smb2_buf_length);
+ /* only a decoded preauth context can change this */
+ __le32 status = STATUS_INVALID_PARAMETER;
+
+ ksmbd_debug(SMB, "decoding %d negotiate contexts\n", neg_ctxt_cnt);
+ if (len_of_smb <= offset) {
+ ksmbd_debug(SMB, "Invalid response: negotiate context offset\n");
+ return status;
+ }
+
+ /* bytes available from the first context to the end of the message */
+ len_of_ctxts = len_of_smb - offset;
+
+ while (i++ < neg_ctxt_cnt) {
+ int clen;
+
+ /* check that offset is not beyond end of SMB */
+ if (len_of_ctxts == 0)
+ break;
+
+ if (len_of_ctxts < sizeof(struct smb2_neg_context))
+ break;
+
+ /*
+ * First pass: offset is NegotiateContextOffset. Later passes: offset
+ * is the padded size of the previous context (set at the loop end).
+ */
+ pctx = (struct smb2_neg_context *)((char *)pctx + offset);
+ clen = le16_to_cpu(pctx->DataLength);
+ if (clen + sizeof(struct smb2_neg_context) > len_of_ctxts)
+ break;
+
+ if (pctx->ContextType == SMB2_PREAUTH_INTEGRITY_CAPABILITIES) {
+ ksmbd_debug(SMB,
+ "deassemble SMB2_PREAUTH_INTEGRITY_CAPABILITIES context\n");
+ /* a hash id is already recorded: stop at the repeated context */
+ if (conn->preauth_info->Preauth_HashId)
+ break;
+
+ status = decode_preauth_ctxt(conn,
+ (struct smb2_preauth_neg_context *)pctx);
+ if (status != STATUS_SUCCESS)
+ break;
+ } else if (pctx->ContextType == SMB2_ENCRYPTION_CAPABILITIES) {
+ ksmbd_debug(SMB,
+ "deassemble SMB2_ENCRYPTION_CAPABILITIES context\n");
+ if (conn->cipher_type)
+ break;
+
+ decode_encrypt_ctxt(conn,
+ (struct smb2_encryption_neg_context *)pctx,
+ len_of_ctxts);
+ } else if (pctx->ContextType == SMB2_COMPRESSION_CAPABILITIES) {
+ ksmbd_debug(SMB,
+ "deassemble SMB2_COMPRESSION_CAPABILITIES context\n");
+ if (conn->compress_algorithm)
+ break;
+
+ decode_compress_ctxt(conn,
+ (struct smb2_compression_ctx *)pctx);
+ } else if (pctx->ContextType == SMB2_NETNAME_NEGOTIATE_CONTEXT_ID) {
+ /* recognised but its content is not used */
+ ksmbd_debug(SMB,
+ "deassemble SMB2_NETNAME_NEGOTIATE_CONTEXT_ID context\n");
+ } else if (pctx->ContextType == SMB2_POSIX_EXTENSIONS_AVAILABLE) {
+ ksmbd_debug(SMB,
+ "deassemble SMB2_POSIX_EXTENSIONS_AVAILABLE context\n");
+ conn->posix_ext_supported = true;
+ } else if (pctx->ContextType == SMB2_SIGNING_CAPABILITIES) {
+ ksmbd_debug(SMB,
+ "deassemble SMB2_SIGNING_CAPABILITIES context\n");
+ decode_sign_cap_ctxt(conn,
+ (struct smb2_signing_capabilities *)pctx,
+ len_of_ctxts);
+ }
+
+ /* offsets must be 8 byte aligned */
+ clen = (clen + 7) & ~0x7;
+ offset = clen + sizeof(struct smb2_neg_context);
+ len_of_ctxts -= clen + sizeof(struct smb2_neg_context);
+ }
+ return status;
+}
+
+/**
+ * smb2_handle_negotiate() - handler for smb2 negotiate command
+ * @work:	smb work containing smb request buffer
+ *
+ * Runs the per-dialect server initialisation selected by conn->dialect,
+ * fills in the negotiate response (capabilities, size limits, GSS security
+ * blob, signing mode) and moves the connection to the "need negotiate"
+ * state.  For SMB 3.1.1 the negotiate contexts are parsed and the
+ * pre-authentication integrity hash is started.
+ *
+ * Return:	0 on success, or when the request is dropped without a
+ *		response because the connection is already in the good state;
+ *		otherwise a negative errno (an error response is prepared).
+ */
+int smb2_handle_negotiate(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct smb2_negotiate_req *req = work->request_buf;
+	struct smb2_negotiate_rsp *rsp = work->response_buf;
+	int rc = 0;
+	__le32 status;
+
+	ksmbd_debug(SMB, "Received negotiate request\n");
+	conn->need_neg = false;
+	if (ksmbd_conn_good(work)) {
+		pr_err("conn->tcp_status is already in CifsGood State\n");
+		work->send_no_response = 1;
+		return rc;
+	}
+
+	if (req->DialectCount == 0) {
+		pr_err("malformed packet\n");
+		rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+		rc = -EINVAL;
+		goto err_out;
+	}
+
+	conn->cli_cap = le32_to_cpu(req->Capabilities);
+	switch (conn->dialect) {
+	case SMB311_PROT_ID:
+		conn->preauth_info =
+			kzalloc(sizeof(struct preauth_integrity_info),
+				GFP_KERNEL);
+		if (!conn->preauth_info) {
+			rc = -ENOMEM;
+			rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+			goto err_out;
+		}
+
+		status = deassemble_neg_contexts(conn, req);
+		if (status != STATUS_SUCCESS) {
+			pr_err("deassemble_neg_contexts error(0x%x)\n",
+			       status);
+			rsp->hdr.Status = status;
+			rc = -EINVAL;
+			/*
+			 * The negotiate failed, so the preauth state that was
+			 * just allocated is useless; release it here so that a
+			 * retried negotiate does not overwrite (and leak) it.
+			 */
+			kfree(conn->preauth_info);
+			conn->preauth_info = NULL;
+			goto err_out;
+		}
+
+		rc = init_smb3_11_server(conn);
+		if (rc < 0) {
+			rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+			/* same reasoning as above: do not leak preauth_info */
+			kfree(conn->preauth_info);
+			conn->preauth_info = NULL;
+			goto err_out;
+		}
+
+		ksmbd_gen_preauth_integrity_hash(conn,
+						 work->request_buf,
+						 conn->preauth_info->Preauth_HashValue);
+		rsp->NegotiateContextOffset =
+				cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
+		assemble_neg_contexts(conn, rsp);
+		break;
+	case SMB302_PROT_ID:
+		init_smb3_02_server(conn);
+		break;
+	case SMB30_PROT_ID:
+		init_smb3_0_server(conn);
+		break;
+	case SMB21_PROT_ID:
+		init_smb2_1_server(conn);
+		break;
+	case SMB20_PROT_ID:
+		rc = init_smb2_0_server(conn);
+		if (rc) {
+			rsp->hdr.Status = STATUS_NOT_SUPPORTED;
+			goto err_out;
+		}
+		break;
+	case SMB2X_PROT_ID:
+	case BAD_PROT_ID:
+	default:
+		ksmbd_debug(SMB, "Server dialect :0x%x not supported\n",
+			    conn->dialect);
+		rsp->hdr.Status = STATUS_NOT_SUPPORTED;
+		rc = -EINVAL;
+		goto err_out;
+	}
+	rsp->Capabilities = cpu_to_le32(conn->vals->capabilities);
+
+	/* For stats */
+	conn->connection_type = conn->dialect;
+
+	rsp->MaxTransactSize = cpu_to_le32(conn->vals->max_trans_size);
+	rsp->MaxReadSize = cpu_to_le32(conn->vals->max_read_size);
+	rsp->MaxWriteSize = cpu_to_le32(conn->vals->max_write_size);
+
+	/* The client GUID and security mode are only recorded above SMB 2.0 */
+	if (conn->dialect > SMB20_PROT_ID) {
+		memcpy(conn->ClientGUID, req->ClientGUID,
+		       SMB2_CLIENT_GUID_SIZE);
+		conn->cli_sec_mode = le16_to_cpu(req->SecurityMode);
+	}
+
+	rsp->StructureSize = cpu_to_le16(65);
+	rsp->DialectRevision = cpu_to_le16(conn->dialect);
+	/* Not setting conn guid rsp->ServerGUID, as it
+	 * not used by client for identifying server
+	 */
+	memset(rsp->ServerGUID, 0, SMB2_CLIENT_GUID_SIZE);
+
+	rsp->SystemTime = cpu_to_le64(ksmbd_systime());
+	rsp->ServerStartTime = 0;
+	ksmbd_debug(SMB, "negotiate context offset %d, count %d\n",
+		    le32_to_cpu(rsp->NegotiateContextOffset),
+		    le16_to_cpu(rsp->NegotiateContextCount));
+
+	/* Append the GSS negotiate header as the response security buffer */
+	rsp->SecurityBufferOffset = cpu_to_le16(128);
+	rsp->SecurityBufferLength = cpu_to_le16(AUTH_GSS_LENGTH);
+	ksmbd_copy_gss_neg_header(((char *)(&rsp->hdr) +
+				  sizeof(rsp->hdr.smb2_buf_length)) +
+				  le16_to_cpu(rsp->SecurityBufferOffset));
+	inc_rfc1001_len(rsp, sizeof(struct smb2_negotiate_rsp) -
+			sizeof(struct smb2_hdr) - sizeof(rsp->Buffer) +
+			AUTH_GSS_LENGTH);
+	rsp->SecurityMode = SMB2_NEGOTIATE_SIGNING_ENABLED_LE;
+	conn->use_spnego = true;
+
+	/*
+	 * Signing is turned on for the connection either because the client
+	 * requires it (auto/disabled server setting) or because the server
+	 * configuration makes it mandatory.
+	 */
+	if ((server_conf.signing == KSMBD_CONFIG_OPT_AUTO ||
+	     server_conf.signing == KSMBD_CONFIG_OPT_DISABLED) &&
+	    req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED_LE)
+		conn->sign = true;
+	else if (server_conf.signing == KSMBD_CONFIG_OPT_MANDATORY) {
+		server_conf.enforced_signing = true;
+		rsp->SecurityMode |= SMB2_NEGOTIATE_SIGNING_REQUIRED_LE;
+		conn->sign = true;
+	}
+
+	conn->srv_sec_mode = le16_to_cpu(rsp->SecurityMode);
+	ksmbd_conn_set_need_negotiate(work);
+
+err_out:
+	if (rc < 0)
+		smb2_set_err_rsp(work);
+
+	return rc;
+}
+
+/*
+ * Give @sess its own copy of the connection's preauth integrity hash.
+ * Nothing is done when the session already owns one.
+ *
+ * Return: 0 on success, -ENOMEM if the copy cannot be allocated.
+ */
+static int alloc_preauth_hash(struct ksmbd_session *sess,
+			      struct ksmbd_conn *conn)
+{
+	void *hash_copy;
+
+	if (sess->Preauth_HashValue)
+		return 0;
+
+	hash_copy = kmemdup(conn->preauth_info->Preauth_HashValue,
+			    PREAUTH_HASHVALUE_SIZE, GFP_KERNEL);
+	if (!hash_copy)
+		return -ENOMEM;
+
+	sess->Preauth_HashValue = hash_copy;
+	return 0;
+}
+
+/*
+ * Fold the current request into the preauth integrity hash.  Only SMB 3.1.1
+ * connections carry such a hash.  A binding request uses the per-connection
+ * preauth session entry (created on demand); any other request uses the
+ * session's own hash buffer (also created on demand).
+ *
+ * Return: 0 on success, -ENOMEM if a hash buffer cannot be allocated.
+ */
+static int generate_preauth_hash(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct ksmbd_session *sess = work->sess;
+	u8 *hash;
+
+	if (conn->dialect != SMB311_PROT_ID)
+		return 0;
+
+	if (!conn->binding) {
+		/* alloc_preauth_hash() is a no-op when the hash exists */
+		if (alloc_preauth_hash(sess, conn))
+			return -ENOMEM;
+		hash = sess->Preauth_HashValue;
+	} else {
+		struct preauth_session *bind_sess;
+
+		bind_sess = ksmbd_preauth_session_lookup(conn, sess->id);
+		if (!bind_sess)
+			bind_sess = ksmbd_preauth_session_alloc(conn, sess->id);
+		if (!bind_sess)
+			return -ENOMEM;
+
+		hash = bind_sess->Preauth_HashValue;
+	}
+
+	ksmbd_gen_preauth_integrity_hash(conn, work->request_buf, hash);
+	return 0;
+}
+
+/*
+ * Try to decode the security blob as a SPNEGO negTokenInit and, failing
+ * that, as a negTokenTarg.  When neither decodes, the connection falls back
+ * to raw NTLMSSP and SPNEGO is switched off for it.
+ *
+ * Return: -EINVAL if SPNEGO is not in use on the connection, otherwise 0.
+ */
+static int decode_negotiation_token(struct ksmbd_work *work,
+				    struct negotiate_message *negblob)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct smb2_sess_setup_req *req = work->request_buf;
+	char *blob = (char *)negblob;
+	int blob_len;
+
+	if (!conn->use_spnego)
+		return -EINVAL;
+
+	blob_len = le16_to_cpu(req->SecurityBufferLength);
+
+	if (!ksmbd_decode_negTokenInit(blob, blob_len, conn))
+		return 0;
+	if (!ksmbd_decode_negTokenTarg(blob, blob_len, conn))
+		return 0;
+
+	/* Not a SPNEGO token at all: treat the blob as bare NTLMSSP */
+	conn->auth_mechs |= KSMBD_AUTH_NTLMSSP;
+	conn->preferred_auth_mech = KSMBD_AUTH_NTLMSSP;
+	conn->use_spnego = false;
+	return 0;
+}
+
+/*
+ * Handle the NTLMSSP negotiate message of a session setup exchange.
+ *
+ * Decodes the client's negotiate blob and writes an NTLMSSP challenge into
+ * the response security buffer, either bare or wrapped in a SPNEGO blob
+ * depending on conn->use_spnego.  rsp->SecurityBufferLength is set to the
+ * size of what was written.
+ *
+ * Return: 0 on success, the decoder's error code if the negotiate blob is
+ * rejected, or -ENOMEM when building the challenge or its wrapper fails.
+ */
+static int ntlm_negotiate(struct ksmbd_work *work,
+ struct negotiate_message *negblob)
+{
+ struct smb2_sess_setup_req *req = work->request_buf;
+ struct smb2_sess_setup_rsp *rsp = work->response_buf;
+ struct challenge_message *chgblob;
+ unsigned char *spnego_blob = NULL;
+ u16 spnego_blob_len;
+ char *neg_blob;
+ int sz, rc;
+
+ ksmbd_debug(SMB, "negotiate phase\n");
+ sz = le16_to_cpu(req->SecurityBufferLength);
+ rc = ksmbd_decode_ntlmssp_neg_blob(negblob, sz, work->sess);
+ if (rc)
+ return rc;
+
+ /* The challenge lives at SecurityBufferOffset in the response; clear it */
+ sz = le16_to_cpu(rsp->SecurityBufferOffset);
+ chgblob =
+ (struct challenge_message *)((char *)&rsp->hdr.ProtocolId + sz);
+ memset(chgblob, 0, sizeof(struct challenge_message));
+
+ /* Without SPNEGO the challenge is built directly in the response */
+ if (!work->conn->use_spnego) {
+ sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->sess);
+ if (sz < 0)
+ return -ENOMEM;
+
+ rsp->SecurityBufferLength = cpu_to_le16(sz);
+ return 0;
+ }
+
+ /*
+ * With SPNEGO the challenge is first built in a scratch buffer whose
+ * size is derived from the challenge header plus the NetBIOS name length.
+ * NOTE(review): the "* 2 + 1 + 4) * 6" sizing presumably covers the
+ * UTF-16 target-info entries - confirm against the blob builder.
+ */
+ sz = sizeof(struct challenge_message);
+ sz += (strlen(ksmbd_netbios_name()) * 2 + 1 + 4) * 6;
+
+ neg_blob = kzalloc(sz, GFP_KERNEL);
+ if (!neg_blob)
+ return -ENOMEM;
+
+ chgblob = (struct challenge_message *)neg_blob;
+ sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->sess);
+ if (sz < 0) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* Wrap the raw challenge in a SPNEGO blob; any failure maps to -ENOMEM */
+ rc = build_spnego_ntlmssp_neg_blob(&spnego_blob, &spnego_blob_len,
+ neg_blob, sz);
+ if (rc) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* Copy the wrapped challenge into the response security buffer */
+ sz = le16_to_cpu(rsp->SecurityBufferOffset);
+ memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len);
+ rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len);
+
+out:
+ /* Both scratch buffers are released on success and on failure */
+ kfree(spnego_blob);
+ kfree(neg_blob);
+ return rc;
+}
+
+/*
+ * Locate the NTLMSSP authenticate message for a session setup request: the
+ * decoded SPNEGO mechToken when one is present on a SPNEGO connection,
+ * otherwise the request's raw security buffer.
+ */
+static struct authenticate_message *user_authblob(struct ksmbd_conn *conn,
+						  struct smb2_sess_setup_req *req)
+{
+	char *raw_blob;
+
+	if (!conn->use_spnego || !conn->mechToken) {
+		raw_blob = (char *)&req->hdr.ProtocolId +
+			   le16_to_cpu(req->SecurityBufferOffset);
+		return (struct authenticate_message *)raw_blob;
+	}
+
+	return (struct authenticate_message *)conn->mechToken;
+}
+
+/*
+ * Look up the user named in the request's NTLMSSP authenticate message.
+ * The UTF-16 user name is converted with the connection's NLS table and
+ * handed to ksmbd_login_user().
+ *
+ * Return: the value returned by ksmbd_login_user(), or NULL when the user
+ * name cannot be converted.
+ */
+static struct ksmbd_user *session_user(struct ksmbd_conn *conn,
+				       struct smb2_sess_setup_req *req)
+{
+	struct authenticate_message *auth_msg = user_authblob(conn, req);
+	int name_off = le32_to_cpu(auth_msg->UserName.BufferOffset);
+	struct ksmbd_user *found;
+	char *user_name;
+
+	user_name = smb_strndup_from_utf16((const char *)auth_msg + name_off,
+					   le16_to_cpu(auth_msg->UserName.Length),
+					   true,
+					   conn->local_nls);
+	if (IS_ERR(user_name)) {
+		pr_err("cannot allocate memory\n");
+		return NULL;
+	}
+
+	ksmbd_debug(SMB, "session setup request for user %s\n", user_name);
+	found = ksmbd_login_user(user_name);
+	kfree(user_name);
+
+	return found;
+}
+
+/*
+ * Handle the NTLMSSP authenticate message of a session setup exchange.
+ *
+ * Writes the SPNEGO accept blob into the response (SPNEGO connections only),
+ * resolves the user, verifies the NTLMSSP blob for non-guest users, decides
+ * on signing/encryption for the session, registers a channel for SMB3+
+ * dialects and generates the signing key.
+ *
+ * Return: 0 on success; -EPERM for an unknown user, a guest login while
+ * signing is enabled, or a failed password check; -ENOMEM on allocation
+ * failure; -EINVAL when key generation fails; -ENOENT when the dialect
+ * lookup fails.
+ */
+static int ntlm_authenticate(struct ksmbd_work *work)
+{
+ struct smb2_sess_setup_req *req = work->request_buf;
+ struct smb2_sess_setup_rsp *rsp = work->response_buf;
+ struct ksmbd_conn *conn = work->conn;
+ struct ksmbd_session *sess = work->sess;
+ struct channel *chann = NULL;
+ struct ksmbd_user *user;
+ u64 prev_id;
+ int sz, rc;
+
+ ksmbd_debug(SMB, "authenticate phase\n");
+ /* On SPNEGO connections the response carries a SPNEGO auth blob */
+ if (conn->use_spnego) {
+ unsigned char *spnego_blob;
+ u16 spnego_blob_len;
+
+ rc = build_spnego_ntlmssp_auth_blob(&spnego_blob,
+ &spnego_blob_len,
+ 0);
+ if (rc)
+ return -ENOMEM;
+
+ sz = le16_to_cpu(rsp->SecurityBufferOffset);
+ memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len);
+ rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len);
+ kfree(spnego_blob);
+ /* only the length (not the freed buffer) is used from here on */
+ inc_rfc1001_len(rsp, spnego_blob_len - 1);
+ }
+
+ user = session_user(conn, req);
+ if (!user) {
+ ksmbd_debug(SMB, "Unknown user name or an error\n");
+ return -EPERM;
+ }
+
+ /* Check for previous session */
+ prev_id = le64_to_cpu(req->PreviousSessionId);
+ if (prev_id && prev_id != sess->id)
+ destroy_previous_session(user, prev_id);
+
+ if (sess->state == SMB2_SESSION_VALID) {
+ /*
+ * Reuse the session if an anonymous user tries to connect
+ * on reauthentication.
+ */
+ if (ksmbd_anonymous_user(user)) {
+ ksmbd_free_user(user);
+ return 0;
+ }
+ /* reauthentication as a real user: drop the old user object */
+ ksmbd_free_user(sess->user);
+ }
+
+ sess->user = user;
+ if (user_guest(sess->user)) {
+ if (conn->sign) {
+ ksmbd_debug(SMB, "Guest login not allowed when signing enabled\n");
+ return -EPERM;
+ }
+
+ rsp->SessionFlags = SMB2_SESSION_FLAG_IS_GUEST_LE;
+ } else {
+ struct authenticate_message *authblob;
+
+ /* Non-guest users must pass the NTLMSSP response check */
+ authblob = user_authblob(conn, req);
+ sz = le16_to_cpu(req->SecurityBufferLength);
+ rc = ksmbd_decode_ntlmssp_auth_blob(authblob, sz, sess);
+ if (rc) {
+ set_user_flag(sess->user, KSMBD_USER_FLAG_BAD_PASSWORD);
+ ksmbd_debug(SMB, "authentication failed\n");
+ return -EPERM;
+ }
+
+ /*
+ * If session state is SMB2_SESSION_VALID, We can assume
+ * that it is reauthentication. And the user/password
+ * has been verified, so return it here.
+ */
+ if (sess->state == SMB2_SESSION_VALID) {
+ if (conn->binding)
+ goto binding_session;
+ return 0;
+ }
+
+ /* Sign when the connection, the server config or the client asks */
+ if ((conn->sign || server_conf.enforced_signing) ||
+ (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
+ sess->sign = true;
+
+ /* Encryption keys are not generated for channel-binding requests */
+ if (conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION &&
+ conn->ops->generate_encryptionkey &&
+ !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
+ rc = conn->ops->generate_encryptionkey(sess);
+ if (rc) {
+ ksmbd_debug(SMB,
+ "SMB3 encryption key generation failed\n");
+ return -EINVAL;
+ }
+ sess->enc = true;
+ rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
+ /*
+ * signing is disabled if encryption is enabled
+ * on this session
+ */
+ sess->sign = false;
+ }
+ }
+
+binding_session:
+ /* SMB3+ sessions keep a list of channels, one per connection */
+ if (conn->dialect >= SMB30_PROT_ID) {
+ chann = lookup_chann_list(sess, conn);
+ if (!chann) {
+ chann = kmalloc(sizeof(struct channel), GFP_KERNEL);
+ if (!chann)
+ return -ENOMEM;
+
+ chann->conn = conn;
+ INIT_LIST_HEAD(&chann->chann_list);
+ list_add(&chann->chann_list, &sess->ksmbd_chann_list);
+ }
+ }
+
+ if (conn->ops->generate_signingkey) {
+ rc = conn->ops->generate_signingkey(sess, conn);
+ if (rc) {
+ ksmbd_debug(SMB, "SMB3 signing key generation failed\n");
+ return -EINVAL;
+ }
+ }
+
+ if (conn->dialect > SMB20_PROT_ID) {
+ if (!ksmbd_conn_lookup_dialect(conn)) {
+ pr_err("fail to verify the dialect\n");
+ return -ENOENT;
+ }
+ }
+ return 0;
+}
+
+#ifdef CONFIG_SMB_SERVER_KERBEROS5
+/*
+ * Authenticate a session setup request with Kerberos 5.
+ *
+ * Passes the request security buffer to ksmbd_krb5_authenticate(), which
+ * writes its reply into the response security buffer, then decides on
+ * signing/encryption, registers a channel for SMB3+ dialects and generates
+ * the signing key.
+ *
+ * Return: 0 on success; -EINVAL when authentication or key generation
+ * fails; -ENOMEM on allocation failure; -ENOENT when the dialect lookup
+ * fails.
+ */
+static int krb5_authenticate(struct ksmbd_work *work)
+{
+ struct smb2_sess_setup_req *req = work->request_buf;
+ struct smb2_sess_setup_rsp *rsp = work->response_buf;
+ struct ksmbd_conn *conn = work->conn;
+ struct ksmbd_session *sess = work->sess;
+ char *in_blob, *out_blob;
+ struct channel *chann = NULL;
+ u64 prev_sess_id;
+ int in_len, out_len;
+ int retval;
+
+ /* Input is the request security buffer, output the response one */
+ in_blob = (char *)&req->hdr.ProtocolId +
+ le16_to_cpu(req->SecurityBufferOffset);
+ in_len = le16_to_cpu(req->SecurityBufferLength);
+ out_blob = (char *)&rsp->hdr.ProtocolId +
+ le16_to_cpu(rsp->SecurityBufferOffset);
+ /* space left in the response buffer after the security buffer offset */
+ out_len = work->response_sz -
+ offsetof(struct smb2_hdr, smb2_buf_length) -
+ le16_to_cpu(rsp->SecurityBufferOffset);
+
+ /*
+ * Check previous session.
+ * NOTE(review): sess->user is passed here before authentication has
+ * run; for a brand new session it may not be set yet - confirm that
+ * destroy_previous_session() tolerates that.
+ */
+ prev_sess_id = le64_to_cpu(req->PreviousSessionId);
+ if (prev_sess_id && prev_sess_id != sess->id)
+ destroy_previous_session(sess->user, prev_sess_id);
+
+ /*
+ * Reauthentication: drop the old user object.
+ * NOTE(review): sess->user is not cleared here; presumably
+ * ksmbd_krb5_authenticate() installs the new user - confirm.
+ */
+ if (sess->state == SMB2_SESSION_VALID)
+ ksmbd_free_user(sess->user);
+
+ retval = ksmbd_krb5_authenticate(sess, in_blob, in_len,
+ out_blob, &out_len);
+ if (retval) {
+ ksmbd_debug(SMB, "krb5 authentication failed\n");
+ return -EINVAL;
+ }
+ rsp->SecurityBufferLength = cpu_to_le16(out_len);
+ inc_rfc1001_len(rsp, out_len - 1);
+
+ /* Sign when the connection, the server config or the client asks */
+ if ((conn->sign || server_conf.enforced_signing) ||
+ (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
+ sess->sign = true;
+
+ if ((conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) &&
+ conn->ops->generate_encryptionkey) {
+ retval = conn->ops->generate_encryptionkey(sess);
+ if (retval) {
+ ksmbd_debug(SMB,
+ "SMB3 encryption key generation failed\n");
+ return -EINVAL;
+ }
+ sess->enc = true;
+ rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
+ /* an encrypted session is not additionally signed */
+ sess->sign = false;
+ }
+
+ /* SMB3+ sessions keep a list of channels, one per connection */
+ if (conn->dialect >= SMB30_PROT_ID) {
+ chann = lookup_chann_list(sess, conn);
+ if (!chann) {
+ chann = kmalloc(sizeof(struct channel), GFP_KERNEL);
+ if (!chann)
+ return -ENOMEM;
+
+ chann->conn = conn;
+ INIT_LIST_HEAD(&chann->chann_list);
+ list_add(&chann->chann_list, &sess->ksmbd_chann_list);
+ }
+ }
+
+ if (conn->ops->generate_signingkey) {
+ retval = conn->ops->generate_signingkey(sess, conn);
+ if (retval) {
+ ksmbd_debug(SMB, "SMB3 signing key generation failed\n");
+ return -EINVAL;
+ }
+ }
+
+ if (conn->dialect > SMB20_PROT_ID) {
+ if (!ksmbd_conn_lookup_dialect(conn)) {
+ pr_err("fail to verify the dialect\n");
+ return -ENOENT;
+ }
+ }
+ return 0;
+}
+#else
+/* Kerberos support is compiled out: every attempt is rejected. */
+static int krb5_authenticate(struct ksmbd_work *work)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+/**
+ * smb2_sess_setup() - handler for smb2 session setup command
+ * @work:	smb work containing smb request buffer
+ *
+ * Finds or creates the session the request refers to (new session, channel
+ * binding to an existing session, or reauthentication), decodes the
+ * security blob and runs the Kerberos or NTLMSSP authentication step.
+ *
+ * On failure the errno is translated into the response status:
+ * -EINVAL -> STATUS_INVALID_PARAMETER, -ENOENT -> STATUS_USER_SESSION_DELETED,
+ * -EACCES -> STATUS_REQUEST_NOT_ACCEPTED, -EFAULT ->
+ * STATUS_NETWORK_SESSION_EXPIRED, -ENOMEM -> STATUS_INSUFFICIENT_RESOURCES,
+ * anything else -> STATUS_LOGON_FAILURE; the session, if any, is destroyed.
+ *
+ * Return:	0 on success, otherwise a negative errno
+ */
+int smb2_sess_setup(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct smb2_sess_setup_req *req = work->request_buf;
+	struct smb2_sess_setup_rsp *rsp = work->response_buf;
+	struct ksmbd_session *sess;
+	struct negotiate_message *negblob;
+	int rc = 0;
+
+	ksmbd_debug(SMB, "Received request for session setup\n");
+
+	rsp->StructureSize = cpu_to_le16(9);
+	rsp->SessionFlags = 0;
+	rsp->SecurityBufferOffset = cpu_to_le16(72);
+	rsp->SecurityBufferLength = 0;
+	inc_rfc1001_len(rsp, 9);
+
+	if (!req->hdr.SessionId) {
+		/* No session id yet: this starts a brand new session */
+		sess = ksmbd_smb2_session_create();
+		if (!sess) {
+			rc = -ENOMEM;
+			goto out_err;
+		}
+		rsp->hdr.SessionId = cpu_to_le64(sess->id);
+		ksmbd_session_register(conn, sess);
+	} else if (conn->dialect >= SMB30_PROT_ID &&
+		   (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) &&
+		   req->Flags & SMB2_SESSION_REQ_FLAG_BINDING) {
+		/* Multichannel: bind this connection to an existing session */
+		u64 sess_id = le64_to_cpu(req->hdr.SessionId);
+
+		sess = ksmbd_session_lookup_slowpath(sess_id);
+		if (!sess) {
+			rc = -ENOENT;
+			goto out_err;
+		}
+
+		if (conn->dialect != sess->conn->dialect) {
+			rc = -EINVAL;
+			goto out_err;
+		}
+
+		if (!(req->hdr.Flags & SMB2_FLAGS_SIGNED)) {
+			rc = -EINVAL;
+			goto out_err;
+		}
+
+		/*
+		 * ClientGUID is a fixed-size binary value and may contain
+		 * zero bytes, so it must be compared with memcmp(); a string
+		 * compare would stop at the first zero byte and could treat
+		 * two different GUIDs as equal.
+		 */
+		if (memcmp(conn->ClientGUID, sess->conn->ClientGUID,
+			   SMB2_CLIENT_GUID_SIZE)) {
+			rc = -ENOENT;
+			goto out_err;
+		}
+
+		if (sess->state == SMB2_SESSION_IN_PROGRESS) {
+			rc = -EACCES;
+			goto out_err;
+		}
+
+		if (sess->state == SMB2_SESSION_EXPIRED) {
+			rc = -EFAULT;
+			goto out_err;
+		}
+
+		/* The session must not already be known on this connection */
+		if (ksmbd_session_lookup(conn, sess_id)) {
+			rc = -EACCES;
+			goto out_err;
+		}
+
+		conn->binding = true;
+	} else if ((conn->dialect < SMB30_PROT_ID ||
+		    server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) &&
+		   (req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
+		sess = NULL;
+		rc = -EACCES;
+		goto out_err;
+	} else {
+		/* Existing session on this connection (reauthentication) */
+		sess = ksmbd_session_lookup(conn,
+					    le64_to_cpu(req->hdr.SessionId));
+		if (!sess) {
+			rc = -ENOENT;
+			goto out_err;
+		}
+	}
+	work->sess = sess;
+
+	if (sess->state == SMB2_SESSION_EXPIRED)
+		sess->state = SMB2_SESSION_IN_PROGRESS;
+
+	negblob = (struct negotiate_message *)((char *)&req->hdr.ProtocolId +
+			le16_to_cpu(req->SecurityBufferOffset));
+
+	/* A decoded SPNEGO mechToken replaces the raw security buffer */
+	if (decode_negotiation_token(work, negblob) == 0) {
+		if (conn->mechToken)
+			negblob = (struct negotiate_message *)conn->mechToken;
+	}
+
+	if (server_conf.auth_mechs & conn->auth_mechs) {
+		rc = generate_preauth_hash(work);
+		if (rc)
+			goto out_err;
+
+		if (conn->preferred_auth_mech &
+				(KSMBD_AUTH_KRB5 | KSMBD_AUTH_MSKRB5)) {
+			rc = krb5_authenticate(work);
+			if (rc) {
+				rc = -EINVAL;
+				goto out_err;
+			}
+
+			ksmbd_conn_set_good(work);
+			sess->state = SMB2_SESSION_VALID;
+			kfree(sess->Preauth_HashValue);
+			sess->Preauth_HashValue = NULL;
+		} else if (conn->preferred_auth_mech == KSMBD_AUTH_NTLMSSP) {
+			if (negblob->MessageType == NtLmNegotiate) {
+				rc = ntlm_negotiate(work, negblob);
+				if (rc)
+					goto out_err;
+				rsp->hdr.Status =
+					STATUS_MORE_PROCESSING_REQUIRED;
+				/*
+				 * Note: here total size -1 is done as an
+				 * adjustment for 0 size blob
+				 */
+				inc_rfc1001_len(rsp, le16_to_cpu(rsp->SecurityBufferLength) - 1);
+
+			} else if (negblob->MessageType == NtLmAuthenticate) {
+				rc = ntlm_authenticate(work);
+				if (rc)
+					goto out_err;
+
+				ksmbd_conn_set_good(work);
+				sess->state = SMB2_SESSION_VALID;
+				if (conn->binding) {
+					struct preauth_session *preauth_sess;
+
+					preauth_sess =
+						ksmbd_preauth_session_lookup(conn, sess->id);
+					if (preauth_sess) {
+						list_del(&preauth_sess->preauth_entry);
+						kfree(preauth_sess);
+					}
+				}
+				kfree(sess->Preauth_HashValue);
+				sess->Preauth_HashValue = NULL;
+			} else {
+				/*
+				 * Neither a negotiate nor an authenticate
+				 * message: reject it instead of reporting
+				 * success for a request that did nothing.
+				 */
+				pr_info_ratelimited("Unknown NTLMSSP message type : 0x%x\n",
+						    le32_to_cpu(negblob->MessageType));
+				rc = -EINVAL;
+			}
+		} else {
+			/* TODO: need one more negotiation */
+			pr_err("Not support the preferred authentication\n");
+			rc = -EINVAL;
+		}
+	} else {
+		pr_err("Not support authentication\n");
+		rc = -EINVAL;
+	}
+
+out_err:
+	if (rc == -EINVAL)
+		rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+	else if (rc == -ENOENT)
+		rsp->hdr.Status = STATUS_USER_SESSION_DELETED;
+	else if (rc == -EACCES)
+		rsp->hdr.Status = STATUS_REQUEST_NOT_ACCEPTED;
+	else if (rc == -EFAULT)
+		rsp->hdr.Status = STATUS_NETWORK_SESSION_EXPIRED;
+	else if (rc == -ENOMEM)
+		rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES;
+	else if (rc)
+		rsp->hdr.Status = STATUS_LOGON_FAILURE;
+
+	/* The decoded mechToken only lives for the duration of one request */
+	if (conn->use_spnego && conn->mechToken) {
+		kfree(conn->mechToken);
+		conn->mechToken = NULL;
+	}
+
+	if (rc < 0 && sess) {
+		ksmbd_session_destroy(sess);
+		work->sess = NULL;
+	}
+
+	return rc;
+}
+
+/**
+ * smb2_tree_connect() - handler for smb2 tree connect command
+ * @work: smb work containing smb request buffer
+ *
+ * Converts the requested UTF-16 path to a share name, connects the session
+ * to that share and fills in the share type and maximal access mask of the
+ * response. The tree connect status is translated into an NT status code.
+ *
+ * Return: 0 on success, otherwise -EINVAL
+ */
+int smb2_tree_connect(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_tree_connect_req *req = work->request_buf;
+ struct smb2_tree_connect_rsp *rsp = work->response_buf;
+ struct ksmbd_session *sess = work->sess;
+ char *treename = NULL, *name = NULL;
+ struct ksmbd_tree_conn_status status;
+ struct ksmbd_share_config *share;
+ /* stays -EINVAL unless the status switch below sees STATUS_OK */
+ int rc = -EINVAL;
+
+ treename = smb_strndup_from_utf16(req->Buffer,
+ le16_to_cpu(req->PathLength), true,
+ conn->local_nls);
+ if (IS_ERR(treename)) {
+ pr_err("treename is NULL\n");
+ status.ret = KSMBD_TREE_CONN_STATUS_ERROR;
+ goto out_err1;
+ }
+
+ name = ksmbd_extract_sharename(treename);
+ if (IS_ERR(name)) {
+ status.ret = KSMBD_TREE_CONN_STATUS_ERROR;
+ goto out_err1;
+ }
+
+ ksmbd_debug(SMB, "tree connect request for tree %s treename %s\n",
+ name, treename);
+
+ status = ksmbd_tree_conn_connect(sess, name);
+ if (status.ret == KSMBD_TREE_CONN_STATUS_OK)
+ rsp->hdr.Id.SyncId.TreeId = cpu_to_le32(status.tree_conn->id);
+ else
+ goto out_err1;
+
+ share = status.tree_conn->share_conf;
+ if (test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) {
+ ksmbd_debug(SMB, "IPC share path request\n");
+ rsp->ShareType = SMB2_SHARE_TYPE_PIPE;
+ rsp->MaximalAccess = FILE_READ_DATA_LE | FILE_READ_EA_LE |
+ FILE_EXECUTE_LE | FILE_READ_ATTRIBUTES_LE |
+ FILE_DELETE_LE | FILE_READ_CONTROL_LE |
+ FILE_WRITE_DAC_LE | FILE_WRITE_OWNER_LE |
+ FILE_SYNCHRONIZE_LE;
+ } else {
+ /* disk share: read-style access, plus write access if writable */
+ rsp->ShareType = SMB2_SHARE_TYPE_DISK;
+ rsp->MaximalAccess = FILE_READ_DATA_LE | FILE_READ_EA_LE |
+ FILE_EXECUTE_LE | FILE_READ_ATTRIBUTES_LE;
+ if (test_tree_conn_flag(status.tree_conn,
+ KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+ rsp->MaximalAccess |= FILE_WRITE_DATA_LE |
+ FILE_APPEND_DATA_LE | FILE_WRITE_EA_LE |
+ FILE_DELETE_LE | FILE_WRITE_ATTRIBUTES_LE |
+ FILE_DELETE_CHILD_LE | FILE_READ_CONTROL_LE |
+ FILE_WRITE_DAC_LE | FILE_WRITE_OWNER_LE |
+ FILE_SYNCHRONIZE_LE;
+ }
+ }
+
+ status.tree_conn->maximal_access = le32_to_cpu(rsp->MaximalAccess);
+ if (conn->posix_ext_supported)
+ status.tree_conn->posix_extensions = true;
+
+ /* Reached on success as well: the fixed response fields are always set */
+out_err1:
+ rsp->StructureSize = cpu_to_le16(16);
+ rsp->Capabilities = 0;
+ rsp->Reserved = 0;
+ /* default manual caching */
+ rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING;
+ inc_rfc1001_len(rsp, 16);
+
+ /* treename/name are NULL, an ERR_PTR or an allocation at this point */
+ if (!IS_ERR(treename))
+ kfree(treename);
+ if (!IS_ERR(name))
+ kfree(name);
+
+ /* Translate the tree connect status into the NT status of the response */
+ switch (status.ret) {
+ case KSMBD_TREE_CONN_STATUS_OK:
+ rsp->hdr.Status = STATUS_SUCCESS;
+ rc = 0;
+ break;
+ case KSMBD_TREE_CONN_STATUS_NO_SHARE:
+ rsp->hdr.Status = STATUS_BAD_NETWORK_PATH;
+ break;
+ case -ENOMEM:
+ case KSMBD_TREE_CONN_STATUS_NOMEM:
+ rsp->hdr.Status = STATUS_NO_MEMORY;
+ break;
+ case KSMBD_TREE_CONN_STATUS_ERROR:
+ case KSMBD_TREE_CONN_STATUS_TOO_MANY_CONNS:
+ case KSMBD_TREE_CONN_STATUS_TOO_MANY_SESSIONS:
+ rsp->hdr.Status = STATUS_ACCESS_DENIED;
+ break;
+ case -EINVAL:
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ break;
+ default:
+ rsp->hdr.Status = STATUS_ACCESS_DENIED;
+ }
+
+ return rc;
+}
+
+/**
+ * smb2_create_open_flags() - convert smb open flags to unix open flags
+ * @file_present:	is file already present
+ * @access:		file access flags
+ * @disposition:	file disposition flags
+ * @may_flags:		set with MAY_ flags
+ *
+ * The access mode (and *@may_flags) follows the read/write bits requested in
+ * @access.  O_TRUNC is added for overwriting dispositions on an existing
+ * file, O_CREAT for creating dispositions on a missing one.
+ *
+ * Return:	file open flags
+ */
+static int smb2_create_open_flags(bool file_present, __le32 access,
+				  __le32 disposition,
+				  int *may_flags)
+{
+	__le32 create_disp = disposition & FILE_CREATE_MASK_LE;
+	int oflags = O_NONBLOCK | O_LARGEFILE;
+
+	if (access & FILE_WRITE_DESIRE_ACCESS_LE) {
+		if (access & FILE_READ_DESIRED_ACCESS_LE) {
+			oflags |= O_RDWR;
+			*may_flags = MAY_OPEN | MAY_READ | MAY_WRITE;
+		} else {
+			oflags |= O_WRONLY;
+			*may_flags = MAY_OPEN | MAY_WRITE;
+		}
+	} else {
+		oflags |= O_RDONLY;
+		*may_flags = MAY_OPEN | MAY_READ;
+	}
+
+	/* A pure attribute read does not need a real open */
+	if (access == FILE_READ_ATTRIBUTES_LE)
+		oflags |= O_PATH;
+
+	if (file_present) {
+		/* Existing file: only the overwriting dispositions truncate */
+		if (create_disp == FILE_SUPERSEDE_LE ||
+		    create_disp == FILE_OVERWRITE_LE ||
+		    create_disp == FILE_OVERWRITE_IF_LE)
+			oflags |= O_TRUNC;
+		return oflags;
+	}
+
+	/* Missing file: creating dispositions ask for O_CREAT */
+	switch (create_disp) {
+	case FILE_SUPERSEDE_LE:
+	case FILE_CREATE_LE:
+	case FILE_OPEN_IF_LE:
+	case FILE_OVERWRITE_IF_LE:
+		oflags |= O_CREAT;
+		break;
+	default:
+		/* FILE_OPEN/FILE_OVERWRITE never create; O_CREAT is not set */
+		break;
+	}
+
+	return oflags;
+}
+
+/**
+ * smb2_tree_disconnect() - handler for smb2 tree disconnect request
+ * @work:	smb work containing request buffer
+ *
+ * Closes the files opened through the tree connection and disconnects it
+ * from the session.  When the work has no tree connection the response
+ * status is set to STATUS_NETWORK_NAME_DELETED.
+ *
+ * Return:	0
+ */
+int smb2_tree_disconnect(struct ksmbd_work *work)
+{
+	struct smb2_tree_disconnect_rsp *rsp = work->response_buf;
+	struct ksmbd_session *sess = work->sess;
+	struct ksmbd_tree_connect *tcon = work->tcon;
+
+	rsp->StructureSize = cpu_to_le16(4);
+	inc_rfc1001_len(rsp, 4);
+
+	ksmbd_debug(SMB, "request\n");
+
+	if (!tcon) {
+		struct smb2_tree_disconnect_req *req = work->request_buf;
+
+		/* TreeId is little-endian on the wire; convert before printing */
+		ksmbd_debug(SMB, "Invalid tid %u\n",
+			    le32_to_cpu(req->hdr.Id.SyncId.TreeId));
+		rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED;
+		smb2_set_err_rsp(work);
+		return 0;
+	}
+
+	ksmbd_close_tree_conn_fds(work);
+	ksmbd_tree_conn_disconnect(sess, tcon);
+	return 0;
+}
+
+/**
+ * smb2_session_logoff() - handler for session log off request
+ * @work:	smb work containing request buffer
+ *
+ * Marks the connection as needing a reconnect, closes the session's files,
+ * waits for the connection to become idle and logs the session off all of
+ * its tree connections.  The session is then expired, its user released and
+ * the connection put back into the "need negotiate" state.
+ *
+ * Return:	0
+ */
+int smb2_session_logoff(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct smb2_logoff_rsp *rsp = work->response_buf;
+	struct ksmbd_session *sess = work->sess;
+
+	rsp->StructureSize = cpu_to_le16(4);
+	inc_rfc1001_len(rsp, 4);
+
+	ksmbd_debug(SMB, "request\n");
+
+	/* Got a valid session, set connection state */
+	WARN_ON(sess->conn != conn);
+
+	/* setting CifsExiting here may race with start_tcp_sess */
+	ksmbd_conn_set_need_reconnect(work);
+	ksmbd_close_session_fds(work);
+	ksmbd_conn_wait_idle(conn);
+
+	if (ksmbd_tree_conn_session_logoff(sess)) {
+		struct smb2_logoff_req *req = work->request_buf;
+
+		/* TreeId is little-endian on the wire; convert before printing */
+		ksmbd_debug(SMB, "Invalid tid %u\n",
+			    le32_to_cpu(req->hdr.Id.SyncId.TreeId));
+		rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED;
+		smb2_set_err_rsp(work);
+		return 0;
+	}
+
+	ksmbd_destroy_file_table(&sess->file_table);
+	sess->state = SMB2_SESSION_EXPIRED;
+
+	ksmbd_free_user(sess->user);
+	sess->user = NULL;
+
+	/* let start_tcp_sess free connection info now */
+	ksmbd_conn_set_need_negotiate(work);
+	return 0;
+}
+
+/**
+ * create_smb2_pipe() - create IPC pipe
+ * @work:	smb work containing request buffer
+ *
+ * Opens an RPC pipe named by the create request on the work's session and
+ * returns its id to the client as the volatile file id.
+ *
+ * Return:	0 on success, otherwise error
+ */
+static noinline int create_smb2_pipe(struct ksmbd_work *work)
+{
+	struct smb2_create_req *req = work->request_buf;
+	struct smb2_create_rsp *rsp = work->response_buf;
+	char *pipe_name;
+	int pipe_id;
+	int rc;
+
+	pipe_name = smb_strndup_from_utf16(req->Buffer,
+					   le16_to_cpu(req->NameLength),
+					   1, work->conn->local_nls);
+	if (IS_ERR(pipe_name)) {
+		rsp->hdr.Status = STATUS_NO_MEMORY;
+		rc = PTR_ERR(pipe_name);
+		goto fail;
+	}
+
+	pipe_id = ksmbd_session_rpc_open(work->sess, pipe_name);
+	if (pipe_id < 0) {
+		pr_err("Unable to open RPC pipe: %d\n", pipe_id);
+		rc = pipe_id;
+		goto fail;
+	}
+
+	/* Header and fixed part of the create response */
+	rsp->hdr.Status = STATUS_SUCCESS;
+	rsp->StructureSize = cpu_to_le16(89);
+	rsp->OplockLevel = SMB2_OPLOCK_LEVEL_NONE;
+	rsp->Reserved = 0;
+	rsp->Reserved2 = 0;
+	rsp->CreateAction = cpu_to_le32(FILE_OPENED);
+	rsp->FileAttributes = ATTR_NORMAL_LE;
+
+	/* A pipe has no timestamps or size to report */
+	rsp->CreationTime = cpu_to_le64(0);
+	rsp->LastAccessTime = cpu_to_le64(0);
+	rsp->ChangeTime = cpu_to_le64(0);
+	rsp->AllocationSize = cpu_to_le64(0);
+	rsp->EndofFile = cpu_to_le64(0);
+
+	rsp->PersistentFileId = 0;
+	rsp->VolatileFileId = cpu_to_le64(pipe_id);
+	rsp->CreateContextsOffset = 0;
+	rsp->CreateContextsLength = 0;
+
+	inc_rfc1001_len(rsp, 88); /* StructureSize - 1*/
+	kfree(pipe_name);
+	return 0;
+
+fail:
+	/* Other error codes leave the response status as it already is */
+	if (rc == -EINVAL)
+		rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+	else if (rc == -ENOSPC || rc == -ENOMEM)
+		rsp->hdr.Status = STATUS_NO_MEMORY;
+
+	if (!IS_ERR(pipe_name))
+		kfree(pipe_name);
+
+	smb2_set_err_rsp(work);
+	return rc;
+}
+
+/**
+ * smb2_set_ea() - handler for setting extended attributes using set
+ * info command
+ * @eabuf: set info command buffer
+ * @path: path of the file whose extended attributes are changed
+ *
+ * Walks the chain of EA entries starting at @eabuf. Each named entry is
+ * stored as a "user."-prefixed xattr; an entry with a zero value length
+ * removes the xattr if it exists.
+ *
+ * NOTE(review): nothing visible here bounds NextEntryOffset, EaNameLength or
+ * EaValueLength against the size of the request buffer - confirm the caller
+ * validates the buffer before calling.
+ *
+ * Return: 0 on success, otherwise error
+ */
+static int smb2_set_ea(struct smb2_ea_info *eabuf, struct path *path)
+{
+ struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+ char *attr_name = NULL, *value;
+ int rc = 0;
+ int next = 0;
+
+ /* scratch buffer for "user." + EA name + NUL */
+ attr_name = kmalloc(XATTR_NAME_MAX + 1, GFP_KERNEL);
+ if (!attr_name)
+ return -ENOMEM;
+
+ do {
+ /* entries without a name are skipped */
+ if (!eabuf->EaNameLength)
+ goto next;
+
+ ksmbd_debug(SMB,
+ "name : <%s>, name_len : %u, value_len : %u, next : %u\n",
+ eabuf->name, eabuf->EaNameLength,
+ le16_to_cpu(eabuf->EaValueLength),
+ le32_to_cpu(eabuf->NextEntryOffset));
+
+ /* the prefixed name must still fit into attr_name */
+ if (eabuf->EaNameLength >
+ (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) {
+ rc = -EINVAL;
+ break;
+ }
+
+ memcpy(attr_name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
+ memcpy(&attr_name[XATTR_USER_PREFIX_LEN], eabuf->name,
+ eabuf->EaNameLength);
+ attr_name[XATTR_USER_PREFIX_LEN + eabuf->EaNameLength] = '\0';
+ /* the value follows the name and one separator byte */
+ value = (char *)&eabuf->name + eabuf->EaNameLength + 1;
+
+ if (!eabuf->EaValueLength) {
+ rc = ksmbd_vfs_casexattr_len(user_ns,
+ path->dentry,
+ attr_name,
+ XATTR_USER_PREFIX_LEN +
+ eabuf->EaNameLength);
+
+ /* delete the EA only when it exists */
+ if (rc > 0) {
+ rc = ksmbd_vfs_remove_xattr(user_ns,
+ path->dentry,
+ attr_name);
+
+ if (rc < 0) {
+ ksmbd_debug(SMB,
+ "remove xattr failed(%d)\n",
+ rc);
+ break;
+ }
+ }
+
+ /* if the EA doesn't exist, just do nothing. */
+ rc = 0;
+ } else {
+ rc = ksmbd_vfs_setxattr(user_ns,
+ path->dentry, attr_name, value,
+ le16_to_cpu(eabuf->EaValueLength), 0);
+ if (rc < 0) {
+ ksmbd_debug(SMB,
+ "ksmbd_vfs_setxattr is failed(%d)\n",
+ rc);
+ break;
+ }
+ }
+
+next:
+ /* a NextEntryOffset of zero marks the last entry */
+ next = le32_to_cpu(eabuf->NextEntryOffset);
+ eabuf = (struct smb2_ea_info *)((char *)eabuf + next);
+ } while (next != 0);
+
+ kfree(attr_name);
+ return rc;
+}
+
+/*
+ * Attach the xattr name of a stream to @fp and make sure the xattr exists.
+ *
+ * The xattr name is built from @stream_name and @s_type and recorded in
+ * fp->stream.  If the xattr is missing it is created empty, unless the file
+ * was opened with FILE_OPEN disposition, in which case the open fails.
+ *
+ * Return: 0 on success (a failure to create the xattr is only logged),
+ * -EBADF when the stream is missing on a FILE_OPEN request, or the error
+ * from building the xattr name.
+ */
+static noinline int smb2_set_stream_name_xattr(struct path *path,
+					       struct ksmbd_file *fp,
+					       char *stream_name, int s_type)
+{
+	struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+	char *xname;
+	size_t xname_len;
+	int ret;
+
+	ret = ksmbd_vfs_xattr_stream_name(stream_name, &xname, &xname_len,
+					  s_type);
+	if (ret)
+		return ret;
+
+	fp->stream.name = xname;
+	fp->stream.size = xname_len;
+
+	/* Check if there is stream prefix in xattr space */
+	ret = ksmbd_vfs_casexattr_len(user_ns, path->dentry, xname, xname_len);
+	if (ret >= 0)
+		return 0;
+
+	if (fp->cdoption == FILE_OPEN_LE) {
+		ksmbd_debug(SMB, "XATTR stream name lookup failed: %d\n", ret);
+		return -EBADF;
+	}
+
+	ret = ksmbd_vfs_setxattr(user_ns, path->dentry, xname, NULL, 0, 0);
+	if (ret < 0)
+		pr_err("Failed to store XATTR stream name :%d\n", ret);
+
+	return 0;
+}
+
+/*
+ * Remove the xattrs that the SMB server itself stores on a file: the DOS
+ * attribute xattr and the stream xattrs, both of which live in the "user."
+ * namespace.  Every other xattr, including unrelated "user." ones, is left
+ * alone.
+ *
+ * Return: 0 when the list is empty or cannot be read, otherwise the result
+ * of the last removal attempted.
+ */
+static int smb2_remove_smb_xattrs(struct path *path)
+{
+	struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+	char *name, *xattr_list = NULL;
+	const char *suffix;
+	ssize_t xattr_list_len;
+	int err = 0;
+
+	xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list);
+	if (xattr_list_len < 0) {
+		goto out;
+	} else if (!xattr_list_len) {
+		ksmbd_debug(SMB, "empty xattr in the file\n");
+		goto out;
+	}
+
+	/* The list is a sequence of NUL-terminated names */
+	for (name = xattr_list; name - xattr_list < xattr_list_len;
+			name += strlen(name) + 1) {
+		ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
+
+		/*
+		 * Only look past the prefix once it has matched: this
+		 * guarantees the name is at least that long, so the suffix
+		 * pointer stays inside the string.
+		 */
+		if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+			continue;
+
+		suffix = &name[XATTR_USER_PREFIX_LEN];
+		if (strncmp(suffix, DOS_ATTRIBUTE_PREFIX,
+			    DOS_ATTRIBUTE_PREFIX_LEN) &&
+		    strncmp(suffix, STREAM_PREFIX, STREAM_PREFIX_LEN))
+			continue;
+
+		err = ksmbd_vfs_remove_xattr(user_ns, path->dentry, name);
+		if (err)
+			ksmbd_debug(SMB, "remove xattr failed : %s\n", name);
+	}
+out:
+	kvfree(xattr_list);
+	return err;
+}
+
+/*
+ * Truncate the file at @path to zero length and drop the SMB-specific
+ * xattrs stored on it.  -EOPNOTSUPP from the xattr cleanup is not treated
+ * as a failure.
+ *
+ * Return: 0 on success, otherwise the error from the failing step.
+ */
+static int smb2_create_truncate(struct path *path)
+{
+	int ret;
+
+	ret = vfs_truncate(path, 0);
+	if (ret) {
+		pr_err("vfs_truncate failed, rc %d\n", ret);
+		return ret;
+	}
+
+	ret = smb2_remove_smb_xattrs(path);
+	if (!ret || ret == -EOPNOTSUPP)
+		return 0;
+
+	ksmbd_debug(SMB,
+		    "ksmbd_truncate_stream_name_xattr failed, rc %d\n",
+		    ret);
+	return ret;
+}
+
+/*
+ * Store the initial DOS attribute xattr for a file: the attributes cached
+ * for @fp and its creation time (also used as itime).  Nothing is written
+ * unless the share is configured to store DOS attributes; a write failure
+ * is only logged.
+ */
+static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, struct path *path,
+			    struct ksmbd_file *fp)
+{
+	struct xattr_dos_attrib da = {0};
+
+	if (!test_share_config_flag(tcon->share_conf,
+				    KSMBD_SHARE_FLAG_STORE_DOS_ATTRS))
+		return;
+
+	da.version = 4;
+	da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
+		   XATTR_DOSINFO_ITIME;
+	da.attr = le32_to_cpu(fp->f_ci->m_fattr);
+	da.itime = da.create_time = fp->create_time;
+
+	if (ksmbd_vfs_set_dos_attrib_xattr(mnt_user_ns(path->mnt),
+					   path->dentry, &da))
+		ksmbd_debug(SMB, "failed to store file attribute into xattr\n");
+}
+
+/*
+ * Refresh the cached file attributes of @fp.  The hidden and system bits
+ * are always cleared first; when the share stores DOS attributes and the
+ * xattr can be read, the attributes, creation time and itime are taken from
+ * it.
+ */
+static void smb2_update_xattrs(struct ksmbd_tree_connect *tcon,
+			       struct path *path, struct ksmbd_file *fp)
+{
+	struct xattr_dos_attrib da;
+
+	fp->f_ci->m_fattr &= ~(ATTR_HIDDEN_LE | ATTR_SYSTEM_LE);
+
+	/* get FileAttributes from XATTR_NAME_DOS_ATTRIBUTE */
+	if (!test_share_config_flag(tcon->share_conf,
+				    KSMBD_SHARE_FLAG_STORE_DOS_ATTRS))
+		return;
+
+	if (ksmbd_vfs_get_dos_attrib_xattr(mnt_user_ns(path->mnt),
+					   path->dentry, &da) <= 0)
+		return;
+
+	fp->f_ci->m_fattr = cpu_to_le32(da.attr);
+	fp->create_time = da.create_time;
+	fp->itime = da.itime;
+}
+
+/*
+ * Create the file or directory @name for a create request and look up the
+ * resulting path.  The creation mode is derived from the share
+ * configuration and @posix_mode.
+ *
+ * Return: 0 on success with @path filled in, -EBADF when @open_flags does
+ * not ask for creation, otherwise the error from the failing VFS helper.
+ */
+static int smb2_creat(struct ksmbd_work *work, struct path *path, char *name,
+		      int open_flags, umode_t posix_mode, bool is_dir)
+{
+	struct ksmbd_share_config *share = work->tcon->share_conf;
+	umode_t create_mode;
+	int ret;
+
+	if (!(open_flags & O_CREAT))
+		return -EBADF;
+
+	ksmbd_debug(SMB, "file does not exist, so creating\n");
+	if (is_dir) {
+		ksmbd_debug(SMB, "creating directory\n");
+
+		create_mode = share_config_directory_mode(share, posix_mode);
+		ret = ksmbd_vfs_mkdir(work, name, create_mode);
+	} else {
+		ksmbd_debug(SMB, "creating regular file\n");
+
+		create_mode = share_config_create_mode(share, posix_mode);
+		ret = ksmbd_vfs_create(work, name, create_mode);
+	}
+	if (ret)
+		return ret;
+
+	ret = ksmbd_vfs_kern_path(name, 0, path, 0);
+	if (ret)
+		pr_err("cannot get linux path (%s), err = %d\n",
+		       name, ret);
+
+	return ret;
+}
+
+/*
+ * Apply the security descriptor carried in an SMB2_CREATE_SD_BUFFER create
+ * context, if the request has one.
+ *
+ * Return: -ENOENT when the request has no create contexts or no SD buffer
+ * context, the lookup error when the context search fails, otherwise the
+ * result of set_info_sec().
+ */
+static int smb2_create_sd_buffer(struct ksmbd_work *work,
+				 struct smb2_create_req *req,
+				 struct path *path)
+{
+	struct create_sd_buf_req *sd_req;
+	struct create_context *cc;
+
+	if (!req->CreateContextsOffset)
+		return -ENOENT;
+
+	/* Parse SD BUFFER create contexts */
+	cc = smb2_find_context_vals(req, SMB2_CREATE_SD_BUFFER);
+	if (IS_ERR(cc))
+		return PTR_ERR(cc);
+	if (!cc)
+		return -ENOENT;
+
+	ksmbd_debug(SMB,
+		    "Set ACLs using SMB2_CREATE_SD_BUFFER context\n");
+	sd_req = (struct create_sd_buf_req *)cc;
+
+	return set_info_sec(work->conn, work->tcon, path, &sd_req->ntsd,
+			    le32_to_cpu(sd_req->ccontext.DataLength), true);
+}
+
+/*
+ * ksmbd_acls_fattr() - fill @fattr with owner, mode and POSIX ACLs of @inode
+ *
+ * cf_acls/cf_dacls are left NULL when CONFIG_FS_POSIX_ACL is disabled, when
+ * the inode has no such ACL, or when the ACL lookup fails. The default ACL is
+ * only queried for directories.
+ */
+static void ksmbd_acls_fattr(struct smb_fattr *fattr, struct inode *inode)
+{
+	struct posix_acl *acl;
+
+	fattr->cf_uid = inode->i_uid;
+	fattr->cf_gid = inode->i_gid;
+	fattr->cf_mode = inode->i_mode;
+	fattr->cf_acls = NULL;
+	fattr->cf_dacls = NULL;
+
+	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL))
+		return;
+
+	/*
+	 * get_acl() may hand back an ERR_PTR. Callers only test these fields
+	 * against NULL before dereferencing and releasing them, so never
+	 * store an error pointer here.
+	 */
+	acl = get_acl(inode, ACL_TYPE_ACCESS);
+	if (!IS_ERR(acl))
+		fattr->cf_acls = acl;
+
+	if (S_ISDIR(inode->i_mode)) {
+		acl = get_acl(inode, ACL_TYPE_DEFAULT);
+		if (!IS_ERR(acl))
+			fattr->cf_dacls = acl;
+	}
+}
+
+/**
+ * smb2_open() - handler for smb file open request
+ * @work:	smb work containing request buffer
+ *
+ * Validates the create request, looks the target up (creating it when the
+ * disposition allows), opens it, applies ACL and oplock/lease handling and
+ * builds the create response including the requested create contexts.
+ *
+ * Failures after the initial checks are reported to the client through
+ * rsp->hdr.Status (see the errno mapping at err_out1); the function itself
+ * still returns 0 in that case.
+ *
+ * Return:	0 once a success or error response has been prepared,
+ *		-EINVAL for an invalid related-operations flag, or the result
+ *		of create_smb2_pipe() for a pipe share
+ */
+int smb2_open(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct ksmbd_session *sess = work->sess;
+	struct ksmbd_tree_connect *tcon = work->tcon;
+	struct smb2_create_req *req;
+	struct smb2_create_rsp *rsp, *rsp_org;
+	struct path path;
+	struct ksmbd_share_config *share = tcon->share_conf;
+	struct ksmbd_file *fp = NULL;
+	struct file *filp = NULL;
+	struct user_namespace *user_ns = NULL;
+	struct kstat stat;
+	struct create_context *context;
+	struct lease_ctx_info *lc = NULL;
+	struct create_ea_buf_req *ea_buf = NULL;
+	struct oplock_info *opinfo;
+	__le32 *next_ptr = NULL;
+	int req_op_level = 0, open_flags = 0, may_flags = 0, file_info = 0;
+	int rc = 0, len = 0;
+	int contxt_cnt = 0, query_disk_id = 0;
+	int maximal_access_ctxt = 0, posix_ctxt = 0;
+	int s_type = 0;
+	int next_off = 0;
+	char *name = NULL;
+	char *stream_name = NULL;
+	bool file_present = false, created = false, already_permitted = false;
+	int share_ret, need_truncate = 0;
+	u64 time;
+	umode_t posix_mode = 0;
+	__le32 daccess, maximal_access = 0;
+
+	rsp_org = work->response_buf;
+	WORK_BUFFERS(work, req, rsp);
+
+	if (req->hdr.NextCommand && !work->next_smb2_rcv_hdr_off &&
+	    (req->hdr.Flags & SMB2_FLAGS_RELATED_OPERATIONS)) {
+		ksmbd_debug(SMB, "invalid flag in chained command\n");
+		rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+		smb2_set_err_rsp(work);
+		return -EINVAL;
+	}
+
+	if (test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) {
+		ksmbd_debug(SMB, "IPC pipe create request\n");
+		return create_smb2_pipe(work);
+	}
+
+	if (req->NameLength) {
+		if ((req->CreateOptions & FILE_DIRECTORY_FILE_LE) &&
+		    *(char *)req->Buffer == '\\') {
+			pr_err("not allow directory name included leading slash\n");
+			rc = -EINVAL;
+			goto err_out1;
+		}
+
+		name = smb2_get_name(share,
+				     req->Buffer,
+				     le16_to_cpu(req->NameLength),
+				     work->conn->local_nls);
+		if (IS_ERR(name)) {
+			rc = PTR_ERR(name);
+			if (rc != -ENOMEM)
+				rc = -ENOENT;
+			/* err_out1 kfree()s name, so drop the error pointer */
+			name = NULL;
+			goto err_out1;
+		}
+
+		ksmbd_debug(SMB, "converted name = %s\n", name);
+		if (strchr(name, ':')) {
+			if (!test_share_config_flag(work->tcon->share_conf,
+						    KSMBD_SHARE_FLAG_STREAMS)) {
+				rc = -EBADF;
+				goto err_out1;
+			}
+			rc = parse_stream_name(name, &stream_name, &s_type);
+			if (rc < 0)
+				goto err_out1;
+		}
+
+		rc = ksmbd_validate_filename(name);
+		if (rc < 0)
+			goto err_out1;
+
+		if (ksmbd_share_veto_filename(share, name)) {
+			rc = -ENOENT;
+			ksmbd_debug(SMB, "Reject open(), vetoed file: %s\n",
+				    name);
+			goto err_out1;
+		}
+	} else {
+		/* An empty name opens the share root: copy the share path */
+		len = strlen(share->path);
+		ksmbd_debug(SMB, "share path len %d\n", len);
+		name = kmalloc(len + 1, GFP_KERNEL);
+		if (!name) {
+			rsp->hdr.Status = STATUS_NO_MEMORY;
+			rc = -ENOMEM;
+			goto err_out1;
+		}
+
+		memcpy(name, share->path, len);
+		*(name + len) = '\0';
+	}
+
+	req_op_level = req->RequestedOplockLevel;
+	if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE)
+		lc = parse_lease_state(req);
+
+	if (le32_to_cpu(req->ImpersonationLevel) > le32_to_cpu(IL_DELEGATE_LE)) {
+		pr_err("Invalid impersonationlevel : 0x%x\n",
+		       le32_to_cpu(req->ImpersonationLevel));
+		rc = -EIO;
+		rsp->hdr.Status = STATUS_BAD_IMPERSONATION_LEVEL;
+		goto err_out1;
+	}
+
+	if (req->CreateOptions && !(req->CreateOptions & CREATE_OPTIONS_MASK)) {
+		pr_err("Invalid create options : 0x%x\n",
+		       le32_to_cpu(req->CreateOptions));
+		rc = -EINVAL;
+		goto err_out1;
+	} else {
+		/*
+		 * Drop only the conflicting flag. A plain assignment of the
+		 * complement would turn on every other option bit.
+		 */
+		if (req->CreateOptions & FILE_SEQUENTIAL_ONLY_LE &&
+		    req->CreateOptions & FILE_RANDOM_ACCESS_LE)
+			req->CreateOptions &= ~(FILE_SEQUENTIAL_ONLY_LE);
+
+		if (req->CreateOptions &
+		    (FILE_OPEN_BY_FILE_ID_LE | CREATE_TREE_CONNECTION |
+		     FILE_RESERVE_OPFILTER_LE)) {
+			rc = -EOPNOTSUPP;
+			goto err_out1;
+		}
+
+		if (req->CreateOptions & FILE_DIRECTORY_FILE_LE) {
+			if (req->CreateOptions & FILE_NON_DIRECTORY_FILE_LE) {
+				rc = -EINVAL;
+				goto err_out1;
+			} else if (req->CreateOptions & FILE_NO_COMPRESSION_LE) {
+				/* Clear just this flag, see above */
+				req->CreateOptions &= ~(FILE_NO_COMPRESSION_LE);
+			}
+		}
+	}
+
+	if (le32_to_cpu(req->CreateDisposition) >
+	    le32_to_cpu(FILE_OVERWRITE_IF_LE)) {
+		pr_err("Invalid create disposition : 0x%x\n",
+		       le32_to_cpu(req->CreateDisposition));
+		rc = -EINVAL;
+		goto err_out1;
+	}
+
+	if (!(req->DesiredAccess & DESIRED_ACCESS_MASK)) {
+		pr_err("Invalid desired access : 0x%x\n",
+		       le32_to_cpu(req->DesiredAccess));
+		rc = -EACCES;
+		goto err_out1;
+	}
+
+	if (req->FileAttributes && !(req->FileAttributes & ATTR_MASK_LE)) {
+		pr_err("Invalid file attribute : 0x%x\n",
+		       le32_to_cpu(req->FileAttributes));
+		rc = -EINVAL;
+		goto err_out1;
+	}
+
+	if (req->CreateContextsOffset) {
+		/* Parse non-durable handle create contexts */
+		context = smb2_find_context_vals(req, SMB2_CREATE_EA_BUFFER);
+		if (IS_ERR(context)) {
+			rc = PTR_ERR(context);
+			goto err_out1;
+		} else if (context) {
+			ea_buf = (struct create_ea_buf_req *)context;
+			if (req->CreateOptions & FILE_NO_EA_KNOWLEDGE_LE) {
+				rsp->hdr.Status = STATUS_ACCESS_DENIED;
+				rc = -EACCES;
+				goto err_out1;
+			}
+		}
+
+		context = smb2_find_context_vals(req,
+						 SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST);
+		if (IS_ERR(context)) {
+			rc = PTR_ERR(context);
+			goto err_out1;
+		} else if (context) {
+			ksmbd_debug(SMB,
+				    "get query maximal access context\n");
+			maximal_access_ctxt = 1;
+		}
+
+		context = smb2_find_context_vals(req,
+						 SMB2_CREATE_TIMEWARP_REQUEST);
+		if (IS_ERR(context)) {
+			rc = PTR_ERR(context);
+			goto err_out1;
+		} else if (context) {
+			ksmbd_debug(SMB, "get timewarp context\n");
+			rc = -EBADF;
+			goto err_out1;
+		}
+
+		if (tcon->posix_extensions) {
+			context = smb2_find_context_vals(req,
+							 SMB2_CREATE_TAG_POSIX);
+			if (IS_ERR(context)) {
+				rc = PTR_ERR(context);
+				goto err_out1;
+			} else if (context) {
+				struct create_posix *posix =
+					(struct create_posix *)context;
+				ksmbd_debug(SMB, "get posix context\n");
+
+				posix_mode = le32_to_cpu(posix->Mode);
+				posix_ctxt = 1;
+			}
+		}
+	}
+
+	if (ksmbd_override_fsids(work)) {
+		rc = -ENOMEM;
+		goto err_out1;
+	}
+
+	if (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE) {
+		/*
+		 * On delete request, instead of following up, need to
+		 * look the current entity
+		 */
+		rc = ksmbd_vfs_kern_path(name, 0, &path, 1);
+		if (!rc) {
+			/*
+			 * If file exists with under flags, return access
+			 * denied error.
+			 */
+			if (req->CreateDisposition == FILE_OVERWRITE_IF_LE ||
+			    req->CreateDisposition == FILE_OPEN_IF_LE) {
+				rc = -EACCES;
+				path_put(&path);
+				goto err_out;
+			}
+
+			if (!test_tree_conn_flag(tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+				ksmbd_debug(SMB,
+					    "User does not have write permission\n");
+				rc = -EACCES;
+				path_put(&path);
+				goto err_out;
+			}
+		}
+	} else {
+		if (test_share_config_flag(work->tcon->share_conf,
+					   KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS)) {
+			/*
+			 * Use LOOKUP_FOLLOW to follow the path of
+			 * symlink in path buildup
+			 */
+			rc = ksmbd_vfs_kern_path(name, LOOKUP_FOLLOW, &path, 1);
+			if (rc) { /* Case for broken link ?*/
+				rc = ksmbd_vfs_kern_path(name, 0, &path, 1);
+			}
+		} else {
+			rc = ksmbd_vfs_kern_path(name, 0, &path, 1);
+			if (!rc && d_is_symlink(path.dentry)) {
+				rc = -EACCES;
+				path_put(&path);
+				goto err_out;
+			}
+		}
+	}
+
+	if (rc) {
+		if (rc == -EACCES) {
+			ksmbd_debug(SMB,
+				    "User does not have right permission\n");
+			goto err_out;
+		}
+		/* Any other lookup failure is treated as "file not present" */
+		ksmbd_debug(SMB, "can not get linux path for %s, rc = %d\n",
+			    name, rc);
+		rc = 0;
+	} else {
+		file_present = true;
+		user_ns = mnt_user_ns(path.mnt);
+		generic_fillattr(user_ns, d_inode(path.dentry), &stat);
+	}
+	if (stream_name) {
+		if (req->CreateOptions & FILE_DIRECTORY_FILE_LE) {
+			if (s_type == DATA_STREAM) {
+				rc = -EIO;
+				rsp->hdr.Status = STATUS_NOT_A_DIRECTORY;
+			}
+		} else {
+			/* stat is only filled in when the file was found */
+			if (file_present && S_ISDIR(stat.mode) &&
+			    s_type == DATA_STREAM) {
+				rc = -EIO;
+				rsp->hdr.Status = STATUS_FILE_IS_A_DIRECTORY;
+			}
+		}
+
+		if (req->CreateOptions & FILE_DIRECTORY_FILE_LE &&
+		    req->FileAttributes & ATTR_NORMAL_LE) {
+			rsp->hdr.Status = STATUS_NOT_A_DIRECTORY;
+			rc = -EIO;
+		}
+
+		if (rc < 0)
+			goto err_out;
+	}
+
+	if (file_present && req->CreateOptions & FILE_NON_DIRECTORY_FILE_LE &&
+	    S_ISDIR(stat.mode) && !(req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)) {
+		ksmbd_debug(SMB, "open() argument is a directory: %s, %x\n",
+			    name, req->CreateOptions);
+		rsp->hdr.Status = STATUS_FILE_IS_A_DIRECTORY;
+		rc = -EIO;
+		goto err_out;
+	}
+
+	if (file_present && (req->CreateOptions & FILE_DIRECTORY_FILE_LE) &&
+	    !(req->CreateDisposition == FILE_CREATE_LE) &&
+	    !S_ISDIR(stat.mode)) {
+		rsp->hdr.Status = STATUS_NOT_A_DIRECTORY;
+		rc = -EIO;
+		goto err_out;
+	}
+
+	if (!stream_name && file_present &&
+	    req->CreateDisposition == FILE_CREATE_LE) {
+		rc = -EEXIST;
+		goto err_out;
+	}
+
+	daccess = smb_map_generic_desired_access(req->DesiredAccess);
+
+	if (file_present && !(req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)) {
+		rc = smb_check_perm_dacl(conn, &path, &daccess,
+					 sess->user->uid);
+		if (rc)
+			goto err_out;
+	}
+
+	if (daccess & FILE_MAXIMAL_ACCESS_LE) {
+		if (!file_present) {
+			daccess = cpu_to_le32(GENERIC_ALL_FLAGS);
+		} else {
+			rc = ksmbd_vfs_query_maximal_access(user_ns,
+							    path.dentry,
+							    &daccess);
+			if (rc)
+				goto err_out;
+			already_permitted = true;
+		}
+		maximal_access = daccess;
+	}
+
+	open_flags = smb2_create_open_flags(file_present, daccess,
+					    req->CreateDisposition,
+					    &may_flags);
+
+	if (!test_tree_conn_flag(tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+		if (open_flags & O_CREAT) {
+			ksmbd_debug(SMB,
+				    "User does not have write permission\n");
+			rc = -EACCES;
+			goto err_out;
+		}
+	}
+
+	/*create file if not present */
+	if (!file_present) {
+		rc = smb2_creat(work, &path, name, open_flags, posix_mode,
+				req->CreateOptions & FILE_DIRECTORY_FILE_LE);
+		if (rc) {
+			if (rc == -ENOENT) {
+				rc = -EIO;
+				rsp->hdr.Status = STATUS_OBJECT_PATH_NOT_FOUND;
+			}
+			goto err_out;
+		}
+
+		created = true;
+		user_ns = mnt_user_ns(path.mnt);
+		if (ea_buf) {
+			rc = smb2_set_ea(&ea_buf->ea, &path);
+			if (rc == -EOPNOTSUPP)
+				rc = 0;
+			else if (rc)
+				goto err_out;
+		}
+	} else if (!already_permitted) {
+		/* FILE_READ_ATTRIBUTE is allowed without inode_permission,
+		 * because execute(search) permission on a parent directory,
+		 * is already granted.
+		 */
+		if (daccess & ~(FILE_READ_ATTRIBUTES_LE | FILE_READ_CONTROL_LE)) {
+			rc = inode_permission(user_ns,
+					      d_inode(path.dentry),
+					      may_flags);
+			if (rc)
+				goto err_out;
+
+			if ((daccess & FILE_DELETE_LE) ||
+			    (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)) {
+				rc = ksmbd_vfs_may_delete(user_ns,
+							  path.dentry);
+				if (rc)
+					goto err_out;
+			}
+		}
+	}
+
+	rc = ksmbd_query_inode_status(d_inode(path.dentry->d_parent));
+	if (rc == KSMBD_INODE_STATUS_PENDING_DELETE) {
+		rc = -EBUSY;
+		goto err_out;
+	}
+
+	rc = 0;
+	filp = dentry_open(&path, open_flags, current_cred());
+	if (IS_ERR(filp)) {
+		rc = PTR_ERR(filp);
+		pr_err("dentry open for dir failed, rc %d\n", rc);
+		goto err_out;
+	}
+
+	if (file_present) {
+		if (!(open_flags & O_TRUNC))
+			file_info = FILE_OPENED;
+		else
+			file_info = FILE_OVERWRITTEN;
+
+		if ((req->CreateDisposition & FILE_CREATE_MASK_LE) ==
+		    FILE_SUPERSEDE_LE)
+			file_info = FILE_SUPERSEDED;
+	} else if (open_flags & O_CREAT) {
+		file_info = FILE_CREATED;
+	}
+
+	ksmbd_vfs_set_fadvise(filp, req->CreateOptions);
+
+	/* Obtain Volatile-ID */
+	fp = ksmbd_open_fd(work, filp);
+	if (IS_ERR(fp)) {
+		fput(filp);
+		rc = PTR_ERR(fp);
+		fp = NULL;
+		goto err_out;
+	}
+
+	/* Get Persistent-ID */
+	ksmbd_open_durable_fd(fp);
+	if (!has_file_id(fp->persistent_id)) {
+		rc = -ENOMEM;
+		goto err_out;
+	}
+
+	/* From here on the error path must not kfree() name separately */
+	fp->filename = name;
+	fp->cdoption = req->CreateDisposition;
+	fp->daccess = daccess;
+	fp->saccess = req->ShareAccess;
+	fp->coption = req->CreateOptions;
+
+	/* Set default windows and posix acls if creating new file */
+	if (created) {
+		int posix_acl_rc;
+		struct inode *inode = d_inode(path.dentry);
+
+		posix_acl_rc = ksmbd_vfs_inherit_posix_acl(user_ns,
+							   inode,
+							   d_inode(path.dentry->d_parent));
+		if (posix_acl_rc)
+			ksmbd_debug(SMB, "inherit posix acl failed : %d\n", posix_acl_rc);
+
+		if (test_share_config_flag(work->tcon->share_conf,
+					   KSMBD_SHARE_FLAG_ACL_XATTR)) {
+			rc = smb_inherit_dacl(conn, &path, sess->user->uid,
+					      sess->user->gid);
+		}
+
+		if (rc) {
+			rc = smb2_create_sd_buffer(work, req, &path);
+			if (rc) {
+				if (posix_acl_rc)
+					ksmbd_vfs_set_init_posix_acl(user_ns,
+								     inode);
+
+				if (test_share_config_flag(work->tcon->share_conf,
+							   KSMBD_SHARE_FLAG_ACL_XATTR)) {
+					struct smb_fattr fattr;
+					struct smb_ntsd *pntsd;
+					int pntsd_size, ace_num = 0;
+
+					ksmbd_acls_fattr(&fattr, inode);
+					if (fattr.cf_acls)
+						ace_num = fattr.cf_acls->a_count;
+					if (fattr.cf_dacls)
+						ace_num += fattr.cf_dacls->a_count;
+
+					pntsd = kmalloc(sizeof(struct smb_ntsd) +
+							sizeof(struct smb_sid) * 3 +
+							sizeof(struct smb_acl) +
+							sizeof(struct smb_ace) * ace_num * 2,
+							GFP_KERNEL);
+					if (!pntsd) {
+						/*
+						 * Drop the ACL references taken
+						 * by ksmbd_acls_fattr() and
+						 * report the real cause instead
+						 * of the stale rc.
+						 */
+						posix_acl_release(fattr.cf_acls);
+						posix_acl_release(fattr.cf_dacls);
+						rsp->hdr.Status = STATUS_NO_MEMORY;
+						rc = -ENOMEM;
+						goto err_out;
+					}
+
+					rc = build_sec_desc(user_ns,
+							    pntsd, NULL,
+							    OWNER_SECINFO |
+							    GROUP_SECINFO |
+							    DACL_SECINFO,
+							    &pntsd_size, &fattr);
+					posix_acl_release(fattr.cf_acls);
+					posix_acl_release(fattr.cf_dacls);
+
+					/*
+					 * Storing the descriptor is best
+					 * effort, but never store one that
+					 * could not be built.
+					 */
+					if (rc) {
+						pr_err("failed to build security descriptor : %d\n",
+						       rc);
+					} else {
+						rc = ksmbd_vfs_set_sd_xattr(conn,
+									    user_ns,
+									    path.dentry,
+									    pntsd,
+									    pntsd_size);
+						if (rc)
+							pr_err("failed to store ntacl in xattr : %d\n",
+							       rc);
+					}
+					kfree(pntsd);
+				}
+			}
+		}
+		rc = 0;
+	}
+
+	if (stream_name) {
+		rc = smb2_set_stream_name_xattr(&path,
+						fp,
+						stream_name,
+						s_type);
+		if (rc)
+			goto err_out;
+		file_info = FILE_CREATED;
+	}
+
+	fp->attrib_only = !(req->DesiredAccess & ~(FILE_READ_ATTRIBUTES_LE |
+			FILE_WRITE_ATTRIBUTES_LE | FILE_SYNCHRONIZE_LE));
+	if (!S_ISDIR(file_inode(filp)->i_mode) && open_flags & O_TRUNC &&
+	    !fp->attrib_only && !stream_name) {
+		smb_break_all_oplock(work, fp);
+		need_truncate = 1;
+	}
+
+	/* fp should be searchable through ksmbd_inode.m_fp_list
+	 * after daccess, saccess, attrib_only, and stream are
+	 * initialized.
+	 */
+	write_lock(&fp->f_ci->m_lock);
+	list_add(&fp->node, &fp->f_ci->m_fp_list);
+	write_unlock(&fp->f_ci->m_lock);
+
+	rc = ksmbd_vfs_getattr(&path, &stat);
+	if (rc) {
+		generic_fillattr(user_ns, d_inode(path.dentry), &stat);
+		rc = 0;
+	}
+
+	/* Check delete pending among previous fp before oplock break */
+	if (ksmbd_inode_pending_delete(fp)) {
+		rc = -EBUSY;
+		goto err_out;
+	}
+
+	share_ret = ksmbd_smb_check_shared_mode(fp->filp, fp);
+	if (!test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_OPLOCKS) ||
+	    (req_op_level == SMB2_OPLOCK_LEVEL_LEASE &&
+	     !(conn->vals->capabilities & SMB2_GLOBAL_CAP_LEASING))) {
+		if (share_ret < 0 && !S_ISDIR(file_inode(fp->filp)->i_mode)) {
+			rc = share_ret;
+			goto err_out;
+		}
+	} else {
+		if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) {
+			/*
+			 * lc is only ever set from parse_lease_state(). If
+			 * that produced no lease context, fail the request
+			 * instead of dereferencing a NULL pointer.
+			 */
+			if (!lc) {
+				rc = -EINVAL;
+				goto err_out;
+			}
+			req_op_level = smb2_map_lease_to_oplock(lc->req_state);
+			ksmbd_debug(SMB,
+				    "lease req for(%s) req oplock state 0x%x, lease state 0x%x\n",
+				    name, req_op_level, lc->req_state);
+			rc = find_same_lease_key(sess, fp->f_ci, lc);
+			if (rc)
+				goto err_out;
+		} else if (open_flags == O_RDONLY &&
+			   (req_op_level == SMB2_OPLOCK_LEVEL_BATCH ||
+			    req_op_level == SMB2_OPLOCK_LEVEL_EXCLUSIVE))
+			req_op_level = SMB2_OPLOCK_LEVEL_II;
+
+		rc = smb_grant_oplock(work, req_op_level,
+				      fp->persistent_id, fp,
+				      le32_to_cpu(req->hdr.Id.SyncId.TreeId),
+				      lc, share_ret);
+		if (rc < 0)
+			goto err_out;
+	}
+
+	if (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)
+		ksmbd_fd_set_delete_on_close(fp, file_info);
+
+	if (need_truncate) {
+		rc = smb2_create_truncate(&path);
+		if (rc)
+			goto err_out;
+	}
+
+	if (req->CreateContextsOffset) {
+		struct create_alloc_size_req *az_req;
+
+		az_req = (struct create_alloc_size_req *)smb2_find_context_vals(req,
+					SMB2_CREATE_ALLOCATION_SIZE);
+		if (IS_ERR(az_req)) {
+			rc = PTR_ERR(az_req);
+			goto err_out;
+		} else if (az_req) {
+			loff_t alloc_size = le64_to_cpu(az_req->AllocationSize);
+			int err;
+
+			ksmbd_debug(SMB,
+				    "request smb2 create allocate size : %llu\n",
+				    alloc_size);
+			smb_break_all_levII_oplock(work, fp, 1);
+			/* Preallocation failure is logged but not fatal */
+			err = vfs_fallocate(fp->filp, FALLOC_FL_KEEP_SIZE, 0,
+					    alloc_size);
+			if (err < 0)
+				ksmbd_debug(SMB,
+					    "vfs_fallocate is failed : %d\n",
+					    err);
+		}
+
+		context = smb2_find_context_vals(req, SMB2_CREATE_QUERY_ON_DISK_ID);
+		if (IS_ERR(context)) {
+			rc = PTR_ERR(context);
+			goto err_out;
+		} else if (context) {
+			ksmbd_debug(SMB, "get query on disk id context\n");
+			query_disk_id = 1;
+		}
+	}
+
+	if (stat.result_mask & STATX_BTIME)
+		fp->create_time = ksmbd_UnixTimeToNT(stat.btime);
+	else
+		fp->create_time = ksmbd_UnixTimeToNT(stat.ctime);
+	if (req->FileAttributes || fp->f_ci->m_fattr == 0)
+		fp->f_ci->m_fattr =
+			cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes)));
+
+	if (!created)
+		smb2_update_xattrs(tcon, &path, fp);
+	else
+		smb2_new_xattrs(tcon, &path, fp);
+
+	memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE);
+
+	generic_fillattr(user_ns, file_inode(fp->filp),
+			 &stat);
+
+	rsp->StructureSize = cpu_to_le16(89);
+	rcu_read_lock();
+	opinfo = rcu_dereference(fp->f_opinfo);
+	rsp->OplockLevel = opinfo != NULL ? opinfo->level : 0;
+	rcu_read_unlock();
+	rsp->Reserved = 0;
+	rsp->CreateAction = cpu_to_le32(file_info);
+	rsp->CreationTime = cpu_to_le64(fp->create_time);
+	time = ksmbd_UnixTimeToNT(stat.atime);
+	rsp->LastAccessTime = cpu_to_le64(time);
+	time = ksmbd_UnixTimeToNT(stat.mtime);
+	rsp->LastWriteTime = cpu_to_le64(time);
+	time = ksmbd_UnixTimeToNT(stat.ctime);
+	rsp->ChangeTime = cpu_to_le64(time);
+	rsp->AllocationSize = S_ISDIR(stat.mode) ? 0 :
+		cpu_to_le64(stat.blocks << 9);
+	rsp->EndofFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+	rsp->FileAttributes = fp->f_ci->m_fattr;
+
+	rsp->Reserved2 = 0;
+
+	rsp->PersistentFileId = cpu_to_le64(fp->persistent_id);
+	rsp->VolatileFileId = cpu_to_le64(fp->volatile_id);
+
+	rsp->CreateContextsOffset = 0;
+	rsp->CreateContextsLength = 0;
+	inc_rfc1001_len(rsp_org, 88); /* StructureSize - 1*/
+
+	/*
+	 * Response create contexts are appended back to back in rsp->Buffer.
+	 * next_ptr/next_off remember the previous context so that its Next
+	 * field can be set once another context follows it.
+	 */
+
+	/* If lease is request send lease context response */
+	if (opinfo && opinfo->is_lease) {
+		struct create_context *lease_ccontext;
+
+		ksmbd_debug(SMB, "lease granted on(%s) lease state 0x%x\n",
+			    name, opinfo->o_lease->state);
+		rsp->OplockLevel = SMB2_OPLOCK_LEVEL_LEASE;
+
+		lease_ccontext = (struct create_context *)rsp->Buffer;
+		contxt_cnt++;
+		create_lease_buf(rsp->Buffer, opinfo->o_lease);
+		le32_add_cpu(&rsp->CreateContextsLength,
+			     conn->vals->create_lease_size);
+		inc_rfc1001_len(rsp_org, conn->vals->create_lease_size);
+		next_ptr = &lease_ccontext->Next;
+		next_off = conn->vals->create_lease_size;
+	}
+
+	if (maximal_access_ctxt) {
+		struct create_context *mxac_ccontext;
+
+		if (maximal_access == 0)
+			ksmbd_vfs_query_maximal_access(user_ns,
+						       path.dentry,
+						       &maximal_access);
+		mxac_ccontext = (struct create_context *)(rsp->Buffer +
+				le32_to_cpu(rsp->CreateContextsLength));
+		contxt_cnt++;
+		create_mxac_rsp_buf(rsp->Buffer +
+				le32_to_cpu(rsp->CreateContextsLength),
+				le32_to_cpu(maximal_access));
+		le32_add_cpu(&rsp->CreateContextsLength,
+			     conn->vals->create_mxac_size);
+		inc_rfc1001_len(rsp_org, conn->vals->create_mxac_size);
+		if (next_ptr)
+			*next_ptr = cpu_to_le32(next_off);
+		next_ptr = &mxac_ccontext->Next;
+		next_off = conn->vals->create_mxac_size;
+	}
+
+	if (query_disk_id) {
+		struct create_context *disk_id_ccontext;
+
+		disk_id_ccontext = (struct create_context *)(rsp->Buffer +
+				le32_to_cpu(rsp->CreateContextsLength));
+		contxt_cnt++;
+		create_disk_id_rsp_buf(rsp->Buffer +
+				le32_to_cpu(rsp->CreateContextsLength),
+				stat.ino, tcon->id);
+		le32_add_cpu(&rsp->CreateContextsLength,
+			     conn->vals->create_disk_id_size);
+		inc_rfc1001_len(rsp_org, conn->vals->create_disk_id_size);
+		if (next_ptr)
+			*next_ptr = cpu_to_le32(next_off);
+		next_ptr = &disk_id_ccontext->Next;
+		next_off = conn->vals->create_disk_id_size;
+	}
+
+	if (posix_ctxt) {
+		contxt_cnt++;
+		create_posix_rsp_buf(rsp->Buffer +
+				le32_to_cpu(rsp->CreateContextsLength),
+				fp);
+		le32_add_cpu(&rsp->CreateContextsLength,
+			     conn->vals->create_posix_size);
+		inc_rfc1001_len(rsp_org, conn->vals->create_posix_size);
+		if (next_ptr)
+			*next_ptr = cpu_to_le32(next_off);
+	}
+
+	if (contxt_cnt > 0) {
+		rsp->CreateContextsOffset =
+			cpu_to_le32(offsetof(struct smb2_create_rsp, Buffer)
+			- 4);
+	}
+
+err_out:
+	if (file_present || created)
+		path_put(&path);
+	ksmbd_revert_fsids(work);
+err_out1:
+	if (rc) {
+		/* Map the errno to an SMB status unless one was already set */
+		if (rc == -EINVAL)
+			rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+		else if (rc == -EOPNOTSUPP)
+			rsp->hdr.Status = STATUS_NOT_SUPPORTED;
+		else if (rc == -EACCES || rc == -ESTALE)
+			rsp->hdr.Status = STATUS_ACCESS_DENIED;
+		else if (rc == -ENOENT)
+			rsp->hdr.Status = STATUS_OBJECT_NAME_INVALID;
+		else if (rc == -EPERM)
+			rsp->hdr.Status = STATUS_SHARING_VIOLATION;
+		else if (rc == -EBUSY)
+			rsp->hdr.Status = STATUS_DELETE_PENDING;
+		else if (rc == -EBADF)
+			rsp->hdr.Status = STATUS_OBJECT_NAME_NOT_FOUND;
+		else if (rc == -ENOEXEC)
+			rsp->hdr.Status = STATUS_DUPLICATE_OBJECTID;
+		else if (rc == -ENXIO)
+			rsp->hdr.Status = STATUS_NO_SUCH_DEVICE;
+		else if (rc == -EEXIST)
+			rsp->hdr.Status = STATUS_OBJECT_NAME_COLLISION;
+		else if (rc == -EMFILE)
+			rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES;
+		if (!rsp->hdr.Status)
+			rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
+
+		/* name is freed here only while fp has not taken it over */
+		if (!fp || !fp->filename)
+			kfree(name);
+		if (fp)
+			ksmbd_fd_put(work, fp);
+		smb2_set_err_rsp(work);
+		ksmbd_debug(SMB, "Error response: %x\n", rsp->hdr.Status);
+	}
+
+	kfree(lc);
+
+	return 0;
+}
+
+/*
+ * readdir_info_level_struct_sz() - size of the entry structure of a level
+ *
+ * Return: sizeof() of the directory entry structure that belongs to
+ *	   @info_level, or -EOPNOTSUPP for a level not handled here
+ */
+static int readdir_info_level_struct_sz(int info_level)
+{
+	int sz = -EOPNOTSUPP;
+
+	switch (info_level) {
+	case FILE_FULL_DIRECTORY_INFORMATION:
+		sz = sizeof(struct file_full_directory_info);
+		break;
+	case FILE_BOTH_DIRECTORY_INFORMATION:
+		sz = sizeof(struct file_both_directory_info);
+		break;
+	case FILE_DIRECTORY_INFORMATION:
+		sz = sizeof(struct file_directory_info);
+		break;
+	case FILE_NAMES_INFORMATION:
+		sz = sizeof(struct file_names_info);
+		break;
+	case FILEID_FULL_DIRECTORY_INFORMATION:
+		sz = sizeof(struct file_id_full_dir_info);
+		break;
+	case FILEID_BOTH_DIRECTORY_INFORMATION:
+		sz = sizeof(struct file_id_both_directory_info);
+		break;
+	case SMB_FIND_FILE_POSIX_INFO:
+		sz = sizeof(struct smb2_posix_info);
+		break;
+	default:
+		break;
+	}
+
+	return sz;
+}
+
+/*
+ * dentry_name() - pick up the name of the entry at d_info->rptr
+ *
+ * Interprets the data at d_info->rptr as the entry structure selected by
+ * @info_level, stores the entry's name pointer and name length in @d_info and
+ * moves d_info->rptr forward by the entry's NextEntryOffset.
+ *
+ * Return: 0 on success, -EINVAL for an unknown @info_level
+ */
+static int dentry_name(struct ksmbd_dir_info *d_info, int info_level)
+{
+	__le32 next_off;
+
+	switch (info_level) {
+	case FILE_FULL_DIRECTORY_INFORMATION:
+	{
+		struct file_full_directory_info *ent;
+
+		ent = (struct file_full_directory_info *)d_info->rptr;
+		next_off = ent->NextEntryOffset;
+		d_info->name = ent->FileName;
+		d_info->name_len = le32_to_cpu(ent->FileNameLength);
+		break;
+	}
+	case FILE_BOTH_DIRECTORY_INFORMATION:
+	{
+		struct file_both_directory_info *ent;
+
+		ent = (struct file_both_directory_info *)d_info->rptr;
+		next_off = ent->NextEntryOffset;
+		d_info->name = ent->FileName;
+		d_info->name_len = le32_to_cpu(ent->FileNameLength);
+		break;
+	}
+	case FILE_DIRECTORY_INFORMATION:
+	{
+		struct file_directory_info *ent;
+
+		ent = (struct file_directory_info *)d_info->rptr;
+		next_off = ent->NextEntryOffset;
+		d_info->name = ent->FileName;
+		d_info->name_len = le32_to_cpu(ent->FileNameLength);
+		break;
+	}
+	case FILE_NAMES_INFORMATION:
+	{
+		struct file_names_info *ent;
+
+		ent = (struct file_names_info *)d_info->rptr;
+		next_off = ent->NextEntryOffset;
+		d_info->name = ent->FileName;
+		d_info->name_len = le32_to_cpu(ent->FileNameLength);
+		break;
+	}
+	case FILEID_FULL_DIRECTORY_INFORMATION:
+	{
+		struct file_id_full_dir_info *ent;
+
+		ent = (struct file_id_full_dir_info *)d_info->rptr;
+		next_off = ent->NextEntryOffset;
+		d_info->name = ent->FileName;
+		d_info->name_len = le32_to_cpu(ent->FileNameLength);
+		break;
+	}
+	case FILEID_BOTH_DIRECTORY_INFORMATION:
+	{
+		struct file_id_both_directory_info *ent;
+
+		ent = (struct file_id_both_directory_info *)d_info->rptr;
+		next_off = ent->NextEntryOffset;
+		d_info->name = ent->FileName;
+		d_info->name_len = le32_to_cpu(ent->FileNameLength);
+		break;
+	}
+	case SMB_FIND_FILE_POSIX_INFO:
+	{
+		struct smb2_posix_info *ent;
+
+		ent = (struct smb2_posix_info *)d_info->rptr;
+		next_off = ent->NextEntryOffset;
+		d_info->name = ent->name;
+		d_info->name_len = le32_to_cpu(ent->name_len);
+		break;
+	}
+	default:
+		return -EINVAL;
+	}
+
+	/* Step the read cursor to the following entry. */
+	d_info->rptr += le32_to_cpu(next_off);
+	return 0;
+}
+
+/**
+ * smb2_populate_readdir_entry() - encode directory entry in smb2 response
+ * buffer
+ * @conn:	connection instance
+ * @info_level:	smb information level
+ * @d_info:	structure included variables for query dir
+ * @user_ns:	user namespace
+ * @ksmbd_kstat:	ksmbd wrapper of dirent stat information
+ *
+ * if directory has many entries, find first can't read it fully.
+ * find next might be called multiple times to read remaining dir entries
+ *
+ * The entry is written at d_info->wptr; on success the write pointer, data
+ * count and remaining buffer length in @d_info are advanced by the aligned
+ * entry size.
+ *
+ * Return:	0 on success, -ENOMEM if the name could not be converted,
+ *		-EINVAL for a negative converted length, -EOPNOTSUPP for an
+ *		unsupported @info_level, -ENOSPC if the entry does not fit
+ */
+static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
+				       struct ksmbd_dir_info *d_info,
+				       struct user_namespace *user_ns,
+				       struct ksmbd_kstat *ksmbd_kstat)
+{
+	int next_entry_offset = 0;
+	char *conv_name;
+	int conv_len;
+	void *kstat;
+	int struct_sz, rc = 0;
+
+	conv_name = ksmbd_convert_dir_info_name(d_info,
+						conn->local_nls,
+						&conv_len);
+	if (!conv_name)
+		return -ENOMEM;
+
+	/* Somehow the name has only terminating NULL bytes */
+	if (conv_len < 0) {
+		rc = -EINVAL;
+		goto free_conv_name;
+	}
+
+	/*
+	 * An unsupported level yields a negative size. Stop here rather than
+	 * feeding it into the offset arithmetic below, the same way
+	 * reserve_populate_dentry() rejects it.
+	 */
+	struct_sz = readdir_info_level_struct_sz(info_level);
+	if (struct_sz < 0) {
+		rc = struct_sz;
+		goto free_conv_name;
+	}
+
+	next_entry_offset = ALIGN(struct_sz - 1 + conv_len,
+				  KSMBD_DIR_INFO_ALIGNMENT);
+
+	if (next_entry_offset > d_info->out_buf_len) {
+		d_info->out_buf_len = 0;
+		rc = -ENOSPC;
+		goto free_conv_name;
+	}
+
+	/* FILE_NAMES_INFORMATION carries no stat data, only the name */
+	kstat = d_info->wptr;
+	if (info_level != FILE_NAMES_INFORMATION)
+		kstat = ksmbd_vfs_init_kstat(&d_info->wptr, ksmbd_kstat);
+
+	switch (info_level) {
+	case FILE_FULL_DIRECTORY_INFORMATION:
+	{
+		struct file_full_directory_info *ffdinfo;
+
+		ffdinfo = (struct file_full_directory_info *)kstat;
+		ffdinfo->FileNameLength = cpu_to_le32(conv_len);
+		ffdinfo->EaSize =
+			smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
+		if (ffdinfo->EaSize)
+			ffdinfo->ExtFileAttributes = ATTR_REPARSE_POINT_LE;
+		if (d_info->hide_dot_file && d_info->name[0] == '.')
+			ffdinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+		memcpy(ffdinfo->FileName, conv_name, conv_len);
+		ffdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+		break;
+	}
+	case FILE_BOTH_DIRECTORY_INFORMATION:
+	{
+		struct file_both_directory_info *fbdinfo;
+
+		fbdinfo = (struct file_both_directory_info *)kstat;
+		fbdinfo->FileNameLength = cpu_to_le32(conv_len);
+		fbdinfo->EaSize =
+			smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
+		if (fbdinfo->EaSize)
+			fbdinfo->ExtFileAttributes = ATTR_REPARSE_POINT_LE;
+		fbdinfo->ShortNameLength = 0;
+		fbdinfo->Reserved = 0;
+		if (d_info->hide_dot_file && d_info->name[0] == '.')
+			fbdinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+		memcpy(fbdinfo->FileName, conv_name, conv_len);
+		fbdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+		break;
+	}
+	case FILE_DIRECTORY_INFORMATION:
+	{
+		struct file_directory_info *fdinfo;
+
+		fdinfo = (struct file_directory_info *)kstat;
+		fdinfo->FileNameLength = cpu_to_le32(conv_len);
+		if (d_info->hide_dot_file && d_info->name[0] == '.')
+			fdinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+		memcpy(fdinfo->FileName, conv_name, conv_len);
+		fdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+		break;
+	}
+	case FILE_NAMES_INFORMATION:
+	{
+		struct file_names_info *fninfo;
+
+		fninfo = (struct file_names_info *)kstat;
+		fninfo->FileNameLength = cpu_to_le32(conv_len);
+		memcpy(fninfo->FileName, conv_name, conv_len);
+		fninfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+		break;
+	}
+	case FILEID_FULL_DIRECTORY_INFORMATION:
+	{
+		struct file_id_full_dir_info *dinfo;
+
+		dinfo = (struct file_id_full_dir_info *)kstat;
+		dinfo->FileNameLength = cpu_to_le32(conv_len);
+		dinfo->EaSize =
+			smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
+		if (dinfo->EaSize)
+			dinfo->ExtFileAttributes = ATTR_REPARSE_POINT_LE;
+		dinfo->Reserved = 0;
+		dinfo->UniqueId = cpu_to_le64(ksmbd_kstat->kstat->ino);
+		if (d_info->hide_dot_file && d_info->name[0] == '.')
+			dinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+		memcpy(dinfo->FileName, conv_name, conv_len);
+		dinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+		break;
+	}
+	case FILEID_BOTH_DIRECTORY_INFORMATION:
+	{
+		struct file_id_both_directory_info *fibdinfo;
+
+		fibdinfo = (struct file_id_both_directory_info *)kstat;
+		fibdinfo->FileNameLength = cpu_to_le32(conv_len);
+		fibdinfo->EaSize =
+			smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
+		if (fibdinfo->EaSize)
+			fibdinfo->ExtFileAttributes = ATTR_REPARSE_POINT_LE;
+		fibdinfo->UniqueId = cpu_to_le64(ksmbd_kstat->kstat->ino);
+		fibdinfo->ShortNameLength = 0;
+		fibdinfo->Reserved = 0;
+		fibdinfo->Reserved2 = cpu_to_le16(0);
+		if (d_info->hide_dot_file && d_info->name[0] == '.')
+			fibdinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+		memcpy(fibdinfo->FileName, conv_name, conv_len);
+		fibdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+		break;
+	}
+	case SMB_FIND_FILE_POSIX_INFO:
+	{
+		struct smb2_posix_info *posix_info;
+		u64 time;
+
+		posix_info = (struct smb2_posix_info *)kstat;
+		posix_info->Ignored = 0;
+		posix_info->CreationTime = cpu_to_le64(ksmbd_kstat->create_time);
+		time = ksmbd_UnixTimeToNT(ksmbd_kstat->kstat->ctime);
+		posix_info->ChangeTime = cpu_to_le64(time);
+		time = ksmbd_UnixTimeToNT(ksmbd_kstat->kstat->atime);
+		posix_info->LastAccessTime = cpu_to_le64(time);
+		time = ksmbd_UnixTimeToNT(ksmbd_kstat->kstat->mtime);
+		posix_info->LastWriteTime = cpu_to_le64(time);
+		posix_info->EndOfFile = cpu_to_le64(ksmbd_kstat->kstat->size);
+		posix_info->AllocationSize = cpu_to_le64(ksmbd_kstat->kstat->blocks << 9);
+		posix_info->DeviceId = cpu_to_le32(ksmbd_kstat->kstat->rdev);
+		posix_info->HardLinks = cpu_to_le32(ksmbd_kstat->kstat->nlink);
+		posix_info->Mode = cpu_to_le32(ksmbd_kstat->kstat->mode);
+		posix_info->Inode = cpu_to_le64(ksmbd_kstat->kstat->ino);
+		posix_info->DosAttributes =
+			S_ISDIR(ksmbd_kstat->kstat->mode) ? ATTR_DIRECTORY_LE : ATTR_ARCHIVE_LE;
+		if (d_info->hide_dot_file && d_info->name[0] == '.')
+			posix_info->DosAttributes |= ATTR_HIDDEN_LE;
+		id_to_sid(from_kuid(user_ns, ksmbd_kstat->kstat->uid),
+			  SIDNFS_USER, (struct smb_sid *)&posix_info->SidBuffer[0]);
+		id_to_sid(from_kgid(user_ns, ksmbd_kstat->kstat->gid),
+			  SIDNFS_GROUP, (struct smb_sid *)&posix_info->SidBuffer[20]);
+		memcpy(posix_info->name, conv_name, conv_len);
+		posix_info->name_len = cpu_to_le32(conv_len);
+		posix_info->NextEntryOffset = cpu_to_le32(next_entry_offset);
+		break;
+	}
+
+	} /* switch (info_level) */
+
+	/* Account for the entry that was just written */
+	d_info->last_entry_offset = d_info->data_count;
+	d_info->data_count += next_entry_offset;
+	d_info->out_buf_len -= next_entry_offset;
+	d_info->wptr += next_entry_offset;
+
+	ksmbd_debug(SMB,
+		    "info_level : %d, buf_len :%d, next_offset : %d, data_count : %d\n",
+		    info_level, d_info->out_buf_len,
+		    next_entry_offset, d_info->data_count);
+
+free_conv_name:
+	kfree(conv_name);
+	return rc;
+}
+
+struct smb2_query_dir_private { /* state consumed by process_query_dir_entries() */
+ struct ksmbd_work *work; /* request being served; its conn is used when encoding entries */
+ char *search_pattern; /* NOTE(review): not referenced in the code visible here; presumably the client's search pattern - confirm */
+ struct ksmbd_file *dir_fp; /* open directory; its dentry is the parent for entry lookups */
+
+ struct ksmbd_dir_info *d_info; /* read/write cursors, entry count and buffer accounting */
+ int info_level; /* information level that selects the entry structure */
+};
+
+/* Lock the inode of an open directory using the I_MUTEX_PARENT subclass. */
+static void lock_dir(struct ksmbd_file *dir_fp)
+{
+	inode_lock_nested(d_inode(dir_fp->filp->f_path.dentry),
+			  I_MUTEX_PARENT);
+}
+
+/* Release the inode lock taken by lock_dir(). */
+static void unlock_dir(struct ksmbd_file *dir_fp)
+{
+	inode_unlock(d_inode(dir_fp->filp->f_path.dentry));
+}
+
+/*
+ * Second pass of a directory query: for each of the d_info->num_entry
+ * reserved names, look the name up below the directory and emit the final
+ * entry through smb2_populate_readdir_entry().
+ *
+ * Names whose lookup fails, or that resolve to a negative dentry, are
+ * skipped. Attributes are not fetched for FILE_NAMES_INFORMATION.
+ *
+ * Return: 0 on success, -EINVAL when dentry_name() fails, otherwise the
+ * error returned by smb2_populate_readdir_entry().
+ */
+static int process_query_dir_entries(struct smb2_query_dir_private *priv)
+{
+	struct user_namespace *user_ns = file_mnt_user_ns(priv->dir_fp->filp);
+	struct ksmbd_kstat ksmbd_kstat;
+	struct kstat kstat;
+	int entry;
+
+	for (entry = 0; entry < priv->d_info->num_entry; entry++) {
+		struct dentry *child;
+		int err;
+
+		if (dentry_name(priv->d_info, priv->info_level))
+			return -EINVAL;
+
+		/* lookup_one_len() is done with the parent directory locked */
+		lock_dir(priv->dir_fp);
+		child = lookup_one_len(priv->d_info->name,
+				       priv->dir_fp->filp->f_path.dentry,
+				       priv->d_info->name_len);
+		unlock_dir(priv->dir_fp);
+
+		if (IS_ERR(child)) {
+			ksmbd_debug(SMB, "Cannot lookup `%s' [%ld]\n",
+				    priv->d_info->name,
+				    PTR_ERR(child));
+			continue;
+		}
+
+		if (unlikely(d_is_negative(child))) {
+			dput(child);
+			ksmbd_debug(SMB, "Negative dentry `%s'\n",
+				    priv->d_info->name);
+			continue;
+		}
+
+		ksmbd_kstat.kstat = &kstat;
+		if (priv->info_level != FILE_NAMES_INFORMATION)
+			ksmbd_vfs_fill_dentry_attrs(priv->work, user_ns,
+						    child, &ksmbd_kstat);
+
+		err = smb2_populate_readdir_entry(priv->work->conn,
+						  priv->info_level,
+						  priv->d_info,
+						  user_ns,
+						  &ksmbd_kstat);
+		dput(child);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int reserve_populate_dentry(struct ksmbd_dir_info *d_info,
+ int info_level)
+{
+ int struct_sz;
+ int conv_len;
+ int next_entry_offset;
+
+ struct_sz = readdir_info_level_struct_sz(info_level);
+ if (struct_sz == -EOPNOTSUPP)
+ return -EOPNOTSUPP;
+
+ conv_len = (d_info->name_len + 1) * 2;
+ next_entry_offset = ALIGN(struct_sz - 1 + conv_len,
+ KSMBD_DIR_INFO_ALIGNMENT);
+
+ if (next_entry_offset > d_info->out_buf_len) {
+ d_info->out_buf_len = 0;
+ return -ENOSPC;
+ }
+
+ switch (info_level) {
+ case FILE_FULL_DIRECTORY_INFORMATION:
+ {
+ struct file_full_directory_info *ffdinfo;
+
+ ffdinfo = (struct file_full_directory_info *)d_info->wptr;
+ memcpy(ffdinfo->FileName, d_info->name, d_info->name_len);
+ ffdinfo->FileName[d_info->name_len] = 0x00;
+ ffdinfo->FileNameLength = cpu_to_le32(d_info->name_len);
+ ffdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case FILE_BOTH_DIRECTORY_INFORMATION:
+ {
+ struct file_both_directory_info *fbdinfo;
+
+ fbdinfo = (struct file_both_directory_info *)d_info->wptr;
+ memcpy(fbdinfo->FileName, d_info->name, d_info->name_len);
+ fbdinfo->FileName[d_info->name_len] = 0x00;
+ fbdinfo->FileNameLength = cpu_to_le32(d_info->name_len);
+ fbdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case FILE_DIRECTORY_INFORMATION:
+ {
+ struct file_directory_info *fdinfo;
+
+ fdinfo = (struct file_directory_info *)d_info->wptr;
+ memcpy(fdinfo->FileName, d_info->name, d_info->name_len);
+ fdinfo->FileName[d_info->name_len] = 0x00;
+ fdinfo->FileNameLength = cpu_to_le32(d_info->name_len);
+ fdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case FILE_NAMES_INFORMATION:
+ {
+ struct file_names_info *fninfo;
+
+ fninfo = (struct file_names_info *)d_info->wptr;
+ memcpy(fninfo->FileName, d_info->name, d_info->name_len);
+ fninfo->FileName[d_info->name_len] = 0x00;
+ fninfo->FileNameLength = cpu_to_le32(d_info->name_len);
+ fninfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case FILEID_FULL_DIRECTORY_INFORMATION:
+ {
+ struct file_id_full_dir_info *dinfo;
+
+ dinfo = (struct file_id_full_dir_info *)d_info->wptr;
+ memcpy(dinfo->FileName, d_info->name, d_info->name_len);
+ dinfo->FileName[d_info->name_len] = 0x00;
+ dinfo->FileNameLength = cpu_to_le32(d_info->name_len);
+ dinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case FILEID_BOTH_DIRECTORY_INFORMATION:
+ {
+ struct file_id_both_directory_info *fibdinfo;
+
+ fibdinfo = (struct file_id_both_directory_info *)d_info->wptr;
+ memcpy(fibdinfo->FileName, d_info->name, d_info->name_len);
+ fibdinfo->FileName[d_info->name_len] = 0x00;
+ fibdinfo->FileNameLength = cpu_to_le32(d_info->name_len);
+ fibdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case SMB_FIND_FILE_POSIX_INFO:
+ {
+ struct smb2_posix_info *posix_info;
+
+ posix_info = (struct smb2_posix_info *)d_info->wptr;
+ memcpy(posix_info->name, d_info->name, d_info->name_len);
+ posix_info->name[d_info->name_len] = 0x00;
+ posix_info->name_len = cpu_to_le32(d_info->name_len);
+ posix_info->NextEntryOffset =
+ cpu_to_le32(next_entry_offset);
+ break;
+ }
+ } /* switch (info_level) */
+
+ d_info->num_entry++;
+ d_info->out_buf_len -= next_entry_offset;
+ d_info->wptr += next_entry_offset;
+ return 0;
+}
+
+/*
+ * dir_context actor used by smb2_query_dir(): called once per directory
+ * entry and reserves an output slot for every name that should be reported.
+ *
+ * Entries are silently skipped (return 0) when they are "." or "..", are
+ * vetoed by the share configuration, or do not match the search pattern.
+ *
+ * Return: 0 to keep going, or the error from reserve_populate_dentry().
+ */
+static int __query_dir(struct dir_context *ctx, const char *name, int namlen,
+		       loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct ksmbd_readdir_data *rd =
+		container_of(ctx, struct ksmbd_readdir_data, ctx);
+	struct smb2_query_dir_private *priv = rd->private;
+	struct ksmbd_dir_info *d_info = priv->d_info;
+	int rc;
+
+	/* dot and dotdot entries are already reserved */
+	if (!strcmp(".", name) || !strcmp("..", name) ||
+	    ksmbd_share_veto_filename(priv->work->tcon->share_conf, name) ||
+	    !match_pattern(name, namlen, priv->search_pattern))
+		return 0;
+
+	d_info->name = name;
+	d_info->name_len = namlen;
+	rc = reserve_populate_dentry(d_info, priv->info_level);
+
+	/*
+	 * For a single-entry request, drop the remaining space to zero once
+	 * one slot has been reserved so that later reservations cannot fit.
+	 */
+	if (!rc && (d_info->flags & SMB2_RETURN_SINGLE_ENTRY))
+		d_info->out_buf_len = 0;
+
+	return rc;
+}
+
+/* Reset the position of a directory iteration context to 0. */
+static void restart_ctx(struct dir_context *ctx)
+{
+ ctx->pos = 0;
+}
+
+/*
+ * Check that @info_level is one of the directory information classes
+ * handled by the query directory code.
+ *
+ * Return: 0 if supported, -EOPNOTSUPP otherwise.
+ */
+static int verify_info_level(int info_level)
+{
+	switch (info_level) {
+	case FILE_FULL_DIRECTORY_INFORMATION:
+	case FILE_BOTH_DIRECTORY_INFORMATION:
+	case FILE_DIRECTORY_INFORMATION:
+	case FILE_NAMES_INFORMATION:
+	case FILEID_FULL_DIRECTORY_INFORMATION:
+	case FILEID_BOTH_DIRECTORY_INFORMATION:
+	case SMB_FIND_FILE_POSIX_INFO:
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * smb2_query_dir() - handler for the smb2 query directory command
+ * @work:	smb work containing the query directory request
+ *
+ * The directory is walked twice: iterate_dir() with the __query_dir() actor
+ * reserves a slot per matching name, then process_query_dir_entries() looks
+ * each name up and writes the final entry into the response buffer.
+ *
+ * Return: -ENOMEM if the fsids cannot be overridden; otherwise 0, with any
+ * failure reported to the client through rsp->hdr.Status.
+ */
+int smb2_query_dir(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct smb2_query_directory_req *req;
+	struct smb2_query_directory_rsp *rsp, *rsp_org;
+	struct ksmbd_share_config *share = work->tcon->share_conf;
+	struct ksmbd_file *dir_fp = NULL;
+	struct ksmbd_dir_info d_info;
+	int rc = 0;
+	char *srch_ptr = NULL;
+	unsigned char srch_flag;
+	int buffer_sz;
+	struct smb2_query_dir_private query_dir_private = {NULL, };
+
+	rsp_org = work->response_buf;
+	WORK_BUFFERS(work, req, rsp);
+
+	if (ksmbd_override_fsids(work)) {
+		rsp->hdr.Status = STATUS_NO_MEMORY;
+		smb2_set_err_rsp(work);
+		return -ENOMEM;
+	}
+
+	rc = verify_info_level(req->FileInformationClass);
+	if (rc) {
+		/* -EFAULT is mapped to STATUS_INVALID_INFO_CLASS below */
+		rc = -EFAULT;
+		goto err_out2;
+	}
+
+	dir_fp = ksmbd_lookup_fd_slow(work,
+				      le64_to_cpu(req->VolatileFileId),
+				      le64_to_cpu(req->PersistentFileId));
+	if (!dir_fp) {
+		rc = -EBADF;
+		goto err_out2;
+	}
+
+	if (!(dir_fp->daccess & FILE_LIST_DIRECTORY_LE) ||
+	    inode_permission(file_mnt_user_ns(dir_fp->filp),
+			     file_inode(dir_fp->filp),
+			     MAY_READ | MAY_EXEC)) {
+		pr_err("no right to enumerate directory (%pd)\n",
+		       dir_fp->filp->f_path.dentry);
+		rc = -EACCES;
+		goto err_out2;
+	}
+
+	if (!S_ISDIR(file_inode(dir_fp->filp)->i_mode)) {
+		pr_err("can't do query dir for a file\n");
+		rc = -EINVAL;
+		goto err_out2;
+	}
+
+	srch_flag = req->Flags;
+	srch_ptr = smb_strndup_from_utf16(req->Buffer,
+					  le16_to_cpu(req->FileNameLength), 1,
+					  conn->local_nls);
+	if (IS_ERR(srch_ptr)) {
+		ksmbd_debug(SMB, "Search Pattern not found\n");
+		rc = -EINVAL;
+		/* srch_ptr holds an ERR_PTR here, so skip the kfree() in err_out */
+		goto err_out2;
+	} else {
+		ksmbd_debug(SMB, "Search pattern is %s\n", srch_ptr);
+	}
+
+	ksmbd_debug(SMB, "Directory name is %s\n", dir_fp->filename);
+
+	if (srch_flag & SMB2_REOPEN || srch_flag & SMB2_RESTART_SCANS) {
+		ksmbd_debug(SMB, "Restart directory scan\n");
+		generic_file_llseek(dir_fp->filp, 0, SEEK_SET);
+		restart_ctx(&dir_fp->readdir_data.ctx);
+	}
+
+	memset(&d_info, 0, sizeof(struct ksmbd_dir_info));
+	d_info.wptr = (char *)rsp->Buffer;
+	d_info.rptr = (char *)rsp->Buffer;
+	d_info.out_buf_len = (work->response_sz - (get_rfc1002_len(rsp_org) + 4));
+	d_info.out_buf_len = min_t(int, d_info.out_buf_len, le32_to_cpu(req->OutputBufferLength)) -
+		sizeof(struct smb2_query_directory_rsp);
+	/*
+	 * A client-supplied OutputBufferLength smaller than the response
+	 * header (or larger than INT_MAX) leaves a negative length here.
+	 * Reject it instead of answering with an empty "success" payload.
+	 */
+	if (d_info.out_buf_len < 0) {
+		rc = -EINVAL;
+		goto err_out;
+	}
+	d_info.flags = srch_flag;
+
+	/*
+	 * reserve dot and dotdot entries in head of buffer
+	 * in first response
+	 */
+	rc = ksmbd_populate_dot_dotdot_entries(work, req->FileInformationClass,
+					       dir_fp, &d_info, srch_ptr,
+					       smb2_populate_readdir_entry);
+	if (rc == -ENOSPC)
+		rc = 0;
+	else if (rc)
+		goto err_out;
+
+	if (test_share_config_flag(share, KSMBD_SHARE_FLAG_HIDE_DOT_FILES))
+		d_info.hide_dot_file = true;
+
+	/* remember where the reserved slots start so pass two can rewind */
+	buffer_sz = d_info.out_buf_len;
+	d_info.rptr = d_info.wptr;
+	query_dir_private.work = work;
+	query_dir_private.search_pattern = srch_ptr;
+	query_dir_private.dir_fp = dir_fp;
+	query_dir_private.d_info = &d_info;
+	query_dir_private.info_level = req->FileInformationClass;
+	dir_fp->readdir_data.private = &query_dir_private;
+	set_ctx_actor(&dir_fp->readdir_data.ctx, __query_dir);
+
+	rc = iterate_dir(dir_fp->filp, &dir_fp->readdir_data.ctx);
+	if (rc == 0)
+		restart_ctx(&dir_fp->readdir_data.ctx);
+	/* running out of buffer space only ends the scan, it is not an error */
+	if (rc == -ENOSPC)
+		rc = 0;
+	if (rc)
+		goto err_out;
+
+	d_info.wptr = d_info.rptr;
+	d_info.out_buf_len = buffer_sz;
+	rc = process_query_dir_entries(&query_dir_private);
+	if (rc)
+		goto err_out;
+
+	if (!d_info.data_count && d_info.out_buf_len >= 0) {
+		if (srch_flag & SMB2_RETURN_SINGLE_ENTRY && !is_asterisk(srch_ptr)) {
+			rsp->hdr.Status = STATUS_NO_SUCH_FILE;
+		} else {
+			dir_fp->dot_dotdot[0] = dir_fp->dot_dotdot[1] = 0;
+			rsp->hdr.Status = STATUS_NO_MORE_FILES;
+		}
+		rsp->StructureSize = cpu_to_le16(9);
+		rsp->OutputBufferOffset = cpu_to_le16(0);
+		rsp->OutputBufferLength = cpu_to_le32(0);
+		rsp->Buffer[0] = 0;
+		inc_rfc1001_len(rsp_org, 9);
+	} else {
+		/* terminate the entry chain: the last entry has no successor */
+		((struct file_directory_info *)
+		((char *)rsp->Buffer + d_info.last_entry_offset))
+		->NextEntryOffset = 0;
+
+		rsp->StructureSize = cpu_to_le16(9);
+		rsp->OutputBufferOffset = cpu_to_le16(72);
+		rsp->OutputBufferLength = cpu_to_le32(d_info.data_count);
+		inc_rfc1001_len(rsp_org, 8 + d_info.data_count);
+	}
+
+	kfree(srch_ptr);
+	ksmbd_fd_put(work, dir_fp);
+	ksmbd_revert_fsids(work);
+	return 0;
+
+err_out:
+	pr_err("error while processing smb2 query dir rc = %d\n", rc);
+	kfree(srch_ptr);
+
+err_out2:
+	if (rc == -EINVAL)
+		rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+	else if (rc == -EACCES)
+		rsp->hdr.Status = STATUS_ACCESS_DENIED;
+	else if (rc == -ENOENT)
+		rsp->hdr.Status = STATUS_NO_SUCH_FILE;
+	else if (rc == -EBADF)
+		rsp->hdr.Status = STATUS_FILE_CLOSED;
+	else if (rc == -ENOMEM)
+		rsp->hdr.Status = STATUS_NO_MEMORY;
+	else if (rc == -EFAULT)
+		rsp->hdr.Status = STATUS_INVALID_INFO_CLASS;
+	if (!rsp->hdr.Status)
+		rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
+
+	smb2_set_err_rsp(work);
+	ksmbd_fd_put(work, dir_fp);
+	ksmbd_revert_fsids(work);
+	return 0;
+}
+
+/**
+ * buffer_check_err() - validate the response size against the client buffer
+ * @reqOutputBufferLength:	largest output buffer the client accepts
+ * @rsp:			query info response whose OutputBufferLength
+ *				has already been filled in
+ * @infoclass_size:		size required for the requested info class
+ *
+ * When the response is larger than the client buffer, the reply is either
+ * turned into STATUS_INFO_LENGTH_MISMATCH (buffer smaller than the info
+ * class itself) or truncated with STATUS_BUFFER_OVERFLOW.
+ *
+ * Return: 0 on success or truncation, -EINVAL on length mismatch
+ */
+static int buffer_check_err(int reqOutputBufferLength,
+			    struct smb2_query_info_rsp *rsp, int infoclass_size)
+{
+	/* the response fits into what the client offered: nothing to do */
+	if (reqOutputBufferLength >= le32_to_cpu(rsp->OutputBufferLength))
+		return 0;
+
+	if (reqOutputBufferLength < infoclass_size) {
+		pr_err("Invalid Buffer Size Requested\n");
+		rsp->hdr.Status = STATUS_INFO_LENGTH_MISMATCH;
+		rsp->hdr.smb2_buf_length =
+			cpu_to_be32(sizeof(struct smb2_hdr) - 4);
+		return -EINVAL;
+	}
+
+	ksmbd_debug(SMB, "Buffer Overflow\n");
+	rsp->hdr.Status = STATUS_BUFFER_OVERFLOW;
+	rsp->hdr.smb2_buf_length =
+		cpu_to_be32(sizeof(struct smb2_hdr) - 4 +
+			    reqOutputBufferLength);
+	rsp->OutputBufferLength = cpu_to_le32(reqOutputBufferLength);
+	return 0;
+}
+
+/*
+ * Fill a fixed FileStandardInformation reply for a pipe: 4096 bytes
+ * allocated, empty, one link, delete pending, not a directory.
+ */
+static void get_standard_info_pipe(struct smb2_query_info_rsp *rsp)
+{
+	struct smb2_file_standard_info *info =
+		(struct smb2_file_standard_info *)rsp->Buffer;
+	const unsigned int len = sizeof(struct smb2_file_standard_info);
+
+	info->AllocationSize = cpu_to_le64(4096);
+	info->EndOfFile = cpu_to_le64(0);
+	info->NumberOfLinks = cpu_to_le32(1);
+	info->DeletePending = 1;
+	info->Directory = 0;
+
+	rsp->OutputBufferLength = cpu_to_le32(len);
+	inc_rfc1001_len(rsp, len);
+}
+
+/*
+ * Fill a FileInternalInformation reply for a pipe. The index number is
+ * @num with the top bit set.
+ */
+static void get_internal_info_pipe(struct smb2_query_info_rsp *rsp, u64 num)
+{
+	struct smb2_file_internal_info *info =
+		(struct smb2_file_internal_info *)rsp->Buffer;
+	const unsigned int len = sizeof(struct smb2_file_internal_info);
+
+	/* any unique number */
+	info->IndexNumber = cpu_to_le64(num | (1ULL << 63));
+
+	rsp->OutputBufferLength = cpu_to_le32(len);
+	inc_rfc1001_len(rsp, len);
+}
+
+/*
+ * Answer a file-level QUERY_INFO request issued on a pipe share. Only the
+ * standard and internal information classes are supported.
+ *
+ * Return: 0 on success, -ENOENT when no RPC method is registered for the
+ * file id, -EOPNOTSUPP for other info classes, or the result of
+ * buffer_check_err().
+ */
+static int smb2_get_info_file_pipe(struct ksmbd_session *sess,
+				   struct smb2_query_info_req *req,
+				   struct smb2_query_info_rsp *rsp)
+{
+	u64 id = le64_to_cpu(req->VolatileFileId);
+	int infoclass_size;
+
+	/*
+	 * Windows can sometime send query file info request on
+	 * pipe without opening it, checking error condition here
+	 */
+	if (!ksmbd_session_rpc_method(sess, id))
+		return -ENOENT;
+
+	ksmbd_debug(SMB, "FileInfoClass %u, FileId 0x%llx\n",
+		    req->FileInfoClass, id);
+
+	switch (req->FileInfoClass) {
+	case FILE_STANDARD_INFORMATION:
+		get_standard_info_pipe(rsp);
+		infoclass_size = FILE_STANDARD_INFORMATION_SIZE;
+		break;
+	case FILE_INTERNAL_INFORMATION:
+		get_internal_info_pipe(rsp, id);
+		infoclass_size = FILE_INTERNAL_INFORMATION_SIZE;
+		break;
+	default:
+		ksmbd_debug(SMB, "smb2_info_file_pipe for %u not supported\n",
+			    req->FileInfoClass);
+		return -EOPNOTSUPP;
+	}
+
+	return buffer_check_err(le32_to_cpu(req->OutputBufferLength),
+				rsp, infoclass_size);
+}
+
+/**
+ * smb2_get_ea() - handler for smb2 get extended attribute command
+ * @work: smb work containing query info command buffer
+ * @fp: ksmbd_file pointer
+ * @req: get extended attribute request
+ * @rsp: response buffer pointer
+ * @rsp_org: base response buffer pointer in case of chained response
+ *
+ * Walks the xattr list of the file and copies every "user." attribute that
+ * is neither a stream nor the DOS attribute into the response as a chain of
+ * smb2_ea_info records, each padded to a 4 byte boundary. When the request
+ * carries an input buffer, only the first attribute matching the requested
+ * name is returned.
+ *
+ * Return: 0 on success, otherwise error
+ */
+static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp,
+ struct smb2_query_info_req *req,
+ struct smb2_query_info_rsp *rsp, void *rsp_org)
+{
+ struct smb2_ea_info *eainfo, *prev_eainfo;
+ char *name, *ptr, *xattr_list = NULL, *buf;
+ int rc, name_len, value_len, xattr_list_len, idx;
+ ssize_t buf_free_len, alignment_bytes, next_offset, rsp_data_cnt = 0;
+ struct smb2_ea_info_req *ea_req = NULL;
+ struct path *path;
+ struct user_namespace *user_ns = file_mnt_user_ns(fp->filp);
+
+ if (!(fp->daccess & FILE_READ_EA_LE)) {
+ pr_err("Not permitted to read ext attr : 0x%x\n",
+ fp->daccess);
+ return -EACCES;
+ }
+
+ path = &fp->filp->f_path;
+ /* single EA entry is requested with given user.* name */
+ if (req->InputBufferLength) {
+ /* NOTE(review): InputBufferLength is not checked against sizeof(*ea_req) here */
+ ea_req = (struct smb2_ea_info_req *)req->Buffer;
+ } else {
+ /* need to send all EAs, if no specific EA is requested */
+ if (le32_to_cpu(req->Flags) & SL_RETURN_SINGLE_ENTRY)
+ ksmbd_debug(SMB,
+ "All EAs are requested but need to send single EA entry in rsp flags 0x%x\n",
+ le32_to_cpu(req->Flags));
+ }
+
+ /*
+ * space left in the response buffer behind the fixed query info
+ * response, further capped by the client's OutputBufferLength
+ */
+ buf_free_len = work->response_sz -
+ (get_rfc1002_len(rsp_org) + 4) -
+ sizeof(struct smb2_query_info_rsp);
+
+ if (le32_to_cpu(req->OutputBufferLength) < buf_free_len)
+ buf_free_len = le32_to_cpu(req->OutputBufferLength);
+
+ rc = ksmbd_vfs_listxattr(path->dentry, &xattr_list);
+ if (rc < 0) {
+ rsp->hdr.Status = STATUS_INVALID_HANDLE;
+ goto out;
+ } else if (!rc) { /* there is no EA in the file */
+ ksmbd_debug(SMB, "no ea data in the file\n");
+ goto done;
+ }
+ xattr_list_len = rc;
+
+ ptr = (char *)rsp->Buffer;
+ eainfo = (struct smb2_ea_info *)ptr;
+ prev_eainfo = eainfo;
+ idx = 0;
+
+ /* xattr_list is a sequence of NUL-terminated names, xattr_list_len bytes long */
+ while (idx < xattr_list_len) {
+ name = xattr_list + idx;
+ name_len = strlen(name);
+
+ ksmbd_debug(SMB, "%s, len %d\n", name, name_len);
+ idx += name_len + 1;
+
+ /*
+ * CIFS does not support EA other than user.* namespace,
+ * still keep the framework generic, to list other attrs
+ * in future.
+ */
+ if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+ continue;
+
+ /* stream xattrs are not reported as EAs */
+ if (!strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX,
+ STREAM_PREFIX_LEN))
+ continue;
+
+ /*
+ * NOTE(review): only the first EaNameLength bytes are compared, so
+ * an attribute whose name merely starts with the requested name
+ * also matches - confirm this is intended.
+ */
+ if (req->InputBufferLength &&
+ strncmp(&name[XATTR_USER_PREFIX_LEN], ea_req->name,
+ ea_req->EaNameLength))
+ continue;
+
+ /* the DOS attribute xattr is not reported as an EA either */
+ if (!strncmp(&name[XATTR_USER_PREFIX_LEN],
+ DOS_ATTRIBUTE_PREFIX, DOS_ATTRIBUTE_PREFIX_LEN))
+ continue;
+
+ /* the "user." prefix is stripped from the name sent to the client */
+ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+ name_len -= XATTR_USER_PREFIX_LEN;
+
+ /* the value is stored right behind the NUL-terminated name */
+ ptr = (char *)(&eainfo->name + name_len + 1);
+ buf_free_len -= (offsetof(struct smb2_ea_info, name) +
+ name_len + 1);
+ /* bailout if xattr can't fit in buf_free_len */
+ value_len = ksmbd_vfs_getxattr(user_ns, path->dentry,
+ name, &buf);
+ if (value_len <= 0) {
+ rc = -ENOENT;
+ rsp->hdr.Status = STATUS_INVALID_HANDLE;
+ goto out;
+ }
+
+ buf_free_len -= value_len;
+ if (buf_free_len < 0) {
+ kfree(buf);
+ break;
+ }
+
+ memcpy(ptr, buf, value_len);
+ kfree(buf);
+
+ ptr += value_len;
+ eainfo->Flags = 0;
+ eainfo->EaNameLength = name_len;
+
+ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+ memcpy(eainfo->name, &name[XATTR_USER_PREFIX_LEN],
+ name_len);
+ else
+ memcpy(eainfo->name, name, name_len);
+
+ eainfo->name[name_len] = '\0';
+ eainfo->EaValueLength = cpu_to_le16(value_len);
+ next_offset = offsetof(struct smb2_ea_info, name) +
+ name_len + 1 + value_len;
+
+ /* align next xattr entry at 4 byte boundary */
+ alignment_bytes = ((next_offset + 3) & ~3) - next_offset;
+ if (alignment_bytes) {
+ /*
+ * NOTE(review): the padding is written before buf_free_len is
+ * re-checked, so it may exceed the limit by up to 3 bytes.
+ */
+ memset(ptr, '\0', alignment_bytes);
+ ptr += alignment_bytes;
+ next_offset += alignment_bytes;
+ buf_free_len -= alignment_bytes;
+ }
+ eainfo->NextEntryOffset = cpu_to_le32(next_offset);
+ prev_eainfo = eainfo;
+ eainfo = (struct smb2_ea_info *)ptr;
+ rsp_data_cnt += next_offset;
+
+ if (req->InputBufferLength) {
+ ksmbd_debug(SMB, "single entry requested\n");
+ break;
+ }
+ }
+
+ /* no more ea entries */
+ prev_eainfo->NextEntryOffset = 0;
+done:
+ rc = 0;
+ if (rsp_data_cnt == 0)
+ rsp->hdr.Status = STATUS_NO_EAS_ON_FILE;
+ rsp->OutputBufferLength = cpu_to_le32(rsp_data_cnt);
+ inc_rfc1001_len(rsp_org, rsp_data_cnt);
+out:
+ kvfree(xattr_list);
+ return rc;
+}
+
+/* Fill a FileAccessInformation reply with the access mask of @fp. */
+static void get_file_access_info(struct smb2_query_info_rsp *rsp,
+				 struct ksmbd_file *fp, void *rsp_org)
+{
+	struct smb2_file_access_info *access =
+		(struct smb2_file_access_info *)rsp->Buffer;
+	const unsigned int len = sizeof(struct smb2_file_access_info);
+
+	access->AccessFlags = fp->daccess;
+
+	rsp->OutputBufferLength = cpu_to_le32(len);
+	inc_rfc1001_len(rsp_org, len);
+}
+
+/*
+ * Fill a FileBasicInformation reply (timestamps and attributes). The reply
+ * uses the leading part of struct smb2_file_all_info, up to AllocationSize.
+ *
+ * Return: 0 on success, -EACCES when the handle lacks FILE_READ_ATTRIBUTES.
+ */
+static int get_file_basic_info(struct smb2_query_info_rsp *rsp,
+			       struct ksmbd_file *fp, void *rsp_org)
+{
+	const unsigned int len =
+		offsetof(struct smb2_file_all_info, AllocationSize);
+	struct smb2_file_all_info *info;
+	struct kstat stat;
+	u64 nt_time;
+
+	if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) {
+		pr_err("no right to read the attributes : 0x%x\n",
+		       fp->daccess);
+		return -EACCES;
+	}
+
+	info = (struct smb2_file_all_info *)rsp->Buffer;
+	generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp),
+			 &stat);
+
+	info->CreationTime = cpu_to_le64(fp->create_time);
+
+	nt_time = ksmbd_UnixTimeToNT(stat.atime);
+	info->LastAccessTime = cpu_to_le64(nt_time);
+
+	nt_time = ksmbd_UnixTimeToNT(stat.mtime);
+	info->LastWriteTime = cpu_to_le64(nt_time);
+
+	nt_time = ksmbd_UnixTimeToNT(stat.ctime);
+	info->ChangeTime = cpu_to_le64(nt_time);
+
+	info->Attributes = fp->f_ci->m_fattr;
+	info->Pad1 = 0;
+
+	rsp->OutputBufferLength = cpu_to_le32(len);
+	inc_rfc1001_len(rsp_org, len);
+	return 0;
+}
+
+/*
+ * Allocation size reported to the client: 0 for directories, otherwise the
+ * larger of the file size and the space covered by i_blocks (512 byte
+ * units).
+ */
+static unsigned long long get_allocation_size(struct inode *inode,
+					      struct kstat *stat)
+{
+	if (S_ISDIR(stat->mode))
+		return 0;
+
+	if ((inode->i_blocks << 9) <= stat->size)
+		return stat->size;
+
+	return inode->i_blocks << 9;
+}
+
+/*
+ * Fill a FileStandardInformation reply. A pending delete is reported and
+ * subtracted from the link count.
+ */
+static void get_file_standard_info(struct smb2_query_info_rsp *rsp,
+				   struct ksmbd_file *fp, void *rsp_org)
+{
+	const unsigned int len = sizeof(struct smb2_file_standard_info);
+	struct inode *inode = file_inode(fp->filp);
+	struct smb2_file_standard_info *info;
+	unsigned int pending;
+	struct kstat stat;
+
+	generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat);
+
+	info = (struct smb2_file_standard_info *)rsp->Buffer;
+	pending = ksmbd_inode_pending_delete(fp);
+
+	info->AllocationSize = cpu_to_le64(get_allocation_size(inode, &stat));
+	if (S_ISDIR(stat.mode)) {
+		info->EndOfFile = 0;
+		info->Directory = 1;
+	} else {
+		info->EndOfFile = cpu_to_le64(stat.size);
+		info->Directory = 0;
+	}
+	info->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - pending);
+	info->DeletePending = pending;
+
+	rsp->OutputBufferLength = cpu_to_le32(len);
+	inc_rfc1001_len(rsp_org, len);
+}
+
+/* Fill a FileAlignmentInformation reply; the requirement reported is 0. */
+static void get_file_alignment_info(struct smb2_query_info_rsp *rsp,
+				    void *rsp_org)
+{
+	struct smb2_file_alignment_info *align =
+		(struct smb2_file_alignment_info *)rsp->Buffer;
+	const unsigned int len = sizeof(struct smb2_file_alignment_info);
+
+	align->AlignmentRequirement = 0;
+
+	rsp->OutputBufferLength = cpu_to_le32(len);
+	inc_rfc1001_len(rsp_org, len);
+}
+
+/*
+ * Fill a FileAllInformation reply: timestamps, sizes, link count, access
+ * mask, position, mode and the share-relative file name in UTF-16.
+ *
+ * Return: 0 on success, -EACCES when the handle lacks FILE_READ_ATTRIBUTES,
+ * -ENOMEM when the NT path name cannot be built.
+ */
+static int get_file_all_info(struct ksmbd_work *work,
+			     struct smb2_query_info_rsp *rsp,
+			     struct ksmbd_file *fp,
+			     void *rsp_org)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct smb2_file_all_info *info;
+	unsigned int pending;
+	struct inode *inode;
+	struct kstat stat;
+	int name_bytes;
+	char *nt_path;
+	u64 nt_time;
+
+	if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) {
+		ksmbd_debug(SMB, "no right to read the attributes : 0x%x\n",
+			    fp->daccess);
+		return -EACCES;
+	}
+
+	nt_path = convert_to_nt_pathname(fp->filename,
+					 work->tcon->share_conf->path);
+	if (!nt_path)
+		return -ENOMEM;
+
+	inode = file_inode(fp->filp);
+	generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat);
+
+	ksmbd_debug(SMB, "filename = %s\n", nt_path);
+	pending = ksmbd_inode_pending_delete(fp);
+	info = (struct smb2_file_all_info *)rsp->Buffer;
+
+	/* basic information */
+	info->CreationTime = cpu_to_le64(fp->create_time);
+	nt_time = ksmbd_UnixTimeToNT(stat.atime);
+	info->LastAccessTime = cpu_to_le64(nt_time);
+	nt_time = ksmbd_UnixTimeToNT(stat.mtime);
+	info->LastWriteTime = cpu_to_le64(nt_time);
+	nt_time = ksmbd_UnixTimeToNT(stat.ctime);
+	info->ChangeTime = cpu_to_le64(nt_time);
+	info->Attributes = fp->f_ci->m_fattr;
+	info->Pad1 = 0;
+
+	/* standard information */
+	info->AllocationSize = cpu_to_le64(get_allocation_size(inode, &stat));
+	if (S_ISDIR(stat.mode)) {
+		info->EndOfFile = 0;
+		info->Directory = 1;
+	} else {
+		info->EndOfFile = cpu_to_le64(stat.size);
+		info->Directory = 0;
+	}
+	info->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - pending);
+	info->DeletePending = pending;
+	info->Pad2 = 0;
+
+	/* internal, EA, access, position, mode and alignment information */
+	info->IndexNumber = cpu_to_le64(stat.ino);
+	info->EASize = 0;
+	info->AccessFlags = fp->daccess;
+	info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos);
+	info->Mode = fp->coption;
+	info->AlignmentRequirement = 0;
+
+	/* name information: the converted length is doubled to get bytes */
+	name_bytes = smbConvertToUTF16((__le16 *)info->FileName, nt_path,
+				       PATH_MAX, conn->local_nls, 0);
+	name_bytes *= 2;
+	info->FileNameLength = cpu_to_le32(name_bytes);
+	kfree(nt_path);
+
+	rsp->OutputBufferLength =
+		cpu_to_le32(sizeof(struct smb2_file_all_info) + name_bytes - 1);
+	inc_rfc1001_len(rsp_org, le32_to_cpu(rsp->OutputBufferLength));
+	return 0;
+}
+
+/*
+ * Fill a FileAlternateNameInformation reply with the short name derived
+ * from the dentry name. The name is read under dentry->d_lock.
+ */
+static void get_file_alternate_info(struct ksmbd_work *work,
+				    struct smb2_query_info_rsp *rsp,
+				    struct ksmbd_file *fp,
+				    void *rsp_org)
+{
+	struct dentry *dentry = fp->filp->f_path.dentry;
+	struct smb2_file_alt_name_info *alt;
+	int short_len;
+
+	spin_lock(&dentry->d_lock);
+	alt = (struct smb2_file_alt_name_info *)rsp->Buffer;
+	short_len = ksmbd_extract_shortname(work->conn,
+					    dentry->d_name.name,
+					    alt->FileName);
+	spin_unlock(&dentry->d_lock);
+
+	alt->FileNameLength = cpu_to_le32(short_len);
+	rsp->OutputBufferLength =
+		cpu_to_le32(sizeof(struct smb2_file_alt_name_info) + short_len);
+	inc_rfc1001_len(rsp_org, le32_to_cpu(rsp->OutputBufferLength));
+}
+
+/*
+ * Fill a FileStreamInformation reply.
+ *
+ * Every xattr whose name carries the stream prefix behind "user." becomes
+ * one smb2_file_stream_info record; when at least one such record was
+ * written, a trailing "::$DATA" record describing the file itself is added.
+ *
+ * The records are written straight into rsp->Buffer, so the space left in
+ * the response buffer is tracked and the walk stops before a record would
+ * overrun it (the original code wrote without any bound).
+ */
+static void get_file_stream_info(struct ksmbd_work *work,
+				 struct smb2_query_info_rsp *rsp,
+				 struct ksmbd_file *fp,
+				 void *rsp_org)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct smb2_file_stream_info *file_info;
+	char *stream_name, *xattr_list = NULL, *stream_buf;
+	struct kstat stat;
+	struct path *path = &fp->filp->f_path;
+	ssize_t xattr_list_len;
+	int nbytes = 0, streamlen, stream_name_len, next, idx = 0;
+	int buf_free_len;
+
+	generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp),
+			 &stat);
+	file_info = (struct smb2_file_stream_info *)rsp->Buffer;
+
+	/*
+	 * Room left in the response buffer behind the fixed query info
+	 * response; same computation as in smb2_get_ea().
+	 */
+	buf_free_len = work->response_sz - (get_rfc1002_len(rsp_org) + 4);
+	buf_free_len -= (int)sizeof(struct smb2_query_info_rsp);
+
+	xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list);
+	if (xattr_list_len < 0) {
+		goto out;
+	} else if (!xattr_list_len) {
+		ksmbd_debug(SMB, "empty xattr in the file\n");
+		goto out;
+	}
+
+	/* xattr_list is a sequence of NUL-terminated names */
+	while (idx < xattr_list_len) {
+		stream_name = xattr_list + idx;
+		streamlen = strlen(stream_name);
+		idx += streamlen + 1;
+
+		ksmbd_debug(SMB, "%s, len %d\n", stream_name, streamlen);
+
+		if (strncmp(&stream_name[XATTR_USER_PREFIX_LEN],
+			    STREAM_PREFIX, STREAM_PREFIX_LEN))
+			continue;
+
+		stream_name_len = streamlen - (XATTR_USER_PREFIX_LEN +
+				STREAM_PREFIX_LEN);
+		streamlen = stream_name_len;
+
+		/* plus : size */
+		streamlen += 1;
+		stream_buf = kmalloc(streamlen + 1, GFP_KERNEL);
+		if (!stream_buf)
+			break;
+
+		streamlen = snprintf(stream_buf, streamlen + 1,
+				     ":%s", &stream_name[XATTR_NAME_STREAM_LEN]);
+
+		/*
+		 * Worst case size of this record, assuming the conversion
+		 * yields at most one UTF-16 unit per input byte. Stop when
+		 * it no longer fits.
+		 */
+		next = sizeof(struct smb2_file_stream_info) + streamlen * 2;
+		if (next > buf_free_len) {
+			kfree(stream_buf);
+			break;
+		}
+
+		file_info = (struct smb2_file_stream_info *)&rsp->Buffer[nbytes];
+		streamlen = smbConvertToUTF16((__le16 *)file_info->StreamName,
+					      stream_buf, streamlen,
+					      conn->local_nls, 0);
+		streamlen *= 2;
+		kfree(stream_buf);
+		file_info->StreamNameLength = cpu_to_le32(streamlen);
+		file_info->StreamSize = cpu_to_le64(stream_name_len);
+		file_info->StreamAllocationSize = cpu_to_le64(stream_name_len);
+
+		next = sizeof(struct smb2_file_stream_info) + streamlen;
+		nbytes += next;
+		buf_free_len -= next;
+		file_info->NextEntryOffset = cpu_to_le32(next);
+	}
+
+	/* default data stream, 7 characters -> at most 14 bytes of UTF-16 */
+	if (nbytes &&
+	    buf_free_len >= (int)(sizeof(struct smb2_file_stream_info) + 7 * 2)) {
+		file_info = (struct smb2_file_stream_info *)
+			&rsp->Buffer[nbytes];
+		streamlen = smbConvertToUTF16((__le16 *)file_info->StreamName,
+					      "::$DATA", 7, conn->local_nls, 0);
+		streamlen *= 2;
+		file_info->StreamNameLength = cpu_to_le32(streamlen);
+		file_info->StreamSize = S_ISDIR(stat.mode) ? 0 :
+			cpu_to_le64(stat.size);
+		file_info->StreamAllocationSize = S_ISDIR(stat.mode) ? 0 :
+			cpu_to_le64(stat.size);
+		nbytes += sizeof(struct smb2_file_stream_info) + streamlen;
+	}
+
+	/* last entry offset should be 0 */
+	file_info->NextEntryOffset = 0;
+out:
+	kvfree(xattr_list);
+
+	rsp->OutputBufferLength = cpu_to_le32(nbytes);
+	inc_rfc1001_len(rsp_org, nbytes);
+}
+
+/* Fill a FileInternalInformation reply with the inode number of @fp. */
+static void get_file_internal_info(struct smb2_query_info_rsp *rsp,
+				   struct ksmbd_file *fp, void *rsp_org)
+{
+	struct smb2_file_internal_info *internal =
+		(struct smb2_file_internal_info *)rsp->Buffer;
+	const unsigned int len = sizeof(struct smb2_file_internal_info);
+	struct kstat stat;
+
+	generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp),
+			 &stat);
+	internal->IndexNumber = cpu_to_le64(stat.ino);
+
+	rsp->OutputBufferLength = cpu_to_le32(len);
+	inc_rfc1001_len(rsp_org, len);
+}
+
+/*
+ * Fill a FileNetworkOpenInformation reply (timestamps, sizes, attributes).
+ *
+ * Return: 0 on success, -EACCES when the handle lacks FILE_READ_ATTRIBUTES.
+ */
+static int get_file_network_open_info(struct smb2_query_info_rsp *rsp,
+				      struct ksmbd_file *fp, void *rsp_org)
+{
+	const unsigned int len = sizeof(struct smb2_file_ntwrk_info);
+	struct smb2_file_ntwrk_info *net;
+	struct inode *inode;
+	struct kstat stat;
+	u64 nt_time;
+
+	if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) {
+		pr_err("no right to read the attributes : 0x%x\n",
+		       fp->daccess);
+		return -EACCES;
+	}
+
+	net = (struct smb2_file_ntwrk_info *)rsp->Buffer;
+
+	inode = file_inode(fp->filp);
+	generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat);
+
+	net->CreationTime = cpu_to_le64(fp->create_time);
+
+	nt_time = ksmbd_UnixTimeToNT(stat.atime);
+	net->LastAccessTime = cpu_to_le64(nt_time);
+
+	nt_time = ksmbd_UnixTimeToNT(stat.mtime);
+	net->LastWriteTime = cpu_to_le64(nt_time);
+
+	nt_time = ksmbd_UnixTimeToNT(stat.ctime);
+	net->ChangeTime = cpu_to_le64(nt_time);
+
+	net->Attributes = fp->f_ci->m_fattr;
+	net->AllocationSize = cpu_to_le64(get_allocation_size(inode, &stat));
+	if (S_ISDIR(stat.mode))
+		net->EndOfFile = 0;
+	else
+		net->EndOfFile = cpu_to_le64(stat.size);
+	net->Reserved = cpu_to_le32(0);
+
+	rsp->OutputBufferLength = cpu_to_le32(len);
+	inc_rfc1001_len(rsp_org, len);
+	return 0;
+}
+
+/* Fill a FileEaInformation reply; the EA size reported is always 0. */
+static void get_file_ea_info(struct smb2_query_info_rsp *rsp, void *rsp_org)
+{
+	struct smb2_file_ea_info *ea =
+		(struct smb2_file_ea_info *)rsp->Buffer;
+	const unsigned int len = sizeof(struct smb2_file_ea_info);
+
+	ea->EASize = 0;
+
+	rsp->OutputBufferLength = cpu_to_le32(len);
+	inc_rfc1001_len(rsp_org, len);
+}
+
+/* Fill a FilePositionInformation reply with the current f_pos of @fp. */
+static void get_file_position_info(struct smb2_query_info_rsp *rsp,
+				   struct ksmbd_file *fp, void *rsp_org)
+{
+	struct smb2_file_pos_info *pos =
+		(struct smb2_file_pos_info *)rsp->Buffer;
+	const unsigned int len = sizeof(struct smb2_file_pos_info);
+
+	pos->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos);
+
+	rsp->OutputBufferLength = cpu_to_le32(len);
+	inc_rfc1001_len(rsp_org, len);
+}
+
+/*
+ * Fill a FileModeInformation reply with the create options of @fp,
+ * restricted to FILE_MODE_INFO_MASK.
+ */
+static void get_file_mode_info(struct smb2_query_info_rsp *rsp,
+			       struct ksmbd_file *fp, void *rsp_org)
+{
+	struct smb2_file_mode_info *mode =
+		(struct smb2_file_mode_info *)rsp->Buffer;
+	const unsigned int len = sizeof(struct smb2_file_mode_info);
+
+	mode->Mode = fp->coption & FILE_MODE_INFO_MASK;
+
+	rsp->OutputBufferLength = cpu_to_le32(len);
+	inc_rfc1001_len(rsp_org, len);
+}
+
+/*
+ * Fill a FileCompressionInformation reply: no compression, with the
+ * "compressed" size taken from the block count (512 byte units).
+ */
+static void get_file_compression_info(struct smb2_query_info_rsp *rsp,
+				      struct ksmbd_file *fp, void *rsp_org)
+{
+	struct smb2_file_comp_info *comp =
+		(struct smb2_file_comp_info *)rsp->Buffer;
+	const unsigned int len = sizeof(struct smb2_file_comp_info);
+	struct kstat stat;
+
+	generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp),
+			 &stat);
+
+	comp->CompressedFileSize = cpu_to_le64(stat.blocks << 9);
+	comp->CompressionFormat = COMPRESSION_FORMAT_NONE;
+	comp->CompressionUnitShift = 0;
+	comp->ChunkShift = 0;
+	comp->ClusterShift = 0;
+	memset(&comp->Reserved[0], 0, 3);
+
+	rsp->OutputBufferLength = cpu_to_le32(len);
+	inc_rfc1001_len(rsp_org, len);
+}
+
+/*
+ * Fill a FileAttributeTagInformation reply; the reparse tag is always 0.
+ *
+ * Return: 0 on success, -EACCES when the handle lacks FILE_READ_ATTRIBUTES.
+ */
+static int get_file_attribute_tag_info(struct smb2_query_info_rsp *rsp,
+				       struct ksmbd_file *fp, void *rsp_org)
+{
+	const unsigned int len = sizeof(struct smb2_file_attr_tag_info);
+	struct smb2_file_attr_tag_info *tag;
+
+	if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) {
+		pr_err("no right to read the attributes : 0x%x\n",
+		       fp->daccess);
+		return -EACCES;
+	}
+
+	tag = (struct smb2_file_attr_tag_info *)rsp->Buffer;
+	tag->FileAttributes = fp->f_ci->m_fattr;
+	tag->ReparseTag = 0;
+
+	rsp->OutputBufferLength = cpu_to_le32(len);
+	inc_rfc1001_len(rsp_org, len);
+	return 0;
+}
+
+/*
+ * Fill an SMB 3.1.1 POSIX query info reply straight from the inode fields
+ * of @fp.
+ *
+ * Return: always 0.
+ */
+static int find_file_posix_info(struct smb2_query_info_rsp *rsp,
+				struct ksmbd_file *fp, void *rsp_org)
+{
+	struct smb311_posix_qinfo *posix =
+		(struct smb311_posix_qinfo *)rsp->Buffer;
+	const unsigned int len = sizeof(struct smb311_posix_qinfo);
+	struct inode *inode = file_inode(fp->filp);
+	u64 nt_time;
+
+	posix->CreationTime = cpu_to_le64(fp->create_time);
+
+	nt_time = ksmbd_UnixTimeToNT(inode->i_atime);
+	posix->LastAccessTime = cpu_to_le64(nt_time);
+
+	nt_time = ksmbd_UnixTimeToNT(inode->i_mtime);
+	posix->LastWriteTime = cpu_to_le64(nt_time);
+
+	nt_time = ksmbd_UnixTimeToNT(inode->i_ctime);
+	posix->ChangeTime = cpu_to_le64(nt_time);
+
+	posix->DosAttributes = fp->f_ci->m_fattr;
+	posix->Inode = cpu_to_le64(inode->i_ino);
+	posix->EndOfFile = cpu_to_le64(inode->i_size);
+	posix->AllocationSize = cpu_to_le64(inode->i_blocks << 9);
+	posix->HardLinks = cpu_to_le32(inode->i_nlink);
+	posix->Mode = cpu_to_le32(inode->i_mode);
+	posix->DeviceId = cpu_to_le32(inode->i_rdev);
+
+	rsp->OutputBufferLength = cpu_to_le32(len);
+	inc_rfc1001_len(rsp_org, len);
+	return 0;
+}
+
+/*
+ * smb2_get_info_file() - answer a QUERY_INFO request of type
+ * SMB2_O_INFO_FILE (called from smb2_query_info() for that InfoType).
+ *
+ * Requests on a pipe share are handed to smb2_get_info_file_pipe().
+ * Otherwise the file is looked up by id, req->FileInfoClass selects the
+ * helper that fills rsp->Buffer, and, if the helper reported no error,
+ * buffer_check_err() is run with the client's OutputBufferLength and the
+ * per-class size constant.
+ *
+ * Return: 0 on success, -ENOENT when the file id cannot be looked up,
+ * -EOPNOTSUPP for an unhandled class (or SMB_FIND_FILE_POSIX_INFO without
+ * negotiated POSIX extensions), otherwise the error from the helper or
+ * from buffer_check_err().
+ */
+static int smb2_get_info_file(struct ksmbd_work *work,
+ struct smb2_query_info_req *req,
+ struct smb2_query_info_rsp *rsp, void *rsp_org)
+{
+ struct ksmbd_file *fp;
+ int fileinfoclass = 0;
+ int rc = 0;
+ int file_infoclass_size;
+ unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID;
+
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_PIPE)) {
+ /* smb2 info file called for pipe */
+ return smb2_get_info_file_pipe(work->sess, req, rsp);
+ }
+
+ /*
+ * Inside a compound request, a request whose VolatileFileId fails
+ * has_file_id() uses the ids saved in the work item instead.
+ */
+ if (work->next_smb2_rcv_hdr_off) {
+ if (!has_file_id(le64_to_cpu(req->VolatileFileId))) {
+ ksmbd_debug(SMB, "Compound request set FID = %llu\n",
+ work->compound_fid);
+ id = work->compound_fid;
+ pid = work->compound_pfid;
+ }
+ }
+
+ /* No compound id was taken: use the ids carried in the request. */
+ if (!has_file_id(id)) {
+ id = le64_to_cpu(req->VolatileFileId);
+ pid = le64_to_cpu(req->PersistentFileId);
+ }
+
+ fp = ksmbd_lookup_fd_slow(work, id, pid);
+ if (!fp)
+ return -ENOENT;
+
+ fileinfoclass = req->FileInfoClass;
+
+ /*
+ * Every case that can reach buffer_check_err() with rc == 0 sets
+ * file_infoclass_size; the error-only paths leave it unset.
+ */
+ switch (fileinfoclass) {
+ case FILE_ACCESS_INFORMATION:
+ get_file_access_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_ACCESS_INFORMATION_SIZE;
+ break;
+
+ case FILE_BASIC_INFORMATION:
+ rc = get_file_basic_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_BASIC_INFORMATION_SIZE;
+ break;
+
+ case FILE_STANDARD_INFORMATION:
+ get_file_standard_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_STANDARD_INFORMATION_SIZE;
+ break;
+
+ case FILE_ALIGNMENT_INFORMATION:
+ get_file_alignment_info(rsp, rsp_org);
+ file_infoclass_size = FILE_ALIGNMENT_INFORMATION_SIZE;
+ break;
+
+ case FILE_ALL_INFORMATION:
+ rc = get_file_all_info(work, rsp, fp, rsp_org);
+ file_infoclass_size = FILE_ALL_INFORMATION_SIZE;
+ break;
+
+ case FILE_ALTERNATE_NAME_INFORMATION:
+ get_file_alternate_info(work, rsp, fp, rsp_org);
+ file_infoclass_size = FILE_ALTERNATE_NAME_INFORMATION_SIZE;
+ break;
+
+ case FILE_STREAM_INFORMATION:
+ get_file_stream_info(work, rsp, fp, rsp_org);
+ file_infoclass_size = FILE_STREAM_INFORMATION_SIZE;
+ break;
+
+ case FILE_INTERNAL_INFORMATION:
+ get_file_internal_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_INTERNAL_INFORMATION_SIZE;
+ break;
+
+ case FILE_NETWORK_OPEN_INFORMATION:
+ rc = get_file_network_open_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_NETWORK_OPEN_INFORMATION_SIZE;
+ break;
+
+ case FILE_EA_INFORMATION:
+ get_file_ea_info(rsp, rsp_org);
+ file_infoclass_size = FILE_EA_INFORMATION_SIZE;
+ break;
+
+ case FILE_FULL_EA_INFORMATION:
+ rc = smb2_get_ea(work, fp, req, rsp, rsp_org);
+ file_infoclass_size = FILE_FULL_EA_INFORMATION_SIZE;
+ break;
+
+ case FILE_POSITION_INFORMATION:
+ get_file_position_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_POSITION_INFORMATION_SIZE;
+ break;
+
+ case FILE_MODE_INFORMATION:
+ get_file_mode_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_MODE_INFORMATION_SIZE;
+ break;
+
+ case FILE_COMPRESSION_INFORMATION:
+ get_file_compression_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_COMPRESSION_INFORMATION_SIZE;
+ break;
+
+ case FILE_ATTRIBUTE_TAG_INFORMATION:
+ rc = get_file_attribute_tag_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_ATTRIBUTE_TAG_INFORMATION_SIZE;
+ break;
+ case SMB_FIND_FILE_POSIX_INFO:
+ if (!work->tcon->posix_extensions) {
+ pr_err("client doesn't negotiate with SMB3.1.1 POSIX Extensions\n");
+ rc = -EOPNOTSUPP;
+ } else {
+ rc = find_file_posix_info(rsp, fp, rsp_org);
+ file_infoclass_size = sizeof(struct smb311_posix_qinfo);
+ }
+ break;
+ default:
+ ksmbd_debug(SMB, "fileinfoclass %d not supported yet\n",
+ fileinfoclass);
+ rc = -EOPNOTSUPP;
+ }
+ /* Only check the reply against the client's buffer size on success. */
+ if (!rc)
+ rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
+ rsp,
+ file_infoclass_size);
+ ksmbd_fd_put(work, fp);
+ return rc;
+}
+
+/*
+ * smb2_get_info_filesystem() - answer a QUERY_INFO request of type
+ * SMB2_O_INFO_FILESYSTEM
+ * @work:	smb work for the request
+ * @req:	query info request; FileInfoClass selects the FS info class
+ * @rsp:	query info response; the payload is written to rsp->Buffer
+ * @rsp_org:	start of the response buffer, passed to inc_rfc1001_len()
+ *
+ * Resolves the share path, runs vfs_statfs() on it and fills the reply
+ * for the requested class.  On success the reply is checked against the
+ * client's OutputBufferLength with buffer_check_err().
+ *
+ * Return: 0 on success; -EIO when the share path cannot be resolved or
+ * vfs_statfs() fails; -EOPNOTSUPP for an unhandled class or for
+ * FS_POSIX_INFORMATION without negotiated POSIX extensions; otherwise
+ * the result of buffer_check_err().
+ */
+static int smb2_get_info_filesystem(struct ksmbd_work *work,
+				    struct smb2_query_info_req *req,
+				    struct smb2_query_info_rsp *rsp, void *rsp_org)
+{
+	struct ksmbd_session *sess = work->sess;
+	struct ksmbd_conn *conn = sess->conn;
+	struct ksmbd_share_config *share = work->tcon->share_conf;
+	int fsinfoclass = 0;
+	struct kstatfs stfs;
+	struct path path;
+	int rc = 0, len;
+	int fs_infoclass_size = 0;
+	int lookup_flags = 0;
+
+	if (test_share_config_flag(share, KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS))
+		lookup_flags = LOOKUP_FOLLOW;
+
+	rc = ksmbd_vfs_kern_path(share->path, lookup_flags, &path, 0);
+	if (rc) {
+		pr_err("cannot create vfs path\n");
+		return -EIO;
+	}
+
+	rc = vfs_statfs(&path, &stfs);
+	if (rc) {
+		pr_err("cannot do stat of path %s\n", share->path);
+		path_put(&path);
+		return -EIO;
+	}
+
+	fsinfoclass = req->FileInfoClass;
+
+	switch (fsinfoclass) {
+	case FS_DEVICE_INFORMATION:
+	{
+		struct filesystem_device_info *info;
+
+		info = (struct filesystem_device_info *)rsp->Buffer;
+
+		info->DeviceType = cpu_to_le32(stfs.f_type);
+		info->DeviceCharacteristics = cpu_to_le32(0x00000020);
+		rsp->OutputBufferLength = cpu_to_le32(8);
+		inc_rfc1001_len(rsp_org, 8);
+		fs_infoclass_size = FS_DEVICE_INFORMATION_SIZE;
+		break;
+	}
+	case FS_ATTRIBUTE_INFORMATION:
+	{
+		struct filesystem_attribute_info *info;
+		size_t sz;
+
+		info = (struct filesystem_attribute_info *)rsp->Buffer;
+		info->Attributes = cpu_to_le32(FILE_SUPPORTS_OBJECT_IDS |
+					       FILE_PERSISTENT_ACLS |
+					       FILE_UNICODE_ON_DISK |
+					       FILE_CASE_PRESERVED_NAMES |
+					       FILE_CASE_SENSITIVE_SEARCH |
+					       FILE_SUPPORTS_BLOCK_REFCOUNTING);
+
+		info->Attributes |= cpu_to_le32(server_conf.share_fake_fscaps);
+
+		info->MaxPathNameComponentLength = cpu_to_le32(stfs.f_namelen);
+		/* The file system name is always reported as "NTFS". */
+		len = smbConvertToUTF16((__le16 *)info->FileSystemName,
+					"NTFS", PATH_MAX, conn->local_nls, 0);
+		len = len * 2;
+		info->FileSystemNameLen = cpu_to_le32(len);
+		sz = sizeof(struct filesystem_attribute_info) - 2 + len;
+		rsp->OutputBufferLength = cpu_to_le32(sz);
+		inc_rfc1001_len(rsp_org, sz);
+		fs_infoclass_size = FS_ATTRIBUTE_INFORMATION_SIZE;
+		break;
+	}
+	case FS_VOLUME_INFORMATION:
+	{
+		struct filesystem_vol_info *info;
+		size_t sz;
+
+		info = (struct filesystem_vol_info *)(rsp->Buffer);
+		info->VolumeCreationTime = 0;
+		/* Taking dummy value of serial number */
+		info->SerialNumber = cpu_to_le32(0xbc3ac512);
+		/* The share name doubles as the volume label. */
+		len = smbConvertToUTF16((__le16 *)info->VolumeLabel,
+					share->name, PATH_MAX,
+					conn->local_nls, 0);
+		len = len * 2;
+		info->VolumeLabelSize = cpu_to_le32(len);
+		info->Reserved = 0;
+		sz = sizeof(struct filesystem_vol_info) - 2 + len;
+		rsp->OutputBufferLength = cpu_to_le32(sz);
+		inc_rfc1001_len(rsp_org, sz);
+		fs_infoclass_size = FS_VOLUME_INFORMATION_SIZE;
+		break;
+	}
+	case FS_SIZE_INFORMATION:
+	{
+		struct filesystem_info *info;
+
+		info = (struct filesystem_info *)(rsp->Buffer);
+		info->TotalAllocationUnits = cpu_to_le64(stfs.f_blocks);
+		info->FreeAllocationUnits = cpu_to_le64(stfs.f_bfree);
+		/* One "sector" per allocation unit, sized as f_bsize. */
+		info->SectorsPerAllocationUnit = cpu_to_le32(1);
+		info->BytesPerSector = cpu_to_le32(stfs.f_bsize);
+		rsp->OutputBufferLength = cpu_to_le32(24);
+		inc_rfc1001_len(rsp_org, 24);
+		fs_infoclass_size = FS_SIZE_INFORMATION_SIZE;
+		break;
+	}
+	case FS_FULL_SIZE_INFORMATION:
+	{
+		struct smb2_fs_full_size_info *info;
+
+		info = (struct smb2_fs_full_size_info *)(rsp->Buffer);
+		info->TotalAllocationUnits = cpu_to_le64(stfs.f_blocks);
+		info->CallerAvailableAllocationUnits =
+					cpu_to_le64(stfs.f_bavail);
+		info->ActualAvailableAllocationUnits =
+					cpu_to_le64(stfs.f_bfree);
+		info->SectorsPerAllocationUnit = cpu_to_le32(1);
+		info->BytesPerSector = cpu_to_le32(stfs.f_bsize);
+		rsp->OutputBufferLength = cpu_to_le32(32);
+		inc_rfc1001_len(rsp_org, 32);
+		fs_infoclass_size = FS_FULL_SIZE_INFORMATION_SIZE;
+		break;
+	}
+	case FS_OBJECT_ID_INFORMATION:
+	{
+		struct object_id_info *info;
+
+		info = (struct object_id_info *)(rsp->Buffer);
+
+		if (!user_guest(sess->user))
+			memcpy(info->objid, user_passkey(sess->user), 16);
+		else
+			memset(info->objid, 0, 16);
+
+		info->extended_info.magic = cpu_to_le32(EXTENDED_INFO_MAGIC);
+		info->extended_info.version = cpu_to_le32(1);
+		info->extended_info.release = cpu_to_le32(1);
+		info->extended_info.rel_date = 0;
+		/*
+		 * Only the five characters are copied (no terminator);
+		 * NOTE(review): the rest of version_string is left as it
+		 * was in the response buffer - confirm it is pre-zeroed.
+		 */
+		memcpy(info->extended_info.version_string, "1.1.0", strlen("1.1.0"));
+		rsp->OutputBufferLength = cpu_to_le32(64);
+		inc_rfc1001_len(rsp_org, 64);
+		fs_infoclass_size = FS_OBJECT_ID_INFORMATION_SIZE;
+		break;
+	}
+	case FS_SECTOR_SIZE_INFORMATION:
+	{
+		struct smb3_fs_ss_info *info;
+
+		info = (struct smb3_fs_ss_info *)(rsp->Buffer);
+
+		/* Every sector size field is reported as f_bsize. */
+		info->LogicalBytesPerSector = cpu_to_le32(stfs.f_bsize);
+		info->PhysicalBytesPerSectorForAtomicity =
+				cpu_to_le32(stfs.f_bsize);
+		info->PhysicalBytesPerSectorForPerf = cpu_to_le32(stfs.f_bsize);
+		info->FSEffPhysicalBytesPerSectorForAtomicity =
+				cpu_to_le32(stfs.f_bsize);
+		info->Flags = cpu_to_le32(SSINFO_FLAGS_ALIGNED_DEVICE |
+				    SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE);
+		info->ByteOffsetForSectorAlignment = 0;
+		info->ByteOffsetForPartitionAlignment = 0;
+		rsp->OutputBufferLength = cpu_to_le32(28);
+		inc_rfc1001_len(rsp_org, 28);
+		fs_infoclass_size = FS_SECTOR_SIZE_INFORMATION_SIZE;
+		break;
+	}
+	case FS_CONTROL_INFORMATION:
+	{
+		/*
+		 * TODO : The current implementation is based on
+		 * test results with a win7 (NTFS) server. It needs to
+		 * be modified to report valid quota values
+		 * from the Linux kernel.
+		 */
+		struct smb2_fs_control_info *info;
+
+		info = (struct smb2_fs_control_info *)(rsp->Buffer);
+		info->FreeSpaceStartFiltering = 0;
+		info->FreeSpaceThreshold = 0;
+		info->FreeSpaceStopFiltering = 0;
+		info->DefaultQuotaThreshold = cpu_to_le64(SMB2_NO_FID);
+		info->DefaultQuotaLimit = cpu_to_le64(SMB2_NO_FID);
+		info->Padding = 0;
+		rsp->OutputBufferLength = cpu_to_le32(48);
+		inc_rfc1001_len(rsp_org, 48);
+		fs_infoclass_size = FS_CONTROL_INFORMATION_SIZE;
+		break;
+	}
+	case FS_POSIX_INFORMATION:
+	{
+		struct filesystem_posix_info *info;
+
+		if (!work->tcon->posix_extensions) {
+			pr_err("client doesn't negotiate with SMB3.1.1 POSIX Extensions\n");
+			rc = -EOPNOTSUPP;
+		} else {
+			info = (struct filesystem_posix_info *)(rsp->Buffer);
+			info->OptimalTransferSize = cpu_to_le32(stfs.f_bsize);
+			info->BlockSize = cpu_to_le32(stfs.f_bsize);
+			info->TotalBlocks = cpu_to_le64(stfs.f_blocks);
+			info->BlocksAvail = cpu_to_le64(stfs.f_bfree);
+			info->UserBlocksAvail = cpu_to_le64(stfs.f_bavail);
+			info->TotalFileNodes = cpu_to_le64(stfs.f_files);
+			info->FreeFileNodes = cpu_to_le64(stfs.f_ffree);
+			rsp->OutputBufferLength = cpu_to_le32(56);
+			inc_rfc1001_len(rsp_org, 56);
+			fs_infoclass_size = FS_POSIX_INFORMATION_SIZE;
+		}
+		break;
+	}
+	default:
+		path_put(&path);
+		return -EOPNOTSUPP;
+	}
+
+	/*
+	 * Keep an error set inside the switch (FS_POSIX_INFORMATION without
+	 * negotiated extensions); an unconditional assignment here used to
+	 * overwrite it with the result of the buffer check.
+	 */
+	if (!rc)
+		rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
+				      rsp,
+				      fs_infoclass_size);
+	path_put(&path);
+	return rc;
+}
+
+/*
+ * smb2_get_info_sec() - answer a QUERY_INFO request of type
+ * SMB2_O_INFO_SECURITY by building a security descriptor in rsp->Buffer.
+ *
+ * If req->AdditionalInformation carries bits other than the owner, group
+ * and DACL related ones, a bare smb_ntsd header with all offsets zero is
+ * returned and 0 is reported.  Otherwise the file is looked up by id and
+ * build_sec_desc() produces the descriptor from the inode's ACLs and,
+ * when the share has KSMBD_SHARE_FLAG_ACL_XATTR, from the descriptor
+ * read via ksmbd_vfs_get_sd_xattr().
+ *
+ * Return: 0 on success, -ENOENT when the file id cannot be looked up,
+ * otherwise the error from build_sec_desc().
+ */
+static int smb2_get_info_sec(struct ksmbd_work *work,
+ struct smb2_query_info_req *req,
+ struct smb2_query_info_rsp *rsp, void *rsp_org)
+{
+ struct ksmbd_file *fp;
+ struct user_namespace *user_ns;
+ struct smb_ntsd *pntsd = (struct smb_ntsd *)rsp->Buffer, *ppntsd = NULL;
+ struct smb_fattr fattr = {{0}};
+ struct inode *inode;
+ __u32 secdesclen;
+ unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID;
+ int addition_info = le32_to_cpu(req->AdditionalInformation);
+ int rc;
+
+ /* Unsupported selector bits: reply with an empty descriptor header. */
+ if (addition_info & ~(OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO |
+ PROTECTED_DACL_SECINFO |
+ UNPROTECTED_DACL_SECINFO)) {
+ pr_err("Unsupported addition info: 0x%x)\n",
+ addition_info);
+
+ pntsd->revision = cpu_to_le16(1);
+ pntsd->type = cpu_to_le16(SELF_RELATIVE | DACL_PROTECTED);
+ pntsd->osidoffset = 0;
+ pntsd->gsidoffset = 0;
+ pntsd->sacloffset = 0;
+ pntsd->dacloffset = 0;
+
+ secdesclen = sizeof(struct smb_ntsd);
+ rsp->OutputBufferLength = cpu_to_le32(secdesclen);
+ inc_rfc1001_len(rsp_org, secdesclen);
+
+ return 0;
+ }
+
+ /*
+ * Inside a compound request, a request whose VolatileFileId fails
+ * has_file_id() uses the ids saved in the work item instead.
+ */
+ if (work->next_smb2_rcv_hdr_off) {
+ if (!has_file_id(le64_to_cpu(req->VolatileFileId))) {
+ ksmbd_debug(SMB, "Compound request set FID = %llu\n",
+ work->compound_fid);
+ id = work->compound_fid;
+ pid = work->compound_pfid;
+ }
+ }
+
+ if (!has_file_id(id)) {
+ id = le64_to_cpu(req->VolatileFileId);
+ pid = le64_to_cpu(req->PersistentFileId);
+ }
+
+ fp = ksmbd_lookup_fd_slow(work, id, pid);
+ if (!fp)
+ return -ENOENT;
+
+ user_ns = file_mnt_user_ns(fp->filp);
+ inode = file_inode(fp->filp);
+ ksmbd_acls_fattr(&fattr, inode);
+
+ /* The return value is ignored; ppntsd stays NULL unless it is filled in. */
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_ACL_XATTR))
+ ksmbd_vfs_get_sd_xattr(work->conn, user_ns,
+ fp->filp->f_path.dentry, &ppntsd);
+
+ rc = build_sec_desc(user_ns, pntsd, ppntsd, addition_info,
+ &secdesclen, &fattr);
+ /* Release everything acquired above before looking at rc. */
+ posix_acl_release(fattr.cf_acls);
+ posix_acl_release(fattr.cf_dacls);
+ kfree(ppntsd);
+ ksmbd_fd_put(work, fp);
+ if (rc)
+ return rc;
+
+ rsp->OutputBufferLength = cpu_to_le32(secdesclen);
+ inc_rfc1001_len(rsp_org, secdesclen);
+ return 0;
+}
+
+/**
+ * smb2_query_info() - handler for smb2 query info command
+ * @work:	smb work containing query info request buffer
+ *
+ * Dispatches on the request's InfoType to the file, filesystem or
+ * security handler.  On failure the error is translated into an SMB2
+ * status and an error response is set; on success the fixed part of the
+ * response is completed.
+ *
+ * Return:	0 on success, otherwise error
+ */
+int smb2_query_info(struct ksmbd_work *work)
+{
+	struct smb2_query_info_req *req;
+	struct smb2_query_info_rsp *rsp, *rsp_org;
+	int rc = 0;
+
+	rsp_org = work->response_buf;
+	WORK_BUFFERS(work, req, rsp);
+
+	ksmbd_debug(SMB, "GOT query info request\n");
+
+	if (req->InfoType == SMB2_O_INFO_FILE) {
+		ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILE\n");
+		rc = smb2_get_info_file(work, req, rsp, (void *)rsp_org);
+	} else if (req->InfoType == SMB2_O_INFO_FILESYSTEM) {
+		ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILESYSTEM\n");
+		rc = smb2_get_info_filesystem(work, req, rsp, (void *)rsp_org);
+	} else if (req->InfoType == SMB2_O_INFO_SECURITY) {
+		ksmbd_debug(SMB, "GOT SMB2_O_INFO_SECURITY\n");
+		rc = smb2_get_info_sec(work, req, rsp, (void *)rsp_org);
+	} else {
+		ksmbd_debug(SMB, "InfoType %d not supported yet\n",
+			    req->InfoType);
+		rc = -EOPNOTSUPP;
+	}
+
+	if (rc >= 0) {
+		/* Success: finish the fixed-size part of the response. */
+		rsp->StructureSize = cpu_to_le16(9);
+		rsp->OutputBufferOffset = cpu_to_le16(72);
+		inc_rfc1001_len(rsp_org, 8);
+		return 0;
+	}
+
+	/* Failure: map the errno to an SMB2 status. */
+	switch (rc) {
+	case -EACCES:
+		rsp->hdr.Status = STATUS_ACCESS_DENIED;
+		break;
+	case -ENOENT:
+		rsp->hdr.Status = STATUS_FILE_CLOSED;
+		break;
+	case -EIO:
+		rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
+		break;
+	case -EOPNOTSUPP:
+		rsp->hdr.Status = STATUS_INVALID_INFO_CLASS;
+		break;
+	default:
+		/* Keep a status a handler may already have stored. */
+		if (rsp->hdr.Status == 0)
+			rsp->hdr.Status = STATUS_INVALID_INFO_CLASS;
+		break;
+	}
+	smb2_set_err_rsp(work);
+
+	ksmbd_debug(SMB, "error while processing smb2 query rc = %d\n",
+		    rc);
+	return rc;
+}
+
+/**
+ * smb2_close_pipe() - handler for closing IPC pipe
+ * @work:	smb work containing close request buffer
+ *
+ * Closes the RPC handle named by the request's VolatileFileId and sends
+ * a close response with every attribute field zeroed.
+ *
+ * Return:	0
+ */
+static noinline int smb2_close_pipe(struct ksmbd_work *work)
+{
+	struct smb2_close_req *req = work->request_buf;
+	struct smb2_close_rsp *rsp = work->response_buf;
+	u64 id = le64_to_cpu(req->VolatileFileId);
+
+	ksmbd_session_rpc_close(work->sess, id);
+
+	/* Fixed header of the close response */
+	rsp->StructureSize = cpu_to_le16(60);
+	rsp->Flags = 0;
+	rsp->Reserved = 0;
+
+	/* No attributes are reported for a pipe */
+	rsp->Attributes = 0;
+	rsp->EndOfFile = 0;
+	rsp->AllocationSize = 0;
+	rsp->ChangeTime = 0;
+	rsp->LastWriteTime = 0;
+	rsp->LastAccessTime = 0;
+	rsp->CreationTime = 0;
+
+	inc_rfc1001_len(rsp, 60);
+	return 0;
+}
+
+/**
+ * smb2_close() - handler for smb2 close file command
+ * @work: smb work containing close request buffer
+ *
+ * Pipe shares are handled by smb2_close_pipe().  For regular files the
+ * session id is validated, the volatile file id is taken either from the
+ * request or from the compound state, the optional post-query attributes
+ * are filled in and the fd is closed with ksmbd_close_fd().  Failures are
+ * reported to the client through rsp->hdr.Status and an error response;
+ * they are not reflected in the return value.
+ *
+ * Return: 0
+ */
+int smb2_close(struct ksmbd_work *work)
+{
+ u64 volatile_id = KSMBD_NO_FID;
+ u64 sess_id;
+ struct smb2_close_req *req;
+ struct smb2_close_rsp *rsp;
+ struct smb2_close_rsp *rsp_org;
+ struct ksmbd_conn *conn = work->conn;
+ struct ksmbd_file *fp;
+ struct inode *inode;
+ u64 time;
+ int err = 0;
+
+ rsp_org = work->response_buf;
+ WORK_BUFFERS(work, req, rsp);
+
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_PIPE)) {
+ ksmbd_debug(SMB, "IPC pipe close request\n");
+ return smb2_close_pipe(work);
+ }
+
+ /* A related (compound) operation uses the session id saved in work. */
+ sess_id = le64_to_cpu(req->hdr.SessionId);
+ if (req->hdr.Flags & SMB2_FLAGS_RELATED_OPERATIONS)
+ sess_id = work->compound_sid;
+
+ /* compound_sid is reset and only restored when the id checks out. */
+ work->compound_sid = 0;
+ if (check_session_id(conn, sess_id)) {
+ work->compound_sid = sess_id;
+ } else {
+ rsp->hdr.Status = STATUS_USER_SESSION_DELETED;
+ if (req->hdr.Flags & SMB2_FLAGS_RELATED_OPERATIONS)
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ err = -EBADF;
+ goto out;
+ }
+
+ /*
+ * In a compound request whose VolatileFileId fails has_file_id(),
+ * the id comes from the compound state, which is invalidated here
+ * because the file is about to be closed.
+ */
+ if (work->next_smb2_rcv_hdr_off &&
+ !has_file_id(le64_to_cpu(req->VolatileFileId))) {
+ if (!has_file_id(work->compound_fid)) {
+ /* file already closed, return FILE_CLOSED */
+ ksmbd_debug(SMB, "file already closed\n");
+ rsp->hdr.Status = STATUS_FILE_CLOSED;
+ err = -EBADF;
+ goto out;
+ } else {
+ ksmbd_debug(SMB,
+ "Compound request set FID = %llu:%llu\n",
+ work->compound_fid,
+ work->compound_pfid);
+ volatile_id = work->compound_fid;
+
+ /* file closed, stored id is not valid anymore */
+ work->compound_fid = KSMBD_NO_FID;
+ work->compound_pfid = KSMBD_NO_FID;
+ }
+ } else {
+ volatile_id = le64_to_cpu(req->VolatileFileId);
+ }
+ ksmbd_debug(SMB, "volatile_id = %llu\n", volatile_id);
+
+ rsp->StructureSize = cpu_to_le16(60);
+ rsp->Reserved = 0;
+
+ /*
+ * With SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB the response carries the
+ * file's sizes, attributes and timestamps; otherwise all of these
+ * fields are zero.
+ */
+ if (req->Flags == SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB) {
+ fp = ksmbd_lookup_fd_fast(work, volatile_id);
+ if (!fp) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ inode = file_inode(fp->filp);
+ rsp->Flags = SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB;
+ rsp->AllocationSize = S_ISDIR(inode->i_mode) ? 0 :
+ cpu_to_le64(inode->i_blocks << 9);
+ rsp->EndOfFile = cpu_to_le64(inode->i_size);
+ rsp->Attributes = fp->f_ci->m_fattr;
+ rsp->CreationTime = cpu_to_le64(fp->create_time);
+ time = ksmbd_UnixTimeToNT(inode->i_atime);
+ rsp->LastAccessTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(inode->i_mtime);
+ rsp->LastWriteTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(inode->i_ctime);
+ rsp->ChangeTime = cpu_to_le64(time);
+ ksmbd_fd_put(work, fp);
+ } else {
+ rsp->Flags = 0;
+ rsp->AllocationSize = 0;
+ rsp->EndOfFile = 0;
+ rsp->Attributes = 0;
+ rsp->CreationTime = 0;
+ rsp->LastAccessTime = 0;
+ rsp->LastWriteTime = 0;
+ rsp->ChangeTime = 0;
+ }
+
+ err = ksmbd_close_fd(work, volatile_id);
+out:
+ /* Any failure without a more specific status becomes FILE_CLOSED. */
+ if (err) {
+ if (rsp->hdr.Status == 0)
+ rsp->hdr.Status = STATUS_FILE_CLOSED;
+ smb2_set_err_rsp(work);
+ } else {
+ inc_rfc1001_len(rsp_org, 60);
+ }
+
+ return 0;
+}
+
+/**
+ * smb2_echo() - handler for smb2 echo(ping) command
+ * @work:	smb work containing echo request buffer
+ *
+ * Writes the fixed 4-byte echo response body and extends the response
+ * length accordingly.
+ *
+ * Return:	0
+ */
+int smb2_echo(struct ksmbd_work *work)
+{
+	struct smb2_echo_rsp *echo_rsp = work->response_buf;
+
+	echo_rsp->Reserved = 0;
+	echo_rsp->StructureSize = cpu_to_le16(4);
+
+	inc_rfc1001_len(echo_rsp, 4);
+	return 0;
+}
+
+/*
+ * smb2_rename() - carry out a FILE_RENAME_INFORMATION request
+ * @work:	smb work for the request
+ * @fp:		open file to rename
+ * @file_info:	rename info from the client (new name, ReplaceIfExists)
+ * @local_nls:	code page passed to smb2_get_name() for the new name
+ *
+ * If the new name contains ':' it is treated as a stream name and only
+ * stored as an xattr on the file.  Otherwise the target is looked up;
+ * with ReplaceIfExists an existing target is removed first, without it
+ * an existing target with a different last component yields -EEXIST.
+ * The rename itself is done by ksmbd_vfs_fp_rename().
+ *
+ * Return: 0 on success, otherwise a negative errno.
+ */
+static int smb2_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
+		       struct smb2_file_rename_info *file_info,
+		       struct nls_table *local_nls)
+{
+	struct ksmbd_share_config *share = fp->tcon->share_conf;
+	char *new_name = NULL, *abs_oldname = NULL, *old_name = NULL;
+	char *pathname = NULL;
+	struct path path;
+	bool file_present = true;
+	bool name_differs = false;
+	int rc;
+
+	ksmbd_debug(SMB, "setting FILE_RENAME_INFO\n");
+	pathname = kmalloc(PATH_MAX, GFP_KERNEL);
+	if (!pathname)
+		return -ENOMEM;
+
+	/* old_name points at the last path component inside pathname. */
+	abs_oldname = d_path(&fp->filp->f_path, pathname, PATH_MAX);
+	if (IS_ERR(abs_oldname)) {
+		rc = -EINVAL;
+		goto out;
+	}
+	old_name = strrchr(abs_oldname, '/');
+	if (old_name && old_name[1] != '\0') {
+		old_name++;
+	} else {
+		ksmbd_debug(SMB, "can't get last component in path %s\n",
+			    abs_oldname);
+		rc = -ENOENT;
+		goto out;
+	}
+
+	new_name = smb2_get_name(share,
+				 file_info->FileName,
+				 le32_to_cpu(file_info->FileNameLength),
+				 local_nls);
+	if (IS_ERR(new_name)) {
+		rc = PTR_ERR(new_name);
+		goto out;
+	}
+
+	if (strchr(new_name, ':')) {
+		int s_type;
+		char *xattr_stream_name, *stream_name = NULL;
+		size_t xattr_stream_size;
+		int len;
+
+		rc = parse_stream_name(new_name, &stream_name, &s_type);
+		if (rc < 0)
+			goto out;
+
+		len = strlen(new_name);
+		if (new_name[len - 1] != '/') {
+			pr_err("not allow base filename in rename\n");
+			rc = -ESHARE;
+			goto out;
+		}
+
+		/*
+		 * NOTE(review): xattr_stream_name is never released in this
+		 * function; if ksmbd_vfs_xattr_stream_name() allocates it,
+		 * it leaks here - confirm against its definition.
+		 */
+		rc = ksmbd_vfs_xattr_stream_name(stream_name,
+						 &xattr_stream_name,
+						 &xattr_stream_size,
+						 s_type);
+		if (rc)
+			goto out;
+
+		rc = ksmbd_vfs_setxattr(file_mnt_user_ns(fp->filp),
+					fp->filp->f_path.dentry,
+					xattr_stream_name,
+					NULL, 0, 0);
+		if (rc < 0) {
+			pr_err("failed to store stream name in xattr: %d\n",
+			       rc);
+			rc = -EINVAL;
+			goto out;
+		}
+
+		goto out;
+	}
+
+	ksmbd_debug(SMB, "new name %s\n", new_name);
+	rc = ksmbd_vfs_kern_path(new_name, 0, &path, 1);
+	if (rc) {
+		file_present = false;
+	} else {
+		/*
+		 * Compare the names while the path reference is still held;
+		 * the dentry must not be touched after path_put().
+		 */
+		name_differs = strncmp(old_name, path.dentry->d_name.name,
+				       strlen(old_name)) != 0;
+		path_put(&path);
+	}
+
+	if (ksmbd_share_veto_filename(share, new_name)) {
+		rc = -ENOENT;
+		ksmbd_debug(SMB, "Can't rename vetoed file: %s\n", new_name);
+		goto out;
+	}
+
+	if (file_info->ReplaceIfExists) {
+		if (file_present) {
+			rc = ksmbd_vfs_remove_file(work, new_name);
+			if (rc) {
+				if (rc != -ENOTEMPTY)
+					rc = -EINVAL;
+				ksmbd_debug(SMB, "cannot delete %s, rc %d\n",
+					    new_name, rc);
+				goto out;
+			}
+		}
+	} else {
+		if (file_present && name_differs) {
+			rc = -EEXIST;
+			ksmbd_debug(SMB,
+				    "cannot rename already existing file\n");
+			goto out;
+		}
+	}
+
+	rc = ksmbd_vfs_fp_rename(work, fp, new_name);
+out:
+	kfree(pathname);
+	if (!IS_ERR(new_name))
+		kfree(new_name);
+	return rc;
+}
+
+/*
+ * smb2_create_link() - carry out a FILE_LINK_INFORMATION request
+ *
+ * Creates a link named by file_info->FileName pointing at the file open
+ * in @filp.  Directories cannot be linked.  An existing link name is
+ * removed first when ReplaceIfExists is set and rejected with -EEXIST
+ * otherwise.
+ *
+ * Return: 0 on success, -ENOMEM, -EEXIST or -EINVAL on failure.
+ */
+static int smb2_create_link(struct ksmbd_work *work,
+			    struct ksmbd_share_config *share,
+			    struct smb2_file_link_info *file_info,
+			    struct file *filp,
+			    struct nls_table *local_nls)
+{
+	char *link_name = NULL, *target_name = NULL, *pathname = NULL;
+	struct path path;
+	bool file_present;
+	int rc;
+
+	ksmbd_debug(SMB, "setting FILE_LINK_INFORMATION\n");
+	pathname = kmalloc(PATH_MAX, GFP_KERNEL);
+	if (!pathname)
+		return -ENOMEM;
+
+	link_name = smb2_get_name(share,
+				  file_info->FileName,
+				  le32_to_cpu(file_info->FileNameLength),
+				  local_nls);
+	if (IS_ERR(link_name) || S_ISDIR(file_inode(filp)->i_mode)) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	ksmbd_debug(SMB, "link name is %s\n", link_name);
+	target_name = d_path(&filp->f_path, pathname, PATH_MAX);
+	if (IS_ERR(target_name)) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	ksmbd_debug(SMB, "target name is %s\n", target_name);
+
+	/* The lookup only serves as an existence test for the link name. */
+	rc = ksmbd_vfs_kern_path(link_name, 0, &path, 0);
+	file_present = !rc;
+	if (file_present)
+		path_put(&path);
+
+	if (file_present) {
+		if (!file_info->ReplaceIfExists) {
+			rc = -EEXIST;
+			ksmbd_debug(SMB, "link already exists\n");
+			goto out;
+		}
+
+		rc = ksmbd_vfs_remove_file(work, link_name);
+		if (rc) {
+			rc = -EINVAL;
+			ksmbd_debug(SMB, "cannot delete %s\n",
+				    link_name);
+			goto out;
+		}
+	}
+
+	/* Any failure of the link operation is reported as -EINVAL. */
+	if (ksmbd_vfs_link(work, target_name, link_name))
+		rc = -EINVAL;
+	else
+		rc = 0;
+out:
+	if (!IS_ERR(link_name))
+		kfree(link_name);
+	kfree(pathname);
+	return rc;
+}
+
+/*
+ * set_file_basic_info() - apply a FILE_BASIC_INFORMATION set request
+ * @fp:		open file to modify
+ * @buf:	request payload, read as a struct smb2_file_all_info
+ * @share:	share configuration (decides whether DOS attrs are stored)
+ *
+ * Non-zero timestamps in the request are converted and applied to the
+ * inode, a non-zero Attributes value updates the cached DOS attributes,
+ * and, when the share stores DOS attributes, creation time and
+ * attributes are written to the dos-attrib xattr (best effort).
+ *
+ * Return: 0 on success; -EACCES without FILE_WRITE_ATTRIBUTES or on an
+ * immutable/append-only inode; -EINVAL when a file would be turned into
+ * a directory or setattr_prepare() rejects the change; otherwise the
+ * error returned by notify_change().
+ */
+static int set_file_basic_info(struct ksmbd_file *fp, char *buf,
+			       struct ksmbd_share_config *share)
+{
+	struct smb2_file_all_info *file_info;
+	struct iattr attrs;
+	struct iattr temp_attrs;
+	struct file *filp;
+	struct inode *inode;
+	struct user_namespace *user_ns;
+	int rc = 0;
+
+	if (!(fp->daccess & FILE_WRITE_ATTRIBUTES_LE))
+		return -EACCES;
+
+	file_info = (struct smb2_file_all_info *)buf;
+	attrs.ia_valid = 0;
+	filp = fp->filp;
+	inode = file_inode(filp);
+	user_ns = file_mnt_user_ns(filp);
+
+	/* A zero timestamp in the request means "leave unchanged". */
+	if (file_info->CreationTime)
+		fp->create_time = le64_to_cpu(file_info->CreationTime);
+
+	if (file_info->LastAccessTime) {
+		attrs.ia_atime = ksmbd_NTtimeToUnix(file_info->LastAccessTime);
+		attrs.ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
+	}
+
+	if (file_info->ChangeTime) {
+		temp_attrs.ia_ctime = ksmbd_NTtimeToUnix(file_info->ChangeTime);
+		attrs.ia_ctime = temp_attrs.ia_ctime;
+		attrs.ia_valid |= ATTR_CTIME;
+	} else {
+		temp_attrs.ia_ctime = inode->i_ctime;
+	}
+
+	if (file_info->LastWriteTime) {
+		attrs.ia_mtime = ksmbd_NTtimeToUnix(file_info->LastWriteTime);
+		attrs.ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
+	}
+
+	if (file_info->Attributes) {
+		if (!S_ISDIR(inode->i_mode) &&
+		    file_info->Attributes & ATTR_DIRECTORY_LE) {
+			pr_err("can't change a file to a directory\n");
+			return -EINVAL;
+		}
+
+		/* The directory bit of the cached attributes is preserved. */
+		if (!(S_ISDIR(inode->i_mode) && file_info->Attributes == ATTR_NORMAL_LE))
+			fp->f_ci->m_fattr = file_info->Attributes |
+				(fp->f_ci->m_fattr & ATTR_DIRECTORY_LE);
+	}
+
+	if (test_share_config_flag(share, KSMBD_SHARE_FLAG_STORE_DOS_ATTRS) &&
+	    (file_info->CreationTime || file_info->Attributes)) {
+		struct xattr_dos_attrib da = {0};
+
+		da.version = 4;
+		da.itime = fp->itime;
+		da.create_time = fp->create_time;
+		da.attr = le32_to_cpu(fp->f_ci->m_fattr);
+		da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
+			XATTR_DOSINFO_ITIME;
+
+		rc = ksmbd_vfs_set_dos_attrib_xattr(user_ns,
+						    filp->f_path.dentry, &da);
+		if (rc)
+			ksmbd_debug(SMB,
+				    "failed to restore file attribute in EA\n");
+		/* Storing the xattr is best effort; do not fail the request. */
+		rc = 0;
+	}
+
+	/*
+	 * HACK : set ctime here to avoid ctime changed
+	 * when file_info->ChangeTime is zero.
+	 */
+	attrs.ia_ctime = temp_attrs.ia_ctime;
+	attrs.ia_valid |= ATTR_CTIME;
+
+	if (attrs.ia_valid) {
+		struct dentry *dentry = filp->f_path.dentry;
+		struct inode *target = d_inode(dentry);
+
+		if (IS_IMMUTABLE(target) || IS_APPEND(target))
+			return -EACCES;
+
+		rc = setattr_prepare(user_ns, dentry, &attrs);
+		if (rc)
+			return -EINVAL;
+
+		inode_lock(target);
+		setattr_copy(user_ns, target, &attrs);
+		/* ctime was set by setattr_copy(); keep notify_change off it. */
+		attrs.ia_valid &= ~ATTR_CTIME;
+		rc = notify_change(user_ns, dentry, &attrs, NULL);
+		inode_unlock(target);
+	}
+
+	/* Report a notify_change() failure instead of dropping it. */
+	return rc;
+}
+
+/*
+ * set_file_allocation_info() - apply a FILE_ALLOCATION_INFORMATION set
+ * request.
+ *
+ * The requested AllocationSize is rounded up to 512-byte blocks and
+ * compared with inode->i_blocks: a larger value preallocates with
+ * vfs_fallocate(FALLOC_FL_KEEP_SIZE), a smaller one truncates.
+ *
+ * TODO : This only works correctly when "store dos attributes" is not
+ * enabled. Logic that works with any smb.conf option is still needed.
+ *
+ * Return: 0 on success, -EACCES without FILE_WRITE_DATA, otherwise the
+ * error from vfs_fallocate() (except -EOPNOTSUPP, which is ignored) or
+ * from ksmbd_vfs_truncate().
+ */
+static int set_file_allocation_info(struct ksmbd_work *work,
+				    struct ksmbd_file *fp, char *buf)
+{
+	struct smb2_file_alloc_info *alloc_req =
+		(struct smb2_file_alloc_info *)buf;
+	struct inode *inode;
+	loff_t alloc_blks;
+	loff_t old_size;
+	int rc;
+
+	if (!(fp->daccess & FILE_WRITE_DATA_LE))
+		return -EACCES;
+
+	alloc_blks = (le64_to_cpu(alloc_req->AllocationSize) + 511) >> 9;
+	inode = file_inode(fp->filp);
+
+	/* Nothing to do when the block count already matches. */
+	if (alloc_blks == inode->i_blocks)
+		return 0;
+
+	if (alloc_blks > inode->i_blocks) {
+		smb_break_all_levII_oplock(work, fp, 1);
+		rc = vfs_fallocate(fp->filp, FALLOC_FL_KEEP_SIZE, 0,
+				   alloc_blks * 512);
+		if (!rc || rc == -EOPNOTSUPP)
+			return 0;
+
+		pr_err("vfs_fallocate is failed : %d\n", rc);
+		return rc;
+	}
+
+	/*
+	 * Shrinking: blocks beyond the new allocation size have to be
+	 * released.  Truncate does that, but it also moves the inode size
+	 * to the truncate offset, so the previous size is saved first and
+	 * written back afterwards when it is below the new allocation.
+	 */
+	old_size = i_size_read(inode);
+	rc = ksmbd_vfs_truncate(work, NULL, fp, alloc_blks * 512);
+	if (rc) {
+		pr_err("truncate failed! filename : %s, err %d\n",
+		       fp->filename, rc);
+		return rc;
+	}
+	if (old_size < alloc_blks * 512)
+		i_size_write(inode, old_size);
+
+	return 0;
+}
+
+/*
+ * set_end_of_file_info() - apply a FILE_END_OF_FILE_INFORMATION set
+ * request by truncating the file to the requested EndOfFile.
+ *
+ * Return: 0 on success (or when the truncate is skipped), -EACCES
+ * without FILE_WRITE_DATA, -EAGAIN if the truncate returned it, -EBADF
+ * for any other truncate failure.
+ */
+static int set_end_of_file_info(struct ksmbd_work *work, struct ksmbd_file *fp,
+				char *buf)
+{
+	struct smb2_file_eof_info *eof_req = (struct smb2_file_eof_info *)buf;
+	struct inode *inode;
+	loff_t newsize;
+	int rc;
+
+	if (!(fp->daccess & FILE_WRITE_DATA_LE))
+		return -EACCES;
+
+	newsize = le64_to_cpu(eof_req->EndOfFile);
+	inode = file_inode(fp->filp);
+
+	/*
+	 * The truncate is skipped on MSDOS_SUPER_MAGIC file systems: on a
+	 * FAT32 shared device it takes too long, because such file systems
+	 * fill the truncated range with zero data, and a Windows client
+	 * may run into a network error meanwhile.
+	 */
+	if (inode->i_sb->s_magic == MSDOS_SUPER_MAGIC)
+		return 0;
+
+	ksmbd_debug(SMB, "filename : %s truncated to newsize %lld\n",
+		    fp->filename, newsize);
+	rc = ksmbd_vfs_truncate(work, NULL, fp, newsize);
+	if (!rc)
+		return 0;
+
+	ksmbd_debug(SMB, "truncate failed! filename : %s err %d\n",
+		    fp->filename, rc);
+	return rc == -EAGAIN ? rc : -EBADF;
+}
+
+/*
+ * set_rename_info() - permission checks in front of smb2_rename()
+ *
+ * The handle must have been opened with FILE_DELETE.  Unless the handle
+ * is a stream fd, the rename is also refused with -ESHARE when an open
+ * of the parent directory's inode holds FILE_DELETE access.
+ *
+ * Return: -EACCES, -ESHARE, an error from ksmbd_vfs_lock_parent(), or
+ * the result of smb2_rename().
+ */
+static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
+			   char *buf)
+{
+	struct dentry *dentry = fp->filp->f_path.dentry;
+
+	if (!(fp->daccess & FILE_DELETE_LE)) {
+		pr_err("no right to delete : 0x%x\n", fp->daccess);
+		return -EACCES;
+	}
+
+	if (!ksmbd_stream_fd(fp)) {
+		struct dentry *dir = dget_parent(dentry);
+		struct ksmbd_file *dir_fp;
+		int err;
+
+		err = ksmbd_vfs_lock_parent(dir, dentry);
+		if (err) {
+			dput(dir);
+			return err;
+		}
+
+		/* Look up an open of the parent, then unlock and drop it. */
+		dir_fp = ksmbd_lookup_fd_inode(d_inode(dir));
+		inode_unlock(d_inode(dir));
+		dput(dir);
+
+		if (dir_fp && (dir_fp->daccess & FILE_DELETE_LE)) {
+			pr_err("parent dir is opened with delete access\n");
+			return -ESHARE;
+		}
+	}
+
+	return smb2_rename(work, fp,
+			   (struct smb2_file_rename_info *)buf,
+			   work->sess->conn->local_nls);
+}
+
+/*
+ * set_file_disposition_info() - apply a FILE_DISPOSITION_INFORMATION
+ * set request: mark or unmark the inode as delete-pending.
+ *
+ * Return: 0 on success, -EACCES without FILE_DELETE, -EBUSY when a
+ * directory to be deleted is reported as not empty.
+ */
+static int set_file_disposition_info(struct ksmbd_file *fp, char *buf)
+{
+	struct smb2_file_disposition_info *disp;
+	struct inode *inode;
+
+	if (!(fp->daccess & FILE_DELETE_LE)) {
+		pr_err("no right to delete : 0x%x\n", fp->daccess);
+		return -EACCES;
+	}
+
+	inode = file_inode(fp->filp);
+	disp = (struct smb2_file_disposition_info *)buf;
+
+	if (!disp->DeletePending) {
+		ksmbd_clear_inode_pending_delete(fp);
+		return 0;
+	}
+
+	/* A directory may only be marked for deletion when it is empty. */
+	if (S_ISDIR(inode->i_mode) &&
+	    ksmbd_vfs_empty_dir(fp) == -ENOTEMPTY)
+		return -EBUSY;
+
+	ksmbd_set_inode_pending_delete(fp);
+	return 0;
+}
+
+/*
+ * set_file_position_info() - apply a FILE_POSITION_INFORMATION set
+ * request by storing CurrentByteOffset in the file position.
+ *
+ * Return: 0 on success, -EINVAL for a negative offset or, when the file
+ * was opened with exactly FILE_NO_INTERMEDIATE_BUFFERING, an offset that
+ * is not a multiple of the superblock block size.
+ */
+static int set_file_position_info(struct ksmbd_file *fp, char *buf)
+{
+	struct smb2_file_pos_info *pos_req = (struct smb2_file_pos_info *)buf;
+	struct inode *inode = file_inode(fp->filp);
+	unsigned long sector_size = inode->i_sb->s_blocksize;
+	loff_t current_byte_offset = le64_to_cpu(pos_req->CurrentByteOffset);
+	bool unaligned;
+
+	unaligned = fp->coption == FILE_NO_INTERMEDIATE_BUFFERING_LE &&
+		    current_byte_offset & (sector_size - 1);
+
+	if (current_byte_offset < 0 || unaligned) {
+		pr_err("CurrentByteOffset is not valid : %llu\n",
+		       current_byte_offset);
+		return -EINVAL;
+	}
+
+	fp->filp->f_pos = current_byte_offset;
+	return 0;
+}
+
+/*
+ * set_file_mode_info() - apply a FILE_MODE_INFORMATION set request
+ *
+ * The mode is rejected when it has bits outside FILE_MODE_INFO_MASK or
+ * requests both synchronous-IO flavours at once.  A valid mode is handed
+ * to ksmbd_vfs_set_fadvise() and stored as the handle's create options.
+ *
+ * Return: 0 on success, -EINVAL for an invalid mode.
+ */
+static int set_file_mode_info(struct ksmbd_file *fp, char *buf)
+{
+	struct smb2_file_mode_info *mode_req =
+		(struct smb2_file_mode_info *)buf;
+	__le32 mode = mode_req->Mode;
+	bool both_sync_modes;
+
+	both_sync_modes = (mode & FILE_SYNCHRONOUS_IO_ALERT_LE) &&
+			  (mode & FILE_SYNCHRONOUS_IO_NONALERT_LE);
+
+	if ((mode & ~FILE_MODE_INFO_MASK) || both_sync_modes) {
+		pr_err("Mode is not valid : 0x%x\n", le32_to_cpu(mode));
+		return -EINVAL;
+	}
+
+	/*
+	 * TODO : FILE_SYNCHRONOUS_IO_ALERT and
+	 * FILE_SYNCHRONOUS_IO_NONALERT are not taken into account yet.
+	 */
+	ksmbd_vfs_set_fadvise(fp->filp, mode);
+	fp->coption = mode;
+	return 0;
+}
+
+/**
+ * smb2_set_info_file() - handler for smb2 set info command
+ * @work: smb work containing set info command buffer
+ * @fp: ksmbd_file pointer
+ * @info_class: smb2 set info class
+ * @buf: set info payload; cast to the structure matching @info_class
+ * @share: ksmbd_share_config pointer
+ *
+ * Dispatches on @info_class to the matching set_* helper.  Rename and
+ * disposition additionally require a writable tree connection, and
+ * full-EA requires FILE_WRITE_EA access on the handle.
+ *
+ * Return: 0 on success, otherwise error
+ * (-EOPNOTSUPP for an info class that is not handled here)
+ * TODO: need to implement an error handling for STATUS_INFO_LENGTH_MISMATCH
+ */
+static int smb2_set_info_file(struct ksmbd_work *work, struct ksmbd_file *fp,
+ int info_class, char *buf,
+ struct ksmbd_share_config *share)
+{
+ switch (info_class) {
+ case FILE_BASIC_INFORMATION:
+ return set_file_basic_info(fp, buf, share);
+
+ case FILE_ALLOCATION_INFORMATION:
+ return set_file_allocation_info(work, fp, buf);
+
+ case FILE_END_OF_FILE_INFORMATION:
+ return set_end_of_file_info(work, fp, buf);
+
+ case FILE_RENAME_INFORMATION:
+ if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+ ksmbd_debug(SMB,
+ "User does not have write permission\n");
+ return -EACCES;
+ }
+ return set_rename_info(work, fp, buf);
+
+ case FILE_LINK_INFORMATION:
+ return smb2_create_link(work, work->tcon->share_conf,
+ (struct smb2_file_link_info *)buf, fp->filp,
+ work->sess->conn->local_nls);
+
+ case FILE_DISPOSITION_INFORMATION:
+ if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+ ksmbd_debug(SMB,
+ "User does not have write permission\n");
+ return -EACCES;
+ }
+ return set_file_disposition_info(fp, buf);
+
+ case FILE_FULL_EA_INFORMATION:
+ {
+ if (!(fp->daccess & FILE_WRITE_EA_LE)) {
+ pr_err("Not permitted to write ext attr: 0x%x\n",
+ fp->daccess);
+ return -EACCES;
+ }
+
+ return smb2_set_ea((struct smb2_ea_info *)buf,
+ &fp->filp->f_path);
+ }
+
+ case FILE_POSITION_INFORMATION:
+ return set_file_position_info(fp, buf);
+
+ case FILE_MODE_INFORMATION:
+ return set_file_mode_info(fp, buf);
+ }
+
+ /* Every handled class returned above; anything else is unsupported. */
+ pr_err("Unimplemented Fileinfoclass :%d\n", info_class);
+ return -EOPNOTSUPP;
+}
+
+/**
+ * smb2_set_info_sec() - apply a security descriptor from a SET_INFO request
+ * @fp:			ksmbd_file the descriptor is applied to
+ * @addition_info:	AdditionalInformation field of the request (not used
+ *			by this function)
+ * @buffer:		request payload, interpreted as struct smb_ntsd
+ * @buf_len:		length of @buffer in bytes
+ *
+ * FILE_SHARE_DELETE_LE is OR-ed into fp->saccess before the descriptor is
+ * handed to set_info_sec().
+ *
+ * Return:	the result of set_info_sec()
+ */
+static int smb2_set_info_sec(struct ksmbd_file *fp, int addition_info,
+			     char *buffer, int buf_len)
+{
+	fp->saccess |= FILE_SHARE_DELETE_LE;
+
+	return set_info_sec(fp->conn, fp->tcon, &fp->filp->f_path,
+			    (struct smb_ntsd *)buffer, buf_len, false);
+}
+
+/**
+ * smb2_set_info() - handler for smb2 set info command handler
+ * @work:	smb work containing set info request buffer
+ *
+ * Resolves the target file handle (falling back to the compound file id
+ * when the request is part of a compound chain and carries no usable
+ * volatile id), dispatches on InfoType and, on failure, translates the
+ * errno into an SMB2 status before building the error response.
+ *
+ * Return: 0 on success, otherwise error
+ */
+int smb2_set_info(struct ksmbd_work *work)
+{
+	struct smb2_set_info_req *req;
+	struct smb2_set_info_rsp *rsp, *rsp_org;
+	struct ksmbd_file *fp;
+	int rc = 0;
+	unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID;
+
+	ksmbd_debug(SMB, "Received set info request\n");
+
+	rsp_org = work->response_buf;
+	if (work->next_smb2_rcv_hdr_off) {
+		req = ksmbd_req_buf_next(work);
+		rsp = ksmbd_resp_buf_next(work);
+		if (!has_file_id(le64_to_cpu(req->VolatileFileId))) {
+			ksmbd_debug(SMB, "Compound request set FID = %llu\n",
+				    work->compound_fid);
+			id = work->compound_fid;
+			pid = work->compound_pfid;
+		}
+	} else {
+		req = work->request_buf;
+		rsp = work->response_buf;
+	}
+
+	/* No compound id was picked up above: use the ids from the request. */
+	if (!has_file_id(id)) {
+		id = le64_to_cpu(req->VolatileFileId);
+		pid = le64_to_cpu(req->PersistentFileId);
+	}
+
+	fp = ksmbd_lookup_fd_slow(work, id, pid);
+	if (!fp) {
+		ksmbd_debug(SMB, "Invalid id for set info: %u\n", id);
+		rc = -ENOENT;
+		goto err_out;
+	}
+
+	switch (req->InfoType) {
+	case SMB2_O_INFO_FILE:
+		ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILE\n");
+		rc = smb2_set_info_file(work, fp, req->FileInfoClass,
+					req->Buffer, work->tcon->share_conf);
+		break;
+	case SMB2_O_INFO_SECURITY:
+		ksmbd_debug(SMB, "GOT SMB2_O_INFO_SECURITY\n");
+		if (ksmbd_override_fsids(work)) {
+			rc = -ENOMEM;
+			goto err_out;
+		}
+		rc = smb2_set_info_sec(fp,
+				       le32_to_cpu(req->AdditionalInformation),
+				       req->Buffer,
+				       le32_to_cpu(req->BufferLength));
+		ksmbd_revert_fsids(work);
+		break;
+	default:
+		rc = -EOPNOTSUPP;
+	}
+
+	if (rc < 0)
+		goto err_out;
+
+	rsp->StructureSize = cpu_to_le16(2);
+	inc_rfc1001_len(rsp_org, 2);
+	ksmbd_fd_put(work, fp);
+	return 0;
+
+err_out:
+	if (rc == -EACCES || rc == -EPERM)
+		rsp->hdr.Status = STATUS_ACCESS_DENIED;
+	else if (rc == -EINVAL)
+		rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+	else if (rc == -ESHARE)
+		rsp->hdr.Status = STATUS_SHARING_VIOLATION;
+	else if (rc == -ENOENT)
+		rsp->hdr.Status = STATUS_OBJECT_NAME_INVALID;
+	else if (rc == -EBUSY || rc == -ENOTEMPTY)
+		rsp->hdr.Status = STATUS_DIRECTORY_NOT_EMPTY;
+	else if (rc == -EAGAIN)
+		rsp->hdr.Status = STATUS_FILE_LOCK_CONFLICT;
+	else if (rc == -EBADF || rc == -ESTALE)
+		rsp->hdr.Status = STATUS_INVALID_HANDLE;
+	else if (rc == -EEXIST)
+		rsp->hdr.Status = STATUS_OBJECT_NAME_COLLISION;
+	else if (rsp->hdr.Status == 0 || rc == -EOPNOTSUPP)
+		/* A status already set by a callee is kept otherwise. */
+		rsp->hdr.Status = STATUS_INVALID_INFO_CLASS;
+	smb2_set_err_rsp(work);
+	ksmbd_fd_put(work, fp);
+	ksmbd_debug(SMB, "error while processing smb2 set info rc = %d\n", rc);
+	return rc;
+}
+
+/**
+ * smb2_read_pipe() - handler for smb2 read from IPC pipe
+ * @work:	smb work containing read IPC pipe command buffer
+ *
+ * Asks the RPC layer for pending data on the pipe identified by the
+ * request's volatile file id. When a reply is returned, its payload is
+ * copied into work->aux_payload_buf; without a reply an empty read
+ * response is built.
+ *
+ * Return:	0 on success, otherwise error
+ */
+static noinline int smb2_read_pipe(struct ksmbd_work *work)
+{
+	struct smb2_read_req *req = work->request_buf;
+	struct smb2_read_rsp *rsp = work->response_buf;
+	struct ksmbd_rpc_command *reply;
+	int payload_len = 0;
+	int rc;
+
+	inc_rfc1001_len(rsp, 16);
+	reply = ksmbd_rpc_read(work->sess, le64_to_cpu(req->VolatileFileId));
+	if (reply) {
+		if (reply->flags != KSMBD_RPC_OK) {
+			rc = -EINVAL;
+			goto fail;
+		}
+
+		work->aux_payload_buf = kvmalloc(reply->payload_sz,
+						 GFP_KERNEL | __GFP_ZERO);
+		if (!work->aux_payload_buf) {
+			rc = -ENOMEM;
+			goto fail;
+		}
+
+		memcpy(work->aux_payload_buf, reply->payload,
+		       reply->payload_sz);
+
+		payload_len = reply->payload_sz;
+		work->resp_hdr_sz = get_rfc1002_len(rsp) + 4;
+		work->aux_payload_sz = payload_len;
+		kvfree(reply);
+	}
+
+	rsp->StructureSize = cpu_to_le16(17);
+	rsp->DataOffset = 80;
+	rsp->Reserved = 0;
+	rsp->DataLength = cpu_to_le32(payload_len);
+	rsp->DataRemaining = 0;
+	rsp->Reserved2 = 0;
+	inc_rfc1001_len(rsp, payload_len);
+	return 0;
+
+fail:
+	rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
+	smb2_set_err_rsp(work);
+	kvfree(reply);
+	return rc;
+}
+
+/**
+ * smb2_read_rdma_channel() - push read data to the client over RDMA
+ * @work:	smb work of the read request
+ * @req:	read request; req->Buffer is read as a struct
+ *		smb2_buffer_desc_v1
+ * @data_buf:	data to transfer
+ * @length:	number of bytes in @data_buf
+ *
+ * Return:	@length on success, -EINVAL when the channel fields are
+ *		rejected, otherwise the error from ksmbd_conn_rdma_write()
+ */
+static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work,
+				      struct smb2_read_req *req, void *data_buf,
+				      size_t length)
+{
+	struct smb2_buffer_desc_v1 *desc;
+	int rc;
+
+	/* On an SMB 3.0 connection only the plain RDMA_V1 channel passes. */
+	if (work->conn->dialect == SMB30_PROT_ID &&
+	    req->Channel != SMB2_CHANNEL_RDMA_V1)
+		return -EINVAL;
+
+	desc = (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
+	if (req->ReadChannelInfoOffset == 0)
+		return -EINVAL;
+	if (le16_to_cpu(req->ReadChannelInfoLength) < sizeof(*desc))
+		return -EINVAL;
+
+	work->remote_key = le32_to_cpu(desc->token);
+	work->need_invalidate_rkey =
+		(req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
+
+	rc = ksmbd_conn_rdma_write(work->conn, data_buf, length,
+				   le32_to_cpu(desc->token),
+				   le64_to_cpu(desc->offset),
+				   le32_to_cpu(desc->length));
+	return rc ? rc : length;
+}
+
+/**
+ * smb2_read() - handler for smb2 read from file
+ * @work:	smb work containing read command buffer
+ *
+ * Reads on a pipe share are forwarded to smb2_read_pipe(). Otherwise the
+ * data is read into work->aux_payload_buf and either attached to the
+ * response or, for the RDMA channels, transferred with
+ * smb2_read_rdma_channel().
+ *
+ * Return:	0 on success, otherwise error
+ */
+int smb2_read(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct smb2_read_req *req;
+	struct smb2_read_rsp *rsp, *rsp_org;
+	struct ksmbd_file *fp;
+	loff_t offset;
+	size_t length, mincount;
+	ssize_t nbytes = 0, remain_bytes = 0;
+	int err = 0;
+
+	rsp_org = work->response_buf;
+	WORK_BUFFERS(work, req, rsp);
+
+	if (test_share_config_flag(work->tcon->share_conf,
+				   KSMBD_SHARE_FLAG_PIPE)) {
+		ksmbd_debug(SMB, "IPC pipe read request\n");
+		return smb2_read_pipe(work);
+	}
+
+	fp = ksmbd_lookup_fd_slow(work, le64_to_cpu(req->VolatileFileId),
+				  le64_to_cpu(req->PersistentFileId));
+	if (!fp) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (!(fp->daccess & (FILE_READ_DATA_LE | FILE_READ_ATTRIBUTES_LE))) {
+		pr_err("Not permitted to read : 0x%x\n", fp->daccess);
+		err = -EACCES;
+		goto out;
+	}
+
+	offset = le64_to_cpu(req->Offset);
+	length = le32_to_cpu(req->Length);
+	mincount = le32_to_cpu(req->MinimumCount);
+
+	/* Oversized requests are rejected, not truncated. */
+	if (length > conn->vals->max_read_size) {
+		ksmbd_debug(SMB, "limiting read size to max size(%u)\n",
+			    conn->vals->max_read_size);
+		err = -EINVAL;
+		goto out;
+	}
+
+	ksmbd_debug(SMB, "filename %pd, offset %lld, len %zu\n",
+		    fp->filp->f_path.dentry, offset, length);
+
+	work->aux_payload_buf = kvmalloc(length, GFP_KERNEL | __GFP_ZERO);
+	if (!work->aux_payload_buf) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	nbytes = ksmbd_vfs_read(work, fp, length, &offset);
+	if (nbytes < 0) {
+		err = nbytes;
+		goto out;
+	}
+
+	/* Nothing read for a non-empty request, or less than MinimumCount. */
+	if ((nbytes == 0 && length != 0) || nbytes < mincount) {
+		kvfree(work->aux_payload_buf);
+		work->aux_payload_buf = NULL;
+		rsp->hdr.Status = STATUS_END_OF_FILE;
+		smb2_set_err_rsp(work);
+		ksmbd_fd_put(work, fp);
+		return 0;
+	}
+
+	ksmbd_debug(SMB, "nbytes %zu, offset %lld mincount %zu\n",
+		    nbytes, offset, mincount);
+
+	if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE ||
+	    req->Channel == SMB2_CHANNEL_RDMA_V1) {
+		/* write data to the client using rdma channel */
+		remain_bytes = smb2_read_rdma_channel(work, req,
+						      work->aux_payload_buf,
+						      nbytes);
+		kvfree(work->aux_payload_buf);
+		work->aux_payload_buf = NULL;
+
+		/* The data went out of band: the response carries none. */
+		nbytes = 0;
+		if (remain_bytes < 0) {
+			err = (int)remain_bytes;
+			goto out;
+		}
+	}
+
+	rsp->StructureSize = cpu_to_le16(17);
+	rsp->DataOffset = 80;
+	rsp->Reserved = 0;
+	rsp->DataLength = cpu_to_le32(nbytes);
+	rsp->DataRemaining = cpu_to_le32(remain_bytes);
+	rsp->Reserved2 = 0;
+	inc_rfc1001_len(rsp_org, 16);
+	work->resp_hdr_sz = get_rfc1002_len(rsp_org) + 4;
+	work->aux_payload_sz = nbytes;
+	inc_rfc1001_len(rsp_org, nbytes);
+	ksmbd_fd_put(work, fp);
+	return 0;
+
+out:
+	/* Every jump to this label carries a non-zero err. */
+	switch (err) {
+	case -EISDIR:
+		rsp->hdr.Status = STATUS_INVALID_DEVICE_REQUEST;
+		break;
+	case -EAGAIN:
+		rsp->hdr.Status = STATUS_FILE_LOCK_CONFLICT;
+		break;
+	case -ENOENT:
+		rsp->hdr.Status = STATUS_FILE_CLOSED;
+		break;
+	case -EACCES:
+		rsp->hdr.Status = STATUS_ACCESS_DENIED;
+		break;
+	case -ESHARE:
+		rsp->hdr.Status = STATUS_SHARING_VIOLATION;
+		break;
+	case -EINVAL:
+		rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+		break;
+	default:
+		rsp->hdr.Status = STATUS_INVALID_HANDLE;
+		break;
+	}
+	smb2_set_err_rsp(work);
+
+	ksmbd_fd_put(work, fp);
+	return err;
+}
+
+/**
+ * smb2_write_pipe() - handler for smb2 write on IPC pipe
+ * @work:	smb work containing write IPC pipe command buffer
+ *
+ * Locates the write payload inside the request and forwards it to the RPC
+ * layer for the pipe identified by the request's volatile file id.
+ *
+ * Return:	0 on success, otherwise error
+ */
+static noinline int smb2_write_pipe(struct ksmbd_work *work)
+{
+	struct smb2_write_req *req = work->request_buf;
+	struct smb2_write_rsp *rsp = work->response_buf;
+	struct ksmbd_rpc_command *reply;
+	size_t length = le32_to_cpu(req->Length);
+	u64 id = le64_to_cpu(req->VolatileFileId);
+	u16 data_off = le16_to_cpu(req->DataOffset);
+	char *payload;
+
+	if (data_off == (offsetof(struct smb2_write_req, Buffer) - 4)) {
+		/* Payload starts directly at the Buffer field. */
+		payload = (char *)&req->Buffer[0];
+	} else {
+		if ((data_off > get_rfc1002_len(req)) ||
+		    (data_off + length > get_rfc1002_len(req))) {
+			pr_err("invalid write data offset %u, smb_len %u\n",
+			       le16_to_cpu(req->DataOffset),
+			       get_rfc1002_len(req));
+			rsp->hdr.Status = STATUS_INVALID_HANDLE;
+			smb2_set_err_rsp(work);
+			return -EINVAL;
+		}
+
+		payload = (char *)(((char *)&req->hdr.ProtocolId) + data_off);
+	}
+
+	reply = ksmbd_rpc_write(work->sess, id, payload, length);
+	if (reply) {
+		if (reply->flags == KSMBD_RPC_ENOTIMPLEMENTED) {
+			rsp->hdr.Status = STATUS_NOT_SUPPORTED;
+			kvfree(reply);
+			smb2_set_err_rsp(work);
+			return -EOPNOTSUPP;
+		}
+		if (reply->flags != KSMBD_RPC_OK) {
+			/* Reported through the status only; returns 0. */
+			rsp->hdr.Status = STATUS_INVALID_HANDLE;
+			smb2_set_err_rsp(work);
+			kvfree(reply);
+			return 0;
+		}
+		kvfree(reply);
+	}
+
+	rsp->StructureSize = cpu_to_le16(17);
+	rsp->DataOffset = 0;
+	rsp->Reserved = 0;
+	rsp->DataLength = cpu_to_le32(length);
+	rsp->DataRemaining = 0;
+	rsp->Reserved2 = 0;
+	inc_rfc1001_len(rsp, 16);
+	return 0;
+}
+
+/**
+ * smb2_write_rdma_channel() - fetch write data over RDMA and write it out
+ * @work:	smb work of the write request
+ * @req:	write request; req->Buffer is read as a struct
+ *		smb2_buffer_desc_v1
+ * @fp:		destination file
+ * @offset:	file offset to write at
+ * @length:	number of bytes to fetch and write
+ * @sync:	passed through to ksmbd_vfs_write()
+ *
+ * Return:	number of bytes written on success, otherwise error
+ */
+static ssize_t smb2_write_rdma_channel(struct ksmbd_work *work,
+				       struct smb2_write_req *req,
+				       struct ksmbd_file *fp,
+				       loff_t offset, size_t length, bool sync)
+{
+	struct smb2_buffer_desc_v1 *desc =
+		(struct smb2_buffer_desc_v1 *)&req->Buffer[0];
+	ssize_t written;
+	char *staging;
+	int rc;
+
+	/* On an SMB 3.0 connection only the plain RDMA_V1 channel passes. */
+	if (work->conn->dialect == SMB30_PROT_ID &&
+	    req->Channel != SMB2_CHANNEL_RDMA_V1)
+		return -EINVAL;
+
+	/* An RDMA write must not carry inline data. */
+	if (req->Length != 0 || req->DataOffset != 0)
+		return -EINVAL;
+
+	if (req->WriteChannelInfoOffset == 0)
+		return -EINVAL;
+	if (le16_to_cpu(req->WriteChannelInfoLength) < sizeof(*desc))
+		return -EINVAL;
+
+	work->remote_key = le32_to_cpu(desc->token);
+	work->need_invalidate_rkey =
+		(req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
+
+	staging = kvmalloc(length, GFP_KERNEL | __GFP_ZERO);
+	if (!staging)
+		return -ENOMEM;
+
+	rc = ksmbd_conn_rdma_read(work->conn, staging, length,
+				  le32_to_cpu(desc->token),
+				  le64_to_cpu(desc->offset),
+				  le32_to_cpu(desc->length));
+	if (rc >= 0)
+		rc = ksmbd_vfs_write(work, fp, staging, length, &offset, sync,
+				     &written);
+	kvfree(staging);
+
+	return rc < 0 ? rc : written;
+}
+
+/**
+ * smb2_write() - handler for smb2 write from file
+ * @work:	smb work containing write command buffer
+ *
+ * Writes on a pipe share are forwarded to smb2_write_pipe(). Otherwise the
+ * payload is taken from the request itself or, for the RDMA channels,
+ * fetched with smb2_write_rdma_channel().
+ *
+ * Return:	0 on success, otherwise error
+ */
+int smb2_write(struct ksmbd_work *work)
+{
+	struct smb2_write_req *req;
+	struct smb2_write_rsp *rsp, *rsp_org;
+	struct ksmbd_file *fp = NULL;
+	loff_t offset;
+	size_t length;
+	ssize_t nbytes;
+	char *data_buf;
+	bool writethrough;
+	int err = 0;
+
+	rsp_org = work->response_buf;
+	WORK_BUFFERS(work, req, rsp);
+
+	if (test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_PIPE)) {
+		ksmbd_debug(SMB, "IPC pipe write request\n");
+		return smb2_write_pipe(work);
+	}
+
+	if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+		ksmbd_debug(SMB, "User does not have write permission\n");
+		err = -EACCES;
+		goto out;
+	}
+
+	fp = ksmbd_lookup_fd_slow(work, le64_to_cpu(req->VolatileFileId),
+				  le64_to_cpu(req->PersistentFileId));
+	if (!fp) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (!(fp->daccess & (FILE_WRITE_DATA_LE | FILE_READ_ATTRIBUTES_LE))) {
+		pr_err("Not permitted to write : 0x%x\n", fp->daccess);
+		err = -EACCES;
+		goto out;
+	}
+
+	offset = le64_to_cpu(req->Offset);
+	length = le32_to_cpu(req->Length);
+
+	/* Oversized requests are rejected, not truncated. */
+	if (length > work->conn->vals->max_write_size) {
+		ksmbd_debug(SMB, "limiting write size to max size(%u)\n",
+			    work->conn->vals->max_write_size);
+		err = -EINVAL;
+		goto out;
+	}
+
+	writethrough = !!(le32_to_cpu(req->Flags) &
+			  SMB2_WRITEFLAG_WRITE_THROUGH);
+
+	if (req->Channel == SMB2_CHANNEL_RDMA_V1 ||
+	    req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
+		/* read data from the client using rdma channel, and
+		 * write the data.
+		 */
+		nbytes = smb2_write_rdma_channel(work, req, fp, offset,
+						 le32_to_cpu(req->RemainingBytes),
+						 writethrough);
+		if (nbytes < 0) {
+			err = (int)nbytes;
+			goto out;
+		}
+	} else {
+		u16 data_off = le16_to_cpu(req->DataOffset);
+
+		if (data_off == (offsetof(struct smb2_write_req, Buffer) - 4)) {
+			/* Payload starts directly at the Buffer field. */
+			data_buf = (char *)&req->Buffer[0];
+		} else {
+			if ((data_off > get_rfc1002_len(req)) ||
+			    (data_off + length > get_rfc1002_len(req))) {
+				pr_err("invalid write data offset %u, smb_len %u\n",
+				       le16_to_cpu(req->DataOffset),
+				       get_rfc1002_len(req));
+				err = -EINVAL;
+				goto out;
+			}
+
+			data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
+					    data_off);
+		}
+
+		ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags));
+		ksmbd_debug(SMB, "filename %pd, offset %lld, len %zu\n",
+			    fp->filp->f_path.dentry, offset, length);
+		err = ksmbd_vfs_write(work, fp, data_buf, length, &offset,
+				      writethrough, &nbytes);
+		if (err < 0)
+			goto out;
+	}
+
+	rsp->StructureSize = cpu_to_le16(17);
+	rsp->DataOffset = 0;
+	rsp->Reserved = 0;
+	rsp->DataLength = cpu_to_le32(nbytes);
+	rsp->DataRemaining = 0;
+	rsp->Reserved2 = 0;
+	inc_rfc1001_len(rsp_org, 16);
+	ksmbd_fd_put(work, fp);
+	return 0;
+
+out:
+	switch (err) {
+	case -EAGAIN:
+		rsp->hdr.Status = STATUS_FILE_LOCK_CONFLICT;
+		break;
+	case -ENOSPC:
+	case -EFBIG:
+		rsp->hdr.Status = STATUS_DISK_FULL;
+		break;
+	case -ENOENT:
+		rsp->hdr.Status = STATUS_FILE_CLOSED;
+		break;
+	case -EACCES:
+		rsp->hdr.Status = STATUS_ACCESS_DENIED;
+		break;
+	case -ESHARE:
+		rsp->hdr.Status = STATUS_SHARING_VIOLATION;
+		break;
+	case -EINVAL:
+		rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+		break;
+	default:
+		rsp->hdr.Status = STATUS_INVALID_HANDLE;
+		break;
+	}
+
+	smb2_set_err_rsp(work);
+	ksmbd_fd_put(work, fp);
+	return err;
+}
+
+/**
+ * smb2_flush() - handler for smb2 flush file - fsync
+ * @work:	smb work containing flush command buffer
+ *
+ * Any failure of ksmbd_vfs_fsync() is reported to the client as
+ * STATUS_INVALID_HANDLE.
+ *
+ * Return:	0 on success, otherwise error
+ */
+int smb2_flush(struct ksmbd_work *work)
+{
+	struct smb2_flush_req *req;
+	struct smb2_flush_rsp *rsp, *rsp_org;
+	int rc;
+
+	rsp_org = work->response_buf;
+	WORK_BUFFERS(work, req, rsp);
+
+	ksmbd_debug(SMB, "SMB2_FLUSH called for fid %llu\n",
+		    le64_to_cpu(req->VolatileFileId));
+
+	rc = ksmbd_vfs_fsync(work,
+			     le64_to_cpu(req->VolatileFileId),
+			     le64_to_cpu(req->PersistentFileId));
+	if (rc) {
+		rsp->hdr.Status = STATUS_INVALID_HANDLE;
+		smb2_set_err_rsp(work);
+		return rc;
+	}
+
+	rsp->StructureSize = cpu_to_le16(4);
+	rsp->Reserved = 0;
+	inc_rfc1001_len(rsp_org, 4);
+	return 0;
+}
+
+/**
+ * smb2_cancel() - handler for smb2 cancel command
+ * @work:	smb work containing cancel command buffer
+ *
+ * Searches the connection's pending requests for the one the cancel refers
+ * to: by AsyncId on conn->async_requests when SMB2_FLAGS_ASYNC_COMMAND is
+ * set in the header, otherwise by MessageId on conn->requests (skipping the
+ * cancel request itself). A match is marked KSMBD_WORK_CANCELLED and its
+ * cancel_fn, if any, is invoked. No response is sent for the cancel.
+ *
+ * Return:	0 on success, otherwise error
+ */
+int smb2_cancel(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct smb2_hdr *hdr = work->request_buf;
+	struct smb2_hdr *chdr;
+	struct ksmbd_work *cancel_work = NULL;
+	int canceled = 0;
+	struct list_head *command_list;
+
+	/* MessageId is little-endian on the wire; convert it for printing. */
+	ksmbd_debug(SMB, "smb2 cancel called on mid %llu, async flags 0x%x\n",
+		    le64_to_cpu(hdr->MessageId), hdr->Flags);
+
+	if (hdr->Flags & SMB2_FLAGS_ASYNC_COMMAND) {
+		command_list = &conn->async_requests;
+
+		spin_lock(&conn->request_lock);
+		list_for_each_entry(cancel_work, command_list,
+				    async_request_entry) {
+			chdr = cancel_work->request_buf;
+
+			if (cancel_work->async_id !=
+			    le64_to_cpu(hdr->Id.AsyncId))
+				continue;
+
+			ksmbd_debug(SMB,
+				    "smb2 with AsyncId %llu cancelled command = 0x%x\n",
+				    le64_to_cpu(hdr->Id.AsyncId),
+				    le16_to_cpu(chdr->Command));
+			canceled = 1;
+			break;
+		}
+		spin_unlock(&conn->request_lock);
+	} else {
+		command_list = &conn->requests;
+
+		spin_lock(&conn->request_lock);
+		list_for_each_entry(cancel_work, command_list, request_entry) {
+			chdr = cancel_work->request_buf;
+
+			if (chdr->MessageId != hdr->MessageId ||
+			    cancel_work == work)
+				continue;
+
+			ksmbd_debug(SMB,
+				    "smb2 with mid %llu cancelled command = 0x%x\n",
+				    le64_to_cpu(hdr->MessageId),
+				    le16_to_cpu(chdr->Command));
+			canceled = 1;
+			break;
+		}
+		spin_unlock(&conn->request_lock);
+	}
+
+	/*
+	 * cancel_work is only meaningful when a loop above ended via break;
+	 * after a full traversal it does not point at a real work item.
+	 */
+	if (canceled) {
+		cancel_work->state = KSMBD_WORK_CANCELLED;
+		if (cancel_work->cancel_fn)
+			cancel_work->cancel_fn(cancel_work->cancel_argv);
+	}
+
+	/* For SMB2_CANCEL command itself send no response*/
+	work->send_no_response = 1;
+	return 0;
+}
+
+/**
+ * smb_flock_init() - allocate a file_lock set up as a POSIX lock on a file
+ * @f:	file used as both owner and target of the lock
+ *
+ * Return:	the initialised lock, or NULL if locks_alloc_lock() fails
+ */
+struct file_lock *smb_flock_init(struct file *f)
+{
+	struct file_lock *lock = locks_alloc_lock();
+
+	if (!lock)
+		return NULL;
+
+	locks_init_lock(lock);
+
+	lock->fl_file = f;
+	lock->fl_owner = f;
+	lock->fl_pid = current->tgid;
+	lock->fl_flags = FL_POSIX;
+	lock->fl_ops = NULL;
+	lock->fl_lmops = NULL;
+
+	return lock;
+}
+
+/**
+ * smb2_set_flock_flags() - translate SMB2 lock flags into a file_lock setup
+ * @flock:	lock whose fl_type (and FL_SLEEP flag) is set
+ * @flags:	SMB2_LOCKFLAG_* value of one lock element
+ *
+ * Only the five flag combinations handled below are accepted; @flock is
+ * left untouched for anything else.
+ *
+ * Return:	F_SETLKW for waiting locks, F_SETLK for fail-immediately
+ *		locks, 0 for unlock, -EINVAL for any other combination
+ */
+static int smb2_set_flock_flags(struct file_lock *flock, int flags)
+{
+	/* Checking for wrong flag combination during lock request*/
+	switch (flags) {
+	case SMB2_LOCKFLAG_SHARED:
+		ksmbd_debug(SMB, "received shared request\n");
+		flock->fl_type = F_RDLCK;
+		flock->fl_flags |= FL_SLEEP;
+		return F_SETLKW;
+	case SMB2_LOCKFLAG_EXCLUSIVE:
+		ksmbd_debug(SMB, "received exclusive request\n");
+		flock->fl_type = F_WRLCK;
+		flock->fl_flags |= FL_SLEEP;
+		return F_SETLKW;
+	case SMB2_LOCKFLAG_SHARED | SMB2_LOCKFLAG_FAIL_IMMEDIATELY:
+		ksmbd_debug(SMB,
+			    "received shared & fail immediately request\n");
+		flock->fl_type = F_RDLCK;
+		return F_SETLK;
+	case SMB2_LOCKFLAG_EXCLUSIVE | SMB2_LOCKFLAG_FAIL_IMMEDIATELY:
+		ksmbd_debug(SMB,
+			    "received exclusive & fail immediately request\n");
+		flock->fl_type = F_WRLCK;
+		return F_SETLK;
+	case SMB2_LOCKFLAG_UNLOCK:
+		ksmbd_debug(SMB, "received unlock request\n");
+		flock->fl_type = F_UNLCK;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+/**
+ * smb2_lock_init() - wrap a file_lock in a ksmbd_lock and queue it
+ * @flock:	file_lock describing the range; its start/end are copied
+ * @cmd:	lock command stored in the new entry
+ * @flags:	SMB2 lock flags stored in the new entry
+ * @lock_list:	list the new entry is appended to (through its llist member)
+ *
+ * An entry whose start equals its end is marked as zero length.
+ *
+ * Return:	the new entry, or NULL if the allocation fails
+ */
+static struct ksmbd_lock *smb2_lock_init(struct file_lock *flock,
+					 unsigned int cmd, int flags,
+					 struct list_head *lock_list)
+{
+	struct ksmbd_lock *entry = kzalloc(sizeof(struct ksmbd_lock),
+					   GFP_KERNEL);
+
+	if (!entry)
+		return NULL;
+
+	entry->fl = flock;
+	entry->cmd = cmd;
+	entry->flags = flags;
+	entry->start = flock->fl_start;
+	entry->end = flock->fl_end;
+	if (entry->start == entry->end)
+		entry->zero_len = 1;
+
+	INIT_LIST_HEAD(&entry->clist);
+	INIT_LIST_HEAD(&entry->flist);
+	INIT_LIST_HEAD(&entry->llist);
+	list_add_tail(&entry->llist, lock_list);
+
+	return entry;
+}
+
+/**
+ * smb2_remove_blocked_lock() - cancel callback for a deferred lock request
+ * @argv:	argument vector; argv[0] is the struct file_lock being waited on
+ *
+ * Unblocks the lock and wakes up whoever sleeps on its fl_wait queue.
+ */
+static void smb2_remove_blocked_lock(void **argv)
+{
+	struct file_lock *blocked = argv[0];
+
+	ksmbd_vfs_posix_lock_unblock(blocked);
+	wake_up(&blocked->fl_wait);
+}
+
+/* Report whether anything is currently queued on @fl's fl_wait waitqueue. */
+static inline bool lock_defer_pending(struct file_lock *fl)
+{
+	bool has_waiters = waitqueue_active(&fl->fl_wait);
+
+	return has_waiters;
+}
+
+/**
+ * smb2_lock() - handler for smb2 file lock command
+ * @work:	smb work containing lock command buffer
+ *
+ * Processing happens in two passes:
+ *
+ *  1. Every lock element of the request is converted into a file_lock
+ *     wrapped in a ksmbd_lock and queued on a local list. Invalid ranges
+ *     and conflicting elements inside the same request are rejected here.
+ *  2. The queued entries are applied one by one. Unlock and
+ *     fail-immediately requests are first checked against the locks
+ *     recorded on every connection; zero-length ranges are only recorded,
+ *     all others are handed to vfs_lock_file(). A deferred lock turns the
+ *     request into an async one and waits until it is granted or the work
+ *     is cancelled.
+ *
+ * On failure, entries that were not yet applied are freed and locks
+ * already taken by this request are unlocked again.
+ *
+ * Return:	0 on success, otherwise error
+ */
+int smb2_lock(struct ksmbd_work *work)
+{
+	struct smb2_lock_req *req = work->request_buf;
+	struct smb2_lock_rsp *rsp = work->response_buf;
+	struct smb2_lock_element *lock_ele;
+	struct ksmbd_file *fp = NULL;
+	struct file_lock *flock = NULL;
+	struct file *filp = NULL;
+	int lock_count;
+	int flags = 0;
+	int cmd = 0;
+	int err = -EIO, i, rc = 0;
+	u64 lock_start, lock_length;
+	struct ksmbd_lock *smb_lock = NULL, *cmp_lock, *tmp, *tmp2;
+	struct ksmbd_conn *conn;
+	int nolock = 0;
+	LIST_HEAD(lock_list);
+	LIST_HEAD(rollback_list);
+	int prior_lock = 0;
+
+	ksmbd_debug(SMB, "Received lock request\n");
+	fp = ksmbd_lookup_fd_slow(work,
+				  le64_to_cpu(req->VolatileFileId),
+				  le64_to_cpu(req->PersistentFileId));
+	if (!fp) {
+		ksmbd_debug(SMB, "Invalid file id for lock : %llu\n",
+			    le64_to_cpu(req->VolatileFileId));
+		err = -ENOENT;
+		goto out2;
+	}
+
+	filp = fp->filp;
+	lock_count = le16_to_cpu(req->LockCount);
+	lock_ele = req->locks;
+
+	ksmbd_debug(SMB, "lock count is %d\n", lock_count);
+	if (!lock_count) {
+		err = -EINVAL;
+		goto out2;
+	}
+
+	/*
+	 * Pass 1: build one ksmbd_lock per element. Until smb2_lock_init()
+	 * succeeds, flock is owned by this loop and must be freed on every
+	 * error path, because the cleanup at "out" only sees list entries.
+	 */
+	for (i = 0; i < lock_count; i++) {
+		flags = le32_to_cpu(lock_ele[i].Flags);
+
+		flock = smb_flock_init(filp);
+		if (!flock)
+			goto out;
+
+		cmd = smb2_set_flock_flags(flock, flags);
+
+		lock_start = le64_to_cpu(lock_ele[i].Offset);
+		lock_length = le64_to_cpu(lock_ele[i].Length);
+		if (lock_start > U64_MAX - lock_length) {
+			pr_err("Invalid lock range requested\n");
+			rsp->hdr.Status = STATUS_INVALID_LOCK_RANGE;
+			locks_free_lock(flock);
+			goto out;
+		}
+
+		/* Clamp the range to what a loff_t based lock can express. */
+		if (lock_start > OFFSET_MAX)
+			flock->fl_start = OFFSET_MAX;
+		else
+			flock->fl_start = lock_start;
+
+		if (lock_length > OFFSET_MAX - flock->fl_start)
+			lock_length = OFFSET_MAX - flock->fl_start;
+
+		flock->fl_end = flock->fl_start + lock_length;
+
+		if (flock->fl_end < flock->fl_start) {
+			ksmbd_debug(SMB,
+				    "the end offset(%llx) is smaller than the start offset(%llx)\n",
+				    flock->fl_end, flock->fl_start);
+			rsp->hdr.Status = STATUS_INVALID_LOCK_RANGE;
+			locks_free_lock(flock);
+			goto out;
+		}
+
+		/* Check conflict locks in one request */
+		list_for_each_entry(cmp_lock, &lock_list, llist) {
+			if (cmp_lock->fl->fl_start <= flock->fl_start &&
+			    cmp_lock->fl->fl_end >= flock->fl_end) {
+				if (cmp_lock->fl->fl_type != F_UNLCK &&
+				    flock->fl_type != F_UNLCK) {
+					pr_err("conflict two locks in one request\n");
+					err = -EINVAL;
+					locks_free_lock(flock);
+					goto out;
+				}
+			}
+		}
+
+		smb_lock = smb2_lock_init(flock, cmd, flags, &lock_list);
+		if (!smb_lock) {
+			err = -EINVAL;
+			locks_free_lock(flock);
+			goto out;
+		}
+	}
+
+	/* Pass 2: apply the queued entries in request order. */
+	list_for_each_entry_safe(smb_lock, tmp, &lock_list, llist) {
+		if (smb_lock->cmd < 0) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (!(smb_lock->flags & SMB2_LOCKFLAG_MASK)) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		/* A request may not mix lock and unlock elements. */
+		if ((prior_lock & (SMB2_LOCKFLAG_EXCLUSIVE | SMB2_LOCKFLAG_SHARED) &&
+		     smb_lock->flags & SMB2_LOCKFLAG_UNLOCK) ||
+		    (prior_lock == SMB2_LOCKFLAG_UNLOCK &&
+		     !(smb_lock->flags & SMB2_LOCKFLAG_UNLOCK))) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		prior_lock = smb_lock->flags;
+
+		/* Waiting lock requests skip the connection list scan. */
+		if (!(smb_lock->flags & SMB2_LOCKFLAG_UNLOCK) &&
+		    !(smb_lock->flags & SMB2_LOCKFLAG_FAIL_IMMEDIATELY))
+			goto no_check_cl;
+
+		nolock = 1;
+		/* check locks in connection list */
+		read_lock(&conn_list_lock);
+		list_for_each_entry(conn, &conn_list, conns_list) {
+			spin_lock(&conn->llist_lock);
+			list_for_each_entry_safe(cmp_lock, tmp2, &conn->lock_list, clist) {
+				if (file_inode(cmp_lock->fl->fl_file) !=
+				    file_inode(smb_lock->fl->fl_file))
+					continue;
+
+				if (smb_lock->fl->fl_type == F_UNLCK) {
+					if (cmp_lock->fl->fl_file == smb_lock->fl->fl_file &&
+					    cmp_lock->start == smb_lock->start &&
+					    cmp_lock->end == smb_lock->end &&
+					    !lock_defer_pending(cmp_lock->fl)) {
+						/* Exact match: drop the recorded lock. */
+						nolock = 0;
+						list_del(&cmp_lock->flist);
+						list_del(&cmp_lock->clist);
+						spin_unlock(&conn->llist_lock);
+						read_unlock(&conn_list_lock);
+
+						locks_free_lock(cmp_lock->fl);
+						kfree(cmp_lock);
+						goto out_check_cl;
+					}
+					continue;
+				}
+
+				if (cmp_lock->fl->fl_file == smb_lock->fl->fl_file) {
+					if (smb_lock->flags & SMB2_LOCKFLAG_SHARED)
+						continue;
+				} else {
+					if (cmp_lock->flags & SMB2_LOCKFLAG_SHARED)
+						continue;
+				}
+
+				/* check zero byte lock range */
+				if (cmp_lock->zero_len && !smb_lock->zero_len &&
+				    cmp_lock->start > smb_lock->start &&
+				    cmp_lock->start < smb_lock->end) {
+					spin_unlock(&conn->llist_lock);
+					read_unlock(&conn_list_lock);
+					pr_err("previous lock conflict with zero byte lock range\n");
+					goto out;
+				}
+
+				if (smb_lock->zero_len && !cmp_lock->zero_len &&
+				    smb_lock->start > cmp_lock->start &&
+				    smb_lock->start < cmp_lock->end) {
+					spin_unlock(&conn->llist_lock);
+					read_unlock(&conn_list_lock);
+					pr_err("current lock conflict with zero byte lock range\n");
+					goto out;
+				}
+
+				if (((cmp_lock->start <= smb_lock->start &&
+				      cmp_lock->end > smb_lock->start) ||
+				     (cmp_lock->start < smb_lock->end &&
+				      cmp_lock->end >= smb_lock->end)) &&
+				    !cmp_lock->zero_len && !smb_lock->zero_len) {
+					spin_unlock(&conn->llist_lock);
+					read_unlock(&conn_list_lock);
+					pr_err("Not allow lock operation on exclusive lock range\n");
+					goto out;
+				}
+			}
+			spin_unlock(&conn->llist_lock);
+		}
+		read_unlock(&conn_list_lock);
+out_check_cl:
+		if (smb_lock->fl->fl_type == F_UNLCK && nolock) {
+			pr_err("Try to unlock nolocked range\n");
+			rsp->hdr.Status = STATUS_RANGE_NOT_LOCKED;
+			goto out;
+		}
+
+no_check_cl:
+		/*
+		 * Take the entry off lock_list and pick up its file_lock
+		 * before any jump to "skip": the code below frees the entry
+		 * or links it onto rollback_list, neither of which is valid
+		 * while it is still on lock_list or while flock refers to
+		 * another entry.
+		 */
+		flock = smb_lock->fl;
+		list_del(&smb_lock->llist);
+
+		if (smb_lock->zero_len) {
+			/* Not passed to the VFS; treat as granted. */
+			err = 0;
+			rc = 0;
+			goto skip;
+		}
+retry:
+		rc = vfs_lock_file(filp, smb_lock->cmd, flock, NULL);
+skip:
+		/* Decide on this entry's own flags, not the last parsed ones. */
+		if (smb_lock->flags & SMB2_LOCKFLAG_UNLOCK) {
+			if (!rc) {
+				ksmbd_debug(SMB, "File unlocked\n");
+			} else if (rc == -ENOENT) {
+				rsp->hdr.Status = STATUS_NOT_LOCKED;
+				/* Entry is on no list any more: free it here. */
+				locks_free_lock(flock);
+				kfree(smb_lock);
+				goto out;
+			}
+			locks_free_lock(flock);
+			kfree(smb_lock);
+		} else {
+			if (rc == FILE_LOCK_DEFERRED) {
+				void **argv;
+
+				ksmbd_debug(SMB,
+					    "would have to wait for getting lock\n");
+				spin_lock(&work->conn->llist_lock);
+				list_add_tail(&smb_lock->clist,
+					      &work->conn->lock_list);
+				spin_unlock(&work->conn->llist_lock);
+				list_add(&smb_lock->llist, &rollback_list);
+
+				argv = kmalloc(sizeof(void *), GFP_KERNEL);
+				if (!argv) {
+					err = -ENOMEM;
+					goto out;
+				}
+				argv[0] = flock;
+
+				rc = setup_async_work(work,
+						      smb2_remove_blocked_lock,
+						      argv);
+				if (rc) {
+					/*
+					 * NOTE(review): assumes a failing
+					 * setup_async_work() does not keep
+					 * argv - confirm against its
+					 * definition.
+					 */
+					kfree(argv);
+					err = -ENOMEM;
+					goto out;
+				}
+				spin_lock(&fp->f_lock);
+				list_add(&work->fp_entry, &fp->blocked_works);
+				spin_unlock(&fp->f_lock);
+
+				smb2_send_interim_resp(work, STATUS_PENDING);
+
+				ksmbd_vfs_posix_lock_wait(flock);
+
+				if (work->state != KSMBD_WORK_ACTIVE) {
+					list_del(&smb_lock->llist);
+					spin_lock(&work->conn->llist_lock);
+					list_del(&smb_lock->clist);
+					spin_unlock(&work->conn->llist_lock);
+					locks_free_lock(flock);
+
+					if (work->state == KSMBD_WORK_CANCELLED) {
+						spin_lock(&fp->f_lock);
+						list_del(&work->fp_entry);
+						spin_unlock(&fp->f_lock);
+						rsp->hdr.Status =
+							STATUS_CANCELLED;
+						kfree(smb_lock);
+						smb2_send_interim_resp(work,
+								       STATUS_CANCELLED);
+						work->send_no_response = 1;
+						goto out;
+					}
+					init_smb2_rsp_hdr(work);
+					smb2_set_err_rsp(work);
+					rsp->hdr.Status =
+						STATUS_RANGE_NOT_LOCKED;
+					kfree(smb_lock);
+					goto out2;
+				}
+
+				/* Woken up while still active: try again. */
+				list_del(&smb_lock->llist);
+				spin_lock(&work->conn->llist_lock);
+				list_del(&smb_lock->clist);
+				spin_unlock(&work->conn->llist_lock);
+
+				spin_lock(&fp->f_lock);
+				list_del(&work->fp_entry);
+				spin_unlock(&fp->f_lock);
+				goto retry;
+			} else if (!rc) {
+				spin_lock(&work->conn->llist_lock);
+				list_add_tail(&smb_lock->clist,
+					      &work->conn->lock_list);
+				list_add_tail(&smb_lock->flist,
+					      &fp->lock_list);
+				spin_unlock(&work->conn->llist_lock);
+				list_add(&smb_lock->llist, &rollback_list);
+				ksmbd_debug(SMB, "successful in taking lock\n");
+			} else {
+				/* Entry is on no list any more: free it here. */
+				locks_free_lock(flock);
+				kfree(smb_lock);
+				goto out;
+			}
+		}
+	}
+
+	if (atomic_read(&fp->f_ci->op_count) > 1)
+		smb_break_all_oplock(work, fp);
+
+	rsp->StructureSize = cpu_to_le16(4);
+	ksmbd_debug(SMB, "successful in taking lock\n");
+	rsp->hdr.Status = STATUS_SUCCESS;
+	rsp->Reserved = 0;
+	inc_rfc1001_len(rsp, 4);
+	ksmbd_fd_put(work, fp);
+	return 0;
+
+out:
+	/* Entries that were parsed but never applied. */
+	list_for_each_entry_safe(smb_lock, tmp, &lock_list, llist) {
+		locks_free_lock(smb_lock->fl);
+		list_del(&smb_lock->llist);
+		kfree(smb_lock);
+	}
+
+	/* Entries already applied by this request: undo them. */
+	list_for_each_entry_safe(smb_lock, tmp, &rollback_list, llist) {
+		struct file_lock *rlock = NULL;
+
+		rlock = smb_flock_init(filp);
+		if (rlock) {
+			rlock->fl_type = F_UNLCK;
+			rlock->fl_start = smb_lock->start;
+			rlock->fl_end = smb_lock->end;
+
+			rc = vfs_lock_file(filp, 0, rlock, NULL);
+			if (rc)
+				pr_err("rollback unlock fail : %d\n", rc);
+		} else {
+			/* Cannot build the unlock request; still clean up. */
+			pr_err("rollback unlock fail : %d\n", -ENOMEM);
+		}
+
+		list_del(&smb_lock->llist);
+		spin_lock(&work->conn->llist_lock);
+		if (!list_empty(&smb_lock->flist))
+			list_del(&smb_lock->flist);
+		list_del(&smb_lock->clist);
+		spin_unlock(&work->conn->llist_lock);
+
+		locks_free_lock(smb_lock->fl);
+		if (rlock)
+			locks_free_lock(rlock);
+		kfree(smb_lock);
+	}
+out2:
+	ksmbd_debug(SMB, "failed in taking lock(flags : %x), err : %d\n", flags, err);
+
+	/* Keep a status that was set closer to the failure. */
+	if (!rsp->hdr.Status) {
+		if (err == -EINVAL)
+			rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+		else if (err == -ENOMEM)
+			rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES;
+		else if (err == -ENOENT)
+			rsp->hdr.Status = STATUS_FILE_CLOSED;
+		else
+			rsp->hdr.Status = STATUS_LOCK_NOT_GRANTED;
+	}
+
+	smb2_set_err_rsp(work);
+	ksmbd_fd_put(work, fp);
+	return err;
+}
+
+/**
+ * fsctl_copychunk() - handle a server side copy (copychunk) ioctl
+ * @work:	smb work of the ioctl request
+ * @req:	ioctl request; req->Buffer is read as a struct
+ *		copychunk_ioctl_req
+ * @rsp:	ioctl response; rsp->Buffer is filled as a struct
+ *		copychunk_ioctl_rsp
+ *
+ * The response counters are first filled with the server side limits;
+ * those values stay in place when the request is rejected before any data
+ * is copied. After the chunk list has been validated, the source file is
+ * looked up through the resume key, the destination through the file ids
+ * of the request, and the chunks are copied with
+ * ksmbd_vfs_copy_file_ranges().
+ *
+ * Return:	0 on success, otherwise error (rsp->hdr.Status is set)
+ */
+static int fsctl_copychunk(struct ksmbd_work *work, struct smb2_ioctl_req *req,
+			   struct smb2_ioctl_rsp *rsp)
+{
+	struct copychunk_ioctl_req *ci_req;
+	struct copychunk_ioctl_rsp *ci_rsp;
+	struct ksmbd_file *src_fp = NULL, *dst_fp = NULL;
+	struct srv_copychunk *chunks;
+	unsigned int i, chunk_count, chunk_count_written = 0;
+	unsigned int chunk_size_written = 0;
+	loff_t total_size_written = 0;
+	int ret, cnt_code;
+
+	cnt_code = le32_to_cpu(req->CntCode);
+	ci_req = (struct copychunk_ioctl_req *)&req->Buffer[0];
+	ci_rsp = (struct copychunk_ioctl_rsp *)&rsp->Buffer[0];
+
+	rsp->VolatileFileId = req->VolatileFileId;
+	rsp->PersistentFileId = req->PersistentFileId;
+	ci_rsp->ChunksWritten =
+		cpu_to_le32(ksmbd_server_side_copy_max_chunk_count());
+	ci_rsp->ChunkBytesWritten =
+		cpu_to_le32(ksmbd_server_side_copy_max_chunk_size());
+	ci_rsp->TotalBytesWritten =
+		cpu_to_le32(ksmbd_server_side_copy_max_total_size());
+
+	chunks = (struct srv_copychunk *)&ci_req->Chunks[0];
+	chunk_count = le32_to_cpu(ci_req->ChunkCount);
+	total_size_written = 0;
+
+	/* verify the SRV_COPYCHUNK_COPY packet */
+	if (chunk_count > ksmbd_server_side_copy_max_chunk_count() ||
+	    le32_to_cpu(req->InputCount) <
+	     offsetof(struct copychunk_ioctl_req, Chunks) +
+	     chunk_count * sizeof(struct srv_copychunk)) {
+		rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+		return -EINVAL;
+	}
+
+	/* Stop at the first empty or oversized chunk; sum up the rest. */
+	for (i = 0; i < chunk_count; i++) {
+		if (le32_to_cpu(chunks[i].Length) == 0 ||
+		    le32_to_cpu(chunks[i].Length) > ksmbd_server_side_copy_max_chunk_size())
+			break;
+		total_size_written += le32_to_cpu(chunks[i].Length);
+	}
+
+	if (i < chunk_count ||
+	    total_size_written > ksmbd_server_side_copy_max_total_size()) {
+		rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+		return -EINVAL;
+	}
+
+	src_fp = ksmbd_lookup_foreign_fd(work,
+					 le64_to_cpu(ci_req->ResumeKey[0]));
+	dst_fp = ksmbd_lookup_fd_slow(work,
+				      le64_to_cpu(req->VolatileFileId),
+				      le64_to_cpu(req->PersistentFileId));
+	ret = -EINVAL;
+	if (!src_fp ||
+	    src_fp->persistent_id != le64_to_cpu(ci_req->ResumeKey[1])) {
+		rsp->hdr.Status = STATUS_OBJECT_NAME_NOT_FOUND;
+		goto out;
+	}
+
+	if (!dst_fp) {
+		rsp->hdr.Status = STATUS_FILE_CLOSED;
+		goto out;
+	}
+
+	/*
+	 * FILE_READ_DATA should only be included in
+	 * the FSCTL_COPYCHUNK case
+	 */
+	if (cnt_code == FSCTL_COPYCHUNK &&
+	    !(dst_fp->daccess & (FILE_READ_DATA_LE | FILE_GENERIC_READ_LE))) {
+		rsp->hdr.Status = STATUS_ACCESS_DENIED;
+		goto out;
+	}
+
+	ret = ksmbd_vfs_copy_file_ranges(work, src_fp, dst_fp,
+					 chunks, chunk_count,
+					 &chunk_count_written,
+					 &chunk_size_written,
+					 &total_size_written);
+	if (ret < 0) {
+		/*
+		 * One single else-if chain: -EACCES must not fall through
+		 * to the final else and be reported as an I/O error.
+		 */
+		if (ret == -EACCES)
+			rsp->hdr.Status = STATUS_ACCESS_DENIED;
+		else if (ret == -EAGAIN)
+			rsp->hdr.Status = STATUS_FILE_LOCK_CONFLICT;
+		else if (ret == -EBADF)
+			rsp->hdr.Status = STATUS_INVALID_HANDLE;
+		else if (ret == -EFBIG || ret == -ENOSPC)
+			rsp->hdr.Status = STATUS_DISK_FULL;
+		else if (ret == -EINVAL)
+			rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+		else if (ret == -EISDIR)
+			rsp->hdr.Status = STATUS_FILE_IS_A_DIRECTORY;
+		else if (ret == -E2BIG)
+			rsp->hdr.Status = STATUS_INVALID_VIEW_SIZE;
+		else
+			rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
+	}
+
+	ci_rsp->ChunksWritten = cpu_to_le32(chunk_count_written);
+	ci_rsp->ChunkBytesWritten = cpu_to_le32(chunk_size_written);
+	ci_rsp->TotalBytesWritten = cpu_to_le32(total_size_written);
+out:
+	ksmbd_fd_put(work, src_fp);
+	ksmbd_fd_put(work, dst_fp);
+	return ret;
+}
+
+/*
+ * Return the address of the first entry on @idev that is not flagged
+ * IFA_F_SECONDARY, or 0 when no such entry exists. The list is walked
+ * under rcu_read_lock().
+ */
+static __be32 idev_ipv4_address(struct in_device *idev)
+{
+	struct in_ifaddr *cur;
+	__be32 primary = 0;
+
+	rcu_read_lock();
+	in_dev_for_each_ifa_rcu(cur, idev) {
+		if (!(cur->ifa_flags & IFA_F_SECONDARY)) {
+			primary = cur->ifa_address;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return primary;
+}
+
+/**
+ * fsctl_query_iface_info_ioctl() - build the network interface list
+ * @conn:	connection the request arrived on; its peer address family
+ *		selects whether IPv4 or IPv6 addresses are reported
+ * @req:	ioctl request (not read here)
+ * @rsp:	ioctl response; rsp->Buffer receives one
+ *		network_interface_info_ioctl_rsp per reported device
+ *
+ * Walks the devices of init_net under rtnl_lock(), skipping loopback
+ * devices, devices that are not IFF_RUNNING and devices without an
+ * in_device/inet6_dev for the selected family.
+ *
+ * Return:	number of bytes written to rsp->Buffer, or -EINVAL (with
+ *		STATUS_BUFFER_TOO_SMALL) when no device was reported.
+ */
+static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
+					struct smb2_ioctl_req *req,
+					struct smb2_ioctl_rsp *rsp)
+{
+	struct network_interface_info_ioctl_rsp *nii_rsp;
+	/* last entry that was actually accounted for in nbytes */
+	struct network_interface_info_ioctl_rsp *last_rsp = NULL;
+	int nbytes = 0;
+	struct net_device *netdev;
+	struct sockaddr_storage_rsp *sockaddr_storage;
+	unsigned int flags;
+	unsigned long long speed;
+	struct sockaddr_in6 *csin6 = (struct sockaddr_in6 *)&conn->peer_addr;
+
+	rtnl_lock();
+	for_each_netdev(&init_net, netdev) {
+		if (netdev->type == ARPHRD_LOOPBACK)
+			continue;
+
+		flags = dev_get_flags(netdev);
+		if (!(flags & IFF_RUNNING))
+			continue;
+
+		nii_rsp = (struct network_interface_info_ioctl_rsp *)
+				&rsp->Buffer[nbytes];
+		nii_rsp->IfIndex = cpu_to_le32(netdev->ifindex);
+
+		nii_rsp->Capability = 0;
+		if (ksmbd_rdma_capable_netdev(netdev))
+			nii_rsp->Capability |= cpu_to_le32(RDMA_CAPABLE);
+
+		nii_rsp->Next = cpu_to_le32(152);
+		nii_rsp->Reserved = 0;
+
+		if (netdev->ethtool_ops->get_link_ksettings) {
+			struct ethtool_link_ksettings cmd;
+
+			netdev->ethtool_ops->get_link_ksettings(netdev, &cmd);
+			speed = cmd.base.speed;
+		} else {
+			pr_err("%s %s\n", netdev->name,
+			       "speed is unknown, defaulting to 1Gb/sec");
+			speed = SPEED_1000;
+		}
+
+		speed *= 1000000;
+		nii_rsp->LinkSpeed = cpu_to_le64(speed);
+
+		sockaddr_storage = (struct sockaddr_storage_rsp *)
+					nii_rsp->SockAddr_Storage;
+		memset(sockaddr_storage, 0, 128);
+
+		if (conn->peer_addr.ss_family == PF_INET ||
+		    ipv6_addr_v4mapped(&csin6->sin6_addr)) {
+			struct in_device *idev;
+
+			sockaddr_storage->Family = cpu_to_le16(INTERNETWORK);
+			sockaddr_storage->addr4.Port = 0;
+
+			idev = __in_dev_get_rtnl(netdev);
+			if (!idev)
+				continue;
+			sockaddr_storage->addr4.IPv4address =
+						idev_ipv4_address(idev);
+		} else {
+			struct inet6_dev *idev6;
+			struct inet6_ifaddr *ifa;
+			__u8 *ipv6_addr = sockaddr_storage->addr6.IPv6address;
+
+			sockaddr_storage->Family = cpu_to_le16(INTERNETWORKV6);
+			sockaddr_storage->addr6.Port = 0;
+			sockaddr_storage->addr6.FlowInfo = 0;
+
+			idev6 = __in6_dev_get(netdev);
+			if (!idev6)
+				continue;
+
+			list_for_each_entry(ifa, &idev6->addr_list, if_list) {
+				if (ifa->flags & (IFA_F_TENTATIVE |
+							IFA_F_DEPRECATED))
+					continue;
+				memcpy(ipv6_addr, ifa->addr.s6_addr, 16);
+				break;
+			}
+			sockaddr_storage->addr6.ScopeId = 0;
+		}
+
+		/*
+		 * Only now is the entry part of the response. Devices that
+		 * hit one of the "continue" statements above leave their
+		 * slot uncounted, and it is reused by the next device.
+		 */
+		nbytes += sizeof(struct network_interface_info_ioctl_rsp);
+		last_rsp = nii_rsp;
+	}
+	rtnl_unlock();
+
+	/*
+	 * zero if this is last one. Terminate the last *reported* entry:
+	 * using the last slot touched would leave the real last entry
+	 * pointing past the end of the returned data whenever the final
+	 * device was skipped.
+	 */
+	if (last_rsp)
+		last_rsp->Next = 0;
+
+	if (!nbytes) {
+		rsp->hdr.Status = STATUS_BUFFER_TOO_SMALL;
+		return -EINVAL;
+	}
+
+	rsp->PersistentFileId = cpu_to_le64(SMB2_NO_FID);
+	rsp->VolatileFileId = cpu_to_le64(SMB2_NO_FID);
+	return nbytes;
+}
+
+/**
+ * fsctl_validate_negotiate_info() - handle FSCTL_VALIDATE_NEGOTIATE_INFO
+ * @conn:	connection whose negotiated parameters are checked
+ * @neg_req:	negotiate parameters sent by the client
+ * @neg_rsp:	response filled with the server's parameters on success
+ *
+ * The request is accepted only if its dialect, client GUID, security
+ * mode and capabilities all match the values stored in @conn.
+ *
+ * Return:	0 on success, -EINVAL on any mismatch
+ */
+static int fsctl_validate_negotiate_info(struct ksmbd_conn *conn,
+					 struct validate_negotiate_info_req *neg_req,
+					 struct validate_negotiate_info_rsp *neg_rsp)
+{
+	int ret = 0;
+	int dialect;
+
+	dialect = ksmbd_lookup_dialect_by_id(neg_req->Dialects,
+					     neg_req->DialectCount);
+	if (dialect == BAD_PROT_ID || dialect != conn->dialect) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	/*
+	 * The GUID is a fixed-size binary value, not a string: strncmp()
+	 * would stop at the first zero byte and treat GUIDs that differ
+	 * after it as equal, so compare all bytes with memcmp().
+	 */
+	if (memcmp(neg_req->Guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE)) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	if (le16_to_cpu(neg_req->SecurityMode) != conn->cli_sec_mode) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	if (le32_to_cpu(neg_req->Capabilities) != conn->cli_cap) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	neg_rsp->Capabilities = cpu_to_le32(conn->vals->capabilities);
+	memset(neg_rsp->Guid, 0, SMB2_CLIENT_GUID_SIZE);
+	neg_rsp->SecurityMode = cpu_to_le16(conn->srv_sec_mode);
+	neg_rsp->Dialect = cpu_to_le16(conn->dialect);
+err_out:
+	return ret;
+}
+
+/**
+ * fsctl_query_allocated_ranges() - handle FSCTL_QUERY_ALLOCATED_RANGES
+ * @work:	smb work the ioctl belongs to
+ * @id:		volatile id of the file to query
+ * @qar_req:	range requested by the client
+ * @qar_rsp:	output array for the ranges found
+ * @in_count:	number of entries that fit in @qar_rsp
+ * @out_count:	set to the number of entries written; reset to 0 on any
+ *		error other than -E2BIG
+ *
+ * Return:	-EINVAL if @in_count is 0, -ENOENT if @id cannot be looked
+ *		up, otherwise the result of ksmbd_vfs_fqar_lseek()
+ */
+static int fsctl_query_allocated_ranges(struct ksmbd_work *work, u64 id,
+					struct file_allocated_range_buffer *qar_req,
+					struct file_allocated_range_buffer *qar_rsp,
+					int in_count, int *out_count)
+{
+	struct ksmbd_file *file;
+	loff_t range_start, range_len;
+	int rc;
+
+	*out_count = 0;
+	if (!in_count)
+		return -EINVAL;
+
+	file = ksmbd_lookup_fd_fast(work, id);
+	if (!file)
+		return -ENOENT;
+
+	range_start = le64_to_cpu(qar_req->file_offset);
+	range_len = le64_to_cpu(qar_req->length);
+
+	rc = ksmbd_vfs_fqar_lseek(file, range_start, range_len,
+				  qar_rsp, in_count, out_count);
+	/* -E2BIG keeps the entries gathered so far; other errors drop them */
+	if (rc != 0 && rc != -E2BIG)
+		*out_count = 0;
+
+	ksmbd_fd_put(work, file);
+	return rc;
+}
+
+/**
+ * fsctl_pipe_transceive() - handle FSCTL_PIPE_TRANSCEIVE
+ * @work:	smb work the ioctl belongs to
+ * @id:		id passed through to ksmbd_rpc_ioctl()
+ * @out_buf_len: maximum number of payload bytes to copy to the response
+ * @req:	ioctl request; req->Buffer is forwarded as the RPC input
+ * @rsp:	ioctl response; rsp->Buffer receives the RPC payload
+ *
+ * Return:	number of payload bytes copied into rsp->Buffer. Failures
+ *		are signalled through rsp->hdr.Status.
+ */
+static int fsctl_pipe_transceive(struct ksmbd_work *work, u64 id,
+				 int out_buf_len, struct smb2_ioctl_req *req,
+				 struct smb2_ioctl_rsp *rsp)
+{
+	struct ksmbd_rpc_command *reply;
+	int copied = 0;
+
+	reply = ksmbd_rpc_ioctl(work->sess, id, (char *)&req->Buffer[0],
+				le32_to_cpu(req->InputCount));
+	if (!reply)
+		goto done;
+
+	if (reply->flags == KSMBD_RPC_SOME_NOT_MAPPED) {
+		/*
+		 * Unknown domain sid: report STATUS_SOME_NOT_MAPPED but
+		 * still hand the payload back.
+		 */
+		rsp->hdr.Status = STATUS_SOME_NOT_MAPPED;
+	} else if (reply->flags == KSMBD_RPC_ENOTIMPLEMENTED) {
+		rsp->hdr.Status = STATUS_NOT_SUPPORTED;
+		goto done;
+	} else if (reply->flags != KSMBD_RPC_OK) {
+		rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+		goto done;
+	}
+
+	copied = reply->payload_sz;
+	if (reply->payload_sz > out_buf_len) {
+		/* payload is truncated to what the client asked for */
+		rsp->hdr.Status = STATUS_BUFFER_OVERFLOW;
+		copied = out_buf_len;
+	}
+
+	if (!reply->payload_sz) {
+		rsp->hdr.Status =
+			STATUS_UNEXPECTED_IO_ERROR;
+		goto done;
+	}
+
+	memcpy(rsp->Buffer, reply->payload, copied);
+done:
+	kvfree(reply);
+	return copied;
+}
+
+/**
+ * fsctl_set_sparse() - handle FSCTL_SET_SPARSE
+ * @work:	smb work the ioctl belongs to
+ * @id:		volatile id of the target file
+ * @sparse:	request payload; a non-zero SetSparse sets the attribute,
+ *		zero clears it
+ *
+ * Updates ATTR_SPARSE_FILE_LE in the cached attributes. If that changed
+ * them and the share has KSMBD_SHARE_FLAG_STORE_DOS_ATTRS, the DOS
+ * attribute xattr is read, updated and written back. The cached value is
+ * restored when the write fails.
+ *
+ * Return:	-ENOENT if @id cannot be looked up, otherwise 0 or the
+ *		result of the xattr helpers
+ */
+static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id,
+				   struct file_sparse *sparse)
+{
+	struct ksmbd_file *file;
+	struct user_namespace *mnt_ns;
+	struct xattr_dos_attrib dos_attr;
+	__le32 saved_fattr;
+	int rc = 0;
+
+	file = ksmbd_lookup_fd_fast(work, id);
+	if (!file)
+		return -ENOENT;
+	mnt_ns = file_mnt_user_ns(file->filp);
+
+	saved_fattr = file->f_ci->m_fattr;
+	if (sparse->SetSparse)
+		file->f_ci->m_fattr |= ATTR_SPARSE_FILE_LE;
+	else
+		file->f_ci->m_fattr &= ~ATTR_SPARSE_FILE_LE;
+
+	/* nothing to persist if unchanged or the share keeps no DOS attrs */
+	if (file->f_ci->m_fattr == saved_fattr ||
+	    !test_share_config_flag(work->tcon->share_conf,
+				    KSMBD_SHARE_FLAG_STORE_DOS_ATTRS))
+		goto put_fp;
+
+	rc = ksmbd_vfs_get_dos_attrib_xattr(mnt_ns,
+					    file->filp->f_path.dentry,
+					    &dos_attr);
+	if (rc <= 0)
+		goto put_fp;
+
+	dos_attr.attr = le32_to_cpu(file->f_ci->m_fattr);
+	rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_ns,
+					    file->filp->f_path.dentry,
+					    &dos_attr);
+	if (rc)
+		file->f_ci->m_fattr = saved_fattr;
+
+put_fp:
+	ksmbd_fd_put(work, file);
+	return rc;
+}
+
+/**
+ * fsctl_request_resume_key() - handle FSCTL_REQUEST_RESUME_KEY
+ * @work:	smb work the ioctl belongs to
+ * @req:	ioctl request naming the file by volatile/persistent id
+ * @key_rsp:	response payload; zeroed, then ResumeKey[0] and [1] are
+ *		set to the request's volatile and persistent id
+ *
+ * Return:	0 on success, -ENOENT if the file cannot be looked up
+ */
+static int fsctl_request_resume_key(struct ksmbd_work *work,
+				    struct smb2_ioctl_req *req,
+				    struct resume_key_ioctl_rsp *key_rsp)
+{
+	struct ksmbd_file *file;
+
+	/* the lookup only proves that the handle is valid */
+	file = ksmbd_lookup_fd_slow(work,
+				    le64_to_cpu(req->VolatileFileId),
+				    le64_to_cpu(req->PersistentFileId));
+	if (file == NULL)
+		return -ENOENT;
+
+	memset(key_rsp, 0, sizeof(*key_rsp));
+	key_rsp->ResumeKey[0] = req->VolatileFileId;
+	key_rsp->ResumeKey[1] = req->PersistentFileId;
+
+	ksmbd_fd_put(work, file);
+	return 0;
+}
+
+/**
+ * smb2_ioctl() - handler for smb2 ioctl command
+ * @work:	smb work containing ioctl command buffer
+ *
+ * Dispatches on the request's CntCode. Failures are reported to the
+ * client through rsp->hdr.Status and an error response body; they are
+ * not propagated to the caller.
+ *
+ * Return:	always 0
+ */
+int smb2_ioctl(struct ksmbd_work *work)
+{
+	struct smb2_ioctl_req *req;
+	struct smb2_ioctl_rsp *rsp, *rsp_org;
+	int cnt_code, nbytes = 0;
+	int out_buf_len;
+	u64 id = KSMBD_NO_FID;
+	struct ksmbd_conn *conn = work->conn;
+	int ret = 0;
+
+	rsp_org = work->response_buf;
+	if (work->next_smb2_rcv_hdr_off) {
+		req = ksmbd_req_buf_next(work);
+		rsp = ksmbd_resp_buf_next(work);
+		/* a compound request without its own FID uses the stored one */
+		if (!has_file_id(le64_to_cpu(req->VolatileFileId))) {
+			ksmbd_debug(SMB, "Compound request set FID = %llu\n",
+				    work->compound_fid);
+			id = work->compound_fid;
+		}
+	} else {
+		req = work->request_buf;
+		rsp = work->response_buf;
+	}
+
+	if (!has_file_id(id))
+		id = le64_to_cpu(req->VolatileFileId);
+
+	if (req->Flags != cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL)) {
+		rsp->hdr.Status = STATUS_NOT_SUPPORTED;
+		goto out;
+	}
+
+	cnt_code = le32_to_cpu(req->CntCode);
+	out_buf_len = le32_to_cpu(req->MaxOutputResponse);
+	out_buf_len = min(KSMBD_IPC_MAX_PAYLOAD, out_buf_len);
+
+	switch (cnt_code) {
+	case FSCTL_DFS_GET_REFERRALS:
+	case FSCTL_DFS_GET_REFERRALS_EX:
+		/* Not support DFS yet */
+		rsp->hdr.Status = STATUS_FS_DRIVER_REQUIRED;
+		goto out;
+	case FSCTL_CREATE_OR_GET_OBJECT_ID:
+	{
+		struct file_object_buf_type1_ioctl_rsp *obj_buf;
+
+		nbytes = sizeof(struct file_object_buf_type1_ioctl_rsp);
+		obj_buf = (struct file_object_buf_type1_ioctl_rsp *)
+			&rsp->Buffer[0];
+
+		/*
+		 * TODO: This is dummy implementation to pass smbtorture
+		 * Need to check correct response later
+		 */
+		memset(obj_buf->ObjectId, 0x0, 16);
+		memset(obj_buf->BirthVolumeId, 0x0, 16);
+		memset(obj_buf->BirthObjectId, 0x0, 16);
+		memset(obj_buf->DomainId, 0x0, 16);
+
+		break;
+	}
+	case FSCTL_PIPE_TRANSCEIVE:
+		nbytes = fsctl_pipe_transceive(work, id, out_buf_len, req, rsp);
+		break;
+	case FSCTL_VALIDATE_NEGOTIATE_INFO:
+		if (conn->dialect < SMB30_PROT_ID) {
+			ret = -EOPNOTSUPP;
+			goto out;
+		}
+
+		ret = fsctl_validate_negotiate_info(conn,
+			(struct validate_negotiate_info_req *)&req->Buffer[0],
+			(struct validate_negotiate_info_rsp *)&rsp->Buffer[0]);
+		if (ret < 0)
+			goto out;
+
+		nbytes = sizeof(struct validate_negotiate_info_rsp);
+		rsp->PersistentFileId = cpu_to_le64(SMB2_NO_FID);
+		rsp->VolatileFileId = cpu_to_le64(SMB2_NO_FID);
+		break;
+	case FSCTL_QUERY_NETWORK_INTERFACE_INFO:
+		nbytes = fsctl_query_iface_info_ioctl(conn, req, rsp);
+		if (nbytes < 0)
+			goto out;
+		break;
+	case FSCTL_REQUEST_RESUME_KEY:
+		if (out_buf_len < sizeof(struct resume_key_ioctl_rsp)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		ret = fsctl_request_resume_key(work, req,
+					       (struct resume_key_ioctl_rsp *)&rsp->Buffer[0]);
+		if (ret < 0)
+			goto out;
+		rsp->PersistentFileId = req->PersistentFileId;
+		rsp->VolatileFileId = req->VolatileFileId;
+		nbytes = sizeof(struct resume_key_ioctl_rsp);
+		break;
+	case FSCTL_COPYCHUNK:
+	case FSCTL_COPYCHUNK_WRITE:
+		if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+			ksmbd_debug(SMB,
+				    "User does not have write permission\n");
+			ret = -EACCES;
+			goto out;
+		}
+
+		if (out_buf_len < sizeof(struct copychunk_ioctl_rsp)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		/*
+		 * The return value is not checked: fsctl_copychunk() sets
+		 * rsp->hdr.Status itself and the copychunk payload is sent
+		 * back either way.
+		 */
+		nbytes = sizeof(struct copychunk_ioctl_rsp);
+		fsctl_copychunk(work, req, rsp);
+		break;
+	case FSCTL_SET_SPARSE:
+		ret = fsctl_set_sparse(work, id,
+				       (struct file_sparse *)&req->Buffer[0]);
+		if (ret < 0)
+			goto out;
+		break;
+	case FSCTL_SET_ZERO_DATA:
+	{
+		struct file_zero_data_information *zero_data;
+		struct ksmbd_file *fp;
+		loff_t off, len, bfz;
+
+		if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+			ksmbd_debug(SMB,
+				    "User does not have write permission\n");
+			ret = -EACCES;
+			goto out;
+		}
+
+		zero_data =
+			(struct file_zero_data_information *)&req->Buffer[0];
+
+		/*
+		 * Both offsets come straight from the client. Reject values
+		 * that are negative as loff_t or describe an inverted range,
+		 * so that "len" below can never be negative.
+		 */
+		off = le64_to_cpu(zero_data->FileOffset);
+		bfz = le64_to_cpu(zero_data->BeyondFinalZero);
+		if (off < 0 || bfz < 0 || off > bfz) {
+			ret = -EINVAL;
+			goto out;
+		}
+		len = bfz - off;
+
+		fp = ksmbd_lookup_fd_fast(work, id);
+		if (!fp) {
+			ret = -ENOENT;
+			goto out;
+		}
+
+		ret = ksmbd_vfs_zero_data(work, fp, off, len);
+		ksmbd_fd_put(work, fp);
+		if (ret < 0)
+			goto out;
+		break;
+	}
+	case FSCTL_QUERY_ALLOCATED_RANGES:
+		ret = fsctl_query_allocated_ranges(work, id,
+			(struct file_allocated_range_buffer *)&req->Buffer[0],
+			(struct file_allocated_range_buffer *)&rsp->Buffer[0],
+			out_buf_len /
+			sizeof(struct file_allocated_range_buffer), &nbytes);
+		if (ret == -E2BIG) {
+			/* partial result: still returned, flagged as overflow */
+			rsp->hdr.Status = STATUS_BUFFER_OVERFLOW;
+		} else if (ret < 0) {
+			nbytes = 0;
+			goto out;
+		}
+
+		/* nbytes held an entry count up to here */
+		nbytes *= sizeof(struct file_allocated_range_buffer);
+		break;
+	case FSCTL_GET_REPARSE_POINT:
+	{
+		struct reparse_data_buffer *reparse_ptr;
+		struct ksmbd_file *fp;
+
+		reparse_ptr = (struct reparse_data_buffer *)&rsp->Buffer[0];
+		fp = ksmbd_lookup_fd_fast(work, id);
+		if (!fp) {
+			pr_err("not found fp!!\n");
+			ret = -ENOENT;
+			goto out;
+		}
+
+		reparse_ptr->ReparseTag =
+			smb2_get_reparse_tag_special_file(file_inode(fp->filp)->i_mode);
+		reparse_ptr->ReparseDataLength = 0;
+		ksmbd_fd_put(work, fp);
+		nbytes = sizeof(struct reparse_data_buffer);
+		break;
+	}
+	case FSCTL_DUPLICATE_EXTENTS_TO_FILE:
+	{
+		struct ksmbd_file *fp_in, *fp_out = NULL;
+		struct duplicate_extents_to_file *dup_ext;
+		loff_t src_off, dst_off, length, cloned;
+
+		dup_ext = (struct duplicate_extents_to_file *)&req->Buffer[0];
+
+		fp_in = ksmbd_lookup_fd_slow(work, dup_ext->VolatileFileHandle,
+					     dup_ext->PersistentFileHandle);
+		if (!fp_in) {
+			pr_err("not found file handle in duplicate extent to file\n");
+			ret = -ENOENT;
+			goto out;
+		}
+
+		fp_out = ksmbd_lookup_fd_fast(work, id);
+		if (!fp_out) {
+			pr_err("not found fp\n");
+			ret = -ENOENT;
+			goto dup_ext_out;
+		}
+
+		src_off = le64_to_cpu(dup_ext->SourceFileOffset);
+		dst_off = le64_to_cpu(dup_ext->TargetFileOffset);
+		length = le64_to_cpu(dup_ext->ByteCount);
+		cloned = vfs_clone_file_range(fp_in->filp, src_off, fp_out->filp,
+					      dst_off, length, 0);
+		if (cloned == -EXDEV || cloned == -EOPNOTSUPP) {
+			ret = -EOPNOTSUPP;
+			goto dup_ext_out;
+		} else if (cloned != length) {
+			/* clone fell short: fall back to a plain copy */
+			cloned = vfs_copy_file_range(fp_in->filp, src_off,
+						     fp_out->filp, dst_off, length, 0);
+			if (cloned != length) {
+				if (cloned < 0)
+					ret = cloned;
+				else
+					ret = -EINVAL;
+			}
+		}
+
+dup_ext_out:
+		ksmbd_fd_put(work, fp_in);
+		ksmbd_fd_put(work, fp_out);
+		if (ret < 0)
+			goto out;
+		break;
+	}
+	default:
+		ksmbd_debug(SMB, "not implemented yet ioctl command 0x%x\n",
+			    cnt_code);
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	rsp->CntCode = cpu_to_le32(cnt_code);
+	rsp->InputCount = cpu_to_le32(0);
+	rsp->InputOffset = cpu_to_le32(112);
+	rsp->OutputOffset = cpu_to_le32(112);
+	rsp->OutputCount = cpu_to_le32(nbytes);
+	rsp->StructureSize = cpu_to_le16(49);
+	rsp->Reserved = cpu_to_le16(0);
+	rsp->Flags = cpu_to_le32(0);
+	rsp->Reserved2 = cpu_to_le32(0);
+	inc_rfc1001_len(rsp_org, 48 + nbytes);
+
+	return 0;
+
+out:
+	/*
+	 * Map the errno to an SMB status. A status already set by a
+	 * handler is kept unless ret is one of the mapped errors.
+	 */
+	if (ret == -EACCES)
+		rsp->hdr.Status = STATUS_ACCESS_DENIED;
+	else if (ret == -ENOENT)
+		rsp->hdr.Status = STATUS_OBJECT_NAME_NOT_FOUND;
+	else if (ret == -EOPNOTSUPP)
+		rsp->hdr.Status = STATUS_NOT_SUPPORTED;
+	else if (ret < 0 || rsp->hdr.Status == 0)
+		rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+	smb2_set_err_rsp(work);
+	return 0;
+}
+
+/**
+ * smb20_oplock_break_ack() - handler for smb2.0 oplock break command
+ * @work:	smb work containing oplock break command buffer
+ *
+ * Applies the oplock level change acknowledged by the client and fills
+ * in the response. Errors are reported through rsp->hdr.Status and an
+ * error response body; nothing is returned to the caller.
+ */
+static void smb20_oplock_break_ack(struct ksmbd_work *work)
+{
+	struct smb2_oplock_break *req = work->request_buf;
+	struct smb2_oplock_break *rsp = work->response_buf;
+	struct ksmbd_file *fp;
+	struct oplock_info *opinfo = NULL;
+	__le32 err = 0;
+	int ret = 0;
+	u64 volatile_id, persistent_id;
+	char req_oplevel = 0, rsp_oplevel = 0;
+	unsigned int oplock_change_type;
+
+	volatile_id = le64_to_cpu(req->VolatileFid);
+	persistent_id = le64_to_cpu(req->PersistentFid);
+	req_oplevel = req->OplockLevel;
+	ksmbd_debug(OPLOCK, "v_id %llu, p_id %llu request oplock level %d\n",
+		    volatile_id, persistent_id, req_oplevel);
+
+	fp = ksmbd_lookup_fd_slow(work, volatile_id, persistent_id);
+	if (!fp) {
+		rsp->hdr.Status = STATUS_FILE_CLOSED;
+		smb2_set_err_rsp(work);
+		return;
+	}
+
+	opinfo = opinfo_get(fp);
+	if (!opinfo) {
+		pr_err("unexpected null oplock_info\n");
+		rsp->hdr.Status = STATUS_INVALID_OPLOCK_PROTOCOL;
+		smb2_set_err_rsp(work);
+		ksmbd_fd_put(work, fp);
+		return;
+	}
+
+	if (opinfo->level == SMB2_OPLOCK_LEVEL_NONE) {
+		rsp->hdr.Status = STATUS_INVALID_OPLOCK_PROTOCOL;
+		goto err_out;
+	}
+
+	if (opinfo->op_state == OPLOCK_STATE_NONE) {
+		ksmbd_debug(SMB, "unexpected oplock state 0x%x\n", opinfo->op_state);
+		rsp->hdr.Status = STATUS_UNSUCCESSFUL;
+		goto err_out;
+	}
+
+	/*
+	 * Pick the transition to apply and the status ("err") to report
+	 * if applying it fails. The first two branches cover a requested
+	 * level that is not valid for the currently held level.
+	 */
+	if ((opinfo->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE ||
+	     opinfo->level == SMB2_OPLOCK_LEVEL_BATCH) &&
+	    (req_oplevel != SMB2_OPLOCK_LEVEL_II &&
+	     req_oplevel != SMB2_OPLOCK_LEVEL_NONE)) {
+		err = STATUS_INVALID_OPLOCK_PROTOCOL;
+		oplock_change_type = OPLOCK_WRITE_TO_NONE;
+	} else if (opinfo->level == SMB2_OPLOCK_LEVEL_II &&
+		   req_oplevel != SMB2_OPLOCK_LEVEL_NONE) {
+		err = STATUS_INVALID_OPLOCK_PROTOCOL;
+		oplock_change_type = OPLOCK_READ_TO_NONE;
+	} else if (req_oplevel == SMB2_OPLOCK_LEVEL_II ||
+		   req_oplevel == SMB2_OPLOCK_LEVEL_NONE) {
+		err = STATUS_INVALID_DEVICE_STATE;
+		if ((opinfo->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE ||
+		     opinfo->level == SMB2_OPLOCK_LEVEL_BATCH) &&
+		    req_oplevel == SMB2_OPLOCK_LEVEL_II) {
+			oplock_change_type = OPLOCK_WRITE_TO_READ;
+		} else if ((opinfo->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE ||
+			    opinfo->level == SMB2_OPLOCK_LEVEL_BATCH) &&
+			   req_oplevel == SMB2_OPLOCK_LEVEL_NONE) {
+			oplock_change_type = OPLOCK_WRITE_TO_NONE;
+		} else if (opinfo->level == SMB2_OPLOCK_LEVEL_II &&
+			   req_oplevel == SMB2_OPLOCK_LEVEL_NONE) {
+			oplock_change_type = OPLOCK_READ_TO_NONE;
+		} else {
+			oplock_change_type = 0;
+		}
+	} else {
+		oplock_change_type = 0;
+	}
+
+	switch (oplock_change_type) {
+	case OPLOCK_WRITE_TO_READ:
+		ret = opinfo_write_to_read(opinfo);
+		rsp_oplevel = SMB2_OPLOCK_LEVEL_II;
+		break;
+	case OPLOCK_WRITE_TO_NONE:
+		ret = opinfo_write_to_none(opinfo);
+		rsp_oplevel = SMB2_OPLOCK_LEVEL_NONE;
+		break;
+	case OPLOCK_READ_TO_NONE:
+		ret = opinfo_read_to_none(opinfo);
+		rsp_oplevel = SMB2_OPLOCK_LEVEL_NONE;
+		break;
+	default:
+		pr_err("unknown oplock change 0x%x -> 0x%x\n",
+		       opinfo->level, rsp_oplevel);
+	}
+
+	if (ret < 0) {
+		rsp->hdr.Status = err;
+		goto err_out;
+	}
+
+	/*
+	 * Finish with opinfo before dropping the reference taken by
+	 * opinfo_get(): touching it after opinfo_put() would be a
+	 * use-after-put.
+	 */
+	opinfo->op_state = OPLOCK_STATE_NONE;
+	wake_up_interruptible_all(&opinfo->oplock_q);
+	opinfo_put(opinfo);
+	ksmbd_fd_put(work, fp);
+
+	rsp->StructureSize = cpu_to_le16(24);
+	rsp->OplockLevel = rsp_oplevel;
+	rsp->Reserved = 0;
+	rsp->Reserved2 = 0;
+	rsp->VolatileFid = cpu_to_le64(volatile_id);
+	rsp->PersistentFid = cpu_to_le64(persistent_id);
+	inc_rfc1001_len(rsp, 24);
+	return;
+
+err_out:
+	opinfo->op_state = OPLOCK_STATE_NONE;
+	wake_up_interruptible_all(&opinfo->oplock_q);
+
+	opinfo_put(opinfo);
+	ksmbd_fd_put(work, fp);
+	smb2_set_err_rsp(work);
+}
+
+/*
+ * Check the lease state acknowledged by the client against the state the
+ * break is expected to reach (lease->new_state).
+ *
+ * When the expected state is READ|HANDLE caching and the acknowledged
+ * state has no WRITE caching bit, the acknowledged state is accepted and
+ * stored as the new expected state.
+ *
+ * Return: 0 if @req_state is acceptable, 1 otherwise
+ */
+static int check_lease_state(struct lease *lease, __le32 req_state)
+{
+	const __le32 read_handle = SMB2_LEASE_READ_CACHING_LE |
+				   SMB2_LEASE_HANDLE_CACHING_LE;
+
+	if (lease->new_state == read_handle &&
+	    !(req_state & SMB2_LEASE_WRITE_CACHING_LE)) {
+		lease->new_state = req_state;
+		return 0;
+	}
+
+	return lease->new_state != req_state;
+}
+
+/**
+ * smb21_lease_break_ack() - handler for smb2.1 lease break command
+ * @work:	smb work containing lease break command buffer
+ *
+ * Applies the lease state change acknowledged by the client and fills in
+ * the response. Errors are reported through rsp->hdr.Status and an error
+ * response body; nothing is returned to the caller.
+ */
+static void smb21_lease_break_ack(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct smb2_lease_ack *req = work->request_buf;
+	struct smb2_lease_ack *rsp = work->response_buf;
+	struct oplock_info *opinfo;
+	__le32 err = 0;
+	int ret = 0;
+	unsigned int lease_change_type;
+	__le32 lease_state;
+	struct lease *lease;
+
+	ksmbd_debug(OPLOCK, "smb21 lease break, lease state(0x%x)\n",
+		    le32_to_cpu(req->LeaseState));
+	opinfo = lookup_lease_in_table(conn, req->LeaseKey);
+	if (!opinfo) {
+		ksmbd_debug(OPLOCK, "file not opened\n");
+		smb2_set_err_rsp(work);
+		rsp->hdr.Status = STATUS_UNSUCCESSFUL;
+		return;
+	}
+	lease = opinfo->o_lease;
+
+	if (opinfo->op_state == OPLOCK_STATE_NONE) {
+		pr_err("unexpected lease break state 0x%x\n",
+		       opinfo->op_state);
+		rsp->hdr.Status = STATUS_UNSUCCESSFUL;
+		goto err_out;
+	}
+
+	if (check_lease_state(lease, req->LeaseState)) {
+		rsp->hdr.Status = STATUS_REQUEST_NOT_ACCEPTED;
+		ksmbd_debug(OPLOCK,
+			    "req lease state: 0x%x, expected state: 0x%x\n",
+			    req->LeaseState, lease->new_state);
+		goto err_out;
+	}
+
+	if (!atomic_read(&opinfo->breaking_cnt)) {
+		rsp->hdr.Status = STATUS_UNSUCCESSFUL;
+		goto err_out;
+	}
+
+	/* check for bad lease state */
+	if (req->LeaseState &
+	    (~(SMB2_LEASE_READ_CACHING_LE | SMB2_LEASE_HANDLE_CACHING_LE))) {
+		err = STATUS_INVALID_OPLOCK_PROTOCOL;
+		if (lease->state & SMB2_LEASE_WRITE_CACHING_LE)
+			lease_change_type = OPLOCK_WRITE_TO_NONE;
+		else
+			lease_change_type = OPLOCK_READ_TO_NONE;
+		ksmbd_debug(OPLOCK, "handle bad lease state 0x%x -> 0x%x\n",
+			    le32_to_cpu(lease->state),
+			    le32_to_cpu(req->LeaseState));
+	} else if (lease->state == SMB2_LEASE_READ_CACHING_LE &&
+		   req->LeaseState != SMB2_LEASE_NONE_LE) {
+		err = STATUS_INVALID_OPLOCK_PROTOCOL;
+		lease_change_type = OPLOCK_READ_TO_NONE;
+		ksmbd_debug(OPLOCK, "handle bad lease state 0x%x -> 0x%x\n",
+			    le32_to_cpu(lease->state),
+			    le32_to_cpu(req->LeaseState));
+	} else {
+		/* valid lease state changes */
+		err = STATUS_INVALID_DEVICE_STATE;
+		if (req->LeaseState == SMB2_LEASE_NONE_LE) {
+			if (lease->state & SMB2_LEASE_WRITE_CACHING_LE)
+				lease_change_type = OPLOCK_WRITE_TO_NONE;
+			else
+				lease_change_type = OPLOCK_READ_TO_NONE;
+		} else if (req->LeaseState & SMB2_LEASE_READ_CACHING_LE) {
+			if (lease->state & SMB2_LEASE_WRITE_CACHING_LE)
+				lease_change_type = OPLOCK_WRITE_TO_READ;
+			else
+				lease_change_type = OPLOCK_READ_HANDLE_TO_READ;
+		} else {
+			lease_change_type = 0;
+		}
+	}
+
+	switch (lease_change_type) {
+	case OPLOCK_WRITE_TO_READ:
+		ret = opinfo_write_to_read(opinfo);
+		break;
+	case OPLOCK_READ_HANDLE_TO_READ:
+		ret = opinfo_read_handle_to_read(opinfo);
+		break;
+	case OPLOCK_WRITE_TO_NONE:
+		ret = opinfo_write_to_none(opinfo);
+		break;
+	case OPLOCK_READ_TO_NONE:
+		ret = opinfo_read_to_none(opinfo);
+		break;
+	default:
+		ksmbd_debug(OPLOCK, "unknown lease change 0x%x -> 0x%x\n",
+			    le32_to_cpu(lease->state),
+			    le32_to_cpu(req->LeaseState));
+	}
+
+	/*
+	 * Decide on failure before releasing anything: err_out performs
+	 * the state reset, breaking_cnt decrement and opinfo_put() itself,
+	 * so jumping there after the release below would do all of them a
+	 * second time.
+	 */
+	if (ret < 0) {
+		rsp->hdr.Status = err;
+		goto err_out;
+	}
+
+	lease_state = lease->state;
+	opinfo->op_state = OPLOCK_STATE_NONE;
+	wake_up_interruptible_all(&opinfo->oplock_q);
+	atomic_dec(&opinfo->breaking_cnt);
+	wake_up_interruptible_all(&opinfo->oplock_brk);
+	opinfo_put(opinfo);
+
+	rsp->StructureSize = cpu_to_le16(36);
+	rsp->Reserved = 0;
+	rsp->Flags = 0;
+	memcpy(rsp->LeaseKey, req->LeaseKey, 16);
+	rsp->LeaseState = lease_state;
+	rsp->LeaseDuration = 0;
+	inc_rfc1001_len(rsp, 36);
+	return;
+
+err_out:
+	opinfo->op_state = OPLOCK_STATE_NONE;
+	wake_up_interruptible_all(&opinfo->oplock_q);
+	atomic_dec(&opinfo->breaking_cnt);
+	wake_up_interruptible_all(&opinfo->oplock_brk);
+
+	opinfo_put(opinfo);
+	smb2_set_err_rsp(work);
+}
+
+/**
+ * smb2_oplock_break() - dispatcher for smb2.0 and 2.1 oplock/lease break
+ * @work:	smb work containing oplock/lease break command buffer
+ *
+ * The request's StructureSize selects the oplock break handler or the
+ * lease break handler; any other size is answered with
+ * STATUS_INVALID_PARAMETER.
+ *
+ * Return:	0
+ */
+int smb2_oplock_break(struct ksmbd_work *work)
+{
+	struct smb2_oplock_break *req = work->request_buf;
+	struct smb2_oplock_break *rsp = work->response_buf;
+	int struct_size = le16_to_cpu(req->StructureSize);
+
+	if (struct_size == OP_BREAK_STRUCT_SIZE_20) {
+		smb20_oplock_break_ack(work);
+	} else if (struct_size == OP_BREAK_STRUCT_SIZE_21) {
+		smb21_lease_break_ack(work);
+	} else {
+		ksmbd_debug(OPLOCK, "invalid break cmd %d\n",
+			    struct_size);
+		rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+		smb2_set_err_rsp(work);
+	}
+
+	return 0;
+}
+
+/**
+ * smb2_notify() - handler for smb2 notify request
+ * @work:	smb work containing notify command buffer
+ *
+ * Always answers with an error response: STATUS_INTERNAL_ERROR when the
+ * request is a non-first compound element that has a NextCommand, and
+ * STATUS_NOT_IMPLEMENTED otherwise.
+ *
+ * Return:	0
+ */
+int smb2_notify(struct ksmbd_work *work)
+{
+	struct smb2_notify_req *req;
+	struct smb2_notify_rsp *rsp;
+
+	WORK_BUFFERS(work, req, rsp);
+
+	if (!work->next_smb2_rcv_hdr_off || !req->hdr.NextCommand) {
+		smb2_set_err_rsp(work);
+		rsp->hdr.Status = STATUS_NOT_IMPLEMENTED;
+		return 0;
+	}
+
+	rsp->hdr.Status = STATUS_INTERNAL_ERROR;
+	smb2_set_err_rsp(work);
+	return 0;
+}
+
+/**
+ * smb2_is_sign_req() - handler for checking packet signing status
+ * @work:	smb work containing the request buffer
+ * @command:	SMB2 command id
+ *
+ * NEGOTIATE, SESSION_SETUP and OPLOCK_BREAK are never reported as signed
+ * here, whatever the header flag says.
+ *
+ * Return:	true if the packet is signed, false otherwise
+ */
+bool smb2_is_sign_req(struct ksmbd_work *work, unsigned int command)
+{
+	struct smb2_hdr *hdr = work->request_buf;
+
+	if (!(hdr->Flags & SMB2_FLAGS_SIGNED))
+		return false;
+
+	switch (command) {
+	case SMB2_NEGOTIATE_HE:
+	case SMB2_SESSION_SETUP_HE:
+	case SMB2_OPLOCK_BREAK_HE:
+		return false;
+	default:
+		return true;
+	}
+}
+
+/**
+ * smb2_check_sign_req() - handler for req packet sign processing
+ * @work:	smb work containing the request buffer
+ *
+ * Saves the signature from the request header, zeroes it in place,
+ * recomputes the signature with the session key and compares the two.
+ *
+ * Return:	1 on success, 0 otherwise
+ */
+int smb2_check_sign_req(struct ksmbd_work *work)
+{
+	struct smb2_hdr *first_hdr = work->request_buf;
+	struct smb2_hdr *cur_hdr = first_hdr;
+	char received_sig[SMB2_SIGNATURE_SIZE];
+	char computed_sig[SMB2_HMACSHA256_SIZE];
+	struct kvec iov[1];
+	size_t len;
+
+	if (work->next_smb2_rcv_hdr_off)
+		cur_hdr = ksmbd_req_buf_next(work);
+
+	/*
+	 * Signed length: up to the next command if there is one, otherwise
+	 * the rest of the buffer from the current header.
+	 */
+	if (cur_hdr->NextCommand)
+		len = le32_to_cpu(cur_hdr->NextCommand);
+	else if (work->next_smb2_rcv_hdr_off)
+		len = be32_to_cpu(first_hdr->smb2_buf_length) -
+			work->next_smb2_rcv_hdr_off;
+	else
+		len = be32_to_cpu(first_hdr->smb2_buf_length);
+
+	/* the signature field is zero while the signature is computed */
+	memcpy(received_sig, cur_hdr->Signature, SMB2_SIGNATURE_SIZE);
+	memset(cur_hdr->Signature, 0, SMB2_SIGNATURE_SIZE);
+
+	iov[0].iov_base = (char *)&cur_hdr->ProtocolId;
+	iov[0].iov_len = len;
+
+	if (ksmbd_sign_smb2_pdu(work->conn, work->sess->sess_key, iov, 1,
+				computed_sig))
+		return 0;
+
+	if (memcmp(computed_sig, received_sig, SMB2_SIGNATURE_SIZE) != 0) {
+		pr_err("bad smb2 signature\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+/**
+ * smb2_set_sign_rsp() - handler for rsp packet sign processing
+ * @work:	smb work containing the response buffer
+ *
+ * Marks the current response header as signed and, if the signature can
+ * be computed with the session key, stores it in the header. An auxiliary
+ * payload, when present, is signed as a second vector.
+ */
+void smb2_set_sign_rsp(struct ksmbd_work *work)
+{
+	struct smb2_hdr *first_rsp = work->response_buf;
+	struct smb2_hdr *cur_rsp = first_rsp;
+	struct smb2_hdr *cur_req;
+	char computed_sig[SMB2_HMACSHA256_SIZE];
+	struct kvec iov[2];
+	int vec_cnt = 1;
+	size_t len;
+
+	if (work->next_smb2_rsp_hdr_off)
+		cur_rsp = ksmbd_resp_buf_next(work);
+
+	cur_req = ksmbd_req_buf_next(work);
+
+	/*
+	 * Length of the current response; it is rounded up to 8 bytes for
+	 * every compound element except a lone, non-chained response.
+	 */
+	len = get_rfc1002_len(first_rsp) - work->next_smb2_rsp_hdr_off;
+	if (work->next_smb2_rsp_hdr_off || cur_req->NextCommand)
+		len = ALIGN(len, 8);
+
+	if (cur_req->NextCommand)
+		cur_rsp->NextCommand = cpu_to_le32(len);
+
+	cur_rsp->Flags |= SMB2_FLAGS_SIGNED;
+	memset(cur_rsp->Signature, 0, SMB2_SIGNATURE_SIZE);
+
+	iov[0].iov_base = (char *)&cur_rsp->ProtocolId;
+	iov[0].iov_len = len;
+
+	if (work->aux_payload_sz) {
+		/* the aux payload lives in its own buffer */
+		iov[0].iov_len -= work->aux_payload_sz;
+
+		iov[1].iov_base = work->aux_payload_buf;
+		iov[1].iov_len = work->aux_payload_sz;
+		vec_cnt++;
+	}
+
+	if (ksmbd_sign_smb2_pdu(work->conn, work->sess->sess_key, iov, vec_cnt,
+				computed_sig))
+		return;
+
+	memcpy(cur_rsp->Signature, computed_sig, SMB2_SIGNATURE_SIZE);
+}
+
+/**
+ * smb3_check_sign_req() - handler for req packet sign processing
+ * @work:	smb work containing the request buffer
+ *
+ * SESSION_SETUP requests are checked with the session's signing key; all
+ * other commands use the key of the channel found for this connection.
+ *
+ * Return:	1 on success, 0 otherwise
+ */
+int smb3_check_sign_req(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct smb2_hdr *first_hdr = work->request_buf;
+	struct smb2_hdr *cur_hdr = first_hdr;
+	struct channel *chann;
+	char *key;
+	char received_sig[SMB2_SIGNATURE_SIZE];
+	char computed_sig[SMB2_CMACAES_SIZE];
+	struct kvec iov[1];
+	size_t len;
+
+	if (work->next_smb2_rcv_hdr_off)
+		cur_hdr = ksmbd_req_buf_next(work);
+
+	/*
+	 * Signed length: up to the next command if there is one, otherwise
+	 * the rest of the buffer from the current header.
+	 */
+	if (cur_hdr->NextCommand)
+		len = le32_to_cpu(cur_hdr->NextCommand);
+	else if (work->next_smb2_rcv_hdr_off)
+		len = be32_to_cpu(first_hdr->smb2_buf_length) -
+			work->next_smb2_rcv_hdr_off;
+	else
+		len = be32_to_cpu(first_hdr->smb2_buf_length);
+
+	if (le16_to_cpu(cur_hdr->Command) == SMB2_SESSION_SETUP_HE) {
+		key = work->sess->smb3signingkey;
+	} else {
+		chann = lookup_chann_list(work->sess, conn);
+		if (!chann)
+			return 0;
+		key = chann->smb3signingkey;
+	}
+
+	if (!key) {
+		pr_err("SMB3 signing key is not generated\n");
+		return 0;
+	}
+
+	/* the signature field is zero while the signature is computed */
+	memcpy(received_sig, cur_hdr->Signature, SMB2_SIGNATURE_SIZE);
+	memset(cur_hdr->Signature, 0, SMB2_SIGNATURE_SIZE);
+
+	iov[0].iov_base = (char *)&cur_hdr->ProtocolId;
+	iov[0].iov_len = len;
+
+	if (ksmbd_sign_smb3_pdu(conn, key, iov, 1, computed_sig))
+		return 0;
+
+	if (memcmp(computed_sig, received_sig, SMB2_SIGNATURE_SIZE) != 0) {
+		pr_err("bad smb2 signature\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+/**
+ * smb3_set_sign_rsp() - handler for rsp packet sign processing
+ * @work:	smb work containing the response buffer
+ *
+ * A SESSION_SETUP response on a non-binding connection is signed with the
+ * session's signing key; everything else uses the key of the channel
+ * found for this connection. The response is left untouched when no key
+ * is available.
+ */
+void smb3_set_sign_rsp(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct smb2_hdr *first_rsp = work->response_buf;
+	struct smb2_hdr *cur_rsp = first_rsp;
+	struct smb2_hdr *cur_req;
+	struct channel *chann;
+	char computed_sig[SMB2_CMACAES_SIZE];
+	struct kvec iov[2];
+	int vec_cnt = 1;
+	size_t len;
+	char *key;
+
+	if (work->next_smb2_rsp_hdr_off)
+		cur_rsp = ksmbd_resp_buf_next(work);
+
+	cur_req = ksmbd_req_buf_next(work);
+
+	/*
+	 * Length of the current response; it is rounded up to 8 bytes for
+	 * every compound element except a lone, non-chained response.
+	 */
+	len = get_rfc1002_len(first_rsp) - work->next_smb2_rsp_hdr_off;
+	if (work->next_smb2_rsp_hdr_off || cur_req->NextCommand)
+		len = ALIGN(len, 8);
+
+	if (!conn->binding &&
+	    le16_to_cpu(cur_rsp->Command) == SMB2_SESSION_SETUP_HE) {
+		key = work->sess->smb3signingkey;
+	} else {
+		chann = lookup_chann_list(work->sess, work->conn);
+		if (!chann)
+			return;
+		key = chann->smb3signingkey;
+	}
+
+	if (!key)
+		return;
+
+	if (cur_req->NextCommand)
+		cur_rsp->NextCommand = cpu_to_le32(len);
+
+	cur_rsp->Flags |= SMB2_FLAGS_SIGNED;
+	memset(cur_rsp->Signature, 0, SMB2_SIGNATURE_SIZE);
+
+	iov[0].iov_base = (char *)&cur_rsp->ProtocolId;
+	iov[0].iov_len = len;
+	if (work->aux_payload_sz) {
+		/* the aux payload lives in its own buffer */
+		iov[0].iov_len -= work->aux_payload_sz;
+		iov[1].iov_base = work->aux_payload_buf;
+		iov[1].iov_len = work->aux_payload_sz;
+		vec_cnt++;
+	}
+
+	if (ksmbd_sign_smb3_pdu(conn, key, iov, vec_cnt, computed_sig))
+		return;
+
+	memcpy(cur_rsp->Signature, computed_sig, SMB2_SIGNATURE_SIZE);
+}
+
+/**
+ * smb3_preauth_hash_rsp() - handler for computing preauth hash on response
+ * @work:	smb work containing response buffer
+ *
+ * Only acts on SMB 3.1.1 connections. A NEGOTIATE response is folded into
+ * the connection's preauth hash. A SESSION_SETUP response is folded into
+ * the preauth-session hash when the connection is binding, and into the
+ * session's hash otherwise.
+ */
+void smb3_preauth_hash_rsp(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct ksmbd_session *sess = work->sess;
+	struct smb2_hdr *req, *rsp;
+	__u8 *hash;
+
+	if (conn->dialect != SMB311_PROT_ID)
+		return;
+
+	WORK_BUFFERS(work, req, rsp);
+
+	if (le16_to_cpu(req->Command) == SMB2_NEGOTIATE_HE)
+		ksmbd_gen_preauth_integrity_hash(conn, (char *)rsp,
+						 conn->preauth_info->Preauth_HashValue);
+
+	if (le16_to_cpu(rsp->Command) != SMB2_SESSION_SETUP_HE || !sess)
+		return;
+
+	if (conn->binding) {
+		struct preauth_session *preauth_sess;
+
+		preauth_sess = ksmbd_preauth_session_lookup(conn, sess->id);
+		if (!preauth_sess)
+			return;
+		hash = preauth_sess->Preauth_HashValue;
+	} else {
+		hash = sess->Preauth_HashValue;
+		if (!hash)
+			return;
+	}
+
+	ksmbd_gen_preauth_integrity_hash(conn, (char *)rsp, hash);
+}
+
+/*
+ * fill_transform_hdr() - initialise @tr_hdr for the message held in @old_buf
+ *
+ * Zeroes the header, then sets the protocol id, the original message size,
+ * Flags (0x01), a random nonce whose length depends on @cipher_type, and the
+ * session id copied from the SMB2 header in @old_buf.  Finally the length
+ * prefix is bumped by the transform header size (less the 4-byte prefix) and
+ * by the original message length.
+ */
+static void fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, char *old_buf,
+			       __le16 cipher_type)
+{
+	unsigned int msg_len = get_rfc1002_len(old_buf);
+	struct smb2_hdr *src = (struct smb2_hdr *)old_buf;
+	bool gcm = cipher_type == SMB2_ENCRYPTION_AES128_GCM ||
+		   cipher_type == SMB2_ENCRYPTION_AES256_GCM;
+
+	memset(tr_hdr, 0, sizeof(*tr_hdr));
+
+	tr_hdr->ProtocolId = SMB2_TRANSFORM_PROTO_NUM;
+	tr_hdr->OriginalMessageSize = cpu_to_le32(msg_len);
+	tr_hdr->Flags = cpu_to_le16(0x01);
+	/* GCM ciphers get the GCM nonce length, everything else the CCM one */
+	get_random_bytes(&tr_hdr->Nonce,
+			 gcm ? SMB3_AES_GCM_NONCE : SMB3_AES_CCM_NONCE);
+	memcpy(&tr_hdr->SessionId, &src->SessionId, 8);
+
+	inc_rfc1001_len(tr_hdr, sizeof(*tr_hdr) - 4);
+	inc_rfc1001_len(tr_hdr, msg_len);
+}
+
+/**
+ * smb3_encrypt_resp() - encrypt the response carried by @work
+ * @work:	smb work whose response_buf (plus optional aux payload) is
+ *		encrypted in place
+ *
+ * Allocates a transform header, fills it from the response, and passes the
+ * header, the response and the optional aux payload to ksmbd_crypt_message().
+ * On success the header is stored in work->tr_buf; on failure it is freed
+ * here because nothing else holds a reference to it.
+ *
+ * Return:	0 on success, -ENOMEM if the header cannot be allocated,
+ *		otherwise the error returned by ksmbd_crypt_message()
+ */
+int smb3_encrypt_resp(struct ksmbd_work *work)
+{
+	char *buf = work->response_buf;
+	struct smb2_transform_hdr *tr_hdr;
+	struct kvec iov[3];
+	int rc = -ENOMEM;
+	int buf_size = 0, rq_nvec = 2 + (work->aux_payload_sz ? 1 : 0);
+
+	if (ARRAY_SIZE(iov) < rq_nvec)
+		return -ENOMEM;
+
+	tr_hdr = kzalloc(sizeof(struct smb2_transform_hdr), GFP_KERNEL);
+	if (!tr_hdr)
+		return rc;
+
+	/* fill transform header */
+	fill_transform_hdr(tr_hdr, buf, work->conn->cipher_type);
+
+	iov[0].iov_base = tr_hdr;
+	iov[0].iov_len = sizeof(struct smb2_transform_hdr);
+	buf_size += iov[0].iov_len - 4;
+
+	/* skip the 4-byte length prefix of the response */
+	iov[1].iov_base = buf + 4;
+	iov[1].iov_len = get_rfc1002_len(buf);
+	if (work->aux_payload_sz) {
+		iov[1].iov_len = work->resp_hdr_sz - 4;
+
+		iov[2].iov_base = work->aux_payload_buf;
+		iov[2].iov_len = work->aux_payload_sz;
+		buf_size += iov[2].iov_len;
+	}
+	buf_size += iov[1].iov_len;
+	work->resp_hdr_sz = iov[1].iov_len;
+
+	rc = ksmbd_crypt_message(work->conn, iov, rq_nvec, 1);
+	if (rc) {
+		/* tr_hdr was never handed over to @work: release it here */
+		kfree(tr_hdr);
+		return rc;
+	}
+
+	memmove(buf, iov[1].iov_base, iov[1].iov_len);
+	tr_hdr->smb2_buf_length = cpu_to_be32(buf_size);
+	work->tr_buf = tr_hdr;
+
+	return rc;
+}
+
+/*
+ * Report whether @buf starts with a transform header, judged solely by its
+ * ProtocolId field matching SMB2_TRANSFORM_PROTO_NUM.
+ */
+bool smb3_is_transform_hdr(void *buf)
+{
+	struct smb2_transform_hdr *candidate = buf;
+
+	if (candidate->ProtocolId != SMB2_TRANSFORM_PROTO_NUM)
+		return false;
+	return true;
+}
+
+/**
+ * smb3_decrypt_req() - decrypt the transformed request carried by @work
+ * @work:	smb work whose request_buf starts with a transform header
+ *
+ * The PDU length is validated before any transform header field is read:
+ * the message must hold a transform header plus an SMB2 header, and the
+ * payload must be at least OriginalMessageSize bytes.  The session named by
+ * the header must exist.  The payload is then decrypted and moved down so
+ * that request_buf again starts with a length prefix followed by the
+ * plaintext message.
+ *
+ * Return:	0 on success, -ECONNABORTED for a malformed message or an
+ *		unknown session, otherwise the error from
+ *		ksmbd_crypt_message()
+ */
+int smb3_decrypt_req(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct ksmbd_session *sess;
+	char *buf = work->request_buf;
+	struct smb2_hdr *hdr;
+	unsigned int pdu_length = get_rfc1002_len(buf);
+	struct kvec iov[2];
+	unsigned int buf_data_size;
+	struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf;
+	int rc = 0;
+
+	/* Size checks come first: no header field is trusted before this. */
+	if (pdu_length + 4 <
+	    sizeof(struct smb2_transform_hdr) + sizeof(struct smb2_hdr)) {
+		pr_err("Transform message is too small (%u)\n",
+		       pdu_length);
+		return -ECONNABORTED;
+	}
+
+	/* Cannot underflow: the check above guarantees a full header. */
+	buf_data_size = pdu_length + 4 - sizeof(struct smb2_transform_hdr);
+
+	/* Compare without adding to the untrusted size, so nothing can wrap. */
+	if (buf_data_size < le32_to_cpu(tr_hdr->OriginalMessageSize)) {
+		pr_err("Transform message is broken\n");
+		return -ECONNABORTED;
+	}
+
+	sess = ksmbd_session_lookup_all(conn, le64_to_cpu(tr_hdr->SessionId));
+	if (!sess) {
+		pr_err("invalid session id(%llx) in transform header\n",
+		       le64_to_cpu(tr_hdr->SessionId));
+		return -ECONNABORTED;
+	}
+
+	iov[0].iov_base = buf;
+	iov[0].iov_len = sizeof(struct smb2_transform_hdr);
+	iov[1].iov_base = buf + sizeof(struct smb2_transform_hdr);
+	iov[1].iov_len = buf_data_size;
+	rc = ksmbd_crypt_message(conn, iov, 2, 0);
+	if (rc)
+		return rc;
+
+	/* Drop the transform header: plaintext follows the 4-byte prefix. */
+	memmove(buf + 4, iov[1].iov_base, buf_data_size);
+	hdr = (struct smb2_hdr *)buf;
+	hdr->smb2_buf_length = cpu_to_be32(buf_data_size);
+
+	return rc;
+}
+
+/*
+ * Tell whether the response in @work is a SESSION_SETUP reply with
+ * STATUS_SUCCESS on a connection whose dialect is at least SMB30_PROT_ID.
+ * When work->next_smb2_rcv_hdr_off is set, the header returned by
+ * ksmbd_resp_buf_next() is examined instead of the start of response_buf.
+ */
+bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work)
+{
+	struct smb2_hdr *rsp_hdr;
+
+	if (work->conn->dialect < SMB30_PROT_ID)
+		return false;
+
+	if (work->next_smb2_rcv_hdr_off)
+		rsp_hdr = ksmbd_resp_buf_next(work);
+	else
+		rsp_hdr = work->response_buf;
+
+	return le16_to_cpu(rsp_hdr->Command) == SMB2_SESSION_SETUP_HE &&
+	       rsp_hdr->Status == STATUS_SUCCESS;
+}
diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h
new file mode 100644
index 000000000000..bcec845b03f3
--- /dev/null
+++ b/fs/ksmbd/smb2pdu.h
@@ -0,0 +1,1698 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef _SMB2PDU_H
+#define _SMB2PDU_H
+
+#include "ntlmssp.h"
+#include "smbacl.h"
+
+/*
+ * Note that, due to trying to use names similar to the protocol specifications,
+ * there are many mixed case field names in the structures below. Although
+ * this does not match typical Linux kernel style, it is necessary to be
+ * able to match against the protocol specification.
+ *
+ * SMB2 commands
+ * Some commands have minimal (wct=0,bcc=0), or uninteresting, responses
+ * (ie no useful data other than the SMB error code itself) and are marked such.
+ * Knowing this helps avoid response buffer allocations and copy in some cases.
+ */
+
+/*
+ * List of commands in host endian.  The codes are contiguous, running from
+ * 0x0000 (NEGOTIATE) to 0x0012 (OPLOCK_BREAK).
+ */
+#define SMB2_NEGOTIATE_HE 0x0000
+#define SMB2_SESSION_SETUP_HE 0x0001
+#define SMB2_LOGOFF_HE 0x0002 /* trivial request/resp */
+#define SMB2_TREE_CONNECT_HE 0x0003
+#define SMB2_TREE_DISCONNECT_HE 0x0004 /* trivial req/resp */
+#define SMB2_CREATE_HE 0x0005
+#define SMB2_CLOSE_HE 0x0006
+#define SMB2_FLUSH_HE 0x0007 /* trivial resp */
+#define SMB2_READ_HE 0x0008
+#define SMB2_WRITE_HE 0x0009
+#define SMB2_LOCK_HE 0x000A
+#define SMB2_IOCTL_HE 0x000B
+#define SMB2_CANCEL_HE 0x000C
+#define SMB2_ECHO_HE 0x000D
+#define SMB2_QUERY_DIRECTORY_HE 0x000E
+#define SMB2_CHANGE_NOTIFY_HE 0x000F
+#define SMB2_QUERY_INFO_HE 0x0010
+#define SMB2_SET_INFO_HE 0x0011
+#define SMB2_OPLOCK_BREAK_HE 0x0012
+
+/*
+ * The same list in little endian: each entry wraps its _HE value in
+ * cpu_to_le16().
+ */
+#define SMB2_NEGOTIATE cpu_to_le16(SMB2_NEGOTIATE_HE)
+#define SMB2_SESSION_SETUP cpu_to_le16(SMB2_SESSION_SETUP_HE)
+#define SMB2_LOGOFF cpu_to_le16(SMB2_LOGOFF_HE)
+#define SMB2_TREE_CONNECT cpu_to_le16(SMB2_TREE_CONNECT_HE)
+#define SMB2_TREE_DISCONNECT cpu_to_le16(SMB2_TREE_DISCONNECT_HE)
+#define SMB2_CREATE cpu_to_le16(SMB2_CREATE_HE)
+#define SMB2_CLOSE cpu_to_le16(SMB2_CLOSE_HE)
+#define SMB2_FLUSH cpu_to_le16(SMB2_FLUSH_HE)
+#define SMB2_READ cpu_to_le16(SMB2_READ_HE)
+#define SMB2_WRITE cpu_to_le16(SMB2_WRITE_HE)
+#define SMB2_LOCK cpu_to_le16(SMB2_LOCK_HE)
+#define SMB2_IOCTL cpu_to_le16(SMB2_IOCTL_HE)
+#define SMB2_CANCEL cpu_to_le16(SMB2_CANCEL_HE)
+#define SMB2_ECHO cpu_to_le16(SMB2_ECHO_HE)
+#define SMB2_QUERY_DIRECTORY cpu_to_le16(SMB2_QUERY_DIRECTORY_HE)
+#define SMB2_CHANGE_NOTIFY cpu_to_le16(SMB2_CHANGE_NOTIFY_HE)
+#define SMB2_QUERY_INFO cpu_to_le16(SMB2_QUERY_INFO_HE)
+#define SMB2_SET_INFO cpu_to_le16(SMB2_SET_INFO_HE)
+#define SMB2_OPLOCK_BREAK cpu_to_le16(SMB2_OPLOCK_BREAK_HE)
+
+/*
+ * Create Action Flags (host endian; presumably the values reported in the
+ * CreateAction field of struct smb2_create_rsp -- confirm against callers)
+ */
+#define FILE_SUPERSEDED 0x00000000
+#define FILE_OPENED 0x00000001
+#define FILE_CREATED 0x00000002
+#define FILE_OVERWRITTEN 0x00000003
+
+/*
+ * Size of the session key (crypto key encrypted with the password),
+ * followed by the sizes of the signature, MAC and GCM key buffers
+ */
+#define SMB2_NTLMV2_SESSKEY_SIZE 16
+#define SMB2_SIGNATURE_SIZE 16 /* matches Signature[16] in struct smb2_hdr */
+#define SMB2_HMACSHA256_SIZE 32
+#define SMB2_CMACAES_SIZE 16
+#define SMB3_GCM128_CRYPTKEY_SIZE 16
+#define SMB3_GCM256_CRYPTKEY_SIZE 32
+
+/*
+ * Size of the smb3 encryption/decryption keys
+ * (equal to SMB3_GCM256_CRYPTKEY_SIZE, the larger of the two GCM key sizes)
+ */
+#define SMB3_ENC_DEC_KEY_SIZE 32
+
+/*
+ * Size of the smb3 signing key
+ */
+#define SMB3_SIGN_KEY_SIZE 16
+
+#define CIFS_CLIENT_CHALLENGE_SIZE 8
+#define SMB_SERVER_CHALLENGE_SIZE 8
+
+/* SMB2 Max Credits */
+#define SMB2_MAX_CREDITS 8192
+
+#define SMB2_CLIENT_GUID_SIZE 16
+#define SMB2_CREATE_GUID_SIZE 16
+
+/* Maximum buffer size value we can send with 1 credit */
+#define SMB2_MAX_BUFFER_SIZE 65536
+
+#define NUMBER_OF_SMB2_COMMANDS 0x0013 /* one more than SMB2_OPLOCK_BREAK_HE (0x0012) */
+
+/* BB FIXME - analyze following length BB */
+#define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad = 120 */
+
+#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) /* bytes 0xFE 'S' 'M' 'B' in little-endian order */
+#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd) /* bytes 0xFD 'S' 'M' 'B' in little-endian order */
+
+#define SMB21_DEFAULT_IOSIZE (1024 * 1024)
+#define SMB3_DEFAULT_IOSIZE (4 * 1024 * 1024)
+#define SMB3_DEFAULT_TRANS_SIZE (1024 * 1024)
+
+/*
+ * SMB2 Header Definition
+ *
+ * "MBZ" : Must be Zero
+ * "BB" : BugBug, Something to check/review/analyze later
+ * "PDU" : "Protocol Data Unit" (ie a network "frame")
+ *
+ * The structure size of 64 counts the fields from ProtocolId through
+ * Signature; struct smb2_hdr additionally starts with the 4-byte
+ * smb2_buf_length, so sizeof(struct smb2_hdr) is 68.
+ */
+
+#define __SMB2_HEADER_STRUCTURE_SIZE 64
+#define SMB2_HEADER_STRUCTURE_SIZE \
+ cpu_to_le16(__SMB2_HEADER_STRUCTURE_SIZE)
+
+struct smb2_hdr { /* length prefix followed by the 64-byte SMB2 header */
+ __be32 smb2_buf_length; /* big endian on wire */
+ /*
+ * length is only two or three bytes - with
+ * one or two byte type preceding it that MBZ
+ */
+ __le32 ProtocolId; /* 0xFE 'S' 'M' 'B' */
+ __le16 StructureSize; /* 64 */
+ __le16 CreditCharge; /* MBZ */
+ __le32 Status; /* Error from server */
+ __le16 Command; /* little-endian command code, see the SMB2_* list */
+ __le16 CreditRequest; /* CreditResponse */
+ __le32 Flags; /* SMB2_FLAGS_* bits */
+ __le32 NextCommand; /* nonzero in a compound; presumably offset of the next command -- confirm */
+ __le64 MessageId;
+ union { /* SyncId and AsyncId overlay the same 8 bytes */
+ struct {
+ __le32 ProcessId;
+ __le32 TreeId;
+ } __packed SyncId;
+ __le64 AsyncId;
+ } __packed Id;
+ __le64 SessionId;
+ __u8 Signature[16]; /* SMB2_SIGNATURE_SIZE bytes */
+} __packed;
+
+struct smb2_pdu { /* generic view of a PDU: common header plus the first 16-bit body field */
+ struct smb2_hdr hdr;
+ __le16 StructureSize2; /* size of wct area (varies, request specific) */
+} __packed;
+
+#define SMB3_AES_CCM_NONCE 11 /* nonce bytes generated when the cipher is not a GCM one */
+#define SMB3_AES_GCM_NONCE 12 /* nonce bytes generated for the AES-GCM ciphers */
+
+struct smb2_transform_hdr { /* header that precedes an encrypted message; 56 bytes including the length prefix */
+ __be32 smb2_buf_length; /* big endian on wire */
+ /*
+ * length is only two or three bytes - with
+ * one or two byte type preceding it that MBZ
+ */
+ __le32 ProtocolId; /* 0xFD 'S' 'M' 'B' */
+ __u8 Signature[16];
+ __u8 Nonce[16]; /* only the first SMB3_AES_{CCM,GCM}_NONCE bytes are randomised when the server builds the header */
+ __le32 OriginalMessageSize; /* length of the message being wrapped */
+ __u16 Reserved1;
+ __le16 Flags; /* EncryptionAlgorithm */
+ __le64 SessionId; /* copied from the wrapped SMB2 header when the server builds the header */
+} __packed;
+
+/*
+ * SMB2 flag definitions (little-endian bit values for the Flags field of
+ * struct smb2_hdr)
+ */
+#define SMB2_FLAGS_SERVER_TO_REDIR cpu_to_le32(0x00000001)
+#define SMB2_FLAGS_ASYNC_COMMAND cpu_to_le32(0x00000002)
+#define SMB2_FLAGS_RELATED_OPERATIONS cpu_to_le32(0x00000004)
+#define SMB2_FLAGS_SIGNED cpu_to_le32(0x00000008)
+#define SMB2_FLAGS_DFS_OPERATIONS cpu_to_le32(0x10000000)
+#define SMB2_FLAGS_REPLAY_OPERATIONS cpu_to_le32(0x20000000)
+
+/*
+ * Definitions for SMB2 Protocol Data Units (network frames)
+ *
+ * See MS-SMB2.PDF specification for protocol details.
+ * The Naming convention is the lower case version of the SMB2
+ * command code name for the struct. Note that structures must be packed.
+ *
+ */
+
+#define SMB2_ERROR_STRUCTURE_SIZE2 9 /* 8 fixed bytes after the header, plus 1 */
+#define SMB2_ERROR_STRUCTURE_SIZE2_LE cpu_to_le16(SMB2_ERROR_STRUCTURE_SIZE2)
+
+struct smb2_err_rsp { /* error response: header, 8-byte fixed part, then error data */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* presumably SMB2_ERROR_STRUCTURE_SIZE2_LE -- confirm against callers */
+ __u8 ErrorContextCount;
+ __u8 Reserved;
+ __le32 ByteCount; /* even if zero, at least one byte follows */
+ __u8 ErrorData[1]; /* variable length */
+} __packed;
+
+struct smb2_negotiate_req { /* NEGOTIATE request: header, 36-byte fixed part, then the dialect array */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 36 */
+ __le16 DialectCount;
+ __le16 SecurityMode;
+ __le16 Reserved; /* MBZ */
+ __le32 Capabilities;
+ __u8 ClientGUID[SMB2_CLIENT_GUID_SIZE];
+ /* In SMB3.02 and earlier next three were MBZ le64 ClientStartTime */
+ __le32 NegotiateContextOffset; /* SMB3.1.1 only. MBZ earlier */
+ __le16 NegotiateContextCount; /* SMB3.1.1 only. MBZ earlier */
+ __le16 Reserved2;
+ __le16 Dialects[1]; /* One dialect (vers=) at a time for now */
+} __packed;
+
+/* SecurityMode flags */
+#define SMB2_NEGOTIATE_SIGNING_ENABLED_LE cpu_to_le16(0x0001)
+#define SMB2_NEGOTIATE_SIGNING_REQUIRED 0x0002 /* host-endian twin of the _LE value below */
+#define SMB2_NEGOTIATE_SIGNING_REQUIRED_LE cpu_to_le16(0x0002)
+/* Capabilities flags (host endian, not pre-converted) */
+#define SMB2_GLOBAL_CAP_DFS 0x00000001
+#define SMB2_GLOBAL_CAP_LEASING 0x00000002 /* Resp only New to SMB2.1 */
+#define SMB2_GLOBAL_CAP_LARGE_MTU 0X00000004 /* Resp only New to SMB2.1 */
+#define SMB2_GLOBAL_CAP_MULTI_CHANNEL 0x00000008 /* New to SMB3 */
+#define SMB2_GLOBAL_CAP_PERSISTENT_HANDLES 0x00000010 /* New to SMB3 */
+#define SMB2_GLOBAL_CAP_DIRECTORY_LEASING 0x00000020 /* New to SMB3 */
+#define SMB2_GLOBAL_CAP_ENCRYPTION 0x00000040 /* New to SMB3 */
+/* Internal types */
+#define SMB2_NT_FIND 0x00100000
+#define SMB2_LARGE_FILES 0x00200000
+
+#define SMB311_SALT_SIZE 32 /* length of Salt[] in struct smb2_preauth_neg_context */
+/* Hash Algorithm Types */
+#define SMB2_PREAUTH_INTEGRITY_SHA512 cpu_to_le16(0x0001)
+
+#define PREAUTH_HASHVALUE_SIZE 64 /* 64 bytes, the size of a SHA-512 digest */
+
+struct preauth_integrity_info { /* preauth hash state; note this struct is not declared __packed */
+ /* PreAuth integrity Hash ID */
+ __le16 Preauth_HashId;
+ /* PreAuth integrity Hash Value */
+ __u8 Preauth_HashValue[PREAUTH_HASHVALUE_SIZE];
+};
+
+/* offset is sizeof smb2_negotiate_rsp - 4 but rounded up to 8 bytes. */
+#ifdef CONFIG_SMB_SERVER_KERBEROS5
+/* sizeof(struct smb2_negotiate_rsp) - 4 =
+ * header(64) + response(64) + GSS_LENGTH(96) + GSS_PADDING(0) = 224 (0xe0)
+ */
+#define OFFSET_OF_NEG_CONTEXT 0xe0
+#else
+/* sizeof(struct smb2_negotiate_rsp) - 4 =
+ * header(64) + response(64) + GSS_LENGTH(74) + GSS_PADDING(6) = 208 (0xd0)
+ */
+#define OFFSET_OF_NEG_CONTEXT 0xd0
+#endif
+
+#define SMB2_PREAUTH_INTEGRITY_CAPABILITIES cpu_to_le16(1) /* ContextType values, little endian */
+#define SMB2_ENCRYPTION_CAPABILITIES cpu_to_le16(2)
+#define SMB2_COMPRESSION_CAPABILITIES cpu_to_le16(3)
+#define SMB2_NETNAME_NEGOTIATE_CONTEXT_ID cpu_to_le16(5)
+#define SMB2_SIGNING_CAPABILITIES cpu_to_le16(8)
+#define SMB2_POSIX_EXTENSIONS_AVAILABLE cpu_to_le16(0x100)
+
+struct smb2_neg_context { /* 8-byte prefix shared by every negotiate context below */
+ __le16 ContextType;
+ __le16 DataLength;
+ __le32 Reserved;
+ /* Followed by array of data */
+} __packed;
+
+struct smb2_preauth_neg_context { /* preauth integrity context with one hash algorithm and a fixed-size salt */
+ __le16 ContextType; /* 1 */
+ __le16 DataLength;
+ __le32 Reserved;
+ __le16 HashAlgorithmCount; /* 1 */
+ __le16 SaltLength;
+ __le16 HashAlgorithms; /* HashAlgorithms[0] since only one defined */
+ __u8 Salt[SMB311_SALT_SIZE];
+} __packed;
+
+/* Encryption Algorithms Ciphers (little-endian cipher ids) */
+#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001)
+#define SMB2_ENCRYPTION_AES128_GCM cpu_to_le16(0x0002)
+#define SMB2_ENCRYPTION_AES256_CCM cpu_to_le16(0x0003)
+#define SMB2_ENCRYPTION_AES256_GCM cpu_to_le16(0x0004)
+
+struct smb2_encryption_neg_context { /* encryption context: common prefix, cipher count, then the cipher ids */
+ __le16 ContextType; /* 2 */
+ __le16 DataLength;
+ __le32 Reserved;
+ /* CipherCount usually 2, but can be 3 when AES256-GCM enabled */
+ __le16 CipherCount; /* AES-128-GCM and AES-128-CCM by default */
+ __le16 Ciphers[];
+} __packed;
+
+#define SMB3_COMPRESS_NONE cpu_to_le16(0x0000) /* little-endian compression algorithm ids */
+#define SMB3_COMPRESS_LZNT1 cpu_to_le16(0x0001)
+#define SMB3_COMPRESS_LZ77 cpu_to_le16(0x0002)
+#define SMB3_COMPRESS_LZ77_HUFF cpu_to_le16(0x0003)
+
+struct smb2_compression_ctx { /* compression context: common prefix, 8-byte fixed part, then the algorithm ids */
+ __le16 ContextType; /* 3 */
+ __le16 DataLength;
+ __le32 Reserved;
+ __le16 CompressionAlgorithmCount;
+ __u16 Padding;
+ __le32 Reserved1;
+ __le16 CompressionAlgorithms[];
+} __packed;
+
+#define POSIX_CTXT_DATA_LEN 16 /* same as the length of Name[] below */
+struct smb2_posix_neg_context { /* POSIX extensions context: common prefix plus a 16-byte GUID */
+ __le16 ContextType; /* 0x100 */
+ __le16 DataLength;
+ __le32 Reserved;
+ __u8 Name[16]; /* POSIX ctxt GUID 93AD25509CB411E7B42383DE968BCD7C */
+} __packed;
+
+struct smb2_netname_neg_context { /* netname context: common prefix followed by the host name */
+ __le16 ContextType; /* 5 (SMB2_NETNAME_NEGOTIATE_CONTEXT_ID) */
+ __le16 DataLength;
+ __le32 Reserved;
+ __le16 NetName[]; /* hostname of target converted to UCS-2 */
+} __packed;
+
+/* Signing algorithms (little-endian ids) */
+#define SIGNING_ALG_HMAC_SHA256 cpu_to_le16(0)
+#define SIGNING_ALG_AES_CMAC cpu_to_le16(1)
+#define SIGNING_ALG_AES_GMAC cpu_to_le16(2)
+
+struct smb2_signing_capabilities { /* signing context: common prefix, algorithm count, then the algorithm ids */
+ __le16 ContextType; /* 8 */
+ __le16 DataLength;
+ __le32 Reserved;
+ __le16 SigningAlgorithmCount;
+ __le16 SigningAlgorithms[];
+} __packed;
+
+struct smb2_negotiate_rsp { /* NEGOTIATE response: header, 64-byte fixed part, then the security buffer */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 65 */
+ __le16 SecurityMode;
+ __le16 DialectRevision;
+ __le16 NegotiateContextCount; /* Prior to SMB3.1.1 was Reserved & MBZ */
+ __u8 ServerGUID[16];
+ __le32 Capabilities;
+ __le32 MaxTransactSize;
+ __le32 MaxReadSize;
+ __le32 MaxWriteSize;
+ __le64 SystemTime; /* MBZ */
+ __le64 ServerStartTime;
+ __le16 SecurityBufferOffset;
+ __le16 SecurityBufferLength;
+ __le32 NegotiateContextOffset; /* Pre:SMB3.1.1 was reserved/ignored */
+ __u8 Buffer[1]; /* variable length GSS security buffer */
+} __packed;
+
+/*
+ * Session setup request flags (presumably for the Flags byte of
+ * struct smb2_sess_setup_req -- confirm against callers)
+ */
+#define SMB2_SESSION_REQ_FLAG_BINDING 0x01
+#define SMB2_SESSION_REQ_FLAG_ENCRYPT_DATA 0x04
+
+/* Session state values: one "expired" value of 0 and two single-bit values */
+#define SMB2_SESSION_EXPIRED (0)
+#define SMB2_SESSION_IN_PROGRESS BIT(0)
+#define SMB2_SESSION_VALID BIT(1)
+
+struct smb2_sess_setup_req { /* SESSION_SETUP request: header, 24-byte fixed part, then the security buffer */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 25 */
+ __u8 Flags;
+ __u8 SecurityMode;
+ __le32 Capabilities;
+ __le32 Channel;
+ __le16 SecurityBufferOffset;
+ __le16 SecurityBufferLength;
+ __le64 PreviousSessionId;
+ __u8 Buffer[1]; /* variable length GSS security buffer */
+} __packed;
+
+/* Flags/Reserved for SMB3.1.1 */
+#define SMB2_SHAREFLAG_CLUSTER_RECONNECT 0x0001
+
+/* Currently defined SessionFlags (little endian, for SessionFlags below) */
+#define SMB2_SESSION_FLAG_IS_GUEST_LE cpu_to_le16(0x0001)
+#define SMB2_SESSION_FLAG_IS_NULL_LE cpu_to_le16(0x0002)
+#define SMB2_SESSION_FLAG_ENCRYPT_DATA_LE cpu_to_le16(0x0004)
+struct smb2_sess_setup_rsp { /* SESSION_SETUP response: header, 8-byte fixed part, then the security buffer */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 9 */
+ __le16 SessionFlags;
+ __le16 SecurityBufferOffset;
+ __le16 SecurityBufferLength;
+ __u8 Buffer[1]; /* variable length GSS security buffer */
+} __packed;
+
+struct smb2_logoff_req { /* LOGOFF request: header plus a 4-byte body */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 4 */
+ __le16 Reserved;
+} __packed;
+
+struct smb2_logoff_rsp { /* LOGOFF response: same layout as the request */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 4 */
+ __le16 Reserved;
+} __packed;
+
+struct smb2_tree_connect_req { /* TREE_CONNECT request: header, 8-byte fixed part, then the path */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 9 */
+ __le16 Reserved; /* Flags in SMB3.1.1 */
+ __le16 PathOffset;
+ __le16 PathLength;
+ __u8 Buffer[1]; /* variable length */
+} __packed;
+
+struct smb2_tree_connect_rsp { /* TREE_CONNECT response: header plus a 16-byte fixed body */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 16 */
+ __u8 ShareType; /* see below */
+ __u8 Reserved;
+ __le32 ShareFlags; /* see below */
+ __le32 Capabilities; /* see below */
+ __le32 MaximalAccess;
+} __packed;
+
+/* Possible ShareType values */
+#define SMB2_SHARE_TYPE_DISK 0x01
+#define SMB2_SHARE_TYPE_PIPE 0x02
+#define SMB2_SHARE_TYPE_PRINT 0x03
+
+/*
+ * Possible ShareFlags - exactly one and only one of the first 4 caching flags
+ * must be set (any of the remaining, SHI1005, flags may be set individually
+ * or in combination).
+ */
+#define SMB2_SHAREFLAG_MANUAL_CACHING 0x00000000
+#define SMB2_SHAREFLAG_AUTO_CACHING 0x00000010
+#define SMB2_SHAREFLAG_VDO_CACHING 0x00000020
+#define SMB2_SHAREFLAG_NO_CACHING 0x00000030
+#define SHI1005_FLAGS_DFS 0x00000001
+#define SHI1005_FLAGS_DFS_ROOT 0x00000002
+#define SHI1005_FLAGS_RESTRICT_EXCLUSIVE_OPENS 0x00000100
+#define SHI1005_FLAGS_FORCE_SHARED_DELETE 0x00000200
+#define SHI1005_FLAGS_ALLOW_NAMESPACE_CACHING 0x00000400
+#define SHI1005_FLAGS_ACCESS_BASED_DIRECTORY_ENUM 0x00000800
+#define SHI1005_FLAGS_FORCE_LEVELII_OPLOCK 0x00001000
+#define SHI1005_FLAGS_ENABLE_HASH 0x00002000
+
+/* Possible share capabilities (little endian, unlike the host-endian flags above) */
+#define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008)
+
+struct smb2_tree_disconnect_req { /* TREE_DISCONNECT request: header plus a 4-byte body */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 4 */
+ __le16 Reserved;
+} __packed;
+
+struct smb2_tree_disconnect_rsp { /* TREE_DISCONNECT response: same layout as the request */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 4 */
+ __le16 Reserved;
+} __packed;
+
+#define ATTR_READONLY_LE cpu_to_le32(ATTR_READONLY) /* little-endian forms of ATTR_* values defined outside this header */
+#define ATTR_HIDDEN_LE cpu_to_le32(ATTR_HIDDEN)
+#define ATTR_SYSTEM_LE cpu_to_le32(ATTR_SYSTEM)
+#define ATTR_DIRECTORY_LE cpu_to_le32(ATTR_DIRECTORY)
+#define ATTR_ARCHIVE_LE cpu_to_le32(ATTR_ARCHIVE)
+#define ATTR_NORMAL_LE cpu_to_le32(ATTR_NORMAL)
+#define ATTR_TEMPORARY_LE cpu_to_le32(ATTR_TEMPORARY)
+#define ATTR_SPARSE_FILE_LE cpu_to_le32(ATTR_SPARSE)
+#define ATTR_REPARSE_POINT_LE cpu_to_le32(ATTR_REPARSE)
+#define ATTR_COMPRESSED_LE cpu_to_le32(ATTR_COMPRESSED)
+#define ATTR_OFFLINE_LE cpu_to_le32(ATTR_OFFLINE)
+#define ATTR_NOT_CONTENT_INDEXED_LE cpu_to_le32(ATTR_NOT_CONTENT_INDEXED)
+#define ATTR_ENCRYPTED_LE cpu_to_le32(ATTR_ENCRYPTED)
+#define ATTR_INTEGRITY_STREAML_LE cpu_to_le32(0x00008000) /* NOTE(review): name looks like a typo for ..._STREAM_LE -- confirm before renaming */
+#define ATTR_NO_SCRUB_DATA_LE cpu_to_le32(0x00020000)
+#define ATTR_MASK_LE cpu_to_le32(0x00007FB7)
+
+/* Oplock levels (single-byte values) */
+#define SMB2_OPLOCK_LEVEL_NONE 0x00
+#define SMB2_OPLOCK_LEVEL_II 0x01
+#define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08
+#define SMB2_OPLOCK_LEVEL_BATCH 0x09
+#define SMB2_OPLOCK_LEVEL_LEASE 0xFF
+/* Non-spec internal type */
+#define SMB2_OPLOCK_LEVEL_NOCHANGE 0x99
+
+/*
+ * Desired Access Flags (little endian).  Several bits carry two names
+ * with the same value, e.g. FILE_READ_DATA_LE and FILE_LIST_DIRECTORY_LE.
+ */
+#define FILE_READ_DATA_LE cpu_to_le32(0x00000001)
+#define FILE_LIST_DIRECTORY_LE cpu_to_le32(0x00000001)
+#define FILE_WRITE_DATA_LE cpu_to_le32(0x00000002)
+#define FILE_ADD_FILE_LE cpu_to_le32(0x00000002)
+#define FILE_APPEND_DATA_LE cpu_to_le32(0x00000004)
+#define FILE_ADD_SUBDIRECTORY_LE cpu_to_le32(0x00000004)
+#define FILE_READ_EA_LE cpu_to_le32(0x00000008)
+#define FILE_WRITE_EA_LE cpu_to_le32(0x00000010)
+#define FILE_EXECUTE_LE cpu_to_le32(0x00000020)
+#define FILE_TRAVERSE_LE cpu_to_le32(0x00000020)
+#define FILE_DELETE_CHILD_LE cpu_to_le32(0x00000040)
+#define FILE_READ_ATTRIBUTES_LE cpu_to_le32(0x00000080)
+#define FILE_WRITE_ATTRIBUTES_LE cpu_to_le32(0x00000100)
+#define FILE_DELETE_LE cpu_to_le32(0x00010000)
+#define FILE_READ_CONTROL_LE cpu_to_le32(0x00020000)
+#define FILE_WRITE_DAC_LE cpu_to_le32(0x00040000)
+#define FILE_WRITE_OWNER_LE cpu_to_le32(0x00080000)
+#define FILE_SYNCHRONIZE_LE cpu_to_le32(0x00100000)
+#define FILE_ACCESS_SYSTEM_SECURITY_LE cpu_to_le32(0x01000000)
+#define FILE_MAXIMAL_ACCESS_LE cpu_to_le32(0x02000000)
+#define FILE_GENERIC_ALL_LE cpu_to_le32(0x10000000)
+#define FILE_GENERIC_EXECUTE_LE cpu_to_le32(0x20000000)
+#define FILE_GENERIC_WRITE_LE cpu_to_le32(0x40000000)
+#define FILE_GENERIC_READ_LE cpu_to_le32(0x80000000)
+#define DESIRED_ACCESS_MASK cpu_to_le32(0xF21F01FF)
+
+/* ShareAccess Flags (little endian) */
+#define FILE_SHARE_READ_LE cpu_to_le32(0x00000001)
+#define FILE_SHARE_WRITE_LE cpu_to_le32(0x00000002)
+#define FILE_SHARE_DELETE_LE cpu_to_le32(0x00000004)
+#define FILE_SHARE_ALL_LE cpu_to_le32(0x00000007) /* the three bits above combined */
+
+/* CreateDisposition Flags (little endian; enumerated values 0-5, not a bit set) */
+#define FILE_SUPERSEDE_LE cpu_to_le32(0x00000000)
+#define FILE_OPEN_LE cpu_to_le32(0x00000001)
+#define FILE_CREATE_LE cpu_to_le32(0x00000002)
+#define FILE_OPEN_IF_LE cpu_to_le32(0x00000003)
+#define FILE_OVERWRITE_LE cpu_to_le32(0x00000004)
+#define FILE_OVERWRITE_IF_LE cpu_to_le32(0x00000005)
+#define FILE_CREATE_MASK_LE cpu_to_le32(0x00000007) /* low three bits, wide enough for every value above */
+
+#define FILE_READ_DESIRED_ACCESS_LE (FILE_READ_DATA_LE | \
+ FILE_READ_EA_LE | \
+ FILE_GENERIC_READ_LE) /* union of the read-type access bits */
+#define FILE_WRITE_DESIRE_ACCESS_LE (FILE_WRITE_DATA_LE | \
+ FILE_APPEND_DATA_LE | \
+ FILE_WRITE_EA_LE | \
+ FILE_WRITE_ATTRIBUTES_LE | \
+ FILE_GENERIC_WRITE_LE) /* union of the write-type access bits */
+
+/* Impersonation Levels (little endian, enumerated 0-3) */
+#define IL_ANONYMOUS_LE cpu_to_le32(0x00000000)
+#define IL_IDENTIFICATION_LE cpu_to_le32(0x00000001)
+#define IL_IMPERSONATION_LE cpu_to_le32(0x00000002)
+#define IL_DELEGATE_LE cpu_to_le32(0x00000003)
+
+/*
+ * Create Context Values: either four-character tags or 16-byte binary
+ * identifiers spelled as \x escapes
+ */
+#define SMB2_CREATE_EA_BUFFER "ExtA" /* extended attributes */
+#define SMB2_CREATE_SD_BUFFER "SecD" /* security descriptor */
+#define SMB2_CREATE_DURABLE_HANDLE_REQUEST "DHnQ"
+#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT "DHnC"
+#define SMB2_CREATE_ALLOCATION_SIZE "AlSi"
+#define SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST "MxAc"
+#define SMB2_CREATE_TIMEWARP_REQUEST "TWrp"
+#define SMB2_CREATE_QUERY_ON_DISK_ID "QFid"
+#define SMB2_CREATE_REQUEST_LEASE "RqLs"
+#define SMB2_CREATE_DURABLE_HANDLE_REQUEST_V2 "DH2Q"
+#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT_V2 "DH2C"
+#define SMB2_CREATE_APP_INSTANCE_ID "\x45\xBC\xA6\x6A\xEF\xA7\xF7\x4A\x90\x08\xFA\x46\x2E\x14\x4D\x74"
+ #define SMB2_CREATE_APP_INSTANCE_VERSION "\xB9\x82\xD0\xB7\x3B\x56\x07\x4F\xA0\x7B\x52\x4A\x81\x16\xA0\x10"
+#define SVHDX_OPEN_DEVICE_CONTEXT 0x83CE6F1AD851E0986E34401CC9BCFCE9 /* NOTE(review): 128-bit value written as an integer constant, unlike the string forms above -- confirm it is never expanded */
+#define SMB2_CREATE_TAG_POSIX "\x93\xAD\x25\x50\x9C\xB4\x11\xE7\xB4\x23\x83\xDE\x96\x8B\xCD\x7C"
+
+struct smb2_create_req { /* CREATE request: header, 56-byte fixed part, then name and create contexts */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 57 */
+ __u8 SecurityFlags;
+ __u8 RequestedOplockLevel;
+ __le32 ImpersonationLevel;
+ __le64 SmbCreateFlags;
+ __le64 Reserved;
+ __le32 DesiredAccess;
+ __le32 FileAttributes;
+ __le32 ShareAccess;
+ __le32 CreateDisposition;
+ __le32 CreateOptions;
+ __le16 NameOffset;
+ __le16 NameLength;
+ __le32 CreateContextsOffset;
+ __le32 CreateContextsLength;
+ __u8 Buffer[0]; /* zero-length: adds nothing to sizeof, unlike the Buffer[1] used by other PDUs here */
+} __packed;
+
+struct smb2_create_rsp { /* CREATE response: header, 88-byte fixed part, then create contexts */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 89 */
+ __u8 OplockLevel;
+ __u8 Reserved;
+ __le32 CreateAction;
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 AllocationSize;
+ __le64 EndofFile;
+ __le32 FileAttributes;
+ __le32 Reserved2;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __le32 CreateContextsOffset;
+ __le32 CreateContextsLength;
+ __u8 Buffer[1];
+} __packed;
+
+struct create_context { /* 16-byte prefix embedded at the start of every create_* context struct below */
+ __le32 Next;
+ __le16 NameOffset;
+ __le16 NameLength;
+ __le16 Reserved;
+ __le16 DataOffset;
+ __le32 DataLength;
+ __u8 Buffer[0];
+} __packed;
+
+struct create_durable_req_v2 { /* durable handle v2 request context (presumably tag "DH2Q" -- confirm) */
+ struct create_context ccontext;
+ __u8 Name[8];
+ __le32 Timeout;
+ __le32 Flags;
+ __u8 Reserved[8];
+ __u8 CreateGuid[16];
+} __packed;
+
+struct create_durable_reconn_req { /* durable handle reconnect context; 16 data bytes viewed raw or as a file id pair */
+ struct create_context ccontext;
+ __u8 Name[8];
+ union {
+ __u8 Reserved[16];
+ struct {
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ } Fid;
+ } Data;
+} __packed;
+
+struct create_durable_reconn_v2_req { /* durable handle v2 reconnect context: file id pair, create GUID and flags */
+ struct create_context ccontext;
+ __u8 Name[8];
+ struct {
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ } Fid;
+ __u8 CreateGuid[16];
+ __le32 Flags;
+} __packed;
+
+struct create_app_inst_id { /* application instance id context carrying a 16-byte id */
+ struct create_context ccontext;
+ __u8 Name[8];
+ __u8 Reserved[8];
+ __u8 AppInstanceId[16];
+} __packed;
+
+struct create_app_inst_id_vers { /* application instance version context: 128-bit version as high/low halves */
+ struct create_context ccontext;
+ __u8 Name[8];
+ __u8 Reserved[2];
+ __u8 Padding[4];
+ __le64 AppInstanceVersionHigh;
+ __le64 AppInstanceVersionLow;
+} __packed;
+
+struct create_mxac_req { /* maximal access query context (presumably tag "MxAc" -- confirm) */
+ struct create_context ccontext;
+ __u8 Name[8];
+ __le64 Timestamp;
+} __packed;
+
+struct create_alloc_size_req { /* allocation size context (presumably tag "AlSi" -- confirm) */
+ struct create_context ccontext;
+ __u8 Name[8];
+ __le64 AllocationSize;
+} __packed;
+
+struct create_posix { /* POSIX create context request; Name is 16 bytes here, unlike the 8-byte names above */
+ struct create_context ccontext;
+ __u8 Name[16];
+ __le32 Mode;
+ __u32 Reserved;
+} __packed;
+
+struct create_durable_rsp { /* durable handle response context; 8 data bytes viewed raw or as one __u64 */
+ struct create_context ccontext;
+ __u8 Name[8];
+ union {
+ __u8 Reserved[8];
+ __u64 data;
+ } Data;
+} __packed;
+
+struct create_durable_v2_rsp { /* durable handle v2 response context: timeout and flags */
+ struct create_context ccontext;
+ __u8 Name[8];
+ __le32 Timeout;
+ __le32 Flags;
+} __packed;
+
+struct create_mxac_rsp { /* maximal access response context: query status and access mask */
+ struct create_context ccontext;
+ __u8 Name[8];
+ __le32 QueryStatus;
+ __le32 MaximalAccess;
+} __packed;
+
+struct create_disk_id_rsp { /* on-disk id response context (presumably tag "QFid" -- confirm) */
+ struct create_context ccontext;
+ __u8 Name[8];
+ __le64 DiskFileId;
+ __le64 VolumeId;
+ __u8 Reserved[16];
+} __packed;
+
+/*
+ * equivalent of the contents of SMB3.1.1 POSIX open context response:
+ * create context prefix, 16-byte name, link count, reparse tag, mode and a
+ * 40-byte buffer for SIDs
+ */
+struct create_posix_rsp {
+	struct create_context ccontext;
+	__u8 Name[16];
+	__le32 nlink;
+	__le32 reparse_tag;
+	__le32 mode;
+	__u8 SidBuffer[40];	/* __u8, matching every other byte field in this header */
+} __packed;
+
+#define SMB2_LEASE_NONE_LE cpu_to_le32(0x00) /* lease state bits, little endian */
+#define SMB2_LEASE_READ_CACHING_LE cpu_to_le32(0x01)
+#define SMB2_LEASE_HANDLE_CACHING_LE cpu_to_le32(0x02)
+#define SMB2_LEASE_WRITE_CACHING_LE cpu_to_le32(0x04)
+
+#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS_LE cpu_to_le32(0x02) /* a lease flag, not a state bit, despite sharing the value 0x02 */
+
+struct lease_context { /* 32-byte lease payload: 128-bit key as two halves, state, flags, duration */
+ __le64 LeaseKeyLow;
+ __le64 LeaseKeyHigh;
+ __le32 LeaseState;
+ __le32 LeaseFlags;
+ __le64 LeaseDuration;
+} __packed;
+
+struct lease_context_v2 { /* 52-byte lease payload: the lease_context fields plus parent key, epoch and reserved */
+ __le64 LeaseKeyLow;
+ __le64 LeaseKeyHigh;
+ __le32 LeaseState;
+ __le32 LeaseFlags;
+ __le64 LeaseDuration;
+ __le64 ParentLeaseKeyLow;
+ __le64 ParentLeaseKeyHigh;
+ __le16 Epoch;
+ __le16 Reserved;
+} __packed;
+
+struct create_lease { /* lease create context wrapping struct lease_context */
+ struct create_context ccontext;
+ __u8 Name[8];
+ struct lease_context lcontext;
+} __packed;
+
+struct create_lease_v2 { /* lease create context wrapping struct lease_context_v2, with 4 trailing pad bytes */
+ struct create_context ccontext;
+ __u8 Name[8];
+ struct lease_context_v2 lcontext;
+ __u8 Pad[4];
+} __packed;
+
+/* Currently defined values for close flags (little endian, for Flags below) */
+#define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001)
+struct smb2_close_req { /* CLOSE request: header plus a 24-byte fixed body */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 24 */
+ __le16 Flags;
+ __le32 Reserved;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+} __packed;
+
+struct smb2_close_rsp { /* CLOSE response: header plus a 60-byte fixed body */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* 60 */
+ __le16 Flags;
+ __le32 Reserved;
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 AllocationSize; /* Beginning of FILE_STANDARD_INFO equivalent */
+ __le64 EndOfFile;
+ __le32 Attributes;
+} __packed;
+
+struct smb2_flush_req { /* FLUSH request: header plus a 24-byte fixed body */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 24 */
+ __le16 Reserved1;
+ __le32 Reserved2;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+} __packed;
+
+struct smb2_flush_rsp { /* FLUSH response: header plus a 4-byte body */
+ struct smb2_hdr hdr;
+ __le16 StructureSize;
+ __le16 Reserved;
+} __packed;
+
+struct smb2_buffer_desc_v1 { /* 16-byte buffer descriptor: offset, token, length (presumably for the RDMA channels -- confirm) */
+ __le64 offset;
+ __le32 token;
+ __le32 length;
+} __packed;
+
+#define SMB2_CHANNEL_NONE cpu_to_le32(0x00000000) /* little-endian channel values */
+#define SMB2_CHANNEL_RDMA_V1 cpu_to_le32(0x00000001)
+#define SMB2_CHANNEL_RDMA_V1_INVALIDATE cpu_to_le32(0x00000002)
+
+struct smb2_read_req { /* READ request: header, 48-byte fixed part, then a buffer */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 49 */
+ __u8 Padding; /* offset from start of SMB2 header to place read */
+ __u8 Reserved;
+ __le32 Length;
+ __le64 Offset;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __le32 MinimumCount;
+ __le32 Channel; /* NOTE(review): annotated "Reserved MBZ", yet SMB2_CHANNEL_* values exist in this header -- confirm */
+ __le32 RemainingBytes;
+ __le16 ReadChannelInfoOffset; /* Reserved MBZ */
+ __le16 ReadChannelInfoLength; /* Reserved MBZ */
+ __u8 Buffer[1];
+} __packed;
+
+struct smb2_read_rsp { /* READ response: header, 16-byte fixed part, then the data */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 17 */
+ __u8 DataOffset;
+ __u8 Reserved;
+ __le32 DataLength;
+ __le32 DataRemaining;
+ __u32 Reserved2;
+ __u8 Buffer[1];
+} __packed;
+
+/* For write request Flags field below the following flag is defined: */
+#define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001
+
+struct smb2_write_req { /* WRITE request: header, 48-byte fixed part, then the data */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 49 */
+ __le16 DataOffset; /* offset from start of SMB2 header to write data */
+ __le32 Length;
+ __le64 Offset;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __le32 Channel; /* NOTE(review): annotated "Reserved MBZ", yet SMB2_CHANNEL_* values exist in this header -- confirm */
+ __le32 RemainingBytes;
+ __le16 WriteChannelInfoOffset; /* Reserved MBZ */
+ __le16 WriteChannelInfoLength; /* Reserved MBZ */
+ __le32 Flags;
+ __u8 Buffer[1];
+} __packed;
+
+struct smb2_write_rsp { /* WRITE response: header, 16-byte fixed part, then one buffer byte */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 17 */
+ __u8 DataOffset;
+ __u8 Reserved;
+ __le32 DataLength;
+ __le32 DataRemaining;
+ __u32 Reserved2;
+ __u8 Buffer[1];
+} __packed;
+
+#define SMB2_0_IOCTL_IS_FSCTL 0x00000001 /* host endian; presumably a bit for Flags in struct smb2_ioctl_req -- confirm */
+
+struct duplicate_extents_to_file { /* 40-byte payload: source file handle pair, two offsets and a byte count */
+ __u64 PersistentFileHandle; /* source file handle, opaque endianness */
+ __u64 VolatileFileHandle;
+ __le64 SourceFileOffset;
+ __le64 TargetFileOffset;
+ __le64 ByteCount; /* Bytes to be copied */
+} __packed;
+
+struct smb2_ioctl_req { /* IOCTL request: header, 56-byte fixed part, then the buffer */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 57 */
+ __le16 Reserved; /* Reserved */
+ __le32 CntCode;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __le32 InputOffset; /* NOTE(review): was annotated "Reserved MBZ"; the name suggests an offset to the input data -- confirm against MS-SMB2 */
+ __le32 InputCount;
+ __le32 MaxInputResponse;
+ __le32 OutputOffset;
+ __le32 OutputCount;
+ __le32 MaxOutputResponse;
+ __le32 Flags;
+ __le32 Reserved2;
+ __u8 Buffer[1];
+} __packed;
+
+struct smb2_ioctl_rsp { /* IOCTL response: header, 48-byte fixed part, then the buffer */
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 49 */
+ __le16 Reserved; /* Reserved */
+ __le32 CntCode;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __le32 InputOffset; /* NOTE(review): was annotated "Reserved MBZ"; the name suggests an offset to the input data -- confirm against MS-SMB2 */
+ __le32 InputCount;
+ __le32 OutputOffset;
+ __le32 OutputCount;
+ __le32 Flags;
+ __le32 Reserved2;
+ __u8 Buffer[1];
+} __packed;
+
+struct validate_negotiate_info_req { /* validate-negotiate input: capabilities, GUID, security mode and dialect list */
+ __le32 Capabilities;
+ __u8 Guid[SMB2_CLIENT_GUID_SIZE];
+ __le16 SecurityMode;
+ __le16 DialectCount;
+ __le16 Dialects[1]; /* dialect (someday maybe list) client asked for */
+} __packed;
+
+struct validate_negotiate_info_rsp { /* validate-negotiate output: 24 bytes, with a single dialect instead of a list */
+ __le32 Capabilities;
+ __u8 Guid[SMB2_CLIENT_GUID_SIZE];
+ __le16 SecurityMode;
+ __le16 Dialect; /* Dialect in use for the connection */
+} __packed;
+
+struct smb_sockaddr_in {
+ __be16 Port;
+ __be32 IPv4address;
+ __u8 Reserved[8];
+} __packed;
+
+struct smb_sockaddr_in6 {
+ __be16 Port;
+ __be32 FlowInfo;
+ __u8 IPv6address[16];
+ __be32 ScopeId;
+} __packed;
+
+#define INTERNETWORK 0x0002
+#define INTERNETWORKV6 0x0017
+
+struct sockaddr_storage_rsp {
+ __le16 Family;
+ union {
+ struct smb_sockaddr_in addr4;
+ struct smb_sockaddr_in6 addr6;
+ };
+} __packed;
+
+#define RSS_CAPABLE 0x00000001
+#define RDMA_CAPABLE 0x00000002
+
+struct network_interface_info_ioctl_rsp {
+ __le32 Next; /* next interface. zero if this is last one */
+ __le32 IfIndex;
+ __le32 Capability; /* RSS or RDMA Capable */
+ __le32 Reserved;
+ __le64 LinkSpeed;
+ char SockAddr_Storage[128];
+} __packed;
+
+struct file_object_buf_type1_ioctl_rsp {
+ __u8 ObjectId[16];
+ __u8 BirthVolumeId[16];
+ __u8 BirthObjectId[16];
+ __u8 DomainId[16];
+} __packed;
+
+struct resume_key_ioctl_rsp {
+ __le64 ResumeKey[3];
+ __le32 ContextLength;
+ __u8 Context[4]; /* ignored, Windows sets to 4 bytes of zero */
+} __packed;
+
+struct copychunk_ioctl_req {
+ __le64 ResumeKey[3];
+ __le32 ChunkCount;
+ __le32 Reserved;
+ __u8 Chunks[1]; /* array of srv_copychunk */
+} __packed;
+
+struct srv_copychunk {
+ __le64 SourceOffset;
+ __le64 TargetOffset;
+ __le32 Length;
+ __le32 Reserved;
+} __packed;
+
+struct copychunk_ioctl_rsp {
+ __le32 ChunksWritten;
+ __le32 ChunkBytesWritten;
+ __le32 TotalBytesWritten;
+} __packed;
+
+struct file_sparse {
+ __u8 SetSparse;
+} __packed;
+
+struct file_zero_data_information {
+ __le64 FileOffset;
+ __le64 BeyondFinalZero;
+} __packed;
+
+struct file_allocated_range_buffer {
+ __le64 file_offset;
+ __le64 length;
+} __packed;
+
+struct reparse_data_buffer {
+ __le32 ReparseTag;
+ __le16 ReparseDataLength;
+ __u16 Reserved;
+ __u8 DataBuffer[]; /* Variable Length */
+} __packed;
+
+/* Completion Filter flags for Notify */
+#define FILE_NOTIFY_CHANGE_FILE_NAME 0x00000001
+#define FILE_NOTIFY_CHANGE_DIR_NAME 0x00000002
+#define FILE_NOTIFY_CHANGE_NAME 0x00000003
+#define FILE_NOTIFY_CHANGE_ATTRIBUTES 0x00000004
+#define FILE_NOTIFY_CHANGE_SIZE 0x00000008
+#define FILE_NOTIFY_CHANGE_LAST_WRITE 0x00000010
+#define FILE_NOTIFY_CHANGE_LAST_ACCESS 0x00000020
+#define FILE_NOTIFY_CHANGE_CREATION 0x00000040
+#define FILE_NOTIFY_CHANGE_EA 0x00000080
+#define FILE_NOTIFY_CHANGE_SECURITY 0x00000100
+#define FILE_NOTIFY_CHANGE_STREAM_NAME 0x00000200
+#define FILE_NOTIFY_CHANGE_STREAM_SIZE 0x00000400
+#define FILE_NOTIFY_CHANGE_STREAM_WRITE 0x00000800
+
+/* Flags */
+#define SMB2_WATCH_TREE 0x0001
+
+struct smb2_notify_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 32 */
+ __le16 Flags;
+ __le32 OutputBufferLength;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __u32 CompletionFileter;
+ __u32 Reserved;
+} __packed;
+
+struct smb2_notify_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 9 */
+ __le16 OutputBufferOffset;
+ __le32 OutputBufferLength;
+ __u8 Buffer[1];
+} __packed;
+
+/* SMB2 Notify Action Flags */
+#define FILE_ACTION_ADDED 0x00000001
+#define FILE_ACTION_REMOVED 0x00000002
+#define FILE_ACTION_MODIFIED 0x00000003
+#define FILE_ACTION_RENAMED_OLD_NAME 0x00000004
+#define FILE_ACTION_RENAMED_NEW_NAME 0x00000005
+#define FILE_ACTION_ADDED_STREAM 0x00000006
+#define FILE_ACTION_REMOVED_STREAM 0x00000007
+#define FILE_ACTION_MODIFIED_STREAM 0x00000008
+#define FILE_ACTION_REMOVED_BY_DELETE 0x00000009
+
+#define SMB2_LOCKFLAG_SHARED 0x0001
+#define SMB2_LOCKFLAG_EXCLUSIVE 0x0002
+#define SMB2_LOCKFLAG_UNLOCK 0x0004
+#define SMB2_LOCKFLAG_FAIL_IMMEDIATELY 0x0010
+#define SMB2_LOCKFLAG_MASK 0x0007
+
+struct smb2_lock_element {
+ __le64 Offset;
+ __le64 Length;
+ __le32 Flags;
+ __le32 Reserved;
+} __packed;
+
+struct smb2_lock_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 48 */
+ __le16 LockCount;
+ __le32 Reserved;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ /* Followed by at least one */
+ struct smb2_lock_element locks[1];
+} __packed;
+
+struct smb2_lock_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 4 */
+ __le16 Reserved;
+} __packed;
+
+struct smb2_echo_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 4 */
+ __u16 Reserved;
+} __packed;
+
+struct smb2_echo_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 4 */
+ __u16 Reserved;
+} __packed;
+
+/* search (query_directory) Flags field */
+#define SMB2_RESTART_SCANS 0x01
+#define SMB2_RETURN_SINGLE_ENTRY 0x02
+#define SMB2_INDEX_SPECIFIED 0x04
+#define SMB2_REOPEN 0x10
+
+struct smb2_query_directory_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 33 */
+ __u8 FileInformationClass;
+ __u8 Flags;
+ __le32 FileIndex;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __le16 FileNameOffset;
+ __le16 FileNameLength;
+ __le32 OutputBufferLength;
+ __u8 Buffer[1];
+} __packed;
+
+struct smb2_query_directory_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 9 */
+ __le16 OutputBufferOffset;
+ __le32 OutputBufferLength;
+ __u8 Buffer[1];
+} __packed;
+
+/* Possible InfoType values */
+#define SMB2_O_INFO_FILE 0x01
+#define SMB2_O_INFO_FILESYSTEM 0x02
+#define SMB2_O_INFO_SECURITY 0x03
+#define SMB2_O_INFO_QUOTA 0x04
+
+/* Security info type additionalinfo flags. See MS-SMB2 (2.2.37) or MS-DTYP */
+#define OWNER_SECINFO 0x00000001
+#define GROUP_SECINFO 0x00000002
+#define DACL_SECINFO 0x00000004
+#define SACL_SECINFO 0x00000008
+#define LABEL_SECINFO 0x00000010
+#define ATTRIBUTE_SECINFO 0x00000020
+#define SCOPE_SECINFO 0x00000040
+#define BACKUP_SECINFO 0x00010000
+#define UNPROTECTED_SACL_SECINFO 0x10000000
+#define UNPROTECTED_DACL_SECINFO 0x20000000
+#define PROTECTED_SACL_SECINFO 0x40000000
+#define PROTECTED_DACL_SECINFO 0x80000000
+
+struct smb2_query_info_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 41 */
+ __u8 InfoType;
+ __u8 FileInfoClass;
+ __le32 OutputBufferLength;
+ __le16 InputBufferOffset;
+ __u16 Reserved;
+ __le32 InputBufferLength;
+ __le32 AdditionalInformation;
+ __le32 Flags;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __u8 Buffer[1];
+} __packed;
+
+struct smb2_query_info_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 9 */
+ __le16 OutputBufferOffset;
+ __le32 OutputBufferLength;
+ __u8 Buffer[1];
+} __packed;
+
+struct smb2_set_info_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 33 */
+ __u8 InfoType;
+ __u8 FileInfoClass;
+ __le32 BufferLength;
+ __le16 BufferOffset;
+ __u16 Reserved;
+ __le32 AdditionalInformation;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __u8 Buffer[1];
+} __packed;
+
+struct smb2_set_info_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 2 */
+} __packed;
+
+/* FILE Info response size */
+#define FILE_DIRECTORY_INFORMATION_SIZE 1
+#define FILE_FULL_DIRECTORY_INFORMATION_SIZE 2
+#define FILE_BOTH_DIRECTORY_INFORMATION_SIZE 3
+#define FILE_BASIC_INFORMATION_SIZE 40
+#define FILE_STANDARD_INFORMATION_SIZE 24
+#define FILE_INTERNAL_INFORMATION_SIZE 8
+#define FILE_EA_INFORMATION_SIZE 4
+#define FILE_ACCESS_INFORMATION_SIZE 4
+#define FILE_NAME_INFORMATION_SIZE 9
+#define FILE_RENAME_INFORMATION_SIZE 10
+#define FILE_LINK_INFORMATION_SIZE 11
+#define FILE_NAMES_INFORMATION_SIZE 12
+#define FILE_DISPOSITION_INFORMATION_SIZE 13
+#define FILE_POSITION_INFORMATION_SIZE 14
+#define FILE_FULL_EA_INFORMATION_SIZE 15
+#define FILE_MODE_INFORMATION_SIZE 4
+#define FILE_ALIGNMENT_INFORMATION_SIZE 4
+#define FILE_ALL_INFORMATION_SIZE 104
+#define FILE_ALLOCATION_INFORMATION_SIZE 19
+#define FILE_END_OF_FILE_INFORMATION_SIZE 20
+#define FILE_ALTERNATE_NAME_INFORMATION_SIZE 8
+#define FILE_STREAM_INFORMATION_SIZE 32
+#define FILE_PIPE_INFORMATION_SIZE 23
+#define FILE_PIPE_LOCAL_INFORMATION_SIZE 24
+#define FILE_PIPE_REMOTE_INFORMATION_SIZE 25
+#define FILE_MAILSLOT_QUERY_INFORMATION_SIZE 26
+#define FILE_MAILSLOT_SET_INFORMATION_SIZE 27
+#define FILE_COMPRESSION_INFORMATION_SIZE 16
+#define FILE_OBJECT_ID_INFORMATION_SIZE 29
+/* Number 30 not defined in documents */
+#define FILE_MOVE_CLUSTER_INFORMATION_SIZE 31
+#define FILE_QUOTA_INFORMATION_SIZE 32
+#define FILE_REPARSE_POINT_INFORMATION_SIZE 33
+#define FILE_NETWORK_OPEN_INFORMATION_SIZE 56
+#define FILE_ATTRIBUTE_TAG_INFORMATION_SIZE 8
+
+/* FS Info response size */
+#define FS_DEVICE_INFORMATION_SIZE 8
+#define FS_ATTRIBUTE_INFORMATION_SIZE 16
+#define FS_VOLUME_INFORMATION_SIZE 24
+#define FS_SIZE_INFORMATION_SIZE 24
+#define FS_FULL_SIZE_INFORMATION_SIZE 32
+#define FS_SECTOR_SIZE_INFORMATION_SIZE 28
+#define FS_OBJECT_ID_INFORMATION_SIZE 64
+#define FS_CONTROL_INFORMATION_SIZE 48
+#define FS_POSIX_INFORMATION_SIZE 56
+
+/* FS_ATTRIBUTE_File_System_Name */
+#define FS_TYPE_SUPPORT_SIZE 44
+struct fs_type_info {
+ char *fs_name;
+ long magic_number;
+} __packed;
+
+struct smb2_oplock_break {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 24 */
+ __u8 OplockLevel;
+ __u8 Reserved;
+ __le32 Reserved2;
+ __le64 PersistentFid;
+ __le64 VolatileFid;
+} __packed;
+
+#define SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED cpu_to_le32(0x01)
+
+struct smb2_lease_break {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 44 */
+ __le16 Epoch;
+ __le32 Flags;
+ __u8 LeaseKey[16];
+ __le32 CurrentLeaseState;
+ __le32 NewLeaseState;
+ __le32 BreakReason;
+ __le32 AccessMaskHint;
+ __le32 ShareMaskHint;
+} __packed;
+
+struct smb2_lease_ack {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 36 */
+ __le16 Reserved;
+ __le32 Flags;
+ __u8 LeaseKey[16];
+ __le32 LeaseState;
+ __le64 LeaseDuration;
+} __packed;
+
+/*
+ * PDU infolevel structure definitions
+ * BB consider moving to a different header
+ */
+
+/* File System Information Classes */
+#define FS_VOLUME_INFORMATION 1 /* Query */
+#define FS_LABEL_INFORMATION 2 /* Set */
+#define FS_SIZE_INFORMATION 3 /* Query */
+#define FS_DEVICE_INFORMATION 4 /* Query */
+#define FS_ATTRIBUTE_INFORMATION 5 /* Query */
+#define FS_CONTROL_INFORMATION 6 /* Query, Set */
+#define FS_FULL_SIZE_INFORMATION 7 /* Query */
+#define FS_OBJECT_ID_INFORMATION 8 /* Query, Set */
+#define FS_DRIVER_PATH_INFORMATION 9 /* Query */
+#define FS_SECTOR_SIZE_INFORMATION 11 /* SMB3 or later. Query */
+#define FS_POSIX_INFORMATION 100 /* SMB3.1.1 POSIX. Query */
+
+struct smb2_fs_full_size_info {
+ __le64 TotalAllocationUnits;
+ __le64 CallerAvailableAllocationUnits;
+ __le64 ActualAvailableAllocationUnits;
+ __le32 SectorsPerAllocationUnit;
+ __le32 BytesPerSector;
+} __packed;
+
+#define SSINFO_FLAGS_ALIGNED_DEVICE 0x00000001
+#define SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE 0x00000002
+#define SSINFO_FLAGS_NO_SEEK_PENALTY 0x00000004
+#define SSINFO_FLAGS_TRIM_ENABLED 0x00000008
+
+/* sector size info struct */
+struct smb3_fs_ss_info {
+ __le32 LogicalBytesPerSector;
+ __le32 PhysicalBytesPerSectorForAtomicity;
+ __le32 PhysicalBytesPerSectorForPerf;
+ __le32 FSEffPhysicalBytesPerSectorForAtomicity;
+ __le32 Flags;
+ __le32 ByteOffsetForSectorAlignment;
+ __le32 ByteOffsetForPartitionAlignment;
+} __packed;
+
+/* File System Control Information */
+struct smb2_fs_control_info {
+ __le64 FreeSpaceStartFiltering;
+ __le64 FreeSpaceThreshold;
+ __le64 FreeSpaceStopFiltering;
+ __le64 DefaultQuotaThreshold;
+ __le64 DefaultQuotaLimit;
+ __le32 FileSystemControlFlags;
+ __le32 Padding;
+} __packed;
+
+/* partial list of QUERY INFO levels */
+#define FILE_DIRECTORY_INFORMATION 1
+#define FILE_FULL_DIRECTORY_INFORMATION 2
+#define FILE_BOTH_DIRECTORY_INFORMATION 3
+#define FILE_BASIC_INFORMATION 4
+#define FILE_STANDARD_INFORMATION 5
+#define FILE_INTERNAL_INFORMATION 6
+#define FILE_EA_INFORMATION 7
+#define FILE_ACCESS_INFORMATION 8
+#define FILE_NAME_INFORMATION 9
+#define FILE_RENAME_INFORMATION 10
+#define FILE_LINK_INFORMATION 11
+#define FILE_NAMES_INFORMATION 12
+#define FILE_DISPOSITION_INFORMATION 13
+#define FILE_POSITION_INFORMATION 14
+#define FILE_FULL_EA_INFORMATION 15
+#define FILE_MODE_INFORMATION 16
+#define FILE_ALIGNMENT_INFORMATION 17
+#define FILE_ALL_INFORMATION 18
+#define FILE_ALLOCATION_INFORMATION 19
+#define FILE_END_OF_FILE_INFORMATION 20
+#define FILE_ALTERNATE_NAME_INFORMATION 21
+#define FILE_STREAM_INFORMATION 22
+#define FILE_PIPE_INFORMATION 23
+#define FILE_PIPE_LOCAL_INFORMATION 24
+#define FILE_PIPE_REMOTE_INFORMATION 25
+#define FILE_MAILSLOT_QUERY_INFORMATION 26
+#define FILE_MAILSLOT_SET_INFORMATION 27
+#define FILE_COMPRESSION_INFORMATION 28
+#define FILE_OBJECT_ID_INFORMATION 29
+/* Number 30 not defined in documents */
+#define FILE_MOVE_CLUSTER_INFORMATION 31
+#define FILE_QUOTA_INFORMATION 32
+#define FILE_REPARSE_POINT_INFORMATION 33
+#define FILE_NETWORK_OPEN_INFORMATION 34
+#define FILE_ATTRIBUTE_TAG_INFORMATION 35
+#define FILE_TRACKING_INFORMATION 36
+#define FILEID_BOTH_DIRECTORY_INFORMATION 37
+#define FILEID_FULL_DIRECTORY_INFORMATION 38
+#define FILE_VALID_DATA_LENGTH_INFORMATION 39
+#define FILE_SHORT_NAME_INFORMATION 40
+#define FILE_SFIO_RESERVE_INFORMATION 44
+#define FILE_SFIO_VOLUME_INFORMATION 45
+#define FILE_HARD_LINK_INFORMATION 46
+#define FILE_NORMALIZED_NAME_INFORMATION 48
+#define FILEID_GLOBAL_TX_DIRECTORY_INFORMATION 50
+#define FILE_STANDARD_LINK_INFORMATION 54
+
+#define OP_BREAK_STRUCT_SIZE_20 24
+#define OP_BREAK_STRUCT_SIZE_21 36
+
+struct smb2_file_access_info {
+ __le32 AccessFlags;
+} __packed;
+
+struct smb2_file_alignment_info {
+ __le32 AlignmentRequirement;
+} __packed;
+
+struct smb2_file_internal_info {
+ __le64 IndexNumber;
+} __packed; /* level 6 Query */
+
+struct smb2_file_rename_info { /* encoding of request for level 10 */
+ __u8 ReplaceIfExists; /* 1 = replace existing target with new */
+ /* 0 = fail if target already exists */
+ __u8 Reserved[7];
+ __u64 RootDirectory; /* MBZ for network operations (why says spec?) */
+ __le32 FileNameLength;
+ char FileName[0]; /* New name to be assigned */
+} __packed; /* level 10 Set */
+
+struct smb2_file_link_info { /* encoding of request for level 11 */
+ __u8 ReplaceIfExists; /* 1 = replace existing link with new */
+ /* 0 = fail if link already exists */
+ __u8 Reserved[7];
+ __u64 RootDirectory; /* MBZ for network operations (why says spec?) */
+ __le32 FileNameLength;
+ char FileName[0]; /* Name to be assigned to new link */
+} __packed; /* level 11 Set */
+
+/*
+ * This level 18, although with struct with same name is different from cifs
+ * level 0x107. Level 0x107 has an extra u64 between AccessFlags and
+ * CurrentByteOffset.
+ */
+struct smb2_file_all_info { /* data block encoding of response to level 18 */
+ __le64 CreationTime; /* Beginning of FILE_BASIC_INFO equivalent */
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le32 Attributes;
+ __u32 Pad1; /* End of FILE_BASIC_INFO_INFO equivalent */
+ __le64 AllocationSize; /* Beginning of FILE_STANDARD_INFO equivalent */
+ __le64 EndOfFile; /* size ie offset to first free byte in file */
+ __le32 NumberOfLinks; /* hard links */
+ __u8 DeletePending;
+ __u8 Directory;
+ __u16 Pad2; /* End of FILE_STANDARD_INFO equivalent */
+ __le64 IndexNumber;
+ __le32 EASize;
+ __le32 AccessFlags;
+ __le64 CurrentByteOffset;
+ __le32 Mode;
+ __le32 AlignmentRequirement;
+ __le32 FileNameLength;
+ char FileName[1];
+} __packed; /* level 18 Query */
+
+struct smb2_file_alt_name_info {
+ __le32 FileNameLength;
+ char FileName[0];
+} __packed;
+
+struct smb2_file_stream_info {
+ __le32 NextEntryOffset;
+ __le32 StreamNameLength;
+ __le64 StreamSize;
+ __le64 StreamAllocationSize;
+ char StreamName[0];
+} __packed;
+
+struct smb2_file_eof_info { /* encoding of request for level 20 */
+ __le64 EndOfFile; /* new end of file value */
+} __packed; /* level 20 Set */
+
+struct smb2_file_ntwrk_info {
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 AllocationSize;
+ __le64 EndOfFile;
+ __le32 Attributes;
+ __le32 Reserved;
+} __packed;
+
+struct smb2_file_standard_info {
+ __le64 AllocationSize;
+ __le64 EndOfFile;
+ __le32 NumberOfLinks; /* hard links */
+ __u8 DeletePending;
+ __u8 Directory;
+ __le16 Reserved;
+} __packed; /* level 5 Query (FILE_STANDARD_INFORMATION) */
+
+struct smb2_file_ea_info {
+ __le32 EASize;
+} __packed;
+
+struct smb2_file_alloc_info {
+ __le64 AllocationSize;
+} __packed;
+
+struct smb2_file_disposition_info {
+ __u8 DeletePending;
+} __packed;
+
+struct smb2_file_pos_info {
+ __le64 CurrentByteOffset;
+} __packed;
+
+#define FILE_MODE_INFO_MASK cpu_to_le32(0x0000103e)
+
+struct smb2_file_mode_info {
+ __le32 Mode;
+} __packed;
+
+#define COMPRESSION_FORMAT_NONE 0x0000
+#define COMPRESSION_FORMAT_LZNT1 0x0002
+
+struct smb2_file_comp_info {
+ __le64 CompressedFileSize;
+ __le16 CompressionFormat;
+ __u8 CompressionUnitShift;
+ __u8 ChunkShift;
+ __u8 ClusterShift;
+ __u8 Reserved[3];
+} __packed;
+
+struct smb2_file_attr_tag_info {
+ __le32 FileAttributes;
+ __le32 ReparseTag;
+} __packed;
+
+#define SL_RESTART_SCAN 0x00000001
+#define SL_RETURN_SINGLE_ENTRY 0x00000002
+#define SL_INDEX_SPECIFIED 0x00000004
+
+struct smb2_ea_info_req {
+ __le32 NextEntryOffset;
+ __u8 EaNameLength;
+ char name[1];
+} __packed; /* level 15 Query */
+
+struct smb2_ea_info {
+ __le32 NextEntryOffset;
+ __u8 Flags;
+ __u8 EaNameLength;
+ __le16 EaValueLength;
+ char name[1];
+ /* optionally followed by value */
+} __packed; /* level 15 Query */
+
+struct create_ea_buf_req {
+ struct create_context ccontext;
+ __u8 Name[8];
+ struct smb2_ea_info ea;
+} __packed;
+
+struct create_sd_buf_req {
+ struct create_context ccontext;
+ __u8 Name[8];
+ struct smb_ntsd ntsd;
+} __packed;
+
+/* Find File infolevels */
+#define SMB_FIND_FILE_POSIX_INFO 0x064
+
+/* Level 100 query info */
+struct smb311_posix_qinfo {
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 EndOfFile;
+ __le64 AllocationSize;
+ __le32 DosAttributes;
+ __le64 Inode;
+ __le32 DeviceId;
+ __le32 Zero;
+ /* beginning of POSIX Create Context Response */
+ __le32 HardLinks;
+ __le32 ReparseTag;
+ __le32 Mode;
+ u8 Sids[];
+ /*
+ * var sized owner SID
+ * var sized group SID
+ * le32 filenamelength
+ * u8 filename[]
+ */
+} __packed;
+
+struct smb2_posix_info {
+ __le32 NextEntryOffset;
+ __u32 Ignored;
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 EndOfFile;
+ __le64 AllocationSize;
+ __le32 DosAttributes;
+ __le64 Inode;
+ __le32 DeviceId;
+ __le32 Zero;
+ /* beginning of POSIX Create Context Response */
+ __le32 HardLinks;
+ __le32 ReparseTag;
+ __le32 Mode;
+ u8 SidBuffer[40];
+ __le32 name_len;
+ u8 name[1];
+ /*
+ * var sized owner SID
+ * var sized group SID
+ * le32 filenamelength
+ * u8 filename[]
+ */
+} __packed;
+
+/* functions */
+int init_smb2_0_server(struct ksmbd_conn *conn);
+void init_smb2_1_server(struct ksmbd_conn *conn);
+void init_smb3_0_server(struct ksmbd_conn *conn);
+void init_smb3_02_server(struct ksmbd_conn *conn);
+int init_smb3_11_server(struct ksmbd_conn *conn);
+
+void init_smb2_max_read_size(unsigned int sz);
+void init_smb2_max_write_size(unsigned int sz);
+void init_smb2_max_trans_size(unsigned int sz);
+
+bool is_smb2_neg_cmd(struct ksmbd_work *work);
+bool is_smb2_rsp(struct ksmbd_work *work);
+
+u16 get_smb2_cmd_val(struct ksmbd_work *work);
+void set_smb2_rsp_status(struct ksmbd_work *work, __le32 err);
+int init_smb2_rsp_hdr(struct ksmbd_work *work);
+int smb2_allocate_rsp_buf(struct ksmbd_work *work);
+bool is_chained_smb2_message(struct ksmbd_work *work);
+int init_smb2_neg_rsp(struct ksmbd_work *work);
+void smb2_set_err_rsp(struct ksmbd_work *work);
+int smb2_check_user_session(struct ksmbd_work *work);
+int smb2_get_ksmbd_tcon(struct ksmbd_work *work);
+bool smb2_is_sign_req(struct ksmbd_work *work, unsigned int command);
+int smb2_check_sign_req(struct ksmbd_work *work);
+void smb2_set_sign_rsp(struct ksmbd_work *work);
+int smb3_check_sign_req(struct ksmbd_work *work);
+void smb3_set_sign_rsp(struct ksmbd_work *work);
+int find_matching_smb2_dialect(int start_index, __le16 *cli_dialects,
+ __le16 dialects_count);
+struct file_lock *smb_flock_init(struct file *f);
+int setup_async_work(struct ksmbd_work *work, void (*fn)(void **),
+ void **arg);
+void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status);
+struct channel *lookup_chann_list(struct ksmbd_session *sess,
+ struct ksmbd_conn *conn);
+void smb3_preauth_hash_rsp(struct ksmbd_work *work);
+bool smb3_is_transform_hdr(void *buf);
+int smb3_decrypt_req(struct ksmbd_work *work);
+int smb3_encrypt_resp(struct ksmbd_work *work);
+bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work);
+int smb2_set_rsp_credits(struct ksmbd_work *work);
+
+/* smb2 misc functions */
+int ksmbd_smb2_check_message(struct ksmbd_work *work);
+
+/* smb2 command handlers */
+int smb2_handle_negotiate(struct ksmbd_work *work);
+int smb2_negotiate_request(struct ksmbd_work *work);
+int smb2_sess_setup(struct ksmbd_work *work);
+int smb2_tree_connect(struct ksmbd_work *work);
+int smb2_tree_disconnect(struct ksmbd_work *work);
+int smb2_session_logoff(struct ksmbd_work *work);
+int smb2_open(struct ksmbd_work *work);
+int smb2_query_info(struct ksmbd_work *work);
+int smb2_query_dir(struct ksmbd_work *work);
+int smb2_close(struct ksmbd_work *work);
+int smb2_echo(struct ksmbd_work *work);
+int smb2_set_info(struct ksmbd_work *work);
+int smb2_read(struct ksmbd_work *work);
+int smb2_write(struct ksmbd_work *work);
+int smb2_flush(struct ksmbd_work *work);
+int smb2_cancel(struct ksmbd_work *work);
+int smb2_lock(struct ksmbd_work *work);
+int smb2_ioctl(struct ksmbd_work *work);
+int smb2_oplock_break(struct ksmbd_work *work);
+int smb2_notify(struct ksmbd_work *ksmbd_work);
+
+#endif /* _SMB2PDU_H */
diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c
new file mode 100644
index 000000000000..b108b918ec84
--- /dev/null
+++ b/fs/ksmbd/smb_common.c
@@ -0,0 +1,674 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ * Copyright (C) 2018 Namjae Jeon <linkinjeon@kernel.org>
+ */
+
+#include "smb_common.h"
+#include "server.h"
+#include "misc.h"
+#include "smbstatus.h"
+#include "connection.h"
+#include "ksmbd_work.h"
+#include "mgmt/user_session.h"
+#include "mgmt/user_config.h"
+#include "mgmt/tree_connect.h"
+#include "mgmt/share_config.h"
+
+/*for shortname implementation */
+static const char basechars[43] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_-!@#$%";
+#define MANGLE_BASE (sizeof(basechars) / sizeof(char) - 1)
+#define MAGIC_CHAR '~'
+#define PERIOD '.'
+#define mangle(V) ((char)(basechars[(V) % MANGLE_BASE]))
+#define KSMBD_MIN_SUPPORTED_HEADER_SIZE (sizeof(struct smb2_hdr))
+
+/* One negotiable dialect, as listed in smb1_protos[] / smb2_protos[]. */
+struct smb_protocol {
+ int index; /* value returned by ksmbd_lookup_protocol_idx() and tested by supported_protocol() */
+ char *name; /* dialect string compared against the client's by-name dialect list */
+ char *prot; /* string compared against the argument of ksmbd_lookup_protocol_idx() */
+ __u16 prot_id; /* dialect id compared against / returned for the client's by-id dialect list */
+};
+
+/*
+ * Dialects matched by name in ksmbd_lookup_dialect_by_name(), i.e. for
+ * negotiate requests that carry the SMB1 protocol number. Despite the table
+ * name, both entries are SMB2 dialect strings. Also searched (first) by
+ * ksmbd_lookup_protocol_idx().
+ */
+static struct smb_protocol smb1_protos[] = {
+ {
+ SMB21_PROT,
+ "\2SMB 2.1",
+ "SMB2_10",
+ SMB21_PROT_ID
+ },
+ {
+ SMB2X_PROT,
+ "\2SMB 2.???",
+ "SMB2_22",
+ SMB2X_PROT_ID
+ },
+};
+
+/*
+ * Dialects matched by id in ksmbd_lookup_dialect_by_id(). Lookups walk the
+ * table from the last entry to the first, so later entries are tried first.
+ */
+static struct smb_protocol smb2_protos[] = {
+ {
+ SMB21_PROT,
+ "\2SMB 2.1",
+ "SMB2_10",
+ SMB21_PROT_ID
+ },
+ {
+ SMB30_PROT,
+ "\2SMB 3.0",
+ "SMB3_00",
+ SMB30_PROT_ID
+ },
+ {
+ SMB302_PROT,
+ "\2SMB 3.02",
+ "SMB3_02",
+ SMB302_PROT_ID
+ },
+ {
+ SMB311_PROT,
+ "\2SMB 3.1.1",
+ "SMB3_11",
+ SMB311_PROT_ID
+ },
+};
+
+/* Return the fixed upper bound on the number of chunks in one copy request. */
+unsigned int ksmbd_server_side_copy_max_chunk_count(void)
+{
+	const unsigned int max_chunks = 256;
+
+	return max_chunks;
+}
+
+/* Return the fixed upper bound, in bytes, on a single copy chunk. */
+unsigned int ksmbd_server_side_copy_max_chunk_size(void)
+{
+	const unsigned int max_bytes = (2U << 30) - 1;
+
+	return max_bytes;
+}
+
+/* Return the fixed upper bound, in bytes, on the total size of one copy. */
+unsigned int ksmbd_server_side_copy_max_total_size(void)
+{
+	const unsigned int max_bytes = (2U << 30) - 1;
+
+	return max_bytes;
+}
+
+/* Return the lowest protocol index this file reports: SMB2_PROT. */
+inline int ksmbd_min_protocol(void)
+{
+	int lowest = SMB2_PROT;
+
+	return lowest;
+}
+
+/* Return the highest protocol index this file reports: SMB311_PROT. */
+inline int ksmbd_max_protocol(void)
+{
+	int highest = SMB311_PROT;
+
+	return highest;
+}
+
+/*
+ * ksmbd_lookup_protocol_idx() - map a protocol string to its dialect index
+ * @str:	protocol string, compared against the ->prot member of each entry
+ *
+ * smb1_protos[] is searched first, then smb2_protos[], each from its last
+ * entry to its first. Only the first strlen(@str) characters are compared,
+ * so @str matches any entry it is a prefix of.
+ *
+ * Return: the ->index of the first matching entry, or -1 if none matches.
+ */
+int ksmbd_lookup_protocol_idx(char *str)
+{
+	int len = strlen(str);
+	int i;
+
+	for (i = ARRAY_SIZE(smb1_protos) - 1; i >= 0; i--) {
+		if (strncmp(str, smb1_protos[i].prot, len))
+			continue;
+
+		ksmbd_debug(SMB, "selected %s dialect idx = %d\n",
+			    smb1_protos[i].prot, i);
+		return smb1_protos[i].index;
+	}
+
+	for (i = ARRAY_SIZE(smb2_protos) - 1; i >= 0; i--) {
+		if (strncmp(str, smb2_protos[i].prot, len))
+			continue;
+
+		ksmbd_debug(SMB, "selected %s dialect idx = %d\n",
+			    smb2_protos[i].prot, i);
+		return smb2_protos[i].index;
+	}
+
+	return -1;
+}
+
+/**
+ * ksmbd_verify_smb_message() - check for valid smb2 request header
+ * @work:	smb work
+ *
+ * Requests whose ProtocolId is SMB2_PROTO_NUMBER are handed to
+ * ksmbd_smb2_check_message(); any other request is accepted without a check.
+ *
+ * Return:	0 when the request is not SMB2, otherwise the result of
+ *		ksmbd_smb2_check_message()
+ */
+int ksmbd_verify_smb_message(struct ksmbd_work *work)
+{
+	struct smb2_hdr *hdr = work->request_buf;
+
+	if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
+		return 0;
+
+	return ksmbd_smb2_check_message(work);
+}
+
+/**
+ * ksmbd_smb_request() - check for valid smb request type
+ * @conn:	connection instance
+ *
+ * Looks at the first byte of the connection's request buffer.
+ *
+ * Return:	true for RFC1002_SESSION_MESSAGE, otherwise false
+ */
+bool ksmbd_smb_request(struct ksmbd_conn *conn)
+{
+	int type = *(char *)conn->request_buf;
+
+	/* Regular SMB request */
+	if (type == RFC1002_SESSION_MESSAGE)
+		return true;
+
+	if (type == RFC1002_SESSION_KEEP_ALIVE)
+		ksmbd_debug(SMB, "RFC 1002 session keep alive\n");
+	else
+		ksmbd_debug(SMB, "RFC 1002 unknown request type 0x%x\n", type);
+
+	return false;
+}
+
+/*
+ * Tell whether dialect index @idx is allowed by server_conf.
+ *
+ * SMB2X_PROT is accepted when min_protocol >= SMB21_PROT or
+ * max_protocol <= SMB311_PROT; every index (SMB2X_PROT included, when that
+ * test fails) is accepted when it lies within [min_protocol, max_protocol].
+ */
+static bool supported_protocol(int idx)
+{
+	if (idx == SMB2X_PROT) {
+		if (server_conf.min_protocol >= SMB21_PROT)
+			return true;
+		if (server_conf.max_protocol <= SMB311_PROT)
+			return true;
+	}
+
+	if (idx < server_conf.min_protocol)
+		return false;
+
+	return idx <= server_conf.max_protocol;
+}
+
+/*
+ * next_dialect() - step to the next NUL-terminated dialect string
+ * @dialect:	current position in the client's dialect array
+ * @next_off:	in: number of bytes to skip from @dialect;
+ *		out: length of the string found (without its NUL)
+ * @bcount:	bytes still available, counted from the new position
+ *
+ * Return: pointer to the dialect string, or NULL when no terminating NUL
+ * lies within the remaining @bcount bytes.
+ */
+static char *next_dialect(char *dialect, int *next_off, int bcount)
+{
+	dialect = dialect + *next_off;
+	/* Never scan past the bytes the client declared. */
+	*next_off = strnlen(dialect, bcount);
+	if (*next_off >= bcount)
+		return NULL;
+	return dialect;
+}
+
+/*
+ * ksmbd_lookup_dialect_by_name() - pick a dialect from a by-name list
+ * @cli_dialects:	sequence of NUL-terminated dialect strings from the request
+ * @byte_count:		little-endian size in bytes of @cli_dialects
+ *
+ * smb1_protos[] is walked from its last entry to its first; for each entry
+ * the whole client list is scanned for an identical, supported name. A
+ * string that is not NUL-terminated within @byte_count ends the scan for
+ * that entry instead of being read out of bounds.
+ *
+ * NOTE(review): reads are bounded by the client-supplied @byte_count only;
+ * confirm the caller validates it against the received PDU length.
+ *
+ * Return: the matching entry's prot_id (or the position of the name in the
+ * client list for an SMB1_PROT entry), BAD_PROT_ID when nothing matches.
+ */
+static int ksmbd_lookup_dialect_by_name(char *cli_dialects, __le16 byte_count)
+{
+	int i, seq_num, bcount, next;
+	char *dialect;
+
+	for (i = ARRAY_SIZE(smb1_protos) - 1; i >= 0; i--) {
+		seq_num = 0;
+		next = 0;
+		dialect = cli_dialects;
+		bcount = le16_to_cpu(byte_count);
+		do {
+			dialect = next_dialect(dialect, &next, bcount);
+			if (!dialect)
+				break;
+			ksmbd_debug(SMB, "client requested dialect %s\n",
+				    dialect);
+			if (!strcmp(dialect, smb1_protos[i].name)) {
+				if (supported_protocol(smb1_protos[i].index)) {
+					ksmbd_debug(SMB,
+						    "selected %s dialect\n",
+						    smb1_protos[i].name);
+					if (smb1_protos[i].index == SMB1_PROT)
+						return seq_num;
+					return smb1_protos[i].prot_id;
+				}
+			}
+			seq_num++;
+			/* account for the string plus its terminating NUL */
+			bcount -= (++next);
+		} while (bcount > 0);
+	}
+
+	return BAD_PROT_ID;
+}
+
+/*
+ * ksmbd_lookup_dialect_by_id() - pick a dialect from a list of dialect ids
+ * @cli_dialects:	little-endian dialect ids offered by the client
+ * @dialects_count:	little-endian number of entries in @cli_dialects
+ *
+ * smb2_protos[] is walked from its last entry to its first, and for each
+ * entry the client list is scanned from its last id to its first.
+ *
+ * Return: prot_id of the first supported match, BAD_PROT_ID otherwise.
+ */
+int ksmbd_lookup_dialect_by_id(__le16 *cli_dialects, __le16 dialects_count)
+{
+	int proto;
+	int n;
+
+	for (proto = ARRAY_SIZE(smb2_protos) - 1; proto >= 0; proto--) {
+		for (n = le16_to_cpu(dialects_count) - 1; n >= 0; n--) {
+			ksmbd_debug(SMB, "client requested dialect 0x%x\n",
+				    le16_to_cpu(cli_dialects[n]));
+
+			if (le16_to_cpu(cli_dialects[n]) ==
+			    smb2_protos[proto].prot_id &&
+			    supported_protocol(smb2_protos[proto].index)) {
+				ksmbd_debug(SMB, "selected %s dialect\n",
+					    smb2_protos[proto].name);
+				return smb2_protos[proto].prot_id;
+			}
+		}
+	}
+
+	return BAD_PROT_ID;
+}
+
+/*
+ * Select a dialect for the negotiate request in @buf.
+ *
+ * A request with the SMB2 protocol number is resolved through its dialect-id
+ * array, one with the SMB1 protocol number through its dialect-name array.
+ *
+ * Return: the selected dialect, or BAD_PROT_ID when the protocol number is
+ * neither of the two or no dialect matches.
+ */
+static int ksmbd_negotiate_smb_dialect(void *buf)
+{
+	struct smb2_negotiate_req *smb2_req = buf;
+	struct smb_negotiate_req *smb1_req = buf;
+
+	if (((struct smb2_hdr *)buf)->ProtocolId == SMB2_PROTO_NUMBER)
+		return ksmbd_lookup_dialect_by_id(smb2_req->Dialects,
+						  smb2_req->DialectCount);
+
+	if (*(__le32 *)((struct smb_hdr *)buf)->Protocol == SMB1_PROTO_NUMBER)
+		return ksmbd_lookup_dialect_by_name(smb1_req->DialectsArray,
+						    smb1_req->ByteCount);
+
+	return BAD_PROT_ID;
+}
+
+#define SMB_COM_NEGOTIATE 0x72
+/*
+ * ksmbd_init_smb_server() - set up connection ops while negotiation is pending
+ * @work:	smb work whose connection is initialised
+ *
+ * While conn->need_neg is set, init_smb3_11_server() is called for the
+ * connection; need_neg is cleared once a command other than
+ * SMB_COM_NEGOTIATE is seen.
+ *
+ * Return: always 0.
+ */
+int ksmbd_init_smb_server(struct ksmbd_work *work)
+{
+	struct ksmbd_conn *conn = work->conn;
+
+	if (conn->need_neg) {
+		init_smb3_11_server(conn);
+
+		if (conn->ops->get_cmd_val(work) != SMB_COM_NEGOTIATE)
+			conn->need_neg = false;
+	}
+
+	return 0;
+}
+
+/*
+ * Tell whether a PDU of @pdu bytes is at least
+ * KSMBD_MIN_SUPPORTED_HEADER_SIZE - 4 bytes long.
+ */
+bool ksmbd_pdu_size_has_room(unsigned int pdu)
+{
+	if (pdu < KSMBD_MIN_SUPPORTED_HEADER_SIZE - 4)
+		return false;
+
+	return true;
+}
+
+/*
+ * ksmbd_populate_dot_dotdot_entries() - emit the "." and ".." entries
+ * @work:		smb work
+ * @info_level:		info level handed through to @fn
+ * @dir:		directory being listed; dir->dot_dotdot[] records which of
+ *			the two entries has already been handled
+ * @d_info:		directory info; name, name_len and out_buf_len are updated
+ * @search_pattern:	pattern the entry name must match to be emitted
+ * @fn:			callback that writes one entry
+ *
+ * An entry that does not match @search_pattern is marked handled without
+ * being emitted. Processing stops when @fn fails, when the output buffer is
+ * exhausted, or after one entry if SMB2_RETURN_SINGLE_ENTRY is set.
+ *
+ * Return: 0, or the non-zero value returned by @fn.
+ */
+int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
+				      struct ksmbd_file *dir,
+				      struct ksmbd_dir_info *d_info,
+				      char *search_pattern,
+				      int (*fn)(struct ksmbd_conn *, int,
+						struct ksmbd_dir_info *,
+						struct user_namespace *,
+						struct ksmbd_kstat *))
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct user_namespace *user_ns = file_mnt_user_ns(dir->filp);
+	int rc = 0;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kstat kstat;
+		struct ksmbd_kstat ksmbd_kstat;
+
+		/* already handled on an earlier call */
+		if (dir->dot_dotdot[i])
+			continue;
+
+		/* index 0 is ".", index 1 is ".." */
+		if (i == 0) {
+			d_info->name = ".";
+			d_info->name_len = 1;
+		} else {
+			d_info->name = "..";
+			d_info->name_len = 2;
+		}
+
+		if (!match_pattern(d_info->name, d_info->name_len,
+				   search_pattern)) {
+			dir->dot_dotdot[i] = 1;
+			continue;
+		}
+
+		ksmbd_kstat.kstat = &kstat;
+		ksmbd_vfs_fill_dentry_attrs(work,
+					    user_ns,
+					    dir->filp->f_path.dentry->d_parent,
+					    &ksmbd_kstat);
+		rc = fn(conn, info_level, d_info, user_ns, &ksmbd_kstat);
+		if (rc || d_info->out_buf_len <= 0)
+			break;
+
+		dir->dot_dotdot[i] = 1;
+		if (d_info->flags & SMB2_RETURN_SINGLE_ENTRY) {
+			d_info->out_buf_len = 0;
+			break;
+		}
+	}
+
+	return rc;
+}
+
+/**
+ * ksmbd_extract_shortname() - get shortname from long filename
+ * @conn:	connection instance
+ * @longname:	source long filename
+ * @shortname:	destination short filename
+ *
+ * Builds "<up to 5 base chars>~<2 checksum chars>.<up to 3 extension chars>"
+ * from @longname (upper-cased, dots skipped) and converts it into @shortname
+ * with smbConvertToUTF16().
+ *
+ * Return: twice the length of the mangled name, or 0 when @longname begins
+ * with '.' (which includes "." and ".."), in which case nothing is written.
+ * TODO: Though this function conforms to the restriction of 8.3 Filename spec,
+ * but the result is different with Windows 7's one. need to check.
+ */
+int ksmbd_extract_shortname(struct ksmbd_conn *conn, const char *longname,
+			    char *shortname)
+{
+	const char *p;
+	char base[9];
+	/* zeroed so the 4-byte memcpy() below never copies indeterminate bytes */
+	char extension[4] = {0};
+	char out[13] = {0};
+	int baselen = 0;
+	int extlen = 0, len = 0;
+	unsigned int csum = 0;
+	const unsigned char *ptr;
+	bool dot_present = true;
+
+	p = longname;
+	if ((*p == '.') || (!(strcmp(p, "..")))) {
+		/*no mangling required */
+		return 0;
+	}
+
+	p = strrchr(longname, '.');
+	if (p == longname) { /*name starts with a dot*/
+		/*
+		 * strscpy() takes the destination size and copies at most
+		 * size - 1 characters, so pass sizeof(extension) to get all
+		 * three underscores. (Names with a leading dot currently
+		 * return above, so this branch is defensive.)
+		 */
+		strscpy(extension, "___", sizeof(extension));
+	} else {
+		if (p) {
+			p++;
+			while (*p && extlen < 3) {
+				if (*p != '.')
+					extension[extlen++] = toupper(*p);
+				p++;
+			}
+			extension[extlen] = '\0';
+		} else {
+			dot_present = false;
+		}
+	}
+
+	p = longname;
+	if (*p == '.') {
+		p++;
+		longname++;
+	}
+	while (*p && (baselen < 5)) {
+		if (*p != '.')
+			base[baselen++] = toupper(*p);
+		p++;
+	}
+
+	base[baselen] = MAGIC_CHAR;
+	memcpy(out, base, baselen + 1);
+
+	/* checksum: byte sum of the long name, reduced to two mangle digits */
+	ptr = longname;
+	len = strlen(longname);
+	for (; len > 0; len--, ptr++)
+		csum += *ptr;
+
+	csum = csum % (MANGLE_BASE * MANGLE_BASE);
+	out[baselen + 1] = mangle(csum / MANGLE_BASE);
+	out[baselen + 2] = mangle(csum);
+	out[baselen + 3] = PERIOD;
+
+	if (dot_present)
+		memcpy(&out[baselen + 4], extension, 4);
+	else
+		out[baselen + 4] = '\0';
+	smbConvertToUTF16((__le16 *)shortname, out, PATH_MAX,
+			  conn->local_nls, 0);
+	len = strlen(out) * 2;
+	return len;
+}
+
+/*
+ * Return 1 when conn->dialect lies within [SMB20_PROT_ID, SMB311_PROT_ID],
+ * 0 otherwise.
+ */
+static int __smb2_negotiate(struct ksmbd_conn *conn)
+{
+	if (conn->dialect < SMB20_PROT_ID)
+		return 0;
+
+	return conn->dialect <= SMB311_PROT_ID;
+}
+
+/*
+ * Reject a negotiate request that cannot be served: the response status is
+ * set to STATUS_INVALID_LOGON_TYPE.
+ *
+ * Return: always -EINVAL.
+ */
+static int smb_handle_negotiate(struct ksmbd_work *work)
+{
+	struct smb_negotiate_rsp *rsp = work->response_buf;
+
+	rsp->hdr.Status.CifsError = STATUS_INVALID_LOGON_TYPE;
+	ksmbd_debug(SMB, "Unsupported SMB protocol\n");
+
+	return -EINVAL;
+}
+
+/*
+ * ksmbd_smb_negotiate_common() - dispatch a negotiate request
+ * @work:	smb work holding the request
+ * @command:	command value of the request
+ *
+ * The dialect is selected first and stored in conn->dialect. A request
+ * flagged as SMB2_NEGOTIATE_HE whose header does not carry the SMB2 protocol
+ * number is treated as SMB_COM_NEGOTIATE. An SMB_COM_NEGOTIATE request that
+ * selected an SMB2 dialect is answered with an SMB2 negotiate response and
+ * leaves conn->need_neg set.
+ *
+ * Return: result of smb2_handle_negotiate() for SMB2 requests, 0 for an
+ * upgraded SMB_COM_NEGOTIATE, -EINVAL otherwise.
+ */
+int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command)
+{
+	struct ksmbd_conn *conn = work->conn;
+	struct smb2_hdr *hdr = work->request_buf;
+	int ret;
+
+	conn->dialect = ksmbd_negotiate_smb_dialect(work->request_buf);
+	ksmbd_debug(SMB, "conn->dialect 0x%x\n", conn->dialect);
+
+	if (command == SMB2_NEGOTIATE_HE &&
+	    hdr->ProtocolId != SMB2_PROTO_NUMBER) {
+		ksmbd_debug(SMB, "Downgrade to SMB1 negotiation\n");
+		command = SMB_COM_NEGOTIATE;
+	}
+
+	if (command == SMB2_NEGOTIATE_HE) {
+		ret = smb2_handle_negotiate(work);
+		init_smb2_neg_rsp(work);
+		return ret;
+	}
+
+	if (command != SMB_COM_NEGOTIATE) {
+		pr_err("Unknown SMB negotiation command: %u\n", command);
+		return -EINVAL;
+	}
+
+	if (!__smb2_negotiate(conn))
+		return smb_handle_negotiate(work);
+
+	conn->need_neg = true;
+	init_smb3_11_server(conn);
+	init_smb2_neg_rsp(work);
+	ksmbd_debug(SMB, "Upgrade to SMB2 negotiation\n");
+	return 0;
+}
+
+/*
+ * Reasons for a share-mode conflict.  The values are used as indices into
+ * shared_mode_errors[] by smb_shared_mode_error(), so the enumerators and
+ * the strings below must be kept in the same order.
+ */
+enum SHARED_MODE_ERRORS {
+ SHARE_DELETE_ERROR,
+ SHARE_READ_ERROR,
+ SHARE_WRITE_ERROR,
+ FILE_READ_ERROR,
+ FILE_WRITE_ERROR,
+ FILE_DELETE_ERROR,
+};
+
+/* Debug messages, one per enum SHARED_MODE_ERRORS value, in enum order. */
+static const char * const shared_mode_errors[] = {
+ "Current access mode does not permit SHARE_DELETE",
+ "Current access mode does not permit SHARE_READ",
+ "Current access mode does not permit SHARE_WRITE",
+ "Desired access mode does not permit FILE_READ",
+ "Desired access mode does not permit FILE_WRITE",
+ "Desired access mode does not permit FILE_DELETE",
+};
+
+/*
+ * Emit debug output describing a share-mode conflict: the message selected
+ * by @error from shared_mode_errors[], followed by the share access of the
+ * existing open (@prev_fp) and the desired access of the new one (@curr_fp).
+ */
+static void smb_shared_mode_error(int error, struct ksmbd_file *prev_fp,
+				  struct ksmbd_file *curr_fp)
+{
+	const char *reason = shared_mode_errors[error];
+
+	ksmbd_debug(SMB, "%s\n", reason);
+	ksmbd_debug(SMB, "Current mode: 0x%x Desired mode: 0x%x\n",
+		    prev_fp->saccess, curr_fp->daccess);
+}
+
+/*
+ * Check a new open against the opens already recorded for the same file.
+ *
+ * @filp:    struct file of the new open
+ * @curr_fp: ksmbd file being opened; its f_ci->m_fp_list is walked while
+ *           f_ci->m_lock is read-held
+ *
+ * The desired access of the new open is tested against the share access of
+ * each earlier open, and the desired access of each earlier open against
+ * the share access of the new one.  The walk stops at the first conflict.
+ *
+ * Returns 0 when no conflict is found, -EPERM otherwise.
+ */
+int ksmbd_smb_check_shared_mode(struct file *filp, struct ksmbd_file *curr_fp)
+{
+ int rc = 0;
+ struct ksmbd_file *prev_fp;
+
+ /*
+ * Lookup fp in master fp list, and check desired access and
+ * shared mode between previous open and current open.
+ */
+ read_lock(&curr_fp->f_ci->m_lock);
+ list_for_each_entry(prev_fp, &curr_fp->f_ci->m_fp_list, node) {
+ /* entries that refer to a different inode are not compared */
+ if (file_inode(filp) != file_inode(prev_fp->filp))
+ continue;
+
+ /* the new open itself is not compared */
+ if (filp == prev_fp->filp)
+ continue;
+
+ /* two stream opens are compared only when the stream names match */
+ if (ksmbd_stream_fd(prev_fp) && ksmbd_stream_fd(curr_fp))
+ if (strcmp(prev_fp->stream.name, curr_fp->stream.name))
+ continue;
+
+ /* opens are compared only when their attrib_only flags agree */
+ if (prev_fp->attrib_only != curr_fp->attrib_only)
+ continue;
+
+ if (!(prev_fp->saccess & FILE_SHARE_DELETE_LE) &&
+ curr_fp->daccess & FILE_DELETE_LE) {
+ smb_shared_mode_error(SHARE_DELETE_ERROR,
+ prev_fp,
+ curr_fp);
+ rc = -EPERM;
+ break;
+ }
+
+ /*
+ * Only check FILE_SHARE_DELETE if stream opened and
+ * normal file opened.
+ */
+ if (ksmbd_stream_fd(prev_fp) && !ksmbd_stream_fd(curr_fp))
+ continue;
+
+ /* new open's desired access vs. earlier open's share access */
+ if (!(prev_fp->saccess & FILE_SHARE_READ_LE) &&
+ curr_fp->daccess & (FILE_EXECUTE_LE | FILE_READ_DATA_LE)) {
+ smb_shared_mode_error(SHARE_READ_ERROR,
+ prev_fp,
+ curr_fp);
+ rc = -EPERM;
+ break;
+ }
+
+ if (!(prev_fp->saccess & FILE_SHARE_WRITE_LE) &&
+ curr_fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE)) {
+ smb_shared_mode_error(SHARE_WRITE_ERROR,
+ prev_fp,
+ curr_fp);
+ rc = -EPERM;
+ break;
+ }
+
+ /* earlier open's desired access vs. new open's share access */
+ if (prev_fp->daccess & (FILE_EXECUTE_LE | FILE_READ_DATA_LE) &&
+ !(curr_fp->saccess & FILE_SHARE_READ_LE)) {
+ smb_shared_mode_error(FILE_READ_ERROR,
+ prev_fp,
+ curr_fp);
+ rc = -EPERM;
+ break;
+ }
+
+ if (prev_fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE) &&
+ !(curr_fp->saccess & FILE_SHARE_WRITE_LE)) {
+ smb_shared_mode_error(FILE_WRITE_ERROR,
+ prev_fp,
+ curr_fp);
+ rc = -EPERM;
+ break;
+ }
+
+ if (prev_fp->daccess & FILE_DELETE_LE &&
+ !(curr_fp->saccess & FILE_SHARE_DELETE_LE)) {
+ smb_shared_mode_error(FILE_DELETE_ERROR,
+ prev_fp,
+ curr_fp);
+ rc = -EPERM;
+ break;
+ }
+ }
+ read_unlock(&curr_fp->f_ci->m_lock);
+
+ return rc;
+}
+
+/* Return true when @p is non-NULL and its first character is '*'. */
+bool is_asterisk(char *p)
+{
+	if (!p)
+		return false;
+
+	return *p == '*';
+}
+
+/*
+ * Install temporary credentials for the request described by @work.
+ *
+ * The uid/gid come from the session user unless the share configuration
+ * carries a force_uid/force_gid other than the INVALID markers.  A fresh
+ * credential set is prepared, given those ids as fsuid/fsgid and an empty
+ * supplementary group list, and installed with override_creds().  The
+ * value returned by override_creds() is kept in work->saved_cred.
+ *
+ * Returns 0 on success, -ENOMEM when a credential or group allocation
+ * fails, -EINVAL when override_creds() returns NULL.
+ */
+int ksmbd_override_fsids(struct ksmbd_work *work)
+{
+ struct ksmbd_session *sess = work->sess;
+ struct ksmbd_share_config *share = work->tcon->share_conf;
+ struct cred *cred;
+ struct group_info *gi;
+ unsigned int uid;
+ unsigned int gid;
+
+ uid = user_uid(sess->user);
+ gid = user_gid(sess->user);
+ /* a forced id configured on the share takes precedence over the user's */
+ if (share->force_uid != KSMBD_SHARE_INVALID_UID)
+ uid = share->force_uid;
+ if (share->force_gid != KSMBD_SHARE_INVALID_GID)
+ gid = share->force_gid;
+
+ cred = prepare_kernel_cred(NULL);
+ if (!cred)
+ return -ENOMEM;
+
+ cred->fsuid = make_kuid(current_user_ns(), uid);
+ cred->fsgid = make_kgid(current_user_ns(), gid);
+
+ /* replace the supplementary groups with an empty (0-entry) list */
+ gi = groups_alloc(0);
+ if (!gi) {
+ abort_creds(cred);
+ return -ENOMEM;
+ }
+ set_groups(cred, gi);
+ put_group_info(gi);
+
+ /* filesystem capabilities are dropped unless fsuid is the global root */
+ if (!uid_eq(cred->fsuid, GLOBAL_ROOT_UID))
+ cred->cap_effective = cap_drop_fs_set(cred->cap_effective);
+
+ /* warns if an earlier override was not yet reverted for this work */
+ WARN_ON(work->saved_cred);
+ work->saved_cred = override_creds(cred);
+ if (!work->saved_cred) {
+ abort_creds(cred);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
+ * Undo ksmbd_override_fsids(): restore the credentials stored in
+ * work->saved_cred, drop a reference on the credentials that were current
+ * on entry, and clear work->saved_cred.  Warns if no credentials were saved.
+ */
+void ksmbd_revert_fsids(struct ksmbd_work *work)
+{
+ const struct cred *cred;
+
+ WARN_ON(!work->saved_cred);
+
+ /* must be read before revert_creds() replaces the current credentials */
+ cred = current_cred();
+ revert_creds(work->saved_cred);
+ put_cred(cred);
+ work->saved_cred = NULL;
+}
+
+/*
+ * Expand the generic access bits in @daccess into specific rights.
+ *
+ * For each of FILE_GENERIC_READ_LE, FILE_GENERIC_WRITE_LE,
+ * FILE_GENERIC_EXECUTE_LE and FILE_GENERIC_ALL_LE that is set, the matching
+ * GENERIC_*_FLAGS set is OR-ed in and the generic bit is then cleared.
+ * The value is kept little-endian throughout and returned.
+ */
+__le32 smb_map_generic_desired_access(__le32 daccess)
+{
+	const struct {
+		__le32 generic;
+		__le32 specific;
+	} map[] = {
+		{ FILE_GENERIC_READ_LE, cpu_to_le32(GENERIC_READ_FLAGS) },
+		{ FILE_GENERIC_WRITE_LE, cpu_to_le32(GENERIC_WRITE_FLAGS) },
+		{ FILE_GENERIC_EXECUTE_LE, cpu_to_le32(GENERIC_EXECUTE_FLAGS) },
+		{ FILE_GENERIC_ALL_LE, cpu_to_le32(GENERIC_ALL_FLAGS) },
+	};
+	unsigned int i;
+
+	/* entries are applied in the listed order, each on the updated value */
+	for (i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
+		if (!(daccess & map[i].generic))
+			continue;
+		daccess |= map[i].specific;
+		daccess &= ~map[i].generic;
+	}
+
+	return daccess;
+}
diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h
new file mode 100644
index 000000000000..eb667d85558e
--- /dev/null
+++ b/fs/ksmbd/smb_common.h
@@ -0,0 +1,542 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __SMB_COMMON_H__
+#define __SMB_COMMON_H__
+
+#include <linux/kernel.h>
+
+#include "glob.h"
+#include "nterr.h"
+#include "smb2pdu.h"
+
+/* ksmbd's Specific ERRNO */
+#define ESHARE 50000
+
+#define SMB1_PROT 0
+#define SMB2_PROT 1
+#define SMB21_PROT 2
+/* multi-protocol negotiate request */
+#define SMB2X_PROT 3
+#define SMB30_PROT 4
+#define SMB302_PROT 5
+#define SMB311_PROT 6
+#define BAD_PROT 0xFFFF
+
+#define SMB1_VERSION_STRING "1.0"
+#define SMB20_VERSION_STRING "2.0"
+#define SMB21_VERSION_STRING "2.1"
+#define SMB30_VERSION_STRING "3.0"
+#define SMB302_VERSION_STRING "3.02"
+#define SMB311_VERSION_STRING "3.1.1"
+
+/* Dialects */
+#define SMB10_PROT_ID 0x00
+#define SMB20_PROT_ID 0x0202
+#define SMB21_PROT_ID 0x0210
+/* multi-protocol negotiate request */
+#define SMB2X_PROT_ID 0x02FF
+#define SMB30_PROT_ID 0x0300
+#define SMB302_PROT_ID 0x0302
+#define SMB311_PROT_ID 0x0311
+#define BAD_PROT_ID 0xFFFF
+
+#define SMB_ECHO_INTERVAL (60 * HZ)
+
+#define CIFS_DEFAULT_IOSIZE (64 * 1024)
+#define MAX_CIFS_SMALL_BUFFER_SIZE 448 /* big enough for most */
+
+/* RFC 1002 session packet types */
+#define RFC1002_SESSION_MESSAGE 0x00
+#define RFC1002_SESSION_REQUEST 0x81
+#define RFC1002_POSITIVE_SESSION_RESPONSE 0x82
+#define RFC1002_NEGATIVE_SESSION_RESPONSE 0x83
+#define RFC1002_RETARGET_SESSION_RESPONSE 0x84
+#define RFC1002_SESSION_KEEP_ALIVE 0x85
+
+/* Responses when opening a file. */
+#define F_SUPERSEDED 0
+#define F_OPENED 1
+#define F_CREATED 2
+#define F_OVERWRITTEN 3
+
+/*
+ * File Attribute flags
+ */
+#define ATTR_READONLY 0x0001
+#define ATTR_HIDDEN 0x0002
+#define ATTR_SYSTEM 0x0004
+#define ATTR_VOLUME 0x0008
+#define ATTR_DIRECTORY 0x0010
+#define ATTR_ARCHIVE 0x0020
+#define ATTR_DEVICE 0x0040
+#define ATTR_NORMAL 0x0080
+#define ATTR_TEMPORARY 0x0100
+#define ATTR_SPARSE 0x0200
+#define ATTR_REPARSE 0x0400
+#define ATTR_COMPRESSED 0x0800
+#define ATTR_OFFLINE 0x1000
+#define ATTR_NOT_CONTENT_INDEXED 0x2000
+#define ATTR_ENCRYPTED 0x4000
+#define ATTR_POSIX_SEMANTICS 0x01000000
+#define ATTR_BACKUP_SEMANTICS 0x02000000
+#define ATTR_DELETE_ON_CLOSE 0x04000000
+#define ATTR_SEQUENTIAL_SCAN 0x08000000
+#define ATTR_RANDOM_ACCESS 0x10000000
+#define ATTR_NO_BUFFERING 0x20000000
+#define ATTR_WRITE_THROUGH 0x80000000
+
+#define ATTR_READONLY_LE cpu_to_le32(ATTR_READONLY)
+#define ATTR_HIDDEN_LE cpu_to_le32(ATTR_HIDDEN)
+#define ATTR_SYSTEM_LE cpu_to_le32(ATTR_SYSTEM)
+#define ATTR_DIRECTORY_LE cpu_to_le32(ATTR_DIRECTORY)
+#define ATTR_ARCHIVE_LE cpu_to_le32(ATTR_ARCHIVE)
+#define ATTR_NORMAL_LE cpu_to_le32(ATTR_NORMAL)
+#define ATTR_TEMPORARY_LE cpu_to_le32(ATTR_TEMPORARY)
+#define ATTR_SPARSE_FILE_LE cpu_to_le32(ATTR_SPARSE)
+#define ATTR_REPARSE_POINT_LE cpu_to_le32(ATTR_REPARSE)
+#define ATTR_COMPRESSED_LE cpu_to_le32(ATTR_COMPRESSED)
+#define ATTR_OFFLINE_LE cpu_to_le32(ATTR_OFFLINE)
+#define ATTR_NOT_CONTENT_INDEXED_LE cpu_to_le32(ATTR_NOT_CONTENT_INDEXED)
+#define ATTR_ENCRYPTED_LE cpu_to_le32(ATTR_ENCRYPTED)
+#define ATTR_INTEGRITY_STREAML_LE cpu_to_le32(0x00008000)
+#define ATTR_NO_SCRUB_DATA_LE cpu_to_le32(0x00020000)
+#define ATTR_MASK_LE cpu_to_le32(0x00007FB7)
+
+/* List of FileSystemAttributes - see 2.5.1 of MS-FSCC */
+#define FILE_SUPPORTS_SPARSE_VDL 0x10000000 /* faster nonsparse extend */
+#define FILE_SUPPORTS_BLOCK_REFCOUNTING 0x08000000 /* allow ioctl dup extents */
+#define FILE_SUPPORT_INTEGRITY_STREAMS 0x04000000
+#define FILE_SUPPORTS_USN_JOURNAL 0x02000000
+#define FILE_SUPPORTS_OPEN_BY_FILE_ID 0x01000000
+#define FILE_SUPPORTS_EXTENDED_ATTRIBUTES 0x00800000
+#define FILE_SUPPORTS_HARD_LINKS 0x00400000
+#define FILE_SUPPORTS_TRANSACTIONS 0x00200000
+#define FILE_SEQUENTIAL_WRITE_ONCE 0x00100000
+#define FILE_READ_ONLY_VOLUME 0x00080000
+#define FILE_NAMED_STREAMS 0x00040000
+#define FILE_SUPPORTS_ENCRYPTION 0x00020000
+#define FILE_SUPPORTS_OBJECT_IDS 0x00010000
+#define FILE_VOLUME_IS_COMPRESSED 0x00008000
+#define FILE_SUPPORTS_REMOTE_STORAGE 0x00000100
+#define FILE_SUPPORTS_REPARSE_POINTS 0x00000080
+#define FILE_SUPPORTS_SPARSE_FILES 0x00000040
+#define FILE_VOLUME_QUOTAS 0x00000020
+#define FILE_FILE_COMPRESSION 0x00000010
+#define FILE_PERSISTENT_ACLS 0x00000008
+#define FILE_UNICODE_ON_DISK 0x00000004
+#define FILE_CASE_PRESERVED_NAMES 0x00000002
+#define FILE_CASE_SENSITIVE_SEARCH 0x00000001
+
+#define FILE_READ_DATA 0x00000001 /* Data can be read from the file */
+#define FILE_WRITE_DATA 0x00000002 /* Data can be written to the file */
+#define FILE_APPEND_DATA 0x00000004 /* Data can be appended to the file */
+#define FILE_READ_EA 0x00000008 /* Extended attributes associated */
+/* with the file can be read */
+#define FILE_WRITE_EA 0x00000010 /* Extended attributes associated */
+/* with the file can be written */
+#define FILE_EXECUTE 0x00000020 /*Data can be read into memory from */
+/* the file using system paging I/O */
+#define FILE_DELETE_CHILD 0x00000040
+#define FILE_READ_ATTRIBUTES 0x00000080 /* Attributes associated with the */
+/* file can be read */
+#define FILE_WRITE_ATTRIBUTES 0x00000100 /* Attributes associated with the */
+/* file can be written */
+#define DELETE 0x00010000 /* The file can be deleted */
+#define READ_CONTROL 0x00020000 /* The access control list and */
+/* ownership associated with the */
+/* file can be read */
+#define WRITE_DAC 0x00040000 /* The access control list and */
+/* ownership associated with the */
+/* file can be written. */
+#define WRITE_OWNER 0x00080000 /* Ownership information associated */
+/* with the file can be written */
+#define SYNCHRONIZE 0x00100000 /* The file handle can be waited on to */
+/* synchronize with the completion */
+/* of an input/output request */
+#define GENERIC_ALL 0x10000000
+#define GENERIC_EXECUTE 0x20000000
+#define GENERIC_WRITE 0x40000000
+#define GENERIC_READ 0x80000000
+/* In summary - Relevant file */
+/* access flags from CIFS are */
+/* file_read_data, file_write_data */
+/* file_execute, file_read_attributes*/
+/* write_dac, and delete. */
+
+#define FILE_READ_RIGHTS (FILE_READ_DATA | FILE_READ_EA | FILE_READ_ATTRIBUTES)
+#define FILE_WRITE_RIGHTS (FILE_WRITE_DATA | FILE_APPEND_DATA \
+ | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES)
+#define FILE_EXEC_RIGHTS (FILE_EXECUTE)
+
+#define SET_FILE_READ_RIGHTS (FILE_READ_DATA | FILE_READ_EA \
+ | FILE_READ_ATTRIBUTES \
+ | DELETE | READ_CONTROL | WRITE_DAC \
+ | WRITE_OWNER | SYNCHRONIZE)
+#define SET_FILE_WRITE_RIGHTS (FILE_WRITE_DATA | FILE_APPEND_DATA \
+ | FILE_WRITE_EA \
+ | FILE_DELETE_CHILD \
+ | FILE_WRITE_ATTRIBUTES \
+ | DELETE | READ_CONTROL | WRITE_DAC \
+ | WRITE_OWNER | SYNCHRONIZE)
+#define SET_FILE_EXEC_RIGHTS (FILE_READ_EA | FILE_WRITE_EA | FILE_EXECUTE \
+ | FILE_READ_ATTRIBUTES \
+ | FILE_WRITE_ATTRIBUTES \
+ | DELETE | READ_CONTROL | WRITE_DAC \
+ | WRITE_OWNER | SYNCHRONIZE)
+
+#define SET_MINIMUM_RIGHTS (FILE_READ_EA | FILE_READ_ATTRIBUTES \
+ | READ_CONTROL | SYNCHRONIZE)
+
+/* generic flags for file open */
+#define GENERIC_READ_FLAGS (READ_CONTROL | FILE_READ_DATA | \
+ FILE_READ_ATTRIBUTES | \
+ FILE_READ_EA | SYNCHRONIZE)
+
+#define GENERIC_WRITE_FLAGS (READ_CONTROL | FILE_WRITE_DATA | \
+ FILE_WRITE_ATTRIBUTES | FILE_WRITE_EA | \
+ FILE_APPEND_DATA | SYNCHRONIZE)
+
+#define GENERIC_EXECUTE_FLAGS (READ_CONTROL | FILE_EXECUTE | \
+ FILE_READ_ATTRIBUTES | SYNCHRONIZE)
+
+#define GENERIC_ALL_FLAGS (DELETE | READ_CONTROL | WRITE_DAC | \
+ WRITE_OWNER | SYNCHRONIZE | FILE_READ_DATA | \
+ FILE_WRITE_DATA | FILE_APPEND_DATA | \
+ FILE_READ_EA | FILE_WRITE_EA | \
+ FILE_EXECUTE | FILE_DELETE_CHILD | \
+ FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES)
+
+#define SMB1_PROTO_NUMBER cpu_to_le32(0x424d53ff)
+
+#define SMB1_CLIENT_GUID_SIZE (16)
+/*
+ * Packed message header (presumably the SMB1 header, given the
+ * SMB1_PROTO_NUMBER definition next to it -- confirm against callers).
+ *
+ * smb_buf_length is the only big-endian field; the remaining multi-byte
+ * fields are little-endian.  Status can be read either as a DOS error
+ * class/code pair or as a single 32-bit CifsError value, and Signature
+ * either as a sequence number plus reserved word or as 8 raw bytes.
+ */
+struct smb_hdr {
+ __be32 smb_buf_length;
+ __u8 Protocol[4];
+ __u8 Command;
+ union {
+ struct {
+ __u8 ErrorClass;
+ __u8 Reserved;
+ __le16 Error;
+ } __packed DosError;
+ __le32 CifsError;
+ } __packed Status;
+ __u8 Flags;
+ __le16 Flags2; /* note: le */
+ __le16 PidHigh;
+ union {
+ struct {
+ __le32 SequenceNumber; /* le */
+ __u32 Reserved; /* zero */
+ } __packed Sequence;
+ __u8 SecuritySignature[8]; /* le */
+ } __packed Signature;
+ __u8 pad[2];
+ __le16 Tid;
+ __le16 Pid;
+ __le16 Uid;
+ __le16 Mid;
+ __u8 WordCount;
+} __packed;
+
+struct smb_negotiate_req {
+ struct smb_hdr hdr; /* wct = 0 */
+ __le16 ByteCount;
+ unsigned char DialectsArray[1];
+} __packed;
+
+struct smb_negotiate_rsp {
+ struct smb_hdr hdr; /* wct = 17 */
+ __le16 DialectIndex; /* 0xFFFF = no dialect acceptable */
+ __u8 SecurityMode;
+ __le16 MaxMpxCount;
+ __le16 MaxNumberVcs;
+ __le32 MaxBufferSize;
+ __le32 MaxRawSize;
+ __le32 SessionKey;
+ __le32 Capabilities; /* see below */
+ __le32 SystemTimeLow;
+ __le32 SystemTimeHigh;
+ __le16 ServerTimeZone;
+ __u8 EncryptionKeyLength;
+ __le16 ByteCount;
+ union {
+ unsigned char EncryptionKey[8]; /* cap extended security off */
+ /* followed by Domain name - if extended security is off */
+ /* followed by 16 bytes of server GUID */
+ /* then security blob if cap_extended_security negotiated */
+ struct {
+ unsigned char GUID[SMB1_CLIENT_GUID_SIZE];
+ unsigned char SecurityBlob[1];
+ } __packed extended_response;
+ } __packed u;
+} __packed;
+
+struct filesystem_attribute_info {
+ __le32 Attributes;
+ __le32 MaxPathNameComponentLength;
+ __le32 FileSystemNameLen;
+ __le16 FileSystemName[1]; /* do not have to save this - get subset? */
+} __packed;
+
+struct filesystem_device_info {
+ __le32 DeviceType;
+ __le32 DeviceCharacteristics;
+} __packed; /* device info level 0x104 */
+
+struct filesystem_vol_info {
+ __le64 VolumeCreationTime;
+ __le32 SerialNumber;
+ __le32 VolumeLabelSize;
+ __le16 Reserved;
+ __le16 VolumeLabel[1];
+} __packed;
+
+struct filesystem_info {
+ __le64 TotalAllocationUnits;
+ __le64 FreeAllocationUnits;
+ __le32 SectorsPerAllocationUnit;
+ __le32 BytesPerSector;
+} __packed; /* size info, level 0x103 */
+
+#define EXTENDED_INFO_MAGIC 0x43667364 /* Cfsd */
+#define STRING_LENGTH 28
+
+struct fs_extended_info {
+ __le32 magic;
+ __le32 version;
+ __le32 release;
+ __u64 rel_date;
+ char version_string[STRING_LENGTH];
+} __packed;
+
+struct object_id_info {
+ char objid[16];
+ struct fs_extended_info extended_info;
+} __packed;
+
+struct file_directory_info {
+ __le32 NextEntryOffset;
+ __u32 FileIndex;
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 EndOfFile;
+ __le64 AllocationSize;
+ __le32 ExtFileAttributes;
+ __le32 FileNameLength;
+ char FileName[1];
+} __packed; /* level 0x101 FF resp data */
+
+struct file_names_info {
+ __le32 NextEntryOffset;
+ __u32 FileIndex;
+ __le32 FileNameLength;
+ char FileName[1];
+} __packed; /* level 0xc FF resp data */
+
+struct file_full_directory_info {
+ __le32 NextEntryOffset;
+ __u32 FileIndex;
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 EndOfFile;
+ __le64 AllocationSize;
+ __le32 ExtFileAttributes;
+ __le32 FileNameLength;
+ __le32 EaSize;
+ char FileName[1];
+} __packed; /* level 0x102 FF resp */
+
+struct file_both_directory_info {
+ __le32 NextEntryOffset;
+ __u32 FileIndex;
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 EndOfFile;
+ __le64 AllocationSize;
+ __le32 ExtFileAttributes;
+ __le32 FileNameLength;
+ __le32 EaSize; /* length of the xattrs */
+ __u8 ShortNameLength;
+ __u8 Reserved;
+ __u8 ShortName[24];
+ char FileName[1];
+} __packed; /* level 0x104 FFrsp data */
+
+struct file_id_both_directory_info {
+ __le32 NextEntryOffset;
+ __u32 FileIndex;
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 EndOfFile;
+ __le64 AllocationSize;
+ __le32 ExtFileAttributes;
+ __le32 FileNameLength;
+ __le32 EaSize; /* length of the xattrs */
+ __u8 ShortNameLength;
+ __u8 Reserved;
+ __u8 ShortName[24];
+ __le16 Reserved2;
+ __le64 UniqueId;
+ char FileName[1];
+} __packed;
+
+struct file_id_full_dir_info {
+ __le32 NextEntryOffset;
+ __u32 FileIndex;
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 EndOfFile;
+ __le64 AllocationSize;
+ __le32 ExtFileAttributes;
+ __le32 FileNameLength;
+ __le32 EaSize; /* EA size */
+ __le32 Reserved;
+ __le64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/
+ char FileName[1];
+} __packed; /* level 0x105 FF rsp data */
+
+struct smb_version_values {
+ char *version_string;
+ __u16 protocol_id;
+ __le16 lock_cmd;
+ __u32 capabilities;
+ __u32 max_read_size;
+ __u32 max_write_size;
+ __u32 max_trans_size;
+ __u32 large_lock_type;
+ __u32 exclusive_lock_type;
+ __u32 shared_lock_type;
+ __u32 unlock_lock_type;
+ size_t header_size;
+ size_t max_header_size;
+ size_t read_rsp_size;
+ unsigned int cap_unix;
+ unsigned int cap_nt_find;
+ unsigned int cap_large_files;
+ __u16 signing_enabled;
+ __u16 signing_required;
+ size_t create_lease_size;
+ size_t create_durable_size;
+ size_t create_durable_v2_size;
+ size_t create_mxac_size;
+ size_t create_disk_id_size;
+ size_t create_posix_size;
+};
+
+struct filesystem_posix_info {
+ /* For undefined recommended transfer size return -1 in that field */
+ __le32 OptimalTransferSize; /* bsize on some os, iosize on other os */
+ __le32 BlockSize;
+ /* The next three fields are in terms of the block size.
+ * (above). If block size is unknown, 4096 would be a
+ * reasonable block size for a server to report.
+ * Note that returning the blocks/blocksavail removes need
+ * to make a second call (to QFSInfo level 0x103) to get this info.
+ * UserBlockAvail is typically less than or equal to BlocksAvail,
+ * if no distinction is made return the same value in each
+ */
+ __le64 TotalBlocks;
+ __le64 BlocksAvail; /* bfree */
+ __le64 UserBlocksAvail; /* bavail */
+ /* For undefined Node fields or FSID return -1 */
+ __le64 TotalFileNodes;
+ __le64 FreeFileNodes;
+ __le64 FileSysIdentifier; /* fsid */
+ /* NB Namelen comes from FILE_SYSTEM_ATTRIBUTE_INFO call */
+ /* NB flags can come from FILE_SYSTEM_DEVICE_INFO call */
+} __packed;
+
+struct smb_version_ops {
+ u16 (*get_cmd_val)(struct ksmbd_work *swork);
+ int (*init_rsp_hdr)(struct ksmbd_work *swork);
+ void (*set_rsp_status)(struct ksmbd_work *swork, __le32 err);
+ int (*allocate_rsp_buf)(struct ksmbd_work *work);
+ int (*set_rsp_credits)(struct ksmbd_work *work);
+ int (*check_user_session)(struct ksmbd_work *work);
+ int (*get_ksmbd_tcon)(struct ksmbd_work *work);
+ bool (*is_sign_req)(struct ksmbd_work *work, unsigned int command);
+ int (*check_sign_req)(struct ksmbd_work *work);
+ void (*set_sign_rsp)(struct ksmbd_work *work);
+ int (*generate_signingkey)(struct ksmbd_session *sess, struct ksmbd_conn *conn);
+ int (*generate_encryptionkey)(struct ksmbd_session *sess);
+ bool (*is_transform_hdr)(void *buf);
+ int (*decrypt_req)(struct ksmbd_work *work);
+ int (*encrypt_resp)(struct ksmbd_work *work);
+};
+
+struct smb_version_cmds {
+ int (*proc)(struct ksmbd_work *swork);
+};
+
+/*
+ * Return @vals->header_size reduced by 4 bytes (presumably the leading
+ * length field that precedes the header -- confirm against callers).
+ */
+static inline size_t
+smb2_hdr_size_no_buflen(struct smb_version_values *vals)
+{
+	size_t full_hdr = vals->header_size;
+
+	return full_hdr - 4;
+}
+
+int ksmbd_min_protocol(void);
+int ksmbd_max_protocol(void);
+
+int ksmbd_lookup_protocol_idx(char *str);
+
+int ksmbd_verify_smb_message(struct ksmbd_work *work);
+bool ksmbd_smb_request(struct ksmbd_conn *conn);
+
+int ksmbd_lookup_dialect_by_id(__le16 *cli_dialects, __le16 dialects_count);
+
+int ksmbd_init_smb_server(struct ksmbd_work *work);
+
+bool ksmbd_pdu_size_has_room(unsigned int pdu);
+
+struct ksmbd_kstat;
+int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work,
+ int info_level,
+ struct ksmbd_file *dir,
+ struct ksmbd_dir_info *d_info,
+ char *search_pattern,
+ int (*fn)(struct ksmbd_conn *,
+ int,
+ struct ksmbd_dir_info *,
+ struct user_namespace *,
+ struct ksmbd_kstat *));
+
+int ksmbd_extract_shortname(struct ksmbd_conn *conn,
+ const char *longname,
+ char *shortname);
+
+int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command);
+
+int ksmbd_smb_check_shared_mode(struct file *filp, struct ksmbd_file *curr_fp);
+int ksmbd_override_fsids(struct ksmbd_work *work);
+void ksmbd_revert_fsids(struct ksmbd_work *work);
+
+unsigned int ksmbd_server_side_copy_max_chunk_count(void);
+unsigned int ksmbd_server_side_copy_max_chunk_size(void);
+unsigned int ksmbd_server_side_copy_max_total_size(void);
+bool is_asterisk(char *p);
+__le32 smb_map_generic_desired_access(__le32 daccess);
+
+/*
+ * Read the big-endian 32-bit word at the start of @buf and return its low
+ * 24 bits in CPU byte order.
+ */
+static inline unsigned int get_rfc1002_len(void *buf)
+{
+	__be32 *len_field = buf;
+
+	return be32_to_cpu(*len_field) & 0xffffff;
+}
+
+/* Add @count, in place, to the big-endian 32-bit word at the start of @buf. */
+static inline void inc_rfc1001_len(void *buf, int count)
+{
+	__be32 *len_field = buf;
+
+	be32_add_cpu(len_field, count);
+}
+#endif /* __SMB_COMMON_H__ */
diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c
new file mode 100644
index 000000000000..5456e3ad943e
--- /dev/null
+++ b/fs/ksmbd/smbacl.c
@@ -0,0 +1,1366 @@
+// SPDX-License-Identifier: LGPL-2.1+
+/*
+ * Copyright (C) International Business Machines Corp., 2007,2008
+ * Author(s): Steve French (sfrench@us.ibm.com)
+ * Copyright (C) 2020 Samsung Electronics Co., Ltd.
+ * Author(s): Namjae Jeon <linkinjeon@kernel.org>
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "smbacl.h"
+#include "smb_common.h"
+#include "server.h"
+#include "misc.h"
+#include "mgmt/share_config.h"
+
+static const struct smb_sid domain = {1, 4, {0, 0, 0, 0, 0, 5},
+ {cpu_to_le32(21), cpu_to_le32(1), cpu_to_le32(2), cpu_to_le32(3),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* security id for the Creator Owner placeholder (S-1-3-0) */
+static const struct smb_sid creator_owner = {
+ 1, 1, {0, 0, 0, 0, 0, 3}, {0} };
+/* security id for the Creator Group placeholder (S-1-3-1) */
+static const struct smb_sid creator_group = {
+ 1, 1, {0, 0, 0, 0, 0, 3}, {cpu_to_le32(1)} };
+
+/* security id for everyone/world system group */
+static const struct smb_sid sid_everyone = {
+ 1, 1, {0, 0, 0, 0, 0, 1}, {0} };
+/* security id for Authenticated Users system group */
+static const struct smb_sid sid_authusers = {
+ 1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11)} };
+
+/* S-1-22-1 Unmapped Unix users */
+static const struct smb_sid sid_unix_users = {1, 1, {0, 0, 0, 0, 0, 22},
+ {cpu_to_le32(1), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* S-1-22-2 Unmapped Unix groups */
+static const struct smb_sid sid_unix_groups = { 1, 1, {0, 0, 0, 0, 0, 22},
+ {cpu_to_le32(2), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/*
+ * See http://technet.microsoft.com/en-us/library/hh509017(v=ws.10).aspx
+ */
+
+/* S-1-5-88 MS NFS and Apple style UID/GID/mode */
+
+/* S-1-5-88-1 Unix uid */
+static const struct smb_sid sid_unix_NFS_users = { 1, 2, {0, 0, 0, 0, 0, 5},
+ {cpu_to_le32(88),
+ cpu_to_le32(1), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* S-1-5-88-2 Unix gid */
+static const struct smb_sid sid_unix_NFS_groups = { 1, 2, {0, 0, 0, 0, 0, 5},
+ {cpu_to_le32(88),
+ cpu_to_le32(2), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* S-1-5-88-3 Unix mode */
+static const struct smb_sid sid_unix_NFS_mode = { 1, 2, {0, 0, 0, 0, 0, 5},
+ {cpu_to_le32(88),
+ cpu_to_le32(3), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/*
+ * Compare two SIDs field by field: revision, then the NUM_AUTHS authority
+ * bytes, then the sub-authorities both SIDs have in common.
+ *
+ * Returns 0 when every compared field matches, 1 or -1 according to the
+ * first field that differs, and 1 when either pointer is NULL.  Only
+ * min(num_subauth) sub-authorities are examined, so two SIDs that differ
+ * solely in having extra trailing sub-authorities compare as equal.
+ */
+int compare_sids(const struct smb_sid *ctsid, const struct smb_sid *cwsid)
+{
+	int idx;
+	int common;
+
+	if (!ctsid || !cwsid)
+		return 1;
+
+	/* revision */
+	if (ctsid->revision != cwsid->revision)
+		return ctsid->revision > cwsid->revision ? 1 : -1;
+
+	/* identifier authority bytes */
+	for (idx = 0; idx < NUM_AUTHS; idx++) {
+		if (ctsid->authority[idx] == cwsid->authority[idx])
+			continue;
+		return ctsid->authority[idx] > cwsid->authority[idx] ? 1 : -1;
+	}
+
+	/* sub-authorities shared by both SIDs */
+	common = ctsid->num_subauth;
+	if (cwsid->num_subauth < common)
+		common = cwsid->num_subauth;
+
+	for (idx = 0; idx < common; idx++) {
+		if (ctsid->sub_auth[idx] == cwsid->sub_auth[idx])
+			continue;
+		/* ordering is decided on the CPU-endian values */
+		return le32_to_cpu(ctsid->sub_auth[idx]) >
+		       le32_to_cpu(cwsid->sub_auth[idx]) ? 1 : -1;
+	}
+
+	return 0; /* sids compare/match */
+}
+
+/*
+ * Copy @src into @dst.  The sub-authority count stored in @dst, and the
+ * number of sub-authorities copied, is capped at SID_MAX_SUB_AUTHORITIES.
+ */
+static void smb_copy_sid(struct smb_sid *dst, const struct smb_sid *src)
+{
+	u8 nsub = min_t(u8, src->num_subauth, SID_MAX_SUB_AUTHORITIES);
+	int idx;
+
+	dst->revision = src->revision;
+	dst->num_subauth = nsub;
+
+	for (idx = 0; idx < NUM_AUTHS; idx++)
+		dst->authority[idx] = src->authority[idx];
+
+	for (idx = 0; idx < nsub; idx++)
+		dst->sub_auth[idx] = src->sub_auth[idx];
+}
+
+/*
+ * Translate the access mask of an ACE into POSIX permission bits.
+ *
+ * @fattr:     only cf_mode is consulted, to tell whether the object is a
+ *             directory
+ * @ace_flags: little-endian access mask of the ACE
+ * @type:      ACE type
+ *
+ * The returned bits are replicated for user, group and other (0444 for
+ * read, 0222 for write, 0111 for execute).  Write access on a directory
+ * also yields the execute bits.  GENERIC_ALL returns 0777 immediately,
+ * whatever @type is; for every other mask the result is bitwise inverted
+ * when @type is one of the two access-denied ACE types.
+ */
+static umode_t access_flags_to_mode(struct smb_fattr *fattr, __le32 ace_flags,
+				    int type)
+{
+	__u32 flags = le32_to_cpu(ace_flags);
+	umode_t mode = 0;
+
+	if (flags & GENERIC_ALL) {
+		mode = 0777;
+		ksmbd_debug(SMB, "all perms\n");
+		return mode;
+	}
+
+	if (flags & (GENERIC_READ | FILE_READ_RIGHTS))
+		mode = 0444;
+
+	if (flags & (GENERIC_WRITE | FILE_WRITE_RIGHTS)) {
+		mode |= 0222;
+		if (S_ISDIR(fattr->cf_mode))
+			mode |= 0111;
+	}
+
+	if (flags & (GENERIC_EXECUTE | FILE_EXEC_RIGHTS))
+		mode |= 0111;
+
+	if (type == ACCESS_DENIED_ACE_TYPE)
+		mode = ~mode;
+	else if (type == ACCESS_DENIED_OBJECT_ACE_TYPE)
+		mode = ~mode;
+
+	ksmbd_debug(SMB, "access flags 0x%x mode now %04o\n", flags, mode);
+
+	return mode;
+}
+
+/*
+ * Derive an ACE access mask from POSIX permission bits.
+ *
+ * @mode:        permission bits to translate
+ * @bits_to_use: mask selecting the class to look at (S_IRWXU, S_IRWXG or
+ *               S_IRWXO); every other bit of @mode is ignored
+ * @pace_flags:  receives the resulting mask in CPU byte order; it is
+ *               always overwritten, and is 0 when no selected bit is set
+ */
+static void mode_to_access_flags(umode_t mode, umode_t bits_to_use,
+				 __u32 *pace_flags)
+{
+	umode_t selected = mode & bits_to_use;
+	__u32 rights = 0x0;
+
+	/*
+	 * The r/w/x tests cover all three classes at once; that is safe
+	 * because @selected only retains the class picked by @bits_to_use.
+	 */
+	if (selected & 0444)
+		rights |= SET_FILE_READ_RIGHTS;
+	if (selected & 0222)
+		rights |= FILE_WRITE_RIGHTS;
+	if (selected & 0111)
+		rights |= SET_FILE_EXEC_RIGHTS;
+
+	*pace_flags = rights;
+
+	ksmbd_debug(SMB, "mode: %o, access flags now 0x%x\n",
+		    selected, rights);
+}
+
+/*
+ * Fill @pntace with an ACE of the given @type and @flags for @psid.
+ *
+ * The access mask is derived from @mode restricted to @bits through
+ * mode_to_access_flags(); when that yields no rights, SET_MINIMUM_RIGHTS
+ * is used instead.  @psid is copied into the ACE with its sub-authority
+ * count taken as-is.
+ *
+ * Returns the ACE size in bytes, which is also stored little-endian in
+ * pntace->size.
+ */
+static __u16 fill_ace_for_sid(struct smb_ace *pntace,
+			      const struct smb_sid *psid, int type, int flags,
+			      umode_t mode, umode_t bits)
+{
+	__u32 access_req = 0;
+	__u16 size;
+	int idx;
+
+	mode_to_access_flags(mode, bits, &access_req);
+	if (access_req == 0)
+		access_req = SET_MINIMUM_RIGHTS;
+
+	pntace->type = type;
+	pntace->flags = flags;
+	pntace->access_req = cpu_to_le32(access_req);
+
+	pntace->sid.revision = psid->revision;
+	pntace->sid.num_subauth = psid->num_subauth;
+	for (idx = 0; idx < NUM_AUTHS; idx++)
+		pntace->sid.authority[idx] = psid->authority[idx];
+	for (idx = 0; idx < psid->num_subauth; idx++)
+		pntace->sid.sub_auth[idx] = psid->sub_auth[idx];
+
+	/*
+	 * type(1) + flags(1) + size(2) + access_req(4), then the SID:
+	 * revision(1) + num_subauth(1) + authority(6) + 4 per sub-authority
+	 */
+	size = 1 + 1 + 2 + 4 + 1 + 1 + 6 + (psid->num_subauth * 4);
+	pntace->size = cpu_to_le16(size);
+
+	return size;
+}
+
+/*
+ * Build in @ssid the SID that represents id @cid for the given @sidtype.
+ *
+ * A template SID selected by @sidtype is copied into @ssid and, except for
+ * SIDCREATOR_OWNER and SIDCREATOR_GROUP, @cid is appended as one further
+ * sub-authority (the RID).  An unrecognised @sidtype leaves @ssid
+ * untouched.  If the template already uses every sub-authority slot the
+ * RID is not appended and an error is logged.
+ */
+void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid)
+{
+	switch (sidtype) {
+	case SIDOWNER:
+		smb_copy_sid(ssid, &server_conf.domain_sid);
+		break;
+	case SIDUNIX_USER:
+		smb_copy_sid(ssid, &sid_unix_users);
+		break;
+	case SIDUNIX_GROUP:
+		smb_copy_sid(ssid, &sid_unix_groups);
+		break;
+	case SIDCREATOR_OWNER:
+		smb_copy_sid(ssid, &creator_owner);
+		return;
+	case SIDCREATOR_GROUP:
+		smb_copy_sid(ssid, &creator_group);
+		return;
+	case SIDNFS_USER:
+		smb_copy_sid(ssid, &sid_unix_NFS_users);
+		break;
+	case SIDNFS_GROUP:
+		smb_copy_sid(ssid, &sid_unix_NFS_groups);
+		break;
+	case SIDNFS_MODE:
+		smb_copy_sid(ssid, &sid_unix_NFS_mode);
+		break;
+	default:
+		return;
+	}
+
+	/*
+	 * smb_copy_sid() can hand back a SID that already holds
+	 * SID_MAX_SUB_AUTHORITIES entries (server_conf.domain_sid is not a
+	 * compile-time constant).  Appending the RID would then write past
+	 * sub_auth[], assuming the array holds SID_MAX_SUB_AUTHORITIES
+	 * entries as the clamp in smb_copy_sid() implies.
+	 */
+	if (ssid->num_subauth >= SID_MAX_SUB_AUTHORITIES) {
+		pr_err("%s: no room to append RID to SID\n", __func__);
+		return;
+	}
+
+	/* RID */
+	ssid->sub_auth[ssid->num_subauth] = cpu_to_le32(cid);
+	ssid->num_subauth++;
+}
+
+/*
+ * Map the last sub-authority (RID) of @psid to a uid or gid in @user_ns.
+ *
+ * For SIDOWNER the RID is stored in fattr->cf_uid, for any other @sidtype
+ * in fattr->cf_gid.  The id is accepted only when it is non-zero, valid
+ * and mapped in @user_ns.
+ *
+ * Returns 0 on success, -EIO when @psid carries more than
+ * SID_MAX_SUB_AUTHORITIES sub-authorities, and -EINVAL when it carries
+ * none or the id cannot be mapped.
+ */
+static int sid_to_id(struct user_namespace *user_ns,
+		     struct smb_sid *psid, uint sidtype,
+		     struct smb_fattr *fattr)
+{
+	int rc = -EINVAL;
+
+	/*
+	 * If we have too many subauthorities, then something is really wrong.
+	 * Just return an error.
+	 */
+	if (unlikely(psid->num_subauth > SID_MAX_SUB_AUTHORITIES)) {
+		pr_err("%s: %u subauthorities is too many!\n",
+		       __func__, psid->num_subauth);
+		return -EIO;
+	}
+
+	/*
+	 * The RID is read from sub_auth[num_subauth - 1]; with no
+	 * sub-authority at all that index would be -1.
+	 */
+	if (unlikely(!psid->num_subauth)) {
+		pr_err("%s: SID has no subauthorities!\n", __func__);
+		return -EINVAL;
+	}
+
+	if (sidtype == SIDOWNER) {
+		kuid_t uid;
+		uid_t id;
+
+		id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]);
+		if (id > 0) {
+			uid = make_kuid(user_ns, id);
+			if (uid_valid(uid) && kuid_has_mapping(user_ns, uid)) {
+				fattr->cf_uid = uid;
+				rc = 0;
+			}
+		}
+	} else {
+		kgid_t gid;
+		gid_t id;
+
+		id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]);
+		if (id > 0) {
+			gid = make_kgid(user_ns, id);
+			if (gid_valid(gid) && kgid_has_mapping(user_ns, gid)) {
+				fattr->cf_gid = gid;
+				rc = 0;
+			}
+		}
+	}
+
+	return rc;
+}
+
+/*
+ * Serialise @state into consecutive POSIX ACL entries starting at @pace.
+ *
+ * Entries are written in this order: ACL_USER_OBJ, one ACL_USER per named
+ * user, ACL_GROUP_OBJ, one ACL_GROUP per named group, ACL_MASK (only when
+ * at least one named user or group exists) and ACL_OTHER.  The caller
+ * must provide room for all of them; no bound is checked here.
+ */
+void posix_state_to_acl(struct posix_acl_state *state,
+			struct posix_acl_entry *pace)
+{
+	struct posix_acl_entry *cur = pace;
+	int idx;
+
+	cur->e_tag = ACL_USER_OBJ;
+	cur->e_perm = state->owner.allow;
+	cur++;
+
+	for (idx = 0; idx < state->users->n; idx++) {
+		cur->e_tag = ACL_USER;
+		cur->e_uid = state->users->aces[idx].uid;
+		cur->e_perm = state->users->aces[idx].perms.allow;
+		cur++;
+	}
+
+	cur->e_tag = ACL_GROUP_OBJ;
+	cur->e_perm = state->group.allow;
+	cur++;
+
+	for (idx = 0; idx < state->groups->n; idx++) {
+		cur->e_tag = ACL_GROUP;
+		cur->e_gid = state->groups->aces[idx].gid;
+		cur->e_perm = state->groups->aces[idx].perms.allow;
+		cur++;
+	}
+
+	if (state->users->n || state->groups->n) {
+		cur->e_tag = ACL_MASK;
+		cur->e_perm = state->mask.allow;
+		cur++;
+	}
+
+	cur->e_tag = ACL_OTHER;
+	cur->e_perm = state->other.allow;
+}
+
+/*
+ * Zero @state and allocate its users and groups arrays, each with room for
+ * @cnt entries.
+ *
+ * Returns 0 on success, -EINVAL when @cnt is negative, and -ENOMEM when the
+ * requested size cannot be represented or an allocation fails.  On failure
+ * nothing remains allocated and both pointers in @state are NULL, so a
+ * later free_acl_state() is harmless.
+ */
+int init_acl_state(struct posix_acl_state *state, int cnt)
+{
+	size_t alloc;
+
+	memset(state, 0, sizeof(struct posix_acl_state));
+
+	if (cnt < 0)
+		return -EINVAL;
+
+	/* keep header + cnt * entry from wrapping around */
+	if ((size_t)cnt > (SIZE_MAX - sizeof(struct posix_ace_state_array)) /
+			  sizeof(struct posix_user_ace_state))
+		return -ENOMEM;
+
+	/*
+	 * In the worst case, each individual acl could be for a distinct
+	 * named user or group, but we don't know which, so we allocate
+	 * enough space for either:
+	 */
+	alloc = sizeof(struct posix_ace_state_array)
+		+ (size_t)cnt * sizeof(struct posix_user_ace_state);
+	state->users = kzalloc(alloc, GFP_KERNEL);
+	if (!state->users)
+		return -ENOMEM;
+	state->groups = kzalloc(alloc, GFP_KERNEL);
+	if (!state->groups) {
+		kfree(state->users);
+		/* do not leave a dangling pointer behind for free_acl_state() */
+		state->users = NULL;
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * Release the users and groups arrays held by @state.  The pointers stored
+ * in @state are left unchanged.
+ */
+void free_acl_state(struct posix_acl_state *state)
+{
+	kfree(state->groups);
+	kfree(state->users);
+}
+
+/*
+ * parse_dacl() - convert the ACEs of a DACL into POSIX mode bits and ACLs
+ * @user_ns:	user namespace used to map SIDs to kuids
+ * @pdacl:	DACL header inside the security descriptor (may be NULL)
+ * @end_of_acl:	first byte past the end of the security descriptor buffer
+ * @pownersid:	owner SID of the descriptor, used to recognise owner ACEs
+ * @pgrpsid:	group SID of the descriptor, used to recognise group ACEs
+ * @fattr:	receives cf_mode and the allocated cf_acls / cf_dacls
+ *
+ * Every ACE is validated against @end_of_acl before it is read; parsing
+ * stops at the first ACE that does not fit.  The function has no return
+ * value: on malformed input or allocation failure @fattr is simply left
+ * without (further) updates.
+ */
+static void parse_dacl(struct user_namespace *user_ns,
+		       struct smb_acl *pdacl, char *end_of_acl,
+		       struct smb_sid *pownersid, struct smb_sid *pgrpsid,
+		       struct smb_fattr *fattr)
+{
+	int i, ret;
+	int num_aces = 0;
+	unsigned int acl_size;
+	unsigned int dacl_size;
+	char *acl_base;
+	struct smb_ace **ppace;
+	struct posix_acl_entry *cf_pace, *cf_pdace;
+	struct posix_acl_state acl_state, default_acl_state;
+	umode_t mode = 0, acl_mode;
+	bool owner_found = false, group_found = false, others_found = false;
+	/* bytes of an ACE up to, but not including, its first sub-authority */
+	const unsigned int ace_hdr_size = offsetof(struct smb_ace, sid) +
+					  offsetof(struct smb_sid, sub_auth);
+
+	if (!pdacl)
+		return;
+
+	/*
+	 * validate that we do not go past end of acl: the fixed header must
+	 * be readable before pdacl->size is trusted, and the declared size
+	 * must cover at least that header.
+	 */
+	if (end_of_acl <= (char *)pdacl ||
+	    (size_t)(end_of_acl - (char *)pdacl) < sizeof(struct smb_acl) ||
+	    le16_to_cpu(pdacl->size) < sizeof(struct smb_acl) ||
+	    end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) {
+		pr_err("ACL too small to parse DACL\n");
+		return;
+	}
+	dacl_size = le16_to_cpu(pdacl->size);
+
+	ksmbd_debug(SMB, "DACL revision %d size %d num aces %d\n",
+		    le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size),
+		    le32_to_cpu(pdacl->num_aces));
+
+	num_aces = le32_to_cpu(pdacl->num_aces);
+	if (num_aces <= 0)
+		return;
+
+	/* more ACEs than could possibly fit in the DACL cannot be genuine */
+	if ((unsigned int)num_aces >
+	    (dacl_size - sizeof(struct smb_acl)) / ace_hdr_size)
+		return;
+
+	ppace = kmalloc_array(num_aces, sizeof(struct smb_ace *), GFP_KERNEL);
+	if (!ppace)
+		return;
+
+	ret = init_acl_state(&acl_state, num_aces);
+	if (ret)
+		goto free_ppace;
+	ret = init_acl_state(&default_acl_state, num_aces);
+	if (ret)
+		goto free_acl_state;
+
+	acl_base = (char *)pdacl;
+	acl_size = sizeof(struct smb_acl);
+
+	/*
+	 * reset rwx permissions for user/group/other.
+	 * Also, if num_aces is 0 i.e. DACL has no ACEs,
+	 * user/group/other have no permissions
+	 */
+	for (i = 0; i < num_aces; ++i) {
+		unsigned int min_size;
+
+		/* the previous element must end inside the buffer */
+		if ((size_t)(end_of_acl - acl_base) < acl_size)
+			break;
+
+		ppace[i] = (struct smb_ace *)(acl_base + acl_size);
+		acl_base = (char *)ppace[i];
+
+		/* the fixed part must be readable before num_subauth is used */
+		if ((size_t)(end_of_acl - acl_base) < ace_hdr_size ||
+		    ppace[i]->sid.num_subauth == 0 ||
+		    ppace[i]->sid.num_subauth > SID_MAX_SUB_AUTHORITIES)
+			break;
+
+		/* the whole SID must be inside both the buffer and the ACE */
+		min_size = ace_hdr_size +
+			   sizeof(__le32) * ppace[i]->sid.num_subauth;
+		if ((size_t)(end_of_acl - acl_base) < min_size ||
+		    le16_to_cpu(ppace[i]->size) < min_size)
+			break;
+
+		acl_size = le16_to_cpu(ppace[i]->size);
+		ppace[i]->access_req =
+			smb_map_generic_desired_access(ppace[i]->access_req);
+
+		if (!(compare_sids(&ppace[i]->sid, &sid_unix_NFS_mode))) {
+			fattr->cf_mode =
+				le32_to_cpu(ppace[i]->sid.sub_auth[2]);
+			break;
+		} else if (!compare_sids(&ppace[i]->sid, pownersid)) {
+			acl_mode = access_flags_to_mode(fattr,
+							ppace[i]->access_req,
+							ppace[i]->type);
+			acl_mode &= 0700;
+
+			/* only the first owner ACE contributes mode bits */
+			if (!owner_found) {
+				mode &= ~(0700);
+				mode |= acl_mode;
+			}
+			owner_found = true;
+		} else if (!compare_sids(&ppace[i]->sid, pgrpsid) ||
+			   ppace[i]->sid.sub_auth[ppace[i]->sid.num_subauth - 1] ==
+			    DOMAIN_USER_RID_LE) {
+			acl_mode = access_flags_to_mode(fattr,
+							ppace[i]->access_req,
+							ppace[i]->type);
+			acl_mode &= 0070;
+			if (!group_found) {
+				mode &= ~(0070);
+				mode |= acl_mode;
+			}
+			group_found = true;
+		} else if (!compare_sids(&ppace[i]->sid, &sid_everyone)) {
+			acl_mode = access_flags_to_mode(fattr,
+							ppace[i]->access_req,
+							ppace[i]->type);
+			acl_mode &= 0007;
+			if (!others_found) {
+				mode &= ~(0007);
+				mode |= acl_mode;
+			}
+			others_found = true;
+		} else if (!compare_sids(&ppace[i]->sid, &creator_owner)) {
+			continue;
+		} else if (!compare_sids(&ppace[i]->sid, &creator_group)) {
+			continue;
+		} else if (!compare_sids(&ppace[i]->sid, &sid_authusers)) {
+			continue;
+		} else {
+			/* any other SID becomes a named-user entry */
+			struct smb_fattr temp_fattr;
+
+			acl_mode = access_flags_to_mode(fattr, ppace[i]->access_req,
+							ppace[i]->type);
+			temp_fattr.cf_uid = INVALID_UID;
+			ret = sid_to_id(user_ns, &ppace[i]->sid, SIDOWNER, &temp_fattr);
+			if (ret || uid_eq(temp_fattr.cf_uid, INVALID_UID)) {
+				pr_err("%s: Error %d mapping Owner SID to uid\n",
+				       __func__, ret);
+				continue;
+			}
+
+			acl_state.owner.allow = ((acl_mode & 0700) >> 6) | 0004;
+			acl_state.users->aces[acl_state.users->n].uid =
+				temp_fattr.cf_uid;
+			acl_state.users->aces[acl_state.users->n++].perms.allow =
+				((acl_mode & 0700) >> 6) | 0004;
+			default_acl_state.owner.allow = ((acl_mode & 0700) >> 6) | 0004;
+			default_acl_state.users->aces[default_acl_state.users->n].uid =
+				temp_fattr.cf_uid;
+			default_acl_state.users->aces[default_acl_state.users->n++].perms.allow =
+				((acl_mode & 0700) >> 6) | 0004;
+		}
+	}
+
+	if (owner_found) {
+		/* The owner must be set to at least read-only. */
+		acl_state.owner.allow = ((mode & 0700) >> 6) | 0004;
+		acl_state.users->aces[acl_state.users->n].uid = fattr->cf_uid;
+		acl_state.users->aces[acl_state.users->n++].perms.allow =
+			((mode & 0700) >> 6) | 0004;
+		default_acl_state.owner.allow = ((mode & 0700) >> 6) | 0004;
+		default_acl_state.users->aces[default_acl_state.users->n].uid =
+			fattr->cf_uid;
+		default_acl_state.users->aces[default_acl_state.users->n++].perms.allow =
+			((mode & 0700) >> 6) | 0004;
+	}
+
+	if (group_found) {
+		acl_state.group.allow = (mode & 0070) >> 3;
+		acl_state.groups->aces[acl_state.groups->n].gid =
+			fattr->cf_gid;
+		acl_state.groups->aces[acl_state.groups->n++].perms.allow =
+			(mode & 0070) >> 3;
+		default_acl_state.group.allow = (mode & 0070) >> 3;
+		default_acl_state.groups->aces[default_acl_state.groups->n].gid =
+			fattr->cf_gid;
+		default_acl_state.groups->aces[default_acl_state.groups->n++].perms.allow =
+			(mode & 0070) >> 3;
+	}
+
+	if (others_found) {
+		fattr->cf_mode &= ~(0007);
+		fattr->cf_mode |= mode & 0007;
+
+		acl_state.other.allow = mode & 0007;
+		default_acl_state.other.allow = mode & 0007;
+	}
+
+	if (acl_state.users->n || acl_state.groups->n) {
+		acl_state.mask.allow = 0x07;
+
+		if (IS_ENABLED(CONFIG_FS_POSIX_ACL)) {
+			/* +4: USER_OBJ, GROUP_OBJ, MASK and OTHER entries */
+			fattr->cf_acls =
+				posix_acl_alloc(acl_state.users->n +
+					acl_state.groups->n + 4, GFP_KERNEL);
+			if (fattr->cf_acls) {
+				cf_pace = fattr->cf_acls->a_entries;
+				posix_state_to_acl(&acl_state, cf_pace);
+			}
+		}
+	}
+
+	if (default_acl_state.users->n || default_acl_state.groups->n) {
+		default_acl_state.mask.allow = 0x07;
+
+		if (IS_ENABLED(CONFIG_FS_POSIX_ACL)) {
+			fattr->cf_dacls =
+				posix_acl_alloc(default_acl_state.users->n +
+				default_acl_state.groups->n + 4, GFP_KERNEL);
+			if (fattr->cf_dacls) {
+				cf_pdace = fattr->cf_dacls->a_entries;
+				posix_state_to_acl(&default_acl_state, cf_pdace);
+			}
+		}
+	}
+
+	free_acl_state(&default_acl_state);
+free_acl_state:
+	free_acl_state(&acl_state);
+free_ppace:
+	/* previously leaked when either init_acl_state() call failed */
+	kfree(ppace);
+}
+
+/*
+ * Append NT ACEs derived from the POSIX ACLs in @fattr to the ACE area
+ * starting at @pndace.
+ *
+ * @num_aces and @size are in/out: they hold the number of ACEs and the number
+ * of bytes already present at @pndace and are advanced for every ACE that is
+ * appended.  @nt_aces_num is the count of ACEs at the start of @pndace that
+ * are scanned for an entry with the same trailing sub-authority before a new
+ * one is added.  When @nt_aces_num is non-zero the default ACL (cf_dacls) is
+ * not converted.
+ *
+ * NOTE(review): nothing here bounds the writes at @pndace + *@size; the
+ * caller presumably sizes the buffer - confirm.
+ */
+static void set_posix_acl_entries_dacl(struct user_namespace *user_ns,
+ struct smb_ace *pndace,
+ struct smb_fattr *fattr, u32 *num_aces,
+ u16 *size, u32 nt_aces_num)
+{
+ struct posix_acl_entry *pace;
+ struct smb_sid *sid;
+ struct smb_ace *ntace;
+ int i, j;
+
+ if (!fattr->cf_acls)
+ goto posix_default_acl;
+
+ /* First pass: the access ACL. */
+ pace = fattr->cf_acls->a_entries;
+ for (i = 0; i < fattr->cf_acls->a_count; i++, pace++) {
+ int flags = 0;
+
+ sid = kmalloc(sizeof(struct smb_sid), GFP_KERNEL);
+ if (!sid)
+ break;
+
+ /* Only ACL_USER, ACL_GROUP and (sometimes) ACL_OTHER are converted. */
+ if (pace->e_tag == ACL_USER) {
+ uid_t uid;
+ unsigned int sid_type = SIDOWNER;
+
+ uid = from_kuid(user_ns, pace->e_uid);
+ /* uid 0 is expressed with the SIDUNIX_USER SID type */
+ if (!uid)
+ sid_type = SIDUNIX_USER;
+ id_to_sid(uid, sid_type, sid);
+ } else if (pace->e_tag == ACL_GROUP) {
+ gid_t gid;
+
+ gid = from_kgid(user_ns, pace->e_gid);
+ id_to_sid(gid, SIDUNIX_GROUP, sid);
+ } else if (pace->e_tag == ACL_OTHER && !nt_aces_num) {
+ smb_copy_sid(sid, &sid_everyone);
+ } else {
+ kfree(sid);
+ continue;
+ }
+ /*
+ * Skip this entry if one of the first nt_aces_num ACEs already has
+ * the same last sub-authority value.
+ */
+ ntace = pndace;
+ for (j = 0; j < nt_aces_num; j++) {
+ if (ntace->sid.sub_auth[ntace->sid.num_subauth - 1] ==
+ sid->sub_auth[sid->num_subauth - 1])
+ goto pass_same_sid;
+ ntace = (struct smb_ace *)((char *)ntace +
+ le16_to_cpu(ntace->size));
+ }
+
+ /* 0x03 == OBJECT_INHERIT_ACE | CONTAINER_INHERIT_ACE */
+ if (S_ISDIR(fattr->cf_mode) && pace->e_tag == ACL_OTHER)
+ flags = 0x03;
+
+ /* Append the new ACE right after the bytes already written. */
+ ntace = (struct smb_ace *)((char *)pndace + *size);
+ *size += fill_ace_for_sid(ntace, sid, ACCESS_ALLOWED, flags,
+ pace->e_perm, 0777);
+ (*num_aces)++;
+ if (pace->e_tag == ACL_USER)
+ ntace->access_req |=
+ FILE_DELETE_LE | FILE_DELETE_CHILD_LE;
+
+ /*
+ * For directories, named users/groups get a second ACE carrying the
+ * object/container inherit flags (0x03).
+ */
+ if (S_ISDIR(fattr->cf_mode) &&
+ (pace->e_tag == ACL_USER || pace->e_tag == ACL_GROUP)) {
+ ntace = (struct smb_ace *)((char *)pndace + *size);
+ *size += fill_ace_for_sid(ntace, sid, ACCESS_ALLOWED,
+ 0x03, pace->e_perm, 0777);
+ (*num_aces)++;
+ if (pace->e_tag == ACL_USER)
+ ntace->access_req |=
+ FILE_DELETE_LE | FILE_DELETE_CHILD_LE;
+ }
+
+pass_same_sid:
+ kfree(sid);
+ }
+
+ /* With pre-existing NT ACEs the default ACL is not converted. */
+ if (nt_aces_num)
+ return;
+
+posix_default_acl:
+ if (!fattr->cf_dacls)
+ return;
+
+ /* Second pass: the default ACL, emitted as inherit-only ACEs. */
+ pace = fattr->cf_dacls->a_entries;
+ for (i = 0; i < fattr->cf_dacls->a_count; i++, pace++) {
+ sid = kmalloc(sizeof(struct smb_sid), GFP_KERNEL);
+ if (!sid)
+ break;
+
+ if (pace->e_tag == ACL_USER) {
+ uid_t uid;
+
+ uid = from_kuid(user_ns, pace->e_uid);
+ id_to_sid(uid, SIDCREATOR_OWNER, sid);
+ } else if (pace->e_tag == ACL_GROUP) {
+ gid_t gid;
+
+ gid = from_kgid(user_ns, pace->e_gid);
+ id_to_sid(gid, SIDCREATOR_GROUP, sid);
+ } else {
+ kfree(sid);
+ continue;
+ }
+
+ /* 0x0b == OBJECT_INHERIT_ACE | CONTAINER_INHERIT_ACE | INHERIT_ONLY_ACE */
+ ntace = (struct smb_ace *)((char *)pndace + *size);
+ *size += fill_ace_for_sid(ntace, sid, ACCESS_ALLOWED, 0x0b,
+ pace->e_perm, 0777);
+ (*num_aces)++;
+ if (pace->e_tag == ACL_USER)
+ ntace->access_req |=
+ FILE_DELETE_LE | FILE_DELETE_CHILD_LE;
+ kfree(sid);
+ }
+}
+
+/*
+ * set_ntacl_dacl() - build a DACL from a stored NT DACL plus POSIX ACLs
+ * @user_ns:	user namespace used to map POSIX ACL ids
+ * @pndacl:	DACL being built; its header must already be initialised
+ * @nt_dacl:	stored NT DACL whose ACEs are copied first
+ * @pownersid:	unused here
+ * @pgrpsid:	unused here
+ * @fattr:	carries the POSIX ACLs to merge in
+ *
+ * Copies every ACE of @nt_dacl behind the header of @pndacl, lets
+ * set_posix_acl_entries_dacl() append ACEs for POSIX ACL entries, then
+ * updates num_aces and size in the @pndacl header.
+ *
+ * NOTE(review): the ACE sizes read from @nt_dacl are not checked against any
+ * buffer length in this function; the caller is presumably responsible.
+ */
+static void set_ntacl_dacl(struct user_namespace *user_ns,
+			   struct smb_acl *pndacl,
+			   struct smb_acl *nt_dacl,
+			   const struct smb_sid *pownersid,
+			   const struct smb_sid *pgrpsid,
+			   struct smb_fattr *fattr)
+{
+	struct smb_ace *ntace, *pndace;
+	int nt_num_aces = le32_to_cpu(nt_dacl->num_aces);
+	/*
+	 * These are handed to set_posix_acl_entries_dacl() as u32 * / u16 *,
+	 * so declare them with exactly those types.
+	 */
+	u32 num_aces = 0;
+	u16 size = 0;
+	int i;
+
+	pndace = (struct smb_ace *)((char *)pndacl + sizeof(struct smb_acl));
+	if (nt_num_aces) {
+		ntace = (struct smb_ace *)((char *)nt_dacl + sizeof(struct smb_acl));
+		for (i = 0; i < nt_num_aces; i++) {
+			/* read the on-disk size once per ACE */
+			u16 ace_size = le16_to_cpu(ntace->size);
+
+			memcpy((char *)pndace + size, ntace, ace_size);
+			size += ace_size;
+			ntace = (struct smb_ace *)((char *)ntace + ace_size);
+			num_aces++;
+		}
+	}
+
+	set_posix_acl_entries_dacl(user_ns, pndace, fattr,
+				   &num_aces, &size, nt_num_aces);
+	pndacl->num_aces = cpu_to_le32(num_aces);
+	pndacl->size = cpu_to_le16(le16_to_cpu(pndacl->size) + size);
+}
+
+/*
+ * Build a DACL from @fattr when no stored NT security descriptor is used.
+ *
+ * If @fattr carries a POSIX access ACL the ACEs are produced by
+ * set_posix_acl_entries_dacl().  Otherwise the mode bits are translated into
+ * an owner ACE, a group ACE, for directories a creator-owner and a
+ * creator-group ACE, and finally an "everyone" ACE.  The num_aces and size
+ * fields of @pndacl are updated to account for what was written.
+ */
+static void set_mode_dacl(struct user_namespace *user_ns,
+			  struct smb_acl *pndacl, struct smb_fattr *fattr)
+{
+	struct smb_ace *first_ace, *cur;
+	u32 num_aces = 0;
+	u16 size = 0, ace_size = 0;
+	uid_t uid;
+	const struct smb_sid *sid;
+
+	first_ace = (struct smb_ace *)((char *)pndacl + sizeof(struct smb_acl));
+	cur = first_ace;
+
+	if (fattr->cf_acls) {
+		set_posix_acl_entries_dacl(user_ns, first_ace, fattr,
+					   &num_aces, &size, num_aces);
+	} else {
+		/* owner RID: uid 0 lives under the unix-users SID */
+		uid = from_kuid(user_ns, fattr->cf_uid);
+		sid = uid ? &server_conf.domain_sid : &sid_unix_users;
+		ace_size = fill_ace_for_sid(cur, sid, ACCESS_ALLOWED, 0,
+					    fattr->cf_mode, 0700);
+		/* append the id as one more sub-authority (4 extra bytes) */
+		cur->sid.sub_auth[cur->sid.num_subauth++] = cpu_to_le32(uid);
+		cur->size = cpu_to_le16(ace_size + 4);
+		size += le16_to_cpu(cur->size);
+		cur = (struct smb_ace *)((char *)first_ace + size);
+
+		/* Group RID */
+		ace_size = fill_ace_for_sid(cur, &sid_unix_groups,
+					    ACCESS_ALLOWED, 0, fattr->cf_mode, 0070);
+		cur->sid.sub_auth[cur->sid.num_subauth++] =
+			cpu_to_le32(from_kgid(user_ns, fattr->cf_gid));
+		cur->size = cpu_to_le16(ace_size + 4);
+		size += le16_to_cpu(cur->size);
+		cur = (struct smb_ace *)((char *)first_ace + size);
+		num_aces = 3;
+
+		if (S_ISDIR(fattr->cf_mode)) {
+			/* creator owner */
+			size += fill_ace_for_sid(cur, &creator_owner, ACCESS_ALLOWED,
+						 0x0b, fattr->cf_mode, 0700);
+			cur = (struct smb_ace *)((char *)first_ace + size);
+
+			/* creator group */
+			size += fill_ace_for_sid(cur, &creator_group, ACCESS_ALLOWED,
+						 0x0b, fattr->cf_mode, 0070);
+			cur = (struct smb_ace *)((char *)first_ace + size);
+			num_aces = 5;
+		}
+
+		/* other */
+		size += fill_ace_for_sid(cur, &sid_everyone, ACCESS_ALLOWED, 0,
+					 fattr->cf_mode, 0007);
+	}
+
+	pndacl->num_aces = cpu_to_le32(num_aces);
+	pndacl->size = cpu_to_le16(le16_to_cpu(pndacl->size) + size);
+}
+
+/*
+ * parse_sid() - check that a SID lies completely inside the descriptor
+ * @psid:	SID to validate
+ * @end_of_acl:	first byte past the end of the security descriptor buffer
+ *
+ * Return: 0 if the SID fits, -EINVAL otherwise.
+ */
+static int parse_sid(struct smb_sid *psid, char *end_of_acl)
+{
+	/*
+	 * validate that we do not go past end of ACL - sid must be at least 8
+	 * bytes long (assuming no sub-auths - e.g. the null SID)
+	 */
+	if (end_of_acl < (char *)psid + 8) {
+		pr_err("ACL too small to parse SID %p\n", psid);
+		return -EINVAL;
+	}
+
+	if (!psid->num_subauth)
+		return 0;
+
+	/*
+	 * Callers index sub_auth[num_subauth - 1], so the sub-authority
+	 * array must fit both the structure and the buffer.
+	 */
+	if (psid->num_subauth > SID_MAX_SUB_AUTHORITIES ||
+	    end_of_acl < (char *)psid + 8 + sizeof(__le32) * psid->num_subauth)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * parse_sec_desc() - convert a self-relative NT security descriptor to POSIX
+ * @user_ns:	user namespace used to map SIDs to ids
+ * @pntsd:	security descriptor; its type field is rewritten in place
+ * @acl_len:	length of the buffer at @pntsd in bytes
+ * @fattr:	receives owner uid, group gid, mode bits and POSIX ACLs
+ *
+ * Return: 0 on success (including when no DACL is present or a SID cannot be
+ * mapped to an id), -EIO if @pntsd is NULL, -EINVAL if the descriptor or one
+ * of its SIDs / its DACL header does not fit in @acl_len bytes.
+ */
+int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+		   int acl_len, struct smb_fattr *fattr)
+{
+	int rc = 0;
+	struct smb_sid *owner_sid_ptr, *group_sid_ptr;
+	struct smb_acl *dacl_ptr; /* no need for SACL ptr */
+	char *end_of_acl = ((char *)pntsd) + acl_len;
+	__u32 dacloffset;
+	int pntsd_type;
+
+	if (!pntsd)
+		return -EIO;
+
+	/* the fixed header must be present before any field of it is read */
+	if (acl_len < (int)sizeof(struct smb_ntsd))
+		return -EINVAL;
+
+	owner_sid_ptr = (struct smb_sid *)((char *)pntsd +
+			le32_to_cpu(pntsd->osidoffset));
+	group_sid_ptr = (struct smb_sid *)((char *)pntsd +
+			le32_to_cpu(pntsd->gsidoffset));
+	dacloffset = le32_to_cpu(pntsd->dacloffset);
+	dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset);
+	ksmbd_debug(SMB,
+		    "revision %d type 0x%x ooffset 0x%x goffset 0x%x sacloffset 0x%x dacloffset 0x%x\n",
+		    pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset),
+		    le32_to_cpu(pntsd->gsidoffset),
+		    le32_to_cpu(pntsd->sacloffset), dacloffset);
+
+	pntsd_type = le16_to_cpu(pntsd->type);
+	if (!(pntsd_type & DACL_PRESENT)) {
+		ksmbd_debug(SMB, "DACL_PRESENT in DACL type is not set\n");
+		return rc;
+	}
+
+	/*
+	 * The DACL header must lie inside the buffer before anything is
+	 * modified; acl_len is known to be >= sizeof(struct smb_ntsd) here,
+	 * so the subtraction cannot wrap.
+	 */
+	if (dacloffset &&
+	    dacloffset > (__u32)acl_len - sizeof(struct smb_acl))
+		return -EINVAL;
+
+	/* only the control bits re-added below survive in the descriptor */
+	pntsd->type = cpu_to_le16(DACL_PRESENT);
+
+	if (pntsd->osidoffset) {
+		rc = parse_sid(owner_sid_ptr, end_of_acl);
+		if (rc) {
+			pr_err("%s: Error %d parsing Owner SID\n", __func__, rc);
+			return rc;
+		}
+
+		/* an unmappable owner is not fatal; owner ACEs are then ignored */
+		rc = sid_to_id(user_ns, owner_sid_ptr, SIDOWNER, fattr);
+		if (rc) {
+			pr_err("%s: Error %d mapping Owner SID to uid\n",
+			       __func__, rc);
+			owner_sid_ptr = NULL;
+		}
+	}
+
+	if (pntsd->gsidoffset) {
+		rc = parse_sid(group_sid_ptr, end_of_acl);
+		if (rc) {
+			pr_err("%s: Error %d parsing Group SID\n",
+			       __func__, rc);
+			return rc;
+		}
+		rc = sid_to_id(user_ns, group_sid_ptr, SIDUNIX_GROUP, fattr);
+		if (rc) {
+			pr_err("%s: Error %d mapping Group SID to gid\n",
+			       __func__, rc);
+			group_sid_ptr = NULL;
+		}
+	}
+
+	if ((pntsd_type & (DACL_AUTO_INHERITED | DACL_AUTO_INHERIT_REQ)) ==
+	    (DACL_AUTO_INHERITED | DACL_AUTO_INHERIT_REQ))
+		pntsd->type |= cpu_to_le16(DACL_AUTO_INHERITED);
+	if (pntsd_type & DACL_PROTECTED)
+		pntsd->type |= cpu_to_le16(DACL_PROTECTED);
+
+	if (dacloffset) {
+		parse_dacl(user_ns, dacl_ptr, end_of_acl,
+			   owner_sid_ptr, group_sid_ptr, fattr);
+	}
+
+	return 0;
+}
+
+/*
+ * Convert permission bits from mode to equivalent CIFS ACL.
+ *
+ * Builds a self-relative security descriptor in @pntsd from the owner, group,
+ * mode and POSIX ACLs in @fattr.  @addition_info selects which parts
+ * (OWNER_SECINFO, GROUP_SECINFO, DACL_SECINFO) are emitted.  When @ppntsd is
+ * non-NULL its control flags are merged in and its DACL is used as the base
+ * of the new DACL.  The number of bytes laid out is stored in *@secdesclen.
+ *
+ * Returns 0, or -ENOMEM if a temporary SID cannot be allocated.
+ *
+ * NOTE(review): no size of the @pntsd buffer is passed in, so the caller
+ * presumably guarantees it is large enough - confirm.
+ */
+int build_sec_desc(struct user_namespace *user_ns,
+ struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd,
+ int addition_info, __u32 *secdesclen,
+ struct smb_fattr *fattr)
+{
+ int rc = 0;
+ __u32 offset;
+ struct smb_sid *owner_sid_ptr, *group_sid_ptr;
+ struct smb_sid *nowner_sid_ptr, *ngroup_sid_ptr;
+ struct smb_acl *dacl_ptr = NULL; /* no need for SACL ptr */
+ uid_t uid;
+ gid_t gid;
+ unsigned int sid_type = SIDOWNER;
+
+ nowner_sid_ptr = kmalloc(sizeof(struct smb_sid), GFP_KERNEL);
+ if (!nowner_sid_ptr)
+ return -ENOMEM;
+
+ /* uid 0 is expressed with the SIDUNIX_USER SID type */
+ uid = from_kuid(user_ns, fattr->cf_uid);
+ if (!uid)
+ sid_type = SIDUNIX_USER;
+ id_to_sid(uid, sid_type, nowner_sid_ptr);
+
+ ngroup_sid_ptr = kmalloc(sizeof(struct smb_sid), GFP_KERNEL);
+ if (!ngroup_sid_ptr) {
+ kfree(nowner_sid_ptr);
+ return -ENOMEM;
+ }
+
+ gid = from_kgid(user_ns, fattr->cf_gid);
+ id_to_sid(gid, SIDUNIX_GROUP, ngroup_sid_ptr);
+
+ /* Variable-length parts are laid out right after the fixed header. */
+ offset = sizeof(struct smb_ntsd);
+ pntsd->sacloffset = 0;
+ pntsd->revision = cpu_to_le16(1);
+ pntsd->type = cpu_to_le16(SELF_RELATIVE);
+ if (ppntsd)
+ pntsd->type |= ppntsd->type;
+
+ if (addition_info & OWNER_SECINFO) {
+ pntsd->osidoffset = cpu_to_le32(offset);
+ owner_sid_ptr = (struct smb_sid *)((char *)pntsd + offset);
+ smb_copy_sid(owner_sid_ptr, nowner_sid_ptr);
+ /* revision (1) + num_subauth (1) + authority (6) + 4 per sub-authority */
+ offset += 1 + 1 + 6 + (nowner_sid_ptr->num_subauth * 4);
+ }
+
+ if (addition_info & GROUP_SECINFO) {
+ pntsd->gsidoffset = cpu_to_le32(offset);
+ group_sid_ptr = (struct smb_sid *)((char *)pntsd + offset);
+ smb_copy_sid(group_sid_ptr, ngroup_sid_ptr);
+ offset += 1 + 1 + 6 + (ngroup_sid_ptr->num_subauth * 4);
+ }
+
+ if (addition_info & DACL_SECINFO) {
+ pntsd->type |= cpu_to_le16(DACL_PRESENT);
+ dacl_ptr = (struct smb_acl *)((char *)pntsd + offset);
+ dacl_ptr->revision = cpu_to_le16(2);
+ dacl_ptr->size = cpu_to_le16(sizeof(struct smb_acl));
+ dacl_ptr->num_aces = 0;
+
+ if (!ppntsd) {
+ /* no stored descriptor: derive the DACL from mode / POSIX ACLs */
+ set_mode_dacl(user_ns, dacl_ptr, fattr);
+ } else if (!ppntsd->dacloffset) {
+ /*
+ * NOTE(review): this path leaves DACL_PRESENT set while
+ * pntsd->dacloffset is not written and the empty DACL header is
+ * not counted in *secdesclen - confirm this is intended.
+ */
+ goto out;
+ } else {
+ struct smb_acl *ppdacl_ptr;
+
+ ppdacl_ptr = (struct smb_acl *)((char *)ppntsd +
+ le32_to_cpu(ppntsd->dacloffset));
+ set_ntacl_dacl(user_ns, dacl_ptr, ppdacl_ptr,
+ nowner_sid_ptr, ngroup_sid_ptr, fattr);
+ }
+ pntsd->dacloffset = cpu_to_le32(offset);
+ offset += le16_to_cpu(dacl_ptr->size);
+ }
+
+out:
+ kfree(nowner_sid_ptr);
+ kfree(ngroup_sid_ptr);
+ *secdesclen = offset;
+ return rc;
+}
+
+/*
+ * Fill in @ace with the given type, flags, access mask and a copy of @sid,
+ * and set its size field to the ACE header plus the SID header plus four
+ * bytes per sub-authority of @sid.
+ */
+static void smb_set_ace(struct smb_ace *ace, const struct smb_sid *sid, u8 type,
+			u8 flags, __le32 access_req)
+{
+	/* type (1) + flags (1) + size (2) + access_req (4) */
+	const int ace_hdr_len = 1 + 1 + 2 + 4;
+	/* revision (1) + num_subauth (1) + authority (6) */
+	const int sid_hdr_len = 1 + 1 + 6;
+
+	ace->type = type;
+	ace->flags = flags;
+	ace->access_req = access_req;
+	smb_copy_sid(&ace->sid, sid);
+	ace->size = cpu_to_le16(ace_hdr_len + sid_hdr_len +
+				(sid->num_subauth * 4));
+}
+
+/*
+ * smb_inherit_dacl() - derive a new object's descriptor from its parent's
+ * @conn:	connection the request arrived on
+ * @path:	path of the newly created object
+ * @uid:	id substituted for CREATOR OWNER ACEs
+ * @gid:	id substituted for CREATOR GROUP ACEs
+ *
+ * Reads the security descriptor xattr of the parent directory, selects the
+ * ACEs that are inheritable by @path (file or directory), and stores a new
+ * descriptor containing them, together with the parent's owner and group
+ * SIDs, on @path.
+ *
+ * The parent descriptor comes from an xattr and is treated as untrusted:
+ * every offset, SID and ACE is bounds-checked against the xattr length.
+ * The new descriptor is laid out compactly (header, owner SID, group SID,
+ * DACL) instead of reusing the parent's offsets, so it always fits the
+ * buffer allocated for it.
+ *
+ * Return: 0 on success or when nothing is inheritable, -ENOENT if the parent
+ * has no descriptor, -EINVAL if the parent descriptor is malformed or has no
+ * DACL, -ENOMEM on allocation failure.
+ */
+int smb_inherit_dacl(struct ksmbd_conn *conn,
+		     struct path *path,
+		     unsigned int uid, unsigned int gid)
+{
+	const struct smb_sid *psid, *creator = NULL;
+	struct smb_ace *parent_aces, *aces;
+	struct smb_acl *parent_pdacl;
+	struct smb_ntsd *parent_pntsd = NULL;
+	struct smb_sid owner_sid, group_sid;
+	struct dentry *parent = path->dentry->d_parent;
+	struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+	int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0;
+	int rc = 0, num_aces, dacloffset, pntsd_type, acl_len;
+	char *aces_base;
+	char *end_of_sd;
+	bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode);
+	/* bytes of an ACE up to, but not including, its first sub-authority */
+	const int ace_hdr_size = offsetof(struct smb_ace, sid) +
+				 offsetof(struct smb_sid, sub_auth);
+
+	acl_len = ksmbd_vfs_get_sd_xattr(conn, user_ns,
+					 parent, &parent_pntsd);
+	if (acl_len <= 0) {
+		rc = -ENOENT;
+		goto free_parent_pntsd;
+	}
+	if (acl_len < (int)sizeof(struct smb_ntsd)) {
+		rc = -EINVAL;
+		goto free_parent_pntsd;
+	}
+
+	dacloffset = le32_to_cpu(parent_pntsd->dacloffset);
+	if (!dacloffset) {
+		rc = -EINVAL;
+		goto free_parent_pntsd;
+	}
+	/* the DACL header must lie inside the xattr */
+	if (dacloffset < 0 ||
+	    dacloffset > acl_len - (int)sizeof(struct smb_acl)) {
+		rc = -EINVAL;
+		goto free_parent_pntsd;
+	}
+
+	end_of_sd = (char *)parent_pntsd + acl_len;
+	parent_pdacl = (struct smb_acl *)((char *)parent_pntsd + dacloffset);
+	num_aces = le32_to_cpu(parent_pdacl->num_aces);
+	pntsd_type = le16_to_cpu(parent_pntsd->type);
+
+	/* more ACEs than could fit behind the DACL header cannot be genuine */
+	if (num_aces < 0 ||
+	    num_aces > (acl_len - dacloffset - (int)sizeof(struct smb_acl)) /
+		       ace_hdr_size) {
+		rc = -EINVAL;
+		goto free_parent_pntsd;
+	}
+
+	/* each parent ACE yields at most two ACEs of at most full size */
+	aces_base = kmalloc_array(num_aces, 2 * sizeof(struct smb_ace),
+				  GFP_KERNEL);
+	if (!aces_base) {
+		rc = -ENOMEM;
+		goto free_parent_pntsd;
+	}
+
+	aces = (struct smb_ace *)aces_base;
+	parent_aces = (struct smb_ace *)((char *)parent_pdacl +
+			sizeof(struct smb_acl));
+
+	if (pntsd_type & DACL_AUTO_INHERITED)
+		inherited_flags = INHERITED_ACE;
+
+	for (i = 0; i < num_aces; i++) {
+		size_t remaining = end_of_sd - (char *)parent_aces;
+		u16 pace_size;
+
+		/* stop at the first ACE that is not fully inside the xattr */
+		if (remaining < (size_t)ace_hdr_size)
+			break;
+		pace_size = le16_to_cpu(parent_aces->size);
+		if (parent_aces->sid.num_subauth > SID_MAX_SUB_AUTHORITIES ||
+		    pace_size < ace_hdr_size +
+				sizeof(__le32) * parent_aces->sid.num_subauth ||
+		    pace_size > remaining)
+			break;
+
+		flags = parent_aces->flags;
+		if (!smb_inherit_flags(flags, is_dir))
+			goto pass;
+		if (is_dir) {
+			flags &= ~(INHERIT_ONLY_ACE | INHERITED_ACE);
+			if (!(flags & CONTAINER_INHERIT_ACE))
+				flags |= INHERIT_ONLY_ACE;
+			if (flags & NO_PROPAGATE_INHERIT_ACE)
+				flags = 0;
+		} else {
+			flags = 0;
+		}
+
+		/* CREATOR OWNER / CREATOR GROUP are replaced by the real ids */
+		if (!compare_sids(&creator_owner, &parent_aces->sid)) {
+			creator = &creator_owner;
+			id_to_sid(uid, SIDOWNER, &owner_sid);
+			psid = &owner_sid;
+		} else if (!compare_sids(&creator_group, &parent_aces->sid)) {
+			creator = &creator_group;
+			id_to_sid(gid, SIDUNIX_GROUP, &group_sid);
+			psid = &group_sid;
+		} else {
+			creator = NULL;
+			psid = &parent_aces->sid;
+		}
+
+		if (is_dir && creator && flags & CONTAINER_INHERIT_ACE) {
+			/*
+			 * Emit an effective ACE for the substituted id, then
+			 * fall through to keep the creator SID as an
+			 * inherit-only ACE for future children.
+			 */
+			smb_set_ace(aces, psid, parent_aces->type, inherited_flags,
+				    parent_aces->access_req);
+			nt_size += le16_to_cpu(aces->size);
+			ace_cnt++;
+			aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size));
+			flags |= INHERIT_ONLY_ACE;
+			psid = creator;
+		} else if (is_dir && !(parent_aces->flags & NO_PROPAGATE_INHERIT_ACE)) {
+			psid = &parent_aces->sid;
+		}
+
+		smb_set_ace(aces, psid, parent_aces->type, flags | inherited_flags,
+			    parent_aces->access_req);
+		nt_size += le16_to_cpu(aces->size);
+		aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size));
+		ace_cnt++;
+pass:
+		parent_aces =
+			(struct smb_ace *)((char *)parent_aces + pace_size);
+	}
+
+	if (nt_size > 0) {
+		struct smb_ntsd *pntsd;
+		struct smb_acl *pdacl;
+		struct smb_ace *pace;
+		struct smb_sid *powner_sid = NULL, *pgroup_sid = NULL;
+		int powner_sid_size = 0, pgroup_sid_size = 0, pntsd_size;
+		u32 osidoffset = le32_to_cpu(parent_pntsd->osidoffset);
+		u32 gsidoffset = le32_to_cpu(parent_pntsd->gsidoffset);
+
+		if (osidoffset) {
+			/* SID header, then the full SID, must be inside the xattr */
+			if (osidoffset > (u32)acl_len - 8) {
+				rc = -EINVAL;
+				goto free_aces_base;
+			}
+			powner_sid = (struct smb_sid *)((char *)parent_pntsd +
+					osidoffset);
+			powner_sid_size = 1 + 1 + 6 + (powner_sid->num_subauth * 4);
+			if (powner_sid->num_subauth > SID_MAX_SUB_AUTHORITIES ||
+			    (u32)powner_sid_size > (u32)acl_len - osidoffset) {
+				rc = -EINVAL;
+				goto free_aces_base;
+			}
+		}
+		if (gsidoffset) {
+			if (gsidoffset > (u32)acl_len - 8) {
+				rc = -EINVAL;
+				goto free_aces_base;
+			}
+			pgroup_sid = (struct smb_sid *)((char *)parent_pntsd +
+					gsidoffset);
+			pgroup_sid_size = 1 + 1 + 6 + (pgroup_sid->num_subauth * 4);
+			if (pgroup_sid->num_subauth > SID_MAX_SUB_AUTHORITIES ||
+			    (u32)pgroup_sid_size > (u32)acl_len - gsidoffset) {
+				rc = -EINVAL;
+				goto free_aces_base;
+			}
+		}
+
+		pntsd = kzalloc(sizeof(struct smb_ntsd) + powner_sid_size +
+				pgroup_sid_size + sizeof(struct smb_acl) +
+				nt_size, GFP_KERNEL);
+		if (!pntsd) {
+			rc = -ENOMEM;
+			goto free_aces_base;
+		}
+
+		pntsd->revision = cpu_to_le16(1);
+		pntsd->type = cpu_to_le16(SELF_RELATIVE | DACL_PRESENT);
+		if (le16_to_cpu(parent_pntsd->type) & DACL_AUTO_INHERITED)
+			pntsd->type |= cpu_to_le16(DACL_AUTO_INHERITED);
+		pntsd_size = sizeof(struct smb_ntsd);
+
+		/*
+		 * Lay the parts out back to back.  Reusing the parent's
+		 * offsets would write outside the buffer whenever the parent
+		 * descriptor is not laid out in exactly this compact form.
+		 */
+		if (powner_sid) {
+			pntsd->osidoffset = cpu_to_le32(pntsd_size);
+			memcpy((char *)pntsd + pntsd_size, powner_sid,
+			       powner_sid_size);
+			pntsd_size += powner_sid_size;
+		}
+
+		if (pgroup_sid) {
+			pntsd->gsidoffset = cpu_to_le32(pntsd_size);
+			memcpy((char *)pntsd + pntsd_size, pgroup_sid,
+			       pgroup_sid_size);
+			pntsd_size += pgroup_sid_size;
+		}
+
+		pntsd->dacloffset = cpu_to_le32(pntsd_size);
+		pdacl = (struct smb_acl *)((char *)pntsd + pntsd_size);
+		pdacl->revision = cpu_to_le16(2);
+		pdacl->size = cpu_to_le16(sizeof(struct smb_acl) + nt_size);
+		pdacl->num_aces = cpu_to_le32(ace_cnt);
+		pace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
+		memcpy(pace, aces_base, nt_size);
+		pntsd_size += sizeof(struct smb_acl) + nt_size;
+
+		ksmbd_vfs_set_sd_xattr(conn, user_ns,
+				       path->dentry, pntsd, pntsd_size);
+		kfree(pntsd);
+	}
+
+free_aces_base:
+	kfree(aces_base);
+free_parent_pntsd:
+	kfree(parent_pntsd);
+	return rc;
+}
+
+/*
+ * Decide from an ACE's flags whether it is inherited by a new child.
+ *
+ * A file inherits only ACEs marked OBJECT_INHERIT_ACE.  A directory inherits
+ * ACEs marked CONTAINER_INHERIT_ACE, and also OBJECT_INHERIT_ACE ones unless
+ * NO_PROPAGATE_INHERIT_ACE is set.
+ */
+bool smb_inherit_flags(int flags, bool is_dir)
+{
+	bool object_inherit = (flags & OBJECT_INHERIT_ACE) != 0;
+	bool container_inherit = (flags & CONTAINER_INHERIT_ACE) != 0;
+	bool no_propagate = (flags & NO_PROPAGATE_INHERIT_ACE) != 0;
+
+	if (!is_dir)
+		return object_inherit;
+
+	return (object_inherit && !no_propagate) || container_inherit;
+}
+
+/*
+ * smb_check_perm_dacl() - check requested access against the stored DACL
+ * @conn:	connection the request arrived on
+ * @path:	object being opened
+ * @pdaccess:	in: requested access mask; out: granted mask on success
+ * @uid:	id of the requesting user
+ *
+ * Looks for an ACE matching the user's SID (or the NFS mode SID); failing
+ * that, for a POSIX access ACL entry with the same id; failing that, for the
+ * "everyone" ACE.  FILE_MAXIMAL_ACCESS is expanded from the ACEs found.
+ *
+ * Return: 0 if access is permitted or no usable descriptor is stored,
+ * -EACCES if the request exceeds what the matching entry allows or no
+ * matching entry exists.
+ */
+int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+			__le32 *pdaccess, int uid)
+{
+	struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+	struct smb_ntsd *pntsd = NULL;
+	struct smb_acl *pdacl;
+	struct posix_acl *posix_acls;
+	int rc = 0, acl_size;
+	struct smb_sid sid;
+	int granted = le32_to_cpu(*pdaccess & ~FILE_MAXIMAL_ACCESS_LE);
+	struct smb_ace *ace;
+	int i, found = 0;
+	unsigned int access_bits = 0;
+	struct smb_ace *others_ace = NULL;
+	struct posix_acl_entry *pa_entry;
+	unsigned int sid_type = SIDOWNER;
+	char *end_of_acl;
+
+	ksmbd_debug(SMB, "check permission using windows acl\n");
+	acl_size = ksmbd_vfs_get_sd_xattr(conn, user_ns,
+					  path->dentry, &pntsd);
+	if (acl_size <= 0 || !pntsd || !pntsd->dacloffset) {
+		kfree(pntsd);
+		return 0;
+	}
+
+	pdacl = (struct smb_acl *)((char *)pntsd + le32_to_cpu(pntsd->dacloffset));
+	end_of_acl = ((char *)pntsd) + acl_size;
+	if (end_of_acl <= (char *)pdacl) {
+		kfree(pntsd);
+		return 0;
+	}
+
+	if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size) ||
+	    le16_to_cpu(pdacl->size) < sizeof(struct smb_acl)) {
+		kfree(pntsd);
+		return 0;
+	}
+
+	if (!pdacl->num_aces) {
+		/* an empty DACL only permits reading/writing the DACL itself */
+		if (!(le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) &&
+		    *pdaccess & ~(FILE_READ_CONTROL_LE | FILE_WRITE_DAC_LE)) {
+			rc = -EACCES;
+			goto err_out;
+		}
+		kfree(pntsd);
+		return 0;
+	}
+
+	if (*pdaccess & FILE_MAXIMAL_ACCESS_LE) {
+		granted = READ_CONTROL | WRITE_DAC | FILE_READ_ATTRIBUTES |
+			DELETE;
+
+		ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
+		for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) {
+			granted |= le32_to_cpu(ace->access_req);
+			ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size));
+			if (end_of_acl < (char *)ace)
+				goto err_out;
+		}
+
+		if (!pdacl->num_aces)
+			granted = GENERIC_ALL_FLAGS;
+	}
+
+	/* uid 0 is expressed with the SIDUNIX_USER SID type */
+	if (!uid)
+		sid_type = SIDUNIX_USER;
+	id_to_sid(uid, sid_type, &sid);
+
+	ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
+	for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) {
+		if (!compare_sids(&sid, &ace->sid) ||
+		    !compare_sids(&sid_unix_NFS_mode, &ace->sid)) {
+			found = 1;
+			break;
+		}
+		if (!compare_sids(&sid_everyone, &ace->sid))
+			others_ace = ace;
+
+		ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size));
+		if (end_of_acl < (char *)ace)
+			goto err_out;
+	}
+
+	if (*pdaccess & FILE_MAXIMAL_ACCESS_LE && found) {
+		granted = READ_CONTROL | WRITE_DAC | FILE_READ_ATTRIBUTES |
+			DELETE;
+
+		granted |= le32_to_cpu(ace->access_req);
+
+		if (!pdacl->num_aces)
+			granted = GENERIC_ALL_FLAGS;
+	}
+
+	if (IS_ENABLED(CONFIG_FS_POSIX_ACL)) {
+		posix_acls = get_acl(d_inode(path->dentry), ACL_TYPE_ACCESS);
+		/* get_acl() may hand back an ERR_PTR; never dereference that */
+		if (IS_ERR_OR_NULL(posix_acls))
+			posix_acls = NULL;
+
+		if (posix_acls && !found) {
+			unsigned int id = -1;
+
+			pa_entry = posix_acls->a_entries;
+			for (i = 0; i < posix_acls->a_count; i++, pa_entry++) {
+				if (pa_entry->e_tag == ACL_USER)
+					id = from_kuid(user_ns,
+						       pa_entry->e_uid);
+				else if (pa_entry->e_tag == ACL_GROUP)
+					id = from_kgid(user_ns,
+						       pa_entry->e_gid);
+				else
+					continue;
+
+				if (id == uid) {
+					mode_to_access_flags(pa_entry->e_perm,
+							     0777,
+							     &access_bits);
+					if (!access_bits)
+						access_bits =
+							SET_MINIMUM_RIGHTS;
+					/* drop the reference before leaving */
+					posix_acl_release(posix_acls);
+					goto check_access_bits;
+				}
+			}
+		}
+		if (posix_acls)
+			posix_acl_release(posix_acls);
+	}
+
+	if (!found) {
+		if (others_ace) {
+			ace = others_ace;
+		} else {
+			ksmbd_debug(SMB, "Can't find corresponding sid\n");
+			rc = -EACCES;
+			goto err_out;
+		}
+	}
+
+	switch (ace->type) {
+	case ACCESS_ALLOWED_ACE_TYPE:
+		access_bits = le32_to_cpu(ace->access_req);
+		break;
+	case ACCESS_DENIED_ACE_TYPE:
+	case ACCESS_DENIED_CALLBACK_ACE_TYPE:
+		access_bits = le32_to_cpu(~ace->access_req);
+		break;
+	}
+
+check_access_bits:
+	if (granted &
+	    ~(access_bits | FILE_READ_ATTRIBUTES | READ_CONTROL | WRITE_DAC | DELETE)) {
+		/*
+		 * Report the mask that was actually compared against: on the
+		 * POSIX ACL path 'ace' may point past the last ACE and must
+		 * not be dereferenced.
+		 */
+		ksmbd_debug(SMB, "Access denied with winACL, granted : %x, access_req : %x\n",
+			    granted, access_bits);
+		rc = -EACCES;
+		goto err_out;
+	}
+
+	*pdaccess = cpu_to_le32(granted);
+err_out:
+	kfree(pntsd);
+	return rc;
+}
+
+/*
+ * Apply a client-supplied security descriptor to the object at @path.
+ *
+ * The descriptor is parsed into owner, group, mode bits and POSIX ACLs; these
+ * are applied through notify_change() and set_posix_acl().  When the share
+ * has KSMBD_SHARE_FLAG_ACL_XATTR set, the raw descriptor is also stored as an
+ * xattr.  With @type_check set, that xattr update is skipped if the
+ * descriptor's DACL_PRESENT flag is clear after parsing.
+ *
+ * Returns 0 or a negative error from parsing, notify_change() or
+ * set_posix_acl().
+ */
+int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
+ struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
+ bool type_check)
+{
+ int rc;
+ struct smb_fattr fattr = {{0}};
+ struct inode *inode = d_inode(path->dentry);
+ struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+ struct iattr newattrs;
+
+ /* Invalid ids mean "owner/group not changed by the descriptor". */
+ fattr.cf_uid = INVALID_UID;
+ fattr.cf_gid = INVALID_GID;
+ fattr.cf_mode = inode->i_mode;
+
+ rc = parse_sec_desc(user_ns, pntsd, ntsd_len, &fattr);
+ if (rc)
+ goto out;
+
+ /* Only the iattr fields selected in ia_valid are filled in. */
+ newattrs.ia_valid = ATTR_CTIME;
+ if (!uid_eq(fattr.cf_uid, INVALID_UID)) {
+ newattrs.ia_valid |= ATTR_UID;
+ newattrs.ia_uid = fattr.cf_uid;
+ }
+ if (!gid_eq(fattr.cf_gid, INVALID_GID)) {
+ newattrs.ia_valid |= ATTR_GID;
+ newattrs.ia_gid = fattr.cf_gid;
+ }
+ /* Keep the file type and special bits; replace only the rwx bits. */
+ newattrs.ia_valid |= ATTR_MODE;
+ newattrs.ia_mode = (inode->i_mode & ~0777) | (fattr.cf_mode & 0777);
+
+ inode_lock(inode);
+ rc = notify_change(user_ns, path->dentry, &newattrs, NULL);
+ inode_unlock(inode);
+ if (rc)
+ goto out;
+
+ ksmbd_vfs_remove_acl_xattrs(user_ns, path->dentry);
+ /*
+ * Update posix acls.
+ * NOTE(review): the access ACL is gated on cf_dacls rather than cf_acls,
+ * and the result of the first set_posix_acl() is overwritten by the second
+ * for directories - confirm both are intended.
+ */
+ if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && fattr.cf_dacls) {
+ rc = set_posix_acl(user_ns, inode,
+ ACL_TYPE_ACCESS, fattr.cf_acls);
+ if (S_ISDIR(inode->i_mode) && fattr.cf_dacls)
+ rc = set_posix_acl(user_ns, inode,
+ ACL_TYPE_DEFAULT, fattr.cf_dacls);
+ }
+
+ /* Check it only calling from SD BUFFER context */
+ if (type_check && !(le16_to_cpu(pntsd->type) & DACL_PRESENT))
+ goto out;
+
+ if (test_share_config_flag(tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) {
+ /* Update WinACL in xattr */
+ ksmbd_vfs_remove_sd_xattrs(user_ns, path->dentry);
+ ksmbd_vfs_set_sd_xattr(conn, user_ns,
+ path->dentry, pntsd, ntsd_len);
+ }
+
+out:
+ /* Runs on success and failure alike: drop ACL references, dirty the inode. */
+ posix_acl_release(fattr.cf_acls);
+ posix_acl_release(fattr.cf_dacls);
+ mark_inode_dirty(inode);
+ return rc;
+}
+
+/*
+ * Initialise the server's domain SID from the 'domain' template and store
+ * the three values of @sub_auth in sub-authorities 1..3.
+ */
+void ksmbd_init_domain(u32 *sub_auth)
+{
+	struct smb_sid *dom_sid = &server_conf.domain_sid;
+	int idx;
+
+	memcpy(dom_sid, &domain, sizeof(struct smb_sid));
+	for (idx = 1; idx <= 3; idx++)
+		dom_sid->sub_auth[idx] = cpu_to_le32(sub_auth[idx - 1]);
+}
diff --git a/fs/ksmbd/smbacl.h b/fs/ksmbd/smbacl.h
new file mode 100644
index 000000000000..940f686a1d95
--- /dev/null
+++ b/fs/ksmbd/smbacl.h
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/*
+ * Copyright (c) International Business Machines Corp., 2007
+ * Author(s): Steve French (sfrench@us.ibm.com)
+ * Modified by Namjae Jeon (linkinjeon@kernel.org)
+ */
+
+#ifndef _SMBACL_H
+#define _SMBACL_H
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/posix_acl.h>
+
+#include "mgmt/tree_connect.h"
+
+#define NUM_AUTHS (6) /* number of authority fields */
+#define SID_MAX_SUB_AUTHORITIES (15) /* max number of sub authority fields */
+
+/*
+ * ACE types - see MS-DTYP 2.4.4.1
+ */
+enum {
+ ACCESS_ALLOWED,
+ ACCESS_DENIED,
+};
+
+/*
+ * Security ID types
+ */
+enum {
+ SIDOWNER = 1,
+ SIDGROUP,
+ SIDCREATOR_OWNER,
+ SIDCREATOR_GROUP,
+ SIDUNIX_USER,
+ SIDUNIX_GROUP,
+ SIDNFS_USER,
+ SIDNFS_GROUP,
+ SIDNFS_MODE,
+};
+
+/* Revision for ACLs */
+#define SD_REVISION 1
+
+/* Control flags for Security Descriptor */
+#define OWNER_DEFAULTED 0x0001
+#define GROUP_DEFAULTED 0x0002
+#define DACL_PRESENT 0x0004
+#define DACL_DEFAULTED 0x0008
+#define SACL_PRESENT 0x0010
+#define SACL_DEFAULTED 0x0020
+#define DACL_TRUSTED 0x0040
+#define SERVER_SECURITY 0x0080
+#define DACL_AUTO_INHERIT_REQ 0x0100
+#define SACL_AUTO_INHERIT_REQ 0x0200
+#define DACL_AUTO_INHERITED 0x0400
+#define SACL_AUTO_INHERITED 0x0800
+#define DACL_PROTECTED 0x1000
+#define SACL_PROTECTED 0x2000
+#define RM_CONTROL_VALID 0x4000
+#define SELF_RELATIVE 0x8000
+
+/* ACE types - see MS-DTYP 2.4.4.1 */
+#define ACCESS_ALLOWED_ACE_TYPE 0x00
+#define ACCESS_DENIED_ACE_TYPE 0x01
+#define SYSTEM_AUDIT_ACE_TYPE 0x02
+#define SYSTEM_ALARM_ACE_TYPE 0x03
+#define ACCESS_ALLOWED_COMPOUND_ACE_TYPE 0x04
+#define ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05
+#define ACCESS_DENIED_OBJECT_ACE_TYPE 0x06
+#define SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07
+#define SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08
+#define ACCESS_ALLOWED_CALLBACK_ACE_TYPE 0x09
+#define ACCESS_DENIED_CALLBACK_ACE_TYPE 0x0A
+#define ACCESS_ALLOWED_CALLBACK_OBJECT_ACE_TYPE 0x0B
+#define ACCESS_DENIED_CALLBACK_OBJECT_ACE_TYPE 0x0C
+#define SYSTEM_AUDIT_CALLBACK_ACE_TYPE 0x0D
+#define SYSTEM_ALARM_CALLBACK_ACE_TYPE 0x0E /* Reserved */
+#define SYSTEM_AUDIT_CALLBACK_OBJECT_ACE_TYPE 0x0F
+#define SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE 0x10 /* reserved */
+#define SYSTEM_MANDATORY_LABEL_ACE_TYPE 0x11
+#define SYSTEM_RESOURCE_ATTRIBUTE_ACE_TYPE 0x12
+#define SYSTEM_SCOPED_POLICY_ID_ACE_TYPE 0x13
+
+/* ACE flags */
+#define OBJECT_INHERIT_ACE 0x01
+#define CONTAINER_INHERIT_ACE 0x02
+#define NO_PROPAGATE_INHERIT_ACE 0x04
+#define INHERIT_ONLY_ACE 0x08
+#define INHERITED_ACE 0x10
+#define SUCCESSFUL_ACCESS_ACE_FLAG 0x40
+#define FAILED_ACCESS_ACE_FLAG 0x80
+
+/*
+ * Maximum size of a string representation of a SID:
+ *
+ * The fields are unsigned values in decimal. So:
+ *
+ * u8: max 3 bytes in decimal
+ * u32: max 10 bytes in decimal
+ *
+ * "S-" + 3 bytes for version field + 15 for authority field + NULL terminator
+ *
+ * For authority field, max is when all 6 values are non-zero and it must be
+ * represented in hex. So "-0x" + 12 hex digits.
+ *
+ * Add 11 bytes for each subauthority field (10 bytes each + 1 for '-')
+ */
+#define SID_STRING_BASE_SIZE (2 + 3 + 15 + 1)
+#define SID_STRING_SUBAUTH_SIZE (11) /* size of a single subauth string */
+
+#define DOMAIN_USER_RID_LE cpu_to_le32(513)
+
+struct ksmbd_conn;
+
+struct smb_ntsd { /* security descriptor header; offsets are bytes from its start */
+ __le16 revision; /* revision level */
+ __le16 type; /* control flags, e.g. DACL_PRESENT, SELF_RELATIVE */
+ __le32 osidoffset; /* offset of the owner SID, 0 if none */
+ __le32 gsidoffset; /* offset of the group SID, 0 if none */
+ __le32 sacloffset; /* offset of the SACL */
+ __le32 dacloffset; /* offset of the DACL, 0 if none */
+} __packed;
+
+struct smb_sid { /* security identifier; 8 fixed bytes plus 4 per sub-authority in use */
+ __u8 revision; /* revision level */
+ __u8 num_subauth; /* number of sub_auth[] entries in use */
+ __u8 authority[NUM_AUTHS]; /* identifier authority bytes */
+ __le32 sub_auth[SID_MAX_SUB_AUTHORITIES]; /* sub_auth[num_subauth] */
+} __packed;
+
+/* size of a struct smb_sid, sans sub_auth array */
+#define CIFS_SID_BASE_SIZE (1 + 1 + NUM_AUTHS)
+
+struct smb_acl { /* ACL header; the ACEs follow it directly */
+ __le16 revision; /* revision level */
+ __le16 size; /* bytes in the ACL: this header plus all ACEs */
+ __le32 num_aces; /* number of ACEs following the header */
+} __packed;
+
+struct smb_ace { /* one access control entry */
+ __u8 type; /* ACE type, e.g. ACCESS_ALLOWED_ACE_TYPE */
+ __u8 flags; /* ACE flags, e.g. OBJECT_INHERIT_ACE, INHERITED_ACE */
+ __le16 size; /* bytes in this ACE; added to its address to reach the next one */
+ __le32 access_req; /* access mask the ACE applies to */
+ struct smb_sid sid; /* ie UUID of user or group who gets these perms */
+} __packed;
+
+struct smb_fattr { /* POSIX-side attributes exchanged with the ACL converters */
+ kuid_t cf_uid; /* owner */
+ kgid_t cf_gid; /* owning group */
+ umode_t cf_mode; /* file mode bits */
+ __le32 daccess;
+ struct posix_acl *cf_acls; /* POSIX access ACL, may be NULL */
+ struct posix_acl *cf_dacls; /* POSIX default ACL, may be NULL */
+};
+
+struct posix_ace_state {
+ u32 allow;
+ u32 deny;
+};
+
+struct posix_user_ace_state {
+ union {
+ kuid_t uid;
+ kgid_t gid;
+ };
+ struct posix_ace_state perms;
+};
+
+struct posix_ace_state_array { /* counted array of named user or group entries */
+ int n; /* number of aces[] entries in use */
+ struct posix_user_ace_state aces[]; /* storage sized by init_acl_state() */
+};
+
+/*
+ * While the ACEs of a DACL are converted to a POSIX ACL, this holds the
+ * partial permissions calculated so far:
+ */
+
+struct posix_acl_state { /* filled by parse_dacl(), flattened by posix_state_to_acl() */
+ struct posix_ace_state owner; /* becomes the ACL_USER_OBJ entry */
+ struct posix_ace_state group; /* becomes the ACL_GROUP_OBJ entry */
+ struct posix_ace_state other; /* becomes the ACL_OTHER entry */
+ struct posix_ace_state everyone;
+ struct posix_ace_state mask; /* deny unused in this case */
+ struct posix_ace_state_array *users; /* named users (ACL_USER entries) */
+ struct posix_ace_state_array *groups; /* named groups (ACL_GROUP entries) */
+};
+
+int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+ int acl_len, struct smb_fattr *fattr);
+int build_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+ struct smb_ntsd *ppntsd, int addition_info,
+ __u32 *secdesclen, struct smb_fattr *fattr);
+int init_acl_state(struct posix_acl_state *state, int cnt);
+void free_acl_state(struct posix_acl_state *state);
+void posix_state_to_acl(struct posix_acl_state *state,
+ struct posix_acl_entry *pace);
+int compare_sids(const struct smb_sid *ctsid, const struct smb_sid *cwsid);
+bool smb_inherit_flags(int flags, bool is_dir);
+int smb_inherit_dacl(struct ksmbd_conn *conn, struct path *path,
+ unsigned int uid, unsigned int gid);
+int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+ __le32 *pdaccess, int uid);
+int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
+ struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
+ bool type_check);
+void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid);
+void ksmbd_init_domain(u32 *sub_auth);
+#endif /* _SMBACL_H */
diff --git a/fs/ksmbd/smbfsctl.h b/fs/ksmbd/smbfsctl.h
new file mode 100644
index 000000000000..b98418aae20c
--- /dev/null
+++ b/fs/ksmbd/smbfsctl.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/*
+ * fs/ksmbd/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions
+ *
+ * Copyright (c) International Business Machines Corp., 2002,2009
+ * Author(s): Steve French (sfrench@us.ibm.com)
+ */
+
+/* IOCTL information */
+/*
+ * List of ioctl/fsctl function codes that are or could be useful in the
+ * future to remote clients like cifs or SMB2 client. There is probably
+ * a slightly larger set of fsctls that NTFS local filesystem could handle,
+ * including the seven below that we do not have struct definitions for.
+ * Even with protocol definitions for most of these now available, we still
+ * need to do some experimentation to identify which are practical to do
+ * remotely. Some of the following, such as the encryption/compression ones
+ * could be invoked from tools via a specialized hook into the VFS rather
+ * than via the standard vfs entry points
+ */
+
+#ifndef __KSMBD_SMBFSCTL_H
+#define __KSMBD_SMBFSCTL_H
+
+#define FSCTL_DFS_GET_REFERRALS 0x00060194
+#define FSCTL_DFS_GET_REFERRALS_EX 0x000601B0
+#define FSCTL_REQUEST_OPLOCK_LEVEL_1 0x00090000
+#define FSCTL_REQUEST_OPLOCK_LEVEL_2 0x00090004
+#define FSCTL_REQUEST_BATCH_OPLOCK 0x00090008
+#define FSCTL_LOCK_VOLUME 0x00090018
+#define FSCTL_UNLOCK_VOLUME 0x0009001C
+#define FSCTL_IS_PATHNAME_VALID 0x0009002C /* BB add struct */
+#define FSCTL_GET_COMPRESSION 0x0009003C /* BB add struct */
+#define FSCTL_SET_COMPRESSION 0x0009C040 /* BB add struct */
+#define FSCTL_QUERY_FAT_BPB 0x00090058 /* BB add struct */
+/* Verify the next FSCTL number, we had it as 0x00090090 before */
+#define FSCTL_FILESYSTEM_GET_STATS 0x00090060 /* BB add struct */
+#define FSCTL_GET_NTFS_VOLUME_DATA 0x00090064 /* BB add struct */
+#define FSCTL_GET_RETRIEVAL_POINTERS 0x00090073 /* BB add struct */
+#define FSCTL_IS_VOLUME_DIRTY 0x00090078 /* BB add struct */
+#define FSCTL_ALLOW_EXTENDED_DASD_IO 0x00090083 /* BB add struct */
+#define FSCTL_REQUEST_FILTER_OPLOCK 0x0009008C
+#define FSCTL_FIND_FILES_BY_SID 0x0009008F /* BB add struct */
+#define FSCTL_SET_OBJECT_ID 0x00090098 /* BB add struct */
+#define FSCTL_GET_OBJECT_ID 0x0009009C /* BB add struct */
+#define FSCTL_DELETE_OBJECT_ID 0x000900A0 /* BB add struct */
+#define FSCTL_SET_REPARSE_POINT 0x000900A4 /* BB add struct */
+#define FSCTL_GET_REPARSE_POINT 0x000900A8 /* BB add struct */
+#define FSCTL_DELETE_REPARSE_POINT 0x000900AC /* BB add struct */
+#define FSCTL_SET_OBJECT_ID_EXTENDED 0x000900BC /* BB add struct */
+#define FSCTL_CREATE_OR_GET_OBJECT_ID 0x000900C0 /* BB add struct */
+#define FSCTL_SET_SPARSE 0x000900C4 /* BB add struct */
+#define FSCTL_SET_ZERO_DATA 0x000980C8 /* BB add struct */
+#define FSCTL_SET_ENCRYPTION 0x000900D7 /* BB add struct */
+#define FSCTL_ENCRYPTION_FSCTL_IO 0x000900DB /* BB add struct */
+#define FSCTL_WRITE_RAW_ENCRYPTED 0x000900DF /* BB add struct */
+#define FSCTL_READ_RAW_ENCRYPTED 0x000900E3 /* BB add struct */
+#define FSCTL_READ_FILE_USN_DATA 0x000900EB /* BB add struct */
+#define FSCTL_WRITE_USN_CLOSE_RECORD 0x000900EF /* BB add struct */
+#define FSCTL_SIS_COPYFILE 0x00090100 /* BB add struct */
+#define FSCTL_RECALL_FILE 0x00090117 /* BB add struct */
+#define FSCTL_QUERY_SPARING_INFO 0x00090138 /* BB add struct */
+#define FSCTL_SET_ZERO_ON_DEALLOC 0x00090194 /* BB add struct */
+#define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */
+#define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF /* BB add struct */
+#define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */
+#define FSCTL_DUPLICATE_EXTENTS_TO_FILE 0x00098344
+#define FSCTL_SIS_LINK_FILES 0x0009C104
+#define FSCTL_PIPE_PEEK 0x0011400C /* BB add struct */
+#define FSCTL_PIPE_TRANSCEIVE 0x0011C017 /* BB add struct */
+/* strange that the number for this op is not sequential with previous op */
+#define FSCTL_PIPE_WAIT 0x00110018 /* BB add struct */
+#define FSCTL_REQUEST_RESUME_KEY 0x00140078
+#define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */
+#define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */
+#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204
+#define FSCTL_QUERY_NETWORK_INTERFACE_INFO 0x001401FC
+#define FSCTL_COPYCHUNK 0x001440F2
+#define FSCTL_COPYCHUNK_WRITE 0x001480F2
+
+#define IO_REPARSE_TAG_MOUNT_POINT 0xA0000003
+#define IO_REPARSE_TAG_HSM 0xC0000004
+#define IO_REPARSE_TAG_SIS 0x80000007
+
+/* WSL reparse tags, pre-converted with cpu_to_le32() (hence the _LE suffix) */
+#define IO_REPARSE_TAG_LX_SYMLINK_LE cpu_to_le32(0xA000001D)
+#define IO_REPARSE_TAG_AF_UNIX_LE cpu_to_le32(0x80000023)
+#define IO_REPARSE_TAG_LX_FIFO_LE cpu_to_le32(0x80000024)
+#define IO_REPARSE_TAG_LX_CHR_LE cpu_to_le32(0x80000025)
+#define IO_REPARSE_TAG_LX_BLK_LE cpu_to_le32(0x80000026)
+#endif /* __KSMBD_SMBFSCTL_H */
diff --git a/fs/ksmbd/smbstatus.h b/fs/ksmbd/smbstatus.h
new file mode 100644
index 000000000000..108a8b6ed24a
--- /dev/null
+++ b/fs/ksmbd/smbstatus.h
@@ -0,0 +1,1822 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/*
+ * fs/ksmbd/smbstatus.h
+ *
+ * SMB2 Status code (network error) definitions
+ * Definitions are from MS-ERREF
+ *
+ * Copyright (c) International Business Machines Corp., 2009,2011
+ * Author(s): Steve French (sfrench@us.ibm.com)
+ */
+
+/*
+ * 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ * SEV C N <------Facility-------> <------Error Status Code------>
+ *
+ * C is set if "customer defined" error, N bit is reserved and MBZ
+ */
+
+#define STATUS_SEVERITY_SUCCESS cpu_to_le32(0x0000)
+#define STATUS_SEVERITY_INFORMATIONAL cpu_to_le32(0x0001)
+#define STATUS_SEVERITY_WARNING cpu_to_le32(0x0002)
+#define STATUS_SEVERITY_ERROR cpu_to_le32(0x0003)
+
+struct ntstatus {
+ /* Facility is the high 12 bits of the following field */
+ __le32 Facility; /* low 2 bits Severity, next is Customer, then rsrvd */
+ __le32 Code; /* presumably the error status code; confirm against users */
+};
+
+#define STATUS_SUCCESS 0x00000000
+#define STATUS_WAIT_0 cpu_to_le32(0x00000000)
+#define STATUS_WAIT_1 cpu_to_le32(0x00000001)
+#define STATUS_WAIT_2 cpu_to_le32(0x00000002)
+#define STATUS_WAIT_3 cpu_to_le32(0x00000003)
+#define STATUS_WAIT_63 cpu_to_le32(0x0000003F)
+#define STATUS_ABANDONED cpu_to_le32(0x00000080)
+#define STATUS_ABANDONED_WAIT_0 cpu_to_le32(0x00000080)
+#define STATUS_ABANDONED_WAIT_63 cpu_to_le32(0x000000BF)
+#define STATUS_USER_APC cpu_to_le32(0x000000C0)
+#define STATUS_KERNEL_APC cpu_to_le32(0x00000100)
+#define STATUS_ALERTED cpu_to_le32(0x00000101)
+#define STATUS_TIMEOUT cpu_to_le32(0x00000102)
+#define STATUS_PENDING cpu_to_le32(0x00000103)
+#define STATUS_REPARSE cpu_to_le32(0x00000104)
+#define STATUS_MORE_ENTRIES cpu_to_le32(0x00000105)
+#define STATUS_NOT_ALL_ASSIGNED cpu_to_le32(0x00000106)
+#define STATUS_SOME_NOT_MAPPED cpu_to_le32(0x00000107)
+#define STATUS_OPLOCK_BREAK_IN_PROGRESS cpu_to_le32(0x00000108)
+#define STATUS_VOLUME_MOUNTED cpu_to_le32(0x00000109)
+#define STATUS_RXACT_COMMITTED cpu_to_le32(0x0000010A)
+#define STATUS_NOTIFY_CLEANUP cpu_to_le32(0x0000010B)
+#define STATUS_NOTIFY_ENUM_DIR cpu_to_le32(0x0000010C)
+#define STATUS_NO_QUOTAS_FOR_ACCOUNT cpu_to_le32(0x0000010D)
+#define STATUS_PRIMARY_TRANSPORT_CONNECT_FAILED cpu_to_le32(0x0000010E)
+#define STATUS_PAGE_FAULT_TRANSITION cpu_to_le32(0x00000110)
+#define STATUS_PAGE_FAULT_DEMAND_ZERO cpu_to_le32(0x00000111)
+#define STATUS_PAGE_FAULT_COPY_ON_WRITE cpu_to_le32(0x00000112)
+#define STATUS_PAGE_FAULT_GUARD_PAGE cpu_to_le32(0x00000113)
+#define STATUS_PAGE_FAULT_PAGING_FILE cpu_to_le32(0x00000114)
+#define STATUS_CACHE_PAGE_LOCKED cpu_to_le32(0x00000115)
+#define STATUS_CRASH_DUMP cpu_to_le32(0x00000116)
+#define STATUS_BUFFER_ALL_ZEROS cpu_to_le32(0x00000117)
+#define STATUS_REPARSE_OBJECT cpu_to_le32(0x00000118)
+#define STATUS_RESOURCE_REQUIREMENTS_CHANGED cpu_to_le32(0x00000119)
+#define STATUS_TRANSLATION_COMPLETE cpu_to_le32(0x00000120)
+#define STATUS_DS_MEMBERSHIP_EVALUATED_LOCALLY cpu_to_le32(0x00000121)
+#define STATUS_NOTHING_TO_TERMINATE cpu_to_le32(0x00000122)
+#define STATUS_PROCESS_NOT_IN_JOB cpu_to_le32(0x00000123)
+#define STATUS_PROCESS_IN_JOB cpu_to_le32(0x00000124)
+#define STATUS_VOLSNAP_HIBERNATE_READY cpu_to_le32(0x00000125)
+#define STATUS_FSFILTER_OP_COMPLETED_SUCCESSFULLY cpu_to_le32(0x00000126)
+#define STATUS_INTERRUPT_VECTOR_ALREADY_CONNECTED cpu_to_le32(0x00000127)
+#define STATUS_INTERRUPT_STILL_CONNECTED cpu_to_le32(0x00000128)
+#define STATUS_PROCESS_CLONED cpu_to_le32(0x00000129)
+#define STATUS_FILE_LOCKED_WITH_ONLY_READERS cpu_to_le32(0x0000012A)
+#define STATUS_FILE_LOCKED_WITH_WRITERS cpu_to_le32(0x0000012B)
+#define STATUS_RESOURCEMANAGER_READ_ONLY cpu_to_le32(0x00000202)
+#define STATUS_WAIT_FOR_OPLOCK cpu_to_le32(0x00000367)
+#define DBG_EXCEPTION_HANDLED cpu_to_le32(0x00010001)
+#define DBG_CONTINUE cpu_to_le32(0x00010002)
+#define STATUS_FLT_IO_COMPLETE cpu_to_le32(0x001C0001)
+#define STATUS_OBJECT_NAME_EXISTS cpu_to_le32(0x40000000)
+#define STATUS_THREAD_WAS_SUSPENDED cpu_to_le32(0x40000001)
+#define STATUS_WORKING_SET_LIMIT_RANGE cpu_to_le32(0x40000002)
+#define STATUS_IMAGE_NOT_AT_BASE cpu_to_le32(0x40000003)
+#define STATUS_RXACT_STATE_CREATED cpu_to_le32(0x40000004)
+#define STATUS_SEGMENT_NOTIFICATION cpu_to_le32(0x40000005)
+#define STATUS_LOCAL_USER_SESSION_KEY cpu_to_le32(0x40000006)
+#define STATUS_BAD_CURRENT_DIRECTORY cpu_to_le32(0x40000007)
+#define STATUS_SERIAL_MORE_WRITES cpu_to_le32(0x40000008)
+#define STATUS_REGISTRY_RECOVERED cpu_to_le32(0x40000009)
+#define STATUS_FT_READ_RECOVERY_FROM_BACKUP cpu_to_le32(0x4000000A)
+#define STATUS_FT_WRITE_RECOVERY cpu_to_le32(0x4000000B)
+#define STATUS_SERIAL_COUNTER_TIMEOUT cpu_to_le32(0x4000000C)
+#define STATUS_NULL_LM_PASSWORD cpu_to_le32(0x4000000D)
+#define STATUS_IMAGE_MACHINE_TYPE_MISMATCH cpu_to_le32(0x4000000E)
+#define STATUS_RECEIVE_PARTIAL cpu_to_le32(0x4000000F)
+#define STATUS_RECEIVE_EXPEDITED cpu_to_le32(0x40000010)
+#define STATUS_RECEIVE_PARTIAL_EXPEDITED cpu_to_le32(0x40000011)
+#define STATUS_EVENT_DONE cpu_to_le32(0x40000012)
+#define STATUS_EVENT_PENDING cpu_to_le32(0x40000013)
+#define STATUS_CHECKING_FILE_SYSTEM cpu_to_le32(0x40000014)
+#define STATUS_FATAL_APP_EXIT cpu_to_le32(0x40000015)
+#define STATUS_PREDEFINED_HANDLE cpu_to_le32(0x40000016)
+#define STATUS_WAS_UNLOCKED cpu_to_le32(0x40000017)
+#define STATUS_SERVICE_NOTIFICATION cpu_to_le32(0x40000018)
+#define STATUS_WAS_LOCKED cpu_to_le32(0x40000019)
+#define STATUS_LOG_HARD_ERROR cpu_to_le32(0x4000001A)
+#define STATUS_ALREADY_WIN32 cpu_to_le32(0x4000001B)
+#define STATUS_WX86_UNSIMULATE cpu_to_le32(0x4000001C)
+#define STATUS_WX86_CONTINUE cpu_to_le32(0x4000001D)
+#define STATUS_WX86_SINGLE_STEP cpu_to_le32(0x4000001E)
+#define STATUS_WX86_BREAKPOINT cpu_to_le32(0x4000001F)
+#define STATUS_WX86_EXCEPTION_CONTINUE cpu_to_le32(0x40000020)
+#define STATUS_WX86_EXCEPTION_LASTCHANCE cpu_to_le32(0x40000021)
+#define STATUS_WX86_EXCEPTION_CHAIN cpu_to_le32(0x40000022)
+#define STATUS_IMAGE_MACHINE_TYPE_MISMATCH_EXE cpu_to_le32(0x40000023)
+#define STATUS_NO_YIELD_PERFORMED cpu_to_le32(0x40000024)
+#define STATUS_TIMER_RESUME_IGNORED cpu_to_le32(0x40000025)
+#define STATUS_ARBITRATION_UNHANDLED cpu_to_le32(0x40000026)
+#define STATUS_CARDBUS_NOT_SUPPORTED cpu_to_le32(0x40000027)
+#define STATUS_WX86_CREATEWX86TIB cpu_to_le32(0x40000028)
+#define STATUS_MP_PROCESSOR_MISMATCH cpu_to_le32(0x40000029)
+#define STATUS_HIBERNATED cpu_to_le32(0x4000002A)
+#define STATUS_RESUME_HIBERNATION cpu_to_le32(0x4000002B)
+#define STATUS_FIRMWARE_UPDATED cpu_to_le32(0x4000002C)
+#define STATUS_DRIVERS_LEAKING_LOCKED_PAGES cpu_to_le32(0x4000002D)
+#define STATUS_MESSAGE_RETRIEVED cpu_to_le32(0x4000002E)
+#define STATUS_SYSTEM_POWERSTATE_TRANSITION cpu_to_le32(0x4000002F)
+#define STATUS_ALPC_CHECK_COMPLETION_LIST cpu_to_le32(0x40000030)
+#define STATUS_SYSTEM_POWERSTATE_COMPLEX_TRANSITION cpu_to_le32(0x40000031)
+#define STATUS_ACCESS_AUDIT_BY_POLICY cpu_to_le32(0x40000032)
+#define STATUS_ABANDON_HIBERFILE cpu_to_le32(0x40000033)
+#define STATUS_BIZRULES_NOT_ENABLED cpu_to_le32(0x40000034)
+#define STATUS_WAKE_SYSTEM cpu_to_le32(0x40000294)
+#define STATUS_DS_SHUTTING_DOWN cpu_to_le32(0x40000370)
+#define DBG_REPLY_LATER cpu_to_le32(0x40010001)
+#define DBG_UNABLE_TO_PROVIDE_HANDLE cpu_to_le32(0x40010002)
+#define DBG_TERMINATE_THREAD cpu_to_le32(0x40010003)
+#define DBG_TERMINATE_PROCESS cpu_to_le32(0x40010004)
+#define DBG_CONTROL_C cpu_to_le32(0x40010005)
+#define DBG_PRINTEXCEPTION_C cpu_to_le32(0x40010006)
+#define DBG_RIPEXCEPTION cpu_to_le32(0x40010007)
+#define DBG_CONTROL_BREAK cpu_to_le32(0x40010008)
+#define DBG_COMMAND_EXCEPTION cpu_to_le32(0x40010009)
+#define RPC_NT_UUID_LOCAL_ONLY cpu_to_le32(0x40020056)
+#define RPC_NT_SEND_INCOMPLETE cpu_to_le32(0x400200AF)
+#define STATUS_CTX_CDM_CONNECT cpu_to_le32(0x400A0004)
+#define STATUS_CTX_CDM_DISCONNECT cpu_to_le32(0x400A0005)
+#define STATUS_SXS_RELEASE_ACTIVATION_CONTEXT cpu_to_le32(0x4015000D)
+#define STATUS_RECOVERY_NOT_NEEDED cpu_to_le32(0x40190034)
+#define STATUS_RM_ALREADY_STARTED cpu_to_le32(0x40190035)
+#define STATUS_LOG_NO_RESTART cpu_to_le32(0x401A000C)
+#define STATUS_VIDEO_DRIVER_DEBUG_REPORT_REQUEST cpu_to_le32(0x401B00EC)
+#define STATUS_GRAPHICS_PARTIAL_DATA_POPULATED cpu_to_le32(0x401E000A)
+#define STATUS_GRAPHICS_DRIVER_MISMATCH cpu_to_le32(0x401E0117)
+#define STATUS_GRAPHICS_MODE_NOT_PINNED cpu_to_le32(0x401E0307)
+#define STATUS_GRAPHICS_NO_PREFERRED_MODE cpu_to_le32(0x401E031E)
+#define STATUS_GRAPHICS_DATASET_IS_EMPTY cpu_to_le32(0x401E034B)
+#define STATUS_GRAPHICS_NO_MORE_ELEMENTS_IN_DATASET cpu_to_le32(0x401E034C)
+#define STATUS_GRAPHICS_PATH_CONTENT_GEOMETRY_TRANSFORMATION_NOT_PINNED \
+ cpu_to_le32(0x401E0351)
+#define STATUS_GRAPHICS_UNKNOWN_CHILD_STATUS cpu_to_le32(0x401E042F)
+#define STATUS_GRAPHICS_LEADLINK_START_DEFERRED cpu_to_le32(0x401E0437)
+#define STATUS_GRAPHICS_POLLING_TOO_FREQUENTLY cpu_to_le32(0x401E0439)
+#define STATUS_GRAPHICS_START_DEFERRED cpu_to_le32(0x401E043A)
+#define STATUS_NDIS_INDICATION_REQUIRED cpu_to_le32(0x40230001)
+#define STATUS_GUARD_PAGE_VIOLATION cpu_to_le32(0x80000001)
+#define STATUS_DATATYPE_MISALIGNMENT cpu_to_le32(0x80000002)
+#define STATUS_BREAKPOINT cpu_to_le32(0x80000003)
+#define STATUS_SINGLE_STEP cpu_to_le32(0x80000004)
+#define STATUS_BUFFER_OVERFLOW cpu_to_le32(0x80000005)
+#define STATUS_NO_MORE_FILES cpu_to_le32(0x80000006)
+#define STATUS_WAKE_SYSTEM_DEBUGGER cpu_to_le32(0x80000007)
+#define STATUS_HANDLES_CLOSED cpu_to_le32(0x8000000A)
+#define STATUS_NO_INHERITANCE cpu_to_le32(0x8000000B)
+#define STATUS_GUID_SUBSTITUTION_MADE cpu_to_le32(0x8000000C)
+#define STATUS_PARTIAL_COPY cpu_to_le32(0x8000000D)
+#define STATUS_DEVICE_PAPER_EMPTY cpu_to_le32(0x8000000E)
+#define STATUS_DEVICE_POWERED_OFF cpu_to_le32(0x8000000F)
+#define STATUS_DEVICE_OFF_LINE cpu_to_le32(0x80000010)
+#define STATUS_DEVICE_BUSY cpu_to_le32(0x80000011)
+#define STATUS_NO_MORE_EAS cpu_to_le32(0x80000012)
+#define STATUS_INVALID_EA_NAME cpu_to_le32(0x80000013)
+#define STATUS_EA_LIST_INCONSISTENT cpu_to_le32(0x80000014)
+#define STATUS_INVALID_EA_FLAG cpu_to_le32(0x80000015)
+#define STATUS_VERIFY_REQUIRED cpu_to_le32(0x80000016)
+#define STATUS_EXTRANEOUS_INFORMATION cpu_to_le32(0x80000017)
+#define STATUS_RXACT_COMMIT_NECESSARY cpu_to_le32(0x80000018)
+#define STATUS_NO_MORE_ENTRIES cpu_to_le32(0x8000001A)
+#define STATUS_FILEMARK_DETECTED cpu_to_le32(0x8000001B)
+#define STATUS_MEDIA_CHANGED cpu_to_le32(0x8000001C)
+#define STATUS_BUS_RESET cpu_to_le32(0x8000001D)
+#define STATUS_END_OF_MEDIA cpu_to_le32(0x8000001E)
+#define STATUS_BEGINNING_OF_MEDIA cpu_to_le32(0x8000001F)
+#define STATUS_MEDIA_CHECK cpu_to_le32(0x80000020)
+#define STATUS_SETMARK_DETECTED cpu_to_le32(0x80000021)
+#define STATUS_NO_DATA_DETECTED cpu_to_le32(0x80000022)
+#define STATUS_REDIRECTOR_HAS_OPEN_HANDLES cpu_to_le32(0x80000023)
+#define STATUS_SERVER_HAS_OPEN_HANDLES cpu_to_le32(0x80000024)
+#define STATUS_ALREADY_DISCONNECTED cpu_to_le32(0x80000025)
+#define STATUS_LONGJUMP cpu_to_le32(0x80000026)
+#define STATUS_CLEANER_CARTRIDGE_INSTALLED cpu_to_le32(0x80000027)
+#define STATUS_PLUGPLAY_QUERY_VETOED cpu_to_le32(0x80000028)
+#define STATUS_UNWIND_CONSOLIDATE cpu_to_le32(0x80000029)
+#define STATUS_REGISTRY_HIVE_RECOVERED cpu_to_le32(0x8000002A)
+#define STATUS_DLL_MIGHT_BE_INSECURE cpu_to_le32(0x8000002B)
+#define STATUS_DLL_MIGHT_BE_INCOMPATIBLE cpu_to_le32(0x8000002C)
+#define STATUS_STOPPED_ON_SYMLINK cpu_to_le32(0x8000002D)
+#define STATUS_DEVICE_REQUIRES_CLEANING cpu_to_le32(0x80000288)
+#define STATUS_DEVICE_DOOR_OPEN cpu_to_le32(0x80000289)
+#define STATUS_DATA_LOST_REPAIR cpu_to_le32(0x80000803)
+#define DBG_EXCEPTION_NOT_HANDLED cpu_to_le32(0x80010001)
+#define STATUS_CLUSTER_NODE_ALREADY_UP cpu_to_le32(0x80130001)
+#define STATUS_CLUSTER_NODE_ALREADY_DOWN cpu_to_le32(0x80130002)
+#define STATUS_CLUSTER_NETWORK_ALREADY_ONLINE cpu_to_le32(0x80130003)
+#define STATUS_CLUSTER_NETWORK_ALREADY_OFFLINE cpu_to_le32(0x80130004)
+#define STATUS_CLUSTER_NODE_ALREADY_MEMBER cpu_to_le32(0x80130005)
+#define STATUS_COULD_NOT_RESIZE_LOG cpu_to_le32(0x80190009)
+#define STATUS_NO_TXF_METADATA cpu_to_le32(0x80190029)
+#define STATUS_CANT_RECOVER_WITH_HANDLE_OPEN cpu_to_le32(0x80190031)
+#define STATUS_TXF_METADATA_ALREADY_PRESENT cpu_to_le32(0x80190041)
+#define STATUS_TRANSACTION_SCOPE_CALLBACKS_NOT_SET cpu_to_le32(0x80190042)
+#define STATUS_VIDEO_HUNG_DISPLAY_DRIVER_THREAD_RECOVERED \
+ cpu_to_le32(0x801B00EB)
+#define STATUS_FLT_BUFFER_TOO_SMALL cpu_to_le32(0x801C0001)
+#define STATUS_FVE_PARTIAL_METADATA cpu_to_le32(0x80210001)
+#define STATUS_UNSUCCESSFUL cpu_to_le32(0xC0000001)
+#define STATUS_NOT_IMPLEMENTED cpu_to_le32(0xC0000002)
+#define STATUS_INVALID_INFO_CLASS cpu_to_le32(0xC0000003)
+#define STATUS_INFO_LENGTH_MISMATCH cpu_to_le32(0xC0000004)
+#define STATUS_ACCESS_VIOLATION cpu_to_le32(0xC0000005)
+#define STATUS_IN_PAGE_ERROR cpu_to_le32(0xC0000006)
+#define STATUS_PAGEFILE_QUOTA cpu_to_le32(0xC0000007)
+#define STATUS_INVALID_HANDLE cpu_to_le32(0xC0000008)
+#define STATUS_BAD_INITIAL_STACK cpu_to_le32(0xC0000009)
+#define STATUS_BAD_INITIAL_PC cpu_to_le32(0xC000000A)
+#define STATUS_INVALID_CID cpu_to_le32(0xC000000B)
+#define STATUS_TIMER_NOT_CANCELED cpu_to_le32(0xC000000C)
+#define STATUS_INVALID_PARAMETER cpu_to_le32(0xC000000D)
+#define STATUS_NO_SUCH_DEVICE cpu_to_le32(0xC000000E)
+#define STATUS_NO_SUCH_FILE cpu_to_le32(0xC000000F)
+#define STATUS_INVALID_DEVICE_REQUEST cpu_to_le32(0xC0000010)
+#define STATUS_END_OF_FILE cpu_to_le32(0xC0000011)
+#define STATUS_WRONG_VOLUME cpu_to_le32(0xC0000012)
+#define STATUS_NO_MEDIA_IN_DEVICE cpu_to_le32(0xC0000013)
+#define STATUS_UNRECOGNIZED_MEDIA cpu_to_le32(0xC0000014)
+#define STATUS_NONEXISTENT_SECTOR cpu_to_le32(0xC0000015)
+#define STATUS_MORE_PROCESSING_REQUIRED cpu_to_le32(0xC0000016)
+#define STATUS_NO_MEMORY cpu_to_le32(0xC0000017)
+#define STATUS_CONFLICTING_ADDRESSES cpu_to_le32(0xC0000018)
+#define STATUS_NOT_MAPPED_VIEW cpu_to_le32(0xC0000019)
+#define STATUS_UNABLE_TO_FREE_VM cpu_to_le32(0xC000001A)
+#define STATUS_UNABLE_TO_DELETE_SECTION cpu_to_le32(0xC000001B)
+#define STATUS_INVALID_SYSTEM_SERVICE cpu_to_le32(0xC000001C)
+#define STATUS_ILLEGAL_INSTRUCTION cpu_to_le32(0xC000001D)
+#define STATUS_INVALID_LOCK_SEQUENCE cpu_to_le32(0xC000001E)
+#define STATUS_INVALID_VIEW_SIZE cpu_to_le32(0xC000001F)
+#define STATUS_INVALID_FILE_FOR_SECTION cpu_to_le32(0xC0000020)
+#define STATUS_ALREADY_COMMITTED cpu_to_le32(0xC0000021)
+#define STATUS_ACCESS_DENIED cpu_to_le32(0xC0000022)
+#define STATUS_BUFFER_TOO_SMALL cpu_to_le32(0xC0000023)
+#define STATUS_OBJECT_TYPE_MISMATCH cpu_to_le32(0xC0000024)
+#define STATUS_NONCONTINUABLE_EXCEPTION cpu_to_le32(0xC0000025)
+#define STATUS_INVALID_DISPOSITION cpu_to_le32(0xC0000026)
+#define STATUS_UNWIND cpu_to_le32(0xC0000027)
+#define STATUS_BAD_STACK cpu_to_le32(0xC0000028)
+#define STATUS_INVALID_UNWIND_TARGET cpu_to_le32(0xC0000029)
+#define STATUS_NOT_LOCKED cpu_to_le32(0xC000002A)
+#define STATUS_PARITY_ERROR cpu_to_le32(0xC000002B)
+#define STATUS_UNABLE_TO_DECOMMIT_VM cpu_to_le32(0xC000002C)
+#define STATUS_NOT_COMMITTED cpu_to_le32(0xC000002D)
+#define STATUS_INVALID_PORT_ATTRIBUTES cpu_to_le32(0xC000002E)
+#define STATUS_PORT_MESSAGE_TOO_LONG cpu_to_le32(0xC000002F)
+#define STATUS_INVALID_PARAMETER_MIX cpu_to_le32(0xC0000030)
+#define STATUS_INVALID_QUOTA_LOWER cpu_to_le32(0xC0000031)
+#define STATUS_DISK_CORRUPT_ERROR cpu_to_le32(0xC0000032)
+#define STATUS_OBJECT_NAME_INVALID cpu_to_le32(0xC0000033)
+#define STATUS_OBJECT_NAME_NOT_FOUND cpu_to_le32(0xC0000034)
+#define STATUS_OBJECT_NAME_COLLISION cpu_to_le32(0xC0000035)
+#define STATUS_PORT_DISCONNECTED cpu_to_le32(0xC0000037)
+#define STATUS_DEVICE_ALREADY_ATTACHED cpu_to_le32(0xC0000038)
+#define STATUS_OBJECT_PATH_INVALID cpu_to_le32(0xC0000039)
+#define STATUS_OBJECT_PATH_NOT_FOUND cpu_to_le32(0xC000003A)
+#define STATUS_OBJECT_PATH_SYNTAX_BAD cpu_to_le32(0xC000003B)
+#define STATUS_DATA_OVERRUN cpu_to_le32(0xC000003C)
+#define STATUS_DATA_LATE_ERROR cpu_to_le32(0xC000003D)
+#define STATUS_DATA_ERROR cpu_to_le32(0xC000003E)
+#define STATUS_CRC_ERROR cpu_to_le32(0xC000003F)
+#define STATUS_SECTION_TOO_BIG cpu_to_le32(0xC0000040)
+#define STATUS_PORT_CONNECTION_REFUSED cpu_to_le32(0xC0000041)
+#define STATUS_INVALID_PORT_HANDLE cpu_to_le32(0xC0000042)
+#define STATUS_SHARING_VIOLATION cpu_to_le32(0xC0000043)
+#define STATUS_QUOTA_EXCEEDED cpu_to_le32(0xC0000044)
+#define STATUS_INVALID_PAGE_PROTECTION cpu_to_le32(0xC0000045)
+#define STATUS_MUTANT_NOT_OWNED cpu_to_le32(0xC0000046)
+#define STATUS_SEMAPHORE_LIMIT_EXCEEDED cpu_to_le32(0xC0000047)
+#define STATUS_PORT_ALREADY_SET cpu_to_le32(0xC0000048)
+#define STATUS_SECTION_NOT_IMAGE cpu_to_le32(0xC0000049)
+#define STATUS_SUSPEND_COUNT_EXCEEDED cpu_to_le32(0xC000004A)
+#define STATUS_THREAD_IS_TERMINATING cpu_to_le32(0xC000004B)
+#define STATUS_BAD_WORKING_SET_LIMIT cpu_to_le32(0xC000004C)
+#define STATUS_INCOMPATIBLE_FILE_MAP cpu_to_le32(0xC000004D)
+#define STATUS_SECTION_PROTECTION cpu_to_le32(0xC000004E)
+#define STATUS_EAS_NOT_SUPPORTED cpu_to_le32(0xC000004F)
+#define STATUS_EA_TOO_LARGE cpu_to_le32(0xC0000050)
+#define STATUS_NONEXISTENT_EA_ENTRY cpu_to_le32(0xC0000051)
+#define STATUS_NO_EAS_ON_FILE cpu_to_le32(0xC0000052)
+#define STATUS_EA_CORRUPT_ERROR cpu_to_le32(0xC0000053)
+#define STATUS_FILE_LOCK_CONFLICT cpu_to_le32(0xC0000054)
+#define STATUS_LOCK_NOT_GRANTED cpu_to_le32(0xC0000055)
+#define STATUS_DELETE_PENDING cpu_to_le32(0xC0000056)
+#define STATUS_CTL_FILE_NOT_SUPPORTED cpu_to_le32(0xC0000057)
+#define STATUS_UNKNOWN_REVISION cpu_to_le32(0xC0000058)
+#define STATUS_REVISION_MISMATCH cpu_to_le32(0xC0000059)
+#define STATUS_INVALID_OWNER cpu_to_le32(0xC000005A)
+#define STATUS_INVALID_PRIMARY_GROUP cpu_to_le32(0xC000005B)
+#define STATUS_NO_IMPERSONATION_TOKEN cpu_to_le32(0xC000005C)
+#define STATUS_CANT_DISABLE_MANDATORY cpu_to_le32(0xC000005D)
+#define STATUS_NO_LOGON_SERVERS cpu_to_le32(0xC000005E)
+#define STATUS_NO_SUCH_LOGON_SESSION cpu_to_le32(0xC000005F)
+#define STATUS_NO_SUCH_PRIVILEGE cpu_to_le32(0xC0000060)
+#define STATUS_PRIVILEGE_NOT_HELD cpu_to_le32(0xC0000061)
+#define STATUS_INVALID_ACCOUNT_NAME cpu_to_le32(0xC0000062)
+#define STATUS_USER_EXISTS cpu_to_le32(0xC0000063)
+#define STATUS_NO_SUCH_USER cpu_to_le32(0xC0000064)
+#define STATUS_GROUP_EXISTS cpu_to_le32(0xC0000065)
+#define STATUS_NO_SUCH_GROUP cpu_to_le32(0xC0000066)
+#define STATUS_MEMBER_IN_GROUP cpu_to_le32(0xC0000067)
+#define STATUS_MEMBER_NOT_IN_GROUP cpu_to_le32(0xC0000068)
+#define STATUS_LAST_ADMIN cpu_to_le32(0xC0000069)
+#define STATUS_WRONG_PASSWORD cpu_to_le32(0xC000006A)
+#define STATUS_ILL_FORMED_PASSWORD cpu_to_le32(0xC000006B)
+#define STATUS_PASSWORD_RESTRICTION cpu_to_le32(0xC000006C)
+#define STATUS_LOGON_FAILURE cpu_to_le32(0xC000006D)
+#define STATUS_ACCOUNT_RESTRICTION cpu_to_le32(0xC000006E)
+#define STATUS_INVALID_LOGON_HOURS cpu_to_le32(0xC000006F)
+#define STATUS_INVALID_WORKSTATION cpu_to_le32(0xC0000070)
+#define STATUS_PASSWORD_EXPIRED cpu_to_le32(0xC0000071)
+#define STATUS_ACCOUNT_DISABLED cpu_to_le32(0xC0000072)
+#define STATUS_NONE_MAPPED cpu_to_le32(0xC0000073)
+#define STATUS_TOO_MANY_LUIDS_REQUESTED cpu_to_le32(0xC0000074)
+#define STATUS_LUIDS_EXHAUSTED cpu_to_le32(0xC0000075)
+#define STATUS_INVALID_SUB_AUTHORITY cpu_to_le32(0xC0000076)
+#define STATUS_INVALID_ACL cpu_to_le32(0xC0000077)
+#define STATUS_INVALID_SID cpu_to_le32(0xC0000078)
+#define STATUS_INVALID_SECURITY_DESCR cpu_to_le32(0xC0000079)
+#define STATUS_PROCEDURE_NOT_FOUND cpu_to_le32(0xC000007A)
+#define STATUS_INVALID_IMAGE_FORMAT cpu_to_le32(0xC000007B)
+#define STATUS_NO_TOKEN cpu_to_le32(0xC000007C)
+#define STATUS_BAD_INHERITANCE_ACL cpu_to_le32(0xC000007D)
+#define STATUS_RANGE_NOT_LOCKED cpu_to_le32(0xC000007E)
+#define STATUS_DISK_FULL cpu_to_le32(0xC000007F)
+#define STATUS_SERVER_DISABLED cpu_to_le32(0xC0000080)
+#define STATUS_SERVER_NOT_DISABLED cpu_to_le32(0xC0000081)
+#define STATUS_TOO_MANY_GUIDS_REQUESTED cpu_to_le32(0xC0000082)
+#define STATUS_GUIDS_EXHAUSTED cpu_to_le32(0xC0000083)
+#define STATUS_INVALID_ID_AUTHORITY cpu_to_le32(0xC0000084)
+#define STATUS_AGENTS_EXHAUSTED cpu_to_le32(0xC0000085)
+#define STATUS_INVALID_VOLUME_LABEL cpu_to_le32(0xC0000086)
+#define STATUS_SECTION_NOT_EXTENDED cpu_to_le32(0xC0000087)
+#define STATUS_NOT_MAPPED_DATA cpu_to_le32(0xC0000088)
+#define STATUS_RESOURCE_DATA_NOT_FOUND cpu_to_le32(0xC0000089)
+#define STATUS_RESOURCE_TYPE_NOT_FOUND cpu_to_le32(0xC000008A)
+#define STATUS_RESOURCE_NAME_NOT_FOUND cpu_to_le32(0xC000008B)
+#define STATUS_ARRAY_BOUNDS_EXCEEDED cpu_to_le32(0xC000008C)
+#define STATUS_FLOAT_DENORMAL_OPERAND cpu_to_le32(0xC000008D)
+#define STATUS_FLOAT_DIVIDE_BY_ZERO cpu_to_le32(0xC000008E)
+#define STATUS_FLOAT_INEXACT_RESULT cpu_to_le32(0xC000008F)
+#define STATUS_FLOAT_INVALID_OPERATION cpu_to_le32(0xC0000090)
+#define STATUS_FLOAT_OVERFLOW cpu_to_le32(0xC0000091)
+#define STATUS_FLOAT_STACK_CHECK cpu_to_le32(0xC0000092)
+#define STATUS_FLOAT_UNDERFLOW cpu_to_le32(0xC0000093)
+#define STATUS_INTEGER_DIVIDE_BY_ZERO cpu_to_le32(0xC0000094)
+#define STATUS_INTEGER_OVERFLOW cpu_to_le32(0xC0000095)
+#define STATUS_PRIVILEGED_INSTRUCTION cpu_to_le32(0xC0000096)
+#define STATUS_TOO_MANY_PAGING_FILES cpu_to_le32(0xC0000097)
+#define STATUS_FILE_INVALID cpu_to_le32(0xC0000098)
+#define STATUS_ALLOTTED_SPACE_EXCEEDED cpu_to_le32(0xC0000099)
+#define STATUS_INSUFFICIENT_RESOURCES cpu_to_le32(0xC000009A)
+#define STATUS_DFS_EXIT_PATH_FOUND cpu_to_le32(0xC000009B)
+#define STATUS_DEVICE_DATA_ERROR cpu_to_le32(0xC000009C)
+#define STATUS_DEVICE_NOT_CONNECTED cpu_to_le32(0xC000009D)
+#define STATUS_DEVICE_POWER_FAILURE cpu_to_le32(0xC000009E)
+#define STATUS_FREE_VM_NOT_AT_BASE cpu_to_le32(0xC000009F)
+#define STATUS_MEMORY_NOT_ALLOCATED cpu_to_le32(0xC00000A0)
+#define STATUS_WORKING_SET_QUOTA cpu_to_le32(0xC00000A1)
+#define STATUS_MEDIA_WRITE_PROTECTED cpu_to_le32(0xC00000A2)
+#define STATUS_DEVICE_NOT_READY cpu_to_le32(0xC00000A3)
+#define STATUS_INVALID_GROUP_ATTRIBUTES cpu_to_le32(0xC00000A4)
+#define STATUS_BAD_IMPERSONATION_LEVEL cpu_to_le32(0xC00000A5)
+#define STATUS_CANT_OPEN_ANONYMOUS cpu_to_le32(0xC00000A6)
+#define STATUS_BAD_VALIDATION_CLASS cpu_to_le32(0xC00000A7)
+#define STATUS_BAD_TOKEN_TYPE cpu_to_le32(0xC00000A8)
+#define STATUS_BAD_MASTER_BOOT_RECORD cpu_to_le32(0xC00000A9)
+#define STATUS_INSTRUCTION_MISALIGNMENT cpu_to_le32(0xC00000AA)
+#define STATUS_INSTANCE_NOT_AVAILABLE cpu_to_le32(0xC00000AB)
+#define STATUS_PIPE_NOT_AVAILABLE cpu_to_le32(0xC00000AC)
+#define STATUS_INVALID_PIPE_STATE cpu_to_le32(0xC00000AD)
+#define STATUS_PIPE_BUSY cpu_to_le32(0xC00000AE)
+#define STATUS_ILLEGAL_FUNCTION cpu_to_le32(0xC00000AF)
+#define STATUS_PIPE_DISCONNECTED cpu_to_le32(0xC00000B0)
+#define STATUS_PIPE_CLOSING cpu_to_le32(0xC00000B1)
+#define STATUS_PIPE_CONNECTED cpu_to_le32(0xC00000B2)
+#define STATUS_PIPE_LISTENING cpu_to_le32(0xC00000B3)
+#define STATUS_INVALID_READ_MODE cpu_to_le32(0xC00000B4)
+#define STATUS_IO_TIMEOUT cpu_to_le32(0xC00000B5)
+#define STATUS_FILE_FORCED_CLOSED cpu_to_le32(0xC00000B6)
+#define STATUS_PROFILING_NOT_STARTED cpu_to_le32(0xC00000B7)
+#define STATUS_PROFILING_NOT_STOPPED cpu_to_le32(0xC00000B8)
+#define STATUS_COULD_NOT_INTERPRET cpu_to_le32(0xC00000B9)
+#define STATUS_FILE_IS_A_DIRECTORY cpu_to_le32(0xC00000BA)
+#define STATUS_NOT_SUPPORTED cpu_to_le32(0xC00000BB)
+#define STATUS_REMOTE_NOT_LISTENING cpu_to_le32(0xC00000BC)
+#define STATUS_DUPLICATE_NAME cpu_to_le32(0xC00000BD)
+#define STATUS_BAD_NETWORK_PATH cpu_to_le32(0xC00000BE)
+#define STATUS_NETWORK_BUSY cpu_to_le32(0xC00000BF)
+#define STATUS_DEVICE_DOES_NOT_EXIST cpu_to_le32(0xC00000C0)
+#define STATUS_TOO_MANY_COMMANDS cpu_to_le32(0xC00000C1)
+#define STATUS_ADAPTER_HARDWARE_ERROR cpu_to_le32(0xC00000C2)
+#define STATUS_INVALID_NETWORK_RESPONSE cpu_to_le32(0xC00000C3)
+#define STATUS_UNEXPECTED_NETWORK_ERROR cpu_to_le32(0xC00000C4)
+#define STATUS_BAD_REMOTE_ADAPTER cpu_to_le32(0xC00000C5)
+#define STATUS_PRINT_QUEUE_FULL cpu_to_le32(0xC00000C6)
+#define STATUS_NO_SPOOL_SPACE cpu_to_le32(0xC00000C7)
+#define STATUS_PRINT_CANCELLED cpu_to_le32(0xC00000C8)
+#define STATUS_NETWORK_NAME_DELETED cpu_to_le32(0xC00000C9)
+#define STATUS_NETWORK_ACCESS_DENIED cpu_to_le32(0xC00000CA)
+#define STATUS_BAD_DEVICE_TYPE cpu_to_le32(0xC00000CB)
+#define STATUS_BAD_NETWORK_NAME cpu_to_le32(0xC00000CC)
+#define STATUS_TOO_MANY_NAMES cpu_to_le32(0xC00000CD)
+#define STATUS_TOO_MANY_SESSIONS cpu_to_le32(0xC00000CE)
+#define STATUS_SHARING_PAUSED cpu_to_le32(0xC00000CF)
+#define STATUS_REQUEST_NOT_ACCEPTED cpu_to_le32(0xC00000D0)
+#define STATUS_REDIRECTOR_PAUSED cpu_to_le32(0xC00000D1)
+#define STATUS_NET_WRITE_FAULT cpu_to_le32(0xC00000D2)
+#define STATUS_PROFILING_AT_LIMIT cpu_to_le32(0xC00000D3)
+#define STATUS_NOT_SAME_DEVICE cpu_to_le32(0xC00000D4)
+#define STATUS_FILE_RENAMED cpu_to_le32(0xC00000D5)
+#define STATUS_VIRTUAL_CIRCUIT_CLOSED cpu_to_le32(0xC00000D6)
+#define STATUS_NO_SECURITY_ON_OBJECT cpu_to_le32(0xC00000D7) /* 0xC00000D7-0xC000016E are consecutive; every value is wrapped in cpu_to_le32() */
+#define STATUS_CANT_WAIT cpu_to_le32(0xC00000D8)
+#define STATUS_PIPE_EMPTY cpu_to_le32(0xC00000D9)
+#define STATUS_CANT_ACCESS_DOMAIN_INFO cpu_to_le32(0xC00000DA)
+#define STATUS_CANT_TERMINATE_SELF cpu_to_le32(0xC00000DB)
+#define STATUS_INVALID_SERVER_STATE cpu_to_le32(0xC00000DC)
+#define STATUS_INVALID_DOMAIN_STATE cpu_to_le32(0xC00000DD)
+#define STATUS_INVALID_DOMAIN_ROLE cpu_to_le32(0xC00000DE)
+#define STATUS_NO_SUCH_DOMAIN cpu_to_le32(0xC00000DF)
+#define STATUS_DOMAIN_EXISTS cpu_to_le32(0xC00000E0)
+#define STATUS_DOMAIN_LIMIT_EXCEEDED cpu_to_le32(0xC00000E1)
+#define STATUS_OPLOCK_NOT_GRANTED cpu_to_le32(0xC00000E2)
+#define STATUS_INVALID_OPLOCK_PROTOCOL cpu_to_le32(0xC00000E3)
+#define STATUS_INTERNAL_DB_CORRUPTION cpu_to_le32(0xC00000E4)
+#define STATUS_INTERNAL_ERROR cpu_to_le32(0xC00000E5)
+#define STATUS_GENERIC_NOT_MAPPED cpu_to_le32(0xC00000E6)
+#define STATUS_BAD_DESCRIPTOR_FORMAT cpu_to_le32(0xC00000E7)
+#define STATUS_INVALID_USER_BUFFER cpu_to_le32(0xC00000E8)
+#define STATUS_UNEXPECTED_IO_ERROR cpu_to_le32(0xC00000E9)
+#define STATUS_UNEXPECTED_MM_CREATE_ERR cpu_to_le32(0xC00000EA)
+#define STATUS_UNEXPECTED_MM_MAP_ERROR cpu_to_le32(0xC00000EB)
+#define STATUS_UNEXPECTED_MM_EXTEND_ERR cpu_to_le32(0xC00000EC)
+#define STATUS_NOT_LOGON_PROCESS cpu_to_le32(0xC00000ED)
+#define STATUS_LOGON_SESSION_EXISTS cpu_to_le32(0xC00000EE)
+#define STATUS_INVALID_PARAMETER_1 cpu_to_le32(0xC00000EF)
+#define STATUS_INVALID_PARAMETER_2 cpu_to_le32(0xC00000F0)
+#define STATUS_INVALID_PARAMETER_3 cpu_to_le32(0xC00000F1)
+#define STATUS_INVALID_PARAMETER_4 cpu_to_le32(0xC00000F2)
+#define STATUS_INVALID_PARAMETER_5 cpu_to_le32(0xC00000F3)
+#define STATUS_INVALID_PARAMETER_6 cpu_to_le32(0xC00000F4)
+#define STATUS_INVALID_PARAMETER_7 cpu_to_le32(0xC00000F5)
+#define STATUS_INVALID_PARAMETER_8 cpu_to_le32(0xC00000F6)
+#define STATUS_INVALID_PARAMETER_9 cpu_to_le32(0xC00000F7)
+#define STATUS_INVALID_PARAMETER_10 cpu_to_le32(0xC00000F8)
+#define STATUS_INVALID_PARAMETER_11 cpu_to_le32(0xC00000F9)
+#define STATUS_INVALID_PARAMETER_12 cpu_to_le32(0xC00000FA)
+#define STATUS_REDIRECTOR_NOT_STARTED cpu_to_le32(0xC00000FB)
+#define STATUS_REDIRECTOR_STARTED cpu_to_le32(0xC00000FC)
+#define STATUS_STACK_OVERFLOW cpu_to_le32(0xC00000FD)
+#define STATUS_NO_SUCH_PACKAGE cpu_to_le32(0xC00000FE)
+#define STATUS_BAD_FUNCTION_TABLE cpu_to_le32(0xC00000FF)
+#define STATUS_VARIABLE_NOT_FOUND cpu_to_le32(0xC0000100)
+#define STATUS_DIRECTORY_NOT_EMPTY cpu_to_le32(0xC0000101)
+#define STATUS_FILE_CORRUPT_ERROR cpu_to_le32(0xC0000102)
+#define STATUS_NOT_A_DIRECTORY cpu_to_le32(0xC0000103)
+#define STATUS_BAD_LOGON_SESSION_STATE cpu_to_le32(0xC0000104)
+#define STATUS_LOGON_SESSION_COLLISION cpu_to_le32(0xC0000105)
+#define STATUS_NAME_TOO_LONG cpu_to_le32(0xC0000106)
+#define STATUS_FILES_OPEN cpu_to_le32(0xC0000107)
+#define STATUS_CONNECTION_IN_USE cpu_to_le32(0xC0000108)
+#define STATUS_MESSAGE_NOT_FOUND cpu_to_le32(0xC0000109)
+#define STATUS_PROCESS_IS_TERMINATING cpu_to_le32(0xC000010A)
+#define STATUS_INVALID_LOGON_TYPE cpu_to_le32(0xC000010B)
+#define STATUS_NO_GUID_TRANSLATION cpu_to_le32(0xC000010C)
+#define STATUS_CANNOT_IMPERSONATE cpu_to_le32(0xC000010D)
+#define STATUS_IMAGE_ALREADY_LOADED cpu_to_le32(0xC000010E)
+#define STATUS_ABIOS_NOT_PRESENT cpu_to_le32(0xC000010F)
+#define STATUS_ABIOS_LID_NOT_EXIST cpu_to_le32(0xC0000110)
+#define STATUS_ABIOS_LID_ALREADY_OWNED cpu_to_le32(0xC0000111)
+#define STATUS_ABIOS_NOT_LID_OWNER cpu_to_le32(0xC0000112)
+#define STATUS_ABIOS_INVALID_COMMAND cpu_to_le32(0xC0000113)
+#define STATUS_ABIOS_INVALID_LID cpu_to_le32(0xC0000114)
+#define STATUS_ABIOS_SELECTOR_NOT_AVAILABLE cpu_to_le32(0xC0000115)
+#define STATUS_ABIOS_INVALID_SELECTOR cpu_to_le32(0xC0000116)
+#define STATUS_NO_LDT cpu_to_le32(0xC0000117)
+#define STATUS_INVALID_LDT_SIZE cpu_to_le32(0xC0000118)
+#define STATUS_INVALID_LDT_OFFSET cpu_to_le32(0xC0000119)
+#define STATUS_INVALID_LDT_DESCRIPTOR cpu_to_le32(0xC000011A)
+#define STATUS_INVALID_IMAGE_NE_FORMAT cpu_to_le32(0xC000011B)
+#define STATUS_RXACT_INVALID_STATE cpu_to_le32(0xC000011C)
+#define STATUS_RXACT_COMMIT_FAILURE cpu_to_le32(0xC000011D)
+#define STATUS_MAPPED_FILE_SIZE_ZERO cpu_to_le32(0xC000011E)
+#define STATUS_TOO_MANY_OPENED_FILES cpu_to_le32(0xC000011F)
+#define STATUS_CANCELLED cpu_to_le32(0xC0000120)
+#define STATUS_CANNOT_DELETE cpu_to_le32(0xC0000121)
+#define STATUS_INVALID_COMPUTER_NAME cpu_to_le32(0xC0000122)
+#define STATUS_FILE_DELETED cpu_to_le32(0xC0000123)
+#define STATUS_SPECIAL_ACCOUNT cpu_to_le32(0xC0000124)
+#define STATUS_SPECIAL_GROUP cpu_to_le32(0xC0000125)
+#define STATUS_SPECIAL_USER cpu_to_le32(0xC0000126)
+#define STATUS_MEMBERS_PRIMARY_GROUP cpu_to_le32(0xC0000127)
+#define STATUS_FILE_CLOSED cpu_to_le32(0xC0000128)
+#define STATUS_TOO_MANY_THREADS cpu_to_le32(0xC0000129)
+#define STATUS_THREAD_NOT_IN_PROCESS cpu_to_le32(0xC000012A)
+#define STATUS_TOKEN_ALREADY_IN_USE cpu_to_le32(0xC000012B)
+#define STATUS_PAGEFILE_QUOTA_EXCEEDED cpu_to_le32(0xC000012C)
+#define STATUS_COMMITMENT_LIMIT cpu_to_le32(0xC000012D)
+#define STATUS_INVALID_IMAGE_LE_FORMAT cpu_to_le32(0xC000012E)
+#define STATUS_INVALID_IMAGE_NOT_MZ cpu_to_le32(0xC000012F)
+#define STATUS_INVALID_IMAGE_PROTECT cpu_to_le32(0xC0000130)
+#define STATUS_INVALID_IMAGE_WIN_16 cpu_to_le32(0xC0000131)
+#define STATUS_LOGON_SERVER_CONFLICT cpu_to_le32(0xC0000132)
+#define STATUS_TIME_DIFFERENCE_AT_DC cpu_to_le32(0xC0000133)
+#define STATUS_SYNCHRONIZATION_REQUIRED cpu_to_le32(0xC0000134)
+#define STATUS_DLL_NOT_FOUND cpu_to_le32(0xC0000135)
+#define STATUS_OPEN_FAILED cpu_to_le32(0xC0000136)
+#define STATUS_IO_PRIVILEGE_FAILED cpu_to_le32(0xC0000137)
+#define STATUS_ORDINAL_NOT_FOUND cpu_to_le32(0xC0000138)
+#define STATUS_ENTRYPOINT_NOT_FOUND cpu_to_le32(0xC0000139)
+#define STATUS_CONTROL_C_EXIT cpu_to_le32(0xC000013A)
+#define STATUS_LOCAL_DISCONNECT cpu_to_le32(0xC000013B)
+#define STATUS_REMOTE_DISCONNECT cpu_to_le32(0xC000013C)
+#define STATUS_REMOTE_RESOURCES cpu_to_le32(0xC000013D)
+#define STATUS_LINK_FAILED cpu_to_le32(0xC000013E)
+#define STATUS_LINK_TIMEOUT cpu_to_le32(0xC000013F)
+#define STATUS_INVALID_CONNECTION cpu_to_le32(0xC0000140)
+#define STATUS_INVALID_ADDRESS cpu_to_le32(0xC0000141)
+#define STATUS_DLL_INIT_FAILED cpu_to_le32(0xC0000142)
+#define STATUS_MISSING_SYSTEMFILE cpu_to_le32(0xC0000143)
+#define STATUS_UNHANDLED_EXCEPTION cpu_to_le32(0xC0000144)
+#define STATUS_APP_INIT_FAILURE cpu_to_le32(0xC0000145)
+#define STATUS_PAGEFILE_CREATE_FAILED cpu_to_le32(0xC0000146)
+#define STATUS_NO_PAGEFILE cpu_to_le32(0xC0000147)
+#define STATUS_INVALID_LEVEL cpu_to_le32(0xC0000148)
+#define STATUS_WRONG_PASSWORD_CORE cpu_to_le32(0xC0000149)
+#define STATUS_ILLEGAL_FLOAT_CONTEXT cpu_to_le32(0xC000014A)
+#define STATUS_PIPE_BROKEN cpu_to_le32(0xC000014B)
+#define STATUS_REGISTRY_CORRUPT cpu_to_le32(0xC000014C)
+#define STATUS_REGISTRY_IO_FAILED cpu_to_le32(0xC000014D)
+#define STATUS_NO_EVENT_PAIR cpu_to_le32(0xC000014E)
+#define STATUS_UNRECOGNIZED_VOLUME cpu_to_le32(0xC000014F)
+#define STATUS_SERIAL_NO_DEVICE_INITED cpu_to_le32(0xC0000150)
+#define STATUS_NO_SUCH_ALIAS cpu_to_le32(0xC0000151)
+#define STATUS_MEMBER_NOT_IN_ALIAS cpu_to_le32(0xC0000152)
+#define STATUS_MEMBER_IN_ALIAS cpu_to_le32(0xC0000153)
+#define STATUS_ALIAS_EXISTS cpu_to_le32(0xC0000154)
+#define STATUS_LOGON_NOT_GRANTED cpu_to_le32(0xC0000155)
+#define STATUS_TOO_MANY_SECRETS cpu_to_le32(0xC0000156)
+#define STATUS_SECRET_TOO_LONG cpu_to_le32(0xC0000157)
+#define STATUS_INTERNAL_DB_ERROR cpu_to_le32(0xC0000158)
+#define STATUS_FULLSCREEN_MODE cpu_to_le32(0xC0000159)
+#define STATUS_TOO_MANY_CONTEXT_IDS cpu_to_le32(0xC000015A)
+#define STATUS_LOGON_TYPE_NOT_GRANTED cpu_to_le32(0xC000015B)
+#define STATUS_NOT_REGISTRY_FILE cpu_to_le32(0xC000015C)
+#define STATUS_NT_CROSS_ENCRYPTION_REQUIRED cpu_to_le32(0xC000015D)
+#define STATUS_DOMAIN_CTRLR_CONFIG_ERROR cpu_to_le32(0xC000015E)
+#define STATUS_FT_MISSING_MEMBER cpu_to_le32(0xC000015F)
+#define STATUS_ILL_FORMED_SERVICE_ENTRY cpu_to_le32(0xC0000160)
+#define STATUS_ILLEGAL_CHARACTER cpu_to_le32(0xC0000161)
+#define STATUS_UNMAPPABLE_CHARACTER cpu_to_le32(0xC0000162)
+#define STATUS_UNDEFINED_CHARACTER cpu_to_le32(0xC0000163)
+#define STATUS_FLOPPY_VOLUME cpu_to_le32(0xC0000164)
+#define STATUS_FLOPPY_ID_MARK_NOT_FOUND cpu_to_le32(0xC0000165)
+#define STATUS_FLOPPY_WRONG_CYLINDER cpu_to_le32(0xC0000166)
+#define STATUS_FLOPPY_UNKNOWN_ERROR cpu_to_le32(0xC0000167)
+#define STATUS_FLOPPY_BAD_REGISTERS cpu_to_le32(0xC0000168)
+#define STATUS_DISK_RECALIBRATE_FAILED cpu_to_le32(0xC0000169)
+#define STATUS_DISK_OPERATION_FAILED cpu_to_le32(0xC000016A)
+#define STATUS_DISK_RESET_FAILED cpu_to_le32(0xC000016B)
+#define STATUS_SHARED_IRQ_BUSY cpu_to_le32(0xC000016C)
+#define STATUS_FT_ORPHANING cpu_to_le32(0xC000016D)
+#define STATUS_BIOS_FAILED_TO_CONNECT_INTERRUPT cpu_to_le32(0xC000016E)
+#define STATUS_PARTITION_FAILURE cpu_to_le32(0xC0000172) /* numbering skips 0xC000016F-0xC0000171 before this entry; 0xC0000172-0xC0000178 are consecutive */
+#define STATUS_INVALID_BLOCK_LENGTH cpu_to_le32(0xC0000173)
+#define STATUS_DEVICE_NOT_PARTITIONED cpu_to_le32(0xC0000174)
+#define STATUS_UNABLE_TO_LOCK_MEDIA cpu_to_le32(0xC0000175)
+#define STATUS_UNABLE_TO_UNLOAD_MEDIA cpu_to_le32(0xC0000176)
+#define STATUS_EOM_OVERFLOW cpu_to_le32(0xC0000177)
+#define STATUS_NO_MEDIA cpu_to_le32(0xC0000178)
+#define STATUS_NO_SUCH_MEMBER cpu_to_le32(0xC000017A) /* numbering skips 0xC0000179 before this entry; 0xC000017A-0xC000019D are consecutive */
+#define STATUS_INVALID_MEMBER cpu_to_le32(0xC000017B)
+#define STATUS_KEY_DELETED cpu_to_le32(0xC000017C)
+#define STATUS_NO_LOG_SPACE cpu_to_le32(0xC000017D)
+#define STATUS_TOO_MANY_SIDS cpu_to_le32(0xC000017E)
+#define STATUS_LM_CROSS_ENCRYPTION_REQUIRED cpu_to_le32(0xC000017F)
+#define STATUS_KEY_HAS_CHILDREN cpu_to_le32(0xC0000180)
+#define STATUS_CHILD_MUST_BE_VOLATILE cpu_to_le32(0xC0000181)
+#define STATUS_DEVICE_CONFIGURATION_ERROR cpu_to_le32(0xC0000182)
+#define STATUS_DRIVER_INTERNAL_ERROR cpu_to_le32(0xC0000183)
+#define STATUS_INVALID_DEVICE_STATE cpu_to_le32(0xC0000184)
+#define STATUS_IO_DEVICE_ERROR cpu_to_le32(0xC0000185)
+#define STATUS_DEVICE_PROTOCOL_ERROR cpu_to_le32(0xC0000186)
+#define STATUS_BACKUP_CONTROLLER cpu_to_le32(0xC0000187)
+#define STATUS_LOG_FILE_FULL cpu_to_le32(0xC0000188)
+#define STATUS_TOO_LATE cpu_to_le32(0xC0000189)
+#define STATUS_NO_TRUST_LSA_SECRET cpu_to_le32(0xC000018A)
+#define STATUS_NO_TRUST_SAM_ACCOUNT cpu_to_le32(0xC000018B)
+#define STATUS_TRUSTED_DOMAIN_FAILURE cpu_to_le32(0xC000018C)
+#define STATUS_TRUSTED_RELATIONSHIP_FAILURE cpu_to_le32(0xC000018D)
+#define STATUS_EVENTLOG_FILE_CORRUPT cpu_to_le32(0xC000018E)
+#define STATUS_EVENTLOG_CANT_START cpu_to_le32(0xC000018F)
+#define STATUS_TRUST_FAILURE cpu_to_le32(0xC0000190)
+#define STATUS_MUTANT_LIMIT_EXCEEDED cpu_to_le32(0xC0000191)
+#define STATUS_NETLOGON_NOT_STARTED cpu_to_le32(0xC0000192)
+#define STATUS_ACCOUNT_EXPIRED cpu_to_le32(0xC0000193)
+#define STATUS_POSSIBLE_DEADLOCK cpu_to_le32(0xC0000194)
+#define STATUS_NETWORK_CREDENTIAL_CONFLICT cpu_to_le32(0xC0000195)
+#define STATUS_REMOTE_SESSION_LIMIT cpu_to_le32(0xC0000196)
+#define STATUS_EVENTLOG_FILE_CHANGED cpu_to_le32(0xC0000197)
+#define STATUS_NOLOGON_INTERDOMAIN_TRUST_ACCOUNT cpu_to_le32(0xC0000198)
+#define STATUS_NOLOGON_WORKSTATION_TRUST_ACCOUNT cpu_to_le32(0xC0000199)
+#define STATUS_NOLOGON_SERVER_TRUST_ACCOUNT cpu_to_le32(0xC000019A)
+#define STATUS_DOMAIN_TRUST_INCONSISTENT cpu_to_le32(0xC000019B)
+#define STATUS_FS_DRIVER_REQUIRED cpu_to_le32(0xC000019C)
+#define STATUS_IMAGE_ALREADY_LOADED_AS_DLL cpu_to_le32(0xC000019D)
+#define STATUS_NETWORK_OPEN_RESTRICTION cpu_to_le32(0xC0000201) /* numbering skips 0xC000019E-0xC0000200 before this entry; 0xC0000201-0xC0000249 are consecutive */
+#define STATUS_NO_USER_SESSION_KEY cpu_to_le32(0xC0000202)
+#define STATUS_USER_SESSION_DELETED cpu_to_le32(0xC0000203)
+#define STATUS_RESOURCE_LANG_NOT_FOUND cpu_to_le32(0xC0000204)
+#define STATUS_INSUFF_SERVER_RESOURCES cpu_to_le32(0xC0000205)
+#define STATUS_INVALID_BUFFER_SIZE cpu_to_le32(0xC0000206)
+#define STATUS_INVALID_ADDRESS_COMPONENT cpu_to_le32(0xC0000207)
+#define STATUS_INVALID_ADDRESS_WILDCARD cpu_to_le32(0xC0000208)
+#define STATUS_TOO_MANY_ADDRESSES cpu_to_le32(0xC0000209)
+#define STATUS_ADDRESS_ALREADY_EXISTS cpu_to_le32(0xC000020A)
+#define STATUS_ADDRESS_CLOSED cpu_to_le32(0xC000020B)
+#define STATUS_CONNECTION_DISCONNECTED cpu_to_le32(0xC000020C)
+#define STATUS_CONNECTION_RESET cpu_to_le32(0xC000020D)
+#define STATUS_TOO_MANY_NODES cpu_to_le32(0xC000020E)
+#define STATUS_TRANSACTION_ABORTED cpu_to_le32(0xC000020F)
+#define STATUS_TRANSACTION_TIMED_OUT cpu_to_le32(0xC0000210)
+#define STATUS_TRANSACTION_NO_RELEASE cpu_to_le32(0xC0000211)
+#define STATUS_TRANSACTION_NO_MATCH cpu_to_le32(0xC0000212)
+#define STATUS_TRANSACTION_RESPONDED cpu_to_le32(0xC0000213)
+#define STATUS_TRANSACTION_INVALID_ID cpu_to_le32(0xC0000214)
+#define STATUS_TRANSACTION_INVALID_TYPE cpu_to_le32(0xC0000215)
+#define STATUS_NOT_SERVER_SESSION cpu_to_le32(0xC0000216)
+#define STATUS_NOT_CLIENT_SESSION cpu_to_le32(0xC0000217)
+#define STATUS_CANNOT_LOAD_REGISTRY_FILE cpu_to_le32(0xC0000218)
+#define STATUS_DEBUG_ATTACH_FAILED cpu_to_le32(0xC0000219)
+#define STATUS_SYSTEM_PROCESS_TERMINATED cpu_to_le32(0xC000021A)
+#define STATUS_DATA_NOT_ACCEPTED cpu_to_le32(0xC000021B)
+#define STATUS_NO_BROWSER_SERVERS_FOUND cpu_to_le32(0xC000021C)
+#define STATUS_VDM_HARD_ERROR cpu_to_le32(0xC000021D)
+#define STATUS_DRIVER_CANCEL_TIMEOUT cpu_to_le32(0xC000021E)
+#define STATUS_REPLY_MESSAGE_MISMATCH cpu_to_le32(0xC000021F)
+#define STATUS_MAPPED_ALIGNMENT cpu_to_le32(0xC0000220)
+#define STATUS_IMAGE_CHECKSUM_MISMATCH cpu_to_le32(0xC0000221)
+#define STATUS_LOST_WRITEBEHIND_DATA cpu_to_le32(0xC0000222)
+#define STATUS_CLIENT_SERVER_PARAMETERS_INVALID cpu_to_le32(0xC0000223)
+#define STATUS_PASSWORD_MUST_CHANGE cpu_to_le32(0xC0000224)
+#define STATUS_NOT_FOUND cpu_to_le32(0xC0000225)
+#define STATUS_NOT_TINY_STREAM cpu_to_le32(0xC0000226)
+#define STATUS_RECOVERY_FAILURE cpu_to_le32(0xC0000227)
+#define STATUS_STACK_OVERFLOW_READ cpu_to_le32(0xC0000228)
+#define STATUS_FAIL_CHECK cpu_to_le32(0xC0000229)
+#define STATUS_DUPLICATE_OBJECTID cpu_to_le32(0xC000022A)
+#define STATUS_OBJECTID_EXISTS cpu_to_le32(0xC000022B)
+#define STATUS_CONVERT_TO_LARGE cpu_to_le32(0xC000022C)
+#define STATUS_RETRY cpu_to_le32(0xC000022D)
+#define STATUS_FOUND_OUT_OF_SCOPE cpu_to_le32(0xC000022E)
+#define STATUS_ALLOCATE_BUCKET cpu_to_le32(0xC000022F)
+#define STATUS_PROPSET_NOT_FOUND cpu_to_le32(0xC0000230)
+#define STATUS_MARSHALL_OVERFLOW cpu_to_le32(0xC0000231)
+#define STATUS_INVALID_VARIANT cpu_to_le32(0xC0000232)
+#define STATUS_DOMAIN_CONTROLLER_NOT_FOUND cpu_to_le32(0xC0000233)
+#define STATUS_ACCOUNT_LOCKED_OUT cpu_to_le32(0xC0000234)
+#define STATUS_HANDLE_NOT_CLOSABLE cpu_to_le32(0xC0000235)
+#define STATUS_CONNECTION_REFUSED cpu_to_le32(0xC0000236)
+#define STATUS_GRACEFUL_DISCONNECT cpu_to_le32(0xC0000237)
+#define STATUS_ADDRESS_ALREADY_ASSOCIATED cpu_to_le32(0xC0000238)
+#define STATUS_ADDRESS_NOT_ASSOCIATED cpu_to_le32(0xC0000239)
+#define STATUS_CONNECTION_INVALID cpu_to_le32(0xC000023A)
+#define STATUS_CONNECTION_ACTIVE cpu_to_le32(0xC000023B)
+#define STATUS_NETWORK_UNREACHABLE cpu_to_le32(0xC000023C)
+#define STATUS_HOST_UNREACHABLE cpu_to_le32(0xC000023D)
+#define STATUS_PROTOCOL_UNREACHABLE cpu_to_le32(0xC000023E)
+#define STATUS_PORT_UNREACHABLE cpu_to_le32(0xC000023F)
+#define STATUS_REQUEST_ABORTED cpu_to_le32(0xC0000240)
+#define STATUS_CONNECTION_ABORTED cpu_to_le32(0xC0000241)
+#define STATUS_BAD_COMPRESSION_BUFFER cpu_to_le32(0xC0000242)
+#define STATUS_USER_MAPPED_FILE cpu_to_le32(0xC0000243)
+#define STATUS_AUDIT_FAILED cpu_to_le32(0xC0000244)
+#define STATUS_TIMER_RESOLUTION_NOT_SET cpu_to_le32(0xC0000245)
+#define STATUS_CONNECTION_COUNT_LIMIT cpu_to_le32(0xC0000246)
+#define STATUS_LOGIN_TIME_RESTRICTION cpu_to_le32(0xC0000247)
+#define STATUS_LOGIN_WKSTA_RESTRICTION cpu_to_le32(0xC0000248)
+#define STATUS_IMAGE_MP_UP_MISMATCH cpu_to_le32(0xC0000249)
+#define STATUS_INSUFFICIENT_LOGON_INFO cpu_to_le32(0xC0000250) /* numbering skips 0xC000024A-0xC000024F before this entry; 0xC0000250-0xC000025C are consecutive */
+#define STATUS_BAD_DLL_ENTRYPOINT cpu_to_le32(0xC0000251)
+#define STATUS_BAD_SERVICE_ENTRYPOINT cpu_to_le32(0xC0000252)
+#define STATUS_LPC_REPLY_LOST cpu_to_le32(0xC0000253)
+#define STATUS_IP_ADDRESS_CONFLICT1 cpu_to_le32(0xC0000254)
+#define STATUS_IP_ADDRESS_CONFLICT2 cpu_to_le32(0xC0000255)
+#define STATUS_REGISTRY_QUOTA_LIMIT cpu_to_le32(0xC0000256)
+#define STATUS_PATH_NOT_COVERED cpu_to_le32(0xC0000257)
+#define STATUS_NO_CALLBACK_ACTIVE cpu_to_le32(0xC0000258)
+#define STATUS_LICENSE_QUOTA_EXCEEDED cpu_to_le32(0xC0000259)
+#define STATUS_PWD_TOO_SHORT cpu_to_le32(0xC000025A)
+#define STATUS_PWD_TOO_RECENT cpu_to_le32(0xC000025B)
+#define STATUS_PWD_HISTORY_CONFLICT cpu_to_le32(0xC000025C)
+#define STATUS_PLUGPLAY_NO_DEVICE cpu_to_le32(0xC000025E) /* numbering skips 0xC000025D before this entry; 0xC000025E-0xC0000273 are consecutive */
+#define STATUS_UNSUPPORTED_COMPRESSION cpu_to_le32(0xC000025F)
+#define STATUS_INVALID_HW_PROFILE cpu_to_le32(0xC0000260)
+#define STATUS_INVALID_PLUGPLAY_DEVICE_PATH cpu_to_le32(0xC0000261)
+#define STATUS_DRIVER_ORDINAL_NOT_FOUND cpu_to_le32(0xC0000262)
+#define STATUS_DRIVER_ENTRYPOINT_NOT_FOUND cpu_to_le32(0xC0000263)
+#define STATUS_RESOURCE_NOT_OWNED cpu_to_le32(0xC0000264)
+#define STATUS_TOO_MANY_LINKS cpu_to_le32(0xC0000265)
+#define STATUS_QUOTA_LIST_INCONSISTENT cpu_to_le32(0xC0000266)
+#define STATUS_FILE_IS_OFFLINE cpu_to_le32(0xC0000267)
+#define STATUS_EVALUATION_EXPIRATION cpu_to_le32(0xC0000268)
+#define STATUS_ILLEGAL_DLL_RELOCATION cpu_to_le32(0xC0000269)
+#define STATUS_LICENSE_VIOLATION cpu_to_le32(0xC000026A)
+#define STATUS_DLL_INIT_FAILED_LOGOFF cpu_to_le32(0xC000026B)
+#define STATUS_DRIVER_UNABLE_TO_LOAD cpu_to_le32(0xC000026C)
+#define STATUS_DFS_UNAVAILABLE cpu_to_le32(0xC000026D)
+#define STATUS_VOLUME_DISMOUNTED cpu_to_le32(0xC000026E)
+#define STATUS_WX86_INTERNAL_ERROR cpu_to_le32(0xC000026F)
+#define STATUS_WX86_FLOAT_STACK_CHECK cpu_to_le32(0xC0000270)
+#define STATUS_VALIDATE_CONTINUE cpu_to_le32(0xC0000271)
+#define STATUS_NO_MATCH cpu_to_le32(0xC0000272)
+#define STATUS_NO_MORE_MATCHES cpu_to_le32(0xC0000273)
+#define STATUS_NOT_A_REPARSE_POINT cpu_to_le32(0xC0000275) /* numbering skips 0xC0000274 before this entry; 0xC0000275-0xC0000279 are consecutive */
+#define STATUS_IO_REPARSE_TAG_INVALID cpu_to_le32(0xC0000276)
+#define STATUS_IO_REPARSE_TAG_MISMATCH cpu_to_le32(0xC0000277)
+#define STATUS_IO_REPARSE_DATA_INVALID cpu_to_le32(0xC0000278)
+#define STATUS_IO_REPARSE_TAG_NOT_HANDLED cpu_to_le32(0xC0000279)
+#define STATUS_REPARSE_POINT_NOT_RESOLVED cpu_to_le32(0xC0000280) /* numbering skips 0xC000027A-0xC000027F before this entry; 0xC0000280-0xC0000287 are consecutive */
+#define STATUS_DIRECTORY_IS_A_REPARSE_POINT cpu_to_le32(0xC0000281)
+#define STATUS_RANGE_LIST_CONFLICT cpu_to_le32(0xC0000282)
+#define STATUS_SOURCE_ELEMENT_EMPTY cpu_to_le32(0xC0000283)
+#define STATUS_DESTINATION_ELEMENT_FULL cpu_to_le32(0xC0000284)
+#define STATUS_ILLEGAL_ELEMENT_ADDRESS cpu_to_le32(0xC0000285)
+#define STATUS_MAGAZINE_NOT_PRESENT cpu_to_le32(0xC0000286)
+#define STATUS_REINITIALIZATION_NEEDED cpu_to_le32(0xC0000287)
+#define STATUS_ENCRYPTION_FAILED cpu_to_le32(0xC000028A) /* numbering skips 0xC0000288-0xC0000289 before this entry; 0xC000028A-0xC0000293 are consecutive */
+#define STATUS_DECRYPTION_FAILED cpu_to_le32(0xC000028B)
+#define STATUS_RANGE_NOT_FOUND cpu_to_le32(0xC000028C)
+#define STATUS_NO_RECOVERY_POLICY cpu_to_le32(0xC000028D)
+#define STATUS_NO_EFS cpu_to_le32(0xC000028E)
+#define STATUS_WRONG_EFS cpu_to_le32(0xC000028F)
+#define STATUS_NO_USER_KEYS cpu_to_le32(0xC0000290)
+#define STATUS_FILE_NOT_ENCRYPTED cpu_to_le32(0xC0000291)
+#define STATUS_NOT_EXPORT_FORMAT cpu_to_le32(0xC0000292)
+#define STATUS_FILE_ENCRYPTED cpu_to_le32(0xC0000293)
+#define STATUS_WMI_GUID_NOT_FOUND cpu_to_le32(0xC0000295) /* numbering skips 0xC0000294 before this entry; 0xC0000295-0xC00002B9 are consecutive */
+#define STATUS_WMI_INSTANCE_NOT_FOUND cpu_to_le32(0xC0000296)
+#define STATUS_WMI_ITEMID_NOT_FOUND cpu_to_le32(0xC0000297)
+#define STATUS_WMI_TRY_AGAIN cpu_to_le32(0xC0000298)
+#define STATUS_SHARED_POLICY cpu_to_le32(0xC0000299)
+#define STATUS_POLICY_OBJECT_NOT_FOUND cpu_to_le32(0xC000029A)
+#define STATUS_POLICY_ONLY_IN_DS cpu_to_le32(0xC000029B)
+#define STATUS_VOLUME_NOT_UPGRADED cpu_to_le32(0xC000029C)
+#define STATUS_REMOTE_STORAGE_NOT_ACTIVE cpu_to_le32(0xC000029D)
+#define STATUS_REMOTE_STORAGE_MEDIA_ERROR cpu_to_le32(0xC000029E)
+#define STATUS_NO_TRACKING_SERVICE cpu_to_le32(0xC000029F)
+#define STATUS_SERVER_SID_MISMATCH cpu_to_le32(0xC00002A0)
+#define STATUS_DS_NO_ATTRIBUTE_OR_VALUE cpu_to_le32(0xC00002A1)
+#define STATUS_DS_INVALID_ATTRIBUTE_SYNTAX cpu_to_le32(0xC00002A2)
+#define STATUS_DS_ATTRIBUTE_TYPE_UNDEFINED cpu_to_le32(0xC00002A3)
+#define STATUS_DS_ATTRIBUTE_OR_VALUE_EXISTS cpu_to_le32(0xC00002A4)
+#define STATUS_DS_BUSY cpu_to_le32(0xC00002A5)
+#define STATUS_DS_UNAVAILABLE cpu_to_le32(0xC00002A6)
+#define STATUS_DS_NO_RIDS_ALLOCATED cpu_to_le32(0xC00002A7)
+#define STATUS_DS_NO_MORE_RIDS cpu_to_le32(0xC00002A8)
+#define STATUS_DS_INCORRECT_ROLE_OWNER cpu_to_le32(0xC00002A9)
+#define STATUS_DS_RIDMGR_INIT_ERROR cpu_to_le32(0xC00002AA)
+#define STATUS_DS_OBJ_CLASS_VIOLATION cpu_to_le32(0xC00002AB)
+#define STATUS_DS_CANT_ON_NON_LEAF cpu_to_le32(0xC00002AC)
+#define STATUS_DS_CANT_ON_RDN cpu_to_le32(0xC00002AD)
+#define STATUS_DS_CANT_MOD_OBJ_CLASS cpu_to_le32(0xC00002AE)
+#define STATUS_DS_CROSS_DOM_MOVE_FAILED cpu_to_le32(0xC00002AF)
+#define STATUS_DS_GC_NOT_AVAILABLE cpu_to_le32(0xC00002B0)
+#define STATUS_DIRECTORY_SERVICE_REQUIRED cpu_to_le32(0xC00002B1)
+#define STATUS_REPARSE_ATTRIBUTE_CONFLICT cpu_to_le32(0xC00002B2)
+#define STATUS_CANT_ENABLE_DENY_ONLY cpu_to_le32(0xC00002B3)
+#define STATUS_FLOAT_MULTIPLE_FAULTS cpu_to_le32(0xC00002B4)
+#define STATUS_FLOAT_MULTIPLE_TRAPS cpu_to_le32(0xC00002B5)
+#define STATUS_DEVICE_REMOVED cpu_to_le32(0xC00002B6)
+#define STATUS_JOURNAL_DELETE_IN_PROGRESS cpu_to_le32(0xC00002B7)
+#define STATUS_JOURNAL_NOT_ACTIVE cpu_to_le32(0xC00002B8)
+#define STATUS_NOINTERFACE cpu_to_le32(0xC00002B9)
+#define STATUS_DS_ADMIN_LIMIT_EXCEEDED cpu_to_le32(0xC00002C1) /* numbering skips 0xC00002BA-0xC00002C0 before this entry; 0xC00002C1-0xC000030B are consecutive */
+#define STATUS_DRIVER_FAILED_SLEEP cpu_to_le32(0xC00002C2)
+#define STATUS_MUTUAL_AUTHENTICATION_FAILED cpu_to_le32(0xC00002C3)
+#define STATUS_CORRUPT_SYSTEM_FILE cpu_to_le32(0xC00002C4)
+#define STATUS_DATATYPE_MISALIGNMENT_ERROR cpu_to_le32(0xC00002C5)
+#define STATUS_WMI_READ_ONLY cpu_to_le32(0xC00002C6)
+#define STATUS_WMI_SET_FAILURE cpu_to_le32(0xC00002C7)
+#define STATUS_COMMITMENT_MINIMUM cpu_to_le32(0xC00002C8)
+#define STATUS_REG_NAT_CONSUMPTION cpu_to_le32(0xC00002C9)
+#define STATUS_TRANSPORT_FULL cpu_to_le32(0xC00002CA)
+#define STATUS_DS_SAM_INIT_FAILURE cpu_to_le32(0xC00002CB)
+#define STATUS_ONLY_IF_CONNECTED cpu_to_le32(0xC00002CC)
+#define STATUS_DS_SENSITIVE_GROUP_VIOLATION cpu_to_le32(0xC00002CD)
+#define STATUS_PNP_RESTART_ENUMERATION cpu_to_le32(0xC00002CE)
+#define STATUS_JOURNAL_ENTRY_DELETED cpu_to_le32(0xC00002CF)
+#define STATUS_DS_CANT_MOD_PRIMARYGROUPID cpu_to_le32(0xC00002D0)
+#define STATUS_SYSTEM_IMAGE_BAD_SIGNATURE cpu_to_le32(0xC00002D1)
+#define STATUS_PNP_REBOOT_REQUIRED cpu_to_le32(0xC00002D2)
+#define STATUS_POWER_STATE_INVALID cpu_to_le32(0xC00002D3)
+#define STATUS_DS_INVALID_GROUP_TYPE cpu_to_le32(0xC00002D4)
+#define STATUS_DS_NO_NEST_GLOBALGROUP_IN_MIXEDDOMAIN cpu_to_le32(0xC00002D5)
+#define STATUS_DS_NO_NEST_LOCALGROUP_IN_MIXEDDOMAIN cpu_to_le32(0xC00002D6)
+#define STATUS_DS_GLOBAL_CANT_HAVE_LOCAL_MEMBER cpu_to_le32(0xC00002D7)
+#define STATUS_DS_GLOBAL_CANT_HAVE_UNIVERSAL_MEMBER cpu_to_le32(0xC00002D8)
+#define STATUS_DS_UNIVERSAL_CANT_HAVE_LOCAL_MEMBER cpu_to_le32(0xC00002D9)
+#define STATUS_DS_GLOBAL_CANT_HAVE_CROSSDOMAIN_MEMBER cpu_to_le32(0xC00002DA)
+#define STATUS_DS_LOCAL_CANT_HAVE_CROSSDOMAIN_LOCAL_MEMBER \
+ cpu_to_le32(0xC00002DB)
+#define STATUS_DS_HAVE_PRIMARY_MEMBERS cpu_to_le32(0xC00002DC)
+#define STATUS_WMI_NOT_SUPPORTED cpu_to_le32(0xC00002DD)
+#define STATUS_INSUFFICIENT_POWER cpu_to_le32(0xC00002DE)
+#define STATUS_SAM_NEED_BOOTKEY_PASSWORD cpu_to_le32(0xC00002DF)
+#define STATUS_SAM_NEED_BOOTKEY_FLOPPY cpu_to_le32(0xC00002E0)
+#define STATUS_DS_CANT_START cpu_to_le32(0xC00002E1)
+#define STATUS_DS_INIT_FAILURE cpu_to_le32(0xC00002E2)
+#define STATUS_SAM_INIT_FAILURE cpu_to_le32(0xC00002E3)
+#define STATUS_DS_GC_REQUIRED cpu_to_le32(0xC00002E4)
+#define STATUS_DS_LOCAL_MEMBER_OF_LOCAL_ONLY cpu_to_le32(0xC00002E5)
+#define STATUS_DS_NO_FPO_IN_UNIVERSAL_GROUPS cpu_to_le32(0xC00002E6)
+#define STATUS_DS_MACHINE_ACCOUNT_QUOTA_EXCEEDED cpu_to_le32(0xC00002E7)
+#define STATUS_MULTIPLE_FAULT_VIOLATION cpu_to_le32(0xC00002E8)
+#define STATUS_CURRENT_DOMAIN_NOT_ALLOWED cpu_to_le32(0xC00002E9)
+#define STATUS_CANNOT_MAKE cpu_to_le32(0xC00002EA)
+#define STATUS_SYSTEM_SHUTDOWN cpu_to_le32(0xC00002EB)
+#define STATUS_DS_INIT_FAILURE_CONSOLE cpu_to_le32(0xC00002EC)
+#define STATUS_DS_SAM_INIT_FAILURE_CONSOLE cpu_to_le32(0xC00002ED)
+#define STATUS_UNFINISHED_CONTEXT_DELETED cpu_to_le32(0xC00002EE)
+#define STATUS_NO_TGT_REPLY cpu_to_le32(0xC00002EF)
+#define STATUS_OBJECTID_NOT_FOUND cpu_to_le32(0xC00002F0)
+#define STATUS_NO_IP_ADDRESSES cpu_to_le32(0xC00002F1)
+#define STATUS_WRONG_CREDENTIAL_HANDLE cpu_to_le32(0xC00002F2)
+#define STATUS_CRYPTO_SYSTEM_INVALID cpu_to_le32(0xC00002F3)
+#define STATUS_MAX_REFERRALS_EXCEEDED cpu_to_le32(0xC00002F4)
+#define STATUS_MUST_BE_KDC cpu_to_le32(0xC00002F5)
+#define STATUS_STRONG_CRYPTO_NOT_SUPPORTED cpu_to_le32(0xC00002F6)
+#define STATUS_TOO_MANY_PRINCIPALS cpu_to_le32(0xC00002F7)
+#define STATUS_NO_PA_DATA cpu_to_le32(0xC00002F8)
+#define STATUS_PKINIT_NAME_MISMATCH cpu_to_le32(0xC00002F9)
+#define STATUS_SMARTCARD_LOGON_REQUIRED cpu_to_le32(0xC00002FA)
+#define STATUS_KDC_INVALID_REQUEST cpu_to_le32(0xC00002FB)
+#define STATUS_KDC_UNABLE_TO_REFER cpu_to_le32(0xC00002FC)
+#define STATUS_KDC_UNKNOWN_ETYPE cpu_to_le32(0xC00002FD)
+#define STATUS_SHUTDOWN_IN_PROGRESS cpu_to_le32(0xC00002FE)
+#define STATUS_SERVER_SHUTDOWN_IN_PROGRESS cpu_to_le32(0xC00002FF)
+#define STATUS_NOT_SUPPORTED_ON_SBS cpu_to_le32(0xC0000300)
+#define STATUS_WMI_GUID_DISCONNECTED cpu_to_le32(0xC0000301)
+#define STATUS_WMI_ALREADY_DISABLED cpu_to_le32(0xC0000302)
+#define STATUS_WMI_ALREADY_ENABLED cpu_to_le32(0xC0000303)
+#define STATUS_MFT_TOO_FRAGMENTED cpu_to_le32(0xC0000304)
+#define STATUS_COPY_PROTECTION_FAILURE cpu_to_le32(0xC0000305)
+#define STATUS_CSS_AUTHENTICATION_FAILURE cpu_to_le32(0xC0000306)
+#define STATUS_CSS_KEY_NOT_PRESENT cpu_to_le32(0xC0000307)
+#define STATUS_CSS_KEY_NOT_ESTABLISHED cpu_to_le32(0xC0000308)
+#define STATUS_CSS_SCRAMBLED_SECTOR cpu_to_le32(0xC0000309)
+#define STATUS_CSS_REGION_MISMATCH cpu_to_le32(0xC000030A)
+#define STATUS_CSS_RESETS_EXHAUSTED cpu_to_le32(0xC000030B)
+#define STATUS_PKINIT_FAILURE cpu_to_le32(0xC0000320) /* numbering skips 0xC000030C-0xC000031F before this entry; 0xC0000320-0xC0000322 are consecutive */
+#define STATUS_SMARTCARD_SUBSYSTEM_FAILURE cpu_to_le32(0xC0000321)
+#define STATUS_NO_KERB_KEY cpu_to_le32(0xC0000322)
+#define STATUS_HOST_DOWN cpu_to_le32(0xC0000350) /* numbering skips 0xC0000323-0xC000034F before this entry; 0xC0000350-0xC000035F are consecutive */
+#define STATUS_UNSUPPORTED_PREAUTH cpu_to_le32(0xC0000351)
+#define STATUS_EFS_ALG_BLOB_TOO_BIG cpu_to_le32(0xC0000352)
+#define STATUS_PORT_NOT_SET cpu_to_le32(0xC0000353)
+#define STATUS_DEBUGGER_INACTIVE cpu_to_le32(0xC0000354)
+#define STATUS_DS_VERSION_CHECK_FAILURE cpu_to_le32(0xC0000355)
+#define STATUS_AUDITING_DISABLED cpu_to_le32(0xC0000356)
+#define STATUS_PRENT4_MACHINE_ACCOUNT cpu_to_le32(0xC0000357)
+#define STATUS_DS_AG_CANT_HAVE_UNIVERSAL_MEMBER cpu_to_le32(0xC0000358)
+#define STATUS_INVALID_IMAGE_WIN_32 cpu_to_le32(0xC0000359)
+#define STATUS_INVALID_IMAGE_WIN_64 cpu_to_le32(0xC000035A)
+#define STATUS_BAD_BINDINGS cpu_to_le32(0xC000035B)
+#define STATUS_NETWORK_SESSION_EXPIRED cpu_to_le32(0xC000035C)
+#define STATUS_APPHELP_BLOCK cpu_to_le32(0xC000035D)
+#define STATUS_ALL_SIDS_FILTERED cpu_to_le32(0xC000035E)
+#define STATUS_NOT_SAFE_MODE_DRIVER cpu_to_le32(0xC000035F)
+#define STATUS_ACCESS_DISABLED_BY_POLICY_DEFAULT cpu_to_le32(0xC0000361) /* numbering skips 0xC0000360 before this entry; 0xC0000361-0xC0000366 are consecutive */
+#define STATUS_ACCESS_DISABLED_BY_POLICY_PATH cpu_to_le32(0xC0000362)
+#define STATUS_ACCESS_DISABLED_BY_POLICY_PUBLISHER cpu_to_le32(0xC0000363)
+#define STATUS_ACCESS_DISABLED_BY_POLICY_OTHER cpu_to_le32(0xC0000364)
+#define STATUS_FAILED_DRIVER_ENTRY cpu_to_le32(0xC0000365)
+#define STATUS_DEVICE_ENUMERATION_ERROR cpu_to_le32(0xC0000366)
+#define STATUS_MOUNT_POINT_NOT_RESOLVED cpu_to_le32(0xC0000368) /* numbering skips 0xC0000367 before this entry; 0xC0000368-0xC000036F are consecutive */
+#define STATUS_INVALID_DEVICE_OBJECT_PARAMETER cpu_to_le32(0xC0000369)
+#define STATUS_MCA_OCCURRED cpu_to_le32(0xC000036A)
+#define STATUS_DRIVER_BLOCKED_CRITICAL cpu_to_le32(0xC000036B)
+#define STATUS_DRIVER_BLOCKED cpu_to_le32(0xC000036C)
+#define STATUS_DRIVER_DATABASE_ERROR cpu_to_le32(0xC000036D)
+#define STATUS_SYSTEM_HIVE_TOO_LARGE cpu_to_le32(0xC000036E)
+#define STATUS_INVALID_IMPORT_OF_NON_DLL cpu_to_le32(0xC000036F)
+#define STATUS_NO_SECRETS cpu_to_le32(0xC0000371) /* numbering skips 0xC0000370 before this entry; 0xC0000371-0xC0000374 are consecutive */
+#define STATUS_ACCESS_DISABLED_NO_SAFER_UI_BY_POLICY cpu_to_le32(0xC0000372)
+#define STATUS_FAILED_STACK_SWITCH cpu_to_le32(0xC0000373)
+#define STATUS_HEAP_CORRUPTION cpu_to_le32(0xC0000374)
+#define STATUS_SMARTCARD_WRONG_PIN cpu_to_le32(0xC0000380) /* numbering skips 0xC0000375-0xC000037F before this entry; 0xC0000380-0xC000038F are consecutive */
+#define STATUS_SMARTCARD_CARD_BLOCKED cpu_to_le32(0xC0000381)
+#define STATUS_SMARTCARD_CARD_NOT_AUTHENTICATED cpu_to_le32(0xC0000382)
+#define STATUS_SMARTCARD_NO_CARD cpu_to_le32(0xC0000383)
+#define STATUS_SMARTCARD_NO_KEY_CONTAINER cpu_to_le32(0xC0000384)
+#define STATUS_SMARTCARD_NO_CERTIFICATE cpu_to_le32(0xC0000385)
+#define STATUS_SMARTCARD_NO_KEYSET cpu_to_le32(0xC0000386)
+#define STATUS_SMARTCARD_IO_ERROR cpu_to_le32(0xC0000387)
+#define STATUS_DOWNGRADE_DETECTED cpu_to_le32(0xC0000388)
+#define STATUS_SMARTCARD_CERT_REVOKED cpu_to_le32(0xC0000389)
+#define STATUS_ISSUING_CA_UNTRUSTED cpu_to_le32(0xC000038A)
+#define STATUS_REVOCATION_OFFLINE_C cpu_to_le32(0xC000038B)
+#define STATUS_PKINIT_CLIENT_FAILURE cpu_to_le32(0xC000038C)
+#define STATUS_SMARTCARD_CERT_EXPIRED cpu_to_le32(0xC000038D)
+#define STATUS_DRIVER_FAILED_PRIOR_UNLOAD cpu_to_le32(0xC000038E)
+#define STATUS_SMARTCARD_SILENT_CONTEXT cpu_to_le32(0xC000038F)
+#define STATUS_PER_USER_TRUST_QUOTA_EXCEEDED cpu_to_le32(0xC0000401) /* numbering skips 0xC0000390-0xC0000400 before this entry; 0xC0000401-0xC0000418 are consecutive */
+#define STATUS_ALL_USER_TRUST_QUOTA_EXCEEDED cpu_to_le32(0xC0000402)
+#define STATUS_USER_DELETE_TRUST_QUOTA_EXCEEDED cpu_to_le32(0xC0000403)
+#define STATUS_DS_NAME_NOT_UNIQUE cpu_to_le32(0xC0000404)
+#define STATUS_DS_DUPLICATE_ID_FOUND cpu_to_le32(0xC0000405)
+#define STATUS_DS_GROUP_CONVERSION_ERROR cpu_to_le32(0xC0000406)
+#define STATUS_VOLSNAP_PREPARE_HIBERNATE cpu_to_le32(0xC0000407)
+#define STATUS_USER2USER_REQUIRED cpu_to_le32(0xC0000408)
+#define STATUS_STACK_BUFFER_OVERRUN cpu_to_le32(0xC0000409)
+#define STATUS_NO_S4U_PROT_SUPPORT cpu_to_le32(0xC000040A)
+#define STATUS_CROSSREALM_DELEGATION_FAILURE cpu_to_le32(0xC000040B)
+#define STATUS_REVOCATION_OFFLINE_KDC cpu_to_le32(0xC000040C)
+#define STATUS_ISSUING_CA_UNTRUSTED_KDC cpu_to_le32(0xC000040D)
+#define STATUS_KDC_CERT_EXPIRED cpu_to_le32(0xC000040E)
+#define STATUS_KDC_CERT_REVOKED cpu_to_le32(0xC000040F)
+#define STATUS_PARAMETER_QUOTA_EXCEEDED cpu_to_le32(0xC0000410)
+#define STATUS_HIBERNATION_FAILURE cpu_to_le32(0xC0000411)
+#define STATUS_DELAY_LOAD_FAILED cpu_to_le32(0xC0000412)
+#define STATUS_AUTHENTICATION_FIREWALL_FAILED cpu_to_le32(0xC0000413)
+#define STATUS_VDM_DISALLOWED cpu_to_le32(0xC0000414)
+#define STATUS_HUNG_DISPLAY_DRIVER_THREAD cpu_to_le32(0xC0000415)
+#define STATUS_INSUFFICIENT_RESOURCE_FOR_SPECIFIED_SHARED_SECTION_SIZE \
+ cpu_to_le32(0xC0000416)
+#define STATUS_INVALID_CRUNTIME_PARAMETER cpu_to_le32(0xC0000417)
+#define STATUS_NTLM_BLOCKED cpu_to_le32(0xC0000418)
+#define STATUS_ASSERTION_FAILURE cpu_to_le32(0xC0000420) /* numbering skips 0xC0000419-0xC000041F before this entry; 0xC0000420-0xC0000421 are consecutive */
+#define STATUS_VERIFIER_STOP cpu_to_le32(0xC0000421)
+#define STATUS_CALLBACK_POP_STACK cpu_to_le32(0xC0000423) /* numbering skips 0xC0000422 before this entry; 0xC0000423-0xC000042C are consecutive */
+#define STATUS_INCOMPATIBLE_DRIVER_BLOCKED cpu_to_le32(0xC0000424)
+#define STATUS_HIVE_UNLOADED cpu_to_le32(0xC0000425)
+#define STATUS_COMPRESSION_DISABLED cpu_to_le32(0xC0000426)
+#define STATUS_FILE_SYSTEM_LIMITATION cpu_to_le32(0xC0000427)
+#define STATUS_INVALID_IMAGE_HASH cpu_to_le32(0xC0000428)
+#define STATUS_NOT_CAPABLE cpu_to_le32(0xC0000429)
+#define STATUS_REQUEST_OUT_OF_SEQUENCE cpu_to_le32(0xC000042A)
+#define STATUS_IMPLEMENTATION_LIMIT cpu_to_le32(0xC000042B)
+#define STATUS_ELEVATION_REQUIRED cpu_to_le32(0xC000042C)
+#define STATUS_BEYOND_VDL cpu_to_le32(0xC0000432) /* numbering skips 0xC000042D-0xC0000431 before this entry; 0xC0000432-0xC0000435 are consecutive */
+#define STATUS_ENCOUNTERED_WRITE_IN_PROGRESS cpu_to_le32(0xC0000433)
+#define STATUS_PTE_CHANGED cpu_to_le32(0xC0000434)
+#define STATUS_PURGE_FAILED cpu_to_le32(0xC0000435)
+#define STATUS_CRED_REQUIRES_CONFIRMATION cpu_to_le32(0xC0000440) /* numbering skips 0xC0000436-0xC000043F before this entry; 0xC0000440-0xC0000446 are consecutive */
+#define STATUS_CS_ENCRYPTION_INVALID_SERVER_RESPONSE cpu_to_le32(0xC0000441)
+#define STATUS_CS_ENCRYPTION_UNSUPPORTED_SERVER cpu_to_le32(0xC0000442)
+#define STATUS_CS_ENCRYPTION_EXISTING_ENCRYPTED_FILE cpu_to_le32(0xC0000443)
+#define STATUS_CS_ENCRYPTION_NEW_ENCRYPTED_FILE cpu_to_le32(0xC0000444)
+#define STATUS_CS_ENCRYPTION_FILE_NOT_CSE cpu_to_le32(0xC0000445)
+#define STATUS_INVALID_LABEL cpu_to_le32(0xC0000446)
+#define STATUS_DRIVER_PROCESS_TERMINATED cpu_to_le32(0xC0000450) /* numbering skips 0xC0000447-0xC000044F before this entry; 0xC0000450-0xC0000453 are consecutive */
+#define STATUS_AMBIGUOUS_SYSTEM_DEVICE cpu_to_le32(0xC0000451)
+#define STATUS_SYSTEM_DEVICE_NOT_FOUND cpu_to_le32(0xC0000452)
+#define STATUS_RESTART_BOOT_APPLICATION cpu_to_le32(0xC0000453)
+#define STATUS_INVALID_TASK_NAME cpu_to_le32(0xC0000500) /* numbering skips 0xC0000454-0xC00004FF before this entry; 0xC0000500-0xC0000503 are consecutive */
+#define STATUS_INVALID_TASK_INDEX cpu_to_le32(0xC0000501)
+#define STATUS_THREAD_ALREADY_IN_TASK cpu_to_le32(0xC0000502)
+#define STATUS_CALLBACK_BYPASS cpu_to_le32(0xC0000503)
+#define STATUS_PORT_CLOSED cpu_to_le32(0xC0000700) /* numbering skips 0xC0000504-0xC00006FF before this entry; 0xC0000700-0xC0000721 are consecutive */
+#define STATUS_MESSAGE_LOST cpu_to_le32(0xC0000701)
+#define STATUS_INVALID_MESSAGE cpu_to_le32(0xC0000702)
+#define STATUS_REQUEST_CANCELED cpu_to_le32(0xC0000703)
+#define STATUS_RECURSIVE_DISPATCH cpu_to_le32(0xC0000704)
+#define STATUS_LPC_RECEIVE_BUFFER_EXPECTED cpu_to_le32(0xC0000705)
+#define STATUS_LPC_INVALID_CONNECTION_USAGE cpu_to_le32(0xC0000706)
+#define STATUS_LPC_REQUESTS_NOT_ALLOWED cpu_to_le32(0xC0000707)
+#define STATUS_RESOURCE_IN_USE cpu_to_le32(0xC0000708)
+#define STATUS_HARDWARE_MEMORY_ERROR cpu_to_le32(0xC0000709)
+#define STATUS_THREADPOOL_HANDLE_EXCEPTION cpu_to_le32(0xC000070A)
+#define STATUS_THREADPOOL_SET_EVENT_ON_COMPLETION_FAILED cpu_to_le32(0xC000070B)
+#define STATUS_THREADPOOL_RELEASE_SEMAPHORE_ON_COMPLETION_FAILED \
+ cpu_to_le32(0xC000070C)
+#define STATUS_THREADPOOL_RELEASE_MUTEX_ON_COMPLETION_FAILED \
+ cpu_to_le32(0xC000070D)
+#define STATUS_THREADPOOL_FREE_LIBRARY_ON_COMPLETION_FAILED \
+ cpu_to_le32(0xC000070E)
+#define STATUS_THREADPOOL_RELEASED_DURING_OPERATION cpu_to_le32(0xC000070F)
+#define STATUS_CALLBACK_RETURNED_WHILE_IMPERSONATING cpu_to_le32(0xC0000710)
+#define STATUS_APC_RETURNED_WHILE_IMPERSONATING cpu_to_le32(0xC0000711)
+#define STATUS_PROCESS_IS_PROTECTED cpu_to_le32(0xC0000712)
+#define STATUS_MCA_EXCEPTION cpu_to_le32(0xC0000713)
+#define STATUS_CERTIFICATE_MAPPING_NOT_UNIQUE cpu_to_le32(0xC0000714)
+#define STATUS_SYMLINK_CLASS_DISABLED cpu_to_le32(0xC0000715)
+#define STATUS_INVALID_IDN_NORMALIZATION cpu_to_le32(0xC0000716)
+#define STATUS_NO_UNICODE_TRANSLATION cpu_to_le32(0xC0000717)
+#define STATUS_ALREADY_REGISTERED cpu_to_le32(0xC0000718)
+#define STATUS_CONTEXT_MISMATCH cpu_to_le32(0xC0000719)
+#define STATUS_PORT_ALREADY_HAS_COMPLETION_LIST cpu_to_le32(0xC000071A)
+#define STATUS_CALLBACK_RETURNED_THREAD_PRIORITY cpu_to_le32(0xC000071B)
+#define STATUS_INVALID_THREAD cpu_to_le32(0xC000071C)
+#define STATUS_CALLBACK_RETURNED_TRANSACTION cpu_to_le32(0xC000071D)
+#define STATUS_CALLBACK_RETURNED_LDR_LOCK cpu_to_le32(0xC000071E)
+#define STATUS_CALLBACK_RETURNED_LANG cpu_to_le32(0xC000071F)
+#define STATUS_CALLBACK_RETURNED_PRI_BACK cpu_to_le32(0xC0000720)
+#define STATUS_CALLBACK_RETURNED_THREAD_AFFINITY cpu_to_le32(0xC0000721)
+#define STATUS_DISK_REPAIR_DISABLED cpu_to_le32(0xC0000800)
+#define STATUS_DS_DOMAIN_RENAME_IN_PROGRESS cpu_to_le32(0xC0000801)
+#define STATUS_DISK_QUOTA_EXCEEDED cpu_to_le32(0xC0000802)
+#define STATUS_CONTENT_BLOCKED cpu_to_le32(0xC0000804)
+#define STATUS_BAD_CLUSTERS cpu_to_le32(0xC0000805)
+#define STATUS_VOLUME_DIRTY cpu_to_le32(0xC0000806)
+#define STATUS_FILE_CHECKED_OUT cpu_to_le32(0xC0000901)
+#define STATUS_CHECKOUT_REQUIRED cpu_to_le32(0xC0000902)
+#define STATUS_BAD_FILE_TYPE cpu_to_le32(0xC0000903)
+#define STATUS_FILE_TOO_LARGE cpu_to_le32(0xC0000904)
+#define STATUS_FORMS_AUTH_REQUIRED cpu_to_le32(0xC0000905)
+#define STATUS_VIRUS_INFECTED cpu_to_le32(0xC0000906)
+#define STATUS_VIRUS_DELETED cpu_to_le32(0xC0000907)
+#define STATUS_BAD_MCFG_TABLE cpu_to_le32(0xC0000908)
+#define STATUS_WOW_ASSERTION cpu_to_le32(0xC0009898)
+#define STATUS_INVALID_SIGNATURE cpu_to_le32(0xC000A000)
+#define STATUS_HMAC_NOT_SUPPORTED cpu_to_le32(0xC000A001)
+#define STATUS_IPSEC_QUEUE_OVERFLOW cpu_to_le32(0xC000A010)
+#define STATUS_ND_QUEUE_OVERFLOW cpu_to_le32(0xC000A011)
+#define STATUS_HOPLIMIT_EXCEEDED cpu_to_le32(0xC000A012)
+#define STATUS_PROTOCOL_NOT_SUPPORTED cpu_to_le32(0xC000A013)
+#define STATUS_LOST_WRITEBEHIND_DATA_NETWORK_DISCONNECTED \
+ cpu_to_le32(0xC000A080)
+#define STATUS_LOST_WRITEBEHIND_DATA_NETWORK_SERVER_ERROR \
+ cpu_to_le32(0xC000A081)
+#define STATUS_LOST_WRITEBEHIND_DATA_LOCAL_DISK_ERROR cpu_to_le32(0xC000A082)
+#define STATUS_XML_PARSE_ERROR cpu_to_le32(0xC000A083)
+#define STATUS_XMLDSIG_ERROR cpu_to_le32(0xC000A084)
+#define STATUS_WRONG_COMPARTMENT cpu_to_le32(0xC000A085)
+#define STATUS_AUTHIP_FAILURE cpu_to_le32(0xC000A086)
+#define DBG_NO_STATE_CHANGE cpu_to_le32(0xC0010001) /* DBG_* codes: upper 16 bits 0xC001 */
+#define DBG_APP_NOT_IDLE cpu_to_le32(0xC0010002)
+#define RPC_NT_INVALID_STRING_BINDING cpu_to_le32(0xC0020001) /* RPC_NT_* / EPT_NT_* codes: upper 16 bits 0xC002; values are not contiguous */
+#define RPC_NT_WRONG_KIND_OF_BINDING cpu_to_le32(0xC0020002)
+#define RPC_NT_INVALID_BINDING cpu_to_le32(0xC0020003)
+#define RPC_NT_PROTSEQ_NOT_SUPPORTED cpu_to_le32(0xC0020004)
+#define RPC_NT_INVALID_RPC_PROTSEQ cpu_to_le32(0xC0020005)
+#define RPC_NT_INVALID_STRING_UUID cpu_to_le32(0xC0020006)
+#define RPC_NT_INVALID_ENDPOINT_FORMAT cpu_to_le32(0xC0020007)
+#define RPC_NT_INVALID_NET_ADDR cpu_to_le32(0xC0020008)
+#define RPC_NT_NO_ENDPOINT_FOUND cpu_to_le32(0xC0020009)
+#define RPC_NT_INVALID_TIMEOUT cpu_to_le32(0xC002000A)
+#define RPC_NT_OBJECT_NOT_FOUND cpu_to_le32(0xC002000B)
+#define RPC_NT_ALREADY_REGISTERED cpu_to_le32(0xC002000C)
+#define RPC_NT_TYPE_ALREADY_REGISTERED cpu_to_le32(0xC002000D)
+#define RPC_NT_ALREADY_LISTENING cpu_to_le32(0xC002000E)
+#define RPC_NT_NO_PROTSEQS_REGISTERED cpu_to_le32(0xC002000F)
+#define RPC_NT_NOT_LISTENING cpu_to_le32(0xC0020010)
+#define RPC_NT_UNKNOWN_MGR_TYPE cpu_to_le32(0xC0020011)
+#define RPC_NT_UNKNOWN_IF cpu_to_le32(0xC0020012)
+#define RPC_NT_NO_BINDINGS cpu_to_le32(0xC0020013)
+#define RPC_NT_NO_PROTSEQS cpu_to_le32(0xC0020014)
+#define RPC_NT_CANT_CREATE_ENDPOINT cpu_to_le32(0xC0020015)
+#define RPC_NT_OUT_OF_RESOURCES cpu_to_le32(0xC0020016)
+#define RPC_NT_SERVER_UNAVAILABLE cpu_to_le32(0xC0020017)
+#define RPC_NT_SERVER_TOO_BUSY cpu_to_le32(0xC0020018)
+#define RPC_NT_INVALID_NETWORK_OPTIONS cpu_to_le32(0xC0020019)
+#define RPC_NT_NO_CALL_ACTIVE cpu_to_le32(0xC002001A)
+#define RPC_NT_CALL_FAILED cpu_to_le32(0xC002001B)
+#define RPC_NT_CALL_FAILED_DNE cpu_to_le32(0xC002001C)
+#define RPC_NT_PROTOCOL_ERROR cpu_to_le32(0xC002001D)
+#define RPC_NT_UNSUPPORTED_TRANS_SYN cpu_to_le32(0xC002001F)
+#define RPC_NT_UNSUPPORTED_TYPE cpu_to_le32(0xC0020021)
+#define RPC_NT_INVALID_TAG cpu_to_le32(0xC0020022)
+#define RPC_NT_INVALID_BOUND cpu_to_le32(0xC0020023)
+#define RPC_NT_NO_ENTRY_NAME cpu_to_le32(0xC0020024)
+#define RPC_NT_INVALID_NAME_SYNTAX cpu_to_le32(0xC0020025)
+#define RPC_NT_UNSUPPORTED_NAME_SYNTAX cpu_to_le32(0xC0020026)
+#define RPC_NT_UUID_NO_ADDRESS cpu_to_le32(0xC0020028)
+#define RPC_NT_DUPLICATE_ENDPOINT cpu_to_le32(0xC0020029)
+#define RPC_NT_UNKNOWN_AUTHN_TYPE cpu_to_le32(0xC002002A)
+#define RPC_NT_MAX_CALLS_TOO_SMALL cpu_to_le32(0xC002002B)
+#define RPC_NT_STRING_TOO_LONG cpu_to_le32(0xC002002C)
+#define RPC_NT_PROTSEQ_NOT_FOUND cpu_to_le32(0xC002002D)
+#define RPC_NT_PROCNUM_OUT_OF_RANGE cpu_to_le32(0xC002002E)
+#define RPC_NT_BINDING_HAS_NO_AUTH cpu_to_le32(0xC002002F)
+#define RPC_NT_UNKNOWN_AUTHN_SERVICE cpu_to_le32(0xC0020030)
+#define RPC_NT_UNKNOWN_AUTHN_LEVEL cpu_to_le32(0xC0020031)
+#define RPC_NT_INVALID_AUTH_IDENTITY cpu_to_le32(0xC0020032)
+#define RPC_NT_UNKNOWN_AUTHZ_SERVICE cpu_to_le32(0xC0020033)
+#define EPT_NT_INVALID_ENTRY cpu_to_le32(0xC0020034)
+#define EPT_NT_CANT_PERFORM_OP cpu_to_le32(0xC0020035)
+#define EPT_NT_NOT_REGISTERED cpu_to_le32(0xC0020036)
+#define RPC_NT_NOTHING_TO_EXPORT cpu_to_le32(0xC0020037)
+#define RPC_NT_INCOMPLETE_NAME cpu_to_le32(0xC0020038)
+#define RPC_NT_INVALID_VERS_OPTION cpu_to_le32(0xC0020039)
+#define RPC_NT_NO_MORE_MEMBERS cpu_to_le32(0xC002003A)
+#define RPC_NT_NOT_ALL_OBJS_UNEXPORTED cpu_to_le32(0xC002003B)
+#define RPC_NT_INTERFACE_NOT_FOUND cpu_to_le32(0xC002003C)
+#define RPC_NT_ENTRY_ALREADY_EXISTS cpu_to_le32(0xC002003D)
+#define RPC_NT_ENTRY_NOT_FOUND cpu_to_le32(0xC002003E)
+#define RPC_NT_NAME_SERVICE_UNAVAILABLE cpu_to_le32(0xC002003F)
+#define RPC_NT_INVALID_NAF_ID cpu_to_le32(0xC0020040)
+#define RPC_NT_CANNOT_SUPPORT cpu_to_le32(0xC0020041)
+#define RPC_NT_NO_CONTEXT_AVAILABLE cpu_to_le32(0xC0020042)
+#define RPC_NT_INTERNAL_ERROR cpu_to_le32(0xC0020043)
+#define RPC_NT_ZERO_DIVIDE cpu_to_le32(0xC0020044)
+#define RPC_NT_ADDRESS_ERROR cpu_to_le32(0xC0020045)
+#define RPC_NT_FP_DIV_ZERO cpu_to_le32(0xC0020046)
+#define RPC_NT_FP_UNDERFLOW cpu_to_le32(0xC0020047)
+#define RPC_NT_FP_OVERFLOW cpu_to_le32(0xC0020048)
+#define RPC_NT_CALL_IN_PROGRESS cpu_to_le32(0xC0020049)
+#define RPC_NT_NO_MORE_BINDINGS cpu_to_le32(0xC002004A)
+#define RPC_NT_GROUP_MEMBER_NOT_FOUND cpu_to_le32(0xC002004B)
+#define EPT_NT_CANT_CREATE cpu_to_le32(0xC002004C)
+#define RPC_NT_INVALID_OBJECT cpu_to_le32(0xC002004D)
+#define RPC_NT_NO_INTERFACES cpu_to_le32(0xC002004F)
+#define RPC_NT_CALL_CANCELLED cpu_to_le32(0xC0020050)
+#define RPC_NT_BINDING_INCOMPLETE cpu_to_le32(0xC0020051)
+#define RPC_NT_COMM_FAILURE cpu_to_le32(0xC0020052)
+#define RPC_NT_UNSUPPORTED_AUTHN_LEVEL cpu_to_le32(0xC0020053)
+#define RPC_NT_NO_PRINC_NAME cpu_to_le32(0xC0020054)
+#define RPC_NT_NOT_RPC_ERROR cpu_to_le32(0xC0020055)
+#define RPC_NT_SEC_PKG_ERROR cpu_to_le32(0xC0020057)
+#define RPC_NT_NOT_CANCELLED cpu_to_le32(0xC0020058)
+#define RPC_NT_INVALID_ASYNC_HANDLE cpu_to_le32(0xC0020062)
+#define RPC_NT_INVALID_ASYNC_CALL cpu_to_le32(0xC0020063)
+#define RPC_NT_PROXY_ACCESS_DENIED cpu_to_le32(0xC0020064)
+#define RPC_NT_NO_MORE_ENTRIES cpu_to_le32(0xC0030001) /* RPC_NT_* codes whose upper 16 bits are 0xC003 (distinct from the 0xC002 run) */
+#define RPC_NT_SS_CHAR_TRANS_OPEN_FAIL cpu_to_le32(0xC0030002)
+#define RPC_NT_SS_CHAR_TRANS_SHORT_FILE cpu_to_le32(0xC0030003)
+#define RPC_NT_SS_IN_NULL_CONTEXT cpu_to_le32(0xC0030004)
+#define RPC_NT_SS_CONTEXT_MISMATCH cpu_to_le32(0xC0030005)
+#define RPC_NT_SS_CONTEXT_DAMAGED cpu_to_le32(0xC0030006)
+#define RPC_NT_SS_HANDLES_MISMATCH cpu_to_le32(0xC0030007)
+#define RPC_NT_SS_CANNOT_GET_CALL_HANDLE cpu_to_le32(0xC0030008)
+#define RPC_NT_NULL_REF_POINTER cpu_to_le32(0xC0030009)
+#define RPC_NT_ENUM_VALUE_OUT_OF_RANGE cpu_to_le32(0xC003000A)
+#define RPC_NT_BYTE_COUNT_TOO_SMALL cpu_to_le32(0xC003000B)
+#define RPC_NT_BAD_STUB_DATA cpu_to_le32(0xC003000C)
+#define RPC_NT_INVALID_ES_ACTION cpu_to_le32(0xC0030059)
+#define RPC_NT_WRONG_ES_VERSION cpu_to_le32(0xC003005A)
+#define RPC_NT_WRONG_STUB_VERSION cpu_to_le32(0xC003005B)
+#define RPC_NT_INVALID_PIPE_OBJECT cpu_to_le32(0xC003005C)
+#define RPC_NT_INVALID_PIPE_OPERATION cpu_to_le32(0xC003005D)
+#define RPC_NT_WRONG_PIPE_VERSION cpu_to_le32(0xC003005E)
+#define RPC_NT_PIPE_CLOSED cpu_to_le32(0xC003005F)
+#define RPC_NT_PIPE_DISCIPLINE_ERROR cpu_to_le32(0xC0030060)
+#define RPC_NT_PIPE_EMPTY cpu_to_le32(0xC0030061)
+#define STATUS_PNP_BAD_MPS_TABLE cpu_to_le32(0xC0040035) /* codes with upper 16 bits 0xC004: STATUS_PNP_* plus STATUS_IO_REISSUE_AS_CACHED */
+#define STATUS_PNP_TRANSLATION_FAILED cpu_to_le32(0xC0040036)
+#define STATUS_PNP_IRQ_TRANSLATION_FAILED cpu_to_le32(0xC0040037)
+#define STATUS_PNP_INVALID_ID cpu_to_le32(0xC0040038)
+#define STATUS_IO_REISSUE_AS_CACHED cpu_to_le32(0xC0040039)
+#define STATUS_CTX_WINSTATION_NAME_INVALID cpu_to_le32(0xC00A0001) /* codes with upper 16 bits 0xC00A: STATUS_CTX_*, STATUS_RDP_*, STATUS_TS_*; values are not contiguous */
+#define STATUS_CTX_INVALID_PD cpu_to_le32(0xC00A0002)
+#define STATUS_CTX_PD_NOT_FOUND cpu_to_le32(0xC00A0003)
+#define STATUS_CTX_CLOSE_PENDING cpu_to_le32(0xC00A0006)
+#define STATUS_CTX_NO_OUTBUF cpu_to_le32(0xC00A0007)
+#define STATUS_CTX_MODEM_INF_NOT_FOUND cpu_to_le32(0xC00A0008)
+#define STATUS_CTX_INVALID_MODEMNAME cpu_to_le32(0xC00A0009)
+#define STATUS_CTX_RESPONSE_ERROR cpu_to_le32(0xC00A000A)
+#define STATUS_CTX_MODEM_RESPONSE_TIMEOUT cpu_to_le32(0xC00A000B)
+#define STATUS_CTX_MODEM_RESPONSE_NO_CARRIER cpu_to_le32(0xC00A000C)
+#define STATUS_CTX_MODEM_RESPONSE_NO_DIALTONE cpu_to_le32(0xC00A000D)
+#define STATUS_CTX_MODEM_RESPONSE_BUSY cpu_to_le32(0xC00A000E)
+#define STATUS_CTX_MODEM_RESPONSE_VOICE cpu_to_le32(0xC00A000F)
+#define STATUS_CTX_TD_ERROR cpu_to_le32(0xC00A0010)
+#define STATUS_CTX_LICENSE_CLIENT_INVALID cpu_to_le32(0xC00A0012)
+#define STATUS_CTX_LICENSE_NOT_AVAILABLE cpu_to_le32(0xC00A0013)
+#define STATUS_CTX_LICENSE_EXPIRED cpu_to_le32(0xC00A0014)
+#define STATUS_CTX_WINSTATION_NOT_FOUND cpu_to_le32(0xC00A0015)
+#define STATUS_CTX_WINSTATION_NAME_COLLISION cpu_to_le32(0xC00A0016)
+#define STATUS_CTX_WINSTATION_BUSY cpu_to_le32(0xC00A0017)
+#define STATUS_CTX_BAD_VIDEO_MODE cpu_to_le32(0xC00A0018)
+#define STATUS_CTX_GRAPHICS_INVALID cpu_to_le32(0xC00A0022)
+#define STATUS_CTX_NOT_CONSOLE cpu_to_le32(0xC00A0024)
+#define STATUS_CTX_CLIENT_QUERY_TIMEOUT cpu_to_le32(0xC00A0026)
+#define STATUS_CTX_CONSOLE_DISCONNECT cpu_to_le32(0xC00A0027)
+#define STATUS_CTX_CONSOLE_CONNECT cpu_to_le32(0xC00A0028)
+#define STATUS_CTX_SHADOW_DENIED cpu_to_le32(0xC00A002A)
+#define STATUS_CTX_WINSTATION_ACCESS_DENIED cpu_to_le32(0xC00A002B)
+#define STATUS_CTX_INVALID_WD cpu_to_le32(0xC00A002E)
+#define STATUS_CTX_WD_NOT_FOUND cpu_to_le32(0xC00A002F)
+#define STATUS_CTX_SHADOW_INVALID cpu_to_le32(0xC00A0030)
+#define STATUS_CTX_SHADOW_DISABLED cpu_to_le32(0xC00A0031)
+#define STATUS_RDP_PROTOCOL_ERROR cpu_to_le32(0xC00A0032)
+#define STATUS_CTX_CLIENT_LICENSE_NOT_SET cpu_to_le32(0xC00A0033)
+#define STATUS_CTX_CLIENT_LICENSE_IN_USE cpu_to_le32(0xC00A0034)
+#define STATUS_CTX_SHADOW_ENDED_BY_MODE_CHANGE cpu_to_le32(0xC00A0035)
+#define STATUS_CTX_SHADOW_NOT_RUNNING cpu_to_le32(0xC00A0036)
+#define STATUS_CTX_LOGON_DISABLED cpu_to_le32(0xC00A0037)
+#define STATUS_CTX_SECURITY_LAYER_ERROR cpu_to_le32(0xC00A0038)
+#define STATUS_TS_INCOMPATIBLE_SESSIONS cpu_to_le32(0xC00A0039)
+#define STATUS_MUI_FILE_NOT_FOUND cpu_to_le32(0xC00B0001) /* codes with upper 16 bits 0xC00B: STATUS_MUI_* plus STATUS_RESOURCE_ENUM_USER_STOP */
+#define STATUS_MUI_INVALID_FILE cpu_to_le32(0xC00B0002)
+#define STATUS_MUI_INVALID_RC_CONFIG cpu_to_le32(0xC00B0003)
+#define STATUS_MUI_INVALID_LOCALE_NAME cpu_to_le32(0xC00B0004)
+#define STATUS_MUI_INVALID_ULTIMATEFALLBACK_NAME cpu_to_le32(0xC00B0005)
+#define STATUS_MUI_FILE_NOT_LOADED cpu_to_le32(0xC00B0006)
+#define STATUS_RESOURCE_ENUM_USER_STOP cpu_to_le32(0xC00B0007)
+#define STATUS_CLUSTER_INVALID_NODE cpu_to_le32(0xC0130001) /* STATUS_CLUSTER_* codes: upper 16 bits 0xC013, contiguous 0x0001-0x0017 */
+#define STATUS_CLUSTER_NODE_EXISTS cpu_to_le32(0xC0130002)
+#define STATUS_CLUSTER_JOIN_IN_PROGRESS cpu_to_le32(0xC0130003)
+#define STATUS_CLUSTER_NODE_NOT_FOUND cpu_to_le32(0xC0130004)
+#define STATUS_CLUSTER_LOCAL_NODE_NOT_FOUND cpu_to_le32(0xC0130005)
+#define STATUS_CLUSTER_NETWORK_EXISTS cpu_to_le32(0xC0130006)
+#define STATUS_CLUSTER_NETWORK_NOT_FOUND cpu_to_le32(0xC0130007)
+#define STATUS_CLUSTER_NETINTERFACE_EXISTS cpu_to_le32(0xC0130008)
+#define STATUS_CLUSTER_NETINTERFACE_NOT_FOUND cpu_to_le32(0xC0130009)
+#define STATUS_CLUSTER_INVALID_REQUEST cpu_to_le32(0xC013000A)
+#define STATUS_CLUSTER_INVALID_NETWORK_PROVIDER cpu_to_le32(0xC013000B)
+#define STATUS_CLUSTER_NODE_DOWN cpu_to_le32(0xC013000C)
+#define STATUS_CLUSTER_NODE_UNREACHABLE cpu_to_le32(0xC013000D)
+#define STATUS_CLUSTER_NODE_NOT_MEMBER cpu_to_le32(0xC013000E)
+#define STATUS_CLUSTER_JOIN_NOT_IN_PROGRESS cpu_to_le32(0xC013000F)
+#define STATUS_CLUSTER_INVALID_NETWORK cpu_to_le32(0xC0130010)
+#define STATUS_CLUSTER_NO_NET_ADAPTERS cpu_to_le32(0xC0130011)
+#define STATUS_CLUSTER_NODE_UP cpu_to_le32(0xC0130012)
+#define STATUS_CLUSTER_NODE_PAUSED cpu_to_le32(0xC0130013)
+#define STATUS_CLUSTER_NODE_NOT_PAUSED cpu_to_le32(0xC0130014)
+#define STATUS_CLUSTER_NO_SECURITY_CONTEXT cpu_to_le32(0xC0130015)
+#define STATUS_CLUSTER_NETWORK_NOT_INTERNAL cpu_to_le32(0xC0130016)
+#define STATUS_CLUSTER_POISONED cpu_to_le32(0xC0130017)
+#define STATUS_ACPI_INVALID_OPCODE cpu_to_le32(0xC0140001) /* STATUS_ACPI_* codes: upper 16 bits 0xC014; low half jumps from 0x0019 to 0x0020 */
+#define STATUS_ACPI_STACK_OVERFLOW cpu_to_le32(0xC0140002)
+#define STATUS_ACPI_ASSERT_FAILED cpu_to_le32(0xC0140003)
+#define STATUS_ACPI_INVALID_INDEX cpu_to_le32(0xC0140004)
+#define STATUS_ACPI_INVALID_ARGUMENT cpu_to_le32(0xC0140005)
+#define STATUS_ACPI_FATAL cpu_to_le32(0xC0140006)
+#define STATUS_ACPI_INVALID_SUPERNAME cpu_to_le32(0xC0140007)
+#define STATUS_ACPI_INVALID_ARGTYPE cpu_to_le32(0xC0140008)
+#define STATUS_ACPI_INVALID_OBJTYPE cpu_to_le32(0xC0140009)
+#define STATUS_ACPI_INVALID_TARGETTYPE cpu_to_le32(0xC014000A)
+#define STATUS_ACPI_INCORRECT_ARGUMENT_COUNT cpu_to_le32(0xC014000B)
+#define STATUS_ACPI_ADDRESS_NOT_MAPPED cpu_to_le32(0xC014000C)
+#define STATUS_ACPI_INVALID_EVENTTYPE cpu_to_le32(0xC014000D)
+#define STATUS_ACPI_HANDLER_COLLISION cpu_to_le32(0xC014000E)
+#define STATUS_ACPI_INVALID_DATA cpu_to_le32(0xC014000F)
+#define STATUS_ACPI_INVALID_REGION cpu_to_le32(0xC0140010)
+#define STATUS_ACPI_INVALID_ACCESS_SIZE cpu_to_le32(0xC0140011)
+#define STATUS_ACPI_ACQUIRE_GLOBAL_LOCK cpu_to_le32(0xC0140012)
+#define STATUS_ACPI_ALREADY_INITIALIZED cpu_to_le32(0xC0140013)
+#define STATUS_ACPI_NOT_INITIALIZED cpu_to_le32(0xC0140014)
+#define STATUS_ACPI_INVALID_MUTEX_LEVEL cpu_to_le32(0xC0140015)
+#define STATUS_ACPI_MUTEX_NOT_OWNED cpu_to_le32(0xC0140016)
+#define STATUS_ACPI_MUTEX_NOT_OWNER cpu_to_le32(0xC0140017)
+#define STATUS_ACPI_RS_ACCESS cpu_to_le32(0xC0140018)
+#define STATUS_ACPI_INVALID_TABLE cpu_to_le32(0xC0140019)
+#define STATUS_ACPI_REG_HANDLER_FAILED cpu_to_le32(0xC0140020)
+#define STATUS_ACPI_POWER_REQUEST_FAILED cpu_to_le32(0xC0140021)
+#define STATUS_SXS_SECTION_NOT_FOUND cpu_to_le32(0xC0150001) /* codes with upper 16 bits 0xC015: mostly STATUS_SXS_*, plus a few differently prefixed names */
+#define STATUS_SXS_CANT_GEN_ACTCTX cpu_to_le32(0xC0150002)
+#define STATUS_SXS_INVALID_ACTCTXDATA_FORMAT cpu_to_le32(0xC0150003)
+#define STATUS_SXS_ASSEMBLY_NOT_FOUND cpu_to_le32(0xC0150004)
+#define STATUS_SXS_MANIFEST_FORMAT_ERROR cpu_to_le32(0xC0150005)
+#define STATUS_SXS_MANIFEST_PARSE_ERROR cpu_to_le32(0xC0150006)
+#define STATUS_SXS_ACTIVATION_CONTEXT_DISABLED cpu_to_le32(0xC0150007)
+#define STATUS_SXS_KEY_NOT_FOUND cpu_to_le32(0xC0150008)
+#define STATUS_SXS_VERSION_CONFLICT cpu_to_le32(0xC0150009)
+#define STATUS_SXS_WRONG_SECTION_TYPE cpu_to_le32(0xC015000A)
+#define STATUS_SXS_THREAD_QUERIES_DISABLED cpu_to_le32(0xC015000B)
+#define STATUS_SXS_ASSEMBLY_MISSING cpu_to_le32(0xC015000C)
+#define STATUS_SXS_PROCESS_DEFAULT_ALREADY_SET cpu_to_le32(0xC015000E)
+#define STATUS_SXS_EARLY_DEACTIVATION cpu_to_le32(0xC015000F)
+#define STATUS_SXS_INVALID_DEACTIVATION cpu_to_le32(0xC0150010)
+#define STATUS_SXS_MULTIPLE_DEACTIVATION cpu_to_le32(0xC0150011)
+#define STATUS_SXS_SYSTEM_DEFAULT_ACTIVATION_CONTEXT_EMPTY \
+ cpu_to_le32(0xC0150012)
+#define STATUS_SXS_PROCESS_TERMINATION_REQUESTED cpu_to_le32(0xC0150013)
+#define STATUS_SXS_CORRUPT_ACTIVATION_STACK cpu_to_le32(0xC0150014)
+#define STATUS_SXS_CORRUPTION cpu_to_le32(0xC0150015)
+#define STATUS_SXS_INVALID_IDENTITY_ATTRIBUTE_VALUE cpu_to_le32(0xC0150016)
+#define STATUS_SXS_INVALID_IDENTITY_ATTRIBUTE_NAME cpu_to_le32(0xC0150017)
+#define STATUS_SXS_IDENTITY_DUPLICATE_ATTRIBUTE cpu_to_le32(0xC0150018)
+#define STATUS_SXS_IDENTITY_PARSE_ERROR cpu_to_le32(0xC0150019)
+#define STATUS_SXS_COMPONENT_STORE_CORRUPT cpu_to_le32(0xC015001A)
+#define STATUS_SXS_FILE_HASH_MISMATCH cpu_to_le32(0xC015001B)
+#define STATUS_SXS_MANIFEST_IDENTITY_SAME_BUT_CONTENTS_DIFFERENT \
+ cpu_to_le32(0xC015001C)
+#define STATUS_SXS_IDENTITIES_DIFFERENT cpu_to_le32(0xC015001D)
+#define STATUS_SXS_ASSEMBLY_IS_NOT_A_DEPLOYMENT cpu_to_le32(0xC015001E)
+#define STATUS_SXS_FILE_NOT_PART_OF_ASSEMBLY cpu_to_le32(0xC015001F)
+#define STATUS_ADVANCED_INSTALLER_FAILED cpu_to_le32(0xC0150020)
+#define STATUS_XML_ENCODING_MISMATCH cpu_to_le32(0xC0150021)
+#define STATUS_SXS_MANIFEST_TOO_BIG cpu_to_le32(0xC0150022)
+#define STATUS_SXS_SETTING_NOT_REGISTERED cpu_to_le32(0xC0150023)
+#define STATUS_SXS_TRANSACTION_CLOSURE_INCOMPLETE cpu_to_le32(0xC0150024)
+#define STATUS_SMI_PRIMITIVE_INSTALLER_FAILED cpu_to_le32(0xC0150025)
+#define STATUS_GENERIC_COMMAND_FAILED cpu_to_le32(0xC0150026)
+#define STATUS_SXS_FILE_HASH_MISSING cpu_to_le32(0xC0150027)
+#define STATUS_TRANSACTIONAL_CONFLICT cpu_to_le32(0xC0190001) /* codes with upper 16 bits 0xC019 (transaction-related names); values are not contiguous */
+#define STATUS_INVALID_TRANSACTION cpu_to_le32(0xC0190002)
+#define STATUS_TRANSACTION_NOT_ACTIVE cpu_to_le32(0xC0190003)
+#define STATUS_TM_INITIALIZATION_FAILED cpu_to_le32(0xC0190004)
+#define STATUS_RM_NOT_ACTIVE cpu_to_le32(0xC0190005)
+#define STATUS_RM_METADATA_CORRUPT cpu_to_le32(0xC0190006)
+#define STATUS_TRANSACTION_NOT_JOINED cpu_to_le32(0xC0190007)
+#define STATUS_DIRECTORY_NOT_RM cpu_to_le32(0xC0190008)
+#define STATUS_TRANSACTIONS_UNSUPPORTED_REMOTE cpu_to_le32(0xC019000A)
+#define STATUS_LOG_RESIZE_INVALID_SIZE cpu_to_le32(0xC019000B)
+#define STATUS_REMOTE_FILE_VERSION_MISMATCH cpu_to_le32(0xC019000C)
+#define STATUS_CRM_PROTOCOL_ALREADY_EXISTS cpu_to_le32(0xC019000F)
+#define STATUS_TRANSACTION_PROPAGATION_FAILED cpu_to_le32(0xC0190010)
+#define STATUS_CRM_PROTOCOL_NOT_FOUND cpu_to_le32(0xC0190011)
+#define STATUS_TRANSACTION_SUPERIOR_EXISTS cpu_to_le32(0xC0190012)
+#define STATUS_TRANSACTION_REQUEST_NOT_VALID cpu_to_le32(0xC0190013)
+#define STATUS_TRANSACTION_NOT_REQUESTED cpu_to_le32(0xC0190014)
+#define STATUS_TRANSACTION_ALREADY_ABORTED cpu_to_le32(0xC0190015)
+#define STATUS_TRANSACTION_ALREADY_COMMITTED cpu_to_le32(0xC0190016)
+#define STATUS_TRANSACTION_INVALID_MARSHALL_BUFFER cpu_to_le32(0xC0190017)
+#define STATUS_CURRENT_TRANSACTION_NOT_VALID cpu_to_le32(0xC0190018)
+#define STATUS_LOG_GROWTH_FAILED cpu_to_le32(0xC0190019)
+#define STATUS_OBJECT_NO_LONGER_EXISTS cpu_to_le32(0xC0190021)
+#define STATUS_STREAM_MINIVERSION_NOT_FOUND cpu_to_le32(0xC0190022)
+#define STATUS_STREAM_MINIVERSION_NOT_VALID cpu_to_le32(0xC0190023)
+#define STATUS_MINIVERSION_INACCESSIBLE_FROM_SPECIFIED_TRANSACTION \
+ cpu_to_le32(0xC0190024)
+#define STATUS_CANT_OPEN_MINIVERSION_WITH_MODIFY_INTENT cpu_to_le32(0xC0190025)
+#define STATUS_CANT_CREATE_MORE_STREAM_MINIVERSIONS cpu_to_le32(0xC0190026)
+#define STATUS_HANDLE_NO_LONGER_VALID cpu_to_le32(0xC0190028)
+#define STATUS_LOG_CORRUPTION_DETECTED cpu_to_le32(0xC0190030)
+#define STATUS_RM_DISCONNECTED cpu_to_le32(0xC0190032)
+#define STATUS_ENLISTMENT_NOT_SUPERIOR cpu_to_le32(0xC0190033)
+#define STATUS_FILE_IDENTITY_NOT_PERSISTENT cpu_to_le32(0xC0190036)
+#define STATUS_CANT_BREAK_TRANSACTIONAL_DEPENDENCY cpu_to_le32(0xC0190037)
+#define STATUS_CANT_CROSS_RM_BOUNDARY cpu_to_le32(0xC0190038)
+#define STATUS_TXF_DIR_NOT_EMPTY cpu_to_le32(0xC0190039)
+#define STATUS_INDOUBT_TRANSACTIONS_EXIST cpu_to_le32(0xC019003A)
+#define STATUS_TM_VOLATILE cpu_to_le32(0xC019003B)
+#define STATUS_ROLLBACK_TIMER_EXPIRED cpu_to_le32(0xC019003C)
+#define STATUS_TXF_ATTRIBUTE_CORRUPT cpu_to_le32(0xC019003D)
+#define STATUS_EFS_NOT_ALLOWED_IN_TRANSACTION cpu_to_le32(0xC019003E)
+#define STATUS_TRANSACTIONAL_OPEN_NOT_ALLOWED cpu_to_le32(0xC019003F)
+#define STATUS_TRANSACTED_MAPPING_UNSUPPORTED_REMOTE cpu_to_le32(0xC0190040)
+#define STATUS_TRANSACTION_REQUIRED_PROMOTION cpu_to_le32(0xC0190043)
+#define STATUS_CANNOT_EXECUTE_FILE_IN_TRANSACTION cpu_to_le32(0xC0190044)
+#define STATUS_TRANSACTIONS_NOT_FROZEN cpu_to_le32(0xC0190045)
+#define STATUS_TRANSACTION_FREEZE_IN_PROGRESS cpu_to_le32(0xC0190046)
+#define STATUS_NOT_SNAPSHOT_VOLUME cpu_to_le32(0xC0190047)
+#define STATUS_NO_SAVEPOINT_WITH_OPEN_FILES cpu_to_le32(0xC0190048)
+#define STATUS_SPARSE_NOT_ALLOWED_IN_TRANSACTION cpu_to_le32(0xC0190049)
+#define STATUS_TM_IDENTITY_MISMATCH cpu_to_le32(0xC019004A)
+#define STATUS_FLOATED_SECTION cpu_to_le32(0xC019004B)
+#define STATUS_CANNOT_ACCEPT_TRANSACTED_WORK cpu_to_le32(0xC019004C)
+#define STATUS_CANNOT_ABORT_TRANSACTIONS cpu_to_le32(0xC019004D)
+#define STATUS_TRANSACTION_NOT_FOUND cpu_to_le32(0xC019004E)
+#define STATUS_RESOURCEMANAGER_NOT_FOUND cpu_to_le32(0xC019004F)
+#define STATUS_ENLISTMENT_NOT_FOUND cpu_to_le32(0xC0190050)
+#define STATUS_TRANSACTIONMANAGER_NOT_FOUND cpu_to_le32(0xC0190051)
+#define STATUS_TRANSACTIONMANAGER_NOT_ONLINE cpu_to_le32(0xC0190052)
+#define STATUS_TRANSACTIONMANAGER_RECOVERY_NAME_COLLISION \
+ cpu_to_le32(0xC0190053)
+#define STATUS_TRANSACTION_NOT_ROOT cpu_to_le32(0xC0190054)
+#define STATUS_TRANSACTION_OBJECT_EXPIRED cpu_to_le32(0xC0190055)
+#define STATUS_COMPRESSION_NOT_ALLOWED_IN_TRANSACTION cpu_to_le32(0xC0190056)
+#define STATUS_TRANSACTION_RESPONSE_NOT_ENLISTED cpu_to_le32(0xC0190057)
+#define STATUS_TRANSACTION_RECORD_TOO_LONG cpu_to_le32(0xC0190058)
+#define STATUS_NO_LINK_TRACKING_IN_TRANSACTION cpu_to_le32(0xC0190059)
+#define STATUS_OPERATION_NOT_SUPPORTED_IN_TRANSACTION cpu_to_le32(0xC019005A)
+#define STATUS_TRANSACTION_INTEGRITY_VIOLATED cpu_to_le32(0xC019005B)
+#define STATUS_LOG_SECTOR_INVALID cpu_to_le32(0xC01A0001) /* STATUS_LOG_* codes: upper 16 bits 0xC01A; low half 0x000C is not defined in this run */
+#define STATUS_LOG_SECTOR_PARITY_INVALID cpu_to_le32(0xC01A0002)
+#define STATUS_LOG_SECTOR_REMAPPED cpu_to_le32(0xC01A0003)
+#define STATUS_LOG_BLOCK_INCOMPLETE cpu_to_le32(0xC01A0004)
+#define STATUS_LOG_INVALID_RANGE cpu_to_le32(0xC01A0005)
+#define STATUS_LOG_BLOCKS_EXHAUSTED cpu_to_le32(0xC01A0006)
+#define STATUS_LOG_READ_CONTEXT_INVALID cpu_to_le32(0xC01A0007)
+#define STATUS_LOG_RESTART_INVALID cpu_to_le32(0xC01A0008)
+#define STATUS_LOG_BLOCK_VERSION cpu_to_le32(0xC01A0009)
+#define STATUS_LOG_BLOCK_INVALID cpu_to_le32(0xC01A000A)
+#define STATUS_LOG_READ_MODE_INVALID cpu_to_le32(0xC01A000B)
+#define STATUS_LOG_METADATA_CORRUPT cpu_to_le32(0xC01A000D)
+#define STATUS_LOG_METADATA_INVALID cpu_to_le32(0xC01A000E)
+#define STATUS_LOG_METADATA_INCONSISTENT cpu_to_le32(0xC01A000F)
+#define STATUS_LOG_RESERVATION_INVALID cpu_to_le32(0xC01A0010)
+#define STATUS_LOG_CANT_DELETE cpu_to_le32(0xC01A0011)
+#define STATUS_LOG_CONTAINER_LIMIT_EXCEEDED cpu_to_le32(0xC01A0012)
+#define STATUS_LOG_START_OF_LOG cpu_to_le32(0xC01A0013)
+#define STATUS_LOG_POLICY_ALREADY_INSTALLED cpu_to_le32(0xC01A0014)
+#define STATUS_LOG_POLICY_NOT_INSTALLED cpu_to_le32(0xC01A0015)
+#define STATUS_LOG_POLICY_INVALID cpu_to_le32(0xC01A0016)
+#define STATUS_LOG_POLICY_CONFLICT cpu_to_le32(0xC01A0017)
+#define STATUS_LOG_PINNED_ARCHIVE_TAIL cpu_to_le32(0xC01A0018)
+#define STATUS_LOG_RECORD_NONEXISTENT cpu_to_le32(0xC01A0019)
+#define STATUS_LOG_RECORDS_RESERVED_INVALID cpu_to_le32(0xC01A001A)
+#define STATUS_LOG_SPACE_RESERVED_INVALID cpu_to_le32(0xC01A001B)
+#define STATUS_LOG_TAIL_INVALID cpu_to_le32(0xC01A001C)
+#define STATUS_LOG_FULL cpu_to_le32(0xC01A001D)
+#define STATUS_LOG_MULTIPLEXED cpu_to_le32(0xC01A001E)
+#define STATUS_LOG_DEDICATED cpu_to_le32(0xC01A001F)
+#define STATUS_LOG_ARCHIVE_NOT_IN_PROGRESS cpu_to_le32(0xC01A0020)
+#define STATUS_LOG_ARCHIVE_IN_PROGRESS cpu_to_le32(0xC01A0021)
+#define STATUS_LOG_EPHEMERAL cpu_to_le32(0xC01A0022)
+#define STATUS_LOG_NOT_ENOUGH_CONTAINERS cpu_to_le32(0xC01A0023)
+#define STATUS_LOG_CLIENT_ALREADY_REGISTERED cpu_to_le32(0xC01A0024)
+#define STATUS_LOG_CLIENT_NOT_REGISTERED cpu_to_le32(0xC01A0025)
+#define STATUS_LOG_FULL_HANDLER_IN_PROGRESS cpu_to_le32(0xC01A0026)
+#define STATUS_LOG_CONTAINER_READ_FAILED cpu_to_le32(0xC01A0027)
+#define STATUS_LOG_CONTAINER_WRITE_FAILED cpu_to_le32(0xC01A0028)
+#define STATUS_LOG_CONTAINER_OPEN_FAILED cpu_to_le32(0xC01A0029)
+#define STATUS_LOG_CONTAINER_STATE_INVALID cpu_to_le32(0xC01A002A)
+#define STATUS_LOG_STATE_INVALID cpu_to_le32(0xC01A002B)
+#define STATUS_LOG_PINNED cpu_to_le32(0xC01A002C)
+#define STATUS_LOG_METADATA_FLUSH_FAILED cpu_to_le32(0xC01A002D)
+#define STATUS_LOG_INCONSISTENT_SECURITY cpu_to_le32(0xC01A002E)
+#define STATUS_LOG_APPENDED_FLUSH_FAILED cpu_to_le32(0xC01A002F)
+#define STATUS_LOG_PINNED_RESERVATION cpu_to_le32(0xC01A0030)
+#define STATUS_VIDEO_HUNG_DISPLAY_DRIVER_THREAD cpu_to_le32(0xC01B00EA) /* only code in this stretch with upper 16 bits 0xC01B */
+#define STATUS_FLT_NO_HANDLER_DEFINED cpu_to_le32(0xC01C0001) /* STATUS_FLT_* codes: upper 16 bits 0xC01C; low half jumps from 0x001C to 0x0020 */
+#define STATUS_FLT_CONTEXT_ALREADY_DEFINED cpu_to_le32(0xC01C0002)
+#define STATUS_FLT_INVALID_ASYNCHRONOUS_REQUEST cpu_to_le32(0xC01C0003)
+#define STATUS_FLT_DISALLOW_FAST_IO cpu_to_le32(0xC01C0004)
+#define STATUS_FLT_INVALID_NAME_REQUEST cpu_to_le32(0xC01C0005)
+#define STATUS_FLT_NOT_SAFE_TO_POST_OPERATION cpu_to_le32(0xC01C0006)
+#define STATUS_FLT_NOT_INITIALIZED cpu_to_le32(0xC01C0007)
+#define STATUS_FLT_FILTER_NOT_READY cpu_to_le32(0xC01C0008)
+#define STATUS_FLT_POST_OPERATION_CLEANUP cpu_to_le32(0xC01C0009)
+#define STATUS_FLT_INTERNAL_ERROR cpu_to_le32(0xC01C000A)
+#define STATUS_FLT_DELETING_OBJECT cpu_to_le32(0xC01C000B)
+#define STATUS_FLT_MUST_BE_NONPAGED_POOL cpu_to_le32(0xC01C000C)
+#define STATUS_FLT_DUPLICATE_ENTRY cpu_to_le32(0xC01C000D)
+#define STATUS_FLT_CBDQ_DISABLED cpu_to_le32(0xC01C000E)
+#define STATUS_FLT_DO_NOT_ATTACH cpu_to_le32(0xC01C000F)
+#define STATUS_FLT_DO_NOT_DETACH cpu_to_le32(0xC01C0010)
+#define STATUS_FLT_INSTANCE_ALTITUDE_COLLISION cpu_to_le32(0xC01C0011)
+#define STATUS_FLT_INSTANCE_NAME_COLLISION cpu_to_le32(0xC01C0012)
+#define STATUS_FLT_FILTER_NOT_FOUND cpu_to_le32(0xC01C0013)
+#define STATUS_FLT_VOLUME_NOT_FOUND cpu_to_le32(0xC01C0014)
+#define STATUS_FLT_INSTANCE_NOT_FOUND cpu_to_le32(0xC01C0015)
+#define STATUS_FLT_CONTEXT_ALLOCATION_NOT_FOUND cpu_to_le32(0xC01C0016)
+#define STATUS_FLT_INVALID_CONTEXT_REGISTRATION cpu_to_le32(0xC01C0017)
+#define STATUS_FLT_NAME_CACHE_MISS cpu_to_le32(0xC01C0018)
+#define STATUS_FLT_NO_DEVICE_OBJECT cpu_to_le32(0xC01C0019)
+#define STATUS_FLT_VOLUME_ALREADY_MOUNTED cpu_to_le32(0xC01C001A)
+#define STATUS_FLT_ALREADY_ENLISTED cpu_to_le32(0xC01C001B)
+#define STATUS_FLT_CONTEXT_ALREADY_LINKED cpu_to_le32(0xC01C001C)
+#define STATUS_FLT_NO_WAITER_FOR_REPLY cpu_to_le32(0xC01C0020)
+#define STATUS_MONITOR_NO_DESCRIPTOR cpu_to_le32(0xC01D0001) /* STATUS_MONITOR_* codes: upper 16 bits 0xC01D, contiguous 0x0001-0x0009 */
+#define STATUS_MONITOR_UNKNOWN_DESCRIPTOR_FORMAT cpu_to_le32(0xC01D0002)
+#define STATUS_MONITOR_INVALID_DESCRIPTOR_CHECKSUM cpu_to_le32(0xC01D0003)
+#define STATUS_MONITOR_INVALID_STANDARD_TIMING_BLOCK cpu_to_le32(0xC01D0004)
+#define STATUS_MONITOR_WMI_DATABLOCK_REGISTRATION_FAILED cpu_to_le32(0xC01D0005)
+#define STATUS_MONITOR_INVALID_SERIAL_NUMBER_MONDSC_BLOCK \
+ cpu_to_le32(0xC01D0006)
+#define STATUS_MONITOR_INVALID_USER_FRIENDLY_MONDSC_BLOCK \
+ cpu_to_le32(0xC01D0007)
+#define STATUS_MONITOR_NO_MORE_DESCRIPTOR_DATA cpu_to_le32(0xC01D0008)
+#define STATUS_MONITOR_INVALID_DETAILED_TIMING_BLOCK cpu_to_le32(0xC01D0009)
+#define STATUS_GRAPHICS_NOT_EXCLUSIVE_MODE_OWNER cpu_to_le32(0xC01E0000) /* STATUS_GRAPHICS_* codes: upper 16 bits 0xC01E; low half is grouped in 0x00xx/0x01xx/0x02xx/0x03xx sub-ranges */
+#define STATUS_GRAPHICS_INSUFFICIENT_DMA_BUFFER cpu_to_le32(0xC01E0001)
+#define STATUS_GRAPHICS_INVALID_DISPLAY_ADAPTER cpu_to_le32(0xC01E0002)
+#define STATUS_GRAPHICS_ADAPTER_WAS_RESET cpu_to_le32(0xC01E0003)
+#define STATUS_GRAPHICS_INVALID_DRIVER_MODEL cpu_to_le32(0xC01E0004)
+#define STATUS_GRAPHICS_PRESENT_MODE_CHANGED cpu_to_le32(0xC01E0005)
+#define STATUS_GRAPHICS_PRESENT_OCCLUDED cpu_to_le32(0xC01E0006)
+#define STATUS_GRAPHICS_PRESENT_DENIED cpu_to_le32(0xC01E0007)
+#define STATUS_GRAPHICS_CANNOTCOLORCONVERT cpu_to_le32(0xC01E0008)
+#define STATUS_GRAPHICS_NO_VIDEO_MEMORY cpu_to_le32(0xC01E0100)
+#define STATUS_GRAPHICS_CANT_LOCK_MEMORY cpu_to_le32(0xC01E0101)
+#define STATUS_GRAPHICS_ALLOCATION_BUSY cpu_to_le32(0xC01E0102)
+#define STATUS_GRAPHICS_TOO_MANY_REFERENCES cpu_to_le32(0xC01E0103)
+#define STATUS_GRAPHICS_TRY_AGAIN_LATER cpu_to_le32(0xC01E0104)
+#define STATUS_GRAPHICS_TRY_AGAIN_NOW cpu_to_le32(0xC01E0105)
+#define STATUS_GRAPHICS_ALLOCATION_INVALID cpu_to_le32(0xC01E0106)
+#define STATUS_GRAPHICS_UNSWIZZLING_APERTURE_UNAVAILABLE cpu_to_le32(0xC01E0107)
+#define STATUS_GRAPHICS_UNSWIZZLING_APERTURE_UNSUPPORTED cpu_to_le32(0xC01E0108)
+#define STATUS_GRAPHICS_CANT_EVICT_PINNED_ALLOCATION cpu_to_le32(0xC01E0109)
+#define STATUS_GRAPHICS_INVALID_ALLOCATION_USAGE cpu_to_le32(0xC01E0110)
+#define STATUS_GRAPHICS_CANT_RENDER_LOCKED_ALLOCATION cpu_to_le32(0xC01E0111)
+#define STATUS_GRAPHICS_ALLOCATION_CLOSED cpu_to_le32(0xC01E0112)
+#define STATUS_GRAPHICS_INVALID_ALLOCATION_INSTANCE cpu_to_le32(0xC01E0113)
+#define STATUS_GRAPHICS_INVALID_ALLOCATION_HANDLE cpu_to_le32(0xC01E0114)
+#define STATUS_GRAPHICS_WRONG_ALLOCATION_DEVICE cpu_to_le32(0xC01E0115)
+#define STATUS_GRAPHICS_ALLOCATION_CONTENT_LOST cpu_to_le32(0xC01E0116)
+#define STATUS_GRAPHICS_GPU_EXCEPTION_ON_DEVICE cpu_to_le32(0xC01E0200)
+#define STATUS_GRAPHICS_INVALID_VIDPN_TOPOLOGY cpu_to_le32(0xC01E0300)
+#define STATUS_GRAPHICS_VIDPN_TOPOLOGY_NOT_SUPPORTED cpu_to_le32(0xC01E0301)
+#define STATUS_GRAPHICS_VIDPN_TOPOLOGY_CURRENTLY_NOT_SUPPORTED \
+ cpu_to_le32(0xC01E0302)
+#define STATUS_GRAPHICS_INVALID_VIDPN cpu_to_le32(0xC01E0303)
+#define STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_SOURCE cpu_to_le32(0xC01E0304)
+#define STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_TARGET cpu_to_le32(0xC01E0305)
+#define STATUS_GRAPHICS_VIDPN_MODALITY_NOT_SUPPORTED cpu_to_le32(0xC01E0306)
+#define STATUS_GRAPHICS_INVALID_VIDPN_SOURCEMODESET cpu_to_le32(0xC01E0308)
+#define STATUS_GRAPHICS_INVALID_VIDPN_TARGETMODESET cpu_to_le32(0xC01E0309)
+#define STATUS_GRAPHICS_INVALID_FREQUENCY cpu_to_le32(0xC01E030A)
+#define STATUS_GRAPHICS_INVALID_ACTIVE_REGION cpu_to_le32(0xC01E030B)
+#define STATUS_GRAPHICS_INVALID_TOTAL_REGION cpu_to_le32(0xC01E030C)
+#define STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_SOURCE_MODE \
+ cpu_to_le32(0xC01E0310)
+#define STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_TARGET_MODE \
+ cpu_to_le32(0xC01E0311)
+#define STATUS_GRAPHICS_PINNED_MODE_MUST_REMAIN_IN_SET cpu_to_le32(0xC01E0312)
+#define STATUS_GRAPHICS_PATH_ALREADY_IN_TOPOLOGY cpu_to_le32(0xC01E0313)
+#define STATUS_GRAPHICS_MODE_ALREADY_IN_MODESET cpu_to_le32(0xC01E0314)
+#define STATUS_GRAPHICS_INVALID_VIDEOPRESENTSOURCESET cpu_to_le32(0xC01E0315)
+#define STATUS_GRAPHICS_INVALID_VIDEOPRESENTTARGETSET cpu_to_le32(0xC01E0316)
+#define STATUS_GRAPHICS_SOURCE_ALREADY_IN_SET cpu_to_le32(0xC01E0317)
+#define STATUS_GRAPHICS_TARGET_ALREADY_IN_SET cpu_to_le32(0xC01E0318)
+#define STATUS_GRAPHICS_INVALID_VIDPN_PRESENT_PATH cpu_to_le32(0xC01E0319)
+#define STATUS_GRAPHICS_NO_RECOMMENDED_VIDPN_TOPOLOGY cpu_to_le32(0xC01E031A)
+#define STATUS_GRAPHICS_INVALID_MONITOR_FREQUENCYRANGESET \
+ cpu_to_le32(0xC01E031B)
+#define STATUS_GRAPHICS_INVALID_MONITOR_FREQUENCYRANGE cpu_to_le32(0xC01E031C)
+#define STATUS_GRAPHICS_FREQUENCYRANGE_NOT_IN_SET cpu_to_le32(0xC01E031D)
+#define STATUS_GRAPHICS_FREQUENCYRANGE_ALREADY_IN_SET cpu_to_le32(0xC01E031F)
+#define STATUS_GRAPHICS_STALE_MODESET cpu_to_le32(0xC01E0320)
+#define STATUS_GRAPHICS_INVALID_MONITOR_SOURCEMODESET cpu_to_le32(0xC01E0321)
+#define STATUS_GRAPHICS_INVALID_MONITOR_SOURCE_MODE cpu_to_le32(0xC01E0322)
+#define STATUS_GRAPHICS_NO_RECOMMENDED_FUNCTIONAL_VIDPN cpu_to_le32(0xC01E0323)
+#define STATUS_GRAPHICS_MODE_ID_MUST_BE_UNIQUE cpu_to_le32(0xC01E0324)
+#define STATUS_GRAPHICS_EMPTY_ADAPTER_MONITOR_MODE_SUPPORT_INTERSECTION \
+ cpu_to_le32(0xC01E0325)
+#define STATUS_GRAPHICS_VIDEO_PRESENT_TARGETS_LESS_THAN_SOURCES \
+ cpu_to_le32(0xC01E0326)
+#define STATUS_GRAPHICS_PATH_NOT_IN_TOPOLOGY cpu_to_le32(0xC01E0327)
+#define STATUS_GRAPHICS_ADAPTER_MUST_HAVE_AT_LEAST_ONE_SOURCE \
+ cpu_to_le32(0xC01E0328)
+#define STATUS_GRAPHICS_ADAPTER_MUST_HAVE_AT_LEAST_ONE_TARGET \
+ cpu_to_le32(0xC01E0329)
+#define STATUS_GRAPHICS_INVALID_MONITORDESCRIPTORSET cpu_to_le32(0xC01E032A)
+#define STATUS_GRAPHICS_INVALID_MONITORDESCRIPTOR cpu_to_le32(0xC01E032B)
+#define STATUS_GRAPHICS_MONITORDESCRIPTOR_NOT_IN_SET cpu_to_le32(0xC01E032C)
+#define STATUS_GRAPHICS_MONITORDESCRIPTOR_ALREADY_IN_SET cpu_to_le32(0xC01E032D)
+#define STATUS_GRAPHICS_MONITORDESCRIPTOR_ID_MUST_BE_UNIQUE \
+ cpu_to_le32(0xC01E032E)
+#define STATUS_GRAPHICS_INVALID_VIDPN_TARGET_SUBSET_TYPE cpu_to_le32(0xC01E032F)
+#define STATUS_GRAPHICS_RESOURCES_NOT_RELATED cpu_to_le32(0xC01E0330)
+#define STATUS_GRAPHICS_SOURCE_ID_MUST_BE_UNIQUE cpu_to_le32(0xC01E0331)
+#define STATUS_GRAPHICS_TARGET_ID_MUST_BE_UNIQUE cpu_to_le32(0xC01E0332)
+#define STATUS_GRAPHICS_NO_AVAILABLE_VIDPN_TARGET cpu_to_le32(0xC01E0333)
+#define STATUS_GRAPHICS_MONITOR_COULD_NOT_BE_ASSOCIATED_WITH_ADAPTER \
+ cpu_to_le32(0xC01E0334)
+#define STATUS_GRAPHICS_NO_VIDPNMGR cpu_to_le32(0xC01E0335)
+#define STATUS_GRAPHICS_NO_ACTIVE_VIDPN cpu_to_le32(0xC01E0336)
+#define STATUS_GRAPHICS_STALE_VIDPN_TOPOLOGY cpu_to_le32(0xC01E0337)
+#define STATUS_GRAPHICS_MONITOR_NOT_CONNECTED cpu_to_le32(0xC01E0338)
+#define STATUS_GRAPHICS_SOURCE_NOT_IN_TOPOLOGY cpu_to_le32(0xC01E0339)
+#define STATUS_GRAPHICS_INVALID_PRIMARYSURFACE_SIZE cpu_to_le32(0xC01E033A)
+#define STATUS_GRAPHICS_INVALID_VISIBLEREGION_SIZE cpu_to_le32(0xC01E033B)
+#define STATUS_GRAPHICS_INVALID_STRIDE cpu_to_le32(0xC01E033C)
+#define STATUS_GRAPHICS_INVALID_PIXELFORMAT cpu_to_le32(0xC01E033D)
+#define STATUS_GRAPHICS_INVALID_COLORBASIS cpu_to_le32(0xC01E033E)
+#define STATUS_GRAPHICS_INVALID_PIXELVALUEACCESSMODE cpu_to_le32(0xC01E033F)
+#define STATUS_GRAPHICS_TARGET_NOT_IN_TOPOLOGY cpu_to_le32(0xC01E0340)
+#define STATUS_GRAPHICS_NO_DISPLAY_MODE_MANAGEMENT_SUPPORT \
+ cpu_to_le32(0xC01E0341)
+#define STATUS_GRAPHICS_VIDPN_SOURCE_IN_USE cpu_to_le32(0xC01E0342)
+#define STATUS_GRAPHICS_CANT_ACCESS_ACTIVE_VIDPN cpu_to_le32(0xC01E0343)
+#define STATUS_GRAPHICS_INVALID_PATH_IMPORTANCE_ORDINAL cpu_to_le32(0xC01E0344)
+#define STATUS_GRAPHICS_INVALID_PATH_CONTENT_GEOMETRY_TRANSFORMATION \
+ cpu_to_le32(0xC01E0345)
+#define STATUS_GRAPHICS_PATH_CONTENT_GEOMETRY_TRANSFORMATION_NOT_SUPPORTED \
+ cpu_to_le32(0xC01E0346)
+#define STATUS_GRAPHICS_INVALID_GAMMA_RAMP cpu_to_le32(0xC01E0347)
+#define STATUS_GRAPHICS_GAMMA_RAMP_NOT_SUPPORTED cpu_to_le32(0xC01E0348)
+#define STATUS_GRAPHICS_MULTISAMPLING_NOT_SUPPORTED cpu_to_le32(0xC01E0349)
+#define STATUS_GRAPHICS_MODE_NOT_IN_MODESET cpu_to_le32(0xC01E034A)
+#define STATUS_GRAPHICS_INVALID_VIDPN_TOPOLOGY_RECOMMENDATION_REASON \
+ cpu_to_le32(0xC01E034D)
+#define STATUS_GRAPHICS_INVALID_PATH_CONTENT_TYPE cpu_to_le32(0xC01E034E)
+#define STATUS_GRAPHICS_INVALID_COPYPROTECTION_TYPE cpu_to_le32(0xC01E034F)
+#define STATUS_GRAPHICS_UNASSIGNED_MODESET_ALREADY_EXISTS \
+ cpu_to_le32(0xC01E0350)
+#define STATUS_GRAPHICS_INVALID_SCANLINE_ORDERING cpu_to_le32(0xC01E0352)
+#define STATUS_GRAPHICS_TOPOLOGY_CHANGES_NOT_ALLOWED cpu_to_le32(0xC01E0353)
+#define STATUS_GRAPHICS_NO_AVAILABLE_IMPORTANCE_ORDINALS cpu_to_le32(0xC01E0354)
+#define STATUS_GRAPHICS_INCOMPATIBLE_PRIVATE_FORMAT cpu_to_le32(0xC01E0355)
+#define STATUS_GRAPHICS_INVALID_MODE_PRUNING_ALGORITHM cpu_to_le32(0xC01E0356)
+#define STATUS_GRAPHICS_INVALID_MONITOR_CAPABILITY_ORIGIN \
+ cpu_to_le32(0xC01E0357)
+#define STATUS_GRAPHICS_INVALID_MONITOR_FREQUENCYRANGE_CONSTRAINT \
+ cpu_to_le32(0xC01E0358)
+#define STATUS_GRAPHICS_MAX_NUM_PATHS_REACHED cpu_to_le32(0xC01E0359)
+#define STATUS_GRAPHICS_CANCEL_VIDPN_TOPOLOGY_AUGMENTATION \
+ cpu_to_le32(0xC01E035A)
+#define STATUS_GRAPHICS_INVALID_CLIENT_TYPE cpu_to_le32(0xC01E035B)
+#define STATUS_GRAPHICS_CLIENTVIDPN_NOT_SET cpu_to_le32(0xC01E035C)
+#define STATUS_GRAPHICS_SPECIFIED_CHILD_ALREADY_CONNECTED \
+ cpu_to_le32(0xC01E0400)
+#define STATUS_GRAPHICS_CHILD_DESCRIPTOR_NOT_SUPPORTED cpu_to_le32(0xC01E0401)
+#define STATUS_GRAPHICS_NOT_A_LINKED_ADAPTER cpu_to_le32(0xC01E0430)
+#define STATUS_GRAPHICS_LEADLINK_NOT_ENUMERATED cpu_to_le32(0xC01E0431)
+#define STATUS_GRAPHICS_CHAINLINKS_NOT_ENUMERATED cpu_to_le32(0xC01E0432)
+#define STATUS_GRAPHICS_ADAPTER_CHAIN_NOT_READY cpu_to_le32(0xC01E0433)
+#define STATUS_GRAPHICS_CHAINLINKS_NOT_STARTED cpu_to_le32(0xC01E0434)
+#define STATUS_GRAPHICS_CHAINLINKS_NOT_POWERED_ON cpu_to_le32(0xC01E0435)
+#define STATUS_GRAPHICS_INCONSISTENT_DEVICE_LINK_STATE cpu_to_le32(0xC01E0436)
+#define STATUS_GRAPHICS_NOT_POST_DEVICE_DRIVER cpu_to_le32(0xC01E0438)
+#define STATUS_GRAPHICS_ADAPTER_ACCESS_NOT_EXCLUDED cpu_to_le32(0xC01E043B)
+#define STATUS_GRAPHICS_OPM_PROTECTED_OUTPUT_DOES_NOT_HAVE_COPP_SEMANTICS \
+ cpu_to_le32(0xC01E051C)
+#define STATUS_GRAPHICS_OPM_INVALID_INFORMATION_REQUEST cpu_to_le32(0xC01E051D)
+#define STATUS_GRAPHICS_OPM_DRIVER_INTERNAL_ERROR cpu_to_le32(0xC01E051E)
+#define STATUS_GRAPHICS_OPM_PROTECTED_OUTPUT_DOES_NOT_HAVE_OPM_SEMANTICS \
+ cpu_to_le32(0xC01E051F)
+#define STATUS_GRAPHICS_OPM_SIGNALING_NOT_SUPPORTED cpu_to_le32(0xC01E0520)
+#define STATUS_GRAPHICS_OPM_INVALID_CONFIGURATION_REQUEST \
+ cpu_to_le32(0xC01E0521)
+#define STATUS_GRAPHICS_OPM_NOT_SUPPORTED cpu_to_le32(0xC01E0500)
+#define STATUS_GRAPHICS_COPP_NOT_SUPPORTED cpu_to_le32(0xC01E0501)
+#define STATUS_GRAPHICS_UAB_NOT_SUPPORTED cpu_to_le32(0xC01E0502)
+#define STATUS_GRAPHICS_OPM_INVALID_ENCRYPTED_PARAMETERS cpu_to_le32(0xC01E0503)
+#define STATUS_GRAPHICS_OPM_PARAMETER_ARRAY_TOO_SMALL cpu_to_le32(0xC01E0504)
+#define STATUS_GRAPHICS_OPM_NO_PROTECTED_OUTPUTS_EXIST cpu_to_le32(0xC01E0505)
+#define STATUS_GRAPHICS_PVP_NO_DISPLAY_DEVICE_CORRESPONDS_TO_NAME \
+ cpu_to_le32(0xC01E0506)
+#define STATUS_GRAPHICS_PVP_DISPLAY_DEVICE_NOT_ATTACHED_TO_DESKTOP \
+ cpu_to_le32(0xC01E0507)
+#define STATUS_GRAPHICS_PVP_MIRRORING_DEVICES_NOT_SUPPORTED \
+ cpu_to_le32(0xC01E0508)
+#define STATUS_GRAPHICS_OPM_INVALID_POINTER cpu_to_le32(0xC01E050A)
+#define STATUS_GRAPHICS_OPM_INTERNAL_ERROR cpu_to_le32(0xC01E050B)
+#define STATUS_GRAPHICS_OPM_INVALID_HANDLE cpu_to_le32(0xC01E050C)
+#define STATUS_GRAPHICS_PVP_NO_MONITORS_CORRESPOND_TO_DISPLAY_DEVICE \
+ cpu_to_le32(0xC01E050D)
+#define STATUS_GRAPHICS_PVP_INVALID_CERTIFICATE_LENGTH cpu_to_le32(0xC01E050E)
+#define STATUS_GRAPHICS_OPM_SPANNING_MODE_ENABLED cpu_to_le32(0xC01E050F)
+#define STATUS_GRAPHICS_OPM_THEATER_MODE_ENABLED cpu_to_le32(0xC01E0510)
+#define STATUS_GRAPHICS_PVP_HFS_FAILED cpu_to_le32(0xC01E0511)
+#define STATUS_GRAPHICS_OPM_INVALID_SRM cpu_to_le32(0xC01E0512)
+#define STATUS_GRAPHICS_OPM_OUTPUT_DOES_NOT_SUPPORT_HDCP cpu_to_le32(0xC01E0513)
+#define STATUS_GRAPHICS_OPM_OUTPUT_DOES_NOT_SUPPORT_ACP cpu_to_le32(0xC01E0514)
+#define STATUS_GRAPHICS_OPM_OUTPUT_DOES_NOT_SUPPORT_CGMSA \
+ cpu_to_le32(0xC01E0515)
+#define STATUS_GRAPHICS_OPM_HDCP_SRM_NEVER_SET cpu_to_le32(0xC01E0516)
+#define STATUS_GRAPHICS_OPM_RESOLUTION_TOO_HIGH cpu_to_le32(0xC01E0517)
+#define STATUS_GRAPHICS_OPM_ALL_HDCP_HARDWARE_ALREADY_IN_USE \
+ cpu_to_le32(0xC01E0518)
+#define STATUS_GRAPHICS_OPM_PROTECTED_OUTPUT_NO_LONGER_EXISTS \
+ cpu_to_le32(0xC01E051A)
+#define STATUS_GRAPHICS_OPM_SESSION_TYPE_CHANGE_IN_PROGRESS \
+ cpu_to_le32(0xC01E051B)
+#define STATUS_GRAPHICS_I2C_NOT_SUPPORTED cpu_to_le32(0xC01E0580)
+#define STATUS_GRAPHICS_I2C_DEVICE_DOES_NOT_EXIST cpu_to_le32(0xC01E0581)
+#define STATUS_GRAPHICS_I2C_ERROR_TRANSMITTING_DATA cpu_to_le32(0xC01E0582)
+#define STATUS_GRAPHICS_I2C_ERROR_RECEIVING_DATA cpu_to_le32(0xC01E0583)
+#define STATUS_GRAPHICS_DDCCI_VCP_NOT_SUPPORTED cpu_to_le32(0xC01E0584)
+#define STATUS_GRAPHICS_DDCCI_INVALID_DATA cpu_to_le32(0xC01E0585)
+#define STATUS_GRAPHICS_DDCCI_MONITOR_RETURNED_INVALID_TIMING_STATUS_BYTE \
+ cpu_to_le32(0xC01E0586)
+#define STATUS_GRAPHICS_DDCCI_INVALID_CAPABILITIES_STRING \
+ cpu_to_le32(0xC01E0587)
+#define STATUS_GRAPHICS_MCA_INTERNAL_ERROR cpu_to_le32(0xC01E0588)
+#define STATUS_GRAPHICS_DDCCI_INVALID_MESSAGE_COMMAND cpu_to_le32(0xC01E0589)
+#define STATUS_GRAPHICS_DDCCI_INVALID_MESSAGE_LENGTH cpu_to_le32(0xC01E058A)
+#define STATUS_GRAPHICS_DDCCI_INVALID_MESSAGE_CHECKSUM cpu_to_le32(0xC01E058B)
+#define STATUS_GRAPHICS_INVALID_PHYSICAL_MONITOR_HANDLE cpu_to_le32(0xC01E058C)
+#define STATUS_GRAPHICS_MONITOR_NO_LONGER_EXISTS cpu_to_le32(0xC01E058D)
+#define STATUS_GRAPHICS_ONLY_CONSOLE_SESSION_SUPPORTED cpu_to_le32(0xC01E05E0)
+#define STATUS_GRAPHICS_NO_DISPLAY_DEVICE_CORRESPONDS_TO_NAME \
+ cpu_to_le32(0xC01E05E1)
+#define STATUS_GRAPHICS_DISPLAY_DEVICE_NOT_ATTACHED_TO_DESKTOP \
+ cpu_to_le32(0xC01E05E2)
+#define STATUS_GRAPHICS_MIRRORING_DEVICES_NOT_SUPPORTED cpu_to_le32(0xC01E05E3)
+#define STATUS_GRAPHICS_INVALID_POINTER cpu_to_le32(0xC01E05E4)
+#define STATUS_GRAPHICS_NO_MONITORS_CORRESPOND_TO_DISPLAY_DEVICE \
+ cpu_to_le32(0xC01E05E5)
+#define STATUS_GRAPHICS_PARAMETER_ARRAY_TOO_SMALL cpu_to_le32(0xC01E05E6)
+#define STATUS_GRAPHICS_INTERNAL_ERROR cpu_to_le32(0xC01E05E7)
+#define STATUS_GRAPHICS_SESSION_TYPE_CHANGE_IN_PROGRESS cpu_to_le32(0xC01E05E8)
+#define STATUS_FVE_LOCKED_VOLUME cpu_to_le32(0xC0210000)
+#define STATUS_FVE_NOT_ENCRYPTED cpu_to_le32(0xC0210001)
+#define STATUS_FVE_BAD_INFORMATION cpu_to_le32(0xC0210002)
+#define STATUS_FVE_TOO_SMALL cpu_to_le32(0xC0210003)
+#define STATUS_FVE_FAILED_WRONG_FS cpu_to_le32(0xC0210004)
+#define STATUS_FVE_FAILED_BAD_FS cpu_to_le32(0xC0210005)
+#define STATUS_FVE_FS_NOT_EXTENDED cpu_to_le32(0xC0210006)
+#define STATUS_FVE_FS_MOUNTED cpu_to_le32(0xC0210007)
+#define STATUS_FVE_NO_LICENSE cpu_to_le32(0xC0210008)
+#define STATUS_FVE_ACTION_NOT_ALLOWED cpu_to_le32(0xC0210009)
+#define STATUS_FVE_BAD_DATA cpu_to_le32(0xC021000A)
+#define STATUS_FVE_VOLUME_NOT_BOUND cpu_to_le32(0xC021000B)
+#define STATUS_FVE_NOT_DATA_VOLUME cpu_to_le32(0xC021000C)
+#define STATUS_FVE_CONV_READ_ERROR cpu_to_le32(0xC021000D)
+#define STATUS_FVE_CONV_WRITE_ERROR cpu_to_le32(0xC021000E)
+#define STATUS_FVE_OVERLAPPED_UPDATE cpu_to_le32(0xC021000F)
+#define STATUS_FVE_FAILED_SECTOR_SIZE cpu_to_le32(0xC0210010)
+#define STATUS_FVE_FAILED_AUTHENTICATION cpu_to_le32(0xC0210011)
+#define STATUS_FVE_NOT_OS_VOLUME cpu_to_le32(0xC0210012)
+#define STATUS_FVE_KEYFILE_NOT_FOUND cpu_to_le32(0xC0210013)
+#define STATUS_FVE_KEYFILE_INVALID cpu_to_le32(0xC0210014)
+#define STATUS_FVE_KEYFILE_NO_VMK cpu_to_le32(0xC0210015)
+#define STATUS_FVE_TPM_DISABLED cpu_to_le32(0xC0210016)
+#define STATUS_FVE_TPM_SRK_AUTH_NOT_ZERO cpu_to_le32(0xC0210017)
+#define STATUS_FVE_TPM_INVALID_PCR cpu_to_le32(0xC0210018)
+#define STATUS_FVE_TPM_NO_VMK cpu_to_le32(0xC0210019)
+#define STATUS_FVE_PIN_INVALID cpu_to_le32(0xC021001A)
+#define STATUS_FVE_AUTH_INVALID_APPLICATION cpu_to_le32(0xC021001B)
+#define STATUS_FVE_AUTH_INVALID_CONFIG cpu_to_le32(0xC021001C)
+#define STATUS_FVE_DEBUGGER_ENABLED cpu_to_le32(0xC021001D)
+#define STATUS_FVE_DRY_RUN_FAILED cpu_to_le32(0xC021001E)
+#define STATUS_FVE_BAD_METADATA_POINTER cpu_to_le32(0xC021001F)
+#define STATUS_FVE_OLD_METADATA_COPY cpu_to_le32(0xC0210020)
+#define STATUS_FVE_REBOOT_REQUIRED cpu_to_le32(0xC0210021)
+#define STATUS_FVE_RAW_ACCESS cpu_to_le32(0xC0210022)
+#define STATUS_FVE_RAW_BLOCKED cpu_to_le32(0xC0210023)
+#define STATUS_FWP_CALLOUT_NOT_FOUND cpu_to_le32(0xC0220001)
+#define STATUS_FWP_CONDITION_NOT_FOUND cpu_to_le32(0xC0220002)
+#define STATUS_FWP_FILTER_NOT_FOUND cpu_to_le32(0xC0220003)
+#define STATUS_FWP_LAYER_NOT_FOUND cpu_to_le32(0xC0220004)
+#define STATUS_FWP_PROVIDER_NOT_FOUND cpu_to_le32(0xC0220005)
+#define STATUS_FWP_PROVIDER_CONTEXT_NOT_FOUND cpu_to_le32(0xC0220006)
+#define STATUS_FWP_SUBLAYER_NOT_FOUND cpu_to_le32(0xC0220007)
+#define STATUS_FWP_NOT_FOUND cpu_to_le32(0xC0220008)
+#define STATUS_FWP_ALREADY_EXISTS cpu_to_le32(0xC0220009)
+#define STATUS_FWP_IN_USE cpu_to_le32(0xC022000A)
+#define STATUS_FWP_DYNAMIC_SESSION_IN_PROGRESS cpu_to_le32(0xC022000B)
+#define STATUS_FWP_WRONG_SESSION cpu_to_le32(0xC022000C)
+#define STATUS_FWP_NO_TXN_IN_PROGRESS cpu_to_le32(0xC022000D)
+#define STATUS_FWP_TXN_IN_PROGRESS cpu_to_le32(0xC022000E)
+#define STATUS_FWP_TXN_ABORTED cpu_to_le32(0xC022000F)
+#define STATUS_FWP_SESSION_ABORTED cpu_to_le32(0xC0220010)
+#define STATUS_FWP_INCOMPATIBLE_TXN cpu_to_le32(0xC0220011)
+#define STATUS_FWP_TIMEOUT cpu_to_le32(0xC0220012)
+#define STATUS_FWP_NET_EVENTS_DISABLED cpu_to_le32(0xC0220013)
+#define STATUS_FWP_INCOMPATIBLE_LAYER cpu_to_le32(0xC0220014)
+#define STATUS_FWP_KM_CLIENTS_ONLY cpu_to_le32(0xC0220015)
+#define STATUS_FWP_LIFETIME_MISMATCH cpu_to_le32(0xC0220016)
+#define STATUS_FWP_BUILTIN_OBJECT cpu_to_le32(0xC0220017)
+#define STATUS_FWP_TOO_MANY_BOOTTIME_FILTERS cpu_to_le32(0xC0220018)
+#define STATUS_FWP_TOO_MANY_CALLOUTS cpu_to_le32(0xC0220018)
+#define STATUS_FWP_NOTIFICATION_DROPPED cpu_to_le32(0xC0220019)
+#define STATUS_FWP_TRAFFIC_MISMATCH cpu_to_le32(0xC022001A)
+#define STATUS_FWP_INCOMPATIBLE_SA_STATE cpu_to_le32(0xC022001B)
+#define STATUS_FWP_NULL_POINTER cpu_to_le32(0xC022001C)
+#define STATUS_FWP_INVALID_ENUMERATOR cpu_to_le32(0xC022001D)
+#define STATUS_FWP_INVALID_FLAGS cpu_to_le32(0xC022001E)
+#define STATUS_FWP_INVALID_NET_MASK cpu_to_le32(0xC022001F)
+#define STATUS_FWP_INVALID_RANGE cpu_to_le32(0xC0220020)
+#define STATUS_FWP_INVALID_INTERVAL cpu_to_le32(0xC0220021)
+#define STATUS_FWP_ZERO_LENGTH_ARRAY cpu_to_le32(0xC0220022)
+#define STATUS_FWP_NULL_DISPLAY_NAME cpu_to_le32(0xC0220023)
+#define STATUS_FWP_INVALID_ACTION_TYPE cpu_to_le32(0xC0220024)
+#define STATUS_FWP_INVALID_WEIGHT cpu_to_le32(0xC0220025)
+#define STATUS_FWP_MATCH_TYPE_MISMATCH cpu_to_le32(0xC0220026)
+#define STATUS_FWP_TYPE_MISMATCH cpu_to_le32(0xC0220027)
+#define STATUS_FWP_OUT_OF_BOUNDS cpu_to_le32(0xC0220028)
+#define STATUS_FWP_RESERVED cpu_to_le32(0xC0220029)
+#define STATUS_FWP_DUPLICATE_CONDITION cpu_to_le32(0xC022002A)
+#define STATUS_FWP_DUPLICATE_KEYMOD cpu_to_le32(0xC022002B)
+#define STATUS_FWP_ACTION_INCOMPATIBLE_WITH_LAYER cpu_to_le32(0xC022002C)
+#define STATUS_FWP_ACTION_INCOMPATIBLE_WITH_SUBLAYER cpu_to_le32(0xC022002D)
+#define STATUS_FWP_CONTEXT_INCOMPATIBLE_WITH_LAYER cpu_to_le32(0xC022002E)
+#define STATUS_FWP_CONTEXT_INCOMPATIBLE_WITH_CALLOUT cpu_to_le32(0xC022002F)
+#define STATUS_FWP_INCOMPATIBLE_AUTH_METHOD cpu_to_le32(0xC0220030)
+#define STATUS_FWP_INCOMPATIBLE_DH_GROUP cpu_to_le32(0xC0220031)
+#define STATUS_FWP_EM_NOT_SUPPORTED cpu_to_le32(0xC0220032)
+#define STATUS_FWP_NEVER_MATCH cpu_to_le32(0xC0220033)
+#define STATUS_FWP_PROVIDER_CONTEXT_MISMATCH cpu_to_le32(0xC0220034)
+#define STATUS_FWP_INVALID_PARAMETER cpu_to_le32(0xC0220035)
+#define STATUS_FWP_TOO_MANY_SUBLAYERS cpu_to_le32(0xC0220036)
+#define STATUS_FWP_CALLOUT_NOTIFICATION_FAILED cpu_to_le32(0xC0220037)
+#define STATUS_FWP_INCOMPATIBLE_AUTH_CONFIG cpu_to_le32(0xC0220038)
+#define STATUS_FWP_INCOMPATIBLE_CIPHER_CONFIG cpu_to_le32(0xC0220039)
+#define STATUS_FWP_TCPIP_NOT_READY cpu_to_le32(0xC0220100)
+#define STATUS_FWP_INJECT_HANDLE_CLOSING cpu_to_le32(0xC0220101)
+#define STATUS_FWP_INJECT_HANDLE_STALE cpu_to_le32(0xC0220102)
+#define STATUS_FWP_CANNOT_PEND cpu_to_le32(0xC0220103)
+#define STATUS_NDIS_CLOSING cpu_to_le32(0xC0230002)
+#define STATUS_NDIS_BAD_VERSION cpu_to_le32(0xC0230004)
+#define STATUS_NDIS_BAD_CHARACTERISTICS cpu_to_le32(0xC0230005)
+#define STATUS_NDIS_ADAPTER_NOT_FOUND cpu_to_le32(0xC0230006)
+#define STATUS_NDIS_OPEN_FAILED cpu_to_le32(0xC0230007)
+#define STATUS_NDIS_DEVICE_FAILED cpu_to_le32(0xC0230008)
+#define STATUS_NDIS_MULTICAST_FULL cpu_to_le32(0xC0230009)
+#define STATUS_NDIS_MULTICAST_EXISTS cpu_to_le32(0xC023000A)
+#define STATUS_NDIS_MULTICAST_NOT_FOUND cpu_to_le32(0xC023000B)
+#define STATUS_NDIS_REQUEST_ABORTED cpu_to_le32(0xC023000C)
+#define STATUS_NDIS_RESET_IN_PROGRESS cpu_to_le32(0xC023000D)
+#define STATUS_NDIS_INVALID_PACKET cpu_to_le32(0xC023000F)
+#define STATUS_NDIS_INVALID_DEVICE_REQUEST cpu_to_le32(0xC0230010)
+#define STATUS_NDIS_ADAPTER_NOT_READY cpu_to_le32(0xC0230011)
+#define STATUS_NDIS_INVALID_LENGTH cpu_to_le32(0xC0230014)
+#define STATUS_NDIS_INVALID_DATA cpu_to_le32(0xC0230015)
+#define STATUS_NDIS_BUFFER_TOO_SHORT cpu_to_le32(0xC0230016)
+#define STATUS_NDIS_INVALID_OID cpu_to_le32(0xC0230017)
+#define STATUS_NDIS_ADAPTER_REMOVED cpu_to_le32(0xC0230018)
+#define STATUS_NDIS_UNSUPPORTED_MEDIA cpu_to_le32(0xC0230019)
+#define STATUS_NDIS_GROUP_ADDRESS_IN_USE cpu_to_le32(0xC023001A)
+#define STATUS_NDIS_FILE_NOT_FOUND cpu_to_le32(0xC023001B)
+#define STATUS_NDIS_ERROR_READING_FILE cpu_to_le32(0xC023001C)
+#define STATUS_NDIS_ALREADY_MAPPED cpu_to_le32(0xC023001D)
+#define STATUS_NDIS_RESOURCE_CONFLICT cpu_to_le32(0xC023001E)
+#define STATUS_NDIS_MEDIA_DISCONNECTED cpu_to_le32(0xC023001F)
+#define STATUS_NDIS_INVALID_ADDRESS cpu_to_le32(0xC0230022)
+#define STATUS_NDIS_PAUSED cpu_to_le32(0xC023002A)
+#define STATUS_NDIS_INTERFACE_NOT_FOUND cpu_to_le32(0xC023002B)
+#define STATUS_NDIS_UNSUPPORTED_REVISION cpu_to_le32(0xC023002C)
+#define STATUS_NDIS_INVALID_PORT cpu_to_le32(0xC023002D)
+#define STATUS_NDIS_INVALID_PORT_STATE cpu_to_le32(0xC023002E)
+#define STATUS_NDIS_LOW_POWER_STATE cpu_to_le32(0xC023002F)
+#define STATUS_NDIS_NOT_SUPPORTED cpu_to_le32(0xC02300BB)
+#define STATUS_NDIS_DOT11_AUTO_CONFIG_ENABLED cpu_to_le32(0xC0232000)
+#define STATUS_NDIS_DOT11_MEDIA_IN_USE cpu_to_le32(0xC0232001)
+#define STATUS_NDIS_DOT11_POWER_STATE_INVALID cpu_to_le32(0xC0232002)
+#define STATUS_IPSEC_BAD_SPI cpu_to_le32(0xC0360001)
+#define STATUS_IPSEC_SA_LIFETIME_EXPIRED cpu_to_le32(0xC0360002)
+#define STATUS_IPSEC_WRONG_SA cpu_to_le32(0xC0360003)
+#define STATUS_IPSEC_REPLAY_CHECK_FAILED cpu_to_le32(0xC0360004)
+#define STATUS_IPSEC_INVALID_PACKET cpu_to_le32(0xC0360005)
+#define STATUS_IPSEC_INTEGRITY_CHECK_FAILED cpu_to_le32(0xC0360006)
+#define STATUS_IPSEC_CLEAR_TEXT_DROP cpu_to_le32(0xC0360007)
+
+#define STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP cpu_to_le32(0xC05D0000)
+#define STATUS_INVALID_LOCK_RANGE cpu_to_le32(0xC00001a1)
diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c
new file mode 100644
index 000000000000..44aea33a67fa
--- /dev/null
+++ b/fs/ksmbd/transport_ipc.c
@@ -0,0 +1,874 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
+#include <linux/hashtable.h>
+#include <net/net_namespace.h>
+#include <net/genetlink.h>
+#include <linux/socket.h>
+#include <linux/workqueue.h>
+
+#include "vfs_cache.h"
+#include "transport_ipc.h"
+#include "server.h"
+#include "smb_common.h"
+
+#include "mgmt/user_config.h"
+#include "mgmt/share_config.h"
+#include "mgmt/user_session.h"
+#include "mgmt/tree_connect.h"
+#include "mgmt/ksmbd_ida.h"
+#include "connection.h"
+#include "transport_tcp.h"
+
+/* Upper bound on how long ipc_msg_send_request() waits for a reply. */
+#define IPC_WAIT_TIMEOUT (2 * HZ)
+
+/* ipc_msg_table is declared with this many hash bits. */
+#define IPC_MSG_HASH_BITS 3
+/* Requests awaiting a reply, keyed by handle; guarded by ipc_msg_table_lock. */
+static DEFINE_HASHTABLE(ipc_msg_table, IPC_MSG_HASH_BITS);
+static DECLARE_RWSEM(ipc_msg_table_lock);
+/* Held across the body of handle_startup_event(). */
+static DEFINE_MUTEX(startup_lock);
+
+/* Source of the per-request handles. */
+static DEFINE_IDA(ipc_ida);
+
+/* Netlink port id of the registered daemon; 0 while none is registered. */
+static unsigned int ksmbd_tools_pid;
+
+/*
+ * Compare the version in the generic netlink header of @m with
+ * KSMBD_GENL_VERSION. A mismatch is logged.
+ *
+ * Return: true when the versions are equal, false otherwise.
+ */
+static bool ksmbd_ipc_validate_version(struct genl_info *m)
+{
+	const bool version_ok = m->genlhdr->version == KSMBD_GENL_VERSION;
+
+	if (!version_ok) {
+		pr_err("%s. ksmbd: %d, kernel module: %d. %s.\n",
+		       "Daemon and kernel module version mismatch",
+		       m->genlhdr->version,
+		       KSMBD_GENL_VERSION,
+		       "User-space ksmbd should terminate");
+	}
+	return version_ok;
+}
+
+/*
+ * Outgoing message as built by the ksmbd_ipc_*_request() helpers.
+ * ipc_msg_send() uses @type both as the generic netlink command and as the
+ * attribute type, and sends @sz bytes of @payload as the attribute data.
+ */
+struct ksmbd_ipc_msg {
+ unsigned int type;
+ unsigned int sz;
+ unsigned char payload[];
+};
+
+/*
+ * Bookkeeping for one request that is waiting for its reply. The entry is
+ * linked into ipc_msg_table by ipc_msg_send_request() and looked up by
+ * handle_response().
+ */
+struct ipc_msg_table_entry {
+ unsigned int handle; /* key used for the hash lookup */
+ unsigned int type; /* request type; the reply is expected to be type + 1 */
+ wait_queue_head_t wait; /* the requester sleeps here until response is set */
+ struct hlist_node ipc_table_hlist;
+
+ void *response; /* copy of the reply payload, NULL until one arrives */
+};
+
+/* Scheduled from ipc_server_config_on_startup() when an IPC timeout is set. */
+static struct delayed_work ipc_timer_work;
+
+/* Forward declarations: referenced by ksmbd_genl_ops before they are defined. */
+static int handle_startup_event(struct sk_buff *skb, struct genl_info *info);
+static int handle_unsupported_event(struct sk_buff *skb, struct genl_info *info);
+static int handle_generic_event(struct sk_buff *skb, struct genl_info *info);
+static int ksmbd_ipc_heartbeat_request(void);
+
+/*
+ * Attribute policy, indexed by attribute type. ipc_msg_send() uses the event
+ * id as the attribute type, so each slot describes the payload of one event.
+ * Slots without a length place no size requirement on the payload.
+ *
+ * The array needs KSMBD_EVENT_MAX + 1 slots: ksmbd_genl_family sets
+ * .maxattr = KSMBD_EVENT_MAX and the netlink core may index the policy with
+ * any attribute type up to and including maxattr.
+ */
+static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX + 1] = {
+	[KSMBD_EVENT_UNSPEC] = {
+		.len = 0,
+	},
+	[KSMBD_EVENT_HEARTBEAT_REQUEST] = {
+		.len = sizeof(struct ksmbd_heartbeat),
+	},
+	[KSMBD_EVENT_STARTING_UP] = {
+		.len = sizeof(struct ksmbd_startup_request),
+	},
+	[KSMBD_EVENT_SHUTTING_DOWN] = {
+		.len = sizeof(struct ksmbd_shutdown_request),
+	},
+	[KSMBD_EVENT_LOGIN_REQUEST] = {
+		.len = sizeof(struct ksmbd_login_request),
+	},
+	[KSMBD_EVENT_LOGIN_RESPONSE] = {
+		.len = sizeof(struct ksmbd_login_response),
+	},
+	[KSMBD_EVENT_SHARE_CONFIG_REQUEST] = {
+		.len = sizeof(struct ksmbd_share_config_request),
+	},
+	[KSMBD_EVENT_SHARE_CONFIG_RESPONSE] = {
+		.len = sizeof(struct ksmbd_share_config_response),
+	},
+	[KSMBD_EVENT_TREE_CONNECT_REQUEST] = {
+		.len = sizeof(struct ksmbd_tree_connect_request),
+	},
+	[KSMBD_EVENT_TREE_CONNECT_RESPONSE] = {
+		.len = sizeof(struct ksmbd_tree_connect_response),
+	},
+	[KSMBD_EVENT_TREE_DISCONNECT_REQUEST] = {
+		.len = sizeof(struct ksmbd_tree_disconnect_request),
+	},
+	[KSMBD_EVENT_LOGOUT_REQUEST] = {
+		.len = sizeof(struct ksmbd_logout_request),
+	},
+	[KSMBD_EVENT_RPC_REQUEST] = {
+	},
+	[KSMBD_EVENT_RPC_RESPONSE] = {
+	},
+	[KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST] = {
+	},
+	[KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE] = {
+	},
+};
+
+/*
+ * Handler table, one entry per event id. Every *_REQUEST event, as well as
+ * UNSPEC and SHUTTING_DOWN, is rejected through handle_unsupported_event();
+ * every *_RESPONSE event goes to handle_generic_event(); STARTING_UP has its
+ * own handler. The table is not const because ksmbd_nl_init_fixup() fills in
+ * the .validate field of each entry.
+ */
+static struct genl_ops ksmbd_genl_ops[] = {
+ {
+ .cmd = KSMBD_EVENT_UNSPEC,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_HEARTBEAT_REQUEST,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_STARTING_UP,
+ .doit = handle_startup_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_SHUTTING_DOWN,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_LOGIN_REQUEST,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_LOGIN_RESPONSE,
+ .doit = handle_generic_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_SHARE_CONFIG_REQUEST,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_SHARE_CONFIG_RESPONSE,
+ .doit = handle_generic_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_TREE_CONNECT_REQUEST,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_TREE_CONNECT_RESPONSE,
+ .doit = handle_generic_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_TREE_DISCONNECT_REQUEST,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_LOGOUT_REQUEST,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_RPC_REQUEST,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_RPC_RESPONSE,
+ .doit = handle_generic_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE,
+ .doit = handle_generic_event,
+ },
+};
+
+/*
+ * Generic netlink family used to talk to the user-space daemon. The .policy
+ * field is not set here; ksmbd_nl_init_fixup() points it at ksmbd_nl_policy.
+ */
+static struct genl_family ksmbd_genl_family = {
+ .name = KSMBD_GENL_NAME,
+ .version = KSMBD_GENL_VERSION,
+ .hdrsize = 0,
+ .maxattr = KSMBD_EVENT_MAX,
+ .netnsok = true,
+ .module = THIS_MODULE,
+ .ops = ksmbd_genl_ops,
+ .n_ops = ARRAY_SIZE(ksmbd_genl_ops),
+};
+
+/*
+ * Finish the static netlink tables: give every operation the same validation
+ * flags and attach the attribute policy to the family.
+ */
+static void ksmbd_nl_init_fixup(void)
+{
+	struct genl_ops *op = ksmbd_genl_ops;
+	struct genl_ops *const end = ksmbd_genl_ops + ARRAY_SIZE(ksmbd_genl_ops);
+
+	for (; op != end; op++)
+		op->validate = GENL_DONT_VALIDATE_STRICT |
+			       GENL_DONT_VALIDATE_DUMP;
+
+	ksmbd_genl_family.policy = ksmbd_nl_policy;
+}
+
+/*
+ * Return KSMBD_RPC_RESTRICTED_CONTEXT when the session's user is a guest,
+ * 0 otherwise.
+ */
+static int rpc_context_flags(struct ksmbd_session *sess)
+{
+	return user_guest(sess->user) ? KSMBD_RPC_RESTRICTED_CONTEXT : 0;
+}
+
+/* Record the current jiffies as the last IPC activity, if a timeout is set. */
+static void ipc_update_last_active(void)
+{
+	if (!server_conf.ipc_timeout)
+		return;
+
+	server_conf.ipc_last_active = jiffies;
+}
+
+/*
+ * Allocate a zeroed message with room for @sz payload bytes and record @sz
+ * in it.
+ *
+ * Return: the new message, or NULL if the allocation fails. Release it with
+ * ipc_msg_free().
+ */
+static struct ksmbd_ipc_msg *ipc_msg_alloc(size_t sz)
+{
+	struct ksmbd_ipc_msg *msg;
+
+	msg = kvzalloc(sizeof(struct ksmbd_ipc_msg) + sz, GFP_KERNEL);
+	if (!msg)
+		return NULL;
+
+	msg->sz = sz;
+	return msg;
+}
+
+/* Release a message with kvfree(); counterpart of ipc_msg_alloc(). */
+static void ipc_msg_free(struct ksmbd_ipc_msg *msg)
+{
+ kvfree(msg);
+}
+
+/* Give @handle back to ipc_ida; negative values are ignored. */
+static void ipc_msg_handle_free(int handle)
+{
+	if (handle < 0)
+		return;
+
+	ksmbd_release_id(&ipc_ida, handle);
+}
+
+/*
+ * Hand a reply from the daemon to the request that is waiting for it.
+ *
+ * @type:    generic netlink command of the reply
+ * @payload: reply data; it starts with the unsigned int request handle
+ * @sz:      size of @payload in bytes
+ *
+ * The handle is looked up in ipc_msg_table. On a match, a private copy of
+ * the payload is stored in the entry and the waiter is woken up. A reply
+ * that matches no pending request, or whose type is not the request type
+ * plus one, is dropped.
+ *
+ * Return: 0 when the reply was delivered or dropped, -EINVAL when @payload
+ * is too short to contain a handle, -ENOMEM when the copy cannot be
+ * allocated.
+ */
+static int handle_response(int type, void *payload, size_t sz)
+{
+	struct ipc_msg_table_entry *entry;
+	unsigned int handle;
+	void *resp;
+	int ret = 0;
+
+	ipc_update_last_active();
+
+	/*
+	 * Some policy entries carry no length, so the attribute may be
+	 * shorter than the handle that is read from it below.
+	 */
+	if (sz < sizeof(handle))
+		return -EINVAL;
+	handle = *(unsigned int *)payload;
+
+	down_read(&ipc_msg_table_lock);
+	hash_for_each_possible(ipc_msg_table, entry, ipc_table_hlist, handle) {
+		if (handle != entry->handle)
+			continue;
+
+		/*
+		 * Response message type value should be equal to
+		 * request message type + 1. Anything else must not reach
+		 * the waiter, which would interpret it as the wrong struct.
+		 */
+		if (entry->type + 1 != type) {
+			pr_err("Waiting for IPC type %d, got %d. Ignore.\n",
+			       entry->type + 1, type);
+			continue;
+		}
+
+		resp = kvmalloc(sz, GFP_KERNEL | __GFP_ZERO);
+		if (!resp) {
+			ret = -ENOMEM;
+			break;
+		}
+		memcpy(resp, payload, sz);
+
+		/*
+		 * A second reply for the same handle replaces the earlier
+		 * copy instead of leaking it. The waiter only picks the
+		 * pointer up after taking ipc_msg_table_lock for writing.
+		 * NOTE(review): relies on netlink handlers for this family
+		 * not running concurrently - confirm.
+		 */
+		kvfree(entry->response);
+		entry->response = resp;
+		wake_up_interruptible(&entry->wait);
+		break;
+	}
+	up_read(&ipc_msg_table_lock);
+
+	return ret;
+}
+
+/*
+ * Apply the configuration carried by the daemon's startup request to
+ * server_conf and the related subsystems, and arm ipc_timer_work when an IPC
+ * timeout is configured.
+ *
+ * Return: 0 on success, otherwise the first non-zero value returned while
+ * setting the NetBIOS name, server string, work group or interface list.
+ */
+static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
+{
+	int ret, err, idx;
+
+	ksmbd_set_fd_limit(req->file_max);
+	server_conf.flags = req->flags;
+	server_conf.signing = req->signing;
+	server_conf.tcp_port = req->tcp_port;
+	server_conf.ipc_timeout = req->ipc_timeout * HZ;
+	server_conf.deadtime = req->deadtime * SMB_ECHO_INTERVAL;
+	server_conf.share_fake_fscaps = req->share_fake_fscaps;
+	ksmbd_init_domain(req->sub_auth);
+
+	/* A zero limit in the request keeps the current value. */
+	if (req->smb2_max_read)
+		init_smb2_max_read_size(req->smb2_max_read);
+	if (req->smb2_max_write)
+		init_smb2_max_write_size(req->smb2_max_write);
+	if (req->smb2_max_trans)
+		init_smb2_max_trans_size(req->smb2_max_trans);
+
+	/*
+	 * Run every setter, as before, but keep the first failure intact.
+	 * OR-ing the results together would blend distinct error codes into
+	 * an unrelated value (presumably these return negative errnos).
+	 */
+	ret = ksmbd_set_netbios_name(req->netbios_name);
+	err = ksmbd_set_server_string(req->server_string);
+	if (!ret)
+		ret = err;
+	err = ksmbd_set_work_group(req->work_group);
+	if (!ret)
+		ret = err;
+	err = ksmbd_tcp_set_interfaces(KSMBD_STARTUP_CONFIG_INTERFACES(req),
+				       req->ifc_list_sz);
+	if (!ret)
+		ret = err;
+	if (ret) {
+		pr_err("Server configuration error: %s %s %s\n",
+		       req->netbios_name, req->server_string,
+		       req->work_group);
+		return ret;
+	}
+
+	/* An empty or unrecognized protocol name leaves the limit unchanged. */
+	if (req->min_prot[0]) {
+		idx = ksmbd_lookup_protocol_idx(req->min_prot);
+		if (idx >= 0)
+			server_conf.min_protocol = idx;
+	}
+	if (req->max_prot[0]) {
+		idx = ksmbd_lookup_protocol_idx(req->max_prot);
+		if (idx >= 0)
+			server_conf.max_protocol = idx;
+	}
+
+	if (server_conf.ipc_timeout)
+		schedule_delayed_work(&ipc_timer_work, server_conf.ipc_timeout);
+	return 0;
+}
+
+/*
+ * Handle KSMBD_EVENT_STARTING_UP from the user-space daemon.
+ *
+ * With no daemon registered, the request's configuration is applied and the
+ * server init work is queued. With one registered, a heartbeat is sent to
+ * it: if that succeeds the newcomer is refused, otherwise the newcomer takes
+ * over. On success the sender's port id becomes ksmbd_tools_pid.
+ *
+ * Return: 0 on success, -EPERM or -EINVAL on rejection, or the error from
+ * ipc_server_config_on_startup().
+ */
+static int handle_startup_event(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ksmbd_startup_request *req;
+	int ret = 0;
+
+#ifdef CONFIG_SMB_SERVER_CHECK_CAP_NET_ADMIN
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
+		return -EPERM;
+#endif
+
+	if (!ksmbd_ipc_validate_version(info) ||
+	    !info->attrs[KSMBD_EVENT_STARTING_UP])
+		return -EINVAL;
+
+	mutex_lock(&startup_lock);
+	if (!ksmbd_server_configurable()) {
+		mutex_unlock(&startup_lock);
+		pr_err("Server reset is in progress, can't start daemon\n");
+		return -EINVAL;
+	}
+
+	if (!ksmbd_tools_pid) {
+		/* First daemon: take the configuration from its request. */
+		req = nla_data(info->attrs[info->genlhdr->cmd]);
+		ret = ipc_server_config_on_startup(req);
+		if (ret)
+			goto unlock;
+		server_queue_ctrl_init_work();
+	} else if (ksmbd_ipc_heartbeat_request() == 0) {
+		/* The heartbeat reached the registered daemon; refuse. */
+		ret = -EINVAL;
+		goto unlock;
+	} else {
+		pr_err("Reconnect to a new user space daemon\n");
+	}
+
+	ksmbd_tools_pid = info->snd_portid;
+	ipc_update_last_active();
+
+unlock:
+	mutex_unlock(&startup_lock);
+	return ret;
+}
+
+/* Log the command of an event this side does not accept and fail it. */
+static int handle_unsupported_event(struct sk_buff *skb, struct genl_info *info)
+{
+	const int cmd = info->genlhdr->cmd;
+
+	pr_err("Unknown IPC event: %d, ignore.\n", cmd);
+	return -EINVAL;
+}
+
+/*
+ * Handle a response event: after the capability (if configured), range and
+ * version checks, pass the attribute whose type equals the command to
+ * handle_response().
+ *
+ * Return: the result of handle_response(), or -EPERM / -EINVAL when a check
+ * fails or the attribute is missing.
+ */
+static int handle_generic_event(struct sk_buff *skb, struct genl_info *info)
+{
+	const int type = info->genlhdr->cmd;
+	struct nlattr *attr;
+
+#ifdef CONFIG_SMB_SERVER_CHECK_CAP_NET_ADMIN
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
+		return -EPERM;
+#endif
+
+	if (WARN_ON(type >= KSMBD_EVENT_MAX))
+		return -EINVAL;
+
+	if (!ksmbd_ipc_validate_version(info))
+		return -EINVAL;
+
+	attr = info->attrs[type];
+	if (!attr)
+		return -EINVAL;
+
+	return handle_response(type, nla_data(attr), nla_len(attr));
+}
+
+/*
+ * Send @msg to the registered daemon as a generic netlink unicast. The
+ * message type serves as both the command and the attribute type.
+ *
+ * Return: 0 on success, -EINVAL when no daemon is registered or the header
+ * cannot be added, -ENOMEM when the skb cannot be allocated, otherwise the
+ * error from nla_put() or genlmsg_unicast().
+ */
+static int ipc_msg_send(struct ksmbd_ipc_msg *msg)
+{
+	struct sk_buff *skb;
+	struct genlmsghdr *hdr;
+	int ret;
+
+	if (!ksmbd_tools_pid)
+		return -EINVAL;
+
+	skb = genlmsg_new(msg->sz, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(skb, 0, 0, &ksmbd_genl_family, 0, msg->type);
+	if (!hdr) {
+		ret = -EINVAL;
+		goto free_skb;
+	}
+
+	ret = nla_put(skb, msg->type, msg->sz, msg->payload);
+	if (ret) {
+		genlmsg_cancel(skb, hdr);
+		goto free_skb;
+	}
+
+	genlmsg_end(skb, hdr);
+	ret = genlmsg_unicast(&init_net, skb, ksmbd_tools_pid);
+	if (ret == 0)
+		ipc_update_last_active();
+	return ret;
+
+free_skb:
+	nlmsg_free(skb);
+	return ret;
+}
+
+/*
+ * Send @msg and wait up to IPC_WAIT_TIMEOUT for the reply carrying @handle.
+ *
+ * An on-stack table entry is published in ipc_msg_table for the duration of
+ * the call, so handle_response() can attach the reply to it. The entry is
+ * always unlinked before returning.
+ *
+ * Return: the reply buffer stored by handle_response(), or NULL when
+ * @handle is negative, sending failed, or no reply had been stored by the
+ * time the wait ended.
+ */
+static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle)
+{
+	struct ipc_msg_table_entry entry;
+	int rc;
+
+	if ((int)handle < 0)
+		return NULL;
+
+	init_waitqueue_head(&entry.wait);
+	entry.response = NULL;
+	entry.type = msg->type;
+
+	down_write(&ipc_msg_table_lock);
+	entry.handle = handle;
+	hash_add(ipc_msg_table, &entry.ipc_table_hlist, entry.handle);
+	up_write(&ipc_msg_table_lock);
+
+	rc = ipc_msg_send(msg);
+	if (!rc)
+		rc = wait_event_interruptible_timeout(entry.wait,
+						      entry.response != NULL,
+						      IPC_WAIT_TIMEOUT);
+
+	/* Unlink the on-stack entry before it goes out of scope. */
+	down_write(&ipc_msg_table_lock);
+	hash_del(&entry.ipc_table_hlist);
+	up_write(&ipc_msg_table_lock);
+	return entry.response;
+}
+
+/*
+ * Send a heartbeat message to the registered daemon; no reply is awaited.
+ *
+ * Return: the result of ipc_msg_send(), or -EINVAL when the message cannot
+ * be allocated.
+ */
+static int ksmbd_ipc_heartbeat_request(void)
+{
+	struct ksmbd_ipc_msg *msg = ipc_msg_alloc(sizeof(struct ksmbd_heartbeat));
+	int ret = -EINVAL;
+
+	if (msg) {
+		msg->type = KSMBD_EVENT_HEARTBEAT_REQUEST;
+		ret = ipc_msg_send(msg);
+		ipc_msg_free(msg);
+	}
+	return ret;
+}
+
+/*
+ * Send a login request for @account to the daemon and wait for the reply.
+ *
+ * Return: the reply buffer, or NULL when @account does not fit in the
+ * request, the message cannot be allocated, or no reply was obtained.
+ */
+struct ksmbd_login_response *ksmbd_ipc_login_request(const char *account)
+{
+	struct ksmbd_login_response *resp;
+	struct ksmbd_login_request *req;
+	struct ksmbd_ipc_msg *msg;
+
+	/* The name must fit in the request together with its terminator. */
+	if (strlen(account) >= KSMBD_REQ_MAX_ACCOUNT_NAME_SZ)
+		return NULL;
+
+	msg = ipc_msg_alloc(sizeof(*req));
+	if (!msg)
+		return NULL;
+
+	req = (struct ksmbd_login_request *)msg->payload;
+	msg->type = KSMBD_EVENT_LOGIN_REQUEST;
+	req->handle = ksmbd_acquire_id(&ipc_ida);
+	strscpy(req->account, account, KSMBD_REQ_MAX_ACCOUNT_NAME_SZ);
+
+	resp = ipc_msg_send_request(msg, req->handle);
+
+	ipc_msg_handle_free(req->handle);
+	ipc_msg_free(msg);
+	return resp;
+}
+
+/*
+ * Send a SPNEGO blob to the daemon for authentication and wait for the
+ * reply.
+ *
+ * @spnego_blob: blob to forward
+ * @blob_len:    number of bytes in @spnego_blob; must not be negative
+ *
+ * The message is sized for the request header, the blob and one extra
+ * zeroed byte.
+ *
+ * Return: the reply buffer, or NULL when @blob_len is negative, the message
+ * cannot be allocated, or no reply was obtained.
+ */
+struct ksmbd_spnego_authen_response *
+ksmbd_ipc_spnego_authen_request(const char *spnego_blob, int blob_len)
+{
+	struct ksmbd_ipc_msg *msg;
+	struct ksmbd_spnego_authen_request *req;
+	struct ksmbd_spnego_authen_response *resp;
+
+	/*
+	 * A negative length would turn into a huge size_t in the allocation
+	 * size and in the memcpy() below.
+	 */
+	if (blob_len < 0)
+		return NULL;
+
+	msg = ipc_msg_alloc(sizeof(struct ksmbd_spnego_authen_request) +
+			    blob_len + 1);
+	if (!msg)
+		return NULL;
+
+	msg->type = KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST;
+	req = (struct ksmbd_spnego_authen_request *)msg->payload;
+	req->handle = ksmbd_acquire_id(&ipc_ida);
+	req->spnego_blob_len = blob_len;
+	memcpy(req->spnego_blob, spnego_blob, blob_len);
+
+	resp = ipc_msg_send_request(msg, req->handle);
+	ipc_msg_handle_free(req->handle);
+	ipc_msg_free(msg);
+	return resp;
+}
+
+/*
+ * Build a KSMBD_EVENT_TREE_CONNECT_REQUEST from @sess, @share, @tree_conn
+ * and the printed form of @peer_addr, send it and wait for the reply.
+ *
+ * Return: the response handed back by ipc_msg_send_request(), or NULL if
+ * the account or share name is too long or the message cannot be allocated.
+ */
+struct ksmbd_tree_connect_response *
+ksmbd_ipc_tree_connect_request(struct ksmbd_session *sess,
+			       struct ksmbd_share_config *share,
+			       struct ksmbd_tree_connect *tree_conn,
+			       struct sockaddr *peer_addr)
+{
+	struct ksmbd_tree_connect_response *reply;
+	struct ksmbd_tree_connect_request *tcon;
+	struct ksmbd_ipc_msg *ipc;
+
+	/* Both names must fit in their fixed-size request fields. */
+	if (strlen(user_name(sess->user)) >= KSMBD_REQ_MAX_ACCOUNT_NAME_SZ ||
+	    strlen(share->name) >= KSMBD_REQ_MAX_SHARE_NAME)
+		return NULL;
+
+	ipc = ipc_msg_alloc(sizeof(*tcon));
+	if (!ipc)
+		return NULL;
+
+	ipc->type = KSMBD_EVENT_TREE_CONNECT_REQUEST;
+	tcon = (struct ksmbd_tree_connect_request *)ipc->payload;
+
+	tcon->handle = ksmbd_acquire_id(&ipc_ida);
+	tcon->account_flags = sess->user->flags;
+	tcon->session_id = sess->id;
+	tcon->connect_id = tree_conn->id;
+	strscpy(tcon->account, user_name(sess->user), KSMBD_REQ_MAX_ACCOUNT_NAME_SZ);
+	strscpy(tcon->share, share->name, KSMBD_REQ_MAX_SHARE_NAME);
+	snprintf(tcon->peer_addr, sizeof(tcon->peer_addr), "%pIS", peer_addr);
+
+	/* Tell the receiver about the address family and the SMB2 flag. */
+	if (peer_addr->sa_family == AF_INET6)
+		tcon->flags |= KSMBD_TREE_CONN_FLAG_REQUEST_IPV6;
+	if (test_session_flag(sess, CIFDS_SESSION_FLAG_SMB2))
+		tcon->flags |= KSMBD_TREE_CONN_FLAG_REQUEST_SMB2;
+
+	reply = ipc_msg_send_request(ipc, tcon->handle);
+	ipc_msg_handle_free(tcon->handle);
+	ipc_msg_free(ipc);
+	return reply;
+}
+
+/*
+ * Send a one-way KSMBD_EVENT_TREE_DISCONNECT_REQUEST for the given session
+ * and connection ids.
+ *
+ * Return: -ENOMEM if the message cannot be allocated, otherwise the result
+ * of ipc_msg_send().
+ */
+int ksmbd_ipc_tree_disconnect_request(unsigned long long session_id,
+				      unsigned long long connect_id)
+{
+	struct ksmbd_tree_disconnect_request *disc;
+	struct ksmbd_ipc_msg *ipc;
+	int err;
+
+	ipc = ipc_msg_alloc(sizeof(*disc));
+	if (!ipc)
+		return -ENOMEM;
+
+	ipc->type = KSMBD_EVENT_TREE_DISCONNECT_REQUEST;
+	disc = (struct ksmbd_tree_disconnect_request *)ipc->payload;
+	disc->session_id = session_id;
+	disc->connect_id = connect_id;
+
+	err = ipc_msg_send(ipc);
+	ipc_msg_free(ipc);
+	return err;
+}
+
+/*
+ * Send a one-way KSMBD_EVENT_LOGOUT_REQUEST for @account.
+ *
+ * Return: -EINVAL if @account does not fit in the request, -ENOMEM if the
+ * message cannot be allocated, otherwise the result of ipc_msg_send().
+ */
+int ksmbd_ipc_logout_request(const char *account)
+{
+	struct ksmbd_logout_request *logout;
+	struct ksmbd_ipc_msg *ipc;
+	int err;
+
+	if (strlen(account) >= KSMBD_REQ_MAX_ACCOUNT_NAME_SZ)
+		return -EINVAL;
+
+	ipc = ipc_msg_alloc(sizeof(*logout));
+	if (!ipc)
+		return -ENOMEM;
+
+	ipc->type = KSMBD_EVENT_LOGOUT_REQUEST;
+	logout = (struct ksmbd_logout_request *)ipc->payload;
+	strscpy(logout->account, account, KSMBD_REQ_MAX_ACCOUNT_NAME_SZ);
+
+	err = ipc_msg_send(ipc);
+	ipc_msg_free(ipc);
+	return err;
+}
+
+/*
+ * Send a KSMBD_EVENT_SHARE_CONFIG_REQUEST for share @name and wait for the
+ * reply.
+ *
+ * Return: the response handed back by ipc_msg_send_request(), or NULL when
+ * @name does not fit in the request or the message cannot be allocated.
+ */
+struct ksmbd_share_config_response *
+ksmbd_ipc_share_config_request(const char *name)
+{
+	struct ksmbd_share_config_response *reply;
+	struct ksmbd_share_config_request *cfg;
+	struct ksmbd_ipc_msg *ipc;
+
+	if (strlen(name) >= KSMBD_REQ_MAX_SHARE_NAME)
+		return NULL;
+
+	ipc = ipc_msg_alloc(sizeof(*cfg));
+	if (!ipc)
+		return NULL;
+
+	ipc->type = KSMBD_EVENT_SHARE_CONFIG_REQUEST;
+	cfg = (struct ksmbd_share_config_request *)ipc->payload;
+	cfg->handle = ksmbd_acquire_id(&ipc_ida);
+	strscpy(cfg->share_name, name, KSMBD_REQ_MAX_SHARE_NAME);
+
+	reply = ipc_msg_send_request(ipc, cfg->handle);
+	ipc_msg_handle_free(cfg->handle);
+	ipc_msg_free(ipc);
+	return reply;
+}
+
+/*
+ * Send a payload-less KSMBD_RPC_OPEN_METHOD request for RPC @handle and
+ * wait for the reply. The caller-supplied @handle is used as the message
+ * handle and is not released here.
+ *
+ * Return: the response handed back by ipc_msg_send_request(), or NULL if
+ * the message cannot be allocated.
+ */
+struct ksmbd_rpc_command *ksmbd_rpc_open(struct ksmbd_session *sess, int handle)
+{
+	struct ksmbd_rpc_command *cmd, *reply;
+	struct ksmbd_ipc_msg *ipc;
+
+	ipc = ipc_msg_alloc(sizeof(*cmd));
+	if (!ipc)
+		return NULL;
+
+	ipc->type = KSMBD_EVENT_RPC_REQUEST;
+	cmd = (struct ksmbd_rpc_command *)ipc->payload;
+	cmd->handle = handle;
+	cmd->flags = ksmbd_session_rpc_method(sess, handle);
+	cmd->flags |= KSMBD_RPC_OPEN_METHOD;
+	cmd->payload_sz = 0;
+
+	reply = ipc_msg_send_request(ipc, cmd->handle);
+	ipc_msg_free(ipc);
+	return reply;
+}
+
+/*
+ * Send a payload-less KSMBD_RPC_CLOSE_METHOD request for RPC @handle and
+ * wait for the reply. The caller-supplied @handle is used as the message
+ * handle and is not released here.
+ *
+ * Return: the response handed back by ipc_msg_send_request(), or NULL if
+ * the message cannot be allocated.
+ */
+struct ksmbd_rpc_command *ksmbd_rpc_close(struct ksmbd_session *sess, int handle)
+{
+	struct ksmbd_rpc_command *cmd, *reply;
+	struct ksmbd_ipc_msg *ipc;
+
+	ipc = ipc_msg_alloc(sizeof(*cmd));
+	if (!ipc)
+		return NULL;
+
+	ipc->type = KSMBD_EVENT_RPC_REQUEST;
+	cmd = (struct ksmbd_rpc_command *)ipc->payload;
+	cmd->handle = handle;
+	cmd->flags = ksmbd_session_rpc_method(sess, handle);
+	cmd->flags |= KSMBD_RPC_CLOSE_METHOD;
+	cmd->payload_sz = 0;
+
+	reply = ipc_msg_send_request(ipc, cmd->handle);
+	ipc_msg_free(ipc);
+	return reply;
+}
+
+/*
+ * Send a KSMBD_RPC_WRITE_METHOD request carrying @payload for RPC @handle
+ * and wait for the reply.
+ *
+ * @payload_sz is rejected when it exceeds KSMBD_IPC_MAX_PAYLOAD, so the
+ * size handed to ipc_msg_alloc() cannot wrap around and leave memcpy()
+ * writing past a too-small buffer.
+ *
+ * Return: the response handed back by ipc_msg_send_request(), or NULL on
+ * an oversized payload or allocation failure.
+ */
+struct ksmbd_rpc_command *ksmbd_rpc_write(struct ksmbd_session *sess, int handle,
+					  void *payload, size_t payload_sz)
+{
+	struct ksmbd_ipc_msg *msg;
+	struct ksmbd_rpc_command *req;
+	struct ksmbd_rpc_command *resp;
+
+	if (payload_sz > KSMBD_IPC_MAX_PAYLOAD)
+		return NULL;
+
+	msg = ipc_msg_alloc(sizeof(struct ksmbd_rpc_command) + payload_sz + 1);
+	if (!msg)
+		return NULL;
+
+	msg->type = KSMBD_EVENT_RPC_REQUEST;
+	req = (struct ksmbd_rpc_command *)msg->payload;
+	req->handle = handle;
+	req->flags = ksmbd_session_rpc_method(sess, handle);
+	req->flags |= rpc_context_flags(sess);
+	req->flags |= KSMBD_RPC_WRITE_METHOD;
+	req->payload_sz = payload_sz;
+	memcpy(req->payload, payload, payload_sz);
+
+	resp = ipc_msg_send_request(msg, req->handle);
+	ipc_msg_free(msg);
+	return resp;
+}
+
+/*
+ * Send a payload-less KSMBD_RPC_READ_METHOD request for RPC @handle and
+ * wait for the reply. The caller-supplied @handle is used as the message
+ * handle and is not released here.
+ *
+ * Return: the response handed back by ipc_msg_send_request(), or NULL if
+ * the message cannot be allocated.
+ */
+struct ksmbd_rpc_command *ksmbd_rpc_read(struct ksmbd_session *sess, int handle)
+{
+	struct ksmbd_rpc_command *cmd, *reply;
+	struct ksmbd_ipc_msg *ipc;
+
+	ipc = ipc_msg_alloc(sizeof(*cmd));
+	if (!ipc)
+		return NULL;
+
+	ipc->type = KSMBD_EVENT_RPC_REQUEST;
+	cmd = (struct ksmbd_rpc_command *)ipc->payload;
+	cmd->handle = handle;
+	cmd->flags = ksmbd_session_rpc_method(sess, handle);
+	cmd->flags |= rpc_context_flags(sess);
+	cmd->flags |= KSMBD_RPC_READ_METHOD;
+	cmd->payload_sz = 0;
+
+	reply = ipc_msg_send_request(ipc, cmd->handle);
+	ipc_msg_free(ipc);
+	return reply;
+}
+
+/*
+ * Send a KSMBD_RPC_IOCTL_METHOD request carrying @payload for RPC @handle
+ * and wait for the reply.
+ *
+ * @payload_sz is rejected when it exceeds KSMBD_IPC_MAX_PAYLOAD, so the
+ * size handed to ipc_msg_alloc() cannot wrap around and leave memcpy()
+ * writing past a too-small buffer.
+ *
+ * Return: the response handed back by ipc_msg_send_request(), or NULL on
+ * an oversized payload or allocation failure.
+ */
+struct ksmbd_rpc_command *ksmbd_rpc_ioctl(struct ksmbd_session *sess, int handle,
+					  void *payload, size_t payload_sz)
+{
+	struct ksmbd_ipc_msg *msg;
+	struct ksmbd_rpc_command *req;
+	struct ksmbd_rpc_command *resp;
+
+	if (payload_sz > KSMBD_IPC_MAX_PAYLOAD)
+		return NULL;
+
+	msg = ipc_msg_alloc(sizeof(struct ksmbd_rpc_command) + payload_sz + 1);
+	if (!msg)
+		return NULL;
+
+	msg->type = KSMBD_EVENT_RPC_REQUEST;
+	req = (struct ksmbd_rpc_command *)msg->payload;
+	req->handle = handle;
+	req->flags = ksmbd_session_rpc_method(sess, handle);
+	req->flags |= rpc_context_flags(sess);
+	req->flags |= KSMBD_RPC_IOCTL_METHOD;
+	req->payload_sz = payload_sz;
+	memcpy(req->payload, payload, payload_sz);
+
+	resp = ipc_msg_send_request(msg, req->handle);
+	ipc_msg_free(msg);
+	return resp;
+}
+
+/*
+ * Send a KSMBD_RPC_RAP_METHOD request carrying @payload and wait for the
+ * reply. Unlike the handle-based RPC calls, a temporary id is acquired for
+ * the exchange and released before returning.
+ *
+ * @payload_sz is rejected when it exceeds KSMBD_IPC_MAX_PAYLOAD, so the
+ * size handed to ipc_msg_alloc() cannot wrap around and leave memcpy()
+ * writing past a too-small buffer.
+ *
+ * Return: the response handed back by ipc_msg_send_request(), or NULL on
+ * an oversized payload or allocation failure.
+ */
+struct ksmbd_rpc_command *ksmbd_rpc_rap(struct ksmbd_session *sess, void *payload,
+					size_t payload_sz)
+{
+	struct ksmbd_ipc_msg *msg;
+	struct ksmbd_rpc_command *req;
+	struct ksmbd_rpc_command *resp;
+
+	/* Checked before any id is acquired, so nothing needs releasing. */
+	if (payload_sz > KSMBD_IPC_MAX_PAYLOAD)
+		return NULL;
+
+	msg = ipc_msg_alloc(sizeof(struct ksmbd_rpc_command) + payload_sz + 1);
+	if (!msg)
+		return NULL;
+
+	msg->type = KSMBD_EVENT_RPC_REQUEST;
+	req = (struct ksmbd_rpc_command *)msg->payload;
+	req->handle = ksmbd_acquire_id(&ipc_ida);
+	req->flags = rpc_context_flags(sess);
+	req->flags |= KSMBD_RPC_RAP_METHOD;
+	req->payload_sz = payload_sz;
+	memcpy(req->payload, payload, payload_sz);
+
+	resp = ipc_msg_send_request(msg, req->handle);
+	ipc_msg_handle_free(req->handle);
+	ipc_msg_free(msg);
+	return resp;
+}
+
+/*
+ * One tick of the IPC heartbeat timer.
+ *
+ * Re-arms ipc_timer_work while the server is running and either the IPC
+ * channel was active within server_conf.ipc_timeout or a heartbeat request
+ * could be sent. Otherwise moves the server to SERVER_STATE_RESETTING.
+ *
+ * Return: 0 when nothing more needs to be done, -EINVAL when the server
+ * state was switched to resetting.
+ */
+static int __ipc_heartbeat(void)
+{
+	unsigned long idle;
+
+	if (!ksmbd_server_running())
+		return 0;
+
+	/* Last-active stamp is not in the past: refresh it and start over. */
+	if (!time_after(jiffies, server_conf.ipc_last_active)) {
+		ipc_update_last_active();
+		schedule_delayed_work(&ipc_timer_work,
+				      server_conf.ipc_timeout);
+		return 0;
+	}
+
+	idle = jiffies - server_conf.ipc_last_active;
+
+	/* Still inside the timeout window: sleep for what is left of it. */
+	if (idle < server_conf.ipc_timeout) {
+		schedule_delayed_work(&ipc_timer_work,
+				      server_conf.ipc_timeout - idle);
+		return 0;
+	}
+
+	/* Window expired: a heartbeat that can be sent buys another window. */
+	if (!ksmbd_ipc_heartbeat_request()) {
+		schedule_delayed_work(&ipc_timer_work,
+				      server_conf.ipc_timeout);
+		return 0;
+	}
+
+	mutex_lock(&startup_lock);
+	WRITE_ONCE(server_conf.state, SERVER_STATE_RESETTING);
+	server_conf.ipc_last_active = 0;
+	ksmbd_tools_pid = 0;
+	pr_err("No IPC daemon response for %lus\n", idle / HZ);
+	mutex_unlock(&startup_lock);
+	return -EINVAL;
+}
+
+/* Delayed-work handler: queue a control reset when the heartbeat fails. */
+static void ipc_timer_heartbeat(struct work_struct *w)
+{
+	int err = __ipc_heartbeat();
+
+	if (err)
+		server_queue_ctrl_reset_work();
+}
+
+/* Acquire an id from ipc_ida; the result of ksmbd_acquire_id() is returned as is. */
+int ksmbd_ipc_id_alloc(void)
+{
+	int id = ksmbd_acquire_id(&ipc_ida);
+
+	return id;
+}
+
+/* Hand @handle back to ipc_ida, the pool ksmbd_ipc_id_alloc() draws from. */
+void ksmbd_rpc_id_free(int handle)
+{
+	ksmbd_release_id(&ipc_ida, handle);
+}
+
+/*
+ * Tear down the IPC layer: stop the heartbeat work first, then unregister
+ * the generic netlink family registered by ksmbd_ipc_init().
+ */
+void ksmbd_ipc_release(void)
+{
+ cancel_delayed_work_sync(&ipc_timer_work);
+ genl_unregister_family(&ksmbd_genl_family);
+}
+
+/*
+ * Clear ksmbd_tools_pid and cancel the heartbeat work, both under
+ * startup_lock.
+ *
+ * NOTE(review): cancel_delayed_work_sync() is called while startup_lock is
+ * held, and the work handler (__ipc_heartbeat()) takes startup_lock on its
+ * failure path. If the handler is running at that moment this looks like
+ * it could deadlock - confirm whether the callers rule that out.
+ */
+void ksmbd_ipc_soft_reset(void)
+{
+ mutex_lock(&startup_lock);
+ ksmbd_tools_pid = 0;
+ cancel_delayed_work_sync(&ipc_timer_work);
+ mutex_unlock(&startup_lock);
+}
+
+/*
+ * Set up the IPC layer: prepare the heartbeat work and register the
+ * generic netlink family.
+ *
+ * Return: 0 on success, otherwise the error from genl_register_family().
+ */
+int ksmbd_ipc_init(void)
+{
+	int err;
+
+	ksmbd_nl_init_fixup();
+	INIT_DELAYED_WORK(&ipc_timer_work, ipc_timer_heartbeat);
+
+	err = genl_register_family(&ksmbd_genl_family);
+	if (!err)
+		return 0;
+
+	pr_err("Failed to register KSMBD netlink interface %d\n", err);
+	cancel_delayed_work_sync(&ipc_timer_work);
+	return err;
+}
diff --git a/fs/ksmbd/transport_ipc.h b/fs/ksmbd/transport_ipc.h
new file mode 100644
index 000000000000..9eacc895ffdb
--- /dev/null
+++ b/fs/ksmbd/transport_ipc.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_TRANSPORT_IPC_H__
+#define __KSMBD_TRANSPORT_IPC_H__
+
+#include <linux/wait.h>
+
+/* NOTE(review): presumably the largest IPC payload in bytes - confirm at the users. */
+#define KSMBD_IPC_MAX_PAYLOAD 4096
+
+/*
+ * Request/response calls: each sends one message and returns the response
+ * that was received for it, or NULL on failure.
+ */
+struct ksmbd_login_response *
+ksmbd_ipc_login_request(const char *account);
+
+struct ksmbd_session;
+struct ksmbd_share_config;
+struct ksmbd_tree_connect;
+struct sockaddr;
+
+struct ksmbd_tree_connect_response *
+ksmbd_ipc_tree_connect_request(struct ksmbd_session *sess,
+ struct ksmbd_share_config *share,
+ struct ksmbd_tree_connect *tree_conn,
+ struct sockaddr *peer_addr);
+/* One-way notifications: return 0 or a negative errno, no response is awaited. */
+int ksmbd_ipc_tree_disconnect_request(unsigned long long session_id,
+ unsigned long long connect_id);
+int ksmbd_ipc_logout_request(const char *account);
+struct ksmbd_share_config_response *
+ksmbd_ipc_share_config_request(const char *name);
+struct ksmbd_spnego_authen_response *
+ksmbd_ipc_spnego_authen_request(const char *spnego_blob, int blob_len);
+/* Ids for the RPC calls below; both operate on the same id pool. */
+int ksmbd_ipc_id_alloc(void);
+void ksmbd_rpc_id_free(int handle);
+/* RPC calls: return the response received for the request, or NULL on failure. */
+struct ksmbd_rpc_command *ksmbd_rpc_open(struct ksmbd_session *sess, int handle);
+struct ksmbd_rpc_command *ksmbd_rpc_close(struct ksmbd_session *sess, int handle);
+struct ksmbd_rpc_command *ksmbd_rpc_write(struct ksmbd_session *sess, int handle,
+ void *payload, size_t payload_sz);
+struct ksmbd_rpc_command *ksmbd_rpc_read(struct ksmbd_session *sess, int handle);
+struct ksmbd_rpc_command *ksmbd_rpc_ioctl(struct ksmbd_session *sess, int handle,
+ void *payload, size_t payload_sz);
+struct ksmbd_rpc_command *ksmbd_rpc_rap(struct ksmbd_session *sess, void *payload,
+ size_t payload_sz);
+/* Lifetime management of the IPC layer. */
+void ksmbd_ipc_release(void);
+void ksmbd_ipc_soft_reset(void);
+int ksmbd_ipc_init(void);
+#endif /* __KSMBD_TRANSPORT_IPC_H__ */
diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
new file mode 100644
index 000000000000..58f530056ac0
--- /dev/null
+++ b/fs/ksmbd/transport_rdma.c
@@ -0,0 +1,2058 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2017, Microsoft Corporation.
+ * Copyright (C) 2018, LG Electronics.
+ *
+ * Author(s): Long Li <longli@microsoft.com>,
+ * Hyunchul Lee <hyc.lee@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+
+#define SUBMOD_NAME "smb_direct"
+
+#include <linux/kthread.h>
+#include <linux/rwlock.h>
+#include <linux/list.h>
+#include <linux/mempool.h>
+#include <linux/highmem.h>
+#include <linux/scatterlist.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/rw.h>
+
+#include "glob.h"
+#include "connection.h"
+#include "smb_common.h"
+#include "smbstatus.h"
+#include "transport_rdma.h"
+
+#define SMB_DIRECT_PORT 5445
+
+#define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100)
+
+/* SMB_DIRECT negotiation timeout in seconds */
+#define SMB_DIRECT_NEGOTIATE_TIMEOUT 120
+
+#define SMB_DIRECT_MAX_SEND_SGES 8
+#define SMB_DIRECT_MAX_RECV_SGES 1
+
+/*
+ * Default maximum number of RDMA read/write outstanding on this connection
+ * This value is possibly decreased during QP creation on hardware limit
+ */
+#define SMB_DIRECT_CM_INITIATOR_DEPTH 8
+
+/* Maximum number of retries on data transfer operations */
+#define SMB_DIRECT_CM_RETRY 6
+/* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */
+#define SMB_DIRECT_CM_RNR_RETRY 0
+
+/*
+ * User configurable initial values per SMB_DIRECT transport connection
+ * as defined in [MS-SMBD] 3.1.1.1
+ * Those may change after a SMB_DIRECT negotiation
+ */
+/* The local peer's maximum number of credits to grant to the peer */
+static int smb_direct_receive_credit_max = 255;
+
+/* The remote peer's credit request of local peer */
+static int smb_direct_send_credit_target = 255;
+
+/* The maximum size of a single message that can be sent to the remote peer */
+static int smb_direct_max_send_size = 8192;
+
+/* The maximum fragmented upper-layer payload receive size supported */
+static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
+
+/* The maximum single-message size which can be received */
+static int smb_direct_max_receive_size = 8192;
+
+static int smb_direct_max_read_write_size = 1024 * 1024;
+
+static int smb_direct_max_outstanding_rw_ops = 8;
+
+/* Single file-scope listener object; it only holds an RDMA CM id pointer. */
+static struct smb_direct_listener {
+ struct rdma_cm_id *cm_id;
+} smb_direct_listener;
+
+static struct workqueue_struct *smb_direct_wq;
+
+/*
+ * Connection state kept in smb_direct_transport.status. A transport starts
+ * out as SMB_DIRECT_CS_NEW; work is only posted or queued while it is
+ * SMB_DIRECT_CS_CONNECTED.
+ */
+enum smb_direct_status {
+ SMB_DIRECT_CS_NEW = 0,
+ SMB_DIRECT_CS_CONNECTED,
+ SMB_DIRECT_CS_DISCONNECTING,
+ SMB_DIRECT_CS_DISCONNECTED,
+};
+
+/*
+ * Per-connection state of one SMB Direct (RDMA) transport. The generic
+ * ksmbd_transport is embedded as the first member and is what the rest of
+ * ksmbd sees (see KSMBD_TRANS()).
+ */
+struct smb_direct_transport {
+ struct ksmbd_transport transport;
+
+ enum smb_direct_status status;
+ /* true when the last data message completed an upper-layer packet */
+ bool full_packet_received;
+ wait_queue_head_t wait_status;
+
+ /* RDMA resources, released in free_transport() */
+ struct rdma_cm_id *cm_id;
+ struct ib_cq *send_cq;
+ struct ib_cq *recv_cq;
+ struct ib_pd *pd;
+ struct ib_qp *qp;
+
+ int max_send_size;
+ int max_recv_size;
+ int max_fragmented_send_size;
+ int max_fragmented_recv_size;
+ int max_rdma_rw_size;
+
+ /* received data messages waiting to be consumed by smb_direct_read() */
+ spinlock_t reassembly_queue_lock;
+ struct list_head reassembly_queue;
+ int reassembly_data_length;
+ int reassembly_queue_length;
+ /* read offset into the first queued message */
+ int first_entry_offset;
+ wait_queue_head_t wait_reassembly_queue;
+
+ /* receive_credit_lock guards recv_credits and count_avail_recvmsg */
+ spinlock_t receive_credit_lock;
+ int recv_credits;
+ int count_avail_recvmsg;
+ int recv_credit_max;
+ int recv_credit_target;
+
+ /* receive buffers handed back through put_recvmsg() */
+ spinlock_t recvmsg_queue_lock;
+ struct list_head recvmsg_queue;
+
+ /* receive buffers handed back through put_empty_recvmsg() */
+ spinlock_t empty_recvmsg_queue_lock;
+ struct list_head empty_recvmsg_queue;
+
+ int send_credit_target;
+ /* raised by credits_granted from the peer in recv_done() */
+ atomic_t send_credits;
+ spinlock_t lock_new_recv_credits;
+ int new_recv_credits;
+ atomic_t rw_avail_ops;
+
+ wait_queue_head_t wait_send_credits;
+ wait_queue_head_t wait_rw_avail_ops;
+
+ mempool_t *sendmsg_mempool;
+ struct kmem_cache *sendmsg_cache;
+ mempool_t *recvmsg_mempool;
+ struct kmem_cache *recvmsg_cache;
+
+ /* free_transport() waits for both counters to drop to zero */
+ wait_queue_head_t wait_send_payload_pending;
+ atomic_t send_payload_pending;
+ wait_queue_head_t wait_send_pending;
+ atomic_t send_pending;
+
+ struct delayed_work post_recv_credits_work;
+ struct work_struct send_immediate_work;
+ struct work_struct disconnect_work;
+
+ /* set by recv_done() when a negotiate request has arrived */
+ bool negotiation_requested;
+};
+
+#define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport))
+
+/* Values of smb_direct_recvmsg.type: what a posted receive is expected to carry. */
+enum {
+ SMB_DIRECT_MSG_NEGOTIATE_REQ = 0,
+ SMB_DIRECT_MSG_DATA_TRANSFER
+};
+
+static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;
+
+/*
+ * Context passed to smb_direct_post_send_data().
+ * NOTE(review): the fields are not used in the part of the file visible
+ * here; presumably msg_list/wr_cnt batch pending send messages - confirm.
+ */
+struct smb_direct_send_ctx {
+ struct list_head msg_list;
+ int wr_cnt;
+ bool need_invalidate_rkey;
+ unsigned int remote_key;
+};
+
+/*
+ * One outgoing message, allocated from the transport's sendmsg_mempool.
+ * sge[0] is DMA-mapped with the "single" API and the remaining entries
+ * with the "page" API (see smb_direct_free_sendmsg()).
+ */
+struct smb_direct_sendmsg {
+ struct smb_direct_transport *transport;
+ struct ib_send_wr wr;
+ struct list_head list;
+ /* number of valid entries in sge[] */
+ int num_sge;
+ struct ib_sge sge[SMB_DIRECT_MAX_SEND_SGES];
+ struct ib_cqe cqe;
+ u8 packet[];
+};
+
+/*
+ * One receive buffer. It is linked through @list into exactly one of the
+ * transport's recvmsg, empty_recvmsg or reassembly queues while it is not
+ * posted; @packet is the memory that gets DMA-mapped for the receive.
+ */
+struct smb_direct_recvmsg {
+ struct smb_direct_transport *transport;
+ struct list_head list;
+ /* SMB_DIRECT_MSG_NEGOTIATE_REQ or SMB_DIRECT_MSG_DATA_TRANSFER */
+ int type;
+ struct ib_sge sge;
+ struct ib_cqe cqe;
+ /* set by recv_done() for the first fragment of an upper-layer packet */
+ bool first_segment;
+ u8 packet[];
+};
+
+/*
+ * State of one RDMA read/write operation. The scatterlist entries are
+ * stored inline after the structure, so it must be allocated with room
+ * for them.
+ */
+struct smb_direct_rdma_rw_msg {
+	struct smb_direct_transport *t;
+	struct ib_cqe cqe;
+	struct completion *completion;
+	struct rdma_rw_ctx rw_ctx;
+	struct sg_table sgt;
+	/* flexible array member, same convention as packet[] in the message structs */
+	struct scatterlist sg_list[];
+};
+
+/* Number of pages touched by the @size bytes starting at @buf. */
+static inline int get_buf_page_count(void *buf, int size)
+{
+	uintptr_t start = (uintptr_t)buf;
+
+	/* index one past the last page, minus index of the first page */
+	return DIV_ROUND_UP(start + size, PAGE_SIZE) - start / PAGE_SIZE;
+}
+
+static void smb_direct_destroy_pools(struct smb_direct_transport *transport);
+static void smb_direct_post_recv_credits(struct work_struct *work);
+static int smb_direct_post_send_data(struct smb_direct_transport *t,
+ struct smb_direct_send_ctx *send_ctx,
+ struct kvec *iov, int niov,
+ int remaining_data_length);
+
+/* Map the embedded generic transport back to its smb_direct_transport. */
+static inline struct smb_direct_transport *
+smb_trans_direct_transfort(struct ksmbd_transport *t)
+{
+	struct smb_direct_transport *st;
+
+	st = container_of(t, struct smb_direct_transport, transport);
+	return st;
+}
+
+/* Start of the received bytes, i.e. the message's packet[] buffer. */
+static inline void
+*smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg)
+{
+	void *payload = recvmsg->packet;
+
+	return payload;
+}
+
+/*
+ * Decide whether new receives should be posted: outstanding credits are at
+ * or below 1/8 of smb_direct_receive_credit_max and at least a quarter of
+ * that many buffers are available.
+ */
+static inline bool is_receive_credit_post_required(int receive_credits,
+						   int avail_recvmsg_count)
+{
+	if (receive_credits > (smb_direct_receive_credit_max >> 3))
+		return false;
+
+	return avail_recvmsg_count >= (receive_credits >> 2);
+}
+
+/*
+ * Detach and return the first buffer on t->recvmsg_queue, or NULL when the
+ * queue is empty.
+ */
+static struct
+smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t)
+{
+	struct smb_direct_recvmsg *msg;
+
+	spin_lock(&t->recvmsg_queue_lock);
+	msg = list_first_entry_or_null(&t->recvmsg_queue,
+				       struct smb_direct_recvmsg, list);
+	if (msg)
+		list_del(&msg->list);
+	spin_unlock(&t->recvmsg_queue_lock);
+
+	return msg;
+}
+
+/* Unmap @recvmsg's DMA buffer and put it at the head of t->recvmsg_queue. */
+static void put_recvmsg(struct smb_direct_transport *t,
+			struct smb_direct_recvmsg *recvmsg)
+{
+	struct ib_sge *sge = &recvmsg->sge;
+
+	ib_dma_unmap_single(t->cm_id->device, sge->addr, sge->length,
+			    DMA_FROM_DEVICE);
+
+	spin_lock(&t->recvmsg_queue_lock);
+	list_add(&recvmsg->list, &t->recvmsg_queue);
+	spin_unlock(&t->recvmsg_queue_lock);
+}
+
+/*
+ * Detach and return the first buffer on t->empty_recvmsg_queue, or NULL
+ * when the queue is empty.
+ */
+static struct
+smb_direct_recvmsg *get_empty_recvmsg(struct smb_direct_transport *t)
+{
+	struct smb_direct_recvmsg *msg;
+
+	spin_lock(&t->empty_recvmsg_queue_lock);
+	msg = list_first_entry_or_null(&t->empty_recvmsg_queue,
+				       struct smb_direct_recvmsg, list);
+	if (msg)
+		list_del(&msg->list);
+	spin_unlock(&t->empty_recvmsg_queue_lock);
+
+	return msg;
+}
+
+/* Unmap @recvmsg's DMA buffer and append it to t->empty_recvmsg_queue. */
+static void put_empty_recvmsg(struct smb_direct_transport *t,
+			      struct smb_direct_recvmsg *recvmsg)
+{
+	struct ib_sge *sge = &recvmsg->sge;
+
+	ib_dma_unmap_single(t->cm_id->device, sge->addr, sge->length,
+			    DMA_FROM_DEVICE);
+
+	spin_lock(&t->empty_recvmsg_queue_lock);
+	list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue);
+	spin_unlock(&t->empty_recvmsg_queue_lock);
+}
+
+/*
+ * Append @recvmsg, which carries @data_length payload bytes, to the
+ * reassembly queue and account for it. Called from recv_done(); the reader
+ * side is smb_direct_read().
+ */
+static void enqueue_reassembly(struct smb_direct_transport *t,
+ struct smb_direct_recvmsg *recvmsg,
+ int data_length)
+{
+ spin_lock(&t->reassembly_queue_lock);
+ list_add_tail(&recvmsg->list, &t->reassembly_queue);
+ t->reassembly_queue_length++;
+ /*
+ * Make sure reassembly_data_length is updated after list and
+ * reassembly_queue_length are updated. On the dequeue side
+ * reassembly_data_length is checked without a lock to determine
+ * if reassembly_queue_length and list is up to date
+ */
+ virt_wmb();
+ t->reassembly_data_length += data_length;
+ spin_unlock(&t->reassembly_queue_lock);
+}
+
+/*
+ * Peek at the head of the reassembly queue without removing it.
+ * Returns NULL when the queue is empty. Takes no lock itself.
+ */
+static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t)
+{
+	if (list_empty(&t->reassembly_queue))
+		return NULL;
+
+	return list_first_entry(&t->reassembly_queue,
+				struct smb_direct_recvmsg, list);
+}
+
+/*
+ * Work handler behind t->disconnect_work: start an RDMA disconnect if the
+ * transport is still connected.
+ */
+static void smb_direct_disconnect_rdma_work(struct work_struct *work)
+{
+	struct smb_direct_transport *t;
+
+	t = container_of(work, struct smb_direct_transport, disconnect_work);
+	if (t->status != SMB_DIRECT_CS_CONNECTED)
+		return;
+
+	t->status = SMB_DIRECT_CS_DISCONNECTING;
+	rdma_disconnect(t->cm_id);
+}
+
+/* Schedule an asynchronous disconnect of a connected transport. */
+static void
+smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t)
+{
+	if (t->status != SMB_DIRECT_CS_CONNECTED)
+		return;
+
+	queue_work(smb_direct_wq, &t->disconnect_work);
+}
+
+/*
+ * Work handler behind t->send_immediate_work: post an empty data message
+ * while the transport is connected.
+ */
+static void smb_direct_send_immediate_work(struct work_struct *work)
+{
+	struct smb_direct_transport *t;
+
+	t = container_of(work, struct smb_direct_transport,
+			 send_immediate_work);
+	if (t->status == SMB_DIRECT_CS_CONNECTED)
+		smb_direct_post_send_data(t, NULL, NULL, 0, 0);
+}
+
+/*
+ * Allocate and initialise the transport for a new RDMA connection and tie
+ * it to @cm_id and to a freshly allocated ksmbd connection.
+ *
+ * @cm_id->context is only pointed at the transport once everything has
+ * succeeded, so a failed allocation does not leave it referring to freed
+ * memory.
+ *
+ * Return: the new transport, or NULL if either allocation fails.
+ */
+static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
+{
+	struct smb_direct_transport *t;
+	struct ksmbd_conn *conn;
+
+	t = kzalloc(sizeof(*t), GFP_KERNEL);
+	if (!t)
+		return NULL;
+
+	t->cm_id = cm_id;
+
+	t->status = SMB_DIRECT_CS_NEW;
+	init_waitqueue_head(&t->wait_status);
+
+	spin_lock_init(&t->reassembly_queue_lock);
+	INIT_LIST_HEAD(&t->reassembly_queue);
+	t->reassembly_data_length = 0;
+	t->reassembly_queue_length = 0;
+	init_waitqueue_head(&t->wait_reassembly_queue);
+	init_waitqueue_head(&t->wait_send_credits);
+	init_waitqueue_head(&t->wait_rw_avail_ops);
+
+	spin_lock_init(&t->receive_credit_lock);
+	spin_lock_init(&t->recvmsg_queue_lock);
+	INIT_LIST_HEAD(&t->recvmsg_queue);
+
+	spin_lock_init(&t->empty_recvmsg_queue_lock);
+	INIT_LIST_HEAD(&t->empty_recvmsg_queue);
+
+	init_waitqueue_head(&t->wait_send_payload_pending);
+	atomic_set(&t->send_payload_pending, 0);
+	init_waitqueue_head(&t->wait_send_pending);
+	atomic_set(&t->send_pending, 0);
+
+	spin_lock_init(&t->lock_new_recv_credits);
+
+	INIT_DELAYED_WORK(&t->post_recv_credits_work,
+			  smb_direct_post_recv_credits);
+	INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work);
+	INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work);
+
+	conn = ksmbd_conn_alloc();
+	if (!conn)
+		goto err;
+	conn->transport = KSMBD_TRANS(t);
+	KSMBD_TRANS(t)->conn = conn;
+	KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops;
+
+	/* Publish the transport to the CM id only now that it is complete. */
+	cm_id->context = t;
+	return t;
+err:
+	kfree(t);
+	return NULL;
+}
+
+/*
+ * Tear down a transport and free it.
+ *
+ * Order: wake senders waiting for credits, wait until no sends are
+ * pending, cancel the transport's work items, drain and destroy the QP,
+ * hand every queued reassembly buffer back through put_recvmsg(), then
+ * release the CQs, PD and CM id, the message pools, the ksmbd connection
+ * and finally the transport itself.
+ */
+static void free_transport(struct smb_direct_transport *t)
+{
+ struct smb_direct_recvmsg *recvmsg;
+
+ wake_up_interruptible(&t->wait_send_credits);
+
+ ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n");
+ wait_event(t->wait_send_payload_pending,
+ atomic_read(&t->send_payload_pending) == 0);
+ wait_event(t->wait_send_pending,
+ atomic_read(&t->send_pending) == 0);
+
+ cancel_work_sync(&t->disconnect_work);
+ cancel_delayed_work_sync(&t->post_recv_credits_work);
+ cancel_work_sync(&t->send_immediate_work);
+
+ /* t->qp may be unset if setup failed before the QP was created */
+ if (t->qp) {
+ ib_drain_qp(t->qp);
+ ib_destroy_qp(t->qp);
+ }
+
+ ksmbd_debug(RDMA, "drain the reassembly queue\n");
+ do {
+ spin_lock(&t->reassembly_queue_lock);
+ recvmsg = get_first_reassembly(t);
+ if (recvmsg) {
+ list_del(&recvmsg->list);
+ /* put_recvmsg() takes a different lock, so drop this one first */
+ spin_unlock(&t->reassembly_queue_lock);
+ put_recvmsg(t, recvmsg);
+ } else {
+ spin_unlock(&t->reassembly_queue_lock);
+ }
+ } while (recvmsg);
+ t->reassembly_data_length = 0;
+
+ if (t->send_cq)
+ ib_free_cq(t->send_cq);
+ if (t->recv_cq)
+ ib_free_cq(t->recv_cq);
+ if (t->pd)
+ ib_dealloc_pd(t->pd);
+ if (t->cm_id)
+ rdma_destroy_id(t->cm_id);
+
+ smb_direct_destroy_pools(t);
+ ksmbd_conn_free(KSMBD_TRANS(t)->conn);
+ kfree(t);
+}
+
+/*
+ * Take a send message from the transport's mempool and reset its
+ * bookkeeping fields.
+ *
+ * Return: the message, or ERR_PTR(-ENOMEM) if the pool returns nothing.
+ */
+static struct smb_direct_sendmsg
+*smb_direct_alloc_sendmsg(struct smb_direct_transport *t)
+{
+	struct smb_direct_sendmsg *sendmsg;
+
+	sendmsg = mempool_alloc(t->sendmsg_mempool, GFP_KERNEL);
+	if (!sendmsg)
+		return ERR_PTR(-ENOMEM);
+
+	sendmsg->transport = t;
+	INIT_LIST_HEAD(&sendmsg->list);
+	sendmsg->num_sge = 0;
+
+	return sendmsg;
+}
+
+/*
+ * Undo the DMA mappings recorded in @msg and return it to the mempool.
+ * The first SGE is unmapped as a "single" mapping, every further one as a
+ * page mapping.
+ */
+static void smb_direct_free_sendmsg(struct smb_direct_transport *t,
+				    struct smb_direct_sendmsg *msg)
+{
+	struct ib_sge *sge = msg->sge;
+	int idx;
+
+	if (msg->num_sge > 0) {
+		ib_dma_unmap_single(t->cm_id->device, sge[0].addr,
+				    sge[0].length, DMA_TO_DEVICE);
+
+		for (idx = 1; idx < msg->num_sge; idx++)
+			ib_dma_unmap_page(t->cm_id->device, sge[idx].addr,
+					  sge[idx].length, DMA_TO_DEVICE);
+	}
+
+	mempool_free(msg, t->sendmsg_mempool);
+}
+
+/*
+ * Log the header of a received message and sanity-check negotiate requests.
+ *
+ * Return: 0 if the message is acceptable; -EOPNOTSUPP if protocol version
+ * 0x0100 is outside the peer's [min_version, max_version] range;
+ * -ECONNABORTED if the peer requests no credits, or offers a receive size
+ * of at most 128 bytes or a fragmented size of at most 128 KiB; -EINVAL
+ * for an unknown message type.
+ */
+static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg)
+{
+	switch (recvmsg->type) {
+	case SMB_DIRECT_MSG_DATA_TRANSFER: {
+		struct smb_direct_data_transfer *req =
+			(struct smb_direct_data_transfer *)recvmsg->packet;
+		struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet
+				+ le32_to_cpu(req->data_offset) - 4);
+		/* Wire fields are little-endian; convert before printing. */
+		ksmbd_debug(RDMA,
+			    "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n",
+			    le16_to_cpu(req->credits_granted),
+			    le16_to_cpu(req->credits_requested),
+			    le32_to_cpu(req->data_length),
+			    le32_to_cpu(req->remaining_data_length),
+			    hdr->ProtocolId, hdr->Command);
+		break;
+	}
+	case SMB_DIRECT_MSG_NEGOTIATE_REQ: {
+		struct smb_direct_negotiate_req *req =
+			(struct smb_direct_negotiate_req *)recvmsg->packet;
+		ksmbd_debug(RDMA,
+			    "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n",
+			    le16_to_cpu(req->min_version),
+			    le16_to_cpu(req->max_version),
+			    le16_to_cpu(req->credits_requested),
+			    le32_to_cpu(req->preferred_send_size),
+			    le32_to_cpu(req->max_receive_size),
+			    le32_to_cpu(req->max_fragmented_size));
+		if (le16_to_cpu(req->min_version) > 0x0100 ||
+		    le16_to_cpu(req->max_version) < 0x0100)
+			return -EOPNOTSUPP;
+		if (le16_to_cpu(req->credits_requested) <= 0 ||
+		    le32_to_cpu(req->max_receive_size) <= 128 ||
+		    le32_to_cpu(req->max_fragmented_size) <=
+					128 * 1024)
+			return -ECONNABORTED;
+
+		break;
+	}
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Completion handler for receives posted by smb_direct_post_recv().
+ *
+ * On a failed or unexpected completion the buffer goes back to the empty
+ * queue, and unless the work request was merely flushed, the error is
+ * logged and a disconnect is scheduled. Otherwise the message is handled
+ * according to the type it was posted for.
+ */
+static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct smb_direct_recvmsg *recvmsg;
+ struct smb_direct_transport *t;
+
+ recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe);
+ t = recvmsg->transport;
+
+ if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
+ if (wc->status != IB_WC_WR_FLUSH_ERR) {
+ pr_err("Recv error. status='%s (%d)' opcode=%d\n",
+ ib_wc_status_msg(wc->status), wc->status,
+ wc->opcode);
+ smb_direct_disconnect_rdma_connection(t);
+ }
+ put_empty_recvmsg(t, recvmsg);
+ return;
+ }
+
+ ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n",
+ ib_wc_status_msg(wc->status), wc->status,
+ wc->opcode);
+
+ ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr,
+ recvmsg->sge.length, DMA_FROM_DEVICE);
+
+ switch (recvmsg->type) {
+ case SMB_DIRECT_MSG_NEGOTIATE_REQ:
+ /* Record that negotiation was requested and wake wait_status sleepers. */
+ t->negotiation_requested = true;
+ t->full_packet_received = true;
+ wake_up_interruptible(&t->wait_status);
+ break;
+ case SMB_DIRECT_MSG_DATA_TRANSFER: {
+ struct smb_direct_data_transfer *data_transfer =
+ (struct smb_direct_data_transfer *)recvmsg->packet;
+ int data_length = le32_to_cpu(data_transfer->data_length);
+ int avail_recvmsg_count, receive_credits;
+
+ if (data_length) {
+ /* The previous message ended a packet, so this one starts a new one. */
+ if (t->full_packet_received)
+ recvmsg->first_segment = true;
+
+ if (le32_to_cpu(data_transfer->remaining_data_length))
+ t->full_packet_received = false;
+ else
+ t->full_packet_received = true;
+
+ enqueue_reassembly(t, recvmsg, data_length);
+ wake_up_interruptible(&t->wait_reassembly_queue);
+
+ spin_lock(&t->receive_credit_lock);
+ receive_credits = --(t->recv_credits);
+ avail_recvmsg_count = t->count_avail_recvmsg;
+ spin_unlock(&t->receive_credit_lock);
+ } else {
+ /* No payload: the buffer is immediately available again. */
+ put_empty_recvmsg(t, recvmsg);
+
+ spin_lock(&t->receive_credit_lock);
+ receive_credits = --(t->recv_credits);
+ avail_recvmsg_count = ++(t->count_avail_recvmsg);
+ spin_unlock(&t->receive_credit_lock);
+ }
+
+ /* Take over the peer's credit request and the credits it granted us. */
+ t->recv_credit_target =
+ le16_to_cpu(data_transfer->credits_requested);
+ atomic_add(le16_to_cpu(data_transfer->credits_granted),
+ &t->send_credits);
+
+ if (le16_to_cpu(data_transfer->flags) &
+ SMB_DIRECT_RESPONSE_REQUESTED)
+ queue_work(smb_direct_wq, &t->send_immediate_work);
+
+ if (atomic_read(&t->send_credits) > 0)
+ wake_up_interruptible(&t->wait_send_credits);
+
+ if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count))
+ mod_delayed_work(smb_direct_wq,
+ &t->post_recv_credits_work, 0);
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+/*
+ * DMA-map @recvmsg's packet buffer (t->max_recv_size bytes) and post it as
+ * a receive work request whose completion is handled by recv_done().
+ *
+ * Return: 0 on success, otherwise the mapping or posting error. If posting
+ * fails the buffer is unmapped again and a disconnect is scheduled.
+ */
+static int smb_direct_post_recv(struct smb_direct_transport *t,
+				struct smb_direct_recvmsg *recvmsg)
+{
+	struct ib_sge *sge = &recvmsg->sge;
+	struct ib_recv_wr wr;
+	int err;
+
+	sge->addr = ib_dma_map_single(t->cm_id->device,
+				      recvmsg->packet, t->max_recv_size,
+				      DMA_FROM_DEVICE);
+	err = ib_dma_mapping_error(t->cm_id->device, sge->addr);
+	if (err)
+		return err;
+	sge->length = t->max_recv_size;
+	sge->lkey = t->pd->local_dma_lkey;
+	recvmsg->cqe.done = recv_done;
+
+	wr.wr_cqe = &recvmsg->cqe;
+	wr.next = NULL;
+	wr.sg_list = sge;
+	wr.num_sge = 1;
+
+	err = ib_post_recv(t->qp, &wr, NULL);
+	if (!err)
+		return 0;
+
+	pr_err("Can't post recv: %d\n", err);
+	ib_dma_unmap_single(t->cm_id->device, sge->addr, sge->length,
+			    DMA_FROM_DEVICE);
+	smb_direct_disconnect_rdma_connection(t);
+	return err;
+}
+
+/*
+ * Copy @size bytes of received payload from the reassembly queue into
+ * @buf, sleeping until that much data has been queued.
+ *
+ * A 4-byte read that hits the first segment of a packet does not consume
+ * any payload: it returns a synthesized RFC1002 length instead.
+ *
+ * Return: the number of bytes stored in @buf, -ENOTCONN if the transport
+ * is not connected, -EINTR if the wait was interrupted.
+ */
+static int smb_direct_read(struct ksmbd_transport *t, char *buf,
+ unsigned int size)
+{
+ struct smb_direct_recvmsg *recvmsg;
+ struct smb_direct_data_transfer *data_transfer;
+ int to_copy, to_read, data_read, offset;
+ u32 data_length, remaining_data_length, data_offset;
+ int rc;
+ struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+
+again:
+ if (st->status != SMB_DIRECT_CS_CONNECTED) {
+ pr_err("disconnected\n");
+ return -ENOTCONN;
+ }
+
+ /*
+ * No need to hold the reassembly queue lock all the time as we are
+ * the only one reading from the front of the queue. The transport
+ * may add more entries to the back of the queue at the same time
+ */
+ if (st->reassembly_data_length >= size) {
+ int queue_length;
+ int queue_removed = 0;
+
+ /*
+ * Need to make sure reassembly_data_length is read before
+ * reading reassembly_queue_length and calling
+ * get_first_reassembly. This call is lock free
+ * as we never read at the end of the queue which are being
+ * updated in SOFTIRQ as more data is received
+ */
+ virt_rmb();
+ queue_length = st->reassembly_queue_length;
+ data_read = 0;
+ to_read = size;
+ offset = st->first_entry_offset;
+ while (data_read < size) {
+ /*
+ * NOTE(review): the result is dereferenced without a NULL
+ * check; this relies on reassembly_data_length >= size
+ * implying that enough entries are queued.
+ */
+ recvmsg = get_first_reassembly(st);
+ data_transfer = smb_direct_recvmsg_payload(recvmsg);
+ data_length = le32_to_cpu(data_transfer->data_length);
+ remaining_data_length =
+ le32_to_cpu(data_transfer->remaining_data_length);
+ data_offset = le32_to_cpu(data_transfer->data_offset);
+
+ /*
+ * The upper layer expects the RFC1002 length at the
+ * beginning of the payload. Return it to indicate
+ * the total length of the packet. This minimizes the
+ * changes to the upper layer's packet processing logic.
+ * It will eventually be removed once an intermediate
+ * transport layer is added
+ */
+ if (recvmsg->first_segment && size == 4) {
+ unsigned int rfc1002_len =
+ data_length + remaining_data_length;
+ *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
+ data_read = 4;
+ recvmsg->first_segment = false;
+ ksmbd_debug(RDMA,
+ "returning rfc1002 length %d\n",
+ rfc1002_len);
+ goto read_rfc1002_done;
+ }
+
+ /* Copy what is left of this message, capped at what is still wanted. */
+ to_copy = min_t(int, data_length - offset, to_read);
+ memcpy(buf + data_read, (char *)data_transfer + data_offset + offset,
+ to_copy);
+
+ /* move on to the next buffer? */
+ if (to_copy == data_length - offset) {
+ queue_length--;
+ /*
+ * No need to lock if we are not at the
+ * end of the queue
+ */
+ if (queue_length) {
+ list_del(&recvmsg->list);
+ } else {
+ spin_lock_irq(&st->reassembly_queue_lock);
+ list_del(&recvmsg->list);
+ spin_unlock_irq(&st->reassembly_queue_lock);
+ }
+ queue_removed++;
+ put_recvmsg(st, recvmsg);
+ offset = 0;
+ } else {
+ offset += to_copy;
+ }
+
+ to_read -= to_copy;
+ data_read += to_copy;
+ }
+
+ /* Publish how much was consumed to the enqueue side. */
+ spin_lock_irq(&st->reassembly_queue_lock);
+ st->reassembly_data_length -= data_read;
+ st->reassembly_queue_length -= queue_removed;
+ spin_unlock_irq(&st->reassembly_queue_lock);
+
+ /* The recycled buffers may allow posting new receives. */
+ spin_lock(&st->receive_credit_lock);
+ st->count_avail_recvmsg += queue_removed;
+ if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) {
+ spin_unlock(&st->receive_credit_lock);
+ mod_delayed_work(smb_direct_wq,
+ &st->post_recv_credits_work, 0);
+ } else {
+ spin_unlock(&st->receive_credit_lock);
+ }
+
+ st->first_entry_offset = offset;
+ ksmbd_debug(RDMA,
+ "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
+ data_read, st->reassembly_data_length,
+ st->first_entry_offset);
+read_rfc1002_done:
+ return data_read;
+ }
+
+ /* Not enough data yet: sleep until more arrives or the link goes down. */
+ ksmbd_debug(RDMA, "wait_event on more data\n");
+ rc = wait_event_interruptible(st->wait_reassembly_queue,
+ st->reassembly_data_length >= size ||
+ st->status != SMB_DIRECT_CS_CONNECTED);
+ if (rc)
+ return -EINTR;
+
+ goto again;
+}
+
+/*
+ * Delayed-work handler that reposts receive buffers to the QP.
+ *
+ * When the current recv_credits count is below recv_credit_target, receive
+ * messages are taken with get_free_recvmsg() and, once that returns NULL,
+ * with get_empty_recvmsg(); each one is posted via smb_direct_post_recv().
+ * The number of successfully posted buffers is then added to recv_credits
+ * and new_recv_credits and subtracted from count_avail_recvmsg. If anything
+ * was posted, send_immediate_work is queued on smb_direct_wq.
+ */
+static void smb_direct_post_recv_credits(struct work_struct *work)
+{
+ struct smb_direct_transport *t = container_of(work,
+ struct smb_direct_transport, post_recv_credits_work.work);
+ struct smb_direct_recvmsg *recvmsg;
+ int receive_credits, credits = 0;
+ int ret;
+ int use_free = 1;
+
+ /* snapshot the credit count under its lock */
+ spin_lock(&t->receive_credit_lock);
+ receive_credits = t->recv_credits;
+ spin_unlock(&t->receive_credit_lock);
+
+ if (receive_credits < t->recv_credit_target) {
+ while (true) {
+ if (use_free)
+ recvmsg = get_free_recvmsg(t);
+ else
+ recvmsg = get_empty_recvmsg(t);
+ if (!recvmsg) {
+ /*
+ * get_free_recvmsg() came up empty: switch to
+ * get_empty_recvmsg(); stop when that is empty too
+ */
+ if (use_free) {
+ use_free = 0;
+ continue;
+ } else {
+ break;
+ }
+ }
+
+ recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER;
+ recvmsg->first_segment = false;
+
+ ret = smb_direct_post_recv(t, recvmsg);
+ if (ret) {
+ /* give the buffer back and stop posting */
+ pr_err("Can't post recv: %d\n", ret);
+ put_recvmsg(t, recvmsg);
+ break;
+ }
+ credits++;
+ }
+ }
+
+ spin_lock(&t->receive_credit_lock);
+ t->recv_credits += credits;
+ t->count_avail_recvmsg -= credits;
+ spin_unlock(&t->receive_credit_lock);
+
+ /* picked up later by manage_credits_prior_sending() */
+ spin_lock(&t->lock_new_recv_credits);
+ t->new_recv_credits += credits;
+ spin_unlock(&t->lock_new_recv_credits);
+
+ if (credits)
+ queue_work(smb_direct_wq, &t->send_immediate_work);
+}
+
+/*
+ * Completion handler for a signaled send work request.
+ *
+ * Logs the completion, requests a disconnect if the status is not
+ * IB_WC_SUCCESS or the opcode is not IB_WC_SEND, decrements the matching
+ * pending-send counter (waking waiters when it reaches zero) and finally
+ * frees the completed message together with the messages linked before it
+ * on the same list.
+ */
+static void send_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct smb_direct_sendmsg *sendmsg, *sibling;
+ struct smb_direct_transport *t;
+ struct list_head *pos, *prev, *end;
+
+ sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe);
+ t = sendmsg->transport;
+
+ ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n",
+ ib_wc_status_msg(wc->status), wc->status,
+ wc->opcode);
+
+ if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
+ pr_err("Send error. status='%s (%d)', opcode=%d\n",
+ ib_wc_status_msg(wc->status), wc->status,
+ wc->opcode);
+ smb_direct_disconnect_rdma_connection(t);
+ }
+
+ /* same counter selection as smb_direct_post_send(): by SGE count */
+ if (sendmsg->num_sge > 1) {
+ if (atomic_dec_and_test(&t->send_payload_pending))
+ wake_up(&t->wait_send_payload_pending);
+ } else {
+ if (atomic_dec_and_test(&t->send_pending))
+ wake_up(&t->wait_send_pending);
+ }
+
+ /* iterate and free the list of messages in reverse. the list's head
+ * is invalid.
+ */
+ for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next;
+ prev != end; pos = prev, prev = prev->prev) {
+ sibling = container_of(pos, struct smb_direct_sendmsg, list);
+ smb_direct_free_sendmsg(t, sibling);
+ }
+
+ /* the loop stops before freeing the last visited element */
+ sibling = container_of(pos, struct smb_direct_sendmsg, list);
+ smb_direct_free_sendmsg(t, sibling);
+}
+
+/*
+ * Take and reset the receive credits accumulated in t->new_recv_credits.
+ *
+ * The counter is read and zeroed under lock_new_recv_credits; the value that
+ * was read is returned so the caller can place it in an outgoing packet.
+ */
+static int manage_credits_prior_sending(struct smb_direct_transport *t)
+{
+	int granted;
+
+	spin_lock(&t->lock_new_recv_credits);
+	granted = t->new_recv_credits;
+	t->new_recv_credits = 0;
+	spin_unlock(&t->lock_new_recv_credits);
+
+	return granted;
+}
+
+/*
+ * Post the chain of send work requests starting at wr on the transport's QP.
+ *
+ * The pending counter is chosen from the first WR only: send_payload_pending
+ * when it carries more than one SGE, send_pending otherwise. The counter is
+ * incremented before ib_post_send() and rolled back (waking any waiter when
+ * it drops to zero) if posting fails, in which case the RDMA connection is
+ * also disconnected.
+ *
+ * Returns the ib_post_send() result: 0 on success, non-zero on failure.
+ */
+static int smb_direct_post_send(struct smb_direct_transport *t,
+ struct ib_send_wr *wr)
+{
+ int ret;
+
+ if (wr->num_sge > 1)
+ atomic_inc(&t->send_payload_pending);
+ else
+ atomic_inc(&t->send_pending);
+
+ ret = ib_post_send(t->qp, wr, NULL);
+ if (ret) {
+ pr_err("failed to post send: %d\n", ret);
+ /* no completion will arrive, so undo the increment here */
+ if (wr->num_sge > 1) {
+ if (atomic_dec_and_test(&t->send_payload_pending))
+ wake_up(&t->wait_send_payload_pending);
+ } else {
+ if (atomic_dec_and_test(&t->send_pending))
+ wake_up(&t->wait_send_pending);
+ }
+ smb_direct_disconnect_rdma_connection(t);
+ }
+ return ret;
+}
+
+/*
+ * Reset a send context to the empty state: no queued messages, zero queued
+ * work requests, and the given remote-key invalidation settings.
+ */
+static void smb_direct_send_ctx_init(struct smb_direct_transport *t,
+				     struct smb_direct_send_ctx *send_ctx,
+				     bool need_invalidate_rkey,
+				     unsigned int remote_key)
+{
+	/* invalidation settings applied by smb_direct_flush_send_list() */
+	send_ctx->remote_key = remote_key;
+	send_ctx->need_invalidate_rkey = need_invalidate_rkey;
+
+	/* nothing batched yet */
+	send_ctx->wr_cnt = 0;
+	INIT_LIST_HEAD(&send_ctx->msg_list);
+}
+
+/*
+ * Post every message batched on send_ctx as a single chain of work requests.
+ *
+ * @t: transport to post on
+ * @send_ctx: batch of messages built by post_sendmsg()
+ * @is_last: true when this is the final flush for the current response; only
+ *           then is the last WR turned into IB_WR_SEND_WITH_INV (and only if
+ *           the context asks for remote key invalidation)
+ *
+ * Only the last WR of the chain is signaled, so one completion covers the
+ * whole batch. On failure the send credits held by the batch are returned
+ * and the batched messages are freed.
+ *
+ * The context is reset to empty on BOTH paths. Leaving the list head pointing
+ * at freed messages after a failed post would let a later flush of the same
+ * context (e.g. the final flush in smb_direct_writev()) touch freed memory
+ * and return the same credits twice.
+ *
+ * Returns 0 if the list was empty or posting succeeded, otherwise the error
+ * from smb_direct_post_send().
+ */
+static int smb_direct_flush_send_list(struct smb_direct_transport *t,
+				      struct smb_direct_send_ctx *send_ctx,
+				      bool is_last)
+{
+	struct smb_direct_sendmsg *first, *last;
+	struct smb_direct_sendmsg *msg, *tmp;
+	int ret;
+
+	if (list_empty(&send_ctx->msg_list))
+		return 0;
+
+	first = list_first_entry(&send_ctx->msg_list,
+				 struct smb_direct_sendmsg,
+				 list);
+	last = list_last_entry(&send_ctx->msg_list,
+			       struct smb_direct_sendmsg,
+			       list);
+
+	/* request a completion for the tail of the chain only */
+	last->wr.send_flags = IB_SEND_SIGNALED;
+	last->wr.wr_cqe = &last->cqe;
+	if (is_last && send_ctx->need_invalidate_rkey) {
+		last->wr.opcode = IB_WR_SEND_WITH_INV;
+		last->wr.ex.invalidate_rkey = send_ctx->remote_key;
+	}
+
+	ret = smb_direct_post_send(t, &first->wr);
+	if (ret) {
+		/* nothing was sent: hand the credits back and drop the batch */
+		atomic_add(send_ctx->wr_cnt, &t->send_credits);
+		wake_up(&t->wait_send_credits);
+		list_for_each_entry_safe(msg, tmp, &send_ctx->msg_list, list)
+			smb_direct_free_sendmsg(t, msg);
+	}
+
+	/*
+	 * On success the messages now belong to the completion handler; on
+	 * failure they were just freed. Either way the context must not keep
+	 * referring to them.
+	 */
+	smb_direct_send_ctx_init(t, send_ctx,
+				 send_ctx->need_invalidate_rkey,
+				 send_ctx->remote_key);
+	return ret;
+}
+
+/*
+ * Take one credit from *credits, sleeping on *waitq until one is available.
+ *
+ * The counter is decremented optimistically; if that drives it below zero the
+ * decrement is undone and the caller sleeps (interruptibly) until the counter
+ * is positive or the transport leaves the CONNECTED state, then retries.
+ *
+ * Returns 0 once a credit has been taken, -ENOTCONN if the transport is no
+ * longer connected after the wait, or the negative value returned by
+ * wait_event_interruptible() when the wait was interrupted.
+ */
+static int wait_for_credits(struct smb_direct_transport *t,
+ wait_queue_head_t *waitq, atomic_t *credits)
+{
+ int ret;
+
+ do {
+ if (atomic_dec_return(credits) >= 0)
+ return 0;
+
+ /* went negative: undo the decrement and wait for a refill */
+ atomic_inc(credits);
+ ret = wait_event_interruptible(*waitq,
+ atomic_read(credits) > 0 ||
+ t->status != SMB_DIRECT_CS_CONNECTED);
+
+ /* a lost connection takes precedence over an interrupted wait */
+ if (t->status != SMB_DIRECT_CS_CONNECTED)
+ return -ENOTCONN;
+ else if (ret < 0)
+ return ret;
+ } while (true);
+}
+
+/*
+ * Reserve one send credit, flushing the batched send list first when needed.
+ *
+ * If a send context is supplied and it already holds 16 or more work
+ * requests, or at most one send credit is left, the batched messages are
+ * posted (with is_last == false) before waiting for a credit.
+ *
+ * Returns 0 once a credit has been taken, or the error from the flush or
+ * from wait_for_credits().
+ */
+static int wait_for_send_credits(struct smb_direct_transport *t,
+ struct smb_direct_send_ctx *send_ctx)
+{
+ int ret;
+
+ if (send_ctx &&
+ (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) {
+ ret = smb_direct_flush_send_list(t, send_ctx, false);
+ if (ret)
+ return ret;
+ }
+
+ return wait_for_credits(t, &t->wait_send_credits, &t->send_credits);
+}
+
+/*
+ * Allocate a send message and fill in its SMB Direct data transfer header.
+ *
+ * @t: transport the message will be sent on
+ * @size: payload length in bytes that will follow the header (0 for none)
+ * @remaining_data_length: value stored in the header field of the same name
+ * @sendmsg_out: receives the prepared message on success
+ *
+ * The header is DMA-mapped into sge[0] and num_sge is set to 1; payload SGEs
+ * are left for the caller to append.
+ *
+ * Returns 0 on success, the PTR_ERR() of a failed allocation, or the
+ * ib_dma_mapping_error() result (after freeing the message).
+ */
+static int smb_direct_create_header(struct smb_direct_transport *t,
+ int size, int remaining_data_length,
+ struct smb_direct_sendmsg **sendmsg_out)
+{
+ struct smb_direct_sendmsg *sendmsg;
+ struct smb_direct_data_transfer *packet;
+ int header_length;
+ int ret;
+
+ sendmsg = smb_direct_alloc_sendmsg(t);
+ if (IS_ERR(sendmsg))
+ return PTR_ERR(sendmsg);
+
+ /* Fill in the packet header */
+ packet = (struct smb_direct_data_transfer *)sendmsg->packet;
+ packet->credits_requested = cpu_to_le16(t->send_credit_target);
+ /* grants (and resets) the credits accumulated since the last send */
+ packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
+
+ packet->flags = 0;
+ packet->reserved = 0;
+ /* with a payload, data starts 24 bytes in, after the fixed fields */
+ if (!size)
+ packet->data_offset = 0;
+ else
+ packet->data_offset = cpu_to_le32(24);
+ packet->data_length = cpu_to_le32(size);
+ packet->remaining_data_length = cpu_to_le32(remaining_data_length);
+ packet->padding = 0;
+
+ ksmbd_debug(RDMA,
+ "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
+ le16_to_cpu(packet->credits_requested),
+ le16_to_cpu(packet->credits_granted),
+ le32_to_cpu(packet->data_offset),
+ le32_to_cpu(packet->data_length),
+ le32_to_cpu(packet->remaining_data_length));
+
+ /* Map the packet to DMA */
+ header_length = sizeof(struct smb_direct_data_transfer);
+ /* If this is a packet without payload, don't send padding */
+ if (!size)
+ header_length =
+ offsetof(struct smb_direct_data_transfer, padding);
+
+ sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
+ (void *)packet,
+ header_length,
+ DMA_TO_DEVICE);
+ ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
+ if (ret) {
+ smb_direct_free_sendmsg(t, sendmsg);
+ return ret;
+ }
+
+ sendmsg->num_sge = 1;
+ sendmsg->sge[0].length = header_length;
+ sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
+
+ *sendmsg_out = sendmsg;
+ return 0;
+}
+
+/*
+ * Describe the buffer [buf, buf + size) with one scatterlist entry per page.
+ *
+ * @buf: start of the buffer; may be a vmalloc address
+ * @size: length of the buffer in bytes
+ * @sg_list: scatterlist to fill
+ * @nentries: number of entries available in @sg_list
+ *
+ * -EINVAL is returned if @nentries is smaller than
+ * get_buf_page_count(buf, size) or if the list runs out while pages remain.
+ * vmalloc addresses are translated with vmalloc_to_page(), all others with
+ * kmap_to_page().
+ *
+ * Returns the number of entries filled in (0 when size <= 0).
+ */
+static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries)
+{
+ bool high = is_vmalloc_addr(buf);
+ struct page *page;
+ int offset, len;
+ int i = 0;
+
+ if (nentries < get_buf_page_count(buf, size))
+ return -EINVAL;
+
+ /* walk page by page, starting from the page that contains buf */
+ offset = offset_in_page(buf);
+ buf -= offset;
+ while (size > 0) {
+ len = min_t(int, PAGE_SIZE - offset, size);
+ if (high)
+ page = vmalloc_to_page(buf);
+ else
+ page = kmap_to_page(buf);
+
+ if (!sg_list)
+ return -EINVAL;
+ sg_set_page(sg_list, page, len, offset);
+ sg_list = sg_next(sg_list);
+
+ buf += PAGE_SIZE;
+ size -= len;
+ /* only the first page can start at a non-zero offset */
+ offset = 0;
+ i++;
+ }
+ return i;
+}
+
+/*
+ * Build a scatterlist for buf with get_sg_list() and DMA-map it.
+ *
+ * Returns the ib_dma_map_sg() result, or -EINVAL when get_sg_list() fails or
+ * produces no entries.
+ */
+static int get_mapped_sg_list(struct ib_device *device, void *buf, int size,
+			      struct scatterlist *sg_list, int nentries,
+			      enum dma_data_direction dir)
+{
+	int nr_sg = get_sg_list(buf, size, sg_list, nentries);
+
+	return nr_sg > 0 ? ib_dma_map_sg(device, sg_list, nr_sg, dir) : -EINVAL;
+}
+
+/*
+ * Prepare msg's send work request and either batch it or post it.
+ *
+ * All SGEs are synced for the device and the WR is set up as a plain
+ * IB_WR_SEND. With a send context the WR is left unsignaled, chained behind
+ * the previously batched WR and appended to the context's list (to be posted
+ * by smb_direct_flush_send_list()); 0 is returned. Without a context the WR
+ * is signaled and posted immediately, and the smb_direct_post_send() result
+ * is returned.
+ */
+static int post_sendmsg(struct smb_direct_transport *t,
+ struct smb_direct_send_ctx *send_ctx,
+ struct smb_direct_sendmsg *msg)
+{
+ int i;
+
+ for (i = 0; i < msg->num_sge; i++)
+ ib_dma_sync_single_for_device(t->cm_id->device,
+ msg->sge[i].addr, msg->sge[i].length,
+ DMA_TO_DEVICE);
+
+ msg->cqe.done = send_done;
+ msg->wr.opcode = IB_WR_SEND;
+ msg->wr.sg_list = &msg->sge[0];
+ msg->wr.num_sge = msg->num_sge;
+ msg->wr.next = NULL;
+
+ if (send_ctx) {
+ /* batched: signaling is decided when the list is flushed */
+ msg->wr.wr_cqe = NULL;
+ msg->wr.send_flags = 0;
+ if (!list_empty(&send_ctx->msg_list)) {
+ struct smb_direct_sendmsg *last;
+
+ last = list_last_entry(&send_ctx->msg_list,
+ struct smb_direct_sendmsg,
+ list);
+ last->wr.next = &msg->wr;
+ }
+ list_add_tail(&msg->list, &send_ctx->msg_list);
+ send_ctx->wr_cnt++;
+ return 0;
+ }
+
+ msg->wr.wr_cqe = &msg->cqe;
+ msg->wr.send_flags = IB_SEND_SIGNALED;
+ return smb_direct_post_send(t, &msg->wr);
+}
+
+/*
+ * Send one SMB Direct data transfer message carrying the given kvecs.
+ *
+ * @t: transport to send on
+ * @send_ctx: batching context, or NULL to post immediately
+ * @iov: payload vectors
+ * @niov: number of entries in @iov
+ * @remaining_data_length: value for the header's remaining_data_length
+ *
+ * Takes one send credit, builds the header, DMA-maps every kvec and appends
+ * the resulting segments as SGEs behind the header, then hands the message
+ * to post_sendmsg(). On any failure after the credit was taken the credit is
+ * returned.
+ *
+ * Returns 0 on success or a negative error.
+ */
+static int smb_direct_post_send_data(struct smb_direct_transport *t,
+ struct smb_direct_send_ctx *send_ctx,
+ struct kvec *iov, int niov,
+ int remaining_data_length)
+{
+ int i, j, ret;
+ struct smb_direct_sendmsg *msg;
+ int data_length;
+ struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1];
+
+ ret = wait_for_send_credits(t, send_ctx);
+ if (ret)
+ return ret;
+
+ data_length = 0;
+ for (i = 0; i < niov; i++)
+ data_length += iov[i].iov_len;
+
+ ret = smb_direct_create_header(t, data_length, remaining_data_length,
+ &msg);
+ if (ret) {
+ atomic_inc(&t->send_credits);
+ return ret;
+ }
+
+ for (i = 0; i < niov; i++) {
+ struct ib_sge *sge;
+ int sg_cnt;
+
+ /* the on-stack scatterlist is reused for every kvec */
+ sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1);
+ sg_cnt = get_mapped_sg_list(t->cm_id->device,
+ iov[i].iov_base, iov[i].iov_len,
+ sg, SMB_DIRECT_MAX_SEND_SGES - 1,
+ DMA_TO_DEVICE);
+ if (sg_cnt <= 0) {
+ pr_err("failed to map buffer\n");
+ ret = -ENOMEM;
+ goto err;
+ } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES - 1) {
+ pr_err("buffer not fitted into sges\n");
+ ret = -E2BIG;
+ ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt,
+ DMA_TO_DEVICE);
+ goto err;
+ }
+
+ /* copy the mapped segments into the message's SGE array */
+ for (j = 0; j < sg_cnt; j++) {
+ sge = &msg->sge[msg->num_sge];
+ sge->addr = sg_dma_address(&sg[j]);
+ sge->length = sg_dma_len(&sg[j]);
+ sge->lkey = t->pd->local_dma_lkey;
+ msg->num_sge++;
+ }
+ }
+
+ ret = post_sendmsg(t, send_ctx, msg);
+ if (ret)
+ goto err;
+ return 0;
+err:
+ smb_direct_free_sendmsg(t, msg);
+ atomic_inc(&t->send_credits);
+ return ret;
+}
+
+/*
+ * Send an SMB response over the RDMA transport (ksmbd_transport_ops.writev).
+ *
+ * @t: generic transport handle
+ * @iov: response vectors; iov[0] starts with the 4-byte RFC1002 header, which
+ *       is skipped by adjusting iov[0] in place
+ * @niovs: number of entries in @iov
+ * @buflen: total length of the response including the RFC1002 header
+ * @need_invalidate: ask the peer to invalidate @remote_key with the last send
+ * @remote_key: remote key to invalidate
+ *
+ * The vectors are grouped into SMB Direct messages whose payload does not
+ * exceed max_send_size minus the data transfer header; a single vector that
+ * is too large is split. Messages are batched in a send context and flushed
+ * at the end, after which the function waits for all payload sends to
+ * complete.
+ *
+ * Returns 0 on success. If queuing a message fails, that error is returned
+ * even when the final flush itself succeeds; otherwise the flush result is
+ * returned. -ENOTCONN is returned if the transport is not connected.
+ */
+static int smb_direct_writev(struct ksmbd_transport *t,
+			     struct kvec *iov, int niovs, int buflen,
+			     bool need_invalidate, unsigned int remote_key)
+{
+	struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+	int remaining_data_length;
+	int start, i, j;
+	int max_iov_size = st->max_send_size -
+			sizeof(struct smb_direct_data_transfer);
+	int ret = 0;
+	int flush_ret;
+	struct kvec vec;
+	struct smb_direct_send_ctx send_ctx;
+
+	if (st->status != SMB_DIRECT_CS_CONNECTED)
+		return -ENOTCONN;
+
+	/* FIXME: skip RFC1002 header.. */
+	buflen -= 4;
+	iov[0].iov_base += 4;
+	iov[0].iov_len -= 4;
+
+	remaining_data_length = buflen;
+	ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);
+
+	smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
+	start = i = 0;
+	/* from here on buflen is the size of the group iov[start..i] */
+	buflen = 0;
+	while (true) {
+		buflen += iov[i].iov_len;
+		if (buflen > max_iov_size) {
+			if (i > start) {
+				/* send the group without iov[i], then retry iov[i] */
+				remaining_data_length -=
+					(buflen - iov[i].iov_len);
+				ret = smb_direct_post_send_data(st, &send_ctx,
+								&iov[start], i - start,
+								remaining_data_length);
+				if (ret)
+					goto done;
+			} else {
+				/* iov[start] is too big, break it */
+				int nvec = (buflen + max_iov_size - 1) /
+						max_iov_size;
+
+				for (j = 0; j < nvec; j++) {
+					vec.iov_base =
+						(char *)iov[start].iov_base +
+						j * max_iov_size;
+					vec.iov_len =
+						min_t(int, max_iov_size,
+						      buflen - max_iov_size * j);
+					remaining_data_length -= vec.iov_len;
+					ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1,
+									remaining_data_length);
+					if (ret)
+						goto done;
+				}
+				i++;
+				if (i == niovs)
+					break;
+			}
+			start = i;
+			buflen = 0;
+		} else {
+			i++;
+			if (i == niovs) {
+				/* send out all remaining vecs */
+				remaining_data_length -= buflen;
+				ret = smb_direct_post_send_data(st, &send_ctx,
+								&iov[start], i - start,
+								remaining_data_length);
+				if (ret)
+					goto done;
+				break;
+			}
+		}
+	}
+
+done:
+	/*
+	 * Always flush what has been batched so far, but do not let a
+	 * successful flush hide an earlier queuing error.
+	 */
+	flush_ret = smb_direct_flush_send_list(st, &send_ctx, true);
+	if (!ret)
+		ret = flush_ret;
+
+	/*
+	 * As an optimization, we don't wait for individual I/O to finish
+	 * before sending the next one.
+	 * Send them all and wait for pending send count to get to 0
+	 * that means all the I/Os have been out and we are good to return
+	 */
+
+	wait_event(st->wait_send_payload_pending,
+		   atomic_read(&st->send_payload_pending) == 0);
+	return ret;
+}
+
+/*
+ * Common completion handler for RDMA read and write work requests.
+ *
+ * @dir is the DMA direction the rdma_rw context was initialised with. A
+ * failed completion triggers a disconnect. In all cases the rw_avail_ops
+ * slot is returned, the rdma_rw context and scatterlist table are released,
+ * the waiter in smb_direct_rdma_xmit() is completed and the message is
+ * freed.
+ */
+static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
+ enum dma_data_direction dir)
+{
+ struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe,
+ struct smb_direct_rdma_rw_msg, cqe);
+ struct smb_direct_transport *t = msg->t;
+
+ if (wc->status != IB_WC_SUCCESS) {
+ pr_err("read/write error. opcode = %d, status = %s(%d)\n",
+ wc->opcode, ib_wc_status_msg(wc->status), wc->status);
+ smb_direct_disconnect_rdma_connection(t);
+ }
+
+ /* give back the slot taken in smb_direct_rdma_xmit() */
+ if (atomic_inc_return(&t->rw_avail_ops) > 0)
+ wake_up(&t->wait_rw_avail_ops);
+
+ rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
+ msg->sg_list, msg->sgt.nents, dir);
+ sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
+ /* msg->completion points at the waiter's on-stack completion */
+ complete(msg->completion);
+ kfree(msg);
+}
+
+/* Completion callback for RDMA reads: read_write_done() with DMA_FROM_DEVICE. */
+static void read_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ read_write_done(cq, wc, DMA_FROM_DEVICE);
+}
+
+/* Completion callback for RDMA writes: read_write_done() with DMA_TO_DEVICE. */
+static void write_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ read_write_done(cq, wc, DMA_TO_DEVICE);
+}
+
+/*
+ * Perform one synchronous RDMA read or write between buf and the remote
+ * memory described by remote_key/remote_offset.
+ *
+ * @t: transport to use
+ * @buf: local buffer
+ * @buf_len: length of @buf in bytes
+ * @remote_key: remote key of the peer's memory region
+ * @remote_offset: remote address passed to rdma_rw_ctx_init()
+ * @remote_len: not used by this function
+ * @is_read: true for an RDMA read (DMA_FROM_DEVICE), false for a write
+ *
+ * One rw_avail_ops slot is taken for the duration of the transfer. The
+ * buffer is described by a chained scatterlist, an rdma_rw context is built
+ * from it and the resulting work requests are posted; the caller then sleeps
+ * until the completion handler signals the on-stack completion. On the
+ * success path the slot, the context and the message are released by
+ * read_write_done().
+ *
+ * Returns 0 once the completion has fired (the work completion status is
+ * not propagated to the caller), or the value of ret from the setup step
+ * that failed.
+ * NOTE(review): if get_sg_list() were to return 0, the error path would
+ * also return 0 - confirm that cannot happen for valid callers.
+ */
+static int smb_direct_rdma_xmit(struct smb_direct_transport *t, void *buf,
+ int buf_len, u32 remote_key, u64 remote_offset,
+ u32 remote_len, bool is_read)
+{
+ struct smb_direct_rdma_rw_msg *msg;
+ int ret;
+ DECLARE_COMPLETION_ONSTACK(completion);
+ struct ib_send_wr *first_wr = NULL;
+
+ ret = wait_for_credits(t, &t->wait_rw_avail_ops, &t->rw_avail_ops);
+ if (ret < 0)
+ return ret;
+
+ /* TODO: mempool */
+ msg = kmalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) +
+ sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL);
+ if (!msg) {
+ atomic_inc(&t->rw_avail_ops);
+ return -ENOMEM;
+ }
+
+ /* the first SG_CHUNK_SIZE entries live inline in msg */
+ msg->sgt.sgl = &msg->sg_list[0];
+ ret = sg_alloc_table_chained(&msg->sgt,
+ get_buf_page_count(buf, buf_len),
+ msg->sg_list, SG_CHUNK_SIZE);
+ if (ret) {
+ atomic_inc(&t->rw_avail_ops);
+ kfree(msg);
+ return -ENOMEM;
+ }
+
+ ret = get_sg_list(buf, buf_len, msg->sgt.sgl, msg->sgt.orig_nents);
+ if (ret <= 0) {
+ pr_err("failed to get pages\n");
+ goto err;
+ }
+
+ ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port,
+ msg->sg_list, get_buf_page_count(buf, buf_len),
+ 0, remote_offset, remote_key,
+ is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ if (ret < 0) {
+ pr_err("failed to init rdma_rw_ctx: %d\n", ret);
+ goto err;
+ }
+
+ msg->t = t;
+ msg->cqe.done = is_read ? read_done : write_done;
+ msg->completion = &completion;
+ first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port,
+ &msg->cqe, NULL);
+
+ ret = ib_post_send(t->qp, first_wr, NULL);
+ if (ret) {
+ pr_err("failed to post send wr: %d\n", ret);
+ goto err;
+ }
+
+ wait_for_completion(&completion);
+ return 0;
+
+err:
+ atomic_inc(&t->rw_avail_ops);
+ /* first_wr is only set once the rdma_rw context was initialised */
+ if (first_wr)
+ rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
+ msg->sg_list, msg->sgt.nents,
+ is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
+ kfree(msg);
+ return ret;
+}
+
+/*
+ * ksmbd_transport_ops.rdma_write: RDMA-write buf to the peer's memory by
+ * calling smb_direct_rdma_xmit() with is_read == false.
+ */
+static int smb_direct_rdma_write(struct ksmbd_transport *t, void *buf,
+				 unsigned int buflen, u32 remote_key,
+				 u64 remote_offset, u32 remote_len)
+{
+	struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+
+	return smb_direct_rdma_xmit(st, buf, buflen, remote_key,
+				    remote_offset, remote_len, false);
+}
+
+/*
+ * ksmbd_transport_ops.rdma_read: RDMA-read from the peer's memory into buf
+ * by calling smb_direct_rdma_xmit() with is_read == true.
+ */
+static int smb_direct_rdma_read(struct ksmbd_transport *t, void *buf,
+				unsigned int buflen, u32 remote_key,
+				u64 remote_offset, u32 remote_len)
+{
+	struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+
+	return smb_direct_rdma_xmit(st, buf, buflen, remote_key,
+				    remote_offset, remote_len, true);
+}
+
+/*
+ * ksmbd_transport_ops.disconnect: tear down the RDMA connection and free
+ * the transport.
+ *
+ * The disconnect work function is run synchronously, then the caller waits
+ * (interruptibly) for the status to become SMB_DIRECT_CS_DISCONNECTED
+ * before freeing the transport.
+ * NOTE(review): the wait's return value is ignored, so the transport is
+ * freed even if the wait is interrupted - confirm this is intended.
+ */
+static void smb_direct_disconnect(struct ksmbd_transport *t)
+{
+ struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+
+ ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id);
+
+ smb_direct_disconnect_rdma_work(&st->disconnect_work);
+ wait_event_interruptible(st->wait_status,
+ st->status == SMB_DIRECT_CS_DISCONNECTED);
+ free_transport(st);
+}
+
+/*
+ * RDMA CM event handler for an accepted connection (installed by
+ * smb_direct_create_qpair()).
+ *
+ * Updates t->status for ESTABLISHED, DEVICE_REMOVAL, DISCONNECTED and
+ * CONNECT_ERROR events and wakes the wait queues whose conditions depend on
+ * the status; any other event is only logged. Always returns 0.
+ */
+static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event)
+{
+ struct smb_direct_transport *t = cm_id->context;
+
+ ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n",
+ cm_id, rdma_event_msg(event->event), event->event);
+
+ switch (event->event) {
+ case RDMA_CM_EVENT_ESTABLISHED: {
+ t->status = SMB_DIRECT_CS_CONNECTED;
+ wake_up_interruptible(&t->wait_status);
+ break;
+ }
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ case RDMA_CM_EVENT_DISCONNECTED: {
+ t->status = SMB_DIRECT_CS_DISCONNECTED;
+ /* release readers and senders blocked on this connection */
+ wake_up_interruptible(&t->wait_status);
+ wake_up_interruptible(&t->wait_reassembly_queue);
+ wake_up(&t->wait_send_credits);
+ break;
+ }
+ case RDMA_CM_EVENT_CONNECT_ERROR: {
+ t->status = SMB_DIRECT_CS_DISCONNECTED;
+ wake_up_interruptible(&t->wait_status);
+ break;
+ }
+ default:
+ pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n",
+ cm_id, rdma_event_msg(event->event),
+ event->event);
+ break;
+ }
+ return 0;
+}
+
+/*
+ * Asynchronous QP event handler (installed through ib_qp_init_attr).
+ *
+ * Logs every event; IB_EVENT_CQ_ERR and IB_EVENT_QP_FATAL additionally
+ * trigger a disconnect of the RDMA connection. All other events are ignored.
+ */
+static void smb_direct_qpair_handler(struct ib_event *event, void *context)
+{
+	struct smb_direct_transport *t = context;
+
+	ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n",
+		    t->cm_id, ib_event_msg(event->event), event->event);
+
+	if (event->event == IB_EVENT_CQ_ERR ||
+	    event->event == IB_EVENT_QP_FATAL)
+		smb_direct_disconnect_rdma_connection(t);
+}
+
+/*
+ * Build and send the SMB Direct negotiate response, then wait for the send
+ * to complete.
+ *
+ * @t: transport to send on
+ * @failed: non-zero to send a zeroed response with STATUS_NOT_SUPPORTED and
+ *          version 0x0100; zero to send a STATUS_SUCCESS response carrying
+ *          the transport's negotiated credit and size parameters
+ *
+ * Returns 0 on success, -ENOMEM if no send message could be allocated, or
+ * the error from DMA mapping or posting (the message is freed in both
+ * cases).
+ */
+static int smb_direct_send_negotiate_response(struct smb_direct_transport *t,
+ int failed)
+{
+ struct smb_direct_sendmsg *sendmsg;
+ struct smb_direct_negotiate_resp *resp;
+ int ret;
+
+ sendmsg = smb_direct_alloc_sendmsg(t);
+ if (IS_ERR(sendmsg))
+ return -ENOMEM;
+
+ resp = (struct smb_direct_negotiate_resp *)sendmsg->packet;
+ if (failed) {
+ memset(resp, 0, sizeof(*resp));
+ resp->min_version = cpu_to_le16(0x0100);
+ resp->max_version = cpu_to_le16(0x0100);
+ resp->status = STATUS_NOT_SUPPORTED;
+ } else {
+ resp->status = STATUS_SUCCESS;
+ resp->min_version = SMB_DIRECT_VERSION_LE;
+ resp->max_version = SMB_DIRECT_VERSION_LE;
+ resp->negotiated_version = SMB_DIRECT_VERSION_LE;
+ resp->reserved = 0;
+ resp->credits_requested =
+ cpu_to_le16(t->send_credit_target);
+ resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
+ resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size);
+ resp->preferred_send_size = cpu_to_le32(t->max_send_size);
+ resp->max_receive_size = cpu_to_le32(t->max_recv_size);
+ resp->max_fragmented_size =
+ cpu_to_le32(t->max_fragmented_recv_size);
+ }
+
+ sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
+ (void *)resp, sizeof(*resp),
+ DMA_TO_DEVICE);
+ ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
+ if (ret) {
+ smb_direct_free_sendmsg(t, sendmsg);
+ return ret;
+ }
+
+ sendmsg->num_sge = 1;
+ sendmsg->sge[0].length = sizeof(*resp);
+ sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
+
+ /* no send context: the response is posted immediately and signaled */
+ ret = post_sendmsg(t, NULL, sendmsg);
+ if (ret) {
+ smb_direct_free_sendmsg(t, sendmsg);
+ return ret;
+ }
+
+ /* single-SGE sends are tracked by send_pending */
+ wait_event(t->wait_send_pending,
+ atomic_read(&t->send_pending) == 0);
+ return 0;
+}
+
+/*
+ * Accept the pending RDMA connection request and wait for it to be
+ * established.
+ *
+ * The initiator depth is capped at SMB_DIRECT_CM_INITIATOR_DEPTH and at the
+ * device's max_qp_rd_atom; responder resources are 0. For ports reporting
+ * RDMA_CORE_PORT_IWARP a two-word header is passed as private data.
+ *
+ * Returns 0 once the status is SMB_DIRECT_CS_CONNECTED, the rdma_accept()
+ * error, or -ENOTCONN if the wait ends in any other state.
+ * NOTE(review): the wait is interruptible and its result is not checked; an
+ * interrupted wait with the status still SMB_DIRECT_CS_NEW also yields
+ * -ENOTCONN.
+ */
+static int smb_direct_accept_client(struct smb_direct_transport *t)
+{
+ struct rdma_conn_param conn_param;
+ struct ib_port_immutable port_immutable;
+ u32 ird_ord_hdr[2];
+ int ret;
+
+ memset(&conn_param, 0, sizeof(conn_param));
+ conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom,
+ SMB_DIRECT_CM_INITIATOR_DEPTH);
+ conn_param.responder_resources = 0;
+
+ t->cm_id->device->ops.get_port_immutable(t->cm_id->device,
+ t->cm_id->port_num,
+ &port_immutable);
+ if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
+ /* NOTE(review): values are stored in host byte order - confirm */
+ ird_ord_hdr[0] = conn_param.responder_resources;
+ ird_ord_hdr[1] = 1;
+ conn_param.private_data = ird_ord_hdr;
+ conn_param.private_data_len = sizeof(ird_ord_hdr);
+ } else {
+ conn_param.private_data = NULL;
+ conn_param.private_data_len = 0;
+ }
+ conn_param.retry_count = SMB_DIRECT_CM_RETRY;
+ conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY;
+ conn_param.flow_control = 0;
+
+ ret = rdma_accept(t->cm_id, &conn_param);
+ if (ret) {
+ pr_err("error at rdma_accept: %d\n", ret);
+ return ret;
+ }
+
+ /* smb_direct_cm_handler() moves the status away from NEW */
+ wait_event_interruptible(t->wait_status,
+ t->status != SMB_DIRECT_CS_NEW);
+ if (t->status != SMB_DIRECT_CS_CONNECTED)
+ return -ENOTCONN;
+ return 0;
+}
+
+/*
+ * Run the SMB Direct negotiation for a new connection.
+ *
+ * A receive buffer for the negotiate request is posted before the
+ * connection is accepted. After accepting, receive credits are posted and
+ * the function waits up to SMB_DIRECT_NEGOTIATE_TIMEOUT seconds for the
+ * request. The peer's size limits are then folded into the transport's
+ * limits and the negotiate response is sent.
+ *
+ * Returns 0 on success; -ENOMEM if no receive message is available,
+ * -ETIMEDOUT if the wait timed out or ended with the transport
+ * disconnected, a negative error if the wait was interrupted, or the error
+ * from the posting, accept, check or response steps.
+ */
+static int smb_direct_negotiate(struct smb_direct_transport *t)
+{
+ int ret;
+ struct smb_direct_recvmsg *recvmsg;
+ struct smb_direct_negotiate_req *req;
+
+ recvmsg = get_free_recvmsg(t);
+ if (!recvmsg)
+ return -ENOMEM;
+ recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ;
+
+ ret = smb_direct_post_recv(t, recvmsg);
+ if (ret) {
+ pr_err("Can't post recv: %d\n", ret);
+ goto out;
+ }
+
+ t->negotiation_requested = false;
+ ret = smb_direct_accept_client(t);
+ if (ret) {
+ pr_err("Can't accept client\n");
+ goto out;
+ }
+
+ /* run the credit-posting work function synchronously */
+ smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
+
+ ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
+ ret = wait_event_interruptible_timeout(t->wait_status,
+ t->negotiation_requested ||
+ t->status == SMB_DIRECT_CS_DISCONNECTED,
+ SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
+ if (ret <= 0 || t->status == SMB_DIRECT_CS_DISCONNECTED) {
+ ret = ret < 0 ? ret : -ETIMEDOUT;
+ goto out;
+ }
+
+ ret = smb_direct_check_recvmsg(recvmsg);
+ if (ret == -ECONNABORTED)
+ goto out;
+
+ /* clamp our limits to what the peer announced */
+ req = (struct smb_direct_negotiate_req *)recvmsg->packet;
+ t->max_recv_size = min_t(int, t->max_recv_size,
+ le32_to_cpu(req->preferred_send_size));
+ t->max_send_size = min_t(int, t->max_send_size,
+ le32_to_cpu(req->max_receive_size));
+ t->max_fragmented_send_size =
+ le32_to_cpu(req->max_fragmented_size);
+
+ /* a non-zero check result is reported to the peer as a failure */
+ ret = smb_direct_send_negotiate_response(t, ret);
+out:
+ if (recvmsg)
+ put_recvmsg(t, recvmsg);
+ return ret;
+}
+
+/*
+ * Validate the module parameters against the device limits and initialise
+ * the transport's credit and size settings.
+ *
+ * @t: transport being set up
+ * @cap: filled in with the QP capabilities to request
+ *
+ * Returns 0 on success or -EINVAL when the configured send size needs more
+ * SGEs than SMB_DIRECT_MAX_SEND_SGES, when the required number of work
+ * requests or receive credits exceeds the device's max_cqe/max_qp_wr, or
+ * when the device supports too few send/receive SGEs.
+ */
+static int smb_direct_init_params(struct smb_direct_transport *t,
+				  struct ib_qp_cap *cap)
+{
+	struct ib_device *device = t->cm_id->device;
+	int max_send_sges, max_pages, max_rw_wrs, max_send_wrs;
+
+	/* need 2 more sge. because a SMB_DIRECT header will be mapped,
+	 * and maybe a send buffer could be not page aligned.
+	 */
+	t->max_send_size = smb_direct_max_send_size;
+	max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 2;
+	if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) {
+		pr_err("max_send_size %d is too large\n", t->max_send_size);
+		return -EINVAL;
+	}
+
+	/*
+	 * allow smb_direct_max_outstanding_rw_ops of in-flight RDMA
+	 * read/writes. HCA guarantees at least max_send_sge of sges for
+	 * a RDMA read/write work request, and if memory registration is used,
+	 * we need reg_mr, local_inv wrs for each read/write.
+	 */
+	t->max_rdma_rw_size = smb_direct_max_read_write_size;
+	max_pages = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
+	max_rw_wrs = DIV_ROUND_UP(max_pages, SMB_DIRECT_MAX_SEND_SGES);
+	max_rw_wrs += rdma_rw_mr_factor(device, t->cm_id->port_num,
+			max_pages) * 2;
+	max_rw_wrs *= smb_direct_max_outstanding_rw_ops;
+
+	max_send_wrs = smb_direct_send_credit_target + max_rw_wrs;
+	if (max_send_wrs > device->attrs.max_cqe ||
+	    max_send_wrs > device->attrs.max_qp_wr) {
+		pr_err("consider lowering send_credit_target = %d, or max_outstanding_rw_ops = %d\n",
+		       smb_direct_send_credit_target,
+		       smb_direct_max_outstanding_rw_ops);
+		pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
+		       device->attrs.max_cqe, device->attrs.max_qp_wr);
+		return -EINVAL;
+	}
+
+	if (smb_direct_receive_credit_max > device->attrs.max_cqe ||
+	    smb_direct_receive_credit_max > device->attrs.max_qp_wr) {
+		pr_err("consider lowering receive_credit_max = %d\n",
+		       smb_direct_receive_credit_max);
+		pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
+		       device->attrs.max_cqe, device->attrs.max_qp_wr);
+		return -EINVAL;
+	}
+
+	if (device->attrs.max_send_sge < SMB_DIRECT_MAX_SEND_SGES) {
+		pr_err("warning: device max_send_sge = %d too small\n",
+		       device->attrs.max_send_sge);
+		return -EINVAL;
+	}
+	if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
+		pr_err("warning: device max_recv_sge = %d too small\n",
+		       device->attrs.max_recv_sge);
+		return -EINVAL;
+	}
+
+	t->recv_credits = 0;
+	t->count_avail_recvmsg = 0;
+
+	t->recv_credit_max = smb_direct_receive_credit_max;
+	t->recv_credit_target = 10;
+	t->new_recv_credits = 0;
+
+	t->send_credit_target = smb_direct_send_credit_target;
+	atomic_set(&t->send_credits, 0);
+	atomic_set(&t->rw_avail_ops, smb_direct_max_outstanding_rw_ops);
+
+	/* max_send_size was already set at the top of this function */
+	t->max_recv_size = smb_direct_max_receive_size;
+	t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size;
+
+	cap->max_send_wr = max_send_wrs;
+	cap->max_recv_wr = t->recv_credit_max;
+	cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES;
+	cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
+	cap->max_inline_data = 0;
+	cap->max_rdma_ctxs = 0;
+	return 0;
+}
+
+/*
+ * Release the send/receive message pools created by
+ * smb_direct_create_pools().
+ *
+ * Every receive message still obtainable through get_free_recvmsg() or
+ * get_empty_recvmsg() is returned to its mempool first; then the mempools
+ * and slab caches are destroyed and their pointers cleared.
+ */
+static void smb_direct_destroy_pools(struct smb_direct_transport *t)
+{
+ struct smb_direct_recvmsg *recvmsg;
+
+ while ((recvmsg = get_free_recvmsg(t)))
+ mempool_free(recvmsg, t->recvmsg_mempool);
+ while ((recvmsg = get_empty_recvmsg(t)))
+ mempool_free(recvmsg, t->recvmsg_mempool);
+
+ mempool_destroy(t->recvmsg_mempool);
+ t->recvmsg_mempool = NULL;
+
+ kmem_cache_destroy(t->recvmsg_cache);
+ t->recvmsg_cache = NULL;
+
+ mempool_destroy(t->sendmsg_mempool);
+ t->sendmsg_mempool = NULL;
+
+ kmem_cache_destroy(t->sendmsg_cache);
+ t->sendmsg_cache = NULL;
+}
+
+/*
+ * Create the per-transport slab caches and mempools for send and receive
+ * messages and pre-allocate recv_credit_max receive messages onto
+ * recvmsg_queue.
+ *
+ * Send objects are sized for the message descriptor plus a negotiate
+ * response; receive objects for the descriptor plus max_recv_size bytes.
+ *
+ * Returns 0 on success. On any allocation failure everything created so far
+ * is torn down with smb_direct_destroy_pools() and -ENOMEM is returned.
+ */
+static int smb_direct_create_pools(struct smb_direct_transport *t)
+{
+ char name[80];
+ int i;
+ struct smb_direct_recvmsg *recvmsg;
+
+ /* cache names embed the transport pointer to keep them distinct */
+ snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t);
+ t->sendmsg_cache = kmem_cache_create(name,
+ sizeof(struct smb_direct_sendmsg) +
+ sizeof(struct smb_direct_negotiate_resp),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!t->sendmsg_cache)
+ return -ENOMEM;
+
+ t->sendmsg_mempool = mempool_create(t->send_credit_target,
+ mempool_alloc_slab, mempool_free_slab,
+ t->sendmsg_cache);
+ if (!t->sendmsg_mempool)
+ goto err;
+
+ snprintf(name, sizeof(name), "smb_direct_resp_%p", t);
+ t->recvmsg_cache = kmem_cache_create(name,
+ sizeof(struct smb_direct_recvmsg) +
+ t->max_recv_size,
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!t->recvmsg_cache)
+ goto err;
+
+ t->recvmsg_mempool =
+ mempool_create(t->recv_credit_max, mempool_alloc_slab,
+ mempool_free_slab, t->recvmsg_cache);
+ if (!t->recvmsg_mempool)
+ goto err;
+
+ INIT_LIST_HEAD(&t->recvmsg_queue);
+
+ for (i = 0; i < t->recv_credit_max; i++) {
+ recvmsg = mempool_alloc(t->recvmsg_mempool, GFP_KERNEL);
+ if (!recvmsg)
+ goto err;
+ recvmsg->transport = t;
+ list_add(&recvmsg->list, &t->recvmsg_queue);
+ }
+ t->count_avail_recvmsg = t->recv_credit_max;
+
+ return 0;
+err:
+ smb_direct_destroy_pools(t);
+ return -ENOMEM;
+}
+
+/*
+ * Allocate the protection domain, the send and receive completion queues
+ * and the RC queue pair for the transport, and install
+ * smb_direct_cm_handler() as the cm_id's event handler.
+ *
+ * @t: transport being set up
+ * @cap: QP capabilities prepared by smb_direct_init_params()
+ *
+ * Returns 0 on success; on failure every object created so far is destroyed
+ * and the corresponding error is returned.
+ * NOTE(review): the send CQ is sized from send_credit_target while the
+ * receive CQ is sized from max_send_wr + max_rdma_ctxs - confirm these two
+ * sizes are not swapped.
+ */
+static int smb_direct_create_qpair(struct smb_direct_transport *t,
+ struct ib_qp_cap *cap)
+{
+ int ret;
+ struct ib_qp_init_attr qp_attr;
+
+ t->pd = ib_alloc_pd(t->cm_id->device, 0);
+ if (IS_ERR(t->pd)) {
+ pr_err("Can't create RDMA PD\n");
+ ret = PTR_ERR(t->pd);
+ t->pd = NULL;
+ return ret;
+ }
+
+ t->send_cq = ib_alloc_cq(t->cm_id->device, t,
+ t->send_credit_target, 0, IB_POLL_WORKQUEUE);
+ if (IS_ERR(t->send_cq)) {
+ pr_err("Can't create RDMA send CQ\n");
+ ret = PTR_ERR(t->send_cq);
+ t->send_cq = NULL;
+ goto err;
+ }
+
+ t->recv_cq = ib_alloc_cq(t->cm_id->device, t,
+ cap->max_send_wr + cap->max_rdma_ctxs,
+ 0, IB_POLL_WORKQUEUE);
+ if (IS_ERR(t->recv_cq)) {
+ pr_err("Can't create RDMA recv CQ\n");
+ ret = PTR_ERR(t->recv_cq);
+ t->recv_cq = NULL;
+ goto err;
+ }
+
+ memset(&qp_attr, 0, sizeof(qp_attr));
+ qp_attr.event_handler = smb_direct_qpair_handler;
+ qp_attr.qp_context = t;
+ qp_attr.cap = *cap;
+ /* completions are generated only for WRs that request one */
+ qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+ qp_attr.qp_type = IB_QPT_RC;
+ qp_attr.send_cq = t->send_cq;
+ qp_attr.recv_cq = t->recv_cq;
+ qp_attr.port_num = ~0;
+
+ ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr);
+ if (ret) {
+ pr_err("Can't create RDMA QP: %d\n", ret);
+ goto err;
+ }
+
+ t->qp = t->cm_id->qp;
+ t->cm_id->event_handler = smb_direct_cm_handler;
+
+ return 0;
+err:
+ /* unwind in reverse order of creation; NULL members are skipped */
+ if (t->qp) {
+ ib_destroy_qp(t->qp);
+ t->qp = NULL;
+ }
+ if (t->recv_cq) {
+ ib_destroy_cq(t->recv_cq);
+ t->recv_cq = NULL;
+ }
+ if (t->send_cq) {
+ ib_destroy_cq(t->send_cq);
+ t->send_cq = NULL;
+ }
+ if (t->pd) {
+ ib_dealloc_pd(t->pd);
+ t->pd = NULL;
+ }
+ return ret;
+}
+
+/*
+ * ksmbd_transport_ops.prepare: bring a newly created RDMA transport to the
+ * connected state.
+ *
+ * Runs, in order, parameter initialisation, pool creation, PD/CQ/QP creation
+ * and the SMB Direct negotiation (which also accepts the connection). The
+ * first failing step is logged and its error returned; this function does no
+ * unwinding of earlier steps itself.
+ *
+ * Returns 0 with the status set to SMB_DIRECT_CS_CONNECTED on success.
+ */
+static int smb_direct_prepare(struct ksmbd_transport *t)
+{
+	struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+	int ret;
+	struct ib_qp_cap qp_cap;
+
+	ret = smb_direct_init_params(st, &qp_cap);
+	if (ret) {
+		pr_err("Can't configure RDMA parameters\n");
+		return ret;
+	}
+
+	ret = smb_direct_create_pools(st);
+	if (ret) {
+		pr_err("Can't init RDMA pool: %d\n", ret);
+		return ret;
+	}
+
+	ret = smb_direct_create_qpair(st, &qp_cap);
+	if (ret) {
+		/* accepting the client happens later, in smb_direct_negotiate() */
+		pr_err("Can't create RDMA queue pair resources: %d\n", ret);
+		return ret;
+	}
+
+	ret = smb_direct_negotiate(st);
+	if (ret) {
+		pr_err("Can't negotiate: %d\n", ret);
+		return ret;
+	}
+
+	st->status = SMB_DIRECT_CS_CONNECTED;
+	return 0;
+}
+
+/*
+ * Report whether a device can be used by this transport: it must advertise
+ * IB_DEVICE_MEM_MGT_EXTENSIONS and a non-zero max_fast_reg_page_list_len.
+ */
+static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
+{
+	return (attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) &&
+	       attrs->max_fast_reg_page_list_len != 0;
+}
+
+/*
+ * Handle an incoming connection request on the listening cm_id.
+ *
+ * Rejects devices for which rdma_frwr_is_supported() is false, allocates a
+ * transport for the new cm_id and starts a kernel thread running
+ * ksmbd_conn_handler_loop() for the connection.
+ *
+ * Returns 0 on success, -EPROTONOSUPPORT for an unsupported device, -ENOMEM
+ * if the transport cannot be allocated, or the kthread_run() error (after
+ * freeing the transport).
+ */
+static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
+{
+ struct smb_direct_transport *t;
+
+ if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
+ ksmbd_debug(RDMA,
+ "Fast Registration Work Requests is not supported. device capabilities=%llx\n",
+ new_cm_id->device->attrs.device_cap_flags);
+ return -EPROTONOSUPPORT;
+ }
+
+ t = alloc_transport(new_cm_id);
+ if (!t)
+ return -ENOMEM;
+
+ KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop,
+ KSMBD_TRANS(t)->conn, "ksmbd:r%u",
+ SMB_DIRECT_PORT);
+ if (IS_ERR(KSMBD_TRANS(t)->handler)) {
+ int ret = PTR_ERR(KSMBD_TRANS(t)->handler);
+
+ pr_err("Can't start thread\n");
+ free_transport(t);
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * RDMA CM event handler for the listening cm_id.
+ *
+ * RDMA_CM_EVENT_CONNECT_REQUEST is passed to
+ * smb_direct_handle_connect_request(); its error, if any, is returned to
+ * the CM. Every other event is logged as unexpected and 0 is returned.
+ */
+static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event)
+{
+ switch (event->event) {
+ case RDMA_CM_EVENT_CONNECT_REQUEST: {
+ int ret = smb_direct_handle_connect_request(cm_id);
+
+ if (ret) {
+ pr_err("Can't create transport: %d\n", ret);
+ return ret;
+ }
+
+ ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
+ cm_id);
+ break;
+ }
+ default:
+ pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
+ cm_id, rdma_event_msg(event->event), event->event);
+ break;
+ }
+ return 0;
+}
+
+/*
+ * Create the listening RDMA cm_id, bind it to INADDR_ANY:port and start
+ * listening with a backlog of 10.
+ *
+ * On success the cm_id is stored in smb_direct_listener.cm_id and 0 is
+ * returned. On failure the cm_id is destroyed, smb_direct_listener.cm_id is
+ * cleared and the error is returned.
+ */
+static int smb_direct_listen(int port)
+{
+ int ret;
+ struct rdma_cm_id *cm_id;
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_ANY),
+ .sin_port = htons(port),
+ };
+
+ cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
+ &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC);
+ if (IS_ERR(cm_id)) {
+ pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
+ return PTR_ERR(cm_id);
+ }
+
+ ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
+ if (ret) {
+ pr_err("Can't bind: %d\n", ret);
+ goto err;
+ }
+
+ /* published before rdma_listen() is called */
+ smb_direct_listener.cm_id = cm_id;
+
+ ret = rdma_listen(cm_id, 10);
+ if (ret) {
+ pr_err("Can't listen: %d\n", ret);
+ goto err;
+ }
+ return 0;
+err:
+ smb_direct_listener.cm_id = NULL;
+ rdma_destroy_id(cm_id);
+ return ret;
+}
+
+/*
+ * Initialise the SMB Direct transport: create its workqueue and start
+ * listening on SMB_DIRECT_PORT.
+ *
+ * Returns 0 on success, -ENOMEM if the workqueue cannot be allocated, or
+ * the smb_direct_listen() error (after destroying the workqueue).
+ */
+int ksmbd_rdma_init(void)
+{
+ int ret;
+
+ smb_direct_listener.cm_id = NULL;
+
+ /* When a client is running out of send credits, the credits are
+ * granted by the server sending a packet using this queue.
+ * This avoids the situation where a client cannot send packets
+ * for lack of credits.
+ */
+ smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
+ WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
+ if (!smb_direct_wq)
+ return -ENOMEM;
+
+ ret = smb_direct_listen(SMB_DIRECT_PORT);
+ if (ret) {
+ destroy_workqueue(smb_direct_wq);
+ smb_direct_wq = NULL;
+ pr_err("Can't listen: %d\n", ret);
+ return ret;
+ }
+
+ ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n",
+ smb_direct_listener.cm_id);
+ return 0;
+}
+
+/*
+ * Shut down the SMB Direct transport: destroy the listening cm_id (if any),
+ * then flush and destroy the workqueue (if any). Always returns 0.
+ */
+int ksmbd_rdma_destroy(void)
+{
+ if (smb_direct_listener.cm_id)
+ rdma_destroy_id(smb_direct_listener.cm_id);
+ smb_direct_listener.cm_id = NULL;
+
+ if (smb_direct_wq) {
+ flush_workqueue(smb_direct_wq);
+ destroy_workqueue(smb_direct_wq);
+ smb_direct_wq = NULL;
+ }
+ return 0;
+}
+
+/*
+ * Report whether netdev is backed by an RDMA device this transport can use.
+ *
+ * Returns true only if an ib_device is associated with the net device and
+ * rdma_frwr_is_supported() accepts its attributes. The device reference
+ * taken for the lookup is dropped before returning.
+ */
+bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
+{
+	struct ib_device *ibdev;
+	bool supported;
+
+	ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
+	if (!ibdev)
+		return false;
+
+	supported = rdma_frwr_is_supported(&ibdev->attrs);
+	ib_device_put(ibdev);
+	return supported;
+}
+
+/* Transport operations implemented by the SMB Direct (RDMA) transport. */
+static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
+ .prepare = smb_direct_prepare,
+ .disconnect = smb_direct_disconnect,
+ .writev = smb_direct_writev,
+ .read = smb_direct_read,
+ .rdma_read = smb_direct_rdma_read,
+ .rdma_write = smb_direct_rdma_write,
+};
diff --git a/fs/ksmbd/transport_rdma.h b/fs/ksmbd/transport_rdma.h
new file mode 100644
index 000000000000..0fa8adc0776f
--- /dev/null
+++ b/fs/ksmbd/transport_rdma.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2017, Microsoft Corporation.
+ * Copyright (C) 2018, LG Electronics.
+ */
+
+#ifndef __KSMBD_TRANSPORT_RDMA_H__
+#define __KSMBD_TRANSPORT_RDMA_H__
+
+#define SMB_DIRECT_PORT 5445
+
+/*
+ * SMB DIRECT negotiation request packet [MS-SMBD] 2.2.1
+ *
+ * Every field is stored little-endian and the struct is __packed, so the
+ * in-memory layout has no compiler-inserted padding.
+ */
+struct smb_direct_negotiate_req {
+ __le16 min_version;
+ __le16 max_version;
+ __le16 reserved;
+ __le16 credits_requested;
+ __le32 preferred_send_size;
+ __le32 max_receive_size;
+ __le32 max_fragmented_size;
+} __packed;
+
+/*
+ * SMB DIRECT negotiation response packet [MS-SMBD] 2.2.2
+ *
+ * Every field is stored little-endian and the struct is __packed, so the
+ * in-memory layout has no compiler-inserted padding.
+ */
+struct smb_direct_negotiate_resp {
+ __le16 min_version;
+ __le16 max_version;
+ __le16 negotiated_version;
+ __le16 reserved;
+ __le16 credits_requested;
+ __le16 credits_granted;
+ __le32 status;
+ __le32 max_readwrite_size;
+ __le32 preferred_send_size;
+ __le32 max_receive_size;
+ __le32 max_fragmented_size;
+} __packed;
+
+#define SMB_DIRECT_RESPONSE_REQUESTED 0x0001
+
+/*
+ * SMB DIRECT data transfer packet with payload [MS-SMBD] 2.2.3
+ *
+ * Fixed little-endian header followed by a flexible array member; the
+ * struct is __packed, so sizeof() covers the header only.
+ */
+struct smb_direct_data_transfer {
+ __le16 credits_requested;
+ __le16 credits_granted;
+ __le16 flags; /* NOTE(review): presumably takes SMB_DIRECT_RESPONSE_REQUESTED - confirm */
+ __le16 reserved;
+ __le32 remaining_data_length;
+ __le32 data_offset;
+ __le32 data_length;
+ __le32 padding;
+ __u8 buffer[]; /* variable-length bytes following the header */
+} __packed;
+
+#ifdef CONFIG_SMB_SERVER_SMBDIRECT
+int ksmbd_rdma_init(void);
+int ksmbd_rdma_destroy(void);
+bool ksmbd_rdma_capable_netdev(struct net_device *netdev);
+#else /* !CONFIG_SMB_SERVER_SMBDIRECT: no-op stubs so callers need no #ifdef */
+static inline int ksmbd_rdma_init(void) { return 0; }
+static inline int ksmbd_rdma_destroy(void) { return 0; }
+static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; }
+#endif /* CONFIG_SMB_SERVER_SMBDIRECT */
+
+#endif /* __KSMBD_TRANSPORT_RDMA_H__ */
diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c
new file mode 100644
index 000000000000..dc15a5ecd2e0
--- /dev/null
+++ b/fs/ksmbd/transport_tcp.c
@@ -0,0 +1,618 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/freezer.h>
+
+#include "smb_common.h"
+#include "server.h"
+#include "auth.h"
+#include "connection.h"
+#include "transport_tcp.h"
+
+#define IFACE_STATE_DOWN BIT(0)
+#define IFACE_STATE_CONFIGURED BIT(1)
+
+struct interface { /* one entry of iface_list: a network interface ksmbd may listen on */
+ struct task_struct *ksmbd_kthread; /* accept thread started by ksmbd_tcp_run_kthread() */
+ struct socket *ksmbd_socket; /* listening socket; NULL while the interface is down */
+ struct list_head entry; /* link in iface_list */
+ char *name; /* heap-allocated interface name, freed in ksmbd_tcp_destroy() */
+ struct mutex sock_release_lock; /* held around kernel_accept() and around socket teardown */
+ int state; /* IFACE_STATE_DOWN or IFACE_STATE_CONFIGURED */
+};
+
+static LIST_HEAD(iface_list);
+
+static int bind_additional_ifaces;
+
+struct tcp_transport { /* per-connection TCP state wrapping the generic transport */
+ struct ksmbd_transport transport; /* embedded generic part; TCP_TRANS() maps back from it */
+ struct socket *sock; /* accepted client socket, released by free_transport() */
+ struct kvec *iov; /* scratch iovec reused across reads, see get_conn_iovec() */
+ unsigned int nr_iov; /* number of entries allocated in @iov */
+};
+
+static struct ksmbd_transport_ops ksmbd_tcp_transport_ops;
+
+static void tcp_stop_kthread(struct task_struct *kthread);
+static struct interface *alloc_iface(char *ifname);
+
+#define KSMBD_TRANS(t) (&(t)->transport)
+#define TCP_TRANS(t) ((struct tcp_transport *)container_of(t, \
+ struct tcp_transport, transport))
+
+/* Apply tcp_sock_set_nodelay() to the sock backing @sock. */
+static inline void ksmbd_tcp_nodelay(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	tcp_sock_set_nodelay(sk);
+}
+
+/* Apply sock_set_reuseaddr() to the sock backing @sock. */
+static inline void ksmbd_tcp_reuseaddr(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	sock_set_reuseaddr(sk);
+}
+
+/*
+ * Set the receive timeout of @sock to @secs seconds. A value of zero, or one
+ * too large to express in jiffies, selects MAX_SCHEDULE_TIMEOUT instead.
+ */
+static inline void ksmbd_tcp_rcv_timeout(struct socket *sock, s64 secs)
+{
+	struct sock *sk = sock->sk;
+	bool in_range = secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1;
+
+	lock_sock(sk);
+	sk->sk_rcvtimeo = in_range ? secs * HZ : MAX_SCHEDULE_TIMEOUT;
+	release_sock(sk);
+}
+
+/* Set the send timeout of @sock to @secs via sock_set_sndtimeo(). */
+static inline void ksmbd_tcp_snd_timeout(struct socket *sock, s64 secs)
+{
+	struct sock *sk = sock->sk;
+
+	sock_set_sndtimeo(sk, secs);
+}
+
+/*
+ * Allocate a TCP transport plus its ksmbd connection for @client_sk and link
+ * the two together. Returns NULL when either allocation fails; @client_sk is
+ * left untouched in that case.
+ */
+static struct tcp_transport *alloc_transport(struct socket *client_sk)
+{
+	struct tcp_transport *tcp;
+	struct ksmbd_conn *conn;
+
+	tcp = kzalloc(sizeof(*tcp), GFP_KERNEL);
+	if (!tcp)
+		return NULL;
+
+	conn = ksmbd_conn_alloc();
+	if (!conn)
+		goto err_free;
+
+	tcp->sock = client_sk;
+	tcp->transport.conn = conn;
+	tcp->transport.ops = &ksmbd_tcp_transport_ops;
+	conn->transport = &tcp->transport;
+	return tcp;
+
+err_free:
+	kfree(tcp);
+	return NULL;
+}
+
+/*
+ * Shut down and release the client socket of @t, then free the connection,
+ * the scratch iovec and the transport itself.
+ */
+static void free_transport(struct tcp_transport *t)
+{
+	struct socket *sock = t->sock;
+
+	kernel_sock_shutdown(sock, SHUT_RDWR);
+	sock_release(sock);
+	t->sock = NULL;
+
+	ksmbd_conn_free(t->transport.conn);
+	kfree(t->iov);
+	kfree(t);
+}
+
+/**
+ * kvec_array_init() - build an iovec that resumes a partially completed read
+ * @new: IO vector to be initialized
+ * @iov: base IO vector
+ * @nr_segs: number of segments in base iov
+ * @bytes: number of bytes of @iov already consumed by earlier reads
+ *
+ * Skips the first @bytes bytes of @iov (and any zero-length segments it meets
+ * while doing so), copies the remaining segments into @new and trims the
+ * first copied segment by the part that was already consumed.
+ *
+ * Return: Number of IO segments stored in @new
+ */
+static unsigned int kvec_array_init(struct kvec *new, struct kvec *iov,
+ unsigned int nr_segs, size_t bytes)
+{
+ size_t base = 0; /* bytes already consumed inside the current segment */
+
+ while (bytes || !iov->iov_len) {
+ int copy = min(bytes, iov->iov_len);
+
+ bytes -= copy;
+ base += copy;
+ if (iov->iov_len == base) { /* segment fully consumed (or empty): move on */
+ iov++;
+ nr_segs--;
+ base = 0;
+ }
+ }
+
+ memcpy(new, iov, sizeof(*iov) * nr_segs);
+ new->iov_base += base; /* resume in the middle of the first remaining segment */
+ new->iov_len -= base;
+ return nr_segs;
+}
+
+/**
+ * get_conn_iovec() - get connection iovec for reading from socket
+ * @t:		TCP transport instance
+ * @nr_segs:	number of segments the caller needs
+ *
+ * Reuses the cached iovec when it already holds at least @nr_segs entries,
+ * otherwise replaces it with a freshly allocated one. The cached iovec is
+ * kept when the new allocation fails.
+ *
+ * Return:	iovec with room for @nr_segs entries, or NULL on allocation
+ *		failure
+ */
+static struct kvec *get_conn_iovec(struct tcp_transport *t, unsigned int nr_segs)
+{
+	struct kvec *grown;
+
+	if (t->iov && t->nr_iov >= nr_segs)
+		return t->iov;
+
+	/* cached vector is missing or too small: swap in a bigger one */
+	grown = kmalloc_array(nr_segs, sizeof(*grown), GFP_KERNEL);
+	if (!grown)
+		return NULL;
+
+	kfree(t->iov);
+	t->iov = grown;
+	t->nr_iov = nr_segs;
+	return grown;
+}
+
+/*
+ * Return the port stored in @sa in host byte order, or 0 when the address
+ * family is neither AF_INET nor AF_INET6.
+ */
+static unsigned short ksmbd_tcp_get_port(const struct sockaddr *sa)
+{
+	if (sa->sa_family == AF_INET)
+		return ntohs(((struct sockaddr_in *)sa)->sin_port);
+
+	if (sa->sa_family == AF_INET6)
+		return ntohs(((struct sockaddr_in6 *)sa)->sin6_port);
+
+	return 0;
+}
+
+/**
+ * ksmbd_tcp_new_connection() - create a new tcp session on mount
+ * @client_sk:	socket associated with new connection
+ *
+ * Whenever a new connection is requested, create a conn thread
+ * (session thread) to handle new incoming smb requests from the connection.
+ *
+ * This function owns @client_sk from the moment it is called: on every
+ * failure path the socket is released, on success it belongs to the
+ * transport and is released by free_transport().
+ *
+ * Return:	0 on success, otherwise error
+ */
+static int ksmbd_tcp_new_connection(struct socket *client_sk)
+{
+	struct sockaddr *csin;
+	int rc = 0;
+	struct tcp_transport *t;
+
+	t = alloc_transport(client_sk);
+	if (!t) {
+		/*
+		 * No transport exists to take the socket, and the accept
+		 * loop does not look at our return value, so drop it here.
+		 */
+		sock_release(client_sk);
+		return -ENOMEM;
+	}
+
+	csin = KSMBD_TCP_PEER_SOCKADDR(KSMBD_TRANS(t)->conn);
+	if (kernel_getpeername(client_sk, csin) < 0) {
+		pr_err("client ip resolution failed\n");
+		rc = -EINVAL;
+		goto out_error;
+	}
+
+	KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop,
+					      KSMBD_TRANS(t)->conn,
+					      "ksmbd:%u",
+					      ksmbd_tcp_get_port(csin));
+	if (IS_ERR(KSMBD_TRANS(t)->handler)) {
+		pr_err("cannot start conn thread\n");
+		rc = PTR_ERR(KSMBD_TRANS(t)->handler);
+		goto out_error;
+	}
+	return 0;
+
+out_error:
+	/* releases the socket together with the transport and connection */
+	free_transport(t);
+	return rc;
+}
+
+/**
+ * ksmbd_kthread_fn() - listen to new SMB connections and callback server
+ * @p: the struct interface whose listening socket this thread serves
+ *
+ * Polls the listening socket with a non-blocking accept until the thread is
+ * asked to stop or the socket has been torn down. Each accepted socket is
+ * handed to ksmbd_tcp_new_connection().
+ *
+ * Return: always 0
+ */
+static int ksmbd_kthread_fn(void *p)
+{
+ struct socket *client_sk = NULL;
+ struct interface *iface = (struct interface *)p;
+ int ret;
+
+ while (!kthread_should_stop()) {
+ mutex_lock(&iface->sock_release_lock); /* keeps the socket alive across kernel_accept() */
+ if (!iface->ksmbd_socket) {
+ mutex_unlock(&iface->sock_release_lock);
+ break;
+ }
+ ret = kernel_accept(iface->ksmbd_socket, &client_sk,
+ O_NONBLOCK);
+ mutex_unlock(&iface->sock_release_lock);
+ if (ret) {
+ if (ret == -EAGAIN)
+ /* check for new connections every 100 msecs */
+ schedule_timeout_interruptible(HZ / 10);
+ continue; /* any other accept error is retried without sleeping */
+ }
+
+ ksmbd_debug(CONN, "connect success: accepted new connection\n");
+ client_sk->sk->sk_rcvtimeo = KSMBD_TCP_RECV_TIMEOUT;
+ client_sk->sk->sk_sndtimeo = KSMBD_TCP_SEND_TIMEOUT;
+
+ ksmbd_tcp_new_connection(client_sk); /* result ignored: a failed setup only drops this client */
+ }
+
+ ksmbd_debug(CONN, "releasing socket\n");
+ return 0;
+}
+
+/**
+ * ksmbd_tcp_run_kthread() - start forker thread
+ * @iface:	pointer to struct interface
+ *
+ * Starts the "ksmbd-<ifname>" thread running ksmbd_kthread_fn(), which
+ * accepts SMB connections on the interface's listening socket and spawns a
+ * per-connection server thread for each of them.
+ *
+ * Return:	0 on success or error number
+ */
+static int ksmbd_tcp_run_kthread(struct interface *iface)
+{
+	struct task_struct *task;
+
+	task = kthread_run(ksmbd_kthread_fn, (void *)iface, "ksmbd-%s",
+			   iface->name);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+
+	iface->ksmbd_kthread = task;
+	return 0;
+}
+
+/**
+ * ksmbd_tcp_readv() - read data from socket in given iovec
+ * @t:		TCP transport instance
+ * @iov_orig:	base IO vector
+ * @nr_segs:	number of segments in base iov
+ * @to_read:	number of bytes to read from socket
+ *
+ * Keeps receiving until @to_read bytes have arrived. After every partial
+ * read the working iovec is rebuilt from @iov_orig so that the next receive
+ * continues where the previous one stopped.
+ *
+ * Return:	on success return number of bytes read from socket,
+ *		otherwise return error number (-ENOMEM, -ESHUTDOWN or -EAGAIN)
+ */
+static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig,
+			   unsigned int nr_segs, unsigned int to_read)
+{
+	int length = 0;
+	int total_read;
+	unsigned int segs;
+	/*
+	 * The designated initializer zeroes every field not named here, so
+	 * no uninitialized stack bytes are handed to kernel_recvmsg().
+	 */
+	struct msghdr ksmbd_msg = {
+		.msg_control	= NULL,
+		.msg_controllen	= 0,
+	};
+	struct kvec *iov;
+	struct ksmbd_conn *conn = KSMBD_TRANS(t)->conn;
+
+	iov = get_conn_iovec(t, nr_segs);
+	if (!iov)
+		return -ENOMEM;
+
+	for (total_read = 0; to_read; total_read += length, to_read -= length) {
+		try_to_freeze();
+
+		if (!ksmbd_conn_alive(conn)) {
+			total_read = -ESHUTDOWN;
+			break;
+		}
+		/* skip what has been read so far and map the rest */
+		segs = kvec_array_init(iov, iov_orig, nr_segs, total_read);
+
+		length = kernel_recvmsg(t->sock, &ksmbd_msg,
+					iov, segs, to_read, 0);
+
+		if (length == -EINTR) {
+			total_read = -ESHUTDOWN;
+			break;
+		} else if (conn->status == KSMBD_SESS_NEED_RECONNECT) {
+			total_read = -EAGAIN;
+			break;
+		} else if (length == -ERESTARTSYS || length == -EAGAIN) {
+			usleep_range(1000, 2000);
+			/* nothing was read: keep the loop counters unchanged */
+			length = 0;
+			continue;
+		} else if (length <= 0) {
+			total_read = -EAGAIN;
+			break;
+		}
+	}
+	return total_read;
+}
+
+/**
+ * ksmbd_tcp_read() - read data from socket in given buffer
+ * @t:		generic transport embedded in a TCP transport instance
+ * @buf:	buffer to store read data from socket
+ * @to_read:	number of bytes to read from socket
+ *
+ * Wraps @buf in a single-segment iovec and defers to ksmbd_tcp_readv().
+ *
+ * Return:	on success return number of bytes read from socket,
+ *		otherwise return error number
+ */
+static int ksmbd_tcp_read(struct ksmbd_transport *t, char *buf, unsigned int to_read)
+{
+	struct kvec seg = {
+		.iov_base	= buf,
+		.iov_len	= to_read,
+	};
+
+	return ksmbd_tcp_readv(TCP_TRANS(t), &seg, 1, to_read);
+}
+
+/*
+ * Send @size bytes described by the @nvecs entries of @iov on the client
+ * socket. @need_invalidate and @remote_key are part of the transport
+ * interface but unused by the TCP transport.
+ */
+static int ksmbd_tcp_writev(struct ksmbd_transport *t, struct kvec *iov,
+			    int nvecs, int size, bool need_invalidate,
+			    unsigned int remote_key)
+{
+	struct socket *sock = TCP_TRANS(t)->sock;
+	struct msghdr hdr = { .msg_flags = MSG_NOSIGNAL };
+
+	return kernel_sendmsg(sock, &hdr, iov, nvecs, size);
+}
+
+/* Transport ->disconnect hook: release everything owned by the TCP transport. */
+static void ksmbd_tcp_disconnect(struct ksmbd_transport *t)
+{
+	struct tcp_transport *tcp = TCP_TRANS(t);
+
+	free_transport(tcp);
+}
+
+/*
+ * Shut down and release a listening socket. A NULL @ksmbd_socket is
+ * accepted and ignored. A failing shutdown is logged, the socket is released
+ * regardless.
+ */
+static void tcp_destroy_socket(struct socket *ksmbd_socket)
+{
+	int err;
+
+	if (ksmbd_socket == NULL)
+		return;
+
+	/* set zero to timeout */
+	ksmbd_tcp_rcv_timeout(ksmbd_socket, 0);
+	ksmbd_tcp_snd_timeout(ksmbd_socket, 0);
+
+	err = kernel_sock_shutdown(ksmbd_socket, SHUT_RDWR);
+	if (err != 0)
+		pr_err("Failed to shutdown socket: %d\n", err);
+
+	sock_release(ksmbd_socket);
+}
+
+/**
+ * create_socket() - create the listening socket of an interface
+ * @iface:	interface to listen on
+ *
+ * Creates an IPv6 socket (falling back to IPv4 when that fails), binds it to
+ * the device named @iface->name on server_conf.tcp_port, starts listening and
+ * launches the accept thread. On success the interface is marked
+ * IFACE_STATE_CONFIGURED.
+ *
+ * Return:	0 on success, otherwise a negative error number
+ */
+static int create_socket(struct interface *iface)
+{
+	int ret;
+	struct sockaddr_in6 sin6;
+	struct sockaddr_in sin;
+	struct socket *ksmbd_socket;
+	bool ipv4 = false;
+
+	ret = sock_create(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &ksmbd_socket);
+	if (ret) {
+		pr_err("Can't create socket for ipv6, try ipv4: %d\n", ret);
+		ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP,
+				  &ksmbd_socket);
+		if (ret) {
+			pr_err("Can't create socket for ipv4: %d\n", ret);
+			/* ksmbd_socket was never set: nothing to destroy */
+			goto out_clear;
+		}
+
+		/* zero the unused fields instead of binding stack garbage */
+		memset(&sin, 0, sizeof(sin));
+		sin.sin_family = PF_INET;
+		sin.sin_addr.s_addr = htonl(INADDR_ANY);
+		sin.sin_port = htons(server_conf.tcp_port);
+		ipv4 = true;
+	} else {
+		/* also clears sin6_flowinfo and sin6_scope_id */
+		memset(&sin6, 0, sizeof(sin6));
+		sin6.sin6_family = PF_INET6;
+		sin6.sin6_addr = in6addr_any;
+		sin6.sin6_port = htons(server_conf.tcp_port);
+	}
+
+	ksmbd_tcp_nodelay(ksmbd_socket);
+	ksmbd_tcp_reuseaddr(ksmbd_socket);
+
+	ret = sock_setsockopt(ksmbd_socket,
+			      SOL_SOCKET,
+			      SO_BINDTODEVICE,
+			      KERNEL_SOCKPTR(iface->name),
+			      strlen(iface->name));
+	/* a missing device is tolerated; any other failure is fatal */
+	if (ret != -ENODEV && ret < 0) {
+		pr_err("Failed to set SO_BINDTODEVICE: %d\n", ret);
+		goto out_error;
+	}
+
+	if (ipv4)
+		ret = kernel_bind(ksmbd_socket, (struct sockaddr *)&sin,
+				  sizeof(sin));
+	else
+		ret = kernel_bind(ksmbd_socket, (struct sockaddr *)&sin6,
+				  sizeof(sin6));
+	if (ret) {
+		pr_err("Failed to bind socket: %d\n", ret);
+		goto out_error;
+	}
+
+	ksmbd_socket->sk->sk_rcvtimeo = KSMBD_TCP_RECV_TIMEOUT;
+	ksmbd_socket->sk->sk_sndtimeo = KSMBD_TCP_SEND_TIMEOUT;
+
+	ret = kernel_listen(ksmbd_socket, KSMBD_SOCKET_BACKLOG);
+	if (ret) {
+		pr_err("Port listen() error: %d\n", ret);
+		goto out_error;
+	}
+
+	/* the accept thread reads iface->ksmbd_socket, so publish it first */
+	iface->ksmbd_socket = ksmbd_socket;
+	ret = ksmbd_tcp_run_kthread(iface);
+	if (ret) {
+		pr_err("Can't start ksmbd main kthread: %d\n", ret);
+		goto out_error;
+	}
+	iface->state = IFACE_STATE_CONFIGURED;
+
+	return 0;
+
+out_error:
+	tcp_destroy_socket(ksmbd_socket);
+out_clear:
+	iface->ksmbd_socket = NULL;
+	return ret;
+}
+
+/*
+ * Netdevice notifier callback.
+ *
+ * NETDEV_UP:   start listening on the first iface_list entry whose name
+ *              matches the device if that entry is down; when no entry
+ *              matches and bind_additional_ifaces is set, add the device to
+ *              the list and start listening on it. Bridge ports are ignored.
+ * NETDEV_DOWN: stop the accept thread and destroy the socket of the first
+ *              configured entry whose name matches the device.
+ *
+ * Returns NOTIFY_OK for ignored bridge ports and for failed setup of a
+ * known or newly allocated interface, NOTIFY_DONE otherwise.
+ */
+static int ksmbd_netdev_event(struct notifier_block *nb, unsigned long event,
+			      void *ptr)
+{
+	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+	struct interface *pos;
+	struct interface *match = NULL;
+
+	if (event == NETDEV_UP) {
+		if (netdev->priv_flags & IFF_BRIDGE_PORT)
+			return NOTIFY_OK;
+
+		list_for_each_entry(pos, &iface_list, entry) {
+			if (strcmp(pos->name, netdev->name))
+				continue;
+			match = pos;
+			break;
+		}
+
+		if (match) {
+			/* already known: only bring it up if it is down */
+			if (match->state == IFACE_STATE_DOWN &&
+			    create_socket(match))
+				return NOTIFY_OK;
+			return NOTIFY_DONE;
+		}
+
+		if (!bind_additional_ifaces)
+			return NOTIFY_DONE;
+
+		match = alloc_iface(kstrdup(netdev->name, GFP_KERNEL));
+		if (!match)
+			return NOTIFY_OK;
+
+		/*
+		 * A failure here is not reported: the new entry simply stays
+		 * in the state alloc_iface() gave it.
+		 */
+		create_socket(match);
+		return NOTIFY_DONE;
+	}
+
+	if (event == NETDEV_DOWN) {
+		list_for_each_entry(pos, &iface_list, entry) {
+			if (strcmp(pos->name, netdev->name) ||
+			    pos->state != IFACE_STATE_CONFIGURED)
+				continue;
+			match = pos;
+			break;
+		}
+
+		if (match) {
+			tcp_stop_kthread(match->ksmbd_kthread);
+			match->ksmbd_kthread = NULL;
+
+			mutex_lock(&match->sock_release_lock);
+			tcp_destroy_socket(match->ksmbd_socket);
+			match->ksmbd_socket = NULL;
+			mutex_unlock(&match->sock_release_lock);
+
+			match->state = IFACE_STATE_DOWN;
+		}
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ksmbd_netdev_notifier = { /* registered in ksmbd_tcp_init(), removed in ksmbd_tcp_destroy() */
+ .notifier_call = ksmbd_netdev_event,
+};
+
+/**
+ * ksmbd_tcp_init() - hook the TCP transport into netdevice notifications
+ *
+ * Listening sockets are created from the notifier callback, so a failed
+ * registration is reported to the caller rather than silently ignored.
+ *
+ * Return:	0 on success, otherwise the error returned by
+ *		register_netdevice_notifier()
+ */
+int ksmbd_tcp_init(void)
+{
+	return register_netdevice_notifier(&ksmbd_netdev_notifier);
+}
+
+/*
+ * Stop the accept thread @kthread and log when kthread_stop() reports a
+ * non-zero result. A NULL @kthread is ignored.
+ */
+static void tcp_stop_kthread(struct task_struct *kthread)
+{
+	if (kthread && kthread_stop(kthread))
+		pr_err("failed to stop forker thread\n");
+}
+
+/**
+ * ksmbd_tcp_destroy() - unregister the notifier and free the interface list
+ */
+void ksmbd_tcp_destroy(void)
+{
+	struct interface *iface;
+
+	unregister_netdevice_notifier(&ksmbd_netdev_notifier);
+
+	/* pop entries off the head until the list is drained */
+	while (!list_empty(&iface_list)) {
+		iface = list_first_entry(&iface_list, struct interface, entry);
+		list_del(&iface->entry);
+		kfree(iface->name);
+		kfree(iface);
+	}
+}
+
+/*
+ * Allocate an interface entry named @ifname, in IFACE_STATE_DOWN, and add it
+ * to iface_list. Takes ownership of @ifname: it is freed here when the
+ * allocation fails. A NULL @ifname (failed kstrdup() in the caller) yields
+ * NULL.
+ */
+static struct interface *alloc_iface(char *ifname)
+{
+	struct interface *iface;
+
+	if (!ifname)
+		return NULL;
+
+	iface = kzalloc(sizeof(*iface), GFP_KERNEL);
+	if (iface) {
+		iface->name = ifname;
+		iface->state = IFACE_STATE_DOWN;
+		mutex_init(&iface->sock_release_lock);
+		list_add(&iface->entry, &iface_list);
+		return iface;
+	}
+
+	kfree(ifname);
+	return NULL;
+}
+
+/**
+ * ksmbd_tcp_set_interfaces() - populate the list of interfaces to listen on
+ * @ifc_list:		interface names, each NUL-terminated, stored back to back
+ * @ifc_list_sz:	total size of @ifc_list in bytes; 0 selects every
+ *			non-bridge-port netdev of init_net
+ *
+ * With an empty list, every current netdev is added and
+ * bind_additional_ifaces is set so that devices appearing later are picked
+ * up as well. With an explicit list only the named interfaces are used.
+ *
+ * Return:	0 on success, -ENOMEM when an entry cannot be allocated
+ */
+int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz)
+{
+	int sz = 0;
+
+	if (!ifc_list_sz) {
+		struct net_device *netdev;
+		int ret = 0;
+
+		rtnl_lock();
+		for_each_netdev(&init_net, netdev) {
+			if (netdev->priv_flags & IFF_BRIDGE_PORT)
+				continue;
+			if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL))) {
+				/* leave the loop so the RTNL lock is dropped */
+				ret = -ENOMEM;
+				break;
+			}
+		}
+		rtnl_unlock();
+		if (ret)
+			return ret;
+
+		bind_additional_ifaces = 1;
+		return 0;
+	}
+
+	while (ifc_list_sz > 0) {
+		if (!alloc_iface(kstrdup(ifc_list, GFP_KERNEL)))
+			return -ENOMEM;
+
+		sz = strlen(ifc_list);
+		/* an empty name ends the list early */
+		if (!sz)
+			break;
+
+		/* step over the name and its terminating NUL */
+		ifc_list += sz + 1;
+		ifc_list_sz -= (sz + 1);
+	}
+
+	bind_additional_ifaces = 0;
+
+	return 0;
+}
+
+/* Transport callbacks implemented by the TCP transport. */
+static struct ksmbd_transport_ops ksmbd_tcp_transport_ops = {
+	.disconnect	= ksmbd_tcp_disconnect,
+	.read		= ksmbd_tcp_read,
+	.writev		= ksmbd_tcp_writev,
+};
diff --git a/fs/ksmbd/transport_tcp.h b/fs/ksmbd/transport_tcp.h
new file mode 100644
index 000000000000..e338bebe322f
--- /dev/null
+++ b/fs/ksmbd/transport_tcp.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_TRANSPORT_TCP_H__
+#define __KSMBD_TRANSPORT_TCP_H__
+
+int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz);
+int ksmbd_tcp_init(void);
+void ksmbd_tcp_destroy(void);
+
+#endif /* __KSMBD_TRANSPORT_TCP_H__ */
diff --git a/fs/ksmbd/unicode.c b/fs/ksmbd/unicode.c
new file mode 100644
index 000000000000..a0db699ddafd
--- /dev/null
+++ b/fs/ksmbd/unicode.c
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Some of the source code in this file came from fs/cifs/cifs_unicode.c
+ *
+ * Copyright (c) International Business Machines Corp., 2000,2009
+ * Modified by Steve French (sfrench@us.ibm.com)
+ * Modified by Namjae Jeon (linkinjeon@kernel.org)
+ */
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <asm/unaligned.h>
+#include "glob.h"
+#include "unicode.h"
+#include "uniupr.h"
+#include "smb_common.h"
+
+/*
+ * smb_utf16_bytes() - how long will a string be after conversion?
+ * @from:	pointer to input string
+ * @maxbytes:	don't go past this many bytes of input string
+ * @codepage:	destination codepage
+ *
+ * Walks a utf16le string up to its terminating zero word, or @maxbytes,
+ * whichever comes first, and adds up the number of bytes each character
+ * needs in @codepage. A character the codepage cannot express counts as one
+ * byte. The null terminator is not included.
+ *
+ * Return:	string length after conversion
+ */
+static int smb_utf16_bytes(const __le16 *from, int maxbytes,
+			   const struct nls_table *codepage)
+{
+	char scratch[NLS_MAX_CHARSET_SIZE];
+	int nwords = maxbytes / 2;
+	int total = 0;
+	int idx;
+
+	for (idx = 0; idx < nwords; idx++) {
+		__u16 uc = get_unaligned_le16(&from[idx]);
+		int n;
+
+		if (!uc)
+			break;
+
+		n = codepage->uni2char(uc, scratch, NLS_MAX_CHARSET_SIZE);
+		total += (n > 0) ? n : 1;
+	}
+
+	return total;
+}
+
+/*
+ * cifs_mapchar() - convert a host-endian char to proper char in codepage
+ * @target:	where converted character should be copied
+ * @src_char:	2 byte host-endian source character
+ * @cp:		codepage to which character should be converted
+ * @mapchar:	should the reserved-character remapping be undone?
+ *
+ * Converts a single character. The caller must make sure @target can hold
+ * at least NLS_MAX_CHARSET_SIZE bytes. A character the codepage cannot
+ * express is written as '?'.
+ *
+ * Return:	string length after conversion
+ */
+static int
+cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
+	     bool mapchar)
+{
+	int n;
+
+	if (mapchar) {
+		/*
+		 * BB: Cannot handle remapping UNI_SLASH until all the calls
+		 * to build_path_from_dentry are modified, as they use slash
+		 * as separator.
+		 */
+		switch (src_char) {
+		case UNI_COLON:
+			*target = ':';
+			return 1;
+		case UNI_ASTERISK:
+			*target = '*';
+			return 1;
+		case UNI_QUESTION:
+			*target = '?';
+			return 1;
+		case UNI_PIPE:
+			*target = '|';
+			return 1;
+		case UNI_GRTRTHAN:
+			*target = '>';
+			return 1;
+		case UNI_LESSTHAN:
+			*target = '<';
+			return 1;
+		default:
+			break;
+		}
+	}
+
+	/* not a remapped character: let the codepage convert it */
+	n = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
+	if (n > 0)
+		return n;
+
+	*target = '?';
+	return 1;
+}
+
+/*
+ * is_char_allowed() - check for valid character
+ * @ch:	input character to be checked
+ *
+ * Bytes with the high bit set are always allowed. Among 7-bit characters,
+ * control characters (<= 0x1f) and '?', '"', '<', '>', '|' are rejected.
+ *
+ * Return:	1 if char is allowed, otherwise 0
+ */
+static inline int is_char_allowed(char *ch)
+{
+	char c = *ch;
+
+	if (c & 0x80)
+		return 1;
+
+	if (c <= 0x1f)
+		return 0;
+
+	switch (c) {
+	case '?':
+	case '"':
+	case '<':
+	case '>':
+	case '|':
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+/*
+ * smb_from_utf16() - convert utf16le string to local charset
+ * @to:		destination buffer
+ * @from:	source buffer
+ * @tolen:	destination buffer size (in bytes)
+ * @fromlen:	source buffer size (in bytes)
+ * @codepage:	codepage to which characters should be converted
+ * @mapchar:	should characters be remapped according to the mapchars option?
+ *
+ * Converts a little-endian utf16 string to the given codepage. Conversion
+ * stops at the first zero word, after @fromlen bytes of input, or as soon as
+ * the next character plus the terminator would no longer fit in @tolen
+ * bytes. The result is always null terminated.
+ *
+ * Multiword UTF-16 characters are not handled: the nls routines see each
+ * 16-bit word on its own, so such characters are not translated properly.
+ *
+ * Return:	length of the destination string in bytes, including the
+ *		null terminator
+ */
+static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
+			  const struct nls_table *codepage, bool mapchar)
+{
+	char probe[NLS_MAX_CHARSET_SIZE];
+	int termsize = nls_nullsize(codepage);
+	int nwords = fromlen / 2;
+	int written = 0;
+	int idx;
+	/*
+	 * Characters vary in width. Below this offset even the widest
+	 * character plus the terminator still fits, so no check is needed.
+	 */
+	int safe_limit = tolen - (NLS_MAX_CHARSET_SIZE + termsize);
+
+	for (idx = 0; idx < nwords; idx++) {
+		__u16 uc = get_unaligned_le16(&from[idx]);
+
+		if (!uc)
+			break;
+
+		/*
+		 * Close to the end: convert into a scratch buffer first to
+		 * learn the width and stop if it would reach the terminator.
+		 */
+		if (written >= safe_limit) {
+			int need = cifs_mapchar(probe, uc, codepage, mapchar);
+
+			if (written + need > tolen - termsize)
+				break;
+		}
+
+		/* put converted char into 'to' buffer */
+		written += cifs_mapchar(&to[written], uc, codepage, mapchar);
+	}
+
+	/* properly null-terminate string */
+	while (termsize-- > 0)
+		to[written++] = 0;
+
+	return written;
+}
+
+/*
+ * smb_strtoUTF16() - Convert character string to unicode string
+ * @to: destination buffer
+ * @from: source buffer
+ * @len: maximum number of bytes to consume from @from; for a utf8
+ *       codepage it is also passed as the output limit in 16-bit units
+ * @codepage: codepage of the source string
+ *
+ * Conversion stops at the first NUL byte in @from or after @len bytes. A
+ * character the codepage cannot convert is replaced by '?'. A terminating
+ * zero word is always stored after the converted characters, so @to must
+ * have room for one more 16-bit unit than the value returned.
+ *
+ * Return: number of 16-bit units written, not counting the terminator
+ */
+int smb_strtoUTF16(__le16 *to, const char *from, int len,
+ const struct nls_table *codepage)
+{
+ int charlen;
+ int i;
+ wchar_t wchar_to; /* needed to quiet sparse */
+
+ /* special case for utf8 to handle no plane0 chars */
+ if (!strcmp(codepage->charset, "utf8")) {
+ /*
+ * convert utf8 -> utf16, we assume we have enough space
+ * as caller should have assumed conversion does not overflow
+ * in destination len is length in wchar_t units (16bits)
+ */
+ i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
+ (wchar_t *)to, len);
+
+ /* if success terminate and exit */
+ if (i >= 0)
+ goto success;
+ /*
+ * if fails fall back to UCS encoding as this
+ * function should not return negative values
+ * currently can fail only if source contains
+ * invalid encoded characters
+ */
+ }
+
+ for (i = 0; len > 0 && *from; i++, from += charlen, len -= charlen) {
+ charlen = codepage->char2uni(from, len, &wchar_to);
+ if (charlen < 1) {
+ /* A question mark */
+ wchar_to = 0x003f;
+ charlen = 1;
+ }
+ put_unaligned_le16(wchar_to, &to[i]);
+ }
+
+success:
+ put_unaligned_le16(0, &to[i]);
+ return i;
+}
+
+/*
+ * smb_strndup_from_utf16() - copy a string from wire format to the local
+ * codepage
+ * @src: source string
+ * @maxlen: don't walk past this many bytes in the source string
+ * @is_unicode: is this a unicode string?
+ * @codepage: destination codepage
+ *
+ * Take a string in wire format, convert it to the local codepage and put it
+ * in a newly allocated buffer that the caller must kfree(). A non-unicode
+ * source is copied as is, limited to @maxlen bytes.
+ *
+ * Errors are reported as ERR_PTR() values, never as NULL, so callers must
+ * test the result with IS_ERR().
+ *
+ * Return: destination string buffer, ERR_PTR(-ENOMEM) when the buffer cannot
+ * be allocated, or ERR_PTR(-EINVAL) when the conversion fails
+ */
+char *smb_strndup_from_utf16(const char *src, const int maxlen,
+ const bool is_unicode,
+ const struct nls_table *codepage)
+{
+ int len, ret;
+ char *dst;
+
+ if (is_unicode) {
+ len = smb_utf16_bytes((__le16 *)src, maxlen, codepage); /* converted size without terminator */
+ len += nls_nullsize(codepage);
+ dst = kmalloc(len, GFP_KERNEL);
+ if (!dst)
+ return ERR_PTR(-ENOMEM);
+ ret = smb_from_utf16(dst, (__le16 *)src, len, maxlen, codepage,
+ false);
+ if (ret < 0) {
+ kfree(dst);
+ return ERR_PTR(-EINVAL);
+ }
+ } else {
+ len = strnlen(src, maxlen);
+ len++; /* room for the terminating NUL */
+ dst = kmalloc(len, GFP_KERNEL);
+ if (!dst)
+ return ERR_PTR(-ENOMEM);
+ strscpy(dst, src, len);
+ }
+
+ return dst;
+}
+
+/*
+ * Convert 16 bit Unicode pathname to wire format from string in current code
+ * page. Conversion may involve remapping up the six characters that are
+ * only legal in POSIX-like OS (if they are present in the string). Path
+ * names are little endian 16 bit Unicode on the wire
+ */
+/*
+ * smbConvertToUTF16() - convert string from local charset to utf16
+ * @target: destination buffer
+ * @source: source buffer
+ * @srclen: source buffer size (in bytes)
+ * @cp: codepage of the source string
+ * @mapchars: should the reserved characters be remapped?
+ *
+ * Convert a pathname in the current code page to little endian 16 bit
+ * Unicode, the wire format. When @mapchars is set, the six characters
+ * ':', '*', '?', '<', '>' and '|' are replaced by their UNI_* counterparts;
+ * otherwise the work is delegated to smb_strtoUTF16().
+ *
+ * In the remapping path a terminating zero word is stored only when a NUL
+ * byte is found within @srclen bytes of @source.
+ *
+ * Return: number of 16 bit units written, not counting a terminator
+ */
+int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
+ const struct nls_table *cp, int mapchars)
+{
+ int i, j, charlen;
+ char src_char;
+ __le16 dst_char;
+ wchar_t tmp;
+
+ if (!mapchars)
+ return smb_strtoUTF16(target, source, srclen, cp);
+
+ for (i = 0, j = 0; i < srclen; j++) { /* i walks source bytes, j walks target units */
+ src_char = source[i];
+ charlen = 1;
+ switch (src_char) {
+ case 0:
+ put_unaligned(0, &target[j]);
+ return j;
+ case ':':
+ dst_char = cpu_to_le16(UNI_COLON);
+ break;
+ case '*':
+ dst_char = cpu_to_le16(UNI_ASTERISK);
+ break;
+ case '?':
+ dst_char = cpu_to_le16(UNI_QUESTION);
+ break;
+ case '<':
+ dst_char = cpu_to_le16(UNI_LESSTHAN);
+ break;
+ case '>':
+ dst_char = cpu_to_le16(UNI_GRTRTHAN);
+ break;
+ case '|':
+ dst_char = cpu_to_le16(UNI_PIPE);
+ break;
+ /*
+ * FIXME: We can not handle remapping backslash (UNI_SLASH)
+ * until all the calls to build_path_from_dentry are modified,
+ * as they use backslash as separator.
+ */
+ default:
+ charlen = cp->char2uni(source + i, srclen - i, &tmp);
+ dst_char = cpu_to_le16(tmp);
+
+ /*
+ * if no match, use question mark, which at least in
+ * some cases serves as wild card
+ */
+ if (charlen < 1) {
+ dst_char = cpu_to_le16(0x003f);
+ charlen = 1;
+ }
+ }
+ /*
+ * character may take more than one byte in the source string,
+ * but will take exactly two bytes in the target string
+ */
+ i += charlen;
+ put_unaligned(dst_char, &target[j]);
+ }
+
+ return j;
+}
diff --git a/fs/ksmbd/unicode.h b/fs/ksmbd/unicode.h
new file mode 100644
index 000000000000..5593024230ae
--- /dev/null
+++ b/fs/ksmbd/unicode.h
@@ -0,0 +1,357 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Some of the source code in this file came from fs/cifs/cifs_unicode.c
+ * cifs_unicode: Unicode kernel case support
+ *
+ * Function:
+ * Convert a unicode character to upper or lower case using
+ * compressed tables.
+ *
+ * Copyright (c) International Business Machines Corp., 2000,2009
+ *
+ *
+ * Notes:
+ * These APIs are based on the C library functions. The semantics
+ * should match the C functions but with expanded size operands.
+ *
+ * The upper/lower functions are based on a table created by mkupr.
+ * This is a compressed table of upper and lower case conversion.
+ *
+ */
+#ifndef _CIFS_UNICODE_H
+#define _CIFS_UNICODE_H
+
+#include <asm/byteorder.h>
+#include <linux/types.h>
+#include <linux/nls.h>
+
+#define UNIUPR_NOLOWER /* Example to not expand lower case tables */
+
+/*
+ * Windows maps these to the user defined 16 bit Unicode range since they are
+ * reserved symbols (along with \ and /), otherwise illegal to store
+ * in filenames in NTFS
+ */
+#define UNI_ASTERISK ((__u16)('*' + 0xF000))
+#define UNI_QUESTION ((__u16)('?' + 0xF000))
+#define UNI_COLON ((__u16)(':' + 0xF000))
+#define UNI_GRTRTHAN ((__u16)('>' + 0xF000))
+#define UNI_LESSTHAN ((__u16)('<' + 0xF000))
+#define UNI_PIPE ((__u16)('|' + 0xF000))
+#define UNI_SLASH ((__u16)('\\' + 0xF000))
+
+/* Just define what we want from uniupr.h. We don't want to define the tables
+ * in each source file.
+ */
+#ifndef UNICASERANGE_DEFINED
+struct UniCaseRange {
+ wchar_t start; /* first code point covered by this range */
+ wchar_t end; /* last code point covered by this range (inclusive) */
+ signed char *table; /* delta added to a code point, indexed by (uc - start) */
+};
+#endif /* UNICASERANGE_DEFINED */
+
+#ifndef UNIUPR_NOUPPER
+extern signed char SmbUniUpperTable[512];
+extern const struct UniCaseRange SmbUniUpperRange[];
+#endif /* UNIUPR_NOUPPER */
+
+#ifndef UNIUPR_NOLOWER
+extern signed char CifsUniLowerTable[512];
+extern const struct UniCaseRange CifsUniLowerRange[];
+#endif /* UNIUPR_NOLOWER */
+
+#ifdef __KERNEL__
+int smb_strtoUTF16(__le16 *to, const char *from, int len,
+ const struct nls_table *codepage);
+char *smb_strndup_from_utf16(const char *src, const int maxlen,
+ const bool is_unicode,
+ const struct nls_table *codepage);
+int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
+ const struct nls_table *cp, int mapchars);
+char *ksmbd_extract_sharename(char *treename);
+#endif
+
+/*
+ * UniStrcat: Append the second string (terminator included) to the first
+ *
+ * Returns:
+ *	Address of the first string
+ */
+static inline wchar_t *UniStrcat(wchar_t *ucs1, const wchar_t *ucs2)
+{
+	wchar_t *dst = ucs1;
+
+	/* Advance to the null that ends the first string */
+	while (*dst)
+		dst++;
+
+	/* Copy string 2, stopping once its null has been stored */
+	do {
+		*dst = *ucs2++;
+	} while (*dst++);
+
+	return ucs1;
+}
+
+/*
+ * UniStrchr: Locate a character in a string
+ *
+ * The terminating null counts as part of the string, so searching for 0
+ * yields the address of the terminator.
+ *
+ * Returns:
+ *	Address of first occurrence of character in string
+ *	or NULL if the character is not in the string
+ */
+static inline wchar_t *UniStrchr(const wchar_t *ucs, wchar_t uc)
+{
+	for (;; ucs++) {
+		if (*ucs == uc)
+			return (wchar_t *)ucs;
+		if (!*ucs)
+			return NULL;
+	}
+}
+
+/*
+ * UniStrcmp: Compare two null terminated strings
+ *
+ * Returns the difference between the first pair of characters that differ
+ * (or between the two terminators when the strings are equal):
+ *	< 0:  First string is less than second
+ *	= 0:  Strings are equal
+ *	> 0:  First string is greater than second
+ */
+static inline int UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2)
+{
+	/* Skip the common prefix */
+	for (; *ucs1 && *ucs1 == *ucs2; ucs1++, ucs2++)
+		;
+
+	return (int)*ucs1 - (int)*ucs2;
+}
+
+/*
+ * UniStrcpy: Copy a string, including its terminating null
+ *
+ * Returns:
+ *	Address of the destination string
+ */
+static inline wchar_t *UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2)
+{
+	wchar_t *dst = ucs1;
+	wchar_t ch;
+
+	do {
+		ch = *ucs2++;
+		*dst++ = ch;
+	} while (ch);
+
+	return ucs1;
+}
+
+/*
+ * UniStrlen: Return the length of a string (in 16 bit Unicode chars not bytes)
+ *
+ * The terminating null is not counted.
+ */
+static inline size_t UniStrlen(const wchar_t *ucs1)
+{
+	/* Count in size_t, the return type; an int counter could overflow */
+	size_t len = 0;
+
+	while (ucs1[len])
+		len++;
+	return len;
+}
+
+/*
+ * UniStrnlen: Return the length (in 16 bit Unicode chars not bytes) of a
+ *		string (length limited)
+ *
+ * At most maxlen characters are examined, so the result never exceeds
+ * maxlen. A maxlen of zero (or less) examines nothing and returns 0.
+ */
+static inline size_t UniStrnlen(const wchar_t *ucs1, int maxlen)
+{
+	size_t len = 0;
+
+	/* Check the bound before touching the buffer at all */
+	if (maxlen <= 0)
+		return 0;
+
+	while (len < (size_t)maxlen && ucs1[len])
+		len++;
+	return len;
+}
+
+/*
+ * UniStrncat: Append at most n characters of the second string to the first
+ *
+ * The result is always null terminated.
+ *
+ * Returns:
+ *	Address of the first string
+ */
+static inline wchar_t *UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
+{
+	wchar_t *dst = ucs1;
+
+	/* Find the terminator of string 1 */
+	while (*dst)
+		dst++;
+
+	/* Append until n characters are copied or string 2 ends */
+	for (; n; n--) {
+		if (!*ucs2)
+			break;
+		*dst++ = *ucs2++;
+	}
+
+	*dst = 0;
+	return ucs1;
+}
+
+/*
+ * UniStrncmp: Compare at most n characters of two strings
+ *
+ * Returns 0 when n is zero, otherwise the difference between the characters
+ * at the position where the comparison stopped.
+ */
+static inline int UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
+{
+	if (n == 0)
+		return 0;	/* Nothing to compare */
+
+	for (;;) {
+		/* Stop on a mismatch or at the end of the first string */
+		if (*ucs1 != *ucs2 || !*ucs1)
+			break;
+		/* Stop once n characters have been compared */
+		if (--n == 0)
+			break;
+		ucs1++;
+		ucs2++;
+	}
+	return (int)*ucs1 - (int)*ucs2;
+}
+
+/*
+ * UniStrncmp_le: Compare length limited string - native to little-endian
+ *
+ * Each character of ucs2 is passed through __le16_to_cpu() before it is
+ * compared with the corresponding character of ucs1.
+ */
+static inline int
+UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
+{
+	if (n == 0)
+		return 0;	/* Nothing to compare */
+
+	for (;;) {
+		/* Stop on a mismatch or at the end of the first string */
+		if (*ucs1 != __le16_to_cpu(*ucs2) || !*ucs1)
+			break;
+		/* Stop once n characters have been compared */
+		if (--n == 0)
+			break;
+		ucs1++;
+		ucs2++;
+	}
+	return (int)*ucs1 - (int)__le16_to_cpu(*ucs2);
+}
+
+/*
+ * UniStrncpy: Copy length limited string with pad
+ *
+ * Exactly n characters of the destination are written: the characters of
+ * ucs2 up to its terminator (or n, whichever comes first), then nulls. When
+ * ucs2 holds n or more characters no terminator is stored.
+ *
+ * Returns:
+ *	Address of the destination string
+ */
+static inline wchar_t *UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
+{
+	size_t i = 0;
+
+	/* Copy the string */
+	for (; i < n && ucs2[i]; i++)
+		ucs1[i] = ucs2[i];
+
+	/* Pad the remainder with nulls */
+	for (; i < n; i++)
+		ucs1[i] = 0;
+
+	return ucs1;
+}
+
+/*
+ * UniStrncpy_le: Copy length limited string with pad, passing every source
+ *		   character through __le16_to_cpu()
+ *
+ * Exactly n characters of the destination are written: the converted
+ * characters of ucs2 up to its terminator (or n, whichever comes first),
+ * then nulls.
+ *
+ * Returns:
+ *	Address of the destination string
+ */
+static inline wchar_t *UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
+{
+	size_t i = 0;
+
+	/* Copy and convert the string */
+	for (; i < n && ucs2[i]; i++)
+		ucs1[i] = __le16_to_cpu(ucs2[i]);
+
+	/* Pad the remainder with nulls */
+	for (; i < n; i++)
+		ucs1[i] = 0;
+
+	return ucs1;
+}
+
+/*
+ * UniStrstr: Find the first occurrence of ucs2 inside ucs1
+ *
+ * Returns:
+ *	Address of first match found
+ *	NULL if no matching string is found
+ */
+static inline wchar_t *UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2)
+{
+	const wchar_t *candidate = ucs1;	/* where the current attempt began */
+	const wchar_t *hay = ucs1;		/* cursor in the searched string */
+	const wchar_t *pat = ucs2;		/* cursor in the pattern */
+
+	while (*hay) {
+		if (*hay == *pat) {
+			/* Still matching, move both cursors on */
+			hay++;
+			pat++;
+			continue;
+		}
+
+		/* Pattern used up: the attempt at candidate succeeded */
+		if (!*pat)
+			return (wchar_t *)candidate;
+
+		/* Mismatch: restart one character further along */
+		hay = ++candidate;
+		pat = ucs2;
+	}
+
+	/* Searched string ended; it is a match only if the pattern did too */
+	return *pat ? NULL : (wchar_t *)candidate;
+}
+
+#ifndef UNIUPR_NOUPPER
+/*
+ * UniToupper: Convert a unicode character to upper case
+ *
+ * Code points below sizeof(SmbUniUpperTable) are looked up directly in that
+ * table; anything else is searched for in SmbUniUpperRange. Characters not
+ * covered by either are returned unchanged.
+ */
+static inline wchar_t UniToupper(register wchar_t uc)
+{
+	register const struct UniCaseRange *rp;
+
+	/* Latin characters: use base table */
+	if (uc < sizeof(SmbUniUpperTable))
+		return uc + SmbUniUpperTable[uc];
+
+	/* Walk the range tables until the zero-start terminator */
+	for (rp = SmbUniUpperRange; rp->start; rp++) {
+		if (uc < rp->start)	/* Before start of range */
+			break;		/* Uppercase = input */
+		if (uc <= rp->end)	/* In range */
+			return uc + rp->table[uc - rp->start];
+	}
+	return uc;			/* No range applies */
+}
+
+/*
+ * UniStrupr: Upper case a little-endian unicode string in place
+ *
+ * Returns:
+ *	The input pointer
+ */
+static inline __le16 *UniStrupr(register __le16 *upin)
+{
+	register __le16 *up;
+
+	/* Convert each character up to the terminating null */
+	for (up = upin; *up; up++)
+		*up = cpu_to_le16(UniToupper(le16_to_cpu(*up)));
+
+	return upin;
+}
+#endif /* UNIUPR_NOUPPER */
+
+#ifndef UNIUPR_NOLOWER
+/*
+ * UniTolower: Convert a unicode character to lower case
+ *
+ * Code points below sizeof(CifsUniLowerTable) are looked up directly in that
+ * table; anything else is searched for in CifsUniLowerRange. Characters not
+ * covered by either are returned unchanged.
+ */
+static inline wchar_t UniTolower(register wchar_t uc)
+{
+	register const struct UniCaseRange *rp;
+
+	/* Latin characters: use base table */
+	if (uc < sizeof(CifsUniLowerTable))
+		return uc + CifsUniLowerTable[uc];
+
+	/* Walk the range tables until the zero-start terminator */
+	for (rp = CifsUniLowerRange; rp->start; rp++) {
+		if (uc < rp->start)	/* Before start of range */
+			break;		/* Lowercase = input */
+		if (uc <= rp->end)	/* In range */
+			return uc + rp->table[uc - rp->start];
+	}
+	return uc;			/* No range applies */
+}
+
+/*
+ * UniStrlwr: Lower case a unicode string in place
+ *
+ * Returns:
+ *	The input pointer
+ */
+static inline wchar_t *UniStrlwr(register wchar_t *upin)
+{
+	register wchar_t *up;
+
+	/* Convert each character up to the terminating null */
+	for (up = upin; *up; up++)
+		*up = UniTolower(*up);
+
+	return upin;
+}
+
+#endif
+
+#endif /* _CIFS_UNICODE_H */
diff --git a/fs/ksmbd/uniupr.h b/fs/ksmbd/uniupr.h
new file mode 100644
index 000000000000..26583b776897
--- /dev/null
+++ b/fs/ksmbd/uniupr.h
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Some of the source code in this file came from fs/cifs/uniupr.h
+ * Copyright (c) International Business Machines Corp., 2000,2002
+ *
+ * uniupr.h - Unicode compressed case ranges
+ *
+ */
+#ifndef __KSMBD_UNIUPR_H
+#define __KSMBD_UNIUPR_H
+
+#ifndef UNIUPR_NOUPPER
+/*
+ * Latin upper case
+ */
+signed char SmbUniUpperTable[512] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 030-03f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 040-04f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 050-05f */
+ 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, /* 060-06f */
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, 0, 0, 0, 0, 0, /* 070-07f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 080-08f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 090-09f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0a0-0af */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0b0-0bf */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0c0-0cf */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0d0-0df */
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, /* 0e0-0ef */
+ -32, -32, -32, -32, -32, -32, -32, 0, -32, -32,
+ -32, -32, -32, -32, -32, 121, /* 0f0-0ff */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 100-10f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 110-11f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 120-12f */
+ 0, 0, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, /* 130-13f */
+ -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, /* 140-14f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 150-15f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 160-16f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, /* 170-17f */
+ 0, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, /* 180-18f */
+ 0, 0, -1, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, /* 190-19f */
+ 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, /* 1a0-1af */
+ -1, 0, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, /* 1b0-1bf */
+ 0, 0, 0, 0, 0, -1, -2, 0, -1, -2, 0, -1, -2, 0, -1, 0, /* 1c0-1cf */
+ -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -79, 0, -1, /* 1d0-1df */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e0-1ef */
+ 0, 0, -1, -2, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, /* 1f0-1ff */
+};
+
+/* Upper case range - Greek */
+static signed char UniCaseRangeU03a0[47] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -38, -37, -37, -37, /* 3a0-3af */
+ 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, /* 3b0-3bf */
+ -32, -32, -31, -32, -32, -32, -32, -32, -32, -32, -32, -32, -64,
+ -63, -63,
+};
+
+/* Upper case range - Cyrillic */
+static signed char UniCaseRangeU0430[48] = {
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, /* 430-43f */
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, /* 440-44f */
+ 0, -80, -80, -80, -80, -80, -80, -80, -80, -80, -80,
+ -80, -80, 0, -80, -80, /* 450-45f */
+};
+
+/* Upper case range - Extended cyrillic */
+static signed char UniCaseRangeU0490[61] = {
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 490-49f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 4a0-4af */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 4b0-4bf */
+ 0, 0, -1, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1,
+};
+
+/* Upper case range - Extended latin and greek */
+static signed char UniCaseRangeU1e00[509] = {
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e00-1e0f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e10-1e1f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e20-1e2f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e30-1e3f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e40-1e4f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e50-1e5f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e60-1e6f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e70-1e7f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e80-1e8f */
+ 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, -59, 0, -1, 0, -1, /* 1e90-1e9f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ea0-1eaf */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1eb0-1ebf */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ec0-1ecf */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ed0-1edf */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ee0-1eef */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, /* 1ef0-1eff */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f00-1f0f */
+ 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f10-1f1f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f20-1f2f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f30-1f3f */
+ 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f40-1f4f */
+ 0, 8, 0, 8, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f50-1f5f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f60-1f6f */
+ 74, 74, 86, 86, 86, 86, 100, 100, 0, 0, 112, 112,
+ 126, 126, 0, 0, /* 1f70-1f7f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f80-1f8f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f90-1f9f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fa0-1faf */
+ 8, 8, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fb0-1fbf */
+ 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fc0-1fcf */
+ 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fd0-1fdf */
+ 8, 8, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fe0-1fef */
+ 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/* Upper case range - Wide latin */
+static signed char UniCaseRangeUff40[27] = {
+ 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, /* ff40-ff4f */
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+};
+
+/*
+ * Upper Case Range
+ *
+ * Each entry is {first code point, last code point (inclusive), delta table}.
+ * Entries are listed in ascending order of their first code point and the
+ * list is terminated by an all-zero entry.
+ */
+const struct UniCaseRange SmbUniUpperRange[] = {
+ {0x03a0, 0x03ce, UniCaseRangeU03a0},
+ {0x0430, 0x045f, UniCaseRangeU0430},
+ {0x0490, 0x04cc, UniCaseRangeU0490},
+ {0x1e00, 0x1ffc, UniCaseRangeU1e00},
+ {0xff40, 0xff5a, UniCaseRangeUff40},
+ {0}
+};
+#endif
+
+#ifndef UNIUPR_NOLOWER
+/*
+ * Latin lower case
+ */
+signed char CifsUniLowerTable[512] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 030-03f */
+ 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, /* 040-04f */
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0,
+ 0, 0, 0, /* 050-05f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 060-06f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 070-07f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 080-08f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 090-09f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0a0-0af */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0b0-0bf */
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, /* 0c0-0cf */
+ 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32,
+ 32, 32, 32, 0, /* 0d0-0df */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0e0-0ef */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0f0-0ff */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 100-10f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 110-11f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 120-12f */
+ 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, /* 130-13f */
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, /* 140-14f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 150-15f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 160-16f */
+ 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0,
+ 0, /* 170-17f */
+ 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 79,
+ 0, /* 180-18f */
+ 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 190-19f */
+ 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, /* 1a0-1af */
+ 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, /* 1b0-1bf */
+ 0, 0, 0, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 1, /* 1c0-1cf */
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, /* 1d0-1df */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e0-1ef */
+ 0, 2, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1f0-1ff */
+};
+
+/* Lower case range - Greek */
+static signed char UniCaseRangeL0380[44] = {
+ 0, 0, 0, 0, 0, 0, 38, 0, 37, 37, 37, 0, 64, 0, 63, 63, /* 380-38f */
+ 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, /* 390-39f */
+ 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+};
+
+/* Lower case range - Cyrillic */
+static signed char UniCaseRangeL0400[48] = {
+ 0, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
+ 0, 80, 80, /* 400-40f */
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, /* 410-41f */
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, /* 420-42f */
+};
+
+/* Lower case range - Extended cyrillic */
+static signed char UniCaseRangeL0490[60] = {
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 490-49f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 4a0-4af */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 4b0-4bf */
+ 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
+};
+
+/* Lower case range - Extended latin and greek */
+static signed char UniCaseRangeL1e00[504] = {
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e00-1e0f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e10-1e1f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e20-1e2f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e30-1e3f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e40-1e4f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e50-1e5f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e60-1e6f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e70-1e7f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e80-1e8f */
+ 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, /* 1e90-1e9f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1ea0-1eaf */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1eb0-1ebf */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1ec0-1ecf */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1ed0-1edf */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1ee0-1eef */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 1ef0-1eff */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, -8, -8, /* 1f00-1f0f */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, 0, 0, /* 1f10-1f1f */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, -8, -8, /* 1f20-1f2f */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, -8, -8, /* 1f30-1f3f */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, 0, 0, /* 1f40-1f4f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, -8, 0, -8, 0, -8, 0, -8, /* 1f50-1f5f */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, -8, -8, /* 1f60-1f6f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f70-1f7f */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, -8, -8, /* 1f80-1f8f */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, -8, -8, /* 1f90-1f9f */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, -8, -8, /* 1fa0-1faf */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -74, -74, -9, 0, 0, 0, /* 1fb0-1fbf */
+ 0, 0, 0, 0, 0, 0, 0, 0, -86, -86, -86, -86, -9, 0,
+ 0, 0, /* 1fc0-1fcf */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -100, -100, 0, 0, 0, 0, /* 1fd0-1fdf */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -112, -112, -7, 0,
+ 0, 0, /* 1fe0-1fef */
+ 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/* Lower case range - Wide latin */
+static signed char UniCaseRangeLff20[27] = {
+ 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, /* ff20-ff2f */
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+};
+
+/*
+ * Lower Case Range
+ *
+ * Each entry is {first code point, last code point (inclusive), delta table}.
+ * Entries are listed in ascending order of their first code point and the
+ * list is terminated by an all-zero entry.
+ */
+const struct UniCaseRange CifsUniLowerRange[] = {
+ {0x0380, 0x03ab, UniCaseRangeL0380},
+ {0x0400, 0x042f, UniCaseRangeL0400},
+ {0x0490, 0x04cb, UniCaseRangeL0490},
+ {0x1e00, 0x1ff7, UniCaseRangeL1e00},
+ {0xff20, 0xff3a, UniCaseRangeLff20},
+ {0}
+};
+#endif
+
+#endif /* __KSMBD_UNIUPR_H */
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
new file mode 100644
index 000000000000..aee28ee6b19c
--- /dev/null
+++ b/fs/ksmbd/vfs.c
@@ -0,0 +1,1895 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/backing-dev.h>
+#include <linux/writeback.h>
+#include <linux/xattr.h>
+#include <linux/falloc.h>
+#include <linux/genhd.h>
+#include <linux/fsnotify.h>
+#include <linux/dcache.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sched/xacct.h>
+#include <linux/crc32c.h>
+
+#include "glob.h"
+#include "oplock.h"
+#include "connection.h"
+#include "vfs.h"
+#include "vfs_cache.h"
+#include "smbacl.h"
+#include "ndr.h"
+#include "auth.h"
+#include "misc.h"
+
+#include "smb_common.h"
+#include "mgmt/share_config.h"
+#include "mgmt/tree_connect.h"
+#include "mgmt/user_session.h"
+#include "mgmt/user_config.h"
+
+/*
+ * Split @path in place at its last '/': the slash is overwritten with a NUL
+ * and a pointer to the component that follows it is returned. A path with no
+ * '/' or with nothing after the last '/' is reported and yields NULL, in
+ * which case @path is left untouched.
+ */
+static char *extract_last_component(char *path)
+{
+	char *slash = strrchr(path, '/');
+
+	if (!slash || slash[1] == '\0') {
+		pr_err("Invalid path %s\n", path);
+		return NULL;
+	}
+
+	*slash = '\0';
+	return slash + 1;
+}
+
+/*
+ * Copy the owner uid of @parent_inode to @inode, but only when the share the
+ * request came in on has KSMBD_SHARE_FLAG_INHERIT_OWNER set.
+ */
+static void ksmbd_vfs_inherit_owner(struct ksmbd_work *work,
+				    struct inode *parent_inode,
+				    struct inode *inode)
+{
+	if (test_share_config_flag(work->tcon->share_conf,
+				   KSMBD_SHARE_FLAG_INHERIT_OWNER))
+		i_uid_write(inode, i_uid_read(parent_inode));
+}
+
+/**
+ * ksmbd_vfs_lock_parent() - lock parent dentry if it is stable
+ * @parent:	dentry expected to be the parent of @child
+ * @child:	dentry that is looked up again under @parent
+ *
+ * the parent dentry got by dget_parent or @parent could be
+ * unstable, we try to lock a parent inode and lookup the
+ * child dentry again.
+ *
+ * the reference count of @parent isn't incremented.
+ *
+ * Return:	0 with the inode of @parent locked, otherwise the lookup error
+ *		or -ESTALE (lookup found a different dentry) with the inode
+ *		unlocked again
+ */
+int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child)
+{
+	struct dentry *found;
+	int err;
+
+	inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
+
+	found = lookup_one_len(child->d_name.name, parent, child->d_name.len);
+	if (IS_ERR(found)) {
+		err = PTR_ERR(found);
+	} else {
+		/* the name must still resolve to the very same dentry */
+		err = (found == child) ? 0 : -ESTALE;
+		dput(found);
+	}
+
+	/* the lock is only kept for the caller on success */
+	if (err)
+		inode_unlock(d_inode(parent));
+	return err;
+}
+
+/*
+ * Check for MAY_EXEC | MAY_WRITE permission on the parent directory of
+ * @dentry, with the parent inode locked for the duration of the check.
+ *
+ * Return: 0 if permitted, otherwise the error from locking the parent or
+ * from inode_permission()
+ */
+int ksmbd_vfs_may_delete(struct user_namespace *user_ns,
+			 struct dentry *dentry)
+{
+	struct dentry *parent = dget_parent(dentry);
+	int ret = ksmbd_vfs_lock_parent(parent, dentry);
+
+	if (!ret) {
+		ret = inode_permission(user_ns, d_inode(parent),
+				       MAY_EXEC | MAY_WRITE);
+		inode_unlock(d_inode(parent));
+	}
+
+	dput(parent);
+	return ret;
+}
+
+/*
+ * Build the access mask for @dentry in *@daccess: read-attributes and
+ * read-control are always set, write/read/execute rights are added for each
+ * inode_permission() check on the inode that passes, and delete is added
+ * when the parent directory allows MAY_EXEC | MAY_WRITE.
+ *
+ * Return: 0 on success, otherwise the error from locking the parent; in the
+ * error case *@daccess holds the bits collected before the parent check
+ */
+int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
+				   struct dentry *dentry, __le32 *daccess)
+{
+	struct dentry *parent;
+	__le32 granted = cpu_to_le32(FILE_READ_ATTRIBUTES | READ_CONTROL);
+	int ret;
+
+	if (inode_permission(user_ns, d_inode(dentry),
+			     MAY_OPEN | MAY_WRITE) == 0)
+		granted |= cpu_to_le32(WRITE_DAC | WRITE_OWNER | SYNCHRONIZE |
+				FILE_WRITE_DATA | FILE_APPEND_DATA |
+				FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES |
+				FILE_DELETE_CHILD);
+
+	if (inode_permission(user_ns, d_inode(dentry),
+			     MAY_OPEN | MAY_READ) == 0)
+		granted |= FILE_READ_DATA_LE | FILE_READ_EA_LE;
+
+	if (inode_permission(user_ns, d_inode(dentry),
+			     MAY_OPEN | MAY_EXEC) == 0)
+		granted |= FILE_EXECUTE_LE;
+
+	/* publish what was collected before the parent is looked at */
+	*daccess = granted;
+
+	parent = dget_parent(dentry);
+	ret = ksmbd_vfs_lock_parent(parent, dentry);
+	if (!ret) {
+		if (inode_permission(user_ns, d_inode(parent),
+				     MAY_EXEC | MAY_WRITE) == 0)
+			*daccess |= FILE_DELETE_LE;
+		inode_unlock(d_inode(parent));
+	}
+
+	dput(parent);
+	return ret;
+}
+
+/**
+ * ksmbd_vfs_create() - vfs helper for smb create file
+ * @work:	work
+ * @name:	path of the regular file to create
+ * @mode:	file create mode (S_IFREG is added here)
+ *
+ * Return:	0 on success, otherwise error
+ */
+int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode)
+{
+	struct path parent_path;
+	struct dentry *new;
+	int err;
+
+	new = kern_path_create(AT_FDCWD, name, &parent_path, 0);
+	if (IS_ERR(new)) {
+		err = PTR_ERR(new);
+		/* -ENOENT is not logged */
+		if (err != -ENOENT)
+			pr_err("path create failed for %s, err %d\n",
+			       name, err);
+		return err;
+	}
+
+	err = vfs_create(mnt_user_ns(parent_path.mnt),
+			 d_inode(parent_path.dentry),
+			 new, mode | S_IFREG, true);
+	if (err)
+		pr_err("File(%s): creation failed (err:%d)\n", name, err);
+	else
+		ksmbd_vfs_inherit_owner(work, d_inode(parent_path.dentry),
+					d_inode(new));
+
+	done_path_create(&parent_path, new);
+	return err;
+}
+
+/**
+ * ksmbd_vfs_mkdir() - vfs helper for smb create directory
+ * @work:	work
+ * @name:	path of the directory to create
+ * @mode:	directory create mode (S_IFDIR is added here)
+ *
+ * Return:	0 on success, otherwise error
+ */
+int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode)
+{
+	struct path path;
+	struct dentry *dentry;
+	int err;
+
+	dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY);
+	if (IS_ERR(dentry)) {
+		err = PTR_ERR(dentry);
+		/* -EEXIST is not logged */
+		if (err != -EEXIST)
+			ksmbd_debug(VFS, "path create failed for %s, err %d\n",
+				    name, err);
+		return err;
+	}
+
+	err = vfs_mkdir(mnt_user_ns(path.mnt), d_inode(path.dentry),
+			dentry, mode | S_IFDIR);
+	if (!err && d_unhashed(dentry)) {
+		/*
+		 * The dentry handed to vfs_mkdir() came back unhashed, so
+		 * look the name up again to reach the new directory.
+		 */
+		struct dentry *d = lookup_one_len(dentry->d_name.name,
+						  dentry->d_parent,
+						  dentry->d_name.len);
+
+		if (IS_ERR(d)) {
+			err = PTR_ERR(d);
+		} else if (unlikely(d_is_negative(d))) {
+			dput(d);
+			err = -ENOENT;
+		} else {
+			ksmbd_vfs_inherit_owner(work, d_inode(path.dentry),
+						d_inode(d));
+			dput(d);
+		}
+	}
+
+	done_path_create(&path, dentry);
+	if (err)
+		pr_err("mkdir(%s): creation failed (err:%d)\n", name, err);
+	return err;
+}
+
+/*
+ * Fetch the value of the first xattr of @dentry whose name matches
+ * @attr_name, compared case-insensitively over @attr_name_len characters.
+ * The value is handed back through @attr_value by ksmbd_vfs_getxattr().
+ *
+ * Return: result of ksmbd_vfs_getxattr() for the matching name, or -ENOENT
+ * when the xattr list is empty, could not be read, or has no match
+ */
+static ssize_t ksmbd_vfs_getcasexattr(struct user_namespace *user_ns,
+				      struct dentry *dentry, char *attr_name,
+				      int attr_name_len, char **attr_value)
+{
+	char *xattr_list = NULL;
+	ssize_t value_len = -ENOENT;
+	ssize_t xattr_list_len;
+
+	xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
+	if (xattr_list_len > 0) {
+		char *list_end = xattr_list + xattr_list_len;
+		char *name;
+
+		/* the list is a sequence of NUL terminated names */
+		for (name = xattr_list; name < list_end;
+		     name += strlen(name) + 1) {
+			ksmbd_debug(VFS, "%s, len %zd\n", name, strlen(name));
+			if (strncasecmp(attr_name, name, attr_name_len) == 0) {
+				value_len = ksmbd_vfs_getxattr(user_ns,
+							       dentry,
+							       name,
+							       attr_value);
+				if (value_len < 0)
+					pr_err("failed to get xattr in file\n");
+				break;
+			}
+		}
+	}
+
+	kvfree(xattr_list);
+	return value_len;
+}
+
+/*
+ * Read up to @count bytes of the stream backing @fp, starting at offset
+ * *@pos, into @buf. The stream data is fetched as an xattr value through
+ * ksmbd_vfs_getcasexattr().
+ *
+ * Return: number of bytes copied, 0 or a negative error as reported by the
+ * xattr lookup, or -EINVAL when *@pos is negative or not inside the stream
+ */
+static int ksmbd_vfs_stream_read(struct ksmbd_file *fp, char *buf, loff_t *pos,
+				 size_t count)
+{
+	ssize_t v_len;
+	char *stream_buf = NULL;
+	int ret;
+
+	ksmbd_debug(VFS, "read stream data pos : %llu, count : %zd\n",
+		    *pos, count);
+
+	v_len = ksmbd_vfs_getcasexattr(file_mnt_user_ns(fp->filp),
+				       fp->filp->f_path.dentry,
+				       fp->stream.name,
+				       fp->stream.size,
+				       &stream_buf);
+	if ((int)v_len <= 0)
+		return (int)v_len;
+
+	/*
+	 * A negative offset would pass the "v_len <= *pos" test and then
+	 * index in front of stream_buf, so reject it explicitly.
+	 */
+	if (*pos < 0 || v_len <= *pos) {
+		ret = -EINVAL;
+		goto free_buf;
+	}
+
+	/* never copy beyond the end of the stream data */
+	if (v_len - *pos < count)
+		count = v_len - *pos;
+
+	memcpy(buf, &stream_buf[*pos], count);
+	ret = count;
+
+free_buf:
+	kvfree(stream_buf);
+	return ret;
+}
+
+/**
+ * check_lock_range() - vfs helper for smb byte range file locking
+ * @filp:	the file to apply the lock to
+ * @start:	lock start byte offset
+ * @end:	lock end byte offset
+ * @type:	byte range type read/write
+ *
+ * Walks the posix locks recorded for the inode of @filp and looks for one
+ * that overlaps [@start, @end] and conflicts with the access: a shared lock
+ * conflicts with a WRITE, an exclusive lock conflicts unless it belongs to
+ * @filp itself.
+ *
+ * Return:	0 if no conflicting lock was found, 1 otherwise
+ */
+static int check_lock_range(struct file *filp, loff_t start, loff_t end,
+			    unsigned char type)
+{
+	struct file_lock_context *ctx = file_inode(filp)->i_flctx;
+	struct file_lock *fl;
+	int conflict = 0;
+
+	if (!ctx || list_empty_careful(&ctx->flc_posix))
+		return 0;
+
+	spin_lock(&ctx->flc_lock);
+	list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
+		/* locks outside the requested range are of no interest */
+		if (fl->fl_end < start || end < fl->fl_start)
+			continue;
+
+		if (fl->fl_type == F_RDLCK) {
+			if (type == WRITE) {
+				pr_err("not allow write by shared lock\n");
+				conflict = 1;
+				break;
+			}
+		} else if (fl->fl_type == F_WRLCK) {
+			/* check owner in lock */
+			if (fl->fl_file != filp) {
+				pr_err("not allow rw access by exclusive lock from other opens\n");
+				conflict = 1;
+				break;
+			}
+		}
+	}
+	spin_unlock(&ctx->flc_lock);
+
+	return conflict;
+}
+
+/**
+ * ksmbd_vfs_read() - vfs helper for smb file read
+ * @work:	smb work; the data is read into work->aux_payload_buf
+ * @fp:		ksmbd file of the open file
+ * @count:	read byte count
+ * @pos:	file pos
+ *
+ * Return:	number of read bytes on success, otherwise error
+ */
+int ksmbd_vfs_read(struct ksmbd_work *work, struct ksmbd_file *fp, size_t count,
+		   loff_t *pos)
+{
+	struct file *filp = fp->filp;
+	struct inode *inode = file_inode(filp);
+	char *rbuf = work->aux_payload_buf;
+	ssize_t nbytes;
+
+	if (S_ISDIR(inode->i_mode))
+		return -EISDIR;
+
+	if (unlikely(count == 0))
+		return 0;
+
+	/* the granted access is only checked when connection_type is set */
+	if (work->conn->connection_type &&
+	    !(fp->daccess & (FILE_READ_DATA_LE | FILE_EXECUTE_LE))) {
+		pr_err("no right to read(%pd)\n",
+		       fp->filp->f_path.dentry);
+		return -EACCES;
+	}
+
+	if (ksmbd_stream_fd(fp))
+		return ksmbd_vfs_stream_read(fp, rbuf, pos, count);
+
+	/* byte range locks are checked here unless posix extensions are on */
+	if (!work->tcon->posix_extensions &&
+	    check_lock_range(filp, *pos, *pos + count - 1, READ)) {
+		pr_err("unable to read due to lock\n");
+		return -EAGAIN;
+	}
+
+	nbytes = kernel_read(filp, rbuf, count, pos);
+	if (nbytes < 0) {
+		pr_err("smb read failed for (%s), err = %zd\n",
+		       fp->filename, nbytes);
+		return nbytes;
+	}
+
+	filp->f_pos = *pos;
+	return nbytes;
+}
+
+/*
+ * Write @count bytes from @buf into the stream backing @fp at offset *@pos.
+ * The stream is stored as an xattr value: the current value is fetched,
+ * patched in a buffer and written back with ksmbd_vfs_setxattr(). The
+ * resulting value is limited to XATTR_SIZE_MAX bytes; a write that would
+ * go past that limit is shortened to the room that is left.
+ *
+ * Return: 0 on success, -EINVAL when *@pos is outside [0, XATTR_SIZE_MAX),
+ * -ENOMEM when the buffer cannot be allocated, otherwise the error from
+ * the xattr helpers
+ */
+static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
+				  size_t count)
+{
+	char *stream_buf = NULL, *wbuf;
+	struct user_namespace *user_ns = file_mnt_user_ns(fp->filp);
+	size_t size;
+	ssize_t v_len;
+	int err = 0;
+
+	ksmbd_debug(VFS, "write stream data pos : %llu, count : %zd\n",
+		    *pos, count);
+
+	/*
+	 * The offset is used as an index into a buffer of at most
+	 * XATTR_SIZE_MAX bytes, so it has to lie inside that range.
+	 */
+	if (*pos < 0 || *pos >= XATTR_SIZE_MAX) {
+		pr_err("stream write position %lld is out of bounds\n", *pos);
+		return -EINVAL;
+	}
+
+	size = *pos + count;
+	if (size > XATTR_SIZE_MAX) {
+		/* only the bytes that still fit behind *pos are written */
+		size = XATTR_SIZE_MAX;
+		count = XATTR_SIZE_MAX - *pos;
+	}
+
+	v_len = ksmbd_vfs_getcasexattr(user_ns,
+				       fp->filp->f_path.dentry,
+				       fp->stream.name,
+				       fp->stream.size,
+				       &stream_buf);
+	if (v_len < 0) {
+		pr_err("not found stream in xattr : %zd\n", v_len);
+		err = v_len;
+		goto out;
+	}
+
+	/* grow into a zero-filled buffer when the write extends the value */
+	if ((size_t)v_len < size) {
+		wbuf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
+		if (!wbuf) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		if (v_len > 0)
+			memcpy(wbuf, stream_buf, v_len);
+		kvfree(stream_buf);
+		stream_buf = wbuf;
+	}
+
+	memcpy(&stream_buf[*pos], buf, count);
+
+	/*
+	 * NOTE(review): the value is stored with length "size" even when the
+	 * existing value was longer, which looks like it drops the data
+	 * behind the written range - confirm that this is intended.
+	 */
+	err = ksmbd_vfs_setxattr(user_ns,
+				 fp->filp->f_path.dentry,
+				 fp->stream.name,
+				 (void *)stream_buf,
+				 size,
+				 0);
+	if (err < 0)
+		goto out;
+
+	fp->filp->f_pos = *pos;
+	err = 0;
+out:
+	kvfree(stream_buf);
+	return err;
+}
+
+/**
+ * ksmbd_vfs_write() - vfs helper for smb file write
+ * @work:	work
+ * @fp:		ksmbd file of the open file
+ * @buf:	buf containing data for writing
+ * @count:	write byte count
+ * @pos:	file pos
+ * @sync:	fsync after write
+ * @written:	number of bytes written; only set when data was written
+ *
+ * Return:	0 on success, otherwise error
+ */
+int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp,
+		    char *buf, size_t count, loff_t *pos, bool sync,
+		    ssize_t *written)
+{
+	struct ksmbd_session *sess = work->sess;
+	struct file *filp;
+	loff_t offset = *pos;	/* start of the range handed to fsync */
+	int err = 0;
+
+	/* the granted access is only checked when connection_type is set */
+	if (sess->conn->connection_type &&
+	    !(fp->daccess & FILE_WRITE_DATA_LE)) {
+		pr_err("no right to write(%pd)\n",
+		       fp->filp->f_path.dentry);
+		return -EACCES;
+	}
+
+	filp = fp->filp;
+
+	if (ksmbd_stream_fd(fp)) {
+		err = ksmbd_vfs_stream_write(fp, buf, pos, count);
+		if (!err)
+			*written = count;
+		return err;
+	}
+
+	/* byte range locks are checked here unless posix extensions are on */
+	if (!work->tcon->posix_extensions &&
+	    check_lock_range(filp, *pos, *pos + count - 1, WRITE)) {
+		pr_err("unable to write due to lock\n");
+		return -EAGAIN;
+	}
+
+	/* Do we need to break any of a levelII oplock? */
+	smb_break_all_levII_oplock(work, fp, 1);
+
+	err = kernel_write(filp, buf, count, pos);
+	if (err < 0) {
+		ksmbd_debug(VFS, "smb write failed, err = %d\n", err);
+		return err;
+	}
+
+	filp->f_pos = *pos;
+	*written = err;
+	if (!sync)
+		return 0;
+
+	err = vfs_fsync_range(filp, offset, offset + *written, 0);
+	if (err < 0)
+		pr_err("fsync failed for filename = %pd, err = %d\n",
+		       fp->filp->f_path.dentry, err);
+	return err;
+}
+
+/**
+ * ksmbd_vfs_getattr() - vfs helper for smb getattr
+ * @path:	path of the object to query
+ * @stat:	kstat filled in by vfs_getattr()
+ *
+ * Requests STATX_BTIME with AT_STATX_SYNC_AS_STAT and logs any failure.
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_getattr(struct path *path, struct kstat *stat)
+{
+	int rc = vfs_getattr(path, stat, STATX_BTIME, AT_STATX_SYNC_AS_STAT);
+
+	if (!rc)
+		return 0;
+
+	pr_err("getattr failed, err %d\n", rc);
+	return rc;
+}
+
+/**
+ * ksmbd_vfs_fsync() - vfs helper for smb fsync
+ * @work:	work
+ * @fid:	file id of open file
+ * @p_id:	second id handed to ksmbd_lookup_fd_slow() together with @fid
+ *
+ * Looks the file up, syncs it with vfs_fsync() and drops the lookup
+ * reference again.
+ *
+ * Return: 0 on success, -ENOENT if the file is not found, otherwise error
+ */
+int ksmbd_vfs_fsync(struct ksmbd_work *work, u64 fid, u64 p_id)
+{
+	struct ksmbd_file *fp = ksmbd_lookup_fd_slow(work, fid, p_id);
+	int rc;
+
+	if (!fp) {
+		pr_err("failed to get filp for fid %llu\n", fid);
+		return -ENOENT;
+	}
+
+	rc = vfs_fsync(fp->filp, 0);
+	if (rc < 0)
+		pr_err("smb fsync failed, err = %d\n", rc);
+
+	ksmbd_fd_put(work, fp);
+	return rc;
+}
+
+/**
+ * ksmbd_vfs_remove_file() - vfs helper for smb rmdir or unlink
+ * @work: work
+ * @name: absolute directory or file name
+ *
+ * Runs with the fsids overridden via ksmbd_override_fsids() and reverts them
+ * on every exit path. Directories are removed with vfs_rmdir(), everything
+ * else with vfs_unlink().
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name)
+{
+ struct path path;
+ struct dentry *parent;
+ int err;
+ int flags = 0;
+
+ if (ksmbd_override_fsids(work))
+ return -ENOMEM;
+
+ /* symlinks are only followed when the share is configured for it */
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS))
+ flags = LOOKUP_FOLLOW;
+
+ err = kern_path(name, flags, &path);
+ if (err) {
+ ksmbd_debug(VFS, "can't get %s, err %d\n", name, err);
+ ksmbd_revert_fsids(work);
+ return err;
+ }
+
+ parent = dget_parent(path.dentry);
+ /* presumably leaves the parent inode locked on success; it is unlocked at out_err */
+ err = ksmbd_vfs_lock_parent(parent, path.dentry);
+ if (err) {
+ dput(parent);
+ path_put(&path);
+ ksmbd_revert_fsids(work);
+ return err;
+ }
+
+ /* an inode with no links left is reported as -ENOENT */
+ if (!d_inode(path.dentry)->i_nlink) {
+ err = -ENOENT;
+ goto out_err;
+ }
+
+ if (S_ISDIR(d_inode(path.dentry)->i_mode)) {
+ err = vfs_rmdir(mnt_user_ns(path.mnt), d_inode(parent),
+ path.dentry);
+ /* -ENOTEMPTY is returned without a debug message */
+ if (err && err != -ENOTEMPTY)
+ ksmbd_debug(VFS, "%s: rmdir failed, err %d\n", name,
+ err);
+ } else {
+ err = vfs_unlink(mnt_user_ns(path.mnt), d_inode(parent),
+ path.dentry, NULL);
+ if (err)
+ ksmbd_debug(VFS, "%s: unlink failed, err %d\n", name,
+ err);
+ }
+
+out_err:
+ inode_unlock(d_inode(parent));
+ dput(parent);
+ path_put(&path);
+ ksmbd_revert_fsids(work);
+ return err;
+}
+
+/**
+ * ksmbd_vfs_link() - vfs helper for creating smb hardlink
+ * @work: work
+ * @oldname: source file name
+ * @newname: hardlink name
+ *
+ * Runs with the fsids overridden via ksmbd_override_fsids(). Source and
+ * target must be on the same mount, otherwise -EXDEV is returned.
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname,
+ const char *newname)
+{
+ struct path oldpath, newpath;
+ struct dentry *dentry;
+ int err;
+ int flags = 0;
+
+ if (ksmbd_override_fsids(work))
+ return -ENOMEM;
+
+ /* symlinks are only followed when the share is configured for it */
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS))
+ flags = LOOKUP_FOLLOW;
+
+ err = kern_path(oldname, flags, &oldpath);
+ if (err) {
+ pr_err("cannot get linux path for %s, err = %d\n",
+ oldname, err);
+ goto out1;
+ }
+
+ dentry = kern_path_create(AT_FDCWD, newname, &newpath,
+ flags | LOOKUP_REVAL);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ pr_err("path create err for %s, err %d\n", newname, err);
+ goto out2;
+ }
+
+ /* err is preset so the mount comparison below can fail with -EXDEV */
+ err = -EXDEV;
+ if (oldpath.mnt != newpath.mnt) {
+ pr_err("vfs_link failed err %d\n", err);
+ goto out3;
+ }
+
+ err = vfs_link(oldpath.dentry, mnt_user_ns(newpath.mnt),
+ d_inode(newpath.dentry),
+ dentry, NULL);
+ if (err)
+ ksmbd_debug(VFS, "vfs_link failed err %d\n", err);
+
+ /* unwind in reverse order of acquisition */
+out3:
+ done_path_create(&newpath, dentry);
+out2:
+ path_put(&oldpath);
+out1:
+ ksmbd_revert_fsids(work);
+ return err;
+}
+
+/*
+ * Check whether any child of @src_dent is currently open through ksmbd.
+ *
+ * Walks src_dent->d_subdirs under src_dent->d_lock and looks each positive
+ * child's inode up with ksmbd_lookup_fd_inode().
+ *
+ * Return: 0 if no child is in use, -EACCES as soon as one is found.
+ */
+static int ksmbd_validate_entry_in_use(struct dentry *src_dent)
+{
+ struct dentry *dst_dent;
+
+ spin_lock(&src_dent->d_lock);
+ list_for_each_entry(dst_dent, &src_dent->d_subdirs, d_child) {
+ struct ksmbd_file *child_fp;
+
+ /* negative dentries have no inode to look up */
+ if (d_really_is_negative(dst_dent))
+ continue;
+
+ /* NOTE(review): confirm whether ksmbd_lookup_fd_inode() takes a reference that needs dropping */
+ child_fp = ksmbd_lookup_fd_inode(d_inode(dst_dent));
+ if (child_fp) {
+ spin_unlock(&src_dent->d_lock);
+ ksmbd_debug(VFS, "Forbid rename, sub file/dir is in use\n");
+ return -EACCES;
+ }
+ }
+ spin_unlock(&src_dent->d_lock);
+
+ return 0;
+}
+
+/*
+ * Rename @src_dent to @dst_name inside @dst_dent_parent.
+ *
+ * @trap_dent is the dentry the caller got back from lock_rename(); neither
+ * the source nor the looked-up destination may be that dentry. The rename is
+ * only attempted when the destination name does not exist yet; otherwise
+ * -ENOTEMPTY is returned.
+ *
+ * Return: 0 on success, otherwise error.
+ */
+static int __ksmbd_vfs_rename(struct ksmbd_work *work,
+ struct user_namespace *src_user_ns,
+ struct dentry *src_dent_parent,
+ struct dentry *src_dent,
+ struct user_namespace *dst_user_ns,
+ struct dentry *dst_dent_parent,
+ struct dentry *trap_dent,
+ char *dst_name)
+{
+ struct dentry *dst_dent;
+ int err;
+
+ /* without posix extensions, refuse when a child of the source is in use */
+ if (!work->tcon->posix_extensions) {
+ err = ksmbd_validate_entry_in_use(src_dent);
+ if (err)
+ return err;
+ }
+
+ if (d_really_is_negative(src_dent_parent))
+ return -ENOENT;
+ if (d_really_is_negative(dst_dent_parent))
+ return -ENOENT;
+ if (d_really_is_negative(src_dent))
+ return -ENOENT;
+ if (src_dent == trap_dent)
+ return -EINVAL;
+
+ if (ksmbd_override_fsids(work))
+ return -ENOMEM;
+
+ dst_dent = lookup_one_len(dst_name, dst_dent_parent, strlen(dst_name));
+ err = PTR_ERR(dst_dent);
+ if (IS_ERR(dst_dent)) {
+ pr_err("lookup failed %s [%d]\n", dst_name, err);
+ goto out;
+ }
+
+ /* stays -ENOTEMPTY unless the destination is negative and is not trap_dent */
+ err = -ENOTEMPTY;
+ if (dst_dent != trap_dent && !d_really_is_positive(dst_dent)) {
+ struct renamedata rd = {
+ .old_mnt_userns = src_user_ns,
+ .old_dir = d_inode(src_dent_parent),
+ .old_dentry = src_dent,
+ .new_mnt_userns = dst_user_ns,
+ .new_dir = d_inode(dst_dent_parent),
+ .new_dentry = dst_dent,
+ };
+ err = vfs_rename(&rd);
+ }
+ if (err)
+ pr_err("vfs_rename failed err %d\n", err);
+ /* dst_dent is a valid dentry here; the ERR_PTR case jumped to out above */
+ if (dst_dent)
+ dput(dst_dent);
+out:
+ ksmbd_revert_fsids(work);
+ return err;
+}
+
+/**
+ * ksmbd_vfs_fp_rename() - rename an open file to a new path
+ * @work: work
+ * @fp: ksmbd file pointer of the file to rename
+ * @newname: new path; split by extract_last_component() into the target
+ * directory (looked up with kern_path()) and the final name
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
+ char *newname)
+{
+ struct path dst_path;
+ struct dentry *src_dent_parent, *dst_dent_parent;
+ struct dentry *src_dent, *trap_dent, *src_child;
+ char *dst_name;
+ int err;
+ int flags;
+
+ /* NOTE(review): presumably cuts newname at the last separator in place — confirm */
+ dst_name = extract_last_component(newname);
+ if (!dst_name)
+ return -EINVAL;
+
+ src_dent_parent = dget_parent(fp->filp->f_path.dentry);
+ src_dent = fp->filp->f_path.dentry;
+
+ /* the remaining newname must resolve to a directory */
+ flags = LOOKUP_DIRECTORY;
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS))
+ flags |= LOOKUP_FOLLOW;
+
+ err = kern_path(newname, flags, &dst_path);
+ if (err) {
+ ksmbd_debug(VFS, "Cannot get path for %s [%d]\n", newname, err);
+ goto out;
+ }
+ dst_dent_parent = dst_path.dentry;
+
+ trap_dent = lock_rename(src_dent_parent, dst_dent_parent);
+ dget(src_dent);
+ dget(dst_dent_parent);
+ /* look the source name up again under the rename lock */
+ src_child = lookup_one_len(src_dent->d_name.name, src_dent_parent,
+ src_dent->d_name.len);
+ if (IS_ERR(src_child)) {
+ err = PTR_ERR(src_child);
+ goto out_lock;
+ }
+
+ /* the name no longer resolves to the dentry the file was opened on */
+ if (src_child != src_dent) {
+ err = -ESTALE;
+ dput(src_child);
+ goto out_lock;
+ }
+ dput(src_child);
+
+ err = __ksmbd_vfs_rename(work,
+ file_mnt_user_ns(fp->filp),
+ src_dent_parent,
+ src_dent,
+ mnt_user_ns(dst_path.mnt),
+ dst_dent_parent,
+ trap_dent,
+ dst_name);
+out_lock:
+ dput(src_dent);
+ dput(dst_dent_parent);
+ unlock_rename(src_dent_parent, dst_dent_parent);
+ path_put(&dst_path);
+out:
+ dput(src_dent_parent);
+ return err;
+}
+
+/**
+ * ksmbd_vfs_truncate() - vfs helper for smb file truncate
+ * @work: work
+ * @name: path of the file to truncate, or NULL to truncate @fp instead
+ * @fp: ksmbd file pointer, used only when @name is NULL
+ * @size: truncate to given size
+ *
+ * The by-name path truncates directly; the oplock break and the lock-range
+ * check are only done on the @fp path.
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_truncate(struct ksmbd_work *work, const char *name,
+ struct ksmbd_file *fp, loff_t size)
+{
+ struct path path;
+ int err = 0;
+
+ if (name) {
+ err = kern_path(name, 0, &path);
+ if (err) {
+ pr_err("cannot get linux path for %s, err %d\n",
+ name, err);
+ return err;
+ }
+ err = vfs_truncate(&path, size);
+ if (err)
+ pr_err("truncate failed for %s err %d\n",
+ name, err);
+ path_put(&path);
+ } else {
+ struct file *filp;
+
+ filp = fp->filp;
+
+ /* Do we need to break any of a levelII oplock? */
+ smb_break_all_levII_oplock(work, fp, 1);
+
+ if (!work->tcon->posix_extensions) {
+ struct inode *inode = file_inode(filp);
+
+ /* check the byte range between the old and the new end of file */
+ if (size < inode->i_size) {
+ err = check_lock_range(filp, size,
+ inode->i_size - 1, WRITE);
+ } else {
+ err = check_lock_range(filp, inode->i_size,
+ size - 1, WRITE);
+ }
+
+ if (err) {
+ pr_err("failed due to lock\n");
+ return -EAGAIN;
+ }
+ }
+
+ err = vfs_truncate(&filp->f_path, size);
+ if (err)
+ pr_err("truncate failed for filename : %s err %d\n",
+ fp->filename, err);
+ }
+
+ return err;
+}
+
+/**
+ * ksmbd_vfs_listxattr() - vfs helper for smb list extended attributes
+ * @dentry:	dentry of file for listing xattrs
+ * @list:	set to a kvmalloc()ed buffer holding the name list; reset to
+ *		NULL when filling the buffer fails
+ *
+ * @list is left untouched when the size probe returns zero or an error.
+ *
+ * Return: xattr list length on success, otherwise error
+ */
+ssize_t ksmbd_vfs_listxattr(struct dentry *dentry, char **list)
+{
+	char *names;
+	ssize_t len;
+
+	/* first pass: ask only for the size of the name list */
+	len = vfs_listxattr(dentry, NULL, 0);
+	if (len <= 0)
+		return len;
+
+	names = kvmalloc(len, GFP_KERNEL | __GFP_ZERO);
+	if (!names)
+		return -ENOMEM;
+
+	/* second pass: fetch the names into the new buffer */
+	*list = names;
+	len = vfs_listxattr(dentry, names, len);
+	if (len >= 0)
+		return len;
+
+	ksmbd_debug(VFS, "listxattr failed\n");
+	kvfree(names);
+	*list = NULL;
+	return len;
+}
+
+/* Query an xattr through vfs_getxattr() with a NULL buffer of size 0. */
+static ssize_t ksmbd_vfs_xattr_len(struct user_namespace *user_ns,
+				   struct dentry *dentry, char *xattr_name)
+{
+	ssize_t len = vfs_getxattr(user_ns, dentry, xattr_name, NULL, 0);
+
+	return len;
+}
+
+/**
+ * ksmbd_vfs_getxattr() - vfs helper for smb get extended attributes value
+ * @user_ns:	user namespace
+ * @dentry:	dentry of file for getting xattrs
+ * @xattr_name:	name of the xattr to query
+ * @xattr_buf:	set to a kmalloc()ed buffer holding the value when the
+ *		returned length is positive, NULL otherwise
+ *
+ * Return: read xattr value length on success, otherwise error
+ */
+ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns,
+			   struct dentry *dentry,
+			   char *xattr_name, char **xattr_buf)
+{
+	char *value;
+	ssize_t len;
+
+	*xattr_buf = NULL;
+
+	len = ksmbd_vfs_xattr_len(user_ns, dentry, xattr_name);
+	if (len < 0)
+		return len;
+
+	value = kmalloc(len + 1, GFP_KERNEL);
+	if (!value)
+		return -ENOMEM;
+
+	len = vfs_getxattr(user_ns, dentry, xattr_name, (void *)value, len);
+	if (len <= 0) {
+		/* nothing to hand out: empty value or error */
+		kfree(value);
+		return len;
+	}
+
+	*xattr_buf = value;
+	return len;
+}
+
+/**
+ * ksmbd_vfs_setxattr() - vfs helper for smb set extended attributes value
+ * @user_ns:	user namespace
+ * @dentry:	dentry to set XATTR at
+ * @attr_name:	xattr name for setxattr
+ * @attr_value:	xattr value to set
+ * @attr_size:	size of xattr value
+ * @flags:	flags passed through to vfs_setxattr()
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_setxattr(struct user_namespace *user_ns,
+		       struct dentry *dentry, const char *attr_name,
+		       const void *attr_value, size_t attr_size, int flags)
+{
+	int rc = vfs_setxattr(user_ns, dentry, attr_name, attr_value,
+			      attr_size, flags);
+
+	if (rc)
+		ksmbd_debug(VFS, "setxattr failed, err %d\n", rc);
+
+	return rc;
+}
+
+/**
+ * ksmbd_vfs_set_fadvise() - convert smb IO caching options to linux options
+ * @filp:	file pointer for IO
+ * @option:	smb IO options
+ *
+ * Only the first matching option is applied, tested in the order
+ * write-through, sequential-only, random-access.
+ */
+void ksmbd_vfs_set_fadvise(struct file *filp, __le32 option)
+{
+	struct address_space *mapping = filp->f_mapping;
+
+	if (!option || !mapping)
+		return;
+
+	if (option & FILE_WRITE_THROUGH_LE) {
+		filp->f_flags |= O_SYNC;
+		return;
+	}
+
+	if (option & FILE_SEQUENTIAL_ONLY_LE) {
+		/* double the backing device's readahead window for this file */
+		filp->f_ra.ra_pages = inode_to_bdi(mapping->host)->ra_pages * 2;
+		spin_lock(&filp->f_lock);
+		filp->f_mode &= ~FMODE_RANDOM;
+		spin_unlock(&filp->f_lock);
+		return;
+	}
+
+	if (option & FILE_RANDOM_ACCESS_LE) {
+		spin_lock(&filp->f_lock);
+		filp->f_mode |= FMODE_RANDOM;
+		spin_unlock(&filp->f_lock);
+	}
+}
+
+/**
+ * ksmbd_vfs_zero_data() - zero a byte range of a file
+ * @work:	work
+ * @fp:		ksmbd file pointer
+ * @off:	start of the range
+ * @len:	length of the range
+ *
+ * Files flagged ATTR_SPARSE_FILE_LE get a hole punched (size kept); all
+ * others get the range zeroed with FALLOC_FL_ZERO_RANGE.
+ *
+ * Return: result of vfs_fallocate()
+ */
+int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp,
+			loff_t off, loff_t len)
+{
+	int mode = FALLOC_FL_ZERO_RANGE;
+
+	smb_break_all_levII_oplock(work, fp, 1);
+
+	if (fp->f_ci->m_fattr & ATTR_SPARSE_FILE_LE)
+		mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+
+	return vfs_fallocate(fp->filp, mode, off, len);
+}
+
+/**
+ * ksmbd_vfs_fqar_lseek() - collect the data (non-hole) ranges of a file
+ * @fp:		ksmbd file pointer
+ * @start:	first byte of the region to scan
+ * @length:	length of the region to scan
+ * @ranges:	output array, filled with little-endian offset/length pairs
+ * @in_count:	capacity of @ranges
+ * @out_count:	number of entries stored in @ranges; always written, also on
+ *		the early-return paths
+ *
+ * The region is clamped to the filesystem's s_maxbytes and to the current
+ * file size, then walked with SEEK_DATA/SEEK_HOLE.
+ *
+ * Return: 0 on success (-ENXIO from a seek just ends the scan), -EFBIG if
+ * @start is beyond s_maxbytes, otherwise the seek error
+ */
+int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
+			 struct file_allocated_range_buffer *ranges,
+			 int in_count, int *out_count)
+{
+	struct file *f = fp->filp;
+	struct inode *inode = file_inode(fp->filp);
+	loff_t maxbytes = (u64)inode->i_sb->s_maxbytes, end;
+	loff_t extent_start, extent_end;
+	int ret = 0;
+
+	/* set before any return so the caller never reads a stale count */
+	*out_count = 0;
+
+	if (start > maxbytes)
+		return -EFBIG;
+
+	if (!in_count)
+		return 0;
+
+	/*
+	 * Shrink request scope to what the fs can actually handle.
+	 */
+	if (length > maxbytes || (maxbytes - length) < start)
+		length = maxbytes - start;
+
+	if (start + length > inode->i_size)
+		length = inode->i_size - start;
+
+	end = start + length;
+	while (start < end && *out_count < in_count) {
+		/* vfs_llseek() copes with files that provide no llseek method */
+		extent_start = vfs_llseek(f, start, SEEK_DATA);
+		if (extent_start < 0) {
+			if (extent_start != -ENXIO)
+				ret = (int)extent_start;
+			break;
+		}
+
+		if (extent_start >= end)
+			break;
+
+		extent_end = vfs_llseek(f, extent_start, SEEK_HOLE);
+		if (extent_end < 0) {
+			if (extent_end != -ENXIO)
+				ret = (int)extent_end;
+			break;
+		} else if (extent_start >= extent_end) {
+			break;
+		}
+
+		/* the last extent is cut off at the end of the requested region */
+		ranges[*out_count].file_offset = cpu_to_le64(extent_start);
+		ranges[(*out_count)++].length =
+			cpu_to_le64(min(extent_end, end) - extent_start);
+
+		start = extent_end;
+	}
+
+	return ret;
+}
+
+/**
+ * ksmbd_vfs_remove_xattr() - remove one extended attribute
+ * @user_ns: user namespace
+ * @dentry: dentry to remove the xattr from
+ * @attr_name: name of the xattr to remove
+ *
+ * Return: result of vfs_removexattr()
+ */
+int ksmbd_vfs_remove_xattr(struct user_namespace *user_ns,
+ struct dentry *dentry, char *attr_name)
+{
+ return vfs_removexattr(user_ns, dentry, attr_name);
+}
+
+/**
+ * ksmbd_vfs_unlink() - remove a file or directory given its parent
+ * @user_ns: user namespace
+ * @dir: parent directory dentry
+ * @dentry: dentry to remove
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_unlink(struct user_namespace *user_ns,
+ struct dentry *dir, struct dentry *dentry)
+{
+ int err = 0;
+
+ /* presumably leaves d_inode(dir) locked on success; it is unlocked below */
+ err = ksmbd_vfs_lock_parent(dir, dentry);
+ if (err)
+ return err;
+ /* hold an extra reference on the dentry across the removal */
+ dget(dentry);
+
+ if (S_ISDIR(d_inode(dentry)->i_mode))
+ err = vfs_rmdir(user_ns, d_inode(dir), dentry);
+ else
+ err = vfs_unlink(user_ns, d_inode(dir), dentry, NULL);
+
+ dput(dentry);
+ inode_unlock(d_inode(dir));
+ if (err)
+ ksmbd_debug(VFS, "failed to delete, err %d\n", err);
+
+ return err;
+}
+
+/*
+ * Directory iteration actor for ksmbd_vfs_empty_dir(): counts entries and
+ * returns -ENOTEMPTY once more than two have been seen.
+ */
+static int __dir_empty(struct dir_context *ctx, const char *name, int namlen,
+		       loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct ksmbd_readdir_data *rd =
+		container_of(ctx, struct ksmbd_readdir_data, ctx);
+
+	rd->dirent_count++;
+
+	return rd->dirent_count > 2 ? -ENOTEMPTY : 0;
+}
+
+/**
+ * ksmbd_vfs_empty_dir() - check for empty directory
+ * @fp:	ksmbd file pointer
+ *
+ * Iterates the directory with __dir_empty() as actor. The status returned by
+ * iterate_dir() itself is not reported; only the entry count decides.
+ *
+ * Return: 0 if at most two entries were seen, -ENOTEMPTY otherwise
+ */
+int ksmbd_vfs_empty_dir(struct ksmbd_file *fp)
+{
+	struct ksmbd_readdir_data readdir_data;
+
+	memset(&readdir_data, 0, sizeof(struct ksmbd_readdir_data));
+	set_ctx_actor(&readdir_data.ctx, __dir_empty);
+	readdir_data.dirent_count = 0;
+
+	iterate_dir(fp->filp, &readdir_data.ctx);
+
+	if (readdir_data.dirent_count > 2)
+		return -ENOTEMPTY;
+	return 0;
+}
+
+/*
+ * Directory iteration actor for ksmbd_vfs_lookup_in_dir(): compares each
+ * entry name case-insensitively with the wanted name kept in ->private
+ * (length in ->used). On a match the wanted name is overwritten with the
+ * entry's own spelling, ->dirent_count is set to 1 and -EEXIST is returned.
+ */
+static int __caseless_lookup(struct dir_context *ctx, const char *name,
+			     int namlen, loff_t offset, u64 ino,
+			     unsigned int d_type)
+{
+	struct ksmbd_readdir_data *rd =
+		container_of(ctx, struct ksmbd_readdir_data, ctx);
+
+	if (rd->used != namlen)
+		return 0;
+
+	if (strncasecmp((char *)rd->private, name, namlen))
+		return 0;
+
+	memcpy((char *)rd->private, name, namlen);
+	rd->dirent_count = 1;
+	return -EEXIST;
+}
+
+/**
+ * ksmbd_vfs_lookup_in_dir() - lookup a file in a directory
+ * @dir: path info
+ * @name: filename to lookup; rewritten in place with the on-disk spelling
+ * when __caseless_lookup() finds a case-insensitive match
+ * @namelen: filename length
+ *
+ * Return: 0 when a matching entry was seen, otherwise the dentry_open()
+ * error or the result of iterate_dir()
+ */
+static int ksmbd_vfs_lookup_in_dir(struct path *dir, char *name, size_t namelen)
+{
+ int ret;
+ struct file *dfilp;
+ int flags = O_RDONLY | O_LARGEFILE;
+ struct ksmbd_readdir_data readdir_data = {
+ .ctx.actor = __caseless_lookup,
+ .private = name,
+ .used = namelen,
+ .dirent_count = 0,
+ };
+
+ dfilp = dentry_open(dir, flags, current_cred());
+ if (IS_ERR(dfilp))
+ return PTR_ERR(dfilp);
+
+ ret = iterate_dir(dfilp, &readdir_data.ctx);
+ /* the actor sets dirent_count to 1 on a match; that overrides ret */
+ if (readdir_data.dirent_count > 0)
+ ret = 0;
+ fput(dfilp);
+ return ret;
+}
+
+/**
+ * ksmbd_vfs_kern_path() - lookup a file and get path info
+ * @name: name of file for lookup; must start with '/'
+ * @flags: lookup flags
+ * @path: if lookup succeed, return path info
+ * @caseless: caseless filename lookup
+ *
+ * A plain kern_path() lookup is tried first. If it fails and @caseless is
+ * set, a copy of @name is resolved one component at a time from "/", with
+ * each component matched case-insensitively against its parent directory.
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_kern_path(char *name, unsigned int flags, struct path *path,
+ bool caseless)
+{
+ int err;
+
+ if (name[0] != '/')
+ return -EINVAL;
+
+ err = kern_path(name, flags, path);
+ if (!err)
+ return 0;
+
+ if (caseless) {
+ char *filepath;
+ struct path parent;
+ size_t path_len, remain_len;
+
+ /* work on a copy: components are rewritten and cut in place */
+ filepath = kstrdup(name, GFP_KERNEL);
+ if (!filepath)
+ return -ENOMEM;
+
+ path_len = strlen(filepath);
+ /* skip the leading '/' */
+ remain_len = path_len - 1;
+
+ err = kern_path("/", flags, &parent);
+ if (err)
+ goto out;
+
+ while (d_can_lookup(parent.dentry)) {
+ char *filename = filepath + path_len - remain_len;
+ char *next = strchrnul(filename, '/');
+ size_t filename_len = next - filename;
+ bool is_last = !next[0];
+
+ /* an empty component ends the walk with -EINVAL below */
+ if (filename_len == 0)
+ break;
+
+ err = ksmbd_vfs_lookup_in_dir(&parent, filename,
+ filename_len);
+ if (err) {
+ path_put(&parent);
+ goto out;
+ }
+
+ path_put(&parent);
+ /* cut the path after this component and resolve the prefix */
+ next[0] = '\0';
+
+ err = kern_path(filepath, flags, &parent);
+ if (err)
+ goto out;
+
+ if (is_last) {
+ /* the reference held by parent is handed over to *path */
+ path->mnt = parent.mnt;
+ path->dentry = parent.dentry;
+ goto out;
+ }
+
+ /* restore the separator and advance to the next component */
+ next[0] = '/';
+ remain_len -= filename_len + 1;
+ }
+
+ path_put(&parent);
+ err = -EINVAL;
+out:
+ kfree(filepath);
+ }
+ return err;
+}
+
+/**
+ * ksmbd_vfs_remove_acl_xattrs() - remove the POSIX ACL xattrs of a file
+ * @user_ns: user namespace
+ * @dentry: dentry whose xattrs are scanned
+ *
+ * Removes every xattr whose name starts with XATTR_NAME_POSIX_ACL_ACCESS or
+ * XATTR_NAME_POSIX_ACL_DEFAULT.
+ *
+ * Return: 0 if the list is empty or cannot be read, otherwise the status of
+ * the last removal attempted
+ */
+int ksmbd_vfs_remove_acl_xattrs(struct user_namespace *user_ns,
+ struct dentry *dentry)
+{
+ char *name, *xattr_list = NULL;
+ ssize_t xattr_list_len;
+ int err = 0;
+
+ xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
+ if (xattr_list_len < 0) {
+ /* a listing failure is not reported: err is still 0 here */
+ goto out;
+ } else if (!xattr_list_len) {
+ ksmbd_debug(SMB, "empty xattr in the file\n");
+ goto out;
+ }
+
+ /* the list is a sequence of NUL-terminated names */
+ for (name = xattr_list; name - xattr_list < xattr_list_len;
+ name += strlen(name) + 1) {
+ ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
+
+ if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
+ sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1) ||
+ !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
+ sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1)) {
+ err = ksmbd_vfs_remove_xattr(user_ns, dentry, name);
+ if (err)
+ ksmbd_debug(SMB,
+ "remove acl xattr failed : %s\n", name);
+ }
+ }
+out:
+ kvfree(xattr_list);
+ return err;
+}
+
+/**
+ * ksmbd_vfs_remove_sd_xattrs() - remove the security descriptor xattrs
+ * @user_ns: user namespace
+ * @dentry: dentry whose xattrs are scanned
+ *
+ * Removes every xattr whose first XATTR_NAME_SD_LEN bytes equal
+ * XATTR_NAME_SD.
+ *
+ * Return: 0 if the list is empty or cannot be read, otherwise the status of
+ * the last removal attempted
+ */
+int ksmbd_vfs_remove_sd_xattrs(struct user_namespace *user_ns,
+ struct dentry *dentry)
+{
+ char *name, *xattr_list = NULL;
+ ssize_t xattr_list_len;
+ int err = 0;
+
+ xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
+ if (xattr_list_len < 0) {
+ /* a listing failure is not reported: err is still 0 here */
+ goto out;
+ } else if (!xattr_list_len) {
+ ksmbd_debug(SMB, "empty xattr in the file\n");
+ goto out;
+ }
+
+ /* the list is a sequence of NUL-terminated names */
+ for (name = xattr_list; name - xattr_list < xattr_list_len;
+ name += strlen(name) + 1) {
+ ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
+
+ if (!strncmp(name, XATTR_NAME_SD, XATTR_NAME_SD_LEN)) {
+ err = ksmbd_vfs_remove_xattr(user_ns, dentry, name);
+ if (err)
+ ksmbd_debug(SMB, "remove xattr failed : %s\n", name);
+ }
+ }
+out:
+ kvfree(xattr_list);
+ return err;
+}
+
+/*
+ * Build an xattr_smb_acl copy of an inode's POSIX ACL.
+ *
+ * @user_ns:  user namespace used to translate ACL_USER/ACL_GROUP ids
+ * @inode:    inode whose ACL is read
+ * @acl_type: ACL type handed to get_acl()
+ *
+ * Return: a kzalloc()ed xattr_smb_acl the caller must kfree(), or NULL when
+ * POSIX ACLs are not configured, the inode has no such ACL, reading it
+ * failed, or the allocation failed. When an unknown tag is met, conversion
+ * stops and the partly filled result is returned.
+ */
+static struct xattr_smb_acl *ksmbd_vfs_make_xattr_posix_acl(struct user_namespace *user_ns,
+							     struct inode *inode,
+							     int acl_type)
+{
+	struct xattr_smb_acl *smb_acl = NULL;
+	struct posix_acl *posix_acls;
+	struct posix_acl_entry *pa_entry;
+	struct xattr_acl_entry *xa_entry;
+	int i;
+
+	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL))
+		return NULL;
+
+	/* get_acl() reports failures as ERR_PTR, which must not be dereferenced */
+	posix_acls = get_acl(inode, acl_type);
+	if (IS_ERR_OR_NULL(posix_acls))
+		return NULL;
+
+	smb_acl = kzalloc(sizeof(struct xattr_smb_acl) +
+			  sizeof(struct xattr_acl_entry) * posix_acls->a_count,
+			  GFP_KERNEL);
+	if (!smb_acl)
+		goto out;
+
+	smb_acl->count = posix_acls->a_count;
+	pa_entry = posix_acls->a_entries;
+	xa_entry = smb_acl->entries;
+	for (i = 0; i < posix_acls->a_count; i++, pa_entry++, xa_entry++) {
+		switch (pa_entry->e_tag) {
+		case ACL_USER:
+			xa_entry->type = SMB_ACL_USER;
+			xa_entry->uid = from_kuid(user_ns, pa_entry->e_uid);
+			break;
+		case ACL_USER_OBJ:
+			xa_entry->type = SMB_ACL_USER_OBJ;
+			break;
+		case ACL_GROUP:
+			xa_entry->type = SMB_ACL_GROUP;
+			xa_entry->gid = from_kgid(user_ns, pa_entry->e_gid);
+			break;
+		case ACL_GROUP_OBJ:
+			xa_entry->type = SMB_ACL_GROUP_OBJ;
+			break;
+		case ACL_OTHER:
+			xa_entry->type = SMB_ACL_OTHER;
+			break;
+		case ACL_MASK:
+			xa_entry->type = SMB_ACL_MASK;
+			break;
+		default:
+			pr_err("unknown type : 0x%x\n", pa_entry->e_tag);
+			goto out;
+		}
+
+		/* perm starts at zero (kzalloc), so only set bits are ORed in */
+		if (pa_entry->e_perm & ACL_READ)
+			xa_entry->perm |= SMB_ACL_READ;
+		if (pa_entry->e_perm & ACL_WRITE)
+			xa_entry->perm |= SMB_ACL_WRITE;
+		if (pa_entry->e_perm & ACL_EXECUTE)
+			xa_entry->perm |= SMB_ACL_EXECUTE;
+	}
+out:
+	posix_acl_release(posix_acls);
+	return smb_acl;
+}
+
+/**
+ * ksmbd_vfs_set_sd_xattr() - store an NT security descriptor in an xattr
+ * @conn:	connection, passed to ksmbd_gen_sd_hash()
+ * @user_ns:	user namespace
+ * @dentry:	dentry to store the descriptor on
+ * @pntsd:	security descriptor; its owner/group/dacl offsets are shifted
+ *		by NDR_NTSD_OFFSETOF in place
+ * @len:	size of @pntsd in bytes
+ *
+ * Encodes a version 4 xattr_ntacl holding the descriptor, its hash and the
+ * hash of the inode's current POSIX ACLs, and stores it under XATTR_NAME_SD.
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
+			   struct user_namespace *user_ns,
+			   struct dentry *dentry,
+			   struct smb_ntsd *pntsd, int len)
+{
+	int rc;
+	struct ndr sd_ndr = {0}, acl_ndr = {0};
+	struct xattr_ntacl acl = {0};
+	struct xattr_smb_acl *smb_acl, *def_smb_acl = NULL;
+	struct inode *inode = d_inode(dentry);
+
+	acl.version = 4;
+	acl.hash_type = XATTR_SD_HASH_TYPE_SHA256;
+	acl.current_time = ksmbd_UnixTimeToNT(current_time(inode));
+
+	/* desc_len counts the NUL that the zero-initialised acl already holds */
+	memcpy(acl.desc, "posix_acl", 9);
+	acl.desc_len = 10;
+
+	pntsd->osidoffset =
+		cpu_to_le32(le32_to_cpu(pntsd->osidoffset) + NDR_NTSD_OFFSETOF);
+	pntsd->gsidoffset =
+		cpu_to_le32(le32_to_cpu(pntsd->gsidoffset) + NDR_NTSD_OFFSETOF);
+	pntsd->dacloffset =
+		cpu_to_le32(le32_to_cpu(pntsd->dacloffset) + NDR_NTSD_OFFSETOF);
+
+	acl.sd_buf = (char *)pntsd;
+	acl.sd_size = len;
+
+	rc = ksmbd_gen_sd_hash(conn, acl.sd_buf, acl.sd_size, acl.hash);
+	if (rc) {
+		pr_err("failed to generate hash for ndr acl\n");
+		return rc;
+	}
+
+	smb_acl = ksmbd_vfs_make_xattr_posix_acl(user_ns, inode,
+						 ACL_TYPE_ACCESS);
+	if (S_ISDIR(inode->i_mode))
+		def_smb_acl = ksmbd_vfs_make_xattr_posix_acl(user_ns, inode,
+							     ACL_TYPE_DEFAULT);
+
+	rc = ndr_encode_posix_acl(&acl_ndr, user_ns, inode,
+				  smb_acl, def_smb_acl);
+	if (rc) {
+		pr_err("failed to encode ndr to posix acl\n");
+		goto out;
+	}
+
+	rc = ksmbd_gen_sd_hash(conn, acl_ndr.data, acl_ndr.offset,
+			       acl.posix_acl_hash);
+	if (rc) {
+		pr_err("failed to generate hash for ndr acl\n");
+		goto out;
+	}
+
+	rc = ndr_encode_v4_ntacl(&sd_ndr, &acl);
+	if (rc) {
+		pr_err("failed to encode ndr to posix acl\n");
+		goto out;
+	}
+
+	rc = ksmbd_vfs_setxattr(user_ns, dentry,
+				XATTR_NAME_SD, sd_ndr.data,
+				sd_ndr.offset, 0);
+	if (rc < 0)
+		pr_err("Failed to store XATTR ntacl :%d\n", rc);
+
+out:
+	/*
+	 * Both ndr structs start zeroed, so freeing them on every path is
+	 * safe and also releases a buffer an encoder may have left behind
+	 * when it failed.
+	 */
+	kfree(sd_ndr.data);
+	kfree(acl_ndr.data);
+	kfree(smb_acl);
+	kfree(def_smb_acl);
+	return rc;
+}
+
+/**
+ * ksmbd_vfs_get_sd_xattr() - load the NT security descriptor from its xattr
+ * @conn: connection, passed to ksmbd_gen_sd_hash()
+ * @user_ns: user namespace
+ * @dentry: dentry to read XATTR_NAME_SD from
+ * @pntsd: set to the decoded descriptor buffer on success, NULL when a step
+ * after decoding fails
+ *
+ * The stored POSIX ACL hash is compared with a hash of the inode's current
+ * POSIX ACLs; a mismatch is rejected with -EINVAL.
+ *
+ * Return: size of the descriptor on success, 0 or a negative error otherwise
+ */
+int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
+ struct user_namespace *user_ns,
+ struct dentry *dentry,
+ struct smb_ntsd **pntsd)
+{
+ int rc;
+ struct ndr n;
+ struct inode *inode = d_inode(dentry);
+ struct ndr acl_ndr = {0};
+ struct xattr_ntacl acl;
+ struct xattr_smb_acl *smb_acl = NULL, *def_smb_acl = NULL;
+ __u8 cmp_hash[XATTR_SD_HASH_SIZE] = {0};
+
+ rc = ksmbd_vfs_getxattr(user_ns, dentry, XATTR_NAME_SD, &n.data);
+ if (rc <= 0)
+ return rc;
+
+ /* NOTE(review): only n.data and n.length are set here — confirm the decoder initialises the rest of n */
+ n.length = rc;
+ rc = ndr_decode_v4_ntacl(&n, &acl);
+ if (rc)
+ goto free_n_data;
+
+ smb_acl = ksmbd_vfs_make_xattr_posix_acl(user_ns, inode,
+ ACL_TYPE_ACCESS);
+ if (S_ISDIR(inode->i_mode))
+ def_smb_acl = ksmbd_vfs_make_xattr_posix_acl(user_ns, inode,
+ ACL_TYPE_DEFAULT);
+
+ rc = ndr_encode_posix_acl(&acl_ndr, user_ns, inode, smb_acl,
+ def_smb_acl);
+ if (rc) {
+ pr_err("failed to encode ndr to posix acl\n");
+ goto out_free;
+ }
+
+ rc = ksmbd_gen_sd_hash(conn, acl_ndr.data, acl_ndr.offset, cmp_hash);
+ if (rc) {
+ pr_err("failed to generate hash for ndr acl\n");
+ goto out_free;
+ }
+
+ /* the current POSIX ACLs must hash to the value stored with the descriptor */
+ if (memcmp(cmp_hash, acl.posix_acl_hash, XATTR_SD_HASH_SIZE)) {
+ pr_err("hash value diff\n");
+ rc = -EINVAL;
+ goto out_free;
+ }
+
+ /* undo the NDR_NTSD_OFFSETOF shift applied when the descriptor was stored */
+ *pntsd = acl.sd_buf;
+ (*pntsd)->osidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->osidoffset) -
+ NDR_NTSD_OFFSETOF);
+ (*pntsd)->gsidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->gsidoffset) -
+ NDR_NTSD_OFFSETOF);
+ (*pntsd)->dacloffset = cpu_to_le32(le32_to_cpu((*pntsd)->dacloffset) -
+ NDR_NTSD_OFFSETOF);
+
+ rc = acl.sd_size;
+out_free:
+ kfree(acl_ndr.data);
+ kfree(smb_acl);
+ kfree(def_smb_acl);
+ /* on failure the decoded descriptor buffer is not handed to the caller */
+ if (rc < 0) {
+ kfree(acl.sd_buf);
+ *pntsd = NULL;
+ }
+
+free_n_data:
+ kfree(n.data);
+ return rc;
+}
+
+/**
+ * ksmbd_vfs_set_dos_attrib_xattr() - store DOS attributes in an xattr
+ * @user_ns:	user namespace
+ * @dentry:	dentry to store the attributes on
+ * @da:		DOS attributes to encode
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_set_dos_attrib_xattr(struct user_namespace *user_ns,
+				   struct dentry *dentry,
+				   struct xattr_dos_attrib *da)
+{
+	struct ndr encoded;
+	int rc;
+
+	rc = ndr_encode_dos_attr(&encoded, da);
+	if (rc)
+		return rc;
+
+	rc = ksmbd_vfs_setxattr(user_ns, dentry, XATTR_NAME_DOS_ATTRIBUTE,
+				(void *)encoded.data, encoded.offset, 0);
+	if (rc)
+		ksmbd_debug(SMB, "failed to store dos attribute in xattr\n");
+
+	/* the encoded buffer is only needed for the setxattr call */
+	kfree(encoded.data);
+	return rc;
+}
+
+/**
+ * ksmbd_vfs_get_dos_attrib_xattr() - load DOS attributes from an xattr
+ * @user_ns:	user namespace
+ * @dentry:	dentry to read the attributes from
+ * @da:		filled by ndr_decode_dos_attr()
+ *
+ * Return: length of the xattr value (> 0) on success, -EINVAL if decoding
+ * fails, otherwise the zero or negative result of ksmbd_vfs_getxattr()
+ */
+int ksmbd_vfs_get_dos_attrib_xattr(struct user_namespace *user_ns,
+				   struct dentry *dentry,
+				   struct xattr_dos_attrib *da)
+{
+	struct ndr raw;
+	int rc;
+
+	rc = ksmbd_vfs_getxattr(user_ns, dentry, XATTR_NAME_DOS_ATTRIBUTE,
+				(char **)&raw.data);
+	if (rc <= 0) {
+		ksmbd_debug(SMB, "failed to load dos attribute in xattr\n");
+		return rc;
+	}
+
+	raw.length = rc;
+	if (ndr_decode_dos_attr(&raw, da))
+		rc = -EINVAL;
+
+	kfree(raw.data);
+	return rc;
+}
+
+/**
+ * ksmbd_vfs_init_kstat() - convert unix stat information to smb stat format
+ * @p:			points to the destination buffer
+ * @ksmbd_kstat:	ksmbd kstat wrapper
+ *
+ * Directories get EndOfFile and AllocationSize of zero.
+ *
+ * Return: *@p, viewed as the file_directory_info that was filled in
+ */
+void *ksmbd_vfs_init_kstat(char **p, struct ksmbd_kstat *ksmbd_kstat)
+{
+	struct file_directory_info *info = (struct file_directory_info *)(*p);
+	struct kstat *kstat = ksmbd_kstat->kstat;
+	u64 nt_time;
+
+	info->FileIndex = 0;
+	info->CreationTime = cpu_to_le64(ksmbd_kstat->create_time);
+
+	nt_time = ksmbd_UnixTimeToNT(kstat->atime);
+	info->LastAccessTime = cpu_to_le64(nt_time);
+
+	nt_time = ksmbd_UnixTimeToNT(kstat->mtime);
+	info->LastWriteTime = cpu_to_le64(nt_time);
+
+	nt_time = ksmbd_UnixTimeToNT(kstat->ctime);
+	info->ChangeTime = cpu_to_le64(nt_time);
+
+	if (!(ksmbd_kstat->file_attributes & ATTR_DIRECTORY_LE)) {
+		info->EndOfFile = cpu_to_le64(kstat->size);
+		/* kstat->blocks is shifted by 9, i.e. multiplied by 512 */
+		info->AllocationSize = cpu_to_le64(kstat->blocks << 9);
+	} else {
+		info->EndOfFile = 0;
+		info->AllocationSize = 0;
+	}
+	info->ExtFileAttributes = ksmbd_kstat->file_attributes;
+
+	return info;
+}
+
+/**
+ * ksmbd_vfs_fill_dentry_attrs() - fill a ksmbd_kstat from a dentry
+ * @work:		work
+ * @user_ns:		user namespace
+ * @dentry:		dentry to describe
+ * @ksmbd_kstat:	wrapper to fill; its ->kstat must point to storage
+ *
+ * Creation time defaults to ctime and the attributes to directory/archive.
+ * Both are replaced by the stored DOS attributes when the share has
+ * KSMBD_SHARE_FLAG_STORE_DOS_ATTRS set and the xattr can be loaded.
+ *
+ * Return: always 0
+ */
+int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
+				struct user_namespace *user_ns,
+				struct dentry *dentry,
+				struct ksmbd_kstat *ksmbd_kstat)
+{
+	struct xattr_dos_attrib da;
+	u64 nt_ctime;
+
+	generic_fillattr(user_ns, d_inode(dentry), ksmbd_kstat->kstat);
+
+	nt_ctime = ksmbd_UnixTimeToNT(ksmbd_kstat->kstat->ctime);
+	ksmbd_kstat->create_time = nt_ctime;
+
+	/*
+	 * Defaults, used when DOS attributes are not stored for this share
+	 * or cannot be loaded from the file.
+	 */
+	if (S_ISDIR(ksmbd_kstat->kstat->mode))
+		ksmbd_kstat->file_attributes = ATTR_DIRECTORY_LE;
+	else
+		ksmbd_kstat->file_attributes = ATTR_ARCHIVE_LE;
+
+	if (!test_share_config_flag(work->tcon->share_conf,
+				    KSMBD_SHARE_FLAG_STORE_DOS_ATTRS))
+		return 0;
+
+	if (ksmbd_vfs_get_dos_attrib_xattr(user_ns, dentry, &da) > 0) {
+		ksmbd_kstat->file_attributes = cpu_to_le32(da.attr);
+		ksmbd_kstat->create_time = da.create_time;
+	} else {
+		ksmbd_debug(VFS, "fail to load dos attribute.\n");
+	}
+
+	return 0;
+}
+
+/**
+ * ksmbd_vfs_casexattr_len() - length of an xattr found case-insensitively
+ * @user_ns:		user namespace
+ * @dentry:		dentry whose xattr list is searched
+ * @attr_name:		name to look for
+ * @attr_name_len:	number of leading bytes of @attr_name to compare
+ *
+ * The first listed name whose leading @attr_name_len bytes match
+ * @attr_name, ignoring case, is used.
+ *
+ * Return: value length of the matching xattr, -ENOENT if none matches or the
+ * list is empty or unreadable
+ */
+ssize_t ksmbd_vfs_casexattr_len(struct user_namespace *user_ns,
+				struct dentry *dentry, char *attr_name,
+				int attr_name_len)
+{
+	char *cur, *xattr_list = NULL;
+	ssize_t list_len, found_len = -ENOENT;
+
+	list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
+	if (list_len <= 0)
+		goto out;
+
+	for (cur = xattr_list; cur - xattr_list < list_len;
+	     cur += strlen(cur) + 1) {
+		ksmbd_debug(VFS, "%s, len %zd\n", cur, strlen(cur));
+		if (!strncasecmp(attr_name, cur, attr_name_len)) {
+			found_len = ksmbd_vfs_xattr_len(user_ns, dentry, cur);
+			break;
+		}
+	}
+
+out:
+	kvfree(xattr_list);
+	return found_len;
+}
+
+/**
+ * ksmbd_vfs_xattr_stream_name() - build the xattr name of a named stream
+ * @stream_name:		stream name
+ * @xattr_stream_name:		set to the allocated xattr name
+ * @xattr_stream_name_size:	set to the name's length including the NUL
+ * @s_type:			DIR_STREAM selects ":$INDEX_ALLOCATION", any
+ *				other value ":$DATA"
+ *
+ * The name is XATTR_NAME_STREAM, @stream_name and the type suffix joined
+ * together; the caller owns the kasprintf() buffer.
+ *
+ * Return: 0 on success, -ENOMEM on allocation failure
+ */
+int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
+				size_t *xattr_stream_name_size, int s_type)
+{
+	char *suffix = ":$DATA";
+	char *full_name;
+
+	if (s_type == DIR_STREAM)
+		suffix = ":$INDEX_ALLOCATION";
+
+	full_name = kasprintf(GFP_KERNEL, "%s%s%s",
+			      XATTR_NAME_STREAM, stream_name, suffix);
+	if (!full_name)
+		return -ENOMEM;
+
+	*xattr_stream_name = full_name;
+	*xattr_stream_name_size = strlen(full_name) + 1;
+
+	return 0;
+}
+
+/**
+ * ksmbd_vfs_copy_file_ranges() - copy a list of chunks between two files
+ * @work: work
+ * @src_fp: source file
+ * @dst_fp: destination file
+ * @chunks: array of source offset / target offset / length triples
+ * @chunk_count: number of entries in @chunks
+ * @chunk_count_written: number of chunks copied
+ * @chunk_size_written: only reset to 0 here, never updated afterwards
+ * @total_size_written: sum of the bytes reported by vfs_copy_file_range()
+ *
+ * Copying stops at the first failing chunk; the counters then describe the
+ * chunks copied so far.
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
+ struct ksmbd_file *src_fp,
+ struct ksmbd_file *dst_fp,
+ struct srv_copychunk *chunks,
+ unsigned int chunk_count,
+ unsigned int *chunk_count_written,
+ unsigned int *chunk_size_written,
+ loff_t *total_size_written)
+{
+ unsigned int i;
+ loff_t src_off, dst_off, src_file_size;
+ size_t len;
+ int ret;
+
+ *chunk_count_written = 0;
+ *chunk_size_written = 0;
+ *total_size_written = 0;
+
+ if (!(src_fp->daccess & (FILE_READ_DATA_LE | FILE_EXECUTE_LE))) {
+ pr_err("no right to read(%pd)\n", src_fp->filp->f_path.dentry);
+ return -EACCES;
+ }
+ if (!(dst_fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE))) {
+ pr_err("no right to write(%pd)\n", dst_fp->filp->f_path.dentry);
+ return -EACCES;
+ }
+
+ /* stream files are rejected on either side */
+ if (ksmbd_stream_fd(src_fp) || ksmbd_stream_fd(dst_fp))
+ return -EBADF;
+
+ smb_break_all_levII_oplock(work, dst_fp, 1);
+
+ /* first pass: every chunk must pass the lock-range check before any copy starts */
+ if (!work->tcon->posix_extensions) {
+ for (i = 0; i < chunk_count; i++) {
+ src_off = le64_to_cpu(chunks[i].SourceOffset);
+ dst_off = le64_to_cpu(chunks[i].TargetOffset);
+ len = le32_to_cpu(chunks[i].Length);
+
+ if (check_lock_range(src_fp->filp, src_off,
+ src_off + len - 1, READ))
+ return -EAGAIN;
+ if (check_lock_range(dst_fp->filp, dst_off,
+ dst_off + len - 1, WRITE))
+ return -EAGAIN;
+ }
+ }
+
+ src_file_size = i_size_read(file_inode(src_fp->filp));
+
+ /* second pass: copy chunk by chunk */
+ for (i = 0; i < chunk_count; i++) {
+ src_off = le64_to_cpu(chunks[i].SourceOffset);
+ dst_off = le64_to_cpu(chunks[i].TargetOffset);
+ len = le32_to_cpu(chunks[i].Length);
+
+ /* a chunk reaching past the end of the source is refused */
+ if (src_off + len > src_file_size)
+ return -E2BIG;
+
+ ret = vfs_copy_file_range(src_fp->filp, src_off,
+ dst_fp->filp, dst_off, len, 0);
+ if (ret < 0)
+ return ret;
+
+ *chunk_count_written += 1;
+ *total_size_written += ret;
+ }
+ return 0;
+}
+
+/* Sleep on flock->fl_wait until flock->fl_blocker is cleared. */
+void ksmbd_vfs_posix_lock_wait(struct file_lock *flock)
+{
+ wait_event(flock->fl_wait, !flock->fl_blocker);
+}
+
+/*
+ * Interruptible, time-limited variant of ksmbd_vfs_posix_lock_wait().
+ * Returns the result of wait_event_interruptible_timeout().
+ */
+int ksmbd_vfs_posix_lock_wait_timeout(struct file_lock *flock, long timeout)
+{
+ return wait_event_interruptible_timeout(flock->fl_wait,
+ !flock->fl_blocker,
+ timeout);
+}
+
+/* Thin wrapper around locks_delete_block() for @flock. */
+void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock)
+{
+ locks_delete_block(flock);
+}
+
+/**
+ * ksmbd_vfs_set_init_posix_acl() - set an initial POSIX ACL from the mode
+ * @user_ns: user namespace
+ * @inode: inode to set the ACL on
+ *
+ * Derives owner/group/other permissions from inode->i_mode, adds one user
+ * entry for i_uid and one group entry for i_gid, and sets the result as the
+ * access ACL. For directories the same ACL is also set as the default ACL,
+ * but only when setting the access ACL succeeded.
+ *
+ * Return: 0 on success, -EOPNOTSUPP without CONFIG_FS_POSIX_ACL, otherwise
+ * error
+ */
+int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns,
+ struct inode *inode)
+{
+ struct posix_acl_state acl_state;
+ struct posix_acl *acls;
+ int rc;
+
+ if (!IS_ENABLED(CONFIG_FS_POSIX_ACL))
+ return -EOPNOTSUPP;
+
+ ksmbd_debug(SMB, "Set posix acls\n");
+ rc = init_acl_state(&acl_state, 1);
+ if (rc)
+ return rc;
+
+ /* Set default owner group */
+ acl_state.owner.allow = (inode->i_mode & 0700) >> 6;
+ acl_state.group.allow = (inode->i_mode & 0070) >> 3;
+ acl_state.other.allow = inode->i_mode & 0007;
+ /* one named user entry (the owner) and one named group entry (the group) */
+ acl_state.users->aces[acl_state.users->n].uid = inode->i_uid;
+ acl_state.users->aces[acl_state.users->n++].perms.allow =
+ acl_state.owner.allow;
+ acl_state.groups->aces[acl_state.groups->n].gid = inode->i_gid;
+ acl_state.groups->aces[acl_state.groups->n++].perms.allow =
+ acl_state.group.allow;
+ acl_state.mask.allow = 0x07;
+
+ /* NOTE(review): 6 presumably = owner, user, group-owner, group, mask, other — confirm against posix_state_to_acl() */
+ acls = posix_acl_alloc(6, GFP_KERNEL);
+ if (!acls) {
+ free_acl_state(&acl_state);
+ return -ENOMEM;
+ }
+ posix_state_to_acl(&acl_state, acls->a_entries);
+ rc = set_posix_acl(user_ns, inode, ACL_TYPE_ACCESS, acls);
+ if (rc < 0)
+ ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
+ rc);
+ else if (S_ISDIR(inode->i_mode)) {
+ posix_state_to_acl(&acl_state, acls->a_entries);
+ rc = set_posix_acl(user_ns, inode, ACL_TYPE_DEFAULT,
+ acls);
+ if (rc < 0)
+ ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
+ rc);
+ }
+ free_acl_state(&acl_state);
+ posix_acl_release(acls);
+ return rc;
+}
+
+/**
+ * ksmbd_vfs_inherit_posix_acl() - inherit POSIX ACLs from the parent
+ * @user_ns:		user namespace
+ * @inode:		inode that receives the ACLs
+ * @parent_inode:	directory whose default ACL is inherited
+ *
+ * The parent's default ACL, with its ACL_MASK entry (if any) set to rwx, is
+ * applied as the access ACL of @inode and, for directories, also as its
+ * default ACL.
+ *
+ * Return: 0 on success, -EOPNOTSUPP without CONFIG_FS_POSIX_ACL, -ENOENT if
+ * the parent has no readable default ACL, otherwise the result of the last
+ * set_posix_acl() call
+ */
+int ksmbd_vfs_inherit_posix_acl(struct user_namespace *user_ns,
+				struct inode *inode, struct inode *parent_inode)
+{
+	struct posix_acl *acls;
+	struct posix_acl_entry *pace;
+	int rc, i;
+
+	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL))
+		return -EOPNOTSUPP;
+
+	/* get_acl() reports failures as ERR_PTR, which must not be dereferenced */
+	acls = get_acl(parent_inode, ACL_TYPE_DEFAULT);
+	if (IS_ERR_OR_NULL(acls))
+		return -ENOENT;
+	pace = acls->a_entries;
+
+	/* NOTE(review): this edits the object returned by get_acl() in place — confirm it is not shared */
+	for (i = 0; i < acls->a_count; i++, pace++) {
+		if (pace->e_tag == ACL_MASK) {
+			pace->e_perm = 0x07;
+			break;
+		}
+	}
+
+	rc = set_posix_acl(user_ns, inode, ACL_TYPE_ACCESS, acls);
+	if (rc < 0)
+		ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
+			    rc);
+	if (S_ISDIR(inode->i_mode)) {
+		rc = set_posix_acl(user_ns, inode, ACL_TYPE_DEFAULT,
+				   acls);
+		if (rc < 0)
+			ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
+				    rc);
+	}
+	posix_acl_release(acls);
+	return rc;
+}
diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
new file mode 100644
index 000000000000..cb0cba0d5d07
--- /dev/null
+++ b/fs/ksmbd/vfs.h
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_VFS_H__
+#define __KSMBD_VFS_H__
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <uapi/linux/xattr.h>
+#include <linux/posix_acl.h>
+
+#include "smbacl.h"
+#include "xattr.h"
+
+/*
+ * Enumeration for stream type.
+ */
+enum {
+ DATA_STREAM = 1, /* type $DATA */
+ DIR_STREAM /* type $INDEX_ALLOCATION */
+};
+
+/* CreateOptions */
+/* If this flag is set, the target must not be a file; valid for directories only */
+#define FILE_DIRECTORY_FILE_LE cpu_to_le32(0x00000001)
+#define FILE_WRITE_THROUGH_LE cpu_to_le32(0x00000002)
+#define FILE_SEQUENTIAL_ONLY_LE cpu_to_le32(0x00000004)
+
+/* Should not buffer on server*/
+#define FILE_NO_INTERMEDIATE_BUFFERING_LE cpu_to_le32(0x00000008)
+/* MBZ */
+#define FILE_SYNCHRONOUS_IO_ALERT_LE cpu_to_le32(0x00000010)
+/* MBZ */
+#define FILE_SYNCHRONOUS_IO_NONALERT_LE cpu_to_le32(0x00000020)
+
+/* Flag must not be set for a directory */
+#define FILE_NON_DIRECTORY_FILE_LE cpu_to_le32(0x00000040)
+
+/* Should be zero */
+#define CREATE_TREE_CONNECTION cpu_to_le32(0x00000080)
+#define FILE_COMPLETE_IF_OPLOCKED_LE cpu_to_le32(0x00000100)
+#define FILE_NO_EA_KNOWLEDGE_LE cpu_to_le32(0x00000200)
+#define FILE_OPEN_REMOTE_INSTANCE cpu_to_le32(0x00000400)
+
+/*
+ * Doc says this is the obsolete "open for recovery" flag, which should be
+ * zero in any case.
+ */
+#define CREATE_OPEN_FOR_RECOVERY cpu_to_le32(0x00000400)
+#define FILE_RANDOM_ACCESS_LE cpu_to_le32(0x00000800)
+#define FILE_DELETE_ON_CLOSE_LE cpu_to_le32(0x00001000)
+#define FILE_OPEN_BY_FILE_ID_LE cpu_to_le32(0x00002000)
+#define FILE_OPEN_FOR_BACKUP_INTENT_LE cpu_to_le32(0x00004000)
+#define FILE_NO_COMPRESSION_LE cpu_to_le32(0x00008000)
+
+/* Should be zero*/
+#define FILE_OPEN_REQUIRING_OPLOCK cpu_to_le32(0x00010000)
+#define FILE_DISALLOW_EXCLUSIVE cpu_to_le32(0x00020000)
+#define FILE_RESERVE_OPFILTER_LE cpu_to_le32(0x00100000)
+#define FILE_OPEN_REPARSE_POINT_LE cpu_to_le32(0x00200000)
+#define FILE_OPEN_NO_RECALL_LE cpu_to_le32(0x00400000)
+
+/* Should be zero */
+#define FILE_OPEN_FOR_FREE_SPACE_QUERY_LE cpu_to_le32(0x00800000)
+#define CREATE_OPTIONS_MASK cpu_to_le32(0x00FFFFFF)
+#define CREATE_OPTION_READONLY 0x10000000
+/* system. NB not sent over wire */
+#define CREATE_OPTION_SPECIAL 0x20000000
+
+struct ksmbd_work;
+struct ksmbd_file;
+struct ksmbd_conn;
+
+struct ksmbd_dir_info {
+ const char *name;
+ char *wptr;
+ char *rptr;
+ int name_len;
+ int out_buf_len;
+ int num_entry;
+ int data_count;
+ int last_entry_offset;
+ bool hide_dot_file;
+ int flags;
+};
+
+struct ksmbd_readdir_data {
+ struct dir_context ctx;
+ union {
+ void *private;
+ char *dirent;
+ };
+
+ unsigned int used;
+ unsigned int dirent_count;
+ unsigned int file_attr;
+};
+
+/* ksmbd kstat wrapper to get valid create time when reading dir entry */
+struct ksmbd_kstat {
+ struct kstat *kstat;
+ unsigned long long create_time;
+ __le32 file_attributes;
+};
+
+int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child);
+int ksmbd_vfs_may_delete(struct user_namespace *user_ns, struct dentry *dentry);
+int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
+ struct dentry *dentry, __le32 *daccess);
+int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode);
+int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode);
+int ksmbd_vfs_read(struct ksmbd_work *work, struct ksmbd_file *fp,
+ size_t count, loff_t *pos);
+int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp,
+ char *buf, size_t count, loff_t *pos, bool sync,
+ ssize_t *written);
+int ksmbd_vfs_fsync(struct ksmbd_work *work, u64 fid, u64 p_id);
+int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name);
+int ksmbd_vfs_link(struct ksmbd_work *work,
+ const char *oldname, const char *newname);
+int ksmbd_vfs_getattr(struct path *path, struct kstat *stat);
+int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
+ char *newname);
+int ksmbd_vfs_truncate(struct ksmbd_work *work, const char *name,
+ struct ksmbd_file *fp, loff_t size);
+struct srv_copychunk;
+int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
+ struct ksmbd_file *src_fp,
+ struct ksmbd_file *dst_fp,
+ struct srv_copychunk *chunks,
+ unsigned int chunk_count,
+ unsigned int *chunk_count_written,
+ unsigned int *chunk_size_written,
+ loff_t *total_size_written);
+ssize_t ksmbd_vfs_listxattr(struct dentry *dentry, char **list);
+ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns,
+ struct dentry *dentry,
+ char *xattr_name,
+ char **xattr_buf);
+ssize_t ksmbd_vfs_casexattr_len(struct user_namespace *user_ns,
+ struct dentry *dentry, char *attr_name,
+ int attr_name_len);
+int ksmbd_vfs_setxattr(struct user_namespace *user_ns,
+ struct dentry *dentry, const char *attr_name,
+ const void *attr_value, size_t attr_size, int flags);
+int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
+ size_t *xattr_stream_name_size, int s_type);
+int ksmbd_vfs_remove_xattr(struct user_namespace *user_ns,
+ struct dentry *dentry, char *attr_name);
+int ksmbd_vfs_kern_path(char *name, unsigned int flags, struct path *path,
+ bool caseless);
+int ksmbd_vfs_empty_dir(struct ksmbd_file *fp);
+void ksmbd_vfs_set_fadvise(struct file *filp, __le32 option);
+int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp,
+ loff_t off, loff_t len);
+struct file_allocated_range_buffer;
+int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
+ struct file_allocated_range_buffer *ranges,
+ int in_count, int *out_count);
+int ksmbd_vfs_unlink(struct user_namespace *user_ns,
+ struct dentry *dir, struct dentry *dentry);
+void *ksmbd_vfs_init_kstat(char **p, struct ksmbd_kstat *ksmbd_kstat);
+int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
+ struct user_namespace *user_ns,
+ struct dentry *dentry,
+ struct ksmbd_kstat *ksmbd_kstat);
+void ksmbd_vfs_posix_lock_wait(struct file_lock *flock);
+int ksmbd_vfs_posix_lock_wait_timeout(struct file_lock *flock, long timeout);
+void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock);
+int ksmbd_vfs_remove_acl_xattrs(struct user_namespace *user_ns,
+ struct dentry *dentry);
+int ksmbd_vfs_remove_sd_xattrs(struct user_namespace *user_ns,
+ struct dentry *dentry);
+int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
+ struct user_namespace *user_ns,
+ struct dentry *dentry,
+ struct smb_ntsd *pntsd, int len);
+int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
+ struct user_namespace *user_ns,
+ struct dentry *dentry,
+ struct smb_ntsd **pntsd);
+int ksmbd_vfs_set_dos_attrib_xattr(struct user_namespace *user_ns,
+ struct dentry *dentry,
+ struct xattr_dos_attrib *da);
+int ksmbd_vfs_get_dos_attrib_xattr(struct user_namespace *user_ns,
+ struct dentry *dentry,
+ struct xattr_dos_attrib *da);
+int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns,
+ struct inode *inode);
+int ksmbd_vfs_inherit_posix_acl(struct user_namespace *user_ns,
+ struct inode *inode,
+ struct inode *parent_inode);
+#endif /* __KSMBD_VFS_H__ */
diff --git a/fs/ksmbd/vfs_cache.c b/fs/ksmbd/vfs_cache.c
new file mode 100644
index 000000000000..92d8c61ffd2a
--- /dev/null
+++ b/fs/ksmbd/vfs_cache.c
@@ -0,0 +1,725 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2019 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include "glob.h"
+#include "vfs_cache.h"
+#include "oplock.h"
+#include "vfs.h"
+#include "connection.h"
+#include "mgmt/tree_connect.h"
+#include "mgmt/user_session.h"
+#include "smb_common.h"
+
+#define S_DEL_PENDING 1
+#define S_DEL_ON_CLS 2
+#define S_DEL_ON_CLS_STREAM 8
+
+static unsigned int inode_hash_mask __read_mostly;
+static unsigned int inode_hash_shift __read_mostly;
+static struct hlist_head *inode_hashtable __read_mostly;
+static DEFINE_RWLOCK(inode_hash_lock);
+
+static struct ksmbd_file_table global_ft;
+static atomic_long_t fd_limit;
+static struct kmem_cache *filp_cache;
+
+/* Set the open-file budget to @limit, capped at get_max_files(). */
+void ksmbd_set_fd_limit(unsigned long limit)
+{
+	unsigned long capped = min(limit, get_max_files());
+
+	atomic_long_set(&fd_limit, capped);
+}
+
+/*
+ * Try to take one slot from the fd budget.
+ *
+ * Returns false when a slot was taken, true when the budget is exhausted
+ * (in which case the tentative decrement is rolled back).
+ */
+static bool fd_limit_depleted(void)
+{
+	if (atomic_long_dec_return(&fd_limit) < 0) {
+		atomic_long_inc(&fd_limit);
+		return true;
+	}
+	return false;
+}
+
+static void fd_limit_close(void)
+{
+ atomic_long_inc(&fd_limit);
+}
+
+/*
+ * INODE hash
+ */
+
+static unsigned long inode_hash(struct super_block *sb, unsigned long hashval)
+{
+ unsigned long tmp;
+
+ tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
+ L1_CACHE_BYTES;
+ tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> inode_hash_shift);
+ return tmp & inode_hash_mask;
+}
+
+/*
+ * Find the ksmbd_inode hashed for @inode and take an m_count reference on it.
+ * Returns NULL when no entry matches or when the matching entry's m_count is
+ * already zero. Callers in this file hold inode_hash_lock around the call.
+ */
+static struct ksmbd_inode *__ksmbd_inode_lookup(struct inode *inode)
+{
+	struct hlist_head *bucket;
+	struct ksmbd_inode *ci;
+
+	bucket = inode_hashtable + inode_hash(inode->i_sb, inode->i_ino);
+	hlist_for_each_entry(ci, bucket, m_hash) {
+		if (ci->m_inode != inode)
+			continue;
+		/* Only the first match is considered. */
+		return atomic_inc_not_zero(&ci->m_count) ? ci : NULL;
+	}
+	return NULL;
+}
+
+static struct ksmbd_inode *ksmbd_inode_lookup(struct ksmbd_file *fp)
+{
+ return __ksmbd_inode_lookup(file_inode(fp->filp));
+}
+
+static struct ksmbd_inode *ksmbd_inode_lookup_by_vfsinode(struct inode *inode)
+{
+ struct ksmbd_inode *ci;
+
+ read_lock(&inode_hash_lock);
+ ci = __ksmbd_inode_lookup(inode);
+ read_unlock(&inode_hash_lock);
+ return ci;
+}
+
+/*
+ * Report whether @inode is known to ksmbd and whether a delete is pending.
+ *
+ * Return: KSMBD_INODE_STATUS_UNKNOWN when no ksmbd_inode is hashed for
+ * @inode, KSMBD_INODE_STATUS_PENDING_DELETE when S_DEL_PENDING is set on it,
+ * KSMBD_INODE_STATUS_OK otherwise.
+ */
+int ksmbd_query_inode_status(struct inode *inode)
+{
+	struct ksmbd_inode *ci;
+	int status;
+
+	read_lock(&inode_hash_lock);
+	ci = __ksmbd_inode_lookup(inode);
+	if (!ci) {
+		status = KSMBD_INODE_STATUS_UNKNOWN;
+	} else {
+		status = (ci->m_flags & S_DEL_PENDING) ?
+			KSMBD_INODE_STATUS_PENDING_DELETE :
+			KSMBD_INODE_STATUS_OK;
+		/* Give back the reference taken by the lookup. */
+		atomic_dec(&ci->m_count);
+	}
+	read_unlock(&inode_hash_lock);
+	return status;
+}
+
+bool ksmbd_inode_pending_delete(struct ksmbd_file *fp)
+{
+ return (fp->f_ci->m_flags & S_DEL_PENDING);
+}
+
+void ksmbd_set_inode_pending_delete(struct ksmbd_file *fp)
+{
+ fp->f_ci->m_flags |= S_DEL_PENDING;
+}
+
+void ksmbd_clear_inode_pending_delete(struct ksmbd_file *fp)
+{
+ fp->f_ci->m_flags &= ~S_DEL_PENDING;
+}
+
+void ksmbd_fd_set_delete_on_close(struct ksmbd_file *fp,
+ int file_info)
+{
+ if (ksmbd_stream_fd(fp)) {
+ fp->f_ci->m_flags |= S_DEL_ON_CLS_STREAM;
+ return;
+ }
+
+ fp->f_ci->m_flags |= S_DEL_ON_CLS;
+}
+
+static void ksmbd_inode_hash(struct ksmbd_inode *ci)
+{
+ struct hlist_head *b = inode_hashtable +
+ inode_hash(ci->m_inode->i_sb, ci->m_inode->i_ino);
+
+ hlist_add_head(&ci->m_hash, b);
+}
+
+static void ksmbd_inode_unhash(struct ksmbd_inode *ci)
+{
+ write_lock(&inode_hash_lock);
+ hlist_del_init(&ci->m_hash);
+ write_unlock(&inode_hash_lock);
+}
+
+static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp)
+{
+ ci->m_inode = file_inode(fp->filp);
+ atomic_set(&ci->m_count, 1);
+ atomic_set(&ci->op_count, 0);
+ atomic_set(&ci->sop_count, 0);
+ ci->m_flags = 0;
+ ci->m_fattr = 0;
+ INIT_LIST_HEAD(&ci->m_fp_list);
+ INIT_LIST_HEAD(&ci->m_op_list);
+ rwlock_init(&ci->m_lock);
+ return 0;
+}
+
+/*
+ * Return a referenced ksmbd_inode for the file behind @fp, creating and
+ * hashing a new one when none exists yet. The hash is probed a second time
+ * under the write lock so that a concurrently inserted entry wins and the
+ * freshly allocated one is discarded. Returns NULL on allocation failure.
+ */
+static struct ksmbd_inode *ksmbd_inode_get(struct ksmbd_file *fp)
+{
+	struct ksmbd_inode *new_ci, *found;
+
+	read_lock(&inode_hash_lock);
+	found = ksmbd_inode_lookup(fp);
+	read_unlock(&inode_hash_lock);
+	if (found)
+		return found;
+
+	new_ci = kmalloc(sizeof(*new_ci), GFP_KERNEL);
+	if (!new_ci)
+		return NULL;
+
+	if (ksmbd_inode_init(new_ci, fp)) {
+		pr_err("inode initialized failed\n");
+		kfree(new_ci);
+		return NULL;
+	}
+
+	write_lock(&inode_hash_lock);
+	found = ksmbd_inode_lookup(fp);
+	if (found) {
+		kfree(new_ci);
+		new_ci = found;
+	} else {
+		ksmbd_inode_hash(new_ci);
+	}
+	write_unlock(&inode_hash_lock);
+	return new_ci;
+}
+
+static void ksmbd_inode_free(struct ksmbd_inode *ci)
+{
+ ksmbd_inode_unhash(ci);
+ kfree(ci);
+}
+
+static void ksmbd_inode_put(struct ksmbd_inode *ci)
+{
+ if (atomic_dec_and_test(&ci->m_count))
+ ksmbd_inode_free(ci);
+}
+
+/*
+ * Allocate the 16384-bucket inode hash table and initialise every bucket.
+ * Also derives inode_hash_shift and inode_hash_mask from the bucket count.
+ *
+ * Return: 0 on success, -ENOMEM when the table cannot be allocated.
+ */
+int __init ksmbd_inode_hash_init(void)
+{
+	const unsigned long nr_buckets = 16384;
+	unsigned long table_bytes;
+	unsigned int i;
+
+	inode_hash_shift = ilog2(nr_buckets);
+	inode_hash_mask = (1 << inode_hash_shift) - 1;
+
+	table_bytes = sizeof(struct hlist_head) << inode_hash_shift;
+
+	/* init master fp hash table */
+	inode_hashtable = vmalloc(table_bytes);
+	if (!inode_hashtable)
+		return -ENOMEM;
+
+	for (i = 0; i < (1U << inode_hash_shift); i++)
+		INIT_HLIST_HEAD(&inode_hashtable[i]);
+	return 0;
+}
+
+void ksmbd_release_inode_hash(void)
+{
+ vfree(inode_hashtable);
+}
+
+/*
+ * Drop fp's reference on its ksmbd_inode and carry out deferred deletes.
+ *
+ * For a stream fd with S_DEL_ON_CLS_STREAM set, the flag is cleared and the
+ * xattr named fp->stream.name is removed. When the m_count reference dropped
+ * here was the last one, a pending S_DEL_ON_CLS/S_DEL_PENDING request is
+ * honoured by unlinking the file's dentry, after which the ksmbd_inode is
+ * unhashed and freed.
+ */
+static void __ksmbd_inode_close(struct ksmbd_file *fp)
+{
+ struct dentry *dir, *dentry;
+ struct ksmbd_inode *ci = fp->f_ci;
+ int err;
+ struct file *filp;
+
+ filp = fp->filp;
+ if (ksmbd_stream_fd(fp) && (ci->m_flags & S_DEL_ON_CLS_STREAM)) {
+ ci->m_flags &= ~S_DEL_ON_CLS_STREAM;
+ err = ksmbd_vfs_remove_xattr(file_mnt_user_ns(filp),
+ filp->f_path.dentry,
+ fp->stream.name);
+ if (err)
+ pr_err("remove xattr failed : %s\n",
+ fp->stream.name);
+ }
+
+ if (atomic_dec_and_test(&ci->m_count)) {
+ write_lock(&ci->m_lock);
+ if (ci->m_flags & (S_DEL_ON_CLS | S_DEL_PENDING)) {
+ dentry = filp->f_path.dentry;
+ dir = dentry->d_parent;
+ ci->m_flags &= ~(S_DEL_ON_CLS | S_DEL_PENDING);
+ /* m_lock is released across the unlink and re-taken afterwards */
+ write_unlock(&ci->m_lock);
+ ksmbd_vfs_unlink(file_mnt_user_ns(filp), dir, dentry);
+ write_lock(&ci->m_lock);
+ }
+ write_unlock(&ci->m_lock);
+
+ ksmbd_inode_free(ci);
+ }
+}
+
+static void __ksmbd_remove_durable_fd(struct ksmbd_file *fp)
+{
+ if (!has_file_id(fp->persistent_id))
+ return;
+
+ write_lock(&global_ft.lock);
+ idr_remove(global_ft.idr, fp->persistent_id);
+ write_unlock(&global_ft.lock);
+}
+
+static void __ksmbd_remove_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp)
+{
+ if (!has_file_id(fp->volatile_id))
+ return;
+
+ write_lock(&fp->f_ci->m_lock);
+ list_del_init(&fp->node);
+ write_unlock(&fp->f_ci->m_lock);
+
+ write_lock(&ft->lock);
+ idr_remove(ft->idr, fp->volatile_id);
+ write_unlock(&ft->lock);
+}
+
+/*
+ * Tear down @fp completely. In order: give its slot back to fd_limit, remove
+ * it from the global (persistent id) table and from @ft, call
+ * close_id_del_oplock(), drop the ksmbd_inode reference, fput() the backing
+ * file, free every entry still on fp->lock_list, then free the filename, the
+ * stream name (for stream fds) and @fp itself.
+ */
+static void __ksmbd_close_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp)
+{
+ struct file *filp;
+ struct ksmbd_lock *smb_lock, *tmp_lock;
+
+ fd_limit_close();
+ __ksmbd_remove_durable_fd(fp);
+ __ksmbd_remove_fd(ft, fp);
+
+ close_id_del_oplock(fp);
+ filp = fp->filp;
+
+ __ksmbd_inode_close(fp);
+ if (!IS_ERR_OR_NULL(filp))
+ fput(filp);
+
+ /* because the reference count of fp is 0, it is guaranteed that
+ * there are not accesses to fp->lock_list.
+ */
+ list_for_each_entry_safe(smb_lock, tmp_lock, &fp->lock_list, flist) {
+ /* clist is unlinked under the connection's llist_lock */
+ spin_lock(&fp->conn->llist_lock);
+ list_del(&smb_lock->clist);
+ spin_unlock(&fp->conn->llist_lock);
+
+ list_del(&smb_lock->flist);
+ locks_free_lock(smb_lock->fl);
+ kfree(smb_lock);
+ }
+
+ kfree(fp->filename);
+ if (ksmbd_stream_fd(fp))
+ kfree(fp->stream.name);
+ kmem_cache_free(filp_cache, fp);
+}
+
+static struct ksmbd_file *ksmbd_fp_get(struct ksmbd_file *fp)
+{
+ if (!atomic_inc_not_zero(&fp->refcount))
+ return NULL;
+ return fp;
+}
+
+static struct ksmbd_file *__ksmbd_lookup_fd(struct ksmbd_file_table *ft,
+ u64 id)
+{
+ struct ksmbd_file *fp;
+
+ if (!has_file_id(id))
+ return NULL;
+
+ read_lock(&ft->lock);
+ fp = idr_find(ft->idr, id);
+ if (fp)
+ fp = ksmbd_fp_get(fp);
+ read_unlock(&ft->lock);
+ return fp;
+}
+
+static void __put_fd_final(struct ksmbd_work *work, struct ksmbd_file *fp)
+{
+ __ksmbd_close_fd(&work->sess->file_table, fp);
+ atomic_dec(&work->conn->stats.open_files_count);
+}
+
+static void set_close_state_blocked_works(struct ksmbd_file *fp)
+{
+ struct ksmbd_work *cancel_work, *ctmp;
+
+ spin_lock(&fp->f_lock);
+ list_for_each_entry_safe(cancel_work, ctmp, &fp->blocked_works,
+ fp_entry) {
+ list_del(&cancel_work->fp_entry);
+ cancel_work->state = KSMBD_WORK_CLOSED;
+ cancel_work->cancel_fn(cancel_work->cancel_argv);
+ }
+ spin_unlock(&fp->f_lock);
+}
+
+/*
+ * Close the open registered under @id in the session's file table.
+ *
+ * Blocked works queued on the file are marked KSMBD_WORK_CLOSED and
+ * cancelled, then one reference is dropped.
+ *
+ * Return: 0 when @id is not a valid file id, or when this call dropped the
+ * last reference and the file was torn down; -EINVAL when @id is not in the
+ * table or when other references are still outstanding.
+ */
+int ksmbd_close_fd(struct ksmbd_work *work, u64 id)
+{
+ struct ksmbd_file *fp;
+ struct ksmbd_file_table *ft;
+
+ if (!has_file_id(id))
+ return 0;
+
+ ft = &work->sess->file_table;
+ read_lock(&ft->lock);
+ fp = idr_find(ft->idr, id);
+ if (fp) {
+ set_close_state_blocked_works(fp);
+
+ /* not the last reference: leave the final teardown to whoever drops it */
+ if (!atomic_dec_and_test(&fp->refcount))
+ fp = NULL;
+ }
+ read_unlock(&ft->lock);
+
+ if (!fp)
+ return -EINVAL;
+
+ __put_fd_final(work, fp);
+ return 0;
+}
+
+void ksmbd_fd_put(struct ksmbd_work *work, struct ksmbd_file *fp)
+{
+ if (!fp)
+ return;
+
+ if (!atomic_dec_and_test(&fp->refcount))
+ return;
+ __put_fd_final(work, fp);
+}
+
+static bool __sanity_check(struct ksmbd_tree_connect *tcon, struct ksmbd_file *fp)
+{
+ if (!fp)
+ return false;
+ if (fp->tcon != tcon)
+ return false;
+ return true;
+}
+
+struct ksmbd_file *ksmbd_lookup_foreign_fd(struct ksmbd_work *work, u64 id)
+{
+ return __ksmbd_lookup_fd(&work->sess->file_table, id);
+}
+
+struct ksmbd_file *ksmbd_lookup_fd_fast(struct ksmbd_work *work, u64 id)
+{
+ struct ksmbd_file *fp = __ksmbd_lookup_fd(&work->sess->file_table, id);
+
+ if (__sanity_check(work->tcon, fp))
+ return fp;
+
+ ksmbd_fd_put(work, fp);
+ return NULL;
+}
+
+/*
+ * Look up a file by volatile id @id and verify it against persistent id
+ * @pid. When @id is not a valid file id, the compound request's saved ids
+ * are used instead. Returns a referenced file, or NULL when the lookup fails,
+ * the file belongs to another tree connection, or the persistent id differs.
+ */
+struct ksmbd_file *ksmbd_lookup_fd_slow(struct ksmbd_work *work, u64 id,
+					u64 pid)
+{
+	struct ksmbd_file *fp;
+
+	if (!has_file_id(id)) {
+		id = work->compound_fid;
+		pid = work->compound_pfid;
+	}
+
+	fp = __ksmbd_lookup_fd(&work->sess->file_table, id);
+	if (__sanity_check(work->tcon, fp) && fp->persistent_id == pid)
+		return fp;
+
+	/* ksmbd_fd_put() tolerates a NULL fp. */
+	ksmbd_fd_put(work, fp);
+	return NULL;
+}
+
+struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id)
+{
+ return __ksmbd_lookup_fd(&global_ft, id);
+}
+
+struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid)
+{
+ struct ksmbd_file *fp = NULL;
+ unsigned int id;
+
+ read_lock(&global_ft.lock);
+ idr_for_each_entry(global_ft.idr, fp, id) {
+ if (!memcmp(fp->create_guid,
+ cguid,
+ SMB2_CREATE_GUID_SIZE)) {
+ fp = ksmbd_fp_get(fp);
+ break;
+ }
+ }
+ read_unlock(&global_ft.lock);
+
+ return fp;
+}
+
+/*
+ * Return the first ksmbd_file on the ksmbd_inode's m_fp_list whose backing
+ * file refers to @inode, or NULL when there is none.
+ *
+ * NOTE(review): no reference is taken on the returned file here; confirm
+ * callers cannot race with its teardown.
+ */
+struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode)
+{
+	struct ksmbd_file *cur, *match = NULL;
+	struct ksmbd_inode *ci;
+
+	ci = ksmbd_inode_lookup_by_vfsinode(inode);
+	if (!ci)
+		return NULL;
+
+	read_lock(&ci->m_lock);
+	list_for_each_entry(cur, &ci->m_fp_list, node) {
+		if (file_inode(cur->filp) == inode) {
+			match = cur;
+			break;
+		}
+	}
+	/* Release the reference taken by the hash lookup. */
+	atomic_dec(&ci->m_count);
+	read_unlock(&ci->m_lock);
+	return match;
+}
+
+#define OPEN_ID_TYPE_VOLATILE_ID (0)
+#define OPEN_ID_TYPE_PERSISTENT_ID (1)
+
+static void __open_id_set(struct ksmbd_file *fp, u64 id, int type)
+{
+ if (type == OPEN_ID_TYPE_VOLATILE_ID)
+ fp->volatile_id = id;
+ if (type == OPEN_ID_TYPE_PERSISTENT_ID)
+ fp->persistent_id = id;
+}
+
+/*
+ * Allocate an id for @fp in @ft and store it in the field selected by @type.
+ *
+ * A volatile id consumes one slot of the fd budget; that slot is handed back
+ * here if the idr allocation fails. Persistent ids never take a slot, so
+ * nothing is handed back for them.
+ *
+ * Return: 0 on success, -EMFILE when the fd budget is exhausted, or the
+ * negative error from idr_alloc_cyclic(). On failure the selected id field
+ * is set to KSMBD_NO_FID.
+ */
+static int __open_id(struct ksmbd_file_table *ft, struct ksmbd_file *fp,
+		     int type)
+{
+	u64 id = 0;
+	int ret;
+
+	if (type == OPEN_ID_TYPE_VOLATILE_ID && fd_limit_depleted()) {
+		__open_id_set(fp, KSMBD_NO_FID, type);
+		return -EMFILE;
+	}
+
+	idr_preload(GFP_KERNEL);
+	write_lock(&ft->lock);
+	ret = idr_alloc_cyclic(ft->idr, fp, 0, INT_MAX - 1, GFP_NOWAIT);
+	if (ret >= 0) {
+		id = ret;
+		ret = 0;
+	} else {
+		id = KSMBD_NO_FID;
+		/* Only volatile ids took a slot from fd_limit above. */
+		if (type == OPEN_ID_TYPE_VOLATILE_ID)
+			fd_limit_close();
+	}
+
+	__open_id_set(fp, id, type);
+	write_unlock(&ft->lock);
+	idr_preload_end();
+	return ret;
+}
+
+unsigned int ksmbd_open_durable_fd(struct ksmbd_file *fp)
+{
+ __open_id(&global_ft, fp, OPEN_ID_TYPE_PERSISTENT_ID);
+ return fp->persistent_id;
+}
+
+struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp)
+{
+ struct ksmbd_file *fp;
+ int ret;
+
+ fp = kmem_cache_zalloc(filp_cache, GFP_KERNEL);
+ if (!fp) {
+ pr_err("Failed to allocate memory\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ INIT_LIST_HEAD(&fp->blocked_works);
+ INIT_LIST_HEAD(&fp->node);
+ INIT_LIST_HEAD(&fp->lock_list);
+ spin_lock_init(&fp->f_lock);
+ atomic_set(&fp->refcount, 1);
+
+ fp->filp = filp;
+ fp->conn = work->sess->conn;
+ fp->tcon = work->tcon;
+ fp->volatile_id = KSMBD_NO_FID;
+ fp->persistent_id = KSMBD_NO_FID;
+ fp->f_ci = ksmbd_inode_get(fp);
+
+ if (!fp->f_ci) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ ret = __open_id(&work->sess->file_table, fp, OPEN_ID_TYPE_VOLATILE_ID);
+ if (ret) {
+ ksmbd_inode_put(fp->f_ci);
+ goto err_out;
+ }
+
+ atomic_inc(&work->conn->stats.open_files_count);
+ return fp;
+
+err_out:
+ kmem_cache_free(filp_cache, fp);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Walk every file in @ft and close those for which @skip returns false.
+ * Blocked works are cancelled for each candidate; a file is only torn down
+ * (and counted) when the reference dropped here was its last one.
+ *
+ * Return: number of files actually closed.
+ */
+static int
+__close_file_table_ids(struct ksmbd_file_table *ft,
+ struct ksmbd_tree_connect *tcon,
+ bool (*skip)(struct ksmbd_tree_connect *tcon,
+ struct ksmbd_file *fp))
+{
+ unsigned int id;
+ struct ksmbd_file *fp;
+ int num = 0;
+
+ idr_for_each_entry(ft->idr, fp, id) {
+ if (skip(tcon, fp))
+ continue;
+
+ set_close_state_blocked_works(fp);
+
+ /* still referenced elsewhere: the last ksmbd_fd_put() will close it */
+ if (!atomic_dec_and_test(&fp->refcount))
+ continue;
+ __ksmbd_close_fd(ft, fp);
+ num++;
+ }
+ return num;
+}
+
+static bool tree_conn_fd_check(struct ksmbd_tree_connect *tcon,
+ struct ksmbd_file *fp)
+{
+ return fp->tcon != tcon;
+}
+
+static bool session_fd_check(struct ksmbd_tree_connect *tcon,
+ struct ksmbd_file *fp)
+{
+ return false;
+}
+
+void ksmbd_close_tree_conn_fds(struct ksmbd_work *work)
+{
+ int num = __close_file_table_ids(&work->sess->file_table,
+ work->tcon,
+ tree_conn_fd_check);
+
+ atomic_sub(num, &work->conn->stats.open_files_count);
+}
+
+void ksmbd_close_session_fds(struct ksmbd_work *work)
+{
+ int num = __close_file_table_ids(&work->sess->file_table,
+ work->tcon,
+ session_fd_check);
+
+ atomic_sub(num, &work->conn->stats.open_files_count);
+}
+
+int ksmbd_init_global_file_table(void)
+{
+ return ksmbd_init_file_table(&global_ft);
+}
+
+void ksmbd_free_global_file_table(void)
+{
+ struct ksmbd_file *fp = NULL;
+ unsigned int id;
+
+ idr_for_each_entry(global_ft.idr, fp, id) {
+ __ksmbd_remove_durable_fd(fp);
+ kmem_cache_free(filp_cache, fp);
+ }
+
+ ksmbd_destroy_file_table(&global_ft);
+}
+
+/*
+ * Call ksmbd_vfs_fsync() for every file in the session's file table,
+ * stopping at the first failure.
+ *
+ * Return: 0 when the table is empty or every call succeeded, otherwise the
+ * first non-zero value returned by ksmbd_vfs_fsync().
+ *
+ * NOTE(review): ksmbd_vfs_fsync() runs while the file-table rwlock is held;
+ * confirm it cannot sleep.
+ */
+int ksmbd_file_table_flush(struct ksmbd_work *work)
+{
+	struct ksmbd_file *fp = NULL;
+	unsigned int id;
+	/* Must start at 0: the loop body never runs for an empty table. */
+	int ret = 0;
+
+	read_lock(&work->sess->file_table.lock);
+	idr_for_each_entry(work->sess->file_table.idr, fp, id) {
+		ret = ksmbd_vfs_fsync(work, fp->volatile_id, KSMBD_NO_FID);
+		if (ret)
+			break;
+	}
+	read_unlock(&work->sess->file_table.lock);
+	return ret;
+}
+
+int ksmbd_init_file_table(struct ksmbd_file_table *ft)
+{
+ ft->idr = kzalloc(sizeof(struct idr), GFP_KERNEL);
+ if (!ft->idr)
+ return -ENOMEM;
+
+ idr_init(ft->idr);
+ rwlock_init(&ft->lock);
+ return 0;
+}
+
+void ksmbd_destroy_file_table(struct ksmbd_file_table *ft)
+{
+ if (!ft->idr)
+ return;
+
+ __close_file_table_ids(ft, NULL, session_fd_check);
+ idr_destroy(ft->idr);
+ kfree(ft->idr);
+ ft->idr = NULL;
+}
+
+/*
+ * Create the slab cache used for struct ksmbd_file objects.
+ *
+ * Return: 0 on success, -ENOMEM when the cache cannot be created.
+ */
+int ksmbd_init_file_cache(void)
+{
+	filp_cache = kmem_cache_create("ksmbd_file_cache",
+				       sizeof(struct ksmbd_file), 0,
+				       SLAB_HWCACHE_ALIGN, NULL);
+	if (filp_cache)
+		return 0;
+
+	pr_err("failed to allocate file cache\n");
+	return -ENOMEM;
+}
+
+void ksmbd_exit_file_cache(void)
+{
+ kmem_cache_destroy(filp_cache);
+}
diff --git a/fs/ksmbd/vfs_cache.h b/fs/ksmbd/vfs_cache.h
new file mode 100644
index 000000000000..70dfe6a99f13
--- /dev/null
+++ b/fs/ksmbd/vfs_cache.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __VFS_CACHE_H__
+#define __VFS_CACHE_H__
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/rwsem.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/workqueue.h>
+
+#include "vfs.h"
+
+/* Windows style file permissions for extended response */
+#define FILE_GENERIC_ALL 0x1F01FF
+#define FILE_GENERIC_READ 0x120089
+#define FILE_GENERIC_WRITE 0x120116
+#define FILE_GENERIC_EXECUTE 0X1200a0
+
+#define KSMBD_START_FID 0
+#define KSMBD_NO_FID (INT_MAX)
+#define SMB2_NO_FID (0xFFFFFFFFFFFFFFFFULL)
+
+struct ksmbd_conn;
+struct ksmbd_session;
+
+struct ksmbd_lock {
+ struct file_lock *fl;
+ struct list_head clist;
+ struct list_head flist;
+ struct list_head llist;
+ unsigned int flags;
+ int cmd;
+ int zero_len;
+ unsigned long long start;
+ unsigned long long end;
+};
+
+struct stream {
+ char *name;
+ ssize_t size;
+};
+
+struct ksmbd_inode {
+ rwlock_t m_lock;
+ atomic_t m_count;
+ atomic_t op_count;
+ /* opinfo count for streams */
+ atomic_t sop_count;
+ struct inode *m_inode;
+ unsigned int m_flags;
+ struct hlist_node m_hash;
+ struct list_head m_fp_list;
+ struct list_head m_op_list;
+ struct oplock_info *m_opinfo;
+ __le32 m_fattr;
+};
+
+struct ksmbd_file {
+ struct file *filp;
+ char *filename;
+ u64 persistent_id;
+ u64 volatile_id;
+
+ spinlock_t f_lock;
+
+ struct ksmbd_inode *f_ci;
+ struct ksmbd_inode *f_parent_ci;
+ struct oplock_info __rcu *f_opinfo;
+ struct ksmbd_conn *conn;
+ struct ksmbd_tree_connect *tcon;
+
+ atomic_t refcount;
+ __le32 daccess;
+ __le32 saccess;
+ __le32 coption;
+ __le32 cdoption;
+ __u64 create_time;
+ __u64 itime;
+
+ bool is_nt_open;
+ bool attrib_only;
+
+ char client_guid[16];
+ char create_guid[16];
+ char app_instance_id[16];
+
+ struct stream stream;
+ struct list_head node;
+ struct list_head blocked_works;
+ struct list_head lock_list;
+
+ int durable_timeout;
+
+ /* for SMB1 */
+ int pid;
+
+ /* conflict lock fail count for SMB1 */
+ unsigned int cflock_cnt;
+ /* last lock failure start offset for SMB1 */
+ unsigned long long llock_fstart;
+
+ int dirent_offset;
+
+ /* if ls is happening on directory, below is valid*/
+ struct ksmbd_readdir_data readdir_data;
+ int dot_dotdot[2];
+};
+
+static inline void set_ctx_actor(struct dir_context *ctx,
+ filldir_t actor)
+{
+ ctx->actor = actor;
+}
+
+#define KSMBD_NR_OPEN_DEFAULT BITS_PER_LONG
+
+struct ksmbd_file_table {
+ rwlock_t lock;
+ struct idr *idr;
+};
+
+static inline bool has_file_id(u64 id)
+{
+ return id < KSMBD_NO_FID;
+}
+
+static inline bool ksmbd_stream_fd(struct ksmbd_file *fp)
+{
+ return fp->stream.name != NULL;
+}
+
+int ksmbd_init_file_table(struct ksmbd_file_table *ft);
+void ksmbd_destroy_file_table(struct ksmbd_file_table *ft);
+int ksmbd_close_fd(struct ksmbd_work *work, u64 id);
+struct ksmbd_file *ksmbd_lookup_fd_fast(struct ksmbd_work *work, u64 id);
+struct ksmbd_file *ksmbd_lookup_foreign_fd(struct ksmbd_work *work, u64 id);
+struct ksmbd_file *ksmbd_lookup_fd_slow(struct ksmbd_work *work, u64 id,
+ u64 pid);
+void ksmbd_fd_put(struct ksmbd_work *work, struct ksmbd_file *fp);
+struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id);
+struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid);
+struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode);
+unsigned int ksmbd_open_durable_fd(struct ksmbd_file *fp);
+struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp);
+void ksmbd_close_tree_conn_fds(struct ksmbd_work *work);
+void ksmbd_close_session_fds(struct ksmbd_work *work);
+int ksmbd_close_inode_fds(struct ksmbd_work *work, struct inode *inode);
+int ksmbd_init_global_file_table(void);
+void ksmbd_free_global_file_table(void);
+int ksmbd_file_table_flush(struct ksmbd_work *work);
+void ksmbd_set_fd_limit(unsigned long limit);
+
+/*
+ * INODE hash
+ */
+int __init ksmbd_inode_hash_init(void);
+void ksmbd_release_inode_hash(void);
+
+enum KSMBD_INODE_STATUS {
+ KSMBD_INODE_STATUS_OK,
+ KSMBD_INODE_STATUS_UNKNOWN,
+ KSMBD_INODE_STATUS_PENDING_DELETE,
+};
+
+int ksmbd_query_inode_status(struct inode *inode);
+bool ksmbd_inode_pending_delete(struct ksmbd_file *fp);
+void ksmbd_set_inode_pending_delete(struct ksmbd_file *fp);
+void ksmbd_clear_inode_pending_delete(struct ksmbd_file *fp);
+void ksmbd_fd_set_delete_on_close(struct ksmbd_file *fp,
+ int file_info);
+int ksmbd_init_file_cache(void);
+void ksmbd_exit_file_cache(void);
+#endif /* __VFS_CACHE_H__ */
diff --git a/fs/ksmbd/xattr.h b/fs/ksmbd/xattr.h
new file mode 100644
index 000000000000..8857c01093d9
--- /dev/null
+++ b/fs/ksmbd/xattr.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __XATTR_H__
+#define __XATTR_H__
+
+/*
+ * These are on-disk structures to store additional metadata into xattr to
+ * reproduce windows filesystem semantics. They are encoded with NDR to be
+ * compatible with samba's xattr meta format. The compatibility with samba
+ * is important because information (file attribute, creation time, acls)
+ * about existing files could otherwise be lost when switching between
+ * ksmbd and samba.
+ */
+
+/*
+ * Dos attribute flags used for what variable is valid.
+ */
+enum {
+ XATTR_DOSINFO_ATTRIB = 0x00000001,
+ XATTR_DOSINFO_EA_SIZE = 0x00000002,
+ XATTR_DOSINFO_SIZE = 0x00000004,
+ XATTR_DOSINFO_ALLOC_SIZE = 0x00000008,
+ XATTR_DOSINFO_CREATE_TIME = 0x00000010,
+ XATTR_DOSINFO_CHANGE_TIME = 0x00000020,
+ XATTR_DOSINFO_ITIME = 0x00000040
+};
+
+/*
+ * Dos attribute structure which is compatible with samba's one.
+ * Storing it into the xattr named "DOSATTRIB" separately from inode
+ * allows ksmbd to faithfully reproduce windows filesystem semantics
+ * on top of a POSIX filesystem.
+ */
+struct xattr_dos_attrib {
+ __u16 version; /* version 3 or version 4 */
+ __u32 flags; /* valid flags */
+ __u32 attr; /* Dos attribute */
+ __u32 ea_size; /* EA size */
+ __u64 size;
+ __u64 alloc_size;
+ __u64 create_time; /* File creation time */
+ __u64 change_time; /* File change time */
+ __u64 itime; /* Invented/Initial time */
+};
+
+/*
+ * Enumeration is used for computing posix acl hash.
+ */
+enum {
+ SMB_ACL_TAG_INVALID = 0,
+ SMB_ACL_USER,
+ SMB_ACL_USER_OBJ,
+ SMB_ACL_GROUP,
+ SMB_ACL_GROUP_OBJ,
+ SMB_ACL_OTHER,
+ SMB_ACL_MASK
+};
+
+#define SMB_ACL_READ 4
+#define SMB_ACL_WRITE 2
+#define SMB_ACL_EXECUTE 1
+
+struct xattr_acl_entry {
+ int type;
+ uid_t uid;
+ gid_t gid;
+ mode_t perm;
+};
+
+/*
+ * xattr_smb_acl structure is used for computing posix acl hash.
+ * @entries is a flexible array member; its storage is whatever the
+ * allocation provides beyond the fixed header.
+ */
+struct xattr_smb_acl {
+	int count;
+	int next;
+	struct xattr_acl_entry entries[];
+};
+
+/* 64bytes hash in xattr_ntacl is computed with sha256 */
+#define XATTR_SD_HASH_TYPE_SHA256 0x1
+#define XATTR_SD_HASH_SIZE 64
+
+/*
+ * xattr_ntacl is used for storing ntacl and hashes.
+ * Hash is used for checking valid posix acl and ntacl in xattr.
+ */
+struct xattr_ntacl {
+ __u16 version; /* version 4*/
+ void *sd_buf;
+ __u32 sd_size;
+ __u16 hash_type; /* hash type */
+ __u8 desc[10]; /* posix_acl description */
+ __u16 desc_len;
+ __u64 current_time;
+ __u8 hash[XATTR_SD_HASH_SIZE]; /* 64bytes hash for ntacl */
+ __u8 posix_acl_hash[XATTR_SD_HASH_SIZE]; /* 64bytes hash for posix acl */
+};
+
+/* DOS ATTRIBUTE XATTR PREFIX */
+#define DOS_ATTRIBUTE_PREFIX "DOSATTRIB"
+#define DOS_ATTRIBUTE_PREFIX_LEN (sizeof(DOS_ATTRIBUTE_PREFIX) - 1)
+#define XATTR_NAME_DOS_ATTRIBUTE (XATTR_USER_PREFIX DOS_ATTRIBUTE_PREFIX)
+#define XATTR_NAME_DOS_ATTRIBUTE_LEN \
+ (sizeof(XATTR_USER_PREFIX DOS_ATTRIBUTE_PREFIX) - 1)
+
+/* STREAM XATTR PREFIX */
+#define STREAM_PREFIX "DosStream."
+#define STREAM_PREFIX_LEN (sizeof(STREAM_PREFIX) - 1)
+#define XATTR_NAME_STREAM (XATTR_USER_PREFIX STREAM_PREFIX)
+#define XATTR_NAME_STREAM_LEN (sizeof(XATTR_NAME_STREAM) - 1)
+
+/* SECURITY DESCRIPTOR(NTACL) XATTR PREFIX */
+#define SD_PREFIX "NTACL"
+#define SD_PREFIX_LEN (sizeof(SD_PREFIX) - 1)
+#define XATTR_NAME_SD (XATTR_SECURITY_PREFIX SD_PREFIX)
+#define XATTR_NAME_SD_LEN \
+ (sizeof(XATTR_SECURITY_PREFIX SD_PREFIX) - 1)
+
+#endif /* __XATTR_H__ */
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 2de048f80eb8..0ab9756ed235 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -584,7 +584,7 @@ static struct ctl_table nlm_sysctls[] = {
.data = &nsm_use_hostnames,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dobool,
},
{
.procname = "nsm_local_state",
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4c10fb5138f1..e10ae2c41279 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -40,12 +40,15 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Obtain file pointer. Not used by FREE_ALL call. */
if (filp != NULL) {
- if ((error = nlm_lookup_file(rqstp, &file, &lock->fh)) != 0)
+ int mode = lock_to_openmode(&lock->fl);
+
+ error = nlm_lookup_file(rqstp, &file, lock);
+ if (error)
goto no_locks;
*filp = file;
/* Set up the missing parts of the file_lock structure */
- lock->fl.fl_file = file->f_file;
+ lock->fl.fl_file = file->f_file[mode];
lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 61d3cc2283dc..e9b85d8fd5fe 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -31,6 +31,7 @@
#include <linux/lockd/nlm.h>
#include <linux/lockd/lockd.h>
#include <linux/kthread.h>
+#include <linux/exportfs.h>
#define NLMDBG_FACILITY NLMDBG_SVCLOCK
@@ -395,28 +396,10 @@ nlmsvc_release_lockowner(struct nlm_lock *lock)
nlmsvc_put_lockowner(lock->fl.fl_owner);
}
-static void nlmsvc_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
-{
- struct nlm_lockowner *nlm_lo = (struct nlm_lockowner *)fl->fl_owner;
- new->fl_owner = nlmsvc_get_lockowner(nlm_lo);
-}
-
-static void nlmsvc_locks_release_private(struct file_lock *fl)
-{
- nlmsvc_put_lockowner((struct nlm_lockowner *)fl->fl_owner);
-}
-
-static const struct file_lock_operations nlmsvc_lock_ops = {
- .fl_copy_lock = nlmsvc_locks_copy_lock,
- .fl_release_private = nlmsvc_locks_release_private,
-};
-
void nlmsvc_locks_init_private(struct file_lock *fl, struct nlm_host *host,
pid_t pid)
{
fl->fl_owner = nlmsvc_find_lockowner(host, pid);
- if (fl->fl_owner != NULL)
- fl->fl_ops = &nlmsvc_lock_ops;
}
/*
@@ -488,17 +471,24 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_cookie *cookie, int reclaim)
{
struct nlm_block *block = NULL;
+ struct inode *inode = nlmsvc_file_inode(file);
int error;
+ int mode;
+ int async_block = 0;
__be32 ret;
dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
- locks_inode(file->f_file)->i_sb->s_id,
- locks_inode(file->f_file)->i_ino,
+ inode->i_sb->s_id, inode->i_ino,
lock->fl.fl_type, lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end,
wait);
+ if (inode->i_sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS) {
+ async_block = wait;
+ wait = 0;
+ }
+
/* Lock file against concurrent access */
mutex_lock(&file->f_mutex);
/* Get existing block (in case client is busy-waiting)
@@ -542,7 +532,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
if (!wait)
lock->fl.fl_flags &= ~FL_SLEEP;
- error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
+ mode = lock_to_openmode(&lock->fl);
+ error = vfs_lock_file(file->f_file[mode], F_SETLK, &lock->fl, NULL);
lock->fl.fl_flags &= ~FL_SLEEP;
dprintk("lockd: vfs_lock_file returned %d\n", error);
@@ -558,7 +549,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
*/
if (wait)
break;
- ret = nlm_lck_denied;
+ ret = async_block ? nlm_lck_blocked : nlm_lck_denied;
goto out;
case FILE_LOCK_DEFERRED:
if (wait)
@@ -595,12 +586,13 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_lock *conflock, struct nlm_cookie *cookie)
{
int error;
+ int mode;
__be32 ret;
struct nlm_lockowner *test_owner;
dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
- locks_inode(file->f_file)->i_sb->s_id,
- locks_inode(file->f_file)->i_ino,
+ nlmsvc_file_inode(file)->i_sb->s_id,
+ nlmsvc_file_inode(file)->i_ino,
lock->fl.fl_type,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
@@ -613,7 +605,8 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
/* If there's a conflicting lock, remember to clean up the test lock */
test_owner = (struct nlm_lockowner *)lock->fl.fl_owner;
- error = vfs_test_lock(file->f_file, &lock->fl);
+ mode = lock_to_openmode(&lock->fl);
+ error = vfs_test_lock(file->f_file[mode], &lock->fl);
if (error) {
/* We can't currently deal with deferred test requests */
if (error == FILE_LOCK_DEFERRED)
@@ -634,7 +627,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
conflock->caller = "somehost"; /* FIXME */
conflock->len = strlen(conflock->caller);
conflock->oh.len = 0; /* don't return OH info */
- conflock->svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid;
+ conflock->svid = lock->fl.fl_pid;
conflock->fl.fl_type = lock->fl.fl_type;
conflock->fl.fl_start = lock->fl.fl_start;
conflock->fl.fl_end = lock->fl.fl_end;
@@ -659,11 +652,11 @@ out:
__be32
nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
{
- int error;
+ int error = 0;
dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
- locks_inode(file->f_file)->i_sb->s_id,
- locks_inode(file->f_file)->i_ino,
+ nlmsvc_file_inode(file)->i_sb->s_id,
+ nlmsvc_file_inode(file)->i_ino,
lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
@@ -672,7 +665,12 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
nlmsvc_cancel_blocked(net, file, lock);
lock->fl.fl_type = F_UNLCK;
- error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
+ if (file->f_file[O_RDONLY])
+ error = vfs_lock_file(file->f_file[O_RDONLY], F_SETLK,
+ &lock->fl, NULL);
+ if (file->f_file[O_WRONLY])
+ error = vfs_lock_file(file->f_file[O_WRONLY], F_SETLK,
+ &lock->fl, NULL);
return (error < 0)? nlm_lck_denied_nolocks : nlm_granted;
}
@@ -689,10 +687,11 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
{
struct nlm_block *block;
int status = 0;
+ int mode;
dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
- locks_inode(file->f_file)->i_sb->s_id,
- locks_inode(file->f_file)->i_ino,
+ nlmsvc_file_inode(file)->i_sb->s_id,
+ nlmsvc_file_inode(file)->i_ino,
lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
@@ -704,7 +703,8 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
block = nlmsvc_lookup_block(file, lock);
mutex_unlock(&file->f_mutex);
if (block != NULL) {
- vfs_cancel_lock(block->b_file->f_file,
+ mode = lock_to_openmode(&lock->fl);
+ vfs_cancel_lock(block->b_file->f_file[mode],
&block->b_call->a_args.lock.fl);
status = nlmsvc_unlink_block(block);
nlmsvc_release_block(block);
@@ -788,9 +788,21 @@ nlmsvc_notify_blocked(struct file_lock *fl)
printk(KERN_WARNING "lockd: notification for unknown block!\n");
}
+/*
+ * Adapter installed as the .lm_get_owner callback of
+ * nlmsvc_lock_operations: hands @owner to nlmsvc_get_lockowner() and
+ * returns whatever that helper returns.
+ */
+static fl_owner_t nlmsvc_get_owner(fl_owner_t owner)
+{
+ return nlmsvc_get_lockowner(owner);
+}
+
+/*
+ * Adapter installed as the .lm_put_owner callback of
+ * nlmsvc_lock_operations: hands @owner to nlmsvc_put_lockowner().
+ */
+static void nlmsvc_put_owner(fl_owner_t owner)
+{
+ nlmsvc_put_lockowner(owner);
+}
+
const struct lock_manager_operations nlmsvc_lock_operations = {
.lm_notify = nlmsvc_notify_blocked,
.lm_grant = nlmsvc_grant_deferred,
+ .lm_get_owner = nlmsvc_get_owner,
+ .lm_put_owner = nlmsvc_put_owner,
};
/*
@@ -809,6 +821,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
{
struct nlm_file *file = block->b_file;
struct nlm_lock *lock = &block->b_call->a_args.lock;
+ int mode;
int error;
loff_t fl_start, fl_end;
@@ -834,7 +847,8 @@ nlmsvc_grant_blocked(struct nlm_block *block)
lock->fl.fl_flags |= FL_SLEEP;
fl_start = lock->fl.fl_start;
fl_end = lock->fl.fl_end;
- error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
+ mode = lock_to_openmode(&lock->fl);
+ error = vfs_lock_file(file->f_file[mode], F_SETLK, &lock->fl, NULL);
lock->fl.fl_flags &= ~FL_SLEEP;
lock->fl.fl_start = fl_start;
lock->fl.fl_end = fl_end;
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 4ae4b63b5392..99696d3f6dd6 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -55,6 +55,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
struct nlm_host *host = NULL;
struct nlm_file *file = NULL;
struct nlm_lock *lock = &argp->lock;
+ int mode;
__be32 error = 0;
/* nfsd callbacks must have been installed for this procedure */
@@ -69,13 +70,14 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Obtain file pointer. Not used by FREE_ALL call. */
if (filp != NULL) {
- error = cast_status(nlm_lookup_file(rqstp, &file, &lock->fh));
+ error = cast_status(nlm_lookup_file(rqstp, &file, lock));
if (error != 0)
goto no_locks;
*filp = file;
/* Set up the missing parts of the file_lock structure */
- lock->fl.fl_file = file->f_file;
+ mode = lock_to_openmode(&lock->fl);
+ lock->fl.fl_file = file->f_file[mode];
lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 028fc152da22..cb3a7512c33e 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -45,7 +45,7 @@ static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
static inline void nlm_debug_print_file(char *msg, struct nlm_file *file)
{
- struct inode *inode = locks_inode(file->f_file);
+ struct inode *inode = nlmsvc_file_inode(file);
dprintk("lockd: %s %s/%ld\n",
msg, inode->i_sb->s_id, inode->i_ino);
@@ -71,56 +71,75 @@ static inline unsigned int file_hash(struct nfs_fh *f)
return tmp & (FILE_NRHASH - 1);
}
+/*
+ * Choose the open mode that goes with a lock request: a write lock
+ * (F_WRLCK) maps to O_WRONLY, every other lock type maps to O_RDONLY.
+ * Callers use the result as an index into nlm_file->f_file[].
+ */
+int lock_to_openmode(struct file_lock *lock)
+{
+	if (lock->fl_type == F_WRLCK)
+		return O_WRONLY;
+
+	return O_RDONLY;
+}
+
+/*
+ * Make sure @file has an open struct file in the f_file[] slot for
+ * @mode, opening it through nlmsvc_ops->fopen() the first time that
+ * slot is needed.  Note that if we're reexporting, for example, the
+ * open could block the lockd thread for a while.
+ *
+ * We have to make sure we have the right credential to open the file.
+ *
+ * Returns 0 when the slot was already populated, otherwise the status
+ * reported by the fopen callback (logged via dprintk when non-zero).
+ */
+static __be32 nlm_do_fopen(struct svc_rqst *rqstp,
+			   struct nlm_file *file, int mode)
+{
+	__be32 status = 0;
+
+	if (!file->f_file[mode]) {
+		status = nlmsvc_ops->fopen(rqstp, &file->f_handle,
+					   &file->f_file[mode], mode);
+		if (status)
+			dprintk("lockd: open failed (error %d)\n", status);
+	}
+
+	return status;
+}
+
/*
* Lookup file info. If it doesn't exist, create a file info struct
* and open a (VFS) file for the given inode.
- *
- * FIXME:
- * Note that we open the file O_RDONLY even when creating write locks.
- * This is not quite right, but for now, we assume the client performs
- * the proper R/W checking.
*/
__be32
nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
- struct nfs_fh *f)
+ struct nlm_lock *lock)
{
struct nlm_file *file;
unsigned int hash;
__be32 nfserr;
+ int mode;
- nlm_debug_print_fh("nlm_lookup_file", f);
+ nlm_debug_print_fh("nlm_lookup_file", &lock->fh);
- hash = file_hash(f);
+ hash = file_hash(&lock->fh);
+ mode = lock_to_openmode(&lock->fl);
/* Lock file table */
mutex_lock(&nlm_file_mutex);
hlist_for_each_entry(file, &nlm_files[hash], f_list)
- if (!nfs_compare_fh(&file->f_handle, f))
+ if (!nfs_compare_fh(&file->f_handle, &lock->fh)) {
+ mutex_lock(&file->f_mutex);
+ nfserr = nlm_do_fopen(rqstp, file, mode);
+ mutex_unlock(&file->f_mutex);
goto found;
-
- nlm_debug_print_fh("creating file for", f);
+ }
+ nlm_debug_print_fh("creating file for", &lock->fh);
nfserr = nlm_lck_denied_nolocks;
file = kzalloc(sizeof(*file), GFP_KERNEL);
if (!file)
- goto out_unlock;
+ goto out_free;
- memcpy(&file->f_handle, f, sizeof(struct nfs_fh));
+ memcpy(&file->f_handle, &lock->fh, sizeof(struct nfs_fh));
mutex_init(&file->f_mutex);
INIT_HLIST_NODE(&file->f_list);
INIT_LIST_HEAD(&file->f_blocks);
- /* Open the file. Note that this must not sleep for too long, else
- * we would lock up lockd:-) So no NFS re-exports, folks.
- *
- * We have to make sure we have the right credential to open
- * the file.
- */
- if ((nfserr = nlmsvc_ops->fopen(rqstp, f, &file->f_file)) != 0) {
- dprintk("lockd: open failed (error %d)\n", nfserr);
- goto out_free;
- }
+ nfserr = nlm_do_fopen(rqstp, file, mode);
+ if (nfserr)
+ goto out_unlock;
hlist_add_head(&file->f_list, &nlm_files[hash]);
@@ -128,7 +147,6 @@ found:
dprintk("lockd: found file %p (count %d)\n", file, file->f_count);
*result = file;
file->f_count++;
- nfserr = 0;
out_unlock:
mutex_unlock(&nlm_file_mutex);
@@ -148,13 +166,34 @@ nlm_delete_file(struct nlm_file *file)
nlm_debug_print_file("closing file", file);
if (!hlist_unhashed(&file->f_list)) {
hlist_del(&file->f_list);
- nlmsvc_ops->fclose(file->f_file);
+ if (file->f_file[O_RDONLY])
+ nlmsvc_ops->fclose(file->f_file[O_RDONLY]);
+ if (file->f_file[O_WRONLY])
+ nlmsvc_ops->fclose(file->f_file[O_WRONLY]);
kfree(file);
} else {
printk(KERN_WARNING "lockd: attempt to release unknown file!\n");
}
}
+/*
+ * Issue an F_UNLCK request covering the whole file (offset 0 through
+ * OFFSET_MAX) on every struct file that is currently open for @file.
+ *
+ * Returns 0 on success, or 1 (after logging a warning) as soon as
+ * vfs_lock_file() reports an error.
+ */
+static int nlm_unlock_files(struct nlm_file *file)
+{
+	struct file_lock lock;
+	int mode;
+
+	/*
+	 * NOTE(review): only these three fields of the on-stack lock are
+	 * set; confirm whether owner/flags/list members must be
+	 * initialised before it is handed to vfs_lock_file().
+	 */
+	lock.fl_type = F_UNLCK;
+	lock.fl_start = 0;
+	lock.fl_end = OFFSET_MAX;
+
+	/*
+	 * f_file[] holds one struct file pointer per open mode.  Walk the
+	 * two slots by index: stepping a struct file pointer from
+	 * f_file[0] towards f_file[1] would skip the second file or visit
+	 * addresses that are not open files at all.
+	 */
+	for (mode = O_RDONLY; mode <= O_WRONLY; mode++) {
+		struct file *f = file->f_file[mode];
+
+		if (f && vfs_lock_file(f, F_SETLK, &lock, NULL) < 0) {
+			pr_warn("lockd: unlock failure in %s:%d\n",
+				__FILE__, __LINE__);
+			return 1;
+		}
+	}
+	return 0;
+}
+
/*
* Loop over all locks on the given file and perform the specified
* action.
@@ -182,17 +221,10 @@ again:
lockhost = ((struct nlm_lockowner *)fl->fl_owner)->host;
if (match(lockhost, host)) {
- struct file_lock lock = *fl;
spin_unlock(&flctx->flc_lock);
- lock.fl_type = F_UNLCK;
- lock.fl_start = 0;
- lock.fl_end = OFFSET_MAX;
- if (vfs_lock_file(file->f_file, F_SETLK, &lock, NULL) < 0) {
- printk("lockd: unlock failure in %s:%d\n",
- __FILE__, __LINE__);
+ if (nlm_unlock_files(file))
return 1;
- }
goto again;
}
}
@@ -246,6 +278,15 @@ nlm_file_inuse(struct nlm_file *file)
return 0;
}
+/*
+ * Close every struct file that is open for @file via nlmsvc_ops->fclose().
+ *
+ * f_file[] has one slot per open mode, so each slot is tested by index.
+ * Incrementing a struct file pointer taken from f_file[0] until it
+ * passes f_file[1] would not visit the second slot and could pass
+ * arbitrary addresses to fclose().
+ */
+static void nlm_close_files(struct nlm_file *file)
+{
+	if (file->f_file[O_RDONLY])
+		nlmsvc_ops->fclose(file->f_file[O_RDONLY]);
+	if (file->f_file[O_WRONLY])
+		nlmsvc_ops->fclose(file->f_file[O_WRONLY]);
+}
+
/*
* Loop over all files in the file table.
*/
@@ -276,7 +317,7 @@ nlm_traverse_files(void *data, nlm_host_match_fn_t match,
if (list_empty(&file->f_blocks) && !file->f_locks
&& !file->f_shares && !file->f_count) {
hlist_del(&file->f_list);
- nlmsvc_ops->fclose(file->f_file);
+ nlm_close_files(file);
kfree(file);
}
}
@@ -410,12 +451,13 @@ nlmsvc_invalidate_all(void)
nlm_traverse_files(NULL, nlmsvc_is_client, NULL);
}
+
static int
nlmsvc_match_sb(void *datap, struct nlm_file *file)
{
struct super_block *sb = datap;
- return sb == locks_inode(file->f_file)->i_sb;
+ return sb == nlmsvc_file_inode(file)->i_sb;
}
/**
diff --git a/fs/locks.c b/fs/locks.c
index 74b2a1dfe8d8..3d6fb4ae847b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1397,103 +1397,6 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
return error;
}
-#ifdef CONFIG_MANDATORY_FILE_LOCKING
-/**
- * locks_mandatory_locked - Check for an active lock
- * @file: the file to check
- *
- * Searches the inode's list of locks to find any POSIX locks which conflict.
- * This function is called from locks_verify_locked() only.
- */
-int locks_mandatory_locked(struct file *file)
-{
- int ret;
- struct inode *inode = locks_inode(file);
- struct file_lock_context *ctx;
- struct file_lock *fl;
-
- ctx = smp_load_acquire(&inode->i_flctx);
- if (!ctx || list_empty_careful(&ctx->flc_posix))
- return 0;
-
- /*
- * Search the lock list for this inode for any POSIX locks.
- */
- spin_lock(&ctx->flc_lock);
- ret = 0;
- list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
- if (fl->fl_owner != current->files &&
- fl->fl_owner != file) {
- ret = -EAGAIN;
- break;
- }
- }
- spin_unlock(&ctx->flc_lock);
- return ret;
-}
-
-/**
- * locks_mandatory_area - Check for a conflicting lock
- * @inode: the file to check
- * @filp: how the file was opened (if it was)
- * @start: first byte in the file to check
- * @end: lastbyte in the file to check
- * @type: %F_WRLCK for a write lock, else %F_RDLCK
- *
- * Searches the inode's list of locks to find any POSIX locks which conflict.
- */
-int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
- loff_t end, unsigned char type)
-{
- struct file_lock fl;
- int error;
- bool sleep = false;
-
- locks_init_lock(&fl);
- fl.fl_pid = current->tgid;
- fl.fl_file = filp;
- fl.fl_flags = FL_POSIX | FL_ACCESS;
- if (filp && !(filp->f_flags & O_NONBLOCK))
- sleep = true;
- fl.fl_type = type;
- fl.fl_start = start;
- fl.fl_end = end;
-
- for (;;) {
- if (filp) {
- fl.fl_owner = filp;
- fl.fl_flags &= ~FL_SLEEP;
- error = posix_lock_inode(inode, &fl, NULL);
- if (!error)
- break;
- }
-
- if (sleep)
- fl.fl_flags |= FL_SLEEP;
- fl.fl_owner = current->files;
- error = posix_lock_inode(inode, &fl, NULL);
- if (error != FILE_LOCK_DEFERRED)
- break;
- error = wait_event_interruptible(fl.fl_wait,
- list_empty(&fl.fl_blocked_member));
- if (!error) {
- /*
- * If we've been sleeping someone might have
- * changed the permissions behind our back.
- */
- if (__mandatory_lock(inode))
- continue;
- }
-
- break;
- }
- locks_delete_block(&fl);
-
- return error;
-}
-EXPORT_SYMBOL(locks_mandatory_area);
-#endif /* CONFIG_MANDATORY_FILE_LOCKING */
-
static void lease_clear_pending(struct file_lock *fl, int arg)
{
switch (arg) {
@@ -2486,14 +2389,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (file_lock == NULL)
return -ENOLCK;
- /* Don't allow mandatory locks on files that may be memory mapped
- * and shared.
- */
- if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
- error = -EAGAIN;
- goto out;
- }
-
error = flock_to_posix_lock(filp, file_lock, flock);
if (error)
goto out;
@@ -2611,21 +2506,12 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
struct flock64 *flock)
{
struct file_lock *file_lock = locks_alloc_lock();
- struct inode *inode = locks_inode(filp);
struct file *f;
int error;
if (file_lock == NULL)
return -ENOLCK;
- /* Don't allow mandatory locks on files that may be memory mapped
- * and shared.
- */
- if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
- error = -EAGAIN;
- goto out;
- }
-
error = flock64_to_posix_lock(filp, file_lock, flock);
if (error)
goto out;
@@ -2857,8 +2743,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
seq_puts(f, "POSIX ");
seq_printf(f, " %s ",
- (inode == NULL) ? "*NOINODE*" :
- mandatory_lock(inode) ? "MANDATORY" : "ADVISORY ");
+ (inode == NULL) ? "*NOINODE*" : "ADVISORY ");
} else if (IS_FLOCK(fl)) {
if (fl->fl_type & LOCK_MAND) {
seq_puts(f, "FLOCK MSNFS ");
diff --git a/fs/namei.c b/fs/namei.c
index bf6d8a738c59..d049d3972695 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -204,6 +204,14 @@ getname_flags(const char __user *filename, int flags, int *empty)
}
struct filename *
+getname_uflags(const char __user *filename, int uflags)
+{
+ /* Only the AT_EMPTY_PATH bit of @uflags is examined; it selects LOOKUP_EMPTY. */
+ int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+
+ /* NULL: the caller does not ask for the "empty" out-parameter. */
+ return getname_flags(filename, flags, NULL);
+}
+
+struct filename *
getname(const char __user * filename)
{
return getname_flags(filename, 0, NULL);
@@ -247,6 +255,9 @@ getname_kernel(const char * filename)
void putname(struct filename *name)
{
+ if (IS_ERR_OR_NULL(name))
+ return;
+
BUG_ON(name->refcnt <= 0);
if (--name->refcnt > 0)
@@ -2456,7 +2467,7 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
return err;
}
-int filename_lookup(int dfd, struct filename *name, unsigned flags,
+static int __filename_lookup(int dfd, struct filename *name, unsigned flags,
struct path *path, struct path *root)
{
int retval;
@@ -2474,6 +2485,14 @@ int filename_lookup(int dfd, struct filename *name, unsigned flags,
audit_inode(name, path->dentry,
flags & LOOKUP_MOUNTPOINT ? AUDIT_INODE_NOEVAL : 0);
restore_nameidata();
+ return retval;
+}
+
+/*
+ * Wrapper around __filename_lookup() that additionally drops @name with
+ * putname() before handing back the lookup result, whether or not the
+ * lookup succeeded.
+ */
+int filename_lookup(int dfd, struct filename *name, unsigned flags,
+ struct path *path, struct path *root)
+{
+ int retval = __filename_lookup(dfd, name, flags, path, root);
+
putname(name);
return retval;
}
@@ -2495,7 +2514,7 @@ static int path_parentat(struct nameidata *nd, unsigned flags,
return err;
}
-static struct filename *filename_parentat(int dfd, struct filename *name,
+static int __filename_parentat(int dfd, struct filename *name,
unsigned int flags, struct path *parent,
struct qstr *last, int *type)
{
@@ -2503,7 +2522,7 @@ static struct filename *filename_parentat(int dfd, struct filename *name,
struct nameidata nd;
if (IS_ERR(name))
- return name;
+ return PTR_ERR(name);
set_nameidata(&nd, dfd, name, NULL);
retval = path_parentat(&nd, flags | LOOKUP_RCU, parent);
if (unlikely(retval == -ECHILD))
@@ -2514,29 +2533,34 @@ static struct filename *filename_parentat(int dfd, struct filename *name,
*last = nd.last;
*type = nd.last_type;
audit_inode(name, parent->dentry, AUDIT_INODE_PARENT);
- } else {
- putname(name);
- name = ERR_PTR(retval);
}
restore_nameidata();
- return name;
+ return retval;
+}
+
+/*
+ * Wrapper around __filename_parentat() that additionally drops @name
+ * with putname() before returning the helper's result, on success and
+ * on failure alike.
+ */
+static int filename_parentat(int dfd, struct filename *name,
+ unsigned int flags, struct path *parent,
+ struct qstr *last, int *type)
+{
+ int retval = __filename_parentat(dfd, name, flags, parent, last, type);
+
+ putname(name);
+ return retval;
}
/* does lookup, returns the object with parent locked */
struct dentry *kern_path_locked(const char *name, struct path *path)
{
- struct filename *filename;
struct dentry *d;
struct qstr last;
- int type;
+ int type, error;
- filename = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path,
+ error = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path,
&last, &type);
- if (IS_ERR(filename))
- return ERR_CAST(filename);
+ if (error)
+ return ERR_PTR(error);
if (unlikely(type != LAST_NORM)) {
path_put(path);
- putname(filename);
return ERR_PTR(-EINVAL);
}
inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
@@ -2545,7 +2569,6 @@ struct dentry *kern_path_locked(const char *name, struct path *path)
inode_unlock(path->dentry->d_inode);
path_put(path);
}
- putname(filename);
return d;
}
@@ -2575,8 +2598,9 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
}
EXPORT_SYMBOL(vfs_path_lookup);
-static int lookup_one_len_common(const char *name, struct dentry *base,
- int len, struct qstr *this)
+static int lookup_one_common(struct user_namespace *mnt_userns,
+ const char *name, struct dentry *base, int len,
+ struct qstr *this)
{
this->name = name;
this->len = len;
@@ -2604,7 +2628,7 @@ static int lookup_one_len_common(const char *name, struct dentry *base,
return err;
}
- return inode_permission(&init_user_ns, base->d_inode, MAY_EXEC);
+ return inode_permission(mnt_userns, base->d_inode, MAY_EXEC);
}
/**
@@ -2628,7 +2652,7 @@ struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len
WARN_ON_ONCE(!inode_is_locked(base->d_inode));
- err = lookup_one_len_common(name, base, len, &this);
+ err = lookup_one_common(&init_user_ns, name, base, len, &this);
if (err)
return ERR_PTR(err);
@@ -2655,7 +2679,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
WARN_ON_ONCE(!inode_is_locked(base->d_inode));
- err = lookup_one_len_common(name, base, len, &this);
+ err = lookup_one_common(&init_user_ns, name, base, len, &this);
if (err)
return ERR_PTR(err);
@@ -2665,6 +2689,36 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
EXPORT_SYMBOL(lookup_one_len);
/**
+ * lookup_one - filesystem helper to lookup single pathname component
+ * @mnt_userns: user namespace of the mount the lookup is performed from
+ * @name: pathname component to lookup
+ * @base: base directory to lookup from
+ * @len: length of @name to use for the lookup
+ *
+ * Note that this routine is purely a helper for filesystem usage and should
+ * not be called by generic code.
+ *
+ * The caller must hold the inode lock of @base; this is checked with
+ * inode_is_locked() and triggers a one-time warning otherwise.
+ *
+ * Return: ERR_PTR() of the error from lookup_one_common() if that fails,
+ * otherwise the dentry found by lookup_dcache() or, when that returns
+ * NULL, the result of __lookup_slow().
+ */
+struct dentry *lookup_one(struct user_namespace *mnt_userns, const char *name,
+ struct dentry *base, int len)
+{
+ struct dentry *dentry;
+ struct qstr this;
+ int err;
+
+ WARN_ON_ONCE(!inode_is_locked(base->d_inode));
+
+ /* Fills in @this and performs the permission check against @mnt_userns. */
+ err = lookup_one_common(mnt_userns, name, base, len, &this);
+ if (err)
+ return ERR_PTR(err);
+
+ /* Try the dcache first; fall back to the slow lookup on a miss. */
+ dentry = lookup_dcache(&this, base, 0);
+ return dentry ? dentry : __lookup_slow(&this, base, 0);
+}
+EXPORT_SYMBOL(lookup_one);
+
+/**
* lookup_one_len_unlocked - filesystem helper to lookup single pathname component
* @name: pathname component to lookup
* @base: base directory to lookup from
@@ -2683,7 +2737,7 @@ struct dentry *lookup_one_len_unlocked(const char *name,
int err;
struct dentry *ret;
- err = lookup_one_len_common(name, base, len, &this);
+ err = lookup_one_common(&init_user_ns, name, base, len, &this);
if (err)
return ERR_PTR(err);
@@ -3023,9 +3077,7 @@ static int handle_truncate(struct user_namespace *mnt_userns, struct file *filp)
/*
* Refuse to truncate files with mandatory locks held on them.
*/
- error = locks_verify_locked(filp);
- if (!error)
- error = security_path_truncate(path);
+ error = security_path_truncate(path);
if (!error) {
error = do_truncate(mnt_userns, path->dentry, 0,
ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
@@ -3566,7 +3618,7 @@ struct file *do_file_open_root(const struct path *root,
return file;
}
-static struct dentry *filename_create(int dfd, struct filename *name,
+static struct dentry *__filename_create(int dfd, struct filename *name,
struct path *path, unsigned int lookup_flags)
{
struct dentry *dentry = ERR_PTR(-EEXIST);
@@ -3582,9 +3634,9 @@ static struct dentry *filename_create(int dfd, struct filename *name,
*/
lookup_flags &= LOOKUP_REVAL;
- name = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
- if (IS_ERR(name))
- return ERR_CAST(name);
+ error = __filename_parentat(dfd, name, lookup_flags, path, &last, &type);
+ if (error)
+ return ERR_PTR(error);
/*
* Yucky last component or no last component at all?
@@ -3622,7 +3674,6 @@ static struct dentry *filename_create(int dfd, struct filename *name,
error = err2;
goto fail;
}
- putname(name);
return dentry;
fail:
dput(dentry);
@@ -3633,10 +3684,18 @@ unlock:
mnt_drop_write(path->mnt);
out:
path_put(path);
- putname(name);
return dentry;
}
+/*
+ * Wrapper around __filename_create() that additionally drops @name with
+ * putname() before returning the helper's result, on success and on
+ * failure alike.
+ */
+static inline struct dentry *filename_create(int dfd, struct filename *name,
+ struct path *path, unsigned int lookup_flags)
+{
+ struct dentry *res = __filename_create(dfd, name, path, lookup_flags);
+
+ putname(name);
+ return res;
+}
+
struct dentry *kern_path_create(int dfd, const char *pathname,
struct path *path, unsigned int lookup_flags)
{
@@ -3725,7 +3784,7 @@ static int may_mknod(umode_t mode)
}
}
-static long do_mknodat(int dfd, const char __user *filename, umode_t mode,
+static int do_mknodat(int dfd, struct filename *name, umode_t mode,
unsigned int dev)
{
struct user_namespace *mnt_userns;
@@ -3736,17 +3795,18 @@ static long do_mknodat(int dfd, const char __user *filename, umode_t mode,
error = may_mknod(mode);
if (error)
- return error;
+ goto out1;
retry:
- dentry = user_path_create(dfd, filename, &path, lookup_flags);
+ dentry = __filename_create(dfd, name, &path, lookup_flags);
+ error = PTR_ERR(dentry);
if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+ goto out1;
if (!IS_POSIXACL(path.dentry->d_inode))
mode &= ~current_umask();
error = security_path_mknod(&path, dentry, mode, dev);
if (error)
- goto out;
+ goto out2;
mnt_userns = mnt_user_ns(path.mnt);
switch (mode & S_IFMT) {
@@ -3765,24 +3825,26 @@ retry:
dentry, mode, 0);
break;
}
-out:
+out2:
done_path_create(&path, dentry);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
+out1:
+ putname(name);
return error;
}
SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
unsigned int, dev)
{
- return do_mknodat(dfd, filename, mode, dev);
+ return do_mknodat(dfd, getname(filename), mode, dev);
}
SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
{
- return do_mknodat(AT_FDCWD, filename, mode, dev);
+ return do_mknodat(AT_FDCWD, getname(filename), mode, dev);
}
/**
@@ -3827,7 +3889,7 @@ int vfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
}
EXPORT_SYMBOL(vfs_mkdir);
-static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
+int do_mkdirat(int dfd, struct filename *name, umode_t mode)
{
struct dentry *dentry;
struct path path;
@@ -3835,9 +3897,10 @@ static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
unsigned int lookup_flags = LOOKUP_DIRECTORY;
retry:
- dentry = user_path_create(dfd, pathname, &path, lookup_flags);
+ dentry = __filename_create(dfd, name, &path, lookup_flags);
+ error = PTR_ERR(dentry);
if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+ goto out_putname;
if (!IS_POSIXACL(path.dentry->d_inode))
mode &= ~current_umask();
@@ -3853,17 +3916,19 @@ retry:
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
+out_putname:
+ putname(name);
return error;
}
SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
{
- return do_mkdirat(dfd, pathname, mode);
+ return do_mkdirat(dfd, getname(pathname), mode);
}
SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
{
- return do_mkdirat(AT_FDCWD, pathname, mode);
+ return do_mkdirat(AT_FDCWD, getname(pathname), mode);
}
/**
@@ -3921,62 +3986,62 @@ out:
}
EXPORT_SYMBOL(vfs_rmdir);
-long do_rmdir(int dfd, struct filename *name)
+int do_rmdir(int dfd, struct filename *name)
{
struct user_namespace *mnt_userns;
- int error = 0;
+ int error;
struct dentry *dentry;
struct path path;
struct qstr last;
int type;
unsigned int lookup_flags = 0;
retry:
- name = filename_parentat(dfd, name, lookup_flags,
- &path, &last, &type);
- if (IS_ERR(name))
- return PTR_ERR(name);
+ error = __filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
+ if (error)
+ goto exit1;
switch (type) {
case LAST_DOTDOT:
error = -ENOTEMPTY;
- goto exit1;
+ goto exit2;
case LAST_DOT:
error = -EINVAL;
- goto exit1;
+ goto exit2;
case LAST_ROOT:
error = -EBUSY;
- goto exit1;
+ goto exit2;
}
error = mnt_want_write(path.mnt);
if (error)
- goto exit1;
+ goto exit2;
inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
dentry = __lookup_hash(&last, path.dentry, lookup_flags);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
- goto exit2;
+ goto exit3;
if (!dentry->d_inode) {
error = -ENOENT;
- goto exit3;
+ goto exit4;
}
error = security_path_rmdir(&path, dentry);
if (error)
- goto exit3;
+ goto exit4;
mnt_userns = mnt_user_ns(path.mnt);
error = vfs_rmdir(mnt_userns, path.dentry->d_inode, dentry);
-exit3:
+exit4:
dput(dentry);
-exit2:
+exit3:
inode_unlock(path.dentry->d_inode);
mnt_drop_write(path.mnt);
-exit1:
+exit2:
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
+exit1:
putname(name);
return error;
}
@@ -4059,7 +4124,7 @@ EXPORT_SYMBOL(vfs_unlink);
* writeout happening, and we don't want to prevent access to the directory
* while waiting on the I/O.
*/
-long do_unlinkat(int dfd, struct filename *name)
+int do_unlinkat(int dfd, struct filename *name)
{
int error;
struct dentry *dentry;
@@ -4070,17 +4135,17 @@ long do_unlinkat(int dfd, struct filename *name)
struct inode *delegated_inode = NULL;
unsigned int lookup_flags = 0;
retry:
- name = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
- if (IS_ERR(name))
- return PTR_ERR(name);
+ error = __filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
+ if (error)
+ goto exit1;
error = -EISDIR;
if (type != LAST_NORM)
- goto exit1;
+ goto exit2;
error = mnt_want_write(path.mnt);
if (error)
- goto exit1;
+ goto exit2;
retry_deleg:
inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
dentry = __lookup_hash(&last, path.dentry, lookup_flags);
@@ -4097,11 +4162,11 @@ retry_deleg:
ihold(inode);
error = security_path_unlink(&path, dentry);
if (error)
- goto exit2;
+ goto exit3;
mnt_userns = mnt_user_ns(path.mnt);
error = vfs_unlink(mnt_userns, path.dentry->d_inode, dentry,
&delegated_inode);
-exit2:
+exit3:
dput(dentry);
}
inode_unlock(path.dentry->d_inode);
@@ -4114,13 +4179,14 @@ exit2:
goto retry_deleg;
}
mnt_drop_write(path.mnt);
-exit1:
+exit2:
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
inode = NULL;
goto retry;
}
+exit1:
putname(name);
return error;
@@ -4131,7 +4197,7 @@ slashes:
error = -EISDIR;
else
error = -ENOTDIR;
- goto exit2;
+ goto exit3;
}
SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
@@ -4186,23 +4252,22 @@ int vfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
}
EXPORT_SYMBOL(vfs_symlink);
-static long do_symlinkat(const char __user *oldname, int newdfd,
- const char __user *newname)
+int do_symlinkat(struct filename *from, int newdfd, struct filename *to)
{
int error;
- struct filename *from;
struct dentry *dentry;
struct path path;
unsigned int lookup_flags = 0;
- from = getname(oldname);
- if (IS_ERR(from))
- return PTR_ERR(from);
+ if (IS_ERR(from)) {
+ error = PTR_ERR(from);
+ goto out_putnames;
+ }
retry:
- dentry = user_path_create(newdfd, newname, &path, lookup_flags);
+ dentry = __filename_create(newdfd, to, &path, lookup_flags);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
- goto out_putname;
+ goto out_putnames;
error = security_path_symlink(&path, dentry, from->name);
if (!error) {
@@ -4217,7 +4282,8 @@ retry:
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
-out_putname:
+out_putnames:
+ putname(to);
putname(from);
return error;
}
@@ -4225,12 +4291,12 @@ out_putname:
SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
int, newdfd, const char __user *, newname)
{
- return do_symlinkat(oldname, newdfd, newname);
+ return do_symlinkat(getname(oldname), newdfd, getname(newname));
}
SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname)
{
- return do_symlinkat(oldname, AT_FDCWD, newname);
+ return do_symlinkat(getname(oldname), AT_FDCWD, getname(newname));
}
/**
@@ -4331,8 +4397,8 @@ EXPORT_SYMBOL(vfs_link);
* with linux 2.0, and to avoid hard-linking to directories
* and other special files. --ADM
*/
-static int do_linkat(int olddfd, const char __user *oldname, int newdfd,
- const char __user *newname, int flags)
+int do_linkat(int olddfd, struct filename *old, int newdfd,
+ struct filename *new, int flags)
{
struct user_namespace *mnt_userns;
struct dentry *new_dentry;
@@ -4341,31 +4407,32 @@ static int do_linkat(int olddfd, const char __user *oldname, int newdfd,
int how = 0;
int error;
- if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
- return -EINVAL;
+ if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) {
+ error = -EINVAL;
+ goto out_putnames;
+ }
/*
* To use null names we require CAP_DAC_READ_SEARCH
* This ensures that not everyone will be able to create
* handlink using the passed filedescriptor.
*/
- if (flags & AT_EMPTY_PATH) {
- if (!capable(CAP_DAC_READ_SEARCH))
- return -ENOENT;
- how = LOOKUP_EMPTY;
+ if (flags & AT_EMPTY_PATH && !capable(CAP_DAC_READ_SEARCH)) {
+ error = -ENOENT;
+ goto out_putnames;
}
if (flags & AT_SYMLINK_FOLLOW)
how |= LOOKUP_FOLLOW;
retry:
- error = user_path_at(olddfd, oldname, how, &old_path);
+ error = __filename_lookup(olddfd, old, how, &old_path, NULL);
if (error)
- return error;
+ goto out_putnames;
- new_dentry = user_path_create(newdfd, newname, &new_path,
+ new_dentry = __filename_create(newdfd, new, &new_path,
(how & LOOKUP_REVAL));
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
- goto out;
+ goto out_putpath;
error = -EXDEV;
if (old_path.mnt != new_path.mnt)
@@ -4393,8 +4460,11 @@ out_dput:
how |= LOOKUP_REVAL;
goto retry;
}
-out:
+out_putpath:
path_put(&old_path);
+out_putnames:
+ putname(old);
+ putname(new);
return error;
}
@@ -4402,12 +4472,13 @@ out:
SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
int, newdfd, const char __user *, newname, int, flags)
{
- return do_linkat(olddfd, oldname, newdfd, newname, flags);
+ return do_linkat(olddfd, getname_uflags(oldname, flags),
+ newdfd, getname(newname), flags);
}
SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname)
{
- return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
+ return do_linkat(AT_FDCWD, getname(oldname), AT_FDCWD, getname(newname), 0);
}
/**
@@ -4602,29 +4673,25 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
int error = -EINVAL;
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
- goto put_both;
+ goto put_names;
if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) &&
(flags & RENAME_EXCHANGE))
- goto put_both;
+ goto put_names;
if (flags & RENAME_EXCHANGE)
target_flags = 0;
retry:
- from = filename_parentat(olddfd, from, lookup_flags, &old_path,
+ error = __filename_parentat(olddfd, from, lookup_flags, &old_path,
&old_last, &old_type);
- if (IS_ERR(from)) {
- error = PTR_ERR(from);
- goto put_new;
- }
+ if (error)
+ goto put_names;
- to = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
+ error = __filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
&new_type);
- if (IS_ERR(to)) {
- error = PTR_ERR(to);
+ if (error)
goto exit1;
- }
error = -EXDEV;
if (old_path.mnt != new_path.mnt)
@@ -4727,12 +4794,9 @@ exit1:
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
-put_both:
- if (!IS_ERR(from))
- putname(from);
-put_new:
- if (!IS_ERR(to))
- putname(to);
+put_names:
+ putname(from);
+ putname(to);
return error;
}
diff --git a/fs/namespace.c b/fs/namespace.c
index ab4174a3c802..12852363d90e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1715,18 +1715,14 @@ static inline bool may_mount(void)
return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}
-#ifdef CONFIG_MANDATORY_FILE_LOCKING
-static inline bool may_mandlock(void)
+static void warn_mandlock(void)
{
- return capable(CAP_SYS_ADMIN);
+ pr_warn_once("=======================================================\n"
+ "WARNING: The mand mount option has been deprecated and\n"
+ " and is ignored by this kernel. Remove the mand\n"
+ " option from the mount to silence this warning.\n"
+ "=======================================================\n");
}
-#else
-static inline bool may_mandlock(void)
-{
- pr_warn("VFS: \"mand\" mount option not supported");
- return false;
-}
-#endif
static int can_umount(const struct path *path, int flags)
{
@@ -1938,6 +1934,20 @@ void drop_collected_mounts(struct vfsmount *mnt)
namespace_unlock();
}
+static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
+{
+ struct mount *child;
+
+ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+ if (!is_subdir(child->mnt_mountpoint, dentry))
+ continue;
+
+ if (child->mnt.mnt_flags & MNT_LOCKED)
+ return true;
+ }
+ return false;
+}
+
/**
* clone_private_mount - create a private clone of a path
* @path: path to clone
@@ -1953,10 +1963,19 @@ struct vfsmount *clone_private_mount(const struct path *path)
struct mount *old_mnt = real_mount(path->mnt);
struct mount *new_mnt;
+ down_read(&namespace_sem);
if (IS_MNT_UNBINDABLE(old_mnt))
- return ERR_PTR(-EINVAL);
+ goto invalid;
+
+ if (!check_mnt(old_mnt))
+ goto invalid;
+
+ if (has_locked_children(old_mnt, path->dentry))
+ goto invalid;
new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
+ up_read(&namespace_sem);
+
if (IS_ERR(new_mnt))
return ERR_CAST(new_mnt);
@@ -1964,6 +1983,10 @@ struct vfsmount *clone_private_mount(const struct path *path)
new_mnt->mnt_ns = MNT_NS_INTERNAL;
return &new_mnt->mnt;
+
+invalid:
+ up_read(&namespace_sem);
+ return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(clone_private_mount);
@@ -2315,19 +2338,6 @@ static int do_change_type(struct path *path, int ms_flags)
return err;
}
-static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
-{
- struct mount *child;
- list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
- if (!is_subdir(child->mnt_mountpoint, dentry))
- continue;
-
- if (child->mnt.mnt_flags & MNT_LOCKED)
- return true;
- }
- return false;
-}
-
static struct mount *__do_loopback(struct path *old_path, int recurse)
{
struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);
@@ -2684,6 +2694,78 @@ out:
return ret;
}
+static int do_set_group(struct path *from_path, struct path *to_path)
+{
+ struct mount *from, *to;
+ int err;
+
+ from = real_mount(from_path->mnt);
+ to = real_mount(to_path->mnt);
+
+ namespace_lock();
+
+ err = -EINVAL;
+ /* To and From must be mounted */
+ if (!is_mounted(&from->mnt))
+ goto out;
+ if (!is_mounted(&to->mnt))
+ goto out;
+
+ err = -EPERM;
+ /* We should be allowed to modify mount namespaces of both mounts */
+ if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ goto out;
+ if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ goto out;
+
+ err = -EINVAL;
+ /* To and From paths should be mount roots */
+ if (from_path->dentry != from_path->mnt->mnt_root)
+ goto out;
+ if (to_path->dentry != to_path->mnt->mnt_root)
+ goto out;
+
+ /* Setting sharing groups is only allowed across same superblock */
+ if (from->mnt.mnt_sb != to->mnt.mnt_sb)
+ goto out;
+
+ /* From mount root should be wider than To mount root */
+ if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root))
+ goto out;
+
+ /* From mount should not have locked children in place of To's root */
+ if (has_locked_children(from, to->mnt.mnt_root))
+ goto out;
+
+ /* Setting sharing groups is only allowed on private mounts */
+ if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to))
+ goto out;
+
+ /* From should not be private */
+ if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from))
+ goto out;
+
+ if (IS_MNT_SLAVE(from)) {
+ struct mount *m = from->mnt_master;
+
+ list_add(&to->mnt_slave, &m->mnt_slave_list);
+ to->mnt_master = m;
+ }
+
+ if (IS_MNT_SHARED(from)) {
+ to->mnt_group_id = from->mnt_group_id;
+ list_add(&to->mnt_share, &from->mnt_share);
+ lock_mount_hash();
+ set_mnt_shared(to);
+ unlock_mount_hash();
+ }
+
+ err = 0;
+out:
+ namespace_unlock();
+ return err;
+}
+
static int do_move_mount(struct path *old_path, struct path *new_path)
{
struct mnt_namespace *ns;
@@ -3179,8 +3261,8 @@ int path_mount(const char *dev_name, struct path *path,
return ret;
if (!may_mount())
return -EPERM;
- if ((flags & SB_MANDLOCK) && !may_mandlock())
- return -EPERM;
+ if (flags & SB_MANDLOCK)
+ warn_mandlock();
/* Default to relatime unless overriden */
if (!(flags & MS_NOATIME))
@@ -3563,9 +3645,8 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
goto err_unlock;
- ret = -EPERM;
- if ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock())
- goto err_unlock;
+ if (fc->sb_flags & SB_MANDLOCK)
+ warn_mandlock();
newmount.mnt = vfs_create_mount(fc);
if (IS_ERR(newmount.mnt)) {
@@ -3669,7 +3750,10 @@ SYSCALL_DEFINE5(move_mount,
if (ret < 0)
goto out_to;
- ret = do_move_mount(&from_path, &to_path);
+ if (flags & MOVE_MOUNT_SET_GROUP)
+ ret = do_set_group(&from_path, &to_path);
+ else
+ ret = do_move_mount(&from_path, &to_path);
out_to:
path_put(&to_path);
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index 37a1a88df771..d772c20bbfd1 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -180,5 +180,5 @@ const struct export_operations nfs_export_ops = {
.fetch_iversion = nfs_fetch_iversion,
.flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
- EXPORT_OP_NOATOMIC_ATTR,
+ EXPORT_OP_NOATOMIC_ATTR|EXPORT_OP_SYNC_LOCKS,
};
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 1fef107961bc..aa353fd58240 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -806,9 +806,8 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
nfs_inc_stats(inode, NFSIOS_VFSLOCK);
- /* No mandatory locks over NFS */
- if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
- goto out_err;
+ if (fl->fl_flags & FL_RECLAIM)
+ return -ENOGRACE;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
is_local = 1;
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 3f5b3d7b62b7..606fa155c28a 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -25,9 +25,11 @@
* Note: we hold the dentry use count while the file is open.
*/
static __be32
-nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
+nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
+ int mode)
{
__be32 nfserr;
+ int access;
struct svc_fh fh;
/* must initialize before using! but maxsize doesn't matter */
@@ -36,7 +38,9 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size);
fh.fh_export = NULL;
- nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
+ access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ;
+ access |= NFSD_MAY_LOCK;
+ nfserr = nfsd_open(rqstp, &fh, S_IFREG, access, filp);
fh_put(&fh);
/* We return nlm error codes as nlm doesn't know
* about nfsd, but nfsd does know about nlm..
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fa67ecd5fe63..42356416f0a0 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2687,9 +2687,9 @@ static void force_expire_client(struct nfs4_client *clp)
trace_nfsd_clid_admin_expired(&clp->cl_clientid);
- spin_lock(&clp->cl_lock);
+ spin_lock(&nn->client_lock);
clp->cl_time = 0;
- spin_unlock(&clp->cl_lock);
+ spin_unlock(&nn->client_lock);
wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0);
spin_lock(&nn->client_lock);
@@ -5735,16 +5735,6 @@ check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid,
NFS4_SHARE_DENY_READ);
}
-/*
- * Allow READ/WRITE during grace period on recovered state only for files
- * that are not able to provide mandatory locking.
- */
-static inline int
-grace_disallows_io(struct net *net, struct inode *inode)
-{
- return opens_in_grace(net) && mandatory_lock(inode);
-}
-
static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session)
{
/*
@@ -6026,7 +6016,6 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
stateid_t *stateid, int flags, struct nfsd_file **nfp,
struct nfs4_stid **cstid)
{
- struct inode *ino = d_inode(fhp->fh_dentry);
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct nfs4_stid *s = NULL;
@@ -6035,9 +6024,6 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
if (nfp)
*nfp = NULL;
- if (grace_disallows_io(net, ino))
- return nfserr_grace;
-
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
status = check_special_stateids(net, fhp, stateid, flags);
goto done;
@@ -6835,6 +6821,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_blocked_lock *nbl = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
+ struct super_block *sb;
__be32 status = 0;
int lkflg;
int err;
@@ -6856,6 +6843,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dprintk("NFSD: nfsd4_lock: permission denied!\n");
return status;
}
+ sb = cstate->current_fh.fh_dentry->d_sb;
if (lock->lk_is_new) {
if (nfsd4_has_session(cstate))
@@ -6901,10 +6889,14 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!locks_in_grace(net) && lock->lk_reclaim)
goto out;
+ if (lock->lk_reclaim)
+ fl_flags |= FL_RECLAIM;
+
fp = lock_stp->st_stid.sc_file;
switch (lock->lk_type) {
case NFS4_READW_LT:
- if (nfsd4_has_session(cstate))
+ if (nfsd4_has_session(cstate) &&
+ !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS))
fl_flags |= FL_SLEEP;
fallthrough;
case NFS4_READ_LT:
@@ -6916,7 +6908,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fl_type = F_RDLCK;
break;
case NFS4_WRITEW_LT:
- if (nfsd4_has_session(cstate))
+ if (nfsd4_has_session(cstate) &&
+ !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS))
fl_flags |= FL_SLEEP;
fallthrough;
case NFS4_WRITE_LT:
@@ -7036,8 +7029,7 @@ out:
/*
* The NFSv4 spec allows a client to do a LOCKT without holding an OPEN,
* so we do a temporary open here just to get an open file to pass to
- * vfs_test_lock. (Arguably perhaps test_lock should be done with an
- * inode operation.)
+ * vfs_test_lock.
*/
static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
{
@@ -7052,7 +7044,9 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct
NFSD_MAY_READ));
if (err)
goto out;
+ lock->fl_file = nf->nf_file;
err = nfserrno(vfs_test_lock(nf->nf_file, lock));
+ lock->fl_file = NULL;
out:
fh_unlock(fhp);
nfsd_file_put(nf);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 60d7c59e7935..90fcd6178823 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -881,6 +881,7 @@ nfserrno (int errno)
{ nfserr_serverfault, -ENFILE },
{ nfserr_io, -EUCLEAN },
{ nfserr_perm, -ENOKEY },
+ { nfserr_no_grace, -ENOGRACE},
};
int i;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index adaec43548d1..538520957a81 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -400,18 +400,16 @@ TRACE_EVENT(nfsd_dirent,
TP_STRUCT__entry(
__field(u32, fh_hash)
__field(u64, ino)
- __field(int, len)
- __dynamic_array(unsigned char, name, namlen)
+ __string_len(name, name, namlen)
),
TP_fast_assign(
__entry->fh_hash = fhp ? knfsd_fh_hash(&fhp->fh_handle) : 0;
__entry->ino = ino;
- __entry->len = namlen;
- memcpy(__get_str(name), name, namlen);
+ __assign_str_len(name, name, namlen)
),
- TP_printk("fh_hash=0x%08x ino=%llu name=%.*s",
- __entry->fh_hash, __entry->ino,
- __entry->len, __get_str(name))
+ TP_printk("fh_hash=0x%08x ino=%llu name=%s",
+ __entry->fh_hash, __entry->ino, __get_str(name)
+ )
)
#include "state.h"
@@ -608,7 +606,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class,
__array(unsigned char, addr, sizeof(struct sockaddr_in6))
__field(unsigned long, flavor)
__array(unsigned char, verifier, NFS4_VERIFIER_SIZE)
- __dynamic_array(char, name, clp->cl_name.len + 1)
+ __string_len(name, name, clp->cl_name.len)
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
@@ -618,8 +616,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class,
__entry->flavor = clp->cl_cred.cr_flavor;
memcpy(__entry->verifier, (void *)&clp->cl_verifier,
NFS4_VERIFIER_SIZE);
- memcpy(__get_str(name), clp->cl_name.data, clp->cl_name.len);
- __get_str(name)[clp->cl_name.len] = '\0';
+ __assign_str_len(name, clp->cl_name.data, clp->cl_name.len);
),
TP_printk("addr=%pISpc name='%s' verifier=0x%s flavor=%s client=%08x:%08x",
__entry->addr, __get_str(name),
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a224a5e23cc1..738d564ca4ce 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -244,7 +244,6 @@ out_nfserr:
* returned. Otherwise the covered directory is returned.
* NOTE: this mountpoint crossing is not supported properly by all
* clients and is explicitly disallowed for NFSv3
- * NeilBrown <neilb@cse.unsw.edu.au>
*/
__be32
nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
@@ -333,7 +332,6 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct iattr *iap)
{
struct inode *inode = d_inode(fhp->fh_dentry);
- int host_err;
if (iap->ia_size < inode->i_size) {
__be32 err;
@@ -343,20 +341,7 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (err)
return err;
}
-
- host_err = get_write_access(inode);
- if (host_err)
- goto out_nfserrno;
-
- host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
- if (host_err)
- goto out_put_write_access;
- return 0;
-
-out_put_write_access:
- put_write_access(inode);
-out_nfserrno:
- return nfserrno(host_err);
+ return nfserrno(get_write_access(inode));
}
/*
@@ -750,13 +735,6 @@ __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
err = nfserr_perm;
if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
goto out;
- /*
- * We must ignore files (but only files) which might have mandatory
- * locks on them because there is no way to know if the accesser has
- * the lock.
- */
- if (S_ISREG((inode)->i_mode) && mandatory_lock(inode))
- goto out;
if (!inode->i_fop)
goto out;
@@ -847,26 +825,16 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct svc_rqst *rqstp = sd->u.data;
struct page **pp = rqstp->rq_next_page;
struct page *page = buf->page;
- size_t size;
-
- size = sd->len;
if (rqstp->rq_res.page_len == 0) {
- get_page(page);
- put_page(*rqstp->rq_next_page);
- *(rqstp->rq_next_page++) = page;
+ svc_rqst_replace_page(rqstp, page);
rqstp->rq_res.page_base = buf->offset;
- rqstp->rq_res.page_len = size;
} else if (page != pp[-1]) {
- get_page(page);
- if (*rqstp->rq_next_page)
- put_page(*rqstp->rq_next_page);
- *(rqstp->rq_next_page++) = page;
- rqstp->rq_res.page_len += size;
- } else
- rqstp->rq_res.page_len += size;
+ svc_rqst_replace_page(rqstp, page);
+ }
+ rqstp->rq_res.page_len += sd->len;
- return size;
+ return sd->len;
}
static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 4abd928b0bc8..f6b2d280aab5 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1053,7 +1053,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_time_gran = 1;
sb->s_max_links = NILFS_LINK_MAX;
- sb->s_bdi = bdi_get(sb->s_bdev->bd_bdi);
+ sb->s_bdi = bdi_get(sb->s_bdev->bd_disk->bdi);
err = load_nilfs(nilfs, sb);
if (err)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 64864fb40b40..6facdf476255 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/fanotify.h>
#include <linux/fcntl.h>
+#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
@@ -54,22 +55,27 @@ static int fanotify_max_queued_events __read_mostly;
#include <linux/sysctl.h>
+static long ft_zero = 0;
+static long ft_int_max = INT_MAX;
+
struct ctl_table fanotify_table[] = {
{
.procname = "max_user_groups",
.data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &ft_zero,
+ .extra2 = &ft_int_max,
},
{
.procname = "max_user_marks",
.data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &ft_zero,
+ .extra2 = &ft_int_max,
},
{
.procname = "max_queued_events",
@@ -104,8 +110,10 @@ struct kmem_cache *fanotify_path_event_cachep __read_mostly;
struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
#define FANOTIFY_EVENT_ALIGN 4
-#define FANOTIFY_INFO_HDR_LEN \
+#define FANOTIFY_FID_INFO_HDR_LEN \
(sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle))
+#define FANOTIFY_PIDFD_INFO_HDR_LEN \
+ sizeof(struct fanotify_event_info_pidfd)
static int fanotify_fid_info_len(int fh_len, int name_len)
{
@@ -114,10 +122,11 @@ static int fanotify_fid_info_len(int fh_len, int name_len)
if (name_len)
info_len += name_len + 1;
- return roundup(FANOTIFY_INFO_HDR_LEN + info_len, FANOTIFY_EVENT_ALIGN);
+ return roundup(FANOTIFY_FID_INFO_HDR_LEN + info_len,
+ FANOTIFY_EVENT_ALIGN);
}
-static int fanotify_event_info_len(unsigned int fid_mode,
+static int fanotify_event_info_len(unsigned int info_mode,
struct fanotify_event *event)
{
struct fanotify_info *info = fanotify_event_info(event);
@@ -128,7 +137,8 @@ static int fanotify_event_info_len(unsigned int fid_mode,
if (dir_fh_len) {
info_len += fanotify_fid_info_len(dir_fh_len, info->name_len);
- } else if ((fid_mode & FAN_REPORT_NAME) && (event->mask & FAN_ONDIR)) {
+ } else if ((info_mode & FAN_REPORT_NAME) &&
+ (event->mask & FAN_ONDIR)) {
/*
* With group flag FAN_REPORT_NAME, if name was not recorded in
* event on a directory, we will report the name ".".
@@ -136,6 +146,9 @@ static int fanotify_event_info_len(unsigned int fid_mode,
dot_len = 1;
}
+ if (info_mode & FAN_REPORT_PIDFD)
+ info_len += FANOTIFY_PIDFD_INFO_HDR_LEN;
+
if (fh_len)
info_len += fanotify_fid_info_len(fh_len, dot_len);
@@ -171,7 +184,7 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group,
size_t event_size = FAN_EVENT_METADATA_LEN;
struct fanotify_event *event = NULL;
struct fsnotify_event *fsn_event;
- unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
+ unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES);
pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
@@ -181,8 +194,8 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group,
goto out;
event = FANOTIFY_E(fsn_event);
- if (fid_mode)
- event_size += fanotify_event_info_len(fid_mode, event);
+ if (info_mode)
+ event_size += fanotify_event_info_len(info_mode, event);
if (event_size > count) {
event = ERR_PTR(-EINVAL);
@@ -303,9 +316,10 @@ static int process_access_response(struct fsnotify_group *group,
return -ENOENT;
}
-static int copy_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
- int info_type, const char *name, size_t name_len,
- char __user *buf, size_t count)
+static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
+ int info_type, const char *name,
+ size_t name_len,
+ char __user *buf, size_t count)
{
struct fanotify_event_info_fid info = { };
struct file_handle handle = { };
@@ -398,6 +412,117 @@ static int copy_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
return info_len;
}
+static int copy_pidfd_info_to_user(int pidfd,
+ char __user *buf,
+ size_t count)
+{
+ struct fanotify_event_info_pidfd info = { };
+ size_t info_len = FANOTIFY_PIDFD_INFO_HDR_LEN;
+
+ if (WARN_ON_ONCE(info_len > count))
+ return -EFAULT;
+
+ info.hdr.info_type = FAN_EVENT_INFO_TYPE_PIDFD;
+ info.hdr.len = info_len;
+ info.pidfd = pidfd;
+
+ if (copy_to_user(buf, &info, info_len))
+ return -EFAULT;
+
+ return info_len;
+}
+
+static int copy_info_records_to_user(struct fanotify_event *event,
+ struct fanotify_info *info,
+ unsigned int info_mode, int pidfd,
+ char __user *buf, size_t count)
+{
+ int ret, total_bytes = 0, info_type = 0;
+ unsigned int fid_mode = info_mode & FANOTIFY_FID_BITS;
+ unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
+
+ /*
+ * Event info records order is as follows: dir fid + name, child fid.
+ */
+ if (fanotify_event_dir_fh_len(event)) {
+ info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME :
+ FAN_EVENT_INFO_TYPE_DFID;
+ ret = copy_fid_info_to_user(fanotify_event_fsid(event),
+ fanotify_info_dir_fh(info),
+ info_type,
+ fanotify_info_name(info),
+ info->name_len, buf, count);
+ if (ret < 0)
+ return ret;
+
+ buf += ret;
+ count -= ret;
+ total_bytes += ret;
+ }
+
+ if (fanotify_event_object_fh_len(event)) {
+ const char *dot = NULL;
+ int dot_len = 0;
+
+ if (fid_mode == FAN_REPORT_FID || info_type) {
+ /*
+ * With only group flag FAN_REPORT_FID only type FID is
+ * reported. Second info record type is always FID.
+ */
+ info_type = FAN_EVENT_INFO_TYPE_FID;
+ } else if ((fid_mode & FAN_REPORT_NAME) &&
+ (event->mask & FAN_ONDIR)) {
+ /*
+ * With group flag FAN_REPORT_NAME, if name was not
+ * recorded in an event on a directory, report the name
+ * "." with info type DFID_NAME.
+ */
+ info_type = FAN_EVENT_INFO_TYPE_DFID_NAME;
+ dot = ".";
+ dot_len = 1;
+ } else if ((event->mask & ALL_FSNOTIFY_DIRENT_EVENTS) ||
+ (event->mask & FAN_ONDIR)) {
+ /*
+ * With group flag FAN_REPORT_DIR_FID, a single info
+ * record has type DFID for directory entry modification
+ * event and for event on a directory.
+ */
+ info_type = FAN_EVENT_INFO_TYPE_DFID;
+ } else {
+ /*
+ * With group flags FAN_REPORT_DIR_FID|FAN_REPORT_FID,
+ * a single info record has type FID for event on a
+ * non-directory, when there is no directory to report.
+ * For example, on FAN_DELETE_SELF event.
+ */
+ info_type = FAN_EVENT_INFO_TYPE_FID;
+ }
+
+ ret = copy_fid_info_to_user(fanotify_event_fsid(event),
+ fanotify_event_object_fh(event),
+ info_type, dot, dot_len,
+ buf, count);
+ if (ret < 0)
+ return ret;
+
+ buf += ret;
+ count -= ret;
+ total_bytes += ret;
+ }
+
+ if (pidfd_mode) {
+ ret = copy_pidfd_info_to_user(pidfd, buf, count);
+ if (ret < 0)
+ return ret;
+
+ buf += ret;
+ count -= ret;
+ total_bytes += ret;
+ }
+
+ return total_bytes;
+}
+
static ssize_t copy_event_to_user(struct fsnotify_group *group,
struct fanotify_event *event,
char __user *buf, size_t count)
@@ -405,15 +530,15 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
struct fanotify_event_metadata metadata;
struct path *path = fanotify_event_path(event);
struct fanotify_info *info = fanotify_event_info(event);
- unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
+ unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES);
+ unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
struct file *f = NULL;
- int ret, fd = FAN_NOFD;
- int info_type = 0;
+ int ret, pidfd = FAN_NOPIDFD, fd = FAN_NOFD;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
metadata.event_len = FAN_EVENT_METADATA_LEN +
- fanotify_event_info_len(fid_mode, event);
+ fanotify_event_info_len(info_mode, event);
metadata.metadata_len = FAN_EVENT_METADATA_LEN;
metadata.vers = FANOTIFY_METADATA_VERSION;
metadata.reserved = 0;
@@ -442,6 +567,33 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
}
metadata.fd = fd;
+ if (pidfd_mode) {
+ /*
+ * Complain if the FAN_REPORT_PIDFD and FAN_REPORT_TID mutual
+ * exclusion is ever lifted. At the time of incorporating pidfd
+ * support within fanotify, the pidfd API only supported the
+ * creation of pidfds for thread-group leaders.
+ */
+ WARN_ON_ONCE(FAN_GROUP_FLAG(group, FAN_REPORT_TID));
+
+ /*
+ * The PIDTYPE_TGID check for an event->pid is performed
+ * preemptively in an attempt to catch out cases where the event
+ * listener reads events after the event generating process has
+ * already terminated. Report FAN_NOPIDFD to the event listener
+ * in those cases, with all other pidfd creation errors being
+ * reported as FAN_EPIDFD.
+ */
+ if (metadata.pid == 0 ||
+ !pid_has_task(event->pid, PIDTYPE_TGID)) {
+ pidfd = FAN_NOPIDFD;
+ } else {
+ pidfd = pidfd_create(event->pid, 0);
+ if (pidfd < 0)
+ pidfd = FAN_EPIDFD;
+ }
+ }
+
ret = -EFAULT;
/*
* Sanity check copy size in case get_one_event() and
@@ -462,67 +614,11 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
if (f)
fd_install(fd, f);
- /* Event info records order is: dir fid + name, child fid */
- if (fanotify_event_dir_fh_len(event)) {
- info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME :
- FAN_EVENT_INFO_TYPE_DFID;
- ret = copy_info_to_user(fanotify_event_fsid(event),
- fanotify_info_dir_fh(info),
- info_type, fanotify_info_name(info),
- info->name_len, buf, count);
+ if (info_mode) {
+ ret = copy_info_records_to_user(event, info, info_mode, pidfd,
+ buf, count);
if (ret < 0)
goto out_close_fd;
-
- buf += ret;
- count -= ret;
- }
-
- if (fanotify_event_object_fh_len(event)) {
- const char *dot = NULL;
- int dot_len = 0;
-
- if (fid_mode == FAN_REPORT_FID || info_type) {
- /*
- * With only group flag FAN_REPORT_FID only type FID is
- * reported. Second info record type is always FID.
- */
- info_type = FAN_EVENT_INFO_TYPE_FID;
- } else if ((fid_mode & FAN_REPORT_NAME) &&
- (event->mask & FAN_ONDIR)) {
- /*
- * With group flag FAN_REPORT_NAME, if name was not
- * recorded in an event on a directory, report the
- * name "." with info type DFID_NAME.
- */
- info_type = FAN_EVENT_INFO_TYPE_DFID_NAME;
- dot = ".";
- dot_len = 1;
- } else if ((event->mask & ALL_FSNOTIFY_DIRENT_EVENTS) ||
- (event->mask & FAN_ONDIR)) {
- /*
- * With group flag FAN_REPORT_DIR_FID, a single info
- * record has type DFID for directory entry modification
- * event and for event on a directory.
- */
- info_type = FAN_EVENT_INFO_TYPE_DFID;
- } else {
- /*
- * With group flags FAN_REPORT_DIR_FID|FAN_REPORT_FID,
- * a single info record has type FID for event on a
- * non-directory, when there is no directory to report.
- * For example, on FAN_DELETE_SELF event.
- */
- info_type = FAN_EVENT_INFO_TYPE_FID;
- }
-
- ret = copy_info_to_user(fanotify_event_fsid(event),
- fanotify_event_object_fh(event),
- info_type, dot, dot_len, buf, count);
- if (ret < 0)
- goto out_close_fd;
-
- buf += ret;
- count -= ret;
}
return metadata.event_len;
@@ -532,6 +628,10 @@ out_close_fd:
put_unused_fd(fd);
fput(f);
}
+
+ if (pidfd >= 0)
+ close_fd(pidfd);
+
return ret;
}
@@ -1077,6 +1177,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
#endif
return -EINVAL;
+ /*
+ * A pidfd can only be returned for a thread-group leader; thus
+ * FAN_REPORT_PIDFD and FAN_REPORT_TID need to remain mutually
+ * exclusive.
+ */
+ if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID))
+ return -EINVAL;
+
if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
return -EINVAL;
@@ -1478,7 +1586,7 @@ static int __init fanotify_user_setup(void)
FANOTIFY_DEFAULT_MAX_USER_MARKS);
BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
- BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 10);
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 11);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 30d422b8c0fc..963e6ce75b96 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -87,15 +87,15 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
if (iput_inode)
iput(iput_inode);
- /* Wait for outstanding inode references from connectors */
- wait_var_event(&sb->s_fsnotify_inode_refs,
- !atomic_long_read(&sb->s_fsnotify_inode_refs));
}
void fsnotify_sb_delete(struct super_block *sb)
{
fsnotify_unmount_inodes(sb);
fsnotify_clear_marks_by_sb(sb);
+ /* Wait for outstanding object references from connectors */
+ wait_var_event(&sb->s_fsnotify_connectors,
+ !atomic_long_read(&sb->s_fsnotify_connectors));
}
/*
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index ff2063ec6b0f..87d8a50ee803 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -27,6 +27,21 @@ static inline struct super_block *fsnotify_conn_sb(
return container_of(conn->obj, struct super_block, s_fsnotify_marks);
}
+static inline struct super_block *fsnotify_connector_sb(
+ struct fsnotify_mark_connector *conn)
+{
+ switch (conn->type) {
+ case FSNOTIFY_OBJ_TYPE_INODE:
+ return fsnotify_conn_inode(conn)->i_sb;
+ case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
+ return fsnotify_conn_mount(conn)->mnt.mnt_sb;
+ case FSNOTIFY_OBJ_TYPE_SB:
+ return fsnotify_conn_sb(conn);
+ default:
+ return NULL;
+ }
+}
+
/* destroy all events sitting in this groups notification queue */
extern void fsnotify_flush_notify(struct fsnotify_group *group);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 98f61b31745a..62051247f6d2 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -55,22 +55,27 @@ struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
#include <linux/sysctl.h>
+static long it_zero = 0;
+static long it_int_max = INT_MAX;
+
struct ctl_table inotify_table[] = {
{
.procname = "max_user_instances",
.data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &it_zero,
+ .extra2 = &it_int_max,
},
{
.procname = "max_user_watches",
.data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &it_zero,
+ .extra2 = &it_int_max,
},
{
.procname = "max_queued_events",
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index d32ab349db74..95006d1d29ab 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -169,6 +169,37 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
}
}
+static void fsnotify_get_inode_ref(struct inode *inode)
+{
+ ihold(inode);
+ atomic_long_inc(&inode->i_sb->s_fsnotify_connectors);
+}
+
+static void fsnotify_put_inode_ref(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+
+ iput(inode);
+ if (atomic_long_dec_and_test(&sb->s_fsnotify_connectors))
+ wake_up_var(&sb->s_fsnotify_connectors);
+}
+
+static void fsnotify_get_sb_connectors(struct fsnotify_mark_connector *conn)
+{
+ struct super_block *sb = fsnotify_connector_sb(conn);
+
+ if (sb)
+ atomic_long_inc(&sb->s_fsnotify_connectors);
+}
+
+static void fsnotify_put_sb_connectors(struct fsnotify_mark_connector *conn)
+{
+ struct super_block *sb = fsnotify_connector_sb(conn);
+
+ if (sb && atomic_long_dec_and_test(&sb->s_fsnotify_connectors))
+ wake_up_var(&sb->s_fsnotify_connectors);
+}
+
static void *fsnotify_detach_connector_from_object(
struct fsnotify_mark_connector *conn,
unsigned int *type)
@@ -182,13 +213,13 @@ static void *fsnotify_detach_connector_from_object(
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = fsnotify_conn_inode(conn);
inode->i_fsnotify_mask = 0;
- atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs);
} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
}
+ fsnotify_put_sb_connectors(conn);
rcu_assign_pointer(*(conn->obj), NULL);
conn->obj = NULL;
conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
@@ -209,19 +240,12 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
/* Drop object reference originally held by a connector */
static void fsnotify_drop_object(unsigned int type, void *objp)
{
- struct inode *inode;
- struct super_block *sb;
-
if (!objp)
return;
/* Currently only inode references are passed to be dropped */
if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
return;
- inode = objp;
- sb = inode->i_sb;
- iput(inode);
- if (atomic_long_dec_and_test(&sb->s_fsnotify_inode_refs))
- wake_up_var(&sb->s_fsnotify_inode_refs);
+ fsnotify_put_inode_ref(objp);
}
void fsnotify_put_mark(struct fsnotify_mark *mark)
@@ -493,8 +517,12 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
conn->fsid.val[0] = conn->fsid.val[1] = 0;
conn->flags = 0;
}
- if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
- inode = igrab(fsnotify_conn_inode(conn));
+ if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
+ inode = fsnotify_conn_inode(conn);
+ fsnotify_get_inode_ref(inode);
+ }
+ fsnotify_get_sb_connectors(conn);
+
/*
* cmpxchg() provides the barrier so that readers of *connp can see
* only initialized structure
@@ -502,7 +530,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
if (cmpxchg(connp, NULL, conn)) {
/* Someone else created list structure for us */
if (inode)
- iput(inode);
+ fsnotify_put_inode_ref(inode);
kmem_cache_free(fsnotify_mark_connector_cachep, conn);
}
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index fab7c6a4a7d0..73a3854b2afb 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -101,8 +101,6 @@ int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl)
if (!(fl->fl_flags & FL_FLOCK))
return -ENOLCK;
- if (__mandatory_lock(inode))
- return -ENOLCK;
if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) ||
ocfs2_mount_local(osb))
@@ -121,8 +119,6 @@ int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl)
if (!(fl->fl_flags & FL_POSIX))
return -ENOLCK;
- if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
- return -ENOLCK;
return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl);
}
diff --git a/fs/open.c b/fs/open.c
index 94bef26ff1b6..daa324606a41 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -105,9 +105,7 @@ long vfs_truncate(const struct path *path, loff_t length)
if (error)
goto put_write_and_out;
- error = locks_verify_truncate(inode, NULL, length);
- if (!error)
- error = security_path_truncate(path);
+ error = security_path_truncate(path);
if (!error)
error = do_truncate(mnt_userns, path->dentry, length, 0, NULL);
@@ -189,9 +187,7 @@ long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
if (IS_APPEND(file_inode(f.file)))
goto out_putf;
sb_start_write(inode->i_sb);
- error = locks_verify_truncate(inode, f.file, length);
- if (!error)
- error = security_path_truncate(&f.file->f_path);
+ error = security_path_truncate(&f.file->f_path);
if (!error)
error = do_truncate(file_mnt_user_ns(f.file), dentry, length,
ATTR_MTIME | ATTR_CTIME, f.file);
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 41ebf52f1bbc..ebde05c9cf62 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -392,6 +392,7 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected,
*/
take_dentry_name_snapshot(&name, real);
this = lookup_one_len(name.name.name, connected, name.name.len);
+ release_dentry_name_snapshot(&name);
err = PTR_ERR(this);
if (IS_ERR(this)) {
goto fail;
@@ -406,7 +407,6 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected,
}
out:
- release_dentry_name_snapshot(&name);
dput(parent);
inode_unlock(dir);
return this;
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 4d53d3b7e5fe..d081faa55e83 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -392,6 +392,51 @@ out_unlock:
return ret;
}
+/*
+ * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
+ * due to lock order inversion between pipe->mutex in iter_file_splice_write()
+ * and file_start_write(real.file) in ovl_write_iter().
+ *
+ * So do everything ovl_write_iter() does and call iter_file_splice_write() on
+ * the real file.
+ */
+static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
+{
+ struct fd real;
+ const struct cred *old_cred;
+ struct inode *inode = file_inode(out);
+ struct inode *realinode = ovl_inode_real(inode);
+ ssize_t ret;
+
+ inode_lock(inode);
+ /* Update mode */
+ ovl_copyattr(realinode, inode);
+ ret = file_remove_privs(out);
+ if (ret)
+ goto out_unlock;
+
+ ret = ovl_real_fdget(out, &real);
+ if (ret)
+ goto out_unlock;
+
+ old_cred = ovl_override_creds(inode->i_sb);
+ file_start_write(real.file);
+
+ ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
+
+ file_end_write(real.file);
+ /* Update size */
+ ovl_copyattr(realinode, inode);
+ revert_creds(old_cred);
+ fdput(real);
+
+out_unlock:
+ inode_unlock(inode);
+
+ return ret;
+}
+
static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct fd real;
@@ -603,7 +648,7 @@ const struct file_operations ovl_file_operations = {
.fadvise = ovl_fadvise,
.flush = ovl_flush,
.splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
+ .splice_write = ovl_splice_write,
.copy_file_range = ovl_copy_file_range,
.remap_file_range = ovl_remap_file_range,
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index e8ad2c2c77dd..150fdf3bc68d 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -481,6 +481,8 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
}
this = lookup_one_len(p->name, dir, p->len);
if (IS_ERR_OR_NULL(this) || !this->d_inode) {
+ /* Mark a stale entry */
+ p->is_whiteout = true;
if (IS_ERR(this)) {
err = PTR_ERR(this);
this = NULL;
@@ -776,6 +778,9 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
if (err)
goto out;
}
+ }
+ /* ovl_cache_update_ino() sets is_whiteout on stale entry */
+ if (!p->is_whiteout) {
if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
break;
}
diff --git a/fs/pipe.c b/fs/pipe.c
index 8e6ef62aeb1c..6d4342bad9f1 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -363,10 +363,9 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
* _very_ unlikely case that the pipe was full, but we got
* no data.
*/
- if (unlikely(was_full)) {
+ if (unlikely(was_full))
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
- kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
- }
+ kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
/*
* But because we didn't read anything, at this point we can
@@ -385,12 +384,11 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
wake_next_reader = false;
__pipe_unlock(pipe);
- if (was_full) {
+ if (was_full)
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
- kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
- }
if (wake_next_reader)
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
+ kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
if (ret > 0)
file_accessed(filp);
return ret;
@@ -444,9 +442,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
#endif
/*
- * Epoll nonsensically wants a wakeup whether the pipe
- * was already empty or not.
- *
* If it wasn't empty we try to merge new data into
* the last buffer.
*
@@ -455,9 +450,9 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
* spanning multiple pages.
*/
head = pipe->head;
- was_empty = true;
+ was_empty = pipe_empty(head, pipe->tail);
chars = total_len & (PAGE_SIZE-1);
- if (chars && !pipe_empty(head, pipe->tail)) {
+ if (chars && !was_empty) {
unsigned int mask = pipe->ring_size - 1;
struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
int offset = buf->offset + buf->len;
@@ -568,10 +563,9 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
* become empty while we dropped the lock.
*/
__pipe_unlock(pipe);
- if (was_empty) {
+ if (was_empty)
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- }
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
__pipe_lock(pipe);
was_empty = pipe_empty(pipe->head, pipe->tail);
@@ -590,11 +584,13 @@ out:
* This is particularly important for small writes, because of
* how (for example) the GNU make jobserver uses small writes to
* wake up pending jobs
+ *
+ * Epoll nonsensically wants a wakeup whether the pipe
+ * was already empty or not.
*/
- if (was_empty) {
+ if (was_empty || pipe->poll_usage)
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- }
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
if (wake_next_writer)
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
@@ -654,6 +650,9 @@ pipe_poll(struct file *filp, poll_table *wait)
struct pipe_inode_info *pipe = filp->private_data;
unsigned int head, tail;
+ /* Epoll has some historical nasty semantics, this enables them */
+ pipe->poll_usage = 1;
+
/*
* Reading pipe state only -- no need for acquiring the semaphore.
*
diff --git a/fs/read_write.c b/fs/read_write.c
index 9db7adf160d2..af057c57bdc6 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -365,12 +365,8 @@ out_putf:
int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
{
- struct inode *inode;
- int retval = -EINVAL;
-
- inode = file_inode(file);
if (unlikely((ssize_t) count < 0))
- return retval;
+ return -EINVAL;
/*
* ranged mandatory locking does not apply to streams - it makes sense
@@ -381,19 +377,12 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
if (unlikely(pos < 0)) {
if (!unsigned_offsets(file))
- return retval;
+ return -EINVAL;
if (count >= -pos) /* both values are in 0..LLONG_MAX */
return -EOVERFLOW;
} else if (unlikely((loff_t) (pos + count) < 0)) {
if (!unsigned_offsets(file))
- return retval;
- }
-
- if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
- retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
- read_write == READ ? F_RDLCK : F_WRLCK);
- if (retval < 0)
- return retval;
+ return -EINVAL;
}
}
diff --git a/fs/remap_range.c b/fs/remap_range.c
index e4a5fdd7ad7b..6d4a9beaa097 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -99,24 +99,12 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in,
static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
bool write)
{
- struct inode *inode = file_inode(file);
-
if (unlikely(pos < 0 || len < 0))
return -EINVAL;
if (unlikely((loff_t) (pos + len) < 0))
return -EINVAL;
- if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
- loff_t end = len ? pos + len - 1 : OFFSET_MAX;
- int retval;
-
- retval = locks_mandatory_area(inode, file, pos, end,
- write ? F_WRLCK : F_RDLCK);
- if (retval < 0)
- return retval;
- }
-
return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
}
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 855f0e87066d..2db8bcf7ff85 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -49,8 +49,7 @@ static int copy_bio_to_actor(struct bio *bio,
bytes_to_copy = min_t(int, bytes_to_copy,
req_length - copied_bytes);
- memcpy(actor_addr + actor_offset,
- page_address(bvec->bv_page) + bvec->bv_offset + offset,
+ memcpy(actor_addr + actor_offset, bvec_virt(bvec) + offset,
bytes_to_copy);
actor_offset += bytes_to_copy;
@@ -177,7 +176,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
goto out_free_bio;
}
/* Extract the length of the metadata block */
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
length = data[offset];
if (offset < bvec->bv_len - 1) {
length |= data[offset + 1] << 8;
@@ -186,7 +185,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
res = -EIO;
goto out_free_bio;
}
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
length |= data[0] << 8;
}
bio_free_pages(bio);
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index 233d5582fbee..b685b6238316 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -101,7 +101,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
while (bio_next_segment(bio, &iter_all)) {
int avail = min(bytes, ((int)bvec->bv_len) - offset);
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
memcpy(buff, data + offset, avail);
buff += avail;
bytes -= avail;
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 97bb7d92ddcd..cb510a631968 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -76,7 +76,7 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
while (bio_next_segment(bio, &iter_all)) {
int avail = min(bytes, ((int)bvec->bv_len) - offset);
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
memcpy(buff, data + offset, avail);
buff += avail;
bytes -= avail;
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index e80419aed862..68f6d09bb3a2 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -146,7 +146,7 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
}
avail = min(length, ((int)bvec->bv_len) - offset);
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
length -= avail;
stream->buf.in = data + offset;
stream->buf.in_size = avail;
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index bcb881ec47f2..a20e9042146b 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -76,7 +76,7 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
}
avail = min(length, ((int)bvec->bv_len) - offset);
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
length -= avail;
stream->next_in = data + offset;
stream->avail_in = avail;
diff --git a/fs/squashfs/zstd_wrapper.c b/fs/squashfs/zstd_wrapper.c
index b7cb1faa652d..0015cf8b5582 100644
--- a/fs/squashfs/zstd_wrapper.c
+++ b/fs/squashfs/zstd_wrapper.c
@@ -94,7 +94,7 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm,
}
avail = min(length, ((int)bvec->bv_len) - offset);
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
length -= avail;
in_buf.src = data + offset;
in_buf.size = avail;
diff --git a/fs/super.c b/fs/super.c
index 91b7f156735b..bcef3a6f4c4b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1203,7 +1203,7 @@ static int set_bdev_super(struct super_block *s, void *data)
{
s->s_bdev = data;
s->s_dev = s->s_bdev->bd_dev;
- s->s_bdi = bdi_get(s->s_bdev->bd_bdi);
+ s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi);
if (blk_queue_stable_writes(s->s_bdev->bd_disk->queue))
s->s_iflags |= SB_I_STABLE_WRITES;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index c5509d2448e3..e9c96a0c79f1 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -115,6 +115,22 @@ void timerfd_clock_was_set(void)
rcu_read_unlock();
}
+static void timerfd_resume_work(struct work_struct *work)
+{
+ timerfd_clock_was_set();
+}
+
+static DECLARE_WORK(timerfd_work, timerfd_resume_work);
+
+/*
+ * Invoked from timekeeping_resume(). Defer the actual update to work so
+ * timerfd_clock_was_set() runs in task context.
+ */
+void timerfd_resume(void)
+{
+ schedule_work(&timerfd_work);
+}
+
static void __timerfd_remove_cancel(struct timerfd_ctx *ctx)
{
if (ctx->might_cancel) {
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 2e4e1d159969..5cfa28cd00cd 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1630,6 +1630,17 @@ static const char *ubifs_get_link(struct dentry *dentry,
return fscrypt_get_symlink(inode, ui->data, ui->data_len, done);
}
+static int ubifs_symlink_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
+{
+ ubifs_getattr(mnt_userns, path, stat, request_mask, query_flags);
+
+ if (IS_ENCRYPTED(d_inode(path->dentry)))
+ return fscrypt_symlink_getattr(path, stat);
+ return 0;
+}
+
const struct address_space_operations ubifs_file_address_operations = {
.readpage = ubifs_readpage,
.writepage = ubifs_writepage,
@@ -1655,7 +1666,7 @@ const struct inode_operations ubifs_file_inode_operations = {
const struct inode_operations ubifs_symlink_inode_operations = {
.get_link = ubifs_get_link,
.setattr = ubifs_setattr,
- .getattr = ubifs_getattr,
+ .getattr = ubifs_symlink_getattr,
.listxattr = ubifs_listxattr,
.update_time = ubifs_update_time,
};
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index c19dba45aa20..70abdfad2df1 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -35,7 +35,6 @@
#include "udf_i.h"
#include "udf_sb.h"
-
static int udf_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *dir = file_inode(file);
@@ -135,7 +134,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
lfi = cfi.lengthFileIdent;
if (fibh.sbh == fibh.ebh) {
- nameptr = fi->fileIdent + liu;
+ nameptr = udf_get_fi_ident(fi);
} else {
int poffset; /* Unpaded ending offset */
@@ -153,7 +152,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
}
}
nameptr = copy_name;
- memcpy(nameptr, fi->fileIdent + liu,
+ memcpy(nameptr, udf_get_fi_ident(fi),
lfi - poffset);
memcpy(nameptr + lfi - poffset,
fibh.ebh->b_data, poffset);
diff --git a/fs/udf/ecma_167.h b/fs/udf/ecma_167.h
index 185c3e247648..de17a97e8667 100644
--- a/fs/udf/ecma_167.h
+++ b/fs/udf/ecma_167.h
@@ -307,14 +307,14 @@ struct logicalVolDesc {
struct regid impIdent;
uint8_t impUse[128];
struct extent_ad integritySeqExt;
- uint8_t partitionMaps[0];
+ uint8_t partitionMaps[];
} __packed;
/* Generic Partition Map (ECMA 167r3 3/10.7.1) */
struct genericPartitionMap {
uint8_t partitionMapType;
uint8_t partitionMapLength;
- uint8_t partitionMapping[0];
+ uint8_t partitionMapping[];
} __packed;
/* Partition Map Type (ECMA 167r3 3/10.7.1.1) */
@@ -342,7 +342,7 @@ struct unallocSpaceDesc {
struct tag descTag;
__le32 volDescSeqNum;
__le32 numAllocDescs;
- struct extent_ad allocDescs[0];
+ struct extent_ad allocDescs[];
} __packed;
/* Terminating Descriptor (ECMA 167r3 3/10.9) */
@@ -360,9 +360,9 @@ struct logicalVolIntegrityDesc {
uint8_t logicalVolContentsUse[32];
__le32 numOfPartitions;
__le32 lengthOfImpUse;
- __le32 freeSpaceTable[0];
- __le32 sizeTable[0];
- uint8_t impUse[0];
+ __le32 freeSpaceTable[];
+ /* __le32 sizeTable[]; */
+ /* uint8_t impUse[]; */
} __packed;
/* Integrity Type (ECMA 167r3 3/10.10.3) */
@@ -471,9 +471,9 @@ struct fileIdentDesc {
uint8_t lengthFileIdent;
struct long_ad icb;
__le16 lengthOfImpUse;
- uint8_t impUse[0];
- uint8_t fileIdent[0];
- uint8_t padding[0];
+ uint8_t impUse[];
+ /* uint8_t fileIdent[]; */
+ /* uint8_t padding[]; */
} __packed;
/* File Characteristics (ECMA 167r3 4/14.4.3) */
@@ -578,8 +578,8 @@ struct fileEntry {
__le64 uniqueID;
__le32 lengthExtendedAttr;
__le32 lengthAllocDescs;
- uint8_t extendedAttr[0];
- uint8_t allocDescs[0];
+ uint8_t extendedAttr[];
+ /* uint8_t allocDescs[]; */
} __packed;
/* Permissions (ECMA 167r3 4/14.9.5) */
@@ -632,7 +632,7 @@ struct genericFormat {
uint8_t attrSubtype;
uint8_t reserved[3];
__le32 attrLength;
- uint8_t attrData[0];
+ uint8_t attrData[];
} __packed;
/* Character Set Information (ECMA 167r3 4/14.10.3) */
@@ -643,7 +643,7 @@ struct charSetInfo {
__le32 attrLength;
__le32 escapeSeqLength;
uint8_t charSetType;
- uint8_t escapeSeq[0];
+ uint8_t escapeSeq[];
} __packed;
/* Alternate Permissions (ECMA 167r3 4/14.10.4) */
@@ -682,7 +682,7 @@ struct infoTimesExtAttr {
__le32 attrLength;
__le32 dataLength;
__le32 infoTimeExistence;
- uint8_t infoTimes[0];
+ uint8_t infoTimes[];
} __packed;
/* Device Specification (ECMA 167r3 4/14.10.7) */
@@ -694,7 +694,7 @@ struct deviceSpec {
__le32 impUseLength;
__le32 majorDeviceIdent;
__le32 minorDeviceIdent;
- uint8_t impUse[0];
+ uint8_t impUse[];
} __packed;
/* Implementation Use Extended Attr (ECMA 167r3 4/14.10.8) */
@@ -705,7 +705,7 @@ struct impUseExtAttr {
__le32 attrLength;
__le32 impUseLength;
struct regid impIdent;
- uint8_t impUse[0];
+ uint8_t impUse[];
} __packed;
/* Application Use Extended Attribute (ECMA 167r3 4/14.10.9) */
@@ -716,7 +716,7 @@ struct appUseExtAttr {
__le32 attrLength;
__le32 appUseLength;
struct regid appIdent;
- uint8_t appUse[0];
+ uint8_t appUse[];
} __packed;
#define EXTATTR_CHAR_SET 1
@@ -733,7 +733,7 @@ struct unallocSpaceEntry {
struct tag descTag;
struct icbtag icbTag;
__le32 lengthAllocDescs;
- uint8_t allocDescs[0];
+ uint8_t allocDescs[];
} __packed;
/* Space Bitmap Descriptor (ECMA 167r3 4/14.12) */
@@ -741,7 +741,7 @@ struct spaceBitmapDesc {
struct tag descTag;
__le32 numOfBits;
__le32 numOfBytes;
- uint8_t bitmap[0];
+ uint8_t bitmap[];
} __packed;
/* Partition Integrity Entry (ECMA 167r3 4/14.13) */
@@ -780,7 +780,7 @@ struct pathComponent {
uint8_t componentType;
uint8_t lengthComponentIdent;
__le16 componentFileVersionNum;
- dchars componentIdent[0];
+ dchars componentIdent[];
} __packed;
/* File Entry (ECMA 167r3 4/14.17) */
@@ -809,8 +809,8 @@ struct extendedFileEntry {
__le64 uniqueID;
__le32 lengthExtendedAttr;
__le32 lengthAllocDescs;
- uint8_t extendedAttr[0];
- uint8_t allocDescs[0];
+ uint8_t extendedAttr[];
+ /* uint8_t allocDescs[]; */
} __packed;
#endif /* _ECMA_167_H */
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 4917670860a0..1d6b7a50736b 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -390,8 +390,7 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode,
dfibh.eoffset += (sfibh.eoffset - sfibh.soffset);
dfi = (struct fileIdentDesc *)(dbh->b_data + dfibh.soffset);
if (udf_write_fi(inode, sfi, dfi, &dfibh, sfi->impUse,
- sfi->fileIdent +
- le16_to_cpu(sfi->lengthOfImpUse))) {
+ udf_get_fi_ident(sfi))) {
iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
brelse(dbh);
return NULL;
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index eab94527340d..1614d308d0f0 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -173,13 +173,22 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type,
else
offset = le32_to_cpu(eahd->appAttrLocation);
- while (offset < iinfo->i_lenEAttr) {
+ while (offset + sizeof(*gaf) < iinfo->i_lenEAttr) {
+ uint32_t attrLength;
+
gaf = (struct genericFormat *)&ea[offset];
+ attrLength = le32_to_cpu(gaf->attrLength);
+
+ /* Detect undersized elements and buffer overflows */
+ if ((attrLength < sizeof(*gaf)) ||
+ (attrLength > (iinfo->i_lenEAttr - offset)))
+ break;
+
if (le32_to_cpu(gaf->attrType) == type &&
gaf->attrSubtype == subtype)
return gaf;
else
- offset += le32_to_cpu(gaf->attrLength);
+ offset += attrLength;
}
}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 7c7c9bbbfa57..caeef08efed2 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -74,12 +74,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
if (fileident) {
if (adinicb || (offset + lfi < 0)) {
- memcpy((uint8_t *)sfi->fileIdent + liu, fileident, lfi);
+ memcpy(udf_get_fi_ident(sfi), fileident, lfi);
} else if (offset >= 0) {
memcpy(fibh->ebh->b_data + offset, fileident, lfi);
} else {
- memcpy((uint8_t *)sfi->fileIdent + liu, fileident,
- -offset);
+ memcpy(udf_get_fi_ident(sfi), fileident, -offset);
memcpy(fibh->ebh->b_data, fileident - offset,
lfi + offset);
}
@@ -88,11 +87,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
offset += lfi;
if (adinicb || (offset + padlen < 0)) {
- memset((uint8_t *)sfi->padding + liu + lfi, 0x00, padlen);
+ memset(udf_get_fi_ident(sfi) + lfi, 0x00, padlen);
} else if (offset >= 0) {
memset(fibh->ebh->b_data + offset, 0x00, padlen);
} else {
- memset((uint8_t *)sfi->padding + liu + lfi, 0x00, -offset);
+ memset(udf_get_fi_ident(sfi) + lfi, 0x00, -offset);
memset(fibh->ebh->b_data, 0x00, padlen + offset);
}
@@ -226,7 +225,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
lfi = cfi->lengthFileIdent;
if (fibh->sbh == fibh->ebh) {
- nameptr = fi->fileIdent + liu;
+ nameptr = udf_get_fi_ident(fi);
} else {
int poffset; /* Unpaded ending offset */
@@ -246,7 +245,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
}
}
nameptr = copy_name;
- memcpy(nameptr, fi->fileIdent + liu,
+ memcpy(nameptr, udf_get_fi_ident(fi),
lfi - poffset);
memcpy(nameptr + lfi - poffset,
fibh->ebh->b_data, poffset);
diff --git a/fs/udf/osta_udf.h b/fs/udf/osta_udf.h
index 22bc4fb2feb9..157de0ec0cd5 100644
--- a/fs/udf/osta_udf.h
+++ b/fs/udf/osta_udf.h
@@ -111,7 +111,7 @@ struct logicalVolIntegrityDescImpUse {
__le16 minUDFReadRev;
__le16 minUDFWriteRev;
__le16 maxUDFWriteRev;
- uint8_t impUse[0];
+ uint8_t impUse[];
} __packed;
/* Implementation Use Volume Descriptor (UDF 2.60 2.2.7) */
@@ -178,15 +178,6 @@ struct metadataPartitionMap {
uint8_t reserved2[5];
} __packed;
-/* Virtual Allocation Table (UDF 1.5 2.2.10) */
-struct virtualAllocationTable15 {
- __le32 vatEntry[0];
- struct regid vatIdent;
- __le32 previousVATICBLoc;
-} __packed;
-
-#define ICBTAG_FILE_TYPE_VAT15 0x00U
-
/* Virtual Allocation Table (UDF 2.60 2.2.11) */
struct virtualAllocationTable20 {
__le16 lengthHeader;
@@ -199,8 +190,8 @@ struct virtualAllocationTable20 {
__le16 minUDFWriteRev;
__le16 maxUDFWriteRev;
__le16 reserved;
- uint8_t impUse[0];
- __le32 vatEntry[0];
+ uint8_t impUse[];
+ /* __le32 vatEntry[]; */
} __packed;
#define ICBTAG_FILE_TYPE_VAT20 0xF8U
@@ -217,8 +208,7 @@ struct sparingTable {
__le16 reallocationTableLen;
__le16 reserved;
__le32 sequenceNum;
- struct sparingEntry
- mapEntry[0];
+ struct sparingEntry mapEntry[];
} __packed;
/* Metadata File (and Metadata Mirror File) (UDF 2.60 2.2.13.1) */
@@ -241,7 +231,7 @@ struct allocDescImpUse {
/* FreeEASpace (UDF 2.60 3.3.4.5.1.1) */
struct freeEaSpace {
__le16 headerChecksum;
- uint8_t freeEASpace[0];
+ uint8_t freeEASpace[];
} __packed;
/* DVD Copyright Management Information (UDF 2.60 3.3.4.5.1.2) */
@@ -265,7 +255,7 @@ struct LVExtensionEA {
/* FreeAppEASpace (UDF 2.60 3.3.4.6.1) */
struct freeAppEASpace {
__le16 headerChecksum;
- uint8_t freeEASpace[0];
+ uint8_t freeEASpace[];
} __packed;
/* UDF Defined System Stream (UDF 2.60 3.3.7) */
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 2f83c1204e20..b2d7c57d0688 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -108,16 +108,10 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb)
return NULL;
lvid = (struct logicalVolIntegrityDesc *)UDF_SB(sb)->s_lvid_bh->b_data;
partnum = le32_to_cpu(lvid->numOfPartitions);
- if ((sb->s_blocksize - sizeof(struct logicalVolIntegrityDescImpUse) -
- offsetof(struct logicalVolIntegrityDesc, impUse)) /
- (2 * sizeof(uint32_t)) < partnum) {
- udf_err(sb, "Logical volume integrity descriptor corrupted "
- "(numOfPartitions = %u)!\n", partnum);
- return NULL;
- }
/* The offset is to skip freeSpaceTable and sizeTable arrays */
offset = partnum * 2 * sizeof(uint32_t);
- return (struct logicalVolIntegrityDescImpUse *)&(lvid->impUse[offset]);
+ return (struct logicalVolIntegrityDescImpUse *)
+ (((uint8_t *)(lvid + 1)) + offset);
}
/* UDF filesystem type */
@@ -349,10 +343,10 @@ static int udf_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",lastblock=%u", sbi->s_last_block);
if (sbi->s_anchor != 0)
seq_printf(seq, ",anchor=%u", sbi->s_anchor);
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8))
- seq_puts(seq, ",utf8");
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP) && sbi->s_nls_map)
+ if (sbi->s_nls_map)
seq_printf(seq, ",iocharset=%s", sbi->s_nls_map->charset);
+ else
+ seq_puts(seq, ",iocharset=utf8");
return 0;
}
@@ -558,19 +552,24 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
/* Ignored (never implemented properly) */
break;
case Opt_utf8:
- uopt->flags |= (1 << UDF_FLAG_UTF8);
+ if (!remount) {
+ unload_nls(uopt->nls_map);
+ uopt->nls_map = NULL;
+ }
break;
case Opt_iocharset:
if (!remount) {
- if (uopt->nls_map)
- unload_nls(uopt->nls_map);
- /*
- * load_nls() failure is handled later in
- * udf_fill_super() after all options are
- * parsed.
- */
+ unload_nls(uopt->nls_map);
+ uopt->nls_map = NULL;
+ }
+ /* When nls_map is not loaded then UTF-8 is used */
+ if (!remount && strcmp(args[0].from, "utf8") != 0) {
uopt->nls_map = load_nls(args[0].from);
- uopt->flags |= (1 << UDF_FLAG_NLS_MAP);
+ if (!uopt->nls_map) {
+ pr_err("iocharset %s not found\n",
+ args[0].from);
+ return 0;
+ }
}
break;
case Opt_uforget:
@@ -1542,6 +1541,7 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
struct udf_sb_info *sbi = UDF_SB(sb);
struct logicalVolIntegrityDesc *lvid;
int indirections = 0;
+ u32 parts, impuselen;
while (++indirections <= UDF_MAX_LVID_NESTING) {
final_bh = NULL;
@@ -1568,15 +1568,27 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
lvid = (struct logicalVolIntegrityDesc *)final_bh->b_data;
if (lvid->nextIntegrityExt.extLength == 0)
- return;
+ goto check;
loc = leea_to_cpu(lvid->nextIntegrityExt);
}
udf_warn(sb, "Too many LVID indirections (max %u), ignoring.\n",
UDF_MAX_LVID_NESTING);
+out_err:
brelse(sbi->s_lvid_bh);
sbi->s_lvid_bh = NULL;
+ return;
+check:
+ parts = le32_to_cpu(lvid->numOfPartitions);
+ impuselen = le32_to_cpu(lvid->lengthOfImpUse);
+ if (parts >= sb->s_blocksize || impuselen >= sb->s_blocksize ||
+ sizeof(struct logicalVolIntegrityDesc) + impuselen +
+ 2 * parts * sizeof(u32) > sb->s_blocksize) {
+ udf_warn(sb, "Corrupted LVID (parts=%u, impuselen=%u), "
+ "ignoring.\n", parts, impuselen);
+ goto out_err;
+ }
}
/*
@@ -2139,21 +2151,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
if (!udf_parse_options((char *)options, &uopt, false))
goto parse_options_failure;
- if (uopt.flags & (1 << UDF_FLAG_UTF8) &&
- uopt.flags & (1 << UDF_FLAG_NLS_MAP)) {
- udf_err(sb, "utf8 cannot be combined with iocharset\n");
- goto parse_options_failure;
- }
- if ((uopt.flags & (1 << UDF_FLAG_NLS_MAP)) && !uopt.nls_map) {
- uopt.nls_map = load_nls_default();
- if (!uopt.nls_map)
- uopt.flags &= ~(1 << UDF_FLAG_NLS_MAP);
- else
- udf_debug("Using default NLS map\n");
- }
- if (!(uopt.flags & (1 << UDF_FLAG_NLS_MAP)))
- uopt.flags |= (1 << UDF_FLAG_UTF8);
-
fileset.logicalBlockNum = 0xFFFFFFFF;
fileset.partitionReferenceNum = 0xFFFF;
@@ -2308,8 +2305,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
error_out:
iput(sbi->s_vat_inode);
parse_options_failure:
- if (uopt.nls_map)
- unload_nls(uopt.nls_map);
+ unload_nls(uopt.nls_map);
if (lvid_open)
udf_close_lvid(sb);
brelse(sbi->s_lvid_bh);
@@ -2359,8 +2355,7 @@ static void udf_put_super(struct super_block *sb)
sbi = UDF_SB(sb);
iput(sbi->s_vat_inode);
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
- unload_nls(sbi->s_nls_map);
+ unload_nls(sbi->s_nls_map);
if (!sb_rdonly(sb))
udf_close_lvid(sb);
brelse(sbi->s_lvid_bh);
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 758efe557a19..4fa620543d30 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -20,8 +20,6 @@
#define UDF_FLAG_UNDELETE 6
#define UDF_FLAG_UNHIDE 7
#define UDF_FLAG_VARCONV 8
-#define UDF_FLAG_NLS_MAP 9
-#define UDF_FLAG_UTF8 10
#define UDF_FLAG_UID_FORGET 11 /* save -1 for uid to disk */
#define UDF_FLAG_GID_FORGET 12
#define UDF_FLAG_UID_SET 13
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 9dd0814f1077..7e258f15b8ef 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -130,6 +130,10 @@ static inline unsigned int udf_dir_entry_len(struct fileIdentDesc *cfi)
le16_to_cpu(cfi->lengthOfImpUse) + cfi->lengthFileIdent,
UDF_NAME_PAD);
}
+static inline uint8_t *udf_get_fi_ident(struct fileIdentDesc *fi)
+{
+ return ((uint8_t *)(fi + 1)) + le16_to_cpu(fi->lengthOfImpUse);
+}
/* file.c */
extern long udf_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 5fcfa96463eb..622569007b53 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -177,7 +177,7 @@ static int udf_name_from_CS0(struct super_block *sb,
return 0;
}
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
+ if (UDF_SB(sb)->s_nls_map)
conv_f = UDF_SB(sb)->s_nls_map->uni2char;
else
conv_f = NULL;
@@ -285,7 +285,7 @@ static int udf_name_to_CS0(struct super_block *sb,
if (ocu_max_len <= 0)
return 0;
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
+ if (UDF_SB(sb)->s_nls_map)
conv_f = UDF_SB(sb)->s_nls_map->char2uni;
else
conv_f = NULL;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 213a97a921bb..1cd3f940fa6a 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1626,7 +1626,6 @@ xfs_swap_extents(
struct xfs_bstat *sbp = &sxp->sx_stat;
int src_log_flags, target_log_flags;
int error = 0;
- int lock_flags;
uint64_t f;
int resblks = 0;
unsigned int flags = 0;
@@ -1638,8 +1637,8 @@ xfs_swap_extents(
* do the rest of the checks.
*/
lock_two_nondirectories(VFS_I(ip), VFS_I(tip));
- lock_flags = XFS_MMAPLOCK_EXCL;
- xfs_lock_two_inodes(ip, XFS_MMAPLOCK_EXCL, tip, XFS_MMAPLOCK_EXCL);
+ filemap_invalidate_lock_two(VFS_I(ip)->i_mapping,
+ VFS_I(tip)->i_mapping);
/* Verify that both files have the same format */
if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
@@ -1711,7 +1710,6 @@ xfs_swap_extents(
* or cancel will unlock the inodes from this point onwards.
*/
xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL);
- lock_flags |= XFS_ILOCK_EXCL;
xfs_trans_ijoin(tp, ip, 0);
xfs_trans_ijoin(tp, tip, 0);
@@ -1830,13 +1828,16 @@ xfs_swap_extents(
trace_xfs_swap_extent_after(ip, 0);
trace_xfs_swap_extent_after(tip, 1);
+out_unlock_ilock:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(tip, XFS_ILOCK_EXCL);
out_unlock:
- xfs_iunlock(ip, lock_flags);
- xfs_iunlock(tip, lock_flags);
+ filemap_invalidate_unlock_two(VFS_I(ip)->i_mapping,
+ VFS_I(tip)->i_mapping);
unlock_two_nondirectories(VFS_I(ip), VFS_I(tip));
return error;
out_trans_cancel:
xfs_trans_cancel(tp);
- goto out_unlock;
+ goto out_unlock_ilock;
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8ff42b3585e0..3ab73567a0f5 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -844,7 +844,7 @@ xfs_buf_readahead_map(
{
struct xfs_buf *bp;
- if (bdi_read_congested(target->bt_bdev->bd_bdi))
+ if (bdi_read_congested(target->bt_bdev->bd_disk->bdi))
return;
xfs_buf_read_map(target, map, nmaps,
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index cc3cfb12df53..3dfbdcdb0d1c 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1302,7 +1302,7 @@ xfs_file_llseek(
*
* mmap_lock (MM)
* sb_start_pagefault(vfs, freeze)
- * i_mmaplock (XFS - truncate serialisation)
+ * invalidate_lock (vfs/XFS_MMAPLOCK - truncate serialisation)
* page_lock (MM)
* i_lock (XFS - extent map serialisation)
*/
@@ -1323,24 +1323,27 @@ __xfs_filemap_fault(
file_update_time(vmf->vma->vm_file);
}
- xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode)) {
pfn_t pfn;
+ xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL,
(write_fault && !vmf->cow_page) ?
&xfs_direct_write_iomap_ops :
&xfs_read_iomap_ops);
if (ret & VM_FAULT_NEEDDSYNC)
ret = dax_finish_sync_fault(vmf, pe_size, pfn);
+ xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
} else {
- if (write_fault)
+ if (write_fault) {
+ xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
ret = iomap_page_mkwrite(vmf,
&xfs_buffered_write_iomap_ops);
- else
+ xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+ } else {
ret = filemap_fault(vmf);
+ }
}
- xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (write_fault)
sb_end_pagefault(inode->i_sb);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 990b72ae3635..f00145e1a976 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -132,7 +132,7 @@ xfs_ilock_attr_map_shared(
/*
* In addition to i_rwsem in the VFS inode, the xfs inode contains 2
- * multi-reader locks: i_mmap_lock and the i_lock. This routine allows
+ * multi-reader locks: invalidate_lock and the i_lock. This routine allows
* various combinations of the locks to be obtained.
*
* The 3 locks should always be ordered so that the IO lock is obtained first,
@@ -140,23 +140,23 @@ xfs_ilock_attr_map_shared(
*
* Basic locking order:
*
- * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock
+ * i_rwsem -> invalidate_lock -> page_lock -> i_ilock
*
* mmap_lock locking order:
*
* i_rwsem -> page lock -> mmap_lock
- * mmap_lock -> i_mmap_lock -> page_lock
+ * mmap_lock -> invalidate_lock -> page_lock
*
* The difference in mmap_lock locking order mean that we cannot hold the
- * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
- * fault in pages during copy in/out (for buffered IO) or require the mmap_lock
- * in get_user_pages() to map the user pages into the kernel address space for
- * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because
- * page faults already hold the mmap_lock.
+ * invalidate_lock over syscall based read(2)/write(2) based IO. These IO paths
+ * can fault in pages during copy in/out (for buffered IO) or require the
+ * mmap_lock in get_user_pages() to map the user pages into the kernel address
+ * space for direct IO. Similarly the i_rwsem cannot be taken inside a page
+ * fault because page faults already hold the mmap_lock.
*
* Hence to serialise fully against both syscall and mmap based IO, we need to
- * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both
- * taken in places where we need to invalidate the page cache in a race
+ * take both the i_rwsem and the invalidate_lock. These locks should *only* be
+ * both taken in places where we need to invalidate the page cache in a race
* free manner (e.g. truncate, hole punch and other extent manipulation
* functions).
*/
@@ -188,10 +188,13 @@ xfs_ilock(
XFS_IOLOCK_DEP(lock_flags));
}
- if (lock_flags & XFS_MMAPLOCK_EXCL)
- mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
- else if (lock_flags & XFS_MMAPLOCK_SHARED)
- mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
+ if (lock_flags & XFS_MMAPLOCK_EXCL) {
+ down_write_nested(&VFS_I(ip)->i_mapping->invalidate_lock,
+ XFS_MMAPLOCK_DEP(lock_flags));
+ } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
+ down_read_nested(&VFS_I(ip)->i_mapping->invalidate_lock,
+ XFS_MMAPLOCK_DEP(lock_flags));
+ }
if (lock_flags & XFS_ILOCK_EXCL)
mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
@@ -240,10 +243,10 @@ xfs_ilock_nowait(
}
if (lock_flags & XFS_MMAPLOCK_EXCL) {
- if (!mrtryupdate(&ip->i_mmaplock))
+ if (!down_write_trylock(&VFS_I(ip)->i_mapping->invalidate_lock))
goto out_undo_iolock;
} else if (lock_flags & XFS_MMAPLOCK_SHARED) {
- if (!mrtryaccess(&ip->i_mmaplock))
+ if (!down_read_trylock(&VFS_I(ip)->i_mapping->invalidate_lock))
goto out_undo_iolock;
}
@@ -258,9 +261,9 @@ xfs_ilock_nowait(
out_undo_mmaplock:
if (lock_flags & XFS_MMAPLOCK_EXCL)
- mrunlock_excl(&ip->i_mmaplock);
+ up_write(&VFS_I(ip)->i_mapping->invalidate_lock);
else if (lock_flags & XFS_MMAPLOCK_SHARED)
- mrunlock_shared(&ip->i_mmaplock);
+ up_read(&VFS_I(ip)->i_mapping->invalidate_lock);
out_undo_iolock:
if (lock_flags & XFS_IOLOCK_EXCL)
up_write(&VFS_I(ip)->i_rwsem);
@@ -307,9 +310,9 @@ xfs_iunlock(
up_read(&VFS_I(ip)->i_rwsem);
if (lock_flags & XFS_MMAPLOCK_EXCL)
- mrunlock_excl(&ip->i_mmaplock);
+ up_write(&VFS_I(ip)->i_mapping->invalidate_lock);
else if (lock_flags & XFS_MMAPLOCK_SHARED)
- mrunlock_shared(&ip->i_mmaplock);
+ up_read(&VFS_I(ip)->i_mapping->invalidate_lock);
if (lock_flags & XFS_ILOCK_EXCL)
mrunlock_excl(&ip->i_lock);
@@ -335,7 +338,7 @@ xfs_ilock_demote(
if (lock_flags & XFS_ILOCK_EXCL)
mrdemote(&ip->i_lock);
if (lock_flags & XFS_MMAPLOCK_EXCL)
- mrdemote(&ip->i_mmaplock);
+ downgrade_write(&VFS_I(ip)->i_mapping->invalidate_lock);
if (lock_flags & XFS_IOLOCK_EXCL)
downgrade_write(&VFS_I(ip)->i_rwsem);
@@ -343,9 +346,29 @@ xfs_ilock_demote(
}
#if defined(DEBUG) || defined(XFS_WARN)
-int
+static inline bool
+__xfs_rwsem_islocked(
+ struct rw_semaphore *rwsem,
+ bool shared)
+{
+ if (!debug_locks)
+ return rwsem_is_locked(rwsem);
+
+ if (!shared)
+ return lockdep_is_held_type(rwsem, 0);
+
+ /*
+ * We are checking that the lock is held at least in shared
+ * mode but don't care that it might be held exclusively
+ * (i.e. shared | excl). Hence we check if the lock is held
+ * in any mode rather than an explicit shared mode.
+ */
+ return lockdep_is_held_type(rwsem, -1);
+}
+
+bool
xfs_isilocked(
- xfs_inode_t *ip,
+ struct xfs_inode *ip,
uint lock_flags)
{
if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
@@ -355,20 +378,17 @@ xfs_isilocked(
}
if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
- if (!(lock_flags & XFS_MMAPLOCK_SHARED))
- return !!ip->i_mmaplock.mr_writer;
- return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
+ return __xfs_rwsem_islocked(&VFS_I(ip)->i_mapping->invalidate_lock,
+ (lock_flags & XFS_MMAPLOCK_SHARED)); /* MMAPLOCK is the mapping's invalidate_lock, not i_rwsem */
}
- if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
- if (!(lock_flags & XFS_IOLOCK_SHARED))
- return !debug_locks ||
- lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0);
- return rwsem_is_locked(&VFS_I(ip)->i_rwsem);
+ if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) {
+ return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem,
+ (lock_flags & XFS_IOLOCK_SHARED));
}
ASSERT(0);
- return 0;
+ return false;
}
#endif
@@ -532,12 +552,10 @@ again:
}
/*
- * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
- * the mmaplock or the ilock, but not more than one type at a time. If we lock
- * more than one at a time, lockdep will report false positives saying we have
- * violated locking orders. The iolock must be double-locked separately since
- * we use i_rwsem for that. We now support taking one lock EXCL and the other
- * SHARED.
+ * xfs_lock_two_inodes() can only be used to lock ilock. The iolock and
+ * mmaplock must be double-locked separately since we use i_rwsem and
+ * invalidate_lock for that. We now support taking one lock EXCL and the
+ * other SHARED.
*/
void
xfs_lock_two_inodes(
@@ -555,15 +573,8 @@ xfs_lock_two_inodes(
ASSERT(hweight32(ip1_mode) == 1);
ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
- ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
- !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
- ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
- !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
- ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
- !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
- ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
- !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
-
+ ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
+ ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
ASSERT(ip0->i_ino != ip1->i_ino);
if (ip0->i_ino > ip1->i_ino) {
@@ -3741,11 +3752,8 @@ xfs_ilock2_io_mmap(
ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2));
if (ret)
return ret;
- if (ip1 == ip2)
- xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
- else
- xfs_lock_two_inodes(ip1, XFS_MMAPLOCK_EXCL,
- ip2, XFS_MMAPLOCK_EXCL);
+ filemap_invalidate_lock_two(VFS_I(ip1)->i_mapping,
+ VFS_I(ip2)->i_mapping);
return 0;
}
@@ -3755,12 +3763,9 @@ xfs_iunlock2_io_mmap(
struct xfs_inode *ip1,
struct xfs_inode *ip2)
{
- bool same_inode = (ip1 == ip2);
-
- xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
- if (!same_inode)
- xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
+ filemap_invalidate_unlock_two(VFS_I(ip1)->i_mapping,
+ VFS_I(ip2)->i_mapping);
inode_unlock(VFS_I(ip2));
- if (!same_inode)
+ if (ip1 != ip2)
inode_unlock(VFS_I(ip1));
}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 4b6703dbffb8..e0ae905554e2 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -40,7 +40,6 @@ typedef struct xfs_inode {
/* Transaction and locking information. */
struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */
- mrlock_t i_mmaplock; /* inode mmap IO lock */
atomic_t i_pincount; /* inode pin count */
/*
@@ -410,7 +409,7 @@ void xfs_ilock(xfs_inode_t *, uint);
int xfs_ilock_nowait(xfs_inode_t *, uint);
void xfs_iunlock(xfs_inode_t *, uint);
void xfs_ilock_demote(xfs_inode_t *, uint);
-int xfs_isilocked(xfs_inode_t *, uint);
+bool xfs_isilocked(struct xfs_inode *, uint);
uint xfs_ilock_data_map_shared(struct xfs_inode *);
uint xfs_ilock_attr_map_shared(struct xfs_inode *);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 2c9e26a44546..102cbd606633 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -709,8 +709,6 @@ xfs_fs_inode_init_once(
atomic_set(&ip->i_pincount, 0);
spin_lock_init(&ip->i_flags_lock);
- mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
- "xfsino", ip->i_ino);
mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
"xfsino", ip->i_ino);
}
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 70055d486bf7..ddc346a9df9b 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -462,7 +462,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize)
inode_dio_wait(inode);
/* Serialize against page faults */
- down_write(&zi->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
/* Serialize against zonefs_iomap_begin() */
mutex_lock(&zi->i_truncate_mutex);
@@ -500,7 +500,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize)
unlock:
mutex_unlock(&zi->i_truncate_mutex);
- up_write(&zi->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
return ret;
}
@@ -575,18 +575,6 @@ static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end,
return ret;
}
-static vm_fault_t zonefs_filemap_fault(struct vm_fault *vmf)
-{
- struct zonefs_inode_info *zi = ZONEFS_I(file_inode(vmf->vma->vm_file));
- vm_fault_t ret;
-
- down_read(&zi->i_mmap_sem);
- ret = filemap_fault(vmf);
- up_read(&zi->i_mmap_sem);
-
- return ret;
-}
-
static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
@@ -607,16 +595,16 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
file_update_time(vmf->vma->vm_file);
/* Serialize against truncates */
- down_read(&zi->i_mmap_sem);
+ filemap_invalidate_lock_shared(inode->i_mapping);
ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops);
- up_read(&zi->i_mmap_sem);
+ filemap_invalidate_unlock_shared(inode->i_mapping);
sb_end_pagefault(inode->i_sb);
return ret;
}
static const struct vm_operations_struct zonefs_file_vm_ops = {
- .fault = zonefs_filemap_fault,
+ .fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = zonefs_filemap_page_mkwrite,
};
@@ -1155,7 +1143,6 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb)
inode_init_once(&zi->i_vnode);
mutex_init(&zi->i_truncate_mutex);
- init_rwsem(&zi->i_mmap_sem);
zi->i_wr_refcnt = 0;
return &zi->i_vnode;
diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h
index 51141907097c..7b147907c328 100644
--- a/fs/zonefs/zonefs.h
+++ b/fs/zonefs/zonefs.h
@@ -70,12 +70,11 @@ struct zonefs_inode_info {
* and changes to the inode private data, and in particular changes to
* a sequential file size on completion of direct IO writes.
* Serialization of mmap read IOs with truncate and syscall IO
- * operations is done with i_mmap_sem in addition to i_truncate_mutex.
- * Only zonefs_seq_file_truncate() takes both lock (i_mmap_sem first,
- * i_truncate_mutex second).
+ * operations is done with invalidate_lock in addition to
+ * i_truncate_mutex. Only zonefs_file_truncate() takes both locks
+ * (invalidate_lock first, i_truncate_mutex second).
*/
struct mutex i_truncate_mutex;
- struct rw_semaphore i_mmap_sem;
/* guarded by i_truncate_mutex */
unsigned int i_wr_refcnt;
diff --git a/include/acpi/acnames.h b/include/acpi/acnames.h
index a2bc381c7ce7..30869ab77fba 100644
--- a/include/acpi/acnames.h
+++ b/include/acpi/acnames.h
@@ -20,6 +20,7 @@
#define METHOD_NAME__CLS "_CLS"
#define METHOD_NAME__CRS "_CRS"
#define METHOD_NAME__DDN "_DDN"
+#define METHOD_NAME__DIS "_DIS"
#define METHOD_NAME__DMA "_DMA"
#define METHOD_NAME__HID "_HID"
#define METHOD_NAME__INI "_INI"
diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h
index 1b4c45815695..5a3875744678 100644
--- a/include/acpi/acoutput.h
+++ b/include/acpi/acoutput.h
@@ -415,7 +415,7 @@
/* Conditional execution */
#define ACPI_DEBUG_EXEC(a) a
-#define ACPI_DEBUG_ONLY_MEMBERS(a) a;
+#define ACPI_DEBUG_ONLY_MEMBERS(a) a
#define _VERBOSE_STRUCTURES
/* Various object display routines for debug */
diff --git a/include/acpi/acpi_io.h b/include/acpi/acpi_io.h
index 027faa8883aa..a0212e67d6f4 100644
--- a/include/acpi/acpi_io.h
+++ b/include/acpi/acpi_io.h
@@ -14,6 +14,14 @@ static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
}
#endif
+#ifndef acpi_os_memmap
+static inline void __iomem *acpi_os_memmap(acpi_physical_address phys,
+ acpi_size size)
+{
+ return ioremap_cache(phys, size);
+}
+#endif
+
extern bool acpi_permanent_mmap;
void __iomem __ref
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index a43335961e30..fa02e3ff0faf 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -12,7 +12,7 @@
/* Current ACPICA subsystem version in YYYYMMDD format */
-#define ACPI_CA_VERSION 0x20210604
+#define ACPI_CA_VERSION 0x20210730
#include <acpi/acconfig.h>
#include <acpi/actypes.h>
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index ef2872dea01c..159070edd031 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -24,6 +24,7 @@
* file. Useful because they make it more difficult to inadvertently type in
* the wrong signature.
*/
+#define ACPI_SIG_AEST "AEST" /* Arm Error Source Table */
#define ACPI_SIG_ASF "ASF!" /* Alert Standard Format table */
#define ACPI_SIG_BERT "BERT" /* Boot Error Record Table */
#define ACPI_SIG_BGRT "BGRT" /* Boot Graphics Resource Table */
@@ -482,7 +483,7 @@ struct acpi_csrt_descriptor {
* DBG2 - Debug Port Table 2
* Version 0 (Both main table and subtables)
*
- * Conforms to "Microsoft Debug Port Table 2 (DBG2)", December 10, 2015
+ * Conforms to "Microsoft Debug Port Table 2 (DBG2)", September 21, 2020
*
******************************************************************************/
@@ -532,11 +533,24 @@ struct acpi_dbg2_device {
#define ACPI_DBG2_16550_COMPATIBLE 0x0000
#define ACPI_DBG2_16550_SUBSET 0x0001
+#define ACPI_DBG2_MAX311XE_SPI 0x0002
#define ACPI_DBG2_ARM_PL011 0x0003
+#define ACPI_DBG2_MSM8X60 0x0004
+#define ACPI_DBG2_16550_NVIDIA 0x0005
+#define ACPI_DBG2_TI_OMAP 0x0006
+#define ACPI_DBG2_APM88XXXX 0x0008
+#define ACPI_DBG2_MSM8974 0x0009
+#define ACPI_DBG2_SAM5250 0x000A
+#define ACPI_DBG2_INTEL_USIF 0x000B
+#define ACPI_DBG2_IMX6 0x000C
#define ACPI_DBG2_ARM_SBSA_32BIT 0x000D
#define ACPI_DBG2_ARM_SBSA_GENERIC 0x000E
#define ACPI_DBG2_ARM_DCC 0x000F
#define ACPI_DBG2_BCM2835 0x0010
+#define ACPI_DBG2_SDM845_1_8432MHZ 0x0011
+#define ACPI_DBG2_16550_WITH_GAS 0x0012
+#define ACPI_DBG2_SDM845_7_372MHZ 0x0013
+#define ACPI_DBG2_INTEL_LPSS 0x0014
#define ACPI_DBG2_1394_STANDARD 0x0000
diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 2069ac38a4e2..a47b32a5cbde 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -69,6 +69,176 @@
/*******************************************************************************
*
+ * AEST - Arm Error Source Table
+ *
+ * Conforms to: ACPI for the Armv8 RAS Extensions 1.1 Platform Design Document
+ * September 2020.
+ *
+ ******************************************************************************/
+
+struct acpi_table_aest {
+ struct acpi_table_header header;
+ void *node_array[];
+};
+
+/* Common Subtable header - one per Node Structure (Subtable) */
+
+struct acpi_aest_hdr {
+ u8 type;
+ u16 length;
+ u8 reserved;
+ u32 node_specific_offset;
+ u32 node_interface_offset;
+ u32 node_interrupt_offset;
+ u32 node_interrupt_count;
+ u64 timestamp_rate;
+ u64 reserved1;
+ u64 error_injection_rate;
+};
+
+/* Values for Type above */
+
+#define ACPI_AEST_PROCESSOR_ERROR_NODE 0
+#define ACPI_AEST_MEMORY_ERROR_NODE 1
+#define ACPI_AEST_SMMU_ERROR_NODE 2
+#define ACPI_AEST_VENDOR_ERROR_NODE 3
+#define ACPI_AEST_GIC_ERROR_NODE 4
+#define ACPI_AEST_NODE_TYPE_RESERVED 5 /* 5 and above are reserved */
+
+/*
+ * AEST subtables (Error nodes)
+ */
+
+/* 0: Processor Error */
+
+typedef struct acpi_aest_processor {
+ u32 processor_id;
+ u8 resource_type;
+ u8 reserved;
+ u8 flags;
+ u8 revision;
+ u64 processor_affinity;
+
+} acpi_aest_processor;
+
+/* Values for resource_type above, related structs below */
+
+#define ACPI_AEST_CACHE_RESOURCE 0
+#define ACPI_AEST_TLB_RESOURCE 1
+#define ACPI_AEST_GENERIC_RESOURCE 2
+#define ACPI_AEST_RESOURCE_RESERVED 3 /* 3 and above are reserved */
+
+/* 0R: Processor Cache Resource Substructure */
+
+typedef struct acpi_aest_processor_cache {
+ u32 cache_reference;
+ u32 reserved;
+
+} acpi_aest_processor_cache;
+
+/* Values for cache_type above */
+
+#define ACPI_AEST_CACHE_DATA 0
+#define ACPI_AEST_CACHE_INSTRUCTION 1
+#define ACPI_AEST_CACHE_UNIFIED 2
+#define ACPI_AEST_CACHE_RESERVED 3 /* 3 and above are reserved */
+
+/* 1R: Processor TLB Resource Substructure */
+
+typedef struct acpi_aest_processor_tlb {
+ u32 tlb_level;
+ u32 reserved;
+
+} acpi_aest_processor_tlb;
+
+/* 2R: Processor Generic Resource Substructure */
+
+typedef struct acpi_aest_processor_generic {
+ u8 *resource;
+
+} acpi_aest_processor_generic;
+
+/* 1: Memory Error */
+
+typedef struct acpi_aest_memory {
+ u32 srat_proximity_domain;
+
+} acpi_aest_memory;
+
+/* 2: Smmu Error */
+
+typedef struct acpi_aest_smmu {
+ u32 iort_node_reference;
+ u32 subcomponent_reference;
+
+} acpi_aest_smmu;
+
+/* 3: Vendor Defined */
+
+typedef struct acpi_aest_vendor {
+ u32 acpi_hid;
+ u32 acpi_uid;
+ u8 vendor_specific_data[16];
+
+} acpi_aest_vendor;
+
+/* 4: Gic Error */
+
+typedef struct acpi_aest_gic {
+ u32 interface_type;
+ u32 instance_id;
+
+} acpi_aest_gic;
+
+/* Values for interface_type above */
+
+#define ACPI_AEST_GIC_CPU 0
+#define ACPI_AEST_GIC_DISTRIBUTOR 1
+#define ACPI_AEST_GIC_REDISTRIBUTOR 2
+#define ACPI_AEST_GIC_ITS 3
+#define ACPI_AEST_GIC_RESERVED 4 /* 4 and above are reserved */
+
+/* Node Interface Structure */
+
+typedef struct acpi_aest_node_interface {
+ u8 type;
+ u8 reserved[3];
+ u32 flags;
+ u64 address;
+ u32 error_record_index;
+ u32 error_record_count;
+ u64 error_record_implemented;
+ u64 error_status_reporting;
+ u64 addressing_mode;
+
+} acpi_aest_node_interface;
+
+/* Values for Type field above */
+
+#define ACPI_AEST_NODE_SYSTEM_REGISTER 0
+#define ACPI_AEST_NODE_MEMORY_MAPPED 1
+#define ACPI_AEST_XFACE_RESERVED 2 /* 2 and above are reserved */
+
+/* Node Interrupt Structure */
+
+typedef struct acpi_aest_node_interrupt {
+ u8 type;
+ u8 reserved[2];
+ u8 flags;
+ u32 gsiv;
+ u8 iort_id;
+ u8 reserved1[3];
+
+} acpi_aest_node_interrupt;
+
+/* Values for Type field above */
+
+#define ACPI_AEST_NODE_FAULT_HANDLING 0
+#define ACPI_AEST_NODE_ERROR_RECOVERY 1
+#define ACPI_AEST_XRUPT_RESERVED 2 /* 2 and above are reserved */
+
+/*******************************************************************************
+ *
* BDAT - BIOS Data ACPI Table
*
* Conforms to "BIOS Data ACPI Table", Interface Specification v4.0 Draft 5
diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h
index 86903ac5bbc5..9125e2f16329 100644
--- a/include/acpi/actbl3.h
+++ b/include/acpi/actbl3.h
@@ -723,6 +723,10 @@ struct acpi_table_wpbt {
u16 arguments_length;
};
+struct acpi_wpbt_unicode {
+ u16 *unicode_string;
+};
+
/*******************************************************************************
*
* WSMT - Windows SMM Security Mitigations Table
diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
deleted file mode 100644
index 073cf40f431b..000000000000
--- a/include/asm-generic/atomic-long.h
+++ /dev/null
@@ -1,1014 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-// Generated by scripts/atomic/gen-atomic-long.sh
-// DO NOT MODIFY THIS FILE DIRECTLY
-
-#ifndef _ASM_GENERIC_ATOMIC_LONG_H
-#define _ASM_GENERIC_ATOMIC_LONG_H
-
-#include <linux/compiler.h>
-#include <asm/types.h>
-
-#ifdef CONFIG_64BIT
-typedef atomic64_t atomic_long_t;
-#define ATOMIC_LONG_INIT(i) ATOMIC64_INIT(i)
-#define atomic_long_cond_read_acquire atomic64_cond_read_acquire
-#define atomic_long_cond_read_relaxed atomic64_cond_read_relaxed
-#else
-typedef atomic_t atomic_long_t;
-#define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i)
-#define atomic_long_cond_read_acquire atomic_cond_read_acquire
-#define atomic_long_cond_read_relaxed atomic_cond_read_relaxed
-#endif
-
-#ifdef CONFIG_64BIT
-
-static __always_inline long
-atomic_long_read(const atomic_long_t *v)
-{
- return atomic64_read(v);
-}
-
-static __always_inline long
-atomic_long_read_acquire(const atomic_long_t *v)
-{
- return atomic64_read_acquire(v);
-}
-
-static __always_inline void
-atomic_long_set(atomic_long_t *v, long i)
-{
- atomic64_set(v, i);
-}
-
-static __always_inline void
-atomic_long_set_release(atomic_long_t *v, long i)
-{
- atomic64_set_release(v, i);
-}
-
-static __always_inline void
-atomic_long_add(long i, atomic_long_t *v)
-{
- atomic64_add(i, v);
-}
-
-static __always_inline long
-atomic_long_add_return(long i, atomic_long_t *v)
-{
- return atomic64_add_return(i, v);
-}
-
-static __always_inline long
-atomic_long_add_return_acquire(long i, atomic_long_t *v)
-{
- return atomic64_add_return_acquire(i, v);
-}
-
-static __always_inline long
-atomic_long_add_return_release(long i, atomic_long_t *v)
-{
- return atomic64_add_return_release(i, v);
-}
-
-static __always_inline long
-atomic_long_add_return_relaxed(long i, atomic_long_t *v)
-{
- return atomic64_add_return_relaxed(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_add(long i, atomic_long_t *v)
-{
- return atomic64_fetch_add(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_add_acquire(long i, atomic_long_t *v)
-{
- return atomic64_fetch_add_acquire(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_add_release(long i, atomic_long_t *v)
-{
- return atomic64_fetch_add_release(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_add_relaxed(long i, atomic_long_t *v)
-{
- return atomic64_fetch_add_relaxed(i, v);
-}
-
-static __always_inline void
-atomic_long_sub(long i, atomic_long_t *v)
-{
- atomic64_sub(i, v);
-}
-
-static __always_inline long
-atomic_long_sub_return(long i, atomic_long_t *v)
-{
- return atomic64_sub_return(i, v);
-}
-
-static __always_inline long
-atomic_long_sub_return_acquire(long i, atomic_long_t *v)
-{
- return atomic64_sub_return_acquire(i, v);
-}
-
-static __always_inline long
-atomic_long_sub_return_release(long i, atomic_long_t *v)
-{
- return atomic64_sub_return_release(i, v);
-}
-
-static __always_inline long
-atomic_long_sub_return_relaxed(long i, atomic_long_t *v)
-{
- return atomic64_sub_return_relaxed(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_sub(long i, atomic_long_t *v)
-{
- return atomic64_fetch_sub(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_sub_acquire(long i, atomic_long_t *v)
-{
- return atomic64_fetch_sub_acquire(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_sub_release(long i, atomic_long_t *v)
-{
- return atomic64_fetch_sub_release(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_sub_relaxed(long i, atomic_long_t *v)
-{
- return atomic64_fetch_sub_relaxed(i, v);
-}
-
-static __always_inline void
-atomic_long_inc(atomic_long_t *v)
-{
- atomic64_inc(v);
-}
-
-static __always_inline long
-atomic_long_inc_return(atomic_long_t *v)
-{
- return atomic64_inc_return(v);
-}
-
-static __always_inline long
-atomic_long_inc_return_acquire(atomic_long_t *v)
-{
- return atomic64_inc_return_acquire(v);
-}
-
-static __always_inline long
-atomic_long_inc_return_release(atomic_long_t *v)
-{
- return atomic64_inc_return_release(v);
-}
-
-static __always_inline long
-atomic_long_inc_return_relaxed(atomic_long_t *v)
-{
- return atomic64_inc_return_relaxed(v);
-}
-
-static __always_inline long
-atomic_long_fetch_inc(atomic_long_t *v)
-{
- return atomic64_fetch_inc(v);
-}
-
-static __always_inline long
-atomic_long_fetch_inc_acquire(atomic_long_t *v)
-{
- return atomic64_fetch_inc_acquire(v);
-}
-
-static __always_inline long
-atomic_long_fetch_inc_release(atomic_long_t *v)
-{
- return atomic64_fetch_inc_release(v);
-}
-
-static __always_inline long
-atomic_long_fetch_inc_relaxed(atomic_long_t *v)
-{
- return atomic64_fetch_inc_relaxed(v);
-}
-
-static __always_inline void
-atomic_long_dec(atomic_long_t *v)
-{
- atomic64_dec(v);
-}
-
-static __always_inline long
-atomic_long_dec_return(atomic_long_t *v)
-{
- return atomic64_dec_return(v);
-}
-
-static __always_inline long
-atomic_long_dec_return_acquire(atomic_long_t *v)
-{
- return atomic64_dec_return_acquire(v);
-}
-
-static __always_inline long
-atomic_long_dec_return_release(atomic_long_t *v)
-{
- return atomic64_dec_return_release(v);
-}
-
-static __always_inline long
-atomic_long_dec_return_relaxed(atomic_long_t *v)
-{
- return atomic64_dec_return_relaxed(v);
-}
-
-static __always_inline long
-atomic_long_fetch_dec(atomic_long_t *v)
-{
- return atomic64_fetch_dec(v);
-}
-
-static __always_inline long
-atomic_long_fetch_dec_acquire(atomic_long_t *v)
-{
- return atomic64_fetch_dec_acquire(v);
-}
-
-static __always_inline long
-atomic_long_fetch_dec_release(atomic_long_t *v)
-{
- return atomic64_fetch_dec_release(v);
-}
-
-static __always_inline long
-atomic_long_fetch_dec_relaxed(atomic_long_t *v)
-{
- return atomic64_fetch_dec_relaxed(v);
-}
-
-static __always_inline void
-atomic_long_and(long i, atomic_long_t *v)
-{
- atomic64_and(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_and(long i, atomic_long_t *v)
-{
- return atomic64_fetch_and(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_and_acquire(long i, atomic_long_t *v)
-{
- return atomic64_fetch_and_acquire(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_and_release(long i, atomic_long_t *v)
-{
- return atomic64_fetch_and_release(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_and_relaxed(long i, atomic_long_t *v)
-{
- return atomic64_fetch_and_relaxed(i, v);
-}
-
-static __always_inline void
-atomic_long_andnot(long i, atomic_long_t *v)
-{
- atomic64_andnot(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_andnot(long i, atomic_long_t *v)
-{
- return atomic64_fetch_andnot(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v)
-{
- return atomic64_fetch_andnot_acquire(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_andnot_release(long i, atomic_long_t *v)
-{
- return atomic64_fetch_andnot_release(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_andnot_relaxed(long i, atomic_long_t *v)
-{
- return atomic64_fetch_andnot_relaxed(i, v);
-}
-
-static __always_inline void
-atomic_long_or(long i, atomic_long_t *v)
-{
- atomic64_or(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_or(long i, atomic_long_t *v)
-{
- return atomic64_fetch_or(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_or_acquire(long i, atomic_long_t *v)
-{
- return atomic64_fetch_or_acquire(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_or_release(long i, atomic_long_t *v)
-{
- return atomic64_fetch_or_release(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_or_relaxed(long i, atomic_long_t *v)
-{
- return atomic64_fetch_or_relaxed(i, v);
-}
-
-static __always_inline void
-atomic_long_xor(long i, atomic_long_t *v)
-{
- atomic64_xor(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_xor(long i, atomic_long_t *v)
-{
- return atomic64_fetch_xor(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_xor_acquire(long i, atomic_long_t *v)
-{
- return atomic64_fetch_xor_acquire(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_xor_release(long i, atomic_long_t *v)
-{
- return atomic64_fetch_xor_release(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v)
-{
- return atomic64_fetch_xor_relaxed(i, v);
-}
-
-static __always_inline long
-atomic_long_xchg(atomic_long_t *v, long i)
-{
- return atomic64_xchg(v, i);
-}
-
-static __always_inline long
-atomic_long_xchg_acquire(atomic_long_t *v, long i)
-{
- return atomic64_xchg_acquire(v, i);
-}
-
-static __always_inline long
-atomic_long_xchg_release(atomic_long_t *v, long i)
-{
- return atomic64_xchg_release(v, i);
-}
-
-static __always_inline long
-atomic_long_xchg_relaxed(atomic_long_t *v, long i)
-{
- return atomic64_xchg_relaxed(v, i);
-}
-
-static __always_inline long
-atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
-{
- return atomic64_cmpxchg(v, old, new);
-}
-
-static __always_inline long
-atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
-{
- return atomic64_cmpxchg_acquire(v, old, new);
-}
-
-static __always_inline long
-atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
-{
- return atomic64_cmpxchg_release(v, old, new);
-}
-
-static __always_inline long
-atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
-{
- return atomic64_cmpxchg_relaxed(v, old, new);
-}
-
-static __always_inline bool
-atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
-{
- return atomic64_try_cmpxchg(v, (s64 *)old, new);
-}
-
-static __always_inline bool
-atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
-{
- return atomic64_try_cmpxchg_acquire(v, (s64 *)old, new);
-}
-
-static __always_inline bool
-atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
-{
- return atomic64_try_cmpxchg_release(v, (s64 *)old, new);
-}
-
-static __always_inline bool
-atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new)
-{
- return atomic64_try_cmpxchg_relaxed(v, (s64 *)old, new);
-}
-
-static __always_inline bool
-atomic_long_sub_and_test(long i, atomic_long_t *v)
-{
- return atomic64_sub_and_test(i, v);
-}
-
-static __always_inline bool
-atomic_long_dec_and_test(atomic_long_t *v)
-{
- return atomic64_dec_and_test(v);
-}
-
-static __always_inline bool
-atomic_long_inc_and_test(atomic_long_t *v)
-{
- return atomic64_inc_and_test(v);
-}
-
-static __always_inline bool
-atomic_long_add_negative(long i, atomic_long_t *v)
-{
- return atomic64_add_negative(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
-{
- return atomic64_fetch_add_unless(v, a, u);
-}
-
-static __always_inline bool
-atomic_long_add_unless(atomic_long_t *v, long a, long u)
-{
- return atomic64_add_unless(v, a, u);
-}
-
-static __always_inline bool
-atomic_long_inc_not_zero(atomic_long_t *v)
-{
- return atomic64_inc_not_zero(v);
-}
-
-static __always_inline bool
-atomic_long_inc_unless_negative(atomic_long_t *v)
-{
- return atomic64_inc_unless_negative(v);
-}
-
-static __always_inline bool
-atomic_long_dec_unless_positive(atomic_long_t *v)
-{
- return atomic64_dec_unless_positive(v);
-}
-
-static __always_inline long
-atomic_long_dec_if_positive(atomic_long_t *v)
-{
- return atomic64_dec_if_positive(v);
-}
-
-#else /* CONFIG_64BIT */
-
-static __always_inline long
-atomic_long_read(const atomic_long_t *v)
-{
- return atomic_read(v);
-}
-
-static __always_inline long
-atomic_long_read_acquire(const atomic_long_t *v)
-{
- return atomic_read_acquire(v);
-}
-
-static __always_inline void
-atomic_long_set(atomic_long_t *v, long i)
-{
- atomic_set(v, i);
-}
-
-static __always_inline void
-atomic_long_set_release(atomic_long_t *v, long i)
-{
- atomic_set_release(v, i);
-}
-
-static __always_inline void
-atomic_long_add(long i, atomic_long_t *v)
-{
- atomic_add(i, v);
-}
-
-static __always_inline long
-atomic_long_add_return(long i, atomic_long_t *v)
-{
- return atomic_add_return(i, v);
-}
-
-static __always_inline long
-atomic_long_add_return_acquire(long i, atomic_long_t *v)
-{
- return atomic_add_return_acquire(i, v);
-}
-
-static __always_inline long
-atomic_long_add_return_release(long i, atomic_long_t *v)
-{
- return atomic_add_return_release(i, v);
-}
-
-static __always_inline long
-atomic_long_add_return_relaxed(long i, atomic_long_t *v)
-{
- return atomic_add_return_relaxed(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_add(long i, atomic_long_t *v)
-{
- return atomic_fetch_add(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_add_acquire(long i, atomic_long_t *v)
-{
- return atomic_fetch_add_acquire(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_add_release(long i, atomic_long_t *v)
-{
- return atomic_fetch_add_release(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_add_relaxed(long i, atomic_long_t *v)
-{
- return atomic_fetch_add_relaxed(i, v);
-}
-
-static __always_inline void
-atomic_long_sub(long i, atomic_long_t *v)
-{
- atomic_sub(i, v);
-}
-
-static __always_inline long
-atomic_long_sub_return(long i, atomic_long_t *v)
-{
- return atomic_sub_return(i, v);
-}
-
-static __always_inline long
-atomic_long_sub_return_acquire(long i, atomic_long_t *v)
-{
- return atomic_sub_return_acquire(i, v);
-}
-
-static __always_inline long
-atomic_long_sub_return_release(long i, atomic_long_t *v)
-{
- return atomic_sub_return_release(i, v);
-}
-
-static __always_inline long
-atomic_long_sub_return_relaxed(long i, atomic_long_t *v)
-{
- return atomic_sub_return_relaxed(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_sub(long i, atomic_long_t *v)
-{
- return atomic_fetch_sub(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_sub_acquire(long i, atomic_long_t *v)
-{
- return atomic_fetch_sub_acquire(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_sub_release(long i, atomic_long_t *v)
-{
- return atomic_fetch_sub_release(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_sub_relaxed(long i, atomic_long_t *v)
-{
- return atomic_fetch_sub_relaxed(i, v);
-}
-
-static __always_inline void
-atomic_long_inc(atomic_long_t *v)
-{
- atomic_inc(v);
-}
-
-static __always_inline long
-atomic_long_inc_return(atomic_long_t *v)
-{
- return atomic_inc_return(v);
-}
-
-static __always_inline long
-atomic_long_inc_return_acquire(atomic_long_t *v)
-{
- return atomic_inc_return_acquire(v);
-}
-
-static __always_inline long
-atomic_long_inc_return_release(atomic_long_t *v)
-{
- return atomic_inc_return_release(v);
-}
-
-static __always_inline long
-atomic_long_inc_return_relaxed(atomic_long_t *v)
-{
- return atomic_inc_return_relaxed(v);
-}
-
-static __always_inline long
-atomic_long_fetch_inc(atomic_long_t *v)
-{
- return atomic_fetch_inc(v);
-}
-
-static __always_inline long
-atomic_long_fetch_inc_acquire(atomic_long_t *v)
-{
- return atomic_fetch_inc_acquire(v);
-}
-
-static __always_inline long
-atomic_long_fetch_inc_release(atomic_long_t *v)
-{
- return atomic_fetch_inc_release(v);
-}
-
-static __always_inline long
-atomic_long_fetch_inc_relaxed(atomic_long_t *v)
-{
- return atomic_fetch_inc_relaxed(v);
-}
-
-static __always_inline void
-atomic_long_dec(atomic_long_t *v)
-{
- atomic_dec(v);
-}
-
-static __always_inline long
-atomic_long_dec_return(atomic_long_t *v)
-{
- return atomic_dec_return(v);
-}
-
-static __always_inline long
-atomic_long_dec_return_acquire(atomic_long_t *v)
-{
- return atomic_dec_return_acquire(v);
-}
-
-static __always_inline long
-atomic_long_dec_return_release(atomic_long_t *v)
-{
- return atomic_dec_return_release(v);
-}
-
-static __always_inline long
-atomic_long_dec_return_relaxed(atomic_long_t *v)
-{
- return atomic_dec_return_relaxed(v);
-}
-
-static __always_inline long
-atomic_long_fetch_dec(atomic_long_t *v)
-{
- return atomic_fetch_dec(v);
-}
-
-static __always_inline long
-atomic_long_fetch_dec_acquire(atomic_long_t *v)
-{
- return atomic_fetch_dec_acquire(v);
-}
-
-static __always_inline long
-atomic_long_fetch_dec_release(atomic_long_t *v)
-{
- return atomic_fetch_dec_release(v);
-}
-
-static __always_inline long
-atomic_long_fetch_dec_relaxed(atomic_long_t *v)
-{
- return atomic_fetch_dec_relaxed(v);
-}
-
-static __always_inline void
-atomic_long_and(long i, atomic_long_t *v)
-{
- atomic_and(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_and(long i, atomic_long_t *v)
-{
- return atomic_fetch_and(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_and_acquire(long i, atomic_long_t *v)
-{
- return atomic_fetch_and_acquire(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_and_release(long i, atomic_long_t *v)
-{
- return atomic_fetch_and_release(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_and_relaxed(long i, atomic_long_t *v)
-{
- return atomic_fetch_and_relaxed(i, v);
-}
-
-static __always_inline void
-atomic_long_andnot(long i, atomic_long_t *v)
-{
- atomic_andnot(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_andnot(long i, atomic_long_t *v)
-{
- return atomic_fetch_andnot(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v)
-{
- return atomic_fetch_andnot_acquire(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_andnot_release(long i, atomic_long_t *v)
-{
- return atomic_fetch_andnot_release(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_andnot_relaxed(long i, atomic_long_t *v)
-{
- return atomic_fetch_andnot_relaxed(i, v);
-}
-
-static __always_inline void
-atomic_long_or(long i, atomic_long_t *v)
-{
- atomic_or(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_or(long i, atomic_long_t *v)
-{
- return atomic_fetch_or(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_or_acquire(long i, atomic_long_t *v)
-{
- return atomic_fetch_or_acquire(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_or_release(long i, atomic_long_t *v)
-{
- return atomic_fetch_or_release(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_or_relaxed(long i, atomic_long_t *v)
-{
- return atomic_fetch_or_relaxed(i, v);
-}
-
-static __always_inline void
-atomic_long_xor(long i, atomic_long_t *v)
-{
- atomic_xor(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_xor(long i, atomic_long_t *v)
-{
- return atomic_fetch_xor(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_xor_acquire(long i, atomic_long_t *v)
-{
- return atomic_fetch_xor_acquire(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_xor_release(long i, atomic_long_t *v)
-{
- return atomic_fetch_xor_release(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v)
-{
- return atomic_fetch_xor_relaxed(i, v);
-}
-
-static __always_inline long
-atomic_long_xchg(atomic_long_t *v, long i)
-{
- return atomic_xchg(v, i);
-}
-
-static __always_inline long
-atomic_long_xchg_acquire(atomic_long_t *v, long i)
-{
- return atomic_xchg_acquire(v, i);
-}
-
-static __always_inline long
-atomic_long_xchg_release(atomic_long_t *v, long i)
-{
- return atomic_xchg_release(v, i);
-}
-
-static __always_inline long
-atomic_long_xchg_relaxed(atomic_long_t *v, long i)
-{
- return atomic_xchg_relaxed(v, i);
-}
-
-static __always_inline long
-atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
-{
- return atomic_cmpxchg(v, old, new);
-}
-
-static __always_inline long
-atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
-{
- return atomic_cmpxchg_acquire(v, old, new);
-}
-
-static __always_inline long
-atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
-{
- return atomic_cmpxchg_release(v, old, new);
-}
-
-static __always_inline long
-atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
-{
- return atomic_cmpxchg_relaxed(v, old, new);
-}
-
-static __always_inline bool
-atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
-{
- return atomic_try_cmpxchg(v, (int *)old, new);
-}
-
-static __always_inline bool
-atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
-{
- return atomic_try_cmpxchg_acquire(v, (int *)old, new);
-}
-
-static __always_inline bool
-atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
-{
- return atomic_try_cmpxchg_release(v, (int *)old, new);
-}
-
-static __always_inline bool
-atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new)
-{
- return atomic_try_cmpxchg_relaxed(v, (int *)old, new);
-}
-
-static __always_inline bool
-atomic_long_sub_and_test(long i, atomic_long_t *v)
-{
- return atomic_sub_and_test(i, v);
-}
-
-static __always_inline bool
-atomic_long_dec_and_test(atomic_long_t *v)
-{
- return atomic_dec_and_test(v);
-}
-
-static __always_inline bool
-atomic_long_inc_and_test(atomic_long_t *v)
-{
- return atomic_inc_and_test(v);
-}
-
-static __always_inline bool
-atomic_long_add_negative(long i, atomic_long_t *v)
-{
- return atomic_add_negative(i, v);
-}
-
-static __always_inline long
-atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
-{
- return atomic_fetch_add_unless(v, a, u);
-}
-
-static __always_inline bool
-atomic_long_add_unless(atomic_long_t *v, long a, long u)
-{
- return atomic_add_unless(v, a, u);
-}
-
-static __always_inline bool
-atomic_long_inc_not_zero(atomic_long_t *v)
-{
- return atomic_inc_not_zero(v);
-}
-
-static __always_inline bool
-atomic_long_inc_unless_negative(atomic_long_t *v)
-{
- return atomic_inc_unless_negative(v);
-}
-
-static __always_inline bool
-atomic_long_dec_unless_positive(atomic_long_t *v)
-{
- return atomic_dec_unless_positive(v);
-}
-
-static __always_inline long
-atomic_long_dec_if_positive(atomic_long_t *v)
-{
- return atomic_dec_if_positive(v);
-}
-
-#endif /* CONFIG_64BIT */
-#endif /* _ASM_GENERIC_ATOMIC_LONG_H */
-// a624200981f552b2c6be4f32fe44da8289f30d87
diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
index 0e7316a86240..3096f086b5a3 100644
--- a/include/asm-generic/bitops/atomic.h
+++ b/include/asm-generic/bitops/atomic.h
@@ -11,25 +11,29 @@
* See Documentation/atomic_bitops.txt for details.
*/
-static __always_inline void set_bit(unsigned int nr, volatile unsigned long *p)
+static __always_inline void
+arch_set_bit(unsigned int nr, volatile unsigned long *p)
{
p += BIT_WORD(nr);
- atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p);
+ arch_atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p);
}
-static __always_inline void clear_bit(unsigned int nr, volatile unsigned long *p)
+static __always_inline void
+arch_clear_bit(unsigned int nr, volatile unsigned long *p)
{
p += BIT_WORD(nr);
- atomic_long_andnot(BIT_MASK(nr), (atomic_long_t *)p);
+ arch_atomic_long_andnot(BIT_MASK(nr), (atomic_long_t *)p);
}
-static __always_inline void change_bit(unsigned int nr, volatile unsigned long *p)
+static __always_inline void
+arch_change_bit(unsigned int nr, volatile unsigned long *p)
{
p += BIT_WORD(nr);
- atomic_long_xor(BIT_MASK(nr), (atomic_long_t *)p);
+ arch_atomic_long_xor(BIT_MASK(nr), (atomic_long_t *)p);
}
-static inline int test_and_set_bit(unsigned int nr, volatile unsigned long *p)
+static __always_inline int
+arch_test_and_set_bit(unsigned int nr, volatile unsigned long *p)
{
long old;
unsigned long mask = BIT_MASK(nr);
@@ -38,11 +42,12 @@ static inline int test_and_set_bit(unsigned int nr, volatile unsigned long *p)
if (READ_ONCE(*p) & mask)
return 1;
- old = atomic_long_fetch_or(mask, (atomic_long_t *)p);
+ old = arch_atomic_long_fetch_or(mask, (atomic_long_t *)p);
return !!(old & mask);
}
-static inline int test_and_clear_bit(unsigned int nr, volatile unsigned long *p)
+static __always_inline int
+arch_test_and_clear_bit(unsigned int nr, volatile unsigned long *p)
{
long old;
unsigned long mask = BIT_MASK(nr);
@@ -51,18 +56,21 @@ static inline int test_and_clear_bit(unsigned int nr, volatile unsigned long *p)
if (!(READ_ONCE(*p) & mask))
return 0;
- old = atomic_long_fetch_andnot(mask, (atomic_long_t *)p);
+ old = arch_atomic_long_fetch_andnot(mask, (atomic_long_t *)p);
return !!(old & mask);
}
-static inline int test_and_change_bit(unsigned int nr, volatile unsigned long *p)
+static __always_inline int
+arch_test_and_change_bit(unsigned int nr, volatile unsigned long *p)
{
long old;
unsigned long mask = BIT_MASK(nr);
p += BIT_WORD(nr);
- old = atomic_long_fetch_xor(mask, (atomic_long_t *)p);
+ old = arch_atomic_long_fetch_xor(mask, (atomic_long_t *)p);
return !!(old & mask);
}
+#include <asm-generic/bitops/instrumented-atomic.h>
+
#endif /* _ASM_GENERIC_BITOPS_ATOMIC_H */
diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h
index 3ae021368f48..630f2f6b9595 100644
--- a/include/asm-generic/bitops/lock.h
+++ b/include/asm-generic/bitops/lock.h
@@ -7,7 +7,7 @@
#include <asm/barrier.h>
/**
- * test_and_set_bit_lock - Set a bit and return its old value, for lock
+ * arch_test_and_set_bit_lock - Set a bit and return its old value, for lock
* @nr: Bit to set
* @addr: Address to count from
*
@@ -15,8 +15,8 @@
* the returned value is 0.
* It can be used to implement bit locks.
*/
-static inline int test_and_set_bit_lock(unsigned int nr,
- volatile unsigned long *p)
+static __always_inline int
+arch_test_and_set_bit_lock(unsigned int nr, volatile unsigned long *p)
{
long old;
unsigned long mask = BIT_MASK(nr);
@@ -25,26 +25,27 @@ static inline int test_and_set_bit_lock(unsigned int nr,
if (READ_ONCE(*p) & mask)
return 1;
- old = atomic_long_fetch_or_acquire(mask, (atomic_long_t *)p);
+ old = arch_atomic_long_fetch_or_acquire(mask, (atomic_long_t *)p);
return !!(old & mask);
}
/**
- * clear_bit_unlock - Clear a bit in memory, for unlock
+ * arch_clear_bit_unlock - Clear a bit in memory, for unlock
* @nr: the bit to set
* @addr: the address to start counting from
*
* This operation is atomic and provides release barrier semantics.
*/
-static inline void clear_bit_unlock(unsigned int nr, volatile unsigned long *p)
+static __always_inline void
+arch_clear_bit_unlock(unsigned int nr, volatile unsigned long *p)
{
p += BIT_WORD(nr);
- atomic_long_fetch_andnot_release(BIT_MASK(nr), (atomic_long_t *)p);
+ arch_atomic_long_fetch_andnot_release(BIT_MASK(nr), (atomic_long_t *)p);
}
/**
- * __clear_bit_unlock - Clear a bit in memory, for unlock
+ * arch___clear_bit_unlock - Clear a bit in memory, for unlock
* @nr: the bit to set
* @addr: the address to start counting from
*
@@ -54,38 +55,40 @@ static inline void clear_bit_unlock(unsigned int nr, volatile unsigned long *p)
*
* See for example x86's implementation.
*/
-static inline void __clear_bit_unlock(unsigned int nr,
- volatile unsigned long *p)
+static inline void
+arch___clear_bit_unlock(unsigned int nr, volatile unsigned long *p)
{
unsigned long old;
p += BIT_WORD(nr);
old = READ_ONCE(*p);
old &= ~BIT_MASK(nr);
- atomic_long_set_release((atomic_long_t *)p, old);
+ arch_atomic_long_set_release((atomic_long_t *)p, old);
}
/**
- * clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom
- * byte is negative, for unlock.
+ * arch_clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom
+ * byte is negative, for unlock.
* @nr: the bit to clear
* @addr: the address to start counting from
*
* This is a bit of a one-trick-pony for the filemap code, which clears
* PG_locked and tests PG_waiters,
*/
-#ifndef clear_bit_unlock_is_negative_byte
-static inline bool clear_bit_unlock_is_negative_byte(unsigned int nr,
- volatile unsigned long *p)
+#ifndef arch_clear_bit_unlock_is_negative_byte
+static inline bool arch_clear_bit_unlock_is_negative_byte(unsigned int nr,
+ volatile unsigned long *p)
{
long old;
unsigned long mask = BIT_MASK(nr);
p += BIT_WORD(nr);
- old = atomic_long_fetch_andnot_release(mask, (atomic_long_t *)p);
+ old = arch_atomic_long_fetch_andnot_release(mask, (atomic_long_t *)p);
return !!(old & BIT(7));
}
-#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
+#define arch_clear_bit_unlock_is_negative_byte arch_clear_bit_unlock_is_negative_byte
#endif
+#include <asm-generic/bitops/instrumented-lock.h>
+
#endif /* _ASM_GENERIC_BITOPS_LOCK_H_ */
diff --git a/include/asm-generic/bitops/non-atomic.h b/include/asm-generic/bitops/non-atomic.h
index 7e10c4b50c5d..365377fb104b 100644
--- a/include/asm-generic/bitops/non-atomic.h
+++ b/include/asm-generic/bitops/non-atomic.h
@@ -5,7 +5,7 @@
#include <asm/types.h>
/**
- * __set_bit - Set a bit in memory
+ * arch___set_bit - Set a bit in memory
* @nr: the bit to set
* @addr: the address to start counting from
*
@@ -13,24 +13,28 @@
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
-static inline void __set_bit(int nr, volatile unsigned long *addr)
+static __always_inline void
+arch___set_bit(int nr, volatile unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
*p |= mask;
}
+#define __set_bit arch___set_bit
-static inline void __clear_bit(int nr, volatile unsigned long *addr)
+static __always_inline void
+arch___clear_bit(int nr, volatile unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
*p &= ~mask;
}
+#define __clear_bit arch___clear_bit
/**
- * __change_bit - Toggle a bit in memory
+ * arch___change_bit - Toggle a bit in memory
* @nr: the bit to change
* @addr: the address to start counting from
*
@@ -38,16 +42,18 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
-static inline void __change_bit(int nr, volatile unsigned long *addr)
+static __always_inline
+void arch___change_bit(int nr, volatile unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
*p ^= mask;
}
+#define __change_bit arch___change_bit
/**
- * __test_and_set_bit - Set a bit and return its old value
+ * arch___test_and_set_bit - Set a bit and return its old value
* @nr: Bit to set
* @addr: Address to count from
*
@@ -55,7 +61,8 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
*/
-static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+static __always_inline int
+arch___test_and_set_bit(int nr, volatile unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
@@ -64,9 +71,10 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
*p = old | mask;
return (old & mask) != 0;
}
+#define __test_and_set_bit arch___test_and_set_bit
/**
- * __test_and_clear_bit - Clear a bit and return its old value
+ * arch___test_and_clear_bit - Clear a bit and return its old value
* @nr: Bit to clear
* @addr: Address to count from
*
@@ -74,7 +82,8 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
*/
-static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+static __always_inline int
+arch___test_and_clear_bit(int nr, volatile unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
@@ -83,10 +92,11 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
*p = old & ~mask;
return (old & mask) != 0;
}
+#define __test_and_clear_bit arch___test_and_clear_bit
/* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr,
- volatile unsigned long *addr)
+static __always_inline int
+arch___test_and_change_bit(int nr, volatile unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
@@ -95,15 +105,18 @@ static inline int __test_and_change_bit(int nr,
*p = old ^ mask;
return (old & mask) != 0;
}
+#define __test_and_change_bit arch___test_and_change_bit
/**
- * test_bit - Determine whether a bit is set
+ * arch_test_bit - Determine whether a bit is set
* @nr: bit number to test
* @addr: Address to start counting from
*/
-static inline int test_bit(int nr, const volatile unsigned long *addr)
+static __always_inline int
+arch_test_bit(int nr, const volatile unsigned long *addr)
{
return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
}
+#define test_bit arch_test_bit
#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
diff --git a/include/asm-generic/compat.h b/include/asm-generic/compat.h
index 30f7b18a36f9..d46c0201cc34 100644
--- a/include/asm-generic/compat.h
+++ b/include/asm-generic/compat.h
@@ -20,7 +20,18 @@ typedef u16 compat_ushort_t;
typedef u32 compat_uint_t;
typedef u32 compat_ulong_t;
typedef u32 compat_uptr_t;
+typedef u32 compat_caddr_t;
typedef u32 compat_aio_context_t;
+typedef u32 compat_old_sigset_t;
+
+#ifndef __compat_uid32_t
+typedef u32 __compat_uid32_t;
+typedef u32 __compat_gid32_t;
+#endif
+
+#ifndef compat_mode_t
+typedef u32 compat_mode_t;
+#endif
#ifdef CONFIG_COMPAT_FOR_U64_ALIGNMENT
typedef s64 __attribute__((aligned(4))) compat_s64;
@@ -30,4 +41,10 @@ typedef s64 compat_s64;
typedef u64 compat_u64;
#endif
+#ifndef _COMPAT_NSIG
+typedef u32 compat_sigset_word;
+#define _COMPAT_NSIG _NSIG
+#define _COMPAT_NSIG_BPW 32
+#endif
+
#endif
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 17325416e2de..62669b36a772 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -586,6 +586,7 @@
NOINSTR_TEXT \
*(.text..refcount) \
*(.ref.text) \
+ *(.text.asan.* .text.tsan.*) \
TEXT_CFI_JT \
MEM_KEEP(init.text*) \
MEM_KEEP(exit.text*) \
diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h
index 47accec68cb0..f603325c0c30 100644
--- a/include/crypto/public_key.h
+++ b/include/crypto/public_key.h
@@ -38,9 +38,9 @@ extern void public_key_free(struct public_key *key);
struct public_key_signature {
struct asymmetric_key_id *auth_ids[2];
u8 *s; /* Signature */
- u32 s_size; /* Number of bytes in signature */
u8 *digest;
- u8 digest_size; /* Number of bytes in digest */
+ u32 s_size; /* Number of bytes in signature */
+ u32 digest_size; /* Number of bytes in digest */
const char *pkey_algo;
const char *hash_algo;
const char *encoding;
diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h
index 7afd730d16ff..709f286e7b25 100644
--- a/include/crypto/sm4.h
+++ b/include/crypto/sm4.h
@@ -3,6 +3,7 @@
/*
* Common values for the SM4 algorithm
* Copyright (C) 2018 ARM Limited or its affiliates.
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#ifndef _CRYPTO_SM4_H
@@ -15,17 +16,29 @@
#define SM4_BLOCK_SIZE 16
#define SM4_RKEY_WORDS 32
-struct crypto_sm4_ctx {
+struct sm4_ctx {
u32 rkey_enc[SM4_RKEY_WORDS];
u32 rkey_dec[SM4_RKEY_WORDS];
};
-int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
- unsigned int key_len);
-int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
+/**
+ * sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016
+ * @ctx: The location where the computed key will be stored.
+ * @in_key: The supplied key.
+ * @key_len: The length of the supplied key.
+ *
+ * Returns 0 on success. The function fails only if an invalid key size (or
+ * pointer) is supplied.
+ */
+int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key,
unsigned int key_len);
-void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in);
-void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+/**
+ * sm4_crypt_block - Encrypt or decrypt a single SM4 block
+ * @rk: The rkey_enc for encrypt or rkey_dec for decrypt
+ * @out: Buffer to store output data
+ * @in: Buffer containing the input data
+ */
+void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in);
#endif
diff --git a/include/dt-bindings/clock/ingenic,sysost.h b/include/dt-bindings/clock/ingenic,sysost.h
index 063791b01ab3..d7aa42c08ded 100644
--- a/include/dt-bindings/clock/ingenic,sysost.h
+++ b/include/dt-bindings/clock/ingenic,sysost.h
@@ -13,4 +13,23 @@
#define OST_CLK_PERCPU_TIMER2 3
#define OST_CLK_PERCPU_TIMER3 4
+#define OST_CLK_EVENT_TIMER 1
+
+#define OST_CLK_EVENT_TIMER0 0
+#define OST_CLK_EVENT_TIMER1 1
+#define OST_CLK_EVENT_TIMER2 2
+#define OST_CLK_EVENT_TIMER3 3
+#define OST_CLK_EVENT_TIMER4 4
+#define OST_CLK_EVENT_TIMER5 5
+#define OST_CLK_EVENT_TIMER6 6
+#define OST_CLK_EVENT_TIMER7 7
+#define OST_CLK_EVENT_TIMER8 8
+#define OST_CLK_EVENT_TIMER9 9
+#define OST_CLK_EVENT_TIMER10 10
+#define OST_CLK_EVENT_TIMER11 11
+#define OST_CLK_EVENT_TIMER12 12
+#define OST_CLK_EVENT_TIMER13 13
+#define OST_CLK_EVENT_TIMER14 14
+#define OST_CLK_EVENT_TIMER15 15
+
#endif /* __DT_BINDINGS_CLOCK_INGENIC_OST_H__ */
diff --git a/include/dt-bindings/interconnect/qcom,sc8180x.h b/include/dt-bindings/interconnect/qcom,sc8180x.h
new file mode 100644
index 000000000000..235b525d2803
--- /dev/null
+++ b/include/dt-bindings/interconnect/qcom,sc8180x.h
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Qualcomm SC8180x interconnect IDs
+ *
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_SC8180X_H
+#define __DT_BINDINGS_INTERCONNECT_QCOM_SC8180X_H
+
+#define MASTER_A1NOC_CFG 0
+#define MASTER_UFS_CARD 1
+#define MASTER_UFS_GEN4 2
+#define MASTER_UFS_MEM 3
+#define MASTER_USB3 4
+#define MASTER_USB3_1 5
+#define MASTER_USB3_2 6
+#define A1NOC_SNOC_SLV 7
+#define SLAVE_SERVICE_A1NOC 8
+
+#define MASTER_A2NOC_CFG 0
+#define MASTER_QDSS_BAM 1
+#define MASTER_QSPI_0 2
+#define MASTER_QSPI_1 3
+#define MASTER_QUP_0 4
+#define MASTER_QUP_1 5
+#define MASTER_QUP_2 6
+#define MASTER_SENSORS_AHB 7
+#define MASTER_CRYPTO_CORE_0 8
+#define MASTER_IPA 9
+#define MASTER_EMAC 10
+#define MASTER_PCIE 11
+#define MASTER_PCIE_1 12
+#define MASTER_PCIE_2 13
+#define MASTER_PCIE_3 14
+#define MASTER_QDSS_ETR 15
+#define MASTER_SDCC_2 16
+#define MASTER_SDCC_4 17
+#define A2NOC_SNOC_SLV 18
+#define SLAVE_ANOC_PCIE_GEM_NOC 19
+#define SLAVE_SERVICE_A2NOC 20
+
+#define MASTER_CAMNOC_HF0_UNCOMP 0
+#define MASTER_CAMNOC_HF1_UNCOMP 1
+#define MASTER_CAMNOC_SF_UNCOMP 2
+#define SLAVE_CAMNOC_UNCOMP 3
+
+#define MASTER_NPU 0
+#define SLAVE_CDSP_MEM_NOC 1
+
+#define SNOC_CNOC_MAS 0
+#define SLAVE_A1NOC_CFG 1
+#define SLAVE_A2NOC_CFG 2
+#define SLAVE_AHB2PHY_CENTER 3
+#define SLAVE_AHB2PHY_EAST 4
+#define SLAVE_AHB2PHY_WEST 5
+#define SLAVE_AHB2PHY_SOUTH 6
+#define SLAVE_AOP 7
+#define SLAVE_AOSS 8
+#define SLAVE_CAMERA_CFG 9
+#define SLAVE_CLK_CTL 10
+#define SLAVE_CDSP_CFG 11
+#define SLAVE_RBCPR_CX_CFG 12
+#define SLAVE_RBCPR_MMCX_CFG 13
+#define SLAVE_RBCPR_MX_CFG 14
+#define SLAVE_CRYPTO_0_CFG 15
+#define SLAVE_CNOC_DDRSS 16
+#define SLAVE_DISPLAY_CFG 17
+#define SLAVE_EMAC_CFG 18
+#define SLAVE_GLM 19
+#define SLAVE_GRAPHICS_3D_CFG 20
+#define SLAVE_IMEM_CFG 21
+#define SLAVE_IPA_CFG 22
+#define SLAVE_CNOC_MNOC_CFG 23
+#define SLAVE_NPU_CFG 24
+#define SLAVE_PCIE_0_CFG 25
+#define SLAVE_PCIE_1_CFG 26
+#define SLAVE_PCIE_2_CFG 27
+#define SLAVE_PCIE_3_CFG 28
+#define SLAVE_PDM 29
+#define SLAVE_PIMEM_CFG 30
+#define SLAVE_PRNG 31
+#define SLAVE_QDSS_CFG 32
+#define SLAVE_QSPI_0 33
+#define SLAVE_QSPI_1 34
+#define SLAVE_QUP_1 35
+#define SLAVE_QUP_2 36
+#define SLAVE_QUP_0 37
+#define SLAVE_SDCC_2 38
+#define SLAVE_SDCC_4 39
+#define SLAVE_SECURITY 40
+#define SLAVE_SNOC_CFG 41
+#define SLAVE_SPSS_CFG 42
+#define SLAVE_TCSR 43
+#define SLAVE_TLMM_EAST 44
+#define SLAVE_TLMM_SOUTH 45
+#define SLAVE_TLMM_WEST 46
+#define SLAVE_TSIF 47
+#define SLAVE_UFS_CARD_CFG 48
+#define SLAVE_UFS_MEM_0_CFG 49
+#define SLAVE_UFS_MEM_1_CFG 50
+#define SLAVE_USB3 51
+#define SLAVE_USB3_1 52
+#define SLAVE_USB3_2 53
+#define SLAVE_VENUS_CFG 54
+#define SLAVE_VSENSE_CTRL_CFG 55
+#define SLAVE_SERVICE_CNOC 56
+
+#define MASTER_CNOC_DC_NOC 0
+#define SLAVE_GEM_NOC_CFG 1
+#define SLAVE_LLCC_CFG 2
+
+#define MASTER_AMPSS_M0 0
+#define MASTER_GPU_TCU 1
+#define MASTER_SYS_TCU 2
+#define MASTER_GEM_NOC_CFG 3
+#define MASTER_COMPUTE_NOC 4
+#define MASTER_GRAPHICS_3D 5
+#define MASTER_MNOC_HF_MEM_NOC 6
+#define MASTER_MNOC_SF_MEM_NOC 7
+#define MASTER_GEM_NOC_PCIE_SNOC 8
+#define MASTER_SNOC_GC_MEM_NOC 9
+#define MASTER_SNOC_SF_MEM_NOC 10
+#define MASTER_ECC 11
+#define SLAVE_MSS_PROC_MS_MPU_CFG 12
+#define SLAVE_ECC 13
+#define SLAVE_GEM_NOC_SNOC 14
+#define SLAVE_LLCC 15
+#define SLAVE_SERVICE_GEM_NOC 16
+#define SLAVE_SERVICE_GEM_NOC_1 17
+
+#define MASTER_IPA_CORE 0
+#define SLAVE_IPA_CORE 1
+
+#define MASTER_LLCC 0
+#define SLAVE_EBI_CH0 1
+
+#define MASTER_CNOC_MNOC_CFG 0
+#define MASTER_CAMNOC_HF0 1
+#define MASTER_CAMNOC_HF1 2
+#define MASTER_CAMNOC_SF 3
+#define MASTER_MDP_PORT0 4
+#define MASTER_MDP_PORT1 5
+#define MASTER_ROTATOR 6
+#define MASTER_VIDEO_P0 7
+#define MASTER_VIDEO_P1 8
+#define MASTER_VIDEO_PROC 9
+#define SLAVE_MNOC_SF_MEM_NOC 10
+#define SLAVE_MNOC_HF_MEM_NOC 11
+#define SLAVE_SERVICE_MNOC 12
+
+#define MASTER_SNOC_CFG 0
+#define A1NOC_SNOC_MAS 1
+#define A2NOC_SNOC_MAS 2
+#define MASTER_GEM_NOC_SNOC 3
+#define MASTER_PIMEM 4
+#define MASTER_GIC 5
+#define SLAVE_APPSS 6
+#define SNOC_CNOC_SLV 7
+#define SLAVE_SNOC_GEM_NOC_GC 8
+#define SLAVE_SNOC_GEM_NOC_SF 9
+#define SLAVE_OCIMEM 10
+#define SLAVE_PIMEM 11
+#define SLAVE_SERVICE_SNOC 12
+#define SLAVE_PCIE_0 13
+#define SLAVE_PCIE_1 14
+#define SLAVE_PCIE_2 15
+#define SLAVE_PCIE_3 16
+#define SLAVE_QDSS_STM 17
+#define SLAVE_TCU 18
+
+#define MASTER_MNOC_HF_MEM_NOC_DISPLAY 0
+#define MASTER_MNOC_SF_MEM_NOC_DISPLAY 1
+#define SLAVE_LLCC_DISPLAY 2
+
+#define MASTER_LLCC_DISPLAY 0
+#define SLAVE_EBI_CH0_DISPLAY 1
+
+#define MASTER_MDP_PORT0_DISPLAY 0
+#define MASTER_MDP_PORT1_DISPLAY 1
+#define MASTER_ROTATOR_DISPLAY 2
+#define SLAVE_MNOC_SF_MEM_NOC_DISPLAY 3
+#define SLAVE_MNOC_HF_MEM_NOC_DISPLAY 4
+
+#endif
diff --git a/include/dt-bindings/power/summit,smb347-charger.h b/include/dt-bindings/power/summit,smb347-charger.h
index d918bf321a71..3205699b5e41 100644
--- a/include/dt-bindings/power/summit,smb347-charger.h
+++ b/include/dt-bindings/power/summit,smb347-charger.h
@@ -16,4 +16,8 @@
#define SMB3XX_CHG_ENABLE_PIN_ACTIVE_LOW 1
#define SMB3XX_CHG_ENABLE_PIN_ACTIVE_HIGH 2
+/* Polarity of INOK signal */
+#define SMB3XX_SYSOK_INOK_ACTIVE_LOW 0
+#define SMB3XX_SYSOK_INOK_ACTIVE_HIGH 1
+
#endif
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 72e4f7fd268c..974d497a897d 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -249,7 +249,7 @@ void acpi_table_print_madt_entry (struct acpi_subtable_header *madt);
/* the following numa functions are architecture-dependent */
void acpi_numa_slit_init (struct acpi_table_slit *slit);
-#if defined(CONFIG_X86) || defined(CONFIG_IA64)
+#if defined(CONFIG_X86) || defined(CONFIG_IA64) || defined(CONFIG_LOONGARCH)
void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa);
#else
static inline void
@@ -1380,13 +1380,11 @@ static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
#endif
#ifdef CONFIG_ACPI
-extern int acpi_platform_notify(struct device *dev, enum kobject_action action);
+extern void acpi_device_notify(struct device *dev);
+extern void acpi_device_notify_remove(struct device *dev);
#else
-static inline int
-acpi_platform_notify(struct device *dev, enum kobject_action action)
-{
- return 0;
-}
+static inline void acpi_device_notify(struct device *dev) { }
+static inline void acpi_device_notify_remove(struct device *dev) { }
#endif
#endif /*_LINUX_ACPI_H*/
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index ed1d3ffd5b9d..8dd57c3a99e9 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -77,9 +77,8 @@
__ret; \
})
-#include <linux/atomic-arch-fallback.h>
-#include <asm-generic/atomic-instrumented.h>
-
-#include <asm-generic/atomic-long.h>
+#include <linux/atomic/atomic-arch-fallback.h>
+#include <linux/atomic/atomic-long.h>
+#include <linux/atomic/atomic-instrumented.h>
#endif /* _LINUX_ATOMIC_H */
diff --git a/include/linux/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h
index a3dba31df01e..a3dba31df01e 100644
--- a/include/linux/atomic-arch-fallback.h
+++ b/include/linux/atomic/atomic-arch-fallback.h
diff --git a/include/asm-generic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h
index bc45af52c93b..a0f654370da3 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/linux/atomic/atomic-instrumented.h
@@ -14,8 +14,8 @@
* arch_ variants (i.e. arch_atomic_read()/arch_atomic_cmpxchg()) to avoid
* double instrumentation.
*/
-#ifndef _ASM_GENERIC_ATOMIC_INSTRUMENTED_H
-#define _ASM_GENERIC_ATOMIC_INSTRUMENTED_H
+#ifndef _LINUX_ATOMIC_INSTRUMENTED_H
+#define _LINUX_ATOMIC_INSTRUMENTED_H
#include <linux/build_bug.h>
#include <linux/compiler.h>
@@ -1177,6 +1177,584 @@ atomic64_dec_if_positive(atomic64_t *v)
return arch_atomic64_dec_if_positive(v);
}
+static __always_inline long
+atomic_long_read(const atomic_long_t *v)
+{
+ instrument_atomic_read(v, sizeof(*v));
+ return arch_atomic_long_read(v);
+}
+
+static __always_inline long
+atomic_long_read_acquire(const atomic_long_t *v)
+{
+ instrument_atomic_read(v, sizeof(*v));
+ return arch_atomic_long_read_acquire(v);
+}
+
+static __always_inline void
+atomic_long_set(atomic_long_t *v, long i)
+{
+ instrument_atomic_write(v, sizeof(*v));
+ arch_atomic_long_set(v, i);
+}
+
+static __always_inline void
+atomic_long_set_release(atomic_long_t *v, long i)
+{
+ instrument_atomic_write(v, sizeof(*v));
+ arch_atomic_long_set_release(v, i);
+}
+
+static __always_inline void
+atomic_long_add(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ arch_atomic_long_add(i, v);
+}
+
+static __always_inline long
+atomic_long_add_return(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_add_return(i, v);
+}
+
+static __always_inline long
+atomic_long_add_return_acquire(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_add_return_acquire(i, v);
+}
+
+static __always_inline long
+atomic_long_add_return_release(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_add_return_release(i, v);
+}
+
+static __always_inline long
+atomic_long_add_return_relaxed(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_add_return_relaxed(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_add(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_add(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_add_acquire(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_add_acquire(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_add_release(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_add_release(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_add_relaxed(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_add_relaxed(i, v);
+}
+
+static __always_inline void
+atomic_long_sub(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ arch_atomic_long_sub(i, v);
+}
+
+static __always_inline long
+atomic_long_sub_return(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_sub_return(i, v);
+}
+
+static __always_inline long
+atomic_long_sub_return_acquire(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_sub_return_acquire(i, v);
+}
+
+static __always_inline long
+atomic_long_sub_return_release(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_sub_return_release(i, v);
+}
+
+static __always_inline long
+atomic_long_sub_return_relaxed(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_sub_return_relaxed(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_sub(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_sub(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_sub_acquire(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_sub_acquire(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_sub_release(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_sub_release(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_sub_relaxed(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_sub_relaxed(i, v);
+}
+
+static __always_inline void
+atomic_long_inc(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ arch_atomic_long_inc(v);
+}
+
+static __always_inline long
+atomic_long_inc_return(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_inc_return(v);
+}
+
+static __always_inline long
+atomic_long_inc_return_acquire(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_inc_return_acquire(v);
+}
+
+static __always_inline long
+atomic_long_inc_return_release(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_inc_return_release(v);
+}
+
+static __always_inline long
+atomic_long_inc_return_relaxed(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_inc_return_relaxed(v);
+}
+
+static __always_inline long
+atomic_long_fetch_inc(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_inc(v);
+}
+
+static __always_inline long
+atomic_long_fetch_inc_acquire(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_inc_acquire(v);
+}
+
+static __always_inline long
+atomic_long_fetch_inc_release(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_inc_release(v);
+}
+
+static __always_inline long
+atomic_long_fetch_inc_relaxed(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_inc_relaxed(v);
+}
+
+static __always_inline void
+atomic_long_dec(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ arch_atomic_long_dec(v);
+}
+
+static __always_inline long
+atomic_long_dec_return(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_dec_return(v);
+}
+
+static __always_inline long
+atomic_long_dec_return_acquire(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_dec_return_acquire(v);
+}
+
+static __always_inline long
+atomic_long_dec_return_release(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_dec_return_release(v);
+}
+
+static __always_inline long
+atomic_long_dec_return_relaxed(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_dec_return_relaxed(v);
+}
+
+static __always_inline long
+atomic_long_fetch_dec(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_dec(v);
+}
+
+static __always_inline long
+atomic_long_fetch_dec_acquire(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_dec_acquire(v);
+}
+
+static __always_inline long
+atomic_long_fetch_dec_release(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_dec_release(v);
+}
+
+static __always_inline long
+atomic_long_fetch_dec_relaxed(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_dec_relaxed(v);
+}
+
+static __always_inline void
+atomic_long_and(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ arch_atomic_long_and(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_and(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_and(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_and_acquire(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_and_acquire(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_and_release(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_and_release(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_and_relaxed(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_and_relaxed(i, v);
+}
+
+static __always_inline void
+atomic_long_andnot(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ arch_atomic_long_andnot(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_andnot(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_andnot(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_andnot_acquire(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_andnot_release(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_andnot_release(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_andnot_relaxed(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_andnot_relaxed(i, v);
+}
+
+static __always_inline void
+atomic_long_or(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ arch_atomic_long_or(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_or(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_or(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_or_acquire(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_or_acquire(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_or_release(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_or_release(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_or_relaxed(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_or_relaxed(i, v);
+}
+
+static __always_inline void
+atomic_long_xor(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ arch_atomic_long_xor(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_xor(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_xor(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_xor_acquire(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_xor_acquire(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_xor_release(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_xor_release(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_xor_relaxed(i, v);
+}
+
+static __always_inline long
+atomic_long_xchg(atomic_long_t *v, long i)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_xchg(v, i);
+}
+
+static __always_inline long
+atomic_long_xchg_acquire(atomic_long_t *v, long i)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_xchg_acquire(v, i);
+}
+
+static __always_inline long
+atomic_long_xchg_release(atomic_long_t *v, long i)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_xchg_release(v, i);
+}
+
+static __always_inline long
+atomic_long_xchg_relaxed(atomic_long_t *v, long i)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_xchg_relaxed(v, i);
+}
+
+static __always_inline long
+atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_cmpxchg(v, old, new);
+}
+
+static __always_inline long
+atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_cmpxchg_acquire(v, old, new);
+}
+
+static __always_inline long
+atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_cmpxchg_release(v, old, new);
+}
+
+static __always_inline long
+atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_cmpxchg_relaxed(v, old, new);
+}
+
+static __always_inline bool
+atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ instrument_atomic_read_write(old, sizeof(*old));
+ return arch_atomic_long_try_cmpxchg(v, old, new);
+}
+
+static __always_inline bool
+atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ instrument_atomic_read_write(old, sizeof(*old));
+ return arch_atomic_long_try_cmpxchg_acquire(v, old, new);
+}
+
+static __always_inline bool
+atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ instrument_atomic_read_write(old, sizeof(*old));
+ return arch_atomic_long_try_cmpxchg_release(v, old, new);
+}
+
+static __always_inline bool
+atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ instrument_atomic_read_write(old, sizeof(*old));
+ return arch_atomic_long_try_cmpxchg_relaxed(v, old, new);
+}
+
+static __always_inline bool
+atomic_long_sub_and_test(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_sub_and_test(i, v);
+}
+
+static __always_inline bool
+atomic_long_dec_and_test(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_dec_and_test(v);
+}
+
+static __always_inline bool
+atomic_long_inc_and_test(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_inc_and_test(v);
+}
+
+static __always_inline bool
+atomic_long_add_negative(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_add_negative(i, v);
+}
+
+static __always_inline long
+atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_fetch_add_unless(v, a, u);
+}
+
+static __always_inline bool
+atomic_long_add_unless(atomic_long_t *v, long a, long u)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_add_unless(v, a, u);
+}
+
+static __always_inline bool
+atomic_long_inc_not_zero(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_inc_not_zero(v);
+}
+
+static __always_inline bool
+atomic_long_inc_unless_negative(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_inc_unless_negative(v);
+}
+
+static __always_inline bool
+atomic_long_dec_unless_positive(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_dec_unless_positive(v);
+}
+
+static __always_inline long
+atomic_long_dec_if_positive(atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_dec_if_positive(v);
+}
+
#define xchg(ptr, ...) \
({ \
typeof(ptr) __ai_ptr = (ptr); \
@@ -1333,5 +1911,5 @@ atomic64_dec_if_positive(atomic64_t *v)
arch_cmpxchg_double_local(__ai_ptr, __VA_ARGS__); \
})
-#endif /* _ASM_GENERIC_ATOMIC_INSTRUMENTED_H */
-// 1d7c3a25aca5c7fb031c307be4c3d24c7b48fcd5
+#endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
+// 2a9553f0a9d5619f19151092df5cabbbf16ce835
diff --git a/include/linux/atomic/atomic-long.h b/include/linux/atomic/atomic-long.h
new file mode 100644
index 000000000000..800b8c35992d
--- /dev/null
+++ b/include/linux/atomic/atomic-long.h
@@ -0,0 +1,1014 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Generated by scripts/atomic/gen-atomic-long.sh
+// DO NOT MODIFY THIS FILE DIRECTLY
+
+#ifndef _LINUX_ATOMIC_LONG_H
+#define _LINUX_ATOMIC_LONG_H
+
+#include <linux/compiler.h>
+#include <asm/types.h>
+
+#ifdef CONFIG_64BIT
+typedef atomic64_t atomic_long_t;
+#define ATOMIC_LONG_INIT(i) ATOMIC64_INIT(i)
+#define atomic_long_cond_read_acquire atomic64_cond_read_acquire
+#define atomic_long_cond_read_relaxed atomic64_cond_read_relaxed
+#else
+typedef atomic_t atomic_long_t;
+#define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i)
+#define atomic_long_cond_read_acquire atomic_cond_read_acquire
+#define atomic_long_cond_read_relaxed atomic_cond_read_relaxed
+#endif
+
+#ifdef CONFIG_64BIT
+
+static __always_inline long
+arch_atomic_long_read(const atomic_long_t *v)
+{
+ return arch_atomic64_read(v);
+}
+
+static __always_inline long
+arch_atomic_long_read_acquire(const atomic_long_t *v)
+{
+ return arch_atomic64_read_acquire(v);
+}
+
+static __always_inline void
+arch_atomic_long_set(atomic_long_t *v, long i)
+{
+ arch_atomic64_set(v, i);
+}
+
+static __always_inline void
+arch_atomic_long_set_release(atomic_long_t *v, long i)
+{
+ arch_atomic64_set_release(v, i);
+}
+
+static __always_inline void
+arch_atomic_long_add(long i, atomic_long_t *v)
+{
+ arch_atomic64_add(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_add_return(long i, atomic_long_t *v)
+{
+ return arch_atomic64_add_return(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_add_return_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic64_add_return_acquire(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_add_return_release(long i, atomic_long_t *v)
+{
+ return arch_atomic64_add_return_release(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_add_return_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic64_add_return_relaxed(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_add(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_add(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_add_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_add_acquire(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_add_release(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_add_release(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_add_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_add_relaxed(i, v);
+}
+
+static __always_inline void
+arch_atomic_long_sub(long i, atomic_long_t *v)
+{
+ arch_atomic64_sub(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_sub_return(long i, atomic_long_t *v)
+{
+ return arch_atomic64_sub_return(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_sub_return_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic64_sub_return_acquire(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_sub_return_release(long i, atomic_long_t *v)
+{
+ return arch_atomic64_sub_return_release(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_sub_return_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic64_sub_return_relaxed(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_sub(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_sub(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_sub_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_sub_acquire(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_sub_release(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_sub_release(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_sub_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_sub_relaxed(i, v);
+}
+
+static __always_inline void
+arch_atomic_long_inc(atomic_long_t *v)
+{
+ arch_atomic64_inc(v);
+}
+
+static __always_inline long
+arch_atomic_long_inc_return(atomic_long_t *v)
+{
+ return arch_atomic64_inc_return(v);
+}
+
+static __always_inline long
+arch_atomic_long_inc_return_acquire(atomic_long_t *v)
+{
+ return arch_atomic64_inc_return_acquire(v);
+}
+
+static __always_inline long
+arch_atomic_long_inc_return_release(atomic_long_t *v)
+{
+ return arch_atomic64_inc_return_release(v);
+}
+
+static __always_inline long
+arch_atomic_long_inc_return_relaxed(atomic_long_t *v)
+{
+ return arch_atomic64_inc_return_relaxed(v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_inc(atomic_long_t *v)
+{
+ return arch_atomic64_fetch_inc(v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_inc_acquire(atomic_long_t *v)
+{
+ return arch_atomic64_fetch_inc_acquire(v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_inc_release(atomic_long_t *v)
+{
+ return arch_atomic64_fetch_inc_release(v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_inc_relaxed(atomic_long_t *v)
+{
+ return arch_atomic64_fetch_inc_relaxed(v);
+}
+
+static __always_inline void
+arch_atomic_long_dec(atomic_long_t *v)
+{
+ arch_atomic64_dec(v);
+}
+
+static __always_inline long
+arch_atomic_long_dec_return(atomic_long_t *v)
+{
+ return arch_atomic64_dec_return(v);
+}
+
+static __always_inline long
+arch_atomic_long_dec_return_acquire(atomic_long_t *v)
+{
+ return arch_atomic64_dec_return_acquire(v);
+}
+
+static __always_inline long
+arch_atomic_long_dec_return_release(atomic_long_t *v)
+{
+ return arch_atomic64_dec_return_release(v);
+}
+
+static __always_inline long
+arch_atomic_long_dec_return_relaxed(atomic_long_t *v)
+{
+ return arch_atomic64_dec_return_relaxed(v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_dec(atomic_long_t *v)
+{
+ return arch_atomic64_fetch_dec(v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_dec_acquire(atomic_long_t *v)
+{
+ return arch_atomic64_fetch_dec_acquire(v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_dec_release(atomic_long_t *v)
+{
+ return arch_atomic64_fetch_dec_release(v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_dec_relaxed(atomic_long_t *v)
+{
+ return arch_atomic64_fetch_dec_relaxed(v);
+}
+
+static __always_inline void
+arch_atomic_long_and(long i, atomic_long_t *v)
+{
+ arch_atomic64_and(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_and(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_and(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_and_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_and_acquire(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_and_release(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_and_release(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_and_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_and_relaxed(i, v);
+}
+
+static __always_inline void
+arch_atomic_long_andnot(long i, atomic_long_t *v)
+{
+ arch_atomic64_andnot(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_andnot(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_andnot(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_andnot_acquire(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_andnot_release(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_andnot_release(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_andnot_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_andnot_relaxed(i, v);
+}
+
+static __always_inline void
+arch_atomic_long_or(long i, atomic_long_t *v)
+{
+ arch_atomic64_or(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_or(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_or(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_or_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_or_acquire(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_or_release(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_or_release(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_or_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_or_relaxed(i, v);
+}
+
+static __always_inline void
+arch_atomic_long_xor(long i, atomic_long_t *v)
+{
+ arch_atomic64_xor(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_xor(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_xor(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_xor_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_xor_acquire(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_xor_release(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_xor_release(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic64_fetch_xor_relaxed(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_xchg(atomic_long_t *v, long i)
+{
+ return arch_atomic64_xchg(v, i);
+}
+
+static __always_inline long
+arch_atomic_long_xchg_acquire(atomic_long_t *v, long i)
+{
+ return arch_atomic64_xchg_acquire(v, i);
+}
+
+static __always_inline long
+arch_atomic_long_xchg_release(atomic_long_t *v, long i)
+{
+ return arch_atomic64_xchg_release(v, i);
+}
+
+static __always_inline long
+arch_atomic_long_xchg_relaxed(atomic_long_t *v, long i)
+{
+ return arch_atomic64_xchg_relaxed(v, i);
+}
+
+static __always_inline long
+arch_atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
+{
+ return arch_atomic64_cmpxchg(v, old, new);
+}
+
+static __always_inline long
+arch_atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
+{
+ return arch_atomic64_cmpxchg_acquire(v, old, new);
+}
+
+static __always_inline long
+arch_atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
+{
+ return arch_atomic64_cmpxchg_release(v, old, new);
+}
+
+static __always_inline long
+arch_atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
+{
+ return arch_atomic64_cmpxchg_relaxed(v, old, new);
+}
+
+static __always_inline bool
+arch_atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
+{
+ return arch_atomic64_try_cmpxchg(v, (s64 *)old, new);
+}
+
+static __always_inline bool
+arch_atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
+{
+ return arch_atomic64_try_cmpxchg_acquire(v, (s64 *)old, new);
+}
+
+static __always_inline bool
+arch_atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
+{
+ return arch_atomic64_try_cmpxchg_release(v, (s64 *)old, new);
+}
+
+static __always_inline bool
+arch_atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new)
+{
+ return arch_atomic64_try_cmpxchg_relaxed(v, (s64 *)old, new);
+}
+
+static __always_inline bool
+arch_atomic_long_sub_and_test(long i, atomic_long_t *v)
+{
+ return arch_atomic64_sub_and_test(i, v);
+}
+
+static __always_inline bool
+arch_atomic_long_dec_and_test(atomic_long_t *v)
+{
+ return arch_atomic64_dec_and_test(v);
+}
+
+static __always_inline bool
+arch_atomic_long_inc_and_test(atomic_long_t *v)
+{
+ return arch_atomic64_inc_and_test(v);
+}
+
+static __always_inline bool
+arch_atomic_long_add_negative(long i, atomic_long_t *v)
+{
+ return arch_atomic64_add_negative(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
+{
+ return arch_atomic64_fetch_add_unless(v, a, u);
+}
+
+static __always_inline bool
+arch_atomic_long_add_unless(atomic_long_t *v, long a, long u)
+{
+ return arch_atomic64_add_unless(v, a, u);
+}
+
+static __always_inline bool
+arch_atomic_long_inc_not_zero(atomic_long_t *v)
+{
+ return arch_atomic64_inc_not_zero(v);
+}
+
+static __always_inline bool
+arch_atomic_long_inc_unless_negative(atomic_long_t *v)
+{
+ return arch_atomic64_inc_unless_negative(v);
+}
+
+static __always_inline bool
+arch_atomic_long_dec_unless_positive(atomic_long_t *v)
+{
+ return arch_atomic64_dec_unless_positive(v);
+}
+
+static __always_inline long
+arch_atomic_long_dec_if_positive(atomic_long_t *v)
+{
+ return arch_atomic64_dec_if_positive(v);
+}
+
+#else /* CONFIG_64BIT */
+
+static __always_inline long
+arch_atomic_long_read(const atomic_long_t *v)
+{
+ return arch_atomic_read(v);
+}
+
+static __always_inline long
+arch_atomic_long_read_acquire(const atomic_long_t *v)
+{
+ return arch_atomic_read_acquire(v);
+}
+
+static __always_inline void
+arch_atomic_long_set(atomic_long_t *v, long i)
+{
+ arch_atomic_set(v, i);
+}
+
+static __always_inline void
+arch_atomic_long_set_release(atomic_long_t *v, long i)
+{
+ arch_atomic_set_release(v, i);
+}
+
+static __always_inline void
+arch_atomic_long_add(long i, atomic_long_t *v)
+{
+ arch_atomic_add(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_add_return(long i, atomic_long_t *v)
+{
+ return arch_atomic_add_return(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_add_return_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic_add_return_acquire(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_add_return_release(long i, atomic_long_t *v)
+{
+ return arch_atomic_add_return_release(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_add_return_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic_add_return_relaxed(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_add(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_add(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_add_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_add_acquire(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_add_release(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_add_release(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_add_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_add_relaxed(i, v);
+}
+
+static __always_inline void
+arch_atomic_long_sub(long i, atomic_long_t *v)
+{
+ arch_atomic_sub(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_sub_return(long i, atomic_long_t *v)
+{
+ return arch_atomic_sub_return(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_sub_return_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic_sub_return_acquire(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_sub_return_release(long i, atomic_long_t *v)
+{
+ return arch_atomic_sub_return_release(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_sub_return_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic_sub_return_relaxed(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_sub(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_sub(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_sub_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_sub_acquire(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_sub_release(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_sub_release(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_sub_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_sub_relaxed(i, v);
+}
+
+static __always_inline void
+arch_atomic_long_inc(atomic_long_t *v)
+{
+ arch_atomic_inc(v);
+}
+
+static __always_inline long
+arch_atomic_long_inc_return(atomic_long_t *v)
+{
+ return arch_atomic_inc_return(v);
+}
+
+static __always_inline long
+arch_atomic_long_inc_return_acquire(atomic_long_t *v)
+{
+ return arch_atomic_inc_return_acquire(v);
+}
+
+static __always_inline long
+arch_atomic_long_inc_return_release(atomic_long_t *v)
+{
+ return arch_atomic_inc_return_release(v);
+}
+
+static __always_inline long
+arch_atomic_long_inc_return_relaxed(atomic_long_t *v)
+{
+ return arch_atomic_inc_return_relaxed(v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_inc(atomic_long_t *v)
+{
+ return arch_atomic_fetch_inc(v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_inc_acquire(atomic_long_t *v)
+{
+ return arch_atomic_fetch_inc_acquire(v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_inc_release(atomic_long_t *v)
+{
+ return arch_atomic_fetch_inc_release(v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_inc_relaxed(atomic_long_t *v)
+{
+ return arch_atomic_fetch_inc_relaxed(v);
+}
+
+static __always_inline void
+arch_atomic_long_dec(atomic_long_t *v)
+{
+ arch_atomic_dec(v);
+}
+
+static __always_inline long
+arch_atomic_long_dec_return(atomic_long_t *v)
+{
+ return arch_atomic_dec_return(v);
+}
+
+static __always_inline long
+arch_atomic_long_dec_return_acquire(atomic_long_t *v)
+{
+ return arch_atomic_dec_return_acquire(v);
+}
+
+static __always_inline long
+arch_atomic_long_dec_return_release(atomic_long_t *v)
+{
+ return arch_atomic_dec_return_release(v);
+}
+
+static __always_inline long
+arch_atomic_long_dec_return_relaxed(atomic_long_t *v)
+{
+ return arch_atomic_dec_return_relaxed(v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_dec(atomic_long_t *v)
+{
+ return arch_atomic_fetch_dec(v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_dec_acquire(atomic_long_t *v)
+{
+ return arch_atomic_fetch_dec_acquire(v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_dec_release(atomic_long_t *v)
+{
+ return arch_atomic_fetch_dec_release(v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_dec_relaxed(atomic_long_t *v)
+{
+ return arch_atomic_fetch_dec_relaxed(v);
+}
+
+static __always_inline void
+arch_atomic_long_and(long i, atomic_long_t *v)
+{
+ arch_atomic_and(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_and(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_and(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_and_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_and_acquire(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_and_release(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_and_release(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_and_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_and_relaxed(i, v);
+}
+
+static __always_inline void
+arch_atomic_long_andnot(long i, atomic_long_t *v)
+{
+ arch_atomic_andnot(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_andnot(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_andnot(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_andnot_acquire(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_andnot_release(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_andnot_release(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_andnot_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_andnot_relaxed(i, v);
+}
+
+static __always_inline void
+arch_atomic_long_or(long i, atomic_long_t *v)
+{
+ arch_atomic_or(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_or(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_or(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_or_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_or_acquire(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_or_release(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_or_release(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_or_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_or_relaxed(i, v);
+}
+
+static __always_inline void
+arch_atomic_long_xor(long i, atomic_long_t *v)
+{
+ arch_atomic_xor(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_xor(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_xor(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_xor_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_xor_acquire(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_xor_release(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_xor_release(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic_fetch_xor_relaxed(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_xchg(atomic_long_t *v, long i)
+{
+ return arch_atomic_xchg(v, i);
+}
+
+static __always_inline long
+arch_atomic_long_xchg_acquire(atomic_long_t *v, long i)
+{
+ return arch_atomic_xchg_acquire(v, i);
+}
+
+static __always_inline long
+arch_atomic_long_xchg_release(atomic_long_t *v, long i)
+{
+ return arch_atomic_xchg_release(v, i);
+}
+
+static __always_inline long
+arch_atomic_long_xchg_relaxed(atomic_long_t *v, long i)
+{
+ return arch_atomic_xchg_relaxed(v, i);
+}
+
+static __always_inline long
+arch_atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
+{
+ return arch_atomic_cmpxchg(v, old, new);
+}
+
+static __always_inline long
+arch_atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
+{
+ return arch_atomic_cmpxchg_acquire(v, old, new);
+}
+
+static __always_inline long
+arch_atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
+{
+ return arch_atomic_cmpxchg_release(v, old, new);
+}
+
+static __always_inline long
+arch_atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
+{
+ return arch_atomic_cmpxchg_relaxed(v, old, new);
+}
+
+static __always_inline bool
+arch_atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
+{
+ return arch_atomic_try_cmpxchg(v, (int *)old, new);
+}
+
+static __always_inline bool
+arch_atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
+{
+ return arch_atomic_try_cmpxchg_acquire(v, (int *)old, new);
+}
+
+static __always_inline bool
+arch_atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
+{
+ return arch_atomic_try_cmpxchg_release(v, (int *)old, new);
+}
+
+static __always_inline bool
+arch_atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new)
+{
+ return arch_atomic_try_cmpxchg_relaxed(v, (int *)old, new);
+}
+
+static __always_inline bool
+arch_atomic_long_sub_and_test(long i, atomic_long_t *v)
+{
+ return arch_atomic_sub_and_test(i, v);
+}
+
+static __always_inline bool
+arch_atomic_long_dec_and_test(atomic_long_t *v)
+{
+ return arch_atomic_dec_and_test(v);
+}
+
+static __always_inline bool
+arch_atomic_long_inc_and_test(atomic_long_t *v)
+{
+ return arch_atomic_inc_and_test(v);
+}
+
+static __always_inline bool
+arch_atomic_long_add_negative(long i, atomic_long_t *v)
+{
+ return arch_atomic_add_negative(i, v);
+}
+
+static __always_inline long
+arch_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
+{
+ return arch_atomic_fetch_add_unless(v, a, u);
+}
+
+static __always_inline bool
+arch_atomic_long_add_unless(atomic_long_t *v, long a, long u)
+{
+ return arch_atomic_add_unless(v, a, u);
+}
+
+static __always_inline bool
+arch_atomic_long_inc_not_zero(atomic_long_t *v)
+{
+ return arch_atomic_inc_not_zero(v);
+}
+
+static __always_inline bool
+arch_atomic_long_inc_unless_negative(atomic_long_t *v)
+{
+ return arch_atomic_inc_unless_negative(v);
+}
+
+static __always_inline bool
+arch_atomic_long_dec_unless_positive(atomic_long_t *v)
+{
+ return arch_atomic_dec_unless_positive(v);
+}
+
+static __always_inline long
+arch_atomic_long_dec_if_positive(atomic_long_t *v)
+{
+ return arch_atomic_dec_if_positive(v);
+}
+
+#endif /* CONFIG_64BIT */
+#endif /* _LINUX_ATOMIC_LONG_H */
+// e8f0e08ff072b74d180eabe2ad001282b38c2c88
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 44df4fcef65c..29530859d9ff 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -143,7 +143,7 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
sb = inode->i_sb;
#ifdef CONFIG_BLOCK
if (sb_is_blkdev_sb(sb))
- return I_BDEV(inode)->bd_bdi;
+ return I_BDEV(inode)->bd_disk->bdi;
#endif
return sb->s_bdi;
}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 2203b686e1f0..00952e92eae1 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -5,7 +5,6 @@
#ifndef __LINUX_BIO_H
#define __LINUX_BIO_H
-#include <linux/highmem.h>
#include <linux/mempool.h>
#include <linux/ioprio.h>
/* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
@@ -375,7 +374,7 @@ static inline void bip_set_seed(struct bio_integrity_payload *bip,
#endif /* CONFIG_BLK_DEV_INTEGRITY */
-extern void bio_trim(struct bio *bio, int offset, int size);
+void bio_trim(struct bio *bio, sector_t offset, sector_t size);
extern struct bio *bio_split(struct bio *bio, int sectors,
gfp_t gfp, struct bio_set *bs);
@@ -401,6 +400,7 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors,
enum {
BIOSET_NEED_BVECS = BIT(0),
BIOSET_NEED_RESCUER = BIT(1),
+ BIOSET_PERCPU_CACHE = BIT(2),
};
extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags);
extern void bioset_exit(struct bio_set *);
@@ -409,6 +409,8 @@ extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src);
struct bio *bio_alloc_bioset(gfp_t gfp, unsigned short nr_iovecs,
struct bio_set *bs);
+struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
+ struct bio_set *bs);
struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs);
extern void bio_put(struct bio *);
@@ -519,47 +521,6 @@ static inline void bio_clone_blkg_association(struct bio *dst,
struct bio *src) { }
#endif /* CONFIG_BLK_CGROUP */
-#ifdef CONFIG_HIGHMEM
-/*
- * remember never ever reenable interrupts between a bvec_kmap_irq and
- * bvec_kunmap_irq!
- */
-static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
-{
- unsigned long addr;
-
- /*
- * might not be a highmem page, but the preempt/irq count
- * balancing is a lot nicer this way
- */
- local_irq_save(*flags);
- addr = (unsigned long) kmap_atomic(bvec->bv_page);
-
- BUG_ON(addr & ~PAGE_MASK);
-
- return (char *) addr + bvec->bv_offset;
-}
-
-static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
-{
- unsigned long ptr = (unsigned long) buffer & PAGE_MASK;
-
- kunmap_atomic((void *) ptr);
- local_irq_restore(*flags);
-}
-
-#else
-static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
-{
- return page_address(bvec->bv_page) + bvec->bv_offset;
-}
-
-static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
-{
- *flags = 0;
-}
-#endif
-
/*
* BIO list management for use by remapping drivers (e.g. DM or MD) and loop.
*
@@ -699,6 +660,11 @@ struct bio_set {
struct kmem_cache *bio_slab;
unsigned int front_pad;
+ /*
+ * per-cpu bio alloc cache
+ */
+ struct bio_alloc_cache __percpu *cache;
+
mempool_t bio_pool;
mempool_t bvec_pool;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
@@ -715,6 +681,11 @@ struct bio_set {
struct bio_list rescue_list;
struct work_struct rescue_work;
struct workqueue_struct *rescue_workqueue;
+
+ /*
+ * Hot un-plug notifier for the per-cpu cache, if used
+ */
+ struct hlist_node cpuhp_dead;
};
static inline bool bioset_initialized(struct bio_set *bs)
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 26bf15e6cd35..5e62e2383b7f 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -4,6 +4,7 @@
#include <asm/types.h>
#include <linux/bits.h>
+#include <linux/typecheck.h>
#include <uapi/linux/kernel.h>
@@ -253,6 +254,55 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr,
__clear_bit(nr, addr);
}
+/**
+ * __ptr_set_bit - Set bit in a pointer's value
+ * @nr: the bit to set
+ * @addr: the address of the pointer variable
+ *
+ * Example:
+ * void *p = foo();
+ * __ptr_set_bit(bit, &p);
+ */
+#define __ptr_set_bit(nr, addr) \
+ ({ \
+ typecheck_pointer(*(addr)); \
+ __set_bit(nr, (unsigned long *)(addr)); \
+ })
+
+/**
+ * __ptr_clear_bit - Clear bit in a pointer's value
+ * @nr: the bit to clear
+ * @addr: the address of the pointer variable
+ *
+ * Example:
+ * void *p = foo();
+ * __ptr_clear_bit(bit, &p);
+ */
+#define __ptr_clear_bit(nr, addr) \
+ ({ \
+ typecheck_pointer(*(addr)); \
+ __clear_bit(nr, (unsigned long *)(addr)); \
+ })
+
+/**
+ * __ptr_test_bit - Test bit in a pointer's value
+ * @nr: the bit to test
+ * @addr: the address of the pointer variable
+ *
+ * Example:
+ * void *p = foo();
+ * if (__ptr_test_bit(bit, &p)) {
+ * ...
+ * } else {
+ * ...
+ * }
+ */
+#define __ptr_test_bit(nr, addr) \
+ ({ \
+ typecheck_pointer(*(addr)); \
+ test_bit(nr, (unsigned long *)(addr)); \
+ })
+
#ifdef __KERNEL__
#ifndef set_mask_bits
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 37048438872c..b4de2010fba5 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -152,8 +152,8 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
-typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
- size_t size);
+typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
+ struct seq_file *s);
struct blkcg_policy {
int plid;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 1d18447ebebc..13ba1861e688 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -404,7 +404,13 @@ enum {
BLK_MQ_F_STACKING = 1 << 2,
BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3,
BLK_MQ_F_BLOCKING = 1 << 5,
+ /* Do not allow an I/O scheduler to be configured. */
BLK_MQ_F_NO_SCHED = 1 << 6,
+ /*
+ * Select 'none' during queue registration in case of a single hwq
+ * or shared hwqs instead of 'mq-deadline'.
+ */
+ BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 7,
BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
BLK_MQ_F_ALLOC_POLICY_BITS = 1,
@@ -426,18 +432,14 @@ enum {
((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
+struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
+ struct lock_class_key *lkclass);
#define blk_mq_alloc_disk(set, queuedata) \
({ \
static struct lock_class_key __key; \
- struct gendisk *__disk = __blk_mq_alloc_disk(set, queuedata); \
\
- if (!IS_ERR(__disk)) \
- lockdep_init_map(&__disk->lockdep_map, \
- "(bio completion)", &__key, 0); \
- __disk; \
+ __blk_mq_alloc_disk(set, queuedata, &__key); \
})
-struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set,
- void *queuedata);
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
struct request_queue *q);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 290f9061b29a..be622b5a21ed 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -34,14 +34,10 @@ struct block_device {
void * bd_holder;
int bd_holders;
bool bd_write_holder;
-#ifdef CONFIG_SYSFS
- struct list_head bd_holder_disks;
-#endif
struct kobject *bd_holder_dir;
u8 bd_partno;
spinlock_t bd_size_lock; /* for bd_inode->i_size updates */
struct gendisk * bd_disk;
- struct backing_dev_info *bd_bdi;
/* The counter of freeze processes */
int bd_fsfreeze_count;
@@ -281,6 +277,7 @@ struct bio {
};
#define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs)
+#define BIO_MAX_SECTORS (UINT_MAX >> SECTOR_SHIFT)
/*
* bio flags
@@ -301,6 +298,7 @@ enum {
BIO_TRACKED, /* set if bio goes through the rq_qos path */
BIO_REMAPPED,
BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */
+ BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */
BIO_FLAG_LAST
};
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d3afea47ade6..c9cb12483e12 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -11,7 +11,6 @@
#include <linux/minmax.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
-#include <linux/backing-dev-defs.h>
#include <linux/wait.h>
#include <linux/mempool.h>
#include <linux/pfn.h>
@@ -398,8 +397,6 @@ struct request_queue {
struct blk_mq_hw_ctx **queue_hw_ctx;
unsigned int nr_hw_queues;
- struct backing_dev_info *backing_dev_info;
-
/*
* The queue owner gets to use this for whatever they like.
* ll_rw_blk doesn't touch it.
@@ -424,6 +421,8 @@ struct request_queue {
spinlock_t queue_lock;
+ struct gendisk *disk;
+
/*
* queue kobject
*/
@@ -664,8 +663,6 @@ extern void blk_clear_pm_only(struct request_queue *q);
dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \
(dir), (attrs))
-#define queue_to_disk(q) (dev_to_disk(kobj_to_dev((q)->kobj.parent)))
-
static inline bool queue_is_mq(struct request_queue *q)
{
return q->mq_ops;
@@ -941,6 +938,10 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
#define SECTOR_SIZE (1 << SECTOR_SHIFT)
#endif
+#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
+#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
+#define SECTOR_MASK (PAGE_SECTORS - 1)
+
/*
* blk_rq_pos() : the current sector
* blk_rq_bytes() : bytes left in the entire request
@@ -1139,7 +1140,7 @@ void blk_queue_zone_write_granularity(struct request_queue *q,
unsigned int size);
extern void blk_queue_alignment_offset(struct request_queue *q,
unsigned int alignment);
-void blk_queue_update_readahead(struct request_queue *q);
+void disk_update_readahead(struct gendisk *disk);
extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
@@ -1521,6 +1522,22 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector
return offset << SECTOR_SHIFT;
}
+/*
+ * Two cases of handling DISCARD merge:
+ * If max_discard_segments > 1, the driver takes every bio
+ * as a range and send them to controller together. The ranges
+ * needn't to be contiguous.
+ * Otherwise, the bios/requests will be handled as same as
+ * others which should be contiguous.
+ */
+static inline bool blk_discard_mergable(struct request *req)
+{
+ if (req_op(req) == REQ_OP_DISCARD &&
+ queue_max_discard_segments(req->q) > 1)
+ return true;
+ return false;
+}
+
static inline int bdev_discard_alignment(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
@@ -1855,6 +1872,13 @@ struct block_device_operations {
char *(*devnode)(struct gendisk *disk, umode_t *mode);
struct module *owner;
const struct pr_ops *pr_ops;
+
+ /*
+ * Special callback for probing GPT entry at a given sector.
+ * Needed by Android devices, used by GPT scanner and MMC blk
+ * driver.
+ */
+ int (*alternative_gpt_sector)(struct gendisk *disk, sector_t *sector);
};
#ifdef CONFIG_COMPAT
@@ -1984,8 +2008,6 @@ void blkdev_put_no_open(struct block_device *bdev);
struct block_device *bdev_alloc(struct gendisk *disk, u8 partno);
void bdev_add(struct block_device *bdev, dev_t dev);
struct block_device *I_BDEV(struct inode *inode);
-struct block_device *bdgrab(struct block_device *bdev);
-void bdput(struct block_device *);
int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart,
loff_t lend);
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 8b77d08d4b47..2746fd804216 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -23,22 +23,73 @@ struct ctl_table_header;
struct task_struct;
#ifdef CONFIG_CGROUP_BPF
+enum cgroup_bpf_attach_type {
+ CGROUP_BPF_ATTACH_TYPE_INVALID = -1,
+ CGROUP_INET_INGRESS = 0,
+ CGROUP_INET_EGRESS,
+ CGROUP_INET_SOCK_CREATE,
+ CGROUP_SOCK_OPS,
+ CGROUP_DEVICE,
+ CGROUP_INET4_BIND,
+ CGROUP_INET6_BIND,
+ CGROUP_INET4_CONNECT,
+ CGROUP_INET6_CONNECT,
+ CGROUP_INET4_POST_BIND,
+ CGROUP_INET6_POST_BIND,
+ CGROUP_UDP4_SENDMSG,
+ CGROUP_UDP6_SENDMSG,
+ CGROUP_SYSCTL,
+ CGROUP_UDP4_RECVMSG,
+ CGROUP_UDP6_RECVMSG,
+ CGROUP_GETSOCKOPT,
+ CGROUP_SETSOCKOPT,
+ CGROUP_INET4_GETPEERNAME,
+ CGROUP_INET6_GETPEERNAME,
+ CGROUP_INET4_GETSOCKNAME,
+ CGROUP_INET6_GETSOCKNAME,
+ CGROUP_INET_SOCK_RELEASE,
+ MAX_CGROUP_BPF_ATTACH_TYPE
+};
-extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
-#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
+#define CGROUP_ATYPE(type) \
+ case BPF_##type: return type
-#define BPF_CGROUP_STORAGE_NEST_MAX 8
+static inline enum cgroup_bpf_attach_type
+to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type)
+{
+ switch (attach_type) {
+ CGROUP_ATYPE(CGROUP_INET_INGRESS);
+ CGROUP_ATYPE(CGROUP_INET_EGRESS);
+ CGROUP_ATYPE(CGROUP_INET_SOCK_CREATE);
+ CGROUP_ATYPE(CGROUP_SOCK_OPS);
+ CGROUP_ATYPE(CGROUP_DEVICE);
+ CGROUP_ATYPE(CGROUP_INET4_BIND);
+ CGROUP_ATYPE(CGROUP_INET6_BIND);
+ CGROUP_ATYPE(CGROUP_INET4_CONNECT);
+ CGROUP_ATYPE(CGROUP_INET6_CONNECT);
+ CGROUP_ATYPE(CGROUP_INET4_POST_BIND);
+ CGROUP_ATYPE(CGROUP_INET6_POST_BIND);
+ CGROUP_ATYPE(CGROUP_UDP4_SENDMSG);
+ CGROUP_ATYPE(CGROUP_UDP6_SENDMSG);
+ CGROUP_ATYPE(CGROUP_SYSCTL);
+ CGROUP_ATYPE(CGROUP_UDP4_RECVMSG);
+ CGROUP_ATYPE(CGROUP_UDP6_RECVMSG);
+ CGROUP_ATYPE(CGROUP_GETSOCKOPT);
+ CGROUP_ATYPE(CGROUP_SETSOCKOPT);
+ CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME);
+ CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME);
+ CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME);
+ CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME);
+ CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE);
+ default:
+ return CGROUP_BPF_ATTACH_TYPE_INVALID;
+ }
+}
-struct bpf_cgroup_storage_info {
- struct task_struct *task;
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
-};
+#undef CGROUP_ATYPE
-/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
- * to use bpf cgroup storage simultaneously.
- */
-DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
- bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
+extern struct static_key_false cgroup_bpf_enabled_key[MAX_CGROUP_BPF_ATTACH_TYPE];
+#define cgroup_bpf_enabled(atype) static_branch_unlikely(&cgroup_bpf_enabled_key[atype])
#define for_each_cgroup_storage_type(stype) \
for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
@@ -80,15 +131,15 @@ struct bpf_prog_array;
struct cgroup_bpf {
/* array of effective progs in this cgroup */
- struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE];
+ struct bpf_prog_array __rcu *effective[MAX_CGROUP_BPF_ATTACH_TYPE];
/* attached progs to this cgroup and attach flags
* when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
* have either zero or one element
* when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
*/
- struct list_head progs[MAX_BPF_ATTACH_TYPE];
- u32 flags[MAX_BPF_ATTACH_TYPE];
+ struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
+ u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE];
/* list of cgroup shared storages */
struct list_head storages;
@@ -128,28 +179,28 @@ int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb,
- enum bpf_attach_type type);
+ enum cgroup_bpf_attach_type atype);
int __cgroup_bpf_run_filter_sk(struct sock *sk,
- enum bpf_attach_type type);
+ enum cgroup_bpf_attach_type atype);
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
- enum bpf_attach_type type,
+ enum cgroup_bpf_attach_type atype,
void *t_ctx,
u32 *flags);
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct bpf_sock_ops_kern *sock_ops,
- enum bpf_attach_type type);
+ enum cgroup_bpf_attach_type atype);
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
- short access, enum bpf_attach_type type);
+ short access, enum cgroup_bpf_attach_type atype);
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
struct ctl_table *table, int write,
char **buf, size_t *pcount, loff_t *ppos,
- enum bpf_attach_type type);
+ enum cgroup_bpf_attach_type atype);
int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level,
int *optname, char __user *optval,
@@ -172,44 +223,6 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
return BPF_CGROUP_STORAGE_SHARED;
}
-static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
- *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
-{
- enum bpf_cgroup_storage_type stype;
- int i, err = 0;
-
- preempt_disable();
- for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
- if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
- continue;
-
- this_cpu_write(bpf_cgroup_storage_info[i].task, current);
- for_each_cgroup_storage_type(stype)
- this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
- storage[stype]);
- goto out;
- }
- err = -EBUSY;
- WARN_ON_ONCE(1);
-
-out:
- preempt_enable();
- return err;
-}
-
-static inline void bpf_cgroup_storage_unset(void)
-{
- int i;
-
- for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
- if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
- continue;
-
- this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
- return;
- }
-}
-
struct bpf_cgroup_storage *
cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
void *key, bool locked);
@@ -230,9 +243,9 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(BPF_CGROUP_INET_INGRESS)) \
+ if (cgroup_bpf_enabled(CGROUP_INET_INGRESS)) \
__ret = __cgroup_bpf_run_filter_skb(sk, skb, \
- BPF_CGROUP_INET_INGRESS); \
+ CGROUP_INET_INGRESS); \
\
__ret; \
})
@@ -240,54 +253,54 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(BPF_CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
+ if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
typeof(sk) __sk = sk_to_full_sk(sk); \
if (sk_fullsock(__sk)) \
__ret = __cgroup_bpf_run_filter_skb(__sk, skb, \
- BPF_CGROUP_INET_EGRESS); \
+ CGROUP_INET_EGRESS); \
} \
__ret; \
})
-#define BPF_CGROUP_RUN_SK_PROG(sk, type) \
+#define BPF_CGROUP_RUN_SK_PROG(sk, atype) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(type)) { \
- __ret = __cgroup_bpf_run_filter_sk(sk, type); \
+ if (cgroup_bpf_enabled(atype)) { \
+ __ret = __cgroup_bpf_run_filter_sk(sk, atype); \
} \
__ret; \
})
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
- BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)
+ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET_SOCK_CREATE)
#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) \
- BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_RELEASE)
+ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET_SOCK_RELEASE)
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \
- BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)
+ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET4_POST_BIND)
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \
- BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND)
+ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND)
-#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \
+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \
({ \
u32 __unused_flags; \
int __ret = 0; \
- if (cgroup_bpf_enabled(type)) \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
+ if (cgroup_bpf_enabled(atype)) \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
NULL, \
&__unused_flags); \
__ret; \
})
-#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \
({ \
u32 __unused_flags; \
int __ret = 0; \
- if (cgroup_bpf_enabled(type)) { \
+ if (cgroup_bpf_enabled(atype)) { \
lock_sock(sk); \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
t_ctx, \
&__unused_flags); \
release_sock(sk); \
@@ -300,13 +313,13 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
* (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
* should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
*/
-#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, bind_flags) \
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags) \
({ \
u32 __flags = 0; \
int __ret = 0; \
- if (cgroup_bpf_enabled(type)) { \
+ if (cgroup_bpf_enabled(atype)) { \
lock_sock(sk); \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
NULL, &__flags); \
release_sock(sk); \
if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \
@@ -316,33 +329,33 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
})
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) \
- ((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) || \
- cgroup_bpf_enabled(BPF_CGROUP_INET6_CONNECT)) && \
+ ((cgroup_bpf_enabled(CGROUP_INET4_CONNECT) || \
+ cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \
(sk)->sk_prot->pre_connect)
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT)
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT)
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT, NULL)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL)
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT, NULL)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL)
#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_SENDMSG, t_ctx)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx)
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_SENDMSG, t_ctx)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx)
#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_RECVMSG, NULL)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL)
#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL)
/* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
* fullsock and its parent fullsock cannot be traced by
@@ -362,33 +375,33 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS)) \
+ if (cgroup_bpf_enabled(CGROUP_SOCK_OPS)) \
__ret = __cgroup_bpf_run_filter_sock_ops(sk, \
sock_ops, \
- BPF_CGROUP_SOCK_OPS); \
+ CGROUP_SOCK_OPS); \
__ret; \
})
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS) && (sock_ops)->sk) { \
+ if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) && (sock_ops)->sk) { \
typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \
if (__sk && sk_fullsock(__sk)) \
__ret = __cgroup_bpf_run_filter_sock_ops(__sk, \
sock_ops, \
- BPF_CGROUP_SOCK_OPS); \
+ CGROUP_SOCK_OPS); \
} \
__ret; \
})
-#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \
+#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(BPF_CGROUP_DEVICE)) \
- __ret = __cgroup_bpf_check_dev_permission(type, major, minor, \
+ if (cgroup_bpf_enabled(CGROUP_DEVICE)) \
+ __ret = __cgroup_bpf_check_dev_permission(atype, major, minor, \
access, \
- BPF_CGROUP_DEVICE); \
+ CGROUP_DEVICE); \
\
__ret; \
})
@@ -397,10 +410,10 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(BPF_CGROUP_SYSCTL)) \
+ if (cgroup_bpf_enabled(CGROUP_SYSCTL)) \
__ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \
buf, count, pos, \
- BPF_CGROUP_SYSCTL); \
+ CGROUP_SYSCTL); \
__ret; \
})
@@ -408,7 +421,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
kernel_optval) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(BPF_CGROUP_SETSOCKOPT)) \
+ if (cgroup_bpf_enabled(CGROUP_SETSOCKOPT)) \
__ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \
optname, optval, \
optlen, \
@@ -419,7 +432,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \
+ if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \
get_user(__ret, optlen); \
__ret; \
})
@@ -428,7 +441,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
max_optlen, retval) \
({ \
int __ret = retval; \
- if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \
+ if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \
if (!(sock)->sk_prot->bpf_bypass_getsockopt || \
!INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \
tcp_bpf_bypass_getsockopt, \
@@ -443,7 +456,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
optlen, retval) \
({ \
int __ret = retval; \
- if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \
+ if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \
__ret = __cgroup_bpf_run_filter_getsockopt_kern( \
sock, level, optname, optval, optlen, retval); \
__ret; \
@@ -487,9 +500,6 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
return -EINVAL;
}
-static inline int bpf_cgroup_storage_set(
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
-static inline void bpf_cgroup_storage_unset(void) {}
static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
struct bpf_map *map) { return 0; }
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
@@ -505,14 +515,14 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
return 0;
}
-#define cgroup_bpf_enabled(type) (0)
-#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) ({ 0; })
+#define cgroup_bpf_enabled(atype) (0)
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; })
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
@@ -524,7 +534,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e8e2b0393ca9..f4c16f19f83e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -168,6 +168,7 @@ struct bpf_map {
u32 max_entries;
u32 map_flags;
int spin_lock_off; /* >=0 valid offset, <0 error */
+ int timer_off; /* >=0 valid offset, <0 error */
u32 id;
int numa_node;
u32 btf_key_type_id;
@@ -197,30 +198,53 @@ static inline bool map_value_has_spin_lock(const struct bpf_map *map)
return map->spin_lock_off >= 0;
}
-static inline void check_and_init_map_lock(struct bpf_map *map, void *dst)
+static inline bool map_value_has_timer(const struct bpf_map *map)
{
- if (likely(!map_value_has_spin_lock(map)))
- return;
- *(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
- (struct bpf_spin_lock){};
+ return map->timer_off >= 0;
}
-/* copy everything but bpf_spin_lock */
+static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
+{
+ if (unlikely(map_value_has_spin_lock(map)))
+ *(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
+ (struct bpf_spin_lock){};
+ if (unlikely(map_value_has_timer(map)))
+ *(struct bpf_timer *)(dst + map->timer_off) =
+ (struct bpf_timer){};
+}
+
+/* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
{
+ u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0;
+
if (unlikely(map_value_has_spin_lock(map))) {
- u32 off = map->spin_lock_off;
+ s_off = map->spin_lock_off;
+ s_sz = sizeof(struct bpf_spin_lock);
+ } else if (unlikely(map_value_has_timer(map))) {
+ t_off = map->timer_off;
+ t_sz = sizeof(struct bpf_timer);
+ }
- memcpy(dst, src, off);
- memcpy(dst + off + sizeof(struct bpf_spin_lock),
- src + off + sizeof(struct bpf_spin_lock),
- map->value_size - off - sizeof(struct bpf_spin_lock));
+ if (unlikely(s_sz || t_sz)) {
+ if (s_off < t_off || !s_sz) {
+ swap(s_off, t_off);
+ swap(s_sz, t_sz);
+ }
+ memcpy(dst, src, t_off);
+ memcpy(dst + t_off + t_sz,
+ src + t_off + t_sz,
+ s_off - t_off - t_sz);
+ memcpy(dst + s_off + s_sz,
+ src + s_off + s_sz,
+ map->value_size - s_off - s_sz);
} else {
memcpy(dst, src, map->value_size);
}
}
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
bool lock_src);
+void bpf_timer_cancel_and_free(void *timer);
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);
struct bpf_offload_dev;
@@ -314,6 +338,7 @@ enum bpf_arg_type {
ARG_PTR_TO_FUNC, /* pointer to a bpf program function */
ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */
ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */
+ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */
__BPF_ARG_TYPE_MAX,
};
@@ -554,6 +579,11 @@ struct btf_func_model {
*/
#define BPF_TRAMP_F_SKIP_FRAME BIT(2)
+/* Store IP address of the caller on the trampoline stack,
+ * so it's available for trampoline's programs.
+ */
+#define BPF_TRAMP_F_IP_ARG BIT(3)
+
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
*/
@@ -1073,7 +1103,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
/* an array of programs to be executed under rcu_lock.
*
* Typical usage:
- * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN);
+ * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, bpf_prog_run);
*
* the structure returned by bpf_prog_array_alloc() should be populated
* with program pointers and the last pointer must be NULL.
@@ -1084,7 +1114,10 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
*/
struct bpf_prog_array_item {
struct bpf_prog *prog;
- struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+ union {
+ struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+ u64 bpf_cookie;
+ };
};
struct bpf_prog_array {
@@ -1110,73 +1143,133 @@ int bpf_prog_array_copy_info(struct bpf_prog_array *array,
int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *exclude_prog,
struct bpf_prog *include_prog,
+ u64 bpf_cookie,
struct bpf_prog_array **new_array);
+struct bpf_run_ctx {};
+
+struct bpf_cg_run_ctx {
+ struct bpf_run_ctx run_ctx;
+ const struct bpf_prog_array_item *prog_item;
+};
+
+struct bpf_trace_run_ctx {
+ struct bpf_run_ctx run_ctx;
+ u64 bpf_cookie;
+};
+
+static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
+{
+ struct bpf_run_ctx *old_ctx = NULL;
+
+#ifdef CONFIG_BPF_SYSCALL
+ old_ctx = current->bpf_ctx;
+ current->bpf_ctx = new_ctx;
+#endif
+ return old_ctx;
+}
+
+static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
+{
+#ifdef CONFIG_BPF_SYSCALL
+ current->bpf_ctx = old_ctx;
+#endif
+}
+
/* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
#define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)
/* BPF program asks to set CN on the packet. */
#define BPF_RET_SET_CN (1 << 0)
-/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
- * if bpf_cgroup_storage_set() failed, the rest of programs
- * will not execute. This should be a really rare scenario
- * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of
- * preemptions all between bpf_cgroup_storage_set() and
- * bpf_cgroup_storage_unset() on the same cpu.
- */
-#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \
- ({ \
- struct bpf_prog_array_item *_item; \
- struct bpf_prog *_prog; \
- struct bpf_prog_array *_array; \
- u32 _ret = 1; \
- u32 func_ret; \
- migrate_disable(); \
- rcu_read_lock(); \
- _array = rcu_dereference(array); \
- _item = &_array->items[0]; \
- while ((_prog = READ_ONCE(_item->prog))) { \
- if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \
- break; \
- func_ret = func(_prog, ctx); \
- _ret &= (func_ret & 1); \
- *(ret_flags) |= (func_ret >> 1); \
- bpf_cgroup_storage_unset(); \
- _item++; \
- } \
- rcu_read_unlock(); \
- migrate_enable(); \
- _ret; \
- })
-
-#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage) \
- ({ \
- struct bpf_prog_array_item *_item; \
- struct bpf_prog *_prog; \
- struct bpf_prog_array *_array; \
- u32 _ret = 1; \
- migrate_disable(); \
- rcu_read_lock(); \
- _array = rcu_dereference(array); \
- if (unlikely(check_non_null && !_array))\
- goto _out; \
- _item = &_array->items[0]; \
- while ((_prog = READ_ONCE(_item->prog))) { \
- if (!set_cg_storage) { \
- _ret &= func(_prog, ctx); \
- } else { \
- if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \
- break; \
- _ret &= func(_prog, ctx); \
- bpf_cgroup_storage_unset(); \
- } \
- _item++; \
- } \
-_out: \
- rcu_read_unlock(); \
- migrate_enable(); \
- _ret; \
- })
+typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
+ const void *ctx, bpf_prog_run_fn run_prog,
+ u32 *ret_flags)
+{
+ const struct bpf_prog_array_item *item;
+ const struct bpf_prog *prog;
+ const struct bpf_prog_array *array;
+ struct bpf_run_ctx *old_run_ctx;
+ struct bpf_cg_run_ctx run_ctx;
+ u32 ret = 1;
+ u32 func_ret;
+
+ migrate_disable();
+ rcu_read_lock();
+ array = rcu_dereference(array_rcu);
+ item = &array->items[0];
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+ while ((prog = READ_ONCE(item->prog))) {
+ run_ctx.prog_item = item;
+ func_ret = run_prog(prog, ctx);
+ ret &= (func_ret & 1);
+ *(ret_flags) |= (func_ret >> 1);
+ item++;
+ }
+ bpf_reset_run_ctx(old_run_ctx);
+ rcu_read_unlock();
+ migrate_enable();
+ return ret;
+}
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
+ const void *ctx, bpf_prog_run_fn run_prog)
+{
+ const struct bpf_prog_array_item *item;
+ const struct bpf_prog *prog;
+ const struct bpf_prog_array *array;
+ struct bpf_run_ctx *old_run_ctx;
+ struct bpf_cg_run_ctx run_ctx;
+ u32 ret = 1;
+
+ migrate_disable();
+ rcu_read_lock();
+ array = rcu_dereference(array_rcu);
+ item = &array->items[0];
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+ while ((prog = READ_ONCE(item->prog))) {
+ run_ctx.prog_item = item;
+ ret &= run_prog(prog, ctx);
+ item++;
+ }
+ bpf_reset_run_ctx(old_run_ctx);
+ rcu_read_unlock();
+ migrate_enable();
+ return ret;
+}
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
+ const void *ctx, bpf_prog_run_fn run_prog)
+{
+ const struct bpf_prog_array_item *item;
+ const struct bpf_prog *prog;
+ const struct bpf_prog_array *array;
+ struct bpf_run_ctx *old_run_ctx;
+ struct bpf_trace_run_ctx run_ctx;
+ u32 ret = 1;
+
+ migrate_disable();
+ rcu_read_lock();
+ array = rcu_dereference(array_rcu);
+ if (unlikely(!array))
+ goto out;
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+ item = &array->items[0];
+ while ((prog = READ_ONCE(item->prog))) {
+ run_ctx.bpf_cookie = item->bpf_cookie;
+ ret &= run_prog(prog, ctx);
+ item++;
+ }
+ bpf_reset_run_ctx(old_run_ctx);
+out:
+ rcu_read_unlock();
+ migrate_enable();
+ return ret;
+}
/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
* so BPF programs can request cwr for TCP packets.
@@ -1205,7 +1298,7 @@ _out: \
u32 _flags = 0; \
bool _cn; \
u32 _ret; \
- _ret = BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, &_flags); \
+ _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
_cn = _flags & BPF_RET_SET_CN; \
if (_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
@@ -1214,12 +1307,6 @@ _out: \
_ret; \
})
-#define BPF_PROG_RUN_ARRAY(array, ctx, func) \
- __BPF_PROG_RUN_ARRAY(array, ctx, func, false, true)
-
-#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \
- __BPF_PROG_RUN_ARRAY(array, ctx, func, true, false)
-
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
extern struct mutex bpf_stats_enabled_mutex;
@@ -1398,6 +1485,9 @@ typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux,
struct seq_file *seq);
typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux,
struct bpf_link_info *info);
+typedef const struct bpf_func_proto *
+(*bpf_iter_get_func_proto_t)(enum bpf_func_id func_id,
+ const struct bpf_prog *prog);
enum bpf_iter_feature {
BPF_ITER_RESCHED = BIT(0),
@@ -1410,6 +1500,7 @@ struct bpf_iter_reg {
bpf_iter_detach_target_t detach_target;
bpf_iter_show_fdinfo_t show_fdinfo;
bpf_iter_fill_link_info_t fill_link_info;
+ bpf_iter_get_func_proto_t get_func_proto;
u32 ctx_arg_info_size;
u32 feature;
struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
@@ -1432,6 +1523,8 @@ struct bpf_iter__bpf_map_elem {
int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
bool bpf_iter_prog_supported(struct bpf_prog *prog);
+const struct bpf_func_proto *
+bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog);
int bpf_iter_new_fd(struct bpf_link *link);
bool bpf_link_is_iter(struct bpf_link *link);
@@ -1509,12 +1602,12 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
struct bpf_prog *xdp_prog, struct bpf_map *map,
bool exclude_ingress);
-bool dev_map_can_have_prog(struct bpf_map *map);
void __cpu_map_flush(void);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx);
-bool cpu_map_prog_allowed(struct bpf_map *map);
+int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
+ struct sk_buff *skb);
/* Return map's numa specified by userspace */
static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
@@ -1711,6 +1804,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
return 0;
}
+static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
+ struct sk_buff *skb)
+{
+ return -EOPNOTSUPP;
+}
+
static inline bool cpu_map_prog_allowed(struct bpf_map *map)
{
return false;
@@ -1852,6 +1951,12 @@ void bpf_map_offload_map_free(struct bpf_map *map);
int bpf_prog_test_run_syscall(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
+
+int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
+int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
+int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
+void sock_map_unhash(struct sock *sk);
+void sock_map_close(struct sock *sk, long timeout);
#else
static inline int bpf_prog_offload_init(struct bpf_prog *prog,
union bpf_attr *attr)
@@ -1884,24 +1989,6 @@ static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog,
{
return -ENOTSUPP;
}
-#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
-
-#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
-int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
-int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
-int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
-void sock_map_unhash(struct sock *sk);
-void sock_map_close(struct sock *sk, long timeout);
-
-void bpf_sk_reuseport_detach(struct sock *sk);
-int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
- void *value);
-int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
- void *value, u64 map_flags);
-#else
-static inline void bpf_sk_reuseport_detach(struct sock *sk)
-{
-}
#ifdef CONFIG_BPF_SYSCALL
static inline int sock_map_get_from_fd(const union bpf_attr *attr,
@@ -1921,7 +2008,21 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
{
return -EOPNOTSUPP;
}
+#endif /* CONFIG_BPF_SYSCALL */
+#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
+#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
+void bpf_sk_reuseport_detach(struct sock *sk);
+int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
+ void *value);
+int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags);
+#else
+static inline void bpf_sk_reuseport_detach(struct sock *sk)
+{
+}
+
+#ifdef CONFIG_BPF_SYSCALL
static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
void *key, void *value)
{
@@ -1998,9 +2099,8 @@ extern const struct bpf_func_proto bpf_task_storage_get_proto;
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
-
-const struct bpf_func_proto *bpf_tracing_func_proto(
- enum bpf_func_id func_id, const struct bpf_prog *prog);
+extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
+extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
const struct bpf_func_proto *tracing_prog_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index ae3ac3a2018c..9c81724e4b98 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -136,3 +136,6 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns)
BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp)
#endif
+#ifdef CONFIG_PERF_EVENTS
+BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf)
+#endif
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 828d08afeee0..5424124dbe36 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -53,7 +53,14 @@ struct bpf_reg_state {
/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
* PTR_TO_MAP_VALUE_OR_NULL
*/
- struct bpf_map *map_ptr;
+ struct {
+ struct bpf_map *map_ptr;
+ /* To distinguish map lookups from outer map
+ * the map_uid is non-zero for registers
+ * pointing to inner maps.
+ */
+ u32 map_uid;
+ };
/* for PTR_TO_BTF_ID */
struct {
@@ -201,12 +208,19 @@ struct bpf_func_state {
* zero == main subprog
*/
u32 subprogno;
+ /* Every bpf_timer_start will increment async_entry_cnt.
+ * It's used to distinguish:
+ * void foo(void) { for(;;); }
+ * void foo(void) { bpf_timer_set_callback(,foo); }
+ */
+ u32 async_entry_cnt;
+ bool in_callback_fn;
+ bool in_async_callback_fn;
/* The following fields should be last. See copy_func_state() */
int acquired_refs;
struct bpf_reference_state *refs;
int allocated_stack;
- bool in_callback_fn;
struct bpf_stack_state *stack;
};
@@ -392,6 +406,7 @@ struct bpf_subprog_info {
bool has_tail_call;
bool tail_call_reachable;
bool has_ld_abs;
+ bool is_async_cb;
};
/* single container for all structs
diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h
index 5cdeab497cb3..546e27fc6d46 100644
--- a/include/linux/bpfptr.h
+++ b/include/linux/bpfptr.h
@@ -62,9 +62,17 @@ static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset,
return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size);
}
-static inline void *memdup_bpfptr(bpfptr_t src, size_t len)
+static inline void *kvmemdup_bpfptr(bpfptr_t src, size_t len)
{
- return memdup_sockptr((sockptr_t) src, len);
+ void *p = kvmalloc(len, GFP_USER | __GFP_NOWARN);
+
+ if (!p)
+ return ERR_PTR(-ENOMEM);
+ if (copy_from_bpfptr(p, src, len)) {
+ kvfree(p);
+ return ERR_PTR(-EFAULT);
+ }
+ return p;
}
static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count)
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 94a0c976c90f..214fde93214b 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -99,6 +99,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
const struct btf_member *m,
u32 expected_offset, u32 expected_size);
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
+int btf_find_timer(const struct btf *btf, const struct btf_type *t);
bool btf_type_is_void(const struct btf_type *t);
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 57890b357f85..47d9abfbdb55 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -82,6 +82,9 @@ __BTF_ID_LIST(name, globl)
#define BTF_ID_LIST_SINGLE(name, prefix, typename) \
BTF_ID_LIST(name) \
BTF_ID(prefix, typename)
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) \
+ BTF_ID_LIST_GLOBAL(name) \
+ BTF_ID(prefix, typename)
/*
* The BTF_ID_UNUSED macro defines 4 zero bytes.
@@ -148,6 +151,7 @@ extern struct btf_id_set name;
#define BTF_ID_UNUSED
#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1];
#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
#define BTF_SET_END(name)
@@ -172,7 +176,8 @@ extern struct btf_id_set name;
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock)
enum {
#define BTF_SOCK_TYPE(name, str) name,
@@ -184,4 +189,6 @@ MAX_BTF_SOCK_TYPE,
extern u32 btf_sock_ids[];
#endif
+extern u32 btf_task_struct_ids[];
+
#endif
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index ff832e698efb..0e9bdd42dafb 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -4,9 +4,10 @@
*
* Copyright (C) 2001 Ming Lei <ming.lei@canonical.com>
*/
-#ifndef __LINUX_BVEC_ITER_H
-#define __LINUX_BVEC_ITER_H
+#ifndef __LINUX_BVEC_H
+#define __LINUX_BVEC_H
+#include <linux/highmem.h>
#include <linux/bug.h>
#include <linux/errno.h>
#include <linux/limits.h>
@@ -183,4 +184,61 @@ static inline void bvec_advance(const struct bio_vec *bvec,
}
}
-#endif /* __LINUX_BVEC_ITER_H */
+/**
+ * bvec_kmap_local - map a bvec into the kernel virtual address space
+ * @bvec: bvec to map
+ *
+ * Must be called on single-page bvecs only. Call kunmap_local on the returned
+ * address to unmap.
+ */
+static inline void *bvec_kmap_local(struct bio_vec *bvec)
+{
+ return kmap_local_page(bvec->bv_page) + bvec->bv_offset;
+}
+
+/**
+ * memcpy_from_bvec - copy data from a bvec
+ * @bvec: bvec to copy from
+ *
+ * Must be called on single-page bvecs only.
+ */
+static inline void memcpy_from_bvec(char *to, struct bio_vec *bvec)
+{
+ memcpy_from_page(to, bvec->bv_page, bvec->bv_offset, bvec->bv_len);
+}
+
+/**
+ * memcpy_to_bvec - copy data to a bvec
+ * @bvec: bvec to copy to
+ *
+ * Must be called on single-page bvecs only.
+ */
+static inline void memcpy_to_bvec(struct bio_vec *bvec, const char *from)
+{
+ memcpy_to_page(bvec->bv_page, bvec->bv_offset, from, bvec->bv_len);
+}
+
+/**
+ * memzero_bvec - zero all data in a bvec
+ * @bvec: bvec to zero
+ *
+ * Must be called on single-page bvecs only.
+ */
+static inline void memzero_bvec(struct bio_vec *bvec)
+{
+ memzero_page(bvec->bv_page, bvec->bv_offset, bvec->bv_len);
+}
+
+/**
+ * bvec_virt - return the virtual address for a bvec
+ * @bvec: bvec to return the virtual address for
+ *
+ * Note: the caller must ensure that @bvec->bv_page is not a highmem page.
+ */
+static inline void *bvec_virt(struct bio_vec *bvec)
+{
+ WARN_ON_ONCE(PageHighMem(bvec->bv_page));
+ return page_address(bvec->bv_page) + bvec->bv_offset;
+}
+
+#endif /* __LINUX_BVEC_H */
diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h
index ae7a3411167c..9de6e9053e34 100644
--- a/include/linux/can/bittiming.h
+++ b/include/linux/can/bittiming.h
@@ -37,7 +37,7 @@
* quanta, from when the bit is sent on the TX pin to when it is
* received on the RX pin of the transmitter. Possible options:
*
- * O: automatic mode. The controller dynamically measure @tdcv
+ * 0: automatic mode. The controller dynamically measures @tdcv
* for each transmitted CAN FD frame.
*
* Other values: manual mode. Use the fixed provided value.
@@ -45,7 +45,7 @@
* @tdco: Transmitter Delay Compensation Offset. Offset value, in time
* quanta, defining the distance between the start of the bit
* reception on the RX pin of the transceiver and the SSP
- * position such as SSP = @tdcv + @tdco.
+ * position such that SSP = @tdcv + @tdco.
*
* If @tdco is zero, then TDC is disabled and both @tdcv and
* @tdcf should be ignored.
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 27b275e463da..2413253e54c7 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -32,6 +32,12 @@ enum can_mode {
CAN_MODE_SLEEP
};
+enum can_termination_gpio {
+ CAN_TERMINATION_GPIO_DISABLED = 0,
+ CAN_TERMINATION_GPIO_ENABLED,
+ CAN_TERMINATION_GPIO_MAX,
+};
+
/*
* CAN common private data
*/
@@ -55,6 +61,8 @@ struct can_priv {
unsigned int termination_const_cnt;
const u16 *termination_const;
u16 termination;
+ struct gpio_desc *termination_gpio;
+ u16 termination_gpio_ohms[CAN_TERMINATION_GPIO_MAX];
enum can_state state;
diff --git a/include/linux/can/platform/flexcan.h b/include/linux/can/platform/flexcan.h
new file mode 100644
index 000000000000..1b536fb999de
--- /dev/null
+++ b/include/linux/can/platform/flexcan.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Angelo Dureghello <angelo@kernel-space.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _CAN_PLATFORM_FLEXCAN_H
+#define _CAN_PLATFORM_FLEXCAN_H
+
+struct flexcan_platform_data {
+ u32 clock_frequency;
+ u8 clk_src;
+};
+
+#endif /* _CAN_PLATFORM_FLEXCAN_H */
diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h
index 40882df7105e..c11477620403 100644
--- a/include/linux/can/rx-offload.h
+++ b/include/linux/can/rx-offload.h
@@ -20,6 +20,7 @@ struct can_rx_offload {
bool drop);
struct sk_buff_head skb_queue;
+ struct sk_buff_head skb_irq_queue;
u32 skb_queue_len_max;
unsigned int mb_first;
@@ -48,14 +49,11 @@ unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload,
unsigned int *frame_len_ptr);
int can_rx_offload_queue_tail(struct can_rx_offload *offload,
struct sk_buff *skb);
+void can_rx_offload_irq_finish(struct can_rx_offload *offload);
+void can_rx_offload_threaded_irq_finish(struct can_rx_offload *offload);
void can_rx_offload_del(struct can_rx_offload *offload);
void can_rx_offload_enable(struct can_rx_offload *offload);
-static inline void can_rx_offload_schedule(struct can_rx_offload *offload)
-{
- napi_schedule(&offload->napi);
-}
-
static inline void can_rx_offload_disable(struct can_rx_offload *offload)
{
napi_disable(&offload->napi);
diff --git a/include/linux/cmdline-parser.h b/include/linux/cmdline-parser.h
deleted file mode 100644
index 68a541807bdf..000000000000
--- a/include/linux/cmdline-parser.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Parsing command line, get the partitions information.
- *
- * Written by Cai Zhiyong <caizhiyong@huawei.com>
- *
- */
-#ifndef CMDLINEPARSEH
-#define CMDLINEPARSEH
-
-#include <linux/blkdev.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-
-/* partition flags */
-#define PF_RDONLY 0x01 /* Device is read only */
-#define PF_POWERUP_LOCK 0x02 /* Always locked after reset */
-
-struct cmdline_subpart {
- char name[BDEVNAME_SIZE]; /* partition name, such as 'rootfs' */
- sector_t from;
- sector_t size;
- int flags;
- struct cmdline_subpart *next_subpart;
-};
-
-struct cmdline_parts {
- char name[BDEVNAME_SIZE]; /* block device, such as 'mmcblk0' */
- unsigned int nr_subparts;
- struct cmdline_subpart *subpart;
- struct cmdline_parts *next_parts;
-};
-
-void cmdline_parts_free(struct cmdline_parts **parts);
-
-int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline);
-
-struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
- const char *bdev);
-
-int cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
- int slot,
- int (*add_part)(int, struct cmdline_subpart *, void *),
- void *param);
-
-#endif /* CMDLINEPARSEH */
diff --git a/include/linux/compat.h b/include/linux/compat.h
index c270124e4402..8e0598c7d1d1 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -20,11 +20,8 @@
#include <linux/unistd.h>
#include <asm/compat.h>
-
-#ifdef CONFIG_COMPAT
#include <asm/siginfo.h>
#include <asm/signal.h>
-#endif
#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
/*
@@ -95,8 +92,6 @@ struct compat_iovec {
compat_size_t iov_len;
};
-#ifdef CONFIG_COMPAT
-
#ifndef compat_user_stack_pointer
#define compat_user_stack_pointer() current_user_stack_pointer()
#endif
@@ -131,9 +126,11 @@ struct compat_tms {
#define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW)
+#ifndef compat_sigset_t
typedef struct {
compat_sigset_word sig[_COMPAT_NSIG_WORDS];
} compat_sigset_t;
+#endif
int set_compat_user_sigmask(const compat_sigset_t __user *umask,
size_t sigsetsize);
@@ -384,6 +381,7 @@ struct compat_keyctl_kdf_params {
__u32 __spare[8];
};
+struct compat_stat;
struct compat_statfs;
struct compat_statfs64;
struct compat_old_linux_dirent;
@@ -428,7 +426,7 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set,
unsigned int size)
{
/* size <= sizeof(compat_sigset_t) <= sizeof(sigset_t) */
-#ifdef __BIG_ENDIAN
+#if defined(__BIG_ENDIAN) && defined(CONFIG_64BIT)
compat_sigset_t v;
switch (_NSIG_WORDS) {
case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3];
@@ -929,17 +927,6 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args);
#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
-
-/*
- * For most but not all architectures, "am I in a compat syscall?" and
- * "am I a compat task?" are the same question. For architectures on which
- * they aren't the same question, arch code can override in_compat_syscall.
- */
-
-#ifndef in_compat_syscall
-static inline bool in_compat_syscall(void) { return is_compat_task(); }
-#endif
-
/**
* ns_to_old_timeval32 - Compat version of ns_to_timeval
* @nsec: the nanoseconds value to be converted
@@ -969,6 +956,17 @@ int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz,
int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz,
struct compat_statfs64 __user * buf);
+#ifdef CONFIG_COMPAT
+
+/*
+ * For most but not all architectures, "am I in a compat syscall?" and
+ * "am I a compat task?" are the same question. For architectures on which
+ * they aren't the same question, arch code can override in_compat_syscall.
+ */
+#ifndef in_compat_syscall
+static inline bool in_compat_syscall(void) { return is_compat_task(); }
+#endif
+
#else /* !CONFIG_COMPAT */
#define is_compat_task() (0)
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 85008a65e21f..93a2922b7653 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -220,6 +220,10 @@ struct coresight_sysfs_link {
* @nr_links: number of sysfs links created to other components from this
* device. These will appear in the "connections" group.
* @has_conns_grp: Have added a "connections" group for sysfs links.
+ * @feature_csdev_list: List of complex feature programming added to the device.
+ * @config_csdev_list: List of system configurations added to the device.
+ * @cscfg_csdev_lock: Protect the lists of configurations and features.
+ * @active_cscfg_ctxt: Context information for current active system configuration.
*/
struct coresight_device {
struct coresight_platform_data *pdata;
@@ -241,6 +245,11 @@ struct coresight_device {
int nr_links;
bool has_conns_grp;
bool ect_enabled; /* true only if associated ect device is enabled */
+ /* system configuration and feature lists */
+ struct list_head feature_csdev_list;
+ struct list_head config_csdev_list;
+ spinlock_t cscfg_csdev_lock;
+ void *active_cscfg_ctxt;
};
/*
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index f39b34b13871..95f88edc8f09 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -46,6 +46,7 @@ enum cpuhp_state {
CPUHP_ARM_OMAP_WAKE_DEAD,
CPUHP_IRQ_POLL_DEAD,
CPUHP_BLOCK_SOFTIRQ_DEAD,
+ CPUHP_BIO_DEAD,
CPUHP_ACPI_CPUDRV_DEAD,
CPUHP_S390_PFAULT_DEAD,
CPUHP_BLK_MQ_DEAD,
@@ -399,7 +400,7 @@ static inline int cpuhp_state_remove_instance(enum cpuhp_state state,
/**
* cpuhp_state_remove_instance_nocalls - Remove hotplug instance from state
- * without invoking the reatdown callback
+ * without invoking the teardown callback
* @state: The state from which the instance is removed
* @node: The node for this individual state.
*
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 04c20de66afc..d2b9c41c8edf 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -15,6 +15,7 @@
#include <linux/cpumask.h>
#include <linux/nodemask.h>
#include <linux/mm.h>
+#include <linux/mmu_context.h>
#include <linux/jump_label.h>
#ifdef CONFIG_CPUSETS
@@ -58,7 +59,7 @@ extern void cpuset_wait_for_hotplug(void);
extern void cpuset_read_lock(void);
extern void cpuset_read_unlock(void);
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
-extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
+extern bool cpuset_cpus_allowed_fallback(struct task_struct *p);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
#define cpuset_current_mems_allowed (current->mems_allowed)
void cpuset_init_current_mems_allowed(void);
@@ -184,11 +185,12 @@ static inline void cpuset_read_unlock(void) { }
static inline void cpuset_cpus_allowed(struct task_struct *p,
struct cpumask *mask)
{
- cpumask_copy(mask, cpu_possible_mask);
+ cpumask_copy(mask, task_cpu_possible_mask(p));
}
-static inline void cpuset_cpus_allowed_fallback(struct task_struct *p)
+static inline bool cpuset_cpus_allowed_fallback(struct task_struct *p)
{
+ return false;
}
static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index edb5c186b0b7..3f49e65169c6 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -3,8 +3,7 @@
#define __LINUX_DEBUG_LOCKING_H
#include <linux/atomic.h>
-#include <linux/bug.h>
-#include <linux/printk.h>
+#include <linux/cache.h>
struct task_struct;
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 7457d49acf9a..114553b487ef 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -31,7 +31,7 @@ enum dm_queue_mode {
DM_TYPE_DAX_BIO_BASED = 3,
};
-typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
+typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE, STATUSTYPE_IMA } status_type_t;
union map_info {
void *ptr;
@@ -151,7 +151,6 @@ typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i);
typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff,
size_t nr_pages);
-#define PAGE_SECTORS (PAGE_SIZE / 512)
void dm_error(const char *message);
@@ -603,6 +602,10 @@ void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm);
#define DMEMIT(x...) sz += ((sz >= maxlen) ? \
0 : scnprintf(result + sz, maxlen - sz, x))
+#define DMEMIT_TARGET_NAME_VERSION(y) \
+ DMEMIT("target_name=%s,target_version=%u.%u.%u", \
+ (y)->name, (y)->version[0], (y)->version[1], (y)->version[2])
+
/*
* Definitions of return values from target end_io function.
*/
diff --git a/include/linux/device.h b/include/linux/device.h
index 59940f1744c1..65d84b67b024 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -407,6 +407,7 @@ struct dev_links_info {
* @em_pd: device's energy model performance domain
* @pins: For device pin management.
* See Documentation/driver-api/pin-control.rst for details.
+ * @msi_lock: Lock to protect MSI mask cache and mask register
* @msi_list: Hosts MSI descriptors
* @msi_domain: The generic MSI domain this device is using.
* @numa_node: NUMA node this device is close to.
@@ -506,6 +507,7 @@ struct device {
struct dev_pin_info *pins;
#endif
#ifdef CONFIG_GENERIC_MSI_IRQ
+ raw_spinlock_t msi_lock;
struct list_head msi_list;
#endif
#ifdef CONFIG_DMA_OPS
diff --git a/include/linux/dfl.h b/include/linux/dfl.h
index 6cc10982351a..431636a0dc78 100644
--- a/include/linux/dfl.h
+++ b/include/linux/dfl.h
@@ -38,6 +38,7 @@ struct dfl_device {
int id;
u16 type;
u16 feature_id;
+ u8 revision;
struct resource mmio_res;
int *irqs;
unsigned int num_irqs;
diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index 1587961f1a7b..c7fa4a3498fe 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -11,60 +11,48 @@
struct dsa_switch;
struct sk_buff;
struct net_device;
-struct packet_type;
-struct dsa_8021q_context;
-struct dsa_8021q_crosschip_link {
+struct dsa_tag_8021q_vlan {
struct list_head list;
int port;
- struct dsa_8021q_context *other_ctx;
- int other_port;
+ u16 vid;
refcount_t refcount;
};
-struct dsa_8021q_ops {
- int (*vlan_add)(struct dsa_switch *ds, int port, u16 vid, u16 flags);
- int (*vlan_del)(struct dsa_switch *ds, int port, u16 vid);
-};
-
struct dsa_8021q_context {
- const struct dsa_8021q_ops *ops;
struct dsa_switch *ds;
- struct list_head crosschip_links;
+ struct list_head vlans;
/* EtherType of RX VID, used for filtering on master interface */
__be16 proto;
};
-#define DSA_8021Q_N_SUBVLAN 8
-
-int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled);
+int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto);
-int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port,
- struct dsa_8021q_context *other_ctx,
- int other_port);
-
-int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port,
- struct dsa_8021q_context *other_ctx,
- int other_port);
+void dsa_tag_8021q_unregister(struct dsa_switch *ds);
struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
u16 tpid, u16 tci);
-void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
- int *subvlan);
+void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id);
+
+int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port,
+ struct net_device *br,
+ int bridge_num);
+
+void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port,
+ struct net_device *br,
+ int bridge_num);
+
+u16 dsa_8021q_bridge_tx_fwd_offload_vid(int bridge_num);
u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port);
u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port);
-u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan);
-
int dsa_8021q_rx_switch_id(u16 vid);
int dsa_8021q_rx_source_port(u16 vid);
-u16 dsa_8021q_rx_subvlan(u16 vid);
-
bool vid_is_dsa_8021q_rxvlan(u16 vid);
bool vid_is_dsa_8021q_txvlan(u16 vid);
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index b6089b88314c..171106202fe5 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -16,6 +16,8 @@
#define ETH_P_SJA1105_META 0x0008
#define ETH_P_SJA1110 0xdadc
+#define SJA1105_DEFAULT_VLAN (VLAN_N_VID - 1)
+
/* IEEE 802.3 Annex 57A: Slow Protocols PDUs (01:80:C2:xx:xx:xx) */
#define SJA1105_LINKLOCAL_FILTER_A 0x0180C2000000ull
#define SJA1105_LINKLOCAL_FILTER_A_MASK 0xFFFFFF000000ull
@@ -59,14 +61,12 @@ struct sja1105_skb_cb {
((struct sja1105_skb_cb *)((skb)->cb))
struct sja1105_port {
- u16 subvlan_map[DSA_8021Q_N_SUBVLAN];
struct kthread_worker *xmit_worker;
struct kthread_work xmit_work;
struct sk_buff_head xmit_queue;
struct sja1105_tagger_data *data;
struct dsa_port *dp;
bool hwts_tx_en;
- u16 xmit_tpid;
};
enum sja1110_meta_tstamp {
@@ -89,4 +89,22 @@ static inline void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port,
#endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) */
+#if IS_ENABLED(CONFIG_NET_DSA_SJA1105)
+
+extern const struct dsa_switch_ops sja1105_switch_ops;
+
+static inline bool dsa_port_is_sja1105(struct dsa_port *dp)
+{
+ return dp->ds->ops == &sja1105_switch_ops;
+}
+
+#else
+
+static inline bool dsa_port_is_sja1105(struct dsa_port *dp)
+{
+ return false;
+}
+
+#endif
+
#endif /* _NET_DSA_SJA1105_H */
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 76d3562d3006..4207d06996a4 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -184,6 +184,7 @@ static inline char *mc_event_error_type(const unsigned int err_type)
* @MEM_DDR5: Unbuffered DDR5 RAM
* @MEM_NVDIMM: Non-volatile RAM
* @MEM_WIO2: Wide I/O 2.
+ * @MEM_HBM2: High Bandwidth Memory Gen 2.
*/
enum mem_type {
MEM_EMPTY = 0,
@@ -212,6 +213,7 @@ enum mem_type {
MEM_DDR5,
MEM_NVDIMM,
MEM_WIO2,
+ MEM_HBM2,
};
#define MEM_FLAG_EMPTY BIT(MEM_EMPTY)
@@ -239,6 +241,7 @@ enum mem_type {
#define MEM_FLAG_DDR5 BIT(MEM_DDR5)
#define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM)
#define MEM_FLAG_WIO2 BIT(MEM_WIO2)
+#define MEM_FLAG_HBM2 BIT(MEM_HBM2)
/**
* enum edac_type - Error Detection and Correction capabilities and mode
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 3f221dbf5f95..1834752c5617 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -53,6 +53,22 @@ struct em_perf_domain {
#ifdef CONFIG_ENERGY_MODEL
#define EM_MAX_POWER 0xFFFF
+/*
+ * Increase resolution of energy estimation calculations for 64-bit
+ * architectures. The extra resolution improves decisions made by EAS for the
+ * task placement when two Performance Domains might provide similar energy
+ * estimation values (w/o better resolution the values could be equal).
+ *
+ * We increase resolution only if we have enough bits to allow this increased
+ * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
+ * are pretty high and the returns do not justify the increased costs.
+ */
+#ifdef CONFIG_64BIT
+#define em_scale_power(p) ((p) * 1000)
+#else
+#define em_scale_power(p) (p)
+#endif
+
struct em_data_callback {
/**
* active_power() - Provide power at the next performance state of
diff --git a/include/linux/errno.h b/include/linux/errno.h
index d73f597a2484..8b0c754bab02 100644
--- a/include/linux/errno.h
+++ b/include/linux/errno.h
@@ -31,5 +31,6 @@
#define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */
#define EIOCBQUEUED 529 /* iocb queued, will get completion event */
#define ERECALLCONFLICT 530 /* conflict with recalled state */
+#define ENOGRACE 531 /* NFS file lock reclaim refused */
#endif
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 232daaec56e4..849524b55d89 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -15,10 +15,9 @@
#include <linux/bitmap.h>
#include <linux/compat.h>
+#include <linux/netlink.h>
#include <uapi/linux/ethtool.h>
-#ifdef CONFIG_COMPAT
-
struct compat_ethtool_rx_flow_spec {
u32 flow_type;
union ethtool_flow_union h_u;
@@ -38,8 +37,6 @@ struct compat_ethtool_rxnfc {
u32 rule_locs[];
};
-#endif /* CONFIG_COMPAT */
-
#include <linux/rculist.h>
/**
@@ -176,6 +173,11 @@ extern int
__ethtool_get_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *link_ksettings);
+struct kernel_ethtool_coalesce {
+ u8 use_cqe_mode_tx;
+ u8 use_cqe_mode_rx;
+};
+
/**
* ethtool_intersect_link_masks - Given two link masks, AND them together
* @dst: first mask and where result is stored
@@ -215,7 +217,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
#define ETHTOOL_COALESCE_TX_USECS_HIGH BIT(19)
#define ETHTOOL_COALESCE_TX_MAX_FRAMES_HIGH BIT(20)
#define ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL BIT(21)
-#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(21, 0)
+#define ETHTOOL_COALESCE_USE_CQE_RX BIT(22)
+#define ETHTOOL_COALESCE_USE_CQE_TX BIT(23)
+#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(23, 0)
#define ETHTOOL_COALESCE_USECS \
(ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_TX_USECS)
@@ -241,6 +245,8 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
ETHTOOL_COALESCE_RX_USECS_LOW | ETHTOOL_COALESCE_RX_USECS_HIGH | \
ETHTOOL_COALESCE_PKT_RATE_LOW | ETHTOOL_COALESCE_PKT_RATE_HIGH | \
ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL)
+#define ETHTOOL_COALESCE_USE_CQE \
+ (ETHTOOL_COALESCE_USE_CQE_RX | ETHTOOL_COALESCE_USE_CQE_TX)
#define ETHTOOL_STAT_NOT_SET (~0ULL)
@@ -606,8 +612,14 @@ struct ethtool_ops {
struct ethtool_eeprom *, u8 *);
int (*set_eeprom)(struct net_device *,
struct ethtool_eeprom *, u8 *);
- int (*get_coalesce)(struct net_device *, struct ethtool_coalesce *);
- int (*set_coalesce)(struct net_device *, struct ethtool_coalesce *);
+ int (*get_coalesce)(struct net_device *,
+ struct ethtool_coalesce *,
+ struct kernel_ethtool_coalesce *,
+ struct netlink_ext_ack *);
+ int (*set_coalesce)(struct net_device *,
+ struct ethtool_coalesce *,
+ struct kernel_ethtool_coalesce *,
+ struct netlink_ext_ack *);
void (*get_ringparam)(struct net_device *,
struct ethtool_ringparam *);
int (*set_ringparam)(struct net_device *,
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index fa0a524baed0..305d5f19093b 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -14,6 +14,7 @@
#include <linux/err.h>
#include <linux/percpu-defs.h>
#include <linux/percpu.h>
+#include <linux/sched.h>
/*
* CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
@@ -43,11 +44,9 @@ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *w
__u64 *cnt);
void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
-DECLARE_PER_CPU(int, eventfd_wake_count);
-
-static inline bool eventfd_signal_count(void)
+static inline bool eventfd_signal_allowed(void)
{
- return this_cpu_read(eventfd_wake_count);
+ return !current->in_eventfd_signal;
}
#else /* CONFIG_EVENTFD */
@@ -78,9 +77,9 @@ static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
return -ENOSYS;
}
-static inline bool eventfd_signal_count(void)
+static inline bool eventfd_signal_allowed(void)
{
- return false;
+ return true;
}
static inline void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index fe848901fcc3..3260fe714846 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -221,6 +221,8 @@ struct export_operations {
#define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply
atomic attribute updates
*/
+#define EXPORT_OP_SYNC_LOCKS (0x20) /* Filesystem can't do
+ asynchronous blocking locks */
unsigned long flags;
};
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index a16dbeced152..eec3b7c40811 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -27,6 +27,8 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
#define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DFID_NAME)
+#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD)
+
/*
* fanotify_init() flags that require CAP_SYS_ADMIN.
* We do not allow unprivileged groups to request permission events.
@@ -35,6 +37,7 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
*/
#define FANOTIFY_ADMIN_INIT_FLAGS (FANOTIFY_PERM_CLASSES | \
FAN_REPORT_TID | \
+ FAN_REPORT_PIDFD | \
FAN_UNLIMITED_QUEUE | \
FAN_UNLIMITED_MARKS)
diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h
index 4e624c466583..c50882f19235 100644
--- a/include/linux/fiemap.h
+++ b/include/linux/fiemap.h
@@ -18,8 +18,4 @@ int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo,
int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
u64 phys, u64 len, u32 flags);
-int generic_block_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo, u64 start, u64 len,
- get_block_t *get_block);
-
#endif /* _LINUX_FIEMAP_H 1 */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 83b896044e79..7d248941ecea 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -574,7 +574,8 @@ struct bpf_prog {
kprobe_override:1, /* Do we override a kprobe? */
has_callchain_buf:1, /* callchain buffer allocated? */
enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
- call_get_stack:1; /* Do we call bpf_get_stack() or bpf_get_stackid() */
+ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
+ call_get_func_ip:1; /* Do we call get_func_ip() */
enum bpf_prog_type type; /* Type of BPF program */
enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
@@ -599,25 +600,38 @@ struct sk_filter {
DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
-#define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \
- u32 __ret; \
- cant_migrate(); \
- if (static_branch_unlikely(&bpf_stats_enabled_key)) { \
- struct bpf_prog_stats *__stats; \
- u64 __start = sched_clock(); \
- __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \
- __stats = this_cpu_ptr(prog->stats); \
- u64_stats_update_begin(&__stats->syncp); \
- __stats->cnt++; \
- __stats->nsecs += sched_clock() - __start; \
- u64_stats_update_end(&__stats->syncp); \
- } else { \
- __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \
- } \
- __ret; })
-
-#define BPF_PROG_RUN(prog, ctx) \
- __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func)
+typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx,
+ const struct bpf_insn *insnsi,
+ unsigned int (*bpf_func)(const void *,
+ const struct bpf_insn *));
+
+static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
+ const void *ctx,
+ bpf_dispatcher_fn dfunc)
+{
+ u32 ret;
+
+ cant_migrate();
+ if (static_branch_unlikely(&bpf_stats_enabled_key)) {
+ struct bpf_prog_stats *stats;
+ u64 start = sched_clock();
+
+ ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+ stats = this_cpu_ptr(prog->stats);
+ u64_stats_update_begin(&stats->syncp);
+ stats->cnt++;
+ stats->nsecs += sched_clock() - start;
+ u64_stats_update_end(&stats->syncp);
+ } else {
+ ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+ }
+ return ret;
+}
+
+static __always_inline u32 bpf_prog_run(const struct bpf_prog *prog, const void *ctx)
+{
+ return __bpf_prog_run(prog, ctx, bpf_dispatcher_nop_func);
+}
/*
* Use in preemptible and therefore migratable context to make sure that
@@ -636,7 +650,7 @@ static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog,
u32 ret;
migrate_disable();
- ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func);
+ ret = bpf_prog_run(prog, ctx);
migrate_enable();
return ret;
}
@@ -709,7 +723,7 @@ static inline void bpf_restore_data_end(
cb->data_end = saved_data_end;
}
-static inline u8 *bpf_skb_cb(struct sk_buff *skb)
+static inline u8 *bpf_skb_cb(const struct sk_buff *skb)
{
/* eBPF programs may read/write skb->cb[] area to transfer meta
* data between tail calls. Since this also needs to work with
@@ -730,8 +744,9 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb)
/* Must be invoked with migration disabled */
static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
- struct sk_buff *skb)
+ const void *ctx)
{
+ const struct sk_buff *skb = ctx;
u8 *cb_data = bpf_skb_cb(skb);
u8 cb_saved[BPF_SKB_CB_LEN];
u32 res;
@@ -741,7 +756,7 @@ static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
memset(cb_data, 0, sizeof(cb_saved));
}
- res = BPF_PROG_RUN(prog, skb);
+ res = bpf_prog_run(prog, skb);
if (unlikely(prog->cb_access))
memcpy(cb_data, cb_saved, sizeof(cb_saved));
@@ -775,6 +790,10 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
DECLARE_BPF_DISPATCHER(xdp)
+DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
+
+u32 xdp_master_redirect(struct xdp_buff *xdp);
+
static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
struct xdp_buff *xdp)
{
@@ -782,7 +801,14 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
* under local_bh_disable(), which provides the needed RCU protection
* for accessing map entries.
*/
- return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+ u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+
+ if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
+ if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
+ act = xdp_master_redirect(xdp);
+ }
+
+ return act;
}
void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
@@ -1428,7 +1454,7 @@ static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
};
u32 act;
- act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN);
+ act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, bpf_prog_run);
if (act == SK_PASS) {
selected_sk = ctx.selected_sk;
no_reuseport = ctx.no_reuseport;
@@ -1466,7 +1492,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
};
u32 act;
- act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN);
+ act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, bpf_prog_run);
if (act == SK_PASS) {
selected_sk = ctx.selected_sk;
no_reuseport = ctx.no_reuseport;
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 9d1a5c175065..56b426fe020c 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -52,6 +52,10 @@
#define ZYNQMP_PM_CAPABILITY_WAKEUP 0x4U
#define ZYNQMP_PM_CAPABILITY_UNUSABLE 0x8U
+/* Loader commands */
+#define PM_LOAD_PDI 0x701
+#define PDI_SRC_DDR 0xF
+
/*
* Firmware FPGA Manager flags
* XILINX_ZYNQMP_PM_FPGA_FULL: FPGA full reconfiguration
@@ -411,6 +415,7 @@ int zynqmp_pm_pinctrl_get_config(const u32 pin, const u32 param,
u32 *value);
int zynqmp_pm_pinctrl_set_config(const u32 pin, const u32 param,
u32 value);
+int zynqmp_pm_load_pdi(const u32 src, const u64 address);
#else
static inline int zynqmp_pm_get_api_version(u32 *version)
{
@@ -622,6 +627,11 @@ static inline int zynqmp_pm_pinctrl_set_config(const u32 pin, const u32 param,
{
return -ENODEV;
}
+
+static inline int zynqmp_pm_load_pdi(const u32 src, const u64 address)
+{
+ return -ENODEV;
+}
#endif
#endif /* __FIRMWARE_ZYNQMP_H__ */
diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index ec2cd8bfceb0..474c1f506307 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -110,7 +110,7 @@ struct fpga_image_info {
* @initial_header_size: Maximum number of bytes that should be passed into write_init
* @state: returns an enum value of the FPGA's state
* @status: returns status of the FPGA, including reconfiguration error code
- * @write_init: prepare the FPGA to receive confuration data
+ * @write_init: prepare the FPGA to receive configuration data
* @write: write count bytes of configuration data to the FPGA
* @write_sg: write the scatter list of configuration data to the FPGA
* @write_complete: set FPGA to operating state after writing is done
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 640574294216..1c01f9f2b574 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -319,6 +319,8 @@ enum rw_hint {
/* iocb->ki_waitq is valid */
#define IOCB_WAITQ (1 << 19)
#define IOCB_NOIO (1 << 20)
+/* can use bio alloc cache */
+#define IOCB_ALLOC_CACHE (1 << 21)
struct kiocb {
struct file *ki_filp;
@@ -436,6 +438,10 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
* struct address_space - Contents of a cacheable, mappable object.
* @host: Owner, either the inode or the block_device.
* @i_pages: Cached pages.
+ * @invalidate_lock: Guards coherency between page cache contents and
+ * file offset->disk block mappings in the filesystem during invalidates.
+ * It is also used to block modification of page cache contents through
+ * memory mappings.
* @gfp_mask: Memory allocation flags to use for allocating pages.
* @i_mmap_writable: Number of VM_SHARED mappings.
* @nr_thps: Number of THPs in the pagecache (non-shmem only).
@@ -453,6 +459,7 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
struct address_space {
struct inode *host;
struct xarray i_pages;
+ struct rw_semaphore invalidate_lock;
gfp_t gfp_mask;
atomic_t i_mmap_writable;
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
@@ -814,9 +821,42 @@ static inline void inode_lock_shared_nested(struct inode *inode, unsigned subcla
down_read_nested(&inode->i_rwsem, subclass);
}
+static inline void filemap_invalidate_lock(struct address_space *mapping)
+{
+ down_write(&mapping->invalidate_lock);
+}
+
+static inline void filemap_invalidate_unlock(struct address_space *mapping)
+{
+ up_write(&mapping->invalidate_lock);
+}
+
+static inline void filemap_invalidate_lock_shared(struct address_space *mapping)
+{
+ down_read(&mapping->invalidate_lock);
+}
+
+static inline int filemap_invalidate_trylock_shared(
+ struct address_space *mapping)
+{
+ return down_read_trylock(&mapping->invalidate_lock);
+}
+
+static inline void filemap_invalidate_unlock_shared(
+ struct address_space *mapping)
+{
+ up_read(&mapping->invalidate_lock);
+}
+
void lock_two_nondirectories(struct inode *, struct inode*);
void unlock_two_nondirectories(struct inode *, struct inode*);
+void filemap_invalidate_lock_two(struct address_space *mapping1,
+ struct address_space *mapping2);
+void filemap_invalidate_unlock_two(struct address_space *mapping1,
+ struct address_space *mapping2);
+
+
/*
* NOTE: in a 32bit arch with a preemptable kernel and
* an UP compile the i_size_read/write must be atomic
@@ -997,6 +1037,7 @@ static inline struct file *get_file(struct file *f)
#define FL_UNLOCK_PENDING 512 /* Lease is being broken */
#define FL_OFDLCK 1024 /* lock is "owned" by struct file */
#define FL_LAYOUT 2048 /* outstanding pNFS layout */
+#define FL_RECLAIM 4096 /* reclaiming from a rebooted server */
#define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
@@ -1507,8 +1548,11 @@ struct super_block {
/* Number of inodes with nlink == 0 but still referenced */
atomic_long_t s_remove_count;
- /* Pending fsnotify inode refs */
- atomic_long_t s_fsnotify_inode_refs;
+ /*
+ * Number of inode/mount/sb objects that are being watched. Note that
+ * inode objects are currently double-accounted.
+ */
+ atomic_long_t s_fsnotify_connectors;
/* Being remounted read-only */
int s_readonly_remount;
@@ -2457,7 +2501,6 @@ static inline void file_accessed(struct file *file)
extern int file_modified(struct file *file);
-int sync_inode(struct inode *inode, struct writeback_control *wbc);
int sync_inode_metadata(struct inode *inode, int wait);
struct file_system_type {
@@ -2487,6 +2530,7 @@ struct file_system_type {
struct lock_class_key i_lock_key;
struct lock_class_key i_mutex_key;
+ struct lock_class_key invalidate_lock_key;
struct lock_class_key i_mutex_dir_key;
};
@@ -2570,90 +2614,6 @@ extern struct kobject *fs_kobj;
#define MAX_RW_COUNT (INT_MAX & PAGE_MASK)
-#ifdef CONFIG_MANDATORY_FILE_LOCKING
-extern int locks_mandatory_locked(struct file *);
-extern int locks_mandatory_area(struct inode *, struct file *, loff_t, loff_t, unsigned char);
-
-/*
- * Candidates for mandatory locking have the setgid bit set
- * but no group execute bit - an otherwise meaningless combination.
- */
-
-static inline int __mandatory_lock(struct inode *ino)
-{
- return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID;
-}
-
-/*
- * ... and these candidates should be on SB_MANDLOCK mounted fs,
- * otherwise these will be advisory locks
- */
-
-static inline int mandatory_lock(struct inode *ino)
-{
- return IS_MANDLOCK(ino) && __mandatory_lock(ino);
-}
-
-static inline int locks_verify_locked(struct file *file)
-{
- if (mandatory_lock(locks_inode(file)))
- return locks_mandatory_locked(file);
- return 0;
-}
-
-static inline int locks_verify_truncate(struct inode *inode,
- struct file *f,
- loff_t size)
-{
- if (!inode->i_flctx || !mandatory_lock(inode))
- return 0;
-
- if (size < inode->i_size) {
- return locks_mandatory_area(inode, f, size, inode->i_size - 1,
- F_WRLCK);
- } else {
- return locks_mandatory_area(inode, f, inode->i_size, size - 1,
- F_WRLCK);
- }
-}
-
-#else /* !CONFIG_MANDATORY_FILE_LOCKING */
-
-static inline int locks_mandatory_locked(struct file *file)
-{
- return 0;
-}
-
-static inline int locks_mandatory_area(struct inode *inode, struct file *filp,
- loff_t start, loff_t end, unsigned char type)
-{
- return 0;
-}
-
-static inline int __mandatory_lock(struct inode *inode)
-{
- return 0;
-}
-
-static inline int mandatory_lock(struct inode *inode)
-{
- return 0;
-}
-
-static inline int locks_verify_locked(struct file *file)
-{
- return 0;
-}
-
-static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
- size_t size)
-{
- return 0;
-}
-
-#endif /* CONFIG_MANDATORY_FILE_LOCKING */
-
-
#ifdef CONFIG_FILE_LOCKING
static inline int break_lease(struct inode *inode, unsigned int mode)
{
@@ -2786,6 +2746,7 @@ static inline struct file *file_clone_open(struct file *file)
extern int filp_close(struct file *, fl_owner_t id);
extern struct filename *getname_flags(const char __user *, int, int *);
+extern struct filename *getname_uflags(const char __user *, int);
extern struct filename *getname(const char __user *);
extern struct filename *getname_kernel(const char *);
extern void putname(struct filename *name);
@@ -2891,6 +2852,8 @@ extern int filemap_fdatawrite_range(struct address_space *mapping,
loff_t start, loff_t end);
extern int filemap_check_errors(struct address_space *mapping);
extern void __filemap_set_wb_err(struct address_space *mapping, int err);
+int filemap_fdatawrite_wbc(struct address_space *mapping,
+ struct writeback_control *wbc);
static inline int filemap_write_and_wait(struct address_space *mapping)
{
@@ -3246,10 +3209,6 @@ ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
struct iov_iter *iter);
-/* fs/block_dev.c */
-extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
- int datasync);
-
/* fs/splice.c */
extern ssize_t generic_file_splice_read(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 2ea1387bb497..e912ed9141d9 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -47,27 +47,128 @@ struct fscrypt_name {
#define FSCRYPT_SET_CONTEXT_MAX_SIZE 40
#ifdef CONFIG_FS_ENCRYPTION
+
/*
- * fscrypt superblock flags
+ * If set, the fscrypt bounce page pool won't be allocated (unless another
+ * filesystem needs it). Set this if the filesystem always uses its own bounce
+ * pages for writes and therefore won't need the fscrypt bounce page pool.
*/
#define FS_CFLG_OWN_PAGES (1U << 1)
-/*
- * crypto operations for filesystems
- */
+/* Crypto operations for filesystems */
struct fscrypt_operations {
+
+ /* Set of optional flags; see above for allowed flags */
unsigned int flags;
+
+ /*
+ * If set, this is a filesystem-specific key description prefix that
+ * will be accepted for "logon" keys for v1 fscrypt policies, in
+ * addition to the generic prefix "fscrypt:". This functionality is
+ * deprecated, so new filesystems shouldn't set this field.
+ */
const char *key_prefix;
+
+ /*
+ * Get the fscrypt context of the given inode.
+ *
+ * @inode: the inode whose context to get
+ * @ctx: the buffer into which to get the context
+ * @len: length of the @ctx buffer in bytes
+ *
+ * Return: On success, returns the length of the context in bytes; this
+ * may be less than @len. On failure, returns -ENODATA if the
+ * inode doesn't have a context, -ERANGE if the context is
+ * longer than @len, or another -errno code.
+ */
int (*get_context)(struct inode *inode, void *ctx, size_t len);
+
+ /*
+ * Set an fscrypt context on the given inode.
+ *
+ * @inode: the inode whose context to set. The inode won't already have
+ * an fscrypt context.
+ * @ctx: the context to set
+ * @len: length of @ctx in bytes (at most FSCRYPT_SET_CONTEXT_MAX_SIZE)
+ * @fs_data: If called from fscrypt_set_context(), this will be the
+ * value the filesystem passed to fscrypt_set_context().
+ * Otherwise (i.e. when called from
+ * FS_IOC_SET_ENCRYPTION_POLICY) this will be NULL.
+ *
+ * i_rwsem will be held for write.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
int (*set_context)(struct inode *inode, const void *ctx, size_t len,
void *fs_data);
+
+ /*
+ * Get the dummy fscrypt policy in use on the filesystem (if any).
+ *
+ * Filesystems only need to implement this function if they support the
+ * test_dummy_encryption mount option.
+ *
+ * Return: A pointer to the dummy fscrypt policy, if the filesystem is
+ * mounted with test_dummy_encryption; otherwise NULL.
+ */
const union fscrypt_policy *(*get_dummy_policy)(struct super_block *sb);
+
+ /*
+ * Check whether a directory is empty. i_rwsem will be held for write.
+ */
bool (*empty_dir)(struct inode *inode);
+
+ /* The filesystem's maximum ciphertext filename length, in bytes */
unsigned int max_namelen;
+
+ /*
+ * Check whether the filesystem's inode numbers and UUID are stable,
+ * meaning that they will never be changed even by offline operations
+ * such as filesystem shrinking and therefore can be used in the
+ * encryption without the possibility of files becoming unreadable.
+ *
+ * Filesystems only need to implement this function if they want to
+ * support the FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64} flags. These
+ * flags are designed to work around the limitations of UFS and eMMC
+ * inline crypto hardware, and they shouldn't be used in scenarios where
+ * such hardware isn't being used.
+ *
+ * Leaving this NULL is equivalent to always returning false.
+ */
bool (*has_stable_inodes)(struct super_block *sb);
+
+ /*
+ * Get the number of bits that the filesystem uses to represent inode
+ * numbers and file logical block numbers.
+ *
+ * By default, both of these are assumed to be 64-bit. This function
+ * can be implemented to declare that either or both of these numbers is
+ * shorter, which may allow the use of the
+ * FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64} flags and/or the use of
+ * inline crypto hardware whose maximum DUN length is less than 64 bits
+ * (e.g., eMMC v5.2 spec compliant hardware). This function only needs
+ * to be implemented if support for one of these features is needed.
+ */
void (*get_ino_and_lblk_bits)(struct super_block *sb,
int *ino_bits_ret, int *lblk_bits_ret);
+
+ /*
+ * Return the number of block devices to which the filesystem may write
+ * encrypted file contents.
+ *
+ * If the filesystem can use multiple block devices (other than block
+ * devices that aren't used for encrypted file contents, such as
+ * external journal devices), and wants to support inline encryption,
+ * then it must implement this function. Otherwise it's not needed.
+ */
int (*get_num_devices)(struct super_block *sb);
+
+ /*
+ * If ->get_num_devices() returns a value greater than 1, then this
+ * function is called to get the array of request_queues that the
+ * filesystem is using -- one per block device. (There may be duplicate
+ * entries in this array, as block devices can share a request_queue.)
+ */
void (*get_devices)(struct super_block *sb,
struct request_queue **devs);
};
@@ -253,6 +354,7 @@ int __fscrypt_encrypt_symlink(struct inode *inode, const char *target,
const char *fscrypt_get_symlink(struct inode *inode, const void *caddr,
unsigned int max_size,
struct delayed_call *done);
+int fscrypt_symlink_getattr(const struct path *path, struct kstat *stat);
static inline void fscrypt_set_ops(struct super_block *sb,
const struct fscrypt_operations *s_cop)
{
@@ -583,6 +685,12 @@ static inline const char *fscrypt_get_symlink(struct inode *inode,
return ERR_PTR(-EOPNOTSUPP);
}
+static inline int fscrypt_symlink_getattr(const struct path *path,
+ struct kstat *stat)
+{
+ return -EOPNOTSUPP;
+}
+
static inline void fscrypt_set_ops(struct super_block *sb,
const struct fscrypt_operations *s_cop)
{
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index 63b56aba925a..30ece3ae6df7 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -423,7 +423,8 @@ int __must_check fsl_mc_allocate_irqs(struct fsl_mc_device *mc_dev);
void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev);
-struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev);
+struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev,
+ u16 if_id);
extern struct bus_type fsl_mc_bus_type;
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index f8acddcf54fb..12d3a7d308ab 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -30,6 +30,9 @@ static inline void fsnotify_name(struct inode *dir, __u32 mask,
struct inode *child,
const struct qstr *name, u32 cookie)
{
+ if (atomic_long_read(&dir->i_sb->s_fsnotify_connectors) == 0)
+ return;
+
fsnotify(mask, child, FSNOTIFY_EVENT_INODE, dir, name, NULL, cookie);
}
@@ -41,6 +44,9 @@ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry,
static inline void fsnotify_inode(struct inode *inode, __u32 mask)
{
+ if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0)
+ return;
+
if (S_ISDIR(inode->i_mode))
mask |= FS_ISDIR;
@@ -53,6 +59,9 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask,
{
struct inode *inode = d_inode(dentry);
+ if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0)
+ return 0;
+
if (S_ISDIR(inode->i_mode)) {
mask |= FS_ISDIR;
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a69f363b61bf..832e65f06754 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -643,6 +643,22 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; }
extern int ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr);
+/**
+ * ftrace_need_init_nop - return whether nop call sites should be initialized
+ *
+ * Normally the compiler's -mnop-mcount generates suitable nops, so we don't
+ * need to call ftrace_init_nop() if the code is built with that flag.
+ * Architectures where this is not always the case may define their own
+ * condition.
+ *
+ * Return must be:
+ * 0 if ftrace_init_nop() should not be called
+ * Nonzero if ftrace_init_nop() should be called
+ */
+
+#ifndef ftrace_need_init_nop
+#define ftrace_need_init_nop() (!__is_defined(CC_USING_NOP_MCOUNT))
+#endif
/**
* ftrace_init_nop - initialize a nop call site
diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h
index bc738504ab4a..c285968e437a 100644
--- a/include/linux/genetlink.h
+++ b/include/linux/genetlink.h
@@ -8,34 +8,11 @@
/* All generic netlink requests are serialized by a global lock. */
extern void genl_lock(void);
extern void genl_unlock(void);
-#ifdef CONFIG_LOCKDEP
-extern bool lockdep_genl_is_held(void);
-#endif
/* for synchronisation between af_netlink and genetlink */
extern atomic_t genl_sk_destructing_cnt;
extern wait_queue_head_t genl_sk_destructing_waitq;
-/**
- * rcu_dereference_genl - rcu_dereference with debug checking
- * @p: The pointer to read, prior to dereferencing
- *
- * Do an rcu_dereference(p), but check caller either holds rcu_read_lock()
- * or genl mutex. Note : Please prefer genl_dereference() or rcu_dereference()
- */
-#define rcu_dereference_genl(p) \
- rcu_dereference_check(p, lockdep_genl_is_held())
-
-/**
- * genl_dereference - fetch RCU pointer when updates are prevented by genl mutex
- * @p: The pointer to read, prior to dereferencing
- *
- * Return the value of the specified RCU-protected pointer, but omit
- * the READ_ONCE(), because caller holds genl mutex.
- */
-#define genl_dereference(p) \
- rcu_dereference_protected(p, lockdep_genl_is_held())
-
#define MODULE_ALIAS_GENL_FAMILY(family)\
MODULE_ALIAS_NET_PF_PROTO_NAME(PF_NETLINK, NETLINK_GENERIC, "-family-" family)
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 13b34177cc85..c68d83c87f83 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -60,9 +60,6 @@ struct partition_meta_info {
* device.
* Affects responses to the ``CDROM_GET_CAPABILITY`` ioctl.
*
- * ``GENHD_FL_UP`` (0x0010): indicates that the block device is "up",
- * with a similar meaning to network interfaces.
- *
* ``GENHD_FL_SUPPRESS_PARTITION_INFO`` (0x0020): don't include
* partition information in ``/proc/partitions`` or in the output of
* printk_all_partitions().
@@ -97,7 +94,6 @@ struct partition_meta_info {
/* 2 is unused (used to be GENHD_FL_DRIVERFS) */
/* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */
#define GENHD_FL_CD 0x0008
-#define GENHD_FL_UP 0x0010
#define GENHD_FL_SUPPRESS_PARTITION_INFO 0x0020
#define GENHD_FL_EXT_DEVT 0x0040
#define GENHD_FL_NATIVE_CAPACITY 0x0080
@@ -153,13 +149,15 @@ struct gendisk {
unsigned long state;
#define GD_NEED_PART_SCAN 0
#define GD_READ_ONLY 1
-#define GD_QUEUE_REF 2
struct mutex open_mutex; /* open/close mutex */
unsigned open_partitions; /* number of open partitions */
+ struct backing_dev_info *bdi;
struct kobject *slave_dir;
-
+#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
+ struct list_head slave_bdevs;
+#endif
struct timer_rand_state *random;
atomic_t sync_io; /* RAID */
struct disk_events *ev;
@@ -172,8 +170,14 @@ struct gendisk {
int node_id;
struct badblocks *bb;
struct lockdep_map lockdep_map;
+ u64 diskseq;
};
+static inline bool disk_live(struct gendisk *disk)
+{
+ return !inode_unhashed(disk->part0->bd_inode);
+}
+
/*
* The gendisk is refcounted by the part0 block_device, and the bd_device
* therein is also used for device model presentation in sysfs.
@@ -210,18 +214,12 @@ static inline dev_t disk_devt(struct gendisk *disk)
void disk_uevent(struct gendisk *disk, enum kobject_action action);
/* block/genhd.c */
-extern void device_add_disk(struct device *parent, struct gendisk *disk,
- const struct attribute_group **groups);
-static inline void add_disk(struct gendisk *disk)
+int device_add_disk(struct device *parent, struct gendisk *disk,
+ const struct attribute_group **groups);
+static inline int add_disk(struct gendisk *disk)
{
- device_add_disk(NULL, disk, NULL);
+ return device_add_disk(NULL, disk, NULL);
}
-extern void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk);
-static inline void add_disk_no_queue_reg(struct gendisk *disk)
-{
- device_add_disk_no_queue_reg(NULL, disk);
-}
-
extern void del_gendisk(struct gendisk *gp);
void set_disk_ro(struct gendisk *disk, bool read_only);
@@ -236,6 +234,7 @@ extern void disk_block_events(struct gendisk *disk);
extern void disk_unblock_events(struct gendisk *disk);
extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
+bool disk_force_media_change(struct gendisk *disk, unsigned int events);
/* drivers/char/random.c */
extern void add_disk_randomness(struct gendisk *disk) __latent_entropy;
@@ -259,26 +258,10 @@ static inline sector_t get_capacity(struct gendisk *disk)
int bdev_disk_changed(struct gendisk *disk, bool invalidate);
void blk_drop_partitions(struct gendisk *disk);
-extern struct gendisk *__alloc_disk_node(int minors, int node_id);
+struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
+ struct lock_class_key *lkclass);
extern void put_disk(struct gendisk *disk);
-
-#define alloc_disk_node(minors, node_id) \
-({ \
- static struct lock_class_key __key; \
- const char *__name; \
- struct gendisk *__disk; \
- \
- __name = "(gendisk_completion)"#minors"("#node_id")"; \
- \
- __disk = __alloc_disk_node(minors, node_id); \
- \
- if (__disk) \
- lockdep_init_map(&__disk->lockdep_map, __name, &__key, 0); \
- \
- __disk; \
-})
-
-#define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE)
+struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass);
/**
* blk_alloc_disk - allocate a gendisk structure
@@ -291,15 +274,10 @@ extern void put_disk(struct gendisk *disk);
*/
#define blk_alloc_disk(node_id) \
({ \
- struct gendisk *__disk = __blk_alloc_disk(node_id); \
static struct lock_class_key __key; \
\
- if (__disk) \
- lockdep_init_map(&__disk->lockdep_map, \
- "(bio completion)", &__key, 0); \
- __disk; \
+ __blk_alloc_disk(node_id, &__key); \
})
-struct gendisk *__blk_alloc_disk(int node);
void blk_cleanup_disk(struct gendisk *disk);
int __register_blkdev(unsigned int major, const char *name,
@@ -316,9 +294,10 @@ void set_capacity(struct gendisk *disk, sector_t size);
int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
-#ifdef CONFIG_SYSFS
+#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
+int bd_register_pending_holders(struct gendisk *disk);
#else
static inline int bd_link_disk_holder(struct block_device *bdev,
struct gendisk *disk)
@@ -329,9 +308,14 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
struct gendisk *disk)
{
}
-#endif /* CONFIG_SYSFS */
+static inline int bd_register_pending_holders(struct gendisk *disk)
+{
+ return 0;
+}
+#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */
dev_t part_devt(struct gendisk *disk, u8 partno);
+void inc_diskseq(struct gendisk *disk);
dev_t blk_lookup_devt(const char *name, int partno);
void blk_request_module(dev_t devt);
#ifdef CONFIG_BLOCK
diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h
index cacc4dd27794..630a388035f1 100644
--- a/include/linux/hdlc.h
+++ b/include/linux/hdlc.h
@@ -22,7 +22,7 @@ struct hdlc_proto {
void (*start)(struct net_device *dev); /* if open & DCD */
void (*stop)(struct net_device *dev); /* if open & !DCD */
void (*detach)(struct net_device *dev);
- int (*ioctl)(struct net_device *dev, struct ifreq *ifr);
+ int (*ioctl)(struct net_device *dev, struct if_settings *ifs);
__be16 (*type_trans)(struct sk_buff *skb, struct net_device *dev);
int (*netif_rx)(struct sk_buff *skb);
netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev);
@@ -54,7 +54,7 @@ typedef struct hdlc_device {
/* Exported from hdlc module */
/* Called by hardware driver when a user requests HDLC service */
-int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+int hdlc_ioctl(struct net_device *dev, struct if_settings *ifs);
/* Must be used by hardware driver on module startup/exit */
#define register_hdlc_device(dev) register_netdev(dev)
diff --git a/include/linux/hdlcdrv.h b/include/linux/hdlcdrv.h
index d4d633a49d36..5d70c3f98f5b 100644
--- a/include/linux/hdlcdrv.h
+++ b/include/linux/hdlcdrv.h
@@ -79,7 +79,7 @@ struct hdlcdrv_ops {
*/
int (*open)(struct net_device *);
int (*close)(struct net_device *);
- int (*ioctl)(struct net_device *, struct ifreq *,
+ int (*ioctl)(struct net_device *, void __user *,
struct hdlcdrv_ioctl *, int);
};
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index bb5e7b0a4274..0ee140176f10 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -318,16 +318,12 @@ struct clock_event_device;
extern void hrtimer_interrupt(struct clock_event_device *dev);
-extern void clock_was_set_delayed(void);
-
extern unsigned int hrtimer_resolution;
#else
#define hrtimer_resolution (unsigned int)LOW_RES_NSEC
-static inline void clock_was_set_delayed(void) { }
-
#endif
static inline ktime_t
@@ -351,13 +347,13 @@ hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
timer->base->get_time());
}
-extern void clock_was_set(void);
#ifdef CONFIG_TIMERFD
extern void timerfd_clock_was_set(void);
+extern void timerfd_resume(void);
#else
static inline void timerfd_clock_was_set(void) { }
+static inline void timerfd_resume(void) { }
#endif
-extern void hrtimers_resume(void);
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index a6730072d13a..694264503119 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1088,6 +1088,48 @@ struct ieee80211_ext {
} u;
} __packed __aligned(2);
+#define IEEE80211_TWT_CONTROL_NDP BIT(0)
+#define IEEE80211_TWT_CONTROL_RESP_MODE BIT(1)
+#define IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST BIT(3)
+#define IEEE80211_TWT_CONTROL_RX_DISABLED BIT(4)
+#define IEEE80211_TWT_CONTROL_WAKE_DUR_UNIT BIT(5)
+
+#define IEEE80211_TWT_REQTYPE_REQUEST BIT(0)
+#define IEEE80211_TWT_REQTYPE_SETUP_CMD GENMASK(3, 1)
+#define IEEE80211_TWT_REQTYPE_TRIGGER BIT(4)
+#define IEEE80211_TWT_REQTYPE_IMPLICIT BIT(5)
+#define IEEE80211_TWT_REQTYPE_FLOWTYPE BIT(6)
+#define IEEE80211_TWT_REQTYPE_FLOWID GENMASK(9, 7)
+#define IEEE80211_TWT_REQTYPE_WAKE_INT_EXP GENMASK(14, 10)
+#define IEEE80211_TWT_REQTYPE_PROTECTION BIT(15)
+
+enum ieee80211_twt_setup_cmd {
+ TWT_SETUP_CMD_REQUEST,
+ TWT_SETUP_CMD_SUGGEST,
+ TWT_SETUP_CMD_DEMAND,
+ TWT_SETUP_CMD_GROUPING,
+ TWT_SETUP_CMD_ACCEPT,
+ TWT_SETUP_CMD_ALTERNATE,
+ TWT_SETUP_CMD_DICTATE,
+ TWT_SETUP_CMD_REJECT,
+};
+
+struct ieee80211_twt_params {
+ __le16 req_type;
+ __le64 twt;
+ u8 min_twt_dur;
+ __le16 mantissa;
+ u8 channel;
+} __packed;
+
+struct ieee80211_twt_setup {
+ u8 dialog_token;
+ u8 element_id;
+ u8 length;
+ u8 control;
+ u8 params[];
+} __packed;
+
struct ieee80211_mgmt {
__le16 frame_control;
__le16 duration;
@@ -1252,6 +1294,10 @@ struct ieee80211_mgmt {
__le16 toa_error;
u8 variable[0];
} __packed ftm;
+ struct {
+ u8 action_code;
+ u8 variable[];
+ } __packed s1g;
} u;
} __packed action;
} u;
@@ -2266,6 +2312,9 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info)
#define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR 0x40000000
#define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED 0x80000000
+#define IEEE80211_6GHZ_CTRL_REG_LPI_AP 0
+#define IEEE80211_6GHZ_CTRL_REG_SP_AP 1
+
/**
* ieee80211_he_6ghz_oper - HE 6 GHz operation Information field
* @primary: primary channel
@@ -2282,6 +2331,7 @@ struct ieee80211_he_6ghz_oper {
#define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ 2
#define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ 3
#define IEEE80211_HE_6GHZ_OPER_CTRL_DUP_BEACON 0x4
+#define IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO 0x38
u8 control;
u8 ccfs0;
u8 ccfs1;
@@ -2289,6 +2339,44 @@ struct ieee80211_he_6ghz_oper {
} __packed;
/*
+ * In "9.4.2.161 Transmit Power Envelope element" of "IEEE Std 802.11ax-2021",
+ * it shows four types in "Table 9-275a-Maximum Transmit Power Interpretation
+ * subfield encoding", and two categories for each type in "Table E-12-Regulatory
+ * Info subfield encoding in the United States".
+ * So there are at most 8 Transmit Power Envelope elements in total.
+ */
+#define IEEE80211_TPE_MAX_IE_COUNT 8
+/*
+ * In "Table 9-277—Meaning of Maximum Transmit Power Count subfield"
+ * of "IEEE Std 802.11ax™‐2021", the max power level is 8.
+ */
+#define IEEE80211_MAX_NUM_PWR_LEVEL 8
+
+#define IEEE80211_TPE_MAX_POWER_COUNT 8
+
+/* transmit power interpretation type of transmit power envelope element */
+enum ieee80211_tx_power_intrpt_type {
+ IEEE80211_TPE_LOCAL_EIRP,
+ IEEE80211_TPE_LOCAL_EIRP_PSD,
+ IEEE80211_TPE_REG_CLIENT_EIRP,
+ IEEE80211_TPE_REG_CLIENT_EIRP_PSD,
+};
+
+/**
+ * struct ieee80211_tx_pwr_env
+ *
+ * This structure represents the "Transmit Power Envelope element"
+ */
+struct ieee80211_tx_pwr_env {
+ u8 tx_power_info;
+ s8 tx_power[IEEE80211_TPE_MAX_POWER_COUNT];
+} __packed;
+
+#define IEEE80211_TX_PWR_ENV_INFO_COUNT 0x7
+#define IEEE80211_TX_PWR_ENV_INFO_INTERPRET 0x38
+#define IEEE80211_TX_PWR_ENV_INFO_CATEGORY 0xC0
+
+/*
* ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size
* @he_oper_ie: byte data of the He Operations IE, stating from the byte
* after the ext ID byte. It is assumed that he_oper_ie has at least
@@ -2869,7 +2957,7 @@ enum ieee80211_eid {
WLAN_EID_VHT_OPERATION = 192,
WLAN_EID_EXTENDED_BSS_LOAD = 193,
WLAN_EID_WIDE_BW_CHANNEL_SWITCH = 194,
- WLAN_EID_VHT_TX_POWER_ENVELOPE = 195,
+ WLAN_EID_TX_POWER_ENVELOPE = 195,
WLAN_EID_CHANNEL_SWITCH_WRAPPER = 196,
WLAN_EID_AID = 197,
WLAN_EID_QUIET_CHANNEL = 198,
@@ -2881,6 +2969,7 @@ enum ieee80211_eid {
WLAN_EID_AID_RESPONSE = 211,
WLAN_EID_S1G_BCN_COMPAT = 213,
WLAN_EID_S1G_SHORT_BCN_INTERVAL = 214,
+ WLAN_EID_S1G_TWT = 216,
WLAN_EID_S1G_CAPABILITIES = 217,
WLAN_EID_VENDOR_SPECIFIC = 221,
WLAN_EID_QOS_PARAMETER = 222,
@@ -2950,6 +3039,7 @@ enum ieee80211_category {
WLAN_CATEGORY_FST = 18,
WLAN_CATEGORY_UNPROT_DMG = 20,
WLAN_CATEGORY_VHT = 21,
+ WLAN_CATEGORY_S1G = 22,
WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126,
WLAN_CATEGORY_VENDOR_SPECIFIC = 127,
};
@@ -3023,6 +3113,20 @@ enum ieee80211_key_len {
WLAN_KEY_LEN_BIP_GMAC_256 = 32,
};
+enum ieee80211_s1g_actioncode {
+ WLAN_S1G_AID_SWITCH_REQUEST,
+ WLAN_S1G_AID_SWITCH_RESPONSE,
+ WLAN_S1G_SYNC_CONTROL,
+ WLAN_S1G_STA_INFO_ANNOUNCE,
+ WLAN_S1G_EDCA_PARAM_SET,
+ WLAN_S1G_EL_OPERATION,
+ WLAN_S1G_TWT_SETUP,
+ WLAN_S1G_TWT_TEARDOWN,
+ WLAN_S1G_SECT_GROUP_ID_LIST,
+ WLAN_S1G_SECT_ID_FEEDBACK,
+ WLAN_S1G_TWT_INFORMATION = 11,
+};
+
#define IEEE80211_WEP_IV_LEN 4
#define IEEE80211_WEP_ICV_LEN 4
#define IEEE80211_CCMP_HDR_LEN 8
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index b651c5e32a28..509e18c7e740 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -57,10 +57,16 @@ struct br_ip_list {
#define BR_MRP_AWARE BIT(17)
#define BR_MRP_LOST_CONT BIT(18)
#define BR_MRP_LOST_IN_CONT BIT(19)
+#define BR_TX_FWD_OFFLOAD BIT(20)
#define BR_DEFAULT_AGEING_TIME (300 * HZ)
-extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
+struct net_bridge;
+void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
+ unsigned int cmd, struct ifreq *ifr,
+ void __user *uarg));
+int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
+ struct ifreq *ifr, void __user *uarg);
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
int br_multicast_list_adjacent(struct net_device *dev,
@@ -70,9 +76,6 @@ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto);
bool br_multicast_has_router_adjacent(struct net_device *dev, int proto);
bool br_multicast_enabled(const struct net_device *dev);
bool br_multicast_router(const struct net_device *dev);
-int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
- const void *ctx, bool adding, struct notifier_block *nb,
- struct netlink_ext_ack *extack);
#else
static inline int br_multicast_list_adjacent(struct net_device *dev,
struct list_head *br_ip_list)
@@ -104,13 +107,6 @@ static inline bool br_multicast_router(const struct net_device *dev)
{
return false;
}
-static inline int br_mdb_replay(const struct net_device *br_dev,
- const struct net_device *dev, const void *ctx,
- bool adding, struct notifier_block *nb,
- struct netlink_ext_ack *extack)
-{
- return -EOPNOTSUPP;
-}
#endif
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
@@ -120,9 +116,8 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid);
int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto);
int br_vlan_get_info(const struct net_device *dev, u16 vid,
struct bridge_vlan_info *p_vinfo);
-int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
- const void *ctx, bool adding, struct notifier_block *nb,
- struct netlink_ext_ack *extack);
+int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid,
+ struct bridge_vlan_info *p_vinfo);
#else
static inline bool br_vlan_enabled(const struct net_device *dev)
{
@@ -150,12 +145,10 @@ static inline int br_vlan_get_info(const struct net_device *dev, u16 vid,
return -EINVAL;
}
-static inline int br_vlan_replay(struct net_device *br_dev,
- struct net_device *dev, const void *ctx,
- bool adding, struct notifier_block *nb,
- struct netlink_ext_ack *extack)
+static inline int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid,
+ struct bridge_vlan_info *p_vinfo)
{
- return -EOPNOTSUPP;
+ return -EINVAL;
}
#endif
@@ -167,8 +160,6 @@ void br_fdb_clear_offload(const struct net_device *dev, u16 vid);
bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
u8 br_port_get_stp_state(const struct net_device *dev);
clock_t br_get_ageing_time(const struct net_device *br_dev);
-int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
- const void *ctx, bool adding, struct notifier_block *nb);
#else
static inline struct net_device *
br_fdb_find_port(const struct net_device *br_dev,
@@ -197,13 +188,6 @@ static inline clock_t br_get_ageing_time(const struct net_device *br_dev)
{
return 0;
}
-
-static inline int br_fdb_replay(const struct net_device *br_dev,
- const struct net_device *dev, const void *ctx,
- bool adding, struct notifier_block *nb)
-{
- return -EOPNOTSUPP;
-}
#endif
#endif
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 64ce8cd1cfaf..93c262ecbdc9 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -41,9 +41,6 @@ struct ip_sf_socklist {
__be32 sl_addr[];
};
-#define IP_SFLSIZE(count) (sizeof(struct ip_sf_socklist) + \
- (count) * sizeof(__be32))
-
#define IP_SFBLOCK 10 /* allocate this many at once */
/* ip_mc_socklist is real list now. Speed is not argument;
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 53aa0343bf69..a038feb63f23 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -41,7 +41,7 @@ struct in_device {
unsigned long mr_qri; /* Query Response Interval */
unsigned char mr_qrv; /* Query Robustness Variable */
unsigned char mr_gq_running;
- unsigned char mr_ifc_count;
+ u32 mr_ifc_count;
struct timer_list mr_gq_timer; /* general query timer */
struct timer_list mr_ifc_timer; /* interface change timer */
@@ -178,6 +178,15 @@ static inline struct net_device *ip_dev_find(struct net *net, __be32 addr)
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b);
int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *);
+#ifdef CONFIG_INET
+int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size);
+#else
+static inline int inet_gifconf(struct net_device *dev, char __user *buf,
+ int len, int size)
+{
+ return 0;
+}
+#endif
void devinet_init(void);
struct in_device *inetdev_by_index(struct net *, int);
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope);
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 2ed65b01c961..1f22a30c0963 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -13,6 +13,7 @@
#include <linux/hrtimer.h>
#include <linux/kref.h>
#include <linux/workqueue.h>
+#include <linux/jump_label.h>
#include <linux/atomic.h>
#include <asm/ptrace.h>
@@ -474,12 +475,13 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
#ifdef CONFIG_IRQ_FORCED_THREADING
# ifdef CONFIG_PREEMPT_RT
-# define force_irqthreads (true)
+# define force_irqthreads() (true)
# else
-extern bool force_irqthreads;
+DECLARE_STATIC_KEY_FALSE(force_irqthreads_key);
+# define force_irqthreads() (static_branch_unlikely(&force_irqthreads_key))
# endif
#else
-#define force_irqthreads (0)
+#define force_irqthreads() (false)
#endif
#ifndef local_softirq_pending
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 04b650bcbbe5..649a4d7c241b 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -7,17 +7,18 @@
#if defined(CONFIG_IO_URING)
struct sock *io_uring_get_socket(struct file *file);
-void __io_uring_cancel(struct files_struct *files);
+void __io_uring_cancel(bool cancel_all);
void __io_uring_free(struct task_struct *tsk);
-static inline void io_uring_files_cancel(struct files_struct *files)
+static inline void io_uring_files_cancel(void)
{
if (current->io_uring)
- __io_uring_cancel(files);
+ __io_uring_cancel(false);
}
static inline void io_uring_task_cancel(void)
{
- return io_uring_files_cancel(NULL);
+ if (current->io_uring)
+ __io_uring_cancel(true);
}
static inline void io_uring_free(struct task_struct *tsk)
{
@@ -32,7 +33,7 @@ static inline struct sock *io_uring_get_socket(struct file *file)
static inline void io_uring_task_cancel(void)
{
}
-static inline void io_uring_files_cancel(struct files_struct *files)
+static inline void io_uring_files_cancel(void)
{
}
static inline void io_uring_free(struct task_struct *tsk)
diff --git a/include/linux/ioam6.h b/include/linux/ioam6.h
new file mode 100644
index 000000000000..94a24b36998f
--- /dev/null
+++ b/include/linux/ioam6.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * IPv6 IOAM
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+#ifndef _LINUX_IOAM6_H
+#define _LINUX_IOAM6_H
+
+#include <uapi/linux/ioam6.h>
+
+#endif /* _LINUX_IOAM6_H */
diff --git a/include/linux/ioam6_genl.h b/include/linux/ioam6_genl.h
new file mode 100644
index 000000000000..176e67919de3
--- /dev/null
+++ b/include/linux/ioam6_genl.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * IPv6 IOAM Generic Netlink API
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+#ifndef _LINUX_IOAM6_GENL_H
+#define _LINUX_IOAM6_GENL_H
+
+#include <uapi/linux/ioam6_genl.h>
+
+#endif /* _LINUX_IOAM6_GENL_H */
diff --git a/include/linux/ioam6_iptunnel.h b/include/linux/ioam6_iptunnel.h
new file mode 100644
index 000000000000..07d9dfedd29d
--- /dev/null
+++ b/include/linux/ioam6_iptunnel.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * IPv6 IOAM Lightweight Tunnel API
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+#ifndef _LINUX_IOAM6_IPTUNNEL_H
+#define _LINUX_IOAM6_IPTUNNEL_H
+
+#include <uapi/linux/ioam6_iptunnel.h>
+
+#endif /* _LINUX_IOAM6_IPTUNNEL_H */
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 479c1da3e221..24f8489583ca 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -91,13 +91,30 @@ struct iomap {
const struct iomap_page_ops *page_ops;
};
-static inline sector_t
-iomap_sector(struct iomap *iomap, loff_t pos)
+static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos)
{
return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
}
/*
+ * Returns the inline data pointer for logical offset @pos.
+ */
+static inline void *iomap_inline_data(const struct iomap *iomap, loff_t pos)
+{
+ return iomap->inline_data + pos - iomap->offset;
+}
+
+/*
+ * Check if the mapping's length is within the valid range for inline data.
+ * This is used to guard against accessing data beyond the page inline_data
+ * points at.
+ */
+static inline bool iomap_inline_data_valid(const struct iomap *iomap)
+{
+ return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data);
+}
+
+/*
* When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
* and page_done will be called for each page written to. This only applies to
* buffered writes as unbuffered writes will not typically have pages
@@ -108,10 +125,9 @@ iomap_sector(struct iomap *iomap, loff_t pos)
* associated page could not be obtained.
*/
struct iomap_page_ops {
- int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len,
- struct iomap *iomap);
+ int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len);
void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
- struct page *page, struct iomap *iomap);
+ struct page *page);
};
/*
@@ -124,6 +140,7 @@ struct iomap_page_ops {
#define IOMAP_DIRECT (1 << 4) /* direct I/O */
#define IOMAP_NOWAIT (1 << 5) /* do not block */
#define IOMAP_OVERWRITE_ONLY (1 << 6) /* only pure overwrites allowed */
+#define IOMAP_UNSHARE (1 << 7) /* unshare_file_range */
struct iomap_ops {
/*
@@ -145,15 +162,61 @@ struct iomap_ops {
ssize_t written, unsigned flags, struct iomap *iomap);
};
-/*
- * Main iomap iterator function.
+/**
+ * struct iomap_iter - Iterate through a range of a file
+ * @inode: Set at the start of the iteration and should not change.
+ * @pos: The current file position we are operating on. It is updated by
+ * calls to iomap_iter(). Treat as read-only in the body.
+ * @len: The remaining length of the file segment we're operating on.
+ * It is updated at the same time as @pos.
+ * @processed: The number of bytes processed by the body in the most recent
+ * iteration, or a negative errno. 0 causes the iteration to stop.
+ * @flags: Zero or more of the iomap_begin flags above.
+ * @iomap: Map describing the I/O iteration
+ * @srcmap: Source map for COW operations
*/
-typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
- void *data, struct iomap *iomap, struct iomap *srcmap);
+struct iomap_iter {
+ struct inode *inode;
+ loff_t pos;
+ u64 len;
+ s64 processed;
+ unsigned flags;
+ struct iomap iomap;
+ struct iomap srcmap;
+};
-loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
- unsigned flags, const struct iomap_ops *ops, void *data,
- iomap_actor_t actor);
+int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);
+
+/**
+ * iomap_length - length of the current iomap iteration
+ * @iter: iteration structure
+ *
+ * Returns the length that the operation applies to for the current iteration.
+ */
+static inline u64 iomap_length(const struct iomap_iter *iter)
+{
+ u64 end = iter->iomap.offset + iter->iomap.length;
+
+ if (iter->srcmap.type != IOMAP_HOLE)
+ end = min(end, iter->srcmap.offset + iter->srcmap.length);
+ return min(iter->len, end - iter->pos);
+}
+
+/**
+ * iomap_iter_srcmap - return the source map for the current iomap iteration
+ * @i: iteration structure
+ *
+ * Write operations on file systems with reflink support might require a
+ * source and a destination map. This function returns the source map
+ * for a given operation, which may or may not be identical to the destination
+ * map in &i->iomap.
+ */
+static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i)
+{
+ if (i->srcmap.type != IOMAP_HOLE)
+ return &i->srcmap;
+ return &i->iomap;
+}
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
const struct iomap_ops *ops);
@@ -250,8 +313,8 @@ int iomap_writepages(struct address_space *mapping,
struct iomap_dio_ops {
int (*end_io)(struct kiocb *iocb, ssize_t size, int error,
unsigned flags);
- blk_qc_t (*submit_io)(struct inode *inode, struct iomap *iomap,
- struct bio *bio, loff_t file_offset);
+ blk_qc_t (*submit_io)(const struct iomap_iter *iter, struct bio *bio,
+ loff_t file_offset);
};
/*
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index e9bfe6972aed..3f53bc27a19b 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -6,46 +6,22 @@
#include <linux/sched/rt.h>
#include <linux/iocontext.h>
-/*
- * Gives us 8 prio classes with 13-bits of data for each class
- */
-#define IOPRIO_CLASS_SHIFT (13)
-#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1)
-
-#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT)
-#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK)
-#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data)
-
-#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE)
+#include <uapi/linux/ioprio.h>
/*
- * These are the io priority groups as implemented by CFQ. RT is the realtime
- * class, it always gets premium service. BE is the best-effort scheduling
- * class, the default for any process. IDLE is the idle scheduling class, it
- * is only served when no one else is using the disk.
+ * Default IO priority.
*/
-enum {
- IOPRIO_CLASS_NONE,
- IOPRIO_CLASS_RT,
- IOPRIO_CLASS_BE,
- IOPRIO_CLASS_IDLE,
-};
+#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM)
/*
- * 8 best effort priority levels are supported
+ * Check that a priority value has a valid class.
*/
-#define IOPRIO_BE_NR (8)
-
-enum {
- IOPRIO_WHO_PROCESS = 1,
- IOPRIO_WHO_PGRP,
- IOPRIO_WHO_USER,
-};
+static inline bool ioprio_valid(unsigned short ioprio)
+{
+ unsigned short class = IOPRIO_PRIO_CLASS(ioprio);
-/*
- * Fallback BE priority
- */
-#define IOPRIO_NORM (4)
+ return class > IOPRIO_CLASS_NONE && class <= IOPRIO_CLASS_IDLE;
+}
/*
* if process has set io priority explicitly, use that. if not, convert
@@ -80,7 +56,7 @@ static inline int get_current_ioprio(void)
if (ioc)
return ioc->ioprio;
- return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+ return IOPRIO_DEFAULT;
}
/*
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 70b2ad3b9884..ef4a69865737 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -76,6 +76,9 @@ struct ipv6_devconf {
__s32 disable_policy;
__s32 ndisc_tclass;
__s32 rpl_seg_enabled;
+ __u32 ioam6_id;
+ __u32 ioam6_id_wide;
+ __u8 ioam6_enabled;
struct ctl_table_header *sysctl_header;
};
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8e9a9ae471a6..c8293c817646 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -569,6 +569,7 @@ struct irq_chip {
* IRQCHIP_SUPPORTS_NMI: Chip can deliver NMIs, only for root irqchips
* IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND: Invokes __enable_irq()/__disable_irq() for wake irqs
* in the suspend path if they are in disabled state
+ * IRQCHIP_AFFINITY_PRE_STARTUP: Default affinity update before startup
*/
enum {
IRQCHIP_SET_TYPE_MASKED = (1 << 0),
@@ -581,6 +582,7 @@ enum {
IRQCHIP_SUPPORTS_LEVEL_MSI = (1 << 7),
IRQCHIP_SUPPORTS_NMI = (1 << 8),
IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND = (1 << 9),
+ IRQCHIP_AFFINITY_PRE_STARTUP = (1 << 10),
};
#include <linux/irqdesc.h>
diff --git a/include/linux/iscsi_ibft.h b/include/linux/iscsi_ibft.h
index b7b45ca82bea..790e7fcfc1a6 100644
--- a/include/linux/iscsi_ibft.h
+++ b/include/linux/iscsi_ibft.h
@@ -13,26 +13,22 @@
#ifndef ISCSI_IBFT_H
#define ISCSI_IBFT_H
-#include <linux/acpi.h>
+#include <linux/types.h>
/*
- * Logical location of iSCSI Boot Format Table.
- * If the value is NULL there is no iBFT on the machine.
+ * Physical location of iSCSI Boot Format Table.
+ * If the value is 0 there is no iBFT on the machine.
*/
-extern struct acpi_table_ibft *ibft_addr;
+extern phys_addr_t ibft_phys_addr;
/*
* Routine used to find and reserve the iSCSI Boot Format Table. The
- * mapped address is set in the ibft_addr variable.
+ * physical address is set in the ibft_phys_addr variable.
*/
#ifdef CONFIG_ISCSI_IBFT_FIND
-unsigned long find_ibft_region(unsigned long *sizep);
+void reserve_ibft_region(void);
#else
-static inline unsigned long find_ibft_region(unsigned long *sizep)
-{
- *sizep = 0;
- return 0;
-}
+static inline void reserve_ibft_region(void) {}
#endif
#endif /* ISCSI_IBFT_H */
diff --git a/include/linux/kfence.h b/include/linux/kfence.h
index a70d1ea03532..3fe6dd8a18c1 100644
--- a/include/linux/kfence.h
+++ b/include/linux/kfence.h
@@ -51,10 +51,11 @@ extern atomic_t kfence_allocation_gate;
static __always_inline bool is_kfence_address(const void *addr)
{
/*
- * The non-NULL check is required in case the __kfence_pool pointer was
- * never initialized; keep it in the slow-path after the range-check.
+ * The __kfence_pool != NULL check is required to deal with the case
+ * where __kfence_pool == NULL && addr < KFENCE_POOL_SIZE. Keep it in
+ * the slow-path after the range-check!
*/
- return unlikely((unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && addr);
+ return unlikely((unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && __kfence_pool);
}
/**
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 329fd914cf24..a0b730be40ad 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -33,6 +33,12 @@ enum led_brightness {
LED_FULL = 255,
};
+enum led_default_state {
+ LEDS_DEFSTATE_OFF = 0,
+ LEDS_DEFSTATE_ON = 1,
+ LEDS_DEFSTATE_KEEP = 2,
+};
+
struct led_init_data {
/* device fwnode handle */
struct fwnode_handle *fwnode;
@@ -520,9 +526,9 @@ struct gpio_led {
/* default_state should be one of LEDS_GPIO_DEFSTATE_(ON|OFF|KEEP) */
struct gpio_desc *gpiod;
};
-#define LEDS_GPIO_DEFSTATE_OFF 0
-#define LEDS_GPIO_DEFSTATE_ON 1
-#define LEDS_GPIO_DEFSTATE_KEEP 2
+#define LEDS_GPIO_DEFSTATE_OFF LEDS_DEFSTATE_OFF
+#define LEDS_GPIO_DEFSTATE_ON LEDS_DEFSTATE_ON
+#define LEDS_GPIO_DEFSTATE_KEEP LEDS_DEFSTATE_KEEP
struct gpio_led_platform_data {
int num_leds;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 3fcd24236793..860e63f5667b 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -161,6 +161,10 @@ enum {
ATA_DFLAG_D_SENSE = (1 << 29), /* Descriptor sense requested */
ATA_DFLAG_ZAC = (1 << 30), /* ZAC device */
+ ATA_DFLAG_FEATURES_MASK = ATA_DFLAG_TRUSTED | ATA_DFLAG_DA | \
+ ATA_DFLAG_DEVSLP | ATA_DFLAG_NCQ_SEND_RECV | \
+ ATA_DFLAG_NCQ_PRIO,
+
ATA_DEV_UNKNOWN = 0, /* unknown device */
ATA_DEV_ATA = 1, /* ATA device */
ATA_DEV_ATA_UNSUP = 2, /* ATA device (unsupported) */
@@ -535,6 +539,7 @@ typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes)
extern struct device_attribute dev_attr_unload_heads;
#ifdef CONFIG_SATA_HOST
extern struct device_attribute dev_attr_link_power_management_policy;
+extern struct device_attribute dev_attr_ncq_prio_supported;
extern struct device_attribute dev_attr_ncq_prio_enable;
extern struct device_attribute dev_attr_em_message_type;
extern struct device_attribute dev_attr_em_message;
@@ -1454,7 +1459,7 @@ static inline bool sata_pmp_attached(struct ata_port *ap)
static inline bool ata_is_host_link(const struct ata_link *link)
{
- return 1;
+ return true;
}
#endif /* CONFIG_SATA_PMP */
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
deleted file mode 100644
index 0908abda9c1b..000000000000
--- a/include/linux/lightnvm.h
+++ /dev/null
@@ -1,697 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef NVM_H
-#define NVM_H
-
-#include <linux/blkdev.h>
-#include <linux/types.h>
-#include <uapi/linux/lightnvm.h>
-
-enum {
- NVM_IO_OK = 0,
- NVM_IO_REQUEUE = 1,
- NVM_IO_DONE = 2,
- NVM_IO_ERR = 3,
-
- NVM_IOTYPE_NONE = 0,
- NVM_IOTYPE_GC = 1,
-};
-
-/* common format */
-#define NVM_GEN_CH_BITS (8)
-#define NVM_GEN_LUN_BITS (8)
-#define NVM_GEN_BLK_BITS (16)
-#define NVM_GEN_RESERVED (32)
-
-/* 1.2 format */
-#define NVM_12_PG_BITS (16)
-#define NVM_12_PL_BITS (4)
-#define NVM_12_SEC_BITS (4)
-#define NVM_12_RESERVED (8)
-
-/* 2.0 format */
-#define NVM_20_SEC_BITS (24)
-#define NVM_20_RESERVED (8)
-
-enum {
- NVM_OCSSD_SPEC_12 = 12,
- NVM_OCSSD_SPEC_20 = 20,
-};
-
-struct ppa_addr {
- /* Generic structure for all addresses */
- union {
- /* generic device format */
- struct {
- u64 ch : NVM_GEN_CH_BITS;
- u64 lun : NVM_GEN_LUN_BITS;
- u64 blk : NVM_GEN_BLK_BITS;
- u64 reserved : NVM_GEN_RESERVED;
- } a;
-
- /* 1.2 device format */
- struct {
- u64 ch : NVM_GEN_CH_BITS;
- u64 lun : NVM_GEN_LUN_BITS;
- u64 blk : NVM_GEN_BLK_BITS;
- u64 pg : NVM_12_PG_BITS;
- u64 pl : NVM_12_PL_BITS;
- u64 sec : NVM_12_SEC_BITS;
- u64 reserved : NVM_12_RESERVED;
- } g;
-
- /* 2.0 device format */
- struct {
- u64 grp : NVM_GEN_CH_BITS;
- u64 pu : NVM_GEN_LUN_BITS;
- u64 chk : NVM_GEN_BLK_BITS;
- u64 sec : NVM_20_SEC_BITS;
- u64 reserved : NVM_20_RESERVED;
- } m;
-
- struct {
- u64 line : 63;
- u64 is_cached : 1;
- } c;
-
- u64 ppa;
- };
-};
-
-struct nvm_rq;
-struct nvm_id;
-struct nvm_dev;
-struct nvm_tgt_dev;
-struct nvm_chk_meta;
-
-typedef int (nvm_id_fn)(struct nvm_dev *);
-typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *);
-typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int);
-typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int,
- struct nvm_chk_meta *);
-typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *, void *);
-typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *, int);
-typedef void (nvm_destroy_dma_pool_fn)(void *);
-typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
- dma_addr_t *);
-typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t);
-
-struct nvm_dev_ops {
- nvm_id_fn *identity;
- nvm_op_bb_tbl_fn *get_bb_tbl;
- nvm_op_set_bb_fn *set_bb_tbl;
-
- nvm_get_chk_meta_fn *get_chk_meta;
-
- nvm_submit_io_fn *submit_io;
-
- nvm_create_dma_pool_fn *create_dma_pool;
- nvm_destroy_dma_pool_fn *destroy_dma_pool;
- nvm_dev_dma_alloc_fn *dev_dma_alloc;
- nvm_dev_dma_free_fn *dev_dma_free;
-};
-
-#ifdef CONFIG_NVM
-
-#include <linux/file.h>
-#include <linux/dmapool.h>
-
-enum {
- /* HW Responsibilities */
- NVM_RSP_L2P = 1 << 0,
- NVM_RSP_ECC = 1 << 1,
-
- /* Physical Adressing Mode */
- NVM_ADDRMODE_LINEAR = 0,
- NVM_ADDRMODE_CHANNEL = 1,
-
- /* Plane programming mode for LUN */
- NVM_PLANE_SINGLE = 1,
- NVM_PLANE_DOUBLE = 2,
- NVM_PLANE_QUAD = 4,
-
- /* Status codes */
- NVM_RSP_SUCCESS = 0x0,
- NVM_RSP_NOT_CHANGEABLE = 0x1,
- NVM_RSP_ERR_FAILWRITE = 0x40ff,
- NVM_RSP_ERR_EMPTYPAGE = 0x42ff,
- NVM_RSP_ERR_FAILECC = 0x4281,
- NVM_RSP_ERR_FAILCRC = 0x4004,
- NVM_RSP_WARN_HIGHECC = 0x4700,
-
- /* Device opcodes */
- NVM_OP_PWRITE = 0x91,
- NVM_OP_PREAD = 0x92,
- NVM_OP_ERASE = 0x90,
-
- /* PPA Command Flags */
- NVM_IO_SNGL_ACCESS = 0x0,
- NVM_IO_DUAL_ACCESS = 0x1,
- NVM_IO_QUAD_ACCESS = 0x2,
-
- /* NAND Access Modes */
- NVM_IO_SUSPEND = 0x80,
- NVM_IO_SLC_MODE = 0x100,
- NVM_IO_SCRAMBLE_ENABLE = 0x200,
-
- /* Block Types */
- NVM_BLK_T_FREE = 0x0,
- NVM_BLK_T_BAD = 0x1,
- NVM_BLK_T_GRWN_BAD = 0x2,
- NVM_BLK_T_DEV = 0x4,
- NVM_BLK_T_HOST = 0x8,
-
- /* Memory capabilities */
- NVM_ID_CAP_SLC = 0x1,
- NVM_ID_CAP_CMD_SUSPEND = 0x2,
- NVM_ID_CAP_SCRAMBLE = 0x4,
- NVM_ID_CAP_ENCRYPT = 0x8,
-
- /* Memory types */
- NVM_ID_FMTYPE_SLC = 0,
- NVM_ID_FMTYPE_MLC = 1,
-
- /* Device capabilities */
- NVM_ID_DCAP_BBLKMGMT = 0x1,
- NVM_UD_DCAP_ECC = 0x2,
-};
-
-struct nvm_id_lp_mlc {
- u16 num_pairs;
- u8 pairs[886];
-};
-
-struct nvm_id_lp_tbl {
- __u8 id[8];
- struct nvm_id_lp_mlc mlc;
-};
-
-struct nvm_addrf_12 {
- u8 ch_len;
- u8 lun_len;
- u8 blk_len;
- u8 pg_len;
- u8 pln_len;
- u8 sec_len;
-
- u8 ch_offset;
- u8 lun_offset;
- u8 blk_offset;
- u8 pg_offset;
- u8 pln_offset;
- u8 sec_offset;
-
- u64 ch_mask;
- u64 lun_mask;
- u64 blk_mask;
- u64 pg_mask;
- u64 pln_mask;
- u64 sec_mask;
-};
-
-struct nvm_addrf {
- u8 ch_len;
- u8 lun_len;
- u8 chk_len;
- u8 sec_len;
- u8 rsv_len[2];
-
- u8 ch_offset;
- u8 lun_offset;
- u8 chk_offset;
- u8 sec_offset;
- u8 rsv_off[2];
-
- u64 ch_mask;
- u64 lun_mask;
- u64 chk_mask;
- u64 sec_mask;
- u64 rsv_mask[2];
-};
-
-enum {
- /* Chunk states */
- NVM_CHK_ST_FREE = 1 << 0,
- NVM_CHK_ST_CLOSED = 1 << 1,
- NVM_CHK_ST_OPEN = 1 << 2,
- NVM_CHK_ST_OFFLINE = 1 << 3,
-
- /* Chunk types */
- NVM_CHK_TP_W_SEQ = 1 << 0,
- NVM_CHK_TP_W_RAN = 1 << 1,
- NVM_CHK_TP_SZ_SPEC = 1 << 4,
-};
-
-/*
- * Note: The structure size is linked to nvme_nvm_chk_meta such that the same
- * buffer can be used when converting from little endian to cpu addressing.
- */
-struct nvm_chk_meta {
- u8 state;
- u8 type;
- u8 wi;
- u8 rsvd[5];
- u64 slba;
- u64 cnlb;
- u64 wp;
-};
-
-struct nvm_target {
- struct list_head list;
- struct nvm_tgt_dev *dev;
- struct nvm_tgt_type *type;
- struct gendisk *disk;
-};
-
-#define ADDR_EMPTY (~0ULL)
-
-#define NVM_TARGET_DEFAULT_OP (101)
-#define NVM_TARGET_MIN_OP (3)
-#define NVM_TARGET_MAX_OP (80)
-
-#define NVM_VERSION_MAJOR 1
-#define NVM_VERSION_MINOR 0
-#define NVM_VERSION_PATCH 0
-
-#define NVM_MAX_VLBA (64) /* max logical blocks in a vector command */
-
-struct nvm_rq;
-typedef void (nvm_end_io_fn)(struct nvm_rq *);
-
-struct nvm_rq {
- struct nvm_tgt_dev *dev;
-
- struct bio *bio;
-
- union {
- struct ppa_addr ppa_addr;
- dma_addr_t dma_ppa_list;
- };
-
- struct ppa_addr *ppa_list;
-
- void *meta_list;
- dma_addr_t dma_meta_list;
-
- nvm_end_io_fn *end_io;
-
- uint8_t opcode;
- uint16_t nr_ppas;
- uint16_t flags;
-
- u64 ppa_status; /* ppa media status */
- int error;
-
- int is_seq; /* Sequential hint flag. 1.2 only */
-
- void *private;
-};
-
-static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu)
-{
- return pdu - sizeof(struct nvm_rq);
-}
-
-static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata)
-{
- return rqdata + 1;
-}
-
-static inline struct ppa_addr *nvm_rq_to_ppa_list(struct nvm_rq *rqd)
-{
- return (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
-}
-
-enum {
- NVM_BLK_ST_FREE = 0x1, /* Free block */
- NVM_BLK_ST_TGT = 0x2, /* Block in use by target */
- NVM_BLK_ST_BAD = 0x8, /* Bad block */
-};
-
-/* Instance geometry */
-struct nvm_geo {
- /* device reported version */
- u8 major_ver_id;
- u8 minor_ver_id;
-
- /* kernel short version */
- u8 version;
-
- /* instance specific geometry */
- int num_ch;
- int num_lun; /* per channel */
-
- /* calculated values */
- int all_luns; /* across channels */
- int all_chunks; /* across channels */
-
- int op; /* over-provision in instance */
-
- sector_t total_secs; /* across channels */
-
- /* chunk geometry */
- u32 num_chk; /* chunks per lun */
- u32 clba; /* sectors per chunk */
- u16 csecs; /* sector size */
- u16 sos; /* out-of-band area size */
- bool ext; /* metadata in extended data buffer */
- u32 mdts; /* Max data transfer size*/
-
- /* device write constrains */
- u32 ws_min; /* minimum write size */
- u32 ws_opt; /* optimal write size */
- u32 mw_cunits; /* distance required for successful read */
- u32 maxoc; /* maximum open chunks */
- u32 maxocpu; /* maximum open chunks per parallel unit */
-
- /* device capabilities */
- u32 mccap;
-
- /* device timings */
- u32 trdt; /* Avg. Tread (ns) */
- u32 trdm; /* Max Tread (ns) */
- u32 tprt; /* Avg. Tprog (ns) */
- u32 tprm; /* Max Tprog (ns) */
- u32 tbet; /* Avg. Terase (ns) */
- u32 tbem; /* Max Terase (ns) */
-
- /* generic address format */
- struct nvm_addrf addrf;
-
- /* 1.2 compatibility */
- u8 vmnt;
- u32 cap;
- u32 dom;
-
- u8 mtype;
- u8 fmtype;
-
- u16 cpar;
- u32 mpos;
-
- u8 num_pln;
- u8 pln_mode;
- u16 num_pg;
- u16 fpg_sz;
-};
-
-/* sub-device structure */
-struct nvm_tgt_dev {
- /* Device information */
- struct nvm_geo geo;
-
- /* Base ppas for target LUNs */
- struct ppa_addr *luns;
-
- struct request_queue *q;
-
- struct nvm_dev *parent;
- void *map;
-};
-
-struct nvm_dev {
- struct nvm_dev_ops *ops;
-
- struct list_head devices;
-
- /* Device information */
- struct nvm_geo geo;
-
- unsigned long *lun_map;
- void *dma_pool;
-
- /* Backend device */
- struct request_queue *q;
- char name[DISK_NAME_LEN];
- void *private_data;
-
- struct kref ref;
- void *rmap;
-
- struct mutex mlock;
- spinlock_t lock;
-
- /* target management */
- struct list_head area_list;
- struct list_head targets;
-};
-
-static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
- struct ppa_addr r)
-{
- struct nvm_geo *geo = &dev->geo;
- struct ppa_addr l;
-
- if (geo->version == NVM_OCSSD_SPEC_12) {
- struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&geo->addrf;
-
- l.ppa = ((u64)r.g.ch) << ppaf->ch_offset;
- l.ppa |= ((u64)r.g.lun) << ppaf->lun_offset;
- l.ppa |= ((u64)r.g.blk) << ppaf->blk_offset;
- l.ppa |= ((u64)r.g.pg) << ppaf->pg_offset;
- l.ppa |= ((u64)r.g.pl) << ppaf->pln_offset;
- l.ppa |= ((u64)r.g.sec) << ppaf->sec_offset;
- } else {
- struct nvm_addrf *lbaf = &geo->addrf;
-
- l.ppa = ((u64)r.m.grp) << lbaf->ch_offset;
- l.ppa |= ((u64)r.m.pu) << lbaf->lun_offset;
- l.ppa |= ((u64)r.m.chk) << lbaf->chk_offset;
- l.ppa |= ((u64)r.m.sec) << lbaf->sec_offset;
- }
-
- return l;
-}
-
-static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev,
- struct ppa_addr r)
-{
- struct nvm_geo *geo = &dev->geo;
- struct ppa_addr l;
-
- l.ppa = 0;
-
- if (geo->version == NVM_OCSSD_SPEC_12) {
- struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&geo->addrf;
-
- l.g.ch = (r.ppa & ppaf->ch_mask) >> ppaf->ch_offset;
- l.g.lun = (r.ppa & ppaf->lun_mask) >> ppaf->lun_offset;
- l.g.blk = (r.ppa & ppaf->blk_mask) >> ppaf->blk_offset;
- l.g.pg = (r.ppa & ppaf->pg_mask) >> ppaf->pg_offset;
- l.g.pl = (r.ppa & ppaf->pln_mask) >> ppaf->pln_offset;
- l.g.sec = (r.ppa & ppaf->sec_mask) >> ppaf->sec_offset;
- } else {
- struct nvm_addrf *lbaf = &geo->addrf;
-
- l.m.grp = (r.ppa & lbaf->ch_mask) >> lbaf->ch_offset;
- l.m.pu = (r.ppa & lbaf->lun_mask) >> lbaf->lun_offset;
- l.m.chk = (r.ppa & lbaf->chk_mask) >> lbaf->chk_offset;
- l.m.sec = (r.ppa & lbaf->sec_mask) >> lbaf->sec_offset;
- }
-
- return l;
-}
-
-static inline u64 dev_to_chunk_addr(struct nvm_dev *dev, void *addrf,
- struct ppa_addr p)
-{
- struct nvm_geo *geo = &dev->geo;
- u64 caddr;
-
- if (geo->version == NVM_OCSSD_SPEC_12) {
- struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)addrf;
-
- caddr = (u64)p.g.pg << ppaf->pg_offset;
- caddr |= (u64)p.g.pl << ppaf->pln_offset;
- caddr |= (u64)p.g.sec << ppaf->sec_offset;
- } else {
- caddr = p.m.sec;
- }
-
- return caddr;
-}
-
-static inline struct ppa_addr nvm_ppa32_to_ppa64(struct nvm_dev *dev,
- void *addrf, u32 ppa32)
-{
- struct ppa_addr ppa64;
-
- ppa64.ppa = 0;
-
- if (ppa32 == -1) {
- ppa64.ppa = ADDR_EMPTY;
- } else if (ppa32 & (1U << 31)) {
- ppa64.c.line = ppa32 & ((~0U) >> 1);
- ppa64.c.is_cached = 1;
- } else {
- struct nvm_geo *geo = &dev->geo;
-
- if (geo->version == NVM_OCSSD_SPEC_12) {
- struct nvm_addrf_12 *ppaf = addrf;
-
- ppa64.g.ch = (ppa32 & ppaf->ch_mask) >>
- ppaf->ch_offset;
- ppa64.g.lun = (ppa32 & ppaf->lun_mask) >>
- ppaf->lun_offset;
- ppa64.g.blk = (ppa32 & ppaf->blk_mask) >>
- ppaf->blk_offset;
- ppa64.g.pg = (ppa32 & ppaf->pg_mask) >>
- ppaf->pg_offset;
- ppa64.g.pl = (ppa32 & ppaf->pln_mask) >>
- ppaf->pln_offset;
- ppa64.g.sec = (ppa32 & ppaf->sec_mask) >>
- ppaf->sec_offset;
- } else {
- struct nvm_addrf *lbaf = addrf;
-
- ppa64.m.grp = (ppa32 & lbaf->ch_mask) >>
- lbaf->ch_offset;
- ppa64.m.pu = (ppa32 & lbaf->lun_mask) >>
- lbaf->lun_offset;
- ppa64.m.chk = (ppa32 & lbaf->chk_mask) >>
- lbaf->chk_offset;
- ppa64.m.sec = (ppa32 & lbaf->sec_mask) >>
- lbaf->sec_offset;
- }
- }
-
- return ppa64;
-}
-
-static inline u32 nvm_ppa64_to_ppa32(struct nvm_dev *dev,
- void *addrf, struct ppa_addr ppa64)
-{
- u32 ppa32 = 0;
-
- if (ppa64.ppa == ADDR_EMPTY) {
- ppa32 = ~0U;
- } else if (ppa64.c.is_cached) {
- ppa32 |= ppa64.c.line;
- ppa32 |= 1U << 31;
- } else {
- struct nvm_geo *geo = &dev->geo;
-
- if (geo->version == NVM_OCSSD_SPEC_12) {
- struct nvm_addrf_12 *ppaf = addrf;
-
- ppa32 |= ppa64.g.ch << ppaf->ch_offset;
- ppa32 |= ppa64.g.lun << ppaf->lun_offset;
- ppa32 |= ppa64.g.blk << ppaf->blk_offset;
- ppa32 |= ppa64.g.pg << ppaf->pg_offset;
- ppa32 |= ppa64.g.pl << ppaf->pln_offset;
- ppa32 |= ppa64.g.sec << ppaf->sec_offset;
- } else {
- struct nvm_addrf *lbaf = addrf;
-
- ppa32 |= ppa64.m.grp << lbaf->ch_offset;
- ppa32 |= ppa64.m.pu << lbaf->lun_offset;
- ppa32 |= ppa64.m.chk << lbaf->chk_offset;
- ppa32 |= ppa64.m.sec << lbaf->sec_offset;
- }
- }
-
- return ppa32;
-}
-
-static inline int nvm_next_ppa_in_chk(struct nvm_tgt_dev *dev,
- struct ppa_addr *ppa)
-{
- struct nvm_geo *geo = &dev->geo;
- int last = 0;
-
- if (geo->version == NVM_OCSSD_SPEC_12) {
- int sec = ppa->g.sec;
-
- sec++;
- if (sec == geo->ws_min) {
- int pg = ppa->g.pg;
-
- sec = 0;
- pg++;
- if (pg == geo->num_pg) {
- int pl = ppa->g.pl;
-
- pg = 0;
- pl++;
- if (pl == geo->num_pln)
- last = 1;
-
- ppa->g.pl = pl;
- }
- ppa->g.pg = pg;
- }
- ppa->g.sec = sec;
- } else {
- ppa->m.sec++;
- if (ppa->m.sec == geo->clba)
- last = 1;
- }
-
- return last;
-}
-
-typedef sector_t (nvm_tgt_capacity_fn)(void *);
-typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *,
- int flags);
-typedef void (nvm_tgt_exit_fn)(void *, bool);
-typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *);
-typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *);
-
-enum {
- NVM_TGT_F_DEV_L2P = 0,
- NVM_TGT_F_HOST_L2P = 1 << 0,
-};
-
-struct nvm_tgt_type {
- const char *name;
- unsigned int version[3];
- int flags;
-
- /* target entry points */
- const struct block_device_operations *bops;
- nvm_tgt_capacity_fn *capacity;
-
- /* module-specific init/teardown */
- nvm_tgt_init_fn *init;
- nvm_tgt_exit_fn *exit;
-
- /* sysfs */
- nvm_tgt_sysfs_init_fn *sysfs_init;
- nvm_tgt_sysfs_exit_fn *sysfs_exit;
-
- /* For internal use */
- struct list_head list;
- struct module *owner;
-};
-
-extern int nvm_register_tgt_type(struct nvm_tgt_type *);
-extern void nvm_unregister_tgt_type(struct nvm_tgt_type *);
-
-extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *);
-extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t);
-
-extern struct nvm_dev *nvm_alloc_dev(int);
-extern int nvm_register(struct nvm_dev *);
-extern void nvm_unregister(struct nvm_dev *);
-
-extern int nvm_get_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr,
- int, struct nvm_chk_meta *);
-extern int nvm_set_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr *,
- int, int);
-extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *, void *);
-extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *, void *);
-extern void nvm_end_io(struct nvm_rq *);
-
-#else /* CONFIG_NVM */
-struct nvm_dev_ops;
-
-static inline struct nvm_dev *nvm_alloc_dev(int node)
-{
- return ERR_PTR(-EINVAL);
-}
-static inline int nvm_register(struct nvm_dev *dev)
-{
- return -EINVAL;
-}
-static inline void nvm_unregister(struct nvm_dev *dev) {}
-#endif /* CONFIG_NVM */
-#endif /* LIGHTNVM.H */
diff --git a/include/linux/linear_range.h b/include/linux/linear_range.h
index 17b5943727d5..fd3d0b358f22 100644
--- a/include/linux/linear_range.h
+++ b/include/linux/linear_range.h
@@ -41,6 +41,8 @@ int linear_range_get_selector_low(const struct linear_range *r,
int linear_range_get_selector_high(const struct linear_range *r,
unsigned int val, unsigned int *selector,
bool *found);
+void linear_range_get_selector_within(const struct linear_range *r,
+ unsigned int val, unsigned int *selector);
int linear_range_get_selector_low_array(const struct linear_range *r,
int ranges, unsigned int val,
unsigned int *selector, bool *found);
diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h
index ded90b097e6e..975e33b793a7 100644
--- a/include/linux/local_lock_internal.h
+++ b/include/linux/local_lock_internal.h
@@ -6,6 +6,8 @@
#include <linux/percpu-defs.h>
#include <linux/lockdep.h>
+#ifndef CONFIG_PREEMPT_RT
+
typedef struct {
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
@@ -14,29 +16,14 @@ typedef struct {
} local_lock_t;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define LL_DEP_MAP_INIT(lockname) \
+# define LOCAL_LOCK_DEBUG_INIT(lockname) \
.dep_map = { \
.name = #lockname, \
.wait_type_inner = LD_WAIT_CONFIG, \
- .lock_type = LD_LOCK_PERCPU, \
- }
-#else
-# define LL_DEP_MAP_INIT(lockname)
-#endif
-
-#define INIT_LOCAL_LOCK(lockname) { LL_DEP_MAP_INIT(lockname) }
-
-#define __local_lock_init(lock) \
-do { \
- static struct lock_class_key __key; \
- \
- debug_check_no_locks_freed((void *)lock, sizeof(*lock));\
- lockdep_init_map_type(&(lock)->dep_map, #lock, &__key, 0, \
- LD_WAIT_CONFIG, LD_WAIT_INV, \
- LD_LOCK_PERCPU); \
-} while (0)
+ .lock_type = LD_LOCK_PERCPU, \
+ }, \
+ .owner = NULL,
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
static inline void local_lock_acquire(local_lock_t *l)
{
lock_map_acquire(&l->dep_map);
@@ -51,11 +38,30 @@ static inline void local_lock_release(local_lock_t *l)
lock_map_release(&l->dep_map);
}
+static inline void local_lock_debug_init(local_lock_t *l)
+{
+ l->owner = NULL;
+}
#else /* CONFIG_DEBUG_LOCK_ALLOC */
+# define LOCAL_LOCK_DEBUG_INIT(lockname)
static inline void local_lock_acquire(local_lock_t *l) { }
static inline void local_lock_release(local_lock_t *l) { }
+static inline void local_lock_debug_init(local_lock_t *l) { }
#endif /* !CONFIG_DEBUG_LOCK_ALLOC */
+#define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) }
+
+#define __local_lock_init(lock) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ debug_check_no_locks_freed((void *)lock, sizeof(*lock));\
+ lockdep_init_map_type(&(lock)->dep_map, #lock, &__key, \
+ 0, LD_WAIT_CONFIG, LD_WAIT_INV, \
+ LD_LOCK_PERCPU); \
+ local_lock_debug_init(lock); \
+} while (0)
+
#define __local_lock(lock) \
do { \
preempt_disable(); \
@@ -91,3 +97,45 @@ static inline void local_lock_release(local_lock_t *l) { }
local_lock_release(this_cpu_ptr(lock)); \
local_irq_restore(flags); \
} while (0)
+
+#else /* !CONFIG_PREEMPT_RT */
+
+/*
+ * On PREEMPT_RT local_lock maps to a per CPU spinlock, which protects the
+ * critical section while staying preemptible.
+ */
+typedef spinlock_t local_lock_t;
+
+#define INIT_LOCAL_LOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname))
+
+#define __local_lock_init(l) \
+ do { \
+ local_spin_lock_init((l)); \
+ } while (0)
+
+#define __local_lock(__lock) \
+ do { \
+ migrate_disable(); \
+ spin_lock(this_cpu_ptr((__lock))); \
+ } while (0)
+
+#define __local_lock_irq(lock) __local_lock(lock)
+
+#define __local_lock_irqsave(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = 0; \
+ __local_lock(lock); \
+ } while (0)
+
+#define __local_unlock(__lock) \
+ do { \
+ spin_unlock(this_cpu_ptr((__lock))); \
+ migrate_enable(); \
+ } while (0)
+
+#define __local_unlock_irq(lock) __local_unlock(lock)
+
+#define __local_unlock_irqrestore(lock, flags) __local_unlock(lock)
+
+#endif /* CONFIG_PREEMPT_RT */
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 0520c0cd73f4..3bc9f7410e21 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -27,7 +27,8 @@ struct rpc_task;
struct nlmsvc_binding {
__be32 (*fopen)(struct svc_rqst *,
struct nfs_fh *,
- struct file **);
+ struct file **,
+ int mode);
void (*fclose)(struct file *);
};
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 666f5f310a04..c4ae6506b8b3 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -10,6 +10,8 @@
#ifndef LINUX_LOCKD_LOCKD_H
#define LINUX_LOCKD_LOCKD_H
+/* XXX: a lot of this should really be under fs/lockd. */
+
#include <linux/in.h>
#include <linux/in6.h>
#include <net/ipv6.h>
@@ -154,7 +156,8 @@ struct nlm_rqst {
struct nlm_file {
struct hlist_node f_list; /* linked list */
struct nfs_fh f_handle; /* NFS file handle */
- struct file * f_file; /* VFS file pointer */
+ struct file * f_file[2]; /* VFS file pointers,
+ indexed by O_ flags */
struct nlm_share * f_shares; /* DOS shares */
struct list_head f_blocks; /* blocked locks */
unsigned int f_locks; /* guesstimate # of locks */
@@ -267,6 +270,7 @@ typedef int (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref);
/*
* Server-side lock handling
*/
+int lock_to_openmode(struct file_lock *);
__be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
struct nlm_host *, struct nlm_lock *, int,
struct nlm_cookie *, int);
@@ -286,7 +290,7 @@ void nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t);
* File handling for the server personality
*/
__be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **,
- struct nfs_fh *);
+ struct nlm_lock *);
void nlm_release_file(struct nlm_file *);
void nlmsvc_release_lockowner(struct nlm_lock *);
void nlmsvc_mark_resources(struct net *);
@@ -301,7 +305,8 @@ int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
{
- return locks_inode(file->f_file);
+ return locks_inode(file->f_file[O_RDONLY] ?
+ file->f_file[O_RDONLY] : file->f_file[O_WRONLY]);
}
static inline int __nlm_privileged_request4(const struct sockaddr *sap)
diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h
index 07f5ef8fc456..c6786c12b207 100644
--- a/include/linux/mei_cl_bus.h
+++ b/include/linux/mei_cl_bus.h
@@ -91,12 +91,13 @@ void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv);
mei_cldev_driver_register,\
mei_cldev_driver_unregister)
-ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length);
+ssize_t mei_cldev_send(struct mei_cl_device *cldev, const u8 *buf,
+ size_t length);
ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length);
ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf,
size_t length);
-ssize_t mei_cldev_send_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length,
- u8 vtag);
+ssize_t mei_cldev_send_vtag(struct mei_cl_device *cldev, const u8 *buf,
+ size_t length, u8 vtag);
ssize_t mei_cldev_recv_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length,
u8 *vtag);
ssize_t mei_cldev_recv_nonblock_vtag(struct mei_cl_device *cldev, u8 *buf,
@@ -114,6 +115,6 @@ void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data);
int mei_cldev_enable(struct mei_cl_device *cldev);
int mei_cldev_disable(struct mei_cl_device *cldev);
-bool mei_cldev_enabled(struct mei_cl_device *cldev);
+bool mei_cldev_enabled(const struct mei_cl_device *cldev);
#endif /* _LINUX_MEI_CL_BUS_H */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index bfe5c486f4ad..20151c4f1e0e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -612,12 +612,15 @@ static inline bool mem_cgroup_disabled(void)
return !cgroup_subsys_enabled(memory_cgrp_subsys);
}
-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
- struct mem_cgroup *memcg,
- bool in_low_reclaim)
+static inline void mem_cgroup_protection(struct mem_cgroup *root,
+ struct mem_cgroup *memcg,
+ unsigned long *min,
+ unsigned long *low)
{
+ *min = *low = 0;
+
if (mem_cgroup_disabled())
- return 0;
+ return;
/*
* There is no reclaim protection applied to a targeted reclaim.
@@ -653,13 +656,10 @@ static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
*
*/
if (root == memcg)
- return 0;
-
- if (in_low_reclaim)
- return READ_ONCE(memcg->memory.emin);
+ return;
- return max(READ_ONCE(memcg->memory.emin),
- READ_ONCE(memcg->memory.elow));
+ *min = READ_ONCE(memcg->memory.emin);
+ *low = READ_ONCE(memcg->memory.elow);
}
void mem_cgroup_calculate_protection(struct mem_cgroup *root,
@@ -1147,11 +1147,12 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
{
}
-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
- struct mem_cgroup *memcg,
- bool in_low_reclaim)
+static inline void mem_cgroup_protection(struct mem_cgroup *root,
+ struct mem_cgroup *memcg,
+ unsigned long *min,
+ unsigned long *low)
{
- return 0;
+ *min = *low = 0;
}
static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
@@ -1581,7 +1582,8 @@ static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
#endif /* CONFIG_CGROUP_WRITEBACK */
struct sock;
-bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
+bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
+ gfp_t gfp_mask);
void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
#ifdef CONFIG_MEMCG
extern struct static_key_false memcg_sockets_enabled_key;
diff --git a/include/linux/mfd/rt5033-private.h b/include/linux/mfd/rt5033-private.h
index 40a0c2dfb80f..2d1895c3efbf 100644
--- a/include/linux/mfd/rt5033-private.h
+++ b/include/linux/mfd/rt5033-private.h
@@ -200,13 +200,13 @@ enum rt5033_reg {
#define RT5033_REGULATOR_BUCK_VOLTAGE_MIN 1000000U
#define RT5033_REGULATOR_BUCK_VOLTAGE_MAX 3000000U
#define RT5033_REGULATOR_BUCK_VOLTAGE_STEP 100000U
-#define RT5033_REGULATOR_BUCK_VOLTAGE_STEP_NUM 21
+#define RT5033_REGULATOR_BUCK_VOLTAGE_STEP_NUM 32
/* RT5033 regulator LDO output voltage uV */
#define RT5033_REGULATOR_LDO_VOLTAGE_MIN 1200000U
#define RT5033_REGULATOR_LDO_VOLTAGE_MAX 3000000U
#define RT5033_REGULATOR_LDO_VOLTAGE_STEP 100000U
-#define RT5033_REGULATOR_LDO_VOLTAGE_STEP_NUM 19
+#define RT5033_REGULATOR_LDO_VOLTAGE_STEP_NUM 32
/* RT5033 regulator SAFE LDO output voltage uV */
#define RT5033_REGULATOR_SAFE_LDO_VOLTAGE 4900000U
diff --git a/include/linux/mhi.h b/include/linux/mhi.h
index 5e08468854db..723985879035 100644
--- a/include/linux/mhi.h
+++ b/include/linux/mhi.h
@@ -303,6 +303,7 @@ struct mhi_controller_config {
* @rddm_size: RAM dump size that host should allocate for debugging purpose
* @sbl_size: SBL image size downloaded through BHIe (optional)
* @seg_len: BHIe vector size (optional)
+ * @reg_len: Length of the MHI MMIO region (required)
* @fbc_image: Points to firmware image buffer
* @rddm_image: Points to RAM dump buffer
* @mhi_chan: Points to the channel configuration table
@@ -356,6 +357,7 @@ struct mhi_controller_config {
* @fbc_download: MHI host needs to do complete image transfer (optional)
* @wake_set: Device wakeup set flag
* @irq_flags: irq flags passed to request_irq (optional)
+ * @mru: the default MRU for the MHI device
*
* Fields marked as (required) need to be populated by the controller driver
* before calling mhi_register_controller(). For the fields marked as (optional)
@@ -386,6 +388,7 @@ struct mhi_controller {
size_t rddm_size;
size_t sbl_size;
size_t seg_len;
+ size_t reg_len;
struct image_info *fbc_image;
struct image_info *rddm_image;
struct mhi_chan *mhi_chan;
@@ -448,6 +451,7 @@ struct mhi_controller {
bool fbc_download;
bool wake_set;
unsigned long irq_flags;
+ u32 mru;
};
/**
@@ -719,13 +723,8 @@ void mhi_device_put(struct mhi_device *mhi_dev);
* host and device execution environments match and
* channels are in a DISABLED state.
* @mhi_dev: Device associated with the channels
- * @flags: MHI channel flags
*/
-int mhi_prepare_for_transfer(struct mhi_device *mhi_dev,
- unsigned int flags);
-
-/* Automatically allocate and queue inbound buffers */
-#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0)
+int mhi_prepare_for_transfer(struct mhi_device *mhi_dev);
/**
* mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer.
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 219b93cad1dd..12ea29e04293 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -32,7 +32,7 @@ struct mii_if_info {
extern int mii_link_ok (struct mii_if_info *mii);
extern int mii_nway_restart (struct mii_if_info *mii);
-extern int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd);
+extern void mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd);
extern void mii_ethtool_get_link_ksettings(
struct mii_if_info *mii, struct ethtool_link_ksettings *cmd);
extern int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 0025913505ab..66eaf0aa7f69 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1038,7 +1038,7 @@ enum {
struct mlx5_mkey_seg {
/* This is a two bit field occupying bits 31-30.
* bit 31 is always 0,
- * bit 30 is zero for regular MRs and 1 (e.g free) for UMRs that do not have tanslation
+ * bit 30 is zero for regular MRs and 1 (e.g free) for UMRs that do not have translation
*/
u8 status;
u8 pcie_control;
@@ -1157,6 +1157,9 @@ enum mlx5_cap_mode {
HCA_CAP_OPMOD_GET_CUR = 1,
};
+/* Any new cap addition must update mlx5_hca_caps_alloc() to allocate
+ * capability memory.
+ */
enum mlx5_cap_type {
MLX5_CAP_GENERAL = 0,
MLX5_CAP_ETHERNET_OFFLOADS,
@@ -1213,55 +1216,55 @@ enum mlx5_qcam_feature_groups {
/* GET Dev Caps macros */
#define MLX5_CAP_GEN(mdev, cap) \
- MLX5_GET(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap)
+ MLX5_GET(cmd_hca_cap, mdev->caps.hca[MLX5_CAP_GENERAL]->cur, cap)
#define MLX5_CAP_GEN_64(mdev, cap) \
- MLX5_GET64(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap)
+ MLX5_GET64(cmd_hca_cap, mdev->caps.hca[MLX5_CAP_GENERAL]->cur, cap)
#define MLX5_CAP_GEN_MAX(mdev, cap) \
- MLX5_GET(cmd_hca_cap, mdev->caps.hca_max[MLX5_CAP_GENERAL], cap)
+ MLX5_GET(cmd_hca_cap, mdev->caps.hca[MLX5_CAP_GENERAL]->max, cap)
#define MLX5_CAP_GEN_2(mdev, cap) \
- MLX5_GET(cmd_hca_cap_2, mdev->caps.hca_cur[MLX5_CAP_GENERAL_2], cap)
+ MLX5_GET(cmd_hca_cap_2, mdev->caps.hca[MLX5_CAP_GENERAL_2]->cur, cap)
#define MLX5_CAP_GEN_2_64(mdev, cap) \
- MLX5_GET64(cmd_hca_cap_2, mdev->caps.hca_cur[MLX5_CAP_GENERAL_2], cap)
+ MLX5_GET64(cmd_hca_cap_2, mdev->caps.hca[MLX5_CAP_GENERAL_2]->cur, cap)
#define MLX5_CAP_GEN_2_MAX(mdev, cap) \
- MLX5_GET(cmd_hca_cap_2, mdev->caps.hca_max[MLX5_CAP_GENERAL_2], cap)
+ MLX5_GET(cmd_hca_cap_2, mdev->caps.hca[MLX5_CAP_GENERAL_2]->max, cap)
#define MLX5_CAP_ETH(mdev, cap) \
MLX5_GET(per_protocol_networking_offload_caps,\
- mdev->caps.hca_cur[MLX5_CAP_ETHERNET_OFFLOADS], cap)
+ mdev->caps.hca[MLX5_CAP_ETHERNET_OFFLOADS]->cur, cap)
#define MLX5_CAP_ETH_MAX(mdev, cap) \
MLX5_GET(per_protocol_networking_offload_caps,\
- mdev->caps.hca_max[MLX5_CAP_ETHERNET_OFFLOADS], cap)
+ mdev->caps.hca[MLX5_CAP_ETHERNET_OFFLOADS]->max, cap)
#define MLX5_CAP_IPOIB_ENHANCED(mdev, cap) \
MLX5_GET(per_protocol_networking_offload_caps,\
- mdev->caps.hca_cur[MLX5_CAP_IPOIB_ENHANCED_OFFLOADS], cap)
+ mdev->caps.hca[MLX5_CAP_IPOIB_ENHANCED_OFFLOADS]->cur, cap)
#define MLX5_CAP_ROCE(mdev, cap) \
- MLX5_GET(roce_cap, mdev->caps.hca_cur[MLX5_CAP_ROCE], cap)
+ MLX5_GET(roce_cap, mdev->caps.hca[MLX5_CAP_ROCE]->cur, cap)
#define MLX5_CAP_ROCE_MAX(mdev, cap) \
- MLX5_GET(roce_cap, mdev->caps.hca_max[MLX5_CAP_ROCE], cap)
+ MLX5_GET(roce_cap, mdev->caps.hca[MLX5_CAP_ROCE]->max, cap)
#define MLX5_CAP_ATOMIC(mdev, cap) \
- MLX5_GET(atomic_caps, mdev->caps.hca_cur[MLX5_CAP_ATOMIC], cap)
+ MLX5_GET(atomic_caps, mdev->caps.hca[MLX5_CAP_ATOMIC]->cur, cap)
#define MLX5_CAP_ATOMIC_MAX(mdev, cap) \
- MLX5_GET(atomic_caps, mdev->caps.hca_max[MLX5_CAP_ATOMIC], cap)
+ MLX5_GET(atomic_caps, mdev->caps.hca[MLX5_CAP_ATOMIC]->max, cap)
#define MLX5_CAP_FLOWTABLE(mdev, cap) \
- MLX5_GET(flow_table_nic_cap, mdev->caps.hca_cur[MLX5_CAP_FLOW_TABLE], cap)
+ MLX5_GET(flow_table_nic_cap, mdev->caps.hca[MLX5_CAP_FLOW_TABLE]->cur, cap)
#define MLX5_CAP64_FLOWTABLE(mdev, cap) \
- MLX5_GET64(flow_table_nic_cap, (mdev)->caps.hca_cur[MLX5_CAP_FLOW_TABLE], cap)
+ MLX5_GET64(flow_table_nic_cap, (mdev)->caps.hca[MLX5_CAP_FLOW_TABLE]->cur, cap)
#define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \
- MLX5_GET(flow_table_nic_cap, mdev->caps.hca_max[MLX5_CAP_FLOW_TABLE], cap)
+ MLX5_GET(flow_table_nic_cap, mdev->caps.hca[MLX5_CAP_FLOW_TABLE]->max, cap)
#define MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) \
MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.cap)
@@ -1301,11 +1304,11 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \
MLX5_GET(flow_table_eswitch_cap, \
- mdev->caps.hca_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
+ mdev->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->cur, cap)
#define MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, cap) \
MLX5_GET(flow_table_eswitch_cap, \
- mdev->caps.hca_max[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
+ mdev->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->max, cap)
#define MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) \
MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_nic_esw_fdb.cap)
@@ -1327,31 +1330,31 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP_ESW(mdev, cap) \
MLX5_GET(e_switch_cap, \
- mdev->caps.hca_cur[MLX5_CAP_ESWITCH], cap)
+ mdev->caps.hca[MLX5_CAP_ESWITCH]->cur, cap)
#define MLX5_CAP64_ESW_FLOWTABLE(mdev, cap) \
MLX5_GET64(flow_table_eswitch_cap, \
- (mdev)->caps.hca_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
+ (mdev)->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->cur, cap)
#define MLX5_CAP_ESW_MAX(mdev, cap) \
MLX5_GET(e_switch_cap, \
- mdev->caps.hca_max[MLX5_CAP_ESWITCH], cap)
+ mdev->caps.hca[MLX5_CAP_ESWITCH]->max, cap)
#define MLX5_CAP_ODP(mdev, cap)\
- MLX5_GET(odp_cap, mdev->caps.hca_cur[MLX5_CAP_ODP], cap)
+ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, cap)
#define MLX5_CAP_ODP_MAX(mdev, cap)\
- MLX5_GET(odp_cap, mdev->caps.hca_max[MLX5_CAP_ODP], cap)
+ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->max, cap)
#define MLX5_CAP_VECTOR_CALC(mdev, cap) \
MLX5_GET(vector_calc_cap, \
- mdev->caps.hca_cur[MLX5_CAP_VECTOR_CALC], cap)
+ mdev->caps.hca[MLX5_CAP_VECTOR_CALC]->cur, cap)
#define MLX5_CAP_QOS(mdev, cap)\
- MLX5_GET(qos_cap, mdev->caps.hca_cur[MLX5_CAP_QOS], cap)
+ MLX5_GET(qos_cap, mdev->caps.hca[MLX5_CAP_QOS]->cur, cap)
#define MLX5_CAP_DEBUG(mdev, cap)\
- MLX5_GET(debug_cap, mdev->caps.hca_cur[MLX5_CAP_DEBUG], cap)
+ MLX5_GET(debug_cap, mdev->caps.hca[MLX5_CAP_DEBUG]->cur, cap)
#define MLX5_CAP_PCAM_FEATURE(mdev, fld) \
MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld)
@@ -1387,27 +1390,27 @@ enum mlx5_qcam_feature_groups {
MLX5_GET64(fpga_cap, (mdev)->caps.fpga, cap)
#define MLX5_CAP_DEV_MEM(mdev, cap)\
- MLX5_GET(device_mem_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_MEM], cap)
+ MLX5_GET(device_mem_cap, mdev->caps.hca[MLX5_CAP_DEV_MEM]->cur, cap)
#define MLX5_CAP64_DEV_MEM(mdev, cap)\
- MLX5_GET64(device_mem_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_MEM], cap)
+ MLX5_GET64(device_mem_cap, mdev->caps.hca[MLX5_CAP_DEV_MEM]->cur, cap)
#define MLX5_CAP_TLS(mdev, cap) \
- MLX5_GET(tls_cap, (mdev)->caps.hca_cur[MLX5_CAP_TLS], cap)
+ MLX5_GET(tls_cap, (mdev)->caps.hca[MLX5_CAP_TLS]->cur, cap)
#define MLX5_CAP_DEV_EVENT(mdev, cap)\
- MLX5_ADDR_OF(device_event_cap, (mdev)->caps.hca_cur[MLX5_CAP_DEV_EVENT], cap)
+ MLX5_ADDR_OF(device_event_cap, (mdev)->caps.hca[MLX5_CAP_DEV_EVENT]->cur, cap)
#define MLX5_CAP_DEV_VDPA_EMULATION(mdev, cap)\
MLX5_GET(virtio_emulation_cap, \
- (mdev)->caps.hca_cur[MLX5_CAP_VDPA_EMULATION], cap)
+ (mdev)->caps.hca[MLX5_CAP_VDPA_EMULATION]->cur, cap)
#define MLX5_CAP64_DEV_VDPA_EMULATION(mdev, cap)\
MLX5_GET64(virtio_emulation_cap, \
- (mdev)->caps.hca_cur[MLX5_CAP_VDPA_EMULATION], cap)
+ (mdev)->caps.hca[MLX5_CAP_VDPA_EMULATION]->cur, cap)
#define MLX5_CAP_IPSEC(mdev, cap)\
- MLX5_GET(ipsec_cap, (mdev)->caps.hca_cur[MLX5_CAP_IPSEC], cap)
+ MLX5_GET(ipsec_cap, (mdev)->caps.hca[MLX5_CAP_IPSEC]->cur, cap)
enum {
MLX5_CMD_STAT_OK = 0x0,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 1efe37466969..e23417424373 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -581,7 +581,7 @@ struct mlx5_priv {
/* end: qp staff */
/* start: alloc staff */
- /* protect buffer alocation according to numa node */
+ /* protect buffer allocation according to numa node */
struct mutex alloc_mutex;
int numa_node;
@@ -623,8 +623,7 @@ struct mlx5_priv {
};
enum mlx5_device_state {
- MLX5_DEVICE_STATE_UNINITIALIZED,
- MLX5_DEVICE_STATE_UP,
+ MLX5_DEVICE_STATE_UP = 1,
MLX5_DEVICE_STATE_INTERNAL_ERROR,
};
@@ -730,6 +729,11 @@ struct mlx5_profile {
} mr_cache[MAX_MR_CACHE_ENTRIES];
};
+struct mlx5_hca_cap {
+ u32 cur[MLX5_UN_SZ_DW(hca_cap_union)];
+ u32 max[MLX5_UN_SZ_DW(hca_cap_union)];
+};
+
struct mlx5_core_dev {
struct device *device;
enum mlx5_coredev_type coredev_type;
@@ -741,8 +745,7 @@ struct mlx5_core_dev {
char board_id[MLX5_BOARD_ID_LEN];
struct mlx5_cmd cmd;
struct {
- u32 hca_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
- u32 hca_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
+ struct mlx5_hca_cap *hca[MLX5_CAP_NUM];
u32 pcam[MLX5_ST_SZ_DW(pcam_reg)];
u32 mcam[MLX5_MCAM_REGS_NUM][MLX5_ST_SZ_DW(mcam_reg)];
u32 fpga[MLX5_ST_SZ_DW(fpga_cap)];
@@ -1044,8 +1047,7 @@ void mlx5_unregister_debugfs(void);
void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas);
void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm);
void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
-int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
- unsigned int *irqn);
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn);
int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
@@ -1111,7 +1113,7 @@ static inline u8 mlx5_mkey_variant(u32 mkey)
}
/* Async-atomic event notifier used by mlx5 core to forward FW
- * evetns recived from event queue to mlx5 consumers.
+ * evetns received from event queue to mlx5 consumers.
* Optimise event queue dipatching.
*/
int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb);
@@ -1138,6 +1140,8 @@ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_master(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev);
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
struct net_device *slave);
@@ -1145,6 +1149,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
u64 *values,
int num_counters,
size_t *offsets);
+struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev);
struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index bc7db2e059eb..4ab5c1fc1270 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -29,11 +29,20 @@ enum {
REP_LOADED,
};
+enum mlx5_switchdev_event {
+ MLX5_SWITCHDEV_EVENT_PAIR,
+ MLX5_SWITCHDEV_EVENT_UNPAIR,
+};
+
struct mlx5_eswitch_rep;
struct mlx5_eswitch_rep_ops {
int (*load)(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep);
void (*unload)(struct mlx5_eswitch_rep *rep);
void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
+ int (*event)(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ enum mlx5_switchdev_event event,
+ void *data);
};
struct mlx5_eswitch_rep_data {
@@ -63,6 +72,7 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
struct mlx5_flow_handle *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+ struct mlx5_eswitch *from_esw,
struct mlx5_eswitch_rep *rep, u32 sqn);
#ifdef CONFIG_MLX5_ESWITCH
@@ -128,6 +138,7 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
+struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw);
#else /* CONFIG_MLX5_ESWITCH */
@@ -171,6 +182,11 @@ static inline u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
return 0;
}
+static inline struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw)
+{
+ return NULL;
+}
+
#endif /* CONFIG_MLX5_ESWITCH */
static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev)
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 77746f7e35b8..0106c67e8ccb 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -38,6 +38,8 @@
#define MLX5_FS_DEFAULT_FLOW_TAG 0x0
+#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
+
enum {
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16,
MLX5_FLOW_CONTEXT_ACTION_ENCRYPT = 1 << 17,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index b0009aa3647f..f3638d09ba77 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -865,7 +865,8 @@ struct mlx5_ifc_qos_cap_bits {
u8 nic_bw_share[0x1];
u8 nic_rate_limit[0x1];
u8 packet_pacing_uid[0x1];
- u8 reserved_at_c[0x14];
+ u8 log_esw_max_sched_depth[0x4];
+ u8 reserved_at_10[0x10];
u8 reserved_at_20[0xb];
u8 log_max_qos_nic_queue_group[0x5];
@@ -921,7 +922,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
u8 scatter_fcs[0x1];
u8 enhanced_multi_pkt_send_wqe[0x1];
u8 tunnel_lso_const_out_ip_id[0x1];
- u8 reserved_at_1c[0x2];
+ u8 tunnel_lro_gre[0x1];
+ u8 tunnel_lro_vxlan[0x1];
u8 tunnel_stateless_gre[0x1];
u8 tunnel_stateless_vxlan[0x1];
@@ -1651,7 +1653,13 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 max_geneve_tlv_option_data_len[0x5];
u8 reserved_at_570[0x10];
- u8 reserved_at_580[0x33];
+ u8 reserved_at_580[0xb];
+ u8 log_max_dci_stream_channels[0x5];
+ u8 reserved_at_590[0x3];
+ u8 log_max_dci_errored_streams[0x5];
+ u8 reserved_at_598[0x8];
+
+ u8 reserved_at_5a0[0x13];
u8 log_max_dek[0x5];
u8 reserved_at_5b8[0x4];
u8 mini_cqe_resp_stride_index[0x1];
@@ -3020,10 +3028,12 @@ struct mlx5_ifc_qpc_bits {
u8 reserved_at_3c0[0x8];
u8 next_send_psn[0x18];
- u8 reserved_at_3e0[0x8];
+ u8 reserved_at_3e0[0x3];
+ u8 log_num_dci_stream_channels[0x5];
u8 cqn_snd[0x18];
- u8 reserved_at_400[0x8];
+ u8 reserved_at_400[0x3];
+ u8 log_num_dci_errored_streams[0x5];
u8 deth_sqpn[0x18];
u8 reserved_at_420[0x20];
@@ -3911,7 +3921,7 @@ struct mlx5_ifc_cqc_bits {
u8 status[0x4];
u8 reserved_at_4[0x2];
u8 dbr_umem_valid[0x1];
- u8 apu_thread_cq[0x1];
+ u8 apu_cq[0x1];
u8 cqe_sz[0x3];
u8 cc[0x1];
u8 reserved_at_c[0x1];
@@ -3937,8 +3947,7 @@ struct mlx5_ifc_cqc_bits {
u8 cq_period[0xc];
u8 cq_max_count[0x10];
- u8 reserved_at_a0[0x18];
- u8 c_eqn[0x8];
+ u8 c_eqn_or_apu_element[0x20];
u8 reserved_at_c0[0x3];
u8 log_page_size[0x5];
diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
index 98b56b75c625..1a9c9d94cb59 100644
--- a/include/linux/mlx5/mlx5_ifc_vdpa.h
+++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
@@ -11,13 +11,15 @@ enum {
};
enum {
- MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT = 0x1, // do I check this caps?
- MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED = 0x2,
+ MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT = 0,
+ MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED = 1,
};
enum {
- MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT = 0,
- MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED = 1,
+ MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT =
+ BIT(MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT),
+ MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED =
+ BIT(MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED),
};
struct mlx5_ifc_virtio_q_bits {
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 52bbd2b7cb46..7f8ee09c711f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -103,11 +103,19 @@ struct page {
unsigned long pp_magic;
struct page_pool *pp;
unsigned long _pp_mapping_pad;
- /**
- * @dma_addr: might require a 64-bit value on
- * 32-bit architectures.
- */
- unsigned long dma_addr[2];
+ unsigned long dma_addr;
+ union {
+ /**
+ * dma_addr_upper: might require a 64-bit
+ * value on 32-bit architectures.
+ */
+ unsigned long dma_addr_upper;
+ /**
+ * For frag page support, not supported in
+ * 32-bit architectures with 64-bit DMA.
+ */
+ atomic_long_t pp_frag_count;
+ };
};
struct { /* slab, slob and slub */
union {
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 74e6c0624d27..37f975875102 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -109,6 +109,7 @@ struct mmc_ext_csd {
u8 raw_hc_erase_gap_size; /* 221 */
u8 raw_erase_timeout_mult; /* 223 */
u8 raw_hc_erase_grp_size; /* 224 */
+ u8 raw_boot_mult; /* 226 */
u8 raw_sec_trim_mult; /* 229 */
u8 raw_sec_erase_mult; /* 230 */
u8 raw_sec_feature_support;/* 231 */
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index ab19245e9945..71101d1ec825 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -164,9 +164,8 @@ struct mmc_request {
int tag;
#ifdef CONFIG_MMC_CRYPTO
- bool crypto_enabled;
+ const struct bio_crypt_ctx *crypto_ctx;
int crypto_key_slot;
- u32 data_unit_num;
#endif
};
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 0abd47e9ef9b..0c0c9a0fdf57 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -153,7 +153,7 @@ struct mmc_host_ops {
int (*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios);
- /* Check if the card is pulling dat[0:3] low */
+ /* Check if the card is pulling dat[0] low */
int (*card_busy)(struct mmc_host *host);
/* The tuning command opcode value is different for SD and eMMC cards */
@@ -398,6 +398,7 @@ struct mmc_host {
#else
#define MMC_CAP2_CRYPTO 0
#endif
+#define MMC_CAP2_ALT_GPT_TEGRA (1 << 28) /* Host with eMMC that has GPT entry at a non-standard location */
int fixed_drv_type; /* fixed driver type for non-removable media */
diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index 12036619346c..a85c9f0bd470 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -75,6 +75,7 @@
#define SDIO_DEVICE_ID_BROADCOM_43364 0xa9a4
#define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6
#define SDIO_DEVICE_ID_BROADCOM_43455 0xa9bf
+#define SDIO_DEVICE_ID_BROADCOM_CYPRESS_43752 0xaae8
#define SDIO_VENDOR_ID_MARVELL 0x02df
#define SDIO_DEVICE_ID_MARVELL_LIBERTAS 0x9103
diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h
index 03dee12d2b61..b9b970f7ab45 100644
--- a/include/linux/mmu_context.h
+++ b/include/linux/mmu_context.h
@@ -14,4 +14,18 @@
static inline void leave_mm(int cpu) { }
#endif
+/*
+ * CPUs that are capable of running user task @p. Must contain at least one
+ * active CPU. It is assumed that the kernel can run on all CPUs, so calling
+ * this for a kernel thread is pointless.
+ *
+ * By default, we assume a sane, homogeneous system.
+ */
+#ifndef task_cpu_possible_mask
+# define task_cpu_possible_mask(p) cpu_possible_mask
+# define task_cpu_possible(cpu, p) true
+#else
+# define task_cpu_possible(cpu, p) cpumask_test_cpu((cpu), task_cpu_possible_mask(p))
+#endif
+
#endif
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index eed280fae433..962cd41a2cb5 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -431,6 +431,8 @@ extern int param_get_int(char *buffer, const struct kernel_param *kp);
extern const struct kernel_param_ops param_ops_uint;
extern int param_set_uint(const char *val, const struct kernel_param *kp);
extern int param_get_uint(char *buffer, const struct kernel_param *kp);
+int param_set_uint_minmax(const char *val, const struct kernel_param *kp,
+ unsigned int min, unsigned int max);
#define param_check_uint(name, p) __param_check(name, p, unsigned int)
extern const struct kernel_param_ops param_ops_long;
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 6aff469e511d..49cf6eb222e7 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -107,7 +107,8 @@ struct ti_sci_inta_msi_desc {
* address or data changes
* @write_msi_msg_data: Data parameter for the callback.
*
- * @masked: [PCI MSI/X] Mask bits
+ * @msi_mask: [PCI MSI] MSI cached mask bits
+ * @msix_ctrl: [PCI MSI-X] MSI-X cached per vector control bits
* @is_msix: [PCI MSI/X] True if MSI-X
* @multiple: [PCI MSI/X] log2 num of messages allocated
* @multi_cap: [PCI MSI/X] log2 num of messages supported
@@ -139,7 +140,10 @@ struct msi_desc {
union {
/* PCI MSI/X specific data */
struct {
- u32 masked;
+ union {
+ u32 msi_mask;
+ u32 msix_ctrl;
+ };
struct {
u8 is_msix : 1;
u8 multiple : 3;
@@ -232,11 +236,13 @@ void free_msi_entry(struct msi_desc *entry);
void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
-u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag);
-u32 __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag);
void pci_msi_mask_irq(struct irq_data *data);
void pci_msi_unmask_irq(struct irq_data *data);
+const struct attribute_group **msi_populate_sysfs(struct device *dev);
+void msi_destroy_sysfs(struct device *dev,
+ const struct attribute_group **msi_irq_groups);
+
/*
* The arch hooks to setup up msi irqs. Default functions are implemented
* as weak symbols so that they /can/ be overriden by architecture specific
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index e19323521f9c..8f226d460f51 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -20,8 +20,17 @@
#include <linux/osq_lock.h>
#include <linux/debug_locks.h>
-struct ww_class;
-struct ww_acquire_ctx;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
+ , .dep_map = { \
+ .name = #lockname, \
+ .wait_type_inner = LD_WAIT_SLEEP, \
+ }
+#else
+# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
+#endif
+
+#ifndef CONFIG_PREEMPT_RT
/*
* Simple, straightforward mutexes with strict semantics:
@@ -53,7 +62,7 @@ struct ww_acquire_ctx;
*/
struct mutex {
atomic_long_t owner;
- spinlock_t wait_lock;
+ raw_spinlock_t wait_lock;
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
struct optimistic_spin_queue osq; /* Spinner MCS lock */
#endif
@@ -66,27 +75,6 @@ struct mutex {
#endif
};
-struct ww_mutex {
- struct mutex base;
- struct ww_acquire_ctx *ctx;
-#ifdef CONFIG_DEBUG_MUTEXES
- struct ww_class *ww_class;
-#endif
-};
-
-/*
- * This is the control structure for tasks blocked on mutex,
- * which resides on the blocked task's kernel stack:
- */
-struct mutex_waiter {
- struct list_head list;
- struct task_struct *task;
- struct ww_acquire_ctx *ww_ctx;
-#ifdef CONFIG_DEBUG_MUTEXES
- void *magic;
-#endif
-};
-
#ifdef CONFIG_DEBUG_MUTEXES
#define __DEBUG_MUTEX_INITIALIZER(lockname) \
@@ -117,19 +105,9 @@ do { \
__mutex_init((mutex), #mutex, &__key); \
} while (0)
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
- , .dep_map = { \
- .name = #lockname, \
- .wait_type_inner = LD_WAIT_SLEEP, \
- }
-#else
-# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
-#endif
-
#define __MUTEX_INITIALIZER(lockname) \
{ .owner = ATOMIC_LONG_INIT(0) \
- , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
+ , .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
, .wait_list = LIST_HEAD_INIT(lockname.wait_list) \
__DEBUG_MUTEX_INITIALIZER(lockname) \
__DEP_MAP_MUTEX_INITIALIZER(lockname) }
@@ -148,6 +126,50 @@ extern void __mutex_init(struct mutex *lock, const char *name,
*/
extern bool mutex_is_locked(struct mutex *lock);
+#else /* !CONFIG_PREEMPT_RT */
+/*
+ * Preempt-RT variant based on rtmutexes.
+ */
+#include <linux/rtmutex.h>
+
+struct mutex {
+ struct rt_mutex_base rtmutex;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+};
+
+#define __MUTEX_INITIALIZER(mutexname) \
+{ \
+ .rtmutex = __RT_MUTEX_BASE_INITIALIZER(mutexname.rtmutex) \
+ __DEP_MAP_MUTEX_INITIALIZER(mutexname) \
+}
+
+#define DEFINE_MUTEX(mutexname) \
+ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
+
+extern void __mutex_rt_init(struct mutex *lock, const char *name,
+ struct lock_class_key *key);
+extern int mutex_trylock(struct mutex *lock);
+
+static inline void mutex_destroy(struct mutex *lock) { }
+
+#define mutex_is_locked(l) rt_mutex_base_is_locked(&(l)->rtmutex)
+
+#define __mutex_init(mutex, name, key) \
+do { \
+ rt_mutex_base_init(&(mutex)->rtmutex); \
+ __mutex_rt_init((mutex), name, key); \
+} while (0)
+
+#define mutex_init(mutex) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ __mutex_init((mutex), #mutex, &__key); \
+} while (0)
+#endif /* CONFIG_PREEMPT_RT */
+
/*
* See kernel/locking/mutex.c for detailed documentation of these APIs.
* Also see Documentation/locking/mutex-design.rst.
diff --git a/include/linux/namei.h b/include/linux/namei.h
index be9a2b349ca7..e89329bb3134 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -68,6 +68,7 @@ extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int);
extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int);
extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int);
+struct dentry *lookup_one(struct user_namespace *, const char *, struct dentry *, int);
extern int follow_down_one(struct path *);
extern int follow_down(struct path *);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index eaf5bb008aa9..7c41593c1d6a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -47,6 +47,7 @@
#include <uapi/linux/if_bonding.h>
#include <uapi/linux/pkt_cls.h>
#include <linux/hashtable.h>
+#include <linux/rbtree.h>
struct netpoll_info;
struct device;
@@ -208,6 +209,7 @@ struct sk_buff;
struct netdev_hw_addr {
struct list_head list;
+ struct rb_node node;
unsigned char addr[MAX_ADDR_LEN];
unsigned char type;
#define NETDEV_HW_ADDR_T_LAN 1
@@ -224,6 +226,9 @@ struct netdev_hw_addr {
struct netdev_hw_addr_list {
struct list_head list;
int count;
+
+ /* Auxiliary tree for faster lookup on addition and deletion */
+ struct rb_root tree;
};
#define netdev_hw_addr_list_count(l) ((l)->count)
@@ -295,18 +300,6 @@ enum netdev_state_t {
};
-/*
- * This structure holds boot-time configured netdevice settings. They
- * are then used in the device probing.
- */
-struct netdev_boot_setup {
- char name[IFNAMSIZ];
- struct ifmap map;
-};
-#define NETDEV_BOOT_SETUP_MAX 8
-
-int __init netdev_boot_setup(char *str);
-
struct gro_list {
struct list_head list;
int count;
@@ -734,13 +727,13 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
/* This structure contains an instance of an RX queue. */
struct netdev_rx_queue {
+ struct xdp_rxq_info xdp_rxq;
#ifdef CONFIG_RPS
struct rps_map __rcu *rps_map;
struct rps_dev_flow_table __rcu *rps_flow_table;
#endif
struct kobject kobj;
struct net_device *dev;
- struct xdp_rxq_info xdp_rxq;
#ifdef CONFIG_XDP_SOCKETS
struct xsk_buff_pool *pool;
#endif
@@ -1086,9 +1079,18 @@ struct netdev_net_notifier {
* Test if Media Access Control address is valid for the device.
*
* int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd);
- * Called when a user requests an ioctl which can't be handled by
- * the generic interface code. If not defined ioctls return
- * not supported error code.
+ * Old-style ioctl entry point. This is used internally by the
+ * appletalk and ieee802154 subsystems but is no longer called by
+ * the device ioctl handler.
+ *
+ * int (*ndo_siocbond)(struct net_device *dev, struct ifreq *ifr, int cmd);
+ * Used by the bonding driver for its device specific ioctls:
+ * SIOCBONDENSLAVE, SIOCBONDRELEASE, SIOCBONDSETHWADDR, SIOCBONDCHANGEACTIVE,
+ * SIOCBONDSLAVEINFOQUERY, and SIOCBONDINFOQUERY
+ *
+ * * int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd);
+ * Called for ethernet specific ioctls: SIOCGMIIPHY, SIOCGMIIREG,
+ * SIOCSMIIREG, SIOCSHWTSTAMP and SIOCGHWTSTAMP.
*
* int (*ndo_set_config)(struct net_device *dev, struct ifmap *map);
* Used to set network devices bus interface parameters. This interface
@@ -1321,6 +1323,9 @@ struct netdev_net_notifier {
* that got dropped are freed/returned via xdp_return_frame().
* Returns negative number, means general error invoking ndo, meaning
* no frames were xmit'ed and core-caller will free all frames.
+ * struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev,
+ * struct xdp_buff *xdp);
+ * Get the xmit slave of master device based on the xdp_buff.
* int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags);
* This function is used to wake up the softirq, ksoftirqd or kthread
* responsible for sending and/or receiving packets on a specific
@@ -1361,6 +1366,15 @@ struct net_device_ops {
int (*ndo_validate_addr)(struct net_device *dev);
int (*ndo_do_ioctl)(struct net_device *dev,
struct ifreq *ifr, int cmd);
+ int (*ndo_eth_ioctl)(struct net_device *dev,
+ struct ifreq *ifr, int cmd);
+ int (*ndo_siocbond)(struct net_device *dev,
+ struct ifreq *ifr, int cmd);
+ int (*ndo_siocwandev)(struct net_device *dev,
+ struct if_settings *ifs);
+ int (*ndo_siocdevprivate)(struct net_device *dev,
+ struct ifreq *ifr,
+ void __user *data, int cmd);
int (*ndo_set_config)(struct net_device *dev,
struct ifmap *map);
int (*ndo_change_mtu)(struct net_device *dev,
@@ -1539,6 +1553,8 @@ struct net_device_ops {
int (*ndo_xdp_xmit)(struct net_device *dev, int n,
struct xdp_frame **xdp,
u32 flags);
+ struct net_device * (*ndo_xdp_get_xmit_slave)(struct net_device *dev,
+ struct xdp_buff *xdp);
int (*ndo_xsk_wakeup)(struct net_device *dev,
u32 queue_id, u32 flags);
struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
@@ -1805,6 +1821,7 @@ enum netdev_ml_priv_type {
* @ieee802154_ptr: IEEE 802.15.4 low-rate Wireless Personal Area Network
* device struct
* @mpls_ptr: mpls_dev struct pointer
+ * @mctp_ptr: MCTP specific data
*
* @dev_addr: Hw address (before bcast,
* because most packets are unicast)
@@ -2092,6 +2109,9 @@ struct net_device {
#if IS_ENABLED(CONFIG_MPLS_ROUTING)
struct mpls_dev __rcu *mpls_ptr;
#endif
+#if IS_ENABLED(CONFIG_MCTP)
+ struct mctp_dev __rcu *mctp_ptr;
+#endif
/*
* Cache lines mostly used on receive path (including eth_type_trans())
@@ -2917,7 +2937,6 @@ static inline struct net_device *first_net_device_rcu(struct net *net)
}
int netdev_boot_setup_check(struct net_device *dev);
-unsigned long netdev_boot_base(const char *prefix, int unit);
struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
const char *hwaddr);
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
@@ -3289,14 +3308,6 @@ static inline bool dev_has_header(const struct net_device *dev)
return dev->header_ops && dev->header_ops->create;
}
-typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr,
- int len, int size);
-int register_gifconf(unsigned int family, gifconf_func_t *gifconf);
-static inline int unregister_gifconf(unsigned int family)
-{
- return register_gifconf(family, NULL);
-}
-
#ifdef CONFIG_NET_FLOW_LIMIT
#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */
struct sd_flow_limit {
@@ -3915,6 +3926,8 @@ static inline int netif_set_real_num_rx_queues(struct net_device *dev,
return 0;
}
#endif
+int netif_set_real_num_queues(struct net_device *dev,
+ unsigned int txq, unsigned int rxq);
static inline struct netdev_rx_queue *
__netif_get_rx_queue(struct net_device *dev, unsigned int rxq)
@@ -3948,7 +3961,7 @@ void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason);
/*
* It is not allowed to call kfree_skb() or consume_skb() from hardware
* interrupt context or with hardware interrupts being disabled.
- * (in_irq() || irqs_disabled())
+ * (in_hardirq() || irqs_disabled())
*
* We provide four helpers that can be used in following contexts :
*
@@ -3984,6 +3997,8 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
__dev_kfree_skb_any(skb, SKB_REASON_CONSUMED);
}
+u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog);
void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
int netif_rx(struct sk_buff *skb);
@@ -4012,10 +4027,16 @@ int netdev_rx_handler_register(struct net_device *dev,
void netdev_rx_handler_unregister(struct net_device *dev);
bool dev_valid_name(const char *name);
+static inline bool is_socket_ioctl_cmd(unsigned int cmd)
+{
+ return _IOC_TYPE(cmd) == SOCK_IOC_TYPE;
+}
+int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg);
+int put_user_ifreq(struct ifreq *ifr, void __user *arg);
int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr,
- bool *need_copyout);
-int dev_ifconf(struct net *net, struct ifconf *, int);
-int dev_ethtool(struct net *net, struct ifreq *);
+ void __user *data, bool *need_copyout);
+int dev_ifconf(struct net *net, struct ifconf __user *ifc);
+int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata);
unsigned int dev_get_flags(const struct net_device *);
int __dev_change_flags(struct net_device *dev, unsigned int flags,
struct netlink_ext_ack *extack);
@@ -4069,6 +4090,7 @@ typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
int fd, int expected_fd, u32 flags);
int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+u8 dev_xdp_prog_count(struct net_device *dev);
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
@@ -4136,11 +4158,13 @@ void netdev_run_todo(void);
*/
static inline void dev_put(struct net_device *dev)
{
+ if (dev) {
#ifdef CONFIG_PCPU_DEV_REFCNT
- this_cpu_dec(*dev->pcpu_refcnt);
+ this_cpu_dec(*dev->pcpu_refcnt);
#else
- refcount_dec(&dev->dev_refcnt);
+ refcount_dec(&dev->dev_refcnt);
#endif
+ }
}
/**
@@ -4151,11 +4175,13 @@ static inline void dev_put(struct net_device *dev)
*/
static inline void dev_hold(struct net_device *dev)
{
+ if (dev) {
#ifdef CONFIG_PCPU_DEV_REFCNT
- this_cpu_inc(*dev->pcpu_refcnt);
+ this_cpu_inc(*dev->pcpu_refcnt);
#else
- refcount_inc(&dev->dev_refcnt);
+ refcount_inc(&dev->dev_refcnt);
#endif
+ }
}
/* Carrier loss detection, dial on demand. The functions netif_carrier_on
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 10279c4830ac..ada1296c87d5 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -196,6 +196,9 @@ struct ip_set_region {
u32 elements; /* Number of elements vs timeout */
};
+/* Max range where every element is added/deleted in one step */
+#define IPSET_MAX_RANGE (1<<20)
+
/* The max revision number supported by any set type + 1 */
#define IPSET_REVISION_MAX 9
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 28d7027cd460..5897f3dbaf7c 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -238,9 +238,6 @@ struct xt_table {
u_int8_t af; /* address/protocol family */
int priority; /* hook order */
- /* called when table is needed in the given netns */
- int (*table_init)(struct net *net);
-
/* A unique name... */
const char name[XT_TABLE_MAXNAMELEN];
};
@@ -452,6 +449,9 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *);
+int xt_register_template(const struct xt_table *t, int(*table_init)(struct net *net));
+void xt_unregister_template(const struct xt_table *t);
+
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
#include <net/compat.h>
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index a8178253ce53..10a01978bc0d 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -127,4 +127,6 @@ static inline bool ebt_invalid_target(int target)
return (target < -NUM_STANDARD_TARGETS || target >= 0);
}
+int ebt_register_template(const struct ebt_table *t, int(*table_init)(struct net *net));
+void ebt_unregister_template(const struct ebt_table *t);
#endif
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 2fb373a5c1ed..87069b8459af 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -168,8 +168,6 @@ extern int raw_notifier_call_chain(struct raw_notifier_head *nh,
extern int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
unsigned long val, void *v);
-extern int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh,
- unsigned long val_up, unsigned long val_down, void *v);
extern int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh,
unsigned long val_up, unsigned long val_down, void *v);
extern int raw_notifier_call_chain_robust(struct raw_notifier_head *nh,
diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h
index 3d8db1f6a5db..0f4a8903922a 100644
--- a/include/linux/oid_registry.h
+++ b/include/linux/oid_registry.h
@@ -70,6 +70,9 @@ enum OID {
OID_spnego, /* 1.3.6.1.5.5.2 */
+ OID_IAKerb, /* 1.3.6.1.5.2.5 */
+ OID_PKU2U, /* 1.3.5.1.5.2.7 */
+ OID_Scram, /* 1.3.6.1.5.5.14 */
OID_certAuthInfoAccess, /* 1.3.6.1.5.5.7.1.1 */
OID_sha1, /* 1.3.14.3.2.26 */
OID_id_ansip384r1, /* 1.3.132.0.34 */
@@ -104,6 +107,10 @@ enum OID {
OID_authorityKeyIdentifier, /* 2.5.29.35 */
OID_extKeyUsage, /* 2.5.29.37 */
+ /* Heimdal mechanisms */
+ OID_NetlogonMechanism, /* 1.2.752.43.14.2 */
+ OID_appleLocalKdcSupported, /* 1.2.752.43.14.3 */
+
/* EC-RDSA */
OID_gostCPSignA, /* 1.2.643.2.2.35.1 */
OID_gostCPSignB, /* 1.2.643.2.2.35.2 */
diff --git a/include/linux/once.h b/include/linux/once.h
index 9225ee6d96c7..ae6f4eb41cbe 100644
--- a/include/linux/once.h
+++ b/include/linux/once.h
@@ -7,7 +7,7 @@
bool __do_once_start(bool *done, unsigned long *flags);
void __do_once_done(bool *done, struct static_key_true *once_key,
- unsigned long *flags);
+ unsigned long *flags, struct module *mod);
/* Call a function exactly once. The idea of DO_ONCE() is to perform
* a function call such as initialization of random seeds, etc, only
@@ -46,7 +46,7 @@ void __do_once_done(bool *done, struct static_key_true *once_key,
if (unlikely(___ret)) { \
func(__VA_ARGS__); \
__do_once_done(&___done, &___once_key, \
- &___flags); \
+ &___flags, THIS_MODULE); \
} \
} \
___ret; \
diff --git a/include/linux/padata.h b/include/linux/padata.h
index a433f13fc4bf..495b16b6b4d7 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -12,6 +12,7 @@
#ifndef PADATA_H
#define PADATA_H
+#include <linux/refcount.h>
#include <linux/compiler_types.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
@@ -96,7 +97,7 @@ struct parallel_data {
struct padata_shell *ps;
struct padata_list __percpu *reorder_list;
struct padata_serial_queue __percpu *squeue;
- atomic_t refcnt;
+ refcount_t refcnt;
unsigned int seq_nr;
unsigned int processed;
int cpu;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 540b377ca8f6..947430637cac 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1620,6 +1620,16 @@ static inline bool pci_aer_available(void) { return false; }
bool pci_ats_disabled(void);
+#ifdef CONFIG_PCIE_PTM
+int pci_enable_ptm(struct pci_dev *dev, u8 *granularity);
+bool pcie_ptm_enabled(struct pci_dev *dev);
+#else
+static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
+{ return -EINVAL; }
+static inline bool pcie_ptm_enabled(struct pci_dev *dev)
+{ return false; }
+#endif
+
void pci_cfg_access_lock(struct pci_dev *dev);
bool pci_cfg_access_trylock(struct pci_dev *dev);
void pci_cfg_access_unlock(struct pci_dev *dev);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 4bac1831de80..06eccef155ad 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -555,6 +555,7 @@
#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F3 0x144b
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443
#define PCI_DEVICE_ID_AMD_19H_DF_F3 0x1653
+#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F3 0x167c
#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F3 0x166d
#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
#define PCI_DEVICE_ID_AMD_LANCE 0x2000
@@ -1121,6 +1122,7 @@
#define PCI_DEVICE_ID_3COM_3CR990SVR 0x990a
#define PCI_VENDOR_ID_AL 0x10b9
+#define PCI_DEVICE_ID_AL_M1489 0x1489
#define PCI_DEVICE_ID_AL_M1533 0x1533
#define PCI_DEVICE_ID_AL_M1535 0x1535
#define PCI_DEVICE_ID_AL_M1541 0x1541
@@ -2643,6 +2645,7 @@
#define PCI_DEVICE_ID_INTEL_82375 0x0482
#define PCI_DEVICE_ID_INTEL_82424 0x0483
#define PCI_DEVICE_ID_INTEL_82378 0x0484
+#define PCI_DEVICE_ID_INTEL_82425 0x0486
#define PCI_DEVICE_ID_INTEL_MRST_SD0 0x0807
#define PCI_DEVICE_ID_INTEL_MRST_SD1 0x0808
#define PCI_DEVICE_ID_INTEL_MFD_SD 0x0820
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2d510ad750ed..fe156a8170aa 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -762,6 +762,7 @@ struct perf_event {
#ifdef CONFIG_BPF_SYSCALL
perf_overflow_handler_t orig_overflow_handler;
struct bpf_prog *prog;
+ u64 bpf_cookie;
#endif
#ifdef CONFIG_EVENT_TRACING
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 3b80dc3ed68b..736e1d1a47c4 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1431,6 +1431,7 @@ static inline int phy_device_register(struct phy_device *phy)
static inline void phy_device_free(struct phy_device *phydev) { }
#endif /* CONFIG_PHYLIB */
void phy_device_remove(struct phy_device *phydev);
+int phy_get_c45_ids(struct phy_device *phydev);
int phy_init_hw(struct phy_device *phydev);
int phy_suspend(struct phy_device *phydev);
int phy_resume(struct phy_device *phydev);
diff --git a/include/linux/pid.h b/include/linux/pid.h
index fa10acb8d6a4..af308e15f174 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -78,6 +78,7 @@ struct file;
extern struct pid *pidfd_pid(const struct file *file);
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
+int pidfd_create(struct pid *pid, unsigned int flags);
static inline struct pid *get_pid(struct pid *pid)
{
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 5d2705f1d01c..fc5642431b92 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -48,6 +48,7 @@ struct pipe_buffer {
* @files: number of struct file referring this pipe (protected by ->i_lock)
* @r_counter: reader counter
* @w_counter: writer counter
+ * @poll_usage: is this pipe used for epoll, which has crazy wakeups?
* @fasync_readers: reader side fasync
* @fasync_writers: writer side fasync
* @bufs: the circular array of pipe buffers
@@ -70,6 +71,7 @@ struct pipe_inode_info {
unsigned int files;
unsigned int r_counter;
unsigned int w_counter;
+ unsigned int poll_usage;
struct page *tmp_page;
struct fasync_struct *fasync_readers;
struct fasync_struct *fasync_writers;
diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h
index 45f53afc46e2..271bd87bff0a 100644
--- a/include/linux/platform_data/cros_ec_commands.h
+++ b/include/linux/platform_data/cros_ec_commands.h
@@ -4228,6 +4228,7 @@ enum ec_device_event {
EC_DEVICE_EVENT_TRACKPAD,
EC_DEVICE_EVENT_DSP,
EC_DEVICE_EVENT_WIFI,
+ EC_DEVICE_EVENT_WLC,
};
enum ec_device_event_param {
@@ -5460,6 +5461,72 @@ struct ec_response_rollback_info {
/* Issue AP reset */
#define EC_CMD_AP_RESET 0x0125
+/**
+ * Get the number of peripheral charge ports
+ */
+#define EC_CMD_PCHG_COUNT 0x0134
+
+#define EC_PCHG_MAX_PORTS 8
+
+struct ec_response_pchg_count {
+ uint8_t port_count;
+} __ec_align1;
+
+/**
+ * Get the status of a peripheral charge port
+ */
+#define EC_CMD_PCHG 0x0135
+
+struct ec_params_pchg {
+ uint8_t port;
+} __ec_align1;
+
+struct ec_response_pchg {
+ uint32_t error; /* enum pchg_error */
+ uint8_t state; /* enum pchg_state state */
+ uint8_t battery_percentage;
+ uint8_t unused0;
+ uint8_t unused1;
+ /* Fields added in version 1 */
+ uint32_t fw_version;
+ uint32_t dropped_event_count;
+} __ec_align2;
+
+enum pchg_state {
+ /* Charger is reset and not initialized. */
+ PCHG_STATE_RESET = 0,
+ /* Charger is initialized or disabled. */
+ PCHG_STATE_INITIALIZED,
+ /* Charger is enabled and ready to detect a device. */
+ PCHG_STATE_ENABLED,
+ /* Device is in proximity. */
+ PCHG_STATE_DETECTED,
+ /* Device is being charged. */
+ PCHG_STATE_CHARGING,
+ /* Device is fully charged. It implies DETECTED (& not charging). */
+ PCHG_STATE_FULL,
+ /* In download (a.k.a. firmware update) mode */
+ PCHG_STATE_DOWNLOAD,
+ /* In download mode. Ready for receiving data. */
+ PCHG_STATE_DOWNLOADING,
+ /* Device is ready for data communication. */
+ PCHG_STATE_CONNECTED,
+ /* Put no more entries below */
+ PCHG_STATE_COUNT,
+};
+
+#define EC_PCHG_STATE_TEXT { \
+ [PCHG_STATE_RESET] = "RESET", \
+ [PCHG_STATE_INITIALIZED] = "INITIALIZED", \
+ [PCHG_STATE_ENABLED] = "ENABLED", \
+ [PCHG_STATE_DETECTED] = "DETECTED", \
+ [PCHG_STATE_CHARGING] = "CHARGING", \
+ [PCHG_STATE_FULL] = "FULL", \
+ [PCHG_STATE_DOWNLOAD] = "DOWNLOAD", \
+ [PCHG_STATE_DOWNLOADING] = "DOWNLOADING", \
+ [PCHG_STATE_CONNECTED] = "CONNECTED", \
+ }
+
/*****************************************************************************/
/* Voltage regulator controls */
diff --git a/include/linux/platform_data/mmc-esdhc-imx.h b/include/linux/platform_data/mmc-esdhc-imx.h
deleted file mode 100644
index cba1184b364c..000000000000
--- a/include/linux/platform_data/mmc-esdhc-imx.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright 2010 Wolfram Sang <kernel@pengutronix.de>
- */
-
-#ifndef __ASM_ARCH_IMX_ESDHC_H
-#define __ASM_ARCH_IMX_ESDHC_H
-
-#include <linux/types.h>
-
-enum wp_types {
- ESDHC_WP_NONE, /* no WP, neither controller nor gpio */
- ESDHC_WP_CONTROLLER, /* mmc controller internal WP */
- ESDHC_WP_GPIO, /* external gpio pin for WP */
-};
-
-enum cd_types {
- ESDHC_CD_NONE, /* no CD, neither controller nor gpio */
- ESDHC_CD_CONTROLLER, /* mmc controller internal CD */
- ESDHC_CD_GPIO, /* external gpio pin for CD */
- ESDHC_CD_PERMANENT, /* no CD, card permanently wired to host */
-};
-
-/**
- * struct esdhc_platform_data - platform data for esdhc on i.MX
- *
- * ESDHC_WP(CD)_CONTROLLER type is not available on i.MX25/35.
- *
- * @wp_type: type of write_protect method (see wp_types enum above)
- * @cd_type: type of card_detect method (see cd_types enum above)
- */
-
-struct esdhc_platform_data {
- enum wp_types wp_type;
- enum cd_types cd_type;
- int max_bus_width;
- unsigned int delay_line;
- unsigned int tuning_step; /* The delay cell steps in tuning procedure */
- unsigned int tuning_start_tap; /* The start delay cell point in tuning procedure */
- unsigned int strobe_dll_delay_target; /* The delay cell for strobe pad (read clock) */
-};
-#endif /* __ASM_ARCH_IMX_ESDHC_H */
diff --git a/include/linux/platform_data/spi-mt65xx.h b/include/linux/platform_data/spi-mt65xx.h
index 65fd5ffd257c..f0db674f07b8 100644
--- a/include/linux/platform_data/spi-mt65xx.h
+++ b/include/linux/platform_data/spi-mt65xx.h
@@ -12,5 +12,6 @@
/* Board specific platform_data */
struct mtk_chip_config {
u32 sample_sel;
+ u32 tick_delay;
};
#endif
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 21a0577305ef..67017c9390c8 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -198,6 +198,7 @@ struct generic_pm_domain_data {
struct notifier_block *power_nb;
int cpu;
unsigned int performance_state;
+ unsigned int default_pstate;
unsigned int rpm_pstate;
ktime_t next_wakeup;
void *data;
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 896c16d2c5fb..00fef0064355 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -82,12 +82,19 @@ static inline bool cpu_timer_enqueue(struct timerqueue_head *head,
return timerqueue_add(head, &ctmr->node);
}
-static inline void cpu_timer_dequeue(struct cpu_timer *ctmr)
+static inline bool cpu_timer_queued(struct cpu_timer *ctmr)
{
- if (ctmr->head) {
+ return !!ctmr->head;
+}
+
+static inline bool cpu_timer_dequeue(struct cpu_timer *ctmr)
+{
+ if (cpu_timer_queued(ctmr)) {
timerqueue_del(ctmr->head, &ctmr->node);
ctmr->head = NULL;
+ return true;
}
+ return false;
}
static inline u64 cpu_timer_getexpires(struct cpu_timer *ctmr)
diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h
index d55c746ac56e..dd24756a8af7 100644
--- a/include/linux/power/max17042_battery.h
+++ b/include/linux/power/max17042_battery.h
@@ -69,7 +69,7 @@ enum max17042_register {
MAX17042_RelaxCFG = 0x2A,
MAX17042_MiscCFG = 0x2B,
MAX17042_TGAIN = 0x2C,
- MAx17042_TOFF = 0x2D,
+ MAX17042_TOFF = 0x2D,
MAX17042_CGAIN = 0x2E,
MAX17042_COFF = 0x2F,
@@ -110,13 +110,14 @@ enum max17042_register {
MAX17042_VFSOC = 0xFF,
};
+/* Registers specific to max17055 only */
enum max17055_register {
MAX17055_QRes = 0x0C,
+ MAX17055_RCell = 0x14,
MAX17055_TTF = 0x20,
- MAX17055_V_empty = 0x3A,
- MAX17055_TIMER = 0x3E,
+ MAX17055_DieTemp = 0x34,
MAX17055_USER_MEM = 0x40,
- MAX17055_RGAIN = 0x42,
+ MAX17055_RGAIN = 0x43,
MAX17055_ConvgCfg = 0x49,
MAX17055_VFRemCap = 0x4A,
@@ -155,13 +156,14 @@ enum max17055_register {
MAX17055_AtAvCap = 0xDF,
};
-/* Registers specific to max17047/50 */
+/* Registers specific to max17047/50/55 */
enum max17047_register {
MAX17047_QRTbl00 = 0x12,
MAX17047_FullSOCThr = 0x13,
MAX17047_QRTbl10 = 0x22,
MAX17047_QRTbl20 = 0x32,
MAX17047_V_empty = 0x3A,
+ MAX17047_TIMER = 0x3E,
MAX17047_QRTbl30 = 0x42,
};
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index be203985ecdd..9ca1f120a211 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -352,6 +352,7 @@ struct power_supply_resistance_temp_table {
*/
struct power_supply_battery_info {
+ unsigned int technology; /* from the enum above */
int energy_full_design_uwh; /* microWatt-hours */
int charge_full_design_uah; /* microAmp-hours */
int voltage_min_design_uv; /* microVolts */
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 9881eac0698f..4d244e295e85 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -121,7 +121,11 @@
/*
* The preempt_count offset after spin_lock()
*/
+#if !defined(CONFIG_PREEMPT_RT)
#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
+#else
+#define PREEMPT_LOCK_OFFSET 0
+#endif
/*
* The preempt_count offset needed for things like:
diff --git a/include/linux/property.h b/include/linux/property.h
index 073e680c35e2..357513a977e5 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -484,8 +484,6 @@ void software_node_unregister_node_group(const struct software_node **node_group
int software_node_register(const struct software_node *node);
void software_node_unregister(const struct software_node *node);
-int software_node_notify(struct device *dev, unsigned long action);
-
struct fwnode_handle *
fwnode_create_software_node(const struct property_entry *properties,
const struct fwnode_handle *parent);
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 71fac9237725..2e5565067355 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -215,7 +215,7 @@ static inline long scaled_ppm_to_ppb(long ppm)
return (long)ppb;
}
-#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK)
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
/**
* ptp_clock_register() - register a PTP hardware clock driver
@@ -307,6 +307,33 @@ int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay);
*/
void ptp_cancel_worker_sync(struct ptp_clock *ptp);
+#else
+static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
+ struct device *parent)
+{ return NULL; }
+static inline int ptp_clock_unregister(struct ptp_clock *ptp)
+{ return 0; }
+static inline void ptp_clock_event(struct ptp_clock *ptp,
+ struct ptp_clock_event *event)
+{ }
+static inline int ptp_clock_index(struct ptp_clock *ptp)
+{ return -1; }
+static inline int ptp_find_pin(struct ptp_clock *ptp,
+ enum ptp_pin_function func, unsigned int chan)
+{ return -1; }
+static inline int ptp_schedule_worker(struct ptp_clock *ptp,
+ unsigned long delay)
+{ return -EOPNOTSUPP; }
+static inline void ptp_cancel_worker_sync(struct ptp_clock *ptp)
+{ }
+#endif
+
+#if IS_BUILTIN(CONFIG_PTP_1588_CLOCK)
+/*
+ * These are called by the network core, and don't work if PTP is in
+ * a loadable module.
+ */
+
/**
* ptp_get_vclocks_index() - get all vclocks index on pclock, and
* caller is responsible to free memory
@@ -327,26 +354,7 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index);
*/
void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps,
int vclock_index);
-
#else
-static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
- struct device *parent)
-{ return NULL; }
-static inline int ptp_clock_unregister(struct ptp_clock *ptp)
-{ return 0; }
-static inline void ptp_clock_event(struct ptp_clock *ptp,
- struct ptp_clock_event *event)
-{ }
-static inline int ptp_clock_index(struct ptp_clock *ptp)
-{ return -1; }
-static inline int ptp_find_pin(struct ptp_clock *ptp,
- enum ptp_pin_function func, unsigned int chan)
-{ return -1; }
-static inline int ptp_schedule_worker(struct ptp_clock *ptp,
- unsigned long delay)
-{ return -EOPNOTSUPP; }
-static inline void ptp_cancel_worker_sync(struct ptp_clock *ptp)
-{ }
static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index)
{ return 0; }
static inline void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps,
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index d31ecaf4fdd3..235047d7a1b5 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -17,24 +17,14 @@
#ifndef _LINUX_RBTREE_H
#define _LINUX_RBTREE_H
+#include <linux/rbtree_types.h>
+
#include <linux/kernel.h>
#include <linux/stddef.h>
#include <linux/rcupdate.h>
-struct rb_node {
- unsigned long __rb_parent_color;
- struct rb_node *rb_right;
- struct rb_node *rb_left;
-} __attribute__((aligned(sizeof(long))));
- /* The alignment might seem pointless, but allegedly CRIS needs it */
-
-struct rb_root {
- struct rb_node *rb_node;
-};
-
#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3))
-#define RB_ROOT (struct rb_root) { NULL, }
#define rb_entry(ptr, type, member) container_of(ptr, type, member)
#define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL)
@@ -112,23 +102,6 @@ static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent
typeof(*pos), field); 1; }); \
pos = n)
-/*
- * Leftmost-cached rbtrees.
- *
- * We do not cache the rightmost node based on footprint
- * size vs number of potential users that could benefit
- * from O(1) rb_last(). Just not worth it, users that want
- * this feature can always implement the logic explicitly.
- * Furthermore, users that want to cache both pointers may
- * find it a bit asymmetric, but that's ok.
- */
-struct rb_root_cached {
- struct rb_root rb_root;
- struct rb_node *rb_leftmost;
-};
-
-#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
-
/* Same as rb_first(), but O(1) */
#define rb_first_cached(root) (root)->rb_leftmost
diff --git a/include/linux/rbtree_types.h b/include/linux/rbtree_types.h
new file mode 100644
index 000000000000..45b6ecde3665
--- /dev/null
+++ b/include/linux/rbtree_types.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _LINUX_RBTREE_TYPES_H
+#define _LINUX_RBTREE_TYPES_H
+
+struct rb_node {
+ unsigned long __rb_parent_color;
+ struct rb_node *rb_right;
+ struct rb_node *rb_left;
+} __attribute__((aligned(sizeof(long))));
+/* The alignment might seem pointless, but allegedly CRIS needs it */
+
+struct rb_root {
+ struct rb_node *rb_node;
+};
+
+/*
+ * Leftmost-cached rbtrees.
+ *
+ * We do not cache the rightmost node based on footprint
+ * size vs number of potential users that could benefit
+ * from O(1) rb_last(). Just not worth it, users that want
+ * this feature can always implement the logic explicitly.
+ * Furthermore, users that want to cache both pointers may
+ * find it a bit asymmetric, but that's ok.
+ */
+struct rb_root_cached {
+ struct rb_root rb_root;
+ struct rb_node *rb_leftmost;
+};
+
+#define RB_ROOT (struct rb_root) { NULL, }
+#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
+
+#endif
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index f8633d37e358..d29740be4833 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -11,15 +11,6 @@
#include <linux/rcupdate.h>
/*
- * Why is there no list_empty_rcu()? Because list_empty() serves this
- * purpose. The list_empty() function fetches the RCU-protected pointer
- * and compares it to the address of the list head, but neither dereferences
- * this pointer itself nor provides this pointer to the caller. Therefore,
- * it is not necessary to use rcu_dereference(), so that list_empty() can
- * be used anywhere you would want to use a list_empty_rcu().
- */
-
-/*
* INIT_LIST_HEAD_RCU - Initialize a list_head visible to RCU readers
* @list: list to be initialized
*
@@ -318,21 +309,29 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
/*
* Where are list_empty_rcu() and list_first_entry_rcu()?
*
- * Implementing those functions following their counterparts list_empty() and
- * list_first_entry() is not advisable because they lead to subtle race
- * conditions as the following snippet shows:
+ * They do not exist because they would lead to subtle race conditions:
*
* if (!list_empty_rcu(mylist)) {
* struct foo *bar = list_first_entry_rcu(mylist, struct foo, list_member);
* do_something(bar);
* }
*
- * The list may not be empty when list_empty_rcu checks it, but it may be when
- * list_first_entry_rcu rereads the ->next pointer.
- *
- * Rereading the ->next pointer is not a problem for list_empty() and
- * list_first_entry() because they would be protected by a lock that blocks
- * writers.
+ * The list might be non-empty when list_empty_rcu() checks it, but it
+ * might have become empty by the time that list_first_entry_rcu() rereads
+ * the ->next pointer, which would result in a SEGV.
+ *
+ * When not using RCU, it is OK for list_first_entry() to re-read that
+ * pointer because both functions should be protected by some lock that
+ * blocks writers.
+ *
+ * When using RCU, list_empty() uses READ_ONCE() to fetch the
+ * RCU-protected ->next pointer and then compares it to the address of the
+ * list head. However, it neither dereferences this pointer nor provides
+ * this pointer to its caller. Thus, READ_ONCE() suffices (that is,
+ * rcu_dereference() is not needed), which means that list_empty() can be
+ * used anywhere you would want to use list_empty_rcu(). Just don't
+ * expect anything useful to happen if you do a subsequent lockless
+ * call to list_first_entry_rcu()!!!
*
* See list_first_or_null_rcu for an alternative.
*/
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index d9680b798b21..434d12fe2d4f 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -53,7 +53,7 @@ void __rcu_read_unlock(void);
* nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other
* types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
*/
-#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
+#define rcu_preempt_depth() READ_ONCE(current->rcu_read_lock_nesting)
#else /* #ifdef CONFIG_PREEMPT_RCU */
@@ -167,7 +167,7 @@ void synchronize_rcu_tasks(void);
# define synchronize_rcu_tasks synchronize_rcu
# endif
-# ifdef CONFIG_TASKS_RCU_TRACE
+# ifdef CONFIG_TASKS_TRACE_RCU
# define rcu_tasks_trace_qs(t) \
do { \
if (!likely(READ_ONCE((t)->trc_reader_checked)) && \
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 953e70fafe38..9be015305f9f 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -14,9 +14,6 @@
#include <asm/param.h> /* for HZ */
-/* Never flag non-existent other CPUs! */
-static inline bool rcu_eqs_special_set(int cpu) { return false; }
-
unsigned long get_state_synchronize_rcu(void);
unsigned long start_poll_synchronize_rcu(void);
bool poll_state_synchronize_rcu(unsigned long oldstate);
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index f5f08dd0a116..e3c9a25a853a 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -344,6 +344,7 @@ typedef void (*regmap_unlock)(void *);
* @ranges: Array of configuration entries for virtual address ranges.
* @num_ranges: Number of range configuration entries.
* @use_hwlock: Indicate if a hardware spinlock should be used.
+ * @use_raw_spinlock: Indicate if a raw spinlock should be used.
* @hwlock_id: Specify the hardware spinlock id.
* @hwlock_mode: The hardware spinlock mode, should be HWLOCK_IRQSTATE,
* HWLOCK_IRQ or 0.
@@ -403,6 +404,7 @@ struct regmap_config {
unsigned int num_ranges;
bool use_hwlock;
+ bool use_raw_spinlock;
unsigned int hwlock_id;
unsigned int hwlock_mode;
@@ -1269,12 +1271,13 @@ void devm_regmap_field_free(struct device *dev, struct regmap_field *field);
int regmap_field_bulk_alloc(struct regmap *regmap,
struct regmap_field **rm_field,
- struct reg_field *reg_field,
+ const struct reg_field *reg_field,
int num_fields);
void regmap_field_bulk_free(struct regmap_field *field);
int devm_regmap_field_bulk_alloc(struct device *dev, struct regmap *regmap,
struct regmap_field **field,
- struct reg_field *reg_field, int num_fields);
+ const struct reg_field *reg_field,
+ int num_fields);
void devm_regmap_field_bulk_free(struct device *dev,
struct regmap_field *field);
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index f72ca73631be..bbf6590a6dec 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -222,17 +222,12 @@ void regulator_bulk_unregister_supply_alias(struct device *dev,
int devm_regulator_register_supply_alias(struct device *dev, const char *id,
struct device *alias_dev,
const char *alias_id);
-void devm_regulator_unregister_supply_alias(struct device *dev,
- const char *id);
int devm_regulator_bulk_register_supply_alias(struct device *dev,
const char *const *id,
struct device *alias_dev,
const char *const *alias_id,
int num_id);
-void devm_regulator_bulk_unregister_supply_alias(struct device *dev,
- const char *const *id,
- int num_id);
/* regulator output control and status */
int __must_check regulator_enable(struct regulator *regulator);
@@ -408,11 +403,6 @@ static inline int devm_regulator_register_supply_alias(struct device *dev,
return 0;
}
-static inline void devm_regulator_unregister_supply_alias(struct device *dev,
- const char *id)
-{
-}
-
static inline int devm_regulator_bulk_register_supply_alias(struct device *dev,
const char *const *id,
struct device *alias_dev,
@@ -422,11 +412,6 @@ static inline int devm_regulator_bulk_register_supply_alias(struct device *dev,
return 0;
}
-static inline void devm_regulator_bulk_unregister_supply_alias(
- struct device *dev, const char *const *id, int num_id)
-{
-}
-
static inline int regulator_enable(struct regulator *regulator)
{
return 0;
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 4aec20387857..bd7a73db2e66 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -337,6 +337,12 @@ enum regulator_type {
* @pull_down_val_on: Enabling value for control when using regmap
* set_pull_down
*
+ * @ramp_reg: Register for controlling the regulator ramp-rate.
+ * @ramp_mask: Bitmask for the ramp-rate control register.
+ * @ramp_delay_table: Table for mapping the regulator ramp-rate values. Values
+ * should be given in units of V/S (uV/uS). See
+ * regulator_set_ramp_delay_regmap().
+ *
* @enable_time: Time taken for initial enable of regulator (in uS).
* @off_on_delay: guard time (in uS), before re-enabling a regulator
*
@@ -462,7 +468,7 @@ struct regulator_err_state {
};
/**
- * struct regulator_irq_data - regulator error/notification status date
+ * struct regulator_irq_data - regulator error/notification status data
*
* @states: Status structs for each of the associated regulators.
* @num_states: Amount of associated regulators.
@@ -521,8 +527,8 @@ struct regulator_irq_data {
* active events as core does not clean the map data.
* REGULATOR_FAILED_RETRY can be returned to indicate that the
* status reading from IC failed. If this is repeated for
- * fatal_cnt times the core will call die() callback or BUG()
- * as a last resort to protect the HW.
+ * fatal_cnt times the core will call die() callback or power-off
+ * the system as a last resort to protect the HW.
* @renable: Optional callback to check status (if HW supports that) before
* re-enabling IRQ. If implemented this should clear the error
* flags so that errors fetched by regulator_get_error_flags()
@@ -531,7 +537,8 @@ struct regulator_irq_data {
* REGULATOR_FAILED_RETRY can be returned to
* indicate that the status reading from IC failed. If this is
* repeated for 'fatal_cnt' times the core will call die()
- * callback or BUG() as a last resort to protect the HW.
+ * callback or if die() is not populated then attempt to power-off
+ * the system as a last resort to protect the HW.
* Returning zero indicates that the problem in HW has been solved
* and IRQ will be re-enabled. Returning REGULATOR_ERROR_ON
* indicates the error condition is still active and keeps IRQ
@@ -645,7 +652,6 @@ devm_regulator_register(struct device *dev,
const struct regulator_desc *regulator_desc,
const struct regulator_config *config);
void regulator_unregister(struct regulator_dev *rdev);
-void devm_regulator_unregister(struct device *dev, struct regulator_dev *rdev);
int regulator_notifier_call_chain(struct regulator_dev *rdev,
unsigned long event, void *data);
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 68b4a514a410..621b7f4a3639 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -112,7 +112,7 @@ struct notification_limit {
* @over_voltage_limits: Limits for acting on over voltage.
* @under_voltage_limits: Limits for acting on under voltage.
* @temp_limits: Limits for acting on over temperature.
-
+ *
* @max_spread: Max possible spread between coupled regulators
* @max_uV_step: Max possible step change in voltage
* @valid_modes_mask: Mask of modes which may be configured by consumers.
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 9b05af9b3e28..21deb5212bbd 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -2,6 +2,8 @@
#ifndef _RESCTRL_H
#define _RESCTRL_H
+#include <linux/kernel.h>
+#include <linux/list.h>
#include <linux/pid.h>
#ifdef CONFIG_PROC_CPU_RESCTRL
@@ -13,4 +15,186 @@ int proc_resctrl_show(struct seq_file *m,
#endif
+/**
+ * enum resctrl_conf_type - The type of configuration.
+ * @CDP_NONE: No prioritisation, both code and data are controlled or monitored.
+ * @CDP_CODE: Configuration applies to instruction fetches.
+ * @CDP_DATA: Configuration applies to reads and writes.
+ */
+enum resctrl_conf_type {
+ CDP_NONE,
+ CDP_CODE,
+ CDP_DATA,
+};
+
+#define CDP_NUM_TYPES (CDP_DATA + 1)
+
+/**
+ * struct resctrl_staged_config - parsed configuration to be applied
+ * @new_ctrl: new ctrl value to be loaded
+ * @have_new_ctrl: whether the user-provided new_ctrl is valid
+ */
+struct resctrl_staged_config {
+ u32 new_ctrl;
+ bool have_new_ctrl;
+};
+
+/**
+ * struct rdt_domain - group of CPUs sharing a resctrl resource
+ * @list: all instances of this resource
+ * @id: unique id for this instance
+ * @cpu_mask: which CPUs share this resource
+ * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold
+ * @mbm_total: saved state for MBM total bandwidth
+ * @mbm_local: saved state for MBM local bandwidth
+ * @mbm_over: worker to periodically read MBM h/w counters
+ * @cqm_limbo: worker to periodically read CQM h/w counters
+ * @mbm_work_cpu: worker CPU for MBM h/w counters
+ * @cqm_work_cpu: worker CPU for CQM h/w counters
+ * @plr: pseudo-locked region (if any) associated with domain
+ * @staged_config: parsed configuration to be applied
+ */
+struct rdt_domain {
+ struct list_head list;
+ int id;
+ struct cpumask cpu_mask;
+ unsigned long *rmid_busy_llc;
+ struct mbm_state *mbm_total;
+ struct mbm_state *mbm_local;
+ struct delayed_work mbm_over;
+ struct delayed_work cqm_limbo;
+ int mbm_work_cpu;
+ int cqm_work_cpu;
+ struct pseudo_lock_region *plr;
+ struct resctrl_staged_config staged_config[CDP_NUM_TYPES];
+};
+
+/**
+ * struct resctrl_cache - Cache allocation related data
+ * @cbm_len: Length of the cache bit mask
+ * @min_cbm_bits: Minimum number of consecutive bits to be set
+ * @shareable_bits: Bitmask of shareable resource with other
+ * executing entities
+ * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid.
+ * @arch_has_empty_bitmaps: True if the '0' bitmap is valid.
+ * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache
+ * level has CPU scope.
+ */
+struct resctrl_cache {
+ unsigned int cbm_len;
+ unsigned int min_cbm_bits;
+ unsigned int shareable_bits;
+ bool arch_has_sparse_bitmaps;
+ bool arch_has_empty_bitmaps;
+ bool arch_has_per_cpu_cfg;
+};
+
+/**
+ * enum membw_throttle_mode - System's memory bandwidth throttling mode
+ * @THREAD_THROTTLE_UNDEFINED: Not relevant to the system
+ * @THREAD_THROTTLE_MAX: Memory bandwidth is throttled at the core
+ * always using smallest bandwidth percentage
+ * assigned to threads, aka "max throttling"
+ * @THREAD_THROTTLE_PER_THREAD: Memory bandwidth is throttled at the thread
+ */
+enum membw_throttle_mode {
+ THREAD_THROTTLE_UNDEFINED = 0,
+ THREAD_THROTTLE_MAX,
+ THREAD_THROTTLE_PER_THREAD,
+};
+
+/**
+ * struct resctrl_membw - Memory bandwidth allocation related data
+ * @min_bw: Minimum memory bandwidth percentage user can request
+ * @bw_gran: Granularity at which the memory bandwidth is allocated
+ * @delay_linear: True if memory B/W delay is in linear scale
+ * @arch_needs_linear: True if we can't configure non-linear resources
+ * @throttle_mode: Bandwidth throttling mode when threads request
+ * different memory bandwidths
+ * @mba_sc: True if MBA software controller(mba_sc) is enabled
+ * @mb_map: Mapping of memory B/W percentage to memory B/W delay
+ */
+struct resctrl_membw {
+ u32 min_bw;
+ u32 bw_gran;
+ u32 delay_linear;
+ bool arch_needs_linear;
+ enum membw_throttle_mode throttle_mode;
+ bool mba_sc;
+ u32 *mb_map;
+};
+
+struct rdt_parse_data;
+struct resctrl_schema;
+
+/**
+ * struct rdt_resource - attributes of a resctrl resource
+ * @rid: The index of the resource
+ * @alloc_enabled: Is allocation enabled on this machine
+ * @mon_enabled: Is monitoring enabled for this feature
+ * @alloc_capable: Is allocation available on this machine
+ * @mon_capable: Is monitor feature available on this machine
+ * @num_rmid: Number of RMIDs available
+ * @cache_level: Which cache level defines scope of this resource
+ * @cache: Cache allocation related data
+ * @membw: If the component has bandwidth controls, their properties.
+ * @domains: All domains for this resource
+ * @name: Name to use in "schemata" file.
+ * @data_width: Character width of data when displaying
+ * @default_ctrl: Specifies default cache cbm or memory B/W percent.
+ * @format_str: Per resource format string to show domain value
+ * @parse_ctrlval: Per resource function pointer to parse control values
+ * @evt_list: List of monitoring events
+ * @fflags: flags to choose base and info files
+ * @cdp_capable: Is the CDP feature available on this resource
+ */
+struct rdt_resource {
+ int rid;
+ bool alloc_enabled;
+ bool mon_enabled;
+ bool alloc_capable;
+ bool mon_capable;
+ int num_rmid;
+ int cache_level;
+ struct resctrl_cache cache;
+ struct resctrl_membw membw;
+ struct list_head domains;
+ char *name;
+ int data_width;
+ u32 default_ctrl;
+ const char *format_str;
+ int (*parse_ctrlval)(struct rdt_parse_data *data,
+ struct resctrl_schema *s,
+ struct rdt_domain *d);
+ struct list_head evt_list;
+ unsigned long fflags;
+ bool cdp_capable;
+};
+
+/**
+ * struct resctrl_schema - configuration abilities of a resource presented to
+ * user-space
+ * @list: Member of resctrl_schema_all.
+ * @name: The name to use in the "schemata" file.
+ * @conf_type: Whether this schema is specific to code/data.
+ * @res: The resource structure exported by the architecture to describe
+ * the hardware that is configured by this schema.
+ * @num_closid: The number of closid that can be used with this schema. When
+ * features like CDP are enabled, this will be lower than the
+ * hardware supports for the resource.
+ */
+struct resctrl_schema {
+ struct list_head list;
+ char name[8];
+ enum resctrl_conf_type conf_type;
+ struct rdt_resource *res;
+ u32 num_closid;
+};
+
+/* The number of closid supported by this resource regardless of CDP */
+u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
+int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
+u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
+ u32 closid, enum resctrl_conf_type type);
+
#endif /* _RESCTRL_H */
diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index d1672de9ca89..9deedfeec2b1 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -13,12 +13,39 @@
#ifndef __LINUX_RT_MUTEX_H
#define __LINUX_RT_MUTEX_H
+#include <linux/compiler.h>
#include <linux/linkage.h>
-#include <linux/rbtree.h>
-#include <linux/spinlock_types.h>
+#include <linux/rbtree_types.h>
+#include <linux/spinlock_types_raw.h>
extern int max_lock_depth; /* for sysctl */
+struct rt_mutex_base {
+ raw_spinlock_t wait_lock;
+ struct rb_root_cached waiters;
+ struct task_struct *owner;
+};
+
+#define __RT_MUTEX_BASE_INITIALIZER(rtbasename) \
+{ \
+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(rtbasename.wait_lock), \
+ .waiters = RB_ROOT_CACHED, \
+ .owner = NULL \
+}
+
+/**
+ * rt_mutex_base_is_locked - is the rtmutex locked
+ * @lock: the mutex to be queried
+ *
+ * Returns true if the mutex is locked, false if unlocked.
+ */
+static inline bool rt_mutex_base_is_locked(struct rt_mutex_base *lock)
+{
+ return READ_ONCE(lock->owner) != NULL;
+}
+
+extern void rt_mutex_base_init(struct rt_mutex_base *rtb);
+
/**
* The rt_mutex structure
*
@@ -28,9 +55,7 @@ extern int max_lock_depth; /* for sysctl */
* @owner: the mutex owner
*/
struct rt_mutex {
- raw_spinlock_t wait_lock;
- struct rb_root_cached waiters;
- struct task_struct *owner;
+ struct rt_mutex_base rtmutex;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
@@ -52,32 +77,24 @@ do { \
} while (0)
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) \
- , .dep_map = { .name = #mutexname }
+#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) \
+ .dep_map = { \
+ .name = #mutexname, \
+ .wait_type_inner = LD_WAIT_SLEEP, \
+ }
#else
#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)
#endif
-#define __RT_MUTEX_INITIALIZER(mutexname) \
- { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
- , .waiters = RB_ROOT_CACHED \
- , .owner = NULL \
- __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)}
+#define __RT_MUTEX_INITIALIZER(mutexname) \
+{ \
+ .rtmutex = __RT_MUTEX_BASE_INITIALIZER(mutexname.rtmutex), \
+ __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) \
+}
#define DEFINE_RT_MUTEX(mutexname) \
struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
-/**
- * rt_mutex_is_locked - is the mutex locked
- * @lock: the mutex to be queried
- *
- * Returns 1 if the mutex is locked, 0 if unlocked.
- */
-static inline int rt_mutex_is_locked(struct rt_mutex *lock)
-{
- return lock->owner != NULL;
-}
-
extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/include/linux/rwbase_rt.h b/include/linux/rwbase_rt.h
new file mode 100644
index 000000000000..1d264dd08625
--- /dev/null
+++ b/include/linux/rwbase_rt.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _LINUX_RWBASE_RT_H
+#define _LINUX_RWBASE_RT_H
+
+#include <linux/rtmutex.h>
+#include <linux/atomic.h>
+
+#define READER_BIAS (1U << 31)
+#define WRITER_BIAS (1U << 30)
+
+struct rwbase_rt {
+ atomic_t readers;
+ struct rt_mutex_base rtmutex;
+};
+
+#define __RWBASE_INITIALIZER(name) \
+{ \
+ .readers = ATOMIC_INIT(READER_BIAS), \
+ .rtmutex = __RT_MUTEX_BASE_INITIALIZER(name.rtmutex), \
+}
+
+#define init_rwbase_rt(rwbase) \
+ do { \
+ rt_mutex_base_init(&(rwbase)->rtmutex); \
+ atomic_set(&(rwbase)->readers, READER_BIAS); \
+ } while (0)
+
+
+static __always_inline bool rw_base_is_locked(struct rwbase_rt *rwb)
+{
+ return atomic_read(&rwb->readers) != READER_BIAS;
+}
+
+static __always_inline bool rw_base_is_contended(struct rwbase_rt *rwb)
+{
+ return atomic_read(&rwb->readers) > 0;
+}
+
+#endif /* _LINUX_RWBASE_RT_H */
diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h
new file mode 100644
index 000000000000..49c1f3842ed5
--- /dev/null
+++ b/include/linux/rwlock_rt.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __LINUX_RWLOCK_RT_H
+#define __LINUX_RWLOCK_RT_H
+
+#ifndef __LINUX_SPINLOCK_RT_H
+#error Do not #include directly. Use <linux/spinlock.h>.
+#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern void __rt_rwlock_init(rwlock_t *rwlock, const char *name,
+ struct lock_class_key *key);
+#else
+static inline void __rt_rwlock_init(rwlock_t *rwlock, const char *name,
+ struct lock_class_key *key)
+{
+}
+#endif
+
+#define rwlock_init(rwl) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ init_rwbase_rt(&(rwl)->rwbase); \
+ __rt_rwlock_init(rwl, #rwl, &__key); \
+} while (0)
+
+extern void rt_read_lock(rwlock_t *rwlock);
+extern int rt_read_trylock(rwlock_t *rwlock);
+extern void rt_read_unlock(rwlock_t *rwlock);
+extern void rt_write_lock(rwlock_t *rwlock);
+extern int rt_write_trylock(rwlock_t *rwlock);
+extern void rt_write_unlock(rwlock_t *rwlock);
+
+static __always_inline void read_lock(rwlock_t *rwlock)
+{
+ rt_read_lock(rwlock);
+}
+
+static __always_inline void read_lock_bh(rwlock_t *rwlock)
+{
+ local_bh_disable();
+ rt_read_lock(rwlock);
+}
+
+static __always_inline void read_lock_irq(rwlock_t *rwlock)
+{
+ rt_read_lock(rwlock);
+}
+
+#define read_lock_irqsave(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ rt_read_lock(lock); \
+ flags = 0; \
+ } while (0)
+
+#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock))
+
+static __always_inline void read_unlock(rwlock_t *rwlock)
+{
+ rt_read_unlock(rwlock);
+}
+
+static __always_inline void read_unlock_bh(rwlock_t *rwlock)
+{
+ rt_read_unlock(rwlock);
+ local_bh_enable();
+}
+
+static __always_inline void read_unlock_irq(rwlock_t *rwlock)
+{
+ rt_read_unlock(rwlock);
+}
+
+static __always_inline void read_unlock_irqrestore(rwlock_t *rwlock,
+ unsigned long flags)
+{
+ rt_read_unlock(rwlock);
+}
+
+static __always_inline void write_lock(rwlock_t *rwlock)
+{
+ rt_write_lock(rwlock);
+}
+
+static __always_inline void write_lock_bh(rwlock_t *rwlock)
+{
+ local_bh_disable();
+ rt_write_lock(rwlock);
+}
+
+static __always_inline void write_lock_irq(rwlock_t *rwlock)
+{
+ rt_write_lock(rwlock);
+}
+
+#define write_lock_irqsave(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ rt_write_lock(lock); \
+ flags = 0; \
+ } while (0)
+
+#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock))
+
+#define write_trylock_irqsave(lock, flags) \
+({ \
+ int __locked; \
+ \
+ typecheck(unsigned long, flags); \
+ flags = 0; \
+ __locked = write_trylock(lock); \
+ __locked; \
+})
+
+static __always_inline void write_unlock(rwlock_t *rwlock)
+{
+ rt_write_unlock(rwlock);
+}
+
+static __always_inline void write_unlock_bh(rwlock_t *rwlock)
+{
+ rt_write_unlock(rwlock);
+ local_bh_enable();
+}
+
+static __always_inline void write_unlock_irq(rwlock_t *rwlock)
+{
+ rt_write_unlock(rwlock);
+}
+
+static __always_inline void write_unlock_irqrestore(rwlock_t *rwlock,
+ unsigned long flags)
+{
+ rt_write_unlock(rwlock);
+}
+
+#define rwlock_is_contended(lock) (((void)(lock), 0))
+
+#endif /* __LINUX_RWLOCK_RT_H */
diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
index 3bd03e18061c..1948442e7750 100644
--- a/include/linux/rwlock_types.h
+++ b/include/linux/rwlock_types.h
@@ -1,9 +1,23 @@
#ifndef __LINUX_RWLOCK_TYPES_H
#define __LINUX_RWLOCK_TYPES_H
+#if !defined(__LINUX_SPINLOCK_TYPES_H)
+# error "Do not include directly, include spinlock_types.h"
+#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define RW_DEP_MAP_INIT(lockname) \
+ .dep_map = { \
+ .name = #lockname, \
+ .wait_type_inner = LD_WAIT_CONFIG, \
+ }
+#else
+# define RW_DEP_MAP_INIT(lockname)
+#endif
+
+#ifndef CONFIG_PREEMPT_RT
/*
- * include/linux/rwlock_types.h - generic rwlock type definitions
- * and initializers
+ * generic rwlock type definitions and initializers
*
* portions Copyright 2005, Red Hat, Inc., Ingo Molnar
* Released under the General Public License (GPL).
@@ -21,16 +35,6 @@ typedef struct {
#define RWLOCK_MAGIC 0xdeaf1eed
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define RW_DEP_MAP_INIT(lockname) \
- .dep_map = { \
- .name = #lockname, \
- .wait_type_inner = LD_WAIT_CONFIG, \
- }
-#else
-# define RW_DEP_MAP_INIT(lockname)
-#endif
-
#ifdef CONFIG_DEBUG_SPINLOCK
#define __RW_LOCK_UNLOCKED(lockname) \
(rwlock_t) { .raw_lock = __ARCH_RW_LOCK_UNLOCKED, \
@@ -46,4 +50,29 @@ typedef struct {
#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
+#else /* !CONFIG_PREEMPT_RT */
+
+#include <linux/rwbase_rt.h>
+
+typedef struct {
+ struct rwbase_rt rwbase;
+ atomic_t readers; /* NOTE(review): not set by __RWLOCK_RT_INITIALIZER() or rwlock_init(); looks redundant with rwbase.readers - confirm */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+} rwlock_t;
+
+#define __RWLOCK_RT_INITIALIZER(name) \
+{ \
+ .rwbase = __RWBASE_INITIALIZER(name), \
+ RW_DEP_MAP_INIT(name) \
+}
+
+#define __RW_LOCK_UNLOCKED(name) __RWLOCK_RT_INITIALIZER(name)
+
+#define DEFINE_RWLOCK(name) \
+ rwlock_t name = __RW_LOCK_UNLOCKED(name)
+
+#endif /* CONFIG_PREEMPT_RT */
+
#endif /* __LINUX_RWLOCK_TYPES_H */
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index a66038d88878..426e98e0b675 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -16,6 +16,19 @@
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/err.h>
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define __RWSEM_DEP_MAP_INIT(lockname) \
+ .dep_map = { \
+ .name = #lockname, \
+ .wait_type_inner = LD_WAIT_SLEEP, \
+ },
+#else
+# define __RWSEM_DEP_MAP_INIT(lockname)
+#endif
+
+#ifndef CONFIG_PREEMPT_RT
+
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
#include <linux/osq_lock.h>
#endif
@@ -64,16 +77,6 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
/* Common initializer macros and functions */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) \
- .dep_map = { \
- .name = #lockname, \
- .wait_type_inner = LD_WAIT_SLEEP, \
- },
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
#ifdef CONFIG_DEBUG_RWSEMS
# define __RWSEM_DEBUG_INIT(lockname) .magic = &lockname,
#else
@@ -119,6 +122,61 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem)
return !list_empty(&sem->wait_list);
}
+#else /* !CONFIG_PREEMPT_RT */
+
+#include <linux/rwbase_rt.h>
+
+struct rw_semaphore {
+ struct rwbase_rt rwbase;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+};
+
+#define __RWSEM_INITIALIZER(name) \
+ { \
+ .rwbase = __RWBASE_INITIALIZER(name), \
+ __RWSEM_DEP_MAP_INIT(name) \
+ }
+
+#define DECLARE_RWSEM(lockname) \
+ struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name,
+ struct lock_class_key *key);
+#else
+static inline void __rwsem_init(struct rw_semaphore *rwsem, const char *name,
+ struct lock_class_key *key)
+{
+}
+#endif
+
+#define init_rwsem(sem) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ init_rwbase_rt(&(sem)->rwbase); \
+ __rwsem_init((sem), #sem, &__key); \
+} while (0)
+
+static __always_inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return rw_base_is_locked(&sem->rwbase);
+}
+
+static __always_inline int rwsem_is_contended(struct rw_semaphore *sem)
+{
+ return rw_base_is_contended(&sem->rwbase);
+}
+
+#endif /* CONFIG_PREEMPT_RT */
+
+/*
+ * The functions below are the same for all rwsem implementations including
+ * the RT specific variant.
+ */
+
/*
* lock for reading
*/
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ec8d07d88641..e12b524426b0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -42,6 +42,7 @@ struct backing_dev_info;
struct bio_list;
struct blk_plug;
struct bpf_local_storage;
+struct bpf_run_ctx;
struct capture_control;
struct cfs_rq;
struct fs_struct;
@@ -95,7 +96,9 @@ struct task_group;
#define TASK_WAKING 0x0200
#define TASK_NOLOAD 0x0400
#define TASK_NEW 0x0800
-#define TASK_STATE_MAX 0x1000
+/* RT specific auxiliary flag to mark RT lock waiters */
+#define TASK_RTLOCK_WAIT 0x1000
+#define TASK_STATE_MAX 0x2000
/* Convenience macros for the sake of set_current_state: */
#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
@@ -121,8 +124,6 @@ struct task_group;
#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
-#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-
/*
* Special states are those that do not use the normal wait-loop pattern. See
* the comment with set_special_state().
@@ -130,30 +131,37 @@ struct task_group;
#define is_special_task_state(state) \
((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD))
-#define __set_current_state(state_value) \
- do { \
- WARN_ON_ONCE(is_special_task_state(state_value));\
- current->task_state_change = _THIS_IP_; \
- WRITE_ONCE(current->__state, (state_value)); \
- } while (0)
-
-#define set_current_state(state_value) \
- do { \
- WARN_ON_ONCE(is_special_task_state(state_value));\
- current->task_state_change = _THIS_IP_; \
- smp_store_mb(current->__state, (state_value)); \
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+# define debug_normal_state_change(state_value) \
+ do { \
+ WARN_ON_ONCE(is_special_task_state(state_value)); \
+ current->task_state_change = _THIS_IP_; \
} while (0)
-#define set_special_state(state_value) \
+# define debug_special_state_change(state_value) \
do { \
- unsigned long flags; /* may shadow */ \
WARN_ON_ONCE(!is_special_task_state(state_value)); \
- raw_spin_lock_irqsave(&current->pi_lock, flags); \
current->task_state_change = _THIS_IP_; \
- WRITE_ONCE(current->__state, (state_value)); \
- raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0)
+
+# define debug_rtlock_wait_set_state() \
+ do { \
+ current->saved_state_change = current->task_state_change;\
+ current->task_state_change = _THIS_IP_; \
+ } while (0)
+
+# define debug_rtlock_wait_restore_state() \
+ do { \
+ current->task_state_change = current->saved_state_change;\
+ } while (0)
+
#else
+# define debug_normal_state_change(cond) do { } while (0)
+# define debug_special_state_change(cond) do { } while (0)
+# define debug_rtlock_wait_set_state() do { } while (0)
+# define debug_rtlock_wait_restore_state() do { } while (0)
+#endif
+
/*
* set_current_state() includes a barrier so that the write of current->state
* is correctly serialised wrt the caller's subsequent test of whether to
@@ -192,26 +200,77 @@ struct task_group;
* Also see the comments of try_to_wake_up().
*/
#define __set_current_state(state_value) \
- WRITE_ONCE(current->__state, (state_value))
+ do { \
+ debug_normal_state_change((state_value)); \
+ WRITE_ONCE(current->__state, (state_value)); \
+ } while (0)
#define set_current_state(state_value) \
- smp_store_mb(current->__state, (state_value))
+ do { \
+ debug_normal_state_change((state_value)); \
+ smp_store_mb(current->__state, (state_value)); \
+ } while (0)
/*
* set_special_state() should be used for those states when the blocking task
* can not use the regular condition based wait-loop. In that case we must
- * serialize against wakeups such that any possible in-flight TASK_RUNNING stores
- * will not collide with our state change.
+ * serialize against wakeups such that any possible in-flight TASK_RUNNING
+ * stores will not collide with our state change.
*/
#define set_special_state(state_value) \
do { \
unsigned long flags; /* may shadow */ \
+ \
raw_spin_lock_irqsave(&current->pi_lock, flags); \
+ debug_special_state_change((state_value)); \
WRITE_ONCE(current->__state, (state_value)); \
raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0)
-#endif
+/*
+ * PREEMPT_RT specific variants for "sleeping" spin/rwlocks
+ *
+ * RT's spin/rwlock substitutions are state preserving. The state of the
+ * task when blocking on the lock is saved in task_struct::saved_state and
+ * restored after the lock has been acquired. These operations are
+ * serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT
+ * lock related wakeups while the task is blocked on the lock are
+ * redirected to operate on task_struct::saved_state to ensure that these
+ * are not dropped. On restore task_struct::saved_state is set to
+ * TASK_RUNNING so any wakeup attempt redirected to saved_state will fail.
+ *
+ * The lock operation looks like this:
+ *
+ * current_save_and_set_rtlock_wait_state();
+ * for (;;) {
+ * if (try_lock())
+ * break;
+ * raw_spin_unlock_irq(&lock->wait_lock);
+ * schedule_rtlock();
+ * raw_spin_lock_irq(&lock->wait_lock);
+ * set_current_state(TASK_RTLOCK_WAIT);
+ * }
+ * current_restore_rtlock_saved_state();
+ */
+#define current_save_and_set_rtlock_wait_state() \
+ do { \
+ lockdep_assert_irqs_disabled(); \
+ raw_spin_lock(&current->pi_lock); \
+ current->saved_state = current->__state; \
+ debug_rtlock_wait_set_state(); \
+ WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \
+ raw_spin_unlock(&current->pi_lock); \
+ } while (0)
+
+#define current_restore_rtlock_saved_state() \
+ do { \
+ lockdep_assert_irqs_disabled(); \
+ raw_spin_lock(&current->pi_lock); \
+ debug_rtlock_wait_restore_state(); \
+ WRITE_ONCE(current->__state, current->saved_state); \
+ current->saved_state = TASK_RUNNING; \
+ raw_spin_unlock(&current->pi_lock); \
+ } while (0)
#define get_current_state() READ_ONCE(current->__state)
@@ -230,6 +289,9 @@ extern long schedule_timeout_idle(long timeout);
asmlinkage void schedule(void);
extern void schedule_preempt_disabled(void);
asmlinkage void preempt_schedule_irq(void);
+#ifdef CONFIG_PREEMPT_RT
+ extern void schedule_rtlock(void);
+#endif
extern int __must_check io_schedule_prepare(void);
extern void io_schedule_finish(int token);
@@ -668,6 +730,11 @@ struct task_struct {
#endif
unsigned int __state;
+#ifdef CONFIG_PREEMPT_RT
+ /* saved state for "spinlock sleepers" */
+ unsigned int saved_state;
+#endif
+
/*
* This begins the randomizable portion of task_struct. Only
* scheduling-critical items should be added above here.
@@ -748,6 +815,7 @@ struct task_struct {
unsigned int policy;
int nr_cpus_allowed;
const cpumask_t *cpus_ptr;
+ cpumask_t *user_cpus_ptr;
cpumask_t cpus_mask;
void *migration_pending;
#ifdef CONFIG_SMP
@@ -863,6 +931,10 @@ struct task_struct {
/* Used by page_owner=on to detect recursion in page tracking. */
unsigned in_page_owner:1;
#endif
+#ifdef CONFIG_EVENTFD
+ /* Recursion prevention for eventfd_signal() */
+ unsigned in_eventfd_signal:1;
+#endif
unsigned long atomic_flags; /* Flags requiring atomic access. */
@@ -1357,6 +1429,9 @@ struct task_struct {
struct kmap_ctrl kmap_ctrl;
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long task_state_change;
+# ifdef CONFIG_PREEMPT_RT
+ unsigned long saved_state_change;
+# endif
#endif
int pagefault_disabled;
#ifdef CONFIG_MMU
@@ -1379,6 +1454,8 @@ struct task_struct {
#ifdef CONFIG_BPF_SYSCALL
/* Used by BPF task local storage */
struct bpf_local_storage __rcu *bpf_storage;
+ /* Used for BPF run context */
+ struct bpf_run_ctx *bpf_ctx;
#endif
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
@@ -1400,6 +1477,16 @@ struct task_struct {
struct llist_head kretprobe_instances;
#endif
+#ifdef CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH
+ /*
+ * If L1D flush is supported on mm context switch
+ * then we use this callback head to queue kill work
+ * to kill tasks that are not running on SMT disabled
+ * cores
+ */
+ struct callback_head l1d_flush_kill;
+#endif
+
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.
@@ -1705,6 +1792,11 @@ extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_
#ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
+extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node);
+extern void release_user_cpus_ptr(struct task_struct *p);
+extern int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask);
+extern void force_compatible_cpus_allowed_ptr(struct task_struct *p);
+extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p);
#else
static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
@@ -1715,6 +1807,21 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma
return -EINVAL;
return 0;
}
+static inline int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node)
+{
+ if (src->user_cpus_ptr)
+ return -EINVAL;
+ return 0;
+}
+static inline void release_user_cpus_ptr(struct task_struct *p)
+{
+ WARN_ON(p->user_cpus_ptr);
+}
+
+static inline int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
+{
+ return 0;
+}
#endif
extern int yield_to(struct task_struct *p, bool preempt);
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index b9126fe06c3f..0310a5add9ab 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -714,6 +714,12 @@ static inline void unlock_task_sighand(struct task_struct *task,
spin_unlock_irqrestore(&task->sighand->siglock, *flags);
}
+#ifdef CONFIG_LOCKDEP
+extern void lockdep_assert_task_sighand_held(struct task_struct *task);
+#else
+static inline void lockdep_assert_task_sighand_held(struct task_struct *task) { }
+#endif
+
static inline unsigned long task_rlimit(const struct task_struct *task,
unsigned int limit)
{
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index db2c0f34aaaf..304f431178fd 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -28,30 +28,12 @@ enum { sysctl_hung_task_timeout_secs = 0 };
extern unsigned int sysctl_sched_child_runs_first;
-extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_min_granularity;
-extern unsigned int sysctl_sched_wakeup_granularity;
-
enum sched_tunable_scaling {
SCHED_TUNABLESCALING_NONE,
SCHED_TUNABLESCALING_LOG,
SCHED_TUNABLESCALING_LINEAR,
SCHED_TUNABLESCALING_END,
};
-extern unsigned int sysctl_sched_tunable_scaling;
-
-extern unsigned int sysctl_numa_balancing_scan_delay;
-extern unsigned int sysctl_numa_balancing_scan_period_min;
-extern unsigned int sysctl_numa_balancing_scan_period_max;
-extern unsigned int sysctl_numa_balancing_scan_size;
-
-#ifdef CONFIG_SCHED_DEBUG
-extern __read_mostly unsigned int sysctl_sched_migration_cost;
-extern __read_mostly unsigned int sysctl_sched_nr_migrate;
-
-extern int sysctl_resched_latency_warn_ms;
-extern int sysctl_resched_latency_warn_once;
-#endif
/*
* control realtime throttling:
diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
index 26a2013ac39c..06cd8fb2f409 100644
--- a/include/linux/sched/wake_q.h
+++ b/include/linux/sched/wake_q.h
@@ -42,8 +42,11 @@ struct wake_q_head {
#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
-#define DEFINE_WAKE_Q(name) \
- struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
+#define WAKE_Q_HEAD_INITIALIZER(name) \
+ { WAKE_Q_TAIL, &name.first }
+
+#define DEFINE_WAKE_Q(name) \
+ struct wake_q_head name = WAKE_Q_HEAD_INITIALIZER(name)
static inline void wake_q_init(struct wake_q_head *head)
{
diff --git a/include/linux/security.h b/include/linux/security.h
index 24eda04221e9..5b7288521300 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -120,10 +120,11 @@ enum lockdown_reason {
LOCKDOWN_MMIOTRACE,
LOCKDOWN_DEBUGFS,
LOCKDOWN_XMON_WR,
+ LOCKDOWN_BPF_WRITE_USER,
LOCKDOWN_INTEGRITY_MAX,
LOCKDOWN_KCORE,
LOCKDOWN_KPROBES,
- LOCKDOWN_BPF_READ,
+ LOCKDOWN_BPF_READ_KERNEL,
LOCKDOWN_PERF,
LOCKDOWN_TRACEFS,
LOCKDOWN_XMON_RW,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b2db9cd9a73f..6bdb0db3e825 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -689,6 +689,7 @@ typedef unsigned char *sk_buff_data_t;
* CHECKSUM_UNNECESSARY (max 3)
* @dst_pending_confirm: need to confirm neighbour
* @decrypted: Decrypted SKB
+ * @slow_gro: state present at GRO time, slower prepare step required
* @napi_id: id of the NAPI struct this skb came from
* @sender_cpu: (aka @napi_id) source CPU in XPS
* @secmark: security marking
@@ -863,13 +864,14 @@ struct sk_buff {
__u8 tc_skip_classify:1;
__u8 tc_at_ingress:1;
#endif
-#ifdef CONFIG_NET_REDIRECT
__u8 redirected:1;
+#ifdef CONFIG_NET_REDIRECT
__u8 from_ingress:1;
#endif
#ifdef CONFIG_TLS_DEVICE
__u8 decrypted:1;
#endif
+ __u8 slow_gro:1;
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
@@ -990,6 +992,7 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
*/
static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
{
+ skb->slow_gro |= !!dst;
skb->_skb_refdst = (unsigned long)dst;
}
@@ -1006,6 +1009,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
{
WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+ skb->slow_gro |= !!dst;
skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
}
@@ -1179,6 +1183,7 @@ static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom,
int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
unsigned int headroom);
+struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom);
struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
int newtailroom, gfp_t priority);
int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
@@ -4216,6 +4221,7 @@ static inline unsigned long skb_get_nfct(const struct sk_buff *skb)
static inline void skb_set_nfct(struct sk_buff *skb, unsigned long nfct)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ skb->slow_gro |= !!nfct;
skb->_nfct = nfct;
#endif
}
@@ -4375,6 +4381,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_conntrack_put(skb_nfct(dst));
#endif
+ dst->slow_gro = src->slow_gro;
__nf_copy(dst, src, true);
}
@@ -4664,17 +4671,13 @@ static inline __wsum lco_csum(struct sk_buff *skb)
static inline bool skb_is_redirected(const struct sk_buff *skb)
{
-#ifdef CONFIG_NET_REDIRECT
return skb->redirected;
-#else
- return false;
-#endif
}
static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
{
-#ifdef CONFIG_NET_REDIRECT
skb->redirected = 1;
+#ifdef CONFIG_NET_REDIRECT
skb->from_ingress = from_ingress;
if (skb->from_ingress)
skb->tstamp = 0;
@@ -4683,9 +4686,7 @@ static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
static inline void skb_reset_redirect(struct sk_buff *skb)
{
-#ifdef CONFIG_NET_REDIRECT
skb->redirected = 0;
-#endif
}
static inline bool skb_csum_is_sctp(struct sk_buff *skb)
@@ -4711,11 +4712,9 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb)
}
#ifdef CONFIG_PAGE_POOL
-static inline void skb_mark_for_recycle(struct sk_buff *skb, struct page *page,
- struct page_pool *pp)
+static inline void skb_mark_for_recycle(struct sk_buff *skb)
{
skb->pp_recycle = 1;
- page_pool_store_mem_info(page, pp);
}
#endif
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 0d8e3dcb7f88..041d6032a348 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -223,8 +223,11 @@ struct ucred {
* reuses AF_INET address family
*/
#define AF_XDP 44 /* XDP sockets */
+#define AF_MCTP 45 /* Management component
+ * transport protocol
+ */
-#define AF_MAX 45 /* For now.. */
+#define AF_MAX 46 /* For now.. */
/* Protocol families, same as address families. */
#define PF_UNSPEC AF_UNSPEC
@@ -274,6 +277,7 @@ struct ucred {
#define PF_QIPCRTR AF_QIPCRTR
#define PF_SMC AF_SMC
#define PF_XDP AF_XDP
+#define PF_MCTP AF_MCTP
#define PF_MAX AF_MAX
/* Maximum queue length specifiable by listen. */
@@ -421,6 +425,9 @@ extern int __sys_accept4_file(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags,
unsigned long nofile);
+extern struct file *do_accept(struct file *file, unsigned file_flags,
+ struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags);
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol);
diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index a48ac3e77301..76ce3f3ac0f2 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -661,6 +661,8 @@ struct sdw_slave_ops {
* initialized
* @first_interrupt_done: status flag tracking if the interrupt handling
* for a Slave happens for the first time after enumeration
+ * @is_mockup_device: status flag used to squelch errors in the command/control
+ * protocol for SoundWire mockup devices
*/
struct sdw_slave {
struct sdw_slave_id id;
@@ -683,6 +685,7 @@ struct sdw_slave {
struct completion initialization_complete;
u32 unattach_request;
bool first_interrupt_done;
+ bool is_mockup_device;
};
#define dev_to_sdw_dev(_dev) container_of(_dev, struct sdw_slave, dev)
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 97b8d12b5f2b..8371bca13729 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -147,7 +147,11 @@ extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer);
* not using a GPIO line)
* @word_delay: delay to be inserted between consecutive
* words of a transfer
- *
+ * @cs_setup: delay to be introduced by the controller after CS is asserted
+ * @cs_hold: delay to be introduced by the controller before CS is deasserted
+ * @cs_inactive: delay to be introduced by the controller after CS is
+ * deasserted. If @cs_change_delay is used from @spi_transfer, then the
+ * two delays will be added up.
* @statistics: statistics for the spi_device
*
* A @spi_device is used to interchange data between an SPI slave
@@ -188,6 +192,10 @@ struct spi_device {
int cs_gpio; /* LEGACY: chip select gpio */
struct gpio_desc *cs_gpiod; /* chip select gpio desc */
struct spi_delay word_delay; /* inter-word delay */
+ /* CS delays */
+ struct spi_delay cs_setup;
+ struct spi_delay cs_hold;
+ struct spi_delay cs_inactive;
/* the statistics */
struct spi_statistics statistics;
@@ -339,6 +347,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
* @max_speed_hz: Highest supported transfer speed
* @flags: other constraints relevant to this driver
* @slave: indicates that this is an SPI slave controller
+ * @devm_allocated: whether the allocation of this struct is devres-managed
* @max_transfer_size: function that returns the max transfer size for
* a &spi_device; may be %NULL, so the default %SIZE_MAX will be used.
* @max_message_size: function that returns the max message size for
@@ -412,11 +421,6 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
* controller has native support for memory like operations.
* @unprepare_message: undo any work done by prepare_message().
* @slave_abort: abort the ongoing transfer request on an SPI slave controller
- * @cs_setup: delay to be introduced by the controller after CS is asserted
- * @cs_hold: delay to be introduced by the controller before CS is deasserted
- * @cs_inactive: delay to be introduced by the controller after CS is
- * deasserted. If @cs_change_delay is used from @spi_transfer, then the
- * two delays will be added up.
* @cs_gpios: LEGACY: array of GPIO descs to use as chip select lines; one per
* CS number. Any individual value may be -ENOENT for CS lines that
* are not GPIOs (driven by the SPI controller itself). Use the cs_gpiods
@@ -511,7 +515,7 @@ struct spi_controller {
#define SPI_MASTER_GPIO_SS BIT(5) /* GPIO CS must select slave */
- /* flag indicating this is a non-devres managed controller */
+ /* flag indicating if the allocation of this struct is devres-managed */
bool devm_allocated;
/* flag indicating this is an SPI slave controller */
@@ -550,8 +554,7 @@ struct spi_controller {
* to configure specific CS timing through spi_set_cs_timing() after
* spi_setup().
*/
- int (*set_cs_timing)(struct spi_device *spi, struct spi_delay *setup,
- struct spi_delay *hold, struct spi_delay *inactive);
+ int (*set_cs_timing)(struct spi_device *spi);
/* bidirectional bulk transfers
*
@@ -638,11 +641,6 @@ struct spi_controller {
/* Optimized handlers for SPI memory-like operations. */
const struct spi_controller_mem_ops *mem_ops;
- /* CS delays */
- struct spi_delay cs_setup;
- struct spi_delay cs_hold;
- struct spi_delay cs_inactive;
-
/* gpio chip select */
int *cs_gpios;
struct gpio_desc **cs_gpiods;
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 79897841a2cc..45310ea1b1d7 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -12,6 +12,8 @@
* asm/spinlock_types.h: contains the arch_spinlock_t/arch_rwlock_t and the
* initializers
*
+ * linux/spinlock_types_raw.h:
+ * The raw types and initializers
* linux/spinlock_types.h:
* defines the generic type and initializers
*
@@ -31,6 +33,8 @@
* contains the generic, simplified UP spinlock type.
* (which is an empty structure on non-debug builds)
*
+ * linux/spinlock_types_raw.h:
+ * The raw RT types and initializers
* linux/spinlock_types.h:
* defines the generic type and initializers
*
@@ -308,8 +312,10 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
1 : ({ local_irq_restore(flags); 0; }); \
})
-/* Include rwlock functions */
+#ifndef CONFIG_PREEMPT_RT
+/* Include rwlock functions for !RT */
#include <linux/rwlock.h>
+#endif
/*
* Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
@@ -320,6 +326,9 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
# include <linux/spinlock_api_up.h>
#endif
+/* Non PREEMPT_RT kernel, map to raw spinlocks: */
+#ifndef CONFIG_PREEMPT_RT
+
/*
* Map the spin_lock functions to the raw variants for PREEMPT_RT=n
*/
@@ -454,6 +463,10 @@ static __always_inline int spin_is_contended(spinlock_t *lock)
#define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock)
+#else /* !CONFIG_PREEMPT_RT */
+# include <linux/spinlock_rt.h>
+#endif /* CONFIG_PREEMPT_RT */
+
/*
* Pull the atomic_t declaration:
* (asm-mips/atomic.h needs above definitions)
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 19a9be9d97ee..6b8e1a0b137b 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -187,6 +187,9 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock)
return 0;
}
+/* PREEMPT_RT has its own rwlock implementation */
+#ifndef CONFIG_PREEMPT_RT
#include <linux/rwlock_api_smp.h>
+#endif
#endif /* __LINUX_SPINLOCK_API_SMP_H */
diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h
new file mode 100644
index 000000000000..835aedaf68ac
--- /dev/null
+++ b/include/linux/spinlock_rt.h
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef __LINUX_SPINLOCK_RT_H
+#define __LINUX_SPINLOCK_RT_H
+
+#ifndef __LINUX_SPINLOCK_H
+#error Do not include directly. Use spinlock.h
+#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern void __rt_spin_lock_init(spinlock_t *lock, const char *name,
+ struct lock_class_key *key, bool percpu);
+#else
+static inline void __rt_spin_lock_init(spinlock_t *lock, const char *name,
+ struct lock_class_key *key, bool percpu)
+{
+}
+#endif
+
+#define spin_lock_init(slock) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ rt_mutex_base_init(&(slock)->lock); \
+ __rt_spin_lock_init(slock, #slock, &__key, false); \
+} while (0)
+
+#define local_spin_lock_init(slock) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ rt_mutex_base_init(&(slock)->lock); \
+ __rt_spin_lock_init(slock, #slock, &__key, true); \
+} while (0)
+
+extern void rt_spin_lock(spinlock_t *lock);
+extern void rt_spin_lock_nested(spinlock_t *lock, int subclass);
+extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock);
+extern void rt_spin_unlock(spinlock_t *lock);
+extern void rt_spin_lock_unlock(spinlock_t *lock);
+extern int rt_spin_trylock_bh(spinlock_t *lock);
+extern int rt_spin_trylock(spinlock_t *lock);
+
+static __always_inline void spin_lock(spinlock_t *lock)
+{
+ rt_spin_lock(lock);
+}
+
+#ifdef CONFIG_LOCKDEP
+# define __spin_lock_nested(lock, subclass) \
+ rt_spin_lock_nested(lock, subclass)
+
+# define __spin_lock_nest_lock(lock, nest_lock) \
+ do { \
+ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
+ rt_spin_lock_nest_lock(lock, &(nest_lock)->dep_map); \
+ } while (0)
+# define __spin_lock_irqsave_nested(lock, flags, subclass) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = 0; \
+ __spin_lock_nested(lock, subclass); \
+ } while (0)
+
+#else
+ /*
+ * Always evaluate the 'subclass' argument to avoid that the compiler
+ * warns about set-but-not-used variables when building with
+ * CONFIG_DEBUG_LOCK_ALLOC=n and with W=1.
+ */
+# define __spin_lock_nested(lock, subclass) spin_lock(((void)(subclass), (lock)))
+# define __spin_lock_nest_lock(lock, subclass) spin_lock(((void)(subclass), (lock)))
+# define __spin_lock_irqsave_nested(lock, flags, subclass) \
+ spin_lock_irqsave(((void)(subclass), (lock)), flags)
+#endif
+
+#define spin_lock_nested(lock, subclass) \
+ __spin_lock_nested(lock, subclass)
+
+#define spin_lock_nest_lock(lock, nest_lock) \
+ __spin_lock_nest_lock(lock, nest_lock)
+
+#define spin_lock_irqsave_nested(lock, flags, subclass) \
+ __spin_lock_irqsave_nested(lock, flags, subclass)
+
+static __always_inline void spin_lock_bh(spinlock_t *lock)
+{
+ /* Investigate: Drop bh when blocking ? */
+ local_bh_disable();
+ rt_spin_lock(lock);
+}
+
+static __always_inline void spin_lock_irq(spinlock_t *lock)
+{
+ rt_spin_lock(lock);
+}
+
+#define spin_lock_irqsave(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = 0; \
+ spin_lock(lock); \
+ } while (0)
+
+static __always_inline void spin_unlock(spinlock_t *lock)
+{
+ rt_spin_unlock(lock);
+}
+
+static __always_inline void spin_unlock_bh(spinlock_t *lock)
+{
+ rt_spin_unlock(lock);
+ local_bh_enable();
+}
+
+static __always_inline void spin_unlock_irq(spinlock_t *lock)
+{
+ rt_spin_unlock(lock);
+}
+
+static __always_inline void spin_unlock_irqrestore(spinlock_t *lock,
+ unsigned long flags)
+{
+ rt_spin_unlock(lock);
+}
+
+#define spin_trylock(lock) \
+ __cond_lock(lock, rt_spin_trylock(lock))
+
+#define spin_trylock_bh(lock) \
+ __cond_lock(lock, rt_spin_trylock_bh(lock))
+
+#define spin_trylock_irq(lock) \
+ __cond_lock(lock, rt_spin_trylock(lock))
+
+#define __spin_trylock_irqsave(lock, flags) \
+({ \
+ int __locked; \
+ \
+ typecheck(unsigned long, flags); \
+ flags = 0; \
+ __locked = spin_trylock(lock); \
+ __locked; \
+})
+
+#define spin_trylock_irqsave(lock, flags) \
+ __cond_lock(lock, __spin_trylock_irqsave(lock, flags))
+
+#define spin_is_contended(lock) (((void)(lock), 0))
+
+static inline int spin_is_locked(spinlock_t *lock)
+{
+ return rt_mutex_base_is_locked(&lock->lock);
+}
+
+#define assert_spin_locked(lock) BUG_ON(!spin_is_locked(lock))
+
+#include <linux/rwlock_rt.h>
+
+#endif
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index b981caafe8bf..2dfa35ffec76 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -9,65 +9,11 @@
* Released under the General Public License (GPL).
*/
-#if defined(CONFIG_SMP)
-# include <asm/spinlock_types.h>
-#else
-# include <linux/spinlock_types_up.h>
-#endif
-
-#include <linux/lockdep_types.h>
+#include <linux/spinlock_types_raw.h>
-typedef struct raw_spinlock {
- arch_spinlock_t raw_lock;
-#ifdef CONFIG_DEBUG_SPINLOCK
- unsigned int magic, owner_cpu;
- void *owner;
-#endif
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-} raw_spinlock_t;
-
-#define SPINLOCK_MAGIC 0xdead4ead
-
-#define SPINLOCK_OWNER_INIT ((void *)-1L)
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define RAW_SPIN_DEP_MAP_INIT(lockname) \
- .dep_map = { \
- .name = #lockname, \
- .wait_type_inner = LD_WAIT_SPIN, \
- }
-# define SPIN_DEP_MAP_INIT(lockname) \
- .dep_map = { \
- .name = #lockname, \
- .wait_type_inner = LD_WAIT_CONFIG, \
- }
-#else
-# define RAW_SPIN_DEP_MAP_INIT(lockname)
-# define SPIN_DEP_MAP_INIT(lockname)
-#endif
-
-#ifdef CONFIG_DEBUG_SPINLOCK
-# define SPIN_DEBUG_INIT(lockname) \
- .magic = SPINLOCK_MAGIC, \
- .owner_cpu = -1, \
- .owner = SPINLOCK_OWNER_INIT,
-#else
-# define SPIN_DEBUG_INIT(lockname)
-#endif
-
-#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
- { \
- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
- SPIN_DEBUG_INIT(lockname) \
- RAW_SPIN_DEP_MAP_INIT(lockname) }
-
-#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
- (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
-
-#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
+#ifndef CONFIG_PREEMPT_RT
+/* Non PREEMPT_RT kernels map spinlock to raw_spinlock */
typedef struct spinlock {
union {
struct raw_spinlock rlock;
@@ -96,6 +42,35 @@ typedef struct spinlock {
#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
+#else /* !CONFIG_PREEMPT_RT */
+
+/* PREEMPT_RT kernels map spinlock to rt_mutex */
+#include <linux/rtmutex.h>
+
+typedef struct spinlock {
+ struct rt_mutex_base lock;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+} spinlock_t;
+
+#define __SPIN_LOCK_UNLOCKED(name) \
+ { \
+ .lock = __RT_MUTEX_BASE_INITIALIZER(name.lock), \
+ SPIN_DEP_MAP_INIT(name) \
+ }
+
+#define __LOCAL_SPIN_LOCK_UNLOCKED(name) \
+ { \
+ .lock = __RT_MUTEX_BASE_INITIALIZER(name.lock), \
+ LOCAL_SPIN_DEP_MAP_INIT(name) \
+ }
+
+#define DEFINE_SPINLOCK(name) \
+ spinlock_t name = __SPIN_LOCK_UNLOCKED(name)
+
+#endif /* CONFIG_PREEMPT_RT */
+
#include <linux/rwlock_types.h>
#endif /* __LINUX_SPINLOCK_TYPES_H */
diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h
new file mode 100644
index 000000000000..91cb36b65a17
--- /dev/null
+++ b/include/linux/spinlock_types_raw.h
@@ -0,0 +1,73 @@
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+#define __LINUX_SPINLOCK_TYPES_RAW_H
+
+#include <linux/types.h>
+
+#if defined(CONFIG_SMP)
+# include <asm/spinlock_types.h>
+#else
+# include <linux/spinlock_types_up.h>
+#endif
+
+#include <linux/lockdep_types.h>
+
+typedef struct raw_spinlock {
+ arch_spinlock_t raw_lock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+ unsigned int magic, owner_cpu;
+ void *owner;
+#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+} raw_spinlock_t;
+
+#define SPINLOCK_MAGIC 0xdead4ead
+
+#define SPINLOCK_OWNER_INIT ((void *)-1L)
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define RAW_SPIN_DEP_MAP_INIT(lockname) \
+ .dep_map = { \
+ .name = #lockname, \
+ .wait_type_inner = LD_WAIT_SPIN, \
+ }
+# define SPIN_DEP_MAP_INIT(lockname) \
+ .dep_map = { \
+ .name = #lockname, \
+ .wait_type_inner = LD_WAIT_CONFIG, \
+ }
+
+# define LOCAL_SPIN_DEP_MAP_INIT(lockname) \
+ .dep_map = { \
+ .name = #lockname, \
+ .wait_type_inner = LD_WAIT_CONFIG, \
+ .lock_type = LD_LOCK_PERCPU, \
+ }
+#else
+# define RAW_SPIN_DEP_MAP_INIT(lockname)
+# define SPIN_DEP_MAP_INIT(lockname)
+# define LOCAL_SPIN_DEP_MAP_INIT(lockname)
+#endif
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+# define SPIN_DEBUG_INIT(lockname) \
+ .magic = SPINLOCK_MAGIC, \
+ .owner_cpu = -1, \
+ .owner = SPINLOCK_OWNER_INIT,
+#else
+# define SPIN_DEBUG_INIT(lockname)
+#endif
+
+#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
+{ \
+ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
+ SPIN_DEBUG_INIT(lockname) \
+ RAW_SPIN_DEP_MAP_INIT(lockname) }
+
+#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
+ (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
+
+#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
+
+#endif /* __LINUX_SPINLOCK_TYPES_RAW_H */
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 0e0cf4d6a72a..6cfaa0a9a9b9 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -61,7 +61,7 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp)
int idx;
idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1;
- WRITE_ONCE(ssp->srcu_lock_nesting[idx], ssp->srcu_lock_nesting[idx] + 1);
+ WRITE_ONCE(ssp->srcu_lock_nesting[idx], READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1);
return idx;
}
@@ -81,11 +81,11 @@ static inline void srcu_torture_stats_print(struct srcu_struct *ssp,
{
int idx;
- idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1;
+ idx = ((data_race(READ_ONCE(ssp->srcu_idx)) + 1) & 0x2) >> 1;
pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n",
tt, tf, idx,
- READ_ONCE(ssp->srcu_lock_nesting[!idx]),
- READ_ONCE(ssp->srcu_lock_nesting[idx]));
+ data_race(READ_ONCE(ssp->srcu_lock_nesting[!idx])),
+ data_race(READ_ONCE(ssp->srcu_lock_nesting[idx])));
}
#endif
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 0d5a2691e7e9..f9b53acb4e02 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -7,7 +7,7 @@
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
#include <linux/mod_devicetable.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
diff --git a/include/linux/ssb/ssb_driver_extif.h b/include/linux/ssb/ssb_driver_extif.h
index 3f8bc973d67d..19253bfacd1a 100644
--- a/include/linux/ssb/ssb_driver_extif.h
+++ b/include/linux/ssb/ssb_driver_extif.h
@@ -197,7 +197,7 @@ struct ssb_extif {
static inline bool ssb_extif_available(struct ssb_extif *extif)
{
- return 0;
+ return false;
}
static inline
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index fc94faa53b5b..3e56a9751c06 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -17,11 +17,17 @@
* DECLARE_STATIC_CALL(name, func);
* DEFINE_STATIC_CALL(name, func);
* DEFINE_STATIC_CALL_NULL(name, typename);
+ * DEFINE_STATIC_CALL_RET0(name, typename);
+ *
+ * __static_call_return0;
+ *
* static_call(name)(args...);
* static_call_cond(name)(args...);
* static_call_update(name, func);
* static_call_query(name);
*
+ * EXPORT_STATIC_CALL{,_TRAMP}{,_GPL}()
+ *
* Usage example:
*
* # Start with the following functions (with identical prototypes):
@@ -96,6 +102,33 @@
* To query which function is currently set to be called, use:
*
* func = static_call_query(name);
+ *
+ *
+ * DEFINE_STATIC_CALL_RET0 / __static_call_return0:
+ *
+ * Just like how DEFINE_STATIC_CALL_NULL() / static_call_cond() optimize the
+ * conditional void function call, DEFINE_STATIC_CALL_RET0 /
+ * __static_call_return0 optimize the do nothing return 0 function.
+ *
+ * This feature is strictly UB per the C standard (since it casts a function
+ * pointer to a different signature) and relies on the architecture ABI to
+ * make things work. In particular it relies on Caller Stack-cleanup and the
+ * whole return register being clobbered for short return values. All normal
+ * CDECL style ABIs conform.
+ *
+ * In particular the x86_64 implementation replaces the 5 byte CALL
+ * instruction at the callsite with a 5 byte clear of the RAX register,
+ * completely eliding any function call overhead.
+ *
+ * Notably argument setup is unconditional.
+ *
+ *
+ * EXPORT_STATIC_CALL() vs EXPORT_STATIC_CALL_TRAMP():
+ *
+ * The difference is that the _TRAMP variant tries to only export the
+ * trampoline with the result that a module can use static_call{,_cond}() but
+ * not static_call_update().
+ *
*/
#include <linux/types.h>
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h
index 938c2bf29db8..02117ed0fa2e 100644
--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h
@@ -20,6 +20,7 @@ enum rpc_auth_flavors {
RPC_AUTH_DES = 3,
RPC_AUTH_KRB = 4,
RPC_AUTH_GSS = 6,
+ RPC_AUTH_TLS = 7,
RPC_AUTH_MAXFLAVOR = 8,
/* pseudoflavors: */
RPC_AUTH_GSS_KRB5 = 390003,
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index e91d51ea028b..f0f846fa396e 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -19,6 +19,7 @@
#include <linux/sunrpc/svcauth.h>
#include <linux/wait.h>
#include <linux/mm.h>
+#include <linux/pagevec.h>
/* statistics for svc_pool structures */
struct svc_pool_stats {
@@ -256,6 +257,7 @@ struct svc_rqst {
struct page * *rq_next_page; /* next reply page to use */
struct page * *rq_page_end; /* one past the last page */
+ struct pagevec rq_pvec;
struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
struct bio_vec rq_bvec[RPCSVC_MAXPAGES];
@@ -502,6 +504,8 @@ struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv,
struct svc_pool *pool, int node);
struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
struct svc_pool *pool, int node);
+void svc_rqst_replace_page(struct svc_rqst *rqstp,
+ struct page *page);
void svc_rqst_free(struct svc_rqst *);
void svc_exit_thread(struct svc_rqst *);
unsigned int svc_pool_map_get(void);
@@ -523,6 +527,7 @@ void svc_wake_up(struct svc_serv *);
void svc_reserve(struct svc_rqst *rqstp, int space);
struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu);
char * svc_print_addr(struct svc_rqst *, char *, size_t);
+const char * svc_proc_name(const struct svc_rqst *rqstp);
int svc_encode_result_payload(struct svc_rqst *rqstp,
unsigned int offset,
unsigned int length);
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 3184465de3a0..24aa159d29a7 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -90,9 +90,9 @@ struct svcxprt_rdma {
struct ib_pd *sc_pd;
spinlock_t sc_send_lock;
- struct list_head sc_send_ctxts;
+ struct llist_head sc_send_ctxts;
spinlock_t sc_rw_ctxt_lock;
- struct list_head sc_rw_ctxts;
+ struct llist_head sc_rw_ctxts;
u32 sc_pending_recvs;
u32 sc_recv_batch;
@@ -150,7 +150,7 @@ struct svc_rdma_recv_ctxt {
};
struct svc_rdma_send_ctxt {
- struct list_head sc_list;
+ struct llist_node sc_node;
struct rpc_rdma_cid sc_cid;
struct ib_send_wr sc_send_wr;
@@ -207,6 +207,7 @@ extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
struct svc_rdma_recv_ctxt *rctxt,
int status);
+extern void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail);
extern int svc_rdma_sendto(struct svc_rqst *);
extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
unsigned int length);
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index a965cbc136ad..b519609af1d0 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -95,6 +95,7 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
#define rpc_auth_unix cpu_to_be32(RPC_AUTH_UNIX)
#define rpc_auth_short cpu_to_be32(RPC_AUTH_SHORT)
#define rpc_auth_gss cpu_to_be32(RPC_AUTH_GSS)
+#define rpc_auth_tls cpu_to_be32(RPC_AUTH_TLS)
#define rpc_call cpu_to_be32(RPC_CALL)
#define rpc_reply cpu_to_be32(RPC_REPLY)
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index c8c39f22d3b1..b15c1f07162d 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -288,7 +288,6 @@ struct rpc_xprt {
const char *address_strings[RPC_DISPLAY_MAX];
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
struct dentry *debugfs; /* debugfs directory */
- atomic_t inject_disconnect;
#endif
struct rcu_head rcu;
const struct xprt_class *xprt_class;
@@ -502,21 +501,4 @@ static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt)
return test_and_set_bit(XPRT_BINDING, &xprt->state);
}
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-extern unsigned int rpc_inject_disconnect;
-static inline void xprt_inject_disconnect(struct rpc_xprt *xprt)
-{
- if (!rpc_inject_disconnect)
- return;
- if (atomic_dec_return(&xprt->inject_disconnect))
- return;
- atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect);
- xprt->ops->inject_disconnect(xprt);
-}
-#else
-static inline void xprt_inject_disconnect(struct rpc_xprt *xprt)
-{
-}
-#endif
-
#endif /* _LINUX_SUNRPC_XPRT_H */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index d99ca99837de..1fa2b69c6fc3 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -48,6 +48,8 @@ typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer,
size_t *lenp, loff_t *ppos);
int proc_dostring(struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_dobool(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos);
int proc_dointvec(struct ctl_table *, int, void *, size_t *, loff_t *);
int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *);
int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *);
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index ad413b382a3c..8e0631a4b046 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -675,7 +675,7 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
#ifdef CONFIG_BPF_EVENTS
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
-int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
+int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie);
void perf_event_detach_bpf_prog(struct perf_event *event);
int perf_event_query_prog_array(struct perf_event *event, void __user *info);
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
@@ -692,7 +692,7 @@ static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *c
}
static inline int
-perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog)
+perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie)
{
return -EOPNOTSUPP;
}
@@ -803,6 +803,9 @@ extern void ftrace_profile_free_filter(struct perf_event *event);
void perf_trace_buf_update(void *record, u16 type);
void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);
+int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie);
+void perf_event_free_bpf_prog(struct perf_event *event);
+
void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
diff --git a/include/linux/typecheck.h b/include/linux/typecheck.h
index 20d310331eb5..46b15e2aaefb 100644
--- a/include/linux/typecheck.h
+++ b/include/linux/typecheck.h
@@ -22,4 +22,13 @@
(void)__tmp; \
})
+/*
+ * Check at compile time that something is a pointer type.
+ */
+#define typecheck_pointer(x) \
+({ typeof(x) __dummy; \
+ (void)sizeof(*__dummy); \
+ 1; \
+})
+
#endif /* TYPECHECK_H_INCLUDED */
diff --git a/include/linux/units.h b/include/linux/units.h
index dcc30a53fa93..4a25e0cc8fb3 100644
--- a/include/linux/units.h
+++ b/include/linux/units.h
@@ -4,6 +4,22 @@
#include <linux/math.h>
+/* Metric prefixes in accordance with Système international (d'unités) */
+#define PETA 1000000000000000ULL
+#define TERA 1000000000000ULL
+#define GIGA 1000000000UL
+#define MEGA 1000000UL
+#define KILO 1000UL
+#define HECTO 100UL
+#define DECA 10UL
+#define DECI 10UL
+#define CENTI 100UL
+#define MILLI 1000UL
+#define MICRO 1000000UL
+#define NANO 1000000000UL
+#define PICO 1000000000000ULL
+#define FEMTO 1000000000000000ULL
+
#define MILLIWATT_PER_WATT 1000L
#define MICROWATT_PER_MILLIWATT 1000L
#define MICROWATT_PER_WATT 1000000L
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 3357ac98878d..8cfe49d201dd 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -277,6 +277,17 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
const struct vdpa_config_ops *config,
size_t size, const char *name);
+/**
+ * vdpa_alloc_device - allocate and initialize a vDPA device
+ *
+ * @dev_struct: the type of the parent structure
+ * @member: the name of struct vdpa_device within the @dev_struct
+ * @parent: the parent device
+ * @config: the bus operations that are supported by this device
+ * @name: name of the vdpa device
+ *
+ * Return allocated data structure or ERR_PTR upon error
+ */
#define vdpa_alloc_device(dev_struct, member, parent, config, name) \
container_of(__vdpa_alloc_device( \
parent, config, \
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index b1894e0323fa..41edbc01ffa4 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -110,6 +110,7 @@ struct virtio_device {
bool config_enabled;
bool config_change_pending;
spinlock_t config_lock;
+ spinlock_t vqs_list_lock; /* Protects VQs list access */
struct device dev;
struct virtio_device_id id;
const struct virtio_config_ops *config;
diff --git a/include/linux/vringh.h b/include/linux/vringh.h
index 84db7b8f912f..212892cf9822 100644
--- a/include/linux/vringh.h
+++ b/include/linux/vringh.h
@@ -14,6 +14,7 @@
#include <linux/virtio_byteorder.h>
#include <linux/uio.h>
#include <linux/slab.h>
+#include <linux/spinlock.h>
#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
#include <linux/dma-direction.h>
#include <linux/vhost_iotlb.h>
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 6598ae35e1b5..93dab0e9580f 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -56,7 +56,7 @@ struct task_struct;
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
- .head = { &(name).head, &(name).head } }
+ .head = LIST_HEAD_INIT(name.head) }
#define DECLARE_WAIT_QUEUE_HEAD(name) \
struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index d15a7730ee18..2ebef6b1a3d6 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -29,7 +29,7 @@ void delayed_work_timer_fn(struct timer_list *t);
enum {
WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */
- WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */
+ WORK_STRUCT_INACTIVE_BIT= 1, /* work item is inactive */
WORK_STRUCT_PWQ_BIT = 2, /* data points to pwq */
WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */
#ifdef CONFIG_DEBUG_OBJECTS_WORK
@@ -42,7 +42,7 @@ enum {
WORK_STRUCT_COLOR_BITS = 4,
WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT,
- WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT,
+ WORK_STRUCT_INACTIVE = 1 << WORK_STRUCT_INACTIVE_BIT,
WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT,
WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT,
#ifdef CONFIG_DEBUG_OBJECTS_WORK
@@ -51,19 +51,14 @@ enum {
WORK_STRUCT_STATIC = 0,
#endif
- /*
- * The last color is no color used for works which don't
- * participate in workqueue flushing.
- */
- WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS) - 1,
- WORK_NO_COLOR = WORK_NR_COLORS,
+ WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS),
/* not bound to any CPU, prefer the local CPU */
WORK_CPU_UNBOUND = NR_CPUS,
/*
* Reserve 8 bits off of pwq pointer w/ debugobjects turned off.
- * This makes pwqs aligned to 256 bytes and allows 15 workqueue
+ * This makes pwqs aligned to 256 bytes and allows 16 workqueue
* flush colors.
*/
WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT +
@@ -324,7 +319,7 @@ enum {
* to execute and tries to keep idle cores idle to conserve power;
* however, for example, a per-cpu work item scheduled from an
* interrupt handler on an idle CPU will force the scheduler to
- * excute the work item on that CPU breaking the idleness, which in
+ * execute the work item on that CPU breaking the idleness, which in
* turn may lead to more scheduling choices which are sub-optimal
* in terms of power consumption.
*
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 667e86cfbdcf..270677dc4f36 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -336,14 +336,9 @@ static inline void cgroup_writeback_umount(void)
/*
* mm/page-writeback.c
*/
-#ifdef CONFIG_BLOCK
void laptop_io_completion(struct backing_dev_info *info);
void laptop_sync_completion(void);
-void laptop_mode_sync(struct work_struct *work);
void laptop_mode_timer_fn(struct timer_list *t);
-#else
-static inline void laptop_sync_completion(void) { }
-#endif
bool node_dirty_ok(struct pglist_data *pgdat);
int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
#ifdef CONFIG_CGROUP_WRITEBACK
diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index b77f39f319ad..29db736af86d 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -18,6 +18,24 @@
#define __LINUX_WW_MUTEX_H
#include <linux/mutex.h>
+#include <linux/rtmutex.h>
+
+#if defined(CONFIG_DEBUG_MUTEXES) || \
+ (defined(CONFIG_PREEMPT_RT) && defined(CONFIG_DEBUG_RT_MUTEXES))
+#define DEBUG_WW_MUTEXES
+#endif
+
+#ifndef CONFIG_PREEMPT_RT
+#define WW_MUTEX_BASE mutex
+#define ww_mutex_base_init(l,n,k) __mutex_init(l,n,k)
+#define ww_mutex_base_trylock(l) mutex_trylock(l)
+#define ww_mutex_base_is_locked(b) mutex_is_locked((b))
+#else
+#define WW_MUTEX_BASE rt_mutex
+#define ww_mutex_base_init(l,n,k) __rt_mutex_init(l,n,k)
+#define ww_mutex_base_trylock(l) rt_mutex_trylock(l)
+#define ww_mutex_base_is_locked(b) rt_mutex_base_is_locked(&(b)->rtmutex)
+#endif
struct ww_class {
atomic_long_t stamp;
@@ -28,16 +46,24 @@ struct ww_class {
unsigned int is_wait_die;
};
+struct ww_mutex {
+ struct WW_MUTEX_BASE base;
+ struct ww_acquire_ctx *ctx;
+#ifdef DEBUG_WW_MUTEXES
+ struct ww_class *ww_class;
+#endif
+};
+
struct ww_acquire_ctx {
struct task_struct *task;
unsigned long stamp;
unsigned int acquired;
unsigned short wounded;
unsigned short is_wait_die;
-#ifdef CONFIG_DEBUG_MUTEXES
+#ifdef DEBUG_WW_MUTEXES
unsigned int done_acquire;
struct ww_class *ww_class;
- struct ww_mutex *contending_lock;
+ void *contending_lock;
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
@@ -74,9 +100,9 @@ struct ww_acquire_ctx {
static inline void ww_mutex_init(struct ww_mutex *lock,
struct ww_class *ww_class)
{
- __mutex_init(&lock->base, ww_class->mutex_name, &ww_class->mutex_key);
+ ww_mutex_base_init(&lock->base, ww_class->mutex_name, &ww_class->mutex_key);
lock->ctx = NULL;
-#ifdef CONFIG_DEBUG_MUTEXES
+#ifdef DEBUG_WW_MUTEXES
lock->ww_class = ww_class;
#endif
}
@@ -113,7 +139,7 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx,
ctx->acquired = 0;
ctx->wounded = false;
ctx->is_wait_die = ww_class->is_wait_die;
-#ifdef CONFIG_DEBUG_MUTEXES
+#ifdef DEBUG_WW_MUTEXES
ctx->ww_class = ww_class;
ctx->done_acquire = 0;
ctx->contending_lock = NULL;
@@ -143,7 +169,7 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx,
*/
static inline void ww_acquire_done(struct ww_acquire_ctx *ctx)
{
-#ifdef CONFIG_DEBUG_MUTEXES
+#ifdef DEBUG_WW_MUTEXES
lockdep_assert_held(ctx);
DEBUG_LOCKS_WARN_ON(ctx->done_acquire);
@@ -163,7 +189,7 @@ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx)
#ifdef CONFIG_DEBUG_LOCK_ALLOC
mutex_release(&ctx->dep_map, _THIS_IP_);
#endif
-#ifdef CONFIG_DEBUG_MUTEXES
+#ifdef DEBUG_WW_MUTEXES
DEBUG_LOCKS_WARN_ON(ctx->acquired);
if (!IS_ENABLED(CONFIG_PROVE_LOCKING))
/*
@@ -269,7 +295,7 @@ static inline void
ww_mutex_lock_slow(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
int ret;
-#ifdef CONFIG_DEBUG_MUTEXES
+#ifdef DEBUG_WW_MUTEXES
DEBUG_LOCKS_WARN_ON(!ctx->contending_lock);
#endif
ret = ww_mutex_lock(lock, ctx);
@@ -305,7 +331,7 @@ static inline int __must_check
ww_mutex_lock_slow_interruptible(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
-#ifdef CONFIG_DEBUG_MUTEXES
+#ifdef DEBUG_WW_MUTEXES
DEBUG_LOCKS_WARN_ON(!ctx->contending_lock);
#endif
return ww_mutex_lock_interruptible(lock, ctx);
@@ -322,7 +348,7 @@ extern void ww_mutex_unlock(struct ww_mutex *lock);
*/
static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock)
{
- return mutex_trylock(&lock->base);
+ return ww_mutex_base_trylock(&lock->base);
}
/***
@@ -335,7 +361,9 @@ static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock)
*/
static inline void ww_mutex_destroy(struct ww_mutex *lock)
{
+#ifndef CONFIG_PREEMPT_RT
mutex_destroy(&lock->base);
+#endif
}
/**
@@ -346,7 +374,7 @@ static inline void ww_mutex_destroy(struct ww_mutex *lock)
*/
static inline bool ww_mutex_is_locked(struct ww_mutex *lock)
{
- return mutex_is_locked(&lock->base);
+ return ww_mutex_base_is_locked(&lock->base);
}
#endif
diff --git a/include/net/Space.h b/include/net/Space.h
index 9cce0d80d37a..08ca9cef0213 100644
--- a/include/net/Space.h
+++ b/include/net/Space.h
@@ -8,23 +8,13 @@ struct net_device *ultra_probe(int unit);
struct net_device *wd_probe(int unit);
struct net_device *ne_probe(int unit);
struct net_device *fmv18x_probe(int unit);
-struct net_device *i82596_probe(int unit);
struct net_device *ni65_probe(int unit);
struct net_device *sonic_probe(int unit);
struct net_device *smc_init(int unit);
-struct net_device *atarilance_probe(int unit);
-struct net_device *sun3lance_probe(int unit);
-struct net_device *sun3_82586_probe(int unit);
-struct net_device *apne_probe(int unit);
struct net_device *cs89x0_probe(int unit);
-struct net_device *mvme147lance_probe(int unit);
struct net_device *tc515_probe(int unit);
struct net_device *lance_probe(int unit);
struct net_device *cops_probe(int unit);
-struct net_device *ltpc_probe(void);
/* Fibre Channel adapters */
int iph5526_probe(struct net_device *dev);
-
-/* SBNI adapters */
-int sbni_probe(int unit);
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 086b291e9530..f19f7f4a463c 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -58,6 +58,14 @@ struct tc_action {
#define TCA_ACT_HW_STATS_ANY (TCA_ACT_HW_STATS_IMMEDIATE | \
TCA_ACT_HW_STATS_DELAYED)
+/* Reserve 16 bits for user-space. See TCA_ACT_FLAGS_NO_PERCPU_STATS. */
+#define TCA_ACT_FLAGS_USER_BITS 16
+#define TCA_ACT_FLAGS_USER_MASK 0xffff
+#define TCA_ACT_FLAGS_POLICE (1U << TCA_ACT_FLAGS_USER_BITS)
+#define TCA_ACT_FLAGS_BIND (1U << (TCA_ACT_FLAGS_USER_BITS + 1))
+#define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2))
+#define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3))
+
/* Update lastuse only if needed, to avoid dirtying a cache line.
* We use a temp variable to avoid fetching jiffies twice.
*/
@@ -99,8 +107,8 @@ struct tc_action_ops {
void (*cleanup)(struct tc_action *);
int (*lookup)(struct net *net, struct tc_action **a, u32 index);
int (*init)(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **act, int ovr,
- int bind, bool rtnl_held, struct tcf_proto *tp,
+ struct nlattr *est, struct tc_action **act,
+ struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack);
int (*walk)(struct net *, struct sk_buff *,
struct netlink_callback *, int,
@@ -179,18 +187,16 @@ int tcf_action_destroy(struct tc_action *actions[], int bind);
int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
int nr_actions, struct tcf_result *res);
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
- struct nlattr *est, char *name, int ovr, int bind,
+ struct nlattr *est,
struct tc_action *actions[], int init_res[], size_t *attr_size,
- bool rtnl_held, struct netlink_ext_ack *extack);
-struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
+ u32 flags, struct netlink_ext_ack *extack);
+struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
bool rtnl_held,
struct netlink_ext_ack *extack);
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
- char *name, int ovr, int bind,
struct tc_action_ops *a_o, int *init_res,
- bool rtnl_held,
- struct netlink_ext_ack *extack);
+ u32 flags, struct netlink_ext_ack *extack);
int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
int ref, bool terse);
int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index f42fdddecd41..7d142e8a0550 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -70,6 +70,9 @@ struct unix_sock {
struct socket_wq peer_wq;
wait_queue_entry_t peer_wake;
struct scm_stat scm_stat;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ struct sk_buff *oob_skb;
+#endif
};
static inline struct unix_sock *unix_sk(const struct sock *sk)
@@ -82,6 +85,10 @@ static inline struct unix_sock *unix_sk(const struct sock *sk)
long unix_inq_len(struct sock *sk);
long unix_outq_len(struct sock *sk);
+int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
+ int flags);
+int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
+ int flags);
#ifdef CONFIG_SYSCTL
int unix_sysctl_register(struct net *net);
void unix_sysctl_unregister(struct net *net);
@@ -89,4 +96,16 @@ void unix_sysctl_unregister(struct net *net);
static inline int unix_sysctl_register(struct net *net) { return 0; }
static inline void unix_sysctl_unregister(struct net *net) {}
#endif
+
+#ifdef CONFIG_BPF_SYSCALL
+extern struct proto unix_dgram_proto;
+extern struct proto unix_stream_proto;
+
+int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
+int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
+void __init unix_bpf_build_proto(void);
+#else
+static inline void __init unix_bpf_build_proto(void)
+{}
+#endif
#endif
diff --git a/include/net/ax88796.h b/include/net/ax88796.h
index aa52b2e8ff7b..2ed23a368602 100644
--- a/include/net/ax88796.h
+++ b/include/net/ax88796.h
@@ -38,4 +38,7 @@ struct ax_plat_data {
int (*check_irq)(struct platform_device *pdev);
};
+/* exported from ax88796.c for xsurf100.c */
+extern void ax_NS8390_reinit(struct net_device *dev);
+
#endif /* __NET_AX88796_PLAT_H */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index db4312e44d47..a7360c8c72f8 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -221,6 +221,7 @@ struct oob_data {
struct adv_info {
struct list_head list;
+ bool enabled;
bool pending;
__u8 instance;
__u32 flags;
@@ -628,6 +629,7 @@ struct hci_conn {
__u8 init_addr_type;
bdaddr_t resp_addr;
__u8 resp_addr_type;
+ __u8 adv_instance;
__u16 handle;
__u16 state;
__u8 mode;
@@ -1223,14 +1225,25 @@ static inline void hci_set_drvdata(struct hci_dev *hdev, void *data)
dev_set_drvdata(&hdev->dev, data);
}
+static inline void *hci_get_priv(struct hci_dev *hdev)
+{
+ return (char *)hdev + sizeof(*hdev);
+}
+
struct hci_dev *hci_dev_get(int index);
struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, u8 src_type);
-struct hci_dev *hci_alloc_dev(void);
+struct hci_dev *hci_alloc_dev_priv(int sizeof_priv);
+
+static inline struct hci_dev *hci_alloc_dev(void)
+{
+ return hci_alloc_dev_priv(0);
+}
+
void hci_free_dev(struct hci_dev *hdev);
int hci_register_dev(struct hci_dev *hdev);
void hci_unregister_dev(struct hci_dev *hdev);
-void hci_cleanup_dev(struct hci_dev *hdev);
+void hci_release_dev(struct hci_dev *hdev);
int hci_suspend_dev(struct hci_dev *hdev);
int hci_resume_dev(struct hci_dev *hdev);
int hci_reset_dev(struct hci_dev *hdev);
@@ -1412,6 +1425,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
!hci_dev_test_flag(dev, HCI_AUTO_OFF))
#define bredr_sc_enabled(dev) (lmp_sc_capable(dev) && \
hci_dev_test_flag(dev, HCI_SC_ENABLED))
+#define rpa_valid(dev) (bacmp(&(dev)->rpa, BDADDR_ANY) && \
+ !hci_dev_test_flag(dev, HCI_RPA_EXPIRED)) /* bacmp() vs BDADDR_ANY is non-zero and HCI_RPA_EXPIRED is clear */
+#define adv_rpa_valid(adv) (bacmp(&(adv)->random_addr, BDADDR_ANY) && \
+ !(adv)->rpa_expired) /* bacmp() vs BDADDR_ANY is non-zero and rpa_expired is false */
#define scan_1m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_1M) || \
((dev)->le_rx_def_phys & HCI_LE_SET_PHY_1M))
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index c8696a230b7d..38785d48baff 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -303,6 +303,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond,
int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
struct slave *slave);
int bond_3ad_set_carrier(struct bonding *bond);
+void bond_3ad_update_lacp_active(struct bonding *bond);
void bond_3ad_update_lacp_rate(struct bonding *bond);
void bond_3ad_update_ad_actor_settings(struct bonding *bond);
int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats);
diff --git a/include/net/bond_options.h b/include/net/bond_options.h
index 9d382f2f0bc5..e64833a674eb 100644
--- a/include/net/bond_options.h
+++ b/include/net/bond_options.h
@@ -64,6 +64,7 @@ enum {
BOND_OPT_AD_USER_PORT_KEY,
BOND_OPT_NUM_PEER_NOTIF_ALIAS,
BOND_OPT_PEER_NOTIF_DELAY,
+ BOND_OPT_LACP_ACTIVE,
BOND_OPT_LAST
};
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 625d9c72dee3..15e083e18f75 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -129,6 +129,7 @@ struct bond_params {
int updelay;
int downdelay;
int peer_notif_delay;
+ int lacp_active;
int lacp_fast;
unsigned int min_links;
int ad_select;
@@ -149,11 +150,6 @@ struct bond_params {
u8 ad_actor_system[ETH_ALEN + 2];
};
-struct bond_parm_tbl {
- char *modename;
- int mode;
-};
-
struct slave {
struct net_device *dev; /* first - useful for panic debug */
struct bonding *bond; /* our master */
@@ -258,6 +254,7 @@ struct bonding {
/* protecting ipsec_list */
spinlock_t ipsec_lock;
#endif /* CONFIG_XFRM_OFFLOAD */
+ struct bpf_prog *xdp_prog;
};
#define bond_slave_get_rcu(dev) \
@@ -753,13 +750,6 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip)
/* exported from bond_main.c */
extern unsigned int bond_net_id;
-extern const struct bond_parm_tbl bond_lacp_tbl[];
-extern const struct bond_parm_tbl xmit_hashtype_tbl[];
-extern const struct bond_parm_tbl arp_validate_tbl[];
-extern const struct bond_parm_tbl arp_all_targets_tbl[];
-extern const struct bond_parm_tbl fail_over_mac_tbl[];
-extern const struct bond_parm_tbl pri_reselect_tbl[];
-extern struct bond_parm_tbl ad_select_tbl[];
/* exported from bond_netlink.c */
extern struct rtnl_link_ops bond_link_ops;
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 161cdf7df1a0..62dd8422e0dc 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1253,6 +1253,27 @@ struct cfg80211_csa_settings {
};
/**
+ * struct cfg80211_color_change_settings - color change settings
+ *
+ * Used for bss color change
+ *
+ * @beacon_color_change: beacon data while performing the color countdown
+ * @counter_offset_beacon: offset of the counter within the beacon (tail)
+ * @counter_offset_presp: offset of the counter within the probe response
+ * @beacon_next: beacon data to be used after the color change
+ * @count: number of beacons until the color change
+ * @color: the color used after the change
+ */
+struct cfg80211_color_change_settings {
+ struct cfg80211_beacon_data beacon_color_change;
+ u16 counter_offset_beacon;
+ u16 counter_offset_presp;
+ struct cfg80211_beacon_data beacon_next;
+ u8 count;
+ u8 color;
+};
+
+/**
* struct iface_combination_params - input parameters for interface combinations
*
* Used to pass interface combination parameters
@@ -3995,6 +4016,8 @@ struct mgmt_frame_regs {
* given TIDs. This callback may sleep.
*
* @set_sar_specs: Update the SAR (TX power) settings.
+ *
+ * @color_change: Initiate a color change.
*/
struct cfg80211_ops {
int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -4322,6 +4345,9 @@ struct cfg80211_ops {
const u8 *peer, u8 tids);
int (*set_sar_specs)(struct wiphy *wiphy,
struct cfg80211_sar_specs *sar);
+ int (*color_change)(struct wiphy *wiphy,
+ struct net_device *dev,
+ struct cfg80211_color_change_settings *params);
};
/*
@@ -8218,4 +8244,70 @@ void cfg80211_update_owe_info_event(struct net_device *netdev,
*/
void cfg80211_bss_flush(struct wiphy *wiphy);
+/**
+ * cfg80211_bss_color_notify - notify about bss color event
+ * @dev: network device
+ * @gfp: allocation flags
+ * @cmd: the actual event we want to notify
+ * @count: the number of TBTTs until the color change happens
+ * @color_bitmap: representations of the colors that the local BSS is aware of
+ */
+int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp,
+ enum nl80211_commands cmd, u8 count,
+ u64 color_bitmap);
+
+/**
+ * cfg80211_obss_color_collision_notify - notify about bss color collision
+ * @dev: network device
+ * @color_bitmap: representations of the colors that the local BSS is aware of
+ */
+static inline int cfg80211_obss_color_collision_notify(struct net_device *dev,
+ u64 color_bitmap)
+{
+ return cfg80211_bss_color_notify(dev, GFP_KERNEL,
+ NL80211_CMD_OBSS_COLOR_COLLISION,
+ 0, color_bitmap);
+}
+
+/**
+ * cfg80211_color_change_started_notify - notify color change start
+ * @dev: the device on which the color is switched
+ * @count: the number of TBTTs until the color change happens
+ *
+ * Inform the userspace about the color change that has started.
+ */
+static inline int cfg80211_color_change_started_notify(struct net_device *dev,
+ u8 count)
+{
+ return cfg80211_bss_color_notify(dev, GFP_KERNEL,
+ NL80211_CMD_COLOR_CHANGE_STARTED,
+ count, 0);
+}
+
+/**
+ * cfg80211_color_change_aborted_notify - notify color change abort
+ * @dev: the device on which the color is switched
+ *
+ * Inform the userspace about the color change that has aborted.
+ */
+static inline int cfg80211_color_change_aborted_notify(struct net_device *dev)
+{
+ return cfg80211_bss_color_notify(dev, GFP_KERNEL,
+ NL80211_CMD_COLOR_CHANGE_ABORTED,
+ 0, 0);
+}
+
+/**
+ * cfg80211_color_change_notify - notify color change completion
+ * @dev: the device on which the color was switched
+ *
+ * Inform the userspace about the color change that has completed.
+ */
+static inline int cfg80211_color_change_notify(struct net_device *dev)
+{
+ return cfg80211_bss_color_notify(dev, GFP_KERNEL,
+ NL80211_CMD_COLOR_CHANGE_COMPLETED,
+ 0, 0);
+}
+
#endif /* __NET_CFG80211_H */
diff --git a/include/net/compat.h b/include/net/compat.h
index 84805bdc4435..595fee069b82 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -71,13 +71,26 @@ struct compat_group_source_req {
} __packed;
struct compat_group_filter {
- __u32 gf_interface;
- struct __kernel_sockaddr_storage gf_group
- __aligned(4);
- __u32 gf_fmode;
- __u32 gf_numsrc;
- struct __kernel_sockaddr_storage gf_slist[1]
- __aligned(4);
+ union {
+ struct {
+ __u32 gf_interface_aux;
+ struct __kernel_sockaddr_storage gf_group_aux
+ __aligned(4);
+ __u32 gf_fmode_aux;
+ __u32 gf_numsrc_aux;
+ struct __kernel_sockaddr_storage gf_slist[1]
+ __aligned(4);
+ } __packed;
+ struct {
+ __u32 gf_interface;
+ struct __kernel_sockaddr_storage gf_group
+ __aligned(4);
+ __u32 gf_fmode;
+ __u32 gf_numsrc;
+ struct __kernel_sockaddr_storage gf_slist_flex[]
+ __aligned(4);
+ } __packed;
+ };
} __packed;
#endif /* NET_COMPAT_H */
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 57b738b78073..154cf0dbca37 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -32,7 +32,7 @@ struct devlink_dev_stats {
struct devlink_ops;
struct devlink {
- struct list_head list;
+ u32 index;
struct list_head port_list;
struct list_head rate_list;
struct list_head sb_list;
@@ -55,8 +55,9 @@ struct devlink {
* port, sb, dpipe, resource, params, region, traps and more.
*/
u8 reload_failed:1,
- reload_enabled:1,
- registered:1;
+ reload_enabled:1;
+ refcount_t refcount;
+ struct completion comp;
char priv[0] __aligned(NETDEV_ALIGN);
};
@@ -158,7 +159,6 @@ struct devlink_port {
struct list_head region_list;
struct devlink *devlink;
unsigned int index;
- bool registered;
spinlock_t type_lock; /* Protects type and type_dev
* pointer consistency.
*/
@@ -521,6 +521,9 @@ enum devlink_param_generic_id {
DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE,
DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
/* add new param generic ids above here*/
__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -561,6 +564,15 @@ enum devlink_param_generic_id {
#define DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME "enable_remote_dev_reset"
#define DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE DEVLINK_PARAM_TYPE_BOOL
+#define DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME "enable_eth"
+#define DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE DEVLINK_PARAM_TYPE_BOOL
+
+#define DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME "enable_rdma"
+#define DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE DEVLINK_PARAM_TYPE_BOOL
+
+#define DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME "enable_vnet"
+#define DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE DEVLINK_PARAM_TYPE_BOOL
+
#define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \
{ \
.id = DEVLINK_PARAM_GENERIC_ID_##_id, \
@@ -1398,8 +1410,8 @@ struct devlink_ops {
*
* Note: @extack can be NULL when port notifier queries the port function.
*/
- int (*port_function_hw_addr_get)(struct devlink *devlink, struct devlink_port *port,
- u8 *hw_addr, int *hw_addr_len,
+ int (*port_function_hw_addr_get)(struct devlink_port *port, u8 *hw_addr,
+ int *hw_addr_len,
struct netlink_ext_ack *extack);
/**
* @port_function_hw_addr_set: Port function's hardware address set function.
@@ -1408,7 +1420,7 @@ struct devlink_ops {
* by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port
* function handling for a particular port.
*/
- int (*port_function_hw_addr_set)(struct devlink *devlink, struct devlink_port *port,
+ int (*port_function_hw_addr_set)(struct devlink_port *port,
const u8 *hw_addr, int hw_addr_len,
struct netlink_ext_ack *extack);
/**
@@ -1464,8 +1476,7 @@ struct devlink_ops {
*
* Return: 0 on success, negative value otherwise.
*/
- int (*port_fn_state_get)(struct devlink *devlink,
- struct devlink_port *port,
+ int (*port_fn_state_get)(struct devlink_port *port,
enum devlink_port_fn_state *state,
enum devlink_port_fn_opstate *opstate,
struct netlink_ext_ack *extack);
@@ -1480,8 +1491,7 @@ struct devlink_ops {
*
* Return: 0 on success, negative value otherwise.
*/
- int (*port_fn_state_set)(struct devlink *devlink,
- struct devlink_port *port,
+ int (*port_fn_state_set)(struct devlink_port *port,
enum devlink_port_fn_state state,
struct netlink_ext_ack *extack);
@@ -1542,9 +1552,21 @@ static inline struct devlink *netdev_to_devlink(struct net_device *dev)
struct ib_device;
struct net *devlink_net(const struct devlink *devlink);
-void devlink_net_set(struct devlink *devlink, struct net *net);
-struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size);
-int devlink_register(struct devlink *devlink, struct device *dev);
+/* This call is intended for software devices that can create
+ * devlink instances in other namespaces than init_net.
+ *
+ * Drivers that operate on real HW must use devlink_alloc() instead.
+ */
+struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
+ size_t priv_size, struct net *net,
+ struct device *dev);
+static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
+ size_t priv_size,
+ struct device *dev)
+{
+ return devlink_alloc_ns(ops, priv_size, &init_net, dev);
+}
+int devlink_register(struct devlink *devlink);
void devlink_unregister(struct devlink *devlink);
void devlink_reload_enable(struct devlink *devlink);
void devlink_reload_disable(struct devlink *devlink);
@@ -1625,8 +1647,16 @@ int devlink_params_register(struct devlink *devlink,
void devlink_params_unregister(struct devlink *devlink,
const struct devlink_param *params,
size_t params_count);
+int devlink_param_register(struct devlink *devlink,
+ const struct devlink_param *param);
+void devlink_param_unregister(struct devlink *devlink,
+ const struct devlink_param *param);
void devlink_params_publish(struct devlink *devlink);
void devlink_params_unpublish(struct devlink *devlink);
+void devlink_param_publish(struct devlink *devlink,
+ const struct devlink_param *param);
+void devlink_param_unpublish(struct devlink *devlink,
+ const struct devlink_param *param);
int devlink_port_params_register(struct devlink_port *devlink_port,
const struct devlink_param *params,
size_t params_count);
diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h
index ccc6e9df178b..ddd6565957b3 100644
--- a/include/net/dn_fib.h
+++ b/include/net/dn_fib.h
@@ -29,7 +29,7 @@ struct dn_fib_nh {
struct dn_fib_info {
struct dn_fib_info *fib_next;
struct dn_fib_info *fib_prev;
- int fib_treeref;
+ refcount_t fib_treeref;
refcount_t fib_clntref;
int fib_dead;
unsigned int fib_flags;
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 33f40c1ec379..f9a17145255a 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -79,20 +79,13 @@ enum dsa_tag_protocol {
DSA_TAG_PROTO_SJA1110 = DSA_TAG_PROTO_SJA1110_VALUE,
};
-struct packet_type;
struct dsa_switch;
struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
- struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt);
+ struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
int *offset);
- /* Used to determine which traffic should match the DSA filter in
- * eth_type_trans, and which, if any, should bypass it and be processed
- * as regular on the master net device.
- */
- bool (*filter)(const struct sk_buff *skb, struct net_device *dev);
unsigned int needed_headroom;
unsigned int needed_tailroom;
const char *name;
@@ -111,8 +104,8 @@ struct dsa_device_ops {
* function pointers.
*/
struct dsa_netdevice_ops {
- int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr,
- int cmd);
+ int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr,
+ int cmd);
};
#define DSA_TAG_DRIVER_ALIAS "dsa_tag-"
@@ -159,6 +152,9 @@ struct dsa_switch_tree {
*/
struct net_device **lags;
unsigned int lags_len;
+
+ /* Track the largest switch index within a tree */
+ unsigned int last_switch;
};
#define dsa_lags_foreach_id(_id, _dst) \
@@ -238,9 +234,7 @@ struct dsa_port {
/* Copies for faster access in master receive hot path */
struct dsa_switch_tree *dst;
- struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt);
- bool (*filter)(const struct sk_buff *skb, struct net_device *dev);
+ struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
enum {
DSA_PORT_TYPE_UNUSED = 0,
@@ -257,8 +251,11 @@ struct dsa_port {
struct device_node *dn;
unsigned int ageing_time;
bool vlan_filtering;
+ /* Managed by DSA on user ports and by drivers on CPU and DSA ports */
+ bool learning;
u8 stp_state;
struct net_device *bridge_dev;
+ int bridge_num;
struct devlink_port devlink_port;
bool devlink_port_setup;
struct phylink *pl;
@@ -352,6 +349,9 @@ struct dsa_switch {
unsigned int ageing_time_min;
unsigned int ageing_time_max;
+ /* Storage for drivers using tag_8021q */
+ struct dsa_8021q_context *tag_8021q_ctx;
+
/* devlink used to represent this switch device */
struct devlink *devlink;
@@ -363,6 +363,9 @@ struct dsa_switch {
*/
bool vlan_filtering_is_global;
+ /* Keep VLAN filtering enabled on ports not offloading any upper. */
+ bool needs_standalone_vlan_filtering;
+
/* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges
* that have vlan_filtering=0. All drivers should ideally set this (and
* then the option would get removed), but it is unknown whether this
@@ -407,6 +410,13 @@ struct dsa_switch {
*/
unsigned int num_lag_ids;
+ /* Drivers that support bridge forwarding offload should set this to
+ * the maximum number of bridges spanning the same switch tree (or all
+ * trees, in the case of cross-tree bridging support) that can be
+ * offloaded.
+ */
+ unsigned int num_fwd_offloading_bridges;
+
size_t num_ports;
};
@@ -690,6 +700,14 @@ struct dsa_switch_ops {
struct net_device *bridge);
void (*port_bridge_leave)(struct dsa_switch *ds, int port,
struct net_device *bridge);
+ /* Called right after .port_bridge_join() */
+ int (*port_bridge_tx_fwd_offload)(struct dsa_switch *ds, int port,
+ struct net_device *bridge,
+ int bridge_num);
+ /* Called right before .port_bridge_leave() */
+ void (*port_bridge_tx_fwd_unoffload)(struct dsa_switch *ds, int port,
+ struct net_device *bridge,
+ int bridge_num);
void (*port_stp_state_set)(struct dsa_switch *ds, int port,
u8 state);
void (*port_fast_age)(struct dsa_switch *ds, int port);
@@ -699,8 +717,6 @@ struct dsa_switch_ops {
int (*port_bridge_flags)(struct dsa_switch *ds, int port,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack);
- int (*port_set_mrouter)(struct dsa_switch *ds, int port, bool mrouter,
- struct netlink_ext_ack *extack);
/*
* VLAN support
@@ -869,6 +885,13 @@ struct dsa_switch_ops {
const struct switchdev_obj_ring_role_mrp *mrp);
int (*port_mrp_del_ring_role)(struct dsa_switch *ds, int port,
const struct switchdev_obj_ring_role_mrp *mrp);
+
+ /*
+ * tag_8021q operations
+ */
+ int (*tag_8021q_vlan_add)(struct dsa_switch *ds, int port, u16 vid,
+ u16 flags);
+ int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid);
};
#define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \
@@ -954,15 +977,6 @@ static inline bool netdev_uses_dsa(const struct net_device *dev)
return false;
}
-static inline bool dsa_can_decode(const struct sk_buff *skb,
- struct net_device *dev)
-{
-#if IS_ENABLED(CONFIG_NET_DSA)
- return !dev->dsa_ptr->filter || dev->dsa_ptr->filter(skb, dev);
-#endif
- return false;
-}
-
/* All DSA tags that push the EtherType to the right (basically all except tail
* tags, which don't break dissection) can be treated the same from the
* perspective of the flow dissector.
@@ -1003,8 +1017,8 @@ static inline int __dsa_netdevice_ops_check(struct net_device *dev)
return 0;
}
-static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr,
- int cmd)
+static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr,
+ int cmd)
{
const struct dsa_netdevice_ops *ops;
int err;
@@ -1015,11 +1029,11 @@ static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr,
ops = dev->dsa_ptr->netdev_ops;
- return ops->ndo_do_ioctl(dev, ifr, cmd);
+ return ops->ndo_eth_ioctl(dev, ifr, cmd);
}
#else
-static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr,
- int cmd)
+static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr,
+ int cmd)
{
return -EOPNOTSUPP;
}
diff --git a/include/net/dst.h b/include/net/dst.h
index 75b1e734e9c2..a057319aabef 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -277,6 +277,7 @@ static inline void skb_dst_drop(struct sk_buff *skb)
static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst)
{
+ nskb->slow_gro |= !!refdst;
nskb->_skb_refdst = refdst;
if (!(nskb->_skb_refdst & SKB_DST_NOREF))
dst_clone(skb_dst(nskb));
@@ -316,6 +317,7 @@ static inline bool skb_dst_force(struct sk_buff *skb)
dst = NULL;
skb->_skb_refdst = (unsigned long)dst;
+ skb->slow_gro |= !!dst;
}
return skb->_skb_refdst != 0UL;
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index f3c2841566a0..3961461d9c8b 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -319,14 +319,12 @@ flow_action_mixed_hw_stats_check(const struct flow_action *action,
if (flow_offload_has_one_action(action))
return true;
- if (action) {
- flow_action_for_each(i, action_entry, action) {
- if (i && action_entry->hw_stats != last_hw_stats) {
- NL_SET_ERR_MSG_MOD(extack, "Mixing HW stats types for actions is not supported");
- return false;
- }
- last_hw_stats = action_entry->hw_stats;
+ flow_action_for_each(i, action_entry, action) {
+ if (i && action_entry->hw_stats != last_hw_stats) {
+ NL_SET_ERR_MSG_MOD(extack, "Mixing HW stats types for actions is not supported");
+ return false;
}
+ last_hw_stats = action_entry->hw_stats;
}
return true;
}
@@ -453,6 +451,7 @@ struct flow_block_offload {
struct list_head *driver_block_list;
struct netlink_ext_ack *extack;
struct Qdisc *sch;
+ struct list_head *cb_list_head;
};
enum tc_setup_type;
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index c0854933e24f..11630351c978 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -43,6 +43,11 @@ struct ieee80211_radiotap_header {
* @it_present: (first) present word
*/
__le32 it_present;
+
+ /**
+ * @it_optional: all remaining presence bitmaps
+ */
+ __le32 it_optional[];
} __packed;
/* version is always 0 */
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 71bb4cc4d05d..653e7d0f65cb 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -82,9 +82,6 @@ struct ip6_sf_socklist {
struct in6_addr sl_addr[];
};
-#define IP6_SFLSIZE(count) (sizeof(struct ip6_sf_socklist) + \
- (count) * sizeof(struct in6_addr))
-
#define IP6_SFBLOCK 10 /* allocate this many at once */
struct ipv6_mc_socklist {
@@ -213,6 +210,8 @@ struct inet6_dev {
unsigned long tstamp; /* ipv6InterfaceTable update timestamp */
struct rcu_head rcu;
+
+ unsigned int ra_mtu;
};
static inline void ipv6_eth_mc_map(const struct in6_addr *addr, char *buf)
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index ca6a3ea9057e..f72ec113ae56 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -160,6 +160,12 @@ struct inet_hashinfo {
____cacheline_aligned_in_smp;
};
+#define inet_lhash2_for_each_icsk_continue(__icsk) \
+ hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node)
+
+#define inet_lhash2_for_each_icsk(__icsk, list) \
+ hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node)
+
#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \
hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node)
diff --git a/include/net/ioam6.h b/include/net/ioam6.h
new file mode 100644
index 000000000000..3c2993bc48c8
--- /dev/null
+++ b/include/net/ioam6.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * IPv6 IOAM implementation
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#ifndef _NET_IOAM6_H
+#define _NET_IOAM6_H
+
+#include <linux/net.h>
+#include <linux/ipv6.h>
+#include <linux/ioam6.h>
+#include <linux/rhashtable-types.h>
+
+struct ioam6_namespace {
+ struct rhash_head head;
+ struct rcu_head rcu;
+
+ struct ioam6_schema __rcu *schema;
+
+ __be16 id;
+ __be32 data;
+ __be64 data_wide;
+};
+
+struct ioam6_schema {
+ struct rhash_head head;
+ struct rcu_head rcu;
+
+ struct ioam6_namespace __rcu *ns;
+
+ u32 id;
+ int len;
+ __be32 hdr;
+
+ u8 data[]; /* flexible array member; must remain the last field */
+};
+
+struct ioam6_pernet_data {
+ struct mutex lock;
+ struct rhashtable namespaces;
+ struct rhashtable schemas;
+};
+
+static inline struct ioam6_pernet_data *ioam6_pernet(struct net *net)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ return net->ipv6.ioam6_data;
+#else
+ return NULL;
+#endif
+}
+
+struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id);
+void ioam6_fill_trace_data(struct sk_buff *skb,
+ struct ioam6_namespace *ns,
+ struct ioam6_trace_hdr *trace);
+
+int ioam6_init(void);
+void ioam6_exit(void);
+
+int ioam6_iptunnel_init(void);
+void ioam6_iptunnel_exit(void);
+
+#endif /* _NET_IOAM6_H */
diff --git a/include/net/ip.h b/include/net/ip.h
index d9683bef8684..9192444f2964 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -436,18 +436,32 @@ static inline bool ip_sk_ignore_df(const struct sock *sk)
static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
bool forwarding)
{
+ const struct rtable *rt = container_of(dst, struct rtable, dst);
struct net *net = dev_net(dst->dev);
unsigned int mtu;
if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
ip_mtu_locked(dst) ||
- !forwarding)
- return dst_mtu(dst);
+ !forwarding) {
+ mtu = rt->rt_pmtu;
+ if (mtu && time_before(jiffies, rt->dst.expires))
+ goto out;
+ }
/* 'forwarding = true' case should always honour route mtu */
mtu = dst_metric_raw(dst, RTAX_MTU);
- if (!mtu)
- mtu = min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU);
+ if (mtu)
+ goto out;
+
+ mtu = READ_ONCE(dst->dev->mtu);
+
+ if (unlikely(ip_mtu_locked(dst))) {
+ if (rt->rt_uses_gateway && mtu > 576)
+ mtu = 576;
+ }
+
+out:
+ mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 15b7fbe6b15c..c412dde4d67d 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -267,7 +267,7 @@ static inline bool fib6_check_expired(const struct fib6_info *f6i)
return false;
}
-/* Function to safely get fn->sernum for passed in rt
+/* Function to safely get fn->fn_sernum for passed in rt
* and store result in passed in cookie.
* Return true if we can get cookie safely
* Return false if not
@@ -282,7 +282,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i,
if (fn) {
*cookie = fn->fn_sernum;
- /* pairs with smp_wmb() in fib6_update_sernum_upto_root() */
+ /* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */
smp_rmb();
status = true;
}
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 0bf09a9bca4e..5efd0b71dc67 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -316,12 +316,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
!lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
}
-static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst,
+ bool forwarding)
{
struct inet6_dev *idev;
unsigned int mtu;
- if (dst_metric_locked(dst, RTAX_MTU)) {
+ if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) {
mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
goto out;
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 3ab2563b1a23..21c5386d4a6d 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -133,7 +133,7 @@ struct fib_info {
struct hlist_node fib_lhash;
struct list_head nh_list;
struct net *fib_net;
- int fib_treeref;
+ refcount_t fib_treeref;
refcount_t fib_clntref;
unsigned int fib_flags;
unsigned char fib_dead;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 548b65bd3973..bc3b13ec93c9 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -270,7 +270,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const u8 proto, int tunnel_hlen);
int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
-int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd);
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
diff --git a/include/net/ipx.h b/include/net/ipx.h
deleted file mode 100644
index 9d1342807b59..000000000000
--- a/include/net/ipx.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NET_INET_IPX_H_
-#define _NET_INET_IPX_H_
-/*
- * The following information is in its entirety obtained from:
- *
- * Novell 'IPX Router Specification' Version 1.10
- * Part No. 107-000029-001
- *
- * Which is available from ftp.novell.com
- */
-
-#include <linux/netdevice.h>
-#include <net/datalink.h>
-#include <linux/ipx.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/refcount.h>
-
-struct ipx_address {
- __be32 net;
- __u8 node[IPX_NODE_LEN];
- __be16 sock;
-};
-
-#define ipx_broadcast_node "\377\377\377\377\377\377"
-#define ipx_this_node "\0\0\0\0\0\0"
-
-#define IPX_MAX_PPROP_HOPS 8
-
-struct ipxhdr {
- __be16 ipx_checksum __packed;
-#define IPX_NO_CHECKSUM cpu_to_be16(0xFFFF)
- __be16 ipx_pktsize __packed;
- __u8 ipx_tctrl;
- __u8 ipx_type;
-#define IPX_TYPE_UNKNOWN 0x00
-#define IPX_TYPE_RIP 0x01 /* may also be 0 */
-#define IPX_TYPE_SAP 0x04 /* may also be 0 */
-#define IPX_TYPE_SPX 0x05 /* SPX protocol */
-#define IPX_TYPE_NCP 0x11 /* $lots for docs on this (SPIT) */
-#define IPX_TYPE_PPROP 0x14 /* complicated flood fill brdcast */
- struct ipx_address ipx_dest __packed;
- struct ipx_address ipx_source __packed;
-};
-
-/* From af_ipx.c */
-extern int sysctl_ipx_pprop_broadcasting;
-
-struct ipx_interface {
- /* IPX address */
- __be32 if_netnum;
- unsigned char if_node[IPX_NODE_LEN];
- refcount_t refcnt;
-
- /* physical device info */
- struct net_device *if_dev;
- struct datalink_proto *if_dlink;
- __be16 if_dlink_type;
-
- /* socket support */
- unsigned short if_sknum;
- struct hlist_head if_sklist;
- spinlock_t if_sklist_lock;
-
- /* administrative overhead */
- int if_ipx_offset;
- unsigned char if_internal;
- unsigned char if_primary;
-
- struct list_head node; /* node in ipx_interfaces list */
-};
-
-struct ipx_route {
- __be32 ir_net;
- struct ipx_interface *ir_intrfc;
- unsigned char ir_routed;
- unsigned char ir_router_node[IPX_NODE_LEN];
- struct list_head node; /* node in ipx_routes list */
- refcount_t refcnt;
-};
-
-struct ipx_cb {
- u8 ipx_tctrl;
- __be32 ipx_dest_net;
- __be32 ipx_source_net;
- struct {
- __be32 netnum;
- int index;
- } last_hop;
-};
-
-#include <net/sock.h>
-
-struct ipx_sock {
- /* struct sock has to be the first member of ipx_sock */
- struct sock sk;
- struct ipx_address dest_addr;
- struct ipx_interface *intrfc;
- __be16 port;
-#ifdef CONFIG_IPX_INTERN
- unsigned char node[IPX_NODE_LEN];
-#endif
- unsigned short type;
- /*
- * To handle special ncp connection-handling sockets for mars_nwe,
- * the connection number must be stored in the socket.
- */
- unsigned short ipx_ncp_conn;
-};
-
-static inline struct ipx_sock *ipx_sk(struct sock *sk)
-{
- return (struct ipx_sock *)sk;
-}
-
-#define IPX_SKB_CB(__skb) ((struct ipx_cb *)&((__skb)->cb[0]))
-
-#define IPX_MIN_EPHEMERAL_SOCKET 0x4000
-#define IPX_MAX_EPHEMERAL_SOCKET 0x7fff
-
-extern struct list_head ipx_routes;
-extern rwlock_t ipx_routes_lock;
-
-extern struct list_head ipx_interfaces;
-struct ipx_interface *ipx_interfaces_head(void);
-extern spinlock_t ipx_interfaces_lock;
-
-extern struct ipx_interface *ipx_primary_net;
-
-int ipx_proc_init(void);
-void ipx_proc_exit(void);
-
-const char *ipx_frame_name(__be16);
-const char *ipx_device_name(struct ipx_interface *intrfc);
-
-static __inline__ void ipxitf_hold(struct ipx_interface *intrfc)
-{
- refcount_inc(&intrfc->refcnt);
-}
-
-void ipxitf_down(struct ipx_interface *intrfc);
-struct ipx_interface *ipxitf_find_using_net(__be32 net);
-int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb, char *node);
-__be16 ipx_cksum(struct ipxhdr *packet, int length);
-int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc,
- unsigned char *node);
-void ipxrtr_del_routes(struct ipx_interface *intrfc);
-int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
- struct msghdr *msg, size_t len, int noblock);
-int ipxrtr_route_skb(struct sk_buff *skb);
-struct ipx_route *ipxrtr_lookup(__be32 net);
-int ipxrtr_ioctl(unsigned int cmd, void __user *arg);
-
-static __inline__ void ipxitf_put(struct ipx_interface *intrfc)
-{
- if (refcount_dec_and_test(&intrfc->refcnt))
- ipxitf_down(intrfc);
-}
-
-static __inline__ void ipxrtr_hold(struct ipx_route *rt)
-{
- refcount_inc(&rt->refcnt);
-}
-
-static __inline__ void ipxrtr_put(struct ipx_route *rt)
-{
- if (refcount_dec_and_test(&rt->refcnt))
- kfree(rt);
-}
-#endif /* _NET_INET_IPX_H_ */
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 05cfd6ff6528..6f15e6fa154e 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -51,6 +51,9 @@ struct lwtunnel_encap_ops {
};
#ifdef CONFIG_LWTUNNEL
+
+DECLARE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
+
void lwtstate_free(struct lwtunnel_state *lws);
static inline struct lwtunnel_state *
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d8a1d09a2141..af0fc13cea34 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1711,6 +1711,10 @@ enum ieee80211_offload_flags {
* protected by fq->lock.
* @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see
* &enum ieee80211_offload_flags.
+ * @color_change_active: marks whether a color change is ongoing. Internally it is
+ * write-protected by sdata_lock and local->mtx so holding either is fine
+ * for read access.
+ * @color_change_color: the bss color that will be used after the change.
*/
struct ieee80211_vif {
enum nl80211_iftype type;
@@ -1739,6 +1743,9 @@ struct ieee80211_vif {
bool txqs_stopped[IEEE80211_NUM_ACS];
+ bool color_change_active;
+ u8 color_change_color;
+
/* must be last */
u8 drv_priv[] __aligned(sizeof(void *));
};
@@ -3919,6 +3926,13 @@ struct ieee80211_prep_tx_info {
* @set_sar_specs: Update the SAR (TX power) settings.
* @sta_set_decap_offload: Called to notify the driver when a station is allowed
* to use rx decapsulation offload
+ * @add_twt_setup: Update hw with TWT agreement parameters received from the peer.
+ * This callback allows the hw to check if requested parameters
+ * are supported and if there is enough room for a new agreement.
+ * The hw is expected to set agreement result in the req_type field of
+ * twt structure.
+ * @twt_teardown_request: Update the hw with TWT teardown request received
+ * from the peer.
*/
struct ieee80211_ops {
void (*tx)(struct ieee80211_hw *hw,
@@ -4242,6 +4256,11 @@ struct ieee80211_ops {
void (*sta_set_decap_offload)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta, bool enabled);
+ void (*add_twt_setup)(struct ieee80211_hw *hw,
+ struct ieee80211_sta *sta,
+ struct ieee80211_twt_setup *twt);
+ void (*twt_teardown_request)(struct ieee80211_hw *hw,
+ struct ieee80211_sta *sta, u8 flowid);
};
/**
@@ -5008,6 +5027,16 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif);
bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif);
/**
+ * ieee80211_color_change_finish - notify mac80211 about color change
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ *
+ * After a color change announcement was scheduled and the counter in this
+ * announcement hits 1, this function must be called by the driver to
+ * notify mac80211 that the color can be changed.
+ */
+void ieee80211_color_change_finish(struct ieee80211_vif *vif);
+
+/**
* ieee80211_proberesp_get - retrieve a Probe Response template
* @hw: pointer obtained from ieee80211_alloc_hw().
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
@@ -6772,6 +6801,18 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw,
struct ieee80211_vif *vif);
/**
+ * ieeee80211_obss_color_collision_notify - notify userland about a BSS color
+ * collision.
+ *
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @color_bitmap: a 64 bit bitmap representing the colors that the local BSS is
+ * aware of.
+ */
+void
+ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
+ u64 color_bitmap);
+
+/**
* ieee80211_is_tx_data - check if frame is a data frame
*
* The function is used to check if a frame is a data frame. Frames with
diff --git a/include/net/mctp.h b/include/net/mctp.h
new file mode 100644
index 000000000000..a824d47c3c6d
--- /dev/null
+++ b/include/net/mctp.h
@@ -0,0 +1,232 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Management Component Transport Protocol (MCTP)
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#ifndef __NET_MCTP_H
+#define __NET_MCTP_H
+
+#include <linux/bits.h>
+#include <linux/mctp.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+/* MCTP packet definitions */
+struct mctp_hdr {
+ u8 ver;
+ u8 dest;
+ u8 src;
+ u8 flags_seq_tag;
+};
+
+#define MCTP_VER_MIN 1
+#define MCTP_VER_MAX 1
+
+/* Definitions for flags_seq_tag field */
+#define MCTP_HDR_FLAG_SOM BIT(7)
+#define MCTP_HDR_FLAG_EOM BIT(6)
+#define MCTP_HDR_FLAG_TO BIT(3)
+#define MCTP_HDR_FLAGS GENMASK(5, 3)
+#define MCTP_HDR_SEQ_SHIFT 4
+#define MCTP_HDR_SEQ_MASK GENMASK(1, 0)
+#define MCTP_HDR_TAG_SHIFT 0
+#define MCTP_HDR_TAG_MASK GENMASK(2, 0)
+
+#define MCTP_HEADER_MAXLEN 4
+
+#define MCTP_INITIAL_DEFAULT_NET 1
+
+static inline bool mctp_address_ok(mctp_eid_t eid)
+{
+ return eid >= 8 && eid < 255;
+}
+
+static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb)
+{
+ return (struct mctp_hdr *)skb_network_header(skb);
+}
+
+/* socket implementation */
+struct mctp_sock {
+ struct sock sk;
+
+ /* bind() params */
+ int bind_net;
+ mctp_eid_t bind_addr;
+ __u8 bind_type;
+
+ /* list of mctp_sk_key, for incoming tag lookup. updates protected
+ * by the keys_lock of the socket's netns (struct netns_mctp)
+ */
+ struct hlist_head keys;
+};
+
+/* Key for matching incoming packets to sockets or reassembly contexts.
+ * Packets are matched on (src,dest,tag).
+ *
+ * Lifetime requirements:
+ *
+ * - keys are free()ed via RCU
+ *
+ * - a mctp_sk_key contains a reference to a struct sock; this is valid
+ * for the life of the key. On sock destruction (through unhash), the key is
+ * removed from lists (see below), and will not be observable after a RCU
+ * grace period.
+ *
+ * any RX occurring within that grace period may still queue to the socket,
+ * but will hit the SOCK_DEAD case before the socket is freed.
+ *
+ * - these mctp_sk_keys appear on two lists:
+ * 1) the struct mctp_sock->keys list
+ * 2) the struct netns_mctp->keys list
+ *
+ * updates to either list are performed under
+ * netns_mctp->keys_lock.
+ *
+ * - a key may have a sk_buff attached as part of an in-progress message
+ * reassembly (->reasm_head). The reassembly context is protected by
+ * reasm_lock, which may be acquired with the keys lock (above) held, if
+ * necessary. Consequently, keys lock *cannot* be acquired with the
+ * reasm_lock held.
+ *
+ * - there are two destruction paths for a mctp_sk_key:
+ *
+ * - through socket unhash (see mctp_sk_unhash). This performs the list
+ * removal under keys_lock.
+ *
+ * - where a key is established to receive a reply message: after receiving
+ * the (complete) reply, or during reassembly errors. Here, we clean up
+ * the reassembly context (marking reasm_dead, to prevent another from
+ * starting), and remove the key from the netns & socket lists.
+ */
+struct mctp_sk_key {
+ mctp_eid_t peer_addr;
+ mctp_eid_t local_addr;
+ __u8 tag; /* incoming tag match; invert TO for local */
+
+ /* we hold a ref to sk when set */
+ struct sock *sk;
+
+ /* routing lookup list */
+ struct hlist_node hlist;
+
+ /* per-socket list */
+ struct hlist_node sklist;
+
+ /* incoming fragment reassembly context */
+ spinlock_t reasm_lock;
+ struct sk_buff *reasm_head;
+ struct sk_buff **reasm_tailp;
+ bool reasm_dead;
+ u8 last_seq;
+
+ struct rcu_head rcu;
+};
+
+struct mctp_skb_cb {
+ unsigned int magic;
+ unsigned int net;
+ mctp_eid_t src;
+};
+
+/* skb control-block accessors with a little extra debugging for initial
+ * development.
+ *
+ * TODO: remove checks & mctp_skb_cb->magic; replace callers of __mctp_cb
+ * with mctp_cb().
+ *
+ * __mctp_cb() is only for the initial ingress code; we should see ->magic set
+ * at all times after this.
+ */
+static inline struct mctp_skb_cb *__mctp_cb(struct sk_buff *skb)
+{
+ struct mctp_skb_cb *cb = (void *)skb->cb;
+
+ cb->magic = 0x4d435450;
+ return cb;
+}
+
+static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb)
+{
+ struct mctp_skb_cb *cb = (void *)skb->cb;
+
+ WARN_ON(cb->magic != 0x4d435450);
+ return (void *)(skb->cb);
+}
+
+/* Route definition.
+ *
+ * These are held in the pernet->mctp.routes list, with RCU protection for
+ * removed routes. We hold a reference to the netdev; routes need to be
+ * dropped on NETDEV_UNREGISTER events.
+ *
+ * Updates to the route table are performed under rtnl; all reads under RCU,
+ * so routes cannot be referenced over a RCU grace period. Specifically: A
+ * caller cannot block between mctp_route_lookup and passing the route to
+ * mctp_do_route.
+ */
+struct mctp_route {
+ mctp_eid_t min, max;
+
+ struct mctp_dev *dev;
+ unsigned int mtu;
+ unsigned char type;
+ int (*output)(struct mctp_route *route,
+ struct sk_buff *skb);
+
+ struct list_head list;
+ refcount_t refs;
+ struct rcu_head rcu;
+};
+
+/* route interfaces */
+struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
+ mctp_eid_t daddr);
+
+int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb);
+
+int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+ struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
+
+/* routing <--> device interface */
+unsigned int mctp_default_net(struct net *net);
+int mctp_default_net_set(struct net *net, unsigned int index);
+int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr);
+int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr);
+void mctp_route_remove_dev(struct mctp_dev *mdev);
+
+/* neighbour definitions */
+enum mctp_neigh_source {
+ MCTP_NEIGH_STATIC,
+ MCTP_NEIGH_DISCOVER,
+};
+
+struct mctp_neigh {
+ struct mctp_dev *dev;
+ mctp_eid_t eid;
+ enum mctp_neigh_source source;
+
+ unsigned char ha[MAX_ADDR_LEN];
+
+ struct list_head list;
+ struct rcu_head rcu;
+};
+
+int mctp_neigh_init(void);
+void mctp_neigh_exit(void);
+
+// ret_hwaddr may be NULL, otherwise must have space for MAX_ADDR_LEN bytes
+int mctp_neigh_lookup(struct mctp_dev *dev, mctp_eid_t eid,
+ void *ret_hwaddr);
+void mctp_neigh_remove_dev(struct mctp_dev *mdev);
+
+int mctp_routes_init(void);
+void mctp_routes_exit(void);
+
+void mctp_device_init(void);
+void mctp_device_exit(void);
+
+#endif /* __NET_MCTP_H */
diff --git a/include/net/mctpdevice.h b/include/net/mctpdevice.h
new file mode 100644
index 000000000000..71a11012fac7
--- /dev/null
+++ b/include/net/mctpdevice.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Management Component Transport Protocol (MCTP) - device
+ * definitions.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#ifndef __NET_MCTPDEVICE_H
+#define __NET_MCTPDEVICE_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/refcount.h>
+
+struct mctp_dev {
+ struct net_device *dev;
+
+ unsigned int net;
+
+ /* Only modified under RTNL. Reads have addrs_lock held */
+ u8 *addrs;
+ size_t num_addrs;
+ spinlock_t addrs_lock;
+
+ struct rcu_head rcu;
+};
+
+#define MCTP_INITIAL_DEFAULT_NET 1
+
+struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev);
+struct mctp_dev *__mctp_dev_get(const struct net_device *dev);
+
+#endif /* __NET_MCTPDEVICE_H */
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 8b5af683a818..6026bbefbffd 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -58,10 +58,6 @@ struct mptcp_addr_info {
struct mptcp_out_options {
#if IS_ENABLED(CONFIG_MPTCP)
u16 suboptions;
- u64 sndr_key;
- u64 rcvr_key;
- u64 ahmac;
- struct mptcp_addr_info addr;
struct mptcp_rm_list rm_list;
u8 join_id;
u8 backup;
@@ -69,11 +65,26 @@ struct mptcp_out_options {
reset_transient:1,
csum_reqd:1,
allow_join_id0:1;
- u32 nonce;
- u64 thmac;
- u32 token;
- u8 hmac[20];
- struct mptcp_ext ext_copy;
+ union {
+ struct {
+ u64 sndr_key;
+ u64 rcvr_key;
+ };
+ struct {
+ struct mptcp_addr_info addr;
+ u64 ahmac;
+ };
+ struct {
+ struct mptcp_ext ext_copy;
+ u64 fail_seq;
+ };
+ struct {
+ u32 nonce;
+ u32 token;
+ u64 thmac;
+ u8 hmac[20];
+ };
+ };
#endif
};
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 12cf6d7ea62c..bb5fa5914032 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -23,7 +23,6 @@
#include <net/netns/ieee802154_6lowpan.h>
#include <net/netns/sctp.h>
#include <net/netns/netfilter.h>
-#include <net/netns/x_tables.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netns/conntrack.h>
#endif
@@ -34,6 +33,7 @@
#include <net/netns/xdp.h>
#include <net/netns/smc.h>
#include <net/netns/bpf.h>
+#include <net/netns/mctp.h>
#include <linux/ns_common.h>
#include <linux/idr.h>
#include <linux/skbuff.h>
@@ -132,7 +132,6 @@ struct net {
#endif
#ifdef CONFIG_NETFILTER
struct netns_nf nf;
- struct netns_xt xt;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct netns_ct ct;
#endif
@@ -167,6 +166,9 @@ struct net {
#ifdef CONFIG_XDP_SOCKETS
struct netns_xdp xdp;
#endif
+#if IS_ENABLED(CONFIG_MCTP)
+ struct netns_mctp mctp;
+#endif
#if IS_ENABLED(CONFIG_CRYPTO_USER)
struct sock *crypto_nlsk;
#endif
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index d00ba6048e44..d932e22edcb4 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -72,14 +72,20 @@ struct nf_ct_event {
int report;
};
+struct nf_exp_event {
+ struct nf_conntrack_expect *exp;
+ u32 portid;
+ int report;
+};
+
struct nf_ct_event_notifier {
- int (*fcn)(unsigned int events, struct nf_ct_event *item);
+ int (*ct_event)(unsigned int events, const struct nf_ct_event *item);
+ int (*exp_event)(unsigned int events, const struct nf_exp_event *item);
};
-int nf_conntrack_register_notifier(struct net *net,
- struct nf_ct_event_notifier *nb);
-void nf_conntrack_unregister_notifier(struct net *net,
- struct nf_ct_event_notifier *nb);
+void nf_conntrack_register_notifier(struct net *net,
+ const struct nf_ct_event_notifier *nb);
+void nf_conntrack_unregister_notifier(struct net *net);
void nf_ct_deliver_cached_events(struct nf_conn *ct);
int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
@@ -151,22 +157,6 @@ nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
-
-struct nf_exp_event {
- struct nf_conntrack_expect *exp;
- u32 portid;
- int report;
-};
-
-struct nf_exp_event_notifier {
- int (*fcn)(unsigned int events, struct nf_exp_event *item);
-};
-
-int nf_ct_expect_register_notifier(struct net *net,
- struct nf_exp_event_notifier *nb);
-void nf_ct_expect_unregister_notifier(struct net *net,
- struct nf_exp_event_notifier *nb);
-
void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
struct nf_conntrack_expect *exp,
u32 portid, int report);
diff --git a/include/net/netfilter/nf_hooks_lwtunnel.h b/include/net/netfilter/nf_hooks_lwtunnel.h
new file mode 100644
index 000000000000..52e27920f829
--- /dev/null
+++ b/include/net/netfilter/nf_hooks_lwtunnel.h
@@ -0,0 +1,7 @@
+#include <linux/sysctl.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_SYSCTL
+int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos);
+#endif
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index e770bba00066..9eed51e920e8 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -33,8 +33,8 @@ struct nf_queue_handler {
void (*nf_hook_drop)(struct net *net);
};
-void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh);
-void nf_unregister_queue_handler(struct net *net);
+void nf_register_queue_handler(const struct nf_queue_handler *qh);
+void nf_unregister_queue_handler(void);
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
void nf_queue_entry_get_refs(struct nf_queue_entry *entry);
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 1ceec518ab49..7a2a9d3144ba 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -885,7 +885,7 @@ static inline int nlmsg_validate_deprecated(const struct nlmsghdr *nlh,
*/
static inline int nlmsg_report(const struct nlmsghdr *nlh)
{
- return !!(nlh->nlmsg_flags & NLM_F_ECHO);
+ return nlh ? !!(nlh->nlmsg_flags & NLM_F_ECHO) : 0;
}
/**
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 37e5300c7e5a..0294f3d473af 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -30,7 +30,6 @@ struct nf_tcp_net {
u8 tcp_ignore_invalid_rst;
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
unsigned int offload_timeout;
- unsigned int offload_pickup;
#endif
};
@@ -44,7 +43,6 @@ struct nf_udp_net {
unsigned int timeouts[UDP_CT_MAX];
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
unsigned int offload_timeout;
- unsigned int offload_pickup;
#endif
};
@@ -115,7 +113,6 @@ struct netns_ct {
struct ct_pcpu __percpu *pcpu_lists;
struct ip_conntrack_stat __percpu *stat;
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
- struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
struct nf_ip_net nf_ct_proto;
#if defined(CONFIG_NF_CONNTRACK_LABELS)
unsigned int labels_used;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index b8620519eace..2f65701a43c9 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -174,7 +174,6 @@ struct netns_ipv4 {
int sysctl_tcp_fastopen;
const struct tcp_congestion_ops __rcu *tcp_congestion_control;
struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
- spinlock_t tcp_fastopen_ctx_lock;
unsigned int sysctl_tcp_fastopen_blackhole_timeout;
atomic_t tfo_active_disable_times;
unsigned long tfo_active_disable_stamp;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index bde0b7adb4a3..a4b550380316 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -51,6 +51,8 @@ struct netns_sysctl_ipv6 {
int max_dst_opts_len;
int max_hbh_opts_len;
int seg6_flowlabel;
+ u32 ioam6_id;
+ u64 ioam6_id_wide;
bool skip_notify_on_dev_down;
u8 fib_notify_on_flag_change;
};
@@ -110,6 +112,7 @@ struct netns_ipv6 {
spinlock_t lock;
u32 seq;
} ip6addrlbl_table;
+ struct ioam6_pernet_data *ioam6_data;
};
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h
new file mode 100644
index 000000000000..acedef12a35e
--- /dev/null
+++ b/include/net/netns/mctp.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * MCTP per-net structures
+ */
+
+#ifndef __NETNS_MCTP_H__
+#define __NETNS_MCTP_H__
+
+#include <linux/types.h>
+
+struct netns_mctp {
+ /* Only updated under RTNL, entries freed via RCU */
+ struct list_head routes;
+
+ /* Bound sockets: list of sockets bound by type.
+ * This list is updated from non-atomic contexts (under bind_lock),
+ * and read (under rcu) in packet rx
+ */
+ struct mutex bind_lock;
+ struct hlist_head binds;
+
+ /* tag allocations. This list is read and updated from atomic contexts,
+ * but elements are free()ed after a RCU grace-period
+ */
+ spinlock_t keys_lock;
+ struct hlist_head keys;
+
+ /* MCTP network */
+ unsigned int default_net;
+
+ /* neighbour table */
+ struct mutex neigh_lock;
+ struct list_head neighbours;
+};
+
+#endif /* __NETNS_MCTP_H__ */
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index 15e2b13fb0c0..986a2a9cfdfa 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -12,7 +12,6 @@ struct netns_nf {
#if defined CONFIG_PROC_FS
struct proc_dir_entry *proc_netfilter;
#endif
- const struct nf_queue_handler __rcu *queue_handler;
const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO];
#ifdef CONFIG_SYSCTL
struct ctl_table_header *nf_log_dir_header;
diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
deleted file mode 100644
index d02316ec2906..000000000000
--- a/include/net/netns/x_tables.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __NETNS_X_TABLES_H
-#define __NETNS_X_TABLES_H
-
-#include <linux/list.h>
-#include <linux/netfilter_defs.h>
-
-struct netns_xt {
- bool notrack_deprecated_warning;
- bool clusterip_deprecated_warning;
-};
-#endif
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 1f4e1816fd36..947733a639a6 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -65,6 +65,13 @@ struct netns_xfrm {
u32 sysctl_aevent_rseqth;
int sysctl_larval_drop;
u32 sysctl_acq_expires;
+
+ u8 policy_default;
+#define XFRM_POL_DEFAULT_IN 1
+#define XFRM_POL_DEFAULT_OUT 2
+#define XFRM_POL_DEFAULT_FWD 4
+#define XFRM_POL_DEFAULT_MASK 7
+
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_hdr;
#endif
diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h
index 963db96bcbbb..bb3e8fdc0692 100644
--- a/include/net/nfc/digital.h
+++ b/include/net/nfc/digital.h
@@ -191,7 +191,7 @@ struct digital_poll_tech {
struct nfc_digital_dev {
struct nfc_dev *nfc_dev;
- struct nfc_digital_ops *ops;
+ const struct nfc_digital_ops *ops;
u32 protocols;
@@ -236,7 +236,7 @@ struct nfc_digital_dev {
void (*skb_add_crc)(struct sk_buff *skb);
};
-struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops,
+struct nfc_digital_dev *nfc_digital_allocate_device(const struct nfc_digital_ops *ops,
__u32 supported_protocols,
__u32 driver_capabilities,
int tx_headroom,
diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h
index b35f37a57686..756c11084f65 100644
--- a/include/net/nfc/hci.h
+++ b/include/net/nfc/hci.h
@@ -118,7 +118,7 @@ struct nfc_hci_dev {
struct sk_buff_head msg_rx_queue;
- struct nfc_hci_ops *ops;
+ const struct nfc_hci_ops *ops;
struct nfc_llc *llc;
@@ -151,7 +151,7 @@ struct nfc_hci_dev {
};
/* hci device allocation */
-struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops,
+struct nfc_hci_dev *nfc_hci_allocate_device(const struct nfc_hci_ops *ops,
struct nfc_hci_init_data *init_data,
unsigned long quirks,
u32 protocols,
@@ -168,7 +168,7 @@ void nfc_hci_set_clientdata(struct nfc_hci_dev *hdev, void *clientdata);
void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev);
static inline int nfc_hci_set_vendor_cmds(struct nfc_hci_dev *hdev,
- struct nfc_vendor_cmd *cmds,
+ const struct nfc_vendor_cmd *cmds,
int n_cmds)
{
return nfc_set_vendor_cmds(hdev->ndev, cmds, n_cmds);
diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index 1df0f8074c9d..a964daedc17b 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -82,10 +82,10 @@ struct nci_ops {
void (*hci_cmd_received)(struct nci_dev *ndev, u8 pipe, u8 cmd,
struct sk_buff *skb);
- struct nci_driver_ops *prop_ops;
+ const struct nci_driver_ops *prop_ops;
size_t n_prop_ops;
- struct nci_driver_ops *core_ops;
+ const struct nci_driver_ops *core_ops;
size_t n_core_ops;
};
@@ -194,7 +194,7 @@ struct nci_hci_dev {
/* NCI Core structures */
struct nci_dev {
struct nfc_dev *nfc_dev;
- struct nci_ops *ops;
+ const struct nci_ops *ops;
struct nci_hci_dev *hci_dev;
int tx_headroom;
@@ -267,7 +267,7 @@ struct nci_dev {
};
/* ----- NCI Devices ----- */
-struct nci_dev *nci_allocate_device(struct nci_ops *ops,
+struct nci_dev *nci_allocate_device(const struct nci_ops *ops,
__u32 supported_protocols,
int tx_headroom,
int tx_tailroom);
@@ -276,25 +276,27 @@ int nci_register_device(struct nci_dev *ndev);
void nci_unregister_device(struct nci_dev *ndev);
int nci_request(struct nci_dev *ndev,
void (*req)(struct nci_dev *ndev,
- unsigned long opt),
- unsigned long opt, __u32 timeout);
-int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload);
-int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload);
+ const void *opt),
+ const void *opt, __u32 timeout);
+int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len,
+ const __u8 *payload);
+int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len,
+ const __u8 *payload);
int nci_core_reset(struct nci_dev *ndev);
int nci_core_init(struct nci_dev *ndev);
int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb);
int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb);
-int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val);
+int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val);
int nci_nfcee_discover(struct nci_dev *ndev, u8 action);
int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode);
int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
u8 number_destination_params,
size_t params_len,
- struct core_conn_create_dest_spec_params *params);
+ const struct core_conn_create_dest_spec_params *params);
int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id);
-int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len,
+int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len,
struct sk_buff **resp);
struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev);
@@ -343,7 +345,7 @@ static inline void *nci_get_drvdata(struct nci_dev *ndev)
}
static inline int nci_set_vendor_cmds(struct nci_dev *ndev,
- struct nfc_vendor_cmd *cmds,
+ const struct nfc_vendor_cmd *cmds,
int n_cmds)
{
return nfc_set_vendor_cmds(ndev->nfc_dev, cmds, n_cmds);
@@ -360,7 +362,7 @@ int nci_core_rsp_packet(struct nci_dev *ndev, __u16 opcode,
int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode,
struct sk_buff *skb);
void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb);
-int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload);
+int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payload);
int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb);
int nci_conn_max_data_pkt_payload_size(struct nci_dev *ndev, __u8 conn_id);
void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb,
@@ -378,7 +380,7 @@ void nci_req_complete(struct nci_dev *ndev, int result);
struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev,
int conn_id);
int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type,
- struct dest_spec_params *params);
+ const struct dest_spec_params *params);
/* ----- NCI status code ----- */
int nci_to_errno(__u8 code);
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 2cd3a261bcbc..5dee575fbe86 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -188,17 +188,17 @@ struct nfc_dev {
struct rfkill *rfkill;
- struct nfc_vendor_cmd *vendor_cmds;
+ const struct nfc_vendor_cmd *vendor_cmds;
int n_vendor_cmds;
- struct nfc_ops *ops;
+ const struct nfc_ops *ops;
struct genl_info *cur_cmd_info;
};
#define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev)
extern struct class nfc_class;
-struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
+struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops,
u32 supported_protocols,
int tx_headroom,
int tx_tailroom);
@@ -245,7 +245,7 @@ static inline void nfc_set_drvdata(struct nfc_dev *dev, void *data)
*
* @dev: The nfc device
*/
-static inline void *nfc_get_drvdata(struct nfc_dev *dev)
+static inline void *nfc_get_drvdata(const struct nfc_dev *dev)
{
return dev_get_drvdata(&dev->dev);
}
@@ -255,7 +255,7 @@ static inline void *nfc_get_drvdata(struct nfc_dev *dev)
*
* @dev: The nfc device whose name to return
*/
-static inline const char *nfc_device_name(struct nfc_dev *dev)
+static inline const char *nfc_device_name(const struct nfc_dev *dev)
{
return dev_name(&dev->dev);
}
@@ -266,7 +266,7 @@ struct sk_buff *nfc_alloc_send_skb(struct nfc_dev *dev, struct sock *sk,
struct sk_buff *nfc_alloc_recv_skb(unsigned int size, gfp_t gfp);
int nfc_set_remote_general_bytes(struct nfc_dev *dev,
- u8 *gt, u8 gt_len);
+ const u8 *gt, u8 gt_len);
u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, size_t *gb_len);
int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
@@ -280,7 +280,7 @@ int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx,
u8 comm_mode, u8 rf_mode);
int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode,
- u8 *gb, size_t gb_len);
+ const u8 *gb, size_t gb_len);
int nfc_tm_deactivated(struct nfc_dev *dev);
int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb);
@@ -297,7 +297,7 @@ void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb,
u8 payload_type, u8 direction);
static inline int nfc_set_vendor_cmds(struct nfc_dev *dev,
- struct nfc_vendor_cmd *cmds,
+ const struct nfc_vendor_cmd *cmds,
int n_cmds)
{
if (dev->vendor_cmds || dev->n_vendor_cmds)
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 3dd62dd73027..a4082406a003 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -45,7 +45,10 @@
* Please note DMA-sync-for-CPU is still
* device driver responsibility
*/
-#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
+#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */
+#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\
+ PP_FLAG_DMA_SYNC_DEV |\
+ PP_FLAG_PAGE_FRAG)
/*
* Fast allocation side cache array/stack
@@ -88,6 +91,9 @@ struct page_pool {
unsigned long defer_warn;
u32 pages_state_hold_cnt;
+ unsigned int frag_offset;
+ struct page *frag_page;
+ long frag_users;
/*
* Data structure for allocation side
@@ -137,6 +143,18 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
return page_pool_alloc_pages(pool, gfp);
}
+struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
+ unsigned int size, gfp_t gfp);
+
+static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int size)
+{
+ gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+ return page_pool_alloc_frag(pool, offset, size, gfp);
+}
+
/* get the stored dma direction. A driver might decide to treat this locally and
* avoid the extra cache line from page_pool to determine the direction
*/
@@ -198,19 +216,48 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
page_pool_put_full_page(pool, page, true);
}
+#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \
+ (sizeof(dma_addr_t) > sizeof(unsigned long))
+
static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
{
- dma_addr_t ret = page->dma_addr[0];
- if (sizeof(dma_addr_t) > sizeof(unsigned long))
- ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
+ dma_addr_t ret = page->dma_addr;
+
+ if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+ ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
+
return ret;
}
static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
{
- page->dma_addr[0] = addr;
- if (sizeof(dma_addr_t) > sizeof(unsigned long))
- page->dma_addr[1] = upper_32_bits(addr);
+ page->dma_addr = addr;
+ if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+ page->dma_addr_upper = upper_32_bits(addr);
+}
+
+static inline void page_pool_set_frag_count(struct page *page, long nr)
+{
+ atomic_long_set(&page->pp_frag_count, nr);
+}
+
+static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
+ long nr)
+{
+ long ret;
+
+ /* As suggested by Alexander, atomic_long_read() may cover up the
+ * reference count errors, so avoid calling atomic_long_read() in
+ * the cases of freeing or draining the page_frags, where we would
+ * not expect it to match or that are slowpath anyway.
+ */
+ if (__builtin_constant_p(nr) &&
+ atomic_long_read(&page->pp_frag_count) == nr)
+ return 0;
+
+ ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+ WARN_ON(ret < 0);
+ return ret;
}
static inline bool is_page_pool_compiled_in(void)
@@ -253,11 +300,4 @@ static inline void page_pool_ring_unlock(struct page_pool *pool)
spin_unlock_bh(&pool->ring.producer_lock);
}
-/* Store mem_info on struct page and use it while recycling skb frags */
-static inline
-void page_pool_store_mem_info(struct page *page, struct page_pool *pp)
-{
- page->pp = pp;
-}
-
#endif /* _NET_PAGE_POOL_H */
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 298a8d10168b..83a6d0792180 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -76,12 +76,10 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
return block->q;
}
-int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res, bool compat_mode);
-int tcf_classify_ingress(struct sk_buff *skb,
- const struct tcf_block *ingress_block,
- const struct tcf_proto *tp, struct tcf_result *res,
- bool compat_mode);
+int tcf_classify(struct sk_buff *skb,
+ const struct tcf_block *block,
+ const struct tcf_proto *tp, struct tcf_result *res,
+ bool compat_mode);
#else
static inline bool tcf_block_shared(struct tcf_block *block)
@@ -138,20 +136,14 @@ void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb,
{
}
-static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+static inline int tcf_classify(struct sk_buff *skb,
+ const struct tcf_block *block,
+ const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode)
{
return TC_ACT_UNSPEC;
}
-static inline int tcf_classify_ingress(struct sk_buff *skb,
- const struct tcf_block *ingress_block,
- const struct tcf_proto *tp,
- struct tcf_result *res, bool compat_mode)
-{
- return TC_ACT_UNSPEC;
-}
-
#endif
static inline unsigned long
@@ -327,7 +319,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
struct nlattr **tb, struct nlattr *rate_tlv,
- struct tcf_exts *exts, bool ovr, bool rtnl_held,
+ struct tcf_exts *exts, u32 flags,
struct netlink_ext_ack *extack);
void tcf_exts_destroy(struct tcf_exts *exts);
void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
@@ -824,10 +816,9 @@ enum tc_htb_command {
struct tc_htb_qopt_offload {
struct netlink_ext_ack *extack;
enum tc_htb_command command;
- u16 classid;
u32 parent_classid;
+ u16 classid;
u16 qid;
- u16 moved_qid;
u64 rate;
u64 ceil;
};
diff --git a/include/net/psample.h b/include/net/psample.h
index e328c5127757..0509d2d6be67 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -31,6 +31,8 @@ struct psample_group *psample_group_get(struct net *net, u32 group_num);
void psample_group_take(struct psample_group *group);
void psample_group_put(struct psample_group *group);
+struct sk_buff;
+
#if IS_ENABLED(CONFIG_PSAMPLE)
void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 384e800665f2..9f48733bfd21 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -153,7 +153,8 @@ struct rtnl_af_ops {
u32 ext_filter_mask);
int (*validate_link_af)(const struct net_device *dev,
- const struct nlattr *attr);
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack);
int (*set_link_af)(struct net_device *dev,
const struct nlattr *attr,
struct netlink_ext_ack *extack);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 9ed33e6840bd..c0069ac00e62 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -357,7 +357,7 @@ struct tcf_proto_ops {
int (*change)(struct net *net, struct sk_buff *,
struct tcf_proto*, unsigned long,
u32 handle, struct nlattr **,
- void **, bool, bool,
+ void **, u32,
struct netlink_ext_ack *);
int (*delete)(struct tcf_proto *tp, void *arg,
bool *last, bool rtnl_held,
diff --git a/include/net/sock.h b/include/net/sock.h
index f23cb259b0e2..66a9a90f9558 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -68,6 +68,7 @@
#include <net/tcp_states.h>
#include <linux/net_tstamp.h>
#include <net/l3mdev.h>
+#include <uapi/linux/socket.h>
/*
* This structure really needs to be cleaned up.
@@ -1438,8 +1439,6 @@ static inline int __sk_prot_rehash(struct sock *sk)
#define RCV_SHUTDOWN 1
#define SEND_SHUTDOWN 2
-#define SOCK_SNDBUF_LOCK 1
-#define SOCK_RCVBUF_LOCK 2
#define SOCK_BINDADDR_LOCK 4
#define SOCK_BINDPORT_LOCK 8
@@ -2249,6 +2248,15 @@ static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struc
return false;
}
+static inline void skb_prepare_for_gro(struct sk_buff *skb)
+{
+ if (skb->destructor != sock_wfree) {
+ skb_orphan(skb);
+ return;
+ }
+ skb->slow_gro = 1;
+}
+
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
unsigned long expires);
@@ -2392,6 +2400,11 @@ static inline gfp_t gfp_any(void)
return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
}
+static inline gfp_t gfp_memcg_charge(void)
+{
+ return in_softirq() ? GFP_NOWAIT : GFP_KERNEL;
+}
+
static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
{
return noblock ? 0 : sk->sk_rcvtimeo;
@@ -2704,6 +2717,7 @@ extern int sysctl_optmem_max;
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
+#define SKB_FRAG_PAGE_ORDER get_order(32768)
DECLARE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index e4cac9218ce1..60d806b6a5ae 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -180,6 +180,14 @@ struct switchdev_obj_in_state_mrp {
typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
+struct switchdev_brport {
+ struct net_device *dev;
+ const void *ctx;
+ struct notifier_block *atomic_nb;
+ struct notifier_block *blocking_nb;
+ bool tx_fwd_offload;
+};
+
enum switchdev_notifier_type {
SWITCHDEV_FDB_ADD_TO_BRIDGE = 1,
SWITCHDEV_FDB_DEL_TO_BRIDGE,
@@ -197,6 +205,9 @@ enum switchdev_notifier_type {
SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE,
SWITCHDEV_VXLAN_FDB_OFFLOADED,
+
+ SWITCHDEV_BRPORT_OFFLOADED,
+ SWITCHDEV_BRPORT_UNOFFLOADED,
};
struct switchdev_notifier_info {
@@ -226,6 +237,11 @@ struct switchdev_notifier_port_attr_info {
bool handled;
};
+struct switchdev_notifier_brport_info {
+ struct switchdev_notifier_info info; /* must be first */
+ const struct switchdev_brport brport;
+};
+
static inline struct net_device *
switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info)
{
@@ -238,8 +254,25 @@ switchdev_notifier_info_to_extack(const struct switchdev_notifier_info *info)
return info->extack;
}
+static inline bool
+switchdev_fdb_is_dynamically_learned(const struct switchdev_notifier_fdb_info *fdb_info)
+{
+ return !fdb_info->added_by_user && !fdb_info->is_local;
+}
+
#ifdef CONFIG_NET_SWITCHDEV
+int switchdev_bridge_port_offload(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack);
+void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+ const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb);
+
void switchdev_deferred_process(void);
int switchdev_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
@@ -266,6 +299,30 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
struct net_device *group_dev,
bool joining);
+int switchdev_handle_fdb_add_to_device(struct net_device *dev,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*add_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info),
+ int (*lag_add_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info));
+
+int switchdev_handle_fdb_del_to_device(struct net_device *dev,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*del_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info),
+ int (*lag_del_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info));
+
int switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
@@ -286,6 +343,25 @@ int switchdev_handle_port_attr_set(struct net_device *dev,
struct netlink_ext_ack *extack));
#else
+static inline int
+switchdev_bridge_port_offload(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void
+switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+ const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+}
+
static inline void switchdev_deferred_process(void)
{
}
@@ -350,6 +426,38 @@ call_switchdev_blocking_notifiers(unsigned long val,
}
static inline int
+switchdev_handle_fdb_add_to_device(struct net_device *dev,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*add_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info),
+ int (*lag_add_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info))
+{
+ return 0;
+}
+
+static inline int
+switchdev_handle_fdb_del_to_device(struct net_device *dev,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*del_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info),
+ int (*lag_del_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info))
+{
+ return 0;
+}
+
+static inline int
switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 784d5c3ef1c5..3166dc15d7d6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1958,7 +1958,6 @@ struct tcp_iter_state {
struct seq_net_private p;
enum tcp_seq_states state;
struct sock *syn_wait_sk;
- struct tcp_seq_afinfo *bpf_seq_afinfo;
int bucket, offset, sbucket, num;
loff_t last_pos;
};
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 5533f0ab2afc..ad5b02dcb6f4 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -276,6 +276,11 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
return unlikely(xdp->data_meta > xdp->data);
}
+static inline bool xdp_metalen_invalid(unsigned long metalen)
+{
+ return (metalen & (sizeof(__u32) - 1)) || (metalen > 32);
+}
+
struct xdp_attachment_info {
struct bpf_prog *prog;
u32 flags;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index cbff7c2a9724..2308210793a0 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1075,6 +1075,22 @@ xfrm_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, un
}
#ifdef CONFIG_XFRM
+static inline bool
+xfrm_default_allow(struct net *net, int dir)
+{
+ u8 def = net->xfrm.policy_default;
+
+ switch (dir) {
+ case XFRM_POLICY_IN:
+ return def & XFRM_POL_DEFAULT_IN ? false : true;
+ case XFRM_POLICY_OUT:
+ return def & XFRM_POL_DEFAULT_OUT ? false : true;
+ case XFRM_POLICY_FWD:
+ return def & XFRM_POL_DEFAULT_FWD ? false : true;
+ }
+ return false;
+}
+
int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb,
unsigned short family);
@@ -1088,9 +1104,13 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
if (sk && sk->sk_policy[XFRM_POLICY_IN])
return __xfrm_policy_check(sk, ndir, skb, family);
- return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) ||
- (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) ||
- __xfrm_policy_check(sk, ndir, skb, family);
+ if (xfrm_default_allow(net, dir))
+ return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) ||
+ (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) ||
+ __xfrm_policy_check(sk, ndir, skb, family);
+ else
+ return (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) ||
+ __xfrm_policy_check(sk, ndir, skb, family);
}
static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family)
@@ -1142,9 +1162,13 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
struct net *net = dev_net(skb->dev);
- return !net->xfrm.policy_count[XFRM_POLICY_OUT] ||
- (skb_dst(skb)->flags & DST_NOXFRM) ||
- __xfrm_route_forward(skb, family);
+ if (xfrm_default_allow(net, XFRM_POLICY_FWD))
+ return !net->xfrm.policy_count[XFRM_POLICY_OUT] ||
+ (skb_dst(skb)->flags & DST_NOXFRM) ||
+ __xfrm_route_forward(skb, family);
+ else
+ return (skb_dst(skb)->flags & DST_NOXFRM) ||
+ __xfrm_route_forward(skb, family);
}
static inline int xfrm4_route_forward(struct sk_buff *skb)
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 2f5ce4d4fdbf..06706a9fd5b1 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -589,6 +589,9 @@ enum ocelot_sb_pool {
OCELOT_SB_POOL_NUM,
};
+#define OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION BIT(0)
+#define OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP BIT(1)
+
struct ocelot_port {
struct ocelot *ocelot;
@@ -798,19 +801,14 @@ void ocelot_init_port(struct ocelot *ocelot, int port);
void ocelot_deinit_port(struct ocelot *ocelot, int port);
/* DSA callbacks */
-void ocelot_port_enable(struct ocelot *ocelot, int port,
- struct phy_device *phy);
-void ocelot_port_disable(struct ocelot *ocelot, int port);
void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data);
void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data);
int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset);
int ocelot_get_ts_info(struct ocelot *ocelot, int port,
struct ethtool_ts_info *info);
void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs);
-int ocelot_port_flush(struct ocelot *ocelot, int port);
-void ocelot_adjust_link(struct ocelot *ocelot, int port,
- struct phy_device *phydev);
-int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled);
+int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled,
+ struct netlink_ext_ack *extack);
void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state);
void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot);
int ocelot_port_pre_bridge_flags(struct ocelot *ocelot, int port,
@@ -828,7 +826,7 @@ int ocelot_fdb_add(struct ocelot *ocelot, int port,
int ocelot_fdb_del(struct ocelot *ocelot, int port,
const unsigned char *addr, u16 vid);
int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid,
- bool untagged);
+ bool untagged, struct netlink_ext_ack *extack);
int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid,
bool untagged);
int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid);
@@ -894,6 +892,18 @@ int ocelot_sb_occ_tc_port_bind_get(struct ocelot *ocelot, int port,
enum devlink_sb_pool_type pool_type,
u32 *p_cur, u32 *p_max);
+void ocelot_phylink_mac_link_down(struct ocelot *ocelot, int port,
+ unsigned int link_an_mode,
+ phy_interface_t interface,
+ unsigned long quirks);
+void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port,
+ struct phy_device *phydev,
+ unsigned int link_an_mode,
+ phy_interface_t interface,
+ int speed, int duplex,
+ bool tx_pause, bool rx_pause,
+ unsigned long quirks);
+
#if IS_ENABLED(CONFIG_BRIDGE_MRP)
int ocelot_mrp_add(struct ocelot *ocelot, int port,
const struct switchdev_obj_mrp *mrp);
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index b671b1f2ce0f..8f58fd95efc7 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -94,6 +94,7 @@ struct btrfs_space_info;
EM( FLUSH_DELAYED_ITEMS, "FLUSH_DELAYED_ITEMS") \
EM( FLUSH_DELALLOC, "FLUSH_DELALLOC") \
EM( FLUSH_DELALLOC_WAIT, "FLUSH_DELALLOC_WAIT") \
+ EM( FLUSH_DELALLOC_FULL, "FLUSH_DELALLOC_FULL") \
EM( FLUSH_DELAYED_REFS_NR, "FLUSH_DELAYED_REFS_NR") \
EM( FLUSH_DELAYED_REFS, "FLUSH_ELAYED_REFS") \
EM( ALLOC_CHUNK, "ALLOC_CHUNK") \
@@ -2037,7 +2038,7 @@ TRACE_EVENT(btrfs_convert_extent_bit,
);
DECLARE_EVENT_CLASS(btrfs_dump_space_info,
- TP_PROTO(const struct btrfs_fs_info *fs_info,
+ TP_PROTO(struct btrfs_fs_info *fs_info,
const struct btrfs_space_info *sinfo),
TP_ARGS(fs_info, sinfo),
@@ -2057,6 +2058,8 @@ DECLARE_EVENT_CLASS(btrfs_dump_space_info,
__field( u64, delayed_refs_reserved )
__field( u64, delayed_reserved )
__field( u64, free_chunk_space )
+ __field( u64, delalloc_bytes )
+ __field( u64, ordered_bytes )
),
TP_fast_assign_btrfs(fs_info,
@@ -2074,6 +2077,8 @@ DECLARE_EVENT_CLASS(btrfs_dump_space_info,
__entry->delayed_refs_reserved = fs_info->delayed_refs_rsv.reserved;
__entry->delayed_reserved = fs_info->delayed_block_rsv.reserved;
__entry->free_chunk_space = atomic64_read(&fs_info->free_chunk_space);
+ __entry->delalloc_bytes = percpu_counter_sum_positive(&fs_info->delalloc_bytes);
+ __entry->ordered_bytes = percpu_counter_sum_positive(&fs_info->ordered_bytes);
),
TP_printk_btrfs("flags=%s total_bytes=%llu bytes_used=%llu "
@@ -2081,7 +2086,8 @@ DECLARE_EVENT_CLASS(btrfs_dump_space_info,
"bytes_may_use=%llu bytes_readonly=%llu "
"reclaim_size=%llu clamp=%d global_reserved=%llu "
"trans_reserved=%llu delayed_refs_reserved=%llu "
- "delayed_reserved=%llu chunk_free_space=%llu",
+ "delayed_reserved=%llu chunk_free_space=%llu "
+ "delalloc_bytes=%llu ordered_bytes=%llu",
__print_flags(__entry->flags, "|", BTRFS_GROUP_FLAGS),
__entry->total_bytes, __entry->bytes_used,
__entry->bytes_pinned, __entry->bytes_reserved,
@@ -2089,11 +2095,18 @@ DECLARE_EVENT_CLASS(btrfs_dump_space_info,
__entry->reclaim_size, __entry->clamp,
__entry->global_reserved, __entry->trans_reserved,
__entry->delayed_refs_reserved,
- __entry->delayed_reserved, __entry->free_chunk_space)
+ __entry->delayed_reserved, __entry->free_chunk_space,
+ __entry->delalloc_bytes, __entry->ordered_bytes)
);
DEFINE_EVENT(btrfs_dump_space_info, btrfs_done_preemptive_reclaim,
- TP_PROTO(const struct btrfs_fs_info *fs_info,
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ const struct btrfs_space_info *sinfo),
+ TP_ARGS(fs_info, sinfo)
+);
+
+DEFINE_EVENT(btrfs_dump_space_info, btrfs_fail_all_tickets,
+ TP_PROTO(struct btrfs_fs_info *fs_info,
const struct btrfs_space_info *sinfo),
TP_ARGS(fs_info, sinfo)
);
diff --git a/include/trace/events/kyber.h b/include/trace/events/kyber.h
index f9802562edf6..491098a0d8ed 100644
--- a/include/trace/events/kyber.h
+++ b/include/trace/events/kyber.h
@@ -30,7 +30,7 @@ TRACE_EVENT(kyber_latency,
),
TP_fast_assign(
- __entry->dev = disk_devt(queue_to_disk(q));
+ __entry->dev = disk_devt(q->disk);
strlcpy(__entry->domain, domain, sizeof(__entry->domain));
strlcpy(__entry->type, type, sizeof(__entry->type));
__entry->percentile = percentile;
@@ -59,7 +59,7 @@ TRACE_EVENT(kyber_adjust,
),
TP_fast_assign(
- __entry->dev = disk_devt(queue_to_disk(q));
+ __entry->dev = disk_devt(q->disk);
strlcpy(__entry->domain, domain, sizeof(__entry->domain));
__entry->depth = depth;
),
@@ -81,7 +81,7 @@ TRACE_EVENT(kyber_throttled,
),
TP_fast_assign(
- __entry->dev = disk_devt(queue_to_disk(q));
+ __entry->dev = disk_devt(q->disk);
strlcpy(__entry->domain, domain, sizeof(__entry->domain));
),
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 390270e00a1d..f160484afc5c 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -48,7 +48,9 @@
{(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \
{(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \
{(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\
- {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"}\
+ {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\
+ {(unsigned long)__GFP_ZEROTAGS, "__GFP_ZEROTAGS"}, \
+ {(unsigned long)__GFP_SKIP_KASAN_POISON,"__GFP_SKIP_KASAN_POISON"}\
#define show_gfp_flags(flags) \
(flags) ? __print_flags(flags, "|", \
diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h
index c3006c6b4a87..59c945b66f9c 100644
--- a/include/trace/events/qdisc.h
+++ b/include/trace/events/qdisc.h
@@ -54,6 +54,7 @@ TRACE_EVENT(qdisc_enqueue,
TP_STRUCT__entry(
__field(struct Qdisc *, qdisc)
+ __field(const struct netdev_queue *, txq)
__field(void *, skbaddr)
__field(int, ifindex)
__field(u32, handle)
@@ -62,6 +63,7 @@ TRACE_EVENT(qdisc_enqueue,
TP_fast_assign(
__entry->qdisc = qdisc;
+ __entry->txq = txq;
__entry->skbaddr = skb;
__entry->ifindex = txq->dev ? txq->dev->ifindex : 0;
__entry->handle = qdisc->handle;
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 861f199896c6..d323f5a049c8 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1642,7 +1642,7 @@ TRACE_EVENT(svc_process,
__field(u32, vers)
__field(u32, proc)
__string(service, name)
- __string(procedure, rqst->rq_procinfo->pc_name)
+ __string(procedure, svc_proc_name(rqst))
__string(addr, rqst->rq_xprt ?
rqst->rq_xprt->xpt_remotebuf : "(null)")
),
@@ -1652,7 +1652,7 @@ TRACE_EVENT(svc_process,
__entry->vers = rqst->rq_vers;
__entry->proc = rqst->rq_proc;
__assign_str(service, name);
- __assign_str(procedure, rqst->rq_procinfo->pc_name);
+ __assign_str(procedure, svc_proc_name(rqst));
__assign_str(addr, rqst->rq_xprt ?
rqst->rq_xprt->xpt_remotebuf : "(null)");
),
@@ -1918,7 +1918,7 @@ TRACE_EVENT(svc_stats_latency,
TP_STRUCT__entry(
__field(u32, xid)
__field(unsigned long, execute)
- __string(procedure, rqst->rq_procinfo->pc_name)
+ __string(procedure, svc_proc_name(rqst))
__string(addr, rqst->rq_xprt->xpt_remotebuf)
),
@@ -1926,7 +1926,7 @@ TRACE_EVENT(svc_stats_latency,
__entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->execute = ktime_to_us(ktime_sub(ktime_get(),
rqst->rq_stime));
- __assign_str(procedure, rqst->rq_procinfo->pc_name);
+ __assign_str(procedure, svc_proc_name(rqst));
__assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
),
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index acc17194c160..08810a463880 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -102,6 +102,9 @@ TRACE_MAKE_SYSTEM_STR();
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
+#undef __string_len
+#define __string_len(item, src, len) __dynamic_array(char, item, -1)
+
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)
@@ -197,6 +200,9 @@ TRACE_MAKE_SYSTEM_STR();
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
+#undef __string_len
+#define __string_len(item, src, len) __dynamic_array(char, item, -1)
+
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
@@ -459,6 +465,9 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
+#undef __string_len
+#define __string_len(item, src, len) __dynamic_array(char, item, -1)
+
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
@@ -507,6 +516,9 @@ static struct trace_event_fields trace_event_fields_##call[] = { \
#define __string(item, src) __dynamic_array(char, item, \
strlen((src) ? (const char *)(src) : "(null)") + 1)
+#undef __string_len
+#define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1)
+
/*
* __bitmask_size_in_bytes_raw is the number of bytes needed to hold
* num_possible_cpus().
@@ -670,10 +682,20 @@ static inline notrace int trace_event_get_offsets_##call( \
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
+#undef __string_len
+#define __string_len(item, src, len) __dynamic_array(char, item, -1)
+
#undef __assign_str
#define __assign_str(dst, src) \
strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)");
+#undef __assign_str_len
+#define __assign_str_len(dst, src, len) \
+ do { \
+ memcpy(__get_str(dst), (src), (len)); \
+ __get_str(dst)[len] = '\0'; \
+ } while(0)
+
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index d588c244ec2f..1f0a2b4864e4 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -124,6 +124,8 @@
#define SO_NETNS_COOKIE 71
+#define SO_BUF_LOCK 72
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bf9252c7381e..791f31dd0abe 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -84,7 +84,7 @@ struct bpf_lpm_trie_key {
struct bpf_cgroup_storage_key {
__u64 cgroup_inode_id; /* cgroup inode id */
- __u32 attach_type; /* program attach type */
+ __u32 attach_type; /* program attach type (enum bpf_attach_type) */
};
union bpf_iter_link_info {
@@ -324,9 +324,6 @@ union bpf_iter_link_info {
* **BPF_PROG_TYPE_SK_LOOKUP**
* *data_in* and *data_out* must be NULL.
*
- * **BPF_PROG_TYPE_XDP**
- * *ctx_in* and *ctx_out* must be NULL.
- *
* **BPF_PROG_TYPE_RAW_TRACEPOINT**,
* **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
*
@@ -996,6 +993,7 @@ enum bpf_attach_type {
BPF_SK_SKB_VERDICT,
BPF_SK_REUSEPORT_SELECT,
BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
+ BPF_PERF_EVENT,
__MAX_BPF_ATTACH_TYPE
};
@@ -1009,6 +1007,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_ITER = 4,
BPF_LINK_TYPE_NETNS = 5,
BPF_LINK_TYPE_XDP = 6,
+ BPF_LINK_TYPE_PERF_EVENT = 7,
MAX_BPF_LINK_TYPE,
};
@@ -1449,6 +1448,13 @@ union bpf_attr {
__aligned_u64 iter_info; /* extra bpf_iter_link_info */
__u32 iter_info_len; /* iter_info length */
};
+ struct {
+ /* black box user-provided value passed through
+ * to the BPF program at execution time and
+ * accessible through the bpf_get_attach_cookie() BPF helper
+ */
+ __u64 bpf_cookie;
+ } perf_event;
};
} link_create;
@@ -3249,7 +3255,7 @@ union bpf_attr {
* long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
* Description
* Select a **SO_REUSEPORT** socket from a
- * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
+ * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*.
* It checks the selected socket is matching the incoming
* request in the socket buffer.
* Return
@@ -4780,6 +4786,97 @@ union bpf_attr {
* Execute close syscall for given FD.
* Return
* A syscall result.
+ *
+ * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags)
+ * Description
+ * Initialize the timer.
+ * First 4 bits of *flags* specify clockid.
+ * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed.
+ * All other bits of *flags* are reserved.
+ * The verifier will reject the program if *timer* is not from
+ * the same *map*.
+ * Return
+ * 0 on success.
+ * **-EBUSY** if *timer* is already initialized.
+ * **-EINVAL** if invalid *flags* are passed.
+ * **-EPERM** if *timer* is in a map that doesn't have any user references.
+ * The user space should either hold a file descriptor to a map with timers
+ * or pin such map in bpffs. When map is unpinned or file descriptor is
+ * closed all timers in the map will be cancelled and freed.
+ *
+ * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn)
+ * Description
+ * Configure the timer to call *callback_fn* static function.
+ * Return
+ * 0 on success.
+ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
+ * **-EPERM** if *timer* is in a map that doesn't have any user references.
+ * The user space should either hold a file descriptor to a map with timers
+ * or pin such map in bpffs. When map is unpinned or file descriptor is
+ * closed all timers in the map will be cancelled and freed.
+ *
+ * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags)
+ * Description
+ * Set timer expiration N nanoseconds from the current time. The
+ * configured callback will be invoked in soft irq context on some cpu
+ * and will not repeat unless another bpf_timer_start() is made.
+ * In such case the next invocation can migrate to a different cpu.
+ * Since struct bpf_timer is a field inside map element the map
+ * owns the timer. The bpf_timer_set_callback() will increment refcnt
+ * of BPF program to make sure that callback_fn code stays valid.
+ * When user space reference to a map reaches zero all timers
+ * in a map are cancelled and corresponding program's refcnts are
+ * decremented. This is done to make sure that Ctrl-C of a user
+ * process doesn't leave any timers running. If map is pinned in
+ * bpffs the callback_fn can re-arm itself indefinitely.
+ * bpf_map_update/delete_elem() helpers and user space sys_bpf commands
+ * cancel and free the timer in the given map element.
+ * The map can contain timers that invoke callback_fn-s from different
+ * programs. The same callback_fn can serve different timers from
+ * different maps if key/value layout matches across maps.
+ * Every bpf_timer_set_callback() can have different callback_fn.
+ *
+ * Return
+ * 0 on success.
+ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier
+ * or invalid *flags* are passed.
+ *
+ * long bpf_timer_cancel(struct bpf_timer *timer)
+ * Description
+ * Cancel the timer and wait for callback_fn to finish if it was running.
+ * Return
+ * 0 if the timer was not active.
+ * 1 if the timer was active.
+ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
+ * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its
+ * own timer which would have led to a deadlock otherwise.
+ *
+ * u64 bpf_get_func_ip(void *ctx)
+ * Description
+ * Get address of the traced function (for tracing and kprobe programs).
+ * Return
+ * Address of the traced function.
+ *
+ * u64 bpf_get_attach_cookie(void *ctx)
+ * Description
+ * Get bpf_cookie value provided (optionally) during the program
+ * attachment. It might be different for each individual
+ * attachment, even if BPF program itself is the same.
+ * Expects BPF program context *ctx* as a first argument.
+ *
+ * Supported for the following program types:
+ * - kprobe/uprobe;
+ * - tracepoint;
+ * - perf_event.
+ * Return
+ * Value specified by user at BPF link creation/attachment time
+ * or 0, if it was not specified.
+ *
+ * long bpf_task_pt_regs(struct task_struct *task)
+ * Description
+ * Get the struct pt_regs associated with **task**.
+ * Return
+ * A pointer to struct pt_regs.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4951,6 +5048,13 @@ union bpf_attr {
FN(sys_bpf), \
FN(btf_find_by_name_kind), \
FN(sys_close), \
+ FN(timer_init), \
+ FN(timer_set_callback), \
+ FN(timer_start), \
+ FN(timer_cancel), \
+ FN(get_func_ip), \
+ FN(get_attach_cookie), \
+ FN(task_pt_regs), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6077,6 +6181,11 @@ struct bpf_spin_lock {
__u32 val;
};
+struct bpf_timer {
+ __u64 :64;
+ __u64 :64;
+} __attribute__((aligned(8)));
+
struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 22cd037123fa..d7d3cfead056 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -288,6 +288,7 @@ struct btrfs_ioctl_fs_info_args {
* first mount when booting older kernel versions.
*/
#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID (1ULL << 1)
+#define BTRFS_FEATURE_COMPAT_RO_VERITY (1ULL << 2)
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index ccdb40fe40dc..e1c4c732aaba 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -118,6 +118,29 @@
#define BTRFS_INODE_REF_KEY 12
#define BTRFS_INODE_EXTREF_KEY 13
#define BTRFS_XATTR_ITEM_KEY 24
+
+/*
+ * fs verity items are stored under two different key types on disk.
+ * The descriptor items:
+ * [ inode objectid, BTRFS_VERITY_DESC_ITEM_KEY, offset ]
+ *
+ * At offset 0, we store a btrfs_verity_descriptor_item which tracks the size
+ * of the descriptor item and some extra data for encryption.
+ * Starting at offset 1, these hold the generic fs verity descriptor. The
+ * latter are opaque to btrfs, we just read and write them as a blob for the
+ * higher level verity code. The most common descriptor size is 256 bytes.
+ *
+ * The merkle tree items:
+ * [ inode objectid, BTRFS_VERITY_MERKLE_ITEM_KEY, offset ]
+ *
+ * These also start at offset 0, and correspond to the merkle tree bytes. When
+ * fsverity asks for page 0 of the merkle tree, we pull up one page starting at
+ * offset 0 for this key type. These are also opaque to btrfs, we're blindly
+ * storing whatever fsverity sends down.
+ */
+#define BTRFS_VERITY_DESC_ITEM_KEY 36
+#define BTRFS_VERITY_MERKLE_ITEM_KEY 37
+
#define BTRFS_ORPHAN_ITEM_KEY 48
/* reserve 2-15 close to the inode for later flexibility */
@@ -991,4 +1014,16 @@ struct btrfs_qgroup_limit_item {
__le64 rsv_excl;
} __attribute__ ((__packed__));
+struct btrfs_verity_descriptor_item {
+ /* Size of the verity descriptor in bytes */
+ __le64 size;
+ /*
+ * When we implement support for fscrypt, we will need to encrypt the
+ * Merkle tree for encrypted verity files. These 128 bits are for the
+ * eventual storage of an fscrypt initialization vector.
+ */
+ __le64 reserved[2];
+ __u8 encryption;
+} __attribute__ ((__packed__));
+
#endif /* _BTRFS_CTREE_H_ */
diff --git a/include/uapi/linux/can/j1939.h b/include/uapi/linux/can/j1939.h
index df6e821075c1..38936460f668 100644
--- a/include/uapi/linux/can/j1939.h
+++ b/include/uapi/linux/can/j1939.h
@@ -78,11 +78,20 @@ enum {
enum {
J1939_NLA_PAD,
J1939_NLA_BYTES_ACKED,
+ J1939_NLA_TOTAL_SIZE,
+ J1939_NLA_PGN,
+ J1939_NLA_SRC_NAME,
+ J1939_NLA_DEST_NAME,
+ J1939_NLA_SRC_ADDR,
+ J1939_NLA_DEST_ADDR,
};
enum {
J1939_EE_INFO_NONE,
J1939_EE_INFO_TX_ABORT,
+ J1939_EE_INFO_RX_RTS,
+ J1939_EE_INFO_RX_DPO,
+ J1939_EE_INFO_RX_ABORT,
};
struct j1939_filter {
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index e5c6e458bdf7..c12ce30b52df 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -376,4 +376,10 @@ enum {
*/
#define DM_INTERNAL_SUSPEND_FLAG (1 << 18) /* Out */
+/*
+ * If set, return the raw table information that the IMA subsystem would
+ * measure on a device state change, in the buffer passed in by user mode.
+ */
+#define DM_IMA_MEASUREMENT_FLAG (1 << 19) /* In */
+
#endif /* _LINUX_DM_IOCTL_H */
diff --git a/drivers/staging/media/av7110/audio.h b/include/uapi/linux/dvb/audio.h
index 2f869da69171..2f869da69171 100644
--- a/drivers/staging/media/av7110/audio.h
+++ b/include/uapi/linux/dvb/audio.h
diff --git a/drivers/staging/media/av7110/osd.h b/include/uapi/linux/dvb/osd.h
index 858997c74043..858997c74043 100644
--- a/drivers/staging/media/av7110/osd.h
+++ b/include/uapi/linux/dvb/osd.h
diff --git a/drivers/staging/media/av7110/video.h b/include/uapi/linux/dvb/video.h
index 179f1ec60af6..179f1ec60af6 100644
--- a/drivers/staging/media/av7110/video.h
+++ b/include/uapi/linux/dvb/video.h
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 67aa7134b301..b6db6590baf0 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -639,6 +639,8 @@ enum ethtool_link_ext_substate_link_logical_mismatch {
enum ethtool_link_ext_substate_bad_signal_integrity {
ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1,
ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE,
+ ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST,
+ ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS,
};
/* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. */
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index b3b93710eff7..5545f1ca9237 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -377,6 +377,8 @@ enum {
ETHTOOL_A_COALESCE_TX_USECS_HIGH, /* u32 */
ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, /* u32 */
ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, /* u32 */
+ ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, /* u8 */
+ ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, /* u8 */
/* add new constants above here */
__ETHTOOL_A_COALESCE_CNT,
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index fbf9c5c7dd59..64553df9d735 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -51,6 +51,7 @@
#define FAN_ENABLE_AUDIT 0x00000040
/* Flags to determine fanotify event format */
+#define FAN_REPORT_PIDFD 0x00000080 /* Report pidfd for event->pid */
#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */
#define FAN_REPORT_FID 0x00000200 /* Report unique file id */
#define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */
@@ -123,6 +124,7 @@ struct fanotify_event_metadata {
#define FAN_EVENT_INFO_TYPE_FID 1
#define FAN_EVENT_INFO_TYPE_DFID_NAME 2
#define FAN_EVENT_INFO_TYPE_DFID 3
+#define FAN_EVENT_INFO_TYPE_PIDFD 4
/* Variable length info record following event metadata */
struct fanotify_event_info_header {
@@ -148,6 +150,15 @@ struct fanotify_event_info_fid {
unsigned char handle[0];
};
+/*
+ * This structure is used for info records of type FAN_EVENT_INFO_TYPE_PIDFD.
+ * It holds a pidfd for the pid that was responsible for generating an event.
+ */
+struct fanotify_event_info_pidfd {
+ struct fanotify_event_info_header hdr;
+ __s32 pidfd;
+};
+
struct fanotify_response {
__s32 fd;
__u32 response;
@@ -160,6 +171,8 @@ struct fanotify_response {
/* No fd set in event */
#define FAN_NOFD -1
+#define FAN_NOPIDFD FAN_NOFD
+#define FAN_EPIDFD -2
/* Helper functions to deal with fanotify_event_metadata buffers */
#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata))
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 4c32e97dcdf0..bdf7b404b3e7 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -184,6 +184,7 @@ struct fsxattr {
#define BLKSECDISCARD _IO(0x12,125)
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
+#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
/*
* A jump here: 130-136 are reserved for zoned block devices
* (see uapi/linux/blkzoned.h)
diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h
index c3cc5a9e5eaf..4783af9fe520 100644
--- a/include/uapi/linux/if_arp.h
+++ b/include/uapi/linux/if_arp.h
@@ -54,6 +54,7 @@
#define ARPHRD_X25 271 /* CCITT X.25 */
#define ARPHRD_HWX25 272 /* Boards with X.25 in firmware */
#define ARPHRD_CAN 280 /* Controller Area Network */
+#define ARPHRD_MCTP 290
#define ARPHRD_PPP 512
#define ARPHRD_CISCO 513 /* Cisco HDLC */
#define ARPHRD_HDLC ARPHRD_CISCO
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 6b56a7549531..2711c3522010 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -479,16 +479,22 @@ enum {
/* flags used in BRIDGE_VLANDB_DUMP_FLAGS attribute to affect dumps */
#define BRIDGE_VLANDB_DUMPF_STATS (1 << 0) /* Include stats in the dump */
+#define BRIDGE_VLANDB_DUMPF_GLOBAL (1 << 1) /* Dump global vlan options only */
/* Bridge vlan RTM attributes
* [BRIDGE_VLANDB_ENTRY] = {
* [BRIDGE_VLANDB_ENTRY_INFO]
* ...
* }
+ * [BRIDGE_VLANDB_GLOBAL_OPTIONS] = {
+ * [BRIDGE_VLANDB_GOPTS_ID]
+ * ...
+ * }
*/
enum {
BRIDGE_VLANDB_UNSPEC,
BRIDGE_VLANDB_ENTRY,
+ BRIDGE_VLANDB_GLOBAL_OPTIONS,
__BRIDGE_VLANDB_MAX,
};
#define BRIDGE_VLANDB_MAX (__BRIDGE_VLANDB_MAX - 1)
@@ -500,6 +506,7 @@ enum {
BRIDGE_VLANDB_ENTRY_STATE,
BRIDGE_VLANDB_ENTRY_TUNNEL_INFO,
BRIDGE_VLANDB_ENTRY_STATS,
+ BRIDGE_VLANDB_ENTRY_MCAST_ROUTER,
__BRIDGE_VLANDB_ENTRY_MAX,
};
#define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1)
@@ -538,6 +545,29 @@ enum {
};
#define BRIDGE_VLANDB_STATS_MAX (__BRIDGE_VLANDB_STATS_MAX - 1)
+enum {
+ BRIDGE_VLANDB_GOPTS_UNSPEC,
+ BRIDGE_VLANDB_GOPTS_ID,
+ BRIDGE_VLANDB_GOPTS_RANGE,
+ BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING,
+ BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION,
+ BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION,
+ BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT,
+ BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT,
+ BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL,
+ BRIDGE_VLANDB_GOPTS_PAD,
+ BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL,
+ BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL,
+ BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL,
+ BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL,
+ BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL,
+ BRIDGE_VLANDB_GOPTS_MCAST_QUERIER,
+ BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS,
+ BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE,
+ __BRIDGE_VLANDB_GOPTS_MAX
+};
+#define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1)
+
/* Bridge multicast database attributes
* [MDBA_MDB] = {
* [MDBA_MDB_ENTRY] = {
@@ -629,6 +659,7 @@ enum {
MDBA_ROUTER_PATTR_TYPE,
MDBA_ROUTER_PATTR_INET_TIMER,
MDBA_ROUTER_PATTR_INET6_TIMER,
+ MDBA_ROUTER_PATTR_VID,
__MDBA_ROUTER_PATTR_MAX
};
#define MDBA_ROUTER_PATTR_MAX (__MDBA_ROUTER_PATTR_MAX - 1)
@@ -720,12 +751,14 @@ struct br_mcast_stats {
/* bridge boolean options
* BR_BOOLOPT_NO_LL_LEARN - disable learning from link-local packets
+ * BR_BOOLOPT_MCAST_VLAN_SNOOPING - control vlan multicast snooping
*
* IMPORTANT: if adding a new option do not forget to handle
* it in br_boolopt_toggle/get and bridge sysfs
*/
enum br_boolopt_id {
BR_BOOLOPT_NO_LL_LEARN,
+ BR_BOOLOPT_MCAST_VLAN_SNOOPING,
BR_BOOLOPT_MAX
};
@@ -738,4 +771,17 @@ struct br_boolopt_multi {
__u32 optval;
__u32 optmask;
};
+
+enum {
+ BRIDGE_QUERIER_UNSPEC,
+ BRIDGE_QUERIER_IP_ADDRESS,
+ BRIDGE_QUERIER_IP_PORT,
+ BRIDGE_QUERIER_IP_OTHER_TIMER,
+ BRIDGE_QUERIER_PAD,
+ BRIDGE_QUERIER_IPV6_ADDRESS,
+ BRIDGE_QUERIER_IPV6_PORT,
+ BRIDGE_QUERIER_IPV6_OTHER_TIMER,
+ __BRIDGE_QUERIER_MAX
+};
+#define BRIDGE_QUERIER_MAX (__BRIDGE_QUERIER_MAX - 1)
#endif /* _UAPI_LINUX_IF_BRIDGE_H */
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index a0b637911d3c..5f589c7a8382 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -151,6 +151,9 @@
#define ETH_P_MAP 0x00F9 /* Qualcomm multiplexing and
* aggregation protocol
*/
+#define ETH_P_MCTP 0x00FA /* Management component transport
+ * protocol packets
+ */
/*
* This is an Ethernet frame header.
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 4882e81514b6..eebd3894fe89 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -417,6 +417,7 @@ enum {
IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */
IFLA_INET6_TOKEN, /* device token */
IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */
+ IFLA_INET6_RA_MTU, /* mtu carried in the RA message */
__IFLA_INET6_MAX
};
@@ -479,6 +480,7 @@ enum {
IFLA_BR_MCAST_MLD_VERSION,
IFLA_BR_VLAN_STATS_PER_PORT,
IFLA_BR_MULTI_BOOLOPT,
+ IFLA_BR_MCAST_QUERIER_STATE,
__IFLA_BR_MAX,
};
@@ -855,6 +857,7 @@ enum {
IFLA_BOND_AD_ACTOR_SYSTEM,
IFLA_BOND_TLB_DYNAMIC_LB,
IFLA_BOND_PEER_NOTIF_DELAY,
+ IFLA_BOND_AD_LACP_ACTIVE,
__IFLA_BOND_MAX,
};
@@ -1260,4 +1263,14 @@ struct ifla_rmnet_flags {
__u32 mask;
};
+/* MCTP section */
+
+enum {
+ IFLA_MCTP_UNSPEC,
+ IFLA_MCTP_NET,
+ __IFLA_MCTP_MAX,
+};
+
+#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1)
+
#endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index d1b327036ae4..14168225cecd 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -188,11 +188,22 @@ struct ip_mreq_source {
};
struct ip_msfilter {
- __be32 imsf_multiaddr;
- __be32 imsf_interface;
- __u32 imsf_fmode;
- __u32 imsf_numsrc;
- __be32 imsf_slist[1];
+ union {
+ struct {
+ __be32 imsf_multiaddr_aux;
+ __be32 imsf_interface_aux;
+ __u32 imsf_fmode_aux;
+ __u32 imsf_numsrc_aux;
+ __be32 imsf_slist[1];
+ };
+ struct {
+ __be32 imsf_multiaddr;
+ __be32 imsf_interface;
+ __u32 imsf_fmode;
+ __u32 imsf_numsrc;
+ __be32 imsf_slist_flex[];
+ };
+ };
};
#define IP_MSFILTER_SIZE(numsrc) \
@@ -211,11 +222,22 @@ struct group_source_req {
};
struct group_filter {
- __u32 gf_interface; /* interface index */
- struct __kernel_sockaddr_storage gf_group; /* multicast address */
- __u32 gf_fmode; /* filter mode */
- __u32 gf_numsrc; /* number of sources */
- struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */
+ union {
+ struct {
+ __u32 gf_interface_aux; /* interface index */
+ struct __kernel_sockaddr_storage gf_group_aux; /* multicast address */
+ __u32 gf_fmode_aux; /* filter mode */
+ __u32 gf_numsrc_aux; /* number of sources */
+ struct __kernel_sockaddr_storage gf_slist[1]; /* source address list */
+ };
+ struct {
+ __u32 gf_interface; /* interface index */
+ struct __kernel_sockaddr_storage gf_group; /* multicast address */
+ __u32 gf_fmode; /* filter mode */
+ __u32 gf_numsrc; /* number of sources */
+ struct __kernel_sockaddr_storage gf_slist_flex[]; /* source address list */
+ };
+ };
};
#define GROUP_FILTER_SIZE(numsrc) \
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index 5ad396a57eb3..c4c53a9ab959 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -145,6 +145,7 @@ struct in6_flowlabel_req {
#define IPV6_TLV_PADN 1
#define IPV6_TLV_ROUTERALERT 5
#define IPV6_TLV_CALIPSO 7 /* RFC 5570 */
+#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */
#define IPV6_TLV_JUMBO 194
#define IPV6_TLV_HAO 201 /* home address option */
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 79126d5cd289..59ef35154e3d 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -44,6 +44,7 @@ struct io_uring_sqe {
__u32 splice_flags;
__u32 rename_flags;
__u32 unlink_flags;
+ __u32 hardlink_flags;
};
__u64 user_data; /* data to be passed back at completion time */
/* pack this to avoid bogus arm OABI complaints */
@@ -55,7 +56,10 @@ struct io_uring_sqe {
} __attribute__((packed));
/* personality to use, if used */
__u16 personality;
- __s32 splice_fd_in;
+ union {
+ __s32 splice_fd_in;
+ __u32 file_index;
+ };
__u64 __pad2[2];
};
@@ -133,6 +137,9 @@ enum {
IORING_OP_SHUTDOWN,
IORING_OP_RENAMEAT,
IORING_OP_UNLINKAT,
+ IORING_OP_MKDIRAT,
+ IORING_OP_SYMLINKAT,
+ IORING_OP_LINKAT,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -146,9 +153,13 @@ enum {
/*
* sqe->timeout_flags
*/
-#define IORING_TIMEOUT_ABS (1U << 0)
-#define IORING_TIMEOUT_UPDATE (1U << 1)
-
+#define IORING_TIMEOUT_ABS (1U << 0)
+#define IORING_TIMEOUT_UPDATE (1U << 1)
+#define IORING_TIMEOUT_BOOTTIME (1U << 2)
+#define IORING_TIMEOUT_REALTIME (1U << 3)
+#define IORING_LINK_TIMEOUT_UPDATE (1U << 4)
+#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
+#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
/*
* sqe->splice_flags
* extends splice(2) flags
@@ -306,6 +317,9 @@ enum {
IORING_REGISTER_IOWQ_AFF = 17,
IORING_UNREGISTER_IOWQ_AFF = 18,
+ /* set/get max number of workers */
+ IORING_REGISTER_IOWQ_MAX_WORKERS = 19,
+
/* this goes last */
IORING_REGISTER_LAST
};
diff --git a/include/uapi/linux/ioam6.h b/include/uapi/linux/ioam6.h
new file mode 100644
index 000000000000..ac4de376f0ce
--- /dev/null
+++ b/include/uapi/linux/ioam6.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * IPv6 IOAM implementation
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#ifndef _UAPI_LINUX_IOAM6_H
+#define _UAPI_LINUX_IOAM6_H
+
+#include <asm/byteorder.h>
+#include <linux/types.h>
+
+#define IOAM6_U16_UNAVAILABLE U16_MAX
+#define IOAM6_U32_UNAVAILABLE U32_MAX
+#define IOAM6_U64_UNAVAILABLE U64_MAX
+
+#define IOAM6_DEFAULT_ID (IOAM6_U32_UNAVAILABLE >> 8)
+#define IOAM6_DEFAULT_ID_WIDE (IOAM6_U64_UNAVAILABLE >> 8)
+#define IOAM6_DEFAULT_IF_ID IOAM6_U16_UNAVAILABLE
+#define IOAM6_DEFAULT_IF_ID_WIDE IOAM6_U32_UNAVAILABLE
+
+/*
+ * IPv6 IOAM Option Header
+ */
+struct ioam6_hdr {
+ __u8 opt_type;
+ __u8 opt_len;
+ __u8 :8; /* reserved */
+#define IOAM6_TYPE_PREALLOC 0
+ __u8 type;
+} __attribute__((packed));
+
+/*
+ * IOAM Trace Header
+ */
+struct ioam6_trace_hdr {
+ __be16 namespace_id;
+
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+
+ __u8 :1, /* unused */
+ :1, /* unused */
+ overflow:1,
+ nodelen:5;
+
+ __u8 remlen:7,
+ :1; /* unused */
+
+ union {
+ __be32 type_be32;
+
+ struct {
+ __u32 bit7:1,
+ bit6:1,
+ bit5:1,
+ bit4:1,
+ bit3:1,
+ bit2:1,
+ bit1:1,
+ bit0:1,
+ bit15:1, /* unused */
+ bit14:1, /* unused */
+ bit13:1, /* unused */
+ bit12:1, /* unused */
+ bit11:1,
+ bit10:1,
+ bit9:1,
+ bit8:1,
+ bit23:1, /* reserved */
+ bit22:1,
+ bit21:1, /* unused */
+ bit20:1, /* unused */
+ bit19:1, /* unused */
+ bit18:1, /* unused */
+ bit17:1, /* unused */
+ bit16:1, /* unused */
+ :8; /* reserved */
+ } type;
+ };
+
+#elif defined(__BIG_ENDIAN_BITFIELD)
+
+ __u8 nodelen:5,
+ overflow:1,
+ :1, /* unused */
+ :1; /* unused */
+
+ __u8 :1, /* unused */
+ remlen:7;
+
+ union {
+ __be32 type_be32;
+
+ struct {
+ __u32 bit0:1,
+ bit1:1,
+ bit2:1,
+ bit3:1,
+ bit4:1,
+ bit5:1,
+ bit6:1,
+ bit7:1,
+ bit8:1,
+ bit9:1,
+ bit10:1,
+ bit11:1,
+ bit12:1, /* unused */
+ bit13:1, /* unused */
+ bit14:1, /* unused */
+ bit15:1, /* unused */
+ bit16:1, /* unused */
+ bit17:1, /* unused */
+ bit18:1, /* unused */
+ bit19:1, /* unused */
+ bit20:1, /* unused */
+ bit21:1, /* unused */
+ bit22:1,
+ bit23:1, /* reserved */
+ :8; /* reserved */
+ } type;
+ };
+
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+
+#define IOAM6_TRACE_DATA_SIZE_MAX 244
+ __u8 data[0];
+} __attribute__((packed));
+
+#endif /* _UAPI_LINUX_IOAM6_H */
diff --git a/include/uapi/linux/ioam6_genl.h b/include/uapi/linux/ioam6_genl.h
new file mode 100644
index 000000000000..ca4b22833754
--- /dev/null
+++ b/include/uapi/linux/ioam6_genl.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * IPv6 IOAM Generic Netlink API
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#ifndef _UAPI_LINUX_IOAM6_GENL_H
+#define _UAPI_LINUX_IOAM6_GENL_H
+
+#define IOAM6_GENL_NAME "IOAM6"
+#define IOAM6_GENL_VERSION 0x1
+
+enum {
+ IOAM6_ATTR_UNSPEC,
+
+ IOAM6_ATTR_NS_ID, /* u16 */
+ IOAM6_ATTR_NS_DATA, /* u32 */
+ IOAM6_ATTR_NS_DATA_WIDE,/* u64 */
+
+#define IOAM6_MAX_SCHEMA_DATA_LEN (255 * 4)
+ IOAM6_ATTR_SC_ID, /* u32 */
+ IOAM6_ATTR_SC_DATA, /* Binary */
+ IOAM6_ATTR_SC_NONE, /* Flag */
+
+ IOAM6_ATTR_PAD,
+
+ __IOAM6_ATTR_MAX,
+};
+
+#define IOAM6_ATTR_MAX (__IOAM6_ATTR_MAX - 1)
+
+enum {
+ IOAM6_CMD_UNSPEC,
+
+ IOAM6_CMD_ADD_NAMESPACE,
+ IOAM6_CMD_DEL_NAMESPACE,
+ IOAM6_CMD_DUMP_NAMESPACES,
+
+ IOAM6_CMD_ADD_SCHEMA,
+ IOAM6_CMD_DEL_SCHEMA,
+ IOAM6_CMD_DUMP_SCHEMAS,
+
+ IOAM6_CMD_NS_SET_SCHEMA,
+
+ __IOAM6_CMD_MAX,
+};
+
+#define IOAM6_CMD_MAX (__IOAM6_CMD_MAX - 1)
+
+#endif /* _UAPI_LINUX_IOAM6_GENL_H */
diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h
new file mode 100644
index 000000000000..bae14636a8c8
--- /dev/null
+++ b/include/uapi/linux/ioam6_iptunnel.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * IPv6 IOAM Lightweight Tunnel API
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#ifndef _UAPI_LINUX_IOAM6_IPTUNNEL_H
+#define _UAPI_LINUX_IOAM6_IPTUNNEL_H
+
+enum {
+ IOAM6_IPTUNNEL_UNSPEC,
+ IOAM6_IPTUNNEL_TRACE, /* struct ioam6_trace_hdr */
+ __IOAM6_IPTUNNEL_MAX,
+};
+
+#define IOAM6_IPTUNNEL_MAX (__IOAM6_IPTUNNEL_MAX - 1)
+
+#endif /* _UAPI_LINUX_IOAM6_IPTUNNEL_H */
diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h
new file mode 100644
index 000000000000..f70f2596a6bf
--- /dev/null
+++ b/include/uapi/linux/ioprio.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_IOPRIO_H
+#define _UAPI_LINUX_IOPRIO_H
+
+/*
+ * Gives us 8 prio classes with 13 bits of data for each class
+ */
+#define IOPRIO_CLASS_SHIFT 13
+#define IOPRIO_CLASS_MASK 0x07
+#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1)
+
+#define IOPRIO_PRIO_CLASS(ioprio) \
+ (((ioprio) >> IOPRIO_CLASS_SHIFT) & IOPRIO_CLASS_MASK)
+#define IOPRIO_PRIO_DATA(ioprio) ((ioprio) & IOPRIO_PRIO_MASK)
+#define IOPRIO_PRIO_VALUE(class, data) \
+ ((((class) & IOPRIO_CLASS_MASK) << IOPRIO_CLASS_SHIFT) | \
+ ((data) & IOPRIO_PRIO_MASK))
+
+/*
+ * These are the io priority groups as implemented by the BFQ and mq-deadline
+ * schedulers. RT is the realtime class, it always gets premium service. For
+ * ATA disks supporting NCQ IO priority, RT class IOs will be processed using
+ * high priority NCQ commands. BE is the best-effort scheduling class, the
+ * default for any process. IDLE is the idle scheduling class, it is only
+ * served when no one else is using the disk.
+ */
+enum {
+ IOPRIO_CLASS_NONE,
+ IOPRIO_CLASS_RT,
+ IOPRIO_CLASS_BE,
+ IOPRIO_CLASS_IDLE,
+};
+
+/*
+ * The RT and BE priority classes both support up to 8 priority levels.
+ */
+#define IOPRIO_NR_LEVELS 8
+#define IOPRIO_BE_NR IOPRIO_NR_LEVELS
+
+enum {
+ IOPRIO_WHO_PROCESS = 1,
+ IOPRIO_WHO_PGRP,
+ IOPRIO_WHO_USER,
+};
+
+/*
+ * Fallback BE priority level.
+ */
+#define IOPRIO_NORM 4
+#define IOPRIO_BE_NORM IOPRIO_NORM
+
+#endif /* _UAPI_LINUX_IOPRIO_H */
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 70603775fe91..b243a53fa985 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -190,6 +190,9 @@ enum {
DEVCONF_NDISC_TCLASS,
DEVCONF_RPL_SEG_ENABLED,
DEVCONF_RA_DEFRTR_METRIC,
+ DEVCONF_IOAM6_ENABLED,
+ DEVCONF_IOAM6_ID,
+ DEVCONF_IOAM6_ID_WIDE,
DEVCONF_MAX
};
diff --git a/include/uapi/linux/ipx.h b/include/uapi/linux/ipx.h
deleted file mode 100644
index 3168137adae8..000000000000
--- a/include/uapi/linux/ipx.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _IPX_H_
-#define _IPX_H_
-#include <linux/libc-compat.h> /* for compatibility with glibc netipx/ipx.h */
-#include <linux/types.h>
-#include <linux/sockios.h>
-#include <linux/socket.h>
-#define IPX_NODE_LEN 6
-#define IPX_MTU 576
-
-#if __UAPI_DEF_SOCKADDR_IPX
-struct sockaddr_ipx {
- __kernel_sa_family_t sipx_family;
- __be16 sipx_port;
- __be32 sipx_network;
- unsigned char sipx_node[IPX_NODE_LEN];
- __u8 sipx_type;
- unsigned char sipx_zero; /* 16 byte fill */
-};
-#endif /* __UAPI_DEF_SOCKADDR_IPX */
-
-/*
- * So we can fit the extra info for SIOCSIFADDR into the address nicely
- */
-#define sipx_special sipx_port
-#define sipx_action sipx_zero
-#define IPX_DLTITF 0
-#define IPX_CRTITF 1
-
-#if __UAPI_DEF_IPX_ROUTE_DEFINITION
-struct ipx_route_definition {
- __be32 ipx_network;
- __be32 ipx_router_network;
- unsigned char ipx_router_node[IPX_NODE_LEN];
-};
-#endif /* __UAPI_DEF_IPX_ROUTE_DEFINITION */
-
-#if __UAPI_DEF_IPX_INTERFACE_DEFINITION
-struct ipx_interface_definition {
- __be32 ipx_network;
- unsigned char ipx_device[16];
- unsigned char ipx_dlink_type;
-#define IPX_FRAME_NONE 0
-#define IPX_FRAME_SNAP 1
-#define IPX_FRAME_8022 2
-#define IPX_FRAME_ETHERII 3
-#define IPX_FRAME_8023 4
-#define IPX_FRAME_TR_8022 5 /* obsolete */
- unsigned char ipx_special;
-#define IPX_SPECIAL_NONE 0
-#define IPX_PRIMARY 1
-#define IPX_INTERNAL 2
- unsigned char ipx_node[IPX_NODE_LEN];
-};
-#endif /* __UAPI_DEF_IPX_INTERFACE_DEFINITION */
-
-#if __UAPI_DEF_IPX_CONFIG_DATA
-struct ipx_config_data {
- unsigned char ipxcfg_auto_select_primary;
- unsigned char ipxcfg_auto_create_interfaces;
-};
-#endif /* __UAPI_DEF_IPX_CONFIG_DATA */
-
-/*
- * OLD Route Definition for backward compatibility.
- */
-
-#if __UAPI_DEF_IPX_ROUTE_DEF
-struct ipx_route_def {
- __be32 ipx_network;
- __be32 ipx_router_network;
-#define IPX_ROUTE_NO_ROUTER 0
- unsigned char ipx_router_node[IPX_NODE_LEN];
- unsigned char ipx_device[16];
- unsigned short ipx_flags;
-#define IPX_RT_SNAP 8
-#define IPX_RT_8022 4
-#define IPX_RT_BLUEBOOK 2
-#define IPX_RT_ROUTED 1
-};
-#endif /* __UAPI_DEF_IPX_ROUTE_DEF */
-
-#define SIOCAIPXITFCRT (SIOCPROTOPRIVATE)
-#define SIOCAIPXPRISLT (SIOCPROTOPRIVATE + 1)
-#define SIOCIPXCFGDATA (SIOCPROTOPRIVATE + 2)
-#define SIOCIPXNCPCONN (SIOCPROTOPRIVATE + 3)
-#endif /* _IPX_H_ */
diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h
deleted file mode 100644
index 2745afd9b8fa..000000000000
--- a/include/uapi/linux/lightnvm.h
+++ /dev/null
@@ -1,224 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Copyright (C) 2015 CNEX Labs. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
-#ifndef _UAPI_LINUX_LIGHTNVM_H
-#define _UAPI_LINUX_LIGHTNVM_H
-
-#ifdef __KERNEL__
-#include <linux/const.h>
-#else /* __KERNEL__ */
-#include <stdio.h>
-#include <sys/ioctl.h>
-#define DISK_NAME_LEN 32
-#endif /* __KERNEL__ */
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-#define NVM_TTYPE_NAME_MAX 48
-#define NVM_TTYPE_MAX 63
-#define NVM_MMTYPE_LEN 8
-
-#define NVM_CTRL_FILE "/dev/lightnvm/control"
-
-struct nvm_ioctl_info_tgt {
- __u32 version[3];
- __u32 reserved;
- char tgtname[NVM_TTYPE_NAME_MAX];
-};
-
-struct nvm_ioctl_info {
- __u32 version[3]; /* in/out - major, minor, patch */
- __u16 tgtsize; /* number of targets */
- __u16 reserved16; /* pad to 4K page */
- __u32 reserved[12];
- struct nvm_ioctl_info_tgt tgts[NVM_TTYPE_MAX];
-};
-
-enum {
- NVM_DEVICE_ACTIVE = 1 << 0,
-};
-
-struct nvm_ioctl_device_info {
- char devname[DISK_NAME_LEN];
- char bmname[NVM_TTYPE_NAME_MAX];
- __u32 bmversion[3];
- __u32 flags;
- __u32 reserved[8];
-};
-
-struct nvm_ioctl_get_devices {
- __u32 nr_devices;
- __u32 reserved[31];
- struct nvm_ioctl_device_info info[31];
-};
-
-struct nvm_ioctl_create_simple {
- __u32 lun_begin;
- __u32 lun_end;
-};
-
-struct nvm_ioctl_create_extended {
- __u16 lun_begin;
- __u16 lun_end;
- __u16 op;
- __u16 rsv;
-};
-
-enum {
- NVM_CONFIG_TYPE_SIMPLE = 0,
- NVM_CONFIG_TYPE_EXTENDED = 1,
-};
-
-struct nvm_ioctl_create_conf {
- __u32 type;
- union {
- struct nvm_ioctl_create_simple s;
- struct nvm_ioctl_create_extended e;
- };
-};
-
-enum {
- NVM_TARGET_FACTORY = 1 << 0, /* Init target in factory mode */
-};
-
-struct nvm_ioctl_create {
- char dev[DISK_NAME_LEN]; /* open-channel SSD device */
- char tgttype[NVM_TTYPE_NAME_MAX]; /* target type name */
- char tgtname[DISK_NAME_LEN]; /* dev to expose target as */
-
- __u32 flags;
-
- struct nvm_ioctl_create_conf conf;
-};
-
-struct nvm_ioctl_remove {
- char tgtname[DISK_NAME_LEN];
-
- __u32 flags;
-};
-
-struct nvm_ioctl_dev_init {
- char dev[DISK_NAME_LEN]; /* open-channel SSD device */
- char mmtype[NVM_MMTYPE_LEN]; /* register to media manager */
-
- __u32 flags;
-};
-
-enum {
- NVM_FACTORY_ERASE_ONLY_USER = 1 << 0, /* erase only blocks used as
- * host blks or grown blks */
- NVM_FACTORY_RESET_HOST_BLKS = 1 << 1, /* remove host blk marks */
- NVM_FACTORY_RESET_GRWN_BBLKS = 1 << 2, /* remove grown blk marks */
- NVM_FACTORY_NR_BITS = 1 << 3, /* stops here */
-};
-
-struct nvm_ioctl_dev_factory {
- char dev[DISK_NAME_LEN];
-
- __u32 flags;
-};
-
-struct nvm_user_vio {
- __u8 opcode;
- __u8 flags;
- __u16 control;
- __u16 nppas;
- __u16 rsvd;
- __u64 metadata;
- __u64 addr;
- __u64 ppa_list;
- __u32 metadata_len;
- __u32 data_len;
- __u64 status;
- __u32 result;
- __u32 rsvd3[3];
-};
-
-struct nvm_passthru_vio {
- __u8 opcode;
- __u8 flags;
- __u8 rsvd[2];
- __u32 nsid;
- __u32 cdw2;
- __u32 cdw3;
- __u64 metadata;
- __u64 addr;
- __u32 metadata_len;
- __u32 data_len;
- __u64 ppa_list;
- __u16 nppas;
- __u16 control;
- __u32 cdw13;
- __u32 cdw14;
- __u32 cdw15;
- __u64 status;
- __u32 result;
- __u32 timeout_ms;
-};
-
-/* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */
-enum {
- /* top level cmds */
- NVM_INFO_CMD = 0x20,
- NVM_GET_DEVICES_CMD,
-
- /* device level cmds */
- NVM_DEV_CREATE_CMD,
- NVM_DEV_REMOVE_CMD,
-
- /* Init a device to support LightNVM media managers */
- NVM_DEV_INIT_CMD,
-
- /* Factory reset device */
- NVM_DEV_FACTORY_CMD,
-
- /* Vector user I/O */
- NVM_DEV_VIO_ADMIN_CMD = 0x41,
- NVM_DEV_VIO_CMD = 0x42,
- NVM_DEV_VIO_USER_CMD = 0x43,
-};
-
-#define NVM_IOCTL 'L' /* 0x4c */
-
-#define NVM_INFO _IOWR(NVM_IOCTL, NVM_INFO_CMD, \
- struct nvm_ioctl_info)
-#define NVM_GET_DEVICES _IOR(NVM_IOCTL, NVM_GET_DEVICES_CMD, \
- struct nvm_ioctl_get_devices)
-#define NVM_DEV_CREATE _IOW(NVM_IOCTL, NVM_DEV_CREATE_CMD, \
- struct nvm_ioctl_create)
-#define NVM_DEV_REMOVE _IOW(NVM_IOCTL, NVM_DEV_REMOVE_CMD, \
- struct nvm_ioctl_remove)
-#define NVM_DEV_INIT _IOW(NVM_IOCTL, NVM_DEV_INIT_CMD, \
- struct nvm_ioctl_dev_init)
-#define NVM_DEV_FACTORY _IOW(NVM_IOCTL, NVM_DEV_FACTORY_CMD, \
- struct nvm_ioctl_dev_factory)
-
-#define NVME_NVM_IOCTL_IO_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_USER_CMD, \
- struct nvm_passthru_vio)
-#define NVME_NVM_IOCTL_ADMIN_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_ADMIN_CMD,\
- struct nvm_passthru_vio)
-#define NVME_NVM_IOCTL_SUBMIT_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_CMD,\
- struct nvm_user_vio)
-
-#define NVM_VERSION_MAJOR 1
-#define NVM_VERSION_MINOR 0
-#define NVM_VERSION_PATCHLEVEL 0
-
-#endif
diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
index 568a4303ccce..2e206919125c 100644
--- a/include/uapi/linux/lwtunnel.h
+++ b/include/uapi/linux/lwtunnel.h
@@ -14,6 +14,7 @@ enum lwtunnel_encap_types {
LWTUNNEL_ENCAP_BPF,
LWTUNNEL_ENCAP_SEG6_LOCAL,
LWTUNNEL_ENCAP_RPL,
+ LWTUNNEL_ENCAP_IOAM6,
__LWTUNNEL_ENCAP_MAX,
};
diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
new file mode 100644
index 000000000000..52b54d13f385
--- /dev/null
+++ b/include/uapi/linux/mctp.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Management Component Transport Protocol (MCTP)
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#ifndef __UAPI_MCTP_H
+#define __UAPI_MCTP_H
+
+#include <linux/types.h>
+
+typedef __u8 mctp_eid_t;
+
+struct mctp_addr {
+ mctp_eid_t s_addr;
+};
+
+struct sockaddr_mctp {
+ unsigned short int smctp_family;
+ int smctp_network;
+ struct mctp_addr smctp_addr;
+ __u8 smctp_type;
+ __u8 smctp_tag;
+};
+
+#define MCTP_NET_ANY 0x0
+
+#define MCTP_ADDR_NULL 0x00
+#define MCTP_ADDR_ANY 0xff
+
+#define MCTP_TAG_MASK 0x07
+#define MCTP_TAG_OWNER 0x08
+
+#endif /* __UAPI_MCTP_H */
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index dd7a166fdf9c..4d93967f8aea 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -73,7 +73,8 @@
#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */
#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */
#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
-#define MOVE_MOUNT__MASK 0x00000077
+#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */
+#define MOVE_MOUNT__MASK 0x00000177
/*
* fsopen() flags.
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 7b05f7102321..f66038b9551f 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -73,6 +73,7 @@ enum {
#define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0)
#define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1)
#define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2)
+#define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3)
enum {
MPTCP_PM_CMD_UNSPEC,
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index dc8b72201f6c..00a60695fa53 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -66,8 +66,11 @@ enum {
#define NUD_NONE 0x00
/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
- and make no address resolution or NUD.
- NUD_PERMANENT also cannot be deleted by garbage collectors.
+ * and make no address resolution or NUD.
+ * NUD_PERMANENT also cannot be deleted by garbage collectors.
+ * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry
+ * states don't make sense and thus are ignored. Such entries don't age and
+ * can roam.
*/
struct nda_cacheinfo {
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index d8484be72fdc..c6e6d7d7d538 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -56,6 +56,7 @@ enum ctattr_type {
CTA_LABELS_MASK,
CTA_SYNPROXY,
CTA_FILTER,
+ CTA_STATUS_MASK,
__CTA_MAX
};
#define CTA_MAX (__CTA_MAX - 1)
diff --git a/include/uapi/linux/netfilter/nfnetlink_hook.h b/include/uapi/linux/netfilter/nfnetlink_hook.h
index 912ec60b26b0..bbcd285b22e1 100644
--- a/include/uapi/linux/netfilter/nfnetlink_hook.h
+++ b/include/uapi/linux/netfilter/nfnetlink_hook.h
@@ -43,6 +43,15 @@ enum nfnl_hook_chain_info_attributes {
};
#define NFNLA_HOOK_INFO_MAX (__NFNLA_HOOK_INFO_MAX - 1)
+enum nfnl_hook_chain_desc_attributes {
+ NFNLA_CHAIN_UNSPEC,
+ NFNLA_CHAIN_TABLE,
+ NFNLA_CHAIN_FAMILY,
+ NFNLA_CHAIN_NAME,
+ __NFNLA_CHAIN_MAX,
+};
+#define NFNLA_CHAIN_MAX (__NFNLA_CHAIN_MAX - 1)
+
/**
* enum nfnl_hook_chaintype - chain type
*
diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h
index 427294dd56a1..e29e8accc4f4 100644
--- a/include/uapi/linux/nfsd/nfsfh.h
+++ b/include/uapi/linux/nfsd/nfsfh.h
@@ -33,7 +33,6 @@ struct nfs_fhbase_old {
/*
* This is the new flexible, extensible style NFSv2/v3/v4 file handle.
- * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
*
* The file handle starts with a sequence of four-byte words.
* The first word contains a version number (1) and three descriptor bytes
diff --git a/include/uapi/linux/nl80211-vnd-intel.h b/include/uapi/linux/nl80211-vnd-intel.h
new file mode 100644
index 000000000000..0bf177b84fd9
--- /dev/null
+++ b/include/uapi/linux/nl80211-vnd-intel.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
+ * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
+ * Copyright (C) 2016-2017 Intel Deutschland GmbH
+ */
+#ifndef __VENDOR_CMD_INTEL_H__
+#define __VENDOR_CMD_INTEL_H__
+
+#define INTEL_OUI 0x001735
+
+/**
+ * enum iwl_mvm_vendor_cmd - supported vendor commands
+ * @IWL_MVM_VENDOR_CMD_GET_CSME_CONN_INFO: reports CSME connection info.
+ * @IWL_MVM_VENDOR_CMD_HOST_GET_OWNERSHIP: asks for ownership on the device.
+ * @IWL_MVM_VENDOR_CMD_ROAMING_FORBIDDEN_EVENT: notifies if roaming is allowed.
+ * It contains a &IWL_MVM_VENDOR_ATTR_ROAMING_FORBIDDEN and a
+ * &IWL_MVM_VENDOR_ATTR_VIF_ADDR attributes.
+ */
+
+enum iwl_mvm_vendor_cmd {
+ IWL_MVM_VENDOR_CMD_GET_CSME_CONN_INFO = 0x2d,
+ IWL_MVM_VENDOR_CMD_HOST_GET_OWNERSHIP = 0x30,
+ IWL_MVM_VENDOR_CMD_ROAMING_FORBIDDEN_EVENT = 0x32,
+};
+
+enum iwl_vendor_auth_akm_mode {
+ IWL_VENDOR_AUTH_OPEN,
+ IWL_VENDOR_AUTH_RSNA = 0x6,
+ IWL_VENDOR_AUTH_RSNA_PSK,
+ IWL_VENDOR_AUTH_SAE = 0x9,
+ IWL_VENDOR_AUTH_MAX,
+};
+
+/**
+ * enum iwl_mvm_vendor_attr - attributes used in vendor commands
+ * @__IWL_MVM_VENDOR_ATTR_INVALID: attribute 0 is invalid
+ * @IWL_MVM_VENDOR_ATTR_VIF_ADDR: interface MAC address
+ * @IWL_MVM_VENDOR_ATTR_ADDR: MAC address
+ * @IWL_MVM_VENDOR_ATTR_SSID: SSID (binary attribute, 0..32 octets)
+ * @IWL_MVM_VENDOR_ATTR_STA_CIPHER: the cipher to use for the station with the
+ * mac address specified in &IWL_MVM_VENDOR_ATTR_ADDR.
+ * @IWL_MVM_VENDOR_ATTR_ROAMING_FORBIDDEN: u8 attribute. Indicates whether
+ * roaming is forbidden or not. Value 1 means roaming is forbidden,
+ * 0 means roaming is allowed.
+ * @IWL_MVM_VENDOR_ATTR_AUTH_MODE: u32 attribute. Authentication mode type
+ * as specified in &enum iwl_vendor_auth_akm_mode.
+ * @IWL_MVM_VENDOR_ATTR_CHANNEL_NUM: u8 attribute. Contains channel number.
+ * @IWL_MVM_VENDOR_ATTR_BAND: u8 attribute.
+ * 0 for 2.4 GHz band, 1 for 5.2GHz band and 2 for 6GHz band.
+ * @IWL_MVM_VENDOR_ATTR_COLLOC_CHANNEL: u32 attribute. Channel number of
+ * collocated AP. Relevant for 6GHz AP info.
+ * @IWL_MVM_VENDOR_ATTR_COLLOC_ADDR: MAC address of a collocated AP.
+ * Relevant for 6GHz AP info.
+ *
+ * @NUM_IWL_MVM_VENDOR_ATTR: number of vendor attributes
+ * @MAX_IWL_MVM_VENDOR_ATTR: highest vendor attribute number
+
+ */
+enum iwl_mvm_vendor_attr {
+ __IWL_MVM_VENDOR_ATTR_INVALID = 0x00,
+ IWL_MVM_VENDOR_ATTR_VIF_ADDR = 0x02,
+ IWL_MVM_VENDOR_ATTR_ADDR = 0x0a,
+ IWL_MVM_VENDOR_ATTR_SSID = 0x3d,
+ IWL_MVM_VENDOR_ATTR_STA_CIPHER = 0x51,
+ IWL_MVM_VENDOR_ATTR_ROAMING_FORBIDDEN = 0x64,
+ IWL_MVM_VENDOR_ATTR_AUTH_MODE = 0x65,
+ IWL_MVM_VENDOR_ATTR_CHANNEL_NUM = 0x66,
+ IWL_MVM_VENDOR_ATTR_BAND = 0x69,
+ IWL_MVM_VENDOR_ATTR_COLLOC_CHANNEL = 0x70,
+ IWL_MVM_VENDOR_ATTR_COLLOC_ADDR = 0x71,
+
+ NUM_IWL_MVM_VENDOR_ATTR,
+ MAX_IWL_MVM_VENDOR_ATTR = NUM_IWL_MVM_VENDOR_ATTR - 1,
+};
+
+#endif /* __VENDOR_CMD_INTEL_H__ */
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index db474994fa73..c2efea98e060 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1185,6 +1185,21 @@
* passed using %NL80211_ATTR_SAR_SPEC. %NL80211_ATTR_WIPHY is used to
* specify the wiphy index to be applied to.
*
+ * @NL80211_CMD_OBSS_COLOR_COLLISION: This notification is sent out whenever
+ * mac80211/drv detects a bss color collision.
+ *
+ * @NL80211_CMD_COLOR_CHANGE_REQUEST: This command is used to indicate that
+ * userspace wants to change the BSS color.
+ *
+ * @NL80211_CMD_COLOR_CHANGE_STARTED: Notify userland that a color change has
+ * started
+ *
+ * @NL80211_CMD_COLOR_CHANGE_ABORTED: Notify userland that the color change has
+ * been aborted
+ *
+ * @NL80211_CMD_COLOR_CHANGE_COMPLETED: Notify userland that the color change
+ * has completed
+ *
* @NL80211_CMD_MAX: highest used command number
* @__NL80211_CMD_AFTER_LAST: internal use
*/
@@ -1417,6 +1432,14 @@ enum nl80211_commands {
NL80211_CMD_SET_SAR_SPECS,
+ NL80211_CMD_OBSS_COLOR_COLLISION,
+
+ NL80211_CMD_COLOR_CHANGE_REQUEST,
+
+ NL80211_CMD_COLOR_CHANGE_STARTED,
+ NL80211_CMD_COLOR_CHANGE_ABORTED,
+ NL80211_CMD_COLOR_CHANGE_COMPLETED,
+
/* add new commands above here */
/* used to define NL80211_CMD_MAX below */
@@ -2560,6 +2583,16 @@ enum nl80211_commands {
* disassoc events to indicate that an immediate reconnect to the AP
* is desired.
*
+ * @NL80211_ATTR_OBSS_COLOR_BITMAP: bitmap of the u64 BSS colors for the
+ * %NL80211_CMD_OBSS_COLOR_COLLISION event.
+ *
+ * @NL80211_ATTR_COLOR_CHANGE_COUNT: u8 attribute specifying the number of TBTT's
+ * until the color switch event.
+ * @NL80211_ATTR_COLOR_CHANGE_COLOR: u8 attribute specifying the color that we are
+ * switching to
+ * @NL80211_ATTR_COLOR_CHANGE_ELEMS: Nested set of attributes containing the IE
+ * information for the time while performing a color switch.
+ *
* @NUM_NL80211_ATTR: total number of nl80211_attrs available
* @NL80211_ATTR_MAX: highest attribute number currently defined
* @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3057,6 +3090,12 @@ enum nl80211_attrs {
NL80211_ATTR_DISABLE_HE,
+ NL80211_ATTR_OBSS_COLOR_BITMAP,
+
+ NL80211_ATTR_COLOR_CHANGE_COUNT,
+ NL80211_ATTR_COLOR_CHANGE_COLOR,
+ NL80211_ATTR_COLOR_CHANGE_ELEMS,
+
/* add attributes here, update the policy in nl80211.c */
__NL80211_ATTR_AFTER_LAST,
@@ -5953,6 +5992,9 @@ enum nl80211_feature_flags {
* frame protection for all management frames exchanged during the
* negotiation and range measurement procedure.
*
+ * @NL80211_EXT_FEATURE_BSS_COLOR: The driver supports BSS color collision
+ * detection and change announcements.
+ *
* @NUM_NL80211_EXT_FEATURES: number of extended features.
* @MAX_NL80211_EXT_FEATURES: highest extended feature index.
*/
@@ -6017,6 +6059,7 @@ enum nl80211_ext_feature_index {
NL80211_EXT_FEATURE_SECURE_LTF,
NL80211_EXT_FEATURE_SECURE_RTT,
NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE,
+ NL80211_EXT_FEATURE_BSS_COLOR,
/* add new features before the definition below */
NUM_NL80211_EXT_FEATURES,
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 8d16744edc31..150bcff49b1c 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -70,6 +70,8 @@ enum ovs_datapath_cmd {
* set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on
* %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
* not be sent.
+ * @OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when
+ * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set.
* @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
* datapath. Always present in notifications.
* @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the
@@ -87,6 +89,9 @@ enum ovs_datapath_attr {
OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */
OVS_DP_ATTR_PAD,
OVS_DP_ATTR_MASKS_CACHE_SIZE,
+ OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls in
+ * per-cpu dispatch mode
+ */
__OVS_DP_ATTR_MAX
};
@@ -127,6 +132,9 @@ struct ovs_vport_stats {
/* Allow tc offload recirc sharing */
#define OVS_DP_F_TC_RECIRC_SHARING (1 << 2)
+/* Allow per-cpu dispatch of upcalls */
+#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3)
+
/* Fixed logical ports. */
#define OVSP_LOCAL ((__u32)0)
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 025c40fef93d..6836ccb9c45d 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -22,6 +22,7 @@ enum {
__TCA_ACT_MAX
};
+/* See other TCA_ACT_FLAGS_ * flags in include/net/act_api.h. */
#define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for
* actions stats.
*/
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 967d9c55323d..964c41ed303e 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -213,6 +213,7 @@ struct prctl_mm_map {
/* Speculation control variants */
# define PR_SPEC_STORE_BYPASS 0
# define PR_SPEC_INDIRECT_BRANCH 1
+# define PR_SPEC_L1D_FLUSH 2
/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
# define PR_SPEC_NOT_AFFECTED 0
# define PR_SPEC_PRCTL (1UL << 0)
diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h
index c3409c8ec0dd..eb0a9a5b6e71 100644
--- a/include/uapi/linux/socket.h
+++ b/include/uapi/linux/socket.h
@@ -26,4 +26,9 @@ struct __kernel_sockaddr_storage {
};
};
+#define SOCK_SNDBUF_LOCK 1
+#define SOCK_RCVBUF_LOCK 2
+
+#define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK)
+
#endif /* _UAPI_LINUX_SOCKET_H */
diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h
index c525b3503797..af6ef2cfbf3d 100644
--- a/include/uapi/linux/tc_act/tc_skbmod.h
+++ b/include/uapi/linux/tc_act/tc_skbmod.h
@@ -17,6 +17,7 @@
#define SKBMOD_F_SMAC 0x2
#define SKBMOD_F_ETYPE 0x4
#define SKBMOD_F_SWAPMAC 0x8
+#define SKBMOD_F_ECN 0x10
struct tc_skbmod {
tc_gen;
diff --git a/include/uapi/linux/virtio_i2c.h b/include/uapi/linux/virtio_i2c.h
new file mode 100644
index 000000000000..7c6a6fc01ad6
--- /dev/null
+++ b/include/uapi/linux/virtio_i2c.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later WITH Linux-syscall-note */
+/*
+ * Definitions for virtio I2C Adapter
+ *
+ * Copyright (c) 2021 Intel Corporation. All rights reserved.
+ */
+
+#ifndef _UAPI_LINUX_VIRTIO_I2C_H
+#define _UAPI_LINUX_VIRTIO_I2C_H
+
+#include <linux/const.h>
+#include <linux/types.h>
+
+/* The bit 0 of the @virtio_i2c_out_hdr.@flags, used to group the requests */
+#define VIRTIO_I2C_FLAGS_FAIL_NEXT _BITUL(0)
+
+/**
+ * struct virtio_i2c_out_hdr - the virtio I2C message OUT header
+ * @addr: the controlled device address
+ * @padding: used to pad to full dword
+ * @flags: used for feature extensibility
+ */
+struct virtio_i2c_out_hdr {
+ __le16 addr;
+ __le16 padding;
+ __le32 flags;
+};
+
+/**
+ * struct virtio_i2c_in_hdr - the virtio I2C message IN header
+ * @status: the processing result from the backend
+ */
+struct virtio_i2c_in_hdr {
+ __u8 status;
+};
+
+/* The final status written by the device */
+#define VIRTIO_I2C_MSG_OK 0
+#define VIRTIO_I2C_MSG_ERR 1
+
+#endif /* _UAPI_LINUX_VIRTIO_I2C_H */
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 70a8057ad4bb..99aa27b100bc 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -55,6 +55,7 @@
#define VIRTIO_ID_FS 26 /* virtio filesystem */
#define VIRTIO_ID_PMEM 27 /* virtio pmem */
#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
+#define VIRTIO_ID_I2C_ADAPTER 34 /* virtio i2c adapter */
#define VIRTIO_ID_BT 40 /* virtio bluetooth */
/*
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index ffc6a5391bb7..b96c1ea7166d 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -213,6 +213,11 @@ enum {
XFRM_MSG_GETSPDINFO,
#define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO
+ XFRM_MSG_SETDEFAULT,
+#define XFRM_MSG_SETDEFAULT XFRM_MSG_SETDEFAULT
+ XFRM_MSG_GETDEFAULT,
+#define XFRM_MSG_GETDEFAULT XFRM_MSG_GETDEFAULT
+
XFRM_MSG_MAPPING,
#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING
__XFRM_MSG_MAX
@@ -508,6 +513,12 @@ struct xfrm_user_offload {
#define XFRM_OFFLOAD_IPV6 1
#define XFRM_OFFLOAD_INBOUND 2
+struct xfrm_userpolicy_default {
+#define XFRM_USERPOLICY_DIRMASK_MAX (sizeof(__u8) * 8)
+ __u8 dirmask;
+ __u8 action;
+};
+
#ifndef __KERNEL__
/* backwards compatibility for userspace */
#define XFRMGRP_ACQUIRE 1
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 74aede860de7..b691d6891e51 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -432,10 +432,6 @@ retry:
printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");
printk_all_partitions();
-#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
- printk("DEBUG_BLOCK_EXT_DEVT is enabled, you need to specify "
- "explicit textual name for \"root=\" boot option.\n");
-#endif
panic("VFS: Unable to mount root fs on %s", b);
}
if (!(flags & SB_RDONLY)) {
diff --git a/init/init_task.c b/init/init_task.c
index 562f2ef8d157..2d024066e27b 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -80,6 +80,7 @@ struct task_struct init_task
.normal_prio = MAX_PRIO - 20,
.policy = SCHED_NORMAL,
.cpus_ptr = &init_task.cpus_mask,
+ .user_cpus_ptr = NULL,
.cpus_mask = CPU_MASK_ALL,
.nr_cpus_allowed= NR_CPUS,
.mm = NULL,
diff --git a/init/main.c b/init/main.c
index f5b8246e8aa1..daad6979f782 100644
--- a/init/main.c
+++ b/init/main.c
@@ -397,6 +397,12 @@ static int __init bootconfig_params(char *param, char *val,
return 0;
}
+static int __init warn_bootconfig(char *str)
+{
+ /* The 'bootconfig' has been handled by bootconfig_params(). */
+ return 0;
+}
+
static void __init setup_boot_config(void)
{
static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
@@ -475,9 +481,8 @@ static int __init warn_bootconfig(char *str)
pr_warn("WARNING: 'bootconfig' found on the kernel command line but CONFIG_BOOT_CONFIG is not set.\n");
return 0;
}
-early_param("bootconfig", warn_bootconfig);
-
#endif
+early_param("bootconfig", warn_bootconfig);
/* Change NUL term back to "=", to make "param" the whole string. */
static void __init repair_env_string(char *param, char *val)
@@ -1221,7 +1226,7 @@ trace_initcall_start_cb(void *data, initcall_t fn)
{
ktime_t *calltime = (ktime_t *)data;
- printk(KERN_DEBUG "calling %pS @ %i\n", fn, task_pid_nr(current));
+ printk(KERN_DEBUG "calling %pS @ %i irqs_disabled() %d\n", fn, task_pid_nr(current), irqs_disabled());
*calltime = ktime_get();
}
@@ -1235,8 +1240,8 @@ trace_initcall_finish_cb(void *data, initcall_t fn, int ret)
rettime = ktime_get();
delta = ktime_sub(rettime, *calltime);
duration = (unsigned long long) ktime_to_ns(delta) >> 10;
- printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
- fn, ret, duration);
+ printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs, irqs_disabled() %d\n",
+ fn, ret, duration, irqs_disabled());
}
static ktime_t initcall_calltime;
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 3de8fd11873b..4198f0273ecd 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -251,7 +251,7 @@ config ARCH_USE_QUEUED_RWLOCKS
config QUEUED_RWLOCKS
def_bool y if ARCH_USE_QUEUED_RWLOCKS
- depends on SMP
+ depends on SMP && !PREEMPT_RT
config ARCH_HAS_MMIOWB
bool
diff --git a/kernel/audit.h b/kernel/audit.h
index b565ea16c0a5..d6a2c899a8db 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -6,6 +6,9 @@
* Copyright 2005 IBM Corporation
*/
+#ifndef _KERNEL_AUDIT_H_
+#define _KERNEL_AUDIT_H_
+
#include <linux/fs.h>
#include <linux/audit.h>
#include <linux/skbuff.h>
@@ -331,3 +334,5 @@ extern int audit_filter(int msgtype, unsigned int listtype);
extern void audit_ctl_lock(void);
extern void audit_ctl_unlock(void);
+
+#endif
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index b2be4e978ba3..2cd7b5694422 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -593,7 +593,6 @@ static void prune_tree_chunks(struct audit_tree *victim, bool tagged)
spin_lock(&hash_lock);
}
spin_unlock(&hash_lock);
- put_tree(victim);
}
/*
@@ -602,6 +601,7 @@ static void prune_tree_chunks(struct audit_tree *victim, bool tagged)
static void prune_one(struct audit_tree *victim)
{
prune_tree_chunks(victim, false);
+ put_tree(victim);
}
/* trim the uncommitted chunks from tree */
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index bd04f4a44c01..a82d6de86522 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -29,7 +29,7 @@ config BPF_SYSCALL
select IRQ_WORK
select TASKS_TRACE_RCU
select BINARY_PRINTF
- select NET_SOCK_MSG if INET
+ select NET_SOCK_MSG if NET
default n
help
Enable the bpf() system call that allows to manipulate BPF programs
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 3c4105603f9d..cebd4fb06d19 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -287,6 +287,12 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key
return 0;
}
+static void check_and_free_timer_in_array(struct bpf_array *arr, void *val)
+{
+ if (unlikely(map_value_has_timer(&arr->map)))
+ bpf_timer_cancel_and_free(val + arr->map.timer_off);
+}
+
/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
u64 map_flags)
@@ -321,6 +327,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
copy_map_value_locked(map, val, value, false);
else
copy_map_value(map, val, value);
+ check_and_free_timer_in_array(array, val);
}
return 0;
}
@@ -374,6 +381,19 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
return (void *)round_down((unsigned long)array, PAGE_SIZE);
}
+static void array_map_free_timers(struct bpf_map *map)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ int i;
+
+ if (likely(!map_value_has_timer(map)))
+ return;
+
+ for (i = 0; i < array->map.max_entries; i++)
+ bpf_timer_cancel_and_free(array->value + array->elem_size * i +
+ map->timer_off);
+}
+
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
@@ -668,6 +688,7 @@ const struct bpf_map_ops array_map_ops = {
.map_alloc = array_map_alloc,
.map_free = array_map_free,
.map_get_next_key = array_map_get_next_key,
+ .map_release_uref = array_map_free_timers,
.map_lookup_elem = array_map_lookup_elem,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 2d4fbdbb194e..b2ee45064e06 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -360,6 +360,28 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
return supported;
}
+const struct bpf_func_proto *
+bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ const struct bpf_iter_target_info *tinfo;
+ const struct bpf_func_proto *fn = NULL;
+
+ mutex_lock(&targets_mutex);
+ list_for_each_entry(tinfo, &targets, list) {
+ if (tinfo->btf_id == prog->aux->attach_btf_id) {
+ const struct bpf_iter_reg *reg_info;
+
+ reg_info = tinfo->reg_info;
+ if (reg_info->get_func_proto)
+ fn = reg_info->get_func_proto(func_id, prog);
+ break;
+ }
+ }
+ mutex_unlock(&targets_mutex);
+
+ return fn;
+}
+
static void bpf_iter_link_release(struct bpf_link *link)
{
struct bpf_iter_link *iter_link =
@@ -664,7 +686,7 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
rcu_read_lock();
migrate_disable();
- ret = BPF_PROG_RUN(prog, ctx);
+ ret = bpf_prog_run(prog, ctx);
migrate_enable();
rcu_read_unlock();
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 70f6fd4fa305..d6731c32864e 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -28,6 +28,7 @@ struct bpf_struct_ops_value {
struct bpf_struct_ops_map {
struct bpf_map map;
+ struct rcu_head rcu;
const struct bpf_struct_ops *st_ops;
/* protect map_update */
struct mutex lock;
@@ -622,6 +623,14 @@ bool bpf_struct_ops_get(const void *kdata)
return refcount_inc_not_zero(&kvalue->refcnt);
}
+static void bpf_struct_ops_put_rcu(struct rcu_head *head)
+{
+ struct bpf_struct_ops_map *st_map;
+
+ st_map = container_of(head, struct bpf_struct_ops_map, rcu);
+ bpf_map_put(&st_map->map);
+}
+
void bpf_struct_ops_put(const void *kdata)
{
struct bpf_struct_ops_value *kvalue;
@@ -632,6 +641,17 @@ void bpf_struct_ops_put(const void *kdata)
st_map = container_of(kvalue, struct bpf_struct_ops_map,
kvalue);
- bpf_map_put(&st_map->map);
+ /* The struct_ops's function may switch to another struct_ops.
+ *
+ * For example, bpf_tcp_cc_x->init() may switch to
+ * another tcp_cc_y by calling
+ * setsockopt(TCP_CONGESTION, "tcp_cc_y").
+ * During the switch, bpf_struct_ops_put(tcp_cc_x) is called
+ * and its map->refcnt may reach 0, which then frees its
+ * trampoline image while tcp_cc_x is still running.
+ *
+ * Thus, a rcu grace period is needed here.
+ */
+ call_rcu(&st_map->rcu, bpf_struct_ops_put_rcu);
}
}
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 3ce75758d394..ebfa8bc90892 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -317,15 +317,13 @@ const struct bpf_map_ops task_storage_map_ops = {
.map_owner_storage_ptr = task_storage_ptr,
};
-BTF_ID_LIST_SINGLE(bpf_task_storage_btf_ids, struct, task_struct)
-
const struct bpf_func_proto bpf_task_storage_get_proto = {
.func = bpf_task_storage_get,
.gpl_only = false,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_BTF_ID,
- .arg2_btf_id = &bpf_task_storage_btf_ids[0],
+ .arg2_btf_id = &btf_task_struct_ids[0],
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
.arg4_type = ARG_ANYTHING,
};
@@ -336,5 +334,5 @@ const struct bpf_func_proto bpf_task_storage_delete_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_BTF_ID,
- .arg2_btf_id = &bpf_task_storage_btf_ids[0],
+ .arg2_btf_id = &btf_task_struct_ids[0],
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index cb4b72997d9b..dfe61df4f974 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3046,43 +3046,92 @@ static void btf_struct_log(struct btf_verifier_env *env,
btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
}
-/* find 'struct bpf_spin_lock' in map value.
- * return >= 0 offset if found
- * and < 0 in case of error
- */
-int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
+static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t,
+ const char *name, int sz, int align)
{
const struct btf_member *member;
u32 i, off = -ENOENT;
- if (!__btf_type_is_struct(t))
- return -EINVAL;
-
for_each_member(i, t, member) {
const struct btf_type *member_type = btf_type_by_id(btf,
member->type);
if (!__btf_type_is_struct(member_type))
continue;
- if (member_type->size != sizeof(struct bpf_spin_lock))
+ if (member_type->size != sz)
continue;
- if (strcmp(__btf_name_by_offset(btf, member_type->name_off),
- "bpf_spin_lock"))
+ if (strcmp(__btf_name_by_offset(btf, member_type->name_off), name))
continue;
if (off != -ENOENT)
- /* only one 'struct bpf_spin_lock' is allowed */
+ /* only one such field is allowed */
return -E2BIG;
off = btf_member_bit_offset(t, member);
if (off % 8)
/* valid C code cannot generate such BTF */
return -EINVAL;
off /= 8;
- if (off % __alignof__(struct bpf_spin_lock))
- /* valid struct bpf_spin_lock will be 4 byte aligned */
+ if (off % align)
+ return -EINVAL;
+ }
+ return off;
+}
+
+static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
+ const char *name, int sz, int align)
+{
+ const struct btf_var_secinfo *vsi;
+ u32 i, off = -ENOENT;
+
+ for_each_vsi(i, t, vsi) {
+ const struct btf_type *var = btf_type_by_id(btf, vsi->type);
+ const struct btf_type *var_type = btf_type_by_id(btf, var->type);
+
+ if (!__btf_type_is_struct(var_type))
+ continue;
+ if (var_type->size != sz)
+ continue;
+ if (vsi->size != sz)
+ continue;
+ if (strcmp(__btf_name_by_offset(btf, var_type->name_off), name))
+ continue;
+ if (off != -ENOENT)
+ /* only one such field is allowed */
+ return -E2BIG;
+ off = vsi->offset;
+ if (off % align)
return -EINVAL;
}
return off;
}
+static int btf_find_field(const struct btf *btf, const struct btf_type *t,
+ const char *name, int sz, int align)
+{
+
+ if (__btf_type_is_struct(t))
+ return btf_find_struct_field(btf, t, name, sz, align);
+ else if (btf_type_is_datasec(t))
+ return btf_find_datasec_var(btf, t, name, sz, align);
+ return -EINVAL;
+}
+
+/* find 'struct bpf_spin_lock' in map value.
+ * return >= 0 offset if found
+ * and < 0 in case of error
+ */
+int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
+{
+ return btf_find_field(btf, t, "bpf_spin_lock",
+ sizeof(struct bpf_spin_lock),
+ __alignof__(struct bpf_spin_lock));
+}
+
+int btf_find_timer(const struct btf *btf, const struct btf_type *t)
+{
+ return btf_find_field(btf, t, "bpf_timer",
+ sizeof(struct bpf_timer),
+ __alignof__(struct bpf_timer));
+}
+
static void __btf_struct_show(const struct btf *btf, const struct btf_type *t,
u32 type_id, void *data, u8 bits_offset,
struct btf_show *show)
@@ -4776,6 +4825,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
if (ctx_arg_info->offset == off) {
+ if (!ctx_arg_info->btf_id) {
+ bpf_log(log,"invalid btf_id for context argument offset %u\n", off);
+ return false;
+ }
+
info->reg_type = ctx_arg_info->reg_type;
info->btf = btf_vmlinux;
info->btf_id = ctx_arg_info->btf_id;
@@ -6159,3 +6213,5 @@ const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = {
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_ANYTHING,
};
+
+BTF_ID_LIST_GLOBAL_SINGLE(btf_task_struct_ids, struct, task_struct)
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index b567ca46555c..03145d45e3d5 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -19,7 +19,7 @@
#include "../cgroup/cgroup-internal.h"
-DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_BPF_ATTACH_TYPE);
+DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
void cgroup_bpf_offline(struct cgroup *cgrp)
@@ -113,12 +113,12 @@ static void cgroup_bpf_release(struct work_struct *work)
struct list_head *storages = &cgrp->bpf.storages;
struct bpf_cgroup_storage *storage, *stmp;
- unsigned int type;
+ unsigned int atype;
mutex_lock(&cgroup_mutex);
- for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
- struct list_head *progs = &cgrp->bpf.progs[type];
+ for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) {
+ struct list_head *progs = &cgrp->bpf.progs[atype];
struct bpf_prog_list *pl, *pltmp;
list_for_each_entry_safe(pl, pltmp, progs, node) {
@@ -128,10 +128,10 @@ static void cgroup_bpf_release(struct work_struct *work)
if (pl->link)
bpf_cgroup_link_auto_detach(pl->link);
kfree(pl);
- static_branch_dec(&cgroup_bpf_enabled_key[type]);
+ static_branch_dec(&cgroup_bpf_enabled_key[atype]);
}
old_array = rcu_dereference_protected(
- cgrp->bpf.effective[type],
+ cgrp->bpf.effective[atype],
lockdep_is_held(&cgroup_mutex));
bpf_prog_array_free(old_array);
}
@@ -196,7 +196,7 @@ static u32 prog_list_length(struct list_head *head)
* if parent has overridable or multi-prog, allow attaching
*/
static bool hierarchy_allows_attach(struct cgroup *cgrp,
- enum bpf_attach_type type)
+ enum cgroup_bpf_attach_type atype)
{
struct cgroup *p;
@@ -204,12 +204,12 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
if (!p)
return true;
do {
- u32 flags = p->bpf.flags[type];
+ u32 flags = p->bpf.flags[atype];
u32 cnt;
if (flags & BPF_F_ALLOW_MULTI)
return true;
- cnt = prog_list_length(&p->bpf.progs[type]);
+ cnt = prog_list_length(&p->bpf.progs[atype]);
WARN_ON_ONCE(cnt > 1);
if (cnt == 1)
return !!(flags & BPF_F_ALLOW_OVERRIDE);
@@ -225,7 +225,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
* to programs in this cgroup
*/
static int compute_effective_progs(struct cgroup *cgrp,
- enum bpf_attach_type type,
+ enum cgroup_bpf_attach_type atype,
struct bpf_prog_array **array)
{
struct bpf_prog_array_item *item;
@@ -236,8 +236,8 @@ static int compute_effective_progs(struct cgroup *cgrp,
/* count number of effective programs by walking parents */
do {
- if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
- cnt += prog_list_length(&p->bpf.progs[type]);
+ if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
+ cnt += prog_list_length(&p->bpf.progs[atype]);
p = cgroup_parent(p);
} while (p);
@@ -249,10 +249,10 @@ static int compute_effective_progs(struct cgroup *cgrp,
cnt = 0;
p = cgrp;
do {
- if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+ if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
continue;
- list_for_each_entry(pl, &p->bpf.progs[type], node) {
+ list_for_each_entry(pl, &p->bpf.progs[atype], node) {
if (!prog_list_prog(pl))
continue;
@@ -269,10 +269,10 @@ static int compute_effective_progs(struct cgroup *cgrp,
}
static void activate_effective_progs(struct cgroup *cgrp,
- enum bpf_attach_type type,
+ enum cgroup_bpf_attach_type atype,
struct bpf_prog_array *old_array)
{
- old_array = rcu_replace_pointer(cgrp->bpf.effective[type], old_array,
+ old_array = rcu_replace_pointer(cgrp->bpf.effective[atype], old_array,
lockdep_is_held(&cgroup_mutex));
/* free prog array after grace period, since __cgroup_bpf_run_*()
* might be still walking the array
@@ -328,7 +328,7 @@ cleanup:
}
static int update_effective_progs(struct cgroup *cgrp,
- enum bpf_attach_type type)
+ enum cgroup_bpf_attach_type atype)
{
struct cgroup_subsys_state *css;
int err;
@@ -340,7 +340,7 @@ static int update_effective_progs(struct cgroup *cgrp,
if (percpu_ref_is_zero(&desc->bpf.refcnt))
continue;
- err = compute_effective_progs(desc, type, &desc->bpf.inactive);
+ err = compute_effective_progs(desc, atype, &desc->bpf.inactive);
if (err)
goto cleanup;
}
@@ -357,7 +357,7 @@ static int update_effective_progs(struct cgroup *cgrp,
continue;
}
- activate_effective_progs(desc, type, desc->bpf.inactive);
+ activate_effective_progs(desc, atype, desc->bpf.inactive);
desc->bpf.inactive = NULL;
}
@@ -436,11 +436,12 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
enum bpf_attach_type type, u32 flags)
{
u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
- struct list_head *progs = &cgrp->bpf.progs[type];
struct bpf_prog *old_prog = NULL;
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
+ enum cgroup_bpf_attach_type atype;
struct bpf_prog_list *pl;
+ struct list_head *progs;
int err;
if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
@@ -454,10 +455,16 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
/* replace_prog implies BPF_F_REPLACE, and vice versa */
return -EINVAL;
- if (!hierarchy_allows_attach(cgrp, type))
+ atype = to_cgroup_bpf_attach_type(type);
+ if (atype < 0)
+ return -EINVAL;
+
+ progs = &cgrp->bpf.progs[atype];
+
+ if (!hierarchy_allows_attach(cgrp, atype))
return -EPERM;
- if (!list_empty(progs) && cgrp->bpf.flags[type] != saved_flags)
+ if (!list_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
/* Disallow attaching non-overridable on top
* of existing overridable in this cgroup.
* Disallow attaching multi-prog if overridable or none
@@ -490,16 +497,16 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
pl->prog = prog;
pl->link = link;
bpf_cgroup_storages_assign(pl->storage, storage);
- cgrp->bpf.flags[type] = saved_flags;
+ cgrp->bpf.flags[atype] = saved_flags;
- err = update_effective_progs(cgrp, type);
+ err = update_effective_progs(cgrp, atype);
if (err)
goto cleanup;
if (old_prog)
bpf_prog_put(old_prog);
else
- static_branch_inc(&cgroup_bpf_enabled_key[type]);
+ static_branch_inc(&cgroup_bpf_enabled_key[atype]);
bpf_cgroup_storages_link(new_storage, cgrp, type);
return 0;
@@ -520,7 +527,7 @@ cleanup:
* all descendant cgroups. This function is guaranteed to succeed.
*/
static void replace_effective_prog(struct cgroup *cgrp,
- enum bpf_attach_type type,
+ enum cgroup_bpf_attach_type atype,
struct bpf_cgroup_link *link)
{
struct bpf_prog_array_item *item;
@@ -539,10 +546,10 @@ static void replace_effective_prog(struct cgroup *cgrp,
/* find position of link in effective progs array */
for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
- if (pos && !(cg->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+ if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
continue;
- head = &cg->bpf.progs[type];
+ head = &cg->bpf.progs[atype];
list_for_each_entry(pl, head, node) {
if (!prog_list_prog(pl))
continue;
@@ -554,7 +561,7 @@ static void replace_effective_prog(struct cgroup *cgrp,
found:
BUG_ON(!cg);
progs = rcu_dereference_protected(
- desc->bpf.effective[type],
+ desc->bpf.effective[atype],
lockdep_is_held(&cgroup_mutex));
item = &progs->items[pos];
WRITE_ONCE(item->prog, link->link.prog);
@@ -574,11 +581,18 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
struct bpf_cgroup_link *link,
struct bpf_prog *new_prog)
{
- struct list_head *progs = &cgrp->bpf.progs[link->type];
+ enum cgroup_bpf_attach_type atype;
struct bpf_prog *old_prog;
struct bpf_prog_list *pl;
+ struct list_head *progs;
bool found = false;
+ atype = to_cgroup_bpf_attach_type(link->type);
+ if (atype < 0)
+ return -EINVAL;
+
+ progs = &cgrp->bpf.progs[atype];
+
if (link->link.prog->type != new_prog->type)
return -EINVAL;
@@ -592,7 +606,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
return -ENOENT;
old_prog = xchg(&link->link.prog, new_prog);
- replace_effective_prog(cgrp, link->type, link);
+ replace_effective_prog(cgrp, atype, link);
bpf_prog_put(old_prog);
return 0;
}
@@ -667,12 +681,20 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_cgroup_link *link, enum bpf_attach_type type)
{
- struct list_head *progs = &cgrp->bpf.progs[type];
- u32 flags = cgrp->bpf.flags[type];
- struct bpf_prog_list *pl;
+ enum cgroup_bpf_attach_type atype;
struct bpf_prog *old_prog;
+ struct bpf_prog_list *pl;
+ struct list_head *progs;
+ u32 flags;
int err;
+ atype = to_cgroup_bpf_attach_type(type);
+ if (atype < 0)
+ return -EINVAL;
+
+ progs = &cgrp->bpf.progs[atype];
+ flags = cgrp->bpf.flags[atype];
+
if (prog && link)
/* only one of prog or link can be specified */
return -EINVAL;
@@ -686,7 +708,7 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
pl->prog = NULL;
pl->link = NULL;
- err = update_effective_progs(cgrp, type);
+ err = update_effective_progs(cgrp, atype);
if (err)
goto cleanup;
@@ -695,10 +717,10 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
kfree(pl);
if (list_empty(progs))
/* last program was detached, reset flags to zero */
- cgrp->bpf.flags[type] = 0;
+ cgrp->bpf.flags[atype] = 0;
if (old_prog)
bpf_prog_put(old_prog);
- static_branch_dec(&cgroup_bpf_enabled_key[type]);
+ static_branch_dec(&cgroup_bpf_enabled_key[atype]);
return 0;
cleanup:
@@ -714,13 +736,21 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
{
__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
enum bpf_attach_type type = attr->query.attach_type;
- struct list_head *progs = &cgrp->bpf.progs[type];
- u32 flags = cgrp->bpf.flags[type];
+ enum cgroup_bpf_attach_type atype;
struct bpf_prog_array *effective;
+ struct list_head *progs;
struct bpf_prog *prog;
int cnt, ret = 0, i;
+ u32 flags;
- effective = rcu_dereference_protected(cgrp->bpf.effective[type],
+ atype = to_cgroup_bpf_attach_type(type);
+ if (atype < 0)
+ return -EINVAL;
+
+ progs = &cgrp->bpf.progs[atype];
+ flags = cgrp->bpf.flags[atype];
+
+ effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
lockdep_is_held(&cgroup_mutex));
if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
@@ -925,14 +955,14 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
link->cgroup = cgrp;
link->type = attr->link_create.attach_type;
- err = bpf_link_prime(&link->link, &link_primer);
+ err = bpf_link_prime(&link->link, &link_primer);
if (err) {
kfree(link);
goto out_put_cgroup;
}
- err = cgroup_bpf_attach(cgrp, NULL, NULL, link, link->type,
- BPF_F_ALLOW_MULTI);
+ err = cgroup_bpf_attach(cgrp, NULL, NULL, link,
+ link->type, BPF_F_ALLOW_MULTI);
if (err) {
bpf_link_cleanup(&link_primer);
goto out_put_cgroup;
@@ -986,7 +1016,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
*/
int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb,
- enum bpf_attach_type type)
+ enum cgroup_bpf_attach_type atype)
{
unsigned int offset = skb->data - skb_network_header(skb);
struct sock *save_sk;
@@ -1008,12 +1038,12 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
/* compute pointers for the bpf prog */
bpf_compute_and_save_data_end(skb, &saved_data_end);
- if (type == BPF_CGROUP_INET_EGRESS) {
+ if (atype == CGROUP_INET_EGRESS) {
ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
- cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);
+ cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
} else {
- ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
- __bpf_prog_run_save_cb);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
+ __bpf_prog_run_save_cb);
ret = (ret == 1 ? 0 : -EPERM);
}
bpf_restore_data_end(skb, saved_data_end);
@@ -1038,12 +1068,12 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
* and if it returned != 1 during execution. In all other cases, 0 is returned.
*/
int __cgroup_bpf_run_filter_sk(struct sock *sk,
- enum bpf_attach_type type)
+ enum cgroup_bpf_attach_type atype)
{
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
int ret;
- ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, bpf_prog_run);
return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
@@ -1065,7 +1095,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
*/
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
- enum bpf_attach_type type,
+ enum cgroup_bpf_attach_type atype,
void *t_ctx,
u32 *flags)
{
@@ -1090,8 +1120,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
}
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- ret = BPF_PROG_RUN_ARRAY_FLAGS(cgrp->bpf.effective[type], &ctx,
- BPF_PROG_RUN, flags);
+ ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
+ bpf_prog_run, flags);
return ret == 1 ? 0 : -EPERM;
}
@@ -1115,19 +1145,19 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
*/
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct bpf_sock_ops_kern *sock_ops,
- enum bpf_attach_type type)
+ enum cgroup_bpf_attach_type atype)
{
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
int ret;
- ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops,
- BPF_PROG_RUN);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
+ bpf_prog_run);
return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
- short access, enum bpf_attach_type type)
+ short access, enum cgroup_bpf_attach_type atype)
{
struct cgroup *cgrp;
struct bpf_cgroup_dev_ctx ctx = {
@@ -1135,12 +1165,12 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
.major = major,
.minor = minor,
};
- int allow = 1;
+ int allow;
rcu_read_lock();
cgrp = task_dfl_cgroup(current);
- allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
- BPF_PROG_RUN);
+ allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+ bpf_prog_run);
rcu_read_unlock();
return !allow;
@@ -1231,7 +1261,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
struct ctl_table *table, int write,
char **buf, size_t *pcount, loff_t *ppos,
- enum bpf_attach_type type)
+ enum cgroup_bpf_attach_type atype)
{
struct bpf_sysctl_kern ctx = {
.head = head,
@@ -1271,7 +1301,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
rcu_read_lock();
cgrp = task_dfl_cgroup(current);
- ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run);
rcu_read_unlock();
kfree(ctx.cur_val);
@@ -1289,7 +1319,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
#ifdef CONFIG_NET
static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
- enum bpf_attach_type attach_type)
+ enum cgroup_bpf_attach_type attach_type)
{
struct bpf_prog_array *prog_array;
bool empty;
@@ -1364,7 +1394,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
* attached to the hook so we don't waste time allocating
* memory and locking the socket.
*/
- if (__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT))
+ if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_SETSOCKOPT))
return 0;
/* Allocate a bit more than the initial user buffer for
@@ -1385,8 +1415,8 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
}
lock_sock(sk);
- ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT],
- &ctx, BPF_PROG_RUN);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
+ &ctx, bpf_prog_run);
release_sock(sk);
if (!ret) {
@@ -1460,7 +1490,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
* attached to the hook so we don't waste time allocating
* memory and locking the socket.
*/
- if (__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))
+ if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_GETSOCKOPT))
return retval;
ctx.optlen = max_optlen;
@@ -1495,8 +1525,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
}
lock_sock(sk);
- ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
- &ctx, BPF_PROG_RUN);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
+ &ctx, bpf_prog_run);
release_sock(sk);
if (!ret) {
@@ -1556,8 +1586,8 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
* be called if that data shouldn't be "exported".
*/
- ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
- &ctx, BPF_PROG_RUN);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
+ &ctx, bpf_prog_run);
if (!ret)
return -EPERM;
@@ -1846,15 +1876,41 @@ const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
const struct bpf_prog_ops cg_sysctl_prog_ops = {
};
+#ifdef CONFIG_NET
+BPF_CALL_1(bpf_get_netns_cookie_sockopt, struct bpf_sockopt_kern *, ctx)
+{
+ const struct net *net = ctx ? sock_net(ctx->sk) : &init_net;
+
+ return net->net_cookie;
+}
+
+static const struct bpf_func_proto bpf_get_netns_cookie_sockopt_proto = {
+ .func = bpf_get_netns_cookie_sockopt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
+};
+#endif
+
static const struct bpf_func_proto *
cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
#ifdef CONFIG_NET
+ case BPF_FUNC_get_netns_cookie:
+ return &bpf_get_netns_cookie_sockopt_proto;
case BPF_FUNC_sk_storage_get:
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
+ case BPF_FUNC_setsockopt:
+ if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
+ return &bpf_sk_setsockopt_proto;
+ return NULL;
+ case BPF_FUNC_getsockopt:
+ if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
+ return &bpf_sk_getsockopt_proto;
+ return NULL;
#endif
#ifdef CONFIG_INET
case BPF_FUNC_tcp_sock:
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b1a5fc04492b..9f4636d021b1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1362,11 +1362,13 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
}
/**
- * __bpf_prog_run - run eBPF program on a given context
+ * ___bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
* @insn: is the array of eBPF instructions
*
* Decode and execute eBPF instructions.
+ *
+ * Return: whatever value is in %BPF_R0 at program exit
*/
static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
{
@@ -1877,7 +1879,10 @@ static void bpf_prog_select_func(struct bpf_prog *fp)
* @err: pointer to error variable
*
* Try to JIT eBPF program, if JIT is not available, use interpreter.
- * The BPF program will be executed via BPF_PROG_RUN() macro.
+ * The BPF program will be executed via bpf_prog_run() function.
+ *
+ * Return: the &fp argument along with &err set to 0 for success or
+ * a negative errno code on failure
*/
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
{
@@ -2114,13 +2119,13 @@ int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *exclude_prog,
struct bpf_prog *include_prog,
+ u64 bpf_cookie,
struct bpf_prog_array **new_array)
{
int new_prog_cnt, carry_prog_cnt = 0;
- struct bpf_prog_array_item *existing;
+ struct bpf_prog_array_item *existing, *new;
struct bpf_prog_array *array;
bool found_exclude = false;
- int new_prog_idx = 0;
/* Figure out how many existing progs we need to carry over to
* the new array.
@@ -2157,20 +2162,27 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
if (!array)
return -ENOMEM;
+ new = array->items;
/* Fill in the new prog array */
if (carry_prog_cnt) {
existing = old_array->items;
- for (; existing->prog; existing++)
- if (existing->prog != exclude_prog &&
- existing->prog != &dummy_bpf_prog.prog) {
- array->items[new_prog_idx++].prog =
- existing->prog;
- }
+ for (; existing->prog; existing++) {
+ if (existing->prog == exclude_prog ||
+ existing->prog == &dummy_bpf_prog.prog)
+ continue;
+
+ new->prog = existing->prog;
+ new->bpf_cookie = existing->bpf_cookie;
+ new++;
+ }
}
- if (include_prog)
- array->items[new_prog_idx++].prog = include_prog;
- array->items[new_prog_idx].prog = NULL;
+ if (include_prog) {
+ new->prog = include_prog;
+ new->bpf_cookie = bpf_cookie;
+ new++;
+ }
+ new->prog = NULL;
*new_array = array;
return 0;
}
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 480e936c54d0..585b2b77ccc4 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -16,6 +16,7 @@
* netstack, and assigning dedicated CPUs for this stage. This
* basically allows for 10G wirespeed pre-filtering via bpf.
*/
+#include <linux/bitops.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ptr_ring.h>
@@ -168,6 +169,46 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
}
}
+static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
+ struct list_head *listp,
+ struct xdp_cpumap_stats *stats)
+{
+ struct sk_buff *skb, *tmp;
+ struct xdp_buff xdp;
+ u32 act;
+ int err;
+
+ list_for_each_entry_safe(skb, tmp, listp, list) {
+ act = bpf_prog_run_generic_xdp(skb, &xdp, rcpu->prog);
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_REDIRECT:
+ skb_list_del_init(skb);
+ err = xdp_do_generic_redirect(skb->dev, skb, &xdp,
+ rcpu->prog);
+ if (unlikely(err)) {
+ kfree_skb(skb);
+ stats->drop++;
+ } else {
+ stats->redirect++;
+ }
+ return;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ fallthrough;
+ case XDP_ABORTED:
+ trace_xdp_exception(skb->dev, rcpu->prog, act);
+ fallthrough;
+ case XDP_DROP:
+ skb_list_del_init(skb);
+ kfree_skb(skb);
+ stats->drop++;
+ return;
+ }
+ }
+}
+
static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
void **frames, int n,
struct xdp_cpumap_stats *stats)
@@ -176,11 +217,6 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
struct xdp_buff xdp;
int i, nframes = 0;
- if (!rcpu->prog)
- return n;
-
- rcu_read_lock_bh();
-
xdp_set_return_frame_no_direct();
xdp.rxq = &rxq;
@@ -227,17 +263,37 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
}
}
+ xdp_clear_return_frame_no_direct();
+
+ return nframes;
+}
+
+#define CPUMAP_BATCH 8
+
+static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
+ int xdp_n, struct xdp_cpumap_stats *stats,
+ struct list_head *list)
+{
+ int nframes;
+
+ if (!rcpu->prog)
+ return xdp_n;
+
+ rcu_read_lock_bh();
+
+ nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, xdp_n, stats);
+
if (stats->redirect)
- xdp_do_flush_map();
+ xdp_do_flush();
- xdp_clear_return_frame_no_direct();
+ if (unlikely(!list_empty(list)))
+ cpu_map_bpf_prog_run_skb(rcpu, list, stats);
rcu_read_unlock_bh(); /* resched point, may call do_softirq() */
return nframes;
}
-#define CPUMAP_BATCH 8
static int cpu_map_kthread_run(void *data)
{
@@ -254,9 +310,9 @@ static int cpu_map_kthread_run(void *data)
struct xdp_cpumap_stats stats = {}; /* zero stats */
unsigned int kmem_alloc_drops = 0, sched = 0;
gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
+ int i, n, m, nframes, xdp_n;
void *frames[CPUMAP_BATCH];
void *skbs[CPUMAP_BATCH];
- int i, n, m, nframes;
LIST_HEAD(list);
/* Release CPU reschedule checks */
@@ -280,9 +336,20 @@ static int cpu_map_kthread_run(void *data)
*/
n = __ptr_ring_consume_batched(rcpu->queue, frames,
CPUMAP_BATCH);
- for (i = 0; i < n; i++) {
+ for (i = 0, xdp_n = 0; i < n; i++) {
void *f = frames[i];
- struct page *page = virt_to_page(f);
+ struct page *page;
+
+ if (unlikely(__ptr_test_bit(0, &f))) {
+ struct sk_buff *skb = f;
+
+ __ptr_clear_bit(0, &skb);
+ list_add_tail(&skb->list, &list);
+ continue;
+ }
+
+ frames[xdp_n++] = f;
+ page = virt_to_page(f);
/* Bring struct page memory area to curr CPU. Read by
* build_skb_around via page_is_pfmemalloc(), and when
@@ -292,7 +359,7 @@ static int cpu_map_kthread_run(void *data)
}
/* Support running another XDP prog on this CPU */
- nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats);
+ nframes = cpu_map_bpf_prog_run(rcpu, frames, xdp_n, &stats, &list);
if (nframes) {
m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
if (unlikely(m == 0)) {
@@ -330,12 +397,6 @@ static int cpu_map_kthread_run(void *data)
return 0;
}
-bool cpu_map_prog_allowed(struct bpf_map *map)
-{
- return map->map_type == BPF_MAP_TYPE_CPUMAP &&
- map->value_size != offsetofend(struct bpf_cpumap_val, qsize);
-}
-
static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
{
struct bpf_prog *prog;
@@ -701,6 +762,25 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
return 0;
}
+int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
+ struct sk_buff *skb)
+{
+ int ret;
+
+ __skb_pull(skb, skb->mac_len);
+ skb_set_redirected(skb, false);
+ __ptr_set_bit(0, &skb);
+
+ ret = ptr_ring_produce(rcpu->queue, skb);
+ if (ret < 0)
+ goto trace;
+
+ wake_up_process(rcpu->kthread);
+trace:
+ trace_xdp_cpumap_enqueue(rcpu->map_id, !ret, !!ret, rcpu->cpu);
+ return ret;
+}
+
void __cpu_map_flush(void)
{
struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index fdc20892837c..f02d04540c0c 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -322,16 +322,6 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
return -ENOENT;
}
-bool dev_map_can_have_prog(struct bpf_map *map)
-{
- if ((map->map_type == BPF_MAP_TYPE_DEVMAP ||
- map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) &&
- map->value_size != offsetofend(struct bpf_devmap_val, ifindex))
- return true;
-
- return false;
-}
-
static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
struct xdp_frame **frames, int n,
struct net_device *dev)
@@ -499,6 +489,37 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
return 0;
}
+static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev *dst)
+{
+ struct xdp_txq_info txq = { .dev = dst->dev };
+ struct xdp_buff xdp;
+ u32 act;
+
+ if (!dst->xdp_prog)
+ return XDP_PASS;
+
+ __skb_pull(skb, skb->mac_len);
+ xdp.txq = &txq;
+
+ act = bpf_prog_run_generic_xdp(skb, &xdp, dst->xdp_prog);
+ switch (act) {
+ case XDP_PASS:
+ __skb_push(skb, skb->mac_len);
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ fallthrough;
+ case XDP_ABORTED:
+ trace_xdp_exception(dst->dev, dst->xdp_prog, act);
+ fallthrough;
+ case XDP_DROP:
+ kfree_skb(skb);
+ break;
+ }
+
+ return act;
+}
+
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
@@ -513,10 +534,9 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
}
-static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp,
- int exclude_ifindex)
+static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp)
{
- if (!obj || obj->dev->ifindex == exclude_ifindex ||
+ if (!obj ||
!obj->dev->netdev_ops->ndo_xdp_xmit)
return false;
@@ -541,17 +561,48 @@ static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj,
return 0;
}
+static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifindex)
+{
+ while (num_excluded--) {
+ if (ifindex == excluded[num_excluded])
+ return true;
+ }
+ return false;
+}
+
+/* Get ifindex of each upper device. 'indexes' must be able to hold at
+ * least MAX_NEST_DEV elements.
+ * Returns the number of ifindexes added.
+ */
+static int get_upper_ifindexes(struct net_device *dev, int *indexes)
+{
+ struct net_device *upper;
+ struct list_head *iter;
+ int n = 0;
+
+ netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+ indexes[n++] = upper->ifindex;
+ }
+ return n;
+}
+
int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
struct bpf_map *map, bool exclude_ingress)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
- int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
struct bpf_dtab_netdev *dst, *last_dst = NULL;
+ int excluded_devices[1+MAX_NEST_DEV];
struct hlist_head *head;
struct xdp_frame *xdpf;
+ int num_excluded = 0;
unsigned int i;
int err;
+ if (exclude_ingress) {
+ num_excluded = get_upper_ifindexes(dev_rx, excluded_devices);
+ excluded_devices[num_excluded++] = dev_rx->ifindex;
+ }
+
xdpf = xdp_convert_buff_to_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
@@ -560,7 +611,10 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
for (i = 0; i < map->max_entries; i++) {
dst = rcu_dereference_check(dtab->netdev_map[i],
rcu_read_lock_bh_held());
- if (!is_valid_dst(dst, xdp, exclude_ifindex))
+ if (!is_valid_dst(dst, xdp))
+ continue;
+
+ if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
@@ -580,7 +634,11 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
head = dev_map_index_hash(dtab, i);
hlist_for_each_entry_rcu(dst, head, index_hlist,
lockdep_is_held(&dtab->index_lock)) {
- if (!is_valid_dst(dst, xdp, exclude_ifindex))
+ if (!is_valid_dst(dst, xdp))
+ continue;
+
+ if (is_ifindex_excluded(excluded_devices, num_excluded,
+ dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
@@ -615,6 +673,14 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
err = xdp_ok_fwd_dev(dst->dev, skb->len);
if (unlikely(err))
return err;
+
+ /* Redirect has already succeeded semantically at this point, so we just
+ * return 0 even if packet is dropped. Helper below takes care of
+ * freeing skb.
+ */
+ if (dev_map_bpf_prog_run_skb(skb, dst) != XDP_PASS)
+ return 0;
+
skb->dev = dst->dev;
generic_xdp_tx(skb, xdp_prog);
@@ -646,18 +712,27 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
bool exclude_ingress)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
- int exclude_ifindex = exclude_ingress ? dev->ifindex : 0;
struct bpf_dtab_netdev *dst, *last_dst = NULL;
+ int excluded_devices[1+MAX_NEST_DEV];
struct hlist_head *head;
struct hlist_node *next;
+ int num_excluded = 0;
unsigned int i;
int err;
+ if (exclude_ingress) {
+ num_excluded = get_upper_ifindexes(dev, excluded_devices);
+ excluded_devices[num_excluded++] = dev->ifindex;
+ }
+
if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
for (i = 0; i < map->max_entries; i++) {
dst = rcu_dereference_check(dtab->netdev_map[i],
rcu_read_lock_bh_held());
- if (!dst || dst->dev->ifindex == exclude_ifindex)
+ if (!dst)
+ continue;
+
+ if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
@@ -671,12 +746,17 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
return err;
last_dst = dst;
+
}
} else { /* BPF_MAP_TYPE_DEVMAP_HASH */
for (i = 0; i < dtab->n_buckets; i++) {
head = dev_map_index_hash(dtab, i);
hlist_for_each_entry_safe(dst, next, head, index_hlist) {
- if (!dst || dst->dev->ifindex == exclude_ifindex)
+ if (!dst)
+ continue;
+
+ if (is_ifindex_excluded(excluded_devices, num_excluded,
+ dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 72c58cc516a3..32471ba02708 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -228,6 +228,32 @@ static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size);
}
+/* True for maps that are neither per-CPU nor LRU. For such maps
+ * prealloc_init() and htab_free_prealloced_timers() add
+ * num_possible_cpus() to the number of preallocated elements.
+ */
+static bool htab_has_extra_elems(struct bpf_htab *htab)
+{
+ return !htab_is_percpu(htab) && !htab_is_lru(htab);
+}
+
+/* Walk every preallocated element of 'htab' and cancel and free the timer
+ * stored in its value. Does nothing when the map value has no timer.
+ */
+static void htab_free_prealloced_timers(struct bpf_htab *htab)
+{
+ u32 num_entries = htab->map.max_entries;
+ int i;
+
+ if (likely(!map_value_has_timer(&htab->map)))
+ return;
+ /* same element count that prealloc_init() computes */
+ if (htab_has_extra_elems(htab))
+ num_entries += num_possible_cpus();
+
+ for (i = 0; i < num_entries; i++) {
+ struct htab_elem *elem;
+
+ elem = get_htab_elem(htab, i);
+ /* value starts after the key rounded up to 8 bytes;
+ * the timer is timer_off bytes into the value
+ */
+ bpf_timer_cancel_and_free(elem->key +
+ round_up(htab->map.key_size, 8) +
+ htab->map.timer_off);
+ cond_resched();
+ }
+}
+
static void htab_free_elems(struct bpf_htab *htab)
{
int i;
@@ -265,8 +291,12 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
struct htab_elem *l;
if (node) {
+ u32 key_size = htab->map.key_size;
+
l = container_of(node, struct htab_elem, lru_node);
- memcpy(l->key, key, htab->map.key_size);
+ memcpy(l->key, key, key_size);
+ check_and_init_map_value(&htab->map,
+ l->key + round_up(key_size, 8));
return l;
}
@@ -278,7 +308,7 @@ static int prealloc_init(struct bpf_htab *htab)
u32 num_entries = htab->map.max_entries;
int err = -ENOMEM, i;
- if (!htab_is_percpu(htab) && !htab_is_lru(htab))
+ if (htab_has_extra_elems(htab))
num_entries += num_possible_cpus();
htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries,
@@ -695,6 +725,14 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map,
return insn - insn_buf;
}
+/* If the map value contains a timer, cancel and free the timer of 'elem'.
+ * The timer is located timer_off bytes into the value, and the value
+ * starts right after the key rounded up to 8 bytes.
+ */
+static void check_and_free_timer(struct bpf_htab *htab, struct htab_elem *elem)
+{
+ if (unlikely(map_value_has_timer(&htab->map)))
+ bpf_timer_cancel_and_free(elem->key +
+ round_up(htab->map.key_size, 8) +
+ htab->map.timer_off);
+}
+
/* It is called from the bpf_lru_list when the LRU needs to delete
* older elements from the htab.
*/
@@ -719,6 +757,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
if (l == tgt_l) {
hlist_nulls_del_rcu(&l->hash_node);
+ check_and_free_timer(htab, l);
break;
}
@@ -790,6 +829,7 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
+ check_and_free_timer(htab, l);
kfree(l);
}
@@ -817,6 +857,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
htab_put_fd_value(htab, l);
if (htab_is_prealloc(htab)) {
+ check_and_free_timer(htab, l);
__pcpu_freelist_push(&htab->freelist, &l->fnode);
} else {
atomic_dec(&htab->count);
@@ -920,8 +961,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
l_new = ERR_PTR(-ENOMEM);
goto dec_count;
}
- check_and_init_map_lock(&htab->map,
- l_new->key + round_up(key_size, 8));
+ check_and_init_map_value(&htab->map,
+ l_new->key + round_up(key_size, 8));
}
memcpy(l_new->key, key, key_size);
@@ -1062,6 +1103,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
hlist_nulls_del_rcu(&l_old->hash_node);
if (!htab_is_prealloc(htab))
free_htab_elem(htab, l_old);
+ else
+ check_and_free_timer(htab, l_old);
}
ret = 0;
err:
@@ -1069,6 +1112,12 @@ err:
return ret;
}
+/* Release the element's timer (if the map value has one), then pass the
+ * element's lru_node to bpf_lru_push_free().
+ */
+static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
+{
+ check_and_free_timer(htab, elem);
+ bpf_lru_push_free(&htab->lru, &elem->lru_node);
+}
+
static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
u64 map_flags)
{
@@ -1102,7 +1151,8 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
l_new = prealloc_lru_pop(htab, key, hash);
if (!l_new)
return -ENOMEM;
- memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);
+ copy_map_value(&htab->map,
+ l_new->key + round_up(map->key_size, 8), value);
ret = htab_lock_bucket(htab, b, hash, &flags);
if (ret)
@@ -1128,9 +1178,9 @@ err:
htab_unlock_bucket(htab, b, hash, flags);
if (ret)
- bpf_lru_push_free(&htab->lru, &l_new->lru_node);
+ htab_lru_push_free(htab, l_new);
else if (l_old)
- bpf_lru_push_free(&htab->lru, &l_old->lru_node);
+ htab_lru_push_free(htab, l_old);
return ret;
}
@@ -1339,7 +1389,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
htab_unlock_bucket(htab, b, hash, flags);
if (l)
- bpf_lru_push_free(&htab->lru, &l->lru_node);
+ htab_lru_push_free(htab, l);
return ret;
}
@@ -1359,6 +1409,35 @@ static void delete_all_elements(struct bpf_htab *htab)
}
}
+/* Visit every element linked into every bucket of 'htab' and release its
+ * timer through check_and_free_timer(). The walk runs under
+ * rcu_read_lock(), and cond_resched_rcu() is called after each bucket.
+ */
+static void htab_free_malloced_timers(struct bpf_htab *htab)
+{
+ int i;
+
+ rcu_read_lock();
+ for (i = 0; i < htab->n_buckets; i++) {
+ struct hlist_nulls_head *head = select_bucket(htab, i);
+ struct hlist_nulls_node *n;
+ struct htab_elem *l;
+
+ hlist_nulls_for_each_entry(l, n, head, hash_node)
+ check_and_free_timer(htab, l);
+ cond_resched_rcu();
+ }
+ rcu_read_unlock();
+}
+
+/* Installed as ->map_release_uref in htab_map_ops and htab_lru_map_ops.
+ * Frees all timers held in the map's values, using the bucket walk for
+ * non-preallocated maps and the element-array walk for preallocated ones.
+ * Returns immediately when the map value has no timer.
+ */
+static void htab_map_free_timers(struct bpf_map *map)
+{
+ struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+
+ if (likely(!map_value_has_timer(&htab->map)))
+ return;
+ if (!htab_is_prealloc(htab))
+ htab_free_malloced_timers(htab);
+ else
+ htab_free_prealloced_timers(htab);
+}
+
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void htab_map_free(struct bpf_map *map)
{
@@ -1456,7 +1535,7 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
else
copy_map_value(map, value, l->key +
roundup_key_size);
- check_and_init_map_lock(map, value);
+ check_and_init_map_value(map, value);
}
hlist_nulls_del_rcu(&l->hash_node);
@@ -1467,7 +1546,7 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
htab_unlock_bucket(htab, b, hash, bflags);
if (is_lru_map && l)
- bpf_lru_push_free(&htab->lru, &l->lru_node);
+ htab_lru_push_free(htab, l);
return ret;
}
@@ -1565,8 +1644,8 @@ alloc:
/* We cannot do copy_from_user or copy_to_user inside
* the rcu_read_lock. Allocate enough space here.
*/
- keys = kvmalloc(key_size * bucket_size, GFP_USER | __GFP_NOWARN);
- values = kvmalloc(value_size * bucket_size, GFP_USER | __GFP_NOWARN);
+ keys = kvmalloc_array(key_size, bucket_size, GFP_USER | __GFP_NOWARN);
+ values = kvmalloc_array(value_size, bucket_size, GFP_USER | __GFP_NOWARN);
if (!keys || !values) {
ret = -ENOMEM;
goto after_loop;
@@ -1645,7 +1724,7 @@ again_nocopy:
true);
else
copy_map_value(map, dst_val, value);
- check_and_init_map_lock(map, dst_val);
+ check_and_init_map_value(map, dst_val);
}
if (do_delete) {
hlist_nulls_del_rcu(&l->hash_node);
@@ -1672,7 +1751,7 @@ again_nocopy:
while (node_to_free) {
l = node_to_free;
node_to_free = node_to_free->batch_flink;
- bpf_lru_push_free(&htab->lru, &l->lru_node);
+ htab_lru_push_free(htab, l);
}
next_batch:
@@ -2034,6 +2113,7 @@ const struct bpf_map_ops htab_map_ops = {
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
+ .map_release_uref = htab_map_free_timers,
.map_lookup_elem = htab_map_lookup_elem,
.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
.map_update_elem = htab_map_update_elem,
@@ -2055,6 +2135,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
+ .map_release_uref = htab_map_free_timers,
.map_lookup_elem = htab_lru_map_lookup_elem,
.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 62cf00383910..9aabf84afd4b 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -289,13 +289,18 @@ static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
static DEFINE_PER_CPU(unsigned long, irqsave_flags);
-notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
+/* Disable local interrupts, take 'lock', then stash the saved interrupt
+ * flags in the per-CPU 'irqsave_flags' variable, where
+ * __bpf_spin_unlock_irqrestore() reads them back.
+ */
+static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
{
unsigned long flags;
local_irq_save(flags);
__bpf_spin_lock(lock);
__this_cpu_write(irqsave_flags, flags);
+}
+
+/* bpf_spin_lock() helper: thin wrapper around __bpf_spin_lock_irqsave();
+ * always returns 0.
+ */
+notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
+{
+ __bpf_spin_lock_irqsave(lock);
return 0;
}
@@ -306,13 +311,18 @@ const struct bpf_func_proto bpf_spin_lock_proto = {
.arg1_type = ARG_PTR_TO_SPIN_LOCK,
};
-notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
+/* Read the interrupt flags saved in the per-CPU 'irqsave_flags' variable,
+ * release 'lock', then restore local interrupts from those flags.
+ */
+static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
{
unsigned long flags;
flags = __this_cpu_read(irqsave_flags);
__bpf_spin_unlock(lock);
local_irq_restore(flags);
+}
+
+/* bpf_spin_unlock() helper: thin wrapper around
+ * __bpf_spin_unlock_irqrestore(); always returns 0.
+ */
+notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
+{
+ __bpf_spin_unlock_irqrestore(lock);
return 0;
}
@@ -333,9 +343,9 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
else
lock = dst + map->spin_lock_off;
preempt_disable();
- ____bpf_spin_lock(lock);
+ __bpf_spin_lock_irqsave(lock);
copy_map_value(map, dst, src);
- ____bpf_spin_unlock(lock);
+ __bpf_spin_unlock_irqrestore(lock);
preempt_enable();
}
@@ -353,9 +363,15 @@ const struct bpf_func_proto bpf_jiffies64_proto = {
#ifdef CONFIG_CGROUPS
BPF_CALL_0(bpf_get_current_cgroup_id)
{
- struct cgroup *cgrp = task_dfl_cgroup(current);
+ struct cgroup *cgrp;
+ u64 cgrp_id;
+
+ rcu_read_lock();
+ cgrp = task_dfl_cgroup(current);
+ cgrp_id = cgroup_id(cgrp);
+ rcu_read_unlock();
- return cgroup_id(cgrp);
+ return cgrp_id;
}
const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
@@ -366,13 +382,17 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
{
- struct cgroup *cgrp = task_dfl_cgroup(current);
+ struct cgroup *cgrp;
struct cgroup *ancestor;
+ u64 cgrp_id;
+ rcu_read_lock();
+ cgrp = task_dfl_cgroup(current);
ancestor = cgroup_ancestor(cgrp, ancestor_level);
- if (!ancestor)
- return 0;
- return cgroup_id(ancestor);
+ cgrp_id = ancestor ? cgroup_id(ancestor) : 0;
+ rcu_read_unlock();
+
+ return cgrp_id;
}
const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
@@ -383,8 +403,6 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
};
#ifdef CONFIG_CGROUP_BPF
-DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
- bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{
@@ -393,17 +411,13 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
* verifier checks that its value is correct.
*/
enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
- struct bpf_cgroup_storage *storage = NULL;
+ struct bpf_cgroup_storage *storage;
+ struct bpf_cg_run_ctx *ctx;
void *ptr;
- int i;
-
- for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
- if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
- continue;
- storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
- break;
- }
+ /* get current cgroup storage from BPF run context */
+ ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+ storage = ctx->prog_item->cgroup_storage[stype];
if (stype == BPF_CGROUP_STORAGE_SHARED)
ptr = &READ_ONCE(storage->buf)->data[0];
@@ -904,6 +918,20 @@ fmt_str:
num_spec++;
continue;
+ } else if (fmt[i] == 'c') {
+ if (!tmp_buf)
+ goto nocopy_fmt;
+
+ if (tmp_buf_end == tmp_buf) {
+ err = -ENOSPC;
+ goto out;
+ }
+
+ *tmp_buf = raw_args[num_spec];
+ tmp_buf++;
+ num_spec++;
+
+ continue;
}
sizeof_cur_arg = sizeof(int);
@@ -989,11 +1017,327 @@ const struct bpf_func_proto bpf_snprintf_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
+/* BPF map elements can contain 'struct bpf_timer'.
+ * Such a map owns all of its BPF timers.
+ * 'struct bpf_timer' is allocated as part of map element allocation
+ * and it's zero initialized.
+ * That space is used to keep 'struct bpf_timer_kern'.
+ * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and
+ * remembers the 'struct bpf_map *' pointer it's part of.
+ * bpf_timer_set_callback() increments prog refcnt and assigns bpf callback_fn.
+ * bpf_timer_start() arms the timer.
+ * If the user space reference to a map goes to zero at this point,
+ * the ops->map_release_uref callback is responsible for cancelling the timers,
+ * freeing their memory, and decrementing prog's refcnts.
+ * bpf_timer_cancel() cancels the timer and decrements prog's refcnt.
+ * Inner maps can contain bpf timers as well. ops->map_release_uref
+ * frees the timers when an inner map is replaced or deleted by user space.
+ */
+struct bpf_hrtimer {
+ struct hrtimer timer;
+ struct bpf_map *map; /* map the timer belongs to; set in bpf_timer_init() */
+ struct bpf_prog *prog; /* prog whose refcnt is held; NULL until a callback is set */
+ void __rcu *callback_fn; /* bpf callback invoked from bpf_timer_cb() */
+ void *value; /* start of the map value that embeds the timer */
+};
+
+/* the actual struct hidden inside uapi struct bpf_timer */
+struct bpf_timer_kern {
+ /* NULL until bpf_timer_init() allocates the bpf_hrtimer */
+ struct bpf_hrtimer *timer;
+ /* bpf_spin_lock is used here instead of spinlock_t to make
+ * sure that it always fits into space reserved by struct bpf_timer
+ * regardless of LOCKDEP and spinlock debug flags.
+ */
+ struct bpf_spin_lock lock;
+} __attribute__((aligned(8)));
+
+static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
+
+/* hrtimer expiry handler for bpf timers. Invokes the bpf callback_fn with
+ * (map, key, value) and always returns HRTIMER_NORESTART. The key is
+ * derived from the value pointer: the element index for array maps, or
+ * the bytes located round_up(key_size, 8) before the value otherwise.
+ * Does nothing if no callback_fn is currently set.
+ */
+static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
+{
+ struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
+ struct bpf_map *map = t->map;
+ void *value = t->value;
+ void *callback_fn;
+ void *key;
+ u32 idx;
+
+ callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held());
+ if (!callback_fn)
+ goto out;
+
+ /* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and
+ * cannot be preempted by another bpf_timer_cb() on the same cpu.
+ * Remember the timer this callback is servicing to prevent
+ * deadlock if callback_fn() calls bpf_timer_cancel() or
+ * bpf_map_delete_elem() on the same timer.
+ */
+ this_cpu_write(hrtimer_running, t);
+ if (map->map_type == BPF_MAP_TYPE_ARRAY) {
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+ /* compute the key */
+ idx = ((char *)value - array->value) / array->elem_size;
+ key = &idx;
+ } else { /* hash or lru */
+ key = value - round_up(map->key_size, 8);
+ }
+
+ BPF_CAST_CALL(callback_fn)((u64)(long)map, (u64)(long)key,
+ (u64)(long)value, 0, 0);
+ /* The verifier checked that return value is zero. */
+
+ this_cpu_write(hrtimer_running, NULL);
+out:
+ return HRTIMER_NORESTART;
+}
+
+/* bpf_timer_init() helper: allocate and set up the bpf_hrtimer backing
+ * 'timer', which is embedded in a value of 'map'. 'flags' carries the
+ * clockid; only CLOCK_MONOTONIC, CLOCK_REALTIME and CLOCK_BOOTTIME are
+ * accepted.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when called in NMI context, -EINVAL
+ * for unsupported flags, -EBUSY if the timer is already initialized,
+ * -EPERM if the map has no user references, -ENOMEM if allocation fails.
+ */
+BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map,
+ u64, flags)
+{
+ clockid_t clockid = flags & (MAX_CLOCKS - 1);
+ struct bpf_hrtimer *t;
+ int ret = 0;
+
+ BUILD_BUG_ON(MAX_CLOCKS != 16);
+ BUILD_BUG_ON(sizeof(struct bpf_timer_kern) > sizeof(struct bpf_timer));
+ BUILD_BUG_ON(__alignof__(struct bpf_timer_kern) != __alignof__(struct bpf_timer));
+
+ if (in_nmi())
+ return -EOPNOTSUPP;
+
+ if (flags >= MAX_CLOCKS ||
+ /* similar to timerfd except _ALARM variants are not supported */
+ (clockid != CLOCK_MONOTONIC &&
+ clockid != CLOCK_REALTIME &&
+ clockid != CLOCK_BOOTTIME))
+ return -EINVAL;
+ __bpf_spin_lock_irqsave(&timer->lock);
+ t = timer->timer;
+ if (t) {
+ ret = -EBUSY;
+ goto out;
+ }
+ if (!atomic64_read(&map->usercnt)) {
+ /* maps with timers must be either held by user space
+ * or pinned in bpffs.
+ */
+ ret = -EPERM;
+ goto out;
+ }
+ /* allocate hrtimer via map_kmalloc to use memcg accounting */
+ t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node);
+ if (!t) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ /* the timer sits timer_off bytes into the map value */
+ t->value = (void *)timer - map->timer_off;
+ t->map = map;
+ t->prog = NULL;
+ rcu_assign_pointer(t->callback_fn, NULL);
+ hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
+ t->timer.function = bpf_timer_cb;
+ timer->timer = t;
+out:
+ __bpf_spin_unlock_irqrestore(&timer->lock);
+ return ret;
+}
+
+/* Helper prototype for bpf_timer_init(): GPL-only, integer return;
+ * takes a timer pointer, a constant map pointer and an arbitrary value.
+ */
+static const struct bpf_func_proto bpf_timer_init_proto = {
+ .func = bpf_timer_init,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_TIMER,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+};
+
+/* bpf_timer_set_callback() helper: record 'callback_fn' as the function
+ * that bpf_timer_cb() will invoke for 'timer', holding a reference on the
+ * program taken from 'aux' (aux->prog).
+ *
+ * Returns 0 on success, -EOPNOTSUPP when called in NMI context, -EINVAL
+ * if the timer was not initialized, -EPERM if the map has no user
+ * references, or the error from bpf_prog_inc_not_zero().
+ */
+BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn,
+ struct bpf_prog_aux *, aux)
+{
+ struct bpf_prog *prev, *prog = aux->prog;
+ struct bpf_hrtimer *t;
+ int ret = 0;
+
+ if (in_nmi())
+ return -EOPNOTSUPP;
+ __bpf_spin_lock_irqsave(&timer->lock);
+ t = timer->timer;
+ if (!t) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!atomic64_read(&t->map->usercnt)) {
+ /* maps with timers must be either held by user space
+ * or pinned in bpffs. Otherwise timer might still be
+ * running even when bpf prog is detached and user space
+ * is gone, since map_release_uref won't ever be called.
+ */
+ ret = -EPERM;
+ goto out;
+ }
+ prev = t->prog;
+ if (prev != prog) {
+ /* Bump prog refcnt once. Every bpf_timer_set_callback()
+ * can pick different callback_fn-s within the same prog.
+ */
+ prog = bpf_prog_inc_not_zero(prog);
+ if (IS_ERR(prog)) {
+ ret = PTR_ERR(prog);
+ goto out;
+ }
+ if (prev)
+ /* Drop prev prog refcnt when swapping with new prog */
+ bpf_prog_put(prev);
+ t->prog = prog;
+ }
+ rcu_assign_pointer(t->callback_fn, callback_fn);
+out:
+ __bpf_spin_unlock_irqrestore(&timer->lock);
+ return ret;
+}
+
+/* Helper prototype for bpf_timer_set_callback(): GPL-only, integer return.
+ * NOTE(review): only two argument types are declared here although the
+ * helper is defined with BPF_CALL_3; presumably the third argument (aux)
+ * is not supplied by the bpf program itself -- confirm.
+ */
+static const struct bpf_func_proto bpf_timer_set_callback_proto = {
+ .func = bpf_timer_set_callback,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_TIMER,
+ .arg2_type = ARG_PTR_TO_FUNC,
+};
+
+/* bpf_timer_start() helper: arm 'timer' to expire 'nsecs' nanoseconds from
+ * now (relative, soft hrtimer mode). 'flags' must be zero.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when called in NMI context, -EINVAL
+ * if flags is non-zero, the timer was not initialized, or no program has
+ * been attached to it yet.
+ */
+BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, flags)
+{
+ struct bpf_hrtimer *t;
+ int ret = 0;
+
+ if (in_nmi())
+ return -EOPNOTSUPP;
+ if (flags)
+ return -EINVAL;
+ __bpf_spin_lock_irqsave(&timer->lock);
+ t = timer->timer;
+ if (!t || !t->prog) {
+ ret = -EINVAL;
+ goto out;
+ }
+ hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT);
+out:
+ __bpf_spin_unlock_irqrestore(&timer->lock);
+ return ret;
+}
+
+/* Helper prototype for bpf_timer_start(): GPL-only, integer return;
+ * takes a timer pointer followed by two arbitrary values.
+ */
+static const struct bpf_func_proto bpf_timer_start_proto = {
+ .func = bpf_timer_start,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_TIMER,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
+/* If a program is attached to 't', drop the reference held on it and
+ * clear both t->prog and t->callback_fn. Both callers in this file
+ * (bpf_timer_cancel() and bpf_timer_cancel_and_free()) invoke it while
+ * holding the timer's lock.
+ */
+static void drop_prog_refcnt(struct bpf_hrtimer *t)
+{
+ struct bpf_prog *prog = t->prog;
+
+ if (prog) {
+ bpf_prog_put(prog);
+ t->prog = NULL;
+ rcu_assign_pointer(t->callback_fn, NULL);
+ }
+}
+
+/* bpf_timer_cancel() helper: detach the program from 'timer' and cancel
+ * the underlying hrtimer.
+ *
+ * Returns -EOPNOTSUPP when called in NMI context, -EINVAL if the timer
+ * was not initialized, -EDEADLK if called from the timer's own callback
+ * on this cpu; otherwise the return value of hrtimer_cancel().
+ */
+BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer)
+{
+ struct bpf_hrtimer *t;
+ int ret = 0;
+
+ if (in_nmi())
+ return -EOPNOTSUPP;
+ __bpf_spin_lock_irqsave(&timer->lock);
+ t = timer->timer;
+ if (!t) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (this_cpu_read(hrtimer_running) == t) {
+ /* If bpf callback_fn is trying to bpf_timer_cancel()
+ * its own timer the hrtimer_cancel() will deadlock
+ * since it waits for callback_fn to finish
+ */
+ ret = -EDEADLK;
+ goto out;
+ }
+ drop_prog_refcnt(t);
+out:
+ __bpf_spin_unlock_irqrestore(&timer->lock);
+ /* Cancel the timer and wait for associated callback to finish
+ * if it was running. When ret is already non-zero the '?:' below
+ * short-circuits, so 't' (possibly NULL) is not dereferenced.
+ */
+ ret = ret ?: hrtimer_cancel(&t->timer);
+ return ret;
+}
+
+/* Helper prototype for bpf_timer_cancel(): GPL-only, integer return;
+ * takes a single timer pointer.
+ */
+static const struct bpf_func_proto bpf_timer_cancel_proto = {
+ .func = bpf_timer_cancel,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_TIMER,
+};
+
+/* This function is called by map_delete/update_elem for individual element and
+ * by ops->map_release_uref when the user space reference to a map reaches zero.
+ *
+ * 'val' points at the timer area inside a map value (it is interpreted as
+ * struct bpf_timer_kern). If a bpf_hrtimer was allocated for it, the
+ * program reference is dropped, the slot is reset to NULL, the hrtimer is
+ * cancelled (unless we are running inside its own callback) and the
+ * bpf_hrtimer is freed.
+ */
+void bpf_timer_cancel_and_free(void *val)
+{
+ struct bpf_timer_kern *timer = val;
+ struct bpf_hrtimer *t;
+
+ /* Performance optimization: read timer->timer without lock first. */
+ if (!READ_ONCE(timer->timer))
+ return;
+
+ __bpf_spin_lock_irqsave(&timer->lock);
+ /* re-read it under lock */
+ t = timer->timer;
+ if (!t)
+ goto out;
+ drop_prog_refcnt(t);
+ /* The subsequent bpf_timer_start/cancel() helpers won't be able to use
+ * this timer, since it won't be initialized.
+ */
+ timer->timer = NULL;
+out:
+ __bpf_spin_unlock_irqrestore(&timer->lock);
+ if (!t)
+ return;
+ /* Cancel the timer and wait for callback to complete if it was running.
+ * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
+ * right after for both preallocated and non-preallocated maps.
+ * The timer->timer = NULL was already done and no code path can
+ * see address 't' anymore.
+ *
+ * Check that bpf_map_delete/update_elem() wasn't called from timer
+ * callback_fn. In such case don't call hrtimer_cancel() (since it will
+ * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
+ * return -1). Though callback_fn is still running on this cpu it's
+ * safe to do kfree(t) because bpf_timer_cb() read everything it needed
+ * from 't'. The bpf subprog callback_fn won't be able to access 't',
+ * since timer->timer = NULL was already done. The timer will be
+ * effectively cancelled because bpf_timer_cb() will return
+ * HRTIMER_NORESTART.
+ */
+ if (this_cpu_read(hrtimer_running) != t)
+ hrtimer_cancel(&t->timer);
+ kfree(t);
+}
+
const struct bpf_func_proto bpf_get_current_task_proto __weak;
+const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
+const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
@@ -1055,6 +1399,14 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_per_cpu_ptr_proto;
case BPF_FUNC_this_cpu_ptr:
return &bpf_this_cpu_ptr_proto;
+ case BPF_FUNC_timer_init:
+ return &bpf_timer_init_proto;
+ case BPF_FUNC_timer_set_callback:
+ return &bpf_timer_set_callback_proto;
+ case BPF_FUNC_timer_start:
+ return &bpf_timer_start_proto;
+ case BPF_FUNC_timer_cancel:
+ return &bpf_timer_cancel_proto;
default:
break;
}
@@ -1067,20 +1419,24 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return bpf_get_trace_printk_proto();
case BPF_FUNC_get_current_task:
return &bpf_get_current_task_proto;
+ case BPF_FUNC_get_current_task_btf:
+ return &bpf_get_current_task_btf_proto;
case BPF_FUNC_probe_read_user:
return &bpf_probe_read_user_proto;
case BPF_FUNC_probe_read_kernel:
- return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+ return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
NULL : &bpf_probe_read_kernel_proto;
case BPF_FUNC_probe_read_user_str:
return &bpf_probe_read_user_str_proto;
case BPF_FUNC_probe_read_kernel_str:
- return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+ return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
NULL : &bpf_probe_read_kernel_str_proto;
case BPF_FUNC_snprintf_btf:
return &bpf_snprintf_btf_proto;
case BPF_FUNC_snprintf:
return &bpf_snprintf_proto;
+ case BPF_FUNC_task_pt_regs:
+ return &bpf_task_pt_regs_proto;
default:
return NULL;
}
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index bd11db9774c3..035e9e3a7132 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -1,6 +1,7 @@
//SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
+#include <linux/bpf_local_storage.h>
#include <linux/btf.h>
#include <linux/bug.h>
#include <linux/filter.h>
@@ -11,9 +12,6 @@
#ifdef CONFIG_CGROUP_BPF
-DEFINE_PER_CPU(struct bpf_cgroup_storage_info,
- bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
-
#include "../cgroup/cgroup-internal.h"
#define LOCAL_STORAGE_CREATE_FLAG_MASK \
@@ -173,7 +171,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *key,
return -ENOMEM;
memcpy(&new->data[0], value, map->value_size);
- check_and_init_map_lock(map, new->data);
+ check_and_init_map_value(map, new->data);
new = xchg(&storage->buf, new);
kfree_rcu(new, rcu);
@@ -286,9 +284,17 @@ enoent:
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
+ __u32 max_value_size = BPF_LOCAL_STORAGE_MAX_VALUE_SIZE;
int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_cgroup_storage_map *map;
+ /* percpu is bound by PCPU_MIN_UNIT_SIZE, non-percpu
+ * is the same as other local storages.
+ */
+ if (attr->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
+ max_value_size = min_t(__u32, max_value_size,
+ PCPU_MIN_UNIT_SIZE);
+
if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) &&
attr->key_size != sizeof(__u64))
return ERR_PTR(-EINVAL);
@@ -296,7 +302,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
if (attr->value_size == 0)
return ERR_PTR(-EINVAL);
- if (attr->value_size > PAGE_SIZE)
+ if (attr->value_size > max_value_size)
return ERR_PTR(-E2BIG);
if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK ||
@@ -409,7 +415,7 @@ static int cgroup_storage_check_btf(const struct bpf_map *map,
static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
struct seq_file *m)
{
- enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
+ enum bpf_cgroup_storage_type stype;
struct bpf_cgroup_storage *storage;
int cpu;
@@ -509,7 +515,7 @@ struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
map->numa_node);
if (!storage->buf)
goto enomem;
- check_and_init_map_lock(map, storage->buf->data);
+ check_and_init_map_value(map, storage->buf->data);
} else {
storage->percpu_buf = bpf_map_alloc_percpu(map, size, 8, gfp);
if (!storage->percpu_buf)
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 39ab0b68cade..5cd8f5277279 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -3,6 +3,7 @@
*/
#include <linux/slab.h>
#include <linux/bpf.h>
+#include <linux/btf.h>
#include "map_in_map.h"
@@ -50,6 +51,11 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
inner_map_meta->map_flags = inner_map->map_flags;
inner_map_meta->max_entries = inner_map->max_entries;
inner_map_meta->spin_lock_off = inner_map->spin_lock_off;
+ inner_map_meta->timer_off = inner_map->timer_off;
+ if (inner_map->btf) {
+ btf_get(inner_map->btf);
+ inner_map_meta->btf = inner_map->btf;
+ }
/* Misc members not needed in bpf_map_meta_equal() check. */
inner_map_meta->ops = inner_map->ops;
@@ -65,6 +71,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
+/* Release a map meta object: drop the BTF reference stored in it, then
+ * free the object itself.
+ * NOTE(review): bpf_map_meta_alloc() only takes the BTF reference when
+ * inner_map->btf is non-NULL, while btf_put() is called unconditionally
+ * here; presumably btf_put() tolerates a NULL btf -- confirm.
+ */
void bpf_map_meta_free(struct bpf_map *map_meta)
{
+ btf_put(map_meta->btf);
kfree(map_meta);
}
@@ -75,6 +82,7 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
return meta0->map_type == meta1->map_type &&
meta0->key_size == meta1->key_size &&
meta0->value_size == meta1->value_size &&
+ meta0->timer_off == meta1->timer_off &&
meta0->map_flags == meta1->map_flags;
}
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 6fbc2abe9c91..e8eefdf8cf3e 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -530,14 +530,12 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
return res;
}
-BTF_ID_LIST_SINGLE(bpf_get_task_stack_btf_ids, struct, task_struct)
-
const struct bpf_func_proto bpf_get_task_stack_proto = {
.func = bpf_get_task_stack,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
- .arg1_btf_id = &bpf_get_task_stack_btf_ids[0],
+ .arg1_btf_id = &btf_task_struct_ids[0],
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e343f158e556..4e50c0bfdb7d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -260,8 +260,8 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
copy_map_value_locked(map, value, ptr, true);
else
copy_map_value(map, value, ptr);
- /* mask lock, since value wasn't zero inited */
- check_and_init_map_lock(map, value);
+ /* mask lock and timer, since value wasn't zero inited */
+ check_and_init_map_value(map, value);
}
rcu_read_unlock();
}
@@ -623,7 +623,8 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
struct bpf_map *map = filp->private_data;
int err;
- if (!map->ops->map_mmap || map_value_has_spin_lock(map))
+ if (!map->ops->map_mmap || map_value_has_spin_lock(map) ||
+ map_value_has_timer(map))
return -ENOTSUPP;
if (!(vma->vm_flags & VM_SHARED))
@@ -793,6 +794,16 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
}
}
+ map->timer_off = btf_find_timer(btf, value_type);
+ if (map_value_has_timer(map)) {
+ if (map->map_flags & BPF_F_RDONLY_PROG)
+ return -EACCES;
+ if (map->map_type != BPF_MAP_TYPE_HASH &&
+ map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+ map->map_type != BPF_MAP_TYPE_ARRAY)
+ return -EOPNOTSUPP;
+ }
+
if (map->ops->map_check_btf)
ret = map->ops->map_check_btf(map, btf, key_type, value_type);
@@ -844,6 +855,7 @@ static int map_create(union bpf_attr *attr)
mutex_init(&map->freeze_mutex);
map->spin_lock_off = -EINVAL;
+ map->timer_off = -EINVAL;
if (attr->btf_key_type_id || attr->btf_value_type_id ||
/* Even the map's value is a kernel's struct,
* the bpf_prog.o must have BTF to begin with
@@ -1001,7 +1013,7 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
static void *__bpf_copy_key(void __user *ukey, u64 key_size)
{
if (key_size)
- return memdup_user(ukey, key_size);
+ return vmemdup_user(ukey, key_size);
if (ukey)
return ERR_PTR(-EINVAL);
@@ -1012,7 +1024,7 @@ static void *__bpf_copy_key(void __user *ukey, u64 key_size)
static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
{
if (key_size)
- return memdup_bpfptr(ukey, key_size);
+ return kvmemdup_bpfptr(ukey, key_size);
if (!bpfptr_is_null(ukey))
return ERR_PTR(-EINVAL);
@@ -1064,7 +1076,7 @@ static int map_lookup_elem(union bpf_attr *attr)
value_size = bpf_map_value_size(map);
err = -ENOMEM;
- value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+ value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
if (!value)
goto free_key;
@@ -1079,9 +1091,9 @@ static int map_lookup_elem(union bpf_attr *attr)
err = 0;
free_value:
- kfree(value);
+ kvfree(value);
free_key:
- kfree(key);
+ kvfree(key);
err_put:
fdput(f);
return err;
@@ -1125,16 +1137,10 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
goto err_put;
}
- if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
- map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
- map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
- map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
- value_size = round_up(map->value_size, 8) * num_possible_cpus();
- else
- value_size = map->value_size;
+ value_size = bpf_map_value_size(map);
err = -ENOMEM;
- value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+ value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
if (!value)
goto free_key;
@@ -1145,9 +1151,9 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
err = bpf_map_update_value(map, f, key, value, attr->flags);
free_value:
- kfree(value);
+ kvfree(value);
free_key:
- kfree(key);
+ kvfree(key);
err_put:
fdput(f);
return err;
@@ -1199,7 +1205,7 @@ static int map_delete_elem(union bpf_attr *attr)
bpf_enable_instrumentation();
maybe_wait_bpf_programs(map);
out:
- kfree(key);
+ kvfree(key);
err_put:
fdput(f);
return err;
@@ -1241,7 +1247,7 @@ static int map_get_next_key(union bpf_attr *attr)
}
err = -ENOMEM;
- next_key = kmalloc(map->key_size, GFP_USER);
+ next_key = kvmalloc(map->key_size, GFP_USER);
if (!next_key)
goto free_key;
@@ -1264,9 +1270,9 @@ out:
err = 0;
free_next_key:
- kfree(next_key);
+ kvfree(next_key);
free_key:
- kfree(key);
+ kvfree(key);
err_put:
fdput(f);
return err;
@@ -1293,7 +1299,7 @@ int generic_map_delete_batch(struct bpf_map *map,
if (!max_count)
return 0;
- key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+ key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
if (!key)
return -ENOMEM;
@@ -1320,7 +1326,7 @@ int generic_map_delete_batch(struct bpf_map *map,
if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
err = -EFAULT;
- kfree(key);
+ kvfree(key);
return err;
}
@@ -1351,13 +1357,13 @@ int generic_map_update_batch(struct bpf_map *map,
if (!max_count)
return 0;
- key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+ key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
if (!key)
return -ENOMEM;
- value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+ value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
if (!value) {
- kfree(key);
+ kvfree(key);
return -ENOMEM;
}
@@ -1378,8 +1384,8 @@ int generic_map_update_batch(struct bpf_map *map,
if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
err = -EFAULT;
- kfree(value);
- kfree(key);
+ kvfree(value);
+ kvfree(key);
return err;
}
@@ -1413,13 +1419,13 @@ int generic_map_lookup_batch(struct bpf_map *map,
if (put_user(0, &uattr->batch.count))
return -EFAULT;
- buf_prevkey = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+ buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
if (!buf_prevkey)
return -ENOMEM;
- buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
+ buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
if (!buf) {
- kfree(buf_prevkey);
+ kvfree(buf_prevkey);
return -ENOMEM;
}
@@ -1479,8 +1485,8 @@ int generic_map_lookup_batch(struct bpf_map *map,
err = -EFAULT;
free_buf:
- kfree(buf_prevkey);
- kfree(buf);
+ kvfree(buf_prevkey);
+ kvfree(buf);
return err;
}
@@ -1535,7 +1541,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
value_size = bpf_map_value_size(map);
err = -ENOMEM;
- value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+ value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
if (!value)
goto free_key;
@@ -1567,9 +1573,9 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
err = 0;
free_value:
- kfree(value);
+ kvfree(value);
free_key:
- kfree(key);
+ kvfree(key);
err_put:
fdput(f);
return err;
@@ -1591,7 +1597,8 @@ static int map_freeze(const union bpf_attr *attr)
if (IS_ERR(map))
return PTR_ERR(map);
- if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+ if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS ||
+ map_value_has_timer(map)) {
fdput(f);
return -ENOTSUPP;
}
@@ -1699,6 +1706,8 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog)
void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
{
+ unsigned long flags;
+
/* cBPF to eBPF migrations are currently not in the idr store.
* Offloaded programs are removed from the store when their device
* disappears - even if someone grabs an fd to them they are unusable,
@@ -1708,7 +1717,7 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
return;
if (do_idr_lock)
- spin_lock_bh(&prog_idr_lock);
+ spin_lock_irqsave(&prog_idr_lock, flags);
else
__acquire(&prog_idr_lock);
@@ -1716,7 +1725,7 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
prog->aux->id = 0;
if (do_idr_lock)
- spin_unlock_bh(&prog_idr_lock);
+ spin_unlock_irqrestore(&prog_idr_lock, flags);
else
__release(&prog_idr_lock);
}
@@ -1752,14 +1761,32 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
}
}
+static void bpf_prog_put_deferred(struct work_struct *work)
+{
+ struct bpf_prog_aux *aux;
+ struct bpf_prog *prog;
+
+ aux = container_of(work, struct bpf_prog_aux, work);
+ prog = aux->prog;
+ perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
+ bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
+ __bpf_prog_put_noref(prog, true);
+}
+
static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
- if (atomic64_dec_and_test(&prog->aux->refcnt)) {
- perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
- bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
+ struct bpf_prog_aux *aux = prog->aux;
+
+ if (atomic64_dec_and_test(&aux->refcnt)) {
/* bpf_prog_free_id() must be called first */
bpf_prog_free_id(prog, do_idr_lock);
- __bpf_prog_put_noref(prog, true);
+
+ if (in_irq() || irqs_disabled()) {
+ INIT_WORK(&aux->work, bpf_prog_put_deferred);
+ schedule_work(&aux->work);
+ } else {
+ bpf_prog_put_deferred(&aux->work);
+ }
}
}
@@ -2873,6 +2900,79 @@ static const struct bpf_link_ops bpf_raw_tp_link_lops = {
.fill_link_info = bpf_raw_tp_link_fill_link_info,
};
+#ifdef CONFIG_PERF_EVENTS
+struct bpf_perf_link {
+ struct bpf_link link;
+ struct file *perf_file;
+};
+
+static void bpf_perf_link_release(struct bpf_link *link)
+{
+ struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
+ struct perf_event *event = perf_link->perf_file->private_data;
+
+ perf_event_free_bpf_prog(event);
+ fput(perf_link->perf_file);
+}
+
+static void bpf_perf_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
+
+ kfree(perf_link);
+}
+
+static const struct bpf_link_ops bpf_perf_link_lops = {
+ .release = bpf_perf_link_release,
+ .dealloc = bpf_perf_link_dealloc,
+};
+
+static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ struct bpf_link_primer link_primer;
+ struct bpf_perf_link *link;
+ struct perf_event *event;
+ struct file *perf_file;
+ int err;
+
+ if (attr->link_create.flags)
+ return -EINVAL;
+
+ perf_file = perf_event_get(attr->link_create.target_fd);
+ if (IS_ERR(perf_file))
+ return PTR_ERR(perf_file);
+
+ link = kzalloc(sizeof(*link), GFP_USER);
+ if (!link) {
+ err = -ENOMEM;
+ goto out_put_file;
+ }
+ bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog);
+ link->perf_file = perf_file;
+
+ err = bpf_link_prime(&link->link, &link_primer);
+ if (err) {
+ kfree(link);
+ goto out_put_file;
+ }
+
+ event = perf_file->private_data;
+ err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie);
+ if (err) {
+ bpf_link_cleanup(&link_primer);
+ goto out_put_file;
+ }
+ /* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */
+ bpf_prog_inc(prog);
+
+ return bpf_link_settle(&link_primer);
+
+out_put_file:
+ fput(perf_file);
+ return err;
+}
+#endif /* CONFIG_PERF_EVENTS */
+
#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
@@ -4114,15 +4214,26 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
if (ret)
goto out;
- if (prog->type == BPF_PROG_TYPE_EXT) {
+ switch (prog->type) {
+ case BPF_PROG_TYPE_EXT:
ret = tracing_bpf_link_attach(attr, uattr, prog);
goto out;
- }
-
- ptype = attach_type_to_prog_type(attr->link_create.attach_type);
- if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
- ret = -EINVAL;
- goto out;
+ case BPF_PROG_TYPE_PERF_EVENT:
+ case BPF_PROG_TYPE_KPROBE:
+ case BPF_PROG_TYPE_TRACEPOINT:
+ if (attr->link_create.attach_type != BPF_PERF_EVENT) {
+ ret = -EINVAL;
+ goto out;
+ }
+ ptype = prog->type;
+ break;
+ default:
+ ptype = attach_type_to_prog_type(attr->link_create.attach_type);
+ if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
}
switch (ptype) {
@@ -4147,6 +4258,13 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
ret = bpf_xdp_link_attach(attr, prog);
break;
#endif
+#ifdef CONFIG_PERF_EVENTS
+ case BPF_PROG_TYPE_PERF_EVENT:
+ case BPF_PROG_TYPE_TRACEPOINT:
+ case BPF_PROG_TYPE_KPROBE:
+ ret = bpf_perf_link_attach(attr, prog);
+ break;
+#endif
default:
ret = -EINVAL;
}
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index b68cb5d6d6eb..b48750bfba5a 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -525,7 +525,6 @@ static const struct seq_operations task_vma_seq_ops = {
};
BTF_ID_LIST(btf_task_file_ids)
-BTF_ID(struct, task_struct)
BTF_ID(struct, file)
BTF_ID(struct, vm_area_struct)
@@ -591,19 +590,19 @@ static int __init task_iter_init(void)
{
int ret;
- task_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
+ task_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0];
ret = bpf_iter_reg_target(&task_reg_info);
if (ret)
return ret;
- task_file_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
- task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[1];
+ task_file_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0];
+ task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[0];
ret = bpf_iter_reg_target(&task_file_reg_info);
if (ret)
return ret;
- task_vma_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
- task_vma_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[2];
+ task_vma_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0];
+ task_vma_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[1];
return bpf_iter_reg_target(&task_vma_reg_info);
}
late_initcall(task_iter_init);
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 28a3630c48ee..fe1e857324e6 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -172,7 +172,7 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
}
static struct bpf_tramp_progs *
-bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
+bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
{
const struct bpf_prog_aux *aux;
struct bpf_tramp_progs *tprogs;
@@ -189,8 +189,10 @@ bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
*total += tr->progs_cnt[kind];
progs = tprogs[kind].progs;
- hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist)
+ hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) {
+ *ip_arg |= aux->prog->call_get_func_ip;
*progs++ = aux->prog;
+ }
}
return tprogs;
}
@@ -333,9 +335,10 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
struct bpf_tramp_image *im;
struct bpf_tramp_progs *tprogs;
u32 flags = BPF_TRAMP_F_RESTORE_REGS;
+ bool ip_arg = false;
int err, total;
- tprogs = bpf_trampoline_get_progs(tr, &total);
+ tprogs = bpf_trampoline_get_progs(tr, &total, &ip_arg);
if (IS_ERR(tprogs))
return PTR_ERR(tprogs);
@@ -357,6 +360,9 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
+ if (ip_arg)
+ flags |= BPF_TRAMP_F_IP_ARG;
+
err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE,
&tr->func.model, flags, tprogs,
tr->func.addr);
@@ -542,7 +548,7 @@ static void notrace inc_misses_counter(struct bpf_prog *prog)
u64_stats_update_end(&stats->syncp);
}
-/* The logic is similar to BPF_PROG_RUN, but with an explicit
+/* The logic is similar to bpf_prog_run(), but with an explicit
* rcu_read_lock() and migrate_disable() which are required
* for the trampoline. The macro is split into
* call __bpf_prog_enter
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f9bda5476ea5..047ac4b4703b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -255,6 +255,7 @@ struct bpf_call_arg_meta {
int mem_size;
u64 msize_max_value;
int ref_obj_id;
+ int map_uid;
int func_id;
struct btf *btf;
u32 btf_id;
@@ -734,6 +735,10 @@ static void print_verifier_state(struct bpf_verifier_env *env,
if (state->refs[i].id)
verbose(env, ",%d", state->refs[i].id);
}
+ if (state->in_callback_fn)
+ verbose(env, " cb");
+ if (state->in_async_callback_fn)
+ verbose(env, " async_cb");
verbose(env, "\n");
}
@@ -1135,6 +1140,10 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
if (map->inner_map_meta) {
reg->type = CONST_PTR_TO_MAP;
reg->map_ptr = map->inner_map_meta;
+ /* transfer reg's id which is unique for every map_lookup_elem
+ * as UID of the inner map.
+ */
+ reg->map_uid = reg->id;
} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
reg->type = PTR_TO_XDP_SOCK;
} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
@@ -1522,6 +1531,54 @@ static void init_func_state(struct bpf_verifier_env *env,
init_reg_state(env, state);
}
+/* Similar to push_stack(), but for async callbacks */
+static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
+ int insn_idx, int prev_insn_idx,
+ int subprog)
+{
+ struct bpf_verifier_stack_elem *elem;
+ struct bpf_func_state *frame;
+
+ elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
+ if (!elem)
+ goto err;
+
+ elem->insn_idx = insn_idx;
+ elem->prev_insn_idx = prev_insn_idx;
+ elem->next = env->head;
+ elem->log_pos = env->log.len_used;
+ env->head = elem;
+ env->stack_size++;
+ if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
+ verbose(env,
+ "The sequence of %d jumps is too complex for async cb.\n",
+ env->stack_size);
+ goto err;
+ }
+ /* Unlike push_stack() do not copy_verifier_state().
+ * The caller state doesn't matter.
+ * This is async callback. It starts in a fresh stack.
+ * Initialize it similar to do_check_common().
+ */
+ elem->st.branches = 1;
+ frame = kzalloc(sizeof(*frame), GFP_KERNEL);
+ if (!frame)
+ goto err;
+ init_func_state(env, frame,
+ BPF_MAIN_FUNC /* callsite */,
+ 0 /* frameno within this callchain */,
+ subprog /* subprog number within this prog */);
+ elem->st.frame[0] = frame;
+ return &elem->st;
+err:
+ free_verifier_state(env->cur_state, true);
+ env->cur_state = NULL;
+ /* pop all elements and return */
+ while (!pop_stack(env, NULL, NULL, false));
+ return NULL;
+}
+
+
enum reg_arg_type {
SRC_OP, /* register is used as source operand */
DST_OP, /* register is used as destination operand */
@@ -3217,6 +3274,15 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
return -EACCES;
}
}
+ if (map_value_has_timer(map)) {
+ u32 t = map->timer_off;
+
+ if (reg->smin_value + off < t + sizeof(struct bpf_timer) &&
+ t < reg->umax_value + off + size) {
+ verbose(env, "bpf_timer cannot be accessed directly by load/store\n");
+ return -EACCES;
+ }
+ }
return err;
}
@@ -3619,6 +3685,8 @@ process_func:
continue_func:
subprog_end = subprog[idx + 1].start;
for (; i < subprog_end; i++) {
+ int next_insn;
+
if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
continue;
/* remember insn and function to return to */
@@ -3626,13 +3694,22 @@ continue_func:
ret_prog[frame] = idx;
/* find the callee */
- i = i + insn[i].imm + 1;
- idx = find_subprog(env, i);
+ next_insn = i + insn[i].imm + 1;
+ idx = find_subprog(env, next_insn);
if (idx < 0) {
WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
- i);
+ next_insn);
return -EFAULT;
}
+ if (subprog[idx].is_async_cb) {
+ if (subprog[idx].has_tail_call) {
+ verbose(env, "verifier bug. subprog has tail_call and async cb\n");
+ return -EFAULT;
+ }
+ /* async callbacks don't increase bpf prog stack size */
+ continue;
+ }
+ i = next_insn;
if (subprog[idx].has_tail_call)
tail_call_reachable = true;
@@ -4634,6 +4711,54 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
return 0;
}
+static int process_timer_func(struct bpf_verifier_env *env, int regno,
+ struct bpf_call_arg_meta *meta)
+{
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ bool is_const = tnum_is_const(reg->var_off);
+ struct bpf_map *map = reg->map_ptr;
+ u64 val = reg->var_off.value;
+
+ if (!is_const) {
+ verbose(env,
+ "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
+ regno);
+ return -EINVAL;
+ }
+ if (!map->btf) {
+ verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
+ map->name);
+ return -EINVAL;
+ }
+ if (!map_value_has_timer(map)) {
+ if (map->timer_off == -E2BIG)
+ verbose(env,
+ "map '%s' has more than one 'struct bpf_timer'\n",
+ map->name);
+ else if (map->timer_off == -ENOENT)
+ verbose(env,
+ "map '%s' doesn't have 'struct bpf_timer'\n",
+ map->name);
+ else
+ verbose(env,
+ "map '%s' is not a struct type or bpf_timer is mangled\n",
+ map->name);
+ return -EINVAL;
+ }
+ if (map->timer_off != val + reg->off) {
+ verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
+ val + reg->off, map->timer_off);
+ return -EINVAL;
+ }
+ if (meta->map_ptr) {
+ verbose(env, "verifier bug. Two map pointers in a timer helper\n");
+ return -EFAULT;
+ }
+ meta->map_uid = reg->map_uid;
+ meta->map_ptr = map;
+ return 0;
+}
+
static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
{
return type == ARG_PTR_TO_MEM ||
@@ -4766,6 +4891,7 @@ static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PER
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
+static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
@@ -4797,6 +4923,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_FUNC] = &func_ptr_types,
[ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types,
[ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
+ [ARG_PTR_TO_TIMER] = &timer_types,
};
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -4926,7 +5053,29 @@ skip_type_check:
if (arg_type == ARG_CONST_MAP_PTR) {
/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
+ if (meta->map_ptr) {
+ /* Use map_uid (which is unique id of inner map) to reject:
+ * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
+ * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
+ * if (inner_map1 && inner_map2) {
+ * timer = bpf_map_lookup_elem(inner_map1);
+ * if (timer)
+ * // mismatch would have been allowed
+ * bpf_timer_init(timer, inner_map2);
+ * }
+ *
+ * Comparing map_ptr is enough to distinguish normal and outer maps.
+ */
+ if (meta->map_ptr != reg->map_ptr ||
+ meta->map_uid != reg->map_uid) {
+ verbose(env,
+ "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
+ meta->map_uid, reg->map_uid);
+ return -EINVAL;
+ }
+ }
meta->map_ptr = reg->map_ptr;
+ meta->map_uid = reg->map_uid;
} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
/* bpf_map_xxx(..., map_ptr, ..., key) call:
* check that [key, key + map->key_size) are within
@@ -4978,6 +5127,9 @@ skip_type_check:
verbose(env, "verifier internal error\n");
return -EFAULT;
}
+ } else if (arg_type == ARG_PTR_TO_TIMER) {
+ if (process_timer_func(env, regno, meta))
+ return -EACCES;
} else if (arg_type == ARG_PTR_TO_FUNC) {
meta->subprogno = reg->subprogno;
} else if (arg_type_is_mem_ptr(arg_type)) {
@@ -5150,8 +5302,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_RINGBUF:
if (func_id != BPF_FUNC_ringbuf_output &&
func_id != BPF_FUNC_ringbuf_reserve &&
- func_id != BPF_FUNC_ringbuf_submit &&
- func_id != BPF_FUNC_ringbuf_discard &&
func_id != BPF_FUNC_ringbuf_query)
goto error;
break;
@@ -5260,6 +5410,12 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
goto error;
break;
+ case BPF_FUNC_ringbuf_output:
+ case BPF_FUNC_ringbuf_reserve:
+ case BPF_FUNC_ringbuf_query:
+ if (map->map_type != BPF_MAP_TYPE_RINGBUF)
+ goto error;
+ break;
case BPF_FUNC_get_stackid:
if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
goto error;
@@ -5593,6 +5749,31 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
}
}
+ if (insn->code == (BPF_JMP | BPF_CALL) &&
+ insn->imm == BPF_FUNC_timer_set_callback) {
+ struct bpf_verifier_state *async_cb;
+
+ /* there is no real recursion here. timer callbacks are async */
+ env->subprog_info[subprog].is_async_cb = true;
+ async_cb = push_async_cb(env, env->subprog_info[subprog].start,
+ *insn_idx, subprog);
+ if (!async_cb)
+ return -EFAULT;
+ callee = async_cb->frame[0];
+ callee->async_entry_cnt = caller->async_entry_cnt + 1;
+
+ /* Convert bpf_timer_set_callback() args into timer callback args */
+ err = set_callee_state_cb(env, caller, callee, *insn_idx);
+ if (err)
+ return err;
+
+ clear_caller_saved_regs(env, caller->regs);
+ mark_reg_unknown(env, caller->regs, BPF_REG_0);
+ caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
+ /* continue with next insn after call */
+ return 0;
+ }
+
callee = kzalloc(sizeof(*callee), GFP_KERNEL);
if (!callee)
return -ENOMEM;
@@ -5720,6 +5901,35 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env,
return 0;
}
+static int set_timer_callback_state(struct bpf_verifier_env *env,
+ struct bpf_func_state *caller,
+ struct bpf_func_state *callee,
+ int insn_idx)
+{
+ struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
+
+ /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
+ * callback_fn(struct bpf_map *map, void *key, void *value);
+ */
+ callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
+ __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
+ callee->regs[BPF_REG_1].map_ptr = map_ptr;
+
+ callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
+ __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
+ callee->regs[BPF_REG_2].map_ptr = map_ptr;
+
+ callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
+ __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
+ callee->regs[BPF_REG_3].map_ptr = map_ptr;
+
+ /* unused */
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+ callee->in_async_callback_fn = true;
+ return 0;
+}
+
static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
{
struct bpf_verifier_state *state = env->cur_state;
@@ -5933,6 +6143,29 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
return err;
}
+static int check_get_func_ip(struct bpf_verifier_env *env)
+{
+ enum bpf_attach_type eatype = env->prog->expected_attach_type;
+ enum bpf_prog_type type = resolve_prog_type(env->prog);
+ int func_id = BPF_FUNC_get_func_ip;
+
+ if (type == BPF_PROG_TYPE_TRACING) {
+ if (eatype != BPF_TRACE_FENTRY && eatype != BPF_TRACE_FEXIT &&
+ eatype != BPF_MODIFY_RETURN) {
+ verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
+ func_id_name(func_id), func_id);
+ return -ENOTSUPP;
+ }
+ return 0;
+ } else if (type == BPF_PROG_TYPE_KPROBE) {
+ return 0;
+ }
+
+ verbose(env, "func %s#%d not supported for program type %d\n",
+ func_id_name(func_id), func_id, type);
+ return -ENOTSUPP;
+}
+
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
@@ -6047,6 +6280,13 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return -EINVAL;
}
+ if (func_id == BPF_FUNC_timer_set_callback) {
+ err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+ set_timer_callback_state);
+ if (err < 0)
+ return -EINVAL;
+ }
+
if (func_id == BPF_FUNC_snprintf) {
err = check_bpf_snprintf_call(env, regs);
if (err < 0)
@@ -6082,6 +6322,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return -EINVAL;
}
regs[BPF_REG_0].map_ptr = meta.map_ptr;
+ regs[BPF_REG_0].map_uid = meta.map_uid;
if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
if (map_value_has_spin_lock(meta.map_ptr))
@@ -6203,6 +6444,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
env->prog->call_get_stack = true;
+ if (func_id == BPF_FUNC_get_func_ip) {
+ if (check_get_func_ip(env))
+ return -ENOTSUPP;
+ env->prog->call_get_func_ip = true;
+ }
+
if (changes_data)
clear_all_pkt_pointers(env);
return 0;
@@ -9083,7 +9330,8 @@ static int check_return_code(struct bpf_verifier_env *env)
struct tnum range = tnum_range(0, 1);
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
int err;
- const bool is_subprog = env->cur_state->frame[0]->subprogno;
+ struct bpf_func_state *frame = env->cur_state->frame[0];
+ const bool is_subprog = frame->subprogno;
/* LSM and struct_ops func-ptr's return type could be "void" */
if (!is_subprog &&
@@ -9108,6 +9356,22 @@ static int check_return_code(struct bpf_verifier_env *env)
}
reg = cur_regs(env) + BPF_REG_0;
+
+ if (frame->in_async_callback_fn) {
+ /* enforce return zero from async callbacks like timer */
+ if (reg->type != SCALAR_VALUE) {
+ verbose(env, "In async callback the register R0 is not a known value (%s)\n",
+ reg_type_str[reg->type]);
+ return -EINVAL;
+ }
+
+ if (!tnum_in(tnum_const(0), reg->var_off)) {
+ verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
+ return -EINVAL;
+ }
+ return 0;
+ }
+
if (is_subprog) {
if (reg->type != SCALAR_VALUE) {
verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
@@ -9322,8 +9586,12 @@ static int visit_func_call_insn(int t, int insn_cnt,
init_explored_state(env, t + 1);
if (visit_callee) {
init_explored_state(env, t);
- ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
- env, false);
+ ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
+ /* It's ok to allow recursion from CFG point of
+ * view. __check_func_call() will do the actual
+ * check.
+ */
+ bpf_pseudo_func(insns + t));
}
return ret;
}
@@ -9351,6 +9619,13 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
return DONE_EXPLORING;
case BPF_CALL:
+ if (insns[t].imm == BPF_FUNC_timer_set_callback)
+ /* Mark this call insn to trigger is_state_visited() check
+ * before call itself is processed by __check_func_call().
+ * Otherwise new async state will be pushed for further
+ * exploration.
+ */
+ init_explored_state(env, t);
return visit_func_call_insn(t, insn_cnt, insns, env,
insns[t].src_reg == BPF_PSEUDO_CALL);
@@ -10359,9 +10634,25 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
states_cnt++;
if (sl->state.insn_idx != insn_idx)
goto next;
+
if (sl->state.branches) {
- if (states_maybe_looping(&sl->state, cur) &&
- states_equal(env, &sl->state, cur)) {
+ struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
+
+ if (frame->in_async_callback_fn &&
+ frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
+ /* Different async_entry_cnt means that the verifier is
+ * processing another entry into async callback.
+ * Seeing the same state is not an indication of infinite
+ * loop or infinite recursion.
+ * But finding the same state doesn't mean that it's safe
+ * to stop processing the current state. The previous state
+ * hasn't yet reached bpf_exit, since state.branches > 0.
+ * Checking in_async_callback_fn alone is not enough either.
+ * Since the verifier still needs to catch infinite loops
+ * inside async callbacks.
+ */
+ } else if (states_maybe_looping(&sl->state, cur) &&
+ states_equal(env, &sl->state, cur)) {
verbose_linfo(env, insn_idx, "; ");
verbose(env, "infinite loop detected at insn %d\n", insn_idx);
return -EINVAL;
@@ -11410,10 +11701,11 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
* insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
* [0, off) and [off, end) to new locations, so the patched range stays zero
*/
-static int adjust_insn_aux_data(struct bpf_verifier_env *env,
- struct bpf_prog *new_prog, u32 off, u32 cnt)
+static void adjust_insn_aux_data(struct bpf_verifier_env *env,
+ struct bpf_insn_aux_data *new_data,
+ struct bpf_prog *new_prog, u32 off, u32 cnt)
{
- struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
+ struct bpf_insn_aux_data *old_data = env->insn_aux_data;
struct bpf_insn *insn = new_prog->insnsi;
u32 old_seen = old_data[off].seen;
u32 prog_len;
@@ -11426,12 +11718,9 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
if (cnt == 1)
- return 0;
+ return;
prog_len = new_prog->len;
- new_data = vzalloc(array_size(prog_len,
- sizeof(struct bpf_insn_aux_data)));
- if (!new_data)
- return -ENOMEM;
+
memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
memcpy(new_data + off + cnt - 1, old_data + off,
sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
@@ -11442,7 +11731,6 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
}
env->insn_aux_data = new_data;
vfree(old_data);
- return 0;
}
static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
@@ -11477,6 +11765,14 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
const struct bpf_insn *patch, u32 len)
{
struct bpf_prog *new_prog;
+ struct bpf_insn_aux_data *new_data = NULL;
+
+ if (len > 1) {
+ new_data = vzalloc(array_size(env->prog->len + len - 1,
+ sizeof(struct bpf_insn_aux_data)));
+ if (!new_data)
+ return NULL;
+ }
new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
if (IS_ERR(new_prog)) {
@@ -11484,10 +11780,10 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
verbose(env,
"insn %d cannot be patched due to 16-bit range\n",
env->insn_aux_data[off].orig_idx);
+ vfree(new_data);
return NULL;
}
- if (adjust_insn_aux_data(env, new_prog, off, len))
- return NULL;
+ adjust_insn_aux_data(env, new_data, new_prog, off, len);
adjust_subprog_starts(env, off, len);
adjust_poke_descs(new_prog, off, len);
return new_prog;
@@ -11663,6 +11959,7 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
if (aux_data[i].seen)
continue;
memcpy(insn + i, &trap, sizeof(trap));
+ aux_data[i].zext_dst = false;
}
}
@@ -12003,6 +12300,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
if (is_narrower_load && size < target_size) {
u8 shift = bpf_ctx_narrow_access_offset(
off, size, size_default) * 8;
+ if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
+ verbose(env, "bpf verifier narrow ctx load misconfigured\n");
+ return -EINVAL;
+ }
if (ctx_field_size <= 4) {
if (shift)
insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
@@ -12091,7 +12392,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
subprog_end = env->subprog_info[i + 1].start;
len = subprog_end - subprog_start;
- /* BPF_PROG_RUN doesn't call subprogs directly,
+ /* bpf_prog_run() doesn't call subprogs directly,
* hence main prog stats include the runtime of subprogs.
* subprogs don't have IDs and not reachable via prog_get_next_id
* func[i]->stats will never be accessed and stays NULL
@@ -12337,6 +12638,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
bool expect_blinding = bpf_jit_blinding_enabled(prog);
+ enum bpf_prog_type prog_type = resolve_prog_type(prog);
struct bpf_insn *insn = prog->insnsi;
const struct bpf_func_proto *fn;
const int insn_cnt = prog->len;
@@ -12554,6 +12856,39 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
continue;
}
+ if (insn->imm == BPF_FUNC_timer_set_callback) {
+ /* The verifier will process callback_fn as many times as necessary
+ * with different maps and the register states prepared by
+ * set_timer_callback_state will be accurate.
+ *
+ * The following use case is valid:
+ * map1 is shared by prog1, prog2, prog3.
+ * prog1 calls bpf_timer_init for some map1 elements
+ * prog2 calls bpf_timer_set_callback for some map1 elements.
+ * Those that were not bpf_timer_init-ed will return -EINVAL.
+ * prog3 calls bpf_timer_start for some map1 elements.
+ * Those that were not both bpf_timer_init-ed and
+ * bpf_timer_set_callback-ed will return -EINVAL.
+ */
+ struct bpf_insn ld_addrs[2] = {
+ BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
+ };
+
+ insn_buf[0] = ld_addrs[0];
+ insn_buf[1] = ld_addrs[1];
+ insn_buf[2] = *insn;
+ cnt = 3;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto patch_call_imm;
+ }
+
/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
* and other inlining handlers are currently limited to 64 bit
* only.
@@ -12670,6 +13005,21 @@ patch_map_ops_generic:
continue;
}
+ /* Implement bpf_get_func_ip inline. */
+ if (prog_type == BPF_PROG_TYPE_TRACING &&
+ insn->imm == BPF_FUNC_get_func_ip) {
+ /* Load IP address from ctx - 8 */
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
+ if (!new_prog)
+ return -ENOMEM;
+
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ continue;
+ }
+
patch_call_imm:
fn = env->ops->get_func_proto(insn->imm, env->prog);
/* all functions that have prototype and verifier allowed
diff --git a/kernel/cfi.c b/kernel/cfi.c
index e17a56639766..9594cfd1cf2c 100644
--- a/kernel/cfi.c
+++ b/kernel/cfi.c
@@ -248,9 +248,9 @@ static inline cfi_check_fn find_shadow_check_fn(unsigned long ptr)
{
cfi_check_fn fn;
- rcu_read_lock_sched();
+ rcu_read_lock_sched_notrace();
fn = ptr_to_check_fn(rcu_dereference_sched(cfi_shadow), ptr);
- rcu_read_unlock_sched();
+ rcu_read_unlock_sched_notrace();
return fn;
}
@@ -269,11 +269,11 @@ static inline cfi_check_fn find_module_check_fn(unsigned long ptr)
cfi_check_fn fn = NULL;
struct module *mod;
- rcu_read_lock_sched();
+ rcu_read_lock_sched_notrace();
mod = __module_address(ptr);
if (mod)
fn = mod->cfi_check;
- rcu_read_unlock_sched();
+ rcu_read_unlock_sched_notrace();
return fn;
}
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index de2c432dee20..35b920328344 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -50,6 +50,8 @@ bool cgroup1_ssid_disabled(int ssid)
* cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
* @from: attach to all cgroups of a given task
* @tsk: the task to be attached
+ *
+ * Return: %0 on success or a negative errno code on failure
*/
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
{
@@ -80,7 +82,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
/**
- * cgroup_trasnsfer_tasks - move tasks from one cgroup to another
+ * cgroup_transfer_tasks - move tasks from one cgroup to another
* @to: cgroup to which the tasks will be moved
* @from: cgroup in which the tasks currently reside
*
@@ -89,6 +91,8 @@ EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
* is guaranteed to be either visible in the source cgroup after the
* parent's migration is complete or put into the target cgroup. No task
* can slip out of migration through forking.
+ *
+ * Return: %0 on success or a negative errno code on failure
*/
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
{
@@ -682,6 +686,8 @@ int proc_cgroupstats_show(struct seq_file *m, void *v)
*
* Build and fill cgroupstats so that taskstats can export it to user
* space.
+ *
+ * Return: %0 on success or a negative errno code on failure
*/
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
{
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 3a0161c21b6b..881ce1470beb 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -68,6 +68,14 @@
#define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100)
/*
+ * To avoid confusing the compiler (and generating warnings) with code
+ * that attempts to access what would be a 0-element array (i.e. sized
+ * to a potentially empty array when CGROUP_SUBSYS_COUNT == 0), this
+ * constant expression can be added.
+ */
+#define CGROUP_HAS_SUBSYS_CONFIG (CGROUP_SUBSYS_COUNT > 0)
+
+/*
* cgroup_mutex is the master lock. Any modification to cgroup or its
* hierarchy must be performed while holding it.
*
@@ -248,7 +256,7 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css,
*/
bool cgroup_ssid_enabled(int ssid)
{
- if (CGROUP_SUBSYS_COUNT == 0)
+ if (!CGROUP_HAS_SUBSYS_CONFIG)
return false;
return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
@@ -472,7 +480,7 @@ static u16 cgroup_ss_mask(struct cgroup *cgrp)
static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
struct cgroup_subsys *ss)
{
- if (ss)
+ if (CGROUP_HAS_SUBSYS_CONFIG && ss)
return rcu_dereference_check(cgrp->subsys[ss->id],
lockdep_is_held(&cgroup_mutex));
else
@@ -550,6 +558,9 @@ struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
{
struct cgroup_subsys_state *css;
+ if (!CGROUP_HAS_SUBSYS_CONFIG)
+ return NULL;
+
do {
css = cgroup_css(cgrp, ss);
@@ -577,6 +588,9 @@ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
{
struct cgroup_subsys_state *css;
+ if (!CGROUP_HAS_SUBSYS_CONFIG)
+ return NULL;
+
rcu_read_lock();
do {
@@ -647,7 +661,7 @@ struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
* the matching css from the cgroup's subsys table is guaranteed to
* be and stay valid until the enclosing operation is complete.
*/
- if (cft->ss)
+ if (CGROUP_HAS_SUBSYS_CONFIG && cft->ss)
return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
else
return &cgrp->self;
@@ -695,7 +709,7 @@ EXPORT_SYMBOL_GPL(of_css);
*/
#define do_each_subsys_mask(ss, ssid, ss_mask) do { \
unsigned long __ss_mask = (ss_mask); \
- if (!CGROUP_SUBSYS_COUNT) { /* to avoid spurious gcc warning */ \
+ if (!CGROUP_HAS_SUBSYS_CONFIG) { \
(ssid) = 0; \
break; \
} \
@@ -2169,7 +2183,6 @@ static void cgroup_kill_sb(struct super_block *sb)
/*
* If @root doesn't have any children, start killing it.
* This prevents new mounts by disabling percpu_ref_tryget_live().
- * cgroup_mount() may wait for @root's release.
*
* And don't kill the default root.
*/
@@ -2373,7 +2386,7 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
struct css_set *cset = tset->cur_cset;
struct task_struct *task = tset->cur_task;
- while (&cset->mg_node != tset->csets) {
+ while (CGROUP_HAS_SUBSYS_CONFIG && &cset->mg_node != tset->csets) {
if (!task)
task = list_first_entry(&cset->mg_tasks,
struct task_struct, cg_list);
@@ -4644,7 +4657,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
it->ss = css->ss;
it->flags = flags;
- if (it->ss)
+ if (CGROUP_HAS_SUBSYS_CONFIG && it->ss)
it->cset_pos = &css->cgroup->e_csets[css->ss->id];
else
it->cset_pos = &css->cgroup->cset_links;
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index adb5190c4429..df1ccf4558f8 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -160,6 +160,9 @@ struct cpuset {
*/
int use_parent_ecpus;
int child_ecpus_count;
+
+ /* Handle for cpuset.cpus.partition */
+ struct cgroup_file partition_file;
};
/*
@@ -263,6 +266,16 @@ static inline int is_partition_root(const struct cpuset *cs)
return cs->partition_root_state > 0;
}
+/*
+ * Send a notification event whenever partition_root_state changes.
+ */
+static inline void notify_partition_change(struct cpuset *cs,
+ int old_prs, int new_prs)
+{
+ if (old_prs != new_prs)
+ cgroup_file_notify(&cs->partition_file);
+}
+
static struct cpuset top_cpuset = {
.flags = ((1 << CS_ONLINE) | (1 << CS_CPU_EXCLUSIVE) |
(1 << CS_MEM_EXCLUSIVE)),
@@ -372,18 +385,29 @@ static inline bool is_in_v2_mode(void)
}
/*
- * Return in pmask the portion of a cpusets's cpus_allowed that
- * are online. If none are online, walk up the cpuset hierarchy
- * until we find one that does have some online cpus.
+ * Return in pmask the portion of a task's cpuset's cpus_allowed that
+ * are online and are capable of running the task. If none are found,
+ * walk up the cpuset hierarchy until we find one that does have some
+ * appropriate cpus.
*
* One way or another, we guarantee to return some non-empty subset
* of cpu_online_mask.
*
* Call with callback_lock or cpuset_mutex held.
*/
-static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
+static void guarantee_online_cpus(struct task_struct *tsk,
+ struct cpumask *pmask)
{
- while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
+ const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
+ struct cpuset *cs;
+
+ if (WARN_ON(!cpumask_and(pmask, possible_mask, cpu_online_mask)))
+ cpumask_copy(pmask, cpu_online_mask);
+
+ rcu_read_lock();
+ cs = task_cs(tsk);
+
+ while (!cpumask_intersects(cs->effective_cpus, pmask)) {
cs = parent_cs(cs);
if (unlikely(!cs)) {
/*
@@ -393,11 +417,13 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
* cpuset's effective_cpus is on its way to be
* identical to cpu_online_mask.
*/
- cpumask_copy(pmask, cpu_online_mask);
- return;
+ goto out_unlock;
}
}
- cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
+ cpumask_and(pmask, pmask, cs->effective_cpus);
+
+out_unlock:
+ rcu_read_unlock();
}
/*
@@ -979,7 +1005,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
* 'cpus' is removed, then call this routine to rebuild the
* scheduler's dynamic sched domains.
*
- * Call with cpuset_mutex held. Takes get_online_cpus().
+ * Call with cpuset_mutex held. Takes cpus_read_lock().
*/
static void rebuild_sched_domains_locked(void)
{
@@ -1040,11 +1066,11 @@ static void rebuild_sched_domains_locked(void)
void rebuild_sched_domains(void)
{
- get_online_cpus();
+ cpus_read_lock();
percpu_down_write(&cpuset_rwsem);
rebuild_sched_domains_locked();
percpu_up_write(&cpuset_rwsem);
- put_online_cpus();
+ cpus_read_unlock();
}
/**
@@ -1114,7 +1140,7 @@ enum subparts_cmd {
* cpus_allowed can be granted or an error code will be returned.
*
* For partcmd_disable, the cpuset is being transformed from a partition
- * root back to a non-partition root. any CPUs in cpus_allowed that are in
+ * root back to a non-partition root. Any CPUs in cpus_allowed that are in
* parent's subparts_cpus will be taken away from that cpumask and put back
* into parent's effective_cpus. 0 should always be returned.
*
@@ -1148,6 +1174,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
struct cpuset *parent = parent_cs(cpuset);
int adding; /* Moving cpus from effective_cpus to subparts_cpus */
int deleting; /* Moving cpus from subparts_cpus to effective_cpus */
+ int old_prs, new_prs;
bool part_error = false; /* Partition error? */
percpu_rwsem_assert_held(&cpuset_rwsem);
@@ -1183,6 +1210,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
* A cpumask update cannot make parent's effective_cpus become empty.
*/
adding = deleting = false;
+ old_prs = new_prs = cpuset->partition_root_state;
if (cmd == partcmd_enable) {
cpumask_copy(tmp->addmask, cpuset->cpus_allowed);
adding = true;
@@ -1225,7 +1253,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
/*
* partcmd_update w/o newmask:
*
- * addmask = cpus_allowed & parent->effectiveb_cpus
+ * addmask = cpus_allowed & parent->effective_cpus
*
* Note that parent's subparts_cpus may have been
* pre-shrunk in case there is a change in the cpu list.
@@ -1247,11 +1275,11 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
switch (cpuset->partition_root_state) {
case PRS_ENABLED:
if (part_error)
- cpuset->partition_root_state = PRS_ERROR;
+ new_prs = PRS_ERROR;
break;
case PRS_ERROR:
if (!part_error)
- cpuset->partition_root_state = PRS_ENABLED;
+ new_prs = PRS_ENABLED;
break;
}
/*
@@ -1260,10 +1288,10 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
part_error = (prev_prs == PRS_ERROR);
}
- if (!part_error && (cpuset->partition_root_state == PRS_ERROR))
+ if (!part_error && (new_prs == PRS_ERROR))
return 0; /* Nothing need to be done */
- if (cpuset->partition_root_state == PRS_ERROR) {
+ if (new_prs == PRS_ERROR) {
/*
* Remove all its cpus from parent's subparts_cpus.
*/
@@ -1272,7 +1300,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
parent->subparts_cpus);
}
- if (!adding && !deleting)
+ if (!adding && !deleting && (new_prs == old_prs))
return 0;
/*
@@ -1299,7 +1327,12 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
}
parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus);
+
+ if (old_prs != new_prs)
+ cpuset->partition_root_state = new_prs;
+
spin_unlock_irq(&callback_lock);
+ notify_partition_change(cpuset, old_prs, new_prs);
return cmd == partcmd_update;
}
@@ -1321,6 +1354,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
struct cpuset *cp;
struct cgroup_subsys_state *pos_css;
bool need_rebuild_sched_domains = false;
+ int old_prs, new_prs;
rcu_read_lock();
cpuset_for_each_descendant_pre(cp, pos_css, cs) {
@@ -1360,17 +1394,18 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
* update_tasks_cpumask() again for tasks in the parent
* cpuset if the parent's subparts_cpus changes.
*/
- if ((cp != cs) && cp->partition_root_state) {
+ old_prs = new_prs = cp->partition_root_state;
+ if ((cp != cs) && old_prs) {
switch (parent->partition_root_state) {
case PRS_DISABLED:
/*
* If parent is not a partition root or an
- * invalid partition root, clear the state
- * state and the CS_CPU_EXCLUSIVE flag.
+ * invalid partition root, clear its state
+ * and its CS_CPU_EXCLUSIVE flag.
*/
WARN_ON_ONCE(cp->partition_root_state
!= PRS_ERROR);
- cp->partition_root_state = 0;
+ new_prs = PRS_DISABLED;
/*
* clear_bit() is an atomic operation and
@@ -1391,11 +1426,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
/*
* When parent is invalid, it has to be too.
*/
- cp->partition_root_state = PRS_ERROR;
- if (cp->nr_subparts_cpus) {
- cp->nr_subparts_cpus = 0;
- cpumask_clear(cp->subparts_cpus);
- }
+ new_prs = PRS_ERROR;
break;
}
}
@@ -1407,8 +1438,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
spin_lock_irq(&callback_lock);
cpumask_copy(cp->effective_cpus, tmp->new_cpus);
- if (cp->nr_subparts_cpus &&
- (cp->partition_root_state != PRS_ENABLED)) {
+ if (cp->nr_subparts_cpus && (new_prs != PRS_ENABLED)) {
cp->nr_subparts_cpus = 0;
cpumask_clear(cp->subparts_cpus);
} else if (cp->nr_subparts_cpus) {
@@ -1435,7 +1465,12 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
= cpumask_weight(cp->subparts_cpus);
}
}
+
+ if (new_prs != old_prs)
+ cp->partition_root_state = new_prs;
+
spin_unlock_irq(&callback_lock);
+ notify_partition_change(cp, old_prs, new_prs);
WARN_ON(!is_in_v2_mode() &&
!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
@@ -1612,6 +1647,11 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
{
struct cpuset_migrate_mm_work *mwork;
+ if (nodes_equal(*from, *to)) {
+ mmput(mm);
+ return;
+ }
+
mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
if (mwork) {
mwork->mm = mm;
@@ -1937,34 +1977,32 @@ out:
/*
* update_prstate - update partition_root_state
- * cs: the cpuset to update
- * val: 0 - disabled, 1 - enabled
+ * cs: the cpuset to update
+ * new_prs: new partition root state
*
* Call with cpuset_mutex held.
*/
-static int update_prstate(struct cpuset *cs, int val)
+static int update_prstate(struct cpuset *cs, int new_prs)
{
- int err;
+ int err, old_prs = cs->partition_root_state;
struct cpuset *parent = parent_cs(cs);
- struct tmpmasks tmp;
+ struct tmpmasks tmpmask;
- if ((val != 0) && (val != 1))
- return -EINVAL;
- if (val == cs->partition_root_state)
+ if (old_prs == new_prs)
return 0;
/*
* Cannot force a partial or invalid partition root to a full
* partition root.
*/
- if (val && cs->partition_root_state)
+ if (new_prs && (old_prs == PRS_ERROR))
return -EINVAL;
- if (alloc_cpumasks(NULL, &tmp))
+ if (alloc_cpumasks(NULL, &tmpmask))
return -ENOMEM;
err = -EINVAL;
- if (!cs->partition_root_state) {
+ if (!old_prs) {
/*
* Turning on partition root requires setting the
* CS_CPU_EXCLUSIVE bit implicitly as well and cpus_allowed
@@ -1978,31 +2016,27 @@ static int update_prstate(struct cpuset *cs, int val)
goto out;
err = update_parent_subparts_cpumask(cs, partcmd_enable,
- NULL, &tmp);
+ NULL, &tmpmask);
if (err) {
update_flag(CS_CPU_EXCLUSIVE, cs, 0);
goto out;
}
- cs->partition_root_state = PRS_ENABLED;
} else {
/*
* Turning off partition root will clear the
* CS_CPU_EXCLUSIVE bit.
*/
- if (cs->partition_root_state == PRS_ERROR) {
- cs->partition_root_state = 0;
+ if (old_prs == PRS_ERROR) {
update_flag(CS_CPU_EXCLUSIVE, cs, 0);
err = 0;
goto out;
}
err = update_parent_subparts_cpumask(cs, partcmd_disable,
- NULL, &tmp);
+ NULL, &tmpmask);
if (err)
goto out;
- cs->partition_root_state = 0;
-
/* Turning off CS_CPU_EXCLUSIVE will not return error */
update_flag(CS_CPU_EXCLUSIVE, cs, 0);
}
@@ -2015,11 +2049,18 @@ static int update_prstate(struct cpuset *cs, int val)
update_tasks_cpumask(parent);
if (parent->child_ecpus_count)
- update_sibling_cpumasks(parent, cs, &tmp);
+ update_sibling_cpumasks(parent, cs, &tmpmask);
rebuild_sched_domains_locked();
out:
- free_cpumasks(NULL, &tmp);
+ if (!err) {
+ spin_lock_irq(&callback_lock);
+ cs->partition_root_state = new_prs;
+ spin_unlock_irq(&callback_lock);
+ notify_partition_change(cs, old_prs, new_prs);
+ }
+
+ free_cpumasks(NULL, &tmpmask);
return err;
}
@@ -2199,15 +2240,13 @@ static void cpuset_attach(struct cgroup_taskset *tset)
percpu_down_write(&cpuset_rwsem);
- /* prepare for attach */
- if (cs == &top_cpuset)
- cpumask_copy(cpus_attach, cpu_possible_mask);
- else
- guarantee_online_cpus(cs, cpus_attach);
-
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
cgroup_taskset_for_each(task, css, tset) {
+ if (cs != &top_cpuset)
+ guarantee_online_cpus(task, cpus_attach);
+ else
+ cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
/*
* can_attach beforehand should guarantee that this doesn't
* fail. TODO: have a better way to handle failure here
@@ -2282,7 +2321,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
cpuset_filetype_t type = cft->private;
int retval = 0;
- get_online_cpus();
+ cpus_read_lock();
percpu_down_write(&cpuset_rwsem);
if (!is_cpuset_online(cs)) {
retval = -ENODEV;
@@ -2320,7 +2359,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
}
out_unlock:
percpu_up_write(&cpuset_rwsem);
- put_online_cpus();
+ cpus_read_unlock();
return retval;
}
@@ -2331,7 +2370,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
cpuset_filetype_t type = cft->private;
int retval = -ENODEV;
- get_online_cpus();
+ cpus_read_lock();
percpu_down_write(&cpuset_rwsem);
if (!is_cpuset_online(cs))
goto out_unlock;
@@ -2346,7 +2385,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
}
out_unlock:
percpu_up_write(&cpuset_rwsem);
- put_online_cpus();
+ cpus_read_unlock();
return retval;
}
@@ -2385,7 +2424,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
kernfs_break_active_protection(of->kn);
flush_work(&cpuset_hotplug_work);
- get_online_cpus();
+ cpus_read_lock();
percpu_down_write(&cpuset_rwsem);
if (!is_cpuset_online(cs))
goto out_unlock;
@@ -2411,7 +2450,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
free_cpuset(trialcs);
out_unlock:
percpu_up_write(&cpuset_rwsem);
- put_online_cpus();
+ cpus_read_unlock();
kernfs_unbreak_active_protection(of->kn);
css_put(&cs->css);
flush_workqueue(cpuset_migrate_mm_wq);
@@ -2542,7 +2581,7 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
return -EINVAL;
css_get(&cs->css);
- get_online_cpus();
+ cpus_read_lock();
percpu_down_write(&cpuset_rwsem);
if (!is_cpuset_online(cs))
goto out_unlock;
@@ -2550,7 +2589,7 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
retval = update_prstate(cs, val);
out_unlock:
percpu_up_write(&cpuset_rwsem);
- put_online_cpus();
+ cpus_read_unlock();
css_put(&cs->css);
return retval ?: nbytes;
}
@@ -2702,6 +2741,7 @@ static struct cftype dfl_files[] = {
.write = sched_partition_write,
.private = FILE_PARTITION_ROOT,
.flags = CFTYPE_NOT_ON_ROOT,
+ .file_offset = offsetof(struct cpuset, partition_file),
},
{
@@ -2737,12 +2777,16 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
return ERR_PTR(-ENOMEM);
}
- set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
+ __set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
nodes_clear(cs->mems_allowed);
nodes_clear(cs->effective_mems);
fmeter_init(&cs->fmeter);
cs->relax_domain_level = -1;
+ /* Set CS_MEMORY_MIGRATE for default hierarchy */
+ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
+ __set_bit(CS_MEMORY_MIGRATE, &cs->flags);
+
return &cs->css;
}
@@ -2756,7 +2800,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
if (!parent)
return 0;
- get_online_cpus();
+ cpus_read_lock();
percpu_down_write(&cpuset_rwsem);
set_bit(CS_ONLINE, &cs->flags);
@@ -2809,7 +2853,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
spin_unlock_irq(&callback_lock);
out_unlock:
percpu_up_write(&cpuset_rwsem);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
@@ -2828,7 +2872,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
{
struct cpuset *cs = css_cs(css);
- get_online_cpus();
+ cpus_read_lock();
percpu_down_write(&cpuset_rwsem);
if (is_partition_root(cs))
@@ -2849,7 +2893,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
clear_bit(CS_ONLINE, &cs->flags);
percpu_up_write(&cpuset_rwsem);
- put_online_cpus();
+ cpus_read_unlock();
}
static void cpuset_css_free(struct cgroup_subsys_state *css)
@@ -3060,7 +3104,7 @@ retry:
goto retry;
}
- parent = parent_cs(cs);
+ parent = parent_cs(cs);
compute_effective_cpumask(&new_cpus, cs, parent);
nodes_and(new_mems, cs->mems_allowed, parent->effective_mems);
@@ -3082,8 +3126,10 @@ retry:
if (is_partition_root(cs) && (cpumask_empty(&new_cpus) ||
(parent->partition_root_state == PRS_ERROR))) {
if (cs->nr_subparts_cpus) {
+ spin_lock_irq(&callback_lock);
cs->nr_subparts_cpus = 0;
cpumask_clear(cs->subparts_cpus);
+ spin_unlock_irq(&callback_lock);
compute_effective_cpumask(&new_cpus, cs, parent);
}
@@ -3095,9 +3141,17 @@ retry:
*/
if ((parent->partition_root_state == PRS_ERROR) ||
cpumask_empty(&new_cpus)) {
+ int old_prs;
+
update_parent_subparts_cpumask(cs, partcmd_disable,
NULL, tmp);
- cs->partition_root_state = PRS_ERROR;
+ old_prs = cs->partition_root_state;
+ if (old_prs != PRS_ERROR) {
+ spin_lock_irq(&callback_lock);
+ cs->partition_root_state = PRS_ERROR;
+ spin_unlock_irq(&callback_lock);
+ notify_partition_change(cs, old_prs, PRS_ERROR);
+ }
}
cpuset_force_rebuild();
}
@@ -3168,6 +3222,13 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus);
mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems);
+ /*
+ * In the rare case that hotplug removes all the cpus in subparts_cpus,
+ * we assume that cpus are updated.
+ */
+ if (!cpus_updated && top_cpuset.nr_subparts_cpus)
+ cpus_updated = true;
+
/* synchronize cpus_allowed to cpu_active_mask */
if (cpus_updated) {
spin_lock_irq(&callback_lock);
@@ -3302,9 +3363,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
unsigned long flags;
spin_lock_irqsave(&callback_lock, flags);
- rcu_read_lock();
- guarantee_online_cpus(task_cs(tsk), pmask);
- rcu_read_unlock();
+ guarantee_online_cpus(tsk, pmask);
spin_unlock_irqrestore(&callback_lock, flags);
}
@@ -3318,13 +3377,22 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
* which will not contain a sane cpumask during cases such as cpu hotplugging.
* This is the absolute last resort for the scheduler and it is only used if
* _every_ other avenue has been traveled.
+ *
+ * Returns true if the affinity of @tsk was changed, false otherwise.
**/
-void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
+bool cpuset_cpus_allowed_fallback(struct task_struct *tsk)
{
+ const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
+ const struct cpumask *cs_mask;
+ bool changed = false;
+
rcu_read_lock();
- do_set_cpus_allowed(tsk, is_in_v2_mode() ?
- task_cs(tsk)->cpus_allowed : cpu_possible_mask);
+ cs_mask = task_cs(tsk)->cpus_allowed;
+ if (is_in_v2_mode() && cpumask_subset(cs_mask, possible_mask)) {
+ do_set_cpus_allowed(tsk, cs_mask);
+ changed = true;
+ }
rcu_read_unlock();
/*
@@ -3344,6 +3412,7 @@ void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
* select_fallback_rq() will fix things up and set cpu_possible_mask
* if required.
*/
+ return changed;
}
void __init cpuset_init_current_mems_allowed(void)
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 7f0e58917432..b264ab5652ba 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -347,19 +347,20 @@ static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
}
static struct cgroup_rstat_cpu *
-cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp)
+cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags)
{
struct cgroup_rstat_cpu *rstatc;
rstatc = get_cpu_ptr(cgrp->rstat_cpu);
- u64_stats_update_begin(&rstatc->bsync);
+ *flags = u64_stats_update_begin_irqsave(&rstatc->bsync);
return rstatc;
}
static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
- struct cgroup_rstat_cpu *rstatc)
+ struct cgroup_rstat_cpu *rstatc,
+ unsigned long flags)
{
- u64_stats_update_end(&rstatc->bsync);
+ u64_stats_update_end_irqrestore(&rstatc->bsync, flags);
cgroup_rstat_updated(cgrp, smp_processor_id());
put_cpu_ptr(rstatc);
}
@@ -367,18 +368,20 @@ static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
{
struct cgroup_rstat_cpu *rstatc;
+ unsigned long flags;
- rstatc = cgroup_base_stat_cputime_account_begin(cgrp);
+ rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
rstatc->bstat.cputime.sum_exec_runtime += delta_exec;
- cgroup_base_stat_cputime_account_end(cgrp, rstatc);
+ cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
}
void __cgroup_account_cputime_field(struct cgroup *cgrp,
enum cpu_usage_stat index, u64 delta_exec)
{
struct cgroup_rstat_cpu *rstatc;
+ unsigned long flags;
- rstatc = cgroup_base_stat_cputime_account_begin(cgrp);
+ rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
switch (index) {
case CPUTIME_USER:
@@ -394,7 +397,7 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
break;
}
- cgroup_base_stat_cputime_account_end(cgrp, rstatc);
+ cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
}
/*
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 804b847912dc..192e43a87407 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -41,14 +41,19 @@
#include "smpboot.h"
/**
- * cpuhp_cpu_state - Per cpu hotplug state storage
+ * struct cpuhp_cpu_state - Per cpu hotplug state storage
* @state: The current cpu state
* @target: The target state
+ * @fail: Current CPU hotplug callback state
* @thread: Pointer to the hotplug thread
* @should_run: Thread should execute
* @rollback: Perform a rollback
* @single: Single callback invocation
* @bringup: Single callback bringup or teardown selector
+ * @cpu: CPU number
+ * @node: Remote CPU node; for multi-instance, do a
+ * single entry callback for install/remove
+ * @last: For multi-instance rollback, remember how far we got
* @cb_state: The state for a single callback (install/uninstall)
* @result: Result of the operation
* @done_up: Signal completion to the issuer of the task for cpu-up
@@ -106,11 +111,12 @@ static inline void cpuhp_lock_release(bool bringup) { }
#endif
/**
- * cpuhp_step - Hotplug state machine step
+ * struct cpuhp_step - Hotplug state machine step
* @name: Name of the step
* @startup: Startup function of the step
* @teardown: Teardown function of the step
* @cant_stop: Bringup/teardown can't be stopped at this step
+ * @multi_instance: State has multiple instances which get added afterwards
*/
struct cpuhp_step {
const char *name;
@@ -124,7 +130,9 @@ struct cpuhp_step {
int (*multi)(unsigned int cpu,
struct hlist_node *node);
} teardown;
+ /* private: */
struct hlist_head list;
+ /* public: */
bool cant_stop;
bool multi_instance;
};
@@ -143,7 +151,7 @@ static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step)
}
/**
- * cpuhp_invoke_callback _ Invoke the callbacks for a given state
+ * cpuhp_invoke_callback - Invoke the callbacks for a given state
* @cpu: The cpu for which the callback should be invoked
* @state: The state to do callbacks for
* @bringup: True if the bringup callback should be invoked
@@ -151,6 +159,8 @@ static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step)
* @lastp: For multi-instance rollback, remember how far we got
*
* Called from cpu hotplug and from the state register machinery.
+ *
+ * Return: %0 on success or a negative errno code
*/
static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
bool bringup, struct hlist_node *node,
@@ -682,6 +692,10 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
ret = cpuhp_invoke_callback_range(true, cpu, st, target);
if (ret) {
+ pr_debug("CPU UP failed (%d) CPU %u state %s (%d)\n",
+ ret, cpu, cpuhp_get_step(st->state)->name,
+ st->state);
+
cpuhp_reset_state(st, prev_state);
if (can_rollback_cpu(st))
WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
@@ -1081,6 +1095,9 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
ret = cpuhp_invoke_callback_range(false, cpu, st, target);
if (ret) {
+ pr_debug("CPU DOWN failed (%d) CPU %u state %s (%d)\n",
+ ret, cpu, cpuhp_get_step(st->state)->name,
+ st->state);
cpuhp_reset_state(st, prev_state);
@@ -1183,6 +1200,8 @@ static int cpu_down(unsigned int cpu, enum cpuhp_state target)
* This function is meant to be used by device core cpu subsystem only.
*
* Other subsystems should use remove_cpu() instead.
+ *
+ * Return: %0 on success or a negative errno code
*/
int cpu_device_down(struct device *dev)
{
@@ -1395,6 +1414,8 @@ out:
* This function is meant to be used by device core cpu subsystem only.
*
* Other subsystems should use add_cpu() instead.
+ *
+ * Return: %0 on success or a negative errno code
*/
int cpu_device_up(struct device *dev)
{
@@ -1420,6 +1441,8 @@ EXPORT_SYMBOL_GPL(add_cpu);
* On some architectures like arm64, we can hibernate on any CPU, but on
* wake up the CPU we hibernated on might be offline as a side effect of
* using maxcpus= for example.
+ *
+ * Return: %0 on success or a negative errno code
*/
int bringup_hibernate_cpu(unsigned int sleep_cpu)
{
@@ -1976,6 +1999,7 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
/**
* __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state
* @state: The state to setup
+ * @name: Name of the step
* @invoke: If true, the startup function is invoked for cpus where
* cpu state >= @state
* @startup: startup callback function
@@ -1984,9 +2008,9 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
* added afterwards.
*
* The caller needs to hold cpus read locked while calling this function.
- * Returns:
+ * Return:
* On success:
- * Positive state number if @state is CPUHP_AP_ONLINE_DYN
+ * Positive state number if @state is CPUHP_AP_ONLINE_DYN;
* 0 for all other states
* On failure: proper (negative) error code
*/
@@ -2232,18 +2256,17 @@ int cpuhp_smt_enable(void)
#endif
#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
-static ssize_t show_cpuhp_state(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
return sprintf(buf, "%d\n", st->state);
}
-static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
+static DEVICE_ATTR_RO(state);
-static ssize_t write_cpuhp_target(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t target_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
struct cpuhp_step *sp;
@@ -2281,19 +2304,17 @@ out:
return ret ? ret : count;
}
-static ssize_t show_cpuhp_target(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t target_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
return sprintf(buf, "%d\n", st->target);
}
-static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
-
+static DEVICE_ATTR_RW(target);
-static ssize_t write_cpuhp_fail(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t fail_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
struct cpuhp_step *sp;
@@ -2342,15 +2363,15 @@ static ssize_t write_cpuhp_fail(struct device *dev,
return count;
}
-static ssize_t show_cpuhp_fail(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t fail_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
return sprintf(buf, "%d\n", st->fail);
}
-static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
+static DEVICE_ATTR_RW(fail);
static struct attribute *cpuhp_cpu_attrs[] = {
&dev_attr_state.attr,
@@ -2365,7 +2386,7 @@ static const struct attribute_group cpuhp_cpu_attr_group = {
NULL
};
-static ssize_t show_cpuhp_states(struct device *dev,
+static ssize_t states_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
ssize_t cur, res = 0;
@@ -2384,7 +2405,7 @@ static ssize_t show_cpuhp_states(struct device *dev,
mutex_unlock(&cpuhp_state_mutex);
return res;
}
-static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
+static DEVICE_ATTR_RO(states);
static struct attribute *cpuhp_cpu_root_attrs[] = {
&dev_attr_states.attr,
@@ -2457,28 +2478,27 @@ static const char *smt_states[] = {
[CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
};
-static ssize_t
-show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t control_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
const char *state = smt_states[cpu_smt_control];
return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
}
-static ssize_t
-store_smt_control(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t control_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
return __store_smt_control(dev, attr, buf, count);
}
-static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
+static DEVICE_ATTR_RW(control);
-static ssize_t
-show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t active_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
}
-static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
+static DEVICE_ATTR_RO(active);
static struct attribute *cpuhp_smt_attrs[] = {
&dev_attr_control.attr,
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
index f7e1d0eccdbc..246efc74e3f3 100644
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -13,19 +13,32 @@
#include <linux/spinlock.h>
#include <linux/syscore_ops.h>
-static ATOMIC_NOTIFIER_HEAD(cpu_pm_notifier_chain);
+/*
+ * atomic_notifiers use a spinlock_t, which can block under PREEMPT_RT.
+ * Notifications for cpu_pm will be issued by the idle task itself, which can
+ * never block, IOW it requires using a raw_spinlock_t.
+ */
+static struct {
+ struct raw_notifier_head chain;
+ raw_spinlock_t lock;
+} cpu_pm_notifier = {
+ .chain = RAW_NOTIFIER_INIT(cpu_pm_notifier.chain),
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(cpu_pm_notifier.lock),
+};
static int cpu_pm_notify(enum cpu_pm_event event)
{
int ret;
/*
- * atomic_notifier_call_chain has a RCU read critical section, which
- * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let
- * RCU know this.
+ * This introduces an RCU read-side critical section, which could be
+ * dysfunctional in cpu idle. Copy RCU_NONIDLE code to let RCU know
+ * this.
*/
rcu_irq_enter_irqson();
- ret = atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL);
+ rcu_read_lock();
+ ret = raw_notifier_call_chain(&cpu_pm_notifier.chain, event, NULL);
+ rcu_read_unlock();
rcu_irq_exit_irqson();
return notifier_to_errno(ret);
@@ -33,10 +46,13 @@ static int cpu_pm_notify(enum cpu_pm_event event)
static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event event_down)
{
+ unsigned long flags;
int ret;
rcu_irq_enter_irqson();
- ret = atomic_notifier_call_chain_robust(&cpu_pm_notifier_chain, event_up, event_down, NULL);
+ raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
+ ret = raw_notifier_call_chain_robust(&cpu_pm_notifier.chain, event_up, event_down, NULL);
+ raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
rcu_irq_exit_irqson();
return notifier_to_errno(ret);
@@ -49,12 +65,17 @@ static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event ev
* Add a driver to a list of drivers that are notified about
* CPU and CPU cluster low power entry and exit.
*
- * This function may sleep, and has the same return conditions as
- * raw_notifier_chain_register.
+ * This function has the same return conditions as raw_notifier_chain_register.
*/
int cpu_pm_register_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_register(&cpu_pm_notifier_chain, nb);
+ unsigned long flags;
+ int ret;
+
+ raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
+ ret = raw_notifier_chain_register(&cpu_pm_notifier.chain, nb);
+ raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
+ return ret;
}
EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
@@ -64,12 +85,17 @@ EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
*
* Remove a driver from the CPU PM notifier list.
*
- * This function may sleep, and has the same return conditions as
- * raw_notifier_chain_unregister.
+ * This function has the same return conditions as raw_notifier_chain_unregister.
*/
int cpu_pm_unregister_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_unregister(&cpu_pm_notifier_chain, nb);
+ unsigned long flags;
+ int ret;
+
+ raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
+ ret = raw_notifier_chain_unregister(&cpu_pm_notifier.chain, nb);
+ raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
+ return ret;
}
EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
diff --git a/kernel/cred.c b/kernel/cred.c
index e6fd2b3fc31f..f784e08c2fbd 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -286,13 +286,13 @@ struct cred *prepare_creds(void)
new->security = NULL;
#endif
- if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
- goto error;
-
new->ucounts = get_ucounts(new->ucounts);
if (!new->ucounts)
goto error;
+ if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
+ goto error;
+
validate_creds(new);
return new;
@@ -753,13 +753,13 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
#ifdef CONFIG_SECURITY
new->security = NULL;
#endif
- if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
- goto error;
-
new->ucounts = get_ucounts(new->ucounts);
if (!new->ucounts)
goto error;
+ if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
+ goto error;
+
put_cred(old);
validate_creds(new);
return new;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1cb1f9b8392e..011cc5069b7b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4697,7 +4697,6 @@ errout:
}
static void perf_event_free_filter(struct perf_event *event);
-static void perf_event_free_bpf_prog(struct perf_event *event);
static void free_event_rcu(struct rcu_head *head)
{
@@ -5574,7 +5573,6 @@ static inline int perf_fget_light(int fd, struct fd *p)
static int perf_event_set_output(struct perf_event *event,
struct perf_event *output_event);
static int perf_event_set_filter(struct perf_event *event, void __user *arg);
-static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);
static int perf_copy_attr(struct perf_event_attr __user *uattr,
struct perf_event_attr *attr);
@@ -5637,7 +5635,22 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
return perf_event_set_filter(event, (void __user *)arg);
case PERF_EVENT_IOC_SET_BPF:
- return perf_event_set_bpf_prog(event, arg);
+ {
+ struct bpf_prog *prog;
+ int err;
+
+ prog = bpf_prog_get(arg);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ err = perf_event_set_bpf_prog(event, prog, 0);
+ if (err) {
+ bpf_prog_put(prog);
+ return err;
+ }
+
+ return 0;
+ }
case PERF_EVENT_IOC_PAUSE_OUTPUT: {
struct perf_buffer *rb;
@@ -9907,13 +9920,16 @@ static void bpf_overflow_handler(struct perf_event *event,
.data = data,
.event = event,
};
+ struct bpf_prog *prog;
int ret = 0;
ctx.regs = perf_arch_bpf_user_pt_regs(regs);
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
goto out;
rcu_read_lock();
- ret = BPF_PROG_RUN(event->prog, &ctx);
+ prog = READ_ONCE(event->prog);
+ if (prog)
+ ret = bpf_prog_run(prog, &ctx);
rcu_read_unlock();
out:
__this_cpu_dec(bpf_prog_active);
@@ -9923,10 +9939,10 @@ out:
event->orig_overflow_handler(event, data, regs);
}
-static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+static int perf_event_set_bpf_handler(struct perf_event *event,
+ struct bpf_prog *prog,
+ u64 bpf_cookie)
{
- struct bpf_prog *prog;
-
if (event->overflow_handler_context)
/* hw breakpoint or kernel counter */
return -EINVAL;
@@ -9934,9 +9950,8 @@ static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
if (event->prog)
return -EEXIST;
- prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
+ if (prog->type != BPF_PROG_TYPE_PERF_EVENT)
+ return -EINVAL;
if (event->attr.precise_ip &&
prog->call_get_stack &&
@@ -9952,11 +9967,11 @@ static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
* attached to perf_sample_data, do not allow attaching BPF
* program that calls bpf_get_[stack|stackid].
*/
- bpf_prog_put(prog);
return -EPROTO;
}
event->prog = prog;
+ event->bpf_cookie = bpf_cookie;
event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
return 0;
@@ -9974,7 +9989,9 @@ static void perf_event_free_bpf_handler(struct perf_event *event)
bpf_prog_put(prog);
}
#else
-static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+static int perf_event_set_bpf_handler(struct perf_event *event,
+ struct bpf_prog *prog,
+ u64 bpf_cookie)
{
return -EOPNOTSUPP;
}
@@ -10002,14 +10019,13 @@ static inline bool perf_event_is_tracing(struct perf_event *event)
return false;
}
-static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
+int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog,
+ u64 bpf_cookie)
{
bool is_kprobe, is_tracepoint, is_syscall_tp;
- struct bpf_prog *prog;
- int ret;
if (!perf_event_is_tracing(event))
- return perf_event_set_bpf_handler(event, prog_fd);
+ return perf_event_set_bpf_handler(event, prog, bpf_cookie);
is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
@@ -10018,41 +10034,27 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
/* bpf programs can only be attached to u/kprobe or tracepoint */
return -EINVAL;
- prog = bpf_prog_get(prog_fd);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
-
if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) ||
(is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) ||
- (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
- /* valid fd, but invalid bpf program type */
- bpf_prog_put(prog);
+ (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT))
return -EINVAL;
- }
/* Kprobe override only works for kprobes, not uprobes. */
if (prog->kprobe_override &&
- !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) {
- bpf_prog_put(prog);
+ !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
return -EINVAL;
- }
if (is_tracepoint || is_syscall_tp) {
int off = trace_event_get_offsets(event->tp_event);
- if (prog->aux->max_ctx_offset > off) {
- bpf_prog_put(prog);
+ if (prog->aux->max_ctx_offset > off)
return -EACCES;
- }
}
- ret = perf_event_attach_bpf_prog(event, prog);
- if (ret)
- bpf_prog_put(prog);
- return ret;
+ return perf_event_attach_bpf_prog(event, prog, bpf_cookie);
}
-static void perf_event_free_bpf_prog(struct perf_event *event)
+void perf_event_free_bpf_prog(struct perf_event *event)
{
if (!perf_event_is_tracing(event)) {
perf_event_free_bpf_handler(event);
@@ -10071,12 +10073,13 @@ static void perf_event_free_filter(struct perf_event *event)
{
}
-static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
+int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog,
+ u64 bpf_cookie)
{
return -ENOENT;
}
-static void perf_event_free_bpf_prog(struct perf_event *event)
+void perf_event_free_bpf_prog(struct perf_event *event)
{
}
#endif /* CONFIG_EVENT_TRACING */
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 835973444a1e..f32320ac02fd 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -568,7 +568,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
if (!cpu_events)
return (void __percpu __force *)ERR_PTR(-ENOMEM);
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
bp = perf_event_create_kernel_counter(attr, cpu, NULL,
triggered, context);
@@ -579,7 +579,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
per_cpu(*cpu_events, cpu) = bp;
}
- put_online_cpus();
+ cpus_read_unlock();
if (likely(!err))
return cpu_events;
diff --git a/kernel/exit.c b/kernel/exit.c
index 9a89e7f36acb..91a43e57a32e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -777,7 +777,7 @@ void __noreturn do_exit(long code)
schedule();
}
- io_uring_files_cancel(tsk->files);
+ io_uring_files_cancel();
exit_signals(tsk); /* sets PF_EXITING */
/* sync mm's RSS info before statistics gathering */
diff --git a/kernel/fork.c b/kernel/fork.c
index bc94b2cc5995..695d1343a254 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -446,6 +446,7 @@ void put_task_stack(struct task_struct *tsk)
void free_task(struct task_struct *tsk)
{
+ release_user_cpus_ptr(tsk);
scs_release(tsk);
#ifndef CONFIG_THREAD_INFO_IN_TASK
@@ -828,10 +829,10 @@ void __init fork_init(void)
for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++)
init_user_ns.ucount_max[i] = max_threads/2;
- set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_NPROC, task_rlimit(&init_task, RLIMIT_NPROC));
- set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_MSGQUEUE, task_rlimit(&init_task, RLIMIT_MSGQUEUE));
- set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_SIGPENDING, task_rlimit(&init_task, RLIMIT_SIGPENDING));
- set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_MEMLOCK, task_rlimit(&init_task, RLIMIT_MEMLOCK));
+ set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_NPROC, RLIM_INFINITY);
+ set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_MSGQUEUE, RLIM_INFINITY);
+ set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_SIGPENDING, RLIM_INFINITY);
+ set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_MEMLOCK, RLIM_INFINITY);
#ifdef CONFIG_VMAP_STACK
cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
@@ -924,6 +925,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
#endif
if (orig->cpus_ptr == &orig->cpus_mask)
tsk->cpus_ptr = &tsk->cpus_mask;
+ dup_user_cpus_ptr(tsk, orig, node);
/*
* One for the user space visible state that goes away when reaped.
@@ -2083,6 +2085,7 @@ static __latent_entropy struct task_struct *copy_process(
#endif
#ifdef CONFIG_BPF_SYSCALL
RCU_INIT_POINTER(p->bpf_storage, NULL);
+ p->bpf_ctx = NULL;
#endif
/* Perform scheduler related setup. Assign this task to a CPU. */
diff --git a/kernel/futex.c b/kernel/futex.c
index 2ecb07575055..e7b4c6121da4 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -179,7 +179,7 @@ struct futex_pi_state {
/*
* The PI object:
*/
- struct rt_mutex pi_mutex;
+ struct rt_mutex_base pi_mutex;
struct task_struct *owner;
refcount_t refcount;
@@ -197,6 +197,8 @@ struct futex_pi_state {
* @rt_waiter: rt_waiter storage for use with requeue_pi
* @requeue_pi_key: the requeue_pi target futex key
* @bitset: bitset for the optional bitmasked wakeup
+ * @requeue_state: State field for futex_requeue_pi()
+ * @requeue_wait: RCU wait for futex_requeue_pi() (RT only)
*
* We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
* we can wake only the relevant ones (hashed queues may be shared).
@@ -219,12 +221,68 @@ struct futex_q {
struct rt_mutex_waiter *rt_waiter;
union futex_key *requeue_pi_key;
u32 bitset;
+ atomic_t requeue_state;
+#ifdef CONFIG_PREEMPT_RT
+ struct rcuwait requeue_wait;
+#endif
} __randomize_layout;
+/*
+ * On PREEMPT_RT, the hash bucket lock is a 'sleeping' spinlock with an
+ * underlying rtmutex. The task which is about to be requeued could have
+ * just woken up (timeout, signal). After the wake up the task has to
+ * acquire hash bucket lock, which is held by the requeue code. As a task
+ * can only be blocked on _ONE_ rtmutex at a time, the proxy lock blocking
+ * and the hash bucket lock blocking would collide and corrupt state.
+ *
+ * On !PREEMPT_RT this is not a problem and everything could be serialized
+ * on hash bucket lock, but aside from having the benefit of common code,
+ * this allows avoiding the requeue when the task is already on the
+ * way out and taking the hash bucket lock of the original uaddr1 when the
+ * requeue has been completed.
+ *
+ * The following state transitions are valid:
+ *
+ * On the waiter side:
+ * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_IGNORE
+ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_WAIT
+ *
+ * On the requeue side:
+ * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_IN_PROGRESS
+ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_DONE/LOCKED
+ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_NONE (requeue failed)
+ * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_DONE/LOCKED
+ * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_IGNORE (requeue failed)
+ *
+ * The requeue side ignores a waiter with state Q_REQUEUE_PI_IGNORE as this
+ * signals that the waiter is already on the way out. It also means that
+ * the waiter is still on the 'wait' futex, i.e. uaddr1.
+ *
+ * The waiter side signals early wakeup to the requeue side either through
+ * setting state to Q_REQUEUE_PI_IGNORE or to Q_REQUEUE_PI_WAIT depending
+ * on the current state. In case of Q_REQUEUE_PI_IGNORE it can immediately
+ * proceed to take the hash bucket lock of uaddr1. If it set state to WAIT,
+ * which means the wakeup is interleaving with a requeue in progress it has
+ * to wait for the requeue side to change the state. Either to DONE/LOCKED
+ * or to IGNORE. DONE/LOCKED means the waiter q is now on the uaddr2 futex
+ * and either blocked (DONE) or has acquired it (LOCKED). IGNORE is set by
+ * the requeue side when the requeue attempt failed via deadlock detection
+ * and therefore the waiter q is still on the uaddr1 futex.
+ */
+enum {
+ Q_REQUEUE_PI_NONE = 0,
+ Q_REQUEUE_PI_IGNORE,
+ Q_REQUEUE_PI_IN_PROGRESS,
+ Q_REQUEUE_PI_WAIT,
+ Q_REQUEUE_PI_DONE,
+ Q_REQUEUE_PI_LOCKED,
+};
+
static const struct futex_q futex_q_init = {
/* list gets initialized in queue_me()*/
- .key = FUTEX_KEY_INIT,
- .bitset = FUTEX_BITSET_MATCH_ANY
+ .key = FUTEX_KEY_INIT,
+ .bitset = FUTEX_BITSET_MATCH_ANY,
+ .requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE),
};
/*
@@ -1299,27 +1357,6 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
return 0;
}
-static int lookup_pi_state(u32 __user *uaddr, u32 uval,
- struct futex_hash_bucket *hb,
- union futex_key *key, struct futex_pi_state **ps,
- struct task_struct **exiting)
-{
- struct futex_q *top_waiter = futex_top_waiter(hb, key);
-
- /*
- * If there is a waiter on that futex, validate it and
- * attach to the pi_state when the validation succeeds.
- */
- if (top_waiter)
- return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);
-
- /*
- * We are the first waiter - try to look up the owner based on
- * @uval and attach to it.
- */
- return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
-}
-
static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
{
int err;
@@ -1354,7 +1391,7 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
* - 1 - acquired the lock;
* - <0 - error
*
- * The hb->lock and futex_key refs shall be held by the caller.
+ * The hb->lock must be held by the caller.
*
* @exiting is only set when the return value is -EBUSY. If so, this holds
* a refcount on the exiting task on return and the caller needs to drop it
@@ -1493,11 +1530,11 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
*/
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
{
- u32 curval, newval;
struct rt_mutex_waiter *top_waiter;
struct task_struct *new_owner;
bool postunlock = false;
- DEFINE_WAKE_Q(wake_q);
+ DEFINE_RT_WAKE_Q(wqh);
+ u32 curval, newval;
int ret = 0;
top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
@@ -1549,14 +1586,14 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
* not fail.
*/
pi_state_update_owner(pi_state, new_owner);
- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh);
}
out_unlock:
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
if (postunlock)
- rt_mutex_postunlock(&wake_q);
+ rt_mutex_postunlock(&wqh);
return ret;
}
@@ -1793,6 +1830,108 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
q->key = *key2;
}
+static inline bool futex_requeue_pi_prepare(struct futex_q *q,
+ struct futex_pi_state *pi_state)
+{
+ int old, new;
+
+ /*
+ * Set state to Q_REQUEUE_PI_IN_PROGRESS unless an early wakeup has
+ * already set Q_REQUEUE_PI_IGNORE to signal that requeue should
+ * ignore the waiter.
+ */
+ old = atomic_read_acquire(&q->requeue_state);
+ do {
+ if (old == Q_REQUEUE_PI_IGNORE)
+ return false;
+
+ /*
+ * futex_proxy_trylock_atomic() might have set it to
+ * IN_PROGRESS and an interleaved early wake to WAIT.
+ *
+ * It was considered to have an extra state for that
+ * trylock, but that would just add more conditionals
+ * all over the place for a dubious value.
+ */
+ if (old != Q_REQUEUE_PI_NONE)
+ break;
+
+ new = Q_REQUEUE_PI_IN_PROGRESS;
+ } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
+
+ q->pi_state = pi_state;
+ return true;
+}
+
+static inline void futex_requeue_pi_complete(struct futex_q *q, int locked)
+{
+ int old, new;
+
+ old = atomic_read_acquire(&q->requeue_state);
+ do {
+ if (old == Q_REQUEUE_PI_IGNORE)
+ return;
+
+ if (locked >= 0) {
+ /* Requeue succeeded. Set DONE or LOCKED */
+ WARN_ON_ONCE(old != Q_REQUEUE_PI_IN_PROGRESS &&
+ old != Q_REQUEUE_PI_WAIT);
+ new = Q_REQUEUE_PI_DONE + locked;
+ } else if (old == Q_REQUEUE_PI_IN_PROGRESS) {
+ /* Deadlock, no early wakeup interleave */
+ new = Q_REQUEUE_PI_NONE;
+ } else {
+ /* Deadlock, early wakeup interleave. */
+ WARN_ON_ONCE(old != Q_REQUEUE_PI_WAIT);
+ new = Q_REQUEUE_PI_IGNORE;
+ }
+ } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
+
+#ifdef CONFIG_PREEMPT_RT
+ /* If the waiter interleaved with the requeue let it know */
+ if (unlikely(old == Q_REQUEUE_PI_WAIT))
+ rcuwait_wake_up(&q->requeue_wait);
+#endif
+}
+
+static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q)
+{
+ int old, new;
+
+ old = atomic_read_acquire(&q->requeue_state);
+ do {
+ /* Is requeue done already? */
+ if (old >= Q_REQUEUE_PI_DONE)
+ return old;
+
+ /*
+ * If not done, then tell the requeue code to either ignore
+ * the waiter or to wake it up once the requeue is done.
+ */
+ new = Q_REQUEUE_PI_WAIT;
+ if (old == Q_REQUEUE_PI_NONE)
+ new = Q_REQUEUE_PI_IGNORE;
+ } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
+
+ /* If the requeue was in progress, wait for it to complete */
+ if (old == Q_REQUEUE_PI_IN_PROGRESS) {
+#ifdef CONFIG_PREEMPT_RT
+ rcuwait_wait_event(&q->requeue_wait,
+ atomic_read(&q->requeue_state) != Q_REQUEUE_PI_WAIT,
+ TASK_UNINTERRUPTIBLE);
+#else
+ (void)atomic_cond_read_relaxed(&q->requeue_state, VAL != Q_REQUEUE_PI_WAIT);
+#endif
+ }
+
+ /*
+ * Requeue is now either prohibited or complete. Reread state
+ * because during the wait above it might have changed. Nothing
+ * will modify q->requeue_state after this point.
+ */
+ return atomic_read(&q->requeue_state);
+}
+
/**
* requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
* @q: the futex_q
@@ -1820,6 +1959,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
q->lock_ptr = &hb->lock;
+ /* Signal locked state to the waiter */
+ futex_requeue_pi_complete(q, 1);
wake_up_state(q->task, TASK_NORMAL);
}
@@ -1879,10 +2020,21 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
if (!top_waiter)
return 0;
+ /*
+ * Ensure that this is a waiter sitting in futex_wait_requeue_pi()
+ * and waiting on the 'waitqueue' futex which is always !PI.
+ */
+ if (!top_waiter->rt_waiter || top_waiter->pi_state)
+ ret = -EINVAL;
+
/* Ensure we requeue to the expected futex. */
if (!match_futex(top_waiter->requeue_pi_key, key2))
return -EINVAL;
+ /* Ensure that this does not race against an early wakeup */
+ if (!futex_requeue_pi_prepare(top_waiter, NULL))
+ return -EAGAIN;
+
/*
* Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in
* the contended case or if set_waiters is 1. The pi_state is returned
@@ -1892,8 +2044,22 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
exiting, set_waiters);
if (ret == 1) {
+ /* Dequeue, wake up and update top_waiter::requeue_state */
requeue_pi_wake_futex(top_waiter, key2, hb2);
return vpid;
+ } else if (ret < 0) {
+ /* Rewind top_waiter::requeue_state */
+ futex_requeue_pi_complete(top_waiter, ret);
+ } else {
+ /*
+ * futex_lock_pi_atomic() did not acquire the user space
+ * futex, but managed to establish the proxy lock and pi
+ * state. top_waiter::requeue_state cannot be fixed up here
+ * because the waiter is not enqueued on the rtmutex
+ * yet. This is handled at the callsite depending on the
+ * result of rt_mutex_start_proxy_lock() which is
+ * guaranteed to be reached with this function returning 0.
+ */
}
return ret;
}
@@ -1948,23 +2114,35 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
return -EINVAL;
/*
+ * futex_requeue() allows the caller to define the number
+ * of waiters to wake up via the @nr_wake argument. With
+ * REQUEUE_PI, waking up more than one waiter is creating
+ * more problems than it solves. Waking up a waiter only
+ * makes sense if the PI futex @uaddr2 is uncontended as
+ * this allows the requeue code to acquire the futex
+ * @uaddr2 before waking the waiter. The waiter can then
+ * return to user space without further action. A secondary
+ * wakeup would just make the futex_wait_requeue_pi()
+ * handling more complex, because that code would have to
+ * look up pi_state and do more or less all the handling
+ * which the requeue code has to do for the to be requeued
+ * waiters. So restrict the number of waiters to wake to
+ * one, and only wake it up when the PI futex is
+ * uncontended. Otherwise requeue it and let the unlock of
+ * the PI futex handle the wakeup.
+ *
+ * All REQUEUE_PI users, e.g. pthread_cond_signal() and
+ * pthread_cond_broadcast() must use nr_wake=1.
+ */
+ if (nr_wake != 1)
+ return -EINVAL;
+
+ /*
* requeue_pi requires a pi_state, try to allocate it now
* without any locks in case it fails.
*/
if (refill_pi_state_cache())
return -ENOMEM;
- /*
- * requeue_pi must wake as many tasks as it can, up to nr_wake
- * + nr_requeue, since it acquires the rt_mutex prior to
- * returning to userspace, so as to not leave the rt_mutex with
- * waiters and no owner. However, second and third wake-ups
- * cannot be predicted as they involve race conditions with the
- * first wake and a fault while looking up the pi_state. Both
- * pthread_cond_signal() and pthread_cond_broadcast() should
- * use nr_wake=1.
- */
- if (nr_wake != 1)
- return -EINVAL;
}
retry:
@@ -2014,7 +2192,7 @@ retry_private:
}
}
- if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
+ if (requeue_pi) {
struct task_struct *exiting = NULL;
/*
@@ -2022,6 +2200,8 @@ retry_private:
* intend to requeue waiters, force setting the FUTEX_WAITERS
* bit. We force this here where we are able to easily handle
* faults rather in the requeue loop below.
+ *
+ * Updates top_waiter::requeue_state if a top waiter exists.
*/
ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
&key2, &pi_state,
@@ -2031,28 +2211,52 @@ retry_private:
* At this point the top_waiter has either taken uaddr2 or is
* waiting on it. If the former, then the pi_state will not
* exist yet, look it up one more time to ensure we have a
- * reference to it. If the lock was taken, ret contains the
- * vpid of the top waiter task.
+ * reference to it. If the lock was taken, @ret contains the
+ * VPID of the top waiter task.
* If the lock was not taken, we have pi_state and an initial
* refcount on it. In case of an error we have nothing.
+ *
+ * The top waiter's requeue_state is up to date:
+ *
+ * - If the lock was acquired atomically (ret > 0), then
+ * the state is Q_REQUEUE_PI_LOCKED.
+ *
+ * - If the trylock failed with an error (ret < 0) then
+ * the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
+ * happened", or Q_REQUEUE_PI_IGNORE when there was an
+ * interleaved early wakeup.
+ *
+ * - If the trylock did not succeed (ret == 0) then the
+ * state is either Q_REQUEUE_PI_IN_PROGRESS or
+ * Q_REQUEUE_PI_WAIT if an early wakeup interleaved.
+ * This will be cleaned up in the loop below, which
+ * cannot fail because futex_proxy_trylock_atomic() did
+ * the same sanity checks for requeue_pi as the loop
+ * below does.
*/
if (ret > 0) {
WARN_ON(pi_state);
task_count++;
/*
- * If we acquired the lock, then the user space value
- * of uaddr2 should be vpid. It cannot be changed by
- * the top waiter as it is blocked on hb2 lock if it
- * tries to do so. If something fiddled with it behind
- * our back the pi state lookup might unearth it. So
- * we rather use the known value than rereading and
- * handing potential crap to lookup_pi_state.
+ * If futex_proxy_trylock_atomic() acquired the
+ * user space futex, then the user space value
+ * @uaddr2 has been set to the @hb1's top waiter
+ * task VPID. This task is guaranteed to be alive
+ * and cannot be exiting because it is either
+ * sleeping or blocked on @hb2 lock.
+ *
+ * The @uaddr2 futex cannot have waiters either as
+ * otherwise futex_proxy_trylock_atomic() would not
+ * have succeeded.
*
- * If that call succeeds then we have pi_state and an
- * initial refcount on it.
+ * In order to requeue waiters to @hb2, pi state is
+ * required. Hand in the VPID value (@ret) and
+ * allocate PI state with an initial refcount on
+ * it.
*/
- ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
- &pi_state, &exiting);
+ ret = attach_to_pi_owner(uaddr2, ret, &key2, &pi_state,
+ &exiting);
+ WARN_ON(ret);
}
switch (ret) {
@@ -2060,7 +2264,10 @@ retry_private:
/* We hold a reference on the pi state. */
break;
- /* If the above failed, then pi_state is NULL */
+ /*
+ * If the above failed, then pi_state is NULL and
+ * waiter::requeue_state is correct.
+ */
case -EFAULT:
double_unlock_hb(hb1, hb2);
hb_waiters_dec(hb2);
@@ -2112,18 +2319,17 @@ retry_private:
break;
}
- /*
- * Wake nr_wake waiters. For requeue_pi, if we acquired the
- * lock, we already woke the top_waiter. If not, it will be
- * woken by futex_unlock_pi().
- */
- if (++task_count <= nr_wake && !requeue_pi) {
- mark_wake_futex(&wake_q, this);
+ /* Plain futexes just wake or requeue and are done */
+ if (!requeue_pi) {
+ if (++task_count <= nr_wake)
+ mark_wake_futex(&wake_q, this);
+ else
+ requeue_futex(this, hb1, hb2, &key2);
continue;
}
/* Ensure we requeue to the expected futex for requeue_pi. */
- if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
+ if (!match_futex(this->requeue_pi_key, &key2)) {
ret = -EINVAL;
break;
}
@@ -2131,54 +2337,67 @@ retry_private:
/*
* Requeue nr_requeue waiters and possibly one more in the case
* of requeue_pi if we couldn't acquire the lock atomically.
+ *
+ * Prepare the waiter to take the rt_mutex. Take a refcount
+ * on the pi_state and store the pointer in the futex_q
+ * object of the waiter.
*/
- if (requeue_pi) {
+ get_pi_state(pi_state);
+
+ /* Don't requeue when the waiter is already on the way out. */
+ if (!futex_requeue_pi_prepare(this, pi_state)) {
/*
- * Prepare the waiter to take the rt_mutex. Take a
- * refcount on the pi_state and store the pointer in
- * the futex_q object of the waiter.
+ * Early woken waiter signaled that it is on the
+ * way out. Drop the pi_state reference and try the
+ * next waiter. @this->pi_state is still NULL.
*/
- get_pi_state(pi_state);
- this->pi_state = pi_state;
- ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
- this->rt_waiter,
- this->task);
- if (ret == 1) {
- /*
- * We got the lock. We do neither drop the
- * refcount on pi_state nor clear
- * this->pi_state because the waiter needs the
- * pi_state for cleaning up the user space
- * value. It will drop the refcount after
- * doing so.
- */
- requeue_pi_wake_futex(this, &key2, hb2);
- continue;
- } else if (ret) {
- /*
- * rt_mutex_start_proxy_lock() detected a
- * potential deadlock when we tried to queue
- * that waiter. Drop the pi_state reference
- * which we took above and remove the pointer
- * to the state from the waiters futex_q
- * object.
- */
- this->pi_state = NULL;
- put_pi_state(pi_state);
- /*
- * We stop queueing more waiters and let user
- * space deal with the mess.
- */
- break;
- }
+ put_pi_state(pi_state);
+ continue;
+ }
+
+ ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
+ this->rt_waiter,
+ this->task);
+
+ if (ret == 1) {
+ /*
+ * We got the lock. We do neither drop the refcount
+ * on pi_state nor clear this->pi_state because the
+ * waiter needs the pi_state for cleaning up the
+ * user space value. It will drop the refcount
+ * after doing so. this::requeue_state is updated
+ * in the wakeup as well.
+ */
+ requeue_pi_wake_futex(this, &key2, hb2);
+ task_count++;
+ } else if (!ret) {
+ /* Waiter is queued, move it to hb2 */
+ requeue_futex(this, hb1, hb2, &key2);
+ futex_requeue_pi_complete(this, 0);
+ task_count++;
+ } else {
+ /*
+ * rt_mutex_start_proxy_lock() detected a potential
+ * deadlock when we tried to queue that waiter.
+ * Drop the pi_state reference which we took above
+ * and remove the pointer to the state from the
+ * waiters futex_q object.
+ */
+ this->pi_state = NULL;
+ put_pi_state(pi_state);
+ futex_requeue_pi_complete(this, ret);
+ /*
+ * We stop queueing more waiters and let user space
+ * deal with the mess.
+ */
+ break;
}
- requeue_futex(this, hb1, hb2, &key2);
}
/*
- * We took an extra initial reference to the pi_state either
- * in futex_proxy_trylock_atomic() or in lookup_pi_state(). We
- * need to drop it here again.
+ * We took an extra initial reference to the pi_state either in
+ * futex_proxy_trylock_atomic() or in attach_to_pi_owner(). We need
+ * to drop it here again.
*/
put_pi_state(pi_state);
@@ -2357,7 +2576,7 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
* Modifying pi_state _before_ the user space value would leave the
* pi_state in an inconsistent state when we fault here, because we
* need to drop the locks to handle the fault. This might be observed
- * in the PID check in lookup_pi_state.
+ * in the PID checks when attaching to PI state.
*/
retry:
if (!argowner) {
@@ -2614,8 +2833,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
*
* Setup the futex_q and locate the hash_bucket. Get the futex value and
* compare it with the expected value. Handle atomic faults internally.
- * Return with the hb lock held and a q.key reference on success, and unlocked
- * with no q.key reference on failure.
+ * Return with the hb lock held on success, and unlocked on failure.
*
* Return:
* - 0 - uaddr contains val and hb has been locked;
@@ -2693,8 +2911,8 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
current->timer_slack_ns);
retry:
/*
- * Prepare to wait on uaddr. On success, holds hb lock and increments
- * q.key refs.
+ * Prepare to wait on uaddr. On success, it holds hb->lock and q
+ * is initialized.
*/
ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
if (ret)
@@ -2705,7 +2923,6 @@ retry:
/* If we were woken (and unqueued), we succeeded, whatever. */
ret = 0;
- /* unqueue_me() drops q.key ref */
if (!unqueue_me(&q))
goto out;
ret = -ETIMEDOUT;
@@ -3072,27 +3289,22 @@ pi_faulted:
}
/**
- * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
+ * handle_early_requeue_pi_wakeup() - Handle early wakeup on the initial futex
* @hb: the hash_bucket futex_q was original enqueued on
* @q: the futex_q woken while waiting to be requeued
- * @key2: the futex_key of the requeue target futex
* @timeout: the timeout associated with the wait (NULL if none)
*
- * Detect if the task was woken on the initial futex as opposed to the requeue
- * target futex. If so, determine if it was a timeout or a signal that caused
- * the wakeup and return the appropriate error code to the caller. Must be
- * called with the hb lock held.
+ * Determine the cause for the early wakeup.
*
* Return:
- * - 0 = no early wakeup detected;
- * - <0 = -ETIMEDOUT or -ERESTARTNOINTR
+ * -EWOULDBLOCK or -ETIMEDOUT or -ERESTARTNOINTR
*/
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
- struct futex_q *q, union futex_key *key2,
+ struct futex_q *q,
struct hrtimer_sleeper *timeout)
{
- int ret = 0;
+ int ret;
/*
* With the hb lock held, we avoid races while we process the wakeup.
@@ -3101,22 +3313,21 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
* It can't be requeued from uaddr2 to something else since we don't
* support a PI aware source futex for requeue.
*/
- if (!match_futex(&q->key, key2)) {
- WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
- /*
- * We were woken prior to requeue by a timeout or a signal.
- * Unqueue the futex_q and determine which it was.
- */
- plist_del(&q->list, &hb->chain);
- hb_waiters_dec(hb);
+ WARN_ON_ONCE(&hb->lock != q->lock_ptr);
- /* Handle spurious wakeups gracefully */
- ret = -EWOULDBLOCK;
- if (timeout && !timeout->task)
- ret = -ETIMEDOUT;
- else if (signal_pending(current))
- ret = -ERESTARTNOINTR;
- }
+ /*
+ * We were woken prior to requeue by a timeout or a signal.
+ * Unqueue the futex_q and determine which it was.
+ */
+ plist_del(&q->list, &hb->chain);
+ hb_waiters_dec(hb);
+
+ /* Handle spurious wakeups gracefully */
+ ret = -EWOULDBLOCK;
+ if (timeout && !timeout->task)
+ ret = -ETIMEDOUT;
+ else if (signal_pending(current))
+ ret = -ERESTARTNOINTR;
return ret;
}
@@ -3169,6 +3380,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
struct futex_hash_bucket *hb;
union futex_key key2 = FUTEX_KEY_INIT;
struct futex_q q = futex_q_init;
+ struct rt_mutex_base *pi_mutex;
int res, ret;
if (!IS_ENABLED(CONFIG_FUTEX_PI))
@@ -3198,8 +3410,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
q.requeue_pi_key = &key2;
/*
- * Prepare to wait on uaddr. On success, increments q.key (key1) ref
- * count.
+ * Prepare to wait on uaddr. On success, it holds hb->lock and q
+ * is initialized.
*/
ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
if (ret)
@@ -3218,32 +3430,22 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
/* Queue the futex_q, drop the hb lock, wait for wakeup. */
futex_wait_queue_me(hb, &q, to);
- spin_lock(&hb->lock);
- ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
- spin_unlock(&hb->lock);
- if (ret)
- goto out;
-
- /*
- * In order for us to be here, we know our q.key == key2, and since
- * we took the hb->lock above, we also know that futex_requeue() has
- * completed and we no longer have to concern ourselves with a wakeup
- * race with the atomic proxy lock acquisition by the requeue code. The
- * futex_requeue dropped our key1 reference and incremented our key2
- * reference count.
- */
+ switch (futex_requeue_pi_wakeup_sync(&q)) {
+ case Q_REQUEUE_PI_IGNORE:
+ /* The waiter is still on uaddr1 */
+ spin_lock(&hb->lock);
+ ret = handle_early_requeue_pi_wakeup(hb, &q, to);
+ spin_unlock(&hb->lock);
+ break;
- /*
- * Check if the requeue code acquired the second futex for us and do
- * any pertinent fixup.
- */
- if (!q.rt_waiter) {
+ case Q_REQUEUE_PI_LOCKED:
+ /* The requeue acquired the lock */
if (q.pi_state && (q.pi_state->owner != current)) {
spin_lock(q.lock_ptr);
ret = fixup_owner(uaddr2, &q, true);
/*
- * Drop the reference to the pi state which
- * the requeue_pi() code acquired for us.
+ * Drop the reference to the pi state which the
+ * requeue_pi() code acquired for us.
*/
put_pi_state(q.pi_state);
spin_unlock(q.lock_ptr);
@@ -3253,18 +3455,14 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
*/
ret = ret < 0 ? ret : 0;
}
- } else {
- struct rt_mutex *pi_mutex;
+ break;
- /*
- * We have been woken up by futex_unlock_pi(), a timeout, or a
- * signal. futex_unlock_pi() will not destroy the lock_ptr nor
- * the pi_state.
- */
- WARN_ON(!q.pi_state);
+ case Q_REQUEUE_PI_DONE:
+ /* Requeue completed. Current is 'pi_blocked_on' the rtmutex */
pi_mutex = &q.pi_state->pi_mutex;
ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
+ /* Current is no longer pi_blocked_on */
spin_lock(q.lock_ptr);
if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
ret = 0;
@@ -3284,17 +3482,21 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
unqueue_me_pi(&q);
spin_unlock(q.lock_ptr);
- }
- if (ret == -EINTR) {
- /*
- * We've already been requeued, but cannot restart by calling
- * futex_lock_pi() directly. We could restart this syscall, but
- * it would detect that the user space "val" changed and return
- * -EWOULDBLOCK. Save the overhead of the restart and return
- * -EWOULDBLOCK directly.
- */
- ret = -EWOULDBLOCK;
+ if (ret == -EINTR) {
+ /*
+ * We've already been requeued, but cannot restart
+ * by calling futex_lock_pi() directly. We could
+ * restart this syscall, but it would detect that
+ * the user space "val" changed and return
+ * -EWOULDBLOCK. Save the overhead of the restart
+ * and return -EWOULDBLOCK directly.
+ */
+ ret = -EWOULDBLOCK;
+ }
+ break;
+ default:
+ BUG();
}
out:
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 4d89ad4fae3b..f7ff8919dc9b 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -355,7 +355,7 @@ static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
goto fail_npresmsk;
/* Stabilize the cpumasks */
- get_online_cpus();
+ cpus_read_lock();
build_node_to_cpumask(node_to_cpumask);
/* Spread on present CPUs starting from affd->pre_vectors */
@@ -384,7 +384,7 @@ static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
nr_others = ret;
fail_build_affinity:
- put_online_cpus();
+ cpus_read_unlock();
if (ret >= 0)
WARN_ON(nr_present + nr_others < numvecs);
@@ -505,9 +505,9 @@ unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
if (affd->calc_sets) {
set_vecs = maxvec - resv;
} else {
- get_online_cpus();
+ cpus_read_lock();
set_vecs = cpumask_weight(cpu_possible_mask);
- put_online_cpus();
+ cpus_read_unlock();
}
return resv + min(set_vecs, maxvec - resv);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 7f04c7d8296e..a98bcfc4be7b 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -265,8 +265,11 @@ int irq_startup(struct irq_desc *desc, bool resend, bool force)
} else {
switch (__irq_startup_managed(desc, aff, force)) {
case IRQ_STARTUP_NORMAL:
+ if (d->chip->flags & IRQCHIP_AFFINITY_PRE_STARTUP)
+ irq_setup_affinity(desc);
ret = __irq_startup(desc);
- irq_setup_affinity(desc);
+ if (!(d->chip->flags & IRQCHIP_AFFINITY_PRE_STARTUP))
+ irq_setup_affinity(desc);
break;
case IRQ_STARTUP_MANAGED:
irq_do_set_affinity(d, aff, false);
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 02236b13b359..39a41c56ad4f 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -166,7 +166,7 @@ void irq_migrate_all_off_this_cpu(void)
raw_spin_unlock(&desc->lock);
if (affinity_broken) {
- pr_warn_ratelimited("IRQ %u: no longer affine to CPU%u\n",
+ pr_debug_ratelimited("IRQ %u: no longer affine to CPU%u\n",
irq, smp_processor_id());
}
}
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index f8f23af6ab0d..cc7cdd26e23e 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -240,9 +240,8 @@ irq_alloc_generic_chip(const char *name, int num_ct, unsigned int irq_base,
void __iomem *reg_base, irq_flow_handler_t handler)
{
struct irq_chip_generic *gc;
- unsigned long sz = sizeof(*gc) + num_ct * sizeof(struct irq_chip_type);
- gc = kzalloc(sz, GFP_KERNEL);
+ gc = kzalloc(struct_size(gc, chip_types, num_ct), GFP_KERNEL);
if (gc) {
irq_init_generic_chip(gc, name, num_ct, irq_base, reg_base,
handler);
@@ -288,8 +287,11 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
{
struct irq_domain_chip_generic *dgc;
struct irq_chip_generic *gc;
- int numchips, sz, i;
unsigned long flags;
+ int numchips, i;
+ size_t dgc_sz;
+ size_t gc_sz;
+ size_t sz;
void *tmp;
if (d->gc)
@@ -300,8 +302,9 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
return -EINVAL;
/* Allocate a pointer, generic chip and chiptypes for each chip */
- sz = sizeof(*dgc) + numchips * sizeof(gc);
- sz += numchips * (sizeof(*gc) + num_ct * sizeof(struct irq_chip_type));
+ gc_sz = struct_size(gc, chip_types, num_ct);
+ dgc_sz = struct_size(dgc, gc, numchips);
+ sz = dgc_sz + numchips * gc_sz;
tmp = dgc = kzalloc(sz, GFP_KERNEL);
if (!dgc)
@@ -314,7 +317,7 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
d->gc = dgc;
/* Calc pointer to the first generic chip */
- tmp += sizeof(*dgc) + numchips * sizeof(gc);
+ tmp += dgc_sz;
for (i = 0; i < numchips; i++) {
/* Store the pointer to the generic chip */
dgc->gc[i] = gc = tmp;
@@ -331,7 +334,7 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
list_add_tail(&gc->list, &gc_list);
raw_spin_unlock_irqrestore(&gc_lock, flags);
/* Calc pointer to the next generic chip */
- tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type);
+ tmp += gc_sz;
}
return 0;
}
diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c
index 52f11c791bf8..08ce7da3b57c 100644
--- a/kernel/irq/ipi.c
+++ b/kernel/irq/ipi.c
@@ -14,11 +14,11 @@
/**
* irq_reserve_ipi() - Setup an IPI to destination cpumask
* @domain: IPI domain
- * @dest: cpumask of cpus which can receive the IPI
+ * @dest: cpumask of CPUs which can receive the IPI
*
* Allocate a virq that can be used to send IPI to any CPU in dest mask.
*
- * On success it'll return linux irq number and error code on failure
+ * Return: Linux IRQ number on success or error code on failure
*/
int irq_reserve_ipi(struct irq_domain *domain,
const struct cpumask *dest)
@@ -104,13 +104,13 @@ free_descs:
/**
* irq_destroy_ipi() - unreserve an IPI that was previously allocated
- * @irq: linux irq number to be destroyed
- * @dest: cpumask of cpus which should have the IPI removed
+ * @irq: Linux IRQ number to be destroyed
+ * @dest: cpumask of CPUs which should have the IPI removed
*
* The IPIs allocated with irq_reserve_ipi() are returned to the system
* destroying all virqs associated with them.
*
- * Return 0 on success or error code on failure.
+ * Return: %0 on success or error code on failure.
*/
int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest)
{
@@ -150,14 +150,14 @@ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest)
}
/**
- * ipi_get_hwirq - Get the hwirq associated with an IPI to a cpu
- * @irq: linux irq number
- * @cpu: the target cpu
+ * ipi_get_hwirq - Get the hwirq associated with an IPI to a CPU
+ * @irq: Linux IRQ number
+ * @cpu: the target CPU
*
* When dealing with coprocessors IPI, we need to inform the coprocessor of
* the hwirq it needs to use to receive and send IPIs.
*
- * Returns hwirq value on success and INVALID_HWIRQ on failure.
+ * Return: hwirq value on success or INVALID_HWIRQ on failure.
*/
irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu)
{
@@ -216,7 +216,7 @@ static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data,
* This function is for architecture or core code to speed up IPI sending. Not
* usable from driver code.
*
- * Returns zero on success and negative error number on failure.
+ * Return: %0 on success or negative error number on failure.
*/
int __ipi_send_single(struct irq_desc *desc, unsigned int cpu)
{
@@ -250,7 +250,7 @@ int __ipi_send_single(struct irq_desc *desc, unsigned int cpu)
}
/**
- * ipi_send_mask - send an IPI to target Linux SMP CPU(s)
+ * __ipi_send_mask - send an IPI to target Linux SMP CPU(s)
* @desc: pointer to irq_desc of the IRQ
* @dest: dest CPU(s), must be a subset of the mask passed to
* irq_reserve_ipi()
@@ -258,7 +258,7 @@ int __ipi_send_single(struct irq_desc *desc, unsigned int cpu)
* This function is for architecture or core code to speed up IPI sending. Not
* usable from driver code.
*
- * Returns zero on success and negative error number on failure.
+ * Return: %0 on success or negative error number on failure.
*/
int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest)
{
@@ -298,11 +298,11 @@ int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest)
/**
* ipi_send_single - Send an IPI to a single CPU
- * @virq: linux irq number from irq_reserve_ipi()
+ * @virq: Linux IRQ number from irq_reserve_ipi()
* @cpu: destination CPU, must in the destination mask passed to
* irq_reserve_ipi()
*
- * Returns zero on success and negative error number on failure.
+ * Return: %0 on success or negative error number on failure.
*/
int ipi_send_single(unsigned int virq, unsigned int cpu)
{
@@ -319,11 +319,11 @@ EXPORT_SYMBOL_GPL(ipi_send_single);
/**
* ipi_send_mask - Send an IPI to target CPU(s)
- * @virq: linux irq number from irq_reserve_ipi()
+ * @virq: Linux IRQ number from irq_reserve_ipi()
* @dest: dest CPU(s), must be a subset of the mask passed to
* irq_reserve_ipi()
*
- * Returns zero on success and negative error number on failure.
+ * Return: %0 on success or negative error number on failure.
*/
int ipi_send_mask(unsigned int virq, const struct cpumask *dest)
{
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index fadb93766020..4e3c29bb603c 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -188,7 +188,7 @@ static ssize_t hwirq_show(struct kobject *kobj,
raw_spin_lock_irq(&desc->lock);
if (desc->irq_data.domain)
- ret = sprintf(buf, "%d\n", (int)desc->irq_data.hwirq);
+ ret = sprintf(buf, "%lu\n", desc->irq_data.hwirq);
raw_spin_unlock_irq(&desc->lock);
return ret;
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 51c483ce2447..62be16135e7c 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1215,6 +1215,7 @@ int irq_domain_disconnect_hierarchy(struct irq_domain *domain,
irqd->chip = ERR_PTR(-ENOTCONN);
return 0;
}
+EXPORT_SYMBOL_GPL(irq_domain_disconnect_hierarchy);
static int irq_domain_trim_hierarchy(unsigned int virq)
{
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ef30b4762947..27667e82ecc9 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -25,12 +25,11 @@
#include "internals.h"
#if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT)
-__read_mostly bool force_irqthreads;
-EXPORT_SYMBOL_GPL(force_irqthreads);
+DEFINE_STATIC_KEY_FALSE(force_irqthreads_key);
static int __init setup_forced_irqthreads(char *arg)
{
- force_irqthreads = true;
+ static_branch_enable(&force_irqthreads_key);
return 0;
}
early_param("threadirqs", setup_forced_irqthreads);
@@ -1260,8 +1259,8 @@ static int irq_thread(void *data)
irqreturn_t (*handler_fn)(struct irq_desc *desc,
struct irqaction *action);
- if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD,
- &action->thread_flags))
+ if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD,
+ &action->thread_flags))
handler_fn = irq_forced_thread_fn;
else
handler_fn = irq_thread_fn;
@@ -1322,7 +1321,7 @@ EXPORT_SYMBOL_GPL(irq_wake_thread);
static int irq_setup_forced_threading(struct irqaction *new)
{
- if (!force_irqthreads)
+ if (!force_irqthreads())
return 0;
if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
return 0;
@@ -2072,9 +2071,9 @@ const void *free_nmi(unsigned int irq, void *dev_id)
* request_threaded_irq - allocate an interrupt line
* @irq: Interrupt line to allocate
* @handler: Function to be called when the IRQ occurs.
- * Primary handler for threaded interrupts
- * If NULL and thread_fn != NULL the default
- * primary handler is installed
+ * Primary handler for threaded interrupts.
+ * If handler is NULL and thread_fn != NULL
+ * the default primary handler is installed.
* @thread_fn: Function called from the irq handler thread
* If NULL, no irq thread is created
* @irqflags: Interrupt type flags
@@ -2108,7 +2107,7 @@ const void *free_nmi(unsigned int irq, void *dev_id)
*
* IRQF_SHARED Interrupt is shared
* IRQF_TRIGGER_* Specify active edge(s) or level
- *
+ * IRQF_ONESHOT Run thread_fn with interrupt line masked
*/
int request_threaded_irq(unsigned int irq, irq_handler_t handler,
irq_handler_t thread_fn, unsigned long irqflags,
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 578596e41cb6..bbfb26489aa1 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -280,7 +280,8 @@ void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk)
/**
* irq_matrix_alloc_managed - Allocate a managed interrupt in a CPU map
* @m: Matrix pointer
- * @cpu: On which CPU the interrupt should be allocated
+ * @msk: Which CPUs to search in
+ * @mapped_cpu: Pointer to store the CPU for which the irq was allocated
*/
int irq_matrix_alloc_managed(struct irq_matrix *m, const struct cpumask *msk,
unsigned int *mapped_cpu)
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index c41965e348b5..6a5ecee6e567 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -14,17 +14,20 @@
#include <linux/irqdomain.h>
#include <linux/msi.h>
#include <linux/slab.h>
+#include <linux/pci.h>
#include "internals.h"
/**
- * alloc_msi_entry - Allocate an initialize msi_entry
+ * alloc_msi_entry - Allocate an initialized msi_desc
* @dev: Pointer to the device for which this is allocated
* @nvec: The number of vectors used in this entry
* @affinity: Optional pointer to an affinity mask array size of @nvec
*
- * If @affinity is not NULL then an affinity array[@nvec] is allocated
+ * If @affinity is not %NULL then an affinity array[@nvec] is allocated
* and the affinity masks and flags from @affinity are copied.
+ *
+ * Return: pointer to allocated &msi_desc on success or %NULL on failure
*/
struct msi_desc *alloc_msi_entry(struct device *dev, int nvec,
const struct irq_affinity_desc *affinity)
@@ -69,6 +72,139 @@ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
}
EXPORT_SYMBOL_GPL(get_cached_msi_msg);
+static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct msi_desc *entry;
+ bool is_msix = false;
+ unsigned long irq;
+ int retval;
+
+ retval = kstrtoul(attr->attr.name, 10, &irq);
+ if (retval)
+ return retval;
+
+ entry = irq_get_msi_desc(irq);
+ if (!entry)
+ return -ENODEV;
+
+ if (dev_is_pci(dev))
+ is_msix = entry->msi_attrib.is_msix;
+
+ return sysfs_emit(buf, "%s\n", is_msix ? "msix" : "msi");
+}
+
+/**
+ * msi_populate_sysfs - Populate msi_irqs sysfs entries for devices
+ * @dev: The device (PCI, platform etc.) which will get sysfs entries
+ *
+ * Return attribute_group ** so that specific bus MSI can save it to
+ * somewhere during initializing msi irqs. If the device has no MSI irq,
+ * return NULL; if it fails to populate sysfs, return ERR_PTR
+ */
+const struct attribute_group **msi_populate_sysfs(struct device *dev)
+{
+ const struct attribute_group **msi_irq_groups;
+ struct attribute **msi_attrs, *msi_attr;
+ struct device_attribute *msi_dev_attr;
+ struct attribute_group *msi_irq_group;
+ struct msi_desc *entry;
+ int ret = -ENOMEM;
+ int num_msi = 0;
+ int count = 0;
+ int i;
+
+ /* Determine how many msi entries we have */
+ for_each_msi_entry(entry, dev)
+ num_msi += entry->nvec_used;
+ if (!num_msi)
+ return NULL;
+
+ /* Dynamically create the MSI attributes for the device */
+ msi_attrs = kcalloc(num_msi + 1, sizeof(void *), GFP_KERNEL);
+ if (!msi_attrs)
+ return ERR_PTR(-ENOMEM);
+
+ for_each_msi_entry(entry, dev) {
+ for (i = 0; i < entry->nvec_used; i++) {
+ msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
+ if (!msi_dev_attr)
+ goto error_attrs;
+ msi_attrs[count] = &msi_dev_attr->attr;
+
+ sysfs_attr_init(&msi_dev_attr->attr);
+ msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
+ entry->irq + i);
+ if (!msi_dev_attr->attr.name)
+ goto error_attrs;
+ msi_dev_attr->attr.mode = 0444;
+ msi_dev_attr->show = msi_mode_show;
+ ++count;
+ }
+ }
+
+ msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL);
+ if (!msi_irq_group)
+ goto error_attrs;
+ msi_irq_group->name = "msi_irqs";
+ msi_irq_group->attrs = msi_attrs;
+
+ msi_irq_groups = kcalloc(2, sizeof(void *), GFP_KERNEL);
+ if (!msi_irq_groups)
+ goto error_irq_group;
+ msi_irq_groups[0] = msi_irq_group;
+
+ ret = sysfs_create_groups(&dev->kobj, msi_irq_groups);
+ if (ret)
+ goto error_irq_groups;
+
+ return msi_irq_groups;
+
+error_irq_groups:
+ kfree(msi_irq_groups);
+error_irq_group:
+ kfree(msi_irq_group);
+error_attrs:
+ count = 0;
+ msi_attr = msi_attrs[count];
+ while (msi_attr) {
+ msi_dev_attr = container_of(msi_attr, struct device_attribute, attr);
+ kfree(msi_attr->name);
+ kfree(msi_dev_attr);
+ ++count;
+ msi_attr = msi_attrs[count];
+ }
+ kfree(msi_attrs);
+ return ERR_PTR(ret);
+}
+
+/**
+ * msi_destroy_sysfs - Destroy msi_irqs sysfs entries for devices
+ * @dev: The device (PCI, platform etc.) which will remove sysfs entries
+ * @msi_irq_groups: attribute_group for device msi_irqs entries
+ */
+void msi_destroy_sysfs(struct device *dev, const struct attribute_group **msi_irq_groups)
+{
+ struct device_attribute *dev_attr;
+ struct attribute **msi_attrs;
+ int count = 0;
+
+ if (msi_irq_groups) {
+ sysfs_remove_groups(&dev->kobj, msi_irq_groups);
+ msi_attrs = msi_irq_groups[0]->attrs;
+ while (msi_attrs[count]) {
+ dev_attr = container_of(msi_attrs[count],
+ struct device_attribute, attr);
+ kfree(dev_attr->attr.name);
+ kfree(dev_attr);
+ ++count;
+ }
+ kfree(msi_attrs);
+ kfree(msi_irq_groups[0]);
+ kfree(msi_irq_groups);
+ }
+}
+
#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
static inline void irq_chip_write_msi_msg(struct irq_data *data,
struct msi_msg *msg)
@@ -97,6 +233,8 @@ static void msi_check_level(struct irq_domain *domain, struct msi_msg *msg)
*
* Intended to be used by MSI interrupt controllers which are
* implemented with hierarchical domains.
+ *
+ * Return: IRQ_SET_MASK_* result code
*/
int msi_domain_set_affinity(struct irq_data *irq_data,
const struct cpumask *mask, bool force)
@@ -277,10 +415,12 @@ static void msi_domain_update_chip_ops(struct msi_domain_info *info)
}
/**
- * msi_create_irq_domain - Create a MSI interrupt domain
+ * msi_create_irq_domain - Create an MSI interrupt domain
* @fwnode: Optional fwnode of the interrupt controller
* @info: MSI domain info
* @parent: Parent irq domain
+ *
+ * Return: pointer to the created &struct irq_domain or %NULL on failure
*/
struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
struct msi_domain_info *info,
@@ -476,11 +616,6 @@ skip_activate:
return 0;
cleanup:
- for_each_msi_vector(desc, i, dev) {
- irq_data = irq_domain_get_irq_data(domain, i);
- if (irqd_is_activated(irq_data))
- irq_domain_deactivate_irq(irq_data);
- }
msi_domain_free_irqs(domain, dev);
return ret;
}
@@ -492,7 +627,7 @@ cleanup:
* are allocated
* @nvec: The number of interrupts to allocate
*
- * Returns 0 on success or an error code.
+ * Return: %0 on success or an error code.
*/
int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
int nvec)
@@ -505,7 +640,15 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
{
+ struct irq_data *irq_data;
struct msi_desc *desc;
+ int i;
+
+ for_each_msi_vector(desc, i, dev) {
+ irq_data = irq_domain_get_irq_data(domain, i);
+ if (irqd_is_activated(irq_data))
+ irq_domain_deactivate_irq(irq_data);
+ }
for_each_msi_entry(desc, dev) {
/*
@@ -521,7 +664,7 @@ void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
}
/**
- * __msi_domain_free_irqs - Free interrupts from a MSI interrupt @domain associated tp @dev
+ * msi_domain_free_irqs - Free interrupts from an MSI interrupt @domain associated to @dev
* @domain: The domain to managing the interrupts
* @dev: Pointer to device struct of the device for which the interrupts
* are free
@@ -538,8 +681,7 @@ void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
* msi_get_domain_info - Get the MSI interrupt domain info for @domain
* @domain: The interrupt domain to retrieve data from
*
- * Returns the pointer to the msi_domain_info stored in
- * @domain->host_data.
+ * Return: the pointer to the msi_domain_info stored in @domain->host_data.
*/
struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain)
{
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index ce0adb22ee96..ca71123a6130 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -227,7 +227,7 @@ unlock:
}
/**
- * irq_pm_syscore_ops - enable interrupt lines early
+ * irq_pm_syscore_resume - enable interrupt lines early
*
* Enable all interrupt lines with %IRQF_EARLY_RESUME set.
*/
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 7c5cd42df3b9..ee595ec09778 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -513,7 +513,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_printf(p, " %8s", "None");
}
if (desc->irq_data.domain)
- seq_printf(p, " %*d", prec, (int) desc->irq_data.hwirq);
+ seq_printf(p, " %*lu", prec, desc->irq_data.hwirq);
else
seq_printf(p, " %*s", prec, "");
#ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL
diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
index d309d6fbf5bd..c43e2ac2f8de 100644
--- a/kernel/irq/timings.c
+++ b/kernel/irq/timings.c
@@ -453,6 +453,11 @@ static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs,
*/
index = irq_timings_interval_index(interval);
+ if (index > PREDICTION_BUFFER_SIZE - 1) {
+ irqs->count = 0;
+ return;
+ }
+
/*
* Store the index as an element of the pattern in another
* circular array.
@@ -794,12 +799,14 @@ static int __init irq_timings_test_irqs(struct timings_intervals *ti)
__irq_timings_store(irq, irqs, ti->intervals[i]);
if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) {
+ ret = -EBADSLT;
pr_err("Failed to store in the circular buffer\n");
goto out;
}
}
if (irqs->count != ti->count) {
+ ret = -ERANGE;
pr_err("Count differs\n");
goto out;
}
diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c
index e65de172ccf7..1d1d1b0e4248 100644
--- a/kernel/kcsan/debugfs.c
+++ b/kernel/kcsan/debugfs.c
@@ -64,7 +64,7 @@ static noinline void microbenchmark(unsigned long iters)
{
const struct kcsan_ctx ctx_save = current->kcsan_ctx;
const bool was_enabled = READ_ONCE(kcsan_enabled);
- cycles_t cycles;
+ u64 cycles;
/* We may have been called from an atomic region; reset context. */
memset(&current->kcsan_ctx, 0, sizeof(current->kcsan_ctx));
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 3572808223e4..d51cabf28f38 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -24,7 +24,8 @@ obj-$(CONFIG_SMP) += spinlock.o
obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o
-obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
+obj-$(CONFIG_RT_MUTEXES) += rtmutex_api.o
+obj-$(CONFIG_PREEMPT_RT) += spinlock_rt.o ww_rt_mutex.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index b3adb40549bf..7c5a4a087cc7 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -59,7 +59,7 @@ static struct task_struct **writer_tasks;
static struct task_struct **reader_tasks;
static bool lock_is_write_held;
-static bool lock_is_read_held;
+static atomic_t lock_is_read_held;
static unsigned long last_lock_release;
struct lock_stress_stats {
@@ -682,7 +682,7 @@ static int lock_torture_writer(void *arg)
if (WARN_ON_ONCE(lock_is_write_held))
lwsp->n_lock_fail++;
lock_is_write_held = true;
- if (WARN_ON_ONCE(lock_is_read_held))
+ if (WARN_ON_ONCE(atomic_read(&lock_is_read_held)))
lwsp->n_lock_fail++; /* rare, but... */
lwsp->n_lock_acquired++;
@@ -717,13 +717,13 @@ static int lock_torture_reader(void *arg)
schedule_timeout_uninterruptible(1);
cxt.cur_ops->readlock(tid);
- lock_is_read_held = true;
+ atomic_inc(&lock_is_read_held);
if (WARN_ON_ONCE(lock_is_write_held))
lrsp->n_lock_fail++; /* rare, but... */
lrsp->n_lock_acquired++;
cxt.cur_ops->read_delay(&rand);
- lock_is_read_held = false;
+ atomic_dec(&lock_is_read_held);
cxt.cur_ops->readunlock(tid);
stutter_wait("lock_torture_reader");
@@ -738,20 +738,22 @@ static int lock_torture_reader(void *arg)
static void __torture_print_stats(char *page,
struct lock_stress_stats *statp, bool write)
{
+ long cur;
bool fail = false;
int i, n_stress;
- long max = 0, min = statp ? statp[0].n_lock_acquired : 0;
+ long max = 0, min = statp ? data_race(statp[0].n_lock_acquired) : 0;
long long sum = 0;
n_stress = write ? cxt.nrealwriters_stress : cxt.nrealreaders_stress;
for (i = 0; i < n_stress; i++) {
- if (statp[i].n_lock_fail)
+ if (data_race(statp[i].n_lock_fail))
fail = true;
- sum += statp[i].n_lock_acquired;
- if (max < statp[i].n_lock_acquired)
- max = statp[i].n_lock_acquired;
- if (min > statp[i].n_lock_acquired)
- min = statp[i].n_lock_acquired;
+ cur = data_race(statp[i].n_lock_acquired);
+ sum += cur;
+ if (max < cur)
+ max = cur;
+ if (min > cur)
+ min = cur;
}
page += sprintf(page,
"%s: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n",
@@ -996,7 +998,6 @@ static int __init lock_torture_init(void)
}
if (nreaders_stress) {
- lock_is_read_held = false;
cxt.lrsa = kmalloc_array(cxt.nrealreaders_stress,
sizeof(*cxt.lrsa),
GFP_KERNEL);
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index db9301591e3f..bc8abb8549d2 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -1,6 +1,4 @@
/*
- * kernel/mutex-debug.c
- *
* Debugging code for mutexes
*
* Started by Ingo Molnar:
@@ -22,7 +20,7 @@
#include <linux/interrupt.h>
#include <linux/debug_locks.h>
-#include "mutex-debug.h"
+#include "mutex.h"
/*
* Must be called with lock->wait_lock held.
@@ -32,6 +30,7 @@ void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
waiter->magic = waiter;
INIT_LIST_HEAD(&waiter->list);
+ waiter->ww_ctx = MUTEX_POISON_WW_CTX;
}
void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h
deleted file mode 100644
index 53e631e1d76d..000000000000
--- a/kernel/locking/mutex-debug.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Mutexes: blocking mutual exclusion locks
- *
- * started by Ingo Molnar:
- *
- * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- *
- * This file contains mutex debugging related internal declarations,
- * prototypes and inline functions, for the CONFIG_DEBUG_MUTEXES case.
- * More details are in kernel/mutex-debug.c.
- */
-
-/*
- * This must be called with lock->wait_lock held.
- */
-extern void debug_mutex_lock_common(struct mutex *lock,
- struct mutex_waiter *waiter);
-extern void debug_mutex_wake_waiter(struct mutex *lock,
- struct mutex_waiter *waiter);
-extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
-extern void debug_mutex_add_waiter(struct mutex *lock,
- struct mutex_waiter *waiter,
- struct task_struct *task);
-extern void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
- struct task_struct *task);
-extern void debug_mutex_unlock(struct mutex *lock);
-extern void debug_mutex_init(struct mutex *lock, const char *name,
- struct lock_class_key *key);
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index d2df5e68b503..d456579d0952 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -30,17 +30,20 @@
#include <linux/debug_locks.h>
#include <linux/osq_lock.h>
+#ifndef CONFIG_PREEMPT_RT
+#include "mutex.h"
+
#ifdef CONFIG_DEBUG_MUTEXES
-# include "mutex-debug.h"
+# define MUTEX_WARN_ON(cond) DEBUG_LOCKS_WARN_ON(cond)
#else
-# include "mutex.h"
+# define MUTEX_WARN_ON(cond)
#endif
void
__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
{
atomic_long_set(&lock->owner, 0);
- spin_lock_init(&lock->wait_lock);
+ raw_spin_lock_init(&lock->wait_lock);
INIT_LIST_HEAD(&lock->wait_list);
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
osq_lock_init(&lock->osq);
@@ -91,55 +94,56 @@ static inline unsigned long __owner_flags(unsigned long owner)
return owner & MUTEX_FLAGS;
}
-/*
- * Trylock variant that returns the owning task on failure.
- */
-static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock)
+static inline struct task_struct *__mutex_trylock_common(struct mutex *lock, bool handoff)
{
unsigned long owner, curr = (unsigned long)current;
owner = atomic_long_read(&lock->owner);
for (;;) { /* must loop, can race against a flag */
- unsigned long old, flags = __owner_flags(owner);
+ unsigned long flags = __owner_flags(owner);
unsigned long task = owner & ~MUTEX_FLAGS;
if (task) {
- if (likely(task != curr))
- break;
-
- if (likely(!(flags & MUTEX_FLAG_PICKUP)))
+ if (flags & MUTEX_FLAG_PICKUP) {
+ if (task != curr)
+ break;
+ flags &= ~MUTEX_FLAG_PICKUP;
+ } else if (handoff) {
+ if (flags & MUTEX_FLAG_HANDOFF)
+ break;
+ flags |= MUTEX_FLAG_HANDOFF;
+ } else {
break;
-
- flags &= ~MUTEX_FLAG_PICKUP;
+ }
} else {
-#ifdef CONFIG_DEBUG_MUTEXES
- DEBUG_LOCKS_WARN_ON(flags & MUTEX_FLAG_PICKUP);
-#endif
+ MUTEX_WARN_ON(flags & (MUTEX_FLAG_HANDOFF | MUTEX_FLAG_PICKUP));
+ task = curr;
}
- /*
- * We set the HANDOFF bit, we must make sure it doesn't live
- * past the point where we acquire it. This would be possible
- * if we (accidentally) set the bit on an unlocked mutex.
- */
- flags &= ~MUTEX_FLAG_HANDOFF;
-
- old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr | flags);
- if (old == owner)
- return NULL;
-
- owner = old;
+ if (atomic_long_try_cmpxchg_acquire(&lock->owner, &owner, task | flags)) {
+ if (task == curr)
+ return NULL;
+ break;
+ }
}
return __owner_task(owner);
}
/*
+ * Trylock or set HANDOFF
+ */
+static inline bool __mutex_trylock_or_handoff(struct mutex *lock, bool handoff)
+{
+ return !__mutex_trylock_common(lock, handoff);
+}
+
+/*
* Actual trylock that will work on any unlocked state.
*/
static inline bool __mutex_trylock(struct mutex *lock)
{
- return !__mutex_trylock_or_owner(lock);
+ return !__mutex_trylock_common(lock, false);
}
#ifndef CONFIG_DEBUG_LOCK_ALLOC
@@ -168,10 +172,7 @@ static __always_inline bool __mutex_unlock_fast(struct mutex *lock)
{
unsigned long curr = (unsigned long)current;
- if (atomic_long_cmpxchg_release(&lock->owner, curr, 0UL) == curr)
- return true;
-
- return false;
+ return atomic_long_try_cmpxchg_release(&lock->owner, &curr, 0UL);
}
#endif
@@ -226,23 +227,18 @@ static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
unsigned long owner = atomic_long_read(&lock->owner);
for (;;) {
- unsigned long old, new;
+ unsigned long new;
-#ifdef CONFIG_DEBUG_MUTEXES
- DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
- DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
-#endif
+ MUTEX_WARN_ON(__owner_task(owner) != current);
+ MUTEX_WARN_ON(owner & MUTEX_FLAG_PICKUP);
new = (owner & MUTEX_FLAG_WAITERS);
new |= (unsigned long)task;
if (task)
new |= MUTEX_FLAG_PICKUP;
- old = atomic_long_cmpxchg_release(&lock->owner, owner, new);
- if (old == owner)
+ if (atomic_long_try_cmpxchg_release(&lock->owner, &owner, new))
break;
-
- owner = old;
}
}
@@ -286,218 +282,18 @@ void __sched mutex_lock(struct mutex *lock)
EXPORT_SYMBOL(mutex_lock);
#endif
-/*
- * Wait-Die:
- * The newer transactions are killed when:
- * It (the new transaction) makes a request for a lock being held
- * by an older transaction.
- *
- * Wound-Wait:
- * The newer transactions are wounded when:
- * An older transaction makes a request for a lock being held by
- * the newer transaction.
- */
-
-/*
- * Associate the ww_mutex @ww with the context @ww_ctx under which we acquired
- * it.
- */
-static __always_inline void
-ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
-{
-#ifdef CONFIG_DEBUG_MUTEXES
- /*
- * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
- * but released with a normal mutex_unlock in this call.
- *
- * This should never happen, always use ww_mutex_unlock.
- */
- DEBUG_LOCKS_WARN_ON(ww->ctx);
-
- /*
- * Not quite done after calling ww_acquire_done() ?
- */
- DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
+#include "ww_mutex.h"
- if (ww_ctx->contending_lock) {
- /*
- * After -EDEADLK you tried to
- * acquire a different ww_mutex? Bad!
- */
- DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
-
- /*
- * You called ww_mutex_lock after receiving -EDEADLK,
- * but 'forgot' to unlock everything else first?
- */
- DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
- ww_ctx->contending_lock = NULL;
- }
-
- /*
- * Naughty, using a different class will lead to undefined behavior!
- */
- DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
-#endif
- ww_ctx->acquired++;
- ww->ctx = ww_ctx;
-}
-
-/*
- * Determine if context @a is 'after' context @b. IOW, @a is a younger
- * transaction than @b and depending on algorithm either needs to wait for
- * @b or die.
- */
-static inline bool __sched
-__ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
-{
-
- return (signed long)(a->stamp - b->stamp) > 0;
-}
-
-/*
- * Wait-Die; wake a younger waiter context (when locks held) such that it can
- * die.
- *
- * Among waiters with context, only the first one can have other locks acquired
- * already (ctx->acquired > 0), because __ww_mutex_add_waiter() and
- * __ww_mutex_check_kill() wake any but the earliest context.
- */
-static bool __sched
-__ww_mutex_die(struct mutex *lock, struct mutex_waiter *waiter,
- struct ww_acquire_ctx *ww_ctx)
-{
- if (!ww_ctx->is_wait_die)
- return false;
-
- if (waiter->ww_ctx->acquired > 0 &&
- __ww_ctx_stamp_after(waiter->ww_ctx, ww_ctx)) {
- debug_mutex_wake_waiter(lock, waiter);
- wake_up_process(waiter->task);
- }
-
- return true;
-}
-
-/*
- * Wound-Wait; wound a younger @hold_ctx if it holds the lock.
- *
- * Wound the lock holder if there are waiters with older transactions than
- * the lock holders. Even if multiple waiters may wound the lock holder,
- * it's sufficient that only one does.
- */
-static bool __ww_mutex_wound(struct mutex *lock,
- struct ww_acquire_ctx *ww_ctx,
- struct ww_acquire_ctx *hold_ctx)
-{
- struct task_struct *owner = __mutex_owner(lock);
-
- lockdep_assert_held(&lock->wait_lock);
-
- /*
- * Possible through __ww_mutex_add_waiter() when we race with
- * ww_mutex_set_context_fastpath(). In that case we'll get here again
- * through __ww_mutex_check_waiters().
- */
- if (!hold_ctx)
- return false;
-
- /*
- * Can have !owner because of __mutex_unlock_slowpath(), but if owner,
- * it cannot go away because we'll have FLAG_WAITERS set and hold
- * wait_lock.
- */
- if (!owner)
- return false;
-
- if (ww_ctx->acquired > 0 && __ww_ctx_stamp_after(hold_ctx, ww_ctx)) {
- hold_ctx->wounded = 1;
-
- /*
- * wake_up_process() paired with set_current_state()
- * inserts sufficient barriers to make sure @owner either sees
- * it's wounded in __ww_mutex_check_kill() or has a
- * wakeup pending to re-read the wounded state.
- */
- if (owner != current)
- wake_up_process(owner);
-
- return true;
- }
-
- return false;
-}
-
-/*
- * We just acquired @lock under @ww_ctx, if there are later contexts waiting
- * behind us on the wait-list, check if they need to die, or wound us.
- *
- * See __ww_mutex_add_waiter() for the list-order construction; basically the
- * list is ordered by stamp, smallest (oldest) first.
- *
- * This relies on never mixing wait-die/wound-wait on the same wait-list;
- * which is currently ensured by that being a ww_class property.
- *
- * The current task must not be on the wait list.
- */
-static void __sched
-__ww_mutex_check_waiters(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
-{
- struct mutex_waiter *cur;
-
- lockdep_assert_held(&lock->wait_lock);
-
- list_for_each_entry(cur, &lock->wait_list, list) {
- if (!cur->ww_ctx)
- continue;
-
- if (__ww_mutex_die(lock, cur, ww_ctx) ||
- __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx))
- break;
- }
-}
+#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
/*
- * After acquiring lock with fastpath, where we do not hold wait_lock, set ctx
- * and wake up any waiters so they can recheck.
+ * Trylock variant that returns the owning task on failure.
*/
-static __always_inline void
-ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock)
{
- ww_mutex_lock_acquired(lock, ctx);
-
- /*
- * The lock->ctx update should be visible on all cores before
- * the WAITERS check is done, otherwise contended waiters might be
- * missed. The contended waiters will either see ww_ctx == NULL
- * and keep spinning, or it will acquire wait_lock, add itself
- * to waiter list and sleep.
- */
- smp_mb(); /* See comments above and below. */
-
- /*
- * [W] ww->ctx = ctx [W] MUTEX_FLAG_WAITERS
- * MB MB
- * [R] MUTEX_FLAG_WAITERS [R] ww->ctx
- *
- * The memory barrier above pairs with the memory barrier in
- * __ww_mutex_add_waiter() and makes sure we either observe ww->ctx
- * and/or !empty list.
- */
- if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS)))
- return;
-
- /*
- * Uh oh, we raced in fastpath, check if any of the waiters need to
- * die or wound us.
- */
- spin_lock(&lock->base.wait_lock);
- __ww_mutex_check_waiters(&lock->base, ctx);
- spin_unlock(&lock->base.wait_lock);
+ return __mutex_trylock_common(lock, false);
}
-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-
static inline
bool ww_mutex_spin_on_owner(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
struct mutex_waiter *waiter)
@@ -754,171 +550,11 @@ EXPORT_SYMBOL(mutex_unlock);
*/
void __sched ww_mutex_unlock(struct ww_mutex *lock)
{
- /*
- * The unlocking fastpath is the 0->1 transition from 'locked'
- * into 'unlocked' state:
- */
- if (lock->ctx) {
-#ifdef CONFIG_DEBUG_MUTEXES
- DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
-#endif
- if (lock->ctx->acquired > 0)
- lock->ctx->acquired--;
- lock->ctx = NULL;
- }
-
+ __ww_mutex_unlock(lock);
mutex_unlock(&lock->base);
}
EXPORT_SYMBOL(ww_mutex_unlock);
-
-static __always_inline int __sched
-__ww_mutex_kill(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
-{
- if (ww_ctx->acquired > 0) {
-#ifdef CONFIG_DEBUG_MUTEXES
- struct ww_mutex *ww;
-
- ww = container_of(lock, struct ww_mutex, base);
- DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock);
- ww_ctx->contending_lock = ww;
-#endif
- return -EDEADLK;
- }
-
- return 0;
-}
-
-
-/*
- * Check the wound condition for the current lock acquire.
- *
- * Wound-Wait: If we're wounded, kill ourself.
- *
- * Wait-Die: If we're trying to acquire a lock already held by an older
- * context, kill ourselves.
- *
- * Since __ww_mutex_add_waiter() orders the wait-list on stamp, we only have to
- * look at waiters before us in the wait-list.
- */
-static inline int __sched
-__ww_mutex_check_kill(struct mutex *lock, struct mutex_waiter *waiter,
- struct ww_acquire_ctx *ctx)
-{
- struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
- struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
- struct mutex_waiter *cur;
-
- if (ctx->acquired == 0)
- return 0;
-
- if (!ctx->is_wait_die) {
- if (ctx->wounded)
- return __ww_mutex_kill(lock, ctx);
-
- return 0;
- }
-
- if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx))
- return __ww_mutex_kill(lock, ctx);
-
- /*
- * If there is a waiter in front of us that has a context, then its
- * stamp is earlier than ours and we must kill ourself.
- */
- cur = waiter;
- list_for_each_entry_continue_reverse(cur, &lock->wait_list, list) {
- if (!cur->ww_ctx)
- continue;
-
- return __ww_mutex_kill(lock, ctx);
- }
-
- return 0;
-}
-
-/*
- * Add @waiter to the wait-list, keep the wait-list ordered by stamp, smallest
- * first. Such that older contexts are preferred to acquire the lock over
- * younger contexts.
- *
- * Waiters without context are interspersed in FIFO order.
- *
- * Furthermore, for Wait-Die kill ourself immediately when possible (there are
- * older contexts already waiting) to avoid unnecessary waiting and for
- * Wound-Wait ensure we wound the owning context when it is younger.
- */
-static inline int __sched
-__ww_mutex_add_waiter(struct mutex_waiter *waiter,
- struct mutex *lock,
- struct ww_acquire_ctx *ww_ctx)
-{
- struct mutex_waiter *cur;
- struct list_head *pos;
- bool is_wait_die;
-
- if (!ww_ctx) {
- __mutex_add_waiter(lock, waiter, &lock->wait_list);
- return 0;
- }
-
- is_wait_die = ww_ctx->is_wait_die;
-
- /*
- * Add the waiter before the first waiter with a higher stamp.
- * Waiters without a context are skipped to avoid starving
- * them. Wait-Die waiters may die here. Wound-Wait waiters
- * never die here, but they are sorted in stamp order and
- * may wound the lock holder.
- */
- pos = &lock->wait_list;
- list_for_each_entry_reverse(cur, &lock->wait_list, list) {
- if (!cur->ww_ctx)
- continue;
-
- if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
- /*
- * Wait-Die: if we find an older context waiting, there
- * is no point in queueing behind it, as we'd have to
- * die the moment it would acquire the lock.
- */
- if (is_wait_die) {
- int ret = __ww_mutex_kill(lock, ww_ctx);
-
- if (ret)
- return ret;
- }
-
- break;
- }
-
- pos = &cur->list;
-
- /* Wait-Die: ensure younger waiters die. */
- __ww_mutex_die(lock, cur, ww_ctx);
- }
-
- __mutex_add_waiter(lock, waiter, pos);
-
- /*
- * Wound-Wait: if we're blocking on a mutex owned by a younger context,
- * wound that such that we might proceed.
- */
- if (!is_wait_die) {
- struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
-
- /*
- * See ww_mutex_set_context_fastpath(). Orders setting
- * MUTEX_FLAG_WAITERS vs the ww->ctx load,
- * such that either we or the fastpath will wound @ww->ctx.
- */
- smp_mb();
- __ww_mutex_wound(lock, ww_ctx, ww->ctx);
- }
-
- return 0;
-}
-
/*
* Lock a mutex (possibly interruptible), slowpath:
*/
@@ -928,7 +564,6 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
struct mutex_waiter waiter;
- bool first = false;
struct ww_mutex *ww;
int ret;
@@ -937,9 +572,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
might_sleep();
-#ifdef CONFIG_DEBUG_MUTEXES
- DEBUG_LOCKS_WARN_ON(lock->magic != lock);
-#endif
+ MUTEX_WARN_ON(lock->magic != lock);
ww = container_of(lock, struct ww_mutex, base);
if (ww_ctx) {
@@ -953,6 +586,10 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
*/
if (ww_ctx->acquired == 0)
ww_ctx->wounded = 0;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ nest_lock = &ww_ctx->dep_map;
+#endif
}
preempt_disable();
@@ -968,7 +605,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
return 0;
}
- spin_lock(&lock->wait_lock);
+ raw_spin_lock(&lock->wait_lock);
/*
* After waiting to acquire the wait_lock, try again.
*/
@@ -980,17 +617,15 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
}
debug_mutex_lock_common(lock, &waiter);
+ waiter.task = current;
+ if (use_ww_ctx)
+ waiter.ww_ctx = ww_ctx;
lock_contended(&lock->dep_map, ip);
if (!use_ww_ctx) {
/* add waiting tasks to the end of the waitqueue (FIFO): */
__mutex_add_waiter(lock, &waiter, &lock->wait_list);
-
-
-#ifdef CONFIG_DEBUG_MUTEXES
- waiter.ww_ctx = MUTEX_POISON_WW_CTX;
-#endif
} else {
/*
* Add in stamp order, waking up waiters that must kill
@@ -999,14 +634,12 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx);
if (ret)
goto err_early_kill;
-
- waiter.ww_ctx = ww_ctx;
}
- waiter.task = current;
-
set_current_state(state);
for (;;) {
+ bool first;
+
/*
* Once we hold wait_lock, we're serialized against
* mutex_unlock() handing the lock off to us, do a trylock
@@ -1032,18 +665,10 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
goto err;
}
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
schedule_preempt_disabled();
- /*
- * ww_mutex needs to always recheck its position since its waiter
- * list is not FIFO ordered.
- */
- if (ww_ctx || !first) {
- first = __mutex_waiter_is_first(lock, &waiter);
- if (first)
- __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
- }
+ first = __mutex_waiter_is_first(lock, &waiter);
set_current_state(state);
/*
@@ -1051,13 +676,13 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
* state back to RUNNING and fall through the next schedule(),
* or we must see its unlock and acquire.
*/
- if (__mutex_trylock(lock) ||
+ if (__mutex_trylock_or_handoff(lock, first) ||
(first && mutex_optimistic_spin(lock, ww_ctx, &waiter)))
break;
- spin_lock(&lock->wait_lock);
+ raw_spin_lock(&lock->wait_lock);
}
- spin_lock(&lock->wait_lock);
+ raw_spin_lock(&lock->wait_lock);
acquired:
__set_current_state(TASK_RUNNING);
@@ -1082,7 +707,7 @@ skip_wait:
if (ww_ctx)
ww_mutex_lock_acquired(ww, ww_ctx);
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
preempt_enable();
return 0;
@@ -1090,7 +715,7 @@ err:
__set_current_state(TASK_RUNNING);
__mutex_remove_waiter(lock, &waiter);
err_early_kill:
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
debug_mutex_free_waiter(&waiter);
mutex_release(&lock->dep_map, ip);
preempt_enable();
@@ -1106,10 +731,9 @@ __mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass,
static int __sched
__ww_mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass,
- struct lockdep_map *nest_lock, unsigned long ip,
- struct ww_acquire_ctx *ww_ctx)
+ unsigned long ip, struct ww_acquire_ctx *ww_ctx)
{
- return __mutex_lock_common(lock, state, subclass, nest_lock, ip, ww_ctx, true);
+ return __mutex_lock_common(lock, state, subclass, NULL, ip, ww_ctx, true);
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -1189,8 +813,7 @@ ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
might_sleep();
ret = __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE,
- 0, ctx ? &ctx->dep_map : NULL, _RET_IP_,
- ctx);
+ 0, _RET_IP_, ctx);
if (!ret && ctx && ctx->acquired > 1)
return ww_mutex_deadlock_injection(lock, ctx);
@@ -1205,8 +828,7 @@ ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
might_sleep();
ret = __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE,
- 0, ctx ? &ctx->dep_map : NULL, _RET_IP_,
- ctx);
+ 0, _RET_IP_, ctx);
if (!ret && ctx && ctx->acquired > 1)
return ww_mutex_deadlock_injection(lock, ctx);
@@ -1237,29 +859,21 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
*/
owner = atomic_long_read(&lock->owner);
for (;;) {
- unsigned long old;
-
-#ifdef CONFIG_DEBUG_MUTEXES
- DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
- DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
-#endif
+ MUTEX_WARN_ON(__owner_task(owner) != current);
+ MUTEX_WARN_ON(owner & MUTEX_FLAG_PICKUP);
if (owner & MUTEX_FLAG_HANDOFF)
break;
- old = atomic_long_cmpxchg_release(&lock->owner, owner,
- __owner_flags(owner));
- if (old == owner) {
+ if (atomic_long_try_cmpxchg_release(&lock->owner, &owner, __owner_flags(owner))) {
if (owner & MUTEX_FLAG_WAITERS)
break;
return;
}
-
- owner = old;
}
- spin_lock(&lock->wait_lock);
+ raw_spin_lock(&lock->wait_lock);
debug_mutex_unlock(lock);
if (!list_empty(&lock->wait_list)) {
/* get the first entry from the wait-list: */
@@ -1276,7 +890,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
if (owner & MUTEX_FLAG_HANDOFF)
__mutex_handoff(lock, next);
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
wake_up_q(&wake_q);
}
@@ -1380,7 +994,7 @@ __mutex_lock_interruptible_slowpath(struct mutex *lock)
static noinline int __sched
__ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
- return __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE, 0, NULL,
+ return __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE, 0,
_RET_IP_, ctx);
}
@@ -1388,7 +1002,7 @@ static noinline int __sched
__ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
- return __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE, 0, NULL,
+ return __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE, 0,
_RET_IP_, ctx);
}
@@ -1412,9 +1026,7 @@ int __sched mutex_trylock(struct mutex *lock)
{
bool locked;
-#ifdef CONFIG_DEBUG_MUTEXES
- DEBUG_LOCKS_WARN_ON(lock->magic != lock);
-#endif
+ MUTEX_WARN_ON(lock->magic != lock);
locked = __mutex_trylock(lock);
if (locked)
@@ -1455,7 +1067,8 @@ ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
}
EXPORT_SYMBOL(ww_mutex_lock_interruptible);
-#endif
+#endif /* !CONFIG_DEBUG_LOCK_ALLOC */
+#endif /* !CONFIG_PREEMPT_RT */
/**
* atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index f0c710b1d192..0b2a79c4013b 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -5,19 +5,41 @@
* started by Ingo Molnar:
*
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- *
- * This file contains mutex debugging related internal prototypes, for the
- * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs:
*/
-#define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
-#define debug_mutex_free_waiter(waiter) do { } while (0)
-#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0)
-#define debug_mutex_remove_waiter(lock, waiter, ti) do { } while (0)
-#define debug_mutex_unlock(lock) do { } while (0)
-#define debug_mutex_init(lock, name, key) do { } while (0)
+/*
+ * This is the control structure for tasks blocked on mutex, which resides
+ * on the blocked task's kernel stack:
+ */
+struct mutex_waiter {
+ struct list_head list;
+ struct task_struct *task;
+ struct ww_acquire_ctx *ww_ctx;
+#ifdef CONFIG_DEBUG_MUTEXES
+ void *magic;
+#endif
+};
-static inline void
-debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
-{
-}
+#ifdef CONFIG_DEBUG_MUTEXES
+extern void debug_mutex_lock_common(struct mutex *lock,
+ struct mutex_waiter *waiter);
+extern void debug_mutex_wake_waiter(struct mutex *lock,
+ struct mutex_waiter *waiter);
+extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
+extern void debug_mutex_add_waiter(struct mutex *lock,
+ struct mutex_waiter *waiter,
+ struct task_struct *task);
+extern void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
+ struct task_struct *task);
+extern void debug_mutex_unlock(struct mutex *lock);
+extern void debug_mutex_init(struct mutex *lock, const char *name,
+ struct lock_class_key *key);
+#else /* CONFIG_DEBUG_MUTEXES */
+# define debug_mutex_lock_common(lock, waiter) do { } while (0)
+# define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
+# define debug_mutex_free_waiter(waiter) do { } while (0)
+# define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0)
+# define debug_mutex_remove_waiter(lock, waiter, ti) do { } while (0)
+# define debug_mutex_unlock(lock) do { } while (0)
+# define debug_mutex_init(lock, name, key) do { } while (0)
+#endif /* !CONFIG_DEBUG_MUTEXES */
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index b5d9bb5202c6..8eabdc79602b 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -8,20 +8,58 @@
* Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
* Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
* Copyright (C) 2006 Esben Nielsen
+ * Adaptive Spinlocks:
+ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
+ * and Peter Morreale,
+ * Adaptive Spinlocks simplification:
+ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
*
* See Documentation/locking/rt-mutex-design.rst for details.
*/
-#include <linux/spinlock.h>
-#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/deadline.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
-#include <linux/sched/deadline.h>
#include <linux/sched/wake_q.h>
-#include <linux/sched/debug.h>
-#include <linux/timer.h>
+#include <linux/ww_mutex.h>
#include "rtmutex_common.h"
+#ifndef WW_RT
+# define build_ww_mutex() (false)
+# define ww_container_of(rtm) NULL
+
+static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
+ struct rt_mutex *lock,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ return 0;
+}
+
+static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
+ struct ww_acquire_ctx *ww_ctx)
+{
+}
+
+static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
+ struct ww_acquire_ctx *ww_ctx)
+{
+}
+
+static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ return 0;
+}
+
+#else
+# define build_ww_mutex() (true)
+# define ww_container_of(rtm) container_of(rtm, struct ww_mutex, base)
+# include "ww_mutex.h"
+#endif
+
/*
* lock->owner state tracking:
*
@@ -50,7 +88,7 @@
*/
static __always_inline void
-rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
+rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
{
unsigned long val = (unsigned long)owner;
@@ -60,13 +98,13 @@ rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
WRITE_ONCE(lock->owner, (struct task_struct *)val);
}
-static __always_inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
+static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
{
lock->owner = (struct task_struct *)
((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}
-static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex *lock)
+static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
{
unsigned long owner, *p = (unsigned long *) &lock->owner;
@@ -141,15 +179,26 @@ static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex *lock)
* set up.
*/
#ifndef CONFIG_DEBUG_RT_MUTEXES
-# define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c)
-# define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c)
+static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
+ struct task_struct *old,
+ struct task_struct *new)
+{
+ return try_cmpxchg_acquire(&lock->owner, &old, new);
+}
+
+static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
+ struct task_struct *old,
+ struct task_struct *new)
+{
+ return try_cmpxchg_release(&lock->owner, &old, new);
+}
/*
* Callers must hold the ->wait_lock -- which is the whole purpose as we force
* all future threads that attempt to [Rmw] the lock to the slowpath. As such
* relaxed semantics suffice.
*/
-static __always_inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
+static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
unsigned long owner, *p = (unsigned long *) &lock->owner;
@@ -165,7 +214,7 @@ static __always_inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
* 2) Drop lock->wait_lock
* 3) Try to unlock the lock with cmpxchg
*/
-static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
unsigned long flags)
__releases(lock->wait_lock)
{
@@ -201,10 +250,22 @@ static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
}
#else
-# define rt_mutex_cmpxchg_acquire(l,c,n) (0)
-# define rt_mutex_cmpxchg_release(l,c,n) (0)
+static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
+ struct task_struct *old,
+ struct task_struct *new)
+{
+ return false;
+
+}
-static __always_inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
+static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
+ struct task_struct *old,
+ struct task_struct *new)
+{
+ return false;
+}
+
+static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
lock->owner = (struct task_struct *)
((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
@@ -213,7 +274,7 @@ static __always_inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
/*
* Simple slow path only version: lock->owner is protected by lock->wait_lock.
*/
-static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
unsigned long flags)
__releases(lock->wait_lock)
{
@@ -223,11 +284,28 @@ static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
}
#endif
+static __always_inline int __waiter_prio(struct task_struct *task)
+{
+ int prio = task->prio;
+
+ if (!rt_prio(prio))
+ return DEFAULT_PRIO;
+
+ return prio;
+}
+
+static __always_inline void
+waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
+{
+ waiter->prio = __waiter_prio(task);
+ waiter->deadline = task->dl.deadline;
+}
+
/*
* Only use with rt_mutex_waiter_{less,equal}()
*/
#define task_to_waiter(p) \
- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline }
+ &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
struct rt_mutex_waiter *right)
@@ -265,22 +343,63 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
return 1;
}
+static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
+ struct rt_mutex_waiter *top_waiter)
+{
+ if (rt_mutex_waiter_less(waiter, top_waiter))
+ return true;
+
+#ifdef RT_MUTEX_BUILD_SPINLOCKS
+ /*
+ * Note that RT tasks are excluded from same priority (lateral)
+ * steals to prevent the introduction of an unbounded latency.
+ */
+ if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
+ return false;
+
+ return rt_mutex_waiter_equal(waiter, top_waiter);
+#else
+ return false;
+#endif
+}
+
#define __node_2_waiter(node) \
rb_entry((node), struct rt_mutex_waiter, tree_entry)
static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
{
- return rt_mutex_waiter_less(__node_2_waiter(a), __node_2_waiter(b));
+ struct rt_mutex_waiter *aw = __node_2_waiter(a);
+ struct rt_mutex_waiter *bw = __node_2_waiter(b);
+
+ if (rt_mutex_waiter_less(aw, bw))
+ return 1;
+
+ if (!build_ww_mutex())
+ return 0;
+
+ if (rt_mutex_waiter_less(bw, aw))
+ return 0;
+
+ /* NOTE: relies on waiter->ww_ctx being set before insertion */
+ if (aw->ww_ctx) {
+ if (!bw->ww_ctx)
+ return 1;
+
+ return (signed long)(aw->ww_ctx->stamp -
+ bw->ww_ctx->stamp) < 0;
+ }
+
+ return 0;
}
static __always_inline void
-rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
+rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
}
static __always_inline void
-rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
+rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
if (RB_EMPTY_NODE(&waiter->tree_entry))
return;
@@ -326,6 +445,35 @@ static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
rt_mutex_setprio(p, pi_task);
}
+/* RT mutex specific wake_q wrappers */
+static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
+ struct rt_mutex_waiter *w)
+{
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && w->wake_state != TASK_NORMAL) {
+ if (IS_ENABLED(CONFIG_PROVE_LOCKING))
+ WARN_ON_ONCE(wqh->rtlock_task);
+ get_task_struct(w->task);
+ wqh->rtlock_task = w->task;
+ } else {
+ wake_q_add(&wqh->head, w->task);
+ }
+}
+
+static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
+{
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
+ wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
+ put_task_struct(wqh->rtlock_task);
+ wqh->rtlock_task = NULL;
+ }
+
+ if (!wake_q_empty(&wqh->head))
+ wake_up_q(&wqh->head);
+
+ /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
+ preempt_enable();
+}
+
/*
* Deadlock detection is conditional:
*
@@ -343,17 +491,12 @@ static __always_inline bool
rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
enum rtmutex_chainwalk chwalk)
{
- if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEX))
+ if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
return waiter != NULL;
return chwalk == RT_MUTEX_FULL_CHAINWALK;
}
-/*
- * Max number of times we'll walk the boosting chain:
- */
-int max_lock_depth = 1024;
-
-static __always_inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
+static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
{
return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}
@@ -423,15 +566,15 @@ static __always_inline struct rt_mutex *task_blocked_on_lock(struct task_struct
*/
static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
enum rtmutex_chainwalk chwalk,
- struct rt_mutex *orig_lock,
- struct rt_mutex *next_lock,
+ struct rt_mutex_base *orig_lock,
+ struct rt_mutex_base *next_lock,
struct rt_mutex_waiter *orig_waiter,
struct task_struct *top_task)
{
struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
struct rt_mutex_waiter *prerequeue_top_waiter;
int ret = 0, depth = 0;
- struct rt_mutex *lock;
+ struct rt_mutex_base *lock;
bool detect_deadlock;
bool requeue = true;
@@ -514,6 +657,31 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
goto out_unlock_pi;
/*
+ * There could be 'spurious' loops in the lock graph due to ww_mutex,
+ * consider:
+ *
+ * P1: A, ww_A, ww_B
+ * P2: ww_B, ww_A
+ * P3: A
+ *
+ * P3 should not return -EDEADLK because it gets trapped in the cycle
+ * created by P1 and P2 (which will resolve -- and runs into
+ * max_lock_depth above). Therefore disable detect_deadlock such that
+ * the below termination condition can trigger once all relevant tasks
+ * are boosted.
+ *
+ * Even when we start with ww_mutex we can disable deadlock detection,
+ * since we would suppress a ww_mutex induced deadlock at [6] anyway.
+ * Suppressing it here however is not sufficient since we might still
+ * hit [6] due to adjustment driven iteration.
+ *
+ * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
+ * utterly fail to report it; lockdep should.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
+ detect_deadlock = false;
+
+ /*
* Drop out, when the task has no waiters. Note,
* top_waiter can be NULL, when we are in the deboosting
* mode!
@@ -574,8 +742,21 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
* walk, we detected a deadlock.
*/
if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
- raw_spin_unlock(&lock->wait_lock);
ret = -EDEADLK;
+
+ /*
+ * When the deadlock is due to ww_mutex; also see above. Don't
+ * report the deadlock and instead let the ww_mutex wound/die
+ * logic pick which of the contending threads gets -EDEADLK.
+ * orig_waiter may be NULL (e.g. remove_waiter()); check it first.
+ * NOTE: assumes the cycle only contains a single ww_class; any
+ * other configuration and we fail to report; also, see
+ * lockdep.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
+ ret = 0;
+
+ raw_spin_unlock(&lock->wait_lock);
goto out_unlock_pi;
}
@@ -653,8 +834,7 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
* serializes all pi_waiters access and rb_erase() does not care about
* the values of the node being removed.
*/
- waiter->prio = task->prio;
- waiter->deadline = task->dl.deadline;
+ waiter_update_prio(waiter, task);
rt_mutex_enqueue(lock, waiter);
@@ -676,7 +856,7 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
* to get the lock.
*/
if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
- wake_up_process(rt_mutex_top_waiter(lock)->task);
+ wake_up_state(rt_mutex_top_waiter(lock)->task, rt_mutex_top_waiter(lock)->wake_state);
raw_spin_unlock_irq(&lock->wait_lock);
return 0;
}
@@ -779,7 +959,7 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
* callsite called task_blocked_on_lock(), otherwise NULL
*/
static int __sched
-try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
struct rt_mutex_waiter *waiter)
{
lockdep_assert_held(&lock->wait_lock);
@@ -815,19 +995,21 @@ try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
* trylock attempt.
*/
if (waiter) {
- /*
- * If waiter is not the highest priority waiter of
- * @lock, give up.
- */
- if (waiter != rt_mutex_top_waiter(lock))
- return 0;
+ struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
/*
- * We can acquire the lock. Remove the waiter from the
- * lock waiters tree.
+ * If waiter is the highest priority waiter of @lock,
+ * or allowed to steal it, take it over.
*/
- rt_mutex_dequeue(lock, waiter);
-
+ if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
+ /*
+ * We can acquire the lock. Remove the waiter from the
+ * lock waiters tree.
+ */
+ rt_mutex_dequeue(lock, waiter);
+ } else {
+ return 0;
+ }
} else {
/*
* If the lock has waiters already we check whether @task is
@@ -838,13 +1020,9 @@ try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
* not need to be dequeued.
*/
if (rt_mutex_has_waiters(lock)) {
- /*
- * If @task->prio is greater than or equal to
- * the top waiter priority (kernel view),
- * @task lost.
- */
- if (!rt_mutex_waiter_less(task_to_waiter(task),
- rt_mutex_top_waiter(lock)))
+ /* Check whether the trylock can steal it. */
+ if (!rt_mutex_steal(task_to_waiter(task),
+ rt_mutex_top_waiter(lock)))
return 0;
/*
@@ -897,14 +1075,15 @@ takeit:
*
* This must be called with lock->wait_lock held and interrupts disabled
*/
-static int __sched task_blocks_on_rt_mutex(struct rt_mutex *lock,
+static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task,
+ struct ww_acquire_ctx *ww_ctx,
enum rtmutex_chainwalk chwalk)
{
struct task_struct *owner = rt_mutex_owner(lock);
struct rt_mutex_waiter *top_waiter = waiter;
- struct rt_mutex *next_lock;
+ struct rt_mutex_base *next_lock;
int chain_walk = 0, res;
lockdep_assert_held(&lock->wait_lock);
@@ -924,8 +1103,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex *lock,
raw_spin_lock(&task->pi_lock);
waiter->task = task;
waiter->lock = lock;
- waiter->prio = task->prio;
- waiter->deadline = task->dl.deadline;
+ waiter_update_prio(waiter, task);
/* Get the top priority waiter on the lock */
if (rt_mutex_has_waiters(lock))
@@ -936,6 +1114,21 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex *lock,
raw_spin_unlock(&task->pi_lock);
+ if (build_ww_mutex() && ww_ctx) {
+ struct rt_mutex *rtm;
+
+ /* Check whether the waiter should back out immediately */
+ rtm = container_of(lock, struct rt_mutex, rtmutex);
+ res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
+ if (res) {
+ raw_spin_lock(&task->pi_lock);
+ rt_mutex_dequeue(lock, waiter);
+ task->pi_blocked_on = NULL;
+ raw_spin_unlock(&task->pi_lock);
+ return res;
+ }
+ }
+
if (!owner)
return 0;
@@ -986,8 +1179,8 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex *lock,
*
* Called with lock->wait_lock held and interrupts disabled.
*/
-static void __sched mark_wakeup_next_waiter(struct wake_q_head *wake_q,
- struct rt_mutex *lock)
+static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
+ struct rt_mutex_base *lock)
{
struct rt_mutex_waiter *waiter;
@@ -1023,235 +1216,14 @@ static void __sched mark_wakeup_next_waiter(struct wake_q_head *wake_q,
* deboost but before waking our donor task, hence the preempt_disable()
* before unlock.
*
- * Pairs with preempt_enable() in rt_mutex_postunlock();
+ * Pairs with preempt_enable() in rt_mutex_wake_up_q();
*/
preempt_disable();
- wake_q_add(wake_q, waiter->task);
- raw_spin_unlock(&current->pi_lock);
-}
-
-/*
- * Remove a waiter from a lock and give up
- *
- * Must be called with lock->wait_lock held and interrupts disabled. I must
- * have just failed to try_to_take_rt_mutex().
- */
-static void __sched remove_waiter(struct rt_mutex *lock,
- struct rt_mutex_waiter *waiter)
-{
- bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
- struct task_struct *owner = rt_mutex_owner(lock);
- struct rt_mutex *next_lock;
-
- lockdep_assert_held(&lock->wait_lock);
-
- raw_spin_lock(&current->pi_lock);
- rt_mutex_dequeue(lock, waiter);
- current->pi_blocked_on = NULL;
+ rt_mutex_wake_q_add(wqh, waiter);
raw_spin_unlock(&current->pi_lock);
-
- /*
- * Only update priority if the waiter was the highest priority
- * waiter of the lock and there is an owner to update.
- */
- if (!owner || !is_top_waiter)
- return;
-
- raw_spin_lock(&owner->pi_lock);
-
- rt_mutex_dequeue_pi(owner, waiter);
-
- if (rt_mutex_has_waiters(lock))
- rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
-
- rt_mutex_adjust_prio(owner);
-
- /* Store the lock on which owner is blocked or NULL */
- next_lock = task_blocked_on_lock(owner);
-
- raw_spin_unlock(&owner->pi_lock);
-
- /*
- * Don't walk the chain, if the owner task is not blocked
- * itself.
- */
- if (!next_lock)
- return;
-
- /* gets dropped in rt_mutex_adjust_prio_chain()! */
- get_task_struct(owner);
-
- raw_spin_unlock_irq(&lock->wait_lock);
-
- rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
- next_lock, NULL, current);
-
- raw_spin_lock_irq(&lock->wait_lock);
-}
-
-/*
- * Recheck the pi chain, in case we got a priority setting
- *
- * Called from sched_setscheduler
- */
-void __sched rt_mutex_adjust_pi(struct task_struct *task)
-{
- struct rt_mutex_waiter *waiter;
- struct rt_mutex *next_lock;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&task->pi_lock, flags);
-
- waiter = task->pi_blocked_on;
- if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
- return;
- }
- next_lock = waiter->lock;
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-
- /* gets dropped in rt_mutex_adjust_prio_chain()! */
- get_task_struct(task);
-
- rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
- next_lock, NULL, task);
}
-void __sched rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
-{
- debug_rt_mutex_init_waiter(waiter);
- RB_CLEAR_NODE(&waiter->pi_tree_entry);
- RB_CLEAR_NODE(&waiter->tree_entry);
- waiter->task = NULL;
-}
-
-/**
- * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
- * @lock: the rt_mutex to take
- * @state: the state the task should block in (TASK_INTERRUPTIBLE
- * or TASK_UNINTERRUPTIBLE)
- * @timeout: the pre-initialized and started timer, or NULL for none
- * @waiter: the pre-initialized rt_mutex_waiter
- *
- * Must be called with lock->wait_lock held and interrupts disabled
- */
-static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, unsigned int state,
- struct hrtimer_sleeper *timeout,
- struct rt_mutex_waiter *waiter)
-{
- int ret = 0;
-
- for (;;) {
- /* Try to acquire the lock: */
- if (try_to_take_rt_mutex(lock, current, waiter))
- break;
-
- if (timeout && !timeout->task) {
- ret = -ETIMEDOUT;
- break;
- }
- if (signal_pending_state(state, current)) {
- ret = -EINTR;
- break;
- }
-
- raw_spin_unlock_irq(&lock->wait_lock);
-
- schedule();
-
- raw_spin_lock_irq(&lock->wait_lock);
- set_current_state(state);
- }
-
- __set_current_state(TASK_RUNNING);
- return ret;
-}
-
-static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
- struct rt_mutex_waiter *w)
-{
- /*
- * If the result is not -EDEADLOCK or the caller requested
- * deadlock detection, nothing to do here.
- */
- if (res != -EDEADLOCK || detect_deadlock)
- return;
-
- /*
- * Yell loudly and stop the task right here.
- */
- WARN(1, "rtmutex deadlock detected\n");
- while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- }
-}
-
-/*
- * Slow path lock function:
- */
-static int __sched rt_mutex_slowlock(struct rt_mutex *lock, unsigned int state,
- struct hrtimer_sleeper *timeout,
- enum rtmutex_chainwalk chwalk)
-{
- struct rt_mutex_waiter waiter;
- unsigned long flags;
- int ret = 0;
-
- rt_mutex_init_waiter(&waiter);
-
- /*
- * Technically we could use raw_spin_[un]lock_irq() here, but this can
- * be called in early boot if the cmpxchg() fast path is disabled
- * (debug, no architecture support). In this case we will acquire the
- * rtmutex with lock->wait_lock held. But we cannot unconditionally
- * enable interrupts in that early boot case. So we need to use the
- * irqsave/restore variants.
- */
- raw_spin_lock_irqsave(&lock->wait_lock, flags);
-
- /* Try to acquire the lock again: */
- if (try_to_take_rt_mutex(lock, current, NULL)) {
- raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
- return 0;
- }
-
- set_current_state(state);
-
- /* Setup the timer, when timeout != NULL */
- if (unlikely(timeout))
- hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
-
- ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
-
- if (likely(!ret))
- /* sleep on the mutex */
- ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
-
- if (unlikely(ret)) {
- __set_current_state(TASK_RUNNING);
- remove_waiter(lock, &waiter);
- rt_mutex_handle_deadlock(ret, chwalk, &waiter);
- }
-
- /*
- * try_to_take_rt_mutex() sets the waiter bit
- * unconditionally. We might have to fix that up.
- */
- fixup_rt_mutex_waiters(lock);
-
- raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
-
- /* Remove pending timer: */
- if (unlikely(timeout))
- hrtimer_cancel(&timeout->timer);
-
- debug_rt_mutex_free_waiter(&waiter);
-
- return ret;
-}
-
-static int __sched __rt_mutex_slowtrylock(struct rt_mutex *lock)
+static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
int ret = try_to_take_rt_mutex(lock, current, NULL);
@@ -1267,7 +1239,7 @@ static int __sched __rt_mutex_slowtrylock(struct rt_mutex *lock)
/*
* Slow path try-lock function:
*/
-static int __sched rt_mutex_slowtrylock(struct rt_mutex *lock)
+static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
unsigned long flags;
int ret;
@@ -1293,25 +1265,20 @@ static int __sched rt_mutex_slowtrylock(struct rt_mutex *lock)
return ret;
}
-/*
- * Performs the wakeup of the top-waiter and re-enables preemption.
- */
-void __sched rt_mutex_postunlock(struct wake_q_head *wake_q)
+static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
{
- wake_up_q(wake_q);
+ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
+ return 1;
- /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
- preempt_enable();
+ return rt_mutex_slowtrylock(lock);
}
/*
* Slow path to release a rt-mutex.
- *
- * Return whether the current task needs to call rt_mutex_postunlock().
*/
-static void __sched rt_mutex_slowunlock(struct rt_mutex *lock)
+static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
{
- DEFINE_WAKE_Q(wake_q);
+ DEFINE_RT_WAKE_Q(wqh);
unsigned long flags;
/* irqsave required to support early boot calls */
@@ -1364,422 +1331,387 @@ static void __sched rt_mutex_slowunlock(struct rt_mutex *lock)
*
* Queue the next waiter for wakeup once we release the wait_lock.
*/
- mark_wakeup_next_waiter(&wake_q, lock);
+ mark_wakeup_next_waiter(&wqh, lock);
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
- rt_mutex_postunlock(&wake_q);
+ rt_mutex_wake_up_q(&wqh);
}
-/*
- * debug aware fast / slowpath lock,trylock,unlock
- *
- * The atomic acquire/release ops are compiled away, when either the
- * architecture does not support cmpxchg or when debugging is enabled.
- */
-static __always_inline int __rt_mutex_lock(struct rt_mutex *lock, long state,
- unsigned int subclass)
+static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
{
- int ret;
-
- might_sleep();
- mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
-
- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
- return 0;
+ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
+ return;
- ret = rt_mutex_slowlock(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
- if (ret)
- mutex_release(&lock->dep_map, _RET_IP_);
- return ret;
+ rt_mutex_slowunlock(lock);
}
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-/**
- * rt_mutex_lock_nested - lock a rt_mutex
- *
- * @lock: the rt_mutex to be locked
- * @subclass: the lockdep subclass
- */
-void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass)
+#ifdef CONFIG_SMP
+static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
+ struct rt_mutex_waiter *waiter,
+ struct task_struct *owner)
{
- __rt_mutex_lock(lock, TASK_UNINTERRUPTIBLE, subclass);
-}
-EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
-
-#else /* !CONFIG_DEBUG_LOCK_ALLOC */
+ bool res = true;
-/**
- * rt_mutex_lock - lock a rt_mutex
- *
- * @lock: the rt_mutex to be locked
- */
-void __sched rt_mutex_lock(struct rt_mutex *lock)
+ rcu_read_lock();
+ for (;;) {
+ /* If owner changed, trylock again. */
+ if (owner != rt_mutex_owner(lock))
+ break;
+ /*
+ * Ensure that @owner is dereferenced after checking that
+ * the lock owner still matches @owner. If that fails,
+ * @owner might point to freed memory. If it still matches,
+ * the rcu_read_lock() ensures the memory stays valid.
+ */
+ barrier();
+ /*
+ * Stop spinning when:
+ * - the lock owner has been scheduled out
+ * - current is no longer the top waiter
+ * - current is requested to reschedule (redundant
+ * for CONFIG_PREEMPT_RCU=y)
+ * - the VCPU on which owner runs is preempted
+ */
+ if (!owner->on_cpu || need_resched() ||
+ !rt_mutex_waiter_is_top_waiter(lock, waiter) ||
+ vcpu_is_preempted(task_cpu(owner))) {
+ res = false;
+ break;
+ }
+ cpu_relax();
+ }
+ rcu_read_unlock();
+ return res;
+}
+#else
+static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
+ struct rt_mutex_waiter *waiter,
+ struct task_struct *owner)
{
- __rt_mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0);
+ return false;
}
-EXPORT_SYMBOL_GPL(rt_mutex_lock);
#endif
-/**
- * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
- *
- * @lock: the rt_mutex to be locked
- *
- * Returns:
- * 0 on success
- * -EINTR when interrupted by a signal
+#ifdef RT_MUTEX_BUILD_MUTEX
+/*
+ * Functions required for:
+ * - rtmutex, futex on all kernels
+ * - mutex and rwsem substitutions on RT kernels
*/
-int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
-{
- return __rt_mutex_lock(lock, TASK_INTERRUPTIBLE, 0);
-}
-EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
-/**
- * rt_mutex_trylock - try to lock a rt_mutex
- *
- * @lock: the rt_mutex to be locked
- *
- * This function can only be called in thread context. It's safe to call it
- * from atomic regions, but not from hard or soft interrupt context.
+/*
+ * Remove a waiter from a lock and give up
*
- * Returns:
- * 1 on success
- * 0 on contention
+ * Must be called with lock->wait_lock held and interrupts disabled. It must
+ * have just failed to try_to_take_rt_mutex().
*/
-int __sched rt_mutex_trylock(struct rt_mutex *lock)
+static void __sched remove_waiter(struct rt_mutex_base *lock,
+ struct rt_mutex_waiter *waiter)
{
- int ret;
+ bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
+ struct task_struct *owner = rt_mutex_owner(lock);
+ struct rt_mutex_base *next_lock;
- if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
- return 0;
+ lockdep_assert_held(&lock->wait_lock);
+
+ raw_spin_lock(&current->pi_lock);
+ rt_mutex_dequeue(lock, waiter);
+ current->pi_blocked_on = NULL;
+ raw_spin_unlock(&current->pi_lock);
/*
- * No lockdep annotation required because lockdep disables the fast
- * path.
+ * Only update priority if the waiter was the highest priority
+ * waiter of the lock and there is an owner to update.
*/
- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
- return 1;
-
- ret = rt_mutex_slowtrylock(lock);
- if (ret)
- mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(rt_mutex_trylock);
-
-/**
- * rt_mutex_unlock - unlock a rt_mutex
- *
- * @lock: the rt_mutex to be unlocked
- */
-void __sched rt_mutex_unlock(struct rt_mutex *lock)
-{
- mutex_release(&lock->dep_map, _RET_IP_);
- if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
+ if (!owner || !is_top_waiter)
return;
- rt_mutex_slowunlock(lock);
-}
-EXPORT_SYMBOL_GPL(rt_mutex_unlock);
+ raw_spin_lock(&owner->pi_lock);
-/*
- * Futex variants, must not use fastpath.
- */
-int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
-{
- return rt_mutex_slowtrylock(lock);
-}
+ rt_mutex_dequeue_pi(owner, waiter);
-int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock)
-{
- return __rt_mutex_slowtrylock(lock);
-}
+ if (rt_mutex_has_waiters(lock))
+ rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
-/**
- * __rt_mutex_futex_unlock - Futex variant, that since futex variants
- * do not use the fast-path, can be simple and will not need to retry.
- *
- * @lock: The rt_mutex to be unlocked
- * @wake_q: The wake queue head from which to get the next lock waiter
- */
-bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
- struct wake_q_head *wake_q)
-{
- lockdep_assert_held(&lock->wait_lock);
+ rt_mutex_adjust_prio(owner);
- debug_rt_mutex_unlock(lock);
+ /* Store the lock on which owner is blocked or NULL */
+ next_lock = task_blocked_on_lock(owner);
- if (!rt_mutex_has_waiters(lock)) {
- lock->owner = NULL;
- return false; /* done */
- }
+ raw_spin_unlock(&owner->pi_lock);
/*
- * We've already deboosted, mark_wakeup_next_waiter() will
- * retain preempt_disabled when we drop the wait_lock, to
- * avoid inversion prior to the wakeup. preempt_disable()
- * therein pairs with rt_mutex_postunlock().
+ * Don't walk the chain, if the owner task is not blocked
+ * itself.
*/
- mark_wakeup_next_waiter(wake_q, lock);
+ if (!next_lock)
+ return;
- return true; /* call postunlock() */
-}
+ /* gets dropped in rt_mutex_adjust_prio_chain()! */
+ get_task_struct(owner);
-void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
-{
- DEFINE_WAKE_Q(wake_q);
- unsigned long flags;
- bool postunlock;
+ raw_spin_unlock_irq(&lock->wait_lock);
- raw_spin_lock_irqsave(&lock->wait_lock, flags);
- postunlock = __rt_mutex_futex_unlock(lock, &wake_q);
- raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
+ next_lock, NULL, current);
- if (postunlock)
- rt_mutex_postunlock(&wake_q);
+ raw_spin_lock_irq(&lock->wait_lock);
}
/**
- * __rt_mutex_init - initialize the rt_mutex
- *
- * @lock: The rt_mutex to be initialized
- * @name: The lock name used for debugging
- * @key: The lock class key used for debugging
- *
- * Initialize the rt_mutex to unlocked state.
+ * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
+ * @lock: the rt_mutex to take
+ * @ww_ctx: WW mutex context pointer
+ * @state: the state the task should block in (TASK_INTERRUPTIBLE
+ * or TASK_UNINTERRUPTIBLE)
+ * @timeout: the pre-initialized and started timer, or NULL for none
+ * @waiter: the pre-initialized rt_mutex_waiter
*
- * Initializing of a locked rt_mutex is not allowed
+ * Must be called with lock->wait_lock held and interrupts disabled
*/
-void __sched __rt_mutex_init(struct rt_mutex *lock, const char *name,
- struct lock_class_key *key)
+static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
+ struct ww_acquire_ctx *ww_ctx,
+ unsigned int state,
+ struct hrtimer_sleeper *timeout,
+ struct rt_mutex_waiter *waiter)
{
- debug_check_no_locks_freed((void *)lock, sizeof(*lock));
- lockdep_init_map(&lock->dep_map, name, key, 0);
+ struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
+ struct task_struct *owner;
+ int ret = 0;
- __rt_mutex_basic_init(lock);
-}
-EXPORT_SYMBOL_GPL(__rt_mutex_init);
+ for (;;) {
+ /* Try to acquire the lock: */
+ if (try_to_take_rt_mutex(lock, current, waiter))
+ break;
-/**
- * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
- * proxy owner
- *
- * @lock: the rt_mutex to be locked
- * @proxy_owner:the task to set as owner
- *
- * No locking. Caller has to do serializing itself
- *
- * Special API call for PI-futex support. This initializes the rtmutex and
- * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not
- * possible at this point because the pi_state which contains the rtmutex
- * is not yet visible to other tasks.
- */
-void __sched rt_mutex_init_proxy_locked(struct rt_mutex *lock,
- struct task_struct *proxy_owner)
-{
- __rt_mutex_basic_init(lock);
- rt_mutex_set_owner(lock, proxy_owner);
+ if (timeout && !timeout->task) {
+ ret = -ETIMEDOUT;
+ break;
+ }
+ if (signal_pending_state(state, current)) {
+ ret = -EINTR;
+ break;
+ }
+
+ if (build_ww_mutex() && ww_ctx) {
+ ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
+ if (ret)
+ break;
+ }
+
+ if (waiter == rt_mutex_top_waiter(lock))
+ owner = rt_mutex_owner(lock);
+ else
+ owner = NULL;
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+ if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
+ schedule();
+
+ raw_spin_lock_irq(&lock->wait_lock);
+ set_current_state(state);
+ }
+
+ __set_current_state(TASK_RUNNING);
+ return ret;
}
-/**
- * rt_mutex_proxy_unlock - release a lock on behalf of owner
- *
- * @lock: the rt_mutex to be locked
- *
- * No locking. Caller has to do serializing itself
- *
- * Special API call for PI-futex support. This merrily cleans up the rtmutex
- * (debugging) state. Concurrent operations on this rt_mutex are not
- * possible because it belongs to the pi_state which is about to be freed
- * and it is not longer visible to other tasks.
- */
-void __sched rt_mutex_proxy_unlock(struct rt_mutex *lock)
+static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
+ struct rt_mutex_waiter *w)
{
- debug_rt_mutex_proxy_unlock(lock);
- rt_mutex_set_owner(lock, NULL);
+ /*
+ * If the result is not -EDEADLOCK or the caller requested
+ * deadlock detection, nothing to do here.
+ */
+ if (res != -EDEADLOCK || detect_deadlock)
+ return;
+
+ if (build_ww_mutex() && w->ww_ctx)
+ return;
+
+ /*
+ * Yell loudly and stop the task right here.
+ */
+ WARN(1, "rtmutex deadlock detected\n");
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ }
}
/**
- * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task
- * @lock: the rt_mutex to take
- * @waiter: the pre-initialized rt_mutex_waiter
- * @task: the task to prepare
- *
- * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
- * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
- *
- * NOTE: does _NOT_ remove the @waiter on failure; must either call
- * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this.
- *
- * Returns:
- * 0 - task blocked on lock
- * 1 - acquired the lock for task, caller should wake it up
- * <0 - error
- *
- * Special API call for PI-futex support.
+ * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
+ * @lock: The rtmutex to block lock
+ * @ww_ctx: WW mutex context pointer
+ * @state: The task state for sleeping
+ * @chwalk: Indicator whether full or partial chainwalk is requested
+ * @waiter: Initialized waiter for blocking
*/
-int __sched __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
- struct rt_mutex_waiter *waiter,
- struct task_struct *task)
+static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
+ struct ww_acquire_ctx *ww_ctx,
+ unsigned int state,
+ enum rtmutex_chainwalk chwalk,
+ struct rt_mutex_waiter *waiter)
{
+ struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
+ struct ww_mutex *ww = ww_container_of(rtm);
int ret;
lockdep_assert_held(&lock->wait_lock);
- if (try_to_take_rt_mutex(lock, task, NULL))
- return 1;
+ /* Try to acquire the lock again: */
+ if (try_to_take_rt_mutex(lock, current, NULL)) {
+ if (build_ww_mutex() && ww_ctx) {
+ __ww_mutex_check_waiters(rtm, ww_ctx);
+ ww_mutex_lock_acquired(ww, ww_ctx);
+ }
+ return 0;
+ }
- /* We enforce deadlock detection for futexes */
- ret = task_blocks_on_rt_mutex(lock, waiter, task,
- RT_MUTEX_FULL_CHAINWALK);
+ set_current_state(state);
- if (ret && !rt_mutex_owner(lock)) {
- /*
- * Reset the return value. We might have
- * returned with -EDEADLK and the owner
- * released the lock while we were walking the
- * pi chain. Let the waiter sort it out.
- */
- ret = 0;
+ ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
+ if (likely(!ret))
+ ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
+
+ if (likely(!ret)) {
+ /* acquired the lock */
+ if (build_ww_mutex() && ww_ctx) {
+ if (!ww_ctx->is_wait_die)
+ __ww_mutex_check_waiters(rtm, ww_ctx);
+ ww_mutex_lock_acquired(ww, ww_ctx);
+ }
+ } else {
+ __set_current_state(TASK_RUNNING);
+ remove_waiter(lock, waiter);
+ rt_mutex_handle_deadlock(ret, chwalk, waiter);
}
+ /*
+ * try_to_take_rt_mutex() sets the waiter bit
+ * unconditionally. We might have to fix that up.
+ */
+ fixup_rt_mutex_waiters(lock);
return ret;
}
-/**
- * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
- * @lock: the rt_mutex to take
- * @waiter: the pre-initialized rt_mutex_waiter
- * @task: the task to prepare
- *
- * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
- * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
- *
- * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter
- * on failure.
- *
- * Returns:
- * 0 - task blocked on lock
- * 1 - acquired the lock for task, caller should wake it up
- * <0 - error
- *
- * Special API call for PI-futex support.
- */
-int __sched rt_mutex_start_proxy_lock(struct rt_mutex *lock,
- struct rt_mutex_waiter *waiter,
- struct task_struct *task)
+static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
+ struct ww_acquire_ctx *ww_ctx,
+ unsigned int state)
{
+ struct rt_mutex_waiter waiter;
int ret;
- raw_spin_lock_irq(&lock->wait_lock);
- ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
- if (unlikely(ret))
- remove_waiter(lock, waiter);
- raw_spin_unlock_irq(&lock->wait_lock);
+ rt_mutex_init_waiter(&waiter);
+ waiter.ww_ctx = ww_ctx;
+ ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
+ &waiter);
+
+ debug_rt_mutex_free_waiter(&waiter);
return ret;
}
-/**
- * rt_mutex_wait_proxy_lock() - Wait for lock acquisition
- * @lock: the rt_mutex we were woken on
- * @to: the timeout, null if none. hrtimer should already have
- * been started.
- * @waiter: the pre-initialized rt_mutex_waiter
- *
- * Wait for the lock acquisition started on our behalf by
- * rt_mutex_start_proxy_lock(). Upon failure, the caller must call
- * rt_mutex_cleanup_proxy_lock().
- *
- * Returns:
- * 0 - success
- * <0 - error, one of -EINTR, -ETIMEDOUT
- *
- * Special API call for PI-futex support
+/*
+ * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
+ * @lock: The rtmutex to lock
+ * @ww_ctx: WW mutex context pointer
+ * @state: The task state for sleeping
*/
-int __sched rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
- struct hrtimer_sleeper *to,
- struct rt_mutex_waiter *waiter)
+static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
+ struct ww_acquire_ctx *ww_ctx,
+ unsigned int state)
{
+ unsigned long flags;
int ret;
- raw_spin_lock_irq(&lock->wait_lock);
- /* sleep on the mutex */
- set_current_state(TASK_INTERRUPTIBLE);
- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
/*
- * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
- * have to fix that up.
+ * Technically we could use raw_spin_[un]lock_irq() here, but this can
+ * be called in early boot if the cmpxchg() fast path is disabled
+ * (debug, no architecture support). In this case we will acquire the
+ * rtmutex with lock->wait_lock held. But we cannot unconditionally
+ * enable interrupts in that early boot case. So we need to use the
+ * irqsave/restore variants.
*/
- fixup_rt_mutex_waiters(lock);
- raw_spin_unlock_irq(&lock->wait_lock);
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+ ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
return ret;
}
+static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
+ unsigned int state)
+{
+ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
+ return 0;
+
+ return rt_mutex_slowlock(lock, NULL, state);
+}
+#endif /* RT_MUTEX_BUILD_MUTEX */
+
+#ifdef RT_MUTEX_BUILD_SPINLOCKS
+/*
+ * Functions required for spin/rw_lock substitution on RT kernels
+ */
+
/**
- * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition
- * @lock: the rt_mutex we were woken on
- * @waiter: the pre-initialized rt_mutex_waiter
- *
- * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or
- * rt_mutex_wait_proxy_lock().
- *
- * Unless we acquired the lock; we're still enqueued on the wait-list and can
- * in fact still be granted ownership until we're removed. Therefore we can
- * find we are in fact the owner and must disregard the
- * rt_mutex_wait_proxy_lock() failure.
- *
- * Returns:
- * true - did the cleanup, we done.
- * false - we acquired the lock after rt_mutex_wait_proxy_lock() returned,
- * caller should disregards its return value.
- *
- * Special API call for PI-futex support
+ * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
+ * @lock: The underlying RT mutex
*/
-bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
- struct rt_mutex_waiter *waiter)
+static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
{
- bool cleanup = false;
+ struct rt_mutex_waiter waiter;
+ struct task_struct *owner;
- raw_spin_lock_irq(&lock->wait_lock);
- /*
- * Do an unconditional try-lock, this deals with the lock stealing
- * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
- * sets a NULL owner.
- *
- * We're not interested in the return value, because the subsequent
- * test on rt_mutex_owner() will infer that. If the trylock succeeded,
- * we will own the lock and it will have removed the waiter. If we
- * failed the trylock, we're still not owner and we need to remove
- * ourselves.
- */
- try_to_take_rt_mutex(lock, current, waiter);
- /*
- * Unless we're the owner; we're still enqueued on the wait_list.
- * So check if we became owner, if not, take us off the wait_list.
- */
- if (rt_mutex_owner(lock) != current) {
- remove_waiter(lock, waiter);
- cleanup = true;
+ lockdep_assert_held(&lock->wait_lock);
+
+ if (try_to_take_rt_mutex(lock, current, NULL))
+ return;
+
+ rt_mutex_init_rtlock_waiter(&waiter);
+
+ /* Save current state and set state to TASK_RTLOCK_WAIT */
+ current_save_and_set_rtlock_wait_state();
+
+ task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
+
+ for (;;) {
+ /* Try to acquire the lock again */
+ if (try_to_take_rt_mutex(lock, current, &waiter))
+ break;
+
+ if (&waiter == rt_mutex_top_waiter(lock))
+ owner = rt_mutex_owner(lock);
+ else
+ owner = NULL;
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+ if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
+ schedule_rtlock();
+
+ raw_spin_lock_irq(&lock->wait_lock);
+ set_current_state(TASK_RTLOCK_WAIT);
}
+
+ /* Restore the task state */
+ current_restore_rtlock_saved_state();
+
/*
- * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
- * have to fix that up.
+ * try_to_take_rt_mutex() sets the waiter bit unconditionally.
+ * We might have to fix that up:
*/
fixup_rt_mutex_waiters(lock);
-
- raw_spin_unlock_irq(&lock->wait_lock);
-
- return cleanup;
+ debug_rt_mutex_free_waiter(&waiter);
}
-#ifdef CONFIG_DEBUG_RT_MUTEXES
-void rt_mutex_debug_task_free(struct task_struct *task)
+static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
{
- DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root));
- DEBUG_LOCKS_WARN_ON(task->pi_blocked_on);
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+ rtlock_slowlock_locked(lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
}
-#endif
+
+#endif /* RT_MUTEX_BUILD_SPINLOCKS */
diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
new file mode 100644
index 000000000000..5c9299aaabae
--- /dev/null
+++ b/kernel/locking/rtmutex_api.c
@@ -0,0 +1,590 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * rtmutex API
+ */
+#include <linux/spinlock.h>
+#include <linux/export.h>
+
+#define RT_MUTEX_BUILD_MUTEX
+#include "rtmutex.c"
+
+/*
+ * Max number of times we'll walk the boosting chain:
+ */
+int max_lock_depth = 1024;
+
+/*
+ * Debug aware fast / slowpath lock,trylock,unlock
+ *
+ * The atomic acquire/release ops are compiled away, when either the
+ * architecture does not support cmpxchg or when debugging is enabled.
+ */
+static __always_inline int __rt_mutex_lock_common(struct rt_mutex *lock,
+ unsigned int state,
+ unsigned int subclass)
+{
+ int ret;
+
+ might_sleep();
+ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+ ret = __rt_mutex_lock(&lock->rtmutex, state);
+ if (ret)
+ mutex_release(&lock->dep_map, _RET_IP_);
+ return ret;
+}
+
+void rt_mutex_base_init(struct rt_mutex_base *rtb)
+{
+ __rt_mutex_base_init(rtb);
+}
+EXPORT_SYMBOL(rt_mutex_base_init);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+/**
+ * rt_mutex_lock_nested - lock a rt_mutex
+ *
+ * @lock: the rt_mutex to be locked
+ * @subclass: the lockdep subclass
+ */
+void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass)
+{
+ __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
+}
+EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
+
+#else /* !CONFIG_DEBUG_LOCK_ALLOC */
+
+/**
+ * rt_mutex_lock - lock a rt_mutex
+ *
+ * @lock: the rt_mutex to be locked
+ */
+void __sched rt_mutex_lock(struct rt_mutex *lock)
+{
+ __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
+}
+EXPORT_SYMBOL_GPL(rt_mutex_lock);
+#endif
+
+/**
+ * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
+ *
+ * @lock: the rt_mutex to be locked
+ *
+ * Returns:
+ * 0 on success
+ * -EINTR when interrupted by a signal
+ */
+int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
+{
+ return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
+}
+EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
+
+/**
+ * rt_mutex_trylock - try to lock a rt_mutex
+ *
+ * @lock: the rt_mutex to be locked
+ *
+ * This function can only be called in thread context. It's safe to call it
+ * from atomic regions, but not from hard or soft interrupt context.
+ *
+ * Returns:
+ * 1 on success
+ * 0 on contention
+ */
+int __sched rt_mutex_trylock(struct rt_mutex *lock)
+{
+ int ret;
+
+ if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
+ return 0;
+
+ ret = __rt_mutex_trylock(&lock->rtmutex);
+ if (ret)
+ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(rt_mutex_trylock);
+
+/**
+ * rt_mutex_unlock - unlock a rt_mutex
+ *
+ * @lock: the rt_mutex to be unlocked
+ */
+void __sched rt_mutex_unlock(struct rt_mutex *lock)
+{
+ mutex_release(&lock->dep_map, _RET_IP_);
+ __rt_mutex_unlock(&lock->rtmutex);
+}
+EXPORT_SYMBOL_GPL(rt_mutex_unlock);
+
+/*
+ * Futex variants, must not use fastpath.
+ */
+int __sched rt_mutex_futex_trylock(struct rt_mutex_base *lock)
+{
+ return rt_mutex_slowtrylock(lock);
+}
+
+int __sched __rt_mutex_futex_trylock(struct rt_mutex_base *lock)
+{
+ return __rt_mutex_slowtrylock(lock);
+}
+
+/**
+ * __rt_mutex_futex_unlock - Futex variant, that since futex variants
+ * do not use the fast-path, can be simple and will not need to retry.
+ *
+ * @lock: The rt_mutex to be unlocked
+ * @wqh: The wake queue head from which to get the next lock waiter
+ */
+bool __sched __rt_mutex_futex_unlock(struct rt_mutex_base *lock,
+ struct rt_wake_q_head *wqh)
+{
+ lockdep_assert_held(&lock->wait_lock);
+
+ debug_rt_mutex_unlock(lock);
+
+ if (!rt_mutex_has_waiters(lock)) {
+ lock->owner = NULL;
+ return false; /* done */
+ }
+
+ /*
+ * We've already deboosted, mark_wakeup_next_waiter() will
+ * retain preempt_disabled when we drop the wait_lock, to
+ * avoid inversion prior to the wakeup. preempt_disable()
+ * therein pairs with rt_mutex_postunlock().
+ */
+ mark_wakeup_next_waiter(wqh, lock);
+
+ return true; /* call postunlock() */
+}
+
+void __sched rt_mutex_futex_unlock(struct rt_mutex_base *lock)
+{
+ DEFINE_RT_WAKE_Q(wqh);
+ unsigned long flags;
+ bool postunlock;
+
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+ postunlock = __rt_mutex_futex_unlock(lock, &wqh);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+ if (postunlock)
+ rt_mutex_postunlock(&wqh);
+}
+
+/**
+ * __rt_mutex_init - initialize the rt_mutex
+ *
+ * @lock: The rt_mutex to be initialized
+ * @name: The lock name used for debugging
+ * @key: The lock class key used for debugging
+ *
+ * Initialize the rt_mutex to unlocked state.
+ *
+ * Initializing a locked rt_mutex is not allowed
+ */
+void __sched __rt_mutex_init(struct rt_mutex *lock, const char *name,
+ struct lock_class_key *key)
+{
+ debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+ __rt_mutex_base_init(&lock->rtmutex);
+ lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP);
+}
+EXPORT_SYMBOL_GPL(__rt_mutex_init);
+
+/**
+ * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
+ * proxy owner
+ *
+ * @lock: the rt_mutex to be locked
+ * @proxy_owner:the task to set as owner
+ *
+ * No locking. Caller has to do serializing itself
+ *
+ * Special API call for PI-futex support. This initializes the rtmutex and
+ * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not
+ * possible at this point because the pi_state which contains the rtmutex
+ * is not yet visible to other tasks.
+ */
+void __sched rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
+ struct task_struct *proxy_owner)
+{
+ static struct lock_class_key pi_futex_key;
+
+ __rt_mutex_base_init(lock);
+ /*
+ * On PREEMPT_RT the futex hashbucket spinlock becomes 'sleeping'
+ * and rtmutex based. That causes a lockdep false positive, because
+ * some of the futex functions invoke spin_unlock(&hb->lock) with
+ * the wait_lock of the rtmutex associated to the pi_futex held.
+ * spin_unlock() in turn takes wait_lock of the rtmutex on which
+ * the spinlock is based, which makes lockdep notice a lock
+ * recursion. Give the futex/rtmutex wait_lock a separate key.
+ */
+ lockdep_set_class(&lock->wait_lock, &pi_futex_key);
+ rt_mutex_set_owner(lock, proxy_owner);
+}
+
+/**
+ * rt_mutex_proxy_unlock - release a lock on behalf of owner
+ *
+ * @lock: the rt_mutex to be unlocked
+ *
+ * No locking. Caller has to do serializing itself
+ *
+ * Special API call for PI-futex support. This just cleans up the rtmutex
+ * (debugging) state. Concurrent operations on this rt_mutex are not
+ * possible because it belongs to the pi_state which is about to be freed
+ * and it is no longer visible to other tasks.
+ */
+void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
+{
+ debug_rt_mutex_proxy_unlock(lock);
+ rt_mutex_set_owner(lock, NULL);
+}
+
+/**
+ * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task
+ * @lock: the rt_mutex to take
+ * @waiter: the pre-initialized rt_mutex_waiter
+ * @task: the task to prepare
+ *
+ * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
+ * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
+ *
+ * NOTE: does _NOT_ remove the @waiter on failure; must either call
+ * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this.
+ *
+ * Returns:
+ * 0 - task blocked on lock
+ * 1 - acquired the lock for task, caller should wake it up
+ * <0 - error
+ *
+ * Special API call for PI-futex support.
+ */
+int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
+ struct rt_mutex_waiter *waiter,
+ struct task_struct *task)
+{
+ int ret;
+
+ lockdep_assert_held(&lock->wait_lock);
+
+ if (try_to_take_rt_mutex(lock, task, NULL))
+ return 1;
+
+ /* We enforce deadlock detection for futexes */
+ ret = task_blocks_on_rt_mutex(lock, waiter, task, NULL,
+ RT_MUTEX_FULL_CHAINWALK);
+
+ if (ret && !rt_mutex_owner(lock)) {
+ /*
+ * Reset the return value. We might have
+ * returned with -EDEADLK and the owner
+ * released the lock while we were walking the
+ * pi chain. Let the waiter sort it out.
+ */
+ ret = 0;
+ }
+
+ return ret;
+}
+
+/**
+ * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
+ * @lock: the rt_mutex to take
+ * @waiter: the pre-initialized rt_mutex_waiter
+ * @task: the task to prepare
+ *
+ * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
+ * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
+ *
+ * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter
+ * on failure.
+ *
+ * Returns:
+ * 0 - task blocked on lock
+ * 1 - acquired the lock for task, caller should wake it up
+ * <0 - error
+ *
+ * Special API call for PI-futex support.
+ */
+int __sched rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
+ struct rt_mutex_waiter *waiter,
+ struct task_struct *task)
+{
+ int ret;
+
+ raw_spin_lock_irq(&lock->wait_lock);
+ ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
+ if (unlikely(ret))
+ remove_waiter(lock, waiter);
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+ return ret;
+}
+
+/**
+ * rt_mutex_wait_proxy_lock() - Wait for lock acquisition
+ * @lock: the rt_mutex we were woken on
+ * @to: the timeout, null if none. hrtimer should already have
+ * been started.
+ * @waiter: the pre-initialized rt_mutex_waiter
+ *
+ * Wait for the lock acquisition started on our behalf by
+ * rt_mutex_start_proxy_lock(). Upon failure, the caller must call
+ * rt_mutex_cleanup_proxy_lock().
+ *
+ * Returns:
+ * 0 - success
+ * <0 - error, one of -EINTR, -ETIMEDOUT
+ *
+ * Special API call for PI-futex support
+ */
+int __sched rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock,
+ struct hrtimer_sleeper *to,
+ struct rt_mutex_waiter *waiter)
+{
+ int ret;
+
+ raw_spin_lock_irq(&lock->wait_lock);
+ /* sleep on the mutex */
+ set_current_state(TASK_INTERRUPTIBLE);
+ ret = rt_mutex_slowlock_block(lock, NULL, TASK_INTERRUPTIBLE, to, waiter);
+ /*
+ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+ * have to fix that up.
+ */
+ fixup_rt_mutex_waiters(lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+ return ret;
+}
+
+/**
+ * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition
+ * @lock: the rt_mutex we were woken on
+ * @waiter: the pre-initialized rt_mutex_waiter
+ *
+ * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or
+ * rt_mutex_wait_proxy_lock().
+ *
+ * Unless we acquired the lock; we're still enqueued on the wait-list and can
+ * in fact still be granted ownership until we're removed. Therefore we can
+ * find we are in fact the owner and must disregard the
+ * rt_mutex_wait_proxy_lock() failure.
+ *
+ * Returns:
+ * true - did the cleanup, we are done.
+ * false - we acquired the lock after rt_mutex_wait_proxy_lock() returned,
+ * caller should disregard its return value.
+ *
+ * Special API call for PI-futex support
+ */
+bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock,
+ struct rt_mutex_waiter *waiter)
+{
+ bool cleanup = false;
+
+ raw_spin_lock_irq(&lock->wait_lock);
+ /*
+ * Do an unconditional try-lock, this deals with the lock stealing
+ * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
+ * sets a NULL owner.
+ *
+ * We're not interested in the return value, because the subsequent
+ * test on rt_mutex_owner() will infer that. If the trylock succeeded,
+ * we will own the lock and it will have removed the waiter. If we
+ * failed the trylock, we're still not owner and we need to remove
+ * ourselves.
+ */
+ try_to_take_rt_mutex(lock, current, waiter);
+ /*
+ * Unless we're the owner; we're still enqueued on the wait_list.
+ * So check if we became owner, if not, take us off the wait_list.
+ */
+ if (rt_mutex_owner(lock) != current) {
+ remove_waiter(lock, waiter);
+ cleanup = true;
+ }
+ /*
+ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+ * have to fix that up.
+ */
+ fixup_rt_mutex_waiters(lock);
+
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+ return cleanup;
+}
+
+/*
+ * Recheck the pi chain, in case we got a priority setting
+ *
+ * Called from sched_setscheduler
+ */
+void __sched rt_mutex_adjust_pi(struct task_struct *task)
+{
+ struct rt_mutex_waiter *waiter;
+ struct rt_mutex_base *next_lock;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+
+ waiter = task->pi_blocked_on;
+ if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ return;
+ }
+ next_lock = waiter->lock;
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+
+ /* gets dropped in rt_mutex_adjust_prio_chain()! */
+ get_task_struct(task);
+
+ rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
+ next_lock, NULL, task);
+}
+
+/*
+ * Performs the wakeup of the top-waiter and re-enables preemption.
+ */
+void __sched rt_mutex_postunlock(struct rt_wake_q_head *wqh)
+{
+ rt_mutex_wake_up_q(wqh);
+}
+
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+void rt_mutex_debug_task_free(struct task_struct *task)
+{
+ DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root));
+ DEBUG_LOCKS_WARN_ON(task->pi_blocked_on);
+}
+#endif
+
+#ifdef CONFIG_PREEMPT_RT
+/* Mutexes */
+void __mutex_rt_init(struct mutex *mutex, const char *name,
+ struct lock_class_key *key)
+{
+ debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
+ lockdep_init_map_wait(&mutex->dep_map, name, key, 0, LD_WAIT_SLEEP);
+}
+EXPORT_SYMBOL(__mutex_rt_init);
+
+static __always_inline int __mutex_lock_common(struct mutex *lock,
+ unsigned int state,
+ unsigned int subclass,
+ struct lockdep_map *nest_lock,
+ unsigned long ip)
+{
+ int ret;
+
+ might_sleep();
+ mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
+ ret = __rt_mutex_lock(&lock->rtmutex, state);
+ if (ret)
+ mutex_release(&lock->dep_map, ip);
+ else
+ lock_acquired(&lock->dep_map, ip);
+ return ret;
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass)
+{
+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
+}
+EXPORT_SYMBOL_GPL(mutex_lock_nested);
+
+void __sched _mutex_lock_nest_lock(struct mutex *lock,
+ struct lockdep_map *nest_lock)
+{
+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest_lock, _RET_IP_);
+}
+EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
+
+int __sched mutex_lock_interruptible_nested(struct mutex *lock,
+ unsigned int subclass)
+{
+ return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, NULL, _RET_IP_);
+}
+EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
+
+int __sched mutex_lock_killable_nested(struct mutex *lock,
+ unsigned int subclass)
+{
+ return __mutex_lock_common(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_);
+}
+EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
+
+void __sched mutex_lock_io_nested(struct mutex *lock, unsigned int subclass)
+{
+ int token;
+
+ might_sleep();
+
+ token = io_schedule_prepare();
+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
+ io_schedule_finish(token);
+}
+EXPORT_SYMBOL_GPL(mutex_lock_io_nested);
+
+#else /* CONFIG_DEBUG_LOCK_ALLOC */
+
+void __sched mutex_lock(struct mutex *lock)
+{
+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
+}
+EXPORT_SYMBOL(mutex_lock);
+
+int __sched mutex_lock_interruptible(struct mutex *lock)
+{
+ return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_);
+}
+EXPORT_SYMBOL(mutex_lock_interruptible);
+
+int __sched mutex_lock_killable(struct mutex *lock)
+{
+ return __mutex_lock_common(lock, TASK_KILLABLE, 0, NULL, _RET_IP_);
+}
+EXPORT_SYMBOL(mutex_lock_killable);
+
+void __sched mutex_lock_io(struct mutex *lock)
+{
+ int token = io_schedule_prepare();
+
+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
+ io_schedule_finish(token);
+}
+EXPORT_SYMBOL(mutex_lock_io);
+#endif /* !CONFIG_DEBUG_LOCK_ALLOC */
+
+int __sched mutex_trylock(struct mutex *lock)
+{
+ int ret;
+
+ if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
+ return 0;
+
+ ret = __rt_mutex_trylock(&lock->rtmutex);
+ if (ret)
+ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+
+ return ret;
+}
+EXPORT_SYMBOL(mutex_trylock);
+
+void __sched mutex_unlock(struct mutex *lock)
+{
+ mutex_release(&lock->dep_map, _RET_IP_);
+ __rt_mutex_unlock(&lock->rtmutex);
+}
+EXPORT_SYMBOL(mutex_unlock);
+
+#endif /* CONFIG_PREEMPT_RT */
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index a90c22abdbca..c47e8361bfb5 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -25,29 +25,90 @@
* @pi_tree_entry: pi node to enqueue into the mutex owner waiters tree
* @task: task reference to the blocked task
* @lock: Pointer to the rt_mutex on which the waiter blocks
+ * @wake_state: Wakeup state to use (TASK_NORMAL or TASK_RTLOCK_WAIT)
* @prio: Priority of the waiter
* @deadline: Deadline of the waiter if applicable
+ * @ww_ctx: WW context pointer
*/
struct rt_mutex_waiter {
struct rb_node tree_entry;
struct rb_node pi_tree_entry;
struct task_struct *task;
- struct rt_mutex *lock;
+ struct rt_mutex_base *lock;
+ unsigned int wake_state;
int prio;
u64 deadline;
+ struct ww_acquire_ctx *ww_ctx;
};
+/**
+ * rt_wake_q_head - Wrapper around regular wake_q_head to support
+ * "sleeping" spinlocks on RT
+ * @head: The regular wake_q_head for sleeping lock variants
+ * @rtlock_task: Task pointer for RT lock (spin/rwlock) wakeups
+ */
+struct rt_wake_q_head {
+ struct wake_q_head head;
+ struct task_struct *rtlock_task;
+};
+
+#define DEFINE_RT_WAKE_Q(name) \
+ struct rt_wake_q_head name = { \
+ .head = WAKE_Q_HEAD_INITIALIZER(name.head), \
+ .rtlock_task = NULL, \
+ }
+
+/*
+ * PI-futex support (proxy locking functions, etc.):
+ */
+extern void rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
+ struct task_struct *proxy_owner);
+extern void rt_mutex_proxy_unlock(struct rt_mutex_base *lock);
+extern int __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
+ struct rt_mutex_waiter *waiter,
+ struct task_struct *task);
+extern int rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
+ struct rt_mutex_waiter *waiter,
+ struct task_struct *task);
+extern int rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock,
+ struct hrtimer_sleeper *to,
+ struct rt_mutex_waiter *waiter);
+extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock,
+ struct rt_mutex_waiter *waiter);
+
+extern int rt_mutex_futex_trylock(struct rt_mutex_base *l);
+extern int __rt_mutex_futex_trylock(struct rt_mutex_base *l);
+
+extern void rt_mutex_futex_unlock(struct rt_mutex_base *lock);
+extern bool __rt_mutex_futex_unlock(struct rt_mutex_base *lock,
+ struct rt_wake_q_head *wqh);
+
+extern void rt_mutex_postunlock(struct rt_wake_q_head *wqh);
+
/*
* Must be guarded because this header is included from rcu/tree_plugin.h
* unconditionally.
*/
#ifdef CONFIG_RT_MUTEXES
-static inline int rt_mutex_has_waiters(struct rt_mutex *lock)
+static inline int rt_mutex_has_waiters(struct rt_mutex_base *lock)
{
return !RB_EMPTY_ROOT(&lock->waiters.rb_root);
}
-static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex *lock)
+/*
+ * Lockless speculative check whether @waiter is still the top waiter on
+ * @lock. This is solely comparing pointers and not dereferencing the
+ * leftmost entry which might be about to vanish.
+ */
+static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock,
+ struct rt_mutex_waiter *waiter)
+{
+ struct rb_node *leftmost = rb_first_cached(&lock->waiters);
+
+ return rb_entry(leftmost, struct rt_mutex_waiter, tree_entry) == waiter;
+}
+
+static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock)
{
struct rb_node *leftmost = rb_first_cached(&lock->waiters);
struct rt_mutex_waiter *w = NULL;
@@ -72,19 +133,12 @@ static inline struct rt_mutex_waiter *task_top_pi_waiter(struct task_struct *p)
#define RT_MUTEX_HAS_WAITERS 1UL
-static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
+static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock)
{
unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS);
}
-#else /* CONFIG_RT_MUTEXES */
-/* Used in rcu/tree_plugin.h */
-static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
-{
- return NULL;
-}
-#endif /* !CONFIG_RT_MUTEXES */
/*
* Constants for rt mutex functions which have a selectable deadlock
@@ -101,49 +155,21 @@ enum rtmutex_chainwalk {
RT_MUTEX_FULL_CHAINWALK,
};
-static inline void __rt_mutex_basic_init(struct rt_mutex *lock)
+static inline void __rt_mutex_base_init(struct rt_mutex_base *lock)
{
- lock->owner = NULL;
raw_spin_lock_init(&lock->wait_lock);
lock->waiters = RB_ROOT_CACHED;
+ lock->owner = NULL;
}
-/*
- * PI-futex support (proxy locking functions, etc.):
- */
-extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
- struct task_struct *proxy_owner);
-extern void rt_mutex_proxy_unlock(struct rt_mutex *lock);
-extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
-extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
- struct rt_mutex_waiter *waiter,
- struct task_struct *task);
-extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
- struct rt_mutex_waiter *waiter,
- struct task_struct *task);
-extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
- struct hrtimer_sleeper *to,
- struct rt_mutex_waiter *waiter);
-extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
- struct rt_mutex_waiter *waiter);
-
-extern int rt_mutex_futex_trylock(struct rt_mutex *l);
-extern int __rt_mutex_futex_trylock(struct rt_mutex *l);
-
-extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
-extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
- struct wake_q_head *wqh);
-
-extern void rt_mutex_postunlock(struct wake_q_head *wake_q);
-
/* Debug functions */
-static inline void debug_rt_mutex_unlock(struct rt_mutex *lock)
+static inline void debug_rt_mutex_unlock(struct rt_mutex_base *lock)
{
if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
DEBUG_LOCKS_WARN_ON(rt_mutex_owner(lock) != current);
}
-static inline void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
+static inline void debug_rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
{
if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
DEBUG_LOCKS_WARN_ON(!rt_mutex_owner(lock));
@@ -161,4 +187,27 @@ static inline void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
memset(waiter, 0x22, sizeof(*waiter));
}
+static inline void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
+{
+ debug_rt_mutex_init_waiter(waiter);
+ RB_CLEAR_NODE(&waiter->pi_tree_entry);
+ RB_CLEAR_NODE(&waiter->tree_entry);
+ waiter->wake_state = TASK_NORMAL;
+ waiter->task = NULL;
+}
+
+static inline void rt_mutex_init_rtlock_waiter(struct rt_mutex_waiter *waiter)
+{
+ rt_mutex_init_waiter(waiter);
+ waiter->wake_state = TASK_RTLOCK_WAIT;
+}
+
+#else /* CONFIG_RT_MUTEXES */
+/* Used in rcu/tree_plugin.h */
+static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock)
+{
+ return NULL;
+}
+#endif /* !CONFIG_RT_MUTEXES */
+
#endif
diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
new file mode 100644
index 000000000000..4ba15088e640
--- /dev/null
+++ b/kernel/locking/rwbase_rt.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * RT-specific reader/writer semaphores and reader/writer locks
+ *
+ * down_write/write_lock()
+ * 1) Lock rtmutex
+ * 2) Remove the reader BIAS to force readers into the slow path
+ * 3) Wait until all readers have left the critical section
+ * 4) Mark it write locked
+ *
+ * up_write/write_unlock()
+ * 1) Remove the write locked marker
+ * 2) Set the reader BIAS, so readers can use the fast path again
+ * 3) Unlock rtmutex, to release blocked readers
+ *
+ * down_read/read_lock()
+ * 1) Try fast path acquisition (reader BIAS is set)
+ * 2) Take rtmutex::wait_lock, which protects the writelocked flag
+ * 3) If !writelocked, acquire it for read
+ * 4) If writelocked, block on rtmutex
+ * 5) unlock rtmutex, goto 1)
+ *
+ * up_read/read_unlock()
+ * 1) Try fast path release (reader count != 1)
+ * 2) Wake the writer waiting in down_write()/write_lock() #3
+ *
+ * down_read/read_lock()#3 has the consequence, that rw semaphores and rw
+ * locks on RT are not writer fair, but writers, which should be avoided in
+ * RT tasks (think mmap_sem), are subject to the rtmutex priority/DL
+ * inheritance mechanism.
+ *
+ * It's possible to make the rw primitives writer fair by keeping a list of
+ * active readers. A blocked writer would force all newly incoming readers
+ * to block on the rtmutex, but the rtmutex would have to be proxy locked
+ * for one reader after the other. We can't use multi-reader inheritance
+ * because there is no way to support that with SCHED_DEADLINE.
+ * Implementing the one by one reader boosting/handover mechanism is a
+ * major surgery for a very dubious value.
+ *
+ * The risk of writer starvation is there, but the pathological use cases
+ * which trigger it are not necessarily the typical RT workloads.
+ *
+ * Common code shared between RT rw_semaphore and rwlock
+ */
+
+static __always_inline int rwbase_read_trylock(struct rwbase_rt *rwb)
+{
+ int r;
+
+ /*
+ * Increment reader count, if rwb->readers < 0, i.e. READER_BIAS is
+ * set.
+ */
+ for (r = atomic_read(&rwb->readers); r < 0;) {
+ if (likely(atomic_try_cmpxchg(&rwb->readers, &r, r + 1)))
+ return 1;
+ }
+ return 0;
+}
+
+static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
+ unsigned int state)
+{
+ struct rt_mutex_base *rtm = &rwb->rtmutex;
+ int ret;
+
+ raw_spin_lock_irq(&rtm->wait_lock);
+ /*
+ * Allow readers, as long as the writer has not completely
+ * acquired the semaphore for write.
+ */
+ if (atomic_read(&rwb->readers) != WRITER_BIAS) {
+ atomic_inc(&rwb->readers);
+ raw_spin_unlock_irq(&rtm->wait_lock);
+ return 0;
+ }
+
+ /*
+ * Call into the slow lock path with the rtmutex->wait_lock
+ * held, so this can't result in the following race:
+ *
+ * Reader1 Reader2 Writer
+ * down_read()
+ * down_write()
+ * rtmutex_lock(m)
+ * wait()
+ * down_read()
+ * unlock(m->wait_lock)
+ * up_read()
+ * wake(Writer)
+ * lock(m->wait_lock)
+ * sem->writelocked=true
+ * unlock(m->wait_lock)
+ *
+ * up_write()
+ * sem->writelocked=false
+ * rtmutex_unlock(m)
+ * down_read()
+ * down_write()
+ * rtmutex_lock(m)
+ * wait()
+ * rtmutex_lock(m)
+ *
+ * That would put Reader1 behind the writer waiting on
+ * Reader2 to call up_read(), which might be unbounded.
+ */
+
+ /*
+ * For rwlocks this returns 0 unconditionally, so the below
+ * !ret conditionals are optimized out.
+ */
+ ret = rwbase_rtmutex_slowlock_locked(rtm, state);
+
+ /*
+ * On success the rtmutex is held, so there can't be a writer
+ * active. Increment the reader count and immediately drop the
+ * rtmutex again.
+ *
+ * rtmutex->wait_lock has to be unlocked in any case of course.
+ */
+ if (!ret)
+ atomic_inc(&rwb->readers);
+ raw_spin_unlock_irq(&rtm->wait_lock);
+ if (!ret)
+ rwbase_rtmutex_unlock(rtm);
+ return ret;
+}
+
+static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb,
+ unsigned int state)
+{
+ if (rwbase_read_trylock(rwb))
+ return 0;
+
+ return __rwbase_read_lock(rwb, state);
+}
+
+static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
+ unsigned int state)
+{
+ struct rt_mutex_base *rtm = &rwb->rtmutex;
+ struct task_struct *owner;
+
+ raw_spin_lock_irq(&rtm->wait_lock);
+ /*
+ * Wake the writer, i.e. the rtmutex owner. It might release the
+ * rtmutex concurrently in the fast path (due to a signal), but to
+ * clean up rwb->readers it needs to acquire rtm->wait_lock. The
+ * worst case which can happen is a spurious wakeup.
+ */
+ owner = rt_mutex_owner(rtm);
+ if (owner)
+ wake_up_state(owner, state);
+
+ raw_spin_unlock_irq(&rtm->wait_lock);
+}
+
+static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb,
+ unsigned int state)
+{
+ /*
+ * rwb->readers can only hit 0 when a writer is waiting for the
+ * active readers to leave the critical section.
+ */
+ if (unlikely(atomic_dec_and_test(&rwb->readers)))
+ __rwbase_read_unlock(rwb, state);
+}
+
+/*
+ * Common tail of the write unlock, downgrade and failed write lock paths.
+ *
+ * Called with rtm->wait_lock held. Adds (READER_BIAS - @bias) to
+ * rwb->readers: callers pass WRITER_BIAS for a full unlock, WRITER_BIAS - 1
+ * to keep the caller accounted as a reader, and 0 when only the earlier
+ * READER_BIAS subtraction has to be undone. Then drops wait_lock (restoring
+ * @flags) and finally the rtmutex.
+ */
+static inline void __rwbase_write_unlock(struct rwbase_rt *rwb, int bias,
+					 unsigned long flags)
+{
+	struct rt_mutex_base *rtm = &rwb->rtmutex;
+
+	/*
+	 * RELEASE is required: a reader can take the lock through the
+	 * lockless cmpxchg in rwbase_read_trylock() without ever touching
+	 * wait_lock, so this update is the only thing ordering the write
+	 * side critical section before that reader. A plain atomic_add()
+	 * provides no ordering at all.
+	 */
+	(void)atomic_add_return_release(READER_BIAS - bias, &rwb->readers);
+	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
+	rwbase_rtmutex_unlock(rtm);
+}
+
+static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
+{
+ struct rt_mutex_base *rtm = &rwb->rtmutex;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+ __rwbase_write_unlock(rwb, WRITER_BIAS, flags);
+}
+
+static inline void rwbase_write_downgrade(struct rwbase_rt *rwb)
+{
+ struct rt_mutex_base *rtm = &rwb->rtmutex;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+ /* Release it and account current as reader */
+ __rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
+}
+
+/*
+ * Acquire @rwb for writing, waiting in task state @state.
+ *
+ * Returns 0 on success, or -EINTR when taking the rtmutex fails or when a
+ * signal is pending for @state while waiting for the readers to drain.
+ */
+static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
+				     unsigned int state)
+{
+	struct rt_mutex_base *rtm = &rwb->rtmutex;
+	unsigned long flags;
+
+	/* Take the rtmutex as a first step */
+	if (rwbase_rtmutex_lock_state(rtm, state))
+		return -EINTR;
+
+	/* Force readers into slow path */
+	atomic_sub(READER_BIAS, &rwb->readers);
+
+	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+	/*
+	 * set_current_state() for rw_semaphore
+	 * current_save_and_set_rtlock_wait_state() for rwlock
+	 */
+	rwbase_set_and_save_current_state(state);
+
+	/*
+	 * Block until all readers have left the critical section.
+	 *
+	 * ACQUIRE is required: the last reader drops its count locklessly
+	 * in rwbase_read_unlock(), so observing zero here must order that
+	 * reader's critical section before the write side one.
+	 */
+	for (; atomic_read_acquire(&rwb->readers);) {
+		/* Optimized out for rwlocks */
+		if (rwbase_signal_pending_state(state, current)) {
+			/* Match rwbase_set_and_save_current_state() above */
+			rwbase_restore_current_state();
+			__rwbase_write_unlock(rwb, 0, flags);
+			return -EINTR;
+		}
+		raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
+
+		/*
+		 * Schedule and wait for the readers to leave the critical
+		 * section. The last reader leaving it wakes the waiter.
+		 */
+		if (atomic_read(&rwb->readers) != 0)
+			rwbase_schedule();
+		set_current_state(state);
+		raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+	}
+
+	/* No readers left: mark the lock write locked */
+	atomic_set(&rwb->readers, WRITER_BIAS);
+	rwbase_restore_current_state();
+	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
+	return 0;
+}
+
+/*
+ * Try to acquire @rwb for writing without blocking.
+ *
+ * Returns 1 when the lock was taken, 0 when the rtmutex trylock fails or
+ * readers are still accounted in rwb->readers.
+ */
+static inline int rwbase_write_trylock(struct rwbase_rt *rwb)
+{
+	struct rt_mutex_base *rtm = &rwb->rtmutex;
+	unsigned long flags;
+
+	if (!rwbase_rtmutex_trylock(rtm))
+		return 0;
+
+	/* Force readers into slow path */
+	atomic_sub(READER_BIAS, &rwb->readers);
+
+	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+	/*
+	 * ACQUIRE pairs with the lockless decrement in rwbase_read_unlock():
+	 * seeing zero readers must also order their critical sections
+	 * before the write side one.
+	 */
+	if (!atomic_read_acquire(&rwb->readers)) {
+		atomic_set(&rwb->readers, WRITER_BIAS);
+		raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
+		return 1;
+	}
+	/* Readers present: add READER_BIAS back and drop the rtmutex */
+	__rwbase_write_unlock(rwb, 0, flags);
+	return 0;
+}
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 16bfbb10c74d..9215b4d6a9de 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -28,6 +28,7 @@
#include <linux/rwsem.h>
#include <linux/atomic.h>
+#ifndef CONFIG_PREEMPT_RT
#include "lock_events.h"
/*
@@ -1165,7 +1166,7 @@ out_nolock:
* handle waking up a waiter on the semaphore
* - up_read/up_write has decremented the active part of count if we come here
*/
-static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem, long count)
+static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
unsigned long flags;
DEFINE_WAKE_Q(wake_q);
@@ -1297,7 +1298,7 @@ static inline void __up_read(struct rw_semaphore *sem)
if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
RWSEM_FLAG_WAITERS)) {
clear_nonspinnable(sem);
- rwsem_wake(sem, tmp);
+ rwsem_wake(sem);
}
}
@@ -1319,7 +1320,7 @@ static inline void __up_write(struct rw_semaphore *sem)
rwsem_clear_owner(sem);
tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
if (unlikely(tmp & RWSEM_FLAG_WAITERS))
- rwsem_wake(sem, tmp);
+ rwsem_wake(sem);
}
/*
@@ -1344,6 +1345,114 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
rwsem_downgrade_wake(sem);
}
+#else /* !CONFIG_PREEMPT_RT */
+
+#define RT_MUTEX_BUILD_MUTEX
+#include "rtmutex.c"
+
+#define rwbase_set_and_save_current_state(state) \
+ set_current_state(state)
+
+#define rwbase_restore_current_state() \
+ __set_current_state(TASK_RUNNING)
+
+#define rwbase_rtmutex_lock_state(rtm, state) \
+ __rt_mutex_lock(rtm, state)
+
+#define rwbase_rtmutex_slowlock_locked(rtm, state) \
+ __rt_mutex_slowlock_locked(rtm, NULL, state)
+
+#define rwbase_rtmutex_unlock(rtm) \
+ __rt_mutex_unlock(rtm)
+
+#define rwbase_rtmutex_trylock(rtm) \
+ __rt_mutex_trylock(rtm)
+
+#define rwbase_signal_pending_state(state, current) \
+ signal_pending_state(state, current)
+
+#define rwbase_schedule() \
+ schedule()
+
+#include "rwbase_rt.c"
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void __rwsem_init(struct rw_semaphore *sem, const char *name,
+ struct lock_class_key *key)
+{
+ debug_check_no_locks_freed((void *)sem, sizeof(*sem));
+ lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
+}
+EXPORT_SYMBOL(__rwsem_init);
+#endif
+
+static inline void __down_read(struct rw_semaphore *sem)
+{
+ rwbase_read_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
+}
+
+static inline int __down_read_interruptible(struct rw_semaphore *sem)
+{
+ return rwbase_read_lock(&sem->rwbase, TASK_INTERRUPTIBLE);
+}
+
+static inline int __down_read_killable(struct rw_semaphore *sem)
+{
+ return rwbase_read_lock(&sem->rwbase, TASK_KILLABLE);
+}
+
+static inline int __down_read_trylock(struct rw_semaphore *sem)
+{
+ return rwbase_read_trylock(&sem->rwbase);
+}
+
+static inline void __up_read(struct rw_semaphore *sem)
+{
+ rwbase_read_unlock(&sem->rwbase, TASK_NORMAL);
+}
+
+static inline void __sched __down_write(struct rw_semaphore *sem)
+{
+ rwbase_write_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
+}
+
+static inline int __sched __down_write_killable(struct rw_semaphore *sem)
+{
+ return rwbase_write_lock(&sem->rwbase, TASK_KILLABLE);
+}
+
+static inline int __down_write_trylock(struct rw_semaphore *sem)
+{
+ return rwbase_write_trylock(&sem->rwbase);
+}
+
+static inline void __up_write(struct rw_semaphore *sem)
+{
+ rwbase_write_unlock(&sem->rwbase);
+}
+
+static inline void __downgrade_write(struct rw_semaphore *sem)
+{
+ rwbase_write_downgrade(&sem->rwbase);
+}
+
+/* Debug stubs for the common API */
+#define DEBUG_RWSEMS_WARN_ON(c, sem)
+
+static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
+ struct task_struct *owner)
+{
+}
+
+static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
+{
+ int count = atomic_read(&sem->rwbase.readers);
+
+ return count < 0 && count != READER_BIAS;
+}
+
+#endif /* CONFIG_PREEMPT_RT */
+
/*
* lock for reading
*/
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index 9aa855a96c4a..9ee381e4d2a4 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -54,6 +54,7 @@ void down(struct semaphore *sem)
{
unsigned long flags;
+ might_sleep();
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
sem->count--;
@@ -77,6 +78,7 @@ int down_interruptible(struct semaphore *sem)
unsigned long flags;
int result = 0;
+ might_sleep();
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
sem->count--;
@@ -103,6 +105,7 @@ int down_killable(struct semaphore *sem)
unsigned long flags;
int result = 0;
+ might_sleep();
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
sem->count--;
@@ -157,6 +160,7 @@ int down_timeout(struct semaphore *sem, long timeout)
unsigned long flags;
int result = 0;
+ might_sleep();
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
sem->count--;
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index c8d7ad9fb9b2..c5830cfa379a 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \
* __[spin|read|write]_lock_bh()
*/
BUILD_LOCK_OPS(spin, raw_spinlock);
+
+#ifndef CONFIG_PREEMPT_RT
BUILD_LOCK_OPS(read, rwlock);
BUILD_LOCK_OPS(write, rwlock);
+#endif
#endif
@@ -209,6 +212,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
EXPORT_SYMBOL(_raw_spin_unlock_bh);
#endif
+#ifndef CONFIG_PREEMPT_RT
+
#ifndef CONFIG_INLINE_READ_TRYLOCK
int __lockfunc _raw_read_trylock(rwlock_t *lock)
{
@@ -353,6 +358,8 @@ void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
EXPORT_SYMBOL(_raw_write_unlock_bh);
#endif
+#endif /* !CONFIG_PREEMPT_RT */
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index b9d93087ee66..14235671a1a7 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
EXPORT_SYMBOL(__raw_spin_lock_init);
+#ifndef CONFIG_PREEMPT_RT
void __rwlock_init(rwlock_t *lock, const char *name,
struct lock_class_key *key)
{
@@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const char *name,
}
EXPORT_SYMBOL(__rwlock_init);
+#endif
static void spin_dump(raw_spinlock_t *lock, const char *msg)
{
@@ -139,6 +141,7 @@ void do_raw_spin_unlock(raw_spinlock_t *lock)
arch_spin_unlock(&lock->raw_lock);
}
+#ifndef CONFIG_PREEMPT_RT
static void rwlock_bug(rwlock_t *lock, const char *msg)
{
if (!debug_locks_off())
@@ -228,3 +231,5 @@ void do_raw_write_unlock(rwlock_t *lock)
debug_write_unlock(lock);
arch_write_unlock(&lock->raw_lock);
}
+
+#endif /* !CONFIG_PREEMPT_RT */
diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c
new file mode 100644
index 000000000000..d2912e44d61f
--- /dev/null
+++ b/kernel/locking/spinlock_rt.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PREEMPT_RT substitution for spin/rw_locks
+ *
+ * spinlocks and rwlocks on RT are based on rtmutexes, with a few twists to
+ * resemble the non RT semantics:
+ *
+ * - Contrary to plain rtmutexes, spinlocks and rwlocks are state
+ * preserving. The task state is saved before blocking on the underlying
+ * rtmutex, and restored when the lock has been acquired. Regular wakeups
+ * during that time are redirected to the saved state so no wake up is
+ * missed.
+ *
+ * - Non RT spin/rwlocks disable preemption and eventually interrupts.
+ * Disabling preemption has the side effect of disabling migration and
+ * preventing RCU grace periods.
+ *
+ * The RT substitutions explicitly disable migration and take
+ * rcu_read_lock() across the lock held section.
+ */
+#include <linux/spinlock.h>
+#include <linux/export.h>
+
+#define RT_MUTEX_BUILD_SPINLOCKS
+#include "rtmutex.c"
+
+static __always_inline void rtlock_lock(struct rt_mutex_base *rtm)
+{
+ if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
+ rtlock_slowlock(rtm);
+}
+
+static __always_inline void __rt_spin_lock(spinlock_t *lock)
+{
+ ___might_sleep(__FILE__, __LINE__, 0);
+ rtlock_lock(&lock->lock);
+ rcu_read_lock();
+ migrate_disable();
+}
+
+void __sched rt_spin_lock(spinlock_t *lock)
+{
+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+ __rt_spin_lock(lock);
+}
+EXPORT_SYMBOL(rt_spin_lock);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void __sched rt_spin_lock_nested(spinlock_t *lock, int subclass)
+{
+ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+ __rt_spin_lock(lock);
+}
+EXPORT_SYMBOL(rt_spin_lock_nested);
+
+void __sched rt_spin_lock_nest_lock(spinlock_t *lock,
+ struct lockdep_map *nest_lock)
+{
+ spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
+ __rt_spin_lock(lock);
+}
+EXPORT_SYMBOL(rt_spin_lock_nest_lock);
+#endif
+
+void __sched rt_spin_unlock(spinlock_t *lock)
+{
+ spin_release(&lock->dep_map, _RET_IP_);
+ migrate_enable();
+ rcu_read_unlock();
+
+ if (unlikely(!rt_mutex_cmpxchg_release(&lock->lock, current, NULL)))
+ rt_mutex_slowunlock(&lock->lock);
+}
+EXPORT_SYMBOL(rt_spin_unlock);
+
+/*
+ * Wait for the lock to get unlocked: instead of polling for an unlock
+ * (like raw spinlocks do), lock and unlock, to force the kernel to
+ * schedule if there's contention:
+ */
+void __sched rt_spin_lock_unlock(spinlock_t *lock)
+{
+ spin_lock(lock);
+ spin_unlock(lock);
+}
+EXPORT_SYMBOL(rt_spin_lock_unlock);
+
+static __always_inline int __rt_spin_trylock(spinlock_t *lock)
+{
+ int ret = 1;
+
+ if (unlikely(!rt_mutex_cmpxchg_acquire(&lock->lock, NULL, current)))
+ ret = rt_mutex_slowtrylock(&lock->lock);
+
+ if (ret) {
+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+ rcu_read_lock();
+ migrate_disable();
+ }
+ return ret;
+}
+
+int __sched rt_spin_trylock(spinlock_t *lock)
+{
+ return __rt_spin_trylock(lock);
+}
+EXPORT_SYMBOL(rt_spin_trylock);
+
+int __sched rt_spin_trylock_bh(spinlock_t *lock)
+{
+ int ret;
+
+ local_bh_disable();
+ ret = __rt_spin_trylock(lock);
+ if (!ret)
+ local_bh_enable();
+ return ret;
+}
+EXPORT_SYMBOL(rt_spin_trylock_bh);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void __rt_spin_lock_init(spinlock_t *lock, const char *name,
+ struct lock_class_key *key, bool percpu)
+{
+ u8 type = percpu ? LD_LOCK_PERCPU : LD_LOCK_NORMAL;
+
+ debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+ lockdep_init_map_type(&lock->dep_map, name, key, 0, LD_WAIT_CONFIG,
+ LD_WAIT_INV, type);
+}
+EXPORT_SYMBOL(__rt_spin_lock_init);
+#endif
+
+/*
+ * RT-specific reader/writer locks
+ */
+#define rwbase_set_and_save_current_state(state) \
+ current_save_and_set_rtlock_wait_state()
+
+#define rwbase_restore_current_state() \
+ current_restore_rtlock_saved_state()
+
+static __always_inline int
+rwbase_rtmutex_lock_state(struct rt_mutex_base *rtm, unsigned int state)
+{
+ if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
+ rtlock_slowlock(rtm);
+ return 0;
+}
+
+static __always_inline int
+rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state)
+{
+ rtlock_slowlock_locked(rtm);
+ return 0;
+}
+
+/*
+ * Unlock the rtmutex underlying an RT rwlock: try the fast path cmpxchg
+ * from current to NULL first and fall back to rt_mutex_slowunlock() when
+ * it fails.
+ */
+static __always_inline void rwbase_rtmutex_unlock(struct rt_mutex_base *rtm)
+{
+	/* Unlock wants RELEASE ordering, same as rt_spin_unlock() */
+	if (likely(rt_mutex_cmpxchg_release(rtm, current, NULL)))
+		return;
+
+	rt_mutex_slowunlock(rtm);
+}
+
+static __always_inline int rwbase_rtmutex_trylock(struct rt_mutex_base *rtm)
+{
+ if (likely(rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
+ return 1;
+
+ return rt_mutex_slowtrylock(rtm);
+}
+
+#define rwbase_signal_pending_state(state, current) (0)
+
+#define rwbase_schedule() \
+ schedule_rtlock()
+
+#include "rwbase_rt.c"
+/*
+ * The common functions which get wrapped into the rwlock API.
+ */
+int __sched rt_read_trylock(rwlock_t *rwlock)
+{
+ int ret;
+
+ ret = rwbase_read_trylock(&rwlock->rwbase);
+ if (ret) {
+ rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_);
+ rcu_read_lock();
+ migrate_disable();
+ }
+ return ret;
+}
+EXPORT_SYMBOL(rt_read_trylock);
+
+int __sched rt_write_trylock(rwlock_t *rwlock)
+{
+ int ret;
+
+ ret = rwbase_write_trylock(&rwlock->rwbase);
+ if (ret) {
+ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
+ rcu_read_lock();
+ migrate_disable();
+ }
+ return ret;
+}
+EXPORT_SYMBOL(rt_write_trylock);
+
+void __sched rt_read_lock(rwlock_t *rwlock)
+{
+ ___might_sleep(__FILE__, __LINE__, 0);
+ rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
+ rwbase_read_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
+ rcu_read_lock();
+ migrate_disable();
+}
+EXPORT_SYMBOL(rt_read_lock);
+
+void __sched rt_write_lock(rwlock_t *rwlock)
+{
+ ___might_sleep(__FILE__, __LINE__, 0);
+ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
+ rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
+ rcu_read_lock();
+ migrate_disable();
+}
+EXPORT_SYMBOL(rt_write_lock);
+
+void __sched rt_read_unlock(rwlock_t *rwlock)
+{
+ rwlock_release(&rwlock->dep_map, _RET_IP_);
+ migrate_enable();
+ rcu_read_unlock();
+ rwbase_read_unlock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
+}
+EXPORT_SYMBOL(rt_read_unlock);
+
+/*
+ * Release a write-held RT rwlock. Undoes rt_write_lock() in reverse order:
+ * migration is re-enabled first, then the RCU read side section is left,
+ * and finally the lock itself is dropped. This matches rt_read_unlock()
+ * and rt_spin_unlock().
+ */
+void __sched rt_write_unlock(rwlock_t *rwlock)
+{
+	rwlock_release(&rwlock->dep_map, _RET_IP_);
+	migrate_enable();
+	rcu_read_unlock();
+	rwbase_write_unlock(&rwlock->rwbase);
+}
+EXPORT_SYMBOL(rt_write_unlock);
+
+int __sched rt_rwlock_is_contended(rwlock_t *rwlock)
+{
+ return rw_base_is_contended(&rwlock->rwbase);
+}
+EXPORT_SYMBOL(rt_rwlock_is_contended);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void __rt_rwlock_init(rwlock_t *rwlock, const char *name,
+ struct lock_class_key *key)
+{
+ debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
+ lockdep_init_map_wait(&rwlock->dep_map, name, key, 0, LD_WAIT_CONFIG);
+}
+EXPORT_SYMBOL(__rt_rwlock_init);
+#endif
diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
new file mode 100644
index 000000000000..56f139201f24
--- /dev/null
+++ b/kernel/locking/ww_mutex.h
@@ -0,0 +1,569 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef WW_RT
+
+#define MUTEX mutex
+#define MUTEX_WAITER mutex_waiter
+
+static inline struct mutex_waiter *
+__ww_waiter_first(struct mutex *lock)
+{
+ struct mutex_waiter *w;
+
+ w = list_first_entry(&lock->wait_list, struct mutex_waiter, list);
+ if (list_entry_is_head(w, &lock->wait_list, list))
+ return NULL;
+
+ return w;
+}
+
+static inline struct mutex_waiter *
+__ww_waiter_next(struct mutex *lock, struct mutex_waiter *w)
+{
+ w = list_next_entry(w, list);
+ if (list_entry_is_head(w, &lock->wait_list, list))
+ return NULL;
+
+ return w;
+}
+
+static inline struct mutex_waiter *
+__ww_waiter_prev(struct mutex *lock, struct mutex_waiter *w)
+{
+ w = list_prev_entry(w, list);
+ if (list_entry_is_head(w, &lock->wait_list, list))
+ return NULL;
+
+ return w;
+}
+
+static inline struct mutex_waiter *
+__ww_waiter_last(struct mutex *lock)
+{
+ struct mutex_waiter *w;
+
+ w = list_last_entry(&lock->wait_list, struct mutex_waiter, list);
+ if (list_entry_is_head(w, &lock->wait_list, list))
+ return NULL;
+
+ return w;
+}
+
+static inline void
+__ww_waiter_add(struct mutex *lock, struct mutex_waiter *waiter, struct mutex_waiter *pos)
+{
+ struct list_head *p = &lock->wait_list;
+ if (pos)
+ p = &pos->list;
+ __mutex_add_waiter(lock, waiter, p);
+}
+
+static inline struct task_struct *
+__ww_mutex_owner(struct mutex *lock)
+{
+ return __mutex_owner(lock);
+}
+
+static inline bool
+__ww_mutex_has_waiters(struct mutex *lock)
+{
+ return atomic_long_read(&lock->owner) & MUTEX_FLAG_WAITERS;
+}
+
+static inline void lock_wait_lock(struct mutex *lock)
+{
+ raw_spin_lock(&lock->wait_lock);
+}
+
+static inline void unlock_wait_lock(struct mutex *lock)
+{
+ raw_spin_unlock(&lock->wait_lock);
+}
+
+static inline void lockdep_assert_wait_lock_held(struct mutex *lock)
+{
+ lockdep_assert_held(&lock->wait_lock);
+}
+
+#else /* WW_RT */
+
+#define MUTEX rt_mutex
+#define MUTEX_WAITER rt_mutex_waiter
+
+static inline struct rt_mutex_waiter *
+__ww_waiter_first(struct rt_mutex *lock)
+{
+ struct rb_node *n = rb_first(&lock->rtmutex.waiters.rb_root);
+ if (!n)
+ return NULL;
+ return rb_entry(n, struct rt_mutex_waiter, tree_entry);
+}
+
+static inline struct rt_mutex_waiter *
+__ww_waiter_next(struct rt_mutex *lock, struct rt_mutex_waiter *w)
+{
+ struct rb_node *n = rb_next(&w->tree_entry);
+ if (!n)
+ return NULL;
+ return rb_entry(n, struct rt_mutex_waiter, tree_entry);
+}
+
+static inline struct rt_mutex_waiter *
+__ww_waiter_prev(struct rt_mutex *lock, struct rt_mutex_waiter *w)
+{
+ struct rb_node *n = rb_prev(&w->tree_entry);
+ if (!n)
+ return NULL;
+ return rb_entry(n, struct rt_mutex_waiter, tree_entry);
+}
+
+static inline struct rt_mutex_waiter *
+__ww_waiter_last(struct rt_mutex *lock)
+{
+ struct rb_node *n = rb_last(&lock->rtmutex.waiters.rb_root);
+ if (!n)
+ return NULL;
+ return rb_entry(n, struct rt_mutex_waiter, tree_entry);
+}
+
+static inline void
+__ww_waiter_add(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct rt_mutex_waiter *pos)
+{
+ /* RT unconditionally adds the waiter first and then removes it on error */
+}
+
+static inline struct task_struct *
+__ww_mutex_owner(struct rt_mutex *lock)
+{
+ return rt_mutex_owner(&lock->rtmutex);
+}
+
+static inline bool
+__ww_mutex_has_waiters(struct rt_mutex *lock)
+{
+ return rt_mutex_has_waiters(&lock->rtmutex);
+}
+
+static inline void lock_wait_lock(struct rt_mutex *lock)
+{
+ raw_spin_lock(&lock->rtmutex.wait_lock);
+}
+
+static inline void unlock_wait_lock(struct rt_mutex *lock)
+{
+ raw_spin_unlock(&lock->rtmutex.wait_lock);
+}
+
+static inline void lockdep_assert_wait_lock_held(struct rt_mutex *lock)
+{
+ lockdep_assert_held(&lock->rtmutex.wait_lock);
+}
+
+#endif /* WW_RT */
+
+/*
+ * Wait-Die:
+ * The newer transactions are killed when:
+ * It (the new transaction) makes a request for a lock being held
+ * by an older transaction.
+ *
+ * Wound-Wait:
+ * The newer transactions are wounded when:
+ * An older transaction makes a request for a lock being held by
+ * the newer transaction.
+ */
+
+/*
+ * Associate the ww_mutex @ww with the context @ww_ctx under which we acquired
+ * it.
+ */
+static __always_inline void
+ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
+{
+#ifdef DEBUG_WW_MUTEXES
+ /*
+ * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
+ * but released with a normal mutex_unlock in this call.
+ *
+ * This should never happen, always use ww_mutex_unlock.
+ */
+ DEBUG_LOCKS_WARN_ON(ww->ctx);
+
+ /*
+ * Not quite done after calling ww_acquire_done() ?
+ */
+ DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
+
+ if (ww_ctx->contending_lock) {
+ /*
+ * After -EDEADLK you tried to
+ * acquire a different ww_mutex? Bad!
+ */
+ DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
+
+ /*
+ * You called ww_mutex_lock after receiving -EDEADLK,
+ * but 'forgot' to unlock everything else first?
+ */
+ DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
+ ww_ctx->contending_lock = NULL;
+ }
+
+ /*
+ * Naughty, using a different class will lead to undefined behavior!
+ */
+ DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
+#endif
+ ww_ctx->acquired++;
+ ww->ctx = ww_ctx;
+}
+
+/*
+ * Determine if @a is 'less' than @b. IOW, either @a is a lower priority task
+ * or, when of equal priority, a younger transaction than @b.
+ *
+ * Depending on the algorithm, @a will either need to wait for @b, or die.
+ */
+static inline bool
+__ww_ctx_less(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
+{
+/*
+ * Can only do the RT prio for WW_RT, because task->prio isn't stable due to PI,
+ * so the wait_list ordering will go wobbly. rt_mutex re-queues the waiter and
+ * isn't affected by this.
+ */
+#ifdef WW_RT
+ /* kernel prio; less is more */
+ int a_prio = a->task->prio;
+ int b_prio = b->task->prio;
+
+ if (rt_prio(a_prio) || rt_prio(b_prio)) {
+
+ if (a_prio > b_prio)
+ return true;
+
+ if (a_prio < b_prio)
+ return false;
+
+ /* equal static prio */
+
+ if (dl_prio(a_prio)) {
+ if (dl_time_before(b->task->dl.deadline,
+ a->task->dl.deadline))
+ return true;
+
+ if (dl_time_before(a->task->dl.deadline,
+ b->task->dl.deadline))
+ return false;
+ }
+
+ /* equal prio */
+ }
+#endif
+
+ /* FIFO order tie break -- bigger is younger */
+ return (signed long)(a->stamp - b->stamp) > 0;
+}
+
+/*
+ * Wait-Die; wake a lesser waiter context (when locks held) such that it can
+ * die.
+ *
+ * Among waiters with context, only the first one can have other locks acquired
+ * already (ctx->acquired > 0), because __ww_mutex_add_waiter() and
+ * __ww_mutex_check_kill() wake any but the earliest context.
+ */
+static bool
+__ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ if (!ww_ctx->is_wait_die)
+ return false;
+
+ if (waiter->ww_ctx->acquired > 0 && __ww_ctx_less(waiter->ww_ctx, ww_ctx)) {
+#ifndef WW_RT
+ debug_mutex_wake_waiter(lock, waiter);
+#endif
+ wake_up_process(waiter->task);
+ }
+
+ return true;
+}
+
+/*
+ * Wound-Wait; wound a lesser @hold_ctx if it holds the lock.
+ *
+ * Wound the lock holder if there are waiters with more important transactions
+ * than the lock holders. Even if multiple waiters may wound the lock holder,
+ * it's sufficient that only one does.
+ */
+static bool __ww_mutex_wound(struct MUTEX *lock,
+ struct ww_acquire_ctx *ww_ctx,
+ struct ww_acquire_ctx *hold_ctx)
+{
+ struct task_struct *owner = __ww_mutex_owner(lock);
+
+ lockdep_assert_wait_lock_held(lock);
+
+ /*
+ * Possible through __ww_mutex_add_waiter() when we race with
+ * ww_mutex_set_context_fastpath(). In that case we'll get here again
+ * through __ww_mutex_check_waiters().
+ */
+ if (!hold_ctx)
+ return false;
+
+ /*
+ * Can have !owner because of __mutex_unlock_slowpath(), but if owner,
+ * it cannot go away because we'll have FLAG_WAITERS set and hold
+ * wait_lock.
+ */
+ if (!owner)
+ return false;
+
+ if (ww_ctx->acquired > 0 && __ww_ctx_less(hold_ctx, ww_ctx)) {
+ hold_ctx->wounded = 1;
+
+ /*
+ * wake_up_process() paired with set_current_state()
+ * inserts sufficient barriers to make sure @owner either sees
+ * it's wounded in __ww_mutex_check_kill() or has a
+ * wakeup pending to re-read the wounded state.
+ */
+ if (owner != current)
+ wake_up_process(owner);
+
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * We just acquired @lock under @ww_ctx, if there are more important contexts
+ * waiting behind us on the wait-list, check if they need to die, or wound us.
+ *
+ * See __ww_mutex_add_waiter() for the list-order construction; basically the
+ * list is ordered by stamp, smallest (oldest) first.
+ *
+ * This relies on never mixing wait-die/wound-wait on the same wait-list;
+ * which is currently ensured by that being a ww_class property.
+ *
+ * The current task must not be on the wait list.
+ */
+static void
+__ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx)
+{
+ struct MUTEX_WAITER *cur;
+
+ lockdep_assert_wait_lock_held(lock);
+
+ for (cur = __ww_waiter_first(lock); cur;
+ cur = __ww_waiter_next(lock, cur)) {
+
+ if (!cur->ww_ctx)
+ continue;
+
+ if (__ww_mutex_die(lock, cur, ww_ctx) ||
+ __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx))
+ break;
+ }
+}
+
+/*
+ * After acquiring lock with fastpath, where we do not hold wait_lock, set ctx
+ * and wake up any waiters so they can recheck.
+ */
+static __always_inline void
+ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+{
+ ww_mutex_lock_acquired(lock, ctx);
+
+ /*
+ * The lock->ctx update should be visible on all cores before
+ * the WAITERS check is done, otherwise contended waiters might be
+ * missed. The contended waiters will either see ww_ctx == NULL
+ * and keep spinning, or it will acquire wait_lock, add itself
+ * to waiter list and sleep.
+ */
+ smp_mb(); /* See comments above and below. */
+
+ /*
+ * [W] ww->ctx = ctx [W] MUTEX_FLAG_WAITERS
+ * MB MB
+ * [R] MUTEX_FLAG_WAITERS [R] ww->ctx
+ *
+ * The memory barrier above pairs with the memory barrier in
+ * __ww_mutex_add_waiter() and makes sure we either observe ww->ctx
+ * and/or !empty list.
+ */
+ if (likely(!__ww_mutex_has_waiters(&lock->base)))
+ return;
+
+ /*
+ * Uh oh, we raced in fastpath, check if any of the waiters need to
+ * die or wound us.
+ */
+ lock_wait_lock(&lock->base);
+ __ww_mutex_check_waiters(&lock->base, ctx);
+ unlock_wait_lock(&lock->base);
+}
+
+static __always_inline int
+__ww_mutex_kill(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx)
+{
+ if (ww_ctx->acquired > 0) {
+#ifdef DEBUG_WW_MUTEXES
+ struct ww_mutex *ww;
+
+ ww = container_of(lock, struct ww_mutex, base);
+ DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock);
+ ww_ctx->contending_lock = ww;
+#endif
+ return -EDEADLK;
+ }
+
+ return 0;
+}
+
+/*
+ * Check the wound condition for the current lock acquire.
+ *
+ * Wound-Wait: If we're wounded, kill ourself.
+ *
+ * Wait-Die: If we're trying to acquire a lock already held by an older
+ * context, kill ourselves.
+ *
+ * Since __ww_mutex_add_waiter() orders the wait-list on stamp, we only have to
+ * look at waiters before us in the wait-list.
+ */
+static inline int
+__ww_mutex_check_kill(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
+ struct ww_acquire_ctx *ctx)
+{
+ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
+ struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
+ struct MUTEX_WAITER *cur;
+
+ if (ctx->acquired == 0)
+ return 0;
+
+ if (!ctx->is_wait_die) {
+ if (ctx->wounded)
+ return __ww_mutex_kill(lock, ctx);
+
+ return 0;
+ }
+
+ if (hold_ctx && __ww_ctx_less(ctx, hold_ctx))
+ return __ww_mutex_kill(lock, ctx);
+
+ /*
+ * If there is a waiter in front of us that has a context, then its
+ * stamp is earlier than ours and we must kill ourself.
+ */
+ for (cur = __ww_waiter_prev(lock, waiter); cur;
+ cur = __ww_waiter_prev(lock, cur)) {
+
+ if (!cur->ww_ctx)
+ continue;
+
+ return __ww_mutex_kill(lock, ctx);
+ }
+
+ return 0;
+}
+
+/*
+ * Add @waiter to the wait-list, keep the wait-list ordered by stamp, smallest
+ * first. Such that older contexts are preferred to acquire the lock over
+ * younger contexts.
+ *
+ * Waiters without context are interspersed in FIFO order.
+ *
+ * Furthermore, for Wait-Die kill ourself immediately when possible (there are
+ * older contexts already waiting) to avoid unnecessary waiting and for
+ * Wound-Wait ensure we wound the owning context when it is younger.
+ */
+static inline int
+__ww_mutex_add_waiter(struct MUTEX_WAITER *waiter,
+ struct MUTEX *lock,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ struct MUTEX_WAITER *cur, *pos = NULL;
+ bool is_wait_die;
+
+ if (!ww_ctx) {
+ __ww_waiter_add(lock, waiter, NULL);
+ return 0;
+ }
+
+ is_wait_die = ww_ctx->is_wait_die;
+
+ /*
+ * Add the waiter before the first waiter with a higher stamp.
+ * Waiters without a context are skipped to avoid starving
+ * them. Wait-Die waiters may die here. Wound-Wait waiters
+ * never die here, but they are sorted in stamp order and
+ * may wound the lock holder.
+ */
+ for (cur = __ww_waiter_last(lock); cur;
+ cur = __ww_waiter_prev(lock, cur)) {
+
+ if (!cur->ww_ctx)
+ continue;
+
+ if (__ww_ctx_less(ww_ctx, cur->ww_ctx)) {
+ /*
+ * Wait-Die: if we find an older context waiting, there
+ * is no point in queueing behind it, as we'd have to
+ * die the moment it would acquire the lock.
+ */
+ if (is_wait_die) {
+ int ret = __ww_mutex_kill(lock, ww_ctx);
+
+ if (ret)
+ return ret;
+ }
+
+ break;
+ }
+
+ pos = cur;
+
+ /* Wait-Die: ensure younger waiters die. */
+ __ww_mutex_die(lock, cur, ww_ctx);
+ }
+
+ __ww_waiter_add(lock, waiter, pos);
+
+ /*
+ * Wound-Wait: if we're blocking on a mutex owned by a younger context,
+ * wound that such that we might proceed.
+ */
+ if (!is_wait_die) {
+ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
+
+ /*
+ * See ww_mutex_set_context_fastpath(). Orders setting
+ * MUTEX_FLAG_WAITERS vs the ww->ctx load,
+ * such that either we or the fastpath will wound @ww->ctx.
+ */
+ smp_mb();
+ __ww_mutex_wound(lock, ww_ctx, ww->ctx);
+ }
+
+ return 0;
+}
+
+static inline void __ww_mutex_unlock(struct ww_mutex *lock)
+{
+ if (lock->ctx) {
+#ifdef DEBUG_WW_MUTEXES
+ DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
+#endif
+ if (lock->ctx->acquired > 0)
+ lock->ctx->acquired--;
+ lock->ctx = NULL;
+ }
+}
diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c
new file mode 100644
index 000000000000..3f1fff7d2780
--- /dev/null
+++ b/kernel/locking/ww_rt_mutex.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ww_mutex API implemented on top of rtmutex
+ */
+#include <linux/spinlock.h>
+#include <linux/export.h>
+
+#define RT_MUTEX_BUILD_MUTEX
+#define WW_RT
+#include "rtmutex.c"
+
+static int __sched
+__ww_rt_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx,
+ unsigned int state, unsigned long ip)
+{
+ struct lockdep_map __maybe_unused *nest_lock = NULL;
+ struct rt_mutex *rtm = &lock->base;
+ int ret;
+
+ might_sleep();
+
+ if (ww_ctx) {
+ if (unlikely(ww_ctx == READ_ONCE(lock->ctx)))
+ return -EALREADY;
+
+ /*
+ * Reset the wounded flag after a kill. No other process can
+ * race and wound us here, since they can't have a valid owner
+ * pointer if we don't have any locks held.
+ */
+ if (ww_ctx->acquired == 0)
+ ww_ctx->wounded = 0;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ nest_lock = &ww_ctx->dep_map;
+#endif
+ }
+ mutex_acquire_nest(&rtm->dep_map, 0, 0, nest_lock, ip);
+
+ if (likely(rt_mutex_cmpxchg_acquire(&rtm->rtmutex, NULL, current))) {
+ if (ww_ctx)
+ ww_mutex_set_context_fastpath(lock, ww_ctx);
+ return 0;
+ }
+
+ ret = rt_mutex_slowlock(&rtm->rtmutex, ww_ctx, state);
+
+ if (ret)
+ mutex_release(&rtm->dep_map, ip);
+ return ret;
+}
+
+int __sched
+ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+{
+ return __ww_rt_mutex_lock(lock, ctx, TASK_UNINTERRUPTIBLE, _RET_IP_);
+}
+EXPORT_SYMBOL(ww_mutex_lock);
+
+int __sched
+ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+{
+ return __ww_rt_mutex_lock(lock, ctx, TASK_INTERRUPTIBLE, _RET_IP_);
+}
+EXPORT_SYMBOL(ww_mutex_lock_interruptible);
+
+void __sched ww_mutex_unlock(struct ww_mutex *lock)
+{
+ struct rt_mutex *rtm = &lock->base;
+
+ __ww_mutex_unlock(lock);
+
+ mutex_release(&rtm->dep_map, _RET_IP_);
+ __rt_mutex_unlock(&rtm->rtmutex);
+}
+EXPORT_SYMBOL(ww_mutex_unlock);
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 1b019cbca594..b8251dc0bc0f 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -172,25 +172,6 @@ int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
}
EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
-int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh,
- unsigned long val_up, unsigned long val_down, void *v)
-{
- unsigned long flags;
- int ret;
-
- /*
- * Musn't use RCU; because then the notifier list can
- * change between the up and down traversal.
- */
- spin_lock_irqsave(&nh->lock, flags);
- ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v);
- spin_unlock_irqrestore(&nh->lock, flags);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(atomic_notifier_call_chain_robust);
-NOKPROBE_SYMBOL(atomic_notifier_call_chain_robust);
-
/**
* atomic_notifier_call_chain - Call functions in an atomic notifier chain
* @nh: Pointer to head of the atomic notifier chain
diff --git a/kernel/padata.c b/kernel/padata.c
index d4d3ba6e1728..18d3a5c699d8 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -9,19 +9,6 @@
*
* Copyright (c) 2020 Oracle and/or its affiliates.
* Author: Daniel Jordan <daniel.m.jordan@oracle.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <linux/completion.h>
@@ -211,7 +198,7 @@ int padata_do_parallel(struct padata_shell *ps,
if ((pinst->flags & PADATA_RESET))
goto out;
- atomic_inc(&pd->refcnt);
+ refcount_inc(&pd->refcnt);
padata->pd = pd;
padata->cb_cpu = *cb_cpu;
@@ -383,7 +370,7 @@ static void padata_serial_worker(struct work_struct *serial_work)
}
local_bh_enable();
- if (atomic_sub_and_test(cnt, &pd->refcnt))
+ if (refcount_sub_and_test(cnt, &pd->refcnt))
padata_free_pd(pd);
}
@@ -593,7 +580,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
padata_init_reorder_list(pd);
padata_init_squeues(pd);
pd->seq_nr = -1;
- atomic_set(&pd->refcnt, 1);
+ refcount_set(&pd->refcnt, 1);
spin_lock_init(&pd->lock);
pd->cpu = cpumask_first(pd->cpumask.pcpu);
INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
@@ -667,7 +654,7 @@ static int padata_replace(struct padata_instance *pinst)
synchronize_rcu();
list_for_each_entry_continue_reverse(ps, &pinst->pslist, list)
- if (atomic_dec_and_test(&ps->opd->refcnt))
+ if (refcount_dec_and_test(&ps->opd->refcnt))
padata_free_pd(ps->opd);
pinst->flags &= ~PADATA_RESET;
@@ -733,7 +720,7 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
struct cpumask *serial_mask, *parallel_mask;
int err = -EINVAL;
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&pinst->lock);
switch (cpumask_type) {
@@ -753,7 +740,7 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
out:
mutex_unlock(&pinst->lock);
- put_online_cpus();
+ cpus_read_unlock();
return err;
}
@@ -992,7 +979,7 @@ struct padata_instance *padata_alloc(const char *name)
if (!pinst->parallel_wq)
goto err_free_inst;
- get_online_cpus();
+ cpus_read_lock();
pinst->serial_wq = alloc_workqueue("%s_serial", WQ_MEM_RECLAIM |
WQ_CPU_INTENSIVE, 1, name);
@@ -1026,7 +1013,7 @@ struct padata_instance *padata_alloc(const char *name)
&pinst->cpu_dead_node);
#endif
- put_online_cpus();
+ cpus_read_unlock();
return pinst;
@@ -1036,7 +1023,7 @@ err_free_masks:
err_free_serial_wq:
destroy_workqueue(pinst->serial_wq);
err_put_cpus:
- put_online_cpus();
+ cpus_read_unlock();
destroy_workqueue(pinst->parallel_wq);
err_free_inst:
kfree(pinst);
@@ -1074,9 +1061,9 @@ struct padata_shell *padata_alloc_shell(struct padata_instance *pinst)
ps->pinst = pinst;
- get_online_cpus();
+ cpus_read_lock();
pd = padata_alloc_pd(ps);
- put_online_cpus();
+ cpus_read_unlock();
if (!pd)
goto out_free_ps;
diff --git a/kernel/params.c b/kernel/params.c
index 2daa2780a92c..8299bd764e42 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -243,6 +243,24 @@ STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", kstrtoul);
STANDARD_PARAM_DEF(ullong, unsigned long long, "%llu", kstrtoull);
STANDARD_PARAM_DEF(hexint, unsigned int, "%#08x", kstrtouint);
+int param_set_uint_minmax(const char *val, const struct kernel_param *kp,
+ unsigned int min, unsigned int max)
+{
+ unsigned int num;
+ int ret;
+
+ if (!val)
+ return -EINVAL;
+ ret = kstrtouint(val, 0, &num);
+ if (ret)
+ return ret;
+ if (num < min || num > max)
+ return -EINVAL;
+ *((unsigned int *)kp->arg) = num;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(param_set_uint_minmax);
+
int param_set_charp(const char *val, const struct kernel_param *kp)
{
if (strlen(val) > 1024) {
diff --git a/kernel/pid.c b/kernel/pid.c
index ebdf9c60cd0b..efe87db44683 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -550,13 +550,21 @@ struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
* Note, that this function can only be called after the fd table has
* been unshared to avoid leaking the pidfd to the new process.
*
+ * This symbol should not be explicitly exported to loadable modules.
+ *
* Return: On success, a cloexec pidfd is returned.
* On error, a negative errno number will be returned.
*/
-static int pidfd_create(struct pid *pid, unsigned int flags)
+int pidfd_create(struct pid *pid, unsigned int flags)
{
int fd;
+ if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
+ return -EINVAL;
+
+ if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
+ return -EINVAL;
+
fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
flags | O_RDWR | O_CLOEXEC);
if (fd < 0)
@@ -596,10 +604,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
if (!p)
return -ESRCH;
- if (pid_has_task(p, PIDTYPE_TGID))
- fd = pidfd_create(p, flags);
- else
- fd = -EINVAL;
+ fd = pidfd_create(p, flags);
put_pid(p);
return fd;
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index 0f4530b3a8cd..a332ccd829e2 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -170,7 +170,9 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
/* Compute the cost of each performance state. */
fmax = (u64) table[nr_states - 1].frequency;
for (i = 0; i < nr_states; i++) {
- table[i].cost = div64_u64(fmax * table[i].power,
+ unsigned long power_res = em_scale_power(table[i].power);
+
+ table[i].cost = div64_u64(fmax * power_res,
table[i].frequency);
}
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 12c7e1bb442f..44169f3081fd 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -577,7 +577,7 @@ static inline void pm_print_times_init(void) {}
struct kobject *power_kobj;
-/**
+/*
* state - control system sleep states.
*
* show() returns available sleep state labels, which may be "mem", "standby",
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index d8cae434f9eb..eb75f394a059 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -96,7 +96,7 @@ static void s2idle_enter(void)
s2idle_state = S2IDLE_STATE_ENTER;
raw_spin_unlock_irq(&s2idle_lock);
- get_online_cpus();
+ cpus_read_lock();
cpuidle_resume();
/* Push all the CPUs into the idle loop. */
@@ -106,7 +106,7 @@ static void s2idle_enter(void)
s2idle_state == S2IDLE_STATE_WAKE);
cpuidle_pause();
- put_online_cpus();
+ cpus_read_unlock();
raw_spin_lock_irq(&s2idle_lock);
diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c
index e1ed58adb69e..d20526c5be15 100644
--- a/kernel/power/suspend_test.c
+++ b/kernel/power/suspend_test.c
@@ -129,7 +129,7 @@ static int __init has_wakealarm(struct device *dev, const void *data)
{
struct rtc_device *candidate = to_rtc_device(dev);
- if (!candidate->ops->set_alarm)
+ if (!test_bit(RTC_FEATURE_ALARM, candidate->features))
return 0;
if (!device_may_wakeup(candidate->dev.parent))
return 0;
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index dca51fe9c73f..2cc34a22a506 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -487,7 +487,7 @@ retry:
if (gp_async) {
cur_ops->gp_barrier();
}
- writer_n_durations[me] = i_max;
+ writer_n_durations[me] = i_max + 1;
torture_kthread_stopping("rcu_scale_writer");
return 0;
}
@@ -561,7 +561,7 @@ rcu_scale_cleanup(void)
wdpp = writer_durations[i];
if (!wdpp)
continue;
- for (j = 0; j <= writer_n_durations[i]; j++) {
+ for (j = 0; j < writer_n_durations[i]; j++) {
wdp = &wdpp[j];
pr_alert("%s%s %4d writer-duration: %5d %llu\n",
scale_type, SCALE_FLAG,
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 40ef5417d954..ab4215266ebe 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -2022,8 +2022,13 @@ static int rcu_torture_stall(void *args)
__func__, raw_smp_processor_id());
while (ULONG_CMP_LT((unsigned long)ktime_get_seconds(),
stop_at))
- if (stall_cpu_block)
+ if (stall_cpu_block) {
+#ifdef CONFIG_PREEMPTION
+ preempt_schedule();
+#else
schedule_timeout_uninterruptible(HZ);
+#endif
+ }
if (stall_cpu_irqsoff)
local_irq_enable();
else if (!stall_cpu_block)
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index d998a76fb542..66dc14cf5687 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -467,6 +467,40 @@ static struct ref_scale_ops acqrel_ops = {
.name = "acqrel"
};
+static volatile u64 stopopts;
+
+static void ref_clock_section(const int nloops)
+{
+ u64 x = 0;
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--)
+ x += ktime_get_real_fast_ns();
+ preempt_enable();
+ stopopts = x;
+}
+
+static void ref_clock_delay_section(const int nloops, const int udl, const int ndl)
+{
+ u64 x = 0;
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ x += ktime_get_real_fast_ns();
+ un_delay(udl, ndl);
+ }
+ preempt_enable();
+ stopopts = x;
+}
+
+static struct ref_scale_ops clock_ops = {
+ .readsection = ref_clock_section,
+ .delaysection = ref_clock_delay_section,
+ .name = "clock"
+};
+
static void rcu_scale_one_reader(void)
{
if (readdelay <= 0)
@@ -759,7 +793,7 @@ ref_scale_init(void)
int firsterr = 0;
static struct ref_scale_ops *scale_ops[] = {
&rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops, &refcnt_ops, &rwlock_ops,
- &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops,
+ &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops,
};
if (!torture_init_begin(scale_type, verbose))
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 26344dc6483b..a0ba2ed49bc6 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -96,7 +96,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
*/
void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
{
- int newval = ssp->srcu_lock_nesting[idx] - 1;
+ int newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1;
WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval);
if (!newval && READ_ONCE(ssp->srcu_gp_waiting))
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 8536c55df514..806160c44b17 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -643,8 +643,8 @@ void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
//
// "Rude" variant of Tasks RCU, inspired by Steve Rostedt's trick of
// passing an empty function to schedule_on_each_cpu(). This approach
-// provides an asynchronous call_rcu_tasks_rude() API and batching
-// of concurrent calls to the synchronous synchronize_rcu_rude() API.
+// provides an asynchronous call_rcu_tasks_rude() API and batching of
+// concurrent calls to the synchronous synchronize_rcu_tasks_rude() API.
// This invokes schedule_on_each_cpu() in order to send IPIs far and wide
// and induces otherwise unnecessary context switches on all online CPUs,
// whether idle or not.
@@ -785,7 +785,10 @@ EXPORT_SYMBOL_GPL(show_rcu_tasks_rude_gp_kthread);
// set that task's .need_qs flag so that task's next outermost
// rcu_read_unlock_trace() will report the quiescent state (in which
// case the count of readers is incremented). If both attempts fail,
-// the task is added to a "holdout" list.
+// the task is added to a "holdout" list. Note that IPIs are used
+// to invoke trc_read_check_handler() in the context of running tasks
+// in order to avoid ordering overhead on common-case shared-variable
+// accesses.
// rcu_tasks_trace_postscan():
// Initialize state and attempt to identify an immediate quiescent
// state as above (but only for idle tasks), unblock CPU-hotplug
@@ -847,7 +850,7 @@ static DEFINE_IRQ_WORK(rcu_tasks_trace_iw, rcu_read_unlock_iw);
/* If we are the last reader, wake up the grace-period kthread. */
void rcu_read_unlock_trace_special(struct task_struct *t, int nesting)
{
- int nq = t->trc_reader_special.b.need_qs;
+ int nq = READ_ONCE(t->trc_reader_special.b.need_qs);
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
t->trc_reader_special.b.need_mb)
@@ -894,7 +897,7 @@ static void trc_read_check_handler(void *t_in)
// If the task is not in a read-side critical section, and
// if this is the last reader, awaken the grace-period kthread.
- if (likely(!t->trc_reader_nesting)) {
+ if (likely(!READ_ONCE(t->trc_reader_nesting))) {
if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
wake_up(&trc_wait);
// Mark as checked after decrement to avoid false
@@ -903,7 +906,7 @@ static void trc_read_check_handler(void *t_in)
goto reset_ipi;
}
// If we are racing with an rcu_read_unlock_trace(), try again later.
- if (unlikely(t->trc_reader_nesting < 0)) {
+ if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0)) {
if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
wake_up(&trc_wait);
goto reset_ipi;
@@ -913,14 +916,14 @@ static void trc_read_check_handler(void *t_in)
// Get here if the task is in a read-side critical section. Set
// its state so that it will awaken the grace-period kthread upon
// exit from that critical section.
- WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
+ WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs));
WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
reset_ipi:
// Allow future IPIs to be sent on CPU and for task.
// Also order this IPI handler against any later manipulations of
// the intended task.
- smp_store_release(&per_cpu(trc_ipi_to_cpu, smp_processor_id()), false); // ^^^
+ smp_store_release(per_cpu_ptr(&trc_ipi_to_cpu, smp_processor_id()), false); // ^^^
smp_store_release(&texp->trc_ipi_to_cpu, -1); // ^^^
}
@@ -950,6 +953,7 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg)
n_heavy_reader_ofl_updates++;
in_qs = true;
} else {
+ // The task is not running, so C-language access is safe.
in_qs = likely(!t->trc_reader_nesting);
}
@@ -964,7 +968,7 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg)
// state so that it will awaken the grace-period kthread upon exit
// from that critical section.
atomic_inc(&trc_n_readers_need_end); // One more to wait on.
- WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
+ WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs));
WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
return true;
}
@@ -982,7 +986,7 @@ static void trc_wait_for_one_reader(struct task_struct *t,
// The current task had better be in a quiescent state.
if (t == current) {
t->trc_reader_checked = true;
- WARN_ON_ONCE(t->trc_reader_nesting);
+ WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting));
return;
}
@@ -994,6 +998,12 @@ static void trc_wait_for_one_reader(struct task_struct *t,
}
put_task_struct(t);
+ // If this task is not yet on the holdout list, then we are in
+ // an RCU read-side critical section. Otherwise, the invocation of
+ // trc_add_holdout() that added it to the list did the necessary
+ // get_task_struct(). Either way, the task cannot be freed out
+ // from under this code.
+
// If currently running, send an IPI, either way, add to list.
trc_add_holdout(t, bhp);
if (task_curr(t) &&
@@ -1092,8 +1102,8 @@ static void show_stalled_task_trace(struct task_struct *t, bool *firstreport)
".I"[READ_ONCE(t->trc_ipi_to_cpu) > 0],
".i"[is_idle_task(t)],
".N"[cpu > 0 && tick_nohz_full_cpu(cpu)],
- t->trc_reader_nesting,
- " N"[!!t->trc_reader_special.b.need_qs],
+ READ_ONCE(t->trc_reader_nesting),
+ " N"[!!READ_ONCE(t->trc_reader_special.b.need_qs)],
cpu);
sched_show_task(t);
}
@@ -1187,7 +1197,7 @@ static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp)
static void exit_tasks_rcu_finish_trace(struct task_struct *t)
{
WRITE_ONCE(t->trc_reader_checked, true);
- WARN_ON_ONCE(t->trc_reader_nesting);
+ WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting));
WRITE_ONCE(t->trc_reader_nesting, 0);
if (WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)))
rcu_read_unlock_trace_special(t, 0);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 51f24ecd94b2..bce848e50512 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -74,17 +74,10 @@
/* Data structures. */
-/*
- * Steal a bit from the bottom of ->dynticks for idle entry/exit
- * control. Initially this is for TLB flushing.
- */
-#define RCU_DYNTICK_CTRL_MASK 0x1
-#define RCU_DYNTICK_CTRL_CTR (RCU_DYNTICK_CTRL_MASK + 1)
-
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
.dynticks_nesting = 1,
.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
- .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
+ .dynticks = ATOMIC_INIT(1),
#ifdef CONFIG_RCU_NOCB_CPU
.cblist.flags = SEGCBLIST_SOFTIRQ_ONLY,
#endif
@@ -259,6 +252,15 @@ void rcu_softirq_qs(void)
}
/*
+ * Increment the current CPU's rcu_data structure's ->dynticks field
+ * with ordering. Return the new value.
+ */
+static noinline noinstr unsigned long rcu_dynticks_inc(int incby)
+{
+ return arch_atomic_add_return(incby, this_cpu_ptr(&rcu_data.dynticks));
+}
+
+/*
* Record entry into an extended quiescent state. This is only to be
* called when not already in an extended quiescent state, that is,
* RCU is watching prior to the call to this function and is no longer
@@ -266,7 +268,6 @@ void rcu_softirq_qs(void)
*/
static noinstr void rcu_dynticks_eqs_enter(void)
{
- struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
int seq;
/*
@@ -275,13 +276,9 @@ static noinstr void rcu_dynticks_eqs_enter(void)
* next idle sojourn.
*/
rcu_dynticks_task_trace_enter(); // Before ->dynticks update!
- seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
+ seq = rcu_dynticks_inc(1);
// RCU is no longer watching. Better be in extended quiescent state!
- WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
- (seq & RCU_DYNTICK_CTRL_CTR));
- /* Better not have special action (TLB flush) pending! */
- WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
- (seq & RCU_DYNTICK_CTRL_MASK));
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & 0x1));
}
/*
@@ -291,7 +288,6 @@ static noinstr void rcu_dynticks_eqs_enter(void)
*/
static noinstr void rcu_dynticks_eqs_exit(void)
{
- struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
int seq;
/*
@@ -299,15 +295,10 @@ static noinstr void rcu_dynticks_eqs_exit(void)
* and we also must force ordering with the next RCU read-side
* critical section.
*/
- seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
+ seq = rcu_dynticks_inc(1);
// RCU is now watching. Better not be in an extended quiescent state!
rcu_dynticks_task_trace_exit(); // After ->dynticks update!
- WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
- !(seq & RCU_DYNTICK_CTRL_CTR));
- if (seq & RCU_DYNTICK_CTRL_MASK) {
- arch_atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
- smp_mb__after_atomic(); /* _exit after clearing mask. */
- }
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & 0x1));
}
/*
@@ -324,9 +315,9 @@ static void rcu_dynticks_eqs_online(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
- if (atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR)
+ if (atomic_read(&rdp->dynticks) & 0x1)
return;
- atomic_add(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
+ rcu_dynticks_inc(1);
}
/*
@@ -336,9 +327,7 @@ static void rcu_dynticks_eqs_online(void)
*/
static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
{
- struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
-
- return !(arch_atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR);
+ return !(atomic_read(this_cpu_ptr(&rcu_data.dynticks)) & 0x1);
}
/*
@@ -347,9 +336,8 @@ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
*/
static int rcu_dynticks_snap(struct rcu_data *rdp)
{
- int snap = atomic_add_return(0, &rdp->dynticks);
-
- return snap & ~RCU_DYNTICK_CTRL_MASK;
+ smp_mb(); // Fundamental RCU ordering guarantee.
+ return atomic_read_acquire(&rdp->dynticks);
}
/*
@@ -358,7 +346,7 @@ static int rcu_dynticks_snap(struct rcu_data *rdp)
*/
static bool rcu_dynticks_in_eqs(int snap)
{
- return !(snap & RCU_DYNTICK_CTRL_CTR);
+ return !(snap & 0x1);
}
/* Return true if the specified CPU is currently idle from an RCU viewpoint. */
@@ -389,8 +377,7 @@ bool rcu_dynticks_zero_in_eqs(int cpu, int *vp)
int snap;
// If not quiescent, force back to earlier extended quiescent state.
- snap = atomic_read(&rdp->dynticks) & ~(RCU_DYNTICK_CTRL_MASK |
- RCU_DYNTICK_CTRL_CTR);
+ snap = atomic_read(&rdp->dynticks) & ~0x1;
smp_rmb(); // Order ->dynticks and *vp reads.
if (READ_ONCE(*vp))
@@ -398,32 +385,7 @@ bool rcu_dynticks_zero_in_eqs(int cpu, int *vp)
smp_rmb(); // Order *vp read and ->dynticks re-read.
// If still in the same extended quiescent state, we are good!
- return snap == (atomic_read(&rdp->dynticks) & ~RCU_DYNTICK_CTRL_MASK);
-}
-
-/*
- * Set the special (bottom) bit of the specified CPU so that it
- * will take special action (such as flushing its TLB) on the
- * next exit from an extended quiescent state. Returns true if
- * the bit was successfully set, or false if the CPU was not in
- * an extended quiescent state.
- */
-bool rcu_eqs_special_set(int cpu)
-{
- int old;
- int new;
- int new_old;
- struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
-
- new_old = atomic_read(&rdp->dynticks);
- do {
- old = new_old;
- if (old & RCU_DYNTICK_CTRL_CTR)
- return false;
- new = old | RCU_DYNTICK_CTRL_MASK;
- new_old = atomic_cmpxchg(&rdp->dynticks, old, new);
- } while (new_old != old);
- return true;
+ return snap == atomic_read(&rdp->dynticks);
}
/*
@@ -439,13 +401,12 @@ bool rcu_eqs_special_set(int cpu)
*/
notrace void rcu_momentary_dyntick_idle(void)
{
- int special;
+ int seq;
raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
- special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
- &this_cpu_ptr(&rcu_data)->dynticks);
+ seq = rcu_dynticks_inc(2);
/* It is illegal to call this from idle state. */
- WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
+ WARN_ON_ONCE(!(seq & 0x1));
rcu_preempt_deferred_qs(current);
}
EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle);
@@ -1325,7 +1286,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
*/
jtsq = READ_ONCE(jiffies_to_sched_qs);
ruqp = per_cpu_ptr(&rcu_data.rcu_urgent_qs, rdp->cpu);
- rnhqp = &per_cpu(rcu_data.rcu_need_heavy_qs, rdp->cpu);
+ rnhqp = per_cpu_ptr(&rcu_data.rcu_need_heavy_qs, rdp->cpu);
if (!READ_ONCE(*rnhqp) &&
(time_after(jiffies, rcu_state.gp_start + jtsq * 2) ||
time_after(jiffies, rcu_state.jiffies_resched) ||
@@ -1772,7 +1733,7 @@ static void rcu_strict_gp_boundary(void *unused)
/*
* Initialize a new grace period. Return false if no grace period required.
*/
-static bool rcu_gp_init(void)
+static noinline_for_stack bool rcu_gp_init(void)
{
unsigned long firstseq;
unsigned long flags;
@@ -1966,7 +1927,7 @@ static void rcu_gp_fqs(bool first_time)
/*
* Loop doing repeated quiescent-state forcing until the grace period ends.
*/
-static void rcu_gp_fqs_loop(void)
+static noinline_for_stack void rcu_gp_fqs_loop(void)
{
bool first_gp_fqs;
int gf = 0;
@@ -1993,8 +1954,8 @@ static void rcu_gp_fqs_loop(void)
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
TPS("fqswait"));
WRITE_ONCE(rcu_state.gp_state, RCU_GP_WAIT_FQS);
- ret = swait_event_idle_timeout_exclusive(
- rcu_state.gp_wq, rcu_gp_fqs_check_wake(&gf), j);
+ (void)swait_event_idle_timeout_exclusive(rcu_state.gp_wq,
+ rcu_gp_fqs_check_wake(&gf), j);
rcu_gp_torture_wait();
WRITE_ONCE(rcu_state.gp_state, RCU_GP_DOING_FQS);
/* Locking provides needed memory barriers. */
@@ -2471,9 +2432,6 @@ int rcutree_dead_cpu(unsigned int cpu)
WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1);
/* Adjust any no-longer-needed kthreads. */
rcu_boost_kthread_setaffinity(rnp, -1);
- /* Do any needed no-CB deferred wakeups from this CPU. */
- do_nocb_deferred_wakeup(per_cpu_ptr(&rcu_data, cpu));
-
// Stop-machine done, so allow nohz_full to disable tick.
tick_dep_clear(TICK_DEP_BIT_RCU);
return 0;
@@ -4050,7 +4008,7 @@ void rcu_barrier(void)
*/
init_completion(&rcu_state.barrier_completion);
atomic_set(&rcu_state.barrier_cpu_count, 2);
- get_online_cpus();
+ cpus_read_lock();
/*
* Force each CPU with callbacks to register a new callback.
@@ -4081,7 +4039,7 @@ void rcu_barrier(void)
rcu_state.barrier_sequence);
}
}
- put_online_cpus();
+ cpus_read_unlock();
/*
* Now that we have an rcu_barrier_callback() callback on each
@@ -4784,4 +4742,5 @@ void __init rcu_init(void)
#include "tree_stall.h"
#include "tree_exp.h"
+#include "tree_nocb.h"
#include "tree_plugin.h"
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
new file mode 100644
index 000000000000..8fdf44f8523f
--- /dev/null
+++ b/kernel/rcu/tree_nocb.h
@@ -0,0 +1,1496 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Read-Copy Update mechanism for mutual exclusion (tree-based version)
+ * Internal non-public definitions that provide either classic
+ * or preemptible semantics.
+ *
+ * Copyright Red Hat, 2009
+ * Copyright IBM Corporation, 2009
+ * Copyright SUSE, 2021
+ *
+ * Author: Ingo Molnar <mingo@elte.hu>
+ * Paul E. McKenney <paulmck@linux.ibm.com>
+ * Frederic Weisbecker <frederic@kernel.org>
+ */
+
+#ifdef CONFIG_RCU_NOCB_CPU
+static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
+static bool __read_mostly rcu_nocb_poll; /* Offload kthreads are to poll. */
+static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp)
+{
+ return lockdep_is_held(&rdp->nocb_lock);
+}
+
+static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
+{
+ /* Race on early boot between thread creation and assignment */
+ if (!rdp->nocb_cb_kthread || !rdp->nocb_gp_kthread)
+ return true;
+
+ if (current == rdp->nocb_cb_kthread || current == rdp->nocb_gp_kthread)
+ if (in_task())
+ return true;
+ return false;
+}
+
+/*
+ * Offload callback processing from the boot-time-specified set of CPUs
+ * specified by rcu_nocb_mask. For the CPUs in the set, there are kthreads
+ * created that pull the callbacks from the corresponding CPU, wait for
+ * a grace period to elapse, and invoke the callbacks. These kthreads
+ * are organized into GP kthreads, which manage incoming callbacks, wait for
+ * grace periods, and awaken CB kthreads, and the CB kthreads, which only
+ * invoke callbacks. Each GP kthread invokes its own CBs. The no-CBs CPUs
+ * do a wake_up() on their GP kthread when they insert a callback into any
+ * empty list, unless the rcu_nocb_poll boot parameter has been specified,
+ * in which case each kthread actively polls its CPU. (Which isn't so great
+ * for energy efficiency, but which does reduce RCU's overhead on that CPU.)
+ *
+ * This is intended to be used in conjunction with Frederic Weisbecker's
+ * adaptive-idle work, which would seriously reduce OS jitter on CPUs
+ * running CPU-bound user-mode computations.
+ *
+ * Offloading of callbacks can also be used as an energy-efficiency
+ * measure because CPUs with no RCU callbacks queued are more aggressive
+ * about entering dyntick-idle mode.
+ */
+
+
+/*
+ * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters.
+ * If the list is invalid, a warning is emitted and all CPUs are offloaded.
+ */
+static int __init rcu_nocb_setup(char *str)
+{
+ alloc_bootmem_cpumask_var(&rcu_nocb_mask);
+ if (cpulist_parse(str, rcu_nocb_mask)) {
+ pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
+ cpumask_setall(rcu_nocb_mask);
+ }
+ return 1;
+}
+__setup("rcu_nocbs=", rcu_nocb_setup);
+
+static int __init parse_rcu_nocb_poll(char *arg)
+{
+ rcu_nocb_poll = true;
+ return 0;
+}
+early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
+
+/*
+ * Don't bother bypassing ->cblist if the call_rcu() rate is low.
+ * After all, the main point of bypassing is to avoid lock contention
+ * on ->nocb_lock, which only can happen at high call_rcu() rates.
+ */
+static int nocb_nobypass_lim_per_jiffy = 16 * 1000 / HZ;
+module_param(nocb_nobypass_lim_per_jiffy, int, 0);
+
+/*
+ * Acquire the specified rcu_data structure's ->nocb_bypass_lock. If the
+ * lock isn't immediately available, increment ->nocb_lock_contended to
+ * flag the contention.
+ */
+static void rcu_nocb_bypass_lock(struct rcu_data *rdp)
+ __acquires(&rdp->nocb_bypass_lock)
+{
+ lockdep_assert_irqs_disabled();
+ if (raw_spin_trylock(&rdp->nocb_bypass_lock))
+ return;
+ atomic_inc(&rdp->nocb_lock_contended);
+ WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
+ smp_mb__after_atomic(); /* atomic_inc() before lock. */
+ raw_spin_lock(&rdp->nocb_bypass_lock);
+ smp_mb__before_atomic(); /* atomic_dec() after lock. */
+ atomic_dec(&rdp->nocb_lock_contended);
+}
+
+/*
+ * Spinwait until the specified rcu_data structure's ->nocb_lock is
+ * not contended. Please note that this is extremely special-purpose,
+ * relying on the fact that at most two kthreads and one CPU contend for
+ * this lock, and also that the two kthreads are guaranteed to have frequent
+ * grace-period-duration time intervals between successive acquisitions
+ * of the lock. This allows us to use an extremely simple throttling
+ * mechanism, and further to apply it only to the CPU doing floods of
+ * call_rcu() invocations. Don't try this at home!
+ */
+static void rcu_nocb_wait_contended(struct rcu_data *rdp)
+{
+ WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
+ while (WARN_ON_ONCE(atomic_read(&rdp->nocb_lock_contended)))
+ cpu_relax();
+}
+
+/*
+ * Conditionally acquire the specified rcu_data structure's
+ * ->nocb_bypass_lock.
+ */
+static bool rcu_nocb_bypass_trylock(struct rcu_data *rdp)
+{
+ lockdep_assert_irqs_disabled();
+ return raw_spin_trylock(&rdp->nocb_bypass_lock);
+}
+
+/*
+ * Release the specified rcu_data structure's ->nocb_bypass_lock.
+ */
+static void rcu_nocb_bypass_unlock(struct rcu_data *rdp)
+ __releases(&rdp->nocb_bypass_lock)
+{
+ lockdep_assert_irqs_disabled();
+ raw_spin_unlock(&rdp->nocb_bypass_lock);
+}
+
+/*
+ * Acquire the specified rcu_data structure's ->nocb_lock, but only
+ * if it corresponds to a no-CBs CPU.
+ */
+static void rcu_nocb_lock(struct rcu_data *rdp)
+{
+ lockdep_assert_irqs_disabled();
+ if (!rcu_rdp_is_offloaded(rdp))
+ return;
+ raw_spin_lock(&rdp->nocb_lock);
+}
+
+/*
+ * Release the specified rcu_data structure's ->nocb_lock, but only
+ * if it corresponds to a no-CBs CPU.
+ */
+static void rcu_nocb_unlock(struct rcu_data *rdp)
+{
+ if (rcu_rdp_is_offloaded(rdp)) {
+ lockdep_assert_irqs_disabled();
+ raw_spin_unlock(&rdp->nocb_lock);
+ }
+}
+
+/*
+ * Release the specified rcu_data structure's ->nocb_lock and restore
+ * interrupts, but only if it corresponds to a no-CBs CPU.
+ */
+static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
+ unsigned long flags)
+{
+ if (rcu_rdp_is_offloaded(rdp)) {
+ lockdep_assert_irqs_disabled();
+ raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
+ } else {
+ local_irq_restore(flags);
+ }
+}
+
+/* Lockdep check that ->cblist may be safely accessed. */
+static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
+{
+ lockdep_assert_irqs_disabled();
+ if (rcu_rdp_is_offloaded(rdp))
+ lockdep_assert_held(&rdp->nocb_lock);
+}
+
+/*
+ * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
+ * grace period.
+ */
+static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
+{
+ swake_up_all(sq);
+}
+
+static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
+{
+ return &rnp->nocb_gp_wq[rcu_seq_ctr(rnp->gp_seq) & 0x1];
+}
+
+static void rcu_init_one_nocb(struct rcu_node *rnp)
+{
+ init_swait_queue_head(&rnp->nocb_gp_wq[0]);
+ init_swait_queue_head(&rnp->nocb_gp_wq[1]);
+}
+
+/* Is the specified CPU a no-CBs CPU? */
+bool rcu_is_nocb_cpu(int cpu)
+{
+ if (cpumask_available(rcu_nocb_mask))
+ return cpumask_test_cpu(cpu, rcu_nocb_mask);
+ return false;
+}
+
+static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
+ struct rcu_data *rdp,
+ bool force, unsigned long flags)
+ __releases(rdp_gp->nocb_gp_lock)
+{
+ bool needwake = false;
+
+ if (!READ_ONCE(rdp_gp->nocb_gp_kthread)) {
+ raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
+ TPS("AlreadyAwake"));
+ return false;
+ }
+
+ if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
+ WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
+ del_timer(&rdp_gp->nocb_timer);
+ }
+
+ if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
+ WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
+ needwake = true;
+ }
+ raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
+ if (needwake) {
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));
+ wake_up_process(rdp_gp->nocb_gp_kthread);
+ }
+
+ return needwake;
+}
+
+/*
+ * Kick the GP kthread for this NOCB group.
+ */
+static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
+{
+ unsigned long flags;
+ struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
+
+ raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
+ return __wake_nocb_gp(rdp_gp, rdp, force, flags);
+}
+
+/*
+ * Arrange to wake the GP kthread for this NOCB group at some future
+ * time when it is safe to do so.
+ */
+static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
+ const char *reason)
+{
+ unsigned long flags;
+ struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
+
+ raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
+
+ /*
+ * Bypass wakeup overrides previous deferments. In case
+ * of callback storm, no need to wake up too early.
+ */
+ if (waketype == RCU_NOCB_WAKE_BYPASS) {
+ mod_timer(&rdp_gp->nocb_timer, jiffies + 2);
+ WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
+ } else {
+ if (rdp_gp->nocb_defer_wakeup < RCU_NOCB_WAKE)
+ mod_timer(&rdp_gp->nocb_timer, jiffies + 1);
+ if (rdp_gp->nocb_defer_wakeup < waketype)
+ WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
+ }
+
+ raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
+
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, reason);
+}
+
+/*
+ * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.
+ * The caller must hold ->nocb_bypass_lock, which is released before
+ * returning. However, if there is a callback to be enqueued and if
+ * ->nocb_bypass proves to be initially empty, just return false because
+ * the no-CB GP kthread may need to be awakened in this case.
+ * Note that this function always returns true if rhp is NULL.
+ */
+static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ unsigned long j)
+{
+ struct rcu_cblist rcl;
+
+ WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp));
+ rcu_lockdep_assert_cblist_protected(rdp);
+ lockdep_assert_held(&rdp->nocb_bypass_lock);
+ if (rhp && !rcu_cblist_n_cbs(&rdp->nocb_bypass)) {
+ raw_spin_unlock(&rdp->nocb_bypass_lock);
+ return false;
+ }
+ /* Note: ->cblist.len already accounts for ->nocb_bypass contents. */
+ if (rhp)
+ rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
+ rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);
+ rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl);
+ WRITE_ONCE(rdp->nocb_bypass_first, j);
+ rcu_nocb_bypass_unlock(rdp);
+ return true;
+}
+
+/*
+ * Acquire ->nocb_bypass_lock and flush the ->nocb_bypass queue into
+ * ->cblist, enqueuing rhp if non-NULL. Return true without doing anything
+ * if this rdp is not offloaded. If there is a callback to be enqueued and
+ * ->nocb_bypass proves to be initially empty, return false because the
+ * no-CB GP kthread may need to be awakened in this case.
+ * Note that this function always returns true if rhp is NULL.
+ */
+static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ unsigned long j)
+{
+ if (!rcu_rdp_is_offloaded(rdp))
+ return true;
+ rcu_lockdep_assert_cblist_protected(rdp);
+ rcu_nocb_bypass_lock(rdp);
+ return rcu_nocb_do_flush_bypass(rdp, rhp, j);
+}
+
+/*
+ * If the ->nocb_bypass_lock is immediately available, flush the
+ * ->nocb_bypass queue into ->cblist.
+ */
+static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
+{
+ rcu_lockdep_assert_cblist_protected(rdp);
+ if (!rcu_rdp_is_offloaded(rdp) ||
+ !rcu_nocb_bypass_trylock(rdp))
+ return;
+ WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j));
+}
+
+/*
+ * See whether it is appropriate to use the ->nocb_bypass list in order
+ * to control contention on ->nocb_lock. A limited number of direct
+ * enqueues are permitted into ->cblist per jiffy. If ->nocb_bypass
+ * is non-empty, further callbacks must be placed into ->nocb_bypass,
+ * otherwise rcu_barrier() breaks. Use rcu_nocb_flush_bypass() to switch
+ * back to direct use of ->cblist. However, ->nocb_bypass should not be
+ * used if ->cblist is empty, because otherwise callbacks can be stranded
+ * on ->nocb_bypass because we cannot count on the current CPU ever again
+ * invoking call_rcu(). The general rule is that if ->nocb_bypass is
+ * non-empty, the corresponding no-CBs grace-period kthread must not be
+ * in an indefinite sleep state.
+ *
+ * Finally, it is not permitted to use the bypass during early boot,
+ * as doing so would confuse the auto-initialization code. Besides
+ * which, there is no point in worrying about lock contention while
+ * there is only one CPU in operation.
+ */
+static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ bool *was_alldone, unsigned long flags)
+{
+ unsigned long c;
+ unsigned long cur_gp_seq;
+ unsigned long j = jiffies;
+ long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
+
+ lockdep_assert_irqs_disabled();
+
+ // Pure softirq/rcuc based processing: no bypassing, no
+ // locking.
+ if (!rcu_rdp_is_offloaded(rdp)) {
+ *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
+ return false;
+ }
+
+ // In the process of (de-)offloading: no bypassing, but
+ // locking.
+ if (!rcu_segcblist_completely_offloaded(&rdp->cblist)) {
+ rcu_nocb_lock(rdp);
+ *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
+ return false; /* Not completely offloaded, no bypassing. */
+ }
+
+ // Don't use ->nocb_bypass during early boot.
+ if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
+ rcu_nocb_lock(rdp);
+ WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
+ *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
+ return false;
+ }
+
+ // If we have advanced to a new jiffy, reset counts to allow
+ // moving back from ->nocb_bypass to ->cblist.
+ if (j == rdp->nocb_nobypass_last) {
+ c = rdp->nocb_nobypass_count + 1;
+ } else {
+ WRITE_ONCE(rdp->nocb_nobypass_last, j);
+ c = rdp->nocb_nobypass_count - nocb_nobypass_lim_per_jiffy;
+ if (ULONG_CMP_LT(rdp->nocb_nobypass_count,
+ nocb_nobypass_lim_per_jiffy))
+ c = 0;
+ else if (c > nocb_nobypass_lim_per_jiffy)
+ c = nocb_nobypass_lim_per_jiffy;
+ }
+ WRITE_ONCE(rdp->nocb_nobypass_count, c);
+
+ // If there hasn't yet been all that many ->cblist enqueues
+ // this jiffy, tell the caller to enqueue onto ->cblist. But flush
+ // ->nocb_bypass first.
+ if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy) {
+ rcu_nocb_lock(rdp);
+ *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
+ if (*was_alldone)
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
+ TPS("FirstQ"));
+ WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j));
+ WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
+ return false; // Caller must enqueue the callback.
+ }
+
+ // If ->nocb_bypass has been used too long or is too full,
+ // flush ->nocb_bypass to ->cblist.
+ if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) ||
+ ncbs >= qhimark) {
+ rcu_nocb_lock(rdp);
+ if (!rcu_nocb_flush_bypass(rdp, rhp, j)) {
+ *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
+ if (*was_alldone)
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
+ TPS("FirstQ"));
+ WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
+ return false; // Caller must enqueue the callback.
+ }
+ if (j != rdp->nocb_gp_adv_time &&
+ rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
+ rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
+ rcu_advance_cbs_nowake(rdp->mynode, rdp);
+ rdp->nocb_gp_adv_time = j;
+ }
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ return true; // Callback already enqueued.
+ }
+
+ // We need to use the bypass.
+ rcu_nocb_wait_contended(rdp);
+ rcu_nocb_bypass_lock(rdp);
+ ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
+ rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
+ rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
+ if (!ncbs) {
+ WRITE_ONCE(rdp->nocb_bypass_first, j);
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ"));
+ }
+ rcu_nocb_bypass_unlock(rdp);
+ smp_mb(); /* Order enqueue before wake. */
+ if (ncbs) {
+ local_irq_restore(flags);
+ } else {
+ // No-CBs GP kthread might be indefinitely asleep, if so, wake.
+ rcu_nocb_lock(rdp); // Rare during call_rcu() flood.
+ if (!rcu_segcblist_pend_cbs(&rdp->cblist)) {
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
+ TPS("FirstBQwake"));
+ __call_rcu_nocb_wake(rdp, true, flags);
+ } else {
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
+ TPS("FirstBQnoWake"));
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ }
+ }
+ return true; // Callback already enqueued.
+}
+
+/*
+ * Awaken the no-CBs grace-period kthread if needed, either due to it
+ * legitimately being asleep or due to overload conditions.
+ *
+ * If warranted, also wake up the kthread servicing this CPU's queues.
+ */
+static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
+ unsigned long flags)
+ __releases(rdp->nocb_lock)
+{
+ unsigned long cur_gp_seq;
+ unsigned long j;
+ long len;
+ struct task_struct *t;
+
+ // If we are being polled or there is no kthread, just leave.
+ t = READ_ONCE(rdp->nocb_gp_kthread);
+ if (rcu_nocb_poll || !t) {
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
+ TPS("WakeNotPoll"));
+ return;
+ }
+ // Need to actually do a wakeup.
+ len = rcu_segcblist_n_cbs(&rdp->cblist);
+ if (was_alldone) {
+ rdp->qlen_last_fqs_check = len;
+ if (!irqs_disabled_flags(flags)) {
+ /* ... if queue was empty ... */
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ wake_nocb_gp(rdp, false);
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
+ TPS("WakeEmpty"));
+ } else {
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
+ TPS("WakeEmptyIsDeferred"));
+ }
+ } else if (len > rdp->qlen_last_fqs_check + qhimark) {
+ /* ... or if many callbacks queued. */
+ rdp->qlen_last_fqs_check = len;
+ j = jiffies;
+ if (j != rdp->nocb_gp_adv_time &&
+ rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
+ rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
+ rcu_advance_cbs_nowake(rdp->mynode, rdp);
+ rdp->nocb_gp_adv_time = j;
+ }
+ smp_mb(); /* Enqueue before timer_pending(). */
+ if ((rdp->nocb_cb_sleep ||
+ !rcu_segcblist_ready_cbs(&rdp->cblist)) &&
+ !timer_pending(&rdp->nocb_timer)) {
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
+ TPS("WakeOvfIsDeferred"));
+ } else {
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
+ }
+ } else {
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
+ }
+ return;
+}
+
+/*
+ * Check if we ignore this rdp.
+ *
+ * We check that without holding the nocb lock but
+ * we make sure not to miss a freshly offloaded rdp
+ * with the current ordering:
+ *
+ * rdp_offload_toggle() nocb_gp_enabled_cb()
+ * ------------------------- ----------------------------
+ * WRITE flags LOCK nocb_gp_lock
+ * LOCK nocb_gp_lock READ/WRITE nocb_gp_sleep
+ * READ/WRITE nocb_gp_sleep UNLOCK nocb_gp_lock
+ * UNLOCK nocb_gp_lock READ flags
+ */
+static inline bool nocb_gp_enabled_cb(struct rcu_data *rdp)
+{
+ u8 flags = SEGCBLIST_OFFLOADED | SEGCBLIST_KTHREAD_GP;
+
+ return rcu_segcblist_test_flags(&rdp->cblist, flags);
+}
+
+static inline bool nocb_gp_update_state_deoffloading(struct rcu_data *rdp,
+ bool *needwake_state)
+{
+ struct rcu_segcblist *cblist = &rdp->cblist;
+
+ if (rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED)) {
+ if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)) {
+ rcu_segcblist_set_flags(cblist, SEGCBLIST_KTHREAD_GP);
+ if (rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB))
+ *needwake_state = true;
+ }
+ return false;
+ }
+
+ /*
+ * De-offloading. Clear our flag and notify the de-offload worker.
+ * We will ignore this rdp until it gets re-offloaded.
+ */
+ WARN_ON_ONCE(!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
+ rcu_segcblist_clear_flags(cblist, SEGCBLIST_KTHREAD_GP);
+ if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB))
+ *needwake_state = true;
+ return true;
+}
+
+
+/*
+ * No-CBs GP kthreads come here to wait for additional callbacks to show up
+ * or for grace periods to end.
+ */
+static void nocb_gp_wait(struct rcu_data *my_rdp)
+{
+ bool bypass = false;
+ long bypass_ncbs;
+ int __maybe_unused cpu = my_rdp->cpu;
+ unsigned long cur_gp_seq;
+ unsigned long flags;
+ bool gotcbs = false;
+ unsigned long j = jiffies;
+ bool needwait_gp = false; // This prevents actual uninitialized use.
+ bool needwake;
+ bool needwake_gp;
+ struct rcu_data *rdp;
+ struct rcu_node *rnp;
+ unsigned long wait_gp_seq = 0; // Suppress "use uninitialized" warning.
+ bool wasempty = false;
+
+ /*
+ * Each pass through the following loop checks for CBs and for the
+ * nearest grace period (if any) to wait for next. The CB kthreads
+ * and the global grace-period kthread are awakened if needed.
+ */
+ WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp);
+ for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) {
+ bool needwake_state = false;
+
+ if (!nocb_gp_enabled_cb(rdp))
+ continue;
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check"));
+ rcu_nocb_lock_irqsave(rdp, flags);
+ if (nocb_gp_update_state_deoffloading(rdp, &needwake_state)) {
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ if (needwake_state)
+ swake_up_one(&rdp->nocb_state_wq);
+ continue;
+ }
+ bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
+ if (bypass_ncbs &&
+ (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) ||
+ bypass_ncbs > 2 * qhimark)) {
+ // Bypass full or old, so flush it.
+ (void)rcu_nocb_try_flush_bypass(rdp, j);
+ bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
+ } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ if (needwake_state)
+ swake_up_one(&rdp->nocb_state_wq);
+ continue; /* No callbacks here, try next. */
+ }
+ if (bypass_ncbs) {
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
+ TPS("Bypass"));
+ bypass = true;
+ }
+ rnp = rdp->mynode;
+
+ // Advance callbacks if helpful and low contention.
+ needwake_gp = false;
+ if (!rcu_segcblist_restempty(&rdp->cblist,
+ RCU_NEXT_READY_TAIL) ||
+ (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
+ rcu_seq_done(&rnp->gp_seq, cur_gp_seq))) {
+ raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
+ needwake_gp = rcu_advance_cbs(rnp, rdp);
+ wasempty = rcu_segcblist_restempty(&rdp->cblist,
+ RCU_NEXT_READY_TAIL);
+ raw_spin_unlock_rcu_node(rnp); /* irqs disabled. */
+ }
+ // Need to wait on some grace period?
+ WARN_ON_ONCE(wasempty &&
+ !rcu_segcblist_restempty(&rdp->cblist,
+ RCU_NEXT_READY_TAIL));
+ if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq)) {
+ if (!needwait_gp ||
+ ULONG_CMP_LT(cur_gp_seq, wait_gp_seq))
+ wait_gp_seq = cur_gp_seq;
+ needwait_gp = true;
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
+ TPS("NeedWaitGP"));
+ }
+ if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
+ needwake = rdp->nocb_cb_sleep;
+ WRITE_ONCE(rdp->nocb_cb_sleep, false);
+ smp_mb(); /* CB invocation -after- GP end. */
+ } else {
+ needwake = false;
+ }
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ if (needwake) {
+ swake_up_one(&rdp->nocb_cb_wq);
+ gotcbs = true;
+ }
+ if (needwake_gp)
+ rcu_gp_kthread_wake();
+ if (needwake_state)
+ swake_up_one(&rdp->nocb_state_wq);
+ }
+
+ my_rdp->nocb_gp_bypass = bypass;
+ my_rdp->nocb_gp_gp = needwait_gp;
+ my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0;
+
+ if (bypass && !rcu_nocb_poll) {
+ // At least one child with non-empty ->nocb_bypass, so set
+ // timer in order to avoid stranding its callbacks.
+ wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS,
+ TPS("WakeBypassIsDeferred"));
+ }
+ if (rcu_nocb_poll) {
+ /* Polling, so trace if first poll in the series. */
+ if (gotcbs)
+ trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
+ schedule_timeout_idle(1);
+ } else if (!needwait_gp) {
+ /* Wait for callbacks to appear. */
+ trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
+ swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
+ !READ_ONCE(my_rdp->nocb_gp_sleep));
+ trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EndSleep"));
+ } else {
+ rnp = my_rdp->mynode;
+ trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("StartWait"));
+ swait_event_interruptible_exclusive(
+ rnp->nocb_gp_wq[rcu_seq_ctr(wait_gp_seq) & 0x1],
+ rcu_seq_done(&rnp->gp_seq, wait_gp_seq) ||
+ !READ_ONCE(my_rdp->nocb_gp_sleep));
+ trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("EndWait"));
+ }
+ if (!rcu_nocb_poll) {
+ raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
+ if (my_rdp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
+ WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
+ del_timer(&my_rdp->nocb_timer);
+ }
+ WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
+ raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
+ }
+ my_rdp->nocb_gp_seq = -1;
+ WARN_ON(signal_pending(current));
+}
+
+/*
+ * No-CBs grace-period-wait kthread. There is one of these per group
+ * of CPUs, but only once at least one CPU in that group has come online
+ * at least once since boot. This kthread checks for newly posted
+ * callbacks from any of the CPUs it is responsible for, waits for a
+ * grace period, then awakens all of the rcu_nocb_cb_kthread() instances
+ * that then have callback-invocation work to do.
+ */
+static int rcu_nocb_gp_kthread(void *arg)
+{
+ struct rcu_data *rdp = arg;
+
+ for (;;) {
+ WRITE_ONCE(rdp->nocb_gp_loops, rdp->nocb_gp_loops + 1);
+ nocb_gp_wait(rdp);
+ cond_resched_tasks_rcu_qs();
+ }
+ return 0;
+}
+
+static inline bool nocb_cb_can_run(struct rcu_data *rdp)
+{
+ u8 flags = SEGCBLIST_OFFLOADED | SEGCBLIST_KTHREAD_CB;
+ return rcu_segcblist_test_flags(&rdp->cblist, flags);
+}
+
+static inline bool nocb_cb_wait_cond(struct rcu_data *rdp)
+{
+ return nocb_cb_can_run(rdp) && !READ_ONCE(rdp->nocb_cb_sleep);
+}
+
+/*
+ * Invoke any ready callbacks from the corresponding no-CBs CPU,
+ * then, if there are no more, wait for more to appear.
+ */
+static void nocb_cb_wait(struct rcu_data *rdp)
+{
+ struct rcu_segcblist *cblist = &rdp->cblist;
+ unsigned long cur_gp_seq;
+ unsigned long flags;
+ bool needwake_state = false;
+ bool needwake_gp = false;
+ bool can_sleep = true;
+ struct rcu_node *rnp = rdp->mynode;
+
+ local_irq_save(flags);
+ rcu_momentary_dyntick_idle();
+ local_irq_restore(flags);
+ /*
+ * Disable BH to provide the expected environment. Also, when
+ * transitioning to/from NOCB mode, a self-requeuing callback might
+ * be invoked from softirq. A short grace period could cause both
+ * instances of this callback to execute concurrently.
+ */
+ local_bh_disable();
+ rcu_do_batch(rdp);
+ local_bh_enable();
+ lockdep_assert_irqs_enabled();
+ rcu_nocb_lock_irqsave(rdp, flags);
+ if (rcu_segcblist_nextgp(cblist, &cur_gp_seq) &&
+ rcu_seq_done(&rnp->gp_seq, cur_gp_seq) &&
+ raw_spin_trylock_rcu_node(rnp)) { /* irqs already disabled. */
+ needwake_gp = rcu_advance_cbs(rdp->mynode, rdp);
+ raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
+ }
+
+ if (rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED)) {
+ if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB)) {
+ rcu_segcblist_set_flags(cblist, SEGCBLIST_KTHREAD_CB);
+ if (rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP))
+ needwake_state = true;
+ }
+ if (rcu_segcblist_ready_cbs(cblist))
+ can_sleep = false;
+ } else {
+ /*
+ * De-offloading. Clear our flag and notify the de-offload worker.
+ * We won't touch the callbacks and keep sleeping until we
+ * get re-offloaded.
+ */
+ WARN_ON_ONCE(!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB));
+ rcu_segcblist_clear_flags(cblist, SEGCBLIST_KTHREAD_CB);
+ if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP))
+ needwake_state = true;
+ }
+
+ WRITE_ONCE(rdp->nocb_cb_sleep, can_sleep);
+
+ if (rdp->nocb_cb_sleep)
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep"));
+
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ if (needwake_gp)
+ rcu_gp_kthread_wake();
+
+ if (needwake_state)
+ swake_up_one(&rdp->nocb_state_wq);
+
+ do {
+ swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
+ nocb_cb_wait_cond(rdp));
+
+ // VVV Ensure CB invocation follows _sleep test.
+ if (smp_load_acquire(&rdp->nocb_cb_sleep)) { // ^^^
+ WARN_ON(signal_pending(current));
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
+ }
+ } while (!nocb_cb_can_run(rdp));
+}
+
+/*
+ * Per-rcu_data kthread, but only for no-CBs CPUs. Repeatedly invoke
+ * nocb_cb_wait() to do the dirty work.
+ */
+static int rcu_nocb_cb_kthread(void *arg)
+{
+ struct rcu_data *rdp = arg;
+
+ // Each pass through this loop does one callback batch, and,
+ // if there are no more ready callbacks, waits for them.
+ for (;;) {
+ nocb_cb_wait(rdp);
+ cond_resched_tasks_rcu_qs();
+ }
+ return 0;
+}
+
+/* Is a deferred wakeup of the no-CBs GP kthread pending at >= level? */
+static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level)
+{
+ return READ_ONCE(rdp->nocb_defer_wakeup) >= level;
+}
+
+/* Do a deferred wakeup of the no-CBs GP kthread, releasing ->nocb_gp_lock. */
+static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp_gp,
+ struct rcu_data *rdp, int level,
+ unsigned long flags)
+ __releases(rdp_gp->nocb_gp_lock)
+{
+ int ndw;
+ int ret;
+
+ if (!rcu_nocb_need_deferred_wakeup(rdp_gp, level)) {
+ raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
+ return false;
+ }
+
+ ndw = rdp_gp->nocb_defer_wakeup;
+ ret = __wake_nocb_gp(rdp_gp, rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
+
+ return ret;
+}
+
+/* Do a deferred wakeup of the no-CBs GP kthread from a timer handler. */
+static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
+{
+ unsigned long flags;
+ struct rcu_data *rdp = from_timer(rdp, t, nocb_timer);
+
+ WARN_ON_ONCE(rdp->nocb_gp_rdp != rdp);
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));
+
+ raw_spin_lock_irqsave(&rdp->nocb_gp_lock, flags);
+ smp_mb__after_spinlock(); /* Timer expire before wakeup. */
+ do_nocb_deferred_wakeup_common(rdp, rdp, RCU_NOCB_WAKE_BYPASS, flags);
+}
+
+/*
+ * Do a deferred wakeup of the no-CBs GP kthread from fastpath.
+ * This means we do an inexact common-case check. Note that if
+ * we miss, ->nocb_timer will eventually clean things up.
+ */
+static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
+{
+ unsigned long flags;
+ struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
+
+ if (!rdp_gp || !rcu_nocb_need_deferred_wakeup(rdp_gp, RCU_NOCB_WAKE))
+ return false;
+
+ raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
+ return do_nocb_deferred_wakeup_common(rdp_gp, rdp, RCU_NOCB_WAKE, flags);
+}
+
+void rcu_nocb_flush_deferred_wakeup(void)
+{
+ do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data));
+}
+EXPORT_SYMBOL_GPL(rcu_nocb_flush_deferred_wakeup);
+
+static int rdp_offload_toggle(struct rcu_data *rdp,
+ bool offload, unsigned long flags)
+ __releases(rdp->nocb_lock)
+{
+ struct rcu_segcblist *cblist = &rdp->cblist;
+ struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
+ bool wake_gp = false;
+
+ rcu_segcblist_offload(cblist, offload);
+
+ if (rdp->nocb_cb_sleep)
+ rdp->nocb_cb_sleep = false;
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+
+ /*
+ * Ignore former value of nocb_cb_sleep and force wake up as it could
+ * have been spuriously set to false already.
+ */
+ swake_up_one(&rdp->nocb_cb_wq);
+
+ raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
+ if (rdp_gp->nocb_gp_sleep) {
+ rdp_gp->nocb_gp_sleep = false;
+ wake_gp = true;
+ }
+ raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
+
+ if (wake_gp)
+ wake_up_process(rdp_gp->nocb_gp_kthread);
+
+ return 0;
+}
+
+/*
+ * De-offload the callbacks of the rcu_data structure passed in @arg.
+ *
+ * The void * parameter and long return type are the callback form that
+ * rcu_nocb_cpu_deoffload() passes to work_on_cpu(); the function warns
+ * if it finds itself running on a CPU other than rdp->cpu.
+ *
+ * Returns whatever rdp_offload_toggle() returned.
+ */
+static long rcu_nocb_rdp_deoffload(void *arg)
+{
+ struct rcu_data *rdp = arg;
+ struct rcu_segcblist *cblist = &rdp->cblist;
+ unsigned long flags;
+ int ret;
+
+ WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
+
+ pr_info("De-offloading %d\n", rdp->cpu);
+
+ rcu_nocb_lock_irqsave(rdp, flags);
+ /*
+ * Flush once and for all now. This suffices because we are
+ * running on the target CPU holding ->nocb_lock (thus having
+ * interrupts disabled), and because rdp_offload_toggle()
+ * invokes rcu_segcblist_offload(), which clears SEGCBLIST_OFFLOADED.
+ * Thus future calls to rcu_segcblist_completely_offloaded() will
+ * return false, which means that future calls to rcu_nocb_try_bypass()
+ * will refuse to put anything into the bypass.
+ */
+ WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
+ /* rdp_offload_toggle() releases ->nocb_lock and restores flags. */
+ ret = rdp_offload_toggle(rdp, false, flags);
+ /*
+ * Sleep until the test on SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP
+ * comes back false.  NOTE(review): presumably that means both kthreads
+ * have acknowledged the de-offload by clearing their flag -- confirm
+ * against rcu_segcblist_test_flags() and the kthread wait loops.
+ */
+ swait_event_exclusive(rdp->nocb_state_wq,
+ !rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB |
+ SEGCBLIST_KTHREAD_GP));
+ /*
+ * Lock one last time to acquire latest callback updates from kthreads
+ * so we can later handle callbacks locally without locking.
+ */
+ rcu_nocb_lock_irqsave(rdp, flags);
+ /*
+ * Theoretically we could set SEGCBLIST_SOFTIRQ_ONLY after the nocb
+ * lock is released but how about being paranoid for once?
+ */
+ rcu_segcblist_set_flags(cblist, SEGCBLIST_SOFTIRQ_ONLY);
+ /*
+ * With SEGCBLIST_SOFTIRQ_ONLY, we can't use
+ * rcu_nocb_unlock_irqrestore() anymore.
+ */
+ raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
+
+ /* Sanity check: the bypass list was flushed above and must stay empty. */
+ WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
+
+
+ return ret;
+}
+
+/*
+ * Stop offloading RCU callbacks for @cpu.
+ *
+ * Runs rcu_nocb_rdp_deoffload() on @cpu through work_on_cpu() while
+ * holding rcu_state.barrier_mutex and the CPU-hotplug read lock, and
+ * removes @cpu from rcu_nocb_mask when that succeeds.
+ *
+ * Returns 0 on success or when @cpu was not offloaded to begin with,
+ * -EINVAL when @cpu is offline, otherwise the value returned by
+ * work_on_cpu().
+ */
+int rcu_nocb_cpu_deoffload(int cpu)
+{
+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+	int ret = 0;
+
+	mutex_lock(&rcu_state.barrier_mutex);
+	cpus_read_lock();
+
+	/* Not offloaded: nothing to undo. */
+	if (!rcu_rdp_is_offloaded(rdp))
+		goto out_unlock;
+
+	if (!cpu_online(cpu)) {
+		pr_info("NOCB: Can't CB-deoffload an offline CPU\n");
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	ret = work_on_cpu(cpu, rcu_nocb_rdp_deoffload, rdp);
+	if (ret == 0)
+		cpumask_clear_cpu(cpu, rcu_nocb_mask);
+
+out_unlock:
+	cpus_read_unlock();
+	mutex_unlock(&rcu_state.barrier_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload);
+
+/*
+ * (Re-)offload the callbacks of the rcu_data structure passed in @arg.
+ *
+ * The void * parameter and long return type are the callback form that
+ * rcu_nocb_cpu_offload() passes to work_on_cpu(); the function warns if
+ * it finds itself running on a CPU other than rdp->cpu.
+ *
+ * Returns -EINVAL if the rdp has no ->nocb_gp_rdp, otherwise whatever
+ * rdp_offload_toggle() returned.
+ */
+static long rcu_nocb_rdp_offload(void *arg)
+{
+ struct rcu_data *rdp = arg;
+ struct rcu_segcblist *cblist = &rdp->cblist;
+ unsigned long flags;
+ int ret;
+
+ WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
+ /*
+ * For now we only support re-offload, i.e. the rdp must have been
+ * offloaded on boot first.
+ */
+ if (!rdp->nocb_gp_rdp)
+ return -EINVAL;
+
+ pr_info("Offloading %d\n", rdp->cpu);
+ /*
+ * Can't use rcu_nocb_lock_irqsave() while we are in
+ * SEGCBLIST_SOFTIRQ_ONLY mode.
+ */
+ raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
+
+ /*
+ * We didn't take the nocb lock while working on the
+ * rdp->cblist in SEGCBLIST_SOFTIRQ_ONLY mode.
+ * Every modification that has been done previously on
+ * rdp->cblist must be visible remotely by the nocb kthreads
+ * upon wake up after reading the cblist flags.
+ *
+ * The layout against nocb_lock enforces that ordering:
+ *
+ * rcu_nocb_rdp_offload()     nocb_cb_wait()/nocb_gp_wait()
+ * -------------------------  ----------------------------
+ * WRITE callbacks            rcu_nocb_lock()
+ * rcu_nocb_lock()            READ flags
+ * WRITE flags                READ callbacks
+ * rcu_nocb_unlock()          rcu_nocb_unlock()
+ */
+ /* rdp_offload_toggle() releases ->nocb_lock and restores flags. */
+ ret = rdp_offload_toggle(rdp, true, flags);
+ /* Sleep until both SEGCBLIST_KTHREAD_CB and SEGCBLIST_KTHREAD_GP test true. */
+ swait_event_exclusive(rdp->nocb_state_wq,
+ rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
+ rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
+
+ return ret;
+}
+
+/*
+ * Start offloading RCU callbacks for @cpu.
+ *
+ * Runs rcu_nocb_rdp_offload() on @cpu through work_on_cpu() while
+ * holding rcu_state.barrier_mutex and the CPU-hotplug read lock, and
+ * adds @cpu to rcu_nocb_mask when that succeeds.
+ *
+ * Returns 0 on success or when @cpu was already offloaded, -EINVAL when
+ * @cpu is offline, otherwise the value returned by work_on_cpu().
+ */
+int rcu_nocb_cpu_offload(int cpu)
+{
+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+	int ret = 0;
+
+	mutex_lock(&rcu_state.barrier_mutex);
+	cpus_read_lock();
+
+	/* Already offloaded: nothing to do. */
+	if (rcu_rdp_is_offloaded(rdp))
+		goto out_unlock;
+
+	if (!cpu_online(cpu)) {
+		pr_info("NOCB: Can't CB-offload an offline CPU\n");
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	ret = work_on_cpu(cpu, rcu_nocb_rdp_offload, rdp);
+	if (ret == 0)
+		cpumask_set_cpu(cpu, rcu_nocb_mask);
+
+out_unlock:
+	cpus_read_unlock();
+	mutex_unlock(&rcu_state.barrier_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload);
+
+/*
+ * Boot-time setup of the set of CPUs whose RCU callbacks are offloaded.
+ *
+ * Allocates rcu_nocb_mask if it is not yet available and nohz_full CPUs
+ * need it, folds tick_nohz_full_mask into it, trims it to
+ * cpu_possible_mask, reports the outcome, marks the ->cblist of every CPU
+ * in the mask as offloaded with both kthread flags set, and finally calls
+ * rcu_organize_nocb_kthreads().  Returns early, doing nothing further,
+ * when no mask is available.
+ */
+void __init rcu_init_nohz(void)
+{
+ int cpu;
+ bool need_rcu_nocb_mask = false;
+ struct rcu_data *rdp;
+
+#if defined(CONFIG_NO_HZ_FULL)
+ if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
+ need_rcu_nocb_mask = true;
+#endif /* #if defined(CONFIG_NO_HZ_FULL) */
+
+ /* Allocate the mask only if nohz_full needs one and none exists yet. */
+ if (!cpumask_available(rcu_nocb_mask) && need_rcu_nocb_mask) {
+ if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
+ pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
+ return;
+ }
+ }
+ if (!cpumask_available(rcu_nocb_mask))
+ return;
+
+#if defined(CONFIG_NO_HZ_FULL)
+ if (tick_nohz_full_running)
+ cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
+#endif /* #if defined(CONFIG_NO_HZ_FULL) */
+
+ /* Drop any CPUs from the mask that are not in cpu_possible_mask. */
+ if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
+ pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
+ cpumask_and(rcu_nocb_mask, cpu_possible_mask,
+ rcu_nocb_mask);
+ }
+ if (cpumask_empty(rcu_nocb_mask))
+ pr_info("\tOffload RCU callbacks from CPUs: (none).\n");
+ else
+ pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
+ cpumask_pr_args(rcu_nocb_mask));
+ if (rcu_nocb_poll)
+ pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
+
+ /* Mark each selected CPU's callback list as fully offloaded. */
+ for_each_cpu(cpu, rcu_nocb_mask) {
+ rdp = per_cpu_ptr(&rcu_data, cpu);
+ if (rcu_segcblist_empty(&rdp->cblist))
+ rcu_segcblist_init(&rdp->cblist);
+ rcu_segcblist_offload(&rdp->cblist, true);
+ rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB);
+ rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_GP);
+ }
+ rcu_organize_nocb_kthreads();
+}
+
+/*
+ * Initialize per-rcu_data variables for no-CBs CPUs: the three swait
+ * queues (->nocb_cb_wq, ->nocb_gp_wq, ->nocb_state_wq), the three raw
+ * spinlocks (->nocb_lock, ->nocb_bypass_lock, ->nocb_gp_lock), the
+ * deferred-wakeup timer (handler do_nocb_deferred_wakeup_timer()), and
+ * the ->nocb_bypass callback list.
+ */
+static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
+{
+ init_swait_queue_head(&rdp->nocb_cb_wq);
+ init_swait_queue_head(&rdp->nocb_gp_wq);
+ init_swait_queue_head(&rdp->nocb_state_wq);
+ raw_spin_lock_init(&rdp->nocb_lock);
+ raw_spin_lock_init(&rdp->nocb_bypass_lock);
+ raw_spin_lock_init(&rdp->nocb_gp_lock);
+ timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
+ rcu_cblist_init(&rdp->nocb_bypass);
+}
+
+/*
+ * If the specified CPU is a no-CBs CPU that does not already have its
+ * rcuo CB kthread, spawn it. Additionally, if the rcuo GP kthread
+ * for this CPU's group has not yet been created, spawn it as well.
+ *
+ * If either kthread_run() fails, a one-time warning is emitted and the
+ * function returns without recording a CB kthread for this CPU.
+ */
+static void rcu_spawn_one_nocb_kthread(int cpu)
+{
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+ struct rcu_data *rdp_gp;
+ struct task_struct *t;
+
+ /*
+ * If this isn't a no-CBs CPU or if it already has an rcuo kthread,
+ * then nothing to do.
+ */
+ if (!rcu_is_nocb_cpu(cpu) || rdp->nocb_cb_kthread)
+ return;
+
+ /* Spawn this CPU's group GP kthread first if it does not exist yet. */
+ rdp_gp = rdp->nocb_gp_rdp;
+ if (!rdp_gp->nocb_gp_kthread) {
+ t = kthread_run(rcu_nocb_gp_kthread, rdp_gp,
+ "rcuog/%d", rdp_gp->cpu);
+ if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__))
+ return;
+ WRITE_ONCE(rdp_gp->nocb_gp_kthread, t);
+ }
+
+ /* Spawn the kthread for this CPU. */
+ t = kthread_run(rcu_nocb_cb_kthread, rdp,
+ "rcuo%c/%d", rcu_state.abbr, cpu);
+ if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))
+ return;
+ /* Record the CB kthread, then copy the group's GP kthread pointer. */
+ WRITE_ONCE(rdp->nocb_cb_kthread, t);
+ WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
+}
+
+/*
+ * Spawn the rcuo kthread(s) for the specified CPU by way of
+ * rcu_spawn_one_nocb_kthread(), but only once
+ * rcu_scheduler_fully_active is set.  Before that point this is a no-op.
+ */
+static void rcu_spawn_cpu_nocb_kthread(int cpu)
+{
+	if (!rcu_scheduler_fully_active)
+		return;
+
+	rcu_spawn_one_nocb_kthread(cpu);
+}
+
+/*
+ * Walk every online CPU and give each a chance to get its rcuo kthreads
+ * through rcu_spawn_cpu_nocb_kthread().  Intended to run once the
+ * scheduler is up.  This relies on the early_initcall()s running before
+ * any non-boot CPU comes online; should that ever change, mutual
+ * exclusion will have to be added here.
+ */
+static void __init rcu_spawn_nocb_kthreads(void)
+{
+	int online_cpu;
+
+	for_each_online_cpu(online_cpu)
+		rcu_spawn_cpu_nocb_kthread(online_cpu);
+}
+
+/*
+ * How many CB CPU IDs per GP kthread?  The default of -1 is replaced in
+ * rcu_organize_nocb_kthreads() by nr_cpu_ids / int_sqrt(nr_cpu_ids).
+ * Exposed as a module parameter with mode 0444.
+ */
+static int rcu_nocb_gp_stride = -1;
+module_param(rcu_nocb_gp_stride, int, 0444);
+
+/*
+ * Initialize GP-CB relationships for all no-CBs CPUs.
+ *
+ * The CPUs in rcu_nocb_mask are partitioned by CPU number into windows of
+ * rcu_nocb_gp_stride IDs.  The first no-CBs CPU found in each window
+ * becomes that group's GP leader (its ->nocb_gp_rdp points to itself);
+ * every later CPU in the window points ->nocb_gp_rdp at the leader and is
+ * appended to the chain formed by ->nocb_next_cb_rdp.  When dump_tree is
+ * set, the resulting layout is printed.
+ *
+ * NOTE(review): a stride of 0 supplied through the module parameter looks
+ * like it would make the DIV_ROUND_UP() below divide by zero -- confirm
+ * whether the parameter needs validating.
+ */
+static void __init rcu_organize_nocb_kthreads(void)
+{
+ int cpu;
+ bool firsttime = true;
+ bool gotnocbs = false;
+ bool gotnocbscbs = true;
+ int ls = rcu_nocb_gp_stride;
+ int nl = 0; /* First CPU number belonging to the next GP group. */
+ struct rcu_data *rdp;
+ struct rcu_data *rdp_gp = NULL; /* Suppress misguided gcc warn. */
+ struct rcu_data *rdp_prev = NULL;
+
+ if (!cpumask_available(rcu_nocb_mask))
+ return;
+ /* Resolve the -1 default and publish the stride actually in use. */
+ if (ls == -1) {
+ ls = nr_cpu_ids / int_sqrt(nr_cpu_ids);
+ rcu_nocb_gp_stride = ls;
+ }
+
+ /*
+ * Each pass through this loop sets up one rcu_data structure.
+ * Should the corresponding CPU come online in the future, then
+ * we will spawn the needed set of rcu_nocb_kthread() kthreads.
+ */
+ for_each_cpu(cpu, rcu_nocb_mask) {
+ rdp = per_cpu_ptr(&rcu_data, cpu);
+ if (rdp->cpu >= nl) {
+ /* New GP kthread, set up for CBs & next GP. */
+ gotnocbs = true;
+ nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls;
+ rdp->nocb_gp_rdp = rdp;
+ rdp_gp = rdp;
+ if (dump_tree) {
+ if (!firsttime)
+ pr_cont("%s\n", gotnocbscbs
+ ? "" : " (self only)");
+ gotnocbscbs = false;
+ firsttime = false;
+ pr_alert("%s: No-CB GP kthread CPU %d:",
+ __func__, cpu);
+ }
+ } else {
+ /* Another CB kthread, link to previous GP kthread. */
+ gotnocbscbs = true;
+ rdp->nocb_gp_rdp = rdp_gp;
+ rdp_prev->nocb_next_cb_rdp = rdp;
+ if (dump_tree)
+ pr_cont(" %d", cpu);
+ }
+ rdp_prev = rdp;
+ }
+ /* Terminate the last group's dump line. */
+ if (gotnocbs && dump_tree)
+ pr_cont("%s\n", gotnocbscbs ? "" : " (self only)");
+}
+
+/*
+ * Restrict the calling task's CPU affinity to the set of offloaded CPUs.
+ * When rcu_nocb_mask is unavailable or empty the task is left as it is.
+ * A failed sched_setaffinity() call triggers a WARN_ON() splat.
+ */
+void rcu_bind_current_to_nocb(void)
+{
+	if (!cpumask_available(rcu_nocb_mask))
+		return;
+	if (!cpumask_weight(rcu_nocb_mask))
+		return;
+
+	WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask));
+}
+EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb);
+
+// ->on_cpu exists only when CONFIG_SMP=y, hence the two variants below.
+#ifdef CONFIG_SMP
+// Return "!" for a non-NULL task that is running according to
+// task_is_running() yet has ->on_cpu clear, and "" in every other case.
+static char *show_rcu_should_be_on_cpu(struct task_struct *tsp)
+{
+	if (!tsp || !task_is_running(tsp))
+		return "";
+
+	return tsp->on_cpu ? "" : "!";
+}
+#else // #ifdef CONFIG_SMP
+// No ->on_cpu to inspect, so there is never anything to flag.
+static char *show_rcu_should_be_on_cpu(struct task_struct *tsp)
+{
+	return "";
+}
+#endif // #else #ifdef CONFIG_SMP
+
+/*
+ * Dump out nocb grace-period kthread state for the specified rcu_data
+ * structure.
+ *
+ * The three trailing fields ("<state> CPU <n><!>") all describe the GP
+ * kthread, ->nocb_gp_kthread: its task-state character ('.' if there is
+ * no such kthread), its task_cpu() value (-1 if there is no such
+ * kthread), and the marker from show_rcu_should_be_on_cpu().
+ */
+static void show_rcu_nocb_gp_state(struct rcu_data *rdp)
+{
+	struct rcu_node *rnp = rdp->mynode;
+
+	pr_info("nocb GP %d %c%c%c%c%c %c[%c%c] %c%c:%ld rnp %d:%d %lu %c CPU %d%s\n",
+		rdp->cpu,
+		"kK"[!!rdp->nocb_gp_kthread],
+		"lL"[raw_spin_is_locked(&rdp->nocb_gp_lock)],
+		"dD"[!!rdp->nocb_defer_wakeup],
+		"tT"[timer_pending(&rdp->nocb_timer)],
+		"sS"[!!rdp->nocb_gp_sleep],
+		".W"[swait_active(&rdp->nocb_gp_wq)],
+		".W"[swait_active(&rnp->nocb_gp_wq[0])],
+		".W"[swait_active(&rnp->nocb_gp_wq[1])],
+		".B"[!!rdp->nocb_gp_bypass],
+		".G"[!!rdp->nocb_gp_gp],
+		(long)rdp->nocb_gp_seq,
+		rnp->grplo, rnp->grphi, READ_ONCE(rdp->nocb_gp_loops),
+		rdp->nocb_gp_kthread ? task_state_to_char(rdp->nocb_gp_kthread) : '.',
+		/*
+		 * Guard task_cpu() with the very pointer it dereferences;
+		 * testing ->nocb_cb_kthread here could let a NULL
+		 * ->nocb_gp_kthread through.
+		 */
+		rdp->nocb_gp_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1,
+		/* This line reports the GP kthread, so flag that task. */
+		show_rcu_should_be_on_cpu(rdp->nocb_gp_kthread));
+}
+
+/*
+ * Dump out nocb kthread state for the specified rcu_data structure.
+ *
+ * If @rdp leads its GP group, the GP-kthread line is printed first via
+ * show_rcu_nocb_gp_state().  The " CB ..." line then describes the CB
+ * side; its three trailing fields ("<state> CPU <n><!>") all refer to
+ * the CB kthread, ->nocb_cb_kthread.  Finally, for an rdp that is not a
+ * group leader, any sign of GP-side activity (->nocb_gp_sleep set,
+ * ->nocb_gp_lock held, or a waiter on ->nocb_gp_wq) is reported as an
+ * anomaly.
+ */
+static void show_rcu_nocb_state(struct rcu_data *rdp)
+{
+	char bufw[20];
+	char bufr[20];
+	struct rcu_segcblist *rsclp = &rdp->cblist;
+	bool waslocked;
+	bool wassleep;
+
+	if (rdp->nocb_gp_rdp == rdp)
+		show_rcu_nocb_gp_state(rdp);
+
+	/*
+	 * Bounded formatting: a 64-bit value printed with "%ld" can need
+	 * 20 characters plus the terminating NUL, one more than these
+	 * buffers hold.
+	 */
+	snprintf(bufw, sizeof(bufw), "%ld", rsclp->gp_seq[RCU_WAIT_TAIL]);
+	snprintf(bufr, sizeof(bufr), "%ld", rsclp->gp_seq[RCU_NEXT_READY_TAIL]);
+	pr_info(" CB %d^%d->%d %c%c%c%c%c%c F%ld L%ld C%d %c%c%s%c%s%c%c q%ld %c CPU %d%s\n",
+		rdp->cpu, rdp->nocb_gp_rdp->cpu,
+		rdp->nocb_next_cb_rdp ? rdp->nocb_next_cb_rdp->cpu : -1,
+		"kK"[!!rdp->nocb_cb_kthread],
+		"bB"[raw_spin_is_locked(&rdp->nocb_bypass_lock)],
+		"cC"[!!atomic_read(&rdp->nocb_lock_contended)],
+		"lL"[raw_spin_is_locked(&rdp->nocb_lock)],
+		"sS"[!!rdp->nocb_cb_sleep],
+		".W"[swait_active(&rdp->nocb_cb_wq)],
+		jiffies - rdp->nocb_bypass_first,
+		jiffies - rdp->nocb_nobypass_last,
+		rdp->nocb_nobypass_count,
+		".D"[rcu_segcblist_ready_cbs(rsclp)],
+		".W"[!rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL)],
+		rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL) ? "" : bufw,
+		".R"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL)],
+		rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL) ? "" : bufr,
+		".N"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL)],
+		".B"[!!rcu_cblist_n_cbs(&rdp->nocb_bypass)],
+		rcu_segcblist_n_cbs(&rdp->cblist),
+		rdp->nocb_cb_kthread ? task_state_to_char(rdp->nocb_cb_kthread) : '.',
+		/*
+		 * Report the CB kthread's CPU, dereferencing only the
+		 * pointer that was just checked for NULL.
+		 */
+		rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_cb_kthread) : -1,
+		show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread));
+
+	/* It is OK for GP kthreads to have GP state. */
+	if (rdp->nocb_gp_rdp == rdp)
+		return;
+
+	waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock);
+	wassleep = swait_active(&rdp->nocb_gp_wq);
+	if (!rdp->nocb_gp_sleep && !waslocked && !wassleep)
+		return;  /* Nothing untoward. */
+
+	pr_info("   nocb GP activity on CB-only CPU!!! %c%c%c %c\n",
+		"lL"[waslocked],
+		"dD"[!!rdp->nocb_defer_wakeup],
+		"sS"[!!rdp->nocb_gp_sleep],
+		".W"[wassleep]);
+}
+
+#else /* #ifdef CONFIG_RCU_NOCB_CPU */
+
+/* No ->nocb_lock without CONFIG_RCU_NOCB_CPU, so it is never held. */
+static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp)
+{
+ return 0;
+}
+
+/* No nocb kthreads without CONFIG_RCU_NOCB_CPU, so always false. */
+static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
+{
+ return false;
+}
+
+/* No ->nocb_lock to acquire. */
+static void rcu_nocb_lock(struct rcu_data *rdp)
+{
+}
+
+/* No ->nocb_lock to release. */
+static void rcu_nocb_unlock(struct rcu_data *rdp)
+{
+}
+
+/* No ->nocb_lock to release, so just restore the interrupt state. */
+static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
+ unsigned long flags)
+{
+ local_irq_restore(flags);
+}
+
+/* Lockdep check that ->cblist may be safely accessed. */
+static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
+{
+ lockdep_assert_irqs_disabled();
+}
+
+/* Stub: does nothing in this configuration. */
+static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
+{
+}
+
+/* Stub: there is no nocb wait queue to hand out, so return NULL. */
+static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
+{
+ return NULL;
+}
+
+/* Stub: nothing to initialize in the rcu_node structure. */
+static void rcu_init_one_nocb(struct rcu_node *rnp)
+{
+}
+
+/* Stub: always reports true without touching @rdp or @rhp. */
+static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ unsigned long j)
+{
+ return true;
+}
+
+/* Stub: always returns false and leaves *was_alldone untouched. */
+static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ bool *was_alldone, unsigned long flags)
+{
+ return false;
+}
+
+/* Stub: not expected to be reached in this configuration, so warn once. */
+static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
+ unsigned long flags)
+{
+ WARN_ON_ONCE(1); /* Should be dead code! */
+}
+
+/* Stub: no nocb per-CPU state to initialize. */
+static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
+{
+}
+
+/* Stub: a deferred wakeup is never needed. */
+static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level)
+{
+ return false;
+}
+
+/* Stub: performs no wakeup and reports false. */
+static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
+{
+ return false;
+}
+
+/* Stub: no rcuo kthreads to spawn for @cpu. */
+static void rcu_spawn_cpu_nocb_kthread(int cpu)
+{
+}
+
+/* Stub: no rcuo kthreads to spawn at boot. */
+static void __init rcu_spawn_nocb_kthreads(void)
+{
+}
+
+/* Stub: no nocb state to dump. */
+static void show_rcu_nocb_state(struct rcu_data *rdp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index de1dc3bb7f70..d070059163d7 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -13,39 +13,6 @@
#include "../locking/rtmutex_common.h"
-#ifdef CONFIG_RCU_NOCB_CPU
-static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
-static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */
-static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp)
-{
- return lockdep_is_held(&rdp->nocb_lock);
-}
-
-static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
-{
- /* Race on early boot between thread creation and assignment */
- if (!rdp->nocb_cb_kthread || !rdp->nocb_gp_kthread)
- return true;
-
- if (current == rdp->nocb_cb_kthread || current == rdp->nocb_gp_kthread)
- if (in_task())
- return true;
- return false;
-}
-
-#else
-static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp)
-{
- return 0;
-}
-
-static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
-{
- return false;
-}
-
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
-
static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
{
/*
@@ -346,7 +313,7 @@ void rcu_note_context_switch(bool preempt)
trace_rcu_utilization(TPS("Start context switch"));
lockdep_assert_irqs_disabled();
- WARN_ON_ONCE(!preempt && rcu_preempt_depth() > 0);
+ WARN_ONCE(!preempt && rcu_preempt_depth() > 0, "Voluntary context switch within RCU read-side critical section!");
if (rcu_preempt_depth() > 0 &&
!t->rcu_read_unlock_special.b.blocked) {
@@ -405,17 +372,20 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
static void rcu_preempt_read_enter(void)
{
- current->rcu_read_lock_nesting++;
+ WRITE_ONCE(current->rcu_read_lock_nesting, READ_ONCE(current->rcu_read_lock_nesting) + 1);
}
static int rcu_preempt_read_exit(void)
{
- return --current->rcu_read_lock_nesting;
+ int ret = READ_ONCE(current->rcu_read_lock_nesting) - 1;
+
+ WRITE_ONCE(current->rcu_read_lock_nesting, ret);
+ return ret;
}
static void rcu_preempt_depth_set(int val)
{
- current->rcu_read_lock_nesting = val;
+ WRITE_ONCE(current->rcu_read_lock_nesting, val);
}
/*
@@ -559,7 +529,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
WRITE_ONCE(rnp->exp_tasks, np);
if (IS_ENABLED(CONFIG_RCU_BOOST)) {
/* Snapshot ->boost_mtx ownership w/rnp->lock held. */
- drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
+ drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx.rtmutex) == t;
if (&t->rcu_node_entry == rnp->boost_tasks)
WRITE_ONCE(rnp->boost_tasks, np);
}
@@ -586,7 +556,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
/* Unboost if we were boosted. */
if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
- rt_mutex_futex_unlock(&rnp->boost_mtx);
+ rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex);
/*
* If this was the last task on the expedited lists,
@@ -1083,7 +1053,7 @@ static int rcu_boost(struct rcu_node *rnp)
* section.
*/
t = container_of(tb, struct task_struct, rcu_node_entry);
- rt_mutex_init_proxy_locked(&rnp->boost_mtx, t);
+ rt_mutex_init_proxy_locked(&rnp->boost_mtx.rtmutex, t);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
/* Lock only for side effect: boosts task t's priority. */
rt_mutex_lock(&rnp->boost_mtx);
@@ -1479,1460 +1449,6 @@ static void rcu_cleanup_after_idle(void)
#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
-#ifdef CONFIG_RCU_NOCB_CPU
-
-/*
- * Offload callback processing from the boot-time-specified set of CPUs
- * specified by rcu_nocb_mask. For the CPUs in the set, there are kthreads
- * created that pull the callbacks from the corresponding CPU, wait for
- * a grace period to elapse, and invoke the callbacks. These kthreads
- * are organized into GP kthreads, which manage incoming callbacks, wait for
- * grace periods, and awaken CB kthreads, and the CB kthreads, which only
- * invoke callbacks. Each GP kthread invokes its own CBs. The no-CBs CPUs
- * do a wake_up() on their GP kthread when they insert a callback into any
- * empty list, unless the rcu_nocb_poll boot parameter has been specified,
- * in which case each kthread actively polls its CPU. (Which isn't so great
- * for energy efficiency, but which does reduce RCU's overhead on that CPU.)
- *
- * This is intended to be used in conjunction with Frederic Weisbecker's
- * adaptive-idle work, which would seriously reduce OS jitter on CPUs
- * running CPU-bound user-mode computations.
- *
- * Offloading of callbacks can also be used as an energy-efficiency
- * measure because CPUs with no RCU callbacks queued are more aggressive
- * about entering dyntick-idle mode.
- */
-
-
-/*
- * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters.
- * If the list is invalid, a warning is emitted and all CPUs are offloaded.
- */
-static int __init rcu_nocb_setup(char *str)
-{
- alloc_bootmem_cpumask_var(&rcu_nocb_mask);
- if (cpulist_parse(str, rcu_nocb_mask)) {
- pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
- cpumask_setall(rcu_nocb_mask);
- }
- return 1;
-}
-__setup("rcu_nocbs=", rcu_nocb_setup);
-
-static int __init parse_rcu_nocb_poll(char *arg)
-{
- rcu_nocb_poll = true;
- return 0;
-}
-early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
-
-/*
- * Don't bother bypassing ->cblist if the call_rcu() rate is low.
- * After all, the main point of bypassing is to avoid lock contention
- * on ->nocb_lock, which only can happen at high call_rcu() rates.
- */
-static int nocb_nobypass_lim_per_jiffy = 16 * 1000 / HZ;
-module_param(nocb_nobypass_lim_per_jiffy, int, 0);
-
-/*
- * Acquire the specified rcu_data structure's ->nocb_bypass_lock. If the
- * lock isn't immediately available, increment ->nocb_lock_contended to
- * flag the contention.
- */
-static void rcu_nocb_bypass_lock(struct rcu_data *rdp)
- __acquires(&rdp->nocb_bypass_lock)
-{
- lockdep_assert_irqs_disabled();
- if (raw_spin_trylock(&rdp->nocb_bypass_lock))
- return;
- atomic_inc(&rdp->nocb_lock_contended);
- WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
- smp_mb__after_atomic(); /* atomic_inc() before lock. */
- raw_spin_lock(&rdp->nocb_bypass_lock);
- smp_mb__before_atomic(); /* atomic_dec() after lock. */
- atomic_dec(&rdp->nocb_lock_contended);
-}
-
-/*
- * Spinwait until the specified rcu_data structure's ->nocb_lock is
- * not contended. Please note that this is extremely special-purpose,
- * relying on the fact that at most two kthreads and one CPU contend for
- * this lock, and also that the two kthreads are guaranteed to have frequent
- * grace-period-duration time intervals between successive acquisitions
- * of the lock. This allows us to use an extremely simple throttling
- * mechanism, and further to apply it only to the CPU doing floods of
- * call_rcu() invocations. Don't try this at home!
- */
-static void rcu_nocb_wait_contended(struct rcu_data *rdp)
-{
- WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
- while (WARN_ON_ONCE(atomic_read(&rdp->nocb_lock_contended)))
- cpu_relax();
-}
-
-/*
- * Conditionally acquire the specified rcu_data structure's
- * ->nocb_bypass_lock.
- */
-static bool rcu_nocb_bypass_trylock(struct rcu_data *rdp)
-{
- lockdep_assert_irqs_disabled();
- return raw_spin_trylock(&rdp->nocb_bypass_lock);
-}
-
-/*
- * Release the specified rcu_data structure's ->nocb_bypass_lock.
- */
-static void rcu_nocb_bypass_unlock(struct rcu_data *rdp)
- __releases(&rdp->nocb_bypass_lock)
-{
- lockdep_assert_irqs_disabled();
- raw_spin_unlock(&rdp->nocb_bypass_lock);
-}
-
-/*
- * Acquire the specified rcu_data structure's ->nocb_lock, but only
- * if it corresponds to a no-CBs CPU.
- */
-static void rcu_nocb_lock(struct rcu_data *rdp)
-{
- lockdep_assert_irqs_disabled();
- if (!rcu_rdp_is_offloaded(rdp))
- return;
- raw_spin_lock(&rdp->nocb_lock);
-}
-
-/*
- * Release the specified rcu_data structure's ->nocb_lock, but only
- * if it corresponds to a no-CBs CPU.
- */
-static void rcu_nocb_unlock(struct rcu_data *rdp)
-{
- if (rcu_rdp_is_offloaded(rdp)) {
- lockdep_assert_irqs_disabled();
- raw_spin_unlock(&rdp->nocb_lock);
- }
-}
-
-/*
- * Release the specified rcu_data structure's ->nocb_lock and restore
- * interrupts, but only if it corresponds to a no-CBs CPU.
- */
-static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
- unsigned long flags)
-{
- if (rcu_rdp_is_offloaded(rdp)) {
- lockdep_assert_irqs_disabled();
- raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
- } else {
- local_irq_restore(flags);
- }
-}
-
-/* Lockdep check that ->cblist may be safely accessed. */
-static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
-{
- lockdep_assert_irqs_disabled();
- if (rcu_rdp_is_offloaded(rdp))
- lockdep_assert_held(&rdp->nocb_lock);
-}
-
-/*
- * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
- * grace period.
- */
-static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
-{
- swake_up_all(sq);
-}
-
-static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
-{
- return &rnp->nocb_gp_wq[rcu_seq_ctr(rnp->gp_seq) & 0x1];
-}
-
-static void rcu_init_one_nocb(struct rcu_node *rnp)
-{
- init_swait_queue_head(&rnp->nocb_gp_wq[0]);
- init_swait_queue_head(&rnp->nocb_gp_wq[1]);
-}
-
-/* Is the specified CPU a no-CBs CPU? */
-bool rcu_is_nocb_cpu(int cpu)
-{
- if (cpumask_available(rcu_nocb_mask))
- return cpumask_test_cpu(cpu, rcu_nocb_mask);
- return false;
-}
-
-static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
- struct rcu_data *rdp,
- bool force, unsigned long flags)
- __releases(rdp_gp->nocb_gp_lock)
-{
- bool needwake = false;
-
- if (!READ_ONCE(rdp_gp->nocb_gp_kthread)) {
- raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
- TPS("AlreadyAwake"));
- return false;
- }
-
- if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
- WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
- del_timer(&rdp_gp->nocb_timer);
- }
-
- if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
- WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
- needwake = true;
- }
- raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
- if (needwake) {
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));
- wake_up_process(rdp_gp->nocb_gp_kthread);
- }
-
- return needwake;
-}
-
-/*
- * Kick the GP kthread for this NOCB group.
- */
-static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
-{
- unsigned long flags;
- struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
-
- raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
- return __wake_nocb_gp(rdp_gp, rdp, force, flags);
-}
-
-/*
- * Arrange to wake the GP kthread for this NOCB group at some future
- * time when it is safe to do so.
- */
-static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
- const char *reason)
-{
- unsigned long flags;
- struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
-
- raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
-
- /*
- * Bypass wakeup overrides previous deferments. In case
- * of callback storm, no need to wake up too early.
- */
- if (waketype == RCU_NOCB_WAKE_BYPASS) {
- mod_timer(&rdp_gp->nocb_timer, jiffies + 2);
- WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
- } else {
- if (rdp_gp->nocb_defer_wakeup < RCU_NOCB_WAKE)
- mod_timer(&rdp_gp->nocb_timer, jiffies + 1);
- if (rdp_gp->nocb_defer_wakeup < waketype)
- WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
- }
-
- raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
-
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, reason);
-}
-
-/*
- * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.
- * However, if there is a callback to be enqueued and if ->nocb_bypass
- * proves to be initially empty, just return false because the no-CB GP
- * kthread may need to be awakened in this case.
- *
- * Note that this function always returns true if rhp is NULL.
- */
-static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
- unsigned long j)
-{
- struct rcu_cblist rcl;
-
- WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp));
- rcu_lockdep_assert_cblist_protected(rdp);
- lockdep_assert_held(&rdp->nocb_bypass_lock);
- if (rhp && !rcu_cblist_n_cbs(&rdp->nocb_bypass)) {
- raw_spin_unlock(&rdp->nocb_bypass_lock);
- return false;
- }
- /* Note: ->cblist.len already accounts for ->nocb_bypass contents. */
- if (rhp)
- rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
- rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);
- rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl);
- WRITE_ONCE(rdp->nocb_bypass_first, j);
- rcu_nocb_bypass_unlock(rdp);
- return true;
-}
-
-/*
- * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.
- * However, if there is a callback to be enqueued and if ->nocb_bypass
- * proves to be initially empty, just return false because the no-CB GP
- * kthread may need to be awakened in this case.
- *
- * Note that this function always returns true if rhp is NULL.
- */
-static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
- unsigned long j)
-{
- if (!rcu_rdp_is_offloaded(rdp))
- return true;
- rcu_lockdep_assert_cblist_protected(rdp);
- rcu_nocb_bypass_lock(rdp);
- return rcu_nocb_do_flush_bypass(rdp, rhp, j);
-}
-
-/*
- * If the ->nocb_bypass_lock is immediately available, flush the
- * ->nocb_bypass queue into ->cblist.
- */
-static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
-{
- rcu_lockdep_assert_cblist_protected(rdp);
- if (!rcu_rdp_is_offloaded(rdp) ||
- !rcu_nocb_bypass_trylock(rdp))
- return;
- WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j));
-}
-
-/*
- * See whether it is appropriate to use the ->nocb_bypass list in order
- * to control contention on ->nocb_lock. A limited number of direct
- * enqueues are permitted into ->cblist per jiffy. If ->nocb_bypass
- * is non-empty, further callbacks must be placed into ->nocb_bypass,
- * otherwise rcu_barrier() breaks. Use rcu_nocb_flush_bypass() to switch
- * back to direct use of ->cblist. However, ->nocb_bypass should not be
- * used if ->cblist is empty, because otherwise callbacks can be stranded
- * on ->nocb_bypass because we cannot count on the current CPU ever again
- * invoking call_rcu(). The general rule is that if ->nocb_bypass is
- * non-empty, the corresponding no-CBs grace-period kthread must not be
- * in an indefinite sleep state.
- *
- * Finally, it is not permitted to use the bypass during early boot,
- * as doing so would confuse the auto-initialization code. Besides
- * which, there is no point in worrying about lock contention while
- * there is only one CPU in operation.
- */
-static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
- bool *was_alldone, unsigned long flags)
-{
- unsigned long c;
- unsigned long cur_gp_seq;
- unsigned long j = jiffies;
- long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
-
- lockdep_assert_irqs_disabled();
-
- // Pure softirq/rcuc based processing: no bypassing, no
- // locking.
- if (!rcu_rdp_is_offloaded(rdp)) {
- *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
- return false;
- }
-
- // In the process of (de-)offloading: no bypassing, but
- // locking.
- if (!rcu_segcblist_completely_offloaded(&rdp->cblist)) {
- rcu_nocb_lock(rdp);
- *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
- return false; /* Not offloaded, no bypassing. */
- }
-
- // Don't use ->nocb_bypass during early boot.
- if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
- rcu_nocb_lock(rdp);
- WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
- *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
- return false;
- }
-
- // If we have advanced to a new jiffy, reset counts to allow
- // moving back from ->nocb_bypass to ->cblist.
- if (j == rdp->nocb_nobypass_last) {
- c = rdp->nocb_nobypass_count + 1;
- } else {
- WRITE_ONCE(rdp->nocb_nobypass_last, j);
- c = rdp->nocb_nobypass_count - nocb_nobypass_lim_per_jiffy;
- if (ULONG_CMP_LT(rdp->nocb_nobypass_count,
- nocb_nobypass_lim_per_jiffy))
- c = 0;
- else if (c > nocb_nobypass_lim_per_jiffy)
- c = nocb_nobypass_lim_per_jiffy;
- }
- WRITE_ONCE(rdp->nocb_nobypass_count, c);
-
- // If there hasn't yet been all that many ->cblist enqueues
- // this jiffy, tell the caller to enqueue onto ->cblist. But flush
- // ->nocb_bypass first.
- if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy) {
- rcu_nocb_lock(rdp);
- *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
- if (*was_alldone)
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
- TPS("FirstQ"));
- WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j));
- WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
- return false; // Caller must enqueue the callback.
- }
-
- // If ->nocb_bypass has been used too long or is too full,
- // flush ->nocb_bypass to ->cblist.
- if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) ||
- ncbs >= qhimark) {
- rcu_nocb_lock(rdp);
- if (!rcu_nocb_flush_bypass(rdp, rhp, j)) {
- *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
- if (*was_alldone)
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
- TPS("FirstQ"));
- WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
- return false; // Caller must enqueue the callback.
- }
- if (j != rdp->nocb_gp_adv_time &&
- rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
- rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
- rcu_advance_cbs_nowake(rdp->mynode, rdp);
- rdp->nocb_gp_adv_time = j;
- }
- rcu_nocb_unlock_irqrestore(rdp, flags);
- return true; // Callback already enqueued.
- }
-
- // We need to use the bypass.
- rcu_nocb_wait_contended(rdp);
- rcu_nocb_bypass_lock(rdp);
- ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
- rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
- rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
- if (!ncbs) {
- WRITE_ONCE(rdp->nocb_bypass_first, j);
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ"));
- }
- rcu_nocb_bypass_unlock(rdp);
- smp_mb(); /* Order enqueue before wake. */
- if (ncbs) {
- local_irq_restore(flags);
- } else {
- // No-CBs GP kthread might be indefinitely asleep, if so, wake.
- rcu_nocb_lock(rdp); // Rare during call_rcu() flood.
- if (!rcu_segcblist_pend_cbs(&rdp->cblist)) {
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
- TPS("FirstBQwake"));
- __call_rcu_nocb_wake(rdp, true, flags);
- } else {
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
- TPS("FirstBQnoWake"));
- rcu_nocb_unlock_irqrestore(rdp, flags);
- }
- }
- return true; // Callback already enqueued.
-}
-
-/*
- * Awaken the no-CBs grace-period kthread if needed, either due to it
- * legitimately being asleep or due to overload conditions.
- *
- * If warranted, also wake up the kthread servicing this CPUs queues.
- */
-static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
- unsigned long flags)
- __releases(rdp->nocb_lock)
-{
- unsigned long cur_gp_seq;
- unsigned long j;
- long len;
- struct task_struct *t;
-
- // If we are being polled or there is no kthread, just leave.
- t = READ_ONCE(rdp->nocb_gp_kthread);
- if (rcu_nocb_poll || !t) {
- rcu_nocb_unlock_irqrestore(rdp, flags);
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
- TPS("WakeNotPoll"));
- return;
- }
- // Need to actually to a wakeup.
- len = rcu_segcblist_n_cbs(&rdp->cblist);
- if (was_alldone) {
- rdp->qlen_last_fqs_check = len;
- if (!irqs_disabled_flags(flags)) {
- /* ... if queue was empty ... */
- rcu_nocb_unlock_irqrestore(rdp, flags);
- wake_nocb_gp(rdp, false);
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
- TPS("WakeEmpty"));
- } else {
- rcu_nocb_unlock_irqrestore(rdp, flags);
- wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
- TPS("WakeEmptyIsDeferred"));
- }
- } else if (len > rdp->qlen_last_fqs_check + qhimark) {
- /* ... or if many callbacks queued. */
- rdp->qlen_last_fqs_check = len;
- j = jiffies;
- if (j != rdp->nocb_gp_adv_time &&
- rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
- rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
- rcu_advance_cbs_nowake(rdp->mynode, rdp);
- rdp->nocb_gp_adv_time = j;
- }
- smp_mb(); /* Enqueue before timer_pending(). */
- if ((rdp->nocb_cb_sleep ||
- !rcu_segcblist_ready_cbs(&rdp->cblist)) &&
- !timer_pending(&rdp->nocb_timer)) {
- rcu_nocb_unlock_irqrestore(rdp, flags);
- wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
- TPS("WakeOvfIsDeferred"));
- } else {
- rcu_nocb_unlock_irqrestore(rdp, flags);
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
- }
- } else {
- rcu_nocb_unlock_irqrestore(rdp, flags);
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
- }
- return;
-}
-
-/*
- * Check if we ignore this rdp.
- *
- * We check that without holding the nocb lock but
- * we make sure not to miss a freshly offloaded rdp
- * with the current ordering:
- *
- * rdp_offload_toggle() nocb_gp_enabled_cb()
- * ------------------------- ----------------------------
- * WRITE flags LOCK nocb_gp_lock
- * LOCK nocb_gp_lock READ/WRITE nocb_gp_sleep
- * READ/WRITE nocb_gp_sleep UNLOCK nocb_gp_lock
- * UNLOCK nocb_gp_lock READ flags
- */
-static inline bool nocb_gp_enabled_cb(struct rcu_data *rdp)
-{
- u8 flags = SEGCBLIST_OFFLOADED | SEGCBLIST_KTHREAD_GP;
-
- return rcu_segcblist_test_flags(&rdp->cblist, flags);
-}
-
-static inline bool nocb_gp_update_state_deoffloading(struct rcu_data *rdp,
- bool *needwake_state)
-{
- struct rcu_segcblist *cblist = &rdp->cblist;
-
- if (rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED)) {
- if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)) {
- rcu_segcblist_set_flags(cblist, SEGCBLIST_KTHREAD_GP);
- if (rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB))
- *needwake_state = true;
- }
- return false;
- }
-
- /*
- * De-offloading. Clear our flag and notify the de-offload worker.
- * We will ignore this rdp until it ever gets re-offloaded.
- */
- WARN_ON_ONCE(!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
- rcu_segcblist_clear_flags(cblist, SEGCBLIST_KTHREAD_GP);
- if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB))
- *needwake_state = true;
- return true;
-}
-
-
-/*
- * No-CBs GP kthreads come here to wait for additional callbacks to show up
- * or for grace periods to end.
- */
-static void nocb_gp_wait(struct rcu_data *my_rdp)
-{
- bool bypass = false;
- long bypass_ncbs;
- int __maybe_unused cpu = my_rdp->cpu;
- unsigned long cur_gp_seq;
- unsigned long flags;
- bool gotcbs = false;
- unsigned long j = jiffies;
- bool needwait_gp = false; // This prevents actual uninitialized use.
- bool needwake;
- bool needwake_gp;
- struct rcu_data *rdp;
- struct rcu_node *rnp;
- unsigned long wait_gp_seq = 0; // Suppress "use uninitialized" warning.
- bool wasempty = false;
-
- /*
- * Each pass through the following loop checks for CBs and for the
- * nearest grace period (if any) to wait for next. The CB kthreads
- * and the global grace-period kthread are awakened if needed.
- */
- WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp);
- for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) {
- bool needwake_state = false;
-
- if (!nocb_gp_enabled_cb(rdp))
- continue;
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check"));
- rcu_nocb_lock_irqsave(rdp, flags);
- if (nocb_gp_update_state_deoffloading(rdp, &needwake_state)) {
- rcu_nocb_unlock_irqrestore(rdp, flags);
- if (needwake_state)
- swake_up_one(&rdp->nocb_state_wq);
- continue;
- }
- bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
- if (bypass_ncbs &&
- (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) ||
- bypass_ncbs > 2 * qhimark)) {
- // Bypass full or old, so flush it.
- (void)rcu_nocb_try_flush_bypass(rdp, j);
- bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
- } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
- rcu_nocb_unlock_irqrestore(rdp, flags);
- if (needwake_state)
- swake_up_one(&rdp->nocb_state_wq);
- continue; /* No callbacks here, try next. */
- }
- if (bypass_ncbs) {
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
- TPS("Bypass"));
- bypass = true;
- }
- rnp = rdp->mynode;
-
- // Advance callbacks if helpful and low contention.
- needwake_gp = false;
- if (!rcu_segcblist_restempty(&rdp->cblist,
- RCU_NEXT_READY_TAIL) ||
- (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
- rcu_seq_done(&rnp->gp_seq, cur_gp_seq))) {
- raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
- needwake_gp = rcu_advance_cbs(rnp, rdp);
- wasempty = rcu_segcblist_restempty(&rdp->cblist,
- RCU_NEXT_READY_TAIL);
- raw_spin_unlock_rcu_node(rnp); /* irqs disabled. */
- }
- // Need to wait on some grace period?
- WARN_ON_ONCE(wasempty &&
- !rcu_segcblist_restempty(&rdp->cblist,
- RCU_NEXT_READY_TAIL));
- if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq)) {
- if (!needwait_gp ||
- ULONG_CMP_LT(cur_gp_seq, wait_gp_seq))
- wait_gp_seq = cur_gp_seq;
- needwait_gp = true;
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
- TPS("NeedWaitGP"));
- }
- if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
- needwake = rdp->nocb_cb_sleep;
- WRITE_ONCE(rdp->nocb_cb_sleep, false);
- smp_mb(); /* CB invocation -after- GP end. */
- } else {
- needwake = false;
- }
- rcu_nocb_unlock_irqrestore(rdp, flags);
- if (needwake) {
- swake_up_one(&rdp->nocb_cb_wq);
- gotcbs = true;
- }
- if (needwake_gp)
- rcu_gp_kthread_wake();
- if (needwake_state)
- swake_up_one(&rdp->nocb_state_wq);
- }
-
- my_rdp->nocb_gp_bypass = bypass;
- my_rdp->nocb_gp_gp = needwait_gp;
- my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0;
-
- if (bypass && !rcu_nocb_poll) {
- // At least one child with non-empty ->nocb_bypass, so set
- // timer in order to avoid stranding its callbacks.
- wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS,
- TPS("WakeBypassIsDeferred"));
- }
- if (rcu_nocb_poll) {
- /* Polling, so trace if first poll in the series. */
- if (gotcbs)
- trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
- schedule_timeout_idle(1);
- } else if (!needwait_gp) {
- /* Wait for callbacks to appear. */
- trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
- swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
- !READ_ONCE(my_rdp->nocb_gp_sleep));
- trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EndSleep"));
- } else {
- rnp = my_rdp->mynode;
- trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("StartWait"));
- swait_event_interruptible_exclusive(
- rnp->nocb_gp_wq[rcu_seq_ctr(wait_gp_seq) & 0x1],
- rcu_seq_done(&rnp->gp_seq, wait_gp_seq) ||
- !READ_ONCE(my_rdp->nocb_gp_sleep));
- trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("EndWait"));
- }
- if (!rcu_nocb_poll) {
- raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
- if (my_rdp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
- WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
- del_timer(&my_rdp->nocb_timer);
- }
- WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
- raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
- }
- my_rdp->nocb_gp_seq = -1;
- WARN_ON(signal_pending(current));
-}
-
-/*
- * No-CBs grace-period-wait kthread. There is one of these per group
- * of CPUs, but only once at least one CPU in that group has come online
- * at least once since boot. This kthread checks for newly posted
- * callbacks from any of the CPUs it is responsible for, waits for a
- * grace period, then awakens all of the rcu_nocb_cb_kthread() instances
- * that then have callback-invocation work to do.
- */
-static int rcu_nocb_gp_kthread(void *arg)
-{
- struct rcu_data *rdp = arg;
-
- for (;;) {
- WRITE_ONCE(rdp->nocb_gp_loops, rdp->nocb_gp_loops + 1);
- nocb_gp_wait(rdp);
- cond_resched_tasks_rcu_qs();
- }
- return 0;
-}
-
-static inline bool nocb_cb_can_run(struct rcu_data *rdp)
-{
- u8 flags = SEGCBLIST_OFFLOADED | SEGCBLIST_KTHREAD_CB;
- return rcu_segcblist_test_flags(&rdp->cblist, flags);
-}
-
-static inline bool nocb_cb_wait_cond(struct rcu_data *rdp)
-{
- return nocb_cb_can_run(rdp) && !READ_ONCE(rdp->nocb_cb_sleep);
-}
-
-/*
- * Invoke any ready callbacks from the corresponding no-CBs CPU,
- * then, if there are no more, wait for more to appear.
- */
-static void nocb_cb_wait(struct rcu_data *rdp)
-{
- struct rcu_segcblist *cblist = &rdp->cblist;
- unsigned long cur_gp_seq;
- unsigned long flags;
- bool needwake_state = false;
- bool needwake_gp = false;
- bool can_sleep = true;
- struct rcu_node *rnp = rdp->mynode;
-
- local_irq_save(flags);
- rcu_momentary_dyntick_idle();
- local_irq_restore(flags);
- /*
- * Disable BH to provide the expected environment. Also, when
- * transitioning to/from NOCB mode, a self-requeuing callback might
- * be invoked from softirq. A short grace period could cause both
- * instances of this callback would execute concurrently.
- */
- local_bh_disable();
- rcu_do_batch(rdp);
- local_bh_enable();
- lockdep_assert_irqs_enabled();
- rcu_nocb_lock_irqsave(rdp, flags);
- if (rcu_segcblist_nextgp(cblist, &cur_gp_seq) &&
- rcu_seq_done(&rnp->gp_seq, cur_gp_seq) &&
- raw_spin_trylock_rcu_node(rnp)) { /* irqs already disabled. */
- needwake_gp = rcu_advance_cbs(rdp->mynode, rdp);
- raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
- }
-
- if (rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED)) {
- if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB)) {
- rcu_segcblist_set_flags(cblist, SEGCBLIST_KTHREAD_CB);
- if (rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP))
- needwake_state = true;
- }
- if (rcu_segcblist_ready_cbs(cblist))
- can_sleep = false;
- } else {
- /*
- * De-offloading. Clear our flag and notify the de-offload worker.
- * We won't touch the callbacks and keep sleeping until we ever
- * get re-offloaded.
- */
- WARN_ON_ONCE(!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB));
- rcu_segcblist_clear_flags(cblist, SEGCBLIST_KTHREAD_CB);
- if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP))
- needwake_state = true;
- }
-
- WRITE_ONCE(rdp->nocb_cb_sleep, can_sleep);
-
- if (rdp->nocb_cb_sleep)
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep"));
-
- rcu_nocb_unlock_irqrestore(rdp, flags);
- if (needwake_gp)
- rcu_gp_kthread_wake();
-
- if (needwake_state)
- swake_up_one(&rdp->nocb_state_wq);
-
- do {
- swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
- nocb_cb_wait_cond(rdp));
-
- // VVV Ensure CB invocation follows _sleep test.
- if (smp_load_acquire(&rdp->nocb_cb_sleep)) { // ^^^
- WARN_ON(signal_pending(current));
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
- }
- } while (!nocb_cb_can_run(rdp));
-}
-
-/*
- * Per-rcu_data kthread, but only for no-CBs CPUs. Repeatedly invoke
- * nocb_cb_wait() to do the dirty work.
- */
-static int rcu_nocb_cb_kthread(void *arg)
-{
- struct rcu_data *rdp = arg;
-
- // Each pass through this loop does one callback batch, and,
- // if there are no more ready callbacks, waits for them.
- for (;;) {
- nocb_cb_wait(rdp);
- cond_resched_tasks_rcu_qs();
- }
- return 0;
-}
-
-/* Is a deferred wakeup of rcu_nocb_kthread() required? */
-static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level)
-{
- return READ_ONCE(rdp->nocb_defer_wakeup) >= level;
-}
-
-/* Do a deferred wakeup of rcu_nocb_kthread(). */
-static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp_gp,
- struct rcu_data *rdp, int level,
- unsigned long flags)
- __releases(rdp_gp->nocb_gp_lock)
-{
- int ndw;
- int ret;
-
- if (!rcu_nocb_need_deferred_wakeup(rdp_gp, level)) {
- raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
- return false;
- }
-
- ndw = rdp_gp->nocb_defer_wakeup;
- ret = __wake_nocb_gp(rdp_gp, rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
-
- return ret;
-}
-
-/* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
-static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
-{
- unsigned long flags;
- struct rcu_data *rdp = from_timer(rdp, t, nocb_timer);
-
- WARN_ON_ONCE(rdp->nocb_gp_rdp != rdp);
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));
-
- raw_spin_lock_irqsave(&rdp->nocb_gp_lock, flags);
- smp_mb__after_spinlock(); /* Timer expire before wakeup. */
- do_nocb_deferred_wakeup_common(rdp, rdp, RCU_NOCB_WAKE_BYPASS, flags);
-}
-
-/*
- * Do a deferred wakeup of rcu_nocb_kthread() from fastpath.
- * This means we do an inexact common-case check. Note that if
- * we miss, ->nocb_timer will eventually clean things up.
- */
-static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
-{
- unsigned long flags;
- struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
-
- if (!rdp_gp || !rcu_nocb_need_deferred_wakeup(rdp_gp, RCU_NOCB_WAKE))
- return false;
-
- raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
- return do_nocb_deferred_wakeup_common(rdp_gp, rdp, RCU_NOCB_WAKE, flags);
-}
-
-void rcu_nocb_flush_deferred_wakeup(void)
-{
- do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data));
-}
-EXPORT_SYMBOL_GPL(rcu_nocb_flush_deferred_wakeup);
-
-static int rdp_offload_toggle(struct rcu_data *rdp,
- bool offload, unsigned long flags)
- __releases(rdp->nocb_lock)
-{
- struct rcu_segcblist *cblist = &rdp->cblist;
- struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
- bool wake_gp = false;
-
- rcu_segcblist_offload(cblist, offload);
-
- if (rdp->nocb_cb_sleep)
- rdp->nocb_cb_sleep = false;
- rcu_nocb_unlock_irqrestore(rdp, flags);
-
- /*
- * Ignore former value of nocb_cb_sleep and force wake up as it could
- * have been spuriously set to false already.
- */
- swake_up_one(&rdp->nocb_cb_wq);
-
- raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
- if (rdp_gp->nocb_gp_sleep) {
- rdp_gp->nocb_gp_sleep = false;
- wake_gp = true;
- }
- raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
-
- if (wake_gp)
- wake_up_process(rdp_gp->nocb_gp_kthread);
-
- return 0;
-}
-
-static long rcu_nocb_rdp_deoffload(void *arg)
-{
- struct rcu_data *rdp = arg;
- struct rcu_segcblist *cblist = &rdp->cblist;
- unsigned long flags;
- int ret;
-
- WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
-
- pr_info("De-offloading %d\n", rdp->cpu);
-
- rcu_nocb_lock_irqsave(rdp, flags);
- /*
- * Flush once and for all now. This suffices because we are
- * running on the target CPU holding ->nocb_lock (thus having
- * interrupts disabled), and because rdp_offload_toggle()
- * invokes rcu_segcblist_offload(), which clears SEGCBLIST_OFFLOADED.
- * Thus future calls to rcu_segcblist_completely_offloaded() will
- * return false, which means that future calls to rcu_nocb_try_bypass()
- * will refuse to put anything into the bypass.
- */
- WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
- ret = rdp_offload_toggle(rdp, false, flags);
- swait_event_exclusive(rdp->nocb_state_wq,
- !rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB |
- SEGCBLIST_KTHREAD_GP));
- /*
- * Lock one last time to acquire latest callback updates from kthreads
- * so we can later handle callbacks locally without locking.
- */
- rcu_nocb_lock_irqsave(rdp, flags);
- /*
- * Theoretically we could set SEGCBLIST_SOFTIRQ_ONLY after the nocb
- * lock is released but how about being paranoid for once?
- */
- rcu_segcblist_set_flags(cblist, SEGCBLIST_SOFTIRQ_ONLY);
- /*
- * With SEGCBLIST_SOFTIRQ_ONLY, we can't use
- * rcu_nocb_unlock_irqrestore() anymore.
- */
- raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
-
- /* Sanity check */
- WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
-
-
- return ret;
-}
-
-int rcu_nocb_cpu_deoffload(int cpu)
-{
- struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
- int ret = 0;
-
- mutex_lock(&rcu_state.barrier_mutex);
- cpus_read_lock();
- if (rcu_rdp_is_offloaded(rdp)) {
- if (cpu_online(cpu)) {
- ret = work_on_cpu(cpu, rcu_nocb_rdp_deoffload, rdp);
- if (!ret)
- cpumask_clear_cpu(cpu, rcu_nocb_mask);
- } else {
- pr_info("NOCB: Can't CB-deoffload an offline CPU\n");
- ret = -EINVAL;
- }
- }
- cpus_read_unlock();
- mutex_unlock(&rcu_state.barrier_mutex);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload);
-
-static long rcu_nocb_rdp_offload(void *arg)
-{
- struct rcu_data *rdp = arg;
- struct rcu_segcblist *cblist = &rdp->cblist;
- unsigned long flags;
- int ret;
-
- WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
- /*
- * For now we only support re-offload, ie: the rdp must have been
- * offloaded on boot first.
- */
- if (!rdp->nocb_gp_rdp)
- return -EINVAL;
-
- pr_info("Offloading %d\n", rdp->cpu);
- /*
- * Can't use rcu_nocb_lock_irqsave() while we are in
- * SEGCBLIST_SOFTIRQ_ONLY mode.
- */
- raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
-
- /*
- * We didn't take the nocb lock while working on the
- * rdp->cblist in SEGCBLIST_SOFTIRQ_ONLY mode.
- * Every modifications that have been done previously on
- * rdp->cblist must be visible remotely by the nocb kthreads
- * upon wake up after reading the cblist flags.
- *
- * The layout against nocb_lock enforces that ordering:
- *
- * __rcu_nocb_rdp_offload() nocb_cb_wait()/nocb_gp_wait()
- * ------------------------- ----------------------------
- * WRITE callbacks rcu_nocb_lock()
- * rcu_nocb_lock() READ flags
- * WRITE flags READ callbacks
- * rcu_nocb_unlock() rcu_nocb_unlock()
- */
- ret = rdp_offload_toggle(rdp, true, flags);
- swait_event_exclusive(rdp->nocb_state_wq,
- rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
- rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
-
- return ret;
-}
-
-int rcu_nocb_cpu_offload(int cpu)
-{
- struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
- int ret = 0;
-
- mutex_lock(&rcu_state.barrier_mutex);
- cpus_read_lock();
- if (!rcu_rdp_is_offloaded(rdp)) {
- if (cpu_online(cpu)) {
- ret = work_on_cpu(cpu, rcu_nocb_rdp_offload, rdp);
- if (!ret)
- cpumask_set_cpu(cpu, rcu_nocb_mask);
- } else {
- pr_info("NOCB: Can't CB-offload an offline CPU\n");
- ret = -EINVAL;
- }
- }
- cpus_read_unlock();
- mutex_unlock(&rcu_state.barrier_mutex);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload);
-
-void __init rcu_init_nohz(void)
-{
- int cpu;
- bool need_rcu_nocb_mask = false;
- struct rcu_data *rdp;
-
-#if defined(CONFIG_NO_HZ_FULL)
- if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
- need_rcu_nocb_mask = true;
-#endif /* #if defined(CONFIG_NO_HZ_FULL) */
-
- if (!cpumask_available(rcu_nocb_mask) && need_rcu_nocb_mask) {
- if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
- pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
- return;
- }
- }
- if (!cpumask_available(rcu_nocb_mask))
- return;
-
-#if defined(CONFIG_NO_HZ_FULL)
- if (tick_nohz_full_running)
- cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
-#endif /* #if defined(CONFIG_NO_HZ_FULL) */
-
- if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
- pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
- cpumask_and(rcu_nocb_mask, cpu_possible_mask,
- rcu_nocb_mask);
- }
- if (cpumask_empty(rcu_nocb_mask))
- pr_info("\tOffload RCU callbacks from CPUs: (none).\n");
- else
- pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
- cpumask_pr_args(rcu_nocb_mask));
- if (rcu_nocb_poll)
- pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
-
- for_each_cpu(cpu, rcu_nocb_mask) {
- rdp = per_cpu_ptr(&rcu_data, cpu);
- if (rcu_segcblist_empty(&rdp->cblist))
- rcu_segcblist_init(&rdp->cblist);
- rcu_segcblist_offload(&rdp->cblist, true);
- rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB);
- rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_GP);
- }
- rcu_organize_nocb_kthreads();
-}
-
-/* Initialize per-rcu_data variables for no-CBs CPUs. */
-static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
-{
- init_swait_queue_head(&rdp->nocb_cb_wq);
- init_swait_queue_head(&rdp->nocb_gp_wq);
- init_swait_queue_head(&rdp->nocb_state_wq);
- raw_spin_lock_init(&rdp->nocb_lock);
- raw_spin_lock_init(&rdp->nocb_bypass_lock);
- raw_spin_lock_init(&rdp->nocb_gp_lock);
- timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
- rcu_cblist_init(&rdp->nocb_bypass);
-}
-
-/*
- * If the specified CPU is a no-CBs CPU that does not already have its
- * rcuo CB kthread, spawn it. Additionally, if the rcuo GP kthread
- * for this CPU's group has not yet been created, spawn it as well.
- */
-static void rcu_spawn_one_nocb_kthread(int cpu)
-{
- struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
- struct rcu_data *rdp_gp;
- struct task_struct *t;
-
- /*
- * If this isn't a no-CBs CPU or if it already has an rcuo kthread,
- * then nothing to do.
- */
- if (!rcu_is_nocb_cpu(cpu) || rdp->nocb_cb_kthread)
- return;
-
- /* If we didn't spawn the GP kthread first, reorganize! */
- rdp_gp = rdp->nocb_gp_rdp;
- if (!rdp_gp->nocb_gp_kthread) {
- t = kthread_run(rcu_nocb_gp_kthread, rdp_gp,
- "rcuog/%d", rdp_gp->cpu);
- if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__))
- return;
- WRITE_ONCE(rdp_gp->nocb_gp_kthread, t);
- }
-
- /* Spawn the kthread for this CPU. */
- t = kthread_run(rcu_nocb_cb_kthread, rdp,
- "rcuo%c/%d", rcu_state.abbr, cpu);
- if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))
- return;
- WRITE_ONCE(rdp->nocb_cb_kthread, t);
- WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
-}
-
-/*
- * If the specified CPU is a no-CBs CPU that does not already have its
- * rcuo kthread, spawn it.
- */
-static void rcu_spawn_cpu_nocb_kthread(int cpu)
-{
- if (rcu_scheduler_fully_active)
- rcu_spawn_one_nocb_kthread(cpu);
-}
-
-/*
- * Once the scheduler is running, spawn rcuo kthreads for all online
- * no-CBs CPUs. This assumes that the early_initcall()s happen before
- * non-boot CPUs come online -- if this changes, we will need to add
- * some mutual exclusion.
- */
-static void __init rcu_spawn_nocb_kthreads(void)
-{
- int cpu;
-
- for_each_online_cpu(cpu)
- rcu_spawn_cpu_nocb_kthread(cpu);
-}
-
-/* How many CB CPU IDs per GP kthread? Default of -1 for sqrt(nr_cpu_ids). */
-static int rcu_nocb_gp_stride = -1;
-module_param(rcu_nocb_gp_stride, int, 0444);
-
-/*
- * Initialize GP-CB relationships for all no-CBs CPU.
- */
-static void __init rcu_organize_nocb_kthreads(void)
-{
- int cpu;
- bool firsttime = true;
- bool gotnocbs = false;
- bool gotnocbscbs = true;
- int ls = rcu_nocb_gp_stride;
- int nl = 0; /* Next GP kthread. */
- struct rcu_data *rdp;
- struct rcu_data *rdp_gp = NULL; /* Suppress misguided gcc warn. */
- struct rcu_data *rdp_prev = NULL;
-
- if (!cpumask_available(rcu_nocb_mask))
- return;
- if (ls == -1) {
- ls = nr_cpu_ids / int_sqrt(nr_cpu_ids);
- rcu_nocb_gp_stride = ls;
- }
-
- /*
- * Each pass through this loop sets up one rcu_data structure.
- * Should the corresponding CPU come online in the future, then
- * we will spawn the needed set of rcu_nocb_kthread() kthreads.
- */
- for_each_cpu(cpu, rcu_nocb_mask) {
- rdp = per_cpu_ptr(&rcu_data, cpu);
- if (rdp->cpu >= nl) {
- /* New GP kthread, set up for CBs & next GP. */
- gotnocbs = true;
- nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls;
- rdp->nocb_gp_rdp = rdp;
- rdp_gp = rdp;
- if (dump_tree) {
- if (!firsttime)
- pr_cont("%s\n", gotnocbscbs
- ? "" : " (self only)");
- gotnocbscbs = false;
- firsttime = false;
- pr_alert("%s: No-CB GP kthread CPU %d:",
- __func__, cpu);
- }
- } else {
- /* Another CB kthread, link to previous GP kthread. */
- gotnocbscbs = true;
- rdp->nocb_gp_rdp = rdp_gp;
- rdp_prev->nocb_next_cb_rdp = rdp;
- if (dump_tree)
- pr_cont(" %d", cpu);
- }
- rdp_prev = rdp;
- }
- if (gotnocbs && dump_tree)
- pr_cont("%s\n", gotnocbscbs ? "" : " (self only)");
-}
-
-/*
- * Bind the current task to the offloaded CPUs. If there are no offloaded
- * CPUs, leave the task unbound. Splat if the bind attempt fails.
- */
-void rcu_bind_current_to_nocb(void)
-{
- if (cpumask_available(rcu_nocb_mask) && cpumask_weight(rcu_nocb_mask))
- WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask));
-}
-EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb);
-
-// The ->on_cpu field is available only in CONFIG_SMP=y, so...
-#ifdef CONFIG_SMP
-static char *show_rcu_should_be_on_cpu(struct task_struct *tsp)
-{
- return tsp && task_is_running(tsp) && !tsp->on_cpu ? "!" : "";
-}
-#else // #ifdef CONFIG_SMP
-static char *show_rcu_should_be_on_cpu(struct task_struct *tsp)
-{
- return "";
-}
-#endif // #else #ifdef CONFIG_SMP
-
-/*
- * Dump out nocb grace-period kthread state for the specified rcu_data
- * structure.
- */
-static void show_rcu_nocb_gp_state(struct rcu_data *rdp)
-{
- struct rcu_node *rnp = rdp->mynode;
-
- pr_info("nocb GP %d %c%c%c%c%c %c[%c%c] %c%c:%ld rnp %d:%d %lu %c CPU %d%s\n",
- rdp->cpu,
- "kK"[!!rdp->nocb_gp_kthread],
- "lL"[raw_spin_is_locked(&rdp->nocb_gp_lock)],
- "dD"[!!rdp->nocb_defer_wakeup],
- "tT"[timer_pending(&rdp->nocb_timer)],
- "sS"[!!rdp->nocb_gp_sleep],
- ".W"[swait_active(&rdp->nocb_gp_wq)],
- ".W"[swait_active(&rnp->nocb_gp_wq[0])],
- ".W"[swait_active(&rnp->nocb_gp_wq[1])],
- ".B"[!!rdp->nocb_gp_bypass],
- ".G"[!!rdp->nocb_gp_gp],
- (long)rdp->nocb_gp_seq,
- rnp->grplo, rnp->grphi, READ_ONCE(rdp->nocb_gp_loops),
- rdp->nocb_gp_kthread ? task_state_to_char(rdp->nocb_gp_kthread) : '.',
- rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1,
- show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread));
-}
-
-/* Dump out nocb kthread state for the specified rcu_data structure. */
-static void show_rcu_nocb_state(struct rcu_data *rdp)
-{
- char bufw[20];
- char bufr[20];
- struct rcu_segcblist *rsclp = &rdp->cblist;
- bool waslocked;
- bool wassleep;
-
- if (rdp->nocb_gp_rdp == rdp)
- show_rcu_nocb_gp_state(rdp);
-
- sprintf(bufw, "%ld", rsclp->gp_seq[RCU_WAIT_TAIL]);
- sprintf(bufr, "%ld", rsclp->gp_seq[RCU_NEXT_READY_TAIL]);
- pr_info(" CB %d^%d->%d %c%c%c%c%c%c F%ld L%ld C%d %c%c%s%c%s%c%c q%ld %c CPU %d%s\n",
- rdp->cpu, rdp->nocb_gp_rdp->cpu,
- rdp->nocb_next_cb_rdp ? rdp->nocb_next_cb_rdp->cpu : -1,
- "kK"[!!rdp->nocb_cb_kthread],
- "bB"[raw_spin_is_locked(&rdp->nocb_bypass_lock)],
- "cC"[!!atomic_read(&rdp->nocb_lock_contended)],
- "lL"[raw_spin_is_locked(&rdp->nocb_lock)],
- "sS"[!!rdp->nocb_cb_sleep],
- ".W"[swait_active(&rdp->nocb_cb_wq)],
- jiffies - rdp->nocb_bypass_first,
- jiffies - rdp->nocb_nobypass_last,
- rdp->nocb_nobypass_count,
- ".D"[rcu_segcblist_ready_cbs(rsclp)],
- ".W"[!rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL)],
- rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL) ? "" : bufw,
- ".R"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL)],
- rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL) ? "" : bufr,
- ".N"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL)],
- ".B"[!!rcu_cblist_n_cbs(&rdp->nocb_bypass)],
- rcu_segcblist_n_cbs(&rdp->cblist),
- rdp->nocb_cb_kthread ? task_state_to_char(rdp->nocb_cb_kthread) : '.',
- rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1,
- show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread));
-
- /* It is OK for GP kthreads to have GP state. */
- if (rdp->nocb_gp_rdp == rdp)
- return;
-
- waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock);
- wassleep = swait_active(&rdp->nocb_gp_wq);
- if (!rdp->nocb_gp_sleep && !waslocked && !wassleep)
- return; /* Nothing untoward. */
-
- pr_info(" nocb GP activity on CB-only CPU!!! %c%c%c %c\n",
- "lL"[waslocked],
- "dD"[!!rdp->nocb_defer_wakeup],
- "sS"[!!rdp->nocb_gp_sleep],
- ".W"[wassleep]);
-}
-
-#else /* #ifdef CONFIG_RCU_NOCB_CPU */
-
-/* No ->nocb_lock to acquire. */
-static void rcu_nocb_lock(struct rcu_data *rdp)
-{
-}
-
-/* No ->nocb_lock to release. */
-static void rcu_nocb_unlock(struct rcu_data *rdp)
-{
-}
-
-/* No ->nocb_lock to release. */
-static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
- unsigned long flags)
-{
- local_irq_restore(flags);
-}
-
-/* Lockdep check that ->cblist may be safely accessed. */
-static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
-{
- lockdep_assert_irqs_disabled();
-}
-
-static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
-{
-}
-
-static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
-{
- return NULL;
-}
-
-static void rcu_init_one_nocb(struct rcu_node *rnp)
-{
-}
-
-static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
- unsigned long j)
-{
- return true;
-}
-
-static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
- bool *was_alldone, unsigned long flags)
-{
- return false;
-}
-
-static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
- unsigned long flags)
-{
- WARN_ON_ONCE(1); /* Should be dead code! */
-}
-
-static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
-{
-}
-
-static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level)
-{
- return false;
-}
-
-static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
-{
- return false;
-}
-
-static void rcu_spawn_cpu_nocb_kthread(int cpu)
-{
-}
-
-static void __init rcu_spawn_nocb_kthreads(void)
-{
-}
-
-static void show_rcu_nocb_state(struct rcu_data *rdp)
-{
-}
-
-#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
-
/*
* Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
* grace-period kthread will do force_quiescent_state() processing?
@@ -2982,17 +1498,17 @@ static void noinstr rcu_dynticks_task_exit(void)
/* Turn on heavyweight RCU tasks trace readers on idle/user entry. */
static void rcu_dynticks_task_trace_enter(void)
{
-#ifdef CONFIG_TASKS_RCU_TRACE
+#ifdef CONFIG_TASKS_TRACE_RCU
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
current->trc_reader_special.b.need_mb = true;
-#endif /* #ifdef CONFIG_TASKS_RCU_TRACE */
+#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}
/* Turn off heavyweight RCU tasks trace readers on idle/user exit. */
static void rcu_dynticks_task_trace_exit(void)
{
-#ifdef CONFIG_TASKS_RCU_TRACE
+#ifdef CONFIG_TASKS_TRACE_RCU
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
current->trc_reader_special.b.need_mb = false;
-#endif /* #ifdef CONFIG_TASKS_RCU_TRACE */
+#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 6c76988cc019..677ee3d8671b 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -7,6 +7,8 @@
* Author: Paul E. McKenney <paulmck@linux.ibm.com>
*/
+#include <linux/kvm_para.h>
+
//////////////////////////////////////////////////////////////////////////////
//
// Controlling CPU stall warnings, including delay calculation.
@@ -117,17 +119,14 @@ static void panic_on_rcu_stall(void)
}
/**
- * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
- *
- * Set the stall-warning timeout way off into the future, thus preventing
- * any RCU CPU stall-warning messages from appearing in the current set of
- * RCU grace periods.
+ * rcu_cpu_stall_reset - restart stall-warning timeout for current grace period
*
* The caller must disable hard irqs.
*/
void rcu_cpu_stall_reset(void)
{
- WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2);
+ WRITE_ONCE(rcu_state.jiffies_stall,
+ jiffies + rcu_jiffies_till_stall_check());
}
//////////////////////////////////////////////////////////////////////////////
@@ -267,8 +266,10 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
struct task_struct *ts[8];
lockdep_assert_irqs_disabled();
- if (!rcu_preempt_blocked_readers_cgp(rnp))
+ if (!rcu_preempt_blocked_readers_cgp(rnp)) {
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return 0;
+ }
pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
rnp->level, rnp->grplo, rnp->grphi);
t = list_entry(rnp->gp_tasks->prev,
@@ -280,8 +281,8 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
break;
}
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- for (i--; i; i--) {
- t = ts[i];
+ while (i) {
+ t = ts[--i];
if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr))
pr_cont(" P%d", t->pid);
else
@@ -350,7 +351,7 @@ static void rcu_dump_cpu_stacks(void)
static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
{
- struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
sprintf(cp, "last_accelerate: %04lx/%04lx dyntick_enabled: %d",
rdp->last_accelerate & 0xffff, jiffies & 0xffff,
@@ -464,9 +465,10 @@ static void rcu_check_gp_kthread_starvation(void)
pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#x ->cpu=%d\n",
rcu_state.name, j,
(long)rcu_seq_current(&rcu_state.gp_seq),
- data_race(rcu_state.gp_flags),
- gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
- gpk ? gpk->__state : ~0, cpu);
+ data_race(READ_ONCE(rcu_state.gp_flags)),
+ gp_state_getname(rcu_state.gp_state),
+ data_race(READ_ONCE(rcu_state.gp_state)),
+ gpk ? data_race(READ_ONCE(gpk->__state)) : ~0, cpu);
if (gpk) {
pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name);
pr_err("RCU grace-period kthread stack dump:\n");
@@ -509,7 +511,7 @@ static void rcu_check_gp_kthread_expired_fqs_timer(void)
(long)rcu_seq_current(&rcu_state.gp_seq),
data_race(rcu_state.gp_flags),
gp_state_getname(RCU_GP_WAIT_FQS), RCU_GP_WAIT_FQS,
- gpk->__state);
+ data_race(READ_ONCE(gpk->__state)));
pr_err("\tPossible timer handling issue on cpu=%d timer-softirq=%u\n",
cpu, kstat_softirqs_cpu(TIMER_SOFTIRQ, cpu));
}
@@ -568,11 +570,11 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
pr_err("INFO: Stall ended before state dump start\n");
} else {
j = jiffies;
- gpa = data_race(rcu_state.gp_activity);
+ gpa = data_race(READ_ONCE(rcu_state.gp_activity));
pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
rcu_state.name, j - gpa, j, gpa,
- data_race(jiffies_till_next_fqs),
- rcu_get_root()->qsmask);
+ data_race(READ_ONCE(jiffies_till_next_fqs)),
+ data_race(READ_ONCE(rcu_get_root()->qsmask)));
}
}
/* Rewrite if needed in case of slow consoles. */
@@ -646,6 +648,7 @@ static void print_cpu_stall(unsigned long gps)
static void check_cpu_stall(struct rcu_data *rdp)
{
+ bool didstall = false;
unsigned long gs1;
unsigned long gs2;
unsigned long gps;
@@ -691,24 +694,46 @@ static void check_cpu_stall(struct rcu_data *rdp)
ULONG_CMP_GE(gps, js))
return; /* No stall or GP completed since entering function. */
rnp = rdp->mynode;
- jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
+ jn = jiffies + ULONG_MAX / 2;
if (rcu_gp_in_progress() &&
(READ_ONCE(rnp->qsmask) & rdp->grpmask) &&
cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
+ /*
+ * If a virtual machine is stopped by the host it can look to
+ * the watchdog like an RCU stall. Check to see if the host
+ * stopped the vm.
+ */
+ if (kvm_check_and_clear_guest_paused())
+ return;
+
/* We haven't checked in, so go dump stack. */
print_cpu_stall(gps);
if (READ_ONCE(rcu_cpu_stall_ftrace_dump))
rcu_ftrace_dump(DUMP_ALL);
+ didstall = true;
} else if (rcu_gp_in_progress() &&
ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) &&
cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
+ /*
+ * If a virtual machine is stopped by the host it can look to
+ * the watchdog like an RCU stall. Check to see if the host
+ * stopped the vm.
+ */
+ if (kvm_check_and_clear_guest_paused())
+ return;
+
/* They had a few time units to dump stack, so complain. */
print_other_cpu_stall(gs2, gps);
if (READ_ONCE(rcu_cpu_stall_ftrace_dump))
rcu_ftrace_dump(DUMP_ALL);
+ didstall = true;
+ }
+ if (didstall && READ_ONCE(rcu_state.jiffies_stall) == jn) {
+ jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
+ WRITE_ONCE(rcu_state.jiffies_stall, jn);
}
}
@@ -742,7 +767,7 @@ bool rcu_check_boost_fail(unsigned long gp_state, int *cpup)
rcu_for_each_leaf_node(rnp) {
if (!cpup) {
- if (READ_ONCE(rnp->qsmask)) {
+ if (data_race(READ_ONCE(rnp->qsmask))) {
return false;
} else {
if (READ_ONCE(rnp->gp_tasks))
@@ -791,32 +816,34 @@ void show_rcu_gp_kthreads(void)
struct task_struct *t = READ_ONCE(rcu_state.gp_kthread);
j = jiffies;
- ja = j - data_race(rcu_state.gp_activity);
- jr = j - data_race(rcu_state.gp_req_activity);
- js = j - data_race(rcu_state.gp_start);
- jw = j - data_race(rcu_state.gp_wake_time);
+ ja = j - data_race(READ_ONCE(rcu_state.gp_activity));
+ jr = j - data_race(READ_ONCE(rcu_state.gp_req_activity));
+ js = j - data_race(READ_ONCE(rcu_state.gp_start));
+ jw = j - data_race(READ_ONCE(rcu_state.gp_wake_time));
pr_info("%s: wait state: %s(%d) ->state: %#x ->rt_priority %u delta ->gp_start %lu ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_max %lu ->gp_flags %#x\n",
rcu_state.name, gp_state_getname(rcu_state.gp_state),
- rcu_state.gp_state, t ? t->__state : 0x1ffff, t ? t->rt_priority : 0xffU,
- js, ja, jr, jw, (long)data_race(rcu_state.gp_wake_seq),
- (long)data_race(rcu_state.gp_seq),
- (long)data_race(rcu_get_root()->gp_seq_needed),
- data_race(rcu_state.gp_max),
- data_race(rcu_state.gp_flags));
+ data_race(READ_ONCE(rcu_state.gp_state)),
+ t ? data_race(READ_ONCE(t->__state)) : 0x1ffff, t ? t->rt_priority : 0xffU,
+ js, ja, jr, jw, (long)data_race(READ_ONCE(rcu_state.gp_wake_seq)),
+ (long)data_race(READ_ONCE(rcu_state.gp_seq)),
+ (long)data_race(READ_ONCE(rcu_get_root()->gp_seq_needed)),
+ data_race(READ_ONCE(rcu_state.gp_max)),
+ data_race(READ_ONCE(rcu_state.gp_flags)));
rcu_for_each_node_breadth_first(rnp) {
if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq), READ_ONCE(rnp->gp_seq_needed)) &&
- !data_race(rnp->qsmask) && !data_race(rnp->boost_tasks) &&
- !data_race(rnp->exp_tasks) && !data_race(rnp->gp_tasks))
+ !data_race(READ_ONCE(rnp->qsmask)) && !data_race(READ_ONCE(rnp->boost_tasks)) &&
+ !data_race(READ_ONCE(rnp->exp_tasks)) && !data_race(READ_ONCE(rnp->gp_tasks)))
continue;
pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld ->qsmask %#lx %c%c%c%c ->n_boosts %ld\n",
rnp->grplo, rnp->grphi,
- (long)data_race(rnp->gp_seq), (long)data_race(rnp->gp_seq_needed),
- data_race(rnp->qsmask),
- ".b"[!!data_race(rnp->boost_kthread_task)],
- ".B"[!!data_race(rnp->boost_tasks)],
- ".E"[!!data_race(rnp->exp_tasks)],
- ".G"[!!data_race(rnp->gp_tasks)],
- data_race(rnp->n_boosts));
+ (long)data_race(READ_ONCE(rnp->gp_seq)),
+ (long)data_race(READ_ONCE(rnp->gp_seq_needed)),
+ data_race(READ_ONCE(rnp->qsmask)),
+ ".b"[!!data_race(READ_ONCE(rnp->boost_kthread_task))],
+ ".B"[!!data_race(READ_ONCE(rnp->boost_tasks))],
+ ".E"[!!data_race(READ_ONCE(rnp->exp_tasks))],
+ ".G"[!!data_race(READ_ONCE(rnp->gp_tasks))],
+ data_race(READ_ONCE(rnp->n_boosts)));
if (!rcu_is_leaf_node(rnp))
continue;
for_each_leaf_node_possible_cpu(rnp, cpu) {
@@ -826,12 +853,12 @@ void show_rcu_gp_kthreads(void)
READ_ONCE(rdp->gp_seq_needed)))
continue;
pr_info("\tcpu %d ->gp_seq_needed %ld\n",
- cpu, (long)data_race(rdp->gp_seq_needed));
+ cpu, (long)data_race(READ_ONCE(rdp->gp_seq_needed)));
}
}
for_each_possible_cpu(cpu) {
rdp = per_cpu_ptr(&rcu_data, cpu);
- cbs += data_race(rdp->n_cbs_invoked);
+ cbs += data_race(READ_ONCE(rdp->n_cbs_invoked));
if (rcu_segcblist_is_offloaded(&rdp->cblist))
show_rcu_nocb_state(rdp);
}
@@ -913,11 +940,11 @@ void rcu_fwd_progress_check(unsigned long j)
if (rcu_gp_in_progress()) {
pr_info("%s: GP age %lu jiffies\n",
- __func__, jiffies - rcu_state.gp_start);
+ __func__, jiffies - data_race(READ_ONCE(rcu_state.gp_start)));
show_rcu_gp_kthreads();
} else {
pr_info("%s: Last GP end %lu jiffies ago\n",
- __func__, jiffies - rcu_state.gp_end);
+ __func__, jiffies - data_race(READ_ONCE(rcu_state.gp_end)));
preempt_disable();
rdp = this_cpu_ptr(&rcu_data);
rcu_check_gp_start_stall(rdp->mynode, rdp, j);
diff --git a/kernel/scftorture.c b/kernel/scftorture.c
index 29e8fc5d91a7..64a08288b1a6 100644
--- a/kernel/scftorture.c
+++ b/kernel/scftorture.c
@@ -64,6 +64,7 @@ torture_param(bool, use_cpus_read_lock, 0, "Use cpus_read_lock() to exclude CPU
torture_param(int, verbose, 0, "Enable verbose debugging printk()s");
torture_param(int, weight_resched, -1, "Testing weight for resched_cpu() operations.");
torture_param(int, weight_single, -1, "Testing weight for single-CPU no-wait operations.");
+torture_param(int, weight_single_rpc, -1, "Testing weight for single-CPU RPC operations.");
torture_param(int, weight_single_wait, -1, "Testing weight for single-CPU operations.");
torture_param(int, weight_many, -1, "Testing weight for multi-CPU no-wait operations.");
torture_param(int, weight_many_wait, -1, "Testing weight for multi-CPU operations.");
@@ -86,6 +87,8 @@ struct scf_statistics {
long long n_resched;
long long n_single;
long long n_single_ofl;
+ long long n_single_rpc;
+ long long n_single_rpc_ofl;
long long n_single_wait;
long long n_single_wait_ofl;
long long n_many;
@@ -101,14 +104,17 @@ static DEFINE_PER_CPU(long long, scf_invoked_count);
// Data for random primitive selection
#define SCF_PRIM_RESCHED 0
#define SCF_PRIM_SINGLE 1
-#define SCF_PRIM_MANY 2
-#define SCF_PRIM_ALL 3
-#define SCF_NPRIMS 7 // Need wait and no-wait versions of each,
- // except for SCF_PRIM_RESCHED.
+#define SCF_PRIM_SINGLE_RPC 2
+#define SCF_PRIM_MANY 3
+#define SCF_PRIM_ALL 4
+#define SCF_NPRIMS 8 // Need wait and no-wait versions of each,
+ // except for SCF_PRIM_RESCHED and
+ // SCF_PRIM_SINGLE_RPC.
static char *scf_prim_name[] = {
"resched_cpu",
"smp_call_function_single",
+ "smp_call_function_single_rpc",
"smp_call_function_many",
"smp_call_function",
};
@@ -128,6 +134,8 @@ struct scf_check {
bool scfc_out;
int scfc_cpu; // -1 for not _single().
bool scfc_wait;
+ bool scfc_rpc;
+ struct completion scfc_completion;
};
// Use to wait for all threads to start.
@@ -158,6 +166,7 @@ static void scf_torture_stats_print(void)
scfs.n_resched += scf_stats_p[i].n_resched;
scfs.n_single += scf_stats_p[i].n_single;
scfs.n_single_ofl += scf_stats_p[i].n_single_ofl;
+ scfs.n_single_rpc += scf_stats_p[i].n_single_rpc;
scfs.n_single_wait += scf_stats_p[i].n_single_wait;
scfs.n_single_wait_ofl += scf_stats_p[i].n_single_wait_ofl;
scfs.n_many += scf_stats_p[i].n_many;
@@ -168,9 +177,10 @@ static void scf_torture_stats_print(void)
if (atomic_read(&n_errs) || atomic_read(&n_mb_in_errs) ||
atomic_read(&n_mb_out_errs) || atomic_read(&n_alloc_errs))
bangstr = "!!! ";
- pr_alert("%s %sscf_invoked_count %s: %lld resched: %lld single: %lld/%lld single_ofl: %lld/%lld many: %lld/%lld all: %lld/%lld ",
+ pr_alert("%s %sscf_invoked_count %s: %lld resched: %lld single: %lld/%lld single_ofl: %lld/%lld single_rpc: %lld single_rpc_ofl: %lld many: %lld/%lld all: %lld/%lld ",
SCFTORT_FLAG, bangstr, isdone ? "VER" : "ver", invoked_count, scfs.n_resched,
scfs.n_single, scfs.n_single_wait, scfs.n_single_ofl, scfs.n_single_wait_ofl,
+ scfs.n_single_rpc, scfs.n_single_rpc_ofl,
scfs.n_many, scfs.n_many_wait, scfs.n_all, scfs.n_all_wait);
torture_onoff_stats();
pr_cont("ste: %d stnmie: %d stnmoe: %d staf: %d\n", atomic_read(&n_errs),
@@ -282,10 +292,13 @@ static void scf_handler(void *scfc_in)
out:
if (unlikely(!scfcp))
return;
- if (scfcp->scfc_wait)
+ if (scfcp->scfc_wait) {
WRITE_ONCE(scfcp->scfc_out, true);
- else
+ if (scfcp->scfc_rpc)
+ complete(&scfcp->scfc_completion);
+ } else {
kfree(scfcp);
+ }
}
// As above, but check for correct CPU.
@@ -319,6 +332,7 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
scfcp->scfc_cpu = -1;
scfcp->scfc_wait = scfsp->scfs_wait;
scfcp->scfc_out = false;
+ scfcp->scfc_rpc = false;
}
}
switch (scfsp->scfs_prim) {
@@ -350,6 +364,34 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
scfcp = NULL;
}
break;
+ case SCF_PRIM_SINGLE_RPC:
+ if (!scfcp)
+ break;
+ cpu = torture_random(trsp) % nr_cpu_ids;
+ scfp->n_single_rpc++;
+ scfcp->scfc_cpu = cpu;
+ scfcp->scfc_wait = true;
+ init_completion(&scfcp->scfc_completion);
+ scfcp->scfc_rpc = true;
+ barrier(); // Prevent race-reduction compiler optimizations.
+ scfcp->scfc_in = true;
+ ret = smp_call_function_single(cpu, scf_handler_1, (void *)scfcp, 0);
+ if (!ret) {
+ if (use_cpus_read_lock)
+ cpus_read_unlock();
+ else
+ preempt_enable();
+ wait_for_completion(&scfcp->scfc_completion);
+ if (use_cpus_read_lock)
+ cpus_read_lock();
+ else
+ preempt_disable();
+ } else {
+ scfp->n_single_rpc_ofl++;
+ kfree(scfcp);
+ scfcp = NULL;
+ }
+ break;
case SCF_PRIM_MANY:
if (scfsp->scfs_wait)
scfp->n_many_wait++;
@@ -379,10 +421,12 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
}
if (scfcp && scfsp->scfs_wait) {
if (WARN_ON_ONCE((num_online_cpus() > 1 || scfsp->scfs_prim == SCF_PRIM_SINGLE) &&
- !scfcp->scfc_out))
+ !scfcp->scfc_out)) {
+ pr_warn("%s: Memory-ordering failure, scfs_prim: %d.\n", __func__, scfsp->scfs_prim);
atomic_inc(&n_mb_out_errs); // Leak rather than trash!
- else
+ } else {
kfree(scfcp);
+ }
barrier(); // Prevent race-reduction compiler optimizations.
}
if (use_cpus_read_lock)
@@ -453,8 +497,8 @@ static void
scftorture_print_module_parms(const char *tag)
{
pr_alert(SCFTORT_FLAG
- "--- %s: verbose=%d holdoff=%d longwait=%d nthreads=%d onoff_holdoff=%d onoff_interval=%d shutdown_secs=%d stat_interval=%d stutter=%d use_cpus_read_lock=%d, weight_resched=%d, weight_single=%d, weight_single_wait=%d, weight_many=%d, weight_many_wait=%d, weight_all=%d, weight_all_wait=%d\n", tag,
- verbose, holdoff, longwait, nthreads, onoff_holdoff, onoff_interval, shutdown, stat_interval, stutter, use_cpus_read_lock, weight_resched, weight_single, weight_single_wait, weight_many, weight_many_wait, weight_all, weight_all_wait);
+ "--- %s: verbose=%d holdoff=%d longwait=%d nthreads=%d onoff_holdoff=%d onoff_interval=%d shutdown_secs=%d stat_interval=%d stutter=%d use_cpus_read_lock=%d, weight_resched=%d, weight_single=%d, weight_single_rpc=%d, weight_single_wait=%d, weight_many=%d, weight_many_wait=%d, weight_all=%d, weight_all_wait=%d\n", tag,
+ verbose, holdoff, longwait, nthreads, onoff_holdoff, onoff_interval, shutdown, stat_interval, stutter, use_cpus_read_lock, weight_resched, weight_single, weight_single_rpc, weight_single_wait, weight_many, weight_many_wait, weight_all, weight_all_wait);
}
static void scf_cleanup_handler(void *unused)
@@ -469,7 +513,7 @@ static void scf_torture_cleanup(void)
return;
WRITE_ONCE(scfdone, true);
- if (nthreads)
+ if (nthreads && scf_stats_p)
for (i = 0; i < nthreads; i++)
torture_stop_kthread("scftorture_invoker", scf_stats_p[i].task);
else
@@ -497,6 +541,7 @@ static int __init scf_torture_init(void)
int firsterr = 0;
unsigned long weight_resched1 = weight_resched;
unsigned long weight_single1 = weight_single;
+ unsigned long weight_single_rpc1 = weight_single_rpc;
unsigned long weight_single_wait1 = weight_single_wait;
unsigned long weight_many1 = weight_many;
unsigned long weight_many_wait1 = weight_many_wait;
@@ -508,11 +553,13 @@ static int __init scf_torture_init(void)
scftorture_print_module_parms("Start of test");
- if (weight_resched == -1 && weight_single == -1 && weight_single_wait == -1 &&
+ if (weight_resched == -1 &&
+ weight_single == -1 && weight_single_rpc == -1 && weight_single_wait == -1 &&
weight_many == -1 && weight_many_wait == -1 &&
weight_all == -1 && weight_all_wait == -1) {
weight_resched1 = 2 * nr_cpu_ids;
weight_single1 = 2 * nr_cpu_ids;
+ weight_single_rpc1 = 2 * nr_cpu_ids;
weight_single_wait1 = 2 * nr_cpu_ids;
weight_many1 = 2;
weight_many_wait1 = 2;
@@ -523,6 +570,8 @@ static int __init scf_torture_init(void)
weight_resched1 = 0;
if (weight_single == -1)
weight_single1 = 0;
+ if (weight_single_rpc == -1)
+ weight_single_rpc1 = 0;
if (weight_single_wait == -1)
weight_single_wait1 = 0;
if (weight_many == -1)
@@ -534,7 +583,7 @@ static int __init scf_torture_init(void)
if (weight_all_wait == -1)
weight_all_wait1 = 0;
}
- if (weight_single1 == 0 && weight_single_wait1 == 0 &&
+ if (weight_single1 == 0 && weight_single_rpc1 == 0 && weight_single_wait1 == 0 &&
weight_many1 == 0 && weight_many_wait1 == 0 &&
weight_all1 == 0 && weight_all_wait1 == 0) {
VERBOSE_SCFTORTOUT_ERRSTRING("all zero weights makes no sense");
@@ -546,6 +595,7 @@ static int __init scf_torture_init(void)
else if (weight_resched1)
VERBOSE_SCFTORTOUT_ERRSTRING("built as module, weight_resched ignored");
scf_sel_add(weight_single1, SCF_PRIM_SINGLE, false);
+ scf_sel_add(weight_single_rpc1, SCF_PRIM_SINGLE_RPC, true);
scf_sel_add(weight_single_wait1, SCF_PRIM_SINGLE, true);
scf_sel_add(weight_many1, SCF_PRIM_MANY, false);
scf_sel_add(weight_many_wait1, SCF_PRIM_MANY, true);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 20ffcc044134..c4462c454ab9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -237,9 +237,30 @@ static DEFINE_MUTEX(sched_core_mutex);
static atomic_t sched_core_count;
static struct cpumask sched_core_mask;
+static void sched_core_lock(int cpu, unsigned long *flags)
+{
+ const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+ int t, i = 0;
+
+ local_irq_save(*flags);
+ for_each_cpu(t, smt_mask)
+ raw_spin_lock_nested(&cpu_rq(t)->__lock, i++);
+}
+
+static void sched_core_unlock(int cpu, unsigned long *flags)
+{
+ const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+ int t;
+
+ for_each_cpu(t, smt_mask)
+ raw_spin_unlock(&cpu_rq(t)->__lock);
+ local_irq_restore(*flags);
+}
+
static void __sched_core_flip(bool enabled)
{
- int cpu, t, i;
+ unsigned long flags;
+ int cpu, t;
cpus_read_lock();
@@ -250,19 +271,12 @@ static void __sched_core_flip(bool enabled)
for_each_cpu(cpu, &sched_core_mask) {
const struct cpumask *smt_mask = cpu_smt_mask(cpu);
- i = 0;
- local_irq_disable();
- for_each_cpu(t, smt_mask) {
- /* supports up to SMT8 */
- raw_spin_lock_nested(&cpu_rq(t)->__lock, i++);
- }
+ sched_core_lock(cpu, &flags);
for_each_cpu(t, smt_mask)
cpu_rq(t)->core_enabled = enabled;
- for_each_cpu(t, smt_mask)
- raw_spin_unlock(&cpu_rq(t)->__lock);
- local_irq_enable();
+ sched_core_unlock(cpu, &flags);
cpumask_andnot(&sched_core_mask, &sched_core_mask, smt_mask);
}
@@ -993,6 +1007,7 @@ int get_nohz_timer_target(void)
{
int i, cpu = smp_processor_id(), default_cpu = -1;
struct sched_domain *sd;
+ const struct cpumask *hk_mask;
if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) {
if (!idle_cpu(cpu))
@@ -1000,10 +1015,11 @@ int get_nohz_timer_target(void)
default_cpu = cpu;
}
+ hk_mask = housekeeping_cpumask(HK_FLAG_TIMER);
+
rcu_read_lock();
for_each_domain(cpu, sd) {
- for_each_cpu_and(i, sched_domain_span(sd),
- housekeeping_cpumask(HK_FLAG_TIMER)) {
+ for_each_cpu_and(i, sched_domain_span(sd), hk_mask) {
if (cpu == i)
continue;
@@ -1619,6 +1635,23 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
uclamp_rq_dec_id(rq, p, clamp_id);
}
+static inline void uclamp_rq_reinc_id(struct rq *rq, struct task_struct *p,
+ enum uclamp_id clamp_id)
+{
+ if (!p->uclamp[clamp_id].active)
+ return;
+
+ uclamp_rq_dec_id(rq, p, clamp_id);
+ uclamp_rq_inc_id(rq, p, clamp_id);
+
+ /*
+ * Make sure to clear the idle flag if we've transiently reached 0
+ * active tasks on rq.
+ */
+ if (clamp_id == UCLAMP_MAX && (rq->uclamp_flags & UCLAMP_FLAG_IDLE))
+ rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE;
+}
+
static inline void
uclamp_update_active(struct task_struct *p)
{
@@ -1642,12 +1675,8 @@ uclamp_update_active(struct task_struct *p)
* affecting a valid clamp bucket, the next time it's enqueued,
* it will already see the updated clamp bucket value.
*/
- for_each_clamp_id(clamp_id) {
- if (p->uclamp[clamp_id].active) {
- uclamp_rq_dec_id(rq, p, clamp_id);
- uclamp_rq_inc_id(rq, p, clamp_id);
- }
- }
+ for_each_clamp_id(clamp_id)
+ uclamp_rq_reinc_id(rq, p, clamp_id);
task_rq_unlock(rq, p, &rf);
}
@@ -2161,7 +2190,7 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
/* Non kernel threads are not allowed during either online or offline. */
if (!(p->flags & PF_KTHREAD))
- return cpu_active(cpu);
+ return cpu_active(cpu) && task_cpu_possible(cpu, p);
/* KTHREAD_IS_PER_CPU is always allowed. */
if (kthread_is_per_cpu(p))
@@ -2468,6 +2497,34 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
__do_set_cpus_allowed(p, new_mask, 0);
}
+int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src,
+ int node)
+{
+ if (!src->user_cpus_ptr)
+ return 0;
+
+ dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node);
+ if (!dst->user_cpus_ptr)
+ return -ENOMEM;
+
+ cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr);
+ return 0;
+}
+
+static inline struct cpumask *clear_user_cpus_ptr(struct task_struct *p)
+{
+ struct cpumask *user_mask = NULL;
+
+ swap(p->user_cpus_ptr, user_mask);
+
+ return user_mask;
+}
+
+void release_user_cpus_ptr(struct task_struct *p)
+{
+ kfree(clear_user_cpus_ptr(p));
+}
+
/*
* This function is wildly self concurrent; here be dragons.
*
@@ -2685,28 +2742,26 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
}
/*
- * Change a given task's CPU affinity. Migrate the thread to a
- * proper CPU and schedule it away if the CPU it's executing on
- * is removed from the allowed bitmask.
- *
- * NOTE: the caller must have a valid reference to the task, the
- * task must not exit() & deallocate itself prematurely. The
- * call is not atomic; no spinlocks may be held.
+ * Called with both p->pi_lock and rq->lock held; drops both before returning.
*/
-static int __set_cpus_allowed_ptr(struct task_struct *p,
- const struct cpumask *new_mask,
- u32 flags)
+static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
+ const struct cpumask *new_mask,
+ u32 flags,
+ struct rq *rq,
+ struct rq_flags *rf)
+ __releases(rq->lock)
+ __releases(p->pi_lock)
{
+ const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p);
const struct cpumask *cpu_valid_mask = cpu_active_mask;
+ bool kthread = p->flags & PF_KTHREAD;
+ struct cpumask *user_mask = NULL;
unsigned int dest_cpu;
- struct rq_flags rf;
- struct rq *rq;
int ret = 0;
- rq = task_rq_lock(p, &rf);
update_rq_clock(rq);
- if (p->flags & PF_KTHREAD || is_migration_disabled(p)) {
+ if (kthread || is_migration_disabled(p)) {
/*
* Kernel threads are allowed on online && !active CPUs,
* however, during cpu-hot-unplug, even these might get pushed
@@ -2720,6 +2775,11 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
cpu_valid_mask = cpu_online_mask;
}
+ if (!kthread && !cpumask_subset(new_mask, cpu_allowed_mask)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
/*
* Must re-check here, to close a race against __kthread_bind(),
* sched_setaffinity() is not guaranteed to observe the flag.
@@ -2754,20 +2814,178 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
__do_set_cpus_allowed(p, new_mask, flags);
- return affine_move_task(rq, p, &rf, dest_cpu, flags);
+ if (flags & SCA_USER)
+ user_mask = clear_user_cpus_ptr(p);
+
+ ret = affine_move_task(rq, p, rf, dest_cpu, flags);
+
+ kfree(user_mask);
+
+ return ret;
out:
- task_rq_unlock(rq, p, &rf);
+ task_rq_unlock(rq, p, rf);
return ret;
}
+/*
+ * Change a given task's CPU affinity. Migrate the thread to a
+ * proper CPU and schedule it away if the CPU it's executing on
+ * is removed from the allowed bitmask.
+ *
+ * NOTE: the caller must have a valid reference to the task, the
+ * task must not exit() & deallocate itself prematurely. The
+ * call is not atomic; no spinlocks may be held.
+ */
+static int __set_cpus_allowed_ptr(struct task_struct *p,
+ const struct cpumask *new_mask, u32 flags)
+{
+ struct rq_flags rf;
+ struct rq *rq;
+
+ rq = task_rq_lock(p, &rf);
+ return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, &rf);
+}
+
int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
{
return __set_cpus_allowed_ptr(p, new_mask, 0);
}
EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
+/*
+ * Change a given task's CPU affinity to the intersection of its current
+ * affinity mask and @subset_mask, writing the resulting mask to @new_mask
+ * and pointing @p->user_cpus_ptr to a copy of the old mask.
+ * If the resulting mask is empty, leave the affinity unchanged and return
+ * -EINVAL.
+ */
+static int restrict_cpus_allowed_ptr(struct task_struct *p,
+ struct cpumask *new_mask,
+ const struct cpumask *subset_mask)
+{
+ struct cpumask *user_mask = NULL;
+ struct rq_flags rf;
+ struct rq *rq;
+ int err;
+
+ if (!p->user_cpus_ptr) {
+ user_mask = kmalloc(cpumask_size(), GFP_KERNEL);
+ if (!user_mask)
+ return -ENOMEM;
+ }
+
+ rq = task_rq_lock(p, &rf);
+
+ /*
+ * Forcefully restricting the affinity of a deadline task is
+ * likely to cause problems, so fail and noisily override the
+ * mask entirely.
+ */
+ if (task_has_dl_policy(p) && dl_bandwidth_enabled()) {
+ err = -EPERM;
+ goto err_unlock;
+ }
+
+ if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) {
+ err = -EINVAL;
+ goto err_unlock;
+ }
+
+ /*
+ * We're about to butcher the task affinity, so keep track of what
+ * the user asked for in case we're able to restore it later on.
+ */
+ if (user_mask) {
+ cpumask_copy(user_mask, p->cpus_ptr);
+ p->user_cpus_ptr = user_mask;
+ }
+
+ return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, &rf);
+
+err_unlock:
+ task_rq_unlock(rq, p, &rf);
+ kfree(user_mask);
+ return err;
+}
+
+/*
+ * Restrict the CPU affinity of task @p so that it is a subset of
+ * task_cpu_possible_mask() and point @p->user_cpus_ptr to a copy of the
+ * old affinity mask. If the resulting mask is empty, we warn and walk
+ * up the cpuset hierarchy until we find a suitable mask.
+ */
+void force_compatible_cpus_allowed_ptr(struct task_struct *p)
+{
+ cpumask_var_t new_mask;
+ const struct cpumask *override_mask = task_cpu_possible_mask(p);
+
+ alloc_cpumask_var(&new_mask, GFP_KERNEL);
+
+ /*
+ * __migrate_task() can fail silently in the face of concurrent
+ * offlining of the chosen destination CPU, so take the hotplug
+ * lock to ensure that the migration succeeds.
+ */
+ cpus_read_lock();
+ if (!cpumask_available(new_mask))
+ goto out_set_mask;
+
+ if (!restrict_cpus_allowed_ptr(p, new_mask, override_mask))
+ goto out_free_mask;
+
+ /*
+ * We failed to find a valid subset of the affinity mask for the
+ * task, so override it based on its cpuset hierarchy.
+ */
+ cpuset_cpus_allowed(p, new_mask);
+ override_mask = new_mask;
+
+out_set_mask:
+ if (printk_ratelimit()) {
+ printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n",
+ task_pid_nr(p), p->comm,
+ cpumask_pr_args(override_mask));
+ }
+
+ WARN_ON(set_cpus_allowed_ptr(p, override_mask));
+out_free_mask:
+ cpus_read_unlock();
+ free_cpumask_var(new_mask);
+}
+
+static int
+__sched_setaffinity(struct task_struct *p, const struct cpumask *mask);
+
+/*
+ * Restore the affinity of a task @p which was previously restricted by a
+ * call to force_compatible_cpus_allowed_ptr(). This will clear (and free)
+ * @p->user_cpus_ptr.
+ *
+ * It is the caller's responsibility to serialise this with any calls to
+ * force_compatible_cpus_allowed_ptr(@p).
+ */
+void relax_compatible_cpus_allowed_ptr(struct task_struct *p)
+{
+ struct cpumask *user_mask = p->user_cpus_ptr;
+ unsigned long flags;
+
+ /*
+ * Try to restore the old affinity mask. If this fails, then
+ * we free the mask explicitly to avoid it being inherited across
+ * a subsequent fork().
+ */
+ if (!user_mask || !__sched_setaffinity(p, user_mask))
+ return;
+
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
+ user_mask = clear_user_cpus_ptr(p);
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+ kfree(user_mask);
+}
+
void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
{
#ifdef CONFIG_SCHED_DEBUG
@@ -3112,9 +3330,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
/* Look for allowed, online CPU in same node. */
for_each_cpu(dest_cpu, nodemask) {
- if (!cpu_active(dest_cpu))
- continue;
- if (cpumask_test_cpu(dest_cpu, p->cpus_ptr))
+ if (is_cpu_allowed(p, dest_cpu))
return dest_cpu;
}
}
@@ -3131,8 +3347,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
/* No more Mr. Nice Guy. */
switch (state) {
case cpuset:
- if (IS_ENABLED(CONFIG_CPUSETS)) {
- cpuset_cpus_allowed_fallback(p);
+ if (cpuset_cpus_allowed_fallback(p)) {
state = possible;
break;
}
@@ -3144,10 +3359,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
*
* More yuck to audit.
*/
- do_set_cpus_allowed(p, cpu_possible_mask);
+ do_set_cpus_allowed(p, task_cpu_possible_mask(p));
state = fail;
break;
-
case fail:
BUG();
break;
@@ -3562,6 +3776,55 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
}
/*
+ * Invoked from try_to_wake_up() to check whether the task can be woken up.
+ *
+ * The caller holds p::pi_lock if p != current or has preemption
+ * disabled when p == current.
+ *
+ * The rules of PREEMPT_RT saved_state:
+ *
+ * The related locking code always holds p::pi_lock when updating
+ * p::saved_state, which means the code is fully serialized in both cases.
+ *
+ * The lock wait and lock wakeups happen via TASK_RTLOCK_WAIT. No other
+ * bits set. This makes it possible to distinguish all wakeup scenarios.
+ */
+static __always_inline
+bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
+{
+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) {
+ WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) &&
+ state != TASK_RTLOCK_WAIT);
+ }
+
+ if (READ_ONCE(p->__state) & state) {
+ *success = 1;
+ return true;
+ }
+
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * Saved state preserves the task state across blocking on
+ * an RT lock. If the state matches, set p::saved_state to
+ * TASK_RUNNING, but do not wake the task because it waits
+ * for a lock wakeup. Also indicate success because from
+ * the regular waker's point of view this has succeeded.
+ *
+ * After acquiring the lock the task will restore p::__state
+ * from p::saved_state which ensures that the regular
+ * wakeup is not lost. The restore will also set
+ * p::saved_state to TASK_RUNNING so any further tests will
+ * not result in false positives vs. @success
+ */
+ if (p->saved_state & state) {
+ p->saved_state = TASK_RUNNING;
+ *success = 1;
+ }
+#endif
+ return false;
+}
+
+/*
* Notes on Program-Order guarantees on SMP systems.
*
* MIGRATION
@@ -3700,10 +3963,9 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
* - we're serialized against set_special_state() by virtue of
* it disabling IRQs (this allows not taking ->pi_lock).
*/
- if (!(READ_ONCE(p->__state) & state))
+ if (!ttwu_state_match(p, state, &success))
goto out;
- success = 1;
trace_sched_waking(p);
WRITE_ONCE(p->__state, TASK_RUNNING);
trace_sched_wakeup(p);
@@ -3718,14 +3980,11 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
*/
raw_spin_lock_irqsave(&p->pi_lock, flags);
smp_mb__after_spinlock();
- if (!(READ_ONCE(p->__state) & state))
+ if (!ttwu_state_match(p, state, &success))
goto unlock;
trace_sched_waking(p);
- /* We're going to change ->state: */
- success = 1;
-
/*
* Ensure we load p->on_rq _after_ p->state, otherwise it would
* be possible to, falsely, observe p->on_rq == 0 and get stuck
@@ -5660,11 +5919,9 @@ static bool try_steal_cookie(int this, int that)
if (p->core_occupation > dst->idle->core_occupation)
goto next;
- p->on_rq = TASK_ON_RQ_MIGRATING;
deactivate_task(src, p, 0);
set_task_cpu(p, this);
activate_task(dst, p, 0);
- p->on_rq = TASK_ON_RQ_QUEUED;
resched_curr(dst);
@@ -5736,35 +5993,109 @@ void queue_core_balance(struct rq *rq)
queue_balance_callback(rq, &per_cpu(core_balance_head, rq->cpu), sched_core_balance);
}
-static inline void sched_core_cpu_starting(unsigned int cpu)
+static void sched_core_cpu_starting(unsigned int cpu)
{
const struct cpumask *smt_mask = cpu_smt_mask(cpu);
- struct rq *rq, *core_rq = NULL;
- int i;
+ struct rq *rq = cpu_rq(cpu), *core_rq = NULL;
+ unsigned long flags;
+ int t;
- core_rq = cpu_rq(cpu)->core;
+ sched_core_lock(cpu, &flags);
- if (!core_rq) {
- for_each_cpu(i, smt_mask) {
- rq = cpu_rq(i);
- if (rq->core && rq->core == rq)
- core_rq = rq;
+ WARN_ON_ONCE(rq->core != rq);
+
+ /* if we're the first, we'll be our own leader */
+ if (cpumask_weight(smt_mask) == 1)
+ goto unlock;
+
+ /* find the leader */
+ for_each_cpu(t, smt_mask) {
+ if (t == cpu)
+ continue;
+ rq = cpu_rq(t);
+ if (rq->core == rq) {
+ core_rq = rq;
+ break;
}
+ }
- if (!core_rq)
- core_rq = cpu_rq(cpu);
+ if (WARN_ON_ONCE(!core_rq)) /* whoopsie */
+ goto unlock;
- for_each_cpu(i, smt_mask) {
- rq = cpu_rq(i);
+ /* install and validate core_rq */
+ for_each_cpu(t, smt_mask) {
+ rq = cpu_rq(t);
- WARN_ON_ONCE(rq->core && rq->core != core_rq);
+ if (t == cpu)
rq->core = core_rq;
- }
+
+ WARN_ON_ONCE(rq->core != core_rq);
+ }
+
+unlock:
+ sched_core_unlock(cpu, &flags);
+}
+
+static void sched_core_cpu_deactivate(unsigned int cpu)
+{
+ const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+ struct rq *rq = cpu_rq(cpu), *core_rq = NULL;
+ unsigned long flags;
+ int t;
+
+ sched_core_lock(cpu, &flags);
+
+ /* if we're the last man standing, nothing to do */
+ if (cpumask_weight(smt_mask) == 1) {
+ WARN_ON_ONCE(rq->core != rq);
+ goto unlock;
+ }
+
+ /* if we're not the leader, nothing to do */
+ if (rq->core != rq)
+ goto unlock;
+
+ /* find a new leader */
+ for_each_cpu(t, smt_mask) {
+ if (t == cpu)
+ continue;
+ core_rq = cpu_rq(t);
+ break;
}
+
+ if (WARN_ON_ONCE(!core_rq)) /* impossible */
+ goto unlock;
+
+ /* copy the shared state to the new leader */
+ core_rq->core_task_seq = rq->core_task_seq;
+ core_rq->core_pick_seq = rq->core_pick_seq;
+ core_rq->core_cookie = rq->core_cookie;
+ core_rq->core_forceidle = rq->core_forceidle;
+ core_rq->core_forceidle_seq = rq->core_forceidle_seq;
+
+ /* install new leader */
+ for_each_cpu(t, smt_mask) {
+ rq = cpu_rq(t);
+ rq->core = core_rq;
+ }
+
+unlock:
+ sched_core_unlock(cpu, &flags);
}
+
+static inline void sched_core_cpu_dying(unsigned int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+
+ if (rq->core != rq)
+ rq->core = rq;
+}
+
#else /* !CONFIG_SCHED_CORE */
static inline void sched_core_cpu_starting(unsigned int cpu) {}
+static inline void sched_core_cpu_deactivate(unsigned int cpu) {}
+static inline void sched_core_cpu_dying(unsigned int cpu) {}
static struct task_struct *
pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
@@ -5775,6 +6106,24 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
#endif /* CONFIG_SCHED_CORE */
/*
+ * Constants for the sched_mode argument of __schedule().
+ *
+ * The mode argument allows RT enabled kernels to differentiate a
+ * preemption from blocking on a 'sleeping' spin/rwlock. Note that
+ * SM_MASK_PREEMPT for !RT has all bits set, which allows the compiler to
+ * optimize the AND operation out and just check for zero.
+ */
+#define SM_NONE 0x0
+#define SM_PREEMPT 0x1
+#define SM_RTLOCK_WAIT 0x2
+
+#ifndef CONFIG_PREEMPT_RT
+# define SM_MASK_PREEMPT (~0U)
+#else
+# define SM_MASK_PREEMPT SM_PREEMPT
+#endif
+
+/*
* __schedule() is the main scheduler function.
*
* The main means of driving the scheduler and thus entering this function are:
@@ -5813,7 +6162,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
*
* WARNING: must be called with preemption disabled!
*/
-static void __sched notrace __schedule(bool preempt)
+static void __sched notrace __schedule(unsigned int sched_mode)
{
struct task_struct *prev, *next;
unsigned long *switch_count;
@@ -5826,13 +6175,13 @@ static void __sched notrace __schedule(bool preempt)
rq = cpu_rq(cpu);
prev = rq->curr;
- schedule_debug(prev, preempt);
+ schedule_debug(prev, !!sched_mode);
if (sched_feat(HRTICK) || sched_feat(HRTICK_DL))
hrtick_clear(rq);
local_irq_disable();
- rcu_note_context_switch(preempt);
+ rcu_note_context_switch(!!sched_mode);
/*
* Make sure that signal_pending_state()->signal_pending() below
@@ -5866,7 +6215,7 @@ static void __sched notrace __schedule(bool preempt)
* - ptrace_{,un}freeze_traced() can change ->state underneath us.
*/
prev_state = READ_ONCE(prev->__state);
- if (!preempt && prev_state) {
+ if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) {
if (signal_pending_state(prev_state, prev)) {
WRITE_ONCE(prev->__state, TASK_RUNNING);
} else {
@@ -5932,7 +6281,7 @@ static void __sched notrace __schedule(bool preempt)
migrate_disable_switch(rq, prev);
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
- trace_sched_switch(preempt, prev, next);
+ trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next);
/* Also unlocks the rq: */
rq = context_switch(rq, prev, next, &rf);
@@ -5953,7 +6302,7 @@ void __noreturn do_task_dead(void)
/* Tell freezer to ignore us: */
current->flags |= PF_NOFREEZE;
- __schedule(false);
+ __schedule(SM_NONE);
BUG();
/* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */
@@ -6014,7 +6363,7 @@ asmlinkage __visible void __sched schedule(void)
sched_submit_work(tsk);
do {
preempt_disable();
- __schedule(false);
+ __schedule(SM_NONE);
sched_preempt_enable_no_resched();
} while (need_resched());
sched_update_worker(tsk);
@@ -6042,7 +6391,7 @@ void __sched schedule_idle(void)
*/
WARN_ON_ONCE(current->__state);
do {
- __schedule(false);
+ __schedule(SM_NONE);
} while (need_resched());
}
@@ -6077,6 +6426,18 @@ void __sched schedule_preempt_disabled(void)
preempt_disable();
}
+#ifdef CONFIG_PREEMPT_RT
+void __sched notrace schedule_rtlock(void)
+{
+ do {
+ preempt_disable();
+ __schedule(SM_RTLOCK_WAIT);
+ sched_preempt_enable_no_resched();
+ } while (need_resched());
+}
+NOKPROBE_SYMBOL(schedule_rtlock);
+#endif
+
static void __sched notrace preempt_schedule_common(void)
{
do {
@@ -6095,7 +6456,7 @@ static void __sched notrace preempt_schedule_common(void)
*/
preempt_disable_notrace();
preempt_latency_start(1);
- __schedule(true);
+ __schedule(SM_PREEMPT);
preempt_latency_stop(1);
preempt_enable_no_resched_notrace();
@@ -6174,7 +6535,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
* an infinite recursion.
*/
prev_ctx = exception_enter();
- __schedule(true);
+ __schedule(SM_PREEMPT);
exception_exit(prev_ctx);
preempt_latency_stop(1);
@@ -6323,7 +6684,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
do {
preempt_disable();
local_irq_enable();
- __schedule(true);
+ __schedule(SM_PREEMPT);
local_irq_disable();
sched_preempt_enable_no_resched();
} while (need_resched());
@@ -7300,6 +7661,16 @@ err_size:
return -E2BIG;
}
+static void get_params(struct task_struct *p, struct sched_attr *attr)
+{
+ if (task_has_dl_policy(p))
+ __getparam_dl(p, attr);
+ else if (task_has_rt_policy(p))
+ attr->sched_priority = p->rt_priority;
+ else
+ attr->sched_nice = task_nice(p);
+}
+
/**
* sys_sched_setscheduler - set/change the scheduler policy and RT priority
* @pid: the pid in question.
@@ -7361,6 +7732,8 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
rcu_read_unlock();
if (likely(p)) {
+ if (attr.sched_flags & SCHED_FLAG_KEEP_PARAMS)
+ get_params(p, &attr);
retval = sched_setattr(p, &attr);
put_task_struct(p);
}
@@ -7509,12 +7882,8 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
kattr.sched_policy = p->policy;
if (p->sched_reset_on_fork)
kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK;
- if (task_has_dl_policy(p))
- __getparam_dl(p, &kattr);
- else if (task_has_rt_policy(p))
- kattr.sched_priority = p->rt_priority;
- else
- kattr.sched_nice = task_nice(p);
+ get_params(p, &kattr);
+ kattr.sched_flags &= SCHED_FLAG_ALL;
#ifdef CONFIG_UCLAMP_TASK
/*
@@ -7535,9 +7904,76 @@ out_unlock:
return retval;
}
-long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
+#ifdef CONFIG_SMP
+int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
{
+ int ret = 0;
+
+ /*
+ * If the task isn't a deadline task or admission control is
+ * disabled then we don't care about affinity changes.
+ */
+ if (!task_has_dl_policy(p) || !dl_bandwidth_enabled())
+ return 0;
+
+ /*
+ * Since bandwidth control happens on root_domain basis,
+ * if admission test is enabled, we only admit -deadline
+ * tasks allowed to run on all the CPUs in the task's
+ * root_domain.
+ */
+ rcu_read_lock();
+ if (!cpumask_subset(task_rq(p)->rd->span, mask))
+ ret = -EBUSY;
+ rcu_read_unlock();
+ return ret;
+}
+#endif
+
+static int
+__sched_setaffinity(struct task_struct *p, const struct cpumask *mask)
+{
+ int retval;
cpumask_var_t cpus_allowed, new_mask;
+
+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL))
+ return -ENOMEM;
+
+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
+ retval = -ENOMEM;
+ goto out_free_cpus_allowed;
+ }
+
+ cpuset_cpus_allowed(p, cpus_allowed);
+ cpumask_and(new_mask, mask, cpus_allowed);
+
+ retval = dl_task_check_affinity(p, new_mask);
+ if (retval)
+ goto out_free_new_mask;
+again:
+ retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK | SCA_USER);
+ if (retval)
+ goto out_free_new_mask;
+
+ cpuset_cpus_allowed(p, cpus_allowed);
+ if (!cpumask_subset(new_mask, cpus_allowed)) {
+ /*
+ * We must have raced with a concurrent cpuset update.
+ * Just reset the cpumask to the cpuset's cpus_allowed.
+ */
+ cpumask_copy(new_mask, cpus_allowed);
+ goto again;
+ }
+
+out_free_new_mask:
+ free_cpumask_var(new_mask);
+out_free_cpus_allowed:
+ free_cpumask_var(cpus_allowed);
+ return retval;
+}
+
+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
+{
struct task_struct *p;
int retval;
@@ -7557,68 +7993,22 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
retval = -EINVAL;
goto out_put_task;
}
- if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
- retval = -ENOMEM;
- goto out_put_task;
- }
- if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
- retval = -ENOMEM;
- goto out_free_cpus_allowed;
- }
- retval = -EPERM;
+
if (!check_same_owner(p)) {
rcu_read_lock();
if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
rcu_read_unlock();
- goto out_free_new_mask;
+ retval = -EPERM;
+ goto out_put_task;
}
rcu_read_unlock();
}
retval = security_task_setscheduler(p);
if (retval)
- goto out_free_new_mask;
-
-
- cpuset_cpus_allowed(p, cpus_allowed);
- cpumask_and(new_mask, in_mask, cpus_allowed);
-
- /*
- * Since bandwidth control happens on root_domain basis,
- * if admission test is enabled, we only admit -deadline
- * tasks allowed to run on all the CPUs in the task's
- * root_domain.
- */
-#ifdef CONFIG_SMP
- if (task_has_dl_policy(p) && dl_bandwidth_enabled()) {
- rcu_read_lock();
- if (!cpumask_subset(task_rq(p)->rd->span, new_mask)) {
- retval = -EBUSY;
- rcu_read_unlock();
- goto out_free_new_mask;
- }
- rcu_read_unlock();
- }
-#endif
-again:
- retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK);
+ goto out_put_task;
- if (!retval) {
- cpuset_cpus_allowed(p, cpus_allowed);
- if (!cpumask_subset(new_mask, cpus_allowed)) {
- /*
- * We must have raced with a concurrent cpuset
- * update. Just reset the cpus_allowed to the
- * cpuset's cpus_allowed
- */
- cpumask_copy(new_mask, cpus_allowed);
- goto again;
- }
- }
-out_free_new_mask:
- free_cpumask_var(new_mask);
-out_free_cpus_allowed:
- free_cpumask_var(cpus_allowed);
+ retval = __sched_setaffinity(p, in_mask);
out_put_task:
put_task_struct(p);
return retval;
@@ -7761,6 +8151,17 @@ int __sched __cond_resched(void)
preempt_schedule_common();
return 1;
}
+ /*
+ * In preemptible kernels, ->rcu_read_lock_nesting tells the tick
+ * whether the current CPU is in an RCU read-side critical section,
+ * so the tick can report quiescent states even for CPUs looping
+ * in kernel context. In contrast, in non-preemptible kernels,
+ * RCU readers leave no in-memory hints, which means that CPU-bound
+ * processes executing in kernel context might never report an
+ * RCU quiescent state. Therefore, the following code causes
+ * cond_resched() to report a quiescent state, but only when RCU
+ * is in urgent need of one.
+ */
#ifndef CONFIG_PREEMPT_RCU
rcu_all_qs();
#endif
@@ -8707,6 +9108,8 @@ int sched_cpu_deactivate(unsigned int cpu)
*/
if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
static_branch_dec_cpuslocked(&sched_smt_present);
+
+ sched_core_cpu_deactivate(cpu);
#endif
if (!sched_smp_initialized)
@@ -8811,6 +9214,7 @@ int sched_cpu_dying(unsigned int cpu)
calc_load_migrate(rq);
update_max_interval();
hrtick_clear(rq);
+ sched_core_cpu_dying(cpu);
return 0;
}
#endif
@@ -9022,7 +9426,7 @@ void __init sched_init(void)
atomic_set(&rq->nr_iowait, 0);
#ifdef CONFIG_SCHED_CORE
- rq->core = NULL;
+ rq->core = rq;
rq->core_pick = NULL;
rq->core_enabled = 0;
rq->core_tree = RB_ROOT;
@@ -9804,7 +10208,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
* Prevent race between setting of cfs_rq->runtime_enabled and
* unthrottle_offline_cfs_rqs().
*/
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&cfs_constraints_mutex);
ret = __cfs_schedulable(tg, period, quota);
if (ret)
@@ -9848,7 +10252,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
cfs_bandwidth_usage_dec();
out_unlock:
mutex_unlock(&cfs_constraints_mutex);
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -10099,6 +10503,20 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
}
#endif /* CONFIG_RT_GROUP_SCHED */
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static s64 cpu_idle_read_s64(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ return css_tg(css)->idle;
+}
+
+static int cpu_idle_write_s64(struct cgroup_subsys_state *css,
+ struct cftype *cft, s64 idle)
+{
+ return sched_group_set_idle(css_tg(css), idle);
+}
+#endif
+
static struct cftype cpu_legacy_files[] = {
#ifdef CONFIG_FAIR_GROUP_SCHED
{
@@ -10106,6 +10524,11 @@ static struct cftype cpu_legacy_files[] = {
.read_u64 = cpu_shares_read_u64,
.write_u64 = cpu_shares_write_u64,
},
+ {
+ .name = "idle",
+ .read_s64 = cpu_idle_read_s64,
+ .write_s64 = cpu_idle_write_s64,
+ },
#endif
#ifdef CONFIG_CFS_BANDWIDTH
{
@@ -10313,6 +10736,12 @@ static struct cftype cpu_files[] = {
.read_s64 = cpu_weight_nice_read_s64,
.write_s64 = cpu_weight_nice_write_s64,
},
+ {
+ .name = "idle",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .read_s64 = cpu_idle_read_s64,
+ .write_s64 = cpu_idle_write_s64,
+ },
#endif
#ifdef CONFIG_CFS_BANDWIDTH
{
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 57124614363d..e7af18857371 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -537,9 +537,17 @@ static struct attribute *sugov_attrs[] = {
};
ATTRIBUTE_GROUPS(sugov);
+static void sugov_tunables_free(struct kobject *kobj)
+{
+ struct gov_attr_set *attr_set = container_of(kobj, struct gov_attr_set, kobj);
+
+ kfree(to_sugov_tunables(attr_set));
+}
+
static struct kobj_type sugov_tunables_ktype = {
.default_groups = sugov_groups,
.sysfs_ops = &governor_sysfs_ops,
+ .release = &sugov_tunables_free,
};
/********************** cpufreq governor interface *********************/
@@ -639,12 +647,10 @@ static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_polic
return tunables;
}
-static void sugov_tunables_free(struct sugov_tunables *tunables)
+static void sugov_clear_global_tunables(void)
{
if (!have_governor_per_policy())
global_tunables = NULL;
-
- kfree(tunables);
}
static int sugov_init(struct cpufreq_policy *policy)
@@ -707,7 +713,7 @@ out:
fail:
kobject_put(&tunables->attr_set.kobj);
policy->governor_data = NULL;
- sugov_tunables_free(tunables);
+ sugov_clear_global_tunables();
stop_kthread:
sugov_kthread_stop(sg_policy);
@@ -734,7 +740,7 @@ static void sugov_exit(struct cpufreq_policy *policy)
count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
policy->governor_data = NULL;
if (!count)
- sugov_tunables_free(tunables);
+ sugov_clear_global_tunables();
mutex_unlock(&global_tunables_lock);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index aaacd6cfd42f..e94314633b39 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1733,6 +1733,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
*/
raw_spin_rq_lock(rq);
if (p->dl.dl_non_contending) {
+ update_rq_clock(rq);
sub_running_bw(&p->dl, &rq->dl);
p->dl.dl_non_contending = 0;
/*
@@ -2741,7 +2742,7 @@ void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
dl_se->dl_runtime = attr->sched_runtime;
dl_se->dl_deadline = attr->sched_deadline;
dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
- dl_se->flags = attr->sched_flags;
+ dl_se->flags = attr->sched_flags & SCHED_DL_FLAGS;
dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
}
@@ -2754,7 +2755,8 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
attr->sched_runtime = dl_se->dl_runtime;
attr->sched_deadline = dl_se->dl_deadline;
attr->sched_period = dl_se->dl_period;
- attr->sched_flags = dl_se->flags;
+ attr->sched_flags &= ~SCHED_DL_FLAGS;
+ attr->sched_flags |= dl_se->flags;
}
/*
@@ -2851,7 +2853,7 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
if (dl_se->dl_runtime != attr->sched_runtime ||
dl_se->dl_deadline != attr->sched_deadline ||
dl_se->dl_period != attr->sched_period ||
- dl_se->flags != attr->sched_flags)
+ dl_se->flags != (attr->sched_flags & SCHED_DL_FLAGS))
return true;
return false;
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 0c5ec2776ddf..49716228efb4 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -388,6 +388,13 @@ void update_sched_domain_debugfs(void)
{
int cpu, i;
+ /*
+ * This can unfortunately be invoked before sched_debug_init() creates
+ * the debug directory. Don't touch sd_sysctl_cpus until then.
+ */
+ if (!debugfs_sched)
+ return;
+
if (!cpumask_available(sd_sysctl_cpus)) {
if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
return;
@@ -600,6 +607,9 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
cfs_rq->nr_spread_over);
SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
+ SEQ_printf(m, " .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running);
+ SEQ_printf(m, " .%-30s: %d\n", "idle_h_nr_running",
+ cfs_rq->idle_h_nr_running);
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
#ifdef CONFIG_SMP
SEQ_printf(m, " .%-30s: %lu\n", "load_avg",
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 44c452072a1b..ff69f245b939 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -431,6 +431,23 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
}
}
+static int tg_is_idle(struct task_group *tg)
+{
+ return tg->idle > 0;
+}
+
+static int cfs_rq_is_idle(struct cfs_rq *cfs_rq)
+{
+ return cfs_rq->idle > 0;
+}
+
+static int se_is_idle(struct sched_entity *se)
+{
+ if (entity_is_task(se))
+ return task_has_idle_policy(task_of(se));
+ return cfs_rq_is_idle(group_cfs_rq(se));
+}
+
#else /* !CONFIG_FAIR_GROUP_SCHED */
#define for_each_sched_entity(se) \
@@ -468,6 +485,21 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
{
}
+static inline int tg_is_idle(struct task_group *tg)
+{
+ return 0;
+}
+
+static int cfs_rq_is_idle(struct cfs_rq *cfs_rq)
+{
+ return 0;
+}
+
+static int se_is_idle(struct sched_entity *se)
+{
+ return 0;
+}
+
#endif /* CONFIG_FAIR_GROUP_SCHED */
static __always_inline
@@ -1486,7 +1518,7 @@ static inline bool is_core_idle(int cpu)
if (cpu == sibling)
continue;
- if (!idle_cpu(cpu))
+ if (!idle_cpu(sibling))
return false;
}
#endif
@@ -4841,6 +4873,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
+ if (cfs_rq_is_idle(group_cfs_rq(se)))
+ idle_task_delta = cfs_rq->h_nr_running;
+
qcfs_rq->h_nr_running -= task_delta;
qcfs_rq->idle_h_nr_running -= idle_task_delta;
@@ -4860,6 +4895,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
update_load_avg(qcfs_rq, se, 0);
se_update_runnable(se);
+ if (cfs_rq_is_idle(group_cfs_rq(se)))
+ idle_task_delta = cfs_rq->h_nr_running;
+
qcfs_rq->h_nr_running -= task_delta;
qcfs_rq->idle_h_nr_running -= idle_task_delta;
}
@@ -4904,39 +4942,45 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
task_delta = cfs_rq->h_nr_running;
idle_task_delta = cfs_rq->idle_h_nr_running;
for_each_sched_entity(se) {
+ struct cfs_rq *qcfs_rq = cfs_rq_of(se);
+
if (se->on_rq)
break;
- cfs_rq = cfs_rq_of(se);
- enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
+ enqueue_entity(qcfs_rq, se, ENQUEUE_WAKEUP);
+
+ if (cfs_rq_is_idle(group_cfs_rq(se)))
+ idle_task_delta = cfs_rq->h_nr_running;
- cfs_rq->h_nr_running += task_delta;
- cfs_rq->idle_h_nr_running += idle_task_delta;
+ qcfs_rq->h_nr_running += task_delta;
+ qcfs_rq->idle_h_nr_running += idle_task_delta;
/* end evaluation on encountering a throttled cfs_rq */
- if (cfs_rq_throttled(cfs_rq))
+ if (cfs_rq_throttled(qcfs_rq))
goto unthrottle_throttle;
}
for_each_sched_entity(se) {
- cfs_rq = cfs_rq_of(se);
+ struct cfs_rq *qcfs_rq = cfs_rq_of(se);
- update_load_avg(cfs_rq, se, UPDATE_TG);
+ update_load_avg(qcfs_rq, se, UPDATE_TG);
se_update_runnable(se);
- cfs_rq->h_nr_running += task_delta;
- cfs_rq->idle_h_nr_running += idle_task_delta;
+ if (cfs_rq_is_idle(group_cfs_rq(se)))
+ idle_task_delta = cfs_rq->h_nr_running;
+ qcfs_rq->h_nr_running += task_delta;
+ qcfs_rq->idle_h_nr_running += idle_task_delta;
/* end evaluation on encountering a throttled cfs_rq */
- if (cfs_rq_throttled(cfs_rq))
+ if (cfs_rq_throttled(qcfs_rq))
goto unthrottle_throttle;
/*
* One parent has been throttled and cfs_rq removed from the
* list. Add it back to not break the leaf list.
*/
- if (throttled_hierarchy(cfs_rq))
- list_add_leaf_cfs_rq(cfs_rq);
+ if (throttled_hierarchy(qcfs_rq))
+ list_add_leaf_cfs_rq(qcfs_rq);
}
/* At this point se is NULL and we are at root level*/
@@ -4949,9 +4993,9 @@ unthrottle_throttle:
* assertion below.
*/
for_each_sched_entity(se) {
- cfs_rq = cfs_rq_of(se);
+ struct cfs_rq *qcfs_rq = cfs_rq_of(se);
- if (list_add_leaf_cfs_rq(cfs_rq))
+ if (list_add_leaf_cfs_rq(qcfs_rq))
break;
}
@@ -5574,6 +5618,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
cfs_rq->h_nr_running++;
cfs_rq->idle_h_nr_running += idle_h_nr_running;
+ if (cfs_rq_is_idle(cfs_rq))
+ idle_h_nr_running = 1;
+
/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
goto enqueue_throttle;
@@ -5591,6 +5638,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
cfs_rq->h_nr_running++;
cfs_rq->idle_h_nr_running += idle_h_nr_running;
+ if (cfs_rq_is_idle(cfs_rq))
+ idle_h_nr_running = 1;
+
/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
goto enqueue_throttle;
@@ -5668,6 +5718,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
cfs_rq->h_nr_running--;
cfs_rq->idle_h_nr_running -= idle_h_nr_running;
+ if (cfs_rq_is_idle(cfs_rq))
+ idle_h_nr_running = 1;
+
/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
goto dequeue_throttle;
@@ -5697,6 +5750,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
cfs_rq->h_nr_running--;
cfs_rq->idle_h_nr_running -= idle_h_nr_running;
+ if (cfs_rq_is_idle(cfs_rq))
+ idle_h_nr_running = 1;
+
/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
goto dequeue_throttle;
@@ -6249,7 +6305,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
time = cpu_clock(this);
}
- for_each_cpu_wrap(cpu, cpus, target) {
+ for_each_cpu_wrap(cpu, cpus, target + 1) {
if (has_idle_core) {
i = select_idle_core(p, cpu, cpus, &idle_cpu);
if ((unsigned int)i < nr_cpumask_bits)
@@ -6376,6 +6432,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
/* Check a recently used CPU as a potential idle candidate: */
recent_used_cpu = p->recent_used_cpu;
+ p->recent_used_cpu = prev;
if (recent_used_cpu != prev &&
recent_used_cpu != target &&
cpus_share_cache(recent_used_cpu, target) &&
@@ -6902,9 +6959,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
} else if (wake_flags & WF_TTWU) { /* XXX always ? */
/* Fast path */
new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
-
- if (want_affine)
- current->recent_used_cpu = cpu;
}
rcu_read_unlock();
@@ -7041,24 +7095,22 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
static void set_last_buddy(struct sched_entity *se)
{
- if (entity_is_task(se) && unlikely(task_has_idle_policy(task_of(se))))
- return;
-
for_each_sched_entity(se) {
if (SCHED_WARN_ON(!se->on_rq))
return;
+ if (se_is_idle(se))
+ return;
cfs_rq_of(se)->last = se;
}
}
static void set_next_buddy(struct sched_entity *se)
{
- if (entity_is_task(se) && unlikely(task_has_idle_policy(task_of(se))))
- return;
-
for_each_sched_entity(se) {
if (SCHED_WARN_ON(!se->on_rq))
return;
+ if (se_is_idle(se))
+ return;
cfs_rq_of(se)->next = se;
}
}
@@ -7079,6 +7131,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
int scale = cfs_rq->nr_running >= sched_nr_latency;
int next_buddy_marked = 0;
+ int cse_is_idle, pse_is_idle;
if (unlikely(se == pse))
return;
@@ -7123,8 +7176,21 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
return;
find_matching_se(&se, &pse);
- update_curr(cfs_rq_of(se));
BUG_ON(!pse);
+
+ cse_is_idle = se_is_idle(se);
+ pse_is_idle = se_is_idle(pse);
+
+ /*
+ * Preempt an idle group in favor of a non-idle group (and don't preempt
+ * in the inverse case).
+ */
+ if (cse_is_idle && !pse_is_idle)
+ goto preempt;
+ if (cse_is_idle != pse_is_idle)
+ return;
+
+ update_curr(cfs_rq_of(se));
if (wakeup_preempt_entity(se, pse) == 1) {
/*
* Bias pick_next to pick the sched entity that is
@@ -10217,9 +10283,11 @@ static inline int on_null_domain(struct rq *rq)
static inline int find_new_ilb(void)
{
int ilb;
+ const struct cpumask *hk_mask;
+
+ hk_mask = housekeeping_cpumask(HK_FLAG_MISC);
- for_each_cpu_and(ilb, nohz.idle_cpus_mask,
- housekeeping_cpumask(HK_FLAG_MISC)) {
+ for_each_cpu_and(ilb, nohz.idle_cpus_mask, hk_mask) {
if (ilb == smp_processor_id())
continue;
@@ -11416,10 +11484,12 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
static DEFINE_MUTEX(shares_mutex);
-int sched_group_set_shares(struct task_group *tg, unsigned long shares)
+static int __sched_group_set_shares(struct task_group *tg, unsigned long shares)
{
int i;
+ lockdep_assert_held(&shares_mutex);
+
/*
* We can't change the weight of the root cgroup.
*/
@@ -11428,9 +11498,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES));
- mutex_lock(&shares_mutex);
if (tg->shares == shares)
- goto done;
+ return 0;
tg->shares = shares;
for_each_possible_cpu(i) {
@@ -11448,10 +11517,88 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
rq_unlock_irqrestore(rq, &rf);
}
-done:
+ return 0;
+}
+
+int sched_group_set_shares(struct task_group *tg, unsigned long shares)
+{
+ int ret;
+
+ mutex_lock(&shares_mutex);
+ if (tg_is_idle(tg))
+ ret = -EINVAL;
+ else
+ ret = __sched_group_set_shares(tg, shares);
+ mutex_unlock(&shares_mutex);
+
+ return ret;
+}
+
+int sched_group_set_idle(struct task_group *tg, long idle)
+{
+ int i;
+
+ if (tg == &root_task_group)
+ return -EINVAL;
+
+ if (idle < 0 || idle > 1)
+ return -EINVAL;
+
+ mutex_lock(&shares_mutex);
+
+ if (tg->idle == idle) {
+ mutex_unlock(&shares_mutex);
+ return 0;
+ }
+
+ tg->idle = idle;
+
+ for_each_possible_cpu(i) {
+ struct rq *rq = cpu_rq(i);
+ struct sched_entity *se = tg->se[i];
+ struct cfs_rq *grp_cfs_rq = tg->cfs_rq[i];
+ bool was_idle = cfs_rq_is_idle(grp_cfs_rq);
+ long idle_task_delta;
+ struct rq_flags rf;
+
+ rq_lock_irqsave(rq, &rf);
+
+ grp_cfs_rq->idle = idle;
+ if (WARN_ON_ONCE(was_idle == cfs_rq_is_idle(grp_cfs_rq)))
+ goto next_cpu;
+
+ idle_task_delta = grp_cfs_rq->h_nr_running -
+ grp_cfs_rq->idle_h_nr_running;
+ if (!cfs_rq_is_idle(grp_cfs_rq))
+ idle_task_delta *= -1;
+
+ for_each_sched_entity(se) {
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+ if (!se->on_rq)
+ break;
+
+ cfs_rq->idle_h_nr_running += idle_task_delta;
+
+ /* Already accounted at parent level and above. */
+ if (cfs_rq_is_idle(cfs_rq))
+ break;
+ }
+
+next_cpu:
+ rq_unlock_irqrestore(rq, &rf);
+ }
+
+ /* Idle groups have minimum weight. */
+ if (tg_is_idle(tg))
+ __sched_group_set_shares(tg, scale_load(WEIGHT_IDLEPRIO));
+ else
+ __sched_group_set_shares(tg, NICE_0_LOAD);
+
mutex_unlock(&shares_mutex);
return 0;
}
+
#else /* CONFIG_FAIR_GROUP_SCHED */
void free_fair_sched_group(struct task_group *tg) { }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 14a41a243f7b..3d3e5793e117 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -227,6 +227,8 @@ static inline void update_avg(u64 *avg, u64 sample)
*/
#define SCHED_FLAG_SUGOV 0x10000000
+#define SCHED_DL_FLAGS (SCHED_FLAG_RECLAIM | SCHED_FLAG_DL_OVERRUN | SCHED_FLAG_SUGOV)
+
static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se)
{
#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
@@ -394,6 +396,9 @@ struct task_group {
struct cfs_rq **cfs_rq;
unsigned long shares;
+ /* A positive value indicates that this is a SCHED_IDLE group. */
+ int idle;
+
#ifdef CONFIG_SMP
/*
* load_avg can be heavily contended at clock tick time, so put
@@ -503,6 +508,8 @@ extern void sched_move_task(struct task_struct *tsk);
#ifdef CONFIG_FAIR_GROUP_SCHED
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
+extern int sched_group_set_idle(struct task_group *tg, long idle);
+
#ifdef CONFIG_SMP
extern void set_task_rq_fair(struct sched_entity *se,
struct cfs_rq *prev, struct cfs_rq *next);
@@ -599,6 +606,9 @@ struct cfs_rq {
struct list_head leaf_cfs_rq_list;
struct task_group *tg; /* group that "owns" this runqueue */
+ /* Locally cached copy of our task_group's idle value */
+ int idle;
+
#ifdef CONFIG_CFS_BANDWIDTH
int runtime_enabled;
s64 runtime_remaining;
@@ -1093,7 +1103,7 @@ struct rq {
unsigned int core_sched_seq;
struct rb_root core_tree;
- /* shared state */
+ /* shared state -- careful with sched_core_cpu_deactivate() */
unsigned int core_task_seq;
unsigned int core_pick_seq;
unsigned long core_cookie;
@@ -2234,6 +2244,7 @@ extern struct task_struct *pick_next_task_idle(struct rq *rq);
#define SCA_CHECK 0x01
#define SCA_MIGRATE_DISABLE 0x02
#define SCA_MIGRATE_ENABLE 0x04
+#define SCA_USER 0x08
#ifdef CONFIG_SMP
@@ -2255,6 +2266,9 @@ static inline struct task_struct *get_push_task(struct rq *rq)
if (p->nr_cpus_allowed == 1)
return NULL;
+ if (p->migration_disabled)
+ return NULL;
+
rq->push_busy = true;
return get_task_struct(p);
}
@@ -2385,6 +2399,21 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
extern const_debug unsigned int sysctl_sched_nr_migrate;
extern const_debug unsigned int sysctl_sched_migration_cost;
+#ifdef CONFIG_SCHED_DEBUG
+extern unsigned int sysctl_sched_latency;
+extern unsigned int sysctl_sched_min_granularity;
+extern unsigned int sysctl_sched_wakeup_granularity;
+extern int sysctl_resched_latency_warn_ms;
+extern int sysctl_resched_latency_warn_once;
+
+extern unsigned int sysctl_sched_tunable_scaling;
+
+extern unsigned int sysctl_numa_balancing_scan_delay;
+extern unsigned int sysctl_numa_balancing_scan_period_min;
+extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_size;
+#endif
+
#ifdef CONFIG_SCHED_HRTICK
/*
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index b77ad49dc14f..4e8698e62f07 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1482,6 +1482,8 @@ int sched_max_numa_distance;
static int *sched_domains_numa_distance;
static struct cpumask ***sched_domains_numa_masks;
int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE;
+
+static unsigned long __read_mostly *sched_numa_onlined_nodes;
#endif
/*
@@ -1833,6 +1835,16 @@ void sched_init_numa(void)
sched_domains_numa_masks[i][j] = mask;
for_each_node(k) {
+ /*
+ * Distance information can be unreliable for
+ * offline nodes, defer building the node
+ * masks to its bringup.
+ * This relies on all unique distance values
+ * still being visible at init time.
+ */
+ if (!node_online(j))
+ continue;
+
if (sched_debug() && (node_distance(j, k) != node_distance(k, j)))
sched_numa_warn("Node-distance not symmetric");
@@ -1886,6 +1898,53 @@ void sched_init_numa(void)
sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1];
init_numa_topology_type();
+
+ sched_numa_onlined_nodes = bitmap_alloc(nr_node_ids, GFP_KERNEL);
+ if (!sched_numa_onlined_nodes)
+ return;
+
+ bitmap_zero(sched_numa_onlined_nodes, nr_node_ids);
+ for_each_online_node(i)
+ bitmap_set(sched_numa_onlined_nodes, i, 1);
+}
+
+static void __sched_domains_numa_masks_set(unsigned int node)
+{
+ int i, j;
+
+ /*
+ * NUMA masks are not built for offline nodes in sched_init_numa().
+ * Thus, when a CPU of a never-onlined-before node gets plugged in,
+ * adding that new CPU to the right NUMA masks is not sufficient: the
+ * masks of that CPU's node must also be updated.
+ */
+ if (test_bit(node, sched_numa_onlined_nodes))
+ return;
+
+ bitmap_set(sched_numa_onlined_nodes, node, 1);
+
+ for (i = 0; i < sched_domains_numa_levels; i++) {
+ for (j = 0; j < nr_node_ids; j++) {
+ if (!node_online(j) || node == j)
+ continue;
+
+ if (node_distance(j, node) > sched_domains_numa_distance[i])
+ continue;
+
+ /* Add remote nodes in our masks */
+ cpumask_or(sched_domains_numa_masks[i][node],
+ sched_domains_numa_masks[i][node],
+ sched_domains_numa_masks[0][j]);
+ }
+ }
+
+ /*
+ * A new node has been brought up, potentially changing the topology
+ * classification.
+ *
+ * Note that this is racy vs any use of sched_numa_topology_type :/
+ */
+ init_numa_topology_type();
}
void sched_domains_numa_masks_set(unsigned int cpu)
@@ -1893,8 +1952,14 @@ void sched_domains_numa_masks_set(unsigned int cpu)
int node = cpu_to_node(cpu);
int i, j;
+ __sched_domains_numa_masks_set(node);
+
for (i = 0; i < sched_domains_numa_levels; i++) {
for (j = 0; j < nr_node_ids; j++) {
+ if (!node_online(j))
+ continue;
+
+ /* Set ourselves in the remote node's masks */
if (node_distance(j, node) <= sched_domains_numa_distance[i])
cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]);
}
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 057e17f3215d..6469eca8078c 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -602,7 +602,7 @@ static inline void seccomp_sync_threads(unsigned long flags)
smp_store_release(&thread->seccomp.filter,
caller->seccomp.filter);
atomic_set(&thread->seccomp.filter_count,
- atomic_read(&thread->seccomp.filter_count));
+ atomic_read(&caller->seccomp.filter_count));
/*
* Don't let an unprivileged task work around
diff --git a/kernel/signal.c b/kernel/signal.c
index a3229add4455..52b6abec0ff8 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1413,6 +1413,21 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
return sighand;
}
+#ifdef CONFIG_LOCKDEP
+void lockdep_assert_task_sighand_held(struct task_struct *task)
+{
+ struct sighand_struct *sighand;
+
+ rcu_read_lock();
+ sighand = rcu_dereference(task->sighand);
+ if (sighand)
+ lockdep_assert_held(&sighand->siglock);
+ else
+ WARN_ON_ONCE(1);
+ rcu_read_unlock();
+}
+#endif
+
/*
* send signal info to all the members of a group
*/
diff --git a/kernel/smp.c b/kernel/smp.c
index 52bf159ec400..f43ede0ab183 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -764,7 +764,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
EXPORT_SYMBOL(smp_call_function_single);
/**
- * smp_call_function_single_async(): Run an asynchronous function on a
+ * smp_call_function_single_async() - Run an asynchronous function on a
* specific CPU.
* @cpu: The CPU to run on.
* @csd: Pre-allocated and setup data structure
@@ -783,6 +783,8 @@ EXPORT_SYMBOL(smp_call_function_single);
*
* NOTE: Be careful, there is unfortunately no current debugging facility to
* validate the correctness of this serialization.
+ *
+ * Return: %0 on success or negative errno value on error
*/
int smp_call_function_single_async(int cpu, struct __call_single_data *csd)
{
@@ -974,7 +976,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
* @mask: The set of cpus to run on (only runs on online subset).
* @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function.
- * @flags: Bitmask that controls the operation. If %SCF_WAIT is set, wait
+ * @wait: Bitmask that controls the operation. If %SCF_WAIT is set, wait
* (atomically) until function has completed on other CPUs. If
* %SCF_RUN_LOCAL is set, the function will also be run locally
* if the local CPU is set in the @cpumask.
@@ -1180,7 +1182,13 @@ void wake_up_all_idle_cpus(void)
EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);
/**
- * smp_call_on_cpu - Call a function on a specific cpu
+ * struct smp_call_on_cpu_struct - Call a function on a specific CPU
+ * @work: &work_struct
+ * @done: &completion to signal
+ * @func: function to call
+ * @data: function's data argument
+ * @ret: return value from @func
+ * @cpu: target CPU (%-1 for any CPU)
*
* Used to call a function on a specific cpu and wait for it to return.
* Optionally make sure the call is done on a specified physical cpu via vcpu
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index cf6acab78538..f6bc0bc8a2aa 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -291,7 +291,7 @@ int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
unsigned int cpu;
int ret = 0;
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&smpboot_threads_lock);
for_each_online_cpu(cpu) {
ret = __smpboot_create_thread(plug_thread, cpu);
@@ -304,7 +304,7 @@ int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
list_add(&plug_thread->list, &hotplug_threads);
out:
mutex_unlock(&smpboot_threads_lock);
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread);
@@ -317,12 +317,12 @@ EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread);
*/
void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread)
{
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&smpboot_threads_lock);
list_del(&plug_thread->list);
smpboot_destroy_threads(plug_thread);
mutex_unlock(&smpboot_threads_lock);
- put_online_cpus();
+ cpus_read_unlock();
}
EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index f3a012179f47..322b65d45676 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -422,7 +422,7 @@ static inline void invoke_softirq(void)
if (ksoftirqd_running(local_softirq_pending()))
return;
- if (!force_irqthreads || !__this_cpu_read(ksoftirqd)) {
+ if (!force_irqthreads() || !__this_cpu_read(ksoftirqd)) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
/*
* We can safely execute softirq on the current stack if
diff --git a/kernel/sys.c b/kernel/sys.c
index ef1a78f5d71c..72c7639e3c98 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -480,7 +480,8 @@ static int set_user(struct cred *new)
* failure to the execve() stage.
*/
if (is_ucounts_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) &&
- new_user != INIT_USER)
+ new_user != INIT_USER &&
+ !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
current->flags |= PF_NPROC_EXCEEDED;
else
current->flags &= ~PF_NPROC_EXCEEDED;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 272f4a272f8c..25e49b4d8049 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -536,6 +536,21 @@ static void proc_put_char(void **buf, size_t *size, char c)
}
}
+static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp,
+ int *valp,
+ int write, void *data)
+{
+ if (write) {
+ *(bool *)valp = *lvalp;
+ } else {
+ int val = *(bool *)valp;
+
+ *lvalp = (unsigned long)val;
+ *negp = false;
+ }
+ return 0;
+}
+
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
int *valp,
int write, void *data)
@@ -799,6 +814,26 @@ static int do_proc_douintvec(struct ctl_table *table, int write,
}
/**
+ * proc_dobool - read/write a bool
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * Returns 0 on success.
+ */
+int proc_dobool(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ return do_proc_dointvec(table, write, buffer, lenp, ppos,
+ do_proc_dobool_conv, NULL);
+}
+
+/**
* proc_dointvec - read a vector of integers
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
@@ -1630,6 +1665,12 @@ int proc_dostring(struct ctl_table *table, int write,
return -ENOSYS;
}
+int proc_dobool(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+
int proc_dointvec(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
@@ -3425,6 +3466,7 @@ int __init sysctl_init(void)
* No sense putting this after each symbol definition, twice,
* exception granted :-)
*/
+EXPORT_SYMBOL(proc_dobool);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
diff --git a/kernel/time/clocksource-wdtest.c b/kernel/time/clocksource-wdtest.c
index 01df12395c0e..df922f49d171 100644
--- a/kernel/time/clocksource-wdtest.c
+++ b/kernel/time/clocksource-wdtest.c
@@ -19,6 +19,8 @@
#include <linux/prandom.h>
#include <linux/cpu.h>
+#include "tick-internal.h"
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@kernel.org>");
@@ -34,9 +36,6 @@ static u64 wdtest_jiffies_read(struct clocksource *cs)
return (u64)jiffies;
}
-/* Assume HZ > 100. */
-#define JIFFIES_SHIFT 8
-
static struct clocksource clocksource_wdtest_jiffies = {
.name = "wdtest-jiffies",
.rating = 1, /* lowest valid rating*/
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index b89c76e1c02c..b8a14d2fb5ba 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -306,12 +306,12 @@ void clocksource_verify_percpu(struct clocksource *cs)
return;
cpumask_clear(&cpus_ahead);
cpumask_clear(&cpus_behind);
- get_online_cpus();
+ cpus_read_lock();
preempt_disable();
clocksource_verify_choose_cpus();
if (cpumask_weight(&cpus_chosen) == 0) {
preempt_enable();
- put_online_cpus();
+ cpus_read_unlock();
pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
return;
}
@@ -337,7 +337,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
cs_nsec_min = cs_nsec;
}
preempt_enable();
- put_online_cpus();
+ cpus_read_unlock();
if (!cpumask_empty(&cpus_ahead))
pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
cpumask_pr_args(&cpus_ahead), testcpu, cs->name);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 4a66725b1d4a..0ea8702eb516 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -652,21 +652,10 @@ static inline int hrtimer_hres_active(void)
return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
}
-/*
- * Reprogram the event source with checking both queues for the
- * next event
- * Called with interrupts disabled and base->lock held
- */
-static void
-hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
+static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base,
+ struct hrtimer *next_timer,
+ ktime_t expires_next)
{
- ktime_t expires_next;
-
- expires_next = hrtimer_update_next_event(cpu_base);
-
- if (skip_equal && expires_next == cpu_base->expires_next)
- return;
-
cpu_base->expires_next = expires_next;
/*
@@ -689,7 +678,25 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
return;
- tick_program_event(cpu_base->expires_next, 1);
+ tick_program_event(expires_next, 1);
+}
+
+/*
+ * Reprogram the event source with checking both queues for the
+ * next event
+ * Called with interrupts disabled and base->lock held
+ */
+static void
+hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
+{
+ ktime_t expires_next;
+
+ expires_next = hrtimer_update_next_event(cpu_base);
+
+ if (skip_equal && expires_next == cpu_base->expires_next)
+ return;
+
+ __hrtimer_reprogram(cpu_base, cpu_base->next_timer, expires_next);
}
/* High resolution timer related functions */
@@ -720,23 +727,7 @@ static inline int hrtimer_is_hres_enabled(void)
return hrtimer_hres_enabled;
}
-/*
- * Retrigger next event is called after clock was set
- *
- * Called with interrupts disabled via on_each_cpu()
- */
-static void retrigger_next_event(void *arg)
-{
- struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
-
- if (!__hrtimer_hres_active(base))
- return;
-
- raw_spin_lock(&base->lock);
- hrtimer_update_base(base);
- hrtimer_force_reprogram(base, 0);
- raw_spin_unlock(&base->lock);
-}
+static void retrigger_next_event(void *arg);
/*
* Switch to high resolution mode
@@ -758,29 +749,54 @@ static void hrtimer_switch_to_hres(void)
retrigger_next_event(NULL);
}
-static void clock_was_set_work(struct work_struct *work)
-{
- clock_was_set();
-}
+#else
-static DECLARE_WORK(hrtimer_work, clock_was_set_work);
+static inline int hrtimer_is_hres_enabled(void) { return 0; }
+static inline void hrtimer_switch_to_hres(void) { }
+#endif /* CONFIG_HIGH_RES_TIMERS */
/*
- * Called from timekeeping and resume code to reprogram the hrtimer
- * interrupt device on all cpus.
+ * Retrigger next event is called after clock was set with interrupts
+ * disabled through an SMP function call or directly from low level
+ * resume code.
+ *
+ * This is only invoked when:
+ * - CONFIG_HIGH_RES_TIMERS is enabled.
+ * - CONFIG_NOHZ_COMMON is enabled
+ *
+ * For the other cases this function is empty and because the call sites
+ * are optimized out it vanishes as well, i.e. no need for lots of
+ * #ifdeffery.
*/
-void clock_was_set_delayed(void)
+static void retrigger_next_event(void *arg)
{
- schedule_work(&hrtimer_work);
-}
-
-#else
+ struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
-static inline int hrtimer_is_hres_enabled(void) { return 0; }
-static inline void hrtimer_switch_to_hres(void) { }
-static inline void retrigger_next_event(void *arg) { }
+ /*
+ * When high resolution mode or nohz is active, then the offsets of
+ * CLOCK_REALTIME/TAI/BOOTTIME have to be updated. Otherwise the
+ * next tick will take care of that.
+ *
+ * If high resolution mode is active then the next expiring timer
+ * must be reevaluated and the clock event device reprogrammed if
+ * necessary.
+ *
+ * In the NOHZ case the update of the offset and the reevaluation
+ * of the next expiring timer is enough. The return from the SMP
+ * function call will take care of the reprogramming in case the
+ * CPU was in a NOHZ idle sleep.
+ */
+ if (!__hrtimer_hres_active(base) && !tick_nohz_active)
+ return;
-#endif /* CONFIG_HIGH_RES_TIMERS */
+ raw_spin_lock(&base->lock);
+ hrtimer_update_base(base);
+ if (__hrtimer_hres_active(base))
+ hrtimer_force_reprogram(base, 0);
+ else
+ hrtimer_update_next_event(base);
+ raw_spin_unlock(&base->lock);
+}
/*
* When a timer is enqueued and expires earlier than the already enqueued
@@ -835,75 +851,161 @@ static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
if (base->cpu_base != cpu_base)
return;
+ if (expires >= cpu_base->expires_next)
+ return;
+
/*
- * If the hrtimer interrupt is running, then it will
- * reevaluate the clock bases and reprogram the clock event
- * device. The callbacks are always executed in hard interrupt
- * context so we don't need an extra check for a running
- * callback.
+ * If the hrtimer interrupt is running, then it will reevaluate the
+ * clock bases and reprogram the clock event device.
*/
if (cpu_base->in_hrtirq)
return;
- if (expires >= cpu_base->expires_next)
- return;
-
- /* Update the pointer to the next expiring timer */
cpu_base->next_timer = timer;
- cpu_base->expires_next = expires;
+
+ __hrtimer_reprogram(cpu_base, timer, expires);
+}
+
+static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base,
+ unsigned int active)
+{
+ struct hrtimer_clock_base *base;
+ unsigned int seq;
+ ktime_t expires;
/*
- * If hres is not active, hardware does not have to be
- * programmed yet.
+ * Update the base offsets unconditionally so the following
+ * checks whether the SMP function call is required work.
*
- * If a hang was detected in the last timer interrupt then we
- * do not schedule a timer which is earlier than the expiry
- * which we enforced in the hang detection. We want the system
- * to make progress.
+ * The update is safe even when the remote CPU is in the hrtimer
+ * interrupt or the hrtimer soft interrupt and expiring affected
+ * bases. Either it will see the update before handling a base or
+ * it will see it when it finishes the processing and reevaluates
+ * the next expiring timer.
*/
- if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
- return;
+ seq = cpu_base->clock_was_set_seq;
+ hrtimer_update_base(cpu_base);
+
+ /*
+ * If the sequence did not change over the update then the
+ * remote CPU already handled it.
+ */
+ if (seq == cpu_base->clock_was_set_seq)
+ return false;
+
+ /*
+ * If the remote CPU is currently handling an hrtimer interrupt, it
+ * will reevaluate the first expiring timer of all clock bases
+ * before reprogramming. Nothing to do here.
+ */
+ if (cpu_base->in_hrtirq)
+ return false;
/*
- * Program the timer hardware. We enforce the expiry for
- * events which are already in the past.
+ * Walk the affected clock bases and check whether the first expiring
+ * timer in a clock base is moving ahead of the first expiring timer of
+ * @cpu_base. If so, the IPI must be invoked because per CPU clock
+ * event devices cannot be remotely reprogrammed.
*/
- tick_program_event(expires, 1);
+ active &= cpu_base->active_bases;
+
+ for_each_active_base(base, cpu_base, active) {
+ struct timerqueue_node *next;
+
+ next = timerqueue_getnext(&base->active);
+ expires = ktime_sub(next->expires, base->offset);
+ if (expires < cpu_base->expires_next)
+ return true;
+
+ /* Extra check for softirq clock bases */
+ if (base->clockid < HRTIMER_BASE_MONOTONIC_SOFT)
+ continue;
+ if (cpu_base->softirq_activated)
+ continue;
+ if (expires < cpu_base->softirq_expires_next)
+ return true;
+ }
+ return false;
}
/*
- * Clock realtime was set
- *
- * Change the offset of the realtime clock vs. the monotonic
- * clock.
+ * Clock was set. This might affect CLOCK_REALTIME, CLOCK_TAI and
+ * CLOCK_BOOTTIME (for late sleep time injection).
*
- * We might have to reprogram the high resolution timer interrupt. On
- * SMP we call the architecture specific code to retrigger _all_ high
- * resolution timer interrupts. On UP we just disable interrupts and
- * call the high resolution interrupt code.
+ * This requires updating the offsets for these clocks
+ * vs. CLOCK_MONOTONIC. When high resolution timers are enabled, then this
+ * may also require reprogramming the per CPU clock event devices
+ * when the change moves an affected timer ahead of the first expiring
+ * timer on that CPU. Obviously remote per CPU clock event devices cannot
+ * be reprogrammed. The other reason why an IPI has to be sent is when the
+ * system is in !HIGH_RES and NOHZ mode. The NOHZ mode updates the offsets
+ * in the tick, which obviously might be stopped, so this has to bring out
+ * the remote CPU which might sleep in idle to get this sorted.
*/
-void clock_was_set(void)
+void clock_was_set(unsigned int bases)
{
-#ifdef CONFIG_HIGH_RES_TIMERS
- /* Retrigger the CPU local events everywhere */
- on_each_cpu(retrigger_next_event, NULL, 1);
-#endif
+ struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases);
+ cpumask_var_t mask;
+ int cpu;
+
+ if (!__hrtimer_hres_active(cpu_base) && !tick_nohz_active)
+ goto out_timerfd;
+
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
+ on_each_cpu(retrigger_next_event, NULL, 1);
+ goto out_timerfd;
+ }
+
+ /* Avoid interrupting CPUs if possible */
+ cpus_read_lock();
+ for_each_online_cpu(cpu) {
+ unsigned long flags;
+
+ cpu_base = &per_cpu(hrtimer_bases, cpu);
+ raw_spin_lock_irqsave(&cpu_base->lock, flags);
+
+ if (update_needs_ipi(cpu_base, bases))
+ cpumask_set_cpu(cpu, mask);
+
+ raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+ }
+
+ preempt_disable();
+ smp_call_function_many(mask, retrigger_next_event, NULL, 1);
+ preempt_enable();
+ cpus_read_unlock();
+ free_cpumask_var(mask);
+
+out_timerfd:
timerfd_clock_was_set();
}
+static void clock_was_set_work(struct work_struct *work)
+{
+ clock_was_set(CLOCK_SET_WALL);
+}
+
+static DECLARE_WORK(hrtimer_work, clock_was_set_work);
+
+/*
+ * Called from timekeeping code to reprogram the hrtimer interrupt device
+ * on all cpus and to notify timerfd.
+ */
+void clock_was_set_delayed(void)
+{
+ schedule_work(&hrtimer_work);
+}
+
/*
- * During resume we might have to reprogram the high resolution timer
- * interrupt on all online CPUs. However, all other CPUs will be
- * stopped with IRQs interrupts disabled so the clock_was_set() call
- * must be deferred.
+ * Called during resume either directly via timekeeping_resume()
+ * or in the case of s2idle from tick_unfreeze() to ensure that the
+ * hrtimers are up to date.
*/
-void hrtimers_resume(void)
+void hrtimers_resume_local(void)
{
lockdep_assert_irqs_disabled();
/* Retrigger on the local CPU */
retrigger_next_event(NULL);
- /* And schedule a retrigger for all others */
- clock_was_set_delayed();
}
/*
@@ -1030,12 +1132,13 @@ static void __remove_hrtimer(struct hrtimer *timer,
* remove hrtimer, called with base lock held
*/
static inline int
-remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
+remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base,
+ bool restart, bool keep_local)
{
u8 state = timer->state;
if (state & HRTIMER_STATE_ENQUEUED) {
- int reprogram;
+ bool reprogram;
/*
* Remove the timer and force reprogramming when high
@@ -1048,8 +1151,16 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
debug_deactivate(timer);
reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
+ /*
+ * If the timer is not restarted then reprogramming is
+ * required if the timer is local. If it is local and about
+ * to be restarted, avoid programming it twice (on removal
+ * and a moment later when it's requeued).
+ */
if (!restart)
state = HRTIMER_STATE_INACTIVE;
+ else
+ reprogram &= !keep_local;
__remove_hrtimer(timer, base, state, reprogram);
return 1;
@@ -1103,9 +1214,31 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
struct hrtimer_clock_base *base)
{
struct hrtimer_clock_base *new_base;
+ bool force_local, first;
- /* Remove an active timer from the queue: */
- remove_hrtimer(timer, base, true);
+ /*
+ * If the timer is on the local cpu base and is the first expiring
+ * timer then this might end up reprogramming the hardware twice
+ * (on removal and on enqueue). To avoid that, prevent the
+ * reprogram on removal, keep the timer local to the current CPU
+ * and enforce reprogramming after it is queued no matter whether
+ * it is the new first expiring timer again or not.
+ */
+ force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
+ force_local &= base->cpu_base->next_timer == timer;
+
+ /*
+ * Remove an active timer from the queue. In case it is not queued
+ * on the current CPU, make sure that remove_hrtimer() updates the
+ * remote data correctly.
+ *
+ * If it's on the current CPU and the first expiring timer, then
+ * skip reprogramming, keep the timer local and enforce
+ * reprogramming later if it was the first expiring timer. This
+ * avoids programming the underlying clock event twice (once at
+ * removal and once after enqueue).
+ */
+ remove_hrtimer(timer, base, true, force_local);
if (mode & HRTIMER_MODE_REL)
tim = ktime_add_safe(tim, base->get_time());
@@ -1115,9 +1248,24 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
hrtimer_set_expires_range_ns(timer, tim, delta_ns);
/* Switch the timer base, if necessary: */
- new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
+ if (!force_local) {
+ new_base = switch_hrtimer_base(timer, base,
+ mode & HRTIMER_MODE_PINNED);
+ } else {
+ new_base = base;
+ }
- return enqueue_hrtimer(timer, new_base, mode);
+ first = enqueue_hrtimer(timer, new_base, mode);
+ if (!force_local)
+ return first;
+
+ /*
+ * Timer was forced to stay on the current CPU to avoid
+ * reprogramming on removal and enqueue. Force reprogram the
+ * hardware by evaluating the new first expiring timer.
+ */
+ hrtimer_force_reprogram(new_base->cpu_base, 1);
+ return 0;
}
/**
@@ -1183,7 +1331,7 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
base = lock_hrtimer_base(timer, &flags);
if (!hrtimer_callback_running(timer))
- ret = remove_hrtimer(timer, base, false);
+ ret = remove_hrtimer(timer, base, false, false);
unlock_hrtimer_base(timer, &flags);
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 01935aafdb46..bc4db9e5ab70 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -10,28 +10,9 @@
#include <linux/init.h>
#include "timekeeping.h"
+#include "tick-internal.h"
-/* Since jiffies uses a simple TICK_NSEC multiplier
- * conversion, the .shift value could be zero. However
- * this would make NTP adjustments impossible as they are
- * in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to
- * shift both the nominator and denominator the same
- * amount, and give ntp adjustments in units of 1/2^8
- *
- * The value 8 is somewhat carefully chosen, as anything
- * larger can result in overflows. TICK_NSEC grows as HZ
- * shrinks, so values greater than 8 overflow 32bits when
- * HZ=100.
- */
-#if HZ < 34
-#define JIFFIES_SHIFT 6
-#elif HZ < 67
-#define JIFFIES_SHIFT 7
-#else
-#define JIFFIES_SHIFT 8
-#endif
-
static u64 jiffies_read(struct clocksource *cs)
{
return (u64) jiffies;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 517be7fd175e..ee736861b18f 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -291,6 +291,8 @@ static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples)
struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
struct posix_cputimers *pct = &tsk->signal->posix_cputimers;
+ lockdep_assert_task_sighand_held(tsk);
+
/* Check if cputimer isn't running. This is accessed without locking. */
if (!READ_ONCE(pct->timers_active)) {
struct task_cputime sum;
@@ -405,6 +407,55 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
return 0;
}
+static struct posix_cputimer_base *timer_base(struct k_itimer *timer,
+ struct task_struct *tsk)
+{
+ int clkidx = CPUCLOCK_WHICH(timer->it_clock);
+
+ if (CPUCLOCK_PERTHREAD(timer->it_clock))
+ return tsk->posix_cputimers.bases + clkidx;
+ else
+ return tsk->signal->posix_cputimers.bases + clkidx;
+}
+
+/*
+ * Force recalculating the base earliest expiration on the next tick.
+ * This will also re-evaluate the need to keep around the process wide
+ * cputime counter and tick dependency and eventually shut these down
+ * if necessary.
+ */
+static void trigger_base_recalc_expires(struct k_itimer *timer,
+ struct task_struct *tsk)
+{
+ struct posix_cputimer_base *base = timer_base(timer, tsk);
+
+ base->nextevt = 0;
+}
+
+/*
+ * Dequeue the timer and reset the base if it was its earliest expiration.
+ * It makes sure the next tick recalculates the base next expiration so we
+ * don't keep the costly process wide cputime counter around for a random
+ * amount of time, along with the tick dependency.
+ *
+ * If another timer gets queued between this and the next tick, its
+ * expiration will update the base next event if necessary on the next
+ * tick.
+ */
+static void disarm_timer(struct k_itimer *timer, struct task_struct *p)
+{
+ struct cpu_timer *ctmr = &timer->it.cpu;
+ struct posix_cputimer_base *base;
+
+ if (!cpu_timer_dequeue(ctmr))
+ return;
+
+ base = timer_base(timer, p);
+ if (cpu_timer_getexpires(ctmr) == base->nextevt)
+ trigger_base_recalc_expires(timer, p);
+}
+
+
/*
* Clean up a CPU-clock timer that is about to be destroyed.
* This is called from timer deletion with the timer already locked.
@@ -439,7 +490,7 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
if (timer->it.cpu.firing)
ret = TIMER_RETRY;
else
- cpu_timer_dequeue(ctmr);
+ disarm_timer(timer, p);
unlock_task_sighand(p, &flags);
}
@@ -498,15 +549,9 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
*/
static void arm_timer(struct k_itimer *timer, struct task_struct *p)
{
- int clkidx = CPUCLOCK_WHICH(timer->it_clock);
+ struct posix_cputimer_base *base = timer_base(timer, p);
struct cpu_timer *ctmr = &timer->it.cpu;
u64 newexp = cpu_timer_getexpires(ctmr);
- struct posix_cputimer_base *base;
-
- if (CPUCLOCK_PERTHREAD(timer->it_clock))
- base = p->posix_cputimers.bases + clkidx;
- else
- base = p->signal->posix_cputimers.bases + clkidx;
if (!cpu_timer_enqueue(&base->tqhead, ctmr))
return;
@@ -703,16 +748,29 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
timer->it_overrun_last = 0;
timer->it_overrun = -1;
- if (new_expires != 0 && !(val < new_expires)) {
+ if (val >= new_expires) {
+ if (new_expires != 0) {
+ /*
+ * The designated time already passed, so we notify
+ * immediately, even if the thread never runs to
+ * accumulate more time on this clock.
+ */
+ cpu_timer_fire(timer);
+ }
+
/*
- * The designated time already passed, so we notify
- * immediately, even if the thread never runs to
- * accumulate more time on this clock.
+ * Make sure we don't keep around the process wide cputime
+ * counter or the tick dependency if they are not necessary.
*/
- cpu_timer_fire(timer);
- }
+ sighand = lock_task_sighand(p, &flags);
+ if (!sighand)
+ goto out;
+
+ if (!cpu_timer_queued(ctmr))
+ trigger_base_recalc_expires(timer, p);
- ret = 0;
+ unlock_task_sighand(p, &flags);
+ }
out:
rcu_read_unlock();
if (old)
@@ -1346,8 +1404,6 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
}
}
- if (!*newval)
- return;
*newval += now;
}
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index dd5697d7347b..3913222e7bcf 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -336,7 +336,7 @@ void posixtimer_rearm(struct kernel_siginfo *info)
int posix_timer_event(struct k_itimer *timr, int si_private)
{
enum pid_type type;
- int ret = -1;
+ int ret;
/*
* FIXME: if ->sigq is queued we can race with
* dequeue_signal()->posixtimer_rearm().
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index d663249652ef..46789356f856 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -470,6 +470,13 @@ void tick_resume_local(void)
else
tick_resume_oneshot();
}
+
+ /*
+ * Ensure that hrtimers are up to date and the clockevents device
+ * is reprogrammed correctly when high resolution timers are
+ * enabled.
+ */
+ hrtimers_resume_local();
}
/**
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 6a742a29e545..649f2b48e8f0 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -165,3 +165,35 @@ DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
void timer_clear_idle(void);
+
+#define CLOCK_SET_WALL \
+ (BIT(HRTIMER_BASE_REALTIME) | BIT(HRTIMER_BASE_REALTIME_SOFT) | \
+ BIT(HRTIMER_BASE_TAI) | BIT(HRTIMER_BASE_TAI_SOFT))
+
+#define CLOCK_SET_BOOT \
+ (BIT(HRTIMER_BASE_BOOTTIME) | BIT(HRTIMER_BASE_BOOTTIME_SOFT))
+
+void clock_was_set(unsigned int bases);
+void clock_was_set_delayed(void);
+
+void hrtimers_resume_local(void);
+
+/* Since jiffies uses a simple TICK_NSEC multiplier
+ * conversion, the .shift value could be zero. However
+ * this would make NTP adjustments impossible as they are
+ * in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to
+ * shift both the numerator and denominator the same
+ * amount, and give ntp adjustments in units of 1/2^8
+ *
+ * The value 8 is somewhat carefully chosen, as anything
+ * larger can result in overflows. TICK_NSEC grows as HZ
+ * shrinks, so values greater than 8 overflow 32bits when
+ * HZ=100.
+ */
+#if HZ < 34
+#define JIFFIES_SHIFT 6
+#elif HZ < 67
+#define JIFFIES_SHIFT 7
+#else
+#define JIFFIES_SHIFT 8
+#endif
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 8a364aa9881a..b348749a9fc6 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1323,8 +1323,8 @@ out:
write_seqcount_end(&tk_core.seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
- /* signal hrtimers about time change */
- clock_was_set();
+ /* Signal hrtimers about time change */
+ clock_was_set(CLOCK_SET_WALL);
if (!ret)
audit_tk_injoffset(ts_delta);
@@ -1371,8 +1371,8 @@ error: /* even if we error out, we forwarded the time, so call update */
write_seqcount_end(&tk_core.seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
- /* signal hrtimers about time change */
- clock_was_set();
+ /* Signal hrtimers about time change */
+ clock_was_set(CLOCK_SET_WALL);
return ret;
}
@@ -1746,8 +1746,8 @@ void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
write_seqcount_end(&tk_core.seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
- /* signal hrtimers about time change */
- clock_was_set();
+ /* Signal hrtimers about time change */
+ clock_was_set(CLOCK_SET_WALL | CLOCK_SET_BOOT);
}
#endif
@@ -1810,8 +1810,10 @@ void timekeeping_resume(void)
touch_softlockup_watchdog();
+ /* Resume the clockevent device(s) and hrtimers */
tick_resume();
- hrtimers_resume();
+ /* Notify timerfd as resume is equivalent to clock_was_set() */
+ timerfd_resume();
}
int timekeeping_suspend(void)
@@ -2125,7 +2127,7 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
* timekeeping_advance - Updates the timekeeper to the current time and
* current NTP tick length
*/
-static void timekeeping_advance(enum timekeeping_adv_mode mode)
+static bool timekeeping_advance(enum timekeeping_adv_mode mode)
{
struct timekeeper *real_tk = &tk_core.timekeeper;
struct timekeeper *tk = &shadow_timekeeper;
@@ -2196,9 +2198,8 @@ static void timekeeping_advance(enum timekeeping_adv_mode mode)
write_seqcount_end(&tk_core.seq);
out:
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
- if (clock_set)
- /* Have to call _delayed version, since in irq context*/
- clock_was_set_delayed();
+
+ return !!clock_set;
}
/**
@@ -2207,7 +2208,8 @@ out:
*/
void update_wall_time(void)
{
- timekeeping_advance(TK_ADV_TICK);
+ if (timekeeping_advance(TK_ADV_TICK))
+ clock_was_set_delayed();
}
/**
@@ -2387,8 +2389,9 @@ int do_adjtimex(struct __kernel_timex *txc)
{
struct timekeeper *tk = &tk_core.timekeeper;
struct audit_ntp_data ad;
- unsigned long flags;
+ bool clock_set = false;
struct timespec64 ts;
+ unsigned long flags;
s32 orig_tai, tai;
int ret;
@@ -2423,6 +2426,7 @@ int do_adjtimex(struct __kernel_timex *txc)
if (tai != orig_tai) {
__timekeeping_set_tai_offset(tk, tai);
timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
+ clock_set = true;
}
tk_update_leap_state(tk);
@@ -2433,10 +2437,10 @@ int do_adjtimex(struct __kernel_timex *txc)
/* Update the multiplier immediately if frequency was set directly */
if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK))
- timekeeping_advance(TK_ADV_FREQ);
+ clock_set |= timekeeping_advance(TK_ADV_FREQ);
- if (tai != orig_tai)
- clock_was_set();
+ if (clock_set)
+ clock_was_set(CLOCK_SET_WALL); /* argument is a CLOCK_SET_* bases mask, not a clockid */
ntp_notify_cmos_timer();
diff --git a/kernel/torture.c b/kernel/torture.c
index 0a315c387bed..bb8f411c974b 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -521,11 +521,11 @@ static void torture_shuffle_tasks(void)
struct shuffle_task *stp;
cpumask_setall(shuffle_tmp_mask);
- get_online_cpus();
+ cpus_read_lock();
/* No point in shuffling if there is only one online CPU (ex: UP) */
if (num_online_cpus() == 1) {
- put_online_cpus();
+ cpus_read_unlock();
return;
}
@@ -541,7 +541,7 @@ static void torture_shuffle_tasks(void)
set_cpus_allowed_ptr(stp->st_t, shuffle_tmp_mask);
mutex_unlock(&shuffle_task_mutex);
- put_online_cpus();
+ cpus_read_unlock();
}
/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d567b1717c4c..3ee23f4d437f 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -219,6 +219,11 @@ config DYNAMIC_FTRACE_WITH_DIRECT_CALLS
depends on DYNAMIC_FTRACE_WITH_REGS
depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+config DYNAMIC_FTRACE_WITH_ARGS
+ def_bool y
+ depends on DYNAMIC_FTRACE
+ depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS
+
config FUNCTION_PROFILER
bool "Kernel function profiler"
depends on FUNCTION_TRACER
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b4916ef388ad..8e2eb950aa82 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -124,7 +124,7 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
* out of events when it was updated in between this and the
* rcu_dereference() which is accepted risk.
*/
- ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);
+ ret = BPF_PROG_RUN_ARRAY(call->prog_array, ctx, bpf_prog_run);
out:
__this_cpu_dec(bpf_prog_active);
@@ -714,13 +714,28 @@ BPF_CALL_0(bpf_get_current_task_btf)
return (unsigned long) current;
}
-BTF_ID_LIST_SINGLE(bpf_get_current_btf_ids, struct, task_struct)
-
-static const struct bpf_func_proto bpf_get_current_task_btf_proto = {
+const struct bpf_func_proto bpf_get_current_task_btf_proto = {
.func = bpf_get_current_task_btf,
.gpl_only = true,
.ret_type = RET_PTR_TO_BTF_ID,
- .ret_btf_id = &bpf_get_current_btf_ids[0],
+ .ret_btf_id = &btf_task_struct_ids[0],
+};
+
+BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task)
+{
+ return (unsigned long) task_pt_regs(task);
+}
+
+BTF_ID_LIST(bpf_task_pt_regs_ids)
+BTF_ID(struct, pt_regs)
+
+const struct bpf_func_proto bpf_task_pt_regs_proto = {
+ .func = bpf_task_pt_regs,
+ .gpl_only = true,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &btf_task_struct_ids[0],
+ .ret_type = RET_PTR_TO_BTF_ID,
+ .ret_btf_id = &bpf_task_pt_regs_ids[0],
};
BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
@@ -948,7 +963,61 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = {
.arg5_type = ARG_ANYTHING,
};
-const struct bpf_func_proto *
+BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx)
+{
+ /* This helper call is inlined by verifier. */
+ return ((u64 *)ctx)[-1];
+}
+
+static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = {
+ .func = bpf_get_func_ip_tracing,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
+{
+ struct kprobe *kp = kprobe_running();
+
+ return kp ? (uintptr_t)kp->addr : 0;
+}
+
+static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
+ .func = bpf_get_func_ip_kprobe,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx)
+{
+ struct bpf_trace_run_ctx *run_ctx;
+
+ run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
+ return run_ctx->bpf_cookie;
+}
+
+static const struct bpf_func_proto bpf_get_attach_cookie_proto_trace = {
+ .func = bpf_get_attach_cookie_trace,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_attach_cookie_pe, struct bpf_perf_event_data_kern *, ctx)
+{
+ return ctx->event->bpf_cookie;
+}
+
+static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = {
+ .func = bpf_get_attach_cookie_pe,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
@@ -978,6 +1047,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_current_task_proto;
case BPF_FUNC_get_current_task_btf:
return &bpf_get_current_task_btf_proto;
+ case BPF_FUNC_task_pt_regs:
+ return &bpf_task_pt_regs_proto;
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
case BPF_FUNC_get_current_comm:
@@ -990,28 +1061,29 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_numa_node_id_proto;
case BPF_FUNC_perf_event_read:
return &bpf_perf_event_read_proto;
- case BPF_FUNC_probe_write_user:
- return bpf_get_probe_write_proto();
case BPF_FUNC_current_task_under_cgroup:
return &bpf_current_task_under_cgroup_proto;
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
+ case BPF_FUNC_probe_write_user:
+ return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
+ NULL : bpf_get_probe_write_proto();
case BPF_FUNC_probe_read_user:
return &bpf_probe_read_user_proto;
case BPF_FUNC_probe_read_kernel:
- return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+ return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
NULL : &bpf_probe_read_kernel_proto;
case BPF_FUNC_probe_read_user_str:
return &bpf_probe_read_user_str_proto;
case BPF_FUNC_probe_read_kernel_str:
- return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+ return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
NULL : &bpf_probe_read_kernel_str_proto;
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
case BPF_FUNC_probe_read:
- return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+ return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
NULL : &bpf_probe_read_compat_proto;
case BPF_FUNC_probe_read_str:
- return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+ return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
NULL : &bpf_probe_read_compat_str_proto;
#endif
#ifdef CONFIG_CGROUPS
@@ -1058,8 +1130,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_for_each_map_elem_proto;
case BPF_FUNC_snprintf:
return &bpf_snprintf_proto;
+ case BPF_FUNC_get_func_ip:
+ return &bpf_get_func_ip_proto_tracing;
default:
- return NULL;
+ return bpf_base_func_proto(func_id);
}
}
@@ -1077,6 +1151,10 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_override_return:
return &bpf_override_return_proto;
#endif
+ case BPF_FUNC_get_func_ip:
+ return &bpf_get_func_ip_proto_kprobe;
+ case BPF_FUNC_get_attach_cookie:
+ return &bpf_get_attach_cookie_proto_trace;
default:
return bpf_tracing_func_proto(func_id, prog);
}
@@ -1187,6 +1265,8 @@ tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_stackid_proto_tp;
case BPF_FUNC_get_stack:
return &bpf_get_stack_proto_tp;
+ case BPF_FUNC_get_attach_cookie:
+ return &bpf_get_attach_cookie_proto_trace;
default:
return bpf_tracing_func_proto(func_id, prog);
}
@@ -1294,6 +1374,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_perf_prog_read_value_proto;
case BPF_FUNC_read_branch_records:
return &bpf_read_branch_records_proto;
+ case BPF_FUNC_get_attach_cookie:
+ return &bpf_get_attach_cookie_proto_pe;
default:
return bpf_tracing_func_proto(func_id, prog);
}
@@ -1430,6 +1512,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
const struct bpf_func_proto *
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
+ const struct bpf_func_proto *fn;
+
switch (func_id) {
#ifdef CONFIG_NET
case BPF_FUNC_skb_output:
@@ -1470,7 +1554,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_d_path:
return &bpf_d_path_proto;
default:
- return raw_tp_prog_func_proto(func_id, prog);
+ fn = raw_tp_prog_func_proto(func_id, prog);
+ if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
+ fn = bpf_iter_get_func_proto(func_id, prog);
+ return fn;
}
}
@@ -1638,7 +1725,8 @@ static DEFINE_MUTEX(bpf_event_mutex);
#define BPF_TRACE_MAX_PROGS 64
int perf_event_attach_bpf_prog(struct perf_event *event,
- struct bpf_prog *prog)
+ struct bpf_prog *prog,
+ u64 bpf_cookie)
{
struct bpf_prog_array *old_array;
struct bpf_prog_array *new_array;
@@ -1665,12 +1753,13 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
goto unlock;
}
- ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
+ ret = bpf_prog_array_copy(old_array, NULL, prog, bpf_cookie, &new_array);
if (ret < 0)
goto unlock;
/* set the new array to event->tp_event and set event->prog */
event->prog = prog;
+ event->bpf_cookie = bpf_cookie;
rcu_assign_pointer(event->tp_event->prog_array, new_array);
bpf_prog_array_free(old_array);
@@ -1691,7 +1780,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
goto unlock;
old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
- ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
+ ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
if (ret == -ENOENT)
goto unlock;
if (ret < 0) {
@@ -1779,7 +1868,7 @@ void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
{
cant_sleep();
rcu_read_lock();
- (void) BPF_PROG_RUN(prog, args);
+ (void) bpf_prog_run(prog, args);
rcu_read_unlock();
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7b180f61e6d3..7efbc8aaf7f6 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3100,6 +3100,7 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
{
+ bool init_nop = ftrace_need_init_nop();
struct ftrace_page *pg;
struct dyn_ftrace *p;
u64 start, stop;
@@ -3138,8 +3139,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
* Do the initial record conversion from mcount jump
* to the NOP instructions.
*/
- if (!__is_defined(CC_USING_NOP_MCOUNT) &&
- !ftrace_nop_initialize(mod, p))
+ if (init_nop && !ftrace_nop_initialize(mod, p))
break;
update_cnt++;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 33899a71fdc1..a1adb29ef5c1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2897,14 +2897,26 @@ int tracepoint_printk_sysctl(struct ctl_table *table, int write,
void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
{
+ enum event_trigger_type tt = ETT_NONE;
+ struct trace_event_file *file = fbuffer->trace_file;
+
+ if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
+ fbuffer->entry, &tt))
+ goto discard;
+
if (static_key_false(&tracepoint_printk_key.key))
output_printk(fbuffer);
if (static_branch_unlikely(&trace_event_exports_enabled))
ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
- event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
- fbuffer->event, fbuffer->entry,
- fbuffer->trace_ctx, fbuffer->regs);
+
+ trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
+ fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
+
+discard:
+ if (tt)
+ event_triggers_post_call(file, tt);
+
}
EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index a180abf76d4e..4a0e693000c6 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1389,38 +1389,6 @@ event_trigger_unlock_commit(struct trace_event_file *file,
event_triggers_post_call(file, tt);
}
-/**
- * event_trigger_unlock_commit_regs - handle triggers and finish event commit
- * @file: The file pointer associated with the event
- * @buffer: The ring buffer that the event is being written to
- * @event: The event meta data in the ring buffer
- * @entry: The event itself
- * @trace_ctx: The tracing context flags.
- *
- * This is a helper function to handle triggers that require data
- * from the event itself. It also tests the event against filters and
- * if the event is soft disabled and should be discarded.
- *
- * Same as event_trigger_unlock_commit() but calls
- * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit().
- */
-static inline void
-event_trigger_unlock_commit_regs(struct trace_event_file *file,
- struct trace_buffer *buffer,
- struct ring_buffer_event *event,
- void *entry, unsigned int trace_ctx,
- struct pt_regs *regs)
-{
- enum event_trigger_type tt = ETT_NONE;
-
- if (!__event_trigger_test_discard(file, buffer, event, entry, &tt))
- trace_buffer_unlock_commit_regs(file->tr, buffer, event,
- trace_ctx, regs);
-
- if (tt)
- event_triggers_post_call(file, tt);
-}
-
#define FILTER_PRED_INVALID ((unsigned short)-1)
#define FILTER_PRED_IS_RIGHT (1 << 15)
#define FILTER_PRED_FOLD (1 << 15)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 949ef09dc537..a48aa2a2875b 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -3430,6 +3430,8 @@ trace_action_create_field_var(struct hist_trigger_data *hist_data,
event = data->match_data.event;
}
+ if (!event)
+ goto free;
/*
* At this point, we're looking at a field on another
* event. Because we can't modify a hist trigger on
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index a7e3c24dee13..b61eefe5ccf5 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -253,6 +253,7 @@ static struct osnoise_data {
*/
static bool osnoise_busy;
+#ifdef CONFIG_PREEMPT_RT
/*
* Print the osnoise header info.
*/
@@ -261,6 +262,35 @@ static void print_osnoise_headers(struct seq_file *s)
if (osnoise_data.tainted)
seq_puts(s, "# osnoise is tainted!\n");
+ seq_puts(s, "# _-------=> irqs-off\n");
+ seq_puts(s, "# / _------=> need-resched\n");
+ seq_puts(s, "# | / _-----=> need-resched-lazy\n");
+ seq_puts(s, "# || / _----=> hardirq/softirq\n");
+ seq_puts(s, "# ||| / _---=> preempt-depth\n");
+ seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n");
+ seq_puts(s, "# ||||| / _-=> migrate-disable\n");
+
+ seq_puts(s, "# |||||| / ");
+ seq_puts(s, " MAX\n");
+
+ seq_puts(s, "# ||||| / ");
+ seq_puts(s, " SINGLE Interference counters:\n");
+
+ seq_puts(s, "# ||||||| RUNTIME ");
+ seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n");
+
+ seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP IN US ");
+ seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n");
+
+ seq_puts(s, "# | | | ||||||| | | ");
+ seq_puts(s, " | | | | | | | |\n");
+}
+#else /* CONFIG_PREEMPT_RT */
+static void print_osnoise_headers(struct seq_file *s)
+{
+ if (osnoise_data.tainted)
+ seq_puts(s, "# osnoise is tainted!\n");
+
seq_puts(s, "# _-----=> irqs-off\n");
seq_puts(s, "# / _----=> need-resched\n");
seq_puts(s, "# | / _---=> hardirq/softirq\n");
@@ -279,6 +309,7 @@ static void print_osnoise_headers(struct seq_file *s)
seq_puts(s, "# | | | |||| | | ");
seq_puts(s, " | | | | | | | |\n");
}
+#endif /* CONFIG_PREEMPT_RT */
/*
* osnoise_taint - report an osnoise error.
@@ -323,6 +354,24 @@ static void trace_osnoise_sample(struct osnoise_sample *sample)
/*
* Print the timerlat header info.
*/
+#ifdef CONFIG_PREEMPT_RT
+static void print_timerlat_headers(struct seq_file *s)
+{
+ seq_puts(s, "# _-------=> irqs-off\n");
+ seq_puts(s, "# / _------=> need-resched\n");
+ seq_puts(s, "# | / _-----=> need-resched-lazy\n");
+ seq_puts(s, "# || / _----=> hardirq/softirq\n");
+ seq_puts(s, "# ||| / _---=> preempt-depth\n");
+ seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n");
+ seq_puts(s, "# ||||| / _-=> migrate-disable\n");
+ seq_puts(s, "# |||||| /\n");
+ seq_puts(s, "# ||||||| ACTIVATION\n");
+ seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP ID ");
+ seq_puts(s, " CONTEXT LATENCY\n");
+ seq_puts(s, "# | | | ||||||| | | ");
+ seq_puts(s, " | |\n");
+}
+#else /* CONFIG_PREEMPT_RT */
static void print_timerlat_headers(struct seq_file *s)
{
seq_puts(s, "# _-----=> irqs-off\n");
@@ -336,6 +385,7 @@ static void print_timerlat_headers(struct seq_file *s)
seq_puts(s, "# | | | |||| | | ");
seq_puts(s, " | |\n");
}
+#endif /* CONFIG_PREEMPT_RT */
/*
* Record an timerlat_sample into the tracer buffer.
@@ -1025,9 +1075,13 @@ diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *
/*
* osnoise_stop_tracing - Stop tracing and the tracer.
*/
-static void osnoise_stop_tracing(void)
+static __always_inline void osnoise_stop_tracing(void)
{
struct trace_array *tr = osnoise_trace;
+
+ trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
+ "stop tracing hit on cpu %d\n", smp_processor_id());
+
tracer_tracing_off(tr);
}
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 77be3bbe3cc4..bb51849e6375 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -58,14 +58,17 @@ static struct ctl_table_root set_root = {
.permissions = set_permissions,
};
-#define UCOUNT_ENTRY(name) \
- { \
- .procname = name, \
- .maxlen = sizeof(int), \
- .mode = 0644, \
- .proc_handler = proc_dointvec_minmax, \
- .extra1 = SYSCTL_ZERO, \
- .extra2 = SYSCTL_INT_MAX, \
+static long ue_zero = 0;
+static long ue_int_max = INT_MAX;
+
+#define UCOUNT_ENTRY(name) \
+ { \
+ .procname = name, \
+ .maxlen = sizeof(long), \
+ .mode = 0644, \
+ .proc_handler = proc_doulongvec_minmax, \
+ .extra1 = &ue_zero, \
+ .extra2 = &ue_int_max, \
}
static struct ctl_table user_table[] = {
UCOUNT_ENTRY("max_user_namespaces"),
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f148eacda55a..33a6b4a2443d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -205,9 +205,26 @@ struct pool_workqueue {
int refcnt; /* L: reference count */
int nr_in_flight[WORK_NR_COLORS];
/* L: nr of in_flight works */
+
+ /*
+ * nr_active management and WORK_STRUCT_INACTIVE:
+ *
+ * When pwq->nr_active >= max_active, new work item is queued to
+ * pwq->inactive_works instead of pool->worklist and marked with
+ * WORK_STRUCT_INACTIVE.
+ *
+ * All work items marked with WORK_STRUCT_INACTIVE do not participate
+ * in pwq->nr_active and all work items in pwq->inactive_works are
+ * marked with WORK_STRUCT_INACTIVE. But not all WORK_STRUCT_INACTIVE
+ * work items are in pwq->inactive_works. Some of them are ready to
+ * run in pool->worklist or worker->scheduled. Those work items are
+ * only struct wq_barrier which is used for flush_work() and should
+ * not participate in pwq->nr_active. For non-barrier work item, it
+ * is marked with WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
+ */
int nr_active; /* L: nr of active works */
int max_active; /* L: max active works */
- struct list_head delayed_works; /* L: delayed works */
+ struct list_head inactive_works; /* L: inactive works */
struct list_head pwqs_node; /* WR: node on wq->pwqs */
struct list_head mayday_node; /* MD: node on wq->maydays */
@@ -524,7 +541,7 @@ static inline void debug_work_deactivate(struct work_struct *work) { }
#endif
/**
- * worker_pool_assign_id - allocate ID and assing it to @pool
+ * worker_pool_assign_id - allocate ID and assign it to @pool
* @pool: the pool pointer of interest
*
* Returns 0 if ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned
@@ -579,9 +596,9 @@ static unsigned int work_color_to_flags(int color)
return color << WORK_STRUCT_COLOR_SHIFT;
}
-static int get_work_color(struct work_struct *work)
+static int get_work_color(unsigned long work_data)
{
- return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
+ return (work_data >> WORK_STRUCT_COLOR_SHIFT) &
((1 << WORK_STRUCT_COLOR_BITS) - 1);
}
@@ -1136,7 +1153,7 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq)
}
}
-static void pwq_activate_delayed_work(struct work_struct *work)
+static void pwq_activate_inactive_work(struct work_struct *work)
{
struct pool_workqueue *pwq = get_work_pwq(work);
@@ -1144,22 +1161,22 @@ static void pwq_activate_delayed_work(struct work_struct *work)
if (list_empty(&pwq->pool->worklist))
pwq->pool->watchdog_ts = jiffies;
move_linked_works(work, &pwq->pool->worklist, NULL);
- __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
+ __clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work));
pwq->nr_active++;
}
-static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
+static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
{
- struct work_struct *work = list_first_entry(&pwq->delayed_works,
+ struct work_struct *work = list_first_entry(&pwq->inactive_works,
struct work_struct, entry);
- pwq_activate_delayed_work(work);
+ pwq_activate_inactive_work(work);
}
/**
* pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
* @pwq: pwq of interest
- * @color: color of work which left the queue
+ * @work_data: work_data of work which left the queue
*
* A work either has completed or is removed from pending queue,
* decrement nr_in_flight of its pwq and handle workqueue flushing.
@@ -1167,21 +1184,21 @@ static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
* CONTEXT:
* raw_spin_lock_irq(pool->lock).
*/
-static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
+static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_data)
{
- /* uncolored work items don't participate in flushing or nr_active */
- if (color == WORK_NO_COLOR)
- goto out_put;
-
- pwq->nr_in_flight[color]--;
+ int color = get_work_color(work_data);
- pwq->nr_active--;
- if (!list_empty(&pwq->delayed_works)) {
- /* one down, submit a delayed one */
- if (pwq->nr_active < pwq->max_active)
- pwq_activate_first_delayed(pwq);
+ if (!(work_data & WORK_STRUCT_INACTIVE)) {
+ pwq->nr_active--;
+ if (!list_empty(&pwq->inactive_works)) {
+ /* one down, submit an inactive one */
+ if (pwq->nr_active < pwq->max_active)
+ pwq_activate_first_inactive(pwq);
+ }
}
+ pwq->nr_in_flight[color]--;
+
/* is flush in progress and are we at the flushing tip? */
if (likely(pwq->flush_color != color))
goto out_put;
@@ -1281,17 +1298,21 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
debug_work_deactivate(work);
/*
- * A delayed work item cannot be grabbed directly because
- * it might have linked NO_COLOR work items which, if left
- * on the delayed_list, will confuse pwq->nr_active
+ * A cancelable inactive work item must be in the
+ * pwq->inactive_works since a queued barrier can't be
+ * canceled (see the comments in insert_wq_barrier()).
+ *
+ * An inactive work item cannot be grabbed directly because
+ * it might have linked barrier work items which, if left
+ * on the inactive_works list, will confuse pwq->nr_active
* management later on and cause stall. Make sure the work
* item is activated before grabbing.
*/
- if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
- pwq_activate_delayed_work(work);
+ if (*work_data_bits(work) & WORK_STRUCT_INACTIVE)
+ pwq_activate_inactive_work(work);
list_del_init(&work->entry);
- pwq_dec_nr_in_flight(pwq, get_work_color(work));
+ pwq_dec_nr_in_flight(pwq, *work_data_bits(work));
/* work->data points to pwq iff queued, point to pool */
set_work_pool_and_keep_pending(work, pool->id);
@@ -1490,8 +1511,8 @@ retry:
if (list_empty(worklist))
pwq->pool->watchdog_ts = jiffies;
} else {
- work_flags |= WORK_STRUCT_DELAYED;
- worklist = &pwq->delayed_works;
+ work_flags |= WORK_STRUCT_INACTIVE;
+ worklist = &pwq->inactive_works;
}
debug_work_activate(work);
@@ -1912,14 +1933,14 @@ static void worker_detach_from_pool(struct worker *worker)
*/
static struct worker *create_worker(struct worker_pool *pool)
{
- struct worker *worker = NULL;
- int id = -1;
+ struct worker *worker;
+ int id;
char id_buf[16];
/* ID is needed to determine kthread name */
- id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
+ id = ida_alloc(&pool->worker_ida, GFP_KERNEL);
if (id < 0)
- goto fail;
+ return NULL;
worker = alloc_worker(pool->node);
if (!worker)
@@ -1954,8 +1975,7 @@ static struct worker *create_worker(struct worker_pool *pool)
return worker;
fail:
- if (id >= 0)
- ida_simple_remove(&pool->worker_ida, id);
+ ida_free(&pool->worker_ida, id);
kfree(worker);
return NULL;
}
@@ -2173,7 +2193,7 @@ __acquires(&pool->lock)
struct pool_workqueue *pwq = get_work_pwq(work);
struct worker_pool *pool = worker->pool;
bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
- int work_color;
+ unsigned long work_data;
struct worker *collision;
#ifdef CONFIG_LOCKDEP
/*
@@ -2209,7 +2229,8 @@ __acquires(&pool->lock)
worker->current_work = work;
worker->current_func = work->func;
worker->current_pwq = pwq;
- work_color = get_work_color(work);
+ work_data = *work_data_bits(work);
+ worker->current_color = get_work_color(work_data);
/*
* Record wq name for cmdline and debug reporting, may get
@@ -2315,7 +2336,8 @@ __acquires(&pool->lock)
worker->current_work = NULL;
worker->current_func = NULL;
worker->current_pwq = NULL;
- pwq_dec_nr_in_flight(pwq, work_color);
+ worker->current_color = INT_MAX;
+ pwq_dec_nr_in_flight(pwq, work_data);
}
/**
@@ -2378,7 +2400,7 @@ woke_up:
set_pf_worker(false);
set_task_comm(worker->task, "kworker/dying");
- ida_simple_remove(&pool->worker_ida, worker->id);
+ ida_free(&pool->worker_ida, worker->id);
worker_detach_from_pool(worker);
kfree(worker);
return 0;
@@ -2531,7 +2553,7 @@ repeat:
/*
* The above execution of rescued work items could
* have created more to rescue through
- * pwq_activate_first_delayed() or chained
+ * pwq_activate_first_inactive() or chained
* queueing. Let's put @pwq back on mayday list so
* that such back-to-back work items, which may be
* being used to relieve memory pressure, don't
@@ -2658,8 +2680,9 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
struct wq_barrier *barr,
struct work_struct *target, struct worker *worker)
{
+ unsigned int work_flags = 0;
+ unsigned int work_color;
struct list_head *head;
- unsigned int linked = 0;
/*
* debugobject calls are safe here even with pool->lock locked
@@ -2674,24 +2697,31 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
barr->task = current;
+ /* The barrier work item does not participate in pwq->nr_active. */
+ work_flags |= WORK_STRUCT_INACTIVE;
+
/*
* If @target is currently being executed, schedule the
* barrier to the worker; otherwise, put it after @target.
*/
- if (worker)
+ if (worker) {
head = worker->scheduled.next;
- else {
+ work_color = worker->current_color;
+ } else {
unsigned long *bits = work_data_bits(target);
head = target->entry.next;
/* there can already be other linked works, inherit and set */
- linked = *bits & WORK_STRUCT_LINKED;
+ work_flags |= *bits & WORK_STRUCT_LINKED;
+ work_color = get_work_color(*bits);
__set_bit(WORK_STRUCT_LINKED_BIT, bits);
}
+ pwq->nr_in_flight[work_color]++;
+ work_flags |= work_color_to_flags(work_color);
+
debug_work_activate(&barr->work);
- insert_work(pwq, &barr->work, head,
- work_color_to_flags(WORK_NO_COLOR) | linked);
+ insert_work(pwq, &barr->work, head, work_flags);
}
/**
@@ -2957,7 +2987,7 @@ reflush:
bool drained;
raw_spin_lock_irq(&pwq->pool->lock);
- drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
+ drained = !pwq->nr_active && list_empty(&pwq->inactive_works);
raw_spin_unlock_irq(&pwq->pool->lock);
if (drained)
@@ -3293,7 +3323,7 @@ int schedule_on_each_cpu(work_func_t func)
if (!works)
return -ENOMEM;
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
struct work_struct *work = per_cpu_ptr(works, cpu);
@@ -3305,7 +3335,7 @@ int schedule_on_each_cpu(work_func_t func)
for_each_online_cpu(cpu)
flush_work(per_cpu_ptr(works, cpu));
- put_online_cpus();
+ cpus_read_unlock();
free_percpu(works);
return 0;
}
@@ -3713,7 +3743,7 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
* @pwq: target pool_workqueue
*
* If @pwq isn't freezing, set @pwq->max_active to the associated
- * workqueue's saved_max_active and activate delayed work items
+ * workqueue's saved_max_active and activate inactive work items
* accordingly. If @pwq is freezing, clear @pwq->max_active to zero.
*/
static void pwq_adjust_max_active(struct pool_workqueue *pwq)
@@ -3742,9 +3772,9 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
pwq->max_active = wq->saved_max_active;
- while (!list_empty(&pwq->delayed_works) &&
+ while (!list_empty(&pwq->inactive_works) &&
pwq->nr_active < pwq->max_active) {
- pwq_activate_first_delayed(pwq);
+ pwq_activate_first_inactive(pwq);
kick = true;
}
@@ -3763,7 +3793,7 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
}
-/* initialize newly alloced @pwq which is associated with @wq and @pool */
+/* initialize newly allocated @pwq which is associated with @wq and @pool */
static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
struct worker_pool *pool)
{
@@ -3775,7 +3805,7 @@ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
pwq->wq = wq;
pwq->flush_color = -1;
pwq->refcnt = 1;
- INIT_LIST_HEAD(&pwq->delayed_works);
+ INIT_LIST_HEAD(&pwq->inactive_works);
INIT_LIST_HEAD(&pwq->pwqs_node);
INIT_LIST_HEAD(&pwq->mayday_node);
INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
@@ -4016,14 +4046,14 @@ static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
static void apply_wqattrs_lock(void)
{
/* CPUs should stay stable across pwq creations and installations */
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&wq_pool_mutex);
}
static void apply_wqattrs_unlock(void)
{
mutex_unlock(&wq_pool_mutex);
- put_online_cpus();
+ cpus_read_unlock();
}
static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
@@ -4068,7 +4098,7 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
*
* Performs GFP_KERNEL allocations.
*
- * Assumes caller has CPU hotplug read exclusion, i.e. get_online_cpus().
+ * Assumes caller has CPU hotplug read exclusion, i.e. cpus_read_lock().
*
* Return: 0 on success and -errno on failure.
*/
@@ -4196,7 +4226,7 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
return 0;
}
- get_online_cpus();
+ cpus_read_lock();
if (wq->flags & __WQ_ORDERED) {
ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
/* there should only be single pwq for ordering guarantee */
@@ -4206,7 +4236,7 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
} else {
ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
}
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -4362,7 +4392,7 @@ static bool pwq_busy(struct pool_workqueue *pwq)
if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
return true;
- if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+ if (pwq->nr_active || !list_empty(&pwq->inactive_works))
return true;
return false;
@@ -4558,7 +4588,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
else
pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
- ret = !list_empty(&pwq->delayed_works);
+ ret = !list_empty(&pwq->inactive_works);
preempt_enable();
rcu_read_unlock();
@@ -4754,11 +4784,11 @@ static void show_pwq(struct pool_workqueue *pwq)
pr_cont("\n");
}
- if (!list_empty(&pwq->delayed_works)) {
+ if (!list_empty(&pwq->inactive_works)) {
bool comma = false;
- pr_info(" delayed:");
- list_for_each_entry(work, &pwq->delayed_works, entry) {
+ pr_info(" inactive:");
+ list_for_each_entry(work, &pwq->inactive_works, entry) {
pr_cont_work(comma, work);
comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
}
@@ -4788,7 +4818,7 @@ void show_workqueue_state(void)
bool idle = true;
for_each_pwq(pwq, wq) {
- if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
+ if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
idle = false;
break;
}
@@ -4800,7 +4830,7 @@ void show_workqueue_state(void)
for_each_pwq(pwq, wq) {
raw_spin_lock_irqsave(&pwq->pool->lock, flags);
- if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+ if (pwq->nr_active || !list_empty(&pwq->inactive_works))
show_pwq(pwq);
raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
/*
@@ -5168,10 +5198,10 @@ long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
{
long ret = -ENODEV;
- get_online_cpus();
+ cpus_read_lock();
if (cpu_online(cpu))
ret = work_on_cpu(cpu, fn, arg);
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu_safe);
@@ -5183,7 +5213,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu_safe);
* freeze_workqueues_begin - begin freezing workqueues
*
* Start freezing workqueues. After this function returns, all freezable
- * workqueues will queue new works to their delayed_works list instead of
+ * workqueues will queue new works to their inactive_works list instead of
* pool->worklist.
*
* CONTEXT:
@@ -5331,7 +5361,7 @@ static int workqueue_apply_unbound_cpumask(void)
* the affinity of all unbound workqueues. This function check the @cpumask
* and apply it to all unbound workqueues and updates all pwqs of them.
*
- * Retun: 0 - Success
+ * Return: 0 - Success
* -EINVAL - Invalid @cpumask
* -ENOMEM - Failed to allocate memory for attrs or pwqs.
*/
@@ -5443,7 +5473,7 @@ static ssize_t wq_pool_ids_show(struct device *dev,
const char *delim = "";
int node, written = 0;
- get_online_cpus();
+ cpus_read_lock();
rcu_read_lock();
for_each_node(node) {
written += scnprintf(buf + written, PAGE_SIZE - written,
@@ -5453,7 +5483,7 @@ static ssize_t wq_pool_ids_show(struct device *dev,
}
written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
rcu_read_unlock();
- put_online_cpus();
+ cpus_read_unlock();
return written;
}
@@ -5902,6 +5932,13 @@ static void __init wq_numa_init(void)
return;
}
+ for_each_possible_cpu(cpu) {
+ if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) {
+ pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
+ return;
+ }
+ }
+
wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
BUG_ON(!wq_update_unbound_numa_attrs_buf);
@@ -5919,11 +5956,6 @@ static void __init wq_numa_init(void)
for_each_possible_cpu(cpu) {
node = cpu_to_node(cpu);
- if (WARN_ON(node == NUMA_NO_NODE)) {
- pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
- /* happens iff arch is bonkers, let's just proceed */
- return;
- }
cpumask_set_cpu(cpu, tbl[node]);
}
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 498de0e909a4..e00b1204a8e9 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -30,7 +30,8 @@ struct worker {
struct work_struct *current_work; /* L: work being processed */
work_func_t current_func; /* L: current_work's fn */
- struct pool_workqueue *current_pwq; /* L: current_work's pwq */
+ struct pool_workqueue *current_pwq; /* L: current_work's pwq */
+ unsigned int current_color; /* L: current_work's color */
struct list_head scheduled; /* L: scheduled works */
/* 64 bytes boundary on 64bit, 32 on 32bit */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 5ddd575159fb..12b805dabbc9 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1235,7 +1235,7 @@ config PROVE_LOCKING
depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
select LOCKDEP
select DEBUG_SPINLOCK
- select DEBUG_MUTEXES
+ select DEBUG_MUTEXES if !PREEMPT_RT
select DEBUG_RT_MUTEXES if RT_MUTEXES
select DEBUG_RWSEMS
select DEBUG_WW_MUTEX_SLOWPATH
@@ -1299,7 +1299,7 @@ config LOCK_STAT
depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
select LOCKDEP
select DEBUG_SPINLOCK
- select DEBUG_MUTEXES
+ select DEBUG_MUTEXES if !PREEMPT_RT
select DEBUG_RT_MUTEXES if RT_MUTEXES
select DEBUG_LOCK_ALLOC
default n
@@ -1335,7 +1335,7 @@ config DEBUG_SPINLOCK
config DEBUG_MUTEXES
bool "Mutex debugging: basic checks"
- depends on DEBUG_KERNEL
+ depends on DEBUG_KERNEL && !PREEMPT_RT
help
This feature allows mutex semantics violations to be detected and
reported.
@@ -1345,7 +1345,8 @@ config DEBUG_WW_MUTEX_SLOWPATH
depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
select DEBUG_LOCK_ALLOC
select DEBUG_SPINLOCK
- select DEBUG_MUTEXES
+ select DEBUG_MUTEXES if !PREEMPT_RT
+ select DEBUG_RT_MUTEXES if PREEMPT_RT
help
This feature enables slowpath testing for w/w mutex users by
injecting additional -EDEADLK wound/backoff cases. Together with
@@ -1368,7 +1369,7 @@ config DEBUG_LOCK_ALLOC
bool "Lock debugging: detect incorrect freeing of live locks"
depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
select DEBUG_SPINLOCK
- select DEBUG_MUTEXES
+ select DEBUG_MUTEXES if !PREEMPT_RT
select DEBUG_RT_MUTEXES if RT_MUTEXES
select LOCKDEP
help
@@ -1679,33 +1680,6 @@ config DEBUG_WQ_FORCE_RR_CPU
feature by default. When enabled, memory and cache locality will
be impacted.
-config DEBUG_BLOCK_EXT_DEVT
- bool "Force extended block device numbers and spread them"
- depends on DEBUG_KERNEL
- depends on BLOCK
- default n
- help
- BIG FAT WARNING: ENABLING THIS OPTION MIGHT BREAK BOOTING ON
- SOME DISTRIBUTIONS. DO NOT ENABLE THIS UNLESS YOU KNOW WHAT
- YOU ARE DOING. Distros, please enable this and fix whatever
- is broken.
-
- Conventionally, block device numbers are allocated from
- predetermined contiguous area. However, extended block area
- may introduce non-contiguous block device numbers. This
- option forces most block device numbers to be allocated from
- the extended space and spreads them to discover kernel or
- userland code paths which assume predetermined contiguous
- device number allocation.
-
- Note that turning on this debug option shuffles all the
- device numbers for all IDE and SCSI devices including libata
- ones, so root partition specified using device number
- directly (via rdev or root=MAJ:MIN) won't work anymore.
- Textual device names (root=/dev/sdXn) will continue to work.
-
- Say N if you are unsure.
-
config CPU_HOTPLUG_STATE_CONTROL
bool "Enable CPU hotplug state control"
depends on DEBUG_KERNEL
@@ -1971,6 +1945,13 @@ config FAIL_MMC_REQUEST
and to test how the mmc host driver handles retries from
the block device.
+config FAIL_SUNRPC
+ bool "Fault-injection capability for SunRPC"
+ depends on FAULT_INJECTION_DEBUG_FS && SUNRPC_DEBUG
+ help
+ Provide fault-injection capability for SunRPC and
+ its consumers.
+
config FAULT_INJECTION_STACKTRACE_FILTER
bool "stacktrace filter for fault-injection capabilities"
depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 14c032de276e..545ccbddf6a1 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -128,3 +128,6 @@ config CRYPTO_LIB_CHACHA20POLY1305
config CRYPTO_LIB_SHA256
tristate
+
+config CRYPTO_LIB_SM4
+ tristate
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 3a435629d9ce..73205ed269ba 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -38,6 +38,9 @@ libpoly1305-y += poly1305.o
obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
libsha256-y := sha256.o
+obj-$(CONFIG_CRYPTO_LIB_SM4) += libsm4.o
+libsm4-y := sm4.o
+
ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y)
libblake2s-y += blake2s-selftest.o
libchacha20poly1305-y += chacha20poly1305-selftest.o
diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c
index c64ac8bfb6a9..4055aa593ec4 100644
--- a/lib/crypto/blake2s.c
+++ b/lib/crypto/blake2s.c
@@ -73,7 +73,7 @@ void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
}
EXPORT_SYMBOL(blake2s256_hmac);
-static int __init mod_init(void)
+static int __init blake2s_mod_init(void)
{
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
WARN_ON(!blake2s_selftest()))
@@ -81,12 +81,12 @@ static int __init mod_init(void)
return 0;
}
-static void __exit mod_exit(void)
+static void __exit blake2s_mod_exit(void)
{
}
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(blake2s_mod_init);
+module_exit(blake2s_mod_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("BLAKE2s hash function");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
index c2fcdb98cc02..fa6a9440fc95 100644
--- a/lib/crypto/chacha20poly1305.c
+++ b/lib/crypto/chacha20poly1305.c
@@ -354,7 +354,7 @@ bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len
}
EXPORT_SYMBOL(chacha20poly1305_decrypt_sg_inplace);
-static int __init mod_init(void)
+static int __init chacha20poly1305_init(void)
{
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
WARN_ON(!chacha20poly1305_selftest()))
@@ -362,12 +362,12 @@ static int __init mod_init(void)
return 0;
}
-static void __exit mod_exit(void)
+static void __exit chacha20poly1305_exit(void)
{
}
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(chacha20poly1305_init);
+module_exit(chacha20poly1305_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c
index fb29739e8c29..064b352c6907 100644
--- a/lib/crypto/curve25519.c
+++ b/lib/crypto/curve25519.c
@@ -13,7 +13,7 @@
#include <linux/module.h>
#include <linux/init.h>
-static int __init mod_init(void)
+static int __init curve25519_init(void)
{
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
WARN_ON(!curve25519_selftest()))
@@ -21,12 +21,12 @@ static int __init mod_init(void)
return 0;
}
-static void __exit mod_exit(void)
+static void __exit curve25519_exit(void)
{
}
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(curve25519_init);
+module_exit(curve25519_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Curve25519 scalar multiplication");
diff --git a/lib/crypto/sm4.c b/lib/crypto/sm4.c
new file mode 100644
index 000000000000..633b59fed9db
--- /dev/null
+++ b/lib/crypto/sm4.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SM4, as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (C) 2018 ARM Limited or its affiliates.
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <asm/unaligned.h>
+#include <crypto/sm4.h>
+
+static const u32 fk[4] = {
+ 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+};
+
+static const u32 __cacheline_aligned ck[32] = {
+ 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
+ 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
+ 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
+ 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
+ 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
+ 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
+ 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
+ 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
+};
+
+static const u8 __cacheline_aligned sbox[256] = {
+ 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
+ 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
+ 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
+ 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
+ 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
+ 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
+ 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
+ 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
+ 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
+ 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
+ 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
+ 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
+ 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
+ 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
+ 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
+ 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
+ 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
+ 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
+ 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
+ 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
+ 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
+ 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
+ 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
+ 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
+ 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
+ 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
+ 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
+ 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
+ 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
+ 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
+ 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
+ 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
+};
+
+/* Substitute each of the four bytes of @x through the S-box, in place. */
+static inline u32 sm4_t_non_lin_sub(u32 x)
+{
+	u32 b0 = sbox[x & 0xff];
+	u32 b1 = sbox[(x >> 8) & 0xff];
+	u32 b2 = sbox[(x >> 16) & 0xff];
+	u32 b3 = sbox[(x >> 24) & 0xff];
+
+	return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
+}
+
+/* Linear step used by the key expansion: x ^ (x <<< 13) ^ (x <<< 23). */
+static inline u32 sm4_key_lin_sub(u32 x)
+{
+	u32 mixed = x;
+
+	mixed ^= rol32(x, 13);
+	mixed ^= rol32(x, 23);
+
+	return mixed;
+}
+
+/*
+ * Linear step used by the block rounds:
+ * x ^ (x <<< 2) ^ (x <<< 10) ^ (x <<< 18) ^ (x <<< 24).
+ */
+static inline u32 sm4_enc_lin_sub(u32 x)
+{
+	u32 mixed = x;
+
+	mixed ^= rol32(x, 2);
+	mixed ^= rol32(x, 10);
+	mixed ^= rol32(x, 18);
+	mixed ^= rol32(x, 24);
+
+	return mixed;
+}
+
+/* Key-expansion transform: S-box substitution, then the key linear step. */
+static inline u32 sm4_key_sub(u32 x)
+{
+	u32 substituted = sm4_t_non_lin_sub(x);
+
+	return sm4_key_lin_sub(substituted);
+}
+
+/* Round transform: S-box substitution, then the round linear step. */
+static inline u32 sm4_enc_sub(u32 x)
+{
+	u32 substituted = sm4_t_non_lin_sub(x);
+
+	return sm4_enc_lin_sub(substituted);
+}
+
+/*
+ * One round: fold @x1, @x2, @x3 and the round key @rk together, run the
+ * result through the round transform and XOR it into @x0.
+ */
+static inline u32 sm4_round(u32 x0, u32 x1, u32 x2, u32 x3, u32 rk)
+{
+	u32 mix = x1 ^ x2 ^ x3 ^ rk;
+
+	return x0 ^ sm4_enc_sub(mix);
+}
+
+
+/**
+ * sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016
+ * @ctx:	The location where the computed key will be stored.
+ * @in_key:	The supplied key; no particular alignment is required.
+ * @key_len:	The length of the supplied key.
+ *
+ * Fills both @ctx->rkey_enc and @ctx->rkey_dec; the latter holds the same
+ * 32 round keys in reverse order.
+ *
+ * Return: 0 on success, -EINVAL if @key_len is not SM4_KEY_SIZE. The
+ * pointers themselves are not validated.
+ */
+int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key,
+			  unsigned int key_len)
+{
+	u32 rk[4];
+	int i;
+
+	if (key_len != SM4_KEY_SIZE)
+		return -EINVAL;
+
+	/*
+	 * Load the key as four big-endian words directly from the byte
+	 * buffer. Byte offsets are used (as in sm4_crypt_block()) so that no
+	 * u32 pointer is ever formed from the possibly unaligned @in_key.
+	 */
+	for (i = 0; i < 4; i++)
+		rk[i] = get_unaligned_be32(in_key + i * 4) ^ fk[i];
+
+	/* Each pass derives four consecutive round keys. */
+	for (i = 0; i < 32; i += 4) {
+		rk[0] ^= sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]);
+		rk[1] ^= sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]);
+		rk[2] ^= sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]);
+		rk[3] ^= sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]);
+
+		ctx->rkey_enc[i + 0] = rk[0];
+		ctx->rkey_enc[i + 1] = rk[1];
+		ctx->rkey_enc[i + 2] = rk[2];
+		ctx->rkey_enc[i + 3] = rk[3];
+		/* Mirror the same keys from the tail of rkey_dec. */
+		ctx->rkey_dec[31 - 0 - i] = rk[0];
+		ctx->rkey_dec[31 - 1 - i] = rk[1];
+		ctx->rkey_dec[31 - 2 - i] = rk[2];
+		ctx->rkey_dec[31 - 3 - i] = rk[3];
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sm4_expandkey);
+
+/**
+ * sm4_crypt_block - Encrypt or decrypt a single SM4 block
+ * @rk:		Round keys: pass rkey_enc to encrypt, rkey_dec to decrypt.
+ *		Entries 0..31 are read.
+ * @out:	Destination for the 16 output bytes
+ * @in:		Source of the 16 input bytes
+ *
+ * Both buffers are accessed with unaligned big-endian helpers. The whole
+ * input is read before anything is written to @out.
+ */
+void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in)
+{
+	u32 x[4];
+	u32 n;
+
+	for (n = 0; n < 4; n++)
+		x[n] = get_unaligned_be32(in + n * 4);
+
+	/* 32 rounds, four per pass, each updating one state word. */
+	for (n = 0; n < 32; n += 4) {
+		x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[n + 0]);
+		x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[n + 1]);
+		x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[n + 2]);
+		x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[n + 3]);
+	}
+
+	/* The state words are emitted in reverse order. */
+	for (n = 0; n < 4; n++)
+		put_unaligned_be32(x[3 - n], out + n * 4);
+}
+EXPORT_SYMBOL_GPL(sm4_crypt_block);
+
+MODULE_DESCRIPTION("Generic SM4 library");
+MODULE_LICENSE("GPL v2");
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 9e14ae02306b..6946f8e204e3 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -557,7 +557,12 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
struct debug_obj *obj;
unsigned long flags;
- fill_pool();
+ /*
+ * On RT enabled kernels the pool refill must happen in preemptible
+ * context:
+ */
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible())
+ fill_pool();
db = get_bucket((unsigned long) addr);
diff --git a/lib/devmem_is_allowed.c b/lib/devmem_is_allowed.c
index c0d67c541849..60be9e24bd57 100644
--- a/lib/devmem_is_allowed.c
+++ b/lib/devmem_is_allowed.c
@@ -19,7 +19,7 @@
*/
int devmem_is_allowed(unsigned long pfn)
{
- if (iomem_is_exclusive(pfn << PAGE_SHIFT))
+ if (iomem_is_exclusive(PFN_PHYS(pfn)))
return 0;
if (!page_is_ram(pfn))
return 1;
diff --git a/lib/linear_ranges.c b/lib/linear_ranges.c
index ced5c15d3f04..a1a7dfa881de 100644
--- a/lib/linear_ranges.c
+++ b/lib/linear_ranges.c
@@ -241,5 +241,36 @@ int linear_range_get_selector_high(const struct linear_range *r,
}
EXPORT_SYMBOL_GPL(linear_range_get_selector_high);
+/**
+ * linear_range_get_selector_within - return linear range selector for value
+ * @r:		pointer to linear range where selector is looked from
+ * @val:	value for which the selector is searched
+ * @selector:	address where found selector value is updated
+ *
+ * Store in @selector the selector whose range value is the closest one
+ * that is equal to or lower than @val. The result is clamped to the range:
+ * a @val below the range minimum yields the minimum selector and a @val
+ * above the range maximum yields the maximum selector. A range with a zero
+ * step always yields the minimum selector.
+ */
+void linear_range_get_selector_within(const struct linear_range *r,
+				      unsigned int val, unsigned int *selector)
+{
+	unsigned int sel;
+
+	if (val < r->min)
+		sel = r->min_sel;
+	else if (val > linear_range_get_max_value(r))
+		sel = r->max_sel;
+	else if (!r->step)
+		sel = r->min_sel;
+	else
+		sel = r->min_sel + (val - r->min) / r->step;
+
+	*selector = sel;
+}
+EXPORT_SYMBOL_GPL(linear_range_get_selector_within);
+
MODULE_DESCRIPTION("linear-ranges helper");
MODULE_LICENSE("GPL");
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
index 9a75ca3f7edf..bc81419f400c 100644
--- a/lib/mpi/mpiutil.c
+++ b/lib/mpi/mpiutil.c
@@ -148,7 +148,7 @@ int mpi_resize(MPI a, unsigned nlimbs)
return 0; /* no need to do it */
if (a->d) {
- p = kmalloc_array(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL);
+ p = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL);
if (!p)
return -ENOMEM;
memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t));
diff --git a/lib/once.c b/lib/once.c
index 8b7d6235217e..59149bf3bfb4 100644
--- a/lib/once.c
+++ b/lib/once.c
@@ -3,10 +3,12 @@
#include <linux/spinlock.h>
#include <linux/once.h>
#include <linux/random.h>
+#include <linux/module.h>
struct once_work {
struct work_struct work;
struct static_key_true *key;
+ struct module *module;
};
static void once_deferred(struct work_struct *w)
@@ -16,10 +18,11 @@ static void once_deferred(struct work_struct *w)
work = container_of(w, struct once_work, work);
BUG_ON(!static_key_enabled(work->key));
static_branch_disable(work->key);
+ module_put(work->module);
kfree(work);
}
-static void once_disable_jump(struct static_key_true *key)
+static void once_disable_jump(struct static_key_true *key, struct module *mod)
{
struct once_work *w;
@@ -29,6 +32,8 @@ static void once_disable_jump(struct static_key_true *key)
INIT_WORK(&w->work, once_deferred);
w->key = key;
+ w->module = mod;
+ __module_get(mod);
schedule_work(&w->work);
}
@@ -53,11 +58,11 @@ bool __do_once_start(bool *done, unsigned long *flags)
EXPORT_SYMBOL(__do_once_start);
void __do_once_done(bool *done, struct static_key_true *once_key,
- unsigned long *flags)
+ unsigned long *flags, struct module *mod)
__releases(once_lock)
{
*done = true;
spin_unlock_irqrestore(&once_lock, *flags);
- once_disable_jump(once_key);
+ once_disable_jump(once_key, mod);
}
EXPORT_SYMBOL(__do_once_done);
diff --git a/lib/string.c b/lib/string.c
index 77bd0b1d3296..b2de45a581f4 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -29,6 +29,7 @@
#include <linux/errno.h>
#include <linux/slab.h>
+#include <asm/unaligned.h>
#include <asm/byteorder.h>
#include <asm/word-at-a-time.h>
#include <asm/page.h>
@@ -935,6 +936,21 @@ __visible int memcmp(const void *cs, const void *ct, size_t count)
const unsigned char *su1, *su2;
int res = 0;
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ if (count >= sizeof(unsigned long)) {
+ const unsigned long *u1 = cs;
+ const unsigned long *u2 = ct;
+ do {
+ if (get_unaligned(u1) != get_unaligned(u2))
+ break;
+ u1++;
+ u2++;
+ count -= sizeof(unsigned long);
+ } while (count >= sizeof(unsigned long));
+ cs = u1;
+ ct = u2;
+ }
+#endif
for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
if ((res = *su1 - *su2) != 0)
break;
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index d500320778c7..830a18ecffc8 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -461,6 +461,41 @@ static int bpf_fill_stxdw(struct bpf_test *self)
return __bpf_fill_stxdw(self, BPF_DW);
}
+/*
+ * Build a BPF_MAXINSNS-long program whose second instruction is a taken
+ * conditional jump over everything up to the final exit, and attach it to
+ * @self. Returns 0 on success or -ENOMEM if the program cannot be allocated.
+ */
+static int bpf_fill_long_jmp(struct bpf_test *self)
+{
+	unsigned int len = BPF_MAXINSNS;
+	unsigned int last = len - 1;
+	struct bpf_insn *prog;
+	int idx;
+
+	prog = kmalloc_array(len, sizeof(*prog), GFP_KERNEL);
+	if (!prog)
+		return -ENOMEM;
+
+	/* R0 = 1, then branch on R0 == 1 straight to the exit instruction. */
+	prog[0] = BPF_ALU64_IMM(BPF_MOV, R0, 1);
+	prog[1] = BPF_JMP_IMM(BPF_JEQ, R0, 1, len - 2 - 1);
+
+	/*
+	 * Pad the skipped region with a complex 64-bit operation that a
+	 * 32-bit JIT expands into many native instructions, so the jump
+	 * distance can exceed the conditional branch offset field and force
+	 * the JIT's branch conversion path.
+	 *
+	 * Note: BPF_MAXINSNS ALU64 MULs suffice for the 32-bit MIPS JIT;
+	 * other JITs may need a different instruction count and/or
+	 * operation to hit the same path.
+	 */
+	idx = 2;
+	while (idx < last) {
+		prog[idx] = BPF_ALU64_IMM(BPF_MUL, R0, (idx << 16) + idx);
+		idx++;
+	}
+
+	prog[last] = BPF_EXIT_INSN();
+
+	self->u.ptr.insns = prog;
+	self->u.ptr.len = len;
+
+	return 0;
+}
+
static struct bpf_test tests[] = {
{
"TAX",
@@ -1917,6 +1952,163 @@ static struct bpf_test tests[] = {
{ { 0, -1 } }
},
{
+ /*
+ * Register (non-)clobbering test, in the case where a 32-bit
+ * JIT implements complex ALU64 operations via function calls.
+ * If so, the function call must be invisible in the eBPF
+ * registers. The JIT must then save and restore relevant
+ * registers during the call. The following tests check that
+ * the eBPF registers retain their values after such a call.
+ */
+ "INT: Register clobbering, R1 updated",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_ALU32_IMM(BPF_MOV, R1, 123456789),
+ BPF_ALU32_IMM(BPF_MOV, R2, 2),
+ BPF_ALU32_IMM(BPF_MOV, R3, 3),
+ BPF_ALU32_IMM(BPF_MOV, R4, 4),
+ BPF_ALU32_IMM(BPF_MOV, R5, 5),
+ BPF_ALU32_IMM(BPF_MOV, R6, 6),
+ BPF_ALU32_IMM(BPF_MOV, R7, 7),
+ BPF_ALU32_IMM(BPF_MOV, R8, 8),
+ BPF_ALU32_IMM(BPF_MOV, R9, 9),
+ BPF_ALU64_IMM(BPF_DIV, R1, 123456789),
+ BPF_JMP_IMM(BPF_JNE, R0, 0, 10),
+ BPF_JMP_IMM(BPF_JNE, R1, 1, 9),
+ BPF_JMP_IMM(BPF_JNE, R2, 2, 8),
+ BPF_JMP_IMM(BPF_JNE, R3, 3, 7),
+ BPF_JMP_IMM(BPF_JNE, R4, 4, 6),
+ BPF_JMP_IMM(BPF_JNE, R5, 5, 5),
+ BPF_JMP_IMM(BPF_JNE, R6, 6, 4),
+ BPF_JMP_IMM(BPF_JNE, R7, 7, 3),
+ BPF_JMP_IMM(BPF_JNE, R8, 8, 2),
+ BPF_JMP_IMM(BPF_JNE, R9, 9, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+ {
+ "INT: Register clobbering, R2 updated",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_ALU32_IMM(BPF_MOV, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R2, 2 * 123456789),
+ BPF_ALU32_IMM(BPF_MOV, R3, 3),
+ BPF_ALU32_IMM(BPF_MOV, R4, 4),
+ BPF_ALU32_IMM(BPF_MOV, R5, 5),
+ BPF_ALU32_IMM(BPF_MOV, R6, 6),
+ BPF_ALU32_IMM(BPF_MOV, R7, 7),
+ BPF_ALU32_IMM(BPF_MOV, R8, 8),
+ BPF_ALU32_IMM(BPF_MOV, R9, 9),
+ BPF_ALU64_IMM(BPF_DIV, R2, 123456789),
+ BPF_JMP_IMM(BPF_JNE, R0, 0, 10),
+ BPF_JMP_IMM(BPF_JNE, R1, 1, 9),
+ BPF_JMP_IMM(BPF_JNE, R2, 2, 8),
+ BPF_JMP_IMM(BPF_JNE, R3, 3, 7),
+ BPF_JMP_IMM(BPF_JNE, R4, 4, 6),
+ BPF_JMP_IMM(BPF_JNE, R5, 5, 5),
+ BPF_JMP_IMM(BPF_JNE, R6, 6, 4),
+ BPF_JMP_IMM(BPF_JNE, R7, 7, 3),
+ BPF_JMP_IMM(BPF_JNE, R8, 8, 2),
+ BPF_JMP_IMM(BPF_JNE, R9, 9, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+ {
+ /*
+ * Test 32-bit JITs that implement complex ALU64 operations as
+ * function calls R0 = f(R1, R2), and must re-arrange operands.
+ */
+#define NUMER 0xfedcba9876543210ULL
+#define DENOM 0x0123456789abcdefULL
+ "ALU64_DIV X: Operand register permutations",
+ .u.insns_int = {
+ /* R0 / R2 */
+ BPF_LD_IMM64(R0, NUMER),
+ BPF_LD_IMM64(R2, DENOM),
+ BPF_ALU64_REG(BPF_DIV, R0, R2),
+ BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1),
+ BPF_EXIT_INSN(),
+ /* R1 / R0 */
+ BPF_LD_IMM64(R1, NUMER),
+ BPF_LD_IMM64(R0, DENOM),
+ BPF_ALU64_REG(BPF_DIV, R1, R0),
+ BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1),
+ BPF_EXIT_INSN(),
+ /* R0 / R1 */
+ BPF_LD_IMM64(R0, NUMER),
+ BPF_LD_IMM64(R1, DENOM),
+ BPF_ALU64_REG(BPF_DIV, R0, R1),
+ BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1),
+ BPF_EXIT_INSN(),
+ /* R2 / R0 */
+ BPF_LD_IMM64(R2, NUMER),
+ BPF_LD_IMM64(R0, DENOM),
+ BPF_ALU64_REG(BPF_DIV, R2, R0),
+ BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1),
+ BPF_EXIT_INSN(),
+ /* R2 / R1 */
+ BPF_LD_IMM64(R2, NUMER),
+ BPF_LD_IMM64(R1, DENOM),
+ BPF_ALU64_REG(BPF_DIV, R2, R1),
+ BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1),
+ BPF_EXIT_INSN(),
+ /* R1 / R2 */
+ BPF_LD_IMM64(R1, NUMER),
+ BPF_LD_IMM64(R2, DENOM),
+ BPF_ALU64_REG(BPF_DIV, R1, R2),
+ BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1),
+ BPF_EXIT_INSN(),
+ /* R1 / R1 */
+ BPF_LD_IMM64(R1, NUMER),
+ BPF_ALU64_REG(BPF_DIV, R1, R1),
+ BPF_JMP_IMM(BPF_JEQ, R1, 1, 1),
+ BPF_EXIT_INSN(),
+ /* R2 / R2 */
+ BPF_LD_IMM64(R2, DENOM),
+ BPF_ALU64_REG(BPF_DIV, R2, R2),
+ BPF_JMP_IMM(BPF_JEQ, R2, 1, 1),
+ BPF_EXIT_INSN(),
+ /* R3 / R4 */
+ BPF_LD_IMM64(R3, NUMER),
+ BPF_LD_IMM64(R4, DENOM),
+ BPF_ALU64_REG(BPF_DIV, R3, R4),
+ BPF_JMP_IMM(BPF_JEQ, R3, NUMER / DENOM, 1),
+ BPF_EXIT_INSN(),
+ /* Successful return */
+ BPF_LD_IMM64(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+#undef NUMER
+#undef DENOM
+ },
+#ifdef CONFIG_32BIT
+ {
+ "INT: 32-bit context pointer word order and zero-extension",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_JMP32_IMM(BPF_JEQ, R1, 0, 3),
+ BPF_ALU64_IMM(BPF_RSH, R1, 32),
+ BPF_JMP32_IMM(BPF_JNE, R1, 0, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+#endif
+ {
"check: missing ret",
.u.insns = {
BPF_STMT(BPF_LD | BPF_IMM, 1),
@@ -2361,6 +2553,48 @@ static struct bpf_test tests[] = {
{ { 0, 0x1 } },
},
{
+ "ALU_MOV_K: small negative",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123 } }
+ },
+ {
+ "ALU_MOV_K: small negative zero extension",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } }
+ },
+ {
+ "ALU_MOV_K: large negative",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123456789),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123456789 } }
+ },
+ {
+ "ALU_MOV_K: large negative zero extension",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123456789),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } }
+ },
+ {
"ALU64_MOV_K: dst = 2",
.u.insns_int = {
BPF_ALU64_IMM(BPF_MOV, R0, 2),
@@ -2412,6 +2646,48 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 0x1 } },
},
+ {
+ "ALU64_MOV_K: small negative",
+ .u.insns_int = {
+ BPF_ALU64_IMM(BPF_MOV, R0, -123),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123 } }
+ },
+ {
+ "ALU64_MOV_K: small negative sign extension",
+ .u.insns_int = {
+ BPF_ALU64_IMM(BPF_MOV, R0, -123),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xffffffff } }
+ },
+ {
+ "ALU64_MOV_K: large negative",
+ .u.insns_int = {
+ BPF_ALU64_IMM(BPF_MOV, R0, -123456789),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123456789 } }
+ },
+ {
+ "ALU64_MOV_K: large negative sign extension",
+ .u.insns_int = {
+ BPF_ALU64_IMM(BPF_MOV, R0, -123456789),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xffffffff } }
+ },
/* BPF_ALU | BPF_ADD | BPF_X */
{
"ALU_ADD_X: 1 + 2 = 3",
@@ -2967,6 +3243,31 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 2147483647 } },
},
+ {
+ "ALU64_MUL_X: 64x64 multiply, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0fedcba987654321LL),
+ BPF_LD_IMM64(R1, 0x123456789abcdef0LL),
+ BPF_ALU64_REG(BPF_MUL, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xe5618cf0 } }
+ },
+ {
+ "ALU64_MUL_X: 64x64 multiply, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0fedcba987654321LL),
+ BPF_LD_IMM64(R1, 0x123456789abcdef0LL),
+ BPF_ALU64_REG(BPF_MUL, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x2236d88f } }
+ },
/* BPF_ALU | BPF_MUL | BPF_K */
{
"ALU_MUL_K: 2 * 3 = 6",
@@ -3077,6 +3378,29 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 0x1 } },
},
+ {
+ "ALU64_MUL_K: 64x32 multiply, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_MUL, R0, 0x12345678),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xe242d208 } }
+ },
+ {
+ "ALU64_MUL_K: 64x32 multiply, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_MUL, R0, 0x12345678),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xc28f5c28 } }
+ },
/* BPF_ALU | BPF_DIV | BPF_X */
{
"ALU_DIV_X: 6 / 2 = 3",
@@ -3431,6 +3755,44 @@ static struct bpf_test tests[] = {
{ { 0, 0xffffffff } },
},
{
+ "ALU_AND_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+ BPF_ALU32_IMM(BPF_AND, R0, 15),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 4 } }
+ },
+ {
+ "ALU_AND_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xf1f2f3f4),
+ BPF_ALU32_IMM(BPF_AND, R0, 0xafbfcfdf),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xa1b2c3d4 } }
+ },
+ {
+ "ALU_AND_K: Zero extension",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0x0000000080a0c0e0LL),
+ BPF_ALU32_IMM(BPF_AND, R0, 0xf0f0f0f0),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+ {
"ALU64_AND_K: 3 & 2 = 2",
.u.insns_int = {
BPF_LD_IMM64(R0, 3),
@@ -3453,7 +3815,7 @@ static struct bpf_test tests[] = {
{ { 0, 0xffffffff } },
},
{
- "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000ffff00000000",
+ "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000000000000000",
.u.insns_int = {
BPF_LD_IMM64(R2, 0x0000ffffffff0000LL),
BPF_LD_IMM64(R3, 0x0000000000000000LL),
@@ -3469,7 +3831,7 @@ static struct bpf_test tests[] = {
{ { 0, 0x1 } },
},
{
- "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffffffff",
+ "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffff0000",
.u.insns_int = {
BPF_LD_IMM64(R2, 0x0000ffffffff0000LL),
BPF_LD_IMM64(R3, 0x0000ffffffff0000LL),
@@ -3500,6 +3862,38 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 0x1 } },
},
+ {
+ "ALU64_AND_K: Sign extension 1",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0x00000000090b0d0fLL),
+ BPF_ALU64_IMM(BPF_AND, R0, 0x0f0f0f0f),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+ {
+ "ALU64_AND_K: Sign extension 2",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0x0123456780a0c0e0LL),
+ BPF_ALU64_IMM(BPF_AND, R0, 0xf0f0f0f0),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
/* BPF_ALU | BPF_OR | BPF_X */
{
"ALU_OR_X: 1 | 2 = 3",
@@ -3573,6 +3967,44 @@ static struct bpf_test tests[] = {
{ { 0, 0xffffffff } },
},
{
+ "ALU_OR_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+ BPF_ALU32_IMM(BPF_OR, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x01020305 } }
+ },
+ {
+ "ALU_OR_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+ BPF_ALU32_IMM(BPF_OR, R0, 0xa0b0c0d0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xa1b2c3d4 } }
+ },
+ {
+ "ALU_OR_K: Zero extension",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0x00000000f9fbfdffLL),
+ BPF_ALU32_IMM(BPF_OR, R0, 0xf0f0f0f0),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+ {
"ALU64_OR_K: 1 | 2 = 3",
.u.insns_int = {
BPF_LD_IMM64(R0, 1),
@@ -3595,7 +4027,7 @@ static struct bpf_test tests[] = {
{ { 0, 0xffffffff } },
},
{
- "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffff00000000",
+ "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffffffff0000",
.u.insns_int = {
BPF_LD_IMM64(R2, 0x0000ffffffff0000LL),
BPF_LD_IMM64(R3, 0x0000ffffffff0000LL),
@@ -3642,6 +4074,38 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 0x1 } },
},
+ {
+ "ALU64_OR_K: Sign extension 1",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0x012345678fafcfefLL),
+ BPF_ALU64_IMM(BPF_OR, R0, 0x0f0f0f0f),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+ {
+ "ALU64_OR_K: Sign extension 2",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0xfffffffff9fbfdffLL),
+ BPF_ALU64_IMM(BPF_OR, R0, 0xf0f0f0f0),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
/* BPF_ALU | BPF_XOR | BPF_X */
{
"ALU_XOR_X: 5 ^ 6 = 3",
@@ -3715,6 +4179,44 @@ static struct bpf_test tests[] = {
{ { 0, 0xfffffffe } },
},
{
+ "ALU_XOR_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+ BPF_ALU32_IMM(BPF_XOR, R0, 15),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x0102030b } }
+ },
+ {
+ "ALU_XOR_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xf1f2f3f4),
+ BPF_ALU32_IMM(BPF_XOR, R0, 0xafbfcfdf),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x5e4d3c2b } }
+ },
+ {
+ "ALU_XOR_K: Zero extension",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0x00000000795b3d1fLL),
+ BPF_ALU32_IMM(BPF_XOR, R0, 0xf0f0f0f0),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+ {
"ALU64_XOR_K: 5 ^ 6 = 3",
.u.insns_int = {
BPF_LD_IMM64(R0, 5),
@@ -3726,7 +4228,7 @@ static struct bpf_test tests[] = {
{ { 0, 3 } },
},
{
- "ALU64_XOR_K: 1 & 0xffffffff = 0xfffffffe",
+ "ALU64_XOR_K: 1 ^ 0xffffffff = 0xfffffffe",
.u.insns_int = {
BPF_LD_IMM64(R0, 1),
BPF_ALU64_IMM(BPF_XOR, R0, 0xffffffff),
@@ -3784,6 +4286,38 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 0x1 } },
},
+ {
+ "ALU64_XOR_K: Sign extension 1",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0x0123456786a4c2e0LL),
+ BPF_ALU64_IMM(BPF_XOR, R0, 0x0f0f0f0f),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+ {
+ "ALU64_XOR_K: Sign extension 2",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0xfedcba98795b3d1fLL),
+ BPF_ALU64_IMM(BPF_XOR, R0, 0xf0f0f0f0),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
/* BPF_ALU | BPF_LSH | BPF_X */
{
"ALU_LSH_X: 1 << 1 = 2",
@@ -3810,6 +4344,18 @@ static struct bpf_test tests[] = {
{ { 0, 0x80000000 } },
},
{
+ "ALU_LSH_X: 0x12345678 << 12 = 0x45678000",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+ BPF_ALU32_IMM(BPF_MOV, R1, 12),
+ BPF_ALU32_REG(BPF_LSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x45678000 } }
+ },
+ {
"ALU64_LSH_X: 1 << 1 = 2",
.u.insns_int = {
BPF_LD_IMM64(R0, 1),
@@ -3833,6 +4379,106 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 0x80000000 } },
},
+ {
+ "ALU64_LSH_X: Shift < 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 12),
+ BPF_ALU64_REG(BPF_LSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xbcdef000 } }
+ },
+ {
+ "ALU64_LSH_X: Shift < 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 12),
+ BPF_ALU64_REG(BPF_LSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x3456789a } }
+ },
+ {
+ "ALU64_LSH_X: Shift > 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 36),
+ BPF_ALU64_REG(BPF_LSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } }
+ },
+ {
+ "ALU64_LSH_X: Shift > 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 36),
+ BPF_ALU64_REG(BPF_LSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x9abcdef0 } }
+ },
+ {
+ "ALU64_LSH_X: Shift == 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 32),
+ BPF_ALU64_REG(BPF_LSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } }
+ },
+ {
+ "ALU64_LSH_X: Shift == 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 32),
+ BPF_ALU64_REG(BPF_LSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x89abcdef } }
+ },
+ {
+ "ALU64_LSH_X: Zero shift, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0),
+ BPF_ALU64_REG(BPF_LSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x89abcdef } }
+ },
+ {
+ "ALU64_LSH_X: Zero shift, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0),
+ BPF_ALU64_REG(BPF_LSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x01234567 } }
+ },
/* BPF_ALU | BPF_LSH | BPF_K */
{
"ALU_LSH_K: 1 << 1 = 2",
@@ -3857,6 +4503,28 @@ static struct bpf_test tests[] = {
{ { 0, 0x80000000 } },
},
{
+ "ALU_LSH_K: 0x12345678 << 12 = 0x45678000",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+ BPF_ALU32_IMM(BPF_LSH, R0, 12),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x45678000 } }
+ },
+ {
+ "ALU_LSH_K: 0x12345678 << 0 = 0x12345678",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+ BPF_ALU32_IMM(BPF_LSH, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x12345678 } }
+ },
+ {
"ALU64_LSH_K: 1 << 1 = 2",
.u.insns_int = {
BPF_LD_IMM64(R0, 1),
@@ -3878,6 +4546,86 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 0x80000000 } },
},
+ {
+ "ALU64_LSH_K: Shift < 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_LSH, R0, 12),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xbcdef000 } }
+ },
+ {
+ "ALU64_LSH_K: Shift < 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_LSH, R0, 12),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x3456789a } }
+ },
+ {
+ "ALU64_LSH_K: Shift > 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_LSH, R0, 36),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } }
+ },
+ {
+ "ALU64_LSH_K: Shift > 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_LSH, R0, 36),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x9abcdef0 } }
+ },
+ {
+ "ALU64_LSH_K: Shift == 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_LSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } }
+ },
+ {
+ "ALU64_LSH_K: Shift == 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_LSH, R0, 32),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x89abcdef } }
+ },
+ {
+ "ALU64_LSH_K: Zero shift",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_LSH, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x89abcdef } }
+ },
/* BPF_ALU | BPF_RSH | BPF_X */
{
"ALU_RSH_X: 2 >> 1 = 1",
@@ -3904,6 +4652,18 @@ static struct bpf_test tests[] = {
{ { 0, 1 } },
},
{
+ "ALU_RSH_X: 0x12345678 >> 20 = 0x123",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+ BPF_ALU32_IMM(BPF_MOV, R1, 20),
+ BPF_ALU32_REG(BPF_RSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x123 } }
+ },
+ {
"ALU64_RSH_X: 2 >> 1 = 1",
.u.insns_int = {
BPF_LD_IMM64(R0, 2),
@@ -3927,6 +4687,106 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 1 } },
},
+ {
+ "ALU64_RSH_X: Shift < 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 12),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x56789abc } }
+ },
+ {
+ "ALU64_RSH_X: Shift < 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 12),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x00081234 } }
+ },
+ {
+ "ALU64_RSH_X: Shift > 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 36),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x08123456 } }
+ },
+ {
+ "ALU64_RSH_X: Shift > 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 36),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } }
+ },
+ {
+ "ALU64_RSH_X: Shift == 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 32),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x81234567 } }
+ },
+ {
+ "ALU64_RSH_X: Shift == 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 32),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } }
+ },
+ {
+ "ALU64_RSH_X: Zero shift, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x89abcdef } }
+ },
+ {
+ "ALU64_RSH_X: Zero shift, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x81234567 } }
+ },
/* BPF_ALU | BPF_RSH | BPF_K */
{
"ALU_RSH_K: 2 >> 1 = 1",
@@ -3951,6 +4811,28 @@ static struct bpf_test tests[] = {
{ { 0, 1 } },
},
{
+ "ALU_RSH_K: 0x12345678 >> 20 = 0x123",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+ BPF_ALU32_IMM(BPF_RSH, R0, 20),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x123 } }
+ },
+ {
+ "ALU_RSH_K: 0x12345678 >> 0 = 0x12345678",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+ BPF_ALU32_IMM(BPF_RSH, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x12345678 } }
+ },
+ {
"ALU64_RSH_K: 2 >> 1 = 1",
.u.insns_int = {
BPF_LD_IMM64(R0, 2),
@@ -3972,9 +4854,101 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 1 } },
},
+ {
+ "ALU64_RSH_K: Shift < 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_RSH, R0, 12),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x56789abc } }
+ },
+ {
+ "ALU64_RSH_K: Shift < 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_RSH, R0, 12),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x00081234 } }
+ },
+ {
+ "ALU64_RSH_K: Shift > 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_RSH, R0, 36),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x08123456 } }
+ },
+ {
+ "ALU64_RSH_K: Shift > 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_RSH, R0, 36),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } }
+ },
+ {
+ "ALU64_RSH_K: Shift == 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x81234567 } }
+ },
+ {
+ "ALU64_RSH_K: Shift == 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } }
+ },
+ {
+ "ALU64_RSH_K: Zero shift",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL), /* same operand as the other ALU64_RSH cases */
+ BPF_ALU64_IMM(BPF_RSH, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x89abcdef } } /* low word is unchanged by a zero shift */
+ },
/* BPF_ALU | BPF_ARSH | BPF_X */
{
- "ALU_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+ "ALU32_ARSH_X: -1234 >> 7 = -10",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -1234),
+ BPF_ALU32_IMM(BPF_MOV, R1, 7),
+ BPF_ALU32_REG(BPF_ARSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -10 } }
+ },
+ {
+ "ALU64_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
.u.insns_int = {
BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
BPF_ALU32_IMM(BPF_MOV, R1, 40),
@@ -3985,9 +4959,131 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 0xffff00ff } },
},
+ {
+ "ALU64_ARSH_X: Shift < 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 12),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x56789abc } }
+ },
+ {
+ "ALU64_ARSH_X: Shift < 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 12),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xfff81234 } }
+ },
+ {
+ "ALU64_ARSH_X: Shift > 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 36),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xf8123456 } }
+ },
+ {
+ "ALU64_ARSH_X: Shift > 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 36),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -1 } }
+ },
+ {
+ "ALU64_ARSH_X: Shift == 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 32),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x81234567 } }
+ },
+ {
+ "ALU64_ARSH_X: Shift == 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 32),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -1 } }
+ },
+ {
+ "ALU64_ARSH_X: Zero shift, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x89abcdef } }
+ },
+ {
+ "ALU64_ARSH_X: Zero shift, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x81234567 } }
+ },
/* BPF_ALU | BPF_ARSH | BPF_K */
{
- "ALU_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+ "ALU32_ARSH_K: -1234 >> 7 = -10",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -1234),
+ BPF_ALU32_IMM(BPF_ARSH, R0, 7),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -10 } }
+ },
+ {
+ "ALU32_ARSH_K: -1234 >> 0 = -1234",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -1234),
+ BPF_ALU32_IMM(BPF_ARSH, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -1234 } }
+ },
+ {
+ "ALU64_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
.u.insns_int = {
BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
BPF_ALU64_IMM(BPF_ARSH, R0, 40),
@@ -3997,6 +5093,86 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 0xffff00ff } },
},
+ {
+ "ALU64_ARSH_K: Shift < 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 12), /* arithmetic shift: the op this case is named for */
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x56789abc } } /* low word of 0xfff8123456789abc */
+ },
+ {
+ "ALU64_ARSH_K: Shift < 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 12),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xfff81234 } }
+ },
+ {
+ "ALU64_ARSH_K: Shift > 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 36),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xf8123456 } }
+ },
+ {
+ "ALU64_ARSH_K: Shift > 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL), /* same operand as the paired low-word case */
+ BPF_ALU64_IMM(BPF_ARSH, R0, 36),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32), /* move the high word down for the 32-bit result check */
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -1 } } /* high word of 0xfffffffff8123456 */
+ },
+ {
+ "ALU64_ARSH_K: Shift == 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x81234567 } }
+ },
+ {
+ "ALU64_ARSH_K: Shift == 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 32),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -1 } }
+ },
+ {
+ "ALU64_ARSH_K: Zero shift",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x89abcdef } }
+ },
/* BPF_ALU | BPF_NEG */
{
"ALU_NEG: -(3) = -3",
@@ -4286,8 +5462,8 @@ static struct bpf_test tests[] = {
.u.insns_int = {
BPF_LD_IMM64(R0, 0),
BPF_LD_IMM64(R1, 0xffffffffffffffffLL),
- BPF_STX_MEM(BPF_W, R10, R1, -40),
- BPF_LDX_MEM(BPF_W, R0, R10, -40),
+ BPF_STX_MEM(BPF_DW, R10, R1, -40),
+ BPF_LDX_MEM(BPF_DW, R0, R10, -40),
BPF_EXIT_INSN(),
},
INTERNAL,
@@ -4295,80 +5471,346 @@ static struct bpf_test tests[] = {
{ { 0, 0xffffffff } },
.stack_depth = 40,
},
+ {
+ "STX_MEM_DW: Store double word: first word in memory",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0),
+ BPF_LD_IMM64(R1, 0x0123456789abcdefLL),
+ BPF_STX_MEM(BPF_DW, R10, R1, -40),
+ BPF_LDX_MEM(BPF_W, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+#ifdef __BIG_ENDIAN
+ { { 0, 0x01234567 } },
+#else
+ { { 0, 0x89abcdef } },
+#endif
+ .stack_depth = 40,
+ },
+ {
+ "STX_MEM_DW: Store double word: second word in memory",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0),
+ BPF_LD_IMM64(R1, 0x0123456789abcdefLL),
+ BPF_STX_MEM(BPF_DW, R10, R1, -40),
+ BPF_LDX_MEM(BPF_W, R0, R10, -36),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+#ifdef __BIG_ENDIAN
+ { { 0, 0x89abcdef } },
+#else
+ { { 0, 0x01234567 } },
+#endif
+ .stack_depth = 40,
+ },
/* BPF_STX | BPF_ATOMIC | BPF_W/DW */
{
- "STX_XADD_W: Test: 0x12 + 0x10 = 0x22",
+ "STX_XADD_W: X + 1 + 1 + 1 + ...",
+ { },
+ INTERNAL,
+ { },
+ { { 0, 4134 } },
+ .fill_helper = bpf_fill_stxw,
+ },
+ {
+ "STX_XADD_DW: X + 1 + 1 + 1 + ...",
+ { },
+ INTERNAL,
+ { },
+ { { 0, 4134 } },
+ .fill_helper = bpf_fill_stxdw,
+ },
+ /*
+ * Exhaustive tests of atomic operation variants.
+ * Individual tests are expanded from template macros for all
+ * combinations of ALU operation, word size and fetching.
+ */
+#define BPF_ATOMIC_OP_TEST1(width, op, logic, old, update, result) \
+{ /* Template: apply op to memory at R10 - 40 and return the new memory value */ \
+ "BPF_ATOMIC | " #width ", " #op ": Test: " \
+ #old " " #logic " " #update " = " #result, \
+ .u.insns_int = { \
+ BPF_ALU32_IMM(BPF_MOV, R5, update), /* R5 = source operand */ \
+ BPF_ST_MEM(width, R10, -40, old), /* memory at R10 - 40 = old */ \
+ BPF_ATOMIC_OP(width, op, R10, R5, -40), \
+ BPF_LDX_MEM(width, R0, R10, -40), /* R0 = memory after the op */ \
+ BPF_EXIT_INSN(), \
+ }, \
+ INTERNAL, \
+ { }, \
+ { { 0, result } }, /* expected value of R0 at exit */ \
+ .stack_depth = 40, \
+}
+#define BPF_ATOMIC_OP_TEST2(width, op, logic, old, update, result) \
+{ /* Template: R10 must be left unchanged by the atomic op */ \
+ "BPF_ATOMIC | " #width ", " #op ": Test side effects, r10: " \
+ #old " " #logic " " #update " = " #result, \
+ .u.insns_int = { \
+ BPF_ALU64_REG(BPF_MOV, R1, R10), /* R1 = copy of R10 */ \
+ BPF_ALU32_IMM(BPF_MOV, R0, update), \
+ BPF_ST_MEM(width, R10, -40, old), /* initialise the full operand width */ \
+ BPF_ATOMIC_OP(width, op, R10, R0, -40), \
+ BPF_ALU64_REG(BPF_MOV, R0, R10), \
+ BPF_ALU64_REG(BPF_SUB, R0, R1), /* 0 when R10 equals its saved copy */ \
+ BPF_EXIT_INSN(), \
+ }, \
+ INTERNAL, \
+ { }, \
+ { { 0, 0 } }, \
+ .stack_depth = 40, \
+}
+#define BPF_ATOMIC_OP_TEST3(width, op, logic, old, update, result) \
+{ /* Template: R0 is not an operand of the atomic op and must keep its value */ \
+ "BPF_ATOMIC | " #width ", " #op ": Test side effects, r0: " \
+ #old " " #logic " " #update " = " #result, \
+ .u.insns_int = { \
+ BPF_ALU64_REG(BPF_MOV, R0, R10), /* R0 = copy of R10 */ \
+ BPF_ALU32_IMM(BPF_MOV, R1, update), /* R1 = source operand */ \
+ BPF_ST_MEM(width, R10, -40, old), \
+ BPF_ATOMIC_OP(width, op, R10, R1, -40), \
+ BPF_ALU64_REG(BPF_SUB, R0, R10), /* 0 when R0 still equals R10 */ \
+ BPF_EXIT_INSN(), \
+ }, \
+ INTERNAL, \
+ { }, \
+ { { 0, 0 } }, \
+ .stack_depth = 40, \
+}
+#define BPF_ATOMIC_OP_TEST4(width, op, logic, old, update, result) \
+{ /* Template: return the source register R3 as left by the atomic op */ \
+ "BPF_ATOMIC | " #width ", " #op ": Test fetch: " \
+ #old " " #logic " " #update " = " #result, \
+ .u.insns_int = { \
+ BPF_ALU32_IMM(BPF_MOV, R3, update), /* R3 = source operand */ \
+ BPF_ST_MEM(width, R10, -40, old), \
+ BPF_ATOMIC_OP(width, op, R10, R3, -40), \
+ BPF_ALU64_REG(BPF_MOV, R0, R3), /* R0 = R3 after the op */ \
+ BPF_EXIT_INSN(), \
+ }, \
+ INTERNAL, \
+ { }, \
+ { { 0, (op) & BPF_FETCH ? old : update } }, /* old if op has BPF_FETCH, else update */ \
+ .stack_depth = 40, \
+}
+ /* BPF_ATOMIC | BPF_W: BPF_ADD */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+ /* BPF_ATOMIC | BPF_W: BPF_ADD | BPF_FETCH */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+ /* BPF_ATOMIC | BPF_DW: BPF_ADD */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+ /* BPF_ATOMIC | BPF_DW: BPF_ADD | BPF_FETCH */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+ /* BPF_ATOMIC | BPF_W: BPF_AND */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+ /* BPF_ATOMIC | BPF_W: BPF_AND | BPF_FETCH */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+ /* BPF_ATOMIC | BPF_DW: BPF_AND */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+ /* BPF_ATOMIC | BPF_DW: BPF_AND | BPF_FETCH */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+ /* BPF_ATOMIC | BPF_W: BPF_OR */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+ /* BPF_ATOMIC | BPF_W: BPF_OR | BPF_FETCH */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+ /* BPF_ATOMIC | BPF_DW: BPF_OR */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+ /* BPF_ATOMIC | BPF_DW: BPF_OR | BPF_FETCH */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+ /* BPF_ATOMIC | BPF_W: BPF_XOR */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+ /* BPF_ATOMIC | BPF_W: BPF_XOR | BPF_FETCH */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+ /* BPF_ATOMIC | BPF_DW: BPF_XOR */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+ /* BPF_ATOMIC | BPF_DW: BPF_XOR | BPF_FETCH */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+ /* BPF_ATOMIC | BPF_W: BPF_XCHG */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+ /* BPF_ATOMIC | BPF_DW: BPF_XCHG */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+#undef BPF_ATOMIC_OP_TEST1
+#undef BPF_ATOMIC_OP_TEST2
+#undef BPF_ATOMIC_OP_TEST3
+#undef BPF_ATOMIC_OP_TEST4
+ /* BPF_ATOMIC | BPF_W, BPF_CMPXCHG */
+ {
+ "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test successful return",
+ .u.insns_int = {
+ BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567),
+ BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x01234567 } },
+ .stack_depth = 40,
+ },
+ {
+ "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test successful store",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
- BPF_ST_MEM(BPF_W, R10, -40, 0x10),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
+ BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567),
+ BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
BPF_LDX_MEM(BPF_W, R0, R10, -40),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0x22 } },
+ { { 0, 0x89abcdef } },
.stack_depth = 40,
},
{
- "STX_XADD_W: Test side-effects, r10: 0x12 + 0x10 = 0x22",
+ "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test failure return",
.u.insns_int = {
- BPF_ALU64_REG(BPF_MOV, R1, R10),
- BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
- BPF_ST_MEM(BPF_W, R10, -40, 0x10),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
- BPF_ALU64_REG(BPF_MOV, R0, R10),
- BPF_ALU64_REG(BPF_SUB, R0, R1),
+ BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x76543210),
+ BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0 } },
+ { { 0, 0x01234567 } },
.stack_depth = 40,
},
{
- "STX_XADD_W: Test side-effects, r0: 0x12 + 0x10 = 0x22",
+ "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test failure store",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
- BPF_ST_MEM(BPF_W, R10, -40, 0x10),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
+ BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x76543210),
+ BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+ BPF_LDX_MEM(BPF_W, R0, R10, -40),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0x12 } },
+ { { 0, 0x01234567 } },
.stack_depth = 40,
},
{
- "STX_XADD_W: X + 1 + 1 + 1 + ...",
+ "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test side effects",
+ .u.insns_int = {
+ BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567),
+ BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+ BPF_ALU32_REG(BPF_MOV, R0, R3),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
{ },
+ { { 0, 0x89abcdef } },
+ .stack_depth = 40,
+ },
+ /* BPF_ATOMIC | BPF_DW, BPF_CMPXCHG */
+ {
+ "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful return",
+ .u.insns_int = {
+ BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+ BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+ BPF_ALU64_REG(BPF_MOV, R0, R1),
+ BPF_STX_MEM(BPF_DW, R10, R1, -40),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+ BPF_JMP_REG(BPF_JNE, R0, R1, 1),
+ BPF_ALU64_REG(BPF_SUB, R0, R1),
+ BPF_EXIT_INSN(),
+ },
INTERNAL,
{ },
- { { 0, 4134 } },
- .fill_helper = bpf_fill_stxw,
+ { { 0, 0 } },
+ .stack_depth = 40,
},
{
- "STX_XADD_DW: Test: 0x12 + 0x10 = 0x22",
+ "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful store",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
- BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
+ BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+ BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+ BPF_ALU64_REG(BPF_MOV, R0, R1),
+ BPF_STX_MEM(BPF_DW, R10, R0, -40),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+ BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+ BPF_ALU64_REG(BPF_SUB, R0, R2),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0x22 } },
+ { { 0, 0 } },
.stack_depth = 40,
},
{
- "STX_XADD_DW: Test side-effects, r10: 0x12 + 0x10 = 0x22",
+ "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure return",
.u.insns_int = {
- BPF_ALU64_REG(BPF_MOV, R1, R10),
- BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
- BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
- BPF_ALU64_REG(BPF_MOV, R0, R10),
+ BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+ BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+ BPF_ALU64_REG(BPF_MOV, R0, R1),
+ BPF_ALU64_IMM(BPF_ADD, R0, 1),
+ BPF_STX_MEM(BPF_DW, R10, R1, -40),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+ BPF_JMP_REG(BPF_JNE, R0, R1, 1),
BPF_ALU64_REG(BPF_SUB, R0, R1),
BPF_EXIT_INSN(),
},
@@ -4378,25 +5820,552 @@ static struct bpf_test tests[] = {
.stack_depth = 40,
},
{
- "STX_XADD_DW: Test side-effects, r0: 0x12 + 0x10 = 0x22",
+ "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure store",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
- BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
+ BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+ BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+ BPF_ALU64_REG(BPF_MOV, R0, R1),
+ BPF_ALU64_IMM(BPF_ADD, R0, 1),
+ BPF_STX_MEM(BPF_DW, R10, R1, -40),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+ BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+ BPF_JMP_REG(BPF_JNE, R0, R1, 1),
+ BPF_ALU64_REG(BPF_SUB, R0, R1),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0x12 } },
+ { { 0, 0 } },
.stack_depth = 40,
},
{
- "STX_XADD_DW: X + 1 + 1 + 1 + ...",
+ "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test side effects",
+ .u.insns_int = {
+ BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+ BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+ BPF_ALU64_REG(BPF_MOV, R0, R1),
+ BPF_STX_MEM(BPF_DW, R10, R1, -40),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+ BPF_LD_IMM64(R0, 0xfecdba9876543210ULL),
+ BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+ BPF_ALU64_REG(BPF_SUB, R0, R2),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
{ },
+ { { 0, 0 } },
+ .stack_depth = 40,
+ },
+ /* BPF_JMP32 | BPF_JEQ | BPF_K */
+ {
+ "JMP32_JEQ_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 123),
+ BPF_JMP32_IMM(BPF_JEQ, R0, 321, 1),
+ BPF_JMP32_IMM(BPF_JEQ, R0, 123, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
INTERNAL,
{ },
- { { 0, 4134 } },
- .fill_helper = bpf_fill_stxdw,
+ { { 0, 123 } }
+ },
+ {
+ "JMP32_JEQ_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 12345678),
+ BPF_JMP32_IMM(BPF_JEQ, R0, 12345678 & 0xffff, 1),
+ BPF_JMP32_IMM(BPF_JEQ, R0, 12345678, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 12345678 } }
+ },
+ {
+ "JMP32_JEQ_K: negative immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_JMP32_IMM(BPF_JEQ, R0, 123, 1),
+ BPF_JMP32_IMM(BPF_JEQ, R0, -123, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123 } }
+ },
+ /* BPF_JMP32 | BPF_JEQ | BPF_X */
+ {
+ "JMP32_JEQ_X",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1234),
+ BPF_ALU32_IMM(BPF_MOV, R1, 4321),
+ BPF_JMP32_REG(BPF_JEQ, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, 1234),
+ BPF_JMP32_REG(BPF_JEQ, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1234 } }
+ },
+ /* BPF_JMP32 | BPF_JNE | BPF_K */
+ {
+ "JMP32_JNE_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 123),
+ BPF_JMP32_IMM(BPF_JNE, R0, 123, 1),
+ BPF_JMP32_IMM(BPF_JNE, R0, 321, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 123 } }
+ },
+ {
+ "JMP32_JNE_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 12345678),
+ BPF_JMP32_IMM(BPF_JNE, R0, 12345678, 1),
+ BPF_JMP32_IMM(BPF_JNE, R0, 12345678 & 0xffff, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 12345678 } }
+ },
+ {
+ "JMP32_JNE_K: negative immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_JMP32_IMM(BPF_JNE, R0, -123, 1),
+ BPF_JMP32_IMM(BPF_JNE, R0, 123, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123 } }
+ },
+ /* BPF_JMP32 | BPF_JNE | BPF_X */
+ {
+ "JMP32_JNE_X",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1234),
+ BPF_ALU32_IMM(BPF_MOV, R1, 1234),
+ BPF_JMP32_REG(BPF_JNE, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, 4321),
+ BPF_JMP32_REG(BPF_JNE, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1234 } }
+ },
+ /* BPF_JMP32 | BPF_JSET | BPF_K */
+ {
+ "JMP32_JSET_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_JMP32_IMM(BPF_JSET, R0, 2, 1),
+ BPF_JMP32_IMM(BPF_JSET, R0, 3, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+ {
+ "JMP32_JSET_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x40000000),
+ BPF_JMP32_IMM(BPF_JSET, R0, 0x3fffffff, 1),
+ BPF_JMP32_IMM(BPF_JSET, R0, 0x60000000, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x40000000 } }
+ },
+ {
+ "JMP32_JSET_K: negative immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_JMP32_IMM(BPF_JSET, R0, -1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123 } }
+ },
+ /* BPF_JMP32 | BPF_JSET | BPF_X */
+ {
+ "JMP32_JSET_X",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 8),
+ BPF_ALU32_IMM(BPF_MOV, R1, 7),
+ BPF_JMP32_REG(BPF_JSET, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, 8 | 2),
+ BPF_JMP32_REG(BPF_JNE, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 8 } }
+ },
+ /* BPF_JMP32 | BPF_JGT | BPF_K */
+ {
+ "JMP32_JGT_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 123),
+ BPF_JMP32_IMM(BPF_JGT, R0, 123, 1),
+ BPF_JMP32_IMM(BPF_JGT, R0, 122, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 123 } }
+ },
+ {
+ "JMP32_JGT_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+ BPF_JMP32_IMM(BPF_JGT, R0, 0xffffffff, 1),
+ BPF_JMP32_IMM(BPF_JGT, R0, 0xfffffffd, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xfffffffe } }
+ },
+ /* BPF_JMP32 | BPF_JGT | BPF_X */
+ {
+ "JMP32_JGT_X",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff),
+ BPF_JMP32_REG(BPF_JGT, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd),
+ BPF_JMP32_REG(BPF_JGT, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xfffffffe } }
+ },
+ /* BPF_JMP32 | BPF_JGE | BPF_K */
+ {
+ "JMP32_JGE_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 123),
+ BPF_JMP32_IMM(BPF_JGE, R0, 124, 1),
+ BPF_JMP32_IMM(BPF_JGE, R0, 123, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 123 } }
+ },
+ {
+ "JMP32_JGE_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+ BPF_JMP32_IMM(BPF_JGE, R0, 0xffffffff, 1),
+ BPF_JMP32_IMM(BPF_JGE, R0, 0xfffffffe, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xfffffffe } }
+ },
+ /* BPF_JMP32 | BPF_JGE | BPF_X */
+ {
+ "JMP32_JGE_X",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff),
+ BPF_JMP32_REG(BPF_JGE, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffe),
+ BPF_JMP32_REG(BPF_JGE, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xfffffffe } }
+ },
+ /* BPF_JMP32 | BPF_JLT | BPF_K */
+ {
+ "JMP32_JLT_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 123),
+ BPF_JMP32_IMM(BPF_JLT, R0, 123, 1),
+ BPF_JMP32_IMM(BPF_JLT, R0, 124, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 123 } }
+ },
+ {
+ "JMP32_JLT_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+ BPF_JMP32_IMM(BPF_JLT, R0, 0xfffffffd, 1),
+ BPF_JMP32_IMM(BPF_JLT, R0, 0xffffffff, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xfffffffe } }
+ },
+ /* BPF_JMP32 | BPF_JLT | BPF_X */
+ {
+ "JMP32_JLT_X",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd),
+ BPF_JMP32_REG(BPF_JLT, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff),
+ BPF_JMP32_REG(BPF_JLT, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xfffffffe } }
+ },
+ /* BPF_JMP32 | BPF_JLE | BPF_K */
+ {
+ "JMP32_JLE_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 123),
+ BPF_JMP32_IMM(BPF_JLE, R0, 122, 1),
+ BPF_JMP32_IMM(BPF_JLE, R0, 123, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 123 } }
+ },
+ {
+ "JMP32_JLE_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+ BPF_JMP32_IMM(BPF_JLE, R0, 0xfffffffd, 1),
+ BPF_JMP32_IMM(BPF_JLE, R0, 0xfffffffe, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xfffffffe } }
+ },
+ /* BPF_JMP32 | BPF_JLE | BPF_X */
+ {
+ "JMP32_JLE_X",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd),
+ BPF_JMP32_REG(BPF_JLE, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffe),
+ BPF_JMP32_REG(BPF_JLE, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xfffffffe } }
+ },
+ /* BPF_JMP32 | BPF_JSGT | BPF_K */
+ {
+ "JMP32_JSGT_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_JMP32_IMM(BPF_JSGT, R0, -123, 1),
+ BPF_JMP32_IMM(BPF_JSGT, R0, -124, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123 } }
+ },
+ {
+ "JMP32_JSGT_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+ BPF_JMP32_IMM(BPF_JSGT, R0, -12345678, 1),
+ BPF_JMP32_IMM(BPF_JSGT, R0, -12345679, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -12345678 } }
+ },
+ /* BPF_JMP32 | BPF_JSGT | BPF_X */
+ {
+ "JMP32_JSGT_X",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+ BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+ BPF_JMP32_REG(BPF_JSGT, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, -12345679),
+ BPF_JMP32_REG(BPF_JSGT, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -12345678 } }
+ },
+ /* BPF_JMP32 | BPF_JSGE | BPF_K */
+ {
+ "JMP32_JSGE_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_JMP32_IMM(BPF_JSGE, R0, -122, 1),
+ BPF_JMP32_IMM(BPF_JSGE, R0, -123, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123 } }
+ },
+ {
+ "JMP32_JSGE_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+ BPF_JMP32_IMM(BPF_JSGE, R0, -12345677, 1),
+ BPF_JMP32_IMM(BPF_JSGE, R0, -12345678, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -12345678 } }
+ },
+ /* BPF_JMP32 | BPF_JSGE | BPF_X */
+ {
+ "JMP32_JSGE_X",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+ BPF_ALU32_IMM(BPF_MOV, R1, -12345677),
+ BPF_JMP32_REG(BPF_JSGE, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+ BPF_JMP32_REG(BPF_JSGE, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -12345678 } }
+ },
+ /* BPF_JMP32 | BPF_JSLT | BPF_K */
+ {
+ "JMP32_JSLT_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_JMP32_IMM(BPF_JSLT, R0, -123, 1),
+ BPF_JMP32_IMM(BPF_JSLT, R0, -122, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123 } }
+ },
+ {
+ "JMP32_JSLT_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+ BPF_JMP32_IMM(BPF_JSLT, R0, -12345678, 1),
+ BPF_JMP32_IMM(BPF_JSLT, R0, -12345677, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -12345678 } }
+ },
+ /* BPF_JMP32 | BPF_JSLT | BPF_X */
+ {
+ "JMP32_JSLT_X",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+ BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+ BPF_JMP32_REG(BPF_JSLT, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, -12345677),
+ BPF_JMP32_REG(BPF_JSLT, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -12345678 } }
+ },
+ /* BPF_JMP32 | BPF_JSLE | BPF_K */
+ {
+ "JMP32_JSLE_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_JMP32_IMM(BPF_JSLE, R0, -124, 1),
+ BPF_JMP32_IMM(BPF_JSLE, R0, -123, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123 } }
+ },
+ {
+ "JMP32_JSLE_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+ BPF_JMP32_IMM(BPF_JSLE, R0, -12345679, 1),
+ BPF_JMP32_IMM(BPF_JSLE, R0, -12345678, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -12345678 } }
+ },
+ /* BPF_JMP32 | BPF_JSLE | BPF_X */
+ {
+ "JMP32_JSLE_X",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+ BPF_ALU32_IMM(BPF_MOV, R1, -12345679),
+ BPF_JMP32_REG(BPF_JSLE, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+ BPF_JMP32_REG(BPF_JSLE, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -12345678 } }
},
/* BPF_JMP | BPF_EXIT */
{
@@ -5223,6 +7192,14 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 1 } },
},
+ { /* Mainly checking JIT here. */
+ "BPF_MAXINSNS: Very long conditional jump",
+ { },
+ INTERNAL | FLAG_NO_DATA,
+ { },
+ { { 0, 1 } },
+ .fill_helper = bpf_fill_long_jmp,
+ },
{
"JMP_JA: Jump, gap, jump, ...",
{ },
@@ -6639,7 +8616,7 @@ static int __run_one(const struct bpf_prog *fp, const void *data,
start = ktime_get_ns();
for (i = 0; i < runs; i++)
- ret = BPF_PROG_RUN(fp, data);
+ ret = bpf_prog_run(fp, data);
finish = ktime_get_ns();
migrate_enable();
@@ -6659,7 +8636,14 @@ static int run_one(const struct bpf_prog *fp, struct bpf_test *test)
u64 duration;
u32 ret;
- if (test->test[i].data_size == 0 &&
+ /*
+ * NOTE: Several sub-tests may be present, in which case
+ * a zero {data_size, result} tuple indicates the end of
+ * the sub-test array. The first test is always run,
+ * even if both data_size and result happen to be zero.
+ */
+ if (i > 0 &&
+ test->test[i].data_size == 0 &&
test->test[i].result == 0)
break;
@@ -7005,8 +8989,248 @@ static __init int test_bpf(void)
return err_cnt ? -EINVAL : 0;
}
+struct tail_call_test {
+ const char *descr;
+ struct bpf_insn insns[MAX_INSNS];
+ int result;
+ int stack_depth;
+};
+
+/*
+ * Magic marker used in test snippets for tail calls below.
+ * BPF_LD/MOV to R2 and R3 with this immediate value is replaced
+ * with the proper values by the test runner.
+ */
+#define TAIL_CALL_MARKER 0x7a11ca11
+
+/* Special offset to indicate a NULL call target */
+#define TAIL_CALL_NULL 0x7fff
+
+/* Special offset to indicate an out-of-range index */
+#define TAIL_CALL_INVALID 0x7ffe
+
+#define TAIL_CALL(offset) \
+ BPF_LD_IMM64(R2, TAIL_CALL_MARKER), \
+ BPF_RAW_INSN(BPF_ALU | BPF_MOV | BPF_K, R3, 0, \
+ offset, TAIL_CALL_MARKER), \
+ BPF_JMP_IMM(BPF_TAIL_CALL, 0, 0, 0)
+
+/*
+ * Tail call tests. Each test case may call any other test in the table,
+ * including itself, specified as a relative index offset from the calling
+ * test. The index TAIL_CALL_NULL can be used to specify a NULL target
+ * function to test the JIT error path. Similarly, the index TAIL_CALL_INVALID
+ * results in a target index that is out of range.
+ */
+static struct tail_call_test tail_call_tests[] = {
+ {
+ "Tail call leaf",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, R0, R1),
+ BPF_ALU64_IMM(BPF_ADD, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = 1,
+ },
+ {
+ "Tail call 2",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, R1, 2),
+ TAIL_CALL(-1),
+ BPF_ALU64_IMM(BPF_MOV, R0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .result = 3,
+ },
+ {
+ "Tail call 3",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, R1, 3),
+ TAIL_CALL(-1),
+ BPF_ALU64_IMM(BPF_MOV, R0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .result = 6,
+ },
+ {
+ "Tail call 4",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, R1, 4),
+ TAIL_CALL(-1),
+ BPF_ALU64_IMM(BPF_MOV, R0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .result = 10,
+ },
+ {
+ "Tail call error path, max count reached",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, R1, 1),
+ BPF_ALU64_REG(BPF_MOV, R0, R1),
+ TAIL_CALL(0),
+ BPF_EXIT_INSN(),
+ },
+ .result = MAX_TAIL_CALL_CNT + 1,
+ },
+ {
+ "Tail call error path, NULL target",
+ .insns = {
+ BPF_ALU64_IMM(BPF_MOV, R0, -1),
+ TAIL_CALL(TAIL_CALL_NULL),
+ BPF_ALU64_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = 1,
+ },
+ {
+ "Tail call error path, index out of range",
+ .insns = {
+ BPF_ALU64_IMM(BPF_MOV, R0, -1),
+ TAIL_CALL(TAIL_CALL_INVALID),
+ BPF_ALU64_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = 1,
+ },
+};
+
+static void __init destroy_tail_call_tests(struct bpf_array *progs)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++)
+ if (progs->ptrs[i])
+ bpf_prog_free(progs->ptrs[i]);
+ kfree(progs);
+}
+
+static __init int prepare_tail_call_tests(struct bpf_array **pprogs)
+{
+ int ntests = ARRAY_SIZE(tail_call_tests);
+ struct bpf_array *progs;
+ int which, err;
+
+ /* Allocate the table of programs to be used for tail calls */
+ progs = kzalloc(sizeof(*progs) + (ntests + 1) * sizeof(progs->ptrs[0]),
+ GFP_KERNEL);
+ if (!progs)
+ goto out_nomem;
+
+ /* Create all eBPF programs and populate the table */
+ for (which = 0; which < ntests; which++) {
+ struct tail_call_test *test = &tail_call_tests[which];
+ struct bpf_prog *fp;
+ int len, i;
+
+ /* Compute the number of program instructions */
+ for (len = 0; len < MAX_INSNS; len++) {
+ struct bpf_insn *insn = &test->insns[len];
+
+ if (len < MAX_INSNS - 1 &&
+ insn->code == (BPF_LD | BPF_DW | BPF_IMM))
+ len++;
+ if (insn->code == 0)
+ break;
+ }
+
+ /* Allocate and initialize the program */
+ fp = bpf_prog_alloc(bpf_prog_size(len), 0);
+ if (!fp)
+ goto out_nomem;
+
+ fp->len = len;
+ fp->type = BPF_PROG_TYPE_SOCKET_FILTER;
+ fp->aux->stack_depth = test->stack_depth;
+ memcpy(fp->insnsi, test->insns, len * sizeof(struct bpf_insn));
+
+ /* Relocate runtime tail call offsets and addresses */
+ for (i = 0; i < len; i++) {
+ struct bpf_insn *insn = &fp->insnsi[i];
+
+ if (insn->imm != TAIL_CALL_MARKER)
+ continue;
+
+ switch (insn->code) {
+ case BPF_LD | BPF_DW | BPF_IMM:
+ insn[0].imm = (u32)(long)progs;
+ insn[1].imm = ((u64)(long)progs) >> 32;
+ break;
+
+ case BPF_ALU | BPF_MOV | BPF_K:
+ if (insn->off == TAIL_CALL_NULL)
+ insn->imm = ntests;
+ else if (insn->off == TAIL_CALL_INVALID)
+ insn->imm = ntests + 1;
+ else
+ insn->imm = which + insn->off;
+ insn->off = 0;
+ }
+ }
+
+ fp = bpf_prog_select_runtime(fp, &err);
+ if (err)
+ goto out_err;
+
+ progs->ptrs[which] = fp;
+ }
+
+ /* The last entry contains a NULL program pointer */
+ progs->map.max_entries = ntests + 1;
+ *pprogs = progs;
+ return 0;
+
+out_nomem:
+ err = -ENOMEM;
+
+out_err:
+ if (progs)
+ destroy_tail_call_tests(progs);
+ return err;
+}
+
+static __init int test_tail_calls(struct bpf_array *progs)
+{
+ int i, err_cnt = 0, pass_cnt = 0;
+ int jit_cnt = 0, run_cnt = 0;
+
+ for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++) {
+ struct tail_call_test *test = &tail_call_tests[i];
+ struct bpf_prog *fp = progs->ptrs[i];
+ u64 duration;
+ int ret;
+
+ cond_resched();
+
+ pr_info("#%d %s ", i, test->descr);
+ if (!fp) {
+ err_cnt++;
+ continue;
+ }
+ pr_cont("jited:%u ", fp->jited);
+
+ run_cnt++;
+ if (fp->jited)
+ jit_cnt++;
+
+ ret = __run_one(fp, NULL, MAX_TESTRUNS, &duration);
+ if (ret == test->result) {
+ pr_cont("%lld PASS", duration);
+ pass_cnt++;
+ } else {
+ pr_cont("ret %d != %d FAIL", ret, test->result);
+ err_cnt++;
+ }
+ }
+
+ pr_info("%s: Summary: %d PASSED, %d FAILED, [%d/%d JIT'ed]\n",
+ __func__, pass_cnt, err_cnt, jit_cnt, run_cnt);
+
+ return err_cnt ? -EINVAL : 0;
+}
+
static int __init test_bpf_init(void)
{
+ struct bpf_array *progs = NULL;
int ret;
ret = prepare_bpf_tests();
@@ -7018,6 +9242,14 @@ static int __init test_bpf_init(void)
if (ret)
return ret;
+ ret = prepare_tail_call_tests(&progs);
+ if (ret)
+ return ret;
+ ret = test_tail_calls(progs);
+ destroy_tail_call_tests(progs);
+ if (ret)
+ return ret;
+
return test_skb_segment();
}
diff --git a/lib/test_lockup.c b/lib/test_lockup.c
index 864554e76973..906b598740a7 100644
--- a/lib/test_lockup.c
+++ b/lib/test_lockup.c
@@ -485,13 +485,13 @@ static int __init test_lockup_init(void)
offsetof(spinlock_t, lock.wait_lock.magic),
SPINLOCK_MAGIC) ||
test_magic(lock_rwlock_ptr,
- offsetof(rwlock_t, rtmutex.wait_lock.magic),
+ offsetof(rwlock_t, rwbase.rtmutex.wait_lock.magic),
SPINLOCK_MAGIC) ||
test_magic(lock_mutex_ptr,
- offsetof(struct mutex, lock.wait_lock.magic),
+ offsetof(struct mutex, rtmutex.wait_lock.magic),
SPINLOCK_MAGIC) ||
test_magic(lock_rwsem_ptr,
- offsetof(struct rw_semaphore, rtmutex.wait_lock.magic),
+ offsetof(struct rw_semaphore, rwbase.rtmutex.wait_lock.magic),
SPINLOCK_MAGIC))
return -EINVAL;
#else
@@ -502,7 +502,7 @@ static int __init test_lockup_init(void)
offsetof(rwlock_t, magic),
RWLOCK_MAGIC) ||
test_magic(lock_mutex_ptr,
- offsetof(struct mutex, wait_lock.rlock.magic),
+ offsetof(struct mutex, wait_lock.magic),
SPINLOCK_MAGIC) ||
test_magic(lock_rwsem_ptr,
offsetof(struct rw_semaphore, wait_lock.magic),
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f5561ea7d90a..cd06dca232c3 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -807,6 +807,7 @@ struct backing_dev_info *bdi_alloc(int node_id)
bdi->capabilities = BDI_CAP_WRITEBACK | BDI_CAP_WRITEBACK_ACCT;
bdi->ra_pages = VM_READAHEAD_PAGES;
bdi->io_pages = VM_READAHEAD_PAGES;
+ timer_setup(&bdi->laptop_mode_wb_timer, laptop_mode_timer_fn, 0);
return bdi;
}
EXPORT_SYMBOL(bdi_alloc);
@@ -928,6 +929,8 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
void bdi_unregister(struct backing_dev_info *bdi)
{
+ del_timer_sync(&bdi->laptop_mode_wb_timer);
+
/* make sure nobody finds us on the bdi_list anymore */
bdi_remove_from_list(bdi);
wb_shutdown(&bdi->wb);
diff --git a/mm/filemap.c b/mm/filemap.c
index d1458ecf2f51..920e8dc03251 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -76,8 +76,9 @@
* ->swap_lock (exclusive_swap_page, others)
* ->i_pages lock
*
- * ->i_mutex
- * ->i_mmap_rwsem (truncate->unmap_mapping_range)
+ * ->i_rwsem
+ * ->invalidate_lock (acquired by fs in truncate path)
+ * ->i_mmap_rwsem (truncate->unmap_mapping_range)
*
* ->mmap_lock
* ->i_mmap_rwsem
@@ -85,9 +86,10 @@
* ->i_pages lock (arch-dependent flush_dcache_mmap_lock)
*
* ->mmap_lock
- * ->lock_page (access_process_vm)
+ * ->invalidate_lock (filemap_fault)
+ * ->lock_page (filemap_fault, access_process_vm)
*
- * ->i_mutex (generic_perform_write)
+ * ->i_rwsem (generic_perform_write)
* ->mmap_lock (fault_in_pages_readable->do_page_fault)
*
* bdi->wb.list_lock
@@ -378,6 +380,32 @@ static int filemap_check_and_keep_errors(struct address_space *mapping)
}
/**
+ * filemap_fdatawrite_wbc - start writeback on mapping dirty pages in range
+ * @mapping: address space structure to write
+ * @wbc: the writeback_control controlling the writeout
+ *
+ * Call writepages on the mapping using the provided wbc to control the
+ * writeout.
+ *
+ * Return: %0 on success, negative error code otherwise.
+ */
+int filemap_fdatawrite_wbc(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ int ret;
+
+ if (!mapping_can_writeback(mapping) ||
+ !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+ return 0;
+
+ wbc_attach_fdatawrite_inode(wbc, mapping->host);
+ ret = do_writepages(mapping, wbc);
+ wbc_detach_inode(wbc);
+ return ret;
+}
+EXPORT_SYMBOL(filemap_fdatawrite_wbc);
+
+/**
* __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
* @mapping: address space structure to write
* @start: offset in bytes where the range starts
@@ -397,7 +425,6 @@ static int filemap_check_and_keep_errors(struct address_space *mapping)
int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
loff_t end, int sync_mode)
{
- int ret;
struct writeback_control wbc = {
.sync_mode = sync_mode,
.nr_to_write = LONG_MAX,
@@ -405,14 +432,7 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
.range_end = end,
};
- if (!mapping_can_writeback(mapping) ||
- !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
- return 0;
-
- wbc_attach_fdatawrite_inode(&wbc, mapping->host);
- ret = do_writepages(mapping, &wbc);
- wbc_detach_inode(&wbc);
- return ret;
+ return filemap_fdatawrite_wbc(mapping, &wbc);
}
static inline int __filemap_fdatawrite(struct address_space *mapping,
@@ -1008,6 +1028,44 @@ EXPORT_SYMBOL(__page_cache_alloc);
#endif
/*
+ * filemap_invalidate_lock_two - lock invalidate_lock for two mappings
+ *
+ * Lock exclusively invalidate_lock of any passed mapping that is not NULL.
+ *
+ * @mapping1: the first mapping to lock
+ * @mapping2: the second mapping to lock
+ */
+void filemap_invalidate_lock_two(struct address_space *mapping1,
+ struct address_space *mapping2)
+{
+ if (mapping1 > mapping2)
+ swap(mapping1, mapping2);
+ if (mapping1)
+ down_write(&mapping1->invalidate_lock);
+ if (mapping2 && mapping1 != mapping2)
+ down_write_nested(&mapping2->invalidate_lock, 1);
+}
+EXPORT_SYMBOL(filemap_invalidate_lock_two);
+
+/*
+ * filemap_invalidate_unlock_two - unlock invalidate_lock for two mappings
+ *
+ * Unlock exclusive invalidate_lock of any passed mapping that is not NULL.
+ *
+ * @mapping1: the first mapping to unlock
+ * @mapping2: the second mapping to unlock
+ */
+void filemap_invalidate_unlock_two(struct address_space *mapping1,
+ struct address_space *mapping2)
+{
+ if (mapping1)
+ up_write(&mapping1->invalidate_lock);
+ if (mapping2 && mapping1 != mapping2)
+ up_write(&mapping2->invalidate_lock);
+}
+EXPORT_SYMBOL(filemap_invalidate_unlock_two);
+
+/*
* In order to wait for pages to become available there must be
* waitqueues associated with pages. By using a hash table of
* waitqueues where the bucket discipline is to maintain all
@@ -2368,20 +2426,30 @@ static int filemap_update_page(struct kiocb *iocb,
{
int error;
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!filemap_invalidate_trylock_shared(mapping))
+ return -EAGAIN;
+ } else {
+ filemap_invalidate_lock_shared(mapping);
+ }
+
if (!trylock_page(page)) {
+ error = -EAGAIN;
if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
- return -EAGAIN;
+ goto unlock_mapping;
if (!(iocb->ki_flags & IOCB_WAITQ)) {
+ filemap_invalidate_unlock_shared(mapping);
put_and_wait_on_page_locked(page, TASK_KILLABLE);
return AOP_TRUNCATED_PAGE;
}
error = __lock_page_async(page, iocb->ki_waitq);
if (error)
- return error;
+ goto unlock_mapping;
}
+ error = AOP_TRUNCATED_PAGE;
if (!page->mapping)
- goto truncated;
+ goto unlock;
error = 0;
if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, page))
@@ -2392,15 +2460,13 @@ static int filemap_update_page(struct kiocb *iocb,
goto unlock;
error = filemap_read_page(iocb->ki_filp, mapping, page);
- if (error == AOP_TRUNCATED_PAGE)
- put_page(page);
- return error;
-truncated:
- unlock_page(page);
- put_page(page);
- return AOP_TRUNCATED_PAGE;
+ goto unlock_mapping;
unlock:
unlock_page(page);
+unlock_mapping:
+ filemap_invalidate_unlock_shared(mapping);
+ if (error == AOP_TRUNCATED_PAGE)
+ put_page(page);
return error;
}
@@ -2415,6 +2481,19 @@ static int filemap_create_page(struct file *file,
if (!page)
return -ENOMEM;
+ /*
+ * Protect against truncate / hole punch. Grabbing invalidate_lock here
+ * assures we cannot instantiate and bring uptodate new pagecache pages
+ * after evicting page cache during truncate and before actually
+ * freeing blocks. Note that we could release invalidate_lock after
+ * inserting the page into page cache as the locked page would then be
+ * enough to synchronize with hole punching. But there are code paths
+ * such as filemap_update_page() filling in partially uptodate pages or
+ * ->readpages() that need to hold invalidate_lock while mapping blocks
+ * for IO so let's hold the lock here as well to keep locking rules
+ * simple.
+ */
+ filemap_invalidate_lock_shared(mapping);
error = add_to_page_cache_lru(page, mapping, index,
mapping_gfp_constraint(mapping, GFP_KERNEL));
if (error == -EEXIST)
@@ -2426,9 +2505,11 @@ static int filemap_create_page(struct file *file,
if (error)
goto error;
+ filemap_invalidate_unlock_shared(mapping);
pagevec_add(pvec, page);
return 0;
error:
+ filemap_invalidate_unlock_shared(mapping);
put_page(page);
return error;
}
@@ -2967,6 +3048,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
pgoff_t max_off;
struct page *page;
vm_fault_t ret = 0;
+ bool mapping_locked = false;
max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
if (unlikely(offset >= max_off))
@@ -2976,25 +3058,39 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
* Do we have something in the page cache already?
*/
page = find_get_page(mapping, offset);
- if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
+ if (likely(page)) {
/*
- * We found the page, so try async readahead before
- * waiting for the lock.
+ * We found the page, so try async readahead before waiting for
+ * the lock.
*/
- fpin = do_async_mmap_readahead(vmf, page);
- } else if (!page) {
+ if (!(vmf->flags & FAULT_FLAG_TRIED))
+ fpin = do_async_mmap_readahead(vmf, page);
+ if (unlikely(!PageUptodate(page))) {
+ filemap_invalidate_lock_shared(mapping);
+ mapping_locked = true;
+ }
+ } else {
/* No page in the page cache at all */
count_vm_event(PGMAJFAULT);
count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
ret = VM_FAULT_MAJOR;
fpin = do_sync_mmap_readahead(vmf);
retry_find:
+ /*
+ * See comment in filemap_create_page() why we need
+ * invalidate_lock
+ */
+ if (!mapping_locked) {
+ filemap_invalidate_lock_shared(mapping);
+ mapping_locked = true;
+ }
page = pagecache_get_page(mapping, offset,
FGP_CREAT|FGP_FOR_MMAP,
vmf->gfp_mask);
if (!page) {
if (fpin)
goto out_retry;
+ filemap_invalidate_unlock_shared(mapping);
return VM_FAULT_OOM;
}
}
@@ -3014,8 +3110,20 @@ retry_find:
* We have a locked page in the page cache, now we need to check
* that it's up-to-date. If not, it is going to be due to an error.
*/
- if (unlikely(!PageUptodate(page)))
+ if (unlikely(!PageUptodate(page))) {
+ /*
+ * The page was in cache and uptodate and now it is not.
+ * Strange but possible since we didn't hold the page lock all
+ * the time. Let's drop everything, get the invalidate lock and
+ * try again.
+ */
+ if (!mapping_locked) {
+ unlock_page(page);
+ put_page(page);
+ goto retry_find;
+ }
goto page_not_uptodate;
+ }
/*
* We've made it this far and we had to drop our mmap_lock, now is the
@@ -3026,6 +3134,8 @@ retry_find:
unlock_page(page);
goto out_retry;
}
+ if (mapping_locked)
+ filemap_invalidate_unlock_shared(mapping);
/*
* Found the page and have a reference on it.
@@ -3056,6 +3166,7 @@ page_not_uptodate:
if (!error || error == AOP_TRUNCATED_PAGE)
goto retry_find;
+ filemap_invalidate_unlock_shared(mapping);
return VM_FAULT_SIGBUS;
@@ -3067,6 +3178,8 @@ out_retry:
*/
if (page)
put_page(page);
+ if (mapping_locked)
+ filemap_invalidate_unlock_shared(mapping);
if (fpin)
fput(fpin);
return ret | VM_FAULT_RETRY;
@@ -3437,6 +3550,8 @@ out:
*
* If the page does not get brought uptodate, return -EIO.
*
+ * The function expects mapping->invalidate_lock to be already held.
+ *
* Return: up to date page on success, ERR_PTR() on failure.
*/
struct page *read_cache_page(struct address_space *mapping,
@@ -3460,6 +3575,8 @@ EXPORT_SYMBOL(read_cache_page);
*
* If the page does not get brought uptodate, return -EIO.
*
+ * The function expects mapping->invalidate_lock to be already held.
+ *
* Return: up to date page on success, ERR_PTR() on failure.
*/
struct page *read_cache_page_gfp(struct address_space *mapping,
@@ -3704,12 +3821,12 @@ EXPORT_SYMBOL(generic_perform_write);
* modification times and calls proper subroutines depending on whether we
* do direct IO or a standard buffered write.
*
- * It expects i_mutex to be grabbed unless we work on a block device or similar
+ * It expects i_rwsem to be grabbed unless we work on a block device or similar
* object which does not need locking at all.
*
* This function does *not* take care of syncing data in case of O_SYNC write.
* A caller has to handle it. This is mainly due to the fact that we want to
- * avoid syncing under i_mutex.
+ * avoid syncing under i_rwsem.
*
* Return:
* * number of bytes written, even for truncated writes
@@ -3797,7 +3914,7 @@ EXPORT_SYMBOL(__generic_file_write_iter);
*
* This is a wrapper around __generic_file_write_iter() to be used by most
* filesystems. It takes care of syncing the file in case of O_SYNC file
- * and acquires i_mutex as needed.
+ * and acquires i_rwsem as needed.
* Return:
* * negative error code if no data has been written at all of
* vfs_fsync_range() failed for a synchronous write
diff --git a/mm/gup.c b/mm/gup.c
index 42b8b1fa6521..b94717977d17 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1558,9 +1558,12 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
gup_flags |= FOLL_WRITE;
/*
- * See check_vma_flags(): Will return -EFAULT on incompatible mappings
- * or with insufficient permissions.
+ * We want to report -EINVAL instead of -EFAULT for any permission
+ * problems or incompatible mappings.
*/
+ if (check_vma_flags(vma, gup_flags))
+ return -EINVAL;
+
return __get_user_pages(mm, start, nr_pages, gup_flags,
NULL, NULL, locked);
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index dfc940d5221d..8ea35ba6699f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2476,7 +2476,7 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
if (!rc) {
/*
* This indicates there is an entry in the reserve map
- * added by alloc_huge_page. We know it was added
+ * not added by alloc_huge_page. We know it was added
* before the alloc_huge_page call, otherwise
* HPageRestoreReserve would be set on the page.
* Remove the entry so that a subsequent allocation
@@ -4660,7 +4660,9 @@ retry_avoidcopy:
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(&range);
out_release_all:
- restore_reserve_on_error(h, vma, haddr, new_page);
+ /* No restore in case of successful pagetable update (Break COW) */
+ if (new_page != old_page)
+ restore_reserve_on_error(h, vma, haddr, new_page);
put_page(new_page);
out_release_old:
put_page(old_page);
@@ -4776,7 +4778,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
pte_t new_pte;
spinlock_t *ptl;
unsigned long haddr = address & huge_page_mask(h);
- bool new_page = false;
+ bool new_page, new_pagecache_page = false;
/*
* Currently, we are forced to kill the process in the event the
@@ -4799,6 +4801,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
goto out;
retry:
+ new_page = false;
page = find_lock_page(mapping, idx);
if (!page) {
/* Check for page in userfault range */
@@ -4842,6 +4845,7 @@ retry:
goto retry;
goto out;
}
+ new_pagecache_page = true;
} else {
lock_page(page);
if (unlikely(anon_vma_prepare(vma))) {
@@ -4926,7 +4930,9 @@ backout:
spin_unlock(ptl);
backout_unlocked:
unlock_page(page);
- restore_reserve_on_error(h, vma, haddr, page);
+ /* restore reserve for newly allocated pages not in page cache */
+ if (new_page && !new_pagecache_page)
+ restore_reserve_on_error(h, vma, haddr, page);
put_page(page);
goto out;
}
@@ -5135,6 +5141,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
int ret = -ENOMEM;
struct page *page;
int writable;
+ bool new_pagecache_page = false;
if (is_continue) {
ret = -EFAULT;
@@ -5228,6 +5235,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
ret = huge_add_to_page_cache(page, mapping, idx);
if (ret)
goto out_release_nounlock;
+ new_pagecache_page = true;
}
ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
@@ -5291,7 +5299,8 @@ out_release_unlock:
if (vm_shared || is_continue)
unlock_page(page);
out_release_nounlock:
- restore_reserve_on_error(h, dst_vma, dst_addr, page);
+ if (!new_pagecache_page)
+ restore_reserve_on_error(h, dst_vma, dst_addr, page);
put_page(page);
goto out;
}
diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
index 942cbc16ad26..eb6307c199ea 100644
--- a/mm/kfence/kfence_test.c
+++ b/mm/kfence/kfence_test.c
@@ -23,8 +23,15 @@
#include <linux/tracepoint.h>
#include <trace/events/printk.h>
+#include <asm/kfence.h>
+
#include "kfence.h"
+/* May be overridden by <asm/kfence.h>. */
+#ifndef arch_kfence_test_address
+#define arch_kfence_test_address(addr) (addr)
+#endif
+
/* Report as observed from console. */
static struct {
spinlock_t lock;
@@ -82,6 +89,7 @@ static const char *get_access_type(const struct expect_report *r)
/* Check observed report matches information in @r. */
static bool report_matches(const struct expect_report *r)
{
+ unsigned long addr = (unsigned long)r->addr;
bool ret = false;
unsigned long flags;
typeof(observed.lines) expect;
@@ -131,22 +139,25 @@ static bool report_matches(const struct expect_report *r)
switch (r->type) {
case KFENCE_ERROR_OOB:
cur += scnprintf(cur, end - cur, "Out-of-bounds %s at", get_access_type(r));
+ addr = arch_kfence_test_address(addr);
break;
case KFENCE_ERROR_UAF:
cur += scnprintf(cur, end - cur, "Use-after-free %s at", get_access_type(r));
+ addr = arch_kfence_test_address(addr);
break;
case KFENCE_ERROR_CORRUPTION:
cur += scnprintf(cur, end - cur, "Corrupted memory at");
break;
case KFENCE_ERROR_INVALID:
cur += scnprintf(cur, end - cur, "Invalid %s at", get_access_type(r));
+ addr = arch_kfence_test_address(addr);
break;
case KFENCE_ERROR_INVALID_FREE:
cur += scnprintf(cur, end - cur, "Invalid free of");
break;
}
- cur += scnprintf(cur, end - cur, " 0x%p", (void *)r->addr);
+ cur += scnprintf(cur, end - cur, " 0x%p", (void *)addr);
spin_lock_irqsave(&observed.lock, flags);
if (!report_available())
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 228a2fbe0657..73d46d16d575 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -290,7 +290,7 @@ static void hex_dump_object(struct seq_file *seq,
warn_or_seq_printf(seq, " hex dump (first %zu bytes):\n", len);
kasan_disable_current();
warn_or_seq_hex_dump(seq, DUMP_PREFIX_NONE, HEX_ROW_SIZE,
- HEX_GROUP_SIZE, ptr, len, HEX_ASCII);
+ HEX_GROUP_SIZE, kasan_reset_tag((void *)ptr), len, HEX_ASCII);
kasan_enable_current();
}
@@ -1171,7 +1171,7 @@ static bool update_checksum(struct kmemleak_object *object)
kasan_disable_current();
kcsan_disable_current();
- object->checksum = crc32(0, (void *)object->pointer, object->size);
+ object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size);
kasan_enable_current();
kcsan_enable_current();
@@ -1246,7 +1246,7 @@ static void scan_block(void *_start, void *_end,
break;
kasan_disable_current();
- pointer = *ptr;
+ pointer = *(unsigned long *)kasan_reset_tag((void *)ptr);
kasan_enable_current();
untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer);
diff --git a/mm/madvise.c b/mm/madvise.c
index 6d3d348b17f4..56324a3dbc4e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -862,10 +862,12 @@ static long madvise_populate(struct vm_area_struct *vma,
switch (pages) {
case -EINTR:
return -EINTR;
- case -EFAULT: /* Incompatible mappings / permissions. */
+ case -EINVAL: /* Incompatible mappings / permissions. */
return -EINVAL;
case -EHWPOISON:
return -EHWPOISON;
+ case -EFAULT: /* VM_FAULT_SIGBUS or VM_FAULT_SIGSEGV */
+ return -EFAULT;
default:
pr_warn_once("%s: unhandled return value: %ld\n",
__func__, pages);
@@ -910,7 +912,7 @@ static long madvise_remove(struct vm_area_struct *vma,
+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
/*
- * Filesystem's fallocate may need to take i_mutex. We need to
+ * Filesystem's fallocate may need to take i_rwsem. We need to
* explicitly grab a reference because the vma (and hence the
* vma's reference to the file) can go away as soon as we drop
* mmap_lock.
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index eb8e87c4833f..389b5766e74f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -968,7 +968,7 @@ static __always_inline bool memcg_kmem_bypass(void)
return false;
/* Memcg to charge can't be determined. */
- if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
+ if (!in_task() || !current->mm || (current->flags & PF_KTHREAD))
return true;
return false;
@@ -3106,13 +3106,15 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
stock->cached_pgdat = pgdat;
} else if (stock->cached_pgdat != pgdat) {
/* Flush the existing cached vmstat data */
+ struct pglist_data *oldpg = stock->cached_pgdat;
+
if (stock->nr_slab_reclaimable_b) {
- mod_objcg_mlstate(objcg, pgdat, NR_SLAB_RECLAIMABLE_B,
+ mod_objcg_mlstate(objcg, oldpg, NR_SLAB_RECLAIMABLE_B,
stock->nr_slab_reclaimable_b);
stock->nr_slab_reclaimable_b = 0;
}
if (stock->nr_slab_unreclaimable_b) {
- mod_objcg_mlstate(objcg, pgdat, NR_SLAB_UNRECLAIMABLE_B,
+ mod_objcg_mlstate(objcg, oldpg, NR_SLAB_UNRECLAIMABLE_B,
stock->nr_slab_unreclaimable_b);
stock->nr_slab_unreclaimable_b = 0;
}
@@ -7048,14 +7050,14 @@ void mem_cgroup_sk_free(struct sock *sk)
* mem_cgroup_charge_skmem - charge socket memory
* @memcg: memcg to charge
* @nr_pages: number of pages to charge
+ * @gfp_mask: reclaim mode
*
* Charges @nr_pages to @memcg. Returns %true if the charge fit within
- * @memcg's configured limit, %false if the charge had to be forced.
+ * @memcg's configured limit, %false if it doesn't.
*/
-bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
+bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
+ gfp_t gfp_mask)
{
- gfp_t gfp_mask = GFP_KERNEL;
-
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
struct page_counter *fail;
@@ -7063,21 +7065,19 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
memcg->tcpmem_pressure = 0;
return true;
}
- page_counter_charge(&memcg->tcpmem, nr_pages);
memcg->tcpmem_pressure = 1;
+ if (gfp_mask & __GFP_NOFAIL) {
+ page_counter_charge(&memcg->tcpmem, nr_pages);
+ return true;
+ }
return false;
}
- /* Don't block in the packet receive path */
- if (in_softirq())
- gfp_mask = GFP_NOWAIT;
-
- mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
-
- if (try_charge(memcg, gfp_mask, nr_pages) == 0)
+ if (try_charge(memcg, gfp_mask, nr_pages) == 0) {
+ mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
return true;
+ }
- try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages);
return false;
}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index eefd823deb67..e1f87cf13235 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -866,7 +866,7 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
/*
* Truncation is a bit tricky. Enable it per file system for now.
*
- * Open: to take i_mutex or not for this? Right now we don't.
+ * Open: to take i_rwsem or not for this? Right now we don't.
*/
ret = truncate_error_page(p, pfn, mapping);
out:
@@ -1146,7 +1146,7 @@ static int __get_hwpoison_page(struct page *page)
* unexpected races caused by taking a page refcount.
*/
if (!HWPoisonHandlable(head))
- return 0;
+ return -EBUSY;
if (PageTransHuge(head)) {
/*
@@ -1199,9 +1199,15 @@ try_again:
}
goto out;
} else if (ret == -EBUSY) {
- /* We raced with freeing huge page to buddy, retry. */
- if (pass++ < 3)
+ /*
+ * We raced with (possibly temporary) unhandlable
+ * page, retry.
+ */
+ if (pass++ < 3) {
+ shake_page(p, 1);
goto try_again;
+ }
+ ret = -EIO;
goto out;
}
}
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 8cb75b26ea4f..86c3af79e874 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1731,6 +1731,7 @@ failed_removal_isolated:
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
memory_notify(MEM_CANCEL_OFFLINE, &arg);
failed_removal_pcplists_disabled:
+ lru_cache_enable();
zone_pcp_enable(zone);
failed_removal:
pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n",
diff --git a/mm/mmap.c b/mm/mmap.c
index ca54d36d203a..181a113b545d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1517,12 +1517,6 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
return -EACCES;
- /*
- * Make sure there are no mandatory locks on the file.
- */
- if (locks_verify_locked(file))
- return -EAGAIN;
-
vm_flags |= VM_SHARED | VM_MAYSHARE;
if (!(file->f_mode & FMODE_WRITE))
vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
diff --git a/mm/nommu.c b/mm/nommu.c
index 3a93d4054810..9d0ad98f838c 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -826,9 +826,6 @@ static int validate_mmap_request(struct file *file,
(file->f_mode & FMODE_WRITE))
return -EACCES;
- if (locks_verify_locked(file))
- return -EAGAIN;
-
if (!(capabilities & NOMMU_MAP_DIRECT))
return -ENODEV;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 9f63548f247c..c12f67cbfa19 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2010,7 +2010,6 @@ int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
return ret;
}
-#ifdef CONFIG_BLOCK
void laptop_mode_timer_fn(struct timer_list *t)
{
struct backing_dev_info *backing_dev_info =
@@ -2045,7 +2044,6 @@ void laptop_sync_completion(void)
rcu_read_unlock();
}
-#endif
/*
* If ratelimit_pages is too high then we can get into dirty-data overload
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 856b175c15a4..eeb3a9cb36bb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3453,19 +3453,10 @@ void free_unref_page_list(struct list_head *list)
* comment in free_unref_page.
*/
migratetype = get_pcppage_migratetype(page);
- if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
- if (unlikely(is_migrate_isolate(migratetype))) {
- list_del(&page->lru);
- free_one_page(page_zone(page), page, pfn, 0,
- migratetype, FPI_NONE);
- continue;
- }
-
- /*
- * Non-isolated types over MIGRATE_PCPTYPES get added
- * to the MIGRATE_MOVABLE pcp list.
- */
- set_pcppage_migratetype(page, MIGRATE_MOVABLE);
+ if (unlikely(is_migrate_isolate(migratetype))) {
+ list_del(&page->lru);
+ free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE);
+ continue;
}
set_page_private(page, pfn);
@@ -3475,7 +3466,15 @@ void free_unref_page_list(struct list_head *list)
list_for_each_entry_safe(page, next, list, lru) {
pfn = page_private(page);
set_page_private(page, 0);
+
+ /*
+ * Non-isolated types over MIGRATE_PCPTYPES get added
+ * to the MIGRATE_MOVABLE pcp list.
+ */
migratetype = get_pcppage_migratetype(page);
+ if (unlikely(migratetype >= MIGRATE_PCPTYPES))
+ migratetype = MIGRATE_MOVABLE;
+
trace_mm_page_free_batched(page);
free_unref_page_commit(page, pfn, migratetype, 0);
diff --git a/mm/readahead.c b/mm/readahead.c
index d589f147f4c2..41b75d76d36e 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -192,6 +192,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
*/
unsigned int nofs = memalloc_nofs_save();
+ filemap_invalidate_lock_shared(mapping);
/*
* Preallocate as many pages as we will need.
*/
@@ -236,6 +237,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
* will then handle the error.
*/
read_pages(ractl, &page_pool, false);
+ filemap_invalidate_unlock_shared(mapping);
memalloc_nofs_restore(nofs);
}
EXPORT_SYMBOL_GPL(page_cache_ra_unbounded);
diff --git a/mm/rmap.c b/mm/rmap.c
index b9eb5c12f3fe..2d29a57d29e8 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -20,28 +20,29 @@
/*
* Lock ordering in mm:
*
- * inode->i_mutex (while writing or truncating, not reading or faulting)
+ * inode->i_rwsem (while writing or truncating, not reading or faulting)
* mm->mmap_lock
- * page->flags PG_locked (lock_page) * (see huegtlbfs below)
- * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
- * mapping->i_mmap_rwsem
- * hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
- * anon_vma->rwsem
- * mm->page_table_lock or pte_lock
- * swap_lock (in swap_duplicate, swap_info_get)
- * mmlist_lock (in mmput, drain_mmlist and others)
- * mapping->private_lock (in __set_page_dirty_buffers)
- * lock_page_memcg move_lock (in __set_page_dirty_buffers)
- * i_pages lock (widely used)
- * lruvec->lru_lock (in lock_page_lruvec_irq)
- * inode->i_lock (in set_page_dirty's __mark_inode_dirty)
- * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
- * sb_lock (within inode_lock in fs/fs-writeback.c)
- * i_pages lock (widely used, in set_page_dirty,
- * in arch-dependent flush_dcache_mmap_lock,
- * within bdi.wb->list_lock in __sync_single_inode)
+ * mapping->invalidate_lock (in filemap_fault)
+ * page->flags PG_locked (lock_page) * (see hugetlbfs below)
+ * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
+ * mapping->i_mmap_rwsem
+ * hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
+ * anon_vma->rwsem
+ * mm->page_table_lock or pte_lock
+ * swap_lock (in swap_duplicate, swap_info_get)
+ * mmlist_lock (in mmput, drain_mmlist and others)
+ * mapping->private_lock (in __set_page_dirty_buffers)
+ * lock_page_memcg move_lock (in __set_page_dirty_buffers)
+ * i_pages lock (widely used)
+ * lruvec->lru_lock (in lock_page_lruvec_irq)
+ * inode->i_lock (in set_page_dirty's __mark_inode_dirty)
+ * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
+ * sb_lock (within inode_lock in fs/fs-writeback.c)
+ * i_pages lock (widely used, in set_page_dirty,
+ * in arch-dependent flush_dcache_mmap_lock,
+ * within bdi.wb->list_lock in __sync_single_inode)
*
- * anon_vma->rwsem,mapping->i_mutex (memory_failure, collect_procs_anon)
+ * anon_vma->rwsem,mapping->i_mmap_rwsem (memory_failure, collect_procs_anon)
* ->tasklist_lock
* pte map lock
*
diff --git a/mm/shmem.c b/mm/shmem.c
index 70d9ce294bb4..3107acee4f71 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -96,7 +96,7 @@ static struct vfsmount *shm_mnt;
/*
* shmem_fallocate communicates with shmem_fault or shmem_writepage via
- * inode->i_private (with i_mutex making sure that it has only one user at
+ * inode->i_private (with i_rwsem making sure that it has only one user at
* a time): we would prefer not to enlarge the shmem inode just for that.
*/
struct shmem_falloc {
@@ -774,7 +774,7 @@ static int shmem_free_swap(struct address_space *mapping,
* Determine (in bytes) how many of the shmem object's pages mapped by the
* given offsets are swapped out.
*
- * This is safe to call without i_mutex or the i_pages lock thanks to RCU,
+ * This is safe to call without i_rwsem or the i_pages lock thanks to RCU,
* as long as the inode doesn't go away and racy results are not a problem.
*/
unsigned long shmem_partial_swap_usage(struct address_space *mapping,
@@ -806,7 +806,7 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
* Determine (in bytes) how many of the shmem object's pages mapped by the
* given vma is swapped out.
*
- * This is safe to call without i_mutex or the i_pages lock thanks to RCU,
+ * This is safe to call without i_rwsem or the i_pages lock thanks to RCU,
* as long as the inode doesn't go away and racy results are not a problem.
*/
unsigned long shmem_swap_usage(struct vm_area_struct *vma)
@@ -1069,7 +1069,7 @@ static int shmem_setattr(struct user_namespace *mnt_userns,
loff_t oldsize = inode->i_size;
loff_t newsize = attr->ia_size;
- /* protected by i_mutex */
+ /* protected by i_rwsem */
if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
(newsize > oldsize && (info->seals & F_SEAL_GROW)))
return -EPERM;
@@ -1696,8 +1696,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
- struct swap_info_struct *si;
- struct page *page = NULL;
+ struct page *page;
swp_entry_t swap;
int error;
@@ -1705,12 +1704,6 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
swap = radix_to_swp_entry(*pagep);
*pagep = NULL;
- /* Prevent swapoff from happening to us. */
- si = get_swap_device(swap);
- if (!si) {
- error = EINVAL;
- goto failed;
- }
/* Look it up and read it in.. */
page = lookup_swap_cache(swap, NULL, 0);
if (!page) {
@@ -1772,8 +1765,6 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
swap_free(swap);
*pagep = page;
- if (si)
- put_swap_device(si);
return 0;
failed:
if (!shmem_confirm_swap(mapping, index, swap))
@@ -1784,9 +1775,6 @@ unlock:
put_page(page);
}
- if (si)
- put_swap_device(si);
-
return error;
}
@@ -2071,7 +2059,7 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
/*
* Trinity finds that probing a hole which tmpfs is punching can
* prevent the hole-punch from ever completing: which in turn
- * locks writers out with its hold on i_mutex. So refrain from
+ * locks writers out with its hold on i_rwsem. So refrain from
* faulting pages into the hole while it's being punched. Although
* shmem_undo_range() does remove the additions, it may be unable to
* keep up, as each new page needs its own unmap_mapping_range() call,
@@ -2082,7 +2070,7 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
* we just need to make racing faults a rare case.
*
* The implementation below would be much simpler if we just used a
- * standard mutex or completion: but we cannot take i_mutex in fault,
+ * standard mutex or completion: but we cannot take i_rwsem in fault,
* and bloating every shmem inode for this unlikely case would be sad.
*/
if (unlikely(inode->i_private)) {
@@ -2482,7 +2470,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
struct shmem_inode_info *info = SHMEM_I(inode);
pgoff_t index = pos >> PAGE_SHIFT;
- /* i_mutex is held by caller */
+ /* i_rwsem is held by caller */
if (unlikely(info->seals & (F_SEAL_GROW |
F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))) {
if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))
@@ -2582,7 +2570,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
/*
* We must evaluate after, since reads (unlike writes)
- * are called without i_mutex protection against truncate
+ * are called without i_rwsem protection against truncate
*/
nr = PAGE_SIZE;
i_size = i_size_read(inode);
@@ -2652,7 +2640,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
return -ENXIO;
inode_lock(inode);
- /* We're holding i_mutex so we can access i_size directly */
+ /* We're holding i_rwsem so we can access i_size directly */
offset = mapping_seek_hole_data(mapping, offset, inode->i_size, whence);
if (offset >= 0)
offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
@@ -2681,7 +2669,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
- /* protected by i_mutex */
+ /* protected by i_rwsem */
if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
error = -EPERM;
goto out;
diff --git a/mm/slub.c b/mm/slub.c
index af984e4990e8..f77d8cd79ef7 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -576,8 +576,8 @@ static void print_section(char *level, char *text, u8 *addr,
unsigned int length)
{
metadata_access_enable();
- print_hex_dump(level, kasan_reset_tag(text), DUMP_PREFIX_ADDRESS,
- 16, 1, addr, length, 1);
+ print_hex_dump(level, text, DUMP_PREFIX_ADDRESS,
+ 16, 1, kasan_reset_tag((void *)addr), length, 1);
metadata_access_disable();
}
@@ -1400,12 +1400,13 @@ check_slabs:
static int __init setup_slub_debug(char *str)
{
slab_flags_t flags;
+ slab_flags_t global_flags;
char *saved_str;
char *slab_list;
bool global_slub_debug_changed = false;
bool slab_list_specified = false;
- slub_debug = DEBUG_DEFAULT_FLAGS;
+ global_flags = DEBUG_DEFAULT_FLAGS;
if (*str++ != '=' || !*str)
/*
* No options specified. Switch on full debugging.
@@ -1417,7 +1418,7 @@ static int __init setup_slub_debug(char *str)
str = parse_slub_debug_flags(str, &flags, &slab_list, true);
if (!slab_list) {
- slub_debug = flags;
+ global_flags = flags;
global_slub_debug_changed = true;
} else {
slab_list_specified = true;
@@ -1426,16 +1427,18 @@ static int __init setup_slub_debug(char *str)
/*
* For backwards compatibility, a single list of flags with list of
- * slabs means debugging is only enabled for those slabs, so the global
- * slub_debug should be 0. We can extended that to multiple lists as
+ * slabs means debugging is only changed for those slabs, so the global
+ * slub_debug should be unchanged (0 or DEBUG_DEFAULT_FLAGS, depending
+ * on CONFIG_SLUB_DEBUG_ON). We can extend that to multiple lists as
* long as there is no option specifying flags without a slab list.
*/
if (slab_list_specified) {
if (!global_slub_debug_changed)
- slub_debug = 0;
+ global_flags = slub_debug;
slub_debug_string = saved_str;
}
out:
+ slub_debug = global_flags;
if (slub_debug != 0 || slub_debug_string)
static_branch_enable(&slub_debug_enabled);
else
@@ -3236,12 +3239,12 @@ struct detached_freelist {
struct kmem_cache *s;
};
-static inline void free_nonslab_page(struct page *page)
+static inline void free_nonslab_page(struct page *page, void *object)
{
unsigned int order = compound_order(page);
VM_BUG_ON_PAGE(!PageCompound(page), page);
- kfree_hook(page_address(page));
+ kfree_hook(object);
mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order));
__free_pages(page, order);
}
@@ -3282,7 +3285,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
if (!s) {
/* Handle kalloc'ed objects */
if (unlikely(!PageSlab(page))) {
- free_nonslab_page(page);
+ free_nonslab_page(page, object);
p[size] = NULL; /* mark object processed */
return size;
}
@@ -4258,7 +4261,7 @@ void kfree(const void *x)
page = virt_to_head_page(x);
if (unlikely(!PageSlab(page))) {
- free_nonslab_page(page);
+ free_nonslab_page(page, object);
return;
}
slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index a66f3e0ec973..16f706c55d92 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -70,9 +70,9 @@ void disable_swap_slots_cache_lock(void)
swap_slot_cache_enabled = false;
if (swap_slot_cache_initialized) {
/* serialize with cpu hotplug operations */
- get_online_cpus();
+ cpus_read_lock();
__drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
- put_online_cpus();
+ cpus_read_unlock();
}
}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index c56aa9ac050d..bc7cee6b2ec5 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -628,13 +628,6 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
if (!mask)
goto skip;
- /* Test swap type to make sure the dereference is safe */
- if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) {
- struct inode *inode = si->swap_file->f_mapping->host;
- if (inode_read_congested(inode))
- goto skip;
- }
-
do_poll = false;
/* Read a page_cluster sized and aligned cluster around offset. */
start_offset = offset & ~mask;
diff --git a/mm/truncate.c b/mm/truncate.c
index 234ddd879caa..44ad5e515140 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -412,7 +412,8 @@ EXPORT_SYMBOL(truncate_inode_pages_range);
* @mapping: mapping to truncate
* @lstart: offset from which to truncate
*
- * Called under (and serialised by) inode->i_mutex.
+ * Called under (and serialised by) inode->i_rwsem and
+ * mapping->invalidate_lock.
*
* Note: When this function returns, there can be a page in the process of
* deletion (inside __delete_from_page_cache()) in the specified range. Thus
@@ -429,7 +430,7 @@ EXPORT_SYMBOL(truncate_inode_pages);
* truncate_inode_pages_final - truncate *all* pages before inode dies
* @mapping: mapping to truncate
*
- * Called under (and serialized by) inode->i_mutex.
+ * Called under (and serialized by) inode->i_rwsem.
*
* Filesystems have to use this in the .evict_inode path to inform the
* VM that this is the final truncate and the inode is going away.
@@ -748,7 +749,7 @@ EXPORT_SYMBOL(truncate_pagecache);
* setattr function when ATTR_SIZE is passed in.
*
* Must be called with a lock serializing truncates and writes (generally
- * i_mutex but e.g. xfs uses a different lock) and before all filesystem
+ * i_rwsem but e.g. xfs uses a different lock) and before all filesystem
* specific block truncation has been performed.
*/
void truncate_setsize(struct inode *inode, loff_t newsize)
@@ -777,7 +778,7 @@ EXPORT_SYMBOL(truncate_setsize);
*
* The function must be called after i_size is updated so that page fault
* coming after we unlock the page will already see the new i_size.
- * The function must be called while we still hold i_mutex - this not only
+ * The function must be called while we still hold i_rwsem - this not only
* makes sure i_size is stable but also that userspace cannot observe new
* i_size value before we are prepared to store mmap writes at new inode size.
*/
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4620df62f0ff..eeae2f6bc532 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -100,9 +100,12 @@ struct scan_control {
unsigned int may_swap:1;
/*
- * Cgroups are not reclaimed below their configured memory.low,
- * unless we threaten to OOM. If any cgroups are skipped due to
- * memory.low and nothing was reclaimed, go back for memory.low.
+ * Cgroup memory below memory.low is protected as long as we
+ * don't threaten to OOM. If any cgroup is reclaimed at
+ * reduced force or passed over entirely due to its memory.low
+ * setting (memcg_low_skipped), and nothing is reclaimed as a
+ * result, then go back for one more cycle that reclaims the protected
+ * memory (memcg_low_reclaim) to avert OOM.
*/
unsigned int memcg_low_reclaim:1;
unsigned int memcg_low_skipped:1;
@@ -2537,15 +2540,14 @@ out:
for_each_evictable_lru(lru) {
int file = is_file_lru(lru);
unsigned long lruvec_size;
+ unsigned long low, min;
unsigned long scan;
- unsigned long protection;
lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
- protection = mem_cgroup_protection(sc->target_mem_cgroup,
- memcg,
- sc->memcg_low_reclaim);
+ mem_cgroup_protection(sc->target_mem_cgroup, memcg,
+ &min, &low);
- if (protection) {
+ if (min || low) {
/*
* Scale a cgroup's reclaim pressure by proportioning
* its current usage to its memory.low or memory.min
@@ -2576,6 +2578,15 @@ out:
* hard protection.
*/
unsigned long cgroup_size = mem_cgroup_size(memcg);
+ unsigned long protection;
+
+ /* memory.low scaling, make sure we retry before OOM */
+ if (!sc->memcg_low_reclaim && low > min) {
+ protection = low;
+ sc->memcg_low_skipped = 1;
+ } else {
+ protection = min;
+ }
/* Avoid TOCTOU with earlier protection check */
cgroup_size = max(cgroup_size, protection);
@@ -4413,11 +4424,13 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
.may_swap = 1,
.reclaim_idx = gfp_zone(gfp_mask),
};
+ unsigned long pflags;
trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order,
sc.gfp_mask);
cond_resched();
+ psi_memstall_enter(&pflags);
fs_reclaim_acquire(sc.gfp_mask);
/*
* We need to be able to allocate from the reserves for RECLAIM_UNMAP
@@ -4442,6 +4455,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
current->flags &= ~PF_SWAPWRITE;
memalloc_noreclaim_restore(noreclaim_flag);
fs_reclaim_release(sc.gfp_mask);
+ psi_memstall_leave(&pflags);
trace_mm_vmscan_node_reclaim_end(sc.nr_reclaimed);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b0534e068166..a7ed56ac4c0b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -129,9 +129,9 @@ static void sum_vm_events(unsigned long *ret)
*/
void all_vm_events(unsigned long *ret)
{
- get_online_cpus();
+ cpus_read_lock();
sum_vm_events(ret);
- put_online_cpus();
+ cpus_read_unlock();
}
EXPORT_SYMBOL_GPL(all_vm_events);
@@ -1948,7 +1948,7 @@ static void vmstat_shepherd(struct work_struct *w)
{
int cpu;
- get_online_cpus();
+ cpus_read_lock();
/* Check processors whose vmstat worker threads have been disabled */
for_each_online_cpu(cpu) {
struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
@@ -1958,7 +1958,7 @@ static void vmstat_shepherd(struct work_struct *w)
cond_resched();
}
- put_online_cpus();
+ cpus_read_unlock();
schedule_delayed_work(&shepherd,
round_jiffies_relative(sysctl_stat_interval));
@@ -2037,9 +2037,9 @@ void __init init_mm_internals(void)
if (ret < 0)
pr_err("vmstat: failed to register 'online' hotplug state\n");
- get_online_cpus();
+ cpus_read_lock();
init_cpu_node_state();
- put_online_cpus();
+ cpus_read_unlock();
start_shepherd_timer();
#endif
diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c
index 1c140af06d52..600b9563bfc5 100644
--- a/net/6lowpan/debugfs.c
+++ b/net/6lowpan/debugfs.c
@@ -170,7 +170,8 @@ static void lowpan_dev_debugfs_ctx_init(struct net_device *dev,
struct dentry *root;
char buf[32];
- WARN_ON_ONCE(id > LOWPAN_IPHC_CTX_TABLE_SIZE);
+ if (WARN_ON_ONCE(id >= LOWPAN_IPHC_CTX_TABLE_SIZE))
+ return;
sprintf(buf, "%d", id);
diff --git a/net/802/Makefile b/net/802/Makefile
index 19406a87bdaa..bfed80221b8b 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -8,7 +8,6 @@ obj-$(CONFIG_LLC) += p8022.o psnap.o
obj-$(CONFIG_NET_FC) += fc.o
obj-$(CONFIG_FDDI) += fddi.o
obj-$(CONFIG_HIPPI) += hippi.o
-obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o
obj-$(CONFIG_ATALK) += p8022.o psnap.o
obj-$(CONFIG_STP) += stp.o
obj-$(CONFIG_GARP) += garp.o
diff --git a/net/802/p8023.c b/net/802/p8023.c
deleted file mode 100644
index 19cd56990db2..000000000000
--- a/net/802/p8023.c
+++ /dev/null
@@ -1,60 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * NET3: 802.3 data link hooks used for IPX 802.3
- *
- * 802.3 isn't really a protocol data link layer. Some old IPX stuff
- * uses it however. Note that there is only one 802.3 protocol layer
- * in the system. We don't currently support different protocols
- * running raw 802.3 on different devices. Thankfully nobody else
- * has done anything like the old IPX.
- */
-
-#include <linux/in.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-
-#include <net/datalink.h>
-#include <net/p8022.h>
-
-/*
- * Place an 802.3 header on a packet. The driver will do the mac
- * addresses, we just need to give it the buffer length.
- */
-static int p8023_request(struct datalink_proto *dl,
- struct sk_buff *skb, unsigned char *dest_node)
-{
- struct net_device *dev = skb->dev;
-
- dev_hard_header(skb, dev, ETH_P_802_3, dest_node, NULL, skb->len);
- return dev_queue_xmit(skb);
-}
-
-/*
- * Create an 802.3 client. Note there can be only one 802.3 client
- */
-struct datalink_proto *make_8023_client(void)
-{
- struct datalink_proto *proto = kmalloc(sizeof(*proto), GFP_ATOMIC);
-
- if (proto) {
- proto->header_length = 0;
- proto->request = p8023_request;
- }
- return proto;
-}
-
-/*
- * Destroy the 802.3 client.
- */
-void destroy_8023_client(struct datalink_proto *dl)
-{
- kfree(dl);
-}
-
-EXPORT_SYMBOL(destroy_8023_client);
-EXPORT_SYMBOL(make_8023_client);
-
-MODULE_LICENSE("GPL");
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 4cdf8416869d..55275ef9a31a 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -67,7 +67,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg,
return 0;
size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN;
- array = kzalloc(size, GFP_KERNEL);
+ array = kzalloc(size, GFP_KERNEL_ACCOUNT);
if (array == NULL)
return -ENOBUFS;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index a0367b37512d..0c21d1fec852 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -372,8 +372,8 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCGMIIREG:
case SIOCSMIIREG:
case SIOCGHWTSTAMP:
- if (netif_device_present(real_dev) && ops->ndo_do_ioctl)
- err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd);
+ if (netif_device_present(real_dev) && ops->ndo_eth_ioctl)
+ err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd);
break;
}
@@ -814,7 +814,7 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_set_mac_address = vlan_dev_set_mac_address,
.ndo_set_rx_mode = vlan_dev_set_rx_mode,
.ndo_change_rx_flags = vlan_dev_change_rx_flags,
- .ndo_do_ioctl = vlan_dev_ioctl,
+ .ndo_eth_ioctl = vlan_dev_ioctl,
.ndo_neigh_setup = vlan_dev_neigh_setup,
.ndo_get_stats64 = vlan_dev_get_stats64,
#if IS_ENABLED(CONFIG_FCOE)
diff --git a/net/Kconfig b/net/Kconfig
index c7392c449b25..fb13460c6dab 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -363,6 +363,7 @@ source "net/bluetooth/Kconfig"
source "net/rxrpc/Kconfig"
source "net/kcm/Kconfig"
source "net/strparser/Kconfig"
+source "net/mctp/Kconfig"
config FIB_RULES
bool
diff --git a/net/Makefile b/net/Makefile
index 9ca9572188fe..fbfeb8a0bb37 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -78,3 +78,4 @@ obj-$(CONFIG_QRTR) += qrtr/
obj-$(CONFIG_NET_NCSI) += ncsi/
obj-$(CONFIG_XDP_SOCKETS) += xdp/
obj-$(CONFIG_MPTCP) += mptcp/
+obj-$(CONFIG_MCTP) += mctp/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 8ade5a4ceaf5..bf5736c1d458 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -666,7 +666,7 @@ static int atif_ioctl(int cmd, void __user *arg)
struct rtentry rtdef;
int add_route;
- if (copy_from_user(&atreq, arg, sizeof(atreq)))
+ if (get_user_ifreq(&atreq, NULL, arg))
return -EFAULT;
dev = __dev_get_by_name(&init_net, atreq.ifr_name);
@@ -865,7 +865,7 @@ static int atif_ioctl(int cmd, void __user *arg)
return 0;
}
- return copy_to_user(arg, &atreq, sizeof(atreq)) ? -EFAULT : 0;
+ return put_user_ifreq(&atreq, arg);
}
static int atrtr_ioctl_addrt(struct rtentry *rt)
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index e4f63dd43cb5..36249776c021 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -193,10 +193,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
skb_pull(skb, AX25_KISS_HEADER_LEN);
if (digipeat != NULL) {
- if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) {
- kfree_skb(skb);
+ if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL)
goto put;
- }
skb = ourskb;
}
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index f53751ba81b3..22f2f66c6e0a 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -325,7 +325,6 @@ void ax25_kick(ax25_cb *ax25)
void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
{
- struct sk_buff *skbn;
unsigned char *ptr;
int headroom;
@@ -336,18 +335,12 @@ void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
headroom = ax25_addr_size(ax25->digipeat);
- if (skb_headroom(skb) < headroom) {
- if ((skbn = skb_realloc_headroom(skb, headroom)) == NULL) {
+ if (unlikely(skb_headroom(skb) < headroom)) {
+ skb = skb_expand_head(skb, headroom);
+ if (!skb) {
printk(KERN_CRIT "AX.25: ax25_transmit_buffer - out of memory\n");
- kfree_skb(skb);
return;
}
-
- if (skb->sk != NULL)
- skb_set_owner_w(skbn, skb->sk);
-
- consume_skb(skb);
- skb = skbn;
}
ptr = skb_push(skb, headroom);
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index b40e0bce67ea..d0b2e094bd55 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -441,24 +441,17 @@ put:
struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
ax25_address *dest, ax25_digi *digi)
{
- struct sk_buff *skbn;
unsigned char *bp;
int len;
len = digi->ndigi * AX25_ADDR_LEN;
- if (skb_headroom(skb) < len) {
- if ((skbn = skb_realloc_headroom(skb, len)) == NULL) {
+ if (unlikely(skb_headroom(skb) < len)) {
+ skb = skb_expand_head(skb, len);
+ if (!skb) {
printk(KERN_CRIT "AX.25: ax25_dg_build_path - out of memory\n");
return NULL;
}
-
- if (skb->sk != NULL)
- skb_set_owner_w(skbn, skb->sk);
-
- consume_skb(skb);
-
- skb = skbn;
}
bp = skb_push(skb, len);
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 12022378f892..f94f538fa382 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -519,8 +519,7 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet,
}
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
return res;
}
@@ -857,8 +856,7 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
rcu_read_unlock();
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
}
static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
@@ -1046,14 +1044,10 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
unlock:
rcu_read_unlock();
out:
- if (neigh_node)
- batadv_neigh_node_put(neigh_node);
- if (router)
- batadv_neigh_node_put(router);
- if (neigh_ifinfo)
- batadv_neigh_ifinfo_put(neigh_ifinfo);
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
+ batadv_neigh_node_put(neigh_node);
+ batadv_neigh_node_put(router);
+ batadv_neigh_ifinfo_put(neigh_ifinfo);
+ batadv_neigh_ifinfo_put(router_ifinfo);
}
/**
@@ -1194,8 +1188,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
ret = true;
out:
- if (neigh_node)
- batadv_neigh_node_put(neigh_node);
+ batadv_neigh_node_put(neigh_node);
return ret;
}
@@ -1496,16 +1489,11 @@ out_neigh:
if (orig_neigh_node && !is_single_hop_neigh)
batadv_orig_node_put(orig_neigh_node);
out:
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
- if (router)
- batadv_neigh_node_put(router);
- if (router_router)
- batadv_neigh_node_put(router_router);
- if (orig_neigh_router)
- batadv_neigh_node_put(orig_neigh_router);
- if (hardif_neigh)
- batadv_hardif_neigh_put(hardif_neigh);
+ batadv_neigh_ifinfo_put(router_ifinfo);
+ batadv_neigh_node_put(router);
+ batadv_neigh_node_put(router_router);
+ batadv_neigh_node_put(orig_neigh_router);
+ batadv_hardif_neigh_put(hardif_neigh);
consume_skb(skb_priv);
}
@@ -1926,8 +1914,7 @@ batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
}
out:
- if (neigh_node_best)
- batadv_neigh_node_put(neigh_node_best);
+ batadv_neigh_node_put(neigh_node_best);
*sub_s = 0;
return 0;
@@ -2049,10 +2036,8 @@ static bool batadv_iv_ogm_neigh_diff(struct batadv_neigh_node *neigh1,
*diff = (int)tq1 - (int)tq2;
out:
- if (neigh1_ifinfo)
- batadv_neigh_ifinfo_put(neigh1_ifinfo);
- if (neigh2_ifinfo)
- batadv_neigh_ifinfo_put(neigh2_ifinfo);
+ batadv_neigh_ifinfo_put(neigh1_ifinfo);
+ batadv_neigh_ifinfo_put(neigh2_ifinfo);
return ret;
}
@@ -2299,8 +2284,7 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
if (tmp_gw_factor > max_gw_factor ||
(tmp_gw_factor == max_gw_factor &&
tq_avg > max_tq)) {
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
+ batadv_gw_node_put(curr_gw);
curr_gw = gw_node;
kref_get(&curr_gw->refcount);
}
@@ -2314,8 +2298,7 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
* $routing_class more tq points)
*/
if (tq_avg > max_tq) {
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
+ batadv_gw_node_put(curr_gw);
curr_gw = gw_node;
kref_get(&curr_gw->refcount);
}
@@ -2332,8 +2315,7 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
next:
batadv_neigh_node_put(router);
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
+ batadv_neigh_ifinfo_put(router_ifinfo);
}
rcu_read_unlock();
@@ -2397,14 +2379,10 @@ static bool batadv_iv_gw_is_eligible(struct batadv_priv *bat_priv,
ret = true;
out:
- if (router_gw_ifinfo)
- batadv_neigh_ifinfo_put(router_gw_ifinfo);
- if (router_orig_ifinfo)
- batadv_neigh_ifinfo_put(router_orig_ifinfo);
- if (router_gw)
- batadv_neigh_node_put(router_gw);
- if (router_orig)
- batadv_neigh_node_put(router_orig);
+ batadv_neigh_ifinfo_put(router_gw_ifinfo);
+ batadv_neigh_ifinfo_put(router_orig_ifinfo);
+ batadv_neigh_node_put(router_gw);
+ batadv_neigh_node_put(router_orig);
return ret;
}
@@ -2479,12 +2457,9 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid,
ret = 0;
out:
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
- if (router)
- batadv_neigh_node_put(router);
+ batadv_gw_node_put(curr_gw);
+ batadv_neigh_ifinfo_put(router_ifinfo);
+ batadv_neigh_node_put(router);
return ret;
}
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index b98aea958e3d..54e41fc709c3 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -106,8 +106,7 @@ static void batadv_v_iface_update_mac(struct batadv_hard_iface *hard_iface)
batadv_v_primary_iface_set(hard_iface);
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
}
static void
@@ -366,8 +365,7 @@ batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
}
out:
- if (neigh_node_best)
- batadv_neigh_node_put(neigh_node_best);
+ batadv_neigh_node_put(neigh_node_best);
*sub_s = 0;
return 0;
@@ -568,10 +566,8 @@ static int batadv_v_gw_throughput_get(struct batadv_gw_node *gw_node, u32 *bw)
ret = 0;
out:
- if (router)
- batadv_neigh_node_put(router);
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
+ batadv_neigh_node_put(router);
+ batadv_neigh_ifinfo_put(router_ifinfo);
return ret;
}
@@ -599,8 +595,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
if (curr_gw && bw <= max_bw)
goto next;
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
+ batadv_gw_node_put(curr_gw);
curr_gw = gw_node;
kref_get(&curr_gw->refcount);
@@ -662,10 +657,8 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
ret = true;
out:
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
- if (orig_gw)
- batadv_gw_node_put(orig_gw);
+ batadv_gw_node_put(curr_gw);
+ batadv_gw_node_put(orig_gw);
return ret;
}
@@ -764,12 +757,9 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid,
ret = 0;
out:
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
- if (router)
- batadv_neigh_node_put(router);
+ batadv_gw_node_put(curr_gw);
+ batadv_neigh_ifinfo_put(router_ifinfo);
+ batadv_neigh_node_put(router);
return ret;
}
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 423c2d171703..71999e13f729 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -486,14 +486,11 @@ static void batadv_v_elp_neigh_update(struct batadv_priv *bat_priv,
hardif_neigh->bat_v.elp_interval = ntohl(elp_packet->elp_interval);
hardif_free:
- if (hardif_neigh)
- batadv_hardif_neigh_put(hardif_neigh);
+ batadv_hardif_neigh_put(hardif_neigh);
neigh_free:
- if (neigh)
- batadv_neigh_node_put(neigh);
+ batadv_neigh_node_put(neigh);
orig_free:
- if (orig_neigh)
- batadv_orig_node_put(orig_neigh);
+ batadv_orig_node_put(orig_neigh);
}
/**
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index a0a9636d1740..1d750f3cb2e4 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -584,12 +584,9 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv,
batadv_v_ogm_queue_on_if(skb, if_outgoing);
out:
- if (orig_ifinfo)
- batadv_orig_ifinfo_put(orig_ifinfo);
- if (router)
- batadv_neigh_node_put(router);
- if (neigh_ifinfo)
- batadv_neigh_ifinfo_put(neigh_ifinfo);
+ batadv_orig_ifinfo_put(orig_ifinfo);
+ batadv_neigh_node_put(router);
+ batadv_neigh_ifinfo_put(neigh_ifinfo);
}
/**
@@ -669,10 +666,8 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv,
else
ret = 0;
out:
- if (orig_ifinfo)
- batadv_orig_ifinfo_put(orig_ifinfo);
- if (neigh_ifinfo)
- batadv_neigh_ifinfo_put(neigh_ifinfo);
+ batadv_orig_ifinfo_put(orig_ifinfo);
+ batadv_neigh_ifinfo_put(neigh_ifinfo);
return ret;
}
@@ -763,16 +758,11 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node);
out:
- if (router)
- batadv_neigh_node_put(router);
- if (orig_neigh_router)
- batadv_neigh_node_put(orig_neigh_router);
- if (orig_neigh_node)
- batadv_orig_node_put(orig_neigh_node);
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
- if (neigh_ifinfo)
- batadv_neigh_ifinfo_put(neigh_ifinfo);
+ batadv_neigh_node_put(router);
+ batadv_neigh_node_put(orig_neigh_router);
+ batadv_orig_node_put(orig_neigh_node);
+ batadv_neigh_ifinfo_put(router_ifinfo);
+ batadv_neigh_ifinfo_put(neigh_ifinfo);
return forward;
}
@@ -978,12 +968,9 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
}
rcu_read_unlock();
out:
- if (orig_node)
- batadv_orig_node_put(orig_node);
- if (neigh_node)
- batadv_neigh_node_put(neigh_node);
- if (hardif_neigh)
- batadv_hardif_neigh_put(hardif_neigh);
+ batadv_orig_node_put(orig_node);
+ batadv_neigh_node_put(neigh_node);
+ batadv_hardif_neigh_put(hardif_neigh);
}
/**
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 63d42dcc9324..1669744304c5 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -162,6 +162,9 @@ static void batadv_backbone_gw_release(struct kref *ref)
*/
static void batadv_backbone_gw_put(struct batadv_bla_backbone_gw *backbone_gw)
{
+ if (!backbone_gw)
+ return;
+
kref_put(&backbone_gw->refcount, batadv_backbone_gw_release);
}
@@ -197,6 +200,9 @@ static void batadv_claim_release(struct kref *ref)
*/
static void batadv_claim_put(struct batadv_bla_claim *claim)
{
+ if (!claim)
+ return;
+
kref_put(&claim->refcount, batadv_claim_release);
}
@@ -439,8 +445,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
netif_rx_any_context(skb);
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
}
/**
@@ -1498,8 +1503,7 @@ static void batadv_bla_periodic_work(struct work_struct *work)
rcu_read_unlock();
}
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work,
msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH));
@@ -1808,8 +1812,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
batadv_hash_destroy(bat_priv->bla.backbone_hash);
bat_priv->bla.backbone_hash = NULL;
}
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
}
/**
@@ -1996,10 +1999,8 @@ handled:
ret = true;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (claim)
- batadv_claim_put(claim);
+ batadv_hardif_put(primary_if);
+ batadv_claim_put(claim);
return ret;
}
@@ -2103,10 +2104,8 @@ allow:
handled:
ret = true;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (claim)
- batadv_claim_put(claim);
+ batadv_hardif_put(primary_if);
+ batadv_claim_put(claim);
return ret;
}
@@ -2271,11 +2270,9 @@ int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb)
ret = msg->len;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
return ret;
}
@@ -2443,11 +2440,9 @@ int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb)
ret = msg->len;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
return ret;
}
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 8c95a11a830a..2f008e329007 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -127,6 +127,9 @@ static void batadv_dat_entry_release(struct kref *ref)
*/
static void batadv_dat_entry_put(struct batadv_dat_entry *dat_entry)
{
+ if (!dat_entry)
+ return;
+
kref_put(&dat_entry->refcount, batadv_dat_entry_release);
}
@@ -405,8 +408,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
&dat_entry->ip, dat_entry->mac_addr, batadv_print_vid(vid));
out:
- if (dat_entry)
- batadv_dat_entry_put(dat_entry);
+ batadv_dat_entry_put(dat_entry);
}
#ifdef CONFIG_BATMAN_ADV_DEBUG
@@ -594,8 +596,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
continue;
max = tmp_max;
- if (max_orig_node)
- batadv_orig_node_put(max_orig_node);
+ batadv_orig_node_put(max_orig_node);
max_orig_node = orig_node;
}
rcu_read_unlock();
@@ -981,11 +982,9 @@ int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
ret = msg->len;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
return ret;
}
@@ -1218,8 +1217,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
BATADV_P_DAT_DHT_GET);
}
out:
- if (dat_entry)
- batadv_dat_entry_put(dat_entry);
+ batadv_dat_entry_put(dat_entry);
return ret;
}
@@ -1286,8 +1284,7 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
ret = true;
}
out:
- if (dat_entry)
- batadv_dat_entry_put(dat_entry);
+ batadv_dat_entry_put(dat_entry);
if (ret)
kfree_skb(skb);
return ret;
@@ -1420,8 +1417,7 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
out:
if (dropped)
kfree_skb(skb);
- if (dat_entry)
- batadv_dat_entry_put(dat_entry);
+ batadv_dat_entry_put(dat_entry);
/* if dropped == false -> deliver to the interface */
return dropped;
}
@@ -1830,7 +1826,6 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
ret = true;
out:
- if (dat_entry)
- batadv_dat_entry_put(dat_entry);
+ batadv_dat_entry_put(dat_entry);
return ret;
}
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index a5d9d800082b..0899a729a23f 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -381,10 +381,8 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
}
out:
- if (orig_node_dst)
- batadv_orig_node_put(orig_node_dst);
- if (neigh_node)
- batadv_neigh_node_put(neigh_node);
+ batadv_orig_node_put(orig_node_dst);
+ batadv_neigh_node_put(neigh_node);
return ret;
}
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 007f2827935d..b7466136e292 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -59,7 +59,7 @@
* after rcu grace period
* @ref: kref pointer of the gw_node
*/
-static void batadv_gw_node_release(struct kref *ref)
+void batadv_gw_node_release(struct kref *ref)
{
struct batadv_gw_node *gw_node;
@@ -70,16 +70,6 @@ static void batadv_gw_node_release(struct kref *ref)
}
/**
- * batadv_gw_node_put() - decrement the gw_node refcounter and possibly release
- * it
- * @gw_node: gateway node to free
- */
-void batadv_gw_node_put(struct batadv_gw_node *gw_node)
-{
- kref_put(&gw_node->refcount, batadv_gw_node_release);
-}
-
-/**
* batadv_gw_get_selected_gw_node() - Get currently selected gateway
* @bat_priv: the bat priv with all the soft interface information
*
@@ -130,8 +120,7 @@ batadv_gw_get_selected_orig(struct batadv_priv *bat_priv)
unlock:
rcu_read_unlock();
out:
- if (gw_node)
- batadv_gw_node_put(gw_node);
+ batadv_gw_node_put(gw_node);
return orig_node;
}
@@ -148,8 +137,7 @@ static void batadv_gw_select(struct batadv_priv *bat_priv,
curr_gw_node = rcu_replace_pointer(bat_priv->gw.curr_gw, new_gw_node,
true);
- if (curr_gw_node)
- batadv_gw_node_put(curr_gw_node);
+ batadv_gw_node_put(curr_gw_node);
spin_unlock_bh(&bat_priv->gw.list_lock);
}
@@ -284,14 +272,10 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
batadv_gw_select(bat_priv, next_gw);
out:
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
- if (next_gw)
- batadv_gw_node_put(next_gw);
- if (router)
- batadv_neigh_node_put(router);
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
+ batadv_gw_node_put(curr_gw);
+ batadv_gw_node_put(next_gw);
+ batadv_neigh_node_put(router);
+ batadv_neigh_ifinfo_put(router_ifinfo);
}
/**
@@ -325,8 +309,7 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv,
reselect:
batadv_gw_reselect(bat_priv);
out:
- if (curr_gw_orig)
- batadv_orig_node_put(curr_gw_orig);
+ batadv_orig_node_put(curr_gw_orig);
}
/**
@@ -466,13 +449,11 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
if (gw_node == curr_gw)
batadv_gw_reselect(bat_priv);
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
+ batadv_gw_node_put(curr_gw);
}
out:
- if (gw_node)
- batadv_gw_node_put(gw_node);
+ batadv_gw_node_put(gw_node);
}
/**
@@ -555,10 +536,8 @@ int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb)
ret = msg->len;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ batadv_hardif_put(primary_if);
+ dev_put(soft_iface);
return ret;
}
@@ -780,15 +759,10 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
batadv_neigh_ifinfo_put(old_ifinfo);
out:
- if (orig_dst_node)
- batadv_orig_node_put(orig_dst_node);
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
- if (gw_node)
- batadv_gw_node_put(gw_node);
- if (neigh_old)
- batadv_neigh_node_put(neigh_old);
- if (neigh_curr)
- batadv_neigh_node_put(neigh_curr);
+ batadv_orig_node_put(orig_dst_node);
+ batadv_gw_node_put(curr_gw);
+ batadv_gw_node_put(gw_node);
+ batadv_neigh_node_put(neigh_old);
+ batadv_neigh_node_put(neigh_curr);
return out_of_range;
}
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 2ae5846ef958..95c2ccdaa554 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -9,6 +9,7 @@
#include "main.h"
+#include <linux/kref.h>
#include <linux/netlink.h>
#include <linux/skbuff.h>
#include <linux/types.h>
@@ -27,7 +28,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
void batadv_gw_node_delete(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node);
void batadv_gw_node_free(struct batadv_priv *bat_priv);
-void batadv_gw_node_put(struct batadv_gw_node *gw_node);
+void batadv_gw_node_release(struct kref *ref);
struct batadv_gw_node *
batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv);
int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb);
@@ -38,4 +39,17 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node);
+/**
+ * batadv_gw_node_put() - decrement the gw_node refcounter and possibly release
+ * it
+ * @gw_node: gateway node to free
+ */
+static inline void batadv_gw_node_put(struct batadv_gw_node *gw_node)
+{
+ if (!gw_node)
+ return;
+
+ kref_put(&gw_node->refcount, batadv_gw_node_release);
+}
+
#endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index fdde305a198e..9349c76f30c5 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -10,7 +10,7 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
-#include <linux/kernel.h>
+#include <linux/kstrtox.h>
#include <linux/limits.h>
#include <linux/math64.h>
#include <linux/netdevice.h>
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 55d97e18aa4a..8a2b78f9c4b2 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -236,8 +236,7 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
real_netdev = dev_get_by_index(real_net, ifindex);
out:
- if (hard_iface)
- batadv_hardif_put(hard_iface);
+ batadv_hardif_put(hard_iface);
return real_netdev;
}
@@ -457,8 +456,7 @@ static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv,
batadv_dat_init_own_addr(bat_priv, primary_if);
batadv_bla_update_orig_address(bat_priv, primary_if, oldif);
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
}
static void batadv_primary_if_select(struct batadv_priv *bat_priv,
@@ -481,8 +479,7 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv,
batadv_primary_if_update_addr(bat_priv, curr_hard_iface);
out:
- if (curr_hard_iface)
- batadv_hardif_put(curr_hard_iface);
+ batadv_hardif_put(curr_hard_iface);
}
static bool
@@ -657,8 +654,7 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface)
bat_priv->algo_ops->iface.activate(hard_iface);
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
}
static void
@@ -811,8 +807,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface)
new_if = batadv_hardif_get_active(hard_iface->soft_iface);
batadv_primary_if_select(bat_priv, new_if);
- if (new_if)
- batadv_hardif_put(new_if);
+ batadv_hardif_put(new_if);
}
bat_priv->algo_ops->iface.disable(hard_iface);
@@ -834,8 +829,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface)
batadv_hardif_put(hard_iface);
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
}
static struct batadv_hard_iface *
@@ -990,8 +984,7 @@ static int batadv_hard_if_event(struct notifier_block *this,
hardif_put:
batadv_hardif_put(hard_iface);
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
return NOTIFY_DONE;
}
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 8cb2a1f10080..64f660dbbe54 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -89,6 +89,9 @@ int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing,
*/
static inline void batadv_hardif_put(struct batadv_hard_iface *hard_iface)
{
+ if (!hard_iface)
+ return;
+
kref_put(&hard_iface->refcount, batadv_hardif_release);
}
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 014235fd4681..058b8f2eef65 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2021.2"
+#define BATADV_SOURCE_VERSION "2021.3"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 923e2197c2db..a3b6658ed789 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -91,8 +91,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
upper = netdev_master_upper_dev_get_rcu(upper);
} while (upper && !(upper->priv_flags & IFF_EBRIDGE));
- if (upper)
- dev_hold(upper);
+ dev_hold(upper);
rcu_read_unlock();
return upper;
@@ -509,8 +508,7 @@ batadv_mcast_mla_softif_get(struct net_device *dev,
}
out:
- if (bridge)
- dev_put(bridge);
+ dev_put(bridge);
return ret4 + ret6;
}
@@ -2239,12 +2237,11 @@ batadv_mcast_netlink_get_primary(struct netlink_callback *cb,
}
out:
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
if (!ret && primary_if)
*primary_if = hard_iface;
- else if (hard_iface)
+ else
batadv_hardif_put(hard_iface);
return ret;
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index b6cc746e01a6..29276284d281 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -359,15 +359,13 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg,
atomic_read(&bat_priv->orig_interval)))
goto nla_put_failure;
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
genlmsg_end(msg, hdr);
return 0;
nla_put_failure:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
genlmsg_cancel(msg, hdr);
return -EMSGSIZE;
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 4bb76b434d07..9f06132e007d 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -217,6 +217,9 @@ static void batadv_nc_node_release(struct kref *ref)
*/
static void batadv_nc_node_put(struct batadv_nc_node *nc_node)
{
+ if (!nc_node)
+ return;
+
kref_put(&nc_node->refcount, batadv_nc_node_release);
}
@@ -241,6 +244,9 @@ static void batadv_nc_path_release(struct kref *ref)
*/
static void batadv_nc_path_put(struct batadv_nc_path *nc_path)
{
+ if (!nc_path)
+ return;
+
kref_put(&nc_path->refcount, batadv_nc_path_release);
}
@@ -930,10 +936,8 @@ void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
out_nc_node->last_seen = jiffies;
out:
- if (in_nc_node)
- batadv_nc_node_put(in_nc_node);
- if (out_nc_node)
- batadv_nc_node_put(out_nc_node);
+ batadv_nc_node_put(in_nc_node);
+ batadv_nc_node_put(out_nc_node);
}
/**
@@ -1209,14 +1213,10 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
batadv_send_unicast_skb(skb_dest, first_dest);
res = true;
out:
- if (router_neigh)
- batadv_neigh_node_put(router_neigh);
- if (router_coding)
- batadv_neigh_node_put(router_coding);
- if (router_neigh_ifinfo)
- batadv_neigh_ifinfo_put(router_neigh_ifinfo);
- if (router_coding_ifinfo)
- batadv_neigh_ifinfo_put(router_coding_ifinfo);
+ batadv_neigh_node_put(router_neigh);
+ batadv_neigh_node_put(router_coding);
+ batadv_neigh_ifinfo_put(router_neigh_ifinfo);
+ batadv_neigh_ifinfo_put(router_coding_ifinfo);
return res;
}
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index da7249448474..aadc653ca1d8 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -177,7 +177,7 @@ out:
* and queue for free after rcu grace period
* @ref: kref pointer of the originator-vlan object
*/
-static void batadv_orig_node_vlan_release(struct kref *ref)
+void batadv_orig_node_vlan_release(struct kref *ref)
{
struct batadv_orig_node_vlan *orig_vlan;
@@ -187,16 +187,6 @@ static void batadv_orig_node_vlan_release(struct kref *ref)
}
/**
- * batadv_orig_node_vlan_put() - decrement the refcounter and possibly release
- * the originator-vlan object
- * @orig_vlan: the originator-vlan object to release
- */
-void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan)
-{
- kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release);
-}
-
-/**
* batadv_originator_init() - Initialize all originator structures
* @bat_priv: the bat priv with all the soft interface information
*
@@ -231,7 +221,7 @@ err:
* free after rcu grace period
* @ref: kref pointer of the neigh_ifinfo
*/
-static void batadv_neigh_ifinfo_release(struct kref *ref)
+void batadv_neigh_ifinfo_release(struct kref *ref)
{
struct batadv_neigh_ifinfo *neigh_ifinfo;
@@ -244,21 +234,11 @@ static void batadv_neigh_ifinfo_release(struct kref *ref)
}
/**
- * batadv_neigh_ifinfo_put() - decrement the refcounter and possibly release
- * the neigh_ifinfo
- * @neigh_ifinfo: the neigh_ifinfo object to release
- */
-void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo)
-{
- kref_put(&neigh_ifinfo->refcount, batadv_neigh_ifinfo_release);
-}
-
-/**
* batadv_hardif_neigh_release() - release hardif neigh node from lists and
* queue for free after rcu grace period
* @ref: kref pointer of the neigh_node
*/
-static void batadv_hardif_neigh_release(struct kref *ref)
+void batadv_hardif_neigh_release(struct kref *ref)
{
struct batadv_hardif_neigh_node *hardif_neigh;
@@ -274,21 +254,11 @@ static void batadv_hardif_neigh_release(struct kref *ref)
}
/**
- * batadv_hardif_neigh_put() - decrement the hardif neighbors refcounter
- * and possibly release it
- * @hardif_neigh: hardif neigh neighbor to free
- */
-void batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh)
-{
- kref_put(&hardif_neigh->refcount, batadv_hardif_neigh_release);
-}
-
-/**
* batadv_neigh_node_release() - release neigh_node from lists and queue for
* free after rcu grace period
* @ref: kref pointer of the neigh_node
*/
-static void batadv_neigh_node_release(struct kref *ref)
+void batadv_neigh_node_release(struct kref *ref)
{
struct hlist_node *node_tmp;
struct batadv_neigh_node *neigh_node;
@@ -309,16 +279,6 @@ static void batadv_neigh_node_release(struct kref *ref)
}
/**
- * batadv_neigh_node_put() - decrement the neighbors refcounter and possibly
- * release it
- * @neigh_node: neigh neighbor to free
- */
-void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
-{
- kref_put(&neigh_node->refcount, batadv_neigh_node_release);
-}
-
-/**
* batadv_orig_router_get() - router to the originator depending on iface
* @orig_node: the orig node for the router
* @if_outgoing: the interface where the payload packet has been received or
@@ -704,8 +664,7 @@ batadv_neigh_node_create(struct batadv_orig_node *orig_node,
out:
spin_unlock_bh(&orig_node->neigh_list_lock);
- if (hardif_neigh)
- batadv_hardif_neigh_put(hardif_neigh);
+ batadv_hardif_neigh_put(hardif_neigh);
return neigh_node;
}
@@ -797,14 +756,10 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
ret = msg->len;
out:
- if (hardif)
- batadv_hardif_put(hardif);
- if (hard_iface)
- dev_put(hard_iface);
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ batadv_hardif_put(hardif);
+ dev_put(hard_iface);
+ batadv_hardif_put(primary_if);
+ dev_put(soft_iface);
return ret;
}
@@ -814,7 +769,7 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
* free after rcu grace period
* @ref: kref pointer of the orig_ifinfo
*/
-static void batadv_orig_ifinfo_release(struct kref *ref)
+void batadv_orig_ifinfo_release(struct kref *ref)
{
struct batadv_orig_ifinfo *orig_ifinfo;
struct batadv_neigh_node *router;
@@ -826,23 +781,12 @@ static void batadv_orig_ifinfo_release(struct kref *ref)
/* this is the last reference to this object */
router = rcu_dereference_protected(orig_ifinfo->router, true);
- if (router)
- batadv_neigh_node_put(router);
+ batadv_neigh_node_put(router);
kfree_rcu(orig_ifinfo, rcu);
}
/**
- * batadv_orig_ifinfo_put() - decrement the refcounter and possibly release
- * the orig_ifinfo
- * @orig_ifinfo: the orig_ifinfo object to release
- */
-void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo)
-{
- kref_put(&orig_ifinfo->refcount, batadv_orig_ifinfo_release);
-}
-
-/**
* batadv_orig_node_free_rcu() - free the orig_node
* @rcu: rcu pointer of the orig_node
*/
@@ -865,7 +809,7 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
* free after rcu grace period
* @ref: kref pointer of the orig_node
*/
-static void batadv_orig_node_release(struct kref *ref)
+void batadv_orig_node_release(struct kref *ref)
{
struct hlist_node *node_tmp;
struct batadv_neigh_node *neigh_node;
@@ -895,8 +839,7 @@ static void batadv_orig_node_release(struct kref *ref)
orig_node->last_bonding_candidate = NULL;
spin_unlock_bh(&orig_node->neigh_list_lock);
- if (last_candidate)
- batadv_orig_ifinfo_put(last_candidate);
+ batadv_orig_ifinfo_put(last_candidate);
spin_lock_bh(&orig_node->vlan_list_lock);
hlist_for_each_entry_safe(vlan, node_tmp, &orig_node->vlan_list, list) {
@@ -912,16 +855,6 @@ static void batadv_orig_node_release(struct kref *ref)
}
/**
- * batadv_orig_node_put() - decrement the orig node refcounter and possibly
- * release it
- * @orig_node: the orig node to free
- */
-void batadv_orig_node_put(struct batadv_orig_node *orig_node)
-{
- kref_put(&orig_node->refcount, batadv_orig_node_release);
-}
-
-/**
* batadv_originator_free() - Free all originator structures
* @bat_priv: the bat priv with all the soft interface information
*/
@@ -1213,8 +1146,7 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv,
if (!kref_get_unless_zero(&neigh->refcount))
continue;
- if (best)
- batadv_neigh_node_put(best);
+ batadv_neigh_node_put(best);
best = neigh;
}
@@ -1259,8 +1191,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
BATADV_IF_DEFAULT);
batadv_update_route(bat_priv, orig_node, BATADV_IF_DEFAULT,
best_neigh_node);
- if (best_neigh_node)
- batadv_neigh_node_put(best_neigh_node);
+ batadv_neigh_node_put(best_neigh_node);
/* ... then for all other interfaces. */
rcu_read_lock();
@@ -1279,8 +1210,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
hard_iface);
batadv_update_route(bat_priv, orig_node, hard_iface,
best_neigh_node);
- if (best_neigh_node)
- batadv_neigh_node_put(best_neigh_node);
+ batadv_neigh_node_put(best_neigh_node);
batadv_hardif_put(hard_iface);
}
@@ -1410,14 +1340,10 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
ret = msg->len;
out:
- if (hardif)
- batadv_hardif_put(hardif);
- if (hard_iface)
- dev_put(hard_iface);
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ batadv_hardif_put(hardif);
+ dev_put(hard_iface);
+ batadv_hardif_put(primary_if);
+ dev_put(soft_iface);
return ret;
}
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 805be87d55b8..ea3d69e4e670 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -12,6 +12,7 @@
#include <linux/compiler.h>
#include <linux/if_ether.h>
#include <linux/jhash.h>
+#include <linux/kref.h>
#include <linux/netlink.h>
#include <linux/skbuff.h>
#include <linux/types.h>
@@ -20,19 +21,18 @@ bool batadv_compare_orig(const struct hlist_node *node, const void *data2);
int batadv_originator_init(struct batadv_priv *bat_priv);
void batadv_originator_free(struct batadv_priv *bat_priv);
void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
-void batadv_orig_node_put(struct batadv_orig_node *orig_node);
+void batadv_orig_node_release(struct kref *ref);
struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
const u8 *addr);
struct batadv_hardif_neigh_node *
batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
const u8 *neigh_addr);
-void
-batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh);
+void batadv_hardif_neigh_release(struct kref *ref);
struct batadv_neigh_node *
batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node,
struct batadv_hard_iface *hard_iface,
const u8 *neigh_addr);
-void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node);
+void batadv_neigh_node_release(struct kref *ref);
struct batadv_neigh_node *
batadv_orig_router_get(struct batadv_orig_node *orig_node,
const struct batadv_hard_iface *if_outgoing);
@@ -42,7 +42,7 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
struct batadv_neigh_ifinfo *
batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
struct batadv_hard_iface *if_outgoing);
-void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo);
+void batadv_neigh_ifinfo_release(struct kref *ref);
int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb);
@@ -52,7 +52,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node,
struct batadv_orig_ifinfo *
batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
struct batadv_hard_iface *if_outgoing);
-void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo);
+void batadv_orig_ifinfo_release(struct kref *ref);
int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb);
struct batadv_orig_node_vlan *
@@ -61,7 +61,7 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
struct batadv_orig_node_vlan *
batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
unsigned short vid);
-void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan);
+void batadv_orig_node_vlan_release(struct kref *ref);
/**
* batadv_choose_orig() - Return the index of the orig entry in the hash table
@@ -82,4 +82,86 @@ static inline u32 batadv_choose_orig(const void *data, u32 size)
struct batadv_orig_node *
batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data);
+/**
+ * batadv_orig_node_vlan_put() - decrement the refcounter and possibly release
+ * the originator-vlan object
+ * @orig_vlan: the originator-vlan object to release
+ */
+static inline void
+batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan)
+{
+ if (!orig_vlan)
+ return;
+
+ kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release);
+}
+
+/**
+ * batadv_neigh_ifinfo_put() - decrement the refcounter and possibly release
+ * the neigh_ifinfo
+ * @neigh_ifinfo: the neigh_ifinfo object to release
+ */
+static inline void
+batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo)
+{
+ if (!neigh_ifinfo)
+ return;
+
+ kref_put(&neigh_ifinfo->refcount, batadv_neigh_ifinfo_release);
+}
+
+/**
+ * batadv_hardif_neigh_put() - decrement the hardif neighbors refcounter
+ * and possibly release it
+ * @hardif_neigh: hardif neigh neighbor to free
+ */
+static inline void
+batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh)
+{
+ if (!hardif_neigh)
+ return;
+
+ kref_put(&hardif_neigh->refcount, batadv_hardif_neigh_release);
+}
+
+/**
+ * batadv_neigh_node_put() - decrement the neighbors refcounter and possibly
+ * release it
+ * @neigh_node: neigh neighbor to free
+ */
+static inline void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
+{
+ if (!neigh_node)
+ return;
+
+ kref_put(&neigh_node->refcount, batadv_neigh_node_release);
+}
+
+/**
+ * batadv_orig_ifinfo_put() - decrement the refcounter and possibly release
+ * the orig_ifinfo
+ * @orig_ifinfo: the orig_ifinfo object to release
+ */
+static inline void
+batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo)
+{
+ if (!orig_ifinfo)
+ return;
+
+ kref_put(&orig_ifinfo->refcount, batadv_orig_ifinfo_release);
+}
+
+/**
+ * batadv_orig_node_put() - decrement the orig node refcounter and possibly
+ * release it
+ * @orig_node: the orig node to free
+ */
+static inline void batadv_orig_node_put(struct batadv_orig_node *orig_node)
+{
+ if (!orig_node)
+ return;
+
+ kref_put(&orig_node->refcount, batadv_orig_node_release);
+}
+
#endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index bb9e93e3d98c..970d0d7ccc98 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -101,8 +101,7 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
}
/* decrease refcount of previous best neighbor */
- if (curr_router)
- batadv_neigh_node_put(curr_router);
+ batadv_neigh_node_put(curr_router);
}
/**
@@ -128,8 +127,7 @@ void batadv_update_route(struct batadv_priv *bat_priv,
_batadv_update_route(bat_priv, orig_node, recv_if, neigh_node);
out:
- if (router)
- batadv_neigh_node_put(router);
+ batadv_neigh_node_put(router);
}
/**
@@ -269,10 +267,8 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
goto out;
}
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_hardif_put(primary_if);
+ batadv_orig_node_put(orig_node);
kfree_skb(skb);
@@ -324,10 +320,8 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
skb = NULL;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_hardif_put(primary_if);
+ batadv_orig_node_put(orig_node);
kfree_skb(skb);
@@ -425,8 +419,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
skb = NULL;
put_orig_node:
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
free_skb:
kfree_skb(skb);
@@ -513,8 +506,7 @@ batadv_last_bonding_replace(struct batadv_orig_node *orig_node,
orig_node->last_bonding_candidate = new_candidate;
spin_unlock_bh(&orig_node->neigh_list_lock);
- if (old_candidate)
- batadv_orig_ifinfo_put(old_candidate);
+ batadv_orig_ifinfo_put(old_candidate);
}
/**
@@ -656,8 +648,7 @@ next:
batadv_orig_ifinfo_put(next_candidate);
}
- if (last_candidate)
- batadv_orig_ifinfo_put(last_candidate);
+ batadv_orig_ifinfo_put(last_candidate);
return router;
}
@@ -785,10 +776,8 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
ret = true;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_hardif_put(primary_if);
+ batadv_orig_node_put(orig_node);
return ret;
}
@@ -1031,8 +1020,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
orig_node);
rx_success:
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
return NET_RX_SUCCESS;
}
@@ -1279,7 +1267,6 @@ free_skb:
kfree_skb(skb);
ret = NET_RX_DROP;
out:
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
return ret;
}
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 0b9dd29d3b6a..477d85a3b558 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -152,8 +152,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
if (hardif_neigh && ret != NET_XMIT_DROP)
hardif_neigh->bat_v.last_unicast_tx = jiffies;
- if (hardif_neigh)
- batadv_hardif_neigh_put(hardif_neigh);
+ batadv_hardif_neigh_put(hardif_neigh);
#endif
return ret;
@@ -309,8 +308,7 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv,
ret = true;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
return ret;
}
@@ -425,8 +423,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
ret = batadv_send_skb_unicast(bat_priv, skb, packet_type,
packet_subtype, orig_node, vid);
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
return ret;
}
@@ -452,8 +449,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
ret = batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR,
BATADV_P_DATA, orig_node, vid);
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
return ret;
}
@@ -474,10 +470,8 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet,
else
consume_skb(forw_packet->skb);
- if (forw_packet->if_incoming)
- batadv_hardif_put(forw_packet->if_incoming);
- if (forw_packet->if_outgoing)
- batadv_hardif_put(forw_packet->if_outgoing);
+ batadv_hardif_put(forw_packet->if_incoming);
+ batadv_hardif_put(forw_packet->if_outgoing);
if (forw_packet->queue_left)
atomic_inc(forw_packet->queue_left);
kfree(forw_packet);
@@ -748,6 +742,10 @@ void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv,
* Adds a broadcast packet to the queue and sets up timers. Broadcast packets
* are sent multiple times to increase probability for being received.
*
+ * This call clones the given skb, hence the caller needs to take into
+ * account that the data segment of the original skb might not be
+ * modifiable anymore.
+ *
* Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
*/
static int batadv_forw_bcast_packet_to_list(struct batadv_priv *bat_priv,
@@ -761,7 +759,7 @@ static int batadv_forw_bcast_packet_to_list(struct batadv_priv *bat_priv,
unsigned long send_time = jiffies;
struct sk_buff *newskb;
- newskb = skb_copy(skb, GFP_ATOMIC);
+ newskb = skb_clone(skb, GFP_ATOMIC);
if (!newskb)
goto err;
@@ -800,6 +798,10 @@ err:
* or if a delay is given after that. Furthermore, queues additional
* retransmissions if this interface is a wireless one.
*
+ * This call clones the given skb, hence the caller needs to take into
+ * account that the data segment of the original skb might not be
+ * modifiable anymore.
+ *
* Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
*/
static int batadv_forw_bcast_packet_if(struct batadv_priv *bat_priv,
@@ -814,7 +816,7 @@ static int batadv_forw_bcast_packet_if(struct batadv_priv *bat_priv,
int ret = NETDEV_TX_OK;
if (!delay) {
- newskb = skb_copy(skb, GFP_ATOMIC);
+ newskb = skb_clone(skb, GFP_ATOMIC);
if (!newskb)
return NETDEV_TX_BUSY;
@@ -867,8 +869,7 @@ static bool batadv_send_no_broadcast(struct batadv_priv *bat_priv,
ret = batadv_hardif_no_broadcast(if_out, bcast_packet->orig,
orig_neigh);
- if (neigh_node)
- batadv_hardif_neigh_put(neigh_node);
+ batadv_hardif_neigh_put(neigh_node);
/* ok, may broadcast */
if (!ret)
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index ae368a42a4ad..0604b0279573 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -383,10 +383,8 @@ dropped:
dropped_freed:
batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED);
end:
- if (mcast_single_orig)
- batadv_orig_node_put(mcast_single_orig);
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_orig_node_put(mcast_single_orig);
+ batadv_hardif_put(primary_if);
return NETDEV_TX_OK;
}
@@ -501,7 +499,7 @@ out:
* after rcu grace period
* @ref: kref pointer of the vlan object
*/
-static void batadv_softif_vlan_release(struct kref *ref)
+void batadv_softif_vlan_release(struct kref *ref)
{
struct batadv_softif_vlan *vlan;
@@ -515,19 +513,6 @@ static void batadv_softif_vlan_release(struct kref *ref)
}
/**
- * batadv_softif_vlan_put() - decrease the vlan object refcounter and
- * possibly release it
- * @vlan: the vlan object to release
- */
-void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan)
-{
- if (!vlan)
- return;
-
- kref_put(&vlan->refcount, batadv_softif_vlan_release);
-}
-
-/**
* batadv_softif_vlan_get() - get the vlan object for a specific vid
* @bat_priv: the bat priv with all the soft interface information
* @vid: the identifier of the vlan object to retrieve
@@ -851,8 +836,7 @@ static int batadv_softif_slave_add(struct net_device *dev,
ret = batadv_hardif_enable_interface(hard_iface, dev);
out:
- if (hard_iface)
- batadv_hardif_put(hard_iface);
+ batadv_hardif_put(hard_iface);
return ret;
}
@@ -878,8 +862,7 @@ static int batadv_softif_slave_del(struct net_device *dev,
ret = 0;
out:
- if (hard_iface)
- batadv_hardif_put(hard_iface);
+ batadv_hardif_put(hard_iface);
return ret;
}
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 67a2ddd6832f..9f2003f1a497 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -9,6 +9,7 @@
#include "main.h"
+#include <linux/kref.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/types.h>
@@ -21,8 +22,21 @@ void batadv_interface_rx(struct net_device *soft_iface,
bool batadv_softif_is_valid(const struct net_device *net_dev);
extern struct rtnl_link_ops batadv_link_ops;
int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid);
-void batadv_softif_vlan_put(struct batadv_softif_vlan *softif_vlan);
+void batadv_softif_vlan_release(struct kref *ref);
struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
unsigned short vid);
+/**
+ * batadv_softif_vlan_put() - decrease the vlan object refcounter and
+ * possibly release it
+ * @vlan: the vlan object to release
+ */
+static inline void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan)
+{
+ if (!vlan)
+ return;
+
+ kref_put(&vlan->refcount, batadv_softif_vlan_release);
+}
+
#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 789c851732b7..56b9fe97b3b4 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -358,6 +358,9 @@ static void batadv_tp_vars_release(struct kref *ref)
*/
static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars)
{
+ if (!tp_vars)
+ return;
+
kref_put(&tp_vars->refcount, batadv_tp_vars_release);
}
@@ -748,12 +751,9 @@ move_twnd:
wake_up(&tp_vars->more_bytes);
out:
- if (likely(primary_if))
- batadv_hardif_put(primary_if);
- if (likely(orig_node))
- batadv_orig_node_put(orig_node);
- if (likely(tp_vars))
- batadv_tp_vars_put(tp_vars);
+ batadv_hardif_put(primary_if);
+ batadv_orig_node_put(orig_node);
+ batadv_tp_vars_put(tp_vars);
}
/**
@@ -882,10 +882,8 @@ static int batadv_tp_send(void *arg)
}
out:
- if (likely(primary_if))
- batadv_hardif_put(primary_if);
- if (likely(orig_node))
- batadv_orig_node_put(orig_node);
+ batadv_hardif_put(primary_if);
+ batadv_orig_node_put(orig_node);
batadv_tp_sender_end(bat_priv, tp_vars);
batadv_tp_sender_cleanup(bat_priv, tp_vars);
@@ -1205,10 +1203,8 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
ret = 0;
out:
- if (likely(orig_node))
- batadv_orig_node_put(orig_node);
- if (likely(primary_if))
- batadv_hardif_put(primary_if);
+ batadv_orig_node_put(orig_node);
+ batadv_hardif_put(primary_if);
return ret;
}
@@ -1456,8 +1452,7 @@ send_ack:
batadv_tp_send_ack(bat_priv, icmp->orig, tp_vars->last_recv,
icmp->timestamp, icmp->session, icmp->uid);
out:
- if (likely(tp_vars))
- batadv_tp_vars_put(tp_vars);
+ batadv_tp_vars_put(tp_vars);
}
/**
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 434b4f042909..e0b3dace2020 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -247,6 +247,9 @@ static void batadv_tt_local_entry_release(struct kref *ref)
static void
batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry)
{
+ if (!tt_local_entry)
+ return;
+
kref_put(&tt_local_entry->common.refcount,
batadv_tt_local_entry_release);
}
@@ -270,7 +273,7 @@ static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu)
* queue for free after rcu grace period
* @ref: kref pointer of the nc_node
*/
-static void batadv_tt_global_entry_release(struct kref *ref)
+void batadv_tt_global_entry_release(struct kref *ref)
{
struct batadv_tt_global_entry *tt_global_entry;
@@ -283,17 +286,6 @@ static void batadv_tt_global_entry_release(struct kref *ref)
}
/**
- * batadv_tt_global_entry_put() - decrement the tt_global_entry refcounter and
- * possibly release it
- * @tt_global_entry: tt_global_entry to be free'd
- */
-void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
-{
- kref_put(&tt_global_entry->common.refcount,
- batadv_tt_global_entry_release);
-}
-
-/**
* batadv_tt_global_hash_count() - count the number of orig entries
* @bat_priv: the bat priv with all the soft interface information
* @addr: the mac address of the client to count entries for
@@ -452,6 +444,9 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref)
static void
batadv_tt_orig_list_entry_put(struct batadv_tt_orig_list_entry *orig_entry)
{
+ if (!orig_entry)
+ return;
+
kref_put(&orig_entry->refcount, batadv_tt_orig_list_entry_release);
}
@@ -818,14 +813,10 @@ check_roaming:
ret = true;
out:
- if (in_hardif)
- batadv_hardif_put(in_hardif);
- if (in_dev)
- dev_put(in_dev);
- if (tt_local)
- batadv_tt_local_entry_put(tt_local);
- if (tt_global)
- batadv_tt_global_entry_put(tt_global);
+ batadv_hardif_put(in_hardif);
+ dev_put(in_dev);
+ batadv_tt_local_entry_put(tt_local);
+ batadv_tt_global_entry_put(tt_global);
return ret;
}
@@ -1215,10 +1206,8 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb)
ret = msg->len;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ batadv_hardif_put(primary_if);
+ dev_put(soft_iface);
cb->args[0] = bucket;
cb->args[1] = idx;
@@ -1305,8 +1294,7 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
batadv_tt_local_entry_put(tt_removed_entry);
out:
- if (tt_local_entry)
- batadv_tt_local_entry_put(tt_local_entry);
+ batadv_tt_local_entry_put(tt_local_entry);
return curr_flags;
}
@@ -1576,8 +1564,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
sync_flags:
batadv_tt_global_sync_flags(tt_global);
out:
- if (orig_entry)
- batadv_tt_orig_list_entry_put(orig_entry);
+ batadv_tt_orig_list_entry_put(orig_entry);
spin_unlock_bh(&tt_global->list_lock);
}
@@ -1750,10 +1737,8 @@ out_remove:
tt_global_entry->common.flags &= ~BATADV_TT_CLIENT_ROAM;
out:
- if (tt_global_entry)
- batadv_tt_global_entry_put(tt_global_entry);
- if (tt_local_entry)
- batadv_tt_local_entry_put(tt_local_entry);
+ batadv_tt_global_entry_put(tt_global_entry);
+ batadv_tt_local_entry_put(tt_local_entry);
return ret;
}
@@ -1789,15 +1774,13 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
}
/* release the refcount for the "old" best */
- if (best_router)
- batadv_neigh_node_put(best_router);
+ batadv_neigh_node_put(best_router);
best_entry = orig_entry;
best_router = router;
}
- if (best_router)
- batadv_neigh_node_put(best_router);
+ batadv_neigh_node_put(best_router);
return best_entry;
}
@@ -2003,10 +1986,8 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb)
ret = msg->len;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ batadv_hardif_put(primary_if);
+ dev_put(soft_iface);
cb->args[0] = bucket;
cb->args[1] = idx;
@@ -2196,10 +2177,8 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
}
out:
- if (tt_global_entry)
- batadv_tt_global_entry_put(tt_global_entry);
- if (local_entry)
- batadv_tt_local_entry_put(local_entry);
+ batadv_tt_global_entry_put(tt_global_entry);
+ batadv_tt_local_entry_put(local_entry);
}
/**
@@ -2426,10 +2405,8 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
rcu_read_unlock();
out:
- if (tt_global_entry)
- batadv_tt_global_entry_put(tt_global_entry);
- if (tt_local_entry)
- batadv_tt_local_entry_put(tt_local_entry);
+ batadv_tt_global_entry_put(tt_global_entry);
+ batadv_tt_local_entry_put(tt_local_entry);
return orig_node;
}
@@ -2606,6 +2583,9 @@ static void batadv_tt_req_node_release(struct kref *ref)
*/
static void batadv_tt_req_node_put(struct batadv_tt_req_node *tt_req_node)
{
+ if (!tt_req_node)
+ return;
+
kref_put(&tt_req_node->refcount, batadv_tt_req_node_release);
}
@@ -2987,8 +2967,7 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv,
ret = true;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
if (ret && tt_req_node) {
spin_lock_bh(&bat_priv->tt.req_list_lock);
@@ -2999,8 +2978,7 @@ out:
spin_unlock_bh(&bat_priv->tt.req_list_lock);
}
- if (tt_req_node)
- batadv_tt_req_node_put(tt_req_node);
+ batadv_tt_req_node_put(tt_req_node);
kfree(tvlv_tt_data);
return ret;
@@ -3131,10 +3109,8 @@ unlock:
spin_unlock_bh(&req_dst_orig_node->tt_buff_lock);
out:
- if (res_dst_orig_node)
- batadv_orig_node_put(res_dst_orig_node);
- if (req_dst_orig_node)
- batadv_orig_node_put(req_dst_orig_node);
+ batadv_orig_node_put(res_dst_orig_node);
+ batadv_orig_node_put(req_dst_orig_node);
kfree(tvlv_tt_data);
return ret;
}
@@ -3248,10 +3224,8 @@ unlock:
spin_unlock_bh(&bat_priv->tt.last_changeset_lock);
out:
spin_unlock_bh(&bat_priv->tt.commit_lock);
- if (orig_node)
- batadv_orig_node_put(orig_node);
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_orig_node_put(orig_node);
+ batadv_hardif_put(primary_if);
kfree(tvlv_tt_data);
/* The packet was for this host, so it doesn't need to be re-routed */
return true;
@@ -3336,8 +3310,7 @@ static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
atomic_set(&orig_node->last_ttvn, ttvn);
out:
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
}
static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
@@ -3378,8 +3351,7 @@ bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr,
goto out;
ret = true;
out:
- if (tt_local_entry)
- batadv_tt_local_entry_put(tt_local_entry);
+ batadv_tt_local_entry_put(tt_local_entry);
return ret;
}
@@ -3442,8 +3414,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
spin_unlock_bh(&bat_priv->tt.req_list_lock);
out:
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
}
static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv)
@@ -3574,8 +3545,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client,
&tvlv_roam, sizeof(tvlv_roam));
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
}
static void batadv_tt_purge(struct work_struct *work)
@@ -4170,8 +4140,7 @@ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
atomic_read(&orig_node->last_ttvn) + 1);
out:
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
return NET_RX_SUCCESS;
}
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index e1285904f885..d18740d9a22b 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -9,6 +9,7 @@
#include "main.h"
+#include <linux/kref.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/skbuff.h>
@@ -28,7 +29,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
struct batadv_tt_global_entry *
batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
unsigned short vid);
-void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry);
+void batadv_tt_global_entry_release(struct kref *ref);
int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
const u8 *addr, unsigned short vid);
struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
@@ -55,4 +56,19 @@ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
int batadv_tt_cache_init(void);
void batadv_tt_cache_destroy(void);
+/**
+ * batadv_tt_global_entry_put() - decrement the tt_global_entry refcounter and
+ * possibly release it
+ * @tt_global_entry: tt_global_entry to be free'd
+ */
+static inline void
+batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
+{
+ if (!tt_global_entry)
+ return;
+
+ kref_put(&tt_global_entry->common.refcount,
+ batadv_tt_global_entry_release);
+}
+
#endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 253f5a33a914..992773376e51 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -50,6 +50,9 @@ static void batadv_tvlv_handler_release(struct kref *ref)
*/
static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
{
+ if (!tvlv_handler)
+ return;
+
kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release);
}
@@ -106,6 +109,9 @@ static void batadv_tvlv_container_release(struct kref *ref)
*/
static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
{
+ if (!tvlv)
+ return;
+
kref_put(&tvlv->refcount, batadv_tvlv_container_release);
}
@@ -438,8 +444,7 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
ogm_source, orig_node,
src, dst, tvlv_value,
tvlv_value_cont_len);
- if (tvlv_handler)
- batadv_tvlv_handler_put(tvlv_handler);
+ batadv_tvlv_handler_put(tvlv_handler);
tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len;
tvlv_value_len -= tvlv_value_cont_len;
}
diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h
index c32638dddbf9..f6b9dc4e408f 100644
--- a/net/bluetooth/cmtp/cmtp.h
+++ b/net/bluetooth/cmtp/cmtp.h
@@ -26,7 +26,7 @@
#include <linux/types.h>
#include <net/bluetooth/bluetooth.h>
-#define BTNAMSIZ 18
+#define BTNAMSIZ 21
/* CMTP ioctl defines */
#define CMTPCONNADD _IOW('C', 200, int)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index e1a545c8a69f..8a47a3017d61 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1343,6 +1343,12 @@ int hci_inquiry(void __user *arg)
goto done;
}
+ /* Restrict maximum inquiry length to 60 seconds */
+ if (ir.length > 60) {
+ err = -EINVAL;
+ goto done;
+ }
+
hci_dev_lock(hdev);
if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX ||
inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) {
@@ -1718,6 +1724,7 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev)
int hci_dev_do_close(struct hci_dev *hdev)
{
bool auto_off;
+ int err = 0;
BT_DBG("%s %p", hdev->name, hdev);
@@ -1727,10 +1734,18 @@ int hci_dev_do_close(struct hci_dev *hdev)
hci_request_cancel_all(hdev);
hci_req_sync_lock(hdev);
+ if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
+ !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
+ test_bit(HCI_UP, &hdev->flags)) {
+ /* Execute vendor specific shutdown routine */
+ if (hdev->shutdown)
+ err = hdev->shutdown(hdev);
+ }
+
if (!test_and_clear_bit(HCI_UP, &hdev->flags)) {
cancel_delayed_work_sync(&hdev->cmd_timer);
hci_req_sync_unlock(hdev);
- return 0;
+ return err;
}
hci_leds_update_powered(hdev, false);
@@ -1798,14 +1813,6 @@ int hci_dev_do_close(struct hci_dev *hdev)
clear_bit(HCI_INIT, &hdev->flags);
}
- if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
- !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
- test_bit(HCI_UP, &hdev->flags)) {
- /* Execute vendor specific shutdown routine */
- if (hdev->shutdown)
- hdev->shutdown(hdev);
- }
-
/* flush cmd work */
flush_work(&hdev->cmd_work);
@@ -1845,7 +1852,7 @@ int hci_dev_do_close(struct hci_dev *hdev)
hci_req_sync_unlock(hdev);
hci_dev_put(hdev);
- return 0;
+ return err;
}
int hci_dev_close(__u16 dev)
@@ -3751,11 +3758,18 @@ done:
}
/* Alloc HCI device */
-struct hci_dev *hci_alloc_dev(void)
+struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
{
struct hci_dev *hdev;
+ unsigned int alloc_size;
+
+ alloc_size = sizeof(*hdev);
+ if (sizeof_priv) {
+ /* Fixme: May need ALIGN-ment? */
+ alloc_size += sizeof_priv;
+ }
- hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
+ hdev = kzalloc(alloc_size, GFP_KERNEL);
if (!hdev)
return NULL;
@@ -3869,7 +3883,7 @@ struct hci_dev *hci_alloc_dev(void)
return hdev;
}
-EXPORT_SYMBOL(hci_alloc_dev);
+EXPORT_SYMBOL(hci_alloc_dev_priv);
/* Free HCI device */
void hci_free_dev(struct hci_dev *hdev)
@@ -4034,13 +4048,13 @@ void hci_unregister_dev(struct hci_dev *hdev)
}
device_del(&hdev->dev);
- /* Actual cleanup is deferred until hci_cleanup_dev(). */
+ /* Actual cleanup is deferred until hci_release_dev(). */
hci_dev_put(hdev);
}
EXPORT_SYMBOL(hci_unregister_dev);
-/* Cleanup HCI device */
-void hci_cleanup_dev(struct hci_dev *hdev)
+/* Release HCI device */
+void hci_release_dev(struct hci_dev *hdev)
{
debugfs_remove_recursive(hdev->debugfs);
kfree_const(hdev->hw_info);
@@ -4067,7 +4081,9 @@ void hci_cleanup_dev(struct hci_dev *hdev)
hci_dev_unlock(hdev);
ida_simple_remove(&hci_index_ida, hdev->id);
+ kfree(hdev);
}
+EXPORT_SYMBOL(hci_release_dev);
/* Suspend HCI device */
int hci_suspend_dev(struct hci_dev *hdev)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 1c3018202564..0bca035bf2dc 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -40,6 +40,8 @@
#define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \
"\x00\x00\x00\x00\x00\x00\x00\x00"
+#define secs_to_jiffies(_secs) msecs_to_jiffies((_secs) * 1000)
+
/* Handle HCI Event packets */
static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb,
@@ -1171,6 +1173,12 @@ static void hci_cc_le_set_random_addr(struct hci_dev *hdev, struct sk_buff *skb)
bacpy(&hdev->random_addr, sent);
+ if (!bacmp(&hdev->rpa, sent)) {
+ hci_dev_clear_flag(hdev, HCI_RPA_EXPIRED);
+ queue_delayed_work(hdev->workqueue, &hdev->rpa_expired,
+ secs_to_jiffies(hdev->rpa_timeout));
+ }
+
hci_dev_unlock(hdev);
}
@@ -1201,24 +1209,30 @@ static void hci_cc_le_set_adv_set_random_addr(struct hci_dev *hdev,
{
__u8 status = *((__u8 *) skb->data);
struct hci_cp_le_set_adv_set_rand_addr *cp;
- struct adv_info *adv_instance;
+ struct adv_info *adv;
if (status)
return;
cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADV_SET_RAND_ADDR);
- if (!cp)
+ /* Update only in case the adv instance since handle 0x00 shall be using
+ * HCI_OP_LE_SET_RANDOM_ADDR since that allows both extended and
+ * non-extended advertising.
+ */
+ if (!cp || !cp->handle)
return;
hci_dev_lock(hdev);
- if (!cp->handle) {
- /* Store in hdev for instance 0 (Set adv and Directed advs) */
- bacpy(&hdev->random_addr, &cp->bdaddr);
- } else {
- adv_instance = hci_find_adv_instance(hdev, cp->handle);
- if (adv_instance)
- bacpy(&adv_instance->random_addr, &cp->bdaddr);
+ adv = hci_find_adv_instance(hdev, cp->handle);
+ if (adv) {
+ bacpy(&adv->random_addr, &cp->bdaddr);
+ if (!bacmp(&hdev->rpa, &cp->bdaddr)) {
+ adv->rpa_expired = false;
+ queue_delayed_work(hdev->workqueue,
+ &adv->rpa_expired_cb,
+ secs_to_jiffies(hdev->rpa_timeout));
+ }
}
hci_dev_unlock(hdev);
@@ -1277,7 +1291,9 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev,
struct sk_buff *skb)
{
struct hci_cp_le_set_ext_adv_enable *cp;
+ struct hci_cp_ext_adv_set *set;
__u8 status = *((__u8 *) skb->data);
+ struct adv_info *adv = NULL, *n;
BT_DBG("%s status 0x%2.2x", hdev->name, status);
@@ -1288,22 +1304,48 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev,
if (!cp)
return;
+ set = (void *)cp->data;
+
hci_dev_lock(hdev);
+ if (cp->num_of_sets)
+ adv = hci_find_adv_instance(hdev, set->handle);
+
if (cp->enable) {
struct hci_conn *conn;
hci_dev_set_flag(hdev, HCI_LE_ADV);
+ if (adv)
+ adv->enabled = true;
+
conn = hci_lookup_le_connect(hdev);
if (conn)
queue_delayed_work(hdev->workqueue,
&conn->le_conn_timeout,
conn->conn_timeout);
} else {
+ if (adv) {
+ adv->enabled = false;
+ /* If just one instance was disabled check if there are
+ * any other instance enabled before clearing HCI_LE_ADV
+ */
+ list_for_each_entry_safe(adv, n, &hdev->adv_instances,
+ list) {
+ if (adv->enabled)
+ goto unlock;
+ }
+ } else {
+ /* All instances shall be considered disabled */
+ list_for_each_entry_safe(adv, n, &hdev->adv_instances,
+ list)
+ adv->enabled = false;
+ }
+
hci_dev_clear_flag(hdev, HCI_LE_ADV);
}
+unlock:
hci_dev_unlock(hdev);
}
@@ -2306,19 +2348,20 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
if (conn) {
- u8 type = conn->type;
-
mgmt_disconnect_failed(hdev, &conn->dst, conn->type,
conn->dst_type, status);
+ if (conn->type == LE_LINK) {
+ hdev->cur_adv_instance = conn->adv_instance;
+ hci_req_reenable_advertising(hdev);
+ }
+
/* If the disconnection failed for any reason, the upper layer
* does not retry to disconnect in current implementation.
* Hence, we need to do some basic cleanup here and re-enable
* advertising if necessary.
*/
hci_conn_del(conn);
- if (type == LE_LINK)
- hci_req_reenable_advertising(hdev);
}
hci_dev_unlock(hdev);
@@ -2844,7 +2887,6 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
struct hci_conn_params *params;
struct hci_conn *conn;
bool mgmt_connected;
- u8 type;
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
@@ -2899,10 +2941,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
}
}
- type = conn->type;
-
hci_disconn_cfm(conn, ev->reason);
- hci_conn_del(conn);
/* The suspend notifier is waiting for all devices to disconnect so
* clear the bit from pending tasks and inform the wait queue.
@@ -2922,8 +2961,12 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
* or until a connection is created or until the Advertising
* is timed out due to Directed Advertising."
*/
- if (type == LE_LINK)
+ if (conn->type == LE_LINK) {
+ hdev->cur_adv_instance = conn->adv_instance;
hci_req_reenable_advertising(hdev);
+ }
+
+ hci_conn_del(conn);
unlock:
hci_dev_unlock(hdev);
@@ -3268,11 +3311,9 @@ unlock:
hci_dev_unlock(hdev);
}
-static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev,
- u16 opcode, u8 ncmd)
+static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev, u8 ncmd)
{
- if (opcode != HCI_OP_NOP)
- cancel_delayed_work(&hdev->cmd_timer);
+ cancel_delayed_work(&hdev->cmd_timer);
if (!test_bit(HCI_RESET, &hdev->flags)) {
if (ncmd) {
@@ -3647,7 +3688,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
break;
}
- handle_cmd_cnt_and_timer(hdev, *opcode, ev->ncmd);
+ handle_cmd_cnt_and_timer(hdev, ev->ncmd);
hci_req_cmd_complete(hdev, *opcode, *status, req_complete,
req_complete_skb);
@@ -3748,7 +3789,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb,
break;
}
- handle_cmd_cnt_and_timer(hdev, *opcode, ev->ncmd);
+ handle_cmd_cnt_and_timer(hdev, ev->ncmd);
/* Indicate request completion if the command failed. Also, if
* we're not waiting for a special event and we get a success
@@ -4382,6 +4423,21 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
switch (ev->status) {
case 0x00:
+ /* The synchronous connection complete event should only be
+ * sent once per new connection. Receiving a successful
+ * complete event when the connection status is already
+ * BT_CONNECTED means that the device is misbehaving and sent
+ * multiple complete event packets for the same new connection.
+ *
+ * Registering the device more than once can corrupt kernel
+ * memory, hence upon detecting this invalid event, we report
+ * an error and ignore the packet.
+ */
+ if (conn->state == BT_CONNECTED) {
+ bt_dev_err(hdev, "Ignoring connect complete event for existing connection");
+ goto unlock;
+ }
+
conn->handle = __le16_to_cpu(ev->handle);
conn->state = BT_CONNECTED;
conn->type = ev->link_type;
@@ -5104,9 +5160,64 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev,
}
#endif
+static void le_conn_update_addr(struct hci_conn *conn, bdaddr_t *bdaddr,
+ u8 bdaddr_type, bdaddr_t *local_rpa)
+{
+ if (conn->out) {
+ conn->dst_type = bdaddr_type;
+ conn->resp_addr_type = bdaddr_type;
+ bacpy(&conn->resp_addr, bdaddr);
+
+ /* Check if the controller has set a Local RPA then it must be
+ * used instead of hdev->rpa.
+ */
+ if (local_rpa && bacmp(local_rpa, BDADDR_ANY)) {
+ conn->init_addr_type = ADDR_LE_DEV_RANDOM;
+ bacpy(&conn->init_addr, local_rpa);
+ } else if (hci_dev_test_flag(conn->hdev, HCI_PRIVACY)) {
+ conn->init_addr_type = ADDR_LE_DEV_RANDOM;
+ bacpy(&conn->init_addr, &conn->hdev->rpa);
+ } else {
+ hci_copy_identity_address(conn->hdev, &conn->init_addr,
+ &conn->init_addr_type);
+ }
+ } else {
+ conn->resp_addr_type = conn->hdev->adv_addr_type;
+ /* Check if the controller has set a Local RPA then it must be
+ * used instead of hdev->rpa.
+ */
+ if (local_rpa && bacmp(local_rpa, BDADDR_ANY)) {
+ conn->resp_addr_type = ADDR_LE_DEV_RANDOM;
+ bacpy(&conn->resp_addr, local_rpa);
+ } else if (conn->hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) {
+ /* In case of ext adv, resp_addr will be updated in
+ * Adv Terminated event.
+ */
+ if (!ext_adv_capable(conn->hdev))
+ bacpy(&conn->resp_addr,
+ &conn->hdev->random_addr);
+ } else {
+ bacpy(&conn->resp_addr, &conn->hdev->bdaddr);
+ }
+
+ conn->init_addr_type = bdaddr_type;
+ bacpy(&conn->init_addr, bdaddr);
+
+ /* For incoming connections, set the default minimum
+ * and maximum connection interval. They will be used
+ * to check if the parameters are in range and if not
+ * trigger the connection update procedure.
+ */
+ conn->le_conn_min_interval = conn->hdev->le_conn_min_interval;
+ conn->le_conn_max_interval = conn->hdev->le_conn_max_interval;
+ }
+}
+
static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
- bdaddr_t *bdaddr, u8 bdaddr_type, u8 role, u16 handle,
- u16 interval, u16 latency, u16 supervision_timeout)
+ bdaddr_t *bdaddr, u8 bdaddr_type,
+ bdaddr_t *local_rpa, u8 role, u16 handle,
+ u16 interval, u16 latency,
+ u16 supervision_timeout)
{
struct hci_conn_params *params;
struct hci_conn *conn;
@@ -5154,32 +5265,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
cancel_delayed_work(&conn->le_conn_timeout);
}
- if (!conn->out) {
- /* Set the responder (our side) address type based on
- * the advertising address type.
- */
- conn->resp_addr_type = hdev->adv_addr_type;
- if (hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) {
- /* In case of ext adv, resp_addr will be updated in
- * Adv Terminated event.
- */
- if (!ext_adv_capable(hdev))
- bacpy(&conn->resp_addr, &hdev->random_addr);
- } else {
- bacpy(&conn->resp_addr, &hdev->bdaddr);
- }
-
- conn->init_addr_type = bdaddr_type;
- bacpy(&conn->init_addr, bdaddr);
-
- /* For incoming connections, set the default minimum
- * and maximum connection interval. They will be used
- * to check if the parameters are in range and if not
- * trigger the connection update procedure.
- */
- conn->le_conn_min_interval = hdev->le_conn_min_interval;
- conn->le_conn_max_interval = hdev->le_conn_max_interval;
- }
+ le_conn_update_addr(conn, bdaddr, bdaddr_type, local_rpa);
/* Lookup the identity address from the stored connection
* address and address type.
@@ -5236,6 +5322,13 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
conn->handle = handle;
conn->state = BT_CONFIG;
+ /* Store current advertising instance as connection advertising instance
+ * when software rotation is in use so it can be re-enabled when
+ * disconnected.
+ */
+ if (!ext_adv_capable(hdev))
+ conn->adv_instance = hdev->cur_adv_instance;
+
conn->le_conn_interval = interval;
conn->le_conn_latency = latency;
conn->le_supv_timeout = supervision_timeout;
@@ -5290,7 +5383,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type,
- ev->role, le16_to_cpu(ev->handle),
+ NULL, ev->role, le16_to_cpu(ev->handle),
le16_to_cpu(ev->interval),
le16_to_cpu(ev->latency),
le16_to_cpu(ev->supervision_timeout));
@@ -5304,7 +5397,7 @@ static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type,
- ev->role, le16_to_cpu(ev->handle),
+ &ev->local_rpa, ev->role, le16_to_cpu(ev->handle),
le16_to_cpu(ev->interval),
le16_to_cpu(ev->latency),
le16_to_cpu(ev->supervision_timeout));
@@ -5319,13 +5412,13 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_evt_le_ext_adv_set_term *ev = (void *) skb->data;
struct hci_conn *conn;
+ struct adv_info *adv;
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
- if (ev->status) {
- struct adv_info *adv;
+ adv = hci_find_adv_instance(hdev, ev->handle);
- adv = hci_find_adv_instance(hdev, ev->handle);
+ if (ev->status) {
if (!adv)
return;
@@ -5336,11 +5429,18 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb)
return;
}
+ if (adv)
+ adv->enabled = false;
+
conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->conn_handle));
if (conn) {
- struct adv_info *adv_instance;
+ /* Store handle in the connection so the correct advertising
+ * instance can be re-enabled when disconnected.
+ */
+ conn->adv_instance = ev->handle;
- if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM)
+ if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM ||
+ bacmp(&conn->resp_addr, BDADDR_ANY))
return;
if (!ev->handle) {
@@ -5348,9 +5448,8 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb)
return;
}
- adv_instance = hci_find_adv_instance(hdev, ev->handle);
- if (adv_instance)
- bacpy(&conn->resp_addr, &adv_instance->random_addr);
+ if (adv)
+ bacpy(&conn->resp_addr, &adv->random_addr);
}
}
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 1d14adc023e9..f15626607b2d 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -2072,8 +2072,6 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
* current RPA has expired then generate a new one.
*/
if (use_rpa) {
- int to;
-
/* If Controller supports LL Privacy use own address type is
* 0x03
*/
@@ -2084,14 +2082,10 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
*own_addr_type = ADDR_LE_DEV_RANDOM;
if (adv_instance) {
- if (!adv_instance->rpa_expired &&
- !bacmp(&adv_instance->random_addr, &hdev->rpa))
+ if (adv_rpa_valid(adv_instance))
return 0;
-
- adv_instance->rpa_expired = false;
} else {
- if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) &&
- !bacmp(&hdev->random_addr, &hdev->rpa))
+ if (rpa_valid(hdev))
return 0;
}
@@ -2103,14 +2097,6 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
bacpy(rand_addr, &hdev->rpa);
- to = msecs_to_jiffies(hdev->rpa_timeout * 1000);
- if (adv_instance)
- queue_delayed_work(hdev->workqueue,
- &adv_instance->rpa_expired_cb, to);
- else
- queue_delayed_work(hdev->workqueue,
- &hdev->rpa_expired, to);
-
return 0;
}
@@ -2153,6 +2139,30 @@ void __hci_req_clear_ext_adv_sets(struct hci_request *req)
hci_req_add(req, HCI_OP_LE_CLEAR_ADV_SETS, 0, NULL);
}
+static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
+{
+ struct hci_dev *hdev = req->hdev;
+
+ /* If we're advertising or initiating an LE connection we can't
+ * go ahead and change the random address at this time. This is
+ * because the eventual initiator address used for the
+ * subsequently created connection will be undefined (some
+ * controllers use the new address and others the one we had
+ * when the operation started).
+ *
+ * In this kind of scenario skip the update and let the random
+ * address be updated at the next cycle.
+ */
+ if (hci_dev_test_flag(hdev, HCI_LE_ADV) ||
+ hci_lookup_le_connect(hdev)) {
+ bt_dev_dbg(hdev, "Deferring random address update");
+ hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
+ return;
+ }
+
+ hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa);
+}
+
int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
{
struct hci_cp_le_set_ext_adv_params cp;
@@ -2255,6 +2265,13 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
} else {
if (!bacmp(&random_addr, &hdev->random_addr))
return 0;
+ /* Instance 0x00 doesn't have an adv_info, instead it
+ * uses hdev->random_addr to track its address so
+ * whenever it needs to be updated this also set the
+ * random address since hdev->random_addr is shared with
+ * scan state machine.
+ */
+ set_random_addr(req, &random_addr);
}
memset(&cp, 0, sizeof(cp));
@@ -2512,30 +2529,6 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk,
false);
}
-static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
-{
- struct hci_dev *hdev = req->hdev;
-
- /* If we're advertising or initiating an LE connection we can't
- * go ahead and change the random address at this time. This is
- * because the eventual initiator address used for the
- * subsequently created connection will be undefined (some
- * controllers use the new address and others the one we had
- * when the operation started).
- *
- * In this kind of scenario skip the update and let the random
- * address be updated at the next cycle.
- */
- if (hci_dev_test_flag(hdev, HCI_LE_ADV) ||
- hci_lookup_le_connect(hdev)) {
- bt_dev_dbg(hdev, "Deferring random address update");
- hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
- return;
- }
-
- hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa);
-}
-
int hci_update_random_address(struct hci_request *req, bool require_privacy,
bool use_rpa, u8 *own_addr_type)
{
@@ -2547,8 +2540,6 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
* the current RPA in use, then generate a new one.
*/
if (use_rpa) {
- int to;
-
/* If Controller supports LL Privacy use own address type is
* 0x03
*/
@@ -2558,8 +2549,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
else
*own_addr_type = ADDR_LE_DEV_RANDOM;
- if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) &&
- !bacmp(&hdev->random_addr, &hdev->rpa))
+ if (rpa_valid(hdev))
return 0;
err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa);
@@ -2570,9 +2560,6 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
set_random_addr(req, &hdev->rpa);
- to = msecs_to_jiffies(hdev->rpa_timeout * 1000);
- queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to);
-
return 0;
}
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index b69d88b88d2e..7827639ecf5c 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -85,8 +85,7 @@ static void bt_host_release(struct device *dev)
struct hci_dev *hdev = to_hci_dev(dev);
if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
- hci_cleanup_dev(hdev);
- kfree(hdev);
+ hci_release_dev(hdev);
module_put(THIS_MODULE);
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 3663f880df11..cea01e275f1e 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -7204,7 +7204,7 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, u8 status,
if (!mgmt_rp)
goto done;
- if (status)
+ if (eir_len == 0)
goto send_rsp;
eir_len = eir_append_data(mgmt_rp->eir, 0, EIR_CLASS_OF_DEV,
@@ -7725,7 +7725,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
* advertising.
*/
if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
- return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING,
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
MGMT_STATUS_NOT_SUPPORTED);
if (cp->instance < 1 || cp->instance > hdev->le_num_of_adv_sets)
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index ae6f80730561..2c95bb58f901 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -70,7 +70,7 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
BT_DBG("dlc %p state %ld err %d", d, d->state, err);
- spin_lock_bh(&sk->sk_lock.slock);
+ lock_sock(sk);
if (err)
sk->sk_err = err;
@@ -91,7 +91,7 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
sk->sk_state_change(sk);
}
- spin_unlock_bh(&sk->sk_lock.slock);
+ release_sock(sk);
if (parent && sock_flag(sk, SOCK_ZAPPED)) {
/* We have to drop DLC lock here, otherwise
@@ -974,7 +974,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
if (!parent)
return 0;
- bh_lock_sock(parent);
+ lock_sock(parent);
/* Check for backlog size */
if (sk_acceptq_is_full(parent)) {
@@ -1001,7 +1001,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
result = 1;
done:
- bh_unlock_sock(parent);
+ release_sock(parent);
if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
parent->sk_state_change(parent);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index d9a4e88dacbb..98a881586512 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -48,6 +48,8 @@ struct sco_conn {
spinlock_t lock;
struct sock *sk;
+ struct delayed_work timeout_work;
+
unsigned int mtu;
};
@@ -74,31 +76,47 @@ struct sco_pinfo {
#define SCO_CONN_TIMEOUT (HZ * 40)
#define SCO_DISCONN_TIMEOUT (HZ * 2)
-static void sco_sock_timeout(struct timer_list *t)
+static void sco_sock_timeout(struct work_struct *work)
{
- struct sock *sk = from_timer(sk, t, sk_timer);
+ struct sco_conn *conn = container_of(work, struct sco_conn,
+ timeout_work.work);
+ struct sock *sk;
+
+ sco_conn_lock(conn);
+ sk = conn->sk;
+ if (sk)
+ sock_hold(sk);
+ sco_conn_unlock(conn);
+
+ if (!sk)
+ return;
BT_DBG("sock %p state %d", sk, sk->sk_state);
- bh_lock_sock(sk);
+ lock_sock(sk);
sk->sk_err = ETIMEDOUT;
sk->sk_state_change(sk);
- bh_unlock_sock(sk);
-
- sco_sock_kill(sk);
+ release_sock(sk);
sock_put(sk);
}
static void sco_sock_set_timer(struct sock *sk, long timeout)
{
+ if (!sco_pi(sk)->conn)
+ return;
+
BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout);
- sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
+ cancel_delayed_work(&sco_pi(sk)->conn->timeout_work);
+ schedule_delayed_work(&sco_pi(sk)->conn->timeout_work, timeout);
}
static void sco_sock_clear_timer(struct sock *sk)
{
+ if (!sco_pi(sk)->conn)
+ return;
+
BT_DBG("sock %p state %d", sk, sk->sk_state);
- sk_stop_timer(sk, &sk->sk_timer);
+ cancel_delayed_work(&sco_pi(sk)->conn->timeout_work);
}
/* ---- SCO connections ---- */
@@ -173,12 +191,14 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
if (sk) {
sock_hold(sk);
- bh_lock_sock(sk);
+ lock_sock(sk);
sco_sock_clear_timer(sk);
sco_chan_del(sk, err);
- bh_unlock_sock(sk);
- sco_sock_kill(sk);
+ release_sock(sk);
sock_put(sk);
+
+ /* Ensure no more work items will run before freeing conn. */
+ cancel_delayed_work_sync(&conn->timeout_work);
}
hcon->sco_data = NULL;
@@ -193,6 +213,8 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk,
sco_pi(sk)->conn = conn;
conn->sk = sk;
+ INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout);
+
if (parent)
bt_accept_enqueue(parent, sk, true);
}
@@ -212,44 +234,32 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
return err;
}
-static int sco_connect(struct sock *sk)
+static int sco_connect(struct hci_dev *hdev, struct sock *sk)
{
struct sco_conn *conn;
struct hci_conn *hcon;
- struct hci_dev *hdev;
int err, type;
BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst);
- hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR);
- if (!hdev)
- return -EHOSTUNREACH;
-
- hci_dev_lock(hdev);
-
if (lmp_esco_capable(hdev) && !disable_esco)
type = ESCO_LINK;
else
type = SCO_LINK;
if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT &&
- (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) {
- err = -EOPNOTSUPP;
- goto done;
- }
+ (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev)))
+ return -EOPNOTSUPP;
hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst,
sco_pi(sk)->setting);
- if (IS_ERR(hcon)) {
- err = PTR_ERR(hcon);
- goto done;
- }
+ if (IS_ERR(hcon))
+ return PTR_ERR(hcon);
conn = sco_conn_add(hcon);
if (!conn) {
hci_conn_drop(hcon);
- err = -ENOMEM;
- goto done;
+ return -ENOMEM;
}
/* Update source addr of the socket */
@@ -257,7 +267,7 @@ static int sco_connect(struct sock *sk)
err = sco_chan_add(conn, sk, NULL);
if (err)
- goto done;
+ return err;
if (hcon->state == BT_CONNECTED) {
sco_sock_clear_timer(sk);
@@ -267,9 +277,6 @@ static int sco_connect(struct sock *sk)
sco_sock_set_timer(sk, sk->sk_sndtimeo);
}
-done:
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
return err;
}
@@ -394,8 +401,7 @@ static void sco_sock_cleanup_listen(struct sock *parent)
*/
static void sco_sock_kill(struct sock *sk)
{
- if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket ||
- sock_flag(sk, SOCK_DEAD))
+ if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
return;
BT_DBG("sk %p state %d", sk, sk->sk_state);
@@ -443,11 +449,10 @@ static void __sco_sock_close(struct sock *sk)
/* Must be called on unlocked socket. */
static void sco_sock_close(struct sock *sk)
{
- sco_sock_clear_timer(sk);
lock_sock(sk);
+ sco_sock_clear_timer(sk);
__sco_sock_close(sk);
release_sock(sk);
- sco_sock_kill(sk);
}
static void sco_skb_put_cmsg(struct sk_buff *skb, struct msghdr *msg,
@@ -500,8 +505,6 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock,
sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT;
- timer_setup(&sk->sk_timer, sco_sock_timeout, 0);
-
bt_sock_link(&sco_sk_list, sk);
return sk;
}
@@ -566,6 +569,7 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
{
struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
struct sock *sk = sock->sk;
+ struct hci_dev *hdev;
int err;
BT_DBG("sk %p", sk);
@@ -580,12 +584,19 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
if (sk->sk_type != SOCK_SEQPACKET)
return -EINVAL;
+ hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR);
+ if (!hdev)
+ return -EHOSTUNREACH;
+ hci_dev_lock(hdev);
+
lock_sock(sk);
/* Set destination address and psm */
bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr);
- err = sco_connect(sk);
+ err = sco_connect(hdev, sk);
+ hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
if (err)
goto done;
@@ -773,6 +784,11 @@ static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting)
cp.max_latency = cpu_to_le16(0xffff);
cp.retrans_effort = 0xff;
break;
+ default:
+ /* use CVSD settings as fallback */
+ cp.max_latency = cpu_to_le16(0xffff);
+ cp.retrans_effort = 0xff;
+ break;
}
hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ,
@@ -1083,11 +1099,11 @@ static void sco_conn_ready(struct sco_conn *conn)
BT_DBG("conn %p", conn);
if (sk) {
+ lock_sock(sk);
sco_sock_clear_timer(sk);
- bh_lock_sock(sk);
sk->sk_state = BT_CONNECTED;
sk->sk_state_change(sk);
- bh_unlock_sock(sk);
+ release_sock(sk);
} else {
sco_conn_lock(conn);
@@ -1102,12 +1118,12 @@ static void sco_conn_ready(struct sco_conn *conn)
return;
}
- bh_lock_sock(parent);
+ lock_sock(parent);
sk = sco_sock_alloc(sock_net(parent), NULL,
BTPROTO_SCO, GFP_ATOMIC, 0);
if (!sk) {
- bh_unlock_sock(parent);
+ release_sock(parent);
sco_conn_unlock(conn);
return;
}
@@ -1128,7 +1144,7 @@ static void sco_conn_ready(struct sco_conn *conn)
/* Wake up parent */
parent->sk_data_ready(parent);
- bh_unlock_sock(parent);
+ release_sock(parent);
sco_conn_unlock(conn);
}
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 1cc75c811e24..2eb0e55ef54d 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -7,6 +7,7 @@
#include <linux/vmalloc.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
+#include <linux/rcupdate_trace.h>
#include <linux/sched/signal.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
@@ -15,6 +16,7 @@
#include <linux/error-injection.h>
#include <linux/smp.h>
#include <linux/sock_diag.h>
+#include <net/xdp.h>
#define CREATE_TRACE_POINTS
#include <trace/events/bpf_test_run.h>
@@ -87,17 +89,19 @@ reset:
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
u32 *retval, u32 *time, bool xdp)
{
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
+ struct bpf_prog_array_item item = {.prog = prog};
+ struct bpf_run_ctx *old_ctx;
+ struct bpf_cg_run_ctx run_ctx;
struct bpf_test_timer t = { NO_MIGRATE };
enum bpf_cgroup_storage_type stype;
int ret;
for_each_cgroup_storage_type(stype) {
- storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
- if (IS_ERR(storage[stype])) {
- storage[stype] = NULL;
+ item.cgroup_storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+ if (IS_ERR(item.cgroup_storage[stype])) {
+ item.cgroup_storage[stype] = NULL;
for_each_cgroup_storage_type(stype)
- bpf_cgroup_storage_free(storage[stype]);
+ bpf_cgroup_storage_free(item.cgroup_storage[stype]);
return -ENOMEM;
}
}
@@ -106,22 +110,19 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
repeat = 1;
bpf_test_timer_enter(&t);
+ old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
do {
- ret = bpf_cgroup_storage_set(storage);
- if (ret)
- break;
-
+ run_ctx.prog_item = &item;
if (xdp)
*retval = bpf_prog_run_xdp(prog, ctx);
else
- *retval = BPF_PROG_RUN(prog, ctx);
-
- bpf_cgroup_storage_unset();
+ *retval = bpf_prog_run(prog, ctx);
} while (bpf_test_timer_continue(&t, repeat, &ret, time));
+ bpf_reset_run_ctx(old_ctx);
bpf_test_timer_leave(&t);
for_each_cgroup_storage_type(stype)
- bpf_cgroup_storage_free(storage[stype]);
+ bpf_cgroup_storage_free(item.cgroup_storage[stype]);
return ret;
}
@@ -326,7 +327,7 @@ __bpf_prog_test_run_raw_tp(void *data)
struct bpf_raw_tp_test_run_info *info = data;
rcu_read_lock();
- info->retval = BPF_PROG_RUN(info->prog, info->ctx);
+ info->retval = bpf_prog_run(info->prog, info->ctx);
rcu_read_unlock();
}
@@ -687,6 +688,64 @@ out:
return ret;
}
+static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp)
+{
+ unsigned int ingress_ifindex, rx_queue_index;
+ struct netdev_rx_queue *rxqueue;
+ struct net_device *device;
+
+ if (!xdp_md)
+ return 0;
+
+ if (xdp_md->egress_ifindex != 0)
+ return -EINVAL;
+
+ ingress_ifindex = xdp_md->ingress_ifindex;
+ rx_queue_index = xdp_md->rx_queue_index;
+
+ if (!ingress_ifindex && rx_queue_index)
+ return -EINVAL;
+
+ if (ingress_ifindex) {
+ device = dev_get_by_index(current->nsproxy->net_ns,
+ ingress_ifindex);
+ if (!device)
+ return -ENODEV;
+
+ if (rx_queue_index >= device->real_num_rx_queues)
+ goto free_dev;
+
+ rxqueue = __netif_get_rx_queue(device, rx_queue_index);
+
+ if (!xdp_rxq_info_is_reg(&rxqueue->xdp_rxq))
+ goto free_dev;
+
+ xdp->rxq = &rxqueue->xdp_rxq;
+ /* The device is now tracked in the xdp->rxq for later
+ * dev_put()
+ */
+ }
+
+ xdp->data = xdp->data_meta + xdp_md->data;
+ return 0;
+
+free_dev:
+ dev_put(device);
+ return -EINVAL;
+}
+
+static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md)
+{
+ if (!xdp_md)
+ return;
+
+ xdp_md->data = xdp->data - xdp->data_meta;
+ xdp_md->data_end = xdp->data_end - xdp->data_meta;
+
+ if (xdp_md->ingress_ifindex)
+ dev_put(xdp->rxq->dev);
+}
+
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
@@ -697,38 +756,73 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
struct netdev_rx_queue *rxqueue;
struct xdp_buff xdp = {};
u32 retval, duration;
+ struct xdp_md *ctx;
u32 max_data_sz;
void *data;
- int ret;
+ int ret = -EINVAL;
if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
prog->expected_attach_type == BPF_XDP_CPUMAP)
return -EINVAL;
- if (kattr->test.ctx_in || kattr->test.ctx_out)
- return -EINVAL;
+
+ ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ if (ctx) {
+ /* There can't be user provided data before the meta data */
+ if (ctx->data_meta || ctx->data_end != size ||
+ ctx->data > ctx->data_end ||
+ unlikely(xdp_metalen_invalid(ctx->data)))
+ goto free_ctx;
+ /* Meta data is allocated from the headroom */
+ headroom -= ctx->data;
+ }
/* XDP have extra tailroom as (most) drivers use full page */
max_data_sz = 4096 - headroom - tailroom;
data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
- if (IS_ERR(data))
- return PTR_ERR(data);
+ if (IS_ERR(data)) {
+ ret = PTR_ERR(data);
+ goto free_ctx;
+ }
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
&rxqueue->xdp_rxq);
xdp_prepare_buff(&xdp, data, headroom, size, true);
+ ret = xdp_convert_md_to_buff(ctx, &xdp);
+ if (ret)
+ goto free_data;
+
bpf_prog_change_xdp(NULL, prog);
ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
+ /* We convert the xdp_buff back to an xdp_md before checking the return
+ * code so the reference count of any held netdevice will be decremented
+ * even if the test run failed.
+ */
+ xdp_convert_buff_to_md(&xdp, ctx);
if (ret)
goto out;
- if (xdp.data != data + headroom || xdp.data_end != xdp.data + size)
- size = xdp.data_end - xdp.data;
- ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
+
+ if (xdp.data_meta != data + headroom ||
+ xdp.data_end != xdp.data_meta + size)
+ size = xdp.data_end - xdp.data_meta;
+
+ ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval,
+ duration);
+ if (!ret)
+ ret = bpf_ctx_finish(kattr, uattr, ctx,
+ sizeof(struct xdp_md));
+
out:
bpf_prog_change_xdp(prog, NULL);
+free_data:
kfree(data);
+free_ctx:
+ kfree(ctx);
return ret;
}
@@ -895,7 +989,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
bpf_test_timer_enter(&t);
do {
ctx.selected_sk = NULL;
- retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN);
+ retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, bpf_prog_run);
} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
bpf_test_timer_leave(&t);
@@ -951,7 +1045,10 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
goto out;
}
}
+
+ rcu_read_lock_trace();
retval = bpf_prog_run_pin_on_cpu(prog, ctx);
+ rcu_read_unlock_trace();
if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32))) {
err = -EFAULT;
diff --git a/net/bridge/br.c b/net/bridge/br.c
index bbab9984f24e..d3a32c6813e0 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -166,8 +166,7 @@ static int br_switchdev_event(struct notifier_block *unused,
case SWITCHDEV_FDB_ADD_TO_BRIDGE:
fdb_info = ptr;
err = br_fdb_external_learn_add(br, p, fdb_info->addr,
- fdb_info->vid,
- fdb_info->is_local, false);
+ fdb_info->vid, false);
if (err) {
err = notifier_from_errno(err);
break;
@@ -202,6 +201,48 @@ static struct notifier_block br_switchdev_notifier = {
.notifier_call = br_switchdev_event,
};
+/* called under rtnl_mutex */
+static int br_switchdev_blocking_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ struct switchdev_notifier_brport_info *brport_info;
+ const struct switchdev_brport *b;
+ struct net_bridge_port *p;
+ int err = NOTIFY_DONE;
+
+ p = br_port_get_rtnl(dev);
+ if (!p)
+ goto out;
+
+ switch (event) {
+ case SWITCHDEV_BRPORT_OFFLOADED:
+ brport_info = ptr;
+ b = &brport_info->brport;
+
+ err = br_switchdev_port_offload(p, b->dev, b->ctx,
+ b->atomic_nb, b->blocking_nb,
+ b->tx_fwd_offload, extack);
+ err = notifier_from_errno(err);
+ break;
+ case SWITCHDEV_BRPORT_UNOFFLOADED:
+ brport_info = ptr;
+ b = &brport_info->brport;
+
+ br_switchdev_port_unoffload(p, b->ctx, b->atomic_nb,
+ b->blocking_nb);
+ break;
+ }
+
+out:
+ return err;
+}
+
+static struct notifier_block br_switchdev_blocking_notifier = {
+ .notifier_call = br_switchdev_blocking_event,
+};
+
/* br_boolopt_toggle - change user-controlled boolean option
*
* @br: bridge device
@@ -215,17 +256,22 @@ static struct notifier_block br_switchdev_notifier = {
int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on,
struct netlink_ext_ack *extack)
{
+ int err = 0;
+
switch (opt) {
case BR_BOOLOPT_NO_LL_LEARN:
br_opt_toggle(br, BROPT_NO_LL_LEARN, on);
break;
+ case BR_BOOLOPT_MCAST_VLAN_SNOOPING:
+ err = br_multicast_toggle_vlan_snooping(br, on, extack);
+ break;
default:
/* shouldn't be called with unsupported options */
WARN_ON(1);
break;
}
- return 0;
+ return err;
}
int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt)
@@ -233,6 +279,8 @@ int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt)
switch (opt) {
case BR_BOOLOPT_NO_LL_LEARN:
return br_opt_get(br, BROPT_NO_LL_LEARN);
+ case BR_BOOLOPT_MCAST_VLAN_SNOOPING:
+ return br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED);
default:
/* shouldn't be called with unsupported options */
WARN_ON(1);
@@ -349,11 +397,15 @@ static int __init br_init(void)
if (err)
goto err_out4;
- err = br_netlink_init();
+ err = register_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
if (err)
goto err_out5;
- brioctl_set(br_ioctl_deviceless_stub);
+ err = br_netlink_init();
+ if (err)
+ goto err_out6;
+
+ brioctl_set(br_ioctl_stub);
#if IS_ENABLED(CONFIG_ATM_LANE)
br_fdb_test_addr_hook = br_fdb_test_addr;
@@ -367,6 +419,8 @@ static int __init br_init(void)
return 0;
+err_out6:
+ unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
err_out5:
unregister_switchdev_notifier(&br_switchdev_notifier);
err_out4:
@@ -386,6 +440,7 @@ static void __exit br_deinit(void)
{
stp_proto_unregister(&br_stp_proto);
br_netlink_fini();
+ unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
unregister_switchdev_notifier(&br_switchdev_notifier);
unregister_netdevice_notifier(&br_device_notifier);
brioctl_set(NULL);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index e8b626cc6bfd..8d6bab244c4a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -27,11 +27,14 @@ EXPORT_SYMBOL_GPL(nf_br_ops);
/* net device transmit always called with BH disabled */
netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
+ struct net_bridge_mcast_port *pmctx_null = NULL;
struct net_bridge *br = netdev_priv(dev);
+ struct net_bridge_mcast *brmctx = &br->multicast_ctx;
struct net_bridge_fdb_entry *dst;
struct net_bridge_mdb_entry *mdst;
const struct nf_br_ops *nf_ops;
u8 state = BR_STATE_FORWARDING;
+ struct net_bridge_vlan *vlan;
const unsigned char *dest;
u16 vid = 0;
@@ -53,7 +56,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
- if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid, &state))
+ if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid,
+ &state, &vlan))
goto out;
if (IS_ENABLED(CONFIG_INET) &&
@@ -82,15 +86,15 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
br_flood(br, skb, BR_PKT_MULTICAST, false, true);
goto out;
}
- if (br_multicast_rcv(br, NULL, skb, vid)) {
+ if (br_multicast_rcv(&brmctx, &pmctx_null, vlan, skb, vid)) {
kfree_skb(skb);
goto out;
}
- mdst = br_mdb_get(br, skb, vid);
+ mdst = br_mdb_get(brmctx, skb, vid);
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
- br_multicast_querier_exists(br, eth_hdr(skb), mdst))
- br_multicast_flood(mdst, skb, false, true);
+ br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst))
+ br_multicast_flood(mdst, skb, brmctx, false, true);
else
br_flood(br, skb, BR_PKT_MULTICAST, false, true);
} else if ((dst = br_fdb_find_rcu(br, dest, vid)) != NULL) {
@@ -450,7 +454,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_set_rx_mode = br_dev_set_multicast_list,
.ndo_change_rx_flags = br_dev_change_rx_flags,
.ndo_change_mtu = br_change_mtu,
- .ndo_do_ioctl = br_dev_ioctl,
+ .ndo_siocdevprivate = br_dev_siocdevprivate,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_netpoll_setup = br_netpoll_setup,
.ndo_netpoll_cleanup = br_netpoll_cleanup,
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 835cec1e5a03..46812b659710 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -732,11 +732,11 @@ static inline size_t fdb_nlmsg_size(void)
+ nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
}
-static int br_fdb_replay_one(struct notifier_block *nb,
+static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb,
const struct net_bridge_fdb_entry *fdb,
- struct net_device *dev, unsigned long action,
- const void *ctx)
+ unsigned long action, const void *ctx)
{
+ const struct net_bridge_port *p = READ_ONCE(fdb->dst);
struct switchdev_notifier_fdb_info item;
int err;
@@ -745,25 +745,25 @@ static int br_fdb_replay_one(struct notifier_block *nb,
item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
- item.info.dev = dev;
+ item.info.dev = (!p || item.is_local) ? br->dev : p->dev;
item.info.ctx = ctx;
err = nb->notifier_call(nb, action, &item);
return notifier_to_errno(err);
}
-int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
- const void *ctx, bool adding, struct notifier_block *nb)
+int br_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding,
+ struct notifier_block *nb)
{
struct net_bridge_fdb_entry *fdb;
struct net_bridge *br;
unsigned long action;
int err = 0;
- if (!netif_is_bridge_master(br_dev))
- return -EINVAL;
+ if (!nb)
+ return 0;
- if (!netif_is_bridge_port(dev) && !netif_is_bridge_master(dev))
+ if (!netif_is_bridge_master(br_dev))
return -EINVAL;
br = netdev_priv(br_dev);
@@ -776,14 +776,7 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
rcu_read_lock();
hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
- const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
- struct net_device *dst_dev;
-
- dst_dev = dst ? dst->dev : br->dev;
- if (dst_dev && dst_dev != dev)
- continue;
-
- err = br_fdb_replay_one(nb, fdb, dst_dev, action, ctx);
+ err = br_fdb_replay_one(br, nb, fdb, action, ctx);
if (err)
break;
}
@@ -792,7 +785,6 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
return err;
}
-EXPORT_SYMBOL_GPL(br_fdb_replay);
static void fdb_notify(struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb, int type,
@@ -1044,10 +1036,7 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
"FDB entry towards bridge must be permanent");
return -EINVAL;
}
-
- err = br_fdb_external_learn_add(br, p, addr, vid,
- ndm->ndm_state & NUD_PERMANENT,
- true);
+ err = br_fdb_external_learn_add(br, p, addr, vid, true);
} else {
spin_lock_bh(&br->hash_lock);
err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb);
@@ -1275,7 +1264,7 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
}
int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
- const unsigned char *addr, u16 vid, bool is_local,
+ const unsigned char *addr, u16 vid,
bool swdev_notify)
{
struct net_bridge_fdb_entry *fdb;
@@ -1293,7 +1282,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
if (swdev_notify)
flags |= BIT(BR_FDB_ADDED_BY_USER);
- if (is_local)
+ if (!p)
flags |= BIT(BR_FDB_LOCAL);
fdb = fdb_create(br, p, addr, vid, flags);
@@ -1322,7 +1311,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
if (swdev_notify)
set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
- if (is_local)
+ if (!p)
set_bit(BR_FDB_LOCAL, &fdb->flags);
if (modified)
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 07856362538f..ec646656dbf1 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -48,6 +48,8 @@ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb
skb_set_network_header(skb, depth);
}
+ br_switchdev_frame_set_offload_fwd_mark(skb);
+
dev_queue_xmit(skb);
return 0;
@@ -76,6 +78,11 @@ static void __br_forward(const struct net_bridge_port *to,
struct net *net;
int br_hook;
+ /* Mark the skb for forwarding offload early so that br_handle_vlan()
+ * can know whether to pop the VLAN header on egress or keep it.
+ */
+ nbp_switchdev_frame_mark_tx_fwd_offload(to, skb);
+
vg = nbp_vlan_group_rcu(to);
skb = br_handle_vlan(to->br, to, vg, skb);
if (!skb)
@@ -174,6 +181,8 @@ static struct net_bridge_port *maybe_deliver(
if (!should_deliver(p, skb))
return prev;
+ nbp_switchdev_frame_mark_tx_fwd_to_hwdom(p, skb);
+
if (!prev)
goto out;
@@ -267,20 +276,19 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
/* called with rcu_read_lock */
void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb,
+ struct net_bridge_mcast *brmctx,
bool local_rcv, bool local_orig)
{
- struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
- struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *prev = NULL;
struct net_bridge_port_group *p;
bool allow_mode_include = true;
struct hlist_node *rp;
- rp = br_multicast_get_first_rport_node(br, skb);
+ rp = br_multicast_get_first_rport_node(brmctx, skb);
if (mdst) {
p = rcu_dereference(mdst->ports);
- if (br_multicast_should_handle_mode(br, mdst->addr.proto) &&
+ if (br_multicast_should_handle_mode(brmctx, mdst->addr.proto) &&
br_multicast_is_star_g(&mdst->addr))
allow_mode_include = false;
} else {
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 6e4a32354a13..4a02f8bb278a 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -456,7 +456,7 @@ int br_add_bridge(struct net *net, const char *name)
dev_net_set(dev, net);
dev->rtnl_link_ops = &br_link_ops;
- res = register_netdev(dev);
+ res = register_netdevice(dev);
if (res)
free_netdev(dev);
return res;
@@ -467,7 +467,6 @@ int br_del_bridge(struct net *net, const char *name)
struct net_device *dev;
int ret = 0;
- rtnl_lock();
dev = __dev_get_by_name(net, name);
if (dev == NULL)
ret = -ENXIO; /* Could not find device */
@@ -485,7 +484,6 @@ int br_del_bridge(struct net *net, const char *name)
else
br_dev_delete(dev, NULL);
- rtnl_unlock();
return ret;
}
@@ -616,6 +614,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
err = dev_set_allmulti(dev, 1);
if (err) {
+ br_multicast_del_port(p);
kfree(p); /* kobject not yet init'd, manually free */
goto err1;
}
@@ -643,10 +642,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
if (err)
goto err5;
- err = nbp_switchdev_mark_set(p);
- if (err)
- goto err6;
-
dev_disable_lro(dev);
list_add_rcu(&p->list, &br->port_list);
@@ -684,13 +679,13 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
*/
err = dev_pre_changeaddr_notify(br->dev, dev->dev_addr, extack);
if (err)
- goto err7;
+ goto err6;
}
err = nbp_vlan_init(p, extack);
if (err) {
netdev_err(dev, "failed to initialize vlan filtering on this port\n");
- goto err7;
+ goto err6;
}
spin_lock_bh(&br->lock);
@@ -713,13 +708,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
return 0;
-err7:
+err6:
if (fdb_synced)
br_fdb_unsync_static(br, p);
list_del_rcu(&p->list);
br_fdb_delete_by_port(br, p, 0, 1);
nbp_update_port_count(br);
-err6:
netdev_upper_dev_unlink(dev, br->dev);
err5:
dev->priv_flags &= ~IFF_BRIDGE_PORT;
@@ -729,6 +723,7 @@ err4:
err3:
sysfs_remove_link(br->ifobj, p->dev->name);
err2:
+ br_multicast_del_port(p);
kobject_put(&p->kobj);
dev_set_allmulti(dev, -1);
err1:
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 1f506309efa8..b50382f957c1 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -69,8 +69,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
enum br_pkt_type pkt_type = BR_PKT_UNICAST;
struct net_bridge_fdb_entry *dst = NULL;
+ struct net_bridge_mcast_port *pmctx;
struct net_bridge_mdb_entry *mdst;
bool local_rcv, mcast_hit = false;
+ struct net_bridge_mcast *brmctx;
+ struct net_bridge_vlan *vlan;
struct net_bridge *br;
u16 vid = 0;
u8 state;
@@ -78,9 +81,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (!p || p->state == BR_STATE_DISABLED)
goto drop;
+ brmctx = &p->br->multicast_ctx;
+ pmctx = &p->multicast_ctx;
state = p->state;
if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid,
- &state))
+ &state, &vlan))
goto out;
nbp_switchdev_frame_mark(p, skb);
@@ -98,7 +103,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
local_rcv = true;
} else {
pkt_type = BR_PKT_MULTICAST;
- if (br_multicast_rcv(br, p, skb, vid))
+ if (br_multicast_rcv(&brmctx, &pmctx, vlan, skb, vid))
goto drop;
}
}
@@ -128,11 +133,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
switch (pkt_type) {
case BR_PKT_MULTICAST:
- mdst = br_mdb_get(br, skb, vid);
+ mdst = br_mdb_get(brmctx, skb, vid);
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
- br_multicast_querier_exists(br, eth_hdr(skb), mdst)) {
+ br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst)) {
if ((mdst && mdst->host_joined) ||
- br_multicast_is_router(br, skb)) {
+ br_multicast_is_router(brmctx, skb)) {
local_rcv = true;
br->dev->stats.multicast++;
}
@@ -162,7 +167,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (!mcast_hit)
br_flood(br, skb, pkt_type, local_rcv, false);
else
- br_multicast_flood(mdst, skb, local_rcv, false);
+ br_multicast_flood(mdst, skb, brmctx, local_rcv, false);
}
if (local_rcv)
@@ -289,11 +294,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
p = br_port_get_rcu(skb->dev);
- if (p->flags & BR_VLAN_TUNNEL) {
- if (br_handle_ingress_vlan_tunnel(skb, p,
- nbp_vlan_group_rcu(p)))
- goto drop;
- }
+ if (p->flags & BR_VLAN_TUNNEL)
+ br_handle_ingress_vlan_tunnel(skb, p, nbp_vlan_group_rcu(p));
if (unlikely(is_link_local_ether_addr(dest))) {
u16 fwd_mask = p->br->group_fwd_mask_required;
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 2db800fc27ca..793b0db9d9a3 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -106,15 +106,32 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
* This interface is deprecated because it was too difficult
* to do the translation for 32/64bit ioctl compatibility.
*/
-static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd)
{
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *p = NULL;
unsigned long args[4];
+ void __user *argp;
int ret = -EOPNOTSUPP;
- if (copy_from_user(args, rq->ifr_data, sizeof(args)))
- return -EFAULT;
+ if (in_compat_syscall()) {
+ unsigned int cargs[4];
+
+ if (copy_from_user(cargs, data, sizeof(cargs)))
+ return -EFAULT;
+
+ args[0] = cargs[0];
+ args[1] = cargs[1];
+ args[2] = cargs[2];
+ args[3] = cargs[3];
+
+ argp = compat_ptr(args[1]);
+ } else {
+ if (copy_from_user(args, data, sizeof(args)))
+ return -EFAULT;
+
+ argp = (void __user *)args[1];
+ }
switch (args[0]) {
case BRCTL_ADD_IF:
@@ -171,7 +188,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
return -ENOMEM;
get_port_ifindices(br, indices, num);
- if (copy_to_user((void __user *)args[1], indices, num*sizeof(int)))
+ if (copy_to_user(argp, indices, num * sizeof(int)))
num = -EFAULT;
kfree(indices);
return num;
@@ -232,7 +249,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
rcu_read_unlock();
- if (copy_to_user((void __user *)args[1], &p, sizeof(p)))
+ if (copy_to_user(argp, &p, sizeof(p)))
return -EFAULT;
return 0;
@@ -282,8 +299,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
}
case BRCTL_GET_FDB_ENTRIES:
- return get_fdb_entries(br, (void __user *)args[1],
- args[2], args[3]);
+ return get_fdb_entries(br, argp, args[2], args[3]);
}
if (!ret) {
@@ -320,7 +336,7 @@ static int old_deviceless(struct net *net, void __user *uarg)
args[2] = get_bridge_ifindices(net, indices, args[2]);
- ret = copy_to_user((void __user *)args[1], indices, args[2]*sizeof(int))
+ ret = copy_to_user(uarg, indices, args[2]*sizeof(int))
? -EFAULT : args[2];
kfree(indices);
@@ -350,48 +366,47 @@ static int old_deviceless(struct net *net, void __user *uarg)
return -EOPNOTSUPP;
}
-int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
+int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd,
+ struct ifreq *ifr, void __user *uarg)
{
+ int ret = -EOPNOTSUPP;
+
+ rtnl_lock();
+
switch (cmd) {
case SIOCGIFBR:
case SIOCSIFBR:
- return old_deviceless(net, uarg);
-
+ ret = old_deviceless(net, uarg);
+ break;
case SIOCBRADDBR:
case SIOCBRDELBR:
{
char buf[IFNAMSIZ];
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+ ret = -EPERM;
+ break;
+ }
- if (copy_from_user(buf, uarg, IFNAMSIZ))
- return -EFAULT;
+ if (copy_from_user(buf, uarg, IFNAMSIZ)) {
+ ret = -EFAULT;
+ break;
+ }
buf[IFNAMSIZ-1] = 0;
if (cmd == SIOCBRADDBR)
- return br_add_bridge(net, buf);
-
- return br_del_bridge(net, buf);
- }
+ ret = br_add_bridge(net, buf);
+ else
+ ret = br_del_bridge(net, buf);
}
- return -EOPNOTSUPP;
-}
-
-int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
-{
- struct net_bridge *br = netdev_priv(dev);
-
- switch (cmd) {
- case SIOCDEVPRIVATE:
- return old_dev_ioctl(dev, rq, cmd);
-
+ break;
case SIOCBRADDIF:
case SIOCBRDELIF:
- return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF);
-
+ ret = add_del_if(br, ifr->ifr_ifindex, cmd == SIOCBRADDIF);
+ break;
}
- br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd);
- return -EOPNOTSUPP;
+ rtnl_unlock();
+
+ return ret;
}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 17a720b4473f..0281453f7766 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -16,57 +16,89 @@
#include "br_private.h"
-static bool br_rports_have_mc_router(struct net_bridge *br)
+static bool
+br_ip4_rports_get_timer(struct net_bridge_mcast_port *pmctx,
+ unsigned long *timer)
+{
+ *timer = br_timer_value(&pmctx->ip4_mc_router_timer);
+ return !hlist_unhashed(&pmctx->ip4_rlist);
+}
+
+static bool
+br_ip6_rports_get_timer(struct net_bridge_mcast_port *pmctx,
+ unsigned long *timer)
{
#if IS_ENABLED(CONFIG_IPV6)
- return !hlist_empty(&br->ip4_mc_router_list) ||
- !hlist_empty(&br->ip6_mc_router_list);
+ *timer = br_timer_value(&pmctx->ip6_mc_router_timer);
+ return !hlist_unhashed(&pmctx->ip6_rlist);
#else
- return !hlist_empty(&br->ip4_mc_router_list);
+ *timer = 0;
+ return false;
#endif
}
-static bool
-br_ip4_rports_get_timer(struct net_bridge_port *port, unsigned long *timer)
+static size_t __br_rports_one_size(void)
{
- *timer = br_timer_value(&port->ip4_mc_router_timer);
- return !hlist_unhashed(&port->ip4_rlist);
+ return nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PORT */
+ nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_TIMER */
+ nla_total_size(sizeof(u8)) + /* MDBA_ROUTER_PATTR_TYPE */
+ nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_INET_TIMER */
+ nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_INET6_TIMER */
+ nla_total_size(sizeof(u32)); /* MDBA_ROUTER_PATTR_VID */
}
-static bool
-br_ip6_rports_get_timer(struct net_bridge_port *port, unsigned long *timer)
+size_t br_rports_size(const struct net_bridge_mcast *brmctx)
{
+ struct net_bridge_mcast_port *pmctx;
+ size_t size = nla_total_size(0); /* MDBA_ROUTER */
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(pmctx, &brmctx->ip4_mc_router_list,
+ ip4_rlist)
+ size += __br_rports_one_size();
+
#if IS_ENABLED(CONFIG_IPV6)
- *timer = br_timer_value(&port->ip6_mc_router_timer);
- return !hlist_unhashed(&port->ip6_rlist);
-#else
- *timer = 0;
- return false;
+ hlist_for_each_entry_rcu(pmctx, &brmctx->ip6_mc_router_list,
+ ip6_rlist)
+ size += __br_rports_one_size();
#endif
+ rcu_read_unlock();
+
+ return size;
}
-static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
- struct net_device *dev)
+int br_rports_fill_info(struct sk_buff *skb,
+ const struct net_bridge_mcast *brmctx)
{
- struct net_bridge *br = netdev_priv(dev);
+ u16 vid = brmctx->vlan ? brmctx->vlan->vid : 0;
bool have_ip4_mc_rtr, have_ip6_mc_rtr;
unsigned long ip4_timer, ip6_timer;
struct nlattr *nest, *port_nest;
struct net_bridge_port *p;
- if (!br->multicast_router)
- return 0;
-
- if (!br_rports_have_mc_router(br))
+ if (!brmctx->multicast_router || !br_rports_have_mc_router(brmctx))
return 0;
nest = nla_nest_start_noflag(skb, MDBA_ROUTER);
if (nest == NULL)
return -EMSGSIZE;
- list_for_each_entry_rcu(p, &br->port_list, list) {
- have_ip4_mc_rtr = br_ip4_rports_get_timer(p, &ip4_timer);
- have_ip6_mc_rtr = br_ip6_rports_get_timer(p, &ip6_timer);
+ list_for_each_entry_rcu(p, &brmctx->br->port_list, list) {
+ struct net_bridge_mcast_port *pmctx;
+
+ if (vid) {
+ struct net_bridge_vlan *v;
+
+ v = br_vlan_find(nbp_vlan_group(p), vid);
+ if (!v)
+ continue;
+ pmctx = &v->port_mcast_ctx;
+ } else {
+ pmctx = &p->multicast_ctx;
+ }
+
+ have_ip4_mc_rtr = br_ip4_rports_get_timer(pmctx, &ip4_timer);
+ have_ip6_mc_rtr = br_ip6_rports_get_timer(pmctx, &ip6_timer);
if (!have_ip4_mc_rtr && !have_ip6_mc_rtr)
continue;
@@ -79,13 +111,14 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
nla_put_u32(skb, MDBA_ROUTER_PATTR_TIMER,
max(ip4_timer, ip6_timer)) ||
nla_put_u8(skb, MDBA_ROUTER_PATTR_TYPE,
- p->multicast_router) ||
+ p->multicast_ctx.multicast_router) ||
(have_ip4_mc_rtr &&
nla_put_u32(skb, MDBA_ROUTER_PATTR_INET_TIMER,
ip4_timer)) ||
(have_ip6_mc_rtr &&
nla_put_u32(skb, MDBA_ROUTER_PATTR_INET6_TIMER,
- ip6_timer))) {
+ ip6_timer)) ||
+ (vid && nla_put_u16(skb, MDBA_ROUTER_PATTR_VID, vid))) {
nla_nest_cancel(skb, port_nest);
goto fail;
}
@@ -240,7 +273,7 @@ static int __mdb_fill_info(struct sk_buff *skb,
switch (mp->addr.proto) {
case htons(ETH_P_IP):
- dump_srcs_mode = !!(mp->br->multicast_igmp_version == 3);
+ dump_srcs_mode = !!(mp->br->multicast_ctx.multicast_igmp_version == 3);
if (mp->addr.src.ip4) {
if (nla_put_in_addr(skb, MDBA_MDB_EATTR_SOURCE,
mp->addr.src.ip4))
@@ -250,7 +283,7 @@ static int __mdb_fill_info(struct sk_buff *skb,
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- dump_srcs_mode = !!(mp->br->multicast_mld_version == 2);
+ dump_srcs_mode = !!(mp->br->multicast_ctx.multicast_mld_version == 2);
if (!ipv6_addr_any(&mp->addr.src.ip6)) {
if (nla_put_in6_addr(skb, MDBA_MDB_EATTR_SOURCE,
&mp->addr.src.ip6))
@@ -390,6 +423,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
for_each_netdev_rcu(net, dev) {
if (dev->priv_flags & IFF_EBRIDGE) {
+ struct net_bridge *br = netdev_priv(dev);
struct br_port_msg *bpm;
if (idx < s_idx)
@@ -406,7 +440,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
bpm->ifindex = dev->ifindex;
if (br_mdb_fill_info(skb, cb, dev) < 0)
goto out;
- if (br_rports_fill_info(skb, cb, dev) < 0)
+ if (br_rports_fill_info(skb, &br->multicast_ctx) < 0)
goto out;
cb->args[1] = 0;
@@ -483,7 +517,7 @@ static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg)
/* MDBA_MDB_EATTR_SOURCE */
if (pg->key.addr.src.ip4)
nlmsg_size += nla_total_size(sizeof(__be32));
- if (pg->key.port->br->multicast_igmp_version == 2)
+ if (pg->key.port->br->multicast_ctx.multicast_igmp_version == 2)
goto out;
addr_size = sizeof(__be32);
break;
@@ -492,7 +526,7 @@ static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg)
/* MDBA_MDB_EATTR_SOURCE */
if (!ipv6_addr_any(&pg->key.addr.src.ip6))
nlmsg_size += nla_total_size(sizeof(struct in6_addr));
- if (pg->key.port->br->multicast_mld_version == 1)
+ if (pg->key.port->br->multicast_ctx.multicast_mld_version == 1)
goto out;
addr_size = sizeof(struct in6_addr);
break;
@@ -617,6 +651,9 @@ int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
ASSERT_RTNL();
+ if (!nb)
+ return 0;
+
if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
return -EINVAL;
@@ -686,7 +723,6 @@ out_free_mdb:
return err;
}
-EXPORT_SYMBOL_GPL(br_mdb_replay);
static void br_mdb_switchdev_host_port(struct net_device *dev,
struct net_device *lower_dev,
@@ -781,12 +817,12 @@ errout:
static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
struct net_device *dev,
- int ifindex, u32 pid,
+ int ifindex, u16 vid, u32 pid,
u32 seq, int type, unsigned int flags)
{
+ struct nlattr *nest, *port_nest;
struct br_port_msg *bpm;
struct nlmsghdr *nlh;
- struct nlattr *nest;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0);
if (!nlh)
@@ -800,8 +836,18 @@ static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
if (!nest)
goto cancel;
- if (nla_put_u32(skb, MDBA_ROUTER_PORT, ifindex))
+ port_nest = nla_nest_start_noflag(skb, MDBA_ROUTER_PORT);
+ if (!port_nest)
+ goto end;
+ if (nla_put_nohdr(skb, sizeof(u32), &ifindex)) {
+ nla_nest_cancel(skb, port_nest);
+ goto end;
+ }
+ if (vid && nla_put_u16(skb, MDBA_ROUTER_PATTR_VID, vid)) {
+ nla_nest_cancel(skb, port_nest);
goto end;
+ }
+ nla_nest_end(skb, port_nest);
nla_nest_end(skb, nest);
nlmsg_end(skb, nlh);
@@ -817,23 +863,28 @@ cancel:
static inline size_t rtnl_rtr_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct br_port_msg))
- + nla_total_size(sizeof(__u32));
+ + nla_total_size(sizeof(__u32))
+ + nla_total_size(sizeof(u16));
}
-void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
+void br_rtr_notify(struct net_device *dev, struct net_bridge_mcast_port *pmctx,
int type)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
int ifindex;
+ u16 vid;
- ifindex = port ? port->dev->ifindex : 0;
+ ifindex = pmctx ? pmctx->port->dev->ifindex : 0;
+ vid = pmctx && br_multicast_port_ctx_is_vlan(pmctx) ? pmctx->vlan->vid :
+ 0;
skb = nlmsg_new(rtnl_rtr_nlmsg_size(), GFP_ATOMIC);
if (!skb)
goto errout;
- err = nlmsg_populate_rtr_fill(skb, dev, ifindex, 0, 0, type, NTF_SELF);
+ err = nlmsg_populate_rtr_fill(skb, dev, ifindex, vid, 0, 0, type,
+ NTF_SELF);
if (err < 0) {
kfree_skb(skb);
goto errout;
@@ -1004,14 +1055,47 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
}
+static struct net_bridge_mcast *
+__br_mdb_choose_context(struct net_bridge *br,
+ const struct br_mdb_entry *entry,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_mcast *brmctx = NULL;
+ struct net_bridge_vlan *v;
+
+ if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+ brmctx = &br->multicast_ctx;
+ goto out;
+ }
+
+ if (!entry->vid) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot add an entry without a vlan when vlan snooping is enabled");
+ goto out;
+ }
+
+ v = br_vlan_find(br_vlan_group(br), entry->vid);
+ if (!v) {
+ NL_SET_ERR_MSG_MOD(extack, "Vlan is not configured");
+ goto out;
+ }
+ if (br_multicast_ctx_vlan_global_disabled(&v->br_mcast_ctx)) {
+ NL_SET_ERR_MSG_MOD(extack, "Vlan's multicast processing is disabled");
+ goto out;
+ }
+ brmctx = &v->br_mcast_ctx;
+out:
+ return brmctx;
+}
+
static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
struct br_mdb_entry *entry,
struct nlattr **mdb_attrs,
struct netlink_ext_ack *extack)
{
struct net_bridge_mdb_entry *mp, *star_mp;
- struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
+ struct net_bridge_port_group *p;
+ struct net_bridge_mcast *brmctx;
struct br_ip group, star_group;
unsigned long now = jiffies;
unsigned char flags = 0;
@@ -1020,6 +1104,10 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
__mdb_entry_to_br_ip(entry, &group, mdb_attrs);
+ brmctx = __br_mdb_choose_context(br, entry, extack);
+ if (!brmctx)
+ return -EINVAL;
+
/* host join errors which can happen before creating the group */
if (!port) {
/* don't allow any flags for host-joined groups */
@@ -1053,7 +1141,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
return -EEXIST;
}
- br_multicast_host_join(mp, false);
+ br_multicast_host_join(brmctx, mp, false);
br_mdb_notify(br->dev, mp, NULL, RTM_NEWMDB);
return 0;
@@ -1084,14 +1172,15 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
}
rcu_assign_pointer(*pp, p);
if (entry->state == MDB_TEMPORARY)
- mod_timer(&p->timer, now + br->multicast_membership_interval);
+ mod_timer(&p->timer,
+ now + brmctx->multicast_membership_interval);
br_mdb_notify(br->dev, mp, p, RTM_NEWMDB);
/* if we are adding a new EXCLUDE port group (*,G) it needs to be also
* added to all S,G entries for proper replication, if we are adding
* a new INCLUDE port (S,G) then all of *,G EXCLUDE ports need to be
* added to it for proper replication
*/
- if (br_multicast_should_handle_mode(br, group.proto)) {
+ if (br_multicast_should_handle_mode(brmctx, group.proto)) {
switch (filter_mode) {
case MCAST_EXCLUDE:
br_multicast_star_g_handle_mode(p, MCAST_EXCLUDE);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index d0434dc8c03b..9231617a16e4 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -49,30 +49,30 @@ static const struct rhashtable_params br_sg_port_rht_params = {
.automatic_shrinking = true,
};
-static void br_multicast_start_querier(struct net_bridge *br,
+static void br_multicast_start_querier(struct net_bridge_mcast *brmctx,
struct bridge_mcast_own_query *query);
-static void br_ip4_multicast_add_router(struct net_bridge *br,
- struct net_bridge_port *port);
-static void br_ip4_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip4_multicast_add_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx);
+static void br_ip4_multicast_leave_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
__be32 group,
__u16 vid,
const unsigned char *src);
static void br_multicast_port_group_rexmit(struct timer_list *t);
static void
-br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted);
-static void br_ip6_multicast_add_router(struct net_bridge *br,
- struct net_bridge_port *port);
+br_multicast_rport_del_notify(struct net_bridge_mcast_port *pmctx, bool deleted);
+static void br_ip6_multicast_add_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx);
#if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip6_multicast_leave_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
const struct in6_addr *group,
__u16 vid, const unsigned char *src);
#endif
static struct net_bridge_port_group *
-__br_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
+__br_multicast_add_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct br_ip *group,
const unsigned char *src,
u8 filter_mode,
@@ -80,6 +80,7 @@ __br_multicast_add_group(struct net_bridge *br,
bool blocked);
static void br_multicast_find_del_pg(struct net_bridge *br,
struct net_bridge_port_group *pg);
+static void __br_multicast_stop(struct net_bridge_mcast *brmctx);
static struct net_bridge_port_group *
br_sg_port_find(struct net_bridge *br,
@@ -140,12 +141,14 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br,
}
#endif
-struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
+struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
struct sk_buff *skb, u16 vid)
{
+ struct net_bridge *br = brmctx->br;
struct br_ip ip;
- if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
+ br_multicast_ctx_vlan_global_disabled(brmctx))
return NULL;
if (BR_INPUT_SKB_CB(skb)->igmp)
@@ -158,7 +161,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
switch (skb->protocol) {
case htons(ETH_P_IP):
ip.dst.ip4 = ip_hdr(skb)->daddr;
- if (br->multicast_igmp_version == 3) {
+ if (brmctx->multicast_igmp_version == 3) {
struct net_bridge_mdb_entry *mdb;
ip.src.ip4 = ip_hdr(skb)->saddr;
@@ -171,7 +174,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
ip.dst.ip6 = ipv6_hdr(skb)->daddr;
- if (br->multicast_mld_version == 2) {
+ if (brmctx->multicast_mld_version == 2) {
struct net_bridge_mdb_entry *mdb;
ip.src.ip6 = ipv6_hdr(skb)->saddr;
@@ -190,6 +193,62 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
return br_mdb_ip_get_rcu(br, &ip);
}
+/* IMPORTANT: this function must be used only when the contexts cannot be
+ * passed down (e.g. timer) and must be used for read-only purposes because
+ * the vlan snooping option can change, so it can return any context
+ * (non-vlan or vlan). Its initial intended purpose is to read timer values
+ * from the *current* context based on the option. At worst that could lead
+ * to inconsistent timers when the contexts are changed, i.e. src timer
+ * which needs to re-arm with a specific delay taken from the old context
+ */
+static struct net_bridge_mcast_port *
+br_multicast_pg_to_port_ctx(const struct net_bridge_port_group *pg)
+{
+ struct net_bridge_mcast_port *pmctx = &pg->key.port->multicast_ctx;
+ struct net_bridge_vlan *vlan;
+
+ lockdep_assert_held_once(&pg->key.port->br->multicast_lock);
+
+ /* if vlan snooping is disabled use the port's multicast context */
+ if (!pg->key.addr.vid ||
+ !br_opt_get(pg->key.port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
+ goto out;
+
+ /* locking is tricky here, due to different rules for multicast and
+ * vlans we need to take rcu to find the vlan and make sure it has
+ * the BR_VLFLAG_MCAST_ENABLED flag set, it can only change under
+ * multicast_lock which must be already held here, so the vlan's pmctx
+ * can safely be used on return
+ */
+ rcu_read_lock();
+ vlan = br_vlan_find(nbp_vlan_group_rcu(pg->key.port), pg->key.addr.vid);
+ if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx))
+ pmctx = &vlan->port_mcast_ctx;
+ else
+ pmctx = NULL;
+ rcu_read_unlock();
+out:
+ return pmctx;
+}
+
+/* when snooping we need to check if the contexts should be used
+ * in the following order:
+ * - if pmctx is non-NULL (port), check if it should be used
+ * - if pmctx is NULL (bridge), check if brmctx should be used
+ */
+static bool
+br_multicast_ctx_should_use(const struct net_bridge_mcast *brmctx,
+ const struct net_bridge_mcast_port *pmctx)
+{
+ if (!netif_running(brmctx->br->dev))
+ return false;
+
+ if (pmctx)
+ return !br_multicast_port_ctx_state_disabled(pmctx);
+ else
+ return !br_multicast_ctx_vlan_disabled(brmctx);
+}
+
static bool br_port_group_equal(struct net_bridge_port_group *p,
struct net_bridge_port *port,
const unsigned char *src)
@@ -203,20 +262,23 @@ static bool br_port_group_equal(struct net_bridge_port_group *p,
return ether_addr_equal(src, p->eth_addr);
}
-static void __fwd_add_star_excl(struct net_bridge_port_group *pg,
+static void __fwd_add_star_excl(struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg,
struct br_ip *sg_ip)
{
struct net_bridge_port_group_sg_key sg_key;
- struct net_bridge *br = pg->key.port->br;
struct net_bridge_port_group *src_pg;
+ struct net_bridge_mcast *brmctx;
memset(&sg_key, 0, sizeof(sg_key));
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
sg_key.port = pg->key.port;
sg_key.addr = *sg_ip;
- if (br_sg_port_find(br, &sg_key))
+ if (br_sg_port_find(brmctx->br, &sg_key))
return;
- src_pg = __br_multicast_add_group(br, pg->key.port, sg_ip, pg->eth_addr,
+ src_pg = __br_multicast_add_group(brmctx, pmctx,
+ sg_ip, pg->eth_addr,
MCAST_INCLUDE, false, false);
if (IS_ERR_OR_NULL(src_pg) ||
src_pg->rt_protocol != RTPROT_KERNEL)
@@ -256,6 +318,7 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
{
struct net_bridge *br = pg->key.port->br;
struct net_bridge_port_group *pg_lst;
+ struct net_bridge_mcast_port *pmctx;
struct net_bridge_mdb_entry *mp;
struct br_ip sg_ip;
@@ -265,9 +328,13 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
mp = br_mdb_ip_get(br, &pg->key.addr);
if (!mp)
return;
+ pmctx = br_multicast_pg_to_port_ctx(pg);
+ if (!pmctx)
+ return;
memset(&sg_ip, 0, sizeof(sg_ip));
sg_ip = pg->key.addr;
+
for (pg_lst = mlock_dereference(mp->ports, br);
pg_lst;
pg_lst = mlock_dereference(pg_lst->next, br)) {
@@ -284,7 +351,7 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
__fwd_del_star_excl(pg, &sg_ip);
break;
case MCAST_EXCLUDE:
- __fwd_add_star_excl(pg, &sg_ip);
+ __fwd_add_star_excl(pmctx, pg, &sg_ip);
break;
}
}
@@ -377,7 +444,9 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
{
struct net_bridge_port_group_sg_key sg_key;
struct net_bridge *br = star_mp->br;
+ struct net_bridge_mcast_port *pmctx;
struct net_bridge_port_group *pg;
+ struct net_bridge_mcast *brmctx;
if (WARN_ON(br_multicast_is_star_g(&sg->key.addr)))
return;
@@ -400,7 +469,12 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
if (br_sg_port_find(br, &sg_key))
continue;
- src_pg = __br_multicast_add_group(br, pg->key.port,
+ pmctx = br_multicast_pg_to_port_ctx(pg);
+ if (!pmctx)
+ continue;
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
+
+ src_pg = __br_multicast_add_group(brmctx, pmctx,
&sg->key.addr,
sg->eth_addr,
MCAST_INCLUDE, false, false);
@@ -414,16 +488,23 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
static void br_multicast_fwd_src_add(struct net_bridge_group_src *src)
{
struct net_bridge_mdb_entry *star_mp;
+ struct net_bridge_mcast_port *pmctx;
struct net_bridge_port_group *sg;
+ struct net_bridge_mcast *brmctx;
struct br_ip sg_ip;
if (src->flags & BR_SGRP_F_INSTALLED)
return;
memset(&sg_ip, 0, sizeof(sg_ip));
+ pmctx = br_multicast_pg_to_port_ctx(src->pg);
+ if (!pmctx)
+ return;
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
sg_ip = src->pg->key.addr;
sg_ip.src = src->addr.src;
- sg = __br_multicast_add_group(src->br, src->pg->key.port, &sg_ip,
+
+ sg = __br_multicast_add_group(brmctx, pmctx, &sg_ip,
src->pg->eth_addr, MCAST_INCLUDE, false,
!timer_pending(&src->timer));
if (IS_ERR_OR_NULL(sg))
@@ -692,7 +773,28 @@ static void br_multicast_gc(struct hlist_head *head)
}
}
-static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
+static void __br_multicast_query_handle_vlan(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct sk_buff *skb)
+{
+ struct net_bridge_vlan *vlan = NULL;
+
+ if (pmctx && br_multicast_port_ctx_is_vlan(pmctx))
+ vlan = pmctx->vlan;
+ else if (br_multicast_ctx_is_vlan(brmctx))
+ vlan = brmctx->vlan;
+
+ if (vlan && !(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED)) {
+ u16 vlan_proto;
+
+ if (br_vlan_get_proto(brmctx->br->dev, &vlan_proto) != 0)
+ return;
+ __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vlan->vid);
+ }
+}
+
+static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct net_bridge_port_group *pg,
__be32 ip_dst, __be32 group,
bool with_srcs, bool over_lmqt,
@@ -714,11 +816,11 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
u16 lmqt_srcs = 0;
igmp_hdr_size = sizeof(*ih);
- if (br->multicast_igmp_version == 3) {
+ if (brmctx->multicast_igmp_version == 3) {
igmp_hdr_size = sizeof(*ihv3);
if (pg && with_srcs) {
- lmqt = now + (br->multicast_last_member_interval *
- br->multicast_last_member_count);
+ lmqt = now + (brmctx->multicast_last_member_interval *
+ brmctx->multicast_last_member_count);
hlist_for_each_entry(ent, &pg->src_list, node) {
if (over_lmqt == time_after(ent->timer.expires,
lmqt) &&
@@ -734,19 +836,20 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
pkt_size = sizeof(*eth) + sizeof(*iph) + 4 + igmp_hdr_size;
if ((p && pkt_size > p->dev->mtu) ||
- pkt_size > br->dev->mtu)
+ pkt_size > brmctx->br->dev->mtu)
return NULL;
- skb = netdev_alloc_skb_ip_align(br->dev, pkt_size);
+ skb = netdev_alloc_skb_ip_align(brmctx->br->dev, pkt_size);
if (!skb)
goto out;
+ __br_multicast_query_handle_vlan(brmctx, pmctx, skb);
skb->protocol = htons(ETH_P_IP);
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
- ether_addr_copy(eth->h_source, br->dev->dev_addr);
+ ether_addr_copy(eth->h_source, brmctx->br->dev->dev_addr);
ip_eth_mc_map(ip_dst, eth->h_dest);
eth->h_proto = htons(ETH_P_IP);
skb_put(skb, sizeof(*eth));
@@ -762,8 +865,8 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
iph->frag_off = htons(IP_DF);
iph->ttl = 1;
iph->protocol = IPPROTO_IGMP;
- iph->saddr = br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR) ?
- inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0;
+ iph->saddr = br_opt_get(brmctx->br, BROPT_MULTICAST_QUERY_USE_IFADDR) ?
+ inet_select_addr(brmctx->br->dev, 0, RT_SCOPE_LINK) : 0;
iph->daddr = ip_dst;
((u8 *)&iph[1])[0] = IPOPT_RA;
((u8 *)&iph[1])[1] = 4;
@@ -775,12 +878,12 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
skb_set_transport_header(skb, skb->len);
*igmp_type = IGMP_HOST_MEMBERSHIP_QUERY;
- switch (br->multicast_igmp_version) {
+ switch (brmctx->multicast_igmp_version) {
case 2:
ih = igmp_hdr(skb);
ih->type = IGMP_HOST_MEMBERSHIP_QUERY;
- ih->code = (group ? br->multicast_last_member_interval :
- br->multicast_query_response_interval) /
+ ih->code = (group ? brmctx->multicast_last_member_interval :
+ brmctx->multicast_query_response_interval) /
(HZ / IGMP_TIMER_SCALE);
ih->group = group;
ih->csum = 0;
@@ -790,11 +893,11 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
case 3:
ihv3 = igmpv3_query_hdr(skb);
ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY;
- ihv3->code = (group ? br->multicast_last_member_interval :
- br->multicast_query_response_interval) /
+ ihv3->code = (group ? brmctx->multicast_last_member_interval :
+ brmctx->multicast_query_response_interval) /
(HZ / IGMP_TIMER_SCALE);
ihv3->group = group;
- ihv3->qqic = br->multicast_query_interval / HZ;
+ ihv3->qqic = brmctx->multicast_query_interval / HZ;
ihv3->nsrcs = htons(lmqt_srcs);
ihv3->resv = 0;
ihv3->suppress = sflag;
@@ -837,7 +940,8 @@ out:
}
#if IS_ENABLED(CONFIG_IPV6)
-static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
+static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct net_bridge_port_group *pg,
const struct in6_addr *ip6_dst,
const struct in6_addr *group,
@@ -862,11 +966,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
u8 *hopopt;
mld_hdr_size = sizeof(*mldq);
- if (br->multicast_mld_version == 2) {
+ if (brmctx->multicast_mld_version == 2) {
mld_hdr_size = sizeof(*mld2q);
if (pg && with_srcs) {
- llqt = now + (br->multicast_last_member_interval *
- br->multicast_last_member_count);
+ llqt = now + (brmctx->multicast_last_member_interval *
+ brmctx->multicast_last_member_count);
hlist_for_each_entry(ent, &pg->src_list, node) {
if (over_llqt == time_after(ent->timer.expires,
llqt) &&
@@ -882,20 +986,21 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
pkt_size = sizeof(*eth) + sizeof(*ip6h) + 8 + mld_hdr_size;
if ((p && pkt_size > p->dev->mtu) ||
- pkt_size > br->dev->mtu)
+ pkt_size > brmctx->br->dev->mtu)
return NULL;
- skb = netdev_alloc_skb_ip_align(br->dev, pkt_size);
+ skb = netdev_alloc_skb_ip_align(brmctx->br->dev, pkt_size);
if (!skb)
goto out;
+ __br_multicast_query_handle_vlan(brmctx, pmctx, skb);
skb->protocol = htons(ETH_P_IPV6);
/* Ethernet header */
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
- ether_addr_copy(eth->h_source, br->dev->dev_addr);
+ ether_addr_copy(eth->h_source, brmctx->br->dev->dev_addr);
eth->h_proto = htons(ETH_P_IPV6);
skb_put(skb, sizeof(*eth));
@@ -908,14 +1013,14 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
ip6h->nexthdr = IPPROTO_HOPOPTS;
ip6h->hop_limit = 1;
ip6h->daddr = *ip6_dst;
- if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
- &ip6h->saddr)) {
+ if (ipv6_dev_get_saddr(dev_net(brmctx->br->dev), brmctx->br->dev,
+ &ip6h->daddr, 0, &ip6h->saddr)) {
kfree_skb(skb);
- br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, false);
+ br_opt_toggle(brmctx->br, BROPT_HAS_IPV6_ADDR, false);
return NULL;
}
- br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
+ br_opt_toggle(brmctx->br, BROPT_HAS_IPV6_ADDR, true);
ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
hopopt = (u8 *)(ip6h + 1);
@@ -933,10 +1038,10 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
/* ICMPv6 */
skb_set_transport_header(skb, skb->len);
interval = ipv6_addr_any(group) ?
- br->multicast_query_response_interval :
- br->multicast_last_member_interval;
+ brmctx->multicast_query_response_interval :
+ brmctx->multicast_last_member_interval;
*igmp_type = ICMPV6_MGM_QUERY;
- switch (br->multicast_mld_version) {
+ switch (brmctx->multicast_mld_version) {
case 1:
mldq = (struct mld_msg *)icmp6_hdr(skb);
mldq->mld_type = ICMPV6_MGM_QUERY;
@@ -959,7 +1064,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
mld2q->mld2q_suppress = sflag;
mld2q->mld2q_qrv = 2;
mld2q->mld2q_nsrcs = htons(llqt_srcs);
- mld2q->mld2q_qqic = br->multicast_query_interval / HZ;
+ mld2q->mld2q_qqic = brmctx->multicast_query_interval / HZ;
mld2q->mld2q_mca = *group;
csum = &mld2q->mld2q_cksum;
csum_start = (void *)mld2q;
@@ -1000,7 +1105,8 @@ out:
}
#endif
-static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
+static struct sk_buff *br_multicast_alloc_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct net_bridge_port_group *pg,
struct br_ip *ip_dst,
struct br_ip *group,
@@ -1013,7 +1119,7 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
switch (group->proto) {
case htons(ETH_P_IP):
ip4_dst = ip_dst ? ip_dst->dst.ip4 : htonl(INADDR_ALLHOSTS_GROUP);
- return br_ip4_multicast_alloc_query(br, pg,
+ return br_ip4_multicast_alloc_query(brmctx, pmctx, pg,
ip4_dst, group->dst.ip4,
with_srcs, over_lmqt,
sflag, igmp_type,
@@ -1028,7 +1134,7 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
ipv6_addr_set(&ip6_dst, htonl(0xff020000), 0, 0,
htonl(1));
- return br_ip6_multicast_alloc_query(br, pg,
+ return br_ip6_multicast_alloc_query(brmctx, pmctx, pg,
&ip6_dst, &group->dst.ip6,
with_srcs, over_lmqt,
sflag, igmp_type,
@@ -1206,7 +1312,8 @@ struct net_bridge_port_group *br_multicast_new_port_group(
return p;
}
-void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify)
+void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_mdb_entry *mp, bool notify)
{
if (!mp->host_joined) {
mp->host_joined = true;
@@ -1219,7 +1326,7 @@ void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify)
if (br_group_is_l2(&mp->addr))
return;
- mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval);
+ mod_timer(&mp->timer, jiffies + brmctx->multicast_membership_interval);
}
void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify)
@@ -1235,8 +1342,8 @@ void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify)
}
static struct net_bridge_port_group *
-__br_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
+__br_multicast_add_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct br_ip *group,
const unsigned char *src,
u8 filter_mode,
@@ -1248,29 +1355,28 @@ __br_multicast_add_group(struct net_bridge *br,
struct net_bridge_mdb_entry *mp;
unsigned long now = jiffies;
- if (!netif_running(br->dev) ||
- (port && port->state == BR_STATE_DISABLED))
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
goto out;
- mp = br_multicast_new_group(br, group);
+ mp = br_multicast_new_group(brmctx->br, group);
if (IS_ERR(mp))
return ERR_CAST(mp);
- if (!port) {
- br_multicast_host_join(mp, true);
+ if (!pmctx) {
+ br_multicast_host_join(brmctx, mp, true);
goto out;
}
for (pp = &mp->ports;
- (p = mlock_dereference(*pp, br)) != NULL;
+ (p = mlock_dereference(*pp, brmctx->br)) != NULL;
pp = &p->next) {
- if (br_port_group_equal(p, port, src))
+ if (br_port_group_equal(p, pmctx->port, src))
goto found;
- if ((unsigned long)p->key.port < (unsigned long)port)
+ if ((unsigned long)p->key.port < (unsigned long)pmctx->port)
break;
}
- p = br_multicast_new_port_group(port, group, *pp, 0, src,
+ p = br_multicast_new_port_group(pmctx->port, group, *pp, 0, src,
filter_mode, RTPROT_KERNEL);
if (unlikely(!p)) {
p = ERR_PTR(-ENOMEM);
@@ -1279,18 +1385,19 @@ __br_multicast_add_group(struct net_bridge *br,
rcu_assign_pointer(*pp, p);
if (blocked)
p->flags |= MDB_PG_FLAGS_BLOCKED;
- br_mdb_notify(br->dev, mp, p, RTM_NEWMDB);
+ br_mdb_notify(brmctx->br->dev, mp, p, RTM_NEWMDB);
found:
if (igmpv2_mldv1)
- mod_timer(&p->timer, now + br->multicast_membership_interval);
+ mod_timer(&p->timer,
+ now + brmctx->multicast_membership_interval);
out:
return p;
}
-static int br_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_multicast_add_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct br_ip *group,
const unsigned char *src,
u8 filter_mode,
@@ -1299,18 +1406,18 @@ static int br_multicast_add_group(struct net_bridge *br,
struct net_bridge_port_group *pg;
int err;
- spin_lock(&br->multicast_lock);
- pg = __br_multicast_add_group(br, port, group, src, filter_mode,
+ spin_lock(&brmctx->br->multicast_lock);
+ pg = __br_multicast_add_group(brmctx, pmctx, group, src, filter_mode,
igmpv2_mldv1, false);
/* NULL is considered valid for host joined groups */
err = PTR_ERR_OR_ZERO(pg);
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
return err;
}
-static int br_ip4_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip4_multicast_add_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
__be32 group,
__u16 vid,
const unsigned char *src,
@@ -1328,13 +1435,13 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
br_group.vid = vid;
filter_mode = igmpv2 ? MCAST_EXCLUDE : MCAST_INCLUDE;
- return br_multicast_add_group(br, port, &br_group, src, filter_mode,
- igmpv2);
+ return br_multicast_add_group(brmctx, pmctx, &br_group, src,
+ filter_mode, igmpv2);
}
#if IS_ENABLED(CONFIG_IPV6)
-static int br_ip6_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip6_multicast_add_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
const struct in6_addr *group,
__u16 vid,
const unsigned char *src,
@@ -1352,8 +1459,8 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
br_group.vid = vid;
filter_mode = mldv1 ? MCAST_EXCLUDE : MCAST_INCLUDE;
- return br_multicast_add_group(br, port, &br_group, src, filter_mode,
- mldv1);
+ return br_multicast_add_group(brmctx, pmctx, &br_group, src,
+ filter_mode, mldv1);
}
#endif
@@ -1366,52 +1473,54 @@ static bool br_multicast_rport_del(struct hlist_node *rlist)
return true;
}
-static bool br_ip4_multicast_rport_del(struct net_bridge_port *p)
+static bool br_ip4_multicast_rport_del(struct net_bridge_mcast_port *pmctx)
{
- return br_multicast_rport_del(&p->ip4_rlist);
+ return br_multicast_rport_del(&pmctx->ip4_rlist);
}
-static bool br_ip6_multicast_rport_del(struct net_bridge_port *p)
+static bool br_ip6_multicast_rport_del(struct net_bridge_mcast_port *pmctx)
{
#if IS_ENABLED(CONFIG_IPV6)
- return br_multicast_rport_del(&p->ip6_rlist);
+ return br_multicast_rport_del(&pmctx->ip6_rlist);
#else
return false;
#endif
}
-static void br_multicast_router_expired(struct net_bridge_port *port,
+static void br_multicast_router_expired(struct net_bridge_mcast_port *pmctx,
struct timer_list *t,
struct hlist_node *rlist)
{
- struct net_bridge *br = port->br;
+ struct net_bridge *br = pmctx->port->br;
bool del;
spin_lock(&br->multicast_lock);
- if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
- port->multicast_router == MDB_RTR_TYPE_PERM ||
+ if (pmctx->multicast_router == MDB_RTR_TYPE_DISABLED ||
+ pmctx->multicast_router == MDB_RTR_TYPE_PERM ||
timer_pending(t))
goto out;
del = br_multicast_rport_del(rlist);
- br_multicast_rport_del_notify(port, del);
+ br_multicast_rport_del_notify(pmctx, del);
out:
spin_unlock(&br->multicast_lock);
}
static void br_ip4_multicast_router_expired(struct timer_list *t)
{
- struct net_bridge_port *port = from_timer(port, t, ip4_mc_router_timer);
+ struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t,
+ ip4_mc_router_timer);
- br_multicast_router_expired(port, t, &port->ip4_rlist);
+ br_multicast_router_expired(pmctx, t, &pmctx->ip4_rlist);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_router_expired(struct timer_list *t)
{
- struct net_bridge_port *port = from_timer(port, t, ip6_mc_router_timer);
+ struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t,
+ ip6_mc_router_timer);
- br_multicast_router_expired(port, t, &port->ip6_rlist);
+ br_multicast_router_expired(pmctx, t, &pmctx->ip6_rlist);
}
#endif
@@ -1428,80 +1537,86 @@ static void br_mc_router_state_change(struct net_bridge *p,
switchdev_port_attr_set(p->dev, &attr, NULL);
}
-static void br_multicast_local_router_expired(struct net_bridge *br,
+static void br_multicast_local_router_expired(struct net_bridge_mcast *brmctx,
struct timer_list *timer)
{
- spin_lock(&br->multicast_lock);
- if (br->multicast_router == MDB_RTR_TYPE_DISABLED ||
- br->multicast_router == MDB_RTR_TYPE_PERM ||
- br_ip4_multicast_is_router(br) ||
- br_ip6_multicast_is_router(br))
+ spin_lock(&brmctx->br->multicast_lock);
+ if (brmctx->multicast_router == MDB_RTR_TYPE_DISABLED ||
+ brmctx->multicast_router == MDB_RTR_TYPE_PERM ||
+ br_ip4_multicast_is_router(brmctx) ||
+ br_ip6_multicast_is_router(brmctx))
goto out;
- br_mc_router_state_change(br, false);
+ br_mc_router_state_change(brmctx->br, false);
out:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
}
static void br_ip4_multicast_local_router_expired(struct timer_list *t)
{
- struct net_bridge *br = from_timer(br, t, ip4_mc_router_timer);
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip4_mc_router_timer);
- br_multicast_local_router_expired(br, t);
+ br_multicast_local_router_expired(brmctx, t);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_local_router_expired(struct timer_list *t)
{
- struct net_bridge *br = from_timer(br, t, ip6_mc_router_timer);
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip6_mc_router_timer);
- br_multicast_local_router_expired(br, t);
+ br_multicast_local_router_expired(brmctx, t);
}
#endif
-static void br_multicast_querier_expired(struct net_bridge *br,
+static void br_multicast_querier_expired(struct net_bridge_mcast *brmctx,
struct bridge_mcast_own_query *query)
{
- spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ spin_lock(&brmctx->br->multicast_lock);
+ if (!netif_running(brmctx->br->dev) ||
+ br_multicast_ctx_vlan_global_disabled(brmctx) ||
+ !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED))
goto out;
- br_multicast_start_querier(br, query);
+ br_multicast_start_querier(brmctx, query);
out:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
}
static void br_ip4_multicast_querier_expired(struct timer_list *t)
{
- struct net_bridge *br = from_timer(br, t, ip4_other_query.timer);
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip4_other_query.timer);
- br_multicast_querier_expired(br, &br->ip4_own_query);
+ br_multicast_querier_expired(brmctx, &brmctx->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_querier_expired(struct timer_list *t)
{
- struct net_bridge *br = from_timer(br, t, ip6_other_query.timer);
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip6_other_query.timer);
- br_multicast_querier_expired(br, &br->ip6_own_query);
+ br_multicast_querier_expired(brmctx, &brmctx->ip6_own_query);
}
#endif
-static void br_multicast_select_own_querier(struct net_bridge *br,
+static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx,
struct br_ip *ip,
struct sk_buff *skb)
{
if (ip->proto == htons(ETH_P_IP))
- br->ip4_querier.addr.src.ip4 = ip_hdr(skb)->saddr;
+ brmctx->ip4_querier.addr.src.ip4 = ip_hdr(skb)->saddr;
#if IS_ENABLED(CONFIG_IPV6)
else
- br->ip6_querier.addr.src.ip6 = ipv6_hdr(skb)->saddr;
+ brmctx->ip6_querier.addr.src.ip6 = ipv6_hdr(skb)->saddr;
#endif
}
-static void __br_multicast_send_query(struct net_bridge *br,
- struct net_bridge_port *port,
+static void __br_multicast_send_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct net_bridge_port_group *pg,
struct br_ip *ip_dst,
struct br_ip *group,
@@ -1513,19 +1628,23 @@ static void __br_multicast_send_query(struct net_bridge *br,
struct sk_buff *skb;
u8 igmp_type;
+ if (!br_multicast_ctx_should_use(brmctx, pmctx) ||
+ !br_multicast_ctx_matches_vlan_snooping(brmctx))
+ return;
+
again_under_lmqt:
- skb = br_multicast_alloc_query(br, pg, ip_dst, group, with_srcs,
- over_lmqt, sflag, &igmp_type,
+ skb = br_multicast_alloc_query(brmctx, pmctx, pg, ip_dst, group,
+ with_srcs, over_lmqt, sflag, &igmp_type,
need_rexmit);
if (!skb)
return;
- if (port) {
- skb->dev = port->dev;
- br_multicast_count(br, port, skb, igmp_type,
+ if (pmctx) {
+ skb->dev = pmctx->port->dev;
+ br_multicast_count(brmctx->br, pmctx->port, skb, igmp_type,
BR_MCAST_DIR_TX);
NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
- dev_net(port->dev), NULL, skb, NULL, skb->dev,
+ dev_net(pmctx->port->dev), NULL, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
if (over_lmqt && with_srcs && sflag) {
@@ -1533,35 +1652,64 @@ again_under_lmqt:
goto again_under_lmqt;
}
} else {
- br_multicast_select_own_querier(br, group, skb);
- br_multicast_count(br, port, skb, igmp_type,
+ br_multicast_select_own_querier(brmctx, group, skb);
+ br_multicast_count(brmctx->br, NULL, skb, igmp_type,
BR_MCAST_DIR_RX);
netif_rx(skb);
}
}
-static void br_multicast_send_query(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_multicast_read_querier(const struct bridge_mcast_querier *querier,
+ struct bridge_mcast_querier *dest)
+{
+ unsigned int seq;
+
+ memset(dest, 0, sizeof(*dest));
+ do {
+ seq = read_seqcount_begin(&querier->seq);
+ dest->port_ifidx = querier->port_ifidx;
+ memcpy(&dest->addr, &querier->addr, sizeof(struct br_ip));
+ } while (read_seqcount_retry(&querier->seq, seq));
+}
+
+static void br_multicast_update_querier(struct net_bridge_mcast *brmctx,
+ struct bridge_mcast_querier *querier,
+ int ifindex,
+ struct br_ip *saddr)
+{
+ lockdep_assert_held_once(&brmctx->br->multicast_lock);
+
+ write_seqcount_begin(&querier->seq);
+ querier->port_ifidx = ifindex;
+ memcpy(&querier->addr, saddr, sizeof(*saddr));
+ write_seqcount_end(&querier->seq);
+}
+
+static void br_multicast_send_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct bridge_mcast_own_query *own_query)
{
struct bridge_mcast_other_query *other_query = NULL;
+ struct bridge_mcast_querier *querier;
struct br_ip br_group;
unsigned long time;
- if (!netif_running(br->dev) ||
- !br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
- !br_opt_get(br, BROPT_MULTICAST_QUERIER))
+ if (!br_multicast_ctx_should_use(brmctx, pmctx) ||
+ !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED) ||
+ !brmctx->multicast_querier)
return;
memset(&br_group.dst, 0, sizeof(br_group.dst));
- if (port ? (own_query == &port->ip4_own_query) :
- (own_query == &br->ip4_own_query)) {
- other_query = &br->ip4_other_query;
+ if (pmctx ? (own_query == &pmctx->ip4_own_query) :
+ (own_query == &brmctx->ip4_own_query)) {
+ querier = &brmctx->ip4_querier;
+ other_query = &brmctx->ip4_other_query;
br_group.proto = htons(ETH_P_IP);
#if IS_ENABLED(CONFIG_IPV6)
} else {
- other_query = &br->ip6_other_query;
+ querier = &brmctx->ip6_querier;
+ other_query = &brmctx->ip6_other_query;
br_group.proto = htons(ETH_P_IPV6);
#endif
}
@@ -1569,31 +1717,39 @@ static void br_multicast_send_query(struct net_bridge *br,
if (!other_query || timer_pending(&other_query->timer))
return;
- __br_multicast_send_query(br, port, NULL, NULL, &br_group, false, 0,
- NULL);
+ /* we're about to select ourselves as querier */
+ if (!pmctx && querier->port_ifidx) {
+ struct br_ip zeroip = {};
+
+ br_multicast_update_querier(brmctx, querier, 0, &zeroip);
+ }
+
+ __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &br_group, false,
+ 0, NULL);
time = jiffies;
- time += own_query->startup_sent < br->multicast_startup_query_count ?
- br->multicast_startup_query_interval :
- br->multicast_query_interval;
+ time += own_query->startup_sent < brmctx->multicast_startup_query_count ?
+ brmctx->multicast_startup_query_interval :
+ brmctx->multicast_query_interval;
mod_timer(&own_query->timer, time);
}
static void
-br_multicast_port_query_expired(struct net_bridge_port *port,
+br_multicast_port_query_expired(struct net_bridge_mcast_port *pmctx,
struct bridge_mcast_own_query *query)
{
- struct net_bridge *br = port->br;
+ struct net_bridge *br = pmctx->port->br;
+ struct net_bridge_mcast *brmctx;
spin_lock(&br->multicast_lock);
- if (port->state == BR_STATE_DISABLED ||
- port->state == BR_STATE_BLOCKING)
+ if (br_multicast_port_ctx_state_stopped(pmctx))
goto out;
- if (query->startup_sent < br->multicast_startup_query_count)
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
+ if (query->startup_sent < brmctx->multicast_startup_query_count)
query->startup_sent++;
- br_multicast_send_query(port->br, port, query);
+ br_multicast_send_query(brmctx, pmctx, query);
out:
spin_unlock(&br->multicast_lock);
@@ -1601,17 +1757,19 @@ out:
static void br_ip4_multicast_port_query_expired(struct timer_list *t)
{
- struct net_bridge_port *port = from_timer(port, t, ip4_own_query.timer);
+ struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t,
+ ip4_own_query.timer);
- br_multicast_port_query_expired(port, &port->ip4_own_query);
+ br_multicast_port_query_expired(pmctx, &pmctx->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_port_query_expired(struct timer_list *t)
{
- struct net_bridge_port *port = from_timer(port, t, ip6_own_query.timer);
+ struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t,
+ ip6_own_query.timer);
- br_multicast_port_query_expired(port, &port->ip6_own_query);
+ br_multicast_port_query_expired(pmctx, &pmctx->ip6_own_query);
}
#endif
@@ -1620,19 +1778,27 @@ static void br_multicast_port_group_rexmit(struct timer_list *t)
struct net_bridge_port_group *pg = from_timer(pg, t, rexmit_timer);
struct bridge_mcast_other_query *other_query = NULL;
struct net_bridge *br = pg->key.port->br;
+ struct net_bridge_mcast_port *pmctx;
+ struct net_bridge_mcast *brmctx;
bool need_rexmit = false;
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) || hlist_unhashed(&pg->mglist) ||
- !br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
- !br_opt_get(br, BROPT_MULTICAST_QUERIER))
+ !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ goto out;
+
+ pmctx = br_multicast_pg_to_port_ctx(pg);
+ if (!pmctx)
+ goto out;
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
+ if (!brmctx->multicast_querier)
goto out;
if (pg->key.addr.proto == htons(ETH_P_IP))
- other_query = &br->ip4_other_query;
+ other_query = &brmctx->ip4_other_query;
#if IS_ENABLED(CONFIG_IPV6)
else
- other_query = &br->ip6_other_query;
+ other_query = &brmctx->ip6_other_query;
#endif
if (!other_query || timer_pending(&other_query->timer))
@@ -1640,15 +1806,15 @@ static void br_multicast_port_group_rexmit(struct timer_list *t)
if (pg->grp_query_rexmit_cnt) {
pg->grp_query_rexmit_cnt--;
- __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr,
&pg->key.addr, false, 1, NULL);
}
- __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr,
&pg->key.addr, true, 0, &need_rexmit);
if (pg->grp_query_rexmit_cnt || need_rexmit)
mod_timer(&pg->rexmit_timer, jiffies +
- br->multicast_last_member_interval);
+ brmctx->multicast_last_member_interval);
out:
spin_unlock(&br->multicast_lock);
}
@@ -1666,23 +1832,40 @@ static int br_mc_disabled_update(struct net_device *dev, bool value,
return switchdev_port_attr_set(dev, &attr, extack);
}
-int br_multicast_add_port(struct net_bridge_port *port)
+void br_multicast_port_ctx_init(struct net_bridge_port *port,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast_port *pmctx)
{
- int err;
-
- port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
- port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT;
-
- timer_setup(&port->ip4_mc_router_timer,
+ pmctx->port = port;
+ pmctx->vlan = vlan;
+ pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+ timer_setup(&pmctx->ip4_mc_router_timer,
br_ip4_multicast_router_expired, 0);
- timer_setup(&port->ip4_own_query.timer,
+ timer_setup(&pmctx->ip4_own_query.timer,
br_ip4_multicast_port_query_expired, 0);
#if IS_ENABLED(CONFIG_IPV6)
- timer_setup(&port->ip6_mc_router_timer,
+ timer_setup(&pmctx->ip6_mc_router_timer,
br_ip6_multicast_router_expired, 0);
- timer_setup(&port->ip6_own_query.timer,
+ timer_setup(&pmctx->ip6_own_query.timer,
br_ip6_multicast_port_query_expired, 0);
#endif
+}
+
+void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ del_timer_sync(&pmctx->ip6_mc_router_timer);
+#endif
+ del_timer_sync(&pmctx->ip4_mc_router_timer);
+}
+
+int br_multicast_add_port(struct net_bridge_port *port)
+{
+ int err;
+
+ port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT;
+ br_multicast_port_ctx_init(port, NULL, &port->multicast_ctx);
+
err = br_mc_disabled_update(port->dev,
br_opt_get(port->br,
BROPT_MULTICAST_ENABLED),
@@ -1711,10 +1894,7 @@ void br_multicast_del_port(struct net_bridge_port *port)
hlist_move_list(&br->mcast_gc_list, &deleted_head);
spin_unlock_bh(&br->multicast_lock);
br_multicast_gc(&deleted_head);
- del_timer_sync(&port->ip4_mc_router_timer);
-#if IS_ENABLED(CONFIG_IPV6)
- del_timer_sync(&port->ip6_mc_router_timer);
-#endif
+ br_multicast_port_ctx_deinit(&port->multicast_ctx);
free_percpu(port->mcast_stats);
}
@@ -1727,20 +1907,23 @@ static void br_multicast_enable(struct bridge_mcast_own_query *query)
mod_timer(&query->timer, jiffies);
}
-static void __br_multicast_enable_port(struct net_bridge_port *port)
+static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx)
{
- struct net_bridge *br = port->br;
+ struct net_bridge *br = pmctx->port->br;
+ struct net_bridge_mcast *brmctx;
- if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) || !netif_running(br->dev))
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
+ if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
+ !netif_running(br->dev))
return;
- br_multicast_enable(&port->ip4_own_query);
+ br_multicast_enable(&pmctx->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- br_multicast_enable(&port->ip6_own_query);
+ br_multicast_enable(&pmctx->ip6_own_query);
#endif
- if (port->multicast_router == MDB_RTR_TYPE_PERM) {
- br_ip4_multicast_add_router(br, port);
- br_ip6_multicast_add_router(br, port);
+ if (pmctx->multicast_router == MDB_RTR_TYPE_PERM) {
+ br_ip4_multicast_add_router(brmctx, pmctx);
+ br_ip6_multicast_add_router(brmctx, pmctx);
}
}
@@ -1748,33 +1931,39 @@ void br_multicast_enable_port(struct net_bridge_port *port)
{
struct net_bridge *br = port->br;
- spin_lock(&br->multicast_lock);
- __br_multicast_enable_port(port);
- spin_unlock(&br->multicast_lock);
+ spin_lock_bh(&br->multicast_lock);
+ __br_multicast_enable_port_ctx(&port->multicast_ctx);
+ spin_unlock_bh(&br->multicast_lock);
}
-void br_multicast_disable_port(struct net_bridge_port *port)
+static void __br_multicast_disable_port_ctx(struct net_bridge_mcast_port *pmctx)
{
- struct net_bridge *br = port->br;
struct net_bridge_port_group *pg;
struct hlist_node *n;
bool del = false;
- spin_lock(&br->multicast_lock);
- hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
- if (!(pg->flags & MDB_PG_FLAGS_PERMANENT))
- br_multicast_find_del_pg(br, pg);
+ hlist_for_each_entry_safe(pg, n, &pmctx->port->mglist, mglist)
+ if (!(pg->flags & MDB_PG_FLAGS_PERMANENT) &&
+ (!br_multicast_port_ctx_is_vlan(pmctx) ||
+ pg->key.addr.vid == pmctx->vlan->vid))
+ br_multicast_find_del_pg(pmctx->port->br, pg);
- del |= br_ip4_multicast_rport_del(port);
- del_timer(&port->ip4_mc_router_timer);
- del_timer(&port->ip4_own_query.timer);
- del |= br_ip6_multicast_rport_del(port);
+ del |= br_ip4_multicast_rport_del(pmctx);
+ del_timer(&pmctx->ip4_mc_router_timer);
+ del_timer(&pmctx->ip4_own_query.timer);
+ del |= br_ip6_multicast_rport_del(pmctx);
#if IS_ENABLED(CONFIG_IPV6)
- del_timer(&port->ip6_mc_router_timer);
- del_timer(&port->ip6_own_query.timer);
+ del_timer(&pmctx->ip6_mc_router_timer);
+ del_timer(&pmctx->ip6_own_query.timer);
#endif
- br_multicast_rport_del_notify(port, del);
- spin_unlock(&br->multicast_lock);
+ br_multicast_rport_del_notify(pmctx, del);
+}
+
+void br_multicast_disable_port(struct net_bridge_port *port)
+{
+ spin_lock_bh(&port->br->multicast_lock);
+ __br_multicast_disable_port_ctx(&port->multicast_ctx);
+ spin_unlock_bh(&port->br->multicast_lock);
}
static int __grp_src_delete_marked(struct net_bridge_port_group *pg)
@@ -1799,31 +1988,32 @@ static void __grp_src_mod_timer(struct net_bridge_group_src *src,
br_multicast_fwd_src_handle(src);
}
-static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg)
+static void __grp_src_query_marked_and_rexmit(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg)
{
struct bridge_mcast_other_query *other_query = NULL;
- struct net_bridge *br = pg->key.port->br;
- u32 lmqc = br->multicast_last_member_count;
+ u32 lmqc = brmctx->multicast_last_member_count;
unsigned long lmqt, lmi, now = jiffies;
struct net_bridge_group_src *ent;
- if (!netif_running(br->dev) ||
- !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ if (!netif_running(brmctx->br->dev) ||
+ !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED))
return;
if (pg->key.addr.proto == htons(ETH_P_IP))
- other_query = &br->ip4_other_query;
+ other_query = &brmctx->ip4_other_query;
#if IS_ENABLED(CONFIG_IPV6)
else
- other_query = &br->ip6_other_query;
+ other_query = &brmctx->ip6_other_query;
#endif
- lmqt = now + br_multicast_lmqt(br);
+ lmqt = now + br_multicast_lmqt(brmctx);
hlist_for_each_entry(ent, &pg->src_list, node) {
if (ent->flags & BR_SGRP_F_SEND) {
ent->flags &= ~BR_SGRP_F_SEND;
if (ent->timer.expires > lmqt) {
- if (br_opt_get(br, BROPT_MULTICAST_QUERIER) &&
+ if (brmctx->multicast_querier &&
other_query &&
!timer_pending(&other_query->timer))
ent->src_query_rexmit_cnt = lmqc;
@@ -1832,41 +2022,42 @@ static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg)
}
}
- if (!br_opt_get(br, BROPT_MULTICAST_QUERIER) ||
+ if (!brmctx->multicast_querier ||
!other_query || timer_pending(&other_query->timer))
return;
- __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr,
&pg->key.addr, true, 1, NULL);
- lmi = now + br->multicast_last_member_interval;
+ lmi = now + brmctx->multicast_last_member_interval;
if (!timer_pending(&pg->rexmit_timer) ||
time_after(pg->rexmit_timer.expires, lmi))
mod_timer(&pg->rexmit_timer, lmi);
}
-static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
+static void __grp_send_query_and_rexmit(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg)
{
struct bridge_mcast_other_query *other_query = NULL;
- struct net_bridge *br = pg->key.port->br;
unsigned long now = jiffies, lmi;
- if (!netif_running(br->dev) ||
- !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ if (!netif_running(brmctx->br->dev) ||
+ !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED))
return;
if (pg->key.addr.proto == htons(ETH_P_IP))
- other_query = &br->ip4_other_query;
+ other_query = &brmctx->ip4_other_query;
#if IS_ENABLED(CONFIG_IPV6)
else
- other_query = &br->ip6_other_query;
+ other_query = &brmctx->ip6_other_query;
#endif
- if (br_opt_get(br, BROPT_MULTICAST_QUERIER) &&
+ if (brmctx->multicast_querier &&
other_query && !timer_pending(&other_query->timer)) {
- lmi = now + br->multicast_last_member_interval;
- pg->grp_query_rexmit_cnt = br->multicast_last_member_count - 1;
- __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ lmi = now + brmctx->multicast_last_member_interval;
+ pg->grp_query_rexmit_cnt = brmctx->multicast_last_member_count - 1;
+ __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr,
&pg->key.addr, false, 0, NULL);
if (!timer_pending(&pg->rexmit_timer) ||
time_after(pg->rexmit_timer.expires, lmi))
@@ -1875,8 +2066,8 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
if (pg->filter_mode == MCAST_EXCLUDE &&
(!timer_pending(&pg->timer) ||
- time_after(pg->timer.expires, now + br_multicast_lmqt(br))))
- mod_timer(&pg->timer, now + br_multicast_lmqt(br));
+ time_after(pg->timer.expires, now + br_multicast_lmqt(brmctx))))
+ mod_timer(&pg->timer, now + br_multicast_lmqt(brmctx));
}
/* State Msg type New state Actions
@@ -1884,11 +2075,11 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
* INCLUDE (A) ALLOW (B) INCLUDE (A+B) (B)=GMI
* EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI
*/
-static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_isinc_allow(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
struct net_bridge_group_src *ent;
unsigned long now = jiffies;
bool changed = false;
@@ -1907,10 +2098,11 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_a
}
if (ent)
- __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
+ __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx));
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
return changed;
@@ -1921,7 +2113,8 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_a
* Delete (A-B)
* Group Timer=GMI
*/
-static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr,
+static void __grp_src_isexc_incl(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
@@ -1945,7 +2138,8 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr,
br_multicast_fwd_src_handle(ent);
}
- br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type);
+ br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type);
__grp_src_delete_marked(pg);
}
@@ -1956,11 +2150,11 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr,
* Delete (Y-A)
* Group Timer=GMI
*/
-static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_isexc_excl(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
struct net_bridge_group_src *ent;
unsigned long now = jiffies;
bool changed = false;
@@ -1981,13 +2175,14 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr,
ent = br_multicast_new_group_src(pg, &src_ip);
if (ent) {
__grp_src_mod_timer(ent,
- now + br_multicast_gmi(br));
+ now + br_multicast_gmi(brmctx));
changed = true;
}
}
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (__grp_src_delete_marked(pg))
@@ -1996,28 +2191,28 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr,
return changed;
}
-static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_isexc(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
bool changed = false;
switch (pg->filter_mode) {
case MCAST_INCLUDE:
- __grp_src_isexc_incl(pg, h_addr, srcs, nsrcs, addr_size,
+ __grp_src_isexc_incl(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
grec_type);
br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
changed = true;
break;
case MCAST_EXCLUDE:
- changed = __grp_src_isexc_excl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_isexc_excl(brmctx, pg, h_addr, srcs, nsrcs,
+ addr_size, grec_type);
break;
}
pg->filter_mode = MCAST_EXCLUDE;
- mod_timer(&pg->timer, jiffies + br_multicast_gmi(br));
+ mod_timer(&pg->timer, jiffies + br_multicast_gmi(brmctx));
return changed;
}
@@ -2026,11 +2221,12 @@ static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr,
* INCLUDE (A) TO_IN (B) INCLUDE (A+B) (B)=GMI
* Send Q(G,A-B)
*/
-static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_toin_incl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
u32 src_idx, to_send = pg->src_ents;
struct net_bridge_group_src *ent;
unsigned long now = jiffies;
@@ -2054,14 +2250,15 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr,
changed = true;
}
if (ent)
- __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
+ __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx));
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
return changed;
}
@@ -2071,11 +2268,12 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr,
* Send Q(G,X-A)
* Send Q(G)
*/
-static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_toin_excl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
u32 src_idx, to_send = pg->src_ents;
struct net_bridge_group_src *ent;
unsigned long now = jiffies;
@@ -2102,21 +2300,24 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr,
changed = true;
}
if (ent)
- __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
+ __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx));
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
- __grp_send_query_and_rexmit(pg);
+ __grp_send_query_and_rexmit(brmctx, pmctx, pg);
return changed;
}
-static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_toin(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
@@ -2124,12 +2325,12 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr,
switch (pg->filter_mode) {
case MCAST_INCLUDE:
- changed = __grp_src_toin_incl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_toin_incl(brmctx, pmctx, pg, h_addr, srcs,
+ nsrcs, addr_size, grec_type);
break;
case MCAST_EXCLUDE:
- changed = __grp_src_toin_excl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_toin_excl(brmctx, pmctx, pg, h_addr, srcs,
+ nsrcs, addr_size, grec_type);
break;
}
@@ -2151,7 +2352,9 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr,
* Send Q(G,A*B)
* Group Timer=GMI
*/
-static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr,
+static void __grp_src_toex_incl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
@@ -2178,11 +2381,12 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr,
br_multicast_fwd_src_handle(ent);
}
- br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type);
+ br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type);
__grp_src_delete_marked(pg);
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
}
/* State Msg type New state Actions
@@ -2192,7 +2396,9 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr,
* Send Q(G,A-Y)
* Group Timer=GMI
*/
-static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_toex_excl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
@@ -2224,39 +2430,41 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr,
}
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (__grp_src_delete_marked(pg))
changed = true;
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
return changed;
}
-static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_toex(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
bool changed = false;
switch (pg->filter_mode) {
case MCAST_INCLUDE:
- __grp_src_toex_incl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ __grp_src_toex_incl(brmctx, pmctx, pg, h_addr, srcs, nsrcs,
+ addr_size, grec_type);
br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
changed = true;
break;
case MCAST_EXCLUDE:
- changed = __grp_src_toex_excl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_toex_excl(brmctx, pmctx, pg, h_addr, srcs,
+ nsrcs, addr_size, grec_type);
break;
}
pg->filter_mode = MCAST_EXCLUDE;
- mod_timer(&pg->timer, jiffies + br_multicast_gmi(br));
+ mod_timer(&pg->timer, jiffies + br_multicast_gmi(brmctx));
return changed;
}
@@ -2264,7 +2472,9 @@ static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr,
/* State Msg type New state Actions
* INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B)
*/
-static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_block_incl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
{
struct net_bridge_group_src *ent;
@@ -2286,11 +2496,12 @@ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr,
}
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
return changed;
}
@@ -2299,7 +2510,9 @@ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr,
* EXCLUDE (X,Y) BLOCK (A) EXCLUDE (X+(A-Y),Y) (A-X-Y)=Group Timer
* Send Q(G,A-Y)
*/
-static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_block_excl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
{
struct net_bridge_group_src *ent;
@@ -2328,28 +2541,31 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr,
}
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
return changed;
}
-static bool br_multicast_block(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_block(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
{
bool changed = false;
switch (pg->filter_mode) {
case MCAST_INCLUDE:
- changed = __grp_src_block_incl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_block_incl(brmctx, pmctx, pg, h_addr, srcs,
+ nsrcs, addr_size, grec_type);
break;
case MCAST_EXCLUDE:
- changed = __grp_src_block_excl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_block_excl(brmctx, pmctx, pg, h_addr, srcs,
+ nsrcs, addr_size, grec_type);
break;
}
@@ -2384,12 +2600,12 @@ br_multicast_find_port(struct net_bridge_mdb_entry *mp,
return NULL;
}
-static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip4_multicast_igmp3_report(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
- bool igmpv2 = br->multicast_igmp_version == 2;
+ bool igmpv2 = brmctx->multicast_igmp_version == 2;
struct net_bridge_mdb_entry *mdst;
struct net_bridge_port_group *pg;
const unsigned char *src;
@@ -2436,25 +2652,29 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
if (nsrcs == 0 &&
(type == IGMPV3_CHANGE_TO_INCLUDE ||
type == IGMPV3_MODE_IS_INCLUDE)) {
- if (!port || igmpv2) {
- br_ip4_multicast_leave_group(br, port, group, vid, src);
+ if (!pmctx || igmpv2) {
+ br_ip4_multicast_leave_group(brmctx, pmctx,
+ group, vid, src);
continue;
}
} else {
- err = br_ip4_multicast_add_group(br, port, group, vid,
- src, igmpv2);
+ err = br_ip4_multicast_add_group(brmctx, pmctx, group,
+ vid, src, igmpv2);
if (err)
break;
}
- if (!port || igmpv2)
+ if (!pmctx || igmpv2)
continue;
- spin_lock_bh(&br->multicast_lock);
- mdst = br_mdb_ip4_get(br, group, vid);
+ spin_lock_bh(&brmctx->br->multicast_lock);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
+ goto unlock_continue;
+
+ mdst = br_mdb_ip4_get(brmctx->br, group, vid);
if (!mdst)
goto unlock_continue;
- pg = br_multicast_find_port(mdst, port, src);
+ pg = br_multicast_find_port(mdst, pmctx->port, src);
if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT))
goto unlock_continue;
/* reload grec and host addr */
@@ -2462,51 +2682,57 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
h_addr = &ip_hdr(skb)->saddr;
switch (type) {
case IGMPV3_ALLOW_NEW_SOURCES:
- changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src,
+ changed = br_multicast_isinc_allow(brmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
case IGMPV3_MODE_IS_INCLUDE:
- changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src,
+ changed = br_multicast_isinc_allow(brmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
case IGMPV3_MODE_IS_EXCLUDE:
- changed = br_multicast_isexc(pg, h_addr, grec->grec_src,
+ changed = br_multicast_isexc(brmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
case IGMPV3_CHANGE_TO_INCLUDE:
- changed = br_multicast_toin(pg, h_addr, grec->grec_src,
+ changed = br_multicast_toin(brmctx, pmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
case IGMPV3_CHANGE_TO_EXCLUDE:
- changed = br_multicast_toex(pg, h_addr, grec->grec_src,
+ changed = br_multicast_toex(brmctx, pmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
case IGMPV3_BLOCK_OLD_SOURCES:
- changed = br_multicast_block(pg, h_addr, grec->grec_src,
+ changed = br_multicast_block(brmctx, pmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
}
if (changed)
- br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB);
+ br_mdb_notify(brmctx->br->dev, mdst, pg, RTM_NEWMDB);
unlock_continue:
- spin_unlock_bh(&br->multicast_lock);
+ spin_unlock_bh(&brmctx->br->multicast_lock);
}
return err;
}
#if IS_ENABLED(CONFIG_IPV6)
-static int br_ip6_multicast_mld2_report(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip6_multicast_mld2_report(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
- bool mldv1 = br->multicast_mld_version == 1;
+ bool mldv1 = brmctx->multicast_mld_version == 1;
struct net_bridge_mdb_entry *mdst;
struct net_bridge_port_group *pg;
unsigned int nsrcs_offset;
+ struct mld2_report *mld2r;
const unsigned char *src;
- struct icmp6hdr *icmp6h;
struct in6_addr *h_addr;
struct mld2_grec *grec;
unsigned int grec_len;
@@ -2514,12 +2740,12 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
int i, len, num;
int err = 0;
- if (!ipv6_mc_may_pull(skb, sizeof(*icmp6h)))
+ if (!ipv6_mc_may_pull(skb, sizeof(*mld2r)))
return -EINVAL;
- icmp6h = icmp6_hdr(skb);
- num = ntohs(icmp6h->icmp6_dataun.un_data16[1]);
- len = skb_transport_offset(skb) + sizeof(*icmp6h);
+ mld2r = (struct mld2_report *)icmp6_hdr(skb);
+ num = ntohs(mld2r->mld2r_ngrec);
+ len = skb_transport_offset(skb) + sizeof(*mld2r);
for (i = 0; i < num; i++) {
__be16 *_nsrcs, __nsrcs;
@@ -2562,137 +2788,243 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE ||
grec->grec_type == MLD2_MODE_IS_INCLUDE) &&
nsrcs == 0) {
- if (!port || mldv1) {
- br_ip6_multicast_leave_group(br, port,
+ if (!pmctx || mldv1) {
+ br_ip6_multicast_leave_group(brmctx, pmctx,
&grec->grec_mca,
vid, src);
continue;
}
} else {
- err = br_ip6_multicast_add_group(br, port,
+ err = br_ip6_multicast_add_group(brmctx, pmctx,
&grec->grec_mca, vid,
src, mldv1);
if (err)
break;
}
- if (!port || mldv1)
+ if (!pmctx || mldv1)
continue;
- spin_lock_bh(&br->multicast_lock);
- mdst = br_mdb_ip6_get(br, &grec->grec_mca, vid);
+ spin_lock_bh(&brmctx->br->multicast_lock);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
+ goto unlock_continue;
+
+ mdst = br_mdb_ip6_get(brmctx->br, &grec->grec_mca, vid);
if (!mdst)
goto unlock_continue;
- pg = br_multicast_find_port(mdst, port, src);
+ pg = br_multicast_find_port(mdst, pmctx->port, src);
if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT))
goto unlock_continue;
h_addr = &ipv6_hdr(skb)->saddr;
switch (grec->grec_type) {
case MLD2_ALLOW_NEW_SOURCES:
- changed = br_multicast_isinc_allow(pg, h_addr,
+ changed = br_multicast_isinc_allow(brmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
case MLD2_MODE_IS_INCLUDE:
- changed = br_multicast_isinc_allow(pg, h_addr,
+ changed = br_multicast_isinc_allow(brmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
case MLD2_MODE_IS_EXCLUDE:
- changed = br_multicast_isexc(pg, h_addr,
+ changed = br_multicast_isexc(brmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
case MLD2_CHANGE_TO_INCLUDE:
- changed = br_multicast_toin(pg, h_addr,
+ changed = br_multicast_toin(brmctx, pmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
case MLD2_CHANGE_TO_EXCLUDE:
- changed = br_multicast_toex(pg, h_addr,
+ changed = br_multicast_toex(brmctx, pmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
case MLD2_BLOCK_OLD_SOURCES:
- changed = br_multicast_block(pg, h_addr,
+ changed = br_multicast_block(brmctx, pmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
}
if (changed)
- br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB);
+ br_mdb_notify(brmctx->br->dev, mdst, pg, RTM_NEWMDB);
unlock_continue:
- spin_unlock_bh(&br->multicast_lock);
+ spin_unlock_bh(&brmctx->br->multicast_lock);
}
return err;
}
#endif
-static bool br_ip4_multicast_select_querier(struct net_bridge *br,
- struct net_bridge_port *port,
- __be32 saddr)
+static bool br_multicast_select_querier(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct br_ip *saddr)
{
- if (!timer_pending(&br->ip4_own_query.timer) &&
- !timer_pending(&br->ip4_other_query.timer))
- goto update;
+ int port_ifidx = pmctx ? pmctx->port->dev->ifindex : 0;
+ struct timer_list *own_timer, *other_timer;
+ struct bridge_mcast_querier *querier;
- if (!br->ip4_querier.addr.src.ip4)
- goto update;
+ switch (saddr->proto) {
+ case htons(ETH_P_IP):
+ querier = &brmctx->ip4_querier;
+ own_timer = &brmctx->ip4_own_query.timer;
+ other_timer = &brmctx->ip4_other_query.timer;
+ if (!querier->addr.src.ip4 ||
+ ntohl(saddr->src.ip4) <= ntohl(querier->addr.src.ip4))
+ goto update;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ querier = &brmctx->ip6_querier;
+ own_timer = &brmctx->ip6_own_query.timer;
+ other_timer = &brmctx->ip6_other_query.timer;
+ if (ipv6_addr_cmp(&saddr->src.ip6, &querier->addr.src.ip6) <= 0)
+ goto update;
+ break;
+#endif
+ default:
+ return false;
+ }
- if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.src.ip4))
+ if (!timer_pending(own_timer) && !timer_pending(other_timer))
goto update;
return false;
update:
- br->ip4_querier.addr.src.ip4 = saddr;
-
- /* update protected by general multicast_lock by caller */
- rcu_assign_pointer(br->ip4_querier.port, port);
+ br_multicast_update_querier(brmctx, querier, port_ifidx, saddr);
return true;
}
+static struct net_bridge_port *
+__br_multicast_get_querier_port(struct net_bridge *br,
+ const struct bridge_mcast_querier *querier)
+{
+ int port_ifidx = READ_ONCE(querier->port_ifidx);
+ struct net_bridge_port *p;
+ struct net_device *dev;
+
+ if (port_ifidx == 0)
+ return NULL;
+
+ dev = dev_get_by_index_rcu(dev_net(br->dev), port_ifidx);
+ if (!dev)
+ return NULL;
+ p = br_port_get_rtnl_rcu(dev);
+ if (!p || p->br != br)
+ return NULL;
+
+ return p;
+}
+
+size_t br_multicast_querier_state_size(void)
+{
+ return nla_total_size(0) + /* nest attribute */
+ nla_total_size(sizeof(__be32)) + /* BRIDGE_QUERIER_IP_ADDRESS */
+ nla_total_size(sizeof(int)) + /* BRIDGE_QUERIER_IP_PORT */
+ nla_total_size_64bit(sizeof(u64)) + /* BRIDGE_QUERIER_IP_OTHER_TIMER */
#if IS_ENABLED(CONFIG_IPV6)
-static bool br_ip6_multicast_select_querier(struct net_bridge *br,
- struct net_bridge_port *port,
- struct in6_addr *saddr)
+ nla_total_size(sizeof(struct in6_addr)) + /* BRIDGE_QUERIER_IPV6_ADDRESS */
+ nla_total_size(sizeof(int)) + /* BRIDGE_QUERIER_IPV6_PORT */
+ nla_total_size_64bit(sizeof(u64)) + /* BRIDGE_QUERIER_IPV6_OTHER_TIMER */
+#endif
+ 0;
+}
+
+/* protected by rtnl or rcu */
+int br_multicast_dump_querier_state(struct sk_buff *skb,
+ const struct net_bridge_mcast *brmctx,
+ int nest_attr)
{
- if (!timer_pending(&br->ip6_own_query.timer) &&
- !timer_pending(&br->ip6_other_query.timer))
- goto update;
+ struct bridge_mcast_querier querier = {};
+ struct net_bridge_port *p;
+ struct nlattr *nest;
- if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.src.ip6) <= 0)
- goto update;
+ if (!br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED) ||
+ br_multicast_ctx_vlan_global_disabled(brmctx))
+ return 0;
- return false;
+ nest = nla_nest_start(skb, nest_attr);
+ if (!nest)
+ return -EMSGSIZE;
-update:
- br->ip6_querier.addr.src.ip6 = *saddr;
+ rcu_read_lock();
+ if (!brmctx->multicast_querier &&
+ !timer_pending(&brmctx->ip4_other_query.timer))
+ goto out_v6;
+
+ br_multicast_read_querier(&brmctx->ip4_querier, &querier);
+ if (nla_put_in_addr(skb, BRIDGE_QUERIER_IP_ADDRESS,
+ querier.addr.src.ip4)) {
+ rcu_read_unlock();
+ goto out_err;
+ }
- /* update protected by general multicast_lock by caller */
- rcu_assign_pointer(br->ip6_querier.port, port);
+ p = __br_multicast_get_querier_port(brmctx->br, &querier);
+ if (timer_pending(&brmctx->ip4_other_query.timer) &&
+ (nla_put_u64_64bit(skb, BRIDGE_QUERIER_IP_OTHER_TIMER,
+ br_timer_value(&brmctx->ip4_other_query.timer),
+ BRIDGE_QUERIER_PAD) ||
+ (p && nla_put_u32(skb, BRIDGE_QUERIER_IP_PORT, p->dev->ifindex)))) {
+ rcu_read_unlock();
+ goto out_err;
+ }
- return true;
-}
+out_v6:
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!brmctx->multicast_querier &&
+ !timer_pending(&brmctx->ip6_other_query.timer))
+ goto out;
+
+ br_multicast_read_querier(&brmctx->ip6_querier, &querier);
+ if (nla_put_in6_addr(skb, BRIDGE_QUERIER_IPV6_ADDRESS,
+ &querier.addr.src.ip6)) {
+ rcu_read_unlock();
+ goto out_err;
+ }
+
+ p = __br_multicast_get_querier_port(brmctx->br, &querier);
+ if (timer_pending(&brmctx->ip6_other_query.timer) &&
+ (nla_put_u64_64bit(skb, BRIDGE_QUERIER_IPV6_OTHER_TIMER,
+ br_timer_value(&brmctx->ip6_other_query.timer),
+ BRIDGE_QUERIER_PAD) ||
+ (p && nla_put_u32(skb, BRIDGE_QUERIER_IPV6_PORT,
+ p->dev->ifindex)))) {
+ rcu_read_unlock();
+ goto out_err;
+ }
+out:
#endif
+ rcu_read_unlock();
+ nla_nest_end(skb, nest);
+ if (!nla_len(nest))
+ nla_nest_cancel(skb, nest);
+
+ return 0;
+
+out_err:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
static void
-br_multicast_update_query_timer(struct net_bridge *br,
+br_multicast_update_query_timer(struct net_bridge_mcast *brmctx,
struct bridge_mcast_other_query *query,
unsigned long max_delay)
{
if (!timer_pending(&query->timer))
query->delay_time = jiffies + max_delay;
- mod_timer(&query->timer, jiffies + br->multicast_querier_interval);
+ mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval);
}
static void br_port_mc_router_state_change(struct net_bridge_port *p,
@@ -2709,19 +3041,26 @@ static void br_port_mc_router_state_change(struct net_bridge_port *p,
}
static struct net_bridge_port *
-br_multicast_rport_from_node(struct net_bridge *br,
+br_multicast_rport_from_node(struct net_bridge_mcast *brmctx,
struct hlist_head *mc_router_list,
struct hlist_node *rlist)
{
+ struct net_bridge_mcast_port *pmctx;
+
#if IS_ENABLED(CONFIG_IPV6)
- if (mc_router_list == &br->ip6_mc_router_list)
- return hlist_entry(rlist, struct net_bridge_port, ip6_rlist);
+ if (mc_router_list == &brmctx->ip6_mc_router_list)
+ pmctx = hlist_entry(rlist, struct net_bridge_mcast_port,
+ ip6_rlist);
+ else
#endif
- return hlist_entry(rlist, struct net_bridge_port, ip4_rlist);
+ pmctx = hlist_entry(rlist, struct net_bridge_mcast_port,
+ ip4_rlist);
+
+ return pmctx->port;
}
static struct hlist_node *
-br_multicast_get_rport_slot(struct net_bridge *br,
+br_multicast_get_rport_slot(struct net_bridge_mcast *brmctx,
struct net_bridge_port *port,
struct hlist_head *mc_router_list)
@@ -2731,7 +3070,7 @@ br_multicast_get_rport_slot(struct net_bridge *br,
struct hlist_node *rlist;
hlist_for_each(rlist, mc_router_list) {
- p = br_multicast_rport_from_node(br, mc_router_list, rlist);
+ p = br_multicast_rport_from_node(brmctx, mc_router_list, rlist);
if ((unsigned long)port >= (unsigned long)p)
break;
@@ -2742,14 +3081,14 @@ br_multicast_get_rport_slot(struct net_bridge *br,
return slot;
}
-static bool br_multicast_no_router_otherpf(struct net_bridge_port *port,
+static bool br_multicast_no_router_otherpf(struct net_bridge_mcast_port *pmctx,
struct hlist_node *rnode)
{
#if IS_ENABLED(CONFIG_IPV6)
- if (rnode != &port->ip6_rlist)
- return hlist_unhashed(&port->ip6_rlist);
+ if (rnode != &pmctx->ip6_rlist)
+ return hlist_unhashed(&pmctx->ip6_rlist);
else
- return hlist_unhashed(&port->ip4_rlist);
+ return hlist_unhashed(&pmctx->ip4_rlist);
#else
return true;
#endif
@@ -2759,8 +3098,8 @@ static bool br_multicast_no_router_otherpf(struct net_bridge_port *port,
* list is maintained ordered by pointer value
* and locked by br->multicast_lock and RCU
*/
-static void br_multicast_add_router(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_multicast_add_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct hlist_node *rlist,
struct hlist_head *mc_router_list)
{
@@ -2769,7 +3108,7 @@ static void br_multicast_add_router(struct net_bridge *br,
if (!hlist_unhashed(rlist))
return;
- slot = br_multicast_get_rport_slot(br, port, mc_router_list);
+ slot = br_multicast_get_rport_slot(brmctx, pmctx->port, mc_router_list);
if (slot)
hlist_add_behind_rcu(rlist, slot);
@@ -2780,9 +3119,9 @@ static void br_multicast_add_router(struct net_bridge *br,
* switched from no IPv4/IPv6 multicast router to a new
* IPv4 or IPv6 multicast router.
*/
- if (br_multicast_no_router_otherpf(port, rlist)) {
- br_rtr_notify(br->dev, port, RTM_NEWMDB);
- br_port_mc_router_state_change(port, true);
+ if (br_multicast_no_router_otherpf(pmctx, rlist)) {
+ br_rtr_notify(pmctx->port->br->dev, pmctx, RTM_NEWMDB);
+ br_port_mc_router_state_change(pmctx->port, true);
}
}
@@ -2790,116 +3129,119 @@ static void br_multicast_add_router(struct net_bridge *br,
* list is maintained ordered by pointer value
* and locked by br->multicast_lock and RCU
*/
-static void br_ip4_multicast_add_router(struct net_bridge *br,
- struct net_bridge_port *port)
+static void br_ip4_multicast_add_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx)
{
- br_multicast_add_router(br, port, &port->ip4_rlist,
- &br->ip4_mc_router_list);
+ br_multicast_add_router(brmctx, pmctx, &pmctx->ip4_rlist,
+ &brmctx->ip4_mc_router_list);
}
/* Add port to router_list
* list is maintained ordered by pointer value
* and locked by br->multicast_lock and RCU
*/
-static void br_ip6_multicast_add_router(struct net_bridge *br,
- struct net_bridge_port *port)
+static void br_ip6_multicast_add_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx)
{
#if IS_ENABLED(CONFIG_IPV6)
- br_multicast_add_router(br, port, &port->ip6_rlist,
- &br->ip6_mc_router_list);
+ br_multicast_add_router(brmctx, pmctx, &pmctx->ip6_rlist,
+ &brmctx->ip6_mc_router_list);
#endif
}
-static void br_multicast_mark_router(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_multicast_mark_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct timer_list *timer,
struct hlist_node *rlist,
struct hlist_head *mc_router_list)
{
unsigned long now = jiffies;
- if (!port) {
- if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
- if (!br_ip4_multicast_is_router(br) &&
- !br_ip6_multicast_is_router(br))
- br_mc_router_state_change(br, true);
- mod_timer(timer, now + br->multicast_querier_interval);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
+ return;
+
+ if (!pmctx) {
+ if (brmctx->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
+ if (!br_ip4_multicast_is_router(brmctx) &&
+ !br_ip6_multicast_is_router(brmctx))
+ br_mc_router_state_change(brmctx->br, true);
+ mod_timer(timer, now + brmctx->multicast_querier_interval);
}
return;
}
- if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
- port->multicast_router == MDB_RTR_TYPE_PERM)
+ if (pmctx->multicast_router == MDB_RTR_TYPE_DISABLED ||
+ pmctx->multicast_router == MDB_RTR_TYPE_PERM)
return;
- br_multicast_add_router(br, port, rlist, mc_router_list);
- mod_timer(timer, now + br->multicast_querier_interval);
+ br_multicast_add_router(brmctx, pmctx, rlist, mc_router_list);
+ mod_timer(timer, now + brmctx->multicast_querier_interval);
}
-static void br_ip4_multicast_mark_router(struct net_bridge *br,
- struct net_bridge_port *port)
+static void br_ip4_multicast_mark_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx)
{
- struct timer_list *timer = &br->ip4_mc_router_timer;
+ struct timer_list *timer = &brmctx->ip4_mc_router_timer;
struct hlist_node *rlist = NULL;
- if (port) {
- timer = &port->ip4_mc_router_timer;
- rlist = &port->ip4_rlist;
+ if (pmctx) {
+ timer = &pmctx->ip4_mc_router_timer;
+ rlist = &pmctx->ip4_rlist;
}
- br_multicast_mark_router(br, port, timer, rlist,
- &br->ip4_mc_router_list);
+ br_multicast_mark_router(brmctx, pmctx, timer, rlist,
+ &brmctx->ip4_mc_router_list);
}
-static void br_ip6_multicast_mark_router(struct net_bridge *br,
- struct net_bridge_port *port)
+static void br_ip6_multicast_mark_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx)
{
#if IS_ENABLED(CONFIG_IPV6)
- struct timer_list *timer = &br->ip6_mc_router_timer;
+ struct timer_list *timer = &brmctx->ip6_mc_router_timer;
struct hlist_node *rlist = NULL;
- if (port) {
- timer = &port->ip6_mc_router_timer;
- rlist = &port->ip6_rlist;
+ if (pmctx) {
+ timer = &pmctx->ip6_mc_router_timer;
+ rlist = &pmctx->ip6_rlist;
}
- br_multicast_mark_router(br, port, timer, rlist,
- &br->ip6_mc_router_list);
+ br_multicast_mark_router(brmctx, pmctx, timer, rlist,
+ &brmctx->ip6_mc_router_list);
#endif
}
static void
-br_ip4_multicast_query_received(struct net_bridge *br,
- struct net_bridge_port *port,
+br_ip4_multicast_query_received(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct bridge_mcast_other_query *query,
struct br_ip *saddr,
unsigned long max_delay)
{
- if (!br_ip4_multicast_select_querier(br, port, saddr->src.ip4))
+ if (!br_multicast_select_querier(brmctx, pmctx, saddr))
return;
- br_multicast_update_query_timer(br, query, max_delay);
- br_ip4_multicast_mark_router(br, port);
+ br_multicast_update_query_timer(brmctx, query, max_delay);
+ br_ip4_multicast_mark_router(brmctx, pmctx);
}
#if IS_ENABLED(CONFIG_IPV6)
static void
-br_ip6_multicast_query_received(struct net_bridge *br,
- struct net_bridge_port *port,
+br_ip6_multicast_query_received(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct bridge_mcast_other_query *query,
struct br_ip *saddr,
unsigned long max_delay)
{
- if (!br_ip6_multicast_select_querier(br, port, &saddr->src.ip6))
+ if (!br_multicast_select_querier(brmctx, pmctx, saddr))
return;
- br_multicast_update_query_timer(br, query, max_delay);
- br_ip6_multicast_mark_router(br, port);
+ br_multicast_update_query_timer(brmctx, query, max_delay);
+ br_ip6_multicast_mark_router(brmctx, pmctx);
}
#endif
-static void br_ip4_multicast_query(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip4_multicast_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
@@ -2910,14 +3252,13 @@ static void br_ip4_multicast_query(struct net_bridge *br,
struct igmpv3_query *ih3;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
- struct br_ip saddr;
+ struct br_ip saddr = {};
unsigned long max_delay;
unsigned long now = jiffies;
__be32 group;
- spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) ||
- (port && port->state == BR_STATE_DISABLED))
+ spin_lock(&brmctx->br->multicast_lock);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
goto out;
group = ih->group;
@@ -2932,7 +3273,8 @@ static void br_ip4_multicast_query(struct net_bridge *br,
} else if (transport_len >= sizeof(*ih3)) {
ih3 = igmpv3_query_hdr(skb);
if (ih3->nsrcs ||
- (br->multicast_igmp_version == 3 && group && ih3->suppress))
+ (brmctx->multicast_igmp_version == 3 && group &&
+ ih3->suppress))
goto out;
max_delay = ih3->code ?
@@ -2945,16 +3287,17 @@ static void br_ip4_multicast_query(struct net_bridge *br,
saddr.proto = htons(ETH_P_IP);
saddr.src.ip4 = iph->saddr;
- br_ip4_multicast_query_received(br, port, &br->ip4_other_query,
+ br_ip4_multicast_query_received(brmctx, pmctx,
+ &brmctx->ip4_other_query,
&saddr, max_delay);
goto out;
}
- mp = br_mdb_ip4_get(br, group, vid);
+ mp = br_mdb_ip4_get(brmctx->br, group, vid);
if (!mp)
goto out;
- max_delay *= br->multicast_last_member_count;
+ max_delay *= brmctx->multicast_last_member_count;
if (mp->host_joined &&
(timer_pending(&mp->timer) ?
@@ -2963,23 +3306,23 @@ static void br_ip4_multicast_query(struct net_bridge *br,
mod_timer(&mp->timer, now + max_delay);
for (pp = &mp->ports;
- (p = mlock_dereference(*pp, br)) != NULL;
+ (p = mlock_dereference(*pp, brmctx->br)) != NULL;
pp = &p->next) {
if (timer_pending(&p->timer) ?
time_after(p->timer.expires, now + max_delay) :
try_to_del_timer_sync(&p->timer) >= 0 &&
- (br->multicast_igmp_version == 2 ||
+ (brmctx->multicast_igmp_version == 2 ||
p->filter_mode == MCAST_EXCLUDE))
mod_timer(&p->timer, now + max_delay);
}
out:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
}
#if IS_ENABLED(CONFIG_IPV6)
-static int br_ip6_multicast_query(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip6_multicast_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
@@ -2989,7 +3332,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
struct mld2_query *mld2q;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
- struct br_ip saddr;
+ struct br_ip saddr = {};
unsigned long max_delay;
unsigned long now = jiffies;
unsigned int offset = skb_transport_offset(skb);
@@ -2997,9 +3340,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
bool is_general_query;
int err = 0;
- spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) ||
- (port && port->state == BR_STATE_DISABLED))
+ spin_lock(&brmctx->br->multicast_lock);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
goto out;
if (transport_len == sizeof(*mld)) {
@@ -3019,7 +3361,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
mld2q = (struct mld2_query *)icmp6_hdr(skb);
if (!mld2q->mld2q_nsrcs)
group = &mld2q->mld2q_mca;
- if (br->multicast_mld_version == 2 &&
+ if (brmctx->multicast_mld_version == 2 &&
!ipv6_addr_any(&mld2q->mld2q_mca) &&
mld2q->mld2q_suppress)
goto out;
@@ -3033,18 +3375,19 @@ static int br_ip6_multicast_query(struct net_bridge *br,
saddr.proto = htons(ETH_P_IPV6);
saddr.src.ip6 = ipv6_hdr(skb)->saddr;
- br_ip6_multicast_query_received(br, port, &br->ip6_other_query,
+ br_ip6_multicast_query_received(brmctx, pmctx,
+ &brmctx->ip6_other_query,
&saddr, max_delay);
goto out;
} else if (!group) {
goto out;
}
- mp = br_mdb_ip6_get(br, group, vid);
+ mp = br_mdb_ip6_get(brmctx->br, group, vid);
if (!mp)
goto out;
- max_delay *= br->multicast_last_member_count;
+ max_delay *= brmctx->multicast_last_member_count;
if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, now + max_delay) :
@@ -3052,25 +3395,25 @@ static int br_ip6_multicast_query(struct net_bridge *br,
mod_timer(&mp->timer, now + max_delay);
for (pp = &mp->ports;
- (p = mlock_dereference(*pp, br)) != NULL;
+ (p = mlock_dereference(*pp, brmctx->br)) != NULL;
pp = &p->next) {
if (timer_pending(&p->timer) ?
time_after(p->timer.expires, now + max_delay) :
try_to_del_timer_sync(&p->timer) >= 0 &&
- (br->multicast_mld_version == 1 ||
+ (brmctx->multicast_mld_version == 1 ||
p->filter_mode == MCAST_EXCLUDE))
mod_timer(&p->timer, now + max_delay);
}
out:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
return err;
}
#endif
static void
-br_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
+br_multicast_leave_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct br_ip *group,
struct bridge_mcast_other_query *other_query,
struct bridge_mcast_own_query *own_query,
@@ -3081,22 +3424,21 @@ br_multicast_leave_group(struct net_bridge *br,
unsigned long now;
unsigned long time;
- spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) ||
- (port && port->state == BR_STATE_DISABLED))
+ spin_lock(&brmctx->br->multicast_lock);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
goto out;
- mp = br_mdb_ip_get(br, group);
+ mp = br_mdb_ip_get(brmctx->br, group);
if (!mp)
goto out;
- if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) {
+ if (pmctx && (pmctx->port->flags & BR_MULTICAST_FAST_LEAVE)) {
struct net_bridge_port_group __rcu **pp;
for (pp = &mp->ports;
- (p = mlock_dereference(*pp, br)) != NULL;
+ (p = mlock_dereference(*pp, brmctx->br)) != NULL;
pp = &p->next) {
- if (!br_port_group_equal(p, port, src))
+ if (!br_port_group_equal(p, pmctx->port, src))
continue;
if (p->flags & MDB_PG_FLAGS_PERMANENT)
@@ -3111,19 +3453,19 @@ br_multicast_leave_group(struct net_bridge *br,
if (timer_pending(&other_query->timer))
goto out;
- if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) {
- __br_multicast_send_query(br, port, NULL, NULL, &mp->addr,
+ if (brmctx->multicast_querier) {
+ __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &mp->addr,
false, 0, NULL);
- time = jiffies + br->multicast_last_member_count *
- br->multicast_last_member_interval;
+ time = jiffies + brmctx->multicast_last_member_count *
+ brmctx->multicast_last_member_interval;
mod_timer(&own_query->timer, time);
- for (p = mlock_dereference(mp->ports, br);
- p != NULL;
- p = mlock_dereference(p->next, br)) {
- if (!br_port_group_equal(p, port, src))
+ for (p = mlock_dereference(mp->ports, brmctx->br);
+ p != NULL && pmctx != NULL;
+ p = mlock_dereference(p->next, brmctx->br)) {
+ if (!br_port_group_equal(p, pmctx->port, src))
continue;
if (!hlist_unhashed(&p->mglist) &&
@@ -3138,10 +3480,10 @@ br_multicast_leave_group(struct net_bridge *br,
}
now = jiffies;
- time = now + br->multicast_last_member_count *
- br->multicast_last_member_interval;
+ time = now + brmctx->multicast_last_member_count *
+ brmctx->multicast_last_member_interval;
- if (!port) {
+ if (!pmctx) {
if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, time) :
@@ -3152,10 +3494,10 @@ br_multicast_leave_group(struct net_bridge *br,
goto out;
}
- for (p = mlock_dereference(mp->ports, br);
+ for (p = mlock_dereference(mp->ports, brmctx->br);
p != NULL;
- p = mlock_dereference(p->next, br)) {
- if (p->key.port != port)
+ p = mlock_dereference(p->next, brmctx->br)) {
+ if (p->key.port != pmctx->port)
continue;
if (!hlist_unhashed(&p->mglist) &&
@@ -3168,11 +3510,11 @@ br_multicast_leave_group(struct net_bridge *br,
break;
}
out:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
}
-static void br_ip4_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip4_multicast_leave_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
__be32 group,
__u16 vid,
const unsigned char *src)
@@ -3183,20 +3525,21 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
if (ipv4_is_local_multicast(group))
return;
- own_query = port ? &port->ip4_own_query : &br->ip4_own_query;
+ own_query = pmctx ? &pmctx->ip4_own_query : &brmctx->ip4_own_query;
memset(&br_group, 0, sizeof(br_group));
br_group.dst.ip4 = group;
br_group.proto = htons(ETH_P_IP);
br_group.vid = vid;
- br_multicast_leave_group(br, port, &br_group, &br->ip4_other_query,
+ br_multicast_leave_group(brmctx, pmctx, &br_group,
+ &brmctx->ip4_other_query,
own_query, src);
}
#if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip6_multicast_leave_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
const struct in6_addr *group,
__u16 vid,
const unsigned char *src)
@@ -3207,14 +3550,15 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
if (ipv6_addr_is_ll_all_nodes(group))
return;
- own_query = port ? &port->ip6_own_query : &br->ip6_own_query;
+ own_query = pmctx ? &pmctx->ip6_own_query : &brmctx->ip6_own_query;
memset(&br_group, 0, sizeof(br_group));
br_group.dst.ip6 = *group;
br_group.proto = htons(ETH_P_IPV6);
br_group.vid = vid;
- br_multicast_leave_group(br, port, &br_group, &br->ip6_other_query,
+ br_multicast_leave_group(brmctx, pmctx, &br_group,
+ &brmctx->ip6_other_query,
own_query, src);
}
#endif
@@ -3252,8 +3596,8 @@ static void br_multicast_err_count(const struct net_bridge *br,
u64_stats_update_end(&pstats->syncp);
}
-static void br_multicast_pim(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_multicast_pim(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
const struct sk_buff *skb)
{
unsigned int offset = skb_transport_offset(skb);
@@ -3264,31 +3608,32 @@ static void br_multicast_pim(struct net_bridge *br,
pim_hdr_type(pimhdr) != PIM_TYPE_HELLO)
return;
- spin_lock(&br->multicast_lock);
- br_ip4_multicast_mark_router(br, port);
- spin_unlock(&br->multicast_lock);
+ spin_lock(&brmctx->br->multicast_lock);
+ br_ip4_multicast_mark_router(brmctx, pmctx);
+ spin_unlock(&brmctx->br->multicast_lock);
}
-static int br_ip4_multicast_mrd_rcv(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip4_multicast_mrd_rcv(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb)
{
if (ip_hdr(skb)->protocol != IPPROTO_IGMP ||
igmp_hdr(skb)->type != IGMP_MRDISC_ADV)
return -ENOMSG;
- spin_lock(&br->multicast_lock);
- br_ip4_multicast_mark_router(br, port);
- spin_unlock(&br->multicast_lock);
+ spin_lock(&brmctx->br->multicast_lock);
+ br_ip4_multicast_mark_router(brmctx, pmctx);
+ spin_unlock(&brmctx->br->multicast_lock);
return 0;
}
-static int br_multicast_ipv4_rcv(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_multicast_ipv4_rcv(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
+ struct net_bridge_port *p = pmctx ? pmctx->port : NULL;
const unsigned char *src;
struct igmphdr *ih;
int err;
@@ -3300,14 +3645,14 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
} else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) {
if (ip_hdr(skb)->protocol == IPPROTO_PIM)
- br_multicast_pim(br, port, skb);
+ br_multicast_pim(brmctx, pmctx, skb);
} else if (ipv4_is_all_snoopers(ip_hdr(skb)->daddr)) {
- br_ip4_multicast_mrd_rcv(br, port, skb);
+ br_ip4_multicast_mrd_rcv(brmctx, pmctx, skb);
}
return 0;
} else if (err < 0) {
- br_multicast_err_count(br, port, skb->protocol);
+ br_multicast_err_count(brmctx->br, p, skb->protocol);
return err;
}
@@ -3319,44 +3664,45 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
case IGMP_HOST_MEMBERSHIP_REPORT:
case IGMPV2_HOST_MEMBERSHIP_REPORT:
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
- err = br_ip4_multicast_add_group(br, port, ih->group, vid, src,
- true);
+ err = br_ip4_multicast_add_group(brmctx, pmctx, ih->group, vid,
+ src, true);
break;
case IGMPV3_HOST_MEMBERSHIP_REPORT:
- err = br_ip4_multicast_igmp3_report(br, port, skb, vid);
+ err = br_ip4_multicast_igmp3_report(brmctx, pmctx, skb, vid);
break;
case IGMP_HOST_MEMBERSHIP_QUERY:
- br_ip4_multicast_query(br, port, skb, vid);
+ br_ip4_multicast_query(brmctx, pmctx, skb, vid);
break;
case IGMP_HOST_LEAVE_MESSAGE:
- br_ip4_multicast_leave_group(br, port, ih->group, vid, src);
+ br_ip4_multicast_leave_group(brmctx, pmctx, ih->group, vid, src);
break;
}
- br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
+ br_multicast_count(brmctx->br, p, skb, BR_INPUT_SKB_CB(skb)->igmp,
BR_MCAST_DIR_RX);
return err;
}
#if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_mrd_rcv(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip6_multicast_mrd_rcv(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb)
{
if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV)
return;
- spin_lock(&br->multicast_lock);
- br_ip6_multicast_mark_router(br, port);
- spin_unlock(&br->multicast_lock);
+ spin_lock(&brmctx->br->multicast_lock);
+ br_ip6_multicast_mark_router(brmctx, pmctx);
+ spin_unlock(&brmctx->br->multicast_lock);
}
-static int br_multicast_ipv6_rcv(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_multicast_ipv6_rcv(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
+ struct net_bridge_port *p = pmctx ? pmctx->port : NULL;
const unsigned char *src;
struct mld_msg *mld;
int err;
@@ -3368,11 +3714,11 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
if (err == -ENODATA &&
ipv6_addr_is_all_snoopers(&ipv6_hdr(skb)->daddr))
- br_ip6_multicast_mrd_rcv(br, port, skb);
+ br_ip6_multicast_mrd_rcv(brmctx, pmctx, skb);
return 0;
} else if (err < 0) {
- br_multicast_err_count(br, port, skb->protocol);
+ br_multicast_err_count(brmctx->br, p, skb->protocol);
return err;
}
@@ -3383,29 +3729,32 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
case ICMPV6_MGM_REPORT:
src = eth_hdr(skb)->h_source;
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
- err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid,
- src, true);
+ err = br_ip6_multicast_add_group(brmctx, pmctx, &mld->mld_mca,
+ vid, src, true);
break;
case ICMPV6_MLD2_REPORT:
- err = br_ip6_multicast_mld2_report(br, port, skb, vid);
+ err = br_ip6_multicast_mld2_report(brmctx, pmctx, skb, vid);
break;
case ICMPV6_MGM_QUERY:
- err = br_ip6_multicast_query(br, port, skb, vid);
+ err = br_ip6_multicast_query(brmctx, pmctx, skb, vid);
break;
case ICMPV6_MGM_REDUCTION:
src = eth_hdr(skb)->h_source;
- br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid, src);
+ br_ip6_multicast_leave_group(brmctx, pmctx, &mld->mld_mca, vid,
+ src);
break;
}
- br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
+ br_multicast_count(brmctx->br, p, skb, BR_INPUT_SKB_CB(skb)->igmp,
BR_MCAST_DIR_RX);
return err;
}
#endif
-int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
+int br_multicast_rcv(struct net_bridge_mcast **brmctx,
+ struct net_bridge_mcast_port **pmctx,
+ struct net_bridge_vlan *vlan,
struct sk_buff *skb, u16 vid)
{
int ret = 0;
@@ -3413,16 +3762,36 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
BR_INPUT_SKB_CB(skb)->igmp = 0;
BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
- if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ if (!br_opt_get((*brmctx)->br, BROPT_MULTICAST_ENABLED))
return 0;
+ if (br_opt_get((*brmctx)->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) && vlan) {
+ const struct net_bridge_vlan *masterv;
+
+ /* the vlan has the master flag set only when transmitting
+ * through the bridge device
+ */
+ if (br_vlan_is_master(vlan)) {
+ masterv = vlan;
+ *brmctx = &vlan->br_mcast_ctx;
+ *pmctx = NULL;
+ } else {
+ masterv = vlan->brvlan;
+ *brmctx = &vlan->brvlan->br_mcast_ctx;
+ *pmctx = &vlan->port_mcast_ctx;
+ }
+
+ if (!(masterv->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED))
+ return 0;
+ }
+
switch (skb->protocol) {
case htons(ETH_P_IP):
- ret = br_multicast_ipv4_rcv(br, port, skb, vid);
+ ret = br_multicast_ipv4_rcv(*brmctx, *pmctx, skb, vid);
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- ret = br_multicast_ipv6_rcv(br, port, skb, vid);
+ ret = br_multicast_ipv6_rcv(*brmctx, *pmctx, skb, vid);
break;
#endif
}
@@ -3430,32 +3799,39 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
return ret;
}
-static void br_multicast_query_expired(struct net_bridge *br,
+static void br_multicast_query_expired(struct net_bridge_mcast *brmctx,
struct bridge_mcast_own_query *query,
struct bridge_mcast_querier *querier)
{
- spin_lock(&br->multicast_lock);
- if (query->startup_sent < br->multicast_startup_query_count)
+ spin_lock(&brmctx->br->multicast_lock);
+ if (br_multicast_ctx_vlan_disabled(brmctx))
+ goto out;
+
+ if (query->startup_sent < brmctx->multicast_startup_query_count)
query->startup_sent++;
- RCU_INIT_POINTER(querier->port, NULL);
- br_multicast_send_query(br, NULL, query);
- spin_unlock(&br->multicast_lock);
+ br_multicast_send_query(brmctx, NULL, query);
+out:
+ spin_unlock(&brmctx->br->multicast_lock);
}
static void br_ip4_multicast_query_expired(struct timer_list *t)
{
- struct net_bridge *br = from_timer(br, t, ip4_own_query.timer);
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip4_own_query.timer);
- br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier);
+ br_multicast_query_expired(brmctx, &brmctx->ip4_own_query,
+ &brmctx->ip4_querier);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_query_expired(struct timer_list *t)
{
- struct net_bridge *br = from_timer(br, t, ip6_own_query.timer);
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip6_own_query.timer);
- br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier);
+ br_multicast_query_expired(brmctx, &brmctx->ip6_own_query,
+ &brmctx->ip6_querier);
}
#endif
@@ -3472,47 +3848,65 @@ static void br_multicast_gc_work(struct work_struct *work)
br_multicast_gc(&deleted_head);
}
-void br_multicast_init(struct net_bridge *br)
+void br_multicast_ctx_init(struct net_bridge *br,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast *brmctx)
{
- br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX;
+ brmctx->br = br;
+ brmctx->vlan = vlan;
+ brmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+ brmctx->multicast_last_member_count = 2;
+ brmctx->multicast_startup_query_count = 2;
- br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
- br->multicast_last_member_count = 2;
- br->multicast_startup_query_count = 2;
+ brmctx->multicast_last_member_interval = HZ;
+ brmctx->multicast_query_response_interval = 10 * HZ;
+ brmctx->multicast_startup_query_interval = 125 * HZ / 4;
+ brmctx->multicast_query_interval = 125 * HZ;
+ brmctx->multicast_querier_interval = 255 * HZ;
+ brmctx->multicast_membership_interval = 260 * HZ;
- br->multicast_last_member_interval = HZ;
- br->multicast_query_response_interval = 10 * HZ;
- br->multicast_startup_query_interval = 125 * HZ / 4;
- br->multicast_query_interval = 125 * HZ;
- br->multicast_querier_interval = 255 * HZ;
- br->multicast_membership_interval = 260 * HZ;
-
- br->ip4_other_query.delay_time = 0;
- br->ip4_querier.port = NULL;
- br->multicast_igmp_version = 2;
+ brmctx->ip4_other_query.delay_time = 0;
+ brmctx->ip4_querier.port_ifidx = 0;
+ seqcount_init(&brmctx->ip4_querier.seq);
+ brmctx->multicast_igmp_version = 2;
#if IS_ENABLED(CONFIG_IPV6)
- br->multicast_mld_version = 1;
- br->ip6_other_query.delay_time = 0;
- br->ip6_querier.port = NULL;
+ brmctx->multicast_mld_version = 1;
+ brmctx->ip6_other_query.delay_time = 0;
+ brmctx->ip6_querier.port_ifidx = 0;
+ seqcount_init(&brmctx->ip6_querier.seq);
#endif
- br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true);
- br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
- spin_lock_init(&br->multicast_lock);
- timer_setup(&br->ip4_mc_router_timer,
+ timer_setup(&brmctx->ip4_mc_router_timer,
br_ip4_multicast_local_router_expired, 0);
- timer_setup(&br->ip4_other_query.timer,
+ timer_setup(&brmctx->ip4_other_query.timer,
br_ip4_multicast_querier_expired, 0);
- timer_setup(&br->ip4_own_query.timer,
+ timer_setup(&brmctx->ip4_own_query.timer,
br_ip4_multicast_query_expired, 0);
#if IS_ENABLED(CONFIG_IPV6)
- timer_setup(&br->ip6_mc_router_timer,
+ timer_setup(&brmctx->ip6_mc_router_timer,
br_ip6_multicast_local_router_expired, 0);
- timer_setup(&br->ip6_other_query.timer,
+ timer_setup(&brmctx->ip6_other_query.timer,
br_ip6_multicast_querier_expired, 0);
- timer_setup(&br->ip6_own_query.timer,
+ timer_setup(&brmctx->ip6_own_query.timer,
br_ip6_multicast_query_expired, 0);
#endif
+}
+
+void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx)
+{
+ __br_multicast_stop(brmctx);
+}
+
+void br_multicast_init(struct net_bridge *br)
+{
+ br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX;
+
+ br_multicast_ctx_init(br, NULL, &br->multicast_ctx);
+
+ br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true);
+ br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
+
+ spin_lock_init(&br->multicast_lock);
INIT_HLIST_HEAD(&br->mdb_list);
INIT_HLIST_HEAD(&br->mcast_gc_list);
INIT_WORK(&br->mcast_gc_work, br_multicast_gc_work);
@@ -3580,8 +3974,8 @@ void br_multicast_leave_snoopers(struct net_bridge *br)
br_ip6_multicast_leave_snoopers(br);
}
-static void __br_multicast_open(struct net_bridge *br,
- struct bridge_mcast_own_query *query)
+static void __br_multicast_open_query(struct net_bridge *br,
+ struct bridge_mcast_own_query *query)
{
query->startup_sent = 0;
@@ -3591,26 +3985,194 @@ static void __br_multicast_open(struct net_bridge *br,
mod_timer(&query->timer, jiffies);
}
-void br_multicast_open(struct net_bridge *br)
+static void __br_multicast_open(struct net_bridge_mcast *brmctx)
{
- __br_multicast_open(br, &br->ip4_own_query);
+ __br_multicast_open_query(brmctx->br, &brmctx->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- __br_multicast_open(br, &br->ip6_own_query);
+ __br_multicast_open_query(brmctx->br, &brmctx->ip6_own_query);
#endif
}
-void br_multicast_stop(struct net_bridge *br)
+void br_multicast_open(struct net_bridge *br)
{
- del_timer_sync(&br->ip4_mc_router_timer);
- del_timer_sync(&br->ip4_other_query.timer);
- del_timer_sync(&br->ip4_own_query.timer);
+ ASSERT_RTNL();
+
+ if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *vlan;
+
+ vg = br_vlan_group(br);
+ if (vg) {
+ list_for_each_entry(vlan, &vg->vlan_list, vlist) {
+ struct net_bridge_mcast *brmctx;
+
+ brmctx = &vlan->br_mcast_ctx;
+ if (br_vlan_is_brentry(vlan) &&
+ !br_multicast_ctx_vlan_disabled(brmctx))
+ __br_multicast_open(&vlan->br_mcast_ctx);
+ }
+ }
+ } else {
+ __br_multicast_open(&br->multicast_ctx);
+ }
+}
+
+static void __br_multicast_stop(struct net_bridge_mcast *brmctx)
+{
+ del_timer_sync(&brmctx->ip4_mc_router_timer);
+ del_timer_sync(&brmctx->ip4_other_query.timer);
+ del_timer_sync(&brmctx->ip4_own_query.timer);
#if IS_ENABLED(CONFIG_IPV6)
- del_timer_sync(&br->ip6_mc_router_timer);
- del_timer_sync(&br->ip6_other_query.timer);
- del_timer_sync(&br->ip6_own_query.timer);
+ del_timer_sync(&brmctx->ip6_mc_router_timer);
+ del_timer_sync(&brmctx->ip6_other_query.timer);
+ del_timer_sync(&brmctx->ip6_own_query.timer);
#endif
}
+void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on)
+{
+ struct net_bridge *br;
+
+ /* it's okay to check for the flag without the multicast lock because it
+ * can only change under RTNL -> multicast_lock, we need the latter to
+ * sync with timers and packets
+ */
+ if (on == !!(vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED))
+ return;
+
+ if (br_vlan_is_master(vlan)) {
+ br = vlan->br;
+
+ if (!br_vlan_is_brentry(vlan) ||
+ (on &&
+ br_multicast_ctx_vlan_global_disabled(&vlan->br_mcast_ctx)))
+ return;
+
+ spin_lock_bh(&br->multicast_lock);
+ vlan->priv_flags ^= BR_VLFLAG_MCAST_ENABLED;
+ spin_unlock_bh(&br->multicast_lock);
+
+ if (on)
+ __br_multicast_open(&vlan->br_mcast_ctx);
+ else
+ __br_multicast_stop(&vlan->br_mcast_ctx);
+ } else {
+ struct net_bridge_mcast *brmctx;
+
+ brmctx = br_multicast_port_ctx_get_global(&vlan->port_mcast_ctx);
+ if (on && br_multicast_ctx_vlan_global_disabled(brmctx))
+ return;
+
+ br = vlan->port->br;
+ spin_lock_bh(&br->multicast_lock);
+ vlan->priv_flags ^= BR_VLFLAG_MCAST_ENABLED;
+ if (on)
+ __br_multicast_enable_port_ctx(&vlan->port_mcast_ctx);
+ else
+ __br_multicast_disable_port_ctx(&vlan->port_mcast_ctx);
+ spin_unlock_bh(&br->multicast_lock);
+ }
+}
+
+static void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on)
+{
+ struct net_bridge_port *p;
+
+ if (WARN_ON_ONCE(!br_vlan_is_master(vlan)))
+ return;
+
+ list_for_each_entry(p, &vlan->br->port_list, list) {
+ struct net_bridge_vlan *vport;
+
+ vport = br_vlan_find(nbp_vlan_group(p), vlan->vid);
+ if (!vport)
+ continue;
+ br_multicast_toggle_one_vlan(vport, on);
+ }
+
+ if (br_vlan_is_brentry(vlan))
+ br_multicast_toggle_one_vlan(vlan, on);
+}
+
+int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *vlan;
+ struct net_bridge_port *p;
+
+ if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) == on)
+ return 0;
+
+ if (on && !br_opt_get(br, BROPT_VLAN_ENABLED)) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot enable multicast vlan snooping with vlan filtering disabled");
+ return -EINVAL;
+ }
+
+ vg = br_vlan_group(br);
+ if (!vg)
+ return 0;
+
+ br_opt_toggle(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED, on);
+
+ /* disable/enable non-vlan mcast contexts based on vlan snooping */
+ if (on)
+ __br_multicast_stop(&br->multicast_ctx);
+ else
+ __br_multicast_open(&br->multicast_ctx);
+ list_for_each_entry(p, &br->port_list, list) {
+ if (on)
+ br_multicast_disable_port(p);
+ else
+ br_multicast_enable_port(p);
+ }
+
+ list_for_each_entry(vlan, &vg->vlan_list, vlist)
+ br_multicast_toggle_vlan(vlan, on);
+
+ return 0;
+}
+
+bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on)
+{
+ ASSERT_RTNL();
+
+ /* BR_VLFLAG_GLOBAL_MCAST_ENABLED relies on eventual consistency and
+ * requires only RTNL to change
+ */
+ if (on == !!(vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED))
+ return false;
+
+ vlan->priv_flags ^= BR_VLFLAG_GLOBAL_MCAST_ENABLED;
+ br_multicast_toggle_vlan(vlan, on);
+
+ return true;
+}
+
+void br_multicast_stop(struct net_bridge *br)
+{
+ ASSERT_RTNL();
+
+ if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *vlan;
+
+ vg = br_vlan_group(br);
+ if (vg) {
+ list_for_each_entry(vlan, &vg->vlan_list, vlist) {
+ struct net_bridge_mcast *brmctx;
+
+ brmctx = &vlan->br_mcast_ctx;
+ if (br_vlan_is_brentry(vlan) &&
+ !br_multicast_ctx_vlan_disabled(brmctx))
+ __br_multicast_stop(&vlan->br_mcast_ctx);
+ }
+ }
+ } else {
+ __br_multicast_stop(&br->multicast_ctx);
+ }
+}
+
void br_multicast_dev_del(struct net_bridge *br)
{
struct net_bridge_mdb_entry *mp;
@@ -3623,44 +4185,45 @@ void br_multicast_dev_del(struct net_bridge *br)
hlist_move_list(&br->mcast_gc_list, &deleted_head);
spin_unlock_bh(&br->multicast_lock);
+ br_multicast_ctx_deinit(&br->multicast_ctx);
br_multicast_gc(&deleted_head);
cancel_work_sync(&br->mcast_gc_work);
rcu_barrier();
}
-int br_multicast_set_router(struct net_bridge *br, unsigned long val)
+int br_multicast_set_router(struct net_bridge_mcast *brmctx, unsigned long val)
{
int err = -EINVAL;
- spin_lock_bh(&br->multicast_lock);
+ spin_lock_bh(&brmctx->br->multicast_lock);
switch (val) {
case MDB_RTR_TYPE_DISABLED:
case MDB_RTR_TYPE_PERM:
- br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM);
- del_timer(&br->ip4_mc_router_timer);
+ br_mc_router_state_change(brmctx->br, val == MDB_RTR_TYPE_PERM);
+ del_timer(&brmctx->ip4_mc_router_timer);
#if IS_ENABLED(CONFIG_IPV6)
- del_timer(&br->ip6_mc_router_timer);
+ del_timer(&brmctx->ip6_mc_router_timer);
#endif
- br->multicast_router = val;
+ brmctx->multicast_router = val;
err = 0;
break;
case MDB_RTR_TYPE_TEMP_QUERY:
- if (br->multicast_router != MDB_RTR_TYPE_TEMP_QUERY)
- br_mc_router_state_change(br, false);
- br->multicast_router = val;
+ if (brmctx->multicast_router != MDB_RTR_TYPE_TEMP_QUERY)
+ br_mc_router_state_change(brmctx->br, false);
+ brmctx->multicast_router = val;
err = 0;
break;
}
- spin_unlock_bh(&br->multicast_lock);
+ spin_unlock_bh(&brmctx->br->multicast_lock);
return err;
}
static void
-br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted)
+br_multicast_rport_del_notify(struct net_bridge_mcast_port *pmctx, bool deleted)
{
if (!deleted)
return;
@@ -3668,37 +4231,39 @@ br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted)
/* For backwards compatibility for now, only notify if there is
* no multicast router anymore for both IPv4 and IPv6.
*/
- if (!hlist_unhashed(&p->ip4_rlist))
+ if (!hlist_unhashed(&pmctx->ip4_rlist))
return;
#if IS_ENABLED(CONFIG_IPV6)
- if (!hlist_unhashed(&p->ip6_rlist))
+ if (!hlist_unhashed(&pmctx->ip6_rlist))
return;
#endif
- br_rtr_notify(p->br->dev, p, RTM_DELMDB);
- br_port_mc_router_state_change(p, false);
+ br_rtr_notify(pmctx->port->br->dev, pmctx, RTM_DELMDB);
+ br_port_mc_router_state_change(pmctx->port, false);
/* don't allow timer refresh */
- if (p->multicast_router == MDB_RTR_TYPE_TEMP)
- p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+ if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP)
+ pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
}
-int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
+int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx,
+ unsigned long val)
{
- struct net_bridge *br = p->br;
+ struct net_bridge_mcast *brmctx;
unsigned long now = jiffies;
int err = -EINVAL;
bool del = false;
- spin_lock(&br->multicast_lock);
- if (p->multicast_router == val) {
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
+ spin_lock(&brmctx->br->multicast_lock);
+ if (pmctx->multicast_router == val) {
/* Refresh the temp router port timer */
- if (p->multicast_router == MDB_RTR_TYPE_TEMP) {
- mod_timer(&p->ip4_mc_router_timer,
- now + br->multicast_querier_interval);
+ if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP) {
+ mod_timer(&pmctx->ip4_mc_router_timer,
+ now + brmctx->multicast_querier_interval);
#if IS_ENABLED(CONFIG_IPV6)
- mod_timer(&p->ip6_mc_router_timer,
- now + br->multicast_querier_interval);
+ mod_timer(&pmctx->ip6_mc_router_timer,
+ now + brmctx->multicast_querier_interval);
#endif
}
err = 0;
@@ -3706,63 +4271,103 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
}
switch (val) {
case MDB_RTR_TYPE_DISABLED:
- p->multicast_router = MDB_RTR_TYPE_DISABLED;
- del |= br_ip4_multicast_rport_del(p);
- del_timer(&p->ip4_mc_router_timer);
- del |= br_ip6_multicast_rport_del(p);
+ pmctx->multicast_router = MDB_RTR_TYPE_DISABLED;
+ del |= br_ip4_multicast_rport_del(pmctx);
+ del_timer(&pmctx->ip4_mc_router_timer);
+ del |= br_ip6_multicast_rport_del(pmctx);
#if IS_ENABLED(CONFIG_IPV6)
- del_timer(&p->ip6_mc_router_timer);
+ del_timer(&pmctx->ip6_mc_router_timer);
#endif
- br_multicast_rport_del_notify(p, del);
+ br_multicast_rport_del_notify(pmctx, del);
break;
case MDB_RTR_TYPE_TEMP_QUERY:
- p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
- del |= br_ip4_multicast_rport_del(p);
- del |= br_ip6_multicast_rport_del(p);
- br_multicast_rport_del_notify(p, del);
+ pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+ del |= br_ip4_multicast_rport_del(pmctx);
+ del |= br_ip6_multicast_rport_del(pmctx);
+ br_multicast_rport_del_notify(pmctx, del);
break;
case MDB_RTR_TYPE_PERM:
- p->multicast_router = MDB_RTR_TYPE_PERM;
- del_timer(&p->ip4_mc_router_timer);
- br_ip4_multicast_add_router(br, p);
+ pmctx->multicast_router = MDB_RTR_TYPE_PERM;
+ del_timer(&pmctx->ip4_mc_router_timer);
+ br_ip4_multicast_add_router(brmctx, pmctx);
#if IS_ENABLED(CONFIG_IPV6)
- del_timer(&p->ip6_mc_router_timer);
+ del_timer(&pmctx->ip6_mc_router_timer);
#endif
- br_ip6_multicast_add_router(br, p);
+ br_ip6_multicast_add_router(brmctx, pmctx);
break;
case MDB_RTR_TYPE_TEMP:
- p->multicast_router = MDB_RTR_TYPE_TEMP;
- br_ip4_multicast_mark_router(br, p);
- br_ip6_multicast_mark_router(br, p);
+ pmctx->multicast_router = MDB_RTR_TYPE_TEMP;
+ br_ip4_multicast_mark_router(brmctx, pmctx);
+ br_ip6_multicast_mark_router(brmctx, pmctx);
break;
default:
goto unlock;
}
err = 0;
unlock:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
return err;
}
-static void br_multicast_start_querier(struct net_bridge *br,
+int br_multicast_set_vlan_router(struct net_bridge_vlan *v, u8 mcast_router)
+{
+ int err;
+
+ if (br_vlan_is_master(v))
+ err = br_multicast_set_router(&v->br_mcast_ctx, mcast_router);
+ else
+ err = br_multicast_set_port_router(&v->port_mcast_ctx,
+ mcast_router);
+
+ return err;
+}
+
+static void br_multicast_start_querier(struct net_bridge_mcast *brmctx,
struct bridge_mcast_own_query *query)
{
struct net_bridge_port *port;
- __br_multicast_open(br, query);
+ if (!br_multicast_ctx_matches_vlan_snooping(brmctx))
+ return;
+
+ __br_multicast_open_query(brmctx->br, query);
rcu_read_lock();
- list_for_each_entry_rcu(port, &br->port_list, list) {
- if (port->state == BR_STATE_DISABLED ||
- port->state == BR_STATE_BLOCKING)
+ list_for_each_entry_rcu(port, &brmctx->br->port_list, list) {
+ struct bridge_mcast_own_query *ip4_own_query;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct bridge_mcast_own_query *ip6_own_query;
+#endif
+
+ if (br_multicast_port_ctx_state_stopped(&port->multicast_ctx))
continue;
- if (query == &br->ip4_own_query)
- br_multicast_enable(&port->ip4_own_query);
+ if (br_multicast_ctx_is_vlan(brmctx)) {
+ struct net_bridge_vlan *vlan;
+
+ vlan = br_vlan_find(nbp_vlan_group_rcu(port),
+ brmctx->vlan->vid);
+ if (!vlan ||
+ br_multicast_port_ctx_state_stopped(&vlan->port_mcast_ctx))
+ continue;
+
+ ip4_own_query = &vlan->port_mcast_ctx.ip4_own_query;
+#if IS_ENABLED(CONFIG_IPV6)
+ ip6_own_query = &vlan->port_mcast_ctx.ip6_own_query;
+#endif
+ } else {
+ ip4_own_query = &port->multicast_ctx.ip4_own_query;
+#if IS_ENABLED(CONFIG_IPV6)
+ ip6_own_query = &port->multicast_ctx.ip6_own_query;
+#endif
+ }
+
+ if (query == &brmctx->ip4_own_query)
+ br_multicast_enable(ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
else
- br_multicast_enable(&port->ip6_own_query);
+ br_multicast_enable(ip6_own_query);
#endif
}
rcu_read_unlock();
@@ -3796,7 +4401,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val,
br_multicast_open(br);
list_for_each_entry(port, &br->port_list, list)
- __br_multicast_enable_port(port);
+ __br_multicast_enable_port_ctx(&port->multicast_ctx);
change_snoopers = true;
@@ -3839,47 +4444,48 @@ bool br_multicast_router(const struct net_device *dev)
bool is_router;
spin_lock_bh(&br->multicast_lock);
- is_router = br_multicast_is_router(br, NULL);
+ is_router = br_multicast_is_router(&br->multicast_ctx, NULL);
spin_unlock_bh(&br->multicast_lock);
return is_router;
}
EXPORT_SYMBOL_GPL(br_multicast_router);
-int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
+int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val)
{
unsigned long max_delay;
val = !!val;
- spin_lock_bh(&br->multicast_lock);
- if (br_opt_get(br, BROPT_MULTICAST_QUERIER) == val)
+ spin_lock_bh(&brmctx->br->multicast_lock);
+ if (brmctx->multicast_querier == val)
goto unlock;
- br_opt_toggle(br, BROPT_MULTICAST_QUERIER, !!val);
+ WRITE_ONCE(brmctx->multicast_querier, val);
if (!val)
goto unlock;
- max_delay = br->multicast_query_response_interval;
+ max_delay = brmctx->multicast_query_response_interval;
- if (!timer_pending(&br->ip4_other_query.timer))
- br->ip4_other_query.delay_time = jiffies + max_delay;
+ if (!timer_pending(&brmctx->ip4_other_query.timer))
+ brmctx->ip4_other_query.delay_time = jiffies + max_delay;
- br_multicast_start_querier(br, &br->ip4_own_query);
+ br_multicast_start_querier(brmctx, &brmctx->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- if (!timer_pending(&br->ip6_other_query.timer))
- br->ip6_other_query.delay_time = jiffies + max_delay;
+ if (!timer_pending(&brmctx->ip6_other_query.timer))
+ brmctx->ip6_other_query.delay_time = jiffies + max_delay;
- br_multicast_start_querier(br, &br->ip6_own_query);
+ br_multicast_start_querier(brmctx, &brmctx->ip6_own_query);
#endif
unlock:
- spin_unlock_bh(&br->multicast_lock);
+ spin_unlock_bh(&brmctx->br->multicast_lock);
return 0;
}
-int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val)
+int br_multicast_set_igmp_version(struct net_bridge_mcast *brmctx,
+ unsigned long val)
{
/* Currently we support only version 2 and 3 */
switch (val) {
@@ -3890,15 +4496,16 @@ int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val)
return -EINVAL;
}
- spin_lock_bh(&br->multicast_lock);
- br->multicast_igmp_version = val;
- spin_unlock_bh(&br->multicast_lock);
+ spin_lock_bh(&brmctx->br->multicast_lock);
+ brmctx->multicast_igmp_version = val;
+ spin_unlock_bh(&brmctx->br->multicast_lock);
return 0;
}
#if IS_ENABLED(CONFIG_IPV6)
-int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val)
+int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx,
+ unsigned long val)
{
/* Currently we support version 1 and 2 */
switch (val) {
@@ -3909,9 +4516,9 @@ int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val)
return -EINVAL;
}
- spin_lock_bh(&br->multicast_lock);
- br->multicast_mld_version = val;
- spin_unlock_bh(&br->multicast_lock);
+ spin_lock_bh(&brmctx->br->multicast_lock);
+ brmctx->multicast_mld_version = val;
+ spin_unlock_bh(&brmctx->br->multicast_lock);
return 0;
}
@@ -4003,7 +4610,7 @@ bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto)
memset(&eth, 0, sizeof(eth));
eth.h_proto = htons(proto);
- ret = br_multicast_querier_exists(br, &eth, NULL);
+ ret = br_multicast_querier_exists(&br->multicast_ctx, &eth, NULL);
unlock:
rcu_read_unlock();
@@ -4022,9 +4629,11 @@ EXPORT_SYMBOL_GPL(br_multicast_has_querier_anywhere);
*/
bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto)
{
+ struct net_bridge_mcast *brmctx;
struct net_bridge *br;
struct net_bridge_port *port;
bool ret = false;
+ int port_ifidx;
rcu_read_lock();
if (!netif_is_bridge_port(dev))
@@ -4035,17 +4644,20 @@ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto)
goto unlock;
br = port->br;
+ brmctx = &br->multicast_ctx;
switch (proto) {
case ETH_P_IP:
- if (!timer_pending(&br->ip4_other_query.timer) ||
- rcu_dereference(br->ip4_querier.port) == port)
+ port_ifidx = brmctx->ip4_querier.port_ifidx;
+ if (!timer_pending(&brmctx->ip4_other_query.timer) ||
+ port_ifidx == port->dev->ifindex)
goto unlock;
break;
#if IS_ENABLED(CONFIG_IPV6)
case ETH_P_IPV6:
- if (!timer_pending(&br->ip6_other_query.timer) ||
- rcu_dereference(br->ip6_querier.port) == port)
+ port_ifidx = brmctx->ip6_querier.port_ifidx;
+ if (!timer_pending(&brmctx->ip6_other_query.timer) ||
+ port_ifidx == port->dev->ifindex)
goto unlock;
break;
#endif
@@ -4071,7 +4683,9 @@ EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent);
*/
bool br_multicast_has_router_adjacent(struct net_device *dev, int proto)
{
- struct net_bridge_port *port, *p;
+ struct net_bridge_mcast_port *pmctx;
+ struct net_bridge_mcast *brmctx;
+ struct net_bridge_port *port;
bool ret = false;
rcu_read_lock();
@@ -4079,11 +4693,12 @@ bool br_multicast_has_router_adjacent(struct net_device *dev, int proto)
if (!port)
goto unlock;
+ brmctx = &port->br->multicast_ctx;
switch (proto) {
case ETH_P_IP:
- hlist_for_each_entry_rcu(p, &port->br->ip4_mc_router_list,
+ hlist_for_each_entry_rcu(pmctx, &brmctx->ip4_mc_router_list,
ip4_rlist) {
- if (p == port)
+ if (pmctx->port == port)
continue;
ret = true;
@@ -4092,9 +4707,9 @@ bool br_multicast_has_router_adjacent(struct net_device *dev, int proto)
break;
#if IS_ENABLED(CONFIG_IPV6)
case ETH_P_IPV6:
- hlist_for_each_entry_rcu(p, &port->br->ip6_mc_router_list,
+ hlist_for_each_entry_rcu(pmctx, &brmctx->ip6_mc_router_list,
ip6_rlist) {
- if (p == port)
+ if (pmctx->port == port)
continue;
ret = true;
@@ -4186,7 +4801,8 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
u64_stats_update_end(&pstats->syncp);
}
-void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
+void br_multicast_count(struct net_bridge *br,
+ const struct net_bridge_port *p,
const struct sk_buff *skb, u8 type, u8 dir)
{
struct bridge_mcast_stats __percpu *stats;
diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c
index 13290a749d09..f91c071d1608 100644
--- a/net/bridge/br_multicast_eht.c
+++ b/net/bridge/br_multicast_eht.c
@@ -33,7 +33,8 @@
static bool br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg,
union net_bridge_eht_addr *src_addr,
union net_bridge_eht_addr *h_addr);
-static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg,
+static void br_multicast_create_eht_set_entry(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *src_addr,
union net_bridge_eht_addr *h_addr,
int filter_mode,
@@ -388,7 +389,8 @@ static void br_multicast_ip_src_to_eht_addr(const struct br_ip *src,
}
}
-static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg,
+static void br_eht_convert_host_filter_mode(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
int filter_mode)
{
@@ -405,14 +407,15 @@ static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg,
br_multicast_del_eht_set_entry(pg, &zero_addr, h_addr);
break;
case MCAST_EXCLUDE:
- br_multicast_create_eht_set_entry(pg, &zero_addr, h_addr,
- MCAST_EXCLUDE,
+ br_multicast_create_eht_set_entry(brmctx, pg, &zero_addr,
+ h_addr, MCAST_EXCLUDE,
true);
break;
}
}
-static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg,
+static void br_multicast_create_eht_set_entry(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *src_addr,
union net_bridge_eht_addr *h_addr,
int filter_mode,
@@ -441,8 +444,8 @@ static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg,
if (!set_h)
goto fail_set_entry;
- mod_timer(&set_h->timer, jiffies + br_multicast_gmi(br));
- mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(br));
+ mod_timer(&set_h->timer, jiffies + br_multicast_gmi(brmctx));
+ mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(brmctx));
return;
@@ -499,7 +502,8 @@ static void br_multicast_del_eht_host(struct net_bridge_port_group *pg,
}
/* create new set entries from reports */
-static void __eht_create_set_entries(struct net_bridge_port_group *pg,
+static void __eht_create_set_entries(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -512,8 +516,8 @@ static void __eht_create_set_entries(struct net_bridge_port_group *pg,
memset(&eht_src_addr, 0, sizeof(eht_src_addr));
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
- br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
- filter_mode,
+ br_multicast_create_eht_set_entry(brmctx, pg, &eht_src_addr,
+ h_addr, filter_mode,
false);
}
}
@@ -549,7 +553,8 @@ static bool __eht_del_set_entries(struct net_bridge_port_group *pg,
return changed;
}
-static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
+static bool br_multicast_eht_allow(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -559,8 +564,8 @@ static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
switch (br_multicast_eht_host_filter_mode(pg, h_addr)) {
case MCAST_INCLUDE:
- __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
- MCAST_INCLUDE);
+ __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs,
+ addr_size, MCAST_INCLUDE);
break;
case MCAST_EXCLUDE:
changed = __eht_del_set_entries(pg, h_addr, srcs, nsrcs,
@@ -571,7 +576,8 @@ static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
return changed;
}
-static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
+static bool br_multicast_eht_block(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -585,7 +591,7 @@ static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
addr_size);
break;
case MCAST_EXCLUDE:
- __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
+ __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
MCAST_EXCLUDE);
break;
}
@@ -594,7 +600,8 @@ static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
}
/* flush_entries is true when changing mode */
-static bool __eht_inc_exc(struct net_bridge_port_group *pg,
+static bool __eht_inc_exc(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -612,11 +619,10 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg,
/* if we're changing mode, delete the host and its entries */
if (flush_entries)
br_multicast_del_eht_host(pg, h_addr);
- __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
+ __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
filter_mode);
/* we can be missing sets only if we've deleted some entries */
if (flush_entries) {
- struct net_bridge *br = pg->key.port->br;
struct net_bridge_group_eht_set *eht_set;
struct net_bridge_group_src *src_ent;
struct hlist_node *tmp;
@@ -647,14 +653,15 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg,
&eht_src_addr);
if (!eht_set)
continue;
- mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(br));
+ mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(brmctx));
}
}
return changed;
}
-static bool br_multicast_eht_inc(struct net_bridge_port_group *pg,
+static bool br_multicast_eht_inc(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -663,14 +670,15 @@ static bool br_multicast_eht_inc(struct net_bridge_port_group *pg,
{
bool changed;
- changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size,
+ changed = __eht_inc_exc(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
MCAST_INCLUDE, to_report);
- br_eht_convert_host_filter_mode(pg, h_addr, MCAST_INCLUDE);
+ br_eht_convert_host_filter_mode(brmctx, pg, h_addr, MCAST_INCLUDE);
return changed;
}
-static bool br_multicast_eht_exc(struct net_bridge_port_group *pg,
+static bool br_multicast_eht_exc(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -679,14 +687,15 @@ static bool br_multicast_eht_exc(struct net_bridge_port_group *pg,
{
bool changed;
- changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size,
+ changed = __eht_inc_exc(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
MCAST_EXCLUDE, to_report);
- br_eht_convert_host_filter_mode(pg, h_addr, MCAST_EXCLUDE);
+ br_eht_convert_host_filter_mode(brmctx, pg, h_addr, MCAST_EXCLUDE);
return changed;
}
-static bool __eht_ip4_handle(struct net_bridge_port_group *pg,
+static bool __eht_ip4_handle(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -696,24 +705,25 @@ static bool __eht_ip4_handle(struct net_bridge_port_group *pg,
switch (grec_type) {
case IGMPV3_ALLOW_NEW_SOURCES:
- br_multicast_eht_allow(pg, h_addr, srcs, nsrcs, sizeof(__be32));
+ br_multicast_eht_allow(brmctx, pg, h_addr, srcs, nsrcs,
+ sizeof(__be32));
break;
case IGMPV3_BLOCK_OLD_SOURCES:
- changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_block(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(__be32));
break;
case IGMPV3_CHANGE_TO_INCLUDE:
to_report = true;
fallthrough;
case IGMPV3_MODE_IS_INCLUDE:
- changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_inc(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(__be32), to_report);
break;
case IGMPV3_CHANGE_TO_EXCLUDE:
to_report = true;
fallthrough;
case IGMPV3_MODE_IS_EXCLUDE:
- changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_exc(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(__be32), to_report);
break;
}
@@ -722,7 +732,8 @@ static bool __eht_ip4_handle(struct net_bridge_port_group *pg,
}
#if IS_ENABLED(CONFIG_IPV6)
-static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
+static bool __eht_ip6_handle(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -732,18 +743,18 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
switch (grec_type) {
case MLD2_ALLOW_NEW_SOURCES:
- br_multicast_eht_allow(pg, h_addr, srcs, nsrcs,
+ br_multicast_eht_allow(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(struct in6_addr));
break;
case MLD2_BLOCK_OLD_SOURCES:
- changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_block(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(struct in6_addr));
break;
case MLD2_CHANGE_TO_INCLUDE:
to_report = true;
fallthrough;
case MLD2_MODE_IS_INCLUDE:
- changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_inc(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(struct in6_addr),
to_report);
break;
@@ -751,7 +762,7 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
to_report = true;
fallthrough;
case MLD2_MODE_IS_EXCLUDE:
- changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_exc(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(struct in6_addr),
to_report);
break;
@@ -762,7 +773,8 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
#endif
/* true means an entry was deleted */
-bool br_multicast_eht_handle(struct net_bridge_port_group *pg,
+bool br_multicast_eht_handle(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
void *h_addr,
void *srcs,
u32 nsrcs,
@@ -779,12 +791,12 @@ bool br_multicast_eht_handle(struct net_bridge_port_group *pg,
memset(&eht_host_addr, 0, sizeof(eht_host_addr));
memcpy(&eht_host_addr, h_addr, addr_size);
if (addr_size == sizeof(__be32))
- changed = __eht_ip4_handle(pg, &eht_host_addr, srcs, nsrcs,
- grec_type);
+ changed = __eht_ip4_handle(brmctx, pg, &eht_host_addr, srcs,
+ nsrcs, grec_type);
#if IS_ENABLED(CONFIG_IPV6)
else
- changed = __eht_ip6_handle(pg, &eht_host_addr, srcs, nsrcs,
- grec_type);
+ changed = __eht_ip6_handle(brmctx, pg, &eht_host_addr, srcs,
+ nsrcs, grec_type);
#endif
out:
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 8642e56059fb..6c58fc14d2cb 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -287,7 +287,7 @@ static int br_port_fill_attrs(struct sk_buff *skb,
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
if (nla_put_u8(skb, IFLA_BRPORT_MULTICAST_ROUTER,
- p->multicast_router) ||
+ p->multicast_ctx.multicast_router) ||
nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
p->multicast_eht_hosts_limit) ||
nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
@@ -932,7 +932,8 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
if (tb[IFLA_BRPORT_MULTICAST_ROUTER]) {
u8 mcast_router = nla_get_u8(tb[IFLA_BRPORT_MULTICAST_ROUTER]);
- err = br_multicast_set_port_router(p, mcast_router);
+ err = br_multicast_set_port_router(&p->multicast_ctx,
+ mcast_router);
if (err)
return err;
}
@@ -1286,7 +1287,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_MCAST_ROUTER]) {
u8 multicast_router = nla_get_u8(data[IFLA_BR_MCAST_ROUTER]);
- err = br_multicast_set_router(br, multicast_router);
+ err = br_multicast_set_router(&br->multicast_ctx,
+ multicast_router);
if (err)
return err;
}
@@ -1309,7 +1311,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_MCAST_QUERIER]) {
u8 mcast_querier = nla_get_u8(data[IFLA_BR_MCAST_QUERIER]);
- err = br_multicast_set_querier(br, mcast_querier);
+ err = br_multicast_set_querier(&br->multicast_ctx,
+ mcast_querier);
if (err)
return err;
}
@@ -1324,49 +1327,49 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_MCAST_LAST_MEMBER_CNT]) {
u32 val = nla_get_u32(data[IFLA_BR_MCAST_LAST_MEMBER_CNT]);
- br->multicast_last_member_count = val;
+ br->multicast_ctx.multicast_last_member_count = val;
}
if (data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]) {
u32 val = nla_get_u32(data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]);
- br->multicast_startup_query_count = val;
+ br->multicast_ctx.multicast_startup_query_count = val;
}
if (data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]);
- br->multicast_last_member_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]);
- br->multicast_membership_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_membership_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_QUERIER_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERIER_INTVL]);
- br->multicast_querier_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_querier_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_QUERY_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]);
- br->multicast_query_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]);
- br->multicast_query_response_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]);
- br->multicast_startup_query_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_STATS_ENABLED]) {
@@ -1380,7 +1383,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
__u8 igmp_version;
igmp_version = nla_get_u8(data[IFLA_BR_MCAST_IGMP_VERSION]);
- err = br_multicast_set_igmp_version(br, igmp_version);
+ err = br_multicast_set_igmp_version(&br->multicast_ctx,
+ igmp_version);
if (err)
return err;
}
@@ -1390,7 +1394,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
__u8 mld_version;
mld_version = nla_get_u8(data[IFLA_BR_MCAST_MLD_VERSION]);
- err = br_multicast_set_mld_version(br, mld_version);
+ err = br_multicast_set_mld_version(&br->multicast_ctx,
+ mld_version);
if (err)
return err;
}
@@ -1497,6 +1502,7 @@ static size_t br_get_size(const struct net_device *brdev)
nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */
nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_IGMP_VERSION */
nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_MLD_VERSION */
+ br_multicast_querier_state_size() + /* IFLA_BR_MCAST_QUERIER_STATE */
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IPTABLES */
@@ -1566,50 +1572,53 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
return -EMSGSIZE;
#endif
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
- if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER, br->multicast_router) ||
+ if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER,
+ br->multicast_ctx.multicast_router) ||
nla_put_u8(skb, IFLA_BR_MCAST_SNOOPING,
br_opt_get(br, BROPT_MULTICAST_ENABLED)) ||
nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR,
br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR)) ||
nla_put_u8(skb, IFLA_BR_MCAST_QUERIER,
- br_opt_get(br, BROPT_MULTICAST_QUERIER)) ||
+ br->multicast_ctx.multicast_querier) ||
nla_put_u8(skb, IFLA_BR_MCAST_STATS_ENABLED,
br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED)) ||
nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY, RHT_ELASTICITY) ||
nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) ||
nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT,
- br->multicast_last_member_count) ||
+ br->multicast_ctx.multicast_last_member_count) ||
nla_put_u32(skb, IFLA_BR_MCAST_STARTUP_QUERY_CNT,
- br->multicast_startup_query_count) ||
+ br->multicast_ctx.multicast_startup_query_count) ||
nla_put_u8(skb, IFLA_BR_MCAST_IGMP_VERSION,
- br->multicast_igmp_version))
+ br->multicast_ctx.multicast_igmp_version) ||
+ br_multicast_dump_querier_state(skb, &br->multicast_ctx,
+ IFLA_BR_MCAST_QUERIER_STATE))
return -EMSGSIZE;
#if IS_ENABLED(CONFIG_IPV6)
if (nla_put_u8(skb, IFLA_BR_MCAST_MLD_VERSION,
- br->multicast_mld_version))
+ br->multicast_ctx.multicast_mld_version))
return -EMSGSIZE;
#endif
- clockval = jiffies_to_clock_t(br->multicast_last_member_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_last_member_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
- clockval = jiffies_to_clock_t(br->multicast_membership_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_membership_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_MEMBERSHIP_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
- clockval = jiffies_to_clock_t(br->multicast_querier_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_querier_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERIER_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
- clockval = jiffies_to_clock_t(br->multicast_query_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_query_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
- clockval = jiffies_to_clock_t(br->multicast_query_response_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_query_response_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
- clockval = jiffies_to_clock_t(br->multicast_startup_query_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_startup_query_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_STARTUP_QUERY_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index aa64d8d63ca3..b4cef3a97f12 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -29,6 +29,8 @@
#define BR_MULTICAST_DEFAULT_HASH_MAX 4096
+#define BR_HWDOM_MAX BITS_PER_LONG
+
#define BR_VERSION "2.3"
/* Control of forwarding link local multicast */
@@ -79,7 +81,8 @@ struct bridge_mcast_other_query {
/* selected querier */
struct bridge_mcast_querier {
struct br_ip addr;
- struct net_bridge_port __rcu *port;
+ int port_ifidx;
+ seqcount_t seq;
};
/* IGMP/MLD statistics */
@@ -89,6 +92,60 @@ struct bridge_mcast_stats {
};
#endif
+/* net_bridge_mcast_port must always be defined due to forwarding stubs */
+struct net_bridge_mcast_port {
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ struct net_bridge_port *port;
+ struct net_bridge_vlan *vlan;
+
+ struct bridge_mcast_own_query ip4_own_query;
+ struct timer_list ip4_mc_router_timer;
+ struct hlist_node ip4_rlist;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct bridge_mcast_own_query ip6_own_query;
+ struct timer_list ip6_mc_router_timer;
+ struct hlist_node ip6_rlist;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+ unsigned char multicast_router;
+#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
+};
+
+/* net_bridge_mcast must always be defined due to forwarding stubs */
+struct net_bridge_mcast {
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ struct net_bridge *br;
+ struct net_bridge_vlan *vlan;
+
+ u32 multicast_last_member_count;
+ u32 multicast_startup_query_count;
+
+ u8 multicast_querier;
+ u8 multicast_igmp_version;
+ u8 multicast_router;
+#if IS_ENABLED(CONFIG_IPV6)
+ u8 multicast_mld_version;
+#endif
+ unsigned long multicast_last_member_interval;
+ unsigned long multicast_membership_interval;
+ unsigned long multicast_querier_interval;
+ unsigned long multicast_query_interval;
+ unsigned long multicast_query_response_interval;
+ unsigned long multicast_startup_query_interval;
+ struct hlist_head ip4_mc_router_list;
+ struct timer_list ip4_mc_router_timer;
+ struct bridge_mcast_other_query ip4_other_query;
+ struct bridge_mcast_own_query ip4_own_query;
+ struct bridge_mcast_querier ip4_querier;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct hlist_head ip6_mc_router_list;
+ struct timer_list ip6_mc_router_timer;
+ struct bridge_mcast_other_query ip6_other_query;
+ struct bridge_mcast_own_query ip6_own_query;
+ struct bridge_mcast_querier ip6_querier;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
+};
+
struct br_tunnel_info {
__be64 tunnel_id;
struct metadata_dst __rcu *tunnel_dst;
@@ -98,6 +155,8 @@ struct br_tunnel_info {
enum {
BR_VLFLAG_PER_PORT_STATS = BIT(0),
BR_VLFLAG_ADDED_BY_SWITCHDEV = BIT(1),
+ BR_VLFLAG_MCAST_ENABLED = BIT(2),
+ BR_VLFLAG_GLOBAL_MCAST_ENABLED = BIT(3),
};
/**
@@ -114,6 +173,9 @@ enum {
* @refcnt: if MASTER flag set, this is bumped for each port referencing it
* @brvlan: if MASTER flag unset, this points to the global per-VLAN context
* for this VLAN entry
+ * @br_mcast_ctx: if MASTER flag set, this is the global vlan multicast context
+ * @port_mcast_ctx: if MASTER flag unset, this is the per-port/vlan multicast
+ * context
* @vlist: sorted list of VLAN entries
* @rcu: used for entry destruction
*
@@ -141,6 +203,11 @@ struct net_bridge_vlan {
struct br_tunnel_info tinfo;
+ union {
+ struct net_bridge_mcast br_mcast_ctx;
+ struct net_bridge_mcast_port port_mcast_ctx;
+ };
+
struct list_head vlist;
struct rcu_head rcu;
@@ -305,19 +372,13 @@ struct net_bridge_port {
struct kobject kobj;
struct rcu_head rcu;
+ struct net_bridge_mcast_port multicast_ctx;
+
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
- struct bridge_mcast_own_query ip4_own_query;
- struct timer_list ip4_mc_router_timer;
- struct hlist_node ip4_rlist;
-#if IS_ENABLED(CONFIG_IPV6)
- struct bridge_mcast_own_query ip6_own_query;
- struct timer_list ip6_mc_router_timer;
- struct hlist_node ip6_rlist;
-#endif /* IS_ENABLED(CONFIG_IPV6) */
+ struct bridge_mcast_stats __percpu *mcast_stats;
+
u32 multicast_eht_hosts_limit;
u32 multicast_eht_hosts_cnt;
- unsigned char multicast_router;
- struct bridge_mcast_stats __percpu *mcast_stats;
struct hlist_head mglist;
#endif
@@ -329,7 +390,12 @@ struct net_bridge_port {
struct netpoll *np;
#endif
#ifdef CONFIG_NET_SWITCHDEV
- int offload_fwd_mark;
+ /* Identifier used to group ports that share the same switchdev
+ * hardware domain.
+ */
+ int hwdom;
+ int offload_count;
+ struct netdev_phys_item_id ppid;
#endif
u16 group_fwd_mask;
u16 backup_redirected_cnt;
@@ -367,7 +433,6 @@ enum net_bridge_opts {
BROPT_NF_CALL_ARPTABLES,
BROPT_GROUP_ADDR_SET,
BROPT_MULTICAST_ENABLED,
- BROPT_MULTICAST_QUERIER,
BROPT_MULTICAST_QUERY_USE_IFADDR,
BROPT_MULTICAST_STATS_ENABLED,
BROPT_HAS_IPV6_ADDR,
@@ -376,6 +441,7 @@ enum net_bridge_opts {
BROPT_VLAN_STATS_PER_PORT,
BROPT_NO_LL_LEARN,
BROPT_VLAN_BRIDGE_BINDING,
+ BROPT_MCAST_VLAN_SNOOPING_ENABLED,
};
struct net_bridge {
@@ -426,25 +492,14 @@ struct net_bridge {
BR_USER_STP, /* new RSTP in userspace */
} stp_enabled;
+ struct net_bridge_mcast multicast_ctx;
+
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ struct bridge_mcast_stats __percpu *mcast_stats;
u32 hash_max;
- u32 multicast_last_member_count;
- u32 multicast_startup_query_count;
-
- u8 multicast_igmp_version;
- u8 multicast_router;
-#if IS_ENABLED(CONFIG_IPV6)
- u8 multicast_mld_version;
-#endif
spinlock_t multicast_lock;
- unsigned long multicast_last_member_interval;
- unsigned long multicast_membership_interval;
- unsigned long multicast_querier_interval;
- unsigned long multicast_query_interval;
- unsigned long multicast_query_response_interval;
- unsigned long multicast_startup_query_interval;
struct rhashtable mdb_hash_tbl;
struct rhashtable sg_port_tbl;
@@ -452,19 +507,6 @@ struct net_bridge {
struct hlist_head mcast_gc_list;
struct hlist_head mdb_list;
- struct hlist_head ip4_mc_router_list;
- struct timer_list ip4_mc_router_timer;
- struct bridge_mcast_other_query ip4_other_query;
- struct bridge_mcast_own_query ip4_own_query;
- struct bridge_mcast_querier ip4_querier;
- struct bridge_mcast_stats __percpu *mcast_stats;
-#if IS_ENABLED(CONFIG_IPV6)
- struct hlist_head ip6_mc_router_list;
- struct timer_list ip6_mc_router_timer;
- struct bridge_mcast_other_query ip6_other_query;
- struct bridge_mcast_own_query ip6_own_query;
- struct bridge_mcast_querier ip6_querier;
-#endif /* IS_ENABLED(CONFIG_IPV6) */
struct work_struct mcast_gc_work;
#endif
@@ -476,7 +518,12 @@ struct net_bridge {
u32 auto_cnt;
#ifdef CONFIG_NET_SWITCHDEV
- int offload_fwd_mark;
+ /* Counter used to make sure that hardware domains get unique
+ * identifiers in case a bridge spans multiple switchdev instances.
+ */
+ int last_hwdom;
+ /* Bit mask of hardware domain numbers in use */
+ unsigned long busy_hwdoms;
#endif
struct hlist_head fdb_list;
@@ -506,7 +553,20 @@ struct br_input_skb_cb {
#endif
#ifdef CONFIG_NET_SWITCHDEV
- int offload_fwd_mark;
+ /* Set if TX data plane offloading is used towards at least one
+ * hardware domain.
+ */
+ u8 tx_fwd_offload:1;
+ /* The switchdev hardware domain from which this packet was received.
+ * If skb->offload_fwd_mark was set, then this packet was already
+ * forwarded by hardware to the other ports in the source hardware
+ * domain, otherwise it wasn't.
+ */
+ int src_hwdom;
+ /* Bit mask of hardware domains towards which this packet has already
+ * been transmitted using the TX data plane offload.
+ */
+ unsigned long fwd_hwdoms;
#endif
};
@@ -616,6 +676,20 @@ static inline bool br_vlan_valid_range(const struct bridge_vlan_info *cur,
return true;
}
+static inline u8 br_vlan_multicast_router(const struct net_bridge_vlan *v)
+{
+ u8 mcast_router = MDB_RTR_TYPE_DISABLED;
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (!br_vlan_is_master(v))
+ mcast_router = v->port_mcast_ctx.multicast_router;
+ else
+ mcast_router = v->br_mcast_ctx.multicast_router;
+#endif
+
+ return mcast_router;
+}
+
static inline int br_afspec_cmd_to_rtm(int cmd)
{
switch (cmd) {
@@ -711,13 +785,15 @@ int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev,
int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
- const unsigned char *addr, u16 vid, bool is_local,
+ const unsigned char *addr, u16 vid,
bool swdev_notify);
int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid,
bool swdev_notify);
void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid, bool offloaded);
+int br_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding,
+ struct notifier_block *nb);
/* br_forward.c */
enum br_pkt_type {
@@ -790,15 +866,18 @@ br_port_get_check_rtnl(const struct net_device *dev)
}
/* br_ioctl.c */
-int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
-int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd,
- void __user *arg);
+int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+ void __user *data, int cmd);
+int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd,
+ struct ifreq *ifr, void __user *uarg);
/* br_multicast.c */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
+int br_multicast_rcv(struct net_bridge_mcast **brmctx,
+ struct net_bridge_mcast_port **pmctx,
+ struct net_bridge_vlan *vlan,
struct sk_buff *skb, u16 vid);
-struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
+struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
struct sk_buff *skb, u16 vid);
int br_multicast_add_port(struct net_bridge_port *port);
void br_multicast_del_port(struct net_bridge_port *port);
@@ -810,17 +889,22 @@ void br_multicast_leave_snoopers(struct net_bridge *br);
void br_multicast_open(struct net_bridge *br);
void br_multicast_stop(struct net_bridge *br);
void br_multicast_dev_del(struct net_bridge *br);
-void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
- struct sk_buff *skb, bool local_rcv, bool local_orig);
-int br_multicast_set_router(struct net_bridge *br, unsigned long val);
-int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val);
+void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb,
+ struct net_bridge_mcast *brmctx,
+ bool local_rcv, bool local_orig);
+int br_multicast_set_router(struct net_bridge_mcast *brmctx, unsigned long val);
+int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx,
+ unsigned long val);
+int br_multicast_set_vlan_router(struct net_bridge_vlan *v, u8 mcast_router);
int br_multicast_toggle(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack);
-int br_multicast_set_querier(struct net_bridge *br, unsigned long val);
+int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val);
int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
-int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val);
+int br_multicast_set_igmp_version(struct net_bridge_mcast *brmctx,
+ unsigned long val);
#if IS_ENABLED(CONFIG_IPV6)
-int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val);
+int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx,
+ unsigned long val);
#endif
struct net_bridge_mdb_entry *
br_mdb_ip_get(struct net_bridge *br, struct br_ip *dst);
@@ -835,12 +919,13 @@ int br_mdb_hash_init(struct net_bridge *br);
void br_mdb_hash_fini(struct net_bridge *br);
void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp,
struct net_bridge_port_group *pg, int type);
-void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
+void br_rtr_notify(struct net_device *dev, struct net_bridge_mcast_port *pmctx,
int type);
void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
struct net_bridge_port_group *pg,
struct net_bridge_port_group __rcu **pp);
-void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
+void br_multicast_count(struct net_bridge *br,
+ const struct net_bridge_port *p,
const struct sk_buff *skb, u8 type, u8 dir);
int br_multicast_init_stats(struct net_bridge *br);
void br_multicast_uninit_stats(struct net_bridge *br);
@@ -849,7 +934,8 @@ void br_multicast_get_stats(const struct net_bridge *br,
struct br_mcast_stats *dest);
void br_mdb_init(void);
void br_mdb_uninit(void);
-void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify);
+void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_mdb_entry *mp, bool notify);
void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify);
void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
u8 filter_mode);
@@ -859,6 +945,29 @@ struct net_bridge_group_src *
br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip);
void br_multicast_del_group_src(struct net_bridge_group_src *src,
bool fastleave);
+void br_multicast_ctx_init(struct net_bridge *br,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast *brmctx);
+void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx);
+void br_multicast_port_ctx_init(struct net_bridge_port *port,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast_port *pmctx);
+void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx);
+void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on);
+int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on,
+ struct netlink_ext_ack *extack);
+bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on);
+
+int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
+ const void *ctx, bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack);
+int br_rports_fill_info(struct sk_buff *skb,
+ const struct net_bridge_mcast *brmctx);
+int br_multicast_dump_querier_state(struct sk_buff *skb,
+ const struct net_bridge_mcast *brmctx,
+ int nest_attr);
+size_t br_multicast_querier_state_size(void);
+size_t br_rports_size(const struct net_bridge_mcast *brmctx);
static inline bool br_group_is_l2(const struct br_ip *group)
{
@@ -869,52 +978,65 @@ static inline bool br_group_is_l2(const struct br_ip *group)
rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
static inline struct hlist_node *
-br_multicast_get_first_rport_node(struct net_bridge *b, struct sk_buff *skb) {
+br_multicast_get_first_rport_node(struct net_bridge_mcast *brmctx,
+ struct sk_buff *skb)
+{
#if IS_ENABLED(CONFIG_IPV6)
if (skb->protocol == htons(ETH_P_IPV6))
- return rcu_dereference(hlist_first_rcu(&b->ip6_mc_router_list));
+ return rcu_dereference(hlist_first_rcu(&brmctx->ip6_mc_router_list));
#endif
- return rcu_dereference(hlist_first_rcu(&b->ip4_mc_router_list));
+ return rcu_dereference(hlist_first_rcu(&brmctx->ip4_mc_router_list));
}
static inline struct net_bridge_port *
-br_multicast_rport_from_node_skb(struct hlist_node *rp, struct sk_buff *skb) {
+br_multicast_rport_from_node_skb(struct hlist_node *rp, struct sk_buff *skb)
+{
+ struct net_bridge_mcast_port *mctx;
+
#if IS_ENABLED(CONFIG_IPV6)
if (skb->protocol == htons(ETH_P_IPV6))
- return hlist_entry_safe(rp, struct net_bridge_port, ip6_rlist);
+ mctx = hlist_entry_safe(rp, struct net_bridge_mcast_port,
+ ip6_rlist);
+ else
#endif
- return hlist_entry_safe(rp, struct net_bridge_port, ip4_rlist);
+ mctx = hlist_entry_safe(rp, struct net_bridge_mcast_port,
+ ip4_rlist);
+
+ if (mctx)
+ return mctx->port;
+ else
+ return NULL;
}
-static inline bool br_ip4_multicast_is_router(struct net_bridge *br)
+static inline bool br_ip4_multicast_is_router(struct net_bridge_mcast *brmctx)
{
- return timer_pending(&br->ip4_mc_router_timer);
+ return timer_pending(&brmctx->ip4_mc_router_timer);
}
-static inline bool br_ip6_multicast_is_router(struct net_bridge *br)
+static inline bool br_ip6_multicast_is_router(struct net_bridge_mcast *brmctx)
{
#if IS_ENABLED(CONFIG_IPV6)
- return timer_pending(&br->ip6_mc_router_timer);
+ return timer_pending(&brmctx->ip6_mc_router_timer);
#else
return false;
#endif
}
static inline bool
-br_multicast_is_router(struct net_bridge *br, struct sk_buff *skb)
+br_multicast_is_router(struct net_bridge_mcast *brmctx, struct sk_buff *skb)
{
- switch (br->multicast_router) {
+ switch (brmctx->multicast_router) {
case MDB_RTR_TYPE_PERM:
return true;
case MDB_RTR_TYPE_TEMP_QUERY:
if (skb) {
if (skb->protocol == htons(ETH_P_IP))
- return br_ip4_multicast_is_router(br);
+ return br_ip4_multicast_is_router(brmctx);
else if (skb->protocol == htons(ETH_P_IPV6))
- return br_ip6_multicast_is_router(br);
+ return br_ip6_multicast_is_router(brmctx);
} else {
- return br_ip4_multicast_is_router(br) ||
- br_ip6_multicast_is_router(br);
+ return br_ip4_multicast_is_router(brmctx) ||
+ br_ip6_multicast_is_router(brmctx);
}
fallthrough;
default:
@@ -923,14 +1045,14 @@ br_multicast_is_router(struct net_bridge *br, struct sk_buff *skb)
}
static inline bool
-__br_multicast_querier_exists(struct net_bridge *br,
- struct bridge_mcast_other_query *querier,
- const bool is_ipv6)
+__br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
+ struct bridge_mcast_other_query *querier,
+ const bool is_ipv6)
{
bool own_querier_enabled;
- if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) {
- if (is_ipv6 && !br_opt_get(br, BROPT_HAS_IPV6_ADDR))
+ if (brmctx->multicast_querier) {
+ if (is_ipv6 && !br_opt_get(brmctx->br, BROPT_HAS_IPV6_ADDR))
own_querier_enabled = false;
else
own_querier_enabled = true;
@@ -942,18 +1064,18 @@ __br_multicast_querier_exists(struct net_bridge *br,
(own_querier_enabled || timer_pending(&querier->timer));
}
-static inline bool br_multicast_querier_exists(struct net_bridge *br,
+static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
struct ethhdr *eth,
const struct net_bridge_mdb_entry *mdb)
{
switch (eth->h_proto) {
case (htons(ETH_P_IP)):
- return __br_multicast_querier_exists(br,
- &br->ip4_other_query, false);
+ return __br_multicast_querier_exists(brmctx,
+ &brmctx->ip4_other_query, false);
#if IS_ENABLED(CONFIG_IPV6)
case (htons(ETH_P_IPV6)):
- return __br_multicast_querier_exists(br,
- &br->ip6_other_query, true);
+ return __br_multicast_querier_exists(brmctx,
+ &brmctx->ip6_other_query, true);
#endif
default:
return !!mdb && br_group_is_l2(&mdb->addr);
@@ -974,15 +1096,16 @@ static inline bool br_multicast_is_star_g(const struct br_ip *ip)
}
}
-static inline bool br_multicast_should_handle_mode(const struct net_bridge *br,
- __be16 proto)
+static inline bool
+br_multicast_should_handle_mode(const struct net_bridge_mcast *brmctx,
+ __be16 proto)
{
switch (proto) {
case htons(ETH_P_IP):
- return !!(br->multicast_igmp_version == 3);
+ return !!(brmctx->multicast_igmp_version == 3);
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- return !!(br->multicast_mld_version == 2);
+ return !!(brmctx->multicast_mld_version == 2);
#endif
default:
return false;
@@ -994,28 +1117,145 @@ static inline int br_multicast_igmp_type(const struct sk_buff *skb)
return BR_INPUT_SKB_CB(skb)->igmp;
}
-static inline unsigned long br_multicast_lmqt(const struct net_bridge *br)
+static inline unsigned long br_multicast_lmqt(const struct net_bridge_mcast *brmctx)
{
- return br->multicast_last_member_interval *
- br->multicast_last_member_count;
+ return brmctx->multicast_last_member_interval *
+ brmctx->multicast_last_member_count;
}
-static inline unsigned long br_multicast_gmi(const struct net_bridge *br)
+static inline unsigned long br_multicast_gmi(const struct net_bridge_mcast *brmctx)
{
/* use the RFC default of 2 for QRV */
- return 2 * br->multicast_query_interval +
- br->multicast_query_response_interval;
+ return 2 * brmctx->multicast_query_interval +
+ brmctx->multicast_query_response_interval;
+}
+
+static inline bool
+br_multicast_ctx_is_vlan(const struct net_bridge_mcast *brmctx)
+{
+ return !!brmctx->vlan;
+}
+
+static inline bool
+br_multicast_port_ctx_is_vlan(const struct net_bridge_mcast_port *pmctx)
+{
+ return !!pmctx->vlan;
+}
+
+static inline struct net_bridge_mcast *
+br_multicast_port_ctx_get_global(const struct net_bridge_mcast_port *pmctx)
+{
+ if (!br_multicast_port_ctx_is_vlan(pmctx))
+ return &pmctx->port->br->multicast_ctx;
+ else
+ return &pmctx->vlan->brvlan->br_mcast_ctx;
+}
+
+static inline bool
+br_multicast_ctx_vlan_global_disabled(const struct net_bridge_mcast *brmctx)
+{
+ return br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) &&
+ br_multicast_ctx_is_vlan(brmctx) &&
+ !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED);
+}
+
+static inline bool
+br_multicast_ctx_vlan_disabled(const struct net_bridge_mcast *brmctx)
+{
+ return br_multicast_ctx_is_vlan(brmctx) &&
+ !(brmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED);
+}
+
+static inline bool
+br_multicast_port_ctx_vlan_disabled(const struct net_bridge_mcast_port *pmctx)
+{
+ return br_multicast_port_ctx_is_vlan(pmctx) &&
+ !(pmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED);
+}
+
+static inline bool
+br_multicast_port_ctx_state_disabled(const struct net_bridge_mcast_port *pmctx)
+{
+ return pmctx->port->state == BR_STATE_DISABLED ||
+ (br_multicast_port_ctx_is_vlan(pmctx) &&
+ (br_multicast_port_ctx_vlan_disabled(pmctx) ||
+ pmctx->vlan->state == BR_STATE_DISABLED));
+}
+
+static inline bool
+br_multicast_port_ctx_state_stopped(const struct net_bridge_mcast_port *pmctx)
+{
+ return br_multicast_port_ctx_state_disabled(pmctx) ||
+ pmctx->port->state == BR_STATE_BLOCKING ||
+ (br_multicast_port_ctx_is_vlan(pmctx) &&
+ pmctx->vlan->state == BR_STATE_BLOCKING);
+}
+
+static inline bool
+br_rports_have_mc_router(const struct net_bridge_mcast *brmctx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ return !hlist_empty(&brmctx->ip4_mc_router_list) ||
+ !hlist_empty(&brmctx->ip6_mc_router_list);
+#else
+ return !hlist_empty(&brmctx->ip4_mc_router_list);
+#endif
+}
+
+static inline bool
+br_multicast_ctx_options_equal(const struct net_bridge_mcast *brmctx1,
+ const struct net_bridge_mcast *brmctx2)
+{
+ return brmctx1->multicast_igmp_version ==
+ brmctx2->multicast_igmp_version &&
+ brmctx1->multicast_last_member_count ==
+ brmctx2->multicast_last_member_count &&
+ brmctx1->multicast_startup_query_count ==
+ brmctx2->multicast_startup_query_count &&
+ brmctx1->multicast_last_member_interval ==
+ brmctx2->multicast_last_member_interval &&
+ brmctx1->multicast_membership_interval ==
+ brmctx2->multicast_membership_interval &&
+ brmctx1->multicast_querier_interval ==
+ brmctx2->multicast_querier_interval &&
+ brmctx1->multicast_query_interval ==
+ brmctx2->multicast_query_interval &&
+ brmctx1->multicast_query_response_interval ==
+ brmctx2->multicast_query_response_interval &&
+ brmctx1->multicast_startup_query_interval ==
+ brmctx2->multicast_startup_query_interval &&
+ brmctx1->multicast_querier == brmctx2->multicast_querier &&
+ brmctx1->multicast_router == brmctx2->multicast_router &&
+ !br_rports_have_mc_router(brmctx1) &&
+ !br_rports_have_mc_router(brmctx2) &&
+#if IS_ENABLED(CONFIG_IPV6)
+ brmctx1->multicast_mld_version ==
+ brmctx2->multicast_mld_version &&
+#endif
+ true;
+}
+
+static inline bool
+br_multicast_ctx_matches_vlan_snooping(const struct net_bridge_mcast *brmctx)
+{
+ bool vlan_snooping_enabled;
+
+ vlan_snooping_enabled = !!br_opt_get(brmctx->br,
+ BROPT_MCAST_VLAN_SNOOPING_ENABLED);
+
+ return !!(vlan_snooping_enabled == br_multicast_ctx_is_vlan(brmctx));
}
#else
-static inline int br_multicast_rcv(struct net_bridge *br,
- struct net_bridge_port *port,
+static inline int br_multicast_rcv(struct net_bridge_mcast **brmctx,
+ struct net_bridge_mcast_port **pmctx,
+ struct net_bridge_vlan *vlan,
struct sk_buff *skb,
u16 vid)
{
return 0;
}
-static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
+static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
struct sk_buff *skb, u16 vid)
{
return NULL;
@@ -1064,17 +1304,18 @@ static inline void br_multicast_dev_del(struct net_bridge *br)
static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb,
+ struct net_bridge_mcast *brmctx,
bool local_rcv, bool local_orig)
{
}
-static inline bool br_multicast_is_router(struct net_bridge *br,
+static inline bool br_multicast_is_router(struct net_bridge_mcast *brmctx,
struct sk_buff *skb)
{
return false;
}
-static inline bool br_multicast_querier_exists(struct net_bridge *br,
+static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
struct ethhdr *eth,
const struct net_bridge_mdb_entry *mdb)
{
@@ -1118,13 +1359,67 @@ static inline int br_multicast_igmp_type(const struct sk_buff *skb)
{
return 0;
}
+
+static inline void br_multicast_ctx_init(struct net_bridge *br,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast *brmctx)
+{
+}
+
+static inline void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx)
+{
+}
+
+static inline void br_multicast_port_ctx_init(struct net_bridge_port *port,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast_port *pmctx)
+{
+}
+
+static inline void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx)
+{
+}
+
+static inline void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan,
+ bool on)
+{
+}
+
+static inline int br_multicast_toggle_vlan_snooping(struct net_bridge *br,
+ bool on,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan,
+ bool on)
+{
+ return false;
+}
+
+static inline int br_mdb_replay(struct net_device *br_dev,
+ struct net_device *dev, const void *ctx,
+ bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline bool
+br_multicast_ctx_options_equal(const struct net_bridge_mcast *brmctx1,
+ const struct net_bridge_mcast *brmctx2)
+{
+ return true;
+}
#endif
/* br_vlan.c */
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
bool br_allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg, struct sk_buff *skb,
- u16 *vid, u8 *state);
+ u16 *vid, u8 *state,
+ struct net_bridge_vlan **vlan);
bool br_allowed_egress(struct net_bridge_vlan_group *vg,
const struct sk_buff *skb);
bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid);
@@ -1168,6 +1463,9 @@ void br_vlan_notify(const struct net_bridge *br,
const struct net_bridge_port *p,
u16 vid, u16 vid_range,
int cmd);
+int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
+ const void *ctx, bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack);
bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
const struct net_bridge_vlan *range_end);
@@ -1236,8 +1534,11 @@ static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid)
static inline bool br_allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg,
struct sk_buff *skb,
- u16 *vid, u8 *state)
+ u16 *vid, u8 *state,
+ struct net_bridge_vlan **vlan)
+
{
+ *vlan = NULL;
return true;
}
@@ -1410,6 +1711,14 @@ static inline bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
{
return true;
}
+
+static inline int br_vlan_replay(struct net_device *br_dev,
+ struct net_device *dev, const void *ctx,
+ bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
#endif
/* br_vlan_options.c */
@@ -1424,6 +1733,14 @@ int br_vlan_process_options(const struct net_bridge *br,
struct net_bridge_vlan *range_end,
struct nlattr **tb,
struct netlink_ext_ack *extack);
+int br_vlan_rtm_process_global_options(struct net_device *dev,
+ const struct nlattr *attr,
+ int cmd,
+ struct netlink_ext_ack *extack);
+bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr,
+ const struct net_bridge_vlan *r_end);
+bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range,
+ const struct net_bridge_vlan *v_opts);
/* vlan state manipulation helpers using *_ONCE to annotate lock-free access */
static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v)
@@ -1645,7 +1962,25 @@ static inline void br_sysfs_delbr(struct net_device *dev) { return; }
/* br_switchdev.c */
#ifdef CONFIG_NET_SWITCHDEV
-int nbp_switchdev_mark_set(struct net_bridge_port *p);
+int br_switchdev_port_offload(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack);
+
+void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb);
+
+bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb);
+
+void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb);
+
+void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
+ struct sk_buff *skb);
+void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
+ struct sk_buff *skb);
void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
struct sk_buff *skb);
bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
@@ -1659,15 +1994,50 @@ void br_switchdev_fdb_notify(struct net_bridge *br,
int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
struct netlink_ext_ack *extack);
int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid);
+void br_switchdev_init(struct net_bridge *br);
static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
{
skb->offload_fwd_mark = 0;
}
#else
-static inline int nbp_switchdev_mark_set(struct net_bridge_port *p)
+static inline int
+br_switchdev_port_offload(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void
+br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+}
+
+static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
+{
+ return false;
+}
+
+static inline void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb)
+{
+}
+
+static inline void
+nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
+ struct sk_buff *skb)
+{
+}
+
+static inline void
+nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
+ struct sk_buff *skb)
{
- return 0;
}
static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
@@ -1710,6 +2080,11 @@ br_switchdev_fdb_notify(struct net_bridge *br,
static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
{
}
+
+static inline void br_switchdev_init(struct net_bridge *br)
+{
+}
+
#endif /* CONFIG_NET_SWITCHDEV */
/* br_arp_nd_proxy.c */
diff --git a/net/bridge/br_private_mcast_eht.h b/net/bridge/br_private_mcast_eht.h
index f89049f4892c..adf82a05515a 100644
--- a/net/bridge/br_private_mcast_eht.h
+++ b/net/bridge/br_private_mcast_eht.h
@@ -51,7 +51,8 @@ struct net_bridge_group_eht_set {
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg);
-bool br_multicast_eht_handle(struct net_bridge_port_group *pg,
+bool br_multicast_eht_handle(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
void *h_addr,
void *srcs,
u32 nsrcs,
diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h
index c54cc26211d7..2b053289f016 100644
--- a/net/bridge/br_private_tunnel.h
+++ b/net/bridge/br_private_tunnel.h
@@ -38,9 +38,9 @@ int nbp_vlan_tunnel_info_add(const struct net_bridge_port *port, u16 vid,
void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port);
void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg,
struct net_bridge_vlan *vlan);
-int br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
- struct net_bridge_port *p,
- struct net_bridge_vlan_group *vg);
+void br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
+ struct net_bridge_port *p,
+ struct net_bridge_vlan_group *vg);
int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
struct net_bridge_vlan *vlan);
bool vlan_tunid_inrange(const struct net_bridge_vlan *v_curr,
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index d3adee0f91f9..6bf518d78f02 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -8,50 +8,65 @@
#include "br_private.h"
-static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev)
-{
- struct net_bridge_port *p;
+static struct static_key_false br_switchdev_tx_fwd_offload;
- /* dev is yet to be added to the port list. */
- list_for_each_entry(p, &br->port_list, list) {
- if (netdev_port_same_parent_id(dev, p->dev))
- return p->offload_fwd_mark;
- }
+static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p,
+ const struct sk_buff *skb)
+{
+ if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
+ return false;
- return ++br->offload_fwd_mark;
+ return (p->flags & BR_TX_FWD_OFFLOAD) &&
+ (p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom);
}
-int nbp_switchdev_mark_set(struct net_bridge_port *p)
+bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
{
- struct netdev_phys_item_id ppid = { };
- int err;
+ if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
+ return false;
- ASSERT_RTNL();
+ return BR_INPUT_SKB_CB(skb)->tx_fwd_offload;
+}
- err = dev_get_port_parent_id(p->dev, &ppid, true);
- if (err) {
- if (err == -EOPNOTSUPP)
- return 0;
- return err;
- }
+void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb)
+{
+ skb->offload_fwd_mark = br_switchdev_frame_uses_tx_fwd_offload(skb);
+}
- p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev);
+/* Mark the frame for TX forwarding offload if this egress port supports it */
+void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
+ struct sk_buff *skb)
+{
+ if (nbp_switchdev_can_offload_tx_fwd(p, skb))
+ BR_INPUT_SKB_CB(skb)->tx_fwd_offload = true;
+}
- return 0;
+/* Lazily adds the hwdom of the egress bridge port to the bit mask of hwdoms
+ * that the skb has been already forwarded to, to avoid further cloning to
+ * other ports in the same hwdom by making nbp_switchdev_allowed_egress()
+ * return false.
+ */
+void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
+ struct sk_buff *skb)
+{
+ if (nbp_switchdev_can_offload_tx_fwd(p, skb))
+ set_bit(p->hwdom, &BR_INPUT_SKB_CB(skb)->fwd_hwdoms);
}
void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
struct sk_buff *skb)
{
- if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark))
- BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark;
+ if (p->hwdom)
+ BR_INPUT_SKB_CB(skb)->src_hwdom = p->hwdom;
}
bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
const struct sk_buff *skb)
{
- return !skb->offload_fwd_mark ||
- BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark;
+ struct br_input_skb_cb *cb = BR_INPUT_SKB_CB(skb);
+
+ return !test_bit(p->hwdom, &cb->fwd_hwdoms) &&
+ (!skb->offload_fwd_mark || cb->src_hwdom != p->hwdom);
}
/* Flags that can be offloaded to hardware */
@@ -112,7 +127,6 @@ br_switchdev_fdb_notify(struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb, int type)
{
const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
- struct net_device *dev = dst ? dst->dev : br->dev;
struct switchdev_notifier_fdb_info info = {
.addr = fdb->key.addr.addr,
.vid = fdb->key.vlan_id,
@@ -120,6 +134,7 @@ br_switchdev_fdb_notify(struct net_bridge *br,
.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags),
.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags),
};
+ struct net_device *dev = (!dst || info.is_local) ? br->dev : dst->dev;
switch (type) {
case RTM_DELNEIGH:
@@ -156,3 +171,182 @@ int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid)
return switchdev_port_obj_del(dev, &v.obj);
}
+
+static int nbp_switchdev_hwdom_set(struct net_bridge_port *joining)
+{
+ struct net_bridge *br = joining->br;
+ struct net_bridge_port *p;
+ int hwdom;
+
+ /* joining is yet to be added to the port list. */
+ list_for_each_entry(p, &br->port_list, list) {
+ if (netdev_phys_item_id_same(&joining->ppid, &p->ppid)) {
+ joining->hwdom = p->hwdom;
+ return 0;
+ }
+ }
+
+ hwdom = find_next_zero_bit(&br->busy_hwdoms, BR_HWDOM_MAX, 1);
+ if (hwdom >= BR_HWDOM_MAX)
+ return -EBUSY;
+
+ set_bit(hwdom, &br->busy_hwdoms);
+ joining->hwdom = hwdom;
+ return 0;
+}
+
+static void nbp_switchdev_hwdom_put(struct net_bridge_port *leaving)
+{
+ struct net_bridge *br = leaving->br;
+ struct net_bridge_port *p;
+
+ /* leaving is no longer in the port list. */
+ list_for_each_entry(p, &br->port_list, list) {
+ if (p->hwdom == leaving->hwdom)
+ return;
+ }
+
+ clear_bit(leaving->hwdom, &br->busy_hwdoms);
+}
+
+static int nbp_switchdev_add(struct net_bridge_port *p,
+ struct netdev_phys_item_id ppid,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (p->offload_count) {
+ /* Prevent unsupported configurations such as a bridge port
+ * which is a bonding interface, and the member ports are from
+ * different hardware switches.
+ */
+ if (!netdev_phys_item_id_same(&p->ppid, &ppid)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Same bridge port cannot be offloaded by two physical switches");
+ return -EBUSY;
+ }
+
+ /* Tolerate drivers that call switchdev_bridge_port_offload()
+ * more than once for the same bridge port, such as when the
+ * bridge port is an offloaded bonding/team interface.
+ */
+ p->offload_count++;
+
+ return 0;
+ }
+
+ p->ppid = ppid;
+ p->offload_count = 1;
+
+ err = nbp_switchdev_hwdom_set(p);
+ if (err)
+ return err;
+
+ if (tx_fwd_offload) {
+ p->flags |= BR_TX_FWD_OFFLOAD;
+ static_branch_inc(&br_switchdev_tx_fwd_offload);
+ }
+
+ return 0;
+}
+
+static void nbp_switchdev_del(struct net_bridge_port *p)
+{
+ if (WARN_ON(!p->offload_count))
+ return;
+
+ p->offload_count--;
+
+ if (p->offload_count)
+ return;
+
+ if (p->hwdom)
+ nbp_switchdev_hwdom_put(p);
+
+ if (p->flags & BR_TX_FWD_OFFLOAD) {
+ p->flags &= ~BR_TX_FWD_OFFLOAD;
+ static_branch_dec(&br_switchdev_tx_fwd_offload);
+ }
+}
+
+static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *br_dev = p->br->dev;
+ struct net_device *dev = p->dev;
+ int err;
+
+ err = br_vlan_replay(br_dev, dev, ctx, true, blocking_nb, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ err = br_mdb_replay(br_dev, dev, ctx, true, blocking_nb, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ err = br_fdb_replay(br_dev, ctx, true, atomic_nb);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ return 0;
+}
+
+static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
+ const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+ struct net_device *br_dev = p->br->dev;
+ struct net_device *dev = p->dev;
+
+ br_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+
+ br_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+
+ br_fdb_replay(br_dev, ctx, false, atomic_nb);
+}
+
+/* Let the bridge know that this port is offloaded, so that it can assign a
+ * switchdev hardware domain to it.
+ */
+int br_switchdev_port_offload(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_phys_item_id ppid;
+ int err;
+
+ err = dev_get_port_parent_id(dev, &ppid, false);
+ if (err)
+ return err;
+
+ err = nbp_switchdev_add(p, ppid, tx_fwd_offload, extack);
+ if (err)
+ return err;
+
+ err = nbp_switchdev_sync_objs(p, ctx, atomic_nb, blocking_nb, extack);
+ if (err)
+ goto out_switchdev_del;
+
+ return 0;
+
+out_switchdev_del:
+ nbp_switchdev_del(p);
+
+ return err;
+}
+
+void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+ nbp_switchdev_unsync_objs(p, ctx, atomic_nb, blocking_nb);
+
+ nbp_switchdev_del(p);
+}
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 381467b691d5..d9a89ddd0331 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -384,13 +384,13 @@ static ssize_t multicast_router_show(struct device *d,
struct device_attribute *attr, char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%d\n", br->multicast_router);
+ return sprintf(buf, "%d\n", br->multicast_ctx.multicast_router);
}
static int set_multicast_router(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- return br_multicast_set_router(br, val);
+ return br_multicast_set_router(&br->multicast_ctx, val);
}
static ssize_t multicast_router_store(struct device *d,
@@ -447,13 +447,13 @@ static ssize_t multicast_querier_show(struct device *d,
char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_QUERIER));
+ return sprintf(buf, "%d\n", br->multicast_ctx.multicast_querier);
}
static int set_multicast_querier(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- return br_multicast_set_querier(br, val);
+ return br_multicast_set_querier(&br->multicast_ctx, val);
}
static ssize_t multicast_querier_store(struct device *d,
@@ -514,13 +514,13 @@ static ssize_t multicast_igmp_version_show(struct device *d,
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->multicast_igmp_version);
+ return sprintf(buf, "%u\n", br->multicast_ctx.multicast_igmp_version);
}
static int set_multicast_igmp_version(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- return br_multicast_set_igmp_version(br, val);
+ return br_multicast_set_igmp_version(&br->multicast_ctx, val);
}
static ssize_t multicast_igmp_version_store(struct device *d,
@@ -536,13 +536,13 @@ static ssize_t multicast_last_member_count_show(struct device *d,
char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->multicast_last_member_count);
+ return sprintf(buf, "%u\n", br->multicast_ctx.multicast_last_member_count);
}
static int set_last_member_count(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_last_member_count = val;
+ br->multicast_ctx.multicast_last_member_count = val;
return 0;
}
@@ -558,13 +558,13 @@ static ssize_t multicast_startup_query_count_show(
struct device *d, struct device_attribute *attr, char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->multicast_startup_query_count);
+ return sprintf(buf, "%u\n", br->multicast_ctx.multicast_startup_query_count);
}
static int set_startup_query_count(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_startup_query_count = val;
+ br->multicast_ctx.multicast_startup_query_count = val;
return 0;
}
@@ -581,13 +581,13 @@ static ssize_t multicast_last_member_interval_show(
{
struct net_bridge *br = to_bridge(d);
return sprintf(buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_last_member_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_last_member_interval));
}
static int set_last_member_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_last_member_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -604,13 +604,13 @@ static ssize_t multicast_membership_interval_show(
{
struct net_bridge *br = to_bridge(d);
return sprintf(buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_membership_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_membership_interval));
}
static int set_membership_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_membership_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_membership_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -628,13 +628,13 @@ static ssize_t multicast_querier_interval_show(struct device *d,
{
struct net_bridge *br = to_bridge(d);
return sprintf(buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_querier_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_querier_interval));
}
static int set_querier_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_querier_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_querier_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -652,13 +652,13 @@ static ssize_t multicast_query_interval_show(struct device *d,
{
struct net_bridge *br = to_bridge(d);
return sprintf(buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_query_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_query_interval));
}
static int set_query_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_query_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -676,13 +676,13 @@ static ssize_t multicast_query_response_interval_show(
struct net_bridge *br = to_bridge(d);
return sprintf(
buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_query_response_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_query_response_interval));
}
static int set_query_response_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_query_response_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -700,13 +700,13 @@ static ssize_t multicast_startup_query_interval_show(
struct net_bridge *br = to_bridge(d);
return sprintf(
buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_startup_query_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_startup_query_interval));
}
static int set_startup_query_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_startup_query_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -751,13 +751,13 @@ static ssize_t multicast_mld_version_show(struct device *d,
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->multicast_mld_version);
+ return sprintf(buf, "%u\n", br->multicast_ctx.multicast_mld_version);
}
static int set_multicast_mld_version(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- return br_multicast_set_mld_version(br, val);
+ return br_multicast_set_mld_version(&br->multicast_ctx, val);
}
static ssize_t multicast_mld_version_store(struct device *d,
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 72e92376eef1..07fa76080512 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -244,13 +244,13 @@ BRPORT_ATTR_FLAG(isolated, BR_ISOLATED);
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
{
- return sprintf(buf, "%d\n", p->multicast_router);
+ return sprintf(buf, "%d\n", p->multicast_ctx.multicast_router);
}
static int store_multicast_router(struct net_bridge_port *p,
unsigned long v)
{
- return br_multicast_set_port_router(p, v);
+ return br_multicast_set_port_router(&p->multicast_ctx, v);
}
static BRPORT_ATTR(multicast_router, 0644, show_multicast_router,
store_multicast_router);
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index a08e9f193009..19f65ab91a02 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -190,6 +190,8 @@ static void br_vlan_put_master(struct net_bridge_vlan *masterv)
rhashtable_remove_fast(&vg->vlan_hash,
&masterv->vnode, br_vlan_rht_params);
__vlan_del_list(masterv);
+ br_multicast_toggle_one_vlan(masterv, false);
+ br_multicast_ctx_deinit(&masterv->br_mcast_ctx);
call_rcu(&masterv->rcu, br_master_vlan_rcu_free);
}
}
@@ -280,10 +282,13 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
} else {
v->stats = masterv->stats;
}
+ br_multicast_port_ctx_init(p, v, &v->port_mcast_ctx);
} else {
err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack);
if (err && err != -EOPNOTSUPP)
goto out;
+ br_multicast_ctx_init(br, v, &v->br_mcast_ctx);
+ v->priv_flags |= BR_VLFLAG_GLOBAL_MCAST_ENABLED;
}
/* Add the dev mac and count the vlan only if it's usable */
@@ -306,6 +311,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
__vlan_add_list(v);
__vlan_add_flags(v, flags);
+ br_multicast_toggle_one_vlan(v, true);
if (p)
nbp_vlan_set_vlan_dev_state(p, v->vid);
@@ -374,6 +380,8 @@ static int __vlan_del(struct net_bridge_vlan *v)
br_vlan_rht_params);
__vlan_del_list(v);
nbp_vlan_set_vlan_dev_state(p, v->vid);
+ br_multicast_toggle_one_vlan(v, false);
+ br_multicast_port_ctx_deinit(&v->port_mcast_ctx);
call_rcu(&v->rcu, nbp_vlan_rcu_free);
}
@@ -457,7 +465,15 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
u64_stats_update_end(&stats->syncp);
}
- if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
+ /* If the skb will be sent using forwarding offload, the assumption is
+ * that the switchdev will inject the packet into hardware together
+ * with the bridge VLAN, so that it can be forwarded according to that
+ * VLAN. The switchdev should deal with popping the VLAN header in
+ * hardware on each egress port as appropriate. So only strip the VLAN
+ * header if forwarding offload is not being used.
+ */
+ if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED &&
+ !br_switchdev_frame_uses_tx_fwd_offload(skb))
__vlan_hwaccel_clear_tag(skb);
if (p && (p->flags & BR_VLAN_TUNNEL) &&
@@ -473,7 +489,8 @@ out:
static bool __allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg,
struct sk_buff *skb, u16 *vid,
- u8 *state)
+ u8 *state,
+ struct net_bridge_vlan **vlan)
{
struct pcpu_sw_netstats *stats;
struct net_bridge_vlan *v;
@@ -538,8 +555,9 @@ static bool __allowed_ingress(const struct net_bridge *br,
*/
skb->vlan_tci |= pvid;
- /* if stats are disabled we can avoid the lookup */
- if (!br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
+ /* if snooping and stats are disabled we can avoid the lookup */
+ if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) &&
+ !br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
if (*state == BR_STATE_FORWARDING) {
*state = br_vlan_get_pvid_state(vg);
return br_vlan_state_allowed(*state, true);
@@ -566,6 +584,8 @@ static bool __allowed_ingress(const struct net_bridge *br,
u64_stats_update_end(&stats->syncp);
}
+ *vlan = v;
+
return true;
drop:
@@ -575,17 +595,19 @@ drop:
bool br_allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg, struct sk_buff *skb,
- u16 *vid, u8 *state)
+ u16 *vid, u8 *state,
+ struct net_bridge_vlan **vlan)
{
/* If VLAN filtering is disabled on the bridge, all packets are
* permitted.
*/
+ *vlan = NULL;
if (!br_opt_get(br, BROPT_VLAN_ENABLED)) {
BR_INPUT_SKB_CB(skb)->vlan_filtered = false;
return true;
}
- return __allowed_ingress(br, vg, skb, vid, state);
+ return __allowed_ingress(br, vg, skb, vid, state, vlan);
}
/* Called under RCU. */
@@ -672,6 +694,7 @@ static int br_vlan_add_existing(struct net_bridge *br,
vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY;
vg->num_vlans++;
*changed = true;
+ br_multicast_toggle_one_vlan(vlan, true);
}
if (__vlan_add_flags(vlan, flags))
@@ -818,14 +841,21 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val,
if (br_opt_get(br, BROPT_VLAN_ENABLED) == !!val)
return 0;
+ br_opt_toggle(br, BROPT_VLAN_ENABLED, !!val);
+
err = switchdev_port_attr_set(br->dev, &attr, extack);
- if (err && err != -EOPNOTSUPP)
+ if (err && err != -EOPNOTSUPP) {
+ br_opt_toggle(br, BROPT_VLAN_ENABLED, !val);
return err;
+ }
- br_opt_toggle(br, BROPT_VLAN_ENABLED, !!val);
br_manage_promisc(br);
recalculate_group_addr(br);
br_recalculate_fwd_mask(br);
+ if (!val && br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+ br_info(br, "vlan filtering disabled, automatically disabling multicast vlan snooping\n");
+ br_multicast_toggle_vlan_snooping(br, false, NULL);
+ }
return 0;
}
@@ -1420,6 +1450,33 @@ int br_vlan_get_info(const struct net_device *dev, u16 vid,
}
EXPORT_SYMBOL_GPL(br_vlan_get_info);
+int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid,
+ struct bridge_vlan_info *p_vinfo)
+{
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *v;
+ struct net_bridge_port *p;
+
+ p = br_port_get_check_rcu(dev);
+ if (p)
+ vg = nbp_vlan_group_rcu(p);
+ else if (netif_is_bridge_master(dev))
+ vg = br_vlan_group_rcu(netdev_priv(dev));
+ else
+ return -EINVAL;
+
+ v = br_vlan_find(vg, vid);
+ if (!v)
+ return -ENOENT;
+
+ p_vinfo->vid = vid;
+ p_vinfo->flags = v->flags;
+ if (vid == br_get_pvid(vg))
+ p_vinfo->flags |= BRIDGE_VLAN_INFO_PVID;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(br_vlan_get_info_rcu);
+
static int br_vlan_is_bind_vlan_dev(const struct net_device *dev)
{
return is_vlan_dev(dev) &&
@@ -1838,6 +1895,9 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
ASSERT_RTNL();
+ if (!nb)
+ return 0;
+
if (!netif_is_bridge_master(br_dev))
return -EINVAL;
@@ -1884,7 +1944,6 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
return err;
}
-EXPORT_SYMBOL_GPL(br_vlan_replay);
/* check if v_curr can enter a range ending in range_end */
bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
@@ -1901,6 +1960,7 @@ static int br_vlan_dump_dev(const struct net_device *dev,
u32 dump_flags)
{
struct net_bridge_vlan *v, *range_start = NULL, *range_end = NULL;
+ bool dump_global = !!(dump_flags & BRIDGE_VLANDB_DUMPF_GLOBAL);
bool dump_stats = !!(dump_flags & BRIDGE_VLANDB_DUMPF_STATS);
struct net_bridge_vlan_group *vg;
int idx = 0, s_idx = cb->args[1];
@@ -1919,6 +1979,10 @@ static int br_vlan_dump_dev(const struct net_device *dev,
vg = br_vlan_group_rcu(br);
p = NULL;
} else {
+ /* global options are dumped only for bridge devices */
+ if (dump_global)
+ return 0;
+
p = br_port_get_rcu(dev);
if (WARN_ON(!p))
return -EINVAL;
@@ -1941,7 +2005,7 @@ static int br_vlan_dump_dev(const struct net_device *dev,
/* idx must stay at range's beginning until it is filled in */
list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
- if (!br_vlan_should_use(v))
+ if (!dump_global && !br_vlan_should_use(v))
continue;
if (idx < s_idx) {
idx++;
@@ -1954,8 +2018,21 @@ static int br_vlan_dump_dev(const struct net_device *dev,
continue;
}
- if (dump_stats || v->vid == pvid ||
- !br_vlan_can_enter_range(v, range_end)) {
+ if (dump_global) {
+ if (br_vlan_global_opts_can_enter_range(v, range_end))
+ goto update_end;
+ if (!br_vlan_global_opts_fill(skb, range_start->vid,
+ range_end->vid,
+ range_start)) {
+ err = -EMSGSIZE;
+ break;
+ }
+ /* advance number of filled vlans */
+ idx += range_end->vid - range_start->vid + 1;
+
+ range_start = v;
+ } else if (dump_stats || v->vid == pvid ||
+ !br_vlan_can_enter_range(v, range_end)) {
u16 vlan_flags = br_vlan_flags(range_start, pvid);
if (!br_vlan_fill_vids(skb, range_start->vid,
@@ -1969,6 +2046,7 @@ static int br_vlan_dump_dev(const struct net_device *dev,
range_start = v;
}
+update_end:
range_end = v;
}
@@ -1977,11 +2055,18 @@ static int br_vlan_dump_dev(const struct net_device *dev,
* - last vlan (range_start == range_end, not in range)
* - last vlan range (range_start != range_end, in range)
*/
- if (!err && range_start &&
- !br_vlan_fill_vids(skb, range_start->vid, range_end->vid,
- range_start, br_vlan_flags(range_start, pvid),
- dump_stats))
- err = -EMSGSIZE;
+ if (!err && range_start) {
+ if (dump_global &&
+ !br_vlan_global_opts_fill(skb, range_start->vid,
+ range_end->vid, range_start))
+ err = -EMSGSIZE;
+ else if (!dump_global &&
+ !br_vlan_fill_vids(skb, range_start->vid,
+ range_end->vid, range_start,
+ br_vlan_flags(range_start, pvid),
+ dump_stats))
+ err = -EMSGSIZE;
+ }
cb->args[1] = err ? idx : 0;
@@ -2051,6 +2136,7 @@ static const struct nla_policy br_vlan_db_policy[BRIDGE_VLANDB_ENTRY_MAX + 1] =
[BRIDGE_VLANDB_ENTRY_RANGE] = { .type = NLA_U16 },
[BRIDGE_VLANDB_ENTRY_STATE] = { .type = NLA_U8 },
[BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { .type = NLA_NESTED },
+ [BRIDGE_VLANDB_ENTRY_MCAST_ROUTER] = { .type = NLA_U8 },
};
static int br_vlan_rtm_process_one(struct net_device *dev,
@@ -2185,12 +2271,22 @@ static int br_vlan_rtm_process(struct sk_buff *skb, struct nlmsghdr *nlh,
}
nlmsg_for_each_attr(attr, nlh, sizeof(*bvm), rem) {
- if (nla_type(attr) != BRIDGE_VLANDB_ENTRY)
+ switch (nla_type(attr)) {
+ case BRIDGE_VLANDB_ENTRY:
+ err = br_vlan_rtm_process_one(dev, attr,
+ nlh->nlmsg_type,
+ extack);
+ break;
+ case BRIDGE_VLANDB_GLOBAL_OPTIONS:
+ err = br_vlan_rtm_process_global_options(dev, attr,
+ nlh->nlmsg_type,
+ extack);
+ break;
+ default:
continue;
+ }
vlans++;
- err = br_vlan_rtm_process_one(dev, attr, nlh->nlmsg_type,
- extack);
if (err)
break;
}
diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
index b4add9ea8964..8ffd4ed2563c 100644
--- a/net/bridge/br_vlan_options.c
+++ b/net/bridge/br_vlan_options.c
@@ -40,22 +40,38 @@ static bool __vlan_tun_can_enter_range(const struct net_bridge_vlan *v_curr,
bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr,
const struct net_bridge_vlan *range_end)
{
+ u8 range_mc_rtr = br_vlan_multicast_router(range_end);
+ u8 curr_mc_rtr = br_vlan_multicast_router(v_curr);
+
return v_curr->state == range_end->state &&
- __vlan_tun_can_enter_range(v_curr, range_end);
+ __vlan_tun_can_enter_range(v_curr, range_end) &&
+ curr_mc_rtr == range_mc_rtr;
}
bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v)
{
- return !nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE,
- br_vlan_get_state(v)) &&
- __vlan_tun_put(skb, v);
+ if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE, br_vlan_get_state(v)) ||
+ !__vlan_tun_put(skb, v))
+ return false;
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_MCAST_ROUTER,
+ br_vlan_multicast_router(v)))
+ return false;
+#endif
+
+ return true;
}
size_t br_vlan_opts_nl_size(void)
{
return nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_ENTRY_STATE */
+ nla_total_size(0) /* BRIDGE_VLANDB_ENTRY_TUNNEL_INFO */
- + nla_total_size(sizeof(u32)); /* BRIDGE_VLANDB_TINFO_ID */
+ + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_TINFO_ID */
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_ENTRY_MCAST_ROUTER */
+#endif
+ + 0;
}
static int br_vlan_modify_state(struct net_bridge_vlan_group *vg,
@@ -181,6 +197,18 @@ static int br_vlan_process_one_opts(const struct net_bridge *br,
return err;
}
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (tb[BRIDGE_VLANDB_ENTRY_MCAST_ROUTER]) {
+ u8 val;
+
+ val = nla_get_u8(tb[BRIDGE_VLANDB_ENTRY_MCAST_ROUTER]);
+ err = br_multicast_set_vlan_router(v, val);
+ if (err)
+ return err;
+ *changed = true;
+ }
+#endif
+
return 0;
}
@@ -258,3 +286,392 @@ int br_vlan_process_options(const struct net_bridge *br,
return err;
}
+
+bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr,
+ const struct net_bridge_vlan *r_end)
+{
+ return v_curr->vid - r_end->vid == 1 &&
+ ((v_curr->priv_flags ^ r_end->priv_flags) &
+ BR_VLFLAG_GLOBAL_MCAST_ENABLED) == 0 &&
+ br_multicast_ctx_options_equal(&v_curr->br_mcast_ctx,
+ &r_end->br_mcast_ctx);
+}
+
+bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range,
+ const struct net_bridge_vlan *v_opts)
+{
+ struct nlattr *nest2 __maybe_unused;
+ u64 clockval __maybe_unused;
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, BRIDGE_VLANDB_GLOBAL_OPTIONS);
+ if (!nest)
+ return false;
+
+ if (nla_put_u16(skb, BRIDGE_VLANDB_GOPTS_ID, vid))
+ goto out_err;
+
+ if (vid_range && vid < vid_range &&
+ nla_put_u16(skb, BRIDGE_VLANDB_GOPTS_RANGE, vid_range))
+ goto out_err;
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING,
+ !!(v_opts->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)) ||
+ nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION,
+ v_opts->br_mcast_ctx.multicast_igmp_version) ||
+ nla_put_u32(skb, BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT,
+ v_opts->br_mcast_ctx.multicast_last_member_count) ||
+ nla_put_u32(skb, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT,
+ v_opts->br_mcast_ctx.multicast_startup_query_count) ||
+ nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER,
+ v_opts->br_mcast_ctx.multicast_querier) ||
+ br_multicast_dump_querier_state(skb, &v_opts->br_mcast_ctx,
+ BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE))
+ goto out_err;
+
+ clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_last_member_interval);
+ if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL,
+ clockval, BRIDGE_VLANDB_GOPTS_PAD))
+ goto out_err;
+ clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_membership_interval);
+ if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL,
+ clockval, BRIDGE_VLANDB_GOPTS_PAD))
+ goto out_err;
+ clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_querier_interval);
+ if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL,
+ clockval, BRIDGE_VLANDB_GOPTS_PAD))
+ goto out_err;
+ clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_query_interval);
+ if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL,
+ clockval, BRIDGE_VLANDB_GOPTS_PAD))
+ goto out_err;
+ clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_query_response_interval);
+ if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL,
+ clockval, BRIDGE_VLANDB_GOPTS_PAD))
+ goto out_err;
+ clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_startup_query_interval);
+ if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL,
+ clockval, BRIDGE_VLANDB_GOPTS_PAD))
+ goto out_err;
+
+ if (br_rports_have_mc_router(&v_opts->br_mcast_ctx)) {
+ nest2 = nla_nest_start(skb,
+ BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS);
+ if (!nest2)
+ goto out_err;
+
+ rcu_read_lock();
+ if (br_rports_fill_info(skb, &v_opts->br_mcast_ctx)) {
+ rcu_read_unlock();
+ nla_nest_cancel(skb, nest2);
+ goto out_err;
+ }
+ rcu_read_unlock();
+
+ nla_nest_end(skb, nest2);
+ }
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION,
+ v_opts->br_mcast_ctx.multicast_mld_version))
+ goto out_err;
+#endif
+#endif
+
+ nla_nest_end(skb, nest);
+
+ return true;
+
+out_err:
+ nla_nest_cancel(skb, nest);
+ return false;
+}
+
+static size_t rtnl_vlan_global_opts_nlmsg_size(const struct net_bridge_vlan *v)
+{
+ return NLMSG_ALIGN(sizeof(struct br_vlan_msg))
+ + nla_total_size(0) /* BRIDGE_VLANDB_GLOBAL_OPTIONS */
+ + nla_total_size(sizeof(u16)) /* BRIDGE_VLANDB_GOPTS_ID */
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING */
+ + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION */
+ + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION */
+ + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT */
+ + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT */
+ + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL */
+ + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL */
+ + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL */
+ + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL */
+ + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL */
+ + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL */
+ + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERIER */
+ + br_multicast_querier_state_size() /* BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE */
+ + nla_total_size(0) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS */
+ + br_rports_size(&v->br_mcast_ctx) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS */
+#endif
+ + nla_total_size(sizeof(u16)); /* BRIDGE_VLANDB_GOPTS_RANGE */
+}
+
+static void br_vlan_global_opts_notify(const struct net_bridge *br,
+ u16 vid, u16 vid_range)
+{
+ struct net_bridge_vlan *v;
+ struct br_vlan_msg *bvm;
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ /* right now notifications are done only with rtnl held */
+ ASSERT_RTNL();
+
+ /* need to find the vlan due to flags/options */
+ v = br_vlan_find(br_vlan_group(br), vid);
+ if (!v)
+ return;
+
+ skb = nlmsg_new(rtnl_vlan_global_opts_nlmsg_size(v), GFP_KERNEL);
+ if (!skb)
+ goto out_err;
+
+ err = -EMSGSIZE;
+ nlh = nlmsg_put(skb, 0, 0, RTM_NEWVLAN, sizeof(*bvm), 0);
+ if (!nlh)
+ goto out_err;
+ bvm = nlmsg_data(nlh);
+ memset(bvm, 0, sizeof(*bvm));
+ bvm->family = AF_BRIDGE;
+ bvm->ifindex = br->dev->ifindex;
+
+ if (!br_vlan_global_opts_fill(skb, vid, vid_range, v))
+ goto out_err;
+
+ nlmsg_end(skb, nlh);
+ rtnl_notify(skb, dev_net(br->dev), 0, RTNLGRP_BRVLAN, NULL, GFP_KERNEL);
+ return;
+
+out_err:
+ rtnl_set_sk_err(dev_net(br->dev), RTNLGRP_BRVLAN, err);
+ kfree_skb(skb);
+}
+
+static int br_vlan_process_global_one_opts(const struct net_bridge *br,
+ struct net_bridge_vlan_group *vg,
+ struct net_bridge_vlan *v,
+ struct nlattr **tb,
+ bool *changed,
+ struct netlink_ext_ack *extack)
+{
+ int err __maybe_unused;
+
+ *changed = false;
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING]) {
+ u8 mc_snooping;
+
+ mc_snooping = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING]);
+ if (br_multicast_toggle_global_vlan(v, !!mc_snooping))
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION]) {
+ u8 ver;
+
+ ver = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION]);
+ err = br_multicast_set_igmp_version(&v->br_mcast_ctx, ver);
+ if (err)
+ return err;
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT]) {
+ u32 cnt;
+
+ cnt = nla_get_u32(tb[BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT]);
+ v->br_mcast_ctx.multicast_last_member_count = cnt;
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT]) {
+ u32 cnt;
+
+ cnt = nla_get_u32(tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT]);
+ v->br_mcast_ctx.multicast_startup_query_count = cnt;
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL]) {
+ u64 val;
+
+ val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL]);
+ v->br_mcast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val);
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL]) {
+ u64 val;
+
+ val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL]);
+ v->br_mcast_ctx.multicast_membership_interval = clock_t_to_jiffies(val);
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL]) {
+ u64 val;
+
+ val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL]);
+ v->br_mcast_ctx.multicast_querier_interval = clock_t_to_jiffies(val);
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL]) {
+ u64 val;
+
+ val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL]);
+ v->br_mcast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL]) {
+ u64 val;
+
+ val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL]);
+ v->br_mcast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val);
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL]) {
+ u64 val;
+
+ val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL]);
+ v->br_mcast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]) {
+ u8 val;
+
+ val = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]);
+ err = br_multicast_set_querier(&v->br_mcast_ctx, val);
+ if (err)
+ return err;
+ *changed = true;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION]) {
+ u8 ver;
+
+ ver = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION]);
+ err = br_multicast_set_mld_version(&v->br_mcast_ctx, ver);
+ if (err)
+ return err;
+ *changed = true;
+ }
+#endif
+#endif
+
+ return 0;
+}
+
+static const struct nla_policy br_vlan_db_gpol[BRIDGE_VLANDB_GOPTS_MAX + 1] = {
+ [BRIDGE_VLANDB_GOPTS_ID] = { .type = NLA_U16 },
+ [BRIDGE_VLANDB_GOPTS_RANGE] = { .type = NLA_U16 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING] = { .type = NLA_U8 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION] = { .type = NLA_U8 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL] = { .type = NLA_U64 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_QUERIER] = { .type = NLA_U8 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION] = { .type = NLA_U8 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT] = { .type = NLA_U32 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT] = { .type = NLA_U32 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL] = { .type = NLA_U64 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL] = { .type = NLA_U64 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL] = { .type = NLA_U64 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL] = { .type = NLA_U64 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL] = { .type = NLA_U64 },
+};
+
+int br_vlan_rtm_process_global_options(struct net_device *dev,
+ const struct nlattr *attr,
+ int cmd,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_vlan *v, *curr_start = NULL, *curr_end = NULL;
+ struct nlattr *tb[BRIDGE_VLANDB_GOPTS_MAX + 1];
+ struct net_bridge_vlan_group *vg;
+ u16 vid, vid_range = 0;
+ struct net_bridge *br;
+ int err = 0;
+
+ if (cmd != RTM_NEWVLAN) {
+ NL_SET_ERR_MSG_MOD(extack, "Global vlan options support only set operation");
+ return -EINVAL;
+ }
+ if (!netif_is_bridge_master(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Global vlan options can only be set on bridge device");
+ return -EINVAL;
+ }
+ br = netdev_priv(dev);
+ vg = br_vlan_group(br);
+ if (WARN_ON(!vg))
+ return -ENODEV;
+
+ err = nla_parse_nested(tb, BRIDGE_VLANDB_GOPTS_MAX, attr,
+ br_vlan_db_gpol, extack);
+ if (err)
+ return err;
+
+ if (!tb[BRIDGE_VLANDB_GOPTS_ID]) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing vlan entry id");
+ return -EINVAL;
+ }
+ vid = nla_get_u16(tb[BRIDGE_VLANDB_GOPTS_ID]);
+ if (!br_vlan_valid_id(vid, extack))
+ return -EINVAL;
+
+ if (tb[BRIDGE_VLANDB_GOPTS_RANGE]) {
+ vid_range = nla_get_u16(tb[BRIDGE_VLANDB_GOPTS_RANGE]);
+ if (!br_vlan_valid_id(vid_range, extack))
+ return -EINVAL;
+ if (vid >= vid_range) {
+ NL_SET_ERR_MSG_MOD(extack, "End vlan id is less than or equal to start vlan id");
+ return -EINVAL;
+ }
+ } else {
+ vid_range = vid;
+ }
+
+ for (; vid <= vid_range; vid++) {
+ bool changed = false;
+
+ v = br_vlan_find(vg, vid);
+ if (!v) {
+ NL_SET_ERR_MSG_MOD(extack, "Vlan in range doesn't exist, can't process global options");
+ err = -ENOENT;
+ break;
+ }
+
+ err = br_vlan_process_global_one_opts(br, vg, v, tb, &changed,
+ extack);
+ if (err)
+ break;
+
+ if (changed) {
+ /* vlan options changed, check for range */
+ if (!curr_start) {
+ curr_start = v;
+ curr_end = v;
+ continue;
+ }
+
+ if (!br_vlan_global_opts_can_enter_range(v, curr_end)) {
+ br_vlan_global_opts_notify(br, curr_start->vid,
+ curr_end->vid);
+ curr_start = v;
+ }
+ curr_end = v;
+ } else {
+ /* nothing changed and nothing to notify yet */
+ if (!curr_start)
+ continue;
+
+ br_vlan_global_opts_notify(br, curr_start->vid,
+ curr_end->vid);
+ curr_start = NULL;
+ curr_end = NULL;
+ }
+ }
+ if (curr_start)
+ br_vlan_global_opts_notify(br, curr_start->vid, curr_end->vid);
+
+ return err;
+}
diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index 01017448ebde..6399a8a69d07 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -158,30 +158,28 @@ void vlan_tunnel_deinit(struct net_bridge_vlan_group *vg)
rhashtable_destroy(&vg->tunnel_hash);
}
-int br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
- struct net_bridge_port *p,
- struct net_bridge_vlan_group *vg)
+void br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
+ struct net_bridge_port *p,
+ struct net_bridge_vlan_group *vg)
{
struct ip_tunnel_info *tinfo = skb_tunnel_info(skb);
struct net_bridge_vlan *vlan;
if (!vg || !tinfo)
- return 0;
+ return;
/* if already tagged, ignore */
if (skb_vlan_tagged(skb))
- return 0;
+ return;
/* lookup vid, given tunnel id */
vlan = br_vlan_tunnel_lookup(&vg->tunnel_hash, tinfo->key.tun_id);
if (!vlan)
- return 0;
+ return;
skb_dst_drop(skb);
__vlan_hwaccel_put_tag(skb, p->br->vlan_proto, vlan->vid);
-
- return 0;
}
int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 020b1487ee0c..a7af4eaff17d 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -98,7 +98,7 @@ static const struct nf_hook_ops ebt_ops_broute = {
.priority = NF_BR_PRI_FIRST,
};
-static int __net_init broute_net_init(struct net *net)
+static int broute_table_init(struct net *net)
{
return ebt_register_table(net, &broute_table, &ebt_ops_broute);
}
@@ -114,19 +114,30 @@ static void __net_exit broute_net_exit(struct net *net)
}
static struct pernet_operations broute_net_ops = {
- .init = broute_net_init,
.exit = broute_net_exit,
.pre_exit = broute_net_pre_exit,
};
static int __init ebtable_broute_init(void)
{
- return register_pernet_subsys(&broute_net_ops);
+ int ret = ebt_register_template(&broute_table, broute_table_init);
+
+ if (ret)
+ return ret;
+
+ ret = register_pernet_subsys(&broute_net_ops);
+ if (ret) {
+ ebt_unregister_template(&broute_table);
+ return ret;
+ }
+
+ return 0;
}
static void __exit ebtable_broute_fini(void)
{
unregister_pernet_subsys(&broute_net_ops);
+ ebt_unregister_template(&broute_table);
}
module_init(ebtable_broute_init);
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 8ec0b3736803..c0b121df4a9a 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -86,7 +86,7 @@ static const struct nf_hook_ops ebt_ops_filter[] = {
},
};
-static int __net_init frame_filter_net_init(struct net *net)
+static int frame_filter_table_init(struct net *net)
{
return ebt_register_table(net, &frame_filter, ebt_ops_filter);
}
@@ -102,19 +102,30 @@ static void __net_exit frame_filter_net_exit(struct net *net)
}
static struct pernet_operations frame_filter_net_ops = {
- .init = frame_filter_net_init,
.exit = frame_filter_net_exit,
.pre_exit = frame_filter_net_pre_exit,
};
static int __init ebtable_filter_init(void)
{
- return register_pernet_subsys(&frame_filter_net_ops);
+ int ret = ebt_register_template(&frame_filter, frame_filter_table_init);
+
+ if (ret)
+ return ret;
+
+ ret = register_pernet_subsys(&frame_filter_net_ops);
+ if (ret) {
+ ebt_unregister_template(&frame_filter);
+ return ret;
+ }
+
+ return 0;
}
static void __exit ebtable_filter_fini(void)
{
unregister_pernet_subsys(&frame_filter_net_ops);
+ ebt_unregister_template(&frame_filter);
}
module_init(ebtable_filter_init);
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 7c8a1064a531..4078151c224f 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -85,7 +85,7 @@ static const struct nf_hook_ops ebt_ops_nat[] = {
},
};
-static int __net_init frame_nat_net_init(struct net *net)
+static int frame_nat_table_init(struct net *net)
{
return ebt_register_table(net, &frame_nat, ebt_ops_nat);
}
@@ -101,19 +101,30 @@ static void __net_exit frame_nat_net_exit(struct net *net)
}
static struct pernet_operations frame_nat_net_ops = {
- .init = frame_nat_net_init,
.exit = frame_nat_net_exit,
.pre_exit = frame_nat_net_pre_exit,
};
static int __init ebtable_nat_init(void)
{
- return register_pernet_subsys(&frame_nat_net_ops);
+ int ret = ebt_register_template(&frame_nat, frame_nat_table_init);
+
+ if (ret)
+ return ret;
+
+ ret = register_pernet_subsys(&frame_nat_net_ops);
+ if (ret) {
+ ebt_unregister_template(&frame_nat);
+ return ret;
+ }
+
+ return ret;
}
static void __exit ebtable_nat_fini(void)
{
unregister_pernet_subsys(&frame_nat_net_ops);
+ ebt_unregister_template(&frame_nat);
}
module_init(ebtable_nat_init);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index f022deb3721e..83d1798dfbb4 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -44,7 +44,16 @@ struct ebt_pernet {
struct list_head tables;
};
+struct ebt_template {
+ struct list_head list;
+ char name[EBT_TABLE_MAXNAMELEN];
+ struct module *owner;
+ /* called when table is needed in the given netns */
+ int (*table_init)(struct net *net);
+};
+
static unsigned int ebt_pernet_id __read_mostly;
+static LIST_HEAD(template_tables);
static DEFINE_MUTEX(ebt_mutex);
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
@@ -309,30 +318,57 @@ letscontinue:
/* If it succeeds, returns element and locks mutex */
static inline void *
-find_inlist_lock_noload(struct list_head *head, const char *name, int *error,
+find_inlist_lock_noload(struct net *net, const char *name, int *error,
struct mutex *mutex)
{
- struct {
- struct list_head list;
- char name[EBT_FUNCTION_MAXNAMELEN];
- } *e;
+ struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
+ struct ebt_template *tmpl;
+ struct ebt_table *table;
mutex_lock(mutex);
- list_for_each_entry(e, head, list) {
- if (strcmp(e->name, name) == 0)
- return e;
+ list_for_each_entry(table, &ebt_net->tables, list) {
+ if (strcmp(table->name, name) == 0)
+ return table;
}
+
+ list_for_each_entry(tmpl, &template_tables, list) {
+ if (strcmp(name, tmpl->name) == 0) {
+ struct module *owner = tmpl->owner;
+
+ if (!try_module_get(owner))
+ goto out;
+
+ mutex_unlock(mutex);
+
+ *error = tmpl->table_init(net);
+ if (*error) {
+ module_put(owner);
+ return NULL;
+ }
+
+ mutex_lock(mutex);
+ module_put(owner);
+ break;
+ }
+ }
+
+ list_for_each_entry(table, &ebt_net->tables, list) {
+ if (strcmp(table->name, name) == 0)
+ return table;
+ }
+
+out:
*error = -ENOENT;
mutex_unlock(mutex);
return NULL;
}
static void *
-find_inlist_lock(struct list_head *head, const char *name, const char *prefix,
+find_inlist_lock(struct net *net, const char *name, const char *prefix,
int *error, struct mutex *mutex)
{
return try_then_request_module(
- find_inlist_lock_noload(head, name, error, mutex),
+ find_inlist_lock_noload(net, name, error, mutex),
"%s%s", prefix, name);
}
@@ -340,10 +376,7 @@ static inline struct ebt_table *
find_table_lock(struct net *net, const char *name, int *error,
struct mutex *mutex)
{
- struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
-
- return find_inlist_lock(&ebt_net->tables, name,
- "ebtable_", error, mutex);
+ return find_inlist_lock(net, name, "ebtable_", error, mutex);
}
static inline void ebt_free_table_info(struct ebt_table_info *info)
@@ -1258,6 +1291,54 @@ out:
return ret;
}
+int ebt_register_template(const struct ebt_table *t, int (*table_init)(struct net *net))
+{
+ struct ebt_template *tmpl;
+
+ mutex_lock(&ebt_mutex);
+ list_for_each_entry(tmpl, &template_tables, list) {
+ if (WARN_ON_ONCE(strcmp(t->name, tmpl->name) == 0)) {
+ mutex_unlock(&ebt_mutex);
+ return -EEXIST;
+ }
+ }
+
+ tmpl = kzalloc(sizeof(*tmpl), GFP_KERNEL);
+ if (!tmpl) {
+ mutex_unlock(&ebt_mutex);
+ return -ENOMEM;
+ }
+
+ tmpl->table_init = table_init;
+ strscpy(tmpl->name, t->name, sizeof(tmpl->name));
+ tmpl->owner = t->me;
+ list_add(&tmpl->list, &template_tables);
+
+ mutex_unlock(&ebt_mutex);
+ return 0;
+}
+EXPORT_SYMBOL(ebt_register_template);
+
+void ebt_unregister_template(const struct ebt_table *t)
+{
+ struct ebt_template *tmpl;
+
+ mutex_lock(&ebt_mutex);
+ list_for_each_entry(tmpl, &template_tables, list) {
+ if (strcmp(t->name, tmpl->name))
+ continue;
+
+ list_del(&tmpl->list);
+ mutex_unlock(&ebt_mutex);
+ kfree(tmpl);
+ return;
+ }
+
+ mutex_unlock(&ebt_mutex);
+ WARN_ON_ONCE(1);
+}
+EXPORT_SYMBOL(ebt_unregister_template);
+
static struct ebt_table *__ebt_find_table(struct net *net, const char *name)
{
struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 8d033a75a766..fdbed3158555 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -88,6 +88,12 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
skb = ip_fraglist_next(&iter);
}
+
+ if (!err)
+ return 0;
+
+ kfree_skb_list(iter.frag);
+
return err;
}
slow_path:
diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
index 12369b604ce9..f6df20808f5e 100644
--- a/net/can/j1939/j1939-priv.h
+++ b/net/can/j1939/j1939-priv.h
@@ -20,9 +20,12 @@
struct j1939_session;
enum j1939_sk_errqueue_type {
- J1939_ERRQUEUE_ACK,
- J1939_ERRQUEUE_SCHED,
- J1939_ERRQUEUE_ABORT,
+ J1939_ERRQUEUE_TX_ACK,
+ J1939_ERRQUEUE_TX_SCHED,
+ J1939_ERRQUEUE_TX_ABORT,
+ J1939_ERRQUEUE_RX_RTS,
+ J1939_ERRQUEUE_RX_DPO,
+ J1939_ERRQUEUE_RX_ABORT,
};
/* j1939 devices */
@@ -87,6 +90,7 @@ struct j1939_priv {
struct list_head j1939_socks;
struct kref rx_kref;
+ u32 rx_tskey;
};
void j1939_ecu_put(struct j1939_ecu *ecu);
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 54f6d521492f..6dff4510687a 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -352,7 +352,7 @@ static void j1939_sk_sock_destruct(struct sock *sk)
{
struct j1939_sock *jsk = j1939_sk(sk);
- /* This function will be call by the generic networking code, when then
+ /* This function will be called by the generic networking code, when
* the socket is ultimately closed (sk->sk_destruct).
*
* The race between
@@ -905,20 +905,33 @@ failure:
return NULL;
}
-static size_t j1939_sk_opt_stats_get_size(void)
+static size_t j1939_sk_opt_stats_get_size(enum j1939_sk_errqueue_type type)
{
- return
- nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */
- 0;
+ switch (type) {
+ case J1939_ERRQUEUE_RX_RTS:
+ return
+ nla_total_size(sizeof(u32)) + /* J1939_NLA_TOTAL_SIZE */
+ nla_total_size(sizeof(u32)) + /* J1939_NLA_PGN */
+ nla_total_size(sizeof(u64)) + /* J1939_NLA_SRC_NAME */
+ nla_total_size(sizeof(u64)) + /* J1939_NLA_DEST_NAME */
+ nla_total_size(sizeof(u8)) + /* J1939_NLA_SRC_ADDR */
+ nla_total_size(sizeof(u8)) + /* J1939_NLA_DEST_ADDR */
+ 0;
+ default:
+ return
+ nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */
+ 0;
+ }
}
static struct sk_buff *
-j1939_sk_get_timestamping_opt_stats(struct j1939_session *session)
+j1939_sk_get_timestamping_opt_stats(struct j1939_session *session,
+ enum j1939_sk_errqueue_type type)
{
struct sk_buff *stats;
u32 size;
- stats = alloc_skb(j1939_sk_opt_stats_get_size(), GFP_ATOMIC);
+ stats = alloc_skb(j1939_sk_opt_stats_get_size(type), GFP_ATOMIC);
if (!stats)
return NULL;
@@ -928,32 +941,67 @@ j1939_sk_get_timestamping_opt_stats(struct j1939_session *session)
size = min(session->pkt.tx_acked * 7,
session->total_message_size);
- nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size);
+ switch (type) {
+ case J1939_ERRQUEUE_RX_RTS:
+ nla_put_u32(stats, J1939_NLA_TOTAL_SIZE,
+ session->total_message_size);
+ nla_put_u32(stats, J1939_NLA_PGN,
+ session->skcb.addr.pgn);
+ nla_put_u64_64bit(stats, J1939_NLA_SRC_NAME,
+ session->skcb.addr.src_name, J1939_NLA_PAD);
+ nla_put_u64_64bit(stats, J1939_NLA_DEST_NAME,
+ session->skcb.addr.dst_name, J1939_NLA_PAD);
+ nla_put_u8(stats, J1939_NLA_SRC_ADDR,
+ session->skcb.addr.sa);
+ nla_put_u8(stats, J1939_NLA_DEST_ADDR,
+ session->skcb.addr.da);
+ break;
+ default:
+ nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size);
+ }
return stats;
}
-void j1939_sk_errqueue(struct j1939_session *session,
- enum j1939_sk_errqueue_type type)
+static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
+ enum j1939_sk_errqueue_type type)
{
struct j1939_priv *priv = session->priv;
- struct sock *sk = session->sk;
struct j1939_sock *jsk;
struct sock_exterr_skb *serr;
struct sk_buff *skb;
char *state = "UNK";
int err;
- /* currently we have no sk for the RX session */
- if (!sk)
- return;
-
jsk = j1939_sk(sk);
if (!(jsk->state & J1939_SOCK_ERRQUEUE))
return;
- skb = j1939_sk_get_timestamping_opt_stats(session);
+ switch (type) {
+ case J1939_ERRQUEUE_TX_ACK:
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
+ return;
+ break;
+ case J1939_ERRQUEUE_TX_SCHED:
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
+ return;
+ break;
+ case J1939_ERRQUEUE_TX_ABORT:
+ break;
+ case J1939_ERRQUEUE_RX_RTS:
+ fallthrough;
+ case J1939_ERRQUEUE_RX_DPO:
+ fallthrough;
+ case J1939_ERRQUEUE_RX_ABORT:
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
+ return;
+ break;
+ default:
+ netdev_err(priv->ndev, "Unknown errqueue type %i\n", type);
+ }
+
+ skb = j1939_sk_get_timestamping_opt_stats(session, type);
if (!skb)
return;
@@ -964,36 +1012,42 @@ void j1939_sk_errqueue(struct j1939_session *session,
serr = SKB_EXT_ERR(skb);
memset(serr, 0, sizeof(*serr));
switch (type) {
- case J1939_ERRQUEUE_ACK:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) {
- kfree_skb(skb);
- return;
- }
-
+ case J1939_ERRQUEUE_TX_ACK:
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
serr->ee.ee_info = SCM_TSTAMP_ACK;
- state = "ACK";
+ state = "TX ACK";
break;
- case J1939_ERRQUEUE_SCHED:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) {
- kfree_skb(skb);
- return;
- }
-
+ case J1939_ERRQUEUE_TX_SCHED:
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
serr->ee.ee_info = SCM_TSTAMP_SCHED;
- state = "SCH";
+ state = "TX SCH";
break;
- case J1939_ERRQUEUE_ABORT:
+ case J1939_ERRQUEUE_TX_ABORT:
serr->ee.ee_errno = session->err;
serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
serr->ee.ee_info = J1939_EE_INFO_TX_ABORT;
- state = "ABT";
+ state = "TX ABT";
+ break;
+ case J1939_ERRQUEUE_RX_RTS:
+ serr->ee.ee_errno = ENOMSG;
+ serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+ serr->ee.ee_info = J1939_EE_INFO_RX_RTS;
+ state = "RX RTS";
+ break;
+ case J1939_ERRQUEUE_RX_DPO:
+ serr->ee.ee_errno = ENOMSG;
+ serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+ serr->ee.ee_info = J1939_EE_INFO_RX_DPO;
+ state = "RX DPO";
+ break;
+ case J1939_ERRQUEUE_RX_ABORT:
+ serr->ee.ee_errno = session->err;
+ serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+ serr->ee.ee_info = J1939_EE_INFO_RX_ABORT;
+ state = "RX ABT";
break;
- default:
- netdev_err(priv->ndev, "Unknown errqueue type %i\n", type);
}
serr->opt_stats = true;
@@ -1008,6 +1062,27 @@ void j1939_sk_errqueue(struct j1939_session *session,
kfree_skb(skb);
};
+void j1939_sk_errqueue(struct j1939_session *session,
+ enum j1939_sk_errqueue_type type)
+{
+ struct j1939_priv *priv = session->priv;
+ struct j1939_sock *jsk;
+
+ if (session->sk) {
+ /* send TX notifications to the socket of origin */
+ __j1939_sk_errqueue(session, session->sk, type);
+ return;
+ }
+
+ /* spread RX notifications to all sockets subscribed to this session */
+ spin_lock_bh(&priv->j1939_socks_lock);
+ list_for_each_entry(jsk, &priv->j1939_socks, list) {
+ if (j1939_sk_recv_match_one(jsk, &session->skcb))
+ __j1939_sk_errqueue(session, &jsk->sk, type);
+ }
+ spin_unlock_bh(&priv->j1939_socks_lock);
+};
+
void j1939_sk_send_loop_abort(struct sock *sk, int err)
{
sk->sk_err = err;
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index bdc95bd7a851..bb5c4b8979be 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -260,10 +260,14 @@ static void __j1939_session_drop(struct j1939_session *session)
static void j1939_session_destroy(struct j1939_session *session)
{
- if (session->err)
- j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT);
- else
- j1939_sk_errqueue(session, J1939_ERRQUEUE_ACK);
+ if (session->transmission) {
+ if (session->err)
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ABORT);
+ else
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ACK);
+ } else if (session->err) {
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
+ }
netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
@@ -776,7 +780,7 @@ static int j1939_session_tx_dpo(struct j1939_session *session)
static int j1939_session_tx_dat(struct j1939_session *session)
{
struct j1939_priv *priv = session->priv;
- struct j1939_sk_buff_cb *skcb;
+ struct j1939_sk_buff_cb *se_skcb;
int offset, pkt_done, pkt_end;
unsigned int len, pdelay;
struct sk_buff *se_skb;
@@ -788,7 +792,7 @@ static int j1939_session_tx_dat(struct j1939_session *session)
if (!se_skb)
return -ENOBUFS;
- skcb = j1939_skb_to_cb(se_skb);
+ se_skcb = j1939_skb_to_cb(se_skb);
tpdat = se_skb->data;
ret = 0;
pkt_done = 0;
@@ -800,7 +804,7 @@ static int j1939_session_tx_dat(struct j1939_session *session)
while (session->pkt.tx < pkt_end) {
dat[0] = session->pkt.tx - session->pkt.dpo + 1;
- offset = (session->pkt.tx * 7) - skcb->offset;
+ offset = (session->pkt.tx * 7) - se_skcb->offset;
len = se_skb->len - offset;
if (len > 7)
len = 7;
@@ -808,7 +812,8 @@ static int j1939_session_tx_dat(struct j1939_session *session)
if (offset + len > se_skb->len) {
netdev_err_once(priv->ndev,
"%s: 0x%p: requested data outside of queued buffer: offset %i, len %i, pkt.tx: %i\n",
- __func__, session, skcb->offset, se_skb->len , session->pkt.tx);
+ __func__, session, se_skcb->offset,
+ se_skb->len , session->pkt.tx);
ret = -EOVERFLOW;
goto out_free;
}
@@ -821,7 +826,7 @@ static int j1939_session_tx_dat(struct j1939_session *session)
memcpy(&dat[1], &tpdat[offset], len);
ret = j1939_tp_tx_dat(session, dat, len + 1);
if (ret < 0) {
- /* ENOBUS == CAN interface TX queue is full */
+ /* ENOBUFS == CAN interface TX queue is full */
if (ret != -ENOBUFS)
netdev_alert(priv->ndev,
"%s: 0x%p: queue data error: %i\n",
@@ -1043,7 +1048,7 @@ static int j1939_simple_txnext(struct j1939_session *session)
if (ret)
goto out_free;
- j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED);
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_SCHED);
j1939_sk_queue_activate_next(session);
out_free:
@@ -1097,7 +1102,7 @@ j1939_session_deactivate_activate_next(struct j1939_session *session)
}
static void __j1939_session_cancel(struct j1939_session *session,
- enum j1939_xtp_abort err)
+ enum j1939_xtp_abort err)
{
struct j1939_priv *priv = session->priv;
@@ -1115,6 +1120,8 @@ static void __j1939_session_cancel(struct j1939_session *session,
if (session->sk)
j1939_sk_send_loop_abort(session->sk, session->err);
+ else
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
}
static void j1939_session_cancel(struct j1939_session *session,
@@ -1195,13 +1202,13 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
static void j1939_session_completed(struct j1939_session *session)
{
- struct sk_buff *skb;
+ struct sk_buff *se_skb;
if (!session->transmission) {
- skb = j1939_session_skb_get(session);
+ se_skb = j1939_session_skb_get(session);
/* distribute among j1939 receivers */
- j1939_sk_recv(session->priv, skb);
- consume_skb(skb);
+ j1939_sk_recv(session->priv, se_skb);
+ consume_skb(se_skb);
}
j1939_session_deactivate_activate_next(session);
@@ -1268,12 +1275,14 @@ static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session,
break;
case J1939_ETP_CMD_RTS:
- case J1939_TP_CMD_RTS: /* fall through */
+ fallthrough;
+ case J1939_TP_CMD_RTS:
abort = J1939_XTP_ABORT_BUSY;
break;
case J1939_ETP_CMD_CTS:
- case J1939_TP_CMD_CTS: /* fall through */
+ fallthrough;
+ case J1939_TP_CMD_CTS:
abort = J1939_XTP_ABORT_ECTS_UNXPECTED_PGN;
break;
@@ -1282,7 +1291,8 @@ static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session,
break;
case J1939_ETP_CMD_EOMA:
- case J1939_TP_CMD_EOMA: /* fall through */
+ fallthrough;
+ case J1939_TP_CMD_EOMA:
abort = J1939_XTP_ABORT_OTHER;
break;
@@ -1326,6 +1336,8 @@ static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb,
session->err = j1939_xtp_abort_to_errno(priv, abort);
if (session->sk)
j1939_sk_send_loop_abort(session->sk, session->err);
+ else
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
j1939_session_deactivate_activate_next(session);
abort_put:
@@ -1434,7 +1446,7 @@ j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb)
if (session->transmission) {
if (session->pkt.tx_acked)
j1939_sk_errqueue(session,
- J1939_ERRQUEUE_SCHED);
+ J1939_ERRQUEUE_TX_SCHED);
j1939_session_txtimer_cancel(session);
j1939_tp_schedule_txtimer(session, 0);
}
@@ -1626,6 +1638,9 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
session->pkt.rx = 0;
session->pkt.tx = 0;
+ session->tskey = priv->rx_tskey++;
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_RTS);
+
WARN_ON_ONCE(j1939_session_activate(session));
return session;
@@ -1748,6 +1763,9 @@ static void j1939_xtp_rx_dpo_one(struct j1939_session *session,
session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data);
session->last_cmd = dat[0];
j1939_tp_set_rxtimeout(session, 750);
+
+ if (!session->transmission)
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_DPO);
}
static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb,
@@ -1772,7 +1790,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
struct sk_buff *skb)
{
struct j1939_priv *priv = session->priv;
- struct j1939_sk_buff_cb *skcb;
+ struct j1939_sk_buff_cb *skcb, *se_skcb;
struct sk_buff *se_skb = NULL;
const u8 *dat;
u8 *tpdat;
@@ -1797,7 +1815,8 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
break;
fallthrough;
case J1939_TP_CMD_BAM:
- case J1939_TP_CMD_CTS: /* fall through */
+ fallthrough;
+ case J1939_TP_CMD_CTS:
if (skcb->addr.type != J1939_ETP)
break;
fallthrough;
@@ -1822,8 +1841,8 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
goto out_session_cancel;
}
- skcb = j1939_skb_to_cb(se_skb);
- offset = packet * 7 - skcb->offset;
+ se_skcb = j1939_skb_to_cb(se_skb);
+ offset = packet * 7 - se_skcb->offset;
nbytes = se_skb->len - offset;
if (nbytes > 7)
nbytes = 7;
@@ -1851,7 +1870,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
if (packet == session->pkt.rx)
session->pkt.rx++;
- if (skcb->addr.type != J1939_ETP &&
+ if (se_skcb->addr.type != J1939_ETP &&
j1939_cb_is_broadcast(&session->skcb)) {
if (session->pkt.rx >= session->pkt.total)
final = true;
@@ -2000,7 +2019,8 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb)
extd = J1939_ETP;
fallthrough;
case J1939_TP_CMD_BAM:
- case J1939_TP_CMD_RTS: /* fall through */
+ fallthrough;
+ case J1939_TP_CMD_RTS:
if (skcb->addr.type != extd)
return;
diff --git a/net/can/raw.c b/net/can/raw.c
index cd5a49380116..7105fa4824e4 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -592,9 +592,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
ro->count = count;
out_fil:
- if (dev)
- dev_put(dev);
-
+ dev_put(dev);
release_sock(sk);
rtnl_unlock();
@@ -638,9 +636,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
ro->err_mask = err_mask;
out_err:
- if (dev)
- dev_put(dev);
-
+ dev_put(dev);
release_sock(sk);
rtnl_unlock();
diff --git a/net/core/Makefile b/net/core/Makefile
index f7f16650fe9e..35ced6201814 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -33,8 +33,6 @@ obj-$(CONFIG_HWBM) += hwbm.o
obj-$(CONFIG_NET_DEVLINK) += devlink.o
obj-$(CONFIG_GRO_CELLS) += gro_cells.o
obj-$(CONFIG_FAILOVER) += failover.o
-ifeq ($(CONFIG_INET),y)
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
-endif
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index f564f82e91d9..68d2cbf8331a 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -416,7 +416,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
void *, value, u64, flags)
{
- if (in_irq() || in_nmi())
+ if (in_hardirq() || in_nmi())
return (unsigned long)NULL;
return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
@@ -425,7 +425,7 @@ BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
struct sock *, sk)
{
- if (in_irq() || in_nmi())
+ if (in_hardirq() || in_nmi())
return -EPERM;
return ____bpf_sk_storage_delete(map, sk);
diff --git a/net/core/dev.c b/net/core/dev.c
index 8f1a47ad6781..74fd402d26dd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -676,131 +676,6 @@ void dev_remove_offload(struct packet_offload *po)
}
EXPORT_SYMBOL(dev_remove_offload);
-/******************************************************************************
- *
- * Device Boot-time Settings Routines
- *
- ******************************************************************************/
-
-/* Boot time configuration table */
-static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
-
-/**
- * netdev_boot_setup_add - add new setup entry
- * @name: name of the device
- * @map: configured settings for the device
- *
- * Adds new setup entry to the dev_boot_setup list. The function
- * returns 0 on error and 1 on success. This is a generic routine to
- * all netdevices.
- */
-static int netdev_boot_setup_add(char *name, struct ifmap *map)
-{
- struct netdev_boot_setup *s;
- int i;
-
- s = dev_boot_setup;
- for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
- if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
- memset(s[i].name, 0, sizeof(s[i].name));
- strlcpy(s[i].name, name, IFNAMSIZ);
- memcpy(&s[i].map, map, sizeof(s[i].map));
- break;
- }
- }
-
- return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
-}
-
-/**
- * netdev_boot_setup_check - check boot time settings
- * @dev: the netdevice
- *
- * Check boot time settings for the device.
- * The found settings are set for the device to be used
- * later in the device probing.
- * Returns 0 if no settings found, 1 if they are.
- */
-int netdev_boot_setup_check(struct net_device *dev)
-{
- struct netdev_boot_setup *s = dev_boot_setup;
- int i;
-
- for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
- if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
- !strcmp(dev->name, s[i].name)) {
- dev->irq = s[i].map.irq;
- dev->base_addr = s[i].map.base_addr;
- dev->mem_start = s[i].map.mem_start;
- dev->mem_end = s[i].map.mem_end;
- return 1;
- }
- }
- return 0;
-}
-EXPORT_SYMBOL(netdev_boot_setup_check);
-
-
-/**
- * netdev_boot_base - get address from boot time settings
- * @prefix: prefix for network device
- * @unit: id for network device
- *
- * Check boot time settings for the base address of device.
- * The found settings are set for the device to be used
- * later in the device probing.
- * Returns 0 if no settings found.
- */
-unsigned long netdev_boot_base(const char *prefix, int unit)
-{
- const struct netdev_boot_setup *s = dev_boot_setup;
- char name[IFNAMSIZ];
- int i;
-
- sprintf(name, "%s%d", prefix, unit);
-
- /*
- * If device already registered then return base of 1
- * to indicate not to probe for this interface
- */
- if (__dev_get_by_name(&init_net, name))
- return 1;
-
- for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
- if (!strcmp(name, s[i].name))
- return s[i].map.base_addr;
- return 0;
-}
-
-/*
- * Saves at boot time configured settings for any netdevice.
- */
-int __init netdev_boot_setup(char *str)
-{
- int ints[5];
- struct ifmap map;
-
- str = get_options(str, ARRAY_SIZE(ints), ints);
- if (!str || !*str)
- return 0;
-
- /* Save settings */
- memset(&map, 0, sizeof(map));
- if (ints[0] > 0)
- map.irq = ints[1];
- if (ints[0] > 1)
- map.base_addr = ints[2];
- if (ints[0] > 2)
- map.mem_start = ints[3];
- if (ints[0] > 3)
- map.mem_end = ints[4];
-
- /* Add new entry to the list */
- return netdev_boot_setup_add(str, &map);
-}
-
-__setup("netdev=", netdev_boot_setup);
-
/*******************************************************************************
*
* Device Interface Subroutines
@@ -956,8 +831,7 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
rcu_read_lock();
dev = dev_get_by_name_rcu(net, name);
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
return dev;
}
@@ -1030,8 +904,7 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
rcu_read_lock();
dev = dev_get_by_index_rcu(net, ifindex);
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
return dev;
}
@@ -3099,6 +2972,50 @@ EXPORT_SYMBOL(netif_set_real_num_rx_queues);
#endif
/**
+ * netif_set_real_num_queues - set actual number of RX and TX queues used
+ * @dev: Network device
+ * @txq: Actual number of TX queues
+ * @rxq: Actual number of RX queues
+ *
+ * Set the real number of both TX and RX queues.
+ * Does nothing if the number of queues is already correct.
+ */
+int netif_set_real_num_queues(struct net_device *dev,
+ unsigned int txq, unsigned int rxq)
+{
+ unsigned int old_rxq = dev->real_num_rx_queues;
+ int err;
+
+ if (txq < 1 || txq > dev->num_tx_queues ||
+ rxq < 1 || rxq > dev->num_rx_queues)
+ return -EINVAL;
+
+ /* Start from increases, so the error path only does decreases -
+ * decreases can't fail.
+ */
+ if (rxq > dev->real_num_rx_queues) {
+ err = netif_set_real_num_rx_queues(dev, rxq);
+ if (err)
+ return err;
+ }
+ if (txq > dev->real_num_tx_queues) {
+ err = netif_set_real_num_tx_queues(dev, txq);
+ if (err)
+ goto undo_rx;
+ }
+ if (rxq < dev->real_num_rx_queues)
+ WARN_ON(netif_set_real_num_rx_queues(dev, rxq));
+ if (txq < dev->real_num_tx_queues)
+ WARN_ON(netif_set_real_num_tx_queues(dev, txq));
+
+ return 0;
+undo_rx:
+ WARN_ON(netif_set_real_num_rx_queues(dev, old_rxq));
+ return err;
+}
+EXPORT_SYMBOL(netif_set_real_num_queues);
+
+/**
* netif_get_num_default_rss_queues - default number of RSS queues
*
* This routine should set an upper limit on the number of RSS queues
@@ -3190,7 +3107,7 @@ EXPORT_SYMBOL(__dev_kfree_skb_irq);
void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
{
- if (in_irq() || irqs_disabled())
+ if (in_hardirq() || irqs_disabled())
__dev_kfree_skb_irq(skb, reason);
else
dev_kfree_skb(skb);
@@ -4012,7 +3929,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
qdisc_skb_cb(skb)->post_ct = false;
mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
+ switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
@@ -4756,45 +4673,18 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
return rxqueue;
}
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
- struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
{
void *orig_data, *orig_data_end, *hard_start;
struct netdev_rx_queue *rxqueue;
- u32 metalen, act = XDP_DROP;
bool orig_bcast, orig_host;
u32 mac_len, frame_sz;
__be16 orig_eth_type;
struct ethhdr *eth;
+ u32 metalen, act;
int off;
- /* Reinjected packets coming from act_mirred or similar should
- * not get XDP generic processing.
- */
- if (skb_is_redirected(skb))
- return XDP_PASS;
-
- /* XDP packets must be linear and must have sufficient headroom
- * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
- * native XDP provides, thus we need to do it here as well.
- */
- if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
- skb_headroom(skb) < XDP_PACKET_HEADROOM) {
- int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
- int troom = skb->tail + skb->data_len - skb->end;
-
- /* In case we have to go down the path and also linearize,
- * then lets do the pskb_expand_head() work just once here.
- */
- if (pskb_expand_head(skb,
- hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
- troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
- goto do_drop;
- if (skb_linearize(skb))
- goto do_drop;
- }
-
/* The XDP program wants to see the packet starting at the MAC
* header.
*/
@@ -4849,6 +4739,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
skb->protocol = eth_type_trans(skb, skb->dev);
}
+ /* Redirect/Tx gives L2 packet, code that will reuse skb must __skb_pull
+ * before calling us again on redirect path. We do not call do_redirect
+ * as we leave that up to the caller.
+ *
+ * Caller is responsible for managing lifetime of skb (i.e. calling
+ * kfree_skb in response to actions it cannot handle/XDP_DROP).
+ */
switch (act) {
case XDP_REDIRECT:
case XDP_TX:
@@ -4859,6 +4756,49 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
if (metalen)
skb_metadata_set(skb, metalen);
break;
+ }
+
+ return act;
+}
+
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+ struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ u32 act = XDP_DROP;
+
+ /* Reinjected packets coming from act_mirred or similar should
+ * not get XDP generic processing.
+ */
+ if (skb_is_redirected(skb))
+ return XDP_PASS;
+
+ /* XDP packets must be linear and must have sufficient headroom
+ * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
+ * native XDP provides, thus we need to do it here as well.
+ */
+ if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
+ skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+ int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+ int troom = skb->tail + skb->data_len - skb->end;
+
+ /* In case we have to go down the path and also linearize,
+ * then lets do the pskb_expand_head() work just once here.
+ */
+ if (pskb_expand_head(skb,
+ hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+ troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
+ goto do_drop;
+ if (skb_linearize(skb))
+ goto do_drop;
+ }
+
+ act = bpf_prog_run_generic_xdp(skb, xdp, xdp_prog);
+ switch (act) {
+ case XDP_REDIRECT:
+ case XDP_TX:
+ case XDP_PASS:
+ break;
default:
bpf_warn_invalid_xdp_action(act);
fallthrough;
@@ -5141,8 +5081,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
skb->tc_at_ingress = 1;
mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify_ingress(skb, miniq->block, miniq->filter_list,
- &cl_res, false)) {
+ switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
@@ -5324,7 +5263,6 @@ another_round:
ret = NET_RX_DROP;
goto out;
}
- skb_reset_mac_len(skb);
}
if (eth_type_vlan(skb->protocol)) {
@@ -5650,25 +5588,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
struct bpf_prog *new = xdp->prog;
int ret = 0;
- if (new) {
- u32 i;
-
- mutex_lock(&new->aux->used_maps_mutex);
-
- /* generic XDP does not work with DEVMAPs that can
- * have a bpf_prog installed on an entry
- */
- for (i = 0; i < new->aux->used_map_cnt; i++) {
- if (dev_map_can_have_prog(new->aux->used_maps[i]) ||
- cpu_map_prog_allowed(new->aux->used_maps[i])) {
- mutex_unlock(&new->aux->used_maps_mutex);
- return -EINVAL;
- }
- }
-
- mutex_unlock(&new->aux->used_maps_mutex);
- }
-
switch (xdp->command) {
case XDP_SETUP_PROG:
rcu_assign_pointer(dev->xdp_prog, new);
@@ -5876,7 +5795,7 @@ static void flush_all_backlogs(void)
*/
ASSERT_RTNL();
- get_online_cpus();
+ cpus_read_lock();
cpumask_clear(&flush_cpus);
for_each_online_cpu(cpu) {
@@ -5894,7 +5813,7 @@ static void flush_all_backlogs(void)
for_each_cpu(cpu, &flush_cpus)
flush_work(per_cpu_ptr(&flush_works, cpu));
- put_online_cpus();
+ cpus_read_unlock();
}
/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
@@ -6011,7 +5930,6 @@ static void gro_list_prepare(const struct list_head *head,
diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
if (skb_vlan_tag_present(p))
diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb);
- diffs |= skb_metadata_dst_cmp(p, skb);
diffs |= skb_metadata_differs(p, skb);
if (maclen == ETH_HLEN)
diffs |= compare_ether_header(skb_mac_header(p),
@@ -6021,17 +5939,30 @@ static void gro_list_prepare(const struct list_head *head,
skb_mac_header(skb),
maclen);
- diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
+ /* in most common scenarios 'slow_gro' is 0
+ * otherwise we are already on some slower paths
+ * either skip all the infrequent tests altogether or
+ * avoid trying too hard to skip each of them individually
+ */
+ if (!diffs && unlikely(skb->slow_gro | p->slow_gro)) {
#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- if (!diffs) {
- struct tc_skb_ext *skb_ext = skb_ext_find(skb, TC_SKB_EXT);
- struct tc_skb_ext *p_ext = skb_ext_find(p, TC_SKB_EXT);
+ struct tc_skb_ext *skb_ext;
+ struct tc_skb_ext *p_ext;
+#endif
+
+ diffs |= p->sk != skb->sk;
+ diffs |= skb_metadata_dst_cmp(p, skb);
+ diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
+
+#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ skb_ext = skb_ext_find(skb, TC_SKB_EXT);
+ p_ext = skb_ext_find(p, TC_SKB_EXT);
diffs |= (!!p_ext) ^ (!!skb_ext);
if (!diffs && unlikely(skb_ext))
diffs |= p_ext->chain ^ skb_ext->chain;
- }
#endif
+ }
NAPI_GRO_CB(p)->same_flow = !diffs;
}
@@ -6296,8 +6227,12 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->encapsulation = 0;
skb_shinfo(skb)->gso_type = 0;
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
- skb_ext_reset(skb);
- nf_reset_ct(skb);
+ if (unlikely(skb->slow_gro)) {
+ skb_orphan(skb);
+ skb_ext_reset(skb);
+ nf_reset_ct(skb);
+ skb->slow_gro = 0;
+ }
napi->skb = skb;
}
@@ -7597,7 +7532,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
{
struct netdev_adjacent *lower;
- WARN_ON_ONCE(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
@@ -9362,7 +9297,7 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
return dev->xdp_state[mode].prog;
}
-static u8 dev_xdp_prog_count(struct net_device *dev)
+u8 dev_xdp_prog_count(struct net_device *dev)
{
u8 count = 0;
int i;
@@ -9372,6 +9307,7 @@ static u8 dev_xdp_prog_count(struct net_device *dev)
count++;
return count;
}
+EXPORT_SYMBOL_GPL(dev_xdp_prog_count);
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
{
@@ -9465,6 +9401,8 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
{
unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES);
struct bpf_prog *cur_prog;
+ struct net_device *upper;
+ struct list_head *iter;
enum bpf_xdp_mode mode;
bpf_op_t bpf_op;
int err;
@@ -9503,6 +9441,14 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
return -EBUSY;
}
+ /* don't allow if an upper device already has a program */
+ netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+ if (dev_xdp_prog_count(upper) > 0) {
+ NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program");
+ return -EEXIST;
+ }
+ }
+
cur_prog = dev_xdp_prog(dev, mode);
/* can't replace attached prog with link */
if (link && cur_prog) {
@@ -10134,7 +10080,7 @@ static int netif_alloc_rx_queues(struct net_device *dev)
BUG_ON(count < 1);
- rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ rx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!rx)
return -ENOMEM;
@@ -10201,7 +10147,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
if (count < 1 || count > 0xffff)
return -EINVAL;
- tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ tx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!tx)
return -ENOMEM;
@@ -10841,7 +10787,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
/* ensure 32-byte alignment of whole construct */
alloc_size += NETDEV_ALIGN - 1;
- p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!p)
return NULL;
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 45ae6eeb2964..8c39283c26ae 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -16,10 +16,9 @@
* General list handling functions
*/
-static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
- const unsigned char *addr, int addr_len,
- unsigned char addr_type, bool global,
- bool sync)
+static struct netdev_hw_addr*
+__hw_addr_create(const unsigned char *addr, int addr_len,
+ unsigned char addr_type, bool global, bool sync)
{
struct netdev_hw_addr *ha;
int alloc_size;
@@ -29,32 +28,44 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
alloc_size = L1_CACHE_BYTES;
ha = kmalloc(alloc_size, GFP_ATOMIC);
if (!ha)
- return -ENOMEM;
+ return NULL;
memcpy(ha->addr, addr, addr_len);
ha->type = addr_type;
ha->refcount = 1;
ha->global_use = global;
ha->synced = sync ? 1 : 0;
ha->sync_cnt = 0;
- list_add_tail_rcu(&ha->list, &list->list);
- list->count++;
- return 0;
+ return ha;
}
static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
const unsigned char *addr, int addr_len,
unsigned char addr_type, bool global, bool sync,
- int sync_count)
+ int sync_count, bool exclusive)
{
+ struct rb_node **ins_point = &list->tree.rb_node, *parent = NULL;
struct netdev_hw_addr *ha;
if (addr_len > MAX_ADDR_LEN)
return -EINVAL;
- list_for_each_entry(ha, &list->list, list) {
- if (ha->type == addr_type &&
- !memcmp(ha->addr, addr, addr_len)) {
+ while (*ins_point) {
+ int diff;
+
+ ha = rb_entry(*ins_point, struct netdev_hw_addr, node);
+ diff = memcmp(addr, ha->addr, addr_len);
+ if (diff == 0)
+ diff = memcmp(&addr_type, &ha->type, sizeof(addr_type));
+
+ parent = *ins_point;
+ if (diff < 0) {
+ ins_point = &parent->rb_left;
+ } else if (diff > 0) {
+ ins_point = &parent->rb_right;
+ } else {
+ if (exclusive)
+ return -EEXIST;
if (global) {
/* check if addr is already used as global */
if (ha->global_use)
@@ -73,8 +84,25 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
}
}
- return __hw_addr_create_ex(list, addr, addr_len, addr_type, global,
- sync);
+ ha = __hw_addr_create(addr, addr_len, addr_type, global, sync);
+ if (!ha)
+ return -ENOMEM;
+
+ /* The first address in dev->dev_addrs is pointed to by dev->dev_addr
+ * and mutated freely by device drivers and netdev ops, so if we insert
+ * it into the tree we'll end up with an invalid rbtree.
+ */
+ if (list->count > 0) {
+ rb_link_node(&ha->node, parent, ins_point);
+ rb_insert_color(&ha->node, &list->tree);
+ } else {
+ RB_CLEAR_NODE(&ha->node);
+ }
+
+ list_add_tail_rcu(&ha->list, &list->list);
+ list->count++;
+
+ return 0;
}
static int __hw_addr_add(struct netdev_hw_addr_list *list,
@@ -82,7 +110,7 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list,
unsigned char addr_type)
{
return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false,
- 0);
+ 0, false);
}
static int __hw_addr_del_entry(struct netdev_hw_addr_list *list,
@@ -103,24 +131,61 @@ static int __hw_addr_del_entry(struct netdev_hw_addr_list *list,
if (--ha->refcount)
return 0;
+
+ if (!RB_EMPTY_NODE(&ha->node))
+ rb_erase(&ha->node, &list->tree);
+
list_del_rcu(&ha->list);
kfree_rcu(ha, rcu_head);
list->count--;
return 0;
}
+static struct netdev_hw_addr *__hw_addr_lookup(struct netdev_hw_addr_list *list,
+ const unsigned char *addr, int addr_len,
+ unsigned char addr_type)
+{
+ struct netdev_hw_addr *ha;
+ struct rb_node *node;
+
+ /* The first address isn't inserted into the tree because in the dev->dev_addrs
+ * list it's the address pointed to by dev->dev_addr which is freely mutated
+ * in place, so we need to check it separately.
+ */
+ ha = list_first_entry(&list->list, struct netdev_hw_addr, list);
+ if (ha && !memcmp(addr, ha->addr, addr_len) &&
+ (!addr_type || addr_type == ha->type))
+ return ha;
+
+ node = list->tree.rb_node;
+
+ while (node) {
+ struct netdev_hw_addr *ha = rb_entry(node, struct netdev_hw_addr, node);
+ int diff = memcmp(addr, ha->addr, addr_len);
+
+ if (diff == 0 && addr_type)
+ diff = memcmp(&addr_type, &ha->type, sizeof(addr_type));
+
+ if (diff < 0)
+ node = node->rb_left;
+ else if (diff > 0)
+ node = node->rb_right;
+ else
+ return ha;
+ }
+
+ return NULL;
+}
+
static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
const unsigned char *addr, int addr_len,
unsigned char addr_type, bool global, bool sync)
{
- struct netdev_hw_addr *ha;
+ struct netdev_hw_addr *ha = __hw_addr_lookup(list, addr, addr_len, addr_type);
- list_for_each_entry(ha, &list->list, list) {
- if (!memcmp(ha->addr, addr, addr_len) &&
- (ha->type == addr_type || !addr_type))
- return __hw_addr_del_entry(list, ha, global, sync);
- }
- return -ENOENT;
+ if (!ha)
+ return -ENOENT;
+ return __hw_addr_del_entry(list, ha, global, sync);
}
static int __hw_addr_del(struct netdev_hw_addr_list *list,
@@ -137,7 +202,7 @@ static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list,
int err;
err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type,
- false, true, ha->sync_cnt);
+ false, true, ha->sync_cnt, false);
if (err && err != -EEXIST)
return err;
@@ -407,6 +472,7 @@ static void __hw_addr_flush(struct netdev_hw_addr_list *list)
{
struct netdev_hw_addr *ha, *tmp;
+ list->tree = RB_ROOT;
list_for_each_entry_safe(ha, tmp, &list->list, list) {
list_del_rcu(&ha->list);
kfree_rcu(ha, rcu_head);
@@ -418,6 +484,7 @@ void __hw_addr_init(struct netdev_hw_addr_list *list)
{
INIT_LIST_HEAD(&list->list);
list->count = 0;
+ list->tree = RB_ROOT;
}
EXPORT_SYMBOL(__hw_addr_init);
@@ -552,22 +619,14 @@ EXPORT_SYMBOL(dev_addr_del);
*/
int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr)
{
- struct netdev_hw_addr *ha;
int err;
netif_addr_lock_bh(dev);
- list_for_each_entry(ha, &dev->uc.list, list) {
- if (!memcmp(ha->addr, addr, dev->addr_len) &&
- ha->type == NETDEV_HW_ADDR_T_UNICAST) {
- err = -EEXIST;
- goto out;
- }
- }
- err = __hw_addr_create_ex(&dev->uc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_UNICAST, true, false);
+ err = __hw_addr_add_ex(&dev->uc, addr, dev->addr_len,
+ NETDEV_HW_ADDR_T_UNICAST, true, false,
+ 0, true);
if (!err)
__dev_set_rx_mode(dev);
-out:
netif_addr_unlock_bh(dev);
return err;
}
@@ -745,22 +804,14 @@ EXPORT_SYMBOL(dev_uc_init);
*/
int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr)
{
- struct netdev_hw_addr *ha;
int err;
netif_addr_lock_bh(dev);
- list_for_each_entry(ha, &dev->mc.list, list) {
- if (!memcmp(ha->addr, addr, dev->addr_len) &&
- ha->type == NETDEV_HW_ADDR_T_MULTICAST) {
- err = -EEXIST;
- goto out;
- }
- }
- err = __hw_addr_create_ex(&dev->mc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_MULTICAST, true, false);
+ err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
+ NETDEV_HW_ADDR_T_MULTICAST, true, false,
+ 0, true);
if (!err)
__dev_set_rx_mode(dev);
-out:
netif_addr_unlock_bh(dev);
return err;
}
@@ -773,7 +824,8 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr,
netif_addr_lock_bh(dev);
err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_MULTICAST, global, false, 0);
+ NETDEV_HW_ADDR_T_MULTICAST, global, false,
+ 0, false);
if (!err)
__dev_set_rx_mode(dev);
netif_addr_unlock_bh(dev);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 478d032f34ac..0e87237fd871 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -1,10 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kmod.h>
#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
#include <linux/net_tstamp.h>
#include <linux/wireless.h>
+#include <linux/if_bridge.h>
#include <net/dsa.h>
#include <net/wext.h>
@@ -25,79 +27,108 @@ static int dev_ifname(struct net *net, struct ifreq *ifr)
return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex);
}
-static gifconf_func_t *gifconf_list[NPROTO];
-
-/**
- * register_gifconf - register a SIOCGIF handler
- * @family: Address family
- * @gifconf: Function handler
- *
- * Register protocol dependent address dumping routines. The handler
- * that is passed must not be freed or reused until it has been replaced
- * by another handler.
- */
-int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
-{
- if (family >= NPROTO)
- return -EINVAL;
- gifconf_list[family] = gifconf;
- return 0;
-}
-EXPORT_SYMBOL(register_gifconf);
-
/*
* Perform a SIOCGIFCONF call. This structure will change
* size eventually, and there is nothing I can do about it.
* Thus we will need a 'compatibility mode'.
*/
-
-int dev_ifconf(struct net *net, struct ifconf *ifc, int size)
+int dev_ifconf(struct net *net, struct ifconf __user *uifc)
{
struct net_device *dev;
- char __user *pos;
- int len;
- int total;
- int i;
+ void __user *pos;
+ size_t size;
+ int len, total = 0, done;
- /*
- * Fetch the caller's info block.
- */
+ /* both the ifconf and the ifreq structures are slightly different */
+ if (in_compat_syscall()) {
+ struct compat_ifconf ifc32;
- pos = ifc->ifc_buf;
- len = ifc->ifc_len;
+ if (copy_from_user(&ifc32, uifc, sizeof(struct compat_ifconf)))
+ return -EFAULT;
- /*
- * Loop over the interfaces, and write an info block for each.
- */
+ pos = compat_ptr(ifc32.ifcbuf);
+ len = ifc32.ifc_len;
+ size = sizeof(struct compat_ifreq);
+ } else {
+ struct ifconf ifc;
+
+ if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
+ return -EFAULT;
- total = 0;
+ pos = ifc.ifc_buf;
+ len = ifc.ifc_len;
+ size = sizeof(struct ifreq);
+ }
+
+ /* Loop over the interfaces, and write an info block for each. */
+ rtnl_lock();
for_each_netdev(net, dev) {
- for (i = 0; i < NPROTO; i++) {
- if (gifconf_list[i]) {
- int done;
- if (!pos)
- done = gifconf_list[i](dev, NULL, 0, size);
- else
- done = gifconf_list[i](dev, pos + total,
- len - total, size);
- if (done < 0)
- return -EFAULT;
- total += done;
- }
+ if (!pos)
+ done = inet_gifconf(dev, NULL, 0, size);
+ else
+ done = inet_gifconf(dev, pos + total,
+ len - total, size);
+ if (done < 0) {
+ rtnl_unlock();
+ return -EFAULT;
}
+ total += done;
}
+ rtnl_unlock();
- /*
- * All done. Write the updated control block back to the caller.
- */
- ifc->ifc_len = total;
+ return put_user(total, &uifc->ifc_len);
+}
+
+static int dev_getifmap(struct net_device *dev, struct ifreq *ifr)
+{
+ struct ifmap *ifmap = &ifr->ifr_map;
+
+ if (in_compat_syscall()) {
+ struct compat_ifmap *cifmap = (struct compat_ifmap *)ifmap;
+
+ cifmap->mem_start = dev->mem_start;
+ cifmap->mem_end = dev->mem_end;
+ cifmap->base_addr = dev->base_addr;
+ cifmap->irq = dev->irq;
+ cifmap->dma = dev->dma;
+ cifmap->port = dev->if_port;
+
+ return 0;
+ }
+
+ ifmap->mem_start = dev->mem_start;
+ ifmap->mem_end = dev->mem_end;
+ ifmap->base_addr = dev->base_addr;
+ ifmap->irq = dev->irq;
+ ifmap->dma = dev->dma;
+ ifmap->port = dev->if_port;
- /*
- * Both BSD and Solaris return 0 here, so we do too.
- */
return 0;
}
+static int dev_setifmap(struct net_device *dev, struct ifreq *ifr)
+{
+ struct compat_ifmap *cifmap = (struct compat_ifmap *)&ifr->ifr_map;
+
+ if (!dev->netdev_ops->ndo_set_config)
+ return -EOPNOTSUPP;
+
+ if (in_compat_syscall()) {
+ struct ifmap ifmap = {
+ .mem_start = cifmap->mem_start,
+ .mem_end = cifmap->mem_end,
+ .base_addr = cifmap->base_addr,
+ .irq = cifmap->irq,
+ .dma = cifmap->dma,
+ .port = cifmap->port,
+ };
+
+ return dev->netdev_ops->ndo_set_config(dev, &ifmap);
+ }
+
+ return dev->netdev_ops->ndo_set_config(dev, &ifr->ifr_map);
+}
+
/*
* Perform the SIOCxIFxxx calls, inside rcu_read_lock()
*/
@@ -128,13 +159,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
break;
case SIOCGIFMAP:
- ifr->ifr_map.mem_start = dev->mem_start;
- ifr->ifr_map.mem_end = dev->mem_end;
- ifr->ifr_map.base_addr = dev->base_addr;
- ifr->ifr_map.irq = dev->irq;
- ifr->ifr_map.dma = dev->dma;
- ifr->ifr_map.port = dev->if_port;
- return 0;
+ return dev_getifmap(dev, ifr);
case SIOCGIFINDEX:
ifr->ifr_ifindex = dev->ifindex;
@@ -215,19 +240,19 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
return 0;
}
-static int dev_do_ioctl(struct net_device *dev,
- struct ifreq *ifr, unsigned int cmd)
+static int dev_eth_ioctl(struct net_device *dev,
+ struct ifreq *ifr, unsigned int cmd)
{
const struct net_device_ops *ops = dev->netdev_ops;
int err;
- err = dsa_ndo_do_ioctl(dev, ifr, cmd);
+ err = dsa_ndo_eth_ioctl(dev, ifr, cmd);
if (err == 0 || err != -EOPNOTSUPP)
return err;
- if (ops->ndo_do_ioctl) {
+ if (ops->ndo_eth_ioctl) {
if (netif_device_present(dev))
- err = ops->ndo_do_ioctl(dev, ifr, cmd);
+ err = ops->ndo_eth_ioctl(dev, ifr, cmd);
else
err = -ENODEV;
}
@@ -235,10 +260,55 @@ static int dev_do_ioctl(struct net_device *dev,
return err;
}
+static int dev_siocbond(struct net_device *dev,
+ struct ifreq *ifr, unsigned int cmd)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (ops->ndo_siocbond) {
+ if (netif_device_present(dev))
+ return ops->ndo_siocbond(dev, ifr, cmd);
+ else
+ return -ENODEV;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int dev_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, unsigned int cmd)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (ops->ndo_siocdevprivate) {
+ if (netif_device_present(dev))
+ return ops->ndo_siocdevprivate(dev, ifr, data, cmd);
+ else
+ return -ENODEV;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int dev_siocwandev(struct net_device *dev, struct if_settings *ifs)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (ops->ndo_siocwandev) {
+ if (netif_device_present(dev))
+ return ops->ndo_siocwandev(dev, ifs);
+ else
+ return -ENODEV;
+ }
+
+ return -EOPNOTSUPP;
+}
+
/*
* Perform the SIOCxIFxxx calls, inside rtnl_lock()
*/
-static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
+static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data,
+ unsigned int cmd)
{
int err;
struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
@@ -275,12 +345,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
return 0;
case SIOCSIFMAP:
- if (ops->ndo_set_config) {
- if (!netif_device_present(dev))
- return -ENODEV;
- return ops->ndo_set_config(dev, &ifr->ifr_map);
- }
- return -EOPNOTSUPP;
+ return dev_setifmap(dev, ifr);
case SIOCADDMULTI:
if (!ops->ndo_set_rx_mode ||
@@ -307,6 +372,22 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
ifr->ifr_newname[IFNAMSIZ-1] = '\0';
return dev_change_name(dev, ifr->ifr_newname);
+ case SIOCWANDEV:
+ return dev_siocwandev(dev, &ifr->ifr_settings);
+
+ case SIOCBRADDIF:
+ case SIOCBRDELIF:
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ if (!netif_is_bridge_master(dev))
+ return -EOPNOTSUPP;
+ dev_hold(dev);
+ rtnl_unlock();
+ err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL);
+ dev_put(dev);
+ rtnl_lock();
+ return err;
+
case SIOCSHWTSTAMP:
err = net_hwtstamp_validate(ifr);
if (err)
@@ -317,23 +398,23 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
* Unknown or private ioctl
*/
default:
- if ((cmd >= SIOCDEVPRIVATE &&
- cmd <= SIOCDEVPRIVATE + 15) ||
- cmd == SIOCBONDENSLAVE ||
+ if (cmd >= SIOCDEVPRIVATE &&
+ cmd <= SIOCDEVPRIVATE + 15)
+ return dev_siocdevprivate(dev, ifr, data, cmd);
+
+ if (cmd == SIOCGMIIPHY ||
+ cmd == SIOCGMIIREG ||
+ cmd == SIOCSMIIREG ||
+ cmd == SIOCSHWTSTAMP ||
+ cmd == SIOCGHWTSTAMP) {
+ err = dev_eth_ioctl(dev, ifr, cmd);
+ } else if (cmd == SIOCBONDENSLAVE ||
cmd == SIOCBONDRELEASE ||
cmd == SIOCBONDSETHWADDR ||
cmd == SIOCBONDSLAVEINFOQUERY ||
cmd == SIOCBONDINFOQUERY ||
- cmd == SIOCBONDCHANGEACTIVE ||
- cmd == SIOCGMIIPHY ||
- cmd == SIOCGMIIREG ||
- cmd == SIOCSMIIREG ||
- cmd == SIOCBRADDIF ||
- cmd == SIOCBRDELIF ||
- cmd == SIOCSHWTSTAMP ||
- cmd == SIOCGHWTSTAMP ||
- cmd == SIOCWANDEV) {
- err = dev_do_ioctl(dev, ifr, cmd);
+ cmd == SIOCBONDCHANGEACTIVE) {
+ err = dev_siocbond(dev, ifr, cmd);
} else
err = -EINVAL;
@@ -386,7 +467,8 @@ EXPORT_SYMBOL(dev_load);
* positive or a negative errno code on error.
*/
-int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_copyout)
+int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr,
+ void __user *data, bool *need_copyout)
{
int ret;
char *colon;
@@ -437,7 +519,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
case SIOCETHTOOL:
dev_load(net, ifr->ifr_name);
rtnl_lock();
- ret = dev_ethtool(net, ifr);
+ ret = dev_ethtool(net, ifr, data);
rtnl_unlock();
if (colon)
*colon = ':';
@@ -456,7 +538,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
rtnl_lock();
- ret = dev_ifsioc(net, ifr, cmd);
+ ret = dev_ifsioc(net, ifr, data, cmd);
rtnl_unlock();
if (colon)
*colon = ':';
@@ -502,7 +584,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
case SIOCBONDINFOQUERY:
dev_load(net, ifr->ifr_name);
rtnl_lock();
- ret = dev_ifsioc(net, ifr, cmd);
+ ret = dev_ifsioc(net, ifr, data, cmd);
rtnl_unlock();
if (need_copyout)
*need_copyout = false;
@@ -527,7 +609,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
cmd <= SIOCDEVPRIVATE + 15)) {
dev_load(net, ifr->ifr_name);
rtnl_lock();
- ret = dev_ifsioc(net, ifr, cmd);
+ ret = dev_ifsioc(net, ifr, data, cmd);
rtnl_unlock();
return ret;
}
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 85032626de24..a856ae401ea5 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -92,7 +92,8 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
DEVLINK_PORT_FN_STATE_ACTIVE),
};
-static LIST_HEAD(devlink_list);
+static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
+#define DEVLINK_REGISTERED XA_MARK_1
/* devlink_mutex
*
@@ -108,23 +109,23 @@ struct net *devlink_net(const struct devlink *devlink)
}
EXPORT_SYMBOL_GPL(devlink_net);
-static void __devlink_net_set(struct devlink *devlink, struct net *net)
+static void devlink_put(struct devlink *devlink)
{
- write_pnet(&devlink->_net, net);
+ if (refcount_dec_and_test(&devlink->refcount))
+ complete(&devlink->comp);
}
-void devlink_net_set(struct devlink *devlink, struct net *net)
+static bool __must_check devlink_try_get(struct devlink *devlink)
{
- if (WARN_ON(devlink->registered))
- return;
- __devlink_net_set(devlink, net);
+ return refcount_inc_not_zero(&devlink->refcount);
}
-EXPORT_SYMBOL_GPL(devlink_net_set);
static struct devlink *devlink_get_from_attrs(struct net *net,
struct nlattr **attrs)
{
struct devlink *devlink;
+ unsigned long index;
+ bool found = false;
char *busname;
char *devname;
@@ -136,19 +137,19 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
lockdep_assert_held(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
if (strcmp(devlink->dev->bus->name, busname) == 0 &&
strcmp(dev_name(devlink->dev), devname) == 0 &&
- net_eq(devlink_net(devlink), net))
- return devlink;
+ net_eq(devlink_net(devlink), net)) {
+ found = true;
+ break;
+ }
}
- return ERR_PTR(-ENODEV);
-}
+ if (!found || !devlink_try_get(devlink))
+ devlink = ERR_PTR(-ENODEV);
-static struct devlink *devlink_get_from_info(struct genl_info *info)
-{
- return devlink_get_from_attrs(genl_info_net(info), info->attrs);
+ return devlink;
}
static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
@@ -499,7 +500,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
int err;
mutex_lock(&devlink_mutex);
- devlink = devlink_get_from_info(info);
+ devlink = devlink_get_from_attrs(genl_info_net(info), info->attrs);
if (IS_ERR(devlink)) {
mutex_unlock(&devlink_mutex);
return PTR_ERR(devlink);
@@ -542,6 +543,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
unlock:
if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
mutex_unlock(&devlink_mutex);
return err;
}
@@ -554,6 +556,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
devlink = info->user_ptr[0];
if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
mutex_unlock(&devlink_mutex);
}
@@ -817,10 +820,11 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
return 0;
}
-static int
-devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *ops,
- struct devlink_port *port, struct sk_buff *msg,
- struct netlink_ext_ack *extack, bool *msg_updated)
+static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops,
+ struct devlink_port *port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
{
u8 hw_addr[MAX_ADDR_LEN];
int hw_addr_len;
@@ -829,7 +833,8 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *
if (!ops->port_function_hw_addr_get)
return 0;
- err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack);
+ err = ops->port_function_hw_addr_get(port, hw_addr, &hw_addr_len,
+ extack);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
@@ -843,12 +848,11 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *
}
static int devlink_nl_rate_fill(struct sk_buff *msg,
- struct devlink *devlink,
struct devlink_rate *devlink_rate,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags,
- struct netlink_ext_ack *extack)
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags, struct netlink_ext_ack *extack)
{
+ struct devlink *devlink = devlink_rate->devlink;
void *hdr;
hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
@@ -906,12 +910,11 @@ devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate)
opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED;
}
-static int
-devlink_port_fn_state_fill(struct devlink *devlink,
- const struct devlink_ops *ops,
- struct devlink_port *port, struct sk_buff *msg,
- struct netlink_ext_ack *extack,
- bool *msg_updated)
+static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
+ struct devlink_port *port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
{
enum devlink_port_fn_opstate opstate;
enum devlink_port_fn_state state;
@@ -920,7 +923,7 @@ devlink_port_fn_state_fill(struct devlink *devlink,
if (!ops->port_fn_state_get)
return 0;
- err = ops->port_fn_state_get(devlink, port, &state, &opstate, extack);
+ err = ops->port_fn_state_get(port, &state, &opstate, extack);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
@@ -948,7 +951,6 @@ static int
devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
struct netlink_ext_ack *extack)
{
- struct devlink *devlink = port->devlink;
const struct devlink_ops *ops;
struct nlattr *function_attr;
bool msg_updated = false;
@@ -958,13 +960,12 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
if (!function_attr)
return -EMSGSIZE;
- ops = devlink->ops;
- err = devlink_port_fn_hw_addr_fill(devlink, ops, port, msg,
- extack, &msg_updated);
+ ops = port->devlink->ops;
+ err = devlink_port_fn_hw_addr_fill(ops, port, msg, extack,
+ &msg_updated);
if (err)
goto out;
- err = devlink_port_fn_state_fill(devlink, ops, port, msg, extack,
- &msg_updated);
+ err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated);
out:
if (err || !msg_updated)
nla_nest_cancel(msg, function_attr);
@@ -973,12 +974,12 @@ out:
return err;
}
-static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
+static int devlink_nl_port_fill(struct sk_buff *msg,
struct devlink_port *devlink_port,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags,
- struct netlink_ext_ack *extack)
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags, struct netlink_ext_ack *extack)
{
+ struct devlink *devlink = devlink_port->devlink;
void *hdr;
hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
@@ -1039,53 +1040,47 @@ nla_put_failure:
static void devlink_port_notify(struct devlink_port *devlink_port,
enum devlink_command cmd)
{
- struct devlink *devlink = devlink_port->devlink;
struct sk_buff *msg;
int err;
- if (!devlink_port->registered)
- return;
-
WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return;
- err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0,
- NULL);
+ err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL);
if (err) {
nlmsg_free(msg);
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family,
+ devlink_net(devlink_port->devlink), msg, 0,
+ DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
static void devlink_rate_notify(struct devlink_rate *devlink_rate,
enum devlink_command cmd)
{
- struct devlink *devlink = devlink_rate->devlink;
struct sk_buff *msg;
int err;
- WARN_ON(cmd != DEVLINK_CMD_RATE_NEW &&
- cmd != DEVLINK_CMD_RATE_DEL);
+ WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL);
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return;
- err = devlink_nl_rate_fill(msg, devlink, devlink_rate,
- cmd, 0, 0, 0, NULL);
+ err = devlink_nl_rate_fill(msg, devlink_rate, cmd, 0, 0, 0, NULL);
if (err) {
nlmsg_free(msg);
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family,
+ devlink_net(devlink_rate->devlink), msg, 0,
+ DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
@@ -1094,13 +1089,18 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
struct devlink_rate *devlink_rate;
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err = 0;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
@@ -1110,18 +1110,19 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
idx++;
continue;
}
- err = devlink_nl_rate_fill(msg, devlink,
- devlink_rate,
- cmd, id,
+ err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id,
cb->nlh->nlmsg_seq,
NLM_F_MULTI, NULL);
if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -1136,7 +1137,6 @@ static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink_rate *devlink_rate = info->user_ptr[1];
- struct devlink *devlink = devlink_rate->devlink;
struct sk_buff *msg;
int err;
@@ -1144,8 +1144,7 @@ static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb,
if (!msg)
return -ENOMEM;
- err = devlink_nl_rate_fill(msg, devlink, devlink_rate,
- DEVLINK_CMD_RATE_NEW,
+ err = devlink_nl_rate_fill(msg, devlink_rate, DEVLINK_CMD_RATE_NEW,
info->snd_portid, info->snd_seq, 0,
info->extack);
if (err) {
@@ -1193,20 +1192,30 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
{
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) {
+ devlink_put(devlink);
+ continue;
+ }
+
if (idx < start) {
idx++;
+ devlink_put(devlink);
continue;
}
+
err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI);
+ devlink_put(devlink);
if (err)
goto out;
idx++;
@@ -1222,7 +1231,6 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = devlink_port->devlink;
struct sk_buff *msg;
int err;
@@ -1230,8 +1238,7 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
if (!msg)
return -ENOMEM;
- err = devlink_nl_port_fill(msg, devlink, devlink_port,
- DEVLINK_CMD_PORT_NEW,
+ err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_PORT_NEW,
info->snd_portid, info->snd_seq, 0,
info->extack);
if (err) {
@@ -1248,32 +1255,39 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
struct devlink *devlink;
struct devlink_port *devlink_port;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(devlink_port, &devlink->port_list, list) {
if (idx < start) {
idx++;
continue;
}
- err = devlink_nl_port_fill(msg, devlink, devlink_port,
+ err = devlink_nl_port_fill(msg, devlink_port,
DEVLINK_CMD_NEW,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- NLM_F_MULTI,
- cb->extack);
+ NLM_F_MULTI, cb->extack);
if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -1282,31 +1296,33 @@ out:
return msg->len;
}
-static int devlink_port_type_set(struct devlink *devlink,
- struct devlink_port *devlink_port,
+static int devlink_port_type_set(struct devlink_port *devlink_port,
enum devlink_port_type port_type)
{
int err;
- if (devlink->ops->port_type_set) {
- if (port_type == devlink_port->type)
- return 0;
- err = devlink->ops->port_type_set(devlink_port, port_type);
- if (err)
- return err;
- devlink_port->desired_type = port_type;
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+ if (!devlink_port->devlink->ops->port_type_set)
+ return -EOPNOTSUPP;
+
+ if (port_type == devlink_port->type)
return 0;
- }
- return -EOPNOTSUPP;
+
+ err = devlink_port->devlink->ops->port_type_set(devlink_port,
+ port_type);
+ if (err)
+ return err;
+
+ devlink_port->desired_type = port_type;
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+ return 0;
}
-static int
-devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *port,
- const struct nlattr *attr, struct netlink_ext_ack *extack)
+static int devlink_port_function_hw_addr_set(struct devlink_port *port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
{
- const struct devlink_ops *ops;
+ const struct devlink_ops *ops = port->devlink->ops;
const u8 *hw_addr;
int hw_addr_len;
@@ -1327,17 +1343,16 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *
}
}
- ops = devlink->ops;
if (!ops->port_function_hw_addr_set) {
NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes");
return -EOPNOTSUPP;
}
- return ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack);
+ return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len,
+ extack);
}
-static int devlink_port_fn_state_set(struct devlink *devlink,
- struct devlink_port *port,
+static int devlink_port_fn_state_set(struct devlink_port *port,
const struct nlattr *attr,
struct netlink_ext_ack *extack)
{
@@ -1345,18 +1360,18 @@ static int devlink_port_fn_state_set(struct devlink *devlink,
const struct devlink_ops *ops;
state = nla_get_u8(attr);
- ops = devlink->ops;
+ ops = port->devlink->ops;
if (!ops->port_fn_state_set) {
NL_SET_ERR_MSG_MOD(extack,
"Function does not support state setting");
return -EOPNOTSUPP;
}
- return ops->port_fn_state_set(devlink, port, state, extack);
+ return ops->port_fn_state_set(port, state, extack);
}
-static int
-devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
- const struct nlattr *attr, struct netlink_ext_ack *extack)
+static int devlink_port_function_set(struct devlink_port *port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1];
int err;
@@ -1370,7 +1385,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
if (attr) {
- err = devlink_port_function_hw_addr_set(devlink, port, attr, extack);
+ err = devlink_port_function_hw_addr_set(port, attr, extack);
if (err)
return err;
}
@@ -1380,7 +1395,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
*/
attr = tb[DEVLINK_PORT_FN_ATTR_STATE];
if (attr)
- err = devlink_port_fn_state_set(devlink, port, attr, extack);
+ err = devlink_port_fn_state_set(port, attr, extack);
if (!err)
devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
@@ -1391,14 +1406,13 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = devlink_port->devlink;
int err;
if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
enum devlink_port_type port_type;
port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]);
- err = devlink_port_type_set(devlink, devlink_port, port_type);
+ err = devlink_port_type_set(devlink_port, port_type);
if (err)
return err;
}
@@ -1407,7 +1421,7 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION];
struct netlink_ext_ack *extack = info->extack;
- err = devlink_port_function_set(devlink, devlink_port, attr, extack);
+ err = devlink_port_function_set(devlink_port, attr, extack);
if (err)
return err;
}
@@ -1502,9 +1516,8 @@ static int devlink_port_new_notifiy(struct devlink *devlink,
goto out;
}
- err = devlink_nl_port_fill(msg, devlink, devlink_port,
- DEVLINK_CMD_NEW, info->snd_portid,
- info->snd_seq, 0, NULL);
+ err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW,
+ info->snd_portid, info->snd_seq, 0, NULL);
if (err)
goto out;
@@ -1908,13 +1921,18 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
struct devlink *devlink;
struct devlink_sb *devlink_sb;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
if (idx < start) {
@@ -1928,11 +1946,14 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
NLM_F_MULTI);
if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -2052,14 +2073,19 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
struct devlink *devlink;
struct devlink_sb *devlink_sb;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err = 0;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
+ continue;
+
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
!devlink->ops->sb_pool_get)
- continue;
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
err = __sb_pool_get_dumpit(msg, start, &idx, devlink,
@@ -2070,10 +2096,13 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
err = 0;
} else if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -2265,14 +2294,19 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
struct devlink *devlink;
struct devlink_sb *devlink_sb;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err = 0;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
+ continue;
+
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
!devlink->ops->sb_port_pool_get)
- continue;
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
err = __sb_port_pool_get_dumpit(msg, start, &idx,
@@ -2283,10 +2317,13 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
err = 0;
} else if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -2506,14 +2543,18 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
struct devlink *devlink;
struct devlink_sb *devlink_sb;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err = 0;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
+ continue;
+
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
!devlink->ops->sb_tc_pool_bind_get)
- continue;
+ goto retry;
mutex_lock(&devlink->lock);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
@@ -2526,10 +2567,13 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
err = 0;
} else if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -3801,10 +3845,12 @@ static void devlink_param_notify(struct devlink *devlink,
struct devlink_param_item *param_item,
enum devlink_command cmd);
-static void devlink_reload_netns_change(struct devlink *devlink,
- struct net *dest_net)
+static void devlink_ns_change_notify(struct devlink *devlink,
+ struct net *dest_net, struct net *curr_net,
+ bool new)
{
struct devlink_param_item *param_item;
+ enum devlink_command cmd;
/* Userspace needs to be notified about devlink objects
* removed from original and entering new network namespace.
@@ -3812,17 +3858,18 @@ static void devlink_reload_netns_change(struct devlink *devlink,
* reload process so the notifications are generated separatelly.
*/
- list_for_each_entry(param_item, &devlink->param_list, list)
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_DEL);
- devlink_notify(devlink, DEVLINK_CMD_DEL);
+ if (!dest_net || net_eq(dest_net, curr_net))
+ return;
- __devlink_net_set(devlink, dest_net);
+ if (new)
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
- devlink_notify(devlink, DEVLINK_CMD_NEW);
+ cmd = new ? DEVLINK_CMD_PARAM_NEW : DEVLINK_CMD_PARAM_DEL;
list_for_each_entry(param_item, &devlink->param_list, list)
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_NEW);
+ devlink_param_notify(devlink, 0, param_item, cmd);
+
+ if (!new)
+ devlink_notify(devlink, DEVLINK_CMD_DEL);
}
static bool devlink_reload_supported(const struct devlink_ops *ops)
@@ -3902,6 +3949,7 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
u32 *actions_performed, struct netlink_ext_ack *extack)
{
u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+ struct net *curr_net;
int err;
if (!devlink->reload_enabled)
@@ -3909,18 +3957,22 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
sizeof(remote_reload_stats));
+
+ curr_net = devlink_net(devlink);
+ devlink_ns_change_notify(devlink, dest_net, curr_net, false);
err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack);
if (err)
return err;
- if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
- devlink_reload_netns_change(devlink, dest_net);
+ if (dest_net && !net_eq(dest_net, curr_net))
+ write_pnet(&devlink->_net, dest_net);
err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
devlink_reload_failed_set(devlink, !!err);
if (err)
return err;
+ devlink_ns_change_notify(devlink, dest_net, curr_net, true);
WARN_ON(!(*actions_performed & BIT(action)));
/* Catch driver on updating the remote action within devlink reload */
WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats,
@@ -4117,7 +4169,7 @@ out_free_msg:
static void devlink_flash_update_begin_notify(struct devlink *devlink)
{
- struct devlink_flash_notify params = { 0 };
+ struct devlink_flash_notify params = {};
__devlink_flash_update_notify(devlink,
DEVLINK_CMD_FLASH_UPDATE,
@@ -4126,7 +4178,7 @@ static void devlink_flash_update_begin_notify(struct devlink *devlink)
static void devlink_flash_update_end_notify(struct devlink *devlink)
{
- struct devlink_flash_notify params = { 0 };
+ struct devlink_flash_notify params = {};
__devlink_flash_update_notify(devlink,
DEVLINK_CMD_FLASH_UPDATE_END,
@@ -4283,6 +4335,21 @@ static const struct devlink_param devlink_param_generic[] = {
.name = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME,
.type = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE,
},
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE,
+ },
};
static int devlink_param_generic_verify(const struct devlink_param *param)
@@ -4553,13 +4620,18 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
struct devlink_param_item *param_item;
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err = 0;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(param_item, &devlink->param_list, list) {
if (idx < start) {
@@ -4575,11 +4647,14 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
err = 0;
} else if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -4821,13 +4896,18 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
struct devlink_port *devlink_port;
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err = 0;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(devlink_port, &devlink->port_list, list) {
list_for_each_entry(param_item,
@@ -4847,12 +4927,15 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
err = 0;
} else if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -5062,7 +5145,6 @@ static void devlink_nl_region_notify(struct devlink_region *region,
struct devlink_snapshot *snapshot,
enum devlink_command cmd)
{
- struct devlink *devlink = region->devlink;
struct sk_buff *msg;
WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
@@ -5071,8 +5153,9 @@ static void devlink_nl_region_notify(struct devlink_region *region,
if (IS_ERR(msg))
return;
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family,
+ devlink_net(region->devlink), msg, 0,
+ DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
/**
@@ -5390,15 +5473,22 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
{
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
- int err;
+ int err = 0;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink,
&idx, start);
+retry:
+ devlink_put(devlink);
if (err)
goto out;
}
@@ -5761,6 +5851,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
nla_nest_end(skb, chunks_attr);
genlmsg_end(skb, hdr);
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
mutex_unlock(&devlink_mutex);
return skb->len;
@@ -5769,6 +5860,7 @@ nla_put_failure:
genlmsg_cancel(skb, hdr);
out_unlock:
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
out_dev:
mutex_unlock(&devlink_mutex);
return err;
@@ -5915,22 +6007,20 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
{
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err = 0;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
- if (idx < start) {
- idx++;
- continue;
- }
- if (!devlink->ops->info_get) {
- idx++;
- continue;
- }
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
+ if (idx < start || !devlink->ops->info_get)
+ goto inc;
mutex_lock(&devlink->lock);
err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
@@ -5940,9 +6030,14 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
mutex_unlock(&devlink->lock);
if (err == -EOPNOTSUPP)
err = 0;
- else if (err)
+ else if (err) {
+ devlink_put(devlink);
break;
+ }
+inc:
idx++;
+retry:
+ devlink_put(devlink);
}
mutex_unlock(&devlink_mutex);
@@ -6756,11 +6851,11 @@ EXPORT_SYMBOL_GPL(devlink_port_health_reporter_destroy);
static int
devlink_nl_health_reporter_fill(struct sk_buff *msg,
- struct devlink *devlink,
struct devlink_health_reporter *reporter,
enum devlink_command cmd, u32 portid,
u32 seq, int flags)
{
+ struct devlink *devlink = reporter->devlink;
struct nlattr *reporter_attr;
void *hdr;
@@ -6837,8 +6932,7 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter,
if (!msg)
return;
- err = devlink_nl_health_reporter_fill(msg, reporter->devlink,
- reporter, cmd, 0, 0, 0);
+ err = devlink_nl_health_reporter_fill(msg, reporter, cmd, 0, 0, 0);
if (err) {
nlmsg_free(msg);
return;
@@ -7028,6 +7122,7 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
goto unlock;
reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
+ devlink_put(devlink);
mutex_unlock(&devlink_mutex);
return reporter;
unlock:
@@ -7071,7 +7166,7 @@ static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
goto out;
}
- err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
+ err = devlink_nl_health_reporter_fill(msg, reporter,
DEVLINK_CMD_HEALTH_REPORTER_GET,
info->snd_portid, info->snd_seq,
0);
@@ -7094,13 +7189,18 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
struct devlink_port *port;
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry_rep;
+
mutex_lock(&devlink->reporters_lock);
list_for_each_entry(reporter, &devlink->reporter_list,
list) {
@@ -7108,24 +7208,29 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
idx++;
continue;
}
- err = devlink_nl_health_reporter_fill(msg, devlink,
- reporter,
- DEVLINK_CMD_HEALTH_REPORTER_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ err = devlink_nl_health_reporter_fill(
+ msg, reporter, DEVLINK_CMD_HEALTH_REPORTER_GET,
+ NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
if (err) {
mutex_unlock(&devlink->reporters_lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
mutex_unlock(&devlink->reporters_lock);
+retry_rep:
+ devlink_put(devlink);
}
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry_port;
+
mutex_lock(&devlink->lock);
list_for_each_entry(port, &devlink->port_list, list) {
mutex_lock(&port->reporters_lock);
@@ -7134,14 +7239,15 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
idx++;
continue;
}
- err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
- DEVLINK_CMD_HEALTH_REPORTER_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ err = devlink_nl_health_reporter_fill(
+ msg, reporter,
+ DEVLINK_CMD_HEALTH_REPORTER_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI);
if (err) {
mutex_unlock(&port->reporters_lock);
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
@@ -7149,6 +7255,8 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
mutex_unlock(&port->reporters_lock);
}
mutex_unlock(&devlink->lock);
+retry_port:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -7677,13 +7785,18 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
struct devlink_trap_item *trap_item;
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(trap_item, &devlink->trap_list, list) {
if (idx < start) {
@@ -7697,11 +7810,14 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
NLM_F_MULTI);
if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -7896,13 +8012,18 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
u32 portid = NETLINK_CB(cb->skb).portid;
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(group_item, &devlink->trap_group_list,
list) {
@@ -7917,11 +8038,14 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
NLM_F_MULTI);
if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -8202,13 +8326,18 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
u32 portid = NETLINK_CB(cb->skb).portid;
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(policer_item, &devlink->trap_policer_list,
list) {
@@ -8223,11 +8352,14 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
NLM_F_MULTI);
if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -8768,30 +8900,44 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops)
}
/**
- * devlink_alloc - Allocate new devlink instance resources
+ * devlink_alloc_ns - Allocate new devlink instance resources
+ * in specific namespace
*
* @ops: ops
* @priv_size: size of user private data
+ * @net: net namespace
+ * @dev: parent device
*
* Allocate new devlink instance resources, including devlink index
* and name.
*/
-struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
+struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
+ size_t priv_size, struct net *net,
+ struct device *dev)
{
struct devlink *devlink;
+ static u32 last_id;
+ int ret;
- if (WARN_ON(!ops))
- return NULL;
-
+ WARN_ON(!ops || !dev);
if (!devlink_reload_actions_valid(ops))
return NULL;
devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
if (!devlink)
return NULL;
+
+ ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b,
+ &last_id, GFP_KERNEL);
+ if (ret < 0) {
+ kfree(devlink);
+ return NULL;
+ }
+
+ devlink->dev = dev;
devlink->ops = ops;
xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
- __devlink_net_set(devlink, &init_net);
+ write_pnet(&devlink->_net, net);
INIT_LIST_HEAD(&devlink->port_list);
INIT_LIST_HEAD(&devlink->rate_list);
INIT_LIST_HEAD(&devlink->sb_list);
@@ -8805,22 +8951,22 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
INIT_LIST_HEAD(&devlink->trap_policer_list);
mutex_init(&devlink->lock);
mutex_init(&devlink->reporters_lock);
+ refcount_set(&devlink->refcount, 1);
+ init_completion(&devlink->comp);
+
return devlink;
}
-EXPORT_SYMBOL_GPL(devlink_alloc);
+EXPORT_SYMBOL_GPL(devlink_alloc_ns);
/**
* devlink_register - Register devlink instance
*
* @devlink: devlink
- * @dev: parent device
*/
-int devlink_register(struct devlink *devlink, struct device *dev)
+int devlink_register(struct devlink *devlink)
{
- devlink->dev = dev;
- devlink->registered = true;
mutex_lock(&devlink_mutex);
- list_add_tail(&devlink->list, &devlink_list);
+ xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
devlink_notify(devlink, DEVLINK_CMD_NEW);
mutex_unlock(&devlink_mutex);
return 0;
@@ -8834,11 +8980,14 @@ EXPORT_SYMBOL_GPL(devlink_register);
*/
void devlink_unregister(struct devlink *devlink)
{
+ devlink_put(devlink);
+ wait_for_completion(&devlink->comp);
+
mutex_lock(&devlink_mutex);
WARN_ON(devlink_reload_supported(devlink->ops) &&
devlink->reload_enabled);
devlink_notify(devlink, DEVLINK_CMD_DEL);
- list_del(&devlink->list);
+ xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
mutex_unlock(&devlink_mutex);
}
EXPORT_SYMBOL_GPL(devlink_unregister);
@@ -8900,6 +9049,7 @@ void devlink_free(struct devlink *devlink)
WARN_ON(!list_empty(&devlink->port_list));
xa_destroy(&devlink->snapshot_ids);
+ xa_erase(&devlinks, devlink->index);
kfree(devlink);
}
@@ -8960,9 +9110,10 @@ int devlink_port_register(struct devlink *devlink,
mutex_unlock(&devlink->lock);
return -EEXIST;
}
+
+ WARN_ON(devlink_port->devlink);
devlink_port->devlink = devlink;
devlink_port->index = port_index;
- devlink_port->registered = true;
spin_lock_init(&devlink_port->type_lock);
INIT_LIST_HEAD(&devlink_port->reporter_list);
mutex_init(&devlink_port->reporters_lock);
@@ -9001,7 +9152,7 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port,
enum devlink_port_type type,
void *type_dev)
{
- if (WARN_ON(!devlink_port->registered))
+ if (WARN_ON(!devlink_port->devlink))
return;
devlink_port_type_warn_cancel(devlink_port);
spin_lock_bh(&devlink_port->type_lock);
@@ -9121,7 +9272,7 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port,
{
int ret;
- if (WARN_ON(devlink_port->registered))
+ if (WARN_ON(devlink_port->devlink))
return;
devlink_port->attrs = *attrs;
ret = __devlink_port_attrs_set(devlink_port, attrs->flavour);
@@ -9145,7 +9296,7 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
- if (WARN_ON(devlink_port->registered))
+ if (WARN_ON(devlink_port->devlink))
return;
ret = __devlink_port_attrs_set(devlink_port,
DEVLINK_PORT_FLAVOUR_PCI_PF);
@@ -9172,7 +9323,7 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
- if (WARN_ON(devlink_port->registered))
+ if (WARN_ON(devlink_port->devlink))
return;
ret = __devlink_port_attrs_set(devlink_port,
DEVLINK_PORT_FLAVOUR_PCI_VF);
@@ -9200,7 +9351,7 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
- if (WARN_ON(devlink_port->registered))
+ if (WARN_ON(devlink_port->devlink))
return;
ret = __devlink_port_attrs_set(devlink_port,
DEVLINK_PORT_FLAVOUR_PCI_SF);
@@ -9788,6 +9939,22 @@ static int devlink_param_verify(const struct devlink_param *param)
return devlink_param_driver_verify(param);
}
+static int __devlink_param_register_one(struct devlink *devlink,
+ unsigned int port_index,
+ struct list_head *param_list,
+ const struct devlink_param *param,
+ enum devlink_command reg_cmd)
+{
+ int err;
+
+ err = devlink_param_verify(param);
+ if (err)
+ return err;
+
+ return devlink_param_register_one(devlink, port_index,
+ param_list, param, reg_cmd);
+}
+
static int __devlink_params_register(struct devlink *devlink,
unsigned int port_index,
struct list_head *param_list,
@@ -9802,12 +9969,8 @@ static int __devlink_params_register(struct devlink *devlink,
mutex_lock(&devlink->lock);
for (i = 0; i < params_count; i++, param++) {
- err = devlink_param_verify(param);
- if (err)
- goto rollback;
-
- err = devlink_param_register_one(devlink, port_index,
- param_list, param, reg_cmd);
+ err = __devlink_param_register_one(devlink, port_index,
+ param_list, param, reg_cmd);
if (err)
goto rollback;
}
@@ -9880,6 +10043,43 @@ void devlink_params_unregister(struct devlink *devlink,
EXPORT_SYMBOL_GPL(devlink_params_unregister);
/**
+ * devlink_param_register - register one configuration parameter
+ *
+ * @devlink: devlink
+ * @param: one configuration parameter
+ *
+ * Register the configuration parameter supported by the driver.
+ * Return: returns 0 on successful registration or error code otherwise.
+ */
+int devlink_param_register(struct devlink *devlink,
+ const struct devlink_param *param)
+{
+ int err;
+
+ mutex_lock(&devlink->lock);
+ err = __devlink_param_register_one(devlink, 0, &devlink->param_list,
+ param, DEVLINK_CMD_PARAM_NEW);
+ mutex_unlock(&devlink->lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_param_register);
+
+/**
+ * devlink_param_unregister - unregister one configuration parameter
+ * @devlink: devlink
+ * @param: configuration parameter to unregister
+ */
+void devlink_param_unregister(struct devlink *devlink,
+ const struct devlink_param *param)
+{
+ mutex_lock(&devlink->lock);
+ devlink_param_unregister_one(devlink, 0, &devlink->param_list, param,
+ DEVLINK_CMD_PARAM_DEL);
+ mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_param_unregister);
+
+/**
* devlink_params_publish - publish configuration parameters
*
* @devlink: devlink
@@ -9922,6 +10122,54 @@ void devlink_params_unpublish(struct devlink *devlink)
EXPORT_SYMBOL_GPL(devlink_params_unpublish);
/**
+ * devlink_param_publish - publish one configuration parameter
+ *
+ * @devlink: devlink
+ * @param: one configuration parameter
+ *
+ * Publish previously registered configuration parameter.
+ */
+void devlink_param_publish(struct devlink *devlink,
+ const struct devlink_param *param)
+{
+ struct devlink_param_item *param_item;
+
+ list_for_each_entry(param_item, &devlink->param_list, list) {
+ if (param_item->param != param || param_item->published)
+ continue;
+ param_item->published = true;
+ devlink_param_notify(devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_NEW);
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(devlink_param_publish);
+
+/**
+ * devlink_param_unpublish - unpublish one configuration parameter
+ *
+ * @devlink: devlink
+ * @param: one configuration parameter
+ *
+ * Unpublish previously registered configuration parameter.
+ */
+void devlink_param_unpublish(struct devlink *devlink,
+ const struct devlink_param *param)
+{
+ struct devlink_param_item *param_item;
+
+ list_for_each_entry(param_item, &devlink->param_list, list) {
+ if (param_item->param != param || !param_item->published)
+ continue;
+ param_item->published = false;
+ devlink_param_notify(devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_DEL);
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(devlink_param_unpublish);
+
+/**
* devlink_port_params_register - register port configuration parameters
*
* @devlink_port: devlink port
@@ -11276,23 +11524,29 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
{
struct devlink *devlink;
u32 actions_performed;
+ unsigned long index;
int err;
/* In case network namespace is getting destroyed, reload
* all devlink instances from this namespace into init_net.
*/
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (net_eq(devlink_net(devlink), net)) {
- if (WARN_ON(!devlink_reload_supported(devlink->ops)))
- continue;
- err = devlink_reload(devlink, &init_net,
- DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
- DEVLINK_RELOAD_LIMIT_UNSPEC,
- &actions_performed, NULL);
- if (err && err != -EOPNOTSUPP)
- pr_warn("Failed to reload devlink instance into init_net\n");
- }
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
+ continue;
+
+ if (!net_eq(devlink_net(devlink), net))
+ goto retry;
+
+ WARN_ON(!devlink_reload_supported(devlink->ops));
+ err = devlink_reload(devlink, &init_net,
+ DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+ DEVLINK_RELOAD_LIMIT_UNSPEC,
+ &actions_performed, NULL);
+ if (err && err != -EOPNOTSUPP)
+ pr_warn("Failed to reload devlink instance into init_net\n");
+retry:
+ devlink_put(devlink);
}
mutex_unlock(&devlink_mutex);
}
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index ead2a8aa57b4..49442cae6f69 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -850,8 +850,7 @@ net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata)
}
hw_metadata->input_dev = metadata->input_dev;
- if (hw_metadata->input_dev)
- dev_hold(hw_metadata->input_dev);
+ dev_hold(hw_metadata->input_dev);
return hw_metadata;
@@ -867,8 +866,7 @@ free_hw_metadata:
static void
net_dm_hw_metadata_free(const struct devlink_trap_metadata *hw_metadata)
{
- if (hw_metadata->input_dev)
- dev_put(hw_metadata->input_dev);
+ dev_put(hw_metadata->input_dev);
kfree(hw_metadata->fa_cookie);
kfree(hw_metadata->trap_name);
kfree(hw_metadata->trap_group_name);
diff --git a/net/core/dst.c b/net/core/dst.c
index fb3bcba87744..497ef9b3fc6a 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -49,8 +49,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
unsigned short flags)
{
dst->dev = dev;
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
dst->ops = ops;
dst_init_metrics(dst, dst_default_metrics.metrics, true);
dst->expires = 0UL;
@@ -118,8 +117,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
if (dst->ops->destroy)
dst->ops->destroy(dst);
- if (dst->dev)
- dev_put(dst->dev);
+ dev_put(dst->dev);
lwtstate_put(dst->lwtstate);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index a9f937975080..79df7cd9dbc1 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -57,7 +57,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
{
struct fib_rule *r;
- r = kzalloc(ops->rule_size, GFP_KERNEL);
+ r = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT);
if (r == NULL)
return -ENOMEM;
@@ -541,7 +541,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
- nlrule = kzalloc(ops->rule_size, GFP_KERNEL);
+ nlrule = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT);
if (!nlrule) {
err = -ENOMEM;
goto errout;
diff --git a/net/core/filter.c b/net/core/filter.c
index d70187ce851b..2e32cee2c469 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -77,6 +77,7 @@
#include <net/transp_v6.h>
#include <linux/btf_ids.h>
#include <net/tls.h>
+#include <net/xdp.h>
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -113,7 +114,7 @@ EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
* Run the eBPF program and then cut skb->data to correct size returned by
* the program. If pkt_len is 0 we toss packet. If skb->len is smaller
* than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
+ * wrapper to bpf_prog_run. It returns 0 if the packet should
* be accepted or -EPERM if the packet should be tossed.
*
*/
@@ -2179,17 +2180,9 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
skb->tstamp = 0;
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
- struct sk_buff *skb2;
-
- skb2 = skb_realloc_headroom(skb, hh_len);
- if (unlikely(!skb2)) {
- kfree_skb(skb);
+ skb = skb_expand_head(skb, hh_len);
+ if (!skb)
return -ENOMEM;
- }
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
- consume_skb(skb);
- skb = skb2;
}
rcu_read_lock_bh();
@@ -2213,8 +2206,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
}
rcu_read_unlock_bh();
if (dst)
- IP6_INC_STATS(dev_net(dst->dev),
- ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
out_drop:
kfree_skb(skb);
return -ENETDOWN;
@@ -2286,17 +2278,9 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
skb->tstamp = 0;
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
- struct sk_buff *skb2;
-
- skb2 = skb_realloc_headroom(skb, hh_len);
- if (unlikely(!skb2)) {
- kfree_skb(skb);
+ skb = skb_expand_head(skb, hh_len);
+ if (!skb)
return -ENOMEM;
- }
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
- consume_skb(skb);
- skb = skb2;
}
rcu_read_lock_bh();
@@ -3880,8 +3864,7 @@ BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
if (unlikely(meta < xdp_frame_end ||
meta > xdp->data))
return -EINVAL;
- if (unlikely((metalen & (sizeof(__u32) - 1)) ||
- (metalen > 32)))
+ if (unlikely(xdp_metalen_invalid(metalen)))
return -EACCES;
xdp->data_meta = meta;
@@ -3950,6 +3933,31 @@ void bpf_clear_redirect_map(struct bpf_map *map)
}
}
+DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
+EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
+
+u32 xdp_master_redirect(struct xdp_buff *xdp)
+{
+ struct net_device *master, *slave;
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+ master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev);
+ slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
+ if (slave && slave != xdp->rxq->dev) {
+ /* The target device is different from the receiving device, so
+ * redirect it to the new device.
+ * Using XDP_REDIRECT gets the correct behaviour from XDP enabled
+ * drivers to unmap the packet from their rx ring.
+ */
+ ri->tgt_index = slave->ifindex;
+ ri->map_id = INT_MAX;
+ ri->map_type = BPF_MAP_TYPE_UNSPEC;
+ return XDP_REDIRECT;
+ }
+ return XDP_TX;
+}
+EXPORT_SYMBOL_GPL(xdp_master_redirect);
+
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
@@ -4040,8 +4048,12 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
goto err;
consume_skb(skb);
break;
+ case BPF_MAP_TYPE_CPUMAP:
+ err = cpu_map_generic_redirect(fwd, skb);
+ if (unlikely(err))
+ goto err;
+ break;
default:
- /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
err = -EBADRQC;
goto err;
}
@@ -4664,6 +4676,30 @@ static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = {
.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
};
+BPF_CALL_1(bpf_get_netns_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
+{
+ return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
+}
+
+static const struct bpf_func_proto bpf_get_netns_cookie_sock_ops_proto = {
+ .func = bpf_get_netns_cookie_sock_ops,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
+};
+
+BPF_CALL_1(bpf_get_netns_cookie_sk_msg, struct sk_msg *, ctx)
+{
+ return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
+}
+
+static const struct bpf_func_proto bpf_get_netns_cookie_sk_msg_proto = {
+ .func = bpf_get_netns_cookie_sk_msg,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
+};
+
BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
{
struct sock *sk = sk_to_full_sk(skb->sk);
@@ -5012,6 +5048,46 @@ err_clear:
return -EINVAL;
}
+BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level,
+ int, optname, char *, optval, int, optlen)
+{
+ if (level == SOL_TCP && optname == TCP_CONGESTION) {
+ if (optlen >= sizeof("cdg") - 1 &&
+ !strncmp("cdg", optval, optlen))
+ return -ENOTSUPP;
+ }
+
+ return _bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_sk_setsockopt_proto = {
+ .func = bpf_sk_setsockopt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(bpf_sk_getsockopt, struct sock *, sk, int, level,
+ int, optname, char *, optval, int, optlen)
+{
+ return _bpf_getsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_sk_getsockopt_proto = {
+ .func = bpf_sk_getsockopt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
int, level, int, optname, char *, optval, int, optlen)
{
@@ -7445,6 +7521,8 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
+ case BPF_FUNC_get_netns_cookie:
+ return &bpf_get_netns_cookie_sock_ops_proto;
#ifdef CONFIG_INET
case BPF_FUNC_load_hdr_opt:
return &bpf_sock_ops_load_hdr_opt_proto;
@@ -7491,6 +7569,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
+ case BPF_FUNC_get_netns_cookie:
+ return &bpf_get_netns_cookie_sk_msg_proto;
#ifdef CONFIG_CGROUPS
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
@@ -10069,7 +10149,7 @@ struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
enum sk_action action;
bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, migrating_sk, hash);
- action = BPF_PROG_RUN(prog, &reuse_kern);
+ action = bpf_prog_run(prog, &reuse_kern);
if (action == SK_PASS)
return reuse_kern.selected_sk;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 4b2415d34873..bac0184cf3de 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1056,8 +1056,10 @@ proto_again:
FLOW_DISSECTOR_KEY_IPV4_ADDRS,
target_container);
- memcpy(&key_addrs->v4addrs, &iph->saddr,
- sizeof(key_addrs->v4addrs));
+ memcpy(&key_addrs->v4addrs.src, &iph->saddr,
+ sizeof(key_addrs->v4addrs.src));
+ memcpy(&key_addrs->v4addrs.dst, &iph->daddr,
+ sizeof(key_addrs->v4addrs.dst));
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
}
@@ -1101,8 +1103,10 @@ proto_again:
FLOW_DISSECTOR_KEY_IPV6_ADDRS,
target_container);
- memcpy(&key_addrs->v6addrs, &iph->saddr,
- sizeof(key_addrs->v6addrs));
+ memcpy(&key_addrs->v6addrs.src, &iph->saddr,
+ sizeof(key_addrs->v6addrs.src));
+ memcpy(&key_addrs->v6addrs.dst, &iph->daddr,
+ sizeof(key_addrs->v6addrs.dst));
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 715b67f6c62f..6beaea13564a 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -321,13 +321,13 @@ EXPORT_SYMBOL(flow_block_cb_setup_simple);
static DEFINE_MUTEX(flow_indr_block_lock);
static LIST_HEAD(flow_block_indr_list);
static LIST_HEAD(flow_block_indr_dev_list);
+static LIST_HEAD(flow_indir_dev_list);
struct flow_indr_dev {
struct list_head list;
flow_indr_block_bind_cb_t *cb;
void *cb_priv;
refcount_t refcnt;
- struct rcu_head rcu;
};
static struct flow_indr_dev *flow_indr_dev_alloc(flow_indr_block_bind_cb_t *cb,
@@ -346,6 +346,33 @@ static struct flow_indr_dev *flow_indr_dev_alloc(flow_indr_block_bind_cb_t *cb,
return indr_dev;
}
+struct flow_indir_dev_info {
+ void *data;
+ struct net_device *dev;
+ struct Qdisc *sch;
+ enum tc_setup_type type;
+ void (*cleanup)(struct flow_block_cb *block_cb);
+ struct list_head list;
+ enum flow_block_command command;
+ enum flow_block_binder_type binder_type;
+ struct list_head *cb_list;
+};
+
+static void existing_qdiscs_register(flow_indr_block_bind_cb_t *cb, void *cb_priv)
+{
+ struct flow_block_offload bo;
+ struct flow_indir_dev_info *cur;
+
+ list_for_each_entry(cur, &flow_indir_dev_list, list) {
+ memset(&bo, 0, sizeof(bo));
+ bo.command = cur->command;
+ bo.binder_type = cur->binder_type;
+ INIT_LIST_HEAD(&bo.cb_list);
+ cb(cur->dev, cur->sch, cb_priv, cur->type, &bo, cur->data, cur->cleanup);
+ list_splice(&bo.cb_list, cur->cb_list);
+ }
+}
+
int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv)
{
struct flow_indr_dev *indr_dev;
@@ -367,6 +394,7 @@ int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv)
}
list_add(&indr_dev->list, &flow_block_indr_dev_list);
+ existing_qdiscs_register(cb, cb_priv);
mutex_unlock(&flow_indr_block_lock);
return 0;
@@ -463,7 +491,59 @@ out:
}
EXPORT_SYMBOL(flow_indr_block_cb_alloc);
-int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
+static struct flow_indir_dev_info *find_indir_dev(void *data)
+{
+ struct flow_indir_dev_info *cur;
+
+ list_for_each_entry(cur, &flow_indir_dev_list, list) {
+ if (cur->data == data)
+ return cur;
+ }
+ return NULL;
+}
+
+static int indir_dev_add(void *data, struct net_device *dev, struct Qdisc *sch,
+ enum tc_setup_type type, void (*cleanup)(struct flow_block_cb *block_cb),
+ struct flow_block_offload *bo)
+{
+ struct flow_indir_dev_info *info;
+
+ info = find_indir_dev(data);
+ if (info)
+ return -EEXIST;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ info->data = data;
+ info->dev = dev;
+ info->sch = sch;
+ info->type = type;
+ info->cleanup = cleanup;
+ info->command = bo->command;
+ info->binder_type = bo->binder_type;
+ info->cb_list = bo->cb_list_head;
+
+ list_add(&info->list, &flow_indir_dev_list);
+ return 0;
+}
+
+static int indir_dev_remove(void *data)
+{
+ struct flow_indir_dev_info *info;
+
+ info = find_indir_dev(data);
+ if (!info)
+ return -ENOENT;
+
+ list_del(&info->list);
+
+ kfree(info);
+ return 0;
+}
+
+int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
enum tc_setup_type type, void *data,
struct flow_block_offload *bo,
void (*cleanup)(struct flow_block_cb *block_cb))
@@ -471,6 +551,12 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
struct flow_indr_dev *this;
mutex_lock(&flow_indr_block_lock);
+
+ if (bo->command == FLOW_BLOCK_BIND)
+ indir_dev_add(data, dev, sch, type, cleanup, bo);
+ else if (bo->command == FLOW_BLOCK_UNBIND)
+ indir_dev_remove(data);
+
list_for_each_entry(this, &flow_block_indr_dev_list, list)
this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 75431ca9300f..1a455847da54 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -158,7 +158,7 @@ static void linkwatch_do_dev(struct net_device *dev)
clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
rfc2863_policy(dev);
- if (dev->flags & IFF_UP && netif_device_present(dev)) {
+ if (dev->flags & IFF_UP) {
if (netif_carrier_ok(dev))
dev_activate(dev);
else
@@ -204,7 +204,8 @@ static void __linkwatch_run_queue(int urgent_only)
dev = list_first_entry(&wrk, struct net_device, link_watch_list);
list_del_init(&dev->link_watch_list);
- if (urgent_only && !linkwatch_urgent_event(dev)) {
+ if (!netif_device_present(dev) ||
+ (urgent_only && !linkwatch_urgent_event(dev))) {
list_add_tail(&dev->link_watch_list, &lweventlist);
continue;
}
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 8ec7d13d2860..2820aca2173a 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -23,6 +23,9 @@
#include <net/ip6_fib.h>
#include <net/rtnh.h>
+DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
+EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);
+
#ifdef CONFIG_MODULES
static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
@@ -43,6 +46,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
return "SEG6LOCAL";
case LWTUNNEL_ENCAP_RPL:
return "RPL";
+ case LWTUNNEL_ENCAP_IOAM6:
+ return "IOAM6";
case LWTUNNEL_ENCAP_IP6:
case LWTUNNEL_ENCAP_IP:
case LWTUNNEL_ENCAP_NONE:
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 53e85c70c6e5..2d5bc3a75fae 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -741,12 +741,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
write_pnet(&n->net, net);
memcpy(n->key, pkey, key_len);
n->dev = dev;
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
if (tbl->pconstructor && tbl->pconstructor(n)) {
- if (dev)
- dev_put(dev);
+ dev_put(dev);
kfree(n);
n = NULL;
goto out;
@@ -778,8 +776,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
write_unlock_bh(&tbl->lock);
if (tbl->pdestructor)
tbl->pdestructor(n);
- if (n->dev)
- dev_put(n->dev);
+ dev_put(n->dev);
kfree(n);
return 0;
}
@@ -812,8 +809,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
n->next = NULL;
if (tbl->pdestructor)
tbl->pdestructor(n);
- if (n->dev)
- dev_put(n->dev);
+ dev_put(n->dev);
kfree(n);
}
return -ENOENT;
@@ -1662,8 +1658,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
list_del(&parms->list);
parms->dead = 1;
write_unlock_bh(&tbl->lock);
- if (parms->dev)
- dev_put(parms->dev);
+ dev_put(parms->dev);
call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
@@ -2533,6 +2528,13 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx)
return false;
master = dev ? netdev_master_upper_dev_get(dev) : NULL;
+
+ /* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
+ * invalid value for ifindex to denote "no master".
+ */
+ if (master_idx == -1)
+ return !!master;
+
if (!master || master->ifindex != master_idx)
return true;
@@ -3315,12 +3317,13 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
struct neigh_statistics *st = v;
if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
+ seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
return 0;
}
- seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
- "%08lx %08lx %08lx %08lx %08lx %08lx\n",
+ seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
+ "%08lx %08lx %08lx "
+ "%08lx %08lx %08lx\n",
atomic_read(&tbl->entries),
st->allocs,
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index d8b9dbabd4a4..eab5fc88a002 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -77,8 +77,8 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
- seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
- "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
+ seq_printf(seq, "%9s: %16llu %12llu %4llu %6llu %4llu %5llu %10llu %9llu "
+ "%16llu %12llu %4llu %6llu %4llu %5llu %7llu %10llu\n",
dev->name, stats->rx_bytes, stats->rx_packets,
stats->rx_errors,
stats->rx_dropped + stats->rx_missed_errors,
@@ -103,11 +103,11 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
static int dev_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Inter-| Receive "
- " | Transmit\n"
- " face |bytes packets errs drop fifo frame "
- "compressed multicast|bytes packets errs "
- "drop fifo colls carrier compressed\n");
+ seq_puts(seq, "Interface| Receive "
+ " | Transmit\n"
+ " | bytes packets errs drop fifo frame "
+ "compressed multicast| bytes packets errs "
+ " drop fifo colls carrier compressed\n");
else
dev_seq_printf_stats(seq, v);
return 0;
@@ -259,14 +259,14 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
struct packet_type *pt = v;
if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Type Device Function\n");
+ seq_puts(seq, "Type Device Function\n");
else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
if (pt->type == htons(ETH_P_ALL))
seq_puts(seq, "ALL ");
else
seq_printf(seq, "%04x", ntohs(pt->type));
- seq_printf(seq, " %-8s %ps\n",
+ seq_printf(seq, " %-9s %ps\n",
pt->dev ? pt->dev->name : "", pt->func);
}
@@ -327,12 +327,14 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
struct netdev_hw_addr *ha;
struct net_device *dev = v;
- if (v == SEQ_START_TOKEN)
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, "Ifindex Interface Refcount Global_use Address\n");
return 0;
+ }
netif_addr_lock_bh(dev);
netdev_for_each_mc_addr(ha, dev) {
- seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n",
+ seq_printf(seq, "%-7d %-9s %-8d %-10d %*phN\n",
dev->ifindex, dev->name,
ha->refcount, ha->global_use,
(int)dev->addr_len, ha->addr);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 9b5a767eddd5..a448a9b5bb2d 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -98,7 +98,7 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
}
ng = net_alloc_generic();
- if (ng == NULL)
+ if (!ng)
return -ENOMEM;
/*
@@ -148,13 +148,6 @@ out:
return err;
}
-static void ops_free(const struct pernet_operations *ops, struct net *net)
-{
- if (ops->id && ops->size) {
- kfree(net_generic(net, *ops->id));
- }
-}
-
static void ops_pre_exit_list(const struct pernet_operations *ops,
struct list_head *net_exit_list)
{
@@ -184,7 +177,7 @@ static void ops_free_list(const struct pernet_operations *ops,
struct net *net;
if (ops->size && ops->id) {
list_for_each_entry(net, net_exit_list, exit_list)
- ops_free(ops, net);
+ kfree(net_generic(net, *ops->id));
}
}
@@ -433,15 +426,18 @@ out_free:
static void net_free(struct net *net)
{
- kfree(rcu_access_pointer(net->gen));
- kmem_cache_free(net_cachep, net);
+ if (refcount_dec_and_test(&net->passive)) {
+ kfree(rcu_access_pointer(net->gen));
+ kmem_cache_free(net_cachep, net);
+ }
}
void net_drop_ns(void *p)
{
- struct net *ns = p;
- if (ns && refcount_dec_and_test(&ns->passive))
- net_free(ns);
+ struct net *net = (struct net *)p;
+
+ if (net)
+ net_free(net);
}
struct net *copy_net_ns(unsigned long flags,
@@ -479,7 +475,7 @@ struct net *copy_net_ns(unsigned long flags,
put_userns:
key_remove_domain(net->key_domain);
put_user_ns(user_ns);
- net_drop_ns(net);
+ net_free(net);
dec_ucounts:
dec_net_namespaces(ucounts);
return ERR_PTR(rv);
@@ -611,7 +607,7 @@ static void cleanup_net(struct work_struct *work)
dec_net_namespaces(net->ucounts);
key_remove_domain(net->key_domain);
put_user_ns(net->user_ns);
- net_drop_ns(net);
+ net_free(net);
}
}
@@ -1120,6 +1116,14 @@ static int __init net_ns_init(void)
pure_initcall(net_ns_init);
+static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
+{
+ ops_pre_exit_list(ops, net_exit_list);
+ synchronize_rcu();
+ ops_exit_list(ops, net_exit_list);
+ ops_free_list(ops, net_exit_list);
+}
+
#ifdef CONFIG_NET_NS
static int __register_pernet_operations(struct list_head *list,
struct pernet_operations *ops)
@@ -1145,10 +1149,7 @@ static int __register_pernet_operations(struct list_head *list,
out_undo:
/* If I have an error cleanup all namespaces I initialized */
list_del(&ops->list);
- ops_pre_exit_list(ops, &net_exit_list);
- synchronize_rcu();
- ops_exit_list(ops, &net_exit_list);
- ops_free_list(ops, &net_exit_list);
+ free_exit_list(ops, &net_exit_list);
return error;
}
@@ -1161,10 +1162,8 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
/* See comment in __register_pernet_operations() */
for_each_net(net)
list_add_tail(&net->exit_list, &net_exit_list);
- ops_pre_exit_list(ops, &net_exit_list);
- synchronize_rcu();
- ops_exit_list(ops, &net_exit_list);
- ops_free_list(ops, &net_exit_list);
+
+ free_exit_list(ops, &net_exit_list);
}
#else
@@ -1187,10 +1186,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
} else {
LIST_HEAD(net_exit_list);
list_add(&init_net.exit_list, &net_exit_list);
- ops_pre_exit_list(ops, &net_exit_list);
- synchronize_rcu();
- ops_exit_list(ops, &net_exit_list);
- ops_free_list(ops, &net_exit_list);
+ free_exit_list(ops, &net_exit_list);
}
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 5e4eb45b139c..1a6978427d6c 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -24,6 +24,8 @@
#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)
+#define BIAS_MAX LONG_MAX
+
static int page_pool_init(struct page_pool *pool,
const struct page_pool_params *params)
{
@@ -67,6 +69,10 @@ static int page_pool_init(struct page_pool *pool,
*/
}
+ if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
+ pool->p.flags & PP_FLAG_PAGE_FRAG)
+ return -EINVAL;
+
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
return -ENOMEM;
@@ -206,6 +212,19 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
return true;
}
+static void page_pool_set_pp_info(struct page_pool *pool,
+ struct page *page)
+{
+ page->pp = pool;
+ page->pp_magic |= PP_SIGNATURE;
+}
+
+static void page_pool_clear_pp_info(struct page *page)
+{
+ page->pp_magic = 0;
+ page->pp = NULL;
+}
+
static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
gfp_t gfp)
{
@@ -222,7 +241,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
return NULL;
}
- page->pp_magic |= PP_SIGNATURE;
+ page_pool_set_pp_info(pool, page);
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
@@ -266,7 +285,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
put_page(page);
continue;
}
- page->pp_magic |= PP_SIGNATURE;
+
+ page_pool_set_pp_info(pool, page);
pool->alloc.cache[pool->alloc.count++] = page;
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
@@ -345,12 +365,12 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
DMA_ATTR_SKIP_CPU_SYNC);
page_pool_set_dma_addr(page, 0);
skip_dma_unmap:
- page->pp_magic = 0;
+ page_pool_clear_pp_info(page);
/* This may be the last page returned, releasing the pool, so
* it is not safe to reference pool afterwards.
*/
- count = atomic_inc_return(&pool->pages_state_release_cnt);
+ count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
trace_page_pool_state_release(pool, page, count);
}
EXPORT_SYMBOL(page_pool_release_page);
@@ -405,6 +425,11 @@ static __always_inline struct page *
__page_pool_put_page(struct page_pool *pool, struct page *page,
unsigned int dma_sync_size, bool allow_direct)
{
+ /* It is not the last user for the page frag case */
+ if (pool->p.flags & PP_FLAG_PAGE_FRAG &&
+ page_pool_atomic_sub_frag_count_return(page, 1))
+ return NULL;
+
/* This allocator is optimized for the XDP mode that uses
* one-frame-per-page, but have fallbacks that act like the
* regular page allocator APIs.
@@ -497,6 +522,84 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
}
EXPORT_SYMBOL(page_pool_put_page_bulk);
+static struct page *page_pool_drain_frag(struct page_pool *pool,
+ struct page *page)
+{
+ long drain_count = BIAS_MAX - pool->frag_users;
+
+ /* Some user is still using the page frag */
+ if (likely(page_pool_atomic_sub_frag_count_return(page,
+ drain_count)))
+ return NULL;
+
+ if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
+ if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
+ page_pool_dma_sync_for_device(pool, page, -1);
+
+ return page;
+ }
+
+ page_pool_return_page(pool, page);
+ return NULL;
+}
+
+static void page_pool_free_frag(struct page_pool *pool)
+{
+ long drain_count = BIAS_MAX - pool->frag_users;
+ struct page *page = pool->frag_page;
+
+ pool->frag_page = NULL;
+
+ if (!page ||
+ page_pool_atomic_sub_frag_count_return(page, drain_count))
+ return;
+
+ page_pool_return_page(pool, page);
+}
+
+struct page *page_pool_alloc_frag(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int size, gfp_t gfp)
+{
+ unsigned int max_size = PAGE_SIZE << pool->p.order;
+ struct page *page = pool->frag_page;
+
+ if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
+ size > max_size))
+ return NULL;
+
+ size = ALIGN(size, dma_get_cache_alignment());
+ *offset = pool->frag_offset;
+
+ if (page && *offset + size > max_size) {
+ page = page_pool_drain_frag(pool, page);
+ if (page)
+ goto frag_reset;
+ }
+
+ if (!page) {
+ page = page_pool_alloc_pages(pool, gfp);
+ if (unlikely(!page)) {
+ pool->frag_page = NULL;
+ return NULL;
+ }
+
+ pool->frag_page = page;
+
+frag_reset:
+ pool->frag_users = 1;
+ *offset = 0;
+ pool->frag_offset = size;
+ page_pool_set_frag_count(page, BIAS_MAX);
+ return page;
+ }
+
+ pool->frag_users++;
+ pool->frag_offset = *offset + size;
+ return page;
+}
+EXPORT_SYMBOL(page_pool_alloc_frag);
+
static void page_pool_empty_ring(struct page_pool *pool)
{
struct page *page;
@@ -602,6 +705,8 @@ void page_pool_destroy(struct page_pool *pool)
if (!page_pool_put(pool))
return;
+ page_pool_free_frag(pool);
+
if (!page_pool_release(pool))
return;
@@ -634,7 +739,15 @@ bool page_pool_return_skb_page(struct page *page)
struct page_pool *pp;
page = compound_head(page);
- if (unlikely(page->pp_magic != PP_SIGNATURE))
+
+ /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
+ * in order to preserve any existing bits, such as bit 0 for the
+ * head page of compound page and bit 1 for pfmemalloc page, so
+ * mask those bits for freeing side when doing below checking,
+ * and page_is_pfmemalloc() is checked in __page_pool_put_page()
+ * to avoid recycling the pfmemalloc page.
+ */
+ if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
return false;
pp = page->pp;
@@ -644,7 +757,6 @@ bool page_pool_return_skb_page(struct page *page)
* The page will be returned to the pool here regardless of the
* 'flipped' fragment being in use or not.
*/
- page->pp = NULL;
page_pool_put_full_page(pp, page, false);
return true;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 7e258d255e90..9e5a3249373c 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -175,6 +175,9 @@
#define IP_NAME_SZ 32
#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
#define MPLS_STACK_BOTTOM htonl(0x00000100)
+/* Max number of internet mix entries that can be specified in imix_weights. */
+#define MAX_IMIX_ENTRIES 20
+#define IMIX_PRECISION 100 /* Precision of IMIX distribution */
#define func_enter() pr_debug("entering %s\n", __func__);
@@ -242,6 +245,12 @@ static char *pkt_flag_names[] = {
#define VLAN_TAG_SIZE(x) ((x)->vlan_id == 0xffff ? 0 : 4)
#define SVLAN_TAG_SIZE(x) ((x)->svlan_id == 0xffff ? 0 : 4)
+struct imix_pkt {
+ u64 size;
+ u64 weight;
+ u64 count_so_far;
+};
+
struct flow_state {
__be32 cur_daddr;
int count;
@@ -343,6 +352,12 @@ struct pktgen_dev {
__u8 traffic_class; /* ditto for the (former) Traffic Class in IPv6
(see RFC 3260, sec. 4) */
+ /* IMIX */
+ unsigned int n_imix_entries;
+ struct imix_pkt imix_entries[MAX_IMIX_ENTRIES];
+ /* Maps 0-IMIX_PRECISION range to imix_entry based on probability*/
+ __u8 imix_distribution[IMIX_PRECISION];
+
/* MPLS */
unsigned int nr_labels; /* Depth of stack, 0 = no MPLS */
__be32 labels[MAX_MPLS_LABELS];
@@ -471,6 +486,7 @@ static void pktgen_stop_all_threads(struct pktgen_net *pn);
static void pktgen_stop(struct pktgen_thread *t);
static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
+static void fill_imix_distribution(struct pktgen_dev *pkt_dev);
/* Module parameters, defaults. */
static int pg_count_d __read_mostly = 1000;
@@ -552,6 +568,16 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
(unsigned long long)pkt_dev->count, pkt_dev->min_pkt_size,
pkt_dev->max_pkt_size);
+ if (pkt_dev->n_imix_entries > 0) {
+ seq_puts(seq, " imix_weights: ");
+ for (i = 0; i < pkt_dev->n_imix_entries; i++) {
+ seq_printf(seq, "%llu,%llu ",
+ pkt_dev->imix_entries[i].size,
+ pkt_dev->imix_entries[i].weight);
+ }
+ seq_puts(seq, "\n");
+ }
+
seq_printf(seq,
" frags: %d delay: %llu clone_skb: %d ifname: %s\n",
pkt_dev->nfrags, (unsigned long long) pkt_dev->delay,
@@ -669,6 +695,18 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
(unsigned long long)pkt_dev->sofar,
(unsigned long long)pkt_dev->errors);
+ if (pkt_dev->n_imix_entries > 0) {
+ int i;
+
+ seq_puts(seq, " imix_size_counts: ");
+ for (i = 0; i < pkt_dev->n_imix_entries; i++) {
+ seq_printf(seq, "%llu,%llu ",
+ pkt_dev->imix_entries[i].size,
+ pkt_dev->imix_entries[i].count_so_far);
+ }
+ seq_puts(seq, "\n");
+ }
+
seq_printf(seq,
" started: %lluus stopped: %lluus idle: %lluus\n",
(unsigned long long) ktime_to_us(pkt_dev->started_at),
@@ -792,6 +830,62 @@ done_str:
return i;
}
+/* Parses imix entries from user buffer.
+ * The user buffer should consist of imix entries separated by spaces
+ * where each entry consists of size and weight delimited by commas.
+ * "size1,weight_1 size2,weight_2 ... size_n,weight_n" for example.
+ *
+ * Resets pkt_dev->n_imix_entries and refills pkt_dev->imix_entries[].
+ * Sizes below 14 + 20 + 8 are raised to that minimum; a zero weight is
+ * rejected. On error the entries parsed so far are left in place.
+ *
+ * Returns the number of bytes consumed from @buffer, or a negative errno:
+ * -EFAULT if the buffer cannot be read, -EINVAL on a malformed entry,
+ * -E2BIG if more than MAX_IMIX_ENTRIES entries are supplied, or the error
+ * returned by num_arg().
+ */
+static ssize_t get_imix_entries(const char __user *buffer,
+				struct pktgen_dev *pkt_dev)
+{
+	const int max_digits = 10;
+	int i = 0;
+	long len;
+	char c;
+
+	pkt_dev->n_imix_entries = 0;
+
+	do {
+		unsigned long weight;
+		unsigned long size;
+
+		/* Check the bound before storing: the loop condition only
+		 * looks at the separator, so checking after the increment
+		 * would let one entry be written past imix_entries[] and
+		 * leave n_imix_entries larger than the array.
+		 */
+		if (pkt_dev->n_imix_entries >= MAX_IMIX_ENTRIES)
+			return -E2BIG;
+
+		len = num_arg(&buffer[i], max_digits, &size);
+		if (len < 0)
+			return len;
+		i += len;
+		if (get_user(c, &buffer[i]))
+			return -EFAULT;
+		/* Check for comma between size_i and weight_i */
+		if (c != ',')
+			return -EINVAL;
+		i++;
+
+		if (size < 14 + 20 + 8)
+			size = 14 + 20 + 8;
+
+		len = num_arg(&buffer[i], max_digits, &weight);
+		if (len < 0)
+			return len;
+		/* weight is unsigned, so only zero needs rejecting */
+		if (weight == 0)
+			return -EINVAL;
+
+		pkt_dev->imix_entries[pkt_dev->n_imix_entries].size = size;
+		pkt_dev->imix_entries[pkt_dev->n_imix_entries].weight = weight;
+
+		i += len;
+		if (get_user(c, &buffer[i]))
+			return -EFAULT;
+
+		i++;
+		pkt_dev->n_imix_entries++;
+	} while (c == ' ');
+
+	return i;
+}
+
+
static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
{
unsigned int n = 0;
@@ -960,6 +1054,20 @@ static ssize_t pktgen_if_write(struct file *file,
return count;
}
+ if (!strcmp(name, "imix_weights")) {
+ if (pkt_dev->clone_skb > 0)
+ return -EINVAL;
+
+ len = get_imix_entries(&user_buffer[i], pkt_dev);
+ if (len < 0)
+ return len;
+
+ fill_imix_distribution(pkt_dev);
+
+ i += len;
+ return count;
+ }
+
if (!strcmp(name, "debug")) {
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
@@ -1082,10 +1190,16 @@ static ssize_t pktgen_if_write(struct file *file,
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
return len;
+ /* clone_skb is not supported for netif_receive xmit_mode and
+ * IMIX mode.
+ */
if ((value > 0) &&
((pkt_dev->xmit_mode == M_NETIF_RECEIVE) ||
!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
return -ENOTSUPP;
+ if (value > 0 && pkt_dev->n_imix_entries > 0)
+ return -EINVAL;
+
i += len;
pkt_dev->clone_skb = value;
@@ -1190,11 +1304,6 @@ static ssize_t pktgen_if_write(struct file *file,
* pktgen_xmit() is called
*/
pkt_dev->last_ok = 1;
-
- /* override clone_skb if user passed default value
- * at module loading time
- */
- pkt_dev->clone_skb = 0;
} else if (strcmp(f, "queue_xmit") == 0) {
pkt_dev->xmit_mode = M_QUEUE_XMIT;
pkt_dev->last_ok = 1;
@@ -2477,6 +2586,14 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
t = pkt_dev->min_pkt_size;
}
pkt_dev->cur_pkt_size = t;
+ } else if (pkt_dev->n_imix_entries > 0) {
+ struct imix_pkt *entry;
+ __u32 t = prandom_u32() % IMIX_PRECISION;
+ __u8 entry_index = pkt_dev->imix_distribution[t];
+
+ entry = &pkt_dev->imix_entries[entry_index];
+ entry->count_so_far++;
+ pkt_dev->cur_pkt_size = entry->size;
}
set_cur_queue_map(pkt_dev);
@@ -2484,6 +2601,32 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
pkt_dev->flows[flow].count++;
}
+/* Build the imix_distribution[] lookup table from the configured weights.
+ *
+ * Each of the IMIX_PRECISION slots receives the index of an imix entry, so
+ * that the share of slots owned by an entry approximates
+ * weight / total_weight (rounded down). The last entry takes whatever slots
+ * remain. Does nothing when no imix entries are configured.
+ */
+static void fill_imix_distribution(struct pktgen_dev *pkt_dev)
+{
+	unsigned int cumulative_probabilities[MAX_IMIX_ENTRIES];
+	unsigned int n = pkt_dev->n_imix_entries;
+	__u64 cumulative_prob = 0;
+	__u64 total_weight = 0;
+	unsigned int i;
+	unsigned int j = 0;
+
+	/* n - 1 below would wrap for n == 0 and index before the array */
+	if (n == 0)
+		return;
+
+	for (i = 0; i < n; i++)
+		total_weight += pkt_dev->imix_entries[i].weight;
+
+	/* Fill cumulative_probabilities with sum of normalized probabilities */
+	for (i = 0; i < n - 1; i++) {
+		cumulative_prob += div64_u64(pkt_dev->imix_entries[i].weight *
+					     IMIX_PRECISION,
+					     total_weight);
+		cumulative_probabilities[i] = cumulative_prob;
+	}
+	cumulative_probabilities[n - 1] = IMIX_PRECISION;
+
+	for (i = 0; i < IMIX_PRECISION; i++) {
+		/* Step past every entry whose range ends at or before this
+		 * slot, including zero-width ones, so that an entry with a
+		 * negligible weight cannot take over the slots belonging to
+		 * the entries that follow it.
+		 */
+		while (j < n - 1 && i >= cumulative_probabilities[j])
+			j++;
+		pkt_dev->imix_distribution[i] = j;
+	}
+}
#ifdef CONFIG_XFRM
static u32 pktgen_dst_metrics[RTAX_MAX + 1] = {
@@ -3145,7 +3288,19 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
pps = div64_u64(pkt_dev->sofar * NSEC_PER_SEC,
ktime_to_ns(elapsed));
- bps = pps * 8 * pkt_dev->cur_pkt_size;
+ if (pkt_dev->n_imix_entries > 0) {
+ int i;
+ struct imix_pkt *entry;
+
+ bps = 0;
+ for (i = 0; i < pkt_dev->n_imix_entries; i++) {
+ entry = &pkt_dev->imix_entries[i];
+ bps += entry->size * entry->count_so_far;
+ }
+ bps = div64_u64(bps * 8 * NSEC_PER_SEC, ktime_to_ns(elapsed));
+ } else {
+ bps = pps * 8 * pkt_dev->cur_pkt_size;
+ }
mbps = bps;
do_div(mbps, 1000000);
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index e33fde06d528..dd4cf01d1e0a 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -103,7 +103,7 @@ static struct bpf_prog *ptp_insns __read_mostly;
unsigned int ptp_classify_raw(const struct sk_buff *skb)
{
- return BPF_PROG_RUN(ptp_insns, skb);
+ return bpf_prog_run(ptp_insns, skb);
}
EXPORT_SYMBOL_GPL(ptp_classify_raw);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f6af3e74fc44..972c8cb303a5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -710,15 +710,8 @@ out:
int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo)
{
struct sock *rtnl = net->rtnl;
- int err = 0;
- NETLINK_CB(skb).dst_group = group;
- if (echo)
- refcount_inc(&skb->users);
- netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
- if (echo)
- err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
- return err;
+ return nlmsg_notify(rtnl, skb, pid, group, echo, GFP_KERNEL);
}
int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
@@ -733,12 +726,8 @@ void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
struct nlmsghdr *nlh, gfp_t flags)
{
struct sock *rtnl = net->rtnl;
- int report = 0;
- if (nlh)
- report = nlmsg_report(nlh);
-
- nlmsg_notify(rtnl, skb, pid, group, report, flags);
+ nlmsg_notify(rtnl, skb, pid, group, nlmsg_report(nlh), flags);
}
EXPORT_SYMBOL(rtnl_notify);
@@ -1970,6 +1959,13 @@ static bool link_master_filtered(struct net_device *dev, int master_idx)
return false;
master = netdev_master_upper_dev_get(dev);
+
+ /* 0 is already used to denote IFLA_MASTER wasn't passed, therefore need
+ * another invalid value for ifindex to denote "no master".
+ */
+ if (master_idx == -1)
+ return !!master;
+
if (!master || master->ifindex != master_idx)
return true;
@@ -2268,7 +2264,8 @@ invalid_attr:
return -EINVAL;
}
-static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
+static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
{
if (dev) {
if (tb[IFLA_ADDRESS] &&
@@ -2295,7 +2292,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
return -EOPNOTSUPP;
if (af_ops->validate_link_af) {
- err = af_ops->validate_link_af(dev, af);
+ err = af_ops->validate_link_af(dev, af, extack);
if (err < 0)
return err;
}
@@ -2603,11 +2600,12 @@ static int do_setlink(const struct sk_buff *skb,
const struct net_device_ops *ops = dev->netdev_ops;
int err;
- err = validate_linkmsg(dev, tb);
+ err = validate_linkmsg(dev, tb, extack);
if (err < 0)
return err;
if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) {
+ const char *pat = ifname && ifname[0] ? ifname : NULL;
struct net *net;
int new_ifindex;
@@ -2623,7 +2621,7 @@ static int do_setlink(const struct sk_buff *skb,
else
new_ifindex = 0;
- err = __dev_change_net_namespace(dev, net, ifname, new_ifindex);
+ err = __dev_change_net_namespace(dev, net, pat, new_ifindex);
put_net(net);
if (err)
goto errout;
@@ -3301,7 +3299,7 @@ replay:
m_ops = master_dev->rtnl_link_ops;
}
- err = validate_linkmsg(dev, tb);
+ err = validate_linkmsg(dev, tb, extack);
if (err < 0)
return err;
diff --git a/net/core/scm.c b/net/core/scm.c
index ae3085d9aae8..5c356f0dee30 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -79,7 +79,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
if (!fpl)
{
- fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+ fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL_ACCOUNT);
if (!fpl)
return -ENOMEM;
*fplp = fpl;
@@ -355,7 +355,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
return NULL;
new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (new_fpl) {
for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
diff --git a/net/core/selftests.c b/net/core/selftests.c
index ba7b0171974c..9077fa969892 100644
--- a/net/core/selftests.c
+++ b/net/core/selftests.c
@@ -318,6 +318,15 @@ static int net_test_phy_loopback_udp(struct net_device *ndev)
return __net_test_loopback(ndev, &attr);
}
+/* Run the loopback test towards the device's own address with the maximum
+ * packet size set to the device MTU; all other attributes stay zeroed.
+ */
+static int net_test_phy_loopback_udp_mtu(struct net_device *ndev)
+{
+	struct net_packet_attrs attr = {
+		.dst = ndev->dev_addr,
+		.max_size = ndev->mtu,
+	};
+
+	return __net_test_loopback(ndev, &attr);
+}
+
static int net_test_phy_loopback_tcp(struct net_device *ndev)
{
struct net_packet_attrs attr = { };
@@ -345,6 +354,9 @@ static const struct net_test {
.name = "PHY internal loopback, UDP ",
.fn = net_test_phy_loopback_udp,
}, {
+ .name = "PHY internal loopback, MTU ",
+ .fn = net_test_phy_loopback_udp_mtu,
+ }, {
.name = "PHY internal loopback, TCP ",
.fn = net_test_phy_loopback_tcp,
}, {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fc7942c0dddc..f9311762cc47 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -156,7 +156,7 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
void *data;
fragsz = SKB_DATA_ALIGN(fragsz);
- if (in_irq() || irqs_disabled()) {
+ if (in_hardirq() || irqs_disabled()) {
nc = this_cpu_ptr(&netdev_alloc_cache);
data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask);
} else {
@@ -502,7 +502,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
- if (in_irq() || irqs_disabled()) {
+ if (in_hardirq() || irqs_disabled()) {
nc = this_cpu_ptr(&netdev_alloc_cache);
data = page_frag_alloc(nc, len, gfp_mask);
pfmemalloc = nc->pfmemalloc;
@@ -724,7 +724,7 @@ void skb_release_head_state(struct sk_buff *skb)
{
skb_dst_drop(skb);
if (skb->destructor) {
- WARN_ON(in_irq());
+ WARN_ON(in_hardirq());
skb->destructor(skb);
}
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
@@ -954,9 +954,13 @@ void __kfree_skb_defer(struct sk_buff *skb)
void napi_skb_free_stolen_head(struct sk_buff *skb)
{
- nf_reset_ct(skb);
- skb_dst_drop(skb);
- skb_ext_put(skb);
+ if (unlikely(skb->slow_gro)) {
+ nf_reset_ct(skb);
+ skb_dst_drop(skb);
+ skb_ext_put(skb);
+ skb_orphan(skb);
+ skb->slow_gro = 0;
+ }
napi_skb_cache_put(skb);
}
@@ -1786,6 +1790,48 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
EXPORT_SYMBOL(skb_realloc_headroom);
/**
+ * skb_expand_head - reallocate header of &sk_buff
+ * @skb: buffer to reallocate
+ * @headroom: needed headroom
+ *
+ * Unlike skb_realloc_headroom, this one does not allocate a new skb
+ * if possible; copies skb->sk to new skb as needed
+ * and frees original skb in case of failures.
+ *
+ * It expects increased headroom and generates a warning otherwise.
+ */
+
+struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
+{
+	int delta = headroom - skb_headroom(skb);
+
+	/* Nothing to grow: warn once and hand the skb back untouched */
+	if (WARN_ONCE(delta <= 0,
+		      "%s is expecting an increase in the headroom", __func__))
+		return skb;
+
+	/* pskb_expand_head() might crash, if skb is shared */
+	if (skb_shared(skb)) {
+		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+		if (unlikely(!nskb)) {
+			/* Clone failed: the original is freed, caller gets NULL */
+			kfree_skb(skb);
+			return NULL;
+		}
+
+		/* Carry the socket over to the clone, then drop our
+		 * reference to the shared original.
+		 */
+		if (skb->sk)
+			skb_set_owner_w(nskb, skb->sk);
+		consume_skb(skb);
+		skb = nskb;
+	}
+
+	if (pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
+		kfree_skb(skb);
+		return NULL;
+	}
+
+	return skb;
+}
+EXPORT_SYMBOL(skb_expand_head);
+
+/**
* skb_copy_expand - copy and expand sk_buff
* @skb: buffer to copy
* @newheadroom: new free bytes at head
@@ -3889,6 +3935,9 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
NAPI_GRO_CB(p)->last = skb;
NAPI_GRO_CB(p)->count++;
p->data_len += skb->len;
+
+ /* sk ownership - if any - completely transferred to the aggregated packet */
+ skb->destructor = NULL;
p->truesize += skb->truesize;
p->len += skb->len;
@@ -4256,6 +4305,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
unsigned int headlen = skb_headlen(skb);
unsigned int len = skb_gro_len(skb);
unsigned int delta_truesize;
+ unsigned int new_truesize;
struct sk_buff *lp;
if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush))
@@ -4287,10 +4337,10 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
skb_frag_size_sub(frag, offset);
/* all fragments truesize : remove (head size + sk_buff) */
- delta_truesize = skb->truesize -
- SKB_TRUESIZE(skb_end_offset(skb));
+ new_truesize = SKB_TRUESIZE(skb_end_offset(skb));
+ delta_truesize = skb->truesize - new_truesize;
- skb->truesize -= skb->data_len;
+ skb->truesize = new_truesize;
skb->len -= skb->data_len;
skb->data_len = 0;
@@ -4319,12 +4369,16 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
/* We dont need to clear skbinfo->nr_frags here */
- delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
+ new_truesize = SKB_DATA_ALIGN(sizeof(struct sk_buff));
+ delta_truesize = skb->truesize - new_truesize;
+ skb->truesize = new_truesize;
NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
goto done;
}
merge:
+ /* sk ownership - if any - completely transferred to the aggregated packet */
+ skb->destructor = NULL;
delta_truesize = skb->truesize;
if (offset > headlen) {
unsigned int eat = offset - headlen;
@@ -6449,6 +6503,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
new->chunks = newlen;
new->offset[id] = newoff;
set_active:
+ skb->slow_gro = 1;
skb->extensions = new;
skb->active_extensions |= 1 << id;
return skb_ext_get_ptr(new, id);
diff --git a/net/core/sock.c b/net/core/sock.c
index a3eea6e0b30a..62627e868e03 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -226,6 +226,7 @@ static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \
x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \
x "AF_QIPCRTR", x "AF_SMC" , x "AF_XDP" , \
+ x "AF_MCTP" , \
x "AF_MAX"
static const char *const af_family_key_strings[AF_MAX+1] = {
@@ -1357,6 +1358,15 @@ set_sndbuf:
ret = sock_bindtoindex_locked(sk, val);
break;
+ case SO_BUF_LOCK:
+ if (val & ~SOCK_BUF_LOCK_MASK) {
+ ret = -EINVAL;
+ break;
+ }
+ sk->sk_userlocks = val | (sk->sk_userlocks &
+ ~SOCK_BUF_LOCK_MASK);
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
@@ -1719,6 +1729,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val64 = sock_net(sk)->net_cookie;
break;
+ case SO_BUF_LOCK:
+ v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -2560,7 +2574,6 @@ static void sk_leave_memory_pressure(struct sock *sk)
}
}
-#define SKB_FRAG_PAGE_ORDER get_order(32768)
DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
/**
@@ -2714,10 +2727,12 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
struct proto *prot = sk->sk_prot;
long allocated = sk_memory_allocated_add(sk, amt);
+ bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg;
bool charged = true;
- if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
- !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt)))
+ if (memcg_charge &&
+ !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt,
+ gfp_memcg_charge())))
goto suppress_allocation;
/* Under limit. */
@@ -2771,8 +2786,14 @@ suppress_allocation:
/* Fail only if socket is _under_ its sndbuf.
* In this case we cannot block, so that we have to fail.
*/
- if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
+ if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
+ /* Force charge with __GFP_NOFAIL */
+ if (memcg_charge && !charged) {
+ mem_cgroup_charge_skmem(sk->sk_memcg, amt,
+ gfp_memcg_charge() | __GFP_NOFAIL);
+ }
return 1;
+ }
}
if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
@@ -2780,7 +2801,7 @@ suppress_allocation:
sk_memory_allocated_sub(sk, amt);
- if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+ if (memcg_charge && charged)
mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
return 0;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 60decd6420ca..e252b8ec2b85 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -211,8 +211,6 @@ out:
return psock;
}
-static bool sock_map_redirect_allowed(const struct sock *sk);
-
static int sock_map_link(struct bpf_map *map, struct sock *sk)
{
struct sk_psock_progs *progs = sock_map_progs(map);
@@ -223,13 +221,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
struct sk_psock *psock;
int ret;
- /* Only sockets we can redirect into/from in BPF need to hold
- * refs to parser/verdict progs and have their sk_data_ready
- * and sk_write_space callbacks overridden.
- */
- if (!sock_map_redirect_allowed(sk))
- goto no_progs;
-
stream_verdict = READ_ONCE(progs->stream_verdict);
if (stream_verdict) {
stream_verdict = bpf_prog_inc_not_zero(stream_verdict);
@@ -264,7 +255,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
}
}
-no_progs:
psock = sock_map_psock_get_checked(sk);
if (IS_ERR(psock)) {
ret = PTR_ERR(psock);
@@ -527,12 +517,6 @@ static bool sk_is_tcp(const struct sock *sk)
sk->sk_protocol == IPPROTO_TCP;
}
-static bool sk_is_udp(const struct sock *sk)
-{
- return sk->sk_type == SOCK_DGRAM &&
- sk->sk_protocol == IPPROTO_UDP;
-}
-
static bool sock_map_redirect_allowed(const struct sock *sk)
{
if (sk_is_tcp(sk))
@@ -550,10 +534,7 @@ static bool sock_map_sk_state_allowed(const struct sock *sk)
{
if (sk_is_tcp(sk))
return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
- else if (sk_is_udp(sk))
- return sk_hashed(sk);
-
- return false;
+ return true;
}
static int sock_hash_update_common(struct bpf_map *map, void *key,
@@ -1513,6 +1494,7 @@ void sock_map_unhash(struct sock *sk)
rcu_read_unlock();
saved_unhash(sk);
}
+EXPORT_SYMBOL_GPL(sock_map_unhash);
void sock_map_close(struct sock *sk, long timeout)
{
@@ -1536,6 +1518,7 @@ void sock_map_close(struct sock *sk, long timeout)
release_sock(sk);
saved_close(sk, timeout);
}
+EXPORT_SYMBOL_GPL(sock_map_close);
static int sock_map_iter_attach_target(struct bpf_prog *prog,
union bpf_iter_link_info *linfo,
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 9cc9d1ee6cdb..c5c1d2b8045e 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -41,9 +41,9 @@ extern bool dccp_debug;
#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
#else
-#define dccp_pr_debug(format, a...)
-#define dccp_pr_debug_cat(format, a...)
-#define dccp_debug(format, a...)
+#define dccp_pr_debug(format, a...) do {} while (0)
+#define dccp_pr_debug_cat(format, a...) do {} while (0)
+#define dccp_debug(format, a...) do {} while (0)
#endif
extern struct inet_hashinfo dccp_hashinfo;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 7eb0fb231940..abb5c596a817 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1126,7 +1126,7 @@ static int __init dccp_init(void)
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_free_hashinfo2;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index d1c50a48614b..0ee7d4c0c955 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -521,8 +521,7 @@ int dn_dev_set_default(struct net_device *dev, int force)
}
spin_unlock(&dndev_lock);
- if (old)
- dev_put(old);
+ dev_put(old);
return rv;
}
@@ -536,8 +535,7 @@ static void dn_dev_check_default(struct net_device *dev)
}
spin_unlock(&dndev_lock);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
}
/*
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 77fbf8e9df4b..269c029ad74f 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -92,8 +92,7 @@ void dn_fib_free_info(struct dn_fib_info *fi)
}
change_nexthops(fi) {
- if (nh->nh_dev)
- dev_put(nh->nh_dev);
+ dev_put(nh->nh_dev);
nh->nh_dev = NULL;
} endfor_nexthops(fi);
kfree(fi);
@@ -102,7 +101,7 @@ void dn_fib_free_info(struct dn_fib_info *fi)
void dn_fib_release_info(struct dn_fib_info *fi)
{
spin_lock(&dn_fib_info_lock);
- if (fi && --fi->fib_treeref == 0) {
+ if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
if (fi->fib_next)
fi->fib_next->fib_prev = fi->fib_prev;
if (fi->fib_prev)
@@ -385,11 +384,11 @@ link_it:
if ((ofi = dn_fib_find_info(fi)) != NULL) {
fi->fib_dead = 1;
dn_fib_free_info(fi);
- ofi->fib_treeref++;
+ refcount_inc(&ofi->fib_treeref);
return ofi;
}
- fi->fib_treeref++;
+ refcount_set(&fi->fib_treeref, 1);
refcount_set(&fi->fib_clntref, 1);
spin_lock(&dn_fib_info_lock);
fi->fib_next = dn_fib_info_list;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 729d3de6020d..7e85f2a1ae25 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1026,8 +1026,7 @@ source_ok:
if (!fld.daddr) {
fld.daddr = fld.saddr;
- if (dev_out)
- dev_put(dev_out);
+ dev_put(dev_out);
err = -EINVAL;
dev_out = init_net.loopback_dev;
if (!dev_out->dn_ptr)
@@ -1084,8 +1083,7 @@ source_ok:
neigh_release(neigh);
neigh = NULL;
} else {
- if (dev_out)
- dev_put(dev_out);
+ dev_put(dev_out);
if (dn_dev_islocal(neigh->dev, fld.daddr)) {
dev_out = init_net.loopback_dev;
res.type = RTN_LOCAL;
@@ -1144,8 +1142,7 @@ select_source:
if (res.type == RTN_LOCAL) {
if (!fld.saddr)
fld.saddr = fld.daddr;
- if (dev_out)
- dev_put(dev_out);
+ dev_put(dev_out);
dev_out = init_net.loopback_dev;
dev_hold(dev_out);
if (!dev_out->dn_ptr)
@@ -1168,8 +1165,7 @@ select_source:
if (!fld.saddr)
fld.saddr = DN_FIB_RES_PREFSRC(res);
- if (dev_out)
- dev_put(dev_out);
+ dev_put(dev_out);
dev_out = DN_FIB_RES_DEV(res);
dev_hold(dev_out);
fld.flowidn_oif = dev_out->ifindex;
@@ -1222,8 +1218,7 @@ done:
neigh_release(neigh);
if (free_res)
dn_fib_res_put(&res);
- if (dev_out)
- dev_put(dev_out);
+ dev_put(dev_out);
out:
return err;
@@ -1503,8 +1498,7 @@ done:
if (free_res)
dn_fib_res_put(&res);
dev_put(in_dev);
- if (out_dev)
- dev_put(out_dev);
+ dev_put(out_dev);
out:
return err;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 00bb89b2d86f..548285539752 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -18,16 +18,6 @@ if NET_DSA
# Drivers must select the appropriate tagging format(s)
-config NET_DSA_TAG_8021Q
- tristate
- select VLAN_8021Q
- help
- Unlike the other tagging protocols, the 802.1Q config option simply
- provides helpers for other tagging implementations that might rely on
- VLAN in one way or another. It is not a complete solution.
-
- Drivers which use these helpers should select this as dependency.
-
config NET_DSA_TAG_AR9331
tristate "Tag driver for Atheros AR9331 SoC with built-in switch"
help
@@ -126,7 +116,6 @@ config NET_DSA_TAG_OCELOT_8021Q
tristate "Tag driver for Ocelot family of switches, using VLAN"
depends on MSCC_OCELOT_SWITCH_LIB || \
(MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST)
- select NET_DSA_TAG_8021Q
help
Say Y or M if you want to enable support for tagging frames with a
custom VLAN-based header. Frames that require timestamping, such as
@@ -149,7 +138,7 @@ config NET_DSA_TAG_LAN9303
config NET_DSA_TAG_SJA1105
tristate "Tag driver for NXP SJA1105 switches"
- select NET_DSA_TAG_8021Q
+ depends on NET_DSA_SJA1105 || !NET_DSA_SJA1105
select PACKING
help
Say Y or M if you want to enable support for tagging frames with the
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 44bc79952b8b..67ea009f242c 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,10 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o
+dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o tag_8021q.o
# tagging formats
-obj-$(CONFIG_NET_DSA_TAG_8021Q) += tag_8021q.o
obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o
obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o
obj-$(CONFIG_NET_DSA_TAG_DSA_COMMON) += tag_dsa.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 84cad1be9ce4..1dc45e40f961 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -238,7 +238,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
if (!skb)
return 0;
- nskb = cpu_dp->rcv(skb, dev, pt);
+ nskb = cpu_dp->rcv(skb, dev);
if (!nskb) {
kfree_skb(skb);
return 0;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 185629f27f80..1b2b25d7bd02 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -21,6 +21,9 @@
static DEFINE_MUTEX(dsa2_mutex);
LIST_HEAD(dsa_tree_list);
+/* Track the bridges with forwarding offload enabled */
+static unsigned long dsa_fwd_offloading_bridges;
+
/**
* dsa_tree_notify - Execute code for all switches in a DSA switch tree.
* @dst: collection of struct dsa_switch devices to notify.
@@ -49,6 +52,9 @@ int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v)
* Can be used to notify the switching fabric of events such as cross-chip
* bridging between disjoint trees (such as islands of tagger-compatible
* switches bridged by an incompatible middle switch).
+ *
+ * WARNING: this function is not reliable during probe time, because probing
+ * between trees is asynchronous and not all DSA trees might have probed.
*/
int dsa_broadcast(unsigned long e, void *v)
{
@@ -123,6 +129,51 @@ void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag)
}
}
+/* Scan every port of every DSA tree for one that is attached to @bridge_dev
+ * and already has a bridge_num assigned. Returns that bridge_num, or -1 when
+ * no such port exists.
+ */
+static int dsa_bridge_num_find(const struct net_device *bridge_dev)
+{
+	struct dsa_switch_tree *dst;
+	struct dsa_port *dp;
+
+	list_for_each_entry(dst, &dsa_tree_list, list) {
+		list_for_each_entry(dp, &dst->ports, list) {
+			if (dp->bridge_dev != bridge_dev)
+				continue;
+
+			/* A port whose offload is still being prepared has a
+			 * valid dp->bridge_dev but no dp->bridge_num yet.
+			 * Skip it; another port of the same bridge may
+			 * already carry a valid number.
+			 */
+			if (dp->bridge_num == -1)
+				continue;
+
+			return dp->bridge_num;
+		}
+	}
+
+	return -1;
+}
+
+/* Return the bridge_num used for @bridge_dev, allocating a free one from
+ * dsa_fwd_offloading_bridges if no port has one yet. Returns -1 when the
+ * first free number is not below @max.
+ */
+int dsa_bridge_num_get(const struct net_device *bridge_dev, int max)
+{
+	int bridge_num = dsa_bridge_num_find(bridge_dev);
+
+	/* Some other port already offloads this bridge: share its number */
+	if (bridge_num >= 0)
+		return bridge_num;
+
+	/* First port that offloads TX forwarding for this bridge */
+	bridge_num = find_first_zero_bit(&dsa_fwd_offloading_bridges,
+					 DSA_MAX_NUM_OFFLOADING_BRIDGES);
+	if (bridge_num >= max)
+		return -1;
+
+	set_bit(bridge_num, &dsa_fwd_offloading_bridges);
+
+	return bridge_num;
+}
+
+/* Release @bridge_num once no port uses @bridge_dev any more, so that the
+ * number can be handed out again by dsa_bridge_num_get().
+ */
+void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num)
+{
+	/* Check if the bridge is still in use, otherwise it is time
+	 * to clean it up so we can reuse this bridge_num later.
+	 *
+	 * dsa_bridge_num_find() reports "not in use" as -1, and 0 is a valid
+	 * bridge_num, so the result must be compared against 0 rather than
+	 * negated.
+	 */
+	if (dsa_bridge_num_find(bridge_dev) < 0)
+		clear_bit(bridge_num, &dsa_fwd_offloading_bridges);
+}
+
struct dsa_switch *dsa_switch_find(int tree_index, int sw_index)
{
struct dsa_switch_tree *dst;
@@ -311,6 +362,9 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
return NULL;
}
+/* Assign the default CPU port (the first one in the tree) to all ports of the
+ * fabric which don't already have one as part of their own switch.
+ */
static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
{
struct dsa_port *cpu_dp, *dp;
@@ -321,15 +375,48 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
return -EINVAL;
}
- /* Assign the default CPU port to all ports of the fabric */
- list_for_each_entry(dp, &dst->ports, list)
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->cpu_dp)
+ continue;
+
if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
dp->cpu_dp = cpu_dp;
+ }
return 0;
}
-static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
+/* Hand out CPU ports to the user ports and DSA links of the fabric.
+ *
+ * Every port first gets the first CPU port found on its own switch, if there
+ * is one. Ports left without a CPU port are then covered by
+ * dsa_tree_setup_default_cpu(), whose return value is passed on.
+ */
+static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst)
+{
+	struct dsa_port *cpu_dp, *dp;
+
+	list_for_each_entry(cpu_dp, &dst->ports, list) {
+		if (!dsa_port_is_cpu(cpu_dp))
+			continue;
+
+		list_for_each_entry(dp, &dst->ports, list) {
+			/* Only ports local to this CPU port's switch that
+			 * have no CPU port yet are assigned, so the first
+			 * local CPU port found wins.
+			 */
+			if (dp->ds == cpu_dp->ds && !dp->cpu_dp &&
+			    (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)))
+				dp->cpu_dp = cpu_dp;
+		}
+	}
+
+	return dsa_tree_setup_default_cpu(dst);
+}
+
+static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst)
{
struct dsa_port *dp;
@@ -710,13 +797,14 @@ static int dsa_switch_setup(struct dsa_switch *ds)
/* Add the switch to devlink before calling setup, so that setup can
* add dpipe tables
*/
- ds->devlink = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv));
+ ds->devlink =
+ devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv), ds->dev);
if (!ds->devlink)
return -ENOMEM;
dl_priv = devlink_priv(ds->devlink);
dl_priv->ds = ds;
- err = devlink_register(ds->devlink, ds->dev);
+ err = devlink_register(ds->devlink);
if (err)
goto free_devlink;
@@ -921,13 +1009,13 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
if (!complete)
return 0;
- err = dsa_tree_setup_default_cpu(dst);
+ err = dsa_tree_setup_cpu_ports(dst);
if (err)
return err;
err = dsa_tree_setup_switches(dst);
if (err)
- goto teardown_default_cpu;
+ goto teardown_cpu_ports;
err = dsa_tree_setup_master(dst);
if (err)
@@ -947,8 +1035,8 @@ teardown_master:
dsa_tree_teardown_master(dst);
teardown_switches:
dsa_tree_teardown_switches(dst);
-teardown_default_cpu:
- dsa_tree_teardown_default_cpu(dst);
+teardown_cpu_ports:
+ dsa_tree_teardown_cpu_ports(dst);
return err;
}
@@ -966,7 +1054,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
dsa_tree_teardown_switches(dst);
- dsa_tree_teardown_default_cpu(dst);
+ dsa_tree_teardown_cpu_ports(dst);
list_for_each_entry_safe(dl, next, &dst->rtable, list) {
list_del(&dl->list);
@@ -1044,6 +1132,7 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
dp->ds = ds;
dp->index = index;
+ dp->bridge_num = -1;
INIT_LIST_HEAD(&dp->list);
list_add_tail(&dp->list, &dst->ports);
@@ -1265,6 +1354,9 @@ static int dsa_switch_parse_member_of(struct dsa_switch *ds,
return -EEXIST;
}
+ if (ds->dst->last_switch < ds->index)
+ ds->dst->last_switch = ds->index;
+
return 0;
}
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index f201c33980bf..33ab7d7af9eb 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -14,6 +14,8 @@
#include <net/dsa.h>
#include <net/gro_cells.h>
+#define DSA_MAX_NUM_OFFLOADING_BRIDGES BITS_PER_LONG
+
enum {
DSA_NOTIFIER_AGEING_TIME,
DSA_NOTIFIER_BRIDGE_JOIN,
@@ -39,6 +41,8 @@ enum {
DSA_NOTIFIER_MRP_DEL,
DSA_NOTIFIER_MRP_ADD_RING_ROLE,
DSA_NOTIFIER_MRP_DEL_RING_ROLE,
+ DSA_NOTIFIER_TAG_8021Q_VLAN_ADD,
+ DSA_NOTIFIER_TAG_8021Q_VLAN_DEL,
};
/* DSA_NOTIFIER_AGEING_TIME */
@@ -113,6 +117,14 @@ struct dsa_notifier_mrp_ring_role_info {
int port;
};
+/* DSA_NOTIFIER_TAG_8021Q_VLAN_* */
+struct dsa_notifier_tag_8021q_vlan_info {
+ int tree_index;
+ int sw_index;
+ int port;
+ u16 vid;
+};
+
struct dsa_switchdev_event_work {
struct dsa_switch *ds;
int port;
@@ -187,23 +199,21 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
/* port.c */
void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
const struct dsa_device_ops *tag_ops);
-int dsa_port_set_state(struct dsa_port *dp, u8 state);
+int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age);
int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
void dsa_port_disable_rt(struct dsa_port *dp);
void dsa_port_disable(struct dsa_port *dp);
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
struct netlink_ext_ack *extack);
-int dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br,
- struct netlink_ext_ack *extack);
+void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br);
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
int dsa_port_lag_change(struct dsa_port *dp,
struct netdev_lag_lower_state_info *linfo);
int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev,
struct netdev_lag_upper_info *uinfo,
struct netlink_ext_ack *extack);
-int dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev,
- struct netlink_ext_ack *extack);
+void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
struct netlink_ext_ack *extack);
@@ -231,11 +241,9 @@ int dsa_port_host_mdb_del(const struct dsa_port *dp,
int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack);
-int dsa_port_bridge_flags(const struct dsa_port *dp,
+int dsa_port_bridge_flags(struct dsa_port *dp,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack);
-int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
- struct netlink_ext_ack *extack);
int dsa_port_vlan_add(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
struct netlink_ext_ack *extack);
@@ -253,16 +261,18 @@ int dsa_port_link_register_of(struct dsa_port *dp);
void dsa_port_link_unregister_of(struct dsa_port *dp);
int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr);
void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr);
+int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast);
+void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast);
extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp,
- struct net_device *dev)
+ const struct net_device *dev)
{
return dsa_port_to_bridge_port(dp) == dev;
}
static inline bool dsa_port_offloads_bridge(struct dsa_port *dp,
- struct net_device *bridge_dev)
+ const struct net_device *bridge_dev)
{
/* DSA ports connected to a bridge, and event was emitted
* for the bridge.
@@ -272,7 +282,7 @@ static inline bool dsa_port_offloads_bridge(struct dsa_port *dp,
/* Returns true if any port of this tree offloads the given net_device */
static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
- struct net_device *dev)
+ const struct net_device *dev)
{
struct dsa_port *dp;
@@ -283,6 +293,19 @@ static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
return false;
}
+/* Returns true if any port of this tree offloads the given bridge */
+static inline bool dsa_tree_offloads_bridge(struct dsa_switch_tree *dst,
+ const struct net_device *bridge_dev)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_offloads_bridge(dp, bridge_dev))
+ return true;
+
+ return false;
+}
+
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
extern struct notifier_block dsa_slave_switchdev_notifier;
@@ -297,6 +320,8 @@ int dsa_slave_register_notifier(void);
void dsa_slave_unregister_notifier(void);
void dsa_slave_setup_tagger(struct net_device *slave);
int dsa_slave_change_mtu(struct net_device *dev, int new_mtu);
+int dsa_slave_manage_vlan_filtering(struct net_device *dev,
+ bool vlan_filtering);
static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
{
@@ -372,6 +397,141 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb)
return skb;
}
+/* For switches without hardware support for DSA tagging to be able
+ * to support termination through the bridge.
+ */
+static inline struct net_device *
+dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid)
+{
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+ struct dsa_switch_tree *dst = cpu_dp->dst;
+ struct bridge_vlan_info vinfo;
+ struct net_device *slave;
+ struct dsa_port *dp;
+ int err;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->type != DSA_PORT_TYPE_USER)
+ continue;
+
+ if (!dp->bridge_dev)
+ continue;
+
+ if (dp->stp_state != BR_STATE_LEARNING &&
+ dp->stp_state != BR_STATE_FORWARDING)
+ continue;
+
+ /* Since the bridge might learn this packet, keep the CPU port
+ * affinity with the port that will be used for the reply on
+ * xmit.
+ */
+ if (dp->cpu_dp != cpu_dp)
+ continue;
+
+ slave = dp->slave;
+
+ err = br_vlan_get_info_rcu(slave, vid, &vinfo);
+ if (err)
+ continue;
+
+ return slave;
+ }
+
+ return NULL;
+}
+
+/* If the ingress port offloads the bridge, we mark the frame as autonomously
+ * forwarded by hardware, so the software bridge doesn't forward it twice, back
+ * to us, because we already did. However, if we're in fallback mode and we do
+ * software bridging, we are not offloading it, therefore the dp->bridge_dev
+ * pointer is not populated, and flooding needs to be done by software (we are
+ * effectively operating in standalone ports mode).
+ */
+static inline void dsa_default_offload_fwd_mark(struct sk_buff *skb)
+{
+ struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+
+ skb->offload_fwd_mark = !!(dp->bridge_dev);
+}
+
+/* Helper for removing DSA header tags from packets in the RX path.
+ * Must not be called before skb_pull(len).
+ * skb->data
+ * |
+ * v
+ * | | | | | | | | | | | | | | | | | | |
+ * +-----------------------+-----------------------+---------------+-------+
+ * | Destination MAC | Source MAC | DSA header | EType |
+ * +-----------------------+-----------------------+---------------+-------+
+ * | |
+ * <----- len -----> <----- len ----->
+ * |
+ * >>>>>>> v
+ * >>>>>>> | | | | | | | | | | | | | | |
+ * >>>>>>> +-----------------------+-----------------------+-------+
+ * >>>>>>> | Destination MAC | Source MAC | EType |
+ * +-----------------------+-----------------------+-------+
+ * ^
+ * |
+ * skb->data
+ */
+static inline void dsa_strip_etype_header(struct sk_buff *skb, int len)
+{
+ memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - len, 2 * ETH_ALEN);
+}
+
+/* Helper for creating space for DSA header tags in TX path packets.
+ * Must not be called before skb_push(len).
+ *
+ * Before:
+ *
+ * <<<<<<< | | | | | | | | | | | | | | |
+ * ^ <<<<<<< +-----------------------+-----------------------+-------+
+ * | <<<<<<< | Destination MAC | Source MAC | EType |
+ * | +-----------------------+-----------------------+-------+
+ * <----- len ----->
+ * |
+ * |
+ * skb->data
+ *
+ * After:
+ *
+ * | | | | | | | | | | | | | | | | | | |
+ * +-----------------------+-----------------------+---------------+-------+
+ * | Destination MAC | Source MAC | DSA header | EType |
+ * +-----------------------+-----------------------+---------------+-------+
+ * ^ | |
+ * | <----- len ----->
+ * skb->data
+ */
+static inline void dsa_alloc_etype_header(struct sk_buff *skb, int len)
+{
+ memmove(skb->data, skb->data + len, 2 * ETH_ALEN);
+}
+
+/* On RX, eth_type_trans() on the DSA master pulls ETH_HLEN bytes starting from
+ * skb_mac_header(skb), which leaves skb->data pointing at the first byte after
+ * what the DSA master perceives as the EtherType (the beginning of the L3
+ * protocol). Since DSA EtherType header taggers treat the EtherType as part of
+ * the DSA tag itself, and the EtherType is 2 bytes in length, the DSA header
+ * is located 2 bytes behind skb->data. Note that EtherType in this context
+ * means the first 2 bytes of the DSA header, not the encapsulated EtherType
+ * that will become visible after the DSA header is stripped.
+ */
+static inline void *dsa_etype_header_pos_rx(struct sk_buff *skb)
+{
+ return skb->data - 2;
+}
+
+/* On TX, skb->data points to skb_mac_header(skb), which means that EtherType
+ * header taggers start exactly where the EtherType is (the EtherType is
+ * treated as part of the DSA header).
+ */
+static inline void *dsa_etype_header_pos_tx(struct sk_buff *skb)
+{
+ return skb->data + 2 * ETH_ALEN;
+}
+
/* switch.c */
int dsa_switch_register_notifier(struct dsa_switch *ds);
void dsa_switch_unregister_notifier(struct dsa_switch *ds);
@@ -385,6 +545,18 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
struct net_device *master,
const struct dsa_device_ops *tag_ops,
const struct dsa_device_ops *old_tag_ops);
+int dsa_bridge_num_get(const struct net_device *bridge_dev, int max);
+void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num);
+
+/* tag_8021q.c */
+int dsa_tag_8021q_bridge_join(struct dsa_switch *ds,
+ struct dsa_notifier_bridge_info *info);
+int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds,
+ struct dsa_notifier_bridge_info *info);
+int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
+ struct dsa_notifier_tag_8021q_vlan_info *info);
+int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
+ struct dsa_notifier_tag_8021q_vlan_info *info);
extern struct list_head dsa_tree_list;
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 3fc90e36772d..e8e19857621b 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -210,14 +210,14 @@ static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
- if (dev->netdev_ops->ndo_do_ioctl)
- err = dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
+ if (dev->netdev_ops->ndo_eth_ioctl)
+ err = dev->netdev_ops->ndo_eth_ioctl(dev, ifr, cmd);
return err;
}
static const struct dsa_netdevice_ops dsa_netdev_ops = {
- .ndo_do_ioctl = dsa_master_ioctl,
+ .ndo_eth_ioctl = dsa_master_ioctl,
};
static int dsa_master_ethtool_setup(struct net_device *dev)
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 28b45b7e66df..616330a16d31 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -30,7 +30,52 @@ static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v)
return dsa_tree_notify(dp->ds->dst, e, v);
}
-int dsa_port_set_state(struct dsa_port *dp, u8 state)
+static void dsa_port_notify_bridge_fdb_flush(const struct dsa_port *dp)
+{
+ struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
+ struct switchdev_notifier_fdb_info info = {
+ /* flush all VLANs */
+ .vid = 0,
+ };
+
+ /* When the port becomes standalone it has already left the bridge.
+ * Don't notify the bridge in that case.
+ */
+ if (!brport_dev)
+ return;
+
+ call_switchdev_notifiers(SWITCHDEV_FDB_FLUSH_TO_BRIDGE,
+ brport_dev, &info.info, NULL);
+}
+
+static void dsa_port_fast_age(const struct dsa_port *dp)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ if (!ds->ops->port_fast_age)
+ return;
+
+ ds->ops->port_fast_age(ds, dp->index);
+
+ dsa_port_notify_bridge_fdb_flush(dp);
+}
+
+static bool dsa_port_can_configure_learning(struct dsa_port *dp)
+{
+ struct switchdev_brport_flags flags = {
+ .mask = BR_LEARNING,
+ };
+ struct dsa_switch *ds = dp->ds;
+ int err;
+
+ if (!ds->ops->port_bridge_flags || !ds->ops->port_pre_bridge_flags)
+ return false;
+
+ err = ds->ops->port_pre_bridge_flags(ds, dp->index, flags, NULL);
+ return !err;
+}
+
+int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age)
{
struct dsa_switch *ds = dp->ds;
int port = dp->index;
@@ -40,10 +85,14 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state)
ds->ops->port_stp_state_set(ds, port, state);
- if (ds->ops->port_fast_age) {
+ if (!dsa_port_can_configure_learning(dp) ||
+ (do_fast_age && dp->learning)) {
/* Fast age FDB entries or flush appropriate forwarding database
* for the given port, if we are moving it from Learning or
* Forwarding state, to Disabled or Blocking or Listening state.
+ * Ports that were standalone before the STP state change don't
+ * need to fast age the FDB, since address learning is off in
+ * standalone mode.
*/
if ((dp->stp_state == BR_STATE_LEARNING ||
@@ -51,7 +100,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state)
(state == BR_STATE_DISABLED ||
state == BR_STATE_BLOCKING ||
state == BR_STATE_LISTENING))
- ds->ops->port_fast_age(ds, port);
+ dsa_port_fast_age(dp);
}
dp->stp_state = state;
@@ -59,11 +108,12 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state)
return 0;
}
-static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
+static void dsa_port_set_state_now(struct dsa_port *dp, u8 state,
+ bool do_fast_age)
{
int err;
- err = dsa_port_set_state(dp, state);
+ err = dsa_port_set_state(dp, state, do_fast_age);
if (err)
pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
}
@@ -81,7 +131,7 @@ int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy)
}
if (!dp->bridge_dev)
- dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+ dsa_port_set_state_now(dp, BR_STATE_FORWARDING, false);
if (dp->pl)
phylink_start(dp->pl);
@@ -109,7 +159,7 @@ void dsa_port_disable_rt(struct dsa_port *dp)
phylink_stop(dp->pl);
if (!dp->bridge_dev)
- dsa_port_set_state_now(dp, BR_STATE_DISABLED);
+ dsa_port_set_state_now(dp, BR_STATE_DISABLED, false);
if (ds->ops->port_disable)
ds->ops->port_disable(ds, port);
@@ -167,8 +217,8 @@ static void dsa_port_clear_brport_flags(struct dsa_port *dp)
}
}
-static int dsa_port_switchdev_sync(struct dsa_port *dp,
- struct netlink_ext_ack *extack)
+static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp,
+ struct netlink_ext_ack *extack)
{
struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
struct net_device *br = dp->bridge_dev;
@@ -178,7 +228,7 @@ static int dsa_port_switchdev_sync(struct dsa_port *dp,
if (err)
return err;
- err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev));
+ err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev), false);
if (err && err != -EOPNOTSUPP)
return err;
@@ -186,67 +236,10 @@ static int dsa_port_switchdev_sync(struct dsa_port *dp,
if (err && err != -EOPNOTSUPP)
return err;
- err = dsa_port_mrouter(dp->cpu_dp, br_multicast_router(br), extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
err = dsa_port_ageing_time(dp, br_get_ageing_time(br));
if (err && err != -EOPNOTSUPP)
return err;
- err = br_mdb_replay(br, brport_dev, dp, true,
- &dsa_slave_switchdev_blocking_notifier, extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- /* Forwarding and termination FDB entries on the port */
- err = br_fdb_replay(br, brport_dev, dp, true,
- &dsa_slave_switchdev_notifier);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- /* Termination FDB entries on the bridge itself */
- err = br_fdb_replay(br, br, dp, true, &dsa_slave_switchdev_notifier);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- err = br_vlan_replay(br, brport_dev, dp, true,
- &dsa_slave_switchdev_blocking_notifier, extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- return 0;
-}
-
-static int dsa_port_switchdev_unsync_objs(struct dsa_port *dp,
- struct net_device *br,
- struct netlink_ext_ack *extack)
-{
- struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
- int err;
-
- /* Delete the switchdev objects left on this port */
- err = br_mdb_replay(br, brport_dev, dp, false,
- &dsa_slave_switchdev_blocking_notifier, extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- /* Forwarding and termination FDB entries on the port */
- err = br_fdb_replay(br, brport_dev, dp, false,
- &dsa_slave_switchdev_notifier);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- /* Termination FDB entries on the bridge itself */
- err = br_fdb_replay(br, br, dp, false, &dsa_slave_switchdev_notifier);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- err = br_vlan_replay(br, brport_dev, dp, false,
- &dsa_slave_switchdev_blocking_notifier, extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
return 0;
}
@@ -268,21 +261,63 @@ static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp)
/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
* so allow it to be in BR_STATE_FORWARDING to be kept functional
*/
- dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+ dsa_port_set_state_now(dp, BR_STATE_FORWARDING, true);
/* VLAN filtering is handled by dsa_switch_bridge_leave */
- /* Some drivers treat the notification for having a local multicast
- * router by allowing multicast to be flooded to the CPU, so we should
- * allow this in standalone mode too.
- */
- dsa_port_mrouter(dp->cpu_dp, true, NULL);
-
/* Ageing time may be global to the switch chip, so don't change it
* here because we have no good reason (or value) to change it to.
*/
}
+static void dsa_port_bridge_tx_fwd_unoffload(struct dsa_port *dp,
+ struct net_device *bridge_dev)
+{
+ int bridge_num = dp->bridge_num;
+ struct dsa_switch *ds = dp->ds;
+
+ /* No bridge TX forwarding offload => do nothing */
+ if (!ds->ops->port_bridge_tx_fwd_unoffload || dp->bridge_num == -1)
+ return;
+
+ dp->bridge_num = -1;
+
+ dsa_bridge_num_put(bridge_dev, bridge_num);
+
+ /* Notify the chips only once the offload has been deactivated, so
+ * that they can update their configuration accordingly.
+ */
+ ds->ops->port_bridge_tx_fwd_unoffload(ds, dp->index, bridge_dev,
+ bridge_num);
+}
+
+static bool dsa_port_bridge_tx_fwd_offload(struct dsa_port *dp,
+ struct net_device *bridge_dev)
+{
+ struct dsa_switch *ds = dp->ds;
+ int bridge_num, err;
+
+ if (!ds->ops->port_bridge_tx_fwd_offload)
+ return false;
+
+ bridge_num = dsa_bridge_num_get(bridge_dev,
+ ds->num_fwd_offloading_bridges);
+ if (bridge_num < 0)
+ return false;
+
+ dp->bridge_num = bridge_num;
+
+ /* Notify the driver */
+ err = ds->ops->port_bridge_tx_fwd_offload(ds, dp->index, bridge_dev,
+ bridge_num);
+ if (err) {
+ dsa_port_bridge_tx_fwd_unoffload(dp, bridge_dev);
+ return false;
+ }
+
+ return true;
+}
+
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
struct netlink_ext_ack *extack)
{
@@ -292,6 +327,9 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
.port = dp->index,
.br = br,
};
+ struct net_device *dev = dp->slave;
+ struct net_device *brport_dev;
+ bool tx_fwd_offload;
int err;
/* Here the interface is already bridged. Reflect the current
@@ -299,16 +337,31 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
*/
dp->bridge_dev = br;
+ brport_dev = dsa_port_to_bridge_port(dp);
+
err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_JOIN, &info);
if (err)
goto out_rollback;
- err = dsa_port_switchdev_sync(dp, extack);
+ tx_fwd_offload = dsa_port_bridge_tx_fwd_offload(dp, br);
+
+ err = switchdev_bridge_port_offload(brport_dev, dev, dp,
+ &dsa_slave_switchdev_notifier,
+ &dsa_slave_switchdev_blocking_notifier,
+ tx_fwd_offload, extack);
if (err)
goto out_rollback_unbridge;
+ err = dsa_port_switchdev_sync_attrs(dp, extack);
+ if (err)
+ goto out_rollback_unoffload;
+
return 0;
+out_rollback_unoffload:
+ switchdev_bridge_port_unoffload(brport_dev, dp,
+ &dsa_slave_switchdev_notifier,
+ &dsa_slave_switchdev_blocking_notifier);
out_rollback_unbridge:
dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
out_rollback:
@@ -316,10 +369,17 @@ out_rollback:
return err;
}
-int dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br,
- struct netlink_ext_ack *extack)
+void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br)
{
- return dsa_port_switchdev_unsync_objs(dp, br, extack);
+ struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
+
+ /* Don't try to unoffload something that is not offloaded */
+ if (!brport_dev)
+ return;
+
+ switchdev_bridge_port_unoffload(brport_dev, dp,
+ &dsa_slave_switchdev_notifier,
+ &dsa_slave_switchdev_blocking_notifier);
}
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
@@ -337,9 +397,13 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
*/
dp->bridge_dev = NULL;
+ dsa_port_bridge_tx_fwd_unoffload(dp, br);
+
err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
if (err)
- pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
+ dev_err(dp->ds->dev,
+ "port %d failed to notify DSA_NOTIFIER_BRIDGE_LEAVE: %pe\n",
+ dp->index, ERR_PTR(err));
dsa_port_switchdev_unsync_attrs(dp);
}
@@ -409,13 +473,10 @@ err_lag_join:
return err;
}
-int dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag,
- struct netlink_ext_ack *extack)
+void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag)
{
if (dp->bridge_dev)
- return dsa_port_pre_bridge_leave(dp, dp->bridge_dev, extack);
-
- return 0;
+ dsa_port_pre_bridge_leave(dp, dp->bridge_dev);
}
void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
@@ -441,8 +502,9 @@ void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info);
if (err)
- pr_err("DSA: failed to notify DSA_NOTIFIER_LAG_LEAVE: %d\n",
- err);
+ dev_err(dp->ds->dev,
+ "port %d failed to notify DSA_NOTIFIER_LAG_LEAVE: %pe\n",
+ dp->index, ERR_PTR(err));
dsa_lag_unmap(dp->ds->dst, lag);
}
@@ -518,6 +580,7 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
struct netlink_ext_ack *extack)
{
+ bool old_vlan_filtering = dsa_port_is_vlan_filtering(dp);
struct dsa_switch *ds = dp->ds;
bool apply;
int err;
@@ -543,12 +606,49 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
if (err)
return err;
- if (ds->vlan_filtering_is_global)
+ if (ds->vlan_filtering_is_global) {
+ int port;
+
ds->vlan_filtering = vlan_filtering;
- else
+
+ for (port = 0; port < ds->num_ports; port++) {
+ struct net_device *slave;
+
+ if (!dsa_is_user_port(ds, port))
+ continue;
+
+ /* We might be called in the unbind path, so some
+ * slave devices might no longer be registered.
+ */
+ slave = dsa_to_port(ds, port)->slave;
+ if (!slave)
+ continue;
+
+ err = dsa_slave_manage_vlan_filtering(slave,
+ vlan_filtering);
+ if (err)
+ goto restore;
+ }
+ } else {
dp->vlan_filtering = vlan_filtering;
+ err = dsa_slave_manage_vlan_filtering(dp->slave,
+ vlan_filtering);
+ if (err)
+ goto restore;
+ }
+
return 0;
+
+restore:
+ ds->ops->port_vlan_filtering(ds, dp->index, old_vlan_filtering, NULL);
+
+ if (ds->vlan_filtering_is_global)
+ ds->vlan_filtering = old_vlan_filtering;
+ else
+ dp->vlan_filtering = old_vlan_filtering;
+
+ return err;
}
/* This enforces legacy behavior for switch drivers which assume they can't
@@ -595,27 +695,35 @@ int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
return ds->ops->port_pre_bridge_flags(ds, dp->index, flags, extack);
}
-int dsa_port_bridge_flags(const struct dsa_port *dp,
+int dsa_port_bridge_flags(struct dsa_port *dp,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack)
{
struct dsa_switch *ds = dp->ds;
+ int err;
if (!ds->ops->port_bridge_flags)
return -EOPNOTSUPP;
- return ds->ops->port_bridge_flags(ds, dp->index, flags, extack);
-}
+ err = ds->ops->port_bridge_flags(ds, dp->index, flags, extack);
+ if (err)
+ return err;
-int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
- struct netlink_ext_ack *extack)
-{
- struct dsa_switch *ds = dp->ds;
+ if (flags.mask & BR_LEARNING) {
+ bool learning = flags.val & BR_LEARNING;
- if (!ds->ops->port_set_mrouter)
- return -EOPNOTSUPP;
+ if (learning == dp->learning)
+ return 0;
+
+ if ((dp->learning && !learning) &&
+ (dp->stp_state == BR_STATE_LEARNING ||
+ dp->stp_state == BR_STATE_FORWARDING))
+ dsa_port_fast_age(dp);
+
+ dp->learning = learning;
+ }
- return ds->ops->port_set_mrouter(ds, dp->index, mrouter, extack);
+ return 0;
}
int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
@@ -844,7 +952,6 @@ int dsa_port_mrp_del_ring_role(const struct dsa_port *dp,
void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
const struct dsa_device_ops *tag_ops)
{
- cpu_dp->filter = tag_ops->filter;
cpu_dp->rcv = tag_ops->rcv;
cpu_dp->tag_ops = tag_ops;
}
@@ -1215,5 +1322,42 @@ void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr)
err = dsa_port_notify(dp, DSA_NOTIFIER_HSR_LEAVE, &info);
if (err)
- pr_err("DSA: failed to notify DSA_NOTIFIER_HSR_LEAVE\n");
+ dev_err(dp->ds->dev,
+ "port %d failed to notify DSA_NOTIFIER_HSR_LEAVE: %pe\n",
+ dp->index, ERR_PTR(err));
+}
+
+int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast)
+{
+ struct dsa_notifier_tag_8021q_vlan_info info = {
+ .tree_index = dp->ds->dst->index,
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .vid = vid,
+ };
+
+ if (broadcast)
+ return dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, &info);
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, &info);
+}
+
+void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast)
+{
+ struct dsa_notifier_tag_8021q_vlan_info info = {
+ .tree_index = dp->ds->dst->index,
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .vid = vid,
+ };
+ int err;
+
+ if (broadcast)
+ err = dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, &info);
+ else
+ err = dsa_port_notify(dp, DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, &info);
+ if (err)
+ dev_err(dp->ds->dev,
+ "port %d failed to notify tag_8021q VLAN %d deletion: %pe\n",
+ dp->index, vid, ERR_PTR(err));
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 532085da8d8f..662ff531d4e2 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -286,7 +286,7 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
return -EOPNOTSUPP;
- ret = dsa_port_set_state(dp, attr->u.stp_state);
+ ret = dsa_port_set_state(dp, attr->u.stp_state, true);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
@@ -314,12 +314,6 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack);
break;
- case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER:
- if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
- return -EOPNOTSUPP;
-
- ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, extack);
- break;
default:
ret = -EOPNOTSUPP;
break;
@@ -1415,6 +1409,76 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
return 0;
}
+static int dsa_slave_restore_vlan(struct net_device *vdev, int vid, void *arg)
+{
+ __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q);
+
+ return dsa_slave_vlan_rx_add_vid(arg, proto, vid);
+}
+
+static int dsa_slave_clear_vlan(struct net_device *vdev, int vid, void *arg)
+{
+ __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q);
+
+ return dsa_slave_vlan_rx_kill_vid(arg, proto, vid);
+}
+
+/* Keep the VLAN RX filtering list in sync with the hardware only if VLAN
+ * filtering is enabled. The baseline is that only ports that offload a
+ * VLAN-aware bridge are VLAN-aware, and standalone ports are VLAN-unaware,
+ * but there are exceptions for quirky hardware.
+ *
+ * If ds->vlan_filtering_is_global = true, then standalone ports which share
+ * the same switch with other ports that offload a VLAN-aware bridge are also
+ * inevitably VLAN-aware.
+ *
+ * To summarize, a DSA switch port offloads:
+ *
+ * - If standalone (this includes software bridge, software LAG):
+ * - if ds->needs_standalone_vlan_filtering = true, OR if
+ * (ds->vlan_filtering_is_global = true AND there are bridges spanning
+ * this switch chip which have vlan_filtering=1)
+ * - the 8021q upper VLANs
+ * - else (standalone VLAN filtering is not needed, VLAN filtering is not
+ * global, or it is, but no port is under a VLAN-aware bridge):
+ * - no VLAN (any 8021q upper is a software VLAN)
+ *
+ * - If under a vlan_filtering=0 bridge which it offloads:
+ * - if ds->configure_vlan_while_not_filtering = true (default):
+ * - the bridge VLANs. These VLANs are committed to hardware but inactive.
+ * - else (deprecated):
+ * - no VLAN. The bridge VLANs are not restored when VLAN awareness is
+ * enabled, so this behavior is broken and discouraged.
+ *
+ * - If under a vlan_filtering=1 bridge which it offloads:
+ * - the bridge VLANs
+ * - the 8021q upper VLANs
+ */
+int dsa_slave_manage_vlan_filtering(struct net_device *slave,
+ bool vlan_filtering)
+{
+ int err;
+
+ if (vlan_filtering) {
+ slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+ err = vlan_for_each(slave, dsa_slave_restore_vlan, slave);
+ if (err) {
+ vlan_for_each(slave, dsa_slave_clear_vlan, slave);
+ slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+ return err;
+ }
+ } else {
+ err = vlan_for_each(slave, dsa_slave_clear_vlan, slave);
+ if (err)
+ return err;
+
+ slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+ }
+
+ return 0;
+}
+
struct dsa_hw_port {
struct list_head list;
struct net_device *dev;
@@ -1687,7 +1751,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_set_rx_mode = dsa_slave_set_rx_mode,
.ndo_set_mac_address = dsa_slave_set_mac_address,
.ndo_fdb_dump = dsa_slave_fdb_dump,
- .ndo_do_ioctl = dsa_slave_ioctl,
+ .ndo_eth_ioctl = dsa_slave_ioctl,
.ndo_get_iflink = dsa_slave_get_iflink,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_netpoll_setup = dsa_slave_netpoll_setup,
@@ -1822,12 +1886,12 @@ void dsa_slave_setup_tagger(struct net_device *slave)
p->xmit = cpu_dp->tag_ops->xmit;
slave->features = master->vlan_features | NETIF_F_HW_TC;
- if (ds->ops->port_vlan_add && ds->ops->port_vlan_del)
- slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
slave->hw_features |= NETIF_F_HW_TC;
slave->features |= NETIF_F_LLTX;
if (slave->needed_tailroom)
slave->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST);
+ if (ds->needs_standalone_vlan_filtering)
+ slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
}
static struct lock_class_key dsa_slave_netdev_xmit_lock_key;
@@ -2015,6 +2079,11 @@ static int dsa_slave_changeupper(struct net_device *dev,
err = dsa_port_bridge_join(dp, info->upper_dev, extack);
if (!err)
dsa_bridge_mtu_normalization(dp);
+ if (err == -EOPNOTSUPP) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offloading not supported");
+ err = 0;
+ }
err = notifier_from_errno(err);
} else {
dsa_port_bridge_leave(dp, info->upper_dev);
@@ -2056,20 +2125,16 @@ static int dsa_slave_prechangeupper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
- struct netlink_ext_ack *extack;
- int err = 0;
-
- extack = netdev_notifier_info_to_extack(&info->info);
if (netif_is_bridge_master(info->upper_dev) && !info->linking)
- err = dsa_port_pre_bridge_leave(dp, info->upper_dev, extack);
+ dsa_port_pre_bridge_leave(dp, info->upper_dev);
else if (netif_is_lag_master(info->upper_dev) && !info->linking)
- err = dsa_port_pre_lag_leave(dp, info->upper_dev, extack);
+ dsa_port_pre_lag_leave(dp, info->upper_dev);
/* dsa_port_pre_hsr_leave is not yet necessary since hsr cannot be
* meaningfully enslaved to a bridge yet
*/
- return notifier_from_errno(err);
+ return NOTIFY_DONE;
}
static int
@@ -2291,8 +2356,8 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
static void
dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work)
{
+ struct switchdev_notifier_fdb_info info = {};
struct dsa_switch *ds = switchdev_work->ds;
- struct switchdev_notifier_fdb_info info;
struct dsa_port *dp;
if (!dsa_is_user_port(ds, switchdev_work->port))
@@ -2357,26 +2422,98 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
kfree(switchdev_work);
}
-static int dsa_lower_dev_walk(struct net_device *lower_dev,
- struct netdev_nested_priv *priv)
+static bool dsa_foreign_dev_check(const struct net_device *dev,
+ const struct net_device *foreign_dev)
{
- if (dsa_slave_dev_check(lower_dev)) {
- priv->data = (void *)netdev_priv(lower_dev);
- return 1;
- }
+ const struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch_tree *dst = dp->ds->dst;
- return 0;
+ if (netif_is_bridge_master(foreign_dev))
+ return !dsa_tree_offloads_bridge(dst, foreign_dev);
+
+ if (netif_is_bridge_port(foreign_dev))
+ return !dsa_tree_offloads_bridge_port(dst, foreign_dev);
+
+ /* Everything else is foreign */
+ return true;
}
-static struct dsa_slave_priv *dsa_slave_dev_lower_find(struct net_device *dev)
+static int dsa_slave_fdb_event(struct net_device *dev,
+ const struct net_device *orig_dev,
+ const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ unsigned long event)
{
- struct netdev_nested_priv priv = {
- .data = NULL,
- };
+ struct dsa_switchdev_event_work *switchdev_work;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ bool host_addr = fdb_info->is_local;
+ struct dsa_switch *ds = dp->ds;
+
+ if (ctx && ctx != dp)
+ return 0;
+
+ if (!ds->ops->port_fdb_add || !ds->ops->port_fdb_del)
+ return -EOPNOTSUPP;
+
+ if (dsa_slave_dev_check(orig_dev) &&
+ switchdev_fdb_is_dynamically_learned(fdb_info))
+ return 0;
+
+ /* FDB entries learned by the software bridge should be installed as
+ * host addresses only if the driver requests assisted learning.
+ */
+ if (switchdev_fdb_is_dynamically_learned(fdb_info) &&
+ !ds->assisted_learning_on_cpu_port)
+ return 0;
+
+ /* Also treat FDB entries on foreign interfaces bridged with us as host
+ * addresses.
+ */
+ if (dsa_foreign_dev_check(dev, orig_dev))
+ host_addr = true;
+
+ switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
+ if (!switchdev_work)
+ return -ENOMEM;
- netdev_walk_all_lower_dev_rcu(dev, dsa_lower_dev_walk, &priv);
+ netdev_dbg(dev, "%s FDB entry towards %s, addr %pM vid %d%s\n",
+ event == SWITCHDEV_FDB_ADD_TO_DEVICE ? "Adding" : "Deleting",
+ orig_dev->name, fdb_info->addr, fdb_info->vid,
+ host_addr ? " as host address" : "");
- return (struct dsa_slave_priv *)priv.data;
+ INIT_WORK(&switchdev_work->work, dsa_slave_switchdev_event_work);
+ switchdev_work->ds = ds;
+ switchdev_work->port = dp->index;
+ switchdev_work->event = event;
+ switchdev_work->dev = dev;
+
+ ether_addr_copy(switchdev_work->addr, fdb_info->addr);
+ switchdev_work->vid = fdb_info->vid;
+ switchdev_work->host_addr = host_addr;
+
+ /* Hold a reference for dsa_fdb_offload_notify */
+ dev_hold(dev);
+ dsa_schedule_work(&switchdev_work->work);
+
+ return 0;
+}
+
+/* FDB "add" adapter used with switchdev_handle_fdb_add_to_device(): relays
+ * the notification to dsa_slave_fdb_event() tagged as
+ * SWITCHDEV_FDB_ADD_TO_DEVICE and returns its result unchanged.
+ */
+static int
+dsa_slave_fdb_add_to_device(struct net_device *dev,
+			    const struct net_device *orig_dev, const void *ctx,
+			    const struct switchdev_notifier_fdb_info *fdb_info)
+{
+	const unsigned long event = SWITCHDEV_FDB_ADD_TO_DEVICE;
+
+	return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info, event);
+}
+
+/* FDB "delete" adapter used with switchdev_handle_fdb_del_to_device(): relays
+ * the notification to dsa_slave_fdb_event() tagged as
+ * SWITCHDEV_FDB_DEL_TO_DEVICE and returns its result unchanged.
+ */
+static int
+dsa_slave_fdb_del_to_device(struct net_device *dev,
+			    const struct net_device *orig_dev, const void *ctx,
+			    const struct switchdev_notifier_fdb_info *fdb_info)
+{
+	const unsigned long event = SWITCHDEV_FDB_DEL_TO_DEVICE;
+
+	return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info, event);
}
/* Called under rcu_read_lock() */
@@ -2384,10 +2521,6 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
- const struct switchdev_notifier_fdb_info *fdb_info;
- struct dsa_switchdev_event_work *switchdev_work;
- bool host_addr = false;
- struct dsa_port *dp;
int err;
switch (event) {
@@ -2397,92 +2530,19 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
dsa_slave_port_attr_set);
return notifier_from_errno(err);
case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ err = switchdev_handle_fdb_add_to_device(dev, ptr,
+ dsa_slave_dev_check,
+ dsa_foreign_dev_check,
+ dsa_slave_fdb_add_to_device,
+ NULL);
+ return notifier_from_errno(err);
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- fdb_info = ptr;
-
- if (dsa_slave_dev_check(dev)) {
- dp = dsa_slave_to_port(dev);
-
- if (fdb_info->is_local)
- host_addr = true;
- else if (!fdb_info->added_by_user)
- return NOTIFY_OK;
- } else {
- /* Snoop addresses added to foreign interfaces
- * bridged with us, or the bridge
- * itself. Dynamically learned addresses can
- * also be added for switches that don't
- * automatically learn SA from CPU-injected
- * traffic.
- */
- struct net_device *br_dev;
- struct dsa_slave_priv *p;
-
- if (netif_is_bridge_master(dev))
- br_dev = dev;
- else
- br_dev = netdev_master_upper_dev_get_rcu(dev);
-
- if (!br_dev)
- return NOTIFY_DONE;
-
- if (!netif_is_bridge_master(br_dev))
- return NOTIFY_DONE;
-
- p = dsa_slave_dev_lower_find(br_dev);
- if (!p)
- return NOTIFY_DONE;
-
- dp = p->dp;
- host_addr = fdb_info->is_local;
-
- /* FDB entries learned by the software bridge should
- * be installed as host addresses only if the driver
- * requests assisted learning.
- * On the other hand, FDB entries for local termination
- * should always be installed.
- */
- if (!fdb_info->added_by_user && !fdb_info->is_local &&
- !dp->ds->assisted_learning_on_cpu_port)
- return NOTIFY_DONE;
-
- /* When the bridge learns an address on an offloaded
- * LAG we don't want to send traffic to the CPU, the
- * other ports bridged with the LAG should be able to
- * autonomously forward towards it.
- * On the other hand, if the address is local
- * (therefore not learned) then we want to trap it to
- * the CPU regardless of whether the interface it
- * belongs to is offloaded or not.
- */
- if (dsa_tree_offloads_bridge_port(dp->ds->dst, dev) &&
- !fdb_info->is_local)
- return NOTIFY_DONE;
- }
-
- if (!dp->ds->ops->port_fdb_add || !dp->ds->ops->port_fdb_del)
- return NOTIFY_DONE;
-
- switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
- if (!switchdev_work)
- return NOTIFY_BAD;
-
- INIT_WORK(&switchdev_work->work,
- dsa_slave_switchdev_event_work);
- switchdev_work->ds = dp->ds;
- switchdev_work->port = dp->index;
- switchdev_work->event = event;
- switchdev_work->dev = dev;
-
- ether_addr_copy(switchdev_work->addr,
- fdb_info->addr);
- switchdev_work->vid = fdb_info->vid;
- switchdev_work->host_addr = host_addr;
-
- /* Hold a reference for dsa_fdb_offload_notify */
- dev_hold(dev);
- dsa_schedule_work(&switchdev_work->work);
- break;
+ err = switchdev_handle_fdb_del_to_device(dev, ptr,
+ dsa_slave_dev_check,
+ dsa_foreign_dev_check,
+ dsa_slave_fdb_del_to_device,
+ NULL);
+ return notifier_from_errno(err);
default:
return NOTIFY_DONE;
}
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 5ece05dfd8f2..1c797ec8e2c2 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -90,26 +90,36 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds,
struct dsa_notifier_bridge_info *info)
{
struct dsa_switch_tree *dst = ds->dst;
+ int err;
- if (dst->index == info->tree_index && ds->index == info->sw_index &&
- ds->ops->port_bridge_join)
- return ds->ops->port_bridge_join(ds, info->port, info->br);
+ if (dst->index == info->tree_index && ds->index == info->sw_index) {
+ if (!ds->ops->port_bridge_join)
+ return -EOPNOTSUPP;
+
+ err = ds->ops->port_bridge_join(ds, info->port, info->br);
+ if (err)
+ return err;
+ }
if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
- ds->ops->crosschip_bridge_join)
- return ds->ops->crosschip_bridge_join(ds, info->tree_index,
- info->sw_index,
- info->port, info->br);
+ ds->ops->crosschip_bridge_join) {
+ err = ds->ops->crosschip_bridge_join(ds, info->tree_index,
+ info->sw_index,
+ info->port, info->br);
+ if (err)
+ return err;
+ }
- return 0;
+ return dsa_tag_8021q_bridge_join(ds, info);
}
static int dsa_switch_bridge_leave(struct dsa_switch *ds,
struct dsa_notifier_bridge_info *info)
{
- bool unset_vlan_filtering = br_vlan_enabled(info->br);
struct dsa_switch_tree *dst = ds->dst;
struct netlink_ext_ack extack = {0};
+ bool change_vlan_filtering = false;
+ bool vlan_filtering;
int err, port;
if (dst->index == info->tree_index && ds->index == info->sw_index &&
@@ -122,6 +132,15 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
info->sw_index, info->port,
info->br);
+ if (ds->needs_standalone_vlan_filtering && !br_vlan_enabled(info->br)) {
+ change_vlan_filtering = true;
+ vlan_filtering = true;
+ } else if (!ds->needs_standalone_vlan_filtering &&
+ br_vlan_enabled(info->br)) {
+ change_vlan_filtering = true;
+ vlan_filtering = false;
+ }
+
/* If the bridge was vlan_filtering, the bridge core doesn't trigger an
* event for changing vlan_filtering setting upon slave ports leaving
* it. That is a good thing, because that lets us handle it and also
@@ -130,28 +149,30 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
* vlan_filtering callback is only when the last port leaves the last
* VLAN-aware bridge.
*/
- if (unset_vlan_filtering && ds->vlan_filtering_is_global) {
+ if (change_vlan_filtering && ds->vlan_filtering_is_global) {
for (port = 0; port < ds->num_ports; port++) {
struct net_device *bridge_dev;
bridge_dev = dsa_to_port(ds, port)->bridge_dev;
if (bridge_dev && br_vlan_enabled(bridge_dev)) {
- unset_vlan_filtering = false;
+ change_vlan_filtering = false;
break;
}
}
}
- if (unset_vlan_filtering) {
+
+ if (change_vlan_filtering) {
err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port),
- false, &extack);
+ vlan_filtering, &extack);
if (extack._msg)
dev_err(ds->dev, "port %d: %s\n", info->port,
extack._msg);
if (err && err != EOPNOTSUPP)
return err;
}
- return 0;
+
+ return dsa_tag_8021q_bridge_leave(ds, info);
}
/* Matches for all upstream-facing ports (the CPU port and all upstream-facing
@@ -726,6 +747,12 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_MRP_DEL_RING_ROLE:
err = dsa_switch_mrp_del_ring_role(ds, info);
break;
+ case DSA_NOTIFIER_TAG_8021Q_VLAN_ADD:
+ err = dsa_switch_tag_8021q_vlan_add(ds, info);
+ break;
+ case DSA_NOTIFIER_TAG_8021Q_VLAN_DEL:
+ err = dsa_switch_tag_8021q_vlan_del(ds, info);
+ break;
default:
err = -EOPNOTSUPP;
break;
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 4aa29f90ecea..f8f7b7c34e7d 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -17,7 +17,7 @@
*
* | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
* +-----------+-----+-----------------+-----------+-----------------------+
- * | DIR | SVL | SWITCH_ID | SUBVLAN | PORT |
+ * | DIR | VBID| SWITCH_ID | VBID | PORT |
* +-----------+-----+-----------------+-----------+-----------------------+
*
* DIR - VID[11:10]:
@@ -27,24 +27,14 @@
* These values make the special VIDs of 0, 1 and 4095 to be left
* unused by this coding scheme.
*
- * SVL/SUBVLAN - { VID[9], VID[5:4] }:
- * Sub-VLAN encoding. Valid only when DIR indicates an RX VLAN.
- * * 0 (0b000): Field does not encode a sub-VLAN, either because
- * received traffic is untagged, PVID-tagged or because a second
- * VLAN tag is present after this tag and not inside of it.
- * * 1 (0b001): Received traffic is tagged with a VID value private
- * to the host. This field encodes the index in the host's lookup
- * table through which the value of the ingress VLAN ID can be
- * recovered.
- * * 2 (0b010): Field encodes a sub-VLAN.
- * ...
- * * 7 (0b111): Field encodes a sub-VLAN.
- * When DIR indicates a TX VLAN, SUBVLAN must be transmitted as zero
- * (by the host) and ignored on receive (by the switch).
- *
* SWITCH_ID - VID[8:6]:
* Index of switch within DSA tree. Must be between 0 and 7.
*
+ * VBID - { VID[9], VID[5:4] }:
+ * Virtual bridge ID. If between 1 and 7, packet targets the broadcast
+ * domain of a bridge. If transmitted as zero, packet targets a single
+ * port. Field only valid on transmit, must be ignored on receive.
+ *
* PORT - VID[3:0]:
* Index of switch port. Must be between 0 and 15.
*/
@@ -61,23 +51,30 @@
#define DSA_8021Q_SWITCH_ID(x) (((x) << DSA_8021Q_SWITCH_ID_SHIFT) & \
DSA_8021Q_SWITCH_ID_MASK)
-#define DSA_8021Q_SUBVLAN_HI_SHIFT 9
-#define DSA_8021Q_SUBVLAN_HI_MASK GENMASK(9, 9)
-#define DSA_8021Q_SUBVLAN_LO_SHIFT 4
-#define DSA_8021Q_SUBVLAN_LO_MASK GENMASK(5, 4)
-#define DSA_8021Q_SUBVLAN_HI(x) (((x) & GENMASK(2, 2)) >> 2)
-#define DSA_8021Q_SUBVLAN_LO(x) ((x) & GENMASK(1, 0))
-#define DSA_8021Q_SUBVLAN(x) \
- (((DSA_8021Q_SUBVLAN_LO(x) << DSA_8021Q_SUBVLAN_LO_SHIFT) & \
- DSA_8021Q_SUBVLAN_LO_MASK) | \
- ((DSA_8021Q_SUBVLAN_HI(x) << DSA_8021Q_SUBVLAN_HI_SHIFT) & \
- DSA_8021Q_SUBVLAN_HI_MASK))
+#define DSA_8021Q_VBID_HI_SHIFT 9
+#define DSA_8021Q_VBID_HI_MASK GENMASK(9, 9)
+#define DSA_8021Q_VBID_LO_SHIFT 4
+#define DSA_8021Q_VBID_LO_MASK GENMASK(5, 4)
+#define DSA_8021Q_VBID_HI(x) (((x) & GENMASK(2, 2)) >> 2)
+#define DSA_8021Q_VBID_LO(x) ((x) & GENMASK(1, 0))
+#define DSA_8021Q_VBID(x) \
+ (((DSA_8021Q_VBID_LO(x) << DSA_8021Q_VBID_LO_SHIFT) & \
+ DSA_8021Q_VBID_LO_MASK) | \
+ ((DSA_8021Q_VBID_HI(x) << DSA_8021Q_VBID_HI_SHIFT) & \
+ DSA_8021Q_VBID_HI_MASK))
#define DSA_8021Q_PORT_SHIFT 0
#define DSA_8021Q_PORT_MASK GENMASK(3, 0)
#define DSA_8021Q_PORT(x) (((x) << DSA_8021Q_PORT_SHIFT) & \
DSA_8021Q_PORT_MASK)
+/* Build the tag_8021q TX VID used for bridge TX forwarding offload: the TX
+ * direction bits OR-ed with the VBID field derived from @bridge_num.
+ */
+u16 dsa_8021q_bridge_tx_fwd_offload_vid(int bridge_num)
+{
+	/* VBID 0 is reserved for precise TX, so bridge numbers are encoded
+	 * shifted up by one.
+	 */
+	int vbid = bridge_num + 1;
+
+	return DSA_8021Q_DIR_TX | DSA_8021Q_VBID(vbid);
+}
+EXPORT_SYMBOL_GPL(dsa_8021q_bridge_tx_fwd_offload_vid);
+
/* Returns the VID to be inserted into the frame from xmit for switch steering
* instructions on egress. Encodes switch ID and port ID.
*/
@@ -98,13 +95,6 @@ u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port)
}
EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid);
-u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan)
-{
- return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(ds->index) |
- DSA_8021Q_PORT(port) | DSA_8021Q_SUBVLAN(subvlan);
-}
-EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid_subvlan);
-
/* Returns the decoded switch ID from the RX VID. */
int dsa_8021q_rx_switch_id(u16 vid)
{
@@ -119,20 +109,6 @@ int dsa_8021q_rx_source_port(u16 vid)
}
EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port);
-/* Returns the decoded subvlan from the RX VID. */
-u16 dsa_8021q_rx_subvlan(u16 vid)
-{
- u16 svl_hi, svl_lo;
-
- svl_hi = (vid & DSA_8021Q_SUBVLAN_HI_MASK) >>
- DSA_8021Q_SUBVLAN_HI_SHIFT;
- svl_lo = (vid & DSA_8021Q_SUBVLAN_LO_MASK) >>
- DSA_8021Q_SUBVLAN_LO_SHIFT;
-
- return (svl_hi << 2) | svl_lo;
-}
-EXPORT_SYMBOL_GPL(dsa_8021q_rx_subvlan);
-
bool vid_is_dsa_8021q_rxvlan(u16 vid)
{
return (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_RX;
@@ -151,21 +127,152 @@ bool vid_is_dsa_8021q(u16 vid)
}
EXPORT_SYMBOL_GPL(vid_is_dsa_8021q);
-/* If @enabled is true, installs @vid with @flags into the switch port's HW
- * filter.
- * If @enabled is false, deletes @vid (ignores @flags) from the port. Had the
- * user explicitly configured this @vid through the bridge core, then the @vid
- * is installed again, but this time with the flags from the bridge layer.
- */
-static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid,
- u16 flags, bool enabled)
+static struct dsa_tag_8021q_vlan *
+dsa_tag_8021q_vlan_find(struct dsa_8021q_context *ctx, int port, u16 vid)
{
- struct dsa_port *dp = dsa_to_port(ctx->ds, port);
+ struct dsa_tag_8021q_vlan *v;
- if (enabled)
- return ctx->ops->vlan_add(ctx->ds, dp->index, vid, flags);
+ list_for_each_entry(v, &ctx->vlans, list)
+ if (v->vid == vid && v->port == port)
+ return v;
- return ctx->ops->vlan_del(ctx->ds, dp->index, vid);
+ return NULL;
+}
+
+/* Install tag_8021q VLAN @vid with @flags on @port of @ds.
+ *
+ * Ports that are neither CPU nor DSA ports are programmed directly through
+ * ds->ops->tag_8021q_vlan_add(). CPU and DSA ports keep one tracking entry
+ * per (port, vid) pair on ctx->vlans: an existing entry only gains a
+ * reference, a new one is programmed into hardware first and then listed.
+ *
+ * Returns 0 on success, -ENOMEM if the tracking entry cannot be allocated,
+ * or the error reported by the driver callback.
+ */
+static int dsa_switch_do_tag_8021q_vlan_add(struct dsa_switch *ds, int port,
+					    u16 vid, u16 flags)
+{
+	struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+	struct dsa_port *dp = dsa_to_port(ds, port);
+	struct dsa_tag_8021q_vlan *entry;
+	int rc;
+
+	/* No refcounting needed outside of CPU and DSA ports */
+	if (!dsa_port_is_cpu(dp) && !dsa_port_is_dsa(dp))
+		return ds->ops->tag_8021q_vlan_add(ds, port, vid, flags);
+
+	entry = dsa_tag_8021q_vlan_find(ctx, port, vid);
+	if (entry) {
+		/* Already installed: just account for one more user */
+		refcount_inc(&entry->refcount);
+		return 0;
+	}
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	rc = ds->ops->tag_8021q_vlan_add(ds, port, vid, flags);
+	if (rc) {
+		kfree(entry);
+		return rc;
+	}
+
+	/* Hardware accepted the VLAN: start tracking it with one user */
+	entry->port = port;
+	entry->vid = vid;
+	refcount_set(&entry->refcount, 1);
+	list_add_tail(&entry->list, &ctx->vlans);
+
+	return 0;
+}
+
+/* Remove tag_8021q VLAN @vid from @port of @ds.
+ *
+ * Ports that are neither CPU nor DSA ports are handled directly through
+ * ds->ops->tag_8021q_vlan_del(). For CPU and DSA ports one reference is
+ * dropped from the (port, vid) tracking entry on ctx->vlans, and the VLAN is
+ * only removed from hardware (and the entry freed) when the last reference
+ * goes away.
+ *
+ * Returns 0 on success, -ENOENT if no tracking entry exists for a CPU/DSA
+ * port, or the error reported by the driver callback. On a driver error the
+ * entry is kept with a single reference so the deletion can be retried.
+ */
+static int dsa_switch_do_tag_8021q_vlan_del(struct dsa_switch *ds, int port,
+					    u16 vid)
+{
+	struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+	struct dsa_port *dp = dsa_to_port(ds, port);
+	struct dsa_tag_8021q_vlan *v;
+	int err;
+
+	/* No need to bother with refcounting for user ports */
+	if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+		return ds->ops->tag_8021q_vlan_del(ds, port, vid);
+
+	v = dsa_tag_8021q_vlan_find(ctx, port, vid);
+	if (!v)
+		return -ENOENT;
+
+	/* Other users remain: keep the VLAN installed */
+	if (!refcount_dec_and_test(&v->refcount))
+		return 0;
+
+	err = ds->ops->tag_8021q_vlan_del(ds, port, vid);
+	if (err) {
+		/* The count is zero here, and refcount_inc() on zero warns
+		 * and saturates the counter, which would pin the entry
+		 * forever. Restore the single reference explicitly instead.
+		 */
+		refcount_set(&v->refcount, 1);
+		return err;
+	}
+
+	list_del(&v->list);
+	kfree(v);
+
+	return 0;
+}
+
+/* Decide whether @port of @ds takes part in the tag_8021q VLAN operation
+ * described by @info: every CPU and DSA port matches, and on the switch
+ * identified by @info (tree index and switch index) the port named by @info
+ * matches as well. Nothing else does.
+ */
+static bool
+dsa_switch_tag_8021q_vlan_match(struct dsa_switch *ds, int port,
+				struct dsa_notifier_tag_8021q_vlan_info *info)
+{
+	if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
+		return true;
+
+	/* A different switch than the targeted one has no other match */
+	if (ds->dst->index != info->tree_index || ds->index != info->sw_index)
+		return false;
+
+	return port == info->port;
+}
+
+/* Handle a tag_8021q VLAN addition on @ds: install info->vid on every port
+ * selected by dsa_switch_tag_8021q_vlan_match().
+ *
+ * User ports get the VLAN as egress-untagged; the port whose own RX VID this
+ * is (same switch index and source port encoded in the VID) also gets it as
+ * PVID. Returns 0 on success or the first error from installing the VLAN.
+ */
+int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
+				  struct dsa_notifier_tag_8021q_vlan_info *info)
+{
+	int port;
+
+	/* This is reached through dsa_broadcast(), so switches in other trees
+	 * may not support tag_8021q at all, or may support it without having
+	 * registered for it yet (e.g. while using another tagger). Neither
+	 * case is an error.
+	 */
+	if (!ds->ops->tag_8021q_vlan_add || !ds->tag_8021q_ctx)
+		return 0;
+
+	for (port = 0; port < ds->num_ports; port++) {
+		u16 flags = 0;
+		int err;
+
+		if (!dsa_switch_tag_8021q_vlan_match(ds, port, info))
+			continue;
+
+		if (dsa_is_user_port(ds, port))
+			flags |= BRIDGE_VLAN_INFO_UNTAGGED;
+
+		if (vid_is_dsa_8021q_rxvlan(info->vid) &&
+		    dsa_8021q_rx_switch_id(info->vid) == ds->index &&
+		    dsa_8021q_rx_source_port(info->vid) == port)
+			flags |= BRIDGE_VLAN_INFO_PVID;
+
+		err = dsa_switch_do_tag_8021q_vlan_add(ds, port, info->vid,
+						       flags);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Handle a tag_8021q VLAN deletion on @ds: remove info->vid from every port
+ * selected by dsa_switch_tag_8021q_vlan_match(). Switches without the
+ * tag_8021q_vlan_del operation or without a tag_8021q context are silently
+ * skipped. Returns 0 on success or the first error from removing the VLAN.
+ */
+int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
+				  struct dsa_notifier_tag_8021q_vlan_info *info)
+{
+	int port;
+
+	if (!ds->ops->tag_8021q_vlan_del || !ds->tag_8021q_ctx)
+		return 0;
+
+	for (port = 0; port < ds->num_ports; port++) {
+		int err;
+
+		if (!dsa_switch_tag_8021q_vlan_match(ds, port, info))
+			continue;
+
+		err = dsa_switch_do_tag_8021q_vlan_del(ds, port, info->vid);
+		if (err)
+			return err;
+	}
+
+	return 0;
}
/* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single
@@ -181,12 +288,6 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid,
* force all switched traffic to pass through the CPU. So we must also make
* the other front-panel ports members of this VID we're adding, albeit
* we're not making it their PVID (they'll still have their own).
- * By the way - just because we're installing the same VID in multiple
- * switch ports doesn't mean that they'll start to talk to one another, even
- * while not bridged: the final forwarding decision is still an AND between
- * the L2 forwarding information (which is limiting forwarding in this case)
- * and the VLAN-based restrictions (of which there are none in this case,
- * since all ports are members).
* - On TX (ingress from CPU and towards network) we are faced with a problem.
* If we were to tag traffic (from within DSA) with the port's pvid, all
* would be well, assuming the switch ports were standalone. Frames would
@@ -200,9 +301,10 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid,
* a member of the VID we're tagging the traffic with - the desired one.
*
* So at the end, each front-panel port will have one RX VID (also the PVID),
- * the RX VID of all other front-panel ports, and one TX VID. Whereas the CPU
- * port will have the RX and TX VIDs of all front-panel ports, and on top of
- * that, is also tagged-input and tagged-output (VLAN trunk).
+ * the RX VID of all other front-panel ports that are in the same bridge, and
+ * one TX VID. Whereas the CPU port will have the RX and TX VIDs of all
+ * front-panel ports, and on top of that, is also tagged-input and
+ * tagged-output (VLAN trunk).
*
* CPU port CPU port
* +-------------+-----+-------------+ +-------------+-----+-------------+
@@ -220,246 +322,246 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid,
* +-+-----+-+-----+-+-----+-+-----+-+ +-+-----+-+-----+-+-----+-+-----+-+
* swp0 swp1 swp2 swp3 swp0 swp1 swp2 swp3
*/
-static int dsa_8021q_setup_port(struct dsa_8021q_context *ctx, int port,
- bool enabled)
+/* Tell whether @port of @ds is a peer of the port described by @info inside
+ * the bridge info->br: it must be a user port currently attached to that
+ * bridge, and it must not be the port described by @info itself.
+ */
+static bool dsa_tag_8021q_bridge_match(struct dsa_switch *ds, int port,
+				       struct dsa_notifier_bridge_info *info)
+{
+	struct dsa_port *dp = dsa_to_port(ds, port);
+	bool same_switch = ds->dst->index == info->tree_index &&
+			   ds->index == info->sw_index;
+
+	/* Don't match on self */
+	if (same_switch && port == info->port)
+		return false;
+
+	return dsa_port_is_user(dp) && dp->bridge_dev == info->br;
+}
+
+/* Cross-install RX VIDs when the port described by @info joins a bridge.
+ *
+ * For every port of @ds selected by dsa_tag_8021q_bridge_match(), the joining
+ * port's RX VID is added on that port and that port's RX VID is added on the
+ * joining port. Does nothing on switches without a tag_8021q context.
+ * Returns 0 on success or the first error from adding a VLAN; VLANs added
+ * before the failure are left in place.
+ */
+int dsa_tag_8021q_bridge_join(struct dsa_switch *ds,
+			      struct dsa_notifier_bridge_info *info)
+{
+	struct dsa_switch *joining_ds;
+	struct dsa_port *joining_dp;
+	u16 joining_rx_vid;
+	int port;
+
+	if (!ds->tag_8021q_ctx)
+		return 0;
+
+	/* NOTE(review): the lookup result is used unchecked; presumably the
+	 * switch named by @info is always registered - confirm.
+	 */
+	joining_ds = dsa_switch_find(info->tree_index, info->sw_index);
+	joining_dp = dsa_to_port(joining_ds, info->port);
+	joining_rx_vid = dsa_8021q_rx_vid(joining_ds, info->port);
+
+	for (port = 0; port < ds->num_ports; port++) {
+		struct dsa_port *dp;
+		u16 rx_vid;
+		int err;
+
+		if (!dsa_tag_8021q_bridge_match(ds, port, info))
+			continue;
+
+		dp = dsa_to_port(ds, port);
+		rx_vid = dsa_8021q_rx_vid(ds, port);
+
+		/* The joining port's RX VID goes into our VLAN table... */
+		err = dsa_port_tag_8021q_vlan_add(dp, joining_rx_vid, true);
+		if (err)
+			return err;
+
+		/* ...and our RX VID goes into the joining port's table */
+		err = dsa_port_tag_8021q_vlan_add(joining_dp, rx_vid, true);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds,
+ struct dsa_notifier_bridge_info *info)
{
- int upstream = dsa_upstream_port(ctx->ds, port);
- u16 rx_vid = dsa_8021q_rx_vid(ctx->ds, port);
- u16 tx_vid = dsa_8021q_tx_vid(ctx->ds, port);
+ struct dsa_switch *targeted_ds;
+ struct dsa_port *targeted_dp;
+ u16 targeted_rx_vid;
+ int port;
+
+ if (!ds->tag_8021q_ctx)
+ return 0;
+
+ targeted_ds = dsa_switch_find(info->tree_index, info->sw_index);
+ targeted_dp = dsa_to_port(targeted_ds, info->port);
+ targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port);
+
+ for (port = 0; port < ds->num_ports; port++) {
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+
+ if (!dsa_tag_8021q_bridge_match(ds, port, info))
+ continue;
+
+ /* Remove the RX VID of the targeted port from our VLAN table */
+ dsa_port_tag_8021q_vlan_del(dp, targeted_rx_vid, true);
+
+ /* Remove our RX VID from the targeted port's VLAN table */
+ dsa_port_tag_8021q_vlan_del(targeted_dp, rx_vid, true);
+ }
+
+ return 0;
+}
+
+/* Enable bridge TX forwarding offload for @port of @ds by adding the TX VID
+ * derived from @bridge_num on that port. @br is not used here. Returns the
+ * result of dsa_port_tag_8021q_vlan_add().
+ */
+int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port,
+					struct net_device *br,
+					int bridge_num)
+{
+	u16 vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num);
+	struct dsa_port *dp = dsa_to_port(ds, port);
+
+	return dsa_port_tag_8021q_vlan_add(dp, vid, true);
+}
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_offload);
+
+/* Disable bridge TX forwarding offload for @port of @ds by deleting the TX
+ * VID derived from @bridge_num from that port. @br is not used here, and the
+ * result of the deletion is not reported to the caller.
+ */
+void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port,
+					   struct net_device *br,
+					   int bridge_num)
+{
+	u16 vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num);
+	struct dsa_port *dp = dsa_to_port(ds, port);
+
+	dsa_port_tag_8021q_vlan_del(dp, vid, true);
+}
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_unoffload);
+
+/* Set up a port's tag_8021q RX and TX VLAN for standalone mode operation */
+static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port)
+{
+ struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+ u16 tx_vid = dsa_8021q_tx_vid(ds, port);
struct net_device *master;
- int i, err, subvlan;
+ int err;
/* The CPU port is implicitly configured by
* configuring the front-panel ports
*/
- if (!dsa_is_user_port(ctx->ds, port))
+ if (!dsa_port_is_user(dp))
return 0;
- master = dsa_to_port(ctx->ds, port)->cpu_dp->master;
+ master = dp->cpu_dp->master;
/* Add this user port's RX VID to the membership list of all others
* (including itself). This is so that bridging will not be hindered.
* L2 forwarding rules still take precedence when there are no VLAN
* restrictions, so there are no concerns about leaking traffic.
*/
- for (i = 0; i < ctx->ds->num_ports; i++) {
- u16 flags;
-
- if (i == upstream)
- continue;
- else if (i == port)
- /* The RX VID is pvid on this port */
- flags = BRIDGE_VLAN_INFO_UNTAGGED |
- BRIDGE_VLAN_INFO_PVID;
- else
- /* The RX VID is a regular VLAN on all others */
- flags = BRIDGE_VLAN_INFO_UNTAGGED;
-
- err = dsa_8021q_vid_apply(ctx, i, rx_vid, flags, enabled);
- if (err) {
- dev_err(ctx->ds->dev,
- "Failed to apply RX VID %d to port %d: %d\n",
- rx_vid, port, err);
- return err;
- }
- }
-
- /* CPU port needs to see this port's RX VID
- * as tagged egress.
- */
- err = dsa_8021q_vid_apply(ctx, upstream, rx_vid, 0, enabled);
+ err = dsa_port_tag_8021q_vlan_add(dp, rx_vid, false);
if (err) {
- dev_err(ctx->ds->dev,
- "Failed to apply RX VID %d to port %d: %d\n",
- rx_vid, port, err);
+ dev_err(ds->dev,
+ "Failed to apply RX VID %d to port %d: %pe\n",
+ rx_vid, port, ERR_PTR(err));
return err;
}
- /* Add to the master's RX filter not only @rx_vid, but in fact
- * the entire subvlan range, just in case this DSA switch might
- * want to use sub-VLANs.
- */
- for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++) {
- u16 vid = dsa_8021q_rx_vid_subvlan(ctx->ds, port, subvlan);
-
- if (enabled)
- vlan_vid_add(master, ctx->proto, vid);
- else
- vlan_vid_del(master, ctx->proto, vid);
- }
+ /* Add @rx_vid to the master's RX filter. */
+ vlan_vid_add(master, ctx->proto, rx_vid);
/* Finally apply the TX VID on this port and on the CPU port */
- err = dsa_8021q_vid_apply(ctx, port, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED,
- enabled);
- if (err) {
- dev_err(ctx->ds->dev,
- "Failed to apply TX VID %d on port %d: %d\n",
- tx_vid, port, err);
- return err;
- }
- err = dsa_8021q_vid_apply(ctx, upstream, tx_vid, 0, enabled);
+ err = dsa_port_tag_8021q_vlan_add(dp, tx_vid, false);
if (err) {
- dev_err(ctx->ds->dev,
- "Failed to apply TX VID %d on port %d: %d\n",
- tx_vid, upstream, err);
+ dev_err(ds->dev,
+ "Failed to apply TX VID %d on port %d: %pe\n",
+ tx_vid, port, ERR_PTR(err));
return err;
}
return err;
}
-int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled)
+static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port)
{
- int rc, port;
+ struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+ u16 tx_vid = dsa_8021q_tx_vid(ds, port);
+ struct net_device *master;
- ASSERT_RTNL();
+ /* The CPU port is implicitly configured by
+ * configuring the front-panel ports
+ */
+ if (!dsa_port_is_user(dp))
+ return;
- for (port = 0; port < ctx->ds->num_ports; port++) {
- rc = dsa_8021q_setup_port(ctx, port, enabled);
- if (rc < 0) {
- dev_err(ctx->ds->dev,
- "Failed to setup VLAN tagging for port %d: %d\n",
- port, rc);
- return rc;
- }
- }
+ master = dp->cpu_dp->master;
- return 0;
-}
-EXPORT_SYMBOL_GPL(dsa_8021q_setup);
+ dsa_port_tag_8021q_vlan_del(dp, rx_vid, false);
-static int dsa_8021q_crosschip_link_apply(struct dsa_8021q_context *ctx,
- int port,
- struct dsa_8021q_context *other_ctx,
- int other_port, bool enabled)
-{
- u16 rx_vid = dsa_8021q_rx_vid(ctx->ds, port);
+ vlan_vid_del(master, ctx->proto, rx_vid);
- /* @rx_vid of local @ds port @port goes to @other_port of
- * @other_ds
- */
- return dsa_8021q_vid_apply(other_ctx, other_port, rx_vid,
- BRIDGE_VLAN_INFO_UNTAGGED, enabled);
+ dsa_port_tag_8021q_vlan_del(dp, tx_vid, false);
}
-static int dsa_8021q_crosschip_link_add(struct dsa_8021q_context *ctx, int port,
- struct dsa_8021q_context *other_ctx,
- int other_port)
+static int dsa_tag_8021q_setup(struct dsa_switch *ds)
{
- struct dsa_8021q_crosschip_link *c;
+ int err, port;
+
+ ASSERT_RTNL();
- list_for_each_entry(c, &ctx->crosschip_links, list) {
- if (c->port == port && c->other_ctx == other_ctx &&
- c->other_port == other_port) {
- refcount_inc(&c->refcount);
- return 0;
+ for (port = 0; port < ds->num_ports; port++) {
+ err = dsa_tag_8021q_port_setup(ds, port);
+ if (err < 0) {
+ dev_err(ds->dev,
+ "Failed to setup VLAN tagging for port %d: %pe\n",
+ port, ERR_PTR(err));
+ return err;
}
}
- dev_dbg(ctx->ds->dev,
- "adding crosschip link from port %d to %s port %d\n",
- port, dev_name(other_ctx->ds->dev), other_port);
-
- c = kzalloc(sizeof(*c), GFP_KERNEL);
- if (!c)
- return -ENOMEM;
-
- c->port = port;
- c->other_ctx = other_ctx;
- c->other_port = other_port;
- refcount_set(&c->refcount, 1);
-
- list_add(&c->list, &ctx->crosschip_links);
-
return 0;
}
-static void dsa_8021q_crosschip_link_del(struct dsa_8021q_context *ctx,
- struct dsa_8021q_crosschip_link *c,
- bool *keep)
+static void dsa_tag_8021q_teardown(struct dsa_switch *ds)
{
- *keep = !refcount_dec_and_test(&c->refcount);
+ int port;
- if (*keep)
- return;
-
- dev_dbg(ctx->ds->dev,
- "deleting crosschip link from port %d to %s port %d\n",
- c->port, dev_name(c->other_ctx->ds->dev), c->other_port);
+ ASSERT_RTNL();
- list_del(&c->list);
- kfree(c);
+ for (port = 0; port < ds->num_ports; port++)
+ dsa_tag_8021q_port_teardown(ds, port);
}
-/* Make traffic from local port @port be received by remote port @other_port.
- * This means that our @rx_vid needs to be installed on @other_ds's upstream
- * and user ports. The user ports should be egress-untagged so that they can
- * pop the dsa_8021q VLAN. But the @other_upstream can be either egress-tagged
- * or untagged: it doesn't matter, since it should never egress a frame having
- * our @rx_vid.
- */
-int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port,
- struct dsa_8021q_context *other_ctx,
- int other_port)
+int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto)
{
- /* @other_upstream is how @other_ds reaches us. If we are part
- * of disjoint trees, then we are probably connected through
- * our CPU ports. If we're part of the same tree though, we should
- * probably use dsa_towards_port.
- */
- int other_upstream = dsa_upstream_port(other_ctx->ds, other_port);
- int rc;
+ struct dsa_8021q_context *ctx;
- rc = dsa_8021q_crosschip_link_add(ctx, port, other_ctx, other_port);
- if (rc)
- return rc;
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
- rc = dsa_8021q_crosschip_link_apply(ctx, port, other_ctx,
- other_port, true);
- if (rc)
- return rc;
+ ctx->proto = proto;
+ ctx->ds = ds;
- rc = dsa_8021q_crosschip_link_add(ctx, port, other_ctx, other_upstream);
- if (rc)
- return rc;
+ INIT_LIST_HEAD(&ctx->vlans);
- return dsa_8021q_crosschip_link_apply(ctx, port, other_ctx,
- other_upstream, true);
+ ds->tag_8021q_ctx = ctx;
+
+ return dsa_tag_8021q_setup(ds);
}
-EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_join);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_register);
-int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port,
- struct dsa_8021q_context *other_ctx,
- int other_port)
+void dsa_tag_8021q_unregister(struct dsa_switch *ds)
{
- int other_upstream = dsa_upstream_port(other_ctx->ds, other_port);
- struct dsa_8021q_crosschip_link *c, *n;
-
- list_for_each_entry_safe(c, n, &ctx->crosschip_links, list) {
- if (c->port == port && c->other_ctx == other_ctx &&
- (c->other_port == other_port ||
- c->other_port == other_upstream)) {
- struct dsa_8021q_context *other_ctx = c->other_ctx;
- int other_port = c->other_port;
- bool keep;
- int rc;
-
- dsa_8021q_crosschip_link_del(ctx, c, &keep);
- if (keep)
- continue;
-
- rc = dsa_8021q_crosschip_link_apply(ctx, port,
- other_ctx,
- other_port,
- false);
- if (rc)
- return rc;
- }
+ struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+ struct dsa_tag_8021q_vlan *v, *n;
+
+ dsa_tag_8021q_teardown(ds);
+
+ list_for_each_entry_safe(v, n, &ctx->vlans, list) {
+ list_del(&v->list);
+ kfree(v);
}
- return 0;
+ ds->tag_8021q_ctx = NULL;
+
+ kfree(ctx);
}
-EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_leave);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_unregister);
struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
u16 tpid, u16 tci)
@@ -471,8 +573,7 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
}
EXPORT_SYMBOL_GPL(dsa_8021q_xmit);
-void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
- int *subvlan)
+void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id)
{
u16 vid, tci;
@@ -489,9 +590,6 @@ void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
*source_port = dsa_8021q_rx_source_port(vid);
*switch_id = dsa_8021q_rx_switch_id(vid);
- *subvlan = dsa_8021q_rx_subvlan(vid);
skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
}
EXPORT_SYMBOL_GPL(dsa_8021q_rcv);
-
-MODULE_LICENSE("GPL v2");
diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c
index 0efae1a372b3..8a02ac44282f 100644
--- a/net/dsa/tag_ar9331.c
+++ b/net/dsa/tag_ar9331.c
@@ -44,8 +44,7 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb,
}
static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb,
- struct net_device *ndev,
- struct packet_type *pt)
+ struct net_device *ndev)
{
u8 ver, port;
u16 hdr;
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 0750af951fc9..96dbb8ee2fee 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -99,7 +99,7 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
skb_push(skb, BRCM_TAG_LEN);
if (offset)
- memmove(skb->data, skb->data + BRCM_TAG_LEN, offset);
+ dsa_alloc_etype_header(skb, BRCM_TAG_LEN);
brcm_tag = skb->data + offset;
@@ -136,7 +136,6 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
*/
static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
struct net_device *dev,
- struct packet_type *pt,
unsigned int offset)
{
int source_port;
@@ -167,7 +166,7 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
/* Remove Broadcom tag and update checksum */
skb_pull_rcsum(skb, BRCM_TAG_LEN);
- skb->offload_fwd_mark = 1;
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
@@ -182,20 +181,16 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb,
}
-static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev)
{
struct sk_buff *nskb;
/* skb->data points to the EtherType, the tag is right before it */
- nskb = brcm_tag_rcv_ll(skb, dev, pt, 2);
+ nskb = brcm_tag_rcv_ll(skb, dev, 2);
if (!nskb)
return nskb;
- /* Move the Ethernet DA and SA */
- memmove(nskb->data - ETH_HLEN,
- nskb->data - ETH_HLEN - BRCM_TAG_LEN,
- 2 * ETH_ALEN);
+ dsa_strip_etype_header(skb, BRCM_TAG_LEN);
return nskb;
}
@@ -233,7 +228,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb,
skb_push(skb, BRCM_LEG_TAG_LEN);
- memmove(skb->data, skb->data + BRCM_LEG_TAG_LEN, 2 * ETH_ALEN);
+ dsa_alloc_etype_header(skb, BRCM_LEG_TAG_LEN);
brcm_tag = skb->data + 2 * ETH_ALEN;
@@ -251,8 +246,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb,
}
static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt)
+ struct net_device *dev)
{
int source_port;
u8 *brcm_tag;
@@ -260,7 +254,7 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, BRCM_LEG_PORT_ID)))
return NULL;
- brcm_tag = skb->data - 2;
+ brcm_tag = dsa_etype_header_pos_rx(skb);
source_port = brcm_tag[5] & BRCM_LEG_PORT_ID;
@@ -271,12 +265,9 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
/* Remove Broadcom tag and update checksum */
skb_pull_rcsum(skb, BRCM_LEG_TAG_LEN);
- skb->offload_fwd_mark = 1;
+ dsa_default_offload_fwd_mark(skb);
- /* Move the Ethernet DA and SA */
- memmove(skb->data - ETH_HLEN,
- skb->data - ETH_HLEN - BRCM_LEG_TAG_LEN,
- 2 * ETH_ALEN);
+ dsa_strip_etype_header(skb, BRCM_LEG_TAG_LEN);
return skb;
}
@@ -302,11 +293,10 @@ static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb,
}
static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt)
+ struct net_device *dev)
{
/* tag is prepended to the packet */
- return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
+ return brcm_tag_rcv_ll(skb, dev, ETH_HLEN);
}
static const struct dsa_device_ops brcm_prepend_netdev_ops = {
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index a822355afc90..77d0ce89ab77 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -126,18 +126,53 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
u8 extra)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
+ u8 tag_dev, tag_port;
+ enum dsa_cmd cmd;
u8 *dsa_header;
+ u16 pvid = 0;
+ int err;
+
+ if (skb->offload_fwd_mark) {
+ struct dsa_switch_tree *dst = dp->ds->dst;
+ struct net_device *br = dp->bridge_dev;
+
+ cmd = DSA_CMD_FORWARD;
+
+ /* When offloading forwarding for a bridge, inject FORWARD
+ * packets on behalf of a virtual switch device with an index
+ * past the physical switches.
+ */
+ tag_dev = dst->last_switch + 1 + dp->bridge_num;
+ tag_port = 0;
+
+ /* If we are offloading forwarding for a VLAN-unaware bridge,
+ * inject packets to hardware using the bridge's pvid, since
+ * that's where the packets ingressed from.
+ */
+ if (!br_vlan_enabled(br)) {
+ /* Safe because __dev_queue_xmit() runs under
+ * rcu_read_lock_bh()
+ */
+ err = br_vlan_get_pvid_rcu(br, &pvid);
+ if (err)
+ return NULL;
+ }
+ } else {
+ cmd = DSA_CMD_FROM_CPU;
+ tag_dev = dp->ds->index;
+ tag_port = dp->index;
+ }
if (skb->protocol == htons(ETH_P_8021Q)) {
if (extra) {
skb_push(skb, extra);
- memmove(skb->data, skb->data + extra, 2 * ETH_ALEN);
+ dsa_alloc_etype_header(skb, extra);
}
- /* Construct tagged FROM_CPU DSA tag from 802.1Q tag. */
- dsa_header = skb->data + 2 * ETH_ALEN + extra;
- dsa_header[0] = (DSA_CMD_FROM_CPU << 6) | 0x20 | dp->ds->index;
- dsa_header[1] = dp->index << 3;
+ /* Construct tagged DSA tag from 802.1Q tag. */
+ dsa_header = dsa_etype_header_pos_tx(skb) + extra;
+ dsa_header[0] = (cmd << 6) | 0x20 | tag_dev;
+ dsa_header[1] = tag_port << 3;
/* Move CFI field from byte 2 to byte 1. */
if (dsa_header[2] & 0x10) {
@@ -146,14 +181,15 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
}
} else {
skb_push(skb, DSA_HLEN + extra);
- memmove(skb->data, skb->data + DSA_HLEN + extra, 2 * ETH_ALEN);
-
- /* Construct untagged FROM_CPU DSA tag. */
- dsa_header = skb->data + 2 * ETH_ALEN + extra;
- dsa_header[0] = (DSA_CMD_FROM_CPU << 6) | dp->ds->index;
- dsa_header[1] = dp->index << 3;
- dsa_header[2] = 0x00;
- dsa_header[3] = 0x00;
+ dsa_alloc_etype_header(skb, DSA_HLEN + extra);
+
+ /* Construct untagged DSA tag. */
+ dsa_header = dsa_etype_header_pos_tx(skb) + extra;
+
+ dsa_header[0] = (cmd << 6) | tag_dev;
+ dsa_header[1] = tag_port << 3;
+ dsa_header[2] = pvid >> 8;
+ dsa_header[3] = pvid & 0xff;
}
return skb;
@@ -162,20 +198,18 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
u8 extra)
{
+ bool trap = false, trunk = false;
int source_device, source_port;
- bool trunk = false;
enum dsa_code code;
enum dsa_cmd cmd;
u8 *dsa_header;
/* The ethertype field is part of the DSA header. */
- dsa_header = skb->data - 2;
+ dsa_header = dsa_etype_header_pos_rx(skb);
cmd = dsa_header[0] >> 6;
switch (cmd) {
case DSA_CMD_FORWARD:
- skb->offload_fwd_mark = 1;
-
trunk = !!(dsa_header[1] & 7);
break;
@@ -194,7 +228,6 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
* device (like a bridge) that forwarding has
* already been done by hardware.
*/
- skb->offload_fwd_mark = 1;
break;
case DSA_CODE_MGMT_TRAP:
case DSA_CODE_IGMP_MLD_TRAP:
@@ -202,6 +235,7 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
/* Traps have, by definition, not been
* forwarded by hardware, so don't mark them.
*/
+ trap = true;
break;
default:
/* Reserved code, this could be anything. Drop
@@ -235,6 +269,15 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
if (!skb->dev)
return NULL;
+ /* When using LAG offload, skb->dev is not a DSA slave interface,
+ * so we cannot call dsa_default_offload_fwd_mark and we need to
+ * special-case it.
+ */
+ if (trunk)
+ skb->offload_fwd_mark = true;
+ else if (!trap)
+ dsa_default_offload_fwd_mark(skb);
+
/* If the 'tagged' bit is set; convert the DSA tag to a 802.1Q
* tag, and delete the ethertype (extra) if applicable. If the
* 'tagged' bit is cleared; delete the DSA tag, and ethertype
@@ -269,14 +312,10 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
memcpy(dsa_header, new_header, DSA_HLEN);
if (extra)
- memmove(skb->data - ETH_HLEN,
- skb->data - ETH_HLEN - extra,
- 2 * ETH_ALEN);
+ dsa_strip_etype_header(skb, extra);
} else {
skb_pull_rcsum(skb, DSA_HLEN);
- memmove(skb->data - ETH_HLEN,
- skb->data - ETH_HLEN - DSA_HLEN - extra,
- 2 * ETH_ALEN);
+ dsa_strip_etype_header(skb, DSA_HLEN + extra);
}
return skb;
@@ -289,8 +328,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
return dsa_xmit_ll(skb, dev, 0);
}
-static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev)
{
if (unlikely(!pskb_may_pull(skb, DSA_HLEN)))
return NULL;
@@ -322,7 +360,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
if (!skb)
return NULL;
- edsa_header = skb->data + 2 * ETH_ALEN;
+ edsa_header = dsa_etype_header_pos_tx(skb);
edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff;
edsa_header[1] = ETH_P_EDSA & 0xff;
edsa_header[2] = 0x00;
@@ -330,8 +368,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
return skb;
}
-static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev)
{
if (unlikely(!pskb_may_pull(skb, EDSA_HLEN)))
return NULL;
diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c
index 5985dab06ab8..df7140984da3 100644
--- a/net/dsa/tag_gswip.c
+++ b/net/dsa/tag_gswip.c
@@ -75,8 +75,7 @@ static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb,
}
static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt)
+ struct net_device *dev)
{
int port;
u8 *gswip_tag;
diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c
index 424130f85f59..f64b805303cd 100644
--- a/net/dsa/tag_hellcreek.c
+++ b/net/dsa/tag_hellcreek.c
@@ -29,8 +29,7 @@ static struct sk_buff *hellcreek_xmit(struct sk_buff *skb,
}
static struct sk_buff *hellcreek_rcv(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt)
+ struct net_device *dev)
{
/* Tag decoding */
u8 *tag = skb_tail_pointer(skb) - HELLCREEK_TAG_LEN;
@@ -44,7 +43,7 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb,
pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN);
- skb->offload_fwd_mark = true;
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index a201ccf2435d..fa1d60d13ad9 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -24,7 +24,7 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
pskb_trim_rcsum(skb, skb->len - len);
- skb->offload_fwd_mark = true;
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
@@ -67,8 +67,7 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev)
return skb;
}
-static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev)
{
u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
@@ -134,8 +133,7 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
return skb;
}
-static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev)
{
/* Tag decoding */
u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 26207ef39ebc..cb548188f813 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -62,9 +62,10 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
skb_push(skb, LAN9303_TAG_LEN);
/* make room between MACs and Ether-Type */
- memmove(skb->data, skb->data + LAN9303_TAG_LEN, 2 * ETH_ALEN);
+ dsa_alloc_etype_header(skb, LAN9303_TAG_LEN);
+
+ lan9303_tag = dsa_etype_header_pos_tx(skb);
- lan9303_tag = (__be16 *)(skb->data + 2 * ETH_ALEN);
tag = lan9303_xmit_use_arl(dp, skb->data) ?
LAN9303_TAG_TX_USE_ALR :
dp->index | LAN9303_TAG_TX_STP_OVERRIDE;
@@ -74,8 +75,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
return skb;
}
-static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
{
__be16 *lan9303_tag;
u16 lan9303_tag1;
@@ -87,13 +87,7 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
return NULL;
}
- /* '->data' points into the middle of our special VLAN tag information:
- *
- * ~ MAC src | 0x81 | 0x00 | 0xyy | 0xzz | ether type
- * ^
- * ->data
- */
- lan9303_tag = (__be16 *)(skb->data - 2);
+ lan9303_tag = dsa_etype_header_pos_rx(skb);
if (lan9303_tag[0] != htons(ETH_P_8021Q)) {
dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid VLAN marker\n");
@@ -113,9 +107,11 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
* and the current ethertype field.
*/
skb_pull_rcsum(skb, 2 + 2);
- memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN),
- 2 * ETH_ALEN);
- skb->offload_fwd_mark = !(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU);
+
+ dsa_strip_etype_header(skb, LAN9303_TAG_LEN);
+
+ if (!(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU))
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index cc3ba864ad5b..415d8ece242a 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -41,10 +41,10 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
default:
xmit_tpid = MTK_HDR_XMIT_UNTAGGED;
skb_push(skb, MTK_HDR_LEN);
- memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
+ dsa_alloc_etype_header(skb, MTK_HDR_LEN);
}
- mtk_tag = skb->data + 2 * ETH_ALEN;
+ mtk_tag = dsa_etype_header_pos_tx(skb);
/* Mark tag attribute on special tag insertion to notify hardware
* whether that's a combined special tag with 802.1Q header.
@@ -61,8 +61,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
return skb;
}
-static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev)
{
u16 hdr;
int port;
@@ -71,19 +70,13 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN)))
return NULL;
- /* The MTK header is added by the switch between src addr
- * and ethertype at this point, skb->data points to 2 bytes
- * after src addr so header should be 2 bytes right before.
- */
- phdr = (__be16 *)(skb->data - 2);
+ phdr = dsa_etype_header_pos_rx(skb);
hdr = ntohs(*phdr);
/* Remove MTK tag and recalculate checksum. */
skb_pull_rcsum(skb, MTK_HDR_LEN);
- memmove(skb->data - ETH_HLEN,
- skb->data - ETH_HLEN - MTK_HDR_LEN,
- 2 * ETH_ALEN);
+ dsa_strip_etype_header(skb, MTK_HDR_LEN);
/* Get source port information */
port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK);
@@ -92,7 +85,7 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
if (!skb->dev)
return NULL;
- skb->offload_fwd_mark = 1;
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index 190f4bfd3bef..d37ab98e7fe1 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -55,8 +55,7 @@ static struct sk_buff *seville_xmit(struct sk_buff *skb,
}
static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
- struct net_device *netdev,
- struct packet_type *pt)
+ struct net_device *netdev)
{
u64 src_port, qos_class;
u64 vlan_tci, tag_type;
@@ -104,7 +103,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
*/
return NULL;
- skb->offload_fwd_mark = 1;
+ dsa_default_offload_fwd_mark(skb);
skb->priority = qos_class;
/* Ocelot switches copy frames unmodified to the CPU. However, it is
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 85ac85c3af8c..3038a257ba05 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -38,18 +38,17 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
}
static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
- struct net_device *netdev,
- struct packet_type *pt)
+ struct net_device *netdev)
{
- int src_port, switch_id, subvlan;
+ int src_port, switch_id;
- dsa_8021q_rcv(skb, &src_port, &switch_id, &subvlan);
+ dsa_8021q_rcv(skb, &src_port, &switch_id);
skb->dev = dsa_master_find_slave(netdev, switch_id, src_port);
if (!skb->dev)
return NULL;
- skb->offload_fwd_mark = 1;
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 693bda013065..1ea9401b8ace 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -36,8 +36,8 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
skb_push(skb, QCA_HDR_LEN);
- memmove(skb->data, skb->data + QCA_HDR_LEN, 2 * ETH_ALEN);
- phdr = (__be16 *)(skb->data + 2 * ETH_ALEN);
+ dsa_alloc_etype_header(skb, QCA_HDR_LEN);
+ phdr = dsa_etype_header_pos_tx(skb);
/* Set the version field, and set destination port information */
hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S |
@@ -48,8 +48,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
return skb;
}
-static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
{
u8 ver;
u16 hdr;
@@ -59,11 +58,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN)))
return NULL;
- /* The QCA header is added by the switch between src addr and Ethertype
- * At this point, skb->data points to ethertype so header should be
- * right before
- */
- phdr = (__be16 *)(skb->data - 2);
+ phdr = dsa_etype_header_pos_rx(skb);
hdr = ntohs(*phdr);
/* Make sure the version is correct */
@@ -73,8 +68,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
/* Remove QCA tag and recalculate checksum */
skb_pull_rcsum(skb, QCA_HDR_LEN);
- memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN,
- ETH_HLEN - QCA_HDR_LEN);
+ dsa_strip_etype_header(skb, QCA_HDR_LEN);
/* Get source port information */
port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
index 57c46b4ab2b3..40811bab4d09 100644
--- a/net/dsa/tag_rtl4_a.c
+++ b/net/dsa/tag_rtl4_a.c
@@ -47,8 +47,8 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
dp->index);
skb_push(skb, RTL4_A_HDR_LEN);
- memmove(skb->data, skb->data + RTL4_A_HDR_LEN, 2 * ETH_ALEN);
- tag = skb->data + 2 * ETH_ALEN;
+ dsa_alloc_etype_header(skb, RTL4_A_HDR_LEN);
+ tag = dsa_etype_header_pos_tx(skb);
/* Set Ethertype */
p = (__be16 *)tag;
@@ -64,8 +64,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
}
static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt)
+ struct net_device *dev)
{
u16 protport;
__be16 *p;
@@ -77,12 +76,7 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, RTL4_A_HDR_LEN)))
return NULL;
- /* The RTL4 header has its own custom Ethertype 0x8899 and that
- * starts right at the beginning of the packet, after the src
- * ethernet addr. Apparently skb->data always points 2 bytes in,
- * behind the Ethertype.
- */
- tag = skb->data - 2;
+ tag = dsa_etype_header_pos_rx(skb);
p = (__be16 *)tag;
etype = ntohs(*p);
if (etype != RTL4_A_ETHERTYPE) {
@@ -109,12 +103,9 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
/* Remove RTL4 tag and recalculate checksum */
skb_pull_rcsum(skb, RTL4_A_HDR_LEN);
- /* Move ethernet DA and SA in front of the data */
- memmove(skb->data - ETH_HLEN,
- skb->data - ETH_HLEN - RTL4_A_HDR_LEN,
- 2 * ETH_ALEN);
+ dsa_strip_etype_header(skb, RTL4_A_HDR_LEN);
- skb->offload_fwd_mark = 1;
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 9c2df9ece01b..c054f48541c8 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -115,56 +115,117 @@ static inline bool sja1105_is_meta_frame(const struct sk_buff *skb)
return true;
}
-static bool sja1105_can_use_vlan_as_tags(const struct sk_buff *skb)
+/* Calls sja1105_port_deferred_xmit in sja1105_main.c */
+static struct sk_buff *sja1105_defer_xmit(struct dsa_port *dp,
+ struct sk_buff *skb)
{
- struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
- u16 vlan_tci;
-
- if (hdr->h_vlan_proto == htons(ETH_P_SJA1105))
- return true;
+ struct sja1105_port *sp = dp->priv;
- if (hdr->h_vlan_proto != htons(ETH_P_8021Q) &&
- !skb_vlan_tag_present(skb))
- return false;
+ if (!dsa_port_is_sja1105(dp))
+ return skb;
- if (skb_vlan_tag_present(skb))
- vlan_tci = skb_vlan_tag_get(skb);
- else
- vlan_tci = ntohs(hdr->h_vlan_TCI);
+ /* Increase refcount so the kfree_skb in dsa_slave_xmit
+ * won't really free the packet.
+ */
+ skb_queue_tail(&sp->xmit_queue, skb_get(skb));
+ kthread_queue_work(sp->xmit_worker, &sp->xmit_work);
- return vid_is_dsa_8021q(vlan_tci & VLAN_VID_MASK);
+ return NULL;
}
-/* This is the first time the tagger sees the frame on RX.
- * Figure out if we can decode it.
+/* Send VLAN tags with a TPID that blends in with whatever VLAN protocol a
+ * bridge spanning ports of this switch might have.
*/
-static bool sja1105_filter(const struct sk_buff *skb, struct net_device *dev)
+static u16 sja1105_xmit_tpid(struct dsa_port *dp)
{
- if (sja1105_can_use_vlan_as_tags(skb))
- return true;
- if (sja1105_is_link_local(skb))
- return true;
- if (sja1105_is_meta_frame(skb))
- return true;
- return false;
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_port *other_dp;
+ u16 proto;
+
+ /* Since VLAN awareness is global, then if this port is VLAN-unaware,
+ * all ports are. Use the VLAN-unaware TPID used for tag_8021q.
+ */
+ if (!dsa_port_is_vlan_filtering(dp))
+ return ETH_P_SJA1105;
+
+ /* Port is VLAN-aware, so there is a bridge somewhere (a single one,
+ * we're sure about that). It may not be on this port though, so we
+ * need to find it.
+ */
+ list_for_each_entry(other_dp, &ds->dst->ports, list) {
+ if (other_dp->ds != ds)
+ continue;
+
+ if (!other_dp->bridge_dev)
+ continue;
+
+ /* Error is returned only if CONFIG_BRIDGE_VLAN_FILTERING is
+ * disabled, which seems pointless to handle, as our port cannot
+ * become VLAN-aware in that case.
+ */
+ br_vlan_get_proto(other_dp->bridge_dev, &proto);
+
+ return proto;
+ }
+
+ WARN_ONCE(1, "Port is VLAN-aware but cannot find associated bridge!\n");
+
+ return ETH_P_SJA1105;
}
-/* Calls sja1105_port_deferred_xmit in sja1105_main.c */
-static struct sk_buff *sja1105_defer_xmit(struct sja1105_port *sp,
- struct sk_buff *skb)
+static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
{
- /* Increase refcount so the kfree_skb in dsa_slave_xmit
- * won't really free the packet.
+ struct dsa_port *dp = dsa_slave_to_port(netdev);
+ struct net_device *br = dp->bridge_dev;
+ u16 tx_vid;
+
+ /* If the port is under a VLAN-aware bridge, just slide the
+ * VLAN-tagged packet into the FDB and hope for the best.
+ * This works because we support a single VLAN-aware bridge
+ * across the entire dst, and its VLANs cannot be shared with
+ * any standalone port.
*/
- skb_queue_tail(&sp->xmit_queue, skb_get(skb));
- kthread_queue_work(sp->xmit_worker, &sp->xmit_work);
+ if (br_vlan_enabled(br))
+ return skb;
- return NULL;
+ /* If the port is under a VLAN-unaware bridge, use an imprecise
+ * TX VLAN that targets the bridge's entire broadcast domain,
+ * instead of just the specific port.
+ */
+ tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(dp->bridge_num);
+
+ return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp), tx_vid);
}
-static u16 sja1105_xmit_tpid(struct sja1105_port *sp)
+/* Transform untagged control packets into pvid-tagged control packets so that
+ * all packets sent by this tagger are VLAN-tagged and we can configure the
+ * switch to drop untagged packets coming from the DSA master.
+ */
+static struct sk_buff *sja1105_pvid_tag_control_pkt(struct dsa_port *dp,
+ struct sk_buff *skb, u8 pcp)
{
- return sp->xmit_tpid;
+ __be16 xmit_tpid = htons(sja1105_xmit_tpid(dp));
+ struct vlan_ethhdr *hdr;
+
+ /* If VLAN tag is in hwaccel area, move it to the payload
+ * to deal with both cases uniformly and to ensure that
+ * the VLANs are added in the right order.
+ */
+ if (unlikely(skb_vlan_tag_present(skb))) {
+ skb = __vlan_hwaccel_push_inside(skb);
+ if (!skb)
+ return NULL;
+ }
+
+ hdr = (struct vlan_ethhdr *)skb_mac_header(skb);
+
+ /* If skb is already VLAN-tagged, leave that VLAN ID in place */
+ if (hdr->h_vlan_proto == xmit_tpid)
+ return skb;
+
+ return vlan_insert_tag(skb, xmit_tpid, (pcp << VLAN_PRIO_SHIFT) |
+ SJA1105_DEFAULT_VLAN);
}
static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
@@ -175,14 +236,22 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
+ if (skb->offload_fwd_mark)
+ return sja1105_imprecise_xmit(skb, netdev);
+
/* Transmitting management traffic does not rely upon switch tagging,
* but instead SPI-installed management routes. Part 2 of this
* is the .port_deferred_xmit driver callback.
*/
- if (unlikely(sja1105_is_link_local(skb)))
- return sja1105_defer_xmit(dp->priv, skb);
+ if (unlikely(sja1105_is_link_local(skb))) {
+ skb = sja1105_pvid_tag_control_pkt(dp, skb, pcp);
+ if (!skb)
+ return NULL;
+
+ return sja1105_defer_xmit(dp, skb);
+ }
- return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp->priv),
+ return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp),
((pcp << VLAN_PRIO_SHIFT) | tx_vid));
}
@@ -194,43 +263,45 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb,
u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
- struct ethhdr *eth_hdr;
__be32 *tx_trailer;
__be16 *tx_header;
int trailer_pos;
+ if (skb->offload_fwd_mark)
+ return sja1105_imprecise_xmit(skb, netdev);
+
/* Transmitting control packets is done using in-band control
* extensions, while data packets are transmitted using
* tag_8021q TX VLANs.
*/
if (likely(!sja1105_is_link_local(skb)))
- return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp->priv),
+ return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp),
((pcp << VLAN_PRIO_SHIFT) | tx_vid));
+ skb = sja1105_pvid_tag_control_pkt(dp, skb, pcp);
+ if (!skb)
+ return NULL;
+
skb_push(skb, SJA1110_HEADER_LEN);
- /* Move Ethernet header to the left, making space for DSA tag */
- memmove(skb->data, skb->data + SJA1110_HEADER_LEN, 2 * ETH_ALEN);
+ dsa_alloc_etype_header(skb, SJA1110_HEADER_LEN);
trailer_pos = skb->len;
- /* On TX, skb->data points to skb_mac_header(skb) */
- eth_hdr = (struct ethhdr *)skb->data;
- tx_header = (__be16 *)(eth_hdr + 1);
+ tx_header = dsa_etype_header_pos_tx(skb);
tx_trailer = skb_put(skb, SJA1110_TX_TRAILER_LEN);
- eth_hdr->h_proto = htons(ETH_P_SJA1110);
-
- *tx_header = htons(SJA1110_HEADER_HOST_TO_SWITCH |
- SJA1110_TX_HEADER_HAS_TRAILER |
- SJA1110_TX_HEADER_TRAILER_POS(trailer_pos));
+ tx_header[0] = htons(ETH_P_SJA1110);
+ tx_header[1] = htons(SJA1110_HEADER_HOST_TO_SWITCH |
+ SJA1110_TX_HEADER_HAS_TRAILER |
+ SJA1110_TX_HEADER_TRAILER_POS(trailer_pos));
*tx_trailer = cpu_to_be32(SJA1110_TX_TRAILER_PRIO(pcp) |
SJA1110_TX_TRAILER_SWITCHID(dp->ds->index) |
SJA1110_TX_TRAILER_DESTPORTS(BIT(dp->index)));
if (clone) {
u8 ts_id = SJA1105_SKB_CB(clone)->ts_id;
- *tx_header |= htons(SJA1110_TX_HEADER_TAKE_TS);
+ tx_header[1] |= htons(SJA1110_TX_HEADER_TAKE_TS);
*tx_trailer |= cpu_to_be32(SJA1110_TX_TRAILER_TSTAMP_ID(ts_id));
}
@@ -273,16 +344,16 @@ static struct sk_buff
bool is_link_local,
bool is_meta)
{
- struct sja1105_port *sp;
- struct dsa_port *dp;
-
- dp = dsa_slave_to_port(skb->dev);
- sp = dp->priv;
-
/* Step 1: A timestampable frame was received.
* Buffer it until we get its meta frame.
*/
if (is_link_local) {
+ struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+ struct sja1105_port *sp = dp->priv;
+
+ if (unlikely(!dsa_port_is_sja1105(dp)))
+ return skb;
+
if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state))
/* Do normal processing. */
return skb;
@@ -315,8 +386,13 @@ static struct sk_buff
* frame, which serves no further purpose).
*/
} else if (is_meta) {
+ struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+ struct sja1105_port *sp = dp->priv;
struct sk_buff *stampable_skb;
+ if (unlikely(!dsa_port_is_sja1105(dp)))
+ return skb;
+
/* Drop the meta frame if we're not in the right state
* to process it.
*/
@@ -358,20 +434,6 @@ static struct sk_buff
return skb;
}
-static void sja1105_decode_subvlan(struct sk_buff *skb, u16 subvlan)
-{
- struct dsa_port *dp = dsa_slave_to_port(skb->dev);
- struct sja1105_port *sp = dp->priv;
- u16 vid = sp->subvlan_map[subvlan];
- u16 vlan_tci;
-
- if (vid == VLAN_N_VID)
- return;
-
- vlan_tci = (skb->priority << VLAN_PRIO_SHIFT) | vid;
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
-}
-
static bool sja1105_skb_has_tag_8021q(const struct sk_buff *skb)
{
u16 tpid = ntohs(eth_hdr(skb)->h_proto);
@@ -385,25 +447,45 @@ static bool sja1110_skb_has_inband_control_extension(const struct sk_buff *skb)
return ntohs(eth_hdr(skb)->h_proto) == ETH_P_SJA1110;
}
+/* If the VLAN in the packet is a tag_8021q one, set @source_port and
+ * @switch_id and strip the header. Otherwise set @vid and keep it in the
+ * packet.
+ */
+static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port,
+ int *switch_id, u16 *vid)
+{
+ struct vlan_ethhdr *hdr = (struct vlan_ethhdr *)skb_mac_header(skb);
+ u16 vlan_tci;
+
+ if (skb_vlan_tag_present(skb))
+ vlan_tci = skb_vlan_tag_get(skb);
+ else
+ vlan_tci = ntohs(hdr->h_vlan_TCI);
+
+ if (vid_is_dsa_8021q_rxvlan(vlan_tci & VLAN_VID_MASK))
+ return dsa_8021q_rcv(skb, source_port, switch_id);
+
+ /* Try our best with imprecise RX */
+ *vid = vlan_tci & VLAN_VID_MASK;
+}
+
static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
- struct net_device *netdev,
- struct packet_type *pt)
+ struct net_device *netdev)
{
- int source_port, switch_id, subvlan = 0;
+ int source_port = -1, switch_id = -1;
struct sja1105_meta meta = {0};
struct ethhdr *hdr;
bool is_link_local;
bool is_meta;
+ u16 vid;
hdr = eth_hdr(skb);
is_link_local = sja1105_is_link_local(skb);
is_meta = sja1105_is_meta_frame(skb);
- skb->offload_fwd_mark = 1;
-
if (sja1105_skb_has_tag_8021q(skb)) {
/* Normal traffic path. */
- dsa_8021q_rcv(skb, &source_port, &switch_id, &subvlan);
+ sja1105_vlan_rcv(skb, &source_port, &switch_id, &vid);
} else if (is_link_local) {
/* Management traffic path. Switch embeds the switch ID and
* port ID into bytes of the destination MAC, courtesy of
@@ -422,14 +504,17 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
return NULL;
}
- skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
+ if (source_port == -1 || switch_id == -1)
+ skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
+ else
+ skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
if (!skb->dev) {
netdev_warn(netdev, "Couldn't decode source port\n");
return NULL;
}
- if (subvlan)
- sja1105_decode_subvlan(skb, subvlan);
+ if (!is_link_local)
+ dsa_default_offload_fwd_mark(skb);
return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local,
is_meta);
@@ -437,11 +522,11 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header)
{
+ u8 *buf = dsa_etype_header_pos_rx(skb) + SJA1110_HEADER_LEN;
int switch_id = SJA1110_RX_HEADER_SWITCH_ID(rx_header);
int n_ts = SJA1110_RX_HEADER_N_TS(rx_header);
struct net_device *master = skb->dev;
struct dsa_port *cpu_dp;
- u8 *buf = skb->data + 2;
struct dsa_switch *ds;
int i;
@@ -474,7 +559,8 @@ static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header)
static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
int *source_port,
- int *switch_id)
+ int *switch_id,
+ bool *host_only)
{
u16 rx_header;
@@ -488,6 +574,9 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
*/
rx_header = ntohs(*(__be16 *)skb->data);
+ if (rx_header & SJA1110_RX_HEADER_HOST_ONLY)
+ *host_only = true;
+
if (rx_header & SJA1110_RX_HEADER_IS_METADATA)
return sja1110_rcv_meta(skb, rx_header);
@@ -522,9 +611,7 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
/* Advance skb->data past the DSA header */
skb_pull_rcsum(skb, SJA1110_HEADER_LEN);
- /* Remove the DSA header */
- memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - SJA1110_HEADER_LEN,
- 2 * ETH_ALEN);
+ dsa_strip_etype_header(skb, SJA1110_HEADER_LEN);
/* With skb->data in its final place, update the MAC header
* so that eth_hdr() continues to works properly.
@@ -535,34 +622,35 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
}
static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
- struct net_device *netdev,
- struct packet_type *pt)
+ struct net_device *netdev)
{
- int source_port = -1, switch_id = -1, subvlan = 0;
-
- skb->offload_fwd_mark = 1;
+ int source_port = -1, switch_id = -1;
+ bool host_only = false;
+ u16 vid = 0;
if (sja1110_skb_has_inband_control_extension(skb)) {
skb = sja1110_rcv_inband_control_extension(skb, &source_port,
- &switch_id);
+ &switch_id,
+ &host_only);
if (!skb)
return NULL;
}
/* Packets with in-band control extensions might still have RX VLANs */
if (likely(sja1105_skb_has_tag_8021q(skb)))
- dsa_8021q_rcv(skb, &source_port, &switch_id, &subvlan);
+ sja1105_vlan_rcv(skb, &source_port, &switch_id, &vid);
- skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
+ if (source_port == -1 || switch_id == -1)
+ skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
+ else
+ skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
if (!skb->dev) {
- netdev_warn(netdev,
- "Couldn't decode source port %d and switch id %d\n",
- source_port, switch_id);
+ netdev_warn(netdev, "Couldn't decode source port\n");
return NULL;
}
- if (subvlan)
- sja1105_decode_subvlan(skb, subvlan);
+ if (!host_only)
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
@@ -596,7 +684,6 @@ static const struct dsa_device_ops sja1105_netdev_ops = {
.proto = DSA_TAG_PROTO_SJA1105,
.xmit = sja1105_xmit,
.rcv = sja1105_rcv,
- .filter = sja1105_filter,
.needed_headroom = VLAN_HLEN,
.flow_dissect = sja1105_flow_dissect,
.promisc_on_master = true,
@@ -610,7 +697,6 @@ static const struct dsa_device_ops sja1110_netdev_ops = {
.proto = DSA_TAG_PROTO_SJA1110,
.xmit = sja1110_xmit,
.rcv = sja1110_rcv,
- .filter = sja1105_filter,
.flow_dissect = sja1110_flow_dissect,
.needed_headroom = SJA1110_HEADER_LEN + VLAN_HLEN,
.needed_tailroom = SJA1110_RX_TRAILER_LEN + SJA1110_MAX_PADDING_LEN,
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index ba73804340a5..5749ba85c2b8 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -24,8 +24,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
return skb;
}
-static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev)
{
u8 *trailer;
int source_port;
diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c
index a31ff7fcb45f..ff442b8af636 100644
--- a/net/dsa/tag_xrs700x.c
+++ b/net/dsa/tag_xrs700x.c
@@ -25,8 +25,7 @@ static struct sk_buff *xrs700x_xmit(struct sk_buff *skb, struct net_device *dev)
return skb;
}
-static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev)
{
int source_port;
u8 *trailer;
@@ -46,7 +45,7 @@ static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev,
return NULL;
/* Frame is forwarded by hardware, don't forward in software. */
- skb->offload_fwd_mark = 1;
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 9cce612e8976..73fce9467467 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -62,8 +62,6 @@
#include <linux/uaccess.h>
#include <net/pkt_sched.h>
-__setup("ether=", netdev_boot_setup);
-
/**
* eth_header - create the Ethernet header
* @skb: buffer to alter
@@ -182,12 +180,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
* at all, so we check here whether one of those tagging
* variants has been configured on the receiving interface,
* and if so, set skb->protocol without looking at the packet.
- * The DSA tagging protocol may be able to decode some but not all
- * traffic (for example only for management). In that case give it the
- * option to filter the packets from which it can decode source port
- * information.
*/
- if (unlikely(netdev_uses_dsa(dev)) && dsa_can_decode(skb, dev))
+ if (unlikely(netdev_uses_dsa(dev)))
return htons(ETH_P_XDSA);
if (likely(eth_proto_is_802_3(eth->h_proto)))
diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c
index 1d6bc132aa4d..46776ea42a92 100644
--- a/net/ethtool/coalesce.c
+++ b/net/ethtool/coalesce.c
@@ -10,6 +10,7 @@ struct coalesce_req_info {
struct coalesce_reply_data {
struct ethnl_reply_data base;
struct ethtool_coalesce coalesce;
+ struct kernel_ethtool_coalesce kernel_coalesce;
u32 supported_params;
};
@@ -61,6 +62,7 @@ static int coalesce_prepare_data(const struct ethnl_req_info *req_base,
struct genl_info *info)
{
struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base);
+ struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct net_device *dev = reply_base->dev;
int ret;
@@ -70,7 +72,8 @@ static int coalesce_prepare_data(const struct ethnl_req_info *req_base,
ret = ethnl_ops_begin(dev);
if (ret < 0)
return ret;
- ret = dev->ethtool_ops->get_coalesce(dev, &data->coalesce);
+ ret = dev->ethtool_ops->get_coalesce(dev, &data->coalesce,
+ &data->kernel_coalesce, extack);
ethnl_ops_complete(dev);
return ret;
@@ -100,7 +103,9 @@ static int coalesce_reply_size(const struct ethnl_req_info *req_base,
nla_total_size(sizeof(u32)) + /* _RX_MAX_FRAMES_HIGH */
nla_total_size(sizeof(u32)) + /* _TX_USECS_HIGH */
nla_total_size(sizeof(u32)) + /* _TX_MAX_FRAMES_HIGH */
- nla_total_size(sizeof(u32)); /* _RATE_SAMPLE_INTERVAL */
+ nla_total_size(sizeof(u32)) + /* _RATE_SAMPLE_INTERVAL */
+ nla_total_size(sizeof(u8)) + /* _USE_CQE_MODE_TX */
+ nla_total_size(sizeof(u8)); /* _USE_CQE_MODE_RX */
}
static bool coalesce_put_u32(struct sk_buff *skb, u16 attr_type, u32 val,
@@ -124,6 +129,7 @@ static int coalesce_fill_reply(struct sk_buff *skb,
const struct ethnl_reply_data *reply_base)
{
const struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base);
+ const struct kernel_ethtool_coalesce *kcoal = &data->kernel_coalesce;
const struct ethtool_coalesce *coal = &data->coalesce;
u32 supported = data->supported_params;
@@ -170,7 +176,11 @@ static int coalesce_fill_reply(struct sk_buff *skb,
coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH,
coal->tx_max_coalesced_frames_high, supported) ||
coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL,
- coal->rate_sample_interval, supported))
+ coal->rate_sample_interval, supported) ||
+ coalesce_put_bool(skb, ETHTOOL_A_COALESCE_USE_CQE_MODE_TX,
+ kcoal->use_cqe_mode_tx, supported) ||
+ coalesce_put_bool(skb, ETHTOOL_A_COALESCE_USE_CQE_MODE_RX,
+ kcoal->use_cqe_mode_rx, supported))
return -EMSGSIZE;
return 0;
@@ -215,10 +225,13 @@ const struct nla_policy ethnl_coalesce_set_policy[] = {
[ETHTOOL_A_COALESCE_TX_USECS_HIGH] = { .type = NLA_U32 },
[ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH] = { .type = NLA_U32 },
[ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL] = { .type = NLA_U32 },
+ [ETHTOOL_A_COALESCE_USE_CQE_MODE_TX] = NLA_POLICY_MAX(NLA_U8, 1),
+ [ETHTOOL_A_COALESCE_USE_CQE_MODE_RX] = NLA_POLICY_MAX(NLA_U8, 1),
};
int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info)
{
+ struct kernel_ethtool_coalesce kernel_coalesce = {};
struct ethtool_coalesce coalesce = {};
struct ethnl_req_info req_info = {};
struct nlattr **tb = info->attrs;
@@ -255,7 +268,8 @@ int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info)
ret = ethnl_ops_begin(dev);
if (ret < 0)
goto out_rtnl;
- ret = ops->get_coalesce(dev, &coalesce);
+ ret = ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
+ info->extack);
if (ret < 0)
goto out_ops;
@@ -303,11 +317,16 @@ int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info)
tb[ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH], &mod);
ethnl_update_u32(&coalesce.rate_sample_interval,
tb[ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL], &mod);
+ ethnl_update_u8(&kernel_coalesce.use_cqe_mode_tx,
+ tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_TX], &mod);
+ ethnl_update_u8(&kernel_coalesce.use_cqe_mode_rx,
+ tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_RX], &mod);
ret = 0;
if (!mod)
goto out_ops;
- ret = dev->ethtool_ops->set_coalesce(dev, &coalesce);
+ ret = dev->ethtool_ops->set_coalesce(dev, &coalesce, &kernel_coalesce,
+ info->extack);
if (ret < 0)
goto out_ops;
ethtool_notify(dev, ETHTOOL_MSG_COALESCE_NTF, NULL);
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index baa5d10043cb..f2abc3152888 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -7,6 +7,7 @@
* the information ethtool needs.
*/
+#include <linux/compat.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/capability.h>
@@ -23,6 +24,7 @@
#include <linux/rtnetlink.h>
#include <linux/sched/signal.h>
#include <linux/net.h>
+#include <linux/pm_runtime.h>
#include <net/devlink.h>
#include <net/xdp_sock_drv.h>
#include <net/flow_offload.h>
@@ -807,6 +809,120 @@ out:
return ret;
}
+static noinline_for_stack int
+ethtool_rxnfc_copy_from_compat(struct ethtool_rxnfc *rxnfc,
+ const struct compat_ethtool_rxnfc __user *useraddr,
+ size_t size)
+{
+ struct compat_ethtool_rxnfc crxnfc = {};
+
+ /* We expect there to be holes between fs.m_ext and
+ * fs.ring_cookie and at the end of fs, but nowhere else.
+ * On non-x86, no conversion should be needed.
+ */
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_X86_64) &&
+ sizeof(struct compat_ethtool_rxnfc) !=
+ sizeof(struct ethtool_rxnfc));
+ BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
+ sizeof(useraddr->fs.m_ext) !=
+ offsetof(struct ethtool_rxnfc, fs.m_ext) +
+ sizeof(rxnfc->fs.m_ext));
+ BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.location) -
+ offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
+ offsetof(struct ethtool_rxnfc, fs.location) -
+ offsetof(struct ethtool_rxnfc, fs.ring_cookie));
+
+ if (copy_from_user(&crxnfc, useraddr, min(size, sizeof(crxnfc))))
+ return -EFAULT;
+
+ *rxnfc = (struct ethtool_rxnfc) {
+ .cmd = crxnfc.cmd,
+ .flow_type = crxnfc.flow_type,
+ .data = crxnfc.data,
+ .fs = {
+ .flow_type = crxnfc.fs.flow_type,
+ .h_u = crxnfc.fs.h_u,
+ .h_ext = crxnfc.fs.h_ext,
+ .m_u = crxnfc.fs.m_u,
+ .m_ext = crxnfc.fs.m_ext,
+ .ring_cookie = crxnfc.fs.ring_cookie,
+ .location = crxnfc.fs.location,
+ },
+ .rule_cnt = crxnfc.rule_cnt,
+ };
+
+ return 0;
+}
+
+static int ethtool_rxnfc_copy_from_user(struct ethtool_rxnfc *rxnfc,
+ const void __user *useraddr,
+ size_t size)
+{
+ if (compat_need_64bit_alignment_fixup())
+ return ethtool_rxnfc_copy_from_compat(rxnfc, useraddr, size);
+
+ if (copy_from_user(rxnfc, useraddr, size))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int ethtool_rxnfc_copy_to_compat(void __user *useraddr,
+ const struct ethtool_rxnfc *rxnfc,
+ size_t size, const u32 *rule_buf)
+{
+ struct compat_ethtool_rxnfc crxnfc;
+
+ memset(&crxnfc, 0, sizeof(crxnfc));
+ crxnfc = (struct compat_ethtool_rxnfc) {
+ .cmd = rxnfc->cmd,
+ .flow_type = rxnfc->flow_type,
+ .data = rxnfc->data,
+ .fs = {
+ .flow_type = rxnfc->fs.flow_type,
+ .h_u = rxnfc->fs.h_u,
+ .h_ext = rxnfc->fs.h_ext,
+ .m_u = rxnfc->fs.m_u,
+ .m_ext = rxnfc->fs.m_ext,
+ .ring_cookie = rxnfc->fs.ring_cookie,
+ .location = rxnfc->fs.location,
+ },
+ .rule_cnt = rxnfc->rule_cnt,
+ };
+
+ if (copy_to_user(useraddr, &crxnfc, min(size, sizeof(crxnfc))))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int ethtool_rxnfc_copy_to_user(void __user *useraddr,
+ const struct ethtool_rxnfc *rxnfc,
+ size_t size, const u32 *rule_buf)
+{
+ int ret;
+
+ if (compat_need_64bit_alignment_fixup()) {
+ ret = ethtool_rxnfc_copy_to_compat(useraddr, rxnfc, size,
+ rule_buf);
+ useraddr += offsetof(struct compat_ethtool_rxnfc, rule_locs);
+ } else {
+ ret = copy_to_user(useraddr, rxnfc, size);
+ useraddr += offsetof(struct ethtool_rxnfc, rule_locs);
+ }
+
+ if (ret)
+ return -EFAULT;
+
+ if (rule_buf) {
+ if (copy_to_user(useraddr, rule_buf,
+ rxnfc->rule_cnt * sizeof(u32)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
u32 cmd, void __user *useraddr)
{
@@ -825,7 +941,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
info_size = (offsetof(struct ethtool_rxnfc, data) +
sizeof(info.data));
- if (copy_from_user(&info, useraddr, info_size))
+ if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
return -EFAULT;
rc = dev->ethtool_ops->set_rxnfc(dev, &info);
@@ -833,7 +949,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
return rc;
if (cmd == ETHTOOL_SRXCLSRLINS &&
- copy_to_user(useraddr, &info, info_size))
+ ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, NULL))
return -EFAULT;
return 0;
@@ -859,7 +975,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
info_size = (offsetof(struct ethtool_rxnfc, data) +
sizeof(info.data));
- if (copy_from_user(&info, useraddr, info_size))
+ if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
return -EFAULT;
/* If FLOW_RSS was requested then user-space must be using the
@@ -867,7 +983,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
*/
if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) {
info_size = sizeof(info);
- if (copy_from_user(&info, useraddr, info_size))
+ if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
return -EFAULT;
/* Since malicious users may modify the original data,
* we need to check whether FLOW_RSS is still requested.
@@ -893,18 +1009,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
if (ret < 0)
goto err_out;
- ret = -EFAULT;
- if (copy_to_user(useraddr, &info, info_size))
- goto err_out;
-
- if (rule_buf) {
- useraddr += offsetof(struct ethtool_rxnfc, rule_locs);
- if (copy_to_user(useraddr, rule_buf,
- info.rule_cnt * sizeof(u32)))
- goto err_out;
- }
- ret = 0;
-
+ ret = ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, rule_buf);
err_out:
kfree(rule_buf);
@@ -1514,12 +1619,14 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev,
void __user *useraddr)
{
struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
+ struct kernel_ethtool_coalesce kernel_coalesce = {};
int ret;
if (!dev->ethtool_ops->get_coalesce)
return -EOPNOTSUPP;
- ret = dev->ethtool_ops->get_coalesce(dev, &coalesce);
+ ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
+ NULL);
if (ret)
return ret;
@@ -1586,19 +1693,26 @@ ethtool_set_coalesce_supported(struct net_device *dev,
static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
void __user *useraddr)
{
+ struct kernel_ethtool_coalesce kernel_coalesce = {};
struct ethtool_coalesce coalesce;
int ret;
- if (!dev->ethtool_ops->set_coalesce)
+ if (!dev->ethtool_ops->set_coalesce && !dev->ethtool_ops->get_coalesce)
return -EOPNOTSUPP;
+ ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
+ NULL);
+ if (ret)
+ return ret;
+
if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
return -EFAULT;
if (!ethtool_set_coalesce_supported(dev, &coalesce))
return -EOPNOTSUPP;
- ret = dev->ethtool_ops->set_coalesce(dev, &coalesce);
+ ret = dev->ethtool_ops->set_coalesce(dev, &coalesce, &kernel_coalesce,
+ NULL);
if (!ret)
ethtool_notify(dev, ETHTOOL_MSG_COALESCE_NTF, NULL);
return ret;
@@ -2581,15 +2695,14 @@ static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr)
/* The main entry point in this file. Called from net/core/dev_ioctl.c */
-int dev_ethtool(struct net *net, struct ifreq *ifr)
+int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr)
{
struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
- void __user *useraddr = ifr->ifr_data;
u32 ethcmd, sub_cmd;
int rc;
netdev_features_t old_features;
- if (!dev || !netif_device_present(dev))
+ if (!dev)
return -ENODEV;
if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
@@ -2645,10 +2758,18 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
return -EPERM;
}
+ if (dev->dev.parent)
+ pm_runtime_get_sync(dev->dev.parent);
+
+ if (!netif_device_present(dev)) {
+ rc = -ENODEV;
+ goto out;
+ }
+
if (dev->ethtool_ops->begin) {
rc = dev->ethtool_ops->begin(dev);
- if (rc < 0)
- return rc;
+ if (rc < 0)
+ goto out;
}
old_features = dev->features;
@@ -2867,6 +2988,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
if (old_features != dev->features)
netdev_features_change(dev);
+out:
+ if (dev->dev.parent)
+ pm_runtime_put(dev->dev.parent);
return rc;
}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 73e0f5b626bf..1797a0a90019 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -2,6 +2,7 @@
#include <net/sock.h>
#include <linux/ethtool_netlink.h>
+#include <linux/pm_runtime.h>
#include "netlink.h"
static struct genl_family ethtool_genl_family;
@@ -29,6 +30,44 @@ const struct nla_policy ethnl_header_policy_stats[] = {
ETHTOOL_FLAGS_STATS),
};
+int ethnl_ops_begin(struct net_device *dev)
+{
+ int ret;
+
+ if (!dev)
+ return -ENODEV;
+
+ if (dev->dev.parent)
+ pm_runtime_get_sync(dev->dev.parent);
+
+ if (!netif_device_present(dev)) {
+ ret = -ENODEV;
+ goto err;
+ }
+
+ if (dev->ethtool_ops->begin) {
+ ret = dev->ethtool_ops->begin(dev);
+ if (ret)
+ goto err;
+ }
+
+ return 0;
+err:
+ if (dev->dev.parent)
+ pm_runtime_put(dev->dev.parent);
+
+ return ret;
+}
+
+void ethnl_ops_complete(struct net_device *dev)
+{
+ if (dev->ethtool_ops->complete)
+ dev->ethtool_ops->complete(dev);
+
+ if (dev->dev.parent)
+ pm_runtime_put(dev->dev.parent);
+}
+
/**
* ethnl_parse_header_dev_get() - parse request header
* @req_info: structure to put results into
@@ -101,12 +140,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
return -EINVAL;
}
- if (dev && !netif_device_present(dev)) {
- dev_put(dev);
- NL_SET_ERR_MSG(extack, "device not present");
- return -ENODEV;
- }
-
req_info->dev = dev;
req_info->flags = flags;
return 0;
@@ -365,8 +398,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
ops->cleanup_data(reply_data);
genlmsg_end(rskb, reply_payload);
- if (req_info->dev)
- dev_put(req_info->dev);
+ dev_put(req_info->dev);
kfree(reply_data);
kfree(req_info);
return genlmsg_reply(rskb, info);
@@ -378,8 +410,7 @@ err_cleanup:
if (ops->cleanup_data)
ops->cleanup_data(reply_data);
err_dev:
- if (req_info->dev)
- dev_put(req_info->dev);
+ dev_put(req_info->dev);
kfree(reply_data);
kfree(req_info);
return ret;
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 3fc395c86702..e8987e28036f 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -247,19 +247,8 @@ struct ethnl_reply_data {
struct net_device *dev;
};
-static inline int ethnl_ops_begin(struct net_device *dev)
-{
- if (dev && dev->ethtool_ops->begin)
- return dev->ethtool_ops->begin(dev);
- else
- return 0;
-}
-
-static inline void ethnl_ops_complete(struct net_device *dev)
-{
- if (dev && dev->ethtool_ops->complete)
- dev->ethtool_ops->complete(dev);
-}
+int ethnl_ops_begin(struct net_device *dev);
+void ethnl_ops_complete(struct net_device *dev);
/**
* struct ethnl_request_ops - unified handling of GET requests
@@ -370,7 +359,7 @@ extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX + 1];
extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1];
extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1];
extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1];
-extern const struct nla_policy ethnl_coalesce_set_policy[ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL + 1];
+extern const struct nla_policy ethnl_coalesce_set_policy[ETHTOOL_A_COALESCE_MAX + 1];
extern const struct nla_policy ethnl_pause_get_policy[ETHTOOL_A_PAUSE_HEADER + 1];
extern const struct nla_policy ethnl_pause_set_policy[ETHTOOL_A_PAUSE_TX + 1];
extern const struct nla_policy ethnl_eee_get_policy[ETHTOOL_A_EEE_HEADER + 1];
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index 88215b5c93aa..dd5a45f8a78a 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -340,8 +340,7 @@ nla_put_failure:
out_dev:
wpan_phy_put(phy);
out:
- if (dev)
- dev_put(dev);
+ dev_put(dev);
return rc;
}
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 0cf2374c143b..277124f206e0 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -2226,8 +2226,7 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
if (ops->internal_flags & NL802154_FLAG_NEED_WPAN_DEV) {
struct wpan_dev *wpan_dev = info->user_ptr[1];
- if (wpan_dev->netdev)
- dev_put(wpan_dev->netdev);
+ dev_put(wpan_dev->netdev);
} else {
dev_put(info->user_ptr[1]);
}
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index a45a0401adc5..7bb9ef35c570 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -41,8 +41,7 @@ ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr)
ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr);
rcu_read_lock();
dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr);
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
break;
case IEEE802154_ADDR_SHORT:
@@ -129,7 +128,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg,
int ret = -ENOIOCTLCMD;
struct net_device *dev;
- if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ if (get_user_ifreq(&ifr, NULL, arg))
return -EFAULT;
ifr.ifr_name[IFNAMSIZ-1] = 0;
@@ -143,7 +142,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg,
if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl)
ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd);
- if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ if (!ret && put_user_ifreq(&ifr, arg))
ret = -EFAULT;
dev_put(dev);
@@ -984,6 +983,11 @@ static const struct proto_ops ieee802154_dgram_ops = {
.sendpage = sock_no_sendpage,
};
+static void ieee802154_sock_destruct(struct sock *sk)
+{
+ skb_queue_purge(&sk->sk_receive_queue);
+}
+
/* Create a socket. Initialise the socket, blank the addresses
* set the state.
*/
@@ -1024,7 +1028,7 @@ static int ieee802154_create(struct net *net, struct socket *sock,
sock->ops = ops;
sock_init_data(sock, sk);
- /* FIXME: sk->sk_destruct */
+ sk->sk_destruct = ieee802154_sock_destruct;
sk->sk_family = PF_IEEE802154;
/* Checksums on by default */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 54648181dd56..1d816a5fd3eb 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -452,7 +452,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* changes context in a wrong way it will be caught.
*/
err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
- BPF_CGROUP_INET4_BIND, &flags);
+ CGROUP_INET4_BIND, &flags);
if (err)
return err;
@@ -781,7 +781,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_port = inet->inet_dport;
sin->sin_addr.s_addr = inet->inet_daddr;
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- BPF_CGROUP_INET4_GETPEERNAME,
+ CGROUP_INET4_GETPEERNAME,
NULL);
} else {
__be32 addr = inet->inet_rcv_saddr;
@@ -790,7 +790,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_port = inet->inet_sport;
sin->sin_addr.s_addr = addr;
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- BPF_CGROUP_INET4_GETSOCKNAME,
+ CGROUP_INET4_GETSOCKNAME,
NULL);
}
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
@@ -953,10 +953,10 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCGIFNETMASK:
case SIOCGIFDSTADDR:
case SIOCGIFPFLAGS:
- if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
+ if (get_user_ifreq(&ifr, NULL, p))
return -EFAULT;
err = devinet_ioctl(net, cmd, &ifr);
- if (!err && copy_to_user(p, &ifr, sizeof(struct ifreq)))
+ if (!err && put_user_ifreq(&ifr, p))
err = -EFAULT;
break;
@@ -966,7 +966,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCSIFDSTADDR:
case SIOCSIFPFLAGS:
case SIOCSIFFLAGS:
- if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
+ if (get_user_ifreq(&ifr, NULL, p))
return -EFAULT;
err = devinet_ioctl(net, cmd, &ifr);
break;
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 9e41eff4a685..0dcee9df1326 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -10,6 +10,9 @@
#include <net/tcp.h>
#include <net/bpf_sk_storage.h>
+/* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */
+extern struct bpf_struct_ops bpf_tcp_congestion_ops;
+
static u32 optional_ops[] = {
offsetof(struct tcp_congestion_ops, init),
offsetof(struct tcp_congestion_ops, release),
@@ -163,6 +166,19 @@ static const struct bpf_func_proto bpf_tcp_send_ack_proto = {
.arg2_type = ARG_ANYTHING,
};
+static u32 prog_ops_moff(const struct bpf_prog *prog)
+{
+ const struct btf_member *m;
+ const struct btf_type *t;
+ u32 midx;
+
+ midx = prog->expected_attach_type;
+ t = bpf_tcp_congestion_ops.type;
+ m = &btf_type_member(t)[midx];
+
+ return btf_member_bit_offset(t, m) / 8;
+}
+
static const struct bpf_func_proto *
bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
const struct bpf_prog *prog)
@@ -174,6 +190,28 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
+ case BPF_FUNC_setsockopt:
+ /* Does not allow release() to call setsockopt.
+ * release() is called when the current bpf-tcp-cc
+ * is retiring. It is not allowed to call
+ * setsockopt() to make further changes which
+ * may potentially allocate new resources.
+ */
+ if (prog_ops_moff(prog) !=
+ offsetof(struct tcp_congestion_ops, release))
+ return &bpf_sk_setsockopt_proto;
+ return NULL;
+ case BPF_FUNC_getsockopt:
+ /* Since get/setsockopt is usually expected to
+ * be available together, disable getsockopt for
+ * release also to avoid usage surprise.
+ * The bpf-tcp-cc already has a more powerful way
+ * to read tcp_sock from the PTR_TO_BTF_ID.
+ */
+ if (prog_ops_moff(prog) !=
+ offsetof(struct tcp_congestion_ops, release))
+ return &bpf_sk_getsockopt_proto;
+ return NULL;
default:
return bpf_base_func_proto(func_id);
}
@@ -286,9 +324,6 @@ static void bpf_tcp_ca_unreg(void *kdata)
tcp_unregister_congestion_control(kdata);
}
-/* Avoid sparse warning. It is only used in bpf_struct_ops.c. */
-extern struct bpf_struct_ops bpf_tcp_congestion_ops;
-
struct bpf_struct_ops bpf_tcp_congestion_ops = {
.verifier_ops = &bpf_tcp_ca_verifier_ops,
.reg = bpf_tcp_ca_reg,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 099259fc826a..7fbd0b532f52 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -465,14 +465,16 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
if (!doi_def)
return;
- switch (doi_def->type) {
- case CIPSO_V4_MAP_TRANS:
- kfree(doi_def->map.std->lvl.cipso);
- kfree(doi_def->map.std->lvl.local);
- kfree(doi_def->map.std->cat.cipso);
- kfree(doi_def->map.std->cat.local);
- kfree(doi_def->map.std);
- break;
+ if (doi_def->map.std) {
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_TRANS:
+ kfree(doi_def->map.std->lvl.cipso);
+ kfree(doi_def->map.std->lvl.local);
+ kfree(doi_def->map.std->cat.cipso);
+ kfree(doi_def->map.std->cat.local);
+ kfree(doi_def->map.std);
+ break;
+ }
}
kfree(doi_def);
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 73721a4448bd..f4468980b675 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -215,7 +215,7 @@ static void devinet_sysctl_unregister(struct in_device *idev)
static struct in_ifaddr *inet_alloc_ifa(void)
{
- return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
+ return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
}
static void inet_rcu_free_ifa(struct rcu_head *head)
@@ -1243,7 +1243,7 @@ out:
return ret;
}
-static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
+int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
{
struct in_device *in_dev = __in_dev_get_rtnl(dev);
const struct in_ifaddr *ifa;
@@ -1950,7 +1950,8 @@ static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
};
static int inet_validate_link_af(const struct net_device *dev,
- const struct nlattr *nla)
+ const struct nlattr *nla,
+ struct netlink_ext_ack *extack)
{
struct nlattr *a, *tb[IFLA_INET_MAX+1];
int err, rem;
@@ -1959,7 +1960,7 @@ static int inet_validate_link_af(const struct net_device *dev,
return -EAFNOSUPPORT;
err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
- inet_af_policy, NULL);
+ inet_af_policy, extack);
if (err < 0)
return err;
@@ -2424,11 +2425,15 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
int *valp = ctl->data;
int val = *valp;
loff_t pos = *ppos;
- int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ struct net *net = ctl->extra2;
+ int ret;
- if (write && *valp != val) {
- struct net *net = ctl->extra2;
+ if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ if (write && *valp != val) {
if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
if (!rtnl_trylock()) {
/* Restore the original values before restarting */
@@ -2762,8 +2767,6 @@ void __init devinet_init(void)
INIT_HLIST_HEAD(&inet_addr_lst[i]);
register_pernet_subsys(&devinet_ops);
-
- register_gifconf(PF_INET, inet_gifconf);
register_netdevice_notifier(&ip_netdev_notifier);
queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index a09e36c4a413..851f542928a3 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -97,7 +97,6 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
{
- struct esp_output_extra *extra = esp_tmp_extra(tmp);
struct crypto_aead *aead = x->data;
int extralen = 0;
u8 *iv;
@@ -105,9 +104,8 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
struct scatterlist *sg;
if (x->props.flags & XFRM_STATE_ESN)
- extralen += sizeof(*extra);
+ extralen += sizeof(struct esp_output_extra);
- extra = esp_tmp_extra(tmp);
iv = esp_tmp_iv(aead, tmp, extralen);
req = esp_tmp_req(aead, iv);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 4c0c33e4710d..b42c429cebbe 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -208,9 +208,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
void fib_nh_common_release(struct fib_nh_common *nhc)
{
- if (nhc->nhc_dev)
- dev_put(nhc->nhc_dev);
-
+ dev_put(nhc->nhc_dev);
lwtstate_put(nhc->nhc_lwtstate);
rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
rt_fibinfo_free(&nhc->nhc_rth_input);
@@ -260,7 +258,7 @@ EXPORT_SYMBOL_GPL(free_fib_info);
void fib_release_info(struct fib_info *fi)
{
spin_lock_bh(&fib_info_lock);
- if (fi && --fi->fib_treeref == 0) {
+ if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
hlist_del(&fi->fib_hash);
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
@@ -1373,7 +1371,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
if (!cfg->fc_mx) {
fi = fib_find_info_nh(net, cfg);
if (fi) {
- fi->fib_treeref++;
+ refcount_inc(&fi->fib_treeref);
return fi;
}
}
@@ -1547,11 +1545,11 @@ link_it:
if (ofi) {
fi->fib_dead = 1;
free_fib_info(fi);
- ofi->fib_treeref++;
+ refcount_inc(&ofi->fib_treeref);
return ofi;
}
- fi->fib_treeref++;
+ refcount_set(&fi->fib_treeref, 1);
refcount_set(&fi->fib_clntref, 1);
spin_lock_bh(&fib_info_lock);
hlist_add_head(&fi->fib_hash,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 25cf387cca5b..8060524f4256 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2380,11 +2380,11 @@ void __init fib_trie_init(void)
{
fn_alias_kmem = kmem_cache_create("ip_fib_alias",
sizeof(struct fib_alias),
- 0, SLAB_PANIC, NULL);
+ 0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
LEAF_SIZE,
- 0, SLAB_PANIC, NULL);
+ 0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
}
struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index e5f69b0bf3df..8fcbc6258ec5 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -230,8 +230,8 @@ static struct sk_buff *fou_gro_receive(struct sock *sk,
struct list_head *head,
struct sk_buff *skb)
{
+ const struct net_offload __rcu **offloads;
u8 proto = fou_from_sock(sk)->protocol;
- const struct net_offload **offloads;
const struct net_offload *ops;
struct sk_buff *pp = NULL;
@@ -263,10 +263,10 @@ out_unlock:
static int fou_gro_complete(struct sock *sk, struct sk_buff *skb,
int nhoff)
{
- const struct net_offload *ops;
+ const struct net_offload __rcu **offloads;
u8 proto = fou_from_sock(sk)->protocol;
+ const struct net_offload *ops;
int err = -ENOSYS;
- const struct net_offload **offloads;
rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
@@ -311,7 +311,7 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
struct list_head *head,
struct sk_buff *skb)
{
- const struct net_offload **offloads;
+ const struct net_offload __rcu **offloads;
const struct net_offload *ops;
struct sk_buff *pp = NULL;
struct sk_buff *p;
@@ -457,8 +457,8 @@ out:
static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
{
- const struct net_offload **offloads;
struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
+ const struct net_offload __rcu **offloads;
const struct net_offload *ops;
unsigned int guehlen = 0;
u8 proto;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c695d294a5df..8b30cadff708 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1095,8 +1095,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
sizeof(struct in6_addr))
goto send_mal_query;
dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev);
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
break;
#endif
default:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6b3c558a4f23..d2e2b3d18c66 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -803,10 +803,17 @@ static void igmp_gq_timer_expire(struct timer_list *t)
static void igmp_ifc_timer_expire(struct timer_list *t)
{
struct in_device *in_dev = from_timer(in_dev, t, mr_ifc_timer);
+ u32 mr_ifc_count;
igmpv3_send_cr(in_dev);
- if (in_dev->mr_ifc_count) {
- in_dev->mr_ifc_count--;
+restart:
+ mr_ifc_count = READ_ONCE(in_dev->mr_ifc_count);
+
+ if (mr_ifc_count) {
+ if (cmpxchg(&in_dev->mr_ifc_count,
+ mr_ifc_count,
+ mr_ifc_count - 1) != mr_ifc_count)
+ goto restart;
igmp_ifc_start_timer(in_dev,
unsolicited_report_interval(in_dev));
}
@@ -818,7 +825,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
struct net *net = dev_net(in_dev->dev);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
return;
- in_dev->mr_ifc_count = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
igmp_ifc_start_timer(in_dev, 1);
}
@@ -957,7 +964,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
in_dev->mr_qri;
}
/* cancel the interface change timer */
- in_dev->mr_ifc_count = 0;
+ WRITE_ONCE(in_dev->mr_ifc_count, 0);
if (del_timer(&in_dev->mr_ifc_timer))
__in_dev_put(in_dev);
/* clear deleted report items */
@@ -1724,7 +1731,7 @@ void ip_mc_down(struct in_device *in_dev)
igmp_group_dropped(pmc);
#ifdef CONFIG_IP_MULTICAST
- in_dev->mr_ifc_count = 0;
+ WRITE_ONCE(in_dev->mr_ifc_count, 0);
if (del_timer(&in_dev->mr_ifc_timer))
__in_dev_put(in_dev);
in_dev->mr_gq_running = 0;
@@ -1941,7 +1948,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
pmc->sfmode = MCAST_INCLUDE;
#ifdef CONFIG_IP_MULTICAST
pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
- in_dev->mr_ifc_count = pmc->crcount;
+ WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
igmp_ifc_event(pmc->interface);
@@ -2120,7 +2127,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
/* else no filters; keep old mode for reports */
pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
- in_dev->mr_ifc_count = pmc->crcount;
+ WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
igmp_ifc_event(in_dev);
@@ -2233,7 +2240,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
iml->sfmode, psf->sl_count, psf->sl_addr, 0);
RCU_INIT_POINTER(iml->sflist, NULL);
/* decrease mem now to avoid the memleak warning */
- atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psf, sl_addr, psf->sl_max), &sk->sk_omem_alloc);
kfree_rcu(psf, rcu);
return err;
}
@@ -2382,7 +2389,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
if (psl)
count += psl->sl_max;
- newpsl = sock_kmalloc(sk, IP_SFLSIZE(count), GFP_KERNEL);
+ newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count),
+ GFP_KERNEL);
if (!newpsl) {
err = -ENOBUFS;
goto done;
@@ -2393,7 +2401,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
for (i = 0; i < psl->sl_count; i++)
newpsl->sl_addr[i] = psl->sl_addr[i];
/* decrease mem now to avoid the memleak warning */
- atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
}
rcu_assign_pointer(pmc->sflist, newpsl);
@@ -2468,19 +2477,22 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
goto done;
}
if (msf->imsf_numsrc) {
- newpsl = sock_kmalloc(sk, IP_SFLSIZE(msf->imsf_numsrc),
- GFP_KERNEL);
+ newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr,
+ msf->imsf_numsrc),
+ GFP_KERNEL);
if (!newpsl) {
err = -ENOBUFS;
goto done;
}
newpsl->sl_max = newpsl->sl_count = msf->imsf_numsrc;
- memcpy(newpsl->sl_addr, msf->imsf_slist,
- msf->imsf_numsrc * sizeof(msf->imsf_slist[0]));
+ memcpy(newpsl->sl_addr, msf->imsf_slist_flex,
+ flex_array_size(msf, imsf_slist_flex, msf->imsf_numsrc));
err = ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
msf->imsf_fmode, newpsl->sl_count, newpsl->sl_addr, 0);
if (err) {
- sock_kfree_s(sk, newpsl, IP_SFLSIZE(newpsl->sl_max));
+ sock_kfree_s(sk, newpsl,
+ struct_size(newpsl, sl_addr,
+ newpsl->sl_max));
goto done;
}
} else {
@@ -2493,7 +2505,8 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
psl->sl_count, psl->sl_addr, 0);
/* decrease mem now to avoid the memleak warning */
- atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
} else
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
@@ -2551,14 +2564,14 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
count = psl->sl_count;
}
copycount = count < msf->imsf_numsrc ? count : msf->imsf_numsrc;
- len = copycount * sizeof(psl->sl_addr[0]);
+ len = flex_array_size(psl, sl_addr, copycount);
msf->imsf_numsrc = count;
if (put_user(IP_MSFILTER_SIZE(copycount), optlen) ||
copy_to_user(optval, msf, IP_MSFILTER_SIZE(0))) {
return -EFAULT;
}
if (len &&
- copy_to_user(&optval->imsf_slist[0], psl->sl_addr, len))
+ copy_to_user(&optval->imsf_slist_flex[0], psl->sl_addr, len))
return -EFAULT;
return 0;
done:
@@ -2713,6 +2726,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u
rv = 1;
} else if (im) {
if (src_addr) {
+ spin_lock_bh(&im->lock);
for (psf = im->sources; psf; psf = psf->sf_next) {
if (psf->sf_inaddr == src_addr)
break;
@@ -2723,6 +2737,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u
im->sfcount[MCAST_EXCLUDE];
else
rv = im->sfcount[MCAST_EXCLUDE] != 0;
+ spin_unlock_bh(&im->lock);
} else
rv = 1; /* unspecified source; tentatively allow */
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 754013fa393b..f25d02ad4a8a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -534,7 +534,8 @@ out:
atomic_read(&newsk->sk_rmem_alloc));
mem_cgroup_sk_alloc(newsk);
if (newsk->sk_memcg && amt)
- mem_cgroup_charge_skmem(newsk->sk_memcg, amt);
+ mem_cgroup_charge_skmem(newsk->sk_memcg, amt,
+ GFP_KERNEL | __GFP_NOFAIL);
release_sock(newsk);
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 12dca0c85f3c..177d26d8fb9c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -473,6 +473,8 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
+ if (csum && skb_checksum_start(skb) < skb->data)
+ return -EINVAL;
return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
@@ -923,7 +925,7 @@ static const struct net_device_ops ipgre_netdev_ops = {
.ndo_stop = ipgre_close,
#endif
.ndo_start_xmit = ipgre_xmit,
- .ndo_do_ioctl = ip_tunnel_ioctl,
+ .ndo_siocdevprivate = ip_tunnel_siocdevprivate,
.ndo_change_mtu = ip_tunnel_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8d8a8da3ae7e..9bca57ef8b83 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -198,19 +198,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
} else if (rt->rt_type == RTN_BROADCAST)
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
- /* Be paranoid, rather than too clever. */
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
- struct sk_buff *skb2;
-
- skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
- if (!skb2) {
- kfree_skb(skb);
+ skb = skb_expand_head(skb, hh_len);
+ if (!skb)
return -ENOMEM;
- }
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
- consume_skb(skb);
- skb = skb2;
}
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
@@ -446,8 +437,9 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
{
BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) !=
offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr));
- memcpy(&iph->saddr, &fl4->saddr,
- sizeof(fl4->saddr) + sizeof(fl4->daddr));
+
+ iph->saddr = fl4->saddr;
+ iph->daddr = fl4->daddr;
}
/* Note: skb->sk can be different from sk, in case of tunnels */
@@ -614,18 +606,6 @@ void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph,
}
EXPORT_SYMBOL(ip_fraglist_init);
-static void ip_fraglist_ipcb_prepare(struct sk_buff *skb,
- struct ip_fraglist_iter *iter)
-{
- struct sk_buff *to = iter->frag;
-
- /* Copy the flags to each fragment. */
- IPCB(to)->flags = IPCB(skb)->flags;
-
- if (iter->offset == 0)
- ip_options_fragment(to);
-}
-
void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter)
{
unsigned int hlen = iter->hlen;
@@ -671,7 +651,7 @@ void ip_frag_init(struct sk_buff *skb, unsigned int hlen,
EXPORT_SYMBOL(ip_frag_init);
static void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to,
- bool first_frag, struct ip_frag_state *state)
+ bool first_frag)
{
/* Copy the flags to each fragment. */
IPCB(to)->flags = IPCB(from)->flags;
@@ -846,11 +826,14 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
/* Everything is OK. Generate! */
ip_fraglist_init(skb, iph, hlen, &iter);
+ if (iter.frag)
+ ip_options_fragment(iter.frag);
+
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
if (iter.frag) {
- ip_fraglist_ipcb_prepare(skb, &iter);
+ IPCB(iter.frag)->flags = IPCB(skb)->flags;
ip_fraglist_prepare(skb, &iter);
}
@@ -905,7 +888,7 @@ slow_path:
err = PTR_ERR(skb2);
goto fail;
}
- ip_frag_ipcb(skb, skb2, first_frag, &state);
+ ip_frag_ipcb(skb, skb2, first_frag);
/*
* Put this fragment into the sending queue.
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ec6036713e2c..b297bb28556e 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -663,12 +663,11 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex,
struct sockaddr_storage *group,
struct sockaddr_storage *list)
{
- int msize = IP_MSFILTER_SIZE(numsrc);
struct ip_msfilter *msf;
struct sockaddr_in *psin;
int err, i;
- msf = kmalloc(msize, GFP_KERNEL);
+ msf = kmalloc(IP_MSFILTER_SIZE(numsrc), GFP_KERNEL);
if (!msf)
return -ENOBUFS;
@@ -684,7 +683,7 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex,
if (psin->sin_family != AF_INET)
goto Eaddrnotavail;
- msf->imsf_slist[i] = psin->sin_addr.s_addr;
+ msf->imsf_slist_flex[i] = psin->sin_addr.s_addr;
}
err = ip_mc_msfilter(sk, msf, ifindex);
kfree(msf);
@@ -791,7 +790,8 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
goto out_free_gsf;
err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc,
- gsf->gf_fmode, &gsf->gf_group, gsf->gf_slist);
+ gsf->gf_fmode, &gsf->gf_group,
+ gsf->gf_slist_flex);
out_free_gsf:
kfree(gsf);
return err;
@@ -800,7 +800,7 @@ out_free_gsf:
static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
int optlen)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter *gf32;
unsigned int n;
void *p;
@@ -814,7 +814,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
p = kmalloc(optlen + 4, GFP_KERNEL);
if (!p)
return -ENOMEM;
- gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+ gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
err = -EFAULT;
if (copy_from_sockptr(gf32, optval, optlen))
@@ -827,7 +827,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
goto out_free_gsf;
err = -EINVAL;
- if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+ if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
goto out_free_gsf;
/* numsrc >= (4G-140)/128 overflow in 32 bits */
@@ -835,7 +835,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
goto out_free_gsf;
err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
- &gf32->gf_group, gf32->gf_slist);
+ &gf32->gf_group, gf32->gf_slist_flex);
out_free_gsf:
kfree(p);
return err;
@@ -1456,7 +1456,7 @@ static bool getsockopt_needs_rtnl(int optname)
static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
int __user *optlen, int len)
{
- const int size0 = offsetof(struct group_filter, gf_slist);
+ const int size0 = offsetof(struct group_filter, gf_slist_flex);
struct group_filter __user *p = optval;
struct group_filter gsf;
int num;
@@ -1468,7 +1468,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
return -EFAULT;
num = gsf.gf_numsrc;
- err = ip_mc_gsfget(sk, &gsf, p->gf_slist);
+ err = ip_mc_gsfget(sk, &gsf, p->gf_slist_flex);
if (err)
return err;
if (gsf.gf_numsrc < num)
@@ -1482,7 +1482,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
int __user *optlen, int len)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter __user *p = optval;
struct compat_group_filter gf32;
struct group_filter gf;
@@ -1499,7 +1499,7 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
num = gf.gf_numsrc = gf32.gf_numsrc;
gf.gf_group = gf32.gf_group;
- err = ip_mc_gsfget(sk, &gf, p->gf_slist);
+ err = ip_mc_gsfget(sk, &gf, p->gf_slist_flex);
if (err)
return err;
if (gf.gf_numsrc < num)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index be75b409445c..fe9101d3d69e 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -958,19 +958,20 @@ done:
}
EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
-int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
struct ip_tunnel_parm p;
int err;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
return -EFAULT;
err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
- if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (!err && copy_to_user(data, &p, sizeof(p)))
return -EFAULT;
return err;
}
-EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
+EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index eb560eecee08..efe25a0172e6 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -405,7 +405,7 @@ static const struct net_device_ops vti_netdev_ops = {
.ndo_init = vti_tunnel_init,
.ndo_uninit = ip_tunnel_uninit,
.ndo_start_xmit = vti_tunnel_xmit,
- .ndo_do_ioctl = ip_tunnel_ioctl,
+ .ndo_siocdevprivate = ip_tunnel_siocdevprivate,
.ndo_change_mtu = ip_tunnel_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 266c65577ba6..3aa78ccbec3e 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -347,7 +347,7 @@ static const struct net_device_ops ipip_netdev_ops = {
.ndo_init = ipip_tunnel_init,
.ndo_uninit = ip_tunnel_uninit,
.ndo_start_xmit = ipip_tunnel_xmit,
- .ndo_do_ioctl = ip_tunnel_ioctl,
+ .ndo_siocdevprivate = ip_tunnel_siocdevprivate,
.ndo_change_mtu = ip_tunnel_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 6922612df456..3de78416ec76 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -18,15 +18,12 @@ MODULE_DESCRIPTION("arptables filter table");
#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
(1 << NF_ARP_FORWARD))
-static int __net_init arptable_filter_table_init(struct net *net);
-
static const struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_ARP,
.priority = NF_IP_PRI_FILTER,
- .table_init = arptable_filter_table_init,
};
/* The work comes in here from netfilter.c */
@@ -39,7 +36,7 @@ arptable_filter_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *arpfilter_ops __read_mostly;
-static int __net_init arptable_filter_table_init(struct net *net)
+static int arptable_filter_table_init(struct net *net)
{
struct arpt_replace *repl;
int err;
@@ -69,30 +66,32 @@ static struct pernet_operations arptable_filter_net_ops = {
static int __init arptable_filter_init(void)
{
- int ret;
+ int ret = xt_register_template(&packet_filter,
+ arptable_filter_table_init);
+
+ if (ret < 0)
+ return ret;
arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook);
- if (IS_ERR(arpfilter_ops))
+ if (IS_ERR(arpfilter_ops)) {
+ xt_unregister_template(&packet_filter);
return PTR_ERR(arpfilter_ops);
+ }
ret = register_pernet_subsys(&arptable_filter_net_ops);
if (ret < 0) {
+ xt_unregister_template(&packet_filter);
kfree(arpfilter_ops);
return ret;
}
- ret = arptable_filter_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&arptable_filter_net_ops);
- kfree(arpfilter_ops);
- }
-
return ret;
}
static void __exit arptable_filter_fini(void)
{
unregister_pernet_subsys(&arptable_filter_net_ops);
+ xt_unregister_template(&packet_filter);
kfree(arpfilter_ops);
}
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 8f7ca67475b7..8fd1aba8af31 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -66,11 +66,22 @@ struct clusterip_net {
/* lock protects the configs list */
spinlock_t lock;
+ bool clusterip_deprecated_warning;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *procdir;
/* mutex protects the config->pde*/
struct mutex mutex;
#endif
+ unsigned int hook_users;
+};
+
+static unsigned int clusterip_arp_mangle(void *priv, struct sk_buff *skb, const struct nf_hook_state *state);
+
+static const struct nf_hook_ops cip_arp_ops = {
+ .hook = clusterip_arp_mangle,
+ .pf = NFPROTO_ARP,
+ .hooknum = NF_ARP_OUT,
+ .priority = -1
};
static unsigned int clusterip_net_id __read_mostly;
@@ -458,6 +469,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
static int clusterip_tg_check(const struct xt_tgchk_param *par)
{
struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
+ struct clusterip_net *cn = clusterip_pernet(par->net);
const struct ipt_entry *e = par->entryinfo;
struct clusterip_config *config;
int ret, i;
@@ -467,6 +479,9 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
return -EOPNOTSUPP;
}
+ if (cn->hook_users == UINT_MAX)
+ return -EOVERFLOW;
+
if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
@@ -517,10 +532,23 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
return ret;
}
- if (!par->net->xt.clusterip_deprecated_warning) {
+ if (cn->hook_users == 0) {
+ ret = nf_register_net_hook(par->net, &cip_arp_ops);
+
+ if (ret < 0) {
+ clusterip_config_entry_put(config);
+ clusterip_config_put(config);
+ nf_ct_netns_put(par->net, par->family);
+ return ret;
+ }
+ }
+
+ cn->hook_users++;
+
+ if (!cn->clusterip_deprecated_warning) {
pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
"use xt_cluster instead\n");
- par->net->xt.clusterip_deprecated_warning = true;
+ cn->clusterip_deprecated_warning = true;
}
cipinfo->config = config;
@@ -531,6 +559,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
{
const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
+ struct clusterip_net *cn = clusterip_pernet(par->net);
/* if no more entries are referencing the config, remove it
* from the list and destroy the proc entry */
@@ -539,6 +568,10 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
clusterip_config_put(cipinfo->config);
nf_ct_netns_put(par->net, par->family);
+ cn->hook_users--;
+
+ if (cn->hook_users == 0)
+ nf_unregister_net_hook(par->net, &cip_arp_ops);
}
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
@@ -602,9 +635,8 @@ static void arp_print(struct arp_payload *payload)
#endif
static unsigned int
-arp_mangle(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+clusterip_arp_mangle(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
struct arphdr *arp = arp_hdr(skb);
struct arp_payload *payload;
@@ -654,13 +686,6 @@ arp_mangle(void *priv,
return NF_ACCEPT;
}
-static const struct nf_hook_ops cip_arp_ops = {
- .hook = arp_mangle,
- .pf = NFPROTO_ARP,
- .hooknum = NF_ARP_OUT,
- .priority = -1
-};
-
/***********************************************************************
* PROC DIR HANDLING
***********************************************************************/
@@ -817,20 +842,14 @@ static const struct proc_ops clusterip_proc_ops = {
static int clusterip_net_init(struct net *net)
{
struct clusterip_net *cn = clusterip_pernet(net);
- int ret;
INIT_LIST_HEAD(&cn->configs);
spin_lock_init(&cn->lock);
- ret = nf_register_net_hook(net, &cip_arp_ops);
- if (ret < 0)
- return ret;
-
#ifdef CONFIG_PROC_FS
cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
if (!cn->procdir) {
- nf_unregister_net_hook(net, &cip_arp_ops);
pr_err("Unable to proc dir entry\n");
return -ENOMEM;
}
@@ -850,7 +869,6 @@ static void clusterip_net_exit(struct net *net)
cn->procdir = NULL;
mutex_unlock(&cn->mutex);
#endif
- nf_unregister_net_hook(net, &cip_arp_ops);
}
static struct pernet_operations clusterip_net_ops = {
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 8272df7c6ad5..0eb0e2ab9bfc 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -19,7 +19,6 @@ MODULE_DESCRIPTION("iptables filter table");
#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT))
-static int __net_init iptable_filter_table_init(struct net *net);
static const struct xt_table packet_filter = {
.name = "filter",
@@ -27,7 +26,6 @@ static const struct xt_table packet_filter = {
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
.priority = NF_IP_PRI_FILTER,
- .table_init = iptable_filter_table_init,
};
static unsigned int
@@ -43,7 +41,7 @@ static struct nf_hook_ops *filter_ops __read_mostly;
static bool forward __read_mostly = true;
module_param(forward, bool, 0000);
-static int __net_init iptable_filter_table_init(struct net *net)
+static int iptable_filter_table_init(struct net *net)
{
struct ipt_replace *repl;
int err;
@@ -62,7 +60,7 @@ static int __net_init iptable_filter_table_init(struct net *net)
static int __net_init iptable_filter_net_init(struct net *net)
{
- if (net == &init_net || !forward)
+ if (!forward)
return iptable_filter_table_init(net);
return 0;
@@ -86,22 +84,32 @@ static struct pernet_operations iptable_filter_net_ops = {
static int __init iptable_filter_init(void)
{
- int ret;
+ int ret = xt_register_template(&packet_filter,
+ iptable_filter_table_init);
+
+ if (ret < 0)
+ return ret;
filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook);
- if (IS_ERR(filter_ops))
+ if (IS_ERR(filter_ops)) {
+ xt_unregister_template(&packet_filter);
return PTR_ERR(filter_ops);
+ }
ret = register_pernet_subsys(&iptable_filter_net_ops);
- if (ret < 0)
+ if (ret < 0) {
+ xt_unregister_template(&packet_filter);
kfree(filter_ops);
+ return ret;
+ }
- return ret;
+ return 0;
}
static void __exit iptable_filter_fini(void)
{
unregister_pernet_subsys(&iptable_filter_net_ops);
+ xt_unregister_template(&packet_filter);
kfree(filter_ops);
}
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 2abc3836f391..40417a3f930b 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -25,15 +25,12 @@ MODULE_DESCRIPTION("iptables mangle table");
(1 << NF_INET_LOCAL_OUT) | \
(1 << NF_INET_POST_ROUTING))
-static int __net_init iptable_mangle_table_init(struct net *net);
-
static const struct xt_table packet_mangler = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
.priority = NF_IP_PRI_MANGLE,
- .table_init = iptable_mangle_table_init,
};
static unsigned int
@@ -83,7 +80,7 @@ iptable_mangle_hook(void *priv,
}
static struct nf_hook_ops *mangle_ops __read_mostly;
-static int __net_init iptable_mangle_table_init(struct net *net)
+static int iptable_mangle_table_init(struct net *net)
{
struct ipt_replace *repl;
int ret;
@@ -113,32 +110,32 @@ static struct pernet_operations iptable_mangle_net_ops = {
static int __init iptable_mangle_init(void)
{
- int ret;
+ int ret = xt_register_template(&packet_mangler,
+ iptable_mangle_table_init);
+ if (ret < 0)
+ return ret;
mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook);
if (IS_ERR(mangle_ops)) {
+ xt_unregister_template(&packet_mangler);
ret = PTR_ERR(mangle_ops);
return ret;
}
ret = register_pernet_subsys(&iptable_mangle_net_ops);
if (ret < 0) {
+ xt_unregister_template(&packet_mangler);
kfree(mangle_ops);
return ret;
}
- ret = iptable_mangle_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&iptable_mangle_net_ops);
- kfree(mangle_ops);
- }
-
return ret;
}
static void __exit iptable_mangle_fini(void)
{
unregister_pernet_subsys(&iptable_mangle_net_ops);
+ xt_unregister_template(&packet_mangler);
kfree(mangle_ops);
}
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index a9913842ef18..45d7e072e6a5 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -17,8 +17,6 @@ struct iptable_nat_pernet {
struct nf_hook_ops *nf_nat_ops;
};
-static int __net_init iptable_nat_table_init(struct net *net);
-
static unsigned int iptable_nat_net_id __read_mostly;
static const struct xt_table nf_nat_ipv4_table = {
@@ -29,7 +27,6 @@ static const struct xt_table nf_nat_ipv4_table = {
(1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
- .table_init = iptable_nat_table_init,
};
static unsigned int iptable_nat_do_chain(void *priv,
@@ -113,7 +110,7 @@ static void ipt_nat_unregister_lookups(struct net *net)
kfree(ops);
}
-static int __net_init iptable_nat_table_init(struct net *net)
+static int iptable_nat_table_init(struct net *net)
{
struct ipt_replace *repl;
int ret;
@@ -155,20 +152,25 @@ static struct pernet_operations iptable_nat_net_ops = {
static int __init iptable_nat_init(void)
{
- int ret = register_pernet_subsys(&iptable_nat_net_ops);
+ int ret = xt_register_template(&nf_nat_ipv4_table,
+ iptable_nat_table_init);
+
+ if (ret < 0)
+ return ret;
- if (ret)
+ ret = register_pernet_subsys(&iptable_nat_net_ops);
+ if (ret < 0) {
+ xt_unregister_template(&nf_nat_ipv4_table);
return ret;
+ }
- ret = iptable_nat_table_init(&init_net);
- if (ret)
- unregister_pernet_subsys(&iptable_nat_net_ops);
return ret;
}
static void __exit iptable_nat_exit(void)
{
unregister_pernet_subsys(&iptable_nat_net_ops);
+ xt_unregister_template(&nf_nat_ipv4_table);
}
module_init(iptable_nat_init);
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index ceef397c1f5f..b88e0f36cd05 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -12,8 +12,6 @@
#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
-static int __net_init iptable_raw_table_init(struct net *net);
-
static bool raw_before_defrag __read_mostly;
MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag");
module_param(raw_before_defrag, bool, 0000);
@@ -24,7 +22,6 @@ static const struct xt_table packet_raw = {
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
.priority = NF_IP_PRI_RAW,
- .table_init = iptable_raw_table_init,
};
static const struct xt_table packet_raw_before_defrag = {
@@ -33,7 +30,6 @@ static const struct xt_table packet_raw_before_defrag = {
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
.priority = NF_IP_PRI_RAW_BEFORE_DEFRAG,
- .table_init = iptable_raw_table_init,
};
/* The work comes in here from netfilter.c. */
@@ -89,22 +85,24 @@ static int __init iptable_raw_init(void)
pr_info("Enabling raw table before defrag\n");
}
+ ret = xt_register_template(table,
+ iptable_raw_table_init);
+ if (ret < 0)
+ return ret;
+
rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook);
- if (IS_ERR(rawtable_ops))
+ if (IS_ERR(rawtable_ops)) {
+ xt_unregister_template(table);
return PTR_ERR(rawtable_ops);
+ }
ret = register_pernet_subsys(&iptable_raw_net_ops);
if (ret < 0) {
+ xt_unregister_template(table);
kfree(rawtable_ops);
return ret;
}
- ret = iptable_raw_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&iptable_raw_net_ops);
- kfree(rawtable_ops);
- }
-
return ret;
}
@@ -112,6 +110,7 @@ static void __exit iptable_raw_fini(void)
{
unregister_pernet_subsys(&iptable_raw_net_ops);
kfree(rawtable_ops);
+ xt_unregister_template(&packet_raw);
}
module_init(iptable_raw_init);
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index 77973f5fd8f6..f519162a2fa5 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -25,15 +25,12 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules");
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT)
-static int __net_init iptable_security_table_init(struct net *net);
-
static const struct xt_table security_table = {
.name = "security",
.valid_hooks = SECURITY_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
.priority = NF_IP_PRI_SECURITY,
- .table_init = iptable_security_table_init,
};
static unsigned int
@@ -45,7 +42,7 @@ iptable_security_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *sectbl_ops __read_mostly;
-static int __net_init iptable_security_table_init(struct net *net)
+static int iptable_security_table_init(struct net *net)
{
struct ipt_replace *repl;
int ret;
@@ -75,24 +72,25 @@ static struct pernet_operations iptable_security_net_ops = {
static int __init iptable_security_init(void)
{
- int ret;
+ int ret = xt_register_template(&security_table,
+ iptable_security_table_init);
+
+ if (ret < 0)
+ return ret;
sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook);
- if (IS_ERR(sectbl_ops))
+ if (IS_ERR(sectbl_ops)) {
+ xt_unregister_template(&security_table);
return PTR_ERR(sectbl_ops);
+ }
ret = register_pernet_subsys(&iptable_security_net_ops);
if (ret < 0) {
+ xt_unregister_template(&security_table);
kfree(sectbl_ops);
return ret;
}
- ret = iptable_security_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&iptable_security_net_ops);
- kfree(sectbl_ops);
- }
-
return ret;
}
@@ -100,6 +98,7 @@ static void __exit iptable_security_fini(void)
{
unregister_pernet_subsys(&iptable_security_net_ops);
kfree(sectbl_ops);
+ xt_unregister_template(&security_table);
}
module_init(iptable_security_init);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 99c06944501a..d6899ab5fb39 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -276,12 +276,13 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
struct rt_cache_stat *st = v;
if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
+ seq_puts(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
return 0;
}
- seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
- " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
+ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x "
+ "%08x %08x %08x %08x %08x %08x "
+ "%08x %08x %08x %08x\n",
dst_entries_get_slow(&ipv4_dst_ops),
0, /* st->in_hit */
st->in_slow_tot,
@@ -586,28 +587,35 @@ static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
}
}
-static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
+static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash)
{
- struct fib_nh_exception *fnhe, *oldest;
+ struct fib_nh_exception __rcu **fnhe_p, **oldest_p;
+ struct fib_nh_exception *fnhe, *oldest = NULL;
- oldest = rcu_dereference(hash->chain);
- for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
- fnhe = rcu_dereference(fnhe->fnhe_next)) {
- if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
+ for (fnhe_p = &hash->chain; ; fnhe_p = &fnhe->fnhe_next) {
+ fnhe = rcu_dereference_protected(*fnhe_p,
+ lockdep_is_held(&fnhe_lock));
+ if (!fnhe)
+ break;
+ if (!oldest ||
+ time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) {
oldest = fnhe;
+ oldest_p = fnhe_p;
+ }
}
fnhe_flush_routes(oldest);
- return oldest;
+ *oldest_p = oldest->fnhe_next;
+ kfree_rcu(oldest, rcu);
}
-static inline u32 fnhe_hashfun(__be32 daddr)
+static u32 fnhe_hashfun(__be32 daddr)
{
- static u32 fnhe_hashrnd __read_mostly;
- u32 hval;
+ static siphash_key_t fnhe_hash_key __read_mostly;
+ u64 hval;
- net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
- hval = jhash_1word((__force u32)daddr, fnhe_hashrnd);
- return hash_32(hval, FNHE_HASH_SHIFT);
+ net_get_random_once(&fnhe_hash_key, sizeof(fnhe_hash_key));
+ hval = siphash_1u32((__force u32)daddr, &fnhe_hash_key);
+ return hash_64(hval, FNHE_HASH_SHIFT);
}
static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
@@ -676,16 +684,21 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
if (rt)
fill_route_from_fnhe(rt, fnhe);
} else {
- if (depth > FNHE_RECLAIM_DEPTH)
- fnhe = fnhe_oldest(hash);
- else {
- fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
- if (!fnhe)
- goto out_unlock;
-
- fnhe->fnhe_next = hash->chain;
- rcu_assign_pointer(hash->chain, fnhe);
+ /* Randomize max depth to avoid some side channels attacks. */
+ int max_depth = FNHE_RECLAIM_DEPTH +
+ prandom_u32_max(FNHE_RECLAIM_DEPTH);
+
+ while (depth > max_depth) {
+ fnhe_remove_oldest(hash);
+ depth--;
}
+
+ fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
+ if (!fnhe)
+ goto out_unlock;
+
+ fnhe->fnhe_next = hash->chain;
+
fnhe->fnhe_genid = genid;
fnhe->fnhe_daddr = daddr;
fnhe->fnhe_gw = gw;
@@ -693,6 +706,8 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
fnhe->fnhe_mtu_locked = lock;
fnhe->fnhe_expires = max(1UL, expires);
+ rcu_assign_pointer(hash->chain, fnhe);
+
/* Exception created; mark the cached routes for the nexthop
* stale, so anyone caching it rechecks if this exception
* applies to them.
@@ -1299,26 +1314,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst)
{
- const struct rtable *rt = (const struct rtable *)dst;
- unsigned int mtu = rt->rt_pmtu;
-
- if (!mtu || time_after_eq(jiffies, rt->dst.expires))
- mtu = dst_metric_raw(dst, RTAX_MTU);
-
- if (mtu)
- goto out;
-
- mtu = READ_ONCE(dst->dev->mtu);
-
- if (unlikely(ip_mtu_locked(dst))) {
- if (rt->rt_uses_gateway && mtu > 576)
- mtu = 576;
- }
-
-out:
- mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
-
- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
+ return ip_dst_mtu_maybe_forward(dst, false);
}
EXPORT_INDIRECT_CALLABLE(ipv4_mtu);
@@ -2831,8 +2827,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
new->output = dst_discard_out;
new->dev = net->loopback_dev;
- if (new->dev)
- dev_hold(new->dev);
+ dev_hold(new->dev);
rt->rt_is_input = ort->rt_is_input;
rt->rt_iif = ort->rt_iif;
@@ -3170,7 +3165,7 @@ static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
udph = skb_put_zero(skb, sizeof(struct udphdr));
udph->source = sport;
udph->dest = dport;
- udph->len = sizeof(struct udphdr);
+ udph->len = htons(sizeof(struct udphdr));
udph->check = 0;
break;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8cb44040ec68..e8b48df73c85 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3338,6 +3338,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
} else {
tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
SOCK_MIN_RCVBUF / 2 : val;
+ tp->rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp);
}
return 0;
}
@@ -4512,7 +4513,9 @@ void __init tcp_init(void)
tcp_hashinfo.bind_bucket_cachep =
kmem_cache_create("tcp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC |
+ SLAB_ACCOUNT,
+ NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 6ea3dc2e4219..6274462b86b4 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -1041,7 +1041,7 @@ static void bbr_init(struct sock *sk)
bbr->prior_cwnd = 0;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
bbr->rtt_cnt = 0;
- bbr->next_rtt_delivered = 0;
+ bbr->next_rtt_delivered = tp->delivered;
bbr->prev_ca_state = TCP_CA_Open;
bbr->packet_conservation = 0;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 25fa4c01a17f..59412d6354a0 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -55,12 +55,7 @@ void tcp_fastopen_ctx_destroy(struct net *net)
{
struct tcp_fastopen_context *ctxt;
- spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
-
- ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
- lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
- rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL);
- spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
+ ctxt = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, NULL);
if (ctxt)
call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
@@ -89,18 +84,12 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
ctx->num = 1;
}
- spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
if (sk) {
q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
- octx = rcu_dereference_protected(q->ctx,
- lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
- rcu_assign_pointer(q->ctx, ctx);
+ octx = xchg((__force struct tcp_fastopen_context **)&q->ctx, ctx);
} else {
- octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
- lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
- rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
+ octx = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, ctx);
}
- spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
if (octx)
call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
@@ -379,8 +368,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
return NULL;
}
- if (syn_data &&
- tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
+ if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
goto fastopen;
if (foc->len == 0) {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 149ceb5c94ff..3f7bd7ae7d7a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -100,6 +100,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */
#define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */
+#define FLAG_DSACK_TLP 0x20000 /* DSACK for tail loss probe */
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -454,11 +455,12 @@ static void tcp_sndbuf_expand(struct sock *sk)
*/
/* Slow part of check#2. */
-static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
+static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
+ unsigned int skbtruesize)
{
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
- int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
+ int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
while (tp->rcv_ssthresh <= window) {
@@ -471,7 +473,27 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
return 0;
}
-static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
+/* Even if skb appears to have a bad len/truesize ratio, TCP coalescing
+ * can play nice with us, as sk_buff and skb->head might be either
+ * freed or shared with up to MAX_SKB_FRAGS segments.
+ * Only give a boost to drivers using page frag(s) to hold the frame(s),
+ * and if no payload was pulled in skb->head before reaching us.
+ */
+static u32 truesize_adjust(bool adjust, const struct sk_buff *skb)
+{
+ u32 truesize = skb->truesize;
+
+ if (adjust && !skb_headlen(skb)) {
+ truesize -= SKB_TRUESIZE(skb_end_offset(skb));
+ /* paranoid check, some drivers might be buggy */
+ if (unlikely((int)truesize < (int)skb->len))
+ truesize = skb->truesize;
+ }
+ return truesize;
+}
+
+static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
+ bool adjust)
{
struct tcp_sock *tp = tcp_sk(sk);
int room;
@@ -480,15 +502,16 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
/* Check #1 */
if (room > 0 && !tcp_under_memory_pressure(sk)) {
+ unsigned int truesize = truesize_adjust(adjust, skb);
int incr;
/* Check #2. Increase window, if skb with such overhead
* will fit to rcvbuf in future.
*/
- if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
+ if (tcp_win_from_space(sk, truesize) <= skb->len)
incr = 2 * tp->advmss;
else
- incr = __tcp_grow_window(sk, skb);
+ incr = __tcp_grow_window(sk, skb, truesize);
if (incr) {
incr = max_t(int, incr, 2 * skb->len);
@@ -782,7 +805,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
tcp_ecn_check_ce(sk, skb);
if (skb->len >= 128)
- tcp_grow_window(sk, skb);
+ tcp_grow_window(sk, skb, true);
}
/* Called to compute a smoothed rtt estimate. The data fed to this
@@ -969,6 +992,8 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
return 0;
if (seq_len > tp->mss_cache)
dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);
+ else if (tp->tlp_high_seq && tp->tlp_high_seq == end_seq)
+ state->flag |= FLAG_DSACK_TLP;
tp->dsack_dups += dup_segs;
/* Skip the DSACK if dup segs weren't retransmitted by sender */
@@ -976,7 +1001,14 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
return 0;
tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
- tp->rack.dsack_seen = 1;
+ /* We increase the RACK ordering window in rounds where we receive
+ * DSACKs that may have been due to reordering causing RACK to trigger
+ * a spurious fast recovery. Thus RACK ignores DSACKs that happen
+ * without having seen reordering, or that match TLP probes (TLP
+ * is timer-driven, not triggered by RACK).
+ */
+ if (tp->reord_seen && !(state->flag & FLAG_DSACK_TLP))
+ tp->rack.dsack_seen = 1;
state->flag |= FLAG_DSACKING_ACK;
/* A spurious retransmission is delivered */
@@ -3628,7 +3660,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
if (!tp->tlp_retrans) {
/* TLP of new data has been acknowledged */
tp->tlp_high_seq = 0;
- } else if (flag & FLAG_DSACKING_ACK) {
+ } else if (flag & FLAG_DSACK_TLP) {
/* This DSACK means original and TLP probe arrived; no loss */
tp->tlp_high_seq = 0;
} else if (after(ack, tp->tlp_high_seq)) {
@@ -4769,7 +4801,7 @@ coalesce_done:
* and trigger fast retransmit.
*/
if (tcp_is_sack(tp))
- tcp_grow_window(sk, skb);
+ tcp_grow_window(sk, skb, true);
kfree_skb_partial(skb, fragstolen);
skb = NULL;
goto add_sack;
@@ -4857,7 +4889,7 @@ end:
* and trigger fast retransmit.
*/
if (tcp_is_sack(tp))
- tcp_grow_window(sk, skb);
+ tcp_grow_window(sk, skb, false);
skb_condense(skb);
skb_set_owner_r(skb, sk);
}
@@ -5383,7 +5415,7 @@ static void tcp_new_space(struct sock *sk)
tp->snd_cwnd_stamp = tcp_jiffies32;
}
- sk->sk_write_space(sk);
+ INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
}
static void tcp_check_space(struct sock *sk)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a692626c19e4..2e62e0d6373a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2277,51 +2277,72 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */
-/*
- * Get next listener socket follow cur. If cur is NULL, get first socket
- * starting from bucket given in st->bucket; when st->bucket is zero the
- * very first socket in the hash table is returned.
+static unsigned short seq_file_family(const struct seq_file *seq);
+
+static bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
+{
+ unsigned short family = seq_file_family(seq);
+
+ /* AF_UNSPEC is used as a match all */
+ return ((family == AF_UNSPEC || family == sk->sk_family) &&
+ net_eq(sock_net(sk), seq_file_net(seq)));
+}
+
+/* Find a non empty bucket (starting from st->bucket)
+ * and return the first sk from it.
*/
-static void *listening_get_next(struct seq_file *seq, void *cur)
+static void *listening_get_first(struct seq_file *seq)
{
- struct tcp_seq_afinfo *afinfo;
struct tcp_iter_state *st = seq->private;
- struct net *net = seq_file_net(seq);
- struct inet_listen_hashbucket *ilb;
- struct hlist_nulls_node *node;
- struct sock *sk = cur;
- if (st->bpf_seq_afinfo)
- afinfo = st->bpf_seq_afinfo;
- else
- afinfo = PDE_DATA(file_inode(seq->file));
+ st->offset = 0;
+ for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) {
+ struct inet_listen_hashbucket *ilb2;
+ struct inet_connection_sock *icsk;
+ struct sock *sk;
- if (!sk) {
-get_head:
- ilb = &tcp_hashinfo.listening_hash[st->bucket];
- spin_lock(&ilb->lock);
- sk = sk_nulls_head(&ilb->nulls_head);
- st->offset = 0;
- goto get_sk;
+ ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+ if (hlist_empty(&ilb2->head))
+ continue;
+
+ spin_lock(&ilb2->lock);
+ inet_lhash2_for_each_icsk(icsk, &ilb2->head) {
+ sk = (struct sock *)icsk;
+ if (seq_sk_match(seq, sk))
+ return sk;
+ }
+ spin_unlock(&ilb2->lock);
}
- ilb = &tcp_hashinfo.listening_hash[st->bucket];
+
+ return NULL;
+}
+
+/* Find the next sk of "cur" within the same bucket (i.e. st->bucket).
+ * If "cur" is the last one in the st->bucket,
+ * call listening_get_first() to return the first sk of the next
+ * non empty bucket.
+ */
+static void *listening_get_next(struct seq_file *seq, void *cur)
+{
+ struct tcp_iter_state *st = seq->private;
+ struct inet_listen_hashbucket *ilb2;
+ struct inet_connection_sock *icsk;
+ struct sock *sk = cur;
+
++st->num;
++st->offset;
- sk = sk_nulls_next(sk);
-get_sk:
- sk_nulls_for_each_from(sk, node) {
- if (!net_eq(sock_net(sk), net))
- continue;
- if (afinfo->family == AF_UNSPEC ||
- sk->sk_family == afinfo->family)
+ icsk = inet_csk(sk);
+ inet_lhash2_for_each_icsk_continue(icsk) {
+ sk = (struct sock *)icsk;
+ if (seq_sk_match(seq, sk))
return sk;
}
- spin_unlock(&ilb->lock);
- st->offset = 0;
- if (++st->bucket < INET_LHTABLE_SIZE)
- goto get_head;
- return NULL;
+
+ ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+ spin_unlock(&ilb2->lock);
+ ++st->bucket;
+ return listening_get_first(seq);
}
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
@@ -2331,7 +2352,7 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
st->bucket = 0;
st->offset = 0;
- rc = listening_get_next(seq, NULL);
+ rc = listening_get_first(seq);
while (rc && *pos) {
rc = listening_get_next(seq, rc);
@@ -2351,15 +2372,7 @@ static inline bool empty_bucket(const struct tcp_iter_state *st)
*/
static void *established_get_first(struct seq_file *seq)
{
- struct tcp_seq_afinfo *afinfo;
struct tcp_iter_state *st = seq->private;
- struct net *net = seq_file_net(seq);
- void *rc = NULL;
-
- if (st->bpf_seq_afinfo)
- afinfo = st->bpf_seq_afinfo;
- else
- afinfo = PDE_DATA(file_inode(seq->file));
st->offset = 0;
for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
@@ -2373,32 +2386,20 @@ static void *established_get_first(struct seq_file *seq)
spin_lock_bh(lock);
sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
- if ((afinfo->family != AF_UNSPEC &&
- sk->sk_family != afinfo->family) ||
- !net_eq(sock_net(sk), net)) {
- continue;
- }
- rc = sk;
- goto out;
+ if (seq_sk_match(seq, sk))
+ return sk;
}
spin_unlock_bh(lock);
}
-out:
- return rc;
+
+ return NULL;
}
static void *established_get_next(struct seq_file *seq, void *cur)
{
- struct tcp_seq_afinfo *afinfo;
struct sock *sk = cur;
struct hlist_nulls_node *node;
struct tcp_iter_state *st = seq->private;
- struct net *net = seq_file_net(seq);
-
- if (st->bpf_seq_afinfo)
- afinfo = st->bpf_seq_afinfo;
- else
- afinfo = PDE_DATA(file_inode(seq->file));
++st->num;
++st->offset;
@@ -2406,9 +2407,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
sk = sk_nulls_next(sk);
sk_nulls_for_each_from(sk, node) {
- if ((afinfo->family == AF_UNSPEC ||
- sk->sk_family == afinfo->family) &&
- net_eq(sock_net(sk), net))
+ if (seq_sk_match(seq, sk))
return sk;
}
@@ -2451,17 +2450,18 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
static void *tcp_seek_last_pos(struct seq_file *seq)
{
struct tcp_iter_state *st = seq->private;
+ int bucket = st->bucket;
int offset = st->offset;
int orig_num = st->num;
void *rc = NULL;
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
- if (st->bucket >= INET_LHTABLE_SIZE)
+ if (st->bucket > tcp_hashinfo.lhash2_mask)
break;
st->state = TCP_SEQ_STATE_LISTENING;
- rc = listening_get_next(seq, NULL);
- while (offset-- && rc)
+ rc = listening_get_first(seq);
+ while (offset-- && rc && bucket == st->bucket)
rc = listening_get_next(seq, rc);
if (rc)
break;
@@ -2472,7 +2472,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
if (st->bucket > tcp_hashinfo.ehash_mask)
break;
rc = established_get_first(seq);
- while (offset-- && rc)
+ while (offset-- && rc && bucket == st->bucket)
rc = established_get_next(seq, rc);
}
@@ -2542,7 +2542,7 @@ void tcp_seq_stop(struct seq_file *seq, void *v)
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
if (v != SEQ_START_TOKEN)
- spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
+ spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
break;
case TCP_SEQ_STATE_ESTABLISHED:
if (v)
@@ -2687,6 +2687,15 @@ out:
}
#ifdef CONFIG_BPF_SYSCALL
+struct bpf_tcp_iter_state {
+ struct tcp_iter_state state;
+ unsigned int cur_sk;
+ unsigned int end_sk;
+ unsigned int max_sk;
+ struct sock **batch;
+ bool st_bucket_done;
+};
+
struct bpf_iter__tcp {
__bpf_md_ptr(struct bpf_iter_meta *, meta);
__bpf_md_ptr(struct sock_common *, sk_common);
@@ -2705,16 +2714,204 @@ static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
return bpf_iter_run_prog(prog, &ctx);
}
+static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter)
+{
+ while (iter->cur_sk < iter->end_sk)
+ sock_put(iter->batch[iter->cur_sk++]);
+}
+
+static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
+ unsigned int new_batch_sz)
+{
+ struct sock **new_batch;
+
+ new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
+ GFP_USER | __GFP_NOWARN);
+ if (!new_batch)
+ return -ENOMEM;
+
+ bpf_iter_tcp_put_batch(iter);
+ kvfree(iter->batch);
+ iter->batch = new_batch;
+ iter->max_sk = new_batch_sz;
+
+ return 0;
+}
+
+static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
+ struct sock *start_sk)
+{
+ struct bpf_tcp_iter_state *iter = seq->private;
+ struct tcp_iter_state *st = &iter->state;
+ struct inet_connection_sock *icsk;
+ unsigned int expected = 1;
+ struct sock *sk;
+
+ sock_hold(start_sk);
+ iter->batch[iter->end_sk++] = start_sk;
+
+ icsk = inet_csk(start_sk);
+ inet_lhash2_for_each_icsk_continue(icsk) {
+ sk = (struct sock *)icsk;
+ if (seq_sk_match(seq, sk)) {
+ if (iter->end_sk < iter->max_sk) {
+ sock_hold(sk);
+ iter->batch[iter->end_sk++] = sk;
+ }
+ expected++;
+ }
+ }
+ spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
+
+ return expected;
+}
+
+static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq,
+ struct sock *start_sk)
+{
+ struct bpf_tcp_iter_state *iter = seq->private;
+ struct tcp_iter_state *st = &iter->state;
+ struct hlist_nulls_node *node;
+ unsigned int expected = 1;
+ struct sock *sk;
+
+ sock_hold(start_sk);
+ iter->batch[iter->end_sk++] = start_sk;
+
+ sk = sk_nulls_next(start_sk);
+ sk_nulls_for_each_from(sk, node) {
+ if (seq_sk_match(seq, sk)) {
+ if (iter->end_sk < iter->max_sk) {
+ sock_hold(sk);
+ iter->batch[iter->end_sk++] = sk;
+ }
+ expected++;
+ }
+ }
+ spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+
+ return expected;
+}
+
+static struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
+{
+ struct bpf_tcp_iter_state *iter = seq->private;
+ struct tcp_iter_state *st = &iter->state;
+ unsigned int expected;
+ bool resized = false;
+ struct sock *sk;
+
+ /* The st->bucket is done. Directly advance to the next
+ * bucket instead of having the tcp_seek_last_pos() to skip
+ * one by one in the current bucket and eventually find out
+ * it has to advance to the next bucket.
+ */
+ if (iter->st_bucket_done) {
+ st->offset = 0;
+ st->bucket++;
+ if (st->state == TCP_SEQ_STATE_LISTENING &&
+ st->bucket > tcp_hashinfo.lhash2_mask) {
+ st->state = TCP_SEQ_STATE_ESTABLISHED;
+ st->bucket = 0;
+ }
+ }
+
+again:
+ /* Get a new batch */
+ iter->cur_sk = 0;
+ iter->end_sk = 0;
+ iter->st_bucket_done = false;
+
+ sk = tcp_seek_last_pos(seq);
+ if (!sk)
+ return NULL; /* Done */
+
+ if (st->state == TCP_SEQ_STATE_LISTENING)
+ expected = bpf_iter_tcp_listening_batch(seq, sk);
+ else
+ expected = bpf_iter_tcp_established_batch(seq, sk);
+
+ if (iter->end_sk == expected) {
+ iter->st_bucket_done = true;
+ return sk;
+ }
+
+ if (!resized && !bpf_iter_tcp_realloc_batch(iter, expected * 3 / 2)) {
+ resized = true;
+ goto again;
+ }
+
+ return sk;
+}
+
+static void *bpf_iter_tcp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ /* bpf iter does not support lseek, so it always
+ * continue from where it was stop()-ped.
+ */
+ if (*pos)
+ return bpf_iter_tcp_batch(seq);
+
+ return SEQ_START_TOKEN;
+}
+
+static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_tcp_iter_state *iter = seq->private;
+ struct tcp_iter_state *st = &iter->state;
+ struct sock *sk;
+
+ /* Whenever seq_next() is called, the iter->cur_sk is
+ * done with seq_show(), so advance to the next sk in
+ * the batch.
+ */
+ if (iter->cur_sk < iter->end_sk) {
+ /* Keeping st->num consistent in tcp_iter_state.
+ * bpf_iter_tcp does not use st->num.
+ * meta.seq_num is used instead.
+ */
+ st->num++;
+ /* Move st->offset to the next sk in the bucket such that
+ * the future start() will resume at st->offset in
+ * st->bucket. See tcp_seek_last_pos().
+ */
+ st->offset++;
+ sock_put(iter->batch[iter->cur_sk++]);
+ }
+
+ if (iter->cur_sk < iter->end_sk)
+ sk = iter->batch[iter->cur_sk];
+ else
+ sk = bpf_iter_tcp_batch(seq);
+
+ ++*pos;
+ /* Keeping st->last_pos consistent in tcp_iter_state.
+ * bpf iter does not do lseek, so st->last_pos always equals to *pos.
+ */
+ st->last_pos = *pos;
+ return sk;
+}
+
static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
{
struct bpf_iter_meta meta;
struct bpf_prog *prog;
struct sock *sk = v;
+ bool slow;
uid_t uid;
+ int ret;
if (v == SEQ_START_TOKEN)
return 0;
+ if (sk_fullsock(sk))
+ slow = lock_sock_fast(sk);
+
+ if (unlikely(sk_unhashed(sk))) {
+ ret = SEQ_SKIP;
+ goto unlock;
+ }
+
if (sk->sk_state == TCP_TIME_WAIT) {
uid = 0;
} else if (sk->sk_state == TCP_NEW_SYN_RECV) {
@@ -2728,11 +2925,18 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
meta.seq = seq;
prog = bpf_iter_get_info(&meta, false);
- return tcp_prog_seq_show(prog, &meta, v, uid);
+ ret = tcp_prog_seq_show(prog, &meta, v, uid);
+
+unlock:
+ if (sk_fullsock(sk))
+ unlock_sock_fast(sk, slow);
+ return ret;
+
}
static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
{
+ struct bpf_tcp_iter_state *iter = seq->private;
struct bpf_iter_meta meta;
struct bpf_prog *prog;
@@ -2743,16 +2947,33 @@ static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
(void)tcp_prog_seq_show(prog, &meta, v, 0);
}
- tcp_seq_stop(seq, v);
+ if (iter->cur_sk < iter->end_sk) {
+ bpf_iter_tcp_put_batch(iter);
+ iter->st_bucket_done = false;
+ }
}
static const struct seq_operations bpf_iter_tcp_seq_ops = {
.show = bpf_iter_tcp_seq_show,
- .start = tcp_seq_start,
- .next = tcp_seq_next,
+ .start = bpf_iter_tcp_seq_start,
+ .next = bpf_iter_tcp_seq_next,
.stop = bpf_iter_tcp_seq_stop,
};
#endif
+static unsigned short seq_file_family(const struct seq_file *seq)
+{
+ const struct tcp_seq_afinfo *afinfo;
+
+#ifdef CONFIG_BPF_SYSCALL
+ /* Iterated from bpf_iter. Let the bpf prog to filter instead. */
+ if (seq->op == &bpf_iter_tcp_seq_ops)
+ return AF_UNSPEC;
+#endif
+
+ /* Iterated from proc fs */
+ afinfo = PDE_DATA(file_inode(seq->file));
+ return afinfo->family;
+}
static const struct seq_operations tcp4_seq_ops = {
.show = tcp4_seq_show,
@@ -2964,7 +3185,6 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC;
net->ipv4.sysctl_tcp_comp_sack_nr = 44;
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
- spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0;
atomic_set(&net->ipv4.tfo_active_disable_times, 0);
@@ -3003,39 +3223,55 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta,
struct sock_common *sk_common, uid_t uid)
+#define INIT_BATCH_SZ 16
+
static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux)
{
- struct tcp_iter_state *st = priv_data;
- struct tcp_seq_afinfo *afinfo;
- int ret;
+ struct bpf_tcp_iter_state *iter = priv_data;
+ int err;
- afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
- if (!afinfo)
- return -ENOMEM;
+ err = bpf_iter_init_seq_net(priv_data, aux);
+ if (err)
+ return err;
- afinfo->family = AF_UNSPEC;
- st->bpf_seq_afinfo = afinfo;
- ret = bpf_iter_init_seq_net(priv_data, aux);
- if (ret)
- kfree(afinfo);
- return ret;
+ err = bpf_iter_tcp_realloc_batch(iter, INIT_BATCH_SZ);
+ if (err) {
+ bpf_iter_fini_seq_net(priv_data);
+ return err;
+ }
+
+ return 0;
}
static void bpf_iter_fini_tcp(void *priv_data)
{
- struct tcp_iter_state *st = priv_data;
+ struct bpf_tcp_iter_state *iter = priv_data;
- kfree(st->bpf_seq_afinfo);
bpf_iter_fini_seq_net(priv_data);
+ kvfree(iter->batch);
}
static const struct bpf_iter_seq_info tcp_seq_info = {
.seq_ops = &bpf_iter_tcp_seq_ops,
.init_seq_private = bpf_iter_init_tcp,
.fini_seq_private = bpf_iter_fini_tcp,
- .seq_priv_size = sizeof(struct tcp_iter_state),
+ .seq_priv_size = sizeof(struct bpf_tcp_iter_state),
};
+static const struct bpf_func_proto *
+bpf_iter_tcp_get_func_proto(enum bpf_func_id func_id,
+ const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_setsockopt:
+ return &bpf_sk_setsockopt_proto;
+ case BPF_FUNC_getsockopt:
+ return &bpf_sk_getsockopt_proto;
+ default:
+ return NULL;
+ }
+}
+
static struct bpf_iter_reg tcp_reg_info = {
.target = "tcp",
.ctx_arg_info_size = 1,
@@ -3043,6 +3279,7 @@ static struct bpf_iter_reg tcp_reg_info = {
{ offsetof(struct bpf_iter__tcp, sk_common),
PTR_TO_BTF_ID_OR_NULL },
},
+ .get_func_proto = bpf_iter_tcp_get_func_proto,
.seq_info = &tcp_seq_info,
};
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 29553fce8502..6d72f3ea48c4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3373,7 +3373,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
sk_memory_allocated_add(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
- mem_cgroup_charge_skmem(sk->sk_memcg, amt);
+ mem_cgroup_charge_skmem(sk->sk_memcg, amt,
+ gfp_memcg_charge() | __GFP_NOFAIL);
}
/* Send a FIN. The caller locks the socket for us.
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 6f1b4ac7fe99..fd113f6226ef 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -172,7 +172,8 @@ void tcp_rack_reo_timeout(struct sock *sk)
/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries.
*
- * If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded
+ * If a DSACK is received that seems like it may have been due to reordering
+ * triggering fast recovery, increment reo_wnd by min_rtt/4 (upper bounded
* by srtt), since there is possibility that spurious retransmission was
* due to reordering delay longer than reo_wnd.
*
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1a742b710e54..8851c9463b4b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1143,7 +1143,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
rcu_read_unlock();
}
- if (cgroup_bpf_enabled(BPF_CGROUP_UDP4_SENDMSG) && !connected) {
+ if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
(struct sockaddr *)usin, &ipc.addr);
if (err)
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 9f5a5cdc38e6..7a1d5f473878 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -112,7 +112,6 @@ static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS];
static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
{
*prot = *base;
- prot->unhash = sock_map_unhash;
prot->close = sock_map_close;
prot->recvmsg = udp_bpf_recvmsg;
}
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 1380a6b6f4ff..86d32a1e62ac 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -152,8 +152,8 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features,
bool is_ipv6)
{
+ const struct net_offload __rcu **offloads;
__be16 protocol = skb->protocol;
- const struct net_offload **offloads;
const struct net_offload *ops;
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 747f56e0c636..e504204bca92 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -328,4 +328,15 @@ config IPV6_RPL_LWTUNNEL
If unsure, say N.
+config IPV6_IOAM6_LWTUNNEL
+ bool "IPv6: IOAM Pre-allocated Trace insertion support"
+ depends on IPV6
+ select LWTUNNEL
+ help
+ Support for the inline insertion of IOAM Pre-allocated
+ Trace Header (only on locally generated packets), using
+ the lightweight tunnels mechanism.
+
+ If unsure, say N.
+
endif # IPV6
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index cf7b47bdb9b3..1bc7e143217b 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -10,7 +10,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
- udp_offload.o seg6.o fib6_notifier.o rpl.o
+ udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o
ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o
@@ -27,6 +27,7 @@ ipv6-$(CONFIG_NETLABEL) += calipso.o
ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o
ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o
+ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o
ipv6-objs += $(ipv6-y)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3bf685fe64b9..17756f3ed33b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -89,6 +89,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/export.h>
+#include <linux/ioam6.h>
#define INFINITY_LIFE_TIME 0xFFFFFFFF
@@ -237,6 +238,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64,
.disable_policy = 0,
.rpl_seg_enabled = 0,
+ .ioam6_enabled = 0,
+ .ioam6_id = IOAM6_DEFAULT_IF_ID,
+ .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -293,6 +297,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64,
.disable_policy = 0,
.rpl_seg_enabled = 0,
+ .ioam6_enabled = 0,
+ .ioam6_id = IOAM6_DEFAULT_IF_ID,
+ .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
};
/* Check if link is ready: is it up and is a valid qdisc available */
@@ -387,6 +394,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ndev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
ndev->cnf.mtu6 = dev->mtu;
+ ndev->ra_mtu = 0;
ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
if (!ndev->nd_parms) {
kfree(ndev);
@@ -694,8 +702,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
errout:
if (in6_dev)
in6_dev_put(in6_dev);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
return err;
}
@@ -1080,7 +1087,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
goto out;
}
- ifa = kzalloc(sizeof(*ifa), gfp_flags);
+ ifa = kzalloc(sizeof(*ifa), gfp_flags | __GFP_ACCOUNT);
if (!ifa) {
err = -ENOBUFS;
goto out;
@@ -3843,6 +3850,7 @@ restart:
}
idev->tstamp = jiffies;
+ idev->ra_mtu = 0;
/* Last: Shot the device (if unregistered) */
if (unregister) {
@@ -5211,8 +5219,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
.netnsid = -1,
.type = type,
};
- struct net *net = sock_net(skb->sk);
- struct net *tgt_net = net;
+ struct net *tgt_net = sock_net(skb->sk);
int idx, s_idx, s_ip_idx;
int h, s_h;
struct net_device *dev;
@@ -5351,7 +5358,7 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb,
static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
- struct net *net = sock_net(in_skb->sk);
+ struct net *tgt_net = sock_net(in_skb->sk);
struct inet6_fill_args fillargs = {
.portid = NETLINK_CB(in_skb).portid,
.seq = nlh->nlmsg_seq,
@@ -5359,7 +5366,6 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
.flags = 0,
.netnsid = -1,
};
- struct net *tgt_net = net;
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
struct in6_addr *addr = NULL, *peer;
@@ -5412,8 +5418,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
errout_ifa:
in6_ifa_put(ifa);
errout:
- if (dev)
- dev_put(dev);
+ dev_put(dev);
if (fillargs.netnsid >= 0)
put_net(tgt_net);
@@ -5526,6 +5531,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled;
+ array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled;
+ array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
+ array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
}
static inline size_t inet6_ifla6_size(void)
@@ -5537,6 +5545,7 @@ static inline size_t inet6_ifla6_size(void)
+ nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
+ nla_total_size(sizeof(struct in6_addr)) /* IFLA_INET6_TOKEN */
+ nla_total_size(1) /* IFLA_INET6_ADDR_GEN_MODE */
+ + nla_total_size(4) /* IFLA_INET6_RA_MTU */
+ 0;
}
@@ -5645,6 +5654,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
goto nla_put_failure;
+ if (idev->ra_mtu &&
+ nla_put_u32(skb, IFLA_INET6_RA_MTU, idev->ra_mtu))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -5761,6 +5774,9 @@ update_lft:
static const struct nla_policy inet6_af_policy[IFLA_INET6_MAX + 1] = {
[IFLA_INET6_ADDR_GEN_MODE] = { .type = NLA_U8 },
[IFLA_INET6_TOKEN] = { .len = sizeof(struct in6_addr) },
+ [IFLA_INET6_RA_MTU] = { .type = NLA_REJECT,
+ .reject_message =
+ "IFLA_INET6_RA_MTU can not be set" },
};
static int check_addr_gen_mode(int mode)
@@ -5784,7 +5800,8 @@ static int check_stable_privacy(struct inet6_dev *idev, struct net *net,
}
static int inet6_validate_link_af(const struct net_device *dev,
- const struct nlattr *nla)
+ const struct nlattr *nla,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[IFLA_INET6_MAX + 1];
struct inet6_dev *idev = NULL;
@@ -5797,7 +5814,7 @@ static int inet6_validate_link_af(const struct net_device *dev,
}
err = nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla,
- inet6_af_policy, NULL);
+ inet6_af_policy, extack);
if (err)
return err;
@@ -6540,6 +6557,7 @@ static int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
static int minus_one = -1;
static const int two_five_five = 255;
+static u32 ioam6_if_id_max = U16_MAX;
static const struct ctl_table addrconf_sysctl[] = {
{
@@ -6933,6 +6951,31 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "ioam6_enabled",
+ .data = &ipv6_devconf.ioam6_enabled,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = (void *)SYSCTL_ZERO,
+ .extra2 = (void *)SYSCTL_ONE,
+ },
+ {
+ .procname = "ioam6_id",
+ .data = &ipv6_devconf.ioam6_id,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = (void *)SYSCTL_ZERO,
+ .extra2 = (void *)&ioam6_if_id_max,
+ },
+ {
+ .procname = "ioam6_id_wide",
+ .data = &ipv6_devconf.ioam6_id_wide,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_douintvec,
+ },
+ {
/* sentinel */
}
};
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 2389ff702f51..b5878bb8e419 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -62,6 +62,7 @@
#include <net/rpl.h>
#include <net/compat.h>
#include <net/xfrm.h>
+#include <net/ioam6.h>
#include <linux/uaccess.h>
#include <linux/mroute6.h>
@@ -454,7 +455,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* changes context in a wrong way it will be caught.
*/
err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
- BPF_CGROUP_INET6_BIND, &flags);
+ CGROUP_INET6_BIND, &flags);
if (err)
return err;
@@ -531,7 +532,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
if (np->sndflow)
sin->sin6_flowinfo = np->flow_label;
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- BPF_CGROUP_INET6_GETPEERNAME,
+ CGROUP_INET6_GETPEERNAME,
NULL);
} else {
if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
@@ -540,7 +541,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin6_addr = sk->sk_v6_rcv_saddr;
sin->sin6_port = inet->inet_sport;
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- BPF_CGROUP_INET6_GETSOCKNAME,
+ CGROUP_INET6_GETSOCKNAME,
NULL);
}
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
@@ -961,6 +962,9 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.fib_notify_on_flag_change = 0;
atomic_set(&net->ipv6.fib6_sernum, 1);
+ net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID;
+ net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE;
+
err = ipv6_init_mibs(net);
if (err)
return err;
@@ -1191,6 +1195,10 @@ static int __init inet6_init(void)
if (err)
goto rpl_fail;
+ err = ioam6_init();
+ if (err)
+ goto ioam6_fail;
+
err = igmp6_late_init();
if (err)
goto igmp6_late_err;
@@ -1213,6 +1221,8 @@ sysctl_fail:
igmp6_late_cleanup();
#endif
igmp6_late_err:
+ ioam6_exit();
+ioam6_fail:
rpl_exit();
rpl_fail:
seg6_exit();
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 26882e165c9e..3a871a09f962 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -49,22 +49,12 @@
#include <net/seg6_hmac.h>
#endif
#include <net/rpl.h>
+#include <linux/ioam6.h>
+#include <net/ioam6.h>
+#include <net/dst_metadata.h>
#include <linux/uaccess.h>
-/*
- * Parsing tlv encoded headers.
- *
- * Parsing function "func" returns true, if parsing succeed
- * and false, if it failed.
- * It MUST NOT touch skb->h.
- */
-
-struct tlvtype_proc {
- int type;
- bool (*func)(struct sk_buff *skb, int offset);
-};
-
/*********************
Generic functions
*********************/
@@ -109,16 +99,23 @@ drop:
return false;
}
+static bool ipv6_hop_ra(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff);
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+static bool ipv6_dest_hao(struct sk_buff *skb, int optoff);
+#endif
+
/* Parse tlv encoded option header (hop-by-hop or destination) */
-static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
+static bool ip6_parse_tlv(bool hopbyhop,
struct sk_buff *skb,
int max_count)
{
int len = (skb_transport_header(skb)[1] + 1) << 3;
const unsigned char *nh = skb_network_header(skb);
int off = skb_network_header_len(skb);
- const struct tlvtype_proc *curr;
bool disallow_unknowns = false;
int tlv_count = 0;
int padlen = 0;
@@ -173,20 +170,45 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
if (tlv_count > max_count)
goto bad;
- for (curr = procs; curr->type >= 0; curr++) {
- if (curr->type == nh[off]) {
- /* type specific length/alignment
- checks will be performed in the
- func(). */
- if (curr->func(skb, off) == false)
+ if (hopbyhop) {
+ switch (nh[off]) {
+ case IPV6_TLV_ROUTERALERT:
+ if (!ipv6_hop_ra(skb, off))
+ return false;
+ break;
+ case IPV6_TLV_IOAM:
+ if (!ipv6_hop_ioam(skb, off))
+ return false;
+ break;
+ case IPV6_TLV_JUMBO:
+ if (!ipv6_hop_jumbo(skb, off))
+ return false;
+ break;
+ case IPV6_TLV_CALIPSO:
+ if (!ipv6_hop_calipso(skb, off))
+ return false;
+ break;
+ default:
+ if (!ip6_tlvopt_unknown(skb, off,
+ disallow_unknowns))
+ return false;
+ break;
+ }
+ } else {
+ switch (nh[off]) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+ case IPV6_TLV_HAO:
+ if (!ipv6_dest_hao(skb, off))
+ return false;
+ break;
+#endif
+ default:
+ if (!ip6_tlvopt_unknown(skb, off,
+ disallow_unknowns))
return false;
break;
}
}
- if (curr->type < 0 &&
- !ip6_tlvopt_unknown(skb, off, disallow_unknowns))
- return false;
-
padlen = 0;
}
off += optlen;
@@ -264,16 +286,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
}
#endif
-static const struct tlvtype_proc tlvprocdestopt_lst[] = {
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
- {
- .type = IPV6_TLV_HAO,
- .func = ipv6_dest_hao,
- },
-#endif
- {-1, NULL}
-};
-
static int ipv6_destopt_rcv(struct sk_buff *skb)
{
struct inet6_dev *idev = __in6_dev_get(skb->dev);
@@ -304,8 +316,7 @@ fail_and_free:
dstbuf = opt->dst1;
#endif
- if (ip6_parse_tlv(tlvprocdestopt_lst, skb,
- net->ipv6.sysctl.max_dst_opts_cnt)) {
+ if (ip6_parse_tlv(false, skb, net->ipv6.sysctl.max_dst_opts_cnt)) {
skb->transport_header += extlen;
opt = IP6CB(skb);
#if IS_ENABLED(CONFIG_IPV6_MIP6)
@@ -928,6 +939,60 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
return false;
}
+/* IOAM */
+
+static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
+{
+ struct ioam6_trace_hdr *trace;
+ struct ioam6_namespace *ns;
+ struct ioam6_hdr *hdr;
+
+ /* Bad alignment (must be 4n-aligned) */
+ if (optoff & 3)
+ goto drop;
+
+ /* Ignore if IOAM is not enabled on ingress */
+ if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled)
+ goto ignore;
+
+ /* Truncated Option header */
+ hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff);
+ if (hdr->opt_len < 2)
+ goto drop;
+
+ switch (hdr->type) {
+ case IOAM6_TYPE_PREALLOC:
+ /* Truncated Pre-allocated Trace header */
+ if (hdr->opt_len < 2 + sizeof(*trace))
+ goto drop;
+
+ /* Malformed Pre-allocated Trace header */
+ trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr));
+ if (hdr->opt_len < 2 + sizeof(*trace) + trace->remlen * 4)
+ goto drop;
+
+ /* Ignore if the IOAM namespace is unknown */
+ ns = ioam6_namespace(ipv6_skb_net(skb), trace->namespace_id);
+ if (!ns)
+ goto ignore;
+
+ if (!skb_valid_dst(skb))
+ ip6_route_input(skb);
+
+ ioam6_fill_trace_data(skb, ns, trace);
+ break;
+ default:
+ break;
+ }
+
+ignore:
+ return true;
+
+drop:
+ kfree_skb(skb);
+ return false;
+}
+
/* Jumbo payload */
static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
@@ -994,22 +1059,6 @@ drop:
return false;
}
-static const struct tlvtype_proc tlvprochopopt_lst[] = {
- {
- .type = IPV6_TLV_ROUTERALERT,
- .func = ipv6_hop_ra,
- },
- {
- .type = IPV6_TLV_JUMBO,
- .func = ipv6_hop_jumbo,
- },
- {
- .type = IPV6_TLV_CALIPSO,
- .func = ipv6_hop_calipso,
- },
- { -1, }
-};
-
int ipv6_parse_hopopts(struct sk_buff *skb)
{
struct inet6_skb_parm *opt = IP6CB(skb);
@@ -1035,8 +1084,7 @@ fail_and_free:
goto fail_and_free;
opt->flags |= IP6SKB_HOPBYHOP;
- if (ip6_parse_tlv(tlvprochopopt_lst, skb,
- net->ipv6.sysctl.max_hbh_opts_cnt)) {
+ if (ip6_parse_tlv(true, skb, net->ipv6.sysctl.max_hbh_opts_cnt)) {
skb->transport_header += extlen;
opt = IP6CB(skb);
opt->nhoff = sizeof(struct ipv6hdr);
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
new file mode 100644
index 000000000000..5e8961004832
--- /dev/null
+++ b/net/ipv6/ioam6.c
@@ -0,0 +1,910 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IPv6 IOAM implementation
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/net.h>
+#include <linux/ioam6.h>
+#include <linux/ioam6_genl.h>
+#include <linux/rhashtable.h>
+
+#include <net/addrconf.h>
+#include <net/genetlink.h>
+#include <net/ioam6.h>
+
+static void ioam6_ns_release(struct ioam6_namespace *ns)
+{
+ kfree_rcu(ns, rcu);
+}
+
+static void ioam6_sc_release(struct ioam6_schema *sc)
+{
+ kfree_rcu(sc, rcu);
+}
+
+static void ioam6_free_ns(void *ptr, void *arg)
+{
+ struct ioam6_namespace *ns = (struct ioam6_namespace *)ptr;
+
+ if (ns)
+ ioam6_ns_release(ns);
+}
+
+static void ioam6_free_sc(void *ptr, void *arg)
+{
+ struct ioam6_schema *sc = (struct ioam6_schema *)ptr;
+
+ if (sc)
+ ioam6_sc_release(sc);
+}
+
+static int ioam6_ns_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+ const struct ioam6_namespace *ns = obj;
+
+ return (ns->id != *(__be16 *)arg->key);
+}
+
+static int ioam6_sc_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+ const struct ioam6_schema *sc = obj;
+
+ return (sc->id != *(u32 *)arg->key);
+}
+
+static const struct rhashtable_params rht_ns_params = {
+ .key_len = sizeof(__be16),
+ .key_offset = offsetof(struct ioam6_namespace, id),
+ .head_offset = offsetof(struct ioam6_namespace, head),
+ .automatic_shrinking = true,
+ .obj_cmpfn = ioam6_ns_cmpfn,
+};
+
+static const struct rhashtable_params rht_sc_params = {
+ .key_len = sizeof(u32),
+ .key_offset = offsetof(struct ioam6_schema, id),
+ .head_offset = offsetof(struct ioam6_schema, head),
+ .automatic_shrinking = true,
+ .obj_cmpfn = ioam6_sc_cmpfn,
+};
+
+static struct genl_family ioam6_genl_family;
+
+static const struct nla_policy ioam6_genl_policy_addns[] = {
+ [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 },
+ [IOAM6_ATTR_NS_DATA] = { .type = NLA_U32 },
+ [IOAM6_ATTR_NS_DATA_WIDE] = { .type = NLA_U64 },
+};
+
+static const struct nla_policy ioam6_genl_policy_delns[] = {
+ [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 },
+};
+
+static const struct nla_policy ioam6_genl_policy_addsc[] = {
+ [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 },
+ [IOAM6_ATTR_SC_DATA] = { .type = NLA_BINARY,
+ .len = IOAM6_MAX_SCHEMA_DATA_LEN },
+};
+
+static const struct nla_policy ioam6_genl_policy_delsc[] = {
+ [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy ioam6_genl_policy_ns_sc[] = {
+ [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 },
+ [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 },
+ [IOAM6_ATTR_SC_NONE] = { .type = NLA_FLAG },
+};
+
+static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ struct ioam6_namespace *ns;
+ u64 data64;
+ u32 data32;
+ __be16 id;
+ int err;
+
+ if (!info->attrs[IOAM6_ATTR_NS_ID])
+ return -EINVAL;
+
+ id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+ if (ns) {
+ err = -EEXIST;
+ goto out_unlock;
+ }
+
+ ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+ if (!ns) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ ns->id = id;
+
+ if (!info->attrs[IOAM6_ATTR_NS_DATA])
+ data32 = IOAM6_U32_UNAVAILABLE;
+ else
+ data32 = nla_get_u32(info->attrs[IOAM6_ATTR_NS_DATA]);
+
+ if (!info->attrs[IOAM6_ATTR_NS_DATA_WIDE])
+ data64 = IOAM6_U64_UNAVAILABLE;
+ else
+ data64 = nla_get_u64(info->attrs[IOAM6_ATTR_NS_DATA_WIDE]);
+
+ ns->data = cpu_to_be32(data32);
+ ns->data_wide = cpu_to_be64(data64);
+
+ err = rhashtable_lookup_insert_fast(&nsdata->namespaces, &ns->head,
+ rht_ns_params);
+ if (err)
+ kfree(ns);
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static int ioam6_genl_delns(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ struct ioam6_namespace *ns;
+ struct ioam6_schema *sc;
+ __be16 id;
+ int err;
+
+ if (!info->attrs[IOAM6_ATTR_NS_ID])
+ return -EINVAL;
+
+ id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+ if (!ns) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ sc = rcu_dereference_protected(ns->schema,
+ lockdep_is_held(&nsdata->lock));
+
+ err = rhashtable_remove_fast(&nsdata->namespaces, &ns->head,
+ rht_ns_params);
+ if (err)
+ goto out_unlock;
+
+ if (sc)
+ rcu_assign_pointer(sc->ns, NULL);
+
+ ioam6_ns_release(ns);
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static int __ioam6_genl_dumpns_element(struct ioam6_namespace *ns,
+ u32 portid,
+ u32 seq,
+ u32 flags,
+ struct sk_buff *skb,
+ u8 cmd)
+{
+ struct ioam6_schema *sc;
+ u64 data64;
+ u32 data32;
+ void *hdr;
+
+ hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd);
+ if (!hdr)
+ return -ENOMEM;
+
+ data32 = be32_to_cpu(ns->data);
+ data64 = be64_to_cpu(ns->data_wide);
+
+ if (nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id)) ||
+ (data32 != IOAM6_U32_UNAVAILABLE &&
+ nla_put_u32(skb, IOAM6_ATTR_NS_DATA, data32)) ||
+ (data64 != IOAM6_U64_UNAVAILABLE &&
+ nla_put_u64_64bit(skb, IOAM6_ATTR_NS_DATA_WIDE,
+ data64, IOAM6_ATTR_PAD)))
+ goto nla_put_failure;
+
+ rcu_read_lock();
+
+ sc = rcu_dereference(ns->schema);
+ if (sc && nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id)) {
+ rcu_read_unlock();
+ goto nla_put_failure;
+ }
+
+ rcu_read_unlock();
+
+ genlmsg_end(skb, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ioam6_genl_dumpns_start(struct netlink_callback *cb)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk));
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ if (!iter) {
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ cb->args[0] = (long)iter;
+ }
+
+ rhashtable_walk_enter(&nsdata->namespaces, iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpns_done(struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ rhashtable_walk_exit(iter);
+ kfree(iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpns(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter;
+ struct ioam6_namespace *ns;
+ int err;
+
+ iter = (struct rhashtable_iter *)cb->args[0];
+ rhashtable_walk_start(iter);
+
+ for (;;) {
+ ns = rhashtable_walk_next(iter);
+
+ if (IS_ERR(ns)) {
+ if (PTR_ERR(ns) == -EAGAIN)
+ continue;
+ err = PTR_ERR(ns);
+ goto done;
+ } else if (!ns) {
+ break;
+ }
+
+ err = __ioam6_genl_dumpns_element(ns,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ skb,
+ IOAM6_CMD_DUMP_NAMESPACES);
+ if (err)
+ goto done;
+ }
+
+ err = skb->len;
+
+done:
+ rhashtable_walk_stop(iter);
+ return err;
+}
+
+static int ioam6_genl_addsc(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ int len, len_aligned, err;
+ struct ioam6_schema *sc;
+ u32 id;
+
+ if (!info->attrs[IOAM6_ATTR_SC_ID] || !info->attrs[IOAM6_ATTR_SC_DATA])
+ return -EINVAL;
+
+ id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params);
+ if (sc) {
+ err = -EEXIST;
+ goto out_unlock;
+ }
+
+ len = nla_len(info->attrs[IOAM6_ATTR_SC_DATA]);
+ len_aligned = ALIGN(len, 4);
+
+ sc = kzalloc(sizeof(*sc) + len_aligned, GFP_KERNEL);
+ if (!sc) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ sc->id = id;
+ sc->len = len_aligned;
+ sc->hdr = cpu_to_be32(sc->id | ((u8)(sc->len / 4) << 24));
+ nla_memcpy(sc->data, info->attrs[IOAM6_ATTR_SC_DATA], len);
+
+ err = rhashtable_lookup_insert_fast(&nsdata->schemas, &sc->head,
+ rht_sc_params);
+ if (err)
+ goto free_sc;
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+free_sc:
+ kfree(sc);
+ goto out_unlock;
+}
+
+static int ioam6_genl_delsc(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ struct ioam6_namespace *ns;
+ struct ioam6_schema *sc;
+ int err;
+ u32 id;
+
+ if (!info->attrs[IOAM6_ATTR_SC_ID])
+ return -EINVAL;
+
+ id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params);
+ if (!sc) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ ns = rcu_dereference_protected(sc->ns, lockdep_is_held(&nsdata->lock));
+
+ err = rhashtable_remove_fast(&nsdata->schemas, &sc->head,
+ rht_sc_params);
+ if (err)
+ goto out_unlock;
+
+ if (ns)
+ rcu_assign_pointer(ns->schema, NULL);
+
+ ioam6_sc_release(sc);
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static int __ioam6_genl_dumpsc_element(struct ioam6_schema *sc,
+ u32 portid, u32 seq, u32 flags,
+ struct sk_buff *skb, u8 cmd)
+{
+ struct ioam6_namespace *ns;
+ void *hdr;
+
+ hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd);
+ if (!hdr)
+ return -ENOMEM;
+
+ if (nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id) ||
+ nla_put(skb, IOAM6_ATTR_SC_DATA, sc->len, sc->data))
+ goto nla_put_failure;
+
+ rcu_read_lock();
+
+ ns = rcu_dereference(sc->ns);
+ if (ns && nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id))) {
+ rcu_read_unlock();
+ goto nla_put_failure;
+ }
+
+ rcu_read_unlock();
+
+ genlmsg_end(skb, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ioam6_genl_dumpsc_start(struct netlink_callback *cb)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk));
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ if (!iter) {
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ cb->args[0] = (long)iter;
+ }
+
+ rhashtable_walk_enter(&nsdata->schemas, iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpsc_done(struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ rhashtable_walk_exit(iter);
+ kfree(iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpsc(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter;
+ struct ioam6_schema *sc;
+ int err;
+
+ iter = (struct rhashtable_iter *)cb->args[0];
+ rhashtable_walk_start(iter);
+
+ for (;;) {
+ sc = rhashtable_walk_next(iter);
+
+ if (IS_ERR(sc)) {
+ if (PTR_ERR(sc) == -EAGAIN)
+ continue;
+ err = PTR_ERR(sc);
+ goto done;
+ } else if (!sc) {
+ break;
+ }
+
+ err = __ioam6_genl_dumpsc_element(sc,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ skb,
+ IOAM6_CMD_DUMP_SCHEMAS);
+ if (err)
+ goto done;
+ }
+
+ err = skb->len;
+
+done:
+ rhashtable_walk_stop(iter);
+ return err;
+}
+
+static int ioam6_genl_ns_set_schema(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_namespace *ns, *ns_ref;
+ struct ioam6_schema *sc, *sc_ref;
+ struct ioam6_pernet_data *nsdata;
+ __be16 ns_id;
+ u32 sc_id;
+ int err;
+
+ if (!info->attrs[IOAM6_ATTR_NS_ID] ||
+ (!info->attrs[IOAM6_ATTR_SC_ID] &&
+ !info->attrs[IOAM6_ATTR_SC_NONE]))
+ return -EINVAL;
+
+ ns_id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ ns = rhashtable_lookup_fast(&nsdata->namespaces, &ns_id, rht_ns_params);
+ if (!ns) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ if (info->attrs[IOAM6_ATTR_SC_NONE]) {
+ sc = NULL;
+ } else {
+ sc_id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+ sc = rhashtable_lookup_fast(&nsdata->schemas, &sc_id,
+ rht_sc_params);
+ if (!sc) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+ }
+
+ sc_ref = rcu_dereference_protected(ns->schema,
+ lockdep_is_held(&nsdata->lock));
+ if (sc_ref)
+ rcu_assign_pointer(sc_ref->ns, NULL);
+ rcu_assign_pointer(ns->schema, sc);
+
+ if (sc) {
+ ns_ref = rcu_dereference_protected(sc->ns,
+ lockdep_is_held(&nsdata->lock));
+ if (ns_ref)
+ rcu_assign_pointer(ns_ref->schema, NULL);
+ rcu_assign_pointer(sc->ns, ns);
+ }
+
+ err = 0;
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static const struct genl_ops ioam6_genl_ops[] = {
+ {
+ .cmd = IOAM6_CMD_ADD_NAMESPACE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_addns,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_addns,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_addns) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DEL_NAMESPACE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_delns,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_delns,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_delns) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DUMP_NAMESPACES,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .start = ioam6_genl_dumpns_start,
+ .dumpit = ioam6_genl_dumpns,
+ .done = ioam6_genl_dumpns_done,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = IOAM6_CMD_ADD_SCHEMA,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_addsc,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_addsc,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_addsc) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DEL_SCHEMA,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_delsc,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_delsc,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_delsc) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DUMP_SCHEMAS,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .start = ioam6_genl_dumpsc_start,
+ .dumpit = ioam6_genl_dumpsc,
+ .done = ioam6_genl_dumpsc_done,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = IOAM6_CMD_NS_SET_SCHEMA,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_ns_set_schema,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_ns_sc,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_ns_sc) - 1,
+ },
+};
+
+static struct genl_family ioam6_genl_family __ro_after_init = {
+ .name = IOAM6_GENL_NAME,
+ .version = IOAM6_GENL_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .ops = ioam6_genl_ops,
+ .n_ops = ARRAY_SIZE(ioam6_genl_ops),
+ .module = THIS_MODULE,
+};
+
+struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(net);
+
+ return rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+}
+/* Write this node's fields, selected by trace->type bits, into the last nodelen+sclen words of the free space. */
+static void __ioam6_fill_trace_data(struct sk_buff *skb,
+ struct ioam6_namespace *ns,
+ struct ioam6_trace_hdr *trace,
+ struct ioam6_schema *sc,
+ u8 sclen)
+{
+ struct __kernel_sock_timeval ts;
+ u64 raw64;
+ u32 raw32;
+ u16 raw16;
+ u8 *data;
+ u8 byte;
+
+ data = trace->data + trace->remlen * 4 - trace->nodelen * 4 - sclen * 4; /* all three lengths are in 4-octet units */
+
+ /* hop_lim and node_id */
+ if (trace->type.bit0) {
+ byte = ipv6_hdr(skb)->hop_limit;
+ if (skb->dev)
+ byte--; /* NOTE(review): presumably anticipates the forwarding hop-limit decrement - confirm */
+
+ raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id;
+
+ *(__be32 *)data = cpu_to_be32((byte << 24) | raw32); /* hop limit in the top octet, node id below */
+ data += sizeof(__be32);
+ }
+
+ /* ingress_if_id and egress_if_id */
+ if (trace->type.bit1) {
+ if (!skb->dev)
+ raw16 = IOAM6_U16_UNAVAILABLE; /* no ingress device attached to the skb */
+ else
+ raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id;
+
+ *(__be16 *)data = cpu_to_be16(raw16);
+ data += sizeof(__be16);
+
+ if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
+ raw16 = IOAM6_U16_UNAVAILABLE; /* loopback egress is reported as unavailable */
+ else
+ raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id;
+
+ *(__be16 *)data = cpu_to_be16(raw16);
+ data += sizeof(__be16);
+ }
+
+ /* timestamp seconds */
+ if (trace->type.bit2) {
+ if (!skb->dev) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ } else {
+ if (!skb->tstamp)
+ __net_timestamp(skb); /* stamp the skb now if it carries no timestamp yet */
+
+ skb_get_new_timestamp(skb, &ts);
+ *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec);
+ }
+ data += sizeof(__be32);
+ }
+
+ /* timestamp subseconds */
+ if (trace->type.bit3) {
+ if (!skb->dev) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ } else {
+ if (!skb->tstamp)
+ __net_timestamp(skb);
+
+ if (!trace->type.bit2)
+ skb_get_new_timestamp(skb, &ts); /* ts was already fetched above when bit2 is set */
+
+ *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec);
+ }
+ data += sizeof(__be32);
+ }
+
+ /* transit delay */
+ if (trace->type.bit4) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); /* always reported as unavailable here */
+ data += sizeof(__be32);
+ }
+
+ /* namespace data */
+ if (trace->type.bit5) {
+ *(__be32 *)data = ns->data; /* stored without byte-order conversion */
+ data += sizeof(__be32);
+ }
+
+ /* queue depth */
+ if (trace->type.bit6) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); /* always reported as unavailable here */
+ data += sizeof(__be32);
+ }
+
+ /* checksum complement */
+ if (trace->type.bit7) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); /* always reported as unavailable here */
+ data += sizeof(__be32);
+ }
+
+ /* hop_lim and node_id (wide) */
+ if (trace->type.bit8) {
+ byte = ipv6_hdr(skb)->hop_limit;
+ if (skb->dev)
+ byte--; /* same adjustment as for the short hop_lim field */
+
+ raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide;
+
+ *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64); /* hop limit in the top octet, wide node id below */
+ data += sizeof(__be64);
+ }
+
+ /* ingress_if_id and egress_if_id (wide) */
+ if (trace->type.bit9) {
+ if (!skb->dev)
+ raw32 = IOAM6_U32_UNAVAILABLE;
+ else
+ raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide;
+
+ *(__be32 *)data = cpu_to_be32(raw32);
+ data += sizeof(__be32);
+
+ if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
+ raw32 = IOAM6_U32_UNAVAILABLE;
+ else
+ raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide;
+
+ *(__be32 *)data = cpu_to_be32(raw32);
+ data += sizeof(__be32);
+ }
+
+ /* namespace data (wide) */
+ if (trace->type.bit10) {
+ *(__be64 *)data = ns->data_wide; /* stored without byte-order conversion */
+ data += sizeof(__be64);
+ }
+
+ /* buffer occupancy */
+ if (trace->type.bit11) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); /* always reported as unavailable here */
+ data += sizeof(__be32);
+ }
+
+ /* opaque state snapshot */
+ if (trace->type.bit22) {
+ if (!sc) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE >> 8); /* header-only entry when no schema is attached */
+ } else {
+ *(__be32 *)data = sc->hdr;
+ data += sizeof(__be32);
+
+ memcpy(data, sc->data, sc->len); /* schema payload follows its 4-octet header; last field, data not advanced */
+ }
+ }
+}
+
+/* Fill this node's data into @trace if it fits, else set Overflow; called with rcu_read_lock() */
+void ioam6_fill_trace_data(struct sk_buff *skb,
+ struct ioam6_namespace *ns,
+ struct ioam6_trace_hdr *trace)
+{
+ struct ioam6_schema *sc;
+ u8 sclen = 0; /* opaque-snapshot length in 4-octet units; stays 0 unless bit22 is set */
+
+ /* Skip if Overflow flag is set OR
+ * if an unknown type (bit 12-21) is set
+ */
+ if (trace->overflow ||
+ trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
+ trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
+ trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
+ trace->type.bit21) {
+ return;
+ }
+
+ /* NodeLen does not include Opaque State Snapshot length. We need to
+ * take it into account if the corresponding bit is set (bit 22) and
+ * if the current IOAM namespace has an active schema attached to it
+ */
+ sc = rcu_dereference(ns->schema);
+ if (trace->type.bit22) {
+ sclen = sizeof_field(struct ioam6_schema, hdr) / 4; /* the snapshot header is always counted */
+
+ if (sc)
+ sclen += sc->len / 4; /* plus the schema payload when a schema is attached */
+ }
+
+ /* If there is no space remaining, we set the Overflow flag and we
+ * skip without filling the trace
+ */
+ if (!trace->remlen || trace->remlen < trace->nodelen + sclen) {
+ trace->overflow = 1;
+ return;
+ }
+
+ __ioam6_fill_trace_data(skb, ns, trace, sc, sclen);
+ trace->remlen -= trace->nodelen + sclen; /* account for the space just consumed */
+}
+/* Per-netns init: allocate the IOAM6 state and its two hashtables; returns 0 or a negative errno. */
+static int __net_init ioam6_net_init(struct net *net)
+{
+ struct ioam6_pernet_data *nsdata;
+ int err = -ENOMEM; /* result if the allocation below fails */
+
+ nsdata = kzalloc(sizeof(*nsdata), GFP_KERNEL);
+ if (!nsdata)
+ goto out;
+
+ mutex_init(&nsdata->lock);
+ net->ipv6.ioam6_data = nsdata;
+
+ err = rhashtable_init(&nsdata->namespaces, &rht_ns_params);
+ if (err)
+ goto free_nsdata;
+
+ err = rhashtable_init(&nsdata->schemas, &rht_sc_params);
+ if (err)
+ goto free_rht_ns;
+
+out: /* also the success path: err is 0 here once both tables are initialised */
+ return err;
+free_rht_ns:
+ rhashtable_destroy(&nsdata->namespaces);
+free_nsdata:
+ kfree(nsdata);
+ net->ipv6.ioam6_data = NULL; /* do not leave a pointer to the freed state behind */
+ goto out;
+}
+/* Per-netns teardown: free every namespace and schema entry, then the per-netns state itself. */
+static void __net_exit ioam6_net_exit(struct net *net)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(net);
+
+ rhashtable_free_and_destroy(&nsdata->namespaces, ioam6_free_ns, NULL); /* ioam6_free_ns is passed as the per-entry free callback */
+ rhashtable_free_and_destroy(&nsdata->schemas, ioam6_free_sc, NULL); /* ioam6_free_sc is passed as the per-entry free callback */
+
+ kfree(nsdata);
+}
+/* Per-network-namespace hooks, registered from ioam6_init(). */
+static struct pernet_operations ioam6_net_ops = {
+ .init = ioam6_net_init,
+ .exit = ioam6_net_exit,
+};
+/* Register pernet ops, the genl family and (if configured) the lwtunnel encap; unwinds on failure. */
+int __init ioam6_init(void)
+{
+ int err = register_pernet_subsys(&ioam6_net_ops);
+ if (err)
+ goto out;
+
+ err = genl_register_family(&ioam6_genl_family);
+ if (err)
+ goto out_unregister_pernet_subsys;
+
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+ err = ioam6_iptunnel_init();
+ if (err)
+ goto out_unregister_genl;
+#endif
+
+ pr_info("In-situ OAM (IOAM) with IPv6\n");
+
+out: /* shared exit: err is 0 on success, otherwise the first failure's code */
+ return err;
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+out_unregister_genl:
+ genl_unregister_family(&ioam6_genl_family);
+#endif
+out_unregister_pernet_subsys:
+ unregister_pernet_subsys(&ioam6_net_ops);
+ goto out;
+}
+/* Undo ioam6_init() in reverse order of registration. */
+void ioam6_exit(void)
+{
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+ ioam6_iptunnel_exit();
+#endif
+ genl_unregister_family(&ioam6_genl_family);
+ unregister_pernet_subsys(&ioam6_net_ops);
+}
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
new file mode 100644
index 000000000000..f9ee04541c17
--- /dev/null
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IPv6 IOAM Lightweight Tunnel implementation
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+#include <linux/netlink.h>
+#include <linux/in6.h>
+#include <linux/ioam6.h>
+#include <linux/ioam6_iptunnel.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/lwtunnel.h>
+#include <net/ioam6.h>
+/* Trace-type masks used by ioam6_validate_trace_hdr(): a set bit costs 4 (short) or 8 (wide) octets per node. */
+#define IOAM6_MASK_SHORT_FIELDS 0xff100000
+#define IOAM6_MASK_WIDE_FIELDS 0xe00000
+/* Pre-built Hop-by-Hop header carrying the IOAM trace option; ioam6_do_inline() copies it into packets. */
+struct ioam6_lwt_encap {
+ struct ipv6_hopopt_hdr eh;
+ u8 pad[2]; /* 2-octet padding for 4n-alignment */
+ struct ioam6_hdr ioamh;
+ struct ioam6_trace_hdr traceh;
+} __packed;
+/* Private lwtunnel state: lives in lwtunnel_state->data (see ioam6_lwt_state()). */
+struct ioam6_lwt {
+ struct ioam6_lwt_encap tuninfo;
+};
+/* Reinterpret the private data area of @lwt as the IOAM6 tunnel state. */
+static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt)
+{
+ return (struct ioam6_lwt *)lwt->data;
+}
+/* Return the pre-built encapsulation template stored in @lwt. */
+static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt)
+{
+ return &ioam6_lwt_state(lwt)->tuninfo;
+}
+/* Return the trace header embedded in @lwt's encapsulation template. */
+static struct ioam6_trace_hdr *ioam6_trace(struct lwtunnel_state *lwt)
+{
+ return &(ioam6_lwt_state(lwt)->tuninfo.traceh);
+}
+/* Netlink policy: the TRACE attribute must be exactly one struct ioam6_trace_hdr long. */
+static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
+ [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
+};
+/* Append a copy of @trace to @skb as attribute @attrtype; returns 0, or -EMSGSIZE if nla_reserve() fails. */
+static int nla_put_ioam6_trace(struct sk_buff *skb, int attrtype,
+ struct ioam6_trace_hdr *trace)
+{
+ struct ioam6_trace_hdr *data;
+ struct nlattr *nla;
+ int len;
+
+ len = sizeof(*trace); /* fixed header only; the trace data area is not dumped */
+
+ nla = nla_reserve(skb, attrtype, len);
+ if (!nla)
+ return -EMSGSIZE;
+
+ data = nla_data(nla);
+ memcpy(data, trace, len);
+
+ return 0;
+}
+/* Check a user-supplied trace header and (re)compute its nodelen from the type bits; false if invalid. */
+static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
+{
+ u32 fields;
+
+ if (!trace->type_be32 || !trace->remlen ||
+ trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4) /* remlen is in 4-octet units */
+ return false;
+
+ trace->nodelen = 0; /* any caller-supplied value is discarded and rebuilt below */
+ fields = be32_to_cpu(trace->type_be32);
+
+ trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS)
+ * (sizeof(__be32) / 4); /* one 4-octet unit per short field */
+ trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS)
+ * (sizeof(__be64) / 4); /* two 4-octet units per wide field */
+
+ return true; /* NOTE(review): type bits 12-21 are not rejected here, yet ioam6_fill_trace_data() skips on them - confirm */
+}
+/* lwtunnel build_state hook: parse and validate the TRACE attribute, then pre-build the Hop-by-Hop template. */
+static int ioam6_build_state(struct net *net, struct nlattr *nla,
+ unsigned int family, const void *cfg,
+ struct lwtunnel_state **ts,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1];
+ struct ioam6_lwt_encap *tuninfo;
+ struct ioam6_trace_hdr *trace;
+ struct lwtunnel_state *s;
+ int len_aligned;
+ int len, err;
+
+ if (family != AF_INET6)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla,
+ ioam6_iptunnel_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[IOAM6_IPTUNNEL_TRACE]) {
+ NL_SET_ERR_MSG(extack, "missing trace");
+ return -EINVAL;
+ }
+
+ trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]);
+ if (!ioam6_validate_trace_hdr(trace)) { /* also rewrites trace->nodelen in place */
+ NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE],
+ "invalid trace validation");
+ return -EINVAL;
+ }
+
+ len = sizeof(*tuninfo) + trace->remlen * 4; /* template plus pre-allocated trace space (remlen in 4-octet units) */
+ len_aligned = ALIGN(len, 8);
+
+ s = lwtunnel_state_alloc(len_aligned);
+ if (!s)
+ return -ENOMEM;
+
+ tuninfo = ioam6_lwt_info(s);
+ tuninfo->eh.hdrlen = (len_aligned >> 3) - 1; /* 8-octet units minus one; ioam6_do_inline() inverts this */
+ tuninfo->pad[0] = IPV6_TLV_PADN;
+ tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC;
+ tuninfo->ioamh.opt_type = IPV6_TLV_IOAM;
+ tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace)
+ + trace->remlen * 4;
+
+ memcpy(&tuninfo->traceh, trace, sizeof(*trace));
+
+ len = len_aligned - len; /* len now holds the number of trailing pad octets */
+ if (len == 1) {
+ tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PAD1;
+ } else if (len > 0) {
+ tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN;
+ tuninfo->traceh.data[trace->remlen * 4 + 1] = len - 2; /* PadN length excludes its own 2-octet header */
+ }
+
+ s->type = LWTUNNEL_ENCAP_IOAM6;
+ s->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+
+ *ts = s;
+
+ return 0;
+}
+/* Insert the template Hop-by-Hop/IOAM header right after the IPv6 header of @skb and fill this node's trace. */
+static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo)
+{
+ struct ioam6_trace_hdr *trace;
+ struct ipv6hdr *oldhdr, *hdr;
+ struct ioam6_namespace *ns;
+ int hdrlen, err;
+
+ hdrlen = (tuninfo->eh.hdrlen + 1) << 3; /* extension header length in octets */
+
+ err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ if (unlikely(err))
+ return err;
+
+ oldhdr = ipv6_hdr(skb);
+ skb_pull(skb, sizeof(*oldhdr));
+ skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr));
+
+ skb_push(skb, sizeof(*oldhdr) + hdrlen); /* reopen room for the IPv6 header plus the new extension header */
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+
+ hdr = ipv6_hdr(skb);
+ memmove(hdr, oldhdr, sizeof(*oldhdr)); /* slide the IPv6 header to the new front */
+ tuninfo->eh.nexthdr = hdr->nexthdr; /* NOTE(review): this writes into the template passed in by the caller - confirm that is safe for concurrent senders */
+
+ skb_set_transport_header(skb, sizeof(*hdr));
+ skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen);
+
+ memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);
+
+ hdr->nexthdr = NEXTHDR_HOP;
+ hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
+
+ trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb)
+ + sizeof(struct ipv6_hopopt_hdr) + 2
+ + sizeof(struct ioam6_hdr)); /* the "+ 2" skips pad[2] of struct ioam6_lwt_encap */
+
+ ns = ioam6_namespace(dev_net(skb_dst(skb)->dev), trace->namespace_id);
+ if (ns)
+ ioam6_fill_trace_data(skb, ns, trace); /* left unfilled when the namespace is not configured */
+
+ return 0;
+}
+/* lwtunnel output hook: add the IOAM header to eligible IPv6 packets, then hand over to the original output. */
+static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct lwtunnel_state *lwt = skb_dst(skb)->lwtstate;
+ int err = -EINVAL; /* returned when the packet is not IPv6 */
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ goto drop;
+
+ /* Only for packets we send and
+ * that do not contain a Hop-by-Hop yet
+ */
+ if (skb->dev || ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
+ goto out;
+
+ err = ioam6_do_inline(skb, ioam6_lwt_info(lwt));
+ if (unlikely(err))
+ goto drop;
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(skb_dst(skb)->dev)); /* keep headroom for the link-layer header */
+ if (unlikely(err))
+ goto drop;
+
+out:
+ return lwt->orig_output(net, sk, skb);
+
+drop:
+ kfree_skb(skb);
+ return err;
+}
+/* lwtunnel fill_encap hook: dump the configured trace header into @skb; 0 or -EMSGSIZE. */
+static int ioam6_fill_encap_info(struct sk_buff *skb,
+ struct lwtunnel_state *lwtstate)
+{
+ struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate);
+
+ if (nla_put_ioam6_trace(skb, IOAM6_IPTUNNEL_TRACE, trace))
+ return -EMSGSIZE;
+
+ return 0;
+}
+/* lwtunnel get_encap_size hook: netlink space needed for the single TRACE attribute. */
+static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+ struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); /* used only for sizeof() */
+
+ return nla_total_size(sizeof(*trace));
+}
+/* lwtunnel cmp_encap hook: non-zero when the states differ; only namespace_id is compared. */
+static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+ struct ioam6_trace_hdr *a_hdr = ioam6_trace(a);
+ struct ioam6_trace_hdr *b_hdr = ioam6_trace(b);
+
+ return (a_hdr->namespace_id != b_hdr->namespace_id);
+}
+/* Encapsulation callbacks registered for LWTUNNEL_ENCAP_IOAM6 by ioam6_iptunnel_init(). */
+static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
+ .build_state = ioam6_build_state,
+ .output = ioam6_output,
+ .fill_encap = ioam6_fill_encap_info,
+ .get_encap_size = ioam6_encap_nlsize,
+ .cmp_encap = ioam6_encap_cmp,
+ .owner = THIS_MODULE,
+};
+/* Register the IOAM6 lwtunnel encapsulation; returns the result of lwtunnel_encap_add_ops(). */
+int __init ioam6_iptunnel_init(void)
+{
+ return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
+}
+/* Unregister the IOAM6 lwtunnel encapsulation added by ioam6_iptunnel_init(). */
+void ioam6_iptunnel_exit(void)
+{
+ lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
+}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 2d650dc24349..1bec5b22f80d 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1341,7 +1341,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
lockdep_is_held(&rt->fib6_table->tb6_lock));
- /* paired with smp_rmb() in rt6_get_cookie_safe() */
+ /* paired with smp_rmb() in fib6_get_cookie_safe() */
smp_wmb();
while (fn) {
fn->fn_sernum = sernum;
@@ -2449,8 +2449,8 @@ int __init fib6_init(void)
int ret = -ENOMEM;
fib6_node_kmem = kmem_cache_create("fib6_nodes",
- sizeof(struct fib6_node),
- 0, SLAB_HWCACHE_ALIGN,
+ sizeof(struct fib6_node), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
NULL);
if (!fib6_node_kmem)
goto out;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index bc224f917bbd..7baf41d160f5 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -629,6 +629,8 @@ drop:
static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
+ if (csum && skb_checksum_start(skb) < skb->data)
+ return -EINVAL;
return iptunnel_handle_offloads(skb,
csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
@@ -1244,8 +1246,9 @@ static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
memcpy(u->name, p->name, sizeof(u->name));
}
-static int ip6gre_tunnel_ioctl(struct net_device *dev,
- struct ifreq *ifr, int cmd)
+static int ip6gre_tunnel_siocdevprivate(struct net_device *dev,
+ struct ifreq *ifr, void __user *data,
+ int cmd)
{
int err = 0;
struct ip6_tnl_parm2 p;
@@ -1259,7 +1262,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ign->fb_tunnel_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ if (copy_from_user(&p, data, sizeof(p))) {
err = -EFAULT;
break;
}
@@ -1270,7 +1273,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
}
memset(&p, 0, sizeof(p));
ip6gre_tnl_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
break;
@@ -1281,7 +1284,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
goto done;
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
goto done;
err = -EINVAL;
@@ -1318,7 +1321,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
memset(&p, 0, sizeof(p));
ip6gre_tnl_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
} else
err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
@@ -1331,7 +1334,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
if (dev == ign->fb_tunnel_dev) {
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
goto done;
err = -ENOENT;
ip6gre_tnl_parm_from_user(&p1, &p);
@@ -1398,7 +1401,7 @@ static const struct net_device_ops ip6gre_netdev_ops = {
.ndo_init = ip6gre_tunnel_init,
.ndo_uninit = ip6gre_tunnel_uninit,
.ndo_start_xmit = ip6gre_tunnel_xmit,
- .ndo_do_ioctl = ip6gre_tunnel_ioctl,
+ .ndo_siocdevprivate = ip6gre_tunnel_siocdevprivate,
.ndo_change_mtu = ip6_tnl_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8e6ca9ad6812..12f985f43bcc 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -60,46 +60,29 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
+ struct inet6_dev *idev = ip6_dst_idev(dst);
unsigned int hh_len = LL_RESERVED_SPACE(dev);
- int delta = hh_len - skb_headroom(skb);
- const struct in6_addr *nexthop;
+ const struct in6_addr *daddr, *nexthop;
+ struct ipv6hdr *hdr;
struct neighbour *neigh;
int ret;
/* Be paranoid, rather than too clever. */
- if (unlikely(delta > 0) && dev->header_ops) {
- /* pskb_expand_head() might crash, if skb is shared */
- if (skb_shared(skb)) {
- struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
-
- if (likely(nskb)) {
- if (skb->sk)
- skb_set_owner_w(nskb, skb->sk);
- consume_skb(skb);
- } else {
- kfree_skb(skb);
- }
- skb = nskb;
- }
- if (skb &&
- pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
- kfree_skb(skb);
- skb = NULL;
- }
+ if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
+ skb = skb_expand_head(skb, hh_len);
if (!skb) {
- IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
return -ENOMEM;
}
}
- if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
- struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
-
+ hdr = ipv6_hdr(skb);
+ daddr = &hdr->daddr;
+ if (ipv6_addr_is_multicast(daddr)) {
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
((mroute6_is_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
- ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
- &ipv6_hdr(skb)->saddr))) {
+ ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
/* Do not check for IFF_ALLMULTI; multicast routing
@@ -110,7 +93,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
net, sk, newskb, NULL, newskb->dev,
dev_loopback_xmit);
- if (ipv6_hdr(skb)->hop_limit == 0) {
+ if (hdr->hop_limit == 0) {
IP6_INC_STATS(net, idev,
IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
@@ -119,9 +102,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
-
- if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
- IPV6_ADDR_SCOPE_NODELOCAL &&
+ if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
!(dev->flags & IFF_LOOPBACK)) {
kfree_skb(skb);
return 0;
@@ -136,10 +117,10 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
rcu_read_lock_bh();
- nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
- neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
+ nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
+ neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
if (unlikely(!neigh))
- neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
+ neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
ret = neigh_output(neigh, skb, false);
@@ -148,7 +129,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
rcu_read_unlock_bh();
- IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EINVAL;
}
@@ -268,6 +249,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *first_hop = &fl6->daddr;
struct dst_entry *dst = skb_dst(skb);
+ struct net_device *dev = dst->dev;
+ struct inet6_dev *idev = ip6_dst_idev(dst);
unsigned int head_room;
struct ipv6hdr *hdr;
u8 proto = fl6->flowi6_proto;
@@ -275,22 +258,16 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
int hlimit = -1;
u32 mtu;
- head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
+ head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
if (opt)
head_room += opt->opt_nflen + opt->opt_flen;
- if (unlikely(skb_headroom(skb) < head_room)) {
- struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
- if (!skb2) {
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTDISCARDS);
- kfree_skb(skb);
+ if (unlikely(head_room > skb_headroom(skb))) {
+ skb = skb_expand_head(skb, head_room);
+ if (!skb) {
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
return -ENOBUFS;
}
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
- consume_skb(skb);
- skb = skb2;
}
if (opt) {
@@ -332,8 +309,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
mtu = dst_mtu(dst);
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
- IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUT, skb->len);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
/* if egress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -346,17 +322,17 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
* we promote our socket to non const
*/
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
- net, (struct sock *)sk, skb, NULL, dst->dev,
+ net, (struct sock *)sk, skb, NULL, dev,
dst_output);
}
- skb->dev = dst->dev;
+ skb->dev = dev;
/* ipv6_local_error() does not require socket lock,
* we promote our socket to non const
*/
ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
@@ -608,7 +584,7 @@ int ip6_forward(struct sk_buff *skb)
}
}
- mtu = ip6_dst_mtu_forward(dst);
+ mtu = ip6_dst_mtu_maybe_forward(dst, true);
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 322698d9fcf4..20a67efda47f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1581,9 +1581,10 @@ ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
}
/**
- * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
+ * ip6_tnl_siocdevprivate - configure ipv6 tunnels from userspace
* @dev: virtual device associated with tunnel
- * @ifr: parameters passed from userspace
+ * @ifr: unused
+ * @data: parameters passed from userspace
* @cmd: command to be performed
*
* Description:
@@ -1609,7 +1610,8 @@ ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
**/
static int
-ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
int err = 0;
struct ip6_tnl_parm p;
@@ -1623,7 +1625,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ip6n->fb_tnl_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ if (copy_from_user(&p, data, sizeof(p))) {
err = -EFAULT;
break;
}
@@ -1635,9 +1637,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
memset(&p, 0, sizeof(p));
}
ip6_tnl_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) {
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
- }
break;
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
@@ -1645,7 +1646,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
break;
err = -EINVAL;
if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
@@ -1669,7 +1670,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!IS_ERR(t)) {
err = 0;
ip6_tnl_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
} else {
@@ -1683,7 +1684,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (dev == ip6n->fb_tnl_dev) {
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
break;
err = -ENOENT;
ip6_tnl_parm_from_user(&p1, &p);
@@ -1802,7 +1803,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
.ndo_init = ip6_tnl_dev_init,
.ndo_uninit = ip6_tnl_dev_uninit,
.ndo_start_xmit = ip6_tnl_start_xmit,
- .ndo_do_ioctl = ip6_tnl_ioctl,
+ .ndo_siocdevprivate = ip6_tnl_siocdevprivate,
.ndo_change_mtu = ip6_tnl_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 2d048e21abbb..1d8e3ffa225d 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -771,13 +771,14 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
}
/**
- * vti6_ioctl - configure vti6 tunnels from userspace
+ * vti6_siocdevprivate - configure vti6 tunnels from userspace
* @dev: virtual device associated with tunnel
- * @ifr: parameters passed from userspace
+ * @ifr: unused
+ * @data: parameters passed from userspace
* @cmd: command to be performed
*
* Description:
- * vti6_ioctl() is used for managing vti6 tunnels
+ * vti6_siocdevprivate() is used for managing vti6 tunnels
* from userspace.
*
* The possible commands are the following:
@@ -798,7 +799,7 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
* %-ENODEV if attempting to change or delete a nonexisting device
**/
static int
-vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+vti6_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd)
{
int err = 0;
struct ip6_tnl_parm2 p;
@@ -810,7 +811,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ip6n->fb_tnl_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ if (copy_from_user(&p, data, sizeof(p))) {
err = -EFAULT;
break;
}
@@ -822,7 +823,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!t)
t = netdev_priv(dev);
vti6_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
break;
case SIOCADDTUNNEL:
@@ -831,7 +832,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
break;
err = -EINVAL;
if (p.proto != IPPROTO_IPV6 && p.proto != 0)
@@ -852,7 +853,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (t) {
err = 0;
vti6_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
} else
@@ -865,7 +866,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (dev == ip6n->fb_tnl_dev) {
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
break;
err = -ENOENT;
vti6_parm_from_user(&p1, &p);
@@ -890,7 +891,7 @@ static const struct net_device_ops vti6_netdev_ops = {
.ndo_init = vti6_dev_init,
.ndo_uninit = vti6_dev_uninit,
.ndo_start_xmit = vti6_tnl_xmit,
- .ndo_do_ioctl = vti6_ioctl,
+ .ndo_siocdevprivate = vti6_siocdevprivate,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 06b0d2c329b9..36ed9efb8825 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -559,8 +559,7 @@ static int pim6_rcv(struct sk_buff *skb)
read_lock(&mrt_lock);
if (reg_vif_num >= 0)
reg_dev = mrt->vif_table[reg_vif_num].dev;
- if (reg_dev)
- dev_hold(reg_dev);
+ dev_hold(reg_dev);
read_unlock(&mrt_lock);
if (!reg_dev)
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a6804a7e34c1..e4bdb09c5586 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -225,7 +225,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen)
goto out_free_gsf;
- ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist);
+ ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist_flex);
out_free_gsf:
kfree(gsf);
return ret;
@@ -234,7 +234,7 @@ out_free_gsf:
static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
int optlen)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter *gf32;
void *p;
int ret;
@@ -249,7 +249,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (!p)
return -ENOMEM;
- gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+ gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
ret = -EFAULT;
if (copy_from_sockptr(gf32, optval, optlen))
goto out_free_p;
@@ -261,14 +261,14 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
goto out_free_p;
ret = -EINVAL;
- if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+ if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
goto out_free_p;
ret = ip6_mc_msfilter(sk, &(struct group_filter){
.gf_interface = gf32->gf_interface,
.gf_group = gf32->gf_group,
.gf_fmode = gf32->gf_fmode,
- .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist);
+ .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist_flex);
out_free_p:
kfree(p);
@@ -1048,7 +1048,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
int __user *optlen, int len)
{
- const int size0 = offsetof(struct group_filter, gf_slist);
+ const int size0 = offsetof(struct group_filter, gf_slist_flex);
struct group_filter __user *p = optval;
struct group_filter gsf;
int num;
@@ -1062,7 +1062,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
return -EADDRNOTAVAIL;
num = gsf.gf_numsrc;
lock_sock(sk);
- err = ip6_mc_msfget(sk, &gsf, p->gf_slist);
+ err = ip6_mc_msfget(sk, &gsf, p->gf_slist_flex);
if (!err) {
if (num > gsf.gf_numsrc)
num = gsf.gf_numsrc;
@@ -1077,7 +1077,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
int __user *optlen)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter __user *p = optval;
struct compat_group_filter gf32;
struct group_filter gf;
@@ -1100,7 +1100,7 @@ static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
return -EADDRNOTAVAIL;
lock_sock(sk);
- err = ip6_mc_msfget(sk, &gf, p->gf_slist);
+ err = ip6_mc_msfget(sk, &gf, p->gf_slist_flex);
release_sock(sk);
if (err)
return err;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 54ec163fbafa..cd951faa2fac 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -447,7 +447,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
if (psl)
count += psl->sl_max;
- newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_KERNEL);
+ newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count),
+ GFP_KERNEL);
if (!newpsl) {
err = -ENOBUFS;
goto done;
@@ -457,7 +458,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
if (psl) {
for (i = 0; i < psl->sl_count; i++)
newpsl->sl_addr[i] = psl->sl_addr[i];
- atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
}
psl = newpsl;
@@ -525,8 +527,9 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
goto done;
}
if (gsf->gf_numsrc) {
- newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc),
- GFP_KERNEL);
+ newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr,
+ gsf->gf_numsrc),
+ GFP_KERNEL);
if (!newpsl) {
err = -ENOBUFS;
goto done;
@@ -543,7 +546,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
newpsl->sl_count, newpsl->sl_addr, 0);
if (err) {
mutex_unlock(&idev->mc_lock);
- sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max));
+ sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr,
+ newpsl->sl_max));
goto done;
}
mutex_unlock(&idev->mc_lock);
@@ -559,7 +563,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
if (psl) {
ip6_mc_del_src(idev, group, pmc->sfmode,
psl->sl_count, psl->sl_addr, 0);
- atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
} else {
ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
@@ -2607,7 +2612,8 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
psl->sl_count, psl->sl_addr, 0);
RCU_INIT_POINTER(iml->sflist, NULL);
- atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index c467c6419893..4b098521a44c 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1391,12 +1391,6 @@ skip_defrtr:
}
}
- /*
- * Send a notify if RA changed managed/otherconf flags or timer settings
- */
- if (send_ifinfo_notify)
- inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
-
skip_linkparms:
/*
@@ -1496,6 +1490,11 @@ skip_routeinfo:
memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
mtu = ntohl(n);
+ if (in6_dev->ra_mtu != mtu) {
+ in6_dev->ra_mtu = mtu;
+ send_ifinfo_notify = true;
+ }
+
if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
} else if (in6_dev->cnf.mtu6 != mtu) {
@@ -1519,6 +1518,12 @@ skip_routeinfo:
ND_PRINTK(2, warn, "RA: invalid RA options\n");
}
out:
+ /* Send a notify if RA changed managed/otherconf flags or
+ * timer settings or ra_mtu value
+ */
+ if (send_ifinfo_notify)
+ inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
+
fib6_info_release(rt);
if (neigh)
neigh_release(neigh);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index bb784ea7bbd3..727ee8097012 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -19,15 +19,12 @@ MODULE_DESCRIPTION("ip6tables filter table");
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT))
-static int __net_init ip6table_filter_table_init(struct net *net);
-
static const struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_FILTER,
- .table_init = ip6table_filter_table_init,
};
/* The work comes in here from netfilter.c. */
@@ -44,7 +41,7 @@ static struct nf_hook_ops *filter_ops __read_mostly;
static bool forward = true;
module_param(forward, bool, 0000);
-static int __net_init ip6table_filter_table_init(struct net *net)
+static int ip6table_filter_table_init(struct net *net)
{
struct ip6t_replace *repl;
int err;
@@ -63,7 +60,7 @@ static int __net_init ip6table_filter_table_init(struct net *net)
static int __net_init ip6table_filter_net_init(struct net *net)
{
- if (net == &init_net || !forward)
+ if (!forward)
return ip6table_filter_table_init(net);
return 0;
@@ -87,15 +84,24 @@ static struct pernet_operations ip6table_filter_net_ops = {
static int __init ip6table_filter_init(void)
{
- int ret;
+ int ret = xt_register_template(&packet_filter,
+ ip6table_filter_table_init);
+
+ if (ret < 0)
+ return ret;
filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook);
- if (IS_ERR(filter_ops))
+ if (IS_ERR(filter_ops)) {
+ xt_unregister_template(&packet_filter);
return PTR_ERR(filter_ops);
+ }
ret = register_pernet_subsys(&ip6table_filter_net_ops);
- if (ret < 0)
+ if (ret < 0) {
+ xt_unregister_template(&packet_filter);
kfree(filter_ops);
+ return ret;
+ }
return ret;
}
@@ -103,6 +109,7 @@ static int __init ip6table_filter_init(void)
static void __exit ip6table_filter_fini(void)
{
unregister_pernet_subsys(&ip6table_filter_net_ops);
+ xt_unregister_template(&packet_filter);
kfree(filter_ops);
}
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index c76cffd63041..9b518ce37d6a 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -20,15 +20,12 @@ MODULE_DESCRIPTION("ip6tables mangle table");
(1 << NF_INET_LOCAL_OUT) | \
(1 << NF_INET_POST_ROUTING))
-static int __net_init ip6table_mangle_table_init(struct net *net);
-
static const struct xt_table packet_mangler = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_MANGLE,
- .table_init = ip6table_mangle_table_init,
};
static unsigned int
@@ -76,7 +73,7 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb,
}
static struct nf_hook_ops *mangle_ops __read_mostly;
-static int __net_init ip6table_mangle_table_init(struct net *net)
+static int ip6table_mangle_table_init(struct net *net)
{
struct ip6t_replace *repl;
int ret;
@@ -106,29 +103,32 @@ static struct pernet_operations ip6table_mangle_net_ops = {
static int __init ip6table_mangle_init(void)
{
- int ret;
+ int ret = xt_register_template(&packet_mangler,
+ ip6table_mangle_table_init);
+
+ if (ret < 0)
+ return ret;
mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook);
- if (IS_ERR(mangle_ops))
+ if (IS_ERR(mangle_ops)) {
+ xt_unregister_template(&packet_mangler);
return PTR_ERR(mangle_ops);
+ }
ret = register_pernet_subsys(&ip6table_mangle_net_ops);
if (ret < 0) {
+ xt_unregister_template(&packet_mangler);
kfree(mangle_ops);
return ret;
}
- ret = ip6table_mangle_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&ip6table_mangle_net_ops);
- kfree(mangle_ops);
- }
return ret;
}
static void __exit ip6table_mangle_fini(void)
{
unregister_pernet_subsys(&ip6table_mangle_net_ops);
+ xt_unregister_template(&packet_mangler);
kfree(mangle_ops);
}
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index b0292251e655..921c1723a01e 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -19,8 +19,6 @@ struct ip6table_nat_pernet {
struct nf_hook_ops *nf_nat_ops;
};
-static int __net_init ip6table_nat_table_init(struct net *net);
-
static unsigned int ip6table_nat_net_id __read_mostly;
static const struct xt_table nf_nat_ipv6_table = {
@@ -31,7 +29,6 @@ static const struct xt_table nf_nat_ipv6_table = {
(1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
- .table_init = ip6table_nat_table_init,
};
static unsigned int ip6table_nat_do_chain(void *priv,
@@ -115,7 +112,7 @@ static void ip6t_nat_unregister_lookups(struct net *net)
kfree(ops);
}
-static int __net_init ip6table_nat_table_init(struct net *net)
+static int ip6table_nat_table_init(struct net *net)
{
struct ip6t_replace *repl;
int ret;
@@ -157,20 +154,23 @@ static struct pernet_operations ip6table_nat_net_ops = {
static int __init ip6table_nat_init(void)
{
- int ret = register_pernet_subsys(&ip6table_nat_net_ops);
+ int ret = xt_register_template(&nf_nat_ipv6_table,
+ ip6table_nat_table_init);
- if (ret)
+ if (ret < 0)
return ret;
- ret = ip6table_nat_table_init(&init_net);
+ ret = register_pernet_subsys(&ip6table_nat_net_ops);
if (ret)
- unregister_pernet_subsys(&ip6table_nat_net_ops);
+ xt_unregister_template(&nf_nat_ipv6_table);
+
return ret;
}
static void __exit ip6table_nat_exit(void)
{
unregister_pernet_subsys(&ip6table_nat_net_ops);
+ xt_unregister_template(&nf_nat_ipv6_table);
}
module_init(ip6table_nat_init);
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index f63c106c521e..4f2a04af71d3 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -11,8 +11,6 @@
#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
-static int __net_init ip6table_raw_table_init(struct net *net);
-
static bool raw_before_defrag __read_mostly;
MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag");
module_param(raw_before_defrag, bool, 0000);
@@ -23,7 +21,6 @@ static const struct xt_table packet_raw = {
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_RAW,
- .table_init = ip6table_raw_table_init,
};
static const struct xt_table packet_raw_before_defrag = {
@@ -32,7 +29,6 @@ static const struct xt_table packet_raw_before_defrag = {
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG,
- .table_init = ip6table_raw_table_init,
};
/* The work comes in here from netfilter.c. */
@@ -45,7 +41,7 @@ ip6table_raw_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *rawtable_ops __read_mostly;
-static int __net_init ip6table_raw_table_init(struct net *net)
+static int ip6table_raw_table_init(struct net *net)
{
struct ip6t_replace *repl;
const struct xt_table *table = &packet_raw;
@@ -79,37 +75,39 @@ static struct pernet_operations ip6table_raw_net_ops = {
static int __init ip6table_raw_init(void)
{
- int ret;
const struct xt_table *table = &packet_raw;
+ int ret;
if (raw_before_defrag) {
table = &packet_raw_before_defrag;
-
pr_info("Enabling raw table before defrag\n");
}
+ ret = xt_register_template(table, ip6table_raw_table_init);
+ if (ret < 0)
+ return ret;
+
/* Register hooks */
rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook);
- if (IS_ERR(rawtable_ops))
+ if (IS_ERR(rawtable_ops)) {
+ xt_unregister_template(table);
return PTR_ERR(rawtable_ops);
+ }
ret = register_pernet_subsys(&ip6table_raw_net_ops);
if (ret < 0) {
kfree(rawtable_ops);
+ xt_unregister_template(table);
return ret;
}
- ret = ip6table_raw_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&ip6table_raw_net_ops);
- kfree(rawtable_ops);
- }
return ret;
}
static void __exit ip6table_raw_fini(void)
{
unregister_pernet_subsys(&ip6table_raw_net_ops);
+ xt_unregister_template(&packet_raw);
kfree(rawtable_ops);
}
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 8dc335cf450b..931674034d8b 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -24,15 +24,12 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules");
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT)
-static int __net_init ip6table_security_table_init(struct net *net);
-
static const struct xt_table security_table = {
.name = "security",
.valid_hooks = SECURITY_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_SECURITY,
- .table_init = ip6table_security_table_init,
};
static unsigned int
@@ -44,7 +41,7 @@ ip6table_security_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *sectbl_ops __read_mostly;
-static int __net_init ip6table_security_table_init(struct net *net)
+static int ip6table_security_table_init(struct net *net)
{
struct ip6t_replace *repl;
int ret;
@@ -74,29 +71,32 @@ static struct pernet_operations ip6table_security_net_ops = {
static int __init ip6table_security_init(void)
{
- int ret;
+ int ret = xt_register_template(&security_table,
+ ip6table_security_table_init);
+
+ if (ret < 0)
+ return ret;
sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook);
- if (IS_ERR(sectbl_ops))
+ if (IS_ERR(sectbl_ops)) {
+ xt_unregister_template(&security_table);
return PTR_ERR(sectbl_ops);
+ }
ret = register_pernet_subsys(&ip6table_security_net_ops);
if (ret < 0) {
kfree(sectbl_ops);
+ xt_unregister_template(&security_table);
return ret;
}
- ret = ip6table_security_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&ip6table_security_net_ops);
- kfree(sectbl_ops);
- }
return ret;
}
static void __exit ip6table_security_fini(void)
{
unregister_pernet_subsys(&ip6table_security_net_ops);
+ xt_unregister_template(&security_table);
kfree(sectbl_ops);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b6ddf23d3833..dbc224023977 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -41,6 +41,7 @@
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
@@ -1484,17 +1485,24 @@ static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
static u32 rt6_exception_hash(const struct in6_addr *dst,
const struct in6_addr *src)
{
- static u32 seed __read_mostly;
- u32 val;
+ static siphash_key_t rt6_exception_key __read_mostly;
+ struct {
+ struct in6_addr dst;
+ struct in6_addr src;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .dst = *dst,
+ };
+ u64 val;
- net_get_random_once(&seed, sizeof(seed));
- val = jhash2((const u32 *)dst, sizeof(*dst)/sizeof(u32), seed);
+ net_get_random_once(&rt6_exception_key, sizeof(rt6_exception_key));
#ifdef CONFIG_IPV6_SUBTREES
if (src)
- val = jhash2((const u32 *)src, sizeof(*src)/sizeof(u32), val);
+ combined.src = *src;
#endif
- return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
+ val = siphash(&combined, sizeof(combined), &rt6_exception_key);
+
+ return hash_64(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
}
/* Helper function to find the cached rt in the hash table
@@ -1649,6 +1657,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
struct fib6_nh *nh = res->nh;
+ int max_depth;
int err = 0;
spin_lock_bh(&rt6_exception_lock);
@@ -1703,7 +1712,9 @@ static int rt6_insert_exception(struct rt6_info *nrt,
bucket->depth++;
net->ipv6.rt6_stats->fib_rt_cache++;
- if (bucket->depth > FIB6_MAX_DEPTH)
+ /* Randomize max depth to avoid some side channels attacks. */
+ max_depth = FIB6_MAX_DEPTH + prandom_u32_max(FIB6_MAX_DEPTH);
+ while (bucket->depth > max_depth)
rt6_exception_remove_oldest(bucket);
out:
@@ -3201,25 +3212,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst)
{
- struct inet6_dev *idev;
- unsigned int mtu;
-
- mtu = dst_metric_raw(dst, RTAX_MTU);
- if (mtu)
- goto out;
-
- mtu = IPV6_MIN_MTU;
-
- rcu_read_lock();
- idev = __in6_dev_get(dst->dev);
- if (idev)
- mtu = idev->cnf.mtu6;
- rcu_read_unlock();
-
-out:
- mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
-
- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
+ return ip6_dst_mtu_maybe_forward(dst, false);
}
EXPORT_INDIRECT_CALLABLE(ip6_mtu);
@@ -3644,8 +3637,7 @@ out:
if (err) {
lwtstate_put(fib6_nh->fib_nh_lws);
fib6_nh->fib_nh_lws = NULL;
- if (dev)
- dev_put(dev);
+ dev_put(dev);
}
return err;
@@ -6638,7 +6630,7 @@ int __init ip6_route_init(void)
ret = -ENOMEM;
ip6_dst_ops_template.kmem_cachep =
kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!ip6_dst_ops_template.kmem_cachep)
goto out;
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 897fa59c47de..1bf5f5ae75ac 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -26,6 +26,7 @@
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
+#include <linux/netfilter.h>
static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
{
@@ -295,11 +296,19 @@ static int seg6_do_srh(struct sk_buff *skb)
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+ nf_reset_ct(skb);
return 0;
}
-static int seg6_input(struct sk_buff *skb)
+static int seg6_input_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ return dst_input(skb);
+}
+
+static int seg6_input_core(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
@@ -337,10 +346,41 @@ static int seg6_input(struct sk_buff *skb)
if (unlikely(err))
return err;
- return dst_input(skb);
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+ dev_net(skb->dev), NULL, skb, NULL,
+ skb_dst(skb)->dev, seg6_input_finish);
+
+ return seg6_input_finish(dev_net(skb->dev), NULL, skb);
}
-static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int seg6_input_nf(struct sk_buff *skb)
+{
+ struct net_device *dev = skb_dst(skb)->dev;
+ struct net *net = dev_net(skb->dev);
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL,
+ skb, NULL, dev, seg6_input_core);
+ case htons(ETH_P_IPV6):
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL,
+ skb, NULL, dev, seg6_input_core);
+ }
+
+ return -EINVAL;
+}
+
+static int seg6_input(struct sk_buff *skb)
+{
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return seg6_input_nf(skb);
+
+ return seg6_input_core(dev_net(skb->dev), NULL, skb);
+}
+
+static int seg6_output_core(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
@@ -387,12 +427,40 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (unlikely(err))
goto drop;
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
+ NULL, skb_dst(skb)->dev, dst_output);
+
return dst_output(net, sk, skb);
drop:
kfree_skb(skb);
return err;
}
+static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct net_device *dev = skb_dst(skb)->dev;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
+ NULL, dev, seg6_output_core);
+ case htons(ETH_P_IPV6):
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
+ NULL, dev, seg6_output_core);
+ }
+
+ return -EINVAL;
+}
+
+static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return seg6_output_nf(net, sk, skb);
+
+ return seg6_output_core(net, sk, skb);
+}
+
static int seg6_build_state(struct net *net, struct nlattr *nla,
unsigned int family, const void *cfg,
struct lwtunnel_state **ts,
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 60bf3b877957..2dc40b3f373e 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -30,6 +30,7 @@
#include <net/seg6_local.h>
#include <linux/etherdevice.h>
#include <linux/bpf.h>
+#include <linux/netfilter.h>
#define SEG6_F_ATTR(i) BIT(i)
@@ -413,12 +414,33 @@ drop:
return -EINVAL;
}
+static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct in6_addr *nhaddr = NULL;
+ struct seg6_local_lwt *slwt;
+
+ slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
+
+ /* The inner packet is not associated to any local interface,
+ * so we do not call netif_rx().
+ *
+ * If slwt->nh6 is set to ::, then lookup the nexthop for the
+ * inner packet's DA. Otherwise, use the specified nexthop.
+ */
+ if (!ipv6_addr_any(&slwt->nh6))
+ nhaddr = &slwt->nh6;
+
+ seg6_lookup_nexthop(skb, nhaddr, 0);
+
+ return dst_input(skb);
+}
+
/* decapsulate and forward to specified nexthop */
static int input_action_end_dx6(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
- struct in6_addr *nhaddr = NULL;
-
/* this function accepts IPv6 encapsulated packets, with either
* an SRH with SL=0, or no SRH.
*/
@@ -429,40 +451,30 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
- /* The inner packet is not associated to any local interface,
- * so we do not call netif_rx().
- *
- * If slwt->nh6 is set to ::, then lookup the nexthop for the
- * inner packet's DA. Otherwise, use the specified nexthop.
- */
-
- if (!ipv6_addr_any(&slwt->nh6))
- nhaddr = &slwt->nh6;
-
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+ nf_reset_ct(skb);
- seg6_lookup_nexthop(skb, nhaddr, 0);
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
+ dev_net(skb->dev), NULL, skb, NULL,
+ skb_dst(skb)->dev, input_action_end_dx6_finish);
- return dst_input(skb);
+ return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
drop:
kfree_skb(skb);
return -EINVAL;
}
-static int input_action_end_dx4(struct sk_buff *skb,
- struct seg6_local_lwt *slwt)
+static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct seg6_local_lwt *slwt;
struct iphdr *iph;
__be32 nhaddr;
int err;
- if (!decap_and_validate(skb, IPPROTO_IPIP))
- goto drop;
-
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- goto drop;
-
- skb->protocol = htons(ETH_P_IP);
+ slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
iph = ip_hdr(skb);
@@ -470,14 +482,34 @@ static int input_action_end_dx4(struct sk_buff *skb,
skb_dst_drop(skb);
- skb_set_transport_header(skb, sizeof(struct iphdr));
-
err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
- if (err)
- goto drop;
+ if (err) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
return dst_input(skb);
+}
+
+static int input_action_end_dx4(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ if (!decap_and_validate(skb, IPPROTO_IPIP))
+ goto drop;
+
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto drop;
+
+ skb->protocol = htons(ETH_P_IP);
+ skb_set_transport_header(skb, sizeof(struct iphdr));
+ nf_reset_ct(skb);
+
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+ dev_net(skb->dev), NULL, skb, NULL,
+ skb_dst(skb)->dev, input_action_end_dx4_finish);
+ return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
drop:
kfree_skb(skb);
return -EINVAL;
@@ -645,6 +677,7 @@ static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
skb_dst_drop(skb);
skb_set_transport_header(skb, hdrlen);
+ nf_reset_ct(skb);
return end_dt_vrf_rcv(skb, family, vrf);
@@ -1078,7 +1111,8 @@ static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
u64_stats_update_end(&pcounters->syncp);
}
-static int seg6_local_input(struct sk_buff *skb)
+static int seg6_local_input_core(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct seg6_action_desc *desc;
@@ -1086,11 +1120,6 @@ static int seg6_local_input(struct sk_buff *skb)
unsigned int len = skb->len;
int rc;
- if (skb->protocol != htons(ETH_P_IPV6)) {
- kfree_skb(skb);
- return -EINVAL;
- }
-
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
desc = slwt->desc;
@@ -1104,6 +1133,21 @@ static int seg6_local_input(struct sk_buff *skb)
return rc;
}
+static int seg6_local_input(struct sk_buff *skb)
+{
+ if (skb->protocol != htons(ETH_P_IPV6)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
+ dev_net(skb->dev), NULL, skb, skb->dev, NULL,
+ seg6_local_input_core);
+
+ return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
+}
+
static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index df5bea818410..ef0c7a7c18e2 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -299,9 +299,8 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
}
-static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
+static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __user *a)
{
- struct ip_tunnel_prl __user *a = ifr->ifr_ifru.ifru_data;
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_prl kprl, *kp;
struct ip_tunnel_prl_entry *prl;
@@ -321,7 +320,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
* we try harder to allocate.
*/
kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
- kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
+ kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) :
NULL;
rcu_read_lock();
@@ -334,7 +333,8 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
* For root users, retry allocating enough memory for
* the answer.
*/
- kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC);
+ kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC | __GFP_ACCOUNT |
+ __GFP_NOWARN);
if (!kp) {
ret = -ENOMEM;
goto out;
@@ -453,8 +453,8 @@ out:
return err;
}
-static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr,
- int cmd)
+static int ipip6_tunnel_prl_ctl(struct net_device *dev,
+ struct ip_tunnel_prl __user *data, int cmd)
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_prl prl;
@@ -465,7 +465,7 @@ static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr,
if (dev == dev_to_sit_net(dev)->fb_tunnel_dev)
return -EINVAL;
- if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
+ if (copy_from_user(&prl, data, sizeof(prl)))
return -EFAULT;
switch (cmd) {
@@ -1197,14 +1197,14 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
}
static int
-ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr)
+ipip6_tunnel_get6rd(struct net_device *dev, struct ip_tunnel_parm __user *data)
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_6rd ip6rd;
struct ip_tunnel_parm p;
if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
return -EFAULT;
t = ipip6_tunnel_locate(t->net, &p, 0);
}
@@ -1215,13 +1215,14 @@ ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr)
ip6rd.relay_prefix = t->ip6rd.relay_prefix;
ip6rd.prefixlen = t->ip6rd.prefixlen;
ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, sizeof(ip6rd)))
+ if (copy_to_user(data, &ip6rd, sizeof(ip6rd)))
return -EFAULT;
return 0;
}
static int
-ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ipip6_tunnel_6rdctl(struct net_device *dev, struct ip_tunnel_6rd __user *data,
+ int cmd)
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_6rd ip6rd;
@@ -1229,7 +1230,7 @@ ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, sizeof(ip6rd)))
+ if (copy_from_user(&ip6rd, data, sizeof(ip6rd)))
return -EFAULT;
if (cmd != SIOCDEL6RD) {
@@ -1368,27 +1369,28 @@ ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
}
static int
-ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ipip6_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
switch (cmd) {
case SIOCGETTUNNEL:
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
case SIOCDELTUNNEL:
- return ip_tunnel_ioctl(dev, ifr, cmd);
+ return ip_tunnel_siocdevprivate(dev, ifr, data, cmd);
case SIOCGETPRL:
- return ipip6_tunnel_get_prl(dev, ifr);
+ return ipip6_tunnel_get_prl(dev, data);
case SIOCADDPRL:
case SIOCDELPRL:
case SIOCCHGPRL:
- return ipip6_tunnel_prl_ctl(dev, ifr, cmd);
+ return ipip6_tunnel_prl_ctl(dev, data, cmd);
#ifdef CONFIG_IPV6_SIT_6RD
case SIOCGET6RD:
- return ipip6_tunnel_get6rd(dev, ifr);
+ return ipip6_tunnel_get6rd(dev, data);
case SIOCADD6RD:
case SIOCCHG6RD:
case SIOCDEL6RD:
- return ipip6_tunnel_6rdctl(dev, ifr, cmd);
+ return ipip6_tunnel_6rdctl(dev, data, cmd);
#endif
default:
return -EINVAL;
@@ -1399,7 +1401,7 @@ static const struct net_device_ops ipip6_netdev_ops = {
.ndo_init = ipip6_tunnel_init,
.ndo_uninit = ipip6_tunnel_uninit,
.ndo_start_xmit = sit_tunnel_xmit,
- .ndo_do_ioctl = ipip6_tunnel_ioctl,
+ .ndo_siocdevprivate = ipip6_tunnel_siocdevprivate,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
.ndo_tunnel_ctl = ipip6_tunnel_ctl,
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index d7cf26f730d7..d53dd142bf87 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -21,6 +21,7 @@
#ifdef CONFIG_NETLABEL
#include <net/calipso.h>
#endif
+#include <linux/ioam6.h>
static int two = 2;
static int three = 3;
@@ -28,6 +29,8 @@ static int flowlabel_reflect_max = 0x7;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
static u32 rt6_multipath_hash_fields_all_mask =
FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
+static u32 ioam6_id_max = IOAM6_DEFAULT_ID;
+static u64 ioam6_id_wide_max = IOAM6_DEFAULT_ID_WIDE;
static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
@@ -196,6 +199,22 @@ static struct ctl_table ipv6_table_template[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
+ {
+ .procname = "ioam6_id",
+ .data = &init_net.ipv6.sysctl.ioam6_id,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra2 = &ioam6_id_max,
+ },
+ {
+ .procname = "ioam6_id_wide",
+ .data = &init_net.ipv6.sysctl.ioam6_id_wide,
+ .maxlen = sizeof(u64),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra2 = &ioam6_id_wide_max,
+ },
{ }
};
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c5e15e94bb00..ea53847b5b7e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1475,7 +1475,7 @@ do_udp_sendmsg:
fl6.saddr = np->saddr;
fl6.fl6_sport = inet->inet_sport;
- if (cgroup_bpf_enabled(BPF_CGROUP_UDP6_SENDMSG) && !connected) {
+ if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
(struct sockaddr *)sin6, &fl6.saddr);
if (err)
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 44453b35c7b7..18316ee3c692 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1044,7 +1044,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
if (err == 0) {
atomic_dec(&iucv->skbs_in_xmit);
skb_unlink(skb, &iucv->send_skb_q);
- kfree_skb(skb);
+ consume_skb(skb);
}
/* this error should never happen since the */
@@ -1293,7 +1293,7 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
}
}
- kfree_skb(skb);
+ consume_skb(skb);
if (iucv->transport == AF_IUCV_TRANS_HIPER) {
atomic_inc(&iucv->msg_recv);
if (atomic_read(&iucv->msg_recv) > iucv->msglimit) {
@@ -1756,7 +1756,7 @@ static void iucv_callback_txdone(struct iucv_path *path,
spin_unlock_irqrestore(&list->lock, flags);
if (this) {
- kfree_skb(this);
+ consume_skb(this);
/* wake up any process waiting for sending */
iucv_sock_wake_msglim(sk);
}
@@ -1903,17 +1903,17 @@ static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb)
{
struct iucv_sock *iucv = iucv_sk(sk);
- if (!iucv)
- goto out;
- if (sk->sk_state != IUCV_BOUND)
- goto out;
+ if (!iucv || sk->sk_state != IUCV_BOUND) {
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
bh_lock_sock(sk);
iucv->msglimit_peer = iucv_trans_hdr(skb)->window;
sk->sk_state = IUCV_CONNECTED;
sk->sk_state_change(sk);
bh_unlock_sock(sk);
-out:
- kfree_skb(skb);
+ consume_skb(skb);
return NET_RX_SUCCESS;
}
@@ -1924,16 +1924,16 @@ static int afiucv_hs_callback_synfin(struct sock *sk, struct sk_buff *skb)
{
struct iucv_sock *iucv = iucv_sk(sk);
- if (!iucv)
- goto out;
- if (sk->sk_state != IUCV_BOUND)
- goto out;
+ if (!iucv || sk->sk_state != IUCV_BOUND) {
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
bh_lock_sock(sk);
sk->sk_state = IUCV_DISCONN;
sk->sk_state_change(sk);
bh_unlock_sock(sk);
-out:
- kfree_skb(skb);
+ consume_skb(skb);
return NET_RX_SUCCESS;
}
@@ -1945,16 +1945,18 @@ static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb)
struct iucv_sock *iucv = iucv_sk(sk);
/* other end of connection closed */
- if (!iucv)
- goto out;
+ if (!iucv) {
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
bh_lock_sock(sk);
if (sk->sk_state == IUCV_CONNECTED) {
sk->sk_state = IUCV_DISCONN;
sk->sk_state_change(sk);
}
bh_unlock_sock(sk);
-out:
- kfree_skb(skb);
+ consume_skb(skb);
return NET_RX_SUCCESS;
}
@@ -2107,7 +2109,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
case (AF_IUCV_FLAG_WIN):
err = afiucv_hs_callback_win(sk, skb);
if (skb->len == sizeof(struct af_iucv_trans_hdr)) {
- kfree_skb(skb);
+ consume_skb(skb);
break;
}
fallthrough; /* and receive non-zero length data */
@@ -2262,21 +2264,11 @@ static struct packet_type iucv_packet_type = {
.func = afiucv_hs_rcv,
};
-static int afiucv_iucv_init(void)
-{
- return pr_iucv->iucv_register(&af_iucv_handler, 0);
-}
-
-static void afiucv_iucv_exit(void)
-{
- pr_iucv->iucv_unregister(&af_iucv_handler, 0);
-}
-
static int __init afiucv_init(void)
{
int err;
- if (MACHINE_IS_VM) {
+ if (MACHINE_IS_VM && IS_ENABLED(CONFIG_IUCV)) {
cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err);
if (unlikely(err)) {
WARN_ON(err);
@@ -2284,11 +2276,7 @@ static int __init afiucv_init(void)
goto out;
}
- pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv");
- if (!pr_iucv) {
- printk(KERN_WARNING "iucv_if lookup failed\n");
- memset(&iucv_userid, 0, sizeof(iucv_userid));
- }
+ pr_iucv = &iucv_if;
} else {
memset(&iucv_userid, 0, sizeof(iucv_userid));
pr_iucv = NULL;
@@ -2302,7 +2290,7 @@ static int __init afiucv_init(void)
goto out_proto;
if (pr_iucv) {
- err = afiucv_iucv_init();
+ err = pr_iucv->iucv_register(&af_iucv_handler, 0);
if (err)
goto out_sock;
}
@@ -2316,23 +2304,19 @@ static int __init afiucv_init(void)
out_notifier:
if (pr_iucv)
- afiucv_iucv_exit();
+ pr_iucv->iucv_unregister(&af_iucv_handler, 0);
out_sock:
sock_unregister(PF_IUCV);
out_proto:
proto_unregister(&iucv_proto);
out:
- if (pr_iucv)
- symbol_put(iucv_if);
return err;
}
static void __exit afiucv_exit(void)
{
- if (pr_iucv) {
- afiucv_iucv_exit();
- symbol_put(iucv_if);
- }
+ if (pr_iucv)
+ pr_iucv->iucv_unregister(&af_iucv_handler, 0);
unregister_netdevice_notifier(&afiucv_netdev_notifier);
dev_remove_pack(&iucv_packet_type);
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index e6795d5a546a..f3343a8541a5 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -286,19 +286,19 @@ static union iucv_param *iucv_param_irq[NR_CPUS];
*/
static inline int __iucv_call_b2f0(int command, union iucv_param *parm)
{
- register unsigned long reg0 asm ("0");
- register unsigned long reg1 asm ("1");
- int ccode;
+ int cc;
- reg0 = command;
- reg1 = (unsigned long)parm;
asm volatile(
- " .long 0xb2f01000\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (ccode), "=m" (*parm), "+d" (reg0), "+a" (reg1)
- : "m" (*parm) : "cc");
- return ccode;
+ " lgr 0,%[reg0]\n"
+ " lgr 1,%[reg1]\n"
+ " .long 0xb2f01000\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ : [cc] "=&d" (cc), "+m" (*parm)
+ : [reg0] "d" ((unsigned long)command),
+ [reg1] "d" ((unsigned long)parm)
+ : "cc", "0", "1");
+ return cc;
}
static inline int iucv_call_b2f0(int command, union iucv_param *parm)
@@ -319,19 +319,21 @@ static inline int iucv_call_b2f0(int command, union iucv_param *parm)
*/
static int __iucv_query_maxconn(void *param, unsigned long *max_pathid)
{
- register unsigned long reg0 asm ("0");
- register unsigned long reg1 asm ("1");
- int ccode;
+ unsigned long reg1 = (unsigned long)param;
+ int cc;
- reg0 = IUCV_QUERY;
- reg1 = (unsigned long) param;
asm volatile (
+ " lghi 0,%[cmd]\n"
+ " lgr 1,%[reg1]\n"
" .long 0xb2f01000\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (ccode), "+d" (reg0), "+d" (reg1) : : "cc");
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ " lgr %[reg1],1\n"
+ : [cc] "=&d" (cc), [reg1] "+&d" (reg1)
+ : [cmd] "K" (IUCV_QUERY)
+ : "cc", "0", "1");
*max_pathid = reg1;
- return ccode;
+ return cc;
}
static int iucv_query_maxconn(void)
@@ -500,14 +502,14 @@ static void iucv_setmask_mp(void)
{
int cpu;
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu)
/* Enable all cpus with a declared buffer. */
if (cpumask_test_cpu(cpu, &iucv_buffer_cpumask) &&
!cpumask_test_cpu(cpu, &iucv_irq_cpumask))
smp_call_function_single(cpu, iucv_allow_cpu,
NULL, 1);
- put_online_cpus();
+ cpus_read_unlock();
}
/**
@@ -540,7 +542,7 @@ static int iucv_enable(void)
size_t alloc_size;
int cpu, rc;
- get_online_cpus();
+ cpus_read_lock();
rc = -ENOMEM;
alloc_size = iucv_max_pathid * sizeof(struct iucv_path);
iucv_path_table = kzalloc(alloc_size, GFP_KERNEL);
@@ -553,12 +555,12 @@ static int iucv_enable(void)
if (cpumask_empty(&iucv_buffer_cpumask))
/* No cpu could declare an iucv buffer. */
goto out;
- put_online_cpus();
+ cpus_read_unlock();
return 0;
out:
kfree(iucv_path_table);
iucv_path_table = NULL;
- put_online_cpus();
+ cpus_read_unlock();
return rc;
}
@@ -571,11 +573,11 @@ out:
*/
static void iucv_disable(void)
{
- get_online_cpus();
+ cpus_read_lock();
on_each_cpu(iucv_retrieve_cpu, NULL, 1);
kfree(iucv_path_table);
iucv_path_table = NULL;
- put_online_cpus();
+ cpus_read_unlock();
}
static int iucv_cpu_dead(unsigned int cpu)
@@ -784,7 +786,7 @@ static int iucv_reboot_event(struct notifier_block *this,
if (cpumask_empty(&iucv_irq_cpumask))
return NOTIFY_DONE;
- get_online_cpus();
+ cpus_read_lock();
on_each_cpu_mask(&iucv_irq_cpumask, iucv_block_cpu, NULL, 1);
preempt_disable();
for (i = 0; i < iucv_max_pathid; i++) {
@@ -792,7 +794,7 @@ static int iucv_reboot_event(struct notifier_block *this,
iucv_sever_pathid(i, NULL);
}
preempt_enable();
- put_online_cpus();
+ cpus_read_unlock();
iucv_disable();
return NOTIFY_DONE;
}
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index ac5cadd02cfa..3086f4a6ae68 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -224,8 +224,7 @@ static int llc_ui_release(struct socket *sock)
} else {
release_sock(sk);
}
- if (llc->dev)
- dev_put(llc->dev);
+ dev_put(llc->dev);
sock_put(sk);
llc_sk_free(sk);
out:
@@ -363,8 +362,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
} else
llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
addr->sllc_mac);
- if (llc->dev)
- dev_hold(llc->dev);
+ dev_hold(llc->dev);
rcu_read_unlock();
if (!llc->dev)
goto out;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 4e6f11e63df3..d69b31c20fe2 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -828,9 +828,11 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
return ret;
}
-static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
- const u8 *resp, size_t resp_len,
- const struct ieee80211_csa_settings *csa)
+static int
+ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
+ const u8 *resp, size_t resp_len,
+ const struct ieee80211_csa_settings *csa,
+ const struct ieee80211_color_change_settings *cca)
{
struct probe_resp *new, *old;
@@ -850,6 +852,8 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_presp,
csa->n_counter_offsets_presp *
sizeof(new->cntdwn_counter_offsets[0]));
+ else if (cca)
+ new->cntdwn_counter_offsets[0] = cca->counter_offset_presp;
rcu_assign_pointer(sdata->u.ap.probe_resp, new);
if (old)
@@ -955,7 +959,8 @@ static int ieee80211_set_ftm_responder_params(
static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
struct cfg80211_beacon_data *params,
- const struct ieee80211_csa_settings *csa)
+ const struct ieee80211_csa_settings *csa,
+ const struct ieee80211_color_change_settings *cca)
{
struct beacon_data *new, *old;
int new_head_len, new_tail_len;
@@ -1004,6 +1009,9 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_beacon,
csa->n_counter_offsets_beacon *
sizeof(new->cntdwn_counter_offsets[0]));
+ } else if (cca) {
+ new->cntdwn_current_counter = cca->count;
+ new->cntdwn_counter_offsets[0] = cca->counter_offset_beacon;
}
/* copy in head */
@@ -1020,7 +1028,7 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
memcpy(new->tail, old->tail, new_tail_len);
err = ieee80211_set_probe_resp(sdata, params->probe_resp,
- params->probe_resp_len, csa);
+ params->probe_resp_len, csa, cca);
if (err < 0) {
kfree(new);
return err;
@@ -1175,7 +1183,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL))
sdata->vif.bss_conf.beacon_tx_rate = params->beacon_rate;
- err = ieee80211_assign_beacon(sdata, &params->beacon, NULL);
+ err = ieee80211_assign_beacon(sdata, &params->beacon, NULL, NULL);
if (err < 0)
goto error;
changed |= err;
@@ -1230,17 +1238,17 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
sdata = IEEE80211_DEV_TO_SUB_IF(dev);
sdata_assert_lock(sdata);
- /* don't allow changing the beacon while CSA is in place - offset
+ /* don't allow changing the beacon while a countdown is in place - offset
* of channel switch counter may change
*/
- if (sdata->vif.csa_active)
+ if (sdata->vif.csa_active || sdata->vif.color_change_active)
return -EBUSY;
old = sdata_dereference(sdata->u.ap.beacon, sdata);
if (!old)
return -ENOENT;
- err = ieee80211_assign_beacon(sdata, params, NULL);
+ err = ieee80211_assign_beacon(sdata, params, NULL, NULL);
if (err < 0)
return err;
ieee80211_bss_info_change_notify(sdata, err);
@@ -3156,7 +3164,7 @@ static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata,
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon,
- NULL);
+ NULL, NULL);
kfree(sdata->u.ap.next_beacon);
sdata->u.ap.next_beacon = NULL;
@@ -3322,7 +3330,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
csa.n_counter_offsets_presp = params->n_counter_offsets_presp;
csa.count = params->count;
- err = ieee80211_assign_beacon(sdata, &params->beacon_csa, &csa);
+ err = ieee80211_assign_beacon(sdata, &params->beacon_csa, &csa, NULL);
if (err < 0) {
kfree(sdata->u.ap.next_beacon);
return err;
@@ -3411,6 +3419,15 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
return 0;
}
+static void ieee80211_color_change_abort(struct ieee80211_sub_if_data *sdata)
+{
+ sdata->vif.color_change_active = false;
+ kfree(sdata->u.ap.next_beacon);
+ sdata->u.ap.next_beacon = NULL;
+
+ cfg80211_color_change_aborted_notify(sdata->dev);
+}
+
static int
__ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_csa_settings *params)
@@ -3479,6 +3496,10 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
goto out;
}
+ /* if there is a color change in progress, abort it */
+ if (sdata->vif.color_change_active)
+ ieee80211_color_change_abort(sdata);
+
err = ieee80211_set_csa_beacon(sdata, params, &changed);
if (err) {
ieee80211_vif_unreserve_chanctx(sdata);
@@ -4130,6 +4151,196 @@ static int ieee80211_set_sar_specs(struct wiphy *wiphy,
return local->ops->set_sar_specs(&local->hw, sar);
}
+static int
+ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata,
+ u32 *changed)
+{
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_AP: {
+ int ret;
+
+ ret = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon,
+ NULL, NULL);
+ kfree(sdata->u.ap.next_beacon);
+ sdata->u.ap.next_beacon = NULL;
+
+ if (ret < 0)
+ return ret;
+
+ *changed |= ret;
+ break;
+ }
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_color_change_settings *params,
+ u32 *changed)
+{
+ struct ieee80211_color_change_settings color_change = {};
+ int err;
+
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_AP:
+ sdata->u.ap.next_beacon =
+ cfg80211_beacon_dup(&params->beacon_next);
+ if (!sdata->u.ap.next_beacon)
+ return -ENOMEM;
+
+ if (params->count <= 1)
+ break;
+
+ color_change.counter_offset_beacon =
+ params->counter_offset_beacon;
+ color_change.counter_offset_presp =
+ params->counter_offset_presp;
+ color_change.count = params->count;
+
+ err = ieee80211_assign_beacon(sdata, &params->beacon_color_change,
+ NULL, &color_change);
+ if (err < 0) {
+ kfree(sdata->u.ap.next_beacon);
+ return err;
+ }
+ *changed |= err;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void
+ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata,
+ u8 color, int enable, u32 changed)
+{
+ sdata->vif.bss_conf.he_bss_color.color = color;
+ sdata->vif.bss_conf.he_bss_color.enabled = enable;
+ changed |= BSS_CHANGED_HE_BSS_COLOR;
+
+ ieee80211_bss_info_change_notify(sdata, changed);
+}
+
+static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ u32 changed = 0;
+ int err;
+
+ sdata_assert_lock(sdata);
+ lockdep_assert_held(&local->mtx);
+
+ sdata->vif.color_change_active = false;
+
+ err = ieee80211_set_after_color_change_beacon(sdata, &changed);
+ if (err) {
+ cfg80211_color_change_aborted_notify(sdata->dev);
+ return err;
+ }
+
+ ieee80211_color_change_bss_config_notify(sdata,
+ sdata->vif.color_change_color,
+ 1, changed);
+ cfg80211_color_change_notify(sdata->dev);
+
+ return 0;
+}
+
+void ieee80211_color_change_finalize_work(struct work_struct *work)
+{
+ struct ieee80211_sub_if_data *sdata =
+ container_of(work, struct ieee80211_sub_if_data,
+ color_change_finalize_work);
+ struct ieee80211_local *local = sdata->local;
+
+ sdata_lock(sdata);
+ mutex_lock(&local->mtx);
+
+ /* AP might have been stopped while waiting for the lock. */
+ if (!sdata->vif.color_change_active)
+ goto unlock;
+
+ if (!ieee80211_sdata_running(sdata))
+ goto unlock;
+
+ ieee80211_color_change_finalize(sdata);
+
+unlock:
+ mutex_unlock(&local->mtx);
+ sdata_unlock(sdata);
+}
+
+void ieee80211_color_change_finish(struct ieee80211_vif *vif)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+ ieee80211_queue_work(&sdata->local->hw,
+ &sdata->color_change_finalize_work);
+}
+EXPORT_SYMBOL_GPL(ieee80211_color_change_finish);
+
+void
+ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
+ u64 color_bitmap)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+ if (sdata->vif.color_change_active || sdata->vif.csa_active)
+ return;
+
+ cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap);
+}
+EXPORT_SYMBOL_GPL(ieeee80211_obss_color_collision_notify);
+
+static int
+ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_color_change_settings *params)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
+ u32 changed = 0;
+ int err;
+
+ sdata_assert_lock(sdata);
+
+ mutex_lock(&local->mtx);
+
+ /* don't allow another color change if one is already active or if csa
+ * is active
+ */
+ if (sdata->vif.color_change_active || sdata->vif.csa_active) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ err = ieee80211_set_color_change_beacon(sdata, params, &changed);
+ if (err)
+ goto out;
+
+ sdata->vif.color_change_active = true;
+ sdata->vif.color_change_color = params->color;
+
+ cfg80211_color_change_started_notify(sdata->dev, params->count);
+
+ if (changed)
+ ieee80211_color_change_bss_config_notify(sdata, 0, 0, changed);
+ else
+ /* if the beacon didn't change, we can finalize immediately */
+ ieee80211_color_change_finalize(sdata);
+
+out:
+ mutex_unlock(&local->mtx);
+
+ return err;
+}
+
const struct cfg80211_ops mac80211_config_ops = {
.add_virtual_intf = ieee80211_add_iface,
.del_virtual_intf = ieee80211_del_iface,
@@ -4233,4 +4444,5 @@ const struct cfg80211_ops mac80211_config_ops = {
.set_tid_config = ieee80211_set_tid_config,
.reset_tid_config = ieee80211_reset_tid_config,
.set_sar_specs = ieee80211_set_sar_specs,
+ .color_change = ieee80211_color_change,
};
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index bcb7cc06db3d..cd3731cbf6c6 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1447,4 +1447,40 @@ static inline void drv_sta_set_decap_offload(struct ieee80211_local *local,
trace_drv_return_void(local);
}
+static inline void drv_add_twt_setup(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta,
+ struct ieee80211_twt_setup *twt)
+{
+ struct ieee80211_twt_params *twt_agrt;
+
+ might_sleep();
+
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ twt_agrt = (void *)twt->params;
+
+ trace_drv_add_twt_setup(local, sta, twt, twt_agrt);
+ local->ops->add_twt_setup(&local->hw, sta, twt);
+ trace_drv_return_void(local);
+}
+
+static inline void drv_twt_teardown_request(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta,
+ u8 flowid)
+{
+ might_sleep();
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ if (!local->ops->twt_teardown_request)
+ return;
+
+ trace_drv_twt_teardown_request(local, sta, flowid);
+ local->ops->twt_teardown_request(&local->hw, sta, flowid);
+ trace_drv_return_void(local);
+}
+
#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index a7ac53a2f00d..5d6ca4c3e698 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -489,7 +489,6 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
const struct cfg80211_bss_ies *ies;
u16 capability = WLAN_CAPABILITY_IBSS;
u64 tsf;
- int ret = 0;
sdata_assert_lock(sdata);
@@ -501,10 +500,8 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
ifibss->ssid_len, IEEE80211_BSS_TYPE_IBSS,
IEEE80211_PRIVACY(ifibss->privacy));
- if (WARN_ON(!cbss)) {
- ret = -EINVAL;
- goto out;
- }
+ if (WARN_ON(!cbss))
+ return -EINVAL;
rcu_read_lock();
ies = rcu_dereference(cbss->ies);
@@ -520,18 +517,14 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.basic_rates,
capability, tsf, &ifibss->chandef,
NULL, csa_settings);
- if (!presp) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!presp)
+ return -ENOMEM;
rcu_assign_pointer(ifibss->presp, presp);
if (old_presp)
kfree_rcu(old_presp, rcu_head);
return BSS_CHANGED_BEACON;
- out:
- return ret;
}
int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 30ce6d2ec7ce..159af6c3ffb0 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -25,6 +25,7 @@
#include <linux/leds.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
+#include <linux/rbtree.h>
#include <net/ieee80211_radiotap.h>
#include <net/cfg80211.h>
#include <net/mac80211.h>
@@ -244,6 +245,12 @@ struct ieee80211_csa_settings {
u8 count;
};
+struct ieee80211_color_change_settings {
+ u16 counter_offset_beacon;
+ u16 counter_offset_presp;
+ u8 count;
+};
+
struct beacon_data {
u8 *head, *tail;
int head_len, tail_len;
@@ -923,6 +930,8 @@ struct ieee80211_sub_if_data {
bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */
struct cfg80211_chan_def csa_chandef;
+ struct work_struct color_change_finalize_work;
+
struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */
struct list_head reserved_chanctx_list; /* protected by chanctx_mtx */
@@ -937,6 +946,7 @@ struct ieee80211_sub_if_data {
struct work_struct work;
struct sk_buff_head skb_queue;
+ struct sk_buff_head status_queue;
u8 needed_rx_chains;
enum ieee80211_smps_mode smps_mode;
@@ -1524,6 +1534,7 @@ struct ieee802_11_elems {
const struct ieee80211_he_spr *he_spr;
const struct ieee80211_mu_edca_param_set *mu_edca_param_set;
const struct ieee80211_he_6ghz_capa *he_6ghz_capa;
+ const struct ieee80211_tx_pwr_env *tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT];
const u8 *uora_element;
const u8 *mesh_id;
const u8 *peering;
@@ -1574,6 +1585,8 @@ struct ieee802_11_elems {
u8 perr_len;
u8 country_elem_len;
u8 bssid_index_len;
+ u8 tx_pwr_env_len[IEEE80211_TPE_MAX_IE_COUNT];
+ u8 tx_pwr_env_num;
/* whether a parse error occurred while retrieving these elements */
bool parse_error;
@@ -1887,6 +1900,9 @@ void ieee80211_csa_finalize_work(struct work_struct *work);
int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_csa_settings *params);
+/* color change handling */
+void ieee80211_color_change_finalize_work(struct work_struct *work);
+
/* interface handling */
#define MAC80211_SUPPORTED_FEATURES_TX (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \
NETIF_F_HW_CSUM | NETIF_F_SG | \
@@ -2068,6 +2084,11 @@ ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif,
/* S1G */
void ieee80211_s1g_sta_rate_init(struct sta_info *sta);
+bool ieee80211_s1g_is_twt_setup(struct sk_buff *skb);
+void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb);
+void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb);
/* Spectrum management */
void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 1e5e9fc45523..62c95597704b 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -462,6 +462,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
sdata_unlock(sdata);
cancel_work_sync(&sdata->csa_finalize_work);
+ cancel_work_sync(&sdata->color_change_finalize_work);
cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
@@ -551,6 +552,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
*/
ieee80211_free_keys(sdata, true);
skb_queue_purge(&sdata->skb_queue);
+ skb_queue_purge(&sdata->status_queue);
}
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
@@ -983,6 +985,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
}
skb_queue_head_init(&sdata->skb_queue);
+ skb_queue_head_init(&sdata->status_queue);
INIT_WORK(&sdata->work, ieee80211_iface_work);
return 0;
@@ -1381,6 +1384,16 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
WARN_ON(1);
break;
}
+ } else if (ieee80211_is_action(mgmt->frame_control) &&
+ mgmt->u.action.category == WLAN_CATEGORY_S1G) {
+ switch (mgmt->u.action.u.s1g.action_code) {
+ case WLAN_S1G_TWT_TEARDOWN:
+ case WLAN_S1G_TWT_SETUP:
+ ieee80211_s1g_rx_twt_action(sdata, skb);
+ break;
+ default:
+ break;
+ }
} else if (ieee80211_is_ext(mgmt->frame_control)) {
if (sdata->vif.type == NL80211_IFTYPE_STATION)
ieee80211_sta_rx_queued_ext(sdata, skb);
@@ -1436,6 +1449,24 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
}
}
+static void ieee80211_iface_process_status(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct ieee80211_mgmt *mgmt = (void *)skb->data;
+
+ if (ieee80211_is_action(mgmt->frame_control) &&
+ mgmt->u.action.category == WLAN_CATEGORY_S1G) {
+ switch (mgmt->u.action.u.s1g.action_code) {
+ case WLAN_S1G_TWT_TEARDOWN:
+ case WLAN_S1G_TWT_SETUP:
+ ieee80211_s1g_status_twt_action(sdata, skb);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
static void ieee80211_iface_work(struct work_struct *work)
{
struct ieee80211_sub_if_data *sdata =
@@ -1465,6 +1496,16 @@ static void ieee80211_iface_work(struct work_struct *work)
kcov_remote_stop();
}
+ /* process status queue */
+ while ((skb = skb_dequeue(&sdata->status_queue))) {
+ kcov_remote_start_common(skb_get_kcov_handle(skb));
+
+ ieee80211_iface_process_status(sdata, skb);
+ kfree_skb(skb);
+
+ kcov_remote_stop();
+ }
+
/* then other type-dependent work */
switch (sdata->vif.type) {
case NL80211_IFTYPE_STATION:
@@ -1528,9 +1569,11 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
}
skb_queue_head_init(&sdata->skb_queue);
+ skb_queue_head_init(&sdata->status_queue);
INIT_WORK(&sdata->work, ieee80211_iface_work);
INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work);
INIT_WORK(&sdata->csa_finalize_work, ieee80211_csa_finalize_work);
+ INIT_WORK(&sdata->color_change_finalize_work, ieee80211_color_change_finalize_work);
INIT_LIST_HEAD(&sdata->assigned_chanctx_list);
INIT_LIST_HEAD(&sdata->reserved_chanctx_list);
@@ -2001,9 +2044,16 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops);
- /* MTU range: 256 - 2304 */
+ /* MTU range is normally 256 - 2304, where the upper limit is
+ * the maximum MSDU size. Monitor interfaces send and receive
+ * MPDU and A-MSDU frames which may be much larger so we do
+ * not impose an upper limit in that case.
+ */
ndev->min_mtu = 256;
- ndev->max_mtu = local->hw.max_mtu;
+ if (type == NL80211_IFTYPE_MONITOR)
+ ndev->max_mtu = 0;
+ else
+ ndev->max_mtu = local->hw.max_mtu;
ret = cfg80211_register_netdevice(ndev);
if (ret) {
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 05f4c3c72619..45fb517591ee 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -260,6 +260,8 @@ static void ieee80211_restart_work(struct work_struct *work)
flush_work(&local->radar_detected_work);
rtnl_lock();
+ /* we might do interface manipulations, so need both */
+ wiphy_lock(local->hw.wiphy);
WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
"%s called with hardware scan in progress\n", __func__);
@@ -1018,7 +1020,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
iftd = &sband->iftype_data[i];
- supp_he = supp_he || (iftd && iftd->he_cap.has_he);
+ supp_he = supp_he || iftd->he_cap.has_he;
}
/* HT, VHT, HE require QoS, thus >= 4 queues */
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 2563473b5cf1..99ed68f7dc36 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -359,7 +359,12 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
put_unaligned_le32(it_present_val, it_present);
- pos = (void *)(it_present + 1);
+ /* This references through an offset into it_optional[] rather
+ * than via it_present otherwise later uses of pos will cause
+ * the compiler to think we have walked past the end of the
+ * struct member.
+ */
+ pos = (void *)&rthdr->it_optional[it_present - rthdr->it_optional];
/* the order of the following fields is important */
@@ -372,7 +377,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
ieee80211_calculate_rx_timestamp(local, status,
mpdulen, 0),
pos);
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_TSFT));
pos += 8;
}
@@ -396,7 +401,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
*pos = 0;
} else {
int shift = 0;
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_RATE));
if (status->bw == RATE_INFO_BW_10)
shift = 1;
else if (status->bw == RATE_INFO_BW_5)
@@ -433,7 +438,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
*pos = status->signal;
rthdr->it_present |=
- cpu_to_le32(1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL);
+ cpu_to_le32(BIT(IEEE80211_RADIOTAP_DBM_ANTSIGNAL));
pos++;
}
@@ -459,7 +464,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
if (status->encoding == RX_ENC_HT) {
unsigned int stbc;
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS));
*pos++ = local->hw.radiotap_mcs_details;
*pos = 0;
if (status->enc_flags & RX_ENC_FLAG_SHORT_GI)
@@ -483,7 +488,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
while ((pos - (u8 *)rthdr) & 3)
pos++;
rthdr->it_present |=
- cpu_to_le32(1 << IEEE80211_RADIOTAP_AMPDU_STATUS);
+ cpu_to_le32(BIT(IEEE80211_RADIOTAP_AMPDU_STATUS));
put_unaligned_le32(status->ampdu_reference, pos);
pos += 4;
if (status->flag & RX_FLAG_AMPDU_LAST_KNOWN)
@@ -510,7 +515,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
if (status->encoding == RX_ENC_VHT) {
u16 known = local->hw.radiotap_vht_details;
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_VHT));
put_unaligned_le16(known, pos);
pos += 2;
/* flags */
@@ -554,7 +559,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
u8 flags = IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT;
rthdr->it_present |=
- cpu_to_le32(1 << IEEE80211_RADIOTAP_TIMESTAMP);
+ cpu_to_le32(BIT(IEEE80211_RADIOTAP_TIMESTAMP));
/* ensure 8 byte alignment */
while ((pos - (u8 *)rthdr) & 7)
@@ -642,7 +647,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
/* ensure 2 byte alignment */
while ((pos - (u8 *)rthdr) & 1)
pos++;
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE));
memcpy(pos, &he, sizeof(he));
pos += sizeof(he);
}
@@ -652,14 +657,14 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
/* ensure 2 byte alignment */
while ((pos - (u8 *)rthdr) & 1)
pos++;
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE_MU);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE_MU));
memcpy(pos, &he_mu, sizeof(he_mu));
pos += sizeof(he_mu);
}
if (status->flag & RX_FLAG_NO_PSDU) {
rthdr->it_present |=
- cpu_to_le32(1 << IEEE80211_RADIOTAP_ZERO_LEN_PSDU);
+ cpu_to_le32(BIT(IEEE80211_RADIOTAP_ZERO_LEN_PSDU));
*pos++ = status->zero_length_psdu_type;
}
@@ -667,7 +672,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
/* ensure 2 byte alignment */
while ((pos - (u8 *)rthdr) & 1)
pos++;
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_LSIG);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_LSIG));
memcpy(pos, &lsig, sizeof(lsig));
pos += sizeof(lsig);
}
@@ -3207,6 +3212,68 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
return RX_CONTINUE;
}
+static bool
+ieee80211_process_rx_twt_action(struct ieee80211_rx_data *rx)
+{
+ struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)rx->skb->data;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
+ struct ieee80211_sub_if_data *sdata = rx->sdata;
+ const struct ieee80211_sta_he_cap *hecap;
+ struct ieee80211_supported_band *sband;
+
+ /* TWT actions are only supported in AP for the moment */
+ if (sdata->vif.type != NL80211_IFTYPE_AP)
+ return false;
+
+ if (!rx->local->ops->add_twt_setup)
+ return false;
+
+ sband = rx->local->hw.wiphy->bands[status->band];
+ hecap = ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
+ if (!hecap)
+ return false;
+
+ if (!(hecap->he_cap_elem.mac_cap_info[0] &
+ IEEE80211_HE_MAC_CAP0_TWT_RES))
+ return false;
+
+ if (!rx->sta)
+ return false;
+
+ switch (mgmt->u.action.u.s1g.action_code) {
+ case WLAN_S1G_TWT_SETUP: {
+ struct ieee80211_twt_setup *twt;
+
+ if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE +
+ 1 + /* action code */
+ sizeof(struct ieee80211_twt_setup) +
+ 2 /* TWT req_type agrt */)
+ break;
+
+ twt = (void *)mgmt->u.action.u.s1g.variable;
+ if (twt->element_id != WLAN_EID_S1G_TWT)
+ break;
+
+ if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE +
+ 4 + /* action code + token + tlv */
+ twt->length)
+ break;
+
+ return true; /* queue the frame */
+ }
+ case WLAN_S1G_TWT_TEARDOWN:
+ if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE + 2)
+ break;
+
+ return true; /* queue the frame */
+ default:
+ break;
+ }
+
+ return false;
+}
+
static ieee80211_rx_result debug_noinline
ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
{
@@ -3486,6 +3553,17 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
!mesh_path_sel_is_hwmp(sdata))
break;
goto queue;
+ case WLAN_CATEGORY_S1G:
+ switch (mgmt->u.action.u.s1g.action_code) {
+ case WLAN_S1G_TWT_SETUP:
+ case WLAN_S1G_TWT_TEARDOWN:
+ if (ieee80211_process_rx_twt_action(rx))
+ goto queue;
+ break;
+ default:
+ break;
+ }
+ break;
}
return RX_CONTINUE;
diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c
index c33f332b049a..7e35ab5b6166 100644
--- a/net/mac80211/s1g.c
+++ b/net/mac80211/s1g.c
@@ -6,6 +6,7 @@
#include <linux/ieee80211.h>
#include <net/mac80211.h>
#include "ieee80211_i.h"
+#include "driver-ops.h"
void ieee80211_s1g_sta_rate_init(struct sta_info *sta)
{
@@ -14,3 +15,182 @@ void ieee80211_s1g_sta_rate_init(struct sta_info *sta)
sta->rx_stats.last_rate =
STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_S1G);
}
+
+/* Report whether @skb holds an S1G-category action frame whose action code
+ * is TWT Setup.
+ */
+bool ieee80211_s1g_is_twt_setup(struct sk_buff *skb)
+{
+	struct ieee80211_mgmt *frame = (struct ieee80211_mgmt *)skb->data;
+
+	/* hinted as the common case: not an action frame */
+	if (likely(!ieee80211_is_action(frame->frame_control)))
+		return false;
+
+	/* hinted as the common case: an action frame of another category */
+	if (likely(frame->u.action.category != WLAN_CATEGORY_S1G))
+		return false;
+
+	return frame->u.action.u.s1g.action_code == WLAN_S1G_TWT_SETUP;
+}
+
+/* Build an S1G TWT Setup action frame carrying @twt and transmit it on @sdata
+ * to @da. An allocation failure silently drops the frame.
+ */
+static void
+ieee80211_s1g_send_twt_setup(struct ieee80211_sub_if_data *sdata, const u8 *da,
+			     const u8 *bssid, struct ieee80211_twt_setup *twt)
+{
+	struct ieee80211_local *local = sdata->local;
+	int headroom = local->hw.extra_tx_headroom;
+	/* action header, then 4 fixed bytes, then the element body */
+	int frame_len = IEEE80211_MIN_ACTION_SIZE + 4 + twt->length;
+	struct ieee80211_mgmt *frame;
+	struct sk_buff *skb;
+
+	skb = dev_alloc_skb(headroom + frame_len);
+	if (!skb)
+		return;
+
+	skb_reserve(skb, headroom);
+	frame = skb_put_zero(skb, frame_len);
+
+	frame->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					   IEEE80211_STYPE_ACTION);
+	memcpy(frame->da, da, ETH_ALEN);
+	memcpy(frame->sa, sdata->vif.addr, ETH_ALEN);
+	memcpy(frame->bssid, bssid, ETH_ALEN);
+
+	frame->u.action.category = WLAN_CATEGORY_S1G;
+	frame->u.action.u.s1g.action_code = WLAN_S1G_TWT_SETUP;
+	/* 3 bytes of @twt ahead of the element body, then twt->length bytes */
+	memcpy(frame->u.action.u.s1g.variable, twt, 3 + twt->length);
+
+	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
+					IEEE80211_TX_INTFL_MLME_CONN_TX |
+					IEEE80211_TX_CTL_REQ_TX_STATUS;
+	ieee80211_tx_skb(sdata, skb);
+}
+
+/* Build an S1G TWT Teardown action frame for @flowid and transmit it on
+ * @sdata to @da. An allocation failure silently drops the frame.
+ */
+static void
+ieee80211_s1g_send_twt_teardown(struct ieee80211_sub_if_data *sdata,
+				const u8 *da, const u8 *bssid, u8 flowid)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_mgmt *frame;
+	struct sk_buff *skb;
+	u8 *flow_field;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+			    IEEE80211_MIN_ACTION_SIZE + 2);
+	if (!skb)
+		return;
+
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+	frame = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE + 2);
+
+	frame->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					   IEEE80211_STYPE_ACTION);
+	memcpy(frame->da, da, ETH_ALEN);
+	memcpy(frame->sa, sdata->vif.addr, ETH_ALEN);
+	memcpy(frame->bssid, bssid, ETH_ALEN);
+
+	frame->u.action.category = WLAN_CATEGORY_S1G;
+	frame->u.action.u.s1g.action_code = WLAN_S1G_TWT_TEARDOWN;
+
+	/* the flow id is the single byte following the action code */
+	flow_field = (u8 *)frame->u.action.u.s1g.variable;
+	*flow_field = flowid;
+
+	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
+					IEEE80211_TX_CTL_REQ_TX_STATUS;
+	ieee80211_tx_skb(sdata, skb);
+}
+
+/* Handle a received TWT Setup request from @sta: the element is edited in
+ * place inside @skb and then sent back to the requester as the response.
+ */
+static void
+ieee80211_s1g_rx_twt_setup(struct ieee80211_sub_if_data *sdata,
+			   struct sta_info *sta, struct sk_buff *skb)
+{
+	struct ieee80211_mgmt *frame = (void *)skb->data;
+	struct ieee80211_twt_setup *setup =
+		(void *)frame->u.action.u.s1g.variable;
+	struct ieee80211_twt_params *agrt = (void *)setup->params;
+
+	/* the reply is a response, so clear the "request" bit */
+	agrt->req_type &= cpu_to_le16(~IEEE80211_TWT_REQTYPE_REQUEST);
+
+	if (setup->control & IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST) {
+		/* broadcast TWT not supported yet */
+		le16p_replace_bits(&agrt->req_type,
+				   TWT_SETUP_CMD_REJECT,
+				   IEEE80211_TWT_REQTYPE_SETUP_CMD);
+	} else {
+		drv_add_twt_setup(sdata->local, sdata, &sta->sta, setup);
+	}
+
+	ieee80211_s1g_send_twt_setup(sdata, frame->sa, sdata->vif.addr, setup);
+}
+
+/* Handle a received TWT Teardown from @sta by passing the first byte after
+ * the action code to the driver as the flow id.
+ */
+static void
+ieee80211_s1g_rx_twt_teardown(struct ieee80211_sub_if_data *sdata,
+			      struct sta_info *sta, struct sk_buff *skb)
+{
+	struct ieee80211_mgmt *frame = (struct ieee80211_mgmt *)skb->data;
+	u8 flow = frame->u.action.u.s1g.variable[0];
+
+	drv_twt_teardown_request(sdata->local, sdata, &sta->sta, flow);
+}
+
+/* Handle a TWT Setup frame that we transmitted to @sta but that was not
+ * acknowledged: tell the driver to drop the agreement and send the peer a
+ * TWT Teardown for the same flow id.
+ *
+ * @skb is the frame we sent, so the peer is in mgmt->da (the caller looks up
+ * @sta by that address) and mgmt->sa is our own address. The teardown must
+ * therefore be addressed to mgmt->da.
+ */
+static void
+ieee80211_s1g_tx_twt_setup_fail(struct ieee80211_sub_if_data *sdata,
+				struct sta_info *sta, struct sk_buff *skb)
+{
+	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data;
+	struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.u.s1g.variable;
+	struct ieee80211_twt_params *twt_agrt = (void *)twt->params;
+	u8 flowid = le16_get_bits(twt_agrt->req_type,
+				  IEEE80211_TWT_REQTYPE_FLOWID);
+
+	drv_twt_teardown_request(sdata->local, sdata, &sta->sta, flowid);
+
+	ieee80211_s1g_send_twt_teardown(sdata, mgmt->da, sdata->vif.addr,
+					flowid);
+}
+
+/* Dispatch a queued, received S1G TWT action frame to its handler. The sender
+ * is looked up, and the handler run, under local->sta_mtx; frames from an
+ * unknown station are ignored.
+ */
+void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata,
+				 struct sk_buff *skb)
+{
+	struct ieee80211_mgmt *frame = (struct ieee80211_mgmt *)skb->data;
+	struct ieee80211_local *local = sdata->local;
+	struct sta_info *peer;
+
+	mutex_lock(&local->sta_mtx);
+
+	peer = sta_info_get_bss(sdata, frame->sa);
+	if (peer) {
+		switch (frame->u.action.u.s1g.action_code) {
+		case WLAN_S1G_TWT_SETUP:
+			ieee80211_s1g_rx_twt_setup(sdata, peer, skb);
+			break;
+		case WLAN_S1G_TWT_TEARDOWN:
+			ieee80211_s1g_rx_twt_teardown(sdata, peer, skb);
+			break;
+		default:
+			break;
+		}
+	}
+
+	mutex_unlock(&local->sta_mtx);
+}
+
+/* Process the TX status of an S1G TWT action frame we sent. Only TWT Setup
+ * frames are acted on; the destination station is looked up, and the handler
+ * run, under local->sta_mtx.
+ */
+void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata,
+				     struct sk_buff *skb)
+{
+	struct ieee80211_mgmt *frame = (struct ieee80211_mgmt *)skb->data;
+	struct ieee80211_local *local = sdata->local;
+	struct sta_info *peer;
+
+	mutex_lock(&local->sta_mtx);
+
+	peer = sta_info_get_bss(sdata, frame->da);
+	/* process failed twt setup frames */
+	if (peer && frame->u.action.u.s1g.action_code == WLAN_S1G_TWT_SETUP)
+		ieee80211_s1g_tx_twt_setup_fail(sdata, peer, skb);
+
+	mutex_unlock(&local->sta_mtx);
+}
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index a5505ee51229..2b5acb37587f 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -543,7 +543,7 @@ static int sta_info_insert_check(struct sta_info *sta)
return -ENETDOWN;
if (WARN_ON(ether_addr_equal(sta->sta.addr, sdata->vif.addr) ||
- is_multicast_ether_addr(sta->sta.addr)))
+ !is_valid_ether_addr(sta->sta.addr)))
return -EINVAL;
/* The RCU read lock is required by rhashtable due to
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index bae321ff77f6..f6f63a0b1b72 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -305,8 +305,8 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
memset(rthdr, 0, rtap_len);
rthdr->it_len = cpu_to_le16(rtap_len);
rthdr->it_present =
- cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) |
- (1 << IEEE80211_RADIOTAP_DATA_RETRIES));
+ cpu_to_le32(BIT(IEEE80211_RADIOTAP_TX_FLAGS) |
+ BIT(IEEE80211_RADIOTAP_DATA_RETRIES));
pos = (unsigned char *)(rthdr + 1);
/*
@@ -331,7 +331,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
sband->bitrates[info->status.rates[0].idx].bitrate;
if (legacy_rate) {
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_RATE));
*pos = DIV_ROUND_UP(legacy_rate, 5 * (1 << shift));
/* padding for tx flags */
pos += 2;
@@ -358,7 +358,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
if (status && status->rate &&
(status->rate->flags & RATE_INFO_FLAGS_MCS)) {
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS));
pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS |
IEEE80211_RADIOTAP_MCS_HAVE_GI |
IEEE80211_RADIOTAP_MCS_HAVE_BW;
@@ -374,7 +374,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
(IEEE80211_RADIOTAP_VHT_KNOWN_GI |
IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH);
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_VHT));
/* required alignment from rthdr */
pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2);
@@ -419,7 +419,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
(status->rate->flags & RATE_INFO_FLAGS_HE_MCS)) {
struct ieee80211_radiotap_he *he;
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE));
/* required alignment from rthdr */
pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2);
@@ -495,7 +495,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
/* IEEE80211_RADIOTAP_MCS
* IEEE80211_RADIOTAP_VHT */
if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS) {
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS));
pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS |
IEEE80211_RADIOTAP_MCS_HAVE_GI |
IEEE80211_RADIOTAP_MCS_HAVE_BW;
@@ -512,7 +512,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
(IEEE80211_RADIOTAP_VHT_KNOWN_GI |
IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH);
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_VHT));
/* required alignment from rthdr */
pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2);
@@ -705,13 +705,26 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
/* Check to see if packet is a TDLS teardown packet */
if (ieee80211_is_data(hdr->frame_control) &&
(ieee80211_get_tdls_action(skb, hdr_size) ==
- WLAN_TDLS_TEARDOWN))
+ WLAN_TDLS_TEARDOWN)) {
ieee80211_tdls_td_tx_handle(local, sdata, skb,
info->flags);
- else
+ } else if (ieee80211_s1g_is_twt_setup(skb)) {
+ if (!acked) {
+ struct sk_buff *qskb;
+
+ qskb = skb_clone(skb, GFP_ATOMIC);
+ if (qskb) {
+ skb_queue_tail(&sdata->status_queue,
+ qskb);
+ ieee80211_queue_work(&local->hw,
+ &sdata->work);
+ }
+ }
+ } else {
ieee80211_mgd_conn_tx_status(sdata,
hdr->frame_control,
acked);
+ }
}
rcu_read_unlock();
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index f6ef15366938..9e8381bef7ed 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -2825,6 +2825,73 @@ DEFINE_EVENT(sta_flag_evt, drv_sta_set_decap_offload,
TP_ARGS(local, sdata, sta, enabled)
);
+TRACE_EVENT(drv_add_twt_setup, /* trace event recording the TWT setup fields for a station */
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sta *sta,
+ struct ieee80211_twt_setup *twt,
+ struct ieee80211_twt_params *twt_agrt),
+
+ TP_ARGS(local, sta, twt, twt_agrt),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ STA_ENTRY
+ __field(u8, dialog_token)
+ __field(u8, control)
+ __field(__le16, req_type) /* stored little-endian, converted at print time */
+ __field(__le64, twt) /* stored little-endian, converted at print time */
+ __field(u8, duration)
+ __field(__le16, mantissa) /* stored little-endian, converted at print time */
+ __field(u8, channel)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ STA_ASSIGN;
+ __entry->dialog_token = twt->dialog_token;
+ __entry->control = twt->control;
+ __entry->req_type = twt_agrt->req_type;
+ __entry->twt = twt_agrt->twt;
+ __entry->duration = twt_agrt->min_twt_dur; /* "duration" is the agreement's min_twt_dur */
+ __entry->mantissa = twt_agrt->mantissa;
+ __entry->channel = twt_agrt->channel;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT STA_PR_FMT
+ " token:%d control:0x%02x req_type:0x%04x"
+ " twt:%llu duration:%d mantissa:%d channel:%d",
+ LOCAL_PR_ARG, STA_PR_ARG, __entry->dialog_token,
+ __entry->control, le16_to_cpu(__entry->req_type),
+ le64_to_cpu(__entry->twt), __entry->duration,
+ le16_to_cpu(__entry->mantissa), __entry->channel
+ )
+);
+
+TRACE_EVENT(drv_twt_teardown_request, /* trace event recording the flow id of a TWT teardown for a station */
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sta *sta, u8 flowid),
+
+ TP_ARGS(local, sta, flowid),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ STA_ENTRY
+ __field(u8, flowid)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ STA_ASSIGN;
+ __entry->flowid = flowid;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT STA_PR_FMT " flowid:%d",
+ LOCAL_PR_ARG, STA_PR_ARG, __entry->flowid
+ )
+);
+
#endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8509778ff31f..2d1193ed3eb5 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3242,7 +3242,9 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
if (info->control.flags & IEEE80211_TX_CTRL_AMSDU)
return true;
- if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr)))
+ if (!ieee80211_amsdu_realloc_pad(local, skb,
+ sizeof(*amsdu_hdr) +
+ local->hw.extra_tx_headroom))
return false;
data = skb_push(skb, sizeof(*amsdu_hdr));
@@ -4782,11 +4784,11 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata,
struct beacon_data *beacon)
{
+ u8 *beacon_data, count, max_count = 1;
struct probe_resp *resp;
- u8 *beacon_data;
size_t beacon_data_len;
+ u16 *bcn_offsets;
int i;
- u8 count = beacon->cntdwn_current_counter;
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
@@ -4806,21 +4808,27 @@ static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata,
}
rcu_read_lock();
- for (i = 0; i < IEEE80211_MAX_CNTDWN_COUNTERS_NUM; ++i) {
- resp = rcu_dereference(sdata->u.ap.probe_resp);
+ resp = rcu_dereference(sdata->u.ap.probe_resp);
+
+ bcn_offsets = beacon->cntdwn_counter_offsets;
+ count = beacon->cntdwn_current_counter;
+ if (sdata->vif.csa_active)
+ max_count = IEEE80211_MAX_CNTDWN_COUNTERS_NUM;
- if (beacon->cntdwn_counter_offsets[i]) {
- if (WARN_ON_ONCE(beacon->cntdwn_counter_offsets[i] >=
- beacon_data_len)) {
+ for (i = 0; i < max_count; ++i) {
+ if (bcn_offsets[i]) {
+ if (WARN_ON_ONCE(bcn_offsets[i] >= beacon_data_len)) {
rcu_read_unlock();
return;
}
-
- beacon_data[beacon->cntdwn_counter_offsets[i]] = count;
+ beacon_data[bcn_offsets[i]] = count;
}
- if (sdata->vif.type == NL80211_IFTYPE_AP && resp)
- resp->data[resp->cntdwn_counter_offsets[i]] = count;
+ if (sdata->vif.type == NL80211_IFTYPE_AP && resp) {
+ u16 *resp_offsets = resp->cntdwn_counter_offsets;
+
+ resp->data[resp_offsets[i]] = count;
+ }
}
rcu_read_unlock();
}
@@ -5030,6 +5038,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
if (offs) {
offs->tim_offset = beacon->head_len;
offs->tim_length = skb->len - beacon->head_len;
+ offs->cntdwn_counter_offs[0] = beacon->cntdwn_counter_offsets[0];
/* for AP the csa offsets are from tail */
csa_off_base = skb->len;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 05e96212b104..49cb96d25169 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1336,6 +1336,18 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
elems->rsnx = pos;
elems->rsnx_len = elen;
break;
+ case WLAN_EID_TX_POWER_ENVELOPE:
+ if (elen < 1 ||
+ elen > sizeof(struct ieee80211_tx_pwr_env))
+ break;
+
+ if (elems->tx_pwr_env_num >= ARRAY_SIZE(elems->tx_pwr_env))
+ break;
+
+ elems->tx_pwr_env[elems->tx_pwr_env_num] = (void *)pos;
+ elems->tx_pwr_env_len[elems->tx_pwr_env_num] = elen;
+ elems->tx_pwr_env_num++;
+ break;
case WLAN_EID_EXTENSION:
ieee80211_parse_extension_element(calc_crc ?
&crc : NULL,
diff --git a/net/mctp/Kconfig b/net/mctp/Kconfig
new file mode 100644
index 000000000000..2cdf3d0a28c9
--- /dev/null
+++ b/net/mctp/Kconfig
@@ -0,0 +1,13 @@
+
+menuconfig MCTP
+ depends on NET
+ tristate "MCTP core protocol support"
+ help
+ Management Component Transport Protocol (MCTP) is an in-system
+ protocol for communicating between management controllers and
+ their managed devices (peripherals, host processors, etc.). The
+ protocol is defined by DMTF specification DSP0236.
+
+ This option enables core MCTP support. For communicating with other
+ devices, you'll want to enable a driver for a specific hardware
+ channel.
diff --git a/net/mctp/Makefile b/net/mctp/Makefile
new file mode 100644
index 000000000000..0171333384d7
--- /dev/null
+++ b/net/mctp/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_MCTP) += mctp.o
+mctp-objs := af_mctp.o device.o route.o neigh.o
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
new file mode 100644
index 000000000000..a9526ac29dff
--- /dev/null
+++ b/net/mctp/af_mctp.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP)
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/if_arp.h>
+#include <linux/net.h>
+#include <linux/mctp.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/sock.h>
+
+/* socket implementation */
+
+/* Detach the sock from @sock, if there is one, and close it through its
+ * protocol's close handler. Always returns 0.
+ */
+static int mctp_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (!sk)
+		return 0;
+
+	sock->sk = NULL;
+	sk->sk_prot->close(sk, 0);
+	return 0;
+}
+
+/* Bind @sock to the network, address and message type given in @addr.
+ *
+ * Returns 0 or the result of the protocol hash handler on success,
+ * -EINVAL for a short address, -EAFNOSUPPORT for a non-MCTP family,
+ * -EACCES without CAP_NET_BIND_SERVICE and -EADDRINUSE if already bound.
+ */
+static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
+{
+	struct sock *sk = sock->sk;
+	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+	struct sockaddr_mctp *bind_addr;
+	int rc;
+
+	if (addrlen < sizeof(*bind_addr))
+		return -EINVAL;
+
+	if (addr->sa_family != AF_MCTP)
+		return -EAFNOSUPPORT;
+
+	if (!capable(CAP_NET_BIND_SERVICE))
+		return -EACCES;
+
+	/* it's a valid sockaddr for MCTP, cast and do protocol checks */
+	bind_addr = (struct sockaddr_mctp *)addr;
+
+	lock_sock(sk);
+
+	/* TODO: allow rebind */
+	if (sk_hashed(sk)) {
+		rc = -EADDRINUSE;
+	} else {
+		msk->bind_net = bind_addr->smctp_network;
+		msk->bind_addr = bind_addr->smctp_addr.s_addr;
+		/* ignore the IC bit */
+		msk->bind_type = bind_addr->smctp_type & 0x7f;
+
+		rc = sk->sk_prot->hash(sk);
+	}
+
+	release_sock(sk);
+
+	return rc;
+}
+
+/* Send one MCTP message of @len payload bytes to the address in
+ * msg->msg_name.
+ *
+ * Returns @len on success or a negative errno: -EDESTADDRREQ without a
+ * destination, -EINVAL for a malformed one, -EACCES without CAP_NET_RAW,
+ * -EHOSTUNREACH if no route matches, or the allocation/copy/output error.
+ */
+static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+{
+	DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
+	const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr);
+	int rc, addrlen = msg->msg_namelen;
+	struct sock *sk = sock->sk;
+	struct mctp_skb_cb *cb;
+	struct mctp_route *rt;
+	struct sk_buff *skb;
+
+	/* TODO: connect()ed sockets */
+	if (!addr)
+		return -EDESTADDRREQ;
+
+	if (addrlen < sizeof(struct sockaddr_mctp))
+		return -EINVAL;
+	if (addr->smctp_family != AF_MCTP)
+		return -EINVAL;
+	if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER))
+		return -EINVAL;
+
+	if (!capable(CAP_NET_RAW))
+		return -EACCES;
+
+	/* substitute the namespace's default network for "any" */
+	if (addr->smctp_network == MCTP_NET_ANY)
+		addr->smctp_network = mctp_default_net(sock_net(sk));
+
+	rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
+			       addr->smctp_addr.s_addr);
+	if (!rt)
+		return -EHOSTUNREACH;
+
+	/* headroom for headers, one type byte, then the payload */
+	skb = sock_alloc_send_skb(sk, hlen + 1 + len,
+				  msg->msg_flags & MSG_DONTWAIT, &rc);
+	if (!skb)
+		return rc;
+
+	skb_reserve(skb, hlen);
+
+	/* set type as first byte in payload */
+	*(u8 *)skb_put(skb, 1) = addr->smctp_type;
+
+	rc = memcpy_from_msg((void *)skb_put(skb, len), msg, len);
+	if (rc < 0) {
+		kfree_skb(skb);
+		return rc;
+	}
+
+	/* set up cb */
+	cb = __mctp_cb(skb);
+	cb->net = addr->smctp_network;
+
+	rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr,
+			       addr->smctp_tag);
+	if (rc)
+		return rc;
+
+	return len;
+}
+
+/* Receive one MCTP message from @sock into @msg.
+ *
+ * The first byte of the queued skb is the message type; it is reported through
+ * the source address instead of being copied into the payload.
+ *
+ * Returns the number of payload bytes copied (or the full message length when
+ * MSG_TRUNC is set in @flags), 0 for an empty skb, -EOPNOTSUPP for unsupported
+ * flags, or the error from dequeueing/copying the datagram.
+ */
+static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+			int flags)
+{
+	DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	size_t msglen;
+	u8 type;
+	int rc;
+
+	if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
+		return -EOPNOTSUPP;
+
+	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &rc);
+	if (!skb)
+		return rc;
+
+	if (!skb->len) {
+		rc = 0;
+		goto out_free;
+	}
+
+	/* extract message type, remove from data */
+	type = *((u8 *)skb->data);
+	msglen = skb->len - 1;
+
+	if (len < msglen)
+		msg->msg_flags |= MSG_TRUNC;
+	else
+		len = msglen;
+
+	rc = skb_copy_datagram_msg(skb, 1, msg, len);
+	if (rc < 0)
+		goto out_free;
+
+	sock_recv_ts_and_drops(msg, sk, skb);
+
+	if (addr) {
+		struct mctp_skb_cb *cb = mctp_cb(skb);
+		/* TODO: expand mctp_skb_cb for header fields? */
+		struct mctp_hdr *hdr = mctp_hdr(skb);
+
+		/* Zero the whole sockaddr before filling it in, so that any
+		 * padding between or after the members cannot carry stale
+		 * kernel memory out to userspace.
+		 */
+		memset(addr, 0, sizeof(*addr));
+		addr->smctp_family = AF_MCTP;
+		addr->smctp_network = cb->net;
+		addr->smctp_addr.s_addr = hdr->src;
+		addr->smctp_type = type;
+		addr->smctp_tag = hdr->flags_seq_tag &
+					(MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+		msg->msg_namelen = sizeof(*addr);
+	}
+
+	rc = len;
+
+	/* with MSG_TRUNC the caller asked for the real message length */
+	if (flags & MSG_TRUNC)
+		rc = msglen;
+
+out_free:
+	skb_free_datagram(sk, skb);
+	return rc;
+}
+
+/* No socket options are implemented: every request fails with -EINVAL. */
+static int mctp_setsockopt(struct socket *sock, int level, int optname,
+			   sockptr_t optval, unsigned int optlen)
+{
+	return -EINVAL;
+}
+
+/* No socket options are implemented: every query fails with -EINVAL. */
+static int mctp_getsockopt(struct socket *sock, int level, int optname,
+			   char __user *optval, int __user *optlen)
+{
+	return -EINVAL;
+}
+
+static const struct proto_ops mctp_dgram_ops = { /* socket-layer operations installed on MCTP sockets by mctp_pf_create() */
+ .family = PF_MCTP,
+ .release = mctp_release,
+ .bind = mctp_bind,
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = datagram_poll,
+ .ioctl = sock_no_ioctl,
+ .gettstamp = sock_gettstamp,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = mctp_setsockopt, /* always returns -EINVAL */
+ .getsockopt = mctp_getsockopt, /* always returns -EINVAL */
+ .sendmsg = mctp_sendmsg,
+ .recvmsg = mctp_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+
+/* Protocol init hook: start the socket with an empty key list. */
+static int mctp_sk_init(struct sock *sk)
+{
+	struct mctp_sock *mctp_sk = container_of(sk, struct mctp_sock, sk);
+
+	INIT_HLIST_HEAD(&mctp_sk->keys);
+
+	return 0;
+}
+
+/* Protocol close hook: hand the sock to the common release path; @timeout is
+ * not used.
+ */
+static void mctp_sk_close(struct sock *sk, long timeout)
+{
+	sk_common_release(sk);
+}
+
+/* Protocol hash hook: add @sk to its namespace's list of bound sockets,
+ * serialised by that namespace's bind_lock. Always returns 0.
+ */
+static int mctp_sk_hash(struct sock *sk)
+{
+	struct net *ns = sock_net(sk);
+
+	mutex_lock(&ns->mctp.bind_lock);
+	sk_add_node_rcu(sk, &ns->mctp.binds);
+	mutex_unlock(&ns->mctp.bind_lock);
+
+	return 0;
+}
+
+/* Protocol unhash hook: remove @sk from the bind list, release every key it
+ * owns (dropping any partial reassembly), then wait for an RCU grace period.
+ */
+static void mctp_sk_unhash(struct sock *sk)
+{
+	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+	struct net *ns = sock_net(sk);
+	struct hlist_node *next;
+	struct mctp_sk_key *key;
+	unsigned long irqflags;
+
+	/* remove from any type-based binds */
+	mutex_lock(&ns->mctp.bind_lock);
+	sk_del_node_init_rcu(sk);
+	mutex_unlock(&ns->mctp.bind_lock);
+
+	/* remove tag allocations */
+	spin_lock_irqsave(&ns->mctp.keys_lock, irqflags);
+	hlist_for_each_entry_safe(key, next, &msk->keys, sklist) {
+		/* unlink from both the per-socket and the hlist linkage */
+		hlist_del_rcu(&key->sklist);
+		hlist_del_rcu(&key->hlist);
+
+		/* discard in-progress reassembly and mark the key dead */
+		spin_lock(&key->reasm_lock);
+		if (key->reasm_head)
+			kfree_skb(key->reasm_head);
+		key->reasm_head = NULL;
+		key->reasm_dead = true;
+		spin_unlock(&key->reasm_lock);
+
+		kfree_rcu(key, rcu);
+	}
+	spin_unlock_irqrestore(&ns->mctp.keys_lock, irqflags);
+
+	synchronize_rcu();
+}
+
+static struct proto mctp_proto = { /* protocol descriptor registered in mctp_init() and used by mctp_pf_create() */
+ .name = "MCTP",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct mctp_sock), /* each sock is allocated as a struct mctp_sock */
+ .init = mctp_sk_init,
+ .close = mctp_sk_close,
+ .hash = mctp_sk_hash,
+ .unhash = mctp_sk_unhash,
+};
+
+/* Create a PF_MCTP socket. Only protocol 0 and SOCK_DGRAM are accepted.
+ *
+ * Returns 0 on success, -EPROTONOSUPPORT / -ESOCKTNOSUPPORT for unsupported
+ * arguments, -ENOMEM if the sock cannot be allocated, or the error from the
+ * protocol's init hook.
+ */
+static int mctp_pf_create(struct net *net, struct socket *sock,
+			  int protocol, int kern)
+{
+	struct sock *sk;
+	int rc;
+
+	if (protocol)
+		return -EPROTONOSUPPORT;
+
+	/* only datagram sockets are supported */
+	if (sock->type != SOCK_DGRAM)
+		return -ESOCKTNOSUPPORT;
+
+	sock->state = SS_UNCONNECTED;
+	sock->ops = &mctp_dgram_ops;
+
+	sk = sk_alloc(net, PF_MCTP, GFP_KERNEL, &mctp_proto, kern);
+	if (!sk)
+		return -ENOMEM;
+
+	sock_init_data(sock, sk);
+
+	if (!sk->sk_prot->init)
+		return 0;
+
+	rc = sk->sk_prot->init(sk);
+	if (!rc)
+		return 0;
+
+	/* init failed: detach and drop the sock allocated above */
+	sock_orphan(sk);
+	sock_put(sk);
+	return rc;
+}
+
+static struct net_proto_family mctp_pf = { /* registered with sock_register() in mctp_init() */
+ .family = PF_MCTP,
+ .create = mctp_pf_create,
+ .owner = THIS_MODULE,
+};
+
+/* Module init: register the socket family and protocol, then bring up the
+ * route, neighbour and device layers. On failure, everything that was already
+ * set up is torn down in reverse order and the error is returned.
+ */
+static __init int mctp_init(void)
+{
+	int rc;
+
+	/* ensure our uapi tag definitions match the header format */
+	BUILD_BUG_ON(MCTP_TAG_OWNER != MCTP_HDR_FLAG_TO);
+	BUILD_BUG_ON(MCTP_TAG_MASK != MCTP_HDR_TAG_MASK);
+
+	pr_info("mctp: management component transport protocol core\n");
+
+	rc = sock_register(&mctp_pf);
+	if (rc)
+		return rc;
+
+	rc = proto_register(&mctp_proto, 0);
+	if (rc)
+		goto err_unreg_sock;
+
+	rc = mctp_routes_init();
+	if (rc)
+		goto err_unreg_proto;
+
+	/* routes are initialised by now, so a neighbour failure must undo
+	 * them as well
+	 */
+	rc = mctp_neigh_init();
+	if (rc)
+		goto err_unreg_routes;
+
+	mctp_device_init();
+
+	return 0;
+
+err_unreg_routes:
+	/* NOTE(review): mctp_routes_exit() is now reachable from __init code;
+	 * confirm its definition is not annotated __exit.
+	 */
+	mctp_routes_exit();
+err_unreg_proto:
+	proto_unregister(&mctp_proto);
+err_unreg_sock:
+	sock_unregister(PF_MCTP);
+
+	return rc;
+}
+
+/* Module exit: undo mctp_init() in reverse order of setup. */
+static __exit void mctp_exit(void)
+{
+	/* sub-layers first ... */
+	mctp_device_exit();
+	mctp_neigh_exit();
+	mctp_routes_exit();
+
+	/* ... then the protocol and the socket family */
+	proto_unregister(&mctp_proto);
+	sock_unregister(PF_MCTP);
+}
+
+module_init(mctp_init);
+module_exit(mctp_exit);
+
+MODULE_DESCRIPTION("MCTP core");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jeremy Kerr <jk@codeconstruct.com.au>");
+
+MODULE_ALIAS_NETPROTO(PF_MCTP);
diff --git a/net/mctp/device.c b/net/mctp/device.c
new file mode 100644
index 000000000000..b9f38e765f61
--- /dev/null
+++ b/net/mctp/device.c
@@ -0,0 +1,423 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - device implementation.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/if_link.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
+
+#include <net/addrconf.h>
+#include <net/netlink.h>
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/sock.h>
+
+struct mctp_dump_cb { /* address-dump resume state, overlaid on netlink_callback->ctx */
+ int h; /* current bucket of net->dev_index_head */
+ int idx; /* position of the device within that bucket */
+ size_t a_idx; /* next index into the device's addrs[] to emit */
+};
+
+/* Fetch the MCTP state attached to @dev.
+ * Unlocked: the caller must hold rcu_read_lock.
+ */
+struct mctp_dev *__mctp_dev_get(const struct net_device *dev)
+{
+	struct mctp_dev *mdev = rcu_dereference(dev->mctp_ptr);
+
+	return mdev;
+}
+
+/* Fetch the MCTP state attached to @dev using rtnl_dereference(). */
+struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev)
+{
+	struct mctp_dev *mdev = rtnl_dereference(dev->mctp_ptr);
+
+	return mdev;
+}
+
+/* Drop the net_device reference held by @mdev and free @mdev itself after an
+ * RCU grace period.
+ */
+static void mctp_dev_destroy(struct mctp_dev *mdev)
+{
+	dev_put(mdev->dev);
+	kfree_rcu(mdev, rcu);
+}
+
+/* Append one RTM_NEWADDR message describing @eid on @mdev to the dump @skb.
+ *
+ * Returns 0 on success or -EMSGSIZE if the message does not fit, in which
+ * case the partially built message is cancelled.
+ */
+static int mctp_fill_addrinfo(struct sk_buff *skb, struct netlink_callback *cb,
+			      struct mctp_dev *mdev, mctp_eid_t eid)
+{
+	struct nlmsghdr *nlh;
+	struct ifaddrmsg *ifa;
+
+	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			RTM_NEWADDR, sizeof(*ifa), NLM_F_MULTI);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	ifa = nlmsg_data(nlh);
+	ifa->ifa_family = AF_MCTP;
+	ifa->ifa_prefixlen = 0;
+	ifa->ifa_flags = 0;
+	ifa->ifa_scope = 0;
+	ifa->ifa_index = mdev->dev->ifindex;
+
+	/* the same EID is reported as both the local and the peer address */
+	if (nla_put_u8(skb, IFA_LOCAL, eid) ||
+	    nla_put_u8(skb, IFA_ADDRESS, eid)) {
+		nlmsg_cancel(skb, nlh);
+		return -EMSGSIZE;
+	}
+
+	nlmsg_end(skb, nlh);
+
+	return 0;
+}
+
+/* Emit the addresses of @mdev into @skb, resuming at the a_idx saved in the
+ * dump context. On error a_idx is left at the failing address so a later call
+ * can continue from there. Returns 0 or the negative error from the fill.
+ */
+static int mctp_dump_dev_addrinfo(struct mctp_dev *mdev, struct sk_buff *skb,
+				  struct netlink_callback *cb)
+{
+	struct mctp_dump_cb *state = (void *)cb->ctx;
+	int rc = 0;
+
+	while (state->a_idx < mdev->num_addrs) {
+		rc = mctp_fill_addrinfo(skb, cb, mdev,
+					mdev->addrs[state->a_idx]);
+		if (rc < 0)
+			break;
+		state->a_idx++;
+	}
+
+	return rc;
+}
+
+/* RTM_GETADDR dump handler: walk every net_device of the namespace and emit
+ * the MCTP addresses of each one that has MCTP state, optionally filtered by
+ * the ifa_index given in the request.
+ *
+ * The position (hash bucket, device index within the bucket, address index
+ * within the device) is kept in cb->ctx so a dump that fills @skb can be
+ * resumed by the next call. Returns skb->len.
+ */
+static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct mctp_dump_cb *mcb = (void *)cb->ctx;
+	struct net *net = sock_net(skb->sk);
+	struct hlist_head *head;
+	struct net_device *dev;
+	struct ifaddrmsg *hdr;
+	struct mctp_dev *mdev;
+	int ifindex;
+	/* idx must be defined even if the bucket loop never runs, which is
+	 * the case when the callback is re-entered after a completed walk
+	 */
+	int idx = 0, rc;
+
+	hdr = nlmsg_data(cb->nlh);
+	/* filter by ifindex if requested */
+	ifindex = hdr->ifa_index;
+
+	rcu_read_lock();
+	for (; mcb->h < NETDEV_HASHENTRIES; mcb->h++, mcb->idx = 0) {
+		idx = 0;
+		head = &net->dev_index_head[mcb->h];
+		hlist_for_each_entry_rcu(dev, head, index_hlist) {
+			/* Skip devices completed by an earlier call. The saved
+			 * a_idx must not be touched here: it belongs to the
+			 * device we are about to resume.
+			 */
+			if (idx < mcb->idx) {
+				idx++;
+				continue;
+			}
+
+			if (ifindex == 0 || ifindex == dev->ifindex) {
+				mdev = __mctp_dev_get(dev);
+				if (mdev) {
+					rc = mctp_dump_dev_addrinfo(mdev,
+								    skb, cb);
+					/* Error indicates full buffer, this
+					 * callback will get retried.
+					 */
+					if (rc < 0)
+						goto out;
+				}
+			}
+			idx++;
+			/* this device is done; the next starts at address 0 */
+			mcb->a_idx = 0;
+		}
+	}
+out:
+	rcu_read_unlock();
+	mcb->idx = idx;
+
+	return skb->len;
+}
+
+static const struct nla_policy ifa_mctp_policy[IFA_MAX + 1] = { /* attribute policy for RTM_NEWADDR / RTM_DELADDR parsing */
+ [IFA_ADDRESS] = { .type = NLA_U8 },
+ [IFA_LOCAL] = { .type = NLA_U8 },
+};
+
+/* RTM_NEWADDR handler: add the address from IFA_LOCAL (preferred) or
+ * IFA_ADDRESS to the MCTP device selected by ifa_index, then add its local
+ * route.
+ *
+ * Returns 0 on success, the parse error, -EINVAL for a missing or invalid
+ * address, -ENODEV if the device is unknown or has no MCTP state, -EEXIST for
+ * a duplicate and -ENOMEM on allocation failure.
+ */
+static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
+			    struct netlink_ext_ack *extack)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nlattr *tb[IFA_MAX + 1];
+	struct nlattr *addr_attr;
+	struct net_device *dev;
+	struct mctp_addr *addr;
+	struct mctp_dev *mdev;
+	struct ifaddrmsg *ifm;
+	unsigned long irqflags;
+	u8 *addrs_buf;
+	int rc;
+
+	rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_mctp_policy,
+			 extack);
+	if (rc < 0)
+		return rc;
+
+	ifm = nlmsg_data(nlh);
+
+	addr_attr = tb[IFA_LOCAL] ? tb[IFA_LOCAL] : tb[IFA_ADDRESS];
+	if (!addr_attr)
+		return -EINVAL;
+	addr = nla_data(addr_attr);
+
+	/* find device */
+	dev = __dev_get_by_index(net, ifm->ifa_index);
+	if (!dev)
+		return -ENODEV;
+
+	mdev = mctp_dev_get_rtnl(dev);
+	if (!mdev)
+		return -ENODEV;
+
+	if (!mctp_address_ok(addr->s_addr))
+		return -EINVAL;
+
+	/* Prevent duplicates. Under RTNL so don't need to lock for reading */
+	if (memchr(mdev->addrs, addr->s_addr, mdev->num_addrs))
+		return -EEXIST;
+
+	/* build the enlarged array off to the side ... */
+	addrs_buf = kmalloc(mdev->num_addrs + 1, GFP_KERNEL);
+	if (!addrs_buf)
+		return -ENOMEM;
+	memcpy(addrs_buf, mdev->addrs, mdev->num_addrs);
+	addrs_buf[mdev->num_addrs] = addr->s_addr;
+
+	/* ... then switch over under the lock; addrs_buf now holds the old
+	 * array
+	 */
+	spin_lock_irqsave(&mdev->addrs_lock, irqflags);
+	mdev->num_addrs++;
+	swap(mdev->addrs, addrs_buf);
+	spin_unlock_irqrestore(&mdev->addrs_lock, irqflags);
+
+	kfree(addrs_buf);
+
+	mctp_route_add_local(mdev, addr->s_addr);
+
+	return 0;
+}
+
+/* RTM_DELADDR handler: remove the address from IFA_LOCAL (preferred) or
+ * IFA_ADDRESS from the MCTP device selected by ifa_index, together with its
+ * local route.
+ *
+ * Returns 0 on success, the parse error, -EINVAL without an address
+ * attribute, -ENODEV if the device is unknown or has no MCTP state, -ENOENT
+ * if the address is not configured, or the route-removal error.
+ */
+static int mctp_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
+			    struct netlink_ext_ack *extack)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nlattr *tb[IFA_MAX + 1];
+	struct nlattr *addr_attr;
+	struct net_device *dev;
+	struct mctp_addr *addr;
+	struct mctp_dev *mdev;
+	struct ifaddrmsg *ifm;
+	unsigned long irqflags;
+	u8 *slot;
+	int rc;
+
+	rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_mctp_policy,
+			 extack);
+	if (rc < 0)
+		return rc;
+
+	ifm = nlmsg_data(nlh);
+
+	addr_attr = tb[IFA_LOCAL] ? tb[IFA_LOCAL] : tb[IFA_ADDRESS];
+	if (!addr_attr)
+		return -EINVAL;
+	addr = nla_data(addr_attr);
+
+	/* find device */
+	dev = __dev_get_by_index(net, ifm->ifa_index);
+	if (!dev)
+		return -ENODEV;
+
+	mdev = mctp_dev_get_rtnl(dev);
+	if (!mdev)
+		return -ENODEV;
+
+	slot = memchr(mdev->addrs, addr->s_addr, mdev->num_addrs);
+	if (!slot)
+		return -ENOENT;
+
+	rc = mctp_route_remove_local(mdev, addr->s_addr);
+	/* we can ignore -ENOENT in the case a route was already removed */
+	if (rc < 0 && rc != -ENOENT)
+		return rc;
+
+	/* close the gap by shifting the tail of the array down by one */
+	spin_lock_irqsave(&mdev->addrs_lock, irqflags);
+	memmove(slot, slot + 1, mdev->num_addrs - 1 - (slot - mdev->addrs));
+	mdev->num_addrs--;
+	spin_unlock_irqrestore(&mdev->addrs_lock, irqflags);
+
+	return 0;
+}
+
+/* Allocate MCTP state for @dev and attach it to the net_device. Must be
+ * called with RTNL held.
+ *
+ * Returns the new mctp_dev, or ERR_PTR(-ENOMEM) on allocation failure.
+ */
+static struct mctp_dev *mctp_add_dev(struct net_device *dev)
+{
+	struct mctp_dev *mdev;
+
+	ASSERT_RTNL();
+
+	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
+	if (!mdev)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&mdev->addrs_lock);
+
+	mdev->net = mctp_default_net(dev_net(dev));
+
+	/* Finish initialising mdev, including its back-pointer and the
+	 * reference it holds on the net_device, before publishing it: RCU
+	 * readers that obtain mdev through dev->mctp_ptr dereference
+	 * mdev->dev.
+	 */
+	dev_hold(dev);
+	mdev->dev = dev;
+
+	/* associate to net_device */
+	rcu_assign_pointer(dev->mctp_ptr, mdev);
+
+	return mdev;
+}
+
+/* rtnl_af_ops hook: report the device's MCTP network id as IFLA_MCTP_NET.
+ * Returns 0, -ENODATA if @dev has no MCTP state, or -EMSGSIZE if the
+ * attribute does not fit.
+ */
+static int mctp_fill_link_af(struct sk_buff *skb,
+			     const struct net_device *dev, u32 ext_filter_mask)
+{
+	struct mctp_dev *mdev = mctp_dev_get_rtnl(dev);
+
+	if (!mdev)
+		return -ENODATA;
+
+	return nla_put_u32(skb, IFLA_MCTP_NET, mdev->net) ? -EMSGSIZE : 0;
+}
+
+/* rtnl_af_ops hook: size of the attribute space mctp_fill_link_af() needs for
+ * @dev; 0 when the device has no MCTP state.
+ */
+static size_t mctp_get_link_af_size(const struct net_device *dev,
+				    u32 ext_filter_mask)
+{
+	unsigned int size = 0;
+
+	/* caller holds RCU */
+	if (__mctp_dev_get(dev))
+		size = nla_total_size(4); /* IFLA_MCTP_NET */
+
+	return size;
+}
+
+static const struct nla_policy ifla_af_mctp_policy[IFLA_MCTP_MAX + 1] = { /* policy for the nested attributes parsed in mctp_set_link_af() */
+ [IFLA_MCTP_NET] = { .type = NLA_U32 },
+};
+
+/* rtnl_af_ops hook: apply the nested MCTP link attributes in @attr to @dev.
+ * Only IFLA_MCTP_NET is handled. A device without MCTP state is silently
+ * accepted. Returns 0 or the attribute parse error.
+ */
+static int mctp_set_link_af(struct net_device *dev, const struct nlattr *attr,
+			    struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_MCTP_MAX + 1];
+	struct mctp_dev *mdev;
+	int err;
+
+	err = nla_parse_nested(tb, IFLA_MCTP_MAX, attr, ifla_af_mctp_policy,
+			       NULL);
+	if (err)
+		return err;
+
+	mdev = mctp_dev_get_rtnl(dev);
+	if (mdev && tb[IFLA_MCTP_NET])
+		WRITE_ONCE(mdev->net, nla_get_u32(tb[IFLA_MCTP_NET]));
+
+	return 0;
+}
+
+/* NETDEV_UNREGISTER handling: detach the MCTP state from @dev, remove the
+ * routes and neighbours that refer to it, and release it. Does nothing if
+ * @dev has no MCTP state.
+ */
+static void mctp_unregister(struct net_device *dev)
+{
+	struct mctp_dev *mdev = mctp_dev_get_rtnl(dev);
+
+	if (!mdev)
+		return;
+
+	/* unpublish first so no new lookups find mdev */
+	RCU_INIT_POINTER(mdev->dev->mctp_ptr, NULL);
+
+	mctp_route_remove_dev(mdev);
+	mctp_neigh_remove_dev(mdev);
+
+	/* NOTE(review): addrs is freed immediately, while mdev itself is
+	 * freed via kfree_rcu() and the address dump reads mdev->addrs under
+	 * rcu_read_lock() - confirm that no RCU reader can still be using it.
+	 */
+	kfree(mdev->addrs);
+
+	mctp_dev_destroy(mdev);
+}
+
+/* NETDEV_REGISTER handling: attach MCTP state to @dev if it has none yet and
+ * is of a supported type. Returns 0, or the error from mctp_add_dev().
+ */
+static int mctp_register(struct net_device *dev)
+{
+	struct mctp_dev *mdev;
+	bool supported;
+
+	/* Already registered? */
+	if (rtnl_dereference(dev->mctp_ptr))
+		return 0;
+
+	/* only register specific types; MCTP-specific and loopback for now */
+	supported = dev->type == ARPHRD_MCTP || dev->type == ARPHRD_LOOPBACK;
+	if (!supported)
+		return 0;
+
+	mdev = mctp_add_dev(dev);
+
+	return IS_ERR(mdev) ? PTR_ERR(mdev) : 0;
+}
+
+/* Netdevice notifier: set up MCTP state on NETDEV_REGISTER and tear it down
+ * on NETDEV_UNREGISTER; every other event is ignored. Returns NOTIFY_OK, or
+ * the encoded errno if registration fails.
+ */
+static int mctp_dev_notify(struct notifier_block *this, unsigned long event,
+			   void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	int rc;
+
+	if (event == NETDEV_REGISTER) {
+		rc = mctp_register(dev);
+		if (rc)
+			return notifier_from_errno(rc);
+	} else if (event == NETDEV_UNREGISTER) {
+		mctp_unregister(dev);
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct rtnl_af_ops mctp_af_ops = { /* AF_MCTP link attribute hooks, registered in mctp_device_init() */
+ .family = AF_MCTP,
+ .fill_link_af = mctp_fill_link_af,
+ .get_link_af_size = mctp_get_link_af_size,
+ .set_link_af = mctp_set_link_af,
+};
+
+static struct notifier_block mctp_dev_nb = { /* netdevice notifier, registered in mctp_device_init() */
+ .notifier_call = mctp_dev_notify,
+ .priority = ADDRCONF_NOTIFY_PRIORITY,
+};
+
+/* Register the netdevice notifier, the PF_MCTP address rtnetlink handlers
+ * and the AF_MCTP link attribute ops.
+ */
+void __init mctp_device_init(void)
+{
+	register_netdevice_notifier(&mctp_dev_nb);
+
+	/* address management: dump, add, delete */
+	rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETADDR,
+			     NULL, mctp_dump_addrinfo, 0);
+	rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWADDR,
+			     mctp_rtm_newaddr, NULL, 0);
+	rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELADDR,
+			     mctp_rtm_deladdr, NULL, 0);
+
+	rtnl_af_register(&mctp_af_ops);
+}
+
+/* Undo mctp_device_init() in reverse order of registration. */
+void __exit mctp_device_exit(void)
+{
+	rtnl_af_unregister(&mctp_af_ops);
+
+	rtnl_unregister(PF_MCTP, RTM_DELADDR);
+	rtnl_unregister(PF_MCTP, RTM_NEWADDR);
+	rtnl_unregister(PF_MCTP, RTM_GETADDR);
+
+	unregister_netdevice_notifier(&mctp_dev_nb);
+}
diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c
new file mode 100644
index 000000000000..90ed2f02d1fb
--- /dev/null
+++ b/net/mctp/neigh.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - neighbour
+ * implementation.
+ *
+ * This is currently based on a simple neighbour list, with no cache. The
+ * number of neighbours should stay fairly small, so the lookup cost is small.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/idr.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+
+/* Add a neighbour entry mapping @eid on @mdev to the link-layer address
+ * @lladdr (@lladdr_len bytes), under the namespace's neigh_lock.
+ *
+ * Returns 0 on success, -EEXIST if (mdev, eid) is already present, -EINVAL
+ * if @lladdr_len does not fit in the entry's address buffer, or -ENOMEM.
+ */
+static int mctp_neigh_add(struct mctp_dev *mdev, mctp_eid_t eid,
+ enum mctp_neigh_source source,
+ size_t lladdr_len, const void *lladdr)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_neigh *neigh;
+ int rc;
+
+ mutex_lock(&net->mctp.neigh_lock);
+ if (mctp_neigh_lookup(mdev, eid, NULL) == 0) {
+ rc = -EEXIST;
+ goto out;
+ }
+
+ /* sizeof() only inspects the type; neigh is not dereferenced here */
+ if (lladdr_len > sizeof(neigh->ha)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ neigh = kzalloc(sizeof(*neigh), GFP_KERNEL);
+ if (!neigh) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ INIT_LIST_HEAD(&neigh->list);
+ neigh->dev = mdev;
+ /* device reference is dropped again in __mctp_neigh_free() */
+ dev_hold(neigh->dev->dev);
+ neigh->eid = eid;
+ neigh->source = source;
+ memcpy(neigh->ha, lladdr, lladdr_len);
+
+ list_add_rcu(&neigh->list, &net->mctp.neighbours);
+ rc = 0;
+out:
+ mutex_unlock(&net->mctp.neigh_lock);
+ return rc;
+}
+
+/* RCU callback: drop the device reference taken in mctp_neigh_add() and free
+ * the neighbour entry.
+ */
+static void __mctp_neigh_free(struct rcu_head *rcu)
+{
+ struct mctp_neigh *neigh = container_of(rcu, struct mctp_neigh, rcu);
+
+ dev_put(neigh->dev->dev);
+ kfree(neigh);
+}
+
+/* Removes all neighbour entries referring to a device.
+ *
+ * Entries are unlinked under neigh_lock and freed via call_rcu(), so
+ * concurrent RCU readers may still see them until a grace period elapses.
+ */
+void mctp_neigh_remove_dev(struct mctp_dev *mdev)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_neigh *neigh, *tmp;
+
+ mutex_lock(&net->mctp.neigh_lock);
+ list_for_each_entry_safe(neigh, tmp, &net->mctp.neighbours, list) {
+ if (neigh->dev == mdev) {
+ list_del_rcu(&neigh->list);
+ /* TODO: immediate RTM_DELNEIGH */
+ call_rcu(&neigh->rcu, __mctp_neigh_free);
+ }
+ }
+
+ mutex_unlock(&net->mctp.neigh_lock);
+}
+
+/* Remove the neighbour entries for @eid on @mdev.
+ *
+ * Returns 0 if at least one entry was removed, -ENOENT otherwise.
+ */
+// TODO: add a "source" flag so netlink can only delete static neighbours?
+static int mctp_neigh_remove(struct mctp_dev *mdev, mctp_eid_t eid)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_neigh *neigh, *tmp;
+ bool dropped = false;
+
+ mutex_lock(&net->mctp.neigh_lock);
+ list_for_each_entry_safe(neigh, tmp, &net->mctp.neighbours, list) {
+ if (neigh->dev == mdev && neigh->eid == eid) {
+ list_del_rcu(&neigh->list);
+ /* TODO: immediate RTM_DELNEIGH */
+ call_rcu(&neigh->rcu, __mctp_neigh_free);
+ dropped = true;
+ }
+ }
+
+ mutex_unlock(&net->mctp.neigh_lock);
+ return dropped ? 0 : -ENOENT;
+}
+
+/* Netlink attribute policy for neighbour requests: NDA_DST is a one-byte
+ * EID, NDA_LLADDR is binary data of at most MAX_ADDR_LEN bytes.
+ */
+static const struct nla_policy nd_mctp_policy[NDA_MAX + 1] = {
+ [NDA_DST] = { .type = NLA_U8 },
+ [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+};
+
+/* RTM_NEWNEIGH handler: add a static neighbour entry.
+ *
+ * Requires NDA_DST (a valid EID) and NDA_LLADDR (whose length must equal the
+ * device's addr_len); the device is selected by ndm_ifindex. Returns 0 or a
+ * negative errno.
+ */
+static int mctp_rtm_newneigh(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ struct mctp_dev *mdev;
+ struct ndmsg *ndm;
+ struct nlattr *tb[NDA_MAX + 1];
+ int rc;
+ mctp_eid_t eid;
+ void *lladdr;
+ int lladdr_len;
+
+ rc = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nd_mctp_policy,
+ extack);
+ if (rc < 0) {
+ NL_SET_ERR_MSG(extack, "lladdr too large?");
+ return rc;
+ }
+
+ if (!tb[NDA_DST]) {
+ NL_SET_ERR_MSG(extack, "Neighbour EID must be specified");
+ return -EINVAL;
+ }
+
+ if (!tb[NDA_LLADDR]) {
+ NL_SET_ERR_MSG(extack, "Neighbour lladdr must be specified");
+ return -EINVAL;
+ }
+
+ eid = nla_get_u8(tb[NDA_DST]);
+ if (!mctp_address_ok(eid)) {
+ NL_SET_ERR_MSG(extack, "Invalid neighbour EID");
+ return -EINVAL;
+ }
+
+ lladdr = nla_data(tb[NDA_LLADDR]);
+ lladdr_len = nla_len(tb[NDA_LLADDR]);
+
+ ndm = nlmsg_data(nlh);
+
+ dev = __dev_get_by_index(net, ndm->ndm_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ /* the device exists, but may not have MCTP state attached */
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return -ENODEV;
+
+ if (lladdr_len != dev->addr_len) {
+ NL_SET_ERR_MSG(extack, "Wrong lladdr length");
+ return -EINVAL;
+ }
+
+ return mctp_neigh_add(mdev, eid, MCTP_NEIGH_STATIC,
+ lladdr_len, lladdr);
+}
+
+/* RTM_DELNEIGH handler: remove the neighbour entry for NDA_DST on the device
+ * given by ndm_ifindex. Returns 0 or a negative errno (-ENOENT if no entry
+ * matched).
+ */
+static int mctp_rtm_delneigh(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[NDA_MAX + 1];
+ struct net_device *dev;
+ struct mctp_dev *mdev;
+ struct ndmsg *ndm;
+ int rc;
+ mctp_eid_t eid;
+
+ rc = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nd_mctp_policy,
+ extack);
+ if (rc < 0) {
+ NL_SET_ERR_MSG(extack, "incorrect format");
+ return rc;
+ }
+
+ if (!tb[NDA_DST]) {
+ NL_SET_ERR_MSG(extack, "Neighbour EID must be specified");
+ return -EINVAL;
+ }
+ eid = nla_get_u8(tb[NDA_DST]);
+
+ ndm = nlmsg_data(nlh);
+ dev = __dev_get_by_index(net, ndm->ndm_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return -ENODEV;
+
+ return mctp_neigh_remove(mdev, eid);
+}
+
+/* Append one netlink message describing @neigh to @skb.
+ *
+ * Returns 0 on success, or -EMSGSIZE if the message does not fit; in that
+ * case the partially-built message is cancelled.
+ */
+static int mctp_fill_neigh(struct sk_buff *skb, u32 portid, u32 seq, int event,
+ unsigned int flags, struct mctp_neigh *neigh)
+{
+ struct net_device *dev = neigh->dev->dev;
+ struct nlmsghdr *nlh;
+ struct ndmsg *hdr;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ hdr = nlmsg_data(nlh);
+ hdr->ndm_family = AF_MCTP;
+ hdr->ndm_ifindex = dev->ifindex;
+ hdr->ndm_state = 0; // TODO other state bits?
+ if (neigh->source == MCTP_NEIGH_STATIC)
+ hdr->ndm_state |= NUD_PERMANENT;
+ hdr->ndm_flags = 0;
+ hdr->ndm_type = RTN_UNICAST; // TODO: is loopback RTN_LOCAL?
+
+ if (nla_put_u8(skb, NDA_DST, neigh->eid))
+ goto cancel;
+
+ /* report only the device's address length, not the whole ha buffer */
+ if (nla_put(skb, NDA_LLADDR, dev->addr_len, neigh->ha))
+ goto cancel;
+
+ nlmsg_end(skb, nlh);
+
+ return 0;
+cancel:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+/* RTM_GETNEIGH dump handler.
+ *
+ * Emits one RTM_NEWNEIGH message per neighbour, optionally filtered by the
+ * request's ndm_ifindex (0 means all devices). The index of the next entry
+ * to emit is kept in cb->ctx so that the dump can resume across calls.
+ * Returns skb->len.
+ */
+static int mctp_rtm_getneigh(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ int rc, idx, req_ifindex;
+ struct mctp_neigh *neigh;
+ struct ndmsg *ndmsg;
+ struct {
+ int idx;
+ } *cbctx = (void *)cb->ctx;
+
+ ndmsg = nlmsg_data(cb->nlh);
+ req_ifindex = ndmsg->ndm_ifindex;
+
+ idx = 0;
+ rcu_read_lock();
+ list_for_each_entry_rcu(neigh, &net->mctp.neighbours, list) {
+ /* skip entries already emitted by a previous call */
+ if (idx < cbctx->idx)
+ goto cont;
+
+ rc = 0;
+ if (req_ifindex == 0 || req_ifindex == neigh->dev->dev->ifindex)
+ rc = mctp_fill_neigh(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH, NLM_F_MULTI, neigh);
+
+ /* out of space: stop without advancing idx, so this entry is
+ * retried on the next call
+ */
+ if (rc)
+ break;
+cont:
+ idx++;
+ }
+ rcu_read_unlock();
+
+ cbctx->idx = idx;
+ return skb->len;
+}
+
+/* Look up the neighbour entry for @eid on @mdev.
+ *
+ * If @ret_hwaddr is non-NULL, the entry's full hardware-address buffer
+ * (sizeof(neigh->ha) bytes) is copied into it, so the caller's buffer must
+ * be at least that large.
+ *
+ * Returns 0 if an entry was found, -EHOSTUNREACH otherwise.
+ */
+int mctp_neigh_lookup(struct mctp_dev *mdev, mctp_eid_t eid, void *ret_hwaddr)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_neigh *neigh;
+ int rc = -EHOSTUNREACH; // TODO: or ENOENT?
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(neigh, &net->mctp.neighbours, list) {
+ if (mdev == neigh->dev && eid == neigh->eid) {
+ if (ret_hwaddr)
+ memcpy(ret_hwaddr, neigh->ha,
+ sizeof(neigh->ha));
+ rc = 0;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return rc;
+}
+
+/* namespace registration */
+
+/* Initialise the per-namespace neighbour list and its lock. Always returns
+ * 0.
+ */
+static int __net_init mctp_neigh_net_init(struct net *net)
+{
+ struct netns_mctp *ns = &net->mctp;
+
+ INIT_LIST_HEAD(&ns->neighbours);
+ mutex_init(&ns->neigh_lock);
+ return 0;
+}
+
+/* Schedule every remaining neighbour entry of an exiting namespace for an
+ * RCU free.
+ *
+ * NOTE(review): entries are not unlinked from ns->neighbours and neigh_lock
+ * is not taken here; presumably nothing else can reach the list once the
+ * namespace is exiting - confirm.
+ */
+static void __net_exit mctp_neigh_net_exit(struct net *net)
+{
+ struct netns_mctp *ns = &net->mctp;
+ struct mctp_neigh *neigh;
+
+ list_for_each_entry(neigh, &ns->neighbours, list)
+ call_rcu(&neigh->rcu, __mctp_neigh_free);
+}
+
+/* net namespace implementation */
+
+/* Per-namespace setup/teardown hooks for the neighbour table. */
+static struct pernet_operations mctp_net_ops = {
+ .init = mctp_neigh_net_init,
+ .exit = mctp_neigh_net_exit,
+};
+
+/* Register the PF_MCTP neighbour netlink handlers (RTM_NEWNEIGH,
+ * RTM_DELNEIGH, RTM_GETNEIGH dump) and the per-namespace operations.
+ *
+ * Returns the result of register_pernet_subsys(); the return values of the
+ * rtnl_register_module() calls are not checked.
+ */
+int __init mctp_neigh_init(void)
+{
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWNEIGH,
+ mctp_rtm_newneigh, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELNEIGH,
+ mctp_rtm_delneigh, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETNEIGH,
+ NULL, mctp_rtm_getneigh, 0);
+
+ return register_pernet_subsys(&mctp_net_ops);
+}
+
+/* Undo mctp_neigh_init(), unregistering in the reverse order. */
+void __exit mctp_neigh_exit(void)
+{
+ unregister_pernet_subsys(&mctp_net_ops);
+ rtnl_unregister(PF_MCTP, RTM_GETNEIGH);
+ rtnl_unregister(PF_MCTP, RTM_DELNEIGH);
+ rtnl_unregister(PF_MCTP, RTM_NEWNEIGH);
+}
diff --git a/net/mctp/route.c b/net/mctp/route.c
new file mode 100644
index 000000000000..5265525011ad
--- /dev/null
+++ b/net/mctp/route.c
@@ -0,0 +1,1116 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - routing
+ * implementation.
+ *
+ * This is currently based on a simple routing table, with no dst cache. The
+ * number of routes should stay fairly small, so the lookup cost is small.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/idr.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+
+#include <uapi/linux/if_arp.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+
+/* Upper bound, in bytes, on a reassembled message; mctp_frag_queue() rejects
+ * fragments that would take a reassembly past this length.
+ */
+static const unsigned int mctp_message_maxlen = 64 * 1024;
+
+/* route output callbacks */
+
+/* Default route handler (installed by mctp_route_alloc()): free the packet
+ * and report success.
+ */
+static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
+{
+ kfree_skb(skb);
+ return 0;
+}
+
+/* Find a bound socket willing to receive @skb.
+ *
+ * A socket matches when its bound network is MCTP_NET_ANY or equals the
+ * packet's network, its bound type equals the message type, and its bound
+ * address is MCTP_ADDR_ANY or equals the packet's destination. Must be
+ * called under rcu_read_lock(). Returns the first match, or NULL.
+ */
+static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
+{
+ struct mctp_skb_cb *cb = mctp_cb(skb);
+ struct mctp_hdr *mh;
+ struct sock *sk;
+ u8 type;
+
+ WARN_ON(!rcu_read_lock_held());
+
+ /* TODO: look up in skb->cb? */
+ mh = mctp_hdr(skb);
+
+ if (!skb_headlen(skb))
+ return NULL;
+
+ /* message type: low 7 bits of the first byte at skb->data */
+ type = (*(u8 *)skb->data) & 0x7f;
+
+ sk_for_each_rcu(sk, &net->mctp.binds) {
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+
+ if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
+ continue;
+
+ if (msk->bind_type != type)
+ continue;
+
+ if (msk->bind_addr != MCTP_ADDR_ANY &&
+ msk->bind_addr != mh->dest)
+ continue;
+
+ return msk;
+ }
+
+ return NULL;
+}
+
+/* Returns true when @key is for exactly the (@local, @peer, @tag) tuple. */
+static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
+			   mctp_eid_t peer, u8 tag)
+{
+	return key->local_addr == local &&
+	       key->peer_addr == peer &&
+	       key->tag == tag;
+}
+
+/* Find the key matching the packet's destination address, the given @peer
+ * address and the packet's tag (tag value plus tag-owner bit).
+ *
+ * Must be called under rcu_read_lock(). Returns the first matching key on
+ * the namespace's key list, or NULL if there is none.
+ */
+static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
+					   mctp_eid_t peer)
+{
+	struct mctp_sk_key *cur;
+	struct mctp_hdr *hdr;
+	u8 tag;
+
+	WARN_ON(!rcu_read_lock_held());
+
+	hdr = mctp_hdr(skb);
+	tag = hdr->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+
+	hlist_for_each_entry_rcu(cur, &net->mctp.keys, hlist) {
+		if (mctp_key_match(cur, hdr->dest, peer, tag))
+			return cur;
+	}
+
+	return NULL;
+}
+
+/* Allocate and initialise a key for (@local, @peer, @tag) owned by @msk.
+ *
+ * The key is not added to any list here. Returns the new key, or NULL on
+ * allocation failure.
+ */
+static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
+ mctp_eid_t local, mctp_eid_t peer,
+ u8 tag, gfp_t gfp)
+{
+ struct mctp_sk_key *key;
+
+ key = kzalloc(sizeof(*key), gfp);
+ if (!key)
+ return NULL;
+
+ key->peer_addr = peer;
+ key->local_addr = local;
+ key->tag = tag;
+ key->sk = &msk->sk;
+ spin_lock_init(&key->reasm_lock);
+
+ return key;
+}
+
+/* Publish @key on the namespace's key list and on @msk's key list, unless a
+ * key with the same (local, peer, tag) tuple already exists.
+ *
+ * Returns 0 on success, or -EEXIST if a matching key is already present; in
+ * that case @key is left untouched and still belongs to the caller.
+ */
+static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
+{
+	struct net *net = sock_net(&msk->sk);
+	struct mctp_sk_key *tmp;
+	unsigned long flags;
+	int rc = 0;
+
+	spin_lock_irqsave(&net->mctp.keys_lock, flags);
+
+	hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
+		if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
+				   key->tag)) {
+			rc = -EEXIST;
+			break;
+		}
+	}
+
+	if (!rc) {
+		/* net->mctp.keys is walked locklessly under RCU by
+		 * mctp_lookup_key(), so publish with the _rcu helpers (as
+		 * mctp_reserve_tag() does) to order the key's
+		 * initialisation before it becomes visible.
+		 */
+		hlist_add_head_rcu(&key->hlist, &net->mctp.keys);
+		hlist_add_head_rcu(&key->sklist, &msk->keys);
+	}
+
+	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+	return rc;
+}
+
+/* Must be called with key->reasm_lock, which it will release. Will schedule
+ * the key for an RCU free.
+ *
+ * Any pending reassembly on the key is detached and freed, and the key is
+ * marked dead so that it is not used to start another reassembly.
+ */
+static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
+ unsigned long flags)
+ __releases(&key->reasm_lock)
+{
+ struct sk_buff *skb;
+
+ skb = key->reasm_head;
+ key->reasm_head = NULL;
+ key->reasm_dead = true;
+ spin_unlock_irqrestore(&key->reasm_lock, flags);
+
+ /* unlink from both the namespace and the socket key lists */
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ hlist_del_rcu(&key->hlist);
+ hlist_del_rcu(&key->sklist);
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+ kfree_rcu(key, rcu);
+
+ if (skb)
+ kfree_skb(skb);
+}
+
+/* Queue fragment @skb on @key's reassembly chain.
+ *
+ * The first fragment becomes the head of the chain; later fragments are
+ * appended to the head's frag_list and accounted in the head's len,
+ * data_len and truesize.
+ *
+ * Returns 0 on success, or -EINVAL if the fragment's sequence number is not
+ * the expected one or the message would exceed mctp_message_maxlen. On error
+ * @skb is not consumed.
+ */
+static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
+{
+ struct mctp_hdr *hdr = mctp_hdr(skb);
+ u8 exp_seq, this_seq;
+
+ this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
+ & MCTP_HDR_SEQ_MASK;
+
+ /* first fragment: start a new chain */
+ if (!key->reasm_head) {
+ key->reasm_head = skb;
+ key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
+ key->last_seq = this_seq;
+ return 0;
+ }
+
+ /* sequence numbers wrap within MCTP_HDR_SEQ_MASK */
+ exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;
+
+ if (this_seq != exp_seq)
+ return -EINVAL;
+
+ if (key->reasm_head->len + skb->len > mctp_message_maxlen)
+ return -EINVAL;
+
+ skb->next = NULL;
+ skb->sk = NULL;
+ *key->reasm_tailp = skb;
+ key->reasm_tailp = &skb->next;
+
+ key->last_seq = this_seq;
+
+ key->reasm_head->data_len += skb->len;
+ key->reasm_head->len += skb->len;
+ key->reasm_head->truesize += skb->truesize;
+
+ return 0;
+}
+
+/* Route handler for locally-destined packets: find the receiving socket and
+ * either deliver the message directly (single-packet message) or feed the
+ * packet into per-key reassembly.
+ *
+ * Consumes @skb in all cases: on success it has been queued to a socket or
+ * onto a reassembly chain; on failure it is freed here.
+ *
+ * Returns 0 on success, or a negative errno: -EINVAL for malformed or
+ * out-of-sequence packets, -ENOENT when no socket or key matches, -ENOMEM,
+ * -EEXIST for a duplicate start-of-message, or the error from
+ * sock_queue_rcv_skb() when a single-packet message cannot be queued.
+ */
+static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
+{
+	struct net *net = dev_net(skb->dev);
+	struct mctp_sk_key *key;
+	struct mctp_sock *msk;
+	struct mctp_hdr *mh;
+	unsigned long f;
+	u8 tag, flags;
+	int rc;
+
+	msk = NULL;
+	rc = -EINVAL;
+
+	/* we may be receiving a locally-routed packet; drop source sk
+	 * accounting
+	 */
+	skb_orphan(skb);
+
+	/* ensure we have enough data for a header and a type */
+	if (skb->len < sizeof(struct mctp_hdr) + 1)
+		goto out;
+
+	/* grab header, advance data ptr */
+	mh = mctp_hdr(skb);
+	skb_pull(skb, sizeof(struct mctp_hdr));
+
+	if (mh->ver != 1)
+		goto out;
+
+	flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
+	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+
+	rcu_read_lock();
+
+	/* lookup socket / reasm context, exactly matching (src,dest,tag) */
+	key = mctp_lookup_key(net, skb, mh->src);
+
+	if (flags & MCTP_HDR_FLAG_SOM) {
+		if (key) {
+			msk = container_of(key->sk, struct mctp_sock, sk);
+		} else {
+			/* first response to a broadcast? do a more general
+			 * key lookup to find the socket, but don't use this
+			 * key for reassembly - we'll create a more specific
+			 * one for future packets if required (ie, !EOM).
+			 */
+			key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY);
+			if (key) {
+				msk = container_of(key->sk,
+						   struct mctp_sock, sk);
+				key = NULL;
+			}
+		}
+
+		if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
+			msk = mctp_lookup_bind(net, skb);
+
+		if (!msk) {
+			rc = -ENOENT;
+			goto out_unlock;
+		}
+
+		/* single-packet message? deliver to socket, clean up any
+		 * pending key.
+		 */
+		if (flags & MCTP_HDR_FLAG_EOM) {
+			/* if the socket refuses the skb we still own it;
+			 * keep the error in rc so that it is freed on exit
+			 * rather than leaked
+			 */
+			rc = sock_queue_rcv_skb(&msk->sk, skb);
+			if (key) {
+				spin_lock_irqsave(&key->reasm_lock, f);
+				/* we've hit a pending reassembly; not much we
+				 * can do but drop it
+				 */
+				__mctp_key_unlock_drop(key, net, f);
+			}
+			goto out_unlock;
+		}
+
+		/* broadcast response or a bind() - create a key for further
+		 * packets for this message
+		 */
+		if (!key) {
+			key = mctp_key_alloc(msk, mh->dest, mh->src,
+					     tag, GFP_ATOMIC);
+			if (!key) {
+				rc = -ENOMEM;
+				goto out_unlock;
+			}
+
+			/* we can queue without the reasm lock here, as the
+			 * key isn't observable yet
+			 */
+			mctp_frag_queue(key, skb);
+
+			/* if the key_add fails, we've raced with another
+			 * SOM packet with the same src, dest and tag. There's
+			 * no way to distinguish future packets, so all we
+			 * can do is drop; we'll free the skb on exit from
+			 * this function.
+			 */
+			rc = mctp_key_add(key, msk);
+			if (rc)
+				kfree(key);
+
+		} else {
+			/* existing key: start reassembly */
+			spin_lock_irqsave(&key->reasm_lock, f);
+
+			if (key->reasm_head || key->reasm_dead) {
+				/* duplicate start? drop everything */
+				__mctp_key_unlock_drop(key, net, f);
+				rc = -EEXIST;
+			} else {
+				rc = mctp_frag_queue(key, skb);
+				spin_unlock_irqrestore(&key->reasm_lock, f);
+			}
+		}
+
+	} else if (key) {
+		/* this packet continues a previous message; reassemble
+		 * using the message-specific key
+		 */
+
+		spin_lock_irqsave(&key->reasm_lock, f);
+
+		/* we need to be continuing an existing reassembly... */
+		if (!key->reasm_head)
+			rc = -EINVAL;
+		else
+			rc = mctp_frag_queue(key, skb);
+
+		/* end of message? deliver to socket, and we're done with
+		 * the reassembly/response key
+		 */
+		if (!rc && flags & MCTP_HDR_FLAG_EOM) {
+			/* On success the socket owns the chain. On failure
+			 * leave it attached to the key so that the drop
+			 * below frees it. rc stays 0 either way: skb is
+			 * already part of the chain and must not be freed
+			 * again on exit.
+			 */
+			if (!sock_queue_rcv_skb(key->sk, key->reasm_head))
+				key->reasm_head = NULL;
+			__mctp_key_unlock_drop(key, net, f);
+		} else {
+			spin_unlock_irqrestore(&key->reasm_lock, f);
+		}
+
+	} else {
+		/* not a start, no matching key */
+		rc = -ENOENT;
+	}
+
+out_unlock:
+	rcu_read_unlock();
+out:
+	if (rc)
+		kfree_skb(skb);
+	return rc;
+}
+
+/* Effective MTU for a route: the route's own mtu when non-zero, otherwise
+ * the current MTU of the route's device.
+ */
+static unsigned int mctp_route_mtu(struct mctp_route *rt)
+{
+ return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
+}
+
+/* Route handler for RTN_UNICAST routes: build the link-layer header and
+ * queue the packet for transmission on skb->dev.
+ *
+ * Consumes @skb. Returns 0 on success, -EMSGSIZE if the packet exceeds the
+ * device MTU, -EHOSTUNREACH if the link-layer header cannot be built, or the
+ * errno translation of the dev_queue_xmit() result.
+ */
+static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
+{
+	struct mctp_hdr *hdr = mctp_hdr(skb);
+	char daddr_buf[MAX_ADDR_LEN];
+	char *daddr = NULL;
+	unsigned int mtu;
+	int rc;
+
+	skb->protocol = htons(ETH_P_MCTP);
+
+	mtu = READ_ONCE(skb->dev->mtu);
+	if (skb->len > mtu) {
+		kfree_skb(skb);
+		return -EMSGSIZE;
+	}
+
+	/* If lookup fails let the device handle daddr==NULL */
+	if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
+		daddr = daddr_buf;
+
+	/* dev_hard_header() returns the length of the header it built
+	 * (zero or more) on success, so only a negative value is an error.
+	 */
+	rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
+			     daddr, skb->dev->dev_addr, skb->len);
+	if (rc < 0) {
+		kfree_skb(skb);
+		return -EHOSTUNREACH;
+	}
+
+	rc = dev_queue_xmit(skb);
+	if (rc)
+		rc = net_xmit_errno(rc);
+
+	return rc;
+}
+
+/* route alloc/release */
+
+/* Drop one reference on @rt. When the last reference goes, the route's
+ * device reference is released and the route is freed after an RCU grace
+ * period.
+ */
+static void mctp_route_release(struct mctp_route *rt)
+{
+ if (refcount_dec_and_test(&rt->refs)) {
+ dev_put(rt->dev->dev);
+ kfree_rcu(rt, rcu);
+ }
+}
+
+/* returns a route with the refcount at 1
+ *
+ * The new route is zeroed, not on any list, and uses mctp_route_discard() as
+ * its output handler until the caller installs another one. Returns NULL on
+ * allocation failure.
+ */
+static struct mctp_route *mctp_route_alloc(void)
+{
+ struct mctp_route *rt;
+
+ rt = kzalloc(sizeof(*rt), GFP_KERNEL);
+ if (!rt)
+ return NULL;
+
+ INIT_LIST_HEAD(&rt->list);
+ refcount_set(&rt->refs, 1);
+ rt->output = mctp_route_discard;
+
+ return rt;
+}
+
+/* Returns the namespace's current default MCTP network index. */
+unsigned int mctp_default_net(struct net *net)
+{
+ return READ_ONCE(net->mctp.default_net);
+}
+
+/* Set the namespace's default MCTP network index.
+ *
+ * Returns 0 on success, or -EINVAL if @index is zero (no change is made).
+ */
+int mctp_default_net_set(struct net *net, unsigned int index)
+{
+	if (!index)
+		return -EINVAL;
+
+	WRITE_ONCE(net->mctp.default_net, index);
+
+	return 0;
+}
+
+/* tag management */
+
+/* Publish @key on both the namespace's key list and @msk's key list. The
+ * caller must hold the namespace's keys_lock.
+ */
+static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
+ struct mctp_sock *msk)
+{
+ struct netns_mctp *mns = &net->mctp;
+
+ lockdep_assert_held(&mns->keys_lock);
+
+ /* we hold the net->key_lock here, allowing updates to both
+ * the net and sk
+ */
+ hlist_add_head_rcu(&key->hlist, &mns->keys);
+ hlist_add_head_rcu(&key->sklist, &msk->keys);
+}
+
+/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
+ * it for the socket msk
+ *
+ * On success the tag is stored in *@tagp and a key for it is published.
+ * Returns 0 on success, -ENOMEM if the key cannot be allocated, or -EAGAIN
+ * if all eight tag values are already in use for this address pair.
+ */
+static int mctp_alloc_local_tag(struct mctp_sock *msk,
+ mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
+{
+ struct net *net = sock_net(&msk->sk);
+ struct netns_mctp *mns = &net->mctp;
+ struct mctp_sk_key *key, *tmp;
+ unsigned long flags;
+ int rc = -EAGAIN;
+ u8 tagbits;
+
+ /* be optimistic, alloc now */
+ key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
+ if (!key)
+ return -ENOMEM;
+
+ /* 8 possible tag values */
+ tagbits = 0xff;
+
+ spin_lock_irqsave(&mns->keys_lock, flags);
+
+ /* Walk through the existing keys, looking for potential conflicting
+ * tags. If we find a conflict, clear that bit from tagbits
+ */
+ hlist_for_each_entry(tmp, &mns->keys, hlist) {
+ /* if we don't own the tag, it can't conflict */
+ if (tmp->tag & MCTP_HDR_FLAG_TO)
+ continue;
+
+ if ((tmp->peer_addr == daddr ||
+ tmp->peer_addr == MCTP_ADDR_ANY) &&
+ tmp->local_addr == saddr)
+ tagbits &= ~(1 << tmp->tag);
+
+ if (!tagbits)
+ break;
+ }
+
+ if (tagbits) {
+ /* take the lowest-numbered free tag */
+ key->tag = __ffs(tagbits);
+ mctp_reserve_tag(net, key, msk);
+ *tagp = key->tag;
+ rc = 0;
+ }
+
+ spin_unlock_irqrestore(&mns->keys_lock, flags);
+
+ /* no tag available: the key was never published, so free it directly */
+ if (!tagbits)
+ kfree(key);
+
+ return rc;
+}
+
+/* routing lookups */
+
+/* Returns true when @rt's device is on network @net and @eid lies within
+ * the route's [min, max] EID range.
+ */
+static bool mctp_rt_match_eid(struct mctp_route *rt,
+			      unsigned int net, mctp_eid_t eid)
+{
+	if (READ_ONCE(rt->dev->net) != net)
+		return false;
+
+	return eid >= rt->min && eid <= rt->max;
+}
+
+/* compares match, used for duplicate prevention
+ *
+ * Two routes compare equal when their devices are on the same network and
+ * their EID ranges are identical. Must be called with RTNL held.
+ */
+static bool mctp_rt_compare_exact(struct mctp_route *rt1,
+ struct mctp_route *rt2)
+{
+ ASSERT_RTNL();
+ return rt1->dev->net == rt2->dev->net &&
+ rt1->min == rt2->min &&
+ rt1->max == rt2->max;
+}
+
+/* Find the first route matching (@dnet, @daddr) and take a reference on it.
+ *
+ * Returns the route, which the caller must release (mctp_do_route() does
+ * so), or NULL if no route matches.
+ *
+ * NOTE(review): the list is walked with list_for_each_entry_rcu() and no
+ * lock is taken here, so callers presumably need to be in an RCU read-side
+ * section - confirm against the callers.
+ */
+struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
+ mctp_eid_t daddr)
+{
+ struct mctp_route *tmp, *rt = NULL;
+
+ list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
+ /* TODO: add metrics */
+ if (mctp_rt_match_eid(tmp, dnet, daddr)) {
+ /* skip routes whose last reference is already gone */
+ if (refcount_inc_not_zero(&tmp->refs)) {
+ rt = tmp;
+ break;
+ }
+ }
+ }
+
+ return rt;
+}
+
+/* sends a skb to rt and releases the route.
+ *
+ * Returns whatever the route's output handler returns.
+ */
+int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
+{
+ int rc;
+
+ rc = rt->output(rt, skb);
+ mctp_route_release(rt);
+ return rc;
+}
+
+/* Split @skb into fragments of at most @mtu bytes (MCTP header included) and
+ * send each one through @rt's output handler.
+ *
+ * Consumes @skb and releases the caller's reference on @rt on every path,
+ * including the early "mtu too small" failure.
+ *
+ * Returns 0 on success, -EMSGSIZE if @mtu cannot hold a header plus at least
+ * one payload byte, -ENOMEM if a fragment cannot be allocated, or the first
+ * error returned by the output handler.
+ */
+static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
+				  unsigned int mtu, u8 tag)
+{
+	const unsigned int hlen = sizeof(struct mctp_hdr);
+	struct mctp_hdr *hdr, *hdr2;
+	unsigned int pos, size;
+	struct sk_buff *skb2;
+	int rc;
+	u8 seq;
+
+	hdr = mctp_hdr(skb);
+	seq = 0;
+	rc = 0;
+
+	if (mtu < hlen + 1) {
+		/* drop the route reference here too, as every other exit
+		 * from this function does
+		 */
+		mctp_route_release(rt);
+		kfree_skb(skb);
+		return -EMSGSIZE;
+	}
+
+	/* we've got the header */
+	skb_pull(skb, hlen);
+
+	for (pos = 0; pos < skb->len;) {
+		/* size of message payload */
+		size = min(mtu - hlen, skb->len - pos);
+
+		skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL);
+		if (!skb2) {
+			rc = -ENOMEM;
+			break;
+		}
+
+		/* generic skb copy */
+		skb2->protocol = skb->protocol;
+		skb2->priority = skb->priority;
+		skb2->dev = skb->dev;
+		memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));
+
+		if (skb->sk)
+			skb_set_owner_w(skb2, skb->sk);
+
+		/* establish packet */
+		skb_reserve(skb2, MCTP_HEADER_MAXLEN);
+		skb_reset_network_header(skb2);
+		skb_put(skb2, hlen + size);
+		skb2->transport_header = skb2->network_header + hlen;
+
+		/* copy header fields, calculate SOM/EOM flags & seq */
+		hdr2 = mctp_hdr(skb2);
+		hdr2->ver = hdr->ver;
+		hdr2->dest = hdr->dest;
+		hdr2->src = hdr->src;
+		hdr2->flags_seq_tag = tag &
+			(MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+
+		if (pos == 0)
+			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;
+
+		if (pos + size == skb->len)
+			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;
+
+		hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;
+
+		/* copy message payload */
+		skb_copy_bits(skb, pos, skb_transport_header(skb2), size);
+
+		/* do route, but don't drop the rt reference */
+		rc = rt->output(rt, skb2);
+		if (rc)
+			break;
+
+		seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
+		pos += size;
+	}
+
+	mctp_route_release(rt);
+	consume_skb(skb);
+	return rc;
+}
+
+/* Send a locally-generated message in @skb to @daddr via @rt.
+ *
+ * The source address is the first address of the route's device. If
+ * @req_tag has MCTP_HDR_FLAG_TO set, a new locally-owned tag is allocated
+ * for @sk; otherwise @req_tag is used as-is. Messages larger than the route
+ * MTU are fragmented.
+ *
+ * Ownership: @skb is always consumed, and the caller's reference on @rt is
+ * released, whether or not the send succeeds. The one exception is the
+ * WARN_ON(!rt->dev) sanity failure, where the route cannot be released
+ * safely (see below).
+ *
+ * Returns 0 on success or a negative errno (-EINVAL, -EHOSTUNREACH when the
+ * device has no address, a tag-allocation error, or a transmit error).
+ */
+int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+		      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
+{
+	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+	struct mctp_skb_cb *cb = mctp_cb(skb);
+	struct mctp_hdr *hdr;
+	unsigned long flags;
+	unsigned int mtu;
+	mctp_eid_t saddr;
+	int rc;
+	u8 tag;
+
+	if (WARN_ON(!rt->dev)) {
+		/* mctp_route_release() dereferences rt->dev when the last
+		 * reference goes, so the route is not released here; only
+		 * the skb is freed
+		 */
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&rt->dev->addrs_lock, flags);
+	if (rt->dev->num_addrs == 0) {
+		rc = -EHOSTUNREACH;
+	} else {
+		/* use the outbound interface's first address as our source */
+		saddr = rt->dev->addrs[0];
+		rc = 0;
+	}
+	spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
+
+	if (rc)
+		goto err_release;
+
+	if (req_tag & MCTP_HDR_FLAG_TO) {
+		rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
+		if (rc)
+			goto err_release;
+		tag |= MCTP_HDR_FLAG_TO;
+	} else {
+		tag = req_tag;
+	}
+
+	skb->protocol = htons(ETH_P_MCTP);
+	skb->priority = 0;
+	skb_reset_transport_header(skb);
+	skb_push(skb, sizeof(struct mctp_hdr));
+	skb_reset_network_header(skb);
+	skb->dev = rt->dev->dev;
+
+	/* cb->net will have been set on initial ingress */
+	cb->src = saddr;
+
+	/* set up common header fields */
+	hdr = mctp_hdr(skb);
+	hdr->ver = 1;
+	hdr->dest = daddr;
+	hdr->src = saddr;
+
+	mtu = mctp_route_mtu(rt);
+
+	if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
+		hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM |
+			tag;
+		return mctp_do_route(rt, skb);
+	} else {
+		return mctp_do_fragment_route(rt, skb, mtu, tag);
+	}
+
+err_release:
+	/* match the transmit paths above, which consume the skb and drop
+	 * the route reference even when they fail
+	 */
+	mctp_route_release(rt);
+	kfree_skb(skb);
+	return rc;
+}
+
+/* route management */
+
+/* Add a route for EIDs [@daddr_start, @daddr_start + @daddr_extent] via
+ * @mdev. @type selects the handler: RTN_LOCAL delivers to local sockets,
+ * RTN_UNICAST transmits on the device. Must be called with RTNL held.
+ *
+ * Returns 0 on success, -EINVAL for a bad address, range or type, -ENOMEM,
+ * or -EEXIST if a route with the same network and range already exists.
+ */
+static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+ unsigned int daddr_extent, unsigned int mtu,
+ unsigned char type)
+{
+ int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb);
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_route *rt, *ert;
+
+ if (!mctp_address_ok(daddr_start))
+ return -EINVAL;
+
+ if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
+ return -EINVAL;
+
+ switch (type) {
+ case RTN_LOCAL:
+ rtfn = mctp_route_input;
+ break;
+ case RTN_UNICAST:
+ rtfn = mctp_route_output;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ rt = mctp_route_alloc();
+ if (!rt)
+ return -ENOMEM;
+
+ rt->min = daddr_start;
+ rt->max = daddr_start + daddr_extent;
+ rt->mtu = mtu;
+ rt->dev = mdev;
+ /* device reference is dropped in mctp_route_release() */
+ dev_hold(rt->dev->dev);
+ rt->type = type;
+ rt->output = rtfn;
+
+ ASSERT_RTNL();
+ /* Prevent duplicate identical routes. */
+ list_for_each_entry(ert, &net->mctp.routes, list) {
+ if (mctp_rt_compare_exact(rt, ert)) {
+ mctp_route_release(rt);
+ return -EEXIST;
+ }
+ }
+
+ list_add_rcu(&rt->list, &net->mctp.routes);
+
+ return 0;
+}
+
+/* Remove the routes via @mdev whose EID range is exactly
+ * [@daddr_start, @daddr_start + @daddr_extent]. Must be called with RTNL
+ * held.
+ *
+ * Returns 0 if at least one route was removed, -ENOENT if none matched, or
+ * -EINVAL for an out-of-bounds range.
+ */
+static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+ unsigned int daddr_extent)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_route *rt, *tmp;
+ mctp_eid_t daddr_end;
+ bool dropped;
+
+ if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
+ return -EINVAL;
+
+ daddr_end = daddr_start + daddr_extent;
+ dropped = false;
+
+ ASSERT_RTNL();
+
+ list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
+ if (rt->dev == mdev &&
+ rt->min == daddr_start && rt->max == daddr_end) {
+ list_del_rcu(&rt->list);
+ /* TODO: immediate RTM_DELROUTE */
+ mctp_route_release(rt);
+ dropped = true;
+ }
+ }
+
+ return dropped ? 0 : -ENOENT;
+}
+
+/* Add an RTN_LOCAL route for the single EID @addr on @mdev, with no
+ * route-specific MTU. Returns 0 or a negative errno from mctp_route_add().
+ */
+int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
+{
+ return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
+}
+
+/* Remove the single-EID route for @addr on @mdev. Returns 0 or a negative
+ * errno from mctp_route_remove().
+ */
+int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
+{
+ return mctp_route_remove(mdev, addr, 0);
+}
+
+/* removes all entries for a given device
+ *
+ * Each matching route is unlinked and its list reference dropped. Must be
+ * called with RTNL held.
+ */
+void mctp_route_remove_dev(struct mctp_dev *mdev)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_route *rt, *tmp;
+
+ ASSERT_RTNL();
+ list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
+ if (rt->dev == mdev) {
+ list_del_rcu(&rt->list);
+ /* TODO: immediate RTM_DELROUTE */
+ mctp_route_release(rt);
+ }
+ }
+}
+
+/* Incoming packet-handling */
+
+/* Packet-type handler for ETH_P_MCTP frames: validate the header, record
+ * the receiving device's MCTP network in the skb control block, and hand
+ * the packet to the matching route.
+ *
+ * Consumes @skb. Returns NET_RX_SUCCESS once the packet has been handed to
+ * a route, or NET_RX_DROP if it was rejected and freed here.
+ */
+static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
+				struct packet_type *pt,
+				struct net_device *orig_dev)
+{
+	struct net *net = dev_net(dev);
+	struct mctp_skb_cb *cb;
+	struct mctp_route *rt;
+	struct mctp_dev *mdev;
+	struct mctp_hdr *mh;
+
+	/* basic non-data sanity checks */
+	if (dev->type != ARPHRD_MCTP)
+		goto err_drop;
+
+	if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
+		goto err_drop;
+
+	skb_reset_transport_header(skb);
+	skb_reset_network_header(skb);
+
+	/* We have enough for a header; decode and route */
+	mh = mctp_hdr(skb);
+	if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
+		goto err_drop;
+
+	cb = __mctp_cb(skb);
+	rcu_read_lock();
+	/* the device's MCTP state is cleared on unregister, so it may be
+	 * missing by the time a packet arrives; drop rather than
+	 * dereference a NULL pointer
+	 */
+	mdev = __mctp_dev_get(dev);
+	if (!mdev) {
+		rcu_read_unlock();
+		goto err_drop;
+	}
+	cb->net = READ_ONCE(mdev->net);
+	rcu_read_unlock();
+
+	rt = mctp_route_lookup(net, cb->net, mh->dest);
+	if (!rt)
+		goto err_drop;
+
+	mctp_do_route(rt, skb);
+
+	return NET_RX_SUCCESS;
+
+err_drop:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+/* Packet-type registration directing ETH_P_MCTP frames to
+ * mctp_pkttype_receive().
+ */
+static struct packet_type mctp_packet_type = {
+ .type = cpu_to_be16(ETH_P_MCTP),
+ .func = mctp_pkttype_receive,
+};
+
+/* netlink interface */
+
+/* Attribute policy for route requests: RTA_DST is a one-byte EID,
+ * RTA_METRICS is a nested attribute, RTA_OIF is a 32-bit interface index.
+ */
+static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
+ [RTA_DST] = { .type = NLA_U8 },
+ [RTA_METRICS] = { .type = NLA_NESTED },
+ [RTA_OIF] = { .type = NLA_U32 },
+};
+
+/* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
+ * tb must hold RTA_MAX+1 elements.
+ *
+ * On success, *@rtm points at the request's rtmsg, *@mdev at the MCTP device
+ * for RTA_OIF, and *@daddr_start holds RTA_DST. Returns 0, or a negative
+ * errno: a parse error, -EINVAL for a missing attribute, wrong family or
+ * loopback device, -ENODEV for an unknown or non-MCTP device.
+ */
+static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ struct nlattr **tb, struct rtmsg **rtm,
+ struct mctp_dev **mdev, mctp_eid_t *daddr_start)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ unsigned int ifindex;
+ int rc;
+
+ rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
+ rta_mctp_policy, extack);
+ if (rc < 0) {
+ NL_SET_ERR_MSG(extack, "incorrect format");
+ return rc;
+ }
+
+ if (!tb[RTA_DST]) {
+ NL_SET_ERR_MSG(extack, "dst EID missing");
+ return -EINVAL;
+ }
+ *daddr_start = nla_get_u8(tb[RTA_DST]);
+
+ if (!tb[RTA_OIF]) {
+ NL_SET_ERR_MSG(extack, "ifindex missing");
+ return -EINVAL;
+ }
+ ifindex = nla_get_u32(tb[RTA_OIF]);
+
+ *rtm = nlmsg_data(nlh);
+ if ((*rtm)->rtm_family != AF_MCTP) {
+ NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
+ return -EINVAL;
+ }
+
+ dev = __dev_get_by_index(net, ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "bad ifindex");
+ return -ENODEV;
+ }
+ *mdev = mctp_dev_get_rtnl(dev);
+ if (!*mdev)
+ return -ENODEV;
+
+ if (dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack, "no routes to loopback");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* RTM_NEWROUTE handler: add a unicast route for the EID range starting at
+ * RTA_DST and spanning rtm_dst_len further EIDs, via the device in RTA_OIF.
+ *
+ * Returns 0 on success or a negative errno from parsing or from
+ * mctp_route_add(); -EINVAL if rtm_type is not RTN_UNICAST.
+ */
+static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+			 struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[RTA_MAX + 1];
+	mctp_eid_t daddr_start;
+	struct mctp_dev *mdev;
+	struct rtmsg *rtm;
+	unsigned int mtu;
+	int rc;
+
+	rc = mctp_route_nlparse(skb, nlh, extack, tb,
+				&rtm, &mdev, &daddr_start);
+	if (rc < 0)
+		return rc;
+
+	if (rtm->rtm_type != RTN_UNICAST) {
+		NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
+		return -EINVAL;
+	}
+
+	/* TODO: parse mtu from nlparse */
+	mtu = 0;
+
+	rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu,
+			    rtm->rtm_type);
+	return rc;
+}
+
+/* RTM_DELROUTE handler: remove the unicast route matching RTA_DST,
+ * rtm_dst_len and RTA_OIF. Returns 0 or a negative errno.
+ */
+static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RTA_MAX + 1];
+ mctp_eid_t daddr_start;
+ struct mctp_dev *mdev;
+ struct rtmsg *rtm;
+ int rc;
+
+ rc = mctp_route_nlparse(skb, nlh, extack, tb,
+ &rtm, &mdev, &daddr_start);
+ if (rc < 0)
+ return rc;
+
+ /* we only have unicast routes */
+ if (rtm->rtm_type != RTN_UNICAST)
+ return -EINVAL;
+
+ rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len);
+ return rc;
+}
+
+/* Append one netlink route message describing @rt to @skb.
+ *
+ * Returns 0 on success, or -EMSGSIZE if the message does not fit; in that
+ * case the partially-built message is cancelled.
+ */
+static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
+ u32 portid, u32 seq, int event, unsigned int flags)
+{
+ struct nlmsghdr *nlh;
+ struct rtmsg *hdr;
+ void *metrics;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ hdr = nlmsg_data(nlh);
+ hdr->rtm_family = AF_MCTP;
+
+ /* we use the _len fields as a number of EIDs, rather than
+ * a number of bits in the address
+ */
+ hdr->rtm_dst_len = rt->max - rt->min;
+ hdr->rtm_src_len = 0;
+ hdr->rtm_tos = 0;
+ hdr->rtm_table = RT_TABLE_DEFAULT;
+ hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
+ hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
+ hdr->rtm_type = rt->type;
+
+ if (nla_put_u8(skb, RTA_DST, rt->min))
+ goto cancel;
+
+ /* the metrics nest is always emitted; it only carries RTAX_MTU when
+ * the route has its own MTU
+ */
+ metrics = nla_nest_start_noflag(skb, RTA_METRICS);
+ if (!metrics)
+ goto cancel;
+
+ if (rt->mtu) {
+ if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
+ goto cancel;
+ }
+
+ nla_nest_end(skb, metrics);
+
+ if (rt->dev) {
+ if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
+ goto cancel;
+ }
+
+ /* TODO: conditional neighbour physaddr? */
+
+ nlmsg_end(skb, nlh);
+
+ return 0;
+
+cancel:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+/* RTM_GETROUTE dump handler: emit one RTM_NEWROUTE message per route in the
+ * namespace.
+ *
+ * cb->args[0] holds the index of the next route to emit, so that the dump
+ * can resume across calls when @skb fills up. Returns skb->len.
+ */
+static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct mctp_route *rt;
+	int s_idx, idx;
+
+	/* TODO: allow filtering on route data, possibly under
+	 * cb->strict_check
+	 */
+
+	/* TODO: change to struct overlay */
+	s_idx = cb->args[0];
+	idx = 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
+		/* Only advance idx past routes that were skipped or fully
+		 * emitted. A route that did not fit must be retried on the
+		 * next call rather than silently dropped from the dump.
+		 */
+		if (idx >= s_idx &&
+		    mctp_fill_rtinfo(skb, rt,
+				     NETLINK_CB(cb->skb).portid,
+				     cb->nlh->nlmsg_seq,
+				     RTM_NEWROUTE, NLM_F_MULTI) < 0)
+			break;
+		idx++;
+	}
+
+	rcu_read_unlock();
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+/* net namespace implementation */
+
+/* Initialise the per-namespace route list, bind list, key list and their
+ * locks, and set the initial default network. Always returns 0.
+ */
+static int __net_init mctp_routes_net_init(struct net *net)
+{
+ struct netns_mctp *ns = &net->mctp;
+
+ INIT_LIST_HEAD(&ns->routes);
+ INIT_HLIST_HEAD(&ns->binds);
+ mutex_init(&ns->bind_lock);
+ INIT_HLIST_HEAD(&ns->keys);
+ spin_lock_init(&ns->keys_lock);
+ WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
+ return 0;
+}
+
+/* Drop the list's reference on every route of an exiting namespace. */
+static void __net_exit mctp_routes_net_exit(struct net *net)
+{
+	struct mctp_route *rt;
+
+	/* mctp_route_release() may hand rt to kfree_rcu(); stay in an RCU
+	 * read-side section so that rt remains valid while the iterator
+	 * reads its next pointer.
+	 */
+	rcu_read_lock();
+	list_for_each_entry_rcu(rt, &net->mctp.routes, list)
+		mctp_route_release(rt);
+	rcu_read_unlock();
+}
+
+/* Per-namespace setup/teardown hooks for the routing state. */
+static struct pernet_operations mctp_net_ops = {
+ .init = mctp_routes_net_init,
+ .exit = mctp_routes_net_exit,
+};
+
+/* Register the ETH_P_MCTP packet handler, the PF_MCTP route netlink
+ * handlers (RTM_GETROUTE dump, RTM_NEWROUTE, RTM_DELROUTE) and the
+ * per-namespace operations.
+ *
+ * Returns the result of register_pernet_subsys(); the return values of the
+ * rtnl_register_module() calls are not checked.
+ */
+int __init mctp_routes_init(void)
+{
+ dev_add_pack(&mctp_packet_type);
+
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE,
+ NULL, mctp_dump_rtinfo, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE,
+ mctp_newroute, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE,
+ mctp_delroute, NULL, 0);
+
+ return register_pernet_subsys(&mctp_net_ops);
+}
+
+/* Undo mctp_routes_init(), unregistering in the reverse order. */
+void __exit mctp_routes_exit(void)
+{
+ unregister_pernet_subsys(&mctp_net_ops);
+ rtnl_unregister(PF_MCTP, RTM_DELROUTE);
+ rtnl_unregister(PF_MCTP, RTM_NEWROUTE);
+ rtnl_unregister(PF_MCTP, RTM_GETROUTE);
+ dev_remove_pack(&mctp_packet_type);
+}
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 05a21dd072df..ffeb2df8be7a 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -407,7 +407,6 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
/* Verify ttl is valid */
if (dec.ttl <= 1)
goto err;
- dec.ttl -= 1;
/* Find the output device */
out_dev = rcu_dereference(nh->nh_dev);
@@ -431,6 +430,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
skb->dev = out_dev;
skb->protocol = htons(ETH_P_MPLS_UC);
+ dec.ttl -= 1;
if (unlikely(!new_header_size && dec.bos)) {
/* Penultimate hop popping */
if (!mpls_egress(dev_net(out_dev), rt, skb, dec))
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 7d738bd06f2c..8b235468c88f 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -21,43 +21,50 @@ struct mptcp_pernet {
struct ctl_table_header *ctl_table_hdr;
#endif
- u8 mptcp_enabled;
unsigned int add_addr_timeout;
+ unsigned int stale_loss_cnt;
+ u8 mptcp_enabled;
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
};
-static struct mptcp_pernet *mptcp_get_pernet(struct net *net)
+static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
{
return net_generic(net, mptcp_pernet_id);
}
-int mptcp_is_enabled(struct net *net)
+int mptcp_is_enabled(const struct net *net)
{
return mptcp_get_pernet(net)->mptcp_enabled;
}
-unsigned int mptcp_get_add_addr_timeout(struct net *net)
+unsigned int mptcp_get_add_addr_timeout(const struct net *net)
{
return mptcp_get_pernet(net)->add_addr_timeout;
}
-int mptcp_is_checksum_enabled(struct net *net)
+int mptcp_is_checksum_enabled(const struct net *net)
{
return mptcp_get_pernet(net)->checksum_enabled;
}
-int mptcp_allow_join_id0(struct net *net)
+int mptcp_allow_join_id0(const struct net *net)
{
return mptcp_get_pernet(net)->allow_join_initial_addr_port;
}
+/* Return the stale_loss_cnt value held in the MPTCP per-netns data of @net. */
+unsigned int mptcp_stale_loss_cnt(const struct net *net)
+{
+	const struct mptcp_pernet *pernet = mptcp_get_pernet(net);
+
+	return pernet->stale_loss_cnt;
+}
+
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX;
pernet->checksum_enabled = 0;
pernet->allow_join_initial_addr_port = 1;
+ pernet->stale_loss_cnt = 4;
}
#ifdef CONFIG_SYSCTL
@@ -95,6 +102,12 @@ static struct ctl_table mptcp_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE
},
+ {
+ .procname = "stale_loss_cnt",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ },
{}
};
@@ -114,6 +127,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[1].data = &pernet->add_addr_timeout;
table[2].data = &pernet->checksum_enabled;
table[3].data = &pernet->allow_join_initial_addr_port;
+ table[4].data = &pernet->stale_loss_cnt;
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr)
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index ff2cc0e3273d..b21ff9be04c6 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -44,7 +44,11 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
+ SNMP_MIB_ITEM("MPFailTx", MPTCP_MIB_MPFAILTX),
+ SNMP_MIB_ITEM("MPFailRx", MPTCP_MIB_MPFAILRX),
SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
+ SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE),
+ SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 0663cb12b448..ecd3d8b117e0 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -37,7 +37,11 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
+ MPTCP_MIB_MPFAILTX, /* Transmit a MP_FAIL */
+ MPTCP_MIB_MPFAILRX, /* Received a MP_FAIL */
MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */
+ MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */
+ MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 4452455aef7f..c41273cefc51 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -81,12 +81,11 @@ static void mptcp_parse_option(const struct sk_buff *skb,
* is if both hosts in their SYNs set A=0."
*/
if (flags & MPTCP_CAP_CHECKSUM_REQD)
- mp_opt->csum_reqd = 1;
+ mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
- if (flags & MPTCP_CAP_DENY_JOIN_ID0)
- mp_opt->deny_join_id0 = 1;
+ mp_opt->deny_join_id0 = !!(flags & MPTCP_CAP_DENY_JOIN_ID0);
- mp_opt->mp_capable = 1;
+ mp_opt->suboptions |= OPTIONS_MPTCP_MPC;
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
mp_opt->sndr_key = get_unaligned_be64(ptr);
ptr += 8;
@@ -101,7 +100,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
* equivalent to those in a DSS option and can be used
* interchangeably."
*/
- mp_opt->dss = 1;
+ mp_opt->suboptions |= OPTION_MPTCP_DSS;
mp_opt->use_map = 1;
mp_opt->mpc_map = 1;
mp_opt->data_len = get_unaligned_be16(ptr);
@@ -109,7 +108,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
}
if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
- mp_opt->csum_reqd = 1;
+ mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
ptr += 2;
}
pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u",
@@ -118,7 +117,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
break;
case MPTCPOPT_MP_JOIN:
- mp_opt->mp_join = 1;
+ mp_opt->suboptions |= OPTIONS_MPTCP_MPJ;
if (opsize == TCPOLEN_MPTCP_MPJ_SYN) {
mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
mp_opt->join_id = *ptr++;
@@ -144,7 +143,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
pr_debug("MP_JOIN hmac");
} else {
- mp_opt->mp_join = 0;
+ mp_opt->suboptions &= ~OPTIONS_MPTCP_MPJ;
}
break;
@@ -192,8 +191,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
break;
- mp_opt->dss = 1;
-
+ mp_opt->suboptions |= OPTION_MPTCP_DSS;
if (mp_opt->use_ack) {
if (mp_opt->ack64) {
mp_opt->data_ack = get_unaligned_be64(ptr);
@@ -222,14 +220,15 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr += 2;
if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
- mp_opt->csum_reqd = 1;
+ mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
ptr += 2;
}
pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
mp_opt->data_seq, mp_opt->subflow_seq,
- mp_opt->data_len, mp_opt->csum_reqd, mp_opt->csum);
+ mp_opt->data_len, !!(mp_opt->suboptions & OPTION_MPTCP_CSUMREQD),
+ mp_opt->csum);
}
break;
@@ -260,8 +259,10 @@ static void mptcp_parse_option(const struct sk_buff *skb,
break;
}
- mp_opt->add_addr = 1;
+ mp_opt->suboptions |= OPTION_MPTCP_ADD_ADDR;
mp_opt->addr.id = *ptr++;
+ mp_opt->addr.port = 0;
+ mp_opt->ahmac = 0;
if (mp_opt->addr.family == AF_INET) {
memcpy((u8 *)&mp_opt->addr.addr.s_addr, (u8 *)ptr, 4);
ptr += 4;
@@ -298,7 +299,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr++;
- mp_opt->rm_addr = 1;
+ mp_opt->suboptions |= OPTION_MPTCP_RM_ADDR;
mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE;
for (i = 0; i < mp_opt->rm_list.nr; i++)
mp_opt->rm_list.ids[i] = *ptr++;
@@ -309,7 +310,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (opsize != TCPOLEN_MPTCP_PRIO)
break;
- mp_opt->mp_prio = 1;
+ mp_opt->suboptions |= OPTION_MPTCP_PRIO;
mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP;
pr_debug("MP_PRIO: prio=%d", mp_opt->backup);
break;
@@ -321,7 +322,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr += 2;
mp_opt->rcvr_key = get_unaligned_be64(ptr);
ptr += 8;
- mp_opt->fastclose = 1;
+ mp_opt->suboptions |= OPTION_MPTCP_FASTCLOSE;
break;
case MPTCPOPT_RST:
@@ -330,12 +331,23 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
break;
- mp_opt->reset = 1;
+
+ mp_opt->suboptions |= OPTION_MPTCP_RST;
flags = *ptr++;
mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
mp_opt->reset_reason = *ptr;
break;
+ case MPTCPOPT_MP_FAIL:
+ if (opsize != TCPOLEN_MPTCP_FAIL)
+ break;
+
+ ptr += 2;
+ mp_opt->suboptions |= OPTION_MPTCP_FAIL;
+ mp_opt->fail_seq = get_unaligned_be64(ptr);
+ pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
+ break;
+
default:
break;
}
@@ -345,25 +357,12 @@ void mptcp_get_options(const struct sock *sk,
const struct sk_buff *skb,
struct mptcp_options_received *mp_opt)
{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
const struct tcphdr *th = tcp_hdr(skb);
const unsigned char *ptr;
int length;
/* initialize option status */
- mp_opt->mp_capable = 0;
- mp_opt->mp_join = 0;
- mp_opt->add_addr = 0;
- mp_opt->ahmac = 0;
- mp_opt->fastclose = 0;
- mp_opt->addr.port = 0;
- mp_opt->rm_addr = 0;
- mp_opt->dss = 0;
- mp_opt->mp_prio = 0;
- mp_opt->reset = 0;
- mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
- mp_opt->deny_join_id0 = 0;
+ mp_opt->suboptions = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -592,6 +591,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
dss_size = map_size;
if (skb && snd_data_fin_enable)
mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
+ opts->suboptions = OPTION_MPTCP_DSS;
ret = true;
}
@@ -615,6 +615,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
opts->ext_copy.ack64 = 0;
}
opts->ext_copy.use_ack = 1;
+ opts->suboptions = OPTION_MPTCP_DSS;
WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));
/* Add kind/length/subtype/flag overhead if mapping is not populated */
@@ -667,29 +668,34 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
bool port;
int len;
- if ((mptcp_pm_should_add_signal_ipv6(msk) ||
- mptcp_pm_should_add_signal_port(msk) ||
- mptcp_pm_should_add_signal_echo(msk)) &&
- skb && skb_is_tcp_pure_ack(skb)) {
- pr_debug("drop other suboptions");
- opts->suboptions = 0;
- opts->ext_copy.use_ack = 0;
- opts->ext_copy.use_map = 0;
- remaining += opt_size;
- drop_other_suboptions = true;
- }
-
+ /* add addr will strip the existing options, be sure to avoid breaking
+ * MPC/MPJ handshakes
+ */
if (!mptcp_pm_should_add_signal(msk) ||
- !(mptcp_pm_add_addr_signal(msk, remaining, &opts->addr, &echo, &port)))
+ (opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) ||
+ !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &opts->addr,
+ &echo, &port, &drop_other_suboptions))
return false;
+ if (drop_other_suboptions)
+ remaining += opt_size;
len = mptcp_add_addr_len(opts->addr.family, echo, port);
if (remaining < len)
return false;
*size = len;
- if (drop_other_suboptions)
+ if (drop_other_suboptions) {
+ pr_debug("drop other suboptions");
+ opts->suboptions = 0;
+
+ /* note that e.g. DSS could have written into the memory
+ * aliased by ahmac, we must reset the field here
+ * to avoid appending the hmac even for ADD_ADDR echo
+ * options
+ */
+ opts->ahmac = 0;
*size -= opt_size;
+ }
opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
if (!echo) {
opts->ahmac = add_addr_generate_hmac(msk->local_key,
@@ -739,7 +745,12 @@ static bool mptcp_established_options_mp_prio(struct sock *sk,
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- if (!subflow->send_mp_prio)
+ /* can't send MP_PRIO with MPC, as they share the same option space:
+ * 'backup'. Also it makes no sense at all
+ */
+ if (!subflow->send_mp_prio ||
+ ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
+ OPTION_MPTCP_MPC_ACK) & opts->suboptions))
return false;
/* account for the trailing 'nop' option */
@@ -755,7 +766,7 @@ static bool mptcp_established_options_mp_prio(struct sock *sk,
return true;
}
-static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb,
+static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb,
unsigned int *size,
unsigned int remaining,
struct mptcp_out_options *opts)
@@ -763,12 +774,36 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
if (remaining < TCPOLEN_MPTCP_RST)
- return;
+ return false;
*size = TCPOLEN_MPTCP_RST;
opts->suboptions |= OPTION_MPTCP_RST;
opts->reset_transient = subflow->reset_transient;
opts->reset_reason = subflow->reset_reason;
+
+ return true;
+}
+
+/* Fill in an MP_FAIL option when the subflow has one pending.
+ *
+ * On success TCPOLEN_MPTCP_FAIL is stored in @size, OPTION_MPTCP_FAIL is
+ * or-ed into @opts->suboptions and the subflow's map_seq is copied into
+ * @opts->fail_seq. Returns false, touching neither @size nor @opts, when
+ * send_mp_fail is not set or @remaining cannot hold the option.
+ */
+static bool mptcp_established_options_mp_fail(struct sock *sk,
+					      unsigned int *size,
+					      unsigned int remaining,
+					      struct mptcp_out_options *opts)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+	/* common case first: nothing pending; otherwise check the room left */
+	if (likely(!subflow->send_mp_fail) || remaining < TCPOLEN_MPTCP_FAIL)
+		return false;
+
+	opts->fail_seq = subflow->map_seq;
+	opts->suboptions |= OPTION_MPTCP_FAIL;
+	*size = TCPOLEN_MPTCP_FAIL;
+
+	pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
+
+	return true;
}
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
@@ -787,15 +822,28 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
return false;
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
- mptcp_established_options_rst(sk, skb, size, remaining, opts);
+ if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+ *size += opt_size;
+ remaining -= opt_size;
+ }
+ if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) {
+ *size += opt_size;
+ remaining -= opt_size;
+ }
return true;
}
snd_data_fin = mptcp_data_fin_enabled(msk);
if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
ret = true;
- else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts))
+ else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) {
ret = true;
+ if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+ *size += opt_size;
+ remaining -= opt_size;
+ return true;
+ }
+ }
/* we reserved enough space for the above options, and exceeding the
* TCP option space would be fatal
@@ -868,7 +916,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
*/
if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
- subflow->mp_join && mp_opt->mp_join &&
+ subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) &&
READ_ONCE(msk->pm.server_side))
tcp_send_ack(ssk);
goto fully_established;
@@ -885,25 +933,21 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
return subflow->mp_capable;
}
- if (mp_opt->dss && mp_opt->use_ack) {
+ if (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
+ ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo)) {
/* subflows are fully established as soon as we get any
- * additional ack.
+ * additional ack, including ADD_ADDR.
*/
subflow->fully_established = 1;
WRITE_ONCE(msk->fully_established, true);
goto fully_established;
}
- if (mp_opt->add_addr) {
- WRITE_ONCE(msk->fully_established, true);
- return true;
- }
-
/* If the first established packet does not contain MP_CAPABLE + data
* then fallback to TCP. Fallback scenarios requires a reset for
* MP_JOIN subflows.
*/
- if (!mp_opt->mp_capable) {
+ if (!(mp_opt->suboptions & OPTIONS_MPTCP_MPC)) {
if (subflow->mp_join)
goto reset;
subflow->mp_capable = 0;
@@ -975,9 +1019,11 @@ static void ack_update_msk(struct mptcp_sock *msk,
old_snd_una = msk->snd_una;
new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
- /* ACK for data not even sent yet? Ignore. */
- if (after64(new_snd_una, snd_nxt))
- new_snd_una = old_snd_una;
+ /* ACK for data not even sent yet and even above recovery bound? Ignore. */
+ if (unlikely(after64(new_snd_una, snd_nxt))) {
+ if (!msk->recovery || after64(new_snd_una, msk->recovery_snd_nxt))
+ new_snd_una = old_snd_una;
+ }
new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
@@ -1065,48 +1111,51 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return sk->sk_state != TCP_CLOSE;
- if (mp_opt.fastclose &&
- msk->local_key == mp_opt.rcvr_key) {
- WRITE_ONCE(msk->rcv_fastclose, true);
- mptcp_schedule_work((struct sock *)msk);
- }
+ if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) {
+ if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) &&
+ msk->local_key == mp_opt.rcvr_key) {
+ WRITE_ONCE(msk->rcv_fastclose, true);
+ mptcp_schedule_work((struct sock *)msk);
+ }
- if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
- if (!mp_opt.echo) {
- mptcp_pm_add_addr_received(msk, &mp_opt.addr);
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
- } else {
- mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
- mptcp_pm_del_add_timer(msk, &mp_opt.addr, true);
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
+ if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) &&
+ add_addr_hmac_valid(msk, &mp_opt)) {
+ if (!mp_opt.echo) {
+ mptcp_pm_add_addr_received(msk, &mp_opt.addr);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
+ } else {
+ mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
+ mptcp_pm_del_add_timer(msk, &mp_opt.addr, true);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
+ }
+
+ if (mp_opt.addr.port)
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD);
}
- if (mp_opt.addr.port)
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD);
+ if (mp_opt.suboptions & OPTION_MPTCP_RM_ADDR)
+ mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list);
- mp_opt.add_addr = 0;
- }
+ if (mp_opt.suboptions & OPTION_MPTCP_PRIO) {
+ mptcp_pm_mp_prio_received(sk, mp_opt.backup);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX);
+ }
- if (mp_opt.rm_addr) {
- mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list);
- mp_opt.rm_addr = 0;
- }
+ if (mp_opt.suboptions & OPTION_MPTCP_FAIL) {
+ mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX);
+ }
- if (mp_opt.mp_prio) {
- mptcp_pm_mp_prio_received(sk, mp_opt.backup);
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX);
- mp_opt.mp_prio = 0;
- }
+ if (mp_opt.suboptions & OPTION_MPTCP_RST) {
+ subflow->reset_seen = 1;
+ subflow->reset_reason = mp_opt.reset_reason;
+ subflow->reset_transient = mp_opt.reset_transient;
+ }
- if (mp_opt.reset) {
- subflow->reset_seen = 1;
- subflow->reset_reason = mp_opt.reset_reason;
- subflow->reset_transient = mp_opt.reset_transient;
+ if (!(mp_opt.suboptions & OPTION_MPTCP_DSS))
+ return true;
}
- if (!mp_opt.dss)
- return true;
-
/* we can't wait for recvmsg() to update the ack_seq, otherwise
* monodirectional flows will stuck
*/
@@ -1133,7 +1182,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
memset(mpext, 0, sizeof(*mpext));
- if (mp_opt.use_map) {
+ if (likely(mp_opt.use_map)) {
if (mp_opt.mpc_map) {
/* this is an MP_CAPABLE carrying MPTCP data
* we know this map the first chunk of data
@@ -1153,7 +1202,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
}
mpext->data_len = mp_opt.data_len;
mpext->use_map = 1;
- mpext->csum_reqd = mp_opt.csum_reqd;
+ mpext->csum_reqd = !!(mp_opt.suboptions & OPTION_MPTCP_CSUMREQD);
if (mpext->csum_reqd)
mpext->csum = mp_opt.csum;
@@ -1200,8 +1249,88 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts)
{
- if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
- OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
+ if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) {
+ const struct sock *ssk = (const struct sock *)tp;
+ struct mptcp_subflow_context *subflow;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ subflow->send_mp_fail = 0;
+
+ *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
+ TCPOLEN_MPTCP_FAIL,
+ 0, 0);
+ put_unaligned_be64(opts->fail_seq, ptr);
+ ptr += 2;
+ }
+
+ /* RST is mutually exclusive with everything else */
+ if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) {
+ *ptr++ = mptcp_option(MPTCPOPT_RST,
+ TCPOLEN_MPTCP_RST,
+ opts->reset_transient,
+ opts->reset_reason);
+ return;
+ }
+
+ /* DSS, MPC, MPJ and ADD_ADDR are mutually exclusive, see
+ * mptcp_established_options*()
+ */
+ if (likely(OPTION_MPTCP_DSS & opts->suboptions)) {
+ struct mptcp_ext *mpext = &opts->ext_copy;
+ u8 len = TCPOLEN_MPTCP_DSS_BASE;
+ u8 flags = 0;
+
+ if (mpext->use_ack) {
+ flags = MPTCP_DSS_HAS_ACK;
+ if (mpext->ack64) {
+ len += TCPOLEN_MPTCP_DSS_ACK64;
+ flags |= MPTCP_DSS_ACK64;
+ } else {
+ len += TCPOLEN_MPTCP_DSS_ACK32;
+ }
+ }
+
+ if (mpext->use_map) {
+ len += TCPOLEN_MPTCP_DSS_MAP64;
+
+ /* Use only 64-bit mapping flags for now, add
+ * support for optional 32-bit mappings later.
+ */
+ flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
+ if (mpext->data_fin)
+ flags |= MPTCP_DSS_DATA_FIN;
+
+ if (opts->csum_reqd)
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
+ }
+
+ *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
+
+ if (mpext->use_ack) {
+ if (mpext->ack64) {
+ put_unaligned_be64(mpext->data_ack, ptr);
+ ptr += 2;
+ } else {
+ put_unaligned_be32(mpext->data_ack32, ptr);
+ ptr += 1;
+ }
+ }
+
+ if (mpext->use_map) {
+ put_unaligned_be64(mpext->data_seq, ptr);
+ ptr += 2;
+ put_unaligned_be32(mpext->subflow_seq, ptr);
+ ptr += 1;
+ if (opts->csum_reqd) {
+ put_unaligned_be32(mpext->data_len << 16 |
+ mptcp_make_csum(mpext), ptr);
+ } else {
+ put_unaligned_be32(mpext->data_len << 16 |
+ TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ }
+ }
+ } else if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
+ OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
u8 len, flag = MPTCP_CAP_HMAC_SHA256;
if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
@@ -1248,10 +1377,31 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
}
ptr += 1;
- }
-mp_capable_done:
- if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
+ /* MPC is additionally mutually exclusive with MP_PRIO */
+ goto mp_capable_done;
+ } else if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
+ *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+ TCPOLEN_MPTCP_MPJ_SYN,
+ opts->backup, opts->join_id);
+ put_unaligned_be32(opts->token, ptr);
+ ptr += 1;
+ put_unaligned_be32(opts->nonce, ptr);
+ ptr += 1;
+ } else if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
+ *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+ TCPOLEN_MPTCP_MPJ_SYNACK,
+ opts->backup, opts->join_id);
+ put_unaligned_be64(opts->thmac, ptr);
+ ptr += 2;
+ put_unaligned_be32(opts->nonce, ptr);
+ ptr += 1;
+ } else if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
+ *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+ TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
+ memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
+ ptr += 5;
+ } else if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
u8 echo = MPTCP_ADDR_ECHO;
@@ -1309,6 +1459,19 @@ mp_capable_done:
}
}
+ if (OPTION_MPTCP_PRIO & opts->suboptions) {
+ const struct sock *ssk = (const struct sock *)tp;
+ struct mptcp_subflow_context *subflow;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ subflow->send_mp_prio = 0;
+
+ *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
+ TCPOLEN_MPTCP_PRIO,
+ opts->backup, TCPOPT_NOP);
+ }
+
+mp_capable_done:
if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
u8 i = 1;
@@ -1329,107 +1492,6 @@ mp_capable_done:
}
}
- if (OPTION_MPTCP_PRIO & opts->suboptions) {
- const struct sock *ssk = (const struct sock *)tp;
- struct mptcp_subflow_context *subflow;
-
- subflow = mptcp_subflow_ctx(ssk);
- subflow->send_mp_prio = 0;
-
- *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
- TCPOLEN_MPTCP_PRIO,
- opts->backup, TCPOPT_NOP);
- }
-
- if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
- *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
- TCPOLEN_MPTCP_MPJ_SYN,
- opts->backup, opts->join_id);
- put_unaligned_be32(opts->token, ptr);
- ptr += 1;
- put_unaligned_be32(opts->nonce, ptr);
- ptr += 1;
- }
-
- if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
- *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
- TCPOLEN_MPTCP_MPJ_SYNACK,
- opts->backup, opts->join_id);
- put_unaligned_be64(opts->thmac, ptr);
- ptr += 2;
- put_unaligned_be32(opts->nonce, ptr);
- ptr += 1;
- }
-
- if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
- *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
- TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
- memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
- ptr += 5;
- }
-
- if (OPTION_MPTCP_RST & opts->suboptions)
- *ptr++ = mptcp_option(MPTCPOPT_RST,
- TCPOLEN_MPTCP_RST,
- opts->reset_transient,
- opts->reset_reason);
-
- if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
- struct mptcp_ext *mpext = &opts->ext_copy;
- u8 len = TCPOLEN_MPTCP_DSS_BASE;
- u8 flags = 0;
-
- if (mpext->use_ack) {
- flags = MPTCP_DSS_HAS_ACK;
- if (mpext->ack64) {
- len += TCPOLEN_MPTCP_DSS_ACK64;
- flags |= MPTCP_DSS_ACK64;
- } else {
- len += TCPOLEN_MPTCP_DSS_ACK32;
- }
- }
-
- if (mpext->use_map) {
- len += TCPOLEN_MPTCP_DSS_MAP64;
-
- /* Use only 64-bit mapping flags for now, add
- * support for optional 32-bit mappings later.
- */
- flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
- if (mpext->data_fin)
- flags |= MPTCP_DSS_DATA_FIN;
-
- if (opts->csum_reqd)
- len += TCPOLEN_MPTCP_DSS_CHECKSUM;
- }
-
- *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
-
- if (mpext->use_ack) {
- if (mpext->ack64) {
- put_unaligned_be64(mpext->data_ack, ptr);
- ptr += 2;
- } else {
- put_unaligned_be32(mpext->data_ack32, ptr);
- ptr += 1;
- }
- }
-
- if (mpext->use_map) {
- put_unaligned_be64(mpext->data_seq, ptr);
- ptr += 2;
- put_unaligned_be32(mpext->subflow_seq, ptr);
- ptr += 1;
- if (opts->csum_reqd) {
- put_unaligned_be32(mpext->data_len << 16 |
- mptcp_make_csum(mpext), ptr);
- } else {
- put_unaligned_be32(mpext->data_len << 16 |
- TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
- }
- }
- }
-
if (tp)
mptcp_set_rwin(tp);
}
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 639271e09604..6ab386ff3294 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -10,6 +10,8 @@
#include <net/mptcp.h>
#include "protocol.h"
+#include "mib.h"
+
/* path manager command handlers */
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
@@ -18,23 +20,23 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
{
u8 add_addr = READ_ONCE(msk->pm.addr_signal);
- pr_debug("msk=%p, local_id=%d", msk, addr->id);
+ pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo);
lockdep_assert_held(&msk->pm.lock);
- if (add_addr) {
- pr_warn("addr_signal error, add_addr=%d", add_addr);
+ if (add_addr &
+ (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) {
+ pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo);
return -EINVAL;
}
- msk->pm.local = *addr;
- add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL);
- if (echo)
+ if (echo) {
+ msk->pm.remote = *addr;
add_addr |= BIT(MPTCP_ADD_ADDR_ECHO);
- if (addr->family == AF_INET6)
- add_addr |= BIT(MPTCP_ADD_ADDR_IPV6);
- if (addr->port)
- add_addr |= BIT(MPTCP_ADD_ADDR_PORT);
+ } else {
+ msk->pm.local = *addr;
+ add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL);
+ }
WRITE_ONCE(msk->pm.addr_signal, add_addr);
return 0;
}
@@ -247,12 +249,21 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
}
+void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
+{
+ pr_debug("fail_seq=%llu", fail_seq); /* only logs the received sequence for now; @sk is not used here */
+}
+
/* path manager helpers */
-bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
- struct mptcp_addr_info *saddr, bool *echo, bool *port)
+bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
+ unsigned int opt_size, unsigned int remaining,
+ struct mptcp_addr_info *addr, bool *echo,
+ bool *port, bool *drop_other_suboptions)
{
int ret = false;
+ u8 add_addr;
+ u8 family;
spin_lock_bh(&msk->pm.lock);
@@ -260,14 +271,30 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
if (!mptcp_pm_should_add_signal(msk))
goto out_unlock;
+ /* always drop all the other options for a pure-ack ADD_ADDR; this is a
+ * plain dup-ack from the TCP perspective. The other MPTCP-relevant info,
+ * if any, will be carried by the 'original' TCP ack
+ */
+ if (skb && skb_is_tcp_pure_ack(skb)) {
+ remaining += opt_size;
+ *drop_other_suboptions = true;
+ }
+
*echo = mptcp_pm_should_add_signal_echo(msk);
- *port = mptcp_pm_should_add_signal_port(msk);
+ *port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);
- if (remaining < mptcp_add_addr_len(msk->pm.local.family, *echo, *port))
+ family = *echo ? msk->pm.remote.family : msk->pm.local.family;
+ if (remaining < mptcp_add_addr_len(family, *echo, *port))
goto out_unlock;
- *saddr = msk->pm.local;
- WRITE_ONCE(msk->pm.addr_signal, 0);
+ if (*echo) {
+ *addr = msk->pm.remote;
+ add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO);
+ } else {
+ *addr = msk->pm.local;
+ add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL);
+ }
+ WRITE_ONCE(msk->pm.addr_signal, add_addr);
ret = true;
out_unlock:
@@ -279,6 +306,7 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_rm_list *rm_list)
{
int ret = false, len;
+ u8 rm_addr;
spin_lock_bh(&msk->pm.lock);
@@ -286,16 +314,17 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
if (!mptcp_pm_should_rm_signal(msk))
goto out_unlock;
+ rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL);
len = mptcp_rm_addr_len(&msk->pm.rm_list_tx);
if (len < 0) {
- WRITE_ONCE(msk->pm.addr_signal, 0);
+ WRITE_ONCE(msk->pm.addr_signal, rm_addr);
goto out_unlock;
}
if (remaining < len)
goto out_unlock;
*rm_list = msk->pm.rm_list_tx;
- WRITE_ONCE(msk->pm.addr_signal, 0);
+ WRITE_ONCE(msk->pm.addr_signal, rm_addr);
ret = true;
out_unlock:
@@ -308,6 +337,25 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
return mptcp_pm_nl_get_local_id(msk, skc);
}
+/* Keep track of rtx periods with no progress on @ssk.
+ *
+ * The subflow's stale_count is bumped (saturating at U8_MAX) for as long as
+ * the TCP rcv_tstamp sampled at the start of the sequence does not change,
+ * and mptcp_pm_nl_subflow_chk_stale() is invoked on each such repetition.
+ * A changed timestamp resets the counter and marks the subflow active.
+ */
+void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+	u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp);
+
+	/* first period: just remember the reference timestamp */
+	if (!subflow->stale_count) {
+		subflow->stale_rcv_tstamp = rcv_tstamp;
+		subflow->stale_count++;
+		return;
+	}
+
+	/* the timestamp moved since the reference was taken: start over */
+	if (subflow->stale_rcv_tstamp != rcv_tstamp) {
+		subflow->stale_count = 0;
+		mptcp_subflow_set_active(subflow);
+		return;
+	}
+
+	/* still no progress: count it, without wrapping the counter */
+	if (subflow->stale_count < U8_MAX)
+		subflow->stale_count++;
+	mptcp_pm_nl_subflow_chk_stale(msk, ssk);
+}
+
void mptcp_pm_data_init(struct mptcp_sock *msk)
{
msk->pm.add_addr_signaled = 0;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 56263c2c4014..1e4289c507ff 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -46,6 +46,7 @@ struct pm_nl_pernet {
spinlock_t lock;
struct list_head local_addr_list;
unsigned int addrs;
+ unsigned int stale_loss_cnt;
unsigned int add_addr_signal_max;
unsigned int add_addr_accept_max;
unsigned int local_addr_max;
@@ -316,14 +317,14 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
if (!entry->addr.id)
return;
- if (mptcp_pm_should_add_signal(msk)) {
+ if (mptcp_pm_should_add_signal_addr(msk)) {
sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
goto out;
}
spin_lock_bh(&msk->pm.lock);
- if (!mptcp_pm_should_add_signal(msk)) {
+ if (!mptcp_pm_should_add_signal_addr(msk)) {
pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id);
mptcp_pm_announce_addr(msk, &entry->addr, false);
mptcp_pm_add_addr_send_ack(msk);
@@ -409,6 +410,55 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
}
}
+static bool lookup_address_in_vec(struct mptcp_addr_info *addrs, unsigned int nr,
+ struct mptcp_addr_info *addr)
+{
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ if (addresses_equal(&addrs[i], addr, addr->port))
+ return true;
+ }
+
+ return false;
+}
+
+/* Fill all the remote addresses into the array addrs[],
+ * and return the array size.
+ */
+static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
+ struct mptcp_addr_info *addrs)
+{
+ struct sock *sk = (struct sock *)msk, *ssk;
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_addr_info remote = { 0 };
+ unsigned int subflows_max;
+ int i = 0;
+
+ subflows_max = mptcp_pm_get_subflows_max(msk);
+
+ /* Non-fullmesh endpoint, fill in the single entry
+ * corresponding to the primary MPC subflow remote address
+ */
+ if (!fullmesh) {
+ remote_address((struct sock_common *)sk, &remote);
+ msk->pm.subflows++;
+ addrs[i++] = remote;
+ } else {
+ mptcp_for_each_subflow(msk, subflow) {
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ remote_address((struct sock_common *)ssk, &remote);
+ if (!lookup_address_in_vec(addrs, i, &remote) &&
+ msk->pm.subflows < subflows_max) {
+ msk->pm.subflows++;
+ addrs[i++] = remote;
+ }
+ }
+ }
+
+ return i;
+}
+
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
@@ -454,15 +504,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
!READ_ONCE(msk->pm.remote_deny_join_id0)) {
local = select_local_address(pernet, msk);
if (local) {
- struct mptcp_addr_info remote = { 0 };
+ bool fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
+ struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
+ int i, nr;
msk->pm.local_addr_used++;
- msk->pm.subflows++;
check_work_pending(msk);
- remote_address((struct sock_common *)sk, &remote);
+ nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
spin_unlock_bh(&msk->pm.lock);
- __mptcp_subflow_connect(sk, &local->addr, &remote,
- local->flags, local->ifindex);
+ for (i = 0; i < nr; i++)
+ __mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
spin_lock_bh(&msk->pm.lock);
return;
}
@@ -483,13 +534,67 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
mptcp_pm_create_subflow_or_signal_addr(msk);
}
+/* Fill all the local addresses into the array addrs[],
+ * and return the array size.
+ */
+static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addrs)
+{
+ struct sock *sk = (struct sock *)msk;
+ struct mptcp_pm_addr_entry *entry;
+ struct mptcp_addr_info local;
+ struct pm_nl_pernet *pernet;
+ unsigned int subflows_max;
+ int i = 0;
+
+ pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
+ subflows_max = mptcp_pm_get_subflows_max(msk);
+
+ rcu_read_lock();
+ __mptcp_flush_join_list(msk);
+ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
+ if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
+ continue;
+
+ if (entry->addr.family != sk->sk_family) {
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if ((entry->addr.family == AF_INET &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
+ (sk->sk_family == AF_INET &&
+ !ipv6_addr_v4mapped(&entry->addr.addr6)))
+#endif
+ continue;
+ }
+
+ if (msk->pm.subflows < subflows_max) {
+ msk->pm.subflows++;
+ addrs[i++] = entry->addr;
+ }
+ }
+ rcu_read_unlock();
+
+ /* If the array is empty, fill in the single
+ * 'IPADDRANY' local address
+ */
+ if (!i) {
+ memset(&local, 0, sizeof(local));
+ local.family = msk->pm.remote.family;
+
+ msk->pm.subflows++;
+ addrs[i++] = local;
+ }
+
+ return i;
+}
+
static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
{
+ struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
struct sock *sk = (struct sock *)msk;
unsigned int add_addr_accept_max;
struct mptcp_addr_info remote;
- struct mptcp_addr_info local;
unsigned int subflows_max;
+ int i, nr;
add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
@@ -501,23 +606,22 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote))
goto add_addr_echo;
- msk->pm.add_addr_accepted++;
- msk->pm.subflows++;
- if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
- msk->pm.subflows >= subflows_max)
- WRITE_ONCE(msk->pm.accept_addr, false);
-
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
remote = msk->pm.remote;
if (!remote.port)
remote.port = sk->sk_dport;
- memset(&local, 0, sizeof(local));
- local.family = remote.family;
+ nr = fill_local_addresses_vec(msk, addrs);
+
+ msk->pm.add_addr_accepted++;
+ if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
+ msk->pm.subflows >= subflows_max)
+ WRITE_ONCE(msk->pm.accept_addr, false);
spin_unlock_bh(&msk->pm.lock);
- __mptcp_subflow_connect(sk, &local, &remote, 0, 0);
+ for (i = 0; i < nr; i++)
+ __mptcp_subflow_connect(sk, &addrs[i], &remote);
spin_lock_bh(&msk->pm.lock);
add_addr_echo:
@@ -543,10 +647,8 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
bool slow;
spin_unlock_bh(&msk->pm.lock);
- pr_debug("send ack for %s%s%s",
- mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr",
- mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "",
- mptcp_pm_should_add_signal_port(msk) ? " [port]" : "");
+ pr_debug("send ack for %s",
+ mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr");
slow = lock_sock_fast(ssk);
tcp_send_ack(ssk);
@@ -899,6 +1001,43 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = {
[MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, },
};
+void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
+{
+ struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk);
+ struct sock *sk = (struct sock *)msk;
+ unsigned int active_max_loss_cnt;
+ struct net *net = sock_net(sk);
+ unsigned int stale_loss_cnt;
+ bool slow;
+
+ stale_loss_cnt = mptcp_stale_loss_cnt(net);
+ if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt)
+ return;
+
+ /* look for another available subflow not in loss state */
+ active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1);
+ mptcp_for_each_subflow(msk, iter) {
+ if (iter != subflow && mptcp_subflow_active(iter) &&
+ iter->stale_count < active_max_loss_cnt) {
+ /* we have some alternatives, try to mark this subflow as idle ...*/
+ slow = lock_sock_fast(ssk);
+ if (!tcp_rtx_and_write_queues_empty(ssk)) {
+ subflow->stale = 1;
+ __mptcp_retransmit_pending_data(sk);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_SUBFLOWSTALE);
+ }
+ unlock_sock_fast(ssk, slow);
+
+ /* always try to push the pending data regardless of re-injections:
+ * we can possibly use backup subflows now, and subflow selection
+ * is cheap under the msk socket lock
+ */
+ __mptcp_push_pending(sk, 0);
+ return;
+ }
+ }
+}
+
static int mptcp_pm_family_to_addr(int family)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -1067,6 +1206,27 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
return NULL;
}
+int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
+ u8 *flags, int *ifindex)
+{
+ struct mptcp_pm_addr_entry *entry;
+
+ *flags = 0;
+ *ifindex = 0;
+
+ if (id) {
+ rcu_read_lock();
+ entry = __lookup_addr_by_id(net_generic(net, pm_nl_pernet_id), id);
+ if (entry) {
+ *flags = entry->flags;
+ *ifindex = entry->ifindex;
+ }
+ rcu_read_unlock();
+ }
+
+ return 0;
+}
+
static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
struct mptcp_addr_info *addr)
{
@@ -1135,36 +1295,12 @@ next:
return 0;
}
-struct addr_entry_release_work {
- struct rcu_work rwork;
- struct mptcp_pm_addr_entry *entry;
-};
-
-static void mptcp_pm_release_addr_entry(struct work_struct *work)
-{
- struct addr_entry_release_work *w;
- struct mptcp_pm_addr_entry *entry;
-
- w = container_of(to_rcu_work(work), struct addr_entry_release_work, rwork);
- entry = w->entry;
- if (entry) {
- if (entry->lsk)
- sock_release(entry->lsk);
- kfree(entry);
- }
- kfree(w);
-}
-
-static void mptcp_pm_free_addr_entry(struct mptcp_pm_addr_entry *entry)
+/* caller must ensure the RCU grace period is already elapsed */
+static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
{
- struct addr_entry_release_work *w;
-
- w = kmalloc(sizeof(*w), GFP_ATOMIC);
- if (w) {
- INIT_RCU_WORK(&w->rwork, mptcp_pm_release_addr_entry);
- w->entry = entry;
- queue_rcu_work(system_wq, &w->rwork);
- }
+ if (entry->lsk)
+ sock_release(entry->lsk);
+ kfree(entry);
}
static int mptcp_nl_remove_id_zero_address(struct net *net,
@@ -1244,7 +1380,8 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
spin_unlock_bh(&pernet->lock);
mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr);
- mptcp_pm_free_addr_entry(entry);
+ synchronize_rcu();
+ __mptcp_pm_release_addr_entry(entry);
return ret;
}
@@ -1297,6 +1434,7 @@ static void mptcp_nl_remove_addrs_list(struct net *net,
}
}
+/* caller must ensure the RCU grace period is already elapsed */
static void __flush_addrs(struct list_head *list)
{
while (!list_empty(list)) {
@@ -1305,7 +1443,7 @@ static void __flush_addrs(struct list_head *list)
cur = list_entry(list->next,
struct mptcp_pm_addr_entry, list);
list_del_rcu(&cur->list);
- mptcp_pm_free_addr_entry(cur);
+ __mptcp_pm_release_addr_entry(cur);
}
}
@@ -1329,6 +1467,7 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
spin_unlock_bh(&pernet->lock);
mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
+ synchronize_rcu();
__flush_addrs(&free_list);
return 0;
}
@@ -1922,6 +2061,7 @@ static int __net_init pm_nl_init_net(struct net *net)
INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
pernet->next_id = 1;
+ pernet->stale_loss_cnt = 4;
spin_lock_init(&pernet->lock);
/* No need to initialize other pernet fields, the struct is zeroed at
@@ -1939,7 +2079,8 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list)
struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
/* net is removed from namespace list, can't race with
- * other modifiers
+ * other modifiers, also netns core already waited for an
+ * RCU grace period.
*/
__flush_addrs(&pernet->local_addr_list);
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a88924947815..ade648c3512b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -411,16 +411,29 @@ static void mptcp_set_datafin_timeout(const struct sock *sk)
TCP_RTO_MIN << icsk->icsk_retransmits);
}
-static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
+static void __mptcp_set_timeout(struct sock *sk, long tout)
{
- long tout = ssk && inet_csk(ssk)->icsk_pending ?
- inet_csk(ssk)->icsk_timeout - jiffies : 0;
-
- if (tout <= 0)
- tout = mptcp_sk(sk)->timer_ival;
mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
}
+static long mptcp_timeout_from_subflow(const struct mptcp_subflow_context *subflow)
+{
+ const struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ return inet_csk(ssk)->icsk_pending && !subflow->stale_count ?
+ inet_csk(ssk)->icsk_timeout - jiffies : 0;
+}
+
+static void mptcp_set_timeout(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow;
+ long tout = 0;
+
+ mptcp_for_each_subflow(mptcp_sk(sk), subflow)
+ tout = max(tout, mptcp_timeout_from_subflow(subflow));
+ __mptcp_set_timeout(sk, tout);
+}
+
static bool tcp_can_send_ack(const struct sock *ssk)
{
return !((1 << inet_sk_state_load(ssk)) &
@@ -531,7 +544,6 @@ static bool mptcp_check_data_fin(struct sock *sk)
}
ret = true;
- mptcp_set_timeout(sk, NULL);
mptcp_send_ack(msk);
mptcp_close_wake_up(sk);
}
@@ -791,10 +803,7 @@ static void mptcp_reset_timer(struct sock *sk)
if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
return;
- /* should never be called with mptcp level timer cleared */
- tout = READ_ONCE(mptcp_sk(sk)->timer_ival);
- if (WARN_ON_ONCE(!tout))
- tout = TCP_RTO_MIN;
+ tout = mptcp_sk(sk)->timer_ival;
sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + tout);
}
@@ -1046,8 +1055,14 @@ static void __mptcp_clean_una(struct sock *sk)
if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
break;
- if (WARN_ON_ONCE(dfrag == msk->first_pending))
- break;
+ if (unlikely(dfrag == msk->first_pending)) {
+ /* in recovery mode can see ack after the current snd head */
+ if (WARN_ON_ONCE(!msk->recovery))
+ break;
+
+ WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+ }
+
dfrag_clear(sk, dfrag);
cleaned = true;
}
@@ -1056,8 +1071,14 @@ static void __mptcp_clean_una(struct sock *sk)
if (dfrag && after64(snd_una, dfrag->data_seq)) {
u64 delta = snd_una - dfrag->data_seq;
- if (WARN_ON_ONCE(delta > dfrag->already_sent))
- goto out;
+ /* prevent wrap around in recovery mode */
+ if (unlikely(delta > dfrag->already_sent)) {
+ if (WARN_ON_ONCE(!msk->recovery))
+ goto out;
+ if (WARN_ON_ONCE(delta > dfrag->data_len))
+ goto out;
+ dfrag->already_sent += delta - dfrag->already_sent;
+ }
dfrag->data_seq += delta;
dfrag->offset += delta;
@@ -1068,6 +1089,10 @@ static void __mptcp_clean_una(struct sock *sk)
cleaned = true;
}
+ /* all retransmitted data acked, recovery completed */
+ if (unlikely(msk->recovery) && after64(msk->snd_una, msk->recovery_snd_nxt))
+ msk->recovery = false;
+
out:
if (cleaned) {
if (tcp_under_memory_pressure(sk)) {
@@ -1076,8 +1101,8 @@ out:
}
}
- if (snd_una == READ_ONCE(msk->snd_nxt)) {
- if (msk->timer_ival && !mptcp_data_fin_enabled(msk))
+ if (snd_una == READ_ONCE(msk->snd_nxt) && !msk->recovery) {
+ if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
mptcp_stop_timer(sk);
} else {
mptcp_reset_timer(sk);
@@ -1366,16 +1391,44 @@ struct subflow_send_info {
u64 ratio;
};
+void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow)
+{
+ if (!subflow->stale)
+ return;
+
+ subflow->stale = 0;
+ MPTCP_INC_STATS(sock_net(mptcp_subflow_tcp_sock(subflow)), MPTCP_MIB_SUBFLOWRECOVER);
+}
+
+bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
+{
+ if (unlikely(subflow->stale)) {
+ u32 rcv_tstamp = READ_ONCE(tcp_sk(mptcp_subflow_tcp_sock(subflow))->rcv_tstamp);
+
+ if (subflow->stale_rcv_tstamp == rcv_tstamp)
+ return false;
+
+ mptcp_subflow_set_active(subflow);
+ }
+ return __mptcp_subflow_active(subflow);
+}
+
+/* implement the mptcp packet scheduler;
+ * returns the subflow that will transmit the next DSS
+ * additionally updates the rtx timeout
+ */
static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
{
struct subflow_send_info send_info[2];
struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
int i, nr_active = 0;
struct sock *ssk;
+ long tout = 0;
u64 ratio;
u32 pace;
- sock_owned_by_me((struct sock *)msk);
+ sock_owned_by_me(sk);
if (__mptcp_check_fallback(msk)) {
if (!msk->first)
@@ -1386,8 +1439,10 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
/* re-use last subflow, if the burst allow that */
if (msk->last_snd && msk->snd_burst > 0 &&
sk_stream_memory_free(msk->last_snd) &&
- mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd)))
+ mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) {
+ mptcp_set_timeout(sk);
return msk->last_snd;
+ }
/* pick the subflow with the lower wmem/wspace ratio */
for (i = 0; i < 2; ++i) {
@@ -1400,6 +1455,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
if (!mptcp_subflow_active(subflow))
continue;
+ tout = max(tout, mptcp_timeout_from_subflow(subflow));
nr_active += !subflow->backup;
if (!sk_stream_memory_free(subflow->tcp_sock) || !tcp_sk(ssk)->snd_wnd)
continue;
@@ -1415,6 +1471,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
send_info[subflow->backup].ratio = ratio;
}
}
+ __mptcp_set_timeout(sk, tout);
/* pick the best backup if no other subflow is active */
if (!nr_active)
@@ -1433,12 +1490,11 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
static void mptcp_push_release(struct sock *sk, struct sock *ssk,
struct mptcp_sendmsg_info *info)
{
- mptcp_set_timeout(sk, ssk);
tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle, info->size_goal);
release_sock(ssk);
}
-static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+void __mptcp_push_pending(struct sock *sk, unsigned int flags)
{
struct sock *prev_ssk = NULL, *ssk = NULL;
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1459,15 +1515,19 @@ static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
mptcp_flush_join_list(msk);
ssk = mptcp_subflow_get_send(msk);
- /* try to keep the subflow socket lock across
- * consecutive xmit on the same socket
+ /* First check. If the ssk has changed since
+ * the last round, release prev_ssk
*/
if (ssk != prev_ssk && prev_ssk)
mptcp_push_release(sk, prev_ssk, &info);
if (!ssk)
goto out;
- if (ssk != prev_ssk || !prev_ssk)
+ /* Need to lock the new subflow only if different
+ * from the previous one, otherwise we are still
+ * holding the relevant lock
+ */
+ if (ssk != prev_ssk)
lock_sock(ssk);
/* keep it simple and always provide a new skb for the
@@ -1501,12 +1561,11 @@ static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
mptcp_push_release(sk, ssk, &info);
out:
- if (copied) {
- /* start the timer, if it's not pending */
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ /* ensure the rtx timer is running */
+ if (!mptcp_timer_pending(sk))
+ mptcp_reset_timer(sk);
+ if (copied)
__mptcp_check_send_data_fin(sk);
- }
}
static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
@@ -1567,7 +1626,6 @@ out:
*/
__mptcp_update_wmem(sk);
if (copied) {
- mptcp_set_timeout(sk, ssk);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
if (!mptcp_timer_pending(sk))
@@ -2083,10 +2141,11 @@ static void mptcp_timeout_timer(struct timer_list *t)
*
* A backup subflow is returned only if that is the only kind available.
*/
-static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
+static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
{
+ struct sock *backup = NULL, *pick = NULL;
struct mptcp_subflow_context *subflow;
- struct sock *backup = NULL;
+ int min_stale_count = INT_MAX;
sock_owned_by_me((const struct sock *)msk);
@@ -2096,14 +2155,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- if (!mptcp_subflow_active(subflow))
+ if (!__mptcp_subflow_active(subflow))
continue;
- /* still data outstanding at TCP level? Don't retransmit. */
- if (!tcp_write_queue_empty(ssk)) {
- if (inet_csk(ssk)->icsk_ca_state >= TCP_CA_Loss)
- continue;
- return NULL;
+ /* still data outstanding at TCP level? skip this */
+ if (!tcp_rtx_and_write_queues_empty(ssk)) {
+ mptcp_pm_subflow_chk_stale(msk, ssk);
+ min_stale_count = min_t(int, min_stale_count, subflow->stale_count);
+ continue;
}
if (subflow->backup) {
@@ -2112,10 +2171,15 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
continue;
}
- return ssk;
+ if (!pick)
+ pick = ssk;
}
- return backup;
+ if (pick)
+ return pick;
+
+ /* use backup only if there is no progress anywhere */
+ return min_stale_count > 1 ? backup : NULL;
}
static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
@@ -2126,6 +2190,50 @@ static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
}
}
+bool __mptcp_retransmit_pending_data(struct sock *sk)
+{
+ struct mptcp_data_frag *cur, *rtx_head;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ if (__mptcp_check_fallback(mptcp_sk(sk)))
+ return false;
+
+ if (tcp_rtx_and_write_queues_empty(sk))
+ return false;
+
+ /* the closing socket has some data untransmitted and/or unacked:
+ * some data in the mptcp rtx queue has not really been xmitted yet.
+ * keep it simple and re-inject the whole mptcp level rtx queue
+ */
+ mptcp_data_lock(sk);
+ __mptcp_clean_una_wakeup(sk);
+ rtx_head = mptcp_rtx_head(sk);
+ if (!rtx_head) {
+ mptcp_data_unlock(sk);
+ return false;
+ }
+
+ /* will accept ack for reinjected data before re-sending them */
+ if (!msk->recovery || after64(msk->snd_nxt, msk->recovery_snd_nxt))
+ msk->recovery_snd_nxt = msk->snd_nxt;
+ msk->recovery = true;
+ mptcp_data_unlock(sk);
+
+ msk->first_pending = rtx_head;
+ msk->tx_pending_data += msk->snd_nxt - rtx_head->data_seq;
+ msk->snd_nxt = rtx_head->data_seq;
+ msk->snd_burst = 0;
+
+ /* be sure to clear the "sent status" on all re-injected fragments */
+ list_for_each_entry(cur, &msk->rtx_queue, list) {
+ if (!cur->already_sent)
+ break;
+ cur->already_sent = 0;
+ }
+
+ return true;
+}
+
/* subflow sockets can be either outgoing (connect) or incoming
* (accept).
*
@@ -2138,6 +2246,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ bool need_push;
list_del(&subflow->node);
@@ -2149,6 +2258,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (ssk->sk_socket)
sock_orphan(ssk);
+ need_push = __mptcp_retransmit_pending_data(sk);
subflow->disposable = 1;
/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
@@ -2176,6 +2286,9 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (msk->subflow && ssk == msk->subflow->sk)
mptcp_dispose_initial_subflow(msk);
+
+ if (need_push)
+ __mptcp_push_pending(sk, 0);
}
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2313,7 +2426,6 @@ static void __mptcp_retrans(struct sock *sk)
info.size_goal);
}
- mptcp_set_timeout(sk, ssk);
release_sock(ssk);
reset_timer:
@@ -2384,10 +2496,12 @@ static int __mptcp_init_sock(struct sock *sk)
msk->wmem_reserved = 0;
WRITE_ONCE(msk->rmem_released, 0);
msk->tx_pending_data = 0;
+ msk->timer_ival = TCP_RTO_MIN;
msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
+ msk->recovery = false;
mptcp_pm_data_init(msk);
@@ -2472,7 +2586,6 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
tcp_shutdown(ssk, how);
} else {
pr_debug("Sending DATA_FIN on subflow %p", ssk);
- mptcp_set_timeout(sk, ssk);
tcp_send_ack(ssk);
if (!mptcp_timer_pending(sk))
mptcp_reset_timer(sk);
@@ -2723,7 +2836,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->token = subflow_req->token;
msk->subflow = NULL;
WRITE_ONCE(msk->fully_established, false);
- if (mp_opt->csum_reqd)
+ if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
WRITE_ONCE(msk->csum_enabled, true);
msk->write_seq = subflow_req->idsn + 1;
@@ -2732,7 +2845,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
- if (mp_opt->mp_capable) {
+ if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
msk->can_ack = true;
msk->remote_key = mp_opt->sndr_key;
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0f0c026c5f8b..d7aba1c4dc48 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -26,6 +26,15 @@
#define OPTION_MPTCP_FASTCLOSE BIT(8)
#define OPTION_MPTCP_PRIO BIT(9)
#define OPTION_MPTCP_RST BIT(10)
+#define OPTION_MPTCP_DSS BIT(11)
+#define OPTION_MPTCP_FAIL BIT(12)
+
+#define OPTION_MPTCP_CSUMREQD BIT(13)
+
+#define OPTIONS_MPTCP_MPC (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | \
+ OPTION_MPTCP_MPC_ACK)
+#define OPTIONS_MPTCP_MPJ (OPTION_MPTCP_MPJ_SYN | OPTION_MPTCP_MPJ_SYNACK | \
+ OPTION_MPTCP_MPJ_SYNACK)
/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE 0
@@ -67,6 +76,7 @@
#define TCPOLEN_MPTCP_PRIO_ALIGN 4
#define TCPOLEN_MPTCP_FASTCLOSE 12
#define TCPOLEN_MPTCP_RST 4
+#define TCPOLEN_MPTCP_FAIL 12
#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
@@ -129,35 +139,28 @@ struct mptcp_options_received {
u32 subflow_seq;
u16 data_len;
__sum16 csum;
- u16 mp_capable : 1,
- mp_join : 1,
- fastclose : 1,
- reset : 1,
- dss : 1,
- add_addr : 1,
- rm_addr : 1,
- mp_prio : 1,
- echo : 1,
- csum_reqd : 1,
- backup : 1,
- deny_join_id0 : 1;
+ u16 suboptions;
u32 token;
u32 nonce;
- u64 thmac;
- u8 hmac[MPTCPOPT_HMAC_LEN];
- u8 join_id;
- u8 use_map:1,
+ u16 use_map:1,
dsn64:1,
data_fin:1,
use_ack:1,
ack64:1,
mpc_map:1,
+ reset_reason:4,
+ reset_transient:1,
+ echo:1,
+ backup:1,
+ deny_join_id0:1,
__unused:2;
+ u8 join_id;
+ u64 thmac;
+ u8 hmac[MPTCPOPT_HMAC_LEN];
struct mptcp_addr_info addr;
struct mptcp_rm_list rm_list;
u64 ahmac;
- u8 reset_reason:4;
- u8 reset_transient:1;
+ u64 fail_seq;
};
static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
@@ -178,8 +181,6 @@ enum mptcp_pm_status {
enum mptcp_addr_signal_status {
MPTCP_ADD_ADDR_SIGNAL,
MPTCP_ADD_ADDR_ECHO,
- MPTCP_ADD_ADDR_IPV6,
- MPTCP_ADD_ADDR_PORT,
MPTCP_RM_ADDR_SIGNAL,
};
@@ -230,12 +231,17 @@ struct mptcp_sock {
struct sock *last_snd;
int snd_burst;
int old_wspace;
+ u64 recovery_snd_nxt; /* in recovery mode accept up to this seq;
+ * recovery related fields are under data_lock
+ * protection
+ */
u64 snd_una;
u64 wnd_end;
unsigned long timer_ival;
u32 token;
int rmem_released;
unsigned long flags;
+ bool recovery; /* closing subflow write queue reinjected */
bool can_ack;
bool fully_established;
bool rcv_data_fin;
@@ -425,9 +431,11 @@ struct mptcp_subflow_context {
mpc_map : 1,
backup : 1,
send_mp_prio : 1,
+ send_mp_fail : 1,
rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */
- disposable : 1; /* ctx can be free at ulp release time */
+ disposable : 1, /* ctx can be free at ulp release time */
+ stale : 1; /* unable to snd/rcv data, do not use for xmit */
enum mptcp_data_avail data_avail;
u32 remote_nonce;
u64 thmac;
@@ -439,11 +447,13 @@ struct mptcp_subflow_context {
u8 reset_seen:1;
u8 reset_transient:1;
u8 reset_reason:4;
+ u8 stale_count;
long delegated_status;
struct list_head delegated_node; /* link into delegated_action, protected by local BH */
- u32 setsockopt_seq;
+ u32 setsockopt_seq;
+ u32 stale_rcv_tstamp;
struct sock *tcp_sock; /* tcp sk backpointer */
struct sock *conn; /* parent mptcp_sock */
@@ -549,12 +559,15 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su
clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
}
-int mptcp_is_enabled(struct net *net);
-unsigned int mptcp_get_add_addr_timeout(struct net *net);
-int mptcp_is_checksum_enabled(struct net *net);
-int mptcp_allow_join_id0(struct net *net);
+int mptcp_is_enabled(const struct net *net);
+unsigned int mptcp_get_add_addr_timeout(const struct net *net);
+int mptcp_is_checksum_enabled(const struct net *net);
+int mptcp_allow_join_id0(const struct net *net);
+unsigned int mptcp_stale_loss_cnt(const struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt);
+bool __mptcp_retransmit_pending_data(struct sock *sk);
+void __mptcp_push_pending(struct sock *sk, unsigned int flags);
bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void);
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
@@ -566,14 +579,13 @@ struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
- const struct mptcp_addr_info *remote,
- u8 flags, int ifindex);
+ const struct mptcp_addr_info *remote);
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
unsigned short family);
-static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
+static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -585,6 +597,10 @@ static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
}
+void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow);
+
+bool mptcp_subflow_active(struct mptcp_subflow_context *subflow);
+
static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
struct mptcp_subflow_context *ctx)
{
@@ -596,6 +612,19 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}
+static inline bool mptcp_has_another_subflow(struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ mptcp_for_each_subflow(msk, tmp) {
+ if (tmp != subflow)
+ return true;
+ }
+
+ return false;
+}
+
void __init mptcp_proto_init(void);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int __init mptcp_proto_v6_init(void);
@@ -690,6 +719,8 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
+void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
+void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
@@ -708,6 +739,7 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct mptcp_addr_info *addr,
u8 bkup);
+void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
@@ -716,6 +748,8 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
struct mptcp_pm_add_entry *
mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
struct mptcp_addr_info *addr);
+int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
+ u8 *flags, int *ifindex);
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
@@ -730,22 +764,18 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
- return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL);
+ return READ_ONCE(msk->pm.addr_signal) &
+ (BIT(MPTCP_ADD_ADDR_SIGNAL) | BIT(MPTCP_ADD_ADDR_ECHO));
}
-static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk)
-{
- return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO);
-}
-
-static inline bool mptcp_pm_should_add_signal_ipv6(struct mptcp_sock *msk)
+static inline bool mptcp_pm_should_add_signal_addr(struct mptcp_sock *msk)
{
- return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_IPV6);
+ return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL);
}
-static inline bool mptcp_pm_should_add_signal_port(struct mptcp_sock *msk)
+static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk)
{
- return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_PORT);
+ return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO);
}
static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk)
@@ -776,8 +806,10 @@ static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list)
return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1;
}
-bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
- struct mptcp_addr_info *saddr, bool *echo, bool *port);
+bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
+ unsigned int opt_size, unsigned int remaining,
+ struct mptcp_addr_info *addr, bool *echo,
+ bool *port, bool *drop_other_suboptions);
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_rm_list *rm_list);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 966f777d35ce..1de7ce883c37 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -141,6 +141,7 @@ static int subflow_check_req(struct request_sock *req,
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct mptcp_options_received mp_opt;
+ bool opt_mp_capable, opt_mp_join;
pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
@@ -154,16 +155,18 @@ static int subflow_check_req(struct request_sock *req,
mptcp_get_options(sk_listener, skb, &mp_opt);
- if (mp_opt.mp_capable) {
+ opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
+ opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
+ if (opt_mp_capable) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
- if (mp_opt.mp_join)
+ if (opt_mp_join)
return 0;
- } else if (mp_opt.mp_join) {
+ } else if (opt_mp_join) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
}
- if (mp_opt.mp_capable && listener->request_mptcp) {
+ if (opt_mp_capable && listener->request_mptcp) {
int err, retries = MPTCP_TOKEN_MAX_RETRIES;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
@@ -194,7 +197,7 @@ again:
else
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT);
- } else if (mp_opt.mp_join && listener->request_mptcp) {
+ } else if (opt_mp_join && listener->request_mptcp) {
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
subflow_req->mp_join = 1;
subflow_req->backup = mp_opt.backup;
@@ -243,15 +246,18 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct mptcp_options_received mp_opt;
+ bool opt_mp_capable, opt_mp_join;
int err;
subflow_init_req(req, sk_listener);
mptcp_get_options(sk_listener, skb, &mp_opt);
- if (mp_opt.mp_capable && mp_opt.mp_join)
+ opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
+ opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
+ if (opt_mp_capable && opt_mp_join)
return -EINVAL;
- if (mp_opt.mp_capable && listener->request_mptcp) {
+ if (opt_mp_capable && listener->request_mptcp) {
if (mp_opt.sndr_key == 0)
return -EINVAL;
@@ -262,7 +268,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
subflow_req->mp_capable = 1;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
- } else if (mp_opt.mp_join && listener->request_mptcp) {
+ } else if (opt_mp_join && listener->request_mptcp) {
if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
return -EINVAL;
@@ -394,7 +400,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
-
/* be sure no special action on any packet other than syn-ack */
if (subflow->conn_finished)
return;
@@ -407,7 +412,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
mptcp_get_options(sk, skb, &mp_opt);
if (subflow->request_mptcp) {
- if (!mp_opt.mp_capable) {
+ if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
MPTCP_INC_STATS(sock_net(sk),
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
mptcp_do_fallback(sk);
@@ -415,7 +420,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
goto fallback;
}
- if (mp_opt.csum_reqd)
+ if (mp_opt.suboptions & OPTION_MPTCP_CSUMREQD)
WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
if (mp_opt.deny_join_id0)
WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true);
@@ -430,15 +435,17 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
- if (!mp_opt.mp_join) {
+ if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ)) {
subflow->reset_reason = MPTCP_RST_EMPTCP;
goto do_reset;
}
+ subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
- pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
- subflow->thmac, subflow->remote_nonce);
+ pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
+ subflow, subflow->thmac, subflow->remote_nonce,
+ subflow->backup);
if (!subflow_thmac_valid(subflow)) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
@@ -634,10 +641,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
- /* After child creation we must look for 'mp_capable' even when options
+ /* After child creation we must look for MPC even when options
* are not parsed
*/
- mp_opt.mp_capable = 0;
+ mp_opt.suboptions = 0;
/* hopefully temporary handling for MP_JOIN+syncookie */
subflow_req = mptcp_subflow_rsk(req);
@@ -657,7 +664,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* options.
*/
mptcp_get_options(sk, skb, &mp_opt);
- if (!mp_opt.mp_capable) {
+ if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
fallback = true;
goto create_child;
}
@@ -667,7 +674,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
fallback = true;
} else if (subflow_req->mp_join) {
mptcp_get_options(sk, skb, &mp_opt);
- if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
+ if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) ||
+ !subflow_hmac_valid(req, &mp_opt) ||
!mptcp_can_accept_new_subflow(subflow_req->msk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
fallback = true;
@@ -724,7 +732,7 @@ create_child:
/* with OoO packets we can reach here without ingress
* mpc option
*/
- if (mp_opt.mp_capable)
+ if (mp_opt.suboptions & OPTIONS_MPTCP_MPC)
mptcp_subflow_fully_established(ctx, &mp_opt);
} else if (ctx->mp_join) {
struct mptcp_sock *owner;
@@ -908,6 +916,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
if (unlikely(csum_fold(csum))) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
+ subflow->send_mp_fail = 1;
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
}
@@ -1155,6 +1165,20 @@ no_data:
fallback:
/* RFC 8684 section 3.7. */
+ if (subflow->send_mp_fail) {
+ if (mptcp_has_another_subflow(ssk)) {
+ while ((skb = skb_peek(&ssk->sk_receive_queue)))
+ sk_eat_skb(ssk, skb);
+ }
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ WRITE_ONCE(subflow->data_avail, 0);
+ return true;
+ }
+
if (subflow->mp_join || subflow->fully_established) {
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
@@ -1353,8 +1377,7 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
}
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
- const struct mptcp_addr_info *remote,
- u8 flags, int ifindex)
+ const struct mptcp_addr_info *remote)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_subflow_context *subflow;
@@ -1365,6 +1388,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
struct sock *ssk;
u32 remote_token;
int addrlen;
+ int ifindex;
+ u8 flags;
int err;
if (!mptcp_is_fully_established(sk))
@@ -1388,6 +1413,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
local_id = err;
}
+ mptcp_pm_get_flags_and_ifindex_by_id(sock_net(sk), local_id,
+ &flags, &ifindex);
subflow->remote_key = msk->remote_key;
subflow->local_key = msk->local_key;
subflow->token = msk->token;
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 049890e00a3d..aab20e575ecd 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -212,3 +212,6 @@ obj-$(CONFIG_IP_SET) += ipset/
# IPVS
obj-$(CONFIG_IP_VS) += ipvs/
+
+# lwtunnel
+obj-$(CONFIG_LWTUNNEL) += nf_hooks_lwtunnel.o
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index d1bef23fd4f5..dd30c03d5a23 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -132,8 +132,11 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
return ret;
- if (ip > ip_to)
+ if (ip > ip_to) {
+ if (ip_to == 0)
+ return -IPSET_ERR_HASH_ELEM;
swap(ip, ip_to);
+ }
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
@@ -144,6 +147,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
+ /* 64bit division is not allowed on 32bit */
+ if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried) {
ip = ntohl(h->next.ip);
e.ip = htonl(ip);
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 18346d18aa16..153de3457423 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -121,6 +121,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK]));
e.mark &= h->markmask;
+ if (e.mark == 0 && e.ip == 0)
+ return -IPSET_ERR_HASH_ELEM;
if (adt == IPSET_TEST ||
!(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) {
@@ -133,8 +135,11 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
return ret;
- if (ip > ip_to)
+ if (ip > ip_to) {
+ if (e.mark == 0 && ip_to == 0)
+ return -IPSET_ERR_HASH_ELEM;
swap(ip, ip_to);
+ }
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
@@ -143,6 +148,9 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip, ip_to, cidr);
}
+ if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index e1ca11196515..7303138e46be 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -173,6 +173,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
+ if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index ab179e064597..334fb1ad0e86 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -180,6 +180,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
+ if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 8f075b44cf64..7df94f437f60 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -253,6 +253,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
+ if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
ip2_to = ip2_from;
if (tb[IPSET_ATTR_IP2_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index c1a11f041ac6..1422739d9aa2 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -140,7 +140,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = { .cidr = HOST_MASK };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0;
+ u32 ip = 0, ip_to = 0, ipn, n = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -188,6 +188,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
+ n++;
+ } while (ipn++ < ip_to);
+
+ if (n > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
do {
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index ddd51c2e1cb3..9810f5bf63f5 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0;
+ u32 ip = 0, ip_to = 0, ipn, n = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -256,6 +256,14 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
+ n++;
+ } while (ipn++ < ip_to);
+
+ if (n > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 6532f0505e66..3d09eefe998a 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -168,7 +168,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0;
- u32 ip2 = 0, ip2_from = 0, ip2_to = 0;
+ u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn;
+ u64 n = 0, m = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -244,6 +245,19 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
+ n++;
+ } while (ipn++ < ip_to);
+ ipn = ip2_from;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
+ m++;
+ } while (ipn++ < ip2_to);
+
+ if (n*m > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index ec1564a1cb5a..09cf72eb37f8 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -158,7 +158,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 port, port_to, p = 0, ip = 0, ip_to = 0;
+ u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn;
+ u64 n = 0;
bool with_ports = false;
u8 cidr;
int ret;
@@ -235,6 +236,14 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr);
+ n++;
+ } while (ipn++ < ip_to);
+
+ if (n*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip);
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 0e91d1e82f1c..19bcdb3141f6 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -182,7 +182,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2;
+ u32 ip2_from = 0, ip2_to = 0, ip2, ipn;
+ u64 n = 0, m = 0;
bool with_ports = false;
int ret;
@@ -284,6 +285,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
+ n++;
+ } while (ipn++ < ip_to);
+ ipn = ip2_from;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
+ m++;
+ } while (ipn++ < ip2_to);
+
+ if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 5c03e5106751..d31dbccbe7bd 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -66,22 +66,17 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash);
struct conntrack_gc_work {
struct delayed_work dwork;
- u32 last_bucket;
+ u32 next_bucket;
bool exiting;
bool early_drop;
- long next_gc_run;
};
static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
-/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
-#define GC_MAX_BUCKETS_DIV 128u
-/* upper bound of full table scan */
-#define GC_MAX_SCAN_JIFFIES (16u * HZ)
-/* desired ratio of entries found to be expired */
-#define GC_EVICT_RATIO 50u
+#define GC_SCAN_INTERVAL (120u * HZ)
+#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
static struct conntrack_gc_work conntrack_gc_work;
@@ -1363,17 +1358,13 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
static void gc_worker(struct work_struct *work)
{
- unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
- unsigned int i, goal, buckets = 0, expired_count = 0;
- unsigned int nf_conntrack_max95 = 0;
+ unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
+ unsigned int i, hashsz, nf_conntrack_max95 = 0;
+ unsigned long next_run = GC_SCAN_INTERVAL;
struct conntrack_gc_work *gc_work;
- unsigned int ratio, scanned = 0;
- unsigned long next_run;
-
gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
- goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
- i = gc_work->last_bucket;
+ i = gc_work->next_bucket;
if (gc_work->early_drop)
nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
@@ -1381,15 +1372,15 @@ static void gc_worker(struct work_struct *work)
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
struct hlist_nulls_node *n;
- unsigned int hashsz;
struct nf_conn *tmp;
- i++;
rcu_read_lock();
nf_conntrack_get_ht(&ct_hash, &hashsz);
- if (i >= hashsz)
- i = 0;
+ if (i >= hashsz) {
+ rcu_read_unlock();
+ break;
+ }
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
struct nf_conntrack_net *cnet;
@@ -1397,7 +1388,6 @@ static void gc_worker(struct work_struct *work)
tmp = nf_ct_tuplehash_to_ctrack(h);
- scanned++;
if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
nf_ct_offload_timeout(tmp);
continue;
@@ -1405,7 +1395,6 @@ static void gc_worker(struct work_struct *work)
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
- expired_count++;
continue;
}
@@ -1438,7 +1427,14 @@ static void gc_worker(struct work_struct *work)
*/
rcu_read_unlock();
cond_resched();
- } while (++buckets < goal);
+ i++;
+
+ if (time_after(jiffies, end_time) && i < hashsz) {
+ gc_work->next_bucket = i;
+ next_run = 0;
+ break;
+ }
+ } while (i < hashsz);
if (gc_work->exiting)
return;
@@ -1449,40 +1445,17 @@ static void gc_worker(struct work_struct *work)
*
* This worker is only here to reap expired entries when system went
* idle after a busy period.
- *
- * The heuristics below are supposed to balance conflicting goals:
- *
- * 1. Minimize time until we notice a stale entry
- * 2. Maximize scan intervals to not waste cycles
- *
- * Normally, expire ratio will be close to 0.
- *
- * As soon as a sizeable fraction of the entries have expired
- * increase scan frequency.
*/
- ratio = scanned ? expired_count * 100 / scanned : 0;
- if (ratio > GC_EVICT_RATIO) {
- gc_work->next_gc_run = min_interval;
- } else {
- unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;
-
- BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0);
-
- gc_work->next_gc_run += min_interval;
- if (gc_work->next_gc_run > max)
- gc_work->next_gc_run = max;
+ if (next_run) {
+ gc_work->early_drop = false;
+ gc_work->next_bucket = 0;
}
-
- next_run = gc_work->next_gc_run;
- gc_work->last_bucket = i;
- gc_work->early_drop = false;
queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
- gc_work->next_gc_run = HZ;
gc_work->exiting = false;
}
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 296e4a171bd1..41768ff19464 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -130,58 +130,77 @@ static void ecache_work(struct work_struct *work)
schedule_delayed_work(&cnet->ecache_dwork, delay);
}
-int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
- u32 portid, int report)
+static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
+ const unsigned int events,
+ const unsigned long missed,
+ const struct nf_ct_event *item)
{
- int ret = 0;
- struct net *net = nf_ct_net(ct);
+ struct nf_conn *ct = item->ct;
+ struct net *net = nf_ct_net(item->ct);
struct nf_ct_event_notifier *notify;
- struct nf_conntrack_ecache *e;
+ int ret;
+
+ if (!((events | missed) & e->ctmask))
+ return 0;
rcu_read_lock();
+
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
- if (!notify)
- goto out_unlock;
+ if (!notify) {
+ rcu_read_unlock();
+ return 0;
+ }
+
+ ret = notify->ct_event(events | missed, item);
+ rcu_read_unlock();
+
+ if (likely(ret >= 0 && missed == 0))
+ return 0;
+
+ spin_lock_bh(&ct->lock);
+ if (ret < 0)
+ e->missed |= events;
+ else
+ e->missed &= ~missed;
+ spin_unlock_bh(&ct->lock);
+
+ return ret;
+}
+
+int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
+ u32 portid, int report)
+{
+ struct nf_conntrack_ecache *e;
+ struct nf_ct_event item;
+ unsigned long missed;
+ int ret;
+
+ if (!nf_ct_is_confirmed(ct))
+ return 0;
e = nf_ct_ecache_find(ct);
if (!e)
- goto out_unlock;
+ return 0;
- if (nf_ct_is_confirmed(ct)) {
- struct nf_ct_event item = {
- .ct = ct,
- .portid = e->portid ? e->portid : portid,
- .report = report
- };
- /* This is a resent of a destroy event? If so, skip missed */
- unsigned long missed = e->portid ? 0 : e->missed;
-
- if (!((eventmask | missed) & e->ctmask))
- goto out_unlock;
-
- ret = notify->fcn(eventmask | missed, &item);
- if (unlikely(ret < 0 || missed)) {
- spin_lock_bh(&ct->lock);
- if (ret < 0) {
- /* This is a destroy event that has been
- * triggered by a process, we store the PORTID
- * to include it in the retransmission.
- */
- if (eventmask & (1 << IPCT_DESTROY)) {
- if (e->portid == 0 && portid != 0)
- e->portid = portid;
- e->state = NFCT_ECACHE_DESTROY_FAIL;
- } else {
- e->missed |= eventmask;
- }
- } else {
- e->missed &= ~missed;
- }
- spin_unlock_bh(&ct->lock);
- }
+ memset(&item, 0, sizeof(item));
+
+ item.ct = ct;
+ item.portid = e->portid ? e->portid : portid;
+ item.report = report;
+
+ /* This is a resent of a destroy event? If so, skip missed */
+ missed = e->portid ? 0 : e->missed;
+
+ ret = __nf_conntrack_eventmask_report(e, events, missed, &item);
+ if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) {
+ /* This is a destroy event that has been triggered by a process,
+ * we store the PORTID to include it in the retransmission.
+ */
+ if (e->portid == 0 && portid != 0)
+ e->portid = portid;
+ e->state = NFCT_ECACHE_DESTROY_FAIL;
}
-out_unlock:
- rcu_read_unlock();
+
return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);
@@ -190,53 +209,28 @@ EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);
* disabled softirqs */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
- struct net *net = nf_ct_net(ct);
- unsigned long events, missed;
- struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
struct nf_ct_event item;
- int ret;
-
- rcu_read_lock();
- notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
- if (notify == NULL)
- goto out_unlock;
+ unsigned long events;
if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
- goto out_unlock;
+ return;
e = nf_ct_ecache_find(ct);
if (e == NULL)
- goto out_unlock;
+ return;
events = xchg(&e->cache, 0);
- /* We make a copy of the missed event cache without taking
- * the lock, thus we may send missed events twice. However,
- * this does not harm and it happens very rarely. */
- missed = e->missed;
-
- if (!((events | missed) & e->ctmask))
- goto out_unlock;
-
item.ct = ct;
item.portid = 0;
item.report = 0;
- ret = notify->fcn(events | missed, &item);
-
- if (likely(ret == 0 && !missed))
- goto out_unlock;
-
- spin_lock_bh(&ct->lock);
- if (ret < 0)
- e->missed |= events;
- else
- e->missed &= ~missed;
- spin_unlock_bh(&ct->lock);
-
-out_unlock:
- rcu_read_unlock();
+ /* We make a copy of the missed event cache without taking
+ * the lock, thus we may send missed events twice. However,
+ * this does not harm and it happens very rarely.
+ */
+ __nf_conntrack_eventmask_report(e, events, e->missed, &item);
}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
@@ -246,11 +240,11 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
{
struct net *net = nf_ct_exp_net(exp);
- struct nf_exp_event_notifier *notify;
+ struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
rcu_read_lock();
- notify = rcu_dereference(net->ct.nf_expect_event_cb);
+ notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (!notify)
goto out_unlock;
@@ -264,86 +258,35 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
.portid = portid,
.report = report
};
- notify->fcn(1 << event, &item);
+ notify->exp_event(1 << event, &item);
}
out_unlock:
rcu_read_unlock();
}
-int nf_conntrack_register_notifier(struct net *net,
- struct nf_ct_event_notifier *new)
+void nf_conntrack_register_notifier(struct net *net,
+ const struct nf_ct_event_notifier *new)
{
- int ret;
struct nf_ct_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
- if (notify != NULL) {
- ret = -EBUSY;
- goto out_unlock;
- }
+ WARN_ON_ONCE(notify);
rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
- ret = 0;
-
-out_unlock:
mutex_unlock(&nf_ct_ecache_mutex);
- return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
-void nf_conntrack_unregister_notifier(struct net *net,
- struct nf_ct_event_notifier *new)
+void nf_conntrack_unregister_notifier(struct net *net)
{
- struct nf_ct_event_notifier *notify;
-
mutex_lock(&nf_ct_ecache_mutex);
- notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
- lockdep_is_held(&nf_ct_ecache_mutex));
- BUG_ON(notify != new);
RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
mutex_unlock(&nf_ct_ecache_mutex);
- /* synchronize_rcu() is called from ctnetlink_exit. */
+ /* synchronize_rcu() is called after netns pre_exit */
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
-int nf_ct_expect_register_notifier(struct net *net,
- struct nf_exp_event_notifier *new)
-{
- int ret;
- struct nf_exp_event_notifier *notify;
-
- mutex_lock(&nf_ct_ecache_mutex);
- notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
- lockdep_is_held(&nf_ct_ecache_mutex));
- if (notify != NULL) {
- ret = -EBUSY;
- goto out_unlock;
- }
- rcu_assign_pointer(net->ct.nf_expect_event_cb, new);
- ret = 0;
-
-out_unlock:
- mutex_unlock(&nf_ct_ecache_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
-
-void nf_ct_expect_unregister_notifier(struct net *net,
- struct nf_exp_event_notifier *new)
-{
- struct nf_exp_event_notifier *notify;
-
- mutex_lock(&nf_ct_ecache_mutex);
- notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
- lockdep_is_held(&nf_ct_ecache_mutex));
- BUG_ON(notify != new);
- RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
- mutex_unlock(&nf_ct_ecache_mutex);
- /* synchronize_rcu() is called from ctnetlink_exit. */
-}
-EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
-
void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index e81af33b233b..5f9fc6b94855 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -706,7 +706,7 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct)
}
static int
-ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
+ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
{
const struct nf_conntrack_zone *zone;
struct net *net;
@@ -852,6 +852,11 @@ static int ctnetlink_done(struct netlink_callback *cb)
return 0;
}
+struct ctnetlink_filter_u32 {
+ u32 val;
+ u32 mask;
+};
+
struct ctnetlink_filter {
u8 family;
@@ -862,10 +867,8 @@ struct ctnetlink_filter {
struct nf_conntrack_tuple reply;
struct nf_conntrack_zone zone;
- struct {
- u_int32_t val;
- u_int32_t mask;
- } mark;
+ struct ctnetlink_filter_u32 mark;
+ struct ctnetlink_filter_u32 status;
};
static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = {
@@ -907,6 +910,46 @@ static int ctnetlink_parse_tuple_filter(const struct nlattr * const cda[],
struct nf_conntrack_zone *zone,
u_int32_t flags);
+static int ctnetlink_filter_parse_mark(struct ctnetlink_filter_u32 *mark,
+ const struct nlattr * const cda[])
+{
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ if (cda[CTA_MARK]) {
+ mark->val = ntohl(nla_get_be32(cda[CTA_MARK]));
+
+ if (cda[CTA_MARK_MASK])
+ mark->mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+ else
+ mark->mask = 0xffffffff;
+ } else if (cda[CTA_MARK_MASK]) {
+ return -EINVAL;
+ }
+#endif
+ return 0;
+}
+
+static int ctnetlink_filter_parse_status(struct ctnetlink_filter_u32 *status,
+ const struct nlattr * const cda[])
+{
+ if (cda[CTA_STATUS]) {
+ status->val = ntohl(nla_get_be32(cda[CTA_STATUS]));
+ if (cda[CTA_STATUS_MASK])
+ status->mask = ntohl(nla_get_be32(cda[CTA_STATUS_MASK]));
+ else
+ status->mask = status->val;
+
+ /* status->val == 0? always true, else always false. */
+ if (status->mask == 0)
+ return -EINVAL;
+ } else if (cda[CTA_STATUS_MASK]) {
+ return -EINVAL;
+ }
+
+ /* CTA_STATUS is NLA_U32, if this fires UAPI needs to be extended */
+ BUILD_BUG_ON(__IPS_MAX_BIT >= 32);
+ return 0;
+}
+
static struct ctnetlink_filter *
ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
{
@@ -924,18 +967,14 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
filter->family = family;
-#ifdef CONFIG_NF_CONNTRACK_MARK
- if (cda[CTA_MARK]) {
- filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK]));
- if (cda[CTA_MARK_MASK])
- filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
- else
- filter->mark.mask = 0xffffffff;
- } else if (cda[CTA_MARK_MASK]) {
- err = -EINVAL;
+ err = ctnetlink_filter_parse_mark(&filter->mark, cda);
+ if (err)
goto err_filter;
- }
-#endif
+
+ err = ctnetlink_filter_parse_status(&filter->status, cda);
+ if (err)
+ goto err_filter;
+
if (!cda[CTA_FILTER])
return filter;
@@ -989,7 +1028,7 @@ err_filter:
static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda)
{
- return family || cda[CTA_MARK] || cda[CTA_FILTER];
+ return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS];
}
static int ctnetlink_start(struct netlink_callback *cb)
@@ -1082,6 +1121,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
{
struct ctnetlink_filter *filter = data;
struct nf_conntrack_tuple *tuple;
+ u32 status;
if (filter == NULL)
goto out;
@@ -1113,6 +1153,9 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
if ((ct->mark & filter->mark.mask) != filter->mark.val)
goto ignore_entry;
#endif
+ status = (u32)READ_ONCE(ct->status);
+ if ((status & filter->status.mask) != filter->status.val)
+ goto ignore_entry;
out:
return 1;
@@ -1495,6 +1538,7 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
[CTA_LABELS_MASK] = { .type = NLA_BINARY,
.len = NF_CT_LABELS_MAX_SIZE },
[CTA_FILTER] = { .type = NLA_NESTED },
+ [CTA_STATUS_MASK] = { .type = NLA_U32 },
};
static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
@@ -2625,6 +2669,8 @@ ctnetlink_glue_build_size(const struct nf_conn *ct)
+ nla_total_size(0) /* CTA_HELP */
+ nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
+ ctnetlink_secctx_size(ct)
+ + ctnetlink_acct_size(ct)
+ + ctnetlink_timestamp_size(ct)
#if IS_ENABLED(CONFIG_NF_NAT)
+ 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
+ 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
@@ -2682,6 +2728,10 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
if (ctnetlink_dump_protoinfo(skb, ct, false) < 0)
goto nla_put_failure;
+ if (ctnetlink_dump_acct(skb, ct, IPCTNL_MSG_CT_GET) < 0 ||
+ ctnetlink_dump_timestamp(skb, ct) < 0)
+ goto nla_put_failure;
+
if (ctnetlink_dump_helpinfo(skb, ct) < 0)
goto nla_put_failure;
@@ -3060,7 +3110,7 @@ nla_put_failure:
#ifdef CONFIG_NF_CONNTRACK_EVENTS
static int
-ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
+ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item)
{
struct nf_conntrack_expect *exp = item->exp;
struct net *net = nf_ct_exp_net(exp);
@@ -3711,11 +3761,8 @@ static int ctnetlink_stat_exp_cpu(struct sk_buff *skb,
#ifdef CONFIG_NF_CONNTRACK_EVENTS
static struct nf_ct_event_notifier ctnl_notifier = {
- .fcn = ctnetlink_conntrack_event,
-};
-
-static struct nf_exp_event_notifier ctnl_notifier_exp = {
- .fcn = ctnetlink_expect_event,
+ .ct_event = ctnetlink_conntrack_event,
+ .exp_event = ctnetlink_expect_event,
};
#endif
@@ -3808,52 +3855,21 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
static int __net_init ctnetlink_net_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
- int ret;
-
- ret = nf_conntrack_register_notifier(net, &ctnl_notifier);
- if (ret < 0) {
- pr_err("ctnetlink_init: cannot register notifier.\n");
- goto err_out;
- }
-
- ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp);
- if (ret < 0) {
- pr_err("ctnetlink_init: cannot expect register notifier.\n");
- goto err_unreg_notifier;
- }
+ nf_conntrack_register_notifier(net, &ctnl_notifier);
#endif
return 0;
-
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-err_unreg_notifier:
- nf_conntrack_unregister_notifier(net, &ctnl_notifier);
-err_out:
- return ret;
-#endif
}
-static void ctnetlink_net_exit(struct net *net)
+static void ctnetlink_net_pre_exit(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
- nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp);
- nf_conntrack_unregister_notifier(net, &ctnl_notifier);
+ nf_conntrack_unregister_notifier(net);
#endif
}
-static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list)
-{
- struct net *net;
-
- list_for_each_entry(net, net_exit_list, exit_list)
- ctnetlink_net_exit(net);
-
- /* wait for other cpus until they are done with ctnl_notifiers */
- synchronize_rcu();
-}
-
static struct pernet_operations ctnetlink_net_ops = {
.init = ctnetlink_net_init,
- .exit_batch = ctnetlink_net_exit_batch,
+ .pre_exit = ctnetlink_net_pre_exit,
};
static int __init ctnetlink_init(void)
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 3259416f2ea4..af5115e127cf 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1478,7 +1478,6 @@ void nf_conntrack_tcp_init_net(struct net *net)
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
tn->offload_timeout = 30 * HZ;
- tn->offload_pickup = 120 * HZ;
#endif
}
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 698fee49e732..f8e3c0d2602f 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -271,7 +271,6 @@ void nf_conntrack_udp_init_net(struct net *net)
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
un->offload_timeout = 30 * HZ;
- un->offload_pickup = 30 * HZ;
#endif
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 214d9f9e499b..7e0d956da51d 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -22,6 +22,9 @@
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
+#ifdef CONFIG_LWTUNNEL
+#include <net/netfilter/nf_hooks_lwtunnel.h>
+#endif
#include <linux/rculist_nulls.h>
static bool enable_hooks __read_mostly;
@@ -575,7 +578,6 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
- NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP,
#endif
NF_SYSCTL_CT_PROTO_TCP_LOOSE,
NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
@@ -585,7 +587,6 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
- NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP,
#endif
NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
@@ -614,6 +615,9 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE,
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
#endif
+#ifdef CONFIG_LWTUNNEL
+ NF_SYSCTL_CT_LWTUNNEL,
+#endif
__NF_SYSCTL_CT_LAST_SYSCTL,
};
@@ -776,12 +780,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = {
- .procname = "nf_flowtable_tcp_pickup",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
#endif
[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
.procname = "nf_conntrack_tcp_loose",
@@ -832,12 +830,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = {
- .procname = "nf_flowtable_udp_pickup",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
#endif
[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
.procname = "nf_conntrack_icmp_timeout",
@@ -973,6 +965,15 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.proc_handler = proc_dointvec_jiffies,
},
#endif
+#ifdef CONFIG_LWTUNNEL
+ [NF_SYSCTL_CT_LWTUNNEL] = {
+ .procname = "nf_hooks_lwtunnel",
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = nf_hooks_lwtunnel_sysctl_handler,
+ },
+#endif
{}
};
@@ -1018,7 +1019,6 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
- table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = &tn->offload_pickup;
#endif
}
@@ -1111,7 +1111,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
- table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = &un->offload_pickup;
#endif
nf_conntrack_standalone_init_tcp_sysctl(net, table);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 551976e4284c..87a7388b6c89 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -99,7 +99,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
break;
case NFPROTO_IPV6:
- flow_tuple->mtu = ip6_dst_mtu_forward(dst);
+ flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
break;
}
@@ -180,27 +180,27 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
{
- const struct nf_conntrack_l4proto *l4proto;
struct net *net = nf_ct_net(ct);
int l4num = nf_ct_protonum(ct);
- unsigned int timeout;
-
- l4proto = nf_ct_l4proto_find(l4num);
- if (!l4proto)
- return;
+ s32 timeout;
if (l4num == IPPROTO_TCP) {
struct nf_tcp_net *tn = nf_tcp_pernet(net);
- timeout = tn->offload_pickup;
+ timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
+ timeout -= tn->offload_timeout;
} else if (l4num == IPPROTO_UDP) {
struct nf_udp_net *tn = nf_udp_pernet(net);
- timeout = tn->offload_pickup;
+ timeout = tn->timeouts[UDP_CT_REPLIED];
+ timeout -= tn->offload_timeout;
} else {
return;
}
+ if (timeout < 0)
+ timeout = 0;
+
if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
ct->timeout = nfct_time_stamp + timeout;
}
@@ -273,15 +273,10 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
unsigned long flow_offload_get_timeout(struct flow_offload *flow)
{
- const struct nf_conntrack_l4proto *l4proto;
unsigned long timeout = NF_FLOW_TIMEOUT;
struct net *net = nf_ct_net(flow->ct);
int l4num = nf_ct_protonum(flow->ct);
- l4proto = nf_ct_l4proto_find(l4num);
- if (!l4proto)
- return timeout;
-
if (l4num == IPPROTO_TCP) {
struct nf_tcp_net *tn = nf_tcp_pernet(net);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index f92006cec94c..d6bf1b2cd541 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -251,8 +251,7 @@ static int flow_offload_eth_src(struct net *net,
flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
&val, &mask);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
return 0;
}
@@ -1097,6 +1096,7 @@ static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
bo->command = cmd;
bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
bo->extack = extack;
+ bo->cb_list_head = &flowtable->flow_block.cb_list;
INIT_LIST_HEAD(&bo->cb_list);
}
diff --git a/net/netfilter/nf_hooks_lwtunnel.c b/net/netfilter/nf_hooks_lwtunnel.c
new file mode 100644
index 000000000000..00e89ffd78f6
--- /dev/null
+++ b/net/netfilter/nf_hooks_lwtunnel.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/sysctl.h>
+#include <net/lwtunnel.h>
+#include <net/netfilter/nf_hooks_lwtunnel.h>
+
+static inline int nf_hooks_lwtunnel_get(void)
+{
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return 1;
+ else
+ return 0;
+}
+
+static inline int nf_hooks_lwtunnel_set(int enable)
+{
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) {
+ if (!enable)
+ return -EBUSY;
+ } else if (enable) {
+ static_branch_enable(&nf_hooks_lwtunnel_enabled);
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_SYSCTL
+int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int proc_nf_hooks_lwtunnel_enabled = 0;
+ struct ctl_table tmp = {
+ .procname = table->procname,
+ .data = &proc_nf_hooks_lwtunnel_enabled,
+ .maxlen = sizeof(int),
+ .mode = table->mode,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+ int ret;
+
+ if (!write)
+ proc_nf_hooks_lwtunnel_enabled = nf_hooks_lwtunnel_get();
+
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+ if (write && ret == 0)
+ ret = nf_hooks_lwtunnel_set(proc_nf_hooks_lwtunnel_enabled);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler);
+#endif /* CONFIG_SYSCTL */
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index bbd1209694b8..6d12afabfe8a 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -21,6 +21,8 @@
#include "nf_internals.h"
+static const struct nf_queue_handler __rcu *nf_queue_handler;
+
/*
* Hook for nfnetlink_queue to register its queue handler.
* We do this so that most of the NFQUEUE code can be modular.
@@ -29,20 +31,18 @@
* receives, no matter what.
*/
-/* return EBUSY when somebody else is registered, return EEXIST if the
- * same handler is registered, return 0 in case of success. */
-void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh)
+void nf_register_queue_handler(const struct nf_queue_handler *qh)
{
/* should never happen, we only have one queueing backend in kernel */
- WARN_ON(rcu_access_pointer(net->nf.queue_handler));
- rcu_assign_pointer(net->nf.queue_handler, qh);
+ WARN_ON(rcu_access_pointer(nf_queue_handler));
+ rcu_assign_pointer(nf_queue_handler, qh);
}
EXPORT_SYMBOL(nf_register_queue_handler);
/* The caller must flush their queue before this */
-void nf_unregister_queue_handler(struct net *net)
+void nf_unregister_queue_handler(void)
{
- RCU_INIT_POINTER(net->nf.queue_handler, NULL);
+ RCU_INIT_POINTER(nf_queue_handler, NULL);
}
EXPORT_SYMBOL(nf_unregister_queue_handler);
@@ -51,18 +51,14 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
struct nf_hook_state *state = &entry->state;
/* Release those devices we held, or Alexey will kill me. */
- if (state->in)
- dev_put(state->in);
- if (state->out)
- dev_put(state->out);
+ dev_put(state->in);
+ dev_put(state->out);
if (state->sk)
sock_put(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (entry->physin)
- dev_put(entry->physin);
- if (entry->physout)
- dev_put(entry->physout);
+ dev_put(entry->physin);
+ dev_put(entry->physout);
#endif
}
@@ -95,18 +91,14 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
- if (state->in)
- dev_hold(state->in);
- if (state->out)
- dev_hold(state->out);
+ dev_hold(state->in);
+ dev_hold(state->out);
if (state->sk)
sock_hold(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (entry->physin)
- dev_hold(entry->physin);
- if (entry->physout)
- dev_hold(entry->physout);
+ dev_hold(entry->physin);
+ dev_hold(entry->physout);
#endif
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
@@ -116,7 +108,7 @@ void nf_queue_nf_hook_drop(struct net *net)
const struct nf_queue_handler *qh;
rcu_read_lock();
- qh = rcu_dereference(net->nf.queue_handler);
+ qh = rcu_dereference(nf_queue_handler);
if (qh)
qh->nf_hook_drop(net);
rcu_read_unlock();
@@ -157,12 +149,11 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
{
struct nf_queue_entry *entry = NULL;
const struct nf_queue_handler *qh;
- struct net *net = state->net;
unsigned int route_key_size;
int status;
/* QUEUE == DROP if no one is waiting, to be safe. */
- qh = rcu_dereference(net->nf.queue_handler);
+ qh = rcu_dereference(nf_queue_handler);
if (!qh)
return -ESRCH;
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index b58d73a96523..9656c1646222 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -353,6 +353,7 @@ static void nft_flow_block_offload_init(struct flow_block_offload *bo,
bo->command = cmd;
bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
bo->extack = extack;
+ bo->cb_list_head = &basechain->flow_block.cb_list;
INIT_LIST_HEAD(&bo->cb_list);
}
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
index 202f57d17bab..f554e2ea32ee 100644
--- a/net/netfilter/nfnetlink_hook.c
+++ b/net/netfilter/nfnetlink_hook.c
@@ -89,11 +89,15 @@ static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb,
if (!nest2)
goto cancel_nest;
- ret = nla_put_string(nlskb, NFTA_CHAIN_TABLE, chain->table->name);
+ ret = nla_put_string(nlskb, NFNLA_CHAIN_TABLE, chain->table->name);
if (ret)
goto cancel_nest;
- ret = nla_put_string(nlskb, NFTA_CHAIN_NAME, chain->name);
+ ret = nla_put_string(nlskb, NFNLA_CHAIN_NAME, chain->name);
+ if (ret)
+ goto cancel_nest;
+
+ ret = nla_put_u8(nlskb, NFNLA_CHAIN_FAMILY, chain->table->family);
if (ret)
goto cancel_nest;
@@ -109,18 +113,19 @@ cancel_nest:
static int nfnl_hook_dump_one(struct sk_buff *nlskb,
const struct nfnl_dump_hook_data *ctx,
const struct nf_hook_ops *ops,
- unsigned int seq)
+ int family, unsigned int seq)
{
u16 event = nfnl_msg_type(NFNL_SUBSYS_HOOK, NFNL_MSG_HOOK_GET);
unsigned int portid = NETLINK_CB(nlskb).portid;
struct nlmsghdr *nlh;
int ret = -EMSGSIZE;
+ u32 hooknum;
#ifdef CONFIG_KALLSYMS
char sym[KSYM_SYMBOL_LEN];
char *module_name;
#endif
nlh = nfnl_msg_put(nlskb, portid, seq, event,
- NLM_F_MULTI, ops->pf, NFNETLINK_V0, 0);
+ NLM_F_MULTI, family, NFNETLINK_V0, 0);
if (!nlh)
goto nla_put_failure;
@@ -135,6 +140,7 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb,
if (module_name) {
char *end;
+ *module_name = '\0';
module_name += 2;
end = strchr(module_name, ']');
if (end) {
@@ -151,7 +157,12 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb,
goto nla_put_failure;
#endif
- ret = nla_put_be32(nlskb, NFNLA_HOOK_HOOKNUM, htonl(ops->hooknum));
+ if (ops->pf == NFPROTO_INET && ops->hooknum == NF_INET_INGRESS)
+ hooknum = NF_NETDEV_INGRESS;
+ else
+ hooknum = ops->hooknum;
+
+ ret = nla_put_be32(nlskb, NFNLA_HOOK_HOOKNUM, htonl(hooknum));
if (ret)
goto nla_put_failure;
@@ -259,7 +270,8 @@ static int nfnl_hook_dump(struct sk_buff *nlskb,
ops = nf_hook_entries_get_hook_ops(e);
for (; i < e->num_hook_entries; i++) {
- err = nfnl_hook_dump_one(nlskb, ctx, ops[i], cb->seq);
+ err = nfnl_hook_dump_one(nlskb, ctx, ops[i], family,
+ cb->nlh->nlmsg_seq);
if (err)
break;
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index f774de0fc24f..4c3fbaaeb103 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -951,6 +951,16 @@ static void nfqnl_nf_hook_drop(struct net *net)
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
int i;
+ /* This function is also called on net namespace error unwind,
+ * when pernet_ops->init() failed and ->exit() functions of the
+ * previous pernet_ops gets called.
+ *
+ * This may result in a call to nfqnl_nf_hook_drop() before
+ * struct nfnl_queue_net was allocated.
+ */
+ if (!q)
+ return;
+
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct nfqnl_instance *inst;
struct hlist_head *head = &q->instance_table[i];
@@ -1502,7 +1512,6 @@ static int __net_init nfnl_queue_net_init(struct net *net)
&nfqnl_seq_ops, sizeof(struct iter_state)))
return -ENOMEM;
#endif
- nf_register_queue_handler(net, &nfqh);
return 0;
}
@@ -1511,7 +1520,6 @@ static void __net_exit nfnl_queue_net_exit(struct net *net)
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
unsigned int i;
- nf_unregister_queue_handler(net);
#ifdef CONFIG_PROC_FS
remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
#endif
@@ -1555,6 +1563,8 @@ static int __init nfnetlink_queue_init(void)
goto cleanup_netlink_subsys;
}
+ nf_register_queue_handler(&nfqh);
+
return status;
cleanup_netlink_subsys:
@@ -1568,6 +1578,7 @@ out:
static void __exit nfnetlink_queue_fini(void)
{
+ nf_unregister_queue_handler();
unregister_netdevice_notifier(&nfqnl_dev_notifier);
nfnetlink_subsys_unregister(&nfqnl_subsys);
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 639c337c885b..272bcdb1392d 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -683,14 +683,12 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb,
goto out_put;
}
- ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (ret > 0)
- ret = 0;
+ ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
out_put:
rcu_read_lock();
module_put(THIS_MODULE);
- return ret == -EAGAIN ? -ENOBUFS : ret;
+
+ return ret;
}
static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 84e58ee501a4..25524e393349 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -39,6 +39,20 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
#define XT_PCPU_BLOCK_SIZE 4096
#define XT_MAX_TABLE_SIZE (512 * 1024 * 1024)
+struct xt_template {
+ struct list_head list;
+
+ /* called when table is needed in the given netns */
+ int (*table_init)(struct net *net);
+
+ struct module *me;
+
+ /* A unique name... */
+ char name[XT_TABLE_MAXNAMELEN];
+};
+
+static struct list_head xt_templates[NFPROTO_NUMPROTO];
+
struct xt_pernet {
struct list_head tables[NFPROTO_NUMPROTO];
};
@@ -1221,48 +1235,43 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
const char *name)
{
struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
- struct xt_table *t, *found = NULL;
+ struct module *owner = NULL;
+ struct xt_template *tmpl;
+ struct xt_table *t;
mutex_lock(&xt[af].mutex);
list_for_each_entry(t, &xt_net->tables[af], list)
if (strcmp(t->name, name) == 0 && try_module_get(t->me))
return t;
- if (net == &init_net)
- goto out;
-
- /* Table doesn't exist in this netns, re-try init */
- xt_net = net_generic(&init_net, xt_pernet_id);
- list_for_each_entry(t, &xt_net->tables[af], list) {
+ /* Table doesn't exist in this netns, check larval list */
+ list_for_each_entry(tmpl, &xt_templates[af], list) {
int err;
- if (strcmp(t->name, name))
+ if (strcmp(tmpl->name, name))
continue;
- if (!try_module_get(t->me))
+ if (!try_module_get(tmpl->me))
goto out;
+
+ owner = tmpl->me;
+
mutex_unlock(&xt[af].mutex);
- err = t->table_init(net);
+ err = tmpl->table_init(net);
if (err < 0) {
- module_put(t->me);
+ module_put(owner);
return ERR_PTR(err);
}
- found = t;
-
mutex_lock(&xt[af].mutex);
break;
}
- if (!found)
- goto out;
-
- xt_net = net_generic(net, xt_pernet_id);
/* and once again: */
list_for_each_entry(t, &xt_net->tables[af], list)
if (strcmp(t->name, name) == 0)
return t;
- module_put(found->me);
+ module_put(owner);
out:
mutex_unlock(&xt[af].mutex);
return ERR_PTR(-ENOENT);
@@ -1749,6 +1758,58 @@ xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn)
}
EXPORT_SYMBOL_GPL(xt_hook_ops_alloc);
+int xt_register_template(const struct xt_table *table,
+ int (*table_init)(struct net *net))
+{
+ int ret = -EEXIST, af = table->af;
+ struct xt_template *t;
+
+ mutex_lock(&xt[af].mutex);
+
+ list_for_each_entry(t, &xt_templates[af], list) {
+ if (WARN_ON_ONCE(strcmp(table->name, t->name) == 0))
+ goto out_unlock;
+ }
+
+ ret = -ENOMEM;
+ t = kzalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ goto out_unlock;
+
+ BUILD_BUG_ON(sizeof(t->name) != sizeof(table->name));
+
+ strscpy(t->name, table->name, sizeof(t->name));
+ t->table_init = table_init;
+ t->me = table->me;
+ list_add(&t->list, &xt_templates[af]);
+ ret = 0;
+out_unlock:
+ mutex_unlock(&xt[af].mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xt_register_template);
+
+void xt_unregister_template(const struct xt_table *table)
+{
+ struct xt_template *t;
+ int af = table->af;
+
+ mutex_lock(&xt[af].mutex);
+ list_for_each_entry(t, &xt_templates[af], list) {
+ if (strcmp(table->name, t->name))
+ continue;
+
+ list_del(&t->list);
+ mutex_unlock(&xt[af].mutex);
+ kfree(t);
+ return;
+ }
+
+ mutex_unlock(&xt[af].mutex);
+ WARN_ON_ONCE(1);
+}
+EXPORT_SYMBOL_GPL(xt_unregister_template);
+
int xt_proto_init(struct net *net, u_int8_t af)
{
#ifdef CONFIG_PROC_FS
@@ -1937,6 +1998,7 @@ static int __init xt_init(void)
#endif
INIT_LIST_HEAD(&xt[i].target);
INIT_LIST_HEAD(&xt[i].match);
+ INIT_LIST_HEAD(&xt_templates[i]);
}
rv = register_pernet_subsys(&xt_net_ops);
if (rv < 0)
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 12404d221026..0a913ce07425 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -351,21 +351,10 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-static int notrack_chk(const struct xt_tgchk_param *par)
-{
- if (!par->net->xt.notrack_deprecated_warning) {
- pr_info("netfilter: NOTRACK target is deprecated, "
- "use CT instead or upgrade iptables\n");
- par->net->xt.notrack_deprecated_warning = true;
- }
- return 0;
-}
-
static struct xt_target notrack_tg_reg __read_mostly = {
.name = "NOTRACK",
.revision = 0,
.family = NFPROTO_UNSPEC,
- .checkentry = notrack_chk,
.target = notrack_tg,
.table = "raw",
.me = THIS_MODULE,
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 13cf3f9b5938..849ac552a154 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -90,7 +90,7 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_bpf_info *info = par->matchinfo;
- return BPF_PROG_RUN(info->filter, skb);
+ return bpf_prog_run(info->filter, skb);
}
static bool bpf_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index baf235721c43..894e6b8f1a86 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -144,8 +144,8 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
return -ENOMEM;
doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL);
if (doi_def->map.std == NULL) {
- ret_val = -ENOMEM;
- goto add_std_failure;
+ kfree(doi_def);
+ return -ENOMEM;
}
doi_def->type = CIPSO_V4_MAP_TRANS;
@@ -187,14 +187,14 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
}
doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size,
sizeof(u32),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (doi_def->map.std->lvl.local == NULL) {
ret_val = -ENOMEM;
goto add_std_failure;
}
doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size,
sizeof(u32),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (doi_def->map.std->lvl.cipso == NULL) {
ret_val = -ENOMEM;
goto add_std_failure;
@@ -263,7 +263,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
doi_def->map.std->cat.local = kcalloc(
doi_def->map.std->cat.local_size,
sizeof(u32),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (doi_def->map.std->cat.local == NULL) {
ret_val = -ENOMEM;
goto add_std_failure;
@@ -271,7 +271,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
doi_def->map.std->cat.cipso = kcalloc(
doi_def->map.std->cat.cipso_size,
sizeof(u32),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (doi_def->map.std->cat.cipso == NULL) {
ret_val = -ENOMEM;
goto add_std_failure;
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 2483df0bbd7c..566ba4397ee4 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -492,8 +492,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
netlbl_af4list_audit_addr(audit_buf, 1,
(dev != NULL ? dev->name : NULL),
addr->s_addr, mask->s_addr);
- if (dev != NULL)
- dev_put(dev);
+ dev_put(dev);
if (entry != NULL &&
security_secid_to_secctx(entry->secid,
&secctx, &secctx_len) == 0) {
@@ -553,8 +552,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
netlbl_af6list_audit_addr(audit_buf, 1,
(dev != NULL ? dev->name : NULL),
addr, mask);
- if (dev != NULL)
- dev_put(dev);
+ dev_put(dev);
if (entry != NULL &&
security_secid_to_secctx(entry->secid,
&secctx, &secctx_len) == 0) {
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 380f95aacdec..24b7cf447bc5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2545,13 +2545,15 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
/* errors reported via destination sk->sk_err, but propagate
* delivery errors if NETLINK_BROADCAST_ERROR flag is set */
err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
+ if (err == -ESRCH)
+ err = 0;
}
if (report) {
int err2;
err2 = nlmsg_unicast(sk, skb, portid);
- if (!err || err == -ESRCH)
+ if (!err)
err = err2;
}
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 2d6fdf40df66..1afca2a6c2ac 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -40,14 +40,6 @@ void genl_unlock(void)
}
EXPORT_SYMBOL(genl_unlock);
-#ifdef CONFIG_LOCKDEP
-bool lockdep_genl_is_held(void)
-{
- return lockdep_is_held(&genl_mutex);
-}
-EXPORT_SYMBOL(lockdep_genl_is_held);
-#endif
-
static void genl_lock_all(void)
{
down_write(&cb_lock);
@@ -1485,6 +1477,7 @@ int genlmsg_multicast_allns(const struct genl_family *family,
{
if (WARN_ON_ONCE(group >= family->n_mcgrps))
return -EINVAL;
+
group = family->mcgrp_offset + group;
return genlmsg_mcast(skb, portid, group, flags);
}
@@ -1495,14 +1488,12 @@ void genl_notify(const struct genl_family *family, struct sk_buff *skb,
{
struct net *net = genl_info_net(info);
struct sock *sk = net->genl_sock;
- int report = 0;
-
- if (info->nlhdr)
- report = nlmsg_report(info->nlhdr);
if (WARN_ON_ONCE(group >= family->n_mcgrps))
return;
+
group = family->mcgrp_offset + group;
- nlmsg_notify(sk, skb, info->snd_portid, group, report, flags);
+ nlmsg_notify(sk, skb, info->snd_portid, group,
+ nlmsg_report(info->nlhdr), flags);
}
EXPORT_SYMBOL(genl_notify);
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index a880dd33e901..511819fbfa67 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -59,8 +59,7 @@ static void nr_loopback_timer(struct timer_list *unused)
if (dev == NULL || nr_rx_frame(skb, dev) == 0)
kfree_skb(skb);
- if (dev != NULL)
- dev_put(dev);
+ dev_put(dev);
if (!skb_queue_empty(&loopback_queue) && !nr_loopback_running())
mod_timer(&loopback_timer, jiffies + 10);
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index de0456073dc0..ddd5cbd455e3 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -582,8 +582,7 @@ struct net_device *nr_dev_first(void)
if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
first = dev;
}
- if (first)
- dev_hold(first);
+ dev_hold(first);
rcu_read_unlock();
return first;
diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c
index 4a9e72073564..6024fad905ff 100644
--- a/net/nfc/af_nfc.c
+++ b/net/nfc/af_nfc.c
@@ -79,7 +79,7 @@ int __init af_nfc_init(void)
return sock_register(&nfc_sock_family_ops);
}
-void af_nfc_exit(void)
+void __exit af_nfc_exit(void)
{
sock_unregister(PF_NFC);
}
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 573c80c6ff7a..3c645c1d99c9 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -636,7 +636,7 @@ error:
return rc;
}
-int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len)
+int nfc_set_remote_general_bytes(struct nfc_dev *dev, const u8 *gb, u8 gb_len)
{
pr_debug("dev_name=%s gb_len=%d\n", dev_name(&dev->dev), gb_len);
@@ -665,7 +665,7 @@ int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb)
EXPORT_SYMBOL(nfc_tm_data_received);
int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode,
- u8 *gb, size_t gb_len)
+ const u8 *gb, size_t gb_len)
{
int rc;
@@ -824,7 +824,7 @@ EXPORT_SYMBOL(nfc_targets_found);
*/
int nfc_target_lost(struct nfc_dev *dev, u32 target_idx)
{
- struct nfc_target *tg;
+ const struct nfc_target *tg;
int i;
pr_debug("dev_name %s n_target %d\n", dev_name(&dev->dev), target_idx);
@@ -1048,7 +1048,7 @@ struct nfc_dev *nfc_get_device(unsigned int idx)
* @tx_headroom: reserved space at beginning of skb
* @tx_tailroom: reserved space at end of skb
*/
-struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
+struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops,
u32 supported_protocols,
int tx_headroom, int tx_tailroom)
{
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index 5044c7db577e..fefc03674f4f 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -732,7 +732,7 @@ exit:
return rc;
}
-static struct nfc_ops digital_nfc_ops = {
+static const struct nfc_ops digital_nfc_ops = {
.dev_up = digital_dev_up,
.dev_down = digital_dev_down,
.start_poll = digital_start_poll,
@@ -745,7 +745,7 @@ static struct nfc_ops digital_nfc_ops = {
.im_transceive = digital_in_send,
};
-struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops,
+struct nfc_digital_dev *nfc_digital_allocate_device(const struct nfc_digital_ops *ops,
__u32 supported_protocols,
__u32 driver_capabilities,
int tx_headroom, int tx_tailroom)
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 3481941be70b..ceb87db57cdb 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -128,7 +128,7 @@ static void nfc_hci_msg_rx_work(struct work_struct *work)
struct nfc_hci_dev *hdev = container_of(work, struct nfc_hci_dev,
msg_rx_work);
struct sk_buff *skb;
- struct hcp_message *message;
+ const struct hcp_message *message;
u8 pipe;
u8 type;
u8 instruction;
@@ -182,9 +182,9 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
struct sk_buff *skb)
{
u8 status = NFC_HCI_ANY_OK;
- struct hci_create_pipe_resp *create_info;
- struct hci_delete_pipe_noti *delete_info;
- struct hci_all_pipe_cleared_noti *cleared_info;
+ const struct hci_create_pipe_resp *create_info;
+ const struct hci_delete_pipe_noti *delete_info;
+ const struct hci_all_pipe_cleared_noti *cleared_info;
u8 gate;
pr_debug("from pipe %x cmd %x\n", pipe, cmd);
@@ -447,7 +447,7 @@ static void nfc_hci_cmd_timeout(struct timer_list *t)
}
static int hci_dev_connect_gates(struct nfc_hci_dev *hdev, u8 gate_count,
- struct nfc_hci_gate *gates)
+ const struct nfc_hci_gate *gates)
{
int r;
while (gate_count--) {
@@ -928,7 +928,7 @@ static int hci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name)
return hdev->ops->fw_download(hdev, firmware_name);
}
-static struct nfc_ops hci_nfc_ops = {
+static const struct nfc_ops hci_nfc_ops = {
.dev_up = hci_dev_up,
.dev_down = hci_dev_down,
.start_poll = hci_start_poll,
@@ -947,7 +947,7 @@ static struct nfc_ops hci_nfc_ops = {
.se_io = hci_se_io,
};
-struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops,
+struct nfc_hci_dev *nfc_hci_allocate_device(const struct nfc_hci_ops *ops,
struct nfc_hci_init_data *init_data,
unsigned long quirks,
u32 protocols,
diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c
index 6ab40ea17662..2140f6724644 100644
--- a/net/nfc/hci/llc.c
+++ b/net/nfc/hci/llc.c
@@ -11,7 +11,7 @@
static LIST_HEAD(llc_engines);
-int nfc_llc_init(void)
+int __init nfc_llc_init(void)
{
int r;
@@ -41,7 +41,7 @@ void nfc_llc_exit(void)
}
}
-int nfc_llc_register(const char *name, struct nfc_llc_ops *ops)
+int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops)
{
struct nfc_llc_engine *llc_engine;
diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h
index 823ddb621e5d..d66271d211a5 100644
--- a/net/nfc/hci/llc.h
+++ b/net/nfc/hci/llc.h
@@ -26,20 +26,20 @@ struct nfc_llc_ops {
struct nfc_llc_engine {
const char *name;
- struct nfc_llc_ops *ops;
+ const struct nfc_llc_ops *ops;
struct list_head entry;
};
struct nfc_llc {
void *data;
- struct nfc_llc_ops *ops;
+ const struct nfc_llc_ops *ops;
int rx_headroom;
int rx_tailroom;
};
void *nfc_llc_get_data(struct nfc_llc *llc);
-int nfc_llc_register(const char *name, struct nfc_llc_ops *ops);
+int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops);
void nfc_llc_unregister(const char *name);
int nfc_llc_nop_register(void);
diff --git a/net/nfc/hci/llc_nop.c b/net/nfc/hci/llc_nop.c
index a42852f36f2e..a58716f16954 100644
--- a/net/nfc/hci/llc_nop.c
+++ b/net/nfc/hci/llc_nop.c
@@ -71,7 +71,7 @@ static int llc_nop_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb)
return llc_nop->xmit_to_drv(llc_nop->hdev, skb);
}
-static struct nfc_llc_ops llc_nop_ops = {
+static const struct nfc_llc_ops llc_nop_ops = {
.init = llc_nop_init,
.deinit = llc_nop_deinit,
.start = llc_nop_start,
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index 1e3a90049da9..aef750d7787c 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -123,7 +123,7 @@ static bool llc_shdlc_x_lteq_y_lt_z(int x, int y, int z)
return ((y >= x) || (y < z)) ? true : false;
}
-static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc,
+static struct sk_buff *llc_shdlc_alloc_skb(const struct llc_shdlc *shdlc,
int payload_len)
{
struct sk_buff *skb;
@@ -137,7 +137,7 @@ static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc,
}
/* immediately sends an S frame. */
-static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc,
+static int llc_shdlc_send_s_frame(const struct llc_shdlc *shdlc,
enum sframe_type sframe_type, int nr)
{
int r;
@@ -159,7 +159,7 @@ static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc,
}
/* immediately sends an U frame. skb may contain optional payload */
-static int llc_shdlc_send_u_frame(struct llc_shdlc *shdlc,
+static int llc_shdlc_send_u_frame(const struct llc_shdlc *shdlc,
struct sk_buff *skb,
enum uframe_modifier uframe_modifier)
{
@@ -361,7 +361,7 @@ static void llc_shdlc_connect_complete(struct llc_shdlc *shdlc, int r)
wake_up(shdlc->connect_wq);
}
-static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc)
+static int llc_shdlc_connect_initiate(const struct llc_shdlc *shdlc)
{
struct sk_buff *skb;
@@ -377,7 +377,7 @@ static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc)
return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET);
}
-static int llc_shdlc_connect_send_ua(struct llc_shdlc *shdlc)
+static int llc_shdlc_connect_send_ua(const struct llc_shdlc *shdlc)
{
struct sk_buff *skb;
@@ -820,7 +820,7 @@ static int llc_shdlc_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb)
return 0;
}
-static struct nfc_llc_ops llc_shdlc_ops = {
+static const struct nfc_llc_ops llc_shdlc_ops = {
.init = llc_shdlc_init,
.deinit = llc_shdlc_deinit,
.start = llc_shdlc_start,
diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h
index 97853c9cefc7..d49d4bf2e37c 100644
--- a/net/nfc/llcp.h
+++ b/net/nfc/llcp.h
@@ -221,15 +221,15 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *sk, struct socket *newsock);
/* TLV API */
int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
- u8 *tlv_array, u16 tlv_array_len);
+ const u8 *tlv_array, u16 tlv_array_len);
int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
- u8 *tlv_array, u16 tlv_array_len);
+ const u8 *tlv_array, u16 tlv_array_len);
/* Commands API */
void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
-u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length);
+u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length);
struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap);
-struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
+struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri,
size_t uri_len);
void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp);
void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head);
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index 475061c79c44..3c4172a5aeb5 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -15,7 +15,7 @@
#include "nfc.h"
#include "llcp.h"
-static u8 llcp_tlv_length[LLCP_TLV_MAX] = {
+static const u8 llcp_tlv_length[LLCP_TLV_MAX] = {
0,
1, /* VERSION */
2, /* MIUX */
@@ -29,7 +29,7 @@ static u8 llcp_tlv_length[LLCP_TLV_MAX] = {
};
-static u8 llcp_tlv8(u8 *tlv, u8 type)
+static u8 llcp_tlv8(const u8 *tlv, u8 type)
{
if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]])
return 0;
@@ -37,7 +37,7 @@ static u8 llcp_tlv8(u8 *tlv, u8 type)
return tlv[2];
}
-static u16 llcp_tlv16(u8 *tlv, u8 type)
+static u16 llcp_tlv16(const u8 *tlv, u8 type)
{
if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]])
return 0;
@@ -46,37 +46,37 @@ static u16 llcp_tlv16(u8 *tlv, u8 type)
}
-static u8 llcp_tlv_version(u8 *tlv)
+static u8 llcp_tlv_version(const u8 *tlv)
{
return llcp_tlv8(tlv, LLCP_TLV_VERSION);
}
-static u16 llcp_tlv_miux(u8 *tlv)
+static u16 llcp_tlv_miux(const u8 *tlv)
{
return llcp_tlv16(tlv, LLCP_TLV_MIUX) & 0x7ff;
}
-static u16 llcp_tlv_wks(u8 *tlv)
+static u16 llcp_tlv_wks(const u8 *tlv)
{
return llcp_tlv16(tlv, LLCP_TLV_WKS);
}
-static u16 llcp_tlv_lto(u8 *tlv)
+static u16 llcp_tlv_lto(const u8 *tlv)
{
return llcp_tlv8(tlv, LLCP_TLV_LTO);
}
-static u8 llcp_tlv_opt(u8 *tlv)
+static u8 llcp_tlv_opt(const u8 *tlv)
{
return llcp_tlv8(tlv, LLCP_TLV_OPT);
}
-static u8 llcp_tlv_rw(u8 *tlv)
+static u8 llcp_tlv_rw(const u8 *tlv)
{
return llcp_tlv8(tlv, LLCP_TLV_RW) & 0xf;
}
-u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length)
+u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length)
{
u8 *tlv, length;
@@ -130,7 +130,7 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap)
return sdres;
}
-struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
+struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri,
size_t uri_len)
{
struct nfc_llcp_sdp_tlv *sdreq;
@@ -190,9 +190,10 @@ void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head)
}
int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
- u8 *tlv_array, u16 tlv_array_len)
+ const u8 *tlv_array, u16 tlv_array_len)
{
- u8 *tlv = tlv_array, type, length, offset = 0;
+ const u8 *tlv = tlv_array;
+ u8 type, length, offset = 0;
pr_debug("TLV array length %d\n", tlv_array_len);
@@ -239,9 +240,10 @@ int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
}
int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
- u8 *tlv_array, u16 tlv_array_len)
+ const u8 *tlv_array, u16 tlv_array_len)
{
- u8 *tlv = tlv_array, type, length, offset = 0;
+ const u8 *tlv = tlv_array;
+ u8 type, length, offset = 0;
pr_debug("TLV array length %d\n", tlv_array_len);
@@ -295,7 +297,7 @@ static struct sk_buff *llcp_add_header(struct sk_buff *pdu,
return pdu;
}
-static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, u8 *tlv,
+static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, const u8 *tlv,
u8 tlv_length)
{
/* XXX Add an skb length check */
@@ -389,9 +391,10 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
{
struct nfc_llcp_local *local;
struct sk_buff *skb;
- u8 *service_name_tlv = NULL, service_name_tlv_length;
- u8 *miux_tlv = NULL, miux_tlv_length;
- u8 *rw_tlv = NULL, rw_tlv_length, rw;
+ const u8 *service_name_tlv = NULL;
+ const u8 *miux_tlv = NULL;
+ const u8 *rw_tlv = NULL;
+ u8 service_name_tlv_length, miux_tlv_length, rw_tlv_length, rw;
int err;
u16 size = 0;
__be16 miux;
@@ -465,8 +468,9 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)
{
struct nfc_llcp_local *local;
struct sk_buff *skb;
- u8 *miux_tlv = NULL, miux_tlv_length;
- u8 *rw_tlv = NULL, rw_tlv_length, rw;
+ const u8 *miux_tlv = NULL;
+ const u8 *rw_tlv = NULL;
+ u8 miux_tlv_length, rw_tlv_length, rw;
int err;
u16 size = 0;
__be16 miux;
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index cc997518f79d..eaeb2b1cfa6a 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -301,7 +301,7 @@ static char *wks[] = {
"urn:nfc:sn:snep",
};
-static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len)
+static int nfc_llcp_wks_sap(const char *service_name, size_t service_name_len)
{
int sap, num_wks;
@@ -325,7 +325,7 @@ static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len)
static
struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local,
- u8 *sn, size_t sn_len)
+ const u8 *sn, size_t sn_len)
{
struct sock *sk;
struct nfc_llcp_sock *llcp_sock, *tmp_sock;
@@ -522,7 +522,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local)
{
u8 *gb_cur, version, version_length;
u8 lto_length, wks_length, miux_length;
- u8 *version_tlv = NULL, *lto_tlv = NULL,
+ const u8 *version_tlv = NULL, *lto_tlv = NULL,
*wks_tlv = NULL, *miux_tlv = NULL;
__be16 wks = cpu_to_be16(local->local_wks);
u8 gb_len = 0;
@@ -612,7 +612,7 @@ u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len)
return local->gb;
}
-int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len)
+int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len)
{
struct nfc_llcp_local *local;
@@ -639,27 +639,27 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len)
local->remote_gb_len - 3);
}
-static u8 nfc_llcp_dsap(struct sk_buff *pdu)
+static u8 nfc_llcp_dsap(const struct sk_buff *pdu)
{
return (pdu->data[0] & 0xfc) >> 2;
}
-static u8 nfc_llcp_ptype(struct sk_buff *pdu)
+static u8 nfc_llcp_ptype(const struct sk_buff *pdu)
{
return ((pdu->data[0] & 0x03) << 2) | ((pdu->data[1] & 0xc0) >> 6);
}
-static u8 nfc_llcp_ssap(struct sk_buff *pdu)
+static u8 nfc_llcp_ssap(const struct sk_buff *pdu)
{
return pdu->data[1] & 0x3f;
}
-static u8 nfc_llcp_ns(struct sk_buff *pdu)
+static u8 nfc_llcp_ns(const struct sk_buff *pdu)
{
return pdu->data[2] >> 4;
}
-static u8 nfc_llcp_nr(struct sk_buff *pdu)
+static u8 nfc_llcp_nr(const struct sk_buff *pdu)
{
return pdu->data[2] & 0xf;
}
@@ -801,7 +801,7 @@ out:
}
static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local,
- u8 *sn, size_t sn_len)
+ const u8 *sn, size_t sn_len)
{
struct nfc_llcp_sock *llcp_sock;
@@ -815,9 +815,10 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local,
return llcp_sock;
}
-static u8 *nfc_llcp_connect_sn(struct sk_buff *skb, size_t *sn_len)
+static const u8 *nfc_llcp_connect_sn(const struct sk_buff *skb, size_t *sn_len)
{
- u8 *tlv = &skb->data[2], type, length;
+ u8 type, length;
+ const u8 *tlv = &skb->data[2];
size_t tlv_array_len = skb->len - LLCP_HEADER_SIZE, offset = 0;
while (offset < tlv_array_len) {
@@ -875,7 +876,7 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local,
}
static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
struct sock *new_sk, *parent;
struct nfc_llcp_sock *sock, *new_sock;
@@ -893,7 +894,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
goto fail;
}
} else {
- u8 *sn;
+ const u8 *sn;
size_t sn_len;
sn = nfc_llcp_connect_sn(skb, &sn_len);
@@ -1112,7 +1113,7 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local,
}
static void nfc_llcp_recv_disc(struct nfc_llcp_local *local,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
struct nfc_llcp_sock *llcp_sock;
struct sock *sk;
@@ -1155,7 +1156,8 @@ static void nfc_llcp_recv_disc(struct nfc_llcp_local *local,
nfc_llcp_sock_put(llcp_sock);
}
-static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, struct sk_buff *skb)
+static void nfc_llcp_recv_cc(struct nfc_llcp_local *local,
+ const struct sk_buff *skb)
{
struct nfc_llcp_sock *llcp_sock;
struct sock *sk;
@@ -1188,7 +1190,8 @@ static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, struct sk_buff *skb)
nfc_llcp_sock_put(llcp_sock);
}
-static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, struct sk_buff *skb)
+static void nfc_llcp_recv_dm(struct nfc_llcp_local *local,
+ const struct sk_buff *skb)
{
struct nfc_llcp_sock *llcp_sock;
struct sock *sk;
@@ -1226,12 +1229,13 @@ static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, struct sk_buff *skb)
}
static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
struct nfc_llcp_sock *llcp_sock;
- u8 dsap, ssap, *tlv, type, length, tid, sap;
+ u8 dsap, ssap, type, length, tid, sap;
+ const u8 *tlv;
u16 tlv_len, offset;
- char *service_name;
+ const char *service_name;
size_t service_name_len;
struct nfc_llcp_sdp_tlv *sdp;
HLIST_HEAD(llc_sdres_list);
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index da7fe9db1b00..82ab39d80726 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -53,9 +53,9 @@ struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev,
}
int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type,
- struct dest_spec_params *params)
+ const struct dest_spec_params *params)
{
- struct nci_conn_info *conn_info;
+ const struct nci_conn_info *conn_info;
list_for_each_entry(conn_info, &ndev->conn_info_list, list) {
if (conn_info->dest_type == dest_type) {
@@ -95,8 +95,8 @@ static void nci_req_cancel(struct nci_dev *ndev, int err)
/* Execute request and wait for completion. */
static int __nci_request(struct nci_dev *ndev,
- void (*req)(struct nci_dev *ndev, unsigned long opt),
- unsigned long opt, __u32 timeout)
+ void (*req)(struct nci_dev *ndev, const void *opt),
+ const void *opt, __u32 timeout)
{
int rc = 0;
long completion_rc;
@@ -139,8 +139,8 @@ static int __nci_request(struct nci_dev *ndev,
inline int nci_request(struct nci_dev *ndev,
void (*req)(struct nci_dev *ndev,
- unsigned long opt),
- unsigned long opt, __u32 timeout)
+ const void *opt),
+ const void *opt, __u32 timeout)
{
int rc;
@@ -155,7 +155,7 @@ inline int nci_request(struct nci_dev *ndev,
return rc;
}
-static void nci_reset_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_reset_req(struct nci_dev *ndev, const void *opt)
{
struct nci_core_reset_cmd cmd;
@@ -163,17 +163,17 @@ static void nci_reset_req(struct nci_dev *ndev, unsigned long opt)
nci_send_cmd(ndev, NCI_OP_CORE_RESET_CMD, 1, &cmd);
}
-static void nci_init_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_init_req(struct nci_dev *ndev, const void *opt)
{
u8 plen = 0;
if (opt)
plen = sizeof(struct nci_core_init_v2_cmd);
- nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, (void *)opt);
+ nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, opt);
}
-static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_init_complete_req(struct nci_dev *ndev, const void *opt)
{
struct nci_rf_disc_map_cmd cmd;
struct disc_map_config *cfg = cmd.mapping_configs;
@@ -210,14 +210,14 @@ static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt)
}
struct nci_set_config_param {
- __u8 id;
- size_t len;
- __u8 *val;
+ __u8 id;
+ size_t len;
+ const __u8 *val;
};
-static void nci_set_config_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_set_config_req(struct nci_dev *ndev, const void *opt)
{
- struct nci_set_config_param *param = (struct nci_set_config_param *)opt;
+ const struct nci_set_config_param *param = opt;
struct nci_core_set_config_cmd cmd;
BUG_ON(param->len > NCI_MAX_PARAM_LEN);
@@ -235,10 +235,9 @@ struct nci_rf_discover_param {
__u32 tm_protocols;
};
-static void nci_rf_discover_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_discover_req(struct nci_dev *ndev, const void *opt)
{
- struct nci_rf_discover_param *param =
- (struct nci_rf_discover_param *)opt;
+ const struct nci_rf_discover_param *param = opt;
struct nci_rf_disc_cmd cmd;
cmd.num_disc_configs = 0;
@@ -301,10 +300,9 @@ struct nci_rf_discover_select_param {
__u8 rf_protocol;
};
-static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_discover_select_req(struct nci_dev *ndev, const void *opt)
{
- struct nci_rf_discover_select_param *param =
- (struct nci_rf_discover_select_param *)opt;
+ const struct nci_rf_discover_select_param *param = opt;
struct nci_rf_discover_select_cmd cmd;
cmd.rf_discovery_id = param->rf_discovery_id;
@@ -328,11 +326,11 @@ static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt)
sizeof(struct nci_rf_discover_select_cmd), &cmd);
}
-static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_deactivate_req(struct nci_dev *ndev, const void *opt)
{
struct nci_rf_deactivate_cmd cmd;
- cmd.type = opt;
+ cmd.type = (unsigned long)opt;
nci_send_cmd(ndev, NCI_OP_RF_DEACTIVATE_CMD,
sizeof(struct nci_rf_deactivate_cmd), &cmd);
@@ -341,18 +339,17 @@ static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt)
struct nci_cmd_param {
__u16 opcode;
size_t len;
- __u8 *payload;
+ const __u8 *payload;
};
-static void nci_generic_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_generic_req(struct nci_dev *ndev, const void *opt)
{
- struct nci_cmd_param *param =
- (struct nci_cmd_param *)opt;
+ const struct nci_cmd_param *param = opt;
nci_send_cmd(ndev, param->opcode, param->len, param->payload);
}
-int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload)
+int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, const __u8 *payload)
{
struct nci_cmd_param param;
@@ -360,12 +357,13 @@ int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload)
param.len = len;
param.payload = payload;
- return __nci_request(ndev, nci_generic_req, (unsigned long)&param,
+ return __nci_request(ndev, nci_generic_req, &param,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_prop_cmd);
-int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload)
+int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len,
+ const __u8 *payload)
{
struct nci_cmd_param param;
@@ -373,21 +371,21 @@ int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload)
param.len = len;
param.payload = payload;
- return __nci_request(ndev, nci_generic_req, (unsigned long)&param,
+ return __nci_request(ndev, nci_generic_req, &param,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_core_cmd);
int nci_core_reset(struct nci_dev *ndev)
{
- return __nci_request(ndev, nci_reset_req, 0,
+ return __nci_request(ndev, nci_reset_req, (void *)0,
msecs_to_jiffies(NCI_RESET_TIMEOUT));
}
EXPORT_SYMBOL(nci_core_reset);
int nci_core_init(struct nci_dev *ndev)
{
- return __nci_request(ndev, nci_init_req, 0,
+ return __nci_request(ndev, nci_init_req, (void *)0,
msecs_to_jiffies(NCI_INIT_TIMEOUT));
}
EXPORT_SYMBOL(nci_core_init);
@@ -397,9 +395,9 @@ struct nci_loopback_data {
struct sk_buff *data;
};
-static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_send_data_req(struct nci_dev *ndev, const void *opt)
{
- struct nci_loopback_data *data = (struct nci_loopback_data *)opt;
+ const struct nci_loopback_data *data = opt;
nci_send_data(ndev, data->conn_id, data->data);
}
@@ -407,7 +405,7 @@ static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt)
static void nci_nfcc_loopback_cb(void *context, struct sk_buff *skb, int err)
{
struct nci_dev *ndev = (struct nci_dev *)context;
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info;
conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id);
if (!conn_info) {
@@ -420,7 +418,7 @@ static void nci_nfcc_loopback_cb(void *context, struct sk_buff *skb, int err)
nci_req_complete(ndev, NCI_STATUS_OK);
}
-int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len,
+int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len,
struct sk_buff **resp)
{
int r;
@@ -460,7 +458,7 @@ int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len,
loopback_data.data = skb;
ndev->cur_conn_id = conn_id;
- r = nci_request(ndev, nci_send_data_req, (unsigned long)&loopback_data,
+ r = nci_request(ndev, nci_send_data_req, &loopback_data,
msecs_to_jiffies(NCI_DATA_TIMEOUT));
if (r == NCI_STATUS_OK && resp)
*resp = conn_info->rx_skb;
@@ -493,7 +491,7 @@ static int nci_open_device(struct nci_dev *ndev)
rc = ndev->ops->init(ndev);
if (!rc) {
- rc = __nci_request(ndev, nci_reset_req, 0,
+ rc = __nci_request(ndev, nci_reset_req, (void *)0,
msecs_to_jiffies(NCI_RESET_TIMEOUT));
}
@@ -506,10 +504,10 @@ static int nci_open_device(struct nci_dev *ndev)
.feature1 = NCI_FEATURE_DISABLE,
.feature2 = NCI_FEATURE_DISABLE
};
- unsigned long opt = 0;
+ const void *opt = NULL;
if (ndev->nci_ver & NCI_VER_2_MASK)
- opt = (unsigned long)&nci_init_v2_cmd;
+ opt = &nci_init_v2_cmd;
rc = __nci_request(ndev, nci_init_req, opt,
msecs_to_jiffies(NCI_INIT_TIMEOUT));
@@ -519,7 +517,7 @@ static int nci_open_device(struct nci_dev *ndev)
rc = ndev->ops->post_setup(ndev);
if (!rc) {
- rc = __nci_request(ndev, nci_init_complete_req, 0,
+ rc = __nci_request(ndev, nci_init_complete_req, (void *)0,
msecs_to_jiffies(NCI_INIT_TIMEOUT));
}
@@ -569,7 +567,7 @@ static int nci_close_device(struct nci_dev *ndev)
atomic_set(&ndev->cmd_cnt, 1);
set_bit(NCI_INIT, &ndev->flags);
- __nci_request(ndev, nci_reset_req, 0,
+ __nci_request(ndev, nci_reset_req, (void *)0,
msecs_to_jiffies(NCI_RESET_TIMEOUT));
/* After this point our queues are empty
@@ -624,7 +622,7 @@ static int nci_dev_down(struct nfc_dev *nfc_dev)
return nci_close_device(ndev);
}
-int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val)
+int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val)
{
struct nci_set_config_param param;
@@ -635,15 +633,15 @@ int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val)
param.len = len;
param.val = val;
- return __nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+ return __nci_request(ndev, nci_set_config_req, &param,
msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
}
EXPORT_SYMBOL(nci_set_config);
-static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_nfcee_discover_req(struct nci_dev *ndev, const void *opt)
{
struct nci_nfcee_discover_cmd cmd;
- __u8 action = opt;
+ __u8 action = (unsigned long)opt;
cmd.discovery_action = action;
@@ -652,15 +650,16 @@ static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt)
int nci_nfcee_discover(struct nci_dev *ndev, u8 action)
{
- return __nci_request(ndev, nci_nfcee_discover_req, action,
+ unsigned long opt = action;
+
+ return __nci_request(ndev, nci_nfcee_discover_req, (void *)opt,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_nfcee_discover);
-static void nci_nfcee_mode_set_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_nfcee_mode_set_req(struct nci_dev *ndev, const void *opt)
{
- struct nci_nfcee_mode_set_cmd *cmd =
- (struct nci_nfcee_mode_set_cmd *)opt;
+ const struct nci_nfcee_mode_set_cmd *cmd = opt;
nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD,
sizeof(struct nci_nfcee_mode_set_cmd), cmd);
@@ -673,16 +672,14 @@ int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode)
cmd.nfcee_id = nfcee_id;
cmd.nfcee_mode = nfcee_mode;
- return __nci_request(ndev, nci_nfcee_mode_set_req,
- (unsigned long)&cmd,
+ return __nci_request(ndev, nci_nfcee_mode_set_req, &cmd,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_nfcee_mode_set);
-static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_core_conn_create_req(struct nci_dev *ndev, const void *opt)
{
- struct core_conn_create_data *data =
- (struct core_conn_create_data *)opt;
+ const struct core_conn_create_data *data = opt;
nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd);
}
@@ -690,7 +687,7 @@ static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt)
int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
u8 number_destination_params,
size_t params_len,
- struct core_conn_create_dest_spec_params *params)
+ const struct core_conn_create_dest_spec_params *params)
{
int r;
struct nci_core_conn_create_cmd *cmd;
@@ -719,24 +716,26 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
}
ndev->cur_dest_type = destination_type;
- r = __nci_request(ndev, nci_core_conn_create_req, (unsigned long)&data,
+ r = __nci_request(ndev, nci_core_conn_create_req, &data,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
kfree(cmd);
return r;
}
EXPORT_SYMBOL(nci_core_conn_create);
-static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_core_conn_close_req(struct nci_dev *ndev, const void *opt)
{
- __u8 conn_id = opt;
+ __u8 conn_id = (unsigned long)opt;
nci_send_cmd(ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id);
}
int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id)
{
+ unsigned long opt = conn_id;
+
ndev->cur_conn_id = conn_id;
- return __nci_request(ndev, nci_core_conn_close_req, conn_id,
+ return __nci_request(ndev, nci_core_conn_close_req, (void *)opt,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_core_conn_close);
@@ -756,14 +755,14 @@ static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev)
param.id = NCI_PN_ATR_REQ_GEN_BYTES;
- rc = nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+ rc = nci_request(ndev, nci_set_config_req, &param,
msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
if (rc)
return rc;
param.id = NCI_LN_ATR_RES_GEN_BYTES;
- return nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+ return nci_request(ndev, nci_set_config_req, &param,
msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
}
@@ -813,7 +812,7 @@ static int nci_start_poll(struct nfc_dev *nfc_dev,
pr_debug("target active or w4 select, implicitly deactivate\n");
rc = nci_request(ndev, nci_rf_deactivate_req,
- NCI_DEACTIVATE_TYPE_IDLE_MODE,
+ (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE,
msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
if (rc)
return -EBUSY;
@@ -835,7 +834,7 @@ static int nci_start_poll(struct nfc_dev *nfc_dev,
param.im_protocols = im_protocols;
param.tm_protocols = tm_protocols;
- rc = nci_request(ndev, nci_rf_discover_req, (unsigned long)&param,
+ rc = nci_request(ndev, nci_rf_discover_req, &param,
msecs_to_jiffies(NCI_RF_DISC_TIMEOUT));
if (!rc)
@@ -854,7 +853,8 @@ static void nci_stop_poll(struct nfc_dev *nfc_dev)
return;
}
- nci_request(ndev, nci_rf_deactivate_req, NCI_DEACTIVATE_TYPE_IDLE_MODE,
+ nci_request(ndev, nci_rf_deactivate_req,
+ (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE,
msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
}
@@ -863,7 +863,7 @@ static int nci_activate_target(struct nfc_dev *nfc_dev,
{
struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
struct nci_rf_discover_select_param param;
- struct nfc_target *nci_target = NULL;
+ const struct nfc_target *nci_target = NULL;
int i;
int rc = 0;
@@ -913,8 +913,7 @@ static int nci_activate_target(struct nfc_dev *nfc_dev,
else
param.rf_protocol = NCI_RF_PROTOCOL_NFC_DEP;
- rc = nci_request(ndev, nci_rf_discover_select_req,
- (unsigned long)&param,
+ rc = nci_request(ndev, nci_rf_discover_select_req, &param,
msecs_to_jiffies(NCI_RF_DISC_SELECT_TIMEOUT));
}
@@ -929,7 +928,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
__u8 mode)
{
struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
- u8 nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE;
+ unsigned long nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE;
pr_debug("entry\n");
@@ -947,7 +946,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
}
if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) {
- nci_request(ndev, nci_rf_deactivate_req, nci_mode,
+ nci_request(ndev, nci_rf_deactivate_req, (void *)nci_mode,
msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
}
}
@@ -985,8 +984,8 @@ static int nci_dep_link_down(struct nfc_dev *nfc_dev)
} else {
if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE ||
atomic_read(&ndev->state) == NCI_DISCOVERY) {
- nci_request(ndev, nci_rf_deactivate_req, 0,
- msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
+ nci_request(ndev, nci_rf_deactivate_req, (void *)0,
+ msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
}
rc = nfc_tm_deactivated(nfc_dev);
@@ -1004,7 +1003,7 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target,
{
struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
int rc;
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info;
conn_info = ndev->rf_conn_info;
if (!conn_info)
@@ -1102,7 +1101,7 @@ static int nci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name)
return ndev->ops->fw_download(ndev, firmware_name);
}
-static struct nfc_ops nci_nfc_ops = {
+static const struct nfc_ops nci_nfc_ops = {
.dev_up = nci_dev_up,
.dev_down = nci_dev_down,
.start_poll = nci_start_poll,
@@ -1129,7 +1128,7 @@ static struct nfc_ops nci_nfc_ops = {
* @tx_headroom: Reserved space at beginning of skb
* @tx_tailroom: Reserved space at end of skb
*/
-struct nci_dev *nci_allocate_device(struct nci_ops *ops,
+struct nci_dev *nci_allocate_device(const struct nci_ops *ops,
__u32 supported_protocols,
int tx_headroom, int tx_tailroom)
{
@@ -1152,8 +1151,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops,
if (ops->n_prop_ops > NCI_MAX_PROPRIETARY_CMD) {
pr_err("Too many proprietary commands: %zd\n",
ops->n_prop_ops);
- ops->prop_ops = NULL;
- ops->n_prop_ops = 0;
+ goto free_nci;
}
ndev->tx_headroom = tx_headroom;
@@ -1270,7 +1268,7 @@ EXPORT_SYMBOL(nci_register_device);
*/
void nci_unregister_device(struct nci_dev *ndev)
{
- struct nci_conn_info *conn_info, *n;
+ struct nci_conn_info *conn_info, *n;
nci_close_device(ndev);
@@ -1332,7 +1330,7 @@ int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb)
EXPORT_SYMBOL(nci_send_frame);
/* Send NCI command */
-int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload)
+int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payload)
{
struct nci_ctrl_hdr *hdr;
struct sk_buff *skb;
@@ -1364,12 +1362,12 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload)
EXPORT_SYMBOL(nci_send_cmd);
/* Proprietary commands API */
-static struct nci_driver_ops *ops_cmd_lookup(struct nci_driver_ops *ops,
- size_t n_ops,
- __u16 opcode)
+static const struct nci_driver_ops *ops_cmd_lookup(const struct nci_driver_ops *ops,
+ size_t n_ops,
+ __u16 opcode)
{
size_t i;
- struct nci_driver_ops *op;
+ const struct nci_driver_ops *op;
if (!ops || !n_ops)
return NULL;
@@ -1384,10 +1382,10 @@ static struct nci_driver_ops *ops_cmd_lookup(struct nci_driver_ops *ops,
}
static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode,
- struct sk_buff *skb, struct nci_driver_ops *ops,
+ struct sk_buff *skb, const struct nci_driver_ops *ops,
size_t n_ops)
{
- struct nci_driver_ops *op;
+ const struct nci_driver_ops *op;
op = ops_cmd_lookup(ops, n_ops, rsp_opcode);
if (!op || !op->rsp)
@@ -1397,10 +1395,10 @@ static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode,
}
static int nci_op_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode,
- struct sk_buff *skb, struct nci_driver_ops *ops,
+ struct sk_buff *skb, const struct nci_driver_ops *ops,
size_t n_ops)
{
- struct nci_driver_ops *op;
+ const struct nci_driver_ops *op;
op = ops_cmd_lookup(ops, n_ops, ntf_opcode);
if (!op || !op->ntf)
@@ -1442,7 +1440,7 @@ int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode,
static void nci_tx_work(struct work_struct *work)
{
struct nci_dev *ndev = container_of(work, struct nci_dev, tx_work);
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info;
struct sk_buff *skb;
conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id);
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index ce3382be937f..6055dc9a82aa 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -26,7 +26,7 @@
void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb,
__u8 conn_id, int err)
{
- struct nci_conn_info *conn_info;
+ const struct nci_conn_info *conn_info;
data_exchange_cb_t cb;
void *cb_context;
@@ -80,7 +80,7 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev,
int nci_conn_max_data_pkt_payload_size(struct nci_dev *ndev, __u8 conn_id)
{
- struct nci_conn_info *conn_info;
+ const struct nci_conn_info *conn_info;
conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id);
if (!conn_info)
@@ -93,9 +93,9 @@ EXPORT_SYMBOL(nci_conn_max_data_pkt_payload_size);
static int nci_queue_tx_data_frags(struct nci_dev *ndev,
__u8 conn_id,
struct sk_buff *skb) {
- struct nci_conn_info *conn_info;
+ const struct nci_conn_info *conn_info;
int total_len = skb->len;
- unsigned char *data = skb->data;
+ const unsigned char *data = skb->data;
unsigned long flags;
struct sk_buff_head frags_q;
struct sk_buff *skb_frag;
@@ -166,7 +166,7 @@ exit:
/* Send NCI data */
int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb)
{
- struct nci_conn_info *conn_info;
+ const struct nci_conn_info *conn_info;
int rc = 0;
pr_debug("conn_id 0x%x, plen %d\n", conn_id, skb->len);
@@ -269,7 +269,7 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb)
__u8 pbf = nci_pbf(skb->data);
__u8 status = 0;
__u8 conn_id = nci_conn_id(skb->data);
- struct nci_conn_info *conn_info;
+ const struct nci_conn_info *conn_info;
pr_debug("len %d\n", skb->len);
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index d6732e5e8958..e199912ee1e5 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -16,11 +16,11 @@
#include <linux/nfc.h>
struct nci_data {
- u8 conn_id;
- u8 pipe;
- u8 cmd;
- const u8 *data;
- u32 data_len;
+ u8 conn_id;
+ u8 pipe;
+ u8 cmd;
+ const u8 *data;
+ u32 data_len;
} __packed;
struct nci_hci_create_pipe_params {
@@ -142,7 +142,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
const u8 data_type, const u8 *data,
size_t data_len)
{
- struct nci_conn_info *conn_info;
+ const struct nci_conn_info *conn_info;
struct sk_buff *skb;
int len, i, r;
u8 cb = pipe;
@@ -195,9 +195,9 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
return i;
}
-static void nci_hci_send_data_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_hci_send_data_req(struct nci_dev *ndev, const void *opt)
{
- struct nci_data *data = (struct nci_data *)opt;
+ const struct nci_data *data = opt;
nci_hci_send_data(ndev, data->pipe, data->cmd,
data->data, data->data_len);
@@ -221,8 +221,8 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd,
const u8 *param, size_t param_len,
struct sk_buff **skb)
{
- struct nci_hcp_message *message;
- struct nci_conn_info *conn_info;
+ const struct nci_hcp_message *message;
+ const struct nci_conn_info *conn_info;
struct nci_data data;
int r;
u8 pipe = ndev->hci_dev->gate2pipe[gate];
@@ -240,7 +240,7 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd,
data.data = param;
data.data_len = param_len;
- r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
+ r = nci_request(ndev, nci_hci_send_data_req, &data,
msecs_to_jiffies(NCI_DATA_TIMEOUT));
if (r == NCI_STATUS_OK) {
message = (struct nci_hcp_message *)conn_info->rx_skb->data;
@@ -363,7 +363,7 @@ exit:
static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe,
struct sk_buff *skb)
{
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info;
conn_info = ndev->hci_dev->conn_info;
if (!conn_info)
@@ -406,7 +406,7 @@ static void nci_hci_msg_rx_work(struct work_struct *work)
struct nci_hci_dev *hdev =
container_of(work, struct nci_hci_dev, msg_rx_work);
struct sk_buff *skb;
- struct nci_hcp_message *message;
+ const struct nci_hcp_message *message;
u8 pipe, type, instruction;
while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) {
@@ -498,7 +498,7 @@ void nci_hci_data_received_cb(void *context,
int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe)
{
struct nci_data data;
- struct nci_conn_info *conn_info;
+ const struct nci_conn_info *conn_info;
conn_info = ndev->hci_dev->conn_info;
if (!conn_info)
@@ -511,9 +511,8 @@ int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe)
data.data = NULL;
data.data_len = 0;
- return nci_request(ndev, nci_hci_send_data_req,
- (unsigned long)&data,
- msecs_to_jiffies(NCI_DATA_TIMEOUT));
+ return nci_request(ndev, nci_hci_send_data_req, &data,
+ msecs_to_jiffies(NCI_DATA_TIMEOUT));
}
EXPORT_SYMBOL(nci_hci_open_pipe);
@@ -523,7 +522,7 @@ static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host,
u8 pipe;
struct sk_buff *skb;
struct nci_hci_create_pipe_params params;
- struct nci_hci_create_pipe_resp *resp;
+ const struct nci_hci_create_pipe_resp *resp;
pr_debug("gate=%d\n", dest_gate);
@@ -557,8 +556,8 @@ static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe)
int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx,
const u8 *param, size_t param_len)
{
- struct nci_hcp_message *message;
- struct nci_conn_info *conn_info;
+ const struct nci_hcp_message *message;
+ const struct nci_conn_info *conn_info;
struct nci_data data;
int r;
u8 *tmp;
@@ -587,8 +586,7 @@ int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx,
data.data = tmp;
data.data_len = param_len + 1;
- r = nci_request(ndev, nci_hci_send_data_req,
- (unsigned long)&data,
+ r = nci_request(ndev, nci_hci_send_data_req, &data,
msecs_to_jiffies(NCI_DATA_TIMEOUT));
if (r == NCI_STATUS_OK) {
message = (struct nci_hcp_message *)conn_info->rx_skb->data;
@@ -605,8 +603,8 @@ EXPORT_SYMBOL(nci_hci_set_param);
int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx,
struct sk_buff **skb)
{
- struct nci_hcp_message *message;
- struct nci_conn_info *conn_info;
+ const struct nci_hcp_message *message;
+ const struct nci_conn_info *conn_info;
struct nci_data data;
int r;
u8 pipe = ndev->hci_dev->gate2pipe[gate];
@@ -627,7 +625,7 @@ int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx,
data.data = &idx;
data.data_len = 1;
- r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
+ r = nci_request(ndev, nci_hci_send_data_req, &data,
msecs_to_jiffies(NCI_DATA_TIMEOUT));
if (r == NCI_STATUS_OK) {
@@ -697,7 +695,7 @@ EXPORT_SYMBOL(nci_hci_connect_gate);
static int nci_hci_dev_connect_gates(struct nci_dev *ndev,
u8 gate_count,
- struct nci_hci_gate *gates)
+ const struct nci_hci_gate *gates)
{
int r;
@@ -714,7 +712,7 @@ static int nci_hci_dev_connect_gates(struct nci_dev *ndev,
int nci_hci_dev_session_init(struct nci_dev *ndev)
{
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info;
struct sk_buff *skb;
int r;
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 98af04c86b2c..c5eacaac41ae 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -28,10 +28,10 @@
/* Handle NCI Notification packets */
static void nci_core_reset_ntf_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
/* Handle NCI 2.x core reset notification */
- struct nci_core_reset_ntf *ntf = (void *)skb->data;
+ const struct nci_core_reset_ntf *ntf = (void *)skb->data;
ndev->nci_ver = ntf->nci_ver;
pr_debug("nci_ver 0x%x, config_status 0x%x\n",
@@ -48,7 +48,7 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev,
struct sk_buff *skb)
{
struct nci_core_conn_credit_ntf *ntf = (void *) skb->data;
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info;
int i;
pr_debug("num_entries %d\n", ntf->num_entries);
@@ -80,7 +80,7 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev,
}
static void nci_core_generic_error_ntf_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
__u8 status = skb->data[0];
@@ -107,9 +107,10 @@ static void nci_core_conn_intf_error_ntf_packet(struct nci_dev *ndev,
nci_data_exchange_complete(ndev, NULL, ntf->conn_id, -EIO);
}
-static __u8 *nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev,
- struct rf_tech_specific_params_nfca_poll *nfca_poll,
- __u8 *data)
+static const __u8 *
+nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev,
+ struct rf_tech_specific_params_nfca_poll *nfca_poll,
+ const __u8 *data)
{
nfca_poll->sens_res = __le16_to_cpu(*((__le16 *)data));
data += 2;
@@ -134,9 +135,10 @@ static __u8 *nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev,
return data;
}
-static __u8 *nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev,
- struct rf_tech_specific_params_nfcb_poll *nfcb_poll,
- __u8 *data)
+static const __u8 *
+nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev,
+ struct rf_tech_specific_params_nfcb_poll *nfcb_poll,
+ const __u8 *data)
{
nfcb_poll->sensb_res_len = min_t(__u8, *data++, NFC_SENSB_RES_MAXSIZE);
@@ -148,9 +150,10 @@ static __u8 *nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev,
return data;
}
-static __u8 *nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev,
- struct rf_tech_specific_params_nfcf_poll *nfcf_poll,
- __u8 *data)
+static const __u8 *
+nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev,
+ struct rf_tech_specific_params_nfcf_poll *nfcf_poll,
+ const __u8 *data)
{
nfcf_poll->bit_rate = *data++;
nfcf_poll->sensf_res_len = min_t(__u8, *data++, NFC_SENSF_RES_MAXSIZE);
@@ -164,9 +167,10 @@ static __u8 *nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev,
return data;
}
-static __u8 *nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev,
- struct rf_tech_specific_params_nfcv_poll *nfcv_poll,
- __u8 *data)
+static const __u8 *
+nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev,
+ struct rf_tech_specific_params_nfcv_poll *nfcv_poll,
+ const __u8 *data)
{
++data;
nfcv_poll->dsfid = *data++;
@@ -175,9 +179,10 @@ static __u8 *nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev,
return data;
}
-static __u8 *nci_extract_rf_params_nfcf_passive_listen(struct nci_dev *ndev,
- struct rf_tech_specific_params_nfcf_listen *nfcf_listen,
- __u8 *data)
+static const __u8 *
+nci_extract_rf_params_nfcf_passive_listen(struct nci_dev *ndev,
+ struct rf_tech_specific_params_nfcf_listen *nfcf_listen,
+ const __u8 *data)
{
nfcf_listen->local_nfcid2_len = min_t(__u8, *data++,
NFC_NFCID2_MAXSIZE);
@@ -198,12 +203,12 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
struct nfc_target *target,
__u8 rf_protocol,
__u8 rf_tech_and_mode,
- void *params)
+ const void *params)
{
- struct rf_tech_specific_params_nfca_poll *nfca_poll;
- struct rf_tech_specific_params_nfcb_poll *nfcb_poll;
- struct rf_tech_specific_params_nfcf_poll *nfcf_poll;
- struct rf_tech_specific_params_nfcv_poll *nfcv_poll;
+ const struct rf_tech_specific_params_nfca_poll *nfca_poll;
+ const struct rf_tech_specific_params_nfcb_poll *nfcb_poll;
+ const struct rf_tech_specific_params_nfcf_poll *nfcf_poll;
+ const struct rf_tech_specific_params_nfcv_poll *nfcv_poll;
__u32 protocol;
if (rf_protocol == NCI_RF_PROTOCOL_T1T)
@@ -274,7 +279,7 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
}
static void nci_add_new_target(struct nci_dev *ndev,
- struct nci_rf_discover_ntf *ntf)
+ const struct nci_rf_discover_ntf *ntf)
{
struct nfc_target *target;
int i, rc;
@@ -319,10 +324,10 @@ void nci_clear_target_list(struct nci_dev *ndev)
}
static void nci_rf_discover_ntf_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
struct nci_rf_discover_ntf ntf;
- __u8 *data = skb->data;
+ const __u8 *data = skb->data;
bool add_target = true;
ntf.rf_discovery_id = *data++;
@@ -382,7 +387,8 @@ static void nci_rf_discover_ntf_packet(struct nci_dev *ndev,
}
static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev,
- struct nci_rf_intf_activated_ntf *ntf, __u8 *data)
+ struct nci_rf_intf_activated_ntf *ntf,
+ const __u8 *data)
{
struct activation_params_nfca_poll_iso_dep *nfca_poll;
struct activation_params_nfcb_poll_iso_dep *nfcb_poll;
@@ -418,7 +424,8 @@ static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev,
}
static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev,
- struct nci_rf_intf_activated_ntf *ntf, __u8 *data)
+ struct nci_rf_intf_activated_ntf *ntf,
+ const __u8 *data)
{
struct activation_params_poll_nfc_dep *poll;
struct activation_params_listen_nfc_dep *listen;
@@ -454,7 +461,7 @@ static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev,
}
static void nci_target_auto_activated(struct nci_dev *ndev,
- struct nci_rf_intf_activated_ntf *ntf)
+ const struct nci_rf_intf_activated_ntf *ntf)
{
struct nfc_target *target;
int rc;
@@ -477,7 +484,7 @@ static void nci_target_auto_activated(struct nci_dev *ndev,
}
static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev,
- struct nci_rf_intf_activated_ntf *ntf)
+ const struct nci_rf_intf_activated_ntf *ntf)
{
ndev->remote_gb_len = 0;
@@ -519,11 +526,11 @@ static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev,
}
static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info;
struct nci_rf_intf_activated_ntf ntf;
- __u8 *data = skb->data;
+ const __u8 *data = skb->data;
int err = NCI_STATUS_OK;
ntf.rf_discovery_id = *data++;
@@ -681,10 +688,10 @@ listen:
}
static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
- struct nci_conn_info *conn_info;
- struct nci_rf_deactivate_ntf *ntf = (void *) skb->data;
+ const struct nci_conn_info *conn_info;
+ const struct nci_rf_deactivate_ntf *ntf = (void *)skb->data;
pr_debug("entry, type 0x%x, reason 0x%x\n", ntf->type, ntf->reason);
@@ -725,10 +732,10 @@ static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev,
}
static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
u8 status = NCI_STATUS_OK;
- struct nci_nfcee_discover_ntf *nfcee_ntf =
+ const struct nci_nfcee_discover_ntf *nfcee_ntf =
(struct nci_nfcee_discover_ntf *)skb->data;
pr_debug("\n");
@@ -745,7 +752,7 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev,
}
static void nci_nfcee_action_ntf_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
pr_debug("\n");
}
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index e9605922a322..a2e72c003805 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -25,9 +25,10 @@
/* Handle NCI Response packets */
-static void nci_core_reset_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
+static void nci_core_reset_rsp_packet(struct nci_dev *ndev,
+ const struct sk_buff *skb)
{
- struct nci_core_reset_rsp *rsp = (void *) skb->data;
+ const struct nci_core_reset_rsp *rsp = (void *)skb->data;
pr_debug("status 0x%x\n", rsp->status);
@@ -43,10 +44,11 @@ static void nci_core_reset_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
}
}
-static u8 nci_core_init_rsp_packet_v1(struct nci_dev *ndev, struct sk_buff *skb)
+static u8 nci_core_init_rsp_packet_v1(struct nci_dev *ndev,
+ const struct sk_buff *skb)
{
- struct nci_core_init_rsp_1 *rsp_1 = (void *) skb->data;
- struct nci_core_init_rsp_2 *rsp_2;
+ const struct nci_core_init_rsp_1 *rsp_1 = (void *)skb->data;
+ const struct nci_core_init_rsp_2 *rsp_2;
pr_debug("status 0x%x\n", rsp_1->status);
@@ -81,10 +83,11 @@ static u8 nci_core_init_rsp_packet_v1(struct nci_dev *ndev, struct sk_buff *skb)
return NCI_STATUS_OK;
}
-static u8 nci_core_init_rsp_packet_v2(struct nci_dev *ndev, struct sk_buff *skb)
+static u8 nci_core_init_rsp_packet_v2(struct nci_dev *ndev,
+ const struct sk_buff *skb)
{
- struct nci_core_init_rsp_nci_ver2 *rsp = (void *)skb->data;
- u8 *supported_rf_interface = rsp->supported_rf_interfaces;
+ const struct nci_core_init_rsp_nci_ver2 *rsp = (void *)skb->data;
+ const u8 *supported_rf_interface = rsp->supported_rf_interfaces;
u8 rf_interface_idx = 0;
u8 rf_extension_cnt = 0;
@@ -118,7 +121,7 @@ static u8 nci_core_init_rsp_packet_v2(struct nci_dev *ndev, struct sk_buff *skb)
return NCI_STATUS_OK;
}
-static void nci_core_init_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
+static void nci_core_init_rsp_packet(struct nci_dev *ndev, const struct sk_buff *skb)
{
u8 status = 0;
@@ -160,9 +163,9 @@ exit:
}
static void nci_core_set_config_rsp_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
- struct nci_core_set_config_rsp *rsp = (void *) skb->data;
+ const struct nci_core_set_config_rsp *rsp = (void *)skb->data;
pr_debug("status 0x%x\n", rsp->status);
@@ -170,7 +173,7 @@ static void nci_core_set_config_rsp_packet(struct nci_dev *ndev,
}
static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
__u8 status = skb->data[0];
@@ -179,9 +182,10 @@ static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev,
nci_req_complete(ndev, status);
}
-static void nci_rf_disc_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
+static void nci_rf_disc_rsp_packet(struct nci_dev *ndev,
+ const struct sk_buff *skb)
{
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info;
__u8 status = skb->data[0];
pr_debug("status 0x%x\n", status);
@@ -210,7 +214,7 @@ exit:
}
static void nci_rf_disc_select_rsp_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
__u8 status = skb->data[0];
@@ -222,7 +226,7 @@ static void nci_rf_disc_select_rsp_packet(struct nci_dev *ndev,
}
static void nci_rf_deactivate_rsp_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
__u8 status = skb->data[0];
@@ -238,9 +242,9 @@ static void nci_rf_deactivate_rsp_packet(struct nci_dev *ndev,
}
static void nci_nfcee_discover_rsp_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
- struct nci_nfcee_discover_rsp *discover_rsp;
+ const struct nci_nfcee_discover_rsp *discover_rsp;
if (skb->len != 2) {
nci_req_complete(ndev, NCI_STATUS_NFCEE_PROTOCOL_ERROR);
@@ -255,7 +259,7 @@ static void nci_nfcee_discover_rsp_packet(struct nci_dev *ndev,
}
static void nci_nfcee_mode_set_rsp_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
__u8 status = skb->data[0];
@@ -264,11 +268,11 @@ static void nci_nfcee_mode_set_rsp_packet(struct nci_dev *ndev,
}
static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
__u8 status = skb->data[0];
struct nci_conn_info *conn_info = NULL;
- struct nci_core_conn_create_rsp *rsp;
+ const struct nci_core_conn_create_rsp *rsp;
pr_debug("status 0x%x\n", status);
@@ -319,7 +323,7 @@ exit:
}
static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
struct nci_conn_info *conn_info;
__u8 status = skb->data[0];
diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c
index 7d8e10e27c20..0935527d1d12 100644
--- a/net/nfc/nci/spi.c
+++ b/net/nfc/nci/spi.c
@@ -27,7 +27,7 @@
#define CRC_INIT 0xFFFF
-static int __nci_spi_send(struct nci_spi *nspi, struct sk_buff *skb,
+static int __nci_spi_send(struct nci_spi *nspi, const struct sk_buff *skb,
int cs_change)
{
struct spi_message m;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 722f7ef891e1..49089c50872e 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -530,7 +530,7 @@ free_msg:
int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx)
{
- struct nfc_se *se;
+ const struct nfc_se *se;
struct sk_buff *msg;
void *hdr;
@@ -1531,7 +1531,7 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
struct genl_info *info)
{
struct nfc_dev *dev;
- struct nfc_vendor_cmd *cmd;
+ const struct nfc_vendor_cmd *cmd;
u32 dev_idx, vid, subcmd;
u8 *data;
size_t data_len;
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 889fefd64e56..de2ec66d7e83 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -48,7 +48,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
u8 comm_mode, u8 rf_mode);
int nfc_llcp_register_device(struct nfc_dev *dev);
void nfc_llcp_unregister_device(struct nfc_dev *dev);
-int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len);
+int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len);
u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len);
int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb);
struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev);
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 5e39640becdb..0ca214ab5aef 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -140,7 +140,7 @@ static void rawsock_data_exchange_complete(void *context, struct sk_buff *skb,
{
struct sock *sk = (struct sock *) context;
- BUG_ON(in_irq());
+ BUG_ON(in_hardirq());
pr_debug("sk=%p err=%d\n", sk, err);
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index ef15d9eb4774..076774034bb9 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -924,7 +924,13 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
break;
case OVS_USERSPACE_ATTR_PID:
- upcall.portid = nla_get_u32(a);
+ if (dp->user_features &
+ OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
+ upcall.portid =
+ ovs_dp_get_upcall_portid(dp,
+ smp_processor_id());
+ else
+ upcall.portid = nla_get_u32(a);
break;
case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index bc164b35e67d..67ad08320886 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -133,6 +133,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
static void ovs_dp_masks_rebalance(struct work_struct *work);
+static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);
+
/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
@@ -166,6 +168,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
free_percpu(dp->stats_percpu);
kfree(dp->ports);
ovs_meters_exit(dp);
+ kfree(rcu_dereference_raw(dp->upcall_portids));
kfree(dp);
}
@@ -239,7 +242,13 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_MISS;
- upcall.portid = ovs_vport_find_upcall_portid(p, skb);
+
+ if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
+ upcall.portid =
+ ovs_dp_get_upcall_portid(dp, smp_processor_id());
+ else
+ upcall.portid = ovs_vport_find_upcall_portid(p, skb);
+
upcall.mru = OVS_CB(skb)->mru;
error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
if (unlikely(error))
@@ -1594,16 +1603,70 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb,
DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+static int ovs_dp_set_upcall_portids(struct datapath *dp,
+ const struct nlattr *ids)
+{
+ struct dp_nlsk_pids *old, *dp_nlsk_pids;
+
+ if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
+ return -EINVAL;
+
+ old = ovsl_dereference(dp->upcall_portids);
+
+ dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
+ GFP_KERNEL);
+ if (!dp_nlsk_pids)
+ return -ENOMEM;
+
+ dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
+ nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));
+
+ rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);
+
+ kfree_rcu(old, rcu);
+
+ return 0;
+}
+
+u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
+{
+ struct dp_nlsk_pids *dp_nlsk_pids;
+
+ dp_nlsk_pids = rcu_dereference(dp->upcall_portids);
+
+ if (dp_nlsk_pids) {
+ if (cpu_id < dp_nlsk_pids->n_pids) {
+ return dp_nlsk_pids->pids[cpu_id];
+ } else if (dp_nlsk_pids->n_pids > 0 &&
+ cpu_id >= dp_nlsk_pids->n_pids) {
+ /* If the number of netlink PIDs is mismatched with
+ * the number of CPUs as seen by the kernel, log this
+ * and send the upcall to the socket at index (cpu_id
+ * modulo the number of PIDs) in order to not drop packets
+ */
+ pr_info_ratelimited("cpu_id mismatch with handler threads");
+ return dp_nlsk_pids->pids[cpu_id %
+ dp_nlsk_pids->n_pids];
+ } else {
+ return 0;
+ }
+ } else {
+ return 0;
+ }
+}
+
static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
u32 user_features = 0;
+ int err;
if (a[OVS_DP_ATTR_USER_FEATURES]) {
user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
if (user_features & ~(OVS_DP_F_VPORT_PIDS |
OVS_DP_F_UNALIGNED |
- OVS_DP_F_TC_RECIRC_SHARING))
+ OVS_DP_F_TC_RECIRC_SHARING |
+ OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
return -EOPNOTSUPP;
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
@@ -1624,6 +1687,15 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
dp->user_features = user_features;
+ if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
+ a[OVS_DP_ATTR_PER_CPU_PIDS]) {
+ /* Upcall Netlink Port IDs have been updated */
+ err = ovs_dp_set_upcall_portids(dp,
+ a[OVS_DP_ATTR_PER_CPU_PIDS]);
+ if (err)
+ return err;
+ }
+
if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
static_branch_enable(&tc_recirc_sharing_support);
else
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 38f7d3e66ca6..fcfe6cb46441 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -51,6 +51,21 @@ struct dp_stats_percpu {
};
/**
+ * struct dp_nlsk_pids - array of netlink portids for a datapath.
+ * This is used when OVS_DP_F_DISPATCH_UPCALL_PER_CPU
+ * is enabled and must be protected by rcu.
+ * @rcu: RCU callback head for deferred destruction.
+ * @n_pids: Size of @pids array.
+ * @pids: Array storing the Netlink socket PIDs indexed by CPU ID for packets
+ * that miss the flow table.
+ */
+struct dp_nlsk_pids {
+ struct rcu_head rcu;
+ u32 n_pids;
+ u32 pids[];
+};
+
+/**
* struct datapath - datapath for flow-based packet switching
* @rcu: RCU callback head for deferred destruction.
* @list_node: Element in global 'dps' list.
@@ -61,6 +76,7 @@ struct dp_stats_percpu {
* @net: Reference to net namespace.
* @max_headroom: the maximum headroom of all vports in this datapath; it will
* be used by all the internal vports in this dp.
+ * @upcall_portids: RCU protected 'struct dp_nlsk_pids'.
*
* Context: See the comment on locking at the top of datapath.c for additional
* locking information.
@@ -87,6 +103,8 @@ struct datapath {
/* Switch meters. */
struct dp_meter_table meter_tbl;
+
+ struct dp_nlsk_pids __rcu *upcall_portids;
};
/**
@@ -243,6 +261,8 @@ int ovs_dp_upcall(struct datapath *, struct sk_buff *,
const struct sw_flow_key *, const struct dp_upcall_info *,
uint32_t cutlen);
+u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id);
+
const char *ovs_dp_name(const struct datapath *dp);
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
u32 portid, u32 seq, u8 cmd);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index e586424d8b04..9713035b89e3 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -293,14 +293,14 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
}
/**
- * Parse vlan tag from vlan header.
+ * parse_vlan_tag - Parse vlan tag from vlan header.
* @skb: skb containing frame to parse
* @key_vh: pointer to parsed vlan tag
* @untag_vlan: should the vlan header be removed from the frame
*
- * Returns ERROR on memory error.
- * Returns 0 if it encounters a non-vlan or incomplete packet.
- * Returns 1 after successfully parsing vlan tag.
+ * Return: ERROR on memory error.
+ * %0 if it encounters a non-vlan or incomplete packet.
+ * %1 after successfully parsing vlan tag.
*/
static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh,
bool untag_vlan)
@@ -532,6 +532,7 @@ static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
* L3 header
* @key: output flow key
*
+ * Return: %0 if successful, otherwise a negative errno value.
*/
static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
{
@@ -748,8 +749,6 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
*
* The caller must ensure that skb->len >= ETH_HLEN.
*
- * Returns 0 if successful, otherwise a negative errno value.
- *
* Initializes @skb header fields as follows:
*
* - skb->mac_header: the L2 header.
@@ -764,6 +763,8 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
*
* - skb->protocol: the type of the data starting at skb->network_header.
* Equals to key->eth.type.
+ *
+ * Return: %0 if successful, otherwise a negative errno value.
*/
static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
{
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 88deb5b41429..cf2ce5812489 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -507,6 +507,7 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
}
skb->dev = vport->dev;
+ skb->tstamp = 0;
vport->ops->send(skb);
return;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 57a1971f29e5..543365f58e97 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -250,8 +250,7 @@ static struct net_device *packet_cached_dev_get(struct packet_sock *po)
rcu_read_lock();
dev = rcu_dereference(po->cached_dev);
- if (likely(dev))
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
return dev;
@@ -3024,8 +3023,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
out_free:
kfree_skb(skb);
out_unlock:
- if (dev)
- dev_put(dev);
+ dev_put(dev);
out:
return err;
}
@@ -3158,8 +3156,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
}
}
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
proto_curr = po->prot_hook.type;
dev_curr = po->prot_hook.dev;
@@ -3196,8 +3193,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
packet_cached_dev_assign(po, dev);
}
}
- if (dev_curr)
- dev_put(dev_curr);
+ dev_put(dev_curr);
if (proto == 0 || !need_rehook)
goto out_unlock;
@@ -4109,8 +4105,7 @@ static int packet_notifier(struct notifier_block *this,
if (msg == NETDEV_UNREGISTER) {
packet_cached_dev_reset(po);
WRITE_ONCE(po->ifindex, -1);
- if (po->prot_hook.dev)
- dev_put(po->prot_hook.dev);
+ dev_put(po->prot_hook.dev);
po->prot_hook.dev = NULL;
}
spin_unlock(&po->bind_lock);
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index ca6ae4c59433..65218b7ce9f9 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -275,8 +275,7 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
drop:
kfree_skb(skb);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
return err;
}
EXPORT_SYMBOL(pn_skb_send);
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index ac0fae06cc15..cde671d29d5d 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -122,8 +122,7 @@ struct net_device *phonet_device_get(struct net *net)
break;
dev = NULL;
}
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
return dev;
}
@@ -233,11 +232,11 @@ static int phonet_device_autoconf(struct net_device *dev)
struct if_phonet_req req;
int ret;
- if (!dev->netdev_ops->ndo_do_ioctl)
+ if (!dev->netdev_ops->ndo_siocdevprivate)
return -EOPNOTSUPP;
- ret = dev->netdev_ops->ndo_do_ioctl(dev, (struct ifreq *)&req,
- SIOCPNGAUTOCONF);
+ ret = dev->netdev_ops->ndo_siocdevprivate(dev, (struct ifreq *)&req,
+ NULL, SIOCPNGAUTOCONF);
if (ret < 0)
return ret;
@@ -411,8 +410,7 @@ struct net_device *phonet_route_output(struct net *net, u8 daddr)
daddr >>= 2;
rcu_read_lock();
dev = rcu_dereference(routes->table[daddr]);
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
if (!dev)
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 2599235d592e..71e2caf6ab85 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -379,8 +379,7 @@ static int pn_socket_ioctl(struct socket *sock, unsigned int cmd,
saddr = PN_NO_ADDR;
release_sock(sk);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
if (saddr == PN_NO_ADDR)
return -EHOSTUNREACH;
diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
index 1dc955ca57d3..fa611678af05 100644
--- a/net/qrtr/mhi.c
+++ b/net/qrtr/mhi.c
@@ -15,7 +15,6 @@ struct qrtr_mhi_dev {
struct qrtr_endpoint ep;
struct mhi_device *mhi_dev;
struct device *dev;
- struct completion ready;
};
/* From MHI to QRTR */
@@ -51,10 +50,6 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep);
int rc;
- rc = wait_for_completion_interruptible(&qdev->ready);
- if (rc)
- goto free_skb;
-
if (skb->sk)
sock_hold(skb->sk);
@@ -84,7 +79,7 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
int rc;
/* start channels */
- rc = mhi_prepare_for_transfer(mhi_dev, 0);
+ rc = mhi_prepare_for_transfer(mhi_dev);
if (rc)
return rc;
@@ -101,15 +96,6 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
if (rc)
return rc;
- /* start channels */
- rc = mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS);
- if (rc) {
- qrtr_endpoint_unregister(&qdev->ep);
- dev_set_drvdata(&mhi_dev->dev, NULL);
- return rc;
- }
-
- complete_all(&qdev->ready);
dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n");
return 0;
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 171b7f3be6ef..525e3ea063b1 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -493,7 +493,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
goto err;
}
- if (len != ALIGN(size, 4) + hdrlen)
+ if (!size || size & 3 || len != size + hdrlen)
goto err;
if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA &&
@@ -506,8 +506,12 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
if (cb->type == QRTR_TYPE_NEW_SERVER) {
/* Remote node endpoint can bridge other distant nodes */
- const struct qrtr_ctrl_pkt *pkt = data + hdrlen;
+ const struct qrtr_ctrl_pkt *pkt;
+ if (size < sizeof(*pkt))
+ goto err;
+
+ pkt = data + hdrlen;
qrtr_node_assign(node, le32_to_cpu(pkt->server.node));
}
@@ -1157,14 +1161,14 @@ static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
rc = put_user(len, (int __user *)argp);
break;
case SIOCGIFADDR:
- if (copy_from_user(&ifr, argp, sizeof(ifr))) {
+ if (get_user_ifreq(&ifr, NULL, argp)) {
rc = -EFAULT;
break;
}
sq = (struct sockaddr_qrtr *)&ifr.ifr_addr;
*sq = ipc->us;
- if (copy_to_user(argp, &ifr, sizeof(ifr))) {
+ if (put_user_ifreq(&ifr, argp)) {
rc = -EFAULT;
break;
}
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 9b6ffff72f2d..28c1b0022178 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -131,9 +131,9 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
cpu_relax();
}
- ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len,
+ ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len,
&off, PAGE_SIZE);
- if (unlikely(ret != ibmr->sg_len))
+ if (unlikely(ret != ibmr->sg_dma_len))
return ret < 0 ? ret : -EINVAL;
if (cmpxchg(&frmr->fr_state,
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index 0885b22e5c0e..accd35c05577 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -21,6 +21,8 @@ config AF_RXRPC
See Documentation/networking/rxrpc.rst.
+if AF_RXRPC
+
config AF_RXRPC_IPV6
bool "IPv6 support for RxRPC"
depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC)
@@ -30,7 +32,6 @@ config AF_RXRPC_IPV6
config AF_RXRPC_INJECT_LOSS
bool "Inject packet loss into RxRPC packet stream"
- depends on AF_RXRPC
help
Say Y here to inject packet loss by discarding some received and some
transmitted packets.
@@ -38,7 +39,6 @@ config AF_RXRPC_INJECT_LOSS
config AF_RXRPC_DEBUG
bool "RxRPC dynamic debugging"
- depends on AF_RXRPC
help
Say Y here to make runtime controllable debugging messages appear.
@@ -47,7 +47,6 @@ config AF_RXRPC_DEBUG
config RXKAD
bool "RxRPC Kerberos security"
- depends on AF_RXRPC
select CRYPTO
select CRYPTO_MANAGER
select CRYPTO_SKCIPHER
@@ -58,3 +57,5 @@ config RXKAD
through the use of the key retention service.
See Documentation/networking/rxrpc.rst.
+
+endif
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index d17a66aab8ee..7dd3a2dc5fa4 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -495,7 +495,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
p->tcfa_tm.install = jiffies;
p->tcfa_tm.lastuse = jiffies;
p->tcfa_tm.firstuse = 0;
- p->tcfa_flags = flags;
+ p->tcfa_flags = flags & TCA_ACT_FLAGS_USER_MASK;
if (est) {
err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
&p->tcfa_rate_est,
@@ -941,7 +941,7 @@ void tcf_idr_insert_many(struct tc_action *actions[])
}
}
-struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
+struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
bool rtnl_held,
struct netlink_ext_ack *extack)
{
@@ -951,7 +951,7 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
struct nlattr *kind;
int err;
- if (name == NULL) {
+ if (!police) {
err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
tcf_action_policy, extack);
if (err < 0)
@@ -967,7 +967,7 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
return ERR_PTR(err);
}
} else {
- if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
+ if (strlcpy(act_name, "police", IFNAMSIZ) >= IFNAMSIZ) {
NL_SET_ERR_MSG(extack, "TC action name too long");
return ERR_PTR(-EINVAL);
}
@@ -1004,12 +1004,11 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
- char *name, int ovr, int bind,
struct tc_action_ops *a_o, int *init_res,
- bool rtnl_held,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
- struct nla_bitfield32 flags = { 0, 0 };
+ bool police = flags & TCA_ACT_FLAGS_POLICE;
+ struct nla_bitfield32 userflags = { 0, 0 };
u8 hw_stats = TCA_ACT_HW_STATS_ANY;
struct nlattr *tb[TCA_ACT_MAX + 1];
struct tc_cookie *cookie = NULL;
@@ -1017,7 +1016,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
int err;
/* backward compatibility for policer */
- if (name == NULL) {
+ if (!police) {
err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
tcf_action_policy, extack);
if (err < 0)
@@ -1032,22 +1031,22 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
}
hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]);
if (tb[TCA_ACT_FLAGS])
- flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
+ userflags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
- err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
- rtnl_held, tp, flags.value, extack);
+ err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, tp,
+ userflags.value | flags, extack);
} else {
- err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
- tp, flags.value, extack);
+ err = a_o->init(net, nla, est, &a, tp, userflags.value | flags,
+ extack);
}
if (err < 0)
goto err_out;
*init_res = err;
- if (!name && tb[TCA_ACT_COOKIE])
+ if (!police && tb[TCA_ACT_COOKIE])
tcf_set_action_cookie(&a->act_cookie, cookie);
- if (!name)
+ if (!police)
a->hw_stats = hw_stats;
return a;
@@ -1063,9 +1062,9 @@ err_out:
/* Returns numbers of initialized actions or negative error. */
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
- struct nlattr *est, char *name, int ovr, int bind,
- struct tc_action *actions[], int init_res[], size_t *attr_size,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ struct nlattr *est, struct tc_action *actions[],
+ int init_res[], size_t *attr_size, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {};
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
@@ -1082,7 +1081,9 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
- a_o = tc_action_load_ops(name, tb[i], rtnl_held, extack);
+ a_o = tc_action_load_ops(tb[i], flags & TCA_ACT_FLAGS_POLICE,
+ !(flags & TCA_ACT_FLAGS_NO_RTNL),
+ extack);
if (IS_ERR(a_o)) {
err = PTR_ERR(a_o);
goto err_mod;
@@ -1091,9 +1092,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
- act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
- ops[i - 1], &init_res[i - 1], rtnl_held,
- extack);
+ act = tcf_action_init_1(net, tp, tb[i], est, ops[i - 1],
+ &init_res[i - 1], flags, extack);
if (IS_ERR(act)) {
err = PTR_ERR(act);
goto err;
@@ -1113,7 +1113,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
goto err_mod;
err:
- tcf_action_destroy(actions, bind);
+ tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND);
err_mod:
for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
if (ops[i])
@@ -1351,8 +1351,6 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
module_put(ops->owner);
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
- if (err > 0)
- return 0;
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification");
@@ -1423,8 +1421,6 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
- if (ret > 0)
- return 0;
return ret;
}
@@ -1481,7 +1477,6 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
- int err = 0;
skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
GFP_KERNEL);
@@ -1495,15 +1490,12 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
return -EINVAL;
}
- err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
- if (err > 0)
- err = 0;
- return err;
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
}
static int tcf_action_add(struct net *net, struct nlattr *nla,
- struct nlmsghdr *n, u32 portid, int ovr,
+ struct nlmsghdr *n, u32 portid, u32 flags,
struct netlink_ext_ack *extack)
{
size_t attr_size = 0;
@@ -1512,8 +1504,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
int init_res[TCA_ACT_MAX_PRIO] = {};
for (loop = 0; loop < 10; loop++) {
- ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0,
- actions, init_res, &attr_size, true, extack);
+ ret = tcf_action_init(net, NULL, nla, NULL, actions, init_res,
+ &attr_size, flags, extack);
if (ret != -EAGAIN)
break;
}
@@ -1543,7 +1535,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_ROOT_MAX + 1];
u32 portid = NETLINK_CB(skb).portid;
- int ret = 0, ovr = 0;
+ u32 flags = 0;
+ int ret = 0;
if ((n->nlmsg_type != RTM_GETACTION) &&
!netlink_capable(skb, CAP_NET_ADMIN))
@@ -1569,8 +1562,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
* is zero) then just set this
*/
if (n->nlmsg_flags & NLM_F_REPLACE)
- ovr = 1;
- ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
+ flags = TCA_ACT_FLAGS_REPLACE;
+ ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, flags,
extack);
break;
case RTM_DELACTION:
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index e409a0005717..5c36013339e1 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -47,11 +47,11 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
if (at_ingress) {
__skb_push(skb, skb->mac_len);
bpf_compute_data_pointers(skb);
- filter_res = BPF_PROG_RUN(filter, skb);
+ filter_res = bpf_prog_run(filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
bpf_compute_data_pointers(skb);
- filter_res = BPF_PROG_RUN(filter, skb);
+ filter_res = bpf_prog_run(filter, skb);
}
if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK)
skb_orphan(skb);
@@ -275,11 +275,11 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act,
- int replace, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tcf_bpf_cfg cfg, old;
@@ -317,7 +317,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (bind)
return 0;
- if (!replace) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*act, bind);
return -EEXIST;
}
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index e19885d7fe2c..94e78ac7a748 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -96,12 +96,12 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_chain *goto_ch = NULL;
struct tcf_connmark_info *ci;
struct tc_connmark *parm;
@@ -144,7 +144,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
ci = to_connmark(*a);
if (bind)
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 4fa4fcb842ba..a15ec95e69c3 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -41,11 +41,12 @@ static unsigned int csum_net_id;
static struct tc_action_ops act_csum_ops;
static int tcf_csum_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held, struct tcf_proto *tp,
+ struct nlattr *est, struct tc_action **a,
+ struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_csum_params *params_new;
struct nlattr *tb[TCA_CSUM_MAX + 1];
struct tcf_chain *goto_ch = NULL;
@@ -78,7 +79,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
} else if (err > 0) {
if (bind)/* dont override defaults */
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 1b4b3514c94f..ad9df0cb4b98 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1235,11 +1235,11 @@ static int tcf_ct_fill_params(struct net *net,
static int tcf_ct_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int replace, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ct_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_ct_params *params = NULL;
struct nlattr *tb[TCA_CT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
@@ -1279,7 +1279,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
if (bind)
return 0;
- if (!replace) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index b20c8ce59905..549374a2d008 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -154,11 +154,11 @@ static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = {
static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
u32 dscpmask = 0, dscpstatemask, index;
struct nlattr *tb[TCA_CTINFO_MAX + 1];
struct tcf_ctinfo_params *cp_new;
@@ -221,7 +221,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
} else if (err > 0) {
if (bind) /* don't override defaults */
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 73c3926358a0..d8dce173df37 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -52,11 +52,11 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_GACT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_gact *parm;
@@ -109,7 +109,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
} else if (err > 0) {
if (bind)/* dont override defaults */
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index a78cb7965718..7df72a4197a3 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -295,12 +295,12 @@ static void gate_setup_timer(struct tcf_gate *gact, u64 basetime,
static int tcf_gate_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gate_net_id);
enum tk_offsets tk_offset = TK_OFFS_TAI;
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_GATE_MAX + 1];
struct tcf_chain *goto_ch = NULL;
u64 cycletime = 0, basetime = 0;
@@ -364,7 +364,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index a2ddea04183a..7064a365a1a9 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -479,11 +479,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_IFE_MAX + 1];
struct nlattr *tb2[IFE_META_MAX + 1];
struct tcf_chain *goto_ch = NULL;
@@ -532,7 +532,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
kfree(p);
return err;
}
- err = load_metalist(tb2, rtnl_held);
+ err = load_metalist(tb2, !(flags & TCA_ACT_FLAGS_NO_RTNL));
if (err) {
kfree(p);
return err;
@@ -560,7 +560,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
return ret;
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
kfree(p);
return -EEXIST;
@@ -600,7 +600,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
}
if (tb[TCA_IFE_METALST]) {
- err = populate_metalist(ife, tb2, exists, rtnl_held);
+ err = populate_metalist(ife, tb2, exists,
+ !(flags & TCA_ACT_FLAGS_NO_RTNL));
if (err)
goto metadata_parse_err;
} else {
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index ac7297f42355..265b1443e252 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -94,10 +94,11 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- const struct tc_action_ops *ops, int ovr, int bind,
+ const struct tc_action_ops *ops,
struct tcf_proto *tp, u32 flags)
{
struct tc_action_net *tn = net_generic(net, id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_IPT_MAX + 1];
struct tcf_ipt *ipt;
struct xt_entry_target *td, *t;
@@ -154,7 +155,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
if (bind)/* dont override defaults */
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
@@ -201,21 +202,21 @@ err1:
}
static int tcf_ipt_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held, struct tcf_proto *tp,
+ struct nlattr *est, struct tc_action **a,
+ struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
- return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
- bind, tp, flags);
+ return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops,
+ tp, flags);
}
static int tcf_xt_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool unlocked, struct tcf_proto *tp,
+ struct nlattr *est, struct tc_action **a,
+ struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
- return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
- bind, tp, flags);
+ return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops,
+ tp, flags);
}
static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 7153c67f641e..d64b0eeccbe4 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -78,8 +78,7 @@ static void tcf_mirred_release(struct tc_action *a)
/* last reference to action, no need to lock */
dev = rcu_dereference_protected(m->tcfm_dev, 1);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
}
static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
@@ -91,11 +90,11 @@ static struct tc_action_ops act_mirred_ops;
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_MIRRED_MAX + 1];
struct tcf_chain *goto_ch = NULL;
bool mac_header_xmit = false;
@@ -155,7 +154,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
return ret;
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
@@ -180,8 +179,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
mac_header_xmit = dev_is_mac_header_xmit(dev);
dev = rcu_replace_pointer(m->tcfm_dev, dev,
lockdep_is_held(&m->tcf_lock));
- if (dev)
- dev_put(dev);
+ dev_put(dev);
m->tcfm_mac_header_xmit = mac_header_xmit;
}
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
@@ -273,6 +271,9 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
goto out;
}
+ /* All mirred/redirected skbs should clear previous ct info */
+ nf_reset_ct(skb2);
+
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
expects_nh = want_ingress || !m_mac_header_xmit;
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index d1486ea496a2..e4529b428cf4 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -152,11 +152,11 @@ static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = {
static int tcf_mpls_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mpls_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_MPLS_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tcf_mpls_params *p;
@@ -255,7 +255,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 1ebd2a86d980..7dd6b586ba7f 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -34,11 +34,11 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
};
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
- struct tc_action **a, int ovr, int bind,
- bool rtnl_held, struct tcf_proto *tp,
+ struct tc_action **a, struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_NAT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_nat *parm;
@@ -70,7 +70,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
} else if (err > 0) {
if (bind)
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index b45304446e13..c6c862c459cc 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -136,11 +136,11 @@ nla_failure:
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_PEDIT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_pedit_key *keys = NULL;
@@ -198,7 +198,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
} else if (err > 0) {
if (bind)
goto out_free;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
ret = -EEXIST;
goto out_release;
}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0fab8de176d2..832157a840fc 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -48,11 +48,11 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
static int tcf_police_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
int ret = 0, tcfp_result = TC_ACT_OK, err, size;
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_POLICE_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_police *parm;
@@ -97,7 +97,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
ret = ACT_P_CREATED;
spin_lock_init(&(to_police(*a)->tcfp_lock));
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 6a0c16e4351d..230501eb9e06 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -34,11 +34,12 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
};
static int tcf_sample_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held, struct tcf_proto *tp,
+ struct nlattr *est, struct tc_action **a,
+ struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
struct psample_group *psample_group;
u32 psample_group_num, rate, index;
@@ -75,7 +76,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
return ret;
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 726cc956d06f..cbbe1861d3a2 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -85,11 +85,11 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_DEF_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_defact *parm;
@@ -147,7 +147,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
tcf_action_set_ctrlact(*a, parm->action, goto_ch);
ret = ACT_P_CREATED;
} else {
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
err = -EEXIST;
goto release_idr;
}
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index e5f3fb8b00e3..605418538347 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -96,11 +96,11 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 act_flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+ bool bind = act_flags & TCA_ACT_FLAGS_BIND;
struct tcf_skbedit_params *params_new;
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
@@ -186,7 +186,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
ret = ACT_P_CREATED;
} else {
d = to_skbedit(*a);
- if (!ovr) {
+ if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 8d17a543cc9f..ecb9ee666095 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
+#include <net/inet_ecn.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
@@ -21,15 +22,13 @@
static unsigned int skbmod_net_id;
static struct tc_action_ops act_skbmod_ops;
-#define MAX_EDIT_LEN ETH_HLEN
static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_skbmod *d = to_skbmod(a);
- int action;
+ int action, max_edit_len, err;
struct tcf_skbmod_params *p;
u64 flags;
- int err;
tcf_lastuse_update(&d->tcf_tm);
bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
@@ -38,19 +37,34 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
if (unlikely(action == TC_ACT_SHOT))
goto drop;
- if (!skb->dev || skb->dev->type != ARPHRD_ETHER)
- return action;
+ max_edit_len = skb_mac_header_len(skb);
+ p = rcu_dereference_bh(d->skbmod_p);
+ flags = p->flags;
+
+ /* tcf_skbmod_init() guarantees "flags" to be one of the following:
+ * 1. a combination of SKBMOD_F_{DMAC,SMAC,ETYPE}
+ * 2. SKBMOD_F_SWAPMAC
+ * 3. SKBMOD_F_ECN
+ * SKBMOD_F_ECN only works with IP packets; all other flags only work with Ethernet
+ * packets.
+ */
+ if (flags == SKBMOD_F_ECN) {
+ switch (skb_protocol(skb, true)) {
+ case cpu_to_be16(ETH_P_IP):
+ case cpu_to_be16(ETH_P_IPV6):
+ max_edit_len += skb_network_header_len(skb);
+ break;
+ default:
+ goto out;
+ }
+ } else if (!skb->dev || skb->dev->type != ARPHRD_ETHER) {
+ goto out;
+ }
- /* XXX: if you are going to edit more fields beyond ethernet header
- * (example when you add IP header replacement or vlan swap)
- * then MAX_EDIT_LEN needs to change appropriately
- */
- err = skb_ensure_writable(skb, MAX_EDIT_LEN);
+ err = skb_ensure_writable(skb, max_edit_len);
if (unlikely(err)) /* best policy is to drop on the floor */
goto drop;
- p = rcu_dereference_bh(d->skbmod_p);
- flags = p->flags;
if (flags & SKBMOD_F_DMAC)
ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
if (flags & SKBMOD_F_SMAC)
@@ -66,6 +80,10 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr);
}
+ if (flags & SKBMOD_F_ECN)
+ INET_ECN_set_ce(skb);
+
+out:
return action;
drop:
@@ -82,11 +100,12 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+ bool ovr = flags & TCA_ACT_FLAGS_REPLACE;
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_SKBMOD_MAX + 1];
struct tcf_skbmod_params *p, *p_old;
struct tcf_chain *goto_ch = NULL;
@@ -129,6 +148,8 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
index = parm->index;
if (parm->flags & SKBMOD_F_SWAPMAC)
lflags = SKBMOD_F_SWAPMAC;
+ if (parm->flags & SKBMOD_F_ECN)
+ lflags = SKBMOD_F_ECN;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (err < 0)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 85c0d0d5b9da..d9cd174eecb7 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -355,11 +355,11 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 act_flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+ bool bind = act_flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
struct tcf_tunnel_key_params *params_new;
struct metadata_dst *metadata = NULL;
@@ -504,7 +504,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) {
NL_SET_ERR_MSG(extack, "TC IDR already exists");
ret = -EEXIST;
goto release_tun_meta;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 71f2015c70ca..e4dc5a555bd8 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -114,11 +114,11 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_VLAN_MAX + 1];
struct tcf_chain *goto_ch = NULL;
bool push_prio_exists = false;
@@ -223,7 +223,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index e3e79e9bd706..2ef8f5a6205a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -634,6 +634,7 @@ static void tcf_block_offload_init(struct flow_block_offload *bo,
bo->block_shared = shared;
bo->extack = extack;
bo->sch = sch;
+ bo->cb_list_head = &flow_block->cb_list;
INIT_LIST_HEAD(&bo->cb_list);
}
@@ -1577,21 +1578,11 @@ reset:
#endif
}
-int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+int tcf_classify(struct sk_buff *skb,
+ const struct tcf_block *block,
+ const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode)
{
- u32 last_executed_chain = 0;
-
- return __tcf_classify(skb, tp, tp, res, compat_mode,
- &last_executed_chain);
-}
-EXPORT_SYMBOL(tcf_classify);
-
-int tcf_classify_ingress(struct sk_buff *skb,
- const struct tcf_block *ingress_block,
- const struct tcf_proto *tp,
- struct tcf_result *res, bool compat_mode)
-{
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
u32 last_executed_chain = 0;
@@ -1603,20 +1594,22 @@ int tcf_classify_ingress(struct sk_buff *skb,
struct tc_skb_ext *ext;
int ret;
- ext = skb_ext_find(skb, TC_SKB_EXT);
+ if (block) {
+ ext = skb_ext_find(skb, TC_SKB_EXT);
- if (ext && ext->chain) {
- struct tcf_chain *fchain;
+ if (ext && ext->chain) {
+ struct tcf_chain *fchain;
- fchain = tcf_chain_lookup_rcu(ingress_block, ext->chain);
- if (!fchain)
- return TC_ACT_SHOT;
+ fchain = tcf_chain_lookup_rcu(block, ext->chain);
+ if (!fchain)
+ return TC_ACT_SHOT;
- /* Consume, so cloned/redirect skbs won't inherit ext */
- skb_ext_del(skb, TC_SKB_EXT);
+ /* Consume, so cloned/redirect skbs won't inherit ext */
+ skb_ext_del(skb, TC_SKB_EXT);
- tp = rcu_dereference_bh(fchain->filter_chain);
- last_executed_chain = fchain->index;
+ tp = rcu_dereference_bh(fchain->filter_chain);
+ last_executed_chain = fchain->index;
+ }
}
ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
@@ -1635,7 +1628,7 @@ int tcf_classify_ingress(struct sk_buff *skb,
return ret;
#endif
}
-EXPORT_SYMBOL(tcf_classify_ingress);
+EXPORT_SYMBOL(tcf_classify);
struct tcf_chain_info {
struct tcf_proto __rcu **pprev;
@@ -1870,13 +1863,10 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
}
if (unicast)
- err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ err = rtnl_unicast(skb, net, portid);
else
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
-
- if (err > 0)
- err = 0;
return err;
}
@@ -1909,15 +1899,13 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
}
if (unicast)
- err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ err = rtnl_unicast(skb, net, portid);
else
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
- if (err > 0)
- err = 0;
return err;
}
@@ -1962,6 +1950,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
int err;
int tp_created;
bool rtnl_held = false;
+ u32 flags;
if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -1982,6 +1971,7 @@ replay:
tp = NULL;
cl = 0;
block = NULL;
+ flags = 0;
if (prio == 0) {
/* If no priority is provided by the user,
@@ -2125,9 +2115,12 @@ replay:
goto errout;
}
+ if (!(n->nlmsg_flags & NLM_F_CREATE))
+ flags |= TCA_ACT_FLAGS_REPLACE;
+ if (!rtnl_held)
+ flags |= TCA_ACT_FLAGS_NO_RTNL;
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
- n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
- rtnl_held, extack);
+ flags, extack);
if (err == 0) {
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_NEWTFILTER, false, rtnl_held);
@@ -2711,13 +2704,11 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
}
if (unicast)
- err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ err = rtnl_unicast(skb, net, portid);
else
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
flags & NLM_F_ECHO);
- if (err > 0)
- err = 0;
return err;
}
@@ -2741,7 +2732,7 @@ static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
}
if (unicast)
- return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ return rtnl_unicast(skb, net, portid);
return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
}
@@ -3035,8 +3026,8 @@ void tcf_exts_destroy(struct tcf_exts *exts)
EXPORT_SYMBOL(tcf_exts_destroy);
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
- struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ struct nlattr *rate_tlv, struct tcf_exts *exts,
+ u32 flags, struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
{
@@ -3047,13 +3038,15 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
if (exts->police && tb[exts->police]) {
struct tc_action_ops *a_o;
- a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack);
+ a_o = tc_action_load_ops(tb[exts->police], true,
+ !(flags & TCA_ACT_FLAGS_NO_RTNL),
+ extack);
if (IS_ERR(a_o))
return PTR_ERR(a_o);
+ flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND;
act = tcf_action_init_1(net, tp, tb[exts->police],
- rate_tlv, "police", ovr,
- TCA_ACT_BIND, a_o, init_res,
- rtnl_held, extack);
+ rate_tlv, a_o, init_res, flags,
+ extack);
module_put(a_o->owner);
if (IS_ERR(act))
return PTR_ERR(act);
@@ -3065,10 +3058,10 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
} else if (exts->action && tb[exts->action]) {
int err;
+ flags |= TCA_ACT_FLAGS_BIND;
err = tcf_action_init(net, tp, tb[exts->action],
- rate_tlv, NULL, ovr, TCA_ACT_BIND,
- exts->actions, init_res,
- &attr_size, rtnl_held, extack);
+ rate_tlv, exts->actions, init_res,
+ &attr_size, flags, extack);
if (err < 0)
return err;
exts->nr_actions = err;
@@ -3832,7 +3825,7 @@ struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, stru
fl = rcu_dereference_bh(qe->filter_chain);
- switch (tcf_classify(skb, fl, &cl_res, false)) {
+ switch (tcf_classify(skb, NULL, fl, &cl_res, false)) {
case TC_ACT_SHOT:
qdisc_qstats_drop(sch);
__qdisc_drop(skb, to_free);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index f256a7c69093..8158fc9ee1ab 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -145,12 +145,12 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
static int basic_set_parms(struct net *net, struct tcf_proto *tp,
struct basic_filter *f, unsigned long base,
struct nlattr **tb,
- struct nlattr *est, bool ovr,
+ struct nlattr *est, u32 flags,
struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
if (err < 0)
return err;
@@ -169,8 +169,8 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
static int basic_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ struct nlattr **tca, void **arg,
+ u32 flags, struct netlink_ext_ack *extack)
{
int err;
struct basic_head *head = rtnl_dereference(tp->root);
@@ -216,7 +216,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
goto errout;
}
- err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr,
+ err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], flags,
extack);
if (err < 0) {
if (!fold)
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index fa739efa59f4..df19a847829e 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -96,11 +96,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
/* It is safe to push/pull even if skb_shared() */
__skb_push(skb, skb->mac_len);
bpf_compute_data_pointers(skb);
- filter_res = BPF_PROG_RUN(prog->filter, skb);
+ filter_res = bpf_prog_run(prog->filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
bpf_compute_data_pointers(skb);
- filter_res = BPF_PROG_RUN(prog->filter, skb);
+ filter_res = bpf_prog_run(prog->filter, skb);
}
if (prog->exts_integrated) {
@@ -404,7 +404,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_bpf_prog *prog, unsigned long base,
- struct nlattr **tb, struct nlattr *est, bool ovr,
+ struct nlattr **tb, struct nlattr *est, u32 flags,
struct netlink_ext_ack *extack)
{
bool is_bpf, is_ebpf, have_exts = false;
@@ -416,7 +416,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
return -EINVAL;
- ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true,
+ ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags,
extack);
if (ret < 0)
return ret;
@@ -455,7 +455,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -500,7 +500,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
goto errout;
prog->handle = handle;
- ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr,
+ ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags,
extack);
if (ret < 0)
goto errout_idr;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index fb881144fa01..ed00001b528a 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -76,7 +76,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work)
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_CGROUP_MAX + 1];
@@ -108,8 +108,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr,
- true, extack);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, flags,
+ extack);
if (err < 0)
goto errout;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 87398af2715a..972303aa8edd 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -387,7 +387,7 @@ static void flow_destroy_filter_work(struct work_struct *work)
static int flow_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
@@ -442,8 +442,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto err2;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
- true, extack);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, flags,
+ extack);
if (err < 0)
goto err2;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index d7869a984881..23b21253b3c3 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1915,23 +1915,22 @@ errout_cleanup:
static int fl_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_fl_filter *f, struct fl_flow_mask *mask,
unsigned long base, struct nlattr **tb,
- struct nlattr *est, bool ovr,
- struct fl_flow_tmplt *tmplt, bool rtnl_held,
+ struct nlattr *est,
+ struct fl_flow_tmplt *tmplt, u32 flags,
struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, rtnl_held,
- extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
if (err < 0)
return err;
if (tb[TCA_FLOWER_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
- if (!rtnl_held)
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
rtnl_lock();
tcf_bind_filter(tp, &f->res, base);
- if (!rtnl_held)
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
rtnl_unlock();
}
@@ -1975,10 +1974,11 @@ static int fl_ht_insert_unique(struct cls_fl_filter *fnew,
static int fl_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = fl_head_dereference(tp);
+ bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL);
struct cls_fl_filter *fold = *arg;
struct cls_fl_filter *fnew;
struct fl_flow_mask *mask;
@@ -2034,8 +2034,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
}
}
- err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr,
- tp->chain->tmplt_priv, rtnl_held, extack);
+ err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE],
+ tp->chain->tmplt_priv, flags, extack);
if (err)
goto errout;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index ec945294626a..8654b0ce997c 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -198,15 +198,15 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
static int fw_set_parms(struct net *net, struct tcf_proto *tp,
struct fw_filter *f, struct nlattr **tb,
- struct nlattr **tca, unsigned long base, bool ovr,
+ struct nlattr **tca, unsigned long base, u32 flags,
struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
u32 mask;
int err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr,
- true, extack);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, flags,
+ extack);
if (err < 0)
return err;
@@ -237,8 +237,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
static int fw_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca, void **arg,
- bool ovr, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = *arg;
@@ -277,7 +276,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
return err;
}
- err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr, extack);
+ err = fw_set_parms(net, tp, fnew, tb, tca, base, flags, extack);
if (err < 0) {
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
@@ -326,7 +325,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
f->id = handle;
f->tp = tp;
- err = fw_set_parms(net, tp, f, tb, tca, base, ovr, extack);
+ err = fw_set_parms(net, tp, f, tb, tca, base, flags, extack);
if (err < 0)
goto errout;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index cafb84480bab..24f0046ce0b3 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -163,13 +163,12 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
static int mall_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_mall_head *head,
unsigned long base, struct nlattr **tb,
- struct nlattr *est, bool ovr,
+ struct nlattr *est, u32 flags,
struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true,
- extack);
+ err = tcf_exts_validate(net, tp, tb, est, &head->exts, flags, extack);
if (err < 0)
return err;
@@ -183,13 +182,13 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
static int mall_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
struct cls_mall_head *new;
- u32 flags = 0;
+ u32 userflags = 0;
int err;
if (!tca[TCA_OPTIONS])
@@ -204,8 +203,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
return err;
if (tb[TCA_MATCHALL_FLAGS]) {
- flags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]);
- if (!tc_flags_valid(flags))
+ userflags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]);
+ if (!tc_flags_valid(userflags))
return -EINVAL;
}
@@ -220,14 +219,14 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
if (!handle)
handle = 1;
new->handle = handle;
- new->flags = flags;
+ new->flags = userflags;
new->pf = alloc_percpu(struct tc_matchall_pcnt);
if (!new->pf) {
err = -ENOMEM;
goto err_alloc_percpu;
}
- err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr,
+ err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], flags,
extack);
if (err)
goto err_set_parms;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 5efa3e7ace15..a35ab8c27866 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -382,7 +382,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base, struct route4_filter *f,
u32 handle, struct route4_head *head,
struct nlattr **tb, struct nlattr *est, int new,
- bool ovr, struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
u32 id = 0, to = 0, nhandle = 0x8000;
struct route4_filter *fp;
@@ -390,7 +390,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
struct route4_bucket *b;
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
if (err < 0)
return err;
@@ -464,8 +464,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
static int route4_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ struct nlattr **tca, void **arg, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter __rcu **fp;
@@ -510,7 +510,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
}
err = route4_set_parms(net, tp, base, f, handle, head, tb,
- tca[TCA_RATE], new, ovr, extack);
+ tca[TCA_RATE], new, flags, extack);
if (err < 0)
goto errout;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 27a4b6dbcf57..5cd9d6b143c4 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -470,9 +470,8 @@ static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
static int rsvp_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
- u32 handle,
- struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ u32 handle, struct nlattr **tca,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
@@ -499,7 +498,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true,
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, flags,
extack);
if (err < 0)
goto errout2;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index e9a8a2c86bbd..742c7d49a958 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -330,7 +330,7 @@ static int
tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
u32 handle, struct tcindex_data *p,
struct tcindex_filter_result *r, struct nlattr **tb,
- struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
+ struct nlattr *est, u32 flags, struct netlink_ext_ack *extack)
{
struct tcindex_filter_result new_filter_result, *old_r = r;
struct tcindex_data *cp = NULL, *oldp;
@@ -342,7 +342,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &e, flags, extack);
if (err < 0)
goto errout;
@@ -529,8 +529,8 @@ errout:
static int
tcindex_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ struct nlattr **tca, void **arg, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -551,7 +551,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
return err;
return tcindex_set_parms(net, tp, base, handle, p, r, tb,
- tca[TCA_RATE], ovr, extack);
+ tca[TCA_RATE], flags, extack);
}
static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 6e1abe805448..4272814487f0 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -709,12 +709,12 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base,
struct tc_u_knode *n, struct nlattr **tb,
- struct nlattr *est, bool ovr,
+ struct nlattr *est, u32 flags,
struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &n->exts, flags, extack);
if (err < 0)
return err;
@@ -840,7 +840,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr, bool rtnl_held,
+ struct nlattr **tca, void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
@@ -849,7 +849,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tc_u32_sel *s;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_U32_MAX + 1];
- u32 htid, flags = 0;
+ u32 htid, userflags = 0;
size_t sel_size;
int err;
@@ -868,8 +868,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return err;
if (tb[TCA_U32_FLAGS]) {
- flags = nla_get_u32(tb[TCA_U32_FLAGS]);
- if (!tc_flags_valid(flags)) {
+ userflags = nla_get_u32(tb[TCA_U32_FLAGS]);
+ if (!tc_flags_valid(userflags)) {
NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags");
return -EINVAL;
}
@@ -884,7 +884,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
- if ((n->flags ^ flags) &
+ if ((n->flags ^ userflags) &
~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) {
NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags");
return -EINVAL;
@@ -895,7 +895,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -ENOMEM;
err = u32_set_parms(net, tp, base, new, tb,
- tca[TCA_RATE], ovr, extack);
+ tca[TCA_RATE], flags, extack);
if (err) {
u32_destroy_key(new, false);
@@ -955,9 +955,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
ht->handle = handle;
ht->prio = tp->prio;
idr_init(&ht->handle_idr);
- ht->flags = flags;
+ ht->flags = userflags;
- err = u32_replace_hw_hnode(tp, ht, flags, extack);
+ err = u32_replace_hw_hnode(tp, ht, userflags, extack);
if (err) {
idr_remove(&tp_c->handle_idr, handle);
kfree(ht);
@@ -1038,7 +1038,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
RCU_INIT_POINTER(n->ht_up, ht);
n->handle = handle;
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
- n->flags = flags;
+ n->flags = userflags;
err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE);
if (err < 0)
@@ -1060,7 +1060,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
}
#endif
- err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr,
+ err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], flags,
extack);
if (err == 0) {
struct tc_u_knode __rcu **ins;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f87d07736a14..5e90e9b160e3 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1845,7 +1845,6 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
- int err = 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
@@ -1856,11 +1855,8 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
return -EINVAL;
}
- err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
- if (err > 0)
- err = 0;
- return err;
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
}
static int tclass_del_notify(struct net *net,
@@ -1894,8 +1890,6 @@ static int tclass_del_notify(struct net *net,
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
- if (err > 0)
- err = 0;
return err;
}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index d0c9a57398fc..7d8518176b45 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -394,7 +394,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
list_for_each_entry(flow, &p->flows, list) {
fl = rcu_dereference_bh(flow->filter_list);
if (fl) {
- result = tcf_classify(skb, fl, &res, true);
+ result = tcf_classify(skb, NULL, fl, &res, true);
if (result < 0)
continue;
flow = (struct atm_flow_data *)res.class;
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 951542843cab..3c2300d14468 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -720,7 +720,7 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
skip_hash:
if (flow_override)
flow_hash = flow_override - 1;
- else if (use_skbhash)
+ else if (use_skbhash && (flow_mode & CAKE_FLOW_FLOWS))
flow_hash = skb->hash;
if (host_override) {
dsthost_hash = host_override - 1;
@@ -1665,7 +1665,7 @@ static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t,
goto hash;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tcf_classify(skb, filter, &res, false);
+ result = tcf_classify(skb, NULL, filter, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index b79a7e27bb31..e0da15530f0e 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -228,7 +228,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
/*
* Step 2+n. Apply classifier.
*/
- result = tcf_classify(skb, fl, &res, true);
+ result = tcf_classify(skb, NULL, fl, &res, true);
if (!fl || result < 0)
goto fallback;
@@ -1614,7 +1614,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
if (err) {
kfree(cl);
- return err;
+ goto failure;
}
if (tca[TCA_RATE]) {
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index fc1e47069593..642cd179b7a7 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -317,7 +317,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
fl = rcu_dereference_bh(q->filter_list);
- result = tcf_classify(skb, fl, &res, false);
+ result = tcf_classify(skb, NULL, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index d320bcfb2da2..4c100d105269 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -242,7 +242,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
else {
struct tcf_result res;
struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
- int result = tcf_classify(skb, fl, &res, false);
+ int result = tcf_classify(skb, NULL, fl, &res, false);
pr_debug("result %d class 0x%04x\n", result, res.classid);
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index c1e84d1eeaba..1f857ffd1ac2 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -390,7 +390,7 @@ static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
if (TC_H_MAJ(skb->priority) != sch->handle) {
fl = rcu_dereference_bh(q->filter_list);
- err = tcf_classify(skb, fl, &res, false);
+ err = tcf_classify(skb, NULL, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
@@ -660,6 +660,13 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
q->nbands = nbands;
+ for (i = nstrict; i < q->nstrict; i++) {
+ INIT_LIST_HEAD(&q->classes[i].alist);
+ if (q->classes[i].qdisc->q.qlen) {
+ list_add_tail(&q->classes[i].alist, &q->active);
+ q->classes[i].deficit = quanta[i];
+ }
+ }
q->nstrict = nstrict;
memcpy(q->prio2band, priomap, sizeof(priomap));
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index bbd5f8753600..c4afdd026f51 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -91,7 +91,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
return fq_codel_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tcf_classify(skb, filter, &res, false);
+ result = tcf_classify(skb, NULL, filter, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index cac684952edc..830f3559f727 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -94,7 +94,7 @@ static unsigned int fq_pie_classify(struct sk_buff *skb, struct Qdisc *sch,
return fq_pie_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tcf_classify(skb, filter, &res, false);
+ result = tcf_classify(skb, NULL, filter, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index bf0034c66e35..b7ac30cca035 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1130,7 +1130,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
head = &q->root;
tcf = rcu_dereference_bh(q->root.filter_list);
- while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
+ while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 5f7ac27a5264..5067a6e5d4fd 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -125,6 +125,7 @@ struct htb_class {
struct htb_class_leaf {
int deficit[TC_HTB_MAXDEPTH];
struct Qdisc *q;
+ struct netdev_queue *offload_queue;
} leaf;
struct htb_class_inner {
struct htb_prio clprio[TC_HTB_NUMPRIO];
@@ -238,7 +239,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
+ while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
@@ -1411,24 +1412,47 @@ htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
return old_q;
}
-static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new)
+static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
+{
+ struct netdev_queue *queue;
+
+ queue = cl->leaf.offload_queue;
+ if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
+ WARN_ON(cl->leaf.q->dev_queue != queue);
+
+ return queue;
+}
+
+static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
+ struct htb_class *cl_new, bool destroying)
{
struct netdev_queue *queue_old, *queue_new;
struct net_device *dev = qdisc_dev(sch);
- struct Qdisc *qdisc;
- queue_old = netdev_get_tx_queue(dev, qid_old);
- queue_new = netdev_get_tx_queue(dev, qid_new);
+ queue_old = htb_offload_get_queue(cl_old);
+ queue_new = htb_offload_get_queue(cl_new);
- if (dev->flags & IFF_UP)
- dev_deactivate(dev);
- qdisc = dev_graft_qdisc(queue_old, NULL);
- qdisc->dev_queue = queue_new;
- qdisc = dev_graft_qdisc(queue_new, qdisc);
- if (dev->flags & IFF_UP)
- dev_activate(dev);
+ if (!destroying) {
+ struct Qdisc *qdisc;
- WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
+ if (dev->flags & IFF_UP)
+ dev_deactivate(dev);
+ qdisc = dev_graft_qdisc(queue_old, NULL);
+ WARN_ON(qdisc != cl_old->leaf.q);
+ }
+
+ if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
+ cl_old->leaf.q->dev_queue = queue_new;
+ cl_old->leaf.offload_queue = queue_new;
+
+ if (!destroying) {
+ struct Qdisc *qdisc;
+
+ qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
+ if (dev->flags & IFF_UP)
+ dev_activate(dev);
+ WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
+ }
}
static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
@@ -1442,10 +1466,8 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (cl->level)
return -EINVAL;
- if (q->offload) {
- dev_queue = new->dev_queue;
- WARN_ON(dev_queue != cl->leaf.q->dev_queue);
- }
+ if (q->offload)
+ dev_queue = htb_offload_get_queue(cl);
if (!new) {
new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
@@ -1514,6 +1536,8 @@ static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
parent->ctokens = parent->cbuffer;
parent->t_c = ktime_get_ns();
parent->cmode = HTB_CAN_SEND;
+ if (q->offload)
+ parent->leaf.offload_queue = cl->leaf.offload_queue;
}
static void htb_parent_to_leaf_offload(struct Qdisc *sch,
@@ -1534,6 +1558,7 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
struct netlink_ext_ack *extack)
{
struct tc_htb_qopt_offload offload_opt;
+ struct netdev_queue *dev_queue;
struct Qdisc *q = cl->leaf.q;
struct Qdisc *old = NULL;
int err;
@@ -1542,16 +1567,15 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
return -EINVAL;
WARN_ON(!q);
- if (!destroying) {
- /* On destroy of HTB, two cases are possible:
- * 1. q is a normal qdisc, but q->dev_queue has noop qdisc.
- * 2. q is a noop qdisc (for nodes that were inner),
- * q->dev_queue is noop_netdev_queue.
+ dev_queue = htb_offload_get_queue(cl);
+ old = htb_graft_helper(dev_queue, NULL);
+ if (destroying)
+ /* Before HTB is destroyed, the kernel grafts noop_qdisc to
+ * all queues.
*/
- old = htb_graft_helper(q->dev_queue, NULL);
- WARN_ON(!old);
+ WARN_ON(!(old->flags & TCQ_F_BUILTIN));
+ else
WARN_ON(old != q);
- }
if (cl->parent) {
cl->parent->bstats_bias.bytes += q->bstats.bytes;
@@ -1570,18 +1594,17 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
if (!err || destroying)
qdisc_put(old);
else
- htb_graft_helper(q->dev_queue, old);
+ htb_graft_helper(dev_queue, old);
if (last_child)
return err;
- if (!err && offload_opt.moved_qid != 0) {
- if (destroying)
- q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch),
- offload_opt.qid);
- else
- htb_offload_move_qdisc(sch, offload_opt.moved_qid,
- offload_opt.qid);
+ if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
+ u32 classid = TC_H_MAJ(sch->handle) |
+ TC_H_MIN(offload_opt.classid);
+ struct htb_class *moved_cl = htb_find(classid, sch);
+
+ htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
}
return err;
@@ -1704,9 +1727,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg,
}
if (last_child) {
- struct netdev_queue *dev_queue;
+ struct netdev_queue *dev_queue = sch->dev_queue;
+
+ if (q->offload)
+ dev_queue = htb_offload_get_queue(cl);
- dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue;
new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
cl->parent->common.classid,
NULL);
@@ -1878,7 +1903,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
}
dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
} else { /* First child. */
- dev_queue = parent->leaf.q->dev_queue;
+ dev_queue = htb_offload_get_queue(parent);
old_q = htb_graft_helper(dev_queue, NULL);
WARN_ON(old_q != parent->leaf.q);
offload_opt = (struct tc_htb_qopt_offload) {
@@ -1935,6 +1960,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
/* leaf (we) needs elementary qdisc */
cl->leaf.q = new_q ? new_q : &noop_qdisc;
+ if (q->offload)
+ cl->leaf.offload_queue = dev_queue;
cl->parent = parent;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 5c27b4270b90..e282e7382117 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -36,7 +36,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
int err;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- err = tcf_classify(skb, fl, &res, false);
+ err = tcf_classify(skb, NULL, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 3eabb871a1d5..03fdf31ccb6a 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -39,7 +39,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
if (TC_H_MAJ(skb->priority) != sch->handle) {
fl = rcu_dereference_bh(q->filter_list);
- err = tcf_classify(skb, fl, &res, false);
+ err = tcf_classify(skb, NULL, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index b692a0de1ad5..58a9d42b52b8 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -690,7 +690,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
fl = rcu_dereference_bh(q->filter_list);
- result = tcf_classify(skb, fl, &res, false);
+ result = tcf_classify(skb, NULL, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index dde829d4b9f8..3d061a13d7ed 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -257,7 +257,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
struct tcf_result res;
int result;
- result = tcf_classify(skb, fl, &res, false);
+ result = tcf_classify(skb, NULL, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 066754a18569..f8e569f79f13 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -178,7 +178,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
return sfq_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tcf_classify(skb, fl, &res, false);
+ result = tcf_classify(skb, NULL, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 9c79374457a0..1ab2fc933a21 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1513,7 +1513,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
taprio_set_picos_per_byte(dev, q);
if (mqprio) {
- netdev_set_num_tc(dev, mqprio->num_tc);
+ err = netdev_set_num_tc(dev, mqprio->num_tc);
+ if (err)
+ goto free_sched;
for (i = 0; i < mqprio->num_tc; i++)
netdev_set_tc_queue(dev, i,
mqprio->count[i],
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 898389611ae8..c038efc23ce3 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -795,7 +795,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
reason_code = SMC_CLC_DECL_NOSRVLINK;
goto connect_abort;
}
- smc->conn.lnk = link;
+ smc_switch_link_and_count(&smc->conn, link);
}
/* create send buffer and rmb */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index cd0d7c908b2a..af227b65669e 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -917,8 +917,8 @@ static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
return rc;
}
-static void smc_switch_link_and_count(struct smc_connection *conn,
- struct smc_link *to_lnk)
+void smc_switch_link_and_count(struct smc_connection *conn,
+ struct smc_link *to_lnk)
{
atomic_dec(&conn->lnk->conn_cnt);
conn->lnk = to_lnk;
@@ -1752,21 +1752,30 @@ out:
return rc;
}
-/* convert the RMB size into the compressed notation - minimum 16K.
+#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
+#define SMCR_RMBE_SIZES 5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */
+
+/* convert the RMB size into the compressed notation (minimum 16K, see
+ * SMCD/R_DMBE_SIZES.
* In contrast to plain ilog2, this rounds towards the next power of 2,
* so the socket application gets at least its desired sndbuf / rcvbuf size.
*/
-static u8 smc_compress_bufsize(int size)
+static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
{
+ const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE;
u8 compressed;
if (size <= SMC_BUF_MIN_SIZE)
return 0;
- size = (size - 1) >> 14;
- compressed = ilog2(size) + 1;
- if (compressed >= SMC_RMBE_SIZES)
- compressed = SMC_RMBE_SIZES - 1;
+ size = (size - 1) >> 14; /* convert to 16K multiple */
+ compressed = min_t(u8, ilog2(size) + 1,
+ is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);
+
+ if (!is_smcd && is_rmb)
+ /* RMBs are backed by & limited to max size of scatterlists */
+ compressed = min_t(u8, compressed, ilog2(max_scat >> 14));
+
return compressed;
}
@@ -1982,17 +1991,12 @@ out:
return rc;
}
-#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
-
static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
bool is_dmb, int bufsize)
{
struct smc_buf_desc *buf_desc;
int rc;
- if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
- return ERR_PTR(-EAGAIN);
-
/* try to alloc a new DMB */
buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
if (!buf_desc)
@@ -2041,9 +2045,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
/* use socket send buffer size (w/o overhead) as start value */
sk_buf_size = smc->sk.sk_sndbuf / 2;
- for (bufsize_short = smc_compress_bufsize(sk_buf_size);
+ for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
bufsize_short >= 0; bufsize_short--) {
-
if (is_rmb) {
lock = &lgr->rmbs_lock;
buf_list = &lgr->rmbs[bufsize_short];
@@ -2052,8 +2055,6 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
buf_list = &lgr->sndbufs[bufsize_short];
}
bufsize = smc_uncompress_bufsize(bufsize_short);
- if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
- continue;
/* check for reusable slot in the link group */
buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 6d6fd1397c87..c043ecdca5c4 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -97,6 +97,7 @@ struct smc_link {
unsigned long *wr_tx_mask; /* bit mask of used indexes */
u32 wr_tx_cnt; /* number of WR send buffers */
wait_queue_head_t wr_tx_wait; /* wait for free WR send buf */
+ atomic_t wr_tx_refcnt; /* tx refs to link */
struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */
struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */
@@ -109,6 +110,7 @@ struct smc_link {
struct ib_reg_wr wr_reg; /* WR register memory region */
wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */
+ atomic_t wr_reg_refcnt; /* reg refs to link */
enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */
u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/
@@ -444,6 +446,8 @@ void smc_core_exit(void);
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
u8 link_idx, struct smc_init_info *ini);
void smcr_link_clear(struct smc_link *lnk, bool log);
+void smc_switch_link_and_count(struct smc_connection *conn,
+ struct smc_link *to_lnk);
int smcr_buf_map_lgr(struct smc_link *lnk);
int smcr_buf_reg_lgr(struct smc_link *lnk);
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type);
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 7d7ba0320d5a..a8845343d183 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -753,8 +753,7 @@ void smc_ib_ndev_change(struct net_device *ndev, unsigned long event)
if (!libdev->ops.get_netdev)
continue;
lndev = libdev->ops.get_netdev(libdev, i + 1);
- if (lndev)
- dev_put(lndev);
+ dev_put(lndev);
if (lndev != ndev)
continue;
if (event == NETDEV_REGISTER)
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 273eaf1bfe49..2e7560eba981 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -888,6 +888,7 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
if (!rc)
goto out;
out_clear_lnk:
+ lnk_new->state = SMC_LNK_INACTIVE;
smcr_link_clear(lnk_new, false);
out_reject:
smc_llc_cli_add_link_reject(qentry);
@@ -1184,6 +1185,7 @@ int smc_llc_srv_add_link(struct smc_link *link)
goto out_err;
return 0;
out_err:
+ link_new->state = SMC_LNK_INACTIVE;
smcr_link_clear(link_new, false);
return rc;
}
@@ -1286,10 +1288,8 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
del_llc->reason = 0;
smc_llc_send_message(lnk, &qentry->msg); /* response */
- if (smc_link_downing(&lnk_del->state)) {
- if (smc_switch_conns(lgr, lnk_del, false))
- smc_wr_tx_wait_no_pending_sends(lnk_del);
- }
+ if (smc_link_downing(&lnk_del->state))
+ smc_switch_conns(lgr, lnk_del, false);
smcr_link_clear(lnk_del, true);
active_links = smc_llc_active_link_count(lgr);
@@ -1805,8 +1805,6 @@ void smc_llc_link_clear(struct smc_link *link, bool log)
link->smcibdev->ibdev->name, link->ibport);
complete(&link->llc_testlink_resp);
cancel_delayed_work_sync(&link->llc_testlink_wrk);
- smc_wr_wakeup_reg_wait(link);
- smc_wr_wakeup_tx_wait(link);
}
/* register a new rtoken at the remote peer (for all links) */
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 6f6d33edb135..4a964e9190b0 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -394,8 +394,7 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
return 0;
out_put:
- if (ndev)
- dev_put(ndev);
+ dev_put(ndev);
return rc;
}
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 289025cd545a..c79361dfcdfb 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -496,7 +496,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn,
/* Wakeup sndbuf consumers from any context (IRQ or process)
* since there is more data to transmit; usable snd_wnd as max transmit
*/
-static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
+static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
struct smc_link *link = conn->lnk;
@@ -550,6 +550,22 @@ out_unlock:
return rc;
}
+static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
+{
+ struct smc_link *link = conn->lnk;
+ int rc = -ENOLINK;
+
+ if (!link)
+ return rc;
+
+ atomic_inc(&link->wr_tx_refcnt);
+ if (smc_link_usable(link))
+ rc = _smcr_tx_sndbuf_nonempty(conn);
+ if (atomic_dec_and_test(&link->wr_tx_refcnt))
+ wake_up_all(&link->wr_tx_wait);
+ return rc;
+}
+
static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index cbc73a7e4d59..a419e9af36b9 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -322,9 +322,12 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
if (rc)
return rc;
+ atomic_inc(&link->wr_reg_refcnt);
rc = wait_event_interruptible_timeout(link->wr_reg_wait,
(link->wr_reg_state != POSTED),
SMC_WR_REG_MR_WAIT_TIME);
+ if (atomic_dec_and_test(&link->wr_reg_refcnt))
+ wake_up_all(&link->wr_reg_wait);
if (!rc) {
/* timeout - terminate link */
smcr_link_down_cond_sched(link);
@@ -566,10 +569,15 @@ void smc_wr_free_link(struct smc_link *lnk)
return;
ibdev = lnk->smcibdev->ibdev;
+ smc_wr_wakeup_reg_wait(lnk);
+ smc_wr_wakeup_tx_wait(lnk);
+
if (smc_wr_tx_wait_no_pending_sends(lnk))
memset(lnk->wr_tx_mask, 0,
BITS_TO_LONGS(SMC_WR_BUF_CNT) *
sizeof(*lnk->wr_tx_mask));
+ wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt)));
+ wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt)));
if (lnk->wr_rx_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
@@ -728,7 +736,9 @@ int smc_wr_create_link(struct smc_link *lnk)
memset(lnk->wr_tx_mask, 0,
BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
init_waitqueue_head(&lnk->wr_tx_wait);
+ atomic_set(&lnk->wr_tx_refcnt, 0);
init_waitqueue_head(&lnk->wr_reg_wait);
+ atomic_set(&lnk->wr_reg_refcnt, 0);
return rc;
dma_unmap:
diff --git a/net/socket.c b/net/socket.c
index 0b2dad3bdf7f..7f64a6eccf63 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -212,6 +212,7 @@ static const char * const pf_family_names[] = {
[PF_QIPCRTR] = "PF_QIPCRTR",
[PF_SMC] = "PF_SMC",
[PF_XDP] = "PF_XDP",
+ [PF_MCTP] = "PF_MCTP",
};
/*
@@ -1064,9 +1065,13 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
*/
static DEFINE_MUTEX(br_ioctl_mutex);
-static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
+static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br,
+ unsigned int cmd, struct ifreq *ifr,
+ void __user *uarg);
-void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
+void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
+ unsigned int cmd, struct ifreq *ifr,
+ void __user *uarg))
{
mutex_lock(&br_ioctl_mutex);
br_ioctl_hook = hook;
@@ -1074,6 +1079,22 @@ void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
}
EXPORT_SYMBOL(brioctl_set);
+int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
+ struct ifreq *ifr, void __user *uarg)
+{
+ int err = -ENOPKG;
+
+ if (!br_ioctl_hook)
+ request_module("bridge");
+
+ mutex_lock(&br_ioctl_mutex);
+ if (br_ioctl_hook)
+ err = br_ioctl_hook(net, br, cmd, ifr, uarg);
+ mutex_unlock(&br_ioctl_mutex);
+
+ return err;
+}
+
static DEFINE_MUTEX(vlan_ioctl_mutex);
static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
@@ -1088,8 +1109,11 @@ EXPORT_SYMBOL(vlan_ioctl_set);
static long sock_do_ioctl(struct net *net, struct socket *sock,
unsigned int cmd, unsigned long arg)
{
+ struct ifreq ifr;
+ bool need_copyout;
int err;
void __user *argp = (void __user *)arg;
+ void __user *data;
err = sock->ops->ioctl(sock, cmd, arg);
@@ -1100,25 +1124,16 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
if (err != -ENOIOCTLCMD)
return err;
- if (cmd == SIOCGIFCONF) {
- struct ifconf ifc;
- if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
- return -EFAULT;
- rtnl_lock();
- err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
- rtnl_unlock();
- if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
- err = -EFAULT;
- } else {
- struct ifreq ifr;
- bool need_copyout;
- if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+ if (!is_socket_ioctl_cmd(cmd))
+ return -ENOTTY;
+
+ if (get_user_ifreq(&ifr, &data, argp))
+ return -EFAULT;
+ err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
+ if (!err && need_copyout)
+ if (put_user_ifreq(&ifr, argp))
return -EFAULT;
- err = dev_ioctl(net, cmd, &ifr, &need_copyout);
- if (!err && need_copyout)
- if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
- return -EFAULT;
- }
+
return err;
}
@@ -1140,12 +1155,13 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
net = sock_net(sk);
if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
struct ifreq ifr;
+ void __user *data;
bool need_copyout;
- if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+ if (get_user_ifreq(&ifr, &data, argp))
return -EFAULT;
- err = dev_ioctl(net, cmd, &ifr, &need_copyout);
+ err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
if (!err && need_copyout)
- if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
+ if (put_user_ifreq(&ifr, argp))
return -EFAULT;
} else
#ifdef CONFIG_WEXT_CORE
@@ -1170,14 +1186,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
case SIOCSIFBR:
case SIOCBRADDBR:
case SIOCBRDELBR:
- err = -ENOPKG;
- if (!br_ioctl_hook)
- request_module("bridge");
-
- mutex_lock(&br_ioctl_mutex);
- if (br_ioctl_hook)
- err = br_ioctl_hook(net, cmd, argp);
- mutex_unlock(&br_ioctl_mutex);
+ err = br_ioctl_call(net, NULL, cmd, NULL, argp);
break;
case SIOCGIFVLAN:
case SIOCSIFVLAN:
@@ -1217,6 +1226,11 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
cmd == SIOCGSTAMP_NEW,
false);
break;
+
+ case SIOCGIFCONF:
+ err = dev_ifconf(net, argp);
+ break;
+
default:
err = sock_do_ioctl(net, sock, cmd, arg);
break;
@@ -1722,32 +1736,22 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
return __sys_listen(fd, backlog);
}
-int __sys_accept4_file(struct file *file, unsigned file_flags,
+struct file *do_accept(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen, int flags,
- unsigned long nofile)
+ int __user *upeer_addrlen, int flags)
{
struct socket *sock, *newsock;
struct file *newfile;
- int err, len, newfd;
+ int err, len;
struct sockaddr_storage address;
- if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
- return -EINVAL;
-
- if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
- flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
-
sock = sock_from_file(file);
- if (!sock) {
- err = -ENOTSOCK;
- goto out;
- }
+ if (!sock)
+ return ERR_PTR(-ENOTSOCK);
- err = -ENFILE;
newsock = sock_alloc();
if (!newsock)
- goto out;
+ return ERR_PTR(-ENFILE);
newsock->type = sock->type;
newsock->ops = sock->ops;
@@ -1758,18 +1762,9 @@ int __sys_accept4_file(struct file *file, unsigned file_flags,
*/
__module_get(newsock->ops->owner);
- newfd = __get_unused_fd_flags(flags, nofile);
- if (unlikely(newfd < 0)) {
- err = newfd;
- sock_release(newsock);
- goto out;
- }
newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
- if (IS_ERR(newfile)) {
- err = PTR_ERR(newfile);
- put_unused_fd(newfd);
- goto out;
- }
+ if (IS_ERR(newfile))
+ return newfile;
err = security_socket_accept(sock, newsock);
if (err)
@@ -1794,16 +1789,38 @@ int __sys_accept4_file(struct file *file, unsigned file_flags,
}
/* File flags are not inherited via accept() unlike another OSes. */
-
- fd_install(newfd, newfile);
- err = newfd;
-out:
- return err;
+ return newfile;
out_fd:
fput(newfile);
- put_unused_fd(newfd);
- goto out;
+ return ERR_PTR(err);
+}
+
+int __sys_accept4_file(struct file *file, unsigned file_flags,
+ struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags,
+ unsigned long nofile)
+{
+ struct file *newfile;
+ int newfd;
+
+ if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ return -EINVAL;
+ if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+ flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+
+ newfd = __get_unused_fd_flags(flags, nofile);
+ if (unlikely(newfd < 0))
+ return newfd;
+
+ newfile = do_accept(file, file_flags, upeer_sockaddr, upeer_addrlen,
+ flags);
+ if (IS_ERR(newfile)) {
+ put_unused_fd(newfd);
+ return PTR_ERR(newfile);
+ }
+ fd_install(newfd, newfile);
+ return newfd;
}
/*
@@ -3126,154 +3143,55 @@ void socket_seq_show(struct seq_file *seq)
}
#endif /* CONFIG_PROC_FS */
-#ifdef CONFIG_COMPAT
-static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
+/* Handle the fact that while struct ifreq has the same *layout* on
+ * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
+ * which are handled elsewhere, it still has different *size* due to
+ * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
+ * resulting in struct ifreq being 32 and 40 bytes respectively).
+ * As a result, if the struct happens to be at the end of a page and
+ * the next page isn't readable/writable, we get a fault. To prevent
+ * that, copy back and forth to the full size.
+ */
+int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
{
- struct compat_ifconf ifc32;
- struct ifconf ifc;
- int err;
+ if (in_compat_syscall()) {
+ struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
- if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
- return -EFAULT;
+ memset(ifr, 0, sizeof(*ifr));
+ if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
+ return -EFAULT;
- ifc.ifc_len = ifc32.ifc_len;
- ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
+ if (ifrdata)
+ *ifrdata = compat_ptr(ifr32->ifr_data);
- rtnl_lock();
- err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
- rtnl_unlock();
- if (err)
- return err;
+ return 0;
+ }
- ifc32.ifc_len = ifc.ifc_len;
- if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
+ if (copy_from_user(ifr, arg, sizeof(*ifr)))
return -EFAULT;
+ if (ifrdata)
+ *ifrdata = ifr->ifr_data;
+
return 0;
}
+EXPORT_SYMBOL(get_user_ifreq);
-static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
+int put_user_ifreq(struct ifreq *ifr, void __user *arg)
{
- struct compat_ethtool_rxnfc __user *compat_rxnfc;
- bool convert_in = false, convert_out = false;
- size_t buf_size = 0;
- struct ethtool_rxnfc __user *rxnfc = NULL;
- struct ifreq ifr;
- u32 rule_cnt = 0, actual_rule_cnt;
- u32 ethcmd;
- u32 data;
- int ret;
+ size_t size = sizeof(*ifr);
- if (get_user(data, &ifr32->ifr_ifru.ifru_data))
- return -EFAULT;
+ if (in_compat_syscall())
+ size = sizeof(struct compat_ifreq);
- compat_rxnfc = compat_ptr(data);
-
- if (get_user(ethcmd, &compat_rxnfc->cmd))
+ if (copy_to_user(arg, ifr, size))
return -EFAULT;
- /* Most ethtool structures are defined without padding.
- * Unfortunately struct ethtool_rxnfc is an exception.
- */
- switch (ethcmd) {
- default:
- break;
- case ETHTOOL_GRXCLSRLALL:
- /* Buffer size is variable */
- if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
- return -EFAULT;
- if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
- return -ENOMEM;
- buf_size += rule_cnt * sizeof(u32);
- fallthrough;
- case ETHTOOL_GRXRINGS:
- case ETHTOOL_GRXCLSRLCNT:
- case ETHTOOL_GRXCLSRULE:
- case ETHTOOL_SRXCLSRLINS:
- convert_out = true;
- fallthrough;
- case ETHTOOL_SRXCLSRLDEL:
- buf_size += sizeof(struct ethtool_rxnfc);
- convert_in = true;
- rxnfc = compat_alloc_user_space(buf_size);
- break;
- }
-
- if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
- return -EFAULT;
-
- ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
-
- if (convert_in) {
- /* We expect there to be holes between fs.m_ext and
- * fs.ring_cookie and at the end of fs, but nowhere else.
- */
- BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
- sizeof(compat_rxnfc->fs.m_ext) !=
- offsetof(struct ethtool_rxnfc, fs.m_ext) +
- sizeof(rxnfc->fs.m_ext));
- BUILD_BUG_ON(
- offsetof(struct compat_ethtool_rxnfc, fs.location) -
- offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
- offsetof(struct ethtool_rxnfc, fs.location) -
- offsetof(struct ethtool_rxnfc, fs.ring_cookie));
-
- if (copy_in_user(rxnfc, compat_rxnfc,
- (void __user *)(&rxnfc->fs.m_ext + 1) -
- (void __user *)rxnfc) ||
- copy_in_user(&rxnfc->fs.ring_cookie,
- &compat_rxnfc->fs.ring_cookie,
- (void __user *)(&rxnfc->fs.location + 1) -
- (void __user *)&rxnfc->fs.ring_cookie))
- return -EFAULT;
- if (ethcmd == ETHTOOL_GRXCLSRLALL) {
- if (put_user(rule_cnt, &rxnfc->rule_cnt))
- return -EFAULT;
- } else if (copy_in_user(&rxnfc->rule_cnt,
- &compat_rxnfc->rule_cnt,
- sizeof(rxnfc->rule_cnt)))
- return -EFAULT;
- }
-
- ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
- if (ret)
- return ret;
-
- if (convert_out) {
- if (copy_in_user(compat_rxnfc, rxnfc,
- (const void __user *)(&rxnfc->fs.m_ext + 1) -
- (const void __user *)rxnfc) ||
- copy_in_user(&compat_rxnfc->fs.ring_cookie,
- &rxnfc->fs.ring_cookie,
- (const void __user *)(&rxnfc->fs.location + 1) -
- (const void __user *)&rxnfc->fs.ring_cookie) ||
- copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
- sizeof(rxnfc->rule_cnt)))
- return -EFAULT;
-
- if (ethcmd == ETHTOOL_GRXCLSRLALL) {
- /* As an optimisation, we only copy the actual
- * number of rules that the underlying
- * function returned. Since Mallory might
- * change the rule count in user memory, we
- * check that it is less than the rule count
- * originally given (as the user buffer size),
- * which has been range-checked.
- */
- if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
- return -EFAULT;
- if (actual_rule_cnt < rule_cnt)
- rule_cnt = actual_rule_cnt;
- if (copy_in_user(&compat_rxnfc->rule_locs[0],
- &rxnfc->rule_locs[0],
- rule_cnt * sizeof(u32)))
- return -EFAULT;
- }
- }
-
return 0;
}
+EXPORT_SYMBOL(put_user_ifreq);
+#ifdef CONFIG_COMPAT
static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
{
compat_uptr_t uptr32;
@@ -3281,7 +3199,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32
void __user *saved;
int err;
- if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
+ if (get_user_ifreq(&ifr, NULL, uifr32))
return -EFAULT;
if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
@@ -3290,10 +3208,10 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32
saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
- err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
+ err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL);
if (!err) {
ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
- if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
+ if (put_user_ifreq(&ifr, uifr32))
err = -EFAULT;
}
return err;
@@ -3304,97 +3222,15 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
struct compat_ifreq __user *u_ifreq32)
{
struct ifreq ifreq;
- u32 data32;
+ void __user *data;
- if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
- return -EFAULT;
- if (get_user(data32, &u_ifreq32->ifr_data))
+ if (!is_socket_ioctl_cmd(cmd))
+ return -ENOTTY;
+ if (get_user_ifreq(&ifreq, &data, u_ifreq32))
return -EFAULT;
- ifreq.ifr_data = compat_ptr(data32);
+ ifreq.ifr_data = data;
- return dev_ioctl(net, cmd, &ifreq, NULL);
-}
-
-static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
- unsigned int cmd,
- struct compat_ifreq __user *uifr32)
-{
- struct ifreq __user *uifr;
- int err;
-
- /* Handle the fact that while struct ifreq has the same *layout* on
- * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
- * which are handled elsewhere, it still has different *size* due to
- * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
- * resulting in struct ifreq being 32 and 40 bytes respectively).
- * As a result, if the struct happens to be at the end of a page and
- * the next page isn't readable/writable, we get a fault. To prevent
- * that, copy back and forth to the full size.
- */
-
- uifr = compat_alloc_user_space(sizeof(*uifr));
- if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
- return -EFAULT;
-
- err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
-
- if (!err) {
- switch (cmd) {
- case SIOCGIFFLAGS:
- case SIOCGIFMETRIC:
- case SIOCGIFMTU:
- case SIOCGIFMEM:
- case SIOCGIFHWADDR:
- case SIOCGIFINDEX:
- case SIOCGIFADDR:
- case SIOCGIFBRDADDR:
- case SIOCGIFDSTADDR:
- case SIOCGIFNETMASK:
- case SIOCGIFPFLAGS:
- case SIOCGIFTXQLEN:
- case SIOCGMIIPHY:
- case SIOCGMIIREG:
- case SIOCGIFNAME:
- if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
- err = -EFAULT;
- break;
- }
- }
- return err;
-}
-
-static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
- struct compat_ifreq __user *uifr32)
-{
- struct ifreq ifr;
- struct compat_ifmap __user *uifmap32;
- int err;
-
- uifmap32 = &uifr32->ifr_ifru.ifru_map;
- err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
- err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
- err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
- err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
- err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
- err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
- err |= get_user(ifr.ifr_map.port, &uifmap32->port);
- if (err)
- return -EFAULT;
-
- err = dev_ioctl(net, cmd, &ifr, NULL);
-
- if (cmd == SIOCGIFMAP && !err) {
- err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
- err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
- err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
- err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
- err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
- err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
- err |= put_user(ifr.ifr_map.port, &uifmap32->port);
- if (err)
- err = -EFAULT;
- }
- return err;
+ return dev_ioctl(net, cmd, &ifreq, data, NULL);
}
/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
@@ -3420,21 +3256,14 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
struct net *net = sock_net(sk);
if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
- return compat_ifr_data_ioctl(net, cmd, argp);
+ return sock_ioctl(file, cmd, (unsigned long)argp);
switch (cmd) {
case SIOCSIFBR:
case SIOCGIFBR:
return old_bridge_ioctl(argp);
- case SIOCGIFCONF:
- return compat_dev_ifconf(net, argp);
- case SIOCETHTOOL:
- return ethtool_ioctl(net, argp);
case SIOCWANDEV:
return compat_siocwandev(net, argp);
- case SIOCGIFMAP:
- case SIOCSIFMAP:
- return compat_sioc_ifmap(net, cmd, argp);
case SIOCGSTAMP_OLD:
case SIOCGSTAMPNS_OLD:
if (!sock->ops->gettstamp)
@@ -3442,6 +3271,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
!COMPAT_USE_64BIT_TIME);
+ case SIOCETHTOOL:
case SIOCBONDSLAVEINFOQUERY:
case SIOCBONDINFOQUERY:
case SIOCSHWTSTAMP:
@@ -3459,10 +3289,13 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCGSKNS:
case SIOCGSTAMP_NEW:
case SIOCGSTAMPNS_NEW:
+ case SIOCGIFCONF:
return sock_ioctl(file, cmd, arg);
case SIOCGIFFLAGS:
case SIOCSIFFLAGS:
+ case SIOCGIFMAP:
+ case SIOCSIFMAP:
case SIOCGIFMETRIC:
case SIOCSIFMETRIC:
case SIOCGIFMTU:
@@ -3499,8 +3332,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCBONDRELEASE:
case SIOCBONDSETHWADDR:
case SIOCBONDCHANGEACTIVE:
- return compat_ifreq_ioctl(net, sock, cmd, argp);
-
case SIOCSARP:
case SIOCGARP:
case SIOCDARP:
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index a81be45f40d9..3d685fe328fa 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1980,7 +1980,7 @@ gss_svc_init_net(struct net *net)
goto out2;
return 0;
out2:
- destroy_use_gss_proxy_proc_entry(net);
+ rsi_cache_destroy_net(net);
out1:
rsc_cache_destroy_net(net);
return rv;
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 56029e3af6ff..827bf3a28178 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -8,14 +8,14 @@
#include <linux/debugfs.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/clnt.h>
+
#include "netns.h"
+#include "fail.h"
static struct dentry *topdir;
static struct dentry *rpc_clnt_dir;
static struct dentry *rpc_xprt_dir;
-unsigned int rpc_inject_disconnect;
-
static int
tasks_show(struct seq_file *f, void *v)
{
@@ -235,8 +235,6 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
/* make tasks file */
debugfs_create_file("info", S_IFREG | 0400, xprt->debugfs, xprt,
&xprt_info_fops);
-
- atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect);
}
void
@@ -246,56 +244,30 @@ rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt)
xprt->debugfs = NULL;
}
-static int
-fault_open(struct inode *inode, struct file *filp)
-{
- filp->private_data = kmalloc(128, GFP_KERNEL);
- if (!filp->private_data)
- return -ENOMEM;
- return 0;
-}
+#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
+struct fail_sunrpc_attr fail_sunrpc = {
+ .attr = FAULT_ATTR_INITIALIZER,
+};
+EXPORT_SYMBOL_GPL(fail_sunrpc);
-static int
-fault_release(struct inode *inode, struct file *filp)
+static void fail_sunrpc_init(void)
{
- kfree(filp->private_data);
- return 0;
-}
+ struct dentry *dir;
-static ssize_t
-fault_disconnect_read(struct file *filp, char __user *user_buf,
- size_t len, loff_t *offset)
-{
- char *buffer = (char *)filp->private_data;
- size_t size;
+ dir = fault_create_debugfs_attr("fail_sunrpc", NULL,
+ &fail_sunrpc.attr);
- size = sprintf(buffer, "%u\n", rpc_inject_disconnect);
- return simple_read_from_buffer(user_buf, len, offset, buffer, size);
-}
+ debugfs_create_bool("ignore-client-disconnect", S_IFREG | 0600, dir,
+ &fail_sunrpc.ignore_client_disconnect);
-static ssize_t
-fault_disconnect_write(struct file *filp, const char __user *user_buf,
- size_t len, loff_t *offset)
+ debugfs_create_bool("ignore-server-disconnect", S_IFREG | 0600, dir,
+ &fail_sunrpc.ignore_server_disconnect);
+}
+#else
+static void fail_sunrpc_init(void)
{
- char buffer[16];
-
- if (len >= sizeof(buffer))
- len = sizeof(buffer) - 1;
- if (copy_from_user(buffer, user_buf, len))
- return -EFAULT;
- buffer[len] = '\0';
- if (kstrtouint(buffer, 10, &rpc_inject_disconnect))
- return -EINVAL;
- return len;
}
-
-static const struct file_operations fault_disconnect_fops = {
- .owner = THIS_MODULE,
- .open = fault_open,
- .read = fault_disconnect_read,
- .write = fault_disconnect_write,
- .release = fault_release,
-};
+#endif
void __exit
sunrpc_debugfs_exit(void)
@@ -309,16 +281,11 @@ sunrpc_debugfs_exit(void)
void __init
sunrpc_debugfs_init(void)
{
- struct dentry *rpc_fault_dir;
-
topdir = debugfs_create_dir("sunrpc", NULL);
rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir);
rpc_xprt_dir = debugfs_create_dir("rpc_xprt", topdir);
- rpc_fault_dir = debugfs_create_dir("inject_fault", topdir);
-
- debugfs_create_file("disconnect", S_IFREG | 0400, rpc_fault_dir, NULL,
- &fault_disconnect_fops);
+ fail_sunrpc_init();
}
diff --git a/net/sunrpc/fail.h b/net/sunrpc/fail.h
new file mode 100644
index 000000000000..69dc30cc44b8
--- /dev/null
+++ b/net/sunrpc/fail.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021, Oracle. All rights reserved.
+ */
+
+#ifndef _NET_SUNRPC_FAIL_H_
+#define _NET_SUNRPC_FAIL_H_
+
+#include <linux/fault-inject.h>
+
+#if IS_ENABLED(CONFIG_FAULT_INJECTION)
+
+struct fail_sunrpc_attr {
+ struct fault_attr attr;
+
+ bool ignore_client_disconnect;
+
+ bool ignore_server_disconnect;
+};
+
+extern struct fail_sunrpc_attr fail_sunrpc;
+
+#endif /* CONFIG_FAULT_INJECTION */
+
+#endif /* _NET_SUNRPC_FAIL_H_ */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 0de918cb3d90..bfcbaf7b3822 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -31,6 +31,8 @@
#include <trace/events/sunrpc.h>
+#include "fail.h"
+
#define RPCDBG_FACILITY RPCDBG_SVCDSP
static void svc_unregister(const struct svc_serv *serv, struct net *net);
@@ -838,6 +840,27 @@ svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrser
}
EXPORT_SYMBOL_GPL(svc_set_num_threads_sync);
+/**
+ * svc_rqst_replace_page - Replace one page in rq_pages[]
+ * @rqstp: svc_rqst with pages to replace
+ * @page: replacement page
+ *
+ * When replacing a page in rq_pages, batch the release of the
+ * replaced pages to avoid hammering the page allocator.
+ */
+void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
+{
+ if (*rqstp->rq_next_page) {
+ if (!pagevec_space(&rqstp->rq_pvec))
+ __pagevec_release(&rqstp->rq_pvec);
+ pagevec_add(&rqstp->rq_pvec, *rqstp->rq_next_page);
+ }
+
+ get_page(page);
+ *(rqstp->rq_next_page++) = page;
+}
+EXPORT_SYMBOL_GPL(svc_rqst_replace_page);
+
/*
* Called from a server thread as it's exiting. Caller must hold the "service
* mutex" for the service.
@@ -1503,6 +1526,12 @@ svc_process(struct svc_rqst *rqstp)
struct svc_serv *serv = rqstp->rq_server;
u32 dir;
+#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
+ if (!fail_sunrpc.ignore_server_disconnect &&
+ should_fail(&fail_sunrpc.attr, 1))
+ svc_xprt_deferred_close(rqstp->rq_xprt);
+#endif
+
/*
* Setup response xdr_buf.
* Initially it has just one page
@@ -1630,6 +1659,21 @@ u32 svc_max_payload(const struct svc_rqst *rqstp)
EXPORT_SYMBOL_GPL(svc_max_payload);
/**
+ * svc_proc_name - Return RPC procedure name in string form
+ * @rqstp: svc_rqst to operate on
+ *
+ * Return value:
+ * Pointer to a NUL-terminated string
+ */
+const char *svc_proc_name(const struct svc_rqst *rqstp)
+{
+ if (rqstp && rqstp->rq_procinfo)
+ return rqstp->rq_procinfo->pc_name;
+ return "unknown";
+}
+
+
+/**
* svc_encode_result_payload - mark a range of bytes as a result payload
* @rqstp: svc_rqst to operate on
* @offset: payload's byte offset in rqstp->rq_res
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d66a8e44a1ae..e1153cba9cc6 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -539,6 +539,7 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
kfree(rqstp->rq_deferred);
rqstp->rq_deferred = NULL;
+ pagevec_release(&rqstp->rq_pvec);
svc_free_res_pages(rqstp);
rqstp->rq_res.page_len = 0;
rqstp->rq_res.page_base = 0;
@@ -664,6 +665,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
struct xdr_buf *arg = &rqstp->rq_arg;
unsigned long pages, filled;
+ pagevec_init(&rqstp->rq_pvec);
+
pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT;
if (pages > RPCSVC_MAXPAGES) {
pr_warn_once("svc: warning: pages=%lu > RPCSVC_MAXPAGES=%lu\n",
@@ -835,7 +838,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
rqstp->rq_stime = ktime_get();
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
- }
+ } else
+ svc_xprt_received(xprt);
out:
trace_svc_handle_xprt(xprt, len);
return len;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index fb6db09725c7..05abe344a269 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -56,6 +56,7 @@
#include "sunrpc.h"
#include "sysfs.h"
+#include "fail.h"
/*
* Local variables
@@ -855,6 +856,19 @@ xprt_init_autodisconnect(struct timer_list *t)
queue_work(xprtiod_workqueue, &xprt->task_cleanup);
}
+#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
+static void xprt_inject_disconnect(struct rpc_xprt *xprt)
+{
+ if (!fail_sunrpc.ignore_client_disconnect &&
+ should_fail(&fail_sunrpc.attr, 1))
+ xprt->ops->inject_disconnect(xprt);
+}
+#else
+static inline void xprt_inject_disconnect(struct rpc_xprt *xprt)
+{
+}
+#endif
+
bool xprt_lock_connect(struct rpc_xprt *xprt,
struct rpc_task *task,
void *cookie)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 1e651447dc4e..e27433f08ca7 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -35,6 +35,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc);
* controlling svcxprt_rdma is destroyed.
*/
struct svc_rdma_rw_ctxt {
+ struct llist_node rw_node;
struct list_head rw_list;
struct rdma_rw_ctx rw_ctx;
unsigned int rw_nents;
@@ -53,19 +54,19 @@ static struct svc_rdma_rw_ctxt *
svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
{
struct svc_rdma_rw_ctxt *ctxt;
+ struct llist_node *node;
spin_lock(&rdma->sc_rw_ctxt_lock);
-
- ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts);
- if (ctxt) {
- list_del(&ctxt->rw_list);
- spin_unlock(&rdma->sc_rw_ctxt_lock);
+ node = llist_del_first(&rdma->sc_rw_ctxts);
+ spin_unlock(&rdma->sc_rw_ctxt_lock);
+ if (node) {
+ ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
} else {
- spin_unlock(&rdma->sc_rw_ctxt_lock);
ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
GFP_KERNEL);
if (!ctxt)
goto out_noctx;
+
INIT_LIST_HEAD(&ctxt->rw_list);
}
@@ -83,14 +84,18 @@ out_noctx:
return NULL;
}
-static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
- struct svc_rdma_rw_ctxt *ctxt)
+static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
+ struct svc_rdma_rw_ctxt *ctxt,
+ struct llist_head *list)
{
sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE);
+ llist_add(&ctxt->rw_node, list);
+}
- spin_lock(&rdma->sc_rw_ctxt_lock);
- list_add(&ctxt->rw_list, &rdma->sc_rw_ctxts);
- spin_unlock(&rdma->sc_rw_ctxt_lock);
+static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
+ struct svc_rdma_rw_ctxt *ctxt)
+{
+ __svc_rdma_put_rw_ctxt(rdma, ctxt, &rdma->sc_rw_ctxts);
}
/**
@@ -101,9 +106,10 @@ static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma)
{
struct svc_rdma_rw_ctxt *ctxt;
+ struct llist_node *node;
- while ((ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts)) != NULL) {
- list_del(&ctxt->rw_list);
+ while ((node = llist_del_first(&rdma->sc_rw_ctxts)) != NULL) {
+ ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
kfree(ctxt);
}
}
@@ -171,20 +177,35 @@ static void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
cc->cc_sqecount = 0;
}
+/*
+ * The consumed rw_ctx's are cleaned and placed on a local llist so
+ * that only one atomic llist operation is needed to put them all
+ * back on the free list.
+ */
static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
enum dma_data_direction dir)
{
struct svcxprt_rdma *rdma = cc->cc_rdma;
+ struct llist_node *first, *last;
struct svc_rdma_rw_ctxt *ctxt;
+ LLIST_HEAD(free);
+ first = last = NULL;
while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) {
list_del(&ctxt->rw_list);
rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
rdma->sc_port_num, ctxt->rw_sg_table.sgl,
ctxt->rw_nents, dir);
- svc_rdma_put_rw_ctxt(rdma, ctxt);
+ __svc_rdma_put_rw_ctxt(rdma, ctxt, &free);
+
+ ctxt->rw_node.next = first;
+ first = &ctxt->rw_node;
+ if (!last)
+ last = first;
}
+ if (first)
+ llist_add_batch(first, last, &rdma->sc_rw_ctxts);
}
/* State for sending a Write or Reply chunk.
@@ -248,8 +269,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_write(wc, &cc->cc_cid);
- atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
- wake_up(&rdma->sc_send_wait);
+ svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
if (unlikely(wc->status != IB_WC_SUCCESS))
svc_xprt_deferred_close(&rdma->sc_xprt);
@@ -304,9 +324,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_read(wc, &cc->cc_cid);
- atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
- wake_up(&rdma->sc_send_wait);
-
+ svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
cc->cc_status = wc->status;
complete(&cc->cc_done);
return;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index d6bbafb773e1..599021b2391d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -113,13 +113,6 @@
static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc);
-static inline struct svc_rdma_send_ctxt *
-svc_rdma_next_send_ctxt(struct list_head *list)
-{
- return list_first_entry_or_null(list, struct svc_rdma_send_ctxt,
- sc_list);
-}
-
static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
struct rpc_rdma_cid *cid)
{
@@ -182,9 +175,10 @@ fail0:
void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
{
struct svc_rdma_send_ctxt *ctxt;
+ struct llist_node *node;
- while ((ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts))) {
- list_del(&ctxt->sc_list);
+ while ((node = llist_del_first(&rdma->sc_send_ctxts)) != NULL) {
+ ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
ib_dma_unmap_single(rdma->sc_pd->device,
ctxt->sc_sges[0].addr,
rdma->sc_max_req_size,
@@ -204,12 +198,13 @@ void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
{
struct svc_rdma_send_ctxt *ctxt;
+ struct llist_node *node;
spin_lock(&rdma->sc_send_lock);
- ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts);
- if (!ctxt)
+ node = llist_del_first(&rdma->sc_send_ctxts);
+ if (!node)
goto out_empty;
- list_del(&ctxt->sc_list);
+ ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
spin_unlock(&rdma->sc_send_lock);
out:
@@ -253,9 +248,21 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
ctxt->sc_sges[i].length);
}
- spin_lock(&rdma->sc_send_lock);
- list_add(&ctxt->sc_list, &rdma->sc_send_ctxts);
- spin_unlock(&rdma->sc_send_lock);
+ llist_add(&ctxt->sc_node, &rdma->sc_send_ctxts);
+}
+
+/**
+ * svc_rdma_wake_send_waiters - manage Send Queue accounting
+ * @rdma: controlling transport
+ * @avail: Number of additional SQEs that are now available
+ *
+ */
+void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail)
+{
+ atomic_add(avail, &rdma->sc_sq_avail);
+ smp_mb__after_atomic();
+ if (unlikely(waitqueue_active(&rdma->sc_send_wait)))
+ wake_up(&rdma->sc_send_wait);
}
/**
@@ -275,11 +282,9 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
+ svc_rdma_wake_send_waiters(rdma, 1);
complete(&ctxt->sc_done);
- atomic_inc(&rdma->sc_sq_avail);
- wake_up(&rdma->sc_send_wait);
-
if (unlikely(wc->status != IB_WC_SUCCESS))
svc_xprt_deferred_close(&rdma->sc_xprt);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index d94b7759ada1..94b20fb47135 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -136,9 +136,9 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
- INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
+ init_llist_head(&cma_xprt->sc_send_ctxts);
init_llist_head(&cma_xprt->sc_recv_ctxts);
- INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
+ init_llist_head(&cma_xprt->sc_rw_ctxts);
init_waitqueue_head(&cma_xprt->sc_send_wait);
spin_lock_init(&cma_xprt->sc_lock);
@@ -545,7 +545,6 @@ static void __svc_rdma_free(struct work_struct *work)
{
struct svcxprt_rdma *rdma =
container_of(work, struct svcxprt_rdma, sc_work);
- struct svc_xprt *xprt = &rdma->sc_xprt;
/* This blocks until the Completion Queues are empty */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -553,12 +552,6 @@ static void __svc_rdma_free(struct work_struct *work)
svc_rdma_flush_recv_queues(rdma);
- /* Final put of backchannel client transport */
- if (xprt->xpt_bc_xprt) {
- xprt_put(xprt->xpt_bc_xprt);
- xprt->xpt_bc_xprt = NULL;
- }
-
svc_rdma_destroy_rw_ctxts(rdma);
svc_rdma_send_ctxts_destroy(rdma);
svc_rdma_recv_ctxts_destroy(rdma);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index e573dcecdd66..b7dbdcbdeb6c 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -3149,24 +3149,6 @@ void cleanup_socket_xprt(void)
xprt_unregister_transport(&xs_bc_tcp_transport);
}
-static int param_set_uint_minmax(const char *val,
- const struct kernel_param *kp,
- unsigned int min, unsigned int max)
-{
- unsigned int num;
- int ret;
-
- if (!val)
- return -EINVAL;
- ret = kstrtouint(val, 0, &num);
- if (ret)
- return ret;
- if (num < min || num > max)
- return -EINVAL;
- *((unsigned int *)kp->arg) = num;
- return 0;
-}
-
static int param_set_portnr(const char *val, const struct kernel_param *kp)
{
return param_set_uint_minmax(val, kp,
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 070698dd19bc..0b2c18efc079 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -378,6 +378,266 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
}
EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
+struct switchdev_nested_priv {
+ bool (*check_cb)(const struct net_device *dev);
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev);
+ const struct net_device *dev;
+ struct net_device *lower_dev;
+};
+
+static int switchdev_lower_dev_walk(struct net_device *lower_dev,
+ struct netdev_nested_priv *priv)
+{
+ struct switchdev_nested_priv *switchdev_priv = priv->data;
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev);
+ bool (*check_cb)(const struct net_device *dev);
+ const struct net_device *dev;
+
+ check_cb = switchdev_priv->check_cb;
+ foreign_dev_check_cb = switchdev_priv->foreign_dev_check_cb;
+ dev = switchdev_priv->dev;
+
+ if (check_cb(lower_dev) && !foreign_dev_check_cb(lower_dev, dev)) {
+ switchdev_priv->lower_dev = lower_dev;
+ return 1;
+ }
+
+ return 0;
+}
+
+static struct net_device *
+switchdev_lower_dev_find(struct net_device *dev,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev))
+{
+ struct switchdev_nested_priv switchdev_priv = {
+ .check_cb = check_cb,
+ .foreign_dev_check_cb = foreign_dev_check_cb,
+ .dev = dev,
+ .lower_dev = NULL,
+ };
+ struct netdev_nested_priv priv = {
+ .data = &switchdev_priv,
+ };
+
+ netdev_walk_all_lower_dev_rcu(dev, switchdev_lower_dev_walk, &priv);
+
+ return switchdev_priv.lower_dev;
+}
+
+static int __switchdev_handle_fdb_add_to_device(struct net_device *dev,
+ const struct net_device *orig_dev,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*add_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info),
+ int (*lag_add_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info))
+{
+ const struct switchdev_notifier_info *info = &fdb_info->info;
+ struct net_device *br, *lower_dev;
+ struct list_head *iter;
+ int err = -EOPNOTSUPP;
+
+ if (check_cb(dev))
+ return add_cb(dev, orig_dev, info->ctx, fdb_info);
+
+ if (netif_is_lag_master(dev)) {
+ if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
+ goto maybe_bridged_with_us;
+
+ /* This is a LAG interface that we offload */
+ if (!lag_add_cb)
+ return -EOPNOTSUPP;
+
+ return lag_add_cb(dev, orig_dev, info->ctx, fdb_info);
+ }
+
+ /* Recurse through lower interfaces in case the FDB entry is pointing
+ * towards a bridge device.
+ */
+ if (netif_is_bridge_master(dev)) {
+ if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
+ return 0;
+
+ /* This is a bridge interface that we offload */
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ /* Do not propagate FDB entries across bridges */
+ if (netif_is_bridge_master(lower_dev))
+ continue;
+
+ /* Bridge ports might be either us, or LAG interfaces
+ * that we offload.
+ */
+ if (!check_cb(lower_dev) &&
+ !switchdev_lower_dev_find(lower_dev, check_cb,
+ foreign_dev_check_cb))
+ continue;
+
+ err = __switchdev_handle_fdb_add_to_device(lower_dev, orig_dev,
+ fdb_info, check_cb,
+ foreign_dev_check_cb,
+ add_cb, lag_add_cb);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ }
+
+ return 0;
+ }
+
+maybe_bridged_with_us:
+ /* Event is neither on a bridge nor a LAG. Check whether it is on an
+ * interface that is in a bridge with us.
+ */
+ br = netdev_master_upper_dev_get_rcu(dev);
+ if (!br || !netif_is_bridge_master(br))
+ return 0;
+
+ if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb))
+ return 0;
+
+ return __switchdev_handle_fdb_add_to_device(br, orig_dev, fdb_info,
+ check_cb, foreign_dev_check_cb,
+ add_cb, lag_add_cb);
+}
+
+int switchdev_handle_fdb_add_to_device(struct net_device *dev,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*add_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info),
+ int (*lag_add_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info))
+{
+ int err;
+
+ err = __switchdev_handle_fdb_add_to_device(dev, dev, fdb_info,
+ check_cb,
+ foreign_dev_check_cb,
+ add_cb, lag_add_cb);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_fdb_add_to_device);
+
+static int __switchdev_handle_fdb_del_to_device(struct net_device *dev,
+ const struct net_device *orig_dev,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*del_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info),
+ int (*lag_del_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info))
+{
+ const struct switchdev_notifier_info *info = &fdb_info->info;
+ struct net_device *br, *lower_dev;
+ struct list_head *iter;
+ int err = -EOPNOTSUPP;
+
+ if (check_cb(dev))
+ return del_cb(dev, orig_dev, info->ctx, fdb_info);
+
+ if (netif_is_lag_master(dev)) {
+ if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
+ goto maybe_bridged_with_us;
+
+ /* This is a LAG interface that we offload */
+ if (!lag_del_cb)
+ return -EOPNOTSUPP;
+
+ return lag_del_cb(dev, orig_dev, info->ctx, fdb_info);
+ }
+
+ /* Recurse through lower interfaces in case the FDB entry is pointing
+ * towards a bridge device.
+ */
+ if (netif_is_bridge_master(dev)) {
+ if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
+ return 0;
+
+ /* This is a bridge interface that we offload */
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ /* Do not propagate FDB entries across bridges */
+ if (netif_is_bridge_master(lower_dev))
+ continue;
+
+ /* Bridge ports might be either us, or LAG interfaces
+ * that we offload.
+ */
+ if (!check_cb(lower_dev) &&
+ !switchdev_lower_dev_find(lower_dev, check_cb,
+ foreign_dev_check_cb))
+ continue;
+
+ err = __switchdev_handle_fdb_del_to_device(lower_dev, orig_dev,
+ fdb_info, check_cb,
+ foreign_dev_check_cb,
+ del_cb, lag_del_cb);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ }
+
+ return 0;
+ }
+
+maybe_bridged_with_us:
+ /* Event is neither on a bridge nor a LAG. Check whether it is on an
+ * interface that is in a bridge with us.
+ */
+ br = netdev_master_upper_dev_get_rcu(dev);
+ if (!br || !netif_is_bridge_master(br))
+ return 0;
+
+ if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb))
+ return 0;
+
+ return __switchdev_handle_fdb_del_to_device(br, orig_dev, fdb_info,
+ check_cb, foreign_dev_check_cb,
+ del_cb, lag_del_cb);
+}
+
+int switchdev_handle_fdb_del_to_device(struct net_device *dev,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*del_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info),
+ int (*lag_del_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info))
+{
+ int err;
+
+ err = __switchdev_handle_fdb_del_to_device(dev, dev, fdb_info,
+ check_cb,
+ foreign_dev_check_cb,
+ del_cb, lag_del_cb);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_fdb_del_to_device);
+
static int __switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
@@ -549,3 +809,51 @@ int switchdev_handle_port_attr_set(struct net_device *dev,
return err;
}
EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set);
+
+int switchdev_bridge_port_offload(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_notifier_brport_info brport_info = {
+ .brport = {
+ .dev = dev,
+ .ctx = ctx,
+ .atomic_nb = atomic_nb,
+ .blocking_nb = blocking_nb,
+ .tx_fwd_offload = tx_fwd_offload,
+ },
+ };
+ int err;
+
+ ASSERT_RTNL();
+
+ err = call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_OFFLOADED,
+ brport_dev, &brport_info.info,
+ extack);
+ return notifier_to_errno(err);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload);
+
+void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+ const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+ struct switchdev_notifier_brport_info brport_info = {
+ .brport = {
+ .ctx = ctx,
+ .atomic_nb = atomic_nb,
+ .blocking_nb = blocking_nb,
+ },
+ };
+
+ ASSERT_RTNL();
+
+ call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_UNOFFLOADED,
+ brport_dev, &brport_info.info,
+ NULL);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index cf586840caeb..1b7a487c8841 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -913,7 +913,7 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr)
skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0,
dnode, l->addr, dport, 0, 0);
if (!skb)
- return -ENOMEM;
+ return -ENOBUFS;
msg_set_dest_droppable(buf_msg(skb), true);
TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr);
skb_queue_tail(&l->wakeupq, skb);
@@ -1031,7 +1031,7 @@ void tipc_link_reset(struct tipc_link *l)
*
* Consumes the buffer chain.
* Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
- * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS or -ENOMEM
+ * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
*/
int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
struct sk_buff_head *xmitq)
@@ -1089,7 +1089,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
if (!_skb) {
kfree_skb(skb);
__skb_queue_purge(list);
- return -ENOMEM;
+ return -ENOBUFS;
}
__skb_queue_tail(transmq, skb);
tipc_link_set_skb_retransmit_time(skb, l);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 75b99b7eda22..e3105ba407c7 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1518,7 +1518,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
if (unlikely(syn && !rc)) {
tipc_set_sk_state(sk, TIPC_CONNECTING);
- if (timeout) {
+ if (dlen && timeout) {
timeout = msecs_to_jiffies(timeout);
tipc_wait_for_connect(sock, &timeout);
}
@@ -1886,6 +1886,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
bool connected = !tipc_sk_type_connectionless(sk);
struct tipc_sock *tsk = tipc_sk(sk);
int rc, err, hlen, dlen, copy;
+ struct tipc_skb_cb *skb_cb;
struct sk_buff_head xmitq;
struct tipc_msg *hdr;
struct sk_buff *skb;
@@ -1909,6 +1910,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
if (unlikely(rc))
goto exit;
skb = skb_peek(&sk->sk_receive_queue);
+ skb_cb = TIPC_SKB_CB(skb);
hdr = buf_msg(skb);
dlen = msg_data_sz(hdr);
hlen = msg_hdr_sz(hdr);
@@ -1928,18 +1930,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
/* Capture data if non-error msg, otherwise just set return value */
if (likely(!err)) {
- copy = min_t(int, dlen, buflen);
- if (unlikely(copy != dlen))
- m->msg_flags |= MSG_TRUNC;
- rc = skb_copy_datagram_msg(skb, hlen, m, copy);
+ int offset = skb_cb->bytes_read;
+
+ copy = min_t(int, dlen - offset, buflen);
+ rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
+ if (unlikely(rc))
+ goto exit;
+ if (unlikely(offset + copy < dlen)) {
+ if (flags & MSG_EOR) {
+ if (!(flags & MSG_PEEK))
+ skb_cb->bytes_read = offset + copy;
+ } else {
+ m->msg_flags |= MSG_TRUNC;
+ skb_cb->bytes_read = 0;
+ }
+ } else {
+ if (flags & MSG_EOR)
+ m->msg_flags |= MSG_EOR;
+ skb_cb->bytes_read = 0;
+ }
} else {
copy = 0;
rc = 0;
- if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
+ if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) {
rc = -ECONNRESET;
+ goto exit;
+ }
}
- if (unlikely(rc))
- goto exit;
/* Mark message as group event if applicable */
if (unlikely(grp_evt)) {
@@ -1962,6 +1979,9 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
tipc_node_distr_xmit(sock_net(sk), &xmitq);
}
+ if (skb_cb->bytes_read)
+ goto exit;
+
tsk_advance_rx_queue(sk);
if (likely(!connected))
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
index b6c4282899ec..b7f811216820 100644
--- a/net/unix/Kconfig
+++ b/net/unix/Kconfig
@@ -25,6 +25,11 @@ config UNIX_SCM
depends on UNIX
default y
+config AF_UNIX_OOB
+ bool
+ depends on UNIX
+ default y
+
config UNIX_DIAG
tristate "UNIX: socket monitoring interface"
depends on UNIX
diff --git a/net/unix/Makefile b/net/unix/Makefile
index 54e58cc4f945..20491825b4d0 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_UNIX) += unix.o
unix-y := af_unix.o garbage.o
unix-$(CONFIG_SYSCTL) += sysctl_net_unix.o
+unix-$(CONFIG_BPF_SYSCALL) += unix_bpf.o
obj-$(CONFIG_UNIX_DIAG) += unix_diag.o
unix_diag-y := diag.o
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ba7ced947e51..eb47b9de2380 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -113,6 +113,7 @@
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>
+#include <linux/btf_ids.h>
#include "scm.h"
@@ -494,6 +495,7 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
sk_error_report(other);
}
}
+ other->sk_state = TCP_CLOSE;
}
static void unix_sock_destructor(struct sock *sk)
@@ -502,6 +504,12 @@ static void unix_sock_destructor(struct sock *sk)
skb_queue_purge(&sk->sk_receive_queue);
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (u->oob_skb) {
+ kfree_skb(u->oob_skb);
+ u->oob_skb = NULL;
+ }
+#endif
WARN_ON(refcount_read(&sk->sk_wmem_alloc));
WARN_ON(!sk_unhashed(sk));
WARN_ON(sk->sk_socket);
@@ -669,6 +677,10 @@ static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
+static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor);
+static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -722,6 +734,7 @@ static const struct proto_ops unix_stream_ops = {
.shutdown = unix_shutdown,
.sendmsg = unix_stream_sendmsg,
.recvmsg = unix_stream_recvmsg,
+ .read_sock = unix_stream_read_sock,
.mmap = sock_no_mmap,
.sendpage = unix_stream_sendpage,
.splice_read = unix_stream_splice_read,
@@ -746,6 +759,7 @@ static const struct proto_ops unix_dgram_ops = {
.listen = sock_no_listen,
.shutdown = unix_shutdown,
.sendmsg = unix_dgram_sendmsg,
+ .read_sock = unix_read_sock,
.recvmsg = unix_dgram_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
@@ -777,13 +791,42 @@ static const struct proto_ops unix_seqpacket_ops = {
.show_fdinfo = unix_show_fdinfo,
};
-static struct proto unix_proto = {
- .name = "UNIX",
+static void unix_close(struct sock *sk, long timeout)
+{
+ /* Nothing to do here, unix socket does not need a ->close().
+ * This is merely for sockmap.
+ */
+}
+
+static void unix_unhash(struct sock *sk)
+{
+ /* Nothing to do here, unix socket does not need a ->unhash().
+ * This is merely for sockmap.
+ */
+}
+
+struct proto unix_dgram_proto = {
+ .name = "UNIX-DGRAM",
.owner = THIS_MODULE,
.obj_size = sizeof(struct unix_sock),
+ .close = unix_close,
+#ifdef CONFIG_BPF_SYSCALL
+ .psock_update_sk_prot = unix_dgram_bpf_update_proto,
+#endif
};
-static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
+struct proto unix_stream_proto = {
+ .name = "UNIX-STREAM",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct unix_sock),
+ .close = unix_close,
+ .unhash = unix_unhash,
+#ifdef CONFIG_BPF_SYSCALL
+ .psock_update_sk_prot = unix_stream_bpf_update_proto,
+#endif
+};
+
+static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
{
struct sock *sk = NULL;
struct unix_sock *u;
@@ -792,7 +835,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
goto out;
- sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
+ if (type == SOCK_STREAM)
+ sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
+ else /*dgram and seqpacket */
+ sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
+
if (!sk)
goto out;
@@ -854,7 +901,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
return -ESOCKTNOSUPPORT;
}
- return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
+ return unix_create1(net, sock, kern, sock->type) ? 0 : -ENOMEM;
}
static int unix_release(struct socket *sock)
@@ -864,6 +911,7 @@ static int unix_release(struct socket *sock)
if (!sk)
return 0;
+ sk->sk_prot->close(sk, 0);
unix_release_sock(sk, 0);
sock->sk = NULL;
@@ -1174,6 +1222,7 @@ restart:
if (err)
goto out_unlock;
+ sk->sk_state = other->sk_state = TCP_ESTABLISHED;
} else {
/*
* 1003.1g breaking connected state with AF_UNSPEC
@@ -1187,7 +1236,10 @@ restart:
*/
if (unix_peer(sk)) {
struct sock *old_peer = unix_peer(sk);
+
unix_peer(sk) = other;
+ if (!other)
+ sk->sk_state = TCP_CLOSE;
unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
unix_state_double_unlock(sk, other);
@@ -1199,6 +1251,7 @@ restart:
unix_peer(sk) = other;
unix_state_double_unlock(sk, other);
}
+
return 0;
out_unlock:
@@ -1264,7 +1317,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
err = -ENOMEM;
/* create new sock for complete connection */
- newsk = unix_create1(sock_net(sk), NULL, 0);
+ newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
if (newsk == NULL)
goto out;
@@ -1431,12 +1484,10 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
init_peercred(ska);
init_peercred(skb);
- if (ska->sk_type != SOCK_DGRAM) {
- ska->sk_state = TCP_ESTABLISHED;
- skb->sk_state = TCP_ESTABLISHED;
- socka->state = SS_CONNECTED;
- sockb->state = SS_CONNECTED;
- }
+ ska->sk_state = TCP_ESTABLISHED;
+ skb->sk_state = TCP_ESTABLISHED;
+ socka->state = SS_CONNECTED;
+ sockb->state = SS_CONNECTED;
return 0;
}
@@ -1782,6 +1833,7 @@ restart_locked:
unix_state_unlock(sk);
+ sk->sk_state = TCP_CLOSE;
unix_dgram_disconnected(sk, other);
sock_put(other);
err = -ECONNREFUSED;
@@ -1872,6 +1924,53 @@ out:
*/
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
+{
+ struct unix_sock *ousk = unix_sk(other);
+ struct sk_buff *skb;
+ int err = 0;
+
+ skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
+
+ if (!skb)
+ return err;
+
+ skb_put(skb, 1);
+ err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
+
+ if (err) {
+ kfree_skb(skb);
+ return err;
+ }
+
+ unix_state_lock(other);
+
+ if (sock_flag(other, SOCK_DEAD) ||
+ (other->sk_shutdown & RCV_SHUTDOWN)) {
+ unix_state_unlock(other);
+ kfree_skb(skb);
+ return -EPIPE;
+ }
+
+ maybe_add_creds(skb, sock, other);
+ skb_get(skb);
+
+ if (ousk->oob_skb)
+ consume_skb(ousk->oob_skb);
+
+ ousk->oob_skb = skb;
+
+ scm_stat_add(other, skb);
+ skb_queue_tail(&other->sk_receive_queue, skb);
+ sk_send_sigurg(other);
+ unix_state_unlock(other);
+ other->sk_data_ready(other);
+
+ return err;
+}
+#endif
+
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
@@ -1890,8 +1989,14 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
return err;
err = -EOPNOTSUPP;
- if (msg->msg_flags&MSG_OOB)
- goto out_err;
+ if (msg->msg_flags & MSG_OOB) {
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+ if (len)
+ len--;
+ else
+#endif
+ goto out_err;
+ }
if (msg->msg_namelen) {
err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
@@ -1956,6 +2061,15 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
sent += size;
}
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+ if (msg->msg_flags & MSG_OOB) {
+ err = queue_oob(sock, msg, other);
+ if (err)
+ goto out_err;
+ sent++;
+ }
+#endif
+
scm_destroy(&scm);
return sent;
@@ -2128,11 +2242,11 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
}
}
-static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
- size_t size, int flags)
+int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
+ int flags)
{
struct scm_cookie scm;
- struct sock *sk = sock->sk;
+ struct socket *sock = sk->sk_socket;
struct unix_sock *u = unix_sk(sk);
struct sk_buff *skb, *last;
long timeo;
@@ -2235,6 +2349,55 @@ out:
return err;
}
+static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
+{
+ struct sock *sk = sock->sk;
+
+#ifdef CONFIG_BPF_SYSCALL
+ const struct proto *prot = READ_ONCE(sk->sk_prot);
+
+ if (prot != &unix_dgram_proto)
+ return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
+ flags & ~MSG_DONTWAIT, NULL);
+#endif
+ return __unix_dgram_recvmsg(sk, msg, size, flags);
+}
+
+static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor)
+{
+ int copied = 0;
+
+ while (1) {
+ struct unix_sock *u = unix_sk(sk);
+ struct sk_buff *skb;
+ int used, err;
+
+ mutex_lock(&u->iolock);
+ skb = skb_recv_datagram(sk, 0, 1, &err);
+ mutex_unlock(&u->iolock);
+ if (!skb)
+ return err;
+
+ used = recv_actor(desc, skb, 0, skb->len);
+ if (used <= 0) {
+ if (!copied)
+ copied = used;
+ kfree_skb(skb);
+ break;
+ } else if (used <= skb->len) {
+ copied += used;
+ }
+
+ kfree_skb(skb);
+ if (!desc->count)
+ break;
+ }
+
+ return copied;
+}
+
/*
* Sleep until more data has arrived. But check for races..
*/
@@ -2294,6 +2457,86 @@ struct unix_stream_read_state {
unsigned int splice_flags;
};
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+static int unix_stream_recv_urg(struct unix_stream_read_state *state)
+{
+ struct socket *sock = state->socket;
+ struct sock *sk = sock->sk;
+ struct unix_sock *u = unix_sk(sk);
+ int chunk = 1;
+ struct sk_buff *oob_skb;
+
+ mutex_lock(&u->iolock);
+ unix_state_lock(sk);
+
+ if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
+ unix_state_unlock(sk);
+ mutex_unlock(&u->iolock);
+ return -EINVAL;
+ }
+
+ oob_skb = u->oob_skb;
+
+ if (!(state->flags & MSG_PEEK)) {
+ u->oob_skb = NULL;
+ }
+
+ unix_state_unlock(sk);
+
+ chunk = state->recv_actor(oob_skb, 0, chunk, state);
+
+ if (!(state->flags & MSG_PEEK)) {
+ UNIXCB(oob_skb).consumed += 1;
+ kfree_skb(oob_skb);
+ }
+
+ mutex_unlock(&u->iolock);
+
+ if (chunk < 0)
+ return -EFAULT;
+
+ state->msg->msg_flags |= MSG_OOB;
+ return 1;
+}
+
+static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
+ int flags, int copied)
+{
+ struct unix_sock *u = unix_sk(sk);
+
+ if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
+ skb_unlink(skb, &sk->sk_receive_queue);
+ consume_skb(skb);
+ skb = NULL;
+ } else {
+ if (skb == u->oob_skb) {
+ if (copied) {
+ skb = NULL;
+ } else if (sock_flag(sk, SOCK_URGINLINE)) {
+ if (!(flags & MSG_PEEK)) {
+ u->oob_skb = NULL;
+ consume_skb(skb);
+ }
+ } else if (!(flags & MSG_PEEK)) {
+ skb_unlink(skb, &sk->sk_receive_queue);
+ consume_skb(skb);
+ skb = skb_peek(&sk->sk_receive_queue);
+ }
+ }
+ }
+ return skb;
+}
+#endif
+
+static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor)
+{
+ if (unlikely(sk->sk_state != TCP_ESTABLISHED))
+ return -ENOTCONN;
+
+ return unix_read_sock(sk, desc, recv_actor);
+}
+
static int unix_stream_read_generic(struct unix_stream_read_state *state,
bool freezable)
{
@@ -2319,6 +2562,9 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
if (unlikely(flags & MSG_OOB)) {
err = -EOPNOTSUPP;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ err = unix_stream_recv_urg(state);
+#endif
goto out;
}
@@ -2347,6 +2593,18 @@ redo:
}
last = skb = skb_peek(&sk->sk_receive_queue);
last_len = last ? last->len : 0;
+
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (skb) {
+ skb = manage_oob(skb, sk, flags, copied);
+ if (!skb) {
+ unix_state_unlock(sk);
+ if (copied)
+ break;
+ goto redo;
+ }
+ }
+#endif
again:
if (skb == NULL) {
if (copied >= target)
@@ -2504,6 +2762,20 @@ static int unix_stream_read_actor(struct sk_buff *skb,
return ret ?: chunk;
}
+int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
+ size_t size, int flags)
+{
+ struct unix_stream_read_state state = {
+ .recv_actor = unix_stream_read_actor,
+ .socket = sk->sk_socket,
+ .msg = msg,
+ .size = size,
+ .flags = flags
+ };
+
+ return unix_stream_read_generic(&state, true);
+}
+
static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
size_t size, int flags)
{
@@ -2515,6 +2787,14 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
.flags = flags
};
+#ifdef CONFIG_BPF_SYSCALL
+ struct sock *sk = sock->sk;
+ const struct proto *prot = READ_ONCE(sk->sk_prot);
+
+ if (prot != &unix_stream_proto)
+ return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
+ flags & ~MSG_DONTWAIT, NULL);
+#endif
return unix_stream_read_generic(&state, true);
}
@@ -2575,7 +2855,10 @@ static int unix_shutdown(struct socket *sock, int mode)
(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
int peer_mode = 0;
+ const struct proto *prot = READ_ONCE(other->sk_prot);
+ if (prot->unhash)
+ prot->unhash(other);
if (mode&RCV_SHUTDOWN)
peer_mode |= SEND_SHUTDOWN;
if (mode&SEND_SHUTDOWN)
@@ -2584,10 +2867,12 @@ static int unix_shutdown(struct socket *sock, int mode)
other->sk_shutdown |= peer_mode;
unix_state_unlock(other);
other->sk_state_change(other);
- if (peer_mode == SHUTDOWN_MASK)
+ if (peer_mode == SHUTDOWN_MASK) {
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
- else if (peer_mode & RCV_SHUTDOWN)
+ other->sk_state = TCP_CLOSE;
+ } else if (peer_mode & RCV_SHUTDOWN) {
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
+ }
}
if (other)
sock_put(other);
@@ -2682,6 +2967,20 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCUNIXFILE:
err = unix_open_file(sk);
break;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ case SIOCATMARK:
+ {
+ struct sk_buff *skb;
+ struct unix_sock *u = unix_sk(sk);
+ int answ = 0;
+
+ skb = skb_peek(&sk->sk_receive_queue);
+ if (skb && skb == u->oob_skb)
+ answ = 1;
+ err = put_user(answ, (int __user *)arg);
+ }
+ break;
+#endif
default:
err = -ENOIOCTLCMD;
break;
@@ -2918,6 +3217,64 @@ static const struct seq_operations unix_seq_ops = {
.stop = unix_seq_stop,
.show = unix_seq_show,
};
+
+#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_iter__unix {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct unix_sock *, unix_sk);
+ uid_t uid __aligned(8);
+};
+
+static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
+ struct unix_sock *unix_sk, uid_t uid)
+{
+ struct bpf_iter__unix ctx;
+
+ meta->seq_num--; /* skip SEQ_START_TOKEN */
+ ctx.meta = meta;
+ ctx.unix_sk = unix_sk;
+ ctx.uid = uid;
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ struct sock *sk = v;
+ uid_t uid;
+
+ if (v == SEQ_START_TOKEN)
+ return 0;
+
+ uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, false);
+ return unix_prog_seq_show(prog, &meta, v, uid);
+}
+
+static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ if (!v) {
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, true);
+ if (prog)
+ (void)unix_prog_seq_show(prog, &meta, v, 0);
+ }
+
+ unix_seq_stop(seq, v);
+}
+
+static const struct seq_operations bpf_iter_unix_seq_ops = {
+ .start = unix_seq_start,
+ .next = unix_seq_next,
+ .stop = bpf_iter_unix_seq_stop,
+ .show = bpf_iter_unix_seq_show,
+};
+#endif
#endif
static const struct net_proto_family unix_family_ops = {
@@ -2958,13 +3315,48 @@ static struct pernet_operations unix_net_ops = {
.exit = unix_net_exit,
};
+#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
+ struct unix_sock *unix_sk, uid_t uid)
+
+static const struct bpf_iter_seq_info unix_seq_info = {
+ .seq_ops = &bpf_iter_unix_seq_ops,
+ .init_seq_private = bpf_iter_init_seq_net,
+ .fini_seq_private = bpf_iter_fini_seq_net,
+ .seq_priv_size = sizeof(struct seq_net_private),
+};
+
+static struct bpf_iter_reg unix_reg_info = {
+ .target = "unix",
+ .ctx_arg_info_size = 1,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__unix, unix_sk),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+ .seq_info = &unix_seq_info,
+};
+
+static void __init bpf_iter_register(void)
+{
+ unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
+ if (bpf_iter_reg_target(&unix_reg_info))
+ pr_warn("Warning: could not register bpf iterator unix\n");
+}
+#endif
+
static int __init af_unix_init(void)
{
int rc = -1;
BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
- rc = proto_register(&unix_proto, 1);
+ rc = proto_register(&unix_dgram_proto, 1);
+ if (rc != 0) {
+ pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
+ goto out;
+ }
+
+ rc = proto_register(&unix_stream_proto, 1);
if (rc != 0) {
pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
goto out;
@@ -2972,6 +3364,12 @@ static int __init af_unix_init(void)
sock_register(&unix_family_ops);
register_pernet_subsys(&unix_net_ops);
+ unix_bpf_build_proto();
+
+#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+ bpf_iter_register();
+#endif
+
out:
return rc;
}
@@ -2979,7 +3377,8 @@ out:
static void __exit af_unix_exit(void)
{
sock_unregister(PF_UNIX);
- proto_unregister(&unix_proto);
+ proto_unregister(&unix_dgram_proto);
+ proto_unregister(&unix_stream_proto);
unregister_pernet_subsys(&unix_net_ops);
}
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
new file mode 100644
index 000000000000..b927e2baae50
--- /dev/null
+++ b/net/unix/unix_bpf.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Cong Wang <cong.wang@bytedance.com> */
+
+#include <linux/skmsg.h>
+#include <linux/bpf.h>
+#include <net/sock.h>
+#include <net/af_unix.h>
+
+#define unix_sk_has_data(__sk, __psock) \
+ ({ !skb_queue_empty(&__sk->sk_receive_queue) || \
+ !skb_queue_empty(&__psock->ingress_skb) || \
+ !list_empty(&__psock->ingress_msg); \
+ })
+
+static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock,
+ long timeo)
+{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ struct unix_sock *u = unix_sk(sk);
+ int ret = 0;
+
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ return 1;
+
+ if (!timeo)
+ return ret;
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ if (!unix_sk_has_data(sk, psock)) {
+ mutex_unlock(&u->iolock);
+ wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
+ mutex_lock(&u->iolock);
+ ret = unix_sk_has_data(sk, psock);
+ }
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
+ return ret;
+}
+
+static int __unix_recvmsg(struct sock *sk, struct msghdr *msg,
+ size_t len, int flags)
+{
+ if (sk->sk_type == SOCK_DGRAM)
+ return __unix_dgram_recvmsg(sk, msg, len, flags);
+ else
+ return __unix_stream_recvmsg(sk, msg, len, flags);
+}
+
+static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
+ size_t len, int nonblock, int flags,
+ int *addr_len)
+{
+ struct unix_sock *u = unix_sk(sk);
+ struct sk_psock *psock;
+ int copied;
+
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock))
+ return __unix_recvmsg(sk, msg, len, flags);
+
+ mutex_lock(&u->iolock);
+ if (!skb_queue_empty(&sk->sk_receive_queue) &&
+ sk_psock_queue_empty(psock)) {
+ mutex_unlock(&u->iolock);
+ sk_psock_put(sk, psock);
+ return __unix_recvmsg(sk, msg, len, flags);
+ }
+
+msg_bytes_ready:
+ copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+ if (!copied) {
+ long timeo;
+ int data;
+
+ timeo = sock_rcvtimeo(sk, nonblock);
+ data = unix_msg_wait_data(sk, psock, timeo);
+ if (data) {
+ if (!sk_psock_queue_empty(psock))
+ goto msg_bytes_ready;
+ mutex_unlock(&u->iolock);
+ sk_psock_put(sk, psock);
+ return __unix_recvmsg(sk, msg, len, flags);
+ }
+ copied = -EAGAIN;
+ }
+ mutex_unlock(&u->iolock);
+ sk_psock_put(sk, psock);
+ return copied;
+}
+
+static struct proto *unix_dgram_prot_saved __read_mostly;
+static DEFINE_SPINLOCK(unix_dgram_prot_lock);
+static struct proto unix_dgram_bpf_prot;
+
+static struct proto *unix_stream_prot_saved __read_mostly;
+static DEFINE_SPINLOCK(unix_stream_prot_lock);
+static struct proto unix_stream_bpf_prot;
+
+static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
+{
+ *prot = *base;
+ prot->close = sock_map_close;
+ prot->recvmsg = unix_bpf_recvmsg;
+}
+
+static void unix_stream_bpf_rebuild_protos(struct proto *prot,
+ const struct proto *base)
+{
+ *prot = *base;
+ prot->close = sock_map_close;
+ prot->recvmsg = unix_bpf_recvmsg;
+ prot->unhash = sock_map_unhash;
+}
+
+static void unix_dgram_bpf_check_needs_rebuild(struct proto *ops)
+{
+ if (unlikely(ops != smp_load_acquire(&unix_dgram_prot_saved))) {
+ spin_lock_bh(&unix_dgram_prot_lock);
+ if (likely(ops != unix_dgram_prot_saved)) {
+ unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, ops);
+ smp_store_release(&unix_dgram_prot_saved, ops);
+ }
+ spin_unlock_bh(&unix_dgram_prot_lock);
+ }
+}
+
+static void unix_stream_bpf_check_needs_rebuild(struct proto *ops)
+{
+ if (unlikely(ops != smp_load_acquire(&unix_stream_prot_saved))) {
+ spin_lock_bh(&unix_stream_prot_lock);
+ if (likely(ops != unix_stream_prot_saved)) {
+ unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, ops);
+ smp_store_release(&unix_stream_prot_saved, ops);
+ }
+ spin_unlock_bh(&unix_stream_prot_lock);
+ }
+}
+
+int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
+{
+ if (sk->sk_type != SOCK_DGRAM)
+ return -EOPNOTSUPP;
+
+ if (restore) {
+ sk->sk_write_space = psock->saved_write_space;
+ WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+ return 0;
+ }
+
+ unix_dgram_bpf_check_needs_rebuild(psock->sk_proto);
+ WRITE_ONCE(sk->sk_prot, &unix_dgram_bpf_prot);
+ return 0;
+}
+
+int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
+{
+ if (restore) {
+ sk->sk_write_space = psock->saved_write_space;
+ WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+ return 0;
+ }
+
+ unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
+ WRITE_ONCE(sk->sk_prot, &unix_stream_bpf_prot);
+ return 0;
+}
+
+void __init unix_bpf_build_proto(void)
+{
+ unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, &unix_dgram_proto);
+ unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, &unix_stream_proto);
+
+}
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index e0c2c992ad9c..4f7c99dfd16c 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -357,11 +357,14 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
static void virtio_vsock_reset_sock(struct sock *sk)
{
- lock_sock(sk);
+ /* vmci_transport.c doesn't take sk_lock here either. At least we're
+ * under vsock_table_lock so the sock cannot disappear while we're
+ * executing.
+ */
+
sk->sk_state = TCP_CLOSE;
sk->sk_err = ECONNRESET;
sk_error_report(sk);
- release_sock(sk);
}
static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 16c88beea48b..bf7cd4752547 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -759,6 +759,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_RECONNECT_REQUESTED] = { .type = NLA_REJECT },
[NL80211_ATTR_SAR_SPEC] = NLA_POLICY_NESTED(sar_policy),
[NL80211_ATTR_DISABLE_HE] = { .type = NLA_FLAG },
+ [NL80211_ATTR_OBSS_COLOR_BITMAP] = { .type = NLA_U64 },
+ [NL80211_ATTR_COLOR_CHANGE_COUNT] = { .type = NLA_U8 },
+ [NL80211_ATTR_COLOR_CHANGE_COLOR] = { .type = NLA_U8 },
+ [NL80211_ATTR_COLOR_CHANGE_ELEMS] = NLA_POLICY_NESTED(nl80211_policy),
};
/* policy for the key attributes */
@@ -6527,8 +6531,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
err = rdev_change_station(rdev, dev, mac_addr, &params);
out_put_vlan:
- if (params.vlan)
- dev_put(params.vlan);
+ dev_put(params.vlan);
return err;
}
@@ -6763,8 +6766,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
err = rdev_add_station(rdev, dev, mac_addr, &params);
- if (params.vlan)
- dev_put(params.vlan);
+ dev_put(params.vlan);
return err;
}
@@ -8489,8 +8491,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
goto out_free;
nl80211_send_scan_start(rdev, wdev);
- if (wdev->netdev)
- dev_hold(wdev->netdev);
+ dev_hold(wdev->netdev);
return 0;
@@ -14803,6 +14804,106 @@ bad_tid_conf:
return ret;
}
+static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct cfg80211_color_change_settings params = {};
+ struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct nlattr **tb;
+ u16 offset;
+ int err;
+
+ if (!rdev->ops->color_change)
+ return -EOPNOTSUPP;
+
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_BSS_COLOR))
+ return -EOPNOTSUPP;
+
+ if (wdev->iftype != NL80211_IFTYPE_AP)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[NL80211_ATTR_COLOR_CHANGE_COUNT] ||
+ !info->attrs[NL80211_ATTR_COLOR_CHANGE_COLOR] ||
+ !info->attrs[NL80211_ATTR_COLOR_CHANGE_ELEMS])
+ return -EINVAL;
+
+ params.count = nla_get_u8(info->attrs[NL80211_ATTR_COLOR_CHANGE_COUNT]);
+ params.color = nla_get_u8(info->attrs[NL80211_ATTR_COLOR_CHANGE_COLOR]);
+
+ err = nl80211_parse_beacon(rdev, info->attrs, &params.beacon_next);
+ if (err)
+ return err;
+
+ tb = kcalloc(NL80211_ATTR_MAX + 1, sizeof(*tb), GFP_KERNEL);
+ if (!tb)
+ return -ENOMEM;
+
+ err = nla_parse_nested(tb, NL80211_ATTR_MAX,
+ info->attrs[NL80211_ATTR_COLOR_CHANGE_ELEMS],
+ nl80211_policy, info->extack);
+ if (err)
+ goto out;
+
+ err = nl80211_parse_beacon(rdev, tb, &params.beacon_color_change);
+ if (err)
+ goto out;
+
+ if (!tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (nla_len(tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]) != sizeof(u16)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ offset = nla_get_u16(tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]);
+ if (offset >= params.beacon_color_change.tail_len) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (params.beacon_color_change.tail[offset] != params.count) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ params.counter_offset_beacon = offset;
+
+ if (tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]) {
+ if (nla_len(tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]) !=
+ sizeof(u16)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ offset = nla_get_u16(tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]);
+ if (offset >= params.beacon_color_change.probe_resp_len) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (params.beacon_color_change.probe_resp[offset] !=
+ params.count) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ params.counter_offset_presp = offset;
+ }
+
+ wdev_lock(wdev);
+ err = rdev_color_change(rdev, dev, &params);
+ wdev_unlock(wdev);
+
+out:
+ kfree(tb);
+ return err;
+}
+
#define NL80211_FLAG_NEED_WIPHY 0x01
#define NL80211_FLAG_NEED_NETDEV 0x02
#define NL80211_FLAG_NEED_RTNL 0x04
@@ -14860,9 +14961,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
return -ENETDOWN;
}
- if (dev)
- dev_hold(dev);
-
+ dev_hold(dev);
info->user_ptr[0] = rdev;
}
@@ -14884,8 +14983,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) {
struct wireless_dev *wdev = info->user_ptr[1];
- if (wdev->netdev)
- dev_put(wdev->netdev);
+ dev_put(wdev->netdev);
} else {
dev_put(info->user_ptr[1]);
}
@@ -15801,6 +15899,14 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.internal_flags = NL80211_FLAG_NEED_WIPHY |
NL80211_FLAG_NEED_RTNL,
},
+ {
+ .cmd = NL80211_CMD_COLOR_CHANGE_REQUEST,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = nl80211_color_change,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
};
static struct genl_family nl80211_fam __ro_after_init = {
@@ -17430,6 +17536,51 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev,
}
EXPORT_SYMBOL(cfg80211_ch_switch_started_notify);
+int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp,
+ enum nl80211_commands cmd, u8 count,
+ u64 color_bitmap)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ struct sk_buff *msg;
+ void *hdr;
+
+ ASSERT_WDEV_LOCK(wdev);
+
+ trace_cfg80211_bss_color_notify(dev, cmd, count, color_bitmap);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
+ if (!hdr)
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
+ goto nla_put_failure;
+
+ if (cmd == NL80211_CMD_COLOR_CHANGE_STARTED &&
+ nla_put_u32(msg, NL80211_ATTR_COLOR_CHANGE_COUNT, count))
+ goto nla_put_failure;
+
+ if (cmd == NL80211_CMD_OBSS_COLOR_COLLISION &&
+ nla_put_u64_64bit(msg, NL80211_ATTR_OBSS_COLOR_BITMAP,
+ color_bitmap, NL80211_ATTR_PAD))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
+ msg, 0, NL80211_MCGRP_MLME, gfp);
+
+nla_put_failure:
+ nlmsg_free(msg);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(cfg80211_bss_color_notify);
+
void
nl80211_radar_notify(struct cfg80211_registered_device *rdev,
const struct cfg80211_chan_def *chandef,
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index 36f1b59a78bf..ae2e1a896461 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -115,23 +115,22 @@ int ieee80211_radiotap_iterator_init(
iterator->_max_length = get_unaligned_le16(&radiotap_header->it_len);
iterator->_arg_index = 0;
iterator->_bitmap_shifter = get_unaligned_le32(&radiotap_header->it_present);
- iterator->_arg = (uint8_t *)radiotap_header + sizeof(*radiotap_header);
+ iterator->_arg = (uint8_t *)radiotap_header->it_optional;
iterator->_reset_on_ext = 0;
- iterator->_next_bitmap = &radiotap_header->it_present;
- iterator->_next_bitmap++;
+ iterator->_next_bitmap = radiotap_header->it_optional;
iterator->_vns = vns;
iterator->current_namespace = &radiotap_ns;
iterator->is_radiotap_ns = 1;
/* find payload start allowing for extended bitmap(s) */
- if (iterator->_bitmap_shifter & (1<<IEEE80211_RADIOTAP_EXT)) {
+ if (iterator->_bitmap_shifter & (BIT(IEEE80211_RADIOTAP_EXT))) {
if ((unsigned long)iterator->_arg -
(unsigned long)iterator->_rtheader + sizeof(uint32_t) >
(unsigned long)iterator->_max_length)
return -EINVAL;
while (get_unaligned_le32(iterator->_arg) &
- (1 << IEEE80211_RADIOTAP_EXT)) {
+ (BIT(IEEE80211_RADIOTAP_EXT))) {
iterator->_arg += sizeof(uint32_t);
/*
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index b1d37f582dc6..ce6bf218a1a3 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1368,4 +1368,17 @@ static inline int rdev_set_sar_specs(struct cfg80211_registered_device *rdev,
return ret;
}
+static inline int rdev_color_change(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_color_change_settings *params)
+{
+ int ret;
+
+ trace_rdev_color_change(&rdev->wiphy, dev, params);
+ ret = rdev->ops->color_change(&rdev->wiphy, dev, params);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+
+ return ret;
+}
+
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index c2d0ff7f089f..df87c7f3a049 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -171,9 +171,11 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy)
{
const struct ieee80211_regdomain *regd = NULL;
const struct ieee80211_regdomain *wiphy_regd = NULL;
+ enum nl80211_dfs_regions dfs_region;
rcu_read_lock();
regd = get_cfg80211_regdom();
+ dfs_region = regd->dfs_region;
if (!wiphy)
goto out;
@@ -182,6 +184,11 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy)
if (!wiphy_regd)
goto out;
+ if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) {
+ dfs_region = wiphy_regd->dfs_region;
+ goto out;
+ }
+
if (wiphy_regd->dfs_region == regd->dfs_region)
goto out;
@@ -193,7 +200,7 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy)
out:
rcu_read_unlock();
- return regd->dfs_region;
+ return dfs_region;
}
static void rcu_free_regdom(const struct ieee80211_regdomain *r)
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 7897b1478c3c..11c68b159324 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -975,8 +975,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
}
#endif
- if (wdev->netdev)
- dev_put(wdev->netdev);
+ dev_put(wdev->netdev);
kfree(rdev->int_scan_req);
rdev->int_scan_req = NULL;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 440bce5f0274..19b78d472283 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -3597,6 +3597,52 @@ TRACE_EVENT(rdev_set_sar_specs,
WIPHY_PR_ARG, __entry->type, __entry->num)
);
+TRACE_EVENT(rdev_color_change,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_color_change_settings *params),
+ TP_ARGS(wiphy, netdev, params),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __field(u8, count)
+ __field(u16, bcn_ofs)
+ __field(u16, pres_ofs)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ __entry->count = params->count;
+ __entry->bcn_ofs = params->counter_offset_beacon;
+ __entry->pres_ofs = params->counter_offset_presp;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT
+ ", count: %u",
+ WIPHY_PR_ARG, NETDEV_PR_ARG,
+ __entry->count)
+);
+
+TRACE_EVENT(cfg80211_bss_color_notify,
+ TP_PROTO(struct net_device *netdev,
+ enum nl80211_commands cmd,
+ u8 count, u64 color_bitmap),
+ TP_ARGS(netdev, cmd, count, color_bitmap),
+ TP_STRUCT__entry(
+ NETDEV_ENTRY
+ __field(u32, cmd)
+ __field(u8, count)
+ __field(u64, color_bitmap)
+ ),
+ TP_fast_assign(
+ NETDEV_ASSIGN;
+ __entry->cmd = cmd;
+ __entry->count = count;
+ __entry->color_bitmap = color_bitmap;
+ ),
+ TP_printk(NETDEV_PR_FMT ", cmd: %x, count: %u, bitmap: %llx",
+ NETDEV_PR_ARG, __entry->cmd, __entry->count,
+ __entry->color_bitmap)
+);
+
#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7f881f5a5897..37d17a79617c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3157,6 +3157,11 @@ ok:
return dst;
nopol:
+ if (!(dst_orig->dev->flags & IFF_LOOPBACK) &&
+ !xfrm_default_allow(net, dir)) {
+ err = -EPERM;
+ goto error;
+ }
if (!(flags & XFRM_LOOKUP_ICMP)) {
dst = dst_orig;
goto ok;
@@ -3545,6 +3550,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
}
if (!pol) {
+ if (!xfrm_default_allow(net, dir)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
+ return 0;
+ }
+
if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) {
xfrm_secpath_reject(xerr_idx, skb, &fl);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
@@ -3599,6 +3609,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
tpp[ti++] = &pols[pi]->xfrm_vec[i];
}
xfrm_nr = ti;
+
+ if (!xfrm_default_allow(net, dir) && !xfrm_nr) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
+ goto reject;
+ }
+
if (npols > 1) {
xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
tpp = stp;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 7aff641c717d..03b66d154b2b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1961,6 +1961,59 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
return skb;
}
+static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct nlattr **attrs)
+{
+ struct net *net = sock_net(skb->sk);
+ struct xfrm_userpolicy_default *up = nlmsg_data(nlh);
+ u8 dirmask;
+ u8 old_default = net->xfrm.policy_default;
+
+ if (up->dirmask >= XFRM_USERPOLICY_DIRMASK_MAX)
+ return -EINVAL;
+
+ dirmask = (1 << up->dirmask) & XFRM_POL_DEFAULT_MASK;
+
+ net->xfrm.policy_default = (old_default & (0xff ^ dirmask))
+ | (up->action << up->dirmask);
+
+ rt_genid_bump_all(net);
+
+ return 0;
+}
+
+static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct nlattr **attrs)
+{
+ struct sk_buff *r_skb;
+ struct nlmsghdr *r_nlh;
+ struct net *net = sock_net(skb->sk);
+ struct xfrm_userpolicy_default *r_up, *up;
+ int len = NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_default));
+ u32 portid = NETLINK_CB(skb).portid;
+ u32 seq = nlh->nlmsg_seq;
+
+ up = nlmsg_data(nlh);
+
+ r_skb = nlmsg_new(len, GFP_ATOMIC);
+ if (!r_skb)
+ return -ENOMEM;
+
+ r_nlh = nlmsg_put(r_skb, portid, seq, XFRM_MSG_GETDEFAULT, sizeof(*r_up), 0);
+ if (!r_nlh) {
+ kfree_skb(r_skb);
+ return -EMSGSIZE;
+ }
+
+ r_up = nlmsg_data(r_nlh);
+
+ r_up->action = ((net->xfrm.policy_default & (1 << up->dirmask)) >> up->dirmask);
+ r_up->dirmask = up->dirmask;
+ nlmsg_end(r_skb, r_nlh);
+
+ return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid);
+}
+
static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
struct nlattr **attrs)
{
@@ -2664,6 +2717,8 @@ const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
[XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32),
[XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
[XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
+ [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default),
+ [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default),
};
EXPORT_SYMBOL_GPL(xfrm_msg_min);
@@ -2743,6 +2798,8 @@ static const struct xfrm_link {
.nla_pol = xfrma_spd_policy,
.nla_max = XFRMA_SPD_MAX },
[XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo },
+ [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = { .doit = xfrm_set_default },
+ [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = { .doit = xfrm_get_default },
};
static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore
index 0b9548ea8477..fcba217f0ae2 100644
--- a/samples/bpf/.gitignore
+++ b/samples/bpf/.gitignore
@@ -45,11 +45,13 @@ xdp_monitor
xdp_redirect
xdp_redirect_cpu
xdp_redirect_map
+xdp_redirect_map_multi
xdp_router_ipv4
xdp_rxq_info
xdp_sample_pkts
xdp_tx_iptunnel
xdpsock
+xdpsock_ctrl_proc
xsk_fwd
testfile.img
hbm_out.log
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 036998d11ded..4dc20be5fb96 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -39,11 +39,6 @@ tprogs-y += lwt_len_hist
tprogs-y += xdp_tx_iptunnel
tprogs-y += test_map_in_map
tprogs-y += per_socket_stats_example
-tprogs-y += xdp_redirect
-tprogs-y += xdp_redirect_map
-tprogs-y += xdp_redirect_map_multi
-tprogs-y += xdp_redirect_cpu
-tprogs-y += xdp_monitor
tprogs-y += xdp_rxq_info
tprogs-y += syscall_tp
tprogs-y += cpustat
@@ -57,11 +52,18 @@ tprogs-y += xdp_sample_pkts
tprogs-y += ibumad
tprogs-y += hbm
+tprogs-y += xdp_redirect_cpu
+tprogs-y += xdp_redirect_map_multi
+tprogs-y += xdp_redirect_map
+tprogs-y += xdp_redirect
+tprogs-y += xdp_monitor
+
# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o
+XDP_SAMPLE := xdp_sample_user.o
fds_example-objs := fds_example.o
sockex1-objs := sockex1_user.o
@@ -98,11 +100,6 @@ lwt_len_hist-objs := lwt_len_hist_user.o
xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o
test_map_in_map-objs := test_map_in_map_user.o
per_socket_stats_example-objs := cookie_uid_helper_example.o
-xdp_redirect-objs := xdp_redirect_user.o
-xdp_redirect_map-objs := xdp_redirect_map_user.o
-xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o
-xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o
-xdp_monitor-objs := xdp_monitor_user.o
xdp_rxq_info-objs := xdp_rxq_info_user.o
syscall_tp-objs := syscall_tp_user.o
cpustat-objs := cpustat_user.o
@@ -116,6 +113,12 @@ xdp_sample_pkts-objs := xdp_sample_pkts_user.o
ibumad-objs := ibumad_user.o
hbm-objs := hbm.o $(CGROUP_HELPERS)
+xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o $(XDP_SAMPLE)
+xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o $(XDP_SAMPLE)
+xdp_redirect_map-objs := xdp_redirect_map_user.o $(XDP_SAMPLE)
+xdp_redirect-objs := xdp_redirect_user.o $(XDP_SAMPLE)
+xdp_monitor-objs := xdp_monitor_user.o $(XDP_SAMPLE)
+
# Tell kbuild to always build the programs
always-y := $(tprogs-y)
always-y += sockex1_kern.o
@@ -160,11 +163,6 @@ always-y += tcp_clamp_kern.o
always-y += tcp_basertt_kern.o
always-y += tcp_tos_reflect_kern.o
always-y += tcp_dumpstats_kern.o
-always-y += xdp_redirect_kern.o
-always-y += xdp_redirect_map_kern.o
-always-y += xdp_redirect_map_multi_kern.o
-always-y += xdp_redirect_cpu_kern.o
-always-y += xdp_monitor_kern.o
always-y += xdp_rxq_info_kern.o
always-y += xdp2skb_meta_kern.o
always-y += syscall_tp_kern.o
@@ -276,6 +274,11 @@ $(LIBBPF): FORCE
$(MAKE) -C $(dir $@) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \
LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ O=
+BPFTOOLDIR := $(TOOLS_PATH)/bpf/bpftool
+BPFTOOL := $(BPFTOOLDIR)/bpftool
+$(BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)
+ $(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../
+
$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE
$(call filechk,offsets,__SYSCALL_NRS_H__)
@@ -306,6 +309,12 @@ verify_target_bpf: verify_cmds
$(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF)
$(src)/*.c: verify_target_bpf $(LIBBPF)
+$(obj)/xdp_redirect_cpu_user.o: $(obj)/xdp_redirect_cpu.skel.h
+$(obj)/xdp_redirect_map_multi_user.o: $(obj)/xdp_redirect_map_multi.skel.h
+$(obj)/xdp_redirect_map_user.o: $(obj)/xdp_redirect_map.skel.h
+$(obj)/xdp_redirect_user.o: $(obj)/xdp_redirect.skel.h
+$(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h
+
$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
$(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
$(obj)/hbm.o: $(src)/hbm.h
@@ -313,6 +322,76 @@ $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
-include $(BPF_SAMPLES_PATH)/Makefile.target
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
+ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+ ../../../../vmlinux \
+ /sys/kernel/btf/vmlinux \
+ /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
+ifeq ($(VMLINUX_BTF),)
+$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
+endif
+
+$(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL)
+ifeq ($(VMLINUX_H),)
+ $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+else
+ $(Q)cp "$(VMLINUX_H)" $@
+endif
+
+clean-files += vmlinux.h
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) -dM -E - </dev/null | grep '#define __riscv_xlen ' | sed 's/#define /-D/' | sed 's/ /=/')
+endef
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
+
+$(obj)/xdp_redirect_cpu.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_redirect_map_multi.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_redirect_map.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_redirect.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_monitor.bpf.o: $(obj)/xdp_sample.bpf.o
+
+$(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/xdp_sample_shared.h
+ @echo " CLANG-BPF " $@
+ $(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(SRCARCH) \
+ -Wno-compare-distinct-pointer-types -I$(srctree)/include \
+ -I$(srctree)/samples/bpf -I$(srctree)/tools/include \
+ -I$(srctree)/tools/lib $(CLANG_SYS_INCLUDES) \
+ -c $(filter %.bpf.c,$^) -o $@
+
+LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect_map_multi.skel.h \
+ xdp_redirect_map.skel.h xdp_redirect.skel.h xdp_monitor.skel.h
+clean-files += $(LINKED_SKELS)
+
+xdp_redirect_cpu.skel.h-deps := xdp_redirect_cpu.bpf.o xdp_sample.bpf.o
+xdp_redirect_map_multi.skel.h-deps := xdp_redirect_map_multi.bpf.o xdp_sample.bpf.o
+xdp_redirect_map.skel.h-deps := xdp_redirect_map.bpf.o xdp_sample.bpf.o
+xdp_redirect.skel.h-deps := xdp_redirect.bpf.o xdp_sample.bpf.o
+xdp_monitor.skel.h-deps := xdp_monitor.bpf.o xdp_sample.bpf.o
+
+LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.bpf.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
+
+BPF_SRCS_LINKED := $(notdir $(wildcard $(src)/*.bpf.c))
+BPF_OBJS_LINKED := $(patsubst %.bpf.c,$(obj)/%.bpf.o, $(BPF_SRCS_LINKED))
+BPF_SKELS_LINKED := $(addprefix $(obj)/,$(LINKED_SKELS))
+
+$(BPF_SKELS_LINKED): $(BPF_OBJS_LINKED) $(BPFTOOL)
+ @echo " BPF GEN-OBJ " $(@:.skel.h=)
+ $(Q)$(BPFTOOL) gen object $(@:.skel.h=.lbpf.o) $(addprefix $(obj)/,$($(@F)-deps))
+ @echo " BPF GEN-SKEL" $(@:.skel.h=)
+ $(Q)$(BPFTOOL) gen skeleton $(@:.skel.h=.lbpf.o) name $(notdir $(@:.skel.h=)) > $@
+
# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
# But, there is no easy way to fix it, so just exclude it since it is
# useless for BPF samples.
diff --git a/samples/bpf/Makefile.target b/samples/bpf/Makefile.target
index 7621f55e2947..5a368affa038 100644
--- a/samples/bpf/Makefile.target
+++ b/samples/bpf/Makefile.target
@@ -73,3 +73,14 @@ quiet_cmd_tprog-cobjs = CC $@
cmd_tprog-cobjs = $(CC) $(tprogc_flags) -c -o $@ $<
$(tprog-cobjs): $(obj)/%.o: $(src)/%.c FORCE
$(call if_changed_dep,tprog-cobjs)
+
+# Override includes for xdp_sample_user.o because $(srctree)/usr/include in
+# TPROGS_CFLAGS causes conflicts
+XDP_SAMPLE_CFLAGS += -Wall -O2 -lm \
+ -I./tools/include \
+ -I./tools/include/uapi \
+ -I./tools/lib \
+ -I./tools/testing/selftests/bpf
+$(obj)/xdp_sample_user.o: $(src)/xdp_sample_user.c \
+ $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h
+ $(CC) $(XDP_SAMPLE_CFLAGS) -c -o $@ $<
diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c
index cc3bce8d3aac..54958802c032 100644
--- a/samples/bpf/cookie_uid_helper_example.c
+++ b/samples/bpf/cookie_uid_helper_example.c
@@ -167,7 +167,7 @@ static void prog_load(void)
static void prog_attach_iptables(char *file)
{
int ret;
- char rules[100];
+ char rules[256];
if (bpf_obj_pin(prog_fd, file))
error(1, errno, "bpf_obj_pin");
@@ -175,8 +175,13 @@ static void prog_attach_iptables(char *file)
printf("file path too long: %s\n", file);
exit(1);
}
- sprintf(rules, "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT",
- file);
+ ret = snprintf(rules, sizeof(rules),
+ "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT",
+ file);
+ if (ret < 0 || ret >= sizeof(rules)) {
+ printf("error constructing iptables command\n");
+ exit(1);
+ }
ret = system(rules);
if (ret < 0) {
printf("iptables rule update failed: %d/n", WEXITSTATUS(ret));
diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c
index 14b792915a9c..4866afd054da 100644
--- a/samples/bpf/offwaketime_kern.c
+++ b/samples/bpf/offwaketime_kern.c
@@ -20,6 +20,7 @@
})
#define MINBLOCK_US 1
+#define MAX_ENTRIES 10000
struct key_t {
char waker[TASK_COMM_LEN];
@@ -32,14 +33,14 @@ struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, struct key_t);
__type(value, u64);
- __uint(max_entries, 10000);
+ __uint(max_entries, MAX_ENTRIES);
} counts SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, u32);
__type(value, u64);
- __uint(max_entries, 10000);
+ __uint(max_entries, MAX_ENTRIES);
} start SEC(".maps");
struct wokeby_t {
@@ -51,14 +52,14 @@ struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, u32);
__type(value, struct wokeby_t);
- __uint(max_entries, 10000);
+ __uint(max_entries, MAX_ENTRIES);
} wokeby SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_STACK_TRACE);
__uint(key_size, sizeof(u32));
__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
- __uint(max_entries, 10000);
+ __uint(max_entries, MAX_ENTRIES);
} stackmap SEC(".maps");
#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
diff --git a/samples/bpf/test_override_return.sh b/samples/bpf/test_override_return.sh
index e68b9ee6814b..35db26f736b9 100755
--- a/samples/bpf/test_override_return.sh
+++ b/samples/bpf/test_override_return.sh
@@ -1,5 +1,6 @@
#!/bin/bash
+rm -r tmpmnt
rm -f testfile.img
dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
DEVICE=$(losetup --show -f testfile.img)
diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c
index cea399424bca..566e6440e8c2 100644
--- a/samples/bpf/tracex4_user.c
+++ b/samples/bpf/tracex4_user.c
@@ -32,7 +32,7 @@ static void print_old_objects(int fd)
__u64 key, next_key;
struct pair v;
- key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */
+ key = write(1, "\e[1;1H\e[2J", 11); /* clear screen */
key = -1;
while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c
index fdcd6580dd73..8be7ce18d3ba 100644
--- a/samples/bpf/tracex7_user.c
+++ b/samples/bpf/tracex7_user.c
@@ -14,6 +14,11 @@ int main(int argc, char **argv)
int ret = 0;
FILE *f;
+ if (!argv[1]) {
+ fprintf(stderr, "ERROR: Run with the btrfs device argument!\n");
+ return 0;
+ }
+
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c
index 34b64394ed9c..f0c5d95084de 100644
--- a/samples/bpf/xdp1_kern.c
+++ b/samples/bpf/xdp1_kern.c
@@ -57,6 +57,7 @@ int xdp_prog1(struct xdp_md *ctx)
h_proto = eth->h_proto;
+ /* Handle VLAN tagged packet */
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;
@@ -66,6 +67,7 @@ int xdp_prog1(struct xdp_md *ctx)
return rc;
h_proto = vhdr->h_vlan_encapsulated_proto;
}
+ /* Handle double VLAN tagged packet */
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;
diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c
index c787f4b49646..d8a64ab077b0 100644
--- a/samples/bpf/xdp2_kern.c
+++ b/samples/bpf/xdp2_kern.c
@@ -73,6 +73,7 @@ int xdp_prog1(struct xdp_md *ctx)
h_proto = eth->h_proto;
+ /* Handle VLAN tagged packet */
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;
@@ -82,6 +83,7 @@ int xdp_prog1(struct xdp_md *ctx)
return rc;
h_proto = vhdr->h_vlan_encapsulated_proto;
}
+ /* Handle double VLAN tagged packet */
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;
diff --git a/samples/bpf/xdp_monitor.bpf.c b/samples/bpf/xdp_monitor.bpf.c
new file mode 100644
index 000000000000..cfb41e2205f4
--- /dev/null
+++ b/samples/bpf/xdp_monitor.bpf.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc.
+ *
+ * XDP monitor tool, based on tracepoints
+ */
+#include "xdp_sample.bpf.h"
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
deleted file mode 100644
index 5c955b812c47..000000000000
--- a/samples/bpf/xdp_monitor_kern.c
+++ /dev/null
@@ -1,257 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc.
- *
- * XDP monitor tool, based on tracepoints
- */
-#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, u64);
- __uint(max_entries, 2);
- /* TODO: have entries for all possible errno's */
-} redirect_err_cnt SEC(".maps");
-
-#define XDP_UNKNOWN XDP_REDIRECT + 1
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, u64);
- __uint(max_entries, XDP_UNKNOWN + 1);
-} exception_cnt SEC(".maps");
-
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct xdp_redirect_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int prog_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12 size:4; signed:0;
- int ifindex; // offset:16 size:4; signed:1;
- int err; // offset:20 size:4; signed:1;
- int to_ifindex; // offset:24 size:4; signed:1;
- u32 map_id; // offset:28 size:4; signed:0;
- int map_index; // offset:32 size:4; signed:1;
-}; // offset:36
-
-enum {
- XDP_REDIRECT_SUCCESS = 0,
- XDP_REDIRECT_ERROR = 1
-};
-
-static __always_inline
-int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
-{
- u32 key = XDP_REDIRECT_ERROR;
- int err = ctx->err;
- u64 *cnt;
-
- if (!err)
- key = XDP_REDIRECT_SUCCESS;
-
- cnt = bpf_map_lookup_elem(&redirect_err_cnt, &key);
- if (!cnt)
- return 1;
- *cnt += 1;
-
- return 0; /* Indicate event was filtered (no further processing)*/
- /*
- * Returning 1 here would allow e.g. a perf-record tracepoint
- * to see and record these events, but it doesn't work well
- * in-practice as stopping perf-record also unload this
- * bpf_prog. Plus, there is additional overhead of doing so.
- */
-}
-
-SEC("tracepoint/xdp/xdp_redirect_err")
-int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
-{
- return xdp_redirect_collect_stat(ctx);
-}
-
-
-SEC("tracepoint/xdp/xdp_redirect_map_err")
-int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
-{
- return xdp_redirect_collect_stat(ctx);
-}
-
-/* Likely unloaded when prog starts */
-SEC("tracepoint/xdp/xdp_redirect")
-int trace_xdp_redirect(struct xdp_redirect_ctx *ctx)
-{
- return xdp_redirect_collect_stat(ctx);
-}
-
-/* Likely unloaded when prog starts */
-SEC("tracepoint/xdp/xdp_redirect_map")
-int trace_xdp_redirect_map(struct xdp_redirect_ctx *ctx)
-{
- return xdp_redirect_collect_stat(ctx);
-}
-
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct xdp_exception_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int prog_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int ifindex; // offset:16; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_exception")
-int trace_xdp_exception(struct xdp_exception_ctx *ctx)
-{
- u64 *cnt;
- u32 key;
-
- key = ctx->act;
- if (key > XDP_REDIRECT)
- key = XDP_UNKNOWN;
-
- cnt = bpf_map_lookup_elem(&exception_cnt, &key);
- if (!cnt)
- return 1;
- *cnt += 1;
-
- return 0;
-}
-
-/* Common stats data record shared with _user.c */
-struct datarec {
- u64 processed;
- u64 dropped;
- u64 info;
- u64 err;
-};
-#define MAX_CPUS 64
-
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, MAX_CPUS);
-} cpumap_enqueue_cnt SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, 1);
-} cpumap_kthread_cnt SEC(".maps");
-
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct cpumap_enqueue_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int map_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int cpu; // offset:16; size:4; signed:1;
- unsigned int drops; // offset:20; size:4; signed:0;
- unsigned int processed; // offset:24; size:4; signed:0;
- int to_cpu; // offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_enqueue")
-int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
-{
- u32 to_cpu = ctx->to_cpu;
- struct datarec *rec;
-
- if (to_cpu >= MAX_CPUS)
- return 1;
-
- rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
- if (!rec)
- return 0;
- rec->processed += ctx->processed;
- rec->dropped += ctx->drops;
-
- /* Record bulk events, then userspace can calc average bulk size */
- if (ctx->processed > 0)
- rec->info += 1;
-
- return 0;
-}
-
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct cpumap_kthread_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int map_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int cpu; // offset:16; size:4; signed:1;
- unsigned int drops; // offset:20; size:4; signed:0;
- unsigned int processed; // offset:24; size:4; signed:0;
- int sched; // offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_kthread")
-int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
-{
- struct datarec *rec;
- u32 key = 0;
-
- rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
- if (!rec)
- return 0;
- rec->processed += ctx->processed;
- rec->dropped += ctx->drops;
-
- /* Count times kthread yielded CPU via schedule call */
- if (ctx->sched)
- rec->info++;
-
- return 0;
-}
-
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, 1);
-} devmap_xmit_cnt SEC(".maps");
-
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct devmap_xmit_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int from_ifindex; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int to_ifindex; // offset:16; size:4; signed:1;
- int drops; // offset:20; size:4; signed:1;
- int sent; // offset:24; size:4; signed:1;
- int err; // offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_devmap_xmit")
-int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx)
-{
- struct datarec *rec;
- u32 key = 0;
-
- rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &key);
- if (!rec)
- return 0;
- rec->processed += ctx->sent;
- rec->dropped += ctx->drops;
-
- /* Record bulk events, then userspace can calc average bulk size */
- rec->info += 1;
-
- /* Record error cases, where no frame were sent */
- if (ctx->err)
- rec->err++;
-
- /* Catch API error of drv ndo_xdp_xmit sent more than count */
- if (ctx->drops < 0)
- rec->err++;
-
- return 1;
-}
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index 49ebc49aefc3..fb9391a5ec62 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -1,15 +1,12 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
- */
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
static const char *__doc__=
- "XDP monitor tool, based on tracepoints\n"
-;
+"XDP monitor tool, based on tracepoints\n";
static const char *__doc_err_only__=
- " NOTICE: Only tracking XDP redirect errors\n"
- " Enable TX success stats via '--stats'\n"
- " (which comes with a per packet processing overhead)\n"
-;
+" NOTICE: Only tracking XDP redirect errors\n"
+" Enable redirect success stats via '-s/--stats'\n"
+" (which comes with a per packet processing overhead)\n";
#include <errno.h>
#include <stdio.h>
@@ -20,768 +17,103 @@ static const char *__doc_err_only__=
#include <ctype.h>
#include <unistd.h>
#include <locale.h>
-
#include <sys/resource.h>
#include <getopt.h>
#include <net/if.h>
#include <time.h>
-
#include <signal.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_monitor.skel.h"
-enum map_type {
- REDIRECT_ERR_CNT,
- EXCEPTION_CNT,
- CPUMAP_ENQUEUE_CNT,
- CPUMAP_KTHREAD_CNT,
- DEVMAP_XMIT_CNT,
-};
+static int mask = SAMPLE_REDIRECT_ERR_CNT | SAMPLE_CPUMAP_ENQUEUE_CNT |
+ SAMPLE_CPUMAP_KTHREAD_CNT | SAMPLE_EXCEPTION_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
-static const char *const map_type_strings[] = {
- [REDIRECT_ERR_CNT] = "redirect_err_cnt",
- [EXCEPTION_CNT] = "exception_cnt",
- [CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt",
- [CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt",
- [DEVMAP_XMIT_CNT] = "devmap_xmit_cnt",
-};
-
-#define NUM_MAP 5
-#define NUM_TP 8
-
-static int tp_cnt;
-static int map_cnt;
-static int verbose = 1;
-static bool debug = false;
-struct bpf_map *map_data[NUM_MAP] = {};
-struct bpf_link *tp_links[NUM_TP] = {};
-struct bpf_object *obj;
+DEFINE_SAMPLE_INIT(xdp_monitor);
static const struct option long_options[] = {
- {"help", no_argument, NULL, 'h' },
- {"debug", no_argument, NULL, 'D' },
- {"stats", no_argument, NULL, 'S' },
- {"sec", required_argument, NULL, 's' },
- {0, 0, NULL, 0 }
-};
-
-static void int_exit(int sig)
-{
- /* Detach tracepoints */
- while (tp_cnt)
- bpf_link__destroy(tp_links[--tp_cnt]);
-
- bpf_object__close(obj);
- exit(0);
-}
-
-/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */
-#define EXIT_FAIL_MEM 5
-
-static void usage(char *argv[])
-{
- int i;
- printf("\nDOCUMENTATION:\n%s\n", __doc__);
- printf("\n");
- printf(" Usage: %s (options-see-below)\n",
- argv[0]);
- printf(" Listing options:\n");
- for (i = 0; long_options[i].name != 0; i++) {
- printf(" --%-15s", long_options[i].name);
- if (long_options[i].flag != NULL)
- printf(" flag (internal value:%d)",
- *long_options[i].flag);
- else
- printf("short-option: -%c",
- long_options[i].val);
- printf("\n");
- }
- printf("\n");
-}
-
-#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
-static __u64 gettime(void)
-{
- struct timespec t;
- int res;
-
- res = clock_gettime(CLOCK_MONOTONIC, &t);
- if (res < 0) {
- fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
- exit(EXIT_FAILURE);
- }
- return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
-}
-
-enum {
- REDIR_SUCCESS = 0,
- REDIR_ERROR = 1,
-};
-#define REDIR_RES_MAX 2
-static const char *redir_names[REDIR_RES_MAX] = {
- [REDIR_SUCCESS] = "Success",
- [REDIR_ERROR] = "Error",
-};
-static const char *err2str(int err)
-{
- if (err < REDIR_RES_MAX)
- return redir_names[err];
- return NULL;
-}
-/* enum xdp_action */
-#define XDP_UNKNOWN XDP_REDIRECT + 1
-#define XDP_ACTION_MAX (XDP_UNKNOWN + 1)
-static const char *xdp_action_names[XDP_ACTION_MAX] = {
- [XDP_ABORTED] = "XDP_ABORTED",
- [XDP_DROP] = "XDP_DROP",
- [XDP_PASS] = "XDP_PASS",
- [XDP_TX] = "XDP_TX",
- [XDP_REDIRECT] = "XDP_REDIRECT",
- [XDP_UNKNOWN] = "XDP_UNKNOWN",
-};
-static const char *action2str(int action)
-{
- if (action < XDP_ACTION_MAX)
- return xdp_action_names[action];
- return NULL;
-}
-
-/* Common stats data record shared with _kern.c */
-struct datarec {
- __u64 processed;
- __u64 dropped;
- __u64 info;
- __u64 err;
-};
-#define MAX_CPUS 64
-
-/* Userspace structs for collection of stats from maps */
-struct record {
- __u64 timestamp;
- struct datarec total;
- struct datarec *cpu;
+ { "help", no_argument, NULL, 'h' },
+ { "stats", no_argument, NULL, 's' },
+ { "interval", required_argument, NULL, 'i' },
+ { "verbose", no_argument, NULL, 'v' },
+ {}
};
-struct u64rec {
- __u64 processed;
-};
-struct record_u64 {
- /* record for _kern side __u64 values */
- __u64 timestamp;
- struct u64rec total;
- struct u64rec *cpu;
-};
-
-struct stats_record {
- struct record_u64 xdp_redirect[REDIR_RES_MAX];
- struct record_u64 xdp_exception[XDP_ACTION_MAX];
- struct record xdp_cpumap_kthread;
- struct record xdp_cpumap_enqueue[MAX_CPUS];
- struct record xdp_devmap_xmit;
-};
-
-static bool map_collect_record(int fd, __u32 key, struct record *rec)
-{
- /* For percpu maps, userspace gets a value per possible CPU */
- unsigned int nr_cpus = bpf_num_possible_cpus();
- struct datarec values[nr_cpus];
- __u64 sum_processed = 0;
- __u64 sum_dropped = 0;
- __u64 sum_info = 0;
- __u64 sum_err = 0;
- int i;
-
- if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
- fprintf(stderr,
- "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
- return false;
- }
- /* Get time as close as possible to reading map contents */
- rec->timestamp = gettime();
-
- /* Record and sum values from each CPU */
- for (i = 0; i < nr_cpus; i++) {
- rec->cpu[i].processed = values[i].processed;
- sum_processed += values[i].processed;
- rec->cpu[i].dropped = values[i].dropped;
- sum_dropped += values[i].dropped;
- rec->cpu[i].info = values[i].info;
- sum_info += values[i].info;
- rec->cpu[i].err = values[i].err;
- sum_err += values[i].err;
- }
- rec->total.processed = sum_processed;
- rec->total.dropped = sum_dropped;
- rec->total.info = sum_info;
- rec->total.err = sum_err;
- return true;
-}
-
-static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec)
-{
- /* For percpu maps, userspace gets a value per possible CPU */
- unsigned int nr_cpus = bpf_num_possible_cpus();
- struct u64rec values[nr_cpus];
- __u64 sum_total = 0;
- int i;
-
- if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
- fprintf(stderr,
- "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
- return false;
- }
- /* Get time as close as possible to reading map contents */
- rec->timestamp = gettime();
-
- /* Record and sum values from each CPU */
- for (i = 0; i < nr_cpus; i++) {
- rec->cpu[i].processed = values[i].processed;
- sum_total += values[i].processed;
- }
- rec->total.processed = sum_total;
- return true;
-}
-
-static double calc_period(struct record *r, struct record *p)
-{
- double period_ = 0;
- __u64 period = 0;
-
- period = r->timestamp - p->timestamp;
- if (period > 0)
- period_ = ((double) period / NANOSEC_PER_SEC);
-
- return period_;
-}
-
-static double calc_period_u64(struct record_u64 *r, struct record_u64 *p)
-{
- double period_ = 0;
- __u64 period = 0;
-
- period = r->timestamp - p->timestamp;
- if (period > 0)
- period_ = ((double) period / NANOSEC_PER_SEC);
-
- return period_;
-}
-
-static double calc_pps(struct datarec *r, struct datarec *p, double period)
-{
- __u64 packets = 0;
- double pps = 0;
-
- if (period > 0) {
- packets = r->processed - p->processed;
- pps = packets / period;
- }
- return pps;
-}
-
-static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period)
-{
- __u64 packets = 0;
- double pps = 0;
-
- if (period > 0) {
- packets = r->processed - p->processed;
- pps = packets / period;
- }
- return pps;
-}
-
-static double calc_drop(struct datarec *r, struct datarec *p, double period)
-{
- __u64 packets = 0;
- double pps = 0;
-
- if (period > 0) {
- packets = r->dropped - p->dropped;
- pps = packets / period;
- }
- return pps;
-}
-
-static double calc_info(struct datarec *r, struct datarec *p, double period)
-{
- __u64 packets = 0;
- double pps = 0;
-
- if (period > 0) {
- packets = r->info - p->info;
- pps = packets / period;
- }
- return pps;
-}
-
-static double calc_err(struct datarec *r, struct datarec *p, double period)
-{
- __u64 packets = 0;
- double pps = 0;
-
- if (period > 0) {
- packets = r->err - p->err;
- pps = packets / period;
- }
- return pps;
-}
-
-static void stats_print(struct stats_record *stats_rec,
- struct stats_record *stats_prev,
- bool err_only)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- int rec_i = 0, i, to_cpu;
- double t = 0, pps = 0;
-
- /* Header */
- printf("%-15s %-7s %-12s %-12s %-9s\n",
- "XDP-event", "CPU:to", "pps", "drop-pps", "extra-info");
-
- /* tracepoint: xdp:xdp_redirect_* */
- if (err_only)
- rec_i = REDIR_ERROR;
-
- for (; rec_i < REDIR_RES_MAX; rec_i++) {
- struct record_u64 *rec, *prev;
- char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
- char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
-
- rec = &stats_rec->xdp_redirect[rec_i];
- prev = &stats_prev->xdp_redirect[rec_i];
- t = calc_period_u64(rec, prev);
-
- for (i = 0; i < nr_cpus; i++) {
- struct u64rec *r = &rec->cpu[i];
- struct u64rec *p = &prev->cpu[i];
-
- pps = calc_pps_u64(r, p, t);
- if (pps > 0)
- printf(fmt1, "XDP_REDIRECT", i,
- rec_i ? 0.0: pps, rec_i ? pps : 0.0,
- err2str(rec_i));
- }
- pps = calc_pps_u64(&rec->total, &prev->total, t);
- printf(fmt2, "XDP_REDIRECT", "total",
- rec_i ? 0.0: pps, rec_i ? pps : 0.0, err2str(rec_i));
- }
-
- /* tracepoint: xdp:xdp_exception */
- for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
- struct record_u64 *rec, *prev;
- char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
- char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
-
- rec = &stats_rec->xdp_exception[rec_i];
- prev = &stats_prev->xdp_exception[rec_i];
- t = calc_period_u64(rec, prev);
-
- for (i = 0; i < nr_cpus; i++) {
- struct u64rec *r = &rec->cpu[i];
- struct u64rec *p = &prev->cpu[i];
-
- pps = calc_pps_u64(r, p, t);
- if (pps > 0)
- printf(fmt1, "Exception", i,
- 0.0, pps, action2str(rec_i));
- }
- pps = calc_pps_u64(&rec->total, &prev->total, t);
- if (pps > 0)
- printf(fmt2, "Exception", "total",
- 0.0, pps, action2str(rec_i));
- }
-
- /* cpumap enqueue stats */
- for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
- char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
- char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
- struct record *rec, *prev;
- char *info_str = "";
- double drop, info;
-
- rec = &stats_rec->xdp_cpumap_enqueue[to_cpu];
- prev = &stats_prev->xdp_cpumap_enqueue[to_cpu];
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop(r, p, t);
- info = calc_info(r, p, t);
- if (info > 0) {
- info_str = "bulk-average";
- info = pps / info; /* calc average bulk size */
- }
- if (pps > 0)
- printf(fmt1, "cpumap-enqueue",
- i, to_cpu, pps, drop, info, info_str);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- if (pps > 0) {
- drop = calc_drop(&rec->total, &prev->total, t);
- info = calc_info(&rec->total, &prev->total, t);
- if (info > 0) {
- info_str = "bulk-average";
- info = pps / info; /* calc average bulk size */
- }
- printf(fmt2, "cpumap-enqueue",
- "sum", to_cpu, pps, drop, info, info_str);
- }
- }
-
- /* cpumap kthread stats */
- {
- char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n";
- char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n";
- struct record *rec, *prev;
- double drop, info;
- char *i_str = "";
-
- rec = &stats_rec->xdp_cpumap_kthread;
- prev = &stats_prev->xdp_cpumap_kthread;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop(r, p, t);
- info = calc_info(r, p, t);
- if (info > 0)
- i_str = "sched";
- if (pps > 0 || drop > 0)
- printf(fmt1, "cpumap-kthread",
- i, pps, drop, info, i_str);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop(&rec->total, &prev->total, t);
- info = calc_info(&rec->total, &prev->total, t);
- if (info > 0)
- i_str = "sched-sum";
- printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
- }
-
- /* devmap ndo_xdp_xmit stats */
- {
- char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s %s\n";
- char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s %s\n";
- struct record *rec, *prev;
- double drop, info, err;
- char *i_str = "";
- char *err_str = "";
-
- rec = &stats_rec->xdp_devmap_xmit;
- prev = &stats_prev->xdp_devmap_xmit;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop(r, p, t);
- info = calc_info(r, p, t);
- err = calc_err(r, p, t);
- if (info > 0) {
- i_str = "bulk-average";
- info = (pps+drop) / info; /* calc avg bulk */
- }
- if (err > 0)
- err_str = "drv-err";
- if (pps > 0 || drop > 0)
- printf(fmt1, "devmap-xmit",
- i, pps, drop, info, i_str, err_str);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop(&rec->total, &prev->total, t);
- info = calc_info(&rec->total, &prev->total, t);
- err = calc_err(&rec->total, &prev->total, t);
- if (info > 0) {
- i_str = "bulk-average";
- info = (pps+drop) / info; /* calc avg bulk */
- }
- if (err > 0)
- err_str = "drv-err";
- printf(fmt2, "devmap-xmit", "total", pps, drop,
- info, i_str, err_str);
- }
-
- printf("\n");
-}
-
-static bool stats_collect(struct stats_record *rec)
-{
- int fd;
- int i;
-
- /* TODO: Detect if someone unloaded the perf event_fd's, as
- * this can happen by someone running perf-record -e
- */
-
- fd = bpf_map__fd(map_data[REDIRECT_ERR_CNT]);
- for (i = 0; i < REDIR_RES_MAX; i++)
- map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);
-
- fd = bpf_map__fd(map_data[EXCEPTION_CNT]);
- for (i = 0; i < XDP_ACTION_MAX; i++) {
- map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
- }
-
- fd = bpf_map__fd(map_data[CPUMAP_ENQUEUE_CNT]);
- for (i = 0; i < MAX_CPUS; i++)
- map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);
-
- fd = bpf_map__fd(map_data[CPUMAP_KTHREAD_CNT]);
- map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
-
- fd = bpf_map__fd(map_data[DEVMAP_XMIT_CNT]);
- map_collect_record(fd, 0, &rec->xdp_devmap_xmit);
-
- return true;
-}
-
-static void *alloc_rec_per_cpu(int record_size)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- void *array;
-
- array = calloc(nr_cpus, record_size);
- if (!array) {
- fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
- exit(EXIT_FAIL_MEM);
- }
- return array;
-}
-
-static struct stats_record *alloc_stats_record(void)
-{
- struct stats_record *rec;
- int rec_sz;
- int i;
-
- /* Alloc main stats_record structure */
- rec = calloc(1, sizeof(*rec));
- if (!rec) {
- fprintf(stderr, "Mem alloc error\n");
- exit(EXIT_FAIL_MEM);
- }
-
- /* Alloc stats stored per CPU for each record */
- rec_sz = sizeof(struct u64rec);
- for (i = 0; i < REDIR_RES_MAX; i++)
- rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz);
-
- for (i = 0; i < XDP_ACTION_MAX; i++)
- rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz);
-
- rec_sz = sizeof(struct datarec);
- rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
- rec->xdp_devmap_xmit.cpu = alloc_rec_per_cpu(rec_sz);
-
- for (i = 0; i < MAX_CPUS; i++)
- rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
-
- return rec;
-}
-
-static void free_stats_record(struct stats_record *r)
-{
- int i;
-
- for (i = 0; i < REDIR_RES_MAX; i++)
- free(r->xdp_redirect[i].cpu);
-
- for (i = 0; i < XDP_ACTION_MAX; i++)
- free(r->xdp_exception[i].cpu);
-
- free(r->xdp_cpumap_kthread.cpu);
- free(r->xdp_devmap_xmit.cpu);
-
- for (i = 0; i < MAX_CPUS; i++)
- free(r->xdp_cpumap_enqueue[i].cpu);
-
- free(r);
-}
-
-/* Pointer swap trick */
-static inline void swap(struct stats_record **a, struct stats_record **b)
-{
- struct stats_record *tmp;
-
- tmp = *a;
- *a = *b;
- *b = tmp;
-}
-
-static void stats_poll(int interval, bool err_only)
-{
- struct stats_record *rec, *prev;
-
- rec = alloc_stats_record();
- prev = alloc_stats_record();
- stats_collect(rec);
-
- if (err_only)
- printf("\n%s\n", __doc_err_only__);
-
- /* Trick to pretty printf with thousands separators use %' */
- setlocale(LC_NUMERIC, "en_US");
-
- /* Header */
- if (verbose)
- printf("\n%s", __doc__);
-
- /* TODO Need more advanced stats on error types */
- if (verbose) {
- printf(" - Stats map0: %s\n", bpf_map__name(map_data[0]));
- printf(" - Stats map1: %s\n", bpf_map__name(map_data[1]));
- printf("\n");
- }
- fflush(stdout);
-
- while (1) {
- swap(&prev, &rec);
- stats_collect(rec);
- stats_print(rec, prev, err_only);
- fflush(stdout);
- sleep(interval);
- }
-
- free_stats_record(rec);
- free_stats_record(prev);
-}
-
-static void print_bpf_prog_info(void)
-{
- struct bpf_program *prog;
- struct bpf_map *map;
- int i = 0;
-
- /* Prog info */
- printf("Loaded BPF prog have %d bpf program(s)\n", tp_cnt);
- bpf_object__for_each_program(prog, obj) {
- printf(" - prog_fd[%d] = fd(%d)\n", i, bpf_program__fd(prog));
- i++;
- }
-
- i = 0;
- /* Maps info */
- printf("Loaded BPF prog have %d map(s)\n", map_cnt);
- bpf_object__for_each_map(map, obj) {
- const char *name = bpf_map__name(map);
- int fd = bpf_map__fd(map);
-
- printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name);
- i++;
- }
-
- /* Event info */
- printf("Searching for (max:%d) event file descriptor(s)\n", tp_cnt);
- for (i = 0; i < tp_cnt; i++) {
- int fd = bpf_link__fd(tp_links[i]);
-
- if (fd != -1)
- printf(" - event_fd[%d] = fd(%d)\n", i, fd);
- }
-}
int main(int argc, char **argv)
{
- struct bpf_program *prog;
- int longindex = 0, opt;
- int ret = EXIT_FAILURE;
- enum map_type type;
- char filename[256];
-
- /* Default settings: */
+ unsigned long interval = 2;
+ int ret = EXIT_FAIL_OPTION;
+ struct xdp_monitor *skel;
bool errors_only = true;
- int interval = 2;
+ int longindex = 0, opt;
+ bool error = true;
/* Parse commands line args */
- while ((opt = getopt_long(argc, argv, "hDSs:",
+ while ((opt = getopt_long(argc, argv, "si:vh",
long_options, &longindex)) != -1) {
switch (opt) {
- case 'D':
- debug = true;
- break;
- case 'S':
+ case 's':
errors_only = false;
+ mask |= SAMPLE_REDIRECT_CNT;
break;
- case 's':
- interval = atoi(optarg);
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
+ break;
+ case 'v':
+ sample_switch_mode();
break;
case 'h':
+ error = false;
default:
- usage(argv);
+ sample_usage(argv, long_options, __doc__, mask, error);
return ret;
}
}
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- /* Remove tracepoint program when program is interrupted or killed */
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
-
- obj = bpf_object__open_file(filename, NULL);
- if (libbpf_get_error(obj)) {
- printf("ERROR: opening BPF object file failed\n");
- obj = NULL;
- goto cleanup;
- }
-
- /* load BPF program */
- if (bpf_object__load(obj)) {
- printf("ERROR: loading BPF object file failed\n");
- goto cleanup;
+ skel = xdp_monitor__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_monitor__open: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end;
}
- for (type = 0; type < NUM_MAP; type++) {
- map_data[type] =
- bpf_object__find_map_by_name(obj, map_type_strings[type]);
-
- if (libbpf_get_error(map_data[type])) {
- printf("ERROR: finding a map in obj file failed\n");
- goto cleanup;
- }
- map_cnt++;
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- bpf_object__for_each_program(prog, obj) {
- tp_links[tp_cnt] = bpf_program__attach(prog);
- if (libbpf_get_error(tp_links[tp_cnt])) {
- printf("ERROR: bpf_program__attach failed\n");
- tp_links[tp_cnt] = NULL;
- goto cleanup;
- }
- tp_cnt++;
+ ret = xdp_monitor__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_monitor__load: %s\n", strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- if (debug) {
- print_bpf_prog_info();
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- /* Unload/stop tracepoint event by closing bpf_link's */
- if (errors_only) {
- /* The bpf_link[i] depend on the order of
- * the functions was defined in _kern.c
- */
- bpf_link__destroy(tp_links[2]); /* tracepoint/xdp/xdp_redirect */
- tp_links[2] = NULL;
+ if (errors_only)
+ printf("%s", __doc_err_only__);
- bpf_link__destroy(tp_links[3]); /* tracepoint/xdp/xdp_redirect_map */
- tp_links[3] = NULL;
+ ret = sample_run(interval, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
-
- stats_poll(interval, errors_only);
-
- ret = EXIT_SUCCESS;
-
-cleanup:
- /* Detach tracepoints */
- while (tp_cnt)
- bpf_link__destroy(tp_links[--tp_cnt]);
-
- bpf_object__close(obj);
- return ret;
+ ret = EXIT_OK;
+end_destroy:
+ xdp_monitor__destroy(skel);
+end:
+ sample_exit(ret);
}
diff --git a/samples/bpf/xdp_redirect.bpf.c b/samples/bpf/xdp_redirect.bpf.c
new file mode 100644
index 000000000000..7c02bacfe96b
--- /dev/null
+++ b/samples/bpf/xdp_redirect.bpf.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
+
+const volatile int ifindex_out;
+
+SEC("xdp")
+int xdp_redirect_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
+ struct ethhdr *eth = data;
+ struct datarec *rec;
+ u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+ swap_src_dst_mac(data);
+ return bpf_redirect(ifindex_out, 0);
+}
+
+/* Redirect require an XDP bpf_prog loaded on the TX device */
+SEC("xdp")
+int xdp_redirect_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu.bpf.c
index 8255025dea97..f10fe3cf25f6 100644
--- a/samples/bpf/xdp_redirect_cpu_kern.c
+++ b/samples/bpf/xdp_redirect_cpu.bpf.c
@@ -2,74 +2,18 @@
*
* GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
*/
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/if_packet.h>
-#include <uapi/linux/if_vlan.h>
-#include <uapi/linux/ip.h>
-#include <uapi/linux/ipv6.h>
-#include <uapi/linux/in.h>
-#include <uapi/linux/tcp.h>
-#include <uapi/linux/udp.h>
-
-#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
#include "hash_func01.h"
-#define MAX_CPUS NR_CPUS
-
/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
__uint(type, BPF_MAP_TYPE_CPUMAP);
__uint(key_size, sizeof(u32));
__uint(value_size, sizeof(struct bpf_cpumap_val));
- __uint(max_entries, MAX_CPUS);
} cpu_map SEC(".maps");
-/* Common stats data record to keep userspace more simple */
-struct datarec {
- __u64 processed;
- __u64 dropped;
- __u64 issue;
- __u64 xdp_pass;
- __u64 xdp_drop;
- __u64 xdp_redirect;
-};
-
-/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
- * feedback. Redirect TX errors can be caught via a tracepoint.
- */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, 1);
-} rx_cnt SEC(".maps");
-
-/* Used by trace point */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, 2);
- /* TODO: have entries for all possible errno's */
-} redirect_err_cnt SEC(".maps");
-
-/* Used by trace point */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, MAX_CPUS);
-} cpumap_enqueue_cnt SEC(".maps");
-
-/* Used by trace point */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, 1);
-} cpumap_kthread_cnt SEC(".maps");
-
/* Set of maps controlling available CPU, and for iterating through
* selectable redirect CPUs.
*/
@@ -77,14 +21,15 @@ struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, u32);
__type(value, u32);
- __uint(max_entries, MAX_CPUS);
} cpus_available SEC(".maps");
+
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, u32);
__type(value, u32);
__uint(max_entries, 1);
} cpus_count SEC(".maps");
+
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__type(key, u32);
@@ -92,24 +37,16 @@ struct {
__uint(max_entries, 1);
} cpus_iterator SEC(".maps");
-/* Used by trace point */
struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
__uint(max_entries, 1);
-} exception_cnt SEC(".maps");
+} tx_port SEC(".maps");
-/* Helper parse functions */
+char tx_mac_addr[ETH_ALEN];
-/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
- *
- * Returns false on error and non-supported ether-type
- */
-struct vlan_hdr {
- __be16 h_vlan_TCI;
- __be16 h_vlan_encapsulated_proto;
-};
+/* Helper parse functions */
static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
@@ -125,11 +62,12 @@ bool parse_eth(struct ethhdr *eth, void *data_end,
eth_type = eth->h_proto;
/* Skip non 802.3 Ethertypes */
- if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
+ if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
return false;
/* Handle VLAN tagged packet */
- if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
+ if (eth_type == bpf_htons(ETH_P_8021Q) ||
+ eth_type == bpf_htons(ETH_P_8021AD)) {
struct vlan_hdr *vlan_hdr;
vlan_hdr = (void *)eth + offset;
@@ -139,7 +77,8 @@ bool parse_eth(struct ethhdr *eth, void *data_end,
eth_type = vlan_hdr->h_vlan_encapsulated_proto;
}
/* Handle double VLAN tagged packet */
- if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
+ if (eth_type == bpf_htons(ETH_P_8021Q) ||
+ eth_type == bpf_htons(ETH_P_8021AD)) {
struct vlan_hdr *vlan_hdr;
vlan_hdr = (void *)eth + offset;
@@ -149,7 +88,7 @@ bool parse_eth(struct ethhdr *eth, void *data_end,
eth_type = vlan_hdr->h_vlan_encapsulated_proto;
}
- *eth_proto = ntohs(eth_type);
+ *eth_proto = bpf_ntohs(eth_type);
*l3_offset = offset;
return true;
}
@@ -172,7 +111,7 @@ u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
if (udph + 1 > data_end)
return 0;
- dport = ntohs(udph->dest);
+ dport = bpf_ntohs(udph->dest);
return dport;
}
@@ -200,50 +139,48 @@ int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
return ip6h->nexthdr;
}
-SEC("xdp_cpu_map0")
+SEC("xdp")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct datarec *rec;
u32 *cpu_selected;
- u32 cpu_dest;
- u32 key = 0;
+ u32 cpu_dest = 0;
+ u32 key0 = 0;
/* Only use first entry in cpus_available */
- cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
if (!cpu_selected)
return XDP_ABORTED;
cpu_dest = *cpu_selected;
- /* Count RX packet in map */
rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-SEC("xdp_cpu_map1_touch_data")
+SEC("xdp")
int xdp_prognum1_touch_data(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct ethhdr *eth = data;
struct datarec *rec;
u32 *cpu_selected;
- u32 cpu_dest;
+ u32 cpu_dest = 0;
+ u32 key0 = 0;
u16 eth_type;
- u32 key = 0;
/* Only use first entry in cpus_available */
- cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
if (!cpu_selected)
return XDP_ABORTED;
cpu_dest = *cpu_selected;
@@ -252,36 +189,33 @@ int xdp_prognum1_touch_data(struct xdp_md *ctx)
if (eth + 1 > data_end)
return XDP_ABORTED;
- /* Count RX packet in map */
rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
/* Read packet data, and use it (drop non 802.3 Ethertypes) */
eth_type = eth->h_proto;
- if (ntohs(eth_type) < ETH_P_802_3_MIN) {
- rec->dropped++;
+ if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
+ NO_TEAR_INC(rec->dropped);
return XDP_DROP;
}
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-SEC("xdp_cpu_map2_round_robin")
+SEC("xdp")
int xdp_prognum2_round_robin(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
- struct ethhdr *eth = data;
+ u32 key = bpf_get_smp_processor_id();
struct datarec *rec;
- u32 cpu_dest;
- u32 *cpu_lookup;
+ u32 cpu_dest = 0;
u32 key0 = 0;
u32 *cpu_selected;
@@ -307,40 +241,37 @@ int xdp_prognum2_round_robin(struct xdp_md *ctx)
return XDP_ABORTED;
cpu_dest = *cpu_selected;
- /* Count RX packet in map */
- rec = bpf_map_lookup_elem(&rx_cnt, &key0);
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-SEC("xdp_cpu_map3_proto_separate")
+SEC("xdp")
int xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct ethhdr *eth = data;
u8 ip_proto = IPPROTO_UDP;
struct datarec *rec;
u16 eth_proto = 0;
u64 l3_offset = 0;
u32 cpu_dest = 0;
- u32 cpu_idx = 0;
u32 *cpu_lookup;
- u32 key = 0;
+ u32 cpu_idx = 0;
- /* Count RX packet in map */
rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
return XDP_PASS; /* Just skip */
@@ -381,35 +312,33 @@ int xdp_prognum3_proto_separate(struct xdp_md *ctx)
return XDP_ABORTED;
cpu_dest = *cpu_lookup;
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-SEC("xdp_cpu_map4_ddos_filter_pktgen")
+SEC("xdp")
int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct ethhdr *eth = data;
u8 ip_proto = IPPROTO_UDP;
struct datarec *rec;
u16 eth_proto = 0;
u64 l3_offset = 0;
u32 cpu_dest = 0;
+ u32 *cpu_lookup;
u32 cpu_idx = 0;
u16 dest_port;
- u32 *cpu_lookup;
- u32 key = 0;
- /* Count RX packet in map */
rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
return XDP_PASS; /* Just skip */
@@ -443,8 +372,7 @@ int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
/* DDoS filter UDP port 9 (pktgen) */
dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
if (dest_port == 9) {
- if (rec)
- rec->dropped++;
+ NO_TEAR_INC(rec->dropped);
return XDP_DROP;
}
break;
@@ -457,11 +385,10 @@ int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
return XDP_ABORTED;
cpu_dest = *cpu_lookup;
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
@@ -496,10 +423,10 @@ u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
if (ip6h + 1 > data_end)
return 0;
- cpu_hash = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
- cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
- cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
- cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
+ cpu_hash = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
+ cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
+ cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
+ cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);
return cpu_hash;
@@ -509,30 +436,29 @@ u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
* hashing scheme is symmetric, meaning swapping IP src/dest still hit
* same CPU.
*/
-SEC("xdp_cpu_map5_lb_hash_ip_pairs")
+SEC("xdp")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct ethhdr *eth = data;
- u8 ip_proto = IPPROTO_UDP;
struct datarec *rec;
u16 eth_proto = 0;
u64 l3_offset = 0;
u32 cpu_dest = 0;
u32 cpu_idx = 0;
u32 *cpu_lookup;
+ u32 key0 = 0;
u32 *cpu_max;
u32 cpu_hash;
- u32 key = 0;
- /* Count RX packet in map */
rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
- cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
+ cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
if (!cpu_max)
return XDP_ABORTED;
@@ -560,171 +486,56 @@ int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
return XDP_ABORTED;
cpu_dest = *cpu_lookup;
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-char _license[] SEC("license") = "GPL";
-
-/*** Trace point code ***/
-
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct xdp_redirect_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int prog_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12 size:4; signed:0;
- int ifindex; // offset:16 size:4; signed:1;
- int err; // offset:20 size:4; signed:1;
- int to_ifindex; // offset:24 size:4; signed:1;
- u32 map_id; // offset:28 size:4; signed:0;
- int map_index; // offset:32 size:4; signed:1;
-}; // offset:36
-
-enum {
- XDP_REDIRECT_SUCCESS = 0,
- XDP_REDIRECT_ERROR = 1
-};
-
-static __always_inline
-int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
+SEC("xdp_cpumap/redirect") /* built-in CPUMAP program used when only -r is given */
+int xdp_redirect_cpu_devmap(struct xdp_md *ctx) /* forwards the frame through tx_port */
{
- u32 key = XDP_REDIRECT_ERROR;
- struct datarec *rec;
- int err = ctx->err;
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ u64 nh_off;
- if (!err)
- key = XDP_REDIRECT_SUCCESS;
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end) /* frame is shorter than an Ethernet header */
+ return XDP_DROP;
- rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
- if (!rec)
- return 0;
- rec->dropped += 1;
-
- return 0; /* Indicate event was filtered (no further processing)*/
- /*
- * Returning 1 here would allow e.g. a perf-record tracepoint
- * to see and record these events, but it doesn't work well
- * in-practice as stopping perf-record also unload this
- * bpf_prog. Plus, there is additional overhead of doing so.
- */
+ swap_src_dst_mac(data);
+ return bpf_redirect_map(&tx_port, 0, 0); /* key 0 is the slot userspace fills */
}
-SEC("tracepoint/xdp/xdp_redirect_err")
-int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
+SEC("xdp_cpumap/pass") /* built-in CPUMAP program selected with "-e pass" */
+int xdp_redirect_cpu_pass(struct xdp_md *ctx)
{
- return xdp_redirect_collect_stat(ctx);
+ return XDP_PASS; /* unconditional: the packet is never inspected */
}
-SEC("tracepoint/xdp/xdp_redirect_map_err")
-int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
+SEC("xdp_cpumap/drop") /* built-in CPUMAP program selected with "-e drop" */
+int xdp_redirect_cpu_drop(struct xdp_md *ctx)
{
- return xdp_redirect_collect_stat(ctx);
+ return XDP_DROP; /* unconditional: the packet is never inspected */
}
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct xdp_exception_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int prog_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int ifindex; // offset:16; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_exception")
-int trace_xdp_exception(struct xdp_exception_ctx *ctx)
+SEC("xdp_devmap/egress") /* installed by userspace as the tx_port entry's program */
+int xdp_redirect_egress_prog(struct xdp_md *ctx) /* rewrites the source MAC, then passes */
{
- struct datarec *rec;
- u32 key = 0;
-
- rec = bpf_map_lookup_elem(&exception_cnt, &key);
- if (!rec)
- return 1;
- rec->dropped += 1;
-
- return 0;
-}
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ u64 nh_off;
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct cpumap_enqueue_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int map_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int cpu; // offset:16; size:4; signed:1;
- unsigned int drops; // offset:20; size:4; signed:0;
- unsigned int processed; // offset:24; size:4; signed:0;
- int to_cpu; // offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_enqueue")
-int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
-{
- u32 to_cpu = ctx->to_cpu;
- struct datarec *rec;
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end) /* frame is shorter than an Ethernet header */
+ return XDP_DROP;
- if (to_cpu >= MAX_CPUS)
- return 1;
+ __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN); /* tx_mac_addr is written by userspace via skel->bss */
- rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
- if (!rec)
- return 0;
- rec->processed += ctx->processed;
- rec->dropped += ctx->drops;
-
- /* Record bulk events, then userspace can calc average bulk size */
- if (ctx->processed > 0)
- rec->issue += 1;
-
- /* Inception: It's possible to detect overload situations, via
- * this tracepoint. This can be used for creating a feedback
- * loop to XDP, which can take appropriate actions to mitigate
- * this overload situation.
- */
- return 0;
+ return XDP_PASS;
}
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct cpumap_kthread_ctx {
- u64 __pad; // First 8 bytes are not accessible
- int map_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int cpu; // offset:16; size:4; signed:1;
- unsigned int drops; // offset:20; size:4; signed:0;
- unsigned int processed; // offset:24; size:4; signed:0;
- int sched; // offset:28; size:4; signed:1;
- unsigned int xdp_pass; // offset:32; size:4; signed:0;
- unsigned int xdp_drop; // offset:36; size:4; signed:0;
- unsigned int xdp_redirect; // offset:40; size:4; signed:0;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_kthread")
-int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
-{
- struct datarec *rec;
- u32 key = 0;
-
- rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
- if (!rec)
- return 0;
- rec->processed += ctx->processed;
- rec->dropped += ctx->drops;
- rec->xdp_pass += ctx->xdp_pass;
- rec->xdp_drop += ctx->xdp_drop;
- rec->xdp_redirect += ctx->xdp_redirect;
-
- /* Count times kthread yielded CPU via schedule call */
- if (ctx->sched)
- rec->issue++;
-
- return 0;
-}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 576411612523..6e25fba64c72 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -2,7 +2,16 @@
/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
*/
static const char *__doc__ =
- " XDP redirect with a CPU-map type \"BPF_MAP_TYPE_CPUMAP\"";
+"XDP CPU redirect tool, using BPF_MAP_TYPE_CPUMAP\n"
+"Usage: xdp_redirect_cpu -d <IFINDEX|IFNAME> -c 0 ... -c N\n"
+"Valid specification for CPUMAP BPF program:\n"
+" --mprog-name/-e pass (use built-in XDP_PASS program)\n"
+" --mprog-name/-e drop (use built-in XDP_DROP program)\n"
+" --redirect-device/-r <ifindex|ifname> (use built-in DEVMAP redirect program)\n"
+" Custom CPUMAP BPF program:\n"
+" --mprog-filename/-f <filename> --mprog-name/-e <program>\n"
+" Optionally, also pass --redirect-map/-m and --redirect-device/-r together\n"
+" to configure DEVMAP in BPF object <filename>\n";
#include <errno.h>
#include <signal.h>
@@ -18,558 +27,62 @@ static const char *__doc__ =
#include <net/if.h>
#include <time.h>
#include <linux/limits.h>
-
#include <arpa/inet.h>
#include <linux/if_link.h>
-
-/* How many xdp_progs are defined in _kern.c */
-#define MAX_PROG 6
-
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-
#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_redirect_cpu.skel.h"
-static int ifindex = -1;
-static char ifname_buf[IF_NAMESIZE];
-static char *ifname;
-static __u32 prog_id;
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int n_cpus;
-
-enum map_type {
- CPU_MAP,
- RX_CNT,
- REDIRECT_ERR_CNT,
- CPUMAP_ENQUEUE_CNT,
- CPUMAP_KTHREAD_CNT,
- CPUS_AVAILABLE,
- CPUS_COUNT,
- CPUS_ITERATOR,
- EXCEPTION_CNT,
-};
-
-static const char *const map_type_strings[] = {
- [CPU_MAP] = "cpu_map",
- [RX_CNT] = "rx_cnt",
- [REDIRECT_ERR_CNT] = "redirect_err_cnt",
- [CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt",
- [CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt",
- [CPUS_AVAILABLE] = "cpus_available",
- [CPUS_COUNT] = "cpus_count",
- [CPUS_ITERATOR] = "cpus_iterator",
- [EXCEPTION_CNT] = "exception_cnt",
-};
+static int map_fd;
+static int avail_fd;
+static int count_fd;
-#define NUM_TP 5
-#define NUM_MAP 9
-struct bpf_link *tp_links[NUM_TP] = {};
-static int map_fds[NUM_MAP];
-static int tp_cnt = 0;
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+ SAMPLE_CPUMAP_ENQUEUE_CNT | SAMPLE_CPUMAP_KTHREAD_CNT |
+ SAMPLE_EXCEPTION_CNT;
-/* Exit return codes */
-#define EXIT_OK 0
-#define EXIT_FAIL 1
-#define EXIT_FAIL_OPTION 2
-#define EXIT_FAIL_XDP 3
-#define EXIT_FAIL_BPF 4
-#define EXIT_FAIL_MEM 5
+DEFINE_SAMPLE_INIT(xdp_redirect_cpu);
static const struct option long_options[] = {
- {"help", no_argument, NULL, 'h' },
- {"dev", required_argument, NULL, 'd' },
- {"skb-mode", no_argument, NULL, 'S' },
- {"sec", required_argument, NULL, 's' },
- {"progname", required_argument, NULL, 'p' },
- {"qsize", required_argument, NULL, 'q' },
- {"cpu", required_argument, NULL, 'c' },
- {"stress-mode", no_argument, NULL, 'x' },
- {"no-separators", no_argument, NULL, 'z' },
- {"force", no_argument, NULL, 'F' },
- {"mprog-disable", no_argument, NULL, 'n' },
- {"mprog-name", required_argument, NULL, 'e' },
- {"mprog-filename", required_argument, NULL, 'f' },
- {"redirect-device", required_argument, NULL, 'r' },
- {"redirect-map", required_argument, NULL, 'm' },
- {0, 0, NULL, 0 }
+ { "help", no_argument, NULL, 'h' },
+ { "dev", required_argument, NULL, 'd' },
+ { "skb-mode", no_argument, NULL, 'S' },
+ { "progname", required_argument, NULL, 'p' },
+ { "qsize", required_argument, NULL, 'q' },
+ { "cpu", required_argument, NULL, 'c' },
+ { "stress-mode", no_argument, NULL, 'x' },
+ { "force", no_argument, NULL, 'F' },
+ { "interval", required_argument, NULL, 'i' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "stats", no_argument, NULL, 's' },
+ { "mprog-name", required_argument, NULL, 'e' },
+ { "mprog-filename", required_argument, NULL, 'f' },
+ { "redirect-device", required_argument, NULL, 'r' },
+ { "redirect-map", required_argument, NULL, 'm' },
+ {}
};
-static void int_exit(int sig)
-{
- __u32 curr_prog_id = 0;
-
- if (ifindex > -1) {
- if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(EXIT_FAIL);
- }
- if (prog_id == curr_prog_id) {
- fprintf(stderr,
- "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
- ifindex, ifname);
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
- } else if (!curr_prog_id) {
- printf("couldn't find a prog id on a given iface\n");
- } else {
- printf("program on interface changed, not removing\n");
- }
- }
- /* Detach tracepoints */
- while (tp_cnt)
- bpf_link__destroy(tp_links[--tp_cnt]);
-
- exit(EXIT_OK);
-}
-
static void print_avail_progs(struct bpf_object *obj)
{
struct bpf_program *pos;
+ printf(" Programs to be used for -p/--progname:\n");
bpf_object__for_each_program(pos, obj) {
- if (bpf_program__is_xdp(pos))
- printf(" %s\n", bpf_program__section_name(pos));
- }
-}
-
-static void usage(char *argv[], struct bpf_object *obj)
-{
- int i;
-
- printf("\nDOCUMENTATION:\n%s\n", __doc__);
- printf("\n");
- printf(" Usage: %s (options-see-below)\n", argv[0]);
- printf(" Listing options:\n");
- for (i = 0; long_options[i].name != 0; i++) {
- printf(" --%-12s", long_options[i].name);
- if (long_options[i].flag != NULL)
- printf(" flag (internal value:%d)",
- *long_options[i].flag);
- else
- printf(" short-option: -%c",
- long_options[i].val);
- printf("\n");
- }
- printf("\n Programs to be used for --progname:\n");
- print_avail_progs(obj);
- printf("\n");
-}
-
-/* gettime returns the current time of day in nanoseconds.
- * Cost: clock_gettime (ns) => 26ns (CLOCK_MONOTONIC)
- * clock_gettime (ns) => 9ns (CLOCK_MONOTONIC_COARSE)
- */
-#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
-static __u64 gettime(void)
-{
- struct timespec t;
- int res;
-
- res = clock_gettime(CLOCK_MONOTONIC, &t);
- if (res < 0) {
- fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
- exit(EXIT_FAIL);
- }
- return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
-}
-
-/* Common stats data record shared with _kern.c */
-struct datarec {
- __u64 processed;
- __u64 dropped;
- __u64 issue;
- __u64 xdp_pass;
- __u64 xdp_drop;
- __u64 xdp_redirect;
-};
-struct record {
- __u64 timestamp;
- struct datarec total;
- struct datarec *cpu;
-};
-struct stats_record {
- struct record rx_cnt;
- struct record redir_err;
- struct record kthread;
- struct record exception;
- struct record enq[];
-};
-
-static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
-{
- /* For percpu maps, userspace gets a value per possible CPU */
- unsigned int nr_cpus = bpf_num_possible_cpus();
- struct datarec values[nr_cpus];
- __u64 sum_xdp_redirect = 0;
- __u64 sum_xdp_pass = 0;
- __u64 sum_xdp_drop = 0;
- __u64 sum_processed = 0;
- __u64 sum_dropped = 0;
- __u64 sum_issue = 0;
- int i;
-
- if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
- fprintf(stderr,
- "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
- return false;
- }
- /* Get time as close as possible to reading map contents */
- rec->timestamp = gettime();
-
- /* Record and sum values from each CPU */
- for (i = 0; i < nr_cpus; i++) {
- rec->cpu[i].processed = values[i].processed;
- sum_processed += values[i].processed;
- rec->cpu[i].dropped = values[i].dropped;
- sum_dropped += values[i].dropped;
- rec->cpu[i].issue = values[i].issue;
- sum_issue += values[i].issue;
- rec->cpu[i].xdp_pass = values[i].xdp_pass;
- sum_xdp_pass += values[i].xdp_pass;
- rec->cpu[i].xdp_drop = values[i].xdp_drop;
- sum_xdp_drop += values[i].xdp_drop;
- rec->cpu[i].xdp_redirect = values[i].xdp_redirect;
- sum_xdp_redirect += values[i].xdp_redirect;
- }
- rec->total.processed = sum_processed;
- rec->total.dropped = sum_dropped;
- rec->total.issue = sum_issue;
- rec->total.xdp_pass = sum_xdp_pass;
- rec->total.xdp_drop = sum_xdp_drop;
- rec->total.xdp_redirect = sum_xdp_redirect;
- return true;
-}
-
-static struct datarec *alloc_record_per_cpu(void)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- struct datarec *array;
-
- array = calloc(nr_cpus, sizeof(struct datarec));
- if (!array) {
- fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
- exit(EXIT_FAIL_MEM);
- }
- return array;
-}
-
-static struct stats_record *alloc_stats_record(void)
-{
- struct stats_record *rec;
- int i, size;
-
- size = sizeof(*rec) + n_cpus * sizeof(struct record);
- rec = malloc(size);
- if (!rec) {
- fprintf(stderr, "Mem alloc error\n");
- exit(EXIT_FAIL_MEM);
- }
- memset(rec, 0, size);
- rec->rx_cnt.cpu = alloc_record_per_cpu();
- rec->redir_err.cpu = alloc_record_per_cpu();
- rec->kthread.cpu = alloc_record_per_cpu();
- rec->exception.cpu = alloc_record_per_cpu();
- for (i = 0; i < n_cpus; i++)
- rec->enq[i].cpu = alloc_record_per_cpu();
-
- return rec;
-}
-
-static void free_stats_record(struct stats_record *r)
-{
- int i;
-
- for (i = 0; i < n_cpus; i++)
- free(r->enq[i].cpu);
- free(r->exception.cpu);
- free(r->kthread.cpu);
- free(r->redir_err.cpu);
- free(r->rx_cnt.cpu);
- free(r);
-}
-
-static double calc_period(struct record *r, struct record *p)
-{
- double period_ = 0;
- __u64 period = 0;
-
- period = r->timestamp - p->timestamp;
- if (period > 0)
- period_ = ((double) period / NANOSEC_PER_SEC);
-
- return period_;
-}
-
-static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
-{
- __u64 packets = 0;
- __u64 pps = 0;
-
- if (period_ > 0) {
- packets = r->processed - p->processed;
- pps = packets / period_;
- }
- return pps;
-}
-
-static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_)
-{
- __u64 packets = 0;
- __u64 pps = 0;
-
- if (period_ > 0) {
- packets = r->dropped - p->dropped;
- pps = packets / period_;
- }
- return pps;
-}
-
-static __u64 calc_errs_pps(struct datarec *r,
- struct datarec *p, double period_)
-{
- __u64 packets = 0;
- __u64 pps = 0;
-
- if (period_ > 0) {
- packets = r->issue - p->issue;
- pps = packets / period_;
- }
- return pps;
-}
-
-static void calc_xdp_pps(struct datarec *r, struct datarec *p,
- double *xdp_pass, double *xdp_drop,
- double *xdp_redirect, double period_)
-{
- *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0;
- if (period_ > 0) {
- *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_;
- *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_;
- *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_;
- }
-}
-
-static void stats_print(struct stats_record *stats_rec,
- struct stats_record *stats_prev,
- char *prog_name, char *mprog_name, int mprog_fd)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- double pps = 0, drop = 0, err = 0;
- bool mprog_enabled = false;
- struct record *rec, *prev;
- int to_cpu;
- double t;
- int i;
-
- if (mprog_fd > 0)
- mprog_enabled = true;
-
- /* Header */
- printf("Running XDP/eBPF prog_name:%s\n", prog_name);
- printf("%-15s %-7s %-14s %-11s %-9s\n",
- "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info");
-
- /* XDP rx_cnt */
- {
- char *fmt_rx = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
- char *fm2_rx = "%-15s %-7s %'-14.0f %'-11.0f\n";
- char *errstr = "";
-
- rec = &stats_rec->rx_cnt;
- prev = &stats_prev->rx_cnt;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop_pps(r, p, t);
- err = calc_errs_pps(r, p, t);
- if (err > 0)
- errstr = "cpu-dest/err";
- if (pps > 0)
- printf(fmt_rx, "XDP-RX",
- i, pps, drop, err, errstr);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop_pps(&rec->total, &prev->total, t);
- err = calc_errs_pps(&rec->total, &prev->total, t);
- printf(fm2_rx, "XDP-RX", "total", pps, drop);
- }
-
- /* cpumap enqueue stats */
- for (to_cpu = 0; to_cpu < n_cpus; to_cpu++) {
- char *fmt = "%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
- char *fm2 = "%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
- char *errstr = "";
-
- rec = &stats_rec->enq[to_cpu];
- prev = &stats_prev->enq[to_cpu];
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop_pps(r, p, t);
- err = calc_errs_pps(r, p, t);
- if (err > 0) {
- errstr = "bulk-average";
- err = pps / err; /* calc average bulk size */
- }
- if (pps > 0)
- printf(fmt, "cpumap-enqueue",
- i, to_cpu, pps, drop, err, errstr);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- if (pps > 0) {
- drop = calc_drop_pps(&rec->total, &prev->total, t);
- err = calc_errs_pps(&rec->total, &prev->total, t);
- if (err > 0) {
- errstr = "bulk-average";
- err = pps / err; /* calc average bulk size */
- }
- printf(fm2, "cpumap-enqueue",
- "sum", to_cpu, pps, drop, err, errstr);
- }
- }
-
- /* cpumap kthread stats */
- {
- char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
- char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f %s\n";
- char *e_str = "";
-
- rec = &stats_rec->kthread;
- prev = &stats_prev->kthread;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop_pps(r, p, t);
- err = calc_errs_pps(r, p, t);
- if (err > 0)
- e_str = "sched";
- if (pps > 0)
- printf(fmt_k, "cpumap_kthread",
- i, pps, drop, err, e_str);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop_pps(&rec->total, &prev->total, t);
- err = calc_errs_pps(&rec->total, &prev->total, t);
- if (err > 0)
- e_str = "sched-sum";
- printf(fm2_k, "cpumap_kthread", "total", pps, drop, err, e_str);
- }
-
- /* XDP redirect err tracepoints (very unlikely) */
- {
- char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
- char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";
-
- rec = &stats_rec->redir_err;
- prev = &stats_prev->redir_err;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop_pps(r, p, t);
- if (pps > 0)
- printf(fmt_err, "redirect_err", i, pps, drop);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop_pps(&rec->total, &prev->total, t);
- printf(fm2_err, "redirect_err", "total", pps, drop);
- }
-
- /* XDP general exception tracepoints */
- {
- char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
- char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";
-
- rec = &stats_rec->exception;
- prev = &stats_prev->exception;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop_pps(r, p, t);
- if (pps > 0)
- printf(fmt_err, "xdp_exception", i, pps, drop);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop_pps(&rec->total, &prev->total, t);
- printf(fm2_err, "xdp_exception", "total", pps, drop);
- }
-
- /* CPUMAP attached XDP program that runs on remote/destination CPU */
- if (mprog_enabled) {
- char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f\n";
- char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f\n";
- double xdp_pass, xdp_drop, xdp_redirect;
-
- printf("\n2nd remote XDP/eBPF prog_name: %s\n", mprog_name);
- printf("%-15s %-7s %-14s %-11s %-9s\n",
- "XDP-cpumap", "CPU:to", "xdp-pass", "xdp-drop", "xdp-redir");
-
- rec = &stats_rec->kthread;
- prev = &stats_prev->kthread;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- calc_xdp_pps(r, p, &xdp_pass, &xdp_drop,
- &xdp_redirect, t);
- if (xdp_pass > 0 || xdp_drop > 0 || xdp_redirect > 0)
- printf(fmt_k, "xdp-in-kthread", i, xdp_pass, xdp_drop,
- xdp_redirect);
+ if (bpf_program__is_xdp(pos)) {
+ if (!strncmp(bpf_program__name(pos), "xdp_prognum",
+ sizeof("xdp_prognum") - 1))
+ printf(" %s\n", bpf_program__name(pos));
}
- calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop,
- &xdp_redirect, t);
- printf(fm2_k, "xdp-in-kthread", "total", xdp_pass, xdp_drop, xdp_redirect);
}
-
- printf("\n");
- fflush(stdout);
}
-static void stats_collect(struct stats_record *rec)
+static void usage(char *argv[], const struct option *long_options, /* prints sample_usage() output, then the list from print_avail_progs() */
+ const char *doc, int mask, bool error, struct bpf_object *obj)
{
- int fd, i;
-
- fd = map_fds[RX_CNT];
- map_collect_percpu(fd, 0, &rec->rx_cnt);
-
- fd = map_fds[REDIRECT_ERR_CNT];
- map_collect_percpu(fd, 1, &rec->redir_err);
-
- fd = map_fds[CPUMAP_ENQUEUE_CNT];
- for (i = 0; i < n_cpus; i++)
- map_collect_percpu(fd, i, &rec->enq[i]);
-
- fd = map_fds[CPUMAP_KTHREAD_CNT];
- map_collect_percpu(fd, 0, &rec->kthread);
-
- fd = map_fds[EXCEPTION_CNT];
- map_collect_percpu(fd, 0, &rec->exception);
-}
-
-
-/* Pointer swap trick */
-static inline void swap(struct stats_record **a, struct stats_record **b)
-{
- struct stats_record *tmp;
-
- tmp = *a;
- *a = *b;
- *b = tmp;
+ sample_usage(argv, long_options, doc, mask, error); /* all arguments except obj are forwarded unchanged */
+ print_avail_progs(obj); /* obj is only used to enumerate programs */
}
static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
@@ -582,39 +95,41 @@ static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
/* Add a CPU entry to cpumap, as this allocate a cpu entry in
* the kernel for the cpu.
*/
- ret = bpf_map_update_elem(map_fds[CPU_MAP], &cpu, value, 0);
- if (ret) {
- fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
- exit(EXIT_FAIL_BPF);
+ ret = bpf_map_update_elem(map_fd, &cpu, value, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Create CPU entry failed: %s\n", strerror(errno));
+ return ret;
}
/* Inform bpf_prog's that a new CPU is available to select
* from via some control maps.
*/
- ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &avail_idx, &cpu, 0);
- if (ret) {
- fprintf(stderr, "Add to avail CPUs failed\n");
- exit(EXIT_FAIL_BPF);
+ ret = bpf_map_update_elem(avail_fd, &avail_idx, &cpu, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Add to avail CPUs failed: %s\n", strerror(errno));
+ return ret;
}
/* When not replacing/updating existing entry, bump the count */
- ret = bpf_map_lookup_elem(map_fds[CPUS_COUNT], &key, &curr_cpus_count);
- if (ret) {
- fprintf(stderr, "Failed reading curr cpus_count\n");
- exit(EXIT_FAIL_BPF);
+ ret = bpf_map_lookup_elem(count_fd, &key, &curr_cpus_count);
+ if (ret < 0) {
+ fprintf(stderr, "Failed reading curr cpus_count: %s\n",
+ strerror(errno));
+ return ret;
}
if (new) {
curr_cpus_count++;
- ret = bpf_map_update_elem(map_fds[CPUS_COUNT], &key,
+ ret = bpf_map_update_elem(count_fd, &key,
&curr_cpus_count, 0);
- if (ret) {
- fprintf(stderr, "Failed write curr cpus_count\n");
- exit(EXIT_FAIL_BPF);
+ if (ret < 0) {
+ fprintf(stderr, "Failed write curr cpus_count: %s\n",
+ strerror(errno));
+ return ret;
}
}
- /* map_fd[7] = cpus_iterator */
- printf("%s CPU:%u as idx:%u qsize:%d prog_fd: %d (cpus_count:%u)\n",
- new ? "Add-new":"Replace", cpu, avail_idx,
+
+ printf("%s CPU: %u as idx: %u qsize: %d cpumap_prog_fd: %d (cpus_count: %u)\n",
+ new ? "Add new" : "Replace", cpu, avail_idx,
value->qsize, value->bpf_prog.fd, curr_cpus_count);
return 0;
@@ -623,24 +138,29 @@ static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
/* CPUs are zero-indexed. Thus, add a special sentinel default value
* in map cpus_available to mark CPU index'es not configured
*/
-static void mark_cpus_unavailable(void)
+static int mark_cpus_unavailable(void) /* returns 0, or the negative bpf_map_update_elem() result on the first failure */
{
+ int ret, i, n_cpus = libbpf_num_possible_cpus(); /* NOTE(review): a negative (error) result is not checked here - confirm callers validate it */
__u32 invalid_cpu = n_cpus;
- int ret, i;
for (i = 0; i < n_cpus; i++) {
- ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &i,
+ ret = bpf_map_update_elem(avail_fd, &i, /* every slot gets the sentinel value n_cpus */
&invalid_cpu, 0);
- if (ret) {
- fprintf(stderr, "Failed marking CPU unavailable\n");
- exit(EXIT_FAIL_BPF);
+ if (ret < 0) {
+ fprintf(stderr, "Failed marking CPU unavailable: %s\n",
+ strerror(errno));
+ return ret; /* stops at the first failing slot; earlier slots stay written */
}
}
+
+ return 0;
}
/* Stress cpumap management code by concurrently changing underlying cpumap */
-static void stress_cpumap(struct bpf_cpumap_val *value)
+static void stress_cpumap(void *ctx)
{
+ struct bpf_cpumap_val *value = ctx;
+
/* Changing qsize will cause kernel to free and alloc a new
* bpf_cpu_map_entry, with an associated/complicated tear-down
* procedure.
@@ -653,220 +173,263 @@ static void stress_cpumap(struct bpf_cpumap_val *value)
create_cpu_entry(1, value, 0, false);
}
-static void stats_poll(int interval, bool use_separators, char *prog_name,
- char *mprog_name, struct bpf_cpumap_val *value,
- bool stress_mode)
+static int set_cpumap_prog(struct xdp_redirect_cpu *skel, /* chooses the BPF program attached to CPUMAP entries */
+ const char *redir_interface, const char *redir_map,
+ const char *mprog_filename, const char *mprog_name) /* returns its fd, 0 when disabled, <0 on error */
{
- struct stats_record *record, *prev;
- int mprog_fd;
-
- record = alloc_stats_record();
- prev = alloc_stats_record();
- stats_collect(record);
-
- /* Trick to pretty printf with thousands separators use %' */
- if (use_separators)
- setlocale(LC_NUMERIC, "en_US");
-
- while (1) {
- swap(&prev, &record);
- mprog_fd = value->bpf_prog.fd;
- stats_collect(record);
- stats_print(record, prev, prog_name, mprog_name, mprog_fd);
- sleep(interval);
- if (stress_mode)
- stress_cpumap(value);
- }
-
- free_stats_record(record);
- free_stats_record(prev);
-}
-
-static int init_tracepoints(struct bpf_object *obj)
-{
- struct bpf_program *prog;
-
- bpf_object__for_each_program(prog, obj) {
- if (bpf_program__is_tracepoint(prog) != true)
- continue;
-
- tp_links[tp_cnt] = bpf_program__attach(prog);
- if (libbpf_get_error(tp_links[tp_cnt])) {
- tp_links[tp_cnt] = NULL;
- return -EINVAL;
+ if (mprog_filename) {
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int ret;
+ /* Custom object: -e is mandatory, and -r/-m must be given together or not at all */
+ if (!mprog_name) {
+ fprintf(stderr, "BPF program not specified for file %s\n",
+ mprog_filename);
+ goto end;
+ }
+ if ((redir_interface && !redir_map) || (!redir_interface && redir_map)) {
+ fprintf(stderr, "--redirect-%s specified but --redirect-%s not specified\n",
+ redir_interface ? "device" : "map", redir_interface ? "map" : "device");
+ goto end;
}
- tp_cnt++;
- }
-
- return 0;
-}
-
-static int init_map_fds(struct bpf_object *obj)
-{
- enum map_type type;
-
- for (type = 0; type < NUM_MAP; type++) {
- map_fds[type] =
- bpf_object__find_map_fd_by_name(obj,
- map_type_strings[type]);
-
- if (map_fds[type] < 0)
- return -ENOENT;
- }
-
- return 0;
-}
-static int load_cpumap_prog(char *file_name, char *prog_name,
- char *redir_interface, char *redir_map)
-{
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- .expected_attach_type = BPF_XDP_CPUMAP,
- .file = file_name,
- };
- struct bpf_program *prog;
- struct bpf_object *obj;
- int fd;
+ /* Custom BPF program */
+ obj = bpf_object__open_file(mprog_filename, NULL);
+ if (!obj) {
+ ret = -errno; /* saved first: fprintf() below may overwrite errno */
+ fprintf(stderr, "Failed to bpf_object__open_file: %s\n",
+ strerror(errno));
+ return ret;
+ }
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &fd))
- return -1;
+ ret = bpf_object__load(obj);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr, "Failed to bpf_object__load: %s\n",
+ strerror(errno));
+ return ret;
+ }
- if (fd < 0) {
- fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
- strerror(errno));
- return fd;
- }
+ if (redir_map) {
+ int err, redir_map_fd, ifindex_out, key = 0;
- if (redir_interface && redir_map) {
- int err, map_fd, ifindex_out, key = 0;
+ redir_map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
+ if (redir_map_fd < 0) {
+ fprintf(stderr, "Failed to bpf_object__find_map_fd_by_name: %s\n",
+ strerror(errno));
+ return redir_map_fd;
+ }
- map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
- if (map_fd < 0)
- return map_fd;
+ ifindex_out = if_nametoindex(redir_interface);
+ if (!ifindex_out) /* not an interface name: fall back to parsing a numeric ifindex */
+ ifindex_out = strtoul(redir_interface, NULL, 0);
+ if (!ifindex_out) {
+ fprintf(stderr, "Bad interface name or index\n");
+ return -EINVAL;
+ }
- ifindex_out = if_nametoindex(redir_interface);
- if (!ifindex_out)
- return -1;
+ err = bpf_map_update_elem(redir_map_fd, &key, &ifindex_out, 0);
+ if (err < 0)
+ return err;
+ }
- err = bpf_map_update_elem(map_fd, &key, &ifindex_out, 0);
- if (err < 0)
- return err;
- }
+ prog = bpf_object__find_program_by_name(obj, mprog_name);
+ if (!prog) {
+ ret = -errno;
+ fprintf(stderr, "Failed to bpf_object__find_program_by_name: %s\n",
+ strerror(errno));
+ return ret;
+ }
- prog = bpf_object__find_program_by_title(obj, prog_name);
- if (!prog) {
- fprintf(stderr, "bpf_object__find_program_by_title failed\n");
- return EXIT_FAIL;
+ return bpf_program__fd(prog); /* obj is not closed on this path */
+ } else {
+ if (mprog_name) {
+ if (redir_interface || redir_map) {
+ fprintf(stderr, "Need to specify --mprog-filename/-f\n");
+ goto end;
+ }
+ if (!strcmp(mprog_name, "pass") || !strcmp(mprog_name, "drop")) {
+ /* Use built-in pass/drop programs */
+ return *mprog_name == 'p' ? bpf_program__fd(skel->progs.xdp_redirect_cpu_pass)
+ : bpf_program__fd(skel->progs.xdp_redirect_cpu_drop);
+ } else {
+ fprintf(stderr, "Unknown name \"%s\" for built-in BPF program\n",
+ mprog_name);
+ goto end;
+ }
+ } else {
+ if (redir_map) {
+ fprintf(stderr, "Need to specify --mprog-filename, --mprog-name and"
+ " --redirect-device with --redirect-map\n");
+ goto end;
+ }
+ if (redir_interface) {
+ /* Use built-in devmap redirect */
+ struct bpf_devmap_val val = {};
+ int ifindex_out, err;
+ __u32 key = 0;
+
+ if (!redir_interface) /* NOTE(review): unreachable, the enclosing if already tested this */
+ return 0;
+
+ ifindex_out = if_nametoindex(redir_interface);
+ if (!ifindex_out)
+ ifindex_out = strtoul(redir_interface, NULL, 0);
+ if (!ifindex_out) {
+ fprintf(stderr, "Bad interface name or index\n");
+ return -EINVAL;
+ }
+
+ if (get_mac_addr(ifindex_out, skel->bss->tx_mac_addr) < 0) {
+ printf("Get interface %d mac failed\n", ifindex_out);
+ return -EINVAL;
+ }
+
+ val.ifindex = ifindex_out;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_egress_prog);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.tx_port), &key, &val, 0);
+ if (err < 0)
+ return -errno;
+
+ return bpf_program__fd(skel->progs.xdp_redirect_cpu_devmap);
+ }
+ }
}
- return bpf_program__fd(prog);
+ /* Disabled */
+ return 0;
+end:
+ fprintf(stderr, "Invalid options for CPUMAP BPF program\n");
+ return -EINVAL;
}
int main(int argc, char **argv)
{
- char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
- char *mprog_filename = "xdp_redirect_kern.o";
- char *redir_interface = NULL, *redir_map = NULL;
- char *mprog_name = "xdp_redirect_dummy";
- bool mprog_disable = false;
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_UNSPEC,
- };
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
+ const char *redir_interface = NULL, *redir_map = NULL;
+ const char *mprog_filename = NULL, *mprog_name = NULL;
+ struct xdp_redirect_cpu *skel;
+ struct bpf_map_info info = {};
+ char ifname_buf[IF_NAMESIZE];
struct bpf_cpumap_val value;
- bool use_separators = true;
+ __u32 infosz = sizeof(info);
+ int ret = EXIT_FAIL_OPTION;
+ unsigned long interval = 2;
bool stress_mode = false;
struct bpf_program *prog;
- struct bpf_object *obj;
- int err = EXIT_FAIL;
- char filename[256];
+ const char *prog_name;
+ bool generic = false;
+ bool force = false;
int added_cpus = 0;
+ bool error = true;
int longindex = 0;
- int interval = 2;
int add_cpu = -1;
- int opt, prog_fd;
- int *cpu, i;
+ int ifindex = -1;
+ int *cpu, i, opt;
+ char *ifname;
__u32 qsize;
-
- n_cpus = get_nprocs_conf();
-
- /* Notice: choosing he queue size is very important with the
- * ixgbe driver, because it's driver page recycling trick is
- * dependend on pages being returned quickly. The number of
- * out-standing packets in the system must be less-than 2x
- * RX-ring size.
+ int n_cpus;
+
+ n_cpus = libbpf_num_possible_cpus();
+
+ /* Notice: Choosing the queue size is very important when CPU is
+ * configured with power-saving states.
+ *
+ * If deepest state take 133 usec to wakeup from (133/10^6). When link
+ * speed is 10Gbit/s ((10*10^9/8) in bytes/sec). How many bytes can
+ * arrive with in 133 usec at this speed: (10*10^9/8)*(133/10^6) =
+ * 166250 bytes. With MTU size packets this is 110 packets, and with
+ * minimum Ethernet (MAC-preamble + intergap) 84 bytes is 1979 packets.
+ *
+ * Setting default cpumap queue to 2048 as worst-case (small packet)
+ * should be +64 packet due kthread wakeup call (due to xdp_do_flush)
+ * worst-case is 2043 packets.
+ *
+ * Sysadm can configured system to avoid deep-sleep via:
+ * tuned-adm profile network-latency
*/
- qsize = 128+64;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
-
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
- return err;
+ qsize = 2048;
- if (prog_fd < 0) {
- fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
+ skel = xdp_redirect_cpu__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_redirect_cpu__open: %s\n",
strerror(errno));
- return err;
+ ret = EXIT_FAIL_BPF;
+ goto end;
+ }
+
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- if (init_tracepoints(obj) < 0) {
- fprintf(stderr, "ERR: bpf_program__attach failed\n");
- return err;
+ if (bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus) < 0) {
+ fprintf(stderr, "Failed to set max entries for cpu_map map: %s",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- if (init_map_fds(obj) < 0) {
- fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
- return err;
+ if (bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus) < 0) {
+ fprintf(stderr, "Failed to set max entries for cpus_available map: %s",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- mark_cpus_unavailable();
- cpu = malloc(n_cpus * sizeof(int));
+ cpu = calloc(n_cpus, sizeof(int));
if (!cpu) {
- fprintf(stderr, "failed to allocate cpu array\n");
- return err;
+ fprintf(stderr, "Failed to allocate cpu array\n");
+ goto end_destroy;
}
- memset(cpu, 0, n_cpus * sizeof(int));
- /* Parse commands line args */
- while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:",
+ prog = skel->progs.xdp_prognum5_lb_hash_ip_pairs;
+ while ((opt = getopt_long(argc, argv, "d:si:Sxp:f:e:r:m:c:q:Fvh",
long_options, &longindex)) != -1) {
switch (opt) {
case 'd':
if (strlen(optarg) >= IF_NAMESIZE) {
- fprintf(stderr, "ERR: --dev name too long\n");
- goto error;
+ fprintf(stderr, "-d/--dev name too long\n");
+ goto end_cpu;
}
ifname = (char *)&ifname_buf;
- strncpy(ifname, optarg, IF_NAMESIZE);
+ safe_strncpy(ifname, optarg, sizeof(ifname));
ifindex = if_nametoindex(ifname);
- if (ifindex == 0) {
- fprintf(stderr,
- "ERR: --dev name unknown err(%d):%s\n",
+ if (!ifindex)
+ ifindex = strtoul(optarg, NULL, 0);
+ if (!ifindex) {
+ fprintf(stderr, "Bad interface index or name (%d): %s\n",
errno, strerror(errno));
- goto error;
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
}
break;
case 's':
- interval = atoi(optarg);
+ mask |= SAMPLE_REDIRECT_MAP_CNT;
+ break;
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
break;
case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
+ generic = true;
break;
case 'x':
stress_mode = true;
break;
- case 'z':
- use_separators = false;
- break;
case 'p':
/* Selecting eBPF prog to load */
prog_name = optarg;
- break;
- case 'n':
- mprog_disable = true;
+ prog = bpf_object__find_program_by_name(skel->obj,
+ prog_name);
+ if (!prog) {
+ fprintf(stderr,
+ "Failed to find program %s specified by"
+ " option -p/--progname\n",
+ prog_name);
+ print_avail_progs(skel->obj);
+ goto end_cpu;
+ }
break;
case 'f':
mprog_filename = optarg;
@@ -876,6 +439,7 @@ int main(int argc, char **argv)
break;
case 'r':
redir_interface = optarg;
+ mask |= SAMPLE_DEVMAP_XMIT_CNT_MULTI;
break;
case 'm':
redir_map = optarg;
@@ -885,93 +449,115 @@ int main(int argc, char **argv)
add_cpu = strtoul(optarg, NULL, 0);
if (add_cpu >= n_cpus) {
fprintf(stderr,
- "--cpu nr too large for cpumap err(%d):%s\n",
+ "--cpu nr too large for cpumap err (%d):%s\n",
errno, strerror(errno));
- goto error;
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
}
cpu[added_cpus++] = add_cpu;
break;
case 'q':
- qsize = atoi(optarg);
+ qsize = strtoul(optarg, NULL, 0);
break;
case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ force = true;
+ break;
+ case 'v':
+ sample_switch_mode();
break;
case 'h':
- error:
+ error = false;
default:
- free(cpu);
- usage(argv, obj);
- return EXIT_FAIL_OPTION;
+ usage(argv, long_options, __doc__, mask, error, skel->obj);
+ goto end_cpu;
}
}
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
- xdp_flags |= XDP_FLAGS_DRV_MODE;
-
- /* Required option */
+ ret = EXIT_FAIL_OPTION;
if (ifindex == -1) {
- fprintf(stderr, "ERR: required option --dev missing\n");
- usage(argv, obj);
- err = EXIT_FAIL_OPTION;
- goto out;
+ fprintf(stderr, "Required option --dev missing\n");
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
}
- /* Required option */
+
if (add_cpu == -1) {
- fprintf(stderr, "ERR: required option --cpu missing\n");
- fprintf(stderr, " Specify multiple --cpu option to add more\n");
- usage(argv, obj);
- err = EXIT_FAIL_OPTION;
- goto out;
+ fprintf(stderr, "Required option --cpu missing\n"
+ "Specify multiple --cpu option to add more\n");
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
}
- value.bpf_prog.fd = 0;
- if (!mprog_disable)
- value.bpf_prog.fd = load_cpumap_prog(mprog_filename, mprog_name,
- redir_interface, redir_map);
- if (value.bpf_prog.fd < 0) {
- err = value.bpf_prog.fd;
- goto out;
+ skel->rodata->from_match[0] = ifindex;
+ if (redir_interface)
+ skel->rodata->to_match[0] = if_nametoindex(redir_interface);
+
+ ret = xdp_redirect_cpu__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_redirect_cpu__load: %s\n",
+ strerror(errno));
+ goto end_cpu;
}
- value.qsize = qsize;
- for (i = 0; i < added_cpus; i++)
- create_cpu_entry(cpu[i], &value, i, true);
+ ret = bpf_obj_get_info_by_fd(bpf_map__fd(skel->maps.cpu_map), &info, &infosz);
+ if (ret < 0) {
+ fprintf(stderr, "Failed bpf_obj_get_info_by_fd for cpumap: %s\n",
+ strerror(errno));
+ goto end_cpu;
+ }
- /* Remove XDP program when program is interrupted or killed */
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ skel->bss->cpumap_map_id = info.id;
- prog = bpf_object__find_program_by_title(obj, prog_name);
- if (!prog) {
- fprintf(stderr, "bpf_object__find_program_by_title failed\n");
- goto out;
+ map_fd = bpf_map__fd(skel->maps.cpu_map);
+ avail_fd = bpf_map__fd(skel->maps.cpus_available);
+ count_fd = bpf_map__fd(skel->maps.cpus_count);
+
+ ret = mark_cpus_unavailable();
+ if (ret < 0) {
+ fprintf(stderr, "Unable to mark CPUs as unavailable\n");
+ goto end_cpu;
}
- prog_fd = bpf_program__fd(prog);
- if (prog_fd < 0) {
- fprintf(stderr, "bpf_program__fd failed\n");
- goto out;
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_cpu;
}
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
- fprintf(stderr, "link set xdp fd failed\n");
- err = EXIT_FAIL_XDP;
- goto out;
+ value.bpf_prog.fd = set_cpumap_prog(skel, redir_interface, redir_map,
+ mprog_filename, mprog_name);
+ if (value.bpf_prog.fd < 0) {
+ fprintf(stderr, "Failed to set CPUMAP BPF program: %s\n",
+ strerror(-value.bpf_prog.fd));
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ ret = EXIT_FAIL_BPF;
+ goto end_cpu;
}
+ value.qsize = qsize;
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (err) {
- printf("can't get prog info - %s\n", strerror(errno));
- goto out;
+ for (i = 0; i < added_cpus; i++) {
+ if (create_cpu_entry(cpu[i], &value, i, true) < 0) {
+ fprintf(stderr, "Cannot proceed, exiting\n");
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
+ }
}
- prog_id = info.id;
- stats_poll(interval, use_separators, prog_name, mprog_name,
- &value, stress_mode);
+ ret = EXIT_FAIL_XDP;
+ if (sample_install_xdp(prog, ifindex, generic, force) < 0)
+ goto end_cpu;
- err = EXIT_OK;
-out:
+ ret = sample_run(interval, stress_mode ? stress_cpumap : NULL, &value);
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_cpu;
+ }
+ ret = EXIT_OK;
+end_cpu:
free(cpu);
- return err;
+end_destroy:
+ xdp_redirect_cpu__destroy(skel);
+end:
+ sample_exit(ret);
}
diff --git a/samples/bpf/xdp_redirect_kern.c b/samples/bpf/xdp_redirect_kern.c
deleted file mode 100644
index d26ec3aa215e..000000000000
--- a/samples/bpf/xdp_redirect_kern.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <bpf/bpf_helpers.h>
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __type(key, int);
- __type(value, int);
- __uint(max_entries, 1);
-} tx_port SEC(".maps");
-
-/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
- * feedback. Redirect TX errors can be caught via a tracepoint.
- */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, long);
- __uint(max_entries, 1);
-} rxcnt SEC(".maps");
-
-static void swap_src_dst_mac(void *data)
-{
- unsigned short *p = data;
- unsigned short dst[3];
-
- dst[0] = p[0];
- dst[1] = p[1];
- dst[2] = p[2];
- p[0] = p[3];
- p[1] = p[4];
- p[2] = p[5];
- p[3] = dst[0];
- p[4] = dst[1];
- p[5] = dst[2];
-}
-
-SEC("xdp_redirect")
-int xdp_redirect_prog(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct ethhdr *eth = data;
- int rc = XDP_DROP;
- int *ifindex, port = 0;
- long *value;
- u32 key = 0;
- u64 nh_off;
-
- nh_off = sizeof(*eth);
- if (data + nh_off > data_end)
- return rc;
-
- ifindex = bpf_map_lookup_elem(&tx_port, &port);
- if (!ifindex)
- return rc;
-
- value = bpf_map_lookup_elem(&rxcnt, &key);
- if (value)
- *value += 1;
-
- swap_src_dst_mac(data);
- return bpf_redirect(*ifindex, 0);
-}
-
-/* Redirect require an XDP bpf_prog loaded on the TX device */
-SEC("xdp_redirect_dummy")
-int xdp_redirect_dummy_prog(struct xdp_md *ctx)
-{
- return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map.bpf.c
index a92b8e567bdd..59efd656e1b2 100644
--- a/samples/bpf/xdp_redirect_map_kern.c
+++ b/samples/bpf/xdp_redirect_map.bpf.c
@@ -10,14 +10,10 @@
* General Public License for more details.
*/
#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <bpf/bpf_helpers.h>
+
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
/* The 2nd xdp prog on egress does not support skb mode, so we define two
* maps, tx_port_general and tx_port_native.
@@ -26,114 +22,71 @@ struct {
__uint(type, BPF_MAP_TYPE_DEVMAP);
__uint(key_size, sizeof(int));
__uint(value_size, sizeof(int));
- __uint(max_entries, 100);
+ __uint(max_entries, 1);
} tx_port_general SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_DEVMAP);
__uint(key_size, sizeof(int));
__uint(value_size, sizeof(struct bpf_devmap_val));
- __uint(max_entries, 100);
-} tx_port_native SEC(".maps");
-
-/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
- * feedback. Redirect TX errors can be caught via a tracepoint.
- */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, long);
- __uint(max_entries, 1);
-} rxcnt SEC(".maps");
-
-/* map to store egress interface mac address */
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __type(key, u32);
- __type(value, __be64);
__uint(max_entries, 1);
-} tx_mac SEC(".maps");
-
-static void swap_src_dst_mac(void *data)
-{
- unsigned short *p = data;
- unsigned short dst[3];
+} tx_port_native SEC(".maps");
- dst[0] = p[0];
- dst[1] = p[1];
- dst[2] = p[2];
- p[0] = p[3];
- p[1] = p[4];
- p[2] = p[5];
- p[3] = dst[0];
- p[4] = dst[1];
- p[5] = dst[2];
-}
+/* store egress interface mac address */
+const volatile char tx_mac_addr[ETH_ALEN];
static __always_inline int xdp_redirect_map(struct xdp_md *ctx, void *redirect_map)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct ethhdr *eth = data;
- int rc = XDP_DROP;
- long *value;
- u32 key = 0;
+ struct datarec *rec;
u64 nh_off;
- int vport;
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
- return rc;
-
- /* constant virtual port */
- vport = 0;
-
- /* count packet in global counter */
- value = bpf_map_lookup_elem(&rxcnt, &key);
- if (value)
- *value += 1;
+ return XDP_DROP;
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
swap_src_dst_mac(data);
-
- /* send packet out physical port */
- return bpf_redirect_map(redirect_map, vport, 0);
+ return bpf_redirect_map(redirect_map, 0, 0);
}
-SEC("xdp_redirect_general")
+SEC("xdp")
int xdp_redirect_map_general(struct xdp_md *ctx)
{
return xdp_redirect_map(ctx, &tx_port_general);
}
-SEC("xdp_redirect_native")
+SEC("xdp")
int xdp_redirect_map_native(struct xdp_md *ctx)
{
return xdp_redirect_map(ctx, &tx_port_native);
}
-SEC("xdp_devmap/map_prog")
+SEC("xdp_devmap/egress")
int xdp_redirect_map_egress(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct ethhdr *eth = data;
- __be64 *mac;
- u32 key = 0;
u64 nh_off;
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
return XDP_DROP;
- mac = bpf_map_lookup_elem(&tx_mac, &key);
- if (mac)
- __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
+ __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);
return XDP_PASS;
}
/* Redirect require an XDP bpf_prog loaded on the TX device */
-SEC("xdp_redirect_dummy")
+SEC("xdp")
int xdp_redirect_dummy_prog(struct xdp_md *ctx)
{
return XDP_PASS;
diff --git a/samples/bpf/xdp_redirect_map_multi_kern.c b/samples/bpf/xdp_redirect_map_multi.bpf.c
index 71aa23d1cb2b..8f59d430cb64 100644
--- a/samples/bpf/xdp_redirect_map_multi_kern.c
+++ b/samples/bpf/xdp_redirect_map_multi.bpf.c
@@ -1,11 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <bpf/bpf_helpers.h>
+
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
+
+enum {
+ BPF_F_BROADCAST = (1ULL << 3),
+ BPF_F_EXCLUDE_INGRESS = (1ULL << 4),
+};
struct {
__uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
@@ -21,50 +24,41 @@ struct {
__uint(max_entries, 32);
} forward_map_native SEC(".maps");
+/* map to store egress interfaces mac addresses */
struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, long);
- __uint(max_entries, 1);
-} rxcnt SEC(".maps");
-
-/* map to store egress interfaces mac addresses, set the
- * max_entries to 1 and extend it in user sapce prog.
- */
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(type, BPF_MAP_TYPE_HASH);
__type(key, u32);
__type(value, __be64);
- __uint(max_entries, 1);
+ __uint(max_entries, 32);
} mac_map SEC(".maps");
static int xdp_redirect_map(struct xdp_md *ctx, void *forward_map)
{
- long *value;
- u32 key = 0;
+ u32 key = bpf_get_smp_processor_id();
+ struct datarec *rec;
- /* count packet in global counter */
- value = bpf_map_lookup_elem(&rxcnt, &key);
- if (value)
- *value += 1;
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
- return bpf_redirect_map(forward_map, key,
+ return bpf_redirect_map(forward_map, 0,
BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
}
-SEC("xdp_redirect_general")
+SEC("xdp")
int xdp_redirect_map_general(struct xdp_md *ctx)
{
return xdp_redirect_map(ctx, &forward_map_general);
}
-SEC("xdp_redirect_native")
+SEC("xdp")
int xdp_redirect_map_native(struct xdp_md *ctx)
{
return xdp_redirect_map(ctx, &forward_map_native);
}
-SEC("xdp_devmap/map_prog")
+SEC("xdp_devmap/egress")
int xdp_devmap_prog(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
diff --git a/samples/bpf/xdp_redirect_map_multi_user.c b/samples/bpf/xdp_redirect_map_multi_user.c
index 84cdbbed20b7..315314716121 100644
--- a/samples/bpf/xdp_redirect_map_multi_user.c
+++ b/samples/bpf/xdp_redirect_map_multi_user.c
@@ -1,7 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
+static const char *__doc__ =
+"XDP multi redirect tool, using BPF_MAP_TYPE_DEVMAP and BPF_F_BROADCAST flag for bpf_redirect_map\n"
+"Usage: xdp_redirect_map_multi <IFINDEX|IFNAME> <IFINDEX|IFNAME> ... <IFINDEX|IFNAME>\n";
+
#include <linux/bpf.h>
#include <linux/if_link.h>
#include <assert.h>
+#include <getopt.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
@@ -15,106 +20,54 @@
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
-
-#include "bpf_util.h"
+#include <linux/if_ether.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_redirect_map_multi.skel.h"
#define MAX_IFACE_NUM 32
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static int ifaces[MAX_IFACE_NUM] = {};
-static int rxcnt_map_fd;
-
-static void int_exit(int sig)
-{
- __u32 prog_id = 0;
- int i;
-
- for (i = 0; ifaces[i] > 0; i++) {
- if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(1);
- }
- if (prog_id)
- bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags);
- }
-
- exit(0);
-}
-
-static void poll_stats(int interval)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- __u64 values[nr_cpus], prev[nr_cpus];
-
- memset(prev, 0, sizeof(prev));
-
- while (1) {
- __u64 sum = 0;
- __u32 key = 0;
- int i;
- sleep(interval);
- assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
- for (i = 0; i < nr_cpus; i++)
- sum += (values[i] - prev[i]);
- if (sum)
- printf("Forwarding %10llu pkt/s\n", sum / interval);
- memcpy(prev, values, sizeof(values));
- }
-}
-
-static int get_mac_addr(unsigned int ifindex, void *mac_addr)
-{
- char ifname[IF_NAMESIZE];
- struct ifreq ifr;
- int fd, ret = -1;
-
- fd = socket(AF_INET, SOCK_DGRAM, 0);
- if (fd < 0)
- return ret;
-
- if (!if_indextoname(ifindex, ifname))
- goto err_out;
-
- strcpy(ifr.ifr_name, ifname);
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+ SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_SKIP_HEADING;
- if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
- goto err_out;
+DEFINE_SAMPLE_INIT(xdp_redirect_map_multi);
- memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
- ret = 0;
+static const struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "skb-mode", no_argument, NULL, 'S' },
+ { "force", no_argument, NULL, 'F' },
+ { "load-egress", no_argument, NULL, 'X' },
+ { "stats", no_argument, NULL, 's' },
+ { "interval", required_argument, NULL, 'i' },
+ { "verbose", no_argument, NULL, 'v' },
+ {}
+};
-err_out:
- close(fd);
- return ret;
-}
-
-static int update_mac_map(struct bpf_object *obj)
+static int update_mac_map(struct bpf_map *map)
{
- int i, ret = -1, mac_map_fd;
+ int mac_map_fd = bpf_map__fd(map);
unsigned char mac_addr[6];
unsigned int ifindex;
-
- mac_map_fd = bpf_object__find_map_fd_by_name(obj, "mac_map");
- if (mac_map_fd < 0) {
- printf("find mac map fd failed\n");
- return ret;
- }
+ int i, ret = -1;
for (i = 0; ifaces[i] > 0; i++) {
ifindex = ifaces[i];
ret = get_mac_addr(ifindex, mac_addr);
if (ret < 0) {
- printf("get interface %d mac failed\n", ifindex);
+ fprintf(stderr, "get interface %d mac failed\n",
+ ifindex);
return ret;
}
ret = bpf_map_update_elem(mac_map_fd, &ifindex, mac_addr, 0);
- if (ret) {
- perror("bpf_update_elem mac_map_fd");
+ if (ret < 0) {
+ fprintf(stderr, "Failed to update mac address for ifindex %d\n",
+ ifindex);
return ret;
}
}
@@ -122,181 +75,159 @@ static int update_mac_map(struct bpf_object *obj)
return 0;
}
-static void usage(const char *prog)
-{
- fprintf(stderr,
- "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n"
- "OPTS:\n"
- " -S use skb-mode\n"
- " -N enforce native mode\n"
- " -F force loading prog\n"
- " -X load xdp program on egress\n",
- prog);
-}
-
int main(int argc, char **argv)
{
- int i, ret, opt, forward_map_fd, max_ifindex = 0;
- struct bpf_program *ingress_prog, *egress_prog;
- int ingress_prog_fd, egress_prog_fd = 0;
- struct bpf_devmap_val devmap_val;
- bool attach_egress_prog = false;
+ struct bpf_devmap_val devmap_val = {};
+ struct xdp_redirect_map_multi *skel;
+ struct bpf_program *ingress_prog;
+ bool xdp_devmap_attached = false;
+ struct bpf_map *forward_map;
+ int ret = EXIT_FAIL_OPTION;
+ unsigned long interval = 2;
char ifname[IF_NAMESIZE];
- struct bpf_map *mac_map;
- struct bpf_object *obj;
unsigned int ifindex;
- char filename[256];
-
- while ((opt = getopt(argc, argv, "SNFX")) != -1) {
+ bool generic = false;
+ bool force = false;
+ bool tried = false;
+ bool error = true;
+ int i, opt;
+
+ while ((opt = getopt_long(argc, argv, "hSFXi:vs",
+ long_options, NULL)) != -1) {
switch (opt) {
case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
- break;
- case 'N':
- /* default, set below */
+ generic = true;
+ /* devmap_xmit tracepoint not available */
+ mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI);
break;
case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ force = true;
break;
case 'X':
- attach_egress_prog = true;
+ xdp_devmap_attached = true;
+ break;
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
+ break;
+ case 'v':
+ sample_switch_mode();
break;
+ case 's':
+ mask |= SAMPLE_REDIRECT_MAP_CNT;
+ break;
+ case 'h':
+ error = false;
default:
- usage(basename(argv[0]));
- return 1;
+ sample_usage(argv, long_options, __doc__, mask, error);
+ return ret;
}
}
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
- xdp_flags |= XDP_FLAGS_DRV_MODE;
- } else if (attach_egress_prog) {
- printf("Load xdp program on egress with SKB mode not supported yet\n");
- return 1;
+ if (argc <= optind + 1) {
+ sample_usage(argv, long_options, __doc__, mask, error);
+ return ret;
}
- if (optind == argc) {
- printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
- return 1;
+ skel = xdp_redirect_map_multi__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_redirect_map_multi__open: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end;
}
- printf("Get interfaces");
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
+ }
+
+ ret = EXIT_FAIL_OPTION;
for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
ifaces[i] = if_nametoindex(argv[optind + i]);
if (!ifaces[i])
ifaces[i] = strtoul(argv[optind + i], NULL, 0);
if (!if_indextoname(ifaces[i], ifname)) {
- perror("Invalid interface name or i");
- return 1;
+ fprintf(stderr, "Bad interface index or name\n");
+ sample_usage(argv, long_options, __doc__, mask, true);
+ goto end_destroy;
}
- /* Find the largest index number */
- if (ifaces[i] > max_ifindex)
- max_ifindex = ifaces[i];
-
- printf(" %d", ifaces[i]);
- }
- printf("\n");
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- obj = bpf_object__open(filename);
- if (libbpf_get_error(obj)) {
- printf("ERROR: opening BPF object file failed\n");
- obj = NULL;
- goto err_out;
+ skel->rodata->from_match[i] = ifaces[i];
+ skel->rodata->to_match[i] = ifaces[i];
}
- /* Reset the map size to max ifindex + 1 */
- if (attach_egress_prog) {
- mac_map = bpf_object__find_map_by_name(obj, "mac_map");
- ret = bpf_map__resize(mac_map, max_ifindex + 1);
- if (ret < 0) {
- printf("ERROR: reset mac map size failed\n");
- goto err_out;
- }
+ ret = xdp_redirect_map_multi__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_redirect_map_multi__load: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- /* load BPF program */
- if (bpf_object__load(obj)) {
- printf("ERROR: loading BPF object file failed\n");
- goto err_out;
- }
-
- if (xdp_flags & XDP_FLAGS_SKB_MODE) {
- ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_general");
- forward_map_fd = bpf_object__find_map_fd_by_name(obj, "forward_map_general");
- } else {
- ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_native");
- forward_map_fd = bpf_object__find_map_fd_by_name(obj, "forward_map_native");
- }
- if (!ingress_prog || forward_map_fd < 0) {
- printf("finding ingress_prog/forward_map in obj file failed\n");
- goto err_out;
- }
-
- ingress_prog_fd = bpf_program__fd(ingress_prog);
- if (ingress_prog_fd < 0) {
- printf("find ingress_prog fd failed\n");
- goto err_out;
- }
-
- rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
- if (rxcnt_map_fd < 0) {
- printf("bpf_object__find_map_fd_by_name failed\n");
- goto err_out;
- }
-
- if (attach_egress_prog) {
+ if (xdp_devmap_attached) {
/* Update mac_map with all egress interfaces' mac addr */
- if (update_mac_map(obj) < 0) {
- printf("Error: update mac map failed");
- goto err_out;
+ if (update_mac_map(skel->maps.mac_map) < 0) {
+ fprintf(stderr, "Updating mac address failed\n");
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
+ }
- /* Find egress prog fd */
- egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
- if (!egress_prog) {
- printf("finding egress_prog in obj file failed\n");
- goto err_out;
- }
- egress_prog_fd = bpf_program__fd(egress_prog);
- if (egress_prog_fd < 0) {
- printf("find egress_prog fd failed\n");
- goto err_out;
- }
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
- /* Remove attached program when program is interrupted or killed */
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ ingress_prog = skel->progs.xdp_redirect_map_native;
+ forward_map = skel->maps.forward_map_native;
- /* Init forward multicast groups */
for (i = 0; ifaces[i] > 0; i++) {
ifindex = ifaces[i];
+ ret = EXIT_FAIL_XDP;
+restart:
/* bind prog_fd to each interface */
- ret = bpf_set_link_xdp_fd(ifindex, ingress_prog_fd, xdp_flags);
- if (ret) {
- printf("Set xdp fd failed on %d\n", ifindex);
- goto err_out;
+ if (sample_install_xdp(ingress_prog, ifindex, generic, force) < 0) {
+ if (generic && !tried) {
+ fprintf(stderr,
+ "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n");
+ ingress_prog = skel->progs.xdp_redirect_map_general;
+ forward_map = skel->maps.forward_map_general;
+ tried = true;
+ goto restart;
+ }
+ goto end_destroy;
}
/* Add all the interfaces to forward group and attach
- * egress devmap programe if exist
+ * egress devmap program if exist
*/
devmap_val.ifindex = ifindex;
- devmap_val.bpf_prog.fd = egress_prog_fd;
- ret = bpf_map_update_elem(forward_map_fd, &ifindex, &devmap_val, 0);
- if (ret) {
- perror("bpf_map_update_elem forward_map");
- goto err_out;
+ if (xdp_devmap_attached)
+ devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_devmap_prog);
+ ret = bpf_map_update_elem(bpf_map__fd(forward_map), &ifindex, &devmap_val, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to update devmap value: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
}
- poll_stats(2);
-
- return 0;
-
-err_out:
- return 1;
+ ret = sample_run(interval, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
+ }
+ ret = EXIT_OK;
+end_destroy:
+ xdp_redirect_map_multi__destroy(skel);
+end:
+ sample_exit(ret);
}
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
index 0e8192688dfc..b6e4fc849577 100644
--- a/samples/bpf/xdp_redirect_map_user.c
+++ b/samples/bpf/xdp_redirect_map_user.c
@@ -1,6 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
*/
+static const char *__doc__ =
+"XDP redirect tool, using BPF_MAP_TYPE_DEVMAP\n"
+"Usage: xdp_redirect_map <IFINDEX|IFNAME>_IN <IFINDEX|IFNAME>_OUT\n";
+
#include <linux/bpf.h>
#include <linux/if_link.h>
#include <assert.h>
@@ -13,165 +17,83 @@
#include <net/if.h>
#include <unistd.h>
#include <libgen.h>
-#include <sys/resource.h>
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-
-#include "bpf_util.h"
+#include <getopt.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_redirect_map.skel.h"
-static int ifindex_in;
-static int ifindex_out;
-static bool ifindex_out_xdp_dummy_attached = true;
-static bool xdp_devmap_attached;
-static __u32 prog_id;
-static __u32 dummy_prog_id;
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int rxcnt_map_fd;
-
-static void int_exit(int sig)
-{
- __u32 curr_prog_id = 0;
-
- if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(1);
- }
- if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
- else if (!curr_prog_id)
- printf("couldn't find a prog id on iface IN\n");
- else
- printf("program on iface IN changed, not removing\n");
-
- if (ifindex_out_xdp_dummy_attached) {
- curr_prog_id = 0;
- if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id,
- xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(1);
- }
- if (dummy_prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
- else if (!curr_prog_id)
- printf("couldn't find a prog id on iface OUT\n");
- else
- printf("program on iface OUT changed, not removing\n");
- }
- exit(0);
-}
-
-static void poll_stats(int interval, int ifindex)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- __u64 values[nr_cpus], prev[nr_cpus];
-
- memset(prev, 0, sizeof(prev));
-
- while (1) {
- __u64 sum = 0;
- __u32 key = 0;
- int i;
-
- sleep(interval);
- assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
- for (i = 0; i < nr_cpus; i++)
- sum += (values[i] - prev[i]);
- if (sum)
- printf("ifindex %i: %10llu pkt/s\n",
- ifindex, sum / interval);
- memcpy(prev, values, sizeof(values));
- }
-}
-
-static int get_mac_addr(unsigned int ifindex_out, void *mac_addr)
-{
- char ifname[IF_NAMESIZE];
- struct ifreq ifr;
- int fd, ret = -1;
-
- fd = socket(AF_INET, SOCK_DGRAM, 0);
- if (fd < 0)
- return ret;
-
- if (!if_indextoname(ifindex_out, ifname))
- goto err_out;
-
- strcpy(ifr.ifr_name, ifname);
-
- if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
- goto err_out;
-
- memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
- ret = 0;
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+ SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
-err_out:
- close(fd);
- return ret;
-}
+DEFINE_SAMPLE_INIT(xdp_redirect_map);
-static void usage(const char *prog)
-{
- fprintf(stderr,
- "usage: %s [OPTS] <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n\n"
- "OPTS:\n"
- " -S use skb-mode\n"
- " -N enforce native mode\n"
- " -F force loading prog\n"
- " -X load xdp program on egress\n",
- prog);
-}
+static const struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "skb-mode", no_argument, NULL, 'S' },
+ { "force", no_argument, NULL, 'F' },
+ { "load-egress", no_argument, NULL, 'X' },
+ { "stats", no_argument, NULL, 's' },
+ { "interval", required_argument, NULL, 'i' },
+ { "verbose", no_argument, NULL, 'v' },
+ {}
+};
int main(int argc, char **argv)
{
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_UNSPEC,
- };
- struct bpf_program *prog, *dummy_prog, *devmap_prog;
- int prog_fd, dummy_prog_fd, devmap_prog_fd = 0;
- int tx_port_map_fd, tx_mac_map_fd;
- struct bpf_devmap_val devmap_val;
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- const char *optstr = "FSNX";
- struct bpf_object *obj;
- int ret, opt, key = 0;
- char filename[256];
-
- while ((opt = getopt(argc, argv, optstr)) != -1) {
+ struct bpf_devmap_val devmap_val = {};
+ bool xdp_devmap_attached = false;
+ struct xdp_redirect_map *skel;
+ char str[2 * IF_NAMESIZE + 1];
+ char ifname_out[IF_NAMESIZE];
+ struct bpf_map *tx_port_map;
+ char ifname_in[IF_NAMESIZE];
+ int ifindex_in, ifindex_out;
+ unsigned long interval = 2;
+ int ret = EXIT_FAIL_OPTION;
+ struct bpf_program *prog;
+ bool generic = false;
+ bool force = false;
+ bool tried = false;
+ bool error = true;
+ int opt, key = 0;
+
+ while ((opt = getopt_long(argc, argv, "hSFXi:vs",
+ long_options, NULL)) != -1) {
switch (opt) {
case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
- break;
- case 'N':
- /* default, set below */
+ generic = true;
+ /* devmap_xmit tracepoint not available */
+ mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI);
break;
case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ force = true;
break;
case 'X':
xdp_devmap_attached = true;
break;
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
+ break;
+ case 'v':
+ sample_switch_mode();
+ break;
+ case 's':
+ mask |= SAMPLE_REDIRECT_MAP_CNT;
+ break;
+ case 'h':
+ error = false;
default:
- usage(basename(argv[0]));
- return 1;
+ sample_usage(argv, long_options, __doc__, mask, error);
+ return ret;
}
}
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
- xdp_flags |= XDP_FLAGS_DRV_MODE;
- } else if (xdp_devmap_attached) {
- printf("Load xdp program on egress with SKB mode not supported yet\n");
- return 1;
- }
-
- if (optind == argc) {
- printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
- return 1;
+ if (argc <= optind + 1) {
+ sample_usage(argv, long_options, __doc__, mask, true);
+ goto end;
}
ifindex_in = if_nametoindex(argv[optind]);
@@ -182,107 +104,116 @@ int main(int argc, char **argv)
if (!ifindex_out)
ifindex_out = strtoul(argv[optind + 1], NULL, 0);
- printf("input: %d output: %d\n", ifindex_in, ifindex_out);
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
-
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
- return 1;
-
- if (xdp_flags & XDP_FLAGS_SKB_MODE) {
- prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_general");
- tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port_general");
- } else {
- prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_native");
- tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port_native");
- }
- dummy_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_dummy_prog");
- if (!prog || dummy_prog < 0 || tx_port_map_fd < 0) {
- printf("finding prog/dummy_prog/tx_port_map in obj file failed\n");
- goto out;
- }
- prog_fd = bpf_program__fd(prog);
- dummy_prog_fd = bpf_program__fd(dummy_prog);
- if (prog_fd < 0 || dummy_prog_fd < 0 || tx_port_map_fd < 0) {
- printf("bpf_prog_load_xattr: %s\n", strerror(errno));
- return 1;
- }
-
- tx_mac_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_mac");
- rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
- if (tx_mac_map_fd < 0 || rxcnt_map_fd < 0) {
- printf("bpf_object__find_map_fd_by_name failed\n");
- return 1;
+ if (!ifindex_in || !ifindex_out) {
+ fprintf(stderr, "Bad interface index or name\n");
+ sample_usage(argv, long_options, __doc__, mask, true);
+ goto end;
}
- if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) {
- printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
- return 1;
+ skel = xdp_redirect_map__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_redirect_map__open: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end;
}
- ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (ret) {
- printf("can't get prog info - %s\n", strerror(errno));
- return ret;
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- prog_id = info.id;
-
- /* Loading dummy XDP prog on out-device */
- if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd,
- (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
- printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
- ifindex_out_xdp_dummy_attached = false;
- }
-
- memset(&info, 0, sizeof(info));
- ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len);
- if (ret) {
- printf("can't get prog info - %s\n", strerror(errno));
- return ret;
- }
- dummy_prog_id = info.id;
/* Load 2nd xdp prog on egress. */
if (xdp_devmap_attached) {
- unsigned char mac_addr[6];
-
- devmap_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_egress");
- if (!devmap_prog) {
- printf("finding devmap_prog in obj file failed\n");
- goto out;
- }
- devmap_prog_fd = bpf_program__fd(devmap_prog);
- if (devmap_prog_fd < 0) {
- printf("finding devmap_prog fd failed\n");
- goto out;
- }
-
- if (get_mac_addr(ifindex_out, mac_addr) < 0) {
- printf("get interface %d mac failed\n", ifindex_out);
- goto out;
+ ret = get_mac_addr(ifindex_out, skel->rodata->tx_mac_addr);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to get interface %d mac address: %s\n",
+ ifindex_out, strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
+ }
- ret = bpf_map_update_elem(tx_mac_map_fd, &key, mac_addr, 0);
- if (ret) {
- perror("bpf_update_elem tx_mac_map_fd");
- goto out;
+ skel->rodata->from_match[0] = ifindex_in;
+ skel->rodata->to_match[0] = ifindex_out;
+
+ ret = xdp_redirect_map__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_redirect_map__load: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
+ }
+
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
+ }
+
+ prog = skel->progs.xdp_redirect_map_native;
+ tx_port_map = skel->maps.tx_port_native;
+restart:
+ if (sample_install_xdp(prog, ifindex_in, generic, force) < 0) {
+ /* First try with struct bpf_devmap_val as value for generic
+ * mode, then fall back to sizeof(int) for older kernels.
+ */
+ fprintf(stderr,
+ "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n");
+ if (generic && !tried) {
+ prog = skel->progs.xdp_redirect_map_general;
+ tx_port_map = skel->maps.tx_port_general;
+ tried = true;
+ goto restart;
}
+ ret = EXIT_FAIL_XDP;
+ goto end_destroy;
}
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ /* Loading dummy XDP prog on out-device */
+ sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out, generic, force);
devmap_val.ifindex = ifindex_out;
- devmap_val.bpf_prog.fd = devmap_prog_fd;
- ret = bpf_map_update_elem(tx_port_map_fd, &key, &devmap_val, 0);
- if (ret) {
- perror("bpf_update_elem");
- goto out;
- }
-
- poll_stats(2, ifindex_out);
-
-out:
- return 0;
+ if (xdp_devmap_attached)
+ devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_map_egress);
+ ret = bpf_map_update_elem(bpf_map__fd(tx_port_map), &key, &devmap_val, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to update devmap value: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
+ }
+
+ ret = EXIT_FAIL;
+ if (!if_indextoname(ifindex_in, ifname_in)) {
+ fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in,
+ strerror(errno));
+ goto end_destroy;
+ }
+
+ if (!if_indextoname(ifindex_out, ifname_out)) {
+ fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out,
+ strerror(errno));
+ goto end_destroy;
+ }
+
+ safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str));
+ printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n",
+ ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out));
+ snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out);
+
+ ret = sample_run(interval, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
+ }
+ ret = EXIT_OK;
+end_destroy:
+ xdp_redirect_map__destroy(skel);
+end:
+ sample_exit(ret);
}
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index 93854e135134..7af5b07a7523 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -1,6 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
*/
+static const char *__doc__ =
+"XDP redirect tool, using bpf_redirect helper\n"
+"Usage: xdp_redirect <IFINDEX|IFNAME>_IN <IFINDEX|IFNAME>_OUT\n";
+
#include <linux/bpf.h>
#include <linux/if_link.h>
#include <assert.h>
@@ -13,126 +17,73 @@
#include <net/if.h>
#include <unistd.h>
#include <libgen.h>
+#include <getopt.h>
#include <sys/resource.h>
-
-#include "bpf_util.h"
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_redirect.skel.h"
-static int ifindex_in;
-static int ifindex_out;
-static bool ifindex_out_xdp_dummy_attached = true;
-static __u32 prog_id;
-static __u32 dummy_prog_id;
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int rxcnt_map_fd;
-
-static void int_exit(int sig)
-{
- __u32 curr_prog_id = 0;
-
- if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(1);
- }
- if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
- else if (!curr_prog_id)
- printf("couldn't find a prog id on iface IN\n");
- else
- printf("program on iface IN changed, not removing\n");
-
- if (ifindex_out_xdp_dummy_attached) {
- curr_prog_id = 0;
- if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id,
- xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(1);
- }
- if (dummy_prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
- else if (!curr_prog_id)
- printf("couldn't find a prog id on iface OUT\n");
- else
- printf("program on iface OUT changed, not removing\n");
- }
- exit(0);
-}
-
-static void poll_stats(int interval, int ifindex)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- __u64 values[nr_cpus], prev[nr_cpus];
-
- memset(prev, 0, sizeof(prev));
-
- while (1) {
- __u64 sum = 0;
- __u32 key = 0;
- int i;
-
- sleep(interval);
- assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
- for (i = 0; i < nr_cpus; i++)
- sum += (values[i] - prev[i]);
- if (sum)
- printf("ifindex %i: %10llu pkt/s\n",
- ifindex, sum / interval);
- memcpy(prev, values, sizeof(values));
- }
-}
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_CNT |
+ SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
-static void usage(const char *prog)
-{
- fprintf(stderr,
- "usage: %s [OPTS] <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n\n"
- "OPTS:\n"
- " -S use skb-mode\n"
- " -N enforce native mode\n"
- " -F force loading prog\n",
- prog);
-}
+DEFINE_SAMPLE_INIT(xdp_redirect);
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h' },
+ {"skb-mode", no_argument, NULL, 'S' },
+ {"force", no_argument, NULL, 'F' },
+ {"stats", no_argument, NULL, 's' },
+ {"interval", required_argument, NULL, 'i' },
+ {"verbose", no_argument, NULL, 'v' },
+ {}
+};
int main(int argc, char **argv)
{
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
- struct bpf_program *prog, *dummy_prog;
- int prog_fd, tx_port_map_fd, opt;
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- const char *optstr = "FSN";
- struct bpf_object *obj;
- char filename[256];
- int dummy_prog_fd;
- int ret, key = 0;
-
- while ((opt = getopt(argc, argv, optstr)) != -1) {
+ int ifindex_in, ifindex_out, opt;
+ char str[2 * IF_NAMESIZE + 1];
+ char ifname_out[IF_NAMESIZE];
+ char ifname_in[IF_NAMESIZE];
+ int ret = EXIT_FAIL_OPTION;
+ unsigned long interval = 2;
+ struct xdp_redirect *skel;
+ bool generic = false;
+ bool force = false;
+ bool error = true;
+
+ while ((opt = getopt_long(argc, argv, "hSFi:vs",
+ long_options, NULL)) != -1) {
switch (opt) {
case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
- break;
- case 'N':
- /* default, set below */
+ generic = true;
+ mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI);
break;
case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ force = true;
+ break;
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
+ break;
+ case 'v':
+ sample_switch_mode();
+ break;
+ case 's':
+ mask |= SAMPLE_REDIRECT_CNT;
break;
+ case 'h':
+ error = false;
default:
- usage(basename(argv[0]));
- return 1;
+ sample_usage(argv, long_options, __doc__, mask, error);
+ return ret;
}
}
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
- xdp_flags |= XDP_FLAGS_DRV_MODE;
-
- if (optind + 2 != argc) {
- printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
- return 1;
+ if (argc <= optind + 1) {
+ sample_usage(argv, long_options, __doc__, mask, true);
+ return ret;
}
ifindex_in = if_nametoindex(argv[optind]);
@@ -143,75 +94,80 @@ int main(int argc, char **argv)
if (!ifindex_out)
ifindex_out = strtoul(argv[optind + 1], NULL, 0);
- printf("input: %d output: %d\n", ifindex_in, ifindex_out);
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
-
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
- return 1;
-
- prog = bpf_program__next(NULL, obj);
- dummy_prog = bpf_program__next(prog, obj);
- if (!prog || !dummy_prog) {
- printf("finding a prog in obj file failed\n");
- return 1;
+ if (!ifindex_in || !ifindex_out) {
+ fprintf(stderr, "Bad interface index or name\n");
+ sample_usage(argv, long_options, __doc__, mask, true);
+ goto end;
}
- /* bpf_prog_load_xattr gives us the pointer to first prog's fd,
- * so we're missing only the fd for dummy prog
- */
- dummy_prog_fd = bpf_program__fd(dummy_prog);
- if (prog_fd < 0 || dummy_prog_fd < 0) {
- printf("bpf_prog_load_xattr: %s\n", strerror(errno));
- return 1;
+
+ skel = xdp_redirect__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_redirect__open: %s\n", strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end;
}
- tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port");
- rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
- if (tx_port_map_fd < 0 || rxcnt_map_fd < 0) {
- printf("bpf_object__find_map_fd_by_name failed\n");
- return 1;
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) {
- printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
- return 1;
+ skel->rodata->from_match[0] = ifindex_in;
+ skel->rodata->to_match[0] = ifindex_out;
+ skel->rodata->ifindex_out = ifindex_out;
+
+ ret = xdp_redirect__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_redirect__load: %s\n", strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (ret) {
- printf("can't get prog info - %s\n", strerror(errno));
- return ret;
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
- prog_id = info.id;
+
+ ret = EXIT_FAIL_XDP;
+ if (sample_install_xdp(skel->progs.xdp_redirect_prog, ifindex_in,
+ generic, force) < 0)
+ goto end_destroy;
/* Loading dummy XDP prog on out-device */
- if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd,
- (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
- printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
- ifindex_out_xdp_dummy_attached = false;
+ sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out,
+ generic, force);
+
+ ret = EXIT_FAIL;
+ if (!if_indextoname(ifindex_in, ifname_in)) {
+ fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in,
+ strerror(errno));
+ goto end_destroy;
}
- memset(&info, 0, sizeof(info));
- ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len);
- if (ret) {
- printf("can't get prog info - %s\n", strerror(errno));
- return ret;
+ if (!if_indextoname(ifindex_out, ifname_out)) {
+ fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out,
+ strerror(errno));
+ goto end_destroy;
}
- dummy_prog_id = info.id;
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str));
+ printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n",
+ ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out));
+ snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out);
- /* bpf redirect port */
- ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0);
- if (ret) {
- perror("bpf_update_elem");
- goto out;
+ ret = sample_run(interval, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
-
- poll_stats(2, ifindex_out);
-
-out:
- return ret;
+ ret = EXIT_OK;
+end_destroy:
+ xdp_redirect__destroy(skel);
+end:
+ sample_exit(ret);
}
diff --git a/samples/bpf/xdp_sample.bpf.c b/samples/bpf/xdp_sample.bpf.c
new file mode 100644
index 000000000000..0eb7e1dcae22
--- /dev/null
+++ b/samples/bpf/xdp_sample.bpf.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0
+/* GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
+#include "xdp_sample.bpf.h"
+
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+array_map rx_cnt SEC(".maps");
+array_map redir_err_cnt SEC(".maps");
+array_map cpumap_enqueue_cnt SEC(".maps");
+array_map cpumap_kthread_cnt SEC(".maps");
+array_map exception_cnt SEC(".maps");
+array_map devmap_xmit_cnt SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 32 * 32);
+ __type(key, u64);
+ __type(value, struct datarec);
+} devmap_xmit_cnt_multi SEC(".maps");
+
+const volatile int nr_cpus = 0;
+
+/* These can be set before loading so that redundant comparisons can be DCE'd by
+ * the verifier, and only actual matches are tried after loading tp_btf program.
+ * This allows the sample to filter tracepoint stats based on net_device.
+ */
+const volatile int from_match[32] = {};
+const volatile int to_match[32] = {};
+
+int cpumap_map_id = 0;
+
+/* Check whether b is in set a; if a is the empty set, evaluate to true */
+#define IN_SET(a, b) \
+ ({ \
+ bool __res = !(a)[0]; \
+ for (int i = 0; i < ARRAY_SIZE(a) && (a)[i]; i++) { \
+ __res = (a)[i] == (b); \
+ if (__res) \
+ break; \
+ } \
+ __res; \
+ })
+
+static __always_inline __u32 xdp_get_err_key(int err)
+{
+ switch (err) {
+ case 0:
+ return 0;
+ case -EINVAL:
+ return 2;
+ case -ENETDOWN:
+ return 3;
+ case -EMSGSIZE:
+ return 4;
+ case -EOPNOTSUPP:
+ return 5;
+ case -ENOSPC:
+ return 6;
+ default:
+ return 1;
+ }
+}
+
+static __always_inline int xdp_redirect_collect_stat(int from, int err)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ u32 key = XDP_REDIRECT_ERROR;
+ struct datarec *rec;
+ u32 idx;
+
+ if (!IN_SET(from_match, from))
+ return 0;
+
+ key = xdp_get_err_key(err);
+
+ idx = key * nr_cpus + cpu;
+ rec = bpf_map_lookup_elem(&redir_err_cnt, &idx);
+ if (!rec)
+ return 0;
+ if (key)
+ NO_TEAR_INC(rec->dropped);
+ else
+ NO_TEAR_INC(rec->processed);
+ return 0; /* Indicate event was filtered (no further processing)*/
+ /*
+ * Returning 1 here would allow e.g. a perf-record tracepoint
+ * to see and record these events, but it doesn't work well
+ * in practice, as stopping perf-record also unloads this
+ * bpf_prog. Plus, there is additional overhead of doing so.
+ */
+}
+
+SEC("tp_btf/xdp_redirect_err")
+int BPF_PROG(tp_xdp_redirect_err, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect_map_err")
+int BPF_PROG(tp_xdp_redirect_map_err, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect")
+int BPF_PROG(tp_xdp_redirect, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect_map")
+int BPF_PROG(tp_xdp_redirect_map, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_cpumap_enqueue")
+int BPF_PROG(tp_xdp_cpumap_enqueue, int map_id, unsigned int processed,
+ unsigned int drops, int to_cpu)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ struct datarec *rec;
+ u32 idx;
+
+ if (cpumap_map_id && cpumap_map_id != map_id)
+ return 0;
+
+ idx = to_cpu * nr_cpus + cpu;
+ rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &idx);
+ if (!rec)
+ return 0;
+ NO_TEAR_ADD(rec->processed, processed);
+ NO_TEAR_ADD(rec->dropped, drops);
+ /* Record bulk events, then userspace can calc average bulk size */
+ if (processed > 0)
+ NO_TEAR_INC(rec->issue);
+ /* Inception: It's possible to detect overload situations, via
+ * this tracepoint. This can be used for creating a feedback
+ * loop to XDP, which can take appropriate actions to mitigate
+ * this overload situation.
+ */
+ return 0;
+}
+
+SEC("tp_btf/xdp_cpumap_kthread")
+int BPF_PROG(tp_xdp_cpumap_kthread, int map_id, unsigned int processed,
+ unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats)
+{
+ struct datarec *rec;
+ u32 cpu;
+
+ if (cpumap_map_id && cpumap_map_id != map_id)
+ return 0;
+
+ cpu = bpf_get_smp_processor_id();
+ rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &cpu);
+ if (!rec)
+ return 0;
+ NO_TEAR_ADD(rec->processed, processed);
+ NO_TEAR_ADD(rec->dropped, drops);
+ NO_TEAR_ADD(rec->xdp_pass, xdp_stats->pass);
+ NO_TEAR_ADD(rec->xdp_drop, xdp_stats->drop);
+ NO_TEAR_ADD(rec->xdp_redirect, xdp_stats->redirect);
+ /* Count times kthread yielded CPU via schedule call */
+ if (sched)
+ NO_TEAR_INC(rec->issue);
+ return 0;
+}
+
+SEC("tp_btf/xdp_exception")
+int BPF_PROG(tp_xdp_exception, const struct net_device *dev,
+ const struct bpf_prog *xdp, u32 act)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ struct datarec *rec;
+ u32 key = act, idx;
+
+ if (!IN_SET(from_match, dev->ifindex))
+ return 0;
+ if (!IN_SET(to_match, dev->ifindex))
+ return 0;
+
+ if (key > XDP_REDIRECT)
+ key = XDP_REDIRECT + 1;
+
+ idx = key * nr_cpus + cpu;
+ rec = bpf_map_lookup_elem(&exception_cnt, &idx);
+ if (!rec)
+ return 0;
+ NO_TEAR_INC(rec->dropped);
+
+ return 0;
+}
+
+SEC("tp_btf/xdp_devmap_xmit")
+int BPF_PROG(tp_xdp_devmap_xmit, const struct net_device *from_dev,
+ const struct net_device *to_dev, int sent, int drops, int err)
+{
+ struct datarec *rec;
+ int idx_in, idx_out;
+ u32 cpu;
+
+ idx_in = from_dev->ifindex;
+ idx_out = to_dev->ifindex;
+
+ if (!IN_SET(from_match, idx_in))
+ return 0;
+ if (!IN_SET(to_match, idx_out))
+ return 0;
+
+ cpu = bpf_get_smp_processor_id();
+ rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &cpu);
+ if (!rec)
+ return 0;
+ NO_TEAR_ADD(rec->processed, sent);
+ NO_TEAR_ADD(rec->dropped, drops);
+ /* Record bulk events, then userspace can calc average bulk size */
+ NO_TEAR_INC(rec->info);
+ /* Record error cases, where no frames were sent */
+ /* Catch API error where drv ndo_xdp_xmit sent more than count */
+ if (err || drops < 0)
+ NO_TEAR_INC(rec->issue);
+ return 0;
+}
+
+SEC("tp_btf/xdp_devmap_xmit")
+int BPF_PROG(tp_xdp_devmap_xmit_multi, const struct net_device *from_dev,
+ const struct net_device *to_dev, int sent, int drops, int err)
+{
+ struct datarec empty = {};
+ struct datarec *rec;
+ int idx_in, idx_out;
+ u64 idx;
+
+ idx_in = from_dev->ifindex;
+ idx_out = to_dev->ifindex;
+ idx = idx_in;
+ idx = idx << 32 | idx_out;
+
+ if (!IN_SET(from_match, idx_in))
+ return 0;
+ if (!IN_SET(to_match, idx_out))
+ return 0;
+
+ bpf_map_update_elem(&devmap_xmit_cnt_multi, &idx, &empty, BPF_NOEXIST);
+ rec = bpf_map_lookup_elem(&devmap_xmit_cnt_multi, &idx);
+ if (!rec)
+ return 0;
+
+ NO_TEAR_ADD(rec->processed, sent);
+ NO_TEAR_ADD(rec->dropped, drops);
+ NO_TEAR_INC(rec->info);
+ if (err || drops < 0)
+ NO_TEAR_INC(rec->issue);
+ return 0;
+}
diff --git a/samples/bpf/xdp_sample.bpf.h b/samples/bpf/xdp_sample.bpf.h
new file mode 100644
index 000000000000..25b1dbe9b37b
--- /dev/null
+++ b/samples/bpf/xdp_sample.bpf.h
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _XDP_SAMPLE_BPF_H
+#define _XDP_SAMPLE_BPF_H
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+#include "xdp_sample_shared.h"
+
+#define ETH_ALEN 6
+#define ETH_P_802_3_MIN 0x0600
+#define ETH_P_8021Q 0x8100
+#define ETH_P_8021AD 0x88A8
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86DD
+#define ETH_P_ARP 0x0806
+#define IPPROTO_ICMPV6 58
+
+#define EINVAL 22
+#define ENETDOWN 100
+#define EMSGSIZE 90
+#define EOPNOTSUPP 95
+#define ENOSPC 28
+
+typedef struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __type(key, unsigned int);
+ __type(value, struct datarec);
+} array_map;
+
+extern array_map rx_cnt;
+extern const volatile int nr_cpus;
+
+enum {
+ XDP_REDIRECT_SUCCESS = 0,
+ XDP_REDIRECT_ERROR = 1
+};
+
+static __always_inline void swap_src_dst_mac(void *data)
+{
+ unsigned short *p = data;
+ unsigned short dst[3];
+
+ dst[0] = p[0];
+ dst[1] = p[1];
+ dst[2] = p[2];
+ p[0] = p[3];
+ p[1] = p[4];
+ p[2] = p[5];
+ p[3] = dst[0];
+ p[4] = dst[1];
+ p[5] = dst[2];
+}
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
+ __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define bpf_ntohs(x) __builtin_bswap16(x)
+#define bpf_htons(x) __builtin_bswap16(x)
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
+ __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define bpf_ntohs(x) (x)
+#define bpf_htons(x) (x)
+#else
+# error "Endianness detection needs to be set up for your compiler?!"
+#endif
+
+/*
+ * Note: including linux/compiler.h or linux/kernel.h for the macros below
+ * conflicts with the vmlinux.h include in BPF files, so we define them here.
+ *
+ * The following functions are taken from the kernel sources and
+ * break aliasing rules in their original form.
+ *
+ * While the kernel is compiled with -fno-strict-aliasing,
+ * perf uses -Wstrict-aliasing=3, which makes the build fail
+ * under gcc 4.4.
+ *
+ * Using an extra __may_alias__ type to allow aliasing
+ * in this case.
+ */
+typedef __u8 __attribute__((__may_alias__)) __u8_alias_t;
+typedef __u16 __attribute__((__may_alias__)) __u16_alias_t;
+typedef __u32 __attribute__((__may_alias__)) __u32_alias_t;
+typedef __u64 __attribute__((__may_alias__)) __u64_alias_t;
+
+static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(__u8_alias_t *) res = *(volatile __u8_alias_t *) p; break;
+ case 2: *(__u16_alias_t *) res = *(volatile __u16_alias_t *) p; break;
+ case 4: *(__u32_alias_t *) res = *(volatile __u32_alias_t *) p; break;
+ case 8: *(__u64_alias_t *) res = *(volatile __u64_alias_t *) p; break;
+ default:
+ asm volatile ("" : : : "memory");
+ __builtin_memcpy((void *)res, (const void *)p, size);
+ asm volatile ("" : : : "memory");
+ }
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(volatile __u8_alias_t *) p = *(__u8_alias_t *) res; break;
+ case 2: *(volatile __u16_alias_t *) p = *(__u16_alias_t *) res; break;
+ case 4: *(volatile __u32_alias_t *) p = *(__u32_alias_t *) res; break;
+ case 8: *(volatile __u64_alias_t *) p = *(__u64_alias_t *) res; break;
+ default:
+ asm volatile ("" : : : "memory");
+ __builtin_memcpy((void *)p, (const void *)res, size);
+ asm volatile ("" : : : "memory");
+ }
+}
+
+#define READ_ONCE(x) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u = \
+ { .__c = { 0 } }; \
+ __read_once_size(&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
+
+#define WRITE_ONCE(x, val) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u = \
+ { .__val = (val) }; \
+ __write_once_size(&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
+
+/* Add a value using relaxed read and relaxed write. Less expensive than
+ * fetch_add when there is no write concurrency.
+ */
+#define NO_TEAR_ADD(x, val) WRITE_ONCE((x), READ_ONCE(x) + (val))
+#define NO_TEAR_INC(x) NO_TEAR_ADD((x), 1)
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#endif
diff --git a/samples/bpf/xdp_sample_shared.h b/samples/bpf/xdp_sample_shared.h
new file mode 100644
index 000000000000..8a7669a5d563
--- /dev/null
+++ b/samples/bpf/xdp_sample_shared.h
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef _XDP_SAMPLE_SHARED_H
+#define _XDP_SAMPLE_SHARED_H
+
+/*
+ * One set of counters. The structure is aligned to 64 bytes.
+ *
+ * NOTE(review): the exact meaning of 'issue' and 'info' depends on which map
+ * the record belongs to; the userspace code labels 'issue' as error/s, sched
+ * or drv_err/s and uses both as bulk-average divisors - confirm against the
+ * BPF side.
+ */
+struct datarec {
+ size_t processed;
+ size_t dropped;
+ size_t issue;
+ /* xdp_pass and info share storage; only one is meaningful per map */
+ union {
+ size_t xdp_pass;
+ size_t info;
+ };
+ size_t xdp_drop;
+ size_t xdp_redirect;
+} __attribute__((aligned(64)));
+
+#endif
diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c
new file mode 100644
index 000000000000..b32d82178199
--- /dev/null
+++ b/samples/bpf/xdp_sample_user.c
@@ -0,0 +1,1673 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/ethtool.h>
+#include <linux/hashtable.h>
+#include <linux/if_link.h>
+#include <linux/jhash.h>
+#include <linux/limits.h>
+#include <linux/list.h>
+#include <linux/sockios.h>
+#include <locale.h>
+#include <math.h>
+#include <net/if.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/signalfd.h>
+#include <sys/sysinfo.h>
+#include <sys/timerfd.h>
+#include <sys/utsname.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+
+/* Call printf(fmt, ...) only when @cond evaluates to true. */
+#define __sample_print(fmt, cond, ...) \
+	({ \
+		if (cond) \
+			printf(fmt, ##__VA_ARGS__); \
+	})
+
+/* Unconditional output. */
+#define print_always(fmt, ...) __sample_print(fmt, 1, ##__VA_ARGS__)
+/* Output only when LL_DEFAULT is set in sample_log_level. */
+#define print_default(fmt, ...) \
+	__sample_print(fmt, sample_log_level & LL_DEFAULT, ##__VA_ARGS__)
+/*
+ * Output when @err is positive or LL_DEFAULT is set, and latch
+ * sample_err_exp to true once a positive @err has been seen.
+ *
+ * @err is parenthesized at every use so that an expression argument such as
+ * "a || b" or "a & b" is compared against zero as a whole instead of being
+ * re-associated with the "> 0" comparison.
+ */
+#define __print_err(err, fmt, ...) \
+	({ \
+		__sample_print(fmt, \
+			       (err) > 0 || (sample_log_level & LL_DEFAULT), \
+			       ##__VA_ARGS__); \
+		sample_err_exp = sample_err_exp ? true : (err) > 0; \
+	})
+#define print_err(err, fmt, ...) __print_err(err, fmt, ##__VA_ARGS__)
+
+/*
+ * printf format for one output column: a number printed with the ' flag in a
+ * field of width 10 using conversion @x, followed by a left-justified label
+ * string of width 13.
+ */
+#define __COLUMN(x) "%'10" x " %-13s"
+#define FMT_COLUMNf __COLUMN(".0f")
+#define FMT_COLUMNd __COLUMN("d")
+#define FMT_COLUMNl __COLUMN("llu")
+/* Each helper below expands to the "value, label" argument pair for one column */
+#define RX(rx) rx, "rx/s"
+#define PPS(pps) pps, "pkt/s"
+#define DROP(drop) drop, "drop/s"
+#define ERR(err) err, "error/s"
+#define HITS(hits) hits, "hit/s"
+#define XMIT(xmit) xmit, "xmit/s"
+#define PASS(pass) pass, "pass/s"
+#define REDIR(redir) redir, "redir/s"
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+
+/* One slot past XDP_REDIRECT, used for actions without their own name */
+#define XDP_UNKNOWN (XDP_REDIRECT + 1)
+#define XDP_ACTION_MAX (XDP_UNKNOWN + 1)
+/* Number of entries in xdp_redirect_err_names[] */
+#define XDP_REDIRECT_ERR_MAX 7
+
+/*
+ * Index into sample_mmap[], sample_map[] and sample_map_count[].
+ * Entries before MAP_DEVMAP_XMIT_MULTI are resized and mmap()ed by the setup
+ * code; MAP_DEVMAP_XMIT_MULTI is only stored in sample_map[].
+ */
+enum map_type {
+ MAP_RX,
+ MAP_REDIRECT_ERR,
+ MAP_CPUMAP_ENQUEUE,
+ MAP_CPUMAP_KTHREAD,
+ MAP_EXCEPTION,
+ MAP_DEVMAP_XMIT,
+ MAP_DEVMAP_XMIT_MULTI,
+ NUM_MAP,
+};
+
+/* Bit flags tested against sample_log_level by the print macros and stats_print() */
+enum log_level {
+ LL_DEFAULT = 1U << 0, /* enables print_default() output */
+ LL_SIMPLE = 1U << 1,
+ LL_DEBUG = 1U << 2,
+};
+
+/* One sampled set of counters: a timestamp, the sum over CPUs, and per-CPU values */
+struct record {
+ __u64 timestamp; /* gettime() value in nanoseconds */
+ struct datarec total; /* sum of all cpu[] entries */
+ struct datarec *cpu; /* array from alloc_record_per_cpu() */
+};
+
+/* Hash table element of stats_record.xmit_map, looked up by @pair */
+struct map_entry {
+ struct hlist_node node;
+ /* Upper 32 bits: source ifindex, lower 32 bits: destination ifindex */
+ __u64 pair;
+ struct record val;
+};
+
+/*
+ * One snapshot of every statistic tracked here. The per-CPU arrays inside the
+ * records are allocated by alloc_stats_record() according to sample_mask and
+ * released by free_stats_record().
+ */
+struct stats_record {
+ struct record rx_cnt;
+ /* Indexed like xdp_redirect_err_names[]; slot 0 counts successes */
+ struct record redir_err[XDP_REDIRECT_ERR_MAX];
+ struct record kthread;
+ /* Indexed by XDP action, see xdp_action_names[] */
+ struct record exception[XDP_ACTION_MAX];
+ struct record devmap_xmit;
+ /* struct map_entry objects hashed by their 'pair' key */
+ DECLARE_HASHTABLE(xmit_map, 5);
+ /* One record per destination CPU; sample_n_cpus entries are allocated */
+ struct record enq[];
+};
+
+/*
+ * Aggregated per-second rates. The stats_get_*() helpers fill this in when
+ * they are given a non-NULL output pointer; stats_print() reads it.
+ */
+struct sample_output {
+ /* Sums accumulated across all helpers, shown on the summary line */
+ struct {
+ __u64 rx;
+ __u64 redir;
+ __u64 drop;
+ __u64 drop_xmit;
+ __u64 err;
+ __u64 xmit;
+ } totals;
+ struct {
+ __u64 pps;
+ __u64 drop;
+ __u64 err;
+ } rx_cnt;
+ struct {
+ __u64 suc;
+ __u64 err;
+ } redir_cnt;
+ struct {
+ __u64 hits;
+ } except_cnt;
+ struct {
+ __u64 pps;
+ __u64 drop;
+ __u64 err;
+ double bavg; /* average bulk size */
+ } xmit_cnt;
+};
+
+/*
+ * Description of one XDP program attachment; the table holds up to 32.
+ * NOTE(review): presumably filled by sample_install_xdp() and counted by
+ * sample_xdp_cnt - confirm against the rest of the file.
+ */
+struct xdp_desc {
+ int ifindex;
+ __u32 prog_id;
+ int flags;
+} sample_xdp_progs[32];
+
+/* Per-map state, indexed by enum map_type */
+struct datarec *sample_mmap[NUM_MAP]; /* mmap()ed map contents */
+struct bpf_map *sample_map[NUM_MAP]; /* maps handed to sample_setup_maps() */
+size_t sample_map_count[NUM_MAP]; /* number of struct datarec entries per map */
+enum log_level sample_log_level;
+struct sample_output sample_out;
+unsigned long sample_interval; /* multiplied by NANOSEC_PER_SEC where used, i.e. seconds */
+bool sample_err_exp; /* latched by print_err(), cleared in stats_print() */
+int sample_xdp_cnt;
+int sample_n_cpus; /* libbpf_num_possible_cpus() as read in sample_setup_maps() */
+int sample_sig_fd; /* signalfd created in __sample_init() */
+int sample_mask; /* SAMPLE_* feature mask passed to __sample_init() */
+
+/* Display names for the redirect result slots; index 0 is the success slot */
+static const char *xdp_redirect_err_names[XDP_REDIRECT_ERR_MAX] = {
+ /* Key=1 keeps unknown errors */
+ "Success",
+ "Unknown",
+ "EINVAL",
+ "ENETDOWN",
+ "EMSGSIZE",
+ "EOPNOTSUPP",
+ "ENOSPC",
+};
+
+/*
+ * Keyed from Unknown: entry i here describes xdp_redirect_err_names[i + 1],
+ * which is why the array is one element shorter.
+ */
+static const char *xdp_redirect_err_help[XDP_REDIRECT_ERR_MAX - 1] = {
+ "Unknown error",
+ "Invalid redirection",
+ "Device being redirected to is down",
+ "Packet length too large for device",
+ "Operation not supported",
+ "No space in ptr_ring of cpumap kthread",
+};
+
+/* Display names indexed by XDP action value, plus the XDP_UNKNOWN catch-all slot */
+static const char *xdp_action_names[XDP_ACTION_MAX] = {
+ [XDP_ABORTED] = "XDP_ABORTED",
+ [XDP_DROP] = "XDP_DROP",
+ [XDP_PASS] = "XDP_PASS",
+ [XDP_TX] = "XDP_TX",
+ [XDP_REDIRECT] = "XDP_REDIRECT",
+ [XDP_UNKNOWN] = "XDP_UNKNOWN",
+};
+
+/*
+ * Return the CLOCK_MONOTONIC time in nanoseconds.
+ *
+ * On failure a diagnostic naming the failing call and the errno text is
+ * written to stderr and UINT64_MAX is returned.
+ */
+static __u64 gettime(void)
+{
+	struct timespec t;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &t) < 0) {
+		fprintf(stderr, "Error with clock_gettime! (%s)\n",
+			strerror(errno));
+		return UINT64_MAX;
+	}
+	return (__u64)t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+/*
+ * Map an XDP action value to its display name.
+ *
+ * Returns NULL when @action lies outside [0, XDP_ACTION_MAX). The lower
+ * bound matters because @action is signed: a negative value would otherwise
+ * index in front of xdp_action_names[].
+ */
+static const char *action2str(int action)
+{
+	if (action >= 0 && action < XDP_ACTION_MAX)
+		return xdp_action_names[action];
+	return NULL;
+}
+
+/*
+ * Print a description of the output format to stdout.
+ *
+ * The general part is always printed; each per-field section is printed only
+ * when the corresponding SAMPLE_* bit is set in @mask.
+ */
+static void sample_print_help(int mask)
+{
+ printf("Output format description\n\n"
+ "By default, redirect success statistics are disabled, use -s to enable.\n"
+ "The terse output mode is default, verbose mode can be activated using -v\n"
+ "Use SIGQUIT (Ctrl + \\) to switch the mode dynamically at runtime\n\n"
+ "Terse mode displays at most the following fields:\n"
+ " rx/s Number of packets received per second\n"
+ " redir/s Number of packets successfully redirected per second\n"
+ " err,drop/s Aggregated count of errors per second (including dropped packets)\n"
+ " xmit/s Number of packets transmitted on the output device per second\n\n"
+ "Output description for verbose mode:\n"
+ " FIELD DESCRIPTION\n");
+
+ if (mask & SAMPLE_RX_CNT) {
+ printf(" receive\t\tDisplays the number of packets received & errors encountered\n"
+ " \t\t\tWhenever an error or packet drop occurs, details of per CPU error\n"
+ " \t\t\tand drop statistics will be expanded inline in terse mode.\n"
+ " \t\t\t\tpkt/s - Packets received per second\n"
+ " \t\t\t\tdrop/s - Packets dropped per second\n"
+ " \t\t\t\terror/s - Errors encountered per second\n\n");
+ }
+ if (mask & (SAMPLE_REDIRECT_CNT | SAMPLE_REDIRECT_ERR_CNT)) {
+ printf(" redirect\t\tDisplays the number of packets successfully redirected\n"
+ " \t\t\tErrors encountered are expanded under redirect_err field\n"
+ " \t\t\tNote that passing -s to enable it has a per packet overhead\n"
+ " \t\t\t\tredir/s - Packets redirected successfully per second\n\n"
+ " redirect_err\t\tDisplays the number of packets that failed redirection\n"
+ " \t\t\tThe errno is expanded under this field with per CPU count\n"
+ " \t\t\tThe recognized errors are:\n");
+
+ /* Skip "Success" and "Unknown"; the help table is offset by one */
+ for (int i = 2; i < XDP_REDIRECT_ERR_MAX; i++)
+ printf("\t\t\t %s: %s\n", xdp_redirect_err_names[i],
+ xdp_redirect_err_help[i - 1]);
+
+ printf(" \n\t\t\t\terror/s - Packets that failed redirection per second\n\n");
+ }
+
+ if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT) {
+ printf(" enqueue to cpu N\tDisplays the number of packets enqueued to bulk queue of CPU N\n"
+ " \t\t\tExpands to cpu:FROM->N to display enqueue stats for each CPU enqueuing to CPU N\n"
+ " \t\t\tReceived packets can be associated with the CPU redirect program is enqueuing \n"
+ " \t\t\tpackets to.\n"
+ " \t\t\t\tpkt/s - Packets enqueued per second from other CPU to CPU N\n"
+ " \t\t\t\tdrop/s - Packets dropped when trying to enqueue to CPU N\n"
+ " \t\t\t\tbulk-avg - Average number of packets processed for each event\n\n");
+ }
+
+ if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+ printf(" kthread\t\tDisplays the number of packets processed in CPUMAP kthread for each CPU\n"
+ " \t\t\tPackets consumed from ptr_ring in kthread, and its xdp_stats (after calling \n"
+ " \t\t\tCPUMAP bpf prog) are expanded below this. xdp_stats are expanded as a total and\n"
+ " \t\t\tthen per-CPU to associate it to each CPU's pinned CPUMAP kthread.\n"
+ " \t\t\t\tpkt/s - Packets consumed per second from ptr_ring\n"
+ " \t\t\t\tdrop/s - Packets dropped per second in kthread\n"
+ " \t\t\t\tsched - Number of times kthread called schedule()\n\n"
+ " \t\t\txdp_stats (also expands to per-CPU counts)\n"
+ " \t\t\t\tpass/s - XDP_PASS count for CPUMAP program execution\n"
+ " \t\t\t\tdrop/s - XDP_DROP count for CPUMAP program execution\n"
+ " \t\t\t\tredir/s - XDP_REDIRECT count for CPUMAP program execution\n\n");
+ }
+
+ if (mask & SAMPLE_EXCEPTION_CNT) {
+ printf(" xdp_exception\t\tDisplays xdp_exception tracepoint events\n"
+ " \t\t\tThis can occur due to internal driver errors, unrecognized\n"
+ " \t\t\tXDP actions and due to explicit user trigger by use of XDP_ABORTED\n"
+ " \t\t\tEach action is expanded below this field with its count\n"
+ " \t\t\t\thit/s - Number of times the tracepoint was hit per second\n\n");
+ }
+
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT) {
+ printf(" devmap_xmit\t\tDisplays devmap_xmit tracepoint events\n"
+ " \t\t\tThis tracepoint is invoked for successful transmissions on output\n"
+ " \t\t\tdevice but these statistics are not available for generic XDP mode,\n"
+ " \t\t\thence they will be omitted from the output when using SKB mode\n"
+ " \t\t\t\txmit/s - Number of packets that were transmitted per second\n"
+ " \t\t\t\tdrop/s - Number of packets that failed transmissions per second\n"
+ " \t\t\t\tdrv_err/s - Number of internal driver errors per second\n"
+ " \t\t\t\tbulk-avg - Average number of packets processed for each event\n\n");
+ }
+}
+
+/*
+ * Print the usage text for a sample program.
+ *
+ * Unless @error is set, the output format description selected by @mask is
+ * printed first. Then @doc and one line per entry of @long_options (a table
+ * terminated by an entry whose name is NULL) are written to stdout.
+ */
+void sample_usage(char *argv[], const struct option *long_options,
+		  const char *doc, int mask, bool error)
+{
+	const struct option *opt;
+
+	if (!error)
+		sample_print_help(mask);
+
+	printf("\n%s\nOption for %s:\n", doc, argv[0]);
+	for (opt = long_options; opt->name; opt++) {
+		printf(" --%-15s", opt->name);
+		if (opt->flag)
+			printf(" flag (internal value: %d)", *opt->flag);
+		else
+			printf("\t short-option: -%c", opt->val);
+		printf("\n");
+	}
+	printf("\n");
+}
+
+/*
+ * Allocate a zeroed array with one struct datarec per possible CPU.
+ *
+ * Returns NULL, after printing a diagnostic to stderr, when the CPU count
+ * cannot be determined or the allocation fails. The count is checked while
+ * still signed: libbpf_num_possible_cpus() reports failure as a negative
+ * value, which must not be turned into a huge unsigned element count.
+ * The caller owns the array and releases it with free().
+ */
+static struct datarec *alloc_record_per_cpu(void)
+{
+	int nr_cpus = libbpf_num_possible_cpus();
+	struct datarec *array;
+
+	if (nr_cpus <= 0) {
+		fprintf(stderr, "Failed to get number of possible CPUs (%d)\n",
+			nr_cpus);
+		return NULL;
+	}
+
+	array = calloc(nr_cpus, sizeof(*array));
+	if (!array) {
+		fprintf(stderr, "Failed to allocate memory (nr_cpus: %u)\n",
+			(unsigned int)nr_cpus);
+		return NULL;
+	}
+	return array;
+}
+
+/*
+ * Prepare a freshly allocated map entry for insertion under key @pair:
+ * initialize the list node, stamp the current time and allocate the per-CPU
+ * counter array. Returns 0 on success or -ENOMEM if that allocation fails.
+ */
+static int map_entry_init(struct map_entry *e, __u64 pair)
+{
+	struct record *val = &e->val;
+
+	e->pair = pair;
+	INIT_HLIST_NODE(&e->node);
+
+	val->timestamp = gettime();
+	val->cpu = alloc_record_per_cpu();
+
+	return val->cpu ? 0 : -ENOMEM;
+}
+
+/*
+ * Snapshot the per-CPU counters in @values into @rec.
+ *
+ * Every field is read once with READ_ONCE() into rec->cpu[], the per-field
+ * sums over all possible CPUs are stored in rec->total, and rec->timestamp
+ * is set just before the values are read.
+ */
+static void map_collect_percpu(struct datarec *values, struct record *rec)
+{
+	/* Userspace sees one value slot per possible CPU for percpu data */
+	unsigned int nr_cpus = libbpf_num_possible_cpus();
+	__u64 processed = 0, dropped = 0, issue = 0;
+	__u64 pass = 0, drop = 0, redirect = 0;
+	int cpu;
+
+	/* Take the timestamp as close as possible to the reads below */
+	rec->timestamp = gettime();
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		struct datarec *src = &values[cpu];
+		struct datarec *dst = &rec->cpu[cpu];
+
+		dst->processed = READ_ONCE(src->processed);
+		dst->dropped = READ_ONCE(src->dropped);
+		dst->issue = READ_ONCE(src->issue);
+		dst->xdp_pass = READ_ONCE(src->xdp_pass);
+		dst->xdp_drop = READ_ONCE(src->xdp_drop);
+		dst->xdp_redirect = READ_ONCE(src->xdp_redirect);
+
+		processed += dst->processed;
+		dropped += dst->dropped;
+		issue += dst->issue;
+		pass += dst->xdp_pass;
+		drop += dst->xdp_drop;
+		redirect += dst->xdp_redirect;
+	}
+
+	rec->total.processed = processed;
+	rec->total.dropped = dropped;
+	rec->total.issue = issue;
+	rec->total.xdp_pass = pass;
+	rec->total.xdp_drop = drop;
+	rec->total.xdp_redirect = redirect;
+}
+
+/*
+ * Walk the map behind @map_fd in batches and fold every entry into
+ * rec->xmit_map, creating a struct map_entry for each key seen for the first
+ * time.
+ *
+ * Returns 0 when the walk ends and -ENOMEM when a new entry cannot be
+ * allocated. A batch lookup failure other than ENOENT also ends the walk and
+ * still returns 0, so the entries collected so far are kept.
+ */
+static int map_collect_percpu_devmap(int map_fd, struct stats_record *rec)
+{
+	enum { BATCH_SZ = 32 };
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	__u32 batch, count = BATCH_SZ;
+	struct datarec *values;
+	bool init = false;
+	int ret = -ENOMEM;
+	__u64 *keys;
+	__u32 i;
+
+	keys = calloc(count, sizeof(__u64));
+	if (!keys)
+		return -ENOMEM;
+	values = calloc(count * nr_cpus, sizeof(struct datarec));
+	if (!values)
+		goto out;
+
+	for (;;) {
+		bool last = false;
+		int err;
+
+		err = bpf_map_lookup_batch(map_fd, init ? &batch : NULL, &batch,
+					   keys, values, &count, NULL);
+		if (err < 0) {
+			if (errno != ENOENT)
+				break;
+			/*
+			 * ENOENT marks the final, possibly partial, batch.
+			 * errno is consulted only after a failure so that a
+			 * stale ENOENT left by an earlier call cannot end the
+			 * walk early.
+			 */
+			last = true;
+		}
+
+		init = true;
+		for (i = 0; i < count; i++) {
+			struct map_entry *e, *x = NULL;
+			__u64 pair = keys[i];
+			struct datarec *arr;
+
+			arr = &values[i * nr_cpus];
+			hash_for_each_possible(rec->xmit_map, e, node, pair) {
+				if (e->pair == pair) {
+					x = e;
+					break;
+				}
+			}
+			if (!x) {
+				x = calloc(1, sizeof(*x));
+				if (!x)
+					goto out;
+				if (map_entry_init(x, pair) < 0) {
+					free(x);
+					goto out;
+				}
+				hash_add(rec->xmit_map, &x->node, pair);
+			}
+			map_collect_percpu(arr, &x->val);
+		}
+
+		if (last)
+			break;
+		/* The lookup overwrote count with the number of entries returned */
+		count = BATCH_SZ;
+	}
+
+	ret = 0;
+out:
+	free(values);
+	free(keys);
+	return ret;
+}
+
+/*
+ * Allocate a zeroed stats_record with sample_n_cpus trailing enq[] records
+ * and the per-CPU arrays for every statistic enabled in sample_mask.
+ *
+ * Returns the record, or NULL after printing a diagnostic to stderr and
+ * releasing everything allocated so far. The error labels at the bottom undo
+ * the allocations in reverse order; because the record comes from calloc(),
+ * pointers for disabled statistics are NULL and freeing them is harmless.
+ */
+static struct stats_record *alloc_stats_record(void)
+{
+ struct stats_record *rec;
+ int i;
+
+ rec = calloc(1, sizeof(*rec) + sample_n_cpus * sizeof(struct record));
+ if (!rec) {
+ fprintf(stderr, "Failed to allocate memory\n");
+ return NULL;
+ }
+
+ if (sample_mask & SAMPLE_RX_CNT) {
+ rec->rx_cnt.cpu = alloc_record_per_cpu();
+ if (!rec->rx_cnt.cpu) {
+ fprintf(stderr,
+ "Failed to allocate rx_cnt per-CPU array\n");
+ goto end_rec;
+ }
+ }
+ if (sample_mask & (SAMPLE_REDIRECT_CNT | SAMPLE_REDIRECT_ERR_CNT)) {
+ for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++) {
+ rec->redir_err[i].cpu = alloc_record_per_cpu();
+ if (!rec->redir_err[i].cpu) {
+ fprintf(stderr,
+ "Failed to allocate redir_err per-CPU array for "
+ "\"%s\" case\n",
+ xdp_redirect_err_names[i]);
+ /* Free the slots filled so far, then skip the label that frees them all */
+ while (i--)
+ free(rec->redir_err[i].cpu);
+ goto end_rx_cnt;
+ }
+ }
+ }
+ if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+ rec->kthread.cpu = alloc_record_per_cpu();
+ if (!rec->kthread.cpu) {
+ fprintf(stderr,
+ "Failed to allocate kthread per-CPU array\n");
+ goto end_redir;
+ }
+ }
+ if (sample_mask & SAMPLE_EXCEPTION_CNT) {
+ for (i = 0; i < XDP_ACTION_MAX; i++) {
+ rec->exception[i].cpu = alloc_record_per_cpu();
+ if (!rec->exception[i].cpu) {
+ fprintf(stderr,
+ "Failed to allocate exception per-CPU array for "
+ "\"%s\" case\n",
+ action2str(i));
+ while (i--)
+ free(rec->exception[i].cpu);
+ goto end_kthread;
+ }
+ }
+ }
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT) {
+ rec->devmap_xmit.cpu = alloc_record_per_cpu();
+ if (!rec->devmap_xmit.cpu) {
+ fprintf(stderr,
+ "Failed to allocate devmap_xmit per-CPU array\n");
+ goto end_exception;
+ }
+ }
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ hash_init(rec->xmit_map);
+ if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT) {
+ for (i = 0; i < sample_n_cpus; i++) {
+ rec->enq[i].cpu = alloc_record_per_cpu();
+ if (!rec->enq[i].cpu) {
+ fprintf(stderr,
+ "Failed to allocate enqueue per-CPU array for "
+ "CPU %d\n",
+ i);
+ while (i--)
+ free(rec->enq[i].cpu);
+ goto end_devmap_xmit;
+ }
+ }
+ }
+
+ return rec;
+
+ /* Unwind in reverse allocation order; each label falls through to the next */
+end_devmap_xmit:
+ free(rec->devmap_xmit.cpu);
+end_exception:
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ free(rec->exception[i].cpu);
+end_kthread:
+ free(rec->kthread.cpu);
+end_redir:
+ for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++)
+ free(rec->redir_err[i].cpu);
+end_rx_cnt:
+ free(rec->rx_cnt.cpu);
+end_rec:
+ free(rec);
+ return NULL;
+}
+
+/*
+ * Release a record obtained from alloc_stats_record(): every per-CPU array,
+ * every entry still linked into the xmit_map hash table, and the record
+ * itself.
+ */
+static void free_stats_record(struct stats_record *r)
+{
+	struct hlist_node *next;
+	struct map_entry *entry;
+	int idx;
+
+	free(r->rx_cnt.cpu);
+
+	for (idx = 0; idx < XDP_REDIRECT_ERR_MAX; idx++)
+		free(r->redir_err[idx].cpu);
+
+	free(r->kthread.cpu);
+
+	for (idx = 0; idx < XDP_ACTION_MAX; idx++)
+		free(r->exception[idx].cpu);
+
+	free(r->devmap_xmit.cpu);
+
+	/* Entries own their per-CPU array, so drop it before the entry */
+	hash_for_each_safe(r->xmit_map, idx, next, entry, node) {
+		hash_del(&entry->node);
+		free(entry->val.cpu);
+		free(entry);
+	}
+
+	for (idx = 0; idx < sample_n_cpus; idx++)
+		free(r->enq[idx].cpu);
+
+	free(r);
+}
+
+/*
+ * Time elapsed between the previous record @p and the current record @r,
+ * in seconds. Returns 0 when both carry the same timestamp.
+ */
+static double calc_period(struct record *r, struct record *p)
+{
+	__u64 delta_ns = r->timestamp - p->timestamp;
+
+	if (!delta_ns)
+		return 0;
+
+	return (double)delta_ns / NANOSEC_PER_SEC;
+}
+
+/*
+ * Round @val to a whole number: down when its distance above floor(val) is
+ * below 0.5, otherwise up.
+ */
+static double sample_round(double val)
+{
+	double lower = floor(val);
+
+	return (val - lower < 0.5) ? lower : ceil(val);
+}
+
+/*
+ * Rate of 'processed' between the previous counters @p and the current
+ * counters @r over @period_ seconds, rounded with sample_round().
+ * Returns 0 unless @period_ is positive.
+ */
+static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
+{
+	__u64 delta;
+
+	if (!(period_ > 0))
+		return 0;
+
+	delta = r->processed - p->processed;
+	return sample_round(delta / period_);
+}
+
+/*
+ * Rate of 'dropped' between the previous counters @p and the current
+ * counters @r over @period_ seconds, rounded with sample_round().
+ * Returns 0 unless @period_ is positive.
+ */
+static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_)
+{
+	__u64 delta;
+
+	if (!(period_ > 0))
+		return 0;
+
+	delta = r->dropped - p->dropped;
+	return sample_round(delta / period_);
+}
+
+/*
+ * Rate of 'issue' between the previous counters @p and the current
+ * counters @r over @period_ seconds, rounded with sample_round().
+ * Returns 0 unless @period_ is positive.
+ */
+static __u64 calc_errs_pps(struct datarec *r, struct datarec *p, double period_)
+{
+	__u64 delta;
+
+	if (!(period_ > 0))
+		return 0;
+
+	delta = r->issue - p->issue;
+	return sample_round(delta / period_);
+}
+
+/*
+ * Rate of 'info' between the previous counters @p and the current
+ * counters @r over @period_ seconds, rounded with sample_round().
+ * Returns 0 unless @period_ is positive.
+ */
+static __u64 calc_info_pps(struct datarec *r, struct datarec *p, double period_)
+{
+	__u64 delta;
+
+	if (!(period_ > 0))
+		return 0;
+
+	delta = r->info - p->info;
+	return sample_round(delta / period_);
+}
+
+/*
+ * Compute the unrounded per-second rates of the xdp_pass, xdp_drop and
+ * xdp_redirect counters between @p (previous) and @r (current) over
+ * @period_ seconds. All three outputs are 0 unless @period_ is positive.
+ */
+static void calc_xdp_pps(struct datarec *r, struct datarec *p, double *xdp_pass,
+			 double *xdp_drop, double *xdp_redirect, double period_)
+{
+	if (period_ > 0) {
+		*xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_;
+		*xdp_pass = (r->xdp_pass - p->xdp_pass) / period_;
+		*xdp_drop = (r->xdp_drop - p->xdp_drop) / period_;
+	} else {
+		*xdp_pass = 0;
+		*xdp_drop = 0;
+		*xdp_redirect = 0;
+	}
+}
+
+/*
+ * Receive statistics.
+ *
+ * Prints one line for each of the first @nr_cpus CPUs that shows any
+ * packet, drop or error rate. When @out is non-NULL the totals are stored in
+ * out->rx_cnt and added to out->totals.
+ */
+static void stats_get_rx_cnt(struct stats_record *stats_rec,
+			     struct stats_record *stats_prev,
+			     unsigned int nr_cpus, struct sample_output *out)
+{
+	struct record *cur = &stats_rec->rx_cnt;
+	struct record *old = &stats_prev->rx_cnt;
+	double period = calc_period(cur, old);
+	double pps, drop, err;
+	int cpu;
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		struct datarec *r = &cur->cpu[cpu];
+		struct datarec *p = &old->cpu[cpu];
+		char label[64];
+
+		pps = calc_pps(r, p, period);
+		drop = calc_drop_pps(r, p, period);
+		err = calc_errs_pps(r, p, period);
+		if (pps || drop || err) {
+			snprintf(label, sizeof(label), "cpu:%d", cpu);
+			print_default(" %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+				      "\n",
+				      label, PPS(pps), DROP(drop), ERR(err));
+		}
+	}
+
+	if (!out)
+		return;
+
+	pps = calc_pps(&cur->total, &old->total, period);
+	drop = calc_drop_pps(&cur->total, &old->total, period);
+	err = calc_errs_pps(&cur->total, &old->total, period);
+
+	out->rx_cnt.pps = pps;
+	out->rx_cnt.drop = drop;
+	out->rx_cnt.err = err;
+	out->totals.rx += pps;
+	out->totals.drop += drop;
+	out->totals.err += err;
+}
+
+/*
+ * Print cpumap enqueue statistics: for every destination CPU one total line
+ * (only when packets or drops were seen) followed by one line per source CPU
+ * with activity. The third column is the average bulk size, computed as the
+ * packet rate divided by the 'issue' rate.
+ */
+static void stats_get_cpumap_enqueue(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus)
+{
+ struct record *rec, *prev;
+ double t, pps, drop, err;
+ int i, to_cpu;
+
+ /* cpumap enqueue stats */
+ for (to_cpu = 0; to_cpu < sample_n_cpus; to_cpu++) {
+ rec = &stats_rec->enq[to_cpu];
+ prev = &stats_prev->enq[to_cpu];
+ t = calc_period(rec, prev);
+
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ if (pps > 0 || drop > 0) {
+ char str[64];
+
+ snprintf(str, sizeof(str), "enqueue to cpu %d", to_cpu);
+
+ /* From here on err no longer holds a rate but the bulk average */
+ if (err > 0)
+ err = pps / err; /* calc average bulk size */
+
+ print_err(drop,
+ " %-20s " FMT_COLUMNf FMT_COLUMNf __COLUMN(
+ ".2f") "\n",
+ str, PPS(pps), DROP(drop), err, "bulk-avg");
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d->%d", i, to_cpu);
+ if (err > 0)
+ err = pps / err; /* calc average bulk size */
+ print_default(
+ " %-18s " FMT_COLUMNf FMT_COLUMNf __COLUMN(
+ ".2f") "\n",
+ str, PPS(pps), DROP(drop), err, "bulk-avg");
+ }
+ }
+}
+
+/*
+ * Print the xdp_stats lines of the cpumap kthread record: the pass, drop and
+ * redirect rates as a total first (if any is non-zero), then for each of the
+ * first @nr_cpus CPUs with a non-zero rate.
+ */
+static void stats_get_cpumap_remote(struct stats_record *stats_rec,
+				    struct stats_record *stats_prev,
+				    unsigned int nr_cpus)
+{
+	struct record *cur = &stats_rec->kthread;
+	struct record *old = &stats_prev->kthread;
+	double period = calc_period(cur, old);
+	double pass, drop, redir;
+	int cpu;
+
+	calc_xdp_pps(&cur->total, &old->total, &pass, &drop, &redir, period);
+	if (pass || drop || redir) {
+		print_err(drop,
+			  " %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf "\n",
+			  "xdp_stats", PASS(pass), DROP(drop), REDIR(redir));
+	}
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		char label[64];
+
+		calc_xdp_pps(&cur->cpu[cpu], &old->cpu[cpu], &pass, &drop,
+			     &redir, period);
+		if (pass || drop || redir) {
+			snprintf(label, sizeof(label), "cpu:%d", cpu);
+			print_default(" %-16s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+				      "\n",
+				      label, PASS(pass), DROP(drop),
+				      REDIR(redir));
+		}
+	}
+}
+
+/*
+ * Print the cpumap kthread statistics: a total line that is emitted through
+ * print_err() (so always under LL_DEFAULT, otherwise only when drops were
+ * seen), then one line per CPU with activity. The 'issue' rate is shown in
+ * the column labelled "sched".
+ */
+static void stats_get_cpumap_kthread(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus)
+{
+ struct record *rec, *prev;
+ double t, pps, drop, err;
+ int i;
+
+ rec = &stats_rec->kthread;
+ prev = &stats_prev->kthread;
+ t = calc_period(rec, prev);
+
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ print_err(drop, " %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf "\n",
+ pps ? "kthread total" : "kthread", PPS(pps), DROP(drop), err,
+ "sched");
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ "\n",
+ str, PPS(pps), DROP(drop), err, "sched");
+ }
+}
+
+/*
+ * Successful redirect statistics, taken from slot 0 of redir_err[].
+ *
+ * Prints one line for each of the first @nr_cpus CPUs with a non-zero rate.
+ * When @out is non-NULL the total rate is stored in out->redir_cnt.suc and
+ * added to out->totals.redir.
+ */
+static void stats_get_redirect_cnt(struct stats_record *stats_rec,
+				   struct stats_record *stats_prev,
+				   unsigned int nr_cpus,
+				   struct sample_output *out)
+{
+	struct record *cur = &stats_rec->redir_err[0];
+	struct record *old = &stats_prev->redir_err[0];
+	double period = calc_period(cur, old);
+	double pps;
+	int cpu;
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		char label[64];
+
+		pps = calc_pps(&cur->cpu[cpu], &old->cpu[cpu], period);
+		if (pps) {
+			snprintf(label, sizeof(label), "cpu:%d", cpu);
+			print_default(" %-18s " FMT_COLUMNf "\n", label,
+				      REDIR(pps));
+		}
+	}
+
+	if (!out)
+		return;
+
+	pps = calc_pps(&cur->total, &old->total, period);
+	out->redir_cnt.suc = pps;
+	out->totals.redir += pps;
+}
+
+/*
+ * Redirect error statistics for every error slot (index 1 and up).
+ *
+ * With @out == NULL a total line is printed for each error that occurred;
+ * per-CPU lines are printed for the first @nr_cpus CPUs in either case. With
+ * @out != NULL the summed error rate is stored in out->redir_cnt.err and
+ * added to out->totals.err.
+ */
+static void stats_get_redirect_err_cnt(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out)
+{
+ struct record *rec, *prev;
+ double t, drop, sum = 0;
+ int rec_i, i;
+
+ for (rec_i = 1; rec_i < XDP_REDIRECT_ERR_MAX; rec_i++) {
+ char str[64];
+
+ rec = &stats_rec->redir_err[rec_i];
+ prev = &stats_prev->redir_err[rec_i];
+ t = calc_period(rec, prev);
+
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ if (drop > 0 && !out) {
+ snprintf(str, sizeof(str),
+ sample_log_level & LL_DEFAULT ? "%s total" :
+ "%s",
+ xdp_redirect_err_names[rec_i]);
+ print_err(drop, " %-18s " FMT_COLUMNf "\n", str,
+ ERR(drop));
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ /* Shadows the outer drop, which keeps the total for this error */
+ double drop;
+
+ drop = calc_drop_pps(r, p, t);
+ if (!drop)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-16s" FMT_COLUMNf "\n", str,
+ ERR(drop));
+ }
+
+ /* Outer drop: the total rate of this error, not a per-CPU value */
+ sum += drop;
+ }
+
+ if (out) {
+ out->redir_cnt.err = sum;
+ out->totals.err += sum;
+ }
+}
+
+/*
+ * xdp_exception statistics for every XDP action slot.
+ *
+ * With @out == NULL each action that occurred gets a total line followed by
+ * per-CPU lines for the first @nr_cpus CPUs. With @out != NULL nothing is
+ * printed; the summed rate is stored in out->except_cnt.hits and added to
+ * out->totals.err.
+ */
+static void stats_get_exception_cnt(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out)
+{
+ double t, drop, sum = 0;
+ struct record *rec, *prev;
+ int rec_i, i;
+
+ for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
+ rec = &stats_rec->exception[rec_i];
+ prev = &stats_prev->exception[rec_i];
+ t = calc_period(rec, prev);
+
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ /* Fold out errors after heading */
+ sum += drop;
+
+ if (drop > 0 && !out) {
+ print_always(" %-18s " FMT_COLUMNf "\n",
+ action2str(rec_i), ERR(drop));
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+ /* Shadows the outer drop; per-CPU value only */
+ double drop;
+
+ drop = calc_drop_pps(r, p, t);
+ if (!drop)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-16s" FMT_COLUMNf "\n",
+ str, ERR(drop));
+ }
+ }
+ }
+
+ if (out) {
+ out->except_cnt.hits = sum;
+ out->totals.err += sum;
+ }
+}
+
+/*
+ * devmap_xmit statistics.
+ *
+ * Prints one line for each of the first @nr_cpus CPUs with activity. When
+ * @out is non-NULL the totals are stored in out->xmit_cnt and added to
+ * out->totals. The bulk average is (xmit rate + drop rate) divided by the
+ * 'info' rate.
+ */
+static void stats_get_devmap_xmit(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out)
+{
+ double pps, drop, info, err;
+ struct record *rec, *prev;
+ double t;
+ int i;
+
+ rec = &stats_rec->devmap_xmit;
+ prev = &stats_prev->devmap_xmit;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ info = calc_info_pps(r, p, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+ print_default(" %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ __COLUMN(".2f") "\n",
+ str, XMIT(pps), DROP(drop), err, "drv_err/s",
+ info, "bulk-avg");
+ }
+ if (out) {
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ info = calc_info_pps(&rec->total, &prev->total, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ out->xmit_cnt.pps = pps;
+ out->xmit_cnt.drop = drop;
+ out->xmit_cnt.bavg = info;
+ out->xmit_cnt.err = err;
+ out->totals.xmit += pps;
+ out->totals.drop_xmit += drop;
+ out->totals.err += err;
+ }
+}
+
+/*
+ * devmap_xmit statistics per (source, destination) interface pair.
+ *
+ * Walks every entry of stats_rec->xmit_map and compares it with the entry of
+ * the same key in stats_prev->xmit_map. A pair without a previous entry is
+ * compared against an all-zero record dated one sample interval earlier.
+ *
+ * With @out != NULL only out->totals is updated and nothing is printed.
+ * Otherwise one line per active pair is printed, followed by one line for
+ * each of the first @nr_cpus CPUs with activity.
+ *
+ * @xmit_total is accepted for the callers' sake but not used.
+ */
+static void stats_get_devmap_xmit_multi(struct stats_record *stats_rec,
+					struct stats_record *stats_prev,
+					unsigned int nr_cpus,
+					struct sample_output *out,
+					bool xmit_total)
+{
+	double pps, drop, info, err;
+	struct map_entry *entry;
+	struct record *r, *p;
+	double t;
+	int bkt;
+
+	hash_for_each(stats_rec->xmit_map, bkt, entry, node) {
+		struct map_entry *e, *x = NULL;
+		char ifname_from[IFNAMSIZ];
+		char ifname_to[IFNAMSIZ];
+		const char *fstr, *tstr;
+		struct record beg = {};
+		__u32 from_idx, to_idx;
+		__u64 prev_time;
+		char str[128];
+		__u64 pair;
+		int i;
+
+		/*
+		 * Widen before multiplying: where unsigned long is 32 bits
+		 * the product would wrap for intervals of five seconds or
+		 * more.
+		 */
+		prev_time = (__u64)sample_interval * NANOSEC_PER_SEC;
+
+		pair = entry->pair;
+		from_idx = pair >> 32;
+		to_idx = pair & 0xFFFFFFFF;
+
+		r = &entry->val;
+		beg.timestamp = r->timestamp - prev_time;
+
+		/* Find matching entry from stats_prev map */
+		hash_for_each_possible(stats_prev->xmit_map, e, node, pair) {
+			if (e->pair == pair) {
+				x = e;
+				break;
+			}
+		}
+		if (x)
+			p = &x->val;
+		else
+			p = &beg;
+		t = calc_period(r, p);
+		pps = calc_pps(&r->total, &p->total, t);
+		drop = calc_drop_pps(&r->total, &p->total, t);
+		info = calc_info_pps(&r->total, &p->total, t);
+		if (info > 0)
+			info = (pps + drop) / info; /* calc avg bulk */
+		err = calc_errs_pps(&r->total, &p->total, t);
+
+		if (out) {
+			/* We are responsible for filling out totals */
+			out->totals.xmit += pps;
+			out->totals.drop_xmit += drop;
+			out->totals.err += err;
+			continue;
+		}
+
+		fstr = tstr = NULL;
+		if (if_indextoname(from_idx, ifname_from))
+			fstr = ifname_from;
+		if (if_indextoname(to_idx, ifname_to))
+			tstr = ifname_to;
+
+		snprintf(str, sizeof(str), "xmit %s->%s", fstr ?: "?",
+			 tstr ?: "?");
+		/* Skip idle streams of redirection */
+		if (pps || drop || err) {
+			print_err(drop,
+				  " %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+				  __COLUMN(".2f") "\n", str, XMIT(pps), DROP(drop),
+				  err, "drv_err/s", info, "bulk-avg");
+		}
+
+		for (i = 0; i < nr_cpus; i++) {
+			struct datarec *rc = &r->cpu[i];
+			struct datarec *pc, p_beg = {};
+			char cpu_str[64];
+
+			/* The synthetic start record has no per-CPU array */
+			pc = p == &beg ? &p_beg : &p->cpu[i];
+
+			pps = calc_pps(rc, pc, t);
+			drop = calc_drop_pps(rc, pc, t);
+			err = calc_errs_pps(rc, pc, t);
+
+			if (!pps && !drop && !err)
+				continue;
+
+			snprintf(cpu_str, sizeof(cpu_str), "cpu:%d", i);
+			info = calc_info_pps(rc, pc, t);
+			if (info > 0)
+				info = (pps + drop) / info; /* calc avg bulk */
+
+			print_default(" %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+				      __COLUMN(".2f") "\n", cpu_str, XMIT(pps),
+				      DROP(drop), err, "drv_err/s", info, "bulk-avg");
+		}
+	}
+}
+
+/*
+ * Print one report: a summary line built from out->totals, then one section
+ * for every statistic enabled in @mask.
+ *
+ * @prefix: text at the start of the summary line, "Summary" when NULL
+ * @r, @p:  current and previous snapshot, used for the per-CPU detail lines
+ * @out:    totals computed beforehand; read here, not modified
+ *
+ * The stats_get_*() helpers are called with a NULL output pointer, so at this
+ * point they only print. sample_err_exp is cleared once the report is done.
+ */
+static void stats_print(const char *prefix, int mask, struct stats_record *r,
+ struct stats_record *p, struct sample_output *out)
+{
+ int nr_cpus = libbpf_num_possible_cpus();
+ const char *str;
+
+ print_always("%-23s", prefix ?: "Summary");
+ if (mask & SAMPLE_RX_CNT)
+ print_always(FMT_COLUMNl, RX(out->totals.rx));
+ if (mask & SAMPLE_REDIRECT_CNT)
+ print_always(FMT_COLUMNl, REDIR(out->totals.redir));
+ printf(FMT_COLUMNl,
+ out->totals.err + out->totals.drop + out->totals.drop_xmit,
+ "err,drop/s");
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT ||
+ mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ printf(FMT_COLUMNl, XMIT(out->totals.xmit));
+ printf("\n");
+
+ if (mask & SAMPLE_RX_CNT) {
+ str = (sample_log_level & LL_DEFAULT) && out->rx_cnt.pps ?
+ "receive total" :
+ "receive";
+ print_err((out->rx_cnt.err || out->rx_cnt.drop),
+ " %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl "\n",
+ str, PPS(out->rx_cnt.pps), DROP(out->rx_cnt.drop),
+ ERR(out->rx_cnt.err));
+
+ stats_get_rx_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT)
+ stats_get_cpumap_enqueue(r, p, nr_cpus);
+
+ if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+ stats_get_cpumap_kthread(r, p, nr_cpus);
+ stats_get_cpumap_remote(r, p, nr_cpus);
+ }
+
+ if (mask & SAMPLE_REDIRECT_CNT) {
+ str = out->redir_cnt.suc ? "redirect total" : "redirect";
+ print_default(" %-20s " FMT_COLUMNl "\n", str,
+ REDIR(out->redir_cnt.suc));
+
+ stats_get_redirect_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_REDIRECT_ERR_CNT) {
+ str = (sample_log_level & LL_DEFAULT) && out->redir_cnt.err ?
+ "redirect_err total" :
+ "redirect_err";
+ print_err(out->redir_cnt.err, " %-20s " FMT_COLUMNl "\n", str,
+ ERR(out->redir_cnt.err));
+
+ stats_get_redirect_err_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_EXCEPTION_CNT) {
+ str = out->except_cnt.hits ? "xdp_exception total" :
+ "xdp_exception";
+
+ print_err(out->except_cnt.hits, " %-20s " FMT_COLUMNl "\n", str,
+ HITS(out->except_cnt.hits));
+
+ stats_get_exception_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT) {
+ str = (sample_log_level & LL_DEFAULT) && out->xmit_cnt.pps ?
+ "devmap_xmit total" :
+ "devmap_xmit";
+
+ print_err(out->xmit_cnt.err || out->xmit_cnt.drop,
+ " %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl
+ __COLUMN(".2f") "\n",
+ str, XMIT(out->xmit_cnt.pps),
+ DROP(out->xmit_cnt.drop), out->xmit_cnt.err,
+ "drv_err/s", out->xmit_cnt.bavg, "bulk-avg");
+
+ stats_get_devmap_xmit(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ stats_get_devmap_xmit_multi(r, p, nr_cpus, NULL,
+ mask & SAMPLE_DEVMAP_XMIT_CNT);
+
+ /* Blank separator line: always in default mode, in simple mode only after an error line */
+ if (sample_log_level & LL_DEFAULT ||
+ ((sample_log_level & LL_SIMPLE) && sample_err_exp)) {
+ sample_err_exp = false;
+ printf("\n");
+ }
+}
+
+/*
+ * Record the maps in @maps (indexed by enum map_type) and resize every map
+ * before MAP_DEVMAP_XMIT_MULTI to the entry count its statistic needs for the
+ * number of possible CPUs. MAP_DEVMAP_XMIT_MULTI is stored without resizing.
+ *
+ * Returns 0 on success, the negative value reported by
+ * libbpf_num_possible_cpus() if the CPU count is unavailable, -EINVAL for an
+ * unexpected map index, or -errno when resizing fails.
+ */
+int sample_setup_maps(struct bpf_map **maps)
+{
+	sample_n_cpus = libbpf_num_possible_cpus();
+	if (sample_n_cpus < 0)
+		return sample_n_cpus;
+
+	for (int i = 0; i < MAP_DEVMAP_XMIT_MULTI; i++) {
+		sample_map[i] = maps[i];
+
+		switch (i) {
+		case MAP_RX:
+		case MAP_CPUMAP_KTHREAD:
+		case MAP_DEVMAP_XMIT:
+			sample_map_count[i] = sample_n_cpus;
+			break;
+		case MAP_REDIRECT_ERR:
+			sample_map_count[i] =
+				XDP_REDIRECT_ERR_MAX * sample_n_cpus;
+			break;
+		case MAP_EXCEPTION:
+			sample_map_count[i] = XDP_ACTION_MAX * sample_n_cpus;
+			/*
+			 * Must not fall through: the next case would replace
+			 * this count with n_cpus * n_cpus.
+			 */
+			break;
+		case MAP_CPUMAP_ENQUEUE:
+			sample_map_count[i] = sample_n_cpus * sample_n_cpus;
+			break;
+		default:
+			return -EINVAL;
+		}
+		if (bpf_map__resize(sample_map[i], sample_map_count[i]) < 0)
+			return -errno;
+	}
+	sample_map[MAP_DEVMAP_XMIT_MULTI] = maps[MAP_DEVMAP_XMIT_MULTI];
+	return 0;
+}
+
+/*
+ * mmap() every stats map below MAP_DEVMAP_XMIT_MULTI into sample_mmap[],
+ * using the element counts recorded in sample_map_count[].
+ *
+ * Returns 0 on success or -errno from the first mmap() that fails.
+ */
+static int sample_setup_maps_mappings(void)
+{
+	int idx = 0;
+
+	while (idx < MAP_DEVMAP_XMIT_MULTI) {
+		size_t len = sample_map_count[idx] * sizeof(struct datarec);
+		int map_fd = bpf_map__fd(sample_map[idx]);
+
+		sample_mmap[idx] = mmap(NULL, len, PROT_READ | PROT_WRITE,
+					MAP_SHARED, map_fd, 0);
+		if (sample_mmap[idx] == MAP_FAILED)
+			return -errno;
+		idx++;
+	}
+	return 0;
+}
+
+/*
+ * Common sample initialisation: block SIGINT/SIGTERM/SIGQUIT and route them
+ * through a non-blocking signalfd, store the stats mask, then map the stats
+ * maps.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+int __sample_init(int mask)
+{
+	sigset_t sigs;
+
+	sigemptyset(&sigs);
+	sigaddset(&sigs, SIGQUIT);
+	sigaddset(&sigs, SIGINT);
+	sigaddset(&sigs, SIGTERM);
+
+	/* Signals are consumed via sample_sig_fd, not via handlers */
+	if (sigprocmask(SIG_BLOCK, &sigs, NULL) < 0)
+		return -errno;
+
+	sample_sig_fd = signalfd(-1, &sigs, SFD_CLOEXEC | SFD_NONBLOCK);
+	if (sample_sig_fd < 0)
+		return -errno;
+
+	sample_mask = mask;
+
+	return sample_setup_maps_mappings();
+}
+
+/*
+ * Detach the XDP program from @ifindex.
+ *
+ * When @prog_id is non-zero the detach only happens if that id is still the
+ * one attached; otherwise a message is printed and -ENOENT returned. A zero
+ * @prog_id detaches unconditionally.
+ */
+static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags)
+{
+	if (prog_id != 0) {
+		__u32 attached_id = 0;
+
+		if (bpf_get_link_xdp_id(ifindex, &attached_id, xdp_flags) < 0)
+			return -errno;
+
+		if (attached_id != prog_id) {
+			print_always(
+				"Program on ifindex %d does not match installed "
+				"program, skipping unload\n",
+				ifindex);
+			return -ENOENT;
+		}
+	}
+
+	return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+}
+
+/*
+ * Attach @xdp_prog to @ifindex and remember it so sample_exit() can detach it.
+ *
+ * @generic selects SKB mode instead of driver mode; without @force the attach
+ * only succeeds if no program is installed yet. At most 32 programs are
+ * tracked per sample.
+ *
+ * Returns 0 on success, -ENOTSUP when the limit is hit, or a negative errno.
+ */
+int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
+		       bool force)
+{
+	int xdp_flags = 0, ret;
+	__u32 prog_id = 0;
+
+	if (sample_xdp_cnt == 32) {
+		fprintf(stderr,
+			"Total limit for installed XDP programs in a sample reached\n");
+		return -ENOTSUP;
+	}
+
+	if (!force)
+		xdp_flags |= XDP_FLAGS_UPDATE_IF_NOEXIST;
+	if (generic)
+		xdp_flags |= XDP_FLAGS_SKB_MODE;
+	else
+		xdp_flags |= XDP_FLAGS_DRV_MODE;
+
+	ret = bpf_set_link_xdp_fd(ifindex, bpf_program__fd(xdp_prog),
+				  xdp_flags);
+	if (ret < 0) {
+		ret = -errno;
+		fprintf(stderr,
+			"Failed to install program \"%s\" on ifindex %d, mode = %s, "
+			"force = %s: %s\n",
+			bpf_program__name(xdp_prog), ifindex,
+			generic ? "skb" : "native", force ? "true" : "false",
+			strerror(-ret));
+		return ret;
+	}
+
+	/* The id is what sample_exit() later compares before detaching */
+	ret = bpf_get_link_xdp_id(ifindex, &prog_id, xdp_flags);
+	if (ret < 0) {
+		ret = -errno;
+		fprintf(stderr,
+			"Failed to get XDP program id for ifindex %d, removing program: %s\n",
+			ifindex, strerror(errno));
+		__sample_remove_xdp(ifindex, 0, xdp_flags);
+		return ret;
+	}
+
+	sample_xdp_progs[sample_xdp_cnt] =
+		(struct xdp_desc){ ifindex, prog_id, xdp_flags };
+	sample_xdp_cnt++;
+
+	return 0;
+}
+
+/*
+ * Print the end-of-run totals held in sample_out, skipping zero counters.
+ * Per-second averages divide by sample_out.rx_cnt.pps, which
+ * sample_summary_update() uses to accumulate the elapsed interval time.
+ */
+static void sample_summary_print(void)
+{
+	double elapsed = sample_out.rx_cnt.pps;
+
+	if (sample_out.totals.rx) {
+		double received = sample_out.totals.rx;
+
+		print_always(" Packets received : %'-10llu\n",
+			     sample_out.totals.rx);
+		print_always(" Average packets/s : %'-10.0f\n",
+			     sample_round(received / elapsed));
+	}
+
+	if (sample_out.totals.redir) {
+		double redirected = sample_out.totals.redir;
+
+		print_always(" Packets redirected : %'-10llu\n",
+			     sample_out.totals.redir);
+		print_always(" Average redir/s : %'-10.0f\n",
+			     sample_round(redirected / elapsed));
+	}
+
+	if (sample_out.totals.drop) {
+		print_always(" Rx dropped : %'-10llu\n",
+			     sample_out.totals.drop);
+	}
+
+	if (sample_out.totals.drop_xmit) {
+		print_always(" Tx dropped : %'-10llu\n",
+			     sample_out.totals.drop_xmit);
+	}
+
+	if (sample_out.totals.err) {
+		print_always(" Errors recorded : %'-10llu\n",
+			     sample_out.totals.err);
+	}
+
+	if (sample_out.totals.xmit) {
+		double transmitted = sample_out.totals.xmit;
+
+		print_always(" Packets transmitted : %'-10llu\n",
+			     sample_out.totals.xmit);
+		print_always(" Average transmit/s : %'-10.0f\n",
+			     sample_round(transmitted / elapsed));
+	}
+}
+
+/*
+ * Tear the sample down and terminate the process with @status: unmap the
+ * stats maps, detach every XDP program recorded by sample_install_xdp()
+ * (most recently installed first), print the summary and close the signalfd.
+ */
+void sample_exit(int status)
+{
+	for (int m = 0; m < NUM_MAP; m++)
+		munmap(sample_mmap[m],
+		       sample_map_count[m] * sizeof(**sample_mmap));
+
+	while (sample_xdp_cnt--) {
+		const struct xdp_desc *desc = &sample_xdp_progs[sample_xdp_cnt];
+
+		/* Only detaches if desc->prog_id is still the attached one */
+		__sample_remove_xdp(desc->ifindex, desc->prog_id, desc->flags);
+	}
+
+	sample_summary_print();
+	close(sample_sig_fd);
+	exit(status);
+}
+
+/*
+ * Snapshot every counter enabled in sample_mask into @rec.
+ *
+ * Multi-slot maps are laid out as consecutive runs of sample_n_cpus records,
+ * one run per slot. Returns 0, or -EINVAL if the multi devmap collection
+ * fails.
+ */
+static int sample_stats_collect(struct stats_record *rec)
+{
+	int slot;
+
+	if (sample_mask & SAMPLE_RX_CNT) {
+		map_collect_percpu(sample_mmap[MAP_RX], &rec->rx_cnt);
+	}
+
+	/* Slot 0 is gathered under SAMPLE_REDIRECT_CNT, the rest below */
+	if (sample_mask & SAMPLE_REDIRECT_CNT) {
+		map_collect_percpu(sample_mmap[MAP_REDIRECT_ERR],
+				   &rec->redir_err[0]);
+	}
+
+	if (sample_mask & SAMPLE_REDIRECT_ERR_CNT) {
+		for (slot = 1; slot < XDP_REDIRECT_ERR_MAX; slot++) {
+			map_collect_percpu(sample_mmap[MAP_REDIRECT_ERR] +
+						   slot * sample_n_cpus,
+					   &rec->redir_err[slot]);
+		}
+	}
+
+	if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT) {
+		for (slot = 0; slot < sample_n_cpus; slot++) {
+			map_collect_percpu(sample_mmap[MAP_CPUMAP_ENQUEUE] +
+						   slot * sample_n_cpus,
+					   &rec->enq[slot]);
+		}
+	}
+
+	if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+		map_collect_percpu(sample_mmap[MAP_CPUMAP_KTHREAD],
+				   &rec->kthread);
+	}
+
+	if (sample_mask & SAMPLE_EXCEPTION_CNT) {
+		for (slot = 0; slot < XDP_ACTION_MAX; slot++) {
+			map_collect_percpu(sample_mmap[MAP_EXCEPTION] +
+						   slot * sample_n_cpus,
+					   &rec->exception[slot]);
+		}
+	}
+
+	if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT) {
+		map_collect_percpu(sample_mmap[MAP_DEVMAP_XMIT],
+				   &rec->devmap_xmit);
+	}
+
+	if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) {
+		int multi_fd = bpf_map__fd(sample_map[MAP_DEVMAP_XMIT_MULTI]);
+
+		/* This map is read through its fd rather than an mmap */
+		if (map_collect_percpu_devmap(multi_fd, rec) < 0)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Fold one interval's totals from @out into the global sample_out summary.
+ * sample_out.rx_cnt.pps is reused to accumulate the elapsed time, which
+ * sample_summary_print() later divides by.
+ */
+static void sample_summary_update(struct sample_output *out, int interval)
+{
+	/* Elapsed time first; the counters below are independent of it */
+	sample_out.rx_cnt.pps += interval;
+
+	sample_out.totals.rx += out->totals.rx;
+	sample_out.totals.xmit += out->totals.xmit;
+	sample_out.totals.redir += out->totals.redir;
+
+	sample_out.totals.err += out->totals.err;
+	sample_out.totals.drop += out->totals.drop;
+	sample_out.totals.drop_xmit += out->totals.drop_xmit;
+}
+
+/*
+ * Compute this interval's aggregate numbers for every counter enabled in
+ * @mask, add them to the running summary, then print the interval report
+ * under the heading @prog_name.
+ */
+static void sample_stats_print(int mask, struct stats_record *cur,
+			       struct stats_record *prev, char *prog_name,
+			       int interval)
+{
+	struct sample_output out = {};
+
+	if (mask & SAMPLE_RX_CNT) {
+		stats_get_rx_cnt(cur, prev, 0, &out);
+	}
+	if (mask & SAMPLE_REDIRECT_CNT) {
+		stats_get_redirect_cnt(cur, prev, 0, &out);
+	}
+	if (mask & SAMPLE_REDIRECT_ERR_CNT) {
+		stats_get_redirect_err_cnt(cur, prev, 0, &out);
+	}
+	if (mask & SAMPLE_EXCEPTION_CNT) {
+		stats_get_exception_cnt(cur, prev, 0, &out);
+	}
+
+	/* The multi variant is only aggregated when the plain one is off */
+	if (mask & SAMPLE_DEVMAP_XMIT_CNT) {
+		stats_get_devmap_xmit(cur, prev, 0, &out);
+	} else if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) {
+		stats_get_devmap_xmit_multi(cur, prev, 0, &out,
+					    mask & SAMPLE_DEVMAP_XMIT_CNT);
+	}
+
+	sample_summary_update(&out, interval);
+
+	stats_print(prog_name, mask, cur, prev, &out);
+}
+
+/* Toggle every log-level bit below LL_DEBUG in sample_log_level. */
+void sample_switch_mode(void)
+{
+	sample_log_level = sample_log_level ^ (LL_DEBUG - 1);
+}
+
+/*
+ * Consume one pending signal from sample_sig_fd.
+ *
+ * SIGQUIT switches the output mode and returns 0; any other signal returns 1
+ * so the caller leaves its loop. Returns -errno if the read fails.
+ */
+static int sample_signal_cb(void)
+{
+	struct signalfd_siginfo info;
+
+	if (read(sample_sig_fd, &info, sizeof(info)) < 0)
+		return -errno;
+
+	if (info.ssi_signo != SIGQUIT) {
+		printf("\n");
+		return 1;
+	}
+
+	sample_switch_mode();
+	printf("\n");
+	return 0;
+}
+
+/* Exchange the two record pointers so the buffers swap roles without a copy */
+static void swap(struct stats_record **a, struct stats_record **b)
+{
+	struct stats_record *second = *b;
+
+	*b = *a;
+	*a = second;
+}
+
+/*
+ * Handle one timer expiry: rotate the record buffers, collect a fresh
+ * snapshot into *rec and print the interval report.
+ *
+ * The heading is "Summary" unless exactly two XDP programs are installed and
+ * SAMPLE_SKIP_HEADING is clear, in which case it becomes "<if0>-><if1>".
+ * Returns 0 on success or a negative error.
+ */
+static int sample_timer_cb(int timerfd, struct stats_record **rec,
+			   struct stats_record **prev, int interval)
+{
+	char heading[64] = "Summary";
+	__u64 expirations;
+	int ret;
+
+	/* Reading the timerfd clears its readable state */
+	if (read(timerfd, &expirations, sizeof(expirations)) < 0)
+		return -errno;
+
+	swap(prev, rec);
+	ret = sample_stats_collect(*rec);
+	if (ret < 0)
+		return ret;
+
+	if (sample_xdp_cnt == 2 && !(sample_mask & SAMPLE_SKIP_HEADING)) {
+		char from_name[IFNAMSIZ];
+		char to_name[IFNAMSIZ];
+		const char *src = "?";
+		const char *dst = "?";
+
+		if (if_indextoname(sample_xdp_progs[0].ifindex, from_name))
+			src = from_name;
+		if (if_indextoname(sample_xdp_progs[1].ifindex, to_name))
+			dst = to_name;
+
+		snprintf(heading, sizeof(heading), "%s->%s", src, dst);
+	}
+
+	sample_stats_print(sample_mask, *rec, *prev, heading, interval);
+	return 0;
+}
+
+/*
+ * Main loop of a sample: every @interval seconds collect and print the
+ * statistics, and react to signals delivered through sample_sig_fd.
+ *
+ * @interval: reporting period in seconds, must be non-zero.
+ * @post_cb:  optional callback run after each handled event.
+ * @ctx:      opaque argument passed to @post_cb.
+ *
+ * Returns a negative errno value on failure. A positive value is returned
+ * when the loop ends because of a terminating signal (sample_signal_cb()
+ * returned 1).
+ */
+int sample_run(int interval, void (*post_cb)(void *), void *ctx)
+{
+	struct timespec ts = { interval, 0 };
+	struct itimerspec its = { ts, ts };
+	struct stats_record *rec, *prev;
+	struct pollfd pfd[2] = {};
+	int timerfd, ret;
+
+	if (!interval) {
+		fprintf(stderr, "Incorrect interval 0\n");
+		return -EINVAL;
+	}
+	sample_interval = interval;
+	/* Pretty print numbers */
+	setlocale(LC_NUMERIC, "en_US.UTF-8");
+
+	timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
+	if (timerfd < 0)
+		return -errno;
+	/* An unarmed timer would leave the loop waiting on signals only */
+	if (timerfd_settime(timerfd, 0, &its, NULL) < 0) {
+		ret = -errno;
+		goto end;
+	}
+
+	pfd[0].fd = sample_sig_fd;
+	pfd[0].events = POLLIN;
+
+	pfd[1].fd = timerfd;
+	pfd[1].events = POLLIN;
+
+	ret = -ENOMEM;
+	rec = alloc_stats_record();
+	if (!rec)
+		goto end;
+	prev = alloc_stats_record();
+	if (!prev)
+		goto end_rec;
+
+	/* Baseline snapshot so the first report has something to diff against */
+	ret = sample_stats_collect(rec);
+	if (ret < 0)
+		goto end_rec_prev;
+
+	for (;;) {
+		ret = poll(pfd, 2, -1);
+		if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+			/* Report the cause like every other error path */
+			ret = -errno;
+			break;
+		}
+
+		/* Signals take priority; the timer is served on the next pass */
+		if (pfd[0].revents & POLLIN)
+			ret = sample_signal_cb();
+		else if (pfd[1].revents & POLLIN)
+			ret = sample_timer_cb(timerfd, &rec, &prev, interval);
+
+		if (ret)
+			break;
+
+		if (post_cb)
+			post_cb(ctx);
+	}
+
+end_rec_prev:
+	free_stats_record(prev);
+end_rec:
+	free_stats_record(rec);
+end:
+	close(timerfd);
+
+	return ret;
+}
+
+/*
+ * Look up the driver name of the interface @ifindex via ETHTOOL_GDRVINFO.
+ *
+ * Returns a pointer to a static buffer holding the name (overwritten by the
+ * next call), "loopback" when the lookup fails with EOPNOTSUPP, or "[error]"
+ * for any other failure.
+ */
+const char *get_driver_name(int ifindex)
+{
+	struct ethtool_drvinfo info = {};
+	static char drvname[32];
+	char ifname[IF_NAMESIZE];
+	struct ifreq req = {};
+	int sock, saved_errno;
+
+	sock = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sock < 0)
+		return "[error]";
+
+	if (!if_indextoname(ifindex, ifname))
+		goto fail;
+
+	info.cmd = ETHTOOL_GDRVINFO;
+	safe_strncpy(req.ifr_name, ifname, sizeof(req.ifr_name));
+	req.ifr_data = (void *)&info;
+
+	if (ioctl(sock, SIOCETHTOOL, &req))
+		goto fail;
+
+	safe_strncpy(drvname, info.driver, sizeof(drvname));
+
+	close(sock);
+	return drvname;
+
+fail:
+	/* Sample errno before close() has a chance to change it */
+	saved_errno = errno;
+	close(sock);
+	return saved_errno == EOPNOTSUPP ? "loopback" : "[error]";
+}
+
+/*
+ * Copy the 6-byte hardware address of interface @ifindex into @mac_addr.
+ *
+ * Returns 0 on success or a negative errno value; @mac_addr is only written
+ * on success.
+ */
+int get_mac_addr(int ifindex, void *mac_addr)
+{
+	char ifname[IF_NAMESIZE];
+	struct ifreq req = {};
+	int sock, ret = 0;
+
+	sock = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sock < 0)
+		return -errno;
+
+	if (!if_indextoname(ifindex, ifname)) {
+		ret = -errno;
+		goto out;
+	}
+
+	safe_strncpy(req.ifr_name, ifname, sizeof(req.ifr_name));
+
+	if (ioctl(sock, SIOCGIFHWADDR, &req)) {
+		ret = -errno;
+		goto out;
+	}
+
+	memcpy(mac_addr, req.ifr_hwaddr.sa_data, 6 * sizeof(char));
+
+out:
+	close(sock);
+	return ret;
+}
+
+/*
+ * Constructor: switch libbpf to strict mode. On failure report the error and
+ * exit with EXIT_FAIL_BPF.
+ */
+__attribute__((constructor)) static void sample_ctor(void)
+{
+	if (libbpf_set_strict_mode(LIBBPF_STRICT_ALL) >= 0)
+		return;
+
+	fprintf(stderr, "Failed to set libbpf strict mode: %s\n",
+		strerror(errno));
+	/* Just exit, nothing to cleanup right now */
+	exit(EXIT_FAIL_BPF);
+}
diff --git a/samples/bpf/xdp_sample_user.h b/samples/bpf/xdp_sample_user.h
new file mode 100644
index 000000000000..d97465ff8c62
--- /dev/null
+++ b/samples/bpf/xdp_sample_user.h
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef XDP_SAMPLE_USER_H
+#define XDP_SAMPLE_USER_H
+
+#include <bpf/libbpf.h>
+#include <linux/compiler.h>
+
+#include "xdp_sample_shared.h"
+
+/*
+ * Bit flags selecting which statistics a sample collects and prints.
+ * Each plain flag occupies one bit. The two *_MAP_CNT values are
+ * combinations: the matching redirect counter bit OR'ed with
+ * _SAMPLE_REDIRECT_MAP.
+ */
+enum stats_mask {
+ _SAMPLE_REDIRECT_MAP = 1U << 0,
+ SAMPLE_RX_CNT = 1U << 1,
+ SAMPLE_REDIRECT_ERR_CNT = 1U << 2,
+ SAMPLE_CPUMAP_ENQUEUE_CNT = 1U << 3,
+ SAMPLE_CPUMAP_KTHREAD_CNT = 1U << 4,
+ SAMPLE_EXCEPTION_CNT = 1U << 5,
+ SAMPLE_DEVMAP_XMIT_CNT = 1U << 6,
+ SAMPLE_REDIRECT_CNT = 1U << 7,
+ /* Combined masks: counter bit plus the redirect-map marker bit */
+ SAMPLE_REDIRECT_MAP_CNT = SAMPLE_REDIRECT_CNT | _SAMPLE_REDIRECT_MAP,
+ SAMPLE_REDIRECT_ERR_MAP_CNT = SAMPLE_REDIRECT_ERR_CNT | _SAMPLE_REDIRECT_MAP,
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI = 1U << 8,
+ SAMPLE_SKIP_HEADING = 1U << 9,
+};
+
+/* Exit return codes */
+#define EXIT_OK 0
+#define EXIT_FAIL 1
+#define EXIT_FAIL_OPTION 2
+#define EXIT_FAIL_XDP 3
+#define EXIT_FAIL_BPF 4
+#define EXIT_FAIL_MEM 5
+
+/*
+ * Setup, run and teardown entry points shared by the samples.
+ * The int-returning functions report failure as a negative value.
+ * sample_exit() takes the process exit status as its argument.
+ */
+int sample_setup_maps(struct bpf_map **maps);
+int __sample_init(int mask);
+void sample_exit(int status);
+int sample_run(int interval, void (*post_cb)(void *), void *ctx);
+
+/* Output mode toggle, XDP program attach helper and usage printer */
+void sample_switch_mode(void);
+int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
+ bool force);
+void sample_usage(char *argv[], const struct option *long_options,
+ const char *doc, int mask, bool error);
+
+/* Interface helpers keyed by ifindex */
+const char *get_driver_name(int ifindex);
+int get_mac_addr(int ifindex, void *mac_addr);
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstringop-truncation"
+/*
+ * Bounded string copy that always NUL-terminates: copies at most @size - 1
+ * bytes of @src into @dst and writes a terminator into the last byte of the
+ * buffer. A @size of 0 leaves @dst untouched. Returns @dst.
+ */
+__attribute__((unused))
+static inline char *safe_strncpy(char *dst, const char *src, size_t size)
+{
+	if (size > 0) {
+		size_t last = size - 1;
+
+		strncpy(dst, src, last);
+		dst[last] = '\0';
+	}
+	return dst;
+}
+#pragma GCC diagnostic pop
+
+/*
+ * Attach the skeleton program skel->progs.<name> and store the resulting
+ * link in skel->links.<name>.
+ *
+ * Expects a variable called "skel" in the expanding scope. The expansion
+ * contains return statements, so it can only be used inside a function
+ * returning int: -EINVAL if the program is not a tracing program, -errno if
+ * the attach yields no link.
+ */
+#define __attach_tp(name) \
+ ({ \
+ if (!bpf_program__is_tracing(skel->progs.name)) \
+ return -EINVAL; \
+ skel->links.name = bpf_program__attach(skel->progs.name); \
+ if (!skel->links.name) \
+ return -errno; \
+ })
+
+/*
+ * Pre-load setup for a skeleton: store the possible-CPU count in the
+ * skeleton's rodata and hand the seven stats maps to sample_setup_maps().
+ * The expression evaluates to sample_setup_maps()'s return value.
+ *
+ * NOTE(review): the map order here presumably has to match the MAP_* index
+ * values used by sample_setup_maps() - confirm against that enum.
+ */
+#define sample_init_pre_load(skel) \
+ ({ \
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus(); \
+ sample_setup_maps((struct bpf_map *[]){ \
+ skel->maps.rx_cnt, skel->maps.redir_err_cnt, \
+ skel->maps.cpumap_enqueue_cnt, \
+ skel->maps.cpumap_kthread_cnt, \
+ skel->maps.exception_cnt, skel->maps.devmap_xmit_cnt, \
+ skel->maps.devmap_xmit_cnt_multi }); \
+ })
+
+/*
+ * Define "static int sample_init(struct <name> *skel, int mask)" for a
+ * skeleton type: it calls __sample_init(mask) and then attaches one tracing
+ * program per statistic selected in @mask through __attach_tp(), which
+ * returns a negative error from sample_init() on the first failure.
+ * Returns 0 when everything attached.
+ *
+ * NOTE(review): SAMPLE_REDIRECT_MAP_CNT and SAMPLE_REDIRECT_ERR_MAP_CNT are
+ * two-bit masks, so "mask & ..." is also true when only the plain
+ * SAMPLE_REDIRECT_CNT / SAMPLE_REDIRECT_ERR_CNT bit is set - confirm that
+ * attaching both the *_map and plain programs in that case is intended.
+ */
+#define DEFINE_SAMPLE_INIT(name) \
+ static int sample_init(struct name *skel, int mask) \
+ { \
+ int ret; \
+ ret = __sample_init(mask); \
+ if (ret < 0) \
+ return ret; \
+ if (mask & SAMPLE_REDIRECT_MAP_CNT) \
+ __attach_tp(tp_xdp_redirect_map); \
+ if (mask & SAMPLE_REDIRECT_CNT) \
+ __attach_tp(tp_xdp_redirect); \
+ if (mask & SAMPLE_REDIRECT_ERR_MAP_CNT) \
+ __attach_tp(tp_xdp_redirect_map_err); \
+ if (mask & SAMPLE_REDIRECT_ERR_CNT) \
+ __attach_tp(tp_xdp_redirect_err); \
+ if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT) \
+ __attach_tp(tp_xdp_cpumap_enqueue); \
+ if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) \
+ __attach_tp(tp_xdp_cpumap_kthread); \
+ if (mask & SAMPLE_EXCEPTION_CNT) \
+ __attach_tp(tp_xdp_exception); \
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT) \
+ __attach_tp(tp_xdp_devmap_xmit); \
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) \
+ __attach_tp(tp_xdp_devmap_xmit_multi); \
+ return 0; \
+ }
+
+#endif
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 33d0bdebbed8..49d7a6ad7e39 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -1,12 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017 - 2018 Intel Corporation. */
-#include <asm/barrier.h>
#include <errno.h>
#include <getopt.h>
#include <libgen.h>
#include <linux/bpf.h>
-#include <linux/compiler.h>
#include <linux/if_link.h>
#include <linux/if_xdp.h>
#include <linux/if_ether.h>
@@ -653,17 +651,15 @@ out:
return result;
}
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
-
/*
* This is a version of ip_compute_csum() optimized for IP headers,
* which always checksum on 4 octet boundaries.
* This function code has been taken from
* Linux kernel lib/checksum.c
*/
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
- return (__force __sum16)~do_csum(iph, ihl * 4);
+ return (__sum16)~do_csum(iph, ihl * 4);
}
/*
@@ -673,11 +669,11 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
*/
static inline __sum16 csum_fold(__wsum csum)
{
- u32 sum = (__force u32)csum;
+ u32 sum = (u32)csum;
sum = (sum & 0xffff) + (sum >> 16);
sum = (sum & 0xffff) + (sum >> 16);
- return (__force __sum16)~sum;
+ return (__sum16)~sum;
}
/*
@@ -703,16 +699,16 @@ __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
__u32 len, __u8 proto, __wsum sum)
{
- unsigned long long s = (__force u32)sum;
+ unsigned long long s = (u32)sum;
- s += (__force u32)saddr;
- s += (__force u32)daddr;
+ s += (u32)saddr;
+ s += (u32)daddr;
#ifdef __BIG_ENDIAN__
s += proto + len;
#else
s += (proto + len) << 8;
#endif
- return (__force __wsum)from64to32(s);
+ return (__wsum)from64to32(s);
}
/*
diff --git a/samples/mei/mei-amt-version.c b/samples/mei/mei-amt-version.c
index ad3e56042f96..867debd3b912 100644
--- a/samples/mei/mei-amt-version.c
+++ b/samples/mei/mei-amt-version.c
@@ -154,31 +154,52 @@ err:
+/*
+ * Wait up to @timeout milliseconds for the device to become readable, then
+ * read one message of at most @len bytes into @buffer.
+ *
+ * Returns the number of bytes read, or a negative value on timeout, select
+ * failure or read failure; in every failure case the handle is torn down
+ * with mei_deinit().
+ */
static ssize_t mei_recv_msg(struct mei *me, unsigned char *buffer,
ssize_t len, unsigned long timeout)
{
+ struct timeval tv;
+ fd_set set;
ssize_t rc;
+ tv.tv_sec = timeout / 1000;
+ /* leftover milliseconds -> microseconds, keeps tv_usec below one second */
+ tv.tv_usec = (timeout % 1000) * 1000;
+
mei_msg(me, "call read length = %zd\n", len);
+ FD_ZERO(&set);
+ FD_SET(me->fd, &set);
+ rc = select(me->fd + 1, &set, NULL, NULL, &tv);
+ if (rc > 0 && FD_ISSET(me->fd, &set)) {
+ mei_msg(me, "have reply\n");
+ } else if (rc == 0) {
+ rc = -1;
+ mei_err(me, "read failed on timeout\n");
+ goto out;
+ } else { /* rc < 0 */
+ /* stay negative so the cleanup at "out" runs and callers see a failure */
+ rc = -errno;
+ mei_err(me, "read failed on select with status %zd %s\n",
+ rc, strerror(errno));
+ goto out;
+ }
+
rc = read(me->fd, buffer, len);
if (rc < 0) {
mei_err(me, "read failed with status %zd %s\n",
rc, strerror(errno));
- mei_deinit(me);
- } else {
- mei_msg(me, "read succeeded with result %zd\n", rc);
+ goto out;
}
+
+ mei_msg(me, "read succeeded with result %zd\n", rc);
+
+out:
+ if (rc < 0)
+ mei_deinit(me);
+
return rc;
}
static ssize_t mei_send_msg(struct mei *me, const unsigned char *buffer,
ssize_t len, unsigned long timeout)
{
- struct timeval tv;
ssize_t written;
ssize_t rc;
- fd_set set;
-
- tv.tv_sec = timeout / 1000;
- tv.tv_usec = (timeout % 1000) * 1000000;
mei_msg(me, "call write length = %zd\n", len);
@@ -189,19 +210,7 @@ static ssize_t mei_send_msg(struct mei *me, const unsigned char *buffer,
written, strerror(errno));
goto out;
}
-
- FD_ZERO(&set);
- FD_SET(me->fd, &set);
- rc = select(me->fd + 1 , &set, NULL, NULL, &tv);
- if (rc > 0 && FD_ISSET(me->fd, &set)) {
- mei_msg(me, "write success\n");
- } else if (rc == 0) {
- mei_err(me, "write failed on timeout with status\n");
- goto out;
- } else { /* rc < 0 */
- mei_err(me, "write failed on select with status %zd\n", rc);
- goto out;
- }
+ mei_msg(me, "write success\n");
rc = written;
out:
diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh
index a335393157eb..933194257a24 100644
--- a/samples/pktgen/functions.sh
+++ b/samples/pktgen/functions.sh
@@ -123,7 +123,7 @@ function root_check_run_with_sudo() {
if [ "$EUID" -ne 0 ]; then
if [ -x $0 ]; then # Directly executable use sudo
info "Not root, running with sudo"
- sudo "$0" "$@"
+ sudo -E "$0" "$@"
exit $?
fi
err 4 "cannot perform sudo run of $0"
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
index 30a610b541ad..99ec0688b044 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
@@ -89,14 +89,21 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
pg_set $dev "burst $BURST"
done
+# Run if user hits control-c
+function print_result() {
+ # Print results
+ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
+ dev=${DEV}@${thread}
+ echo "Device: $dev"
+ cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+ done
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
+
# start_run
echo "Running... ctrl^C to stop" >&2
pg_ctrl "start"
echo "Done" >&2
-# Print results
-for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
- dev=${DEV}@${thread}
- echo "Device: $dev"
- cat /proc/net/pktgen/$dev | grep -A2 "Result:"
-done
+print_result
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
index a6195bd77532..04b0dd0c36d6 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
@@ -69,14 +69,21 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
pg_set $dev "xmit_mode queue_xmit"
done
+# Run if user hits control-c
+function print_result {
+ # Print results
+ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
+ dev=${DEV}@${thread}
+ echo "Device: $dev"
+ cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+ done
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
+
# start_run
echo "Running... ctrl^C to stop" >&2
pg_ctrl "start"
echo "Done" >&2
-# Print results
-for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
- dev=${DEV}@${thread}
- echo "Device: $dev"
- cat /proc/net/pktgen/$dev | grep -A2 "Result:"
-done
+print_result
diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh
index 246cfe02bb82..09a92ea963f9 100755
--- a/samples/pktgen/pktgen_sample01_simple.sh
+++ b/samples/pktgen/pktgen_sample01_simple.sh
@@ -79,15 +79,22 @@ pg_set $DEV "flag UDPSRC_RND"
pg_set $DEV "udp_src_min $UDP_SRC_MIN"
pg_set $DEV "udp_src_max $UDP_SRC_MAX"
+# Run if user hits control-c
+function print_result() {
+ # Print results
+ echo "Result device: $DEV"
+ cat /proc/net/pktgen/$DEV
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
+
if [ -z "$APPEND" ]; then
# start_run
echo "Running... ctrl^C to stop" >&2
pg_ctrl "start"
echo "Done" >&2
- # Print results
- echo "Result device: $DEV"
- cat /proc/net/pktgen/$DEV
+ print_result
else
echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
fi \ No newline at end of file
diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh
index c6af3d9d5171..7fa41c84c32f 100755
--- a/samples/pktgen/pktgen_sample02_multiqueue.sh
+++ b/samples/pktgen/pktgen_sample02_multiqueue.sh
@@ -83,18 +83,25 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
pg_set $dev "udp_src_max $UDP_SRC_MAX"
done
-if [ -z "$APPEND" ]; then
- # start_run
- echo "Running... ctrl^C to stop" >&2
- pg_ctrl "start"
- echo "Done" >&2
-
+# Run if user hits control-c
+function print_result() {
# Print results
for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
dev=${DEV}@${thread}
echo "Device: $dev"
cat /proc/net/pktgen/$dev | grep -A2 "Result:"
done
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
+
+if [ -z "$APPEND" ]; then
+ # start_run
+ echo "Running... ctrl^C to stop" >&2
+ pg_ctrl "start"
+ echo "Done" >&2
+
+ print_result
else
echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
fi
diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
index ab87de440277..8bf2fdffba16 100755
--- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh
+++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
@@ -85,7 +85,7 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
done
# Run if user hits control-c
-function control_c() {
+function print_result() {
# Print results
for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
dev=${DEV}@${thread}
@@ -94,11 +94,13 @@ function control_c() {
done
}
# trap keyboard interrupt (Ctrl-C)
-trap control_c SIGINT
+trap true SIGINT
if [ -z "$APPEND" ]; then
echo "Running... ctrl^C to stop" >&2
pg_ctrl "start"
+
+ print_result
else
echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
fi
diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh
index 56c5f5af350f..cff51f861506 100755
--- a/samples/pktgen/pktgen_sample04_many_flows.sh
+++ b/samples/pktgen/pktgen_sample04_many_flows.sh
@@ -13,13 +13,15 @@ root_check_run_with_sudo "$@"
# Parameter parsing via include
source ${basedir}/parameters.sh
# Set some default params, if they didn't get set
-[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42"
+if [ -z "$DEST_IP" ]; then
+ [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
+fi
[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely
if [ -n "$DEST_IP" ]; then
- validate_addr $DEST_IP
- read -r DST_MIN DST_MAX <<< $(parse_addr $DEST_IP)
+ validate_addr${IP6} $DEST_IP
+ read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
fi
if [ -n "$DST_PORT" ]; then
read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
@@ -62,8 +64,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
# Single destination
pg_set $dev "dst_mac $DST_MAC"
- pg_set $dev "dst_min $DST_MIN"
- pg_set $dev "dst_max $DST_MAX"
+ pg_set $dev "dst${IP6}_min $DST_MIN"
+ pg_set $dev "dst${IP6}_max $DST_MAX"
if [ -n "$DST_PORT" ]; then
# Single destination port or random port range
diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
index 6e0effabca59..3578d0aa4ac5 100755
--- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh
+++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
@@ -17,14 +17,16 @@ root_check_run_with_sudo "$@"
# Parameter parsing via include
source ${basedir}/parameters.sh
# Set some default params, if they didn't get set
-[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42"
+if [ -z "$DEST_IP" ]; then
+ [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
+fi
[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
[ -z "$BURST" ] && BURST=32
[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely
if [ -n "$DEST_IP" ]; then
- validate_addr $DEST_IP
- read -r DST_MIN DST_MAX <<< $(parse_addr $DEST_IP)
+ validate_addr${IP6} $DEST_IP
+ read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
fi
if [ -n "$DST_PORT" ]; then
read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
@@ -52,8 +54,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
# Single destination
pg_set $dev "dst_mac $DST_MAC"
- pg_set $dev "dst_min $DST_MIN"
- pg_set $dev "dst_max $DST_MAX"
+ pg_set $dev "dst${IP6}_min $DST_MIN"
+ pg_set $dev "dst${IP6}_max $DST_MAX"
if [ -n "$DST_PORT" ]; then
# Single destination port or random port range
diff --git a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
index 7c27923083a6..264cc5db9c49 100755
--- a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
+++ b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
@@ -100,12 +100,8 @@ for ((i = 0; i < $THREADS; i++)); do
pg_set $dev "udp_src_max $UDP_SRC_MAX"
done
-# start_run
-if [ -z "$APPEND" ]; then
- echo "Running... ctrl^C to stop" >&2
- pg_ctrl "start"
- echo "Done" >&2
-
+# Run if user hits control-c
+function print_result() {
# Print results
for ((i = 0; i < $THREADS; i++)); do
thread=${cpu_array[$((i+F_THREAD))]}
@@ -113,6 +109,17 @@ if [ -z "$APPEND" ]; then
echo "Device: $dev"
cat /proc/net/pktgen/$dev | grep -A2 "Result:"
done
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
+
+# start_run
+if [ -z "$APPEND" ]; then
+ echo "Running... ctrl^C to stop" >&2
+ pg_ctrl "start"
+ echo "Done" >&2
+
+ print_result
else
echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
fi
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index 13a35f7cbe66..e61471ab7d14 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -141,6 +141,33 @@
* In most cases, the __assign_str() macro will take the same
* parameters as the __string() macro had to declare the string.
*
+ * __string_len: This is a helper to a __dynamic_array, but it understands
+ * that the array has characters in it, and with the combined
+ * use of __assign_str_len(), it will allocate 'len' + 1 bytes
+ * in the ring buffer and add a '\0' to the string. This is
+ * useful if the string being saved has no terminating '\0' byte.
+ * It requires that the length of the string is known as it acts
+ * like a memcpy().
+ *
+ * Declared with:
+ *
+ * __string_len(foo, bar, len)
+ *
+ * To assign this string, use the helper macro __assign_str_len().
+ *
+ * __assign_str_len(foo, bar, len);
+ *
+ * Then len + 1 is allocated to the ring buffer, and a nul terminating
+ * byte is added. This is similar to:
+ *
+ * memcpy(__get_str(foo), bar, len);
+ * __get_str(foo)[len] = 0;
+ *
+ * The advantage of using this over __dynamic_array, is that it
+ * takes care of allocating the extra byte on the ring buffer
+ * for the '\0' terminating byte, and __get_str(foo) can be used
+ * in the TP_printk().
+ *
* __bitmask: This is another kind of __dynamic_array, but it expects
* an array of longs, and the number of bits to parse. It takes
* two parameters (name, nr_bits), where name is the name of the
diff --git a/scripts/atomic/check-atomics.sh b/scripts/atomic/check-atomics.sh
index 9c7fbd4bcbce..0e7bab3eb0d1 100755
--- a/scripts/atomic/check-atomics.sh
+++ b/scripts/atomic/check-atomics.sh
@@ -14,9 +14,9 @@ if [ $? -ne 0 ]; then
fi
cat <<EOF |
-asm-generic/atomic-instrumented.h
-asm-generic/atomic-long.h
-linux/atomic-arch-fallback.h
+linux/atomic/atomic-instrumented.h
+linux/atomic/atomic-long.h
+linux/atomic/atomic-arch-fallback.h
EOF
while read header; do
OLDSUM="$(tail -n 1 ${LINUXDIR}/include/${header})"
diff --git a/scripts/atomic/fallbacks/acquire b/scripts/atomic/fallbacks/acquire
index 59c00529dc7c..ef764085c79a 100755
--- a/scripts/atomic/fallbacks/acquire
+++ b/scripts/atomic/fallbacks/acquire
@@ -1,8 +1,8 @@
cat <<EOF
static __always_inline ${ret}
-${arch}${atomic}_${pfx}${name}${sfx}_acquire(${params})
+arch_${atomic}_${pfx}${name}${sfx}_acquire(${params})
{
- ${ret} ret = ${arch}${atomic}_${pfx}${name}${sfx}_relaxed(${args});
+ ${ret} ret = arch_${atomic}_${pfx}${name}${sfx}_relaxed(${args});
__atomic_acquire_fence();
return ret;
}
diff --git a/scripts/atomic/fallbacks/add_negative b/scripts/atomic/fallbacks/add_negative
index a66635bceefb..15caa2eb2371 100755
--- a/scripts/atomic/fallbacks/add_negative
+++ b/scripts/atomic/fallbacks/add_negative
@@ -1,6 +1,6 @@
cat <<EOF
/**
- * ${arch}${atomic}_add_negative - add and test if negative
+ * arch_${atomic}_add_negative - add and test if negative
* @i: integer value to add
* @v: pointer of type ${atomic}_t
*
@@ -9,8 +9,8 @@ cat <<EOF
* result is greater than or equal to zero.
*/
static __always_inline bool
-${arch}${atomic}_add_negative(${int} i, ${atomic}_t *v)
+arch_${atomic}_add_negative(${int} i, ${atomic}_t *v)
{
- return ${arch}${atomic}_add_return(i, v) < 0;
+ return arch_${atomic}_add_return(i, v) < 0;
}
EOF
diff --git a/scripts/atomic/fallbacks/add_unless b/scripts/atomic/fallbacks/add_unless
index 2ff598a3f9ec..9e5159c2ccfc 100755
--- a/scripts/atomic/fallbacks/add_unless
+++ b/scripts/atomic/fallbacks/add_unless
@@ -1,6 +1,6 @@
cat << EOF
/**
- * ${arch}${atomic}_add_unless - add unless the number is already a given value
+ * arch_${atomic}_add_unless - add unless the number is already a given value
* @v: pointer of type ${atomic}_t
* @a: the amount to add to v...
* @u: ...unless v is equal to u.
@@ -9,8 +9,8 @@ cat << EOF
* Returns true if the addition was done.
*/
static __always_inline bool
-${arch}${atomic}_add_unless(${atomic}_t *v, ${int} a, ${int} u)
+arch_${atomic}_add_unless(${atomic}_t *v, ${int} a, ${int} u)
{
- return ${arch}${atomic}_fetch_add_unless(v, a, u) != u;
+ return arch_${atomic}_fetch_add_unless(v, a, u) != u;
}
EOF
diff --git a/scripts/atomic/fallbacks/andnot b/scripts/atomic/fallbacks/andnot
index 3f18663dcefb..5a42f54a3595 100755
--- a/scripts/atomic/fallbacks/andnot
+++ b/scripts/atomic/fallbacks/andnot
@@ -1,7 +1,7 @@
cat <<EOF
static __always_inline ${ret}
-${arch}${atomic}_${pfx}andnot${sfx}${order}(${int} i, ${atomic}_t *v)
+arch_${atomic}_${pfx}andnot${sfx}${order}(${int} i, ${atomic}_t *v)
{
- ${retstmt}${arch}${atomic}_${pfx}and${sfx}${order}(~i, v);
+ ${retstmt}arch_${atomic}_${pfx}and${sfx}${order}(~i, v);
}
EOF
diff --git a/scripts/atomic/fallbacks/dec b/scripts/atomic/fallbacks/dec
index e2e01f0574bb..8c144c818e9e 100755
--- a/scripts/atomic/fallbacks/dec
+++ b/scripts/atomic/fallbacks/dec
@@ -1,7 +1,7 @@
cat <<EOF
static __always_inline ${ret}
-${arch}${atomic}_${pfx}dec${sfx}${order}(${atomic}_t *v)
+arch_${atomic}_${pfx}dec${sfx}${order}(${atomic}_t *v)
{
- ${retstmt}${arch}${atomic}_${pfx}sub${sfx}${order}(1, v);
+ ${retstmt}arch_${atomic}_${pfx}sub${sfx}${order}(1, v);
}
EOF
diff --git a/scripts/atomic/fallbacks/dec_and_test b/scripts/atomic/fallbacks/dec_and_test
index e8a5e492eb5f..8549f359bd0e 100755
--- a/scripts/atomic/fallbacks/dec_and_test
+++ b/scripts/atomic/fallbacks/dec_and_test
@@ -1,6 +1,6 @@
cat <<EOF
/**
- * ${arch}${atomic}_dec_and_test - decrement and test
+ * arch_${atomic}_dec_and_test - decrement and test
* @v: pointer of type ${atomic}_t
*
* Atomically decrements @v by 1 and
@@ -8,8 +8,8 @@ cat <<EOF
* cases.
*/
static __always_inline bool
-${arch}${atomic}_dec_and_test(${atomic}_t *v)
+arch_${atomic}_dec_and_test(${atomic}_t *v)
{
- return ${arch}${atomic}_dec_return(v) == 0;
+ return arch_${atomic}_dec_return(v) == 0;
}
EOF
diff --git a/scripts/atomic/fallbacks/dec_if_positive b/scripts/atomic/fallbacks/dec_if_positive
index 527adec89c37..86bdced3428d 100755
--- a/scripts/atomic/fallbacks/dec_if_positive
+++ b/scripts/atomic/fallbacks/dec_if_positive
@@ -1,14 +1,14 @@
cat <<EOF
static __always_inline ${ret}
-${arch}${atomic}_dec_if_positive(${atomic}_t *v)
+arch_${atomic}_dec_if_positive(${atomic}_t *v)
{
- ${int} dec, c = ${arch}${atomic}_read(v);
+ ${int} dec, c = arch_${atomic}_read(v);
do {
dec = c - 1;
if (unlikely(dec < 0))
break;
- } while (!${arch}${atomic}_try_cmpxchg(v, &c, dec));
+ } while (!arch_${atomic}_try_cmpxchg(v, &c, dec));
return dec;
}
diff --git a/scripts/atomic/fallbacks/dec_unless_positive b/scripts/atomic/fallbacks/dec_unless_positive
index dcab6848ca1e..c531d5afecc4 100755
--- a/scripts/atomic/fallbacks/dec_unless_positive
+++ b/scripts/atomic/fallbacks/dec_unless_positive
@@ -1,13 +1,13 @@
cat <<EOF
static __always_inline bool
-${arch}${atomic}_dec_unless_positive(${atomic}_t *v)
+arch_${atomic}_dec_unless_positive(${atomic}_t *v)
{
- ${int} c = ${arch}${atomic}_read(v);
+ ${int} c = arch_${atomic}_read(v);
do {
if (unlikely(c > 0))
return false;
- } while (!${arch}${atomic}_try_cmpxchg(v, &c, c - 1));
+ } while (!arch_${atomic}_try_cmpxchg(v, &c, c - 1));
return true;
}
diff --git a/scripts/atomic/fallbacks/fence b/scripts/atomic/fallbacks/fence
index 3764fc8ce945..07757d8e338e 100755
--- a/scripts/atomic/fallbacks/fence
+++ b/scripts/atomic/fallbacks/fence
@@ -1,10 +1,10 @@
cat <<EOF
static __always_inline ${ret}
-${arch}${atomic}_${pfx}${name}${sfx}(${params})
+arch_${atomic}_${pfx}${name}${sfx}(${params})
{
${ret} ret;
__atomic_pre_full_fence();
- ret = ${arch}${atomic}_${pfx}${name}${sfx}_relaxed(${args});
+ ret = arch_${atomic}_${pfx}${name}${sfx}_relaxed(${args});
__atomic_post_full_fence();
return ret;
}
diff --git a/scripts/atomic/fallbacks/fetch_add_unless b/scripts/atomic/fallbacks/fetch_add_unless
index 0e0b9aef1515..68ce13c8b9da 100755
--- a/scripts/atomic/fallbacks/fetch_add_unless
+++ b/scripts/atomic/fallbacks/fetch_add_unless
@@ -1,6 +1,6 @@
cat << EOF
/**
- * ${arch}${atomic}_fetch_add_unless - add unless the number is already a given value
+ * arch_${atomic}_fetch_add_unless - add unless the number is already a given value
* @v: pointer of type ${atomic}_t
* @a: the amount to add to v...
* @u: ...unless v is equal to u.
@@ -9,14 +9,14 @@ cat << EOF
* Returns original value of @v
*/
static __always_inline ${int}
-${arch}${atomic}_fetch_add_unless(${atomic}_t *v, ${int} a, ${int} u)
+arch_${atomic}_fetch_add_unless(${atomic}_t *v, ${int} a, ${int} u)
{
- ${int} c = ${arch}${atomic}_read(v);
+ ${int} c = arch_${atomic}_read(v);
do {
if (unlikely(c == u))
break;
- } while (!${arch}${atomic}_try_cmpxchg(v, &c, c + a));
+ } while (!arch_${atomic}_try_cmpxchg(v, &c, c + a));
return c;
}
diff --git a/scripts/atomic/fallbacks/inc b/scripts/atomic/fallbacks/inc
index 15ec62946e8c..3c2c3739169e 100755
--- a/scripts/atomic/fallbacks/inc
+++ b/scripts/atomic/fallbacks/inc
@@ -1,7 +1,7 @@
cat <<EOF
static __always_inline ${ret}
-${arch}${atomic}_${pfx}inc${sfx}${order}(${atomic}_t *v)
+arch_${atomic}_${pfx}inc${sfx}${order}(${atomic}_t *v)
{
- ${retstmt}${arch}${atomic}_${pfx}add${sfx}${order}(1, v);
+ ${retstmt}arch_${atomic}_${pfx}add${sfx}${order}(1, v);
}
EOF
diff --git a/scripts/atomic/fallbacks/inc_and_test b/scripts/atomic/fallbacks/inc_and_test
index cecc8322a21f..0cf23fe1efb8 100755
--- a/scripts/atomic/fallbacks/inc_and_test
+++ b/scripts/atomic/fallbacks/inc_and_test
@@ -1,6 +1,6 @@
cat <<EOF
/**
- * ${arch}${atomic}_inc_and_test - increment and test
+ * arch_${atomic}_inc_and_test - increment and test
* @v: pointer of type ${atomic}_t
*
* Atomically increments @v by 1
@@ -8,8 +8,8 @@ cat <<EOF
* other cases.
*/
static __always_inline bool
-${arch}${atomic}_inc_and_test(${atomic}_t *v)
+arch_${atomic}_inc_and_test(${atomic}_t *v)
{
- return ${arch}${atomic}_inc_return(v) == 0;
+ return arch_${atomic}_inc_return(v) == 0;
}
EOF
diff --git a/scripts/atomic/fallbacks/inc_not_zero b/scripts/atomic/fallbacks/inc_not_zero
index 50f2d4d48279..ed8a1f562667 100755
--- a/scripts/atomic/fallbacks/inc_not_zero
+++ b/scripts/atomic/fallbacks/inc_not_zero
@@ -1,14 +1,14 @@
cat <<EOF
/**
- * ${arch}${atomic}_inc_not_zero - increment unless the number is zero
+ * arch_${atomic}_inc_not_zero - increment unless the number is zero
* @v: pointer of type ${atomic}_t
*
* Atomically increments @v by 1, if @v is non-zero.
* Returns true if the increment was done.
*/
static __always_inline bool
-${arch}${atomic}_inc_not_zero(${atomic}_t *v)
+arch_${atomic}_inc_not_zero(${atomic}_t *v)
{
- return ${arch}${atomic}_add_unless(v, 1, 0);
+ return arch_${atomic}_add_unless(v, 1, 0);
}
EOF
diff --git a/scripts/atomic/fallbacks/inc_unless_negative b/scripts/atomic/fallbacks/inc_unless_negative
index 87629e0d4a80..95d8ce48233f 100755
--- a/scripts/atomic/fallbacks/inc_unless_negative
+++ b/scripts/atomic/fallbacks/inc_unless_negative
@@ -1,13 +1,13 @@
cat <<EOF
static __always_inline bool
-${arch}${atomic}_inc_unless_negative(${atomic}_t *v)
+arch_${atomic}_inc_unless_negative(${atomic}_t *v)
{
- ${int} c = ${arch}${atomic}_read(v);
+ ${int} c = arch_${atomic}_read(v);
do {
if (unlikely(c < 0))
return false;
- } while (!${arch}${atomic}_try_cmpxchg(v, &c, c + 1));
+ } while (!arch_${atomic}_try_cmpxchg(v, &c, c + 1));
return true;
}
diff --git a/scripts/atomic/fallbacks/read_acquire b/scripts/atomic/fallbacks/read_acquire
index 341a88dccaa7..803ba7561076 100755
--- a/scripts/atomic/fallbacks/read_acquire
+++ b/scripts/atomic/fallbacks/read_acquire
@@ -1,6 +1,6 @@
cat <<EOF
static __always_inline ${ret}
-${arch}${atomic}_read_acquire(const ${atomic}_t *v)
+arch_${atomic}_read_acquire(const ${atomic}_t *v)
{
return smp_load_acquire(&(v)->counter);
}
diff --git a/scripts/atomic/fallbacks/release b/scripts/atomic/fallbacks/release
index f8906d537c0f..b46feb56d69c 100755
--- a/scripts/atomic/fallbacks/release
+++ b/scripts/atomic/fallbacks/release
@@ -1,8 +1,8 @@
cat <<EOF
static __always_inline ${ret}
-${arch}${atomic}_${pfx}${name}${sfx}_release(${params})
+arch_${atomic}_${pfx}${name}${sfx}_release(${params})
{
__atomic_release_fence();
- ${retstmt}${arch}${atomic}_${pfx}${name}${sfx}_relaxed(${args});
+ ${retstmt}arch_${atomic}_${pfx}${name}${sfx}_relaxed(${args});
}
EOF
diff --git a/scripts/atomic/fallbacks/set_release b/scripts/atomic/fallbacks/set_release
index 76068272d5f5..86ede759f24e 100755
--- a/scripts/atomic/fallbacks/set_release
+++ b/scripts/atomic/fallbacks/set_release
@@ -1,6 +1,6 @@
cat <<EOF
static __always_inline void
-${arch}${atomic}_set_release(${atomic}_t *v, ${int} i)
+arch_${atomic}_set_release(${atomic}_t *v, ${int} i)
{
smp_store_release(&(v)->counter, i);
}
diff --git a/scripts/atomic/fallbacks/sub_and_test b/scripts/atomic/fallbacks/sub_and_test
index c580f4c2136e..260f37341c88 100755
--- a/scripts/atomic/fallbacks/sub_and_test
+++ b/scripts/atomic/fallbacks/sub_and_test
@@ -1,6 +1,6 @@
cat <<EOF
/**
- * ${arch}${atomic}_sub_and_test - subtract value from variable and test result
+ * arch_${atomic}_sub_and_test - subtract value from variable and test result
* @i: integer value to subtract
* @v: pointer of type ${atomic}_t
*
@@ -9,8 +9,8 @@ cat <<EOF
* other cases.
*/
static __always_inline bool
-${arch}${atomic}_sub_and_test(${int} i, ${atomic}_t *v)
+arch_${atomic}_sub_and_test(${int} i, ${atomic}_t *v)
{
- return ${arch}${atomic}_sub_return(i, v) == 0;
+ return arch_${atomic}_sub_return(i, v) == 0;
}
EOF
diff --git a/scripts/atomic/fallbacks/try_cmpxchg b/scripts/atomic/fallbacks/try_cmpxchg
index 06db0f738e45..890f850ede37 100755
--- a/scripts/atomic/fallbacks/try_cmpxchg
+++ b/scripts/atomic/fallbacks/try_cmpxchg
@@ -1,9 +1,9 @@
cat <<EOF
static __always_inline bool
-${arch}${atomic}_try_cmpxchg${order}(${atomic}_t *v, ${int} *old, ${int} new)
+arch_${atomic}_try_cmpxchg${order}(${atomic}_t *v, ${int} *old, ${int} new)
{
${int} r, o = *old;
- r = ${arch}${atomic}_cmpxchg${order}(v, o, new);
+ r = arch_${atomic}_cmpxchg${order}(v, o, new);
if (unlikely(r != o))
*old = r;
return likely(r == o);
diff --git a/scripts/atomic/gen-atomic-fallback.sh b/scripts/atomic/gen-atomic-fallback.sh
index 317a6cec76e1..8e2da71f1d5f 100755
--- a/scripts/atomic/gen-atomic-fallback.sh
+++ b/scripts/atomic/gen-atomic-fallback.sh
@@ -2,11 +2,10 @@
# SPDX-License-Identifier: GPL-2.0
ATOMICDIR=$(dirname $0)
-ARCH=$2
. ${ATOMICDIR}/atomic-tbl.sh
-#gen_template_fallback(template, meta, pfx, name, sfx, order, arch, atomic, int, args...)
+#gen_template_fallback(template, meta, pfx, name, sfx, order, atomic, int, args...)
gen_template_fallback()
{
local template="$1"; shift
@@ -15,11 +14,10 @@ gen_template_fallback()
local name="$1"; shift
local sfx="$1"; shift
local order="$1"; shift
- local arch="$1"; shift
local atomic="$1"; shift
local int="$1"; shift
- local atomicname="${arch}${atomic}_${pfx}${name}${sfx}${order}"
+ local atomicname="arch_${atomic}_${pfx}${name}${sfx}${order}"
local ret="$(gen_ret_type "${meta}" "${int}")"
local retstmt="$(gen_ret_stmt "${meta}")"
@@ -34,7 +32,7 @@ gen_template_fallback()
fi
}
-#gen_proto_fallback(meta, pfx, name, sfx, order, arch, atomic, int, args...)
+#gen_proto_fallback(meta, pfx, name, sfx, order, atomic, int, args...)
gen_proto_fallback()
{
local meta="$1"; shift
@@ -65,44 +63,26 @@ gen_proto_order_variant()
local name="$1"; shift
local sfx="$1"; shift
local order="$1"; shift
- local arch="$1"
- local atomic="$2"
+ local atomic="$1"
- local basename="${arch}${atomic}_${pfx}${name}${sfx}"
+ local basename="arch_${atomic}_${pfx}${name}${sfx}"
- printf "#define arch_${basename}${order} ${basename}${order}\n"
+ printf "#define ${basename}${order} ${basename}${order}\n"
}
-#gen_proto_order_variants(meta, pfx, name, sfx, arch, atomic, int, args...)
+#gen_proto_order_variants(meta, pfx, name, sfx, atomic, int, args...)
gen_proto_order_variants()
{
local meta="$1"; shift
local pfx="$1"; shift
local name="$1"; shift
local sfx="$1"; shift
- local arch="$1"
- local atomic="$2"
+ local atomic="$1"
- local basename="${arch}${atomic}_${pfx}${name}${sfx}"
+ local basename="arch_${atomic}_${pfx}${name}${sfx}"
local template="$(find_fallback_template "${pfx}" "${name}" "${sfx}" "${order}")"
- if [ -z "$arch" ]; then
- gen_proto_order_variant "${meta}" "${pfx}" "${name}" "${sfx}" "" "$@"
-
- if meta_has_acquire "${meta}"; then
- gen_proto_order_variant "${meta}" "${pfx}" "${name}" "${sfx}" "_acquire" "$@"
- fi
- if meta_has_release "${meta}"; then
- gen_proto_order_variant "${meta}" "${pfx}" "${name}" "${sfx}" "_release" "$@"
- fi
- if meta_has_relaxed "${meta}"; then
- gen_proto_order_variant "${meta}" "${pfx}" "${name}" "${sfx}" "_relaxed" "$@"
- fi
-
- echo ""
- fi
-
# If we don't have relaxed atomics, then we don't bother with ordering fallbacks
# read_acquire and set_release need to be templated, though
if ! meta_has_relaxed "${meta}"; then
@@ -128,7 +108,7 @@ gen_proto_order_variants()
gen_basic_fallbacks "${basename}"
if [ ! -z "${template}" ]; then
- printf "#endif /* ${arch}${atomic}_${pfx}${name}${sfx} */\n\n"
+ printf "#endif /* ${basename} */\n\n"
gen_proto_fallback "${meta}" "${pfx}" "${name}" "${sfx}" "" "$@"
gen_proto_fallback "${meta}" "${pfx}" "${name}" "${sfx}" "_acquire" "$@"
gen_proto_fallback "${meta}" "${pfx}" "${name}" "${sfx}" "_release" "$@"
@@ -187,38 +167,38 @@ gen_try_cmpxchg_fallback()
local order="$1"; shift;
cat <<EOF
-#ifndef ${ARCH}try_cmpxchg${order}
-#define ${ARCH}try_cmpxchg${order}(_ptr, _oldp, _new) \\
+#ifndef arch_try_cmpxchg${order}
+#define arch_try_cmpxchg${order}(_ptr, _oldp, _new) \\
({ \\
typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \\
- ___r = ${ARCH}cmpxchg${order}((_ptr), ___o, (_new)); \\
+ ___r = arch_cmpxchg${order}((_ptr), ___o, (_new)); \\
if (unlikely(___r != ___o)) \\
*___op = ___r; \\
likely(___r == ___o); \\
})
-#endif /* ${ARCH}try_cmpxchg${order} */
+#endif /* arch_try_cmpxchg${order} */
EOF
}
gen_try_cmpxchg_fallbacks()
{
- printf "#ifndef ${ARCH}try_cmpxchg_relaxed\n"
- printf "#ifdef ${ARCH}try_cmpxchg\n"
+ printf "#ifndef arch_try_cmpxchg_relaxed\n"
+ printf "#ifdef arch_try_cmpxchg\n"
- gen_basic_fallbacks "${ARCH}try_cmpxchg"
+ gen_basic_fallbacks "arch_try_cmpxchg"
- printf "#endif /* ${ARCH}try_cmpxchg */\n\n"
+ printf "#endif /* arch_try_cmpxchg */\n\n"
for order in "" "_acquire" "_release" "_relaxed"; do
gen_try_cmpxchg_fallback "${order}"
done
- printf "#else /* ${ARCH}try_cmpxchg_relaxed */\n"
+ printf "#else /* arch_try_cmpxchg_relaxed */\n"
- gen_order_fallbacks "${ARCH}try_cmpxchg"
+ gen_order_fallbacks "arch_try_cmpxchg"
- printf "#endif /* ${ARCH}try_cmpxchg_relaxed */\n\n"
+ printf "#endif /* arch_try_cmpxchg_relaxed */\n\n"
}
cat << EOF
@@ -234,14 +214,14 @@ cat << EOF
EOF
-for xchg in "${ARCH}xchg" "${ARCH}cmpxchg" "${ARCH}cmpxchg64"; do
+for xchg in "arch_xchg" "arch_cmpxchg" "arch_cmpxchg64"; do
gen_xchg_fallbacks "${xchg}"
done
gen_try_cmpxchg_fallbacks
grep '^[a-z]' "$1" | while read name meta args; do
- gen_proto "${meta}" "${name}" "${ARCH}" "atomic" "int" ${args}
+ gen_proto "${meta}" "${name}" "atomic" "int" ${args}
done
cat <<EOF
@@ -252,7 +232,7 @@ cat <<EOF
EOF
grep '^[a-z]' "$1" | while read name meta args; do
- gen_proto "${meta}" "${name}" "${ARCH}" "atomic64" "s64" ${args}
+ gen_proto "${meta}" "${name}" "atomic64" "s64" ${args}
done
cat <<EOF
diff --git a/scripts/atomic/gen-atomic-instrumented.sh b/scripts/atomic/gen-atomic-instrumented.sh
index b0c45aee19d7..035ceb4ee85c 100755
--- a/scripts/atomic/gen-atomic-instrumented.sh
+++ b/scripts/atomic/gen-atomic-instrumented.sh
@@ -121,8 +121,8 @@ cat << EOF
* arch_ variants (i.e. arch_atomic_read()/arch_atomic_cmpxchg()) to avoid
* double instrumentation.
*/
-#ifndef _ASM_GENERIC_ATOMIC_INSTRUMENTED_H
-#define _ASM_GENERIC_ATOMIC_INSTRUMENTED_H
+#ifndef _LINUX_ATOMIC_INSTRUMENTED_H
+#define _LINUX_ATOMIC_INSTRUMENTED_H
#include <linux/build_bug.h>
#include <linux/compiler.h>
@@ -138,6 +138,11 @@ grep '^[a-z]' "$1" | while read name meta args; do
gen_proto "${meta}" "${name}" "atomic64" "s64" ${args}
done
+grep '^[a-z]' "$1" | while read name meta args; do
+ gen_proto "${meta}" "${name}" "atomic_long" "long" ${args}
+done
+
+
for xchg in "xchg" "cmpxchg" "cmpxchg64" "try_cmpxchg"; do
for order in "" "_acquire" "_release" "_relaxed"; do
gen_xchg "${xchg}${order}" ""
@@ -158,5 +163,5 @@ gen_xchg "cmpxchg_double_local" "2 * "
cat <<EOF
-#endif /* _ASM_GENERIC_ATOMIC_INSTRUMENTED_H */
+#endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
EOF
diff --git a/scripts/atomic/gen-atomic-long.sh b/scripts/atomic/gen-atomic-long.sh
index e318d3f92e53..eda89cea6e1d 100755
--- a/scripts/atomic/gen-atomic-long.sh
+++ b/scripts/atomic/gen-atomic-long.sh
@@ -47,9 +47,9 @@ gen_proto_order_variant()
cat <<EOF
static __always_inline ${ret}
-atomic_long_${name}(${params})
+arch_atomic_long_${name}(${params})
{
- ${retstmt}${atomic}_${name}(${argscast});
+ ${retstmt}arch_${atomic}_${name}(${argscast});
}
EOF
@@ -61,8 +61,8 @@ cat << EOF
// Generated by $0
// DO NOT MODIFY THIS FILE DIRECTLY
-#ifndef _ASM_GENERIC_ATOMIC_LONG_H
-#define _ASM_GENERIC_ATOMIC_LONG_H
+#ifndef _LINUX_ATOMIC_LONG_H
+#define _LINUX_ATOMIC_LONG_H
#include <linux/compiler.h>
#include <asm/types.h>
@@ -98,5 +98,5 @@ done
cat <<EOF
#endif /* CONFIG_64BIT */
-#endif /* _ASM_GENERIC_ATOMIC_LONG_H */
+#endif /* _LINUX_ATOMIC_LONG_H */
EOF
diff --git a/scripts/atomic/gen-atomics.sh b/scripts/atomic/gen-atomics.sh
index f776a574224d..5b98a8307693 100755
--- a/scripts/atomic/gen-atomics.sh
+++ b/scripts/atomic/gen-atomics.sh
@@ -8,9 +8,9 @@ ATOMICTBL=${ATOMICDIR}/atomics.tbl
LINUXDIR=${ATOMICDIR}/../..
cat <<EOF |
-gen-atomic-instrumented.sh asm-generic/atomic-instrumented.h
-gen-atomic-long.sh asm-generic/atomic-long.h
-gen-atomic-fallback.sh linux/atomic-arch-fallback.h arch_
+gen-atomic-instrumented.sh linux/atomic/atomic-instrumented.h
+gen-atomic-long.sh linux/atomic/atomic-long.h
+gen-atomic-fallback.sh linux/atomic/atomic-arch-fallback.h
EOF
while read script header args; do
/bin/sh ${ATOMICDIR}/${script} ${ATOMICTBL} ${args} > ${LINUXDIR}/include/${header}
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index 2d94025b38e9..00ac7b79cddb 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -547,6 +547,7 @@ class PrinterHelpers(Printer):
'struct inode',
'struct socket',
'struct file',
+ 'struct bpf_timer',
]
known_types = {
'...',
@@ -594,6 +595,7 @@ class PrinterHelpers(Printer):
'struct inode',
'struct socket',
'struct file',
+ 'struct bpf_timer',
}
mapped_types = {
'u8': '__u8',
diff --git a/scripts/spdxcheck-test.sh b/scripts/spdxcheck-test.sh
index cfea6a0d1cc0..cb76324756bd 100644
--- a/scripts/spdxcheck-test.sh
+++ b/scripts/spdxcheck-test.sh
@@ -1,12 +1,10 @@
#!/bin/sh
-for PYTHON in python2 python3; do
- # run check on a text and a binary file
- for FILE in Makefile Documentation/logo.gif; do
- $PYTHON scripts/spdxcheck.py $FILE
- $PYTHON scripts/spdxcheck.py - < $FILE
- done
-
- # run check on complete tree to catch any other issues
- $PYTHON scripts/spdxcheck.py > /dev/null
+# run check on a text and a binary file
+for FILE in Makefile Documentation/logo.gif; do
+ python3 scripts/spdxcheck.py $FILE
+ python3 scripts/spdxcheck.py - < $FILE
done
+
+# run check on complete tree to catch any other issues
+python3 scripts/spdxcheck.py > /dev/null
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 287b90509006..673833f94069 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -985,6 +985,7 @@ void ima_measure_critical_data(const char *event_label,
CRITICAL_DATA, 0, event_label,
hash);
}
+EXPORT_SYMBOL_GPL(ima_measure_critical_data);
static int __init init_ima(void)
{
diff --git a/security/integrity/platform_certs/efi_parser.c b/security/integrity/platform_certs/efi_parser.c
index 18f01f36fe6a..d98260f8402a 100644
--- a/security/integrity/platform_certs/efi_parser.c
+++ b/security/integrity/platform_certs/efi_parser.c
@@ -55,7 +55,7 @@ int __init parse_efi_signature_list(
memcpy(&list, data, sizeof(list));
pr_devel("LIST[%04x] guid=%pUl ls=%x hs=%x ss=%x\n",
offs,
- list.signature_type.b, list.signature_list_size,
+ &list.signature_type, list.signature_list_size,
list.signature_header_size, list.signature_size);
lsize = list.signature_list_size;
diff --git a/security/security.c b/security/security.c
index 09533cbb7221..9ffa9e9c5c55 100644
--- a/security/security.c
+++ b/security/security.c
@@ -58,10 +58,11 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
[LOCKDOWN_MMIOTRACE] = "unsafe mmio",
[LOCKDOWN_DEBUGFS] = "debugfs access",
[LOCKDOWN_XMON_WR] = "xmon write access",
+ [LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM",
[LOCKDOWN_INTEGRITY_MAX] = "integrity",
[LOCKDOWN_KCORE] = "/proc/kcore access",
[LOCKDOWN_KPROBES] = "use of kprobes",
- [LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
+ [LOCKDOWN_BPF_READ_KERNEL] = "use of bpf to read kernel RAM",
[LOCKDOWN_PERF] = "unsafe use of perf",
[LOCKDOWN_TRACEFS] = "use of tracefs",
[LOCKDOWN_XMON_RW] = "xmon read and write access",
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index b0032c42333e..6517f221d52c 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1330,7 +1330,9 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc
return SECCLASS_SMC_SOCKET;
case PF_XDP:
return SECCLASS_XDP_SOCKET;
-#if PF_MAX > 45
+ case PF_MCTP:
+ return SECCLASS_MCTP_SOCKET;
+#if PF_MAX > 46
#error New address family defined, please update this function.
#endif
}
@@ -3325,6 +3327,8 @@ static int selinux_inode_setxattr(struct user_namespace *mnt_userns,
}
ab = audit_log_start(audit_context(),
GFP_ATOMIC, AUDIT_SELINUX_ERR);
+ if (!ab)
+ return rc;
audit_log_format(ab, "op=setxattr invalid_context=");
audit_log_n_untrustedstring(ab, value, audit_size);
audit_log_end(ab);
@@ -6552,6 +6556,8 @@ static int selinux_setprocattr(const char *name, void *value, size_t size)
ab = audit_log_start(audit_context(),
GFP_ATOMIC,
AUDIT_SELINUX_ERR);
+ if (!ab)
+ return error;
audit_log_format(ab, "op=fscreate invalid_context=");
audit_log_n_untrustedstring(ab, value, audit_size);
audit_log_end(ab);
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 62d19bccf3de..084757ff4390 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -246,6 +246,8 @@ struct security_class_mapping secclass_map[] = {
NULL } },
{ "xdp_socket",
{ COMMON_SOCK_PERMS, NULL } },
+ { "mctp_socket",
+ { COMMON_SOCK_PERMS, NULL } },
{ "perf_event",
{ "open", "cpu", "kernel", "tracepoint", "read", "write", NULL } },
{ "lockdown",
@@ -255,6 +257,6 @@ struct security_class_mapping secclass_map[] = {
{ NULL }
};
-#if PF_MAX > 45
+#if PF_MAX > 46
#error New address family defined, please update secclass_map.
#endif
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index d84c77f370dc..e5f1b2757a83 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -1673,6 +1673,8 @@ static int compute_sid_handle_invalid_context(
if (context_struct_to_string(policydb, newcontext, &n, &nlen))
goto out;
ab = audit_log_start(audit_context(), GFP_ATOMIC, AUDIT_SELINUX_ERR);
+ if (!ab)
+ goto out;
audit_log_format(ab,
"op=security_compute_sid invalid_context=");
/* no need to record the NUL with untrusted strings */
diff --git a/security/smack/smack.h b/security/smack/smack.h
index c3cfbdf4944a..99c3422596ab 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -302,7 +302,7 @@ int smack_populate_secattr(struct smack_known *skp);
/*
* Shared data.
*/
-extern int smack_enabled;
+extern int smack_enabled __initdata;
extern int smack_cipso_direct;
extern int smack_cipso_mapped;
extern struct smack_known *smack_net_ambient;
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index 1f391f6a3d47..d2186e2757be 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -81,23 +81,22 @@ int log_policy = SMACK_AUDIT_DENIED;
int smk_access_entry(char *subject_label, char *object_label,
struct list_head *rule_list)
{
- int may = -ENOENT;
struct smack_rule *srp;
list_for_each_entry_rcu(srp, rule_list, list) {
if (srp->smk_object->smk_known == object_label &&
srp->smk_subject->smk_known == subject_label) {
- may = srp->smk_access;
- break;
+ int may = srp->smk_access;
+ /*
+ * MAY_WRITE implies MAY_LOCK.
+ */
+ if ((may & MAY_WRITE) == MAY_WRITE)
+ may |= MAY_LOCK;
+ return may;
}
}
- /*
- * MAY_WRITE implies MAY_LOCK.
- */
- if ((may & MAY_WRITE) == MAY_WRITE)
- may |= MAY_LOCK;
- return may;
+ return -ENOENT;
}
/**
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 223a6da0e6dc..cacbe7518519 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -54,7 +54,7 @@
static DEFINE_MUTEX(smack_ipv6_lock);
static LIST_HEAD(smk_ipv6_port_list);
struct kmem_cache *smack_rule_cache;
-int smack_enabled;
+int smack_enabled __initdata;
#define A(s) {"smack"#s, sizeof("smack"#s) - 1, Opt_##s}
static struct {
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 09c0e2a6489c..71323d807dbf 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -251,7 +251,10 @@ static bool hw_support_mmap(struct snd_pcm_substream *substream)
switch (substream->dma_buffer.dev.type) {
case SNDRV_DMA_TYPE_UNKNOWN:
- return false;
+ /* we can't know the device, so just assume that the driver does
+ * everything right
+ */
+ return true;
case SNDRV_DMA_TYPE_CONTINUOUS:
case SNDRV_DMA_TYPE_VMALLOC:
return true;
diff --git a/sound/firewire/oxfw/oxfw-stream.c b/sound/firewire/oxfw/oxfw-stream.c
index 0ef242fdd3bc..fff18b5d4e05 100644
--- a/sound/firewire/oxfw/oxfw-stream.c
+++ b/sound/firewire/oxfw/oxfw-stream.c
@@ -153,7 +153,7 @@ static int init_stream(struct snd_oxfw *oxfw, struct amdtp_stream *stream)
struct cmp_connection *conn;
enum cmp_direction c_dir;
enum amdtp_stream_direction s_dir;
- unsigned int flags = CIP_UNAWARE_SYT;
+ unsigned int flags = 0;
int err;
if (!(oxfw->quirks & SND_OXFW_QUIRK_BLOCKING_TRANSMISSION))
@@ -161,6 +161,13 @@ static int init_stream(struct snd_oxfw *oxfw, struct amdtp_stream *stream)
else
flags |= CIP_BLOCKING;
+ // OXFW 970/971 has no function to generate playback timing according to the sequence
+ // of values in the syt field, thus the packet should carry NO_INFO in that field.
+ // However, some models simply ignore the data blocks of packets with NO_INFO when
+ // processing audio data.
+ if (!(oxfw->quirks & SND_OXFW_QUIRK_IGNORE_NO_INFO_PACKET))
+ flags |= CIP_UNAWARE_SYT;
+
if (stream == &oxfw->tx_stream) {
conn = &oxfw->out_conn;
c_dir = CMP_OUTPUT;
diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c
index 84971d78d152..cb5b5e3a481b 100644
--- a/sound/firewire/oxfw/oxfw.c
+++ b/sound/firewire/oxfw/oxfw.c
@@ -159,8 +159,10 @@ static int detect_quirks(struct snd_oxfw *oxfw, const struct ieee1394_device_id
return snd_oxfw_scs1x_add(oxfw);
}
- if (entry->vendor_id == OUI_APOGEE && entry->model_id == MODEL_DUET_FW)
- oxfw->quirks |= SND_OXFW_QUIRK_BLOCKING_TRANSMISSION;
+ if (entry->vendor_id == OUI_APOGEE && entry->model_id == MODEL_DUET_FW) {
+ oxfw->quirks |= SND_OXFW_QUIRK_BLOCKING_TRANSMISSION |
+ SND_OXFW_QUIRK_IGNORE_NO_INFO_PACKET;
+ }
/*
* TASCAM FireOne has physical control and requires a pair of additional
diff --git a/sound/firewire/oxfw/oxfw.h b/sound/firewire/oxfw/oxfw.h
index ee47abcb0c90..c13034f6c2ca 100644
--- a/sound/firewire/oxfw/oxfw.h
+++ b/sound/firewire/oxfw/oxfw.h
@@ -42,6 +42,11 @@ enum snd_oxfw_quirk {
SND_OXFW_QUIRK_BLOCKING_TRANSMISSION = 0x04,
// Stanton SCS1.d and SCS1.m support unique transaction.
SND_OXFW_QUIRK_SCS_TRANSACTION = 0x08,
+ // Apogee Duet FireWire ignores the data blocks of packets with NO_INFO when processing
+ // audio data, while the output level meter still moves. Any value in the syt field
+ // makes the device process audio data, even a value that is invalid according to
+ // IEC 61883-1/6.
+ SND_OXFW_QUIRK_IGNORE_NO_INFO_PACKET = 0x10,
};
/* This is an arbitrary number for convinience. */
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index e97d00585e8e..481d8f8d3396 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -3460,7 +3460,7 @@ static int cap_put_caller(struct snd_kcontrol *kcontrol,
struct hda_gen_spec *spec = codec->spec;
const struct hda_input_mux *imux;
struct nid_path *path;
- int i, adc_idx, err = 0;
+ int i, adc_idx, ret, err = 0;
imux = &spec->input_mux;
adc_idx = kcontrol->id.index;
@@ -3470,9 +3470,13 @@ static int cap_put_caller(struct snd_kcontrol *kcontrol,
if (!path || !path->ctls[type])
continue;
kcontrol->private_value = path->ctls[type];
- err = func(kcontrol, ucontrol);
- if (err < 0)
+ ret = func(kcontrol, ucontrol);
+ if (ret < 0) {
+ err = ret;
break;
+ }
+ if (ret > 0)
+ err = 1;
}
mutex_unlock(&codec->control_mutex);
if (err >= 0 && spec->cap_sync_hook)
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 0322b289505e..0062c18b646a 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -883,10 +883,11 @@ static unsigned int azx_get_pos_skl(struct azx *chip, struct azx_dev *azx_dev)
return azx_get_pos_posbuf(chip, azx_dev);
}
-static void azx_shutdown_chip(struct azx *chip)
+static void __azx_shutdown_chip(struct azx *chip, bool skip_link_reset)
{
azx_stop_chip(chip);
- azx_enter_link_reset(chip);
+ if (!skip_link_reset)
+ azx_enter_link_reset(chip);
azx_clear_irq_pending(chip);
display_power(chip, false);
}
@@ -895,6 +896,11 @@ static void azx_shutdown_chip(struct azx *chip)
static DEFINE_MUTEX(card_list_lock);
static LIST_HEAD(card_list);
+static void azx_shutdown_chip(struct azx *chip)
+{
+ __azx_shutdown_chip(chip, false);
+}
+
static void azx_add_card_list(struct azx *chip)
{
struct hda_intel *hda = container_of(chip, struct hda_intel, chip);
@@ -2385,7 +2391,7 @@ static void azx_shutdown(struct pci_dev *pci)
return;
chip = card->private_data;
if (chip && chip->running)
- azx_shutdown_chip(chip);
+ __azx_shutdown_chip(chip, true);
}
/* PCI IDs */
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 21c521596c9d..7ad689f991e7 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6658,6 +6658,7 @@ enum {
ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP,
ALC623_FIXUP_LENOVO_THINKSTATION_P340,
ALC255_FIXUP_ACER_HEADPHONE_AND_MIC,
+ ALC236_FIXUP_HP_LIMIT_INT_MIC_BOOST,
};
static const struct hda_fixup alc269_fixups[] = {
@@ -8242,6 +8243,12 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC255_FIXUP_XIAOMI_HEADSET_MIC
},
+ [ALC236_FIXUP_HP_LIMIT_INT_MIC_BOOST] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc269_fixup_limit_int_mic_boost,
+ .chained = true,
+ .chain_id = ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF,
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -8332,6 +8339,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x0a2e, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1028, 0x0a58, "Dell", ALC255_FIXUP_DELL_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1028, 0x0a61, "Dell XPS 15 9510", ALC289_FIXUP_DUAL_SPK),
SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -8431,13 +8439,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP),
+ SND_PCI_QUIRK(0x103c, 0x8805, "HP ProBook 650 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x880d, "HP EliteBook 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8847, "HP EliteBook x360 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x884b, "HP EliteBook 840 Aero G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
- SND_PCI_QUIRK(0x103c, 0x8862, "HP ProBook 445 G8 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
- SND_PCI_QUIRK(0x103c, 0x8863, "HP ProBook 445 G8 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8862, "HP ProBook 445 G8 Notebook PC", ALC236_FIXUP_HP_LIMIT_INT_MIC_BOOST),
+ SND_PCI_QUIRK(0x103c, 0x8863, "HP ProBook 445 G8 Notebook PC", ALC236_FIXUP_HP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x103c, 0x886d, "HP ZBook Fury 17.3 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
@@ -8465,6 +8474,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1043, 0x1740, "ASUS UX430UA", ALC295_FIXUP_ASUS_DACS),
SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK),
+ SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK),
SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS),
SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x18f1, "Asus FX505DT", ALC256_FIXUP_ASUS_HEADSET_MIC),
diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index a5c1a2c4eae4..773a136161f1 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c
@@ -1041,6 +1041,7 @@ static const struct hda_fixup via_fixups[] = {
};
static const struct snd_pci_quirk vt2002p_fixups[] = {
+ SND_PCI_QUIRK(0x1043, 0x13f7, "Asus B23E", VIA_FIXUP_POWER_SAVE),
SND_PCI_QUIRK(0x1043, 0x1487, "Asus G75", VIA_FIXUP_ASUS_G75),
SND_PCI_QUIRK(0x1043, 0x8532, "Asus X202E", VIA_FIXUP_INTMIC_BOOST),
SND_PCI_QUIRK_VENDOR(0x1558, "Clevo", VIA_FIXUP_POWER_SAVE),
diff --git a/sound/soc/Kconfig b/sound/soc/Kconfig
index 8a13462e1a63..5dcf77af07af 100644
--- a/sound/soc/Kconfig
+++ b/sound/soc/Kconfig
@@ -36,6 +36,7 @@ config SND_SOC_COMPRESS
config SND_SOC_TOPOLOGY
bool
+ select SND_DYNAMIC_MINORS
config SND_SOC_TOPOLOGY_KUNIT_TEST
tristate "KUnit tests for SoC topology"
diff --git a/sound/soc/amd/acp-da7219-max98357a.c b/sound/soc/amd/acp-da7219-max98357a.c
index 9449fb40a956..3c60c5f96dcb 100644
--- a/sound/soc/amd/acp-da7219-max98357a.c
+++ b/sound/soc/amd/acp-da7219-max98357a.c
@@ -525,6 +525,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = {
| SND_SOC_DAIFMT_CBM_CFM,
.init = cz_da7219_init,
.dpcm_playback = 1,
+ .stop_dma_first = 1,
.ops = &cz_da7219_play_ops,
SND_SOC_DAILINK_REG(designware1, dlgs, platform),
},
@@ -534,6 +535,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = {
.dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
| SND_SOC_DAIFMT_CBM_CFM,
.dpcm_capture = 1,
+ .stop_dma_first = 1,
.ops = &cz_da7219_cap_ops,
SND_SOC_DAILINK_REG(designware2, dlgs, platform),
},
@@ -543,6 +545,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = {
.dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
| SND_SOC_DAIFMT_CBM_CFM,
.dpcm_playback = 1,
+ .stop_dma_first = 1,
.ops = &cz_max_play_ops,
SND_SOC_DAILINK_REG(designware3, mx, platform),
},
@@ -553,6 +556,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = {
.dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
| SND_SOC_DAIFMT_CBM_CFM,
.dpcm_capture = 1,
+ .stop_dma_first = 1,
.ops = &cz_dmic0_cap_ops,
SND_SOC_DAILINK_REG(designware3, adau, platform),
},
@@ -563,6 +567,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = {
.dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
| SND_SOC_DAIFMT_CBM_CFM,
.dpcm_capture = 1,
+ .stop_dma_first = 1,
.ops = &cz_dmic1_cap_ops,
SND_SOC_DAILINK_REG(designware2, adau, platform),
},
diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c
index 143155a840ac..cc1ce6f22caa 100644
--- a/sound/soc/amd/acp-pcm-dma.c
+++ b/sound/soc/amd/acp-pcm-dma.c
@@ -969,7 +969,7 @@ static int acp_dma_hw_params(struct snd_soc_component *component,
acp_set_sram_bank_state(rtd->acp_mmio, 0, true);
/* Save for runtime private data */
- rtd->dma_addr = substream->dma_buffer.addr;
+ rtd->dma_addr = runtime->dma_addr;
rtd->order = get_order(size);
/* Fill the page table entries in ACP SRAM */
diff --git a/sound/soc/amd/raven/acp3x-pcm-dma.c b/sound/soc/amd/raven/acp3x-pcm-dma.c
index 8148b0d22e88..597d7c4b2a6b 100644
--- a/sound/soc/amd/raven/acp3x-pcm-dma.c
+++ b/sound/soc/amd/raven/acp3x-pcm-dma.c
@@ -286,7 +286,7 @@ static int acp3x_dma_hw_params(struct snd_soc_component *component,
pr_err("pinfo failed\n");
}
size = params_buffer_bytes(params);
- rtd->dma_addr = substream->dma_buffer.addr;
+ rtd->dma_addr = substream->runtime->dma_addr;
rtd->num_pages = (PAGE_ALIGN(size) >> PAGE_SHIFT);
config_acp3x_dma(rtd, substream->stream);
return 0;
diff --git a/sound/soc/amd/renoir/acp3x-pdm-dma.c b/sound/soc/amd/renoir/acp3x-pdm-dma.c
index bd20622b0933..0391c28dd078 100644
--- a/sound/soc/amd/renoir/acp3x-pdm-dma.c
+++ b/sound/soc/amd/renoir/acp3x-pdm-dma.c
@@ -242,7 +242,7 @@ static int acp_pdm_dma_hw_params(struct snd_soc_component *component,
return -EINVAL;
size = params_buffer_bytes(params);
period_bytes = params_period_bytes(params);
- rtd->dma_addr = substream->dma_buffer.addr;
+ rtd->dma_addr = substream->runtime->dma_addr;
rtd->num_pages = (PAGE_ALIGN(size) >> PAGE_SHIFT);
config_acp_dma(rtd, substream->stream);
init_pdm_ring_buffer(MEM_WINDOW_START, size, period_bytes,
diff --git a/sound/soc/amd/renoir/rn-pci-acp3x.c b/sound/soc/amd/renoir/rn-pci-acp3x.c
index 19438da5dfa5..7b8040e812a1 100644
--- a/sound/soc/amd/renoir/rn-pci-acp3x.c
+++ b/sound/soc/amd/renoir/rn-pci-acp3x.c
@@ -382,6 +382,8 @@ static const struct dev_pm_ops rn_acp_pm = {
.runtime_resume = snd_rn_acp_resume,
.suspend = snd_rn_acp_suspend,
.resume = snd_rn_acp_resume,
+ .restore = snd_rn_acp_resume,
+ .poweroff = snd_rn_acp_suspend,
};
static void snd_rn_acp_remove(struct pci_dev *pci)
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index a3b784ed4f70..9ff1600ca823 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -187,6 +187,7 @@ config SND_SOC_ALL_CODECS
imply SND_SOC_RT715_SDCA_SDW
imply SND_SOC_RT1308_SDW
imply SND_SOC_RT1316_SDW
+ imply SND_SOC_SDW_MOCKUP
imply SND_SOC_SGTL5000
imply SND_SOC_SI476X
imply SND_SOC_SIMPLE_AMPLIFIER
@@ -1287,6 +1288,23 @@ config SND_SOC_RT715_SDCA_SDW
select REGMAP_SOUNDWIRE
select REGMAP_SOUNDWIRE_MBQ
+config SND_SOC_SDW_MOCKUP
+ tristate "SoundWire mockup codec"
+ depends on EXPERT
+ depends on SOUNDWIRE
+ help
+ This option enables a SoundWire mockup codec that does not drive the
+ bus, take part in the command/control protocol or generate data on a
+ Source port.
+ This option is only intended to be used for tests on a device
+ with a connector, in combination with a bus analyzer, or to test new
+ topologies that differ from the actual hardware layout.
+ This mockup device could be totally virtual but could also be a
+ real physical one with one key restriction: it is not allowed by the
+ SoundWire specification to be configured via a sideband mechanism and
+ generate audio data for capture. However, nothing prevents such a
+ peripheral device from snooping the bus.
+
#Freescale sgtl5000 codec
config SND_SOC_SGTL5000
tristate "Freescale SGTL5000 CODEC"
@@ -1559,6 +1577,7 @@ config SND_SOC_WCD934X
config SND_SOC_WCD938X
depends on SND_SOC_WCD938X_SDW
tristate
+ depends on SOUNDWIRE || !SOUNDWIRE
config SND_SOC_WCD938X_SDW
tristate "WCD9380/WCD9385 Codec - SDW"
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index de8b83dd2c76..8dcea2c4604a 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -203,6 +203,7 @@ snd-soc-rt711-objs := rt711.o rt711-sdw.o
snd-soc-rt711-sdca-objs := rt711-sdca.o rt711-sdca-sdw.o
snd-soc-rt715-objs := rt715.o rt715-sdw.o
snd-soc-rt715-sdca-objs := rt715-sdca.o rt715-sdca-sdw.o
+snd-soc-sdw-mockup-objs := sdw-mockup.o
snd-soc-sgtl5000-objs := sgtl5000.o
snd-soc-alc5623-objs := alc5623.o
snd-soc-alc5632-objs := alc5632.o
@@ -530,6 +531,7 @@ obj-$(CONFIG_SND_SOC_RT711) += snd-soc-rt711.o
obj-$(CONFIG_SND_SOC_RT711_SDCA_SDW) += snd-soc-rt711-sdca.o
obj-$(CONFIG_SND_SOC_RT715) += snd-soc-rt715.o
obj-$(CONFIG_SND_SOC_RT715_SDCA_SDW) += snd-soc-rt715-sdca.o
+obj-$(CONFIG_SND_SOC_SDW_MOCKUP) += snd-soc-sdw-mockup.o
obj-$(CONFIG_SND_SOC_SGTL5000) += snd-soc-sgtl5000.o
obj-$(CONFIG_SND_SOC_SIGMADSP) += snd-soc-sigmadsp.o
obj-$(CONFIG_SND_SOC_SIGMADSP_I2C) += snd-soc-sigmadsp-i2c.o
@@ -583,7 +585,10 @@ obj-$(CONFIG_SND_SOC_WCD_MBHC) += snd-soc-wcd-mbhc.o
obj-$(CONFIG_SND_SOC_WCD9335) += snd-soc-wcd9335.o
obj-$(CONFIG_SND_SOC_WCD934X) += snd-soc-wcd934x.o
obj-$(CONFIG_SND_SOC_WCD938X) += snd-soc-wcd938x.o
-obj-$(CONFIG_SND_SOC_WCD938X_SDW) += snd-soc-wcd938x-sdw.o
+ifdef CONFIG_SND_SOC_WCD938X_SDW
+# avoid link failure by forcing sdw code built-in when needed
+obj-$(CONFIG_SND_SOC_WCD938X) += snd-soc-wcd938x-sdw.o
+endif
obj-$(CONFIG_SND_SOC_WL1273) += snd-soc-wl1273.o
obj-$(CONFIG_SND_SOC_WM0010) += snd-soc-wm0010.o
obj-$(CONFIG_SND_SOC_WM1250_EV1) += snd-soc-wm1250-ev1.o
diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c
index eff013f295be..99c022be94a6 100644
--- a/sound/soc/codecs/cs42l42.c
+++ b/sound/soc/codecs/cs42l42.c
@@ -405,7 +405,7 @@ static const struct regmap_config cs42l42_regmap = {
.use_single_write = true,
};
-static DECLARE_TLV_DB_SCALE(adc_tlv, -9600, 100, false);
+static DECLARE_TLV_DB_SCALE(adc_tlv, -9700, 100, true);
static DECLARE_TLV_DB_SCALE(mixer_tlv, -6300, 100, true);
static const char * const cs42l42_hpf_freq_text[] = {
@@ -425,34 +425,23 @@ static SOC_ENUM_SINGLE_DECL(cs42l42_wnf3_freq_enum, CS42L42_ADC_WNF_HPF_CTL,
CS42L42_ADC_WNF_CF_SHIFT,
cs42l42_wnf3_freq_text);
-static const char * const cs42l42_wnf05_freq_text[] = {
- "280Hz", "315Hz", "350Hz", "385Hz",
- "420Hz", "455Hz", "490Hz", "525Hz"
-};
-
-static SOC_ENUM_SINGLE_DECL(cs42l42_wnf05_freq_enum, CS42L42_ADC_WNF_HPF_CTL,
- CS42L42_ADC_WNF_CF_SHIFT,
- cs42l42_wnf05_freq_text);
-
static const struct snd_kcontrol_new cs42l42_snd_controls[] = {
/* ADC Volume and Filter Controls */
SOC_SINGLE("ADC Notch Switch", CS42L42_ADC_CTL,
- CS42L42_ADC_NOTCH_DIS_SHIFT, true, false),
+ CS42L42_ADC_NOTCH_DIS_SHIFT, true, true),
SOC_SINGLE("ADC Weak Force Switch", CS42L42_ADC_CTL,
CS42L42_ADC_FORCE_WEAK_VCM_SHIFT, true, false),
SOC_SINGLE("ADC Invert Switch", CS42L42_ADC_CTL,
CS42L42_ADC_INV_SHIFT, true, false),
SOC_SINGLE("ADC Boost Switch", CS42L42_ADC_CTL,
CS42L42_ADC_DIG_BOOST_SHIFT, true, false),
- SOC_SINGLE_SX_TLV("ADC Volume", CS42L42_ADC_VOLUME,
- CS42L42_ADC_VOL_SHIFT, 0xA0, 0x6C, adc_tlv),
+ SOC_SINGLE_S8_TLV("ADC Volume", CS42L42_ADC_VOLUME, -97, 12, adc_tlv),
SOC_SINGLE("ADC WNF Switch", CS42L42_ADC_WNF_HPF_CTL,
CS42L42_ADC_WNF_EN_SHIFT, true, false),
SOC_SINGLE("ADC HPF Switch", CS42L42_ADC_WNF_HPF_CTL,
CS42L42_ADC_HPF_EN_SHIFT, true, false),
SOC_ENUM("HPF Corner Freq", cs42l42_hpf_freq_enum),
SOC_ENUM("WNF 3dB Freq", cs42l42_wnf3_freq_enum),
- SOC_ENUM("WNF 05dB Freq", cs42l42_wnf05_freq_enum),
/* DAC Volume and Filter Controls */
SOC_SINGLE("DACA Invert Switch", CS42L42_DAC_CTL1,
@@ -471,8 +460,8 @@ static const struct snd_soc_dapm_widget cs42l42_dapm_widgets[] = {
SND_SOC_DAPM_OUTPUT("HP"),
SND_SOC_DAPM_DAC("DAC", NULL, CS42L42_PWR_CTL1, CS42L42_HP_PDN_SHIFT, 1),
SND_SOC_DAPM_MIXER("MIXER", CS42L42_PWR_CTL1, CS42L42_MIXER_PDN_SHIFT, 1, NULL, 0),
- SND_SOC_DAPM_AIF_IN("SDIN1", NULL, 0, CS42L42_ASP_RX_DAI0_EN, CS42L42_ASP_RX0_CH1_SHIFT, 0),
- SND_SOC_DAPM_AIF_IN("SDIN2", NULL, 1, CS42L42_ASP_RX_DAI0_EN, CS42L42_ASP_RX0_CH2_SHIFT, 0),
+ SND_SOC_DAPM_AIF_IN("SDIN1", NULL, 0, SND_SOC_NOPM, 0, 0),
+ SND_SOC_DAPM_AIF_IN("SDIN2", NULL, 1, SND_SOC_NOPM, 0, 0),
/* Playback Requirements */
SND_SOC_DAPM_SUPPLY("ASP DAI0", CS42L42_PWR_CTL1, CS42L42_ASP_DAI_PDN_SHIFT, 1, NULL, 0),
@@ -630,6 +619,8 @@ static int cs42l42_pll_config(struct snd_soc_component *component)
for (i = 0; i < ARRAY_SIZE(pll_ratio_table); i++) {
if (pll_ratio_table[i].sclk == clk) {
+ cs42l42->pll_config = i;
+
/* Configure the internal sample rate */
snd_soc_component_update_bits(component, CS42L42_MCLK_CTL,
CS42L42_INTERNAL_FS_MASK,
@@ -638,14 +629,9 @@ static int cs42l42_pll_config(struct snd_soc_component *component)
(pll_ratio_table[i].mclk_int !=
24000000)) <<
CS42L42_INTERNAL_FS_SHIFT);
- /* Set the MCLK src (PLL or SCLK) and the divide
- * ratio
- */
+
snd_soc_component_update_bits(component, CS42L42_MCLK_SRC_SEL,
- CS42L42_MCLK_SRC_SEL_MASK |
CS42L42_MCLKDIV_MASK,
- (pll_ratio_table[i].mclk_src_sel
- << CS42L42_MCLK_SRC_SEL_SHIFT) |
(pll_ratio_table[i].mclk_div <<
CS42L42_MCLKDIV_SHIFT));
/* Set up the LRCLK */
@@ -681,15 +667,6 @@ static int cs42l42_pll_config(struct snd_soc_component *component)
CS42L42_FSYNC_PULSE_WIDTH_MASK,
CS42L42_FRAC1_VAL(fsync - 1) <<
CS42L42_FSYNC_PULSE_WIDTH_SHIFT);
- snd_soc_component_update_bits(component,
- CS42L42_ASP_FRM_CFG,
- CS42L42_ASP_5050_MASK,
- CS42L42_ASP_5050_MASK);
- /* Set the frame delay to 1.0 SCLK clocks */
- snd_soc_component_update_bits(component, CS42L42_ASP_FRM_CFG,
- CS42L42_ASP_FSD_MASK,
- CS42L42_ASP_FSD_1_0 <<
- CS42L42_ASP_FSD_SHIFT);
/* Set the sample rates (96k or lower) */
snd_soc_component_update_bits(component, CS42L42_FS_RATE_EN,
CS42L42_FS_EN_MASK,
@@ -789,7 +766,18 @@ static int cs42l42_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
/* interface format */
switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
case SND_SOC_DAIFMT_I2S:
- case SND_SOC_DAIFMT_LEFT_J:
+ /*
+ * 5050 mode, frame starts on falling edge of LRCLK,
+ * frame delayed by 1.0 SCLKs
+ */
+ snd_soc_component_update_bits(component,
+ CS42L42_ASP_FRM_CFG,
+ CS42L42_ASP_STP_MASK |
+ CS42L42_ASP_5050_MASK |
+ CS42L42_ASP_FSD_MASK,
+ CS42L42_ASP_5050_MASK |
+ (CS42L42_ASP_FSD_1_0 <<
+ CS42L42_ASP_FSD_SHIFT));
break;
default:
return -EINVAL;
@@ -819,6 +807,25 @@ static int cs42l42_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
return 0;
}
+static int cs42l42_dai_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
+{
+ struct snd_soc_component *component = dai->component;
+ struct cs42l42_private *cs42l42 = snd_soc_component_get_drvdata(component);
+
+ /*
+ * Sample rates < 44.1 kHz would produce an out-of-range SCLK with
+ * a standard I2S frame. If the machine driver sets SCLK it must be
+ * legal.
+ */
+ if (cs42l42->sclk)
+ return 0;
+
+ /* Machine driver has not set a SCLK, limit bottom end to 44.1 kHz */
+ return snd_pcm_hw_constraint_minmax(substream->runtime,
+ SNDRV_PCM_HW_PARAM_RATE,
+ 44100, 192000);
+}
+
static int cs42l42_pcm_hw_params(struct snd_pcm_substream *substream,
struct snd_pcm_hw_params *params,
struct snd_soc_dai *dai)
@@ -832,6 +839,10 @@ static int cs42l42_pcm_hw_params(struct snd_pcm_substream *substream,
cs42l42->srate = params_rate(params);
cs42l42->bclk = snd_soc_params_to_bclk(params);
+ /* I2S frame always has 2 channels even for mono audio */
+ if (channels == 1)
+ cs42l42->bclk *= 2;
+
switch(substream->stream) {
case SNDRV_PCM_STREAM_CAPTURE:
if (channels == 2) {
@@ -855,6 +866,17 @@ static int cs42l42_pcm_hw_params(struct snd_pcm_substream *substream,
snd_soc_component_update_bits(component, CS42L42_ASP_RX_DAI0_CH2_AP_RES,
CS42L42_ASP_RX_CH_AP_MASK |
CS42L42_ASP_RX_CH_RES_MASK, val);
+
+ /* Channel B comes from the last active channel */
+ snd_soc_component_update_bits(component, CS42L42_SP_RX_CH_SEL,
+ CS42L42_SP_RX_CHB_SEL_MASK,
+ (channels - 1) << CS42L42_SP_RX_CHB_SEL_SHIFT);
+
+ /* Both LRCLK slots must be enabled */
+ snd_soc_component_update_bits(component, CS42L42_ASP_RX_DAI0_EN,
+ CS42L42_ASP_RX0_CH_EN_MASK,
+ BIT(CS42L42_ASP_RX0_CH1_SHIFT) |
+ BIT(CS42L42_ASP_RX0_CH2_SHIFT));
break;
default:
break;
@@ -900,13 +922,21 @@ static int cs42l42_mute_stream(struct snd_soc_dai *dai, int mute, int stream)
*/
regmap_multi_reg_write(cs42l42->regmap, cs42l42_to_osc_seq,
ARRAY_SIZE(cs42l42_to_osc_seq));
+
+ /* Must disconnect PLL before stopping it */
+ snd_soc_component_update_bits(component,
+ CS42L42_MCLK_SRC_SEL,
+ CS42L42_MCLK_SRC_SEL_MASK,
+ 0);
+ usleep_range(100, 200);
+
snd_soc_component_update_bits(component, CS42L42_PLL_CTL1,
CS42L42_PLL_START_MASK, 0);
}
} else {
if (!cs42l42->stream_use) {
/* SCLK must be running before codec unmute */
- if ((cs42l42->bclk < 11289600) && (cs42l42->sclk < 11289600)) {
+ if (pll_ratio_table[cs42l42->pll_config].mclk_src_sel) {
snd_soc_component_update_bits(component, CS42L42_PLL_CTL1,
CS42L42_PLL_START_MASK, 1);
@@ -927,6 +957,12 @@ static int cs42l42_mute_stream(struct snd_soc_dai *dai, int mute, int stream)
CS42L42_PLL_LOCK_TIMEOUT_US);
if (ret < 0)
dev_warn(component->dev, "PLL failed to lock: %d\n", ret);
+
+ /* PLL must be running to drive glitchless switch logic */
+ snd_soc_component_update_bits(component,
+ CS42L42_MCLK_SRC_SEL,
+ CS42L42_MCLK_SRC_SEL_MASK,
+ CS42L42_MCLK_SRC_SEL_MASK);
}
/* Mark SCLK as present, turn off internal oscillator */
@@ -960,8 +996,8 @@ static int cs42l42_mute_stream(struct snd_soc_dai *dai, int mute, int stream)
SNDRV_PCM_FMTBIT_S24_LE |\
SNDRV_PCM_FMTBIT_S32_LE )
-
static const struct snd_soc_dai_ops cs42l42_ops = {
+ .startup = cs42l42_dai_startup,
.hw_params = cs42l42_pcm_hw_params,
.set_fmt = cs42l42_set_dai_fmt,
.set_sysclk = cs42l42_set_sysclk,
diff --git a/sound/soc/codecs/cs42l42.h b/sound/soc/codecs/cs42l42.h
index 206b3c81d3e0..8734f6828f3e 100644
--- a/sound/soc/codecs/cs42l42.h
+++ b/sound/soc/codecs/cs42l42.h
@@ -653,6 +653,8 @@
/* Page 0x25 Audio Port Registers */
#define CS42L42_SP_RX_CH_SEL (CS42L42_PAGE_25 + 0x01)
+#define CS42L42_SP_RX_CHB_SEL_SHIFT 2
+#define CS42L42_SP_RX_CHB_SEL_MASK (3 << CS42L42_SP_RX_CHB_SEL_SHIFT)
#define CS42L42_SP_RX_ISOC_CTL (CS42L42_PAGE_25 + 0x02)
#define CS42L42_SP_RX_RSYNC_SHIFT 6
@@ -775,6 +777,7 @@ struct cs42l42_private {
struct gpio_desc *reset_gpio;
struct completion pdn_done;
struct snd_soc_jack *jack;
+ int pll_config;
int bclk;
u32 sclk;
u32 srate;
diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c
index 15bd8335f667..db88be48c998 100644
--- a/sound/soc/codecs/nau8824.c
+++ b/sound/soc/codecs/nau8824.c
@@ -828,36 +828,6 @@ static void nau8824_int_status_clear_all(struct regmap *regmap)
}
}
-static void nau8824_dapm_disable_pin(struct nau8824 *nau8824, const char *pin)
-{
- struct snd_soc_dapm_context *dapm = nau8824->dapm;
- const char *prefix = dapm->component->name_prefix;
- char prefixed_pin[80];
-
- if (prefix) {
- snprintf(prefixed_pin, sizeof(prefixed_pin), "%s %s",
- prefix, pin);
- snd_soc_dapm_disable_pin(dapm, prefixed_pin);
- } else {
- snd_soc_dapm_disable_pin(dapm, pin);
- }
-}
-
-static void nau8824_dapm_enable_pin(struct nau8824 *nau8824, const char *pin)
-{
- struct snd_soc_dapm_context *dapm = nau8824->dapm;
- const char *prefix = dapm->component->name_prefix;
- char prefixed_pin[80];
-
- if (prefix) {
- snprintf(prefixed_pin, sizeof(prefixed_pin), "%s %s",
- prefix, pin);
- snd_soc_dapm_force_enable_pin(dapm, prefixed_pin);
- } else {
- snd_soc_dapm_force_enable_pin(dapm, pin);
- }
-}
-
static void nau8824_eject_jack(struct nau8824 *nau8824)
{
struct snd_soc_dapm_context *dapm = nau8824->dapm;
@@ -866,8 +836,8 @@ static void nau8824_eject_jack(struct nau8824 *nau8824)
/* Clear all interruption status */
nau8824_int_status_clear_all(regmap);
- nau8824_dapm_disable_pin(nau8824, "SAR");
- nau8824_dapm_disable_pin(nau8824, "MICBIAS");
+ snd_soc_dapm_disable_pin(dapm, "SAR");
+ snd_soc_dapm_disable_pin(dapm, "MICBIAS");
snd_soc_dapm_sync(dapm);
/* Enable the insertion interruption, disable the ejection
@@ -897,8 +867,8 @@ static void nau8824_jdet_work(struct work_struct *work)
struct regmap *regmap = nau8824->regmap;
int adc_value, event = 0, event_mask = 0;
- nau8824_dapm_enable_pin(nau8824, "MICBIAS");
- nau8824_dapm_enable_pin(nau8824, "SAR");
+ snd_soc_dapm_enable_pin(dapm, "MICBIAS");
+ snd_soc_dapm_enable_pin(dapm, "SAR");
snd_soc_dapm_sync(dapm);
msleep(100);
@@ -909,8 +879,8 @@ static void nau8824_jdet_work(struct work_struct *work)
if (adc_value < HEADSET_SARADC_THD) {
event |= SND_JACK_HEADPHONE;
- nau8824_dapm_disable_pin(nau8824, "SAR");
- nau8824_dapm_disable_pin(nau8824, "MICBIAS");
+ snd_soc_dapm_disable_pin(dapm, "SAR");
+ snd_soc_dapm_disable_pin(dapm, "MICBIAS");
snd_soc_dapm_sync(dapm);
} else {
event |= SND_JACK_HEADSET;
diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c
index abcd6f483788..51ecaa2abcd1 100644
--- a/sound/soc/codecs/rt5682.c
+++ b/sound/soc/codecs/rt5682.c
@@ -44,6 +44,7 @@ static const struct reg_sequence patch_list[] = {
{RT5682_I2C_CTRL, 0x000f},
{RT5682_PLL2_INTERNAL, 0x8266},
{RT5682_SAR_IL_CMD_3, 0x8365},
+ {RT5682_SAR_IL_CMD_6, 0x0180},
};
void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev)
diff --git a/sound/soc/codecs/sdw-mockup.c b/sound/soc/codecs/sdw-mockup.c
new file mode 100644
index 000000000000..8ea13cfa9f8e
--- /dev/null
+++ b/sound/soc/codecs/sdw-mockup.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// sdw-mockup.c -- a mockup SoundWire codec for tests where only the host
+// drives the bus.
+//
+// Copyright(c) 2021 Intel Corporation
+//
+//
+
+#include <linux/device.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/soundwire/sdw.h>
+#include <linux/soundwire/sdw_type.h>
+#include <linux/soundwire/sdw_registers.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+
+/* Driver-private data; allocated and stored as drvdata in sdw_mockup_sdw_probe(). */
+struct sdw_mockup_priv {
+ struct sdw_slave *slave; /* SoundWire peripheral this instance is bound to */
+};
+
+/* Wrapper kept in the DAI's playback/capture dma_data by sdw_mockup_set_sdw_stream(). */
+struct sdw_stream_data {
+ struct sdw_stream_runtime *sdw_stream; /* stream runtime handed in by the caller */
+};
+
+/* Component probe hook: performs no setup and always returns 0. */
+static int sdw_mockup_component_probe(struct snd_soc_component *component)
+{
+ return 0;
+}
+
+/* Component remove hook: empty, since the probe hook acquires nothing. */
+static void sdw_mockup_component_remove(struct snd_soc_component *component)
+{
+}
+
+/* Component driver registered from sdw_mockup_sdw_probe(); both hooks are no-ops. */
+static const struct snd_soc_component_driver snd_soc_sdw_mockup_component = {
+ .probe = sdw_mockup_component_probe,
+ .remove = sdw_mockup_component_remove,
+};
+
+/*
+ * set_sdw_stream DAI callback: remember the SoundWire stream runtime for one
+ * direction of the DAI.
+ *
+ * A NULL sdw_stream is accepted and leaves the DAI untouched. Otherwise a
+ * struct sdw_stream_data wrapper is allocated and stored in the DAI's
+ * playback or capture dma_data; sdw_mockup_shutdown() frees it.
+ *
+ * Returns 0 on success or -ENOMEM if the wrapper cannot be allocated.
+ *
+ * NOTE(review): a wrapper already stored for the same direction is
+ * overwritten without being freed - confirm callers always pair this with
+ * shutdown.
+ */
+static int sdw_mockup_set_sdw_stream(struct snd_soc_dai *dai, void *sdw_stream,
+ int direction)
+{
+ struct sdw_stream_data *stream;
+
+ if (!sdw_stream)
+ return 0;
+
+ stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+ if (!stream)
+ return -ENOMEM;
+
+ stream->sdw_stream = sdw_stream;
+
+ /* Store the wrapper as the dma_data of the matching direction */
+ if (direction == SNDRV_PCM_STREAM_PLAYBACK)
+ dai->playback_dma_data = stream;
+ else
+ dai->capture_dma_data = stream;
+
+ return 0;
+}
+
+/*
+ * shutdown DAI callback: detach the wrapper stored by
+ * sdw_mockup_set_sdw_stream() from the DAI and free it.
+ */
+static void sdw_mockup_shutdown(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+{
+ struct sdw_stream_data *stream;
+
+ stream = snd_soc_dai_get_dma_data(dai, substream);
+ /* clear the DAI's pointer before freeing so it never dangles */
+ snd_soc_dai_set_dma_data(dai, substream, NULL);
+ kfree(stream); /* kfree(NULL) is a no-op, so no check is needed */
+}
+
+/*
+ * hw_params DAI callback: describe the PCM stream to the SoundWire core and
+ * attach this peripheral to the stream runtime stored by
+ * sdw_mockup_set_sdw_stream().
+ *
+ * Playback uses data port 1 in the RX direction, capture uses data port 8 in
+ * the TX direction.
+ *
+ * Returns -EINVAL when no stream wrapper or no slave is available, otherwise
+ * the result of sdw_stream_add_slave().
+ */
+static int sdw_mockup_pcm_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params,
+ struct snd_soc_dai *dai)
+{
+ struct snd_soc_component *component = dai->component;
+ struct sdw_mockup_priv *sdw_mockup = snd_soc_component_get_drvdata(component);
+ /*
+ * Zero-initialize both configs: only some members are assigned below,
+ * and the rest must not reach the SoundWire core as stack garbage.
+ */
+ struct sdw_stream_config stream_config = {0};
+ struct sdw_port_config port_config = {0};
+ enum sdw_data_direction direction;
+ struct sdw_stream_data *stream;
+ int num_channels;
+ int port;
+ int ret;
+
+ stream = snd_soc_dai_get_dma_data(dai, substream);
+ if (!stream)
+ return -EINVAL;
+
+ if (!sdw_mockup->slave)
+ return -EINVAL;
+
+ /* SoundWire specific configuration */
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+ direction = SDW_DATA_DIR_RX;
+ port = 1;
+ } else {
+ direction = SDW_DATA_DIR_TX;
+ port = 8;
+ }
+
+ num_channels = params_channels(params);
+
+ stream_config.frame_rate = params_rate(params);
+ stream_config.ch_count = num_channels;
+ stream_config.bps = snd_pcm_format_width(params_format(params));
+ stream_config.direction = direction;
+
+ /* one mask bit per active channel, starting at bit 0 */
+ port_config.ch_mask = (1 << num_channels) - 1;
+ port_config.num = port;
+
+ ret = sdw_stream_add_slave(sdw_mockup->slave, &stream_config,
+ &port_config, 1, stream->sdw_stream);
+ if (ret)
+ dev_err(dai->dev, "Unable to configure port\n");
+
+ return ret;
+}
+
+/*
+ * hw_free DAI callback: detach this peripheral from the SoundWire stream
+ * runtime stored in the DAI's dma_data.
+ *
+ * Returns -EINVAL when no slave is bound, 0 otherwise; the result of
+ * sdw_stream_remove_slave() is not propagated.
+ *
+ * NOTE(review): unlike sdw_mockup_pcm_hw_params(), "stream" is dereferenced
+ * without a NULL check - confirm this callback cannot run without dma_data.
+ */
+static int sdw_mockup_pcm_hw_free(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+{
+ struct snd_soc_component *component = dai->component;
+ struct sdw_mockup_priv *sdw_mockup = snd_soc_component_get_drvdata(component);
+ struct sdw_stream_data *stream =
+ snd_soc_dai_get_dma_data(dai, substream);
+
+ if (!sdw_mockup->slave)
+ return -EINVAL;
+
+ sdw_stream_remove_slave(sdw_mockup->slave, stream->sdw_stream);
+ return 0;
+}
+
+/* DAI operations shared by the playback and capture directions of sdw_mockup_dai. */
+static const struct snd_soc_dai_ops sdw_mockup_ops = {
+ .hw_params = sdw_mockup_pcm_hw_params,
+ .hw_free = sdw_mockup_pcm_hw_free,
+ .set_sdw_stream = sdw_mockup_set_sdw_stream,
+ .shutdown = sdw_mockup_shutdown,
+};
+
+/*
+ * Single DAI with a playback and a capture stream, each 1 to 2 channels.
+ * The stream names mirror the data ports chosen in
+ * sdw_mockup_pcm_hw_params() (port 1 for playback, port 8 for capture).
+ * No rates or formats are declared here.
+ */
+static struct snd_soc_dai_driver sdw_mockup_dai[] = {
+ {
+ .name = "sdw-mockup-aif1",
+ .id = 1,
+ .playback = {
+ .stream_name = "DP1 Playback",
+ .channels_min = 1,
+ .channels_max = 2,
+ },
+ .capture = {
+ .stream_name = "DP8 Capture",
+ .channels_min = 1,
+ .channels_max = 2,
+ },
+ .ops = &sdw_mockup_ops,
+ },
+};
+
+/* update_status slave callback: ignores the reported status and returns 0. */
+static int sdw_mockup_update_status(struct sdw_slave *slave,
+ enum sdw_slave_status status)
+{
+ return 0;
+}
+
+/*
+ * read_prop slave callback: fill slave->prop with hard-coded properties
+ * instead of reading them from firmware or hardware.
+ *
+ * Declares one source port (DP8) and one sink port (DP1), allocates one
+ * device-managed sdw_dpn_prop entry per declared port and marks each as a
+ * full data port with a simple channel-prepare state machine.
+ *
+ * Returns 0 on success or -ENOMEM if a port property array cannot be
+ * allocated.
+ */
+static int sdw_mockup_read_prop(struct sdw_slave *slave)
+{
+ struct sdw_slave_prop *prop = &slave->prop;
+ int nval;
+ int i, j;
+ u32 bit;
+ unsigned long addr;
+ struct sdw_dpn_prop *dpn;
+
+ prop->paging_support = false;
+
+ /*
+ * first we need to allocate memory for set bits in port lists
+ * the port allocation is completely arbitrary:
+ * DP0 is not supported
+ * DP1 is sink
+ * DP8 is source
+ */
+ prop->source_ports = BIT(8);
+ prop->sink_ports = BIT(1);
+
+ /* one sdw_dpn_prop entry per bit set in the source port mask */
+ nval = hweight32(prop->source_ports);
+ prop->src_dpn_prop = devm_kcalloc(&slave->dev, nval,
+ sizeof(*prop->src_dpn_prop),
+ GFP_KERNEL);
+ if (!prop->src_dpn_prop)
+ return -ENOMEM;
+
+ i = 0;
+ dpn = prop->src_dpn_prop;
+ /* copy the mask into an unsigned long, the type for_each_set_bit() walks */
+ addr = prop->source_ports;
+ for_each_set_bit(bit, &addr, 32) {
+ dpn[i].num = bit;
+ dpn[i].type = SDW_DPN_FULL;
+ dpn[i].simple_ch_prep_sm = true;
+ i++;
+ }
+
+ /* do this again for sink now */
+ nval = hweight32(prop->sink_ports);
+ prop->sink_dpn_prop = devm_kcalloc(&slave->dev, nval,
+ sizeof(*prop->sink_dpn_prop),
+ GFP_KERNEL);
+ if (!prop->sink_dpn_prop)
+ return -ENOMEM;
+
+ j = 0;
+ dpn = prop->sink_dpn_prop;
+ addr = prop->sink_ports;
+ for_each_set_bit(bit, &addr, 32) {
+ dpn[j].num = bit;
+ dpn[j].type = SDW_DPN_FULL;
+ dpn[j].simple_ch_prep_sm = true;
+ j++;
+ }
+
+ prop->simple_clk_stop_capable = true;
+
+ /* wake-up event */
+ prop->wake_capable = 0;
+
+ return 0;
+}
+
+/* bus_config slave callback: ignores the bus parameters and returns 0. */
+static int sdw_mockup_bus_config(struct sdw_slave *slave,
+ struct sdw_bus_params *params)
+{
+ return 0;
+}
+
+/* interrupt_callback slave callback: ignores the interrupt status and returns 0. */
+static int sdw_mockup_interrupt_callback(struct sdw_slave *slave,
+ struct sdw_slave_intr_status *status)
+{
+ return 0;
+}
+
+/* Slave callbacks; only read_prop does real work, the other three just return 0. */
+static const struct sdw_slave_ops sdw_mockup_slave_ops = {
+ .read_prop = sdw_mockup_read_prop,
+ .interrupt_callback = sdw_mockup_interrupt_callback,
+ .update_status = sdw_mockup_update_status,
+ .bus_config = sdw_mockup_bus_config,
+};
+
+/*
+ * SoundWire driver probe: allocate the device-managed private data, flag the
+ * slave as a mockup device and register the ASoC component with its DAI.
+ *
+ * Returns -ENOMEM if the private data cannot be allocated, otherwise the
+ * result of devm_snd_soc_register_component().
+ */
+static int sdw_mockup_sdw_probe(struct sdw_slave *slave,
+ const struct sdw_device_id *id)
+{
+ struct device *dev = &slave->dev;
+ struct sdw_mockup_priv *sdw_mockup;
+ int ret;
+
+ sdw_mockup = devm_kzalloc(dev, sizeof(*sdw_mockup), GFP_KERNEL);
+ if (!sdw_mockup)
+ return -ENOMEM;
+
+ dev_set_drvdata(dev, sdw_mockup);
+ sdw_mockup->slave = slave;
+
+ /* presumably read by the SoundWire core to special-case this device; verify in the bus code */
+ slave->is_mockup_device = true;
+
+ ret = devm_snd_soc_register_component(dev,
+ &snd_soc_sdw_mockup_component,
+ sdw_mockup_dai,
+ ARRAY_SIZE(sdw_mockup_dai));
+
+ return ret;
+}
+
+/* SoundWire driver remove: nothing to undo here, probe only uses devm_* allocations. */
+static int sdw_mockup_sdw_remove(struct sdw_slave *slave)
+{
+ return 0;
+}
+
+/*
+ * Intel reserved parts ID with the following mapping expected:
+ * 0xAAAA: generic full-duplex codec
+ * 0xAA55: headset codec (mock-up of RT711/RT5682) - full-duplex
+ * 0x55AA: amplifier (mock-up of RT1308/Maxim 98373) - playback only with
+ * IV feedback
+ * 0x5555: mic codec (mock-up of RT715) - capture-only
+ *
+ * All four entries share the first argument 0x0105 (presumably the
+ * manufacturer ID - confirm against SDW_SLAVE_ENTRY_EXT) and differ only in
+ * the part ID listed above.
+ */
+static const struct sdw_device_id sdw_mockup_id[] = {
+ SDW_SLAVE_ENTRY_EXT(0x0105, 0xAAAA, 0x0, 0, 0),
+ SDW_SLAVE_ENTRY_EXT(0x0105, 0xAA55, 0x0, 0, 0),
+ SDW_SLAVE_ENTRY_EXT(0x0105, 0x55AA, 0x0, 0, 0),
+ SDW_SLAVE_ENTRY_EXT(0x0105, 0x5555, 0x0, 0, 0),
+ {}, /* empty terminating entry */
+};
+MODULE_DEVICE_TABLE(sdw, sdw_mockup_id);
+
+/*
+ * SoundWire driver definition tying together the probe/remove handlers, the
+ * slave callbacks and the ID table; registered via module_sdw_driver().
+ */
+static struct sdw_driver sdw_mockup_sdw_driver = {
+ .driver = {
+ .name = "sdw-mockup",
+ .owner = THIS_MODULE,
+ },
+ .probe = sdw_mockup_sdw_probe,
+ .remove = sdw_mockup_sdw_remove,
+ .ops = &sdw_mockup_slave_ops,
+ .id_table = sdw_mockup_id,
+};
+module_sdw_driver(sdw_mockup_sdw_driver);
+
+MODULE_DESCRIPTION("ASoC SDW mockup codec driver");
+MODULE_AUTHOR("Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c
index b504d63385b3..52d2c968b5c0 100644
--- a/sound/soc/codecs/tlv320aic31xx.c
+++ b/sound/soc/codecs/tlv320aic31xx.c
@@ -35,6 +35,9 @@
#include "tlv320aic31xx.h"
+static int aic31xx_set_jack(struct snd_soc_component *component,
+ struct snd_soc_jack *jack, void *data);
+
static const struct reg_default aic31xx_reg_defaults[] = {
{ AIC31XX_CLKMUX, 0x00 },
{ AIC31XX_PLLPR, 0x11 },
@@ -1256,6 +1259,13 @@ static int aic31xx_power_on(struct snd_soc_component *component)
return ret;
}
+ /*
+ * The jack detection configuration is in the same register
+ * that is used to report jack detect status so is volatile
+ * and not covered by the cache sync, restore it separately.
+ */
+ aic31xx_set_jack(component, aic31xx->jack, NULL);
+
return 0;
}
diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c
index dcd8aeb45cb3..2e9175b37dc9 100644
--- a/sound/soc/codecs/tlv320aic32x4.c
+++ b/sound/soc/codecs/tlv320aic32x4.c
@@ -682,11 +682,20 @@ static int aic32x4_set_dosr(struct snd_soc_component *component, u16 dosr)
static int aic32x4_set_processing_blocks(struct snd_soc_component *component,
u8 r_block, u8 p_block)
{
- if (r_block > 18 || p_block > 25)
- return -EINVAL;
+ struct aic32x4_priv *aic32x4 = snd_soc_component_get_drvdata(component);
+
+ if (aic32x4->type == AIC32X4_TYPE_TAS2505) {
+ if (r_block || p_block > 3)
+ return -EINVAL;
- snd_soc_component_write(component, AIC32X4_ADCSPB, r_block);
- snd_soc_component_write(component, AIC32X4_DACSPB, p_block);
+ snd_soc_component_write(component, AIC32X4_DACSPB, p_block);
+ } else { /* AIC32x4 */
+ if (r_block > 18 || p_block > 25)
+ return -EINVAL;
+
+ snd_soc_component_write(component, AIC32X4_ADCSPB, r_block);
+ snd_soc_component_write(component, AIC32X4_DACSPB, p_block);
+ }
return 0;
}
@@ -695,6 +704,7 @@ static int aic32x4_setup_clocks(struct snd_soc_component *component,
unsigned int sample_rate, unsigned int channels,
unsigned int bit_depth)
{
+ struct aic32x4_priv *aic32x4 = snd_soc_component_get_drvdata(component);
u8 aosr;
u16 dosr;
u8 adc_resource_class, dac_resource_class;
@@ -721,19 +731,28 @@ static int aic32x4_setup_clocks(struct snd_soc_component *component,
adc_resource_class = 6;
dac_resource_class = 8;
dosr_increment = 8;
- aic32x4_set_processing_blocks(component, 1, 1);
+ if (aic32x4->type == AIC32X4_TYPE_TAS2505)
+ aic32x4_set_processing_blocks(component, 0, 1);
+ else
+ aic32x4_set_processing_blocks(component, 1, 1);
} else if (sample_rate <= 96000) {
aosr = 64;
adc_resource_class = 6;
dac_resource_class = 8;
dosr_increment = 4;
- aic32x4_set_processing_blocks(component, 1, 9);
+ if (aic32x4->type == AIC32X4_TYPE_TAS2505)
+ aic32x4_set_processing_blocks(component, 0, 1);
+ else
+ aic32x4_set_processing_blocks(component, 1, 9);
} else if (sample_rate == 192000) {
aosr = 32;
adc_resource_class = 3;
dac_resource_class = 4;
dosr_increment = 2;
- aic32x4_set_processing_blocks(component, 13, 19);
+ if (aic32x4->type == AIC32X4_TYPE_TAS2505)
+ aic32x4_set_processing_blocks(component, 0, 1);
+ else
+ aic32x4_set_processing_blocks(component, 13, 19);
} else {
dev_err(component->dev, "Sampling rate not supported\n");
return -EINVAL;
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 549d98241dae..fe15cbc7bcaf 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -747,7 +747,6 @@ static void wm_adsp2_init_debugfs(struct wm_adsp *dsp,
static void wm_adsp2_cleanup_debugfs(struct wm_adsp *dsp)
{
wm_adsp_debugfs_clear(dsp);
- debugfs_remove_recursive(dsp->debugfs_root);
}
#else
static inline void wm_adsp2_init_debugfs(struct wm_adsp *dsp,
diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c
index 4124aa2fc247..905c7965f653 100644
--- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c
+++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c
@@ -127,7 +127,7 @@ static void sst_fill_alloc_params(struct snd_pcm_substream *substream,
snd_pcm_uframes_t period_size;
ssize_t periodbytes;
ssize_t buffer_bytes = snd_pcm_lib_buffer_bytes(substream);
- u32 buffer_addr = virt_to_phys(substream->dma_buffer.area);
+ u32 buffer_addr = virt_to_phys(substream->runtime->dma_area);
channels = substream->runtime->channels;
period_size = substream->runtime->period_size;
@@ -233,7 +233,6 @@ static int sst_platform_alloc_stream(struct snd_pcm_substream *substream,
/* set codec params and inform SST driver the same */
sst_fill_pcm_params(substream, &param);
sst_fill_alloc_params(substream, &alloc_params);
- substream->runtime->dma_area = substream->dma_buffer.area;
str_params.sparams = param;
str_params.aparams = alloc_params;
str_params.codec = SST_CODEC_TYPE_PCM;
diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig
index 7e29b0d911e2..046955bf717c 100644
--- a/sound/soc/intel/boards/Kconfig
+++ b/sound/soc/intel/boards/Kconfig
@@ -602,6 +602,7 @@ config SND_SOC_INTEL_SOUNDWIRE_SOF_MACH
select SND_SOC_DMIC
select SND_SOC_INTEL_HDA_DSP_COMMON
select SND_SOC_INTEL_SOF_MAXIM_COMMON
+ select SND_SOC_SDW_MOCKUP
help
Add support for Intel SoundWire-based platforms connected to
MAX98373, RT700, RT711, RT1308 and RT715
diff --git a/sound/soc/intel/boards/sof_da7219_max98373.c b/sound/soc/intel/boards/sof_da7219_max98373.c
index 896251d742fe..b7b3b0bf994a 100644
--- a/sound/soc/intel/boards/sof_da7219_max98373.c
+++ b/sound/soc/intel/boards/sof_da7219_max98373.c
@@ -404,7 +404,7 @@ static int audio_probe(struct platform_device *pdev)
return -ENOMEM;
/* By default dais[0] is configured for max98373 */
- if (!strcmp(pdev->name, "sof_da7219_max98360a")) {
+ if (!strcmp(pdev->name, "sof_da7219_mx98360a")) {
dais[0] = (struct snd_soc_dai_link) {
.name = "SSP1-Codec",
.id = 0,
diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c
index 1a867c73a48e..82d909ef7a97 100644
--- a/sound/soc/intel/boards/sof_sdw.c
+++ b/sound/soc/intel/boards/sof_sdw.c
@@ -328,6 +328,19 @@ static const struct snd_soc_ops sdw_ops = {
.shutdown = sdw_shutdown,
};
+static int sof_sdw_mic_codec_mockup_init(const struct snd_soc_acpi_link_adr *link,
+ struct snd_soc_dai_link *dai_links,
+ struct sof_sdw_codec_info *info,
+ bool playback)
+{
+ /*
+ * force DAI link to use same ID as RT715 and DMIC
+ * to reuse topologies
+ */
+ dai_links->id = SDW_DMIC_DAI_ID;
+ return 0;
+}
+
static struct sof_sdw_codec_info codec_info_list[] = {
{
.part_id = 0x700,
@@ -410,6 +423,34 @@ static struct sof_sdw_codec_info codec_info_list[] = {
.dai_name = "rt5682-sdw",
.init = sof_sdw_rt5682_init,
},
+ {
+ .part_id = 0xaaaa, /* generic codec mockup */
+ .version_id = 0,
+ .direction = {true, true},
+ .dai_name = "sdw-mockup-aif1",
+ .init = NULL,
+ },
+ {
+ .part_id = 0xaa55, /* headset codec mockup */
+ .version_id = 0,
+ .direction = {true, true},
+ .dai_name = "sdw-mockup-aif1",
+ .init = NULL,
+ },
+ {
+ .part_id = 0x55aa, /* amplifier mockup */
+ .version_id = 0,
+ .direction = {true, false},
+ .dai_name = "sdw-mockup-aif1",
+ .init = NULL,
+ },
+ {
+ .part_id = 0x5555, /* microphone mockup */
+ .version_id = 0,
+ .direction = {false, true},
+ .dai_name = "sdw-mockup-aif1",
+ .init = sof_sdw_mic_codec_mockup_init,
+ },
};
static inline int find_codec_info_part(u64 adr)
diff --git a/sound/soc/intel/common/Makefile b/sound/soc/intel/common/Makefile
index 12a205ccdeeb..fef0b2d1de68 100644
--- a/sound/soc/intel/common/Makefile
+++ b/sound/soc/intel/common/Makefile
@@ -9,7 +9,8 @@ snd-soc-acpi-intel-match-objs := soc-acpi-intel-byt-match.o soc-acpi-intel-cht-m
soc-acpi-intel-cml-match.o soc-acpi-intel-icl-match.o \
soc-acpi-intel-tgl-match.o soc-acpi-intel-ehl-match.o \
soc-acpi-intel-jsl-match.o soc-acpi-intel-adl-match.o \
- soc-acpi-intel-hda-match.o
+ soc-acpi-intel-hda-match.o \
+ soc-acpi-intel-sdw-mockup-match.o
obj-$(CONFIG_SND_SOC_INTEL_SST) += snd-soc-sst-dsp.o snd-soc-sst-ipc.o
obj-$(CONFIG_SND_SOC_ACPI_INTEL_MATCH) += snd-soc-acpi-intel-match.o
diff --git a/sound/soc/intel/common/soc-acpi-intel-cnl-match.c b/sound/soc/intel/common/soc-acpi-intel-cnl-match.c
index 39dad32564e6..94b650767e11 100644
--- a/sound/soc/intel/common/soc-acpi-intel-cnl-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-cnl-match.c
@@ -9,6 +9,7 @@
#include <sound/soc-acpi.h>
#include <sound/soc-acpi-intel-match.h>
#include "../skylake/skl.h"
+#include "soc-acpi-intel-sdw-mockup-match.h"
static struct skl_machine_pdata cnl_pdata = {
.use_tplg_pcm = true,
@@ -60,6 +61,20 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_cnl_sdw_machines[] = {
.sof_fw_filename = "sof-cnl.ri",
.sof_tplg_filename = "sof-cnl-rt5682-sdw2.tplg"
},
+ {
+ .link_mask = GENMASK(3, 0),
+ .links = sdw_mockup_headset_2amps_mic,
+ .drv_name = "sof_sdw",
+ .sof_fw_filename = "sof-cnl.ri",
+ .sof_tplg_filename = "sof-cml-rt711-rt1308-rt715.tplg",
+ },
+ {
+ .link_mask = BIT(0) | BIT(1) | BIT(3),
+ .links = sdw_mockup_headset_1amp_mic,
+ .drv_name = "sof_sdw",
+ .sof_fw_filename = "sof-cnl.ri",
+ .sof_tplg_filename = "sof-cml-rt711-rt1308-mono-rt715.tplg",
+ },
{}
};
EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_cnl_sdw_machines);
diff --git a/sound/soc/intel/common/soc-acpi-intel-sdw-mockup-match.c b/sound/soc/intel/common/soc-acpi-intel-sdw-mockup-match.c
new file mode 100644
index 000000000000..a3d33997736a
--- /dev/null
+++ b/sound/soc/intel/common/soc-acpi-intel-sdw-mockup-match.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// soc-acpi-intel-sdw-mockup-match.c - tables and support for SoundWire
+// mockup device ACPI enumeration.
+//
+// Copyright (c) 2021, Intel Corporation.
+//
+
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include "soc-acpi-intel-sdw-mockup-match.h"
+
+static const struct snd_soc_acpi_endpoint sdw_mockup_single_endpoint = {
+ .num = 0,
+ .aggregated = 0,
+ .group_position = 0,
+ .group_id = 0,
+};
+
+static const struct snd_soc_acpi_endpoint sdw_mockup_l_endpoint = {
+ .num = 0,
+ .aggregated = 1,
+ .group_position = 0,
+ .group_id = 1,
+};
+
+static const struct snd_soc_acpi_endpoint sdw_mockup_r_endpoint = {
+ .num = 0,
+ .aggregated = 1,
+ .group_position = 1,
+ .group_id = 1,
+};
+
+static const struct snd_soc_acpi_adr_device sdw_mockup_headset_0_adr[] = {
+ {
+ .adr = 0x0000000105AA5500ull,
+ .num_endpoints = 1,
+ .endpoints = &sdw_mockup_single_endpoint,
+ .name_prefix = "sdw_mockup_headset0"
+ }
+};
+
+static const struct snd_soc_acpi_adr_device sdw_mockup_headset_1_adr[] = {
+ {
+ .adr = 0x0001000105AA5500ull,
+ .num_endpoints = 1,
+ .endpoints = &sdw_mockup_single_endpoint,
+ .name_prefix = "sdw_mockup_headset1"
+ }
+};
+
+static const struct snd_soc_acpi_adr_device sdw_mockup_amp_1_adr[] = {
+ {
+ .adr = 0x000100010555AA00ull,
+ .num_endpoints = 1,
+ .endpoints = &sdw_mockup_single_endpoint,
+ .name_prefix = "sdw_mockup_amp1"
+ }
+};
+
+static const struct snd_soc_acpi_adr_device sdw_mockup_amp_2_adr[] = {
+ {
+ .adr = 0x000200010555AA00ull,
+ .num_endpoints = 1,
+ .endpoints = &sdw_mockup_single_endpoint,
+ .name_prefix = "sdw_mockup_amp2"
+ }
+};
+
+static const struct snd_soc_acpi_adr_device sdw_mockup_mic_0_adr[] = {
+ {
+ .adr = 0x0000000105555500ull,
+ .num_endpoints = 1,
+ .endpoints = &sdw_mockup_single_endpoint,
+ .name_prefix = "sdw_mockup_mic0"
+ }
+};
+
+static const struct snd_soc_acpi_adr_device sdw_mockup_mic_3_adr[] = {
+ {
+ .adr = 0x0003000105555500ull,
+ .num_endpoints = 1,
+ .endpoints = &sdw_mockup_single_endpoint,
+ .name_prefix = "sdw_mockup_mic3"
+ }
+};
+
+static const struct snd_soc_acpi_adr_device sdw_mockup_amp_1_group1_adr[] = {
+ {
+ .adr = 0x000100010555AA00ull,
+ .num_endpoints = 1,
+ .endpoints = &sdw_mockup_l_endpoint,
+ .name_prefix = "sdw_mockup_amp1_l"
+ }
+};
+
+static const struct snd_soc_acpi_adr_device sdw_mockup_amp_2_group1_adr[] = {
+ {
+ .adr = 0x000200010555AA00ull,
+ .num_endpoints = 1,
+ .endpoints = &sdw_mockup_r_endpoint,
+ .name_prefix = "sdw_mockup_amp2_r"
+ }
+};
+
+const struct snd_soc_acpi_link_adr sdw_mockup_headset_1amp_mic[] = {
+ {
+ .mask = BIT(0),
+ .num_adr = ARRAY_SIZE(sdw_mockup_headset_0_adr),
+ .adr_d = sdw_mockup_headset_0_adr,
+ },
+ {
+ .mask = BIT(1),
+ .num_adr = ARRAY_SIZE(sdw_mockup_amp_1_adr),
+ .adr_d = sdw_mockup_amp_1_adr,
+ },
+ {
+ .mask = BIT(3),
+ .num_adr = ARRAY_SIZE(sdw_mockup_mic_3_adr),
+ .adr_d = sdw_mockup_mic_3_adr,
+ },
+ {}
+};
+
+const struct snd_soc_acpi_link_adr sdw_mockup_headset_2amps_mic[] = {
+ {
+ .mask = BIT(0),
+ .num_adr = ARRAY_SIZE(sdw_mockup_headset_0_adr),
+ .adr_d = sdw_mockup_headset_0_adr,
+ },
+ {
+ .mask = BIT(1),
+ .num_adr = ARRAY_SIZE(sdw_mockup_amp_1_group1_adr),
+ .adr_d = sdw_mockup_amp_1_group1_adr,
+ },
+ {
+ .mask = BIT(2),
+ .num_adr = ARRAY_SIZE(sdw_mockup_amp_2_group1_adr),
+ .adr_d = sdw_mockup_amp_2_group1_adr,
+ },
+ {
+ .mask = BIT(3),
+ .num_adr = ARRAY_SIZE(sdw_mockup_mic_3_adr),
+ .adr_d = sdw_mockup_mic_3_adr,
+ },
+ {}
+};
+
+const struct snd_soc_acpi_link_adr sdw_mockup_mic_headset_1amp[] = {
+ {
+ .mask = BIT(1),
+ .num_adr = ARRAY_SIZE(sdw_mockup_headset_1_adr),
+ .adr_d = sdw_mockup_headset_1_adr,
+ },
+ {
+ .mask = BIT(2),
+ .num_adr = ARRAY_SIZE(sdw_mockup_amp_2_adr),
+ .adr_d = sdw_mockup_amp_2_adr,
+ },
+ {
+ .mask = BIT(0),
+ .num_adr = ARRAY_SIZE(sdw_mockup_mic_0_adr),
+ .adr_d = sdw_mockup_mic_0_adr,
+ },
+ {}
+};
diff --git a/sound/soc/intel/common/soc-acpi-intel-sdw-mockup-match.h b/sound/soc/intel/common/soc-acpi-intel-sdw-mockup-match.h
new file mode 100644
index 000000000000..c99eecd19e03
--- /dev/null
+++ b/sound/soc/intel/common/soc-acpi-intel-sdw-mockup-match.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * soc-acpi-intel-sdw-mockup-match.h - tables and support for SoundWire
+ * mockup device ACPI enumeration.
+ *
+ * Copyright (c) 2021, Intel Corporation.
+ *
+ */
+
+#ifndef _SND_SOC_ACPI_INTEL_SDW_MOCKUP_MATCH
+#define _SND_SOC_ACPI_INTEL_SDW_MOCKUP_MATCH
+
+extern const struct snd_soc_acpi_link_adr sdw_mockup_headset_1amp_mic[];
+extern const struct snd_soc_acpi_link_adr sdw_mockup_headset_2amps_mic[];
+extern const struct snd_soc_acpi_link_adr sdw_mockup_mic_headset_1amp[];
+
+#endif
diff --git a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c
index 66595e3ab13f..e2488f0eaff8 100644
--- a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c
@@ -8,6 +8,7 @@
#include <sound/soc-acpi.h>
#include <sound/soc-acpi-intel-match.h>
+#include "soc-acpi-intel-sdw-mockup-match.h"
static const struct snd_soc_acpi_codecs tgl_codecs = {
.num_codecs = 1,
@@ -351,6 +352,28 @@ EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_tgl_machines);
/* this table is used when there is no I2S codec present */
struct snd_soc_acpi_mach snd_soc_acpi_intel_tgl_sdw_machines[] = {
+ /* mockup tests need to be first */
+ {
+ .link_mask = GENMASK(3, 0),
+ .links = sdw_mockup_headset_2amps_mic,
+ .drv_name = "sof_sdw",
+ .sof_fw_filename = "sof-tgl.ri",
+ .sof_tplg_filename = "sof-tgl-rt711-rt1308-rt715.tplg",
+ },
+ {
+ .link_mask = BIT(0) | BIT(1) | BIT(3),
+ .links = sdw_mockup_headset_1amp_mic,
+ .drv_name = "sof_sdw",
+ .sof_fw_filename = "sof-tgl.ri",
+ .sof_tplg_filename = "sof-tgl-rt711-rt1308-mono-rt715.tplg",
+ },
+ {
+ .link_mask = BIT(0) | BIT(1) | BIT(2),
+ .links = sdw_mockup_mic_headset_1amp,
+ .drv_name = "sof_sdw",
+ .sof_fw_filename = "sof-tgl.ri",
+ .sof_tplg_filename = "sof-tgl-rt715-rt711-rt1308-mono.tplg",
+ },
{
.link_mask = 0x7,
.links = tgl_sdw_rt711_link1_rt1308_link2_rt715_link0,
diff --git a/sound/soc/kirkwood/kirkwood-dma.c b/sound/soc/kirkwood/kirkwood-dma.c
index c2a5933bfcfc..700a18561a94 100644
--- a/sound/soc/kirkwood/kirkwood-dma.c
+++ b/sound/soc/kirkwood/kirkwood-dma.c
@@ -104,8 +104,6 @@ static int kirkwood_dma_open(struct snd_soc_component *component,
int err;
struct snd_pcm_runtime *runtime = substream->runtime;
struct kirkwood_dma_data *priv = kirkwood_priv(substream);
- const struct mbus_dram_target_info *dram;
- unsigned long addr;
snd_soc_set_runtime_hwparams(substream, &kirkwood_dma_snd_hw);
@@ -142,20 +140,14 @@ static int kirkwood_dma_open(struct snd_soc_component *component,
writel((unsigned int)-1, priv->io + KIRKWOOD_ERR_MASK);
}
- dram = mv_mbus_dram_info();
- addr = substream->dma_buffer.addr;
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
if (priv->substream_play)
return -EBUSY;
priv->substream_play = substream;
- kirkwood_dma_conf_mbus_windows(priv->io,
- KIRKWOOD_PLAYBACK_WIN, addr, dram);
} else {
if (priv->substream_rec)
return -EBUSY;
priv->substream_rec = substream;
- kirkwood_dma_conf_mbus_windows(priv->io,
- KIRKWOOD_RECORD_WIN, addr, dram);
}
return 0;
@@ -182,6 +174,23 @@ static int kirkwood_dma_close(struct snd_soc_component *component,
return 0;
}
+static int kirkwood_dma_hw_params(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params)
+{
+ struct kirkwood_dma_data *priv = kirkwood_priv(substream);
+ const struct mbus_dram_target_info *dram = mv_mbus_dram_info();
+ unsigned long addr = substream->runtime->dma_addr;
+
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+ kirkwood_dma_conf_mbus_windows(priv->io,
+ KIRKWOOD_PLAYBACK_WIN, addr, dram);
+ else
+ kirkwood_dma_conf_mbus_windows(priv->io,
+ KIRKWOOD_RECORD_WIN, addr, dram);
+ return 0;
+}
+
static int kirkwood_dma_prepare(struct snd_soc_component *component,
struct snd_pcm_substream *substream)
{
@@ -246,6 +255,7 @@ const struct snd_soc_component_driver kirkwood_soc_component = {
.name = DRV_NAME,
.open = kirkwood_dma_open,
.close = kirkwood_dma_close,
+ .hw_params = kirkwood_dma_hw_params,
.prepare = kirkwood_dma_prepare,
.pointer = kirkwood_dma_pointer,
.pcm_construct = kirkwood_dma_new,
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index 3a5e84e16a87..c8dfd0de30e4 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -148,86 +148,75 @@ int snd_soc_component_set_bias_level(struct snd_soc_component *component,
return soc_component_ret(component, ret);
}
-static int soc_component_pin(struct snd_soc_component *component,
- const char *pin,
- int (*pin_func)(struct snd_soc_dapm_context *dapm,
- const char *pin))
-{
- struct snd_soc_dapm_context *dapm =
- snd_soc_component_get_dapm(component);
- char *full_name;
- int ret;
-
- if (!component->name_prefix) {
- ret = pin_func(dapm, pin);
- goto end;
- }
-
- full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
- if (!full_name) {
- ret = -ENOMEM;
- goto end;
- }
-
- ret = pin_func(dapm, full_name);
- kfree(full_name);
-end:
- return soc_component_ret(component, ret);
-}
-
int snd_soc_component_enable_pin(struct snd_soc_component *component,
const char *pin)
{
- return soc_component_pin(component, pin, snd_soc_dapm_enable_pin);
+ struct snd_soc_dapm_context *dapm =
+ snd_soc_component_get_dapm(component);
+ return snd_soc_dapm_enable_pin(dapm, pin);
}
EXPORT_SYMBOL_GPL(snd_soc_component_enable_pin);
int snd_soc_component_enable_pin_unlocked(struct snd_soc_component *component,
const char *pin)
{
- return soc_component_pin(component, pin, snd_soc_dapm_enable_pin_unlocked);
+ struct snd_soc_dapm_context *dapm =
+ snd_soc_component_get_dapm(component);
+ return snd_soc_dapm_enable_pin_unlocked(dapm, pin);
}
EXPORT_SYMBOL_GPL(snd_soc_component_enable_pin_unlocked);
int snd_soc_component_disable_pin(struct snd_soc_component *component,
const char *pin)
{
- return soc_component_pin(component, pin, snd_soc_dapm_disable_pin);
+ struct snd_soc_dapm_context *dapm =
+ snd_soc_component_get_dapm(component);
+ return snd_soc_dapm_disable_pin(dapm, pin);
}
EXPORT_SYMBOL_GPL(snd_soc_component_disable_pin);
int snd_soc_component_disable_pin_unlocked(struct snd_soc_component *component,
const char *pin)
{
- return soc_component_pin(component, pin, snd_soc_dapm_disable_pin_unlocked);
+ struct snd_soc_dapm_context *dapm =
+ snd_soc_component_get_dapm(component);
+ return snd_soc_dapm_disable_pin_unlocked(dapm, pin);
}
EXPORT_SYMBOL_GPL(snd_soc_component_disable_pin_unlocked);
int snd_soc_component_nc_pin(struct snd_soc_component *component,
const char *pin)
{
- return soc_component_pin(component, pin, snd_soc_dapm_nc_pin);
+ struct snd_soc_dapm_context *dapm =
+ snd_soc_component_get_dapm(component);
+ return snd_soc_dapm_nc_pin(dapm, pin);
}
EXPORT_SYMBOL_GPL(snd_soc_component_nc_pin);
int snd_soc_component_nc_pin_unlocked(struct snd_soc_component *component,
const char *pin)
{
- return soc_component_pin(component, pin, snd_soc_dapm_nc_pin_unlocked);
+ struct snd_soc_dapm_context *dapm =
+ snd_soc_component_get_dapm(component);
+ return snd_soc_dapm_nc_pin_unlocked(dapm, pin);
}
EXPORT_SYMBOL_GPL(snd_soc_component_nc_pin_unlocked);
int snd_soc_component_get_pin_status(struct snd_soc_component *component,
const char *pin)
{
- return soc_component_pin(component, pin, snd_soc_dapm_get_pin_status);
+ struct snd_soc_dapm_context *dapm =
+ snd_soc_component_get_dapm(component);
+ return snd_soc_dapm_get_pin_status(dapm, pin);
}
EXPORT_SYMBOL_GPL(snd_soc_component_get_pin_status);
int snd_soc_component_force_enable_pin(struct snd_soc_component *component,
const char *pin)
{
- return soc_component_pin(component, pin, snd_soc_dapm_force_enable_pin);
+ struct snd_soc_dapm_context *dapm =
+ snd_soc_component_get_dapm(component);
+ return snd_soc_dapm_force_enable_pin(dapm, pin);
}
EXPORT_SYMBOL_GPL(snd_soc_component_force_enable_pin);
@@ -235,7 +224,9 @@ int snd_soc_component_force_enable_pin_unlocked(
struct snd_soc_component *component,
const char *pin)
{
- return soc_component_pin(component, pin, snd_soc_dapm_force_enable_pin_unlocked);
+ struct snd_soc_dapm_context *dapm =
+ snd_soc_component_get_dapm(component);
+ return snd_soc_dapm_force_enable_pin_unlocked(dapm, pin);
}
EXPORT_SYMBOL_GPL(snd_soc_component_force_enable_pin_unlocked);
diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig
index 4bce89b5ea40..4447f515e8b1 100644
--- a/sound/soc/sof/intel/Kconfig
+++ b/sound/soc/sof/intel/Kconfig
@@ -278,6 +278,8 @@ config SND_SOC_SOF_HDA
config SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
tristate
+ select SOUNDWIRE_INTEL if SND_SOC_SOF_INTEL_SOUNDWIRE
+ select SND_INTEL_SOUNDWIRE_ACPI if SND_SOC_SOF_INTEL_SOUNDWIRE
config SND_SOC_SOF_INTEL_SOUNDWIRE
tristate "SOF support for SoundWire"
@@ -285,8 +287,6 @@ config SND_SOC_SOF_INTEL_SOUNDWIRE
depends on SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
depends on ACPI && SOUNDWIRE
depends on !(SOUNDWIRE=m && SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE=y)
- select SOUNDWIRE_INTEL
- select SND_INTEL_SOUNDWIRE_ACPI
help
This adds support for SoundWire with Sound Open Firmware
for Intel(R) platforms.
diff --git a/sound/soc/sof/intel/hda-ipc.c b/sound/soc/sof/intel/hda-ipc.c
index c91aa951df22..acfeca42604c 100644
--- a/sound/soc/sof/intel/hda-ipc.c
+++ b/sound/soc/sof/intel/hda-ipc.c
@@ -107,8 +107,8 @@ void hda_dsp_ipc_get_reply(struct snd_sof_dev *sdev)
} else {
/* reply correct size ? */
if (reply.hdr.size != msg->reply_size &&
- /* getter payload is never known upfront */
- !(reply.hdr.cmd & SOF_IPC_GLB_PROBE)) {
+ /* getter payload is never known upfront */
+ ((reply.hdr.cmd & SOF_GLB_TYPE_MASK) != SOF_IPC_GLB_PROBE)) {
dev_err(sdev->dev, "error: reply expected %zu got %u bytes\n",
msg->reply_size, reply.hdr.size);
ret = -EINVAL;
diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c
index e1e368ff2b12..891e6e1b9121 100644
--- a/sound/soc/sof/intel/hda.c
+++ b/sound/soc/sof/intel/hda.c
@@ -187,12 +187,16 @@ static int hda_sdw_probe(struct snd_sof_dev *sdev)
int hda_sdw_startup(struct snd_sof_dev *sdev)
{
struct sof_intel_hda_dev *hdev;
+ struct snd_sof_pdata *pdata = sdev->pdata;
hdev = sdev->pdata->hw_pdata;
if (!hdev->sdw)
return 0;
+ if (pdata->machine && !pdata->machine->mach_params.link_mask)
+ return 0;
+
return sdw_intel_startup(hdev->sdw);
}
@@ -1002,6 +1006,14 @@ static int hda_generic_machine_select(struct snd_sof_dev *sdev)
hda_mach->mach_params.dmic_num = dmic_num;
pdata->machine = hda_mach;
pdata->tplg_filename = tplg_filename;
+
+ if (codec_num == 2) {
+ /*
+ * Prevent SoundWire links from starting when an external
+ * HDaudio codec is used
+ */
+ hda_mach->mach_params.link_mask = 0;
+ }
}
}
diff --git a/sound/soc/uniphier/aio-dma.c b/sound/soc/uniphier/aio-dma.c
index 3c1628a3a1ac..3d9736e7381f 100644
--- a/sound/soc/uniphier/aio-dma.c
+++ b/sound/soc/uniphier/aio-dma.c
@@ -198,7 +198,7 @@ static int uniphier_aiodma_mmap(struct snd_soc_component *component,
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
return remap_pfn_range(vma, vma->vm_start,
- substream->dma_buffer.addr >> PAGE_SHIFT,
+ substream->runtime->dma_addr >> PAGE_SHIFT,
vma->vm_end - vma->vm_start, vma->vm_page_prot);
}
diff --git a/sound/soc/xilinx/xlnx_formatter_pcm.c b/sound/soc/xilinx/xlnx_formatter_pcm.c
index 1d59fb668c77..91afea9d5de6 100644
--- a/sound/soc/xilinx/xlnx_formatter_pcm.c
+++ b/sound/soc/xilinx/xlnx_formatter_pcm.c
@@ -452,8 +452,8 @@ static int xlnx_formatter_pcm_hw_params(struct snd_soc_component *component,
stream_data->buffer_size = size;
- low = lower_32_bits(substream->dma_buffer.addr);
- high = upper_32_bits(substream->dma_buffer.addr);
+ low = lower_32_bits(runtime->dma_addr);
+ high = upper_32_bits(runtime->dma_addr);
writel(low, stream_data->mmio + XLNX_AUD_BUFF_ADDR_LSB);
writel(high, stream_data->mmio + XLNX_AUD_BUFF_ADDR_MSB);
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index ff4d327a582e..88b28aa7431f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -12,7 +12,8 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **btf** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+ { **-B** | **--base-btf** } }
*COMMANDS* := { **dump** | **help** }
@@ -73,6 +74,20 @@ OPTIONS
=======
.. include:: common_options.rst
+ -B, --base-btf *FILE*
+ Pass a base BTF object. Base BTF objects are typically used
+ with BTF objects for kernel modules. To avoid duplicating
+ all kernel symbols required by modules, BTF objects for
+ modules are "split": they are built incrementally on top of
+ the kernel (vmlinux) BTF object. So the base BTF reference
+ should usually point to the kernel BTF.
+
+ When the main BTF object to process (for example, the
+ module BTF to dump) is passed as a *FILE*, bpftool attempts
+ to autodetect the path for the base object, and passing
+ this option is optional. When the main BTF object is passed
+ through other handles, this option becomes necessary.
+
EXAMPLES
========
**# bpftool btf dump id 1226**
@@ -217,3 +232,34 @@ All the standard ways to specify map or program are supported:
**# bpftool btf dump prog tag b88e0a09b1d9759d**
**# bpftool btf dump prog pinned /sys/fs/bpf/prog_name**
+
+|
+| **# bpftool btf dump file /sys/kernel/btf/i2c_smbus**
+| (or)
+| **# I2C_SMBUS_ID=$(bpftool btf show -p | jq '.[] | select(.name=="i2c_smbus").id')**
+| **# bpftool btf dump id ${I2C_SMBUS_ID} -B /sys/kernel/btf/vmlinux**
+
+::
+
+ [104848] STRUCT 'i2c_smbus_alert' size=40 vlen=2
+ 'alert' type_id=393 bits_offset=0
+ 'ara' type_id=56050 bits_offset=256
+ [104849] STRUCT 'alert_data' size=12 vlen=3
+ 'addr' type_id=16 bits_offset=0
+ 'type' type_id=56053 bits_offset=32
+ 'data' type_id=7 bits_offset=64
+ [104850] PTR '(anon)' type_id=104848
+ [104851] PTR '(anon)' type_id=104849
+ [104852] FUNC 'i2c_register_spd' type_id=84745 linkage=static
+ [104853] FUNC 'smbalert_driver_init' type_id=1213 linkage=static
+ [104854] FUNC_PROTO '(anon)' ret_type_id=18 vlen=1
+ 'ara' type_id=56050
+ [104855] FUNC 'i2c_handle_smbus_alert' type_id=104854 linkage=static
+ [104856] FUNC 'smbalert_remove' type_id=104854 linkage=static
+ [104857] FUNC_PROTO '(anon)' ret_type_id=18 vlen=2
+ 'ara' type_id=56050
+ 'id' type_id=56056
+ [104858] FUNC 'smbalert_probe' type_id=104857 linkage=static
+ [104859] FUNC 'smbalert_work' type_id=9695 linkage=static
+ [104860] FUNC 'smbus_alert' type_id=71367 linkage=static
+ [104861] FUNC 'smbus_do_alert' type_id=84827 linkage=static
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index baee8591ac76..3e4395eede4f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -12,7 +12,8 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **cgroup** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+ { **-f** | **--bpffs** } }
*COMMANDS* :=
{ **show** | **list** | **tree** | **attach** | **detach** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index dd3771bdbc57..ab9f57ee4c3a 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -12,7 +12,7 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **feature** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
*COMMANDS* := { **probe** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index 7cd6681137f3..2ef2f2df0279 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -12,7 +12,8 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **gen** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+ { **-L** | **--use-loader** } }
*COMMAND* := { **object** | **skeleton** | **help** }
@@ -152,6 +153,12 @@ OPTIONS
=======
.. include:: common_options.rst
+ -L, --use-loader
+ For skeletons, generate a "light" skeleton (also known as "loader"
+ skeleton). A light skeleton contains a loader eBPF program. It does
+ not use the majority of the libbpf infrastructure, and does not need
+ libelf.
+
EXAMPLES
========
**$ cat example1.bpf.c**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
index 51f49bead619..471f363a725a 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
@@ -12,6 +12,8 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **iter** *COMMAND*
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
+
*COMMANDS* := { **pin** | **help** }
ITER COMMANDS
diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
index 5f7db2a837cc..0de90f086238 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
@@ -12,7 +12,8 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **link** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+ { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
*COMMANDS* := { **show** | **list** | **pin** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 3d52256ba75f..d0c4abe08aba 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -12,7 +12,8 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **map** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+ { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
*COMMANDS* :=
{ **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index d8165d530937..1ae0375e8fea 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -12,7 +12,7 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **net** *COMMAND*
- *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
*COMMANDS* :=
{ **show** | **list** | **attach** | **detach** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
index e958ce91de72..ce52798a917d 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -12,7 +12,7 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **perf** *COMMAND*
- *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
*COMMANDS* :=
{ **show** | **list** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index fe1b38e7e887..91608cb7e44a 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -12,7 +12,9 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **prog** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+ { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } |
+ { **-L** | **--use-loader** } }
*COMMANDS* :=
{ **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load**
@@ -48,10 +50,11 @@ PROG COMMANDS
| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
| }
| *ATTACH_TYPE* := {
-| **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
+| **msg_verdict** | **skb_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
| }
| *METRICs* := {
-| **cycles** | **instructions** | **l1d_loads** | **llc_misses**
+| **cycles** | **instructions** | **l1d_loads** | **llc_misses** |
+| **itlb_misses** | **dtlb_misses**
| }
@@ -223,6 +226,20 @@ OPTIONS
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
+ -L, --use-loader
+ Load program as a "loader" program. This is useful to debug
+ the generation of such programs. When this option is in
+ use, bpftool attempts to load the programs from the object
+ file into the kernel, but does not pin them (therefore, the
+ *PATH* must not be provided).
+
+ When combined with the **-d**\ \|\ **--debug** option,
+ additional debug messages are generated, and the execution
+ of the loader program will use the **bpf_trace_printk**\ ()
+ helper to log each step of loading BTF, creating the maps,
+ and loading the programs (see **bpftool prog tracelog** as
+ a way to dump those messages).
+
EXAMPLES
========
**# bpftool prog show**
@@ -326,3 +343,16 @@ EXAMPLES
40176203 cycles (83.05%)
42518139 instructions # 1.06 insns per cycle (83.39%)
123 llc_misses # 2.89 LLC misses per million insns (83.15%)
+
+|
+| Output below is for the trace logs.
+| Run in separate terminals:
+| **# bpftool prog tracelog**
+| **# bpftool prog load -L -d file.o**
+
+::
+
+ bpftool-620059 [004] d... 2634685.517903: bpf_trace_printk: btf_load size 665 r=5
+ bpftool-620059 [004] d... 2634685.517912: bpf_trace_printk: map_create sample_map idx 0 type 2 value_size 4 value_btf_id 0 r=6
+ bpftool-620059 [004] d... 2634685.517997: bpf_trace_printk: prog_load sample insn_cnt 13 r=7
+ bpftool-620059 [004] d... 2634685.517999: bpf_trace_printk: close(5) = 0
diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
index 506e70ee78e9..02afc0fc14cb 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
@@ -12,7 +12,7 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **struct_ops** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
*COMMANDS* :=
{ **show** | **list** | **dump** | **register** | **unregister** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index e7d949334961..bb23f55bb05a 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -18,15 +18,15 @@ SYNOPSIS
*OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** }
- *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
- | { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-V** | **--version** } |
+ { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
*MAP-COMMANDS* :=
- { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
- | **delete** | **pin** | **event_pipe** | **help** }
+ { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** |
+ **delete** | **pin** | **event_pipe** | **help** }
- *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
- | **load** | **attach** | **detach** | **help** }
+ *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** |
+ **load** | **attach** | **detach** | **help** }
*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index cc33c5824a2f..88e2bcf16cca 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -260,7 +260,8 @@ _bpftool()
# Deal with options
if [[ ${words[cword]} == -* ]]; then
- local c='--version --json --pretty --bpffs --mapcompat --debug'
+ local c='--version --json --pretty --bpffs --mapcompat --debug \
+ --use-loader --base-btf'
COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
return 0
fi
@@ -278,7 +279,7 @@ _bpftool()
_sysfs_get_netdevs
return 0
;;
- file|pinned)
+ file|pinned|-B|--base-btf)
_filedir
return 0
;;
@@ -291,7 +292,8 @@ _bpftool()
# Remove all options so completions don't have to deal with them.
local i
for (( i=1; i < ${#words[@]}; )); do
- if [[ ${words[i]::1} == - ]]; then
+ if [[ ${words[i]::1} == - ]] &&
+ [[ ${words[i]} != "-B" ]] && [[ ${words[i]} != "--base-btf" ]]; then
words=( "${words[@]:0:i}" "${words[@]:i+1}" )
[[ $i -le $cword ]] && cword=$(( cword - 1 ))
else
@@ -343,7 +345,8 @@ _bpftool()
local PROG_TYPE='id pinned tag name'
local MAP_TYPE='id pinned name'
- local METRIC_TYPE='cycles instructions l1d_loads llc_misses'
+ local METRIC_TYPE='cycles instructions l1d_loads llc_misses \
+ itlb_misses dtlb_misses'
case $command in
show|list)
[[ $prev != "$command" ]] && return 0
@@ -404,8 +407,10 @@ _bpftool()
return 0
;;
5)
- COMPREPLY=( $( compgen -W 'msg_verdict stream_verdict \
- stream_parser flow_dissector' -- "$cur" ) )
+ local BPFTOOL_PROG_ATTACH_TYPES='msg_verdict \
+ skb_verdict stream_verdict stream_parser \
+ flow_dissector'
+ COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_ATTACH_TYPES" -- "$cur" ) )
return 0
;;
6)
@@ -464,7 +469,7 @@ _bpftool()
case $prev in
type)
- COMPREPLY=( $( compgen -W "socket kprobe \
+ local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \
kretprobe classifier flow_dissector \
action tracepoint raw_tracepoint \
xdp perf_event cgroup/skb cgroup/sock \
@@ -479,8 +484,8 @@ _bpftool()
cgroup/post_bind4 cgroup/post_bind6 \
cgroup/sysctl cgroup/getsockopt \
cgroup/setsockopt cgroup/sock_release struct_ops \
- fentry fexit freplace sk_lookup" -- \
- "$cur" ) )
+ fentry fexit freplace sk_lookup'
+ COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_LOAD_TYPES" -- "$cur" ) )
return 0
;;
id)
@@ -698,15 +703,15 @@ _bpftool()
return 0
;;
type)
- COMPREPLY=( $( compgen -W 'hash array prog_array \
- perf_event_array percpu_hash percpu_array \
- stack_trace cgroup_array lru_hash \
+ local BPFTOOL_MAP_CREATE_TYPES='hash array \
+ prog_array perf_event_array percpu_hash \
+ percpu_array stack_trace cgroup_array lru_hash \
lru_percpu_hash lpm_trie array_of_maps \
hash_of_maps devmap devmap_hash sockmap cpumap \
xskmap sockhash cgroup_storage reuseport_sockarray \
percpu_cgroup_storage queue stack sk_storage \
- struct_ops inode_storage task_storage' -- \
- "$cur" ) )
+ struct_ops inode_storage task_storage ringbuf'
+ COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) )
return 0
;;
key|value|flags|entries)
@@ -1017,34 +1022,37 @@ _bpftool()
return 0
;;
attach|detach)
- local ATTACH_TYPES='ingress egress sock_create sock_ops \
- device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
+ local BPFTOOL_CGROUP_ATTACH_TYPES='ingress egress \
+ sock_create sock_ops device \
+ bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
getpeername4 getpeername6 getsockname4 getsockname6 \
sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
setsockopt sock_release'
local ATTACH_FLAGS='multi override'
local PROG_TYPE='id pinned tag name'
- case $prev in
- $command)
- _filedir
- return 0
- ;;
- ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
- post_bind4|post_bind6|connect4|connect6|getpeername4|\
- getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\
- recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release)
+ # Check for $prev = $command first
+ if [ $prev = $command ]; then
+ _filedir
+ return 0
+ # Then check for attach type. This is done outside of the
+ # "case $prev in" to avoid writing the whole list of attach
+ # types again as pattern to match (where we cannot reuse
+ # our variable).
+ elif [[ $BPFTOOL_CGROUP_ATTACH_TYPES =~ $prev ]]; then
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
"$cur" ) )
return 0
- ;;
+ fi
+ # case/esac for the other cases
+ case $prev in
id)
_bpftool_get_prog_ids
return 0
;;
*)
- if ! _bpftool_search_list "$ATTACH_TYPES"; then
- COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- \
- "$cur" ) )
+ if ! _bpftool_search_list "$BPFTOOL_CGROUP_ATTACH_TYPES"; then
+ COMPREPLY=( $( compgen -W \
+ "$BPFTOOL_CGROUP_ATTACH_TYPES" -- "$cur" ) )
elif [[ "$command" == "attach" ]]; then
# We have an attach type on the command line,
# but it is not the previous word, or
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 385d5c955cf3..f7e5ff3586c9 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -580,16 +580,12 @@ static int do_dump(int argc, char **argv)
}
if (!btf) {
- err = btf__get_from_id(btf_id, &btf);
+ btf = btf__load_from_kernel_by_id_split(btf_id, base_btf);
+ err = libbpf_get_error(btf);
if (err) {
p_err("get btf by id (%u): %s", btf_id, strerror(err));
goto done;
}
- if (!btf) {
- err = -ENOENT;
- p_err("can't find btf with ID (%u)", btf_id);
- goto done;
- }
}
if (dump_c) {
@@ -985,7 +981,8 @@ static int do_help(int argc, char **argv)
" FORMAT := { raw | c }\n"
" " HELP_SPEC_MAP "\n"
" " HELP_SPEC_PROGRAM "\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-B|--base-btf} }\n"
"",
bin_name, "btf");
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 7ca54d046362..9c25286a5c73 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -64,8 +64,10 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
}
info = &prog_info->info;
- if (!info->btf_id || !info->nr_func_info ||
- btf__get_from_id(info->btf_id, &prog_btf))
+ if (!info->btf_id || !info->nr_func_info)
+ goto print;
+ prog_btf = btf__load_from_kernel_by_id(info->btf_id);
+ if (libbpf_get_error(prog_btf))
goto print;
finfo = u64_to_ptr(info->func_info);
func_type = btf__type_by_id(prog_btf, finfo->type_id);
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 6e53b1d393f4..3571a281c43f 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -501,7 +501,8 @@ static int do_help(int argc, char **argv)
HELP_SPEC_ATTACH_TYPES "\n"
" " HELP_SPEC_ATTACH_FLAGS "\n"
" " HELP_SPEC_PROGRAM "\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-f|--bpffs} }\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index dc6daa193557..d42d930a3ec4 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -67,6 +67,12 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
[BPF_MODIFY_RETURN] = "mod_ret",
[BPF_LSM_MAC] = "lsm_mac",
[BPF_SK_LOOKUP] = "sk_lookup",
+ [BPF_TRACE_ITER] = "trace_iter",
+ [BPF_XDP_DEVMAP] = "xdp_devmap",
+ [BPF_XDP_CPUMAP] = "xdp_cpumap",
+ [BPF_XDP] = "xdp",
+ [BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select",
+ [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate",
};
void p_err(const char *fmt, ...)
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 40a88df275f9..7f36385aa9e2 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -1005,6 +1005,7 @@ static int do_help(int argc, char **argv)
" %1$s %2$s help\n"
"\n"
" COMPONENT := { kernel | dev NAME }\n"
+ " " HELP_SPEC_OPTIONS " }\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 1d71ff8c52fa..d40d92bbf0e4 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -1026,7 +1026,8 @@ static int do_help(int argc, char **argv)
" %1$s %2$s skeleton FILE [name OBJECT_NAME]\n"
" %1$s %2$s help\n"
"\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-L|--use-loader} }\n"
"",
bin_name, "gen");
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
index 3b1aad7535dd..84a9b01d956d 100644
--- a/tools/bpf/bpftool/iter.c
+++ b/tools/bpf/bpftool/iter.c
@@ -97,7 +97,9 @@ static int do_help(int argc, char **argv)
fprintf(stderr,
"Usage: %1$s %2$s pin OBJ PATH [map MAP]\n"
" %1$s %2$s help\n"
+ "\n"
" " HELP_SPEC_MAP "\n"
+ " " HELP_SPEC_OPTIONS " }\n"
"",
bin_name, "iter");
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index e77e1525d20a..8cc3e36f8cc6 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -401,7 +401,8 @@ static int do_help(int argc, char **argv)
" %1$s %2$s help\n"
"\n"
" " HELP_SPEC_LINK "\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-f|--bpffs} | {-n|--nomount} }\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 3ddfd4843738..02eaaf065f65 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -64,7 +64,8 @@ static int do_help(int argc, char **argv)
" %s version\n"
"\n"
" OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-V|--version} }\n"
"",
bin_name, bin_name, bin_name);
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index c1cf29798b99..90caa42aac4c 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -57,8 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr)
#define HELP_SPEC_PROGRAM \
"PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }"
#define HELP_SPEC_OPTIONS \
- "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} |\n" \
- "\t {-m|--mapcompat} | {-n|--nomount} }"
+ "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}"
#define HELP_SPEC_MAP \
"MAP := { id MAP_ID | pinned FILE | name MAP_NAME }"
#define HELP_SPEC_LINK \
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 09ae0381205b..407071d54ab1 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -807,10 +807,11 @@ static struct btf *get_map_kv_btf(const struct bpf_map_info *info)
} else if (info->btf_value_type_id) {
int err;
- err = btf__get_from_id(info->btf_id, &btf);
- if (err || !btf) {
+ btf = btf__load_from_kernel_by_id(info->btf_id);
+ err = libbpf_get_error(btf);
+ if (err) {
p_err("failed to get btf");
- btf = err ? ERR_PTR(err) : ERR_PTR(-ESRCH);
+ btf = ERR_PTR(err);
}
}
@@ -1039,11 +1040,10 @@ static void print_key_value(struct bpf_map_info *info, void *key,
void *value)
{
json_writer_t *btf_wtr;
- struct btf *btf = NULL;
- int err;
+ struct btf *btf;
- err = btf__get_from_id(info->btf_id, &btf);
- if (err) {
+ btf = btf__load_from_kernel_by_id(info->btf_id);
+ if (libbpf_get_error(btf)) {
p_err("failed to get btf");
return;
}
@@ -1466,8 +1466,9 @@ static int do_help(int argc, char **argv)
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
" cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
" queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
- " task_storage }\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " task_storage }\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-f|--bpffs} | {-n|--nomount} }\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index f836d115d7d6..649053704bd7 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -729,6 +729,7 @@ static int do_help(int argc, char **argv)
"\n"
" " HELP_SPEC_PROGRAM "\n"
" ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n"
+ " " HELP_SPEC_OPTIONS " }\n"
"\n"
"Note: Only xdp and tc attachments are supported now.\n"
" For progs attached to cgroups, use \"bpftool cgroup\"\n"
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
index ad23934819c7..50de087b0db7 100644
--- a/tools/bpf/bpftool/perf.c
+++ b/tools/bpf/bpftool/perf.c
@@ -231,7 +231,10 @@ static int do_show(int argc, char **argv)
static int do_help(int argc, char **argv)
{
fprintf(stderr,
- "Usage: %1$s %2$s { show | list | help }\n"
+ "Usage: %1$s %2$s { show | list }\n"
+ " %1$s %2$s help }\n"
+ "\n"
+ " " HELP_SPEC_OPTIONS " }\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index cc48726740ad..9c3e343b7d87 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -249,10 +249,10 @@ static void show_prog_metadata(int fd, __u32 num_maps)
struct bpf_map_info map_info;
struct btf_var_secinfo *vsi;
bool printed_header = false;
- struct btf *btf = NULL;
unsigned int i, vlen;
void *value = NULL;
const char *name;
+ struct btf *btf;
int err;
if (!num_maps)
@@ -263,8 +263,8 @@ static void show_prog_metadata(int fd, __u32 num_maps)
if (!value)
return;
- err = btf__get_from_id(map_info.btf_id, &btf);
- if (err || !btf)
+ btf = btf__load_from_kernel_by_id(map_info.btf_id);
+ if (libbpf_get_error(btf))
goto out_free;
t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id);
@@ -646,9 +646,12 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
member_len = info->xlated_prog_len;
}
- if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) {
- p_err("failed to get btf");
- return -1;
+ if (info->btf_id) {
+ btf = btf__load_from_kernel_by_id(info->btf_id);
+ if (libbpf_get_error(btf)) {
+ p_err("failed to get btf");
+ return -1;
+ }
}
func_info = u64_to_ptr(info->func_info);
@@ -781,6 +784,8 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
kernel_syms_destroy(&dd);
}
+ btf__free(btf);
+
return 0;
}
@@ -2002,8 +2007,8 @@ static char *profile_target_name(int tgt_fd)
struct bpf_prog_info_linear *info_linear;
struct bpf_func_info *func_info;
const struct btf_type *t;
+ struct btf *btf = NULL;
char *name = NULL;
- struct btf *btf;
info_linear = bpf_program__get_prog_info_linear(
tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
@@ -2012,12 +2017,17 @@ static char *profile_target_name(int tgt_fd)
return NULL;
}
- if (info_linear->info.btf_id == 0 ||
- btf__get_from_id(info_linear->info.btf_id, &btf)) {
+ if (info_linear->info.btf_id == 0) {
p_err("prog FD %d doesn't have valid btf", tgt_fd);
goto out;
}
+ btf = btf__load_from_kernel_by_id(info_linear->info.btf_id);
+ if (libbpf_get_error(btf)) {
+ p_err("failed to load btf for prog FD %d", tgt_fd);
+ goto out;
+ }
+
func_info = u64_to_ptr(info_linear->info.func_info);
t = btf__type_by_id(btf, func_info[0].type_id);
if (!t) {
@@ -2027,6 +2037,7 @@ static char *profile_target_name(int tgt_fd)
}
name = strdup(btf__name_by_offset(btf, t->name_off));
out:
+ btf__free(btf);
free(info_linear);
return name;
}
@@ -2245,10 +2256,12 @@ static int do_help(int argc, char **argv)
" cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
" cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
" struct_ops | fentry | fexit | freplace | sk_lookup }\n"
- " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
- " flow_dissector }\n"
+ " ATTACH_TYPE := { msg_verdict | skb_verdict | stream_verdict |\n"
+ " stream_parser | flow_dissector }\n"
" METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n"
+ " {-L|--use-loader} }\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c
index b58b91f62ffb..ab2d2290569a 100644
--- a/tools/bpf/bpftool/struct_ops.c
+++ b/tools/bpf/bpftool/struct_ops.c
@@ -572,8 +572,8 @@ static int do_help(int argc, char **argv)
" %1$s %2$s unregister STRUCT_OPS_MAP\n"
" %1$s %2$s help\n"
"\n"
- " OPTIONS := { {-j|--json} [{-p|--pretty}] }\n"
" STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n"
+ " " HELP_SPEC_OPTIONS " }\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 3ad9301b0f00..de6365b53c9c 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -291,7 +291,7 @@ static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh)
sh->sh_addralign = expected;
if (gelf_update_shdr(scn, sh) == 0) {
- printf("FAILED cannot update section header: %s\n",
+ pr_err("FAILED cannot update section header: %s\n",
elf_errmsg(-1));
return -1;
}
@@ -317,6 +317,7 @@ static int elf_collect(struct object *obj)
elf = elf_begin(fd, ELF_C_RDWR_MMAP, NULL);
if (!elf) {
+ close(fd);
pr_err("FAILED cannot create ELF descriptor: %s\n",
elf_errmsg(-1));
return -1;
@@ -484,7 +485,7 @@ static int symbols_resolve(struct object *obj)
err = libbpf_get_error(btf);
if (err) {
pr_err("FAILED: load BTF from %s: %s\n",
- obj->path, strerror(-err));
+ obj->btf ?: obj->path, strerror(-err));
return -1;
}
@@ -555,8 +556,7 @@ static int id_patch(struct object *obj, struct btf_id *id)
int i;
if (!id->id) {
- pr_err("FAILED unresolved symbol %s\n", id->name);
- return -EINVAL;
+ pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name);
}
for (i = 0; i < id->addr_cnt; i++) {
@@ -734,8 +734,9 @@ int main(int argc, const char **argv)
err = 0;
out:
- if (obj.efile.elf)
+ if (obj.efile.elf) {
elf_end(obj.efile.elf);
- close(obj.efile.fd);
+ close(obj.efile.fd);
+ }
return err;
}
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 8b7a9830dd22..3430667b0d24 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -1031,7 +1031,7 @@ struct sys_stat_struct {
* scall32-o32.S in the kernel sources.
* - the system call is performed by calling "syscall"
* - syscall return comes in v0, and register a3 needs to be checked to know
- * if an error occured, in which case errno is in v0.
+ * if an error occurred, in which case errno is in v0.
* - the arguments are cast to long and assigned into the target registers
* which are then simply passed as registers to the asm code, so that we
* don't have to experience issues with register constraints.
@@ -2244,6 +2244,19 @@ unsigned int sleep(unsigned int seconds)
}
static __attribute__((unused))
+int msleep(unsigned int msecs)
+{
+ struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 };
+
+ if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+ return (my_timeval.tv_sec * 1000) +
+ (my_timeval.tv_usec / 1000) +
+ !!(my_timeval.tv_usec % 1000);
+ else
+ return 0;
+}
+
+static __attribute__((unused))
int stat(const char *path, struct stat *buf)
{
int ret = sys_stat(path, buf);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index bf9252c7381e..791f31dd0abe 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -84,7 +84,7 @@ struct bpf_lpm_trie_key {
struct bpf_cgroup_storage_key {
__u64 cgroup_inode_id; /* cgroup inode id */
- __u32 attach_type; /* program attach type */
+ __u32 attach_type; /* program attach type (enum bpf_attach_type) */
};
union bpf_iter_link_info {
@@ -324,9 +324,6 @@ union bpf_iter_link_info {
* **BPF_PROG_TYPE_SK_LOOKUP**
* *data_in* and *data_out* must be NULL.
*
- * **BPF_PROG_TYPE_XDP**
- * *ctx_in* and *ctx_out* must be NULL.
- *
* **BPF_PROG_TYPE_RAW_TRACEPOINT**,
* **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
*
@@ -996,6 +993,7 @@ enum bpf_attach_type {
BPF_SK_SKB_VERDICT,
BPF_SK_REUSEPORT_SELECT,
BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
+ BPF_PERF_EVENT,
__MAX_BPF_ATTACH_TYPE
};
@@ -1009,6 +1007,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_ITER = 4,
BPF_LINK_TYPE_NETNS = 5,
BPF_LINK_TYPE_XDP = 6,
+ BPF_LINK_TYPE_PERF_EVENT = 7,
MAX_BPF_LINK_TYPE,
};
@@ -1449,6 +1448,13 @@ union bpf_attr {
__aligned_u64 iter_info; /* extra bpf_iter_link_info */
__u32 iter_info_len; /* iter_info length */
};
+ struct {
+ /* black box user-provided value passed through
+ * to BPF program at the execution time and
+ * accessible through bpf_get_attach_cookie() BPF helper
+ */
+ __u64 bpf_cookie;
+ } perf_event;
};
} link_create;
@@ -3249,7 +3255,7 @@ union bpf_attr {
* long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
* Description
* Select a **SO_REUSEPORT** socket from a
- * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
+ * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*.
* It checks the selected socket is matching the incoming
* request in the socket buffer.
* Return
@@ -4780,6 +4786,97 @@ union bpf_attr {
* Execute close syscall for given FD.
* Return
* A syscall result.
+ *
+ * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags)
+ * Description
+ * Initialize the timer.
+ * First 4 bits of *flags* specify clockid.
+ * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed.
+ * All other bits of *flags* are reserved.
+ * The verifier will reject the program if *timer* is not from
+ * the same *map*.
+ * Return
+ * 0 on success.
+ * **-EBUSY** if *timer* is already initialized.
+ * **-EINVAL** if invalid *flags* are passed.
+ * **-EPERM** if *timer* is in a map that doesn't have any user references.
+ * The user space should either hold a file descriptor to a map with timers
+ * or pin such map in bpffs. When map is unpinned or file descriptor is
+ * closed all timers in the map will be cancelled and freed.
+ *
+ * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn)
+ * Description
+ * Configure the timer to call *callback_fn* static function.
+ * Return
+ * 0 on success.
+ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
+ * **-EPERM** if *timer* is in a map that doesn't have any user references.
+ * The user space should either hold a file descriptor to a map with timers
+ * or pin such map in bpffs. When map is unpinned or file descriptor is
+ * closed all timers in the map will be cancelled and freed.
+ *
+ * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags)
+ * Description
+ * Set timer expiration N nanoseconds from the current time. The
+ * configured callback will be invoked in soft irq context on some cpu
+ * and will not repeat unless another bpf_timer_start() is made.
+ * In such case the next invocation can migrate to a different cpu.
+ * Since struct bpf_timer is a field inside map element the map
+ * owns the timer. The bpf_timer_set_callback() will increment refcnt
+ * of BPF program to make sure that callback_fn code stays valid.
+ * When user space reference to a map reaches zero all timers
+ * in a map are cancelled and corresponding program's refcnts are
+ * decremented. This is done to make sure that Ctrl-C of a user
+ * process doesn't leave any timers running. If map is pinned in
+ * bpffs the callback_fn can re-arm itself indefinitely.
+ * bpf_map_update/delete_elem() helpers and user space sys_bpf commands
+ * cancel and free the timer in the given map element.
+ * The map can contain timers that invoke callback_fn-s from different
+ * programs. The same callback_fn can serve different timers from
+ * different maps if key/value layout matches across maps.
+ * Every bpf_timer_set_callback() can have different callback_fn.
+ *
+ * Return
+ * 0 on success.
+ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier
+ * or invalid *flags* are passed.
+ *
+ * long bpf_timer_cancel(struct bpf_timer *timer)
+ * Description
+ * Cancel the timer and wait for callback_fn to finish if it was running.
+ * Return
+ * 0 if the timer was not active.
+ * 1 if the timer was active.
+ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
+ * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its
+ * own timer which would have led to a deadlock otherwise.
+ *
+ * u64 bpf_get_func_ip(void *ctx)
+ * Description
+ * Get address of the traced function (for tracing and kprobe programs).
+ * Return
+ * Address of the traced function.
+ *
+ * u64 bpf_get_attach_cookie(void *ctx)
+ * Description
+ * Get bpf_cookie value provided (optionally) during the program
+ * attachment. It might be different for each individual
+ * attachment, even if BPF program itself is the same.
+ * Expects BPF program context *ctx* as a first argument.
+ *
+ * Supported for the following program types:
+ * - kprobe/uprobe;
+ * - tracepoint;
+ * - perf_event.
+ * Return
+ * Value specified by user at BPF link creation/attachment time
+ * or 0, if it was not specified.
+ *
+ * long bpf_task_pt_regs(struct task_struct *task)
+ * Description
+ * Get the struct pt_regs associated with **task**.
+ * Return
+ * A pointer to struct pt_regs.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4951,6 +5048,13 @@ union bpf_attr {
FN(sys_bpf), \
FN(btf_find_by_name_kind), \
FN(sys_close), \
+ FN(timer_init), \
+ FN(timer_set_callback), \
+ FN(timer_start), \
+ FN(timer_cancel), \
+ FN(get_func_ip), \
+ FN(get_attach_cookie), \
+ FN(task_pt_regs), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6077,6 +6181,11 @@ struct bpf_spin_lock {
__u32 val;
};
+struct bpf_timer {
+ __u64 :64;
+ __u64 :64;
+} __attribute__((aligned(8)));
+
struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.
diff --git a/tools/include/uapi/linux/ethtool.h b/tools/include/uapi/linux/ethtool.h
index c86c3e942df9..47afae3895ec 100644
--- a/tools/include/uapi/linux/ethtool.h
+++ b/tools/include/uapi/linux/ethtool.h
@@ -48,4 +48,57 @@ struct ethtool_channels {
__u32 combined_count;
};
+#define ETHTOOL_FWVERS_LEN 32
+#define ETHTOOL_BUSINFO_LEN 32
+#define ETHTOOL_EROMVERS_LEN 32
+
+/**
+ * struct ethtool_drvinfo - general driver and device information
+ * @cmd: Command number = %ETHTOOL_GDRVINFO
+ * @driver: Driver short name. This should normally match the name
+ * in its bus driver structure (e.g. pci_driver::name). Must
+ * not be an empty string.
+ * @version: Driver version string; may be an empty string
+ * @fw_version: Firmware version string; may be an empty string
+ * @erom_version: Expansion ROM version string; may be an empty string
+ * @bus_info: Device bus address. This should match the dev_name()
+ * string for the underlying bus device, if there is one. May be
+ * an empty string.
+ * @reserved2: Reserved for future use; see the note on reserved space.
+ * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and
+ * %ETHTOOL_SPFLAGS commands; also the number of strings in the
+ * %ETH_SS_PRIV_FLAGS set
+ * @n_stats: Number of u64 statistics returned by the %ETHTOOL_GSTATS
+ * command; also the number of strings in the %ETH_SS_STATS set
+ * @testinfo_len: Number of results returned by the %ETHTOOL_TEST
+ * command; also the number of strings in the %ETH_SS_TEST set
+ * @eedump_len: Size of EEPROM accessible through the %ETHTOOL_GEEPROM
+ * and %ETHTOOL_SEEPROM commands, in bytes
+ * @regdump_len: Size of register dump returned by the %ETHTOOL_GREGS
+ * command, in bytes
+ *
+ * Users can use the %ETHTOOL_GSSET_INFO command to get the number of
+ * strings in any string set (from Linux 2.6.34).
+ *
+ * Drivers should set at most @driver, @version, @fw_version and
+ * @bus_info in their get_drvinfo() implementation. The ethtool
+ * core fills in the other fields using other driver operations.
+ */
+struct ethtool_drvinfo {
+ __u32 cmd;
+ char driver[32];
+ char version[32];
+ char fw_version[ETHTOOL_FWVERS_LEN];
+ char bus_info[ETHTOOL_BUSINFO_LEN];
+ char erom_version[ETHTOOL_EROMVERS_LEN];
+ char reserved2[12];
+ __u32 n_priv_flags;
+ __u32 n_stats;
+ __u32 testinfo_len;
+ __u32 eedump_len;
+ __u32 regdump_len;
+};
+
+#define ETHTOOL_GDRVINFO 0x00000003
+
#endif /* _UAPI_LINUX_ETHTOOL_H */
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index d208b2af697f..b3610fdd1fee 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -230,6 +230,7 @@ enum {
IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */
IFLA_INET6_TOKEN, /* device token */
IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */
+ IFLA_INET6_RA_MTU, /* mtu carried in the RA message */
__IFLA_INET6_MAX
};
@@ -653,6 +654,7 @@ enum {
IFLA_BOND_AD_ACTOR_SYSTEM,
IFLA_BOND_TLB_DYNAMIC_LB,
IFLA_BOND_PEER_NOTIF_DELAY,
+ IFLA_BOND_AD_LACP_ACTIVE,
__IFLA_BOND_MAX,
};
diff --git a/tools/io_uring/io_uring-cp.c b/tools/io_uring/io_uring-cp.c
index 81461813ec62..d9bd6f5f8f46 100644
--- a/tools/io_uring/io_uring-cp.c
+++ b/tools/io_uring/io_uring-cp.c
@@ -131,8 +131,7 @@ static int copy_file(struct io_uring *ring, off_t insize)
writes = reads = offset = 0;
while (insize || write_left) {
- unsigned long had_reads;
- int got_comp;
+ int had_reads, got_comp;
/*
* Queue up as many reads as we can
@@ -174,8 +173,13 @@ static int copy_file(struct io_uring *ring, off_t insize)
if (!got_comp) {
ret = io_uring_wait_cqe(ring, &cqe);
got_comp = 1;
- } else
+ } else {
ret = io_uring_peek_cqe(ring, &cqe);
+ if (ret == -EAGAIN) {
+ cqe = NULL;
+ ret = 0;
+ }
+ }
if (ret < 0) {
fprintf(stderr, "io_uring_peek_cqe: %s\n",
strerror(-ret));
@@ -194,7 +198,7 @@ static int copy_file(struct io_uring *ring, off_t insize)
fprintf(stderr, "cqe failed: %s\n",
strerror(-cqe->res));
return 1;
- } else if ((size_t) cqe->res != data->iov.iov_len) {
+ } else if (cqe->res != data->iov.iov_len) {
/* Short read/write, adjust and requeue */
data->iov.iov_base += cqe->res;
data->iov.iov_len -= cqe->res;
@@ -221,6 +225,25 @@ static int copy_file(struct io_uring *ring, off_t insize)
}
}
+ /* wait out pending writes */
+ while (writes) {
+ struct io_data *data;
+
+ ret = io_uring_wait_cqe(ring, &cqe);
+ if (ret) {
+ fprintf(stderr, "wait_cqe=%d\n", ret);
+ return 1;
+ }
+ if (cqe->res < 0) {
+ fprintf(stderr, "write res=%d\n", cqe->res);
+ return 1;
+ }
+ data = io_uring_cqe_get_data(cqe);
+ free(data);
+ writes--;
+ io_uring_cqe_seen(ring, cqe);
+ }
+
return 0;
}
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 430f6874fa41..94f0a146bb7b 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,3 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
- btf_dump.o ringbuf.o strset.o linker.o gen_loader.o
+ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index ec14aa725bb0..74c3b73a5fbe 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -4,8 +4,9 @@
RM ?= rm
srctree = $(abs_srctree)
+VERSION_SCRIPT := libbpf.map
LIBBPF_VERSION := $(shell \
- grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \
+ grep -oE '^LIBBPF_([0-9.]+)' $(VERSION_SCRIPT) | \
sort -rV | head -n1 | cut -d'_' -f2)
LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION)))
@@ -110,7 +111,6 @@ SHARED_OBJDIR := $(OUTPUT)sharedobjs/
STATIC_OBJDIR := $(OUTPUT)staticobjs/
BPF_IN_SHARED := $(SHARED_OBJDIR)libbpf-in.o
BPF_IN_STATIC := $(STATIC_OBJDIR)libbpf-in.o
-VERSION_SCRIPT := libbpf.map
BPF_HELPER_DEFS := $(OUTPUT)bpf_helper_defs.h
LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
@@ -163,10 +163,10 @@ $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
-$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED)
+$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(VERSION_SCRIPT)
$(QUIET_LINK)$(CC) $(LDFLAGS) \
--shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
- -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -lz -o $@
+ -Wl,--version-script=$(VERSION_SCRIPT) $< -lelf -lz -o $@
@ln -sf $(@F) $(OUTPUT)libbpf.so
@ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
@@ -181,7 +181,7 @@ $(OUTPUT)libbpf.pc:
check: check_abi
-check_abi: $(OUTPUT)libbpf.so
+check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT)
@if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \
echo "Warning: Num of global symbols in $(BPF_IN_SHARED)" \
"($(GLOBAL_SYM_COUNT)) does NOT match with num of" \
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 86dcac44f32f..2401fad090c5 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -684,8 +684,13 @@ int bpf_link_create(int prog_fd, int target_fd,
iter_info_len = OPTS_GET(opts, iter_info_len, 0);
target_btf_id = OPTS_GET(opts, target_btf_id, 0);
- if (iter_info_len && target_btf_id)
- return libbpf_err(-EINVAL);
+ /* validate we don't have unexpected combinations of non-zero fields */
+ if (iter_info_len || target_btf_id) {
+ if (iter_info_len && target_btf_id)
+ return libbpf_err(-EINVAL);
+ if (!OPTS_ZEROED(opts, target_btf_id))
+ return libbpf_err(-EINVAL);
+ }
memset(&attr, 0, sizeof(attr));
attr.link_create.prog_fd = prog_fd;
@@ -693,14 +698,27 @@ int bpf_link_create(int prog_fd, int target_fd,
attr.link_create.attach_type = attach_type;
attr.link_create.flags = OPTS_GET(opts, flags, 0);
- if (iter_info_len) {
- attr.link_create.iter_info =
- ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
- attr.link_create.iter_info_len = iter_info_len;
- } else if (target_btf_id) {
+ if (target_btf_id) {
attr.link_create.target_btf_id = target_btf_id;
+ goto proceed;
}
+ switch (attach_type) {
+ case BPF_TRACE_ITER:
+ attr.link_create.iter_info = ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
+ attr.link_create.iter_info_len = iter_info_len;
+ break;
+ case BPF_PERF_EVENT:
+ attr.link_create.perf_event.bpf_cookie = OPTS_GET(opts, perf_event.bpf_cookie, 0);
+ if (!OPTS_ZEROED(opts, perf_event))
+ return libbpf_err(-EINVAL);
+ break;
+ default:
+ if (!OPTS_ZEROED(opts, flags))
+ return libbpf_err(-EINVAL);
+ break;
+ }
+proceed:
fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
return libbpf_err_errno(fd);
}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 4f758f8f50cd..6fffb3cdf39b 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -177,8 +177,14 @@ struct bpf_link_create_opts {
union bpf_iter_link_info *iter_info;
__u32 iter_info_len;
__u32 target_btf_id;
+ union {
+ struct {
+ __u64 bpf_cookie;
+ } perf_event;
+ };
+ size_t :0;
};
-#define bpf_link_create_opts__last_field target_btf_id
+#define bpf_link_create_opts__last_field perf_event
LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index b46760b93bb4..77dc24d58302 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -804,6 +804,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
btf->nr_types = 0;
btf->start_id = 1;
btf->start_str_off = 0;
+ btf->fd = -1;
if (base_btf) {
btf->base_btf = base_btf;
@@ -832,8 +833,6 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
if (err)
goto done;
- btf->fd = -1;
-
done:
if (err) {
btf__free(btf);
@@ -1180,7 +1179,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
-int btf__load(struct btf *btf)
+int btf__load_into_kernel(struct btf *btf)
{
__u32 log_buf_size = 0, raw_size;
char *log_buf = NULL;
@@ -1228,6 +1227,7 @@ done:
free(log_buf);
return libbpf_err(err);
}
+int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel")));
int btf__fd(const struct btf *btf)
{
@@ -1382,21 +1382,35 @@ exit_free:
return btf;
}
-int btf__get_from_id(__u32 id, struct btf **btf)
+struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
{
- struct btf *res;
- int err, btf_fd;
+ struct btf *btf;
+ int btf_fd;
- *btf = NULL;
btf_fd = bpf_btf_get_fd_by_id(id);
if (btf_fd < 0)
- return libbpf_err(-errno);
-
- res = btf_get_from_fd(btf_fd, NULL);
- err = libbpf_get_error(res);
+ return libbpf_err_ptr(-errno);
+ btf = btf_get_from_fd(btf_fd, base_btf);
close(btf_fd);
+ return libbpf_ptr(btf);
+}
+
+/* Load the BTF object identified by @id from the kernel as a standalone
+ * BTF, i.e. with no base BTF to split against. The result is whatever
+ * btf__load_from_kernel_by_id_split() returns for a NULL base.
+ */
+struct btf *btf__load_from_kernel_by_id(__u32 id)
+{
+	struct btf *no_base_btf = NULL;
+
+	return btf__load_from_kernel_by_id_split(id, no_base_btf);
+}
+
+int btf__get_from_id(__u32 id, struct btf **btf)
+{
+ struct btf *res;
+ int err;
+
+ *btf = NULL;
+ res = btf__load_from_kernel_by_id(id);
+ err = libbpf_get_error(res);
+
if (err)
return libbpf_err(err);
@@ -4021,7 +4035,7 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
*/
if (d->hypot_adjust_canon)
continue;
-
+
if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
d->map[t_id] = c_id;
@@ -4394,7 +4408,7 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
* Probe few well-known locations for vmlinux kernel image and try to load BTF
* data out of it to use for target BTF.
*/
-struct btf *libbpf_find_kernel_btf(void)
+struct btf *btf__load_vmlinux_btf(void)
{
struct {
const char *path_fmt;
@@ -4440,6 +4454,16 @@ struct btf *libbpf_find_kernel_btf(void)
return libbpf_err_ptr(-ESRCH);
}
+struct btf *libbpf_find_kernel_btf(void) __attribute__((alias("btf__load_vmlinux_btf")));
+
+/* Parse the BTF exposed at "/sys/kernel/btf/<module_name>" as split BTF on
+ * top of @vmlinux_btf and return whatever btf__parse_split() returns.
+ * The path is built in a fixed 80-byte buffer, so snprintf() silently
+ * truncates over-long module names.
+ */
+struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf)
+{
+	char sysfs_path[80];
+
+	snprintf(sysfs_path, sizeof(sysfs_path), "/sys/kernel/btf/%s", module_name);
+
+	return btf__parse_split(sysfs_path, vmlinux_btf);
+}
+
int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)
{
int i, n, err;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index b54f1c3ebd57..4a711f990904 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -44,8 +44,17 @@ LIBBPF_API struct btf *btf__parse_elf_split(const char *path, struct btf *base_b
LIBBPF_API struct btf *btf__parse_raw(const char *path);
LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf);
+LIBBPF_API struct btf *btf__load_vmlinux_btf(void);
+LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf);
+LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
+
+LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id);
+LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf);
+LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
+
LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
LIBBPF_API int btf__load(struct btf *btf);
+LIBBPF_API int btf__load_into_kernel(struct btf *btf);
LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
const char *type_name);
LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
@@ -66,7 +75,6 @@ LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
-LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
__u32 expected_key_size,
__u32 expected_value_size,
@@ -89,8 +97,6 @@ int btf_ext__reloc_line_info(const struct btf *btf,
LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
-LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
-
LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf,
@@ -184,6 +190,25 @@ LIBBPF_API int
btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
const struct btf_dump_emit_type_decl_opts *opts);
+
+/* Options accepted by btf_dump__dump_type_data(). */
+struct btf_dump_type_data_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* string emitted once per indent level; a single tab is used when
+ * this is NULL, and at most BTF_DATA_INDENT_STR_LEN - 1 characters
+ * of it are used
+ */
+ const char *indent_str;
+ /* base indent level; nesting depth is added on top of it */
+ int indent_level;
+ /* below match "show" flags for bpf_show_snprintf() */
+ bool compact; /* no newlines/indentation */
+ bool skip_names; /* skip member/type names */
+ bool emit_zeroes; /* show 0-valued fields */
+ /* zero-width bitfield; NOTE(review): presumably here to avoid
+ * trailing padding in the struct - confirm
+ */
+ size_t :0;
+};
+/* name of the last real field of the struct above */
+#define btf_dump_type_data_opts__last_field emit_zeroes
+
+/* Dump the @data_sz bytes at @data, interpreted as a value of BTF type @id,
+ * through dumper @d. @opts may be NULL, in which case defaults are used.
+ * The implementation rejects invalid @opts with -EINVAL and an unknown
+ * type @id with -ENOENT; otherwise it yields the size of the data dumped
+ * or a negative error.
+ */
+LIBBPF_API int
+btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
+ const void *data, size_t data_sz,
+ const struct btf_dump_type_data_opts *opts);
+
/*
* A set of helpers for easier BTF types handling
*/
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 5dc6b5172bb3..e4b483f15fb9 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -10,6 +10,8 @@
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
+#include <ctype.h>
+#include <endian.h>
#include <errno.h>
#include <linux/err.h>
#include <linux/btf.h>
@@ -53,6 +55,26 @@ struct btf_dump_type_aux_state {
__u8 referenced: 1;
};
+/* indent string length; one indent string is added for each indent level */
+#define BTF_DATA_INDENT_STR_LEN 32
+
+/*
+ * Common internal data for BTF type data dump operations.
+ * One instance lives on the stack of btf_dump__dump_type_data() and is
+ * reachable through btf_dump->typed_dump for the duration of that call.
+ */
+struct btf_dump_data {
+ const void *data_end; /* end of valid data to show */
+ bool compact; /* copied from opts: no newlines/indentation */
+ bool skip_names; /* copied from opts: no ".name = " / "(type)" */
+ bool emit_zeroes; /* copied from opts: show 0-valued data too */
+ __u8 indent_lvl; /* base indent level */
+ char indent_str[BTF_DATA_INDENT_STR_LEN]; /* emitted once per level */
+ /* below are used during iteration */
+ int depth; /* current struct/array nesting depth */
+ bool is_array_member; /* currently dumping elements of an array */
+ bool is_array_terminated; /* a '\0' was seen in a char array */
+ bool is_array_char; /* array elements are 1-byte integers */
+};
+
struct btf_dump {
const struct btf *btf;
const struct btf_ext *btf_ext;
@@ -60,6 +82,7 @@ struct btf_dump {
struct btf_dump_opts opts;
int ptr_sz;
bool strip_mods;
+ bool skip_anon_defs;
int last_id;
/* per-type auxiliary state */
@@ -89,6 +112,10 @@ struct btf_dump {
* name occurrences
*/
struct hashmap *ident_names;
+ /*
+ * data for typed display; allocated if needed.
+ */
+ struct btf_dump_data *typed_dump;
};
static size_t str_hash_fn(const void *key, void *ctx)
@@ -765,11 +792,11 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
break;
case BTF_KIND_FUNC_PROTO: {
const struct btf_param *p = btf_params(t);
- __u16 vlen = btf_vlen(t);
+ __u16 n = btf_vlen(t);
int i;
btf_dump_emit_type(d, t->type, cont_id);
- for (i = 0; i < vlen; i++, p++)
+ for (i = 0; i < n; i++, p++)
btf_dump_emit_type(d, p->type, cont_id);
break;
@@ -852,8 +879,9 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d,
static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
const struct btf_type *t)
{
- btf_dump_printf(d, "%s %s",
+ btf_dump_printf(d, "%s%s%s",
btf_is_struct(t) ? "struct" : "union",
+ t->name_off ? " " : "",
btf_dump_type_name(d, id));
}
@@ -1259,7 +1287,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
case BTF_KIND_UNION:
btf_dump_emit_mods(d, decls);
/* inline anonymous struct/union */
- if (t->name_off == 0)
+ if (t->name_off == 0 && !d->skip_anon_defs)
btf_dump_emit_struct_def(d, id, t, lvl);
else
btf_dump_emit_struct_fwd(d, id, t);
@@ -1267,7 +1295,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
case BTF_KIND_ENUM:
btf_dump_emit_mods(d, decls);
/* inline anonymous enum */
- if (t->name_off == 0)
+ if (t->name_off == 0 && !d->skip_anon_defs)
btf_dump_emit_enum_def(d, id, t, lvl);
else
btf_dump_emit_enum_fwd(d, id, t);
@@ -1392,6 +1420,39 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
btf_dump_emit_name(d, fname, last_was_ptr);
}
+/* Emit the name of type @id in front of a value. With @top_level set the
+ * name is wrapped in parentheses, giving "(type_name)"; otherwise the bare
+ * type name is emitted.
+ */
+static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id,
+				    bool top_level)
+{
+	const struct btf_type *type;
+
+	/* Array members get no per-element type name; this avoids the
+	 * redundancy of
+	 *   .name = (char[4])[(char)'f',(char)'o',(char)'o',]
+	 */
+	if (d->typed_dump->is_array_member)
+		return;
+
+	/* Variables and sections are not cast themselves; the cast is
+	 * emitted for the associated variable value(s) instead.
+	 */
+	type = btf__type_by_id(d->btf, id);
+	if (btf_is_var(type) || btf_is_datasec(type))
+		return;
+
+	if (top_level)
+		btf_dump_printf(d, "(");
+
+	/* emit the declaration with modifiers stripped and without
+	 * expanding anonymous struct/union/enum definitions inline
+	 */
+	d->strip_mods = true;
+	d->skip_anon_defs = true;
+	btf_dump_emit_type_decl(d, id, "", 0);
+	d->skip_anon_defs = false;
+	d->strip_mods = false;
+
+	if (top_level)
+		btf_dump_printf(d, ")");
+}
+
/* return number of duplicates (occurrences) of a given name */
static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
const char *orig_name)
@@ -1442,3 +1503,803 @@ static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id)
{
return btf_dump_resolve_name(d, id, d->ident_names);
}
+
+/* Forward declaration: the array, struct, var and datasec dumpers below
+ * call back into this function for their elements/members.
+ */
+static int btf_dump_dump_type_data(struct btf_dump *d,
+ const char *fname,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz);
+
+/* Line terminator to append after a value: nothing in compact mode or at
+ * nesting depth 0, a newline otherwise.
+ */
+static const char *btf_dump_data_newline(struct btf_dump *d)
+{
+	const struct btf_dump_data *td = d->typed_dump;
+
+	if (td->compact || td->depth == 0)
+		return "";
+
+	return "\n";
+}
+
+/* Delimiter to append after a value: nothing at nesting depth 0, a comma
+ * for anything nested inside a struct/union/array.
+ */
+static const char *btf_dump_data_delim(struct btf_dump *d)
+{
+	if (d->typed_dump->depth == 0)
+		return "";
+
+	return ",";
+}
+
+/* Emit the indentation prefix for the current line: the indent string is
+ * printed (base indent level + current depth) times. Nothing is printed
+ * in compact mode.
+ */
+static void btf_dump_data_pfx(struct btf_dump *d)
+{
+	int remaining;
+
+	if (d->typed_dump->compact)
+		return;
+
+	remaining = d->typed_dump->indent_lvl + d->typed_dump->depth;
+	while (remaining-- > 0)
+		btf_dump_printf(d, "%s", d->typed_dump->indent_str);
+}
+
+/* A macro is used here as btf_type_value[s]() appends format specifiers
+ * to the format specifier passed in; these do the work of appending
+ * delimiters etc while the caller simply has to specify the type values
+ * in the format specifier + value(s).
+ */
+#define btf_dump_type_values(d, fmt, ...) \
+ btf_dump_printf(d, fmt "%s%s", \
+ ##__VA_ARGS__, \
+ btf_dump_data_delim(d), \
+ btf_dump_data_newline(d))
+
+/* Emit a placeholder for a BTF kind whose data cannot be displayed and
+ * report -ENOTSUP to the caller.
+ */
+static int btf_dump_unsupported_data(struct btf_dump *d,
+				     const struct btf_type *t,
+				     __u32 id)
+{
+	unsigned int kind = btf_kind(t);
+
+	btf_dump_printf(d, "<unsupported kind:%u>", kind);
+
+	return -ENOTSUP;
+}
+
+/* Extract an unsigned value of up to 64 bits from the t->size bytes at
+ * @data.
+ *
+ * @bits_offset: bit offset of the field within the first byte at @data
+ * @bit_sz:      width of the field in bits; 0 means "not a bitfield", in
+ *               which case all t->size bytes are read as the value
+ * @value:       receives the zero-extended result
+ *
+ * Returns 0 on success, or -EINVAL when the underlying type is not 1-8
+ * bytes wide or the field does not fit in 64 bits.
+ */
+static int btf_dump_get_bitfield_value(struct btf_dump *d,
+				       const struct btf_type *t,
+				       const void *data,
+				       __u8 bits_offset,
+				       __u8 bit_sz,
+				       __u64 *value)
+{
+	__u16 left_shift_bits, right_shift_bits;
+	const __u8 *bytes = data;
+	__u16 nr_copy_bits;
+	int sz = t->size;
+	__u64 num = 0;
+	int i;
+
+	/* Maximum supported bitfield size is 64 bits */
+	if (sz < 1 || sz > 8) {
+		pr_warn("unexpected bitfield size %d\n", sz);
+		return -EINVAL;
+	}
+
+	/* Callers handling unaligned ints/enums pass bit_sz == 0 to request
+	 * the whole value; without this both shift counts below would be 64,
+	 * which is undefined behaviour for a 64-bit operand.
+	 */
+	if (bit_sz == 0)
+		bit_sz = sz * 8;
+
+	if (bit_sz + bits_offset > 64) {
+		pr_warn("unexpected bitfield of %u bits at bit offset %u\n",
+			bit_sz, bits_offset);
+		return -EINVAL;
+	}
+
+	/* Bitfield value retrieval is done in two steps; first relevant bytes are
+	 * stored in num, then we left/right shift num to eliminate irrelevant bits.
+	 */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+	for (i = sz - 1; i >= 0; i--)
+		num = num * 256 + bytes[i];
+	/* field occupies bits [bits_offset, bits_offset + bit_sz) of num */
+	nr_copy_bits = bit_sz + bits_offset;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+	for (i = 0; i < sz; i++)
+		num = num * 256 + bytes[i];
+	/* field starts bits_offset bits below the top of the sz * 8 bits */
+	nr_copy_bits = sz * 8 - bits_offset;
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+	left_shift_bits = 64 - nr_copy_bits;
+	right_shift_bits = 64 - bit_sz;
+
+	*value = (num << left_shift_bits) >> right_shift_bits;
+
+	return 0;
+}
+
+/* Zero check for a bitfield: returns -ENODATA when the field's value is 0,
+ * 0 when it is non-zero, or the error from extracting the value.
+ */
+static int btf_dump_bitfield_check_zero(struct btf_dump *d,
+					const struct btf_type *t,
+					const void *data,
+					__u8 bits_offset,
+					__u8 bit_sz)
+{
+	__u64 field_val;
+	int err;
+
+	err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &field_val);
+	if (err)
+		return err;
+
+	return field_val ? 0 : -ENODATA;
+}
+
+/* Print a bitfield value in hexadecimal ("0x..."), followed by the usual
+ * delimiter/newline. Returns 0, or the error from extracting the value.
+ */
+static int btf_dump_bitfield_data(struct btf_dump *d,
+				  const struct btf_type *t,
+				  const void *data,
+				  __u8 bits_offset,
+				  __u8 bit_sz)
+{
+	__u64 field_val;
+	int err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz,
+					      &field_val);
+
+	if (!err)
+		btf_dump_type_values(d, "0x%llx", (unsigned long long)field_val);
+
+	return err;
+}
+
+/* Zero check for ints, floats and ptrs: returns -ENODATA when every byte of
+ * the value is 0, 0 when any byte is set, and -EINVAL when the value is not
+ * 1-16 bytes wide.
+ */
+static int btf_dump_base_type_check_zero(struct btf_dump *d,
+					 const struct btf_type *t,
+					 __u32 id,
+					 const void *data)
+{
+	const __u8 *bytes = data;
+	int nr_bytes, i;
+
+	/* For pointer types, pointer size is not defined on a per-type basis.
+	 * On dump creation however, we store the pointer size.
+	 */
+	nr_bytes = btf_kind(t) == BTF_KIND_PTR ? d->ptr_sz : t->size;
+
+	if (nr_bytes < 1 || nr_bytes > 16) {
+		pr_warn("unexpected size %d for id [%u]\n", nr_bytes, id);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < nr_bytes; i++) {
+		if (bytes[i] != 0)
+			return 0;
+	}
+
+	return -ENODATA;
+}
+
+/* Report whether @data sits on a @data_sz-byte boundary.
+ *
+ * A non-positive @data_sz (e.g. a malformed type with size 0) is reported
+ * as unaligned instead of being used as a modulo divisor, which would be a
+ * division by zero.
+ */
+static bool ptr_is_aligned(const void *data, int data_sz)
+{
+	if (data_sz <= 0)
+		return false;
+
+	return ((uintptr_t)data) % data_sz == 0;
+}
+
+/* Print the integer of BTF int type @t stored at @data.
+ *
+ * 1/2/4/8-byte integers are printed in decimal, honouring the type's
+ * signedness; 16-byte integers are printed in hex. While a 1-byte-element
+ * array is being dumped, printable bytes are shown as 'c', and the first
+ * '\0' marks the array as terminated so that nothing further is printed
+ * for it.
+ *
+ * Returns 0 on success or -EINVAL for an unsupported size.
+ */
+static int btf_dump_int_data(struct btf_dump *d,
+			     const struct btf_type *t,
+			     __u32 type_id,
+			     const void *data,
+			     __u8 bits_offset)
+{
+	__u8 encoding = btf_int_encoding(t);
+	bool sign = encoding & BTF_INT_SIGNED;
+	char buf[16] __attribute__((aligned(16)));
+	int sz = t->size;
+
+	if (sz <= 0 || (size_t)sz > sizeof(buf)) {
+		pr_warn("unexpected size %d for id [%u]\n", sz, type_id);
+		return -EINVAL;
+	}
+
+	/* handle packed int data - accesses of integers not aligned on
+	 * int boundaries can cause problems on some platforms, so copy
+	 * such values to an aligned buffer and print them from there
+	 * exactly like aligned ones.
+	 */
+	if (!ptr_is_aligned(data, sz)) {
+		memcpy(buf, data, sz);
+		data = buf;
+	}
+
+	switch (sz) {
+	case 16: {
+		const __u64 *ints = data;
+		__u64 lsi, msi;
+
+		/* avoid use of __int128 as some 32-bit platforms do not
+		 * support it.
+		 */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+		lsi = ints[0];
+		msi = ints[1];
+#elif __BYTE_ORDER == __BIG_ENDIAN
+		lsi = ints[1];
+		msi = ints[0];
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+		if (msi == 0)
+			btf_dump_type_values(d, "0x%llx", (unsigned long long)lsi);
+		else
+			btf_dump_type_values(d, "0x%llx%016llx", (unsigned long long)msi,
+					     (unsigned long long)lsi);
+		break;
+	}
+	case 8:
+		if (sign)
+			btf_dump_type_values(d, "%lld", *(long long *)data);
+		else
+			btf_dump_type_values(d, "%llu", *(unsigned long long *)data);
+		break;
+	case 4:
+		if (sign)
+			btf_dump_type_values(d, "%d", *(__s32 *)data);
+		else
+			btf_dump_type_values(d, "%u", *(__u32 *)data);
+		break;
+	case 2:
+		if (sign)
+			btf_dump_type_values(d, "%d", *(__s16 *)data);
+		else
+			btf_dump_type_values(d, "%u", *(__u16 *)data);
+		break;
+	case 1:
+		if (d->typed_dump->is_array_char) {
+			/* check for null terminator */
+			if (d->typed_dump->is_array_terminated)
+				break;
+			if (*(char *)data == '\0') {
+				d->typed_dump->is_array_terminated = true;
+				break;
+			}
+			/* <ctype.h> functions require an unsigned char value */
+			if (isprint(*(unsigned char *)data)) {
+				btf_dump_type_values(d, "'%c'", *(char *)data);
+				break;
+			}
+		}
+		if (sign)
+			btf_dump_type_values(d, "%d", *(__s8 *)data);
+		else
+			btf_dump_type_values(d, "%u", *(__u8 *)data);
+		break;
+	default:
+		pr_warn("unexpected sz %d for id [%u]\n", sz, type_id);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Storage large enough for any floating-point value the dumper prints;
+ * btf_dump_float_data() copies unaligned values here and reads the member
+ * matching the BTF type's size.
+ */
+union float_data {
+ long double ld;
+ double d;
+ float f;
+};
+
+/* Print the floating-point value of BTF float type @t stored at @data.
+ *
+ * Only 4-, 8- and 16-byte floats are supported, and only when the value
+ * fits in union float_data on the host. Returns 0 on success or -EINVAL
+ * for any other size.
+ */
+static int btf_dump_float_data(struct btf_dump *d,
+			       const struct btf_type *t,
+			       __u32 type_id,
+			       const void *data)
+{
+	const union float_data *flp = data;
+	union float_data fl;
+	int sz = t->size;
+
+	/* Validate the size before it is used as an alignment divisor and as
+	 * a copy length into the fixed-size local union.
+	 */
+	if ((sz != 4 && sz != 8 && sz != 16) || (size_t)sz > sizeof(fl)) {
+		pr_warn("unexpected size %d for id [%u]\n", sz, type_id);
+		return -EINVAL;
+	}
+
+	/* handle unaligned data; copy to local union */
+	if (!ptr_is_aligned(data, sz)) {
+		memcpy(&fl, data, sz);
+		flp = &fl;
+	}
+
+	switch (sz) {
+	case 16:
+		btf_dump_type_values(d, "%Lf", flp->ld);
+		break;
+	case 8:
+		btf_dump_type_values(d, "%lf", flp->d);
+		break;
+	case 4:
+		btf_dump_type_values(d, "%f", flp->f);
+		break;
+	default:
+		pr_warn("unexpected size %d for id [%u]\n", sz, type_id);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Dump a BTF variable @v together with its value at @data.
+ *
+ * Output has the form [linkage] [type] [varname] = (type)value, for
+ * example "static int cpu_profile_flip = (int)1". Returns whatever
+ * dumping the variable's underlying type returns.
+ */
+static int btf_dump_var_data(struct btf_dump *d,
+			     const struct btf_type *v,
+			     __u32 id,
+			     const void *data)
+{
+	enum btf_func_linkage linkage = btf_var(v)->linkage;
+	__u32 var_type_id = v->type;
+	const struct btf_type *var_type;
+	const char *prefix;
+
+	/* global (and any unrecognized) linkage gets no prefix */
+	if (linkage == BTF_FUNC_STATIC)
+		prefix = "static ";
+	else if (linkage == BTF_FUNC_EXTERN)
+		prefix = "extern ";
+	else
+		prefix = "";
+
+	var_type = btf__type_by_id(d->btf, var_type_id);
+
+	btf_dump_printf(d, "%s", prefix);
+	btf_dump_emit_type_cast(d, var_type_id, false);
+	btf_dump_printf(d, " %s = ", btf_name_of(d, v->name_off));
+
+	return btf_dump_dump_type_data(d, NULL, var_type, var_type_id, data, 0, 0);
+}
+
+/* Dump the elements of BTF array type @t stored at @data as "[e0,e1,...]".
+ *
+ * Errors from individual elements are not propagated, so a partially
+ * available array is still displayed. Returns 0, or -EINVAL when the
+ * element size cannot be resolved to a positive value.
+ */
+static int btf_dump_array_data(struct btf_dump *d,
+			       const struct btf_type *t,
+			       __u32 id,
+			       const void *data)
+{
+	const struct btf_array *array = btf_array(t);
+	const struct btf_type *elem_type;
+	bool is_array_member, is_array_char, is_array_terminated;
+	__u32 i, elem_type_id;
+	__s64 elem_size;
+
+	elem_type_id = array->type;
+	elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
+	/* btf__resolve_size() reports failure as a negative value, so the
+	 * result must be kept in a signed variable for the check to work.
+	 */
+	elem_size = btf__resolve_size(d->btf, elem_type_id);
+	if (elem_size <= 0) {
+		pr_warn("unexpected elem size %lld for array type [%u]\n",
+			(long long)elem_size, id);
+		return -EINVAL;
+	}
+
+	/* The per-array iteration state below describes the array currently
+	 * being dumped only; save the enclosing state so it can be restored
+	 * once this array is done (it may be nested in, or followed by,
+	 * other arrays and members).
+	 */
+	is_array_member = d->typed_dump->is_array_member;
+	is_array_char = d->typed_dump->is_array_char;
+	is_array_terminated = d->typed_dump->is_array_terminated;
+
+	/*
+	 * BTF_INT_CHAR encoding never seems to be set for
+	 * char arrays, so if size is 1 and element is
+	 * printable as a char, we'll do that.
+	 */
+	d->typed_dump->is_array_char = btf_is_int(elem_type) && elem_size == 1;
+	d->typed_dump->is_array_terminated = false;
+
+	/* note that we increment depth before calling btf_dump_printf() below;
+	 * this is intentional. btf_dump_data_newline() will not print a
+	 * newline for depth 0 (since this leaves us with trailing newlines
+	 * at the end of typed display), so depth is incremented first.
+	 * For similar reasons, we decrement depth before showing the closing
+	 * bracket.
+	 */
+	d->typed_dump->depth++;
+	btf_dump_printf(d, "[%s", btf_dump_data_newline(d));
+
+	d->typed_dump->is_array_member = true;
+	for (i = 0; i < array->nelems; i++, data += elem_size) {
+		if (d->typed_dump->is_array_terminated)
+			break;
+		btf_dump_dump_type_data(d, NULL, elem_type, elem_type_id, data, 0, 0);
+	}
+	d->typed_dump->is_array_member = is_array_member;
+	d->typed_dump->is_array_char = is_array_char;
+	d->typed_dump->is_array_terminated = is_array_terminated;
+	d->typed_dump->depth--;
+	btf_dump_data_pfx(d);
+	btf_dump_type_values(d, "]");
+
+	return 0;
+}
+
+/* Dump the members of BTF struct/union type @t stored at @data as
+ * "{.m0 = ...,.m1 = ...,}".
+ *
+ * Returns the first negative error hit while dumping a member (the closing
+ * brace is not emitted in that case); otherwise the non-negative result of
+ * the last member dump, or 0 for a struct/union without members.
+ */
+static int btf_dump_struct_data(struct btf_dump *d,
+				const struct btf_type *t,
+				__u32 id,
+				const void *data)
+{
+	const struct btf_member *m = btf_members(t);
+	__u16 n = btf_vlen(t);
+	/* must start at 0: the loop does not run for an empty struct/union */
+	int i, err = 0;
+
+	/* note that we increment depth before calling btf_dump_printf() below;
+	 * this is intentional. btf_dump_data_newline() will not print a
+	 * newline for depth 0 (since this leaves us with trailing newlines
+	 * at the end of typed display), so depth is incremented first.
+	 * For similar reasons, we decrement depth before showing the closing
+	 * brace.
+	 */
+	d->typed_dump->depth++;
+	btf_dump_printf(d, "{%s", btf_dump_data_newline(d));
+
+	for (i = 0; i < n; i++, m++) {
+		const struct btf_type *mtype;
+		const char *mname;
+		__u32 moffset;
+		__u8 bit_sz;
+
+		mtype = btf__type_by_id(d->btf, m->type);
+		mname = btf_name_of(d, m->name_off);
+		moffset = btf_member_bit_offset(t, i);
+
+		/* hand the member's byte address plus its residual bit offset
+		 * and bitfield width (0 for non-bitfields) to the generic dumper
+		 */
+		bit_sz = btf_member_bitfield_size(t, i);
+		err = btf_dump_dump_type_data(d, mname, mtype, m->type, data + moffset / 8,
+					      moffset % 8, bit_sz);
+		if (err < 0)
+			return err;
+	}
+	d->typed_dump->depth--;
+	btf_dump_data_pfx(d);
+	btf_dump_type_values(d, "}");
+	return err;
+}
+
+/* Scratch storage used by btf_dump_ptr_data() for pointer values that
+ * cannot be read directly as a host pointer: p is printed for 4-byte
+ * pointers, lp otherwise.
+ */
+union ptr_data {
+ unsigned int p;
+ unsigned long long lp;
+};
+
+/* Print the pointer value stored at @data.
+ *
+ * When the value is aligned and the dump's pointer size equals the host
+ * pointer size it is printed with "%p"; otherwise it is copied out and
+ * printed in hex as a 4-byte or wider integer. Always returns 0.
+ */
+static int btf_dump_ptr_data(struct btf_dump *d,
+			     const struct btf_type *t,
+			     __u32 id,
+			     const void *data)
+{
+	union ptr_data pt;
+
+	if (!ptr_is_aligned(data, d->ptr_sz) || d->ptr_sz != sizeof(void *)) {
+		memcpy(&pt, data, d->ptr_sz);
+		if (d->ptr_sz == 4)
+			btf_dump_type_values(d, "0x%x", pt.p);
+		else
+			btf_dump_type_values(d, "0x%llx", pt.lp);
+		return 0;
+	}
+
+	btf_dump_type_values(d, "%p", *(void **)data);
+	return 0;
+}
+
+/* Read the value of BTF enum type @t stored at @data into @value,
+ * sign-extended to 64 bits.
+ *
+ * The value is copied out with memcpy(), so @data may have any alignment.
+ * Returns 0 on success or -EINVAL when the enum is not 1, 2, 4 or 8 bytes
+ * wide.
+ */
+static int btf_dump_get_enum_value(struct btf_dump *d,
+				   const struct btf_type *t,
+				   const void *data,
+				   __u32 id,
+				   __s64 *value)
+{
+	switch (t->size) {
+	case 8: {
+		__s64 v;
+
+		memcpy(&v, data, sizeof(v));
+		*value = v;
+		return 0;
+	}
+	case 4: {
+		__s32 v;
+
+		memcpy(&v, data, sizeof(v));
+		*value = v;
+		return 0;
+	}
+	case 2: {
+		__s16 v;
+
+		memcpy(&v, data, sizeof(v));
+		*value = v;
+		return 0;
+	}
+	case 1:
+		/* single bytes are always aligned */
+		*value = *(__s8 *)data;
+		return 0;
+	default:
+		pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id);
+		return -EINVAL;
+	}
+}
+
+/* Print the enum value stored at @data: the name of the first enumerator
+ * whose value matches, or the numeric value when none does.
+ *
+ * Returns 0 on success or the error from reading the value.
+ */
+static int btf_dump_enum_data(struct btf_dump *d,
+			      const struct btf_type *t,
+			      __u32 id,
+			      const void *data)
+{
+	const struct btf_enum *e;
+	__s64 value;
+	int i, err;
+
+	err = btf_dump_get_enum_value(d, t, data, id, &value);
+	if (err)
+		return err;
+
+	for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) {
+		if (value != e->val)
+			continue;
+		btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
+		return 0;
+	}
+
+	/* value is 64 bits wide, so the conversion must be too; passing it
+	 * to "%d" is a format/argument mismatch.
+	 */
+	btf_dump_type_values(d, "%lld", (long long)value);
+	return 0;
+}
+
+/* Dump a BTF data section: a SEC("name") header followed by each variable
+ * of the section, read from @data at the variable's recorded offset and
+ * terminated with ';'.
+ *
+ * Returns 0, or the first negative error from dumping a variable.
+ */
+static int btf_dump_datasec_data(struct btf_dump *d,
+				 const struct btf_type *t,
+				 __u32 id,
+				 const void *data)
+{
+	const struct btf_var_secinfo *secinfos;
+	__u32 idx;
+
+	btf_dump_type_values(d, "SEC(\"%s\") ", btf_name_of(d, t->name_off));
+
+	secinfos = btf_var_secinfos(t);
+	for (idx = 0; idx < btf_vlen(t); idx++) {
+		const struct btf_var_secinfo *vsi = &secinfos[idx];
+		const struct btf_type *var = btf__type_by_id(d->btf, vsi->type);
+		int err;
+
+		err = btf_dump_dump_type_data(d, NULL, var, vsi->type,
+					      data + vsi->offset, 0, 0);
+		if (err < 0)
+			return err;
+		btf_dump_printf(d, ";");
+	}
+	return 0;
+}
+
+/* Return the size of type @id. For base types (int, float, ptr, enum)
+ * return -E2BIG instead when the value would extend past the end of the
+ * data being dumped. -EINVAL is returned when the size cannot be resolved
+ * (or is not below INT_MAX) or the type cannot be resolved.
+ */
+static int btf_dump_type_data_check_overflow(struct btf_dump *d,
+					      const struct btf_type *t,
+					      __u32 id,
+					      const void *data,
+					      __u8 bits_offset)
+{
+	__s64 size = btf__resolve_size(d->btf, id);
+	bool is_base_type;
+	__u16 kind;
+
+	if (size < 0 || size >= INT_MAX) {
+		pr_warn("unexpected size [%zu] for id [%u]\n",
+			(size_t)size, id);
+		return -EINVAL;
+	}
+
+	t = skip_mods_and_typedefs(d->btf, id, NULL);
+	if (!t) {
+		pr_warn("unexpected error skipping mods/typedefs for id [%u]\n",
+			id);
+		return -EINVAL;
+	}
+
+	/* Overflow checking is restricted to base types: part of a struct,
+	 * union or array should still be shown even when there is not
+	 * enough data for the full object. Checking each base type ensures
+	 * partial display succeeds without ever reading past the end of
+	 * the data and displaying bogus values.
+	 */
+	kind = btf_kind(t);
+	is_base_type = kind == BTF_KIND_INT || kind == BTF_KIND_FLOAT ||
+		       kind == BTF_KIND_PTR || kind == BTF_KIND_ENUM;
+
+	if (is_base_type &&
+	    data + bits_offset / 8 + size > d->typed_dump->data_end)
+		return -E2BIG;
+
+	return (int)size;
+}
+
+/* Decide whether the value of type @id at @data should be skipped because
+ * it is zero.
+ *
+ * Returns -ENODATA when the value is considered zero and must not be
+ * shown, 0 when it must be shown, or another negative error when the value
+ * could not be examined.
+ */
+static int btf_dump_type_data_check_zero(struct btf_dump *d,
+					  const struct btf_type *t,
+					  __u32 id,
+					  const void *data,
+					  __u8 bits_offset,
+					  __u8 bit_sz)
+{
+	__s64 value;
+	int i, err;
+
+	/* toplevel exceptions; we show zero values if
+	 * - we ask for them (emit_zeroes)
+	 * - if we are at top-level so we see "struct empty { }"
+	 * - or if we are an array member and the array is non-empty and
+	 *   not a char array; we don't want to be in a situation where we
+	 *   have an integer array 0, 1, 0, 1 and only show non-zero values.
+	 *   If the array contains zeroes only, or is a char array starting
+	 *   with a '\0', the array-level check_zero() will prevent showing it;
+	 *   we are concerned with determining zero value at the array member
+	 *   level here.
+	 */
+	if (d->typed_dump->emit_zeroes || d->typed_dump->depth == 0 ||
+	    (d->typed_dump->is_array_member &&
+	     !d->typed_dump->is_array_char))
+		return 0;
+
+	t = skip_mods_and_typedefs(d->btf, id, NULL);
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_INT:
+		if (bit_sz)
+			return btf_dump_bitfield_check_zero(d, t, data, bits_offset, bit_sz);
+		return btf_dump_base_type_check_zero(d, t, id, data);
+	case BTF_KIND_FLOAT:
+	case BTF_KIND_PTR:
+		return btf_dump_base_type_check_zero(d, t, id, data);
+	case BTF_KIND_ARRAY: {
+		const struct btf_array *array = btf_array(t);
+		const struct btf_type *elem_type;
+		__u32 elem_type_id, elem_size;
+		bool ischar;
+
+		elem_type_id = array->type;
+		elem_size = btf__resolve_size(d->btf, elem_type_id);
+		elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
+
+		ischar = btf_is_int(elem_type) && elem_size == 1;
+
+		/* check all elements; if _any_ element is nonzero, all
+		 * of array is displayed. We make an exception however
+		 * for char arrays where the first element is 0; these
+		 * are considered zeroed also, even if later elements are
+		 * non-zero because the string is terminated.
+		 */
+		for (i = 0; i < array->nelems; i++) {
+			if (i == 0 && ischar && *(char *)data == 0)
+				return -ENODATA;
+			err = btf_dump_type_data_check_zero(d, elem_type,
+							    elem_type_id,
+							    data +
+							    (i * elem_size),
+							    bits_offset, 0);
+			if (err != -ENODATA)
+				return err;
+		}
+		return -ENODATA;
+	}
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *m = btf_members(t);
+		__u16 n = btf_vlen(t);
+
+		/* if any struct/union member is non-zero, the struct/union
+		 * is considered non-zero and dumped.
+		 */
+		for (i = 0; i < n; i++, m++) {
+			const struct btf_type *mtype;
+			__u32 moffset;
+
+			mtype = btf__type_by_id(d->btf, m->type);
+			moffset = btf_member_bit_offset(t, i);
+
+			/* btf_int_bits() does not store member bitfield size;
+			 * bitfield size needs to be stored here so int display
+			 * of member can retrieve it.
+			 */
+			bit_sz = btf_member_bitfield_size(t, i);
+			err = btf_dump_type_data_check_zero(d, mtype, m->type, data + moffset / 8,
+							    moffset % 8, bit_sz);
+			/* "zero" is reported as the negative -ENODATA; keep
+			 * scanning only while members are zero, and stop at
+			 * the first non-zero member or real error.
+			 */
+			if (err != -ENODATA)
+				return err;
+		}
+		return -ENODATA;
+	}
+	case BTF_KIND_ENUM:
+		err = btf_dump_get_enum_value(d, t, data, id, &value);
+		if (err)
+			return err;
+		if (value == 0)
+			return -ENODATA;
+		return 0;
+	default:
+		return 0;
+	}
+}
+
+/* Dump one value of BTF type @id located at @data.
+ *
+ * @fname:       member name to print as ".name = ", or NULL/"" for none
+ * @bits_offset: residual bit offset of the value within its first byte
+ * @bit_sz:      bitfield width in bits, 0 when the value is not a bitfield
+ *
+ * Zero-valued data that is not to be shown is skipped silently. Returns
+ * the size of the data dumped, or a negative error.
+ */
+static int btf_dump_dump_type_data(struct btf_dump *d,
+				   const char *fname,
+				   const struct btf_type *t,
+				   __u32 id,
+				   const void *data,
+				   __u8 bits_offset,
+				   __u8 bit_sz)
+{
+	int size, err;
+
+	size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset);
+	if (size < 0)
+		return size;
+
+	/* zeroed data is expected and not an error, so simply skip
+	 * dumping such data. Record other errors however.
+	 */
+	err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz);
+	if (err == -ENODATA)
+		return size;
+	if (err)
+		return err;
+
+	btf_dump_data_pfx(d);
+
+	if (!d->typed_dump->skip_names) {
+		if (fname && fname[0] != '\0')
+			btf_dump_printf(d, ".%s = ", fname);
+		btf_dump_emit_type_cast(d, id, true);
+	}
+
+	t = skip_mods_and_typedefs(d->btf, id, NULL);
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_UNKN:
+	case BTF_KIND_FWD:
+	case BTF_KIND_FUNC:
+	case BTF_KIND_FUNC_PROTO:
+		err = btf_dump_unsupported_data(d, t, id);
+		break;
+	case BTF_KIND_INT:
+		err = bit_sz ? btf_dump_bitfield_data(d, t, data, bits_offset, bit_sz)
+			     : btf_dump_int_data(d, t, id, data, bits_offset);
+		break;
+	case BTF_KIND_FLOAT:
+		err = btf_dump_float_data(d, t, id, data);
+		break;
+	case BTF_KIND_PTR:
+		err = btf_dump_ptr_data(d, t, id, data);
+		break;
+	case BTF_KIND_ARRAY:
+		err = btf_dump_array_data(d, t, id, data);
+		break;
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		err = btf_dump_struct_data(d, t, id, data);
+		break;
+	case BTF_KIND_ENUM: {
+		__u64 raw_bits;
+		__s64 enum_val;
+
+		/* plain (non-bitfield) enum values are read in place */
+		if (!bit_sz) {
+			err = btf_dump_enum_data(d, t, id, data);
+			break;
+		}
+
+		/* bitfield enum: extract the bits first, then display the
+		 * extracted value as an enum
+		 */
+		err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz,
+						  &raw_bits);
+		if (err)
+			break;
+		enum_val = (__s64)raw_bits;
+		err = btf_dump_enum_data(d, t, id, &enum_val);
+		break;
+	}
+	case BTF_KIND_VAR:
+		err = btf_dump_var_data(d, t, id, data);
+		break;
+	case BTF_KIND_DATASEC:
+		err = btf_dump_datasec_data(d, t, id, data);
+		break;
+	default:
+		pr_warn("unexpected kind [%u] for id [%u]\n",
+			BTF_INFO_KIND(t->info), id);
+		return -EINVAL;
+	}
+
+	return err < 0 ? err : size;
+}
+
+/* Dump the @data_sz bytes at @data as a value of BTF type @id.
+ *
+ * @opts is optional (may be NULL); see struct btf_dump_type_data_opts.
+ * Invalid @opts yield -EINVAL and an unknown type @id yields -ENOENT;
+ * otherwise the result of the dump (size of the data dumped, or a negative
+ * error) is passed through libbpf_err().
+ */
+int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
+			     const void *data, size_t data_sz,
+			     const struct btf_dump_type_data_opts *opts)
+{
+	struct btf_dump_data typed_dump = {};
+	const struct btf_type *t;
+	const char *indent_str;
+	int ret;
+
+	if (!OPTS_VALID(opts, btf_dump_type_data_opts))
+		return libbpf_err(-EINVAL);
+
+	t = btf__type_by_id(d->btf, id);
+	if (!t)
+		return libbpf_err(-ENOENT);
+
+	d->typed_dump = &typed_dump;
+	d->typed_dump->data_end = data + data_sz;
+	d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0);
+
+	/* opts may be NULL (or an older, shorter struct), so every field,
+	 * including indent_str, has to be read through OPTS_GET().
+	 */
+	indent_str = OPTS_GET(opts, indent_str, NULL);
+
+	/* default indent string is a tab */
+	if (!indent_str)
+		d->typed_dump->indent_str[0] = '\t';
+	else
+		/* destination is zero-initialized, so this copies at most
+		 * sizeof - 1 characters and always NUL-terminates
+		 */
+		strncat(d->typed_dump->indent_str, indent_str,
+			sizeof(d->typed_dump->indent_str) - 1);
+
+	d->typed_dump->compact = OPTS_GET(opts, compact, false);
+	d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false);
+	d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false);
+
+	ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0);
+
+	/* typed_dump lives on this stack frame; do not leave it dangling */
+	d->typed_dump = NULL;
+
+	return libbpf_err(ret);
+}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6f5e2757bb3c..88d8825fc6f6 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -193,6 +193,8 @@ enum kern_feature_id {
FEAT_MODULE_BTF,
/* BTF_KIND_FLOAT support */
FEAT_BTF_FLOAT,
+ /* BPF perf link support */
+ FEAT_PERF_LINK,
__FEAT_CNT,
};
@@ -498,6 +500,10 @@ struct bpf_object {
* it at load time.
*/
struct btf *btf_vmlinux;
+ /* Path to the custom BTF to be used for BPF CO-RE relocations as an
+ * override for vmlinux BTF.
+ */
+ char *btf_custom_path;
/* vmlinux BTF override for CO-RE relocations */
struct btf *btf_vmlinux_override;
/* Lazily initialized kernel module BTFs */
@@ -591,11 +597,6 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)
insn->off == 0;
}
-static bool is_ldimm64_insn(struct bpf_insn *insn)
-{
- return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
-}
-
static bool is_call_insn(const struct bpf_insn *insn)
{
return insn->code == (BPF_JMP | BPF_CALL);
@@ -2645,8 +2646,10 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
struct bpf_program *prog;
int i;
- /* CO-RE relocations need kernel BTF */
- if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
+ /* CO-RE relocations need kernel BTF, only when btf_custom_path
+ * is not specified
+ */
+ if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
return true;
/* Support for typed ksyms needs kernel BTF */
@@ -2679,7 +2682,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
if (!force && !obj_needs_vmlinux_btf(obj))
return 0;
- obj->btf_vmlinux = libbpf_find_kernel_btf();
+ obj->btf_vmlinux = btf__load_vmlinux_btf();
err = libbpf_get_error(obj->btf_vmlinux);
if (err) {
pr_warn("Error loading vmlinux BTF: %d\n", err);
@@ -2768,7 +2771,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
*/
btf__set_fd(kern_btf, 0);
} else {
- err = btf__load(kern_btf);
+ err = btf__load_into_kernel(kern_btf);
}
if (sanitize) {
if (!err) {
@@ -3894,6 +3897,42 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
return 0;
}
+/* Fill 'info' with the basic map parameters found in
+ * /proc/<pid>/fdinfo/<fd> of the calling process. Callers use this as a
+ * fallback when bpf_obj_get_info_by_fd() fails with EINVAL.
+ *
+ * 'info' is zeroed first; only type, key_size, value_size, max_entries and
+ * map_flags are filled in, and only for the lines actually present in the
+ * file.
+ *
+ * Returns 0 on success (even if no known line was found) or -errno if the
+ * fdinfo file could not be opened.
+ */
+static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
+{
+	char file[PATH_MAX], buff[4096];
+	FILE *fp;
+	__u32 val;
+	int flags;
+	int err;
+
+	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
+	memset(info, 0, sizeof(*info));
+
+	fp = fopen(file, "r");
+	if (!fp) {
+		err = -errno;
+		pr_warn("failed to open %s: %d. No procfs support?\n", file,
+			err);
+		return err;
+	}
+
+	/* each line is matched against the known "name:\tvalue" keys in turn;
+	 * lines that match none of them are ignored
+	 */
+	while (fgets(buff, sizeof(buff), fp)) {
+		if (sscanf(buff, "map_type:\t%u", &val) == 1)
+			info->type = val;
+		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
+			info->key_size = val;
+		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
+			info->value_size = val;
+		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
+			info->max_entries = val;
+		/* %i auto-detects the base (so a 0x-prefixed value parses as
+		 * hex) but requires an 'int *' argument, hence the separate
+		 * signed temporary instead of the __u32 used for %u above
+		 */
+		else if (sscanf(buff, "map_flags:\t%i", &flags) == 1)
+			info->map_flags = flags;
+	}
+
+	fclose(fp);
+
+	return 0;
+}
+
int bpf_map__reuse_fd(struct bpf_map *map, int fd)
{
struct bpf_map_info info = {};
@@ -3902,6 +3941,8 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
char *new_name;
err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err && errno == EINVAL)
+ err = bpf_get_map_info_from_fdinfo(fd, &info);
if (err)
return libbpf_err(err);
@@ -4298,6 +4339,37 @@ static int probe_module_btf(void)
return !err;
}
+/* Feature probe: check whether bpf_link_create() accepts BPF_PERF_EVENT as
+ * an attach type.
+ *
+ * Returns 1 if link creation on an invalid perf_event FD failed with EBADF,
+ * 0 if it failed with any other error (or unexpectedly succeeded), and
+ * -errno if the throwaway tracepoint program could not be loaded.
+ */
+static int probe_perf_link(void)
+{
+	/* two-instruction program: move 0 into R0, then exit */
+	struct bpf_insn trivial_prog[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	struct bpf_load_program_attr load_attr;
+	int prog_fd, link_fd, link_err;
+
+	memset(&load_attr, 0, sizeof(load_attr));
+	load_attr.license = "GPL";
+	load_attr.insns_cnt = ARRAY_SIZE(trivial_prog);
+	load_attr.insns = trivial_prog;
+	load_attr.prog_type = BPF_PROG_TYPE_TRACEPOINT;
+
+	prog_fd = bpf_load_program_xattr(&load_attr, NULL, 0);
+	if (prog_fd < 0)
+		return -errno;
+
+	/* An invalid perf_event FD (-1) is passed on purpose: if perf links
+	 * are supported the call is expected to fail with EBADF, otherwise
+	 * EINVAL should come back.
+	 */
+	link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
+	/* grab errno right away, close() below may overwrite it */
+	link_err = -errno;
+
+	if (link_fd >= 0)
+		close(link_fd);
+	close(prog_fd);
+
+	/* an unexpectedly successful link creation counts as "unsupported" */
+	if (link_fd >= 0)
+		return 0;
+
+	return link_err == -EBADF;
+}
+
enum kern_feature_result {
FEAT_UNKNOWN = 0,
FEAT_SUPPORTED = 1,
@@ -4348,6 +4420,9 @@ static struct kern_feature_desc {
[FEAT_BTF_FLOAT] = {
"BTF_KIND_FLOAT support", probe_kern_btf_float,
},
+ [FEAT_PERF_LINK] = {
+ "BPF perf link support", probe_perf_link,
+ },
};
static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
@@ -4381,12 +4456,16 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
struct bpf_map_info map_info = {};
char msg[STRERR_BUFSIZE];
__u32 map_info_len;
+ int err;
map_info_len = sizeof(map_info);
- if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) {
- pr_warn("failed to get map info for map FD %d: %s\n",
- map_fd, libbpf_strerror_r(errno, msg, sizeof(msg)));
+ err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
+ if (err && errno == EINVAL)
+ err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
+ if (err) {
+ pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
+ libbpf_strerror_r(errno, msg, sizeof(msg)));
return false;
}
@@ -4479,6 +4558,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
{
struct bpf_create_map_attr create_attr;
struct bpf_map_def *def = &map->def;
+ int err = 0;
memset(&create_attr, 0, sizeof(create_attr));
@@ -4521,8 +4601,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
if (bpf_map_type__is_map_in_map(def->type)) {
if (map->inner_map) {
- int err;
-
err = bpf_object__create_map(obj, map->inner_map, true);
if (err) {
pr_warn("map '%s': failed to create inner map: %d\n",
@@ -4547,8 +4625,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
if (map->fd < 0 && (create_attr.btf_key_type_id ||
create_attr.btf_value_type_id)) {
char *cp, errmsg[STRERR_BUFSIZE];
- int err = -errno;
+ err = -errno;
cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
map->name, cp, err);
@@ -4560,8 +4638,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
map->fd = bpf_create_map_xattr(&create_attr);
}
- if (map->fd < 0)
- return -errno;
+ err = map->fd < 0 ? -errno : 0;
if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
if (obj->gen_loader)
@@ -4570,7 +4647,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
zfree(&map->inner_map);
}
- return 0;
+ return err;
}
static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
@@ -4616,10 +4693,13 @@ bpf_object__create_maps(struct bpf_object *obj)
char *cp, errmsg[STRERR_BUFSIZE];
unsigned int i, j;
int err;
+ bool retried;
for (i = 0; i < obj->nr_maps; i++) {
map = &obj->maps[i];
+ retried = false;
+retry:
if (map->pin_path) {
err = bpf_object__reuse_map(map);
if (err) {
@@ -4627,6 +4707,12 @@ bpf_object__create_maps(struct bpf_object *obj)
map->name);
goto err_out;
}
+ if (retried && map->fd < 0) {
+ pr_warn("map '%s': cannot find pinned map\n",
+ map->name);
+ err = -ENOENT;
+ goto err_out;
+ }
}
if (map->fd >= 0) {
@@ -4660,9 +4746,13 @@ bpf_object__create_maps(struct bpf_object *obj)
if (map->pin_path && !map->pinned) {
err = bpf_map__pin(map, NULL);
if (err) {
+ zclose(map->fd);
+ if (!retried && err == -EEXIST) {
+ retried = true;
+ goto retry;
+ }
pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
map->name, map->pin_path, err);
- zclose(map->fd);
goto err_out;
}
}
@@ -4679,279 +4769,6 @@ err_out:
return err;
}
-#define BPF_CORE_SPEC_MAX_LEN 64
-
-/* represents BPF CO-RE field or array element accessor */
-struct bpf_core_accessor {
- __u32 type_id; /* struct/union type or array element type */
- __u32 idx; /* field index or array index */
- const char *name; /* field name or NULL for array accessor */
-};
-
-struct bpf_core_spec {
- const struct btf *btf;
- /* high-level spec: named fields and array indices only */
- struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
- /* original unresolved (no skip_mods_or_typedefs) root type ID */
- __u32 root_type_id;
- /* CO-RE relocation kind */
- enum bpf_core_relo_kind relo_kind;
- /* high-level spec length */
- int len;
- /* raw, low-level spec: 1-to-1 with accessor spec string */
- int raw_spec[BPF_CORE_SPEC_MAX_LEN];
- /* raw spec length */
- int raw_len;
- /* field bit offset represented by spec */
- __u32 bit_offset;
-};
-
-static bool str_is_empty(const char *s)
-{
- return !s || !s[0];
-}
-
-static bool is_flex_arr(const struct btf *btf,
- const struct bpf_core_accessor *acc,
- const struct btf_array *arr)
-{
- const struct btf_type *t;
-
- /* not a flexible array, if not inside a struct or has non-zero size */
- if (!acc->name || arr->nelems > 0)
- return false;
-
- /* has to be the last member of enclosing struct */
- t = btf__type_by_id(btf, acc->type_id);
- return acc->idx == btf_vlen(t) - 1;
-}
-
-static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
-{
- switch (kind) {
- case BPF_FIELD_BYTE_OFFSET: return "byte_off";
- case BPF_FIELD_BYTE_SIZE: return "byte_sz";
- case BPF_FIELD_EXISTS: return "field_exists";
- case BPF_FIELD_SIGNED: return "signed";
- case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
- case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
- case BPF_TYPE_ID_LOCAL: return "local_type_id";
- case BPF_TYPE_ID_TARGET: return "target_type_id";
- case BPF_TYPE_EXISTS: return "type_exists";
- case BPF_TYPE_SIZE: return "type_size";
- case BPF_ENUMVAL_EXISTS: return "enumval_exists";
- case BPF_ENUMVAL_VALUE: return "enumval_value";
- default: return "unknown";
- }
-}
-
-static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
-{
- switch (kind) {
- case BPF_FIELD_BYTE_OFFSET:
- case BPF_FIELD_BYTE_SIZE:
- case BPF_FIELD_EXISTS:
- case BPF_FIELD_SIGNED:
- case BPF_FIELD_LSHIFT_U64:
- case BPF_FIELD_RSHIFT_U64:
- return true;
- default:
- return false;
- }
-}
-
-static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
-{
- switch (kind) {
- case BPF_TYPE_ID_LOCAL:
- case BPF_TYPE_ID_TARGET:
- case BPF_TYPE_EXISTS:
- case BPF_TYPE_SIZE:
- return true;
- default:
- return false;
- }
-}
-
-static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
-{
- switch (kind) {
- case BPF_ENUMVAL_EXISTS:
- case BPF_ENUMVAL_VALUE:
- return true;
- default:
- return false;
- }
-}
-
-/*
- * Turn bpf_core_relo into a low- and high-level spec representation,
- * validating correctness along the way, as well as calculating resulting
- * field bit offset, specified by accessor string. Low-level spec captures
- * every single level of nestedness, including traversing anonymous
- * struct/union members. High-level one only captures semantically meaningful
- * "turning points": named fields and array indicies.
- * E.g., for this case:
- *
- * struct sample {
- * int __unimportant;
- * struct {
- * int __1;
- * int __2;
- * int a[7];
- * };
- * };
- *
- * struct sample *s = ...;
- *
- * int x = &s->a[3]; // access string = '0:1:2:3'
- *
- * Low-level spec has 1:1 mapping with each element of access string (it's
- * just a parsed access string representation): [0, 1, 2, 3].
- *
- * High-level spec will capture only 3 points:
- * - intial zero-index access by pointer (&s->... is the same as &s[0]...);
- * - field 'a' access (corresponds to '2' in low-level spec);
- * - array element #3 access (corresponds to '3' in low-level spec).
- *
- * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
- * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
- * spec and raw_spec are kept empty.
- *
- * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
- * string to specify enumerator's value index that need to be relocated.
- */
-static int bpf_core_parse_spec(const struct btf *btf,
- __u32 type_id,
- const char *spec_str,
- enum bpf_core_relo_kind relo_kind,
- struct bpf_core_spec *spec)
-{
- int access_idx, parsed_len, i;
- struct bpf_core_accessor *acc;
- const struct btf_type *t;
- const char *name;
- __u32 id;
- __s64 sz;
-
- if (str_is_empty(spec_str) || *spec_str == ':')
- return -EINVAL;
-
- memset(spec, 0, sizeof(*spec));
- spec->btf = btf;
- spec->root_type_id = type_id;
- spec->relo_kind = relo_kind;
-
- /* type-based relocations don't have a field access string */
- if (core_relo_is_type_based(relo_kind)) {
- if (strcmp(spec_str, "0"))
- return -EINVAL;
- return 0;
- }
-
- /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
- while (*spec_str) {
- if (*spec_str == ':')
- ++spec_str;
- if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
- return -EINVAL;
- if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
- return -E2BIG;
- spec_str += parsed_len;
- spec->raw_spec[spec->raw_len++] = access_idx;
- }
-
- if (spec->raw_len == 0)
- return -EINVAL;
-
- t = skip_mods_and_typedefs(btf, type_id, &id);
- if (!t)
- return -EINVAL;
-
- access_idx = spec->raw_spec[0];
- acc = &spec->spec[0];
- acc->type_id = id;
- acc->idx = access_idx;
- spec->len++;
-
- if (core_relo_is_enumval_based(relo_kind)) {
- if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
- return -EINVAL;
-
- /* record enumerator name in a first accessor */
- acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
- return 0;
- }
-
- if (!core_relo_is_field_based(relo_kind))
- return -EINVAL;
-
- sz = btf__resolve_size(btf, id);
- if (sz < 0)
- return sz;
- spec->bit_offset = access_idx * sz * 8;
-
- for (i = 1; i < spec->raw_len; i++) {
- t = skip_mods_and_typedefs(btf, id, &id);
- if (!t)
- return -EINVAL;
-
- access_idx = spec->raw_spec[i];
- acc = &spec->spec[spec->len];
-
- if (btf_is_composite(t)) {
- const struct btf_member *m;
- __u32 bit_offset;
-
- if (access_idx >= btf_vlen(t))
- return -EINVAL;
-
- bit_offset = btf_member_bit_offset(t, access_idx);
- spec->bit_offset += bit_offset;
-
- m = btf_members(t) + access_idx;
- if (m->name_off) {
- name = btf__name_by_offset(btf, m->name_off);
- if (str_is_empty(name))
- return -EINVAL;
-
- acc->type_id = id;
- acc->idx = access_idx;
- acc->name = name;
- spec->len++;
- }
-
- id = m->type;
- } else if (btf_is_array(t)) {
- const struct btf_array *a = btf_array(t);
- bool flex;
-
- t = skip_mods_and_typedefs(btf, a->type, &id);
- if (!t)
- return -EINVAL;
-
- flex = is_flex_arr(btf, acc - 1, a);
- if (!flex && access_idx >= a->nelems)
- return -EINVAL;
-
- spec->spec[spec->len].type_id = id;
- spec->spec[spec->len].idx = access_idx;
- spec->len++;
-
- sz = btf__resolve_size(btf, id);
- if (sz < 0)
- return sz;
- spec->bit_offset += access_idx * sz * 8;
- } else {
- pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
- type_id, spec_str, i, id, btf_kind_str(t));
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
static bool bpf_core_is_flavor_sep(const char *s)
{
/* check X___Y name pattern, where X and Y are not underscores */
@@ -4964,7 +4781,7 @@ static bool bpf_core_is_flavor_sep(const char *s)
* before last triple underscore. Struct name part after last triple
* underscore is ignored by BPF CO-RE relocation during relocation matching.
*/
-static size_t bpf_core_essential_name_len(const char *name)
+size_t bpf_core_essential_name_len(const char *name)
{
size_t n = strlen(name);
int i;
@@ -4976,34 +4793,20 @@ static size_t bpf_core_essential_name_len(const char *name)
return n;
}
-struct core_cand
-{
- const struct btf *btf;
- const struct btf_type *t;
- const char *name;
- __u32 id;
-};
-
-/* dynamically sized list of type IDs and its associated struct btf */
-struct core_cand_list {
- struct core_cand *cands;
- int len;
-};
-
-static void bpf_core_free_cands(struct core_cand_list *cands)
+static void bpf_core_free_cands(struct bpf_core_cand_list *cands)
{
free(cands->cands);
free(cands);
}
-static int bpf_core_add_cands(struct core_cand *local_cand,
+static int bpf_core_add_cands(struct bpf_core_cand *local_cand,
size_t local_essent_len,
const struct btf *targ_btf,
const char *targ_btf_name,
int targ_start_id,
- struct core_cand_list *cands)
+ struct bpf_core_cand_list *cands)
{
- struct core_cand *new_cands, *cand;
+ struct bpf_core_cand *new_cands, *cand;
const struct btf_type *t;
const char *targ_name;
size_t targ_essent_len;
@@ -5139,11 +4942,11 @@ err_out:
return 0;
}
-static struct core_cand_list *
+static struct bpf_core_cand_list *
bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
{
- struct core_cand local_cand = {};
- struct core_cand_list *cands;
+ struct bpf_core_cand local_cand = {};
+ struct bpf_core_cand_list *cands;
const struct btf *main_btf;
size_t local_essent_len;
int err, i;
@@ -5197,165 +5000,6 @@ err_out:
return ERR_PTR(err);
}
-/* Check two types for compatibility for the purpose of field access
- * relocation. const/volatile/restrict and typedefs are skipped to ensure we
- * are relocating semantically compatible entities:
- * - any two STRUCTs/UNIONs are compatible and can be mixed;
- * - any two FWDs are compatible, if their names match (modulo flavor suffix);
- * - any two PTRs are always compatible;
- * - for ENUMs, names should be the same (ignoring flavor suffix) or at
- * least one of enums should be anonymous;
- * - for ENUMs, check sizes, names are ignored;
- * - for INT, size and signedness are ignored;
- * - any two FLOATs are always compatible;
- * - for ARRAY, dimensionality is ignored, element types are checked for
- * compatibility recursively;
- * - everything else shouldn't be ever a target of relocation.
- * These rules are not set in stone and probably will be adjusted as we get
- * more experience with using BPF CO-RE relocations.
- */
-static int bpf_core_fields_are_compat(const struct btf *local_btf,
- __u32 local_id,
- const struct btf *targ_btf,
- __u32 targ_id)
-{
- const struct btf_type *local_type, *targ_type;
-
-recur:
- local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
- targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
- if (!local_type || !targ_type)
- return -EINVAL;
-
- if (btf_is_composite(local_type) && btf_is_composite(targ_type))
- return 1;
- if (btf_kind(local_type) != btf_kind(targ_type))
- return 0;
-
- switch (btf_kind(local_type)) {
- case BTF_KIND_PTR:
- case BTF_KIND_FLOAT:
- return 1;
- case BTF_KIND_FWD:
- case BTF_KIND_ENUM: {
- const char *local_name, *targ_name;
- size_t local_len, targ_len;
-
- local_name = btf__name_by_offset(local_btf,
- local_type->name_off);
- targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
- local_len = bpf_core_essential_name_len(local_name);
- targ_len = bpf_core_essential_name_len(targ_name);
- /* one of them is anonymous or both w/ same flavor-less names */
- return local_len == 0 || targ_len == 0 ||
- (local_len == targ_len &&
- strncmp(local_name, targ_name, local_len) == 0);
- }
- case BTF_KIND_INT:
- /* just reject deprecated bitfield-like integers; all other
- * integers are by default compatible between each other
- */
- return btf_int_offset(local_type) == 0 &&
- btf_int_offset(targ_type) == 0;
- case BTF_KIND_ARRAY:
- local_id = btf_array(local_type)->type;
- targ_id = btf_array(targ_type)->type;
- goto recur;
- default:
- pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
- btf_kind(local_type), local_id, targ_id);
- return 0;
- }
-}
-
-/*
- * Given single high-level named field accessor in local type, find
- * corresponding high-level accessor for a target type. Along the way,
- * maintain low-level spec for target as well. Also keep updating target
- * bit offset.
- *
- * Searching is performed through recursive exhaustive enumeration of all
- * fields of a struct/union. If there are any anonymous (embedded)
- * structs/unions, they are recursively searched as well. If field with
- * desired name is found, check compatibility between local and target types,
- * before returning result.
- *
- * 1 is returned, if field is found.
- * 0 is returned if no compatible field is found.
- * <0 is returned on error.
- */
-static int bpf_core_match_member(const struct btf *local_btf,
- const struct bpf_core_accessor *local_acc,
- const struct btf *targ_btf,
- __u32 targ_id,
- struct bpf_core_spec *spec,
- __u32 *next_targ_id)
-{
- const struct btf_type *local_type, *targ_type;
- const struct btf_member *local_member, *m;
- const char *local_name, *targ_name;
- __u32 local_id;
- int i, n, found;
-
- targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
- if (!targ_type)
- return -EINVAL;
- if (!btf_is_composite(targ_type))
- return 0;
-
- local_id = local_acc->type_id;
- local_type = btf__type_by_id(local_btf, local_id);
- local_member = btf_members(local_type) + local_acc->idx;
- local_name = btf__name_by_offset(local_btf, local_member->name_off);
-
- n = btf_vlen(targ_type);
- m = btf_members(targ_type);
- for (i = 0; i < n; i++, m++) {
- __u32 bit_offset;
-
- bit_offset = btf_member_bit_offset(targ_type, i);
-
- /* too deep struct/union/array nesting */
- if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
- return -E2BIG;
-
- /* speculate this member will be the good one */
- spec->bit_offset += bit_offset;
- spec->raw_spec[spec->raw_len++] = i;
-
- targ_name = btf__name_by_offset(targ_btf, m->name_off);
- if (str_is_empty(targ_name)) {
- /* embedded struct/union, we need to go deeper */
- found = bpf_core_match_member(local_btf, local_acc,
- targ_btf, m->type,
- spec, next_targ_id);
- if (found) /* either found or error */
- return found;
- } else if (strcmp(local_name, targ_name) == 0) {
- /* matching named field */
- struct bpf_core_accessor *targ_acc;
-
- targ_acc = &spec->spec[spec->len++];
- targ_acc->type_id = targ_id;
- targ_acc->idx = i;
- targ_acc->name = targ_name;
-
- *next_targ_id = m->type;
- found = bpf_core_fields_are_compat(local_btf,
- local_member->type,
- targ_btf, m->type);
- if (!found)
- spec->len--; /* pop accessor */
- return found;
- }
- /* member turned out not to be what we looked for */
- spec->bit_offset -= bit_offset;
- spec->raw_len--;
- }
-
- return 0;
-}
-
/* Check local and target types for compatibility. This check is used for
* type-based CO-RE relocations and follow slightly different rules than
* field-based relocations. This function assumes that root types were already
@@ -5375,8 +5019,8 @@ static int bpf_core_match_member(const struct btf *local_btf,
* These rules are not set in stone and probably will be adjusted as we get
* more experience with using BPF CO-RE relocations.
*/
-static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
- const struct btf *targ_btf, __u32 targ_id)
+int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id)
{
const struct btf_type *local_type, *targ_type;
int depth = 32; /* max recursion depth */
@@ -5450,671 +5094,6 @@ recur:
}
}
-/*
- * Try to match local spec to a target type and, if successful, produce full
- * target spec (high-level, low-level + bit offset).
- */
-static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
- const struct btf *targ_btf, __u32 targ_id,
- struct bpf_core_spec *targ_spec)
-{
- const struct btf_type *targ_type;
- const struct bpf_core_accessor *local_acc;
- struct bpf_core_accessor *targ_acc;
- int i, sz, matched;
-
- memset(targ_spec, 0, sizeof(*targ_spec));
- targ_spec->btf = targ_btf;
- targ_spec->root_type_id = targ_id;
- targ_spec->relo_kind = local_spec->relo_kind;
-
- if (core_relo_is_type_based(local_spec->relo_kind)) {
- return bpf_core_types_are_compat(local_spec->btf,
- local_spec->root_type_id,
- targ_btf, targ_id);
- }
-
- local_acc = &local_spec->spec[0];
- targ_acc = &targ_spec->spec[0];
-
- if (core_relo_is_enumval_based(local_spec->relo_kind)) {
- size_t local_essent_len, targ_essent_len;
- const struct btf_enum *e;
- const char *targ_name;
-
- /* has to resolve to an enum */
- targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
- if (!btf_is_enum(targ_type))
- return 0;
-
- local_essent_len = bpf_core_essential_name_len(local_acc->name);
-
- for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
- targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
- targ_essent_len = bpf_core_essential_name_len(targ_name);
- if (targ_essent_len != local_essent_len)
- continue;
- if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
- targ_acc->type_id = targ_id;
- targ_acc->idx = i;
- targ_acc->name = targ_name;
- targ_spec->len++;
- targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
- targ_spec->raw_len++;
- return 1;
- }
- }
- return 0;
- }
-
- if (!core_relo_is_field_based(local_spec->relo_kind))
- return -EINVAL;
-
- for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
- targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
- &targ_id);
- if (!targ_type)
- return -EINVAL;
-
- if (local_acc->name) {
- matched = bpf_core_match_member(local_spec->btf,
- local_acc,
- targ_btf, targ_id,
- targ_spec, &targ_id);
- if (matched <= 0)
- return matched;
- } else {
- /* for i=0, targ_id is already treated as array element
- * type (because it's the original struct), for others
- * we should find array element type first
- */
- if (i > 0) {
- const struct btf_array *a;
- bool flex;
-
- if (!btf_is_array(targ_type))
- return 0;
-
- a = btf_array(targ_type);
- flex = is_flex_arr(targ_btf, targ_acc - 1, a);
- if (!flex && local_acc->idx >= a->nelems)
- return 0;
- if (!skip_mods_and_typedefs(targ_btf, a->type,
- &targ_id))
- return -EINVAL;
- }
-
- /* too deep struct/union/array nesting */
- if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
- return -E2BIG;
-
- targ_acc->type_id = targ_id;
- targ_acc->idx = local_acc->idx;
- targ_acc->name = NULL;
- targ_spec->len++;
- targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
- targ_spec->raw_len++;
-
- sz = btf__resolve_size(targ_btf, targ_id);
- if (sz < 0)
- return sz;
- targ_spec->bit_offset += local_acc->idx * sz * 8;
- }
- }
-
- return 1;
-}
-
-static int bpf_core_calc_field_relo(const struct bpf_program *prog,
- const struct bpf_core_relo *relo,
- const struct bpf_core_spec *spec,
- __u32 *val, __u32 *field_sz, __u32 *type_id,
- bool *validate)
-{
- const struct bpf_core_accessor *acc;
- const struct btf_type *t;
- __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
- const struct btf_member *m;
- const struct btf_type *mt;
- bool bitfield;
- __s64 sz;
-
- *field_sz = 0;
-
- if (relo->kind == BPF_FIELD_EXISTS) {
- *val = spec ? 1 : 0;
- return 0;
- }
-
- if (!spec)
- return -EUCLEAN; /* request instruction poisoning */
-
- acc = &spec->spec[spec->len - 1];
- t = btf__type_by_id(spec->btf, acc->type_id);
-
- /* a[n] accessor needs special handling */
- if (!acc->name) {
- if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
- *val = spec->bit_offset / 8;
- /* remember field size for load/store mem size */
- sz = btf__resolve_size(spec->btf, acc->type_id);
- if (sz < 0)
- return -EINVAL;
- *field_sz = sz;
- *type_id = acc->type_id;
- } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
- sz = btf__resolve_size(spec->btf, acc->type_id);
- if (sz < 0)
- return -EINVAL;
- *val = sz;
- } else {
- pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
- prog->name, relo->kind, relo->insn_off / 8);
- return -EINVAL;
- }
- if (validate)
- *validate = true;
- return 0;
- }
-
- m = btf_members(t) + acc->idx;
- mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
- bit_off = spec->bit_offset;
- bit_sz = btf_member_bitfield_size(t, acc->idx);
-
- bitfield = bit_sz > 0;
- if (bitfield) {
- byte_sz = mt->size;
- byte_off = bit_off / 8 / byte_sz * byte_sz;
- /* figure out smallest int size necessary for bitfield load */
- while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
- if (byte_sz >= 8) {
- /* bitfield can't be read with 64-bit read */
- pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
- prog->name, relo->kind, relo->insn_off / 8);
- return -E2BIG;
- }
- byte_sz *= 2;
- byte_off = bit_off / 8 / byte_sz * byte_sz;
- }
- } else {
- sz = btf__resolve_size(spec->btf, field_type_id);
- if (sz < 0)
- return -EINVAL;
- byte_sz = sz;
- byte_off = spec->bit_offset / 8;
- bit_sz = byte_sz * 8;
- }
-
- /* for bitfields, all the relocatable aspects are ambiguous and we
- * might disagree with compiler, so turn off validation of expected
- * value, except for signedness
- */
- if (validate)
- *validate = !bitfield;
-
- switch (relo->kind) {
- case BPF_FIELD_BYTE_OFFSET:
- *val = byte_off;
- if (!bitfield) {
- *field_sz = byte_sz;
- *type_id = field_type_id;
- }
- break;
- case BPF_FIELD_BYTE_SIZE:
- *val = byte_sz;
- break;
- case BPF_FIELD_SIGNED:
- /* enums will be assumed unsigned */
- *val = btf_is_enum(mt) ||
- (btf_int_encoding(mt) & BTF_INT_SIGNED);
- if (validate)
- *validate = true; /* signedness is never ambiguous */
- break;
- case BPF_FIELD_LSHIFT_U64:
-#if __BYTE_ORDER == __LITTLE_ENDIAN
- *val = 64 - (bit_off + bit_sz - byte_off * 8);
-#else
- *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
-#endif
- break;
- case BPF_FIELD_RSHIFT_U64:
- *val = 64 - bit_sz;
- if (validate)
- *validate = true; /* right shift is never ambiguous */
- break;
- case BPF_FIELD_EXISTS:
- default:
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
-static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
- const struct bpf_core_spec *spec,
- __u32 *val)
-{
- __s64 sz;
-
- /* type-based relos return zero when target type is not found */
- if (!spec) {
- *val = 0;
- return 0;
- }
-
- switch (relo->kind) {
- case BPF_TYPE_ID_TARGET:
- *val = spec->root_type_id;
- break;
- case BPF_TYPE_EXISTS:
- *val = 1;
- break;
- case BPF_TYPE_SIZE:
- sz = btf__resolve_size(spec->btf, spec->root_type_id);
- if (sz < 0)
- return -EINVAL;
- *val = sz;
- break;
- case BPF_TYPE_ID_LOCAL:
- /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
- default:
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
-static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
- const struct bpf_core_spec *spec,
- __u32 *val)
-{
- const struct btf_type *t;
- const struct btf_enum *e;
-
- switch (relo->kind) {
- case BPF_ENUMVAL_EXISTS:
- *val = spec ? 1 : 0;
- break;
- case BPF_ENUMVAL_VALUE:
- if (!spec)
- return -EUCLEAN; /* request instruction poisoning */
- t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
- e = btf_enum(t) + spec->spec[0].idx;
- *val = e->val;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
-struct bpf_core_relo_res
-{
- /* expected value in the instruction, unless validate == false */
- __u32 orig_val;
- /* new value that needs to be patched up to */
- __u32 new_val;
- /* relocation unsuccessful, poison instruction, but don't fail load */
- bool poison;
- /* some relocations can't be validated against orig_val */
- bool validate;
- /* for field byte offset relocations or the forms:
- * *(T *)(rX + <off>) = rY
- * rX = *(T *)(rY + <off>),
- * we remember original and resolved field size to adjust direct
- * memory loads of pointers and integers; this is necessary for 32-bit
- * host kernel architectures, but also allows to automatically
- * relocate fields that were resized from, e.g., u32 to u64, etc.
- */
- bool fail_memsz_adjust;
- __u32 orig_sz;
- __u32 orig_type_id;
- __u32 new_sz;
- __u32 new_type_id;
-};
-
-/* Calculate original and target relocation values, given local and target
- * specs and relocation kind. These values are calculated for each candidate.
- * If there are multiple candidates, resulting values should all be consistent
- * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
- * If instruction has to be poisoned, *poison will be set to true.
- */
-static int bpf_core_calc_relo(const struct bpf_program *prog,
- const struct bpf_core_relo *relo,
- int relo_idx,
- const struct bpf_core_spec *local_spec,
- const struct bpf_core_spec *targ_spec,
- struct bpf_core_relo_res *res)
-{
- int err = -EOPNOTSUPP;
-
- res->orig_val = 0;
- res->new_val = 0;
- res->poison = false;
- res->validate = true;
- res->fail_memsz_adjust = false;
- res->orig_sz = res->new_sz = 0;
- res->orig_type_id = res->new_type_id = 0;
-
- if (core_relo_is_field_based(relo->kind)) {
- err = bpf_core_calc_field_relo(prog, relo, local_spec,
- &res->orig_val, &res->orig_sz,
- &res->orig_type_id, &res->validate);
- err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
- &res->new_val, &res->new_sz,
- &res->new_type_id, NULL);
- if (err)
- goto done;
- /* Validate if it's safe to adjust load/store memory size.
- * Adjustments are performed only if original and new memory
- * sizes differ.
- */
- res->fail_memsz_adjust = false;
- if (res->orig_sz != res->new_sz) {
- const struct btf_type *orig_t, *new_t;
-
- orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
- new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
-
- /* There are two use cases in which it's safe to
- * adjust load/store's mem size:
- * - reading a 32-bit kernel pointer, while on BPF
- * size pointers are always 64-bit; in this case
- * it's safe to "downsize" instruction size due to
- * pointer being treated as unsigned integer with
- * zero-extended upper 32-bits;
- * - reading unsigned integers, again due to
- * zero-extension is preserving the value correctly.
- *
- * In all other cases it's incorrect to attempt to
- * load/store field because read value will be
- * incorrect, so we poison relocated instruction.
- */
- if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
- goto done;
- if (btf_is_int(orig_t) && btf_is_int(new_t) &&
- btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
- btf_int_encoding(new_t) != BTF_INT_SIGNED)
- goto done;
-
- /* mark as invalid mem size adjustment, but this will
- * only be checked for LDX/STX/ST insns
- */
- res->fail_memsz_adjust = true;
- }
- } else if (core_relo_is_type_based(relo->kind)) {
- err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
- err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
- } else if (core_relo_is_enumval_based(relo->kind)) {
- err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
- err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
- }
-
-done:
- if (err == -EUCLEAN) {
- /* EUCLEAN is used to signal instruction poisoning request */
- res->poison = true;
- err = 0;
- } else if (err == -EOPNOTSUPP) {
- /* EOPNOTSUPP means unknown/unsupported relocation */
- pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
- prog->name, relo_idx, core_relo_kind_str(relo->kind),
- relo->kind, relo->insn_off / 8);
- }
-
- return err;
-}
-
-/*
- * Turn instruction for which CO_RE relocation failed into invalid one with
- * distinct signature.
- */
-static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
- int insn_idx, struct bpf_insn *insn)
-{
- pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
- prog->name, relo_idx, insn_idx);
- insn->code = BPF_JMP | BPF_CALL;
- insn->dst_reg = 0;
- insn->src_reg = 0;
- insn->off = 0;
- /* if this instruction is reachable (not a dead code),
- * verifier will complain with the following message:
- * invalid func unknown#195896080
- */
- insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
-}
-
-static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
-{
- switch (BPF_SIZE(insn->code)) {
- case BPF_DW: return 8;
- case BPF_W: return 4;
- case BPF_H: return 2;
- case BPF_B: return 1;
- default: return -1;
- }
-}
-
-static int insn_bytes_to_bpf_size(__u32 sz)
-{
- switch (sz) {
- case 8: return BPF_DW;
- case 4: return BPF_W;
- case 2: return BPF_H;
- case 1: return BPF_B;
- default: return -1;
- }
-}
-
-/*
- * Patch relocatable BPF instruction.
- *
- * Patched value is determined by relocation kind and target specification.
- * For existence relocations target spec will be NULL if field/type is not found.
- * Expected insn->imm value is determined using relocation kind and local
- * spec, and is checked before patching instruction. If actual insn->imm value
- * is wrong, bail out with error.
- *
- * Currently supported classes of BPF instruction are:
- * 1. rX = <imm> (assignment with immediate operand);
- * 2. rX += <imm> (arithmetic operations with immediate operand);
- * 3. rX = <imm64> (load with 64-bit immediate value);
- * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
- * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
- * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
- */
-static int bpf_core_patch_insn(struct bpf_program *prog,
- const struct bpf_core_relo *relo,
- int relo_idx,
- const struct bpf_core_relo_res *res)
-{
- __u32 orig_val, new_val;
- struct bpf_insn *insn;
- int insn_idx;
- __u8 class;
-
- if (relo->insn_off % BPF_INSN_SZ)
- return -EINVAL;
- insn_idx = relo->insn_off / BPF_INSN_SZ;
- /* adjust insn_idx from section frame of reference to the local
- * program's frame of reference; (sub-)program code is not yet
- * relocated, so it's enough to just subtract in-section offset
- */
- insn_idx = insn_idx - prog->sec_insn_off;
- insn = &prog->insns[insn_idx];
- class = BPF_CLASS(insn->code);
-
- if (res->poison) {
-poison:
- /* poison second part of ldimm64 to avoid confusing error from
- * verifier about "unknown opcode 00"
- */
- if (is_ldimm64_insn(insn))
- bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
- bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
- return 0;
- }
-
- orig_val = res->orig_val;
- new_val = res->new_val;
-
- switch (class) {
- case BPF_ALU:
- case BPF_ALU64:
- if (BPF_SRC(insn->code) != BPF_K)
- return -EINVAL;
- if (res->validate && insn->imm != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
- prog->name, relo_idx,
- insn_idx, insn->imm, orig_val, new_val);
- return -EINVAL;
- }
- orig_val = insn->imm;
- insn->imm = new_val;
- pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
- prog->name, relo_idx, insn_idx,
- orig_val, new_val);
- break;
- case BPF_LDX:
- case BPF_ST:
- case BPF_STX:
- if (res->validate && insn->off != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
- prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
- return -EINVAL;
- }
- if (new_val > SHRT_MAX) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
- prog->name, relo_idx, insn_idx, new_val);
- return -ERANGE;
- }
- if (res->fail_memsz_adjust) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
- "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
- prog->name, relo_idx, insn_idx);
- goto poison;
- }
-
- orig_val = insn->off;
- insn->off = new_val;
- pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
- prog->name, relo_idx, insn_idx, orig_val, new_val);
-
- if (res->new_sz != res->orig_sz) {
- int insn_bytes_sz, insn_bpf_sz;
-
- insn_bytes_sz = insn_bpf_size_to_bytes(insn);
- if (insn_bytes_sz != res->orig_sz) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
- prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
- return -EINVAL;
- }
-
- insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
- if (insn_bpf_sz < 0) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
- prog->name, relo_idx, insn_idx, res->new_sz);
- return -EINVAL;
- }
-
- insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
- pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
- prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
- }
- break;
- case BPF_LD: {
- __u64 imm;
-
- if (!is_ldimm64_insn(insn) ||
- insn[0].src_reg != 0 || insn[0].off != 0 ||
- insn_idx + 1 >= prog->insns_cnt ||
- insn[1].code != 0 || insn[1].dst_reg != 0 ||
- insn[1].src_reg != 0 || insn[1].off != 0) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
- prog->name, relo_idx, insn_idx);
- return -EINVAL;
- }
-
- imm = insn[0].imm + ((__u64)insn[1].imm << 32);
- if (res->validate && imm != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
- prog->name, relo_idx,
- insn_idx, (unsigned long long)imm,
- orig_val, new_val);
- return -EINVAL;
- }
-
- insn[0].imm = new_val;
- insn[1].imm = 0; /* currently only 32-bit values are supported */
- pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
- prog->name, relo_idx, insn_idx,
- (unsigned long long)imm, new_val);
- break;
- }
- default:
- pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
- prog->name, relo_idx, insn_idx, insn->code,
- insn->src_reg, insn->dst_reg, insn->off, insn->imm);
- return -EINVAL;
- }
-
- return 0;
-}
-
-/* Output spec definition in the format:
- * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
- * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
- */
-static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
-{
- const struct btf_type *t;
- const struct btf_enum *e;
- const char *s;
- __u32 type_id;
- int i;
-
- type_id = spec->root_type_id;
- t = btf__type_by_id(spec->btf, type_id);
- s = btf__name_by_offset(spec->btf, t->name_off);
-
- libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
-
- if (core_relo_is_type_based(spec->relo_kind))
- return;
-
- if (core_relo_is_enumval_based(spec->relo_kind)) {
- t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
- e = btf_enum(t) + spec->raw_spec[0];
- s = btf__name_by_offset(spec->btf, e->name_off);
-
- libbpf_print(level, "::%s = %u", s, e->val);
- return;
- }
-
- if (core_relo_is_field_based(spec->relo_kind)) {
- for (i = 0; i < spec->len; i++) {
- if (spec->spec[i].name)
- libbpf_print(level, ".%s", spec->spec[i].name);
- else if (i > 0 || spec->spec[i].idx > 0)
- libbpf_print(level, "[%u]", spec->spec[i].idx);
- }
-
- libbpf_print(level, " (");
- for (i = 0; i < spec->raw_len; i++)
- libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
-
- if (spec->bit_offset % 8)
- libbpf_print(level, " @ offset %u.%u)",
- spec->bit_offset / 8, spec->bit_offset % 8);
- else
- libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
- return;
- }
-}
-
static size_t bpf_core_hash_fn(const void *key, void *ctx)
{
return (size_t)key;
@@ -6130,73 +5109,33 @@ static void *u32_as_hash_key(__u32 x)
return (void *)(uintptr_t)x;
}
-/*
- * CO-RE relocate single instruction.
- *
- * The outline and important points of the algorithm:
- * 1. For given local type, find corresponding candidate target types.
- * Candidate type is a type with the same "essential" name, ignoring
- * everything after last triple underscore (___). E.g., `sample`,
- * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
- * for each other. Names with triple underscore are referred to as
- * "flavors" and are useful, among other things, to allow to
- * specify/support incompatible variations of the same kernel struct, which
- * might differ between different kernel versions and/or build
- * configurations.
- *
- * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
- * converter, when deduplicated BTF of a kernel still contains more than
- * one different types with the same name. In that case, ___2, ___3, etc
- * are appended starting from second name conflict. But start flavors are
- * also useful to be defined "locally", in BPF program, to extract same
- * data from incompatible changes between different kernel
- * versions/configurations. For instance, to handle field renames between
- * kernel versions, one can use two flavors of the struct name with the
- * same common name and use conditional relocations to extract that field,
- * depending on target kernel version.
- * 2. For each candidate type, try to match local specification to this
- * candidate target type. Matching involves finding corresponding
- * high-level spec accessors, meaning that all named fields should match,
- * as well as all array accesses should be within the actual bounds. Also,
- * types should be compatible (see bpf_core_fields_are_compat for details).
- * 3. It is supported and expected that there might be multiple flavors
- * matching the spec. As long as all the specs resolve to the same set of
- * offsets across all candidates, there is no error. If there is any
- * ambiguity, CO-RE relocation will fail. This is necessary to accomodate
- * imprefection of BTF deduplication, which can cause slight duplication of
- * the same BTF type, if some directly or indirectly referenced (by
- * pointer) type gets resolved to different actual types in different
- * object files. If such situation occurs, deduplicated BTF will end up
- * with two (or more) structurally identical types, which differ only in
- * types they refer to through pointer. This should be OK in most cases and
- * is not an error.
- * 4. Candidate types search is performed by linearly scanning through all
- * types in target BTF. It is anticipated that this is overall more
- * efficient memory-wise and not significantly worse (if not better)
- * CPU-wise compared to prebuilding a map from all local type names to
- * a list of candidate type names. It's also sped up by caching resolved
- * list of matching candidates per each local "root" type ID, that has at
- * least one bpf_core_relo associated with it. This list is shared
- * between multiple relocations for the same type ID and is updated as some
- * of the candidates are pruned due to structural incompatibility.
- */
static int bpf_core_apply_relo(struct bpf_program *prog,
const struct bpf_core_relo *relo,
int relo_idx,
const struct btf *local_btf,
struct hashmap *cand_cache)
{
- struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
const void *type_key = u32_as_hash_key(relo->type_id);
- struct bpf_core_relo_res cand_res, targ_res;
+ struct bpf_core_cand_list *cands = NULL;
+ const char *prog_name = prog->name;
const struct btf_type *local_type;
const char *local_name;
- struct core_cand_list *cands = NULL;
- __u32 local_id;
- const char *spec_str;
- int i, j, err;
+ __u32 local_id = relo->type_id;
+ struct bpf_insn *insn;
+ int insn_idx, err;
+
+ if (relo->insn_off % BPF_INSN_SZ)
+ return -EINVAL;
+ insn_idx = relo->insn_off / BPF_INSN_SZ;
+ /* adjust insn_idx from section frame of reference to the local
+ * program's frame of reference; (sub-)program code is not yet
+ * relocated, so it's enough to just subtract in-section offset
+ */
+ insn_idx = insn_idx - prog->sec_insn_off;
+ if (insn_idx > prog->insns_cnt)
+ return -EINVAL;
+ insn = &prog->insns[insn_idx];
- local_id = relo->type_id;
local_type = btf__type_by_id(local_btf, local_id);
if (!local_type)
return -EINVAL;
@@ -6205,51 +5144,19 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
if (!local_name)
return -EINVAL;
- spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
- if (str_is_empty(spec_str))
- return -EINVAL;
-
if (prog->obj->gen_loader) {
- pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n",
+ pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n",
prog - prog->obj->programs, relo->insn_off / 8,
- local_name, spec_str, relo->kind);
+ local_name, relo->kind);
return -ENOTSUP;
}
- err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
- if (err) {
- pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
- prog->name, relo_idx, local_id, btf_kind_str(local_type),
- str_is_empty(local_name) ? "<anon>" : local_name,
- spec_str, err);
- return -EINVAL;
- }
-
- pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
- relo_idx, core_relo_kind_str(relo->kind), relo->kind);
- bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
- libbpf_print(LIBBPF_DEBUG, "\n");
- /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
- if (relo->kind == BPF_TYPE_ID_LOCAL) {
- targ_res.validate = true;
- targ_res.poison = false;
- targ_res.orig_val = local_spec.root_type_id;
- targ_res.new_val = local_spec.root_type_id;
- goto patch_insn;
- }
-
- /* libbpf doesn't support candidate search for anonymous types */
- if (str_is_empty(spec_str)) {
- pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
- prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
- return -EOPNOTSUPP;
- }
-
- if (!hashmap__find(cand_cache, type_key, (void **)&cands)) {
+ if (relo->kind != BPF_TYPE_ID_LOCAL &&
+ !hashmap__find(cand_cache, type_key, (void **)&cands)) {
cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
if (IS_ERR(cands)) {
pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
- prog->name, relo_idx, local_id, btf_kind_str(local_type),
+ prog_name, relo_idx, local_id, btf_kind_str(local_type),
local_name, PTR_ERR(cands));
return PTR_ERR(cands);
}
@@ -6260,97 +5167,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
}
}
- for (i = 0, j = 0; i < cands->len; i++) {
- err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
- cands->cands[i].id, &cand_spec);
- if (err < 0) {
- pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
- prog->name, relo_idx, i);
- bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
- libbpf_print(LIBBPF_WARN, ": %d\n", err);
- return err;
- }
-
- pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
- relo_idx, err == 0 ? "non-matching" : "matching", i);
- bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
- libbpf_print(LIBBPF_DEBUG, "\n");
-
- if (err == 0)
- continue;
-
- err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
- if (err)
- return err;
-
- if (j == 0) {
- targ_res = cand_res;
- targ_spec = cand_spec;
- } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
- /* if there are many field relo candidates, they
- * should all resolve to the same bit offset
- */
- pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
- prog->name, relo_idx, cand_spec.bit_offset,
- targ_spec.bit_offset);
- return -EINVAL;
- } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
- /* all candidates should result in the same relocation
- * decision and value, otherwise it's dangerous to
- * proceed due to ambiguity
- */
- pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
- prog->name, relo_idx,
- cand_res.poison ? "failure" : "success", cand_res.new_val,
- targ_res.poison ? "failure" : "success", targ_res.new_val);
- return -EINVAL;
- }
-
- cands->cands[j++] = cands->cands[i];
- }
-
- /*
- * For BPF_FIELD_EXISTS relo or when used BPF program has field
- * existence checks or kernel version/config checks, it's expected
- * that we might not find any candidates. In this case, if field
- * wasn't found in any candidate, the list of candidates shouldn't
- * change at all, we'll just handle relocating appropriately,
- * depending on relo's kind.
- */
- if (j > 0)
- cands->len = j;
-
- /*
- * If no candidates were found, it might be both a programmer error,
- * as well as expected case, depending whether instruction w/
- * relocation is guarded in some way that makes it unreachable (dead
- * code) if relocation can't be resolved. This is handled in
- * bpf_core_patch_insn() uniformly by replacing that instruction with
- * BPF helper call insn (using invalid helper ID). If that instruction
- * is indeed unreachable, then it will be ignored and eliminated by
- * verifier. If it was an error, then verifier will complain and point
- * to a specific instruction number in its log.
- */
- if (j == 0) {
- pr_debug("prog '%s': relo #%d: no matching targets found\n",
- prog->name, relo_idx);
-
- /* calculate single target relo result explicitly */
- err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
- if (err)
- return err;
- }
-
-patch_insn:
- /* bpf_core_patch_insn() should know how to handle missing targ_spec */
- err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
- if (err) {
- pr_warn("prog '%s': relo #%d: failed to patch insn #%zu: %d\n",
- prog->name, relo_idx, relo->insn_off / BPF_INSN_SZ, err);
- return -EINVAL;
- }
-
- return 0;
+ return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands);
}
static int
@@ -6496,11 +5313,11 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
}
insn[1].imm = ext->kcfg.data_off;
} else /* EXT_KSYM */ {
- if (ext->ksym.type_id) { /* typed ksyms */
+ if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
insn[0].src_reg = BPF_PSEUDO_BTF_ID;
insn[0].imm = ext->ksym.kernel_btf_id;
insn[1].imm = ext->ksym.kernel_btf_obj_fd;
- } else { /* typeless ksyms */
+ } else { /* typeless ksyms or unresolved typed ksyms */
insn[0].imm = (__u32)ext->ksym.addr;
insn[1].imm = ext->ksym.addr >> 32;
}
@@ -7190,7 +6007,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj)
for (i = 0; i < obj->nr_programs; i++) {
struct bpf_program *p = &obj->programs[i];
-
+
if (!p->nr_reloc)
continue;
@@ -7554,7 +6371,7 @@ static struct bpf_object *
__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts)
{
- const char *obj_name, *kconfig;
+ const char *obj_name, *kconfig, *btf_tmp_path;
struct bpf_program *prog;
struct bpf_object *obj;
char tmp_name[64];
@@ -7585,11 +6402,26 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
if (IS_ERR(obj))
return obj;
+ btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
+ if (btf_tmp_path) {
+ if (strlen(btf_tmp_path) >= PATH_MAX) {
+ err = -ENAMETOOLONG;
+ goto out;
+ }
+ obj->btf_custom_path = strdup(btf_tmp_path);
+ if (!obj->btf_custom_path) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
kconfig = OPTS_GET(opts, kconfig, NULL);
if (kconfig) {
obj->kconfig = strdup(kconfig);
- if (!obj->kconfig)
- return ERR_PTR(-ENOMEM);
+ if (!obj->kconfig) {
+ err = -ENOMEM;
+ goto out;
+ }
}
err = bpf_object__elf_init(obj);
@@ -7812,11 +6644,8 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
break;
}
}
- if (id <= 0) {
- pr_warn("extern (%s ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
- __btf_kind_str(kind), ksym_name);
+ if (id <= 0)
return -ESRCH;
- }
*res_btf = btf;
*res_btf_fd = btf_fd;
@@ -7833,8 +6662,13 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
struct btf *btf = NULL;
id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd);
- if (id < 0)
+ if (id == -ESRCH && ext->is_weak) {
+ return 0;
+ } else if (id < 0) {
+ pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
+ ext->name);
return id;
+ }
/* find local type_id */
local_type_id = ext->ksym.type_id;
@@ -8055,7 +6889,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
err = err ? : bpf_object__sanitize_maps(obj);
err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
err = err ? : bpf_object__create_maps(obj);
- err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
+ err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path);
err = err ? : bpf_object__load_progs(obj, attr->log_level);
if (obj->gen_loader) {
@@ -8450,6 +7284,11 @@ const char *bpf_map__get_pin_path(const struct bpf_map *map)
return map->pin_path;
}
+const char *bpf_map__pin_path(const struct bpf_map *map)
+{
+ return map->pin_path;
+}
+
bool bpf_map__is_pinned(const struct bpf_map *map)
{
return map->pinned;
@@ -8702,6 +7541,7 @@ void bpf_object__close(struct bpf_object *obj)
for (i = 0; i < obj->nr_maps; i++)
bpf_map__destroy(&obj->maps[i]);
+ zfree(&obj->btf_custom_path);
zfree(&obj->kconfig);
zfree(&obj->externs);
obj->nr_extern = 0;
@@ -9471,7 +8311,7 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
ret = snprintf(btf_type_name, sizeof(btf_type_name),
"%s%s", prefix, name);
/* snprintf returns the number of characters written excluding the
- * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
+ * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
* indicates truncation.
*/
if (ret < 0 || ret >= sizeof(btf_type_name))
@@ -9495,7 +8335,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
struct btf *btf;
int err;
- btf = libbpf_find_kernel_btf();
+ btf = btf__load_vmlinux_btf();
err = libbpf_get_error(btf);
if (err) {
pr_warn("vmlinux BTF is not found\n");
@@ -9514,8 +8354,8 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
{
struct bpf_prog_info_linear *info_linear;
struct bpf_prog_info *info;
- struct btf *btf = NULL;
- int err = -EINVAL;
+ struct btf *btf;
+ int err;
info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
err = libbpf_get_error(info_linear);
@@ -9524,12 +8364,15 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
attach_prog_fd);
return err;
}
+
+ err = -EINVAL;
info = &info_linear->info;
if (!info->btf_id) {
pr_warn("The target program doesn't have BTF\n");
goto out;
}
- if (btf__get_from_id(info->btf_id, &btf)) {
+ btf = btf__load_from_kernel_by_id(info->btf_id);
+ if (libbpf_get_error(btf)) {
pr_warn("Failed to get BTF of the program\n");
goto out;
}
@@ -10003,7 +8846,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
struct bpf_link {
int (*detach)(struct bpf_link *link);
- int (*destroy)(struct bpf_link *link);
+ void (*dealloc)(struct bpf_link *link);
char *pin_path; /* NULL, if not pinned */
int fd; /* hook FD, -1 if not applicable */
bool disconnected;
@@ -10013,7 +8856,7 @@ struct bpf_link {
int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
{
int ret;
-
+
ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
return libbpf_err_errno(ret);
}
@@ -10042,11 +8885,12 @@ int bpf_link__destroy(struct bpf_link *link)
if (!link->disconnected && link->detach)
err = link->detach(link);
- if (link->destroy)
- link->destroy(link);
if (link->pin_path)
free(link->pin_path);
- free(link);
+ if (link->dealloc)
+ link->dealloc(link);
+ else
+ free(link);
return libbpf_err(err);
}
@@ -10143,23 +8987,42 @@ int bpf_link__unpin(struct bpf_link *link)
return 0;
}
-static int bpf_link__detach_perf_event(struct bpf_link *link)
+struct bpf_link_perf {
+ struct bpf_link link;
+ int perf_event_fd;
+};
+
+static int bpf_link_perf_detach(struct bpf_link *link)
{
- int err;
+ struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
+ int err = 0;
- err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
- if (err)
+ if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
err = -errno;
+ if (perf_link->perf_event_fd != link->fd)
+ close(perf_link->perf_event_fd);
close(link->fd);
+
return libbpf_err(err);
}
-struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
+static void bpf_link_perf_dealloc(struct bpf_link *link)
+{
+ struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
+
+ free(perf_link);
+}
+
+struct bpf_link *bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd,
+ const struct bpf_perf_event_opts *opts)
{
char errmsg[STRERR_BUFSIZE];
- struct bpf_link *link;
- int prog_fd, err;
+ struct bpf_link_perf *link;
+ int prog_fd, link_fd = -1, err;
+
+ if (!OPTS_VALID(opts, bpf_perf_event_opts))
+ return libbpf_err_ptr(-EINVAL);
if (pfd < 0) {
pr_warn("prog '%s': invalid perf event FD %d\n",
@@ -10176,27 +9039,59 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pf
link = calloc(1, sizeof(*link));
if (!link)
return libbpf_err_ptr(-ENOMEM);
- link->detach = &bpf_link__detach_perf_event;
- link->fd = pfd;
+ link->link.detach = &bpf_link_perf_detach;
+ link->link.dealloc = &bpf_link_perf_dealloc;
+ link->perf_event_fd = pfd;
- if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
- err = -errno;
- free(link);
- pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
- prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- if (err == -EPROTO)
- pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
- prog->name, pfd);
- return libbpf_err_ptr(err);
+ if (kernel_supports(prog->obj, FEAT_PERF_LINK)) {
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
+ .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
+
+ link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
+ if (link_fd < 0) {
+ err = -errno;
+ pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
+ prog->name, pfd,
+ err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ goto err_out;
+ }
+ link->link.fd = link_fd;
+ } else {
+ if (OPTS_GET(opts, bpf_cookie, 0)) {
+ pr_warn("prog '%s': user context value is not supported\n", prog->name);
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
+ err = -errno;
+ pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
+ prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ if (err == -EPROTO)
+ pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
+ prog->name, pfd);
+ goto err_out;
+ }
+ link->link.fd = pfd;
}
if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
err = -errno;
- free(link);
- pr_warn("prog '%s': failed to enable pfd %d: %s\n",
+ pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return libbpf_err_ptr(err);
+ goto err_out;
}
- return link;
+
+ return &link->link;
+err_out:
+ if (link_fd >= 0)
+ close(link_fd);
+ free(link);
+ return libbpf_err_ptr(err);
+}
+
+struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
+{
+ return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
}
/*
@@ -10257,13 +9152,19 @@ static int determine_uprobe_retprobe_bit(void)
return parse_uint_from_file(file, "config:%d\n");
}
+#define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
+#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
+
static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
- uint64_t offset, int pid)
+ uint64_t offset, int pid, size_t ref_ctr_off)
{
struct perf_event_attr attr = {};
char errmsg[STRERR_BUFSIZE];
int type, pfd, err;
+ if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
+ return -EINVAL;
+
type = uprobe ? determine_uprobe_perf_type()
: determine_kprobe_perf_type();
if (type < 0) {
@@ -10286,6 +9187,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
}
attr.size = sizeof(attr);
attr.type = type;
+ attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
attr.config2 = offset; /* kprobe_addr or probe_offset */
@@ -10304,23 +9206,34 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
return pfd;
}
-struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
- bool retprobe,
- const char *func_name)
+struct bpf_link *
+bpf_program__attach_kprobe_opts(struct bpf_program *prog,
+ const char *func_name,
+ const struct bpf_kprobe_opts *opts)
{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
+ unsigned long offset;
+ bool retprobe;
int pfd, err;
+ if (!OPTS_VALID(opts, bpf_kprobe_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ retprobe = OPTS_GET(opts, retprobe, false);
+ offset = OPTS_GET(opts, offset, 0);
+ pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
- 0 /* offset */, -1 /* pid */);
+ offset, -1 /* pid */, 0 /* ref_ctr_off */);
if (pfd < 0) {
pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return libbpf_err_ptr(pfd);
}
- link = bpf_program__attach_perf_event(prog, pfd);
+ link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
err = libbpf_get_error(link);
if (err) {
close(pfd);
@@ -10332,29 +9245,70 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
return link;
}
+struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
+ bool retprobe,
+ const char *func_name)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
+ .retprobe = retprobe,
+ );
+
+ return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
+}
+
static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
struct bpf_program *prog)
{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
+ unsigned long offset = 0;
+ struct bpf_link *link;
const char *func_name;
- bool retprobe;
+ char *func;
+ int n, err;
func_name = prog->sec_name + sec->len;
- retprobe = strcmp(sec->sec, "kretprobe/") == 0;
+ opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0;
+
+ n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
+ if (n < 1) {
+ err = -EINVAL;
+ pr_warn("kprobe name is invalid: %s\n", func_name);
+ return libbpf_err_ptr(err);
+ }
+ if (opts.retprobe && offset != 0) {
+ free(func);
+ err = -EINVAL;
+ pr_warn("kretprobes do not support offset specification\n");
+ return libbpf_err_ptr(err);
+ }
- return bpf_program__attach_kprobe(prog, retprobe, func_name);
+ opts.offset = offset;
+ link = bpf_program__attach_kprobe_opts(prog, func, &opts);
+ free(func);
+ return link;
}
-struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
- bool retprobe, pid_t pid,
- const char *binary_path,
- size_t func_offset)
+LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid,
+ const char *binary_path, size_t func_offset,
+ const struct bpf_uprobe_opts *opts)
{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
+ size_t ref_ctr_off;
int pfd, err;
+ bool retprobe;
+
+ if (!OPTS_VALID(opts, bpf_uprobe_opts))
+ return libbpf_err_ptr(-EINVAL);
- pfd = perf_event_open_probe(true /* uprobe */, retprobe,
- binary_path, func_offset, pid);
+ retprobe = OPTS_GET(opts, retprobe, false);
+ ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
+ pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
+ pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
+ func_offset, pid, ref_ctr_off);
if (pfd < 0) {
pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
prog->name, retprobe ? "uretprobe" : "uprobe",
@@ -10362,7 +9316,7 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return libbpf_err_ptr(pfd);
}
- link = bpf_program__attach_perf_event(prog, pfd);
+ link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
err = libbpf_get_error(link);
if (err) {
close(pfd);
@@ -10375,6 +9329,16 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
return link;
}
+struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
+ bool retprobe, pid_t pid,
+ const char *binary_path,
+ size_t func_offset)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
+
+ return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
+}
+
static int determine_tracepoint_id(const char *tp_category,
const char *tp_name)
{
@@ -10425,14 +9389,21 @@ static int perf_event_open_tracepoint(const char *tp_category,
return pfd;
}
-struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
- const char *tp_category,
- const char *tp_name)
+struct bpf_link *bpf_program__attach_tracepoint_opts(struct bpf_program *prog,
+ const char *tp_category,
+ const char *tp_name,
+ const struct bpf_tracepoint_opts *opts)
{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
int pfd, err;
+ if (!OPTS_VALID(opts, bpf_tracepoint_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
pfd = perf_event_open_tracepoint(tp_category, tp_name);
if (pfd < 0) {
pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
@@ -10440,7 +9411,7 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return libbpf_err_ptr(pfd);
}
- link = bpf_program__attach_perf_event(prog, pfd);
+ link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
err = libbpf_get_error(link);
if (err) {
close(pfd);
@@ -10452,6 +9423,13 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
return link;
}
+struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
+ const char *tp_category,
+ const char *tp_name)
+{
+ return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
+}
+
static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
struct bpf_program *prog)
{
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 6e61342ba56c..f177d897c5f7 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -94,8 +94,15 @@ struct bpf_object_open_opts {
* system Kconfig for CONFIG_xxx externs.
*/
const char *kconfig;
+ /* Path to the custom BTF to be used for BPF CO-RE relocations.
+ * This custom BTF completely replaces the use of vmlinux BTF
+ * for the purpose of CO-RE relocations.
+ * NOTE: any other BPF feature (e.g., fentry/fexit programs,
+ * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux.
+ */
+ const char *btf_custom_path;
};
-#define bpf_object_open_opts__last_field kconfig
+#define bpf_object_open_opts__last_field btf_custom_path
LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
LIBBPF_API struct bpf_object *
@@ -237,20 +244,86 @@ LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
LIBBPF_API struct bpf_link *
bpf_program__attach(struct bpf_program *prog);
+
+struct bpf_perf_event_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+};
+#define bpf_perf_event_opts__last_field bpf_cookie
+
LIBBPF_API struct bpf_link *
bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd,
+ const struct bpf_perf_event_opts *opts);
+
+struct bpf_kprobe_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+ /* function's offset to install kprobe to */
+ unsigned long offset;
+ /* kprobe is return probe */
+ bool retprobe;
+ size_t :0;
+};
+#define bpf_kprobe_opts__last_field retprobe
+
LIBBPF_API struct bpf_link *
bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
const char *func_name);
LIBBPF_API struct bpf_link *
+bpf_program__attach_kprobe_opts(struct bpf_program *prog,
+ const char *func_name,
+ const struct bpf_kprobe_opts *opts);
+
+struct bpf_uprobe_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* offset of kernel reference counted USDT semaphore, added in
+ * a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe")
+ */
+ size_t ref_ctr_offset;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+ /* uprobe is return probe, invoked at function return time */
+ bool retprobe;
+ size_t :0;
+};
+#define bpf_uprobe_opts__last_field retprobe
+
+LIBBPF_API struct bpf_link *
bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
pid_t pid, const char *binary_path,
size_t func_offset);
LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid,
+ const char *binary_path, size_t func_offset,
+ const struct bpf_uprobe_opts *opts);
+
+struct bpf_tracepoint_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+};
+#define bpf_tracepoint_opts__last_field bpf_cookie
+
+LIBBPF_API struct bpf_link *
bpf_program__attach_tracepoint(struct bpf_program *prog,
const char *tp_category,
const char *tp_name);
LIBBPF_API struct bpf_link *
+bpf_program__attach_tracepoint_opts(struct bpf_program *prog,
+ const char *tp_category,
+ const char *tp_name,
+ const struct bpf_tracepoint_opts *opts);
+
+LIBBPF_API struct bpf_link *
bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
const char *tp_name);
LIBBPF_API struct bpf_link *
@@ -477,6 +550,7 @@ LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
+LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map);
LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 944c99d1ded3..bbc53bb25f68 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -371,7 +371,18 @@ LIBBPF_0.4.0 {
LIBBPF_0.5.0 {
global:
bpf_map__initial_value;
+ bpf_map__pin_path;
bpf_map_lookup_and_delete_elem_flags;
+ bpf_program__attach_kprobe_opts;
+ bpf_program__attach_perf_event_opts;
+ bpf_program__attach_tracepoint_opts;
+ bpf_program__attach_uprobe_opts;
bpf_object__gen_loader;
+ btf__load_from_kernel_by_id;
+ btf__load_from_kernel_by_id_split;
+ btf__load_into_kernel;
+ btf__load_module_btf;
+ btf__load_vmlinux_btf;
+ btf_dump__dump_type_data;
libbpf_set_strict_mode;
} LIBBPF_0.4.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 016ca7cb4f8a..533b0211f40a 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -14,6 +14,7 @@
#include <errno.h>
#include <linux/err.h>
#include "libbpf_legacy.h"
+#include "relo_core.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -195,6 +196,17 @@ void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
size_t cur_cnt, size_t max_cnt, size_t add_cnt);
int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
+static inline bool libbpf_is_mem_zeroed(const char *p, ssize_t len)
+{
+ while (len > 0) {
+ if (*p)
+ return false;
+ p++;
+ len--;
+ }
+ return true;
+}
+
static inline bool libbpf_validate_opts(const char *opts,
size_t opts_sz, size_t user_sz,
const char *type_name)
@@ -203,16 +215,9 @@ static inline bool libbpf_validate_opts(const char *opts,
pr_warn("%s size (%zu) is too small\n", type_name, user_sz);
return false;
}
- if (user_sz > opts_sz) {
- size_t i;
-
- for (i = opts_sz; i < user_sz; i++) {
- if (opts[i]) {
- pr_warn("%s has non-zero extra bytes\n",
- type_name);
- return false;
- }
- }
+ if (!libbpf_is_mem_zeroed(opts + opts_sz, (ssize_t)user_sz - opts_sz)) {
+ pr_warn("%s has non-zero extra bytes\n", type_name);
+ return false;
}
return true;
}
@@ -232,6 +237,14 @@ static inline bool libbpf_validate_opts(const char *opts,
(opts)->field = value; \
} while (0)
+#define OPTS_ZEROED(opts, last_nonzero_field) \
+({ \
+ ssize_t __off = offsetofend(typeof(*(opts)), last_nonzero_field); \
+ !(opts) || libbpf_is_mem_zeroed((const void *)opts + __off, \
+ (opts)->sz - __off); \
+})
+
+
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
@@ -366,76 +379,6 @@ struct bpf_line_info_min {
__u32 line_col;
};
-/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
- * has to be adjusted by relocations.
- */
-enum bpf_core_relo_kind {
- BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */
- BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */
- BPF_FIELD_EXISTS = 2, /* field existence in target kernel */
- BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */
- BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */
- BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */
- BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */
- BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */
- BPF_TYPE_EXISTS = 8, /* type existence in target kernel */
- BPF_TYPE_SIZE = 9, /* type size in bytes */
- BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
- BPF_ENUMVAL_VALUE = 11, /* enum value integer value */
-};
-
-/* The minimum bpf_core_relo checked by the loader
- *
- * CO-RE relocation captures the following data:
- * - insn_off - instruction offset (in bytes) within a BPF program that needs
- * its insn->imm field to be relocated with actual field info;
- * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
- * type or field;
- * - access_str_off - offset into corresponding .BTF string section. String
- * interpretation depends on specific relocation kind:
- * - for field-based relocations, string encodes an accessed field using
- * a sequence of field and array indices, separated by colon (:). It's
- * conceptually very close to LLVM's getelementptr ([0]) instruction's
- * arguments for identifying offset to a field.
- * - for type-based relocations, strings is expected to be just "0";
- * - for enum value-based relocations, string contains an index of enum
- * value within its enum type;
- *
- * Example to provide a better feel.
- *
- * struct sample {
- * int a;
- * struct {
- * int b[10];
- * };
- * };
- *
- * struct sample *s = ...;
- * int x = &s->a; // encoded as "0:0" (a is field #0)
- * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1,
- * // b is field #0 inside anon struct, accessing elem #5)
- * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
- *
- * type_id for all relocs in this example will capture BTF type id of
- * `struct sample`.
- *
- * Such relocation is emitted when using __builtin_preserve_access_index()
- * Clang built-in, passing expression that captures field address, e.g.:
- *
- * bpf_probe_read(&dst, sizeof(dst),
- * __builtin_preserve_access_index(&src->a.b.c));
- *
- * In this case Clang will emit field relocation recording necessary data to
- * be able to find offset of embedded `a.b.c` field within `src` struct.
- *
- * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
- */
-struct bpf_core_relo {
- __u32 insn_off;
- __u32 type_id;
- __u32 access_str_off;
- enum bpf_core_relo_kind kind;
-};
typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx);
typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx);
@@ -494,4 +437,14 @@ static inline void *libbpf_ptr(void *ret)
return ret;
}
+static inline bool str_is_empty(const char *s)
+{
+ return !s || !s[0];
+}
+
+static inline bool is_ldimm64_insn(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index ecaae2927ab8..cd8c703dde71 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -75,6 +75,9 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
break;
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+ xattr.expected_attach_type = BPF_CGROUP_GETSOCKOPT;
+ break;
case BPF_PROG_TYPE_SK_LOOKUP:
xattr.expected_attach_type = BPF_SK_LOOKUP;
break;
@@ -104,7 +107,6 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
case BPF_PROG_TYPE_SK_REUSEPORT:
case BPF_PROG_TYPE_FLOW_DISSECTOR:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
- case BPF_PROG_TYPE_CGROUP_SOCKOPT:
case BPF_PROG_TYPE_TRACING:
case BPF_PROG_TYPE_STRUCT_OPS:
case BPF_PROG_TYPE_EXT:
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
new file mode 100644
index 000000000000..4016ed492d0c
--- /dev/null
+++ b/tools/lib/bpf/relo_core.c
@@ -0,0 +1,1295 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2019 Facebook */
+
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <ctype.h>
+#include <linux/err.h>
+
+#include "libbpf.h"
+#include "bpf.h"
+#include "btf.h"
+#include "str_error.h"
+#include "libbpf_internal.h"
+
+#define BPF_CORE_SPEC_MAX_LEN 64
+
+/* represents BPF CO-RE field or array element accessor */
+struct bpf_core_accessor {
+ __u32 type_id; /* struct/union type or array element type */
+ __u32 idx; /* field index or array index */
+ const char *name; /* field name or NULL for array accessor */
+};
+
+struct bpf_core_spec {
+ const struct btf *btf;
+ /* high-level spec: named fields and array indices only */
+ struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+ /* original unresolved (no skip_mods_or_typedefs) root type ID */
+ __u32 root_type_id;
+ /* CO-RE relocation kind */
+ enum bpf_core_relo_kind relo_kind;
+ /* high-level spec length */
+ int len;
+ /* raw, low-level spec: 1-to-1 with accessor spec string */
+ int raw_spec[BPF_CORE_SPEC_MAX_LEN];
+ /* raw spec length */
+ int raw_len;
+ /* field bit offset represented by spec */
+ __u32 bit_offset;
+};
+
+static bool is_flex_arr(const struct btf *btf,
+ const struct bpf_core_accessor *acc,
+ const struct btf_array *arr)
+{
+ const struct btf_type *t;
+
+ /* not a flexible array, if not inside a struct or has non-zero size */
+ if (!acc->name || arr->nelems > 0)
+ return false;
+
+ /* has to be the last member of enclosing struct */
+ t = btf__type_by_id(btf, acc->type_id);
+ return acc->idx == btf_vlen(t) - 1;
+}
+
+static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_FIELD_BYTE_OFFSET: return "byte_off";
+ case BPF_FIELD_BYTE_SIZE: return "byte_sz";
+ case BPF_FIELD_EXISTS: return "field_exists";
+ case BPF_FIELD_SIGNED: return "signed";
+ case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
+ case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
+ case BPF_TYPE_ID_LOCAL: return "local_type_id";
+ case BPF_TYPE_ID_TARGET: return "target_type_id";
+ case BPF_TYPE_EXISTS: return "type_exists";
+ case BPF_TYPE_SIZE: return "type_size";
+ case BPF_ENUMVAL_EXISTS: return "enumval_exists";
+ case BPF_ENUMVAL_VALUE: return "enumval_value";
+ default: return "unknown";
+ }
+}
+
+static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_FIELD_BYTE_OFFSET:
+ case BPF_FIELD_BYTE_SIZE:
+ case BPF_FIELD_EXISTS:
+ case BPF_FIELD_SIGNED:
+ case BPF_FIELD_LSHIFT_U64:
+ case BPF_FIELD_RSHIFT_U64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_TYPE_ID_LOCAL:
+ case BPF_TYPE_ID_TARGET:
+ case BPF_TYPE_EXISTS:
+ case BPF_TYPE_SIZE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_ENUMVAL_EXISTS:
+ case BPF_ENUMVAL_VALUE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * Turn bpf_core_relo into a low- and high-level spec representation,
+ * validating correctness along the way, as well as calculating resulting
+ * field bit offset, specified by accessor string. Low-level spec captures
+ * every single level of nestedness, including traversing anonymous
+ * struct/union members. High-level one only captures semantically meaningful
+ * "turning points": named fields and array indices.
+ * E.g., for this case:
+ *
+ * struct sample {
+ * int __unimportant;
+ * struct {
+ * int __1;
+ * int __2;
+ * int a[7];
+ * };
+ * };
+ *
+ * struct sample *s = ...;
+ *
+ * int x = &s->a[3]; // access string = '0:1:2:3'
+ *
+ * Low-level spec has 1:1 mapping with each element of access string (it's
+ * just a parsed access string representation): [0, 1, 2, 3].
+ *
+ * High-level spec will capture only 3 points:
+ * - initial zero-index access by pointer (&s->... is the same as &s[0]...);
+ * - field 'a' access (corresponds to '2' in low-level spec);
+ * - array element #3 access (corresponds to '3' in low-level spec).
+ *
+ * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
+ * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
+ * spec and raw_spec are kept empty.
+ *
+ * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
+ * string to specify enumerator's value index that needs to be relocated.
+ */
+static int bpf_core_parse_spec(const struct btf *btf,
+ __u32 type_id,
+ const char *spec_str,
+ enum bpf_core_relo_kind relo_kind,
+ struct bpf_core_spec *spec)
+{
+ int access_idx, parsed_len, i;
+ struct bpf_core_accessor *acc;
+ const struct btf_type *t;
+ const char *name;
+ __u32 id;
+ __s64 sz;
+
+ if (str_is_empty(spec_str) || *spec_str == ':')
+ return -EINVAL;
+
+ memset(spec, 0, sizeof(*spec));
+ spec->btf = btf;
+ spec->root_type_id = type_id;
+ spec->relo_kind = relo_kind;
+
+ /* type-based relocations don't have a field access string */
+ if (core_relo_is_type_based(relo_kind)) {
+ if (strcmp(spec_str, "0"))
+ return -EINVAL;
+ return 0;
+ }
+
+ /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
+ while (*spec_str) {
+ if (*spec_str == ':')
+ ++spec_str;
+ if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
+ return -EINVAL;
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+ spec_str += parsed_len;
+ spec->raw_spec[spec->raw_len++] = access_idx;
+ }
+
+ if (spec->raw_len == 0)
+ return -EINVAL;
+
+ t = skip_mods_and_typedefs(btf, type_id, &id);
+ if (!t)
+ return -EINVAL;
+
+ access_idx = spec->raw_spec[0];
+ acc = &spec->spec[0];
+ acc->type_id = id;
+ acc->idx = access_idx;
+ spec->len++;
+
+ if (core_relo_is_enumval_based(relo_kind)) {
+ if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
+ return -EINVAL;
+
+ /* record enumerator name in a first accessor */
+ acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
+ return 0;
+ }
+
+ if (!core_relo_is_field_based(relo_kind))
+ return -EINVAL;
+
+ sz = btf__resolve_size(btf, id);
+ if (sz < 0)
+ return sz;
+ spec->bit_offset = access_idx * sz * 8;
+
+ for (i = 1; i < spec->raw_len; i++) {
+ t = skip_mods_and_typedefs(btf, id, &id);
+ if (!t)
+ return -EINVAL;
+
+ access_idx = spec->raw_spec[i];
+ acc = &spec->spec[spec->len];
+
+ if (btf_is_composite(t)) {
+ const struct btf_member *m;
+ __u32 bit_offset;
+
+ if (access_idx >= btf_vlen(t))
+ return -EINVAL;
+
+ bit_offset = btf_member_bit_offset(t, access_idx);
+ spec->bit_offset += bit_offset;
+
+ m = btf_members(t) + access_idx;
+ if (m->name_off) {
+ name = btf__name_by_offset(btf, m->name_off);
+ if (str_is_empty(name))
+ return -EINVAL;
+
+ acc->type_id = id;
+ acc->idx = access_idx;
+ acc->name = name;
+ spec->len++;
+ }
+
+ id = m->type;
+ } else if (btf_is_array(t)) {
+ const struct btf_array *a = btf_array(t);
+ bool flex;
+
+ t = skip_mods_and_typedefs(btf, a->type, &id);
+ if (!t)
+ return -EINVAL;
+
+ flex = is_flex_arr(btf, acc - 1, a);
+ if (!flex && access_idx >= a->nelems)
+ return -EINVAL;
+
+ spec->spec[spec->len].type_id = id;
+ spec->spec[spec->len].idx = access_idx;
+ spec->len++;
+
+ sz = btf__resolve_size(btf, id);
+ if (sz < 0)
+ return sz;
+ spec->bit_offset += access_idx * sz * 8;
+ } else {
+ pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
+ type_id, spec_str, i, id, btf_kind_str(t));
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/* Check two types for compatibility for the purpose of field access
+ * relocation. const/volatile/restrict and typedefs are skipped to ensure we
+ * are relocating semantically compatible entities:
+ * - any two STRUCTs/UNIONs are compatible and can be mixed;
+ * - any two FWDs are compatible, if their names match (modulo flavor suffix);
+ * - any two PTRs are always compatible;
+ * - for ENUMs, names should be the same (ignoring flavor suffix) or at
+ * least one of enums should be anonymous;
+ * - for ENUMs, sizes are not compared (only names are, as described above);
+ * - for INT, size and signedness are ignored;
+ * - any two FLOATs are always compatible;
+ * - for ARRAY, dimensionality is ignored, element types are checked for
+ * compatibility recursively;
+ * - everything else shouldn't be ever a target of relocation.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
+static int bpf_core_fields_are_compat(const struct btf *local_btf,
+ __u32 local_id,
+ const struct btf *targ_btf,
+ __u32 targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+
+recur:
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!local_type || !targ_type)
+ return -EINVAL;
+
+ if (btf_is_composite(local_type) && btf_is_composite(targ_type))
+ return 1;
+ if (btf_kind(local_type) != btf_kind(targ_type))
+ return 0;
+
+ switch (btf_kind(local_type)) {
+ case BTF_KIND_PTR:
+ case BTF_KIND_FLOAT:
+ return 1;
+ case BTF_KIND_FWD:
+ case BTF_KIND_ENUM: {
+ const char *local_name, *targ_name;
+ size_t local_len, targ_len;
+
+ local_name = btf__name_by_offset(local_btf,
+ local_type->name_off);
+ targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
+ local_len = bpf_core_essential_name_len(local_name);
+ targ_len = bpf_core_essential_name_len(targ_name);
+ /* one of them is anonymous or both w/ same flavor-less names */
+ return local_len == 0 || targ_len == 0 ||
+ (local_len == targ_len &&
+ strncmp(local_name, targ_name, local_len) == 0);
+ }
+ case BTF_KIND_INT:
+ /* just reject deprecated bitfield-like integers; all other
+ * integers are by default compatible between each other
+ */
+ return btf_int_offset(local_type) == 0 &&
+ btf_int_offset(targ_type) == 0;
+ case BTF_KIND_ARRAY:
+ local_id = btf_array(local_type)->type;
+ targ_id = btf_array(targ_type)->type;
+ goto recur;
+ default:
+ pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
+ btf_kind(local_type), local_id, targ_id);
+ return 0;
+ }
+}
+
+/*
+ * Given single high-level named field accessor in local type, find
+ * corresponding high-level accessor for a target type. Along the way,
+ * maintain low-level spec for target as well. Also keep updating target
+ * bit offset.
+ *
+ * Searching is performed through recursive exhaustive enumeration of all
+ * fields of a struct/union. If there are any anonymous (embedded)
+ * structs/unions, they are recursively searched as well. If field with
+ * desired name is found, check compatibility between local and target types,
+ * before returning result.
+ *
+ * 1 is returned, if field is found.
+ * 0 is returned if no compatible field is found.
+ * <0 is returned on error.
+ */
+static int bpf_core_match_member(const struct btf *local_btf,
+ const struct bpf_core_accessor *local_acc,
+ const struct btf *targ_btf,
+ __u32 targ_id,
+ struct bpf_core_spec *spec,
+ __u32 *next_targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+ const struct btf_member *local_member, *m;
+ const char *local_name, *targ_name;
+ __u32 local_id;
+ int i, n, found;
+
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!targ_type)
+ return -EINVAL;
+ if (!btf_is_composite(targ_type))
+ return 0;
+
+ local_id = local_acc->type_id;
+ local_type = btf__type_by_id(local_btf, local_id);
+ local_member = btf_members(local_type) + local_acc->idx;
+ local_name = btf__name_by_offset(local_btf, local_member->name_off);
+
+ n = btf_vlen(targ_type);
+ m = btf_members(targ_type);
+ for (i = 0; i < n; i++, m++) {
+ __u32 bit_offset;
+
+ bit_offset = btf_member_bit_offset(targ_type, i);
+
+ /* too deep struct/union/array nesting */
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+
+ /* speculate this member will be the good one */
+ spec->bit_offset += bit_offset;
+ spec->raw_spec[spec->raw_len++] = i;
+
+ targ_name = btf__name_by_offset(targ_btf, m->name_off);
+ if (str_is_empty(targ_name)) {
+ /* embedded struct/union, we need to go deeper */
+ found = bpf_core_match_member(local_btf, local_acc,
+ targ_btf, m->type,
+ spec, next_targ_id);
+ if (found) /* either found or error */
+ return found;
+ } else if (strcmp(local_name, targ_name) == 0) {
+ /* matching named field */
+ struct bpf_core_accessor *targ_acc;
+
+ targ_acc = &spec->spec[spec->len++];
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = i;
+ targ_acc->name = targ_name;
+
+ *next_targ_id = m->type;
+ found = bpf_core_fields_are_compat(local_btf,
+ local_member->type,
+ targ_btf, m->type);
+ if (!found)
+ spec->len--; /* pop accessor */
+ return found;
+ }
+ /* member turned out not to be what we looked for */
+ spec->bit_offset -= bit_offset;
+ spec->raw_len--;
+ }
+
+ return 0;
+}
+
+/*
+ * Try to match local spec to a target type and, if successful, produce full
+ * target spec (high-level, low-level + bit offset).
+ */
+static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
+ const struct btf *targ_btf, __u32 targ_id,
+ struct bpf_core_spec *targ_spec)
+{
+ const struct btf_type *targ_type;
+ const struct bpf_core_accessor *local_acc;
+ struct bpf_core_accessor *targ_acc;
+ int i, sz, matched;
+
+ memset(targ_spec, 0, sizeof(*targ_spec));
+ targ_spec->btf = targ_btf;
+ targ_spec->root_type_id = targ_id;
+ targ_spec->relo_kind = local_spec->relo_kind;
+
+ if (core_relo_is_type_based(local_spec->relo_kind)) {
+ return bpf_core_types_are_compat(local_spec->btf,
+ local_spec->root_type_id,
+ targ_btf, targ_id);
+ }
+
+ local_acc = &local_spec->spec[0];
+ targ_acc = &targ_spec->spec[0];
+
+ if (core_relo_is_enumval_based(local_spec->relo_kind)) {
+ size_t local_essent_len, targ_essent_len;
+ const struct btf_enum *e;
+ const char *targ_name;
+
+ /* has to resolve to an enum */
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
+ if (!btf_is_enum(targ_type))
+ return 0;
+
+ local_essent_len = bpf_core_essential_name_len(local_acc->name);
+
+ for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
+ targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
+ targ_essent_len = bpf_core_essential_name_len(targ_name);
+ if (targ_essent_len != local_essent_len)
+ continue;
+ if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = i;
+ targ_acc->name = targ_name;
+ targ_spec->len++;
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+ targ_spec->raw_len++;
+ return 1;
+ }
+ }
+ return 0;
+ }
+
+ if (!core_relo_is_field_based(local_spec->relo_kind))
+ return -EINVAL;
+
+ for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
+ &targ_id);
+ if (!targ_type)
+ return -EINVAL;
+
+ if (local_acc->name) {
+ matched = bpf_core_match_member(local_spec->btf,
+ local_acc,
+ targ_btf, targ_id,
+ targ_spec, &targ_id);
+ if (matched <= 0)
+ return matched;
+ } else {
+ /* for i=0, targ_id is already treated as array element
+ * type (because it's the original struct), for others
+ * we should find array element type first
+ */
+ if (i > 0) {
+ const struct btf_array *a;
+ bool flex;
+
+ if (!btf_is_array(targ_type))
+ return 0;
+
+ a = btf_array(targ_type);
+ flex = is_flex_arr(targ_btf, targ_acc - 1, a);
+ if (!flex && local_acc->idx >= a->nelems)
+ return 0;
+ if (!skip_mods_and_typedefs(targ_btf, a->type,
+ &targ_id))
+ return -EINVAL;
+ }
+
+ /* too deep struct/union/array nesting */
+ if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = local_acc->idx;
+ targ_acc->name = NULL;
+ targ_spec->len++;
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+ targ_spec->raw_len++;
+
+ sz = btf__resolve_size(targ_btf, targ_id);
+ if (sz < 0)
+ return sz;
+ targ_spec->bit_offset += local_acc->idx * sz * 8;
+ }
+ }
+
+ return 1;
+}
+
+/* Compute the value of one field-based relocation for a single spec.
+ *
+ * prog_name: program name, used only in warning messages.
+ * relo:      relocation record; relo->kind selects which aspect is computed.
+ * spec:      resolved access spec, or NULL when nothing was matched.
+ * val:       out, receives the computed relocation value.
+ * field_sz:  out, reset to 0 on entry; set to the accessed size in bytes only
+ *            for BPF_FIELD_BYTE_OFFSET on array-element and non-bitfield
+ *            accesses.
+ * type_id:   out, written only in the same cases as field_sz.
+ * validate:  optional out (may be NULL), tells whether *val may be checked
+ *            against the value found in the instruction.
+ *
+ * Returns 0 on success. Error codes visible in this function:
+ *   -EUCLEAN    spec is NULL for any kind other than BPF_FIELD_EXISTS
+ *   -EINVAL     size can't be resolved, or kind not applicable to a[n] access
+ *   -E2BIG      bitfield can't be covered by a single load of up to 8 bytes
+ *   -EOPNOTSUPP unhandled relocation kind
+ */
+static int bpf_core_calc_field_relo(const char *prog_name,
+ const struct bpf_core_relo *relo,
+ const struct bpf_core_spec *spec,
+ __u32 *val, __u32 *field_sz, __u32 *type_id,
+ bool *validate)
+{
+ const struct bpf_core_accessor *acc;
+ const struct btf_type *t;
+ __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
+ const struct btf_member *m;
+ const struct btf_type *mt;
+ bool bitfield;
+ __s64 sz;
+
+ *field_sz = 0;
+
+ /* existence is answered purely by whether a spec was found */
+ if (relo->kind == BPF_FIELD_EXISTS) {
+ *val = spec ? 1 : 0;
+ return 0;
+ }
+
+ if (!spec)
+ return -EUCLEAN; /* request instruction poisoning */
+
+ /* only the last accessor of the spec is examined from here on */
+ acc = &spec->spec[spec->len - 1];
+ t = btf__type_by_id(spec->btf, acc->type_id);
+
+ /* a[n] accessor needs special handling */
+ if (!acc->name) {
+ if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
+ *val = spec->bit_offset / 8;
+ /* remember field size for load/store mem size */
+ sz = btf__resolve_size(spec->btf, acc->type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *field_sz = sz;
+ *type_id = acc->type_id;
+ } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
+ sz = btf__resolve_size(spec->btf, acc->type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *val = sz;
+ } else {
+ pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
+ prog_name, relo->kind, relo->insn_off / 8);
+ return -EINVAL;
+ }
+ if (validate)
+ *validate = true;
+ return 0;
+ }
+
+ /* named member: t is the containing type, m the member, mt its type
+ * with modifiers and typedefs skipped
+ */
+ m = btf_members(t) + acc->idx;
+ mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
+ bit_off = spec->bit_offset;
+ bit_sz = btf_member_bitfield_size(t, acc->idx);
+
+ /* a non-zero bitfield size is what marks the member as a bitfield */
+ bitfield = bit_sz > 0;
+ if (bitfield) {
+ /* start from the member type's size, aligned down to that size */
+ byte_sz = mt->size;
+ byte_off = bit_off / 8 / byte_sz * byte_sz;
+ /* figure out smallest int size necessary for bitfield load */
+ while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
+ if (byte_sz >= 8) {
+ /* bitfield can't be read with 64-bit read */
+ pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
+ prog_name, relo->kind, relo->insn_off / 8);
+ return -E2BIG;
+ }
+ /* double the load size and re-align the starting byte */
+ byte_sz *= 2;
+ byte_off = bit_off / 8 / byte_sz * byte_sz;
+ }
+ } else {
+ sz = btf__resolve_size(spec->btf, field_type_id);
+ if (sz < 0)
+ return -EINVAL;
+ byte_sz = sz;
+ byte_off = spec->bit_offset / 8;
+ bit_sz = byte_sz * 8;
+ }
+
+ /* for bitfields, all the relocatable aspects are ambiguous and we
+ * might disagree with compiler, so turn off validation of expected
+ * value, except for signedness
+ */
+ if (validate)
+ *validate = !bitfield;
+
+ switch (relo->kind) {
+ case BPF_FIELD_BYTE_OFFSET:
+ *val = byte_off;
+ /* size/type are reported only for non-bitfields */
+ if (!bitfield) {
+ *field_sz = byte_sz;
+ *type_id = field_type_id;
+ }
+ break;
+ case BPF_FIELD_BYTE_SIZE:
+ *val = byte_sz;
+ break;
+ case BPF_FIELD_SIGNED:
+ /* enums will be assumed unsigned */
+ /* NOTE(review): the expression below evaluates to 1 for enums,
+ * which does not match the comment above - confirm which of
+ * the two reflects the intended behavior.
+ */
+ *val = btf_is_enum(mt) ||
+ (btf_int_encoding(mt) & BTF_INT_SIGNED);
+ if (validate)
+ *validate = true; /* signedness is never ambiguous */
+ break;
+ case BPF_FIELD_LSHIFT_U64:
+ /* shift amount depends on the byte order this code is built for */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ *val = 64 - (bit_off + bit_sz - byte_off * 8);
+#else
+ *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
+#endif
+ break;
+ case BPF_FIELD_RSHIFT_U64:
+ *val = 64 - bit_sz;
+ if (validate)
+ *validate = true; /* right shift is never ambiguous */
+ break;
+ case BPF_FIELD_EXISTS:
+ /* already handled at the top of the function */
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/* Compute the value of a type-based relocation for one spec.
+ *
+ * A NULL spec means the type was not found; *val is then 0 for every kind.
+ * Returns 0 on success, -EINVAL if the type size can't be resolved, and
+ * -EOPNOTSUPP for kinds this function doesn't handle.
+ */
+static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
+				   const struct bpf_core_spec *spec,
+				   __u32 *val)
+{
+	/* type-based relos return zero when target type is not found */
+	if (spec == NULL) {
+		*val = 0;
+		return 0;
+	}
+
+	switch (relo->kind) {
+	case BPF_TYPE_EXISTS:
+		*val = 1;
+		return 0;
+	case BPF_TYPE_ID_TARGET:
+		*val = spec->root_type_id;
+		return 0;
+	case BPF_TYPE_SIZE: {
+		__s64 type_sz = btf__resolve_size(spec->btf, spec->root_type_id);
+
+		if (type_sz < 0)
+			return -EINVAL;
+		*val = type_sz;
+		return 0;
+	}
+	case BPF_TYPE_ID_LOCAL:
+		/* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/* Compute the value of an enum-value-based relocation for one spec.
+ *
+ * BPF_ENUMVAL_EXISTS yields 1 or 0 depending on whether a spec is present.
+ * BPF_ENUMVAL_VALUE yields the enumerator's value, or -EUCLEAN (poison
+ * request) when there is no spec. Any other kind returns -EOPNOTSUPP.
+ */
+static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
+				      const struct bpf_core_spec *spec,
+				      __u32 *val)
+{
+	const struct btf_type *enum_type;
+	const struct btf_enum *enumerator;
+
+	if (relo->kind == BPF_ENUMVAL_EXISTS) {
+		*val = spec ? 1 : 0;
+		return 0;
+	}
+
+	if (relo->kind != BPF_ENUMVAL_VALUE)
+		return -EOPNOTSUPP;
+
+	if (!spec)
+		return -EUCLEAN; /* request instruction poisoning */
+
+	/* the first accessor carries both the enum type and the enumerator index */
+	enum_type = btf__type_by_id(spec->btf, spec->spec[0].type_id);
+	enumerator = &btf_enum(enum_type)[spec->spec[0].idx];
+	*val = enumerator->val;
+
+	return 0;
+}
+
+/* Outcome of calculating one relocation for one (local, target) spec pair;
+ * filled in by bpf_core_calc_relo() and consumed by bpf_core_patch_insn().
+ */
+struct bpf_core_relo_res
+{
+ /* expected value in the instruction, unless validate == false */
+ __u32 orig_val;
+ /* new value that needs to be patched up to */
+ __u32 new_val;
+ /* relocation unsuccessful, poison instruction, but don't fail load */
+ bool poison;
+ /* some relocations can't be validated against orig_val */
+ bool validate;
+ /* for field byte offset relocations of the forms:
+ * *(T *)(rX + <off>) = rY
+ * rX = *(T *)(rY + <off>),
+ * we remember original and resolved field size to adjust direct
+ * memory loads of pointers and integers; this is necessary for 32-bit
+ * host kernel architectures, but also allows to automatically
+ * relocate fields that were resized from, e.g., u32 to u64, etc.
+ */
+ /* set when sizes differ and the size adjustment was judged unsafe */
+ bool fail_memsz_adjust;
+ /* field size and BTF type ID as seen through the local spec */
+ __u32 orig_sz;
+ __u32 orig_type_id;
+ /* field size and BTF type ID as seen through the target spec */
+ __u32 new_sz;
+ __u32 new_type_id;
+};
+
+/* Calculate original and target relocation values, given local and target
+ * specs and relocation kind. These values are calculated for each candidate.
+ * If there are multiple candidates, resulting values should all be consistent
+ * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
+ * If instruction has to be poisoned, res->poison will be set to true.
+ *
+ * prog_name and relo_idx are used only for log messages. targ_spec may be
+ * NULL when no target matched; the per-kind helpers decide what that means.
+ * All fields of *res are reset before any calculation is attempted.
+ *
+ * Returns 0 on success (including the poison case) or a negative error code;
+ * -EOPNOTSUPP is additionally logged as an unrecognized relocation.
+ */
+static int bpf_core_calc_relo(const char *prog_name,
+ const struct bpf_core_relo *relo,
+ int relo_idx,
+ const struct bpf_core_spec *local_spec,
+ const struct bpf_core_spec *targ_spec,
+ struct bpf_core_relo_res *res)
+{
+ /* stays -EOPNOTSUPP if relo->kind matches none of the branches below */
+ int err = -EOPNOTSUPP;
+
+ res->orig_val = 0;
+ res->new_val = 0;
+ res->poison = false;
+ res->validate = true;
+ res->fail_memsz_adjust = false;
+ res->orig_sz = res->new_sz = 0;
+ res->orig_type_id = res->new_type_id = 0;
+
+ if (core_relo_is_field_based(relo->kind)) {
+ /* local side decides whether validation is possible; the target
+ * side is only computed if the local side succeeded
+ */
+ err = bpf_core_calc_field_relo(prog_name, relo, local_spec,
+ &res->orig_val, &res->orig_sz,
+ &res->orig_type_id, &res->validate);
+ err = err ?: bpf_core_calc_field_relo(prog_name, relo, targ_spec,
+ &res->new_val, &res->new_sz,
+ &res->new_type_id, NULL);
+ if (err)
+ goto done;
+ /* Validate if it's safe to adjust load/store memory size.
+ * Adjustments are performed only if original and new memory
+ * sizes differ.
+ */
+ res->fail_memsz_adjust = false;
+ if (res->orig_sz != res->new_sz) {
+ const struct btf_type *orig_t, *new_t;
+
+ orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
+ new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
+
+ /* There are two use cases in which it's safe to
+ * adjust load/store's mem size:
+ * - reading a 32-bit kernel pointer, while on BPF
+ * size pointers are always 64-bit; in this case
+ * it's safe to "downsize" instruction size due to
+ * pointer being treated as unsigned integer with
+ * zero-extended upper 32-bits;
+ * - reading unsigned integers, again due to
+ * zero-extension is preserving the value correctly.
+ *
+ * In all other cases it's incorrect to attempt to
+ * load/store field because read value will be
+ * incorrect, so we poison relocated instruction.
+ */
+ if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
+ goto done;
+ if (btf_is_int(orig_t) && btf_is_int(new_t) &&
+ btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
+ btf_int_encoding(new_t) != BTF_INT_SIGNED)
+ goto done;
+
+ /* mark as invalid mem size adjustment, but this will
+ * only be checked for LDX/STX/ST insns
+ */
+ res->fail_memsz_adjust = true;
+ }
+ } else if (core_relo_is_type_based(relo->kind)) {
+ err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
+ err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
+ } else if (core_relo_is_enumval_based(relo->kind)) {
+ err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
+ err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
+ }
+
+done:
+ if (err == -EUCLEAN) {
+ /* EUCLEAN is used to signal instruction poisoning request */
+ res->poison = true;
+ err = 0;
+ } else if (err == -EOPNOTSUPP) {
+ /* EOPNOTSUPP means unknown/unsupported relocation */
+ pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
+ prog_name, relo_idx, core_relo_kind_str(relo->kind),
+ relo->kind, relo->insn_off / 8);
+ }
+
+ return err;
+}
+
+/*
+ * Overwrite an instruction whose CO-RE relocation failed with a deliberately
+ * invalid instruction that carries an easily recognizable signature.
+ */
+static void bpf_core_poison_insn(const char *prog_name, int relo_idx,
+				 int insn_idx, struct bpf_insn *insn)
+{
+	pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
+		 prog_name, relo_idx, insn_idx);
+
+	/* a call to a non-existent helper; if the instruction is reachable
+	 * (i.e. not dead code), the verifier rejects it with:
+	 *   invalid func unknown#195896080
+	 */
+	insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+	insn->off = 0;
+	insn->src_reg = 0;
+	insn->dst_reg = 0;
+	insn->code = BPF_JMP | BPF_CALL;
+}
+
+/* Translate the size bits of a BPF memory instruction into a byte count;
+ * returns -1 if the size bits match none of B/H/W/DW.
+ */
+static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
+{
+	const int size_bits = BPF_SIZE(insn->code);
+
+	if (size_bits == BPF_B)
+		return 1;
+	if (size_bits == BPF_H)
+		return 2;
+	if (size_bits == BPF_W)
+		return 4;
+	if (size_bits == BPF_DW)
+		return 8;
+
+	return -1;
+}
+
+/* Translate a byte count (1, 2, 4 or 8) into the matching BPF size bits;
+ * returns -1 for any other byte count.
+ */
+static int insn_bytes_to_bpf_size(__u32 sz)
+{
+	if (sz == 1)
+		return BPF_B;
+	if (sz == 2)
+		return BPF_H;
+	if (sz == 4)
+		return BPF_W;
+	if (sz == 8)
+		return BPF_DW;
+
+	return -1;
+}
+
+/*
+ * Patch relocatable BPF instruction.
+ *
+ * Patched value is determined by relocation kind and target specification.
+ * For existence relocations target spec will be NULL if field/type is not found.
+ * Expected insn->imm value is determined using relocation kind and local
+ * spec, and is checked before patching instruction. If actual insn->imm value
+ * is wrong, bail out with error.
+ *
+ * Currently supported classes of BPF instruction are:
+ * 1. rX = <imm> (assignment with immediate operand);
+ * 2. rX += <imm> (arithmetic operations with immediate operand);
+ * 3. rX = <imm64> (load with 64-bit immediate value);
+ * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
+ * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
+ * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
+ *
+ * prog_name, insn_idx and relo_idx are used only for log messages; relo is
+ * not consulted here. res carries the precomputed original/new values and
+ * the poison/validate/mem-size decisions.
+ *
+ * Returns 0 if the instruction was patched or poisoned, -EINVAL if the
+ * instruction has an unexpected form or value, and -ERANGE if the new offset
+ * doesn't fit into a load/store instruction's offset field.
+ */
+static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
+			       int insn_idx, const struct bpf_core_relo *relo,
+			       int relo_idx, const struct bpf_core_relo_res *res)
+{
+	__u32 orig_val, new_val;
+	__u8 class;
+
+	class = BPF_CLASS(insn->code);
+
+	if (res->poison) {
+poison:
+		/* poison second part of ldimm64 to avoid confusing error from
+		 * verifier about "unknown opcode 00"
+		 */
+		if (is_ldimm64_insn(insn))
+			bpf_core_poison_insn(prog_name, relo_idx, insn_idx + 1, insn + 1);
+		bpf_core_poison_insn(prog_name, relo_idx, insn_idx, insn);
+		return 0;
+	}
+
+	orig_val = res->orig_val;
+	new_val = res->new_val;
+
+	switch (class) {
+	case BPF_ALU:
+	case BPF_ALU64:
+		/* only the immediate-operand form can be relocated */
+		if (BPF_SRC(insn->code) != BPF_K)
+			return -EINVAL;
+		if (res->validate && insn->imm != orig_val) {
+			pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
+				prog_name, relo_idx,
+				insn_idx, insn->imm, orig_val, new_val);
+			return -EINVAL;
+		}
+		/* log what was actually in the instruction, not the expectation */
+		orig_val = insn->imm;
+		insn->imm = new_val;
+		pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
+			 prog_name, relo_idx, insn_idx,
+			 orig_val, new_val);
+		break;
+	case BPF_LDX:
+	case BPF_ST:
+	case BPF_STX:
+		if (res->validate && insn->off != orig_val) {
+			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
+				prog_name, relo_idx, insn_idx, insn->off, orig_val, new_val);
+			return -EINVAL;
+		}
+		if (new_val > SHRT_MAX) {
+			pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
+				prog_name, relo_idx, insn_idx, new_val);
+			return -ERANGE;
+		}
+		if (res->fail_memsz_adjust) {
+			pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
+				"Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
+				prog_name, relo_idx, insn_idx);
+			goto poison;
+		}
+
+		orig_val = insn->off;
+		insn->off = new_val;
+		pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
+			 prog_name, relo_idx, insn_idx, orig_val, new_val);
+
+		/* field was resized in the target: rewrite the access width too */
+		if (res->new_sz != res->orig_sz) {
+			int insn_bytes_sz, insn_bpf_sz;
+
+			insn_bytes_sz = insn_bpf_size_to_bytes(insn);
+			if (insn_bytes_sz != res->orig_sz) {
+				pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
+					prog_name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
+				return -EINVAL;
+			}
+
+			insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
+			if (insn_bpf_sz < 0) {
+				pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
+					prog_name, relo_idx, insn_idx, res->new_sz);
+				return -EINVAL;
+			}
+
+			/* keep mode and class bits, replace only the size bits */
+			insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
+			pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
+				 prog_name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
+		}
+		break;
+	case BPF_LD: {
+		__u64 imm;
+
+		if (!is_ldimm64_insn(insn) ||
+		    insn[0].src_reg != 0 || insn[0].off != 0 ||
+		    insn[1].code != 0 || insn[1].dst_reg != 0 ||
+		    insn[1].src_reg != 0 || insn[1].off != 0) {
+			pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
+				prog_name, relo_idx, insn_idx);
+			return -EINVAL;
+		}
+
+		/* The low half must be zero-extended before being combined:
+		 * imm is a signed 32-bit field, so adding it directly to
+		 * a 64-bit value would sign-extend a negative low half and
+		 * corrupt the upper 32 bits of the reconstructed immediate.
+		 */
+		imm = (__u32)insn[0].imm | ((__u64)insn[1].imm << 32);
+		if (res->validate && imm != orig_val) {
+			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
+				prog_name, relo_idx,
+				insn_idx, (unsigned long long)imm,
+				orig_val, new_val);
+			return -EINVAL;
+		}
+
+		insn[0].imm = new_val;
+		insn[1].imm = 0; /* currently only 32-bit values are supported */
+		pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
+			 prog_name, relo_idx, insn_idx,
+			 (unsigned long long)imm, new_val);
+		break;
+	}
+	default:
+		pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
+			prog_name, relo_idx, insn_idx, insn->code,
+			insn->src_reg, insn->dst_reg, insn->off, insn->imm);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Print a spec through libbpf_print() at the given level, in the format:
+ * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
+ * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
+ */
+static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
+{
+	const __u32 root_id = spec->root_type_id;
+	const struct btf_type *root_type = btf__type_by_id(spec->btf, root_id);
+	const char *name = btf__name_by_offset(spec->btf, root_type->name_off);
+	int k;
+
+	libbpf_print(level, "[%u] %s %s", root_id, btf_kind_str(root_type), str_is_empty(name) ? "<anon>" : name);
+
+	/* type-based relos have nothing to show beyond the root type */
+	if (core_relo_is_type_based(spec->relo_kind))
+		return;
+
+	/* enum value relos: append the enumerator's name and value */
+	if (core_relo_is_enumval_based(spec->relo_kind)) {
+		const struct btf_type *enum_type;
+		const struct btf_enum *enumerator;
+
+		enum_type = skip_mods_and_typedefs(spec->btf, root_id, NULL);
+		enumerator = &btf_enum(enum_type)[spec->raw_spec[0]];
+		name = btf__name_by_offset(spec->btf, enumerator->name_off);
+
+		libbpf_print(level, "::%s = %u", name, enumerator->val);
+		return;
+	}
+
+	if (!core_relo_is_field_based(spec->relo_kind))
+		return;
+
+	/* C-like access path; a leading [0] array access is left out */
+	for (k = 0; k < spec->len; k++) {
+		const struct bpf_core_accessor *acc = &spec->spec[k];
+
+		if (acc->name)
+			libbpf_print(level, ".%s", acc->name);
+		else if (k > 0 || acc->idx > 0)
+			libbpf_print(level, "[%u]", acc->idx);
+	}
+
+	/* raw spec as colon-separated indices */
+	libbpf_print(level, " (");
+	for (k = 0; k < spec->raw_len; k++)
+		libbpf_print(level, "%s%d", k == 0 ? "" : ":", spec->raw_spec[k]);
+
+	/* byte offset, plus the bit remainder when not byte-aligned */
+	if (spec->bit_offset % 8 == 0)
+		libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
+	else
+		libbpf_print(level, " @ offset %u.%u)",
+			     spec->bit_offset / 8, spec->bit_offset % 8);
+}
+
+/*
+ * CO-RE relocate single instruction.
+ *
+ * The outline and important points of the algorithm:
+ * 1. For given local type, find corresponding candidate target types.
+ * Candidate type is a type with the same "essential" name, ignoring
+ * everything after last triple underscore (___). E.g., `sample`,
+ * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
+ * for each other. Names with triple underscore are referred to as
+ * "flavors" and are useful, among other things, to allow to
+ * specify/support incompatible variations of the same kernel struct, which
+ * might differ between different kernel versions and/or build
+ * configurations.
+ *
+ * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
+ * converter, when deduplicated BTF of a kernel still contains more than
+ * one different types with the same name. In that case, ___2, ___3, etc
+ * are appended starting from second name conflict. But start flavors are
+ * also useful to be defined "locally", in BPF program, to extract same
+ * data from incompatible changes between different kernel
+ * versions/configurations. For instance, to handle field renames between
+ * kernel versions, one can use two flavors of the struct name with the
+ * same common name and use conditional relocations to extract that field,
+ * depending on target kernel version.
+ * 2. For each candidate type, try to match local specification to this
+ * candidate target type. Matching involves finding corresponding
+ * high-level spec accessors, meaning that all named fields should match,
+ * as well as all array accesses should be within the actual bounds. Also,
+ * types should be compatible (see bpf_core_fields_are_compat for details).
+ * 3. It is supported and expected that there might be multiple flavors
+ * matching the spec. As long as all the specs resolve to the same set of
+ * offsets across all candidates, there is no error. If there is any
+ * ambiguity, CO-RE relocation will fail. This is necessary to accommodate
+ * imperfection of BTF deduplication, which can cause slight duplication of
+ * the same BTF type, if some directly or indirectly referenced (by
+ * pointer) type gets resolved to different actual types in different
+ * object files. If such situation occurs, deduplicated BTF will end up
+ * with two (or more) structurally identical types, which differ only in
+ * types they refer to through pointer. This should be OK in most cases and
+ * is not an error.
+ * 4. Candidate types search is performed by linearly scanning through all
+ * types in target BTF. It is anticipated that this is overall more
+ * efficient memory-wise and not significantly worse (if not better)
+ * CPU-wise compared to prebuilding a map from all local type names to
+ * a list of candidate type names. It's also sped up by caching resolved
+ * list of matching candidates per each local "root" type ID, that has at
+ * least one bpf_core_relo associated with it. This list is shared
+ * between multiple relocations for the same type ID and is updated as some
+ * of the candidates are pruned due to structural incompatibility.
+ */
+/* Resolve one CO-RE relocation against a list of candidate target types and
+ * patch (or poison) the corresponding instruction.
+ *
+ * prog_name: program name, used only for log messages.
+ * insn:      instruction to patch (and its successor for ldimm64).
+ * insn_idx:  index of insn, used for log messages.
+ * relo:      relocation record being applied.
+ * relo_idx:  index of relo, used only for log messages.
+ * local_btf: BTF the relocation's type_id and access string refer to.
+ * cands:     candidate target types; on return it is narrowed in place to
+ *            the candidates that matched, unless none matched, in which case
+ *            it is left unchanged. Not consulted for BPF_TYPE_ID_LOCAL.
+ *
+ * Returns 0 on success (including when the instruction was poisoned),
+ * -EOPNOTSUPP for relocations against anonymous local types, and a negative
+ * error code on parsing, matching, ambiguity or patching failures.
+ */
+int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
+			     int insn_idx,
+			     const struct bpf_core_relo *relo,
+			     int relo_idx,
+			     const struct btf *local_btf,
+			     struct bpf_core_cand_list *cands)
+{
+	struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
+	/* targ_res is zeroed so that the BPF_TYPE_ID_LOCAL shortcut below
+	 * never hands partially initialized data to bpf_core_patch_insn()
+	 */
+	struct bpf_core_relo_res cand_res, targ_res = {};
+	const struct btf_type *local_type;
+	const char *local_name;
+	__u32 local_id;
+	const char *spec_str;
+	int i, j, err;
+
+	local_id = relo->type_id;
+	local_type = btf__type_by_id(local_btf, local_id);
+	if (!local_type)
+		return -EINVAL;
+
+	local_name = btf__name_by_offset(local_btf, local_type->name_off);
+	if (!local_name)
+		return -EINVAL;
+
+	spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
+	if (str_is_empty(spec_str))
+		return -EINVAL;
+
+	err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
+	if (err) {
+		pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
+			prog_name, relo_idx, local_id, btf_kind_str(local_type),
+			str_is_empty(local_name) ? "<anon>" : local_name,
+			spec_str, err);
+		return -EINVAL;
+	}
+
+	pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name,
+		 relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+	bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
+	libbpf_print(LIBBPF_DEBUG, "\n");
+
+	/* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
+	if (relo->kind == BPF_TYPE_ID_LOCAL) {
+		targ_res.validate = true;
+		targ_res.poison = false;
+		targ_res.orig_val = local_spec.root_type_id;
+		targ_res.new_val = local_spec.root_type_id;
+		goto patch_insn;
+	}
+
+	/* libbpf doesn't support candidate search for anonymous types.
+	 * The check has to look at the type's name: spec_str was already
+	 * rejected above when empty, so testing it here could never trigger.
+	 */
+	if (str_is_empty(local_name)) {
+		pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
+			prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+		return -EOPNOTSUPP;
+	}
+
+	/* i walks all candidates, j counts (and compacts) the matching ones */
+	for (i = 0, j = 0; i < cands->len; i++) {
+		err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
+					  cands->cands[i].id, &cand_spec);
+		if (err < 0) {
+			pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
+				prog_name, relo_idx, i);
+			bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
+			libbpf_print(LIBBPF_WARN, ": %d\n", err);
+			return err;
+		}
+
+		pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name,
+			 relo_idx, err == 0 ? "non-matching" : "matching", i);
+		bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
+		libbpf_print(LIBBPF_DEBUG, "\n");
+
+		if (err == 0)
+			continue;
+
+		err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
+		if (err)
+			return err;
+
+		if (j == 0) {
+			/* first match becomes the reference result */
+			targ_res = cand_res;
+			targ_spec = cand_spec;
+		} else if (cand_spec.bit_offset != targ_spec.bit_offset) {
+			/* if there are many field relo candidates, they
+			 * should all resolve to the same bit offset
+			 */
+			pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
+				prog_name, relo_idx, cand_spec.bit_offset,
+				targ_spec.bit_offset);
+			return -EINVAL;
+		} else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
+			/* all candidates should result in the same relocation
+			 * decision and value, otherwise it's dangerous to
+			 * proceed due to ambiguity
+			 */
+			pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
+				prog_name, relo_idx,
+				cand_res.poison ? "failure" : "success", cand_res.new_val,
+				targ_res.poison ? "failure" : "success", targ_res.new_val);
+			return -EINVAL;
+		}
+
+		cands->cands[j++] = cands->cands[i];
+	}
+
+	/*
+	 * For BPF_FIELD_EXISTS relo or when used BPF program has field
+	 * existence checks or kernel version/config checks, it's expected
+	 * that we might not find any candidates. In this case, if field
+	 * wasn't found in any candidate, the list of candidates shouldn't
+	 * change at all, we'll just handle relocating appropriately,
+	 * depending on relo's kind.
+	 */
+	if (j > 0)
+		cands->len = j;
+
+	/*
+	 * If no candidates were found, it might be both a programmer error,
+	 * as well as expected case, depending whether instruction w/
+	 * relocation is guarded in some way that makes it unreachable (dead
+	 * code) if relocation can't be resolved. This is handled in
+	 * bpf_core_patch_insn() uniformly by replacing that instruction with
+	 * BPF helper call insn (using invalid helper ID). If that instruction
+	 * is indeed unreachable, then it will be ignored and eliminated by
+	 * verifier. If it was an error, then verifier will complain and point
+	 * to a specific instruction number in its log.
+	 */
+	if (j == 0) {
+		pr_debug("prog '%s': relo #%d: no matching targets found\n",
+			 prog_name, relo_idx);
+
+		/* calculate single target relo result explicitly */
+		err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res);
+		if (err)
+			return err;
+	}
+
+patch_insn:
+	/* bpf_core_patch_insn() should know how to handle missing targ_spec */
+	err = bpf_core_patch_insn(prog_name, insn, insn_idx, relo, relo_idx, &targ_res);
+	if (err) {
+		pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
+			prog_name, relo_idx, relo->insn_off / 8, err);
+		return -EINVAL;
+	}
+
+	return 0;
+}
diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h
new file mode 100644
index 000000000000..3b9f8f18346c
--- /dev/null
+++ b/tools/lib/bpf/relo_core.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2019 Facebook */
+
+#ifndef __RELO_CORE_H
+#define __RELO_CORE_H
+
+/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
+ * has to be adjusted by relocations.
+ *
+ * The name prefix groups the kinds: BPF_FIELD_* describe a field access,
+ * BPF_TYPE_* describe a whole type, BPF_ENUMVAL_* describe an enumerator.
+ * Numeric values are assigned explicitly.
+ */
+enum bpf_core_relo_kind {
+ BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */
+ BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */
+ BPF_FIELD_EXISTS = 2, /* field existence in target kernel */
+ BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */
+ BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */
+ BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */
+ BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */
+ BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */
+ BPF_TYPE_EXISTS = 8, /* type existence in target kernel */
+ BPF_TYPE_SIZE = 9, /* type size in bytes */
+ BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
+ BPF_ENUMVAL_VALUE = 11, /* enum value integer value */
+};
+
+/* The minimum bpf_core_relo checked by the loader
+ *
+ * CO-RE relocation captures the following data:
+ * - insn_off - instruction offset (in bytes) within a BPF program that needs
+ * its insn->imm field to be relocated with actual field info;
+ * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
+ * type or field;
+ * - access_str_off - offset into corresponding .BTF string section. String
+ * interpretation depends on specific relocation kind:
+ * - for field-based relocations, string encodes an accessed field using
+ * a sequence of field and array indices, separated by colon (:). It's
+ * conceptually very close to LLVM's getelementptr ([0]) instruction's
+ * arguments for identifying offset to a field.
+ * - for type-based relocations, string is expected to be just "0";
+ * - for enum value-based relocations, string contains an index of enum
+ * value within its enum type;
+ * - kind - which aspect of the field/type/enum value is relocated (see
+ * enum bpf_core_relo_kind).
+ *
+ * Example to provide a better feel.
+ *
+ * struct sample {
+ * int a;
+ * struct {
+ * int b[10];
+ * };
+ * };
+ *
+ * struct sample *s = ...;
+ * int *x = &s->a; // encoded as "0:0" (a is field #0)
+ * int *y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1,
+ * // b is field #0 inside anon struct, accessing elem #5)
+ * int *z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
+ *
+ * type_id for all relocs in this example will capture BTF type id of
+ * `struct sample`.
+ *
+ * Such relocation is emitted when using __builtin_preserve_access_index()
+ * Clang built-in, passing expression that captures field address, e.g.:
+ *
+ * bpf_probe_read(&dst, sizeof(dst),
+ * __builtin_preserve_access_index(&src->a.b.c));
+ *
+ * In this case Clang will emit field relocation recording necessary data to
+ * be able to find offset of embedded `a.b.c` field within `src` struct.
+ *
+ * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
+ */
+struct bpf_core_relo {
+ __u32 insn_off; /* byte offset of the instruction to relocate */
+ __u32 type_id; /* BTF type ID of the root entity */
+ __u32 access_str_off; /* offset of the access string in the string section */
+ enum bpf_core_relo_kind kind; /* aspect being relocated */
+};
+
+/* A single candidate target type considered when resolving a relocation. */
+struct bpf_core_cand {
+ const struct btf *btf; /* BTF object the candidate type lives in */
+ const struct btf_type *t; /* presumably the candidate's BTF type - TODO confirm against the code that fills the list */
+ const char *name; /* presumably the candidate's type name - TODO confirm against the code that fills the list */
+ __u32 id; /* candidate's type ID within btf */
+};
+
+/* dynamically sized list of type IDs and its associated struct btf;
+ * bpf_core_apply_relo_insn() may shrink it in place to the candidates that
+ * matched
+ */
+struct bpf_core_cand_list {
+ struct bpf_core_cand *cands; /* array of candidates */
+ int len; /* number of valid entries in cands */
+};
+
+/* Resolve a single CO-RE relocation against the given candidates and patch
+ * the instruction accordingly; returns 0 on success, negative error otherwise.
+ */
+int bpf_core_apply_relo_insn(const char *prog_name,
+ struct bpf_insn *insn, int insn_idx,
+ const struct bpf_core_relo *relo, int relo_idx,
+ const struct btf *local_btf,
+ struct bpf_core_cand_list *cands);
+/* NOTE(review): judging by the name, checks whether a local and a target type
+ * are compatible - the definition is not part of this header; confirm the
+ * return convention there.
+ */
+int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id);
+
+/* Length of the "essential" part of a name; the result is used as a prefix
+ * length when comparing local and target names. NOTE(review): presumably the
+ * part before a "___" flavor suffix - confirm against the definition.
+ */
+size_t bpf_core_essential_name_len(const char *name);
+#endif
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index cdecda1ddd36..996d025b8ed8 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -223,10 +223,10 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
free(info_linear);
return -1;
}
- if (btf__get_from_id(info->btf_id, &btf)) {
+ btf = btf__load_from_kernel_by_id(info->btf_id);
+ if (libbpf_get_error(btf)) {
pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id);
err = -1;
- btf = NULL;
goto out;
}
perf_env__fetch_btf(env, info->btf_id, btf);
@@ -296,7 +296,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
out:
free(info_linear);
- free(btf);
+ btf__free(btf);
return err ? -1 : 0;
}
@@ -478,7 +478,8 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
if (btf_id == 0)
goto out;
- if (btf__get_from_id(btf_id, &btf)) {
+ btf = btf__load_from_kernel_by_id(btf_id);
+ if (libbpf_get_error(btf)) {
pr_debug("%s: failed to get BTF of id %u, aborting\n",
__func__, btf_id);
goto out;
@@ -486,7 +487,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
perf_env__fetch_btf(env, btf_id, btf);
out:
- free(btf);
+ btf__free(btf);
close(fd);
}
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index 8150e03367bb..ba0f20853651 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -64,8 +64,8 @@ static char *bpf_target_prog_name(int tgt_fd)
struct bpf_prog_info_linear *info_linear;
struct bpf_func_info *func_info;
const struct btf_type *t;
+ struct btf *btf = NULL;
char *name = NULL;
- struct btf *btf;
info_linear = bpf_program__get_prog_info_linear(
tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
@@ -74,12 +74,17 @@ static char *bpf_target_prog_name(int tgt_fd)
return NULL;
}
- if (info_linear->info.btf_id == 0 ||
- btf__get_from_id(info_linear->info.btf_id, &btf)) {
+ if (info_linear->info.btf_id == 0) {
pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd);
goto out;
}
+ btf = btf__load_from_kernel_by_id(info_linear->info.btf_id);
+ if (libbpf_get_error(btf)) {
+ pr_debug("failed to load btf for prog FD %d\n", tgt_fd);
+ goto out;
+ }
+
func_info = u64_to_ptr(info_linear->info.func_info);
t = btf__type_by_id(btf, func_info[0].type_id);
if (!t) {
@@ -89,6 +94,7 @@ static char *bpf_target_prog_name(int tgt_fd)
}
name = strdup(btf__name_by_offset(btf, t->name_off));
out:
+ btf__free(btf);
free(info_linear);
return name;
}
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 54f367cbadae..b1bff5fb0f65 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -434,7 +434,7 @@ static int nd_intel_test_finish_query(struct nfit_test *t,
dev_dbg(dev, "%s: transition out verify\n", __func__);
fw->state = FW_STATE_UPDATED;
fw->missed_activate = false;
- /* fall through */
+ fallthrough;
case FW_STATE_UPDATED:
nd_cmd->status = 0;
/* bogus test version */
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index fb010a35d61a..c852eb40c4f7 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -35,9 +35,11 @@ TARGETS += memory-hotplug
TARGETS += mincore
TARGETS += mount
TARGETS += mount_setattr
+TARGETS += move_mount_set_group
TARGETS += mqueue
TARGETS += nci
TARGETS += net
+TARGETS += net/af_unix
TARGETS += net/forwarding
TARGETS += net/mptcp
TARGETS += netfilter
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index addcfd8b615e..433f8bef261e 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -23,7 +23,6 @@ test_skb_cgroup_id_user
test_cgroup_storage
test_flow_dissector
flow_dissector_load
-test_netcnt
test_tcpnotify_user
test_libbpf
test_tcp_check_syncookie_user
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f405b20c1e6c..866531c08e4f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -38,7 +38,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_verifier_log test_dev_cgroup \
test_sock test_sockmap get_cgroup_id_user \
test_cgroup_storage \
- test_netcnt test_tcpnotify_user test_sysctl \
+ test_tcpnotify_user test_sysctl \
test_progs-no_alu32
# Also test bpf-gcc, if present
@@ -79,7 +79,7 @@ TEST_PROGS := test_kmod.sh \
TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \
- test_xdp_vlan.sh
+ test_xdp_vlan.sh test_bpftool.py
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
@@ -187,6 +187,8 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \
cp $(SCRATCH_DIR)/runqslower $@
+TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL)
+
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
@@ -197,7 +199,6 @@ $(OUTPUT)/test_sockmap: cgroup_helpers.c
$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
-$(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
$(OUTPUT)/test_sysctl: cgroup_helpers.c
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 8deec1ca9150..9b17f2867488 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -19,6 +19,13 @@ the CI. It builds the kernel (without overwriting your existing Kconfig), recomp
bpf selftests, runs them (by default ``tools/testing/selftests/bpf/test_progs``) and
saves the resulting output (by default in ``~/.bpf_selftests``).
+Script dependencies:
+- clang (preferably built from sources, https://github.com/llvm/llvm-project);
+- pahole (preferably built from sources, https://git.kernel.org/pub/scm/devel/pahole/pahole.git/);
+- qemu;
+- docutils (for ``rst2man``);
+- libcap-devel.
+
For more information on about using the script, run:
.. code-block:: console
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 029589c008c9..b1ede6f0b821 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -12,6 +12,10 @@
SEC("struct_ops/"#name) \
BPF_PROG(name, args)
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
#define tcp_jiffies32 ((__u32)bpf_jiffies64())
struct sock_common {
@@ -27,6 +31,7 @@ enum sk_pacing {
struct sock {
struct sock_common __sk_common;
+#define sk_state __sk_common.skc_state
unsigned long sk_pacing_rate;
__u32 sk_pacing_status; /* see enum sk_pacing */
} __attribute__((preserve_access_index));
@@ -203,6 +208,20 @@ static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
}
+static __always_inline bool tcp_cc_eq(const char *a, const char *b)
+{
+ int i;
+
+ for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+ if (a[i] != b[i])
+ return false;
+ if (!a[i])
+ break;
+ }
+
+ return true;
+}
+
extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
diff --git a/tools/testing/selftests/bpf/netcnt_common.h b/tools/testing/selftests/bpf/netcnt_common.h
index 81084c1c2c23..0ab1c88041cd 100644
--- a/tools/testing/selftests/bpf/netcnt_common.h
+++ b/tools/testing/selftests/bpf/netcnt_common.h
@@ -6,19 +6,39 @@
#define MAX_PERCPU_PACKETS 32
-struct percpu_net_cnt {
- __u64 packets;
- __u64 bytes;
+/* sizeof(struct bpf_local_storage_elem):
+ *
+ * It really is about 128 bytes on x86_64, but allocate more to account for
+ * possible layout changes, different architectures, etc.
+ * The kernel will wrap up to PAGE_SIZE internally anyway.
+ */
+#define SIZEOF_BPF_LOCAL_STORAGE_ELEM 256
- __u64 prev_ts;
+/* Try to estimate kernel's BPF_LOCAL_STORAGE_MAX_VALUE_SIZE: */
+#define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE (0xFFFF - \
+ SIZEOF_BPF_LOCAL_STORAGE_ELEM)
- __u64 prev_packets;
- __u64 prev_bytes;
+#define PCPU_MIN_UNIT_SIZE 32768
+
+union percpu_net_cnt {
+ struct {
+ __u64 packets;
+ __u64 bytes;
+
+ __u64 prev_ts;
+
+ __u64 prev_packets;
+ __u64 prev_bytes;
+ };
+ __u8 data[PCPU_MIN_UNIT_SIZE];
};
-struct net_cnt {
- __u64 packets;
- __u64 bytes;
+union net_cnt {
+ struct {
+ __u64 packets;
+ __u64 bytes;
+ };
+ __u8 data[BPF_LOCAL_STORAGE_MAX_VALUE_SIZE];
};
#endif
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 2060bc122c53..7e9f6375757a 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -66,17 +66,13 @@ int settimeo(int fd, int timeout_ms)
#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
-int start_server(int family, int type, const char *addr_str, __u16 port,
- int timeout_ms)
+static int __start_server(int type, const struct sockaddr *addr,
+ socklen_t addrlen, int timeout_ms, bool reuseport)
{
- struct sockaddr_storage addr = {};
- socklen_t len;
+ int on = 1;
int fd;
- if (make_sockaddr(family, addr_str, port, &addr, &len))
- return -1;
-
- fd = socket(family, type, 0);
+ fd = socket(addr->sa_family, type, 0);
if (fd < 0) {
log_err("Failed to create server socket");
return -1;
@@ -85,7 +81,13 @@ int start_server(int family, int type, const char *addr_str, __u16 port,
if (settimeo(fd, timeout_ms))
goto error_close;
- if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
+ if (reuseport &&
+ setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) {
+ log_err("Failed to set SO_REUSEPORT");
+ return -1;
+ }
+
+ if (bind(fd, addr, addrlen) < 0) {
log_err("Failed to bind socket");
goto error_close;
}
@@ -104,6 +106,69 @@ error_close:
return -1;
}
+int start_server(int family, int type, const char *addr_str, __u16 port,
+ int timeout_ms)
+{
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ return -1;
+
+ return __start_server(type, (struct sockaddr *)&addr,
+ addrlen, timeout_ms, false);
+}
+
+int *start_reuseport_server(int family, int type, const char *addr_str,
+ __u16 port, int timeout_ms, unsigned int nr_listens)
+{
+ struct sockaddr_storage addr;
+ unsigned int nr_fds = 0;
+ socklen_t addrlen;
+ int *fds;
+
+ if (!nr_listens)
+ return NULL;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ return NULL;
+
+ fds = malloc(sizeof(*fds) * nr_listens);
+ if (!fds)
+ return NULL;
+
+ fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen,
+ timeout_ms, true);
+ if (fds[0] == -1)
+ goto close_fds;
+ nr_fds = 1;
+
+ if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen))
+ goto close_fds;
+
+ for (; nr_fds < nr_listens; nr_fds++) {
+ fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr,
+ addrlen, timeout_ms, true);
+ if (fds[nr_fds] == -1)
+ goto close_fds;
+ }
+
+ return fds;
+
+close_fds:
+ free_fds(fds, nr_fds);
+ return NULL;
+}
+
+void free_fds(int *fds, unsigned int nr_close_fds)
+{
+ if (fds) {
+ while (nr_close_fds)
+ close(fds[--nr_close_fds]);
+ free(fds);
+ }
+}
+
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
int timeout_ms)
{
@@ -153,13 +218,18 @@ static int connect_fd_to_addr(int fd,
return 0;
}
-int connect_to_fd(int server_fd, int timeout_ms)
+static const struct network_helper_opts default_opts;
+
+int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
{
struct sockaddr_storage addr;
struct sockaddr_in *addr_in;
socklen_t addrlen, optlen;
int fd, type;
+ if (!opts)
+ opts = &default_opts;
+
optlen = sizeof(type);
if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
log_err("getsockopt(SOL_TYPE)");
@@ -179,7 +249,12 @@ int connect_to_fd(int server_fd, int timeout_ms)
return -1;
}
- if (settimeo(fd, timeout_ms))
+ if (settimeo(fd, opts->timeout_ms))
+ goto error_close;
+
+ if (opts->cc && opts->cc[0] &&
+ setsockopt(fd, SOL_TCP, TCP_CONGESTION, opts->cc,
+ strlen(opts->cc) + 1))
goto error_close;
if (connect_fd_to_addr(fd, &addr, addrlen))
@@ -192,6 +267,15 @@ error_close:
return -1;
}
+int connect_to_fd(int server_fd, int timeout_ms)
+{
+ struct network_helper_opts opts = {
+ .timeout_ms = timeout_ms,
+ };
+
+ return connect_to_fd_opts(server_fd, &opts);
+}
+
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
{
struct sockaddr_storage addr;
@@ -217,6 +301,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
if (family == AF_INET) {
struct sockaddr_in *sin = (void *)addr;
+ memset(addr, 0, sizeof(*sin));
sin->sin_family = AF_INET;
sin->sin_port = htons(port);
if (addr_str &&
@@ -230,6 +315,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
} else if (family == AF_INET6) {
struct sockaddr_in6 *sin6 = (void *)addr;
+ memset(addr, 0, sizeof(*sin6));
sin6->sin6_family = AF_INET6;
sin6->sin6_port = htons(port);
if (addr_str &&
@@ -243,3 +329,15 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
}
return -1;
}
+
+char *ping_command(int family)
+{
+ if (family == AF_INET6) {
+ /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */
+ if (!system("which ping6 >/dev/null 2>&1"))
+ return "ping6";
+ else
+ return "ping -6";
+ }
+ return "ping";
+}
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 5e0d51c07b63..da7e132657d5 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -17,6 +17,11 @@ typedef __u16 __sum16;
#define VIP_NUM 5
#define MAGIC_BYTES 123
+struct network_helper_opts {
+ const char *cc;
+ int timeout_ms;
+};
+
/* ipv4 test vector */
struct ipv4_packet {
struct ethhdr eth;
@@ -36,11 +41,17 @@ extern struct ipv6_packet pkt_v6;
int settimeo(int fd, int timeout_ms);
int start_server(int family, int type, const char *addr, __u16 port,
int timeout_ms);
+int *start_reuseport_server(int family, int type, const char *addr_str,
+ __u16 port, int timeout_ms,
+ unsigned int nr_listens);
+void free_fds(int *fds, unsigned int nr_close_fds);
int connect_to_fd(int server_fd, int timeout_ms);
+int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts);
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
int timeout_ms);
int make_sockaddr(int family, const char *addr_str, __u16 port,
struct sockaddr_storage *addr, socklen_t *len);
+char *ping_command(int family);
#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index ec11e20d2b92..bf307bb9e446 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -2,79 +2,28 @@
#include <test_progs.h>
#include "test_attach_probe.skel.h"
-#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
-
-#define OP_RT_RA_MASK 0xffff0000UL
-#define LIS_R2 0x3c400000UL
-#define ADDIS_R2_R12 0x3c4c0000UL
-#define ADDI_R2_R2 0x38420000UL
-
-static ssize_t get_offset(ssize_t addr, ssize_t base)
-{
- u32 *insn = (u32 *) addr;
-
- /*
- * A PPC64 ABIv2 function may have a local and a global entry
- * point. We need to use the local entry point when patching
- * functions, so identify and step over the global entry point
- * sequence.
- *
- * The global entry point sequence is always of the form:
- *
- * addis r2,r12,XXXX
- * addi r2,r2,XXXX
- *
- * A linker optimisation may convert the addis to lis:
- *
- * lis r2,XXXX
- * addi r2,r2,XXXX
- */
- if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
- ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
- ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
- return (ssize_t)(insn + 2) - base;
- else
- return addr - base;
-}
-#else
-#define get_offset(addr, base) (addr - base)
-#endif
-
-ssize_t get_base_addr() {
- size_t start, offset;
- char buf[256];
- FILE *f;
-
- f = fopen("/proc/self/maps", "r");
- if (!f)
- return -errno;
-
- while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
- &start, buf, &offset) == 3) {
- if (strcmp(buf, "r-xp") == 0) {
- fclose(f);
- return start - offset;
- }
- }
-
- fclose(f);
- return -EINVAL;
-}
+/* this is how USDT semaphore is actually defined, except volatile modifier */
+volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
void test_attach_probe(void)
{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
int duration = 0;
struct bpf_link *kprobe_link, *kretprobe_link;
struct bpf_link *uprobe_link, *uretprobe_link;
struct test_attach_probe* skel;
size_t uprobe_offset;
- ssize_t base_addr;
+ ssize_t base_addr, ref_ctr_offset;
base_addr = get_base_addr();
if (CHECK(base_addr < 0, "get_base_addr",
"failed to find base addr: %zd", base_addr))
return;
- uprobe_offset = get_offset((size_t)&get_base_addr, base_addr);
+ uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+
+ ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr);
+ if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset"))
+ return;
skel = test_attach_probe__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
@@ -96,20 +45,28 @@ void test_attach_probe(void)
goto cleanup;
skel->links.handle_kretprobe = kretprobe_link;
- uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
- false /* retprobe */,
- 0 /* self pid */,
- "/proc/self/exe",
- uprobe_offset);
+ ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before");
+
+ uprobe_opts.retprobe = false;
+ uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+ uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
+ 0 /* self pid */,
+ "/proc/self/exe",
+ uprobe_offset,
+ &uprobe_opts);
if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
goto cleanup;
skel->links.handle_uprobe = uprobe_link;
- uretprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uretprobe,
- true /* retprobe */,
- -1 /* any pid */,
- "/proc/self/exe",
- uprobe_offset);
+ ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after");
+
+ /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */
+ uprobe_opts.retprobe = true;
+ uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+ uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe,
+ -1 /* any pid */,
+ "/proc/self/exe",
+ uprobe_offset, &uprobe_opts);
if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe"))
goto cleanup;
skel->links.handle_uretprobe = uretprobe_link;
@@ -136,4 +93,5 @@ void test_attach_probe(void)
cleanup:
test_attach_probe__destroy(skel);
+ ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_cleanup");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
new file mode 100644
index 000000000000..5eea3c3a40fe
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <test_progs.h>
+#include "test_bpf_cookie.skel.h"
+
+static void kprobe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ struct bpf_link *retlink1 = NULL, *retlink2 = NULL;
+
+ /* attach two kprobes */
+ opts.bpf_cookie = 0x1;
+ opts.retprobe = false;
+ link1 = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x2;
+ opts.retprobe = false;
+ link2 = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* attach two kretprobes */
+ opts.bpf_cookie = 0x10;
+ opts.retprobe = true;
+ retlink1 = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(retlink1, "retlink1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x20;
+ opts.retprobe = true;
+ retlink2 = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(retlink2, "retlink2"))
+ goto cleanup;
+
+ /* trigger kprobe && kretprobe */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->kprobe_res, 0x1 | 0x2, "kprobe_res");
+ ASSERT_EQ(skel->bss->kretprobe_res, 0x10 | 0x20, "kretprobe_res");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(retlink1);
+ bpf_link__destroy(retlink2);
+}
+
+static void uprobe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ struct bpf_link *retlink1 = NULL, *retlink2 = NULL;
+ size_t uprobe_offset;
+ ssize_t base_addr;
+
+ base_addr = get_base_addr();
+ uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+
+ /* attach two uprobes */
+ opts.bpf_cookie = 0x100;
+ opts.retprobe = false;
+ link1 = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, 0 /* self pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x200;
+ opts.retprobe = false;
+ link2 = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, -1 /* any pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* attach two uretprobes */
+ opts.bpf_cookie = 0x1000;
+ opts.retprobe = true;
+ retlink1 = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, -1 /* any pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(retlink1, "retlink1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x2000;
+ opts.retprobe = true;
+ retlink2 = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, 0 /* self pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(retlink2, "retlink2"))
+ goto cleanup;
+
+ /* trigger uprobe && uretprobe */
+ get_base_addr();
+
+ ASSERT_EQ(skel->bss->uprobe_res, 0x100 | 0x200, "uprobe_res");
+ ASSERT_EQ(skel->bss->uretprobe_res, 0x1000 | 0x2000, "uretprobe_res");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(retlink1);
+ bpf_link__destroy(retlink2);
+}
+
+static void tp_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tracepoint_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL, *link3 = NULL;
+
+ /* attach first tp prog */
+ opts.bpf_cookie = 0x10000;
+ link1 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp1,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ /* attach second tp prog */
+ opts.bpf_cookie = 0x20000;
+ link2 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp2,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* trigger tracepoints */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->tp_res, 0x10000 | 0x20000, "tp_res1");
+
+ /* now we detach first prog and will attach third one, which causes
+ * two internal calls to bpf_prog_array_copy(), shuffling
+ * bpf_prog_array_items around. We test here that we don't lose track
+ * of associated bpf_cookies.
+ */
+ bpf_link__destroy(link1);
+ link1 = NULL;
+ kern_sync_rcu();
+ skel->bss->tp_res = 0;
+
+ /* attach third tp prog */
+ opts.bpf_cookie = 0x40000;
+ link3 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp3,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link3, "link3"))
+ goto cleanup;
+
+ /* trigger tracepoints */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->tp_res, 0x20000 | 0x40000, "tp_res2");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(link3);
+}
+
+static void burn_cpu(void)
+{
+ volatile int j = 0;
+ cpu_set_t cpu_set;
+ int i, err;
+
+ /* generate some branches on cpu 0 */
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ ASSERT_OK(err, "set_thread_affinity");
+
+ /* spin the loop for a while (random high number) */
+ for (i = 0; i < 1000000; ++i)
+ ++j;
+}
+
+static void pe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, opts);
+ struct bpf_link *link = NULL;
+ struct perf_event_attr attr;
+ int pfd = -1;
+
+ /* create perf event */
+ memset(&attr, 0, sizeof(attr));
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.freq = 1;
+ attr.sample_freq = 4000;
+ pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ if (!ASSERT_GE(pfd, 0, "perf_fd"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x100000;
+ link = bpf_program__attach_perf_event_opts(skel->progs.handle_pe, pfd, &opts);
+ if (!ASSERT_OK_PTR(link, "link1"))
+ goto cleanup;
+
+ burn_cpu(); /* trigger BPF prog */
+
+ ASSERT_EQ(skel->bss->pe_res, 0x100000, "pe_res1");
+
+ /* prevent bpf_link__destroy() closing pfd itself */
+ bpf_link__disconnect(link);
+ /* close BPF link's FD explicitly */
+ close(bpf_link__fd(link));
+ /* free up memory used by struct bpf_link */
+ bpf_link__destroy(link);
+ link = NULL;
+ kern_sync_rcu();
+ skel->bss->pe_res = 0;
+
+ opts.bpf_cookie = 0x200000;
+ link = bpf_program__attach_perf_event_opts(skel->progs.handle_pe, pfd, &opts);
+ if (!ASSERT_OK_PTR(link, "link2"))
+ goto cleanup;
+
+ burn_cpu(); /* trigger BPF prog */
+
+ ASSERT_EQ(skel->bss->pe_res, 0x200000, "pe_res2");
+
+cleanup:
+ close(pfd);
+ bpf_link__destroy(link);
+}
+
+void test_bpf_cookie(void)
+{
+ struct test_bpf_cookie *skel;
+
+ skel = test_bpf_cookie__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->my_tid = syscall(SYS_gettid);
+
+ if (test__start_subtest("kprobe"))
+ kprobe_subtest(skel);
+ if (test__start_subtest("uprobe"))
+ uprobe_subtest(skel);
+ if (test__start_subtest("tracepoint"))
+ tp_subtest(skel);
+ if (test__start_subtest("perf_event"))
+ pe_subtest(skel);
+
+ test_bpf_cookie__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 1f1aade56504..77ac24b191d4 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -13,6 +13,7 @@
#include "bpf_iter_tcp6.skel.h"
#include "bpf_iter_udp4.skel.h"
#include "bpf_iter_udp6.skel.h"
+#include "bpf_iter_unix.skel.h"
#include "bpf_iter_test_kern1.skel.h"
#include "bpf_iter_test_kern2.skel.h"
#include "bpf_iter_test_kern3.skel.h"
@@ -313,6 +314,19 @@ static void test_udp6(void)
bpf_iter_udp6__destroy(skel);
}
+static void test_unix(void)
+{
+ struct bpf_iter_unix *skel;
+
+ skel = bpf_iter_unix__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_unix__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_unix);
+
+ bpf_iter_unix__destroy(skel);
+}
+
/* The expected string is less than 16 bytes */
static int do_read_with_fd(int iter_fd, const char *expected,
bool read_one_char)
@@ -1255,6 +1269,8 @@ void test_bpf_iter(void)
test_udp4();
if (test__start_subtest("udp6"))
test_udp6();
+ if (test__start_subtest("unix"))
+ test_unix();
if (test__start_subtest("anon"))
test_anon_iter(false);
if (test__start_subtest("anon-read-one-char"))
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
new file mode 100644
index 000000000000..85babb0487b3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "bpf_dctcp.skel.h"
+#include "bpf_cubic.skel.h"
+#include "bpf_iter_setsockopt.skel.h"
+
+static int create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
+ return -1;
+
+ return 0;
+}
+
+static unsigned int set_bpf_cubic(int *fds, unsigned int nr_fds)
+{
+ unsigned int i;
+
+ for (i = 0; i < nr_fds; i++) {
+ if (setsockopt(fds[i], SOL_TCP, TCP_CONGESTION, "bpf_cubic",
+ sizeof("bpf_cubic")))
+ return i;
+ }
+
+ return nr_fds;
+}
+
+static unsigned int check_bpf_dctcp(int *fds, unsigned int nr_fds)
+{
+ char tcp_cc[16];
+ socklen_t optlen = sizeof(tcp_cc);
+ unsigned int i;
+
+ for (i = 0; i < nr_fds; i++) {
+ if (getsockopt(fds[i], SOL_TCP, TCP_CONGESTION,
+ tcp_cc, &optlen) ||
+ strcmp(tcp_cc, "bpf_dctcp"))
+ return i;
+ }
+
+ return nr_fds;
+}
+
+static int *make_established(int listen_fd, unsigned int nr_est,
+ int **paccepted_fds)
+{
+ int *est_fds, *accepted_fds;
+ unsigned int i;
+
+ est_fds = malloc(sizeof(*est_fds) * nr_est);
+ if (!est_fds)
+ return NULL;
+
+ accepted_fds = malloc(sizeof(*accepted_fds) * nr_est);
+ if (!accepted_fds) {
+ free(est_fds);
+ return NULL;
+ }
+
+ for (i = 0; i < nr_est; i++) {
+ est_fds[i] = connect_to_fd(listen_fd, 0);
+ if (est_fds[i] == -1)
+ break;
+ if (set_bpf_cubic(&est_fds[i], 1) != 1) {
+ close(est_fds[i]);
+ break;
+ }
+
+ accepted_fds[i] = accept(listen_fd, NULL, 0);
+ if (accepted_fds[i] == -1) {
+ close(est_fds[i]);
+ break;
+ }
+ }
+
+ if (!ASSERT_EQ(i, nr_est, "create established fds")) {
+ free_fds(accepted_fds, i);
+ free_fds(est_fds, i);
+ return NULL;
+ }
+
+ *paccepted_fds = accepted_fds;
+ return est_fds;
+}
+
+static unsigned short get_local_port(int fd)
+{
+ struct sockaddr_in6 addr;
+ socklen_t addrlen = sizeof(addr);
+
+ if (!getsockname(fd, &addr, &addrlen))
+ return ntohs(addr.sin6_port);
+
+ return 0;
+}
+
+static void do_bpf_iter_setsockopt(struct bpf_iter_setsockopt *iter_skel,
+ bool random_retry)
+{
+ int *reuse_listen_fds = NULL, *accepted_fds = NULL, *est_fds = NULL;
+ unsigned int nr_reuse_listens = 256, nr_est = 256;
+ int err, iter_fd = -1, listen_fd = -1;
+ char buf;
+
+ /* Prepare non-reuseport listen_fd */
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(listen_fd, 0, "start_server"))
+ return;
+ if (!ASSERT_EQ(set_bpf_cubic(&listen_fd, 1), 1,
+ "set listen_fd to cubic"))
+ goto done;
+ iter_skel->bss->listen_hport = get_local_port(listen_fd);
+ if (!ASSERT_NEQ(iter_skel->bss->listen_hport, 0,
+ "get_local_port(listen_fd)"))
+ goto done;
+
+ /* Connect to non-reuseport listen_fd */
+ est_fds = make_established(listen_fd, nr_est, &accepted_fds);
+ if (!ASSERT_OK_PTR(est_fds, "create established"))
+ goto done;
+
+ /* Prepare reuseport listen fds */
+ reuse_listen_fds = start_reuseport_server(AF_INET6, SOCK_STREAM,
+ "::1", 0, 0,
+ nr_reuse_listens);
+ if (!ASSERT_OK_PTR(reuse_listen_fds, "start_reuseport_server"))
+ goto done;
+ if (!ASSERT_EQ(set_bpf_cubic(reuse_listen_fds, nr_reuse_listens),
+ nr_reuse_listens, "set reuse_listen_fds to cubic"))
+ goto done;
+ iter_skel->bss->reuse_listen_hport = get_local_port(reuse_listen_fds[0]);
+ if (!ASSERT_NEQ(iter_skel->bss->reuse_listen_hport, 0,
+ "get_local_port(reuse_listen_fds[0])"))
+ goto done;
+
+ /* Run bpf tcp iter to switch from bpf_cubic to bpf_dctcp */
+ iter_skel->bss->random_retry = random_retry;
+ iter_fd = bpf_iter_create(bpf_link__fd(iter_skel->links.change_tcp_cc));
+ if (!ASSERT_GE(iter_fd, 0, "create iter_fd"))
+ goto done;
+
+ while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+ errno == EAGAIN)
+ ;
+ if (!ASSERT_OK(err, "read iter error"))
+ goto done;
+
+ /* Check reuseport listen fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(reuse_listen_fds, nr_reuse_listens),
+ nr_reuse_listens,
+ "check reuse_listen_fds dctcp");
+
+ /* Check non reuseport listen fd for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(&listen_fd, 1), 1,
+ "check listen_fd dctcp");
+
+ /* Check established fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(est_fds, nr_est), nr_est,
+ "check est_fds dctcp");
+
+ /* Check accepted fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(accepted_fds, nr_est), nr_est,
+ "check accepted_fds dctcp");
+
+done:
+ if (iter_fd != -1)
+ close(iter_fd);
+ if (listen_fd != -1)
+ close(listen_fd);
+ free_fds(reuse_listen_fds, nr_reuse_listens);
+ free_fds(accepted_fds, nr_est);
+ free_fds(est_fds, nr_est);
+}
+
+void test_bpf_iter_setsockopt(void)
+{
+ struct bpf_iter_setsockopt *iter_skel = NULL;
+ struct bpf_cubic *cubic_skel = NULL;
+ struct bpf_dctcp *dctcp_skel = NULL;
+ struct bpf_link *cubic_link = NULL;
+ struct bpf_link *dctcp_link = NULL;
+
+ if (create_netns())
+ return;
+
+ /* Load iter_skel */
+ iter_skel = bpf_iter_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(iter_skel, "iter_skel"))
+ return;
+ iter_skel->links.change_tcp_cc = bpf_program__attach_iter(iter_skel->progs.change_tcp_cc, NULL);
+ if (!ASSERT_OK_PTR(iter_skel->links.change_tcp_cc, "attach iter"))
+ goto done;
+
+ /* Load bpf_cubic */
+ cubic_skel = bpf_cubic__open_and_load();
+ if (!ASSERT_OK_PTR(cubic_skel, "cubic_skel"))
+ goto done;
+ cubic_link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
+ if (!ASSERT_OK_PTR(cubic_link, "cubic_link"))
+ goto done;
+
+ /* Load bpf_dctcp */
+ dctcp_skel = bpf_dctcp__open_and_load();
+ if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
+ goto done;
+ dctcp_link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+ if (!ASSERT_OK_PTR(dctcp_link, "dctcp_link"))
+ goto done;
+
+ do_bpf_iter_setsockopt(iter_skel, true);
+ do_bpf_iter_setsockopt(iter_skel, false);
+
+done:
+ bpf_link__destroy(cubic_link);
+ bpf_link__destroy(dctcp_link);
+ bpf_cubic__destroy(cubic_skel);
+ bpf_dctcp__destroy(dctcp_skel);
+ bpf_iter_setsockopt__destroy(iter_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index efe1e979affb..94e03df69d71 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -4,37 +4,22 @@
#include <linux/err.h>
#include <netinet/tcp.h>
#include <test_progs.h>
+#include "network_helpers.h"
#include "bpf_dctcp.skel.h"
#include "bpf_cubic.skel.h"
#include "bpf_tcp_nogpl.skel.h"
+#include "bpf_dctcp_release.skel.h"
#define min(a, b) ((a) < (b) ? (a) : (b))
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
static const unsigned int total_bytes = 10 * 1024 * 1024;
-static const struct timeval timeo_sec = { .tv_sec = 10 };
-static const size_t timeo_optlen = sizeof(timeo_sec);
static int expected_stg = 0xeB9F;
static int stop, duration;
-static int settimeo(int fd)
-{
- int err;
-
- err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
- timeo_optlen);
- if (CHECK(err == -1, "setsockopt(fd, SO_RCVTIMEO)", "errno:%d\n",
- errno))
- return -1;
-
- err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec,
- timeo_optlen);
- if (CHECK(err == -1, "setsockopt(fd, SO_SNDTIMEO)", "errno:%d\n",
- errno))
- return -1;
-
- return 0;
-}
-
static int settcpca(int fd, const char *tcp_ca)
{
int err;
@@ -61,7 +46,7 @@ static void *server(void *arg)
goto done;
}
- if (settimeo(fd)) {
+ if (settimeo(fd, 0)) {
err = -errno;
goto done;
}
@@ -114,7 +99,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
}
if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) ||
- settimeo(lfd) || settimeo(fd))
+ settimeo(lfd, 0) || settimeo(fd, 0))
goto done;
/* bind, listen and start server thread to accept */
@@ -267,6 +252,77 @@ static void test_invalid_license(void)
libbpf_set_print(old_print_fn);
}
+static void test_dctcp_fallback(void)
+{
+ int err, lfd = -1, cli_fd = -1, srv_fd = -1;
+ struct network_helper_opts opts = {
+ .cc = "cubic",
+ };
+ struct bpf_dctcp *dctcp_skel;
+ struct bpf_link *link = NULL;
+ char srv_cc[16];
+ socklen_t cc_len = sizeof(srv_cc);
+
+ dctcp_skel = bpf_dctcp__open();
+ if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
+ return;
+ strcpy(dctcp_skel->rodata->fallback, "cubic");
+ if (!ASSERT_OK(bpf_dctcp__load(dctcp_skel), "bpf_dctcp__load"))
+ goto done;
+
+ link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+ if (!ASSERT_OK_PTR(link, "dctcp link"))
+ goto done;
+
+ lfd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(lfd, 0, "lfd") ||
+ !ASSERT_OK(settcpca(lfd, "bpf_dctcp"), "lfd=>bpf_dctcp"))
+ goto done;
+
+ cli_fd = connect_to_fd_opts(lfd, &opts);
+ if (!ASSERT_GE(cli_fd, 0, "cli_fd"))
+ goto done;
+
+ srv_fd = accept(lfd, NULL, 0);
+ if (!ASSERT_GE(srv_fd, 0, "srv_fd"))
+ goto done;
+ ASSERT_STREQ(dctcp_skel->bss->cc_res, "cubic", "cc_res");
+ ASSERT_EQ(dctcp_skel->bss->tcp_cdg_res, -ENOTSUPP, "tcp_cdg_res");
+
+ err = getsockopt(srv_fd, SOL_TCP, TCP_CONGESTION, srv_cc, &cc_len);
+ if (!ASSERT_OK(err, "getsockopt(srv_fd, TCP_CONGESTION)"))
+ goto done;
+ ASSERT_STREQ(srv_cc, "cubic", "srv_fd cc");
+
+done:
+ bpf_link__destroy(link);
+ bpf_dctcp__destroy(dctcp_skel);
+ if (lfd != -1)
+ close(lfd);
+ if (srv_fd != -1)
+ close(srv_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+}
+
+static void test_rel_setsockopt(void)
+{
+ struct bpf_dctcp_release *rel_skel;
+ libbpf_print_fn_t old_print_fn;
+
+ err_str = "unknown func bpf_setsockopt";
+ found = false;
+
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+ rel_skel = bpf_dctcp_release__open_and_load();
+ libbpf_set_print(old_print_fn);
+
+ ASSERT_ERR_PTR(rel_skel, "rel_skel");
+ ASSERT_TRUE(found, "expected_err_msg");
+
+ bpf_dctcp_release__destroy(rel_skel);
+}
+
void test_bpf_tcp_ca(void)
{
if (test__start_subtest("dctcp"))
@@ -275,4 +331,8 @@ void test_bpf_tcp_ca(void)
test_cubic();
if (test__start_subtest("invalid_license"))
test_invalid_license();
+ if (test__start_subtest("dctcp_fallback"))
+ test_dctcp_fallback();
+ if (test__start_subtest("rel_setsockopt"))
+ test_rel_setsockopt();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 857e3f26086f..649f87382c8d 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -4350,7 +4350,8 @@ static void do_test_file(unsigned int test_num)
goto done;
}
- err = btf__get_from_id(info.btf_id, &btf);
+ btf = btf__load_from_kernel_by_id(info.btf_id);
+ err = libbpf_get_error(btf);
if (CHECK(err, "cannot get btf from kernel, err: %d", err))
goto done;
@@ -4386,6 +4387,7 @@ skip:
fprintf(stderr, "OK");
done:
+ btf__free(btf);
free(func_info);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 1b90e684ff13..52ccf0cf35e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -232,7 +232,593 @@ err_out:
btf__free(btf);
}
+#define STRSIZE 4096
+
+static void btf_dump_snprintf(void *ctx, const char *fmt, va_list args)
+{
+ char *s = ctx, new[STRSIZE];
+
+ vsnprintf(new, STRSIZE, fmt, args);
+ if (strlen(s) < STRSIZE)
+ strncat(s, new, STRSIZE - strlen(s) - 1);
+}
+
+static int btf_dump_data(struct btf *btf, struct btf_dump *d,
+ char *name, char *prefix, __u64 flags, void *ptr,
+ size_t ptr_sz, char *str, const char *expected_val)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ size_t type_sz;
+ __s32 type_id;
+ int ret = 0;
+
+ if (flags & BTF_F_COMPACT)
+ opts.compact = true;
+ if (flags & BTF_F_NONAME)
+ opts.skip_names = true;
+ if (flags & BTF_F_ZERO)
+ opts.emit_zeroes = true;
+ if (prefix) {
+ ASSERT_STRNEQ(name, prefix, strlen(prefix),
+ "verify prefix match");
+ name += strlen(prefix) + 1;
+ }
+ type_id = btf__find_by_name(btf, name);
+ if (!ASSERT_GE(type_id, 0, "find type id"))
+ return -ENOENT;
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+ ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, &opts);
+ if (type_sz <= ptr_sz) {
+ if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
+ return -EINVAL;
+ } else {
+ if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG"))
+ return -EINVAL;
+ }
+ if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match"))
+ return -EFAULT;
+ return 0;
+}
+
+#define TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \
+ _expected, ...) \
+ do { \
+ char __ptrtype[64] = #_type; \
+ char *_ptrtype = (char *)__ptrtype; \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _ptrtype, _prefix, _flags, \
+ _ptr, sizeof(_type), _str, \
+ _expected); \
+ } while (0)
+
+/* Use where expected data string matches its stringified declaration */
+#define TEST_BTF_DUMP_DATA_C(_b, _d, _prefix, _str, _type, _flags, \
+ ...) \
+ TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \
+ "(" #_type ")" #__VA_ARGS__, __VA_ARGS__)
+
+/* overflow test; pass typesize < expected type size, ensure E2BIG returned */
+#define TEST_BTF_DUMP_DATA_OVER(_b, _d, _prefix, _str, _type, _type_sz, \
+ _expected, ...) \
+ do { \
+ char __ptrtype[64] = #_type; \
+ char *_ptrtype = (char *)__ptrtype; \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _ptrtype, _prefix, 0, \
+ _ptr, _type_sz, _str, _expected); \
+ } while (0)
+
+#define TEST_BTF_DUMP_VAR(_b, _d, _prefix, _str, _var, _type, _flags, \
+ _expected, ...) \
+ do { \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _var, _prefix, _flags, \
+ _ptr, sizeof(_type), _str, \
+ _expected); \
+ } while (0)
+
+static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+#ifdef __SIZEOF_INT128__
+ __int128 i = 0xffffffffffffffff;
+
+ /* this dance is required because we cannot directly initialize
+ * a 128-bit value to anything larger than a 64-bit value.
+ */
+ i = (i << 64) | (i - 1);
+#endif
+ /* simple int */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, 1234);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "1234", 1234);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)1234", 1234);
+
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT, "(int)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_ZERO,
+ "(int)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, -4567);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "-4567", -4567);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)-4567", -4567);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, int, sizeof(int)-1, "", 1);
+
+#ifdef __SIZEOF_INT128__
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128, BTF_F_COMPACT,
+ "(__int128)0xffffffffffffffff",
+ 0xffffffffffffffff);
+ ASSERT_OK(btf_dump_data(btf, d, "__int128", NULL, 0, &i, 16, str,
+ "(__int128)0xfffffffffffffffffffffffffffffffe"),
+ "dump __int128");
+#endif
+}
+
+static void test_btf_dump_float_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ float t1 = 1.234567;
+ float t2 = -1.234567;
+ float t3 = 0.0;
+ double t4 = 5.678912;
+ double t5 = -5.678912;
+ double t6 = 0.0;
+ long double t7 = 9.876543;
+ long double t8 = -9.876543;
+ long double t9 = 0.0;
+
+ /* since the kernel likely does not have any float types in its BTF, we
+ * will need to add some of various sizes.
+ */
+
+ ASSERT_GT(btf__add_float(btf, "test_float", 4), 0, "add float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t1, 4, str,
+ "(test_float)1.234567"), "dump float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t2, 4, str,
+ "(test_float)-1.234567"), "dump float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t3, 4, str,
+ "(test_float)0.000000"), "dump float");
+
+ ASSERT_GT(btf__add_float(btf, "test_double", 8), 0, "add_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t4, 8, str,
+ "(test_double)5.678912"), "dump double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t5, 8, str,
+ "(test_double)-5.678912"), "dump double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t6, 8, str,
+ "(test_double)0.000000"), "dump double");
+
+ ASSERT_GT(btf__add_float(btf, "test_long_double", 16), 0, "add long double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t7, 16,
+ str, "(test_long_double)9.876543"),
+ "dump long_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t8, 16,
+ str, "(test_long_double)-9.876543"),
+ "dump long_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t9, 16,
+ str, "(test_long_double)0.000000"),
+ "dump long_double");
+}
+
+static void test_btf_dump_char_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* simple char */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, char, BTF_F_COMPACT, 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+ "100", 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)100", 100);
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT,
+ "(char)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_ZERO,
+ "(char)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)0", 0);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, char, sizeof(char)-1, "", 100);
+}
+
+static void test_btf_dump_typedef_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* simple typedef */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, uint64_t, BTF_F_COMPACT, 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+ "1", 1);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)1", 1);
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT, "(u64)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_ZERO,
+ "(u64)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)0", 0);
+
+ /* typedef struct */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, atomic_t, BTF_F_COMPACT,
+ {.counter = (int)1,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+ "{1,}", { .counter = 1 });
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+" .counter = (int)1,\n"
+"}",
+ {.counter = 1,});
+ /* typedef with 0 value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT, "(atomic_t){}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+ "{}", {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+"}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_ZERO,
+ "(atomic_t){.counter = (int)0,}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "{0,}", {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_ZERO,
+"(atomic_t){\n"
+" .counter = (int)0,\n"
+"}",
+ { .counter = 0,});
+
+ /* overflow should show type but not value since it overflows */
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, atomic_t, sizeof(atomic_t)-1,
+ "(atomic_t){\n", { .counter = 1});
+}
+
+static void test_btf_dump_enum_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* enum where enum value does (and does not) exist */
+ TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+ "(enum bpf_cmd)BPF_MAP_CREATE", 0);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+ "(enum bpf_cmd)BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "BPF_MAP_CREATE", 0);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_ZERO,
+ "(enum bpf_cmd)BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "BPF_MAP_CREATE", BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, 2000);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "2000", 2000);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+ "(enum bpf_cmd)2000", 2000);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "enum", str, enum bpf_cmd,
+ sizeof(enum bpf_cmd) - 1, "", BPF_MAP_CREATE);
+}
+
+static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ char zero_data[512] = { };
+ char type_data[512];
+ void *fops = type_data;
+ void *skb = type_data;
+ size_t type_sz;
+ __s32 type_id;
+ char *cmpstr;
+ int ret;
+
+ memset(type_data, 255, sizeof(type_data));
+
+ /* simple struct */
+ TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+ {.name_off = (__u32)3,.val = (__s32)-1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{3,-1,}",
+ { .name_off = 3, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+" .name_off = (__u32)3,\n"
+" .val = (__s32)-1,\n"
+"}",
+ { .name_off = 3, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{-1,}",
+ { .name_off = 0, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "{0,-1,}",
+ { .name_off = 0, .val = -1,});
+ /* empty struct should be printed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+ "(struct btf_enum){}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+"}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_ZERO,
+ "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_ZERO,
+"(struct btf_enum){\n"
+" .name_off = (__u32)0,\n"
+" .val = (__s32)0,\n"
+"}",
+ { .name_off = 0, .val = 0,});
+
+ /* struct with pointers */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+ "(struct list_head){.next = (struct list_head *)0x1,}",
+ { .next = (struct list_head *)1 });
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+" .next = (struct list_head *)0x1,\n"
+"}",
+ { .next = (struct list_head *)1 });
+ /* NULL pointer should not be displayed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+ "(struct list_head){}",
+ { .next = (struct list_head *)0 });
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+"}",
+ { .next = (struct list_head *)0 });
+
+ /* struct with function pointers */
+ type_id = btf__find_by_name(btf, "file_operations");
+ if (ASSERT_GT(type_id, 0, "find type id")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ ret = btf_dump__dump_type_data(d, type_id, fops, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping file_operations");
+ cmpstr =
+"(struct file_operations){\n"
+" .owner = (struct module *)0xffffffffffffffff,\n"
+" .llseek = (loff_t (*)(struct file *, loff_t, int))0xffffffffffffffff,";
+
+ ASSERT_STRNEQ(str, cmpstr, strlen(cmpstr), "file_operations");
+ }
+
+ /* struct with char array */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){.name = (char[16])['f','o','o',],}",
+ { .name = "foo",});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{['f','o','o',],}",
+ {.name = "foo",});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, 0,
+"(struct bpf_prog_info){\n"
+" .name = (char[16])[\n"
+" 'f',\n"
+" 'o',\n"
+" 'o',\n"
+" ],\n"
+"}",
+ {.name = "foo",});
+ /* leading null char means do not display string */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){}",
+ {.name = {'\0', 'f', 'o', 'o'}});
+ /* handle non-printable characters */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){.name = (char[16])[1,2,3,],}",
+ { .name = {1, 2, 3, 0}});
+
+ /* struct with non-char array */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+ "(struct __sk_buff){.cb = (__u32[5])[1,2,3,4,5,],}",
+ { .cb = {1, 2, 3, 4, 5,},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{[1,2,3,4,5,],}",
+ { .cb = { 1, 2, 3, 4, 5},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+" .cb = (__u32[5])[\n"
+" 1,\n"
+" 2,\n"
+" 3,\n"
+" 4,\n"
+" 5,\n"
+" ],\n"
+"}",
+ { .cb = { 1, 2, 3, 4, 5},});
+ /* For non-char arrays, zero elements are shown alongside non-zero ones */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+ "(struct __sk_buff){.cb = (__u32[5])[0,0,1,0,0,],}",
+ { .cb = { 0, 0, 1, 0, 0},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+" .cb = (__u32[5])[\n"
+" 0,\n"
+" 0,\n"
+" 1,\n"
+" 0,\n"
+" 0,\n"
+" ],\n"
+"}",
+ { .cb = { 0, 0, 1, 0, 0},});
+
+ /* struct with bitfields */
+ TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+ {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{1,0x2,0x3,4,5,}",
+ { .code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4,
+ .imm = 5,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, 0,
+"(struct bpf_insn){\n"
+" .code = (__u8)1,\n"
+" .dst_reg = (__u8)0x2,\n"
+" .src_reg = (__u8)0x3,\n"
+" .off = (__s16)4,\n"
+" .imm = (__s32)5,\n"
+"}",
+ {.code = 1, .dst_reg = 2, .src_reg = 3, .off = 4, .imm = 5});
+
+ /* zeroed bitfields should not be displayed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+ "(struct bpf_insn){.dst_reg = (__u8)0x1,}",
+ { .code = 0, .dst_reg = 1});
+
+ /* struct with enum bitfield */
+ type_id = btf__find_by_name(btf, "fs_context");
+ if (ASSERT_GT(type_id, 0, "find fs_context")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ opts.emit_zeroes = true;
+ ret = btf_dump__dump_type_data(d, type_id, zero_data, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping fs_context");
+
+ ASSERT_NEQ(strstr(str, "FS_CONTEXT_FOR_MOUNT"), NULL,
+ "bitfield value not present");
+ }
+
+ /* struct with nested anon union */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_sock_ops, BTF_F_COMPACT,
+ "(struct bpf_sock_ops){.op = (__u32)1,(union){.args = (__u32[4])[1,2,3,4,],.reply = (__u32)1,.replylong = (__u32[4])[1,2,3,4,],},}",
+ { .op = 1, .args = { 1, 2, 3, 4}});
+
+ /* union with nested struct */
+ TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT,
+ "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},}",
+ { .map = { .map_fd = 1 }});
+
+ /* struct skb with nested structs/unions; because type output is so
+ * complex, we don't do a string comparison, just verify we return
+ * the type size as the amount of data displayed.
+ */
+ type_id = btf__find_by_name(btf, "sk_buff");
+ if (ASSERT_GT(type_id, 0, "find struct sk_buff")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ ret = btf_dump__dump_type_data(d, type_id, skb, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping sk_buff");
+ }
+
+ /* overflow bpf_sock_ops struct with final element nonzero/zero.
+ * Regardless of the value of the final field, we don't have all the
+ * data we need to display it, so we should trigger an overflow.
+ * In other words overflow checking should trump "is field zero?"
+ * checks because if we've overflowed, it shouldn't matter what the
+ * field is - we can't trust its value so shouldn't display it.
+ */
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+ sizeof(struct bpf_sock_ops) - 1,
+ "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+ { .op = 1, .skb_tcp_flags = 2});
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+ sizeof(struct bpf_sock_ops) - 1,
+ "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+ { .op = 1, .skb_tcp_flags = 0});
+}
+
+static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT,
+ "int cpu_number = (int)100", 100);
+ TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT,
+ "static int cpu_profile_flip = (int)2", 2);
+}
+
+static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str,
+ const char *name, const char *expected_val,
+ void *data, size_t data_sz)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ int ret = 0, cmp;
+ size_t secsize;
+ __s32 type_id;
+
+ opts.compact = true;
+
+ type_id = btf__find_by_name(btf, name);
+ if (!ASSERT_GT(type_id, 0, "find type id"))
+ return;
+
+ secsize = btf__resolve_size(btf, type_id);
+ ASSERT_EQ(secsize, 0, "verify section size");
+
+ str[0] = '\0';
+ ret = btf_dump__dump_type_data(d, type_id, data, data_sz, &opts);
+ ASSERT_EQ(ret, 0, "unexpected return value");
+
+ cmp = strcmp(str, expected_val);
+ ASSERT_EQ(cmp, 0, "ensure expected/actual match");
+}
+
+static void test_btf_dump_datasec_data(char *str)
+{
+ struct btf *btf = btf__parse("xdping_kern.o", NULL);
+ struct btf_dump_opts opts = { .ctx = str };
+ char license[4] = "GPL";
+ struct btf_dump *d;
+
+ if (!ASSERT_OK_PTR(btf, "xdping_kern.o BTF not found"))
+ return;
+
+ d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+ if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+ return;
+
+ test_btf_datasec(btf, d, str, "license",
+ "SEC(\"license\") char[4] _license = (char[4])['G','P','L',];",
+ license, sizeof(license));
+}
+
void test_btf_dump() {
+ char str[STRSIZE];
+ struct btf_dump_opts opts = { .ctx = str };
+ struct btf_dump *d;
+ struct btf *btf;
int i;
for (i = 0; i < ARRAY_SIZE(btf_dump_test_cases); i++) {
@@ -245,4 +831,33 @@ void test_btf_dump() {
}
if (test__start_subtest("btf_dump: incremental"))
test_btf_dump_incremental();
+
+ btf = libbpf_find_kernel_btf();
+ if (!ASSERT_OK_PTR(btf, "no kernel BTF found"))
+ return;
+
+ d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+ if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+ return;
+
+ /* Verify type display for various types. */
+ if (test__start_subtest("btf_dump: int_data"))
+ test_btf_dump_int_data(btf, d, str);
+ if (test__start_subtest("btf_dump: float_data"))
+ test_btf_dump_float_data(btf, d, str);
+ if (test__start_subtest("btf_dump: char_data"))
+ test_btf_dump_char_data(btf, d, str);
+ if (test__start_subtest("btf_dump: typedef_data"))
+ test_btf_dump_typedef_data(btf, d, str);
+ if (test__start_subtest("btf_dump: enum_data"))
+ test_btf_dump_enum_data(btf, d, str);
+ if (test__start_subtest("btf_dump: struct_data"))
+ test_btf_dump_struct_data(btf, d, str);
+ if (test__start_subtest("btf_dump: var_data"))
+ test_btf_dump_var_data(btf, d, str);
+ btf_dump__free(d);
+ btf__free(btf);
+
+ if (test__start_subtest("btf_dump: datasec_data"))
+ test_btf_dump_datasec_data(str);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_module.c b/tools/testing/selftests/bpf/prog_tests/btf_module.c
new file mode 100644
index 000000000000..2239d1fe0332
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_module.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static const char *module_name = "bpf_testmod";
+static const char *symbol_name = "bpf_testmod_test_read";
+
+void test_btf_module()
+{
+ struct btf *vmlinux_btf, *module_btf;
+ __s32 type_id;
+
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
+ return;
+
+ module_btf = btf__load_module_btf(module_name, vmlinux_btf);
+ if (!ASSERT_OK_PTR(module_btf, "could not load module BTF"))
+ goto cleanup;
+
+ type_id = btf__find_by_name(module_btf, symbol_name);
+ ASSERT_GT(type_id, 0, "func not found");
+
+cleanup:
+ btf__free(module_btf);
+ btf__free(vmlinux_btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
index 981c251453d9..3d4b2a358d47 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_autosize.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
@@ -53,8 +53,8 @@ void test_core_autosize(void)
char btf_file[] = "/tmp/core_autosize.btf.XXXXXX";
int err, fd = -1, zero = 0;
int char_id, short_id, int_id, long_long_id, void_ptr_id, id;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
struct test_core_autosize* skel = NULL;
- struct bpf_object_load_attr load_attr = {};
struct bpf_program *prog;
struct bpf_map *bss_map;
struct btf *btf = NULL;
@@ -125,9 +125,10 @@ void test_core_autosize(void)
fd = -1;
/* open and load BPF program with custom BTF as the kernel BTF */
- skel = test_core_autosize__open();
+ open_opts.btf_custom_path = btf_file;
+ skel = test_core_autosize__open_opts(&open_opts);
if (!ASSERT_OK_PTR(skel, "skel_open"))
- return;
+ goto cleanup;
/* disable handle_signed() for now */
prog = bpf_object__find_program_by_name(skel->obj, "handle_signed");
@@ -135,9 +136,7 @@ void test_core_autosize(void)
goto cleanup;
bpf_program__set_autoload(prog, false);
- load_attr.obj = skel->obj;
- load_attr.target_btf_path = btf_file;
- err = bpf_object__load_xattr(&load_attr);
+ err = bpf_object__load(skel->obj);
if (!ASSERT_OK(err, "prog_load"))
goto cleanup;
@@ -204,14 +203,13 @@ void test_core_autosize(void)
skel = NULL;
/* now re-load with handle_signed() enabled, it should fail loading */
- skel = test_core_autosize__open();
+ open_opts.btf_custom_path = btf_file;
+ skel = test_core_autosize__open_opts(&open_opts);
if (!ASSERT_OK_PTR(skel, "skel_open"))
- return;
+ goto cleanup;
- load_attr.obj = skel->obj;
- load_attr.target_btf_path = btf_file;
- err = bpf_object__load_xattr(&load_attr);
- if (!ASSERT_ERR(err, "bad_prog_load"))
+ err = test_core_autosize__load(skel);
+ if (!ASSERT_ERR(err, "skel_load"))
goto cleanup;
cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index d02e064c535f..4739b15b2a97 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -816,7 +816,7 @@ static size_t roundup_page(size_t sz)
void test_core_reloc(void)
{
const size_t mmap_sz = roundup_page(sizeof(struct data));
- struct bpf_object_load_attr load_attr = {};
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
struct core_reloc_test_case *test_case;
const char *tp_name, *probe_name;
int err, i, equal;
@@ -846,9 +846,16 @@ void test_core_reloc(void)
continue;
}
- obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
+ if (test_case->btf_src_file) {
+ err = access(test_case->btf_src_file, R_OK);
+ if (!ASSERT_OK(err, "btf_src_file"))
+ goto cleanup;
+ }
+
+ open_opts.btf_custom_path = test_case->btf_src_file;
+ obj = bpf_object__open_file(test_case->bpf_obj_file, &open_opts);
if (!ASSERT_OK_PTR(obj, "obj_open"))
- continue;
+ goto cleanup;
probe_name = "raw_tracepoint/sys_enter";
tp_name = "sys_enter";
@@ -862,17 +869,7 @@ void test_core_reloc(void)
"prog '%s' not found\n", probe_name))
goto cleanup;
-
- if (test_case->btf_src_file) {
- err = access(test_case->btf_src_file, R_OK);
- if (!ASSERT_OK(err, "btf_src_file"))
- goto cleanup;
- }
-
- load_attr.obj = obj;
- load_attr.log_level = 0;
- load_attr.target_btf_path = test_case->btf_src_file;
- err = bpf_object__load_xattr(&load_attr);
+ err = bpf_object__load(obj);
if (err) {
if (!test_case->fails)
ASSERT_OK(err, "obj_load");
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
new file mode 100644
index 000000000000..02a465f36d59
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "get_func_ip_test.skel.h"
+
+void test_get_func_ip_test(void)
+{
+ struct get_func_ip_test *skel = NULL;
+ __u32 duration = 0, retval;
+ int err, prog_fd;
+
+ skel = get_func_ip_test__open();
+ if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open"))
+ return;
+
+ /* test6 is x86_64 specific because of the instruction
+ * offset, disabling it for all other archs
+ */
+#ifndef __x86_64__
+ bpf_program__set_autoload(skel->progs.test6, false);
+ bpf_program__set_autoload(skel->progs.test7, false);
+#endif
+
+ err = get_func_ip_test__load(skel);
+ if (!ASSERT_OK(err, "get_func_ip_test__load"))
+ goto cleanup;
+
+ err = get_func_ip_test__attach(skel);
+ if (!ASSERT_OK(err, "get_func_ip_test__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+ NULL, NULL, &retval, &duration);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(retval, 0, "test_run");
+
+ prog_fd = bpf_program__fd(skel->progs.test5);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+ NULL, NULL, &retval, &duration);
+
+ ASSERT_OK(err, "test_run");
+
+ ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
+ ASSERT_EQ(skel->bss->test2_result, 1, "test2_result");
+ ASSERT_EQ(skel->bss->test3_result, 1, "test3_result");
+ ASSERT_EQ(skel->bss->test4_result, 1, "test4_result");
+ ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
+#ifdef __x86_64__
+ ASSERT_EQ(skel->bss->test6_result, 1, "test6_result");
+ ASSERT_EQ(skel->bss->test7_result, 1, "test7_result");
+#endif
+
+cleanup:
+ get_func_ip_test__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index 30a7b9b837bf..9611f2bc50df 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -44,7 +44,7 @@ static void test_subprog(void)
ASSERT_OK(err, "bpf_prog_test_run(test1)");
ASSERT_EQ(retval, 10, "test1-retval");
ASSERT_NEQ(skel->data->active_res, -1, "active_res");
- ASSERT_EQ(skel->data->sk_state, BPF_TCP_CLOSE, "sk_state");
+ ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res");
kfunc_call_test_subprog__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
index 67bebd324147..cf3acfa5a91d 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -6,6 +6,7 @@
#include <bpf/btf.h>
#include "test_ksyms_btf.skel.h"
#include "test_ksyms_btf_null_check.skel.h"
+#include "test_ksyms_weak.skel.h"
static int duration;
@@ -81,6 +82,33 @@ static void test_null_check(void)
test_ksyms_btf_null_check__destroy(skel);
}
+static void test_weak_syms(void)
+{
+ struct test_ksyms_weak *skel;
+ struct test_ksyms_weak__data *data;
+ int err;
+
+ skel = test_ksyms_weak__open_and_load();
+ if (CHECK(!skel, "test_ksyms_weak__open_and_load", "failed\n"))
+ return;
+
+ err = test_ksyms_weak__attach(skel);
+ if (CHECK(err, "test_ksyms_weak__attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ data = skel->data;
+ ASSERT_EQ(data->out__existing_typed, 0, "existing typed ksym");
+ ASSERT_NEQ(data->out__existing_typeless, -1, "existing typeless ksym");
+ ASSERT_EQ(data->out__non_existent_typeless, 0, "nonexistent typeless ksym");
+ ASSERT_EQ(data->out__non_existent_typed, 0, "nonexistent typed ksym");
+
+cleanup:
+ test_ksyms_weak__destroy(skel);
+}
+
void test_ksyms_btf(void)
{
int percpu_datasec;
@@ -105,4 +133,7 @@ void test_ksyms_btf(void)
if (test__start_subtest("null_check"))
test_null_check();
+
+ if (test__start_subtest("weak_ksyms"))
+ test_weak_syms();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c
new file mode 100644
index 000000000000..6ede48bde91b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/sysinfo.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "netcnt_prog.skel.h"
+#include "netcnt_common.h"
+
+#define CG_NAME "/netcnt"
+
+void test_netcnt(void)
+{
+ union percpu_net_cnt *percpu_netcnt = NULL;
+ struct bpf_cgroup_storage_key key;
+ int map_fd, percpu_map_fd;
+ struct netcnt_prog *skel;
+ unsigned long packets;
+ union net_cnt netcnt;
+ unsigned long bytes;
+ int cpu, nproc;
+ int cg_fd = -1;
+ char cmd[128];
+
+ skel = netcnt_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load"))
+ return;
+
+ nproc = get_nprocs_conf();
+ percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
+ if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)"))
+ goto err;
+
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup"))
+ goto err;
+
+ skel->links.bpf_nextcnt = bpf_program__attach_cgroup(skel->progs.bpf_nextcnt, cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.bpf_nextcnt,
+ "attach_cgroup(bpf_nextcnt)"))
+ goto err;
+
+ snprintf(cmd, sizeof(cmd), "%s ::1 -A -c 10000 -q > /dev/null", ping_command(AF_INET6));
+ ASSERT_OK(system(cmd), cmd);
+
+ map_fd = bpf_map__fd(skel->maps.netcnt);
+ if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, &key), "bpf_map_get_next_key"))
+ goto err;
+
+ if (!ASSERT_OK(bpf_map_lookup_elem(map_fd, &key, &netcnt), "bpf_map_lookup_elem(netcnt)"))
+ goto err;
+
+ percpu_map_fd = bpf_map__fd(skel->maps.percpu_netcnt);
+ if (!ASSERT_OK(bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0]),
+ "bpf_map_lookup_elem(percpu_netcnt)"))
+ goto err;
+
+ /* Some packets can still be in the per-cpu cache, but not more than
+ * MAX_PERCPU_PACKETS.
+ */
+ packets = netcnt.packets;
+ bytes = netcnt.bytes;
+ for (cpu = 0; cpu < nproc; cpu++) {
+ ASSERT_LE(percpu_netcnt[cpu].packets, MAX_PERCPU_PACKETS, "MAX_PERCPU_PACKETS");
+
+ packets += percpu_netcnt[cpu].packets;
+ bytes += percpu_netcnt[cpu].bytes;
+ }
+
+ /* No packets should be lost */
+ ASSERT_EQ(packets, 10000, "packets");
+
+ /* Let's check that the bytes counter matches the number of packets
+ * multiplied by the size of an IPv6 ICMP packet.
+ */
+ ASSERT_EQ(bytes, packets * 104, "bytes");
+
+err:
+ if (cg_fd != -1)
+ close(cg_fd);
+ free(percpu_netcnt);
+ netcnt_prog__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
new file mode 100644
index 000000000000..71d8f3ba7d6b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "netns_cookie_prog.skel.h"
+#include "network_helpers.h"
+
+#ifndef SO_NETNS_COOKIE
+#define SO_NETNS_COOKIE 71
+#endif
+
+static int duration;
+
+void test_netns_cookie(void)
+{
+ int server_fd = -1, client_fd = -1, cgroup_fd = -1;
+ int err, val, ret, map, verdict;
+ struct netns_cookie_prog *skel;
+ uint64_t cookie_expected_value;
+ socklen_t vallen = sizeof(cookie_expected_value);
+ static const char send_msg[] = "message";
+
+ skel = netns_cookie_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ cgroup_fd = test__join_cgroup("/netns_cookie");
+ if (CHECK(cgroup_fd < 0, "join_cgroup", "cgroup creation failed\n"))
+ goto done;
+
+ skel->links.get_netns_cookie_sockops = bpf_program__attach_cgroup(
+ skel->progs.get_netns_cookie_sockops, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach"))
+ goto done;
+
+ verdict = bpf_program__fd(skel->progs.get_netns_cookie_sk_msg);
+ map = bpf_map__fd(skel->maps.sock_map);
+ err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "prog_attach"))
+ goto done;
+
+ server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno))
+ goto done;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (CHECK(client_fd < 0, "connect_to_fd", "errno %d\n", errno))
+ goto done;
+
+ ret = send(client_fd, send_msg, sizeof(send_msg), 0);
+ if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n", ret))
+ goto done;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sockops_netns_cookies),
+ &client_fd, &val);
+ if (!ASSERT_OK(err, "map_lookup(sockops_netns_cookies)"))
+ goto done;
+
+ err = getsockopt(client_fd, SOL_SOCKET, SO_NETNS_COOKIE,
+ &cookie_expected_value, &vallen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto done;
+
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value");
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies),
+ &client_fd, &val);
+ if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)"))
+ goto done;
+
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value");
+
+done:
+ if (server_fd != -1)
+ close(server_fd);
+ if (client_fd != -1)
+ close(client_fd);
+ if (cgroup_fd != -1)
+ close(cgroup_fd);
+ netns_cookie_prog__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c
new file mode 100644
index 000000000000..b1abd0c46607
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <test_progs.h>
+#include "test_perf_link.skel.h"
+
+static void burn_cpu(void)
+{
+ volatile int j = 0;
+ cpu_set_t cpu_set;
+ int i, err;
+
+ /* generate some branches on cpu 0 */
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ ASSERT_OK(err, "set_thread_affinity");
+
+ /* spin the loop for a while (random high number) */
+ for (i = 0; i < 1000000; ++i)
+ ++j;
+}
+
+void test_perf_link(void)
+{
+ struct test_perf_link *skel = NULL;
+ struct perf_event_attr attr;
+ int pfd = -1, link_fd = -1, err;
+ int run_cnt_before, run_cnt_after;
+ struct bpf_link_info info;
+ __u32 info_len = sizeof(info);
+
+ /* create perf event */
+ memset(&attr, 0, sizeof(attr));
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.freq = 1;
+ attr.sample_freq = 4000;
+ pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ if (!ASSERT_GE(pfd, 0, "perf_fd"))
+ goto cleanup;
+
+ skel = test_perf_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ link_fd = bpf_link_create(bpf_program__fd(skel->progs.handler), pfd,
+ BPF_PERF_EVENT, NULL);
+ if (!ASSERT_GE(link_fd, 0, "link_fd"))
+ goto cleanup;
+
+ memset(&info, 0, sizeof(info));
+ err = bpf_obj_get_info_by_fd(link_fd, &info, &info_len);
+ if (!ASSERT_OK(err, "link_get_info"))
+ goto cleanup;
+
+ ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "link_type");
+ ASSERT_GT(info.id, 0, "link_id");
+ ASSERT_GT(info.prog_id, 0, "link_prog_id");
+
+ /* ensure we get at least one perf_event prog execution */
+ burn_cpu();
+ ASSERT_GT(skel->bss->run_cnt, 0, "run_cnt");
+
+ /* perf_event is still active, but once we close the link the BPF
+ * program shouldn't be executed anymore
+ */
+ close(link_fd);
+ link_fd = -1;
+
+ /* make sure there are no stragglers */
+ kern_sync_rcu();
+
+ run_cnt_before = skel->bss->run_cnt;
+ burn_cpu();
+ run_cnt_after = skel->bss->run_cnt;
+
+ ASSERT_EQ(run_cnt_before, run_cnt_after, "run_cnt_before_after");
+
+cleanup:
+ if (link_fd >= 0)
+ close(link_fd);
+ if (pfd >= 0)
+ close(pfd);
+ test_perf_link__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c
index fcf54b3a1dd0..d4b953ae3407 100644
--- a/tools/testing/selftests/bpf/prog_tests/pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/pinning.c
@@ -125,6 +125,10 @@ void test_pinning(void)
if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
goto out;
+ /* get pinning path */
+ if (!ASSERT_STREQ(bpf_map__pin_path(map), pinpath, "get pin path"))
+ goto out;
+
/* set pinning path of other map and re-pin all */
map = bpf_object__find_map_by_name(obj, "nopinmap");
if (CHECK(!map, "find map", "NULL map"))
@@ -134,6 +138,11 @@ void test_pinning(void)
if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
goto out;
+ /* get pinning path after set */
+ if (!ASSERT_STREQ(bpf_map__pin_path(map), custpinpath,
+ "get pin path after set"))
+ goto out;
+
/* should only pin the one unpinned map */
err = bpf_object__pin_maps(obj, NULL);
if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index de2688166696..4e91f4d6466c 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -34,8 +34,8 @@ void test_reference_tracking(void)
if (!test__start_subtest(title))
continue;
- /* Expect verifier failure if test name has 'fail' */
- if (strstr(title, "fail") != NULL) {
+ /* Expect verifier failure if test name has 'err_' */
+ if (strstr(title, "err_") != NULL) {
libbpf_print_fn_t old_print_fn;
old_print_fn = libbpf_set_print(NULL);
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 023cc532992d..776916b61c40 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <sys/time.h>
+#include <sys/resource.h>
#include "test_send_signal_kern.skel.h"
int sigusr1_received = 0;
@@ -10,29 +12,25 @@ static void sigusr1_handler(int signum)
}
static void test_send_signal_common(struct perf_event_attr *attr,
- bool signal_thread,
- const char *test_name)
+ bool signal_thread)
{
struct test_send_signal_kern *skel;
int pipe_c2p[2], pipe_p2c[2];
int err = -1, pmu_fd = -1;
- __u32 duration = 0;
char buf[256];
pid_t pid;
- if (CHECK(pipe(pipe_c2p), test_name,
- "pipe pipe_c2p error: %s\n", strerror(errno)))
+ if (!ASSERT_OK(pipe(pipe_c2p), "pipe_c2p"))
return;
- if (CHECK(pipe(pipe_p2c), test_name,
- "pipe pipe_p2c error: %s\n", strerror(errno))) {
+ if (!ASSERT_OK(pipe(pipe_p2c), "pipe_p2c")) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
return;
}
pid = fork();
- if (CHECK(pid < 0, test_name, "fork error: %s\n", strerror(errno))) {
+ if (!ASSERT_GE(pid, 0, "fork")) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
close(pipe_p2c[0]);
@@ -41,26 +39,40 @@ static void test_send_signal_common(struct perf_event_attr *attr,
}
if (pid == 0) {
+ int old_prio;
+
/* install signal handler and notify parent */
signal(SIGUSR1, sigusr1_handler);
close(pipe_c2p[0]); /* close read */
close(pipe_p2c[1]); /* close write */
+ /* boost to a high priority so that we have a higher chance
+ * that, if an interrupt happens, the underlying task
+ * is this process.
+ */
+ errno = 0;
+ old_prio = getpriority(PRIO_PROCESS, 0);
+ ASSERT_OK(errno, "getpriority");
+ ASSERT_OK(setpriority(PRIO_PROCESS, 0, -20), "setpriority");
+
/* notify parent signal handler is installed */
- CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
/* make sure parent enabled bpf program to send_signal */
- CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+ ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
/* wait a little for signal handler */
sleep(1);
buf[0] = sigusr1_received ? '2' : '0';
- CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
/* wait for parent notification and exit */
- CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+ ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
+
+ /* restore the old priority */
+ ASSERT_OK(setpriority(PRIO_PROCESS, 0, old_prio), "setpriority");
close(pipe_c2p[1]);
close(pipe_p2c[0]);
@@ -71,20 +83,19 @@ static void test_send_signal_common(struct perf_event_attr *attr,
close(pipe_p2c[0]); /* close read */
skel = test_send_signal_kern__open_and_load();
- if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
goto skel_open_load_failure;
if (!attr) {
err = test_send_signal_kern__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed\n")) {
+ if (!ASSERT_OK(err, "skel_attach")) {
err = -1;
goto destroy_skel;
}
} else {
pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
-1 /* group id */, 0 /* flags */);
- if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n",
- strerror(errno))) {
+ if (!ASSERT_GE(pmu_fd, 0, "perf_event_open")) {
err = -1;
goto destroy_skel;
}
@@ -96,7 +107,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
}
/* wait until child signal handler installed */
- CHECK(read(pipe_c2p[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+ ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read");
/* trigger the bpf send_signal */
skel->bss->pid = pid;
@@ -104,21 +115,21 @@ static void test_send_signal_common(struct perf_event_attr *attr,
skel->bss->signal_thread = signal_thread;
/* notify child that bpf program can send_signal now */
- CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
/* wait for result */
err = read(pipe_c2p[0], buf, 1);
- if (CHECK(err < 0, test_name, "reading pipe error: %s\n", strerror(errno)))
+ if (!ASSERT_GE(err, 0, "reading pipe"))
goto disable_pmu;
- if (CHECK(err == 0, test_name, "reading pipe error: size 0\n")) {
+ if (!ASSERT_GT(err, 0, "reading pipe error: size 0")) {
err = -1;
goto disable_pmu;
}
- CHECK(buf[0] != '2', test_name, "incorrect result\n");
+ ASSERT_EQ(buf[0], '2', "incorrect result");
/* notify child safe to exit */
- CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
disable_pmu:
close(pmu_fd);
@@ -132,7 +143,7 @@ skel_open_load_failure:
static void test_send_signal_tracepoint(bool signal_thread)
{
- test_send_signal_common(NULL, signal_thread, "tracepoint");
+ test_send_signal_common(NULL, signal_thread);
}
static void test_send_signal_perf(bool signal_thread)
@@ -143,7 +154,7 @@ static void test_send_signal_perf(bool signal_thread)
.config = PERF_COUNT_SW_CPU_CLOCK,
};
- test_send_signal_common(&attr, signal_thread, "perf_sw_event");
+ test_send_signal_common(&attr, signal_thread);
}
static void test_send_signal_nmi(bool signal_thread)
@@ -172,7 +183,7 @@ static void test_send_signal_nmi(bool signal_thread)
close(pmu_fd);
}
- test_send_signal_common(&attr, signal_thread, "perf_hw_event");
+ test_send_signal_common(&attr, signal_thread);
}
void test_send_signal(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
index dffbcaa1ec98..8fd1b4b29a0e 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -19,7 +19,7 @@
#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
-#define EXP_STR_OUT "str1 longstr"
+#define EXP_STR_OUT "str1 a b c d e longstr"
#define EXP_STR_RET sizeof(EXP_STR_OUT)
#define EXP_OVER_OUT "%over"
@@ -114,6 +114,8 @@ void test_snprintf_negative(void)
ASSERT_ERR(load_single_snprintf("%"), "invalid specifier 3");
ASSERT_ERR(load_single_snprintf("%12345678"), "invalid specifier 4");
ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5");
+ ASSERT_ERR(load_single_snprintf("%lc"), "invalid specifier 6");
+ ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7");
ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 515229f24a93..5c5979046523 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -351,9 +351,11 @@ static void test_insert_opened(int family, int sotype, int mapfd)
errno = 0;
value = s;
err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
- if (!err || errno != EOPNOTSUPP)
- FAIL_ERRNO("map_update: expected EOPNOTSUPP");
-
+ if (sotype == SOCK_STREAM) {
+ if (!err || errno != EOPNOTSUPP)
+ FAIL_ERRNO("map_update: expected EOPNOTSUPP");
+ } else if (err)
+ FAIL_ERRNO("map_update: expected success");
xclose(s);
}
@@ -919,6 +921,23 @@ static const char *redir_mode_str(enum redir_mode mode)
}
}
+static int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
+{
+ u64 value;
+ u32 key;
+ int err;
+
+ key = 0;
+ value = fd1;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ return err;
+
+ key = 1;
+ value = fd2;
+ return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+}
+
static void redir_to_connected(int family, int sotype, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
@@ -928,9 +947,9 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
unsigned int pass;
socklen_t len;
int err, n;
- u64 value;
u32 key;
char b;
+ int retries = 100;
zero_verdict_count(verd_mapfd);
@@ -965,15 +984,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (p1 < 0)
goto close_cli1;
- key = 0;
- value = p0;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
- if (err)
- goto close_peer1;
-
- key = 1;
- value = p1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
goto close_peer1;
@@ -991,10 +1002,15 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
goto close_peer1;
if (pass != 1)
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
+again:
n = read(c0, &b, 1);
- if (n < 0)
+ if (n < 0) {
+ if (errno == EAGAIN && retries--) {
+ usleep(1000);
+ goto again;
+ }
FAIL_ERRNO("%s: read", log_prefix);
+ }
if (n == 0)
FAIL("%s: incomplete read", log_prefix);
@@ -1061,7 +1077,6 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
int s, c, p, err, n;
unsigned int drop;
socklen_t len;
- u64 value;
u32 key;
zero_verdict_count(verd_mapfd);
@@ -1086,15 +1101,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
if (p < 0)
goto close_cli;
- key = 0;
- value = s;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
- if (err)
- goto close_peer;
-
- key = 1;
- value = p;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_mapfd, s, p);
if (err)
goto close_peer;
@@ -1346,7 +1353,6 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
int s1, s2, c, err;
unsigned int drop;
socklen_t len;
- u64 value;
u32 key;
zero_verdict_count(verd_map);
@@ -1360,16 +1366,10 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
if (s2 < 0)
goto close_srv1;
- key = 0;
- value = s1;
- err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_map, s1, s2);
if (err)
goto close_srv2;
- key = 1;
- value = s2;
- err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
-
/* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
len = sizeof(addr);
err = xgetsockname(s2, sockaddr(&addr), &len);
@@ -1441,6 +1441,8 @@ static const char *family_str(sa_family_t family)
return "IPv4";
case AF_INET6:
return "IPv6";
+ case AF_UNIX:
+ return "Unix";
default:
return "unknown";
}
@@ -1563,6 +1565,101 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
}
}
+static void unix_redir_to_connected(int sotype, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ int retries = 100;
+ int err, n;
+ int sfd[2];
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
+ return;
+ c0 = sfd[0], p0 = sfd[1];
+
+ if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
+ goto close0;
+ c1 = sfd[0], p1 = sfd[1];
+
+ err = add_to_sockmap(sock_mapfd, p0, p1);
+ if (err)
+ goto close;
+
+ n = write(c1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto close;
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ goto close;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+again:
+ n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+ if (n < 0) {
+ if (errno == EAGAIN && retries--) {
+ usleep(1000);
+ goto again;
+ }
+ FAIL_ERRNO("%s: read", log_prefix);
+ }
+ if (n == 0)
+ FAIL("%s: incomplete read", log_prefix);
+
+close:
+ xclose(c1);
+ xclose(p1);
+close0:
+ xclose(c0);
+ xclose(p0);
+}
+
+static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int sotype)
+{
+ const char *family_name, *map_name;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(AF_UNIX);
+ map_name = map_type_str(map);
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+ if (!test__start_subtest(s))
+ return;
+ unix_skb_redir_to_connected(skel, map, sotype);
+}
+
static void test_reuseport(struct test_sockmap_listen *skel,
struct bpf_map *map, int family, int sotype)
{
@@ -1603,33 +1700,27 @@ static void test_reuseport(struct test_sockmap_listen *skel,
}
}
-static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
- int verd_mapfd, enum redir_mode mode)
+static int inet_socketpair(int family, int type, int *s, int *c)
{
- const char *log_prefix = redir_mode_str(mode);
struct sockaddr_storage addr;
- int c0, c1, p0, p1;
- unsigned int pass;
- int retries = 100;
socklen_t len;
- int err, n;
- u64 value;
- u32 key;
- char b;
-
- zero_verdict_count(verd_mapfd);
+ int p0, c0;
+ int err;
- p0 = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ p0 = socket_loopback(family, type | SOCK_NONBLOCK);
if (p0 < 0)
- return;
+ return p0;
+
len = sizeof(addr);
err = xgetsockname(p0, sockaddr(&addr), &len);
if (err)
goto close_peer0;
- c0 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
- if (c0 < 0)
+ c0 = xsocket(family, type | SOCK_NONBLOCK, 0);
+ if (c0 < 0) {
+ err = c0;
goto close_peer0;
+ }
err = xconnect(c0, sockaddr(&addr), len);
if (err)
goto close_cli0;
@@ -1640,35 +1731,133 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
if (err)
goto close_cli0;
- p1 = socket_loopback(family, sotype | SOCK_NONBLOCK);
- if (p1 < 0)
- goto close_cli0;
- err = xgetsockname(p1, sockaddr(&addr), &len);
+ *s = p0;
+ *c = c0;
+ return 0;
+
+close_cli0:
+ xclose(c0);
+close_peer0:
+ xclose(p0);
+ return err;
+}
+
+static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
+ enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ int retries = 100;
+ int err, n;
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
+ if (err)
+ return;
+ err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
if (err)
goto close_cli0;
- c1 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
- if (c1 < 0)
- goto close_peer1;
- err = xconnect(c1, sockaddr(&addr), len);
+ err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
goto close_cli1;
- err = xgetsockname(c1, sockaddr(&addr), &len);
- if (err)
+
+ n = write(c1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
goto close_cli1;
- err = xconnect(p1, sockaddr(&addr), len);
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
if (err)
goto close_cli1;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
- key = 0;
- value = p0;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+again:
+ n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+ if (n < 0) {
+ if (errno == EAGAIN && retries--) {
+ usleep(1000);
+ goto again;
+ }
+ FAIL_ERRNO("%s: read", log_prefix);
+ }
+ if (n == 0)
+ FAIL("%s: incomplete read", log_prefix);
+
+close_cli1:
+ xclose(c1);
+ xclose(p1);
+close_cli0:
+ xclose(c0);
+ xclose(p0);
+}
+
+static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
if (err)
- goto close_cli1;
+ return;
- key = 1;
- value = p1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ skel->bss->test_ingress = false;
+ udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family)
+{
+ const char *family_name, *map_name;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(family);
+ map_name = map_type_str(map);
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+ if (!test__start_subtest(s))
+ return;
+ udp_skb_redir_to_connected(skel, map, family);
+}
+
+static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ int retries = 100;
+ int err, n;
+ int sfd[2];
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
+ return;
+ c0 = sfd[0], p0 = sfd[1];
+
+ err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
+ if (err)
+ goto close;
+
+ err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
goto close_cli1;
@@ -1690,8 +1879,10 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
again:
n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
if (n < 0) {
- if (errno == EAGAIN && retries--)
+ if (errno == EAGAIN && retries--) {
+ usleep(1000);
goto again;
+ }
FAIL_ERRNO("%s: read", log_prefix);
}
if (n == 0)
@@ -1699,16 +1890,102 @@ again:
close_cli1:
xclose(c1);
-close_peer1:
+ xclose(p1);
+close:
+ xclose(c0);
+ xclose(p0);
+}
+
+static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_EGRESS);
+ inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_INGRESS);
+ inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ int err, n;
+ int sfd[2];
+ u32 key;
+ char b;
+ int retries = 100;
+
+ zero_verdict_count(verd_mapfd);
+
+ err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
+ if (err)
+ return;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
+ goto close_cli0;
+ c1 = sfd[0], p1 = sfd[1];
+
+ err = add_to_sockmap(sock_mapfd, p0, p1);
+ if (err)
+ goto close;
+
+ n = write(c1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto close;
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ goto close;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+again:
+ n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+ if (n < 0) {
+ if (errno == EAGAIN && retries--) {
+ usleep(1000);
+ goto again;
+ }
+ FAIL_ERRNO("%s: read", log_prefix);
+ }
+ if (n == 0)
+ FAIL("%s: incomplete read", log_prefix);
+
+close:
+ xclose(c1);
xclose(p1);
close_cli0:
xclose(c0);
-close_peer0:
xclose(p0);
+
}
-static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
+static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
{
int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
@@ -1720,17 +1997,21 @@ static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
return;
skel->bss->test_ingress = false;
- udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_EGRESS);
+ unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_EGRESS);
+ unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_EGRESS);
skel->bss->test_ingress = true;
- udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_INGRESS);
+ unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_INGRESS);
+ unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_INGRESS);
xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
}
-static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int family)
+static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family)
{
const char *family_name, *map_name;
char s[MAX_TEST_NAME];
@@ -1740,7 +2021,8 @@ static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map
snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
if (!test__start_subtest(s))
return;
- udp_skb_redir_to_connected(skel, map, family);
+ inet_unix_skb_redir_to_connected(skel, map, family);
+ unix_inet_skb_redir_to_connected(skel, map, family);
}
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
@@ -1752,6 +2034,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
test_reuseport(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_DGRAM);
test_udp_redir(skel, map, family);
+ test_udp_unix_redir(skel, map, family);
}
void test_sockmap_listen(void)
@@ -1767,10 +2050,14 @@ void test_sockmap_listen(void)
skel->bss->test_sockmap = true;
run_tests(skel, skel->maps.sock_map, AF_INET);
run_tests(skel, skel->maps.sock_map, AF_INET6);
+ test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
+ test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
skel->bss->test_sockmap = false;
run_tests(skel, skel->maps.sock_hash, AF_INET);
run_tests(skel, skel->maps.sock_hash, AF_INET6);
+ test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
+ test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
test_sockmap_listen__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index ec281b0363b8..86f97681ad89 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -195,8 +195,10 @@ static void run_test(int cgroup_fd)
pthread_mutex_lock(&server_started_mtx);
if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
- (void *)&server_fd)))
+ (void *)&server_fd))) {
+ pthread_mutex_unlock(&server_started_mtx);
goto close_server_fd;
+ }
pthread_cond_wait(&server_started, &server_started_mtx);
pthread_mutex_unlock(&server_started_mtx);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
new file mode 100644
index 000000000000..6b53b3cb8dad
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <netinet/tcp.h>
+#include "sockopt_qos_to_cc.skel.h"
+
+/* Set IPV6_TCLASS to 0x2D on sock_fd, then read TCP_CONGESTION back and
+ * expect it to be "reno". Stops at the first failing step.
+ */
+static void run_setsockopt_test(int cg_fd, int sock_fd)
+{
+	char cc[16]; /* TCP_CA_NAME_MAX */
+	socklen_t optlen = sizeof(cc);
+	int buf = 0x2D;
+	int err;
+
+	err = setsockopt(sock_fd, SOL_IPV6, IPV6_TCLASS, &buf, sizeof(buf));
+	if (!ASSERT_OK(err, "setsockopt(sock_fd, IPV6_TCLASS)"))
+		return;
+
+	/* the congestion control name is expected to have changed by now */
+	err = getsockopt(sock_fd, SOL_TCP, TCP_CONGESTION, cc, &optlen);
+	if (!ASSERT_OK(err, "getsockopt(sock_fd, TCP_CONGESTION)"))
+		return;
+
+	ASSERT_STREQ(cc, "reno", "getsockopt(sock_fd, TCP_CONGESTION)");
+}
+
+/* Join a test cgroup, load the sockopt_qos_to_cc skeleton, set "cubic" as
+ * congestion control on a fresh IPv6 TCP socket, attach the program to the
+ * cgroup and run run_setsockopt_test() against that socket.
+ */
+void test_sockopt_qos_to_cc(void)
+{
+	char cc_cubic[16] = "cubic"; /* TCP_CA_NAME_MAX */
+	struct sockopt_qos_to_cc *skel;
+	int sock_fd = -1;
+	int cg_fd;
+	int err;
+
+	cg_fd = test__join_cgroup("/sockopt_qos_to_cc");
+	if (!ASSERT_GE(cg_fd, 0, "cg-join(sockopt_qos_to_cc)"))
+		return;
+
+	skel = sockopt_qos_to_cc__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel"))
+		goto out;
+
+	sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (!ASSERT_GE(sock_fd, 0, "v6 socket open"))
+		goto out;
+
+	err = setsockopt(sock_fd, SOL_TCP, TCP_CONGESTION, cc_cubic,
+			 sizeof(cc_cubic));
+	if (!ASSERT_OK(err, "setsockopt(sock_fd, TCP_CONGESTION)"))
+		goto out;
+
+	skel->links.sockopt_qos_to_cc =
+		bpf_program__attach_cgroup(skel->progs.sockopt_qos_to_cc,
+					   cg_fd);
+	if (!ASSERT_OK_PTR(skel->links.sockopt_qos_to_cc,
+			   "prog_attach(sockopt_qos_to_cc)"))
+		goto out;
+
+	run_setsockopt_test(cg_fd, sock_fd);
+
+out:
+	if (sock_fd >= 0)
+		close(sock_fd);
+	if (cg_fd >= 0)
+		close(cg_fd);
+	/* destroy can take null and error pointer */
+	sockopt_qos_to_cc__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
new file mode 100644
index 000000000000..53f0e0fa1a53
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <linux/ptrace.h>
+#include "test_task_pt_regs.skel.h"
+
+/* Attach handle_uprobe to get_base_addr() in /proc/self/exe, call
+ * get_base_addr() to trigger it, then check that uprobe_res is 1 and that
+ * current_regs and ctx_regs in .bss are byte-identical.
+ */
+void test_task_pt_regs(void)
+{
+	struct test_task_pt_regs *skel;
+	struct bpf_link *link;
+	size_t offset;
+	ssize_t base;
+	bool same;
+
+	base = get_base_addr();
+	if (!ASSERT_GT(base, 0, "get_base_addr"))
+		return;
+	offset = get_uprobe_offset(&get_base_addr, base);
+
+	skel = test_task_pt_regs__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+	if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
+		goto cleanup;
+
+	link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
+					  false /* retprobe */,
+					  0 /* self pid */,
+					  "/proc/self/exe",
+					  offset);
+	if (!ASSERT_OK_PTR(link, "attach_uprobe"))
+		goto cleanup;
+	/* hand the link to the skeleton so that destroy releases it */
+	skel->links.handle_uprobe = link;
+
+	/* trigger & validate uprobe */
+	get_base_addr();
+
+	if (!ASSERT_EQ(skel->bss->uprobe_res, 1, "check_uprobe_res"))
+		goto cleanup;
+
+	same = memcmp(&skel->bss->current_regs, &skel->bss->ctx_regs,
+		      sizeof(skel->bss->current_regs)) == 0;
+	ASSERT_TRUE(same, "check_regs_match");
+
+cleanup:
+	test_task_pt_regs__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 5703c918812b..e7201ba29ccd 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -13,15 +13,16 @@
#define _GNU_SOURCE
#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
#include <linux/limits.h>
#include <linux/sysctl.h>
-#include <linux/if_tun.h>
-#include <linux/if.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
-#include <sys/stat.h>
#include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
#include "test_progs.h"
#include "network_helpers.h"
@@ -391,9 +392,7 @@ done:
static int test_ping(int family, const char *addr)
{
- const char *ping = family == AF_INET6 ? "ping6" : "ping";
-
- SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
+ SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
return 0;
fail:
return -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
new file mode 100644
index 000000000000..25f40e1b9967
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "timer.skel.h"
+
+/* Attach the timer skeleton, run its test1 program once, detach, and check
+ * the counters the BPF side leaves in .data/.bss.
+ * Returns the attach error if attaching fails, 0 otherwise.
+ */
+static int timer(struct timer *timer_skel)
+{
+	__u32 retval, duration = 0;
+	int prog_fd;
+	int err;
+
+	err = timer__attach(timer_skel);
+	if (!ASSERT_OK(err, "timer_attach"))
+		return err;
+
+	/* both counters are expected to read 52 before the run */
+	ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1");
+	ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1");
+
+	prog_fd = bpf_program__fd(timer_skel->progs.test1);
+	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+				NULL, NULL, &retval, &duration);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(retval, 0, "test_run");
+	timer__detach(timer_skel);
+
+	usleep(50); /* 10 usecs should be enough, but give it extra */
+	/* ... and 42 afterwards */
+	ASSERT_EQ(timer_skel->data->callback_check, 42, "callback_check2");
+	ASSERT_EQ(timer_skel->data->callback2_check, 42, "callback2_check2");
+
+	/* value expected in bss_data after the run */
+	ASSERT_EQ(timer_skel->bss->bss_data, 10, "bss_data");
+
+	/* check that there were no errors in timer execution */
+	ASSERT_EQ(timer_skel->bss->err, 0, "err");
+
+	/* check that code paths completed */
+	ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok");
+
+	return 0;
+}
+
+/* Load the timer skeleton, run timer() on it and destroy the skeleton. */
+void test_timer(void)
+{
+	struct timer *skel;
+	int err;
+
+	skel = timer__open_and_load();
+	if (ASSERT_OK_PTR(skel, "timer_skel_load")) {
+		err = timer(skel);
+		ASSERT_OK(err, "timer");
+	}
+
+	timer__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
new file mode 100644
index 000000000000..ced8f6cf347c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "timer_mim.skel.h"
+#include "timer_mim_reject.skel.h"
+
+/* Attach the timer_mim skeleton, run test1 once, then poll 'cnt' in .bss:
+ * first until it changes, and again after key 1 is deleted from outer_arr
+ * until it stops changing.
+ * Returns the attach error if attaching fails, 0 otherwise.
+ */
+static int timer_mim(struct timer_mim *timer_skel)
+{
+	__u32 duration = 0, retval;
+	int err, prog_fd, key1 = 1;
+	__u64 before, after;
+	int polls;
+
+	err = timer_mim__attach(timer_skel);
+	if (!ASSERT_OK(err, "timer_attach"))
+		return err;
+
+	prog_fd = bpf_program__fd(timer_skel->progs.test1);
+	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+				NULL, NULL, &retval, &duration);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(retval, 0, "test_run");
+	timer_mim__detach(timer_skel);
+
+	/* check that timer_cb[12] are incrementing 'cnt' */
+	before = READ_ONCE(timer_skel->bss->cnt);
+	polls = 0;
+	do {
+		after = READ_ONCE(timer_skel->bss->cnt);
+		if (after != before)
+			break;
+		usleep(200); /* 100 times more than interval */
+	} while (++polls < 100);
+	ASSERT_GT(after, before, "cnt");
+
+	ASSERT_EQ(timer_skel->bss->err, 0, "err");
+	/* check that code paths completed */
+	ASSERT_EQ(timer_skel->bss->ok, 1 | 2, "ok");
+
+	/* close the fd of inner_htab, then delete key 1 from outer_arr */
+	close(bpf_map__fd(timer_skel->maps.inner_htab));
+	err = bpf_map_delete_elem(bpf_map__fd(timer_skel->maps.outer_arr), &key1);
+	ASSERT_EQ(err, 0, "delete inner map");
+
+	/* check that timer_cb[12] are no longer running */
+	before = READ_ONCE(timer_skel->bss->cnt);
+	polls = 0;
+	do {
+		usleep(200); /* 100 times more than interval */
+		after = READ_ONCE(timer_skel->bss->cnt);
+		if (after == before)
+			break;
+	} while (++polls < 100);
+	ASSERT_EQ(after, before, "cnt");
+
+	return 0;
+}
+
+/* Expect timer_mim_reject to fail loading (with libbpf output silenced for
+ * that attempt), then load timer_mim and run timer_mim() on it.
+ */
+void test_timer_mim(void)
+{
+	struct timer_mim_reject *reject_skel;
+	struct timer_mim *skel = NULL;
+	libbpf_print_fn_t saved_print;
+	int err;
+
+	saved_print = libbpf_set_print(NULL);
+	reject_skel = timer_mim_reject__open_and_load();
+	libbpf_set_print(saved_print);
+	if (!ASSERT_ERR_PTR(reject_skel, "timer_reject_skel_load"))
+		goto cleanup;
+
+	skel = timer_mim__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "timer_skel_load"))
+		goto cleanup;
+
+	err = timer_mim(skel);
+	ASSERT_OK(err, "timer_mim");
+
+cleanup:
+	timer_mim__destroy(skel);
+	timer_mim_reject__destroy(reject_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
new file mode 100644
index 000000000000..370d220288a6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/**
+ * Test XDP bonding support
+ *
+ * Sets up two bonded veth pairs between two fresh namespaces
+ * and verifies that XDP_TX program loaded on a bond device
+ * are correctly loaded onto the slave devices and XDP_TX'd
+ * packets are balanced using bonding.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <net/if.h>
+#include <linux/if_link.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include <linux/if_bonding.h>
+#include <linux/limits.h>
+#include <linux/udp.h>
+
+#include "xdp_dummy.skel.h"
+#include "xdp_redirect_multi_kern.skel.h"
+#include "xdp_tx.skel.h"
+
+/* MAC addresses given to bond1 and bond2 in bonding_setup(); the initializer
+ * forms are used for the Ethernet header built in send_udp_packets().
+ */
+#define BOND1_MAC {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}
+#define BOND1_MAC_STR "00:11:22:33:44:55"
+#define BOND2_MAC {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}
+#define BOND2_MAC_STR "00:22:33:44:55:66"
+/* number of frames sent by send_udp_packets() */
+#define NPACKETS 100
+
+/* fd of /proc/self/ns/net, opened in test_xdp_bonding(); -1 while unset */
+static int root_netns_fd = -1;
+
+/* Switch the calling thread back to the network namespace referenced by
+ * root_netns_fd; a setns() failure is reported through ASSERT_OK only.
+ */
+static void restore_root_netns(void)
+{
+ ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "restore_root_netns");
+}
+
+/* Enter the network namespace whose handle is /var/run/netns/<name>.
+ * Returns -1 if the handle cannot be opened, otherwise the result of setns().
+ */
+static int setns_by_name(char *name)
+{
+	char path[PATH_MAX];
+	int fd, ret;
+
+	snprintf(path, sizeof(path), "%s/%s", "/var/run/netns", name);
+
+	fd = open(path, O_RDONLY | O_CLOEXEC);
+	if (fd < 0)
+		return -1;
+
+	ret = setns(fd, CLONE_NEWNET);
+	/* the fd is only needed for the setns() call itself */
+	close(fd);
+
+	return ret;
+}
+
+/* Look up the "<iface>:" line in /proc/net/dev and return its second
+ * space-separated field converted with atoi().
+ * Returns -1 if the file cannot be opened or no line matches.
+ */
+static int get_rx_packets(const char *iface)
+{
+	size_t name_len = strlen(iface);
+	int packets = -1;
+	char row[512];
+	FILE *fp;
+
+	fp = fopen("/proc/net/dev", "r");
+	if (!fp)
+		return -1;
+
+	while (fgets(row, sizeof(row), fp)) {
+		char *cur = row + strspn(row, " ");	/* skip leading blanks */
+
+		if (strncmp(cur, iface, name_len) != 0)
+			continue;
+		cur += name_len;
+		/* reject names that merely start with iface */
+		if (*cur != ':')
+			continue;
+		cur++;
+
+		cur += strspn(cur, " ");	/* skip whitespace */
+		cur += strcspn(cur, " ");	/* skip rx bytes */
+		cur += strspn(cur, " ");	/* skip whitespace */
+		packets = atoi(cur);
+		break;
+	}
+
+	fclose(fp);
+	return packets;
+}
+
+/* capacity of skeletons.links[] */
+#define MAX_BPF_LINKS 8
+
+/* Skeletons loaded once in test_xdp_bonding(), plus the XDP links created by
+ * xdp_attach() and released in bonding_cleanup().
+ */
+struct skeletons {
+ struct xdp_dummy *xdp_dummy;
+ struct xdp_tx *xdp_tx;
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+
+ /* number of valid entries in links[] */
+ int nlinks;
+ struct bpf_link *links[MAX_BPF_LINKS];
+};
+
+/* Attach prog as XDP program to the interface named iface and remember the
+ * resulting link in skeletons->links[].
+ * Returns 0 on success, -1 if the interface is unknown, the link table is
+ * full, or the attach fails.
+ */
+static int xdp_attach(struct skeletons *skeletons, struct bpf_program *prog, char *iface)
+{
+	int ifindex = if_nametoindex(iface);
+	struct bpf_link *new_link;
+
+	if (!ASSERT_GT(ifindex, 0, "get ifindex"))
+		return -1;
+
+	/* make sure there is room before creating the link */
+	if (!ASSERT_LE(skeletons->nlinks+1, MAX_BPF_LINKS, "too many XDP programs attached"))
+		return -1;
+
+	new_link = bpf_program__attach_xdp(prog, ifindex);
+	if (!ASSERT_OK_PTR(new_link, "attach xdp program"))
+		return -1;
+
+	skeletons->links[skeletons->nlinks] = new_link;
+	skeletons->nlinks++;
+	return 0;
+}
+
+/* Values for the bond_both_attach argument of bonding_setup(). */
+enum {
+ /* veth1_2/veth2_2 are brought up but not enslaved; no xdp_tx on bond2 */
+ BOND_ONE_NO_ATTACH = 0,
+ /* both veth pairs are enslaved and xdp_tx is attached to bond2 */
+ BOND_BOTH_AND_ATTACH,
+};
+
+/* BOND_MODE_* value -> name passed as "mode" to "ip link add ... type bond"
+ * in bonding_setup().
+ */
+static const char * const mode_names[] = {
+ [BOND_MODE_ROUNDROBIN] = "balance-rr",
+ [BOND_MODE_ACTIVEBACKUP] = "active-backup",
+ [BOND_MODE_XOR] = "balance-xor",
+ [BOND_MODE_BROADCAST] = "broadcast",
+ [BOND_MODE_8023AD] = "802.3ad",
+ [BOND_MODE_TLB] = "balance-tlb",
+ [BOND_MODE_ALB] = "balance-alb",
+};
+
+/* BOND_XMIT_POLICY_* value -> name passed as "xmit_hash_policy" to
+ * "ip link add ... type bond" in bonding_setup().
+ */
+static const char * const xmit_policy_names[] = {
+ [BOND_XMIT_POLICY_LAYER2] = "layer2",
+ [BOND_XMIT_POLICY_LAYER34] = "layer3+4",
+ [BOND_XMIT_POLICY_LAYER23] = "layer2+3",
+ [BOND_XMIT_POLICY_ENCAP23] = "encap2+3",
+ [BOND_XMIT_POLICY_ENCAP34] = "encap3+4",
+};
+
+/* Build the test topology with "ip" commands: netns ns_dst, veth pairs
+ * veth1_1/veth2_1 and veth1_2/veth2_2 (the veth2_* ends in ns_dst), bond1 in
+ * the current netns and bond2 in ns_dst, then attach XDP programs.
+ *
+ * @mode and @xmit_policy index mode_names[] and xmit_policy_names[].
+ * With BOND_BOTH_AND_ATTACH both veth pairs are enslaved and xdp_tx is
+ * attached to bond2; otherwise only veth1_1/veth2_1 are enslaved and
+ * xdp_dummy is attached to veth1_2.
+ *
+ * Returns 0 on success and -1 as soon as a command or attach step fails;
+ * nothing set up earlier is undone here.
+ */
+static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy,
+ int bond_both_attach)
+{
+ /* run a formatted shell command; makes bonding_setup() return -1 on failure */
+#define SYS(fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_OK(system(cmd), cmd)) \
+ return -1; \
+ })
+
+ SYS("ip netns add ns_dst");
+ SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst");
+ SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst");
+
+ SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s",
+ mode_names[mode], xmit_policy_names[xmit_policy]);
+ SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none");
+ SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s",
+ mode_names[mode], xmit_policy_names[xmit_policy]);
+ SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none");
+
+ SYS("ip link set veth1_1 master bond1");
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+ SYS("ip link set veth1_2 master bond1");
+ } else {
+ SYS("ip link set veth1_2 up addrgenmode none");
+
+ if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2"))
+ return -1;
+ }
+
+ SYS("ip -netns ns_dst link set veth2_1 master bond2");
+
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH)
+ SYS("ip -netns ns_dst link set veth2_2 master bond2");
+ else
+ SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none");
+
+ /* Load a dummy program on sending side as with veth peer needs to have a
+ * XDP program loaded as well.
+ */
+ if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "bond1"))
+ return -1;
+
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+ /* bond2 lives in ns_dst, so attach from inside that netns */
+ if (!ASSERT_OK(setns_by_name("ns_dst"), "set netns to ns_dst"))
+ return -1;
+
+ if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond2"))
+ return -1;
+
+ restore_root_netns();
+ }
+
+ return 0;
+
+#undef SYS
+}
+
+/* Return to the root netns, destroy every recorded XDP link (newest first)
+ * and delete bond1, veth1_1, veth1_2 and the ns_dst namespace.
+ */
+static void bonding_cleanup(struct skeletons *skeletons)
+{
+	int idx;
+
+	restore_root_netns();
+
+	for (idx = skeletons->nlinks - 1; idx >= 0; idx--)
+		bpf_link__destroy(skeletons->links[idx]);
+	skeletons->nlinks = 0;
+
+	ASSERT_OK(system("ip link delete bond1"), "delete bond1");
+	ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1");
+	ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2");
+	ASSERT_OK(system("ip netns delete ns_dst"), "delete ns_dst");
+}
+
+/* Send NPACKETS raw Ethernet/IPv4/UDP frames of 128 bytes out of bond1,
+ * addressed from BOND1_MAC to BOND2_MAC. The UDP destination port is
+ * incremented for every frame; the destination IP as well when
+ * @vary_dst_ip is non-zero.
+ *
+ * Returns 0 when all frames were sent, -1 on the first failure. The packet
+ * socket is closed on every path, including success.
+ */
+static int send_udp_packets(int vary_dst_ip)
+{
+	struct ethhdr eh = {
+		.h_source = BOND1_MAC,
+		.h_dest = BOND2_MAC,
+		.h_proto = htons(ETH_P_IP),
+	};
+	uint8_t buf[128] = {};
+	struct iphdr *iph = (struct iphdr *)(buf + sizeof(eh));
+	struct udphdr *uh = (struct udphdr *)(buf + sizeof(eh) + sizeof(*iph));
+	int ret = -1;
+	int ifindex;
+	int i, s;
+
+	s = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
+	if (!ASSERT_GE(s, 0, "socket"))
+		return -1;
+
+	ifindex = if_nametoindex("bond1");
+	if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex"))
+		goto out;
+
+	memcpy(buf, &eh, sizeof(eh));
+	iph->ihl = 5;
+	iph->version = 4;
+	iph->tos = 16;
+	iph->id = 1;
+	iph->ttl = 64;
+	iph->protocol = IPPROTO_UDP;
+	iph->saddr = 1;
+	iph->daddr = 2;
+	iph->tot_len = htons(sizeof(buf) - ETH_HLEN);
+	iph->check = 0;
+
+	for (i = 1; i <= NPACKETS; i++) {
+		int n;
+		struct sockaddr_ll saddr_ll = {
+			.sll_ifindex = ifindex,
+			.sll_halen = ETH_ALEN,
+			.sll_addr = BOND2_MAC,
+		};
+
+		/* vary the UDP destination port for even distribution with roundrobin/xor modes */
+		uh->dest++;
+
+		if (vary_dst_ip)
+			iph->daddr++;
+
+		n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll));
+		if (!ASSERT_EQ(n, sizeof(buf), "sendto"))
+			goto out;
+	}
+
+	ret = 0;
+
+out:
+	/* s is known to be valid here; close it on success as well as on error */
+	close(s);
+	return ret;
+}
+
+/* Set up the bonded topology for @mode/@xmit_policy, send NPACKETS frames
+ * through bond1 and check the rx packet counters of bond1, veth1_1 and
+ * veth1_2 against what that mode and hash policy should produce.
+ * The topology is torn down before returning, also when a step fails.
+ */
+static void test_xdp_bonding_with_mode(struct skeletons *skeletons, int mode, int xmit_policy)
+{
+	int bond1_rx;
+
+	if (bonding_setup(skeletons, mode, xmit_policy, BOND_BOTH_AND_ATTACH))
+		goto out;
+
+	/* the destination IP is varied for every policy except layer3+4 */
+	if (send_udp_packets(xmit_policy != BOND_XMIT_POLICY_LAYER34))
+		goto out;
+
+	bond1_rx = get_rx_packets("bond1");
+	ASSERT_EQ(bond1_rx, NPACKETS, "expected more received packets");
+
+	switch (mode) {
+	case BOND_MODE_ROUNDROBIN:
+	case BOND_MODE_XOR: {
+		int veth1_rx = get_rx_packets("veth1_1");
+		int veth2_rx = get_rx_packets("veth1_2");
+		int diff = abs(veth1_rx - veth2_rx);
+
+		ASSERT_GE(veth1_rx + veth2_rx, NPACKETS, "expected more packets");
+
+		switch (xmit_policy) {
+		case BOND_XMIT_POLICY_LAYER2:
+			ASSERT_GE(diff, NPACKETS,
+				  "expected packets on only one of the interfaces");
+			break;
+		case BOND_XMIT_POLICY_LAYER23:
+		case BOND_XMIT_POLICY_LAYER34:
+			ASSERT_LT(diff, NPACKETS/2,
+				  "expected even distribution of packets");
+			break;
+		default:
+			PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+			break;
+		}
+		break;
+	}
+	case BOND_MODE_ACTIVEBACKUP: {
+		int veth1_rx = get_rx_packets("veth1_1");
+		int veth2_rx = get_rx_packets("veth1_2");
+		int diff = abs(veth1_rx - veth2_rx);
+
+		ASSERT_GE(diff, NPACKETS,
+			  "expected packets on only one of the interfaces");
+		break;
+	}
+	default:
+		/* the value that has no case here is the bonding mode, so report that */
+		PRINT_FAIL("Unimplemented mode=%d\n", mode);
+		break;
+	}
+
+out:
+	bonding_cleanup(skeletons);
+}
+
+/* Test broadcast redirection with xdp_redirect_map_multi_prog: every
+ * interface in ns_dst is added to map_all, and the test checks that the
+ * broadcast is sent to neither the ingress bond device (bond2) nor its
+ * slave (veth2_1).
+ */
+static void test_xdp_bonding_redirect_multi(struct skeletons *skeletons)
+{
+	static const char * const ifaces[] = {"bond2", "veth2_1", "veth2_2"};
+	int veth1_1_rx, veth1_2_rx;
+	int map_fd, ifindex;
+	int err;
+	int i;
+
+	if (bonding_setup(skeletons, BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23,
+			  BOND_ONE_NO_ATTACH))
+		goto out;
+
+	if (!ASSERT_OK(setns_by_name("ns_dst"), "could not set netns to ns_dst"))
+		goto out;
+
+	/* populate the devmap with the relevant interfaces */
+	map_fd = bpf_map__fd(skeletons->xdp_redirect_multi_kern->maps.map_all);
+	for (i = 0; i < ARRAY_SIZE(ifaces); i++) {
+		ifindex = if_nametoindex(ifaces[i]);
+		if (!ASSERT_GT(ifindex, 0, "could not get interface index"))
+			goto out;
+
+		/* key and value are both the interface index */
+		err = bpf_map_update_elem(map_fd, &ifindex, &ifindex, 0);
+		if (!ASSERT_OK(err, "add interface to map_all"))
+			goto out;
+	}
+
+	if (xdp_attach(skeletons,
+		       skeletons->xdp_redirect_multi_kern->progs.xdp_redirect_map_multi_prog,
+		       "bond2"))
+		goto out;
+
+	restore_root_netns();
+
+	if (send_udp_packets(BOND_MODE_ROUNDROBIN))
+		goto out;
+
+	veth1_1_rx = get_rx_packets("veth1_1");
+	veth1_2_rx = get_rx_packets("veth1_2");
+
+	ASSERT_EQ(veth1_1_rx, 0, "expected no packets on veth1_1");
+	ASSERT_GE(veth1_2_rx, NPACKETS, "expected packets on veth1_2");
+
+out:
+	restore_root_netns();
+	bonding_cleanup(skeletons);
+}
+
+/* Test that XDP programs cannot be attached to both the bond master and slaves simultaneously.
+ *
+ * Uses its own "veth"/"bond" devices in the current netns and walks through:
+ * enslaving a device that already has a program (must fail), attaching to
+ * the slave while the master has none (allowed), attaching to the master
+ * while the slave has one (must fail), attaching to the master alone
+ * (allowed), and attaching to the slave while the master has one (must fail).
+ * Both devices are deleted on every exit path.
+ */
+static void test_xdp_bonding_attach(struct skeletons *skeletons)
+{
+	struct bpf_link *link = NULL;
+	struct bpf_link *link2 = NULL;
+	int veth, bond;
+	int err;
+
+	if (!ASSERT_OK(system("ip link add veth type veth"), "add veth"))
+		goto out;
+	if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+		goto out;
+
+	/* if_nametoindex() reports failure as 0, so a valid index is > 0 */
+	veth = if_nametoindex("veth");
+	if (!ASSERT_GT(veth, 0, "if_nametoindex veth"))
+		goto out;
+	bond = if_nametoindex("bond");
+	if (!ASSERT_GT(bond, 0, "if_nametoindex bond"))
+		goto out;
+
+	/* enslaving with a XDP program loaded fails */
+	link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+	if (!ASSERT_OK_PTR(link, "attach program to veth"))
+		goto out;
+
+	err = system("ip link set veth master bond");
+	if (!ASSERT_NEQ(err, 0, "attaching slave with xdp program expected to fail"))
+		goto out;
+
+	bpf_link__destroy(link);
+	link = NULL;
+
+	err = system("ip link set veth master bond");
+	if (!ASSERT_OK(err, "set veth master"))
+		goto out;
+
+	/* attaching to slave when master has no program is allowed */
+	link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+	if (!ASSERT_OK_PTR(link, "attach program to slave when enslaved"))
+		goto out;
+
+	/* attaching to master not allowed when slave has program loaded */
+	link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+	if (!ASSERT_ERR_PTR(link2, "attach program to master when slave has program"))
+		goto out;
+
+	bpf_link__destroy(link);
+	link = NULL;
+
+	/* attaching XDP program to master allowed when slave has no program */
+	link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+	if (!ASSERT_OK_PTR(link, "attach program to master"))
+		goto out;
+
+	/* attaching to slave not allowed when master has program loaded;
+	 * the target must be the slave (veth) for this to test what it says
+	 */
+	link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+	ASSERT_ERR_PTR(link2, "attach program to slave when master has program");
+
+out:
+	bpf_link__destroy(link);
+	bpf_link__destroy(link2);
+
+	system("ip link del veth");
+	system("ip link del bond");
+}
+
+/* libbpf print callback that drops LIBBPF_WARN messages and forwards every
+ * other level to vprintf(). Always returns 0.
+ */
+static int libbpf_debug_print(enum libbpf_print_level level,
+			      const char *format, va_list args)
+{
+	if (level == LIBBPF_WARN)
+		return 0;
+
+	vprintf(format, args);
+	return 0;
+}
+
+/* One subtest of test_xdp_bonding(): subtest name plus the bonding mode and
+ * transmit hash policy handed to test_xdp_bonding_with_mode().
+ */
+struct bond_test_case {
+ char *name;
+ int mode;
+ int xmit_policy;
+};
+
+/* Mode/policy combinations iterated by test_xdp_bonding(). */
+static struct bond_test_case bond_test_cases[] = {
+ { "xdp_bonding_roundrobin", BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, },
+ { "xdp_bonding_activebackup", BOND_MODE_ACTIVEBACKUP, BOND_XMIT_POLICY_LAYER23 },
+
+ { "xdp_bonding_xor_layer2", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER2, },
+ { "xdp_bonding_xor_layer23", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER23, },
+ { "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, },
+};
+
+/* Entry point: remember the root netns, load the three skeletons once and
+ * run the attach, per-mode and redirect_multi subtests that were selected.
+ * libbpf output is routed through libbpf_debug_print() for the duration.
+ */
+void test_xdp_bonding(void)
+{
+	struct skeletons skeletons = {};
+	libbpf_print_fn_t saved_print;
+	struct bond_test_case *tc;
+	int i;
+
+	saved_print = libbpf_set_print(libbpf_debug_print);
+
+	root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+	if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+		goto out;
+
+	skeletons.xdp_dummy = xdp_dummy__open_and_load();
+	if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load"))
+		goto out;
+
+	skeletons.xdp_tx = xdp_tx__open_and_load();
+	if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load"))
+		goto out;
+
+	skeletons.xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+	if (!ASSERT_OK_PTR(skeletons.xdp_redirect_multi_kern,
+			   "xdp_redirect_multi_kern__open_and_load"))
+		goto out;
+
+	if (test__start_subtest("xdp_bonding_attach"))
+		test_xdp_bonding_attach(&skeletons);
+
+	for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) {
+		tc = &bond_test_cases[i];
+		if (!test__start_subtest(tc->name))
+			continue;
+		test_xdp_bonding_with_mode(&skeletons, tc->mode, tc->xmit_policy);
+	}
+
+	if (test__start_subtest("xdp_bonding_redirect_multi"))
+		test_xdp_bonding_redirect_multi(&skeletons);
+
+out:
+	xdp_dummy__destroy(skeletons.xdp_dummy);
+	xdp_tx__destroy(skeletons.xdp_tx);
+	xdp_redirect_multi_kern__destroy(skeletons.xdp_redirect_multi_kern);
+
+	libbpf_set_print(saved_print);
+	if (root_netns_fd >= 0)
+		close(root_netns_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
new file mode 100644
index 000000000000..ab4952b9fb1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_xdp_context_test_run.skel.h"
+
+/* Run prog_fd with a struct xdp_md built from the given field values and
+ * expect the run to fail with errno EINVAL. opts is received by value, so
+ * the ctx_in/ctx_size_in set here do not reach the caller's copy.
+ */
+void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
+			    __u32 data_meta, __u32 data, __u32 data_end,
+			    __u32 ingress_ifindex, __u32 rx_queue_index,
+			    __u32 egress_ifindex)
+{
+	struct xdp_md ctx = {};
+	int err;
+
+	ctx.data = data;
+	ctx.data_end = data_end;
+	ctx.data_meta = data_meta;
+	ctx.ingress_ifindex = ingress_ifindex;
+	ctx.rx_queue_index = rx_queue_index;
+	ctx.egress_ifindex = egress_ifindex;
+
+	opts.ctx_in = &ctx;
+	opts.ctx_size_in = sizeof(ctx);
+
+	err = bpf_prog_test_run_opts(prog_fd, &opts);
+	/* errno is checked first, before anything else can overwrite it */
+	ASSERT_EQ(errno, EINVAL, "errno-EINVAL");
+	ASSERT_ERR(err, "bpf_prog_test_run");
+}
+
+/* Exercise bpf_prog_test_run_opts() on the xdp_context program with a
+ * user-supplied struct xdp_md: an oversized context expected to fail with
+ * E2BIG, a valid context expected to return XDP_PASS with adjusted
+ * data/data_end in ctx_out, and a series of invalid contexts checked
+ * through test_xdp_context_error().
+ */
+void test_xdp_context_test_run(void)
+{
+ struct test_xdp_context_test_run *skel = NULL;
+ char data[sizeof(pkt_v4) + sizeof(__u32)];
+ char bad_ctx[sizeof(struct xdp_md) + 1];
+ struct xdp_md ctx_in, ctx_out;
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .data_size_in = sizeof(data),
+ .ctx_out = &ctx_out,
+ .ctx_size_out = sizeof(ctx_out),
+ .repeat = 1,
+ );
+ int err, prog_fd;
+
+ skel = test_xdp_context_test_run__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+ prog_fd = bpf_program__fd(skel->progs.xdp_context);
+
+ /* Data past the end of the kernel's struct xdp_md must be 0 */
+ bad_ctx[sizeof(bad_ctx) - 1] = 1;
+ opts.ctx_in = bad_ctx;
+ opts.ctx_size_in = sizeof(bad_ctx);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_EQ(errno, E2BIG, "extradata-errno");
+ ASSERT_ERR(err, "bpf_prog_test_run(extradata)");
+
+ /* buffer layout: a __u32 holding XDP_PASS, followed by pkt_v4 */
+ *(__u32 *)data = XDP_PASS;
+ *(struct ipv4_packet *)(data + sizeof(__u32)) = pkt_v4;
+ opts.ctx_in = &ctx_in;
+ opts.ctx_size_in = sizeof(ctx_in);
+ memset(&ctx_in, 0, sizeof(ctx_in));
+ ctx_in.data_meta = 0;
+ ctx_in.data = sizeof(__u32);
+ ctx_in.data_end = ctx_in.data + sizeof(pkt_v4);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(err, "bpf_prog_test_run(valid)");
+ ASSERT_EQ(opts.retval, XDP_PASS, "valid-retval");
+ ASSERT_EQ(opts.data_size_out, sizeof(pkt_v4), "valid-datasize");
+ ASSERT_EQ(opts.ctx_size_out, opts.ctx_size_in, "valid-ctxsize");
+ ASSERT_EQ(ctx_out.data_meta, 0, "valid-datameta");
+ ASSERT_EQ(ctx_out.data, 0, "valid-data");
+ ASSERT_EQ(ctx_out.data_end, sizeof(pkt_v4), "valid-dataend");
+
+ /* Meta data's size must be a multiple of 4 */
+ test_xdp_context_error(prog_fd, opts, 0, 1, sizeof(data), 0, 0, 0);
+
+ /* data_meta must reference the start of data */
+ test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data),
+ 0, 0, 0);
+
+ /* Meta data must be 32 bytes or smaller */
+ test_xdp_context_error(prog_fd, opts, 0, 36, sizeof(data), 0, 0, 0);
+
+ /* Total size of data must match data_end - data_meta */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
+ sizeof(data) - 1, 0, 0, 0);
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
+ sizeof(data) + 1, 0, 0, 0);
+
+ /* RX queue cannot be specified without specifying an ingress */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+ 0, 1, 0);
+
+ /* Interface 1 is always the loopback interface which always has only
+ * one RX queue (index 0). This makes index 1 an invalid rx queue index
+ * for interface 1.
+ */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+ 1, 1, 0);
+
+ /* The egress cannot be specified */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+ 0, 0, 1);
+
+ test_xdp_context_test_run__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index 0176573fe4e7..8755effd80b0 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -7,64 +7,53 @@
#define IFINDEX_LO 1
-void test_xdp_with_cpumap_helpers(void)
+void test_xdp_cpumap_attach(void)
{
struct test_xdp_with_cpumap_helpers *skel;
struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
struct bpf_cpumap_val val = {
.qsize = 192,
};
- __u32 duration = 0, idx = 0;
- __u32 len = sizeof(info);
int err, prog_fd, map_fd;
+ __u32 idx = 0;
skel = test_xdp_with_cpumap_helpers__open_and_load();
- if (CHECK_FAIL(!skel)) {
- perror("test_xdp_with_cpumap_helpers__open_and_load");
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load"))
return;
- }
- /* can not attach program with cpumaps that allow programs
- * as xdp generic
- */
prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP",
- "should have failed\n");
+ if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP"))
+ goto out_close;
+
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+ ASSERT_OK(err, "XDP program detach");
prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
map_fd = bpf_map__fd(skel->maps.cpu_map);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
goto out_close;
val.bpf_prog.fd = prog_fd;
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err, "Add program to cpumap entry", "err %d errno %d\n",
- err, errno);
+ ASSERT_OK(err, "Add program to cpumap entry");
err = bpf_map_lookup_elem(map_fd, &idx, &val);
- CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno);
- CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry",
- "expected %u read %u\n", info.id, val.bpf_prog.id);
+ ASSERT_OK(err, "Read cpumap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
/* can not attach BPF_XDP_CPUMAP program to a device */
err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program",
- "should have failed\n");
+ if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program"))
+ bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
val.qsize = 192;
val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry",
- "should have failed\n");
+ ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
out_close:
test_xdp_with_cpumap_helpers__destroy(skel);
}
-
-void test_xdp_cpumap_attach(void)
-{
- if (test__start_subtest("cpumap_with_progs"))
- test_xdp_with_cpumap_helpers();
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 88ef3ec8ac4c..c72af030ff10 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -16,50 +16,45 @@ void test_xdp_with_devmap_helpers(void)
.ifindex = IFINDEX_LO,
};
__u32 len = sizeof(info);
- __u32 duration = 0, idx = 0;
int err, dm_fd, map_fd;
+ __u32 idx = 0;
skel = test_xdp_with_devmap_helpers__open_and_load();
- if (CHECK_FAIL(!skel)) {
- perror("test_xdp_with_devmap_helpers__open_and_load");
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load"))
return;
- }
- /* can not attach program with DEVMAPs that allow programs
- * as xdp generic
- */
dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Generic attach of program with 8-byte devmap",
- "should have failed\n");
+ if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap"))
+ goto out_close;
+
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+ ASSERT_OK(err, "XDP program detach");
dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
map_fd = bpf_map__fd(skel->maps.dm_ports);
err = bpf_obj_get_info_by_fd(dm_fd, &info, &len);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
goto out_close;
val.bpf_prog.fd = dm_fd;
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err, "Add program to devmap entry",
- "err %d errno %d\n", err, errno);
+ ASSERT_OK(err, "Add program to devmap entry");
err = bpf_map_lookup_elem(map_fd, &idx, &val);
- CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno);
- CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry",
- "expected %u read %u\n", info.id, val.bpf_prog.id);
+ ASSERT_OK(err, "Read devmap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id");
/* can not attach BPF_XDP_DEVMAP program to a device */
err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program",
- "should have failed\n");
+ if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program"))
+ bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
val.ifindex = 1;
val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry",
- "should have failed\n");
+ ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry");
out_close:
test_xdp_with_devmap_helpers__destroy(skel);
@@ -68,12 +63,10 @@ out_close:
void test_neg_xdp_devmap_helpers(void)
{
struct test_xdp_devmap_helpers *skel;
- __u32 duration = 0;
skel = test_xdp_devmap_helpers__open_and_load();
- if (CHECK(skel,
- "Load of XDP program accessing egress ifindex without attach type",
- "should have failed\n")) {
+ if (!ASSERT_EQ(skel, NULL,
+ "Load of XDP program accessing egress ifindex without attach type")) {
test_xdp_devmap_helpers__destroy(skel);
}
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index fd42247da8b4..9573be6122be 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -17,6 +17,11 @@
char _license[] SEC("license") = "GPL";
+volatile const char fallback[TCP_CA_NAME_MAX];
+const char bpf_dctcp[] = "bpf_dctcp";
+const char tcp_cdg[] = "cdg";
+char cc_res[TCP_CA_NAME_MAX];
+int tcp_cdg_res = 0;
int stg_result = 0;
struct {
@@ -57,6 +62,26 @@ void BPF_PROG(dctcp_init, struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
int *stg;
+ if (!(tp->ecn_flags & TCP_ECN_OK) && fallback[0]) {
+ /* Switch to fallback */
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)fallback, sizeof(fallback));
+ /* Switch back to myself which the bpf trampoline
+ * stopped calling dctcp_init recursively.
+ */
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)bpf_dctcp, sizeof(bpf_dctcp));
+ /* Switch back to fallback */
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)fallback, sizeof(fallback));
+ /* Expecting -ENOTSUPP for tcp_cdg_res */
+ tcp_cdg_res = bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)tcp_cdg, sizeof(tcp_cdg));
+ bpf_getsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)cc_res, sizeof(cc_res));
+ return;
+ }
+
ca->prior_rcv_nxt = tp->rcv_nxt;
ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
ca->loss_cwnd = 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
new file mode 100644
index 000000000000..d836f7c372f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+const char cubic[] = "cubic";
+
+void BPF_STRUCT_OPS(dctcp_nouse_release, struct sock *sk)
+{
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)cubic, sizeof(cubic));
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops dctcp_rel = {
+ .release = (void *)dctcp_nouse_release,
+ .name = "bpf_dctcp_rel",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index 3d83b185c4bc..8cfaeba1ddbf 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -12,6 +12,7 @@
#define tcp6_sock tcp6_sock___not_used
#define bpf_iter__udp bpf_iter__udp___not_used
#define udp6_sock udp6_sock___not_used
+#define bpf_iter__unix bpf_iter__unix___not_used
#define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
#define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
#define bpf_iter__sockmap bpf_iter__sockmap___not_used
@@ -32,6 +33,7 @@
#undef tcp6_sock
#undef bpf_iter__udp
#undef udp6_sock
+#undef bpf_iter__unix
#undef bpf_iter__bpf_map_elem
#undef bpf_iter__bpf_sk_storage_map
#undef bpf_iter__sockmap
@@ -103,6 +105,12 @@ struct udp6_sock {
struct ipv6_pinfo inet6;
} __attribute__((preserve_access_index));
+struct bpf_iter__unix {
+ struct bpf_iter_meta *meta;
+ struct unix_sock *unix_sk;
+ uid_t uid;
+} __attribute__((preserve_access_index));
+
struct bpf_iter__bpf_map_elem {
struct bpf_iter_meta *meta;
struct bpf_map *map;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
new file mode 100644
index 000000000000..b77adfd55d73
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define bpf_tcp_sk(skc) ({ \
+ struct sock_common *_skc = skc; \
+ sk = NULL; \
+ tp = NULL; \
+ if (_skc) { \
+ tp = bpf_skc_to_tcp_sock(_skc); \
+ sk = (struct sock *)tp; \
+ } \
+ tp; \
+})
+
+unsigned short reuse_listen_hport = 0;
+unsigned short listen_hport = 0;
+char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic";
+char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp";
+bool random_retry = false;
+
+static bool tcp_cc_eq(const char *a, const char *b)
+{
+ int i;
+
+ for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+ if (a[i] != b[i])
+ return false;
+ if (!a[i])
+ break;
+ }
+
+ return true;
+}
+
+SEC("iter/tcp")
+int change_tcp_cc(struct bpf_iter__tcp *ctx)
+{
+ char cur_cc[TCP_CA_NAME_MAX];
+ struct tcp_sock *tp;
+ struct sock *sk;
+ int ret;
+
+ if (!bpf_tcp_sk(ctx->sk_common))
+ return 0;
+
+ if (sk->sk_family != AF_INET6 ||
+ (sk->sk_state != TCP_LISTEN &&
+ sk->sk_state != TCP_ESTABLISHED) ||
+ (sk->sk_num != reuse_listen_hport &&
+ sk->sk_num != listen_hport &&
+ bpf_ntohs(sk->sk_dport) != listen_hport))
+ return 0;
+
+ if (bpf_getsockopt(tp, SOL_TCP, TCP_CONGESTION,
+ cur_cc, sizeof(cur_cc)))
+ return 0;
+
+ if (!tcp_cc_eq(cur_cc, cubic_cc))
+ return 0;
+
+ if (random_retry && bpf_get_prandom_u32() % 4 == 1)
+ return 1;
+
+ bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, dctcp_cc, sizeof(dctcp_cc));
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
index 2e4775c35414..92267abb462f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -121,7 +121,7 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
}
BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
- seq_num, src, srcp, destp, destp);
+ seq_num, src, srcp, dest, destp);
BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
state,
tp->write_seq - tp->snd_una, rx_queue,
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
new file mode 100644
index 000000000000..94423902685d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+SEC("iter/unix")
+int dump_unix(struct bpf_iter__unix *ctx)
+{
+ struct unix_sock *unix_sk = ctx->unix_sk;
+ struct sock *sk = (struct sock *)unix_sk;
+ struct seq_file *seq;
+ __u32 seq_num;
+
+ if (!unix_sk)
+ return 0;
+
+ seq = ctx->meta->seq;
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "Num RefCount Protocol Flags Type St Inode Path\n");
+
+ BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %8lu",
+ unix_sk,
+ sk->sk_refcnt.refs.counter,
+ 0,
+ sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
+ sk->sk_type,
+ sk->sk_socket ?
+ (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
+ (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
+ sock_i_ino(sk));
+
+ if (unix_sk->addr) {
+ if (!UNIX_ABSTRACT(unix_sk)) {
+ BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
+ } else {
+ /* The name of the abstract UNIX domain socket starts
+ * with '\0' and can contain '\0'. The null bytes
+ * should be escaped as done in unix_seq_show().
+ */
+ __u64 i, len;
+
+ len = unix_sk->addr->len - sizeof(short);
+
+ BPF_SEQ_PRINTF(seq, " @");
+
+ for (i = 1; i < len; i++) {
+ /* unix_mkname() tests this upper bound. */
+ if (i >= sizeof(struct sockaddr_un))
+ break;
+
+ BPF_SEQ_PRINTF(seq, "%c",
+ unix_sk->addr->name->sun_path[i] ?:
+ '@');
+ }
+ }
+ }
+
+ BPF_SEQ_PRINTF(seq, "\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 01378911252b..eef5646ddb19 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -5,6 +5,14 @@
#define AF_INET 2
#define AF_INET6 10
+#define __SO_ACCEPTCON (1 << 16)
+#define UNIX_HASH_SIZE 256
+#define UNIX_ABSTRACT(unix_sk) (unix_sk->addr->hash < UNIX_HASH_SIZE)
+
+#define SOL_TCP 6
+#define TCP_CONGESTION 13
+#define TCP_CA_NAME_MAX 16
+
#define ICSK_TIME_RETRANS 1
#define ICSK_TIME_PROBE0 3
#define ICSK_TIME_LOSS_PROBE 5
@@ -32,6 +40,8 @@
#define ir_v6_rmt_addr req.__req_common.skc_v6_daddr
#define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr
+#define sk_num __sk_common.skc_num
+#define sk_dport __sk_common.skc_dport
#define sk_family __sk_common.skc_family
#define sk_rmem_alloc sk_backlog.rmem_alloc
#define sk_refcnt __sk_common.skc_refcnt
diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
new file mode 100644
index 000000000000..a587aeca5ae0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+extern const void bpf_fentry_test1 __ksym;
+extern const void bpf_fentry_test2 __ksym;
+extern const void bpf_fentry_test3 __ksym;
+extern const void bpf_fentry_test4 __ksym;
+extern const void bpf_modify_return_test __ksym;
+extern const void bpf_fentry_test6 __ksym;
+extern const void bpf_fentry_test7 __ksym;
+
+__u64 test1_result = 0;
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test1_result = (const void *) addr == &bpf_fentry_test1;
+ return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fexit/bpf_fentry_test2")
+int BPF_PROG(test2, int a)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test2_result = (const void *) addr == &bpf_fentry_test2;
+ return 0;
+}
+
+__u64 test3_result = 0;
+SEC("kprobe/bpf_fentry_test3")
+int test3(struct pt_regs *ctx)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test3_result = (const void *) addr == &bpf_fentry_test3;
+ return 0;
+}
+
+__u64 test4_result = 0;
+SEC("kretprobe/bpf_fentry_test4")
+int BPF_KRETPROBE(test4)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test4_result = (const void *) addr == &bpf_fentry_test4;
+ return 0;
+}
+
+__u64 test5_result = 0;
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(test5, int a, int *b, int ret)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test5_result = (const void *) addr == &bpf_modify_return_test;
+ return ret;
+}
+
+__u64 test6_result = 0;
+SEC("kprobe/bpf_fentry_test6+0x5")
+int test6(struct pt_regs *ctx)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test6_result = (const void *) addr == &bpf_fentry_test6 + 5;
+ return 0;
+}
+
+__u64 test7_result = 0;
+SEC("kprobe/bpf_fentry_test7+5")
+int test7(struct pt_regs *ctx)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test7_result = (const void *) addr == &bpf_fentry_test7 + 5;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
index b2dcb7d9cb03..5fbd9e232d44 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
@@ -9,7 +9,7 @@ extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
__u32 c, __u64 d) __ksym;
extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym;
int active_res = -1;
-int sk_state = -1;
+int sk_state_res = -1;
int __noinline f1(struct __sk_buff *skb)
{
@@ -28,7 +28,7 @@ int __noinline f1(struct __sk_buff *skb)
if (active)
active_res = *active;
- sk_state = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;
+ sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->sk_state;
return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
}
diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c
index d071adf178bd..43649bce4c54 100644
--- a/tools/testing/selftests/bpf/progs/netcnt_prog.c
+++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c
@@ -13,21 +13,21 @@
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
__type(key, struct bpf_cgroup_storage_key);
- __type(value, struct percpu_net_cnt);
+ __type(value, union percpu_net_cnt);
} percpu_netcnt SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
__type(key, struct bpf_cgroup_storage_key);
- __type(value, struct net_cnt);
+ __type(value, union net_cnt);
} netcnt SEC(".maps");
SEC("cgroup/skb")
int bpf_nextcnt(struct __sk_buff *skb)
{
- struct percpu_net_cnt *percpu_cnt;
+ union percpu_net_cnt *percpu_cnt;
char fmt[] = "%d %llu %llu\n";
- struct net_cnt *cnt;
+ union net_cnt *cnt;
__u64 ts, dt;
int ret;
diff --git a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
new file mode 100644
index 000000000000..aeff3a4f9287
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+#define AF_INET6 10
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sockops_netns_cookies SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_msg_netns_cookies SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sockops")
+int get_netns_cookie_sockops(struct bpf_sock_ops *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ int *cookie;
+ __u32 key = 0;
+
+ if (ctx->family != AF_INET6)
+ return 1;
+
+ if (!sk)
+ return 1;
+
+ switch (ctx->op) {
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ cookie = bpf_sk_storage_get(&sockops_netns_cookies, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!cookie)
+ return 1;
+
+ *cookie = bpf_get_netns_cookie(ctx);
+ break;
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ bpf_sock_map_update(ctx, &sock_map, &key, BPF_NOEXIST);
+ break;
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+SEC("sk_msg")
+int get_netns_cookie_sk_msg(struct sk_msg_md *msg)
+{
+ struct bpf_sock *sk = msg->sk;
+ int *cookie;
+
+ if (msg->family != AF_INET6)
+ return 1;
+
+ if (!sk)
+ return 1;
+
+ cookie = bpf_sk_storage_get(&sk_msg_netns_cookies, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!cookie)
+ return 1;
+
+ *cookie = bpf_get_netns_cookie(msg);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
new file mode 100644
index 000000000000..1bce83b6e3a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <string.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("cgroup/setsockopt")
+int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
+{
+ void *optval_end = ctx->optval_end;
+ int *optval = ctx->optval;
+ char buf[TCP_CA_NAME_MAX];
+ char cc_reno[TCP_CA_NAME_MAX] = "reno";
+ char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
+
+ if (ctx->level != SOL_IPV6 || ctx->optname != IPV6_TCLASS)
+ return 1;
+
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ if (bpf_getsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
+ return 0;
+
+ if (!tcp_cc_eq(buf, cc_cubic))
+ return 0;
+
+ if (*optval == 0x2d) {
+ if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &cc_reno,
+ sizeof(cc_reno)))
+ return 0;
+ }
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index 8acdb99b5959..79c8139b63b8 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -33,6 +33,14 @@ int _getsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
struct sockopt_sk *storage;
+ /* Make sure bpf_get_netns_cookie is callable.
+ */
+ if (bpf_get_netns_cookie(NULL) == 0)
+ return 0;
+
+ if (bpf_get_netns_cookie(ctx) == 0)
+ return 0;
+
if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS;
* let next BPF program in the cgroup chain or kernel
@@ -123,6 +131,14 @@ int _setsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
struct sockopt_sk *storage;
+ /* Make sure bpf_get_netns_cookie is callable.
+ */
+ if (bpf_get_netns_cookie(NULL) == 0)
+ return 0;
+
+ if (bpf_get_netns_cookie(ctx) == 0)
+ return 0;
+
if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS;
* let next BPF program in the cgroup chain or kernel
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
new file mode 100644
index 000000000000..2d3a7710e2ce
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int my_tid;
+
+int kprobe_res;
+int kprobe_multi_res;
+int kretprobe_res;
+int uprobe_res;
+int uretprobe_res;
+int tp_res;
+int pe_res;
+
+static void update(void *ctx, int *res)
+{
+ if (my_tid != (u32)bpf_get_current_pid_tgid())
+ return;
+
+ *res |= bpf_get_attach_cookie(ctx);
+}
+
+SEC("kprobe/sys_nanosleep")
+int handle_kprobe(struct pt_regs *ctx)
+{
+ update(ctx, &kprobe_res);
+ return 0;
+}
+
+SEC("kretprobe/sys_nanosleep")
+int handle_kretprobe(struct pt_regs *ctx)
+{
+ update(ctx, &kretprobe_res);
+ return 0;
+}
+
+SEC("uprobe/trigger_func")
+int handle_uprobe(struct pt_regs *ctx)
+{
+ update(ctx, &uprobe_res);
+ return 0;
+}
+
+SEC("uretprobe/trigger_func")
+int handle_uretprobe(struct pt_regs *ctx)
+{
+ update(ctx, &uretprobe_res);
+ return 0;
+}
+
+/* bpf_prog_array, used by kernel internally to keep track of attached BPF
+ * programs to a given BPF hook (e.g., for tracepoints) doesn't allow the same
+ * BPF program to be attached multiple times. So have three identical copies
+ * ready to attach to the same tracepoint.
+ */
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp1(struct pt_regs *ctx)
+{
+ update(ctx, &tp_res);
+ return 0;
+}
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp2(struct pt_regs *ctx)
+{
+ update(ctx, &tp_res);
+ return 0;
+}
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp3(void *ctx)
+{
+ update(ctx, &tp_res);
+ return 1;
+}
+
+SEC("perf_event")
+int handle_pe(struct pt_regs *ctx)
+{
+ update(ctx, &pe_res);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_core_autosize.c b/tools/testing/selftests/bpf/progs/test_core_autosize.c
index 44f5aa2e8956..9a7829c5e4a7 100644
--- a/tools/testing/selftests/bpf/progs/test_core_autosize.c
+++ b/tools/testing/selftests/bpf/progs/test_core_autosize.c
@@ -125,6 +125,16 @@ int handle_downsize(void *ctx)
return 0;
}
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define bpf_core_read_int bpf_core_read
+#else
+#define bpf_core_read_int(dst, sz, src) ({ \
+ /* Prevent "subtraction from stack pointer prohibited" */ \
+ volatile long __off = sizeof(*dst) - (sz); \
+ bpf_core_read((char *)(dst) + __off, sz, src); \
+})
+#endif
+
SEC("raw_tp/sys_enter")
int handle_probed(void *ctx)
{
@@ -132,23 +142,23 @@ int handle_probed(void *ctx)
__u64 tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
ptr_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val1), &in->val1);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val1), &in->val1);
val1_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val2), &in->val2);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val2), &in->val2);
val2_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val3), &in->val3);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val3), &in->val3);
val3_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val4), &in->val4);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val4), &in->val4);
val4_probed = tmp;
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
new file mode 100644
index 000000000000..5f8379aadb29
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test weak ksyms.
+ *
+ * Copyright (c) 2021 Google
+ */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+int out__existing_typed = -1;
+__u64 out__existing_typeless = -1;
+
+__u64 out__non_existent_typeless = -1;
+__u64 out__non_existent_typed = -1;
+
+/* existing weak symbols */
+
+/* test existing weak symbols can be resolved. */
+extern const struct rq runqueues __ksym __weak; /* typed */
+extern const void bpf_prog_active __ksym __weak; /* typeless */
+
+
+/* non-existent weak symbols. */
+
+/* typeless symbols, default to zero. */
+extern const void bpf_link_fops1 __ksym __weak;
+
+/* typed symbols, default to zero. */
+extern const int bpf_link_fops2 __ksym __weak;
+
+SEC("raw_tp/sys_enter")
+int pass_handler(const void *ctx)
+{
+ struct rq *rq;
+
+ /* tests existing symbols. */
+ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
+ if (rq)
+ out__existing_typed = rq->cpu;
+ out__existing_typeless = (__u64)&bpf_prog_active;
+
+ /* tests non-existent symbols. */
+ out__non_existent_typeless = (__u64)&bpf_link_fops1;
+
+ /* tests non-existent symbols. */
+ out__non_existent_typed = (__u64)&bpf_link_fops2;
+
+ if (&bpf_link_fops2) /* can't happen */
+ out__non_existent_typed = (__u64)bpf_per_cpu_ptr(&bpf_link_fops2, 0);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
new file mode 100644
index 000000000000..703c08e06442
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Isovalent, Inc. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, int);
+ __uint(max_entries, 4);
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 0); /* This will make map creation to fail */
+ __uint(key_size, sizeof(__u32));
+ __array(values, struct inner);
+} mim SEC(".maps");
+
+SEC("xdp")
+int xdp_noop0(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_link.c b/tools/testing/selftests/bpf/progs/test_perf_link.c
new file mode 100644
index 000000000000..c1db9fd98d0c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_perf_link.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int run_cnt = 0;
+
+SEC("perf_event")
+int handler(struct pt_regs *ctx)
+{
+ __sync_fetch_and_add(&run_cnt, 1);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index e83d0b48d80c..8249075f088f 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -91,7 +91,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_use_after_free")
+SEC("classifier/err_use_after_free")
int bpf_sk_lookup_uaf(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -106,7 +106,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb)
return family;
}
-SEC("classifier/fail_modify_sk_pointer")
+SEC("classifier/err_modify_sk_pointer")
int bpf_sk_lookup_modptr(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -121,7 +121,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_modify_sk_or_null_pointer")
+SEC("classifier/err_modify_sk_or_null_pointer")
int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -135,7 +135,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_no_release")
+SEC("classifier/err_no_release")
int bpf_sk_lookup_test2(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -144,7 +144,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_release_twice")
+SEC("classifier/err_release_twice")
int bpf_sk_lookup_test3(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -156,7 +156,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_release_unchecked")
+SEC("classifier/err_release_unchecked")
int bpf_sk_lookup_test4(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -173,7 +173,7 @@ void lookup_no_release(struct __sk_buff *skb)
bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
}
-SEC("classifier/fail_no_release_subcall")
+SEC("classifier/err_no_release_subcall")
int bpf_sk_lookup_test5(struct __sk_buff *skb)
{
lookup_no_release(skb);
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c
index e2ad26150f9b..8fda07544023 100644
--- a/tools/testing/selftests/bpf/progs/test_snprintf.c
+++ b/tools/testing/selftests/bpf/progs/test_snprintf.c
@@ -59,9 +59,9 @@ int handler(const void *ctx)
/* Kernel pointers */
addr_ret = BPF_SNPRINTF(addr_out, sizeof(addr_out), "%pK %px %p",
0, 0xFFFF00000ADD4E55, 0xFFFF00000ADD4E55);
- /* Strings embedding */
- str_ret = BPF_SNPRINTF(str_out, sizeof(str_out), "%s %+05s",
- str1, longstr);
+ /* Strings and single-byte character embedding */
+ str_ret = BPF_SNPRINTF(str_out, sizeof(str_out), "%s % 9c %+2c %-3c %04c %0c %+05s",
+ str1, 'a', 'b', 'c', 'd', 'e', longstr);
/* Overflow */
over_ret = BPF_SNPRINTF(over_out, sizeof(over_out), "%%overflow");
/* Padding of fixed width numbers */
diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
new file mode 100644
index 000000000000..6c059f1cfa1b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct pt_regs current_regs = {};
+struct pt_regs ctx_regs = {};
+int uprobe_res = 0;
+
+SEC("uprobe/trigger_func")
+int handle_uprobe(struct pt_regs *ctx)
+{
+ struct task_struct *current;
+ struct pt_regs *regs;
+
+ current = bpf_get_current_task_btf();
+ regs = (struct pt_regs *) bpf_task_pt_regs(current);
+ __builtin_memcpy(&current_regs, regs, sizeof(*regs));
+ __builtin_memcpy(&ctx_regs, ctx, sizeof(*ctx));
+
+ /* Prove that uprobe was run */
+ uprobe_res = 1;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 84cd63259554..a0e7762b1e5a 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -528,7 +528,6 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb)
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
- char buf[sizeof(struct v6hdr)];
struct gre_hdr greh;
struct udphdr udph;
int olen = len;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c
new file mode 100644
index 000000000000..d7b88cd05afd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp")
+int xdp_context(struct xdp_md *xdp)
+{
+ void *data = (void *)(long)xdp->data;
+ __u32 *metadata = (void *)(long)xdp->data_meta;
+ __u32 ret;
+
+ if (metadata + 1 > data)
+ return XDP_ABORTED;
+ ret = *metadata;
+ if (bpf_xdp_adjust_meta(xdp, 4))
+ return XDP_ABORTED;
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
new file mode 100644
index 000000000000..5f5309791649
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+ int counter;
+ struct bpf_timer timer;
+ struct bpf_spin_lock lock; /* unused */
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap_malloc SEC(".maps");
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 4);
+ __type(key, int);
+ __type(value, struct elem);
+} lru SEC(".maps");
+
+__u64 bss_data;
+__u64 err;
+__u64 ok;
+__u64 callback_check = 52;
+__u64 callback2_check = 52;
+
+#define ARRAY 1
+#define HTAB 2
+#define HTAB_MALLOC 3
+#define LRU 4
+
+/* callback for array and lru timers */
+static int timer_cb1(void *map, int *key, struct bpf_timer *timer)
+{
+ /* increment bss variable twice.
+ * Once via array timer callback and once via lru timer callback
+ */
+ bss_data += 5;
+
+ /* *key == 1 - the callback was called for array timer.
+ * *key == 4 - the callback was called from lru timer.
+ */
+ if (*key == ARRAY) {
+ struct bpf_timer *lru_timer;
+ int lru_key = LRU;
+
+ /* rearm array timer to be called again in ~35 seconds */
+ if (bpf_timer_start(timer, 1ull << 35, 0) != 0)
+ err |= 1;
+
+ lru_timer = bpf_map_lookup_elem(&lru, &lru_key);
+ if (!lru_timer)
+ return 0;
+ bpf_timer_set_callback(lru_timer, timer_cb1);
+ if (bpf_timer_start(lru_timer, 0, 0) != 0)
+ err |= 2;
+ } else if (*key == LRU) {
+ int lru_key, i;
+
+ for (i = LRU + 1;
+ i <= 100 /* for current LRU eviction algorithm this number
+ * should be larger than ~ lru->max_entries * 2
+ */;
+ i++) {
+ struct elem init = {};
+
+ /* lru_key cannot be used as loop induction variable
+ * otherwise the loop will be unbounded.
+ */
+ lru_key = i;
+
+ /* add more elements into lru map to push out current
+ * element and force deletion of this timer
+ */
+ bpf_map_update_elem(map, &lru_key, &init, 0);
+ /* look it up to bump it into active list */
+ bpf_map_lookup_elem(map, &lru_key);
+
+ /* keep adding until *key changes underneath,
+ * which means that key/timer memory was reused
+ */
+ if (*key != LRU)
+ break;
+ }
+
+ /* check that the timer was removed */
+ if (bpf_timer_cancel(timer) != -EINVAL)
+ err |= 4;
+ ok |= 1;
+ }
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ struct bpf_timer *arr_timer, *lru_timer;
+ struct elem init = {};
+ int lru_key = LRU;
+ int array_key = ARRAY;
+
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
+
+ bpf_map_update_elem(&lru, &lru_key, &init, 0);
+ lru_timer = bpf_map_lookup_elem(&lru, &lru_key);
+ if (!lru_timer)
+ return 0;
+ bpf_timer_init(lru_timer, &lru, CLOCK_MONOTONIC);
+
+ bpf_timer_set_callback(arr_timer, timer_cb1);
+ bpf_timer_start(arr_timer, 0 /* call timer_cb1 asap */, 0);
+
+ /* init more timers to check that array destruction
+ * doesn't leak timer memory.
+ */
+ array_key = 0;
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
+ return 0;
+}
+
+/* callback for prealloc and non-prealloc hashtab timers */
+static int timer_cb2(void *map, int *key, struct hmap_elem *val)
+{
+ if (*key == HTAB)
+ callback_check--;
+ else
+ callback2_check--;
+ if (val->counter > 0 && --val->counter) {
+ /* re-arm the timer again to execute after 1 usec */
+ bpf_timer_start(&val->timer, 1000, 0);
+ } else if (*key == HTAB) {
+ struct bpf_timer *arr_timer;
+ int array_key = ARRAY;
+
+ /* cancel arr_timer otherwise bpf_fentry_test1 prog
+ * will stay alive forever.
+ */
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ if (bpf_timer_cancel(arr_timer) != 1)
+ /* bpf_timer_cancel should return 1 to indicate
+ * that arr_timer was active at this time
+ */
+ err |= 8;
+
+ /* try to cancel ourself. It shouldn't deadlock. */
+ if (bpf_timer_cancel(&val->timer) != -EDEADLK)
+ err |= 16;
+
+ /* delete this key and this timer anyway.
+ * It shouldn't deadlock either.
+ */
+ bpf_map_delete_elem(map, key);
+
+ /* in preallocated hashmap both 'key' and 'val' could have been
+ * reused to store another map element (like in LRU above),
+ * but in controlled test environment the below test works.
+ * It's not a use-after-free. The memory is owned by the map.
+ */
+ if (bpf_timer_start(&val->timer, 1000, 0) != -EINVAL)
+ err |= 32;
+ ok |= 2;
+ } else {
+ if (*key != HTAB_MALLOC)
+ err |= 64;
+
+ /* try to cancel ourself. It shouldn't deadlock. */
+ if (bpf_timer_cancel(&val->timer) != -EDEADLK)
+ err |= 128;
+
+ /* delete this key and this timer anyway.
+ * It shouldn't deadlock either.
+ */
+ bpf_map_delete_elem(map, key);
+
+ /* in non-preallocated hashmap both 'key' and 'val' are RCU
+ * protected and still valid though this element was deleted
+ * from the map. Arm this timer for ~35 seconds. When callback
+ * finishes the call_rcu will invoke:
+ * htab_elem_free_rcu
+ * check_and_free_timer
+ * bpf_timer_cancel_and_free
+ * to cancel this 35 second sleep and delete the timer for real.
+ */
+ if (bpf_timer_start(&val->timer, 1ull << 35, 0) != 0)
+ err |= 256;
+ ok |= 4;
+ }
+ return 0;
+}
+
+int bpf_timer_test(void)
+{
+ struct hmap_elem *val;
+ int key = HTAB, key_malloc = HTAB_MALLOC;
+
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val) {
+ if (bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME) != 0)
+ err |= 512;
+ bpf_timer_set_callback(&val->timer, timer_cb2);
+ bpf_timer_start(&val->timer, 1000, 0);
+ }
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val) {
+ if (bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME) != 0)
+ err |= 1024;
+ bpf_timer_set_callback(&val->timer, timer_cb2);
+ bpf_timer_start(&val->timer, 1000, 0);
+ }
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test2")
+int BPF_PROG(test2, int a, int b)
+{
+ struct hmap_elem init = {}, *val;
+ int key = HTAB, key_malloc = HTAB_MALLOC;
+
+ init.counter = 10; /* number of times to trigger timer_cb2 */
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+ /* update the same key to free the timer */
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+ /* update the same key to free the timer */
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+
+ /* init more timers to check that htab operations
+ * don't leak timer memory.
+ */
+ key = 0;
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+ bpf_map_delete_elem(&hmap, &key);
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+
+ /* and with non-prealloc htab */
+ key_malloc = 0;
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+ bpf_map_delete_elem(&hmap_malloc, &key_malloc);
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+
+ return bpf_timer_test();
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_mim.c b/tools/testing/selftests/bpf/progs/timer_mim.c
new file mode 100644
index 000000000000..2fee7ab105ef
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_mim.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+ int pad; /* unused */
+ struct bpf_timer timer;
+};
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1024);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} inner_htab SEC(".maps");
+
+#define ARRAY_KEY 1
+#define HASH_KEY 1234
+
+struct outer_arr {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __array(values, struct inner_map);
+} outer_arr SEC(".maps") = {
+ .values = { [ARRAY_KEY] = &inner_htab },
+};
+
+__u64 err;
+__u64 ok;
+__u64 cnt;
+
+static int timer_cb1(void *map, int *key, struct hmap_elem *val);
+
+static int timer_cb2(void *map, int *key, struct hmap_elem *val)
+{
+ cnt++;
+ bpf_timer_set_callback(&val->timer, timer_cb1);
+ if (bpf_timer_start(&val->timer, 1000, 0))
+ err |= 1;
+ ok |= 1;
+ return 0;
+}
+
+/* callback for inner hash map */
+static int timer_cb1(void *map, int *key, struct hmap_elem *val)
+{
+ cnt++;
+ bpf_timer_set_callback(&val->timer, timer_cb2);
+ if (bpf_timer_start(&val->timer, 1000, 0))
+ err |= 2;
+ /* Do a lookup to make sure 'map' and 'key' pointers are correct */
+ bpf_map_lookup_elem(map, key);
+ ok |= 2;
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ struct hmap_elem init = {};
+ struct bpf_map *inner_map;
+ struct hmap_elem *val;
+ int array_key = ARRAY_KEY;
+ int hash_key = HASH_KEY;
+
+ inner_map = bpf_map_lookup_elem(&outer_arr, &array_key);
+ if (!inner_map)
+ return 0;
+
+ bpf_map_update_elem(inner_map, &hash_key, &init, 0);
+ val = bpf_map_lookup_elem(inner_map, &hash_key);
+ if (!val)
+ return 0;
+
+ bpf_timer_init(&val->timer, inner_map, CLOCK_MONOTONIC);
+ if (bpf_timer_set_callback(&val->timer, timer_cb1))
+ err |= 4;
+ if (bpf_timer_start(&val->timer, 0, 0))
+ err |= 8;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_mim_reject.c b/tools/testing/selftests/bpf/progs/timer_mim_reject.c
new file mode 100644
index 000000000000..5d648e3d8a41
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_mim_reject.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+ int pad; /* unused */
+ struct bpf_timer timer;
+};
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1024);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} inner_htab SEC(".maps");
+
+#define ARRAY_KEY 1
+#define ARRAY_KEY2 2
+#define HASH_KEY 1234
+
+struct outer_arr {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __array(values, struct inner_map);
+} outer_arr SEC(".maps") = {
+ .values = { [ARRAY_KEY] = &inner_htab },
+};
+
+__u64 err;
+__u64 ok;
+__u64 cnt;
+
+/* callback for inner hash map */
+static int timer_cb(void *map, int *key, struct hmap_elem *val)
+{
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ struct hmap_elem init = {};
+ struct bpf_map *inner_map, *inner_map2;
+ struct hmap_elem *val;
+ int array_key = ARRAY_KEY;
+ int array_key2 = ARRAY_KEY2;
+ int hash_key = HASH_KEY;
+
+ inner_map = bpf_map_lookup_elem(&outer_arr, &array_key);
+ if (!inner_map)
+ return 0;
+
+ inner_map2 = bpf_map_lookup_elem(&outer_arr, &array_key2);
+ if (!inner_map2)
+ return 0;
+ bpf_map_update_elem(inner_map, &hash_key, &init, 0);
+ val = bpf_map_lookup_elem(inner_map, &hash_key);
+ if (!val)
+ return 0;
+
+ bpf_timer_init(&val->timer, inner_map2, CLOCK_MONOTONIC);
+ if (bpf_timer_set_callback(&val->timer, timer_cb))
+ err |= 4;
+ if (bpf_timer_start(&val->timer, 0, 0))
+ err |= 8;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/xdp_tx.c b/tools/testing/selftests/bpf/progs/xdp_tx.c
index 94e6c2b281cb..5f725c720e00 100644
--- a/tools/testing/selftests/bpf/progs/xdp_tx.c
+++ b/tools/testing/selftests/bpf/progs/xdp_tx.c
@@ -3,7 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("tx")
+SEC("xdp")
int xdp_tx(struct xdp_md *xdp)
{
return XDP_TX;
diff --git a/tools/testing/selftests/bpf/test_bpftool.sh b/tools/testing/selftests/bpf/test_bpftool.sh
index 66690778e36d..718f59692ccb 100755
--- a/tools/testing/selftests/bpf/test_bpftool.sh
+++ b/tools/testing/selftests/bpf/test_bpftool.sh
@@ -2,4 +2,10 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2020 SUSE LLC.
+# 'make -C tools/testing/selftests/bpf install' will install to SCRIPT_DIR
+SCRIPT_DIR=$(dirname $(realpath $0))
+
+# 'make -C tools/testing/selftests/bpf' will install to BPFTOOL_INSTALL_PATH
+BPFTOOL_INSTALL_PATH="$SCRIPT_DIR"/tools/sbin
+export PATH=$SCRIPT_DIR:$BPFTOOL_INSTALL_PATH:$PATH
python3 -m unittest -v test_bpftool.TestBpftool
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index ac349a5cea7e..b03a87571592 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -22,7 +22,7 @@ KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
cd $KDIR_ROOT_DIR
if [ ! -e tools/bpf/bpftool/Makefile ]; then
echo -e "skip: bpftool files not found!\n"
- exit 0
+ exit 4 # KSFT_SKIP=4
fi
ERROR=0
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
new file mode 100755
index 000000000000..be54b7335a76
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -0,0 +1,586 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+#
+# Copyright (C) 2021 Isovalent, Inc.
+
+import argparse
+import re
+import os, sys
+
+LINUX_ROOT = os.path.abspath(os.path.join(__file__,
+ os.pardir, os.pardir, os.pardir, os.pardir, os.pardir))
+BPFTOOL_DIR = os.path.join(LINUX_ROOT, 'tools/bpf/bpftool')
+retval = 0
+
+class BlockParser(object):
+ """
+ A parser for extracting set of values from blocks such as enums.
+ @reader: a pointer to the open file to parse
+ """
+ def __init__(self, reader):
+ self.reader = reader
+
+ def search_block(self, start_marker):
+ """
+ Search for a given structure in a file.
+ @start_marker: regex marking the beginning of a structure to parse
+ """
+ offset = self.reader.tell()
+ array_start = re.search(start_marker, self.reader.read())
+ if array_start is None:
+ raise Exception('Failed to find start of block')
+ self.reader.seek(offset + array_start.start())
+
+ def parse(self, pattern, end_marker):
+ """
+ Parse a block and return a set of values. Values to extract must be
+ on separate lines in the file.
+ @pattern: pattern used to identify the values to extract
+ @end_marker: regex marking the end of the block to parse
+ """
+ entries = set()
+ while True:
+ line = self.reader.readline()
+ if not line or re.match(end_marker, line):
+ break
+ capture = pattern.search(line)
+ if capture and pattern.groups >= 1:
+ entries.add(capture.group(1))
+ return entries
+
+class ArrayParser(BlockParser):
+ """
+    A parser for extracting dictionaries of values from some BPF-related arrays.
+ @reader: a pointer to the open file to parse
+ @array_name: name of the array to parse
+ """
+ end_marker = re.compile('^};')
+
+ def __init__(self, reader, array_name):
+ self.array_name = array_name
+ self.start_marker = re.compile(f'(static )?const char \* const {self.array_name}\[.*\] = {{\n')
+ super().__init__(reader)
+
+ def search_block(self):
+ """
+ Search for the given array in a file.
+ """
+ super().search_block(self.start_marker);
+
+ def parse(self):
+ """
+ Parse a block and return data as a dictionary. Items to extract must be
+ on separate lines in the file.
+ """
+ pattern = re.compile('\[(BPF_\w*)\]\s*= "(.*)",?$')
+ entries = {}
+ while True:
+ line = self.reader.readline()
+ if line == '' or re.match(self.end_marker, line):
+ break
+ capture = pattern.search(line)
+ if capture:
+ entries[capture.group(1)] = capture.group(2)
+ return entries
+
+class InlineListParser(BlockParser):
+ """
+ A parser for extracting set of values from inline lists.
+ """
+ def parse(self, pattern, end_marker):
+ """
+ Parse a block and return a set of values. Multiple values to extract
+ can be on a same line in the file.
+ @pattern: pattern used to identify the values to extract
+ @end_marker: regex marking the end of the block to parse
+ """
+ entries = set()
+ while True:
+ line = self.reader.readline()
+ if not line:
+ break
+ entries.update(pattern.findall(line))
+ if re.search(end_marker, line):
+ break
+ return entries
+
+class FileExtractor(object):
+ """
+ A generic reader for extracting data from a given file. This class contains
+    several helper methods that wrap around parser objects to extract values
+ from different structures.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def __init__(self):
+ self.reader = open(self.filename, 'r')
+
+ def close(self):
+ """
+ Close the file used by the parser.
+ """
+ self.reader.close()
+
+ def reset_read(self):
+ """
+ Reset the file position indicator for this parser. This is useful when
+ parsing several structures in the file without respecting the order in
+ which those structures appear in the file.
+ """
+ self.reader.seek(0)
+
+ def get_types_from_array(self, array_name):
+ """
+ Search for and parse an array associating names to BPF_* enum members,
+ for example:
+
+ const char * const prog_type_name[] = {
+ [BPF_PROG_TYPE_UNSPEC] = "unspec",
+ [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
+ [BPF_PROG_TYPE_KPROBE] = "kprobe",
+ };
+
+ Return a dictionary with the enum member names as keys and the
+ associated names as values, for example:
+
+ {'BPF_PROG_TYPE_UNSPEC': 'unspec',
+ 'BPF_PROG_TYPE_SOCKET_FILTER': 'socket_filter',
+ 'BPF_PROG_TYPE_KPROBE': 'kprobe'}
+
+ @array_name: name of the array to parse
+ """
+ array_parser = ArrayParser(self.reader, array_name)
+ array_parser.search_block()
+ return array_parser.parse()
+
+ def get_enum(self, enum_name):
+ """
+ Search for and parse an enum containing BPF_* members, for example:
+
+ enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_KPROBE,
+ };
+
+ Return a set containing all member names, for example:
+
+ {'BPF_PROG_TYPE_UNSPEC',
+ 'BPF_PROG_TYPE_SOCKET_FILTER',
+ 'BPF_PROG_TYPE_KPROBE'}
+
+ @enum_name: name of the enum to parse
+ """
+ start_marker = re.compile(f'enum {enum_name} {{\n')
+ pattern = re.compile('^\s*(BPF_\w+),?$')
+ end_marker = re.compile('^};')
+ parser = BlockParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+ def __get_description_list(self, start_marker, pattern, end_marker):
+ parser = InlineListParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+ def get_rst_list(self, block_name):
+ """
+ Search for and parse a list of type names from RST documentation, for
+ example:
+
+ | *TYPE* := {
+ | **socket** | **kprobe** |
+ | **kretprobe**
+ | }
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+        @block_name: name of the block to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(f'\*{block_name}\* := {{')
+ pattern = re.compile('\*\*([\w/-]+)\*\*')
+ end_marker = re.compile('}\n')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def get_help_list(self, block_name):
+ """
+ Search for and parse a list of type names from a help message in
+ bpftool, for example:
+
+ " TYPE := { socket | kprobe |\\n"
+ " kretprobe }\\n"
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+        @block_name: name of the block to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(f'"\s*{block_name} := {{')
+ pattern = re.compile('([\w/]+) [|}]')
+ end_marker = re.compile('}')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def get_help_list_macro(self, macro):
+ """
+ Search for and parse a list of values from a help message starting with
+ a macro in bpftool, for example:
+
+ " " HELP_SPEC_OPTIONS " |\\n"
+ " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} }\\n"
+
+ Return a set containing all item names, for example:
+
+ {'-f', '--bpffs', '-m', '--mapcompat', '-n', '--nomount'}
+
+ @macro: macro starting the block, 'HELP_SPEC_OPTIONS' in the example
+ """
+ start_marker = re.compile(f'"\s*{macro}\s*" [|}}]')
+ pattern = re.compile('([\w-]+) ?(?:\||}[ }\]])')
+ end_marker = re.compile('}\\\\n')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def default_options(self):
+ """
+ Return the default options contained in HELP_SPEC_OPTIONS
+ """
+ return { '-j', '--json', '-p', '--pretty', '-d', '--debug' }
+
+ def get_bashcomp_list(self, block_name):
+ """
+ Search for and parse a list of type names from a variable in bash
+ completion file, for example:
+
+ local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \\
+ kretprobe'
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+        @block_name: name of the block to parse, 'BPFTOOL_PROG_LOAD_TYPES' in the example
+ """
+ start_marker = re.compile(f'local {block_name}=\'')
+ pattern = re.compile('(?:.*=\')?([\w/]+)')
+ end_marker = re.compile('\'$')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+class SourceFileExtractor(FileExtractor):
+ """
+ An abstract extractor for a source file with usage message.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def get_options(self):
+ return self.default_options().union(self.get_help_list_macro('HELP_SPEC_OPTIONS'))
+
+class ProgFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's prog.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'prog.c')
+
+ def get_prog_types(self):
+ return self.get_types_from_array('prog_type_name')
+
+ def get_attach_types(self):
+ return self.get_types_from_array('attach_type_strings')
+
+ def get_prog_attach_help(self):
+ return self.get_help_list('ATTACH_TYPE')
+
+class MapFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's map.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'map.c')
+
+ def get_map_types(self):
+ return self.get_types_from_array('map_type_name')
+
+ def get_map_help(self):
+ return self.get_help_list('TYPE')
+
+class CgroupFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's cgroup.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'cgroup.c')
+
+ def get_prog_attach_help(self):
+ return self.get_help_list('ATTACH_TYPE')
+
+class CommonFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's common.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'common.c')
+
+ def __init__(self):
+ super().__init__()
+ self.attach_types = {}
+
+ def get_attach_types(self):
+ if not self.attach_types:
+ self.attach_types = self.get_types_from_array('attach_type_name')
+ return self.attach_types
+
+ def get_cgroup_attach_types(self):
+ if not self.attach_types:
+ self.get_attach_types()
+ cgroup_types = {}
+ for (key, value) in self.attach_types.items():
+ if key.find('BPF_CGROUP') != -1:
+ cgroup_types[key] = value
+ return cgroup_types
+
+class GenericSourceExtractor(SourceFileExtractor):
+ """
+ An extractor for generic source code files.
+ """
+ filename = ""
+
+ def __init__(self, filename):
+ self.filename = os.path.join(BPFTOOL_DIR, filename)
+ super().__init__()
+
+class BpfHeaderExtractor(FileExtractor):
+ """
+ An extractor for the UAPI BPF header.
+ """
+ filename = os.path.join(LINUX_ROOT, 'tools/include/uapi/linux/bpf.h')
+
+ def get_prog_types(self):
+ return self.get_enum('bpf_prog_type')
+
+ def get_map_types(self):
+ return self.get_enum('bpf_map_type')
+
+ def get_attach_types(self):
+ return self.get_enum('bpf_attach_type')
+
+class ManPageExtractor(FileExtractor):
+ """
+ An abstract extractor for an RST documentation page.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def get_options(self):
+ return self.get_rst_list('OPTIONS')
+
+class ManProgExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-prog.rst.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-prog.rst')
+
+ def get_attach_types(self):
+ return self.get_rst_list('ATTACH_TYPE')
+
+class ManMapExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-map.rst.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-map.rst')
+
+ def get_map_types(self):
+ return self.get_rst_list('TYPE')
+
+class ManCgroupExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-cgroup.rst.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-cgroup.rst')
+
+ def get_attach_types(self):
+ return self.get_rst_list('ATTACH_TYPE')
+
+class ManGenericExtractor(ManPageExtractor):
+ """
+ An extractor for generic RST documentation pages.
+ """
+ filename = ""
+
+ def __init__(self, filename):
+ self.filename = os.path.join(BPFTOOL_DIR, filename)
+ super().__init__()
+
+class BashcompExtractor(FileExtractor):
+ """
+ An extractor for bpftool's bash completion file.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'bash-completion/bpftool')
+
+ def get_prog_attach_types(self):
+ return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES')
+
+ def get_map_types(self):
+ return self.get_bashcomp_list('BPFTOOL_MAP_CREATE_TYPES')
+
+ def get_cgroup_attach_types(self):
+ return self.get_bashcomp_list('BPFTOOL_CGROUP_ATTACH_TYPES')
+
+def verify(first_set, second_set, message):
+ """
+ Print all values that differ between two sets.
+ @first_set: one set to compare
+ @second_set: another set to compare
+ @message: message to print for values belonging to only one of the sets
+ """
+ global retval
+ diff = first_set.symmetric_difference(second_set)
+ if diff:
+ print(message, diff)
+ retval = 1
+
+def main():
+ # No arguments supported at this time, but print usage for -h|--help
+ argParser = argparse.ArgumentParser(description="""
+ Verify that bpftool's code, help messages, documentation and bash
+ completion are all in sync on program types, map types, attach types, and
+ options. Also check that bpftool is in sync with the UAPI BPF header.
+ """)
+ args = argParser.parse_args()
+
+ # Map types (enum)
+
+ bpf_info = BpfHeaderExtractor()
+ ref = bpf_info.get_map_types()
+
+ map_info = MapFileExtractor()
+ source_map_items = map_info.get_map_types()
+ map_types_enum = set(source_map_items.keys())
+
+ verify(ref, map_types_enum,
+ f'Comparing BPF header (enum bpf_map_type) and {MapFileExtractor.filename} (map_type_name):')
+
+ # Map types (names)
+
+ source_map_types = set(source_map_items.values())
+ source_map_types.discard('unspec')
+
+ help_map_types = map_info.get_map_help()
+ help_map_options = map_info.get_options()
+ map_info.close()
+
+ man_map_info = ManMapExtractor()
+ man_map_options = man_map_info.get_options()
+ man_map_types = man_map_info.get_map_types()
+ man_map_info.close()
+
+ bashcomp_info = BashcompExtractor()
+ bashcomp_map_types = bashcomp_info.get_map_types()
+
+ verify(source_map_types, help_map_types,
+ f'Comparing {MapFileExtractor.filename} (map_type_name) and {MapFileExtractor.filename} (do_help() TYPE):')
+ verify(source_map_types, man_map_types,
+ f'Comparing {MapFileExtractor.filename} (map_type_name) and {ManMapExtractor.filename} (TYPE):')
+ verify(help_map_options, man_map_options,
+ f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):')
+ verify(source_map_types, bashcomp_map_types,
+ f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):')
+
+ # Program types (enum)
+
+ ref = bpf_info.get_prog_types()
+
+ prog_info = ProgFileExtractor()
+ prog_types = set(prog_info.get_prog_types().keys())
+
+ verify(ref, prog_types,
+ f'Comparing BPF header (enum bpf_prog_type) and {ProgFileExtractor.filename} (prog_type_name):')
+
+ # Attach types (enum)
+
+ ref = bpf_info.get_attach_types()
+ bpf_info.close()
+
+ common_info = CommonFileExtractor()
+ attach_types = common_info.get_attach_types()
+
+ verify(ref, attach_types,
+ f'Comparing BPF header (enum bpf_attach_type) and {CommonFileExtractor.filename} (attach_type_name):')
+
+ # Attach types (names)
+
+ source_prog_attach_types = set(prog_info.get_attach_types().values())
+
+ help_prog_attach_types = prog_info.get_prog_attach_help()
+ help_prog_options = prog_info.get_options()
+ prog_info.close()
+
+ man_prog_info = ManProgExtractor()
+ man_prog_options = man_prog_info.get_options()
+ man_prog_attach_types = man_prog_info.get_attach_types()
+ man_prog_info.close()
+
+ bashcomp_info.reset_read() # We stopped at map types, rewind
+ bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types()
+
+ verify(source_prog_attach_types, help_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):')
+ verify(source_prog_attach_types, man_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ManProgExtractor.filename} (ATTACH_TYPE):')
+ verify(help_prog_options, man_prog_options,
+ f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):')
+ verify(source_prog_attach_types, bashcomp_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):')
+
+ # Cgroup attach types
+
+ source_cgroup_attach_types = set(common_info.get_cgroup_attach_types().values())
+ common_info.close()
+
+ cgroup_info = CgroupFileExtractor()
+ help_cgroup_attach_types = cgroup_info.get_prog_attach_help()
+ help_cgroup_options = cgroup_info.get_options()
+ cgroup_info.close()
+
+ man_cgroup_info = ManCgroupExtractor()
+ man_cgroup_options = man_cgroup_info.get_options()
+ man_cgroup_attach_types = man_cgroup_info.get_attach_types()
+ man_cgroup_info.close()
+
+ bashcomp_cgroup_attach_types = bashcomp_info.get_cgroup_attach_types()
+ bashcomp_info.close()
+
+ verify(source_cgroup_attach_types, help_cgroup_attach_types,
+ f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):')
+ verify(source_cgroup_attach_types, man_cgroup_attach_types,
+ f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {ManCgroupExtractor.filename} (ATTACH_TYPE):')
+ verify(help_cgroup_options, man_cgroup_options,
+ f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):')
+ verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types,
+ f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):')
+
+ # Options for remaining commands
+
+ for cmd in [ 'btf', 'feature', 'gen', 'iter', 'link', 'net', 'perf', 'struct_ops', ]:
+ source_info = GenericSourceExtractor(cmd + '.c')
+ help_cmd_options = source_info.get_options()
+ source_info.close()
+
+ man_cmd_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool-' + cmd + '.rst'))
+ man_cmd_options = man_cmd_info.get_options()
+ man_cmd_info.close()
+
+ verify(help_cmd_options, man_cmd_options,
+ f'Comparing {source_info.filename} (do_help() OPTIONS) and {man_cmd_info.filename} (OPTIONS):')
+
+ source_main_info = GenericSourceExtractor('main.c')
+ help_main_options = source_main_info.get_options()
+ source_main_info.close()
+
+ man_main_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool.rst'))
+ man_main_options = man_main_info.get_options()
+ man_main_info.close()
+
+ verify(help_main_options, man_main_options,
+ f'Comparing {source_main_info.filename} (do_help() OPTIONS) and {man_main_info.filename} (OPTIONS):')
+
+ sys.exit(retval)
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/bpf/test_doc_build.sh b/tools/testing/selftests/bpf/test_doc_build.sh
index ed12111cd2f0..679cf968c7d1 100755
--- a/tools/testing/selftests/bpf/test_doc_build.sh
+++ b/tools/testing/selftests/bpf/test_doc_build.sh
@@ -4,11 +4,17 @@ set -e
# Assume script is located under tools/testing/selftests/bpf/. We want to start
# build attempts from the top of kernel repository.
-SCRIPT_REL_PATH=$(realpath --relative-to=$PWD $0)
+SCRIPT_REL_PATH=$(realpath $0)
SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH)
-KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
+KDIR_ROOT_DIR=$(realpath $SCRIPT_REL_DIR/../../../../)
+SCRIPT_REL_DIR=$(dirname $(realpath --relative-to=$KDIR_ROOT_DIR $SCRIPT_REL_PATH))
cd $KDIR_ROOT_DIR
+if [ ! -e $PWD/$SCRIPT_REL_DIR/Makefile ]; then
+ echo -e "skip: bpftool files not found!\n"
+ exit 4 # KSFT_SKIP=4
+fi
+
for tgt in docs docs-clean; do
make -s -C $PWD/$SCRIPT_REL_DIR $tgt;
done
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 30cbf5d98f7d..c7a36a9378f8 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -764,8 +764,8 @@ static void test_sockmap(unsigned int tasks, void *data)
udp = socket(AF_INET, SOCK_DGRAM, 0);
i = 0;
err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
- if (!err) {
- printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
+ if (err) {
+ printf("Failed socket update SOCK_DGRAM '%i:%i'\n",
i, udp);
goto out_sockmap;
}
@@ -985,7 +985,7 @@ static void test_sockmap(unsigned int tasks, void *data)
FD_ZERO(&w);
FD_SET(sfd[3], &w);
- to.tv_sec = 1;
+ to.tv_sec = 30;
to.tv_usec = 0;
s = select(sfd[3] + 1, &w, NULL, NULL, &to);
if (s == -1) {
@@ -1153,12 +1153,17 @@ out_sockmap:
}
#define MAPINMAP_PROG "./test_map_in_map.o"
+#define MAPINMAP_INVALID_PROG "./test_map_in_map_invalid.o"
static void test_map_in_map(void)
{
struct bpf_object *obj;
struct bpf_map *map;
int mim_fd, fd, err;
int pos = 0;
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ __u32 id = 0;
+ libbpf_print_fn_t old_print_fn;
obj = bpf_object__open(MAPINMAP_PROG);
@@ -1228,11 +1233,72 @@ static void test_map_in_map(void)
}
close(fd);
+ fd = -1;
bpf_object__close(obj);
+
+ /* Test that failing bpf_object__create_map() destroys the inner map */
+ obj = bpf_object__open(MAPINMAP_INVALID_PROG);
+ err = libbpf_get_error(obj);
+ if (err) {
+ printf("Failed to load %s program: %d %d",
+ MAPINMAP_INVALID_PROG, err, errno);
+ goto out_map_in_map;
+ }
+
+ map = bpf_object__find_map_by_name(obj, "mim");
+ if (!map) {
+ printf("Failed to load array of maps from test prog\n");
+ goto out_map_in_map;
+ }
+
+ old_print_fn = libbpf_set_print(NULL);
+
+ err = bpf_object__load(obj);
+ if (!err) {
+ printf("Loading obj supposed to fail\n");
+ goto out_map_in_map;
+ }
+
+ libbpf_set_print(old_print_fn);
+
+ /* Iterate over all maps to check whether the internal map
+ * ("mim.internal") has been destroyed.
+ */
+ while (true) {
+ err = bpf_map_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT)
+ break;
+ printf("Failed to get next map: %d", errno);
+ goto out_map_in_map;
+ }
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ printf("Failed to get map by id %u: %d", id, errno);
+ goto out_map_in_map;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ printf("Failed to get map info by fd %d: %d", fd,
+ errno);
+ goto out_map_in_map;
+ }
+
+ if (!strcmp(info.name, "mim.inner")) {
+ printf("Inner map mim.inner was not destroyed\n");
+ goto out_map_in_map;
+ }
+ }
+
return;
out_map_in_map:
- close(fd);
+ if (fd >= 0)
+ close(fd);
exit(1);
}
@@ -1330,15 +1396,22 @@ static void test_map_stress(void)
#define DO_DELETE 0
#define MAP_RETRIES 20
+#define MAX_DELAY_US 50000
+#define MIN_DELAY_RANGE_US 5000
static int map_update_retriable(int map_fd, const void *key, const void *value,
int flags, int attempts)
{
+ int delay = rand() % MIN_DELAY_RANGE_US;
+
while (bpf_map_update_elem(map_fd, key, value, flags)) {
if (!attempts || (errno != EAGAIN && errno != EBUSY))
return -errno;
- usleep(1);
+ if (delay <= MAX_DELAY_US / 2)
+ delay *= 2;
+
+ usleep(delay);
attempts--;
}
@@ -1347,11 +1420,16 @@ static int map_update_retriable(int map_fd, const void *key, const void *value,
static int map_delete_retriable(int map_fd, const void *key, int attempts)
{
+ int delay = rand() % MIN_DELAY_RANGE_US;
+
while (bpf_map_delete_elem(map_fd, key)) {
if (!attempts || (errno != EAGAIN && errno != EBUSY))
return -errno;
- usleep(1);
+ if (delay <= MAX_DELAY_US / 2)
+ delay *= 2;
+
+ usleep(delay);
attempts--;
}
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
deleted file mode 100644
index a7b9a69f4fd5..000000000000
--- a/tools/testing/selftests/bpf/test_netcnt.c
+++ /dev/null
@@ -1,148 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/sysinfo.h>
-#include <sys/time.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-#include "netcnt_common.h"
-
-#define BPF_PROG "./netcnt_prog.o"
-#define TEST_CGROUP "/test-network-counters/"
-
-static int bpf_find_map(const char *test, struct bpf_object *obj,
- const char *name)
-{
- struct bpf_map *map;
-
- map = bpf_object__find_map_by_name(obj, name);
- if (!map) {
- printf("%s:FAIL:map '%s' not found\n", test, name);
- return -1;
- }
- return bpf_map__fd(map);
-}
-
-int main(int argc, char **argv)
-{
- struct percpu_net_cnt *percpu_netcnt;
- struct bpf_cgroup_storage_key key;
- int map_fd, percpu_map_fd;
- int error = EXIT_FAILURE;
- struct net_cnt netcnt;
- struct bpf_object *obj;
- int prog_fd, cgroup_fd;
- unsigned long packets;
- unsigned long bytes;
- int cpu, nproc;
- __u32 prog_cnt;
-
- nproc = get_nprocs_conf();
- percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
- if (!percpu_netcnt) {
- printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
- goto err;
- }
-
- if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB,
- &obj, &prog_fd)) {
- printf("Failed to load bpf program\n");
- goto out;
- }
-
- cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
- if (cgroup_fd < 0)
- goto err;
-
- /* Attach bpf program */
- if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
- printf("Failed to attach bpf program");
- goto err;
- }
-
- if (system("which ping6 &>/dev/null") == 0)
- assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null"));
- else
- assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null"));
-
- if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
- &prog_cnt)) {
- printf("Failed to query attached programs");
- goto err;
- }
-
- map_fd = bpf_find_map(__func__, obj, "netcnt");
- if (map_fd < 0) {
- printf("Failed to find bpf map with net counters");
- goto err;
- }
-
- percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt");
- if (percpu_map_fd < 0) {
- printf("Failed to find bpf map with percpu net counters");
- goto err;
- }
-
- if (bpf_map_get_next_key(map_fd, NULL, &key)) {
- printf("Failed to get key in cgroup storage\n");
- goto err;
- }
-
- if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) {
- printf("Failed to lookup cgroup storage\n");
- goto err;
- }
-
- if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) {
- printf("Failed to lookup percpu cgroup storage\n");
- goto err;
- }
-
- /* Some packets can be still in per-cpu cache, but not more than
- * MAX_PERCPU_PACKETS.
- */
- packets = netcnt.packets;
- bytes = netcnt.bytes;
- for (cpu = 0; cpu < nproc; cpu++) {
- if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) {
- printf("Unexpected percpu value: %llu\n",
- percpu_netcnt[cpu].packets);
- goto err;
- }
-
- packets += percpu_netcnt[cpu].packets;
- bytes += percpu_netcnt[cpu].bytes;
- }
-
- /* No packets should be lost */
- if (packets != 10000) {
- printf("Unexpected packet count: %lu\n", packets);
- goto err;
- }
-
- /* Let's check that bytes counter matches the number of packets
- * multiplied by the size of ipv6 ICMP packet.
- */
- if (bytes != packets * 104) {
- printf("Unexpected bytes count: %lu\n", bytes);
- goto err;
- }
-
- error = 0;
- printf("test_netcnt:PASS\n");
-
-err:
- cleanup_cgroup_environment();
- free(percpu_netcnt);
-
-out:
- return error;
-}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 6f103106a39b..cc1cd240445d 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -13,6 +13,28 @@
#include <execinfo.h> /* backtrace */
#include <linux/membarrier.h>
+/* Adapted from perf/util/string.c */
+static bool glob_match(const char *str, const char *pat)
+{
+ while (*str && *pat && *pat != '*') {
+ if (*str != *pat)
+ return false;
+ str++;
+ pat++;
+ }
+ /* Check wild card */
+ if (*pat == '*') {
+ while (*pat == '*')
+ pat++;
+ if (!*pat) /* Tail wild card matches all */
+ return true;
+ while (*str)
+ if (glob_match(str++, pat))
+ return true;
+ }
+ return !*str && !*pat;
+}
+
#define EXIT_NO_TEST 2
#define EXIT_ERR_SETUP_INFRA 3
@@ -55,12 +77,12 @@ static bool should_run(struct test_selector *sel, int num, const char *name)
int i;
for (i = 0; i < sel->blacklist.cnt; i++) {
- if (strstr(name, sel->blacklist.strs[i]))
+ if (glob_match(name, sel->blacklist.strs[i]))
return false;
}
for (i = 0; i < sel->whitelist.cnt; i++) {
- if (strstr(name, sel->whitelist.strs[i]))
+ if (glob_match(name, sel->whitelist.strs[i]))
return true;
}
@@ -148,18 +170,18 @@ void test__end_subtest()
struct prog_test_def *test = env.test;
int sub_error_cnt = test->error_cnt - test->old_error_cnt;
+ dump_test_log(test, sub_error_cnt);
+
+ fprintf(env.stdout, "#%d/%d %s/%s:%s\n",
+ test->test_num, test->subtest_num, test->test_name, test->subtest_name,
+ sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
+
if (sub_error_cnt)
env.fail_cnt++;
else if (test->skip_cnt == 0)
env.sub_succ_cnt++;
skip_account();
- dump_test_log(test, sub_error_cnt);
-
- fprintf(env.stdout, "#%d/%d %s:%s\n",
- test->test_num, test->subtest_num, test->subtest_name,
- sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
-
free(test->subtest_name);
test->subtest_name = NULL;
}
@@ -450,6 +472,8 @@ enum ARG_KEYS {
ARG_VERBOSE = 'v',
ARG_GET_TEST_CNT = 'c',
ARG_LIST_TEST_NAMES = 'l',
+ ARG_TEST_NAME_GLOB_ALLOWLIST = 'a',
+ ARG_TEST_NAME_GLOB_DENYLIST = 'd',
};
static const struct argp_option opts[] = {
@@ -467,6 +491,10 @@ static const struct argp_option opts[] = {
"Get number of selected top-level tests " },
{ "list", ARG_LIST_TEST_NAMES, NULL, 0,
"List test names that would run (without running them) " },
+ { "allow", ARG_TEST_NAME_GLOB_ALLOWLIST, "NAMES", 0,
+ "Run tests with name matching the pattern (supports '*' wildcard)." },
+ { "deny", ARG_TEST_NAME_GLOB_DENYLIST, "NAMES", 0,
+ "Don't run tests with name matching the pattern (supports '*' wildcard)." },
{},
};
@@ -491,36 +519,48 @@ static void free_str_set(const struct str_set *set)
free(set->strs);
}
-static int parse_str_list(const char *s, struct str_set *set)
+static int parse_str_list(const char *s, struct str_set *set, bool is_glob_pattern)
{
char *input, *state = NULL, *next, **tmp, **strs = NULL;
- int cnt = 0;
+ int i, cnt = 0;
input = strdup(s);
if (!input)
return -ENOMEM;
- set->cnt = 0;
- set->strs = NULL;
-
while ((next = strtok_r(state ? NULL : input, ",", &state))) {
tmp = realloc(strs, sizeof(*strs) * (cnt + 1));
if (!tmp)
goto err;
strs = tmp;
- strs[cnt] = strdup(next);
- if (!strs[cnt])
- goto err;
+ if (is_glob_pattern) {
+ strs[cnt] = strdup(next);
+ if (!strs[cnt])
+ goto err;
+ } else {
+ strs[cnt] = malloc(strlen(next) + 2 + 1);
+ if (!strs[cnt])
+ goto err;
+ sprintf(strs[cnt], "*%s*", next);
+ }
cnt++;
}
- set->cnt = cnt;
- set->strs = (const char **)strs;
+ tmp = realloc(set->strs, sizeof(*strs) * (cnt + set->cnt));
+ if (!tmp)
+ goto err;
+ memcpy(tmp + set->cnt, strs, sizeof(*strs) * cnt);
+ set->strs = (const char **)tmp;
+ set->cnt += cnt;
+
free(input);
+ free(strs);
return 0;
err:
+ for (i = 0; i < cnt; i++)
+ free(strs[i]);
free(strs);
free(input);
return -ENOMEM;
@@ -553,29 +593,35 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
}
break;
}
+ case ARG_TEST_NAME_GLOB_ALLOWLIST:
case ARG_TEST_NAME: {
char *subtest_str = strchr(arg, '/');
if (subtest_str) {
*subtest_str = '\0';
if (parse_str_list(subtest_str + 1,
- &env->subtest_selector.whitelist))
+ &env->subtest_selector.whitelist,
+ key == ARG_TEST_NAME_GLOB_ALLOWLIST))
return -ENOMEM;
}
- if (parse_str_list(arg, &env->test_selector.whitelist))
+ if (parse_str_list(arg, &env->test_selector.whitelist,
+ key == ARG_TEST_NAME_GLOB_ALLOWLIST))
return -ENOMEM;
break;
}
+ case ARG_TEST_NAME_GLOB_DENYLIST:
case ARG_TEST_NAME_BLACKLIST: {
char *subtest_str = strchr(arg, '/');
if (subtest_str) {
*subtest_str = '\0';
if (parse_str_list(subtest_str + 1,
- &env->subtest_selector.blacklist))
+ &env->subtest_selector.blacklist,
+ key == ARG_TEST_NAME_GLOB_DENYLIST))
return -ENOMEM;
}
- if (parse_str_list(arg, &env->test_selector.blacklist))
+ if (parse_str_list(arg, &env->test_selector.blacklist,
+ key == ARG_TEST_NAME_GLOB_DENYLIST))
return -ENOMEM;
break;
}
@@ -755,7 +801,7 @@ int main(int argc, char **argv)
save_netns();
stdio_hijack();
env.has_testmod = true;
- if (load_bpf_testmod()) {
+ if (!env.list_test_names && load_bpf_testmod()) {
fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
env.has_testmod = false;
}
@@ -786,24 +832,25 @@ int main(int argc, char **argv)
test__end_subtest();
test->tested = true;
- if (test->error_cnt)
- env.fail_cnt++;
- else
- env.succ_cnt++;
- skip_account();
dump_test_log(test, test->error_cnt);
fprintf(env.stdout, "#%d %s:%s\n",
test->test_num, test->test_name,
- test->error_cnt ? "FAIL" : "OK");
+ test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
+
+ if (test->error_cnt)
+ env.fail_cnt++;
+ else
+ env.succ_cnt++;
+ skip_account();
reset_affinity();
restore_netns();
if (test->need_cgroup_cleanup)
cleanup_cgroup_environment();
}
- if (env.has_testmod)
+ if (!env.list_test_names && env.has_testmod)
unload_bpf_testmod();
stdio_restore();
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 8ef7f334e715..c8c2bf878f67 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -221,6 +221,18 @@ extern int test__join_cgroup(const char *path);
___ok; \
})
+#define ASSERT_STRNEQ(actual, expected, len, name) ({ \
+ static int duration = 0; \
+ const char *___act = actual; \
+ const char *___exp = expected; \
+ int ___len = len; \
+ bool ___ok = strncmp(___act, ___exp, ___len) == 0; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual '%.*s' != expected '%.*s'\n", \
+ (name), ___len, ___act, ___len, ___exp); \
+ ___ok; \
+})
+
#define ASSERT_OK(res, name) ({ \
static int duration = 0; \
long long ___res = (res); \
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index c9dde9b9d987..088fcad138c9 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -69,7 +69,7 @@ cleanup() {
}
server_listen() {
- ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" &
+ ip netns exec "${ns2}" nc "${netcat_opt}" -l "${port}" > "${outfile}" &
server_pid=$!
sleep 0.2
}
diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh
index ba8ffcdaac30..995278e684b6 100755
--- a/tools/testing/selftests/bpf/test_xdp_veth.sh
+++ b/tools/testing/selftests/bpf/test_xdp_veth.sh
@@ -108,7 +108,7 @@ ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
-ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx
+ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp
ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
trap cleanup EXIT
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 46633a3bfb0b..cd7bf32e6a17 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -63,14 +63,11 @@
# ----------------
# Must run with CAP_NET_ADMIN capability.
#
-# Run (full color-coded output):
-# sudo ./test_xsk.sh -c
+# Run:
+# sudo ./test_xsk.sh
#
# If running from kselftests:
-# sudo make colorconsole=1 run_tests
-#
-# Run (full output without color-coding):
-# sudo ./test_xsk.sh
+# sudo make run_tests
#
# Run with verbose output:
# sudo ./test_xsk.sh -v
@@ -83,7 +80,6 @@
while getopts "cvD" flag
do
case "${flag}" in
- c) colorconsole=1;;
v) verbose=1;;
D) dump_pkts=1;;
esac
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 1bbd1d9830c8..e7a19b04d4ea 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -136,3 +136,90 @@ void read_trace_pipe(void)
}
}
}
+
+#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
+
+#define OP_RT_RA_MASK 0xffff0000UL
+#define LIS_R2 0x3c400000UL
+#define ADDIS_R2_R12 0x3c4c0000UL
+#define ADDI_R2_R2 0x38420000UL
+
+ssize_t get_uprobe_offset(const void *addr, ssize_t base)
+{
+ u32 *insn = (u32 *)(uintptr_t)addr;
+
+ /*
+ * A PPC64 ABIv2 function may have a local and a global entry
+ * point. We need to use the local entry point when patching
+ * functions, so identify and step over the global entry point
+ * sequence.
+ *
+ * The global entry point sequence is always of the form:
+ *
+ * addis r2,r12,XXXX
+ * addi r2,r2,XXXX
+ *
+ * A linker optimisation may convert the addis to lis:
+ *
+ * lis r2,XXXX
+ * addi r2,r2,XXXX
+ */
+ if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+ ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
+ ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
+ return (ssize_t)(insn + 2) - base;
+ else
+ return (uintptr_t)addr - base;
+}
+
+#else
+
+ssize_t get_uprobe_offset(const void *addr, ssize_t base)
+{
+ return (uintptr_t)addr - base;
+}
+
+#endif
+
+ssize_t get_base_addr(void)
+{
+ size_t start, offset;
+ char buf[256];
+ FILE *f;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -errno;
+
+ while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
+ &start, buf, &offset) == 3) {
+ if (strcmp(buf, "r-xp") == 0) {
+ fclose(f);
+ return start - offset;
+ }
+ }
+
+ fclose(f);
+ return -EINVAL;
+}
+
+ssize_t get_rel_offset(uintptr_t addr)
+{
+ size_t start, end, offset;
+ char buf[256];
+ FILE *f;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -errno;
+
+ while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &offset) == 4) {
+ if (addr >= start && addr < end) {
+ fclose(f);
+ return (size_t)addr - start + offset;
+ }
+ }
+
+ fclose(f);
+ return -EINVAL;
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index f62fdef9e589..d907b445524d 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -18,4 +18,8 @@ int kallsyms_find(const char *sym, unsigned long long *addr);
void read_trace_pipe(void);
+ssize_t get_uprobe_offset(const void *addr, ssize_t base);
+ssize_t get_base_addr(void);
+ssize_t get_rel_offset(uintptr_t addr);
+
#endif
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index 2c8935b3e65d..ee454327e5c6 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -159,3 +159,15 @@
.result = ACCEPT,
.retval = 2,
},
+{
+ "dead code: zero extension",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -4),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 0,
+},
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 1135fb980814..f53ce2683f8d 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -70,7 +70,6 @@
#include <errno.h>
#include <getopt.h>
#include <asm/barrier.h>
-typedef __u16 __sum16;
#include <linux/if_link.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
@@ -106,14 +105,9 @@ static const u16 UDP_PORT2 = 2121;
static void __exit_with_error(int error, const char *file, const char *func, int line)
{
- if (configured_mode == TEST_MODE_UNCONFIGURED) {
- ksft_exit_fail_msg
- ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
- } else {
- ksft_test_result_fail
- ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
- ksft_exit_xfail();
- }
+ ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error,
+ strerror(error));
+ ksft_exit_xfail();
}
#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
@@ -126,7 +120,7 @@ static void __exit_with_error(int error, const char *file, const char *func, int
test_type == TEST_TYPE_STATS ? "Stats" : "",\
test_type == TEST_TYPE_BPF_RES ? "BPF RES" : ""))
-static void *memset32_htonl(void *dest, u32 val, u32 size)
+static void memset32_htonl(void *dest, u32 val, u32 size)
{
u32 *ptr = (u32 *)dest;
int i;
@@ -135,11 +129,6 @@ static void *memset32_htonl(void *dest, u32 val, u32 size)
for (i = 0; i < (size & (~0x3)); i += 4)
ptr[i >> 2] = val;
-
- for (; i < size; i++)
- ((char *)dest)[i] = ((char *)&val)[i & 3];
-
- return dest;
}
/*
@@ -230,13 +219,13 @@ static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr)
ip_hdr->check = 0;
}
-static void gen_udp_hdr(struct generic_data *data, struct ifobject *ifobject,
+static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject,
struct udphdr *udp_hdr)
{
udp_hdr->source = htons(ifobject->src_port);
udp_hdr->dest = htons(ifobject->dst_port);
udp_hdr->len = htons(UDP_PKT_SIZE);
- memset32_htonl(pkt_data + PKT_HDR_SIZE, htonl(data->seqnum), UDP_PKT_DATA_SIZE);
+ memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE);
}
static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
@@ -246,12 +235,7 @@ static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
}
-static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
-{
- memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE);
-}
-
-static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
+static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size, int idx)
{
struct xsk_umem_config cfg = {
.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
@@ -260,7 +244,6 @@ static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
.frame_headroom = frame_headroom,
.flags = XSK_UMEM__DEFAULT_FLAGS
};
- int size = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
struct xsk_umem_info *umem;
int ret;
@@ -271,7 +254,7 @@ static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
ret = xsk_umem__create(&umem->umem, buffer, size,
&umem->fq, &umem->cq, &cfg);
if (ret)
- exit_with_error(ret);
+ exit_with_error(-ret);
umem->buffer = buffer;
@@ -285,7 +268,7 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
- exit_with_error(ret);
+ exit_with_error(-ret);
for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
*xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
@@ -333,20 +316,19 @@ static struct option long_options[] = {
{"queue", optional_argument, 0, 'q'},
{"dump-pkts", optional_argument, 0, 'D'},
{"verbose", no_argument, 0, 'v'},
- {"tx-pkt-count", optional_argument, 0, 'C'},
{0, 0, 0, 0}
};
static void usage(const char *prog)
{
const char *str =
- " Usage: %s [OPTIONS]\n"
- " Options:\n"
- " -i, --interface Use interface\n"
- " -q, --queue=n Use queue n (default 0)\n"
- " -D, --dump-pkts Dump packets L2 - L5\n"
- " -v, --verbose Verbose output\n"
- " -C, --tx-pkt-count=n Number of packets to send\n";
+ " Usage: %s [OPTIONS]\n"
+ " Options:\n"
+ " -i, --interface Use interface\n"
+ " -q, --queue=n Use queue n (default 0)\n"
+ " -D, --dump-pkts Dump packets L2 - L5\n"
+ " -v, --verbose Verbose output\n";
+
ksft_print_msg(str, prog);
}
@@ -392,7 +374,7 @@ static void parse_command_line(int argc, char **argv)
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "i:DC:v", long_options, &option_index);
+ c = getopt_long(argc, argv, "i:Dv", long_options, &option_index);
if (c == -1)
break;
@@ -413,13 +395,10 @@ static void parse_command_line(int argc, char **argv)
interface_index++;
break;
case 'D':
- debug_pkt_dump = 1;
- break;
- case 'C':
- opt_pkt_count = atoi(optarg);
+ opt_pkt_dump = true;
break;
case 'v':
- opt_verbose = 1;
+ opt_verbose = true;
break;
default:
usage(basename(argv[0]));
@@ -427,17 +406,143 @@ static void parse_command_line(int argc, char **argv)
}
}
- if (!opt_pkt_count) {
- print_verbose("No tx-pkt-count specified, using default %u\n", DEFAULT_PKT_CNT);
- opt_pkt_count = DEFAULT_PKT_CNT;
- }
-
if (!validate_interfaces()) {
usage(basename(argv[0]));
ksft_exit_xfail();
}
}
+static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
+{
+ if (pkt_nb >= pkt_stream->nb_pkts)
+ return NULL;
+
+ return &pkt_stream->pkts[pkt_nb];
+}
+
+static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ pkt_stream = malloc(sizeof(*pkt_stream));
+ if (!pkt_stream)
+ exit_with_error(ENOMEM);
+
+ pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
+ if (!pkt_stream->pkts)
+ exit_with_error(ENOMEM);
+
+ pkt_stream->nb_pkts = nb_pkts;
+ for (i = 0; i < nb_pkts; i++) {
+ pkt_stream->pkts[i].addr = (i % num_frames) * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ pkt_stream->pkts[i].len = pkt_len;
+ pkt_stream->pkts[i].payload = i;
+ }
+
+ return pkt_stream;
+}
+
+static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
+{
+ struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb);
+ struct udphdr *udp_hdr;
+ struct ethhdr *eth_hdr;
+ struct iphdr *ip_hdr;
+ void *data;
+
+ if (!pkt)
+ return NULL;
+
+ data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr);
+ udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr));
+ ip_hdr = (struct iphdr *)(data + sizeof(struct ethhdr));
+ eth_hdr = (struct ethhdr *)data;
+
+ gen_udp_hdr(pkt_nb, data, ifobject, udp_hdr);
+ gen_ip_hdr(ifobject, ip_hdr);
+ gen_udp_csum(udp_hdr, ip_hdr);
+ gen_eth_hdr(ifobject, eth_hdr);
+
+ return pkt;
+}
+
+static void pkt_dump(void *pkt, u32 len)
+{
+ char s[INET_ADDRSTRLEN];
+ struct ethhdr *ethhdr;
+ struct udphdr *udphdr;
+ struct iphdr *iphdr;
+ int payload, i;
+
+ ethhdr = pkt;
+ iphdr = pkt + sizeof(*ethhdr);
+ udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr);
+
+ /*extract L2 frame */
+ fprintf(stdout, "DEBUG>> L2: dst mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ fprintf(stdout, "%02X", ethhdr->h_dest[i]);
+
+ fprintf(stdout, "\nDEBUG>> L2: src mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ fprintf(stdout, "%02X", ethhdr->h_source[i]);
+
+ /*extract L3 frame */
+ fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
+ fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
+ inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
+ fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
+ inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
+ /*extract L4 frame */
+ fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
+ fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
+ /*extract L5 frame */
+ payload = *((uint32_t *)(pkt + PKT_HDR_SIZE));
+
+ fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
+ fprintf(stdout, "---------------------------------------\n");
+}
+
+static bool is_pkt_valid(struct pkt *pkt, void *buffer, const struct xdp_desc *desc)
+{
+ void *data = xsk_umem__get_data(buffer, desc->addr);
+ struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr));
+
+ if (!pkt) {
+ ksft_test_result_fail("ERROR: [%s] too many packets received\n", __func__);
+ return false;
+ }
+
+ if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
+ u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
+
+ if (opt_pkt_dump && test_type != TEST_TYPE_STATS)
+ pkt_dump(data, PKT_SIZE);
+
+ if (pkt->len != desc->len) {
+ ksft_test_result_fail
+ ("ERROR: [%s] expected length [%d], got length [%d]\n",
+ __func__, pkt->len, desc->len);
+ return false;
+ }
+
+ if (pkt->payload != seqnum) {
+ ksft_test_result_fail
+ ("ERROR: [%s] expected seqnum [%d], got seqnum [%d]\n",
+ __func__, pkt->payload, seqnum);
+ return false;
+ }
+ } else {
+ ksft_print_msg("Invalid frame received: ");
+ ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
+ iphdr->tos);
+ return false;
+ }
+
+ return true;
+}
+
static void kick_tx(struct xsk_socket_info *xsk)
{
int ret;
@@ -448,7 +553,7 @@ static void kick_tx(struct xsk_socket_info *xsk)
exit_with_error(errno);
}
-static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
+static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
{
unsigned int rcvd;
u32 idx;
@@ -463,133 +568,108 @@ static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
if (rcvd) {
xsk_ring_cons__release(&xsk->umem->cq, rcvd);
xsk->outstanding_tx -= rcvd;
- xsk->tx_npkts += rcvd;
}
}
-static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds)
+static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *xsk,
+ struct pollfd *fds)
{
- unsigned int rcvd, i;
- u32 idx_rx = 0, idx_fq = 0;
+ u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkt_count = 0;
+ struct pkt *pkt;
int ret;
- rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
- if (!rcvd) {
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
- ret = poll(fds, 1, POLL_TMOUT);
- if (ret < 0)
- exit_with_error(ret);
+ pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++);
+ while (pkt) {
+ rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ if (!rcvd) {
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+ ret = poll(fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ exit_with_error(-ret);
+ }
+ continue;
}
- return;
- }
- ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
- while (ret != rcvd) {
- if (ret < 0)
- exit_with_error(ret);
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
- ret = poll(fds, 1, POLL_TMOUT);
+ ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+ while (ret != rcvd) {
if (ret < 0)
- exit_with_error(ret);
+ exit_with_error(-ret);
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+ ret = poll(fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ exit_with_error(-ret);
+ }
+ ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
}
- ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
- }
-
- for (i = 0; i < rcvd; i++) {
- u64 addr, orig;
-
- addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
- xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
- orig = xsk_umem__extract_addr(addr);
- addr = xsk_umem__add_offset_to_addr(addr);
- pkt_node_rx = malloc(sizeof(struct pkt) + PKT_SIZE);
- if (!pkt_node_rx)
- exit_with_error(errno);
+ for (i = 0; i < rcvd; i++) {
+ const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
+ u64 addr = desc->addr, orig;
- pkt_node_rx->pkt_frame = malloc(PKT_SIZE);
- if (!pkt_node_rx->pkt_frame)
- exit_with_error(errno);
+ orig = xsk_umem__extract_addr(addr);
+ addr = xsk_umem__add_offset_to_addr(addr);
+ if (!is_pkt_valid(pkt, xsk->umem->buffer, desc))
+ return;
- memcpy(pkt_node_rx->pkt_frame, xsk_umem__get_data(xsk->umem->buffer, addr),
- PKT_SIZE);
-
- TAILQ_INSERT_HEAD(&head, pkt_node_rx, pkt_nodes);
+ *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
+ pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++);
+ }
- *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
+ xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
+ xsk_ring_cons__release(&xsk->rx, rcvd);
}
-
- xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
- xsk_ring_cons__release(&xsk->rx, rcvd);
- xsk->rx_npkts += rcvd;
}
-static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
+static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb)
{
- u32 idx = 0;
- unsigned int i;
- bool tx_invalid_test = stat_test_type == STAT_TEST_TX_INVALID;
- u32 len = tx_invalid_test ? XSK_UMEM__DEFAULT_FRAME_SIZE + 1 : PKT_SIZE;
+ struct xsk_socket_info *xsk = ifobject->xsk;
+ u32 i, idx;
- while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size)
- complete_tx_only(xsk, batch_size);
+ while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE)
+ complete_pkts(xsk, BATCH_SIZE);
- for (i = 0; i < batch_size; i++) {
+ for (i = 0; i < BATCH_SIZE; i++) {
struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
+ struct pkt *pkt = pkt_generate(ifobject, pkt_nb);
- tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
- tx_desc->len = len;
- }
+ if (!pkt)
+ break;
- xsk_ring_prod__submit(&xsk->tx, batch_size);
- if (!tx_invalid_test) {
- xsk->outstanding_tx += batch_size;
- } else if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
- kick_tx(xsk);
+ tx_desc->addr = pkt->addr;
+ tx_desc->len = pkt->len;
+ pkt_nb++;
}
- *frameptr += batch_size;
- *frameptr %= num_frames;
- complete_tx_only(xsk, batch_size);
-}
-
-static int get_batch_size(int pkt_cnt)
-{
- if (!opt_pkt_count)
- return BATCH_SIZE;
- if (pkt_cnt + BATCH_SIZE <= opt_pkt_count)
- return BATCH_SIZE;
+ xsk_ring_prod__submit(&xsk->tx, i);
+ if (stat_test_type != STAT_TEST_TX_INVALID)
+ xsk->outstanding_tx += i;
+ else if (xsk_ring_prod__needs_wakeup(&xsk->tx))
+ kick_tx(xsk);
+ complete_pkts(xsk, i);
- return opt_pkt_count - pkt_cnt;
+ return i;
}
-static void complete_tx_only_all(struct ifobject *ifobject)
+static void wait_for_tx_completion(struct xsk_socket_info *xsk)
{
- bool pending;
-
- do {
- pending = false;
- if (ifobject->xsk->outstanding_tx) {
- complete_tx_only(ifobject->xsk, BATCH_SIZE);
- pending = !!ifobject->xsk->outstanding_tx;
- }
- } while (pending);
+ while (xsk->outstanding_tx)
+ complete_pkts(xsk, BATCH_SIZE);
}
-static void tx_only_all(struct ifobject *ifobject)
+static void send_pkts(struct ifobject *ifobject)
{
struct pollfd fds[MAX_SOCKS] = { };
- u32 frame_nb = 0;
- int pkt_cnt = 0;
- int ret;
+ u32 pkt_cnt = 0;
fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
fds[0].events = POLLOUT;
- while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
- int batch_size = get_batch_size(pkt_cnt);
+ while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
+ u32 sent;
if (test_type == TEST_TYPE_POLL) {
+ int ret;
+
ret = poll(fds, 1, POLL_TMOUT);
if (ret <= 0)
continue;
@@ -598,78 +678,30 @@ static void tx_only_all(struct ifobject *ifobject)
continue;
}
- tx_only(ifobject->xsk, &frame_nb, batch_size);
- pkt_cnt += batch_size;
+ sent = __send_pkts(ifobject, pkt_cnt);
+ pkt_cnt += sent;
+ usleep(10);
}
- if (opt_pkt_count)
- complete_tx_only_all(ifobject);
+ wait_for_tx_completion(ifobject->xsk);
}
-static void worker_pkt_dump(void)
-{
- struct ethhdr *ethhdr;
- struct iphdr *iphdr;
- struct udphdr *udphdr;
- char s[128];
- int payload;
- void *ptr;
-
- fprintf(stdout, "---------------------------------------\n");
- for (int iter = 0; iter < num_frames - 1; iter++) {
- ptr = pkt_buf[iter]->payload;
- ethhdr = ptr;
- iphdr = ptr + sizeof(*ethhdr);
- udphdr = ptr + sizeof(*ethhdr) + sizeof(*iphdr);
-
- /*extract L2 frame */
- fprintf(stdout, "DEBUG>> L2: dst mac: ");
- for (int i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ethhdr->h_dest[i]);
-
- fprintf(stdout, "\nDEBUG>> L2: src mac: ");
- for (int i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ethhdr->h_source[i]);
-
- /*extract L3 frame */
- fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
- fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
- inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
- fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
- inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
- /*extract L4 frame */
- fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
- fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
- /*extract L5 frame */
- payload = *((uint32_t *)(ptr + PKT_HDR_SIZE));
-
- if (payload == EOT) {
- print_verbose("End-of-transmission frame received\n");
- fprintf(stdout, "---------------------------------------\n");
- break;
- }
- fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
- fprintf(stdout, "---------------------------------------\n");
- }
-}
-
-static void worker_stats_validate(struct ifobject *ifobject)
+static bool rx_stats_are_valid(struct ifobject *ifobject)
{
+ u32 xsk_stat = 0, expected_stat = ifobject->pkt_stream->nb_pkts;
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ int fd = xsk_socket__fd(xsk);
struct xdp_statistics stats;
socklen_t optlen;
int err;
- struct xsk_socket *xsk = stat_test_type == STAT_TEST_TX_INVALID ?
- ifdict[!ifobject->ifdict_index]->xsk->xsk :
- ifobject->xsk->xsk;
- int fd = xsk_socket__fd(xsk);
- unsigned long xsk_stat = 0, expected_stat = opt_pkt_count;
-
- sigvar = 0;
optlen = sizeof(stats);
err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
- if (err)
- return;
+ if (err) {
+ ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+ __func__, -err, strerror(-err));
+ return true;
+ }
if (optlen == sizeof(struct xdp_statistics)) {
switch (stat_test_type) {
@@ -677,8 +709,7 @@ static void worker_stats_validate(struct ifobject *ifobject)
xsk_stat = stats.rx_dropped;
break;
case STAT_TEST_TX_INVALID:
- xsk_stat = stats.tx_invalid_descs;
- break;
+ return true;
case STAT_TEST_RX_FULL:
xsk_stat = stats.rx_ring_full;
expected_stat -= RX_FULL_RXQSIZE;
@@ -691,99 +722,70 @@ static void worker_stats_validate(struct ifobject *ifobject)
}
if (xsk_stat == expected_stat)
- sigvar = 1;
+ return true;
}
+
+ return false;
}
-static void worker_pkt_validate(void)
+static void tx_stats_validate(struct ifobject *ifobject)
{
- u32 payloadseqnum = -2;
- struct iphdr *iphdr;
-
- while (1) {
- pkt_node_rx_q = TAILQ_LAST(&head, head_s);
- if (!pkt_node_rx_q)
- break;
-
- iphdr = (struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr));
-
- /*do not increment pktcounter if !(tos=0x9 and ipv4) */
- if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
- payloadseqnum = *((uint32_t *)(pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE));
- if (debug_pkt_dump && payloadseqnum != EOT) {
- pkt_obj = malloc(sizeof(*pkt_obj));
- pkt_obj->payload = malloc(PKT_SIZE);
- memcpy(pkt_obj->payload, pkt_node_rx_q->pkt_frame, PKT_SIZE);
- pkt_buf[payloadseqnum] = pkt_obj;
- }
-
- if (payloadseqnum == EOT) {
- print_verbose("End-of-transmission frame received: PASS\n");
- sigvar = 1;
- break;
- }
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ int fd = xsk_socket__fd(xsk);
+ struct xdp_statistics stats;
+ socklen_t optlen;
+ int err;
- if (prev_pkt + 1 != payloadseqnum) {
- ksft_test_result_fail
- ("ERROR: [%s] prev_pkt [%d], payloadseqnum [%d]\n",
- __func__, prev_pkt, payloadseqnum);
- ksft_exit_xfail();
- }
+ optlen = sizeof(stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+ if (err) {
+ ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+ __func__, -err, strerror(-err));
+ return;
+ }
- prev_pkt = payloadseqnum;
- pkt_counter++;
- } else {
- ksft_print_msg("Invalid frame received: ");
- ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
- iphdr->tos);
- }
+ if (stats.tx_invalid_descs == ifobject->pkt_stream->nb_pkts)
+ return;
- TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes);
- free(pkt_node_rx_q->pkt_frame);
- free(pkt_node_rx_q);
- pkt_node_rx_q = NULL;
- }
+ ksft_test_result_fail("ERROR: [%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
+ __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts);
}
static void thread_common_ops(struct ifobject *ifobject, void *bufs)
{
- int umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ u64 umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+ size_t mmap_sz = umem_sz;
int ctr = 0;
int ret;
ifobject->ns_fd = switch_namespace(ifobject->nsname);
if (test_type == TEST_TYPE_BPF_RES)
- umem_sz *= 2;
+ mmap_sz *= 2;
- bufs = mmap(NULL, umem_sz,
- PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
if (bufs == MAP_FAILED)
exit_with_error(errno);
- xsk_configure_umem(ifobject, bufs, 0);
- ifobject->umem = ifobject->umem_arr[0];
- ret = xsk_configure_socket(ifobject, 0);
-
- /* Retry Create Socket if it fails as xsk_socket__create()
- * is asynchronous
- */
- while (ret && ctr < SOCK_RECONF_CTR) {
- xsk_configure_umem(ifobject, bufs, 0);
+ while (ctr++ < SOCK_RECONF_CTR) {
+ xsk_configure_umem(ifobject, bufs, umem_sz, 0);
ifobject->umem = ifobject->umem_arr[0];
ret = xsk_configure_socket(ifobject, 0);
+ if (!ret)
+ break;
+
+ /* Retry Create Socket if it fails as xsk_socket__create() is asynchronous */
usleep(USLEEP_MAX);
- ctr++;
+ if (ctr >= SOCK_RECONF_CTR)
+ exit_with_error(-ret);
}
- if (ctr >= SOCK_RECONF_CTR)
- exit_with_error(ret);
-
ifobject->umem = ifobject->umem_arr[0];
ifobject->xsk = ifobject->xsk_arr[0];
if (test_type == TEST_TYPE_BPF_RES) {
- xsk_configure_umem(ifobject, (u8 *)bufs + (umem_sz / 2), 1);
+ xsk_configure_umem(ifobject, (u8 *)bufs + umem_sz, umem_sz, 1);
ifobject->umem = ifobject->umem_arr[1];
ret = xsk_configure_socket(ifobject, 1);
}
@@ -809,33 +811,18 @@ static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
static void *worker_testapp_validate_tx(void *arg)
{
- struct udphdr *udp_hdr =
- (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr));
- struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr));
- struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
struct ifobject *ifobject = (struct ifobject *)arg;
- struct generic_data data;
void *bufs = NULL;
if (!second_step)
thread_common_ops(ifobject, bufs);
- for (int i = 0; i < num_frames; i++) {
- /*send EOT frame */
- if (i == (num_frames - 1))
- data.seqnum = -1;
- else
- data.seqnum = i;
- gen_udp_hdr(&data, ifobject, udp_hdr);
- gen_ip_hdr(ifobject, ip_hdr);
- gen_udp_csum(udp_hdr, ip_hdr);
- gen_eth_hdr(ifobject, eth_hdr);
- gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
- }
+ print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts,
+ ifobject->ifname);
+ send_pkts(ifobject);
- print_verbose("Sending %d packets on interface %s\n",
- (opt_pkt_count - 1), ifobject->ifname);
- tx_only_all(ifobject);
+ if (stat_test_type == STAT_TEST_TX_INVALID)
+ tx_stats_validate(ifobject);
testapp_cleanup_xsk_res(ifobject);
pthread_exit(NULL);
@@ -853,31 +840,16 @@ static void *worker_testapp_validate_rx(void *arg)
if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
xsk_populate_fill_ring(ifobject->umem);
- TAILQ_INIT(&head);
- if (debug_pkt_dump) {
- pkt_buf = calloc(num_frames, sizeof(*pkt_buf));
- if (!pkt_buf)
- exit_with_error(errno);
- }
-
fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
fds[0].events = POLLIN;
pthread_barrier_wait(&barr);
- while (1) {
- if (test_type != TEST_TYPE_STATS) {
- rx_pkt(ifobject->xsk, fds);
- worker_pkt_validate();
- } else {
- worker_stats_validate(ifobject);
- }
- if (sigvar)
- break;
- }
-
- print_verbose("Received %d packets on interface %s\n",
- pkt_counter, ifobject->ifname);
+ if (test_type == TEST_TYPE_STATS)
+ while (!rx_stats_are_valid(ifobject))
+ continue;
+ else
+ receive_pkts(ifobject->pkt_stream, ifobject->xsk, fds);
if (test_type == TEST_TYPE_TEARDOWN)
print_verbose("Destroying socket\n");
@@ -890,10 +862,18 @@ static void testapp_validate(void)
{
bool bidi = test_type == TEST_TYPE_BIDI;
bool bpf = test_type == TEST_TYPE_BPF_RES;
+ struct pkt_stream *pkt_stream;
if (pthread_barrier_init(&barr, NULL, 2))
exit_with_error(errno);
+ if (stat_test_type == STAT_TEST_TX_INVALID)
+ pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE);
+ else
+ pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, PKT_SIZE);
+ ifdict_tx->pkt_stream = pkt_stream;
+ ifdict_rx->pkt_stream = pkt_stream;
+
/*Spawn RX thread */
pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx);
@@ -907,15 +887,6 @@ static void testapp_validate(void)
pthread_join(t1, NULL);
pthread_join(t0, NULL);
- if (debug_pkt_dump && test_type != TEST_TYPE_STATS) {
- worker_pkt_dump();
- for (int iter = 0; iter < num_frames - 1; iter++) {
- free(pkt_buf[iter]->payload);
- free(pkt_buf[iter]);
- }
- free(pkt_buf);
- }
-
if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS))
print_ksft_result();
}
@@ -925,9 +896,6 @@ static void testapp_teardown(void)
int i;
for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
- pkt_counter = 0;
- prev_pkt = -1;
- sigvar = 0;
print_verbose("Creating socket\n");
testapp_validate();
}
@@ -953,9 +921,6 @@ static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2)
static void testapp_bidi(void)
{
for (int i = 0; i < MAX_BIDI_ITER; i++) {
- pkt_counter = 0;
- prev_pkt = -1;
- sigvar = 0;
print_verbose("Creating socket\n");
testapp_validate();
if (!second_step) {
@@ -987,9 +952,6 @@ static void testapp_bpf_res(void)
int i;
for (i = 0; i < MAX_BPF_ITER; i++) {
- pkt_counter = 0;
- prev_pkt = -1;
- sigvar = 0;
print_verbose("Creating socket\n");
testapp_validate();
if (!second_step)
@@ -1017,6 +979,8 @@ static void testapp_stats(void)
case STAT_TEST_RX_FULL:
rxqsize = RX_FULL_RXQSIZE;
break;
+ case STAT_TEST_TX_INVALID:
+ continue;
default:
break;
}
@@ -1062,10 +1026,7 @@ static void run_pkt_test(int mode, int type)
/* reset defaults after potential previous test */
xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
- pkt_counter = 0;
second_step = 0;
- prev_pkt = -1;
- sigvar = 0;
stat_test_type = -1;
rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
@@ -1102,62 +1063,70 @@ static void run_pkt_test(int mode, int type)
}
}
+static struct ifobject *ifobject_create(void)
+{
+ struct ifobject *ifobj;
+
+ ifobj = calloc(1, sizeof(struct ifobject));
+ if (!ifobj)
+ return NULL;
+
+ ifobj->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
+ if (!ifobj->xsk_arr)
+ goto out_xsk_arr;
+
+ ifobj->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
+ if (!ifobj->umem_arr)
+ goto out_umem_arr;
+
+ return ifobj;
+
+out_umem_arr:
+ free(ifobj->xsk_arr);
+out_xsk_arr:
+ free(ifobj);
+ return NULL;
+}
+
+static void ifobject_delete(struct ifobject *ifobj)
+{
+ free(ifobj->umem_arr);
+ free(ifobj->xsk_arr);
+ free(ifobj);
+}
+
int main(int argc, char **argv)
{
struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
- bool failure = false;
int i, j;
if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
exit_with_error(errno);
- for (int i = 0; i < MAX_INTERFACES; i++) {
- ifdict[i] = malloc(sizeof(struct ifobject));
+ for (i = 0; i < MAX_INTERFACES; i++) {
+ ifdict[i] = ifobject_create();
if (!ifdict[i])
- exit_with_error(errno);
-
- ifdict[i]->ifdict_index = i;
- ifdict[i]->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
- if (!ifdict[i]->xsk_arr) {
- failure = true;
- goto cleanup;
- }
- ifdict[i]->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
- if (!ifdict[i]->umem_arr) {
- failure = true;
- goto cleanup;
- }
+ exit_with_error(ENOMEM);
}
setlocale(LC_ALL, "");
parse_command_line(argc, argv);
- num_frames = ++opt_pkt_count;
-
- init_iface(ifdict[0], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
- init_iface(ifdict[1], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
+ init_iface(ifdict[tx], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
+ init_iface(ifdict[rx], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
- for (i = 0; i < TEST_MODE_MAX; i++) {
- for (j = 0; j < TEST_TYPE_MAX; j++)
+ for (i = 0; i < TEST_MODE_MAX; i++)
+ for (j = 0; j < TEST_TYPE_MAX; j++) {
run_pkt_test(i, j);
- }
-
-cleanup:
- for (int i = 0; i < MAX_INTERFACES; i++) {
- if (ifdict[i]->ns_fd != -1)
- close(ifdict[i]->ns_fd);
- free(ifdict[i]->xsk_arr);
- free(ifdict[i]->umem_arr);
- free(ifdict[i]);
- }
+ usleep(USLEEP_MAX);
+ }
- if (failure)
- exit_with_error(errno);
+ for (i = 0; i < MAX_INTERFACES; i++)
+ ifobject_delete(ifdict[i]);
ksft_exit_pass();
-
return 0;
}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 6c428b276ab6..7e49b9fbe25e 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -34,28 +34,23 @@
#define IP_PKT_TOS 0x9
#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
-#define EOT (-1)
-#define USLEEP_MAX 200000
+#define USLEEP_MAX 10000
#define SOCK_RECONF_CTR 10
-#define BATCH_SIZE 64
+#define BATCH_SIZE 8
#define POLL_TMOUT 1000
-#define DEFAULT_PKT_CNT 10000
+#define DEFAULT_PKT_CNT (4 * 1024)
#define RX_FULL_RXQSIZE 32
+#define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1)
#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
-typedef __u32 u32;
-typedef __u16 u16;
-typedef __u8 u8;
-
-enum TEST_MODES {
- TEST_MODE_UNCONFIGURED = -1,
+enum test_mode {
TEST_MODE_SKB,
TEST_MODE_DRV,
TEST_MODE_MAX
};
-enum TEST_TYPES {
+enum test_type {
TEST_TYPE_NOPOLL,
TEST_TYPE_POLL,
TEST_TYPE_TEARDOWN,
@@ -65,7 +60,7 @@ enum TEST_TYPES {
TEST_TYPE_MAX
};
-enum STAT_TEST_TYPES {
+enum stat_test_type {
STAT_TEST_RX_DROPPED,
STAT_TEST_TX_INVALID,
STAT_TEST_RX_FULL,
@@ -73,21 +68,16 @@ enum STAT_TEST_TYPES {
STAT_TEST_TYPE_MAX
};
-static int configured_mode = TEST_MODE_UNCONFIGURED;
-static u8 debug_pkt_dump;
-static u32 num_frames;
+static int configured_mode;
+static bool opt_pkt_dump;
+static u32 num_frames = DEFAULT_PKT_CNT / 4;
static bool second_step;
static int test_type;
-static int opt_pkt_count;
-static u8 opt_verbose;
+static bool opt_verbose;
static u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static u32 xdp_bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
-static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
-static u32 pkt_counter;
-static long prev_pkt = -1;
-static int sigvar;
static int stat_test_type;
static u32 rxqsize;
static u32 frame_headroom;
@@ -104,10 +94,6 @@ struct xsk_socket_info {
struct xsk_ring_prod tx;
struct xsk_umem_info *umem;
struct xsk_socket *xsk;
- unsigned long rx_npkts;
- unsigned long tx_npkts;
- unsigned long prev_rx_npkts;
- unsigned long prev_tx_npkts;
u32 outstanding_tx;
};
@@ -118,8 +104,15 @@ struct flow_vector {
} vector;
};
-struct generic_data {
- u32 seqnum;
+struct pkt {
+ u64 addr;
+ u32 len;
+ u32 payload;
+};
+
+struct pkt_stream {
+ u32 nb_pkts;
+ struct pkt *pkts;
};
struct ifobject {
@@ -131,8 +124,8 @@ struct ifobject {
struct xsk_umem_info *umem;
void *(*func_ptr)(void *arg);
struct flow_vector fv;
+ struct pkt_stream *pkt_stream;
int ns_fd;
- int ifdict_index;
u32 dst_ip;
u32 src_ip;
u16 src_port;
@@ -149,18 +142,4 @@ static struct ifobject *ifdict_tx;
pthread_barrier_t barr;
pthread_t t0, t1;
-TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
-struct head_s *head_p;
-struct pkt {
- char *pkt_frame;
-
- TAILQ_ENTRY(pkt) pkt_nodes;
-} *pkt_node_rx, *pkt_node_rx_q;
-
-struct pkt_frame {
- char *payload;
-} *pkt_obj;
-
-struct pkt_frame **pkt_buf;
-
#endif /* XDPXCEIVER_H */
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index dac1c5f78752..bf29d2549bee 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -8,14 +8,8 @@ ksft_xfail=2
ksft_xpass=3
ksft_skip=4
-GREEN='\033[0;92m'
-YELLOW='\033[0;93m'
-RED='\033[0;31m'
-NC='\033[0m'
-STACK_LIM=131072
SPECFILE=veth.spec
XSKOBJ=xdpxceiver
-NUMPKTS=10000
validate_root_exec()
{
@@ -50,22 +44,12 @@ validate_veth_spec_file()
test_status()
{
statusval=$1
- if [ -n "${colorconsole+set}" ]; then
- if [ $statusval -eq 2 ]; then
- echo -e "${YELLOW}$2${NC}: [ ${RED}FAIL${NC} ]"
- elif [ $statusval -eq 1 ]; then
- echo -e "${YELLOW}$2${NC}: [ ${RED}SKIPPED${NC} ]"
- elif [ $statusval -eq 0 ]; then
- echo -e "${YELLOW}$2${NC}: [ ${GREEN}PASS${NC} ]"
- fi
- else
- if [ $statusval -eq 2 ]; then
- echo -e "$2: [ FAIL ]"
- elif [ $statusval -eq 1 ]; then
- echo -e "$2: [ SKIPPED ]"
- elif [ $statusval -eq 0 ]; then
- echo -e "$2: [ PASS ]"
- fi
+ if [ $statusval -eq 2 ]; then
+ echo -e "$2: [ FAIL ]"
+ elif [ $statusval -eq 1 ]; then
+ echo -e "$2: [ SKIPPED ]"
+ elif [ $statusval -eq 0 ]; then
+ echo -e "$2: [ PASS ]"
fi
}
@@ -107,5 +91,5 @@ validate_ip_utility()
execxdpxceiver()
{
- ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} -C ${NUMPKTS} ${VERBOSE_ARG} ${DUMP_PKTS_ARG}
+ ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${VERBOSE_ARG} ${DUMP_PKTS_ARG}
}
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 477cbb042f5b..0315955ff0f4 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -62,6 +62,9 @@ static int __do_binderfs_test(struct __test_metadata *_metadata)
struct binder_version version = { 0 };
char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX",
device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME];
+ static const char * const binder_features[] = {
+ "oneway_spam_detection",
+ };
change_mountns(_metadata);
@@ -150,6 +153,20 @@ static int __do_binderfs_test(struct __test_metadata *_metadata)
}
/* success: binder-control device removal failed as expected */
+
+ for (int i = 0; i < ARRAY_SIZE(binder_features); i++) {
+ snprintf(device_path, sizeof(device_path), "%s/features/%s",
+ binderfs_mntpt, binder_features[i]);
+ fd = open(device_path, O_CLOEXEC | O_RDONLY);
+ EXPECT_GE(fd, 0) {
+ TH_LOG("%s - Failed to open binder feature: %s",
+ strerror(errno), binder_features[i]);
+ goto umount;
+ }
+ close(fd);
+ }
+
+ /* success: binder feature files found */
result = 0;
umount:
diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index 013446e87f1f..38edea25631b 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -6,3 +6,5 @@ CONFIG_HARDENED_USERCOPY=y
# CONFIG_HARDENED_USERCOPY_FALLBACK is not set
CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
+CONFIG_UBSAN_BOUNDS=y
+CONFIG_UBSAN_TRAP=y
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 846cfd508d3c..09f7bfa383cc 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -7,6 +7,7 @@ EXCEPTION
#EXHAUST_STACK Corrupts memory on failure
#CORRUPT_STACK Crashes entire system on success
#CORRUPT_STACK_STRONG Crashes entire system on success
+ARRAY_BOUNDS
CORRUPT_LIST_ADD list_add corruption
CORRUPT_LIST_DEL list_del corruption
STACK_GUARD_PAGE_LEADING
@@ -72,4 +73,6 @@ USERCOPY_KERNEL
STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
CFI_FORWARD_PROTO
FORTIFIED_STRSCPY
+FORTIFIED_OBJECT
+FORTIFIED_SUBOBJECT
PPC_SLB_MULTIHIT Recovered
diff --git a/tools/testing/selftests/move_mount_set_group/.gitignore b/tools/testing/selftests/move_mount_set_group/.gitignore
new file mode 100644
index 000000000000..f5e339268720
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/.gitignore
@@ -0,0 +1 @@
+move_mount_set_group_test
diff --git a/tools/testing/selftests/move_mount_set_group/Makefile b/tools/testing/selftests/move_mount_set_group/Makefile
new file mode 100644
index 000000000000..80c2d86812b0
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for mount selftests.
+CFLAGS = -g -I../../../../usr/include/ -Wall -O2
+
+TEST_GEN_FILES += move_mount_set_group_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/move_mount_set_group/config b/tools/testing/selftests/move_mount_set_group/config
new file mode 100644
index 000000000000..416bd53ce982
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
new file mode 100644
index 000000000000..860198f83a53
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <sys/syscall.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef CLONE_NEWNS
+#define CLONE_NEWNS 0x00020000
+#endif
+
+#ifndef CLONE_NEWUSER
+#define CLONE_NEWUSER 0x10000000
+#endif
+
+#ifndef MS_SHARED
+#define MS_SHARED (1 << 20)
+#endif
+
+#ifndef MS_PRIVATE
+#define MS_PRIVATE (1<<18)
+#endif
+
+#ifndef MOVE_MOUNT_SET_GROUP
+#define MOVE_MOUNT_SET_GROUP 0x00000100
+#endif
+
+#ifndef MOVE_MOUNT_F_EMPTY_PATH
+#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004
+#endif
+
+#ifndef MOVE_MOUNT_T_EMPTY_PATH
+#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040
+#endif
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static int write_file(const char *path, const void *buf, size_t count)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
+ if (fd < 0)
+ return -1;
+
+ ret = write_nointr(fd, buf, count);
+ close(fd);
+ if (ret < 0 || (size_t)ret != count)
+ return -1;
+
+ return 0;
+}
+
+static int create_and_enter_userns(void)
+{
+ uid_t uid;
+ gid_t gid;
+ char map[100];
+
+ uid = getuid();
+ gid = getgid();
+
+ if (unshare(CLONE_NEWUSER))
+ return -1;
+
+ if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
+ errno != ENOENT)
+ return -1;
+
+ snprintf(map, sizeof(map), "0 %d 1", uid);
+ if (write_file("/proc/self/uid_map", map, strlen(map)))
+ return -1;
+
+
+ snprintf(map, sizeof(map), "0 %d 1", gid);
+ if (write_file("/proc/self/gid_map", map, strlen(map)))
+ return -1;
+
+ if (setgid(0))
+ return -1;
+
+ if (setuid(0))
+ return -1;
+
+ return 0;
+}
+
+static int prepare_unpriv_mountns(void)
+{
+ if (create_and_enter_userns())
+ return -1;
+
+ if (unshare(CLONE_NEWNS))
+ return -1;
+
+ if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
+ return -1;
+
+ return 0;
+}
+
+static char *get_field(char *src, int nfields)
+{
+ int i;
+ char *p = src;
+
+ for (i = 0; i < nfields; i++) {
+ while (*p && *p != ' ' && *p != '\t')
+ p++;
+
+ if (!*p)
+ break;
+
+ p++;
+ }
+
+ return p;
+}
+
+static void null_endofword(char *word)
+{
+ while (*word && *word != ' ' && *word != '\t')
+ word++;
+ *word = '\0';
+}
+
+static bool is_shared_mount(const char *path)
+{
+ size_t len = 0;
+ char *line = NULL;
+ FILE *f = NULL;
+
+ f = fopen("/proc/self/mountinfo", "re");
+ if (!f)
+ return false;
+
+ while (getline(&line, &len, f) != -1) {
+ char *opts, *target;
+
+ target = get_field(line, 4);
+ if (!target)
+ continue;
+
+ opts = get_field(target, 2);
+ if (!opts)
+ continue;
+
+ null_endofword(target);
+
+ if (strcmp(target, path) != 0)
+ continue;
+
+ null_endofword(opts);
+ if (strstr(opts, "shared:"))
+ return true;
+ }
+
+ free(line);
+ fclose(f);
+
+ return false;
+}
+
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
+#endif
+
+#define SET_GROUP_FROM "/tmp/move_mount_set_group_supported_from"
+#define SET_GROUP_TO "/tmp/move_mount_set_group_supported_to"
+
+static int move_mount_set_group_supported(void)
+{
+ int ret;
+
+ if (mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"))
+ return -1;
+
+ if (mount(NULL, "/tmp", NULL, MS_PRIVATE, 0))
+ return -1;
+
+ if (mkdir(SET_GROUP_FROM, 0777))
+ return -1;
+
+ if (mkdir(SET_GROUP_TO, 0777))
+ return -1;
+
+ if (mount("testing", SET_GROUP_FROM, "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"))
+ return -1;
+
+ if (mount(SET_GROUP_FROM, SET_GROUP_TO, NULL, MS_BIND, NULL))
+ return -1;
+
+ if (mount(NULL, SET_GROUP_FROM, NULL, MS_SHARED, 0))
+ return -1;
+
+ ret = syscall(SYS_move_mount, AT_FDCWD, SET_GROUP_FROM,
+ AT_FDCWD, SET_GROUP_TO, MOVE_MOUNT_SET_GROUP);
+ umount2("/tmp", MNT_DETACH);
+
+ return ret < 0 ? false : true;
+}
+
+FIXTURE(move_mount_set_group) {
+};
+
+#define SET_GROUP_A "/tmp/A"
+
+FIXTURE_SETUP(move_mount_set_group)
+{
+ int ret;
+
+ ASSERT_EQ(prepare_unpriv_mountns(), 0);
+
+ ret = move_mount_set_group_supported();
+ ASSERT_GE(ret, 0);
+ if (!ret)
+ SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
+
+ umount2("/tmp", MNT_DETACH);
+
+ ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir(SET_GROUP_A, 0777), 0);
+
+ ASSERT_EQ(mount("testing", SET_GROUP_A, "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+}
+
+FIXTURE_TEARDOWN(move_mount_set_group)
+{
+ int ret;
+
+ ret = move_mount_set_group_supported();
+ ASSERT_GE(ret, 0);
+ if (!ret)
+ SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
+
+ umount2("/tmp", MNT_DETACH);
+}
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+{
+ void *stack;
+
+ stack = malloc(__STACK_SIZE);
+ if (!stack)
+ return -ENOMEM;
+
+#ifdef __ia64__
+ return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#else
+ return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#endif
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+struct child_args {
+ int unsfd;
+ int mntnsfd;
+ bool shared;
+ int mntfd;
+};
+
+static int get_nestedns_mount_cb(void *data)
+{
+ struct child_args *ca = (struct child_args *)data;
+ int ret;
+
+ ret = prepare_unpriv_mountns();
+ if (ret)
+ return 1;
+
+ if (ca->shared) {
+ ret = mount(NULL, SET_GROUP_A, NULL, MS_SHARED, 0);
+ if (ret)
+ return 1;
+ }
+
+ ret = open("/proc/self/ns/user", O_RDONLY);
+ if (ret < 0)
+ return 1;
+ ca->unsfd = ret;
+
+ ret = open("/proc/self/ns/mnt", O_RDONLY);
+ if (ret < 0)
+ return 1;
+ ca->mntnsfd = ret;
+
+ ret = open(SET_GROUP_A, O_RDONLY);
+ if (ret < 0)
+ return 1;
+ ca->mntfd = ret;
+
+ return 0;
+}
+
+TEST_F(move_mount_set_group, complex_sharing_copying)
+{
+ struct child_args ca_from = {
+ .shared = true,
+ };
+ struct child_args ca_to = {
+ .shared = false,
+ };
+ pid_t pid;
+ int ret;
+
+ ret = move_mount_set_group_supported();
+ ASSERT_GE(ret, 0);
+ if (!ret)
+ SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
+
+ pid = do_clone(get_nestedns_mount_cb, (void *)&ca_from, CLONE_VFORK |
+ CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
+ ASSERT_EQ(wait_for_pid(pid), 0);
+
+ pid = do_clone(get_nestedns_mount_cb, (void *)&ca_to, CLONE_VFORK |
+ CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
+ ASSERT_EQ(wait_for_pid(pid), 0);
+
+ ASSERT_EQ(syscall(SYS_move_mount, ca_from.mntfd, "",
+ ca_to.mntfd, "", MOVE_MOUNT_SET_GROUP
+ | MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH),
+ 0);
+
+ ASSERT_EQ(setns(ca_to.mntnsfd, CLONE_NEWNS), 0);
+ ASSERT_EQ(is_shared_mount(SET_GROUP_A), 1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c
index 57b505cb1561..e1bf55dabdf6 100644
--- a/tools/testing/selftests/nci/nci_dev.c
+++ b/tools/testing/selftests/nci/nci_dev.c
@@ -57,6 +57,29 @@ const __u8 nci_init_rsp_v2[] = {0x40, 0x01, 0x1c, 0x00, 0x1a, 0x7e, 0x06,
const __u8 nci_rf_disc_map_rsp[] = {0x41, 0x00, 0x01, 0x00};
const __u8 nci_rf_disc_rsp[] = {0x41, 0x03, 0x01, 0x00};
const __u8 nci_rf_deact_rsp[] = {0x41, 0x06, 0x01, 0x00};
+const __u8 nci_rf_deact_ntf[] = {0x61, 0x06, 0x02, 0x00, 0x00};
+const __u8 nci_rf_activate_ntf[] = {0x61, 0x05, 0x1D, 0x01, 0x02, 0x04, 0x00,
+ 0xFF, 0xFF, 0x0C, 0x44, 0x03, 0x07, 0x04,
+ 0x62, 0x26, 0x11, 0x80, 0x1D, 0x80, 0x01,
+ 0x20, 0x00, 0x00, 0x00, 0x06, 0x05, 0x75,
+ 0x77, 0x81, 0x02, 0x80};
+const __u8 nci_t4t_select_cmd[] = {0x00, 0x00, 0x0C, 0x00, 0xA4, 0x04, 0x00,
+ 0x07, 0xD2, 0x76, 0x00, 0x00, 0x85, 0x01, 0x01};
+const __u8 nci_t4t_select_cmd2[] = {0x00, 0x00, 0x07, 0x00, 0xA4, 0x00, 0x0C, 0x02,
+ 0xE1, 0x03};
+const __u8 nci_t4t_select_cmd3[] = {0x00, 0x00, 0x07, 0x00, 0xA4, 0x00, 0x0C, 0x02,
+ 0xE1, 0x04};
+const __u8 nci_t4t_read_cmd[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x00, 0x0F};
+const __u8 nci_t4t_read_rsp[] = {0x00, 0x00, 0x11, 0x00, 0x0F, 0x20, 0x00, 0x3B,
+ 0x00, 0x34, 0x04, 0x06, 0xE1, 0x04, 0x08, 0x00,
+ 0x00, 0x00, 0x90, 0x00};
+const __u8 nci_t4t_read_cmd2[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x00, 0x02};
+const __u8 nci_t4t_read_rsp2[] = {0x00, 0x00, 0x04, 0x00, 0x0F, 0x90, 0x00};
+const __u8 nci_t4t_read_cmd3[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x02, 0x0F};
+const __u8 nci_t4t_read_rsp3[] = {0x00, 0x00, 0x11, 0xD1, 0x01, 0x0B, 0x54, 0x02,
+ 0x65, 0x6E, 0x4E, 0x46, 0x43, 0x20, 0x54, 0x45,
+ 0x53, 0x54, 0x90, 0x00};
+const __u8 nci_t4t_rsp_ok[] = {0x00, 0x00, 0x02, 0x90, 0x00};
struct msgtemplate {
struct nlmsghdr n;
@@ -87,7 +110,7 @@ error:
static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
__u8 genl_cmd, int nla_num, __u16 nla_type[],
- void *nla_data[], int nla_len[])
+ void *nla_data[], int nla_len[], __u16 flags)
{
struct sockaddr_nl nladdr;
struct msgtemplate msg;
@@ -98,7 +121,7 @@ static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
msg.n.nlmsg_type = nlmsg_type;
- msg.n.nlmsg_flags = NLM_F_REQUEST;
+ msg.n.nlmsg_flags = flags;
msg.n.nlmsg_seq = 0;
msg.n.nlmsg_pid = nlmsg_pid;
msg.g.cmd = genl_cmd;
@@ -110,11 +133,11 @@ static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
na->nla_type = nla_type[cnt];
na->nla_len = nla_len[cnt] + NLA_HDRLEN;
- if (nla_len > 0)
+ if (nla_len[cnt] > 0)
memcpy(NLA_DATA(na), nla_data[cnt], nla_len[cnt]);
- msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
- prv_len = na->nla_len;
+ prv_len = NLA_ALIGN(nla_len[cnt]) + NLA_HDRLEN;
+ msg.n.nlmsg_len += prv_len;
}
buf = (char *)&msg;
@@ -146,11 +169,11 @@ static int send_get_nfc_family(int sd, __u32 pid)
nla_get_family_data = family_name;
return send_cmd_mt_nla(sd, GENL_ID_CTRL, pid, CTRL_CMD_GETFAMILY,
- 1, &nla_get_family_type,
- &nla_get_family_data, &nla_get_family_len);
+ 1, &nla_get_family_type, &nla_get_family_data,
+ &nla_get_family_len, NLM_F_REQUEST);
}
-static int get_family_id(int sd, __u32 pid)
+static int get_family_id(int sd, __u32 pid, __u32 *event_group)
{
struct {
struct nlmsghdr n;
@@ -158,8 +181,9 @@ static int get_family_id(int sd, __u32 pid)
char buf[512];
} ans;
struct nlattr *na;
- int rep_len;
+ int resp_len;
__u16 id;
+ int len;
int rc;
rc = send_get_nfc_family(sd, pid);
@@ -167,17 +191,49 @@ static int get_family_id(int sd, __u32 pid)
if (rc < 0)
return 0;
- rep_len = recv(sd, &ans, sizeof(ans), 0);
+ resp_len = recv(sd, &ans, sizeof(ans), 0);
- if (ans.n.nlmsg_type == NLMSG_ERROR || rep_len < 0 ||
- !NLMSG_OK(&ans.n, rep_len))
+ if (ans.n.nlmsg_type == NLMSG_ERROR || resp_len < 0 ||
+ !NLMSG_OK(&ans.n, resp_len))
return 0;
+ len = 0;
+ resp_len = GENLMSG_PAYLOAD(&ans.n);
na = (struct nlattr *)GENLMSG_DATA(&ans);
- na = (struct nlattr *)((char *)na + NLA_ALIGN(na->nla_len));
- if (na->nla_type == CTRL_ATTR_FAMILY_ID)
- id = *(__u16 *)NLA_DATA(na);
+ while (len < resp_len) {
+ len += NLA_ALIGN(na->nla_len);
+ if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
+ id = *(__u16 *)NLA_DATA(na);
+ } else if (na->nla_type == CTRL_ATTR_MCAST_GROUPS) {
+ struct nlattr *nested_na;
+ struct nlattr *group_na;
+ int group_attr_len;
+ int group_attr;
+
+ nested_na = (struct nlattr *)((char *)na + NLA_HDRLEN);
+ group_na = (struct nlattr *)((char *)nested_na + NLA_HDRLEN);
+ group_attr_len = 0;
+
+ for (group_attr = CTRL_ATTR_MCAST_GRP_UNSPEC;
+ group_attr < CTRL_ATTR_MCAST_GRP_MAX; group_attr++) {
+ if (group_na->nla_type == CTRL_ATTR_MCAST_GRP_ID) {
+ *event_group = *(__u32 *)((char *)group_na +
+ NLA_HDRLEN);
+ break;
+ }
+
+ group_attr_len += NLA_ALIGN(group_na->nla_len) +
+ NLA_HDRLEN;
+ if (group_attr_len >= nested_na->nla_len)
+ break;
+
+ group_na = (struct nlattr *)((char *)group_na +
+ NLA_ALIGN(group_na->nla_len));
+ }
+ }
+ na = (struct nlattr *)(GENLMSG_DATA(&ans) + len);
+ }
return id;
}
@@ -189,12 +245,12 @@ static int send_cmd_with_idx(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
int nla_len = 4;
return send_cmd_mt_nla(sd, nlmsg_type, nlmsg_pid, genl_cmd, 1,
- &nla_type, &nla_data, &nla_len);
+ &nla_type, &nla_data, &nla_len, NLM_F_REQUEST);
}
static int get_nci_devid(int sd, __u16 fid, __u32 pid, int dev_id, struct msgtemplate *msg)
{
- int rc, rep_len;
+ int rc, resp_len;
rc = send_cmd_with_idx(sd, fid, pid, NFC_CMD_GET_DEVICE, dev_id);
if (rc < 0) {
@@ -202,14 +258,14 @@ static int get_nci_devid(int sd, __u16 fid, __u32 pid, int dev_id, struct msgtem
goto error;
}
- rep_len = recv(sd, msg, sizeof(*msg), 0);
- if (rep_len < 0) {
+ resp_len = recv(sd, msg, sizeof(*msg), 0);
+ if (resp_len < 0) {
rc = -2;
goto error;
}
if (msg->n.nlmsg_type == NLMSG_ERROR ||
- !NLMSG_OK(&msg->n, rep_len)) {
+ !NLMSG_OK(&msg->n, resp_len)) {
rc = -3;
goto error;
}
@@ -222,21 +278,21 @@ error:
static __u8 get_dev_enable_state(struct msgtemplate *msg)
{
struct nlattr *na;
- int rep_len;
+ int resp_len;
int len;
- rep_len = GENLMSG_PAYLOAD(&msg->n);
+ resp_len = GENLMSG_PAYLOAD(&msg->n);
na = (struct nlattr *)GENLMSG_DATA(msg);
len = 0;
- while (len < rep_len) {
+ while (len < resp_len) {
len += NLA_ALIGN(na->nla_len);
if (na->nla_type == NFC_ATTR_DEVICE_POWERED)
return *(char *)NLA_DATA(na);
na = (struct nlattr *)(GENLMSG_DATA(msg) + len);
}
- return rep_len;
+ return resp_len;
}
FIXTURE(NCI) {
@@ -270,8 +326,7 @@ static void *virtual_dev_open(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_reset_cmd))
@@ -280,8 +335,7 @@ static void *virtual_dev_open(void *data)
goto error;
write(dev_fd, nci_reset_rsp, sizeof(nci_reset_rsp));
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_init_cmd))
@@ -290,8 +344,7 @@ static void *virtual_dev_open(void *data)
goto error;
write(dev_fd, nci_init_rsp, sizeof(nci_init_rsp));
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_rf_disc_map_cmd))
@@ -313,8 +366,7 @@ static void *virtual_dev_open_v2(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_reset_cmd))
@@ -324,8 +376,7 @@ static void *virtual_dev_open_v2(void *data)
write(dev_fd, nci_reset_rsp_v2, sizeof(nci_reset_rsp_v2));
write(dev_fd, nci_reset_ntf, sizeof(nci_reset_ntf));
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_init_cmd_v2))
@@ -334,8 +385,7 @@ static void *virtual_dev_open_v2(void *data)
goto error;
write(dev_fd, nci_init_rsp_v2, sizeof(nci_init_rsp_v2));
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_rf_disc_map_cmd))
@@ -353,6 +403,7 @@ FIXTURE_SETUP(NCI)
{
struct msgtemplate msg;
pthread_t thread_t;
+ __u32 event_group;
int status;
int rc;
@@ -364,12 +415,16 @@ FIXTURE_SETUP(NCI)
ASSERT_NE(self->sd, -1);
self->pid = getpid();
- self->fid = get_family_id(self->sd, self->pid);
+ self->fid = get_family_id(self->sd, self->pid, &event_group);
ASSERT_NE(self->fid, -1);
self->virtual_nci_fd = open("/dev/virtual_nci", O_RDWR);
ASSERT_GT(self->virtual_nci_fd, -1);
+ rc = setsockopt(self->sd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &event_group,
+ sizeof(event_group));
+ ASSERT_NE(rc, -1);
+
rc = ioctl(self->virtual_nci_fd, IOCTL_GET_NCIDEV_IDX, &self->dev_idex);
ASSERT_EQ(rc, 0);
@@ -402,8 +457,7 @@ static void *virtual_deinit(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_reset_cmd))
@@ -425,8 +479,7 @@ static void *virtual_deinit_v2(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_reset_cmd))
@@ -489,16 +542,14 @@ static void *virtual_poll_start(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_rf_discovery_cmd))
goto error;
if (memcmp(nci_rf_discovery_cmd, buf, len))
goto error;
- write(dev_fd, nci_rf_disc_rsp, sizeof(nci_rf_disc_rsp))
- ;
+ write(dev_fd, nci_rf_disc_rsp, sizeof(nci_rf_disc_rsp));
return (void *)0;
error:
@@ -513,8 +564,7 @@ static void *virtual_poll_stop(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_rf_deact_cmd))
@@ -528,38 +578,282 @@ error:
return (void *)-1;
}
-TEST_F(NCI, start_poll)
+int start_polling(int dev_idx, int proto, int virtual_fd, int sd, int fid, int pid)
{
__u16 nla_start_poll_type[2] = {NFC_ATTR_DEVICE_INDEX,
NFC_ATTR_PROTOCOLS};
- void *nla_start_poll_data[2] = {&self->dev_idex, &self->proto};
+ void *nla_start_poll_data[2] = {&dev_idx, &proto};
int nla_start_poll_len[2] = {4, 4};
pthread_t thread_t;
int status;
int rc;
rc = pthread_create(&thread_t, NULL, virtual_poll_start,
- (void *)&self->virtual_nci_fd);
- ASSERT_GT(rc, -1);
+ (void *)&virtual_fd);
+ if (rc < 0)
+ return rc;
- rc = send_cmd_mt_nla(self->sd, self->fid, self->pid,
- NFC_CMD_START_POLL, 2, nla_start_poll_type,
- nla_start_poll_data, nla_start_poll_len);
- EXPECT_EQ(rc, 0);
+ rc = send_cmd_mt_nla(sd, fid, pid, NFC_CMD_START_POLL, 2, nla_start_poll_type,
+ nla_start_poll_data, nla_start_poll_len, NLM_F_REQUEST);
+ if (rc != 0)
+ return rc;
pthread_join(thread_t, (void **)&status);
- ASSERT_EQ(status, 0);
+ return status;
+}
+
+int stop_polling(int dev_idx, int virtual_fd, int sd, int fid, int pid)
+{
+ pthread_t thread_t;
+ int status;
+ int rc;
rc = pthread_create(&thread_t, NULL, virtual_poll_stop,
- (void *)&self->virtual_nci_fd);
- ASSERT_GT(rc, -1);
+ (void *)&virtual_fd);
+ if (rc < 0)
+ return rc;
- rc = send_cmd_with_idx(self->sd, self->fid, self->pid,
- NFC_CMD_STOP_POLL, self->dev_idex);
- EXPECT_EQ(rc, 0);
+ rc = send_cmd_with_idx(sd, fid, pid,
+ NFC_CMD_STOP_POLL, dev_idx);
+ if (rc != 0)
+ return rc;
pthread_join(thread_t, (void **)&status);
+ return status;
+}
+
+/* Polling can be started and then stopped on the virtual NCI device. */
+TEST_F(NCI, start_poll)
+{
+	int status = start_polling(self->dev_idex, self->proto, self->virtual_nci_fd,
+				   self->sd, self->fid, self->pid);
+
+	EXPECT_EQ(status, 0);
+
+	status = stop_polling(self->dev_idex, self->virtual_nci_fd, self->sd,
+			      self->fid, self->pid);
+	EXPECT_EQ(status, 0);
+}
+
+/*
+ * Dump the targets of device @dev_idx with NFC_CMD_GET_TARGET and parse the
+ * first reply.  Returns the reported target index when the reply carries
+ * SEL_RES 0x20 and NFC_PROTO_ISO14443_MASK, -1 otherwise or on any error.
+ */
+int get_taginfo(int dev_idx, int sd, int fid, int pid)
+{
+	struct {
+		struct nlmsghdr n;
+		struct genlmsghdr g;
+		char buf[512];
+	} ans;
+	__u16 tagid_type = NFC_ATTR_DEVICE_INDEX;
+	void *tagid_type_data = &dev_idx;
+	int tagid_len = 4;
+	__u32 protocol = -1;
+	int targetidx = -1;
+	__u8 sel_res = -1;
+	struct nlattr *na;
+	int resp_len;
+	int len = 0;
+
+	/* Do not wait for a reply to a request that was never sent. */
+	if (send_cmd_mt_nla(sd, fid, pid, NFC_CMD_GET_TARGET, 1, &tagid_type,
+			    &tagid_type_data, &tagid_len,
+			    NLM_F_REQUEST | NLM_F_DUMP) < 0)
+		return -1;
+
+	/* Check recv() first: on failure ans holds no valid header to inspect. */
+	resp_len = recv(sd, &ans, sizeof(ans), 0);
+	if (resp_len < 0 || !NLMSG_OK(&ans.n, resp_len) ||
+	    ans.n.nlmsg_type == NLMSG_ERROR)
+		return -1;
+
+	resp_len = GENLMSG_PAYLOAD(&ans.n);
+	na = (struct nlattr *)GENLMSG_DATA(&ans);
+	while (len < resp_len) {
+		/* nla_len == 0 would never advance len; reject malformed attributes. */
+		if (na->nla_len < NLA_HDRLEN)
+			return -1;
+		len += NLA_ALIGN(na->nla_len);
+
+		if (na->nla_type == NFC_ATTR_TARGET_INDEX)
+			targetidx = *(int *)((char *)na + NLA_HDRLEN);
+		else if (na->nla_type == NFC_ATTR_TARGET_SEL_RES)
+			sel_res = *(__u8 *)((char *)na + NLA_HDRLEN);
+		else if (na->nla_type == NFC_ATTR_PROTOCOLS)
+			protocol = *(__u32 *)((char *)na + NLA_HDRLEN);
+
+		na = (struct nlattr *)(GENLMSG_DATA(&ans) + len);
+	}
+
+	if (targetidx == -1 || sel_res != 0x20 || protocol != NFC_PROTO_ISO14443_MASK)
+		return -1;
+	return targetidx;
+}
+
+/*
+ * Open an AF_NFC raw SOCK_SEQPACKET socket and connect it to target
+ * @target_idx of device @dev_idx using NFC_PROTO_ISO14443.
+ * Returns the connected socket, or -1 if socket() or connect() fails.
+ */
+int connect_socket(int dev_idx, int target_idx)
+{
+	struct sockaddr_nfc nfc_addr;
+	int fd = socket(AF_NFC, SOCK_SEQPACKET, NFC_SOCKPROTO_RAW);
+
+	if (fd == -1)
+		return -1;
+
+	nfc_addr.sa_family = AF_NFC;
+	nfc_addr.dev_idx = dev_idx;
+	nfc_addr.target_idx = target_idx;
+	nfc_addr.nfc_protocol = NFC_PROTO_ISO14443;
+	if (connect(fd, (struct sockaddr *)&nfc_addr, sizeof(nfc_addr)) == 0)
+		return fd;
+
+	close(fd);
+	return -1;
+}
+
+/*
+ * Write nci_rf_activate_ntf to @virtual_fd, receive the next netlink message
+ * on @sd and require its first attribute to carry @dev_idx, then look the
+ * target up and connect to it.  Returns the connected socket or -1 on failure.
+ */
+int connect_tag(int dev_idx, int virtual_fd, int sd, int fid, int pid)
+{
+	const int dev_off = sizeof(struct nlmsghdr) + sizeof(struct genlmsghdr) + NLA_HDRLEN;
+	char evt_data[255];
+	int target_idx;
+	int resp_len;
+	int evt_dev;
+
+	if (write(virtual_fd, nci_rf_activate_ntf, sizeof(nci_rf_activate_ntf)) < 0)
+		return -1;	/* the notification was not injected */
+	resp_len = recv(sd, evt_data, sizeof(evt_data), 0);
+	if (resp_len < dev_off + (int)sizeof(evt_dev))
+		return -1;	/* recv() failed or the message is too short to parse */
+	/* evt_data is only char-aligned: copy the value out instead of casting. */
+	memcpy(&evt_dev, evt_data + dev_off, sizeof(evt_dev));
+	if (dev_idx != evt_dev)
+		return -1;
+	target_idx = get_taginfo(dev_idx, sd, fid, pid);
+	return target_idx == -1 ? -1 : connect_socket(dev_idx, target_idx);
+}
+
+/* Send the bytes after @cmd's 3-byte header via @nfc_sock, expect all of @cmd on
+ * @virtual_fd, reply with @rsp and expect its body back.  Returns 0, or -1 on failure. */
+int read_write_nci_cmd(int nfc_sock, int virtual_fd, const __u8 *cmd, __u32 cmd_len,
+		       const __u8 *rsp, __u32 rsp_len)
+{
+	char buf[256];
+	int len;	/* signed: read() and recv() report failure as -1 */
+
+	send(nfc_sock, &cmd[3], cmd_len - 3, 0);
+	len = read(virtual_fd, buf, cmd_len);
+	if (len != (int)cmd_len || memcmp(buf, cmd, cmd_len))
+		return -1;
+	write(virtual_fd, rsp, rsp_len);
+	len = recv(nfc_sock, buf, rsp_len - 2, 0);
+	if (len != (int)(rsp_len - 2) || memcmp(&buf[1], &rsp[3], rsp_len - 3))
+		return -1;
+	return 0;
+}
+
+/* Replay the six canned T4T exchanges in order; -1 as soon as one of the
+ * first five fails, else the result of the last one. */
+int read_tag(int nfc_sock, int virtual_fd)
+{
+#define T4T_STEP(c, r) { (c), sizeof(c), (r), sizeof(r) }
+	static const struct {
+		const __u8 *cmd;
+		__u32 cmd_len;
+		const __u8 *rsp;
+		__u32 rsp_len;
+	} steps[] = {
+		T4T_STEP(nci_t4t_select_cmd, nci_t4t_rsp_ok),
+		T4T_STEP(nci_t4t_select_cmd2, nci_t4t_rsp_ok),
+		T4T_STEP(nci_t4t_read_cmd, nci_t4t_read_rsp),
+		T4T_STEP(nci_t4t_select_cmd3, nci_t4t_rsp_ok),
+		T4T_STEP(nci_t4t_read_cmd2, nci_t4t_read_rsp2),
+		T4T_STEP(nci_t4t_read_cmd3, nci_t4t_read_rsp3),
+	};
+#undef T4T_STEP
+	const int last = sizeof(steps) / sizeof(steps[0]) - 1;
+	int i;
+
+	for (i = 0; i < last; i++) {
+		if (read_write_nci_cmd(nfc_sock, virtual_fd, steps[i].cmd, steps[i].cmd_len,
+				       steps[i].rsp, steps[i].rsp_len))
+			return -1;
+	}
+
+	return read_write_nci_cmd(nfc_sock, virtual_fd, steps[last].cmd, steps[last].cmd_len,
+				  steps[last].rsp, steps[last].rsp_len);
+}
+
+/*
+ * Thread body: expect nci_rf_deact_cmd on the fd pointed to by @data and
+ * answer it with nci_rf_deact_rsp followed by nci_rf_deact_ntf.
+ * Returns (void *)0 on success, (void *)-1 if the command read does not match.
+ */
+static void *virtual_deactivate_proc(void *data)
+{
+	const int want = sizeof(nci_rf_deact_cmd);
+	int fd = *(int *)data;
+	char frame[256];
+
+	if (read(fd, frame, want) != want || memcmp(frame, nci_rf_deact_cmd, want))
+		return (void *)-1;
+
+	write(fd, nci_rf_deact_rsp, sizeof(nci_rf_deact_rsp));
+	write(fd, nci_rf_deact_ntf, sizeof(nci_rf_deact_ntf));
+	return (void *)0;
+}
+
+/*
+ * Send a select that is never answered and expect recv() to fail, then close
+ * @nfc_sock while a thread services the deactivate command.  Returns 0 or -1.
+ */
+int disconnect_tag(int nfc_sock, int virtual_fd)
+{
+	void *thread_ret = (void *)-1;	/* pthread_join() stores a void *, not an int */
+	pthread_t thread_t;
+	char buf[256];
+	int len;
+
+	send(nfc_sock, &nci_t4t_select_cmd3[3], sizeof(nci_t4t_select_cmd3) - 3, 0);
+	len = read(virtual_fd, buf, sizeof(nci_t4t_select_cmd3));
+	if (len < 0 || memcmp(buf, nci_t4t_select_cmd3, sizeof(nci_t4t_select_cmd3)))
+		return -1;
+	if (recv(nfc_sock, buf, sizeof(nci_t4t_rsp_ok), 0) != -1)
+		return -1;
+	if (pthread_create(&thread_t, NULL, virtual_deactivate_proc, (void *)&virtual_fd))
+		return -1;	/* thread never started: nothing to join */
+	close(nfc_sock);
+	pthread_join(thread_t, &thread_ret);
+	return (long)thread_ret;
+}
+
+TEST_F(NCI, t4t_tag_read) /* start_polling(), connect_tag(), read_tag(), disconnect_tag() in sequence */
+{
+ int nfc_sock;
+ int status;
+
+ status = start_polling(self->dev_idex, self->proto, self->virtual_nci_fd,
+ self->sd, self->fid, self->pid);
+ EXPECT_EQ(status, 0);
+
+ nfc_sock = connect_tag(self->dev_idex, self->virtual_nci_fd, self->sd,
+ self->fid, self->pid);
+ ASSERT_GT(nfc_sock, -1); /* the remaining steps need a connected socket */
+
+ status = read_tag(nfc_sock, self->virtual_nci_fd);
ASSERT_EQ(status, 0);
+
+ status = disconnect_tag(nfc_sock, self->virtual_nci_fd);
+ EXPECT_EQ(status, 0);
}
TEST_F(NCI, deinit)
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 79c9eb0034d5..378c0aac5a1a 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -25,6 +25,8 @@ TEST_PROGS += bareudp.sh
TEST_PROGS += unicast_extensions.sh
TEST_PROGS += udpgro_fwd.sh
TEST_PROGS += veth.sh
+TEST_PROGS += ioam6.sh
+TEST_PROGS += gro.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -36,8 +38,11 @@ TEST_GEN_FILES += fin_ack_lat
TEST_GEN_FILES += reuseaddr_ports_exhausted
TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
TEST_GEN_FILES += ipsec
+TEST_GEN_FILES += ioam6_parser
+TEST_GEN_FILES += gro
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+TEST_GEN_FILES += toeplitz
TEST_FILES := settings
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
new file mode 100644
index 000000000000..cfc7f4f97fd1
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -0,0 +1,5 @@
+# test_unix_oob is compiled from test_unix_oob.c, so list it as a generated
+# program: lib.mk then builds it by default and removes it on "make clean".
+TEST_GEN_PROGS := test_unix_oob
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
new file mode 100644
index 000000000000..0f3e3763f4f8
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <netinet/tcp.h>
+#include <sys/un.h>
+#include <sys/signal.h>
+#include <sys/poll.h>
+
+static int pipefd[2];			/* [0]: producer waits here, [1]: consumer writes tokens */
+static volatile sig_atomic_t signal_recvd;	/* written by sig_hand(), checked by main() */
+static pid_t producer_id;		/* fork() result in the consumer, used by die() */
+static char sock_name[32];		/* "unix_oob_<pid>" socket path, unlinked by die() */
+
+static void sig_hand(int sn, siginfo_t *si, void *p) /* handler installed by set_sig_handler() */
+{
+ signal_recvd = sn; /* remember which signal arrived; @si and @p are unused */
+}
+
+/* Install sig_hand() for @signal with SA_SIGINFO | SA_RESTART; returns sigaction()'s result. */
+static int set_sig_handler(int signal)
+{
+	struct sigaction act;
+
+	act.sa_flags = SA_SIGINFO | SA_RESTART;
+	act.sa_sigaction = sig_hand;
+	sigemptyset(&act.sa_mask);
+	return sigaction(signal, &act, NULL);
+}
+
+/*
+ * Make @fd blocking when @set is non-zero and non-blocking when it is zero.
+ */
+static void set_filemode(int fd, int set)
+{
+	int mode = fcntl(fd, F_GETFL, 0);
+
+	mode = set ? (mode & ~O_NONBLOCK) : (mode | O_NONBLOCK);
+	fcntl(fd, F_SETFL, mode);
+}
+
+/* Write the one-byte token 'S' to @fd; wait_for_signal() is the reading side. */
+static void signal_producer(int fd)
+{
+	const char token = 'S';
+
+	write(fd, &token, sizeof(token));
+}
+
+static void wait_for_signal(int fd) /* read() from @fd; counterpart of signal_producer() */
+{
+ char buf[5]; /* scratch: the bytes read are never inspected */
+
+ read(fd, buf, 5);
+}
+
+static void die(int status) /* exit path used by main(): clean up, then exit(@status) */
+{
+ fflush(NULL); /* flush every open output stream */
+ unlink(sock_name); /* remove the socket path that main() bound */
+ kill(producer_id, SIGTERM); /* NOTE(review): producer_id must be a real child pid here */
+ exit(status);
+}
+
+int is_sioctatmark(int fd)	/* returns what ioctl(SIOCATMARK) stored, or -1 on failure */
+{
+	int ans = -1;
+
+	if (ioctl(fd, SIOCATMARK, &ans) < 0) {	/* argp is a plain int *; no size is passed */
+#ifdef DEBUG
+		perror("SIOCATMARK Failed");
+#endif
+	}
+	return ans;
+}
+
+/* Preset *@c to ' ', then try to receive one MSG_OOB byte from @fd into it. */
+void read_oob(int fd, char *c)
+{
+	*c = ' ';
+	if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
+#ifdef DEBUG
+		perror("Reading MSG_OOB Failed");
+#endif
+	}
+}
+
+int read_data(int pfd, char *buf, int size)	/* pre-fill @buf with '0', then read(); returns read()'s result */
+{
+	int len;
+
+	memset(buf, '0', size);	/* memset(dst, fill byte, count) */
+	len = read(pfd, buf, size);
+#ifdef DEBUG
+	if (len < 0)
+		perror("read failed");
+#endif
+	return len;
+}
+
+/* Wait in poll() with an infinite timeout for @event on @pfd.
+ * The poll() result and revents are not inspected. */
+static void wait_for_data(int pfd, int event)
+{
+	struct pollfd watch = { .fd = pfd, .events = event };
+
+	poll(&watch, 1, -1);
+}
+
+/*
+ * Producer child: once released through the pipe, connect to @consumer_addr,
+ * then run the four send scenarios twice, each gated by a token on the pipe.
+ */
+void producer(struct sockaddr_un *consumer_addr)
+{
+	int cfd;
+	char buf[64];
+	int i;
+
+	cfd = socket(AF_UNIX, SOCK_STREAM, 0);
+	wait_for_signal(pipefd[0]);
+	/* Pass the whole sockaddr_un: sizeof(struct sockaddr) would cut sun_path short. */
+	if (connect(cfd, (struct sockaddr *)consumer_addr,
+		    sizeof(*consumer_addr)) != 0) {
+		perror("Connect failed");
+		kill(0, SIGTERM);
+		exit(1);
+	}
+
+	for (i = 0; i < 2; i++) {
+		/* Test 1: Test for SIGURG and OOB */
+		wait_for_signal(pipefd[0]);
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '@';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		/* Test 2: Test for OOB being overwritten */
+		wait_for_signal(pipefd[0]);
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '%';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '#';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		/* Test 3: Test for SIOCATMARK */
+		wait_for_signal(pipefd[0]);
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '@';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '%';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		memset(buf, 'x', sizeof(buf));
+		send(cfd, buf, sizeof(buf), 0);
+
+		/* Test 4: Test for 1byte OOB msg */
+		wait_for_signal(pipefd[0]);
+		memset(buf, 'x', sizeof(buf));
+		buf[0] = '@';
+		send(cfd, buf, 1, MSG_OOB);
+	}
+}
+
+/*
+ * Consumer (parent): bind and listen on "unix_oob_<pid>", fork the producer,
+ * accept its connection and check SIGURG, read lengths, the OOB byte and
+ * SIOCATMARK for each scenario, first without and then with SO_OOBINLINE.
+ */
+int main(int argc, char **argv)
+{
+	int lfd, pfd;
+	struct sockaddr_un consumer_addr, paddr;
+	socklen_t len = sizeof(consumer_addr);
+	char buf[1024];
+	int on = 0;
+	char oob;
+	int atmark;
+
+	lfd = socket(AF_UNIX, SOCK_STREAM, 0);
+	memset(&consumer_addr, 0, sizeof(consumer_addr));
+	consumer_addr.sun_family = AF_UNIX;
+	sprintf(sock_name, "unix_oob_%d", getpid());
+	unlink(sock_name);
+	strcpy(consumer_addr.sun_path, sock_name);
+
+	if ((bind(lfd, (struct sockaddr *)&consumer_addr,
+		  sizeof(consumer_addr))) != 0) {
+		perror("socket bind failed");
+		exit(1);
+	}
+
+	pipe(pipefd);
+
+	listen(lfd, 1);
+
+	producer_id = fork();
+	if (producer_id == 0) {
+		producer(&consumer_addr);
+		exit(0);
+	}
+	/* die() signals producer_id; kill(-1, ...) would hit every process. */
+	if (producer_id < 0) {
+		perror("fork failed");
+		unlink(sock_name);
+		exit(1);
+	}
+
+	set_sig_handler(SIGURG);
+	signal_producer(pipefd[1]);
+
+	pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
+	fcntl(pfd, F_SETOWN, getpid());
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 1:
+	 * verify that SIGURG is delivered, 63 bytes are read and oob is '@'
+	 */
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	read_oob(pfd, &oob);
+	len = read_data(pfd, buf, 1024);
+	if (!signal_recvd || len != 63 || oob != '@') {
+		fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n",
+			signal_recvd, len, oob);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 2:
+	 * Verify that the first OOB is over written by
+	 * the 2nd one and the first OOB is returned as
+	 * part of the read, and sigurg is received.
+	 */
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	len = 0;
+	while (len < 70)
+		len = recv(pfd, buf, 1024, MSG_PEEK);
+	len = read_data(pfd, buf, 1024);
+	read_oob(pfd, &oob);
+	if (!signal_recvd || len != 127 || oob != '#') {
+		fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
+			signal_recvd, len, oob);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 3:
+	 * verify that 2nd oob over writes
+	 * the first one and read breaks at
+	 * oob boundary returning 127 bytes
+	 * and sigurg is received and atmark
+	 * is set.
+	 * oob is '%' and second read returns
+	 * 64 bytes.
+	 */
+	len = 0;
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	while (len < 150)
+		len = recv(pfd, buf, 1024, MSG_PEEK);
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	read_oob(pfd, &oob);
+
+	if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
+		fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c "
+			"atmark %d\n", signal_recvd, len, oob, atmark);
+		die(1);
+	}
+
+	signal_recvd = 0;
+
+	len = read_data(pfd, buf, 1024);
+	if (len != 64) {
+		fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
+			signal_recvd, len, oob);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 4:
+	 * verify that a single byte oob message is delivered.
+	 * set non blocking mode and check proper error is returned
+	 * and sigurg is received and correct oob is read.
+	 */
+
+	set_filemode(pfd, 0);
+
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	len = read_data(pfd, buf, 1024);
+	if ((len == -1) && (errno == EAGAIN))
+		len = 0;
+
+	read_oob(pfd, &oob);
+
+	if (!signal_recvd || len != 0 || oob != '@') {
+		fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
+			signal_recvd, len, oob);
+		die(1);
+	}
+
+	set_filemode(pfd, 1);
+
+	/* Inline Testing */
+
+	on = 1;
+	if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
+		perror("SO_OOBINLINE");
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 1 -- Inline:
+	 * Check that SIGURG is delivered and 63 bytes are read and oob is '@'
+	 */
+
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	len = read_data(pfd, buf, 1024);
+
+	if (!signal_recvd || len != 63) {
+		fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
+			signal_recvd, len);
+		die(1);
+	}
+
+	len = read_data(pfd, buf, 1024);
+
+	if (len != 1) {
+		fprintf(stderr,
+			"Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
+			signal_recvd, len, oob);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 2 -- Inline:
+	 * Verify that the first OOB is over written by
+	 * the 2nd one and read breaks correctly on
+	 * 2nd OOB boundary with the first OOB returned as
+	 * part of the read, and sigurg is delivered and
+	 * siocatmark returns true.
+	 * next read returns one byte, the oob byte
+	 * and siocatmark returns false.
+	 */
+	len = 0;
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	while (len < 70)
+		len = recv(pfd, buf, 1024, MSG_PEEK);
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	if (len != 127 || atmark != 1 || !signal_recvd) {
+		fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
+			len, atmark);
+		die(1);
+	}
+
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	if (len != 1 || buf[0] != '#' || atmark == 1) {
+		fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n",
+			len, buf[0], atmark);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 3 -- Inline:
+	 * verify that 2nd oob over writes
+	 * the first one and read breaks at
+	 * oob boundary returning 127 bytes
+	 * and sigurg is received and siocatmark
+	 * is true after the read.
+	 * subsequent read returns 65 bytes
+	 * because of oob which should be '%'.
+	 */
+	len = 0;
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	while (len < 126)
+		len = recv(pfd, buf, 1024, MSG_PEEK);
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	if (!signal_recvd || len != 127 || !atmark) {
+		fprintf(stderr,
+			"Test 3 Inline failed, sigurg %d len %d data %c\n",
+			signal_recvd, len, buf[0]);
+		die(1);
+	}
+
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	if (len != 65 || buf[0] != '%' || atmark != 0) {
+		fprintf(stderr,
+			"Test 3.1 Inline failed, len %d oob %c atmark %d\n",
+			len, buf[0], atmark);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 4 -- Inline:
+	 * verify that a single
+	 * byte oob message is delivered
+	 * and read returns one byte, the oob
+	 * byte and sigurg is received
+	 */
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	len = read_data(pfd, buf, 1024);
+	if (!signal_recvd || len != 1 || buf[0] != '@') {
+		fprintf(stderr,
+			"Test 4 Inline failed, signal %d len %d data %c\n",
+			signal_recvd, len, buf[0]);
+		die(1);
+	}
+	die(0);
+}
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 6f905b53904f..21b646d10b88 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -42,3 +42,4 @@ CONFIG_NET_CLS_FLOWER=m
CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_ACT_MIRRED=m
CONFIG_BAREUDP=m
+CONFIG_IPV6_IOAM6_LWTUNNEL=y
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index a8ad92850e63..13350cd5c8ac 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -37,6 +37,9 @@
#
# server / client nomenclature relative to ns-A
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
VERBOSE=0
NSA_DEV=eth1
@@ -3879,6 +3882,32 @@ use_case_ping_lla_multi()
log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-C"
}
+# Add iptables/ip6tables SNAT rules (source ${NSA_LO_IP} / ${NSA_LO_IP6}, out
+# ${VRF}) and verify that TCP connections over the VRF to ns-B still succeed.
+use_case_snat_on_vrf()
+{
+ setup "yes"
+
+ local port="12345"
+
+ run_cmd iptables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF}
+ run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
+
+ run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} &
+ sleep 1 # presumably gives the background server time to start listening
+ run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port}
+ log_test $? 0 "IPv4 TCP connection over VRF with SNAT"
+
+ run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} &
+ sleep 1 # same pause before the IPv6 client connects
+ run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port}
+ log_test $? 0 "IPv6 TCP connection over VRF with SNAT"
+
+ # Cleanup: delete the two SNAT rules added at the top of this function
+ run_cmd iptables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF}
+ run_cmd ip6tables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
+}
+
use_cases()
{
log_section "Use cases"
@@ -3886,6 +3915,8 @@ use_cases()
use_case_br
log_subsection "Ping LLA with multiple interfaces"
use_case_ping_lla_multi
+ log_subsection "SNAT on VRF"
+ use_case_snat_on_vrf
}
################################################################################
@@ -3946,7 +3977,7 @@ fi
which nettest >/dev/null
if [ $? -ne 0 ]; then
echo "'nettest' command not found; skipping tests"
- exit 0
+ exit $ksft_skip
fi
declare -i nfail=0
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index a93e6b690e06..43ea8407a82e 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -3,6 +3,9 @@
# This test is for checking IPv4 and IPv6 FIB rules API
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
ret=0
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
@@ -238,12 +241,12 @@ run_fibrule_tests()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
# start clean
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 13d3d4428a32..2c14a86adaaa 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -1,6 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
##############################################################################
# Defines
@@ -9,11 +12,11 @@ if [[ ! -v DEVLINK_DEV ]]; then
| jq -r '.port | keys[]' | cut -d/ -f-2)
if [ -z "$DEVLINK_DEV" ]; then
echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
- exit 1
+ exit $ksft_skip
fi
if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
echo "SKIP: devlink device's bus is not PCI"
- exit 1
+ exit $ksft_skip
fi
DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
@@ -22,7 +25,7 @@ elif [[ ! -z "$DEVLINK_DEV" ]]; then
devlink dev show $DEVLINK_DEV &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: devlink device \"$DEVLINK_DEV\" not found"
- exit 1
+ exit $ksft_skip
fi
fi
@@ -32,19 +35,19 @@ fi
devlink help 2>&1 | grep resource &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 too old, missing devlink resource support"
- exit 1
+ exit $ksft_skip
fi
devlink help 2>&1 | grep trap &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 too old, missing devlink trap support"
- exit 1
+ exit $ksft_skip
fi
devlink dev help 2>&1 | grep info &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 too old, missing devlink dev info support"
- exit 1
+ exit $ksft_skip
fi
##############################################################################
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 42e28c983d41..e7fc5c35b569 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -4,6 +4,9 @@
##############################################################################
# Defines
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
# Can be overridden by the configuration file.
PING=${PING:=ping}
PING6=${PING6:=ping6}
@@ -38,7 +41,7 @@ check_tc_version()
tc -j &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing JSON support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -51,7 +54,7 @@ check_tc_mpls_support()
matchall action pipe &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing MPLS support"
- return 1
+ return $ksft_skip
fi
tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \
matchall
@@ -69,7 +72,7 @@ check_tc_mpls_lse_stats()
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support"
- return 1
+ return $ksft_skip
fi
tc -j filter show dev $dev ingress protocol mpls_uc | jq . &> /dev/null
@@ -79,7 +82,7 @@ check_tc_mpls_lse_stats()
if [[ $ret -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters"
- return 1
+ return $ksft_skip
fi
}
@@ -88,7 +91,7 @@ check_tc_shblock_support()
tc filter help 2>&1 | grep block &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing shared block support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -97,7 +100,7 @@ check_tc_chain_support()
tc help 2>&1|grep chain &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing chain support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -106,7 +109,7 @@ check_tc_action_hw_stats_support()
tc actions help 2>&1 | grep -q hw_stats
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -115,13 +118,13 @@ check_ethtool_lanes_support()
ethtool --help 2>&1| grep lanes &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: ethtool too old; it is missing lanes support"
- exit 1
+ exit $ksft_skip
fi
}
if [[ "$(id -u)" -ne 0 ]]; then
echo "SKIP: need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [[ "$CHECK_TC" = "yes" ]]; then
@@ -134,7 +137,7 @@ require_command()
if [[ ! -x "$(command -v "$cmd")" ]]; then
echo "SKIP: $cmd not installed"
- exit 1
+ exit $ksft_skip
fi
}
@@ -143,7 +146,7 @@ require_command $MZ
if [[ ! -v NUM_NETIFS ]]; then
echo "SKIP: importer does not define \"NUM_NETIFS\""
- exit 1
+ exit $ksft_skip
fi
##############################################################################
@@ -203,7 +206,7 @@ for ((i = 1; i <= NUM_NETIFS; ++i)); do
ip link show dev ${NETIFS[p$i]} &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: could not find all required interfaces"
- exit 1
+ exit $ksft_skip
fi
done
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index 76efb1f8375e..a0d612e04990 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -411,7 +411,7 @@ ping_ipv6()
ip nexthop ls >/dev/null 2>&1
if [ $? -ne 0 ]; then
echo "Nexthop objects not supported; skipping tests"
- exit 0
+ exit $ksft_skip
fi
trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
index 4898dd4118f1..cb08ffe2356a 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -386,7 +386,7 @@ ping_ipv6()
ip nexthop ls >/dev/null 2>&1
if [ $? -ne 0 ]; then
echo "Nexthop objects not supported; skipping tests"
- exit 0
+ exit $ksft_skip
fi
trap cleanup EXIT
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
new file mode 100644
index 000000000000..cf37ce86b0fd
--- /dev/null
+++ b/tools/testing/selftests/net/gro.c
@@ -0,0 +1,1095 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This testsuite provides conformance testing for GRO coalescing.
+ *
+ * Test cases:
+ * 1.data
+ * Data packets of the same size and same header setup with correct
+ * sequence numbers coalesce. The one exception being the last data
+ * packet coalesced: it can be smaller than the rest and coalesced
+ * as long as it is in the same flow.
+ * 2.ack
+ * Pure ACK does not coalesce.
+ * 3.flags
+ * Specific test cases: no packets with PSH, SYN, URG, RST set will
+ * be coalesced.
+ * 4.tcp
+ * Packets with incorrect checksum, non-consecutive seqno and
+ * different TCP header options shouldn't coalesce. Nit: given that
+ * some extension headers have paddings, such as timestamp, headers
+ * that are padded differently would not be coalesced.
+ * 5.ip:
+ * Packets with different (ECN, TTL, TOS) header, ip options or
+ * ip fragments (ipv6) shouldn't coalesce.
+ * 6.large:
+ * Packets that would exceed GRO_MAX_SIZE once merged shouldn't coalesce.
+ *
+ * MSS is defined as 4096 - header because if it is too small
+ * (i.e. 1500 MTU - header), it will result in many packets,
+ * increasing the "large" test case's flakiness. This is because
+ * due to time sensitivity in the coalescing window, the receiver
+ * may not coalesce all of the packets.
+ *
+ * Note the timing issue applies to all of the test cases, so some
+ * flakiness is to be expected.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+
+#define DPORT 8000
+#define SPORT 1500
+#define PAYLOAD_LEN 100
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define NUM_PACKETS 4
+#define START_SEQ 100
+#define START_ACK 100
+#define SIP6 "fdaa::2"
+#define DIP6 "fdaa::1"
+#define SIP4 "192.168.1.200"
+#define DIP4 "192.168.1.100"
+#define ETH_P_NONE 0
+#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
+#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+
+static int proto = -1;
+static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
+static char *testname = "data";
+static char *ifname = "eth0";
+static char *smac = "aa:00:00:00:00:02";
+static char *dmac = "aa:00:00:00:00:01";
+static bool verbose;
+static bool tx_socket = true;
+static int tcp_offset = -1;
+static int total_hdr_len = -1;
+static int ethhdr_proto = -1;
+
+/* Print a printf-style message to stderr, but only when verbose is set. */
+static void vlog(const char *fmt, ...)
+{
+	va_list ap;
+
+	if (!verbose)
+		return;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+}
+/*
+ * Attach a classic BPF socket filter to fd that keeps only the test flow.
+ *
+ * A frame is accepted when all of the following hold, otherwise it is
+ * dropped (filter returns 0):
+ *  - the ethertype field equals ethhdr_proto,
+ *  - the IP protocol / IPv6 next-header byte is IPPROTO_TCP,
+ *  - the 16-bit value at dport_off, or failing that at dport_off + optlen,
+ *    equals DPORT.
+ *
+ * optlen is non-zero only for the "ip" test, where it is the size of the
+ * IPv4 timestamp option or of the IPv6 fragment header, so that packets
+ * carrying that extra header between IP and TCP are matched as well.
+ * NOTE(review): for IPv6 with a fragment header the next-header byte is
+ * not IPPROTO_TCP, so the second test rejects such frames - confirm
+ * whether that is intended.
+ *
+ * Exits via error() if the filter cannot be attached.
+ */
+static void setup_sock_filter(int fd)
+{
+ const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
+ const int ethproto_off = offsetof(struct ethhdr, h_proto);
+ int optlen = 0;
+ int ipproto_off;
+ int next_off;
+
+ if (proto == PF_INET)
+ next_off = offsetof(struct iphdr, protocol);
+ else
+ next_off = offsetof(struct ipv6hdr, nexthdr);
+ ipproto_off = ETH_HLEN + next_off;
+
+ if (strcmp(testname, "ip") == 0) {
+ if (proto == PF_INET)
+ optlen = sizeof(struct ip_timestamp);
+ else
+ optlen = sizeof(struct ip6_frag);
+ }
+
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off + optlen),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ };
+
+ struct sock_fprog bpf = {
+ .len = ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
+ error(1, errno, "error setting filter");
+}
+
+/*
+ * Add the 16-bit words of data[0..len) to the running sum, without folding
+ * the carries.
+ *
+ * Words are read in host memory order, which is what the Internet checksum
+ * needs as long as the folded result is stored back unswapped. An odd
+ * trailing byte is zero-padded to a full word in memory order; it is read
+ * as an unsigned byte so that values >= 0x80 are not sign-extended into
+ * the sum.
+ *
+ * Returns the updated 32-bit sum.
+ */
+static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
+{
+	uint16_t *words = data;
+	size_t i;
+
+	for (i = 0; i < len / 2; i++)
+		sum += words[i];
+	if (len & 1) {
+		uint16_t tail = 0;
+
+		memcpy(&tail, (uint8_t *)data + len - 1, 1);
+		sum += tail;
+	}
+	return sum;
+}
+
+/*
+ * Sum data[0..len) on top of sum, fold the carries back into 16 bits and
+ * return the one's complement of the result.
+ */
+static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
+{
+	uint32_t acc = checksum_nofold(data, len, sum);
+
+	for (; acc > 0xFFFF; acc = (acc >> 16) + (acc & 0xFFFF))
+		;
+	return ~acc;
+}
+
+/*
+ * Compute the TCP checksum of the header at buf followed by payload_len
+ * bytes of data, seeded with the pseudo header for the configured proto
+ * (fixed SIP/DIP addresses). For any other proto the seed is zero.
+ */
+static uint16_t tcp_checksum(void *buf, int payload_len)
+{
+	struct pseudo_header6 {
+		struct in6_addr saddr;
+		struct in6_addr daddr;
+		uint16_t protocol;
+		uint16_t payload_len;
+	} ph6;
+	struct pseudo_header4 {
+		struct in_addr saddr;
+		struct in_addr daddr;
+		uint16_t protocol;
+		uint16_t payload_len;
+	} ph4;
+	const size_t tcp_len = sizeof(struct tcphdr) + payload_len;
+	uint32_t pseudo_sum = 0;
+
+	switch (proto) {
+	case PF_INET6:
+		if (inet_pton(AF_INET6, SIP6, &ph6.saddr) != 1)
+			error(1, errno, "inet_pton6 source ip pseudo");
+		if (inet_pton(AF_INET6, DIP6, &ph6.daddr) != 1)
+			error(1, errno, "inet_pton6 dest ip pseudo");
+		ph6.protocol = htons(IPPROTO_TCP);
+		ph6.payload_len = htons(tcp_len);
+		pseudo_sum = checksum_nofold(&ph6, sizeof(ph6), 0);
+		break;
+	case PF_INET:
+		if (inet_pton(AF_INET, SIP4, &ph4.saddr) != 1)
+			error(1, errno, "inet_pton source ip pseudo");
+		if (inet_pton(AF_INET, DIP4, &ph4.daddr) != 1)
+			error(1, errno, "inet_pton dest ip pseudo");
+		ph4.protocol = htons(IPPROTO_TCP);
+		ph4.payload_len = htons(tcp_len);
+		pseudo_sum = checksum_nofold(&ph4, sizeof(ph4), 0);
+		break;
+	default:
+		break;
+	}
+
+	return checksum_fold(buf, tcp_len, pseudo_sum);
+}
+
+/*
+ * Parse a colon-separated hexadecimal MAC string into the six bytes at
+ * mac_addr; exits via error() unless exactly six fields are parsed.
+ */
+static void read_MAC(uint8_t *mac_addr, char *mac)
+{
+	int parsed;
+
+	parsed = sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+			mac_addr, mac_addr + 1, mac_addr + 2,
+			mac_addr + 3, mac_addr + 4, mac_addr + 5);
+	if (parsed != 6)
+		error(1, 0, "sscanf");
+}
+
+/*
+ * Write the Ethernet header at buf: destination and source MAC from the
+ * dst_mac/src_mac globals, ethertype from ethhdr_proto.
+ */
+static void fill_datalinklayer(void *buf)
+{
+	struct ethhdr *hdr = buf;
+
+	hdr->h_proto = ethhdr_proto;
+	memcpy(hdr->h_source, src_mac, ETH_ALEN);
+	memcpy(hdr->h_dest, dst_mac, ETH_ALEN);
+}
+
+/*
+ * Write an IPv6 or IPv4 header (depending on proto) at buf, describing a
+ * TCP segment with an option-less TCP header and payload_len bytes of data.
+ * Addresses are the fixed SIP/DIP constants; hop limit / TTL is 8. The
+ * IPv4 header has DF set and gets its header checksum filled in.
+ * Nothing is written for any other proto.
+ */
+static void fill_networklayer(void *buf, int payload_len)
+{
+	const int l4_len = sizeof(struct tcphdr) + payload_len;
+
+	if (proto == PF_INET6) {
+		struct ipv6hdr *v6 = buf;
+
+		memset(v6, 0, sizeof(*v6));
+
+		v6->version = 6;
+		v6->nexthdr = IPPROTO_TCP;
+		v6->hop_limit = 8;
+		v6->payload_len = htons(l4_len);
+		if (inet_pton(AF_INET6, SIP6, &v6->saddr) != 1)
+			error(1, errno, "inet_pton source ip6");
+		if (inet_pton(AF_INET6, DIP6, &v6->daddr) != 1)
+			error(1, errno, "inet_pton dest ip6");
+		return;
+	}
+
+	if (proto == PF_INET) {
+		struct iphdr *v4 = buf;
+
+		memset(v4, 0, sizeof(*v4));
+
+		v4->version = 4;
+		v4->ihl = 5;
+		v4->ttl = 8;
+		v4->protocol = IPPROTO_TCP;
+		v4->tot_len = htons(l4_len + sizeof(struct iphdr));
+		v4->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
+		if (inet_pton(AF_INET, SIP4, &v4->saddr) != 1)
+			error(1, errno, "inet_pton source ip");
+		if (inet_pton(AF_INET, DIP4, &v4->daddr) != 1)
+			error(1, errno, "inet_pton dest ip");
+		/* check is still zero from the memset above */
+		v4->check = checksum_fold(buf, sizeof(struct iphdr), 0);
+	}
+}
+
+/*
+ * Write an option-less TCP header (doff = 5) at buf.
+ *
+ * @seq_offset:  added to START_SEQ to form the sequence number
+ * @ack_offset:  added to START_ACK to form the acknowledgment number
+ * @payload_len: number of payload bytes following the header; they must
+ *               already be in place because the checksum covers them
+ * @fin:         value of the FIN flag
+ *
+ * ACK is always set, the window is TCP_MAXWIN, ports are SPORT/DPORT.
+ */
+static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
+				int payload_len, int fin)
+{
+	struct tcphdr *tcph = buf;
+
+	memset(tcph, 0, sizeof(*tcph));
+
+	tcph->source = htons(SPORT);
+	tcph->dest = htons(DPORT);
+	/* host to network order: htonl(), not ntohl() */
+	tcph->seq = htonl(START_SEQ + seq_offset);
+	tcph->ack_seq = htonl(START_ACK + ack_offset);
+	tcph->ack = 1;
+	tcph->fin = fin;
+	tcph->doff = 5;
+	tcph->window = htons(TCP_MAXWIN);
+	tcph->urg_ptr = 0;
+	/* check is zero from the memset, as the computation requires */
+	tcph->check = tcp_checksum(tcph, payload_len);
+}
+
+/*
+ * Send the len-byte frame at buf to daddr through the packet socket fd.
+ * Exits via error() if sendto() fails or sends fewer than len bytes.
+ */
+static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
+{
+	ssize_t ret;
+
+	ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
+	if (ret == -1)
+		error(1, errno, "sendto failure");
+	/* A short send does not set errno; do not print a stale value. */
+	if (ret != len)
+		error(1, 0, "sendto wrong length");
+}
+
+/*
+ * Build a complete frame at buf: zeroed headers, payload_len bytes of 'a',
+ * then TCP, IP and Ethernet headers. The payload is written before the TCP
+ * header because the TCP checksum covers it.
+ */
+static void create_packet(void *buf, int seq_offset, int ack_offset,
+			  int payload_len, int fin)
+{
+	char *frame = buf;
+
+	memset(frame, 0, total_hdr_len);
+	memset(frame + total_hdr_len, 'a', payload_len);
+
+	fill_transportlayer(frame + tcp_offset, seq_offset, ack_offset,
+			    payload_len, fin);
+	fill_networklayer(frame + ETH_HLEN, payload_len);
+	fill_datalinklayer(frame);
+}
+
+/*
+ * Send NUM_PACKETS + 1 packets of which exactly one, neither the first nor
+ * the last (index NUM_PACKETS / 2), carries the given PSH/SYN/RST/URG flag
+ * values. All other packets are plain PAYLOAD_LEN data packets.
+ *
+ * The flagged packet carries PAYLOAD_LEN * psh bytes, i.e. it has a payload
+ * only when psh is 1, and its sequence offset is derived from that same
+ * length. Its TCP checksum is recomputed after the flags are set.
+ */
+static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
+ int rst, int urg)
+{
+ static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ int payload_len, pkt_size, flag, i;
+ struct tcphdr *tcph;
+
+ payload_len = PAYLOAD_LEN * psh;
+ pkt_size = total_hdr_len + payload_len;
+ flag = NUM_PACKETS / 2;
+
+ create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
+
+ tcph = (struct tcphdr *)(flag_buf + tcp_offset);
+ tcph->psh = psh;
+ tcph->syn = syn;
+ tcph->rst = rst;
+ tcph->urg = urg;
+ tcph->check = 0;
+ tcph->check = tcp_checksum(tcph, payload_len);
+
+ for (i = 0; i < NUM_PACKETS + 1; i++) {
+ if (i == flag) {
+ write_packet(fd, flag_buf, pkt_size, daddr);
+ continue;
+ }
+ create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+ }
+}
+
+/*
+ * Send two back-to-back data packets of payload_len1 and payload_len2
+ * bytes with consecutive sequence numbers. Callers pick the lengths to
+ * cover equal sizes, a smaller second packet and a larger second packet.
+ */
+static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
+			   int payload_len1, int payload_len2)
+{
+	static char buf[ETH_HLEN + IP_MAXPACKET];
+	const int lens[2] = { payload_len1, payload_len2 };
+	int seq = 0;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		create_packet(buf, seq, 0, lens[i], 0);
+		write_packet(fd, buf, total_hdr_len + lens[i], daddr);
+		seq += lens[i];
+	}
+}
+
+/* If incoming segments make tracked segment length exceed
+ * legal IP datagram length, do not coalesce.
+ *
+ * Builds NUM_LARGE_PKT consecutive MSS-sized packets, then a packet of
+ * `remainder` bytes at sequence offset NUM_LARGE_PKT * MSS, then another
+ * `remainder`-byte packet at offset (NUM_LARGE_PKT + 1) * MSS, and sends
+ * them in that order. All packets are built before the first one is sent.
+ * The buffers are sized with TOTAL_HDR_LEN (IPv6 header size), while the
+ * lengths actually sent use the runtime total_hdr_len.
+ */
+static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
+{
+ static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
+ static char last[TOTAL_HDR_LEN + MSS];
+ static char new_seg[TOTAL_HDR_LEN + MSS];
+ int i;
+
+ for (i = 0; i < NUM_LARGE_PKT; i++)
+ create_packet(pkts[i], i * MSS, 0, MSS, 0);
+ create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
+ create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
+
+ for (i = 0; i < NUM_LARGE_PKT; i++)
+ write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
+ write_packet(fd, last, total_hdr_len + remainder, daddr);
+ write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
+}
+
+/*
+ * Pure acks and dup acks don't coalesce: send the same payload-less ACK
+ * twice, then one more with the acknowledgment number advanced by one.
+ */
+static void send_ack(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN];
+	int i;
+
+	create_packet(buf, 0, 0, 0, 0);
+	for (i = 0; i < 2; i++)
+		write_packet(fd, buf, total_hdr_len, daddr);
+
+	create_packet(buf, 0, 1, 0, 0);
+	write_packet(fd, buf, total_hdr_len, daddr);
+}
+/*
+ * Turn the option-less packet no_ext into a packet with extlen bytes of
+ * TCP options in buf.
+ *
+ * The headers are copied unchanged and the PAYLOAD_LEN payload bytes are
+ * copied extlen bytes further back, leaving a gap right after the TCP
+ * header. This function does not write the gap itself; the callers in
+ * this file fill it with options before calling. The TCP data offset,
+ * the TCP checksum and the IPv4 total length + header checksum (or the
+ * IPv6 payload length) are then updated to account for the extra bytes.
+ */
+static void recompute_packet(char *buf, char *no_ext, int extlen)
+{
+ struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ memmove(buf, no_ext, total_hdr_len);
+ memmove(buf + total_hdr_len + extlen,
+ no_ext + total_hdr_len, PAYLOAD_LEN);
+
+ tcphdr->doff = tcphdr->doff + (extlen / 4);
+ tcphdr->check = 0;
+ tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
+ if (proto == PF_INET) {
+ iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else {
+ ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+ }
+}
+
+/*
+ * Write a single TCP option of the given kind at buf.
+ *
+ * @buf:  where the option starts; may be at any byte alignment
+ * @kind: TCPOPT_NOP, TCPOPT_WINDOW or TCPOPT_TIMESTAMP; anything else
+ *        terminates the program via error()
+ * @ts:   timestamp value, only used for TCPOPT_TIMESTAMP
+ *
+ * The option structs are packed so that they have their on-wire size
+ * (10 bytes for timestamp, 3 for window scale) and so that the 32-bit
+ * timestamp fields may be stored at unaligned addresses. Without packing
+ * the timestamp struct would grow to 12 bytes with tsval at offset 4.
+ */
+static void tcp_write_options(char *buf, int kind, int ts)
+{
+	struct tcp_option_ts {
+		uint8_t kind;
+		uint8_t len;
+		uint32_t tsval;
+		uint32_t tsecr;
+	} __attribute__((packed)) *opt_ts = (void *)buf;
+	struct tcp_option_window {
+		uint8_t kind;
+		uint8_t len;
+		uint8_t shift;
+	} __attribute__((packed)) *opt_window = (void *)buf;
+
+	switch (kind) {
+	case TCPOPT_NOP:
+		buf[0] = TCPOPT_NOP;
+		break;
+	case TCPOPT_WINDOW:
+		memset(opt_window, 0, sizeof(struct tcp_option_window));
+		opt_window->kind = TCPOPT_WINDOW;
+		opt_window->len = TCPOLEN_WINDOW;
+		opt_window->shift = 0;
+		break;
+	case TCPOPT_TIMESTAMP:
+		memset(opt_ts, 0, sizeof(struct tcp_option_ts));
+		opt_ts->kind = TCPOPT_TIMESTAMP;
+		opt_ts->len = TCPOLEN_TIMESTAMP;
+		/* timestamp values travel in network byte order */
+		opt_ts->tsval = htonl(ts);
+		opt_ts->tsecr = 0;
+		break;
+	default:
+		error(1, 0, "unimplemented TCP option");
+		break;
+	}
+}
+
+/* TCP with options is always a permutation of {TS, NOP, NOP}.
+ * Implement different orders to verify coalescing stops.
+ *
+ * @buf:    output packet; receives the options and then the headers and
+ *          payload of no_ext via recompute_packet()
+ * @no_ext: the option-less source packet
+ * @ts:     timestamp value to place in the TS option
+ * @order:  0 = NOP NOP TS, 1 = NOP TS NOP, 2 = TS NOP NOP; anything else
+ *          terminates the program via error()
+ *
+ * The three options together occupy TCPOLEN_TSTAMP_APPA bytes starting at
+ * buf + total_hdr_len; every NOP must therefore land inside that window.
+ */
+static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
+{
+	char *opts = buf + total_hdr_len;
+
+	switch (order) {
+	case 0:
+		tcp_write_options(opts, TCPOPT_NOP, 0);
+		tcp_write_options(opts + 1, TCPOPT_NOP, 0);
+		tcp_write_options(opts + 2 /* two NOP opts */,
+				  TCPOPT_TIMESTAMP, ts);
+		break;
+	case 1:
+		tcp_write_options(opts, TCPOPT_NOP, 0);
+		tcp_write_options(opts + 1, TCPOPT_TIMESTAMP, ts);
+		tcp_write_options(opts + 1 + TCPOLEN_TIMESTAMP,
+				  TCPOPT_NOP, 0);
+		break;
+	case 2:
+		/* TS occupies bytes 0..TCPOLEN_TIMESTAMP-1; the NOPs follow
+		 * directly, without skipping a byte.
+		 */
+		tcp_write_options(opts, TCPOPT_TIMESTAMP, ts);
+		tcp_write_options(opts + TCPOLEN_TIMESTAMP, TCPOPT_NOP, 0);
+		tcp_write_options(opts + TCPOLEN_TIMESTAMP + 1,
+				  TCPOPT_NOP, 0);
+		break;
+	default:
+		error(1, 0, "unknown order");
+		break;
+	}
+	recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
+}
+
+/* Packets with invalid checksum don't coalesce.
+ *
+ * Sends one valid data packet followed by the next in-sequence packet
+ * whose TCP checksum has been decremented by one after it was computed.
+ */
+static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ tcph->check = tcph->check - 1;
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with non-consecutive sequence number don't coalesce.
+ *
+ * Sends one data packet, then a second one whose sequence number is one
+ * past the in-order value; its TCP checksum is recomputed so that only
+ * the sequence gap distinguishes it.
+ */
+static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	/* network to host, increment, back to network order */
+	tcph->seq = htonl(ntohl(tcph->seq) + 1);
+	tcph->check = 0;
+	tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packet with different timestamp option or different timestamps
+ * don't coalesce.
+ *
+ * Sends five consecutive data packets, each carrying {TS, NOP, NOP} in
+ * some order:
+ *   1, 2: timestamp 0,   order 0 (identical options)
+ *   3:    timestamp 100, order 0 (timestamp value changed)
+ *   4:    timestamp 100, order 1 (option order changed)
+ *   5:    timestamp 100, order 2 (option order changed again)
+ */
+static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+ int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 0, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 0, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 1);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 2);
+ write_packet(fd, extpkt, pkt_size, daddr);
+}
+
+/* Packet with different tcp options don't coalesce.
+ *
+ * Sends two consecutive packets carrying {NOP, NOP, TS} and then a third
+ * one carrying {NOP, WINDOW} instead.
+ *
+ * The options of the third packet are written at total_hdr_len, the
+ * runtime header length for the selected IP version. That is the offset
+ * recompute_packet() leaves free for them; MAX_HDR_LEN is the IPv6 size
+ * and would place them inside the payload area on IPv4.
+ */
+static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+	static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
+	int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+	int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt1, buf, 0, 0);
+	write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt1, buf, 0, 0);
+	write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+	tcp_write_options(extpkt2 + total_hdr_len, TCPOPT_NOP, 0);
+	tcp_write_options(extpkt2 + total_hdr_len + 1, TCPOPT_WINDOW, 0);
+	recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
+	write_packet(fd, extpkt2, extpkt2_size, daddr);
+}
+/*
+ * Build in optpkt a copy of the IPv4 packet buf with an IP timestamp
+ * option inserted between the IP header and the TCP header.
+ *
+ * The option header is written at optpkt + tcp_offset, i.e. where the TCP
+ * header starts in the option-less layout. The Ethernet and IP headers are
+ * then copied from buf, and the TCP header plus PAYLOAD_LEN bytes of data
+ * are copied behind the option. Finally ihl and tot_len are enlarged and
+ * the IP header checksum is recomputed over the header including the
+ * option. The TCP checksum is not touched.
+ *
+ * Exits via error() if the option size is not a multiple of 4 bytes.
+ */
+static void add_ipv4_ts_option(void *buf, void *optpkt)
+{
+ struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
+ int optlen = sizeof(struct ip_timestamp);
+ struct iphdr *iph;
+
+ if (optlen % 4)
+ error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
+
+ ts->ipt_code = IPOPT_TS;
+ ts->ipt_len = optlen;
+ ts->ipt_ptr = 5;
+ ts->ipt_flg = IPOPT_TS_TSONLY;
+
+ memcpy(optpkt, buf, tcp_offset);
+ memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+ iph = (struct iphdr *)(optpkt + ETH_HLEN);
+ iph->ihl = 5 + (optlen / 4);
+ iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
+}
+
+/* IPv4 options shouldn't coalesce.
+ *
+ * Sends three consecutive data packets; only the middle one carries an
+ * IPv4 timestamp option (added by add_ipv4_ts_option()).
+ */
+static void send_ip_options(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
+ int optlen = sizeof(struct ip_timestamp);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+ add_ipv4_ts_option(buf, optpkt);
+ write_packet(fd, optpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+}
+
+/* IPv4 fragments shouldn't coalesce.
+ *
+ * Sends one ordinary data packet and then a packet marked as the first
+ * fragment of a larger datagram (MF set). For the second packet the TCP
+ * header and checksum are computed for PAYLOAD_LEN * 2 bytes of data,
+ * while the IP header is built for, and the frame sent with, only
+ * PAYLOAD_LEN bytes.
+ */
+static void send_fragment4(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[IP_MAXPACKET];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ /* Once fragmented, packet would retain the total_len.
+ * Tcp header is prepared as if rest of data is in follow-up frags,
+ * but follow up frags aren't actually sent.
+ */
+ memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
+ fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
+ fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN);
+ fill_datalinklayer(buf);
+
+ iph->frag_off = htons(0x6000); // DF = 1, MF = 1
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv4 packets with different ttl don't coalesce.
+ *
+ * Sends one data packet with the default TTL set by fill_networklayer()
+ * and then the next in-sequence packet with TTL 7; the IP header checksum
+ * is recomputed after the change. Only the IPv4 header view is used here.
+ */
+static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ iph->ttl = 7;
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different tos don't coalesce.
+ *
+ * Sends one unmodified data packet and then the next in-sequence packet
+ * with a changed TOS (IPv4: tos = 1, header checksum recomputed) or a
+ * changed priority field (IPv6: priority = 0xf).
+ */
+static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ if (proto == PF_INET) {
+ iph->tos = 1;
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else if (proto == PF_INET6) {
+ ip6h->priority = 0xf;
+ }
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different ECN don't coalesce.
+ *
+ * Sends one unmodified data packet and then the next in-sequence packet
+ * with one bit flipped in the second byte of the IP header: 0x2 for IPv4
+ * (followed by a header checksum update), 0x20 for every other proto.
+ * NOTE(review): the bit masks are taken from the code as-is; that they
+ * select the ECN field is stated by the inline comments - confirm against
+ * the IPv4/IPv6 header layouts.
+ */
+static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ if (proto == PF_INET) {
+ buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else {
+ buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10
+ }
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv6 fragments and packets with extensions don't coalesce.
+ *
+ * Sends four consecutive data packets. The third one has an IPv6 fragment
+ * header inserted between the IPv6 header and the TCP header: the IPv6
+ * next-header field is set to IPPROTO_FRAGMENT, the payload length grows
+ * by the size of the fragment header, and the fragment header itself is
+ * all zero except for its next-header field (IPPROTO_TCP).
+ *
+ * The IPv6 header is patched in buf before the headers are copied into
+ * extpkt, and the fragment header is written into extpkt at tcp_offset,
+ * which the following header copy (tcp_offset bytes) does not reach.
+ */
+static void send_fragment6(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
+ sizeof(struct ip6_frag)];
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+ struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
+ int extlen = sizeof(struct ip6_frag);
+ int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
+ int extpkt_len = bufpkt_len + extlen;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, bufpkt_len, daddr);
+ }
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ memset(extpkt, 0, extpkt_len);
+
+ ip6h->nexthdr = IPPROTO_FRAGMENT;
+ ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+ frag->ip6f_nxt = IPPROTO_TCP;
+
+ memcpy(extpkt, buf, tcp_offset);
+ memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+ write_packet(fd, extpkt, extpkt_len, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, bufpkt_len, daddr);
+}
+
+/*
+ * Bind the packet socket fd to interface ifname for protocol ethhdr_proto.
+ * Exits via error() if the interface is unknown or bind() fails.
+ */
+static void bind_packetsocket(int fd)
+{
+	struct sockaddr_ll addr = {};
+
+	addr.sll_ifindex = if_nametoindex(ifname);
+	if (!addr.sll_ifindex)
+		error(1, errno, "if_nametoindex");
+
+	addr.sll_family = AF_PACKET;
+	addr.sll_protocol = ethhdr_proto;
+
+	if (bind(fd, (void *)&addr, sizeof(addr)) < 0)
+		error(1, errno, "could not bind socket");
+}
+
+/* Give fd a 120 second receive timeout; exits via error() on failure. */
+static void set_timeout(int fd)
+{
+	struct timeval tv = {
+		.tv_sec = 120,
+		.tv_usec = 0,
+	};
+	int rc;
+
+	rc = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&tv, sizeof(tv));
+	if (rc < 0)
+		error(1, errno, "cannot set timeout, setsockopt failed");
+}
+
+/*
+ * Receive packets on fd until one with FIN set arrives and compare the
+ * TCP payload length of each received packet with the expectation.
+ *
+ * @fd:               bound and filtered packet socket
+ * @correct_payload:  expected payload length of each packet, in order
+ * @correct_num_pkts: number of valid entries in correct_payload
+ *
+ * Exits via error() if receiving fails, if the number of packets differs
+ * from correct_num_pkts, or if any payload length does not match.
+ * Prints "Test succeeded" otherwise.
+ */
+static void check_recv_pkts(int fd, int *correct_payload,
+			    int correct_num_pkts)
+{
+	static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
+	struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
+	struct tcphdr *tcph;
+	bool bad_packet = false;
+	int tcp_ext_len = 0;
+	int ip_ext_len = 0;
+	int pkt_size = -1;
+	int data_len = 0;
+	int num_pkt = 0;
+	int i;
+
+	vlog("Expected {");
+	for (i = 0; i < correct_num_pkts; i++)
+		vlog("%d ", correct_payload[i]);
+	vlog("}, Total %d packets\nReceived {", correct_num_pkts);
+
+	while (1) {
+		/* The extension length is a per-packet property. Without this
+		 * reset, an IPv6 packet that follows one with a fragment
+		 * header would be parsed with the stale, non-zero length.
+		 */
+		ip_ext_len = 0;
+
+		pkt_size = recv(fd, buffer, sizeof(buffer), 0);
+		if (pkt_size < 0)
+			error(1, errno, "could not receive");
+
+		if (iph->version == 4)
+			ip_ext_len = (iph->ihl - 5) * 4;
+		else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
+			ip_ext_len = sizeof(struct ip6_frag);
+
+		tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
+
+		if (tcph->fin)
+			break;
+
+		tcp_ext_len = (tcph->doff - 5) * 4;
+		data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
+		/* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by IEEE 802.3.
+		 * Ipv4/tcp packets without at least 6 bytes of data will be padded.
+		 * Packet sockets are protocol agnostic, and will not trim the padding.
+		 */
+		if (pkt_size == ETH_ZLEN && iph->version == 4) {
+			data_len = ntohs(iph->tot_len)
+				- sizeof(struct tcphdr) - sizeof(struct iphdr);
+		}
+		vlog("%d ", data_len);
+		if (num_pkt >= correct_num_pkts) {
+			/* Surplus packet: there is no expectation to compare
+			 * it with, and indexing correct_payload[] would read
+			 * past the caller's data.
+			 */
+			bad_packet = true;
+		} else if (data_len != correct_payload[num_pkt]) {
+			vlog("[!=%d]", correct_payload[num_pkt]);
+			bad_packet = true;
+		}
+		num_pkt++;
+	}
+	vlog("}, Total %d packets.\n", num_pkt);
+	if (num_pkt != correct_num_pkts)
+		error(1, 0, "incorrect number of packets");
+	if (bad_packet)
+		error(1, 0, "incorrect packet geometry");
+
+	printf("Test succeeded\n\n");
+}
+/*
+ * Sender side of the test: open a raw packet socket, address it to
+ * dst_mac on interface ifname, and transmit the packet sequence for the
+ * test selected by testname ("data", "ack", "flags", "tcp", "ip" or
+ * "large"). Every sub-case is terminated by a payload-less FIN packet,
+ * which is what check_recv_pkts() uses as its end marker.
+ *
+ * Exits via error() on socket or interface errors and on an unknown
+ * testname.
+ */
+static void gro_sender(void)
+{
+ static char fin_pkt[MAX_HDR_LEN];
+ struct sockaddr_ll daddr = {};
+ int txfd = -1;
+
+ txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
+ if (txfd < 0)
+ error(1, errno, "socket creation");
+
+ memset(&daddr, 0, sizeof(daddr));
+ daddr.sll_ifindex = if_nametoindex(ifname);
+ if (daddr.sll_ifindex == 0)
+ error(1, errno, "if_nametoindex");
+ daddr.sll_family = AF_PACKET;
+ memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
+ daddr.sll_halen = ETH_ALEN;
+ create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
+
+ if (strcmp(testname, "data") == 0) {
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ack") == 0) {
+ send_ack(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "flags") == 0) {
+ send_flags(txfd, &daddr, 1, 0, 0, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 1, 0, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 0, 1, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 0, 0, 1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "tcp") == 0) {
+ send_changed_checksum(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_seq(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_ts(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_diff_opt(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip") == 0) {
+ send_changed_ECN(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_tos(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ if (proto == PF_INET) {
+ /* Modified packets may be received out of order.
+ * Sleep function added to enforce test boundaries
+ * so that fin pkts are not received prior to other pkts.
+ */
+ sleep(1);
+ send_changed_ttl(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ send_ip_options(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ send_fragment4(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (proto == PF_INET6) {
+ send_fragment6(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ }
+ } else if (strcmp(testname, "large") == 0) {
+ /* 20 is the difference between min iphdr size
+ * and min ipv6hdr size. Like MAX_HDR_LEN,
+ * MAX_PAYLOAD is defined with the larger header of the two.
+ */
+ int offset = proto == PF_INET ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+ send_large(txfd, &daddr, remainder);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_large(txfd, &daddr, remainder + 1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else {
+ error(1, 0, "Unknown testcase");
+ }
+
+ if (close(txfd))
+ error(1, errno, "socket close");
+}
+
+/*
+ * Receiver half of the GRO selftest.
+ *
+ * Opens a raw packet socket and configures it through setup_sock_filter(),
+ * set_timeout() and bind_packetsocket().  For the test selected by the
+ * file-scope testname it then fills correct_payload[] (presumably the payload
+ * size expected for each received packet - see check_recv_pkts()) and passes
+ * it, with the number of expected packets, to check_recv_pkts().
+ *
+ * An unknown testname, or a failure to create or close the socket, ends the
+ * program through error().  Socket failures report errno, matching the
+ * sender's error handling.
+ */
+static void gro_receiver(void)
+{
+	static int correct_payload[NUM_PACKETS];
+	int rxfd = -1;
+
+	rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
+	if (rxfd < 0)
+		error(1, errno, "socket creation");
+	setup_sock_filter(rxfd);
+	set_timeout(rxfd);
+	bind_packetsocket(rxfd);
+
+	memset(correct_payload, 0, sizeof(correct_payload));
+
+	if (strcmp(testname, "data") == 0) {
+		printf("pure data packet of same size: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 1);
+
+		printf("large data packets followed by a smaller one: ");
+		correct_payload[0] = PAYLOAD_LEN * 1.5;
+		check_recv_pkts(rxfd, correct_payload, 1);
+
+		printf("small data packets followed by a larger one: ");
+		correct_payload[0] = PAYLOAD_LEN / 2;
+		correct_payload[1] = PAYLOAD_LEN;
+		check_recv_pkts(rxfd, correct_payload, 2);
+	} else if (strcmp(testname, "ack") == 0) {
+		/* correct_payload[] is still all zero here */
+		printf("duplicate ack and pure ack: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+	} else if (strcmp(testname, "flags") == 0) {
+		correct_payload[0] = PAYLOAD_LEN * 3;
+		correct_payload[1] = PAYLOAD_LEN * 2;
+
+		printf("psh flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		/* the same expectation is reused for syn, rst and urg */
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		correct_payload[1] = 0;
+		correct_payload[2] = PAYLOAD_LEN * 2;
+		printf("syn flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+
+		printf("rst flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+
+		printf("urg flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+	} else if (strcmp(testname, "tcp") == 0) {
+		correct_payload[0] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN;
+		correct_payload[2] = PAYLOAD_LEN;
+		correct_payload[3] = PAYLOAD_LEN;
+
+		printf("changed checksum does not coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		printf("Wrong Seq number doesn't coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		printf("Different timestamp doesn't coalesce: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 4);
+
+		printf("Different options doesn't coalesce: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 2);
+	} else if (strcmp(testname, "ip") == 0) {
+		correct_payload[0] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN;
+
+		printf("different ECN doesn't coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		printf("different tos doesn't coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		if (proto == PF_INET) {
+			printf("different ttl doesn't coalesce: ");
+			check_recv_pkts(rxfd, correct_payload, 2);
+
+			printf("ip options doesn't coalesce: ");
+			correct_payload[2] = PAYLOAD_LEN;
+			check_recv_pkts(rxfd, correct_payload, 3);
+
+			printf("fragmented ip4 doesn't coalesce: ");
+			check_recv_pkts(rxfd, correct_payload, 2);
+		} else if (proto == PF_INET6) {
+			/* GRO doesn't check for ipv6 hop limit when flushing.
+			 * Hence no corresponding test to the ipv4 case.
+			 */
+			printf("fragmented ip6 doesn't coalesce: ");
+			correct_payload[0] = PAYLOAD_LEN * 2;
+			check_recv_pkts(rxfd, correct_payload, 2);
+		}
+	} else if (strcmp(testname, "large") == 0) {
+		/* Same offset/remainder computation as on the sending side. */
+		int offset = proto == PF_INET ? 20 : 0;
+		int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+		correct_payload[0] = (MAX_PAYLOAD + offset);
+		correct_payload[1] = remainder;
+		printf("Shouldn't coalesce if exceed IP max pkt size: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		/* last segment sent individually, doesn't start new segment */
+		correct_payload[0] = correct_payload[0] - remainder;
+		correct_payload[1] = remainder + 1;
+		correct_payload[2] = remainder + 1;
+		check_recv_pkts(rxfd, correct_payload, 3);
+	} else {
+		error(1, 0, "Test case error, should never trigger");
+	}
+
+	if (close(rxfd))
+		error(1, errno, "socket close");
+}
+
+/*
+ * Parse the command line into the file-scope test settings.
+ *
+ *   -4/--ipv4, -6/--ipv6   select proto and the matching ethhdr_proto
+ *   -D/--dmac, -S/--smac   destination / source MAC strings
+ *   -i/--iface             interface name
+ *   -r/--rx                clear tx_socket, so main() runs the receiver
+ *   -t/--test              test case name
+ *   -v/--verbose           set verbose
+ *
+ * Any other value returned by getopt_long() aborts through error().
+ */
+static void parse_args(int argc, char **argv)
+{
+	static const struct option long_opts[] = {
+		{ "dmac", required_argument, NULL, 'D' },
+		{ "iface", required_argument, NULL, 'i' },
+		{ "ipv4", no_argument, NULL, '4' },
+		{ "ipv6", no_argument, NULL, '6' },
+		{ "rx", no_argument, NULL, 'r' },
+		{ "smac", required_argument, NULL, 'S' },
+		{ "test", required_argument, NULL, 't' },
+		{ "verbose", no_argument, NULL, 'v' },
+		{ 0, 0, 0, 0 }
+	};
+
+	for (;;) {
+		int opt = getopt_long(argc, argv, "46D:i:rS:t:v",
+				      long_opts, NULL);
+
+		/* -1 marks the end of the option list */
+		if (opt == -1)
+			break;
+
+		switch (opt) {
+		case '4':
+			proto = PF_INET;
+			ethhdr_proto = htons(ETH_P_IP);
+			break;
+		case '6':
+			proto = PF_INET6;
+			ethhdr_proto = htons(ETH_P_IPV6);
+			break;
+		case 'D':
+			dmac = optarg;
+			break;
+		case 'S':
+			smac = optarg;
+			break;
+		case 'i':
+			ifname = optarg;
+			break;
+		case 't':
+			testname = optarg;
+			break;
+		case 'r':
+			tx_socket = false;
+			break;
+		case 'v':
+			verbose = true;
+			break;
+		default:
+			error(1, 0, "%s invalid option %c\n", __func__, opt);
+			break;
+		}
+	}
+}
+
+/*
+ * Entry point: parse the options, derive tcp_offset and total_hdr_len for the
+ * selected IP version, hand both MAC strings to read_MAC() and then run either
+ * the sender or the receiver depending on tx_socket.
+ */
+int main(int argc, char **argv)
+{
+	parse_args(argc, argv);
+
+	switch (proto) {
+	case PF_INET:
+		tcp_offset = ETH_HLEN + sizeof(struct iphdr);
+		total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+		break;
+	case PF_INET6:
+		tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
+		total_hdr_len = MAX_HDR_LEN;
+		break;
+	default:
+		error(1, 0, "Protocol family is not ipv4 or ipv6");
+		break;
+	}
+
+	read_MAC(src_mac, smac);
+	read_MAC(dst_mac, dmac);
+
+	if (!tx_socket)
+		gro_receiver();
+	else
+		gro_sender();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
new file mode 100755
index 000000000000..342ad27f631b
--- /dev/null
+++ b/tools/testing/selftests/net/gro.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# MAC addresses passed to ./gro as --dmac (server) and --smac (client).
+readonly SERVER_MAC="aa:00:00:00:00:02"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+# Test names and protocols iterated over by run_all_tests.
+readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large")
+readonly PROTOS=("ipv4" "ipv6")
+# Defaults, overridden by the -i, -t and -p options respectively.
+dev=""
+test="all"
+proto="ipv4"
+
+# run_test <protocol> <test>
+#
+# Runs one ./gro test case: the receiver is started in the background inside
+# server_ns, the sender runs in client_ns, and the receiver's exit status is
+# collected with wait. The pair is attempted up to three times and the loop
+# stops at the first success.
+#
+# The resulting status is printed on stdout (it is NOT the function's return
+# value); the stdout of both ./gro processes is appended to log.txt.
+# setup_ns and cleanup_ns are not defined in this script - they are expected
+# to come from the setup script sourced below.
+run_test() {
+ local server_pid=0
+ local exit_code=0
+ local protocol=$1
+ local test=$2
+ # Arguments shared by the sender and the receiver.
+ local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \
+ "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
+
+ setup_ns
+ # Each test is run 3 times to deflake, because given the receive timing,
+ # not all packets that should coalesce will be considered in the same flow
+ # on every try.
+ for tries in {1..3}; do
+ # Actual test starts here
+ ip netns exec server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
+ 1>>log.txt &
+ server_pid=$!
+ sleep 0.5 # to allow for socket init
+ ip netns exec client_ns ./gro "${ARGS[@]}" "--iface" "client" \
+ 1>>log.txt
+ # Only the receiver's status decides pass/fail.
+ wait "${server_pid}"
+ exit_code=$?
+ if [[ "${exit_code}" -eq 0 ]]; then
+ break;
+ fi
+ done
+ cleanup_ns
+ # Callers capture this with $(run_test ...).
+ echo ${exit_code}
+}
+
+# Run every test in TESTS for every protocol in PROTOS, collecting the names
+# of the failing combinations. Exits 1 if any of them failed.
+run_all_tests() {
+  local failed_tests=()
+
+  for proto in "${PROTOS[@]}"; do
+    for test in "${TESTS[@]}"; do
+      echo "running test ${proto} ${test}" >&2
+      # run_test prints its status on stdout
+      exit_code=$(run_test $proto $test)
+      if [[ "${exit_code}" -eq 0 ]]; then
+        continue
+      fi
+      failed_tests+=("${proto}_${test}")
+    done
+  done
+
+  if [[ ${#failed_tests[@]} -eq 0 ]]; then
+    echo "All Tests Succeeded!"
+    return
+  fi
+
+  echo "failed tests: ${failed_tests[*]}. \
+ Please see log.txt for more logs"
+  exit 1
+}
+
+# Print the option summary on stderr and exit with status 1.
+usage() {
+  >&2 echo "Usage: $0 \
+ [-i <DEV>] \
+ [-t data|ack|flags|tcp|ip|large] \
+ [-p <ipv4|ipv6>]"
+  exit 1
+}
+
+# Parse the command line: -i <dev>, -t <test>, -p <proto>. Anything else
+# prints the usage text and exits.
+while getopts "i:t:p:" opt; do
+  case "${opt}" in
+    i)
+      dev="${OPTARG}"
+      ;;
+    t)
+      test="${OPTARG}"
+      ;;
+    p)
+      proto="${OPTARG}"
+      ;;
+    *)
+      usage
+      ;;
+  esac
+done
+
+# With a device given through -i, setup_loopback.sh is sourced; otherwise
+# setup_veth.sh is.
+if [ -n "$dev" ]; then
+  source setup_loopback.sh
+else
+  source setup_veth.sh
+fi
+
+setup
+trap cleanup EXIT
+if [[ "${test}" == "all" ]]; then
+  run_all_tests
+else
+  # run_test reports the test status on stdout, not as its return value.
+  # Capture it and exit with it, so that a failing single test no longer
+  # makes the script exit 0. An empty result is treated as a failure.
+  exit_code=$(run_test "${proto}" "${test}")
+  exit "${exit_code:-1}"
+fi;
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
new file mode 100755
index 000000000000..3caf72bb9c6a
--- /dev/null
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -0,0 +1,652 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Author: Justin Iurman <justin.iurman@uliege.be>
+#
+# This script evaluates the IOAM insertion for IPv6 by checking the IOAM data
+# consistency directly inside packets on the receiver side. Tests are divided
+# into three categories: OUTPUT (evaluates the IOAM processing by the sender),
+# INPUT (evaluates the IOAM processing by the receiver) and GLOBAL (evaluates
+# wider use cases that do not fall into the other two categories). Both OUTPUT
+# and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL
+# tests use the entire three-node topology (alpha, beta, gamma). Each test is
+# documented inside its own handler in the code below.
+#
+# An IOAM domain is configured from Alpha to Gamma but not on the reverse path.
+# When either Beta or Gamma is the destination (depending on the test category),
+# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | Alpha netns | | Gamma netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | db01::2/64 | | | | db02::2/64 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +----------------------------------------------------+
+# | . . |
+# | +-------------+ +-------------+ |
+# | | veth0 | | veth1 | |
+# | | db01::1/64 | ................ | db02::1/64 | |
+# | +-------------+ +-------------+ |
+# | |
+# | Beta netns |
+# | |
+# +----------------------------------------------------+
+#
+#
+#
+# =============================================================
+# | Alpha - IOAM configuration |
+# +===========================================================+
+# | Node ID | 1 |
+# +-----------------------------------------------------------+
+# | Node Wide ID | 11111111 |
+# +-----------------------------------------------------------+
+# | Ingress ID | 0xffff (default value) |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID | 0xffffffff (default value) |
+# +-----------------------------------------------------------+
+# | Egress ID | 101 |
+# +-----------------------------------------------------------+
+# | Egress Wide ID | 101101 |
+# +-----------------------------------------------------------+
+# | Namespace Data | 0xdeadbee0 |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf00dc0de |
+# +-----------------------------------------------------------+
+# | Schema ID | 777 |
+# +-----------------------------------------------------------+
+# | Schema Data | something that will be 4n-aligned |
+# +-----------------------------------------------------------+
+#
+#
+# =============================================================
+# | Beta - IOAM configuration |
+# +===========================================================+
+# | Node ID | 2 |
+# +-----------------------------------------------------------+
+# | Node Wide ID | 22222222 |
+# +-----------------------------------------------------------+
+# | Ingress ID | 201 |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID | 201201 |
+# +-----------------------------------------------------------+
+# | Egress ID | 202 |
+# +-----------------------------------------------------------+
+# | Egress Wide ID | 202202 |
+# +-----------------------------------------------------------+
+# | Namespace Data | 0xdeadbee1 |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf11dc0de |
+# +-----------------------------------------------------------+
+# | Schema ID | 666 |
+# +-----------------------------------------------------------+
+# | Schema Data | Hello there -Obi |
+# +-----------------------------------------------------------+
+#
+#
+# =============================================================
+# | Gamma - IOAM configuration |
+# +===========================================================+
+# | Node ID | 3 |
+# +-----------------------------------------------------------+
+# | Node Wide ID | 33333333 |
+# +-----------------------------------------------------------+
+# | Ingress ID | 301 |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID | 301301 |
+# +-----------------------------------------------------------+
+# | Egress ID | 0xffff (default value) |
+# +-----------------------------------------------------------+
+# | Egress Wide ID | 0xffffffff (default value) |
+# +-----------------------------------------------------------+
+# | Namespace Data | 0xdeadbee2 |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf22dc0de |
+# +-----------------------------------------------------------+
+# | Schema ID | 0xffffff (= None) |
+# +-----------------------------------------------------------+
+# | Schema Data | |
+# +-----------------------------------------------------------+
+
+
+################################################################################
+# #
+# WARNING: Be careful if you modify the block below - it MUST be kept #
+# synchronized with configurations inside ioam6_parser.c and always #
+# reflect the same. #
+# #
+################################################################################
+
+ALPHA=(
+ 1 # ID
+ 11111111 # Wide ID
+ 0xffff # Ingress ID
+ 0xffffffff # Ingress Wide ID
+ 101 # Egress ID
+ 101101 # Egress Wide ID
+ 0xdeadbee0 # Namespace Data
+ 0xcafec0caf00dc0de # Namespace Wide Data
+ 777 # Schema ID (0xffffff = None)
+ "something that will be 4n-aligned" # Schema Data
+)
+
+BETA=(
+ 2 # ID
+ 22222222 # Wide ID
+ 201 # Ingress ID
+ 201201 # Ingress Wide ID
+ 202 # Egress ID
+ 202202 # Egress Wide ID
+ 0xdeadbee1 # Namespace Data
+ 0xcafec0caf11dc0de # Namespace Wide Data
+ 666 # Schema ID (0xffffff = None)
+ "Hello there -Obi" # Schema Data
+)
+
+GAMMA=(
+ 3 # ID
+ 33333333 # Wide ID
+ 301 # Ingress ID
+ 301301 # Ingress Wide ID
+ 0xffff # Egress ID
+ 0xffffffff # Egress Wide ID
+ 0xdeadbee2 # Namespace Data
+ 0xcafec0caf22dc0de # Namespace Wide Data
+ 0xffffff # Schema ID (0xffffff = None)
+ "" # Schema Data
+)
+
+TESTS_OUTPUT="
+ out_undef_ns
+ out_no_room
+ out_bits
+ out_full_supp_trace
+"
+
+TESTS_INPUT="
+ in_undef_ns
+ in_no_room
+ in_oflag
+ in_bits
+ in_full_supp_trace
+"
+
+TESTS_GLOBAL="
+ fwd_full_supp_trace
+"
+
+
+################################################################################
+# #
+# LIBRARY #
+# #
+################################################################################
+
+check_kernel_compatibility()
+{
+  # Probe, inside a throw-away netns holding a veth pair, whether an IOAM
+  # namespace can be added and an ioam6 trace can be attached to a route.
+  # If either probe fails, the test is skipped: the script exits with 4, the
+  # status kselftest reserves for skipped tests.
+  #
+  # The probe veth pair lives inside ioam-tmp-node, so it is deleted there
+  # and never in the caller's namespace, where "veth0" could be an unrelated
+  # interface.
+  local ksft_skip=4
+
+  ip netns add ioam-tmp-node
+  ip link add name veth0 netns ioam-tmp-node type veth \
+         peer name veth1 netns ioam-tmp-node
+
+  ip -netns ioam-tmp-node link set veth0 up
+  ip -netns ioam-tmp-node link set veth1 up
+
+  ip -netns ioam-tmp-node ioam namespace add 0 &>/dev/null
+  ns_ad=$?
+
+  ip -netns ioam-tmp-node ioam namespace show | grep -q "namespace 0"
+  ns_sh=$?
+
+  if [[ $ns_ad != 0 || $ns_sh != 0 ]]
+  then
+    echo "SKIP: kernel version probably too old, missing ioam support"
+    ip -netns ioam-tmp-node link del veth0 2>/dev/null || true
+    ip netns del ioam-tmp-node || true
+    exit $ksft_skip
+  fi
+
+  ip -netns ioam-tmp-node route add db02::/64 encap ioam6 trace prealloc \
+         type 0x800000 ns 0 size 4 dev veth0 &>/dev/null
+  tr_ad=$?
+
+  ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6 trace"
+  tr_sh=$?
+
+  if [[ $tr_ad != 0 || $tr_sh != 0 ]]
+  then
+    echo "SKIP: cannot attach an ioam trace to a route, did you compile" \
+         "without CONFIG_IPV6_IOAM6_LWTUNNEL?"
+    ip -netns ioam-tmp-node link del veth0 2>/dev/null || true
+    ip netns del ioam-tmp-node || true
+    exit $ksft_skip
+  fi
+
+  ip -netns ioam-tmp-node link del veth0 2>/dev/null || true
+  ip netns del ioam-tmp-node || true
+}
+
+cleanup()
+{
+  # Best-effort removal of the test links and of the three node namespaces;
+  # failures are ignored.
+  local link ns
+
+  for link in ioam-veth-alpha ioam-veth-gamma
+  do
+    ip link del $link 2>/dev/null || true
+  done
+
+  for ns in ioam-node-alpha ioam-node-beta ioam-node-gamma
+  do
+    ip netns del $ns || true
+  done
+}
+
+setup()
+{
+  # Build the alpha <-> beta <-> gamma topology (one netns per node), apply
+  # the IOAM configuration held in the ALPHA, BETA and GAMMA arrays, then
+  # check connectivity with a ping from alpha to gamma.
+  #
+  # If that ping fails the script is cleaned up and exits with status 1, so
+  # that a broken setup is reported as a failure rather than as a pass.
+  ip netns add ioam-node-alpha
+  ip netns add ioam-node-beta
+  ip netns add ioam-node-gamma
+
+  ip link add name ioam-veth-alpha netns ioam-node-alpha type veth \
+         peer name ioam-veth-betaL netns ioam-node-beta
+  ip link add name ioam-veth-betaR netns ioam-node-beta type veth \
+         peer name ioam-veth-gamma netns ioam-node-gamma
+
+  # Inside their namespaces the links get the short names used by the tests.
+  ip -netns ioam-node-alpha link set ioam-veth-alpha name veth0
+  ip -netns ioam-node-beta link set ioam-veth-betaL name veth0
+  ip -netns ioam-node-beta link set ioam-veth-betaR name veth1
+  ip -netns ioam-node-gamma link set ioam-veth-gamma name veth0
+
+  ip -netns ioam-node-alpha addr add db01::2/64 dev veth0
+  ip -netns ioam-node-alpha link set veth0 up
+  ip -netns ioam-node-alpha link set lo up
+  ip -netns ioam-node-alpha route add db02::/64 via db01::1 dev veth0
+  ip -netns ioam-node-alpha route del db01::/64
+  ip -netns ioam-node-alpha route add db01::/64 dev veth0
+
+  ip -netns ioam-node-beta addr add db01::1/64 dev veth0
+  ip -netns ioam-node-beta addr add db02::1/64 dev veth1
+  ip -netns ioam-node-beta link set veth0 up
+  ip -netns ioam-node-beta link set veth1 up
+  ip -netns ioam-node-beta link set lo up
+
+  ip -netns ioam-node-gamma addr add db02::2/64 dev veth0
+  ip -netns ioam-node-gamma link set veth0 up
+  ip -netns ioam-node-gamma link set lo up
+  ip -netns ioam-node-gamma route add db01::/64 via db02::1 dev veth0
+
+  # - IOAM config -
+  ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]}
+  ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]}
+  ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]}
+  ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]}
+  ip -netns ioam-node-alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]}
+  ip -netns ioam-node-alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}"
+  ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]}
+
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.all.forwarding=1
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]}
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]}
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]}
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]}
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]}
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]}
+  ip -netns ioam-node-beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]}
+  ip -netns ioam-node-beta ioam schema add ${BETA[8]} "${BETA[9]}"
+  ip -netns ioam-node-beta ioam namespace set 123 schema ${BETA[8]}
+
+  ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]}
+  ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]}
+  ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+  ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]}
+  ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]}
+  ip -netns ioam-node-gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]}
+
+  sleep 1
+
+  ip netns exec ioam-node-alpha ping6 -c 5 -W 1 db02::2 &>/dev/null
+  if [ $? != 0 ]
+  then
+    echo "Setup FAILED"
+    cleanup &>/dev/null
+    exit 1
+  fi
+}
+
+log_test_passed()
+{
+  # Print the aligned result line of a passing test; $1 is its description.
+  printf "TEST: %-60s [ OK ]\n" "$1"
+}
+
+log_test_failed()
+{
+  # Print the aligned result line of a failing test; $1 is its description.
+  printf "TEST: %-60s [FAIL]\n" "$1"
+}
+
+run_test()
+{
+  # run_test <name> <desc> <src netns> <dst netns> <src ip6> <dst ip6>
+  #          <dst interface> <trace type> <ioam namespace>
+  #
+  # Starts ./ioam6_parser in the destination netns, sends a single ping from
+  # the source netns and logs the test as passed only if the ping succeeded
+  # and the parser exited with status 0.
+  local name=$1 desc=$2
+  local node_src=$3 node_dst=$4
+  local ip6_src=$5 ip6_dst=$6
+  local if_dst=$7 trace_type=$8 ioam_ns=$9
+
+  ip netns exec $node_dst ./ioam6_parser $if_dst $name $ip6_src $ip6_dst \
+         $trace_type $ioam_ns &
+  local spid=$!
+  sleep 0.1
+
+  if ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null
+  then
+    # the parser's exit status is the verdict
+    wait $spid
+    [ $? = 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}"
+  else
+    # no ping reply: fail and interrupt the parser
+    log_test_failed "${desc}"
+    kill -2 $spid &>/dev/null
+  fi
+}
+
+# Run the three test categories in order. Each name listed in TESTS_OUTPUT,
+# TESTS_INPUT and TESTS_GLOBAL is invoked as a function. Category-specific
+# settings are applied before a category and reverted after it, so the order
+# of the statements below matters.
+run()
+{
+ echo
+ echo "OUTPUT tests"
+ # prints a line of 74 dashes
+ printf "%0.s-" {1..74}
+ echo
+
+ # set OUTPUT settings
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0
+
+ for t in $TESTS_OUTPUT
+ do
+ $t
+ done
+
+ # clean OUTPUT settings
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+ # replace the route the tests changed with one that has no ioam6 encap
+ ip -netns ioam-node-alpha route change db01::/64 dev veth0
+
+
+ echo
+ echo "INPUT tests"
+ printf "%0.s-" {1..74}
+ echo
+
+ # set INPUT settings
+ ip -netns ioam-node-alpha ioam namespace del 123
+
+ for t in $TESTS_INPUT
+ do
+ $t
+ done
+
+ # clean INPUT settings
+ ip -netns ioam-node-alpha ioam namespace add 123 \
+ data ${ALPHA[6]} wide ${ALPHA[7]}
+ ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]}
+ ip -netns ioam-node-alpha route change db01::/64 dev veth0
+
+
+ echo
+ echo "GLOBAL tests"
+ printf "%0.s-" {1..74}
+ echo
+
+ for t in $TESTS_GLOBAL
+ do
+ $t
+ done
+}
+
+# Lookup tables indexed by trace-type bit number (0..22), used by out_bits and
+# in_bits: bit2type[n] is the trace type value with only bit <n> set and
+# bit2size[n] is the trace size requested for it. Entry 22 of bit2size is
+# temporarily enlarged by those tests to make room for the schema data.
+bit2type=(
+ 0x800000 0x400000 0x200000 0x100000 0x080000 0x040000 0x020000 0x010000
+ 0x008000 0x004000 0x002000 0x001000 0x000800 0x000400 0x000200 0x000100
+ 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002
+)
+bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 )
+
+
+################################################################################
+# #
+# OUTPUT tests #
+# #
+# Two nodes (sender/receiver), IOAM disabled on ingress for the receiver. #
+################################################################################
+
+out_undef_ns()
+{
+  # The encap node must not fill the trace when the chosen IOAM namespace is
+  # not configured locally.
+  local desc="Unknown IOAM namespace"
+  local trace_type=0x800000
+  local ioam_ns=0
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type $trace_type ns $ioam_ns size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 $trace_type $ioam_ns
+}
+
+out_no_room()
+{
+  # The encap node must not fill the trace, and must set the Overflow flag,
+  # when there is not enough room for its data.
+  local desc="Missing trace room"
+  local trace_type=0xc00000
+  local ioam_ns=123
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type $trace_type ns $ioam_ns size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 $trace_type $ioam_ns
+}
+
+out_bits()
+{
+  # For each trace type bit, the encap node must either
+  #  (i)  fill the trace with its data when the bit is supported, or
+  #  (ii) leave the trace unfilled when the bit is unsupported.
+  local desc="Trace type with bit <n> only"
+
+  # Bit 22 also carries the schema data: grow its size by the length of
+  # Alpha's schema data rounded up to a multiple of 4, restored at the end.
+  local saved=${bit2size[22]}
+  local len=${#ALPHA[9]}
+  local pad=$(( (4 - (len % 4)) % 4 ))
+  bit2size[22]=$(( saved + len + pad ))
+
+  for ((i = 0; i <= 22; i++))
+  do
+    ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \
+           prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0
+
+    run_test "out_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \
+           db01::2 db01::1 veth0 ${bit2type[$i]} 123
+  done
+
+  bit2size[22]=$saved
+}
+
+out_full_supp_trace()
+{
+  # The encap node must correctly fill a full trace. "Full trace" here means
+  # all supported bits, NOT all bits.
+  local desc="Full supported trace"
+  local trace_type=0xfff002
+  local ioam_ns=123
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type $trace_type ns $ioam_ns size 100 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 $trace_type $ioam_ns
+}
+
+
+################################################################################
+# #
+# INPUT tests #
+# #
+# Two nodes (sender/receiver), the sender MUST NOT fill the trace upon #
+# insertion -> the IOAM namespace configured on the sender is removed #
+# and is used in the inserted trace to force the sender not to fill it. #
+################################################################################
+
+in_undef_ns()
+{
+  # The receiving node must not fill the trace when the related IOAM
+  # namespace is not configured locally.
+  local desc="Unknown IOAM namespace"
+  local trace_type=0x800000
+  local ioam_ns=0
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type $trace_type ns $ioam_ns size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 $trace_type $ioam_ns
+}
+
+in_no_room()
+{
+  # The receiving node must not fill the trace, and must set the Overflow
+  # flag, when there is not enough room for its data.
+  local desc="Missing trace room"
+  local trace_type=0xc00000
+  local ioam_ns=123
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type $trace_type ns $ioam_ns size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 $trace_type $ioam_ns
+}
+
+in_bits()
+{
+  # For each trace type bit, the receiving node must either
+  #  (i)  fill the trace with its data when the bit is supported, or
+  #  (ii) leave the trace unfilled when the bit is unsupported.
+  local desc="Trace type with bit <n> only"
+
+  # Bit 22 also carries the schema data: grow its size by the length of
+  # Beta's schema data rounded up to a multiple of 4, restored at the end.
+  local saved=${bit2size[22]}
+  local len=${#BETA[9]}
+  local pad=$(( (4 - (len % 4)) % 4 ))
+  bit2size[22]=$(( saved + len + pad ))
+
+  for ((i = 0; i <= 22; i++))
+  do
+    ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \
+           prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0
+
+    run_test "in_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \
+           db01::2 db01::1 veth0 ${bit2type[$i]} 123
+  done
+
+  bit2size[22]=$saved
+}
+
+in_oflag()
+{
+  # The receiving node must not fill the trace when the Overflow flag is set.
+  local desc="Overflow flag is set"
+  local trace_type=0xc00000
+  local ioam_ns=123
+
+  # Exception to the other INPUT tests: the sender has to set the Overflow
+  # flag itself, so the IOAM namespace is added back on the sender for the
+  # duration of this test.
+  ip -netns ioam-node-alpha ioam namespace add $ioam_ns
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type $trace_type ns $ioam_ns size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 $trace_type $ioam_ns
+
+  # Undo the exception so the remaining INPUT tests run without the
+  # namespace on the sender again.
+  ip -netns ioam-node-alpha ioam namespace del $ioam_ns
+}
+
+in_full_supp_trace()
+{
+  # The receiving node must correctly fill a full trace. "Full trace" here
+  # means all supported bits, NOT all bits.
+  local desc="Full supported trace"
+  local trace_type=0xfff002
+  local ioam_ns=123
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type $trace_type ns $ioam_ns size 80 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 $trace_type $ioam_ns
+}
+
+
+################################################################################
+# #
+# GLOBAL tests #
+# #
+# Three nodes (sender/router/receiver), IOAM fully enabled on every node. #
+################################################################################
+
+fwd_full_supp_trace()
+{
+  # All three nodes must have filled the full supported trace; the parser
+  # checks that the trace data is consistent with the predefined config.
+  local desc="Forward - Full supported trace"
+  local trace_type=0xfff002
+  local ioam_ns=123
+
+  ip -netns ioam-node-alpha route change db02::/64 encap ioam6 trace prealloc \
+         type $trace_type ns $ioam_ns size 244 via db01::1 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-gamma db01::2 \
+         db02::2 veth0 $trace_type $ioam_ns
+}
+
+
+################################################################################
+# #
+# MAIN #
+# #
+################################################################################
+
+# Kselftest reserves exit status 4 for skipped tests; exiting with 1 would
+# report a missing prerequisite as a test failure.
+ksft_skip=4
+
+if [ "$(id -u)" -ne 0 ]
+then
+  echo "SKIP: Need root privileges"
+  exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]
+then
+  echo "SKIP: Could not run test without ip tool"
+  exit $ksft_skip
+fi
+
+# Status 1 from "ip ioam" is taken to mean that iproute2 lacks the command.
+ip ioam &>/dev/null
+if [ $? = 1 ]
+then
+  echo "SKIP: iproute2 too old, missing ioam command"
+  exit $ksft_skip
+fi
+
+check_kernel_compatibility
+
+# Remove leftovers of a previous run before building the topology.
+cleanup &>/dev/null
+setup
+run
+cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c
new file mode 100644
index 000000000000..d376cb2c383c
--- /dev/null
+++ b/tools/testing/selftests/net/ioam6_parser.c
@@ -0,0 +1,720 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Author: Justin Iurman (justin.iurman@uliege.be)
+ *
+ * IOAM tester for IPv6, see ioam6.sh for details on each test case.
+ */
+#include <arpa/inet.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/const.h>
+#include <linux/if_ether.h>
+#include <linux/ioam6.h>
+#include <linux/ipv6.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * Expected IOAM values for one node. check_ioam6_data() compares these fields
+ * with a node's entry in the received trace, depending on which trace-type
+ * bits are set in the trace header.
+ */
+struct ioam_config {
+ /* bit0: compared with the low 24 bits of the 32-bit field */
+ __u32 id;
+ /* bit8: compared with the low 56 bits of the 64-bit field */
+ __u64 wide;
+ /* bit1: compared with the high 16 bits of the 32-bit field */
+ __u16 ingr_id;
+ /* bit1: compared with the low 16 bits of the 32-bit field */
+ __u16 egr_id;
+ /* bit9: first 32-bit field */
+ __u32 ingr_wide;
+ /* bit9: second 32-bit field */
+ __u32 egr_wide;
+ /* bit5: 32-bit field */
+ __u32 ns_data;
+ /* bit10: 64-bit field */
+ __u64 ns_wide;
+ /* bit22: compared with the low 24 bits of the 32-bit header word */
+ __u32 sc_id;
+ /* bit0 and bit8: compared with the top 8 bits of the field */
+ __u8 hlim;
+ /* bit22: expected payload string, or NULL when no payload is expected */
+ char *sc_data;
+};
+
+/*
+ * Be careful if you modify structs below - everything MUST be kept synchronized
+ * with configurations inside ioam6.sh and always reflect the same.
+ */
+
+/*
+ * Expected values checked by the TEST_OUT_* data tests, and the last entry
+ * checked by TEST_FWD_FULL_SUPP_TRACE.
+ */
+static struct ioam_config node1 = {
+ .id = 1,
+ .wide = 11111111,
+ .ingr_id = 0xffff, /* default value */
+ .egr_id = 101,
+ .ingr_wide = 0xffffffff, /* default value */
+ .egr_wide = 101101,
+ .ns_data = 0xdeadbee0,
+ .ns_wide = 0xcafec0caf00dc0de,
+ .sc_id = 777,
+ .sc_data = "something that will be 4n-aligned",
+ .hlim = 64,
+};
+
+/*
+ * Expected values checked by the TEST_IN_* data tests (which override
+ * egr_id/egr_wide with the default values for the duration of the check),
+ * and the second entry checked by TEST_FWD_FULL_SUPP_TRACE.
+ */
+static struct ioam_config node2 = {
+ .id = 2,
+ .wide = 22222222,
+ .ingr_id = 201,
+ .egr_id = 202,
+ .ingr_wide = 201201,
+ .egr_wide = 202202,
+ .ns_data = 0xdeadbee1,
+ .ns_wide = 0xcafec0caf11dc0de,
+ .sc_id = 666,
+ .sc_data = "Hello there -Obi",
+ .hlim = 63,
+};
+
+/*
+ * Expected values for the first entry checked by TEST_FWD_FULL_SUPP_TRACE.
+ * sc_data is NULL, so no bit22 payload is expected for this node.
+ */
+static struct ioam_config node3 = {
+ .id = 3,
+ .wide = 33333333,
+ .ingr_id = 301,
+ .egr_id = 0xffff, /* default value */
+ .ingr_wide = 301301,
+ .egr_wide = 0xffffffff, /* default value */
+ .ns_data = 0xdeadbee2,
+ .ns_wide = 0xcafec0caf22dc0de,
+ .sc_id = 0xffffff, /* default value */
+ .sc_data = NULL,
+ .hlim = 62,
+};
+
+/*
+ * Test identifiers. str2id() maps a test name given on the command line to
+ * one of these values, and the value is then used as an index into func[].
+ * __TEST_MAX is not a test: it only sizes that table, so it must stay last.
+ */
+enum {
+ /**********
+ * OUTPUT *
+ **********/
+ TEST_OUT_UNDEF_NS,
+ TEST_OUT_NO_ROOM,
+ TEST_OUT_BIT0,
+ TEST_OUT_BIT1,
+ TEST_OUT_BIT2,
+ TEST_OUT_BIT3,
+ TEST_OUT_BIT4,
+ TEST_OUT_BIT5,
+ TEST_OUT_BIT6,
+ TEST_OUT_BIT7,
+ TEST_OUT_BIT8,
+ TEST_OUT_BIT9,
+ TEST_OUT_BIT10,
+ TEST_OUT_BIT11,
+ TEST_OUT_BIT12,
+ TEST_OUT_BIT13,
+ TEST_OUT_BIT14,
+ TEST_OUT_BIT15,
+ TEST_OUT_BIT16,
+ TEST_OUT_BIT17,
+ TEST_OUT_BIT18,
+ TEST_OUT_BIT19,
+ TEST_OUT_BIT20,
+ TEST_OUT_BIT21,
+ TEST_OUT_BIT22,
+ TEST_OUT_FULL_SUPP_TRACE,
+
+ /*********
+ * INPUT *
+ *********/
+ TEST_IN_UNDEF_NS,
+ TEST_IN_NO_ROOM,
+ TEST_IN_OFLAG,
+ TEST_IN_BIT0,
+ TEST_IN_BIT1,
+ TEST_IN_BIT2,
+ TEST_IN_BIT3,
+ TEST_IN_BIT4,
+ TEST_IN_BIT5,
+ TEST_IN_BIT6,
+ TEST_IN_BIT7,
+ TEST_IN_BIT8,
+ TEST_IN_BIT9,
+ TEST_IN_BIT10,
+ TEST_IN_BIT11,
+ TEST_IN_BIT12,
+ TEST_IN_BIT13,
+ TEST_IN_BIT14,
+ TEST_IN_BIT15,
+ TEST_IN_BIT16,
+ TEST_IN_BIT17,
+ TEST_IN_BIT18,
+ TEST_IN_BIT19,
+ TEST_IN_BIT20,
+ TEST_IN_BIT21,
+ TEST_IN_BIT22,
+ TEST_IN_FULL_SUPP_TRACE,
+
+ /**********
+ * GLOBAL *
+ **********/
+ TEST_FWD_FULL_SUPP_TRACE,
+
+ __TEST_MAX,
+};
+
+/*
+ * Validate the fixed part of an IOAM trace header for test @tid.
+ *
+ * The namespace id and trace type must match @ioam_ns and @trace_type, and
+ * the overflow flag, node length and remaining length must have the values
+ * expected for that test.
+ *
+ * Returns 0 when the header is as expected, 1 otherwise (including when
+ * @tid is not a known test).
+ */
+static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h,
+			     __u32 trace_type, __u16 ioam_ns)
+{
+	int want_overflow = 0;
+	int want_nodelen;
+	int want_remlen;
+
+	if (__be16_to_cpu(ioam6h->namespace_id) != ioam_ns)
+		return 1;
+
+	if (__be32_to_cpu(ioam6h->type_be32) != (trace_type << 8))
+		return 1;
+
+	switch (tid) {
+	case TEST_OUT_UNDEF_NS: case TEST_IN_UNDEF_NS:
+		want_nodelen = 1;
+		want_remlen = 1;
+		break;
+
+	case TEST_OUT_NO_ROOM: case TEST_IN_NO_ROOM:
+	case TEST_IN_OFLAG:
+		want_overflow = 1;
+		want_nodelen = 2;
+		want_remlen = 1;
+		break;
+
+	case TEST_OUT_BIT0: case TEST_IN_BIT0:
+	case TEST_OUT_BIT1: case TEST_IN_BIT1:
+	case TEST_OUT_BIT2: case TEST_IN_BIT2:
+	case TEST_OUT_BIT3: case TEST_IN_BIT3:
+	case TEST_OUT_BIT4: case TEST_IN_BIT4:
+	case TEST_OUT_BIT5: case TEST_IN_BIT5:
+	case TEST_OUT_BIT6: case TEST_IN_BIT6:
+	case TEST_OUT_BIT7: case TEST_IN_BIT7:
+	case TEST_OUT_BIT11: case TEST_IN_BIT11:
+		want_nodelen = 1;
+		want_remlen = 0;
+		break;
+
+	case TEST_OUT_BIT8: case TEST_IN_BIT8:
+	case TEST_OUT_BIT9: case TEST_IN_BIT9:
+	case TEST_OUT_BIT10: case TEST_IN_BIT10:
+		want_nodelen = 2;
+		want_remlen = 0;
+		break;
+
+	case TEST_OUT_BIT12: case TEST_IN_BIT12:
+	case TEST_OUT_BIT13: case TEST_IN_BIT13:
+	case TEST_OUT_BIT14: case TEST_IN_BIT14:
+	case TEST_OUT_BIT15: case TEST_IN_BIT15:
+	case TEST_OUT_BIT16: case TEST_IN_BIT16:
+	case TEST_OUT_BIT17: case TEST_IN_BIT17:
+	case TEST_OUT_BIT18: case TEST_IN_BIT18:
+	case TEST_OUT_BIT19: case TEST_IN_BIT19:
+	case TEST_OUT_BIT20: case TEST_IN_BIT20:
+	case TEST_OUT_BIT21: case TEST_IN_BIT21:
+		want_nodelen = 0;
+		want_remlen = 1;
+		break;
+
+	case TEST_OUT_BIT22: case TEST_IN_BIT22:
+		want_nodelen = 0;
+		want_remlen = 0;
+		break;
+
+	case TEST_OUT_FULL_SUPP_TRACE:
+	case TEST_IN_FULL_SUPP_TRACE:
+	case TEST_FWD_FULL_SUPP_TRACE:
+		want_nodelen = 15;
+		want_remlen = 0;
+		break;
+
+	default:
+		/* unknown test id: report a failure */
+		return 1;
+	}
+
+	return !!ioam6h->overflow != want_overflow ||
+	       ioam6h->nodelen != want_nodelen ||
+	       ioam6h->remlen != want_remlen;
+}
+
+/*
+ * Load a big-endian 32-bit value from the packet. The trace data is not
+ * guaranteed to be naturally aligned inside the receive buffer, so copy the
+ * bytes instead of dereferencing a cast pointer.
+ */
+static __u32 ioam6_load_be32(const __u8 *src)
+{
+	__be32 raw;
+
+	memcpy(&raw, src, sizeof(raw));
+	return __be32_to_cpu(raw);
+}
+
+/* Same as ioam6_load_be32(), for a 64-bit field. */
+static __u64 ioam6_load_be64(const __u8 *src)
+{
+	__be64 raw;
+
+	memcpy(&raw, src, sizeof(raw));
+	return __be64_to_cpu(raw);
+}
+
+/*
+ * Compare the 32-bit field at *p with @want. On a match the cursor is moved
+ * past the field and 0 is returned; otherwise the cursor is left untouched
+ * and 1 is returned.
+ */
+static int ioam6_expect_be32(__u8 **p, __u32 want)
+{
+	if (ioam6_load_be32(*p) != want)
+		return 1;
+
+	*p += sizeof(__u32);
+	return 0;
+}
+
+/*
+ * Check one node's entry in the trace data against the expected values.
+ *
+ * @p:      cursor into the trace data; advanced past every field consumed,
+ *          so consecutive calls walk consecutive node entries
+ * @ioam6h: trace header, whose trace-type bits select the fields present
+ * @cnf:    expected values for the node being checked
+ *
+ * Returns 0 when every selected field matches, 1 on the first mismatch.
+ */
+static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h,
+			    const struct ioam_config cnf)
+{
+	unsigned int len;
+	__u8 aligned;
+	__u64 raw64;
+	__u32 raw32;
+
+	if (ioam6h->type.bit0) {
+		/* top 8 bits: hop limit, low 24 bits: node id */
+		raw32 = ioam6_load_be32(*p);
+		if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff))
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit1) {
+		/* high 16 bits: ingress id, low 16 bits: egress id */
+		raw32 = ioam6_load_be32(*p);
+		if (cnf.ingr_id != (raw32 >> 16) ||
+		    cnf.egr_id != (raw32 & 0xffff))
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	/* bit2 and bit3 each occupy 32 bits whose content is not compared */
+	if (ioam6h->type.bit2)
+		*p += sizeof(__u32);
+
+	if (ioam6h->type.bit3)
+		*p += sizeof(__u32);
+
+	if (ioam6h->type.bit4 && ioam6_expect_be32(p, 0xffffffff))
+		return 1;
+
+	if (ioam6h->type.bit5 && ioam6_expect_be32(p, cnf.ns_data))
+		return 1;
+
+	if (ioam6h->type.bit6 && ioam6_expect_be32(p, 0xffffffff))
+		return 1;
+
+	if (ioam6h->type.bit7 && ioam6_expect_be32(p, 0xffffffff))
+		return 1;
+
+	if (ioam6h->type.bit8) {
+		/* top 8 bits: hop limit, low 56 bits: wide node id */
+		raw64 = ioam6_load_be64(*p);
+		if (cnf.hlim != (raw64 >> 56) ||
+		    cnf.wide != (raw64 & 0xffffffffffffff))
+			return 1;
+		*p += sizeof(__u64);
+	}
+
+	if (ioam6h->type.bit9) {
+		/* wide ingress id followed by wide egress id */
+		if (ioam6_expect_be32(p, cnf.ingr_wide) ||
+		    ioam6_expect_be32(p, cnf.egr_wide))
+			return 1;
+	}
+
+	if (ioam6h->type.bit10) {
+		if (ioam6_load_be64(*p) != cnf.ns_wide)
+			return 1;
+		*p += sizeof(__u64);
+	}
+
+	if (ioam6h->type.bit11 && ioam6_expect_be32(p, 0xffffffff))
+		return 1;
+
+	if (ioam6h->type.bit22) {
+		len = cnf.sc_data ? strlen(cnf.sc_data) : 0;
+		aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0;
+
+		/* top 8 bits: payload length in 4-byte units, low 24: id */
+		raw32 = ioam6_load_be32(*p);
+		if (aligned != (raw32 >> 24) * 4 ||
+		    cnf.sc_id != (raw32 & 0xffffff))
+			return 1;
+		*p += sizeof(__u32);
+
+		if (cnf.sc_data) {
+			if (strncmp((char *)*p, cnf.sc_data, len))
+				return 1;
+
+			*p += len;
+			aligned -= len;
+
+			/* bytes padding the payload to 4n must be zero */
+			while (aligned--) {
+				if (**p != '\0')
+					return 1;
+				*p += sizeof(__u8);
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Validate the trace header for test @tid and, if it is as expected, the
+ * node data that follows it.
+ *
+ * Returns 0 when header and data both match, 1 otherwise (including when
+ * @tid has no data check).
+ */
+static int check_ioam_header_and_data(int tid, struct ioam6_trace_hdr *ioam6h,
+				      __u32 trace_type, __u16 ioam_ns)
+{
+	struct ioam_config in_cnf;
+	__u8 *cursor;
+
+	if (check_ioam_header(tid, ioam6h, trace_type, ioam_ns))
+		return 1;
+
+	/* remlen is in 4-byte units; start reading after that many bytes */
+	cursor = ioam6h->data + ioam6h->remlen * 4;
+
+	switch (tid) {
+	case TEST_OUT_BIT0:
+	case TEST_OUT_BIT1:
+	case TEST_OUT_BIT2:
+	case TEST_OUT_BIT3:
+	case TEST_OUT_BIT4:
+	case TEST_OUT_BIT5:
+	case TEST_OUT_BIT6:
+	case TEST_OUT_BIT7:
+	case TEST_OUT_BIT8:
+	case TEST_OUT_BIT9:
+	case TEST_OUT_BIT10:
+	case TEST_OUT_BIT11:
+	case TEST_OUT_BIT22:
+	case TEST_OUT_FULL_SUPP_TRACE:
+		return check_ioam6_data(&cursor, ioam6h, node1);
+
+	case TEST_IN_BIT0:
+	case TEST_IN_BIT1:
+	case TEST_IN_BIT2:
+	case TEST_IN_BIT3:
+	case TEST_IN_BIT4:
+	case TEST_IN_BIT5:
+	case TEST_IN_BIT6:
+	case TEST_IN_BIT7:
+	case TEST_IN_BIT8:
+	case TEST_IN_BIT9:
+	case TEST_IN_BIT10:
+	case TEST_IN_BIT11:
+	case TEST_IN_BIT22:
+	case TEST_IN_FULL_SUPP_TRACE:
+		/*
+		 * The input tests expect node2's values with the egress
+		 * fields set to their default values; check against a
+		 * private copy so that node2 itself stays untouched.
+		 */
+		in_cnf = node2;
+		in_cnf.egr_id = 0xffff;
+		in_cnf.egr_wide = 0xffffffff;
+		return check_ioam6_data(&cursor, ioam6h, in_cnf);
+
+	case TEST_FWD_FULL_SUPP_TRACE:
+		/* entries are checked in the order node3, node2, node1 */
+		if (check_ioam6_data(&cursor, ioam6h, node3) ||
+		    check_ioam6_data(&cursor, ioam6h, node2))
+			return 1;
+		return check_ioam6_data(&cursor, ioam6h, node1);
+
+	default:
+		return 1;
+	}
+}
+
+/*
+ * Translate a test name, as passed on the command line, into its TEST_*
+ * identifier. Returns -1 when the name is not a known test.
+ */
+static int str2id(const char *tname)
+{
+	static const char * const test_names[__TEST_MAX] = {
+		[TEST_OUT_UNDEF_NS]		= "out_undef_ns",
+		[TEST_OUT_NO_ROOM]		= "out_no_room",
+		[TEST_OUT_BIT0]			= "out_bit0",
+		[TEST_OUT_BIT1]			= "out_bit1",
+		[TEST_OUT_BIT2]			= "out_bit2",
+		[TEST_OUT_BIT3]			= "out_bit3",
+		[TEST_OUT_BIT4]			= "out_bit4",
+		[TEST_OUT_BIT5]			= "out_bit5",
+		[TEST_OUT_BIT6]			= "out_bit6",
+		[TEST_OUT_BIT7]			= "out_bit7",
+		[TEST_OUT_BIT8]			= "out_bit8",
+		[TEST_OUT_BIT9]			= "out_bit9",
+		[TEST_OUT_BIT10]		= "out_bit10",
+		[TEST_OUT_BIT11]		= "out_bit11",
+		[TEST_OUT_BIT12]		= "out_bit12",
+		[TEST_OUT_BIT13]		= "out_bit13",
+		[TEST_OUT_BIT14]		= "out_bit14",
+		[TEST_OUT_BIT15]		= "out_bit15",
+		[TEST_OUT_BIT16]		= "out_bit16",
+		[TEST_OUT_BIT17]		= "out_bit17",
+		[TEST_OUT_BIT18]		= "out_bit18",
+		[TEST_OUT_BIT19]		= "out_bit19",
+		[TEST_OUT_BIT20]		= "out_bit20",
+		[TEST_OUT_BIT21]		= "out_bit21",
+		[TEST_OUT_BIT22]		= "out_bit22",
+		[TEST_OUT_FULL_SUPP_TRACE]	= "out_full_supp_trace",
+		[TEST_IN_UNDEF_NS]		= "in_undef_ns",
+		[TEST_IN_NO_ROOM]		= "in_no_room",
+		[TEST_IN_OFLAG]			= "in_oflag",
+		[TEST_IN_BIT0]			= "in_bit0",
+		[TEST_IN_BIT1]			= "in_bit1",
+		[TEST_IN_BIT2]			= "in_bit2",
+		[TEST_IN_BIT3]			= "in_bit3",
+		[TEST_IN_BIT4]			= "in_bit4",
+		[TEST_IN_BIT5]			= "in_bit5",
+		[TEST_IN_BIT6]			= "in_bit6",
+		[TEST_IN_BIT7]			= "in_bit7",
+		[TEST_IN_BIT8]			= "in_bit8",
+		[TEST_IN_BIT9]			= "in_bit9",
+		[TEST_IN_BIT10]			= "in_bit10",
+		[TEST_IN_BIT11]			= "in_bit11",
+		[TEST_IN_BIT12]			= "in_bit12",
+		[TEST_IN_BIT13]			= "in_bit13",
+		[TEST_IN_BIT14]			= "in_bit14",
+		[TEST_IN_BIT15]			= "in_bit15",
+		[TEST_IN_BIT16]			= "in_bit16",
+		[TEST_IN_BIT17]			= "in_bit17",
+		[TEST_IN_BIT18]			= "in_bit18",
+		[TEST_IN_BIT19]			= "in_bit19",
+		[TEST_IN_BIT20]			= "in_bit20",
+		[TEST_IN_BIT21]			= "in_bit21",
+		[TEST_IN_BIT22]			= "in_bit22",
+		[TEST_IN_FULL_SUPP_TRACE]	= "in_full_supp_trace",
+		[TEST_FWD_FULL_SUPP_TRACE]	= "fwd_full_supp_trace",
+	};
+	int id;
+
+	for (id = 0; id < __TEST_MAX; id++) {
+		if (test_names[id] && !strcmp(test_names[id], tname))
+			return id;
+	}
+
+	return -1;
+}
+
+/* Returns 1 when the two IPv6 addresses are byte-for-byte equal, else 0. */
+static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2)
+{
+	return memcmp(a1, a2, sizeof(*a1)) == 0;
+}
+
+/*
+ * Parse @arg as an unsigned integer in @base and store it in *@val.
+ *
+ * Returns 0 on success, or -1 when @arg is NULL or empty, contains trailing
+ * characters, overflows unsigned long or does not fit in 32 bits.
+ */
+static int get_u32(__u32 *val, const char *arg, int base)
+{
+	unsigned long res;
+	char *ptr;
+
+	if (!arg || !*arg)
+		return -1;
+
+	/*
+	 * strtoul() only sets errno on failure, so clear it first: a stale
+	 * ERANGE would otherwise turn a valid ULONG_MAX result into an error.
+	 */
+	errno = 0;
+	res = strtoul(arg, &ptr, base);
+
+	if (!ptr || ptr == arg || *ptr)
+		return -1;
+
+	if (res == ULONG_MAX && errno == ERANGE)
+		return -1;
+
+	if (res > 0xFFFFFFFFUL)
+		return -1;
+
+	*val = res;
+	return 0;
+}
+
+/*
+ * Parse @arg as an unsigned integer in @base and store it in *@val.
+ *
+ * Returns 0 on success, or -1 when @arg is NULL or empty, contains trailing
+ * characters, overflows unsigned long or does not fit in 16 bits.
+ */
+static int get_u16(__u16 *val, const char *arg, int base)
+{
+	unsigned long res;
+	char *ptr;
+
+	if (!arg || !*arg)
+		return -1;
+
+	/* strtoul() only sets errno on failure, so clear any stale value */
+	errno = 0;
+	res = strtoul(arg, &ptr, base);
+
+	if (!ptr || ptr == arg || *ptr)
+		return -1;
+
+	if (res == ULONG_MAX && errno == ERANGE)
+		return -1;
+
+	if (res > 0xFFFFUL)
+		return -1;
+
+	*val = res;
+	return 0;
+}
+
+/*
+ * Checker to run for each test id. Tests mapped to check_ioam_header only
+ * validate the trace header; the others also validate the node data that
+ * follows it. main() refuses to run a test whose slot is NULL.
+ */
+static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = {
+ [TEST_OUT_UNDEF_NS] = check_ioam_header,
+ [TEST_OUT_NO_ROOM] = check_ioam_header,
+ [TEST_OUT_BIT0] = check_ioam_header_and_data,
+ [TEST_OUT_BIT1] = check_ioam_header_and_data,
+ [TEST_OUT_BIT2] = check_ioam_header_and_data,
+ [TEST_OUT_BIT3] = check_ioam_header_and_data,
+ [TEST_OUT_BIT4] = check_ioam_header_and_data,
+ [TEST_OUT_BIT5] = check_ioam_header_and_data,
+ [TEST_OUT_BIT6] = check_ioam_header_and_data,
+ [TEST_OUT_BIT7] = check_ioam_header_and_data,
+ [TEST_OUT_BIT8] = check_ioam_header_and_data,
+ [TEST_OUT_BIT9] = check_ioam_header_and_data,
+ [TEST_OUT_BIT10] = check_ioam_header_and_data,
+ [TEST_OUT_BIT11] = check_ioam_header_and_data,
+ [TEST_OUT_BIT12] = check_ioam_header,
+ [TEST_OUT_BIT13] = check_ioam_header,
+ [TEST_OUT_BIT14] = check_ioam_header,
+ [TEST_OUT_BIT15] = check_ioam_header,
+ [TEST_OUT_BIT16] = check_ioam_header,
+ [TEST_OUT_BIT17] = check_ioam_header,
+ [TEST_OUT_BIT18] = check_ioam_header,
+ [TEST_OUT_BIT19] = check_ioam_header,
+ [TEST_OUT_BIT20] = check_ioam_header,
+ [TEST_OUT_BIT21] = check_ioam_header,
+ [TEST_OUT_BIT22] = check_ioam_header_and_data,
+ [TEST_OUT_FULL_SUPP_TRACE] = check_ioam_header_and_data,
+ [TEST_IN_UNDEF_NS] = check_ioam_header,
+ [TEST_IN_NO_ROOM] = check_ioam_header,
+ [TEST_IN_OFLAG] = check_ioam_header,
+ [TEST_IN_BIT0] = check_ioam_header_and_data,
+ [TEST_IN_BIT1] = check_ioam_header_and_data,
+ [TEST_IN_BIT2] = check_ioam_header_and_data,
+ [TEST_IN_BIT3] = check_ioam_header_and_data,
+ [TEST_IN_BIT4] = check_ioam_header_and_data,
+ [TEST_IN_BIT5] = check_ioam_header_and_data,
+ [TEST_IN_BIT6] = check_ioam_header_and_data,
+ [TEST_IN_BIT7] = check_ioam_header_and_data,
+ [TEST_IN_BIT8] = check_ioam_header_and_data,
+ [TEST_IN_BIT9] = check_ioam_header_and_data,
+ [TEST_IN_BIT10] = check_ioam_header_and_data,
+ [TEST_IN_BIT11] = check_ioam_header_and_data,
+ [TEST_IN_BIT12] = check_ioam_header,
+ [TEST_IN_BIT13] = check_ioam_header,
+ [TEST_IN_BIT14] = check_ioam_header,
+ [TEST_IN_BIT15] = check_ioam_header,
+ [TEST_IN_BIT16] = check_ioam_header,
+ [TEST_IN_BIT17] = check_ioam_header,
+ [TEST_IN_BIT18] = check_ioam_header,
+ [TEST_IN_BIT19] = check_ioam_header,
+ [TEST_IN_BIT20] = check_ioam_header,
+ [TEST_IN_BIT21] = check_ioam_header,
+ [TEST_IN_BIT22] = check_ioam_header_and_data,
+ [TEST_IN_FULL_SUPP_TRACE] = check_ioam_header_and_data,
+ [TEST_FWD_FULL_SUPP_TRACE] = check_ioam_header_and_data,
+};
+
+/*
+ * Capture one IPv6 packet on an interface and check its IOAM pre-allocated
+ * trace option.
+ *
+ * Arguments:
+ *   argv[1]  interface to bind the capture socket to
+ *   argv[2]  test name (see str2id())
+ *   argv[3]  expected IPv6 source address
+ *   argv[4]  expected IPv6 destination address
+ *   argv[5]  expected trace type, hexadecimal
+ *   argv[6]  expected IOAM namespace id
+ *
+ * Packets whose addresses do not match are ignored; the first matching
+ * packet decides the result.
+ *
+ * Returns 0 when the packet passes the selected test, 1 otherwise
+ * (including bad arguments, socket errors and malformed packets).
+ */
+int main(int argc, char **argv)
+{
+	int fd, size, hoplen, tid, ret = 1;
+	struct in6_addr src, dst;
+	struct ioam6_hdr *opt;
+	struct ipv6hdr *ip6h;
+	__u8 buffer[400], *p;
+	__u16 ioam_ns;
+	__u32 tr_type;
+
+	if (argc != 7)
+		goto out;
+
+	tid = str2id(argv[2]);
+	if (tid < 0 || !func[tid])
+		goto out;
+
+	if (inet_pton(AF_INET6, argv[3], &src) != 1 ||
+	    inet_pton(AF_INET6, argv[4], &dst) != 1)
+		goto out;
+
+	if (get_u32(&tr_type, argv[5], 16) ||
+	    get_u16(&ioam_ns, argv[6], 0))
+		goto out;
+
+	/* socket() reports failure with -1; 0 is a valid descriptor */
+	fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6));
+	if (fd < 0)
+		goto out;
+
+	if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+		       argv[1], strlen(argv[1])))
+		goto close;
+
+recv:
+	size = recv(fd, buffer, sizeof(buffer), 0);
+	if (size <= 0)
+		goto close;
+
+	/* too short to hold an IPv6 header: not the packet we wait for */
+	if (size < (int)sizeof(*ip6h))
+		goto recv;
+
+	ip6h = (struct ipv6hdr *)buffer;
+
+	if (!ipv6_addr_equal(&ip6h->saddr, &src) ||
+	    !ipv6_addr_equal(&ip6h->daddr, &dst))
+		goto recv;
+
+	if (ip6h->nexthdr != IPPROTO_HOPOPTS)
+		goto close;
+
+	if (size < (int)(sizeof(*ip6h) + sizeof(struct ipv6_hopopt_hdr)))
+		goto close;
+
+	/* extension header length is in 8-byte units, first unit excluded */
+	p = buffer + sizeof(*ip6h);
+	hoplen = (p[1] + 1) << 3;
+	if (size < (int)sizeof(*ip6h) + hoplen)
+		goto close;
+
+	/* hoplen covers the whole header: account for its fixed part too */
+	p += sizeof(struct ipv6_hopopt_hdr);
+	hoplen -= (int)sizeof(struct ipv6_hopopt_hdr);
+
+	while (hoplen > 0) {
+		/* Pad1 (type 0) is a single byte without a length field */
+		if (*p == 0) {
+			p++;
+			hoplen--;
+			continue;
+		}
+
+		/* every other option carries at least a type and a length */
+		if (hoplen < 2)
+			break;
+
+		opt = (struct ioam6_hdr *)p;
+		if (opt->opt_len + 2 > hoplen)
+			break;
+
+		if (opt->opt_type == IPV6_TLV_IOAM &&
+		    hoplen >= (int)(sizeof(*opt) +
+				    sizeof(struct ioam6_trace_hdr)) &&
+		    opt->type == IOAM6_TYPE_PREALLOC) {
+			p += sizeof(*opt);
+			ret = func[tid](tid, (struct ioam6_trace_hdr *)p,
+					tr_type, ioam_ns);
+			break;
+		}
+
+		p += opt->opt_len + 2;
+		hoplen -= opt->opt_len + 2;
+	}
+close:
+	close(fd);
+out:
+	return ret;
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index f02f4de2f3a0..255793c5ac4f 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -3,8 +3,10 @@
ret=0
sin=""
+sinfail=""
sout=""
cin=""
+cinfail=""
cinsent=""
cout=""
ksft_skip=4
@@ -76,6 +78,14 @@ init()
done
}
+init_shapers()
+{
+	# Attach a netem qdisc (20mbit rate, delay 1) to the four veth devices
+	# of each namespace.
+	for i in $(seq 1 4); do
+		tc -n $ns1 qdisc add dev "ns1eth$i" root netem rate 20mbit delay 1
+		tc -n $ns2 qdisc add dev "ns2eth$i" root netem rate 20mbit delay 1
+	done
+}
+
cleanup_partial()
{
rm -f "$capout"
@@ -88,8 +98,8 @@ cleanup_partial()
cleanup()
{
- rm -f "$cin" "$cout"
- rm -f "$sin" "$sout" "$cinsent"
+ rm -f "$cin" "$cout" "$sinfail"
+ rm -f "$sin" "$sout" "$cinsent" "$cinfail"
cleanup_partial
}
@@ -211,11 +221,15 @@ link_failure()
{
ns="$1"
- l=$((RANDOM%4))
- l=$((l+1))
+ if [ -z "$FAILING_LINKS" ]; then
+ l=$((RANDOM%4))
+ FAILING_LINKS=$((l+1))
+ fi
- veth="ns1eth$l"
- ip -net "$ns" link set "$veth" down
+ for l in $FAILING_LINKS; do
+ veth="ns1eth$l"
+ ip -net "$ns" link set "$veth" down
+ done
}
# $1: IP address
@@ -280,10 +294,17 @@ do_transfer()
local_addr="0.0.0.0"
fi
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
- ${local_addr} < "$sin" > "$sout" &
+ if [ "$test_link_fail" -eq 2 ];then
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ $mptcp_connect -t ${timeout_poll} -l -p $port -s ${cl_proto} \
+ ${local_addr} < "$sinfail" > "$sout" &
+ else
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ ${local_addr} < "$sin" > "$sout" &
+ fi
spid=$!
sleep 1
@@ -294,7 +315,7 @@ do_transfer()
$mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
$connect_addr < "$cin" > "$cout" &
else
- ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | \
+ ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
tee "$cinsent" | \
timeout ${timeout_test} \
ip netns exec ${connector_ns} \
@@ -323,17 +344,18 @@ do_transfer()
let rm_nr_ns1=-addr_nr_ns1
if [ $rm_nr_ns1 -lt 8 ]; then
counter=1
+ pos=1
dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
if [ ${#dump[@]} -gt 0 ]; then
- id=${dump[1]}
sleep 1
while [ $counter -le $rm_nr_ns1 ]
do
+ id=${dump[$pos]}
ip netns exec ${listener_ns} ./pm_nl_ctl del $id
sleep 1
let counter+=1
- let id+=1
+ let pos+=5
done
fi
elif [ $rm_nr_ns1 -eq 8 ]; then
@@ -345,6 +367,12 @@ do_transfer()
fi
fi
+ flags="subflow"
+ if [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then
+ flags="${flags},fullmesh"
+ addr_nr_ns2=${addr_nr_ns2:9}
+ fi
+
if [ $addr_nr_ns2 -gt 0 ]; then
let add_nr_ns2=addr_nr_ns2
counter=3
@@ -356,7 +384,7 @@ do_transfer()
else
addr="10.0.$counter.2"
fi
- ip netns exec $ns2 ./pm_nl_ctl add $addr flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add $addr flags $flags
let counter+=1
let add_nr_ns2-=1
done
@@ -365,17 +393,18 @@ do_transfer()
let rm_nr_ns2=-addr_nr_ns2
if [ $rm_nr_ns2 -lt 8 ]; then
counter=1
+ pos=1
dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
if [ ${#dump[@]} -gt 0 ]; then
- id=${dump[1]}
sleep 1
while [ $counter -le $rm_nr_ns2 ]
do
+ id=${dump[$pos]}
ip netns exec ${connector_ns} ./pm_nl_ctl del $id
sleep 1
let counter+=1
- let id+=1
+ let pos+=5
done
fi
elif [ $rm_nr_ns2 -eq 8 ]; then
@@ -434,7 +463,11 @@ do_transfer()
return 1
fi
- check_transfer $sin $cout "file received by client"
+ if [ "$test_link_fail" -eq 2 ];then
+ check_transfer $sinfail $cout "file received by client"
+ else
+ check_transfer $sin $cout "file received by client"
+ fi
retc=$?
if [ "$test_link_fail" -eq 0 ];then
check_transfer $cin $sout "file received by server"
@@ -477,29 +510,33 @@ run_tests()
lret=0
oldin=""
- if [ "$test_linkfail" -eq 1 ];then
- size=$((RANDOM%1024))
+ # create the input file for the failure tests the first
+ # time one of them runs
+ if [ "$test_linkfail" -ne 0 -a -z "$cinfail" ]; then
+ # the client file must be considerably larger
+ # than the maximum expected cwin value, or the
+ # link utilization will not be predictable
+ size=$((RANDOM%2))
size=$((size+1))
- size=$((size*128))
+ size=$((size*8192))
+ size=$((size + ( $RANDOM % 8192) ))
- oldin=$(mktemp)
- cp "$cin" "$oldin"
- make_file "$cin" "client" $size
+ cinfail=$(mktemp)
+ make_file "$cinfail" "client" $size
fi
- do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
- ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup}
- lret=$?
+ if [ "$test_linkfail" -eq 2 -a -z "$sinfail" ]; then
+ size=$((RANDOM%16))
+ size=$((size+1))
+ size=$((size*2048))
- if [ "$test_linkfail" -eq 1 ];then
- cp "$oldin" "$cin"
- rm -f "$oldin"
+ sinfail=$(mktemp)
+ make_file "$sinfail" "server" $size
fi
- if [ $lret -ne 0 ]; then
- ret=$lret
- return
- fi
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
+ ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup}
+ lret=$?
}
chk_csum_nr()
@@ -541,6 +578,43 @@ chk_csum_nr()
fi
}
+# Check the MP_FAIL counters: $1 is the number of MP_FAILs expected to be
+# sent in $ns1, $2 the number expected to be received in $ns2. On a mismatch
+# the global 'ret' is set to 1 and the MPTcp counters of both namespaces are
+# dumped.
+chk_fail_nr()
+{
+	local mp_fail_nr_tx=$1
+	local mp_fail_nr_rx=$2
+	local count
+	local dump_stats
+
+	printf "%-39s %s" " " "ftx"
+	count=$(ip netns exec $ns1 nstat -as | awk '/MPTcpExtMPFailTx/ {print $2}')
+	count=${count:-0}
+	if [ "$count" = "$mp_fail_nr_tx" ]; then
+		echo -n "[ ok ]"
+	else
+		echo "[fail] got $count MP_FAIL[s] TX expected $mp_fail_nr_tx"
+		ret=1
+		dump_stats=1
+	fi
+
+	echo -n " - frx "
+	count=$(ip netns exec $ns2 nstat -as | awk '/MPTcpExtMPFailRx/ {print $2}')
+	count=${count:-0}
+	if [ "$count" = "$mp_fail_nr_rx" ]; then
+		echo "[ ok ]"
+	else
+		echo "[fail] got $count MP_FAIL[s] RX expected $mp_fail_nr_rx"
+		ret=1
+		dump_stats=1
+	fi
+
+	if [ "${dump_stats}" = 1 ]; then
+		echo Server ns stats
+		ip netns exec $ns1 nstat -as | grep MPTcp
+		echo Client ns stats
+		ip netns exec $ns2 nstat -as | grep MPTcp
+	fi
+}
+
chk_join_nr()
{
local msg="$1"
@@ -590,6 +664,47 @@ chk_join_nr()
fi
if [ $checksum -eq 1 ]; then
chk_csum_nr
+ chk_fail_nr 0 0
+ fi
+}
+
+# a negative value for 'stale_max' means no upper bound:
+# for bidirectional transfer, if one peer sleeps for a while
+# - as these tests do - we can have a quite high number of
+# stale/recover conversions, proportional to
+# sleep duration / MPTCP-level RTX interval.
+chk_stale_nr()
+{
+	# $1: namespace whose counters are read
+	# $2/$3: accepted range for the stale counter ($3 <= 0: no upper bound)
+	# $4: expected difference between stale and recover counters
+	local ns=$1
+	local stale_min=$2
+	local stale_max=$3
+	local stale_delta=$4
+	local dump_stats
+	local stale_nr
+	local recover_nr
+	local delta
+
+	printf "%-39s %-18s" " " "stale"
+	stale_nr=$(ip netns exec $ns nstat -as | awk '/MPTcpExtSubflowStale/ {print $2}')
+	stale_nr=${stale_nr:-0}
+	recover_nr=$(ip netns exec $ns nstat -as | awk '/MPTcpExtSubflowRecover/ {print $2}')
+	recover_nr=${recover_nr:-0}
+	delta=$((stale_nr - recover_nr))
+
+	if [ $stale_nr -lt $stale_min ] ||
+	   { [ $stale_max -gt 0 ] && [ $stale_nr -gt $stale_max ]; } ||
+	   [ $delta -ne $stale_delta ]; then
+		echo "[fail] got $stale_nr stale[s] $recover_nr recover[s], " \
+		     " expected stale in range [$stale_min..$stale_max]," \
+		     " stale-recover delta $stale_delta "
+		ret=1
+		dump_stats=1
+	else
+		echo "[ ok ]"
+	fi
+
+	if [ "${dump_stats}" = 1 ]; then
+		echo $ns stats
+		ip netns exec $ns ip -s link show
+		ip netns exec $ns nstat -as | grep MPTcp
 fi
}
@@ -801,6 +916,27 @@ chk_prio_nr()
fi
}
+# Check which share of the transferred data went through one link.
+# $1: namespace, $2: device, $3: file whose size is the total amount sent,
+# $4: expected share in percent (accepted with a +/- 5 points tolerance).
+# Sets the global 'ret' to 1 when the share is out of range.
+chk_link_usage()
+{
+	local ns=$1
+	local link=$2
+	local out=$3
+	local expected_rate=$4
+	local tx_link=$(ip netns exec $ns cat /sys/class/net/$link/statistics/tx_bytes)
+	local tx_total=$(ls -l $out | awk '{print $5}')
+	local tx_rate=$((tx_link * 100 / tx_total))
+	local tolerance=5
+	local rate_min=$((expected_rate - tolerance))
+	local rate_max=$((expected_rate + tolerance))
+
+	printf "%-39s %-18s" " " "link usage"
+	if [ $tx_rate -lt $rate_min ] || [ $tx_rate -gt $rate_max ]; then
+		echo "[fail] got $tx_rate% usage, expected $expected_rate%"
+		ret=1
+	else
+		echo "[ ok ]"
+	fi
+}
+
subflows_tests()
{
reset
@@ -918,20 +1054,101 @@ signal_address_tests()
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal invalid addresses" 1 1 1
chk_add_nr 3 3
+
+ # signal addresses race test
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 4 4
+ ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_add_nr 4 4
}
link_failure_tests()
{
# accept and use add_addr with additional subflows and link loss
reset
+
+ # without any b/w limit each veth could spool the packets and get
+ # them acked at xmit time, so that the corresponding subflow will
+ # almost always have no outstanding pkts, the scheduler will always
+ # pick the first subflow and we will have a hard time testing
+ # active backup and link switch-over.
+ # Let's set some arbitrary (low) virtual link limits.
+ init_shapers
ip netns exec $ns1 ./pm_nl_ctl limits 0 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
ip netns exec $ns2 ./pm_nl_ctl limits 1 3
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
run_tests $ns1 $ns2 10.0.1.1 1
chk_join_nr "multiple flows, signal, link failure" 3 3 3
chk_add_nr 1 1
+ chk_stale_nr $ns2 1 5 1
+
+ # accept and use add_addr with additional subflows and link loss
+ # for bidirectional transfer
+ reset
+ init_shapers
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 2
+ chk_join_nr "multi flows, signal, bidi, link fail" 3 3 3
+ chk_add_nr 1 1
+ chk_stale_nr $ns2 1 -1 1
+
+ # 2 subflows plus 1 backup subflow with a lossy link, backup
+ # will never be used
+ reset
+ init_shapers
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ export FAILING_LINKS="1"
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+ run_tests $ns1 $ns2 10.0.1.1 1
+ chk_join_nr "backup subflow unused, link failure" 2 2 2
+ chk_add_nr 1 1
+ chk_link_usage $ns2 ns2eth3 $cinsent 0
+
+ # 2 lossy links after half transfer, backup will get half of
+ # the traffic
+ reset
+ init_shapers
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+ export FAILING_LINKS="1 2"
+ run_tests $ns1 $ns2 10.0.1.1 1
+ chk_join_nr "backup flow used, multi links fail" 2 2 2
+ chk_add_nr 1 1
+ chk_stale_nr $ns2 2 4 2
+ chk_link_usage $ns2 ns2eth3 $cinsent 50
+
+ # use a backup subflow with the first subflow on a lossy link
+ # for bidirectional transfer
+ reset
+ init_shapers
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+ run_tests $ns1 $ns2 10.0.1.1 2
+ chk_join_nr "backup flow used, bidi, link failure" 2 2 2
+ chk_add_nr 1 1
+ chk_stale_nr $ns2 1 -1 2
+ chk_link_usage $ns2 ns2eth3 $cinsent 50
}
add_addr_timeout_tests()
@@ -1530,6 +1747,55 @@ deny_join_id0_tests()
chk_join_nr "subflow and address allow join id0 2" 1 1 1
}
+# Scenarios for the 'fullmesh' endpoint flag. Each one resets the
+# environment, configures the path-manager limits and endpoints with
+# pm_nl_ctl, runs a transfer, then checks the join and add_addr counters.
+# A 'fullmesh_<n>' argument to run_tests makes do_transfer add <n> endpoints
+# in ns2 with flags 'subflow,fullmesh' while the connection is running.
+fullmesh_tests()
+{
+ # fullmesh 1
+ # 2 fullmesh addrs in ns2, added before the connection,
+ # 1 non-fullmesh addr in ns1, added during the connection.
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 4
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 4
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,fullmesh
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,fullmesh
+ run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
+ chk_join_nr "fullmesh test 2x1" 4 4 4
+ chk_add_nr 1 1
+
+ # fullmesh 2
+ # 1 non-fullmesh addr in ns1, added before the connection,
+ # 1 fullmesh addr in ns2, added during the connection.
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 3
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow
+ chk_join_nr "fullmesh test 1x1" 3 3 3
+ chk_add_nr 1 1
+
+ # fullmesh 3
+ # 1 non-fullmesh addr in ns1, added before the connection,
+ # 2 fullmesh addrs in ns2, added during the connection.
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 5
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 5
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
+ chk_join_nr "fullmesh test 1x2" 5 5 5
+ chk_add_nr 1 1
+
+ # fullmesh 4
+ # 1 non-fullmesh addr in ns1, added before the connection,
+ # 2 fullmesh addrs in ns2, added during the connection,
+ # limit max_subflows to 4.
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 4
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 4
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
+ chk_join_nr "fullmesh test 1x2, limited" 4 4 4
+ chk_add_nr 1 1
+}
+
all_tests()
{
subflows_tests
@@ -1545,6 +1811,7 @@ all_tests()
syncookies_tests
checksum_tests
deny_join_id0_tests
+ fullmesh_tests
}
usage()
@@ -1563,6 +1830,7 @@ usage()
echo " -k syncookies_tests"
echo " -S checksum_tests"
echo " -d deny_join_id0_tests"
+ echo " -m fullmesh_tests"
echo " -c capture pcap files"
echo " -C enable data checksum"
echo " -h help"
@@ -1598,7 +1866,7 @@ if [ $do_all_tests -eq 1 ]; then
exit $ret
fi
-while getopts 'fsltra64bpkdchCS' opt; do
+while getopts 'fsltra64bpkdmchCS' opt; do
case $opt in
f)
subflows_tests
@@ -1639,6 +1907,9 @@ while getopts 'fsltra64bpkdchCS' opt; do
d)
deny_join_id0_tests
;;
+ m)
+ fullmesh_tests
+ ;;
c)
;;
C)
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 115decfdc1ef..354784512748 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -25,7 +25,7 @@
static void syntax(char *argv[])
{
fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]);
- fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n");
+ fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
fprintf(stderr, "\tdel <id> [<ip>]\n");
fprintf(stderr, "\tget <id>\n");
fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n");
@@ -236,11 +236,18 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
flags |= MPTCP_PM_ADDR_FLAG_SIGNAL;
else if (!strcmp(tok, "backup"))
flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else if (!strcmp(tok, "fullmesh"))
+ flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
else
error(1, errno,
"unknown flag %s", argv[arg]);
}
+ if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL &&
+ flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+ error(1, errno, "error flag fullmesh");
+ }
+
rta = (void *)(data + off);
rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
rta->rta_len = RTA_LENGTH(4);
@@ -422,6 +429,13 @@ static void print_addr(struct rtattr *attrs, int len)
printf(",");
}
+ if (flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+ printf("fullmesh");
+ flags &= ~MPTCP_PM_ADDR_FLAG_FULLMESH;
+ if (flags)
+ printf(",");
+ }
+
/* bump unknown flags, if any */
if (flags)
printf("0x%x", flags);
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index db4521335722..3653d6468c67 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -111,8 +111,8 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
static void sock_fanout_set_cbpf(int fd)
{
struct sock_filter bpf_filter[] = {
- BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80), /* ldb [80] */
- BPF_STMT(BPF_RET+BPF_A, 0), /* ret A */
+ BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 80), /* ldb [80] */
+ BPF_STMT(BPF_RET | BPF_A, 0), /* ret A */
};
struct sock_fprog bpf_prog;
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
index 170be65e0816..1cbfeb5052ec 100755
--- a/tools/testing/selftests/net/psock_snd.sh
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -86,9 +86,6 @@ echo "raw truncate hlen - 1 (expected to fail: EINVAL)"
echo "raw gso min size"
./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
-echo "raw gso min size - 1 (expected to fail)"
-(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
-
echo "raw gso max size"
./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
index 8b42e8b04e0f..a59cb6a3c4f5 100755
--- a/tools/testing/selftests/net/run_afpackettests
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -1,9 +1,12 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
if [ $(id -u) != 0 ]; then
echo $msg must be run as root >&2
- exit 0
+ exit $ksft_skip
fi
ret=0
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
new file mode 100755
index 000000000000..e57bbfbc5208
--- /dev/null
+++ b/tools/testing/selftests/net/setup_loopback.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Sysfs files of ${dev} that setup_interrupt overwrites, plus the values read
+# from them while this file is being sourced; cleanup writes those values back.
+# ${dev} is expanded right here, so it has to be set before this file is sourced.
+readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
+readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
+readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
+readonly HARD_IRQS="$(< ${IRQ_PATH})"
+
+# Poll the carrier state of device $1, at most five times, sleeping one second
+# after every attempt that does not read 1. Progress messages go to stderr;
+# the last value read from /sys/class/net/$1/carrier is printed on stdout so
+# that callers can capture it.
+netdev_check_for_carrier() {
+	local -r dev="$1"
+
+	for i in {1..5}; do
+		carrier="$(cat /sys/class/net/${dev}/carrier)"
+		if [[ "${carrier}" -ne 1 ]]; then
+			echo "carrier not ready yet..." >&2
+			sleep 1
+			continue
+		fi
+
+		echo "carrier ready" >&2
+		break
+	done
+
+	echo "${carrier}"
+}
+
+# Switch device $1 into loopback mode and wait for its carrier.
+# Exits the script with status 1 when ethtool refuses or the carrier does not
+# come up. Assumes that there is no existing ipvlan device on the physical
+# device.
+setup_loopback_environment() {
+	local dev="$1"
+
+	# Fail hard if cannot turn on loopback mode for current NIC
+	if ! ethtool -K "${dev}" loopback on; then
+		exit 1
+	fi
+	sleep 1
+
+	# Nothing more to do once the carrier is reported as up
+	carrier=$(netdev_check_for_carrier ${dev})
+	[[ "${carrier}" -ne 1 ]] || return 0
+
+	echo "setup_loopback_environment failed"
+	exit 1
+}
+
+# Create macvlan device $3 with MAC address $4 on top of $1 and move it into
+# network namespace $2, creating the namespace when it does not exist yet.
+# The device is brought up and, when $5 is non-empty, given address $5.
+# Exits with ip's status if the macvlan device cannot be created.
+setup_macvlan_ns(){
+	local -r link_dev="$1"
+	local -r ns_name="$2"
+	local -r ns_dev="$3"
+	local -r ns_mac="$4"
+	local -r addr="$5"
+
+	ip link add link "${link_dev}" dev "${ns_dev}" \
+		address "${ns_mac}" type macvlan
+	exit_code=$?
+	if [[ "${exit_code}" -ne 0 ]]; then
+		echo "setup_macvlan_ns failed"
+		exit $exit_code
+	fi
+
+	if [[ ! -e /var/run/netns/"${ns_name}" ]]; then
+		ip netns add "${ns_name}"
+	fi
+	ip link set dev "${ns_dev}" netns "${ns_name}"
+	ip -netns "${ns_name}" link set dev "${ns_dev}" up
+	[[ -z "${addr}" ]] || ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}"
+
+	sleep 1
+}
+
+# Takes (namespace, device) pairs as arguments: for each pair, delete the
+# device inside the namespace and then the namespace itself. A trailing
+# unpaired argument is ignored.
+cleanup_macvlan_ns(){
+	while [[ $# -ge 2 ]]; do
+		ns_name="$1"
+		ns_dev="$2"
+		shift 2
+
+		ip -netns "${ns_name}" link del dev "${ns_dev}"
+		ip netns del "${ns_name}"
+	done
+}
+
+# Switch device $1 out of loopback mode again and wait for its carrier.
+# Exits the script with status 1 when the carrier does not come back.
+cleanup_loopback(){
+	local -r dev="$1"
+
+	ethtool -K "${dev}" loopback off
+	sleep 1
+
+	# Check for the carrier
+	carrier=$(netdev_check_for_carrier "${dev}")
+	if [[ "${carrier}" -ne 1 ]] ; then
+		# name this function, not the setup path it was copied from
+		echo "cleanup_loopback failed"
+		exit 1
+	fi
+}
+
+# Write 100000 to FLUSH_PATH (gro_flush_timeout) and 50 to IRQ_PATH
+# (napi_defer_hard_irqs).
+setup_interrupt() {
+	# Use timer on host to trigger the network stack
+	# Also disable device interrupt to not depend on NIC interrupt
+	# Reduce test flakiness caused by unexpected interrupts
+	printf '%s\n' 100000 >"${FLUSH_PATH}"
+	printf '%s\n' 50 >"${IRQ_PATH}"
+}
+
+# Create the two test namespaces, each with one macvlan device on ${dev}.
+# No address is passed to setup_macvlan_ns, so the devices stay unaddressed.
+setup_ns() {
+ # Set up server_ns namespace and client_ns namespace
+ setup_macvlan_ns "${dev}" server_ns server "${SERVER_MAC}"
+ setup_macvlan_ns "${dev}" client_ns client "${CLIENT_MAC}"
+}
+
+# Remove the devices and namespaces that setup_ns created.
+cleanup_ns() {
+ cleanup_macvlan_ns server_ns server client_ns client
+}
+
+# Global setup: loopback mode on ${dev}, then the interrupt/GRO sysfs tuning.
+setup() {
+ setup_loopback_environment "${dev}"
+ setup_interrupt
+}
+
+# Global teardown: leave loopback mode and write back the sysfs values that
+# were saved in FLUSH_TIMEOUT and HARD_IRQS when this file was sourced.
+cleanup() {
+ cleanup_loopback "${dev}"
+
+ echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
+ echo "${HARD_IRQS}" >"${IRQ_PATH}"
+}
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh
new file mode 100644
index 000000000000..1003ddf7b3b2
--- /dev/null
+++ b/tools/testing/selftests/net/setup_veth.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Move the existing device $3 into network namespace $2 (created on demand),
+# with gro_flush_timeout set to 100000, mtu 65535, GRO on and TSO off.
+# $1 (link device) and $4 (MAC address) are accepted but not used here.
+setup_veth_ns() {
+	local -r link_dev="$1"
+	local -r ns_name="$2"
+	local -r ns_dev="$3"
+	local -r ns_mac="$4"
+
+	if [[ ! -e /var/run/netns/"${ns_name}" ]]; then
+		ip netns add "${ns_name}"
+	fi
+
+	# the sysfs file is written while the device is still in this namespace
+	echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
+
+	ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
+	ip -netns "${ns_name}" link set dev "${ns_dev}" up
+	ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off
+}
+
+# Create the veth pair server <-> client and move each end into its own
+# namespace (server_ns, client_ns).
+setup_ns() {
+ # Set up server_ns namespace and client_ns namespace
+ ip link add name server type veth peer name client
+
+ setup_veth_ns "${dev}" server_ns server "${SERVER_MAC}"
+ setup_veth_ns "${dev}" client_ns client "${CLIENT_MAC}"
+}
+
+# Delete client_ns and server_ns if they exist.
+# The function's status is that of the last loop iteration, so it is non-zero
+# when server_ns does not exist.
+cleanup_ns() {
+ local ns_name
+
+ for ns_name in client_ns server_ns; do
+ [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}"
+ done
+}
+
+# Global setup hook; intentionally a no-op in this file.
+setup() {
+ # no global init setup step needed
+ :
+}
+
+# Global teardown hook: only the namespaces need to be removed.
+cleanup() {
+ cleanup_ns
+}
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
index 75ada17ac061..aebaab8ce44c 100755
--- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -193,6 +193,9 @@
# +---------------------------------------------------+
#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
readonly LOCALSID_TABLE_ID=90
readonly IPv6_RT_NETWORK=fd00
readonly IPv6_HS_NETWORK=cafe
@@ -543,18 +546,18 @@ host_vpn_isolation_tests()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
echo "SKIP: vrf sysctl does not exist"
- exit 0
+ exit $ksft_skip
fi
cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
index ad7a9fc59934..1003119773e5 100755
--- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -163,6 +163,9 @@
# +---------------------------------------------------+
#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
readonly LOCALSID_TABLE_ID=90
readonly IPv6_RT_NETWORK=fd00
readonly IPv4_HS_NETWORK=10.0.0
@@ -464,18 +467,18 @@ host_vpn_isolation_tests()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
echo "SKIP: vrf sysctl does not exist"
- exit 0
+ exit $ksft_skip
fi
cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
index 68708f5e26a0..b9b06ef80d88 100755
--- a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
@@ -164,6 +164,9 @@
# +---------------------------------------------------+
#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
readonly LOCALSID_TABLE_ID=90
readonly IPv6_RT_NETWORK=fd00
readonly IPv6_HS_NETWORK=cafe
@@ -472,18 +475,18 @@ host_vpn_isolation_tests()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
echo "SKIP: vrf sysctl does not exist"
- exit 0
+ exit $ksft_skip
fi
cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
new file mode 100644
index 000000000000..710ac956bdb3
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz.c
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Toeplitz test
+ *
+ * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
+ * 2. Compute the rx_hash in software based on the packet contents
+ * 3. Compare the two
+ *
+ * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
+ *
+ * If '-C $rx_irq_cpu_list' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the rxqueue that RSS would select based on this rx_hash
+ * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
+ * 7. Compare the cpus from 4 and 6
+ *
+ * Else if '-r $rps_bitmap' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
+ * 6. Compare the cpus from 4 and 5
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define TOEPLITZ_KEY_MIN_LEN 40
+#define TOEPLITZ_KEY_MAX_LEN 60
+
+#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */
+#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
+#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
+
+#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
+
+#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */
+
+#define RPS_MAX_CPUS 16UL /* must be a power of 2 */
+
+/* configuration options (cmdline arguments) */
+static uint16_t cfg_dport = 8000;
+static int cfg_family = AF_INET6;
+static char *cfg_ifname = "eth0";
+static int cfg_num_queues;
+static int cfg_num_rps_cpus;
+static bool cfg_sink;
+static int cfg_type = SOCK_STREAM;
+static int cfg_timeout_msec = 1000;
+static bool cfg_verbose;
+
+/* global vars */
+static int num_cpus;
+static int ring_block_nr;
+static int ring_block_sz;
+
+/* stats */
+static int frames_received;
+static int frames_nohash;
+static int frames_error;
+
+#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0)
+
+/* tpacket ring */
+struct ring_state {
+ int fd; /* packet socket, as returned by create_ring() */
+ char *mmap; /* start of the mapped rx ring, from setup_ring() */
+ int idx; /* index of the block that recv_block() inspects next */
+ int cpu; /* index of this ring in rings[], set by setup_rings() */
+};
+
+/* rx_irq_cpus[] is filled by parse_cpulist() ('-C'), rps_silo_to_cpu[] by
+ * parse_rps_bitmap() ('-r') and toeplitz_key[] by parse_toeplitz_key() ('-k');
+ * setup_rings() initializes the first num_cpus entries of rings[].
+ */
+static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */
+static int rps_silo_to_cpu[RPS_MAX_CPUS];
+static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static struct ring_state rings[RSS_MAX_CPUS];
+
+/* Software Toeplitz hash.
+ *
+ * @four_tuple: FOUR_TUPLE_MAX_LEN bytes of input; shorter tuples are expected
+ *              to be zero padded by the caller (zero bits do not change the
+ *              hash)
+ * @key:        hash key; FOUR_TUPLE_MAX_LEN + 4 bytes of it are read
+ *
+ * For every set input bit, most significant bit first, the current 32-bit
+ * window of the key is XORed into the result; the window then slides one bit
+ * further into the key.
+ */
+static inline uint32_t toeplitz(const unsigned char *four_tuple,
+				const unsigned char *key)
+{
+	uint32_t key32, ret = 0;
+	size_t i;
+	int bit;
+
+	/* The key is a plain byte array without alignment guarantee: copy
+	 * the leading word out instead of dereferencing a cast pointer.
+	 */
+	memcpy(&key32, key, sizeof(key32));
+	key32 = ntohl(key32);
+	key += 4;
+
+	for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
+		for (bit = 7; bit >= 0; bit--) {
+			if (four_tuple[i] & (1 << bit))
+				ret ^= key32;
+
+			/* shift the next key bit into the window */
+			key32 <<= 1;
+			key32 |= !!(key[0] & (1 << bit));
+		}
+		key++;
+	}
+
+	return ret;
+}
+
+/* RSS check: reduce rx_hash modulo cfg_num_queues to a receive queue and
+ * compare the cpu recorded for that queue in rx_irq_cpus[] with @cpu, the cpu
+ * of the ring that delivered the packet (PACKET_FANOUT_CPU).
+ * A mismatch is logged and counted in frames_error.
+ */
+static void verify_rss(uint32_t rx_hash, int cpu)
+{
+	int queue = rx_hash % cfg_num_queues;
+
+	log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
+	if (rx_irq_cpus[queue] == cpu)
+		return;
+
+	log_verbose(". error: rss cpu mismatch (%d)", cpu);
+	frames_error++;
+}
+
+/* RPS check: scale rx_hash to a silo index, (rx_hash * cfg_num_rps_cpus) >> 32,
+ * and compare the cpu stored for that silo in rps_silo_to_cpu[] with @cpu.
+ * A mismatch is logged and counted in frames_error.
+ */
+static void verify_rps(uint64_t rx_hash, int cpu)
+{
+	uint64_t scaled = rx_hash * cfg_num_rps_cpus;
+	int silo = scaled >> 32;
+
+	log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
+	if (rps_silo_to_cpu[silo] == cpu)
+		return;
+
+	log_verbose(". error: rps cpu mismatch (%d)", cpu);
+	frames_error++;
+}
+
+/* Log cpu, rx_hash and the flow of one packet.
+ *
+ * @addrs points at the source address; the destination address follows
+ * @addr_len bytes later and the two 16-bit ports, in network byte order,
+ * directly after that. Aborts if an address cannot be formatted.
+ */
+static void log_rxhash(int cpu, uint32_t rx_hash,
+		       const char *addrs, int addr_len)
+{
+	char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
+	const char *dst_bytes = addrs + addr_len;
+	const char *port_bytes = addrs + (addr_len * 2);
+	uint16_t *ports = (uint16_t *)port_bytes;
+
+	if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
+	    !inet_ntop(cfg_family, dst_bytes, daddr, sizeof(daddr)))
+		error(1, 0, "address parse error");
+
+	log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
+		    cpu, rx_hash, saddr, daddr,
+		    ntohs(ports[0]), ntohs(ports[1]));
+}
+
+/* Recompute the rx hash of one packet in software and compare it with
+ * @rx_hash, the value received from tpacket_v3.
+ *
+ * @pkt points at the IPv4 or IPv6 header, depending on cfg_family. The hash
+ * input is source address, destination address and the two 16-bit ports
+ * stored right behind them, zero padded to FOUR_TUPLE_MAX_LEN.
+ *
+ * On a hash mismatch frames_error is bumped; on a match the RSS or RPS
+ * placement is checked as well when '-C' or '-r' was given.
+ */
+static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
+{
+	unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
+	const bool is_v4 = cfg_family == AF_INET;
+	uint32_t rx_hash_sw;
+	const char *addrs;
+	int addr_len;
+
+	addr_len = is_v4 ? sizeof(struct in_addr) : sizeof(struct in6_addr);
+	addrs = pkt + (is_v4 ? offsetof(struct iphdr, saddr) :
+			       offsetof(struct ip6_hdr, ip6_src));
+
+	memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
+	rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
+
+	/* log_rxhash() can abort, so only call it when its output is wanted */
+	if (cfg_verbose)
+		log_rxhash(cpu, rx_hash, addrs, addr_len);
+
+	if (rx_hash == rx_hash_sw) {
+		log_verbose(" OK");
+		if (cfg_num_queues)
+			verify_rss(rx_hash, cpu);
+		else if (cfg_num_rps_cpus)
+			verify_rps(rx_hash, cpu);
+		log_verbose("\n");
+		return;
+	}
+
+	log_verbose(" != expected 0x%x\n", rx_hash_sw);
+	frames_error++;
+}
+
+/* Handle the frame at @frame: verify its rx hash, or count it in
+ * frames_nohash when the header carries a zero hash.
+ * Returns the address of the frame that follows it in the block.
+ */
+static char *recv_frame(const struct ring_state *ring, char *frame)
+{
+	struct tpacket3_hdr *hdr = (void *)frame;
+
+	if (!hdr->hv1.tp_rxhash)
+		frames_nohash++;
+	else
+		verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
+			      ring->cpu);
+
+	return frame + hdr->tp_next_offset;
+}
+
+/* Process the current block of @ring, if user space owns it.
+ *
+ * A single TPACKET_V3 block can hold multiple frames: each of them goes
+ * through recv_frame() and is counted in frames_received. The block is then
+ * handed back to the kernel and ring->idx moves on to the next block.
+ */
+static void recv_block(struct ring_state *ring)
+{
+	char *base = ring->mmap + ring->idx * ring_block_sz;
+	struct tpacket_block_desc *block = (void *)base;
+	char *frame;
+	int i;
+
+	if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
+		return;
+
+	frame = base + block->hdr.bh1.offset_to_first_pkt;
+	for (i = 0; i < block->hdr.bh1.num_pkts; i++, frames_received++)
+		frame = recv_frame(ring, frame);
+
+	block->hdr.bh1.block_status = TP_STATUS_KERNEL;
+	ring->idx = (ring->idx + 1) % ring_block_nr;
+}
+
+/* simple test: sleep cfg_timeout_msec once, unconditionally, then look at
+ * one block of every ring and print the pass/nohash/fail counters
+ */
+static void process_rings(void)
+{
+	int cpu = 0;
+
+	usleep(1000 * cfg_timeout_msec);
+
+	while (cpu < num_cpus) {
+		recv_block(&rings[cpu]);
+		cpu++;
+	}
+
+	fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
+		frames_received - frames_nohash - frames_error,
+		frames_nohash, frames_error);
+}
+
+/* Configure a TPACKET_V3 rx ring on packet socket @fd and map it.
+ *
+ * The request asks for 1024 frames of 2048 bytes, split over two blocks,
+ * with tp_retire_blk_tov set to cfg_timeout_msec and the rx hash filled in
+ * (TP_FT_REQ_FILL_RXHASH). ring_block_sz and ring_block_nr are updated for
+ * recv_block() and cleanup_rings().
+ *
+ * Returns the start of the mapping; every failure is fatal.
+ */
+static char *setup_ring(int fd)
+{
+	struct tpacket_req3 req3 = {0};
+	void *ring;
+
+	req3.tp_retire_blk_tov = cfg_timeout_msec;
+	req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
+	req3.tp_frame_size = 2048;
+	req3.tp_frame_nr = 1 << 10;
+	req3.tp_block_nr = 2;
+
+	req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
+	req3.tp_block_size /= req3.tp_block_nr;
+
+	if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
+		error(1, errno, "setsockopt PACKET_RX_RING");
+
+	ring_block_sz = req3.tp_block_size;
+	ring_block_nr = req3.tp_block_nr;
+
+	ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
+		    PROT_READ | PROT_WRITE,
+		    MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
+	if (ring == MAP_FAILED)
+		/* report the reason, as every other syscall failure does */
+		error(1, errno, "mmap failed");
+
+	return ring;
+}
+
+/* Attach a classic BPF filter to @fd that accepts a packet only if it is of
+ * type PACKET_HOST, the byte at @off_proto equals @proto and the 16-bit
+ * word at @off_dport equals cfg_dport; everything else is dropped.
+ */
+static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
+{
+	struct sock_filter filter[] = {
+		/* A = packet type; not PACKET_HOST: skip 4, to "drop" */
+		BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, PACKET_HOST, 0, 4),
+		/* A = protocol byte; other protocol: skip 2, to "drop" */
+		BPF_STMT(BPF_LD | BPF_B | BPF_ABS, off_proto),
+		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, proto, 0, 2),
+		/* A = destination port; match: skip 1, to "accept" */
+		BPF_STMT(BPF_LD | BPF_H | BPF_ABS, off_dport),
+		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, cfg_dport, 1, 0),
+		BPF_STMT(BPF_RET | BPF_K, 0),		/* drop */
+		BPF_STMT(BPF_RET | BPF_K, 0xFFFF),	/* accept */
+	};
+	struct sock_fprog prog = {
+		.len = sizeof(filter) / sizeof(filter[0]),
+		.filter = filter,
+	};
+
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+		error(1, errno, "setsockopt filter");
+}
+
+/* filter on transport protocol (from cfg_type) and destination port, using
+ * the header layout that matches cfg_family
+ */
+static void set_filter(int fd)
+{
+	const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */
+	uint8_t proto = IPPROTO_UDP;
+	int off_proto, l3_len;
+
+	if (cfg_type == SOCK_STREAM)
+		proto = IPPROTO_TCP;
+
+	if (cfg_family == AF_INET) {
+		off_proto = offsetof(struct iphdr, protocol);
+		l3_len = sizeof(struct iphdr);
+	} else {
+		off_proto = offsetof(struct ip6_hdr, ip6_nxt);
+		l3_len = sizeof(struct ip6_hdr);
+	}
+
+	/* the transport header is taken to start right after the fixed ip header */
+	__set_filter(fd, off_proto, proto, l3_len + off_dport);
+}
+
+/* drop everything: a one-instruction filter that returns 0 for every packet;
+ * used temporarily during setup
+ */
+static void set_filter_null(int fd)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET | BPF_K, 0),
+	};
+	struct sock_fprog prog = {
+		.len = sizeof(filter) / sizeof(filter[0]),
+		.filter = filter,
+	};
+
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+		error(1, errno, "setsockopt filter");
+}
+
+/* Open one PF_PACKET/SOCK_DGRAM socket, switch it to TPACKET_V3, map its rx
+ * ring, attach the drop-all filter, bind it to cfg_ifname for the ethertype
+ * that matches cfg_family and add it to PACKET_FANOUT_CPU group 1.
+ *
+ * The mapped ring is stored through @ring; the socket fd is returned.
+ * Every failure is reported through error() with exit status 1.
+ *
+ * NOTE(review): the if_nametoindex() result is not checked, so an unknown
+ * cfg_ifname leaves sll_ifindex at 0 - confirm that this is intended before
+ * adding a check.
+ */
+static int create_ring(char **ring)
+{
+ struct fanout_args args = {
+ .id = 1,
+ .type_flags = PACKET_FANOUT_CPU,
+ .max_num_members = RSS_MAX_CPUS
+ };
+ struct sockaddr_ll ll = { 0 };
+ int fd, val;
+
+ fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket creation failed");
+
+ val = TPACKET_V3;
+ if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
+ error(1, errno, "setsockopt PACKET_VERSION");
+ *ring = setup_ring(fd);
+
+ /* block packets until all rings are added to the fanout group:
+ * else packets can arrive during setup and get misclassified
+ */
+ set_filter_null(fd);
+
+ ll.sll_family = AF_PACKET;
+ ll.sll_ifindex = if_nametoindex(cfg_ifname);
+ ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
+ htons(ETH_P_IPV6);
+ if (bind(fd, (void *)&ll, sizeof(ll)))
+ error(1, errno, "bind");
+
+ /* must come after bind: verifies all programs in group match */
+ if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
+ /* on failure, retry using old API if that is sufficient:
+ * it has a hard limit of 256 sockets, so only try if
+ * (a) only testing rxhash, not RSS or (b) <= 256 cpus.
+ * in this API, the third argument is left implicit.
+ */
+ if (cfg_num_queues || num_cpus > 256 ||
+ setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+ &args, sizeof(uint32_t)))
+ error(1, errno, "setsockopt PACKET_FANOUT cpu");
+ }
+
+ return fd;
+}
+
+/* setup inet(6) socket to blackhole the test traffic, if arg '-s':
+ * a socket of cfg_family/cfg_type with its receive buffer forced to 1 MiB.
+ * Returns the fd; failures are fatal.
+ */
+static int setup_sink(void)
+{
+	int rcvbuf = 1 << 20;
+	int fd = socket(cfg_family, cfg_type, 0);
+
+	if (fd == -1)
+		error(1, errno, "socket %d.%d", cfg_family, cfg_type);
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &rcvbuf, sizeof(rcvbuf)))
+		error(1, errno, "setsockopt rcvbuf");
+
+	return fd;
+}
+
+/* Create one ring per cpu, rings[i] being labelled with cpu i, and only then
+ * replace the drop-all filter of every ring with the real one.
+ */
+static void setup_rings(void)
+{
+	int i;
+
+	for (i = 0; i < num_cpus; i++) {
+		struct ring_state *ring = &rings[i];
+
+		ring->cpu = i;
+		ring->fd = create_ring(&ring->mmap);
+	}
+
+	/* accept packets once all rings in the fanout group are up */
+	i = 0;
+	while (i < num_cpus)
+		set_filter(rings[i++].fd);
+}
+
+/* Unmap every ring and close its socket; any failure is fatal. */
+static void cleanup_rings(void)
+{
+	struct ring_state *ring;
+	int i;
+
+	for (i = 0; i < num_cpus; i++) {
+		ring = &rings[i];
+
+		if (munmap(ring->mmap, ring_block_nr * ring_block_sz))
+			error(1, errno, "munmap");
+		if (close(ring->fd))
+			error(1, errno, "close");
+	}
+}
+
+/* Parse the '-C' argument, a comma separated list of cpu numbers, into
+ * rx_irq_cpus[]: entry N is the cpu for rx queue N. cfg_num_queues ends up
+ * as the number of entries; a list longer than RSS_MAX_CPUS is rejected
+ * rather than written past the end of the table.
+ */
+static void parse_cpulist(const char *arg)
+{
+	for (;;) {
+		if (cfg_num_queues >= RSS_MAX_CPUS)
+			error(1, 0, "too many cpus in list, max %d",
+			      RSS_MAX_CPUS);
+
+		rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
+
+		arg = strchr(arg, ',');
+		if (!arg)
+			break;
+		arg++; // skip ','
+	}
+}
+
+/* Print the rx queue to cpu map built by parse_cpulist() to stderr. */
+static void show_cpulist(void)
+{
+	int rxq = 0;
+
+	while (rxq < cfg_num_queues) {
+		fprintf(stderr, "rxq %d: cpu %d\n", rxq, rx_irq_cpus[rxq]);
+		rxq++;
+	}
+}
+
+/* Print the silo to cpu map built by parse_rps_bitmap() to stderr. */
+static void show_silos(void)
+{
+	int silo = 0;
+
+	while (silo < cfg_num_rps_cpus) {
+		fprintf(stderr, "silo %d: cpu %d\n", silo, rps_silo_to_cpu[silo]);
+		silo++;
+	}
+}
+
+/* Decode a hex encoded key of the form AA:BB:...:ZZ into @key.
+ *
+ * @str:  the encoded key
+ * @slen: its length; must lie between the encoded minimum key length and
+ *        the encoded maximum key length plus one
+ * @key:  output buffer, one byte per three input characters
+ *
+ * Any length or parse error is fatal.
+ */
+static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
+{
+	int i = 0, off = 0;
+
+	if (!(slen >= TOEPLITZ_STR_MIN_LEN &&
+	      slen <= TOEPLITZ_STR_MAX_LEN + 1))
+		error(1, 0, "invalid toeplitz key");
+
+	/* every byte takes two hex digits plus one separator */
+	while (off < slen) {
+		if (sscanf(str + off, "%hhx", &key[i]) != 1)
+			error(1, 0, "key parse error at %d off %d len %d",
+			      i, off, slen);
+		i++;
+		off += 3;
+	}
+}
+
+/* Parse the '-r' argument, a bitmap of cpus, into rps_silo_to_cpu[]: the
+ * N-th set bit, counted from bit 0, gives the cpu of silo N.
+ * cfg_num_rps_cpus ends up as the number of set bits.
+ *
+ * Bits 0 .. RPS_MAX_CPUS - 1 are accepted; a bitmap with any higher bit set
+ * is fatal.
+ */
+static void parse_rps_bitmap(const char *arg)
+{
+	unsigned long bitmap;
+	int i;
+
+	bitmap = strtoul(arg, NULL, 0);
+
+	/* mask of the valid cpu bits, not of the cpu count itself */
+	if (bitmap & ~((1UL << RPS_MAX_CPUS) - 1))
+		error(1, 0, "rps bitmap 0x%lx out of bounds, max cpu %lu",
+		      bitmap, RPS_MAX_CPUS - 1);
+
+	for (i = 0; i < RPS_MAX_CPUS; i++)
+		if (bitmap & 1UL << i)
+			rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
+}
+
+/* Parse the command line into the cfg_* globals, the toeplitz key and the
+ * RSS/RPS maps, and set num_cpus from get_nprocs().
+ *
+ * Fatal errors: unknown option, missing key ('-k'), more cpus than
+ * RSS_MAX_CPUS, or both '-C' and '-r' given. With '-v' the parsed maps are
+ * printed.
+ */
+static void parse_opts(int argc, char **argv)
+{
+	static struct option long_options[] = {
+	    {"dport",	required_argument, 0, 'd'},
+	    {"cpus",	required_argument, 0, 'C'},
+	    {"key",	required_argument, 0, 'k'},
+	    {"iface",	required_argument, 0, 'i'},
+	    {"ipv4",	no_argument, 0, '4'},
+	    {"ipv6",	no_argument, 0, '6'},
+	    {"sink",	no_argument, 0, 's'},
+	    {"tcp",	no_argument, 0, 't'},
+	    {"timeout",	required_argument, 0, 'T'},
+	    {"udp",	no_argument, 0, 'u'},
+	    {"verbose",	no_argument, 0, 'v'},
+	    {"rps",	required_argument, 0, 'r'},
+	    {0, 0, 0, 0}
+	};
+	bool have_toeplitz = false;
+	int index, c;
+
+	/* '-u' is a plain flag, like its long form "udp": without the ':'
+	 * getopt no longer swallows the argument that follows it.
+	 */
+	while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) {
+		switch (c) {
+		case '4':
+			cfg_family = AF_INET;
+			break;
+		case '6':
+			cfg_family = AF_INET6;
+			break;
+		case 'C':
+			parse_cpulist(optarg);
+			break;
+		case 'd':
+			cfg_dport = strtol(optarg, NULL, 0);
+			break;
+		case 'i':
+			cfg_ifname = optarg;
+			break;
+		case 'k':
+			parse_toeplitz_key(optarg, strlen(optarg),
+					   toeplitz_key);
+			have_toeplitz = true;
+			break;
+		case 'r':
+			parse_rps_bitmap(optarg);
+			break;
+		case 's':
+			cfg_sink = true;
+			break;
+		case 't':
+			cfg_type = SOCK_STREAM;
+			break;
+		case 'T':
+			cfg_timeout_msec = strtol(optarg, NULL, 0);
+			break;
+		case 'u':
+			cfg_type = SOCK_DGRAM;
+			break;
+		case 'v':
+			cfg_verbose = true;
+			break;
+
+		default:
+			error(1, 0, "unknown option %c", optopt);
+			break;
+		}
+	}
+
+	if (!have_toeplitz)
+		error(1, 0, "Must supply rss key ('-k')");
+
+	num_cpus = get_nprocs();
+	if (num_cpus > RSS_MAX_CPUS)
+		error(1, 0, "increase RSS_MAX_CPUS");
+
+	if (cfg_num_queues && cfg_num_rps_cpus)
+		error(1, 0,
+		      "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
+	if (cfg_verbose) {
+		show_cpulist();
+		show_silos();
+	}
+}
+
+/* Run the test once: parse options, optionally open the sink socket, set up
+ * the rings, wait and verify the received frames, tear everything down.
+ *
+ * Fails hard when fewer than min_tests frames carried an rx hash. Otherwise
+ * the exit status is the number of frames that failed verification, capped
+ * at 255 so that a large count cannot wrap around to 0 (success) in the
+ * 8-bit exit status.
+ */
+int main(int argc, char **argv)
+{
+	const int min_tests = 10;
+	const int max_status = 255;
+	int fd_sink = -1;
+
+	parse_opts(argc, argv);
+
+	if (cfg_sink)
+		fd_sink = setup_sink();
+
+	setup_rings();
+	process_rings();
+	cleanup_rings();
+
+	if (cfg_sink && close(fd_sink))
+		error(1, errno, "close sink");
+
+	if (frames_received - frames_nohash < min_tests)
+		error(1, 0, "too few frames for verification");
+
+	return frames_error > max_status ? max_status : frames_error;
+}
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
new file mode 100755
index 000000000000..0a49907cd4fe
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz.sh
@@ -0,0 +1,199 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping
+# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu
+# ('-rps <rps_map>')
+#
+# irq-pattern-prefix can be derived from /sys/kernel/irq/*/actions,
+# which is a driver-specific encoding.
+#
+# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \
+# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]
+
+source setup_loopback.sh
+readonly SERVER_IP4="192.168.1.200/24"
+readonly SERVER_IP6="fda8::1/64"
+readonly SERVER_MAC="aa:00:00:00:00:02"
+
+readonly CLIENT_IP4="192.168.1.100/24"
+readonly CLIENT_IP6="fda8::2/64"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+
+# Defaults; TEST_RSS, RPS_MAP, PROTO_FLAG, IP_FLAG and DEV are overwritten by
+# parse_opts. KEY is the rss key of the running kernel, handed to the receiver
+# with '-k'.
+PORT=8000
+KEY="$(</proc/sys/net/core/netdev_rss_key)"
+TEST_RSS=false
+RPS_MAP=""
+PROTO_FLAG=""
+IP_FLAG=""
+DEV="eth0"
+
+# Return the number of rxqs among which RSS is configured to spread packets.
+# This is determined by reading the RSS indirection table using ethtool:
+# the rows of the form "<index>: <queue> <queue> ..." are selected, the index
+# column is cut off and the distinct queue numbers are counted.
+get_rss_cfg_num_rxqs() {
+	# the pattern is quoted so that the shell cannot expand it as a glob
+	echo $(ethtool -x "${DEV}" |
+		grep -E '[[:space:]]+[0-9]+:[[:space:]]+' |
+		cut -d: -f2- |
+		awk '{$1=$1};1' |
+		tr ' ' '\n' |
+		sort -u |
+		wc -l)
+}
+
+# Return a list of the receive irq handler cpus, comma separated.
+# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc.
+# Reads /sys/kernel/irq/ in order, so algorithm depends on
+# irq_{rxq-0} < irq_{rxq-1}, etc.
+# Only irqs whose actions match IRQ_PATTERN and whose per-cpu counters are not
+# all zero are taken into account.
+get_rx_irq_cpus() {
+	CPUS=""
+	# version sort, so that irq 2 is read before irq 10
+	SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V)
+	# Consider only as many queues as RSS actually uses. We assume that
+	# if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1).
+	RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs)
+	RXQ_COUNT=0
+
+	for i in ${SORTED_IRQS}
+	do
+		[[ "${RXQ_COUNT}" -ge "${RSS_CFG_NUM_RXQS}" ]] && break
+
+		# lookup relevant IRQs by action name
+		[[ -e "$i/actions" ]] || continue
+		grep -q "${IRQ_PATTERN}" "$i/actions" || continue
+		irqname=$(<"$i/actions")
+
+		# does the IRQ get called: anything left after dropping the
+		# zeroes and commas means at least one non-zero counter
+		irqcount=$(tr -d '0,' <"$i/per_cpu_count")
+		[[ -n "${irqcount}" ]] || continue
+
+		# lookup CPU
+		irq="${i##*/}"
+		cpu=$(<"/proc/irq/$irq/smp_affinity_list")
+
+		CPUS="${CPUS:+${CPUS},}${cpu}"
+		RXQ_COUNT=$((RXQ_COUNT+1))
+	done
+
+	echo "${CPUS}"
+}
+
+# Print (not run) the shell command that writes 0 to rps_sock_flow_entries;
+# the caller passes the text to eval.
+get_disable_rfs_cmd() {
+ echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;"
+}
+
+# Print (not run) the shell commands that write bitmap $1 to the rps_cpus
+# file of every rx queue of ${DEV}; the caller passes the text to eval.
+get_set_rps_bitmaps_cmd() {
+	CMD=""
+	for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus
+	do
+		CMD+=" echo $1 > ${i};"
+	done
+
+	echo "${CMD}"
+}
+
+# Print the commands that write bitmap 0 to every rx queue of ${DEV}.
+get_disable_rps_cmd() {
+ echo "$(get_set_rps_bitmaps_cmd 0)"
+}
+
+# Print message $1 on stdout and exit the script with status 1.
+die() {
+ echo "$1"
+ exit 1
+}
+
+# Abort the test unless "ethtool -k" reports receive hashing as on for ${DEV}.
+check_nic_rxhash_enabled() {
+	# a blank needs no escaping inside the quotes; the backslash would
+	# reach grep as a stray escape
+	local -r pattern="receive-hashing: on"
+
+	ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled"
+}
+
+# Parse the script arguments into IRQ_PATTERN, PROTO_FLAG, IP_FLAG,
+# SERVER_IP, CLIENT_IP, TEST_RSS, RPS_MAP and DEV.
+# Expects the script name as first argument, followed by the script's own
+# arguments. Parsing stops at the first argument that contains no "-";
+# an unknown argument that does contain one prints the usage and exits.
+parse_opts() {
+ local prog=$0
+ shift 1
+
+ while [[ "$1" =~ "-" ]]; do
+ if [[ "$1" = "-irq_prefix" ]]; then
+ shift
+ # matched against the irq's actions file by get_rx_irq_cpus
+ IRQ_PATTERN="^$1-[0-9]*$"
+ elif [[ "$1" = "-u" || "$1" = "-t" ]]; then
+ PROTO_FLAG="$1"
+ elif [[ "$1" = "-4" ]]; then
+ IP_FLAG="$1"
+ SERVER_IP="${SERVER_IP4}"
+ CLIENT_IP="${CLIENT_IP4}"
+ elif [[ "$1" = "-6" ]]; then
+ IP_FLAG="$1"
+ SERVER_IP="${SERVER_IP6}"
+ CLIENT_IP="${CLIENT_IP6}"
+ elif [[ "$1" = "-rss" ]]; then
+ TEST_RSS=true
+ elif [[ "$1" = "-rps" ]]; then
+ shift
+ RPS_MAP="$1"
+ elif [[ "$1" = "-i" ]]; then
+ shift
+ DEV="$1"
+ else
+ die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \
+ [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]"
+ fi
+ shift
+ done
+}
+
+# Put ${DEV} into loopback mode and create the addressed server and client
+# macvlan namespaces. Defined after setup_loopback.sh has been sourced, so
+# it replaces the setup function from that file.
+setup() {
+ setup_loopback_environment "${DEV}"
+
+ # Set up server_ns namespace and client_ns namespace
+ setup_macvlan_ns "${DEV}" server_ns server \
+ "${SERVER_MAC}" "${SERVER_IP}"
+ setup_macvlan_ns "${DEV}" client_ns client \
+ "${CLIENT_MAC}" "${CLIENT_IP}"
+}
+
+# Undo setup: remove the namespaces and leave loopback mode. Replaces the
+# cleanup function from setup_loopback.sh; installed as EXIT trap below.
+cleanup() {
+ cleanup_macvlan_ns server_ns server client_ns client
+ cleanup_loopback "${DEV}"
+}
+
+# quoted, so that arguments reach parse_opts exactly as they were given
+parse_opts "$0" "$@"
+
+setup
+trap cleanup EXIT
+
+check_nic_rxhash_enabled
+
+# Actual test starts here
+# Each branch starts the receiver (./toeplitz) in the background inside
+# server_ns; the RSS and RPS variants first eval the generated sysfs commands.
+if [[ "${TEST_RSS}" = true ]]; then
+ # RPS/RFS must be disabled because they move packets between cpus,
+ # which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
+ eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
+ ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+ -C "$(get_rx_irq_cpus)" -s -v &
+elif [[ ! -z "${RPS_MAP}" ]]; then
+ eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
+ ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+ -r "0x${RPS_MAP}" -s -v &
+else
+ ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
+fi
+
+server_pid=$!
+
+# traffic generator, also in the background; the prefix length is stripped
+# from SERVER_IP
+ip netns exec client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
+ "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
+
+client_pid=$!
+
+# the receiver's exit status is the test result; the generator is killed as
+# soon as the receiver is done
+wait "${server_pid}"
+exit_code=$?
+kill -9 "${client_pid}"
+if [[ "${exit_code}" -eq 0 ]]; then
+ echo "Test Succeeded!"
+fi
+exit "${exit_code}"
diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh
new file mode 100755
index 000000000000..2fef34f4aba1
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz_client.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A simple program for generating traffic for the toeplitz test.
+#
+# This program sends packets periodically for, conservatively, 20 seconds. The
+# intent is for the calling program to kill this program once it is no longer
+# needed, rather than waiting for the 20 second expiration.
+
+# Send one short message after the other to ${ADDR}:${PORT} with nc, over udp
+# when ${PROTO} is "-u" and over tcp otherwise, until 20 seconds have passed.
+# Every message carries its sequence number.
+send_traffic() {
+	# message counter; the payload used to reference an unset $i
+	local i=0
+
+	expiration=$((SECONDS+20))
+	while [[ "${SECONDS}" -lt "${expiration}" ]]
+	do
+		if [[ "${PROTO}" == "-u" ]]; then
+			echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}"
+		else
+			echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}"
+		fi
+		i=$((i+1))
+		sleep 0.001
+	done
+}
+
+# Positional arguments: protocol flag (compared against "-u"), ip version flag
+# handed to nc, destination address, destination port.
+PROTO=$1
+IPVER=$2
+ADDR=$3
+PORT=$4
+
+send_traffic
diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh
index 66354cdd5ce4..2d10ccac898a 100755
--- a/tools/testing/selftests/net/unicast_extensions.sh
+++ b/tools/testing/selftests/net/unicast_extensions.sh
@@ -28,12 +28,15 @@
# These tests provide an easy way to flip the expected result of any
# of these behaviors for testing kernel patches that change them.
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
# nettest can be run from PATH or from same directory as this selftest
if ! which nettest >/dev/null; then
PATH=$PWD:$PATH
if ! which nettest >/dev/null; then
echo "'nettest' command not found; skipping tests"
- exit 0
+ exit $ksft_skip
fi
fi
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 11d7cdb898c0..19eac3e44c06 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -13,7 +13,7 @@ readonly NS_DST=$BASE$DST
readonly BM_NET_V4=192.168.1.
readonly BM_NET_V6=2001:db8::
-readonly NPROCS=`nproc`
+readonly CPUS=`nproc`
ret=0
cleanup() {
@@ -75,6 +75,29 @@ chk_tso_flag() {
__chk_flag "$1" $2 $3 tcp-segmentation-offload
}
+# Report whether veth$target, inside namespace $BASE$target, currently has
+# the expected number of rx ($3) and tx ($4) channels and "n/a" combined
+# channels. $1 is the label printed in front of the verdict.
+chk_channels() {
+	local msg="$1" target=$2 rx=$3 tx=$4
+	local dev=veth$target
+	local cur_rx cur_tx cur_combined
+
+	# For each field keep the second column of the last matching line of
+	# the "ethtool -l" output.
+	cur_rx=$(ip netns exec $BASE$target ethtool -l $dev |
+		 awk '/RX:/ { val = $2 } END { print val }')
+	cur_tx=$(ip netns exec $BASE$target ethtool -l $dev |
+		 awk '/TX:/ { val = $2 } END { print val }')
+	cur_combined=$(ip netns exec $BASE$target ethtool -l $dev |
+		       awk '/Combined:/ { val = $2 } END { print val }')
+
+	printf "%-60s" "$msg"
+	if [ "$cur_rx" != "$rx" ] || [ "$cur_tx" != "$tx" ] ||
+	   [ "$cur_combined" != "n/a" ]; then
+		echo " fail rx:$rx:$cur_rx tx:$tx:$cur_tx combined:n/a:$cur_combined"
+	else
+		echo " ok "
+	fi
+}
+
chk_gro() {
local msg="$1"
local expected=$2
@@ -107,11 +130,100 @@ chk_gro() {
fi
}
+# Until the epoch-seconds deadline given as $1 has passed, keep resizing
+# the rx/tx channels of both veth devices: up from 1 to $CPUS, then back
+# down from $CPUS - 1 to 1.
+__change_channels()
+{
+	local deadline=$1
+	local now nr
+
+	while printf -v now '%(%s)T'; [ $now -le $deadline ]; do
+		for ((nr = 1; nr <= CPUS; nr++)); do
+			ip netns exec $NS_SRC ethtool -L veth$SRC rx $nr tx $nr
+			ip netns exec $NS_DST ethtool -L veth$DST rx $nr tx $nr
+		done
+
+		for ((nr = CPUS - 1; nr >= 1; nr--)); do
+			ip netns exec $NS_SRC ethtool -L veth$SRC rx $nr tx $nr
+			ip netns exec $NS_DST ethtool -L veth$DST rx $nr tx $nr
+		done
+	done
+}
+
+# Repeatedly run the UDP GSO sender from $NS_SRC towards $BM_NET_V4$DST
+# until the epoch-seconds deadline given as $1 has passed.
+__send_data() {
+	local end=$1
+	# Kept local, as in __change_channels, so that the timestamp does not
+	# leak into the caller's scope.
+	local cur
+
+	while true; do
+		printf -v cur '%(%s)T'
+		[ $cur -le $end ] || break
+
+		ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 1000 -M 300 -D $BM_NET_V4$DST
+	done
+}
+
+# Run the optional stress test for $STRESS seconds: one background job keeps
+# changing the channel counts while four background senders push UDP traffic
+# at a receiver started in $NS_DST.
+do_stress() {
+ local end
+ # With no argument, bash's %(%s)T yields the current time in epoch seconds.
+ printf -v end '%(%s)T'
+ end=$((end + $STRESS))
+
+ # Start from 3 rx and 3 tx channels on both devices.
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 3 tx 3
+ ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+
+ # The receiver runs in the background until it is killed below.
+ ip netns exec $NS_DST ./udpgso_bench_rx &
+ local rx_pid=$!
+
+ echo "Running stress test for $STRESS seconds..."
+ # Each $! must be captured right after its job is started.
+ __change_channels $end &
+ local ch_pid=$!
+ __send_data $end &
+ local data_pid_1=$!
+ __send_data $end &
+ local data_pid_2=$!
+ __send_data $end &
+ local data_pid_3=$!
+ __send_data $end &
+ local data_pid_4=$!
+
+ # Wait for the channel changer and all four senders, then stop the receiver.
+ wait $ch_pid $data_pid_1 $data_pid_2 $data_pid_3 $data_pid_4
+ kill -9 $rx_pid
+ echo "done"
+
+ # restore previous setting
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 tx 2
+ ip netns exec $NS_DST ethtool -L veth$DST rx 2 tx 1
+}
+
+# Print the command-line help.
+usage() {
+	echo "Usage: $0 [-h] [-s <seconds>]"
+	echo -e "\t-h: show this help"
+	echo -e "\t-s: run optional stress tests for the given amount of seconds"
+}
+
+# STRESS is the stress-test duration in seconds; 0 disables the stress test.
+STRESS=0
+while getopts "hs:" option; do
+	case "$option" in
+	"h")
+		usage
+		exit 0
+		;;
+	"s")
+		# STRESS is later compared with "-gt", so anything but a
+		# non-negative integer would break those tests.
+		case "$OPTARG" in
+		""|*[!0-9]*)
+			echo "Invalid stress duration: $OPTARG"
+			usage
+			exit 1
+			;;
+		esac
+		STRESS=$OPTARG
+		;;
+	*)
+		# getopts has already reported the offending option.
+		usage
+		exit 1
+		;;
+	esac
+done
+
if [ ! -f ../bpf/xdp_dummy.o ]; then
echo "Missing xdp_dummy helper. Build bpf selftest first"
exit 1
fi
+[ $CPUS -lt 2 ] && echo "Only one CPU available, some tests will be skipped"
+[ $STRESS -gt 0 -a $CPUS -lt 3 ] && echo " stress test will be skipped, too"
+
create_ns
chk_gro_flag "default - gro flag" $SRC off
chk_gro_flag " - peer gro flag" $DST off
@@ -134,6 +246,8 @@ chk_gro " - aggregation with TSO off" 1
cleanup
create_ns
+chk_channels "default channels" $DST 1 1
+
ip -n $NS_DST link set dev veth$DST down
ip netns exec $NS_DST ethtool -K veth$DST gro on
chk_gro_flag "with gro enabled on link down - gro flag" $DST on
@@ -147,6 +261,56 @@ chk_gro " - aggregation with TSO off" 1
cleanup
create_ns
+
+CUR_TX=1
+CUR_RX=1
+if [ $CPUS -gt 1 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST tx 2
+ chk_channels "setting tx channels" $DST 1 2
+ CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+ chk_channels "setting both rx and tx channels" $DST 3 3
+ CUR_RX=3
+ CUR_TX=3
+fi
+
+ip netns exec $NS_DST ethtool -L veth$DST combined 2 2>/dev/null
+chk_channels "bad setting: combined channels" $DST $CUR_RX $CUR_TX
+
+ip netns exec $NS_DST ethtool -L veth$DST tx $((CPUS + 1)) 2>/dev/null
+chk_channels "setting invalid channels nr" $DST $CUR_RX $CUR_TX
+
+if [ $CPUS -gt 1 ]; then
+ # this also tests queues nr reduction
+ ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null
+ printf "%-60s" "bad setting: XDP with RX nr less than TX"
+ ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+ section xdp_dummy 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+
+ # the following tests will run with multiple channels active
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 2
+ ip netns exec $NS_DST ethtool -L veth$DST rx 2
+ ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+ section xdp_dummy 2>/dev/null
+ printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set"
+ ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+ CUR_RX=2
+ CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+ printf "%-60s" "bad setting: increasing peer TX nr above RX with XDP set"
+ ip netns exec $NS_SRC ethtool -L veth$SRC tx 3 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+ chk_channels "setting invalid channels nr" $DST 2 2
+fi
+
ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
chk_gro_flag "with xdp attached - gro flag" $DST on
chk_gro_flag " - peer gro flag" $SRC off
@@ -167,10 +331,27 @@ chk_gro_flag " - after gro on xdp off, gro flag" $DST on
chk_gro_flag " - peer gro flag" $SRC off
chk_tso_flag " - tso flag" $SRC on
chk_tso_flag " - peer tso flag" $DST on
+
+if [ $CPUS -gt 1 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST tx 1
+ chk_channels "decreasing tx channels with device down" $DST 2 1
+fi
+
ip -n $NS_DST link set dev veth$DST up
ip -n $NS_SRC link set dev veth$SRC up
chk_gro " - aggregation" 1
+if [ $CPUS -gt 1 ]; then
+ [ $STRESS -gt 0 -a $CPUS -gt 2 ] && do_stress
+
+ ip -n $NS_DST link set dev veth$DST down
+ ip -n $NS_SRC link set dev veth$SRC down
+ ip netns exec $NS_DST ethtool -L veth$DST tx 2
+ chk_channels "increasing tx channels with device down" $DST 2 2
+ ip -n $NS_DST link set dev veth$DST up
+ ip -n $NS_SRC link set dev veth$SRC up
+fi
+
ip netns exec $NS_DST ethtool -K veth$DST gro off
ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
chk_gro "aggregation again with default and TSO off" 10
diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
index 18b982d611de..865d53c1781c 100755
--- a/tools/testing/selftests/net/vrf_strict_mode_test.sh
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -3,6 +3,9 @@
# This test is designed for testing the new VRF strict_mode functionality.
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
ret=0
# identifies the "init" network namespace which is often called root network
@@ -371,18 +374,18 @@ vrf_strict_mode_check_support()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
echo "SKIP: vrf sysctl does not exist"
- exit 0
+ exit $ksft_skip
fi
cleanup &> /dev/null
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
index 15d937ba96ca..fd1ffaa5a135 100755
--- a/tools/testing/selftests/rcutorture/bin/jitter.sh
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -68,16 +68,12 @@ do
cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
srand(n + me + systime());
ncpus = split(cpus, ca);
- curcpu = ca[int(rand() * ncpus + 1)];
- z = "";
- for (i = 1; 4 * i <= curcpu; i++)
- z = z "0";
- print "0x" 2 ^ (curcpu % 4) z;
+ print ca[int(rand() * ncpus + 1)];
}' < /dev/null`
n=$(($n+1))
- if ! taskset -p $cpumask $$ > /dev/null 2>&1
+ if ! taskset -c -p $cpumask $$ > /dev/null 2>&1
then
- echo taskset failure: '"taskset -p ' $cpumask $$ '"'
+ echo taskset failure: '"taskset -c -p ' $cpumask $$ '"'
exit 1
fi
diff --git a/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
index e5cc6b2f195e..1af5d6b86b39 100755
--- a/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
+++ b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
@@ -14,7 +14,7 @@ if test -z "$TORTURE_KCONFIG_KCSAN_ARG"
then
exit 0
fi
-cat $1/*/console.log |
+find $1 -name console.log -exec cat {} \; |
grep "BUG: KCSAN: " |
sed -e 's/^\[[^]]*] //' |
sort |
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
index d8c8483c46f1..5a0023d183da 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -142,7 +142,7 @@ then
echo "Cannot copy from $oldrun to $rundir."
usage
fi
-rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log
+rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log
touch "$rundir/log"
echo $scriptname $args | tee -a "$rundir/log"
echo $oldrun > "$rundir/re-run"
@@ -179,6 +179,6 @@ if test -n "$dryrun"
then
echo ---- Dryrun complete, directory: $rundir | tee -a "$rundir/log"
else
- ( cd "$rundir"; sh $T/runbatches.sh )
+ ( cd "$rundir"; sh $T/runbatches.sh ) | tee -a "$rundir/log"
kvm-end-run-stats.sh "$rundir" "$starttime"
fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh
new file mode 100755
index 000000000000..f99b2c146f83
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh
@@ -0,0 +1,123 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Produce awk statements roughly depicting the system's CPU and cache
+# layout. If the required information is not available, produce
+# error messages as awk comments. Successful exit regardless.
+#
+# Usage: kvm-assign-cpus.sh /path/to/sysfs
+#
+# Output, one awk statement per line:
+#	cpu[node][idx] = N;	CPU N is entry idx of the given node
+#	nodecpus[node] = n;	number of cpu[node][] entries emitted
+#	numnodes = n;		number of node* entries processed
+# Nodes are numbered consecutively from zero in glob order of node*.
+
+T=/tmp/kvm-assign-cpus.sh.$$
+trap 'rm -rf $T' 0 2
+mkdir $T
+
+sysfsdir=${1-/sys/devices/system/node}
+if ! cd "$sysfsdir" > $T/msg 2>&1
+then
+	sed -e 's/^/# /' < $T/msg
+	exit 0
+fi
+
+# Validate the first node* entry and its first cache index directory,
+# then collect the cache index names of that node's first numbered CPU.
+for i in node*
+do
+	if ! test -d $i/
+	then
+		echo "# Not a directory: $sysfsdir/$i"
+		exit 0
+	fi
+	for j in $i/cpu*/cache/index*
+	do
+		if ! test -d $j/
+		then
+			echo "# Not a directory: $sysfsdir/$j"
+			exit 0
+		else
+			break
+		fi
+	done
+	indexlist="`ls -d $i/cpu* | grep 'cpu[0-9][0-9]*' | head -1 | sed -e 's,^.*$,ls -d &/cache/index*,' | sh | sed -e 's,^.*/,,'`"
+	break
+done
+
+# Make sure that the first shared_cpu_list is a regular file.
+for i in node*/cpu*/cache/index*/shared_cpu_list
+do
+	if ! test -f $i
+	then
+		echo "# Not a file: $sysfsdir/$i"
+		exit 0
+	else
+		break
+	fi
+done
+
+# Find the first cache index whose shared_cpu_list names more than one
+# CPU (contains "-" or ",") on some node.
+firstshared=
+for i in $indexlist
+do
+	rm -f $T/cpulist
+	for n in node*
+	do
+		f="$n/cpu*/cache/$i/shared_cpu_list"
+		if ! cat $f > $T/msg 2>&1
+		then
+			sed -e 's/^/# /' < $T/msg
+			exit 0
+		fi
+		cat $f >> $T/cpulist
+	done
+	if grep -q '[-,]' $T/cpulist
+	then
+		if test -z "$firstshared"
+		then
+			firstshared="$i"
+		fi
+	fi
+done
+
+# Fall back to the first cache index if no index is shared.
+if test -z "$firstshared"
+then
+	splitindex="`echo $indexlist | sed -e 's/ .*$//'`"
+else
+	splitindex="$firstshared"
+fi
+
+# Emit the per-node CPU arrays, expanding "a-b" ranges and "a,b" lists.
+nodenum=0
+for n in node*
+do
+	cat $n/cpu*/cache/$splitindex/shared_cpu_list | sort -u -k1n |
+	awk -v nodenum="$nodenum" '
+	BEGIN {
+		idx = 0;
+	}
+
+	{
+		nlists = split($0, cpulists, ",");
+		for (i = 1; i <= nlists; i++) {
+			listsize = split(cpulists[i], cpus, "-");
+			if (listsize == 1)
+				cpus[2] = cpus[1];
+			for (j = cpus[1]; j <= cpus[2]; j++) {
+				print "cpu[" nodenum "][" idx "] = " j ";";
+				idx++;
+			}
+		}
+	}
+
+	END {
+		print "nodecpus[" nodenum "] = " idx ";";
+	}'
+	nodenum=`expr $nodenum + 1`
+done
+echo "numnodes = $nodenum;"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh b/tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh
new file mode 100755
index 000000000000..20c7c53c5795
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh
@@ -0,0 +1,92 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Create an awk script that takes as input numbers of CPUs and outputs
+# lists of CPUs, one per line in both cases.
+#
+# Usage: kvm-get-cpus-script.sh /path/to/cpu/arrays /path/to/put/script [ /path/to/state ]
+#
+# The CPU arrays are output by kvm-assign-cpus.sh, and are valid awk
+# statements initializing the variables describing the system's topology.
+#
+# The optional state is input by this script (if the file exists and is
+# non-empty), and can also be output by this script.
+
+cpuarrays="${1-/sys/devices/system/node}"
+scriptfile="${2}"
+statefile="${3}"
+
+if ! test -f "$cpuarrays"
+then
+	echo "File not found: $cpuarrays" 1>&2
+	exit 1
+fi
+scriptdir="`dirname "$scriptfile"`"
+if ! test -d "$scriptdir" || ! test -x "$scriptdir" || ! test -w "$scriptdir"
+then
+	echo "Directory not usable for script output: $scriptdir" 1>&2
+	exit 1
+fi
+
+# The generated BEGIN block holds the CPU arrays followed by any readable
+# saved state.
+cat << '___EOF___' > "$scriptfile"
+BEGIN {
+___EOF___
+cat "$cpuarrays" >> "$scriptfile"
+if test -r "$statefile"
+then
+	cat "$statefile" >> "$scriptfile"
+fi
+cat << '___EOF___' >> "$scriptfile"
+}
+
+# Do we have the system architecture to guide CPU affinity?
+function gotcpus()
+{
+	return numnodes != "";
+}
+
+# Return a comma-separated list of the next n CPUs.
+function nextcpus(n, i, s)
+{
+	for (i = 0; i < n; i++) {
+		if (nodecpus[curnode] == "")
+			curnode = 0;
+		if (cpu[curnode][curcpu[curnode]] == "")
+			curcpu[curnode] = 0;
+		if (s != "")
+			s = s ",";
+		s = s cpu[curnode][curcpu[curnode]];
+		curcpu[curnode]++;
+		curnode++;
+	}
+	return s;
+}
+
+# Dump out the current node/CPU state so that a later invocation of this
+# script can continue where this one left off. Of course, this only works
+# when a state file was specified and where there was valid sysfs state.
+# Returns 1 if the state was dumped, 0 otherwise.
+#
+# Dumping the state for one system configuration and loading it into
+# another isn't likely to do what you want, whatever that might be.
+function dumpcpustate( i, fn)
+{
+___EOF___
+# The here-documents are quoted, so the state-file name is spliced into
+# the generated function with a separate echo.
+echo '	fn = "'"$statefile"'";' >> "$scriptfile"
+cat << '___EOF___' >> "$scriptfile"
+	if (fn != "" && gotcpus()) {
+		print "curnode = " curnode ";" > fn;
+		for (i = 0; i < numnodes; i++)
+			if (curcpu[i] != "")
+				print "curcpu[" i "] = " curcpu[i] ";" >> fn;
+		return 1;
+	}
+	if (fn != "")
+		print "# No CPU state to dump." > fn;
+	return 0;
+}
+___EOF___
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
index f3a7a5e2b89d..db2c0e2c8e1d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
@@ -25,7 +25,7 @@ then
echo "$configfile -------"
else
title="$configfile ------- $ncs acquisitions/releases"
- dur=`sed -e 's/^.* locktorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
+ dur=`grep -v '^#' $i/qemu-cmd | sed -e 's/^.* locktorture.shutdown_secs=//' -e 's/ .*$//' 2> /dev/null`
if test -z "$dur"
then
:
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
index 671bfee4fcef..3afa5c6eda4f 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
@@ -25,7 +25,7 @@ if test -z "$nscfs"
then
echo "$configfile ------- "
else
- dur="`sed -e 's/^.* scftorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`"
+ dur="`grep -v '^#' $i/qemu-cmd | sed -e 's/^.* scftorture.shutdown_secs=//' -e 's/ .*$//' 2> /dev/null`"
if test -z "$dur"
then
rate=""
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index e01b31b87044..0a5419982ab3 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -74,7 +74,10 @@ do
done
if test -f "$rd/kcsan.sum"
then
- if grep -q CONFIG_KCSAN=y $T
+ if ! test -f $T
+ then
+ :
+ elif grep -q CONFIG_KCSAN=y $T
then
echo "Compiler or architecture does not support KCSAN!"
echo Did you forget to switch your compiler with '--kmake-arg CC=<cc-that-supports-kcsan>'?
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh
new file mode 100755
index 000000000000..014ce68260d7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Periodically scan a directory tree to prevent files from being reaped
+# by systemd and friends on long runs.
+#
+# Usage: kvm-remote-noreap.sh pathname
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+pathname="$1"
+# A pathname argument is mandatory.
+if [ -z "$pathname" ]; then
+	echo Usage: kvm-remote-noreap.sh pathname
+	exit 1
+fi
+# The pathname must name an existing directory.
+if [ ! -d "$pathname" ]; then
+	echo Usage: kvm-remote-noreap.sh pathname
+	echo " pathname must be a directory."
+	exit 2
+fi
+# Touch every regular file every 30 seconds until the directory goes away.
+until [ ! -d "$pathname" ]; do
+	find "$pathname" -type f -exec touch -c {} \; > /dev/null 2>&1
+	sleep 30
+done
+
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index 79e680e0e7bf..03126eb6ec5a 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -124,10 +124,12 @@ awk < "$rundir"/scenarios -v dest="$T/bin" -v rundir="$rundir" '
n = $1;
sub(/\./, "", n);
fn = dest "/kvm-remote-" n ".sh"
+ print "kvm-remote-noreap.sh " rundir " &" > fn;
scenarios = "";
for (i = 2; i <= NF; i++)
scenarios = scenarios " " $i;
- print "kvm-test-1-run-batch.sh" scenarios > fn;
+ print "kvm-test-1-run-batch.sh" scenarios >> fn;
+ print "sync" >> fn;
print "rm " rundir "/remote.run" >> fn;
}'
chmod +x $T/bin/kvm-remote-*.sh
@@ -172,11 +174,20 @@ checkremotefile () {
do
ssh $1 "test -f \"$2\""
ret=$?
- if test "$ret" -ne 255
+ if test "$ret" -eq 255
then
+ echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date`
+ elif test "$ret" -eq 0
+ then
+ return 0
+ elif test "$ret" -eq 1
+ then
+ echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\"
+ return 1
+ else
+ echo " ---" Exit code $ret: ssh $1 test -f \"$2\", retry after $sleeptime seconds. `date`
return $ret
fi
- echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date`
sleep $sleeptime
done
}
@@ -242,7 +253,8 @@ do
do
sleep 30
done
- ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu_pid */qemu-retval; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
+ echo " ---" Collecting results from $i `date`
+ ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
done
( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
index 7ea0809e229e..1e29d656501b 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
@@ -50,10 +50,34 @@ grep '^#' $1/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings
echo ---- System running test: `uname -a`
echo ---- Starting kernels. `date` | tee -a log
$TORTURE_JITTER_START
+kvm-assign-cpus.sh /sys/devices/system/node > $T/cpuarray.awk
for i in "$@"
do
echo ---- System running test: `uname -a` > $i/kvm-test-1-run-qemu.sh.out
echo > $i/kvm-test-1-run-qemu.sh.out
+ export TORTURE_AFFINITY=
+ kvm-get-cpus-script.sh $T/cpuarray.awk $T/cpubatches.awk $T/cpustate
+ cat << ' ___EOF___' >> $T/cpubatches.awk
+ END {
+ affinitylist = "";
+ if (!gotcpus()) {
+ print "echo No CPU-affinity information, so no taskset command.";
+ } else if (cpu_count !~ /^[0-9][0-9]*$/) {
+ print "echo " scenario ": Bogus number of CPUs (old qemu-cmd?), so no taskset command.";
+ } else {
+ affinitylist = nextcpus(cpu_count);
+ if (!(affinitylist ~ /^[0-9,-][0-9,-]*$/))
+ print "echo " scenario ": Bogus CPU-affinity information, so no taskset command.";
+ else if (!dumpcpustate())
+ print "echo " scenario ": Could not dump state, so no taskset command.";
+ else
+ print "export TORTURE_AFFINITY=" affinitylist;
+ }
+ }
+ ___EOF___
+ cpu_count="`grep '# TORTURE_CPU_COUNT=' $i/qemu-cmd | sed -e 's/^.*=//'`"
+ affinity_export="`awk -f $T/cpubatches.awk -v cpu_count="$cpu_count" -v scenario=$i < /dev/null`"
+ $affinity_export
kvm-test-1-run-qemu.sh $i >> $i/kvm-test-1-run-qemu.sh.out 2>&1 &
done
for i in $runfiles
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
index 5b1aa2a4f3f6..44280582c594 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
@@ -39,27 +39,34 @@ echo ' ---' `date`: Starting kernel, PID $$
grep '^#' $resdir/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings
. $T/qemu-cmd-settings
-# Decorate qemu-cmd with redirection, backgrounding, and PID capture
-sed -e 's/$/ 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd
-echo 'echo $! > $resdir/qemu_pid' >> $T/qemu-cmd
+# Decorate qemu-cmd with affinity, redirection, backgrounding, and PID capture
+taskset_command=
+if test -n "$TORTURE_AFFINITY"
+then
+ taskset_command="taskset -c $TORTURE_AFFINITY "
+fi
+sed -e 's/^[^#].*$/'"$taskset_command"'& 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd
+echo 'qemu_pid=$!' >> $T/qemu-cmd
+echo 'echo $qemu_pid > $resdir/qemu-pid' >> $T/qemu-cmd
+echo 'taskset -c -p $qemu_pid > $resdir/qemu-affinity' >> $T/qemu-cmd
# In case qemu refuses to run...
echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
# Attempt to run qemu
kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
-( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
+( . $T/qemu-cmd; wait `cat $resdir/qemu-pid`; echo $? > $resdir/qemu-retval ) &
commandcompleted=0
if test -z "$TORTURE_KCONFIG_GDB_ARG"
then
sleep 10 # Give qemu's pid a chance to reach the file
- if test -s "$resdir/qemu_pid"
+ if test -s "$resdir/qemu-pid"
then
- qemu_pid=`cat "$resdir/qemu_pid"`
- echo Monitoring qemu job at pid $qemu_pid
+ qemu_pid=`cat "$resdir/qemu-pid"`
+ echo Monitoring qemu job at pid $qemu_pid `date`
else
qemu_pid=""
- echo Monitoring qemu job at yet-as-unknown pid
+ echo Monitoring qemu job at yet-as-unknown pid `date`
fi
fi
if test -n "$TORTURE_KCONFIG_GDB_ARG"
@@ -82,9 +89,9 @@ then
fi
while :
do
- if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+ if test -z "$qemu_pid" && test -s "$resdir/qemu-pid"
then
- qemu_pid=`cat "$resdir/qemu_pid"`
+ qemu_pid=`cat "$resdir/qemu-pid"`
fi
kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
@@ -115,22 +122,22 @@ do
break
fi
done
-if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+if test -z "$qemu_pid" && test -s "$resdir/qemu-pid"
then
- qemu_pid=`cat "$resdir/qemu_pid"`
+ qemu_pid=`cat "$resdir/qemu-pid"`
fi
-if test $commandcompleted -eq 0 -a -n "$qemu_pid"
+if test $commandcompleted -eq 0 && test -n "$qemu_pid"
then
if ! test -f "$resdir/../STOP.1"
then
- echo Grace period for qemu job at pid $qemu_pid
+ echo Grace period for qemu job at pid $qemu_pid `date`
fi
oldline="`tail $resdir/console.log`"
while :
do
if test -f "$resdir/../STOP.1"
then
- echo "PID $qemu_pid killed due to run STOP.1 request" >> $resdir/Warnings 2>&1
+ echo "PID $qemu_pid killed due to run STOP.1 request `date`" >> $resdir/Warnings 2>&1
kill -KILL $qemu_pid
break
fi
@@ -152,13 +159,17 @@ then
then
last_ts=0
fi
- if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ if test "$newline" != "$oldline" && test "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE)) && test "$last_ts" -gt "$TORTURE_SHUTDOWN_GRACE"
then
must_continue=yes
+ if test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ then
+ echo Continuing at console.log time $last_ts \"`tail -n 1 $resdir/console.log`\" `date`
+ fi
fi
- if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ if test $must_continue = no && test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
then
- echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1
+ echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds `date`" >> $resdir/Warnings 2>&1
kill -KILL $qemu_pid
break
fi
@@ -172,5 +183,3 @@ fi
# Tell the script that this run is done.
rm -f $resdir/build.run
-
-parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 420ed5ce9d32..f4c8055dbf7a 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -205,6 +205,7 @@ echo "# TORTURE_KCONFIG_GDB_ARG=\"$TORTURE_KCONFIG_GDB_ARG\"" >> $resdir/qemu-cm
echo "# TORTURE_JITTER_START=\"$TORTURE_JITTER_START\"" >> $resdir/qemu-cmd
echo "# TORTURE_JITTER_STOP=\"$TORTURE_JITTER_STOP\"" >> $resdir/qemu-cmd
echo "# TORTURE_TRUST_MAKE=\"$TORTURE_TRUST_MAKE\"; export TORTURE_TRUST_MAKE" >> $resdir/qemu-cmd
+echo "# TORTURE_CPU_COUNT=$cpu_count" >> $resdir/qemu-cmd
if test -n "$TORTURE_BUILDONLY"
then
@@ -214,3 +215,4 @@ then
fi
kvm-test-1-run-qemu.sh $resdir
+parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index b4ac4ee33222..f442d84fb2a3 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -430,17 +430,10 @@ then
git diff HEAD >> $resdir/$ds/testid.txt
fi
___EOF___
-awk < $T/cfgcpu.pack \
- -v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
- -v CONFIGDIR="$CONFIGFRAG/" \
- -v KVM="$KVM" \
- -v ncpus=$cpus \
- -v jitter="$jitter" \
- -v rd=$resdir/$ds/ \
- -v dur=$dur \
- -v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \
- -v TORTURE_BOOTARGS="$TORTURE_BOOTARGS" \
-'BEGIN {
+kvm-assign-cpus.sh /sys/devices/system/node > $T/cpuarray.awk
+kvm-get-cpus-script.sh $T/cpuarray.awk $T/dumpbatches.awk
+cat << '___EOF___' >> $T/dumpbatches.awk
+BEGIN {
i = 0;
}
@@ -451,7 +444,7 @@ awk < $T/cfgcpu.pack \
}
# Dump out the scripting required to run one test batch.
-function dump(first, pastlast, batchnum)
+function dump(first, pastlast, batchnum, affinitylist)
{
print "echo ----Start batch " batchnum ": `date` | tee -a " rd "log";
print "needqemurun="
@@ -483,6 +476,14 @@ function dump(first, pastlast, batchnum)
print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log";
print "mkdir " rd cfr[jn] " || :";
print "touch " builddir ".wait";
+ affinitylist = "";
+ if (gotcpus()) {
+ affinitylist = nextcpus(cpusr[jn]);
+ }
+ if (affinitylist ~ /^[0-9,-][0-9,-]*$/)
+ print "export TORTURE_AFFINITY=" affinitylist;
+ else
+ print "export TORTURE_AFFINITY=";
print "kvm-test-1-run.sh " CONFIGDIR cf[j], rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &"
print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log";
print "while test -f " builddir ".wait"
@@ -560,7 +561,19 @@ END {
# Dump the last batch.
if (ncpus != 0)
dump(first, i, batchnum);
-}' >> $T/script
+}
+___EOF___
+awk < $T/cfgcpu.pack \
+ -v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
+ -v CONFIGDIR="$CONFIGFRAG/" \
+ -v KVM="$KVM" \
+ -v ncpus=$cpus \
+ -v jitter="$jitter" \
+ -v rd=$resdir/$ds/ \
+ -v dur=$dur \
+ -v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \
+ -v TORTURE_BOOTARGS="$TORTURE_BOOTARGS" \
+ -f $T/dumpbatches.awk >> $T/script
echo kvm-end-run-stats.sh "$resdir/$ds" "$starttime" >> $T/script
# Extract the tests and their batches from the script.
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 53ec7c046262..363f56081eff 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -53,6 +53,7 @@ do_refscale=yes
do_kvfree=yes
do_kasan=yes
do_kcsan=no
+do_clocksourcewd=yes
# doyesno - Helper function for yes/no arguments
function doyesno () {
@@ -72,6 +73,7 @@ usage () {
echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\""
echo " --doall"
echo " --doallmodconfig / --do-no-allmodconfig"
+ echo " --do-clocksourcewd / --do-no-clocksourcewd"
echo " --do-kasan / --do-no-kasan"
echo " --do-kcsan / --do-no-kcsan"
echo " --do-kvfree / --do-no-kvfree"
@@ -109,7 +111,7 @@ do
configs_scftorture="$configs_scftorture $2"
shift
;;
- --doall)
+ --do-all|--doall)
do_allmodconfig=yes
do_rcutorture=yes
do_locktorture=yes
@@ -119,10 +121,14 @@ do
do_kvfree=yes
do_kasan=yes
do_kcsan=yes
+ do_clocksourcewd=yes
;;
--do-allmodconfig|--do-no-allmodconfig)
do_allmodconfig=`doyesno "$1" --do-allmodconfig`
;;
+ --do-clocksourcewd|--do-no-clocksourcewd)
+ do_clocksourcewd=`doyesno "$1" --do-clocksourcewd`
+ ;;
--do-kasan|--do-no-kasan)
do_kasan=`doyesno "$1" --do-kasan`
;;
@@ -135,7 +141,7 @@ do
--do-locktorture|--do-no-locktorture)
do_locktorture=`doyesno "$1" --do-locktorture`
;;
- --do-none)
+ --do-none|--donone)
do_allmodconfig=no
do_rcutorture=no
do_locktorture=no
@@ -145,6 +151,7 @@ do
do_kvfree=no
do_kasan=no
do_kcsan=no
+ do_clocksourcewd=no
;;
--do-rcuscale|--do-no-rcuscale)
do_rcuscale=`doyesno "$1" --do-rcuscale`
@@ -279,9 +286,9 @@ function torture_one {
# torture_bootargs="[ kernel boot arguments ]"
# torture_set flavor [ kvm.sh arguments ]
#
-# Note that "flavor" is an arbitrary string. Supply --torture if needed.
-# Note that quoting is problematic. So on the command line, pass multiple
-# values with multiple kvm.sh argument instances.
+# Note that "flavor" is an arbitrary string that does not affect kvm.sh
+# in any way. So also supply --torture if you need something other than
+# the default.
function torture_set {
local cur_kcsan_kmake_args=
local kcsan_kmake_tag=
@@ -377,6 +384,22 @@ then
torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
fi
+if test "$do_clocksourcewd" = "yes"
+then
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000"
+ torture_set "clocksourcewd-1" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make
+
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 clocksource.max_cswd_read_retries=1"
+ torture_set "clocksourcewd-2" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make
+
+ # In case our work is already done...
+ if test "$do_rcutorture" != "yes"
+ then
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000"
+ torture_set "clocksourcewd-3" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --trust-make
+ fi
+fi
+
echo " --- " $scriptname $args
echo " --- " Done `date` | tee -a $T/log
ret=0
@@ -395,6 +418,10 @@ then
nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`"
ret=2
fi
+if test "$do_kcsan" = "yes"
+then
+ TORTURE_KCONFIG_KCSAN_ARG=1 tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh tools/testing/selftests/rcutorture/res/$ds > tools/testing/selftests/rcutorture/res/$ds/kcsan.sum
+fi
echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log
echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log
tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`"
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
index bafe94cbd739..3ca112444ce7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=2
+CONFIG_NR_CPUS=4
CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
index bafe94cbd739..3ca112444ce7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=2
+CONFIG_NR_CPUS=4
CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
index ea4399020c6c..dc02083803ce 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=2
+CONFIG_NR_CPUS=4
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c
index dee7a3d6c5a5..92bbc5a15c39 100644
--- a/tools/testing/selftests/sgx/sigstruct.c
+++ b/tools/testing/selftests/sgx/sigstruct.c
@@ -55,10 +55,27 @@ static bool alloc_q1q2_ctx(const uint8_t *s, const uint8_t *m,
return true;
}
+static void reverse_bytes(void *data, int length) /* Reverse the first `length` bytes of `data` in place. */
+{
+ int i = 0; /* index walking up from the first byte */
+ int j = length - 1; /* index walking down from the last byte */
+ uint8_t temp;
+ uint8_t *ptr = data;
+
+ while (i < j) { /* stops when the indices meet; length <= 1 swaps nothing */
+ temp = ptr[i];
+ ptr[i] = ptr[j];
+ ptr[j] = temp;
+ i++;
+ j--;
+ }
+}
+
static bool calc_q1q2(const uint8_t *s, const uint8_t *m, uint8_t *q1,
uint8_t *q2)
{
struct q1q2_ctx ctx;
+ int len;
if (!alloc_q1q2_ctx(s, m, &ctx)) {
fprintf(stderr, "Not enough memory for Q1Q2 calculation\n");
@@ -89,8 +106,10 @@ static bool calc_q1q2(const uint8_t *s, const uint8_t *m, uint8_t *q1,
goto out;
}
- BN_bn2bin(ctx.q1, q1);
- BN_bn2bin(ctx.q2, q2);
+ len = BN_bn2bin(ctx.q1, q1);
+ reverse_bytes(q1, len);
+ len = BN_bn2bin(ctx.q2, q2);
+ reverse_bytes(q2, len);
free_q1q2_ctx(&ctx);
return true;
@@ -152,22 +171,6 @@ static RSA *gen_sign_key(void)
return key;
}
-static void reverse_bytes(void *data, int length)
-{
- int i = 0;
- int j = length - 1;
- uint8_t temp;
- uint8_t *ptr = data;
-
- while (i < j) {
- temp = ptr[i];
- ptr[i] = ptr[j];
- ptr[j] = temp;
- i++;
- j--;
- }
-}
-
enum mrtags {
MRECREATE = 0x0045544145524345,
MREADD = 0x0000000044444145,
@@ -367,8 +370,6 @@ bool encl_measure(struct encl *encl)
/* BE -> LE */
reverse_bytes(sigstruct->signature, SGX_MODULUS_SIZE);
reverse_bytes(sigstruct->modulus, SGX_MODULUS_SIZE);
- reverse_bytes(sigstruct->q1, SGX_MODULUS_SIZE);
- reverse_bytes(sigstruct->q2, SGX_MODULUS_SIZE);
EVP_MD_CTX_destroy(ctx);
RSA_free(key);
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
index 6eb4c4f97060..742f2290973e 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
@@ -417,5 +417,29 @@
"teardown": [
"$TC actions flush action skbmod"
]
+ },
+ {
+ "id": "fe09",
+ "name": "Add skbmod action to mark ECN bits",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod ecn",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 1",
+ "matchPattern": "action order [0-9]*: skbmod pipe ecn",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
new file mode 100644
index 000000000000..88a20c781e49
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
@@ -0,0 +1,137 @@
+[
+ {
+ "id": "ce7d",
+ "name": "Add mq Qdisc to multi-queue device (4 queues)",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "4",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "2f82",
+ "name": "Add mq Qdisc to multi-queue device (256 queues)",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 256\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "256",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "c525",
+ "name": "Add duplicate mq Qdisc",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: mq"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "4",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "128a",
+ "name": "Delete nonexistent mq Qdisc",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "03a9",
+ "name": "Delete mq Qdisc twice",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: mq",
+ "$TC qdisc del dev $ETH root handle 1: mq"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "be0f",
+ "name": "Add mq Qdisc to single-queue device",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
index cd4a27ee1466..ea04f04c173e 100644
--- a/tools/testing/selftests/tc-testing/tdc_config.py
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -17,6 +17,7 @@ NAMES = {
'DEV1': 'v0p1',
'DEV2': '',
'DUMMY': 'dummy1',
+ 'ETH': 'eth0',
'BATCH_FILE': './batch.txt',
'BATCH_DIR': 'tmp',
# Length of time in seconds to wait before terminating a command
diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
index b587b9a7a124..0d7bbe49359d 100644
--- a/tools/virtio/Makefile
+++ b/tools/virtio/Makefile
@@ -4,7 +4,8 @@ test: virtio_test vringh_test
virtio_test: virtio_ring.o virtio_test.o
vringh_test: vringh_test.o vringh.o virtio_ring.o
-CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h
+CFLAGS += -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h
+LDFLAGS += -lpthread
vpath %.c ../../drivers/virtio ../../drivers/vhost
mod:
${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V}
diff --git a/tools/virtio/linux/spinlock.h b/tools/virtio/linux/spinlock.h
new file mode 100644
index 000000000000..028e3cdcc5d3
--- /dev/null
+++ b/tools/virtio/linux/spinlock.h
@@ -0,0 +1,56 @@
+#ifndef SPINLOCK_H_STUB
+#define SPINLOCK_H_STUB
+
+#include <pthread.h> /* NOTE(review): assert() is used below but <assert.h> is not included here; presumably the includer provides it -- confirm */
+
+typedef pthread_spinlock_t spinlock_t; /* spinlock_t is a pthread spinlock in this stub */
+
+static inline void spin_lock_init(spinlock_t *lock) /* Initialize *lock with pthread_spin_init(lock, 0); asserts that it returned 0. */
+{
+ int r = pthread_spin_init(lock, 0);
+ assert(!r);
+}
+
+static inline void spin_lock(spinlock_t *lock) /* Acquire *lock with pthread_spin_lock(); asserts that it returned 0. */
+{
+ int ret = pthread_spin_lock(lock);
+ assert(!ret);
+}
+
+static inline void spin_unlock(spinlock_t *lock) /* Release *lock with pthread_spin_unlock(); asserts that it returned 0. */
+{
+ int ret = pthread_spin_unlock(lock);
+ assert(!ret);
+}
+
+static inline void spin_lock_bh(spinlock_t *lock) /* Only calls spin_lock(); nothing else is done in this stub. */
+{
+ spin_lock(lock);
+}
+
+static inline void spin_unlock_bh(spinlock_t *lock) /* Only calls spin_unlock(). */
+{
+ spin_unlock(lock);
+}
+
+static inline void spin_lock_irq(spinlock_t *lock) /* Only calls spin_lock(). */
+{
+ spin_lock(lock);
+}
+
+static inline void spin_unlock_irq(spinlock_t *lock) /* Only calls spin_unlock(). */
+{
+ spin_unlock(lock);
+}
+
+static inline void spin_lock_irqsave(spinlock_t *lock, unsigned long f) /* Only calls spin_lock(); f is passed by value and never read or written here. */
+{
+ spin_lock(lock);
+}
+
+static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long f) /* Only calls spin_unlock(); f is never read here. */
+{
+ spin_unlock(lock);
+}
+
+#endif /* SPINLOCK_H_STUB */
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index 5d90254ddae4..363b98228301 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -3,6 +3,7 @@
#define LINUX_VIRTIO_H
#include <linux/scatterlist.h>
#include <linux/kernel.h>
+#include <linux/spinlock.h>
struct device {
void *parent;
@@ -12,6 +13,7 @@ struct virtio_device {
struct device dev;
u64 features;
struct list_head vqs;
+ spinlock_t vqs_list_lock;
};
struct virtqueue {